summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 02:25:50 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 02:25:50 +0000
commit19f4f86bfed21c5326ed2acebe1163f3a83e832b (patch)
treed59b9989ce55ed23693e80974d94c856f1c2c8b1 /src
parentInitial commit. (diff)
downloadsystemd-19f4f86bfed21c5326ed2acebe1163f3a83e832b.tar.xz
systemd-19f4f86bfed21c5326ed2acebe1163f3a83e832b.zip
Adding upstream version 241.upstream/241upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/ac-power/ac-power.c90
-rw-r--r--src/activate/activate.c509
-rw-r--r--src/analyze/analyze-security.c2086
-rw-r--r--src/analyze/analyze-security.h12
-rw-r--r--src/analyze/analyze-verify.c287
-rw-r--r--src/analyze/analyze-verify.h12
-rw-r--r--src/analyze/analyze.c2053
-rw-r--r--src/analyze/meson.build9
-rw-r--r--src/ask-password/ask-password.c188
-rw-r--r--src/backlight/backlight.c431
-rw-r--r--src/basic/MurmurHash2.c90
-rw-r--r--src/basic/MurmurHash2.h30
-rw-r--r--src/basic/af-list.c40
-rw-r--r--src/basic/af-list.h25
-rw-r--r--src/basic/af-to-name.awk9
-rw-r--r--src/basic/alloc-util.c81
-rw-r--r--src/basic/alloc-util.h162
-rw-r--r--src/basic/architecture.c178
-rw-r--r--src/basic/architecture.h237
-rw-r--r--src/basic/arphrd-list.c41
-rw-r--r--src/basic/arphrd-list.h7
-rw-r--r--src/basic/arphrd-to-name.awk9
-rw-r--r--src/basic/async.c107
-rw-r--r--src/basic/async.h7
-rw-r--r--src/basic/audit-util.c89
-rw-r--r--src/basic/audit-util.h17
-rw-r--r--src/basic/blockdev-util.c181
-rw-r--r--src/basic/blockdev-util.h20
-rw-r--r--src/basic/btrfs-util.c1993
-rw-r--r--src/basic/btrfs-util.h120
-rw-r--r--src/basic/build.h156
-rw-r--r--src/basic/bus-label.c81
-rw-r--r--src/basic/bus-label.h15
-rw-r--r--src/basic/cap-list.c119
-rw-r--r--src/basic/cap-list.h9
-rw-r--r--src/basic/cap-to-name.awk9
-rw-r--r--src/basic/capability-util.c487
-rw-r--r--src/basic/capability-util.h70
-rw-r--r--src/basic/cgroup-util.c2933
-rw-r--r--src/basic/cgroup-util.h275
-rw-r--r--src/basic/chattr-util.c96
-rw-r--r--src/basic/chattr-util.h8
-rw-r--r--src/basic/conf-files.c325
-rw-r--r--src/basic/conf-files.h23
-rw-r--r--src/basic/copy.c936
-rw-r--r--src/basic/copy.h61
-rw-r--r--src/basic/def.h77
-rw-r--r--src/basic/device-nodes.c63
-rw-r--r--src/basic/device-nodes.h16
-rw-r--r--src/basic/dirent-util.c71
-rw-r--r--src/basic/dirent-util.h36
-rw-r--r--src/basic/env-file.c564
-rw-r--r--src/basic/env-file.h17
-rw-r--r--src/basic/env-util.c752
-rw-r--r--src/basic/env-util.h47
-rw-r--r--src/basic/errno-list.c37
-rw-r--r--src/basic/errno-list.h15
-rw-r--r--src/basic/errno-to-name.awk9
-rw-r--r--src/basic/escape.c506
-rw-r--r--src/basic/escape.h53
-rw-r--r--src/basic/ether-addr-util.c111
-rw-r--r--src/basic/ether-addr-util.h28
-rw-r--r--src/basic/extract-word.c285
-rw-r--r--src/basic/extract-word.h17
-rw-r--r--src/basic/fd-util.c967
-rw-r--r--src/basic/fd-util.h110
-rw-r--r--src/basic/fileio.c822
-rw-r--r--src/basic/fileio.h78
-rw-r--r--src/basic/format-util.h67
-rw-r--r--src/basic/fs-util.c1358
-rw-r--r--src/basic/fs-util.h111
-rw-r--r--src/basic/gcrypt-util.c52
-rw-r--r--src/basic/gcrypt-util.h34
-rwxr-xr-xsrc/basic/generate-af-list.sh6
-rwxr-xr-xsrc/basic/generate-arphrd-list.sh6
-rwxr-xr-xsrc/basic/generate-cap-list.sh6
-rwxr-xr-xsrc/basic/generate-errno-list.sh5
-rw-r--r--src/basic/glob-util.c73
-rw-r--r--src/basic/glob-util.h22
-rw-r--r--src/basic/gunicode.c110
-rw-r--r--src/basic/gunicode.h30
-rw-r--r--src/basic/hash-funcs.c91
-rw-r--r--src/basic/hash-funcs.h106
-rw-r--r--src/basic/hashmap.c1911
-rw-r--r--src/basic/hashmap.h429
-rw-r--r--src/basic/hexdecoct.c819
-rw-r--r--src/basic/hexdecoct.h38
-rw-r--r--src/basic/hostname-util.c308
-rw-r--r--src/basic/hostname-util.h28
-rw-r--r--src/basic/in-addr-util.c622
-rw-r--r--src/basic/in-addr-util.h70
-rw-r--r--src/basic/io-util.c264
-rw-r--r--src/basic/io-util.h75
-rw-r--r--src/basic/ioprio.h56
-rw-r--r--src/basic/khash.c322
-rw-r--r--src/basic/khash.h37
-rw-r--r--src/basic/label.c64
-rw-r--r--src/basic/label.h18
-rw-r--r--src/basic/list.h171
-rw-r--r--src/basic/locale-util.c460
-rw-r--r--src/basic/locale-util.h84
-rw-r--r--src/basic/log.c1324
-rw-r--r--src/basic/log.h324
-rw-r--r--src/basic/login-util.c14
-rw-r--r--src/basic/login-util.h11
-rw-r--r--src/basic/macro.h549
-rw-r--r--src/basic/memfd-util.c154
-rw-r--r--src/basic/memfd-util.h18
-rw-r--r--src/basic/mempool.c99
-rw-r--r--src/basic/mempool.h31
-rw-r--r--src/basic/meson.build310
-rw-r--r--src/basic/missing.h25
-rw-r--r--src/basic/missing_audit.h24
-rw-r--r--src/basic/missing_btrfs.h22
-rw-r--r--src/basic/missing_btrfs_tree.h109
-rw-r--r--src/basic/missing_capability.h12
-rw-r--r--src/basic/missing_drm.h10
-rw-r--r--src/basic/missing_ethtool.h131
-rw-r--r--src/basic/missing_fcntl.h60
-rw-r--r--src/basic/missing_fib_rules.h45
-rw-r--r--src/basic/missing_fou.h55
-rw-r--r--src/basic/missing_fs.h63
-rw-r--r--src/basic/missing_if_bridge.h21
-rw-r--r--src/basic/missing_if_link.h393
-rw-r--r--src/basic/missing_if_tunnel.h59
-rw-r--r--src/basic/missing_input.h45
-rw-r--r--src/basic/missing_keyctl.h78
-rw-r--r--src/basic/missing_magic.h34
-rw-r--r--src/basic/missing_mman.h12
-rw-r--r--src/basic/missing_network.h155
-rw-r--r--src/basic/missing_prctl.h14
-rw-r--r--src/basic/missing_random.h16
-rw-r--r--src/basic/missing_resource.h11
-rw-r--r--src/basic/missing_sched.h21
-rw-r--r--src/basic/missing_securebits.h17
-rw-r--r--src/basic/missing_socket.h60
-rw-r--r--src/basic/missing_stat.h51
-rw-r--r--src/basic/missing_stdlib.h13
-rw-r--r--src/basic/missing_syscall.h446
-rw-r--r--src/basic/missing_timerfd.h8
-rw-r--r--src/basic/missing_type.h12
-rw-r--r--src/basic/missing_vxcan.h12
-rw-r--r--src/basic/mkdir-label.c58
-rw-r--r--src/basic/mkdir.c166
-rw-r--r--src/basic/mkdir.h26
-rw-r--r--src/basic/mountpoint-util.c444
-rw-r--r--src/basic/mountpoint-util.h24
-rw-r--r--src/basic/nss-util.h183
-rw-r--r--src/basic/ordered-set.c47
-rw-r--r--src/basic/ordered-set.h62
-rw-r--r--src/basic/parse-util.c779
-rw-r--r--src/basic/parse-util.h120
-rw-r--r--src/basic/path-util.c1150
-rw-r--r--src/basic/path-util.h190
-rw-r--r--src/basic/prioq.c300
-rw-r--r--src/basic/prioq.h32
-rw-r--r--src/basic/proc-cmdline.c363
-rw-r--r--src/basic/proc-cmdline.h41
-rw-r--r--src/basic/process-util.c1565
-rw-r--r--src/basic/process-util.h194
-rw-r--r--src/basic/procfs-util.c268
-rw-r--r--src/basic/procfs-util.h17
-rw-r--r--src/basic/random-util.c265
-rw-r--r--src/basic/random-util.h33
-rw-r--r--src/basic/ratelimit.c38
-rw-r--r--src/basic/ratelimit.h40
-rw-r--r--src/basic/raw-clone.h79
-rw-r--r--src/basic/raw-reboot.h14
-rw-r--r--src/basic/refcnt.h54
-rw-r--r--src/basic/replace-var.c94
-rw-r--r--src/basic/replace-var.h4
-rw-r--r--src/basic/rlimit-util.c410
-rw-r--r--src/basic/rlimit-util.h25
-rw-r--r--src/basic/rm-rf.c220
-rw-r--r--src/basic/rm-rf.h32
-rw-r--r--src/basic/selinux-util.c518
-rw-r--r--src/basic/selinux-util.h34
-rw-r--r--src/basic/set.h130
-rw-r--r--src/basic/sigbus.c139
-rw-r--r--src/basic/sigbus.h7
-rw-r--r--src/basic/signal-util.c290
-rw-r--r--src/basic/signal-util.h43
-rw-r--r--src/basic/siphash24.c200
-rw-r--r--src/basic/siphash24.h28
-rw-r--r--src/basic/smack-util.c289
-rw-r--r--src/basic/smack-util.h42
-rw-r--r--src/basic/socket-label.c163
-rw-r--r--src/basic/socket-util.c1347
-rw-r--r--src/basic/socket-util.h200
-rw-r--r--src/basic/sparse-endian.h90
-rw-r--r--src/basic/special.h105
-rw-r--r--src/basic/stat-util.c427
-rw-r--r--src/basic/stat-util.h90
-rw-r--r--src/basic/static-destruct.h56
-rw-r--r--src/basic/stdio-util.h64
-rw-r--r--src/basic/strbuf.c183
-rw-r--r--src/basic/strbuf.h39
-rw-r--r--src/basic/string-table.c17
-rw-r--r--src/basic/string-table.h112
-rw-r--r--src/basic/string-util.c1099
-rw-r--r--src/basic/string-util.h266
-rw-r--r--src/basic/strv.c889
-rw-r--r--src/basic/strv.h190
-rw-r--r--src/basic/strxcpyx.c105
-rw-r--r--src/basic/strxcpyx.h12
-rw-r--r--src/basic/syslog-util.c99
-rw-r--r--src/basic/syslog-util.h14
-rw-r--r--src/basic/terminal-util.c1318
-rw-r--r--src/basic/terminal-util.h160
-rw-r--r--src/basic/time-util.c1471
-rw-r--r--src/basic/time-util.h179
-rw-r--r--src/basic/tmpfile-util.c330
-rw-r--r--src/basic/tmpfile-util.h19
-rw-r--r--src/basic/umask-util.h28
-rw-r--r--src/basic/unaligned.h99
-rw-r--r--src/basic/unit-def.c273
-rw-r--r--src/basic/unit-def.h284
-rw-r--r--src/basic/unit-name.c775
-rw-r--r--src/basic/unit-name.h65
-rw-r--r--src/basic/user-util.c857
-rw-r--r--src/basic/user-util.h115
-rw-r--r--src/basic/utf8.c532
-rw-r--r--src/basic/utf8.h51
-rw-r--r--src/basic/util.c637
-rw-r--r--src/basic/util.h253
-rw-r--r--src/basic/virt.c642
-rw-r--r--src/basic/virt.h57
-rw-r--r--src/basic/xattr-util.c217
-rw-r--r--src/basic/xattr-util.h25
-rw-r--r--src/binfmt/binfmt.c227
-rw-r--r--src/boot/bless-boot-generator.c71
-rw-r--r--src/boot/bless-boot.c472
-rw-r--r--src/boot/boot-check-no-failures.c102
-rw-r--r--src/boot/bootctl.c1275
-rw-r--r--src/boot/efi/boot.c2250
-rw-r--r--src/boot/efi/console.c227
-rw-r--r--src/boot/efi/console.h24
-rw-r--r--src/boot/efi/disk.c35
-rw-r--r--src/boot/efi/disk.h4
-rw-r--r--src/boot/efi/graphics.c77
-rw-r--r--src/boot/efi/graphics.h8
-rw-r--r--src/boot/efi/linux.c127
-rw-r--r--src/boot/efi/linux.h7
-rw-r--r--src/boot/efi/measure.c360
-rw-r--r--src/boot/efi/measure.h4
-rw-r--r--src/boot/efi/meson.build217
-rwxr-xr-xsrc/boot/efi/no-undefined-symbols.sh7
-rw-r--r--src/boot/efi/pe.c168
-rw-r--r--src/boot/efi/pe.h7
-rw-r--r--src/boot/efi/shim.c207
-rw-r--r--src/boot/efi/shim.h16
-rw-r--r--src/boot/efi/splash.c305
-rw-r--r--src/boot/efi/splash.h4
-rw-r--r--src/boot/efi/stub.c133
-rw-r--r--src/boot/efi/util.c342
-rw-r--r--src/boot/efi/util.h57
-rw-r--r--src/busctl/busctl-introspect.c732
-rw-r--r--src/busctl/busctl-introspect.h14
-rw-r--r--src/busctl/busctl.c2471
-rw-r--r--src/cgls/cgls.c302
-rw-r--r--src/cgroups-agent/cgroups-agent.c48
-rw-r--r--src/cgtop/cgtop.c1104
-rw-r--r--src/core/all-units.h14
-rw-r--r--src/core/audit-fd.c63
-rw-r--r--src/core/audit-fd.h5
-rw-r--r--src/core/automount.c1144
-rw-r--r--src/core/automount.h43
-rw-r--r--src/core/bpf-devices.c270
-rw-r--r--src/core/bpf-devices.h17
-rw-r--r--src/core/bpf-firewall.c767
-rw-r--r--src/core/bpf-firewall.h20
-rw-r--r--src/core/cgroup.c3074
-rw-r--r--src/core/cgroup.h221
-rw-r--r--src/core/chown-recursive.c133
-rw-r--r--src/core/chown-recursive.h6
-rw-r--r--src/core/dbus-automount.c64
-rw-r--r--src/core/dbus-automount.h11
-rw-r--r--src/core/dbus-cgroup.c1415
-rw-r--r--src/core/dbus-cgroup.h12
-rw-r--r--src/core/dbus-device.c11
-rw-r--r--src/core/dbus-device.h6
-rw-r--r--src/core/dbus-execute.c2423
-rw-r--r--src/core/dbus-execute.h29
-rw-r--r--src/core/dbus-job.c312
-rw-r--r--src/core/dbus-job.h19
-rw-r--r--src/core/dbus-kill.c61
-rw-r--r--src/core/dbus-kill.h12
-rw-r--r--src/core/dbus-manager.c2718
-rw-r--r--src/core/dbus-manager.h14
-rw-r--r--src/core/dbus-mount.c152
-rw-r--r--src/core/dbus-mount.h12
-rw-r--r--src/core/dbus-path.c159
-rw-r--r--src/core/dbus-path.h11
-rw-r--r--src/core/dbus-scope.c256
-rw-r--r--src/core/dbus-scope.h19
-rw-r--r--src/core/dbus-service.c437
-rw-r--r--src/core/dbus-service.h12
-rw-r--r--src/core/dbus-slice.c35
-rw-r--r--src/core/dbus-slice.h12
-rw-r--r--src/core/dbus-socket.c468
-rw-r--r--src/core/dbus-socket.h12
-rw-r--r--src/core/dbus-swap.c70
-rw-r--r--src/core/dbus-swap.h16
-rw-r--r--src/core/dbus-target.c9
-rw-r--r--src/core/dbus-target.h6
-rw-r--r--src/core/dbus-timer.c369
-rw-r--r--src/core/dbus-timer.h11
-rw-r--r--src/core/dbus-unit.c1951
-rw-r--r--src/core/dbus-unit.h33
-rw-r--r--src/core/dbus-util.c121
-rw-r--r--src/core/dbus-util.h248
-rw-r--r--src/core/dbus.c1315
-rw-r--r--src/core/dbus.h37
-rw-r--r--src/core/device.c1093
-rw-r--r--src/core/device.h43
-rw-r--r--src/core/dynamic-user.c824
-rw-r--r--src/core/dynamic-user.h40
-rw-r--r--src/core/emergency-action.c161
-rw-r--r--src/core/emergency-action.h34
-rw-r--r--src/core/execute.c5290
-rw-r--r--src/core/execute.h404
-rw-r--r--src/core/hostname-setup.c51
-rw-r--r--src/core/hostname-setup.h4
-rw-r--r--src/core/ima-setup.c90
-rw-r--r--src/core/ima-setup.h9
-rw-r--r--src/core/ip-address-access.c205
-rw-r--r--src/core/ip-address-access.h21
-rw-r--r--src/core/job.c1668
-rw-r--r--src/core/job.h239
-rw-r--r--src/core/kill.c54
-rw-r--r--src/core/kill.h49
-rw-r--r--src/core/killall.c234
-rw-r--r--src/core/killall.h6
-rw-r--r--src/core/kmod-setup.c129
-rw-r--r--src/core/kmod-setup.h4
-rw-r--r--src/core/load-dropin.c142
-rw-r--r--src/core/load-dropin.h19
-rw-r--r--src/core/load-fragment-gperf-nulstr.awk14
-rw-r--r--src/core/load-fragment-gperf.gperf.m4459
-rw-r--r--src/core/load-fragment.c4828
-rw-r--r--src/core/load-fragment.h112
-rw-r--r--src/core/locale-setup.c97
-rw-r--r--src/core/locale-setup.h4
-rw-r--r--src/core/loopback-setup.c212
-rw-r--r--src/core/loopback-setup.h4
-rw-r--r--src/core/machine-id-setup.c243
-rw-r--r--src/core/machine-id-setup.h5
-rw-r--r--src/core/macros.systemd.in145
-rw-r--r--src/core/main.c2690
-rw-r--r--src/core/manager.c4678
-rw-r--r--src/core/manager.h517
-rw-r--r--src/core/meson.build218
-rw-r--r--src/core/mount-setup.c566
-rw-r--r--src/core/mount-setup.h12
-rw-r--r--src/core/mount.c2043
-rw-r--r--src/core/mount.h99
-rw-r--r--src/core/namespace.c1733
-rw-r--r--src/core/namespace.h112
-rw-r--r--src/core/org.freedesktop.systemd1.conf400
-rw-r--r--src/core/org.freedesktop.systemd1.policy.in73
-rw-r--r--src/core/org.freedesktop.systemd1.service13
-rw-r--r--src/core/path.c787
-rw-r--r--src/core/path.h77
-rw-r--r--src/core/scope.c624
-rw-r--r--src/core/scope.h44
-rw-r--r--src/core/selinux-access.c271
-rw-r--r--src/core/selinux-access.h24
-rw-r--r--src/core/selinux-setup.c106
-rw-r--r--src/core/selinux-setup.h6
-rw-r--r--src/core/service.c4161
-rw-r--r--src/core/service.h211
-rw-r--r--src/core/show-status.c116
-rw-r--r--src/core/show-status.h29
-rw-r--r--src/core/shutdown.c545
-rw-r--r--src/core/slice.c397
-rw-r--r--src/core/slice.h18
-rw-r--r--src/core/smack-setup.c421
-rw-r--r--src/core/smack-setup.h10
-rw-r--r--src/core/socket.c3339
-rw-r--r--src/core/socket.h182
-rw-r--r--src/core/swap.c1555
-rw-r--r--src/core/swap.h97
-rw-r--r--src/core/system.conf.in64
-rw-r--r--src/core/systemd.pc.in40
-rw-r--r--src/core/target.c223
-rw-r--r--src/core/target.h16
-rw-r--r--src/core/timer.c881
-rw-r--r--src/core/timer.h76
-rw-r--r--src/core/transaction.c1119
-rw-r--r--src/core/transaction.h34
-rw-r--r--src/core/triggers.systemd.in143
-rw-r--r--src/core/umount.c699
-rw-r--r--src/core/umount.h30
-rw-r--r--src/core/unit-printf.c289
-rw-r--r--src/core/unit-printf.h7
-rw-r--r--src/core/unit.c5593
-rw-r--r--src/core/unit.h856
-rw-r--r--src/core/user.conf44
-rw-r--r--src/coredump/coredump-vacuum.c245
-rw-r--r--src/coredump/coredump-vacuum.h7
-rw-r--r--src/coredump/coredump.c1405
-rw-r--r--src/coredump/coredump.conf21
-rw-r--r--src/coredump/coredumpctl.c1096
-rw-r--r--src/coredump/meson.build28
-rw-r--r--src/coredump/stacktrace.c185
-rw-r--r--src/coredump/stacktrace.h4
-rw-r--r--src/coredump/test-coredump-vacuum.c13
-rw-r--r--src/cryptsetup/cryptsetup-generator.c609
-rw-r--r--src/cryptsetup/cryptsetup.c745
-rw-r--r--src/debug-generator/debug-generator.c165
-rw-r--r--src/delta/delta.c685
-rw-r--r--src/detect-virt/detect-virt.c177
-rw-r--r--src/dissect/dissect.c282
-rw-r--r--src/environment-d-generator/environment-d-generator.c89
-rw-r--r--src/escape/escape.c258
-rw-r--r--src/firstboot/firstboot.c994
-rw-r--r--src/fsck/fsck.c437
-rw-r--r--src/fstab-generator/fstab-generator.c922
-rw-r--r--src/fuzz/fuzz-bus-message.c47
-rw-r--r--src/fuzz/fuzz-catalog.c26
-rw-r--r--src/fuzz/fuzz-compress.c80
-rw-r--r--src/fuzz/fuzz-dhcp-server.c52
-rw-r--r--src/fuzz/fuzz-dhcp-server.options2
-rw-r--r--src/fuzz/fuzz-dhcp6-client.c59
-rw-r--r--src/fuzz/fuzz-dns-packet.c24
-rw-r--r--src/fuzz/fuzz-dns-packet.options2
-rw-r--r--src/fuzz/fuzz-journal-remote.c78
-rw-r--r--src/fuzz/fuzz-journal-remote.options2
-rw-r--r--src/fuzz/fuzz-journald-audit.c15
-rw-r--r--src/fuzz/fuzz-journald-kmsg.c18
-rw-r--r--src/fuzz/fuzz-journald-native-fd.c47
-rw-r--r--src/fuzz/fuzz-journald-native.c10
-rw-r--r--src/fuzz/fuzz-journald-stream.c36
-rw-r--r--src/fuzz/fuzz-journald-syslog.c10
-rw-r--r--src/fuzz/fuzz-journald.c46
-rw-r--r--src/fuzz/fuzz-journald.h12
-rw-r--r--src/fuzz/fuzz-json.c30
-rw-r--r--src/fuzz/fuzz-lldp.c40
-rw-r--r--src/fuzz/fuzz-main.c45
-rw-r--r--src/fuzz/fuzz-ndisc-rs.c58
-rw-r--r--src/fuzz/fuzz-udev-rules.c107
-rw-r--r--src/fuzz/fuzz-unit-file.c84
-rw-r--r--src/fuzz/fuzz.h8
-rw-r--r--src/fuzz/meson.build111
-rw-r--r--src/getty-generator/getty-generator.c206
-rw-r--r--src/gpt-auto-generator/gpt-auto-generator.c746
-rw-r--r--src/hibernate-resume/hibernate-resume-generator.c102
-rw-r--r--src/hibernate-resume/hibernate-resume.c63
-rw-r--r--src/hostname/hostnamectl.c449
-rw-r--r--src/hostname/hostnamed.c763
-rw-r--r--src/hostname/meson.build10
-rw-r--r--src/hostname/org.freedesktop.hostname1.conf29
-rw-r--r--src/hostname/org.freedesktop.hostname1.policy60
-rw-r--r--src/hostname/org.freedesktop.hostname1.service12
-rw-r--r--src/hwdb/hwdb.c129
-rw-r--r--src/id128/id128.c168
-rw-r--r--src/import/curl-util.c416
-rw-r--r--src/import/curl-util.h39
-rw-r--r--src/import/export-raw.c332
-rw-r--r--src/import/export-raw.h18
-rw-r--r--src/import/export-tar.c331
-rw-r--r--src/import/export-tar.h18
-rw-r--r--src/import/export.c298
-rw-r--r--src/import/import-common.c259
-rw-r--r--src/import/import-common.h12
-rw-r--r--src/import/import-compress.c466
-rw-r--r--src/import/import-compress.h47
-rw-r--r--src/import/import-fs.c327
-rw-r--r--src/import/import-pubring.gpgbin0 -> 9551 bytes
-rw-r--r--src/import/import-raw.c438
-rw-r--r--src/import/import-raw.h18
-rw-r--r--src/import/import-tar.c369
-rw-r--r--src/import/import-tar.h18
-rw-r--r--src/import/import.c322
-rw-r--r--src/import/importd.c1266
-rw-r--r--src/import/meson.build79
-rw-r--r--src/import/org.freedesktop.import1.conf84
-rw-r--r--src/import/org.freedesktop.import1.policy51
-rw-r--r--src/import/org.freedesktop.import1.service14
-rw-r--r--src/import/pull-common.c530
-rw-r--r--src/import/pull-common.h18
-rw-r--r--src/import/pull-job.c636
-rw-r--r--src/import/pull-job.h93
-rw-r--r--src/import/pull-raw.c750
-rw-r--r--src/import/pull-raw.h18
-rw-r--r--src/import/pull-tar.c559
-rw-r--r--src/import/pull-tar.h18
-rw-r--r--src/import/pull.c333
-rw-r--r--src/import/qcow2-util.c334
-rw-r--r--src/import/qcow2-util.h5
-rw-r--r--src/import/test-qcow2.c36
-rw-r--r--src/initctl/initctl.c405
-rw-r--r--src/journal-remote/browse.html547
-rw-r--r--src/journal-remote/journal-gatewayd.c1045
-rw-r--r--src/journal-remote/journal-remote-main.c1158
-rw-r--r--src/journal-remote/journal-remote-parse.c84
-rw-r--r--src/journal-remote/journal-remote-parse.h20
-rw-r--r--src/journal-remote/journal-remote-write.c105
-rw-r--r--src/journal-remote/journal-remote-write.h39
-rw-r--r--src/journal-remote/journal-remote.c533
-rw-r--r--src/journal-remote/journal-remote.conf.in19
-rw-r--r--src/journal-remote/journal-remote.h65
-rw-r--r--src/journal-remote/journal-upload-journal.c414
-rw-r--r--src/journal-remote/journal-upload.c854
-rw-r--r--src/journal-remote/journal-upload.conf.in18
-rw-r--r--src/journal-remote/journal-upload.h71
-rwxr-xr-xsrc/journal-remote/log-generator.py78
-rw-r--r--src/journal-remote/meson.build71
-rw-r--r--src/journal-remote/microhttpd-util.c312
-rw-r--r--src/journal-remote/microhttpd-util.h78
-rw-r--r--src/journal/audit-type.c6
-rw-r--r--src/journal/audit-type.h23
-rw-r--r--src/journal/audit_type-to-name.awk9
-rw-r--r--src/journal/cat.c168
-rw-r--r--src/journal/catalog.c743
-rw-r--r--src/journal/catalog.h18
-rw-r--r--src/journal/compress.c677
-rw-r--r--src/journal/compress.h67
-rw-r--r--src/journal/fsprg.c378
-rw-r--r--src/journal/fsprg.h62
-rwxr-xr-xsrc/journal/generate-audit_type-list.sh15
-rw-r--r--src/journal/journal-authenticate.c534
-rw-r--r--src/journal/journal-authenticate.h23
-rw-r--r--src/journal/journal-def.h230
-rw-r--r--src/journal/journal-file.c3908
-rw-r--r--src/journal/journal-file.h261
-rw-r--r--src/journal/journal-internal.h128
-rw-r--r--src/journal/journal-qrcode.c121
-rw-r--r--src/journal/journal-qrcode.h9
-rw-r--r--src/journal/journal-send.c543
-rw-r--r--src/journal/journal-vacuum.c321
-rw-r--r--src/journal/journal-vacuum.h9
-rw-r--r--src/journal/journal-verify.c1320
-rw-r--r--src/journal/journal-verify.h6
-rw-r--r--src/journal/journalctl.c2706
-rw-r--r--src/journal/journald-audit.c545
-rw-r--r--src/journal/journald-audit.h11
-rw-r--r--src/journal/journald-console.c103
-rw-r--r--src/journal/journald-console.h6
-rw-r--r--src/journal/journald-context.c782
-rw-r--r--src/journal/journald-context.h98
-rw-r--r--src/journal/journald-gperf.gperf51
-rw-r--r--src/journal/journald-kmsg.c450
-rw-r--r--src/journal/journald-kmsg.h13
-rw-r--r--src/journal/journald-native.c502
-rw-r--r--src/journal/journald-native.h23
-rw-r--r--src/journal/journald-rate-limit.c255
-rw-r--r--src/journal/journald-rate-limit.h10
-rw-r--r--src/journal/journald-server.c2192
-rw-r--r--src/journal/journald-server.h214
-rw-r--r--src/journal/journald-stream.c876
-rw-r--r--src/journal/journald-stream.h15
-rw-r--r--src/journal/journald-syslog.c516
-rw-r--r--src/journal/journald-syslog.h15
-rw-r--r--src/journal/journald-wall.c54
-rw-r--r--src/journal/journald-wall.h6
-rw-r--r--src/journal/journald.c112
-rw-r--r--src/journal/journald.conf43
-rw-r--r--src/journal/lookup3.c1005
-rw-r--r--src/journal/lookup3.h22
-rw-r--r--src/journal/meson.build130
-rw-r--r--src/journal/mmap-cache.c668
-rw-r--r--src/journal/mmap-cache.h34
-rw-r--r--src/journal/sd-journal.c3137
-rw-r--r--src/journal/test-audit-type.c25
-rw-r--r--src/journal/test-catalog.c240
-rw-r--r--src/journal/test-compress-benchmark.c175
-rw-r--r--src/journal/test-compress.c341
-rw-r--r--src/journal/test-journal-config.c53
-rw-r--r--src/journal/test-journal-enum.c37
-rw-r--r--src/journal/test-journal-flush.c58
-rw-r--r--src/journal/test-journal-init.c48
-rw-r--r--src/journal/test-journal-interleaving.c288
-rw-r--r--src/journal/test-journal-match.c62
-rw-r--r--src/journal/test-journal-send.c85
-rw-r--r--src/journal/test-journal-stream.c180
-rw-r--r--src/journal/test-journal-syslog.c59
-rw-r--r--src/journal/test-journal-verify.c134
-rw-r--r--src/journal/test-journal.c252
-rw-r--r--src/journal/test-mmap-cache.c66
-rw-r--r--src/kernel-install/50-depmod.install17
-rw-r--r--src/kernel-install/90-loaderentry.install124
-rw-r--r--src/kernel-install/kernel-install171
-rw-r--r--src/kernel-install/meson.build13
-rw-r--r--src/libsystemd-network/arp-util.c137
-rw-r--r--src/libsystemd-network/arp-util.h18
-rw-r--r--src/libsystemd-network/dhcp-identifier.c209
-rw-r--r--src/libsystemd-network/dhcp-identifier.h60
-rw-r--r--src/libsystemd-network/dhcp-internal.h55
-rw-r--r--src/libsystemd-network/dhcp-lease-internal.h91
-rw-r--r--src/libsystemd-network/dhcp-network.c230
-rw-r--r--src/libsystemd-network/dhcp-option.c274
-rw-r--r--src/libsystemd-network/dhcp-packet.c168
-rw-r--r--src/libsystemd-network/dhcp-protocol.h99
-rw-r--r--src/libsystemd-network/dhcp-server-internal.h82
-rw-r--r--src/libsystemd-network/dhcp6-internal.h107
-rw-r--r--src/libsystemd-network/dhcp6-lease-internal.h61
-rw-r--r--src/libsystemd-network/dhcp6-network.c75
-rw-r--r--src/libsystemd-network/dhcp6-option.c592
-rw-r--r--src/libsystemd-network/dhcp6-protocol.h99
-rw-r--r--src/libsystemd-network/icmp6-util.c217
-rw-r--r--src/libsystemd-network/icmp6-util.h24
-rw-r--r--src/libsystemd-network/lldp-internal.h39
-rw-r--r--src/libsystemd-network/lldp-neighbor.c769
-rw-r--r--src/libsystemd-network/lldp-neighbor.h91
-rw-r--r--src/libsystemd-network/lldp-network.c78
-rw-r--r--src/libsystemd-network/lldp-network.h6
-rw-r--r--src/libsystemd-network/meson.build50
-rw-r--r--src/libsystemd-network/ndisc-internal.h43
-rw-r--r--src/libsystemd-network/ndisc-router.c749
-rw-r--r--src/libsystemd-network/ndisc-router.h48
-rw-r--r--src/libsystemd-network/network-internal.c620
-rw-r--r--src/libsystemd-network/network-internal.h56
-rw-r--r--src/libsystemd-network/radv-internal.h103
-rw-r--r--src/libsystemd-network/sd-dhcp-client.c1945
-rw-r--r--src/libsystemd-network/sd-dhcp-lease.c1309
-rw-r--r--src/libsystemd-network/sd-dhcp-server.c1136
-rw-r--r--src/libsystemd-network/sd-dhcp6-client.c1516
-rw-r--r--src/libsystemd-network/sd-dhcp6-lease.c408
-rw-r--r--src/libsystemd-network/sd-ipv4acd.c483
-rw-r--r--src/libsystemd-network/sd-ipv4ll.c320
-rw-r--r--src/libsystemd-network/sd-lldp.c496
-rw-r--r--src/libsystemd-network/sd-ndisc.c388
-rw-r--r--src/libsystemd-network/sd-radv.c771
-rw-r--r--src/libsystemd-network/test-acd.c96
-rw-r--r--src/libsystemd-network/test-dhcp-client.c579
-rw-r--r--src/libsystemd-network/test-dhcp-option.c367
-rw-r--r--src/libsystemd-network/test-dhcp-server.c244
-rw-r--r--src/libsystemd-network/test-dhcp6-client.c949
-rw-r--r--src/libsystemd-network/test-ipv4ll-manual.c110
-rw-r--r--src/libsystemd-network/test-ipv4ll.c205
-rw-r--r--src/libsystemd-network/test-lldp.c378
-rw-r--r--src/libsystemd-network/test-ndisc-ra.c369
-rw-r--r--src/libsystemd-network/test-ndisc-rs.c417
-rw-r--r--src/libsystemd-network/test-sd-dhcp-lease.c90
-rw-r--r--src/libsystemd/disable-mempool.c5
-rw-r--r--src/libsystemd/libsystemd.pc.in20
-rw-r--r--src/libsystemd/libsystemd.sym678
-rw-r--r--src/libsystemd/meson.build116
-rw-r--r--src/libsystemd/sd-bus/GVARIANT-SERIALIZATION105
-rw-r--r--src/libsystemd/sd-bus/bus-common-errors.c105
-rw-r--r--src/libsystemd/sd-bus/bus-common-errors.h81
-rw-r--r--src/libsystemd/sd-bus/bus-container.c93
-rw-r--r--src/libsystemd/sd-bus/bus-container.h6
-rw-r--r--src/libsystemd/sd-bus/bus-control.c930
-rw-r--r--src/libsystemd/sd-bus/bus-control.h9
-rw-r--r--src/libsystemd/sd-bus/bus-convenience.c669
-rw-r--r--src/libsystemd/sd-bus/bus-creds.c1338
-rw-r--r--src/libsystemd/sd-bus/bus-creds.h72
-rw-r--r--src/libsystemd/sd-bus/bus-dump.c593
-rw-r--r--src/libsystemd/sd-bus/bus-dump.h19
-rw-r--r--src/libsystemd/sd-bus/bus-error.c610
-rw-r--r--src/libsystemd/sd-bus/bus-error.h48
-rw-r--r--src/libsystemd/sd-bus/bus-gvariant.c299
-rw-r--r--src/libsystemd/sd-bus/bus-gvariant.h12
-rw-r--r--src/libsystemd/sd-bus/bus-internal.c342
-rw-r--r--src/libsystemd/sd-bus/bus-internal.h407
-rw-r--r--src/libsystemd/sd-bus/bus-introspect.c199
-rw-r--r--src/libsystemd/sd-bus/bus-introspect.h22
-rw-r--r--src/libsystemd/sd-bus/bus-kernel.c50
-rw-r--r--src/libsystemd/sd-bus/bus-kernel.h24
-rw-r--r--src/libsystemd/sd-bus/bus-match.c1097
-rw-r--r--src/libsystemd/sd-bus/bus-match.h80
-rw-r--r--src/libsystemd/sd-bus/bus-message.c5846
-rw-r--r--src/libsystemd/sd-bus/bus-message.h213
-rw-r--r--src/libsystemd/sd-bus/bus-objects.c2816
-rw-r--r--src/libsystemd/sd-bus/bus-objects.h7
-rw-r--r--src/libsystemd/sd-bus/bus-protocol.h162
-rw-r--r--src/libsystemd/sd-bus/bus-signature.c148
-rw-r--r--src/libsystemd/sd-bus/bus-signature.h10
-rw-r--r--src/libsystemd/sd-bus/bus-slot.c311
-rw-r--r--src/libsystemd/sd-bus/bus-slot.h10
-rw-r--r--src/libsystemd/sd-bus/bus-socket.c1292
-rw-r--r--src/libsystemd/sd-bus/bus-socket.h20
-rw-r--r--src/libsystemd/sd-bus/bus-track.c492
-rw-r--r--src/libsystemd/sd-bus/bus-track.h5
-rw-r--r--src/libsystemd/sd-bus/bus-type.c137
-rw-r--r--src/libsystemd/sd-bus/bus-type.h16
-rw-r--r--src/libsystemd/sd-bus/sd-bus.c4146
-rw-r--r--src/libsystemd/sd-bus/test-bus-address.c69
-rw-r--r--src/libsystemd/sd-bus/test-bus-benchmark.c323
-rw-r--r--src/libsystemd/sd-bus/test-bus-chat.c547
-rw-r--r--src/libsystemd/sd-bus/test-bus-cleanup.c80
-rw-r--r--src/libsystemd/sd-bus/test-bus-creds.c35
-rw-r--r--src/libsystemd/sd-bus/test-bus-error.c215
-rw-r--r--src/libsystemd/sd-bus/test-bus-gvariant.c221
-rw-r--r--src/libsystemd/sd-bus/test-bus-introspect.c46
-rw-r--r--src/libsystemd/sd-bus/test-bus-marshal.c420
-rw-r--r--src/libsystemd/sd-bus/test-bus-match.c147
-rw-r--r--src/libsystemd/sd-bus/test-bus-objects.c536
-rw-r--r--src/libsystemd/sd-bus/test-bus-server.c198
-rw-r--r--src/libsystemd/sd-bus/test-bus-signature.c147
-rw-r--r--src/libsystemd/sd-bus/test-bus-track.c108
l---------src/libsystemd/sd-bus/test-bus-vtable-cc.cc1
-rw-r--r--src/libsystemd/sd-bus/test-bus-vtable.c81
-rw-r--r--src/libsystemd/sd-bus/test-bus-watch-bind.c221
-rw-r--r--src/libsystemd/sd-daemon/sd-daemon.c660
-rw-r--r--src/libsystemd/sd-device/device-enumerator-private.h17
-rw-r--r--src/libsystemd/sd-device/device-enumerator.c1004
-rw-r--r--src/libsystemd/sd-device/device-internal.h113
-rw-r--r--src/libsystemd/sd-device/device-monitor-private.h20
-rw-r--r--src/libsystemd/sd-device/device-monitor.c756
-rw-r--r--src/libsystemd/sd-device/device-private.c956
-rw-r--r--src/libsystemd/sd-device/device-private.h56
-rw-r--r--src/libsystemd/sd-device/device-util.h57
-rw-r--r--src/libsystemd/sd-device/sd-device.c1864
-rw-r--r--src/libsystemd/sd-device/test-sd-device-monitor.c218
-rw-r--r--src/libsystemd/sd-device/test-sd-device-thread.c39
-rw-r--r--src/libsystemd/sd-device/test-sd-device.c156
-rw-r--r--src/libsystemd/sd-device/test-udev-device-thread.c36
-rw-r--r--src/libsystemd/sd-event/event-source.h206
-rw-r--r--src/libsystemd/sd-event/event-util.c99
-rw-r--r--src/libsystemd/sd-event/event-util.h13
-rw-r--r--src/libsystemd/sd-event/sd-event.c3541
-rw-r--r--src/libsystemd/sd-event/test-event.c495
-rw-r--r--src/libsystemd/sd-hwdb/hwdb-internal.h65
-rw-r--r--src/libsystemd/sd-hwdb/hwdb-util.c686
-rw-r--r--src/libsystemd/sd-hwdb/hwdb-util.h10
-rw-r--r--src/libsystemd/sd-hwdb/sd-hwdb.c468
-rw-r--r--src/libsystemd/sd-id128/id128-util.c193
-rw-r--r--src/libsystemd/sd-id128/id128-util.h33
-rw-r--r--src/libsystemd/sd-id128/sd-id128.c340
-rw-r--r--src/libsystemd/sd-login/sd-login.c1060
-rw-r--r--src/libsystemd/sd-login/test-login.c287
-rw-r--r--src/libsystemd/sd-netlink/generic-netlink.c97
-rw-r--r--src/libsystemd/sd-netlink/local-addresses.c253
-rw-r--r--src/libsystemd/sd-netlink/local-addresses.h17
-rw-r--r--src/libsystemd/sd-netlink/netlink-internal.h149
-rw-r--r--src/libsystemd/sd-netlink/netlink-message.c1042
-rw-r--r--src/libsystemd/sd-netlink/netlink-slot.c202
-rw-r--r--src/libsystemd/sd-netlink/netlink-slot.h14
-rw-r--r--src/libsystemd/sd-netlink/netlink-socket.c458
-rw-r--r--src/libsystemd/sd-netlink/netlink-types.c949
-rw-r--r--src/libsystemd/sd-netlink/netlink-types.h88
-rw-r--r--src/libsystemd/sd-netlink/netlink-util.c108
-rw-r--r--src/libsystemd/sd-netlink/netlink-util.h60
-rw-r--r--src/libsystemd/sd-netlink/rtnl-message.c963
-rw-r--r--src/libsystemd/sd-netlink/sd-netlink.c907
-rw-r--r--src/libsystemd/sd-netlink/test-local-addresses.c42
-rw-r--r--src/libsystemd/sd-netlink/test-netlink.c567
-rw-r--r--src/libsystemd/sd-network/network-util.c20
-rw-r--r--src/libsystemd/sd-network/network-util.h6
-rw-r--r--src/libsystemd/sd-network/sd-network.c409
-rw-r--r--src/libsystemd/sd-path/sd-path.c639
-rw-r--r--src/libsystemd/sd-resolve/resolve-private.h39
-rw-r--r--src/libsystemd/sd-resolve/sd-resolve.c1290
-rw-r--r--src/libsystemd/sd-resolve/test-resolve.c109
-rw-r--r--src/libsystemd/sd-utf8/sd-utf8.c18
-rw-r--r--src/libudev/libudev-device-internal.h39
-rw-r--r--src/libudev/libudev-device.c836
-rw-r--r--src/libudev/libudev-enumerate.c392
-rw-r--r--src/libudev/libudev-hwdb.c119
-rw-r--r--src/libudev/libudev-list-internal.h21
-rw-r--r--src/libudev/libudev-list.c301
-rw-r--r--src/libudev/libudev-monitor.c309
-rw-r--r--src/libudev/libudev-queue.c236
-rw-r--r--src/libudev/libudev-util.c214
-rw-r--r--src/libudev/libudev-util.h24
-rw-r--r--src/libudev/libudev.c156
-rw-r--r--src/libudev/libudev.h189
-rw-r--r--src/libudev/libudev.pc.in19
-rw-r--r--src/libudev/libudev.sym120
-rw-r--r--src/libudev/meson.build29
-rw-r--r--src/locale/kbd-model-map68
-rw-r--r--src/locale/keymap-util.c788
-rw-r--r--src/locale/keymap-util.h44
-rw-r--r--src/locale/language-fallback-map13
-rw-r--r--src/locale/localectl.c527
-rw-r--r--src/locale/localed.c758
-rw-r--r--src/locale/meson.build38
-rw-r--r--src/locale/org.freedesktop.locale1.conf29
-rw-r--r--src/locale/org.freedesktop.locale1.policy42
-rw-r--r--src/locale/org.freedesktop.locale1.service14
-rw-r--r--src/locale/test-keymap-util.c203
-rw-r--r--src/login/70-power-switch.rules15
-rw-r--r--src/login/70-uaccess.rules.m484
-rw-r--r--src/login/71-seat.rules.in56
-rw-r--r--src/login/73-seat-late.rules.m420
-rw-r--r--src/login/inhibit.c321
-rw-r--r--src/login/loginctl.c1553
-rw-r--r--src/login/logind-acl.c254
-rw-r--r--src/login/logind-acl.h34
-rw-r--r--src/login/logind-action.c183
-rw-r--r--src/login/logind-action.h36
-rw-r--r--src/login/logind-button.c377
-rw-r--r--src/login/logind-button.h26
-rw-r--r--src/login/logind-core.c843
-rw-r--r--src/login/logind-dbus.c3215
-rw-r--r--src/login/logind-device.c106
-rw-r--r--src/login/logind-device.h25
-rw-r--r--src/login/logind-gperf.gperf44
-rw-r--r--src/login/logind-inhibit.c471
-rw-r--r--src/login/logind-inhibit.h70
-rw-r--r--src/login/logind-seat-dbus.c393
-rw-r--r--src/login/logind-seat.c669
-rw-r--r--src/login/logind-seat.h79
-rw-r--r--src/login/logind-session-dbus.c764
-rw-r--r--src/login/logind-session-device.c532
-rw-r--r--src/login/logind-session-device.h39
-rw-r--r--src/login/logind-session.c1420
-rw-r--r--src/login/logind-session.h185
-rw-r--r--src/login/logind-user-dbus.c362
-rw-r--r--src/login/logind-user.c859
-rw-r--r--src/login/logind-user.h86
-rw-r--r--src/login/logind-utmp.c171
-rw-r--r--src/login/logind.c1245
-rw-r--r--src/login/logind.conf.in37
-rw-r--r--src/login/logind.h193
-rw-r--r--src/login/meson.build116
-rw-r--r--src/login/org.freedesktop.login1.conf292
-rw-r--r--src/login/org.freedesktop.login1.policy361
-rw-r--r--src/login/org.freedesktop.login1.service14
-rw-r--r--src/login/pam_systemd.c770
-rw-r--r--src/login/pam_systemd.sym15
-rw-r--r--src/login/sysfs-show.c166
-rw-r--r--src/login/sysfs-show.h8
-rw-r--r--src/login/systemd-user.m412
-rw-r--r--src/login/test-inhibit.c95
-rw-r--r--src/login/test-login-shared.c22
-rw-r--r--src/login/test-login-tables.c17
-rw-r--r--src/login/user-runtime-dir.c198
-rw-r--r--src/machine-id-setup/machine-id-setup-main.c142
-rw-r--r--src/machine/image-dbus.c498
-rw-r--r--src/machine/image-dbus.h21
-rw-r--r--src/machine/machine-dbus.c1469
-rw-r--r--src/machine/machine-dbus.h27
-rw-r--r--src/machine/machine.c709
-rw-r--r--src/machine/machine.h94
-rw-r--r--src/machine/machinectl.c3127
-rw-r--r--src/machine/machined-core.c36
-rw-r--r--src/machine/machined-dbus.c1583
-rw-r--r--src/machine/machined.c390
-rw-r--r--src/machine/machined.h59
-rw-r--r--src/machine/meson.build42
-rw-r--r--src/machine/operation.c138
-rw-r--r--src/machine/operation.h31
-rw-r--r--src/machine/org.freedesktop.machine1.conf238
-rw-r--r--src/machine/org.freedesktop.machine1.policy104
-rw-r--r--src/machine/org.freedesktop.machine1.service14
-rw-r--r--src/machine/test-machine-tables.c12
-rw-r--r--src/modules-load/modules-load.c227
-rw-r--r--src/mount/mount-tool.c1620
-rw-r--r--src/network/fuzz-netdev-parser.c25
-rw-r--r--src/network/fuzz-network-parser.c25
-rw-r--r--src/network/meson.build191
-rw-r--r--src/network/netdev/bond.c561
-rw-r--r--src/network/netdev/bond.h163
-rw-r--r--src/network/netdev/bridge.c163
-rw-r--r--src/network/netdev/bridge.h24
-rw-r--r--src/network/netdev/dummy.c9
-rw-r--r--src/network/netdev/dummy.h11
-rw-r--r--src/network/netdev/fou-tunnel.c129
-rw-r--r--src/network/netdev/fou-tunnel.h36
-rw-r--r--src/network/netdev/geneve.c299
-rw-r--r--src/network/netdev/geneve.h38
-rw-r--r--src/network/netdev/ipvlan.c73
-rw-r--r--src/network/netdev/ipvlan.h42
-rw-r--r--src/network/netdev/macvlan.c72
-rw-r--r--src/network/netdev/macvlan.h31
-rw-r--r--src/network/netdev/netdev-gperf.gperf174
-rw-r--r--src/network/netdev/netdev.c816
-rw-r--r--src/network/netdev/netdev.h195
-rw-r--r--src/network/netdev/netdevsim.c10
-rw-r--r--src/network/netdev/netdevsim.h13
-rw-r--r--src/network/netdev/tunnel.c936
-rw-r--r--src/network/netdev/tunnel.h91
-rw-r--r--src/network/netdev/tuntap.c161
-rw-r--r--src/network/netdev/tuntap.h22
-rw-r--r--src/network/netdev/vcan.c9
-rw-r--r--src/network/netdev/vcan.h16
-rw-r--r--src/network/netdev/veth.c95
-rw-r--r--src/network/netdev/veth.h16
-rw-r--r--src/network/netdev/vlan.c92
-rw-r--r--src/network/netdev/vlan.h20
-rw-r--r--src/network/netdev/vrf.c33
-rw-r--r--src/network/netdev/vrf.h15
-rw-r--r--src/network/netdev/vxcan.c76
-rw-r--r--src/network/netdev/vxcan.h16
-rw-r--r--src/network/netdev/vxlan.c311
-rw-r--r--src/network/netdev/vxlan.h52
-rw-r--r--src/network/netdev/wireguard.c778
-rw-r--r--src/network/netdev/wireguard.h73
-rw-r--r--src/network/networkctl.c1183
-rw-r--r--src/network/networkd-address-label.c229
-rw-r--r--src/network/networkd-address-label.h38
-rw-r--r--src/network/networkd-address-pool.c156
-rw-r--r--src/network/networkd-address-pool.h26
-rw-r--r--src/network/networkd-address.c969
-rw-r--r--src/network/networkd-address.h69
-rw-r--r--src/network/networkd-brvlan.c340
-rw-r--r--src/network/networkd-brvlan.h18
-rw-r--r--src/network/networkd-conf.c166
-rw-r--r--src/network/networkd-conf.h17
-rw-r--r--src/network/networkd-dhcp4.c822
-rw-r--r--src/network/networkd-dhcp6.c702
-rw-r--r--src/network/networkd-fdb.c260
-rw-r--r--src/network/networkd-fdb.h34
-rw-r--r--src/network/networkd-gperf.gperf22
-rw-r--r--src/network/networkd-ipv4ll.c227
-rw-r--r--src/network/networkd-ipv6-proxy-ndp.c204
-rw-r--r--src/network/networkd-ipv6-proxy-ndp.h26
-rw-r--r--src/network/networkd-link-bus.c122
-rw-r--r--src/network/networkd-link.c4184
-rw-r--r--src/network/networkd-link.h216
-rw-r--r--src/network/networkd-lldp-tx.c399
-rw-r--r--src/network/networkd-lldp-tx.h18
-rw-r--r--src/network/networkd-manager-bus.c33
-rw-r--r--src/network/networkd-manager.c1860
-rw-r--r--src/network/networkd-manager.h98
-rw-r--r--src/network/networkd-ndisc.c671
-rw-r--r--src/network/networkd-ndisc.h22
-rw-r--r--src/network/networkd-neighbor.c237
-rw-r--r--src/network/networkd-neighbor.h37
-rw-r--r--src/network/networkd-network-bus.c137
-rw-r--r--src/network/networkd-network-gperf.gperf201
-rw-r--r--src/network/networkd-network.c1507
-rw-r--r--src/network/networkd-network.h335
-rw-r--r--src/network/networkd-radv.c512
-rw-r--r--src/network/networkd-radv.h36
-rw-r--r--src/network/networkd-route.c1192
-rw-r--r--src/network/networkd-route.h73
-rw-r--r--src/network/networkd-routing-policy-rule.c1274
-rw-r--r--src/network/networkd-routing-policy-rule.h84
-rw-r--r--src/network/networkd-util.c104
-rw-r--r--src/network/networkd-util.h23
-rw-r--r--src/network/networkd.c117
-rw-r--r--src/network/networkd.conf16
-rw-r--r--src/network/org.freedesktop.network1.conf42
-rw-r--r--src/network/org.freedesktop.network1.service14
-rw-r--r--src/network/systemd-networkd.pkla4
-rw-r--r--src/network/systemd-networkd.rules10
-rw-r--r--src/network/test-network-tables.c47
-rw-r--r--src/network/test-network.c249
-rw-r--r--src/network/test-networkd-conf.c223
-rw-r--r--src/network/test-routing-policy-rule.c92
-rw-r--r--src/network/wait-online/link.c116
-rw-r--r--src/network/wait-online/link.h26
-rw-r--r--src/network/wait-online/manager.c316
-rw-r--r--src/network/wait-online/manager.h35
-rw-r--r--src/network/wait-online/wait-online.c152
-rw-r--r--src/notify/notify.c216
-rw-r--r--src/nspawn/meson.build56
-rw-r--r--src/nspawn/nspawn-cgroup.c608
-rw-r--r--src/nspawn/nspawn-cgroup.h14
-rw-r--r--src/nspawn/nspawn-def.h8
-rw-r--r--src/nspawn/nspawn-expose-ports.c227
-rw-r--r--src/nspawn/nspawn-expose-ports.h26
-rw-r--r--src/nspawn/nspawn-gperf.gperf76
-rw-r--r--src/nspawn/nspawn-mount.c1075
-rw-r--r--src/nspawn/nspawn-mount.h58
-rw-r--r--src/nspawn/nspawn-network.c661
-rw-r--r--src/nspawn/nspawn-network.h21
-rw-r--r--src/nspawn/nspawn-patch-uid.c490
-rw-r--r--src/nspawn/nspawn-patch-uid.h7
-rw-r--r--src/nspawn/nspawn-register.c388
-rw-r--r--src/nspawn/nspawn-register.h14
-rw-r--r--src/nspawn/nspawn-seccomp.c240
-rw-r--r--src/nspawn/nspawn-seccomp.h6
-rw-r--r--src/nspawn/nspawn-settings.c807
-rw-r--r--src/nspawn/nspawn-settings.h192
-rw-r--r--src/nspawn/nspawn-setuid.c225
-rw-r--r--src/nspawn/nspawn-setuid.h4
-rw-r--r--src/nspawn/nspawn-stub-pid1.c191
-rw-r--r--src/nspawn/nspawn-stub-pid1.h6
-rw-r--r--src/nspawn/nspawn.c4613
-rw-r--r--src/nspawn/test-nspawn-tables.c11
-rw-r--r--src/nspawn/test-patch-uid.c43
-rw-r--r--src/nss-myhostname/nss-myhostname.c501
-rw-r--r--src/nss-myhostname/nss-myhostname.sym19
-rw-r--r--src/nss-mymachines/nss-mymachines.c776
-rw-r--r--src/nss-mymachines/nss-mymachines.sym21
-rw-r--r--src/nss-resolve/nss-resolve.c657
-rw-r--r--src/nss-resolve/nss-resolve.sym19
-rw-r--r--src/nss-systemd/nss-systemd.c834
-rw-r--r--src/nss-systemd/nss-systemd.sym23
-rw-r--r--src/partition/growfs.c328
-rw-r--r--src/partition/makefs.c83
-rw-r--r--src/path/path.c195
-rw-r--r--src/portable/meson.build29
-rw-r--r--src/portable/org.freedesktop.portable1.conf117
-rw-r--r--src/portable/org.freedesktop.portable1.policy43
-rw-r--r--src/portable/org.freedesktop.portable1.service7
-rw-r--r--src/portable/portable.c1443
-rw-r--r--src/portable/portable.h74
-rw-r--r--src/portable/portablectl.c969
-rw-r--r--src/portable/portabled-bus.c403
-rw-r--r--src/portable/portabled-bus.h10
-rw-r--r--src/portable/portabled-image-bus.c736
-rw-r--r--src/portable/portabled-image-bus.h41
-rw-r--r--src/portable/portabled-image.c104
-rw-r--r--src/portable/portabled-image.h12
-rw-r--r--src/portable/portabled-operation.c128
-rw-r--r--src/portable/portabled-operation.h29
-rw-r--r--src/portable/portabled.c163
-rw-r--r--src/portable/portabled.h25
-rw-r--r--src/portable/profile/default/service.conf32
-rw-r--r--src/portable/profile/nonetwork/service.conf32
-rw-r--r--src/portable/profile/strict/service.conf31
-rw-r--r--src/portable/profile/trusted/service.conf7
-rw-r--r--src/quotacheck/quotacheck.c100
-rw-r--r--src/random-seed/random-seed.c177
-rw-r--r--src/rc-local-generator/rc-local-generator.c77
-rw-r--r--src/remount-fs/remount-fs.c156
-rw-r--r--src/reply-password/reply-password.c98
-rw-r--r--src/resolve/RFCs60
-rw-r--r--src/resolve/dns-type.c316
-rw-r--r--src/resolve/dns-type.h144
-rw-r--r--src/resolve/dns_type-to-name.awk11
-rwxr-xr-xsrc/resolve/generate-dns_type-gperf.py24
-rw-r--r--src/resolve/generate-dns_type-list.sed1
-rw-r--r--src/resolve/meson.build231
-rw-r--r--src/resolve/org.freedesktop.resolve1.conf27
-rw-r--r--src/resolve/org.freedesktop.resolve1.policy43
-rw-r--r--src/resolve/org.freedesktop.resolve1.service14
-rw-r--r--src/resolve/resolv.conf18
-rw-r--r--src/resolve/resolvconf-compat.c275
-rw-r--r--src/resolve/resolvconf-compat.h4
-rw-r--r--src/resolve/resolvectl.c3163
-rw-r--r--src/resolve/resolvectl.h29
-rw-r--r--src/resolve/resolved-bus.c1951
-rw-r--r--src/resolve/resolved-bus.h10
-rw-r--r--src/resolve/resolved-conf.c403
-rw-r--r--src/resolve/resolved-conf.h39
-rw-r--r--src/resolve/resolved-def.h27
-rw-r--r--src/resolve/resolved-dns-answer.c757
-rw-r--r--src/resolve/resolved-dns-answer.h129
-rw-r--r--src/resolve/resolved-dns-cache.c1106
-rw-r--r--src/resolve/resolved-dns-cache.h34
-rw-r--r--src/resolve/resolved-dns-dnssec.c2299
-rw-r--r--src/resolve/resolved-dns-dnssec.h83
-rw-r--r--src/resolve/resolved-dns-packet.c2380
-rw-r--r--src/resolve/resolved-dns-packet.h302
-rw-r--r--src/resolve/resolved-dns-query.c1109
-rw-r--r--src/resolve/resolved-dns-query.h125
-rw-r--r--src/resolve/resolved-dns-question.c438
-rw-r--r--src/resolve/resolved-dns-question.h55
-rw-r--r--src/resolve/resolved-dns-rr.c1819
-rw-r--r--src/resolve/resolved-dns-rr.h338
-rw-r--r--src/resolve/resolved-dns-scope.c1461
-rw-r--r--src/resolve/resolved-dns-scope.h112
-rw-r--r--src/resolve/resolved-dns-search-domain.c192
-rw-r--r--src/resolve/resolved-dns-search-domain.h56
-rw-r--r--src/resolve/resolved-dns-server.c915
-rw-r--r--src/resolve/resolved-dns-server.h155
-rw-r--r--src/resolve/resolved-dns-stream.c567
-rw-r--r--src/resolve/resolved-dns-stream.h92
-rw-r--r--src/resolve/resolved-dns-stub.c586
-rw-r--r--src/resolve/resolved-dns-stub.h7
-rw-r--r--src/resolve/resolved-dns-synthesize.c434
-rw-r--r--src/resolve/resolved-dns-synthesize.h12
-rw-r--r--src/resolve/resolved-dns-transaction.c3277
-rw-r--r--src/resolve/resolved-dns-transaction.h178
-rw-r--r--src/resolve/resolved-dns-trust-anchor.c789
-rw-r--r--src/resolve/resolved-dns-trust-anchor.h25
-rw-r--r--src/resolve/resolved-dns-zone.c704
-rw-r--r--src/resolve/resolved-dns-zone.h69
-rw-r--r--src/resolve/resolved-dnssd-bus.c128
-rw-r--r--src/resolve/resolved-dnssd-bus.h10
-rw-r--r--src/resolve/resolved-dnssd-gperf.gperf24
-rw-r--r--src/resolve/resolved-dnssd.c366
-rw-r--r--src/resolve/resolved-dnssd.h59
-rw-r--r--src/resolve/resolved-dnstls-gnutls.c205
-rw-r--r--src/resolve/resolved-dnstls-gnutls.h20
-rw-r--r--src/resolve/resolved-dnstls-openssl.c353
-rw-r--r--src/resolve/resolved-dnstls-openssl.h21
-rw-r--r--src/resolve/resolved-dnstls.h32
-rw-r--r--src/resolve/resolved-etc-hosts.c377
-rw-r--r--src/resolve/resolved-etc-hosts.h25
-rw-r--r--src/resolve/resolved-gperf.gperf29
-rw-r--r--src/resolve/resolved-link-bus.c700
-rw-r--r--src/resolve/resolved-link-bus.h22
-rw-r--r--src/resolve/resolved-link.c1394
-rw-r--r--src/resolve/resolved-link.h112
-rw-r--r--src/resolve/resolved-llmnr.c447
-rw-r--r--src/resolve/resolved-llmnr.h14
-rw-r--r--src/resolve/resolved-manager.c1533
-rw-r--r--src/resolve/resolved-manager.h184
-rw-r--r--src/resolve/resolved-mdns.c481
-rw-r--r--src/resolve/resolved-mdns.h13
-rw-r--r--src/resolve/resolved-resolv-conf.c400
-rw-r--r--src/resolve/resolved-resolv-conf.h8
-rw-r--r--src/resolve/resolved.c87
-rw-r--r--src/resolve/resolved.conf.in24
-rw-r--r--src/resolve/test-dns-packet.c121
-rw-r--r--src/resolve/test-dnssec-complex.c223
-rw-r--r--src/resolve/test-dnssec.c514
-rw-r--r--src/resolve/test-resolve-tables.c54
-rw-r--r--src/resolve/test-resolved-etc-hosts.c151
-rw-r--r--src/resolve/test-resolved-packet.c34
-rw-r--r--src/rfkill/rfkill.c377
-rw-r--r--src/run-generator/run-generator.c136
-rw-r--r--src/run/run.c1655
-rw-r--r--src/shared/acl-util.c406
-rw-r--r--src/shared/acl-util.h30
-rw-r--r--src/shared/acpi-fpdt.c147
-rw-r--r--src/shared/acpi-fpdt.h6
-rw-r--r--src/shared/apparmor-util.c22
-rw-r--r--src/shared/apparmor-util.h6
-rw-r--r--src/shared/ask-password-api.c818
-rw-r--r--src/shared/ask-password-api.h20
-rw-r--r--src/shared/barrier.c394
-rw-r--r--src/shared/barrier.h74
-rw-r--r--src/shared/base-filesystem.c117
-rw-r--r--src/shared/base-filesystem.h6
-rw-r--r--src/shared/bitmap.c218
-rw-r--r--src/shared/bitmap.h31
-rw-r--r--src/shared/blkid-util.h10
-rw-r--r--src/shared/boot-timestamps.c46
-rw-r--r--src/shared/boot-timestamps.h6
-rw-r--r--src/shared/bootspec.c665
-rw-r--r--src/shared/bootspec.h51
-rw-r--r--src/shared/bpf-program.c237
-rw-r--r--src/shared/bpf-program.h42
-rw-r--r--src/shared/bus-unit-util.c2547
-rw-r--r--src/shared/bus-unit-util.h42
-rw-r--r--src/shared/bus-util.c1753
-rw-r--r--src/shared/bus-util.h179
-rw-r--r--src/shared/calendarspec.c1370
-rw-r--r--src/shared/calendarspec.h46
-rw-r--r--src/shared/cgroup-show.c354
-rw-r--r--src/shared/cgroup-show.h24
-rw-r--r--src/shared/clean-ipc.c453
-rw-r--r--src/shared/clean-ipc.h17
-rw-r--r--src/shared/clock-util.c157
-rw-r--r--src/shared/clock-util.h11
-rw-r--r--src/shared/condition.c733
-rw-r--r--src/shared/condition.h96
-rw-r--r--src/shared/conf-parser.c1113
-rw-r--r--src/shared/conf-parser.h289
-rw-r--r--src/shared/cpu-set-util.c99
-rw-r--r--src/shared/cpu-set-util.h35
-rw-r--r--src/shared/crypt-util.c28
-rw-r--r--src/shared/crypt-util.h17
-rw-r--r--src/shared/daemon-util.h22
-rw-r--r--src/shared/dev-setup.c115
-rw-r--r--src/shared/dev-setup.h8
-rw-r--r--src/shared/dissect-image.c1507
-rw-r--r--src/shared/dissect-image.h96
-rw-r--r--src/shared/dns-domain.c1375
-rw-r--r--src/shared/dns-domain.h112
-rw-r--r--src/shared/dropin.c255
-rw-r--r--src/shared/dropin.h39
-rw-r--r--src/shared/efivars.c914
-rw-r--r--src/shared/efivars.h141
-rw-r--r--src/shared/enable-mempool.c5
-rw-r--r--src/shared/env-file-label.c21
-rw-r--r--src/shared/env-file-label.h8
-rw-r--r--src/shared/exec-util.c353
-rw-r--r--src/shared/exec-util.h25
-rw-r--r--src/shared/exit-status.c284
-rw-r--r--src/shared/exit-status.h99
-rw-r--r--src/shared/fdset.c255
-rw-r--r--src/shared/fdset.h40
-rw-r--r--src/shared/fileio-label.c37
-rw-r--r--src/shared/fileio-label.h15
-rw-r--r--src/shared/firewall-util.c350
-rw-r--r--src/shared/firewall-util.h65
-rw-r--r--src/shared/format-table.c1625
-rw-r--r--src/shared/format-table.h78
-rw-r--r--src/shared/fstab-util.c268
-rw-r--r--src/shared/fstab-util.h35
-rwxr-xr-xsrc/shared/generate-ip-protocol-list.sh6
-rw-r--r--src/shared/generator.c504
-rw-r--r--src/shared/generator.h67
-rw-r--r--src/shared/gpt.h64
-rw-r--r--src/shared/id128-print.c65
-rw-r--r--src/shared/id128-print.h10
-rw-r--r--src/shared/ima-util.c15
-rw-r--r--src/shared/ima-util.h6
-rw-r--r--src/shared/import-util.c166
-rw-r--r--src/shared/import-util.h25
-rw-r--r--src/shared/initreq.h73
-rw-r--r--src/shared/install-printf.c148
-rw-r--r--src/shared/install-printf.h6
-rw-r--r--src/shared/install.c3383
-rw-r--r--src/shared/install.h230
-rw-r--r--src/shared/ip-protocol-list.c67
-rw-r--r--src/shared/ip-protocol-list.h6
-rw-r--r--src/shared/ip-protocol-to-name.awk9
-rw-r--r--src/shared/journal-importer.c504
-rw-r--r--src/shared/journal-importer.h64
-rw-r--r--src/shared/journal-util.c174
-rw-r--r--src/shared/journal-util.h11
-rw-r--r--src/shared/json-internal.h63
-rw-r--r--src/shared/json.c3480
-rw-r--r--src/shared/json.h285
-rw-r--r--src/shared/libshared.sym3
-rw-r--r--src/shared/linux-3.13/dm-ioctl.h355
-rw-r--r--src/shared/linux/auto_dev-ioctl.h229
-rw-r--r--src/shared/linux/bpf.h1109
-rw-r--r--src/shared/linux/bpf_common.h55
-rw-r--r--src/shared/linux/libbpf.h207
-rw-r--r--src/shared/lockfile-util.c137
-rw-r--r--src/shared/lockfile-util.h14
-rw-r--r--src/shared/logs-show.c1462
-rw-r--r--src/shared/logs-show.h63
-rw-r--r--src/shared/loop-util.c146
-rw-r--r--src/shared/loop-util.h23
-rw-r--r--src/shared/machine-image.c1249
-rw-r--r--src/shared/machine-image.h112
-rw-r--r--src/shared/machine-pool.c46
-rw-r--r--src/shared/machine-pool.h8
-rw-r--r--src/shared/main-func.h34
-rw-r--r--src/shared/meson.build277
-rw-r--r--src/shared/module-util.c72
-rw-r--r--src/shared/module-util.h12
-rw-r--r--src/shared/mount-util.c570
-rw-r--r--src/shared/mount-util.h34
-rw-r--r--src/shared/nscd-flush.c151
-rw-r--r--src/shared/nscd-flush.h4
-rw-r--r--src/shared/nsflags.c77
-rw-r--r--src/shared/nsflags.h29
-rw-r--r--src/shared/os-util.c117
-rw-r--r--src/shared/os-util.h12
-rw-r--r--src/shared/output-mode.c42
-rw-r--r--src/shared/output-mode.h50
-rw-r--r--src/shared/pager.c292
-rw-r--r--src/shared/pager.h17
-rw-r--r--src/shared/path-lookup.c903
-rw-r--r--src/shared/path-lookup.h74
-rw-r--r--src/shared/pretty-print.c247
-rw-r--r--src/shared/pretty-print.h17
-rw-r--r--src/shared/ptyfwd.c631
-rw-r--r--src/shared/ptyfwd.h42
-rw-r--r--src/shared/reboot-util.c83
-rw-r--r--src/shared/reboot-util.h12
-rw-r--r--src/shared/resolve-util.c29
-rw-r--r--src/shared/resolve-util.h60
-rw-r--r--src/shared/seccomp-util.c1764
-rw-r--r--src/shared/seccomp-util.h98
-rw-r--r--src/shared/securebits-util.c66
-rw-r--r--src/shared/securebits-util.h18
-rw-r--r--src/shared/serialize.c214
-rw-r--r--src/shared/serialize.h25
-rw-r--r--src/shared/sleep-config.c436
-rw-r--r--src/shared/sleep-config.h13
-rw-r--r--src/shared/spawn-ask-password-agent.c48
-rw-r--r--src/shared/spawn-ask-password-agent.h5
-rw-r--r--src/shared/spawn-polkit-agent.c85
-rw-r--r--src/shared/spawn-polkit-agent.h22
-rw-r--r--src/shared/specifier.c299
-rw-r--r--src/shared/specifier.h37
-rw-r--r--src/shared/switch-root.c130
-rw-r--r--src/shared/switch-root.h6
-rw-r--r--src/shared/sysctl-util.c71
-rw-r--r--src/shared/sysctl-util.h7
-rw-r--r--src/shared/test-tables.h44
-rw-r--r--src/shared/tests.c151
-rw-r--r--src/shared/tests.h14
-rw-r--r--src/shared/tmpfile-util-label.c26
-rw-r--r--src/shared/tmpfile-util-label.h10
-rw-r--r--src/shared/tomoyo-util.c15
-rw-r--r--src/shared/tomoyo-util.h6
-rw-r--r--src/shared/udev-util.c171
-rw-r--r--src/shared/udev-util.h29
-rw-r--r--src/shared/uid-range.c186
-rw-r--r--src/shared/uid-range.h15
-rw-r--r--src/shared/utmp-wtmp.c427
-rw-r--r--src/shared/utmp-wtmp.h56
-rw-r--r--src/shared/verbs.c128
-rw-r--r--src/shared/verbs.h23
-rw-r--r--src/shared/vlan-util.c82
-rw-r--r--src/shared/vlan-util.h20
-rw-r--r--src/shared/volatile-util.c44
-rw-r--r--src/shared/volatile-util.h15
-rw-r--r--src/shared/watchdog.c154
-rw-r--r--src/shared/watchdog.h16
-rw-r--r--src/shared/web-util.c53
-rw-r--r--src/shared/web-util.h12
-rw-r--r--src/shared/wireguard-netlink.h179
-rw-r--r--src/shared/xml.c238
-rw-r--r--src/shared/xml.h14
-rw-r--r--src/sleep/sleep.c378
-rw-r--r--src/sleep/sleep.conf25
-rw-r--r--src/socket-proxy/socket-proxyd.c665
-rw-r--r--src/stdio-bridge/stdio-bridge.c256
-rw-r--r--src/sulogin-shell/sulogin-shell.c122
-rw-r--r--src/sysctl/sysctl.c320
-rw-r--r--src/system-update-generator/system-update-generator.c70
-rw-r--r--src/systemctl/systemctl.c8823
-rwxr-xr-xsrc/systemctl/systemd-sysv-install.SKELETON49
-rw-r--r--src/systemd/_sd-common.h90
-rw-r--r--src/systemd/meson.build78
-rw-r--r--src/systemd/sd-bus-protocol.h99
-rw-r--r--src/systemd/sd-bus-vtable.h161
-rw-r--r--src/systemd/sd-bus.h506
-rw-r--r--src/systemd/sd-daemon.h320
-rw-r--r--src/systemd/sd-device.h124
-rw-r--r--src/systemd/sd-dhcp-client.h194
-rw-r--r--src/systemd/sd-dhcp-lease.h66
-rw-r--r--src/systemd/sd-dhcp-server.h62
-rw-r--r--src/systemd/sd-dhcp6-client.h153
-rw-r--r--src/systemd/sd-dhcp6-lease.h55
-rw-r--r--src/systemd/sd-event.h155
-rw-r--r--src/systemd/sd-hwdb.h45
-rw-r--r--src/systemd/sd-id128.h122
-rw-r--r--src/systemd/sd-ipv4acd.h57
-rw-r--r--src/systemd/sd-ipv4ll.h60
-rw-r--r--src/systemd/sd-journal.h172
-rw-r--r--src/systemd/sd-lldp.h190
-rw-r--r--src/systemd/sd-login.h242
-rw-r--r--src/systemd/sd-messages.h158
-rw-r--r--src/systemd/sd-ndisc.h131
-rw-r--r--src/systemd/sd-netlink.h211
-rw-r--r--src/systemd/sd-network.h191
-rw-r--r--src/systemd/sd-path.h89
-rw-r--r--src/systemd/sd-radv.h88
-rw-r--r--src/systemd/sd-resolve.h124
-rw-r--r--src/systemd/sd-utf8.h29
-rw-r--r--src/sysusers/sysusers.c1997
-rw-r--r--src/sysv-generator/sysv-generator.c956
-rwxr-xr-xsrc/test/generate-sym-test.py23
-rw-r--r--src/test/meson.build1067
-rw-r--r--src/test/test-acl-util.c68
-rw-r--r--src/test/test-af-list.c34
-rw-r--r--src/test/test-alloc-util.c81
-rw-r--r--src/test/test-architecture.c52
-rw-r--r--src/test/test-arphrd-list.c34
-rw-r--r--src/test/test-ask-password-api.c23
-rw-r--r--src/test/test-async.c38
-rw-r--r--src/test/test-barrier.c463
-rw-r--r--src/test/test-bitmap.c117
-rw-r--r--src/test/test-boot-timestamps.c89
-rw-r--r--src/test/test-bpf.c178
-rw-r--r--src/test/test-btrfs.c181
-rw-r--r--src/test/test-bus-util.c55
-rw-r--r--src/test/test-calendarspec.c250
-rw-r--r--src/test/test-cap-list.c123
-rw-r--r--src/test/test-capability.c245
-rw-r--r--src/test/test-cgroup-mask.c157
-rw-r--r--src/test/test-cgroup-util.c487
-rw-r--r--src/test/test-cgroup.c87
-rw-r--r--src/test/test-chown-rec.c158
-rw-r--r--src/test/test-clock.c78
-rw-r--r--src/test/test-condition.c694
-rw-r--r--src/test/test-conf-files.c161
-rw-r--r--src/test/test-conf-parser.c388
-rw-r--r--src/test/test-copy.c274
-rw-r--r--src/test/test-cpu-set-util.c126
-rw-r--r--src/test/test-daemon.c56
-rw-r--r--src/test/test-date.c112
-rw-r--r--src/test/test-dev-setup.c62
-rw-r--r--src/test/test-device-nodes.c40
-rw-r--r--src/test/test-dissect-image.c50
-rw-r--r--src/test/test-dlopen.c15
-rw-r--r--src/test/test-dns-domain.c815
-rw-r--r--src/test/test-ellipsize.c127
-rw-r--r--src/test/test-emergency-action.c51
-rw-r--r--src/test/test-engine.c130
-rw-r--r--src/test/test-env-util.c325
-rw-r--r--src/test/test-escape.c131
-rw-r--r--src/test/test-exec-util.c377
-rw-r--r--src/test/test-execute.c832
-rw-r--r--src/test/test-extract-word.c539
-rw-r--r--src/test/test-fd-util.c335
-rw-r--r--src/test/test-fdset.c204
-rw-r--r--src/test/test-fileio.c856
-rw-r--r--src/test/test-firewall-util.c42
-rw-r--r--src/test/test-format-table.c165
-rw-r--r--src/test/test-fs-util.c743
-rw-r--r--src/test/test-fstab-util.c158
-rw-r--r--src/test/test-gcrypt-util.c32
-rw-r--r--src/test/test-glob-util.c103
-rw-r--r--src/test/test-hash.c76
-rw-r--r--src/test/test-hashmap-ordered.awk11
-rw-r--r--src/test/test-hashmap-plain.c1014
-rw-r--r--src/test/test-hashmap.c180
-rw-r--r--src/test/test-helper.c35
-rw-r--r--src/test/test-helper.h31
-rw-r--r--src/test/test-hexdecoct.c355
-rw-r--r--src/test/test-hostname-util.c150
-rw-r--r--src/test/test-hostname.c14
-rw-r--r--src/test/test-id128.c159
-rw-r--r--src/test/test-in-addr-util.c86
-rw-r--r--src/test/test-install-root.c1082
-rw-r--r--src/test/test-install.c273
-rw-r--r--src/test/test-io-util.c52
-rw-r--r--src/test/test-ip-protocol-list.c64
-rw-r--r--src/test/test-ipcrm.c25
-rw-r--r--src/test/test-job-type.c82
-rw-r--r--src/test/test-journal-importer.c78
-rw-r--r--src/test/test-json.c461
-rw-r--r--src/test/test-libudev.c559
-rw-r--r--src/test/test-list.c207
-rw-r--r--src/test/test-locale-util.c104
-rw-r--r--src/test/test-log.c76
-rw-r--r--src/test/test-loopback.c20
-rw-r--r--src/test/test-mount-util.c70
-rw-r--r--src/test/test-mountpoint-util.c267
-rw-r--r--src/test/test-namespace.c136
-rw-r--r--src/test/test-netlink-manual.c127
-rw-r--r--src/test/test-ns.c93
-rw-r--r--src/test/test-nscd-flush.c20
-rw-r--r--src/test/test-nss.c516
-rw-r--r--src/test/test-os-util.c21
-rw-r--r--src/test/test-parse-util.c853
-rw-r--r--src/test/test-path-lookup.c94
-rw-r--r--src/test/test-path-util.c619
-rw-r--r--src/test/test-path.c273
-rw-r--r--src/test/test-pretty-print.c40
-rw-r--r--src/test/test-prioq.c128
-rw-r--r--src/test/test-proc-cmdline.c243
-rw-r--r--src/test/test-process-util.c632
-rw-r--r--src/test/test-procfs-util.c47
-rw-r--r--src/test/test-random-util.c67
-rw-r--r--src/test/test-ratelimit.c29
-rw-r--r--src/test/test-replace-var.c28
-rw-r--r--src/test/test-rlimit-util.c134
-rw-r--r--src/test/test-sched-prio.c79
-rw-r--r--src/test/test-sd-hwdb.c74
-rw-r--r--src/test/test-seccomp.c768
-rw-r--r--src/test/test-selinux.c105
-rw-r--r--src/test/test-serialize.c208
-rw-r--r--src/test/test-set-disable-mempool.c53
-rw-r--r--src/test/test-set.c89
-rw-r--r--src/test/test-sigbus.c59
-rw-r--r--src/test/test-signal-util.c151
-rw-r--r--src/test/test-siphash24.c107
-rw-r--r--src/test/test-sizeof.c76
-rw-r--r--src/test/test-sleep.c107
-rw-r--r--src/test/test-socket-util.c832
-rw-r--r--src/test/test-specifier.c49
-rw-r--r--src/test/test-stat-util.c167
-rw-r--r--src/test/test-static-destruct.c34
-rw-r--r--src/test/test-strbuf.c76
-rw-r--r--src/test/test-string-util.c616
-rw-r--r--src/test/test-strip-tab-ansi.c57
-rw-r--r--src/test/test-strv.c927
-rw-r--r--src/test/test-strxcpyx.c89
-rwxr-xr-xsrc/test/test-systemd-tmpfiles.py143
-rw-r--r--src/test/test-tables.c126
-rw-r--r--src/test/test-terminal-util.c65
-rw-r--r--src/test/test-time-util.c500
-rw-r--r--src/test/test-tmpfiles.c66
-rw-r--r--src/test/test-udev.c132
-rw-r--r--src/test/test-uid-range.c74
-rw-r--r--src/test/test-umount.c65
-rw-r--r--src/test/test-unaligned.c172
-rw-r--r--src/test/test-unit-file.c930
-rw-r--r--src/test/test-unit-name.c848
-rw-r--r--src/test/test-user-util.c225
-rw-r--r--src/test/test-utf8.c166
-rw-r--r--src/test/test-util.c421
-rw-r--r--src/test/test-verbs.c61
-rw-r--r--src/test/test-watch-pid.c88
-rw-r--r--src/test/test-watchdog.c40
-rw-r--r--src/test/test-web-util.c24
-rw-r--r--src/test/test-xattr-util.c88
-rw-r--r--src/test/test-xml.c66
-rw-r--r--src/time-wait-sync/time-wait-sync.c252
-rw-r--r--src/timedate/meson.build10
-rw-r--r--src/timedate/org.freedesktop.timedate1.conf29
-rw-r--r--src/timedate/org.freedesktop.timedate1.policy62
-rw-r--r--src/timedate/org.freedesktop.timedate1.service14
-rw-r--r--src/timedate/timedatectl.c883
-rw-r--r--src/timedate/timedated.c1048
-rw-r--r--src/timesync/meson.build51
-rw-r--r--src/timesync/org.freedesktop.timesync1.conf42
-rw-r--r--src/timesync/org.freedesktop.timesync1.service14
-rw-r--r--src/timesync/test-timesync.c34
-rw-r--r--src/timesync/timesyncd-bus.c201
-rw-r--r--src/timesync/timesyncd-bus.h6
-rw-r--r--src/timesync/timesyncd-conf.c124
-rw-r--r--src/timesync/timesyncd-conf.h14
-rw-r--r--src/timesync/timesyncd-gperf.gperf25
-rw-r--r--src/timesync/timesyncd-manager.c1124
-rw-r--r--src/timesync/timesyncd-manager.h109
-rw-r--r--src/timesync/timesyncd-ntp-message.h45
-rw-r--r--src/timesync/timesyncd-server.c130
-rw-r--r--src/timesync/timesyncd-server.h47
-rw-r--r--src/timesync/timesyncd.c175
-rw-r--r--src/timesync/timesyncd.conf.in19
-rw-r--r--src/tmpfiles/tmpfiles.c3275
-rw-r--r--src/tty-ask-password-agent/tty-ask-password-agent.c869
-rw-r--r--src/udev/.vimrc4
-rw-r--r--src/udev/ata_id/ata_id.c651
-rw-r--r--src/udev/cdrom_id/cdrom_id.c1048
-rwxr-xr-xsrc/udev/generate-keyboard-keys-gperf.sh16
-rwxr-xr-xsrc/udev/generate-keyboard-keys-list.sh7
-rw-r--r--src/udev/meson.build199
-rw-r--r--src/udev/mtd_probe/mtd_probe.c59
-rw-r--r--src/udev/mtd_probe/mtd_probe.h52
-rw-r--r--src/udev/mtd_probe/probe_smartmedia.c97
-rw-r--r--src/udev/net/ethtool-util.c787
-rw-r--r--src/udev/net/ethtool-util.h108
-rw-r--r--src/udev/net/link-config-gperf.gperf54
-rw-r--r--src/udev/net/link-config.c535
-rw-r--r--src/udev/net/link-config.h88
-rw-r--r--src/udev/net/naming-scheme.c64
-rw-r--r--src/udev/net/naming-scheme.h48
-rw-r--r--src/udev/scsi_id/README4
-rw-r--r--src/udev/scsi_id/scsi.h100
-rw-r--r--src/udev/scsi_id/scsi_id.c597
-rw-r--r--src/udev/scsi_id/scsi_id.h63
-rw-r--r--src/udev/scsi_id/scsi_serial.c897
-rw-r--r--src/udev/udev-builtin-blkid.c318
-rw-r--r--src/udev/udev-builtin-btrfs.c40
-rw-r--r--src/udev/udev-builtin-hwdb.c218
-rw-r--r--src/udev/udev-builtin-input_id.c364
-rw-r--r--src/udev/udev-builtin-keyboard.c258
-rw-r--r--src/udev/udev-builtin-kmod.c77
-rw-r--r--src/udev/udev-builtin-net_id.c961
-rw-r--r--src/udev/udev-builtin-net_setup_link.c85
-rw-r--r--src/udev/udev-builtin-path_id.c683
-rw-r--r--src/udev/udev-builtin-uaccess.c80
-rw-r--r--src/udev/udev-builtin-usb_id.c460
-rw-r--r--src/udev/udev-builtin.c145
-rw-r--r--src/udev/udev-builtin.h68
-rw-r--r--src/udev/udev-ctrl.c427
-rw-r--r--src/udev/udev-ctrl.h42
-rw-r--r--src/udev/udev-event.c893
-rw-r--r--src/udev/udev-node.c445
-rw-r--r--src/udev/udev-node.h15
-rw-r--r--src/udev/udev-rules.c2664
-rw-r--r--src/udev/udev-watch.c177
-rw-r--r--src/udev/udev-watch.h10
-rw-r--r--src/udev/udev.conf10
-rw-r--r--src/udev/udev.h82
-rw-r--r--src/udev/udev.pc.in5
-rw-r--r--src/udev/udevadm-control.c169
-rw-r--r--src/udev/udevadm-hwdb.c101
-rw-r--r--src/udev/udevadm-info.c470
-rw-r--r--src/udev/udevadm-monitor.c263
-rw-r--r--src/udev/udevadm-settle.c146
-rw-r--r--src/udev/udevadm-test-builtin.c97
-rw-r--r--src/udev/udevadm-test.c149
-rw-r--r--src/udev/udevadm-trigger.c379
-rw-r--r--src/udev/udevadm-util.c49
-rw-r--r--src/udev/udevadm-util.h6
-rw-r--r--src/udev/udevadm.c127
-rw-r--r--src/udev/udevadm.h22
-rw-r--r--src/udev/udevd.c1855
-rw-r--r--src/udev/v4l_id/v4l_id.c88
-rw-r--r--src/update-done/update-done.c57
-rw-r--r--src/update-utmp/update-utmp.c269
-rw-r--r--src/user-sessions/user-sessions.c41
-rw-r--r--src/vconsole/90-vconsole.rules.in12
-rw-r--r--src/vconsole/meson.build10
-rw-r--r--src/vconsole/vconsole-setup.c477
-rw-r--r--src/veritysetup/veritysetup-generator.c229
-rw-r--r--src/veritysetup/veritysetup.c116
-rw-r--r--src/version/version.h.in1
-rw-r--r--src/volatile-root/volatile-root.c125
1543 files changed, 495196 insertions, 0 deletions
diff --git a/src/ac-power/ac-power.c b/src/ac-power/ac-power.c
new file mode 100644
index 0000000..90ba5d2
--- /dev/null
+++ b/src/ac-power/ac-power.c
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+
+#include "main-func.h"
+#include "util.h"
+
+static bool arg_verbose = false;
+
+static void help(void) {
+ printf("%s\n\n"
+ "Report whether we are connected to an external power source.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -v --verbose Show state as text\n"
+ , program_invocation_short_name);
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "verbose", no_argument, NULL, 'v' },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hv", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ help();
+ return 0;
+
+ case ARG_VERSION:
+ return version();
+
+ case 'v':
+ arg_verbose = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s takes no arguments.",
+ program_invocation_short_name);
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ /* This is mostly intended to be used for scripts which want
+ * to detect whether AC power is plugged in or not. */
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = on_ac_power();
+ if (r < 0)
+ return log_error_errno(r, "Failed to read AC status: %m");
+
+ if (arg_verbose)
+ puts(yes_no(r));
+
+ return r == 0;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/activate/activate.c b/src/activate/activate.c
new file mode 100644
index 0000000..9a83bc7
--- /dev/null
+++ b/src/activate/activate.c
@@ -0,0 +1,509 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+#include <sys/epoll.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+
+static char** arg_listen = NULL;
+static bool arg_accept = false;
+static int arg_socket_type = SOCK_STREAM;
+static char** arg_args = NULL;
+static char** arg_setenv = NULL;
+static char **arg_fdnames = NULL;
+static bool arg_inetd = false;
+
+static int add_epoll(int epoll_fd, int fd) {
+ struct epoll_event ev = {
+ .events = EPOLLIN,
+ .data.fd = fd,
+ };
+
+ assert(epoll_fd >= 0);
+ assert(fd >= 0);
+
+ if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
+ return log_error_errno(errno, "Failed to add event on epoll fd:%d for fd:%d: %m", epoll_fd, fd);
+
+ return 0;
+}
+
+static int open_sockets(int *epoll_fd, bool accept) {
+ char **address;
+ int n, fd, r;
+ int count = 0;
+
+ n = sd_listen_fds(true);
+ if (n < 0)
+ return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
+ if (n > 0) {
+ log_info("Received %i descriptors via the environment.", n);
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
+ r = fd_cloexec(fd, arg_accept);
+ if (r < 0)
+ return r;
+
+ count++;
+ }
+ }
+
+ /* Close logging and all other descriptors */
+ if (arg_listen) {
+ int except[3 + n];
+
+ for (fd = 0; fd < SD_LISTEN_FDS_START + n; fd++)
+ except[fd] = fd;
+
+ log_close();
+ close_all_fds(except, 3 + n);
+ }
+
+ /** Note: we leak some fd's on error here. I doesn't matter
+ * much, since the program will exit immediately anyway, but
+ * would be a pain to fix.
+ */
+
+ STRV_FOREACH(address, arg_listen) {
+ fd = make_socket_fd(LOG_DEBUG, *address, arg_socket_type, (arg_accept*SOCK_CLOEXEC));
+ if (fd < 0) {
+ log_open();
+ return log_error_errno(fd, "Failed to open '%s': %m", *address);
+ }
+
+ assert(fd == SD_LISTEN_FDS_START + count);
+ count++;
+ }
+
+ if (arg_listen)
+ log_open();
+
+ *epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (*epoll_fd < 0)
+ return log_error_errno(errno, "Failed to create epoll object: %m");
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + count; fd++) {
+ _cleanup_free_ char *name = NULL;
+
+ getsockname_pretty(fd, &name);
+ log_info("Listening on %s as %i.", strna(name), fd);
+
+ r = add_epoll(*epoll_fd, fd);
+ if (r < 0)
+ return r;
+ }
+
+ return count;
+}
+
+static int exec_process(const char* name, char **argv, char **env, int start_fd, size_t n_fds) {
+
+ _cleanup_strv_free_ char **envp = NULL;
+ _cleanup_free_ char *joined = NULL;
+ size_t n_env = 0, length;
+ const char *tocopy;
+ char **s;
+ int r;
+
+ if (arg_inetd && n_fds != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--inetd only supported for single file descriptors.");
+
+ length = strv_length(arg_setenv);
+
+ /* PATH, TERM, HOME, USER, LISTEN_FDS, LISTEN_PID, LISTEN_FDNAMES, NULL */
+ envp = new0(char *, length + 8);
+ if (!envp)
+ return log_oom();
+
+ STRV_FOREACH(s, arg_setenv) {
+
+ if (strchr(*s, '=')) {
+ char *k;
+
+ k = strdup(*s);
+ if (!k)
+ return log_oom();
+
+ envp[n_env++] = k;
+ } else {
+ _cleanup_free_ char *p;
+ const char *n;
+
+ p = strappend(*s, "=");
+ if (!p)
+ return log_oom();
+
+ n = strv_find_prefix(env, p);
+ if (!n)
+ continue;
+
+ envp[n_env] = strdup(n);
+ if (!envp[n_env])
+ return log_oom();
+
+ n_env++;
+ }
+ }
+
+ FOREACH_STRING(tocopy, "TERM=", "PATH=", "USER=", "HOME=") {
+ const char *n;
+
+ n = strv_find_prefix(env, tocopy);
+ if (!n)
+ continue;
+
+ envp[n_env] = strdup(n);
+ if (!envp[n_env])
+ return log_oom();
+
+ n_env++;
+ }
+
+ if (arg_inetd) {
+ assert(n_fds == 1);
+
+ r = rearrange_stdio(start_fd, start_fd, STDERR_FILENO); /* invalidates start_fd on success + error */
+ if (r < 0)
+ return log_error_errno(r, "Failed to move fd to stdin+stdout: %m");
+
+ } else {
+ if (start_fd != SD_LISTEN_FDS_START) {
+ assert(n_fds == 1);
+
+ if (dup2(start_fd, SD_LISTEN_FDS_START) < 0)
+ return log_error_errno(errno, "Failed to dup connection: %m");
+
+ safe_close(start_fd);
+ start_fd = SD_LISTEN_FDS_START;
+ }
+
+ if (asprintf((char**)(envp + n_env++), "LISTEN_FDS=%zu", n_fds) < 0)
+ return log_oom();
+
+ if (asprintf((char**)(envp + n_env++), "LISTEN_PID=" PID_FMT, getpid_cached()) < 0)
+ return log_oom();
+
+ if (arg_fdnames) {
+ _cleanup_free_ char *names = NULL;
+ size_t len;
+ char *e;
+
+ len = strv_length(arg_fdnames);
+ if (len == 1) {
+ size_t i;
+
+ for (i = 1; i < n_fds; i++) {
+ r = strv_extend(&arg_fdnames, arg_fdnames[0]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extend strv: %m");
+ }
+ } else if (len != n_fds)
+ log_warning("The number of fd names is different than number of fds: %zu vs %zu", len, n_fds);
+
+ names = strv_join(arg_fdnames, ":");
+ if (!names)
+ return log_oom();
+
+ e = strappend("LISTEN_FDNAMES=", names);
+ if (!e)
+ return log_oom();
+
+ envp[n_env++] = e;
+ }
+ }
+
+ joined = strv_join(argv, " ");
+ if (!joined)
+ return log_oom();
+
+ log_info("Execing %s (%s)", name, joined);
+ execvpe(name, argv, envp);
+
+ return log_error_errno(errno, "Failed to execp %s (%s): %m", name, joined);
+}
+
+static int fork_and_exec_process(const char* child, char** argv, char **env, int fd) {
+ _cleanup_free_ char *joined = NULL;
+ pid_t child_pid;
+ int r;
+
+ joined = strv_join(argv, " ");
+ if (!joined)
+ return log_oom();
+
+ r = safe_fork("(activate)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &child_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* In the child */
+ exec_process(child, argv, env, fd, 1);
+ _exit(EXIT_FAILURE);
+ }
+
+ log_info("Spawned %s (%s) as PID " PID_FMT ".", child, joined, child_pid);
+ return 0;
+}
+
+static int do_accept(const char* name, char **argv, char **envp, int fd) {
+ _cleanup_free_ char *local = NULL, *peer = NULL;
+ _cleanup_close_ int fd_accepted = -1;
+
+ fd_accepted = accept4(fd, NULL, NULL, 0);
+ if (fd_accepted < 0)
+ return log_error_errno(errno, "Failed to accept connection on fd:%d: %m", fd);
+
+ getsockname_pretty(fd_accepted, &local);
+ getpeername_pretty(fd_accepted, true, &peer);
+ log_info("Connection from %s to %s", strna(peer), strna(local));
+
+ return fork_and_exec_process(name, argv, envp, fd_accepted);
+}
+
+/* SIGCHLD handler. */
+static void sigchld_hdl(int sig) {
+ PROTECT_ERRNO;
+
+ for (;;) {
+ siginfo_t si;
+ int r;
+
+ si.si_pid = 0;
+ r = waitid(P_ALL, 0, &si, WEXITED|WNOHANG);
+ if (r < 0) {
+ if (errno != ECHILD)
+ log_error_errno(errno, "Failed to reap children: %m");
+ return;
+ }
+ if (si.si_pid == 0)
+ return;
+
+ log_info("Child %d died with code %d", si.si_pid, si.si_status);
+ }
+}
+
+static int install_chld_handler(void) {
+ static const struct sigaction act = {
+ .sa_flags = SA_NOCLDSTOP|SA_RESTART,
+ .sa_handler = sigchld_hdl,
+ };
+
+ if (sigaction(SIGCHLD, &act, 0) < 0)
+ return log_error_errno(errno, "Failed to install SIGCHLD handler: %m");
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-socket-activate", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Listen on sockets and launch child on connection.\n\n"
+ "Options:\n"
+ " -h --help Show this help and exit\n"
+ " --version Print version string and exit\n"
+ " -l --listen=ADDR Listen for raw connections at ADDR\n"
+ " -d --datagram Listen on datagram instead of stream socket\n"
+ " --seqpacket Listen on SOCK_SEQPACKET instead of stream socket\n"
+ " -a --accept Spawn separate child for each connection\n"
+ " -E --setenv=NAME[=VALUE] Pass an environment variable to children\n"
+ " --fdname=NAME[:NAME...] Specify names for file descriptors\n"
+ " --inetd Enable inetd file descriptor passing protocol\n"
+ "\nNote: file descriptors from sd_listen_fds() will be passed through.\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_FDNAME,
+ ARG_SEQPACKET,
+ ARG_INETD,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "datagram", no_argument, NULL, 'd' },
+ { "seqpacket", no_argument, NULL, ARG_SEQPACKET },
+ { "listen", required_argument, NULL, 'l' },
+ { "accept", no_argument, NULL, 'a' },
+ { "setenv", required_argument, NULL, 'E' },
+ { "environment", required_argument, NULL, 'E' }, /* legacy alias */
+ { "fdname", required_argument, NULL, ARG_FDNAME },
+ { "inetd", no_argument, NULL, ARG_INETD },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "+hl:aE:d", options, NULL)) >= 0)
+ switch(c) {
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'l':
+ r = strv_extend(&arg_listen, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case 'd':
+ if (arg_socket_type == SOCK_SEQPACKET)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--datagram may not be combined with --seqpacket.");
+
+ arg_socket_type = SOCK_DGRAM;
+ break;
+
+ case ARG_SEQPACKET:
+ if (arg_socket_type == SOCK_DGRAM)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--seqpacket may not be combined with --datagram.");
+
+ arg_socket_type = SOCK_SEQPACKET;
+ break;
+
+ case 'a':
+ arg_accept = true;
+ break;
+
+ case 'E':
+ r = strv_extend(&arg_setenv, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_FDNAME: {
+ _cleanup_strv_free_ char **names;
+ char **s;
+
+ names = strv_split(optarg, ":");
+ if (!names)
+ return log_oom();
+
+ STRV_FOREACH(s, names)
+ if (!fdname_is_valid(*s)) {
+ _cleanup_free_ char *esc;
+
+ esc = cescape(*s);
+ log_warning("File descriptor name \"%s\" is not valid.", esc);
+ }
+
+ /* Empty optargs means one empty name */
+ r = strv_extend_strv(&arg_fdnames,
+ strv_isempty(names) ? STRV_MAKE("") : names,
+ false);
+ if (r < 0)
+ return log_error_errno(r, "strv_extend_strv: %m");
+ break;
+ }
+
+ case ARG_INETD:
+ arg_inetd = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind == argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: command to execute is missing.",
+ program_invocation_short_name);
+
+ if (arg_socket_type == SOCK_DGRAM && arg_accept)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Datagram sockets do not accept connections. "
+ "The --datagram and --accept options may not be combined.");
+
+ arg_args = argv + optind;
+
+ return 1 /* work to do */;
+}
+
+int main(int argc, char **argv, char **envp) {
+ int r, n;
+ int epoll_fd = -1;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+
+ r = install_chld_handler();
+ if (r < 0)
+ return EXIT_FAILURE;
+
+ n = open_sockets(&epoll_fd, arg_accept);
+ if (n < 0)
+ return EXIT_FAILURE;
+ if (n == 0) {
+ log_error("No sockets to listen on specified or passed in.");
+ return EXIT_FAILURE;
+ }
+
+ for (;;) {
+ struct epoll_event event;
+
+ if (epoll_wait(epoll_fd, &event, 1, -1) < 0) {
+ if (errno == EINTR)
+ continue;
+
+ log_error_errno(errno, "epoll_wait() failed: %m");
+ return EXIT_FAILURE;
+ }
+
+ log_info("Communication attempt on fd %i.", event.data.fd);
+ if (arg_accept) {
+ r = do_accept(argv[optind], argv + optind, envp, event.data.fd);
+ if (r < 0)
+ return EXIT_FAILURE;
+ } else
+ break;
+ }
+
+ exec_process(argv[optind], argv + optind, envp, SD_LISTEN_FDS_START, (size_t) n);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/analyze/analyze-security.c b/src/analyze/analyze-security.c
new file mode 100644
index 0000000..a007ed1
--- /dev/null
+++ b/src/analyze/analyze-security.c
@@ -0,0 +1,2086 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sched.h>
+#include <sys/utsname.h>
+
+#include "analyze-security.h"
+#include "bus-error.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "env-util.h"
+#include "format-table.h"
+#include "in-addr-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#if HAVE_SECCOMP
+# include "seccomp-util.h"
+#endif
+#include "set.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "unit-def.h"
+#include "unit-name.h"
+
+struct security_info {
+ char *id;
+ char *type;
+ char *load_state;
+ char *fragment_path;
+ bool default_dependencies;
+
+ uint64_t ambient_capabilities;
+ uint64_t capability_bounding_set;
+
+ char *user;
+ char **supplementary_groups;
+ bool dynamic_user;
+
+ bool ip_address_deny_all;
+ bool ip_address_allow_localhost;
+ bool ip_address_allow_other;
+
+ char *keyring_mode;
+ bool lock_personality;
+ bool memory_deny_write_execute;
+ bool no_new_privileges;
+ char *notify_access;
+
+ bool private_devices;
+ bool private_mounts;
+ bool private_network;
+ bool private_tmp;
+ bool private_users;
+
+ bool protect_control_groups;
+ bool protect_kernel_modules;
+ bool protect_kernel_tunables;
+
+ char *protect_home;
+ char *protect_system;
+
+ bool remove_ipc;
+
+ bool restrict_address_family_inet;
+ bool restrict_address_family_unix;
+ bool restrict_address_family_netlink;
+ bool restrict_address_family_packet;
+ bool restrict_address_family_other;
+
+ uint64_t restrict_namespaces;
+ bool restrict_realtime;
+
+ char *root_directory;
+ char *root_image;
+
+ bool delegate;
+ char *device_policy;
+ bool device_allow_non_empty;
+
+ char **system_call_architectures;
+
+ bool system_call_filter_whitelist;
+ Set *system_call_filter;
+
+ uint32_t _umask;
+};
+
+struct security_assessor {
+ const char *id;
+ const char *description_good;
+ const char *description_bad;
+ const char *description_na;
+ const char *url;
+ uint64_t weight;
+ uint64_t range;
+ int (*assess)(const struct security_assessor *a, const struct security_info *info, const void *data, uint64_t *ret_badness, char **ret_description);
+ size_t offset;
+ uint64_t parameter;
+ bool default_dependencies_only;
+};
+
+static void security_info_free(struct security_info *i) {
+ if (!i)
+ return;
+
+ free(i->id);
+ free(i->type);
+ free(i->load_state);
+ free(i->fragment_path);
+
+ free(i->user);
+
+ free(i->protect_home);
+ free(i->protect_system);
+
+ free(i->root_directory);
+ free(i->root_image);
+
+ free(i->keyring_mode);
+ free(i->notify_access);
+
+ free(i->device_policy);
+
+ strv_free(i->supplementary_groups);
+ strv_free(i->system_call_architectures);
+
+ set_free_free(i->system_call_filter);
+}
+
+static bool security_info_runs_privileged(const struct security_info *i) {
+ assert(i);
+
+ if (STRPTR_IN_SET(i->user, "0", "root"))
+ return true;
+
+ if (i->dynamic_user)
+ return false;
+
+ return isempty(i->user);
+}
+
+static int assess_bool(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ const bool *b = data;
+
+ assert(b);
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness = a->parameter ? *b : !*b;
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static int assess_user(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ _cleanup_free_ char *d = NULL;
+ uint64_t b;
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (streq_ptr(info->user, NOBODY_USER_NAME)) {
+ d = strdup("Service runs under as '" NOBODY_USER_NAME "' user, which should not be used for services");
+ b = 9;
+ } else if (info->dynamic_user && !STR_IN_SET(info->user, "0", "root")) {
+ d = strdup("Service runs under a transient non-root user identity");
+ b = 0;
+ } else if (info->user && !STR_IN_SET(info->user, "0", "root", "")) {
+ d = strdup("Service runs under a static non-root user identity");
+ b = 0;
+ } else {
+ *ret_badness = 10;
+ *ret_description = NULL;
+ return 0;
+ }
+
+ if (!d)
+ return log_oom();
+
+ *ret_badness = b;
+ *ret_description = TAKE_PTR(d);
+
+ return 0;
+}
+
+static int assess_protect_home(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ const char *description;
+ uint64_t badness;
+ char *copy;
+ int r;
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ badness = 10;
+ description = "Service has full access to home directories";
+
+ r = parse_boolean(info->protect_home);
+ if (r < 0) {
+ if (streq_ptr(info->protect_home, "read-only")) {
+ badness = 5;
+ description = "Service has read-only access to home directories";
+ } else if (streq_ptr(info->protect_home, "tmpfs")) {
+ badness = 1;
+ description = "Service has access to fake empty home directories";
+ }
+ } else if (r > 0) {
+ badness = 0;
+ description = "Service has no access to home directories";
+ }
+
+ copy = strdup(description);
+ if (!copy)
+ return log_oom();
+
+ *ret_badness = badness;
+ *ret_description = copy;
+
+ return 0;
+}
+
+static int assess_protect_system(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ const char *description;
+ uint64_t badness;
+ char *copy;
+ int r;
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ badness = 10;
+ description = "Service has full access to the OS file hierarchy";
+
+ r = parse_boolean(info->protect_system);
+ if (r < 0) {
+ if (streq_ptr(info->protect_system, "full")) {
+ badness = 3;
+ description = "Service has very limited write access to the OS file hierarchy";
+ } else if (streq_ptr(info->protect_system, "strict")) {
+ badness = 0;
+ description = "Service has strict read-only access to the OS file hierarchy";
+ }
+ } else if (r > 0) {
+ badness = 5;
+ description = "Service has limited write access to the OS file hierarchy";
+ }
+
+ copy = strdup(description);
+ if (!copy)
+ return log_oom();
+
+ *ret_badness = badness;
+ *ret_description = copy;
+
+ return 0;
+}
+
+static int assess_root_directory(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness =
+ (isempty(info->root_directory) ||
+ path_equal(info->root_directory, "/")) &&
+ (isempty(info->root_image) ||
+ path_equal(info->root_image, "/"));
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static int assess_capability_bounding_set(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness = !!(info->capability_bounding_set & a->parameter);
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static int assess_umask(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ char *copy = NULL;
+ const char *d;
+ uint64_t b;
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (!FLAGS_SET(info->_umask, 0002)) {
+ d = "Files created by service are world-writable by default";
+ b = 10;
+ } else if (!FLAGS_SET(info->_umask, 0004)) {
+ d = "Files created by service are world-readable by default";
+ b = 5;
+ } else if (!FLAGS_SET(info->_umask, 0020)) {
+ d = "Files created by service are group-writable by default";
+ b = 2;
+ } else if (!FLAGS_SET(info->_umask, 0040)) {
+ d = "Files created by service are group-readable by default";
+ b = 1;
+ } else {
+ d = "Files created by service are accessible only by service's own user by default";
+ b = 0;
+ }
+
+ copy = strdup(d);
+ if (!copy)
+ return log_oom();
+
+ *ret_badness = b;
+ *ret_description = copy;
+
+ return 0;
+}
+
+static int assess_keyring_mode(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness = !streq_ptr(info->keyring_mode, "private");
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static int assess_notify_access(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness = streq_ptr(info->notify_access, "all");
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static int assess_remove_ipc(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (security_info_runs_privileged(info))
+ *ret_badness = UINT64_MAX;
+ else
+ *ret_badness = !info->remove_ipc;
+
+ *ret_description = NULL;
+ return 0;
+}
+
+static int assess_supplementary_groups(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (security_info_runs_privileged(info))
+ *ret_badness = UINT64_MAX;
+ else
+ *ret_badness = !strv_isempty(info->supplementary_groups);
+
+ *ret_description = NULL;
+ return 0;
+}
+
+static int assess_restrict_namespaces(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness = !!(info->restrict_namespaces & a->parameter);
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static int assess_system_call_architectures(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ char *d;
+ uint64_t b;
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (strv_isempty(info->system_call_architectures)) {
+ b = 10;
+ d = strdup("Service may execute system calls with all ABIs");
+ } else if (strv_equal(info->system_call_architectures, STRV_MAKE("native"))) {
+ b = 0;
+ d = strdup("Service may execute system calls only with native ABI");
+ } else {
+ b = 8;
+ d = strdup("Service may execute system calls with multiple ABIs");
+ }
+
+ if (!d)
+ return log_oom();
+
+ *ret_badness = b;
+ *ret_description = d;
+
+ return 0;
+}
+
+#if HAVE_SECCOMP
+
+static bool syscall_names_in_filter(Set *s, bool whitelist, const SyscallFilterSet *f) {
+ const char *syscall;
+
+ NULSTR_FOREACH(syscall, f->value) {
+ int id;
+
+ if (syscall[0] == '@') {
+ const SyscallFilterSet *g;
+
+ assert_se(g = syscall_filter_set_find(syscall));
+ if (syscall_names_in_filter(s, whitelist, g))
+ return true; /* bad! */
+
+ continue;
+ }
+
+ /* Let's see if the system call actually exists on this platform, before complaining */
+ id = seccomp_syscall_resolve_name(syscall);
+ if (id < 0)
+ continue;
+
+ if (set_contains(s, syscall) == whitelist) {
+ log_debug("Offending syscall filter item: %s", syscall);
+ return true; /* bad! */
+ }
+ }
+
+ return false;
+}
+
+static int assess_system_call_filter(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ const SyscallFilterSet *f;
+ char *d = NULL;
+ uint64_t b;
+
+ assert(a);
+ assert(info);
+ assert(ret_badness);
+ assert(ret_description);
+
+ assert(a->parameter < _SYSCALL_FILTER_SET_MAX);
+ f = syscall_filter_sets + a->parameter;
+
+ if (!info->system_call_filter_whitelist && set_isempty(info->system_call_filter)) {
+ d = strdup("Service does not filter system calls");
+ b = 10;
+ } else {
+ bool bad;
+
+ log_debug("Analyzing system call filter, checking against: %s", f->name);
+ bad = syscall_names_in_filter(info->system_call_filter, info->system_call_filter_whitelist, f);
+ log_debug("Result: %s", bad ? "bad" : "good");
+
+ if (info->system_call_filter_whitelist) {
+ if (bad) {
+ (void) asprintf(&d, "System call whitelist defined for service, and %s is included", f->name);
+ b = 9;
+ } else {
+ (void) asprintf(&d, "System call whitelist defined for service, and %s is not included", f->name);
+ b = 0;
+ }
+ } else {
+ if (bad) {
+ (void) asprintf(&d, "System call blacklist defined for service, and %s is not included", f->name);
+ b = 10;
+ } else {
+ (void) asprintf(&d, "System call blacklist defined for service, and %s is included", f->name);
+ b = 5;
+ }
+ }
+ }
+
+ if (!d)
+ return log_oom();
+
+ *ret_badness = b;
+ *ret_description = d;
+
+ return 0;
+}
+
+#endif
+
+static int assess_ip_address_allow(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ char *d = NULL;
+ uint64_t b;
+
+ assert(info);
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (!info->ip_address_deny_all) {
+ d = strdup("Service does not define an IP address whitelist");
+ b = 10;
+ } else if (info->ip_address_allow_other) {
+ d = strdup("Service defines IP address whitelist with non-localhost entries");
+ b = 5;
+ } else if (info->ip_address_allow_localhost) {
+ d = strdup("Service defines IP address whitelits with only localhost entries");
+ b = 2;
+ } else {
+ d = strdup("Service blocks all IP address ranges");
+ b = 0;
+ }
+
+ if (!d)
+ return log_oom();
+
+ *ret_badness = b;
+ *ret_description = d;
+
+ return 0;
+}
+
+static int assess_device_allow(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ char *d = NULL;
+ uint64_t b;
+
+ assert(info);
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (STRPTR_IN_SET(info->device_policy, "strict", "closed")) {
+
+ if (info->device_allow_non_empty) {
+ d = strdup("Service has a device ACL with some special devices");
+ b = 5;
+ } else {
+ d = strdup("Service has a minimal device ACL");
+ b = 0;
+ }
+ } else {
+ d = strdup("Service has no device ACL");
+ b = 10;
+ }
+
+ if (!d)
+ return log_oom();
+
+ *ret_badness = b;
+ *ret_description = d;
+
+ return 0;
+}
+
+static int assess_ambient_capabilities(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness = info->ambient_capabilities != 0;
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static const struct security_assessor security_assessor_table[] = {
+ {
+ .id = "User=/DynamicUser=",
+ .description_bad = "Service runs as root user",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#User=",
+ .weight = 2000,
+ .range = 10,
+ .assess = assess_user,
+ },
+ {
+ .id = "SupplementaryGroups=",
+ .description_good = "Service has no supplementary groups",
+ .description_bad = "Service runs with supplementary groups",
+ .description_na = "Service runs as root, option does not matter",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SupplementaryGroups=",
+ .weight = 200,
+ .range = 1,
+ .assess = assess_supplementary_groups,
+ },
+ {
+ .id = "PrivateDevices=",
+ .description_good = "Service has no access to hardware devices",
+ .description_bad = "Service potentially has access to hardware devices",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#PrivateDevices=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, private_devices),
+ },
+ {
+ .id = "PrivateMounts=",
+ .description_good = "Service cannot install system mounts",
+ .description_bad = "Service may install system mounts",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#PrivateMounts=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, private_mounts),
+ },
+ {
+ .id = "PrivateNetwork=",
+ .description_good = "Service has no access to the host's network",
+ .description_bad = "Service has access to the host's network",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#PrivateNetwork=",
+ .weight = 2500,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, private_network),
+ },
+ {
+ .id = "PrivateTmp=",
+ .description_good = "Service has no access to other software's temporary files",
+ .description_bad = "Service has access to other software's temporary files",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#PrivateTmp=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, private_tmp),
+ .default_dependencies_only = true,
+ },
+ {
+ .id = "PrivateUsers=",
+ .description_good = "Service does not have access to other users",
+ .description_bad = "Service has access to other users",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#PrivateUsers=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, private_users),
+ },
+ {
+ .id = "ProtectControlGroups=",
+ .description_good = "Service cannot modify the control group file system",
+ .description_bad = "Service may modify to the control group file system",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectControlGroups=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, protect_control_groups),
+ },
+ {
+ .id = "ProtectKernelModules=",
+ .description_good = "Service cannot load or read kernel modules",
+ .description_bad = "Service may load or read kernel modules",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectKernelModules=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, protect_kernel_modules),
+ },
+ {
+ .id = "ProtectKernelTunables=",
+ .description_good = "Service cannot alter kernel tunables (/proc/sys, …)",
+ .description_bad = "Service may alter kernel tunables",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectKernelTunables=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, protect_kernel_tunables),
+ },
+ {
+ .id = "ProtectHome=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectHome=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_protect_home,
+ .default_dependencies_only = true,
+ },
+ {
+ .id = "ProtectSystem=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectSystem=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_protect_system,
+ .default_dependencies_only = true,
+ },
+ {
+ .id = "RootDirectory=/RootImage=",
+ .description_good = "Service has its own root directory/image",
+ .description_bad = "Service runs within the host's root directory",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RootDirectory=",
+ .weight = 200,
+ .range = 1,
+ .assess = assess_root_directory,
+ .default_dependencies_only = true,
+ },
+ {
+ .id = "LockPersonality=",
+ .description_good = "Service cannot change ABI personality",
+ .description_bad = "Service may change ABI personality",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#LockPersonality=",
+ .weight = 100,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, lock_personality),
+ },
+ {
+ .id = "MemoryDenyWriteExecute=",
+ .description_good = "Service cannot create writable executable memory mappings",
+ .description_bad = "Service may create writable executable memory mappings",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#MemoryDenyWriteExecute=",
+ .weight = 100,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, memory_deny_write_execute),
+ },
+ {
+ .id = "NoNewPrivileges=",
+ .description_good = "Service processes cannot acquire new privileges",
+ .description_bad = "Service processes may acquire new privileges",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#NoNewPrivileges=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, no_new_privileges),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_ADMIN",
+ .description_good = "Service has no administrator privileges",
+ .description_bad = "Service has administrator privileges",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = UINT64_C(1) << CAP_SYS_ADMIN,
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SET(UID|GID|PCAP)",
+ .description_good = "Service cannot change UID/GID identities/capabilities",
+ .description_bad = "Service may change UID/GID identities/capabilities",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SETUID)|
+ (UINT64_C(1) << CAP_SETGID)|
+ (UINT64_C(1) << CAP_SETPCAP),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_PTRACE",
+ .description_good = "Service has no ptrace() debugging abilities",
+ .description_bad = "Service has ptrace() debugging abilities",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_PTRACE),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_TIME",
+ .description_good = "Service processes cannot change the system clock",
+ .description_bad = "Service processes may change the system clock",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = UINT64_C(1) << CAP_SYS_TIME,
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_NET_ADMIN",
+ .description_good = "Service has no network configuration privileges",
+ .description_bad = "Service has network configuration privileges",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_NET_ADMIN),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_RAWIO",
+ .description_good = "Service has no raw I/O access",
+ .description_bad = "Service has raw I/O access",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_RAWIO),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_MODULE",
+ .description_good = "Service cannot load kernel modules",
+ .description_bad = "Service may load kernel modules",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_MODULE),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_AUDIT_*",
+ .description_good = "Service has no audit subsystem access",
+ .description_bad = "Service has audit subsystem access",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_AUDIT_CONTROL) |
+ (UINT64_C(1) << CAP_AUDIT_READ) |
+ (UINT64_C(1) << CAP_AUDIT_WRITE),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYSLOG",
+ .description_good = "Service has no access to kernel logging",
+ .description_bad = "Service has access to kernel logging",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYSLOG),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_(NICE|RESOURCE)",
+ .description_good = "Service has no privileges to change resource use parameters",
+ .description_bad = "Service has privileges to change resource use parameters",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_NICE) |
+ (UINT64_C(1) << CAP_SYS_RESOURCE),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_MKNOD",
+ .description_good = "Service cannot create device nodes",
+ .description_bad = "Service may create device nodes",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_MKNOD),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_(CHOWN|FSETID|SETFCAP)",
+ .description_good = "Service cannot change file ownership/access mode/capabilities",
+ .description_bad = "Service may change file ownership/access mode/capabilities unrestricted",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_CHOWN) |
+ (UINT64_C(1) << CAP_FSETID) |
+ (UINT64_C(1) << CAP_SETFCAP),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_(DAC_*|FOWNER|IPC_OWNER)",
+ .description_good = "Service cannot override UNIX file/IPC permission checks",
+ .description_bad = "Service may override UNIX file/IPC permission checks",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_DAC_OVERRIDE) |
+ (UINT64_C(1) << CAP_DAC_READ_SEARCH) |
+ (UINT64_C(1) << CAP_FOWNER) |
+ (UINT64_C(1) << CAP_IPC_OWNER),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_KILL",
+ .description_good = "Service cannot send UNIX signals to arbitrary processes",
+ .description_bad = "Service may send UNIX signals to arbitrary processes",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_KILL),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_NET_(BIND_SERVICE|BROADCAST|RAW)",
+ .description_good = "Service has no elevated networking privileges",
+ .description_bad = "Service has elevated networking privileges",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_NET_BIND_SERVICE) |
+ (UINT64_C(1) << CAP_NET_BROADCAST) |
+ (UINT64_C(1) << CAP_NET_RAW),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_BOOT",
+ .description_good = "Service cannot issue reboot()",
+ .description_bad = "Service may issue reboot()",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 100,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_BOOT),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_MAC_*",
+ .description_good = "Service cannot adjust SMACK MAC",
+ .description_bad = "Service may adjust SMACK MAC",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 100,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_MAC_ADMIN)|
+ (UINT64_C(1) << CAP_MAC_OVERRIDE),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_LINUX_IMMUTABLE",
+ .description_good = "Service cannot mark files immutable",
+ .description_bad = "Service may mark files immutable",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 75,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_LINUX_IMMUTABLE),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_IPC_LOCK",
+ .description_good = "Service cannot lock memory into RAM",
+ .description_bad = "Service may lock memory into RAM",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 50,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_IPC_LOCK),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_CHROOT",
+ .description_good = "Service cannot issue chroot()",
+ .description_bad = "Service may issue chroot()",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 50,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_CHROOT),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_BLOCK_SUSPEND",
+ .description_good = "Service cannot establish wake locks",
+ .description_bad = "Service may establish wake locks",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 25,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_BLOCK_SUSPEND),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_WAKE_ALARM",
+ .description_good = "Service cannot program timers that wake up the system",
+ .description_bad = "Service may program timers that wake up the system",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 25,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_WAKE_ALARM),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_LEASE",
+ .description_good = "Service cannot create file leases",
+ .description_bad = "Service may create file leases",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 25,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_LEASE),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_TTY_CONFIG",
+ .description_good = "Service cannot issue vhangup()",
+ .description_bad = "Service may issue vhangup()",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 25,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_TTY_CONFIG),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_PACCT",
+ .description_good = "Service cannot use acct()",
+ .description_bad = "Service may use acct()",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 25,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_PACCT),
+ },
+ {
+ .id = "UMask=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#UMask=",
+ .weight = 100,
+ .range = 10,
+ .assess = assess_umask,
+ },
+ {
+ .id = "KeyringMode=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#KeyringMode=",
+ .description_good = "Service doesn't share key material with other services",
+ .description_bad = "Service shares key material with other service",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_keyring_mode,
+ },
+ {
+ .id = "NotifyAccess=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#NotifyAccess=",
+ .description_good = "Service child processes cannot alter service state",
+ .description_bad = "Service child processes may alter service state",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_notify_access,
+ },
+ {
+ .id = "RemoveIPC=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RemoveIPC=",
+ .description_good = "Service user cannot leave SysV IPC objects around",
+ .description_bad = "Service user may leave SysV IPC objects around",
+ .description_na = "Service runs as root, option does not apply",
+ .weight = 100,
+ .range = 1,
+ .assess = assess_remove_ipc,
+ .offset = offsetof(struct security_info, remove_ipc),
+ },
+ {
+ .id = "Delegate=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Delegate=",
+ .description_good = "Service does not maintain its own delegated control group subtree",
+ .description_bad = "Service maintains its own delegated control group subtree",
+ .weight = 100,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, delegate),
+ .parameter = true, /* invert! */
+ },
+ {
+ .id = "RestrictRealtime=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictRealtime=",
+ .description_good = "Service realtime scheduling access is restricted",
+ .description_bad = "Service may acquire realtime scheduling",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, restrict_realtime),
+ },
+ {
+ .id = "RestrictNamespaces=~CLONE_NEWUSER",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
+ .description_good = "Service cannot create user namespaces",
+ .description_bad = "Service may create user namespaces",
+ .weight = 1500,
+ .range = 1,
+ .assess = assess_restrict_namespaces,
+ .parameter = CLONE_NEWUSER,
+ },
+ {
+ .id = "RestrictNamespaces=~CLONE_NEWNS",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
+ .description_good = "Service cannot create file system namespaces",
+ .description_bad = "Service may create file system namespaces",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_restrict_namespaces,
+ .parameter = CLONE_NEWNS,
+ },
+ {
+ .id = "RestrictNamespaces=~CLONE_NEWIPC",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
+ .description_good = "Service cannot create IPC namespaces",
+ .description_bad = "Service may create IPC namespaces",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_restrict_namespaces,
+ .parameter = CLONE_NEWIPC,
+ },
+ {
+ .id = "RestrictNamespaces=~CLONE_NEWPID",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
+ .description_good = "Service cannot create process namespaces",
+ .description_bad = "Service may create process namespaces",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_restrict_namespaces,
+ .parameter = CLONE_NEWPID,
+ },
+ {
+ .id = "RestrictNamespaces=~CLONE_NEWCGROUP",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
+ .description_good = "Service cannot create cgroup namespaces",
+ .description_bad = "Service may create cgroup namespaces",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_restrict_namespaces,
+ .parameter = CLONE_NEWCGROUP,
+ },
+ {
+ .id = "RestrictNamespaces=~CLONE_NEWNET",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
+ .description_good = "Service cannot create network namespaces",
+ .description_bad = "Service may create network namespaces",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_restrict_namespaces,
+ .parameter = CLONE_NEWNET,
+ },
+ {
+ .id = "RestrictNamespaces=~CLONE_NEWUTS",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
+ .description_good = "Service cannot create hostname namespaces",
+ .description_bad = "Service may create hostname namespaces",
+ .weight = 100,
+ .range = 1,
+ .assess = assess_restrict_namespaces,
+ .parameter = CLONE_NEWUTS,
+ },
+ {
+ .id = "RestrictAddressFamilies=~AF_(INET|INET6)",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictAddressFamilies=",
+ .description_good = "Service cannot allocate Internet sockets",
+ .description_bad = "Service may allocate Internet sockets",
+ .weight = 1500,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, restrict_address_family_inet),
+ },
+ {
+ .id = "RestrictAddressFamilies=~AF_UNIX",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictAddressFamilies=",
+ .description_good = "Service cannot allocate local sockets",
+ .description_bad = "Service may allocate local sockets",
+ .weight = 25,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, restrict_address_family_unix),
+ },
+ {
+ .id = "RestrictAddressFamilies=~AF_NETLINK",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictAddressFamilies=",
+ .description_good = "Service cannot allocate netlink sockets",
+ .description_bad = "Service may allocate netlink sockets",
+ .weight = 200,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, restrict_address_family_netlink),
+ },
+ {
+ .id = "RestrictAddressFamilies=~AF_PACKET",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictAddressFamilies=",
+ .description_good = "Service cannot allocate packet sockets",
+ .description_bad = "Service may allocate packet sockets",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, restrict_address_family_packet),
+ },
+ {
+ .id = "RestrictAddressFamilies=~…",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictAddressFamilies=",
+ .description_good = "Service cannot allocate exotic sockets",
+ .description_bad = "Service may allocate exotic sockets",
+ .weight = 1250,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, restrict_address_family_other),
+ },
+ {
+ .id = "SystemCallArchitectures=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallArchitectures=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_architectures,
+ },
+#if HAVE_SECCOMP
+ {
+ .id = "SystemCallFilter=~@swap",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_SWAP,
+ },
+ {
+ .id = "SystemCallFilter=~@obsolete",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 250,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_OBSOLETE,
+ },
+ {
+ .id = "SystemCallFilter=~@clock",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_CLOCK,
+ },
+ {
+ .id = "SystemCallFilter=~@cpu-emulation",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 250,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_CPU_EMULATION,
+ },
+ {
+ .id = "SystemCallFilter=~@debug",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_DEBUG,
+ },
+ {
+ .id = "SystemCallFilter=~@mount",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_MOUNT,
+ },
+ {
+ .id = "SystemCallFilter=~@module",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_MODULE,
+ },
+ {
+ .id = "SystemCallFilter=~@raw-io",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_RAW_IO,
+ },
+ {
+ .id = "SystemCallFilter=~@reboot",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_REBOOT,
+ },
+ {
+ .id = "SystemCallFilter=~@privileged",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 700,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_PRIVILEGED,
+ },
+ {
+ .id = "SystemCallFilter=~@resources",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 700,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_RESOURCES,
+ },
+#endif
+ {
+ .id = "IPAddressDeny=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#IPAddressDeny=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_ip_address_allow,
+ },
+ {
+ .id = "DeviceAllow=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#DeviceAllow=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_device_allow,
+ },
+ {
+ .id = "AmbientCapabilities=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#AmbientCapabilities=",
+ .description_good = "Service process does not receive ambient capabilities",
+ .description_bad = "Service process receives ambient capabilities",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_ambient_capabilities,
+ },
+};
+
+static int assess(const struct security_info *info, Table *overview_table, AnalyzeSecurityFlags flags) {
+ static const struct {
+ uint64_t exposure;
+ const char *name;
+ const char *color;
+ SpecialGlyph smiley;
+ } badness_table[] = {
+ { 100, "DANGEROUS", ANSI_HIGHLIGHT_RED, SPECIAL_GLYPH_DEPRESSED_SMILEY },
+ { 90, "UNSAFE", ANSI_HIGHLIGHT_RED, SPECIAL_GLYPH_UNHAPPY_SMILEY },
+ { 75, "EXPOSED", ANSI_HIGHLIGHT_YELLOW, SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY },
+ { 50, "MEDIUM", NULL, SPECIAL_GLYPH_NEUTRAL_SMILEY },
+ { 10, "OK", ANSI_HIGHLIGHT_GREEN, SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY },
+ { 1, "SAFE", ANSI_HIGHLIGHT_GREEN, SPECIAL_GLYPH_HAPPY_SMILEY },
+ { 0, "PERFECT", ANSI_HIGHLIGHT_GREEN, SPECIAL_GLYPH_ECSTATIC_SMILEY },
+ };
+
+ uint64_t badness_sum = 0, weight_sum = 0, exposure;
+ _cleanup_(table_unrefp) Table *details_table = NULL;
+ size_t i;
+ int r;
+
+ if (!FLAGS_SET(flags, ANALYZE_SECURITY_SHORT)) {
+ details_table = table_new(" ", "name", "description", "weight", "badness", "range", "exposure");
+ if (!details_table)
+ return log_oom();
+
+ (void) table_set_sort(details_table, 3, 1, (size_t) -1);
+ (void) table_set_reverse(details_table, 3, true);
+
+ if (getenv_bool("SYSTEMD_ANALYZE_DEBUG") <= 0)
+ (void) table_set_display(details_table, 0, 1, 2, 6, (size_t) -1);
+ }
+
+ for (i = 0; i < ELEMENTSOF(security_assessor_table); i++) {
+ const struct security_assessor *a = security_assessor_table + i;
+ _cleanup_free_ char *d = NULL;
+ uint64_t badness;
+ void *data;
+
+ data = (uint8_t*) info + a->offset;
+
+ if (a->default_dependencies_only && !info->default_dependencies) {
+ badness = UINT64_MAX;
+ d = strdup("Service runs in special boot phase, option does not apply");
+ if (!d)
+ return log_oom();
+ } else {
+ r = a->assess(a, info, data, &badness, &d);
+ if (r < 0)
+ return r;
+ }
+
+ assert(a->range > 0);
+
+ if (badness != UINT64_MAX) {
+ assert(badness <= a->range);
+
+ badness_sum += DIV_ROUND_UP(badness * a->weight, a->range);
+ weight_sum += a->weight;
+ }
+
+ if (details_table) {
+ const char *checkmark, *description, *color = NULL;
+ TableCell *cell;
+
+ if (badness == UINT64_MAX) {
+ checkmark = " ";
+ description = a->description_na;
+ color = NULL;
+ } else if (badness == a->range) {
+ checkmark = special_glyph(SPECIAL_GLYPH_CROSS_MARK);
+ description = a->description_bad;
+ color = ansi_highlight_red();
+ } else if (badness == 0) {
+ checkmark = special_glyph(SPECIAL_GLYPH_CHECK_MARK);
+ description = a->description_good;
+ color = ansi_highlight_green();
+ } else {
+ checkmark = special_glyph(SPECIAL_GLYPH_CROSS_MARK);
+ description = NULL;
+ color = ansi_highlight_red();
+ }
+
+ if (d)
+ description = d;
+
+ r = table_add_cell_full(details_table, &cell, TABLE_STRING, checkmark, 1, 1, 0, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add cell to table: %m");
+ if (color)
+ (void) table_set_color(details_table, cell, color);
+
+ r = table_add_cell(details_table, &cell, TABLE_STRING, a->id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add cell to table: %m");
+ if (a->url)
+ (void) table_set_url(details_table, cell, a->url);
+
+ r = table_add_cell(details_table, NULL, TABLE_STRING, description);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add cell to table: %m");
+
+ r = table_add_cell(details_table, &cell, TABLE_UINT64, &a->weight);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add cell to table: %m");
+ (void) table_set_align_percent(details_table, cell, 100);
+
+ r = table_add_cell(details_table, &cell, TABLE_UINT64, &badness);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add cell to table: %m");
+ (void) table_set_align_percent(details_table, cell, 100);
+
+ r = table_add_cell(details_table, &cell, TABLE_UINT64, &a->range);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add cell to table: %m");
+ (void) table_set_align_percent(details_table, cell, 100);
+
+ r = table_add_cell(details_table, &cell, TABLE_EMPTY, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add cell to table: %m");
+ (void) table_set_align_percent(details_table, cell, 100);
+ }
+ }
+
+ if (details_table) {
+ size_t row;
+
+ for (row = 1; row < table_get_rows(details_table); row++) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1 + DECIMAL_STR_MAX(uint64_t) + 1];
+ const uint64_t *weight, *badness, *range;
+ TableCell *cell;
+ uint64_t x;
+
+ assert_se(weight = table_get_at(details_table, row, 3));
+ assert_se(badness = table_get_at(details_table, row, 4));
+ assert_se(range = table_get_at(details_table, row, 5));
+
+ if (*badness == UINT64_MAX || *badness == 0)
+ continue;
+
+ assert_se(cell = table_get_cell(details_table, row, 6));
+
+ x = DIV_ROUND_UP(DIV_ROUND_UP(*badness * *weight * 100U, *range), weight_sum);
+ xsprintf(buf, "%" PRIu64 ".%" PRIu64, x / 10, x % 10);
+
+ r = table_update(details_table, cell, TABLE_STRING, buf);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update cell in table: %m");
+ }
+
+ r = table_print(details_table, stdout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to output table: %m");
+ }
+
+ assert(weight_sum > 0);
+ exposure = DIV_ROUND_UP(badness_sum * 100U, weight_sum);
+
+ for (i = 0; i < ELEMENTSOF(badness_table); i++)
+ if (exposure >= badness_table[i].exposure)
+ break;
+
+ assert(i < ELEMENTSOF(badness_table));
+
+ if (details_table) {
+ _cleanup_free_ char *clickable = NULL;
+ const char *name;
+
+ /* If we shall output the details table, also print the brief summary underneath */
+
+ if (info->fragment_path) {
+ r = terminal_urlify_path(info->fragment_path, info->id, &clickable);
+ if (r < 0)
+ return log_oom();
+
+ name = clickable;
+ } else
+ name = info->id;
+
+ printf("\n%s %sOverall exposure level for %s%s: %s%" PRIu64 ".%" PRIu64 " %s%s %s\n",
+ special_glyph(SPECIAL_GLYPH_ARROW),
+ ansi_highlight(),
+ name,
+ ansi_normal(),
+ colors_enabled() ? strempty(badness_table[i].color) : "",
+ exposure / 10, exposure % 10,
+ badness_table[i].name,
+ ansi_normal(),
+ special_glyph(badness_table[i].smiley));
+ }
+
+ fflush(stdout);
+
+ if (overview_table) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1 + DECIMAL_STR_MAX(uint64_t) + 1];
+ TableCell *cell;
+
+ r = table_add_cell(overview_table, &cell, TABLE_STRING, info->id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add cell to table: %m");
+ if (info->fragment_path) {
+ _cleanup_free_ char *url = NULL;
+
+ r = file_url_from_path(info->fragment_path, &url);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate URL from path: %m");
+
+ (void) table_set_url(overview_table, cell, url);
+ }
+
+ xsprintf(buf, "%" PRIu64 ".%" PRIu64, exposure / 10, exposure % 10);
+ r = table_add_cell(overview_table, &cell, TABLE_STRING, buf);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add cell to table: %m");
+ (void) table_set_align_percent(overview_table, cell, 100);
+
+ r = table_add_cell(overview_table, &cell, TABLE_STRING, badness_table[i].name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add cell to table: %m");
+ (void) table_set_color(overview_table, cell, strempty(badness_table[i].color));
+
+ r = table_add_cell(overview_table, NULL, TABLE_STRING, special_glyph(badness_table[i].smiley));
+ if (r < 0)
+ return log_error_errno(r, "Failed to add cell to table: %m");
+ }
+
+ return 0;
+}
+
+static int property_read_restrict_address_families(
+ sd_bus *bus,
+ const char *member,
+ sd_bus_message *m,
+ sd_bus_error *error,
+ void *userdata) {
+
+ struct security_info *info = userdata;
+ int whitelist, r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+
+ r = sd_bus_message_enter_container(m, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "b", &whitelist);
+ if (r < 0)
+ return r;
+
+ info->restrict_address_family_inet =
+ info->restrict_address_family_unix =
+ info->restrict_address_family_netlink =
+ info->restrict_address_family_packet =
+ info->restrict_address_family_other = whitelist;
+
+ r = sd_bus_message_enter_container(m, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *name;
+
+ r = sd_bus_message_read(m, "s", &name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (STR_IN_SET(name, "AF_INET", "AF_INET6"))
+ info->restrict_address_family_inet = !whitelist;
+ else if (streq(name, "AF_UNIX"))
+ info->restrict_address_family_unix = !whitelist;
+ else if (streq(name, "AF_NETLINK"))
+ info->restrict_address_family_netlink = !whitelist;
+ else if (streq(name, "AF_PACKET"))
+ info->restrict_address_family_packet = !whitelist;
+ else
+ info->restrict_address_family_other = !whitelist;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_exit_container(m);
+}
+
+static int property_read_system_call_filter(
+ sd_bus *bus,
+ const char *member,
+ sd_bus_message *m,
+ sd_bus_error *error,
+ void *userdata) {
+
+ struct security_info *info = userdata;
+ int whitelist, r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+
+ r = sd_bus_message_enter_container(m, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "b", &whitelist);
+ if (r < 0)
+ return r;
+
+ info->system_call_filter_whitelist = whitelist;
+
+ r = sd_bus_message_enter_container(m, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *name;
+
+ r = sd_bus_message_read(m, "s", &name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = set_ensure_allocated(&info->system_call_filter, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = set_put_strdup(info->system_call_filter, name);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_exit_container(m);
+}
+
+static int property_read_ip_address_allow(
+ sd_bus *bus,
+ const char *member,
+ sd_bus_message *m,
+ sd_bus_error *error,
+ void *userdata) {
+
+ struct security_info *info = userdata;
+ bool deny_ipv4 = false, deny_ipv6 = false;
+ int r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+
+ r = sd_bus_message_enter_container(m, 'a', "(iayu)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const void *data;
+ size_t size;
+ int32_t family;
+ uint32_t prefixlen;
+
+ r = sd_bus_message_enter_container(m, 'r', "iayu");
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(m, "i", &family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(m, 'y', &data, &size);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "u", &prefixlen);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ if (streq(member, "IPAddressAllow")) {
+ union in_addr_union u;
+
+ if (family == AF_INET && size == 4 && prefixlen == 8)
+ memcpy(&u.in, data, size);
+ else if (family == AF_INET6 && size == 16 && prefixlen == 128)
+ memcpy(&u.in6, data, size);
+ else {
+ info->ip_address_allow_other = true;
+ continue;
+ }
+
+ if (in_addr_is_localhost(family, &u))
+ info->ip_address_allow_localhost = true;
+ else
+ info->ip_address_allow_other = true;
+ } else {
+ assert(streq(member, "IPAddressDeny"));
+
+ if (family == AF_INET && size == 4 && prefixlen == 0)
+ deny_ipv4 = true;
+ else if (family == AF_INET6 && size == 16 && prefixlen == 0)
+ deny_ipv6 = true;
+ }
+ }
+
+ info->ip_address_deny_all = deny_ipv4 && deny_ipv6;
+
+ return sd_bus_message_exit_container(m);
+}
+
+static int property_read_device_allow(
+ sd_bus *bus,
+ const char *member,
+ sd_bus_message *m,
+ sd_bus_error *error,
+ void *userdata) {
+
+ struct security_info *info = userdata;
+ size_t n = 0;
+ int r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+
+ r = sd_bus_message_enter_container(m, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *name, *policy;
+
+ r = sd_bus_message_read(m, "(ss)", &name, &policy);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ n++;
+ }
+
+ info->device_allow_non_empty = n > 0;
+
+ return sd_bus_message_exit_container(m);
+}
+
+static int acquire_security_info(sd_bus *bus, const char *name, struct security_info *info, AnalyzeSecurityFlags flags) {
+
+ static const struct bus_properties_map security_map[] = {
+ { "AmbientCapabilities", "t", NULL, offsetof(struct security_info, ambient_capabilities) },
+ { "CapabilityBoundingSet", "t", NULL, offsetof(struct security_info, capability_bounding_set) },
+ { "DefaultDependencies", "b", NULL, offsetof(struct security_info, default_dependencies) },
+ { "Delegate", "b", NULL, offsetof(struct security_info, delegate) },
+ { "DeviceAllow", "a(ss)", property_read_device_allow, 0 },
+ { "DevicePolicy", "s", NULL, offsetof(struct security_info, device_policy) },
+ { "DynamicUser", "b", NULL, offsetof(struct security_info, dynamic_user) },
+ { "FragmentPath", "s", NULL, offsetof(struct security_info, fragment_path) },
+ { "IPAddressAllow", "a(iayu)", property_read_ip_address_allow, 0 },
+ { "IPAddressDeny", "a(iayu)", property_read_ip_address_allow, 0 },
+ { "Id", "s", NULL, offsetof(struct security_info, id) },
+ { "KeyringMode", "s", NULL, offsetof(struct security_info, keyring_mode) },
+ { "LoadState", "s", NULL, offsetof(struct security_info, load_state) },
+ { "LockPersonality", "b", NULL, offsetof(struct security_info, lock_personality) },
+ { "MemoryDenyWriteExecute", "b", NULL, offsetof(struct security_info, memory_deny_write_execute) },
+ { "NoNewPrivileges", "b", NULL, offsetof(struct security_info, no_new_privileges) },
+ { "NotifyAccess", "s", NULL, offsetof(struct security_info, notify_access) },
+ { "PrivateDevices", "b", NULL, offsetof(struct security_info, private_devices) },
+ { "PrivateMounts", "b", NULL, offsetof(struct security_info, private_mounts) },
+ { "PrivateNetwork", "b", NULL, offsetof(struct security_info, private_network) },
+ { "PrivateTmp", "b", NULL, offsetof(struct security_info, private_tmp) },
+ { "PrivateUsers", "b", NULL, offsetof(struct security_info, private_users) },
+ { "ProtectControlGroups", "b", NULL, offsetof(struct security_info, protect_control_groups) },
+ { "ProtectHome", "s", NULL, offsetof(struct security_info, protect_home) },
+ { "ProtectKernelModules", "b", NULL, offsetof(struct security_info, protect_kernel_modules) },
+ { "ProtectKernelTunables", "b", NULL, offsetof(struct security_info, protect_kernel_tunables) },
+ { "ProtectSystem", "s", NULL, offsetof(struct security_info, protect_system) },
+ { "RemoveIPC", "b", NULL, offsetof(struct security_info, remove_ipc) },
+ { "RestrictAddressFamilies", "(bas)", property_read_restrict_address_families, 0 },
+ { "RestrictNamespaces", "t", NULL, offsetof(struct security_info, restrict_namespaces) },
+ { "RestrictRealtime", "b", NULL, offsetof(struct security_info, restrict_realtime) },
+ { "RootDirectory", "s", NULL, offsetof(struct security_info, root_directory) },
+ { "RootImage", "s", NULL, offsetof(struct security_info, root_image) },
+ { "SupplementaryGroups", "as", NULL, offsetof(struct security_info, supplementary_groups) },
+ { "SystemCallArchitectures", "as", NULL, offsetof(struct security_info, system_call_architectures) },
+ { "SystemCallFilter", "(as)", property_read_system_call_filter, 0 },
+ { "Type", "s", NULL, offsetof(struct security_info, type) },
+ { "UMask", "u", NULL, offsetof(struct security_info, _umask) },
+ { "User", "s", NULL, offsetof(struct security_info, user) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ /* Note: this mangles *info on failure! */
+
+ assert(bus);
+ assert(name);
+ assert(info);
+
+ path = unit_dbus_path_from_name(name);
+ if (!path)
+ return log_oom();
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.systemd1",
+ path,
+ security_map,
+ BUS_MAP_STRDUP|BUS_MAP_BOOLEAN_AS_BOOL,
+ &error,
+ NULL,
+ info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unit properties: %s", bus_error_message(&error, r));
+
+ if (!streq_ptr(info->load_state, "loaded")) {
+
+ if (FLAGS_SET(flags, ANALYZE_SECURITY_ONLY_LOADED))
+ return -EMEDIUMTYPE;
+
+ if (streq_ptr(info->load_state, "not-found"))
+ log_error("Unit %s not found, cannot analyze.", name);
+ else if (streq_ptr(info->load_state, "masked"))
+ log_error("Unit %s is masked, cannot analyze.", name);
+ else
+ log_error("Unit %s not loaded properly, cannot analyze.", name);
+
+ return -EINVAL;
+ }
+
+ if (FLAGS_SET(flags, ANALYZE_SECURITY_ONLY_LONG_RUNNING) && streq_ptr(info->type, "oneshot"))
+ return -EMEDIUMTYPE;
+
+ if (info->private_devices ||
+ info->private_tmp ||
+ info->protect_control_groups ||
+ info->protect_kernel_tunables ||
+ info->protect_kernel_modules ||
+ !streq_ptr(info->protect_home, "no") ||
+ !streq_ptr(info->protect_system, "no") ||
+ info->root_image)
+ info->private_mounts = true;
+
+ if (info->protect_kernel_modules)
+ info->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE);
+
+ if (info->private_devices)
+ info->capability_bounding_set &= ~((UINT64_C(1) << CAP_MKNOD) |
+ (UINT64_C(1) << CAP_SYS_RAWIO));
+
+ return 0;
+}
+
+static int analyze_security_one(sd_bus *bus, const char *name, Table* overview_table, AnalyzeSecurityFlags flags) {
+ _cleanup_(security_info_free) struct security_info info = {
+ .default_dependencies = true,
+ .capability_bounding_set = UINT64_MAX,
+ .restrict_namespaces = UINT64_MAX,
+ ._umask = 0002,
+ };
+ int r;
+
+ assert(bus);
+ assert(name);
+
+ r = acquire_security_info(bus, name, &info, flags);
+ if (r == -EMEDIUMTYPE) /* Ignore this one because not loaded or Type is oneshot */
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = assess(&info, overview_table, flags);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int analyze_security(sd_bus *bus, char **units, AnalyzeSecurityFlags flags) {
+ _cleanup_(table_unrefp) Table *overview_table = NULL;
+ int ret = 0, r;
+
+ assert(bus);
+
+ if (strv_length(units) != 1) {
+ overview_table = table_new("unit", "exposure", "predicate", "happy");
+ if (!overview_table)
+ return log_oom();
+ }
+
+ if (strv_isempty(units)) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_strv_free_ char **list = NULL;
+ size_t allocated = 0, n = 0;
+ char **i;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ListUnits",
+ &error, &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list units: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssssouso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ UnitInfo info;
+ char *copy = NULL;
+
+ r = bus_parse_unit_info(reply, &info);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ if (!endswith(info.id, ".service"))
+ continue;
+
+ if (!GREEDY_REALLOC(list, allocated, n+2))
+ return log_oom();
+
+ copy = strdup(info.id);
+ if (!copy)
+ return log_oom();
+
+ list[n++] = copy;
+ list[n] = NULL;
+ }
+
+ strv_sort(list);
+
+ flags |= ANALYZE_SECURITY_SHORT|ANALYZE_SECURITY_ONLY_LOADED|ANALYZE_SECURITY_ONLY_LONG_RUNNING;
+
+ STRV_FOREACH(i, list) {
+ r = analyze_security_one(bus, *i, overview_table, flags);
+ if (r < 0 && ret >= 0)
+ ret = r;
+ }
+
+ } else {
+ char **i;
+
+ STRV_FOREACH(i, units) {
+ _cleanup_free_ char *mangled = NULL, *instance = NULL;
+ const char *name;
+
+ if (!FLAGS_SET(flags, ANALYZE_SECURITY_SHORT) && i != units) {
+ putc('\n', stdout);
+ fflush(stdout);
+ }
+
+ r = unit_name_mangle_with_suffix(*i, 0, ".service", &mangled);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name '%s': %m", *i);
+
+ if (!endswith(mangled, ".service")) {
+ log_error("Unit %s is not a service unit, refusing.", *i);
+ return -EINVAL;
+ }
+
+ if (unit_name_is_valid(mangled, UNIT_NAME_TEMPLATE)) {
+ r = unit_name_replace_instance(mangled, "test-instance", &instance);
+ if (r < 0)
+ return log_oom();
+
+ name = instance;
+ } else
+ name = mangled;
+
+ r = analyze_security_one(bus, name, overview_table, flags);
+ if (r < 0 && ret >= 0)
+ ret = r;
+ }
+ }
+
+ if (overview_table) {
+ if (!FLAGS_SET(flags, ANALYZE_SECURITY_SHORT)) {
+ putc('\n', stdout);
+ fflush(stdout);
+ }
+
+ r = table_print(overview_table, stdout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to output table: %m");
+ }
+
+ return ret;
+}
diff --git a/src/analyze/analyze-security.h b/src/analyze/analyze-security.h
new file mode 100644
index 0000000..c00ae7c
--- /dev/null
+++ b/src/analyze/analyze-security.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+typedef enum AnalyzeSecurityFlags {
+ ANALYZE_SECURITY_SHORT = 1 << 0,
+ ANALYZE_SECURITY_ONLY_LOADED = 1 << 1,
+ ANALYZE_SECURITY_ONLY_LONG_RUNNING = 1 << 2,
+} AnalyzeSecurityFlags;
+
+int analyze_security(sd_bus *bus, char **units, AnalyzeSecurityFlags flags);
diff --git a/src/analyze/analyze-verify.c b/src/analyze/analyze-verify.c
new file mode 100644
index 0000000..1d8a1ed
--- /dev/null
+++ b/src/analyze/analyze-verify.c
@@ -0,0 +1,287 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "all-units.h"
+#include "analyze-verify.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "log.h"
+#include "manager.h"
+#include "pager.h"
+#include "path-util.h"
+#include "strv.h"
+#include "unit-name.h"
+
+static int prepare_filename(const char *filename, char **ret) {
+ int r;
+ const char *name;
+ _cleanup_free_ char *abspath = NULL;
+ _cleanup_free_ char *dir = NULL;
+ _cleanup_free_ char *with_instance = NULL;
+ char *c;
+
+ assert(filename);
+ assert(ret);
+
+ r = path_make_absolute_cwd(filename, &abspath);
+ if (r < 0)
+ return r;
+
+ name = basename(abspath);
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ if (unit_name_is_valid(name, UNIT_NAME_TEMPLATE)) {
+ r = unit_name_replace_instance(name, "i", &with_instance);
+ if (r < 0)
+ return r;
+ }
+
+ dir = dirname_malloc(abspath);
+ if (!dir)
+ return -ENOMEM;
+
+ c = path_join(dir, with_instance ?: name);
+ if (!c)
+ return -ENOMEM;
+
+ *ret = c;
+ return 0;
+}
+
+static int generate_path(char **var, char **filenames) {
+ const char *old;
+ char **filename;
+
+ _cleanup_strv_free_ char **ans = NULL;
+ int r;
+
+ STRV_FOREACH(filename, filenames) {
+ char *t;
+
+ t = dirname_malloc(*filename);
+ if (!t)
+ return -ENOMEM;
+
+ r = strv_consume(&ans, t);
+ if (r < 0)
+ return r;
+ }
+
+ assert_se(strv_uniq(ans));
+
+ /* First, prepend our directories. Second, if some path was specified, use that, and
+ * otherwise use the defaults. Any duplicates will be filtered out in path-lookup.c.
+ * Treat explicit empty path to mean that nothing should be appended.
+ */
+ old = getenv("SYSTEMD_UNIT_PATH");
+ if (!streq_ptr(old, "")) {
+ if (!old)
+ old = ":";
+
+ r = strv_extend(&ans, old);
+ if (r < 0)
+ return r;
+ }
+
+ *var = strv_join(ans, ":");
+ if (!*var)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int verify_socket(Unit *u) {
+ int r;
+
+ assert(u);
+
+ if (u->type != UNIT_SOCKET)
+ return 0;
+
+ /* Cannot run this without the service being around */
+
+ /* This makes sure instance is created if necessary. */
+ r = socket_instantiate_service(SOCKET(u));
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Socket cannot be started, failed to create instance: %m");
+
+ /* This checks both type of sockets */
+ if (UNIT_ISSET(SOCKET(u)->service)) {
+ Service *service;
+
+ service = SERVICE(UNIT_DEREF(SOCKET(u)->service));
+ log_unit_debug(u, "Using %s", UNIT(service)->id);
+
+ if (UNIT(service)->load_state != UNIT_LOADED) {
+ log_unit_error(u, "Service %s not loaded, %s cannot be started.", UNIT(service)->id, u->id);
+ return -ENOENT;
+ }
+ }
+
+ return 0;
+}
+
+static int verify_executable(Unit *u, ExecCommand *exec) {
+ if (!exec)
+ return 0;
+
+ if (access(exec->path, X_OK) < 0)
+ return log_unit_error_errno(u, errno, "Command %s is not executable: %m", exec->path);
+
+ return 0;
+}
+
+static int verify_executables(Unit *u) {
+ ExecCommand *exec;
+ int r = 0, k;
+ unsigned i;
+
+ assert(u);
+
+ exec = u->type == UNIT_SOCKET ? SOCKET(u)->control_command :
+ u->type == UNIT_MOUNT ? MOUNT(u)->control_command :
+ u->type == UNIT_SWAP ? SWAP(u)->control_command : NULL;
+ k = verify_executable(u, exec);
+ if (k < 0 && r == 0)
+ r = k;
+
+ if (u->type == UNIT_SERVICE)
+ for (i = 0; i < ELEMENTSOF(SERVICE(u)->exec_command); i++) {
+ k = verify_executable(u, SERVICE(u)->exec_command[i]);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ if (u->type == UNIT_SOCKET)
+ for (i = 0; i < ELEMENTSOF(SOCKET(u)->exec_command); i++) {
+ k = verify_executable(u, SOCKET(u)->exec_command[i]);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int verify_documentation(Unit *u, bool check_man) {
+ char **p;
+ int r = 0, k;
+
+ STRV_FOREACH(p, u->documentation) {
+ log_unit_debug(u, "Found documentation item: %s", *p);
+
+ if (check_man && startswith(*p, "man:")) {
+ k = show_man_page(*p + 4, true);
+ if (k != 0) {
+ if (k < 0)
+ log_unit_error_errno(u, k, "Can't show %s: %m", *p + 4);
+ else {
+ log_unit_error(u, "Command 'man %s' failed with code %d", *p + 4, k);
+ k = -ENOEXEC;
+ }
+ if (r == 0)
+ r = k;
+ }
+ }
+ }
+
+ /* Check remote URLs? */
+
+ return r;
+}
+
+static int verify_unit(Unit *u, bool check_man) {
+ _cleanup_(sd_bus_error_free) sd_bus_error err = SD_BUS_ERROR_NULL;
+ int r, k;
+
+ assert(u);
+
+ if (DEBUG_LOGGING)
+ unit_dump(u, stdout, "\t");
+
+ log_unit_debug(u, "Creating %s/start job", u->id);
+ r = manager_add_job(u->manager, JOB_START, u, JOB_REPLACE, &err, NULL);
+ if (r < 0)
+ log_unit_error_errno(u, r, "Failed to create %s/start: %s", u->id, bus_error_message(&err, r));
+
+ k = verify_socket(u);
+ if (k < 0 && r == 0)
+ r = k;
+
+ k = verify_executables(u);
+ if (k < 0 && r == 0)
+ r = k;
+
+ k = verify_documentation(u, check_man);
+ if (k < 0 && r == 0)
+ r = k;
+
+ return r;
+}
+
+int verify_units(char **filenames, UnitFileScope scope, bool check_man, bool run_generators) {
+ const ManagerTestRunFlags flags =
+ MANAGER_TEST_RUN_BASIC |
+ MANAGER_TEST_RUN_ENV_GENERATORS |
+ run_generators * MANAGER_TEST_RUN_GENERATORS;
+
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *units[strv_length(filenames)];
+ _cleanup_free_ char *var = NULL;
+ int r = 0, k, i, count = 0;
+ char **filename;
+
+ if (strv_isempty(filenames))
+ return 0;
+
+ /* set the path */
+ r = generate_path(&var, filenames);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit load path: %m");
+
+ assert_se(set_unit_path(var) >= 0);
+
+ r = manager_new(scope, flags, &m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize manager: %m");
+
+ log_debug("Starting manager...");
+
+ r = manager_startup(m, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ manager_clear_jobs(m);
+
+ log_debug("Loading remaining units from the command line...");
+
+ STRV_FOREACH(filename, filenames) {
+ _cleanup_free_ char *prepared = NULL;
+
+ log_debug("Handling %s...", *filename);
+
+ k = prepare_filename(*filename, &prepared);
+ if (k < 0) {
+ log_error_errno(k, "Failed to prepare filename %s: %m", *filename);
+ if (r == 0)
+ r = k;
+ continue;
+ }
+
+ k = manager_load_startable_unit_or_warn(m, NULL, prepared, &units[count]);
+ if (k < 0 && r == 0)
+ r = k;
+ else
+ count++;
+ }
+
+ for (i = 0; i < count; i++) {
+ k = verify_unit(units[i], check_man);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
diff --git a/src/analyze/analyze-verify.h b/src/analyze/analyze-verify.h
new file mode 100644
index 0000000..f6b7f54
--- /dev/null
+++ b/src/analyze/analyze-verify.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "path-lookup.h"
+
+int verify_units(
+ char **filenames,
+ UnitFileScope scope,
+ bool check_man,
+ bool run_generators);
diff --git a/src/analyze/analyze.c b/src/analyze/analyze.c
new file mode 100644
index 0000000..3915b66
--- /dev/null
+++ b/src/analyze/analyze.c
@@ -0,0 +1,2053 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2013 Simon Peeters
+***/
+
+#include <getopt.h>
+#include <inttypes.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "analyze-security.h"
+#include "analyze-verify.h"
+#include "build.h"
+#include "bus-error.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "calendarspec.h"
+#include "conf-files.h"
+#include "copy.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "glob-util.h"
+#include "hashmap.h"
+#include "locale-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#if HAVE_SECCOMP
+# include "seccomp-util.h"
+#endif
+#include "special.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "time-util.h"
+#include "terminal-util.h"
+#include "unit-name.h"
+#include "util.h"
+#include "verbs.h"
+
+#define SCALE_X (0.1 / 1000.0) /* pixels per us */
+#define SCALE_Y (20.0)
+
+#define svg(...) printf(__VA_ARGS__)
+
+#define svg_bar(class, x1, x2, y) \
+ svg(" <rect class=\"%s\" x=\"%.03f\" y=\"%.03f\" width=\"%.03f\" height=\"%.03f\" />\n", \
+ (class), \
+ SCALE_X * (x1), SCALE_Y * (y), \
+ SCALE_X * ((x2) - (x1)), SCALE_Y - 1.0)
+
+#define svg_text(b, x, y, format, ...) \
+ do { \
+ svg(" <text class=\"%s\" x=\"%.03f\" y=\"%.03f\">", (b) ? "left" : "right", SCALE_X * (x) + (b ? 5.0 : -5.0), SCALE_Y * (y) + 14.0); \
+ svg(format, ## __VA_ARGS__); \
+ svg("</text>\n"); \
+ } while (false)
+
+static enum dot {
+ DEP_ALL,
+ DEP_ORDER,
+ DEP_REQUIRE
+} arg_dot = DEP_ALL;
+static char** arg_dot_from_patterns = NULL;
+static char** arg_dot_to_patterns = NULL;
+static usec_t arg_fuzz = 0;
+static PagerFlags arg_pager_flags = 0;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static const char *arg_host = NULL;
+static UnitFileScope arg_scope = UNIT_FILE_SYSTEM;
+static bool arg_man = true;
+static bool arg_generators = false;
+static const char *arg_root = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_dot_from_patterns, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_dot_to_patterns, strv_freep);
+
+struct boot_times {
+ usec_t firmware_time;
+ usec_t loader_time;
+ usec_t kernel_time;
+ usec_t kernel_done_time;
+ usec_t initrd_time;
+ usec_t userspace_time;
+ usec_t finish_time;
+ usec_t security_start_time;
+ usec_t security_finish_time;
+ usec_t generators_start_time;
+ usec_t generators_finish_time;
+ usec_t unitsload_start_time;
+ usec_t unitsload_finish_time;
+ usec_t initrd_security_start_time;
+ usec_t initrd_security_finish_time;
+ usec_t initrd_generators_start_time;
+ usec_t initrd_generators_finish_time;
+ usec_t initrd_unitsload_start_time;
+ usec_t initrd_unitsload_finish_time;
+
+ /*
+ * If we're analyzing the user instance, all timestamps will be offset
+ * by its own start-up timestamp, which may be arbitrarily big.
+ * With "plot", this causes arbitrarily wide output SVG files which almost
+ * completely consist of empty space. Thus we cancel out this offset.
+ *
+ * This offset is subtracted from times above by acquire_boot_times(),
+ * but it still needs to be subtracted from unit-specific timestamps
+ * (so it is stored here for reference).
+ */
+ usec_t reverse_offset;
+};
+
+struct unit_times {
+ bool has_data;
+ char *name;
+ usec_t activating;
+ usec_t activated;
+ usec_t deactivated;
+ usec_t deactivating;
+ usec_t time;
+};
+
+struct host_info {
+ char *hostname;
+ char *kernel_name;
+ char *kernel_release;
+ char *kernel_version;
+ char *os_pretty_name;
+ char *virtualization;
+ char *architecture;
+};
+
+static int acquire_bus(sd_bus **bus, bool *use_full_bus) {
+ bool user = arg_scope != UNIT_FILE_SYSTEM;
+ int r;
+
+ if (use_full_bus && *use_full_bus) {
+ r = bus_connect_transport(arg_transport, arg_host, user, bus);
+ if (IN_SET(r, 0, -EHOSTDOWN))
+ return r;
+
+ *use_full_bus = false;
+ }
+
+ return bus_connect_transport_systemd(arg_transport, arg_host, user, bus);
+}
+
+static int bus_get_uint64_property(sd_bus *bus, const char *path, const char *interface, const char *property, uint64_t *val) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(property);
+ assert(val);
+
+ r = sd_bus_get_property_trivial(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ interface,
+ property,
+ &error,
+ 't', val);
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse reply: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int bus_get_unit_property_strv(sd_bus *bus, const char *path, const char *property, char ***strv) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(property);
+ assert(strv);
+
+ r = sd_bus_get_property_strv(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ property,
+ &error,
+ strv);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unit property %s: %s", property, bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int compare_unit_time(const struct unit_times *a, const struct unit_times *b) {
+ return CMP(b->time, a->time);
+}
+
+static int compare_unit_start(const struct unit_times *a, const struct unit_times *b) {
+ return CMP(a->activating, b->activating);
+}
+
+static void unit_times_free(struct unit_times *t) {
+ struct unit_times *p;
+
+ for (p = t; p->has_data; p++)
+ free(p->name);
+ free(t);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct unit_times *, unit_times_free);
+
+static void subtract_timestamp(usec_t *a, usec_t b) {
+ assert(a);
+
+ if (*a > 0) {
+ assert(*a >= b);
+ *a -= b;
+ }
+}
+
+static int acquire_boot_times(sd_bus *bus, struct boot_times **bt) {
+ static const struct bus_properties_map property_map[] = {
+ { "FirmwareTimestampMonotonic", "t", NULL, offsetof(struct boot_times, firmware_time) },
+ { "LoaderTimestampMonotonic", "t", NULL, offsetof(struct boot_times, loader_time) },
+ { "KernelTimestamp", "t", NULL, offsetof(struct boot_times, kernel_time) },
+ { "InitRDTimestampMonotonic", "t", NULL, offsetof(struct boot_times, initrd_time) },
+ { "UserspaceTimestampMonotonic", "t", NULL, offsetof(struct boot_times, userspace_time) },
+ { "FinishTimestampMonotonic", "t", NULL, offsetof(struct boot_times, finish_time) },
+ { "SecurityStartTimestampMonotonic", "t", NULL, offsetof(struct boot_times, security_start_time) },
+ { "SecurityFinishTimestampMonotonic", "t", NULL, offsetof(struct boot_times, security_finish_time) },
+ { "GeneratorsStartTimestampMonotonic", "t", NULL, offsetof(struct boot_times, generators_start_time) },
+ { "GeneratorsFinishTimestampMonotonic", "t", NULL, offsetof(struct boot_times, generators_finish_time) },
+ { "UnitsLoadStartTimestampMonotonic", "t", NULL, offsetof(struct boot_times, unitsload_start_time) },
+ { "UnitsLoadFinishTimestampMonotonic", "t", NULL, offsetof(struct boot_times, unitsload_finish_time) },
+ { "InitRDSecurityStartTimestampMonotonic", "t", NULL, offsetof(struct boot_times, initrd_security_start_time) },
+ { "InitRDSecurityFinishTimestampMonotonic", "t", NULL, offsetof(struct boot_times, initrd_security_finish_time) },
+ { "InitRDGeneratorsStartTimestampMonotonic", "t", NULL, offsetof(struct boot_times, initrd_generators_start_time) },
+ { "InitRDGeneratorsFinishTimestampMonotonic", "t", NULL, offsetof(struct boot_times, initrd_generators_finish_time) },
+ { "InitRDUnitsLoadStartTimestampMonotonic", "t", NULL, offsetof(struct boot_times, initrd_unitsload_start_time) },
+ { "InitRDUnitsLoadFinishTimestampMonotonic", "t", NULL, offsetof(struct boot_times, initrd_unitsload_finish_time) },
+ {},
+ };
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ static struct boot_times times;
+ static bool cached = false;
+ int r;
+
+ if (cached)
+ goto finish;
+
+ assert_cc(sizeof(usec_t) == sizeof(uint64_t));
+
+ r = bus_map_all_properties(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ property_map,
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ &times);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get timestamp properties: %s", bus_error_message(&error, r));
+
+ if (times.finish_time <= 0) {
+ log_error("Bootup is not yet finished (org.freedesktop.systemd1.Manager.FinishTimestampMonotonic=%"PRIu64").\n"
+ "Please try again later.\n"
+ "Hint: Use 'systemctl%s list-jobs' to see active jobs",
+ times.finish_time,
+ arg_scope == UNIT_FILE_SYSTEM ? "" : " --user");
+ return -EINPROGRESS;
+ }
+
+ if (arg_scope == UNIT_FILE_SYSTEM && times.security_start_time > 0) {
+ /* security_start_time is set when systemd is not running under container environment. */
+ if (times.initrd_time > 0)
+ times.kernel_done_time = times.initrd_time;
+ else
+ times.kernel_done_time = times.userspace_time;
+ } else {
+ /*
+ * User-instance-specific or container-system-specific timestamps processing
+ * (see comment to reverse_offset in struct boot_times).
+ */
+ times.reverse_offset = times.userspace_time;
+
+ times.firmware_time = times.loader_time = times.kernel_time = times.initrd_time = times.userspace_time =
+ times.security_start_time = times.security_finish_time = 0;
+
+ subtract_timestamp(&times.finish_time, times.reverse_offset);
+
+ subtract_timestamp(&times.generators_start_time, times.reverse_offset);
+ subtract_timestamp(&times.generators_finish_time, times.reverse_offset);
+
+ subtract_timestamp(&times.unitsload_start_time, times.reverse_offset);
+ subtract_timestamp(&times.unitsload_finish_time, times.reverse_offset);
+ }
+
+ cached = true;
+
+finish:
+ *bt = &times;
+ return 0;
+}
+
+static void free_host_info(struct host_info *hi) {
+
+ if (!hi)
+ return;
+
+ free(hi->hostname);
+ free(hi->kernel_name);
+ free(hi->kernel_release);
+ free(hi->kernel_version);
+ free(hi->os_pretty_name);
+ free(hi->virtualization);
+ free(hi->architecture);
+ free(hi);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct host_info*, free_host_info);
+
+static int acquire_time_data(sd_bus *bus, struct unit_times **out) {
+ static const struct bus_properties_map property_map[] = {
+ { "InactiveExitTimestampMonotonic", "t", NULL, offsetof(struct unit_times, activating) },
+ { "ActiveEnterTimestampMonotonic", "t", NULL, offsetof(struct unit_times, activated) },
+ { "ActiveExitTimestampMonotonic", "t", NULL, offsetof(struct unit_times, deactivating) },
+ { "InactiveEnterTimestampMonotonic", "t", NULL, offsetof(struct unit_times, deactivated) },
+ {},
+ };
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(unit_times_freep) struct unit_times *unit_times = NULL;
+ struct boot_times *boot_times = NULL;
+ size_t allocated = 0, c = 0;
+ UnitInfo u;
+ int r;
+
+ r = acquire_boot_times(bus, &boot_times);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ListUnits",
+ &error, &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list units: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssssouso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = bus_parse_unit_info(reply, &u)) > 0) {
+ struct unit_times *t;
+
+ if (!GREEDY_REALLOC(unit_times, allocated, c+2))
+ return log_oom();
+
+ unit_times[c+1].has_data = false;
+ t = &unit_times[c];
+ t->name = NULL;
+
+ assert_cc(sizeof(usec_t) == sizeof(uint64_t));
+
+ r = bus_map_all_properties(
+ bus,
+ "org.freedesktop.systemd1",
+ u.unit_path,
+ property_map,
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get timestamp properties of unit %s: %s", u.id, bus_error_message(&error, r));
+
+ subtract_timestamp(&t->activating, boot_times->reverse_offset);
+ subtract_timestamp(&t->activated, boot_times->reverse_offset);
+ subtract_timestamp(&t->deactivating, boot_times->reverse_offset);
+ subtract_timestamp(&t->deactivated, boot_times->reverse_offset);
+
+ if (t->activated >= t->activating)
+ t->time = t->activated - t->activating;
+ else if (t->deactivated >= t->activating)
+ t->time = t->deactivated - t->activating;
+ else
+ t->time = 0;
+
+ if (t->activating == 0)
+ continue;
+
+ t->name = strdup(u.id);
+ if (!t->name)
+ return log_oom();
+
+ t->has_data = true;
+ c++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ *out = TAKE_PTR(unit_times);
+ return c;
+}
+
+static int acquire_host_info(sd_bus *bus, struct host_info **hi) {
+ static const struct bus_properties_map hostname_map[] = {
+ { "Hostname", "s", NULL, offsetof(struct host_info, hostname) },
+ { "KernelName", "s", NULL, offsetof(struct host_info, kernel_name) },
+ { "KernelRelease", "s", NULL, offsetof(struct host_info, kernel_release) },
+ { "KernelVersion", "s", NULL, offsetof(struct host_info, kernel_version) },
+ { "OperatingSystemPrettyName", "s", NULL, offsetof(struct host_info, os_pretty_name) },
+ {}
+ };
+
+ static const struct bus_properties_map manager_map[] = {
+ { "Virtualization", "s", NULL, offsetof(struct host_info, virtualization) },
+ { "Architecture", "s", NULL, offsetof(struct host_info, architecture) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *system_bus = NULL;
+ _cleanup_(free_host_infop) struct host_info *host;
+ int r;
+
+ host = new0(struct host_info, 1);
+ if (!host)
+ return log_oom();
+
+ if (arg_scope != UNIT_FILE_SYSTEM) {
+ r = bus_connect_transport(arg_transport, arg_host, false, &system_bus);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to connect to system bus, ignoring: %m");
+ goto manager;
+ }
+ }
+
+ r = bus_map_all_properties(system_bus ?: bus,
+ "org.freedesktop.hostname1",
+ "/org/freedesktop/hostname1",
+ hostname_map,
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ host);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to get host information from systemd-hostnamed, ignoring: %s", bus_error_message(&error, r));
+ sd_bus_error_free(&error);
+ }
+
+manager:
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ manager_map,
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ host);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get host information from systemd: %s", bus_error_message(&error, r));
+
+ *hi = TAKE_PTR(host);
+
+ return 0;
+}
+
+static int pretty_boot_time(sd_bus *bus, char **_buf) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ struct boot_times *t;
+ static char buf[4096];
+ size_t size;
+ char *ptr;
+ int r;
+ usec_t activated_time = USEC_INFINITY;
+ _cleanup_free_ char* path = NULL, *unit_id = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = acquire_boot_times(bus, &t);
+ if (r < 0)
+ return r;
+
+ path = unit_dbus_path_from_name(SPECIAL_DEFAULT_TARGET);
+ if (!path)
+ return log_oom();
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "Id",
+ &error,
+ &unit_id);
+ if (r < 0) {
+ log_error_errno(r, "default.target doesn't seem to exist: %s", bus_error_message(&error, r));
+ unit_id = NULL;
+ }
+
+ r = bus_get_uint64_property(bus, path,
+ "org.freedesktop.systemd1.Unit",
+ "ActiveEnterTimestampMonotonic",
+ &activated_time);
+ if (r < 0) {
+ log_info_errno(r, "Could not get time to reach default.target, ignoring: %m");
+ activated_time = USEC_INFINITY;
+ }
+
+ ptr = buf;
+ size = sizeof(buf);
+
+ size = strpcpyf(&ptr, size, "Startup finished in ");
+ if (t->firmware_time > 0)
+ size = strpcpyf(&ptr, size, "%s (firmware) + ", format_timespan(ts, sizeof(ts), t->firmware_time - t->loader_time, USEC_PER_MSEC));
+ if (t->loader_time > 0)
+ size = strpcpyf(&ptr, size, "%s (loader) + ", format_timespan(ts, sizeof(ts), t->loader_time, USEC_PER_MSEC));
+ if (t->kernel_done_time > 0)
+ size = strpcpyf(&ptr, size, "%s (kernel) + ", format_timespan(ts, sizeof(ts), t->kernel_done_time, USEC_PER_MSEC));
+ if (t->initrd_time > 0)
+ size = strpcpyf(&ptr, size, "%s (initrd) + ", format_timespan(ts, sizeof(ts), t->userspace_time - t->initrd_time, USEC_PER_MSEC));
+
+ size = strpcpyf(&ptr, size, "%s (userspace) ", format_timespan(ts, sizeof(ts), t->finish_time - t->userspace_time, USEC_PER_MSEC));
+ if (t->kernel_done_time > 0)
+ strpcpyf(&ptr, size, "= %s ", format_timespan(ts, sizeof(ts), t->firmware_time + t->finish_time, USEC_PER_MSEC));
+
+ if (unit_id && activated_time > 0 && activated_time != USEC_INFINITY) {
+ usec_t base = t->userspace_time > 0 ? t->userspace_time : t->reverse_offset;
+
+ size = strpcpyf(&ptr, size, "\n%s reached after %s in userspace", unit_id,
+ format_timespan(ts, sizeof(ts), activated_time - base, USEC_PER_MSEC));
+ } else if (unit_id && activated_time == 0)
+ size = strpcpyf(&ptr, size, "\n%s was never reached", unit_id);
+ else if (unit_id && activated_time == USEC_INFINITY)
+ size = strpcpyf(&ptr, size, "\nCould not get time to reach %s.", unit_id);
+ else if (!unit_id)
+ size = strpcpyf(&ptr, size, "\ncould not find default.target");
+
+ ptr = strdup(buf);
+ if (!ptr)
+ return log_oom();
+
+ *_buf = ptr;
+ return 0;
+}
+
+static void svg_graph_box(double height, double begin, double end) {
+ long long i;
+
+ /* outside box, fill */
+ svg("<rect class=\"box\" x=\"0\" y=\"0\" width=\"%.03f\" height=\"%.03f\" />\n",
+ SCALE_X * (end - begin), SCALE_Y * height);
+
+ for (i = ((long long) (begin / 100000)) * 100000; i <= end; i+=100000) {
+ /* lines for each second */
+ if (i % 5000000 == 0)
+ svg(" <line class=\"sec5\" x1=\"%.03f\" y1=\"0\" x2=\"%.03f\" y2=\"%.03f\" />\n"
+ " <text class=\"sec\" x=\"%.03f\" y=\"%.03f\" >%.01fs</text>\n",
+ SCALE_X * i, SCALE_X * i, SCALE_Y * height, SCALE_X * i, -5.0, 0.000001 * i);
+ else if (i % 1000000 == 0)
+ svg(" <line class=\"sec1\" x1=\"%.03f\" y1=\"0\" x2=\"%.03f\" y2=\"%.03f\" />\n"
+ " <text class=\"sec\" x=\"%.03f\" y=\"%.03f\" >%.01fs</text>\n",
+ SCALE_X * i, SCALE_X * i, SCALE_Y * height, SCALE_X * i, -5.0, 0.000001 * i);
+ else
+ svg(" <line class=\"sec01\" x1=\"%.03f\" y1=\"0\" x2=\"%.03f\" y2=\"%.03f\" />\n",
+ SCALE_X * i, SCALE_X * i, SCALE_Y * height);
+ }
+}
+
+static int plot_unit_times(struct unit_times *u, double width, int y) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ bool b;
+
+ if (!u->name)
+ return 0;
+
+ svg_bar("activating", u->activating, u->activated, y);
+ svg_bar("active", u->activated, u->deactivating, y);
+ svg_bar("deactivating", u->deactivating, u->deactivated, y);
+
+ /* place the text on the left if we have passed the half of the svg width */
+ b = u->activating * SCALE_X < width / 2;
+ if (u->time)
+ svg_text(b, u->activating, y, "%s (%s)",
+ u->name, format_timespan(ts, sizeof(ts), u->time, USEC_PER_MSEC));
+ else
+ svg_text(b, u->activating, y, "%s", u->name);
+
+ return 1;
+}
+
+static int analyze_plot(int argc, char *argv[], void *userdata) {
+ _cleanup_(free_host_infop) struct host_info *host = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(unit_times_freep) struct unit_times *times = NULL;
+ _cleanup_free_ char *pretty_times = NULL;
+ bool use_full_bus = arg_scope == UNIT_FILE_SYSTEM;
+ struct boot_times *boot;
+ struct unit_times *u;
+ int n, m = 1, y = 0, r;
+ double width;
+
+ r = acquire_bus(&bus, &use_full_bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ n = acquire_boot_times(bus, &boot);
+ if (n < 0)
+ return n;
+
+ n = pretty_boot_time(bus, &pretty_times);
+ if (n < 0)
+ return n;
+
+ if (use_full_bus || arg_scope != UNIT_FILE_SYSTEM) {
+ n = acquire_host_info(bus, &host);
+ if (n < 0)
+ return n;
+ }
+
+ n = acquire_time_data(bus, &times);
+ if (n <= 0)
+ return n;
+
+ typesafe_qsort(times, n, compare_unit_start);
+
+ width = SCALE_X * (boot->firmware_time + boot->finish_time);
+ if (width < 800.0)
+ width = 800.0;
+
+ if (boot->firmware_time > boot->loader_time)
+ m++;
+ if (boot->loader_time > 0) {
+ m++;
+ if (width < 1000.0)
+ width = 1000.0;
+ }
+ if (boot->initrd_time > 0)
+ m++;
+ if (boot->kernel_done_time > 0)
+ m++;
+
+ for (u = times; u->has_data; u++) {
+ double text_start, text_width;
+
+ if (u->activating > boot->finish_time) {
+ u->name = mfree(u->name);
+ continue;
+ }
+
+ /* If the text cannot fit on the left side then
+ * increase the svg width so it fits on the right.
+ * TODO: calculate the text width more accurately */
+ text_width = 8.0 * strlen(u->name);
+ text_start = (boot->firmware_time + u->activating) * SCALE_X;
+ if (text_width > text_start && text_width + text_start > width)
+ width = text_width + text_start;
+
+ if (u->deactivated > u->activating &&
+ u->deactivated <= boot->finish_time &&
+ u->activated == 0 && u->deactivating == 0)
+ u->activated = u->deactivating = u->deactivated;
+ if (u->activated < u->activating || u->activated > boot->finish_time)
+ u->activated = boot->finish_time;
+ if (u->deactivating < u->activated || u->deactivating > boot->finish_time)
+ u->deactivating = boot->finish_time;
+ if (u->deactivated < u->deactivating || u->deactivated > boot->finish_time)
+ u->deactivated = boot->finish_time;
+ m++;
+ }
+
+ svg("<?xml version=\"1.0\" standalone=\"no\"?>\n"
+ "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\" "
+ "\"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n");
+
+ svg("<svg width=\"%.0fpx\" height=\"%.0fpx\" version=\"1.1\" "
+ "xmlns=\"http://www.w3.org/2000/svg\">\n\n",
+ 80.0 + width, 150.0 + (m * SCALE_Y) +
+ 5 * SCALE_Y /* legend */);
+
+ /* write some basic info as a comment, including some help */
+ svg("<!-- This file is a systemd-analyze SVG file. It is best rendered in a -->\n"
+ "<!-- browser such as Chrome, Chromium or Firefox. Other applications -->\n"
+ "<!-- that render these files properly but much slower are ImageMagick, -->\n"
+ "<!-- gimp, inkscape, etc. To display the files on your system, just -->\n"
+ "<!-- point your browser to this file. -->\n\n"
+ "<!-- This plot was generated by systemd-analyze version %-16.16s -->\n\n", GIT_VERSION);
+
+ /* style sheet */
+ svg("<defs>\n <style type=\"text/css\">\n <![CDATA[\n"
+ " rect { stroke-width: 1; stroke-opacity: 0; }\n"
+ " rect.background { fill: rgb(255,255,255); }\n"
+ " rect.activating { fill: rgb(255,0,0); fill-opacity: 0.7; }\n"
+ " rect.active { fill: rgb(200,150,150); fill-opacity: 0.7; }\n"
+ " rect.deactivating { fill: rgb(150,100,100); fill-opacity: 0.7; }\n"
+ " rect.kernel { fill: rgb(150,150,150); fill-opacity: 0.7; }\n"
+ " rect.initrd { fill: rgb(150,150,150); fill-opacity: 0.7; }\n"
+ " rect.firmware { fill: rgb(150,150,150); fill-opacity: 0.7; }\n"
+ " rect.loader { fill: rgb(150,150,150); fill-opacity: 0.7; }\n"
+ " rect.userspace { fill: rgb(150,150,150); fill-opacity: 0.7; }\n"
+ " rect.security { fill: rgb(144,238,144); fill-opacity: 0.7; }\n"
+ " rect.generators { fill: rgb(102,204,255); fill-opacity: 0.7; }\n"
+ " rect.unitsload { fill: rgb( 82,184,255); fill-opacity: 0.7; }\n"
+ " rect.box { fill: rgb(240,240,240); stroke: rgb(192,192,192); }\n"
+ " line { stroke: rgb(64,64,64); stroke-width: 1; }\n"
+ "// line.sec1 { }\n"
+ " line.sec5 { stroke-width: 2; }\n"
+ " line.sec01 { stroke: rgb(224,224,224); stroke-width: 1; }\n"
+ " text { font-family: Verdana, Helvetica; font-size: 14px; }\n"
+ " text.left { font-family: Verdana, Helvetica; font-size: 14px; text-anchor: start; }\n"
+ " text.right { font-family: Verdana, Helvetica; font-size: 14px; text-anchor: end; }\n"
+ " text.sec { font-size: 10px; }\n"
+ " ]]>\n </style>\n</defs>\n\n");
+
+ svg("<rect class=\"background\" width=\"100%%\" height=\"100%%\" />\n");
+ svg("<text x=\"20\" y=\"50\">%s</text>", pretty_times);
+ if (host)
+ svg("<text x=\"20\" y=\"30\">%s %s (%s %s %s) %s %s</text>",
+ isempty(host->os_pretty_name) ? "Linux" : host->os_pretty_name,
+ strempty(host->hostname),
+ strempty(host->kernel_name),
+ strempty(host->kernel_release),
+ strempty(host->kernel_version),
+ strempty(host->architecture),
+ strempty(host->virtualization));
+
+ svg("<g transform=\"translate(%.3f,100)\">\n", 20.0 + (SCALE_X * boot->firmware_time));
+ svg_graph_box(m, -(double) boot->firmware_time, boot->finish_time);
+
+ if (boot->firmware_time > 0) {
+ svg_bar("firmware", -(double) boot->firmware_time, -(double) boot->loader_time, y);
+ svg_text(true, -(double) boot->firmware_time, y, "firmware");
+ y++;
+ }
+ if (boot->loader_time > 0) {
+ svg_bar("loader", -(double) boot->loader_time, 0, y);
+ svg_text(true, -(double) boot->loader_time, y, "loader");
+ y++;
+ }
+ if (boot->kernel_done_time > 0) {
+ svg_bar("kernel", 0, boot->kernel_done_time, y);
+ svg_text(true, 0, y, "kernel");
+ y++;
+ }
+ if (boot->initrd_time > 0) {
+ svg_bar("initrd", boot->initrd_time, boot->userspace_time, y);
+ if (boot->initrd_security_start_time < boot->initrd_security_finish_time)
+ svg_bar("security", boot->initrd_security_start_time, boot->initrd_security_finish_time, y);
+ if (boot->initrd_generators_start_time < boot->initrd_generators_finish_time)
+ svg_bar("generators", boot->initrd_generators_start_time, boot->initrd_generators_finish_time, y);
+ if (boot->initrd_unitsload_start_time < boot->initrd_unitsload_finish_time)
+ svg_bar("unitsload", boot->initrd_unitsload_start_time, boot->initrd_unitsload_finish_time, y);
+ svg_text(true, boot->initrd_time, y, "initrd");
+ y++;
+ }
+
+ for (u = times; u->has_data; u++) {
+ if (u->activating >= boot->userspace_time)
+ break;
+
+ y += plot_unit_times(u, width, y);
+ }
+
+ svg_bar("active", boot->userspace_time, boot->finish_time, y);
+ if (boot->security_start_time > 0)
+ svg_bar("security", boot->security_start_time, boot->security_finish_time, y);
+ svg_bar("generators", boot->generators_start_time, boot->generators_finish_time, y);
+ svg_bar("unitsload", boot->unitsload_start_time, boot->unitsload_finish_time, y);
+ svg_text(true, boot->userspace_time, y, "systemd");
+ y++;
+
+ for (; u->has_data; u++)
+ y += plot_unit_times(u, width, y);
+
+ svg("</g>\n");
+
+ /* Legend */
+ svg("<g transform=\"translate(20,100)\">\n");
+ y++;
+ svg_bar("activating", 0, 300000, y);
+ svg_text(true, 400000, y, "Activating");
+ y++;
+ svg_bar("active", 0, 300000, y);
+ svg_text(true, 400000, y, "Active");
+ y++;
+ svg_bar("deactivating", 0, 300000, y);
+ svg_text(true, 400000, y, "Deactivating");
+ y++;
+ if (boot->security_start_time > 0) {
+ svg_bar("security", 0, 300000, y);
+ svg_text(true, 400000, y, "Setting up security module");
+ y++;
+ }
+ svg_bar("generators", 0, 300000, y);
+ svg_text(true, 400000, y, "Generators");
+ y++;
+ svg_bar("unitsload", 0, 300000, y);
+ svg_text(true, 400000, y, "Loading unit files");
+ y++;
+
+ svg("</g>\n\n");
+
+ svg("</svg>\n");
+
+ return 0;
+}
+
+static int list_dependencies_print(const char *name, unsigned level, unsigned branches,
+ bool last, struct unit_times *times, struct boot_times *boot) {
+ unsigned i;
+ char ts[FORMAT_TIMESPAN_MAX], ts2[FORMAT_TIMESPAN_MAX];
+
+ for (i = level; i != 0; i--)
+ printf("%s", special_glyph(branches & (1 << (i-1)) ? SPECIAL_GLYPH_TREE_VERTICAL : SPECIAL_GLYPH_TREE_SPACE));
+
+ printf("%s", special_glyph(last ? SPECIAL_GLYPH_TREE_RIGHT : SPECIAL_GLYPH_TREE_BRANCH));
+
+ if (times) {
+ if (times->time > 0)
+ printf("%s%s @%s +%s%s", ansi_highlight_red(), name,
+ format_timespan(ts, sizeof(ts), times->activating - boot->userspace_time, USEC_PER_MSEC),
+ format_timespan(ts2, sizeof(ts2), times->time, USEC_PER_MSEC), ansi_normal());
+ else if (times->activated > boot->userspace_time)
+ printf("%s @%s", name, format_timespan(ts, sizeof(ts), times->activated - boot->userspace_time, USEC_PER_MSEC));
+ else
+ printf("%s", name);
+ } else
+ printf("%s", name);
+ printf("\n");
+
+ return 0;
+}
+
+static int list_dependencies_get_dependencies(sd_bus *bus, const char *name, char ***deps) {
+ _cleanup_free_ char *path = NULL;
+
+ assert(bus);
+ assert(name);
+ assert(deps);
+
+ path = unit_dbus_path_from_name(name);
+ if (!path)
+ return -ENOMEM;
+
+ return bus_get_unit_property_strv(bus, path, "After", deps);
+}
+
+static Hashmap *unit_times_hashmap;
+
+static int list_dependencies_compare(char * const *a, char * const *b) {
+ usec_t usa = 0, usb = 0;
+ struct unit_times *times;
+
+ times = hashmap_get(unit_times_hashmap, *a);
+ if (times)
+ usa = times->activated;
+ times = hashmap_get(unit_times_hashmap, *b);
+ if (times)
+ usb = times->activated;
+
+ return CMP(usb, usa);
+}
+
+static bool times_in_range(const struct unit_times *times, const struct boot_times *boot) {
+ return times &&
+ times->activated > 0 && times->activated <= boot->finish_time;
+}
+
+static int list_dependencies_one(sd_bus *bus, const char *name, unsigned level, char ***units,
+ unsigned branches) {
+ _cleanup_strv_free_ char **deps = NULL;
+ char **c;
+ int r = 0;
+ usec_t service_longest = 0;
+ int to_print = 0;
+ struct unit_times *times;
+ struct boot_times *boot;
+
+ if (strv_extend(units, name))
+ return log_oom();
+
+ r = list_dependencies_get_dependencies(bus, name, &deps);
+ if (r < 0)
+ return r;
+
+ typesafe_qsort(deps, strv_length(deps), list_dependencies_compare);
+
+ r = acquire_boot_times(bus, &boot);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(c, deps) {
+ times = hashmap_get(unit_times_hashmap, *c);
+ if (times_in_range(times, boot) &&
+ times->activated >= service_longest)
+ service_longest = times->activated;
+ }
+
+ if (service_longest == 0)
+ return r;
+
+ STRV_FOREACH(c, deps) {
+ times = hashmap_get(unit_times_hashmap, *c);
+ if (times_in_range(times, boot) &&
+ service_longest - times->activated <= arg_fuzz)
+ to_print++;
+ }
+
+ if (!to_print)
+ return r;
+
+ STRV_FOREACH(c, deps) {
+ times = hashmap_get(unit_times_hashmap, *c);
+ if (!times_in_range(times, boot) ||
+ service_longest - times->activated > arg_fuzz)
+ continue;
+
+ to_print--;
+
+ r = list_dependencies_print(*c, level, branches, to_print == 0, times, boot);
+ if (r < 0)
+ return r;
+
+ if (strv_contains(*units, *c)) {
+ r = list_dependencies_print("...", level + 1, (branches << 1) | (to_print ? 1 : 0),
+ true, NULL, boot);
+ if (r < 0)
+ return r;
+ continue;
+ }
+
+ r = list_dependencies_one(bus, *c, level + 1, units,
+ (branches << 1) | (to_print ? 1 : 0));
+ if (r < 0)
+ return r;
+
+ if (to_print == 0)
+ break;
+ }
+ return 0;
+}
+
+static int list_dependencies(sd_bus *bus, const char *name) {
+ _cleanup_strv_free_ char **units = NULL;
+ char ts[FORMAT_TIMESPAN_MAX];
+ struct unit_times *times;
+ int r;
+ const char *id;
+ _cleanup_free_ char *path = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ struct boot_times *boot;
+
+ assert(bus);
+
+ path = unit_dbus_path_from_name(name);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_get_property(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "Id",
+ &error,
+ &reply,
+ "s");
+ if (r < 0)
+ return log_error_errno(r, "Failed to get ID: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "s", &id);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ times = hashmap_get(unit_times_hashmap, id);
+
+ r = acquire_boot_times(bus, &boot);
+ if (r < 0)
+ return r;
+
+ if (times) {
+ if (times->time)
+ printf("%s%s +%s%s\n", ansi_highlight_red(), id,
+ format_timespan(ts, sizeof(ts), times->time, USEC_PER_MSEC), ansi_normal());
+ else if (times->activated > boot->userspace_time)
+ printf("%s @%s\n", id, format_timespan(ts, sizeof(ts), times->activated - boot->userspace_time, USEC_PER_MSEC));
+ else
+ printf("%s\n", id);
+ }
+
+ return list_dependencies_one(bus, name, 0, &units, 0);
+}
+
+static int analyze_critical_chain(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(unit_times_freep) struct unit_times *times = NULL;
+ struct unit_times *u;
+ Hashmap *h;
+ int n, r;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ n = acquire_time_data(bus, &times);
+ if (n <= 0)
+ return n;
+
+ h = hashmap_new(&string_hash_ops);
+ if (!h)
+ return log_oom();
+
+ for (u = times; u->has_data; u++) {
+ r = hashmap_put(h, u->name, u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add entry to hashmap: %m");
+ }
+ unit_times_hashmap = h;
+
+ (void) pager_open(arg_pager_flags);
+
+ puts("The time after the unit is active or started is printed after the \"@\" character.\n"
+ "The time the unit takes to start is printed after the \"+\" character.\n");
+
+ if (argc > 1) {
+ char **name;
+ STRV_FOREACH(name, strv_skip(argv, 1))
+ list_dependencies(bus, *name);
+ } else
+ list_dependencies(bus, SPECIAL_DEFAULT_TARGET);
+
+ h = hashmap_free(h);
+ return 0;
+}
+
+static int analyze_blame(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(unit_times_freep) struct unit_times *times = NULL;
+ struct unit_times *u;
+ int n, r;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ n = acquire_time_data(bus, &times);
+ if (n <= 0)
+ return n;
+
+ typesafe_qsort(times, n, compare_unit_time);
+
+ (void) pager_open(arg_pager_flags);
+
+ for (u = times; u->has_data; u++) {
+ char ts[FORMAT_TIMESPAN_MAX];
+
+ if (u->time > 0)
+ printf("%16s %s\n", format_timespan(ts, sizeof(ts), u->time, USEC_PER_MSEC), u->name);
+ }
+
+ return 0;
+}
+
+static int analyze_time(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *buf = NULL;
+ int r;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ r = pretty_boot_time(bus, &buf);
+ if (r < 0)
+ return r;
+
+ puts(buf);
+ return 0;
+}
+
+static int graph_one_property(sd_bus *bus, const UnitInfo *u, const char* prop, const char *color, char* patterns[], char* from_patterns[], char* to_patterns[]) {
+ _cleanup_strv_free_ char **units = NULL;
+ char **unit;
+ int r;
+ bool match_patterns;
+
+ assert(u);
+ assert(prop);
+ assert(color);
+
+ match_patterns = strv_fnmatch(patterns, u->id, 0);
+
+ if (!strv_isempty(from_patterns) &&
+ !match_patterns &&
+ !strv_fnmatch(from_patterns, u->id, 0))
+ return 0;
+
+ r = bus_get_unit_property_strv(bus, u->unit_path, prop, &units);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(unit, units) {
+ bool match_patterns2;
+
+ match_patterns2 = strv_fnmatch(patterns, *unit, 0);
+
+ if (!strv_isempty(to_patterns) &&
+ !match_patterns2 &&
+ !strv_fnmatch(to_patterns, *unit, 0))
+ continue;
+
+ if (!strv_isempty(patterns) && !match_patterns && !match_patterns2)
+ continue;
+
+ printf("\t\"%s\"->\"%s\" [color=\"%s\"];\n", u->id, *unit, color);
+ }
+
+ return 0;
+}
+
+static int graph_one(sd_bus *bus, const UnitInfo *u, char *patterns[], char *from_patterns[], char *to_patterns[]) {
+ int r;
+
+ assert(bus);
+ assert(u);
+
+ if (IN_SET(arg_dot, DEP_ORDER, DEP_ALL)) {
+ r = graph_one_property(bus, u, "After", "green", patterns, from_patterns, to_patterns);
+ if (r < 0)
+ return r;
+ }
+
+ if (IN_SET(arg_dot, DEP_REQUIRE, DEP_ALL)) {
+ r = graph_one_property(bus, u, "Requires", "black", patterns, from_patterns, to_patterns);
+ if (r < 0)
+ return r;
+ r = graph_one_property(bus, u, "Requisite", "darkblue", patterns, from_patterns, to_patterns);
+ if (r < 0)
+ return r;
+ r = graph_one_property(bus, u, "Wants", "grey66", patterns, from_patterns, to_patterns);
+ if (r < 0)
+ return r;
+ r = graph_one_property(bus, u, "Conflicts", "red", patterns, from_patterns, to_patterns);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int expand_patterns(sd_bus *bus, char **patterns, char ***ret) {
+ _cleanup_strv_free_ char **expanded_patterns = NULL;
+ char **pattern;
+ int r;
+
+ STRV_FOREACH(pattern, patterns) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *unit = NULL, *unit_id = NULL;
+
+ if (strv_extend(&expanded_patterns, *pattern) < 0)
+ return log_oom();
+
+ if (string_is_glob(*pattern))
+ continue;
+
+ unit = unit_dbus_path_from_name(*pattern);
+ if (!unit)
+ return log_oom();
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ unit,
+ "org.freedesktop.systemd1.Unit",
+ "Id",
+ &error,
+ &unit_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get ID: %s", bus_error_message(&error, r));
+
+ if (!streq(*pattern, unit_id)) {
+ if (strv_extend(&expanded_patterns, unit_id) < 0)
+ return log_oom();
+ }
+ }
+
+ *ret = expanded_patterns;
+ expanded_patterns = NULL; /* do not free */
+
+ return 0;
+}
+
+static int dot(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_strv_free_ char **expanded_patterns = NULL;
+ _cleanup_strv_free_ char **expanded_from_patterns = NULL;
+ _cleanup_strv_free_ char **expanded_to_patterns = NULL;
+ int r;
+ UnitInfo u;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ r = expand_patterns(bus, strv_skip(argv, 1), &expanded_patterns);
+ if (r < 0)
+ return r;
+
+ r = expand_patterns(bus, arg_dot_from_patterns, &expanded_from_patterns);
+ if (r < 0)
+ return r;
+
+ r = expand_patterns(bus, arg_dot_to_patterns, &expanded_to_patterns);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ListUnits",
+ &error,
+ &reply,
+ "");
+ if (r < 0)
+ log_error_errno(r, "Failed to list units: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssssouso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ printf("digraph systemd {\n");
+
+ while ((r = bus_parse_unit_info(reply, &u)) > 0) {
+
+ r = graph_one(bus, &u, expanded_patterns, expanded_from_patterns, expanded_to_patterns);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ printf("}\n");
+
+ log_info(" Color legend: black = Requires\n"
+ " dark blue = Requisite\n"
+ " dark grey = Wants\n"
+ " red = Conflicts\n"
+ " green = After\n");
+
+ if (on_tty())
+ log_notice("-- You probably want to process this output with graphviz' dot tool.\n"
+ "-- Try a shell pipeline like 'systemd-analyze dot | dot -Tsvg > systemd.svg'!\n");
+
+ return 0;
+}
+
+static int dump_fallback(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *text = NULL;
+ int r;
+
+ assert(bus);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Dump",
+ &error,
+ &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call Dump: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "s", &text);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ fputs(text, stdout);
+ return 0;
+}
+
+static int dump(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int fd = -1;
+ int r;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ (void) pager_open(arg_pager_flags);
+
+ if (!sd_bus_can_send(bus, SD_BUS_TYPE_UNIX_FD))
+ return dump_fallback(bus);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "DumpByFileDescriptor",
+ &error,
+ &reply,
+ NULL);
+ if (r < 0) {
+ /* fall back to Dump if DumpByFileDescriptor is not supported */
+ if (!IN_SET(r, -EACCES, -EBADR))
+ return log_error_errno(r, "Failed to issue method call DumpByFileDescriptor: %s", bus_error_message(&error, r));
+
+ return dump_fallback(bus);
+ }
+
+ r = sd_bus_message_read(reply, "h", &fd);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ fflush(stdout);
+ return copy_bytes(fd, STDOUT_FILENO, (uint64_t) -1, 0);
+}
+
+static int cat_config(int argc, char *argv[], void *userdata) {
+ char **arg, **list;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ list = strv_skip(argv, 1);
+ STRV_FOREACH(arg, list) {
+ const char *t = NULL;
+
+ if (arg != list)
+ print_separator();
+
+ if (path_is_absolute(*arg)) {
+ const char *dir;
+
+ NULSTR_FOREACH(dir, CONF_PATHS_NULSTR("")) {
+ t = path_startswith(*arg, dir);
+ if (t)
+ break;
+ }
+
+ if (!t)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path %s does not start with any known prefix.",
+ *arg);
+ } else
+ t = *arg;
+
+ r = conf_files_cat(arg_root, t);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int set_log_level(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(argc == 2);
+ assert(argv);
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ r = sd_bus_set_property(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "LogLevel",
+ &error,
+ "s",
+ argv[1]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int get_log_level(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *level = NULL;
+ int r;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "LogLevel",
+ &error,
+ &level);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get log level: %s", bus_error_message(&error, r));
+
+ puts(level);
+ return 0;
+}
+
+static int get_or_set_log_level(int argc, char *argv[], void *userdata) {
+ return (argc == 1) ? get_log_level(argc, argv, userdata) : set_log_level(argc, argv, userdata);
+}
+
+static int set_log_target(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(argc == 2);
+ assert(argv);
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ r = sd_bus_set_property(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "LogTarget",
+ &error,
+ "s",
+ argv[1]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int get_log_target(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *target = NULL;
+ int r;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "LogTarget",
+ &error,
+ &target);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get log target: %s", bus_error_message(&error, r));
+
+ puts(target);
+ return 0;
+}
+
+static int get_or_set_log_target(int argc, char *argv[], void *userdata) {
+ return (argc == 1) ? get_log_target(argc, argv, userdata) : set_log_target(argc, argv, userdata);
+}
+
+static int dump_unit_paths(int argc, char *argv[], void *userdata) {
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ int r;
+ char **p;
+
+ r = lookup_paths_init(&paths, arg_scope, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "lookup_paths_init() failed: %m");
+
+ STRV_FOREACH(p, paths.search_path)
+ puts(*p);
+
+ return 0;
+}
+
+#if HAVE_SECCOMP
+
+static int load_kernel_syscalls(Set **ret) {
+ _cleanup_(set_free_freep) Set *syscalls = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ /* Let's read the available system calls from the list of available tracing events. Slightly dirty, but good
+ * enough for analysis purposes. */
+
+ f = fopen("/sys/kernel/debug/tracing/available_events", "re");
+ if (!f)
+ return log_full_errno(IN_SET(errno, EPERM, EACCES, ENOENT) ? LOG_DEBUG : LOG_WARNING, errno, "Can't read open /sys/kernel/debug/tracing/available_events: %m");
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *e;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read system call list: %m");
+ if (r == 0)
+ break;
+
+ e = startswith(line, "syscalls:sys_enter_");
+ if (!e)
+ continue;
+
+ /* These are named differently inside the kernel than their external name for historical reasons. Let's hide them here. */
+ if (STR_IN_SET(e, "newuname", "newfstat", "newstat", "newlstat", "sysctl"))
+ continue;
+
+ r = set_ensure_allocated(&syscalls, &string_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put_strdup(syscalls, e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add system call to list: %m");
+ }
+
+ *ret = TAKE_PTR(syscalls);
+ return 0;
+}
+
+static void kernel_syscalls_remove(Set *s, const SyscallFilterSet *set) {
+ const char *syscall;
+
+ NULSTR_FOREACH(syscall, set->value) {
+ if (syscall[0] == '@')
+ continue;
+
+ (void) set_remove(s, syscall);
+ }
+}
+
+static void dump_syscall_filter(const SyscallFilterSet *set) {
+ const char *syscall;
+
+ printf("%s%s%s\n"
+ " # %s\n",
+ ansi_highlight(),
+ set->name,
+ ansi_normal(),
+ set->help);
+
+ NULSTR_FOREACH(syscall, set->value)
+ printf(" %s%s%s\n",
+ syscall[0] == '@' ? ansi_underline() : "",
+ syscall,
+ ansi_normal());
+}
+
+static int dump_syscall_filters(int argc, char *argv[], void *userdata) {
+ bool first = true;
+
+ (void) pager_open(arg_pager_flags);
+
+ if (strv_isempty(strv_skip(argv, 1))) {
+ _cleanup_(set_free_freep) Set *kernel = NULL;
+ int i, k;
+
+ k = load_kernel_syscalls(&kernel);
+
+ for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) {
+ const SyscallFilterSet *set = syscall_filter_sets + i;
+ if (!first)
+ puts("");
+
+ dump_syscall_filter(set);
+ kernel_syscalls_remove(kernel, set);
+ first = false;
+ }
+
+ if (k < 0) {
+ fputc('\n', stdout);
+ fflush(stdout);
+ log_notice_errno(k, "# Not showing unlisted system calls, couldn't retrieve kernel system call list: %m");
+ } else if (!set_isempty(kernel)) {
+ const char *syscall;
+ Iterator j;
+
+ printf("\n"
+ "# %sUnlisted System Calls%s (supported by the local kernel, but not included in any of the groups listed above):\n",
+ ansi_highlight(), ansi_normal());
+
+ SET_FOREACH(syscall, kernel, j)
+ printf("# %s\n", syscall);
+ }
+ } else {
+ char **name;
+
+ STRV_FOREACH(name, strv_skip(argv, 1)) {
+ const SyscallFilterSet *set;
+
+ if (!first)
+ puts("");
+
+ set = syscall_filter_set_find(*name);
+ if (!set) {
+ /* make sure the error appears below normal output */
+ fflush(stdout);
+
+ log_error("Filter set \"%s\" not found.", *name);
+ return -ENOENT;
+ }
+
+ dump_syscall_filter(set);
+ first = false;
+ }
+ }
+
+ return 0;
+}
+
+#else
+static int dump_syscall_filters(int argc, char *argv[], void *userdata) {
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Not compiled with syscall filters, sorry.");
+}
+#endif
+
+static int dump_timespan(int argc, char *argv[], void *userdata) {
+ char **input_timespan;
+
+ STRV_FOREACH(input_timespan, strv_skip(argv, 1)) {
+ int r;
+ usec_t usec_magnitude = 1, output_usecs;
+ char ft_buf[FORMAT_TIMESPAN_MAX];
+
+ r = parse_time(*input_timespan, &output_usecs, USEC_PER_SEC);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse time span '%s': %m", *input_timespan);
+
+ printf("Original: %s\n", *input_timespan);
+ printf(" %ss: %" PRIu64 "\n", special_glyph(SPECIAL_GLYPH_MU), output_usecs);
+ printf(" Human: %s\n", format_timespan(ft_buf, sizeof(ft_buf), output_usecs, usec_magnitude));
+
+ if (input_timespan[1])
+ putchar('\n');
+ }
+
+ return EXIT_SUCCESS;
+}
+
+static int test_calendar(int argc, char *argv[], void *userdata) {
+ int ret = 0, r;
+ char **p;
+ usec_t n;
+
+ n = now(CLOCK_REALTIME);
+
+ STRV_FOREACH(p, strv_skip(argv, 1)) {
+ _cleanup_(calendar_spec_freep) CalendarSpec *spec = NULL;
+ _cleanup_free_ char *t = NULL;
+ usec_t next;
+
+ r = calendar_spec_from_string(*p, &spec);
+ if (r < 0) {
+ ret = log_error_errno(r, "Failed to parse calendar specification '%s': %m", *p);
+ continue;
+ }
+
+ r = calendar_spec_normalize(spec);
+ if (r < 0) {
+ ret = log_error_errno(r, "Failed to normalize calendar specification '%s': %m", *p);
+ continue;
+ }
+
+ r = calendar_spec_to_string(spec, &t);
+ if (r < 0) {
+ ret = log_error_errno(r, "Failed to format calendar specification '%s': %m", *p);
+ continue;
+ }
+
+ if (!streq(t, *p))
+ printf(" Original form: %s\n", *p);
+
+ printf("Normalized form: %s\n", t);
+
+ r = calendar_spec_next_usec(spec, n, &next);
+ if (r == -ENOENT)
+ printf(" Next elapse: never\n");
+ else if (r < 0) {
+ ret = log_error_errno(r, "Failed to determine next elapse for '%s': %m", *p);
+ continue;
+ } else {
+ char buffer[CONST_MAX(FORMAT_TIMESTAMP_MAX, FORMAT_TIMESTAMP_RELATIVE_MAX)];
+
+ printf(" Next elapse: %s\n", format_timestamp(buffer, sizeof(buffer), next));
+
+ if (!in_utc_timezone())
+ printf(" (in UTC): %s\n", format_timestamp_utc(buffer, sizeof(buffer), next));
+
+ printf(" From now: %s\n", format_timestamp_relative(buffer, sizeof(buffer), next));
+ }
+
+ if (*(p+1))
+ putchar('\n');
+ }
+
+ return ret;
+}
+
+static int service_watchdogs(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int b, r;
+
+ assert(IN_SET(argc, 1, 2));
+ assert(argv);
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ /* get ServiceWatchdogs */
+ if (argc == 1) {
+ r = sd_bus_get_property_trivial(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ServiceWatchdogs",
+ &error,
+ 'b',
+ &b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get service-watchdog state: %s", bus_error_message(&error, r));
+
+ printf("%s\n", yes_no(!!b));
+
+ return 0;
+ }
+
+ /* set ServiceWatchdogs */
+ b = parse_boolean(argv[1]);
+ if (b < 0) {
+ log_error("Failed to parse service-watchdogs argument.");
+ return -EINVAL;
+ }
+
+ r = sd_bus_set_property(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ServiceWatchdogs",
+ &error,
+ "b",
+ b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set service-watchdog state: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int do_verify(int argc, char *argv[], void *userdata) {
+ return verify_units(strv_skip(argv, 1), arg_scope, arg_man, arg_generators);
+}
+
+static int do_security(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ (void) pager_open(arg_pager_flags);
+
+ return analyze_security(bus, strv_skip(argv, 1), 0);
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("systemd-analyze", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Profile systemd, show unit dependencies, check unit files.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --system Operate on system systemd instance\n"
+ " --user Operate on user systemd instance\n"
+ " --global Operate on global user configuration\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --order Show only order in the graph\n"
+ " --require Show only requirement in the graph\n"
+ " --from-pattern=GLOB Show only origins in the graph\n"
+ " --to-pattern=GLOB Show only destinations in the graph\n"
+ " --fuzz=SECONDS Also print also services which finished SECONDS\n"
+ " earlier than the latest in the branch\n"
+ " --man[=BOOL] Do [not] check for existence of man pages\n"
+ " --generators[=BOOL] Do [not] run unit generators (requires privileges)\n"
+ "\nCommands:\n"
+ " time Print time spent in the kernel\n"
+ " blame Print list of running units ordered by time to init\n"
+ " critical-chain [UNIT...] Print a tree of the time critical chain of units\n"
+ " plot Output SVG graphic showing service initialization\n"
+ " dot [UNIT...] Output dependency graph in man:dot(1) format\n"
+ " log-level [LEVEL] Get/set logging threshold for manager\n"
+ " log-target [TARGET] Get/set logging target for manager\n"
+ " dump Output state serialization of service manager\n"
+ " cat-config Show configuration file and drop-ins\n"
+ " unit-paths List load directories for units\n"
+ " syscall-filter [NAME...] Print list of syscalls in seccomp filter\n"
+ " verify FILE... Check unit files for correctness\n"
+ " calendar SPEC... Validate repetitive calendar time events\n"
+ " service-watchdogs [BOOL] Get/set service watchdog state\n"
+ " timespan SPAN... Validate a time span\n"
+ " security [UNIT...] Analyze security of unit\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ /* When updating this list, including descriptions, apply changes to shell-completion/bash/systemd-analyze and
+ * shell-completion/zsh/_systemd-analyze too. */
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_ORDER,
+ ARG_REQUIRE,
+ ARG_ROOT,
+ ARG_SYSTEM,
+ ARG_USER,
+ ARG_GLOBAL,
+ ARG_DOT_FROM_PATTERN,
+ ARG_DOT_TO_PATTERN,
+ ARG_FUZZ,
+ ARG_NO_PAGER,
+ ARG_MAN,
+ ARG_GENERATORS,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "order", no_argument, NULL, ARG_ORDER },
+ { "require", no_argument, NULL, ARG_REQUIRE },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "user", no_argument, NULL, ARG_USER },
+ { "global", no_argument, NULL, ARG_GLOBAL },
+ { "from-pattern", required_argument, NULL, ARG_DOT_FROM_PATTERN },
+ { "to-pattern", required_argument, NULL, ARG_DOT_TO_PATTERN },
+ { "fuzz", required_argument, NULL, ARG_FUZZ },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "man", optional_argument, NULL, ARG_MAN },
+ { "generators", optional_argument, NULL, ARG_GENERATORS },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ {}
+ };
+
+ int r, c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hH:M:", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_ROOT:
+ arg_root = optarg;
+ break;
+
+ case ARG_SYSTEM:
+ arg_scope = UNIT_FILE_SYSTEM;
+ break;
+
+ case ARG_USER:
+ arg_scope = UNIT_FILE_USER;
+ break;
+
+ case ARG_GLOBAL:
+ arg_scope = UNIT_FILE_GLOBAL;
+ break;
+
+ case ARG_ORDER:
+ arg_dot = DEP_ORDER;
+ break;
+
+ case ARG_REQUIRE:
+ arg_dot = DEP_REQUIRE;
+ break;
+
+ case ARG_DOT_FROM_PATTERN:
+ if (strv_extend(&arg_dot_from_patterns, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_DOT_TO_PATTERN:
+ if (strv_extend(&arg_dot_to_patterns, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_FUZZ:
+ r = parse_sec(optarg, &arg_fuzz);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case ARG_MAN:
+ if (optarg) {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --man= argument.");
+
+ arg_man = r;
+ } else
+ arg_man = true;
+
+ break;
+
+ case ARG_GENERATORS:
+ if (optarg) {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --generators= argument.");
+
+ arg_generators = r;
+ } else
+ arg_generators = true;
+
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option code.");
+ }
+
+ if (arg_scope == UNIT_FILE_GLOBAL &&
+ !STR_IN_SET(argv[optind] ?: "time", "dot", "unit-paths", "verify"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --global only makes sense with verbs dot, unit-paths, verify.");
+
+ if (arg_root && !streq_ptr(argv[optind], "cat-config"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --root is only supported for cat-config right now.");
+
+ return 1; /* work to do */
+}
+
+static int run(int argc, char *argv[]) {
+
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "time", VERB_ANY, 1, VERB_DEFAULT, analyze_time },
+ { "blame", VERB_ANY, 1, 0, analyze_blame },
+ { "critical-chain", VERB_ANY, VERB_ANY, 0, analyze_critical_chain },
+ { "plot", VERB_ANY, 1, 0, analyze_plot },
+ { "dot", VERB_ANY, VERB_ANY, 0, dot },
+ { "log-level", VERB_ANY, 2, 0, get_or_set_log_level },
+ { "log-target", VERB_ANY, 2, 0, get_or_set_log_target },
+ /* The following four verbs are deprecated aliases */
+ { "set-log-level", 2, 2, 0, set_log_level },
+ { "get-log-level", VERB_ANY, 1, 0, get_log_level },
+ { "set-log-target", 2, 2, 0, set_log_target },
+ { "get-log-target", VERB_ANY, 1, 0, get_log_target },
+ { "dump", VERB_ANY, 1, 0, dump },
+ { "cat-config", 2, VERB_ANY, 0, cat_config },
+ { "unit-paths", 1, 1, 0, dump_unit_paths },
+ { "syscall-filter", VERB_ANY, VERB_ANY, 0, dump_syscall_filters },
+ { "verify", 2, VERB_ANY, 0, do_verify },
+ { "calendar", 2, VERB_ANY, 0, test_calendar },
+ { "service-watchdogs", VERB_ANY, 2, 0, service_watchdogs },
+ { "timespan", 2, VERB_ANY, 0, dump_timespan },
+ { "security", VERB_ANY, VERB_ANY, 0, do_security },
+ {}
+ };
+
+ int r;
+
+ setlocale(LC_ALL, "");
+ setlocale(LC_NUMERIC, "C"); /* we want to format/parse floats in C style */
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/analyze/meson.build b/src/analyze/meson.build
new file mode 100644
index 0000000..4db4dfa
--- /dev/null
+++ b/src/analyze/meson.build
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+systemd_analyze_sources = files('''
+ analyze.c
+ analyze-verify.c
+ analyze-verify.h
+ analyze-security.c
+ analyze-security.h
+'''.split())
diff --git a/src/ask-password/ask-password.c b/src/ask-password/ask-password.c
new file mode 100644
index 0000000..4637c32
--- /dev/null
+++ b/src/ask-password/ask-password.c
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <unistd.h>
+
+#include "ask-password-api.h"
+#include "def.h"
+#include "log.h"
+#include "macro.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "strv.h"
+
+static const char *arg_icon = NULL;
+static const char *arg_id = NULL;
+static const char *arg_keyname = NULL;
+static char *arg_message = NULL;
+static usec_t arg_timeout = DEFAULT_TIMEOUT_USEC;
+static bool arg_multiple = false;
+static bool arg_no_output = false;
+static AskPasswordFlags arg_flags = ASK_PASSWORD_PUSH_CACHE;
+
+STATIC_DESTRUCTOR_REGISTER(arg_message, freep);
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-ask-password", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] MESSAGE\n\n"
+ "Query the user for a system passphrase, via the TTY or an UI agent.\n\n"
+ " -h --help Show this help\n"
+ " --icon=NAME Icon name\n"
+ " --id=ID Query identifier (e.g. \"cryptsetup:/dev/sda5\")\n"
+ " --keyname=NAME Kernel key name for caching passwords (e.g. \"cryptsetup\")\n"
+ " --timeout=SEC Timeout in seconds\n"
+ " --echo Do not mask input (useful for usernames)\n"
+ " --no-tty Ask question via agent even on TTY\n"
+ " --accept-cached Accept cached passwords\n"
+ " --multiple List multiple passwords if available\n"
+ " --no-output Do not print password to standard output\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_ICON = 0x100,
+ ARG_TIMEOUT,
+ ARG_ECHO,
+ ARG_NO_TTY,
+ ARG_ACCEPT_CACHED,
+ ARG_MULTIPLE,
+ ARG_ID,
+ ARG_KEYNAME,
+ ARG_NO_OUTPUT,
+ ARG_VERSION,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "icon", required_argument, NULL, ARG_ICON },
+ { "timeout", required_argument, NULL, ARG_TIMEOUT },
+ { "echo", no_argument, NULL, ARG_ECHO },
+ { "no-tty", no_argument, NULL, ARG_NO_TTY },
+ { "accept-cached", no_argument, NULL, ARG_ACCEPT_CACHED },
+ { "multiple", no_argument, NULL, ARG_MULTIPLE },
+ { "id", required_argument, NULL, ARG_ID },
+ { "keyname", required_argument, NULL, ARG_KEYNAME },
+ { "no-output", no_argument, NULL, ARG_NO_OUTPUT },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_ICON:
+ arg_icon = optarg;
+ break;
+
+ case ARG_TIMEOUT:
+ if (parse_sec(optarg, &arg_timeout) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --timeout parameter %s",
+ optarg);
+ break;
+
+ case ARG_ECHO:
+ arg_flags |= ASK_PASSWORD_ECHO;
+ break;
+
+ case ARG_NO_TTY:
+ arg_flags |= ASK_PASSWORD_NO_TTY;
+ break;
+
+ case ARG_ACCEPT_CACHED:
+ arg_flags |= ASK_PASSWORD_ACCEPT_CACHED;
+ break;
+
+ case ARG_MULTIPLE:
+ arg_multiple = true;
+ break;
+
+ case ARG_ID:
+ arg_id = optarg;
+ break;
+
+ case ARG_KEYNAME:
+ arg_keyname = optarg;
+ break;
+
+ case ARG_NO_OUTPUT:
+ arg_no_output = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (argc > optind) {
+ arg_message = strv_join(argv + optind, " ");
+ if (!arg_message)
+ return log_oom();
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_strv_free_erase_ char **l = NULL;
+ usec_t timeout;
+ char **p;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_timeout > 0)
+ timeout = now(CLOCK_MONOTONIC) + arg_timeout;
+ else
+ timeout = 0;
+
+ r = ask_password_auto(arg_message, arg_icon, arg_id, arg_keyname, timeout, arg_flags, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query password: %m");
+
+ STRV_FOREACH(p, l) {
+ if (!arg_no_output)
+ puts(*p);
+
+ if (!arg_multiple)
+ break;
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/backlight/backlight.c b/src/backlight/backlight.c
new file mode 100644
index 0000000..780ad56
--- /dev/null
+++ b/src/backlight/backlight.c
@@ -0,0 +1,431 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-util.h"
+#include "escape.h"
+#include "fileio.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static int find_pci_or_platform_parent(sd_device *device, sd_device **ret) {
+ const char *subsystem, *sysname, *value;
+ sd_device *parent;
+ int r;
+
+ assert(device);
+ assert(ret);
+
+ r = sd_device_get_parent(device, &parent);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(parent, &subsystem);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_sysname(parent, &sysname);
+ if (r < 0)
+ return r;
+
+ if (streq(subsystem, "drm")) {
+ const char *c;
+
+ c = startswith(sysname, "card");
+ if (!c)
+ return -ENODATA;
+
+ c += strspn(c, DIGITS);
+ if (*c == '-') {
+ /* A connector DRM device, let's ignore all but LVDS and eDP! */
+ if (!STARTSWITH_SET(c, "-LVDS-", "-Embedded DisplayPort-"))
+ return -EOPNOTSUPP;
+ }
+
+ } else if (streq(subsystem, "pci") &&
+ sd_device_get_sysattr_value(parent, "class", &value) >= 0) {
+ unsigned long class = 0;
+
+ r = safe_atolu(value, &class);
+ if (r < 0)
+ return log_warning_errno(r, "Cannot parse PCI class '%s' of device %s:%s: %m",
+ value, subsystem, sysname);
+
+ /* Graphics card */
+ if (class == 0x30000) {
+ *ret = parent;
+ return 0;
+ }
+
+ } else if (streq(subsystem, "platform")) {
+ *ret = parent;
+ return 0;
+ }
+
+ return find_pci_or_platform_parent(parent, ret);
+}
+
+static int same_device(sd_device *a, sd_device *b) {
+ const char *a_val, *b_val;
+ int r;
+
+ assert(a);
+ assert(b);
+
+ r = sd_device_get_subsystem(a, &a_val);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(b, &b_val);
+ if (r < 0)
+ return r;
+
+ if (!streq(a_val, b_val))
+ return false;
+
+ r = sd_device_get_sysname(a, &a_val);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_sysname(b, &b_val);
+ if (r < 0)
+ return r;
+
+ return streq(a_val, b_val);
+}
+
+static int validate_device(sd_device *device) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *enumerate = NULL;
+ const char *v, *subsystem;
+ sd_device *parent, *other;
+ int r;
+
+ assert(device);
+
+ /* Verify whether we should actually care for a specific
+ * backlight device. For backlight devices there might be
+ * multiple ways to access the same control: "firmware"
+ * (i.e. ACPI), "platform" (i.e. via the machine's EC) and
+ * "raw" (via the graphics card). In general we should prefer
+ * "firmware" (i.e. ACPI) or "platform" access over "raw"
+ * access, in order not to confuse the BIOS/EC, and
+ * compatibility with possible low-level hotkey handling of
+ * screen brightness. The kernel will already make sure to
+ * expose only one of "firmware" and "platform" for the same
+ * device to userspace. However, we still need to make sure
+ * that we use "raw" only if no "firmware" or "platform"
+ * device for the same device exists. */
+
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r < 0)
+ return r;
+ if (!streq(subsystem, "backlight"))
+ return true;
+
+ r = sd_device_get_sysattr_value(device, "type", &v);
+ if (r < 0)
+ return r;
+ if (!streq(v, "raw"))
+ return true;
+
+ r = find_pci_or_platform_parent(device, &parent);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(parent, &subsystem);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_new(&enumerate);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(enumerate);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_subsystem(enumerate, "backlight", true);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(enumerate, other) {
+ const char *other_subsystem;
+ sd_device *other_parent;
+
+ if (same_device(device, other) > 0)
+ continue;
+
+ if (sd_device_get_sysattr_value(other, "type", &v) < 0 ||
+ !STR_IN_SET(v, "platform", "firmware"))
+ continue;
+
+ /* OK, so there's another backlight device, and it's a
+ * platform or firmware device, so, let's see if we
+ * can verify it belongs to the same device as ours. */
+ if (find_pci_or_platform_parent(other, &other_parent) < 0)
+ continue;
+
+ if (same_device(parent, other_parent)) {
+ const char *device_sysname = NULL, *other_sysname = NULL;
+
+ /* Both have the same PCI parent, that means we are out. */
+
+ (void) sd_device_get_sysname(device, &device_sysname);
+ (void) sd_device_get_sysname(other, &other_sysname);
+
+ log_debug("Skipping backlight device %s, since device %s is on same PCI device and takes precedence.",
+ device_sysname, other_sysname);
+ return false;
+ }
+
+ if (sd_device_get_subsystem(other_parent, &other_subsystem) < 0)
+ continue;
+
+ if (streq(other_subsystem, "platform") && streq(subsystem, "pci")) {
+ const char *device_sysname = NULL, *other_sysname = NULL;
+
+ /* The other is connected to the platform bus and we are a PCI device, that also means we are out. */
+
+ (void) sd_device_get_sysname(device, &device_sysname);
+ (void) sd_device_get_sysname(other, &other_sysname);
+
+ log_debug("Skipping backlight device %s, since device %s is a platform device and takes precedence.",
+ device_sysname, other_sysname);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static int get_max_brightness(sd_device *device, unsigned *ret) {
+ const char *max_brightness_str;
+ unsigned max_brightness;
+ int r;
+
+ assert(device);
+ assert(ret);
+
+ r = sd_device_get_sysattr_value(device, "max_brightness", &max_brightness_str);
+ if (r < 0)
+ return log_device_warning_errno(device, r, "Failed to read 'max_brightness' attribute: %m");
+
+ r = safe_atou(max_brightness_str, &max_brightness);
+ if (r < 0)
+ return log_device_warning_errno(device, r, "Failed to parse 'max_brightness' \"%s\": %m", max_brightness_str);
+
+ if (max_brightness <= 0) {
+ log_device_warning(device, "Maximum brightness is 0, ignoring device.");
+ return -EINVAL;
+ }
+
+ *ret = max_brightness;
+ return 0;
+}
+
+/* Some systems turn the backlight all the way off at the lowest levels.
+ * clamp_brightness clamps the saved brightness to at least 1 or 5% of
+ * max_brightness in case of 'backlight' subsystem. This avoids preserving
+ * an unreadably dim screen, which would otherwise force the user to
+ * disable state restoration. */
+static int clamp_brightness(sd_device *device, char **value, unsigned max_brightness) {
+ unsigned brightness, new_brightness, min_brightness;
+ const char *subsystem;
+ int r;
+
+ assert(value);
+ assert(*value);
+
+ r = safe_atou(*value, &brightness);
+ if (r < 0)
+ return log_device_warning_errno(device, r, "Failed to parse brightness \"%s\": %m", *value);
+
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r < 0)
+ return log_device_warning_errno(device, r, "Failed to get device subsystem: %m");
+
+ if (streq(subsystem, "backlight"))
+ min_brightness = MAX(1U, max_brightness/20);
+ else
+ min_brightness = 0;
+
+ new_brightness = CLAMP(brightness, min_brightness, max_brightness);
+ if (new_brightness != brightness) {
+ char *new_value;
+
+ r = asprintf(&new_value, "%u", new_brightness);
+ if (r < 0)
+ return log_oom();
+
+ log_device_info(device, "Saved brightness %s %s to %s.", *value,
+ new_brightness > brightness ?
+ "too low; increasing" : "too high; decreasing",
+ new_value);
+
+ free_and_replace(*value, new_value);
+ }
+
+ return 0;
+}
+
+static bool shall_clamp(sd_device *d) {
+ const char *s;
+ int r;
+
+ assert(d);
+
+ r = sd_device_get_property_value(d, "ID_BACKLIGHT_CLAMP", &s);
+ if (r < 0) {
+ log_device_debug_errno(d, r, "Failed to get ID_BACKLIGHT_CLAMP property, ignoring: %m");
+ return true;
+ }
+
+ r = parse_boolean(s);
+ if (r < 0) {
+ log_device_debug_errno(d, r, "Failed to parse ID_BACKLIGHT_CLAMP property, ignoring: %m");
+ return true;
+ }
+
+ return r;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ _cleanup_free_ char *escaped_ss = NULL, *escaped_sysname = NULL, *escaped_path_id = NULL;
+ const char *sysname, *path_id, *ss, *saved;
+ unsigned max_brightness;
+ int r;
+
+ if (argc != 3) {
+ log_error("This program requires two arguments.");
+ return -EINVAL;
+ }
+
+ log_setup_service();
+
+ umask(0022);
+
+ r = mkdir_p("/var/lib/systemd/backlight", 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create backlight directory /var/lib/systemd/backlight: %m");
+
+ sysname = strchr(argv[2], ':');
+ if (!sysname) {
+ log_error("Requires a subsystem and sysname pair specifying a backlight device.");
+ return -EINVAL;
+ }
+
+ ss = strndupa(argv[2], sysname - argv[2]);
+
+ sysname++;
+
+ if (!STR_IN_SET(ss, "backlight", "leds")) {
+ log_error("Not a backlight or LED device: '%s:%s'", ss, sysname);
+ return -EINVAL;
+ }
+
+ r = sd_device_new_from_subsystem_sysname(&device, ss, sysname);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get backlight or LED device '%s:%s': %m", ss, sysname);
+
+ /* If max_brightness is 0, then there is no actual backlight
+ * device. This happens on desktops with Asus mainboards
+ * that load the eeepc-wmi module. */
+ if (get_max_brightness(device, &max_brightness) < 0)
+ return 0;
+
+ escaped_ss = cescape(ss);
+ if (!escaped_ss)
+ return log_oom();
+
+ escaped_sysname = cescape(sysname);
+ if (!escaped_sysname)
+ return log_oom();
+
+ if (sd_device_get_property_value(device, "ID_PATH", &path_id) >= 0) {
+ escaped_path_id = cescape(path_id);
+ if (!escaped_path_id)
+ return log_oom();
+
+ saved = strjoina("/var/lib/systemd/backlight/", escaped_path_id, ":", escaped_ss, ":", escaped_sysname);
+ } else
+ saved = strjoina("/var/lib/systemd/backlight/", escaped_ss, ":", escaped_sysname);
+
+ /* If there are multiple conflicting backlight devices, then
+ * their probing at boot-time might happen in any order. This
+ * means the validity checking of the device then is not
+ * reliable, since it might not see other devices conflicting
+ * with a specific backlight. To deal with this, we will
+ * actively delete backlight state files at shutdown (where
+ * device probing should be complete), so that the validity
+ * check at boot time doesn't have to be reliable. */
+
+ if (streq(argv[1], "load")) {
+ _cleanup_free_ char *value = NULL;
+ bool clamp;
+
+ if (shall_restore_state() == 0)
+ return 0;
+
+ if (validate_device(device) == 0)
+ return 0;
+
+ clamp = shall_clamp(device);
+
+ r = read_one_line_file(saved, &value);
+ if (r == -ENOENT) {
+ const char *curval;
+
+ /* Fallback to clamping current brightness or exit early if
+ * clamping is not supported/enabled. */
+ if (!clamp)
+ return 0;
+
+ r = sd_device_get_sysattr_value(device, "brightness", &curval);
+ if (r < 0)
+ return log_device_warning_errno(device, r, "Failed to read 'brightness' attribute: %m");
+
+ value = strdup(curval);
+ if (!value)
+ return log_oom();
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to read %s: %m", saved);
+
+ if (clamp)
+ (void) clamp_brightness(device, &value, max_brightness);
+
+ r = sd_device_set_sysattr_value(device, "brightness", value);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to write system 'brightness' attribute: %m");
+
+ } else if (streq(argv[1], "save")) {
+ const char *value;
+
+ if (validate_device(device) == 0) {
+ unlink(saved);
+ return 0;
+ }
+
+ r = sd_device_get_sysattr_value(device, "brightness", &value);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to read system 'brightness' attribute: %m");
+
+ r = write_string_file(saved, value, WRITE_STRING_FILE_CREATE);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to write %s: %m", saved);
+
+ } else {
+ log_error("Unknown verb %s.", argv[1]);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/basic/MurmurHash2.c b/src/basic/MurmurHash2.c
new file mode 100644
index 0000000..5859af0
--- /dev/null
+++ b/src/basic/MurmurHash2.c
@@ -0,0 +1,90 @@
+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - This code makes a few assumptions about how your machine behaves -
+
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+// machines.
+
+#include "MurmurHash2.h"
+
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
+{
+ // 'm' and 'r' are mixing constants generated offline.
+ // They're not really 'magic', they just happen to work well.
+
+ const uint32_t m = 0x5bd1e995;
+ const int r = 24;
+
+ // Initialize the hash to a 'random' value
+
+ uint32_t h = seed ^ len;
+
+ // Mix 4 bytes at a time into the hash
+
+ const unsigned char * data = (const unsigned char *)key;
+
+ while (len >= 4)
+ {
+ uint32_t k = *(uint32_t*)data;
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h *= m;
+ h ^= k;
+
+ data += 4;
+ len -= 4;
+ }
+
+ // Handle the last few bytes of the input array
+
+ switch(len)
+ {
+ case 3: h ^= data[2] << 16; /* fall through */
+ case 2: h ^= data[1] << 8; /* fall through */
+ case 1: h ^= data[0]; /* fall through */
+ h *= m;
+ };
+
+ // Do a few final mixes of the hash to ensure the last few
+ // bytes are well-incorporated.
+
+ h ^= h >> 13;
+ h *= m;
+ h ^= h >> 15;
+
+ return h;
+}
diff --git a/src/basic/MurmurHash2.h b/src/basic/MurmurHash2.h
new file mode 100644
index 0000000..1aef3af
--- /dev/null
+++ b/src/basic/MurmurHash2.h
@@ -0,0 +1,30 @@
+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#pragma once
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed );
+
+//-----------------------------------------------------------------------------
diff --git a/src/basic/af-list.c b/src/basic/af-list.c
new file mode 100644
index 0000000..abad221
--- /dev/null
+++ b/src/basic/af-list.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include "af-list.h"
+#include "macro.h"
+
+static const struct af_name* lookup_af(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "af-from-name.h"
+#include "af-to-name.h"
+
+const char *af_to_name(int id) {
+
+ if (id <= 0)
+ return NULL;
+
+ if ((size_t) id >= ELEMENTSOF(af_names))
+ return NULL;
+
+ return af_names[id];
+}
+
+int af_from_name(const char *name) {
+ const struct af_name *sc;
+
+ assert(name);
+
+ sc = lookup_af(name, strlen(name));
+ if (!sc)
+ return -EINVAL;
+
+ return sc->id;
+}
+
+int af_max(void) {
+ return ELEMENTSOF(af_names);
+}
diff --git a/src/basic/af-list.h b/src/basic/af-list.h
new file mode 100644
index 0000000..8342323
--- /dev/null
+++ b/src/basic/af-list.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/socket.h>
+
+#include "string-util.h"
+
+const char *af_to_name(int id);
+int af_from_name(const char *name);
+
+static inline const char* af_to_name_short(int id) {
+ const char *f;
+
+ if (id == AF_UNSPEC)
+ return "*";
+
+ f = af_to_name(id);
+ if (!f)
+ return "unknown";
+
+ assert(startswith(f, "AF_"));
+ return f + 3;
+}
+
+int af_max(void);
diff --git a/src/basic/af-to-name.awk b/src/basic/af-to-name.awk
new file mode 100644
index 0000000..18d0a89
--- /dev/null
+++ b/src/basic/af-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{
+ print "static const char* const af_names[] = { "
+}
+!/AF_FILE/ && !/AF_ROUTE/ && !/AF_LOCAL/ {
+ printf " [%s] = \"%s\",\n", $1, $1
+}
+END{
+ print "};"
+}
diff --git a/src/basic/alloc-util.c b/src/basic/alloc-util.c
new file mode 100644
index 0000000..ab7a42c
--- /dev/null
+++ b/src/basic/alloc-util.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "util.h"
+
+void* memdup(const void *p, size_t l) {
+ void *ret;
+
+ assert(l == 0 || p);
+
+ ret = malloc(l ?: 1);
+ if (!ret)
+ return NULL;
+
+ memcpy(ret, p, l);
+ return ret;
+}
+
+void* memdup_suffix0(const void *p, size_t l) {
+ void *ret;
+
+ assert(l == 0 || p);
+
+ /* The same as memdup() but place a safety NUL byte after the allocated memory */
+
+ ret = malloc(l + 1);
+ if (!ret)
+ return NULL;
+
+ *((uint8_t*) mempcpy(ret, p, l)) = 0;
+ return ret;
+}
+
+void* greedy_realloc(void **p, size_t *allocated, size_t need, size_t size) {
+ size_t a, newalloc;
+ void *q;
+
+ assert(p);
+ assert(allocated);
+
+ if (*allocated >= need)
+ return *p;
+
+ newalloc = MAX(need * 2, 64u / size);
+ a = newalloc * size;
+
+ /* check for overflows */
+ if (a < size * need)
+ return NULL;
+
+ q = realloc(*p, a);
+ if (!q)
+ return NULL;
+
+ *p = q;
+ *allocated = newalloc;
+ return q;
+}
+
+void* greedy_realloc0(void **p, size_t *allocated, size_t need, size_t size) {
+ size_t prev;
+ uint8_t *q;
+
+ assert(p);
+ assert(allocated);
+
+ prev = *allocated;
+
+ q = greedy_realloc(p, allocated, need, size);
+ if (!q)
+ return NULL;
+
+ if (*allocated > prev)
+ memzero(q + prev * size, (*allocated - prev) * size);
+
+ return q;
+}
diff --git a/src/basic/alloc-util.h b/src/basic/alloc-util.h
new file mode 100644
index 0000000..893a123
--- /dev/null
+++ b/src/basic/alloc-util.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <alloca.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "macro.h"
+
+typedef void (*free_func_t)(void *p);
+
+/* If for some reason more than 4M are allocated on the stack, let's abort immediately. It's better than
+ * proceeding and smashing the stack limits. Note that by default RLIMIT_STACK is 8M on Linux. */
+#define ALLOCA_MAX (4U*1024U*1024U)
+
+#define new(t, n) ((t*) malloc_multiply(sizeof(t), (n)))
+
+#define new0(t, n) ((t*) calloc((n) ?: 1, sizeof(t)))
+
+#define newa(t, n) \
+ ({ \
+ size_t _n_ = n; \
+ assert(!size_multiply_overflow(sizeof(t), _n_)); \
+ assert(sizeof(t)*_n_ <= ALLOCA_MAX); \
+ (t*) alloca(sizeof(t)*_n_); \
+ })
+
+#define newa0(t, n) \
+ ({ \
+ size_t _n_ = n; \
+ assert(!size_multiply_overflow(sizeof(t), _n_)); \
+ assert(sizeof(t)*_n_ <= ALLOCA_MAX); \
+ (t*) alloca0(sizeof(t)*_n_); \
+ })
+
+#define newdup(t, p, n) ((t*) memdup_multiply(p, sizeof(t), (n)))
+
+#define newdup_suffix0(t, p, n) ((t*) memdup_suffix0_multiply(p, sizeof(t), (n)))
+
+#define malloc0(n) (calloc(1, (n)))
+
+static inline void *mfree(void *memory) {
+ free(memory);
+ return NULL;
+}
+
+#define free_and_replace(a, b) \
+ ({ \
+ free(a); \
+ (a) = (b); \
+ (b) = NULL; \
+ 0; \
+ })
+
+void* memdup(const void *p, size_t l) _alloc_(2);
+void* memdup_suffix0(const void *p, size_t l) _alloc_(2);
+
+#define memdupa(p, l) \
+ ({ \
+ void *_q_; \
+ size_t _l_ = l; \
+ assert(_l_ <= ALLOCA_MAX); \
+ _q_ = alloca(_l_); \
+ memcpy(_q_, p, _l_); \
+ })
+
+#define memdupa_suffix0(p, l) \
+ ({ \
+ void *_q_; \
+ size_t _l_ = l; \
+ assert(_l_ <= ALLOCA_MAX); \
+ _q_ = alloca(_l_ + 1); \
+ ((uint8_t*) _q_)[_l_] = 0; \
+ memcpy(_q_, p, _l_); \
+ })
+
+static inline void freep(void *p) {
+ free(*(void**) p);
+}
+
+#define _cleanup_free_ _cleanup_(freep)
+
+static inline bool size_multiply_overflow(size_t size, size_t need) {
+ return _unlikely_(need != 0 && size > (SIZE_MAX / need));
+}
+
+_malloc_ _alloc_(1, 2) static inline void *malloc_multiply(size_t size, size_t need) {
+ if (size_multiply_overflow(size, need))
+ return NULL;
+
+ return malloc(size * need ?: 1);
+}
+
+#if !HAVE_REALLOCARRAY
+_alloc_(2, 3) static inline void *reallocarray(void *p, size_t need, size_t size) {
+ if (size_multiply_overflow(size, need))
+ return NULL;
+
+ return realloc(p, size * need ?: 1);
+}
+#endif
+
+_alloc_(2, 3) static inline void *memdup_multiply(const void *p, size_t size, size_t need) {
+ if (size_multiply_overflow(size, need))
+ return NULL;
+
+ return memdup(p, size * need);
+}
+
+_alloc_(2, 3) static inline void *memdup_suffix0_multiply(const void *p, size_t size, size_t need) {
+ if (size_multiply_overflow(size, need))
+ return NULL;
+
+ return memdup_suffix0(p, size * need);
+}
+
+void* greedy_realloc(void **p, size_t *allocated, size_t need, size_t size);
+void* greedy_realloc0(void **p, size_t *allocated, size_t need, size_t size);
+
+#define GREEDY_REALLOC(array, allocated, need) \
+ greedy_realloc((void**) &(array), &(allocated), (need), sizeof((array)[0]))
+
+#define GREEDY_REALLOC0(array, allocated, need) \
+ greedy_realloc0((void**) &(array), &(allocated), (need), sizeof((array)[0]))
+
+#define alloca0(n) \
+ ({ \
+ char *_new_; \
+ size_t _len_ = n; \
+ assert(_len_ <= ALLOCA_MAX); \
+ _new_ = alloca(_len_); \
+ (void *) memset(_new_, 0, _len_); \
+ })
+
+/* It's not clear what alignment glibc/gcc alloca() guarantee, hence provide a guaranteed safe version */
+#define alloca_align(size, align) \
+ ({ \
+ void *_ptr_; \
+ size_t _mask_ = (align) - 1; \
+ size_t _size_ = size; \
+ assert(_size_ <= ALLOCA_MAX); \
+ _ptr_ = alloca(_size_ + _mask_); \
+ (void*)(((uintptr_t)_ptr_ + _mask_) & ~_mask_); \
+ })
+
+#define alloca0_align(size, align) \
+ ({ \
+ void *_new_; \
+ size_t _xsize_ = (size); \
+ _new_ = alloca_align(_xsize_, (align)); \
+ (void*)memset(_new_, 0, _xsize_); \
+ })
+
+/* Takes inspiration from Rusts's Option::take() method: reads and returns a pointer, but at the same time resets it to
+ * NULL. See: https://doc.rust-lang.org/std/option/enum.Option.html#method.take */
+#define TAKE_PTR(ptr) \
+ ({ \
+ typeof(ptr) _ptr_ = (ptr); \
+ (ptr) = NULL; \
+ _ptr_; \
+ })
diff --git a/src/basic/architecture.c b/src/basic/architecture.c
new file mode 100644
index 0000000..85837b5
--- /dev/null
+++ b/src/basic/architecture.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/utsname.h>
+
+#include "architecture.h"
+#include "macro.h"
+#include "string-table.h"
+#include "string-util.h"
+
+int uname_architecture(void) {
+
+ /* Return a sanitized enum identifying the architecture we are
+ * running on. This is based on uname(), and the user may
+ * hence control what this returns by using
+ * personality(). This puts the user in control on systems
+ * that can run binaries of multiple architectures.
+ *
+ * We do not translate the string returned by uname()
+ * 1:1. Instead we try to clean it up and break down the
+ * confusion on x86 and arm in particular.
+ *
+ * We do not try to distinguish CPUs not CPU features, but
+ * actual architectures, i.e. that have genuinely different
+ * code. */
+
+ static const struct {
+ const char *machine;
+ int arch;
+ } arch_map[] = {
+#if defined(__x86_64__) || defined(__i386__)
+ { "x86_64", ARCHITECTURE_X86_64 },
+ { "i686", ARCHITECTURE_X86 },
+ { "i586", ARCHITECTURE_X86 },
+ { "i486", ARCHITECTURE_X86 },
+ { "i386", ARCHITECTURE_X86 },
+#elif defined(__powerpc__) || defined(__powerpc64__)
+ { "ppc64", ARCHITECTURE_PPC64 },
+ { "ppc64le", ARCHITECTURE_PPC64_LE },
+ { "ppc", ARCHITECTURE_PPC },
+ { "ppcle", ARCHITECTURE_PPC_LE },
+#elif defined(__ia64__)
+ { "ia64", ARCHITECTURE_IA64 },
+#elif defined(__hppa__) || defined(__hppa64__)
+ { "parisc64", ARCHITECTURE_PARISC64 },
+ { "parisc", ARCHITECTURE_PARISC },
+#elif defined(__s390__) || defined(__s390x__)
+ { "s390x", ARCHITECTURE_S390X },
+ { "s390", ARCHITECTURE_S390 },
+#elif defined(__sparc__)
+ { "sparc64", ARCHITECTURE_SPARC64 },
+ { "sparc", ARCHITECTURE_SPARC },
+#elif defined(__mips__) || defined(__mips64__)
+ { "mips64", ARCHITECTURE_MIPS64 },
+ { "mips", ARCHITECTURE_MIPS },
+#elif defined(__alpha__)
+ { "alpha" , ARCHITECTURE_ALPHA },
+#elif defined(__arm__) || defined(__aarch64__)
+ { "aarch64", ARCHITECTURE_ARM64 },
+ { "aarch64_be", ARCHITECTURE_ARM64_BE },
+ { "armv4l", ARCHITECTURE_ARM },
+ { "armv4b", ARCHITECTURE_ARM_BE },
+ { "armv4tl", ARCHITECTURE_ARM },
+ { "armv4tb", ARCHITECTURE_ARM_BE },
+ { "armv5tl", ARCHITECTURE_ARM },
+ { "armv5tb", ARCHITECTURE_ARM_BE },
+ { "armv5tel", ARCHITECTURE_ARM },
+ { "armv5teb" , ARCHITECTURE_ARM_BE },
+ { "armv5tejl", ARCHITECTURE_ARM },
+ { "armv5tejb", ARCHITECTURE_ARM_BE },
+ { "armv6l", ARCHITECTURE_ARM },
+ { "armv6b", ARCHITECTURE_ARM_BE },
+ { "armv7l", ARCHITECTURE_ARM },
+ { "armv7b", ARCHITECTURE_ARM_BE },
+ { "armv7ml", ARCHITECTURE_ARM },
+ { "armv7mb", ARCHITECTURE_ARM_BE },
+ { "armv4l", ARCHITECTURE_ARM },
+ { "armv4b", ARCHITECTURE_ARM_BE },
+ { "armv4tl", ARCHITECTURE_ARM },
+ { "armv4tb", ARCHITECTURE_ARM_BE },
+ { "armv5tl", ARCHITECTURE_ARM },
+ { "armv5tb", ARCHITECTURE_ARM_BE },
+ { "armv5tel", ARCHITECTURE_ARM },
+ { "armv5teb", ARCHITECTURE_ARM_BE },
+ { "armv5tejl", ARCHITECTURE_ARM },
+ { "armv5tejb", ARCHITECTURE_ARM_BE },
+ { "armv6l", ARCHITECTURE_ARM },
+ { "armv6b", ARCHITECTURE_ARM_BE },
+ { "armv7l", ARCHITECTURE_ARM },
+ { "armv7b", ARCHITECTURE_ARM_BE },
+ { "armv7ml", ARCHITECTURE_ARM },
+ { "armv7mb", ARCHITECTURE_ARM_BE },
+ { "armv8l", ARCHITECTURE_ARM },
+ { "armv8b", ARCHITECTURE_ARM_BE },
+#elif defined(__sh__) || defined(__sh64__)
+ { "sh5", ARCHITECTURE_SH64 },
+ { "sh2", ARCHITECTURE_SH },
+ { "sh2a", ARCHITECTURE_SH },
+ { "sh3", ARCHITECTURE_SH },
+ { "sh4", ARCHITECTURE_SH },
+ { "sh4a", ARCHITECTURE_SH },
+#elif defined(__m68k__)
+ { "m68k", ARCHITECTURE_M68K },
+#elif defined(__tilegx__)
+ { "tilegx", ARCHITECTURE_TILEGX },
+#elif defined(__cris__)
+ { "crisv32", ARCHITECTURE_CRIS },
+#elif defined(__nios2__)
+ { "nios2", ARCHITECTURE_NIOS2 },
+#elif defined(__riscv__) || defined(__riscv)
+ /* __riscv__ is obsolete, remove in 2018 */
+ { "riscv32", ARCHITECTURE_RISCV32 },
+ { "riscv64", ARCHITECTURE_RISCV64 },
+# if __SIZEOF_POINTER__ == 4
+ { "riscv", ARCHITECTURE_RISCV32 },
+# elif __SIZEOF_POINTER__ == 8
+ { "riscv", ARCHITECTURE_RISCV64 },
+# endif
+#elif defined(__arc__)
+ { "arc", ARCHITECTURE_ARC },
+ { "arceb", ARCHITECTURE_ARC_BE },
+#else
+#error "Please register your architecture here!"
+#endif
+ };
+
+ static int cached = _ARCHITECTURE_INVALID;
+ struct utsname u;
+ unsigned i;
+
+ if (cached != _ARCHITECTURE_INVALID)
+ return cached;
+
+ assert_se(uname(&u) >= 0);
+
+ for (i = 0; i < ELEMENTSOF(arch_map); i++)
+ if (streq(arch_map[i].machine, u.machine))
+ return cached = arch_map[i].arch;
+
+ assert_not_reached("Couldn't identify architecture. You need to patch systemd.");
+ return _ARCHITECTURE_INVALID;
+}
+
+static const char *const architecture_table[_ARCHITECTURE_MAX] = {
+ [ARCHITECTURE_X86] = "x86",
+ [ARCHITECTURE_X86_64] = "x86-64",
+ [ARCHITECTURE_PPC] = "ppc",
+ [ARCHITECTURE_PPC_LE] = "ppc-le",
+ [ARCHITECTURE_PPC64] = "ppc64",
+ [ARCHITECTURE_PPC64_LE] = "ppc64-le",
+ [ARCHITECTURE_IA64] = "ia64",
+ [ARCHITECTURE_PARISC] = "parisc",
+ [ARCHITECTURE_PARISC64] = "parisc64",
+ [ARCHITECTURE_S390] = "s390",
+ [ARCHITECTURE_S390X] = "s390x",
+ [ARCHITECTURE_SPARC] = "sparc",
+ [ARCHITECTURE_SPARC64] = "sparc64",
+ [ARCHITECTURE_MIPS] = "mips",
+ [ARCHITECTURE_MIPS_LE] = "mips-le",
+ [ARCHITECTURE_MIPS64] = "mips64",
+ [ARCHITECTURE_MIPS64_LE] = "mips64-le",
+ [ARCHITECTURE_ALPHA] = "alpha",
+ [ARCHITECTURE_ARM] = "arm",
+ [ARCHITECTURE_ARM_BE] = "arm-be",
+ [ARCHITECTURE_ARM64] = "arm64",
+ [ARCHITECTURE_ARM64_BE] = "arm64-be",
+ [ARCHITECTURE_SH] = "sh",
+ [ARCHITECTURE_SH64] = "sh64",
+ [ARCHITECTURE_M68K] = "m68k",
+ [ARCHITECTURE_TILEGX] = "tilegx",
+ [ARCHITECTURE_CRIS] = "cris",
+ [ARCHITECTURE_NIOS2] = "nios2",
+ [ARCHITECTURE_RISCV32] = "riscv32",
+ [ARCHITECTURE_RISCV64] = "riscv64",
+ [ARCHITECTURE_ARC] = "arc",
+ [ARCHITECTURE_ARC_BE] = "arc-be",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(architecture, int);
diff --git a/src/basic/architecture.h b/src/basic/architecture.h
new file mode 100644
index 0000000..443e890
--- /dev/null
+++ b/src/basic/architecture.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <endian.h>
+
+#include "macro.h"
+#include "util.h"
+
+/* A cleaned up architecture definition. We don't want to get lost in
+ * processor features, models, generations or even ABIs. Hence we
+ * focus on general family, and distinguish word width and
+ * endianness. */
+
+enum {
+ ARCHITECTURE_X86 = 0,
+ ARCHITECTURE_X86_64,
+ ARCHITECTURE_PPC,
+ ARCHITECTURE_PPC_LE,
+ ARCHITECTURE_PPC64,
+ ARCHITECTURE_PPC64_LE,
+ ARCHITECTURE_IA64,
+ ARCHITECTURE_PARISC,
+ ARCHITECTURE_PARISC64,
+ ARCHITECTURE_S390,
+ ARCHITECTURE_S390X,
+ ARCHITECTURE_SPARC,
+ ARCHITECTURE_SPARC64,
+ ARCHITECTURE_MIPS,
+ ARCHITECTURE_MIPS_LE,
+ ARCHITECTURE_MIPS64,
+ ARCHITECTURE_MIPS64_LE,
+ ARCHITECTURE_ALPHA,
+ ARCHITECTURE_ARM,
+ ARCHITECTURE_ARM_BE,
+ ARCHITECTURE_ARM64,
+ ARCHITECTURE_ARM64_BE,
+ ARCHITECTURE_SH,
+ ARCHITECTURE_SH64,
+ ARCHITECTURE_M68K,
+ ARCHITECTURE_TILEGX,
+ ARCHITECTURE_CRIS,
+ ARCHITECTURE_NIOS2,
+ ARCHITECTURE_RISCV32,
+ ARCHITECTURE_RISCV64,
+ ARCHITECTURE_ARC,
+ ARCHITECTURE_ARC_BE,
+ _ARCHITECTURE_MAX,
+ _ARCHITECTURE_INVALID = -1
+};
+
+int uname_architecture(void);
+
+/*
+ * LIB_ARCH_TUPLE should resolve to the local library path
+ * architecture tuple systemd is built for, according to the Debian
+ * tuple list:
+ *
+ * https://wiki.debian.org/Multiarch/Tuples
+ *
+ * This is used in library search paths that should understand
+ * Debian's paths on all distributions.
+ */
+
+#if defined(__x86_64__)
+# define native_architecture() ARCHITECTURE_X86_64
+# if defined(__ILP32__)
+# define LIB_ARCH_TUPLE "x86_64-linux-gnux32"
+# else
+# define LIB_ARCH_TUPLE "x86_64-linux-gnu"
+# endif
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_X86
+#elif defined(__i386__)
+# define native_architecture() ARCHITECTURE_X86
+# define LIB_ARCH_TUPLE "i386-linux-gnu"
+#elif defined(__powerpc64__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_PPC64
+# define LIB_ARCH_TUPLE "ppc64-linux-gnu"
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_PPC
+# else
+# define native_architecture() ARCHITECTURE_PPC64_LE
+# define LIB_ARCH_TUPLE "powerpc64le-linux-gnu"
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_PPC_LE
+# endif
+#elif defined(__powerpc__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_PPC
+# if defined(__NO_FPRS__)
+# define LIB_ARCH_TUPLE "powerpc-linux-gnuspe"
+# else
+# define LIB_ARCH_TUPLE "powerpc-linux-gnu"
+# endif
+# else
+# define native_architecture() ARCHITECTURE_PPC_LE
+# error "Missing LIB_ARCH_TUPLE for PPCLE"
+# endif
+#elif defined(__ia64__)
+# define native_architecture() ARCHITECTURE_IA64
+# define LIB_ARCH_TUPLE "ia64-linux-gnu"
+#elif defined(__hppa64__)
+# define native_architecture() ARCHITECTURE_PARISC64
+# error "Missing LIB_ARCH_TUPLE for HPPA64"
+#elif defined(__hppa__)
+# define native_architecture() ARCHITECTURE_PARISC
+# define LIB_ARCH_TUPLE "hppa‑linux‑gnu"
+#elif defined(__s390x__)
+# define native_architecture() ARCHITECTURE_S390X
+# define LIB_ARCH_TUPLE "s390x-linux-gnu"
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_S390
+#elif defined(__s390__)
+# define native_architecture() ARCHITECTURE_S390
+# define LIB_ARCH_TUPLE "s390-linux-gnu"
+#elif defined(__sparc__) && defined (__arch64__)
+# define native_architecture() ARCHITECTURE_SPARC64
+# define LIB_ARCH_TUPLE "sparc64-linux-gnu"
+#elif defined(__sparc__)
+# define native_architecture() ARCHITECTURE_SPARC
+# define LIB_ARCH_TUPLE "sparc-linux-gnu"
+#elif defined(__mips64) && defined(__LP64__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_MIPS64
+# define LIB_ARCH_TUPLE "mips64-linux-gnuabi64"
+# else
+# define native_architecture() ARCHITECTURE_MIPS64_LE
+# define LIB_ARCH_TUPLE "mips64el-linux-gnuabi64"
+# endif
+#elif defined(__mips64)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_MIPS64
+# define LIB_ARCH_TUPLE "mips64-linux-gnuabin32"
+# else
+# define native_architecture() ARCHITECTURE_MIPS64_LE
+# define LIB_ARCH_TUPLE "mips64el-linux-gnuabin32"
+# endif
+#elif defined(__mips__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_MIPS
+# define LIB_ARCH_TUPLE "mips-linux-gnu"
+# else
+# define native_architecture() ARCHITECTURE_MIPS_LE
+# define LIB_ARCH_TUPLE "mipsel-linux-gnu"
+# endif
+#elif defined(__alpha__)
+# define native_architecture() ARCHITECTURE_ALPHA
+# define LIB_ARCH_TUPLE "alpha-linux-gnu"
+#elif defined(__aarch64__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_ARM64_BE
+# define LIB_ARCH_TUPLE "aarch64_be-linux-gnu"
+# else
+# define native_architecture() ARCHITECTURE_ARM64
+# define LIB_ARCH_TUPLE "aarch64-linux-gnu"
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_ARM
+# endif
+#elif defined(__arm__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_ARM_BE
+# if defined(__ARM_EABI__)
+# if defined(__ARM_PCS_VFP)
+# define LIB_ARCH_TUPLE "armeb-linux-gnueabihf"
+# else
+# define LIB_ARCH_TUPLE "armeb-linux-gnueabi"
+# endif
+# else
+# define LIB_ARCH_TUPLE "armeb-linux-gnu"
+# endif
+# else
+# define native_architecture() ARCHITECTURE_ARM
+# if defined(__ARM_EABI__)
+# if defined(__ARM_PCS_VFP)
+# define LIB_ARCH_TUPLE "arm-linux-gnueabihf"
+# else
+# define LIB_ARCH_TUPLE "arm-linux-gnueabi"
+# endif
+# else
+# define LIB_ARCH_TUPLE "arm-linux-gnu"
+# endif
+# endif
+#elif defined(__sh64__)
+# define native_architecture() ARCHITECTURE_SH64
+# error "Missing LIB_ARCH_TUPLE for SH64"
+#elif defined(__sh__)
+# define native_architecture() ARCHITECTURE_SH
+# if defined(__SH1__)
+# define LIB_ARCH_TUPLE "sh1-linux-gnu"
+# elif defined(__SH2__)
+# define LIB_ARCH_TUPLE "sh2-linux-gnu"
+# elif defined(__SH2A__)
+# define LIB_ARCH_TUPLE "sh2a-linux-gnu"
+# elif defined(__SH2E__)
+# define LIB_ARCH_TUPLE "sh2e-linux-gnu"
+# elif defined(__SH3__)
+# define LIB_ARCH_TUPLE "sh3-linux-gnu"
+# elif defined(__SH3E__)
+# define LIB_ARCH_TUPLE "sh3e-linux-gnu"
+# elif defined(__SH4__) && !defined(__SH4A__)
+# define LIB_ARCH_TUPLE "sh4-linux-gnu"
+# elif defined(__SH4A__)
+# define LIB_ARCH_TUPLE "sh4a-linux-gnu"
+# endif
+#elif defined(__m68k__)
+# define native_architecture() ARCHITECTURE_M68K
+# define LIB_ARCH_TUPLE "m68k-linux-gnu"
+#elif defined(__tilegx__)
+# define native_architecture() ARCHITECTURE_TILEGX
+# define LIB_ARCH_TUPLE "tilegx-linux-gnu"
+#elif defined(__cris__)
+# define native_architecture() ARCHITECTURE_CRIS
+# error "Missing LIB_ARCH_TUPLE for CRIS"
+#elif defined(__nios2__)
+# define native_architecture() ARCHITECTURE_NIOS2
+# define LIB_ARCH_TUPLE "nios2-linux-gnu"
+#elif defined(__riscv__) || defined(__riscv)
+ /* __riscv__ is obsolete, remove in 2018 */
+# if __SIZEOF_POINTER__ == 4
+# define native_architecture() ARCHITECTURE_RISCV32
+# define LIB_ARCH_TUPLE "riscv32-linux-gnu"
+# elif __SIZEOF_POINTER__ == 8
+# define native_architecture() ARCHITECTURE_RISCV64
+# define LIB_ARCH_TUPLE "riscv64-linux-gnu"
+# else
+# error "Unrecognized riscv architecture variant"
+# endif
+#elif defined(__arc__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_ARC_BE
+# define LIB_ARCH_TUPLE "arceb-linux"
+# else
+# define native_architecture() ARCHITECTURE_ARC
+# define LIB_ARCH_TUPLE "arc-linux"
+# endif
+#else
+# error "Please register your architecture here!"
+#endif
+
+const char *architecture_to_string(int a) _const_;
+int architecture_from_string(const char *s) _pure_;
diff --git a/src/basic/arphrd-list.c b/src/basic/arphrd-list.c
new file mode 100644
index 0000000..b6e2486
--- /dev/null
+++ b/src/basic/arphrd-list.c
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <net/if_arp.h>
+#include <string.h>
+
+#include "arphrd-list.h"
+#include "macro.h"
+#include "missing_network.h"
+
+static const struct arphrd_name* lookup_arphrd(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "arphrd-from-name.h"
+#include "arphrd-to-name.h"
+
+const char *arphrd_to_name(int id) {
+
+ if (id <= 0)
+ return NULL;
+
+ if ((size_t) id >= ELEMENTSOF(arphrd_names))
+ return NULL;
+
+ return arphrd_names[id];
+}
+
+int arphrd_from_name(const char *name) {
+ const struct arphrd_name *sc;
+
+ assert(name);
+
+ sc = lookup_arphrd(name, strlen(name));
+ if (!sc)
+ return -EINVAL;
+
+ return sc->id;
+}
+
+int arphrd_max(void) {
+ return ELEMENTSOF(arphrd_names);
+}
diff --git a/src/basic/arphrd-list.h b/src/basic/arphrd-list.h
new file mode 100644
index 0000000..5dcfe5e
--- /dev/null
+++ b/src/basic/arphrd-list.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+const char *arphrd_to_name(int id);
+int arphrd_from_name(const char *name);
+
+int arphrd_max(void);
diff --git a/src/basic/arphrd-to-name.awk b/src/basic/arphrd-to-name.awk
new file mode 100644
index 0000000..5a35673
--- /dev/null
+++ b/src/basic/arphrd-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{
+ print "static const char* const arphrd_names[] = { "
+}
+!/CISCO/ {
+ printf " [ARPHRD_%s] = \"%s\",\n", $1, $1
+}
+END{
+ print "};"
+}
diff --git a/src/basic/async.c b/src/basic/async.c
new file mode 100644
index 0000000..c45ca01
--- /dev/null
+++ b/src/basic/async.c
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <unistd.h>
+
+#include "async.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "util.h"
+
+int asynchronous_job(void* (*func)(void *p), void *arg) {
+ sigset_t ss, saved_ss;
+ pthread_attr_t a;
+ pthread_t t;
+ int r, k;
+
+ /* It kinda sucks that we have to resort to threads to implement an asynchronous close(), but well, such is
+ * life. */
+
+ r = pthread_attr_init(&a);
+ if (r > 0)
+ return -r;
+
+ r = pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED);
+ if (r > 0) {
+ r = -r;
+ goto finish;
+ }
+
+ assert_se(sigfillset(&ss) >= 0);
+
+ /* Block all signals before forking off the thread, so that the new thread is started with all signals
+ * blocked. This way the existence of the new thread won't affect signal handling in other threads. */
+
+ r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss);
+ if (r > 0) {
+ r = -r;
+ goto finish;
+ }
+
+ r = pthread_create(&t, &a, func, arg);
+
+ k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL);
+
+ if (r > 0)
+ r = -r;
+ else if (k > 0)
+ r = -k;
+ else
+ r = 0;
+
+finish:
+ pthread_attr_destroy(&a);
+ return r;
+}
+
+int asynchronous_sync(pid_t *ret_pid) {
+ int r;
+
+ /* This forks off an invocation of fork() as a child process, in order to initiate synchronization to
+ * disk. Note that we implement this as helper process rather than thread as we don't want the sync() to hang our
+ * original process ever, and a thread would do that as the process can't exit with threads hanging in blocking
+ * syscalls. */
+
+ r = safe_fork("(sd-sync)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS, ret_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child process */
+ (void) sync();
+ _exit(EXIT_SUCCESS);
+ }
+
+ return 0;
+}
+
+static void *close_thread(void *p) {
+ (void) pthread_setname_np(pthread_self(), "close");
+
+ assert_se(close_nointr(PTR_TO_FD(p)) != -EBADF);
+ return NULL;
+}
+
+int asynchronous_close(int fd) {
+ int r;
+
+ /* This is supposed to behave similar to safe_close(), but
+ * actually invoke close() asynchronously, so that it will
+ * never block. Ideally the kernel would have an API for this,
+ * but it doesn't, so we work around it, and hide this as a
+ * far away as we can. */
+
+ if (fd >= 0) {
+ PROTECT_ERRNO;
+
+ r = asynchronous_job(close_thread, FD_TO_PTR(fd));
+ if (r < 0)
+ assert_se(close_nointr(fd) != -EBADF);
+ }
+
+ return -1;
+}
diff --git a/src/basic/async.h b/src/basic/async.h
new file mode 100644
index 0000000..3160613
--- /dev/null
+++ b/src/basic/async.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int asynchronous_job(void* (*func)(void *p), void *arg);
+
+int asynchronous_sync(pid_t *ret_pid);
+int asynchronous_close(int fd);
diff --git a/src/basic/audit-util.c b/src/basic/audit-util.c
new file mode 100644
index 0000000..5cbaef3
--- /dev/null
+++ b/src/basic/audit-util.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <linux/netlink.h>
+#include <stdio.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "user-util.h"
+
+int audit_session_from_pid(pid_t pid, uint32_t *id) {
+ _cleanup_free_ char *s = NULL;
+ const char *p;
+ uint32_t u;
+ int r;
+
+ assert(id);
+
+ /* We don't convert ENOENT to ESRCH here, since we can't
+ * really distuingish between "audit is not available in the
+ * kernel" and "the process does not exist", both which will
+ * result in ENOENT. */
+
+ p = procfs_file_alloca(pid, "sessionid");
+
+ r = read_one_line_file(p, &s);
+ if (r < 0)
+ return r;
+
+ r = safe_atou32(s, &u);
+ if (r < 0)
+ return r;
+
+ if (!audit_session_is_valid(u))
+ return -ENODATA;
+
+ *id = u;
+ return 0;
+}
+
+int audit_loginuid_from_pid(pid_t pid, uid_t *uid) {
+ _cleanup_free_ char *s = NULL;
+ const char *p;
+ uid_t u;
+ int r;
+
+ assert(uid);
+
+ p = procfs_file_alloca(pid, "loginuid");
+
+ r = read_one_line_file(p, &s);
+ if (r < 0)
+ return r;
+
+ r = parse_uid(s, &u);
+ if (r == -ENXIO) /* the UID was -1 */
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ *uid = u;
+ return 0;
+}
+
+bool use_audit(void) {
+ static int cached_use = -1;
+
+ if (cached_use < 0) {
+ int fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_AUDIT);
+ if (fd < 0) {
+ cached_use = !IN_SET(errno, EAFNOSUPPORT, EPROTONOSUPPORT, EPERM);
+ if (!cached_use)
+ log_debug_errno(errno, "Won't talk to audit: %m");
+ } else {
+ cached_use = true;
+ safe_close(fd);
+ }
+ }
+
+ return cached_use;
+}
diff --git a/src/basic/audit-util.h b/src/basic/audit-util.h
new file mode 100644
index 0000000..c9fc498
--- /dev/null
+++ b/src/basic/audit-util.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#define AUDIT_SESSION_INVALID ((uint32_t) -1)
+
+int audit_session_from_pid(pid_t pid, uint32_t *id);
+int audit_loginuid_from_pid(pid_t pid, uid_t *uid);
+
+bool use_audit(void);
+
+static inline bool audit_session_is_valid(uint32_t id) {
+ return id > 0 && id != AUDIT_SESSION_INVALID;
+}
diff --git a/src/basic/blockdev-util.c b/src/basic/blockdev-util.c
new file mode 100644
index 0000000..3017ecd
--- /dev/null
+++ b/src/basic/blockdev-util.c
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/stat.h>
+#include <sys/statfs.h>
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "stat-util.h"
+
+int block_get_whole_disk(dev_t d, dev_t *ret) {
+ char p[SYS_BLOCK_PATH_MAX("/partition")];
+ _cleanup_free_ char *s = NULL;
+ dev_t devt;
+ int r;
+
+ assert(ret);
+
+ /* If it has a queue this is good enough for us */
+ xsprintf_sys_block_path(p, "/queue", d);
+ if (access(p, F_OK) >= 0) {
+ *ret = d;
+ return 0;
+ }
+
+ /* If it is a partition find the originating device */
+ xsprintf_sys_block_path(p, "/partition", d);
+ if (access(p, F_OK) < 0)
+ return -ENOENT;
+
+ /* Get parent dev_t */
+ xsprintf_sys_block_path(p, "/../dev", d);
+ r = read_one_line_file(p, &s);
+ if (r < 0)
+ return r;
+
+ r = parse_dev(s, &devt);
+ if (r < 0)
+ return r;
+
+ /* Only return this if it is really good enough for us. */
+ xsprintf_sys_block_path(p, "/queue", devt);
+ if (access(p, F_OK) < 0)
+ return -ENOENT;
+
+ *ret = devt;
+ return 0;
+}
+
+int get_block_device(const char *path, dev_t *dev) {
+ struct stat st;
+ struct statfs sfs;
+
+ assert(path);
+ assert(dev);
+
+ /* Get's the block device directly backing a file system. If
+ * the block device is encrypted, returns the device mapper
+ * block device. */
+
+ if (lstat(path, &st))
+ return -errno;
+
+ if (major(st.st_dev) != 0) {
+ *dev = st.st_dev;
+ return 1;
+ }
+
+ if (statfs(path, &sfs) < 0)
+ return -errno;
+
+ if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC))
+ return btrfs_get_block_device(path, dev);
+
+ *dev = 0;
+ return 0;
+}
+
+int block_get_originating(dev_t dt, dev_t *ret) {
+ _cleanup_closedir_ DIR *d = NULL;
+ _cleanup_free_ char *t = NULL;
+ char p[SYS_BLOCK_PATH_MAX("/slaves")];
+ struct dirent *de, *found = NULL;
+ const char *q;
+ dev_t devt;
+ int r;
+
+ /* For the specified block device tries to chase it through the layers, in case LUKS-style DM stacking is used,
+ * trying to find the next underlying layer. */
+
+ xsprintf_sys_block_path(p, "/slaves", dt);
+ d = opendir(p);
+ if (!d)
+ return -errno;
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN))
+ continue;
+
+ if (found) {
+ _cleanup_free_ char *u = NULL, *v = NULL, *a = NULL, *b = NULL;
+
+ /* We found a device backed by multiple other devices. We don't really support automatic
+ * discovery on such setups, with the exception of dm-verity partitions. In this case there are
+ * two backing devices: the data partition and the hash partition. We are fine with such
+ * setups, however, only if both partitions are on the same physical device. Hence, let's
+ * verify this. */
+
+ u = strjoin(p, "/", de->d_name, "/../dev");
+ if (!u)
+ return -ENOMEM;
+
+ v = strjoin(p, "/", found->d_name, "/../dev");
+ if (!v)
+ return -ENOMEM;
+
+ r = read_one_line_file(u, &a);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read %s: %m", u);
+
+ r = read_one_line_file(v, &b);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read %s: %m", v);
+
+ /* Check if the parent device is the same. If not, then the two backing devices are on
+ * different physical devices, and we don't support that. */
+ if (!streq(a, b))
+ return -ENOTUNIQ;
+ }
+
+ found = de;
+ }
+
+ if (!found)
+ return -ENOENT;
+
+ q = strjoina(p, "/", found->d_name, "/dev");
+
+ r = read_one_line_file(q, &t);
+ if (r < 0)
+ return r;
+
+ r = parse_dev(t, &devt);
+ if (r < 0)
+ return -EINVAL;
+
+ if (major(devt) == 0)
+ return -ENOENT;
+
+ *ret = devt;
+ return 1;
+}
+
+int get_block_device_harder(const char *path, dev_t *ret) {
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ /* Gets the backing block device for a file system, and handles LUKS encrypted file systems, looking for its
+ * immediate parent, if there is one. */
+
+ r = get_block_device(path, ret);
+ if (r <= 0)
+ return r;
+
+ r = block_get_originating(*ret, ret);
+ if (r < 0)
+ log_debug_errno(r, "Failed to chase block device '%s', ignoring: %m", path);
+
+ return 1;
+}
diff --git a/src/basic/blockdev-util.h b/src/basic/blockdev-util.h
new file mode 100644
index 0000000..6d8a796
--- /dev/null
+++ b/src/basic/blockdev-util.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+#include "macro.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+#define SYS_BLOCK_PATH_MAX(suffix) \
+ (STRLEN("/sys/dev/block/") + DECIMAL_STR_MAX(dev_t) + 1 + DECIMAL_STR_MAX(dev_t) + strlen_ptr(suffix))
+#define xsprintf_sys_block_path(buf, suffix, devno) \
+ xsprintf(buf, "/sys/dev/block/%u:%u%s", major(devno), minor(devno), strempty(suffix))
+
+int block_get_whole_disk(dev_t d, dev_t *ret);
+int block_get_originating(dev_t d, dev_t *ret);
+
+int get_block_device(const char *path, dev_t *dev);
+
+int get_block_device_harder(const char *path, dev_t *dev);
diff --git a/src/basic/btrfs-util.c b/src/basic/btrfs-util.c
new file mode 100644
index 0000000..da4dd2a
--- /dev/null
+++ b/src/basic/btrfs-util.c
@@ -0,0 +1,1993 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/sysmacros.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "copy.h"
+#include "device-nodes.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "smack-util.h"
+#include "sparse-endian.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "util.h"
+
+/* WARNING: Be careful with file system ioctls! When we get an fd, we
+ * need to make sure it either refers to only a regular file or
+ * directory, or that it is located on btrfs, before invoking any
+ * btrfs ioctls. The ioctl numbers are reused by some device drivers
+ * (such as DRM), and hence might have bad effects when invoked on
+ * device nodes (that reference drivers) rather than fds to normal
+ * files or directories. */
+
+static int validate_subvolume_name(const char *name) {
+
+ if (!filename_is_valid(name))
+ return -EINVAL;
+
+ if (strlen(name) > BTRFS_SUBVOL_NAME_MAX)
+ return -E2BIG;
+
+ return 0;
+}
+
+static int extract_subvolume_name(const char *path, const char **subvolume) {
+ const char *fn;
+ int r;
+
+ assert(path);
+ assert(subvolume);
+
+ fn = basename(path);
+
+ r = validate_subvolume_name(fn);
+ if (r < 0)
+ return r;
+
+ *subvolume = fn;
+ return 0;
+}
+
+int btrfs_is_filesystem(int fd) {
+ struct statfs sfs;
+
+ assert(fd >= 0);
+
+ if (fstatfs(fd, &sfs) < 0)
+ return -errno;
+
+ return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
+}
+
+int btrfs_is_subvol_fd(int fd) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ /* On btrfs subvolumes always have the inode 256 */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
+ return 0;
+
+ return btrfs_is_filesystem(fd);
+}
+
+int btrfs_is_subvol(const char *path) {
+ _cleanup_close_ int fd = -1;
+
+ assert(path);
+
+ fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_is_subvol_fd(fd);
+}
+
+int btrfs_subvol_make_fd(int fd, const char *subvolume) {
+ struct btrfs_ioctl_vol_args args = {};
+ _cleanup_close_ int real_fd = -1;
+ int r;
+
+ assert(subvolume);
+
+ r = validate_subvolume_name(subvolume);
+ if (r < 0)
+ return r;
+
+ r = fcntl(fd, F_GETFL);
+ if (r < 0)
+ return -errno;
+ if (FLAGS_SET(r, O_PATH)) {
+ /* An O_PATH fd was specified, let's convert here to a proper one, as btrfs ioctl's can't deal with
+ * O_PATH. */
+
+ real_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (real_fd < 0)
+ return real_fd;
+
+ fd = real_fd;
+ }
+
+ strncpy(args.name, subvolume, sizeof(args.name)-1);
+
+ if (ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, &args) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_subvol_make(const char *path) {
+ _cleanup_close_ int fd = -1;
+ const char *subvolume;
+ int r;
+
+ assert(path);
+
+ r = extract_subvolume_name(path, &subvolume);
+ if (r < 0)
+ return r;
+
+ fd = open_parent(path, O_CLOEXEC, 0);
+ if (fd < 0)
+ return fd;
+
+ return btrfs_subvol_make_fd(fd, subvolume);
+}
+
+int btrfs_subvol_set_read_only_fd(int fd, bool b) {
+ uint64_t flags, nflags;
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
+ return -EINVAL;
+
+ if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
+ return -errno;
+
+ if (b)
+ nflags = flags | BTRFS_SUBVOL_RDONLY;
+ else
+ nflags = flags & ~BTRFS_SUBVOL_RDONLY;
+
+ if (flags == nflags)
+ return 0;
+
+ if (ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_subvol_set_read_only(const char *path, bool b) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_subvol_set_read_only_fd(fd, b);
+}
+
+int btrfs_subvol_get_read_only_fd(int fd) {
+ uint64_t flags;
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
+ return -EINVAL;
+
+ if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
+ return -errno;
+
+ return !!(flags & BTRFS_SUBVOL_RDONLY);
+}
+
+int btrfs_reflink(int infd, int outfd) {
+ int r;
+
+ assert(infd >= 0);
+ assert(outfd >= 0);
+
+ /* Make sure we invoke the ioctl on a regular file, so that no device driver accidentally gets it. */
+
+ r = fd_verify_regular(outfd);
+ if (r < 0)
+ return r;
+
+ if (ioctl(outfd, BTRFS_IOC_CLONE, infd) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_clone_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) {
+ struct btrfs_ioctl_clone_range_args args = {
+ .src_fd = infd,
+ .src_offset = in_offset,
+ .src_length = sz,
+ .dest_offset = out_offset,
+ };
+ int r;
+
+ assert(infd >= 0);
+ assert(outfd >= 0);
+ assert(sz > 0);
+
+ r = fd_verify_regular(outfd);
+ if (r < 0)
+ return r;
+
+ if (ioctl(outfd, BTRFS_IOC_CLONE_RANGE, &args) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_get_block_device_fd(int fd, dev_t *dev) {
+ struct btrfs_ioctl_fs_info_args fsi = {};
+ uint64_t id;
+ int r;
+
+ assert(fd >= 0);
+ assert(dev);
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+
+ if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
+ return -errno;
+
+ /* We won't do this for btrfs RAID */
+ if (fsi.num_devices != 1) {
+ *dev = 0;
+ return 0;
+ }
+
+ for (id = 1; id <= fsi.max_id; id++) {
+ struct btrfs_ioctl_dev_info_args di = {
+ .devid = id,
+ };
+ struct stat st;
+
+ if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
+ if (errno == ENODEV)
+ continue;
+
+ return -errno;
+ }
+
+ if (stat((char*) di.path, &st) < 0)
+ return -errno;
+
+ if (!S_ISBLK(st.st_mode))
+ return -ENODEV;
+
+ if (major(st.st_rdev) == 0)
+ return -ENODEV;
+
+ *dev = st.st_rdev;
+ return 1;
+ }
+
+ return -ENODEV;
+}
+
+int btrfs_get_block_device(const char *path, dev_t *dev) {
+ _cleanup_close_ int fd = -1;
+
+ assert(path);
+ assert(dev);
+
+ fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_get_block_device_fd(fd, dev);
+}
+
+int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
+ struct btrfs_ioctl_ino_lookup_args args = {
+ .objectid = BTRFS_FIRST_FREE_OBJECTID
+ };
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+
+ if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) < 0)
+ return -errno;
+
+ *ret = args.treeid;
+ return 0;
+}
+
+int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) {
+ _cleanup_close_ int subvol_fd = -1;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (subvol_fd < 0)
+ return -errno;
+
+ return btrfs_subvol_get_id_fd(subvol_fd, ret);
+}
+
+static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
+ assert(args);
+
+ /* the objectid, type, offset together make up the btrfs key,
+ * which is considered a single 136byte integer when
+ * comparing. This call increases the counter by one, dealing
+ * with the overflow between the overflows */
+
+ if (args->key.min_offset < (uint64_t) -1) {
+ args->key.min_offset++;
+ return true;
+ }
+
+ if (args->key.min_type < (uint8_t) -1) {
+ args->key.min_type++;
+ args->key.min_offset = 0;
+ return true;
+ }
+
+ if (args->key.min_objectid < (uint64_t) -1) {
+ args->key.min_objectid++;
+ args->key.min_offset = 0;
+ args->key.min_type = 0;
+ return true;
+ }
+
+ return 0;
+}
+
+static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
+ assert(args);
+ assert(h);
+
+ args->key.min_objectid = h->objectid;
+ args->key.min_type = h->type;
+ args->key.min_offset = h->offset;
+}
+
+static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
+ int r;
+
+ assert(args);
+
+ /* Compare min and max */
+
+ r = CMP(args->key.min_objectid, args->key.max_objectid);
+ if (r != 0)
+ return r;
+
+ r = CMP(args->key.min_type, args->key.max_type);
+ if (r != 0)
+ return r;
+
+ return CMP(args->key.min_offset, args->key.max_offset);
+}
+
+#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) \
+ for ((i) = 0, \
+ (sh) = (const struct btrfs_ioctl_search_header*) (args).buf; \
+ (i) < (args).key.nr_items; \
+ (i)++, \
+ (sh) = (const struct btrfs_ioctl_search_header*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header) + (sh)->len))
+
+#define BTRFS_IOCTL_SEARCH_HEADER_BODY(sh) \
+ ((void*) ((uint8_t*) sh + sizeof(struct btrfs_ioctl_search_header)))
+
+int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) {
+ struct btrfs_ioctl_search_args args = {
+ /* Tree of tree roots */
+ .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
+
+ /* Look precisely for the subvolume items */
+ .key.min_type = BTRFS_ROOT_ITEM_KEY,
+ .key.max_type = BTRFS_ROOT_ITEM_KEY,
+
+ .key.min_offset = 0,
+ .key.max_offset = (uint64_t) -1,
+
+ /* No restrictions on the other components */
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+
+ bool found = false;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (subvol_id == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &subvol_id);
+ if (r < 0)
+ return r;
+ } else {
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+ }
+
+ args.key.min_objectid = args.key.max_objectid = subvol_id;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
+ return -errno;
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+
+ const struct btrfs_root_item *ri;
+
+ /* Make sure we start the next search at least from this entry */
+ btrfs_ioctl_search_args_set(&args, sh);
+
+ if (sh->objectid != subvol_id)
+ continue;
+ if (sh->type != BTRFS_ROOT_ITEM_KEY)
+ continue;
+
+ /* Older versions of the struct lacked the otime setting */
+ if (sh->len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
+ continue;
+
+ ri = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+
+ ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
+ (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;
+
+ ret->subvol_id = subvol_id;
+ ret->read_only = le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY;
+
+ assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
+ memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
+ memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));
+
+ found = true;
+ goto finish;
+ }
+
+ /* Increase search key by one, to read the next item, if we can. */
+ if (!btrfs_ioctl_search_args_inc(&args))
+ break;
+ }
+
+finish:
+ if (!found)
+ return -ENODATA;
+
+ return 0;
+}
+
+int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
+
+ struct btrfs_ioctl_search_args args = {
+ /* Tree of quota items */
+ .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
+
+ /* The object ID is always 0 */
+ .key.min_objectid = 0,
+ .key.max_objectid = 0,
+
+ /* Look precisely for the quota items */
+ .key.min_type = BTRFS_QGROUP_STATUS_KEY,
+ .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
+
+ /* No restrictions on the other components */
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+
+ bool found_info = false, found_limit = false;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (qgroupid == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &qgroupid);
+ if (r < 0)
+ return r;
+ } else {
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+ }
+
+ args.key.min_offset = args.key.max_offset = qgroupid;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
+ if (errno == ENOENT) /* quota tree is missing: quota disabled */
+ break;
+
+ return -errno;
+ }
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+
+ /* Make sure we start the next search at least from this entry */
+ btrfs_ioctl_search_args_set(&args, sh);
+
+ if (sh->objectid != 0)
+ continue;
+ if (sh->offset != qgroupid)
+ continue;
+
+ if (sh->type == BTRFS_QGROUP_INFO_KEY) {
+ const struct btrfs_qgroup_info_item *qii = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+
+ ret->referenced = le64toh(qii->rfer);
+ ret->exclusive = le64toh(qii->excl);
+
+ found_info = true;
+
+ } else if (sh->type == BTRFS_QGROUP_LIMIT_KEY) {
+ const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+
+ if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER)
+ ret->referenced_max = le64toh(qli->max_rfer);
+ else
+ ret->referenced_max = (uint64_t) -1;
+
+ if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL)
+ ret->exclusive_max = le64toh(qli->max_excl);
+ else
+ ret->exclusive_max = (uint64_t) -1;
+
+ found_limit = true;
+ }
+
+ if (found_info && found_limit)
+ goto finish;
+ }
+
+ /* Increase search key by one, to read the next item, if we can. */
+ if (!btrfs_ioctl_search_args_inc(&args))
+ break;
+ }
+
+finish:
+ if (!found_limit && !found_info)
+ return -ENODATA;
+
+ if (!found_info) {
+ ret->referenced = (uint64_t) -1;
+ ret->exclusive = (uint64_t) -1;
+ }
+
+ if (!found_limit) {
+ ret->referenced_max = (uint64_t) -1;
+ ret->exclusive_max = (uint64_t) -1;
+ }
+
+ return 0;
+}
+
+int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
+}
+
+int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) {
+ uint64_t level, lowest = (uint64_t) -1, lowest_qgroupid = 0;
+ _cleanup_free_ uint64_t *qgroups = NULL;
+ int r, n, i;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ /* This finds the "subtree" qgroup for a specific
+ * subvolume. This only works for subvolumes that have been
+ * prepared with btrfs_subvol_auto_qgroup_fd() with
+ * insert_intermediary_qgroup=true (or equivalent). For others
+ * it will return the leaf qgroup instead. The two cases may
+ * be distuingished via the return value, which is 1 in case
+ * an appropriate "subtree" qgroup was found, and 0
+ * otherwise. */
+
+ if (subvol_id == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &subvol_id);
+ if (r < 0)
+ return r;
+ }
+
+ r = btrfs_qgroupid_split(subvol_id, &level, NULL);
+ if (r < 0)
+ return r;
+ if (level != 0) /* Input must be a leaf qgroup */
+ return -EINVAL;
+
+ n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
+ if (n < 0)
+ return n;
+
+ for (i = 0; i < n; i++) {
+ uint64_t id;
+
+ r = btrfs_qgroupid_split(qgroups[i], &level, &id);
+ if (r < 0)
+ return r;
+
+ if (id != subvol_id)
+ continue;
+
+ if (lowest == (uint64_t) -1 || level < lowest) {
+ lowest_qgroupid = qgroups[i];
+ lowest = level;
+ }
+ }
+
+ if (lowest == (uint64_t) -1) {
+ /* No suitable higher-level qgroup found, let's return
+ * the leaf qgroup instead, and indicate that with the
+ * return value. */
+
+ *ret = subvol_id;
+ return 0;
+ }
+
+ *ret = lowest_qgroupid;
+ return 1;
+}
+
+int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
+ uint64_t qgroupid;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ /* This determines the quota data of the qgroup with the
+ * lowest level, that shares the id part with the specified
+ * subvolume. This is useful for determining the quota data
+ * for entire subvolume subtrees, as long as the subtrees have
+ * been set up with btrfs_qgroup_subvol_auto_fd() or in a
+ * compatible way */
+
+ r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
+ if (r < 0)
+ return r;
+
+ return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
+}
+
+int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret);
+}
+
+int btrfs_defrag_fd(int fd) {
+ int r;
+
+ assert(fd >= 0);
+
+ r = fd_verify_regular(fd);
+ if (r < 0)
+ return r;
+
+ if (ioctl(fd, BTRFS_IOC_DEFRAG, NULL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_defrag(const char *p) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_defrag_fd(fd);
+}
+
+int btrfs_quota_enable_fd(int fd, bool b) {
+ struct btrfs_ioctl_quota_ctl_args args = {
+ .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
+ };
+ int r;
+
+ assert(fd >= 0);
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+
+ if (ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_quota_enable(const char *path, bool b) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_quota_enable_fd(fd, b);
+}
+
+int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) {
+
+ struct btrfs_ioctl_qgroup_limit_args args = {
+ .lim.max_rfer = referenced_max,
+ .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
+ };
+ unsigned c;
+ int r;
+
+ assert(fd >= 0);
+
+ if (qgroupid == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &qgroupid);
+ if (r < 0)
+ return r;
+ } else {
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+ }
+
+ args.qgroupid = qgroupid;
+
+ for (c = 0;; c++) {
+ if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0) {
+
+ if (errno == EBUSY && c < 10) {
+ (void) btrfs_quota_scan_wait(fd);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
+}
+
+int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) {
+ uint64_t qgroupid;
+ int r;
+
+ assert(fd >= 0);
+
+ r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
+ if (r < 0)
+ return r;
+
+ return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
+}
+
+int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max);
+}
+
+int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) {
+ assert(ret);
+
+ if (level >= (UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT)))
+ return -EINVAL;
+
+ if (id >= (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT))
+ return -EINVAL;
+
+ *ret = (level << BTRFS_QGROUP_LEVEL_SHIFT) | id;
+ return 0;
+}
+
+int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) {
+ assert(level || id);
+
+ if (level)
+ *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
+
+ if (id)
+ *id = qgroupid & ((UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1);
+
+ return 0;
+}
+
+static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) {
+
+ struct btrfs_ioctl_qgroup_create_args args = {
+ .create = b,
+ .qgroupid = qgroupid,
+ };
+ unsigned c;
+ int r;
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOTTY;
+
+ for (c = 0;; c++) {
+ if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0) {
+
+ /* If quota is not enabled, we get EINVAL. Turn this into a recognizable error */
+ if (errno == EINVAL)
+ return -ENOPROTOOPT;
+
+ if (errno == EBUSY && c < 10) {
+ (void) btrfs_quota_scan_wait(fd);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+int btrfs_qgroup_create(int fd, uint64_t qgroupid) {
+ return qgroup_create_or_destroy(fd, true, qgroupid);
+}
+
+int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) {
+ return qgroup_create_or_destroy(fd, false, qgroupid);
+}
+
+int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) {
+ _cleanup_free_ uint64_t *qgroups = NULL;
+ uint64_t subvol_id;
+ int i, n, r;
+
+ /* Destroys the specified qgroup, but unassigns it from all
+ * its parents first. Also, it recursively destroys all
+ * qgroups it is assigned to that have the same id part of the
+ * qgroupid as the specified group. */
+
+ r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id);
+ if (r < 0)
+ return r;
+
+ n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups);
+ if (n < 0)
+ return n;
+
+ for (i = 0; i < n; i++) {
+ uint64_t id;
+
+ r = btrfs_qgroupid_split(qgroups[i], NULL, &id);
+ if (r < 0)
+ return r;
+
+ r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]);
+ if (r < 0)
+ return r;
+
+ if (id != subvol_id)
+ continue;
+
+ /* The parent qgroupid shares the same id part with
+ * us? If so, destroy it too. */
+
+ (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]);
+ }
+
+ return btrfs_qgroup_destroy(fd, qgroupid);
+}
+
+int btrfs_quota_scan_start(int fd) {
+ struct btrfs_ioctl_quota_rescan_args args = {};
+
+ assert(fd >= 0);
+
+ if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_quota_scan_wait(int fd) {
+ assert(fd >= 0);
+
+ if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_quota_scan_ongoing(int fd) {
+ struct btrfs_ioctl_quota_rescan_args args = {};
+
+ assert(fd >= 0);
+
+ if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &args) < 0)
+ return -errno;
+
+ return !!args.flags;
+}
+
+static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) {
+ struct btrfs_ioctl_qgroup_assign_args args = {
+ .assign = b,
+ .src = child,
+ .dst = parent,
+ };
+ unsigned c;
+ int r;
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOTTY;
+
+ for (c = 0;; c++) {
+ r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args);
+ if (r < 0) {
+ if (errno == EBUSY && c < 10) {
+ (void) btrfs_quota_scan_wait(fd);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ if (r == 0)
+ return 0;
+
+ /* If the return value is > 0, we need to request a rescan */
+
+ (void) btrfs_quota_scan_start(fd);
+ return 1;
+ }
+}
+
+int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) {
+ return qgroup_assign_or_unassign(fd, true, child, parent);
+}
+
+int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) {
+ return qgroup_assign_or_unassign(fd, false, child, parent);
+}
+
+static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
+ struct btrfs_ioctl_search_args args = {
+ .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
+
+ .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
+ .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
+
+ .key.min_type = BTRFS_ROOT_BACKREF_KEY,
+ .key.max_type = BTRFS_ROOT_BACKREF_KEY,
+
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+
+ struct btrfs_ioctl_vol_args vol_args = {};
+ _cleanup_close_ int subvol_fd = -1;
+ struct stat st;
+ bool made_writable = false;
+ int r;
+
+ assert(fd >= 0);
+ assert(subvolume);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode))
+ return -EINVAL;
+
+ subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (subvol_fd < 0)
+ return -errno;
+
+ /* Let's check if this is actually a subvolume. Note that this is mostly redundant, as BTRFS_IOC_SNAP_DESTROY
+ * would fail anyway if it is not. However, it's a good thing to check this ahead of time so that we can return
+ * ENOTTY unconditionally in this case. This is different from the ioctl() which will return EPERM/EACCES if we
+ * don't have the privileges to remove subvolumes, regardless if the specified directory is actually a
+ * subvolume or not. In order to make it easy for callers to cover the "this is not a btrfs subvolume" case
+ * let's prefer ENOTTY over EPERM/EACCES though. */
+ r = btrfs_is_subvol_fd(subvol_fd);
+ if (r < 0)
+ return r;
+ if (r == 0) /* Not a btrfs subvolume */
+ return -ENOTTY;
+
+ if (subvol_id == 0) {
+ r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
+ if (r < 0)
+ return r;
+ }
+
+ /* First, try to remove the subvolume. If it happens to be
+ * already empty, this will just work. */
+ strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
+ if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) {
+ (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
+ return 0;
+ }
+ if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY)
+ return -errno;
+
+ /* OK, the subvolume is not empty, let's look for child
+ * subvolumes, and remove them, first */
+
+ args.key.min_offset = args.key.max_offset = subvol_id;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
+ return -errno;
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+ _cleanup_free_ char *p = NULL;
+ const struct btrfs_root_ref *ref;
+ struct btrfs_ioctl_ino_lookup_args ino_args;
+
+ btrfs_ioctl_search_args_set(&args, sh);
+
+ if (sh->type != BTRFS_ROOT_BACKREF_KEY)
+ continue;
+ if (sh->offset != subvol_id)
+ continue;
+
+ ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+
+ p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
+ if (!p)
+ return -ENOMEM;
+
+ zero(ino_args);
+ ino_args.treeid = subvol_id;
+ ino_args.objectid = htole64(ref->dirid);
+
+ if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
+ return -errno;
+
+ if (!made_writable) {
+ r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
+ if (r < 0)
+ return r;
+
+ made_writable = true;
+ }
+
+ if (isempty(ino_args.name))
+ /* Subvolume is in the top-level
+ * directory of the subvolume. */
+ r = subvol_remove_children(subvol_fd, p, sh->objectid, flags);
+ else {
+ _cleanup_close_ int child_fd = -1;
+
+ /* Subvolume is somewhere further down,
+ * hence we need to open the
+ * containing directory first */
+
+ child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (child_fd < 0)
+ return -errno;
+
+ r = subvol_remove_children(child_fd, p, sh->objectid, flags);
+ }
+ if (r < 0)
+ return r;
+ }
+
+ /* Increase search key by one, to read the next item, if we can. */
+ if (!btrfs_ioctl_search_args_inc(&args))
+ break;
+ }
+
+ /* OK, the child subvolumes should all be gone now, let's try
+ * again to remove the subvolume */
+ if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
+ return -errno;
+
+ (void) btrfs_qgroup_destroy_recursive(fd, subvol_id);
+ return 0;
+}
+
+int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags) {
+ _cleanup_close_ int fd = -1;
+ const char *subvolume;
+ int r;
+
+ assert(path);
+
+ r = extract_subvolume_name(path, &subvolume);
+ if (r < 0)
+ return r;
+
+ fd = open_parent(path, O_CLOEXEC, 0);
+ if (fd < 0)
+ return fd;
+
+ return subvol_remove_children(fd, subvolume, 0, flags);
+}
+
+int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags) {
+ return subvol_remove_children(fd, subvolume, 0, flags);
+}
+
+int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) {
+
+ struct btrfs_ioctl_search_args args = {
+ /* Tree of quota items */
+ .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
+
+ /* The object ID is always 0 */
+ .key.min_objectid = 0,
+ .key.max_objectid = 0,
+
+ /* Look precisely for the quota items */
+ .key.min_type = BTRFS_QGROUP_LIMIT_KEY,
+ .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
+
+ /* For our qgroup */
+ .key.min_offset = old_qgroupid,
+ .key.max_offset = old_qgroupid,
+
+ /* No restrictions on the other components */
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+
+ int r;
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
+ if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */
+ break;
+
+ return -errno;
+ }
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+ const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+ struct btrfs_ioctl_qgroup_limit_args qargs;
+ unsigned c;
+
+ /* Make sure we start the next search at least from this entry */
+ btrfs_ioctl_search_args_set(&args, sh);
+
+ if (sh->objectid != 0)
+ continue;
+ if (sh->type != BTRFS_QGROUP_LIMIT_KEY)
+ continue;
+ if (sh->offset != old_qgroupid)
+ continue;
+
+ /* We found the entry, now copy things over. */
+
+ qargs = (struct btrfs_ioctl_qgroup_limit_args) {
+ .qgroupid = new_qgroupid,
+
+ .lim.max_rfer = le64toh(qli->max_rfer),
+ .lim.max_excl = le64toh(qli->max_excl),
+ .lim.rsv_rfer = le64toh(qli->rsv_rfer),
+ .lim.rsv_excl = le64toh(qli->rsv_excl),
+
+ .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER|
+ BTRFS_QGROUP_LIMIT_MAX_EXCL|
+ BTRFS_QGROUP_LIMIT_RSV_RFER|
+ BTRFS_QGROUP_LIMIT_RSV_EXCL),
+ };
+
+ for (c = 0;; c++) {
+ if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) {
+ if (errno == EBUSY && c < 10) {
+ (void) btrfs_quota_scan_wait(fd);
+ continue;
+ }
+ return -errno;
+ }
+
+ break;
+ }
+
+ return 1;
+ }
+
+ /* Increase search key by one, to read the next item, if we can. */
+ if (!btrfs_ioctl_search_args_inc(&args))
+ break;
+ }
+
+ return 0;
+}
+
+static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) {
+ _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL;
+ bool copy_from_parent = false, insert_intermediary_qgroup = false;
+ int n_old_qgroups, n_old_parent_qgroups, r, i;
+ uint64_t old_parent_id;
+
+ assert(fd >= 0);
+
+ /* Copies a reduced form of quota information from the old to
+ * the new subvolume. */
+
+ n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups);
+ if (n_old_qgroups <= 0) /* Nothing to copy */
+ return n_old_qgroups;
+
+ r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id);
+ if (r == -ENXIO)
+ /* We have no parent, hence nothing to copy. */
+ n_old_parent_qgroups = 0;
+ else if (r < 0)
+ return r;
+ else {
+ n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups);
+ if (n_old_parent_qgroups < 0)
+ return n_old_parent_qgroups;
+ }
+
+ for (i = 0; i < n_old_qgroups; i++) {
+ uint64_t id;
+ int j;
+
+ r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id);
+ if (r < 0)
+ return r;
+
+ if (id == old_subvol_id) {
+ /* The old subvolume was member of a qgroup
+ * that had the same id, but a different level
+ * as it self. Let's set up something similar
+ * in the destination. */
+ insert_intermediary_qgroup = true;
+ break;
+ }
+
+ for (j = 0; j < n_old_parent_qgroups; j++)
+ if (old_parent_qgroups[j] == old_qgroups[i]) {
+ /* The old subvolume shared a common
+ * parent qgroup with its parent
+ * subvolume. Let's set up something
+ * similar in the destination. */
+ copy_from_parent = true;
+ }
+ }
+
+ if (!insert_intermediary_qgroup && !copy_from_parent)
+ return 0;
+
+ return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup);
+}
+
+static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) {
+ uint64_t old_subtree_qgroup, new_subtree_qgroup;
+ bool changed;
+ int r;
+
+ /* First copy the leaf limits */
+ r = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol);
+ if (r < 0)
+ return r;
+ changed = r > 0;
+
+ /* Then, try to copy the subtree limits, if there are any. */
+ r = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &old_subtree_qgroup);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return changed;
+
+ r = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &new_subtree_qgroup);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return changed;
+
+ r = btrfs_qgroup_copy_limits(fd, old_subtree_qgroup, new_subtree_qgroup);
+ if (r != 0)
+ return r;
+
+ return changed;
+}
+
+static int subvol_snapshot_children(
+ int old_fd,
+ int new_fd,
+ const char *subvolume,
+ uint64_t old_subvol_id,
+ BtrfsSnapshotFlags flags) {
+
+ struct btrfs_ioctl_search_args args = {
+ .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
+
+ .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
+ .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
+
+ .key.min_type = BTRFS_ROOT_BACKREF_KEY,
+ .key.max_type = BTRFS_ROOT_BACKREF_KEY,
+
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+
+ struct btrfs_ioctl_vol_args_v2 vol_args = {
+ .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0,
+ .fd = old_fd,
+ };
+ _cleanup_close_ int subvolume_fd = -1;
+ uint64_t new_subvol_id;
+ int r;
+
+ assert(old_fd >= 0);
+ assert(new_fd >= 0);
+ assert(subvolume);
+
+ strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
+
+ if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0)
+ return -errno;
+
+ if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
+ !(flags & BTRFS_SNAPSHOT_QUOTA))
+ return 0;
+
+ if (old_subvol_id == 0) {
+ r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id);
+ if (r < 0)
+ return r;
+ }
+
+ r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id);
+ if (r < 0)
+ return r;
+
+ if (flags & BTRFS_SNAPSHOT_QUOTA)
+ (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id);
+
+ if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {
+
+ if (flags & BTRFS_SNAPSHOT_QUOTA)
+ (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
+
+ return 0;
+ }
+
+ args.key.min_offset = args.key.max_offset = old_subvol_id;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
+ return -errno;
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+ _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL;
+ struct btrfs_ioctl_ino_lookup_args ino_args;
+ const struct btrfs_root_ref *ref;
+ _cleanup_close_ int old_child_fd = -1, new_child_fd = -1;
+
+ btrfs_ioctl_search_args_set(&args, sh);
+
+ if (sh->type != BTRFS_ROOT_BACKREF_KEY)
+ continue;
+
+ /* Avoid finding the source subvolume a second
+ * time */
+ if (sh->offset != old_subvol_id)
+ continue;
+
+ /* Avoid running into loops if the new
+ * subvolume is below the old one. */
+ if (sh->objectid == new_subvol_id)
+ continue;
+
+ ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+ p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
+ if (!p)
+ return -ENOMEM;
+
+ zero(ino_args);
+ ino_args.treeid = old_subvol_id;
+ ino_args.objectid = htole64(ref->dirid);
+
+ if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
+ return -errno;
+
+ /* The kernel returns an empty name if the
+ * subvolume is in the top-level directory,
+ * and otherwise appends a slash, so that we
+ * can just concatenate easily here, without
+ * adding a slash. */
+ c = strappend(ino_args.name, p);
+ if (!c)
+ return -ENOMEM;
+
+ old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (old_child_fd < 0)
+ return -errno;
+
+ np = strjoin(subvolume, "/", ino_args.name);
+ if (!np)
+ return -ENOMEM;
+
+ new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (new_child_fd < 0)
+ return -errno;
+
+ if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
+ /* If the snapshot is read-only we
+ * need to mark it writable
+ * temporarily, to put the subsnapshot
+ * into place. */
+
+ if (subvolume_fd < 0) {
+ subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (subvolume_fd < 0)
+ return -errno;
+ }
+
+ r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
+ if (r < 0)
+ return r;
+ }
+
+ /* When btrfs clones the subvolumes, child
+ * subvolumes appear as empty directories. Remove
+ * them, so that we can create a new snapshot
+ * in their place */
+ if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) {
+ int k = -errno;
+
+ if (flags & BTRFS_SNAPSHOT_READ_ONLY)
+ (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);
+
+ return k;
+ }
+
+ r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh->objectid, flags & ~BTRFS_SNAPSHOT_FALLBACK_COPY);
+
+ /* Restore the readonly flag */
+ if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
+ int k;
+
+ k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
+ if (r >= 0 && k < 0)
+ return k;
+ }
+
+ if (r < 0)
+ return r;
+ }
+
+ /* Increase search key by one, to read the next item, if we can. */
+ if (!btrfs_ioctl_search_args_inc(&args))
+ break;
+ }
+
+ if (flags & BTRFS_SNAPSHOT_QUOTA)
+ (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
+
+ return 0;
+}
+
+int btrfs_subvol_snapshot_fd_full(
+ int old_fd,
+ const char *new_path,
+ BtrfsSnapshotFlags flags,
+ copy_progress_path_t progress_path,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ _cleanup_close_ int new_fd = -1;
+ const char *subvolume;
+ int r;
+
+ assert(old_fd >= 0);
+ assert(new_path);
+
+ r = btrfs_is_subvol_fd(old_fd);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ bool plain_directory = false;
+
+ /* If the source isn't a proper subvolume, fail unless fallback is requested */
+ if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
+ return -EISDIR;
+
+ r = btrfs_subvol_make(new_path);
+ if (r == -ENOTTY && (flags & BTRFS_SNAPSHOT_FALLBACK_DIRECTORY)) {
+ /* If the destination doesn't support subvolumes, then use a plain directory, if that's requested. */
+ if (mkdir(new_path, 0755) < 0)
+ return -errno;
+
+ plain_directory = true;
+ } else if (r < 0)
+ return r;
+
+ r = copy_directory_fd_full(old_fd, new_path, COPY_MERGE|COPY_REFLINK, progress_path, progress_bytes, userdata);
+ if (r < 0)
+ goto fallback_fail;
+
+ if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
+
+ if (plain_directory) {
+ /* Plain directories have no recursive read-only flag, but something pretty close to
+ * it: the IMMUTABLE bit. Let's use this here, if this is requested. */
+
+ if (flags & BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE)
+ (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);
+ } else {
+ r = btrfs_subvol_set_read_only(new_path, true);
+ if (r < 0)
+ goto fallback_fail;
+ }
+ }
+
+ return 0;
+
+ fallback_fail:
+ (void) rm_rf(new_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ return r;
+ }
+
+ r = extract_subvolume_name(new_path, &subvolume);
+ if (r < 0)
+ return r;
+
+ new_fd = open_parent(new_path, O_CLOEXEC, 0);
+ if (new_fd < 0)
+ return new_fd;
+
+ return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags);
+}
+
+int btrfs_subvol_snapshot_full(
+ const char *old_path,
+ const char *new_path,
+ BtrfsSnapshotFlags flags,
+ copy_progress_path_t progress_path,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ _cleanup_close_ int old_fd = -1;
+
+ assert(old_path);
+ assert(new_path);
+
+ old_fd = open(old_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (old_fd < 0)
+ return -errno;
+
+ return btrfs_subvol_snapshot_fd_full(old_fd, new_path, flags, progress_path, progress_bytes, userdata);
+}
+
+int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) {
+
+ struct btrfs_ioctl_search_args args = {
+ /* Tree of quota items */
+ .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
+
+ /* Look precisely for the quota relation items */
+ .key.min_type = BTRFS_QGROUP_RELATION_KEY,
+ .key.max_type = BTRFS_QGROUP_RELATION_KEY,
+
+ /* No restrictions on the other components */
+ .key.min_offset = 0,
+ .key.max_offset = (uint64_t) -1,
+
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+
+ _cleanup_free_ uint64_t *items = NULL;
+ size_t n_items = 0, n_allocated = 0;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (qgroupid == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &qgroupid);
+ if (r < 0)
+ return r;
+ } else {
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+ }
+
+ args.key.min_objectid = args.key.max_objectid = qgroupid;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
+ if (errno == ENOENT) /* quota tree missing: quota is disabled */
+ break;
+
+ return -errno;
+ }
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+
+ /* Make sure we start the next search at least from this entry */
+ btrfs_ioctl_search_args_set(&args, sh);
+
+ if (sh->type != BTRFS_QGROUP_RELATION_KEY)
+ continue;
+ if (sh->offset < sh->objectid)
+ continue;
+ if (sh->objectid != qgroupid)
+ continue;
+
+ if (!GREEDY_REALLOC(items, n_allocated, n_items+1))
+ return -ENOMEM;
+
+ items[n_items++] = sh->offset;
+ }
+
+ /* Increase search key by one, to read the next item, if we can. */
+ if (!btrfs_ioctl_search_args_inc(&args))
+ break;
+ }
+
+ if (n_items <= 0) {
+ *ret = NULL;
+ return 0;
+ }
+
+ *ret = TAKE_PTR(items);
+
+ return (int) n_items;
+}
+
+int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) {
+ _cleanup_free_ uint64_t *qgroups = NULL;
+ uint64_t parent_subvol;
+ bool changed = false;
+ int n = 0, r;
+
+ assert(fd >= 0);
+
+ /*
+ * Sets up the specified subvolume's qgroup automatically in
+ * one of two ways:
+ *
+ * If insert_intermediary_qgroup is false, the subvolume's
+ * leaf qgroup will be assigned to the same parent qgroups as
+ * the subvolume's parent subvolume.
+ *
+ * If insert_intermediary_qgroup is true a new intermediary
+ * higher-level qgroup is created, with a higher level number,
+ * but reusing the id of the subvolume. The level number is
+ * picked as one smaller than the lowest level qgroup the
+ * parent subvolume is a member of. If the parent subvolume's
+ * leaf qgroup is assigned to no higher-level qgroup a new
+ * qgroup of level 255 is created instead. Either way, the new
+ * qgroup is then assigned to the parent's higher-level
+ * qgroup, and the subvolume itself is assigned to it.
+ *
+ * If the subvolume is already assigned to a higher level
+ * qgroup, no operation is executed.
+ *
+ * Effectively this means: regardless if
+ * insert_intermediary_qgroup is true or not, after this
+ * function is invoked the subvolume will be accounted within
+ * the same qgroups as the parent. However, if it is true, it
+ * will also get its own higher-level qgroup, which may in
+ * turn be used by subvolumes created beneath this subvolume
+ * later on.
+ *
+ * This hence defines a simple default qgroup setup for
+ * subvolumes, as long as this function is invoked on each
+ * created subvolume: each subvolume is always accounting
+ * together with its immediate parents. Optionally, if
+ * insert_intermediary_qgroup is true, it will also get a
+ * qgroup that then includes all its own child subvolumes.
+ */
+
+ if (subvol_id == 0) {
+ r = btrfs_is_subvol_fd(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+
+ r = btrfs_subvol_get_id_fd(fd, &subvol_id);
+ if (r < 0)
+ return r;
+ }
+
+ n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
+ if (n < 0)
+ return n;
+ if (n > 0) /* already parent qgroups set up, let's bail */
+ return 0;
+
+ qgroups = mfree(qgroups);
+
+ r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol);
+ if (r == -ENXIO)
+ /* No parent, hence no qgroup memberships */
+ n = 0;
+ else if (r < 0)
+ return r;
+ else {
+ n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups);
+ if (n < 0)
+ return n;
+ }
+
+ if (insert_intermediary_qgroup) {
+ uint64_t lowest = 256, new_qgroupid;
+ bool created = false;
+ int i;
+
+ /* Determine the lowest qgroup that the parent
+ * subvolume is assigned to. */
+
+ for (i = 0; i < n; i++) {
+ uint64_t level;
+
+ r = btrfs_qgroupid_split(qgroups[i], &level, NULL);
+ if (r < 0)
+ return r;
+
+ if (level < lowest)
+ lowest = level;
+ }
+
+ if (lowest <= 1) /* There are no levels left we could use insert an intermediary qgroup at */
+ return -EBUSY;
+
+ r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid);
+ if (r < 0)
+ return r;
+
+ /* Create the new intermediary group, unless it already exists */
+ r = btrfs_qgroup_create(fd, new_qgroupid);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ if (r >= 0)
+ changed = created = true;
+
+ for (i = 0; i < n; i++) {
+ r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]);
+ if (r < 0 && r != -EEXIST) {
+ if (created)
+ (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
+
+ return r;
+ }
+ if (r >= 0)
+ changed = true;
+ }
+
+ r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid);
+ if (r < 0 && r != -EEXIST) {
+ if (created)
+ (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
+ return r;
+ }
+ if (r >= 0)
+ changed = true;
+
+ } else {
+ int i;
+
+ /* Assign our subvolume to all the same qgroups as the parent */
+
+ for (i = 0; i < n; i++) {
+ r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ if (r >= 0)
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup);
+}
+
+int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) {
+
+ struct btrfs_ioctl_search_args args = {
+ /* Tree of tree roots */
+ .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
+
+ /* Look precisely for the subvolume items */
+ .key.min_type = BTRFS_ROOT_BACKREF_KEY,
+ .key.max_type = BTRFS_ROOT_BACKREF_KEY,
+
+ /* No restrictions on the other components */
+ .key.min_offset = 0,
+ .key.max_offset = (uint64_t) -1,
+
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (subvol_id == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &subvol_id);
+ if (r < 0)
+ return r;
+ } else {
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+ }
+
+ args.key.min_objectid = args.key.max_objectid = subvol_id;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
+ return negative_errno();
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+
+ if (sh->type != BTRFS_ROOT_BACKREF_KEY)
+ continue;
+ if (sh->objectid != subvol_id)
+ continue;
+
+ *ret = sh->offset;
+ return 0;
+ }
+ }
+
+ return -ENXIO;
+}
diff --git a/src/basic/btrfs-util.h b/src/basic/btrfs-util.h
new file mode 100644
index 0000000..7d848a7
--- /dev/null
+++ b/src/basic/btrfs-util.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "sd-id128.h"
+
+#include "copy.h"
+#include "time-util.h"
+
+typedef struct BtrfsSubvolInfo {
+ uint64_t subvol_id;
+ usec_t otime;
+
+ sd_id128_t uuid;
+ sd_id128_t parent_uuid;
+
+ bool read_only;
+} BtrfsSubvolInfo;
+
+typedef struct BtrfsQuotaInfo {
+ uint64_t referenced;
+ uint64_t exclusive;
+ uint64_t referenced_max;
+ uint64_t exclusive_max;
+} BtrfsQuotaInfo;
+
+typedef enum BtrfsSnapshotFlags {
+ BTRFS_SNAPSHOT_FALLBACK_COPY = 1 << 0, /* If the source isn't a subvolume, reflink everything */
+ BTRFS_SNAPSHOT_READ_ONLY = 1 << 1,
+ BTRFS_SNAPSHOT_RECURSIVE = 1 << 2,
+ BTRFS_SNAPSHOT_QUOTA = 1 << 3,
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY = 1 << 4, /* If the destination doesn't support subvolumes, reflink/copy instead */
+ BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE = 1 << 5, /* When we can't create a subvolume, use the FS_IMMUTABLE attribute for indicating read-only */
+} BtrfsSnapshotFlags;
+
+typedef enum BtrfsRemoveFlags {
+ BTRFS_REMOVE_RECURSIVE = 1 << 0,
+ BTRFS_REMOVE_QUOTA = 1 << 1,
+} BtrfsRemoveFlags;
+
+int btrfs_is_filesystem(int fd);
+
+int btrfs_is_subvol_fd(int fd);
+int btrfs_is_subvol(const char *path);
+
+int btrfs_reflink(int infd, int outfd);
+int btrfs_clone_range(int infd, uint64_t in_offset, int ofd, uint64_t out_offset, uint64_t sz);
+
+int btrfs_get_block_device_fd(int fd, dev_t *dev);
+int btrfs_get_block_device(const char *path, dev_t *dev);
+
+int btrfs_defrag_fd(int fd);
+int btrfs_defrag(const char *p);
+
+int btrfs_quota_enable_fd(int fd, bool b);
+int btrfs_quota_enable(const char *path, bool b);
+
+int btrfs_quota_scan_start(int fd);
+int btrfs_quota_scan_wait(int fd);
+int btrfs_quota_scan_ongoing(int fd);
+
+int btrfs_subvol_make(const char *path);
+int btrfs_subvol_make_fd(int fd, const char *subvolume);
+
+int btrfs_subvol_snapshot_fd_full(int old_fd, const char *new_path, BtrfsSnapshotFlags flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+static inline int btrfs_subvol_snapshot_fd(int old_fd, const char *new_path, BtrfsSnapshotFlags flags) {
+ return btrfs_subvol_snapshot_fd_full(old_fd, new_path, flags, NULL, NULL, NULL);
+}
+
+int btrfs_subvol_snapshot_full(const char *old_path, const char *new_path, BtrfsSnapshotFlags flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+static inline int btrfs_subvol_snapshot(const char *old_path, const char *new_path, BtrfsSnapshotFlags flags) {
+ return btrfs_subvol_snapshot_full(old_path, new_path, flags, NULL, NULL, NULL);
+}
+
+int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags);
+int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags);
+
+int btrfs_subvol_set_read_only_fd(int fd, bool b);
+int btrfs_subvol_set_read_only(const char *path, bool b);
+int btrfs_subvol_get_read_only_fd(int fd);
+
+int btrfs_subvol_get_id(int fd, const char *subvolume, uint64_t *ret);
+int btrfs_subvol_get_id_fd(int fd, uint64_t *ret);
+int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret);
+
+int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *info);
+
+int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret);
+
+int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *quota);
+int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *quota);
+
+int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max);
+int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max);
+
+int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool new_qgroup);
+int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup);
+
+int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret);
+int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id);
+
+int btrfs_qgroup_create(int fd, uint64_t qgroupid);
+int btrfs_qgroup_destroy(int fd, uint64_t qgroupid);
+int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid);
+
+int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max);
+int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max);
+
+int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid);
+
+int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent);
+int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent);
+
+int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret);
+
+int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *quota);
+int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *quota);
diff --git a/src/basic/build.h b/src/basic/build.h
new file mode 100644
index 0000000..7a59059
--- /dev/null
+++ b/src/basic/build.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "version.h"
+
+#if HAVE_PAM
+#define _PAM_FEATURE_ "+PAM"
+#else
+#define _PAM_FEATURE_ "-PAM"
+#endif
+
+#if HAVE_AUDIT
+#define _AUDIT_FEATURE_ "+AUDIT"
+#else
+#define _AUDIT_FEATURE_ "-AUDIT"
+#endif
+
+#if HAVE_SELINUX
+#define _SELINUX_FEATURE_ "+SELINUX"
+#else
+#define _SELINUX_FEATURE_ "-SELINUX"
+#endif
+
+#if HAVE_APPARMOR
+#define _APPARMOR_FEATURE_ "+APPARMOR"
+#else
+#define _APPARMOR_FEATURE_ "-APPARMOR"
+#endif
+
+#if ENABLE_IMA
+#define _IMA_FEATURE_ "+IMA"
+#else
+#define _IMA_FEATURE_ "-IMA"
+#endif
+
+#if ENABLE_SMACK
+#define _SMACK_FEATURE_ "+SMACK"
+#else
+#define _SMACK_FEATURE_ "-SMACK"
+#endif
+
+#if HAVE_SYSV_COMPAT
+#define _SYSVINIT_FEATURE_ "+SYSVINIT"
+#else
+#define _SYSVINIT_FEATURE_ "-SYSVINIT"
+#endif
+
+#if ENABLE_UTMP
+#define _UTMP_FEATURE_ "+UTMP"
+#else
+#define _UTMP_FEATURE_ "-UTMP"
+#endif
+
+#if HAVE_LIBCRYPTSETUP
+#define _LIBCRYPTSETUP_FEATURE_ "+LIBCRYPTSETUP"
+#else
+#define _LIBCRYPTSETUP_FEATURE_ "-LIBCRYPTSETUP"
+#endif
+
+#if HAVE_GCRYPT
+#define _GCRYPT_FEATURE_ "+GCRYPT"
+#else
+#define _GCRYPT_FEATURE_ "-GCRYPT"
+#endif
+
+#if HAVE_GNUTLS
+#define _GNUTLS_FEATURE_ "+GNUTLS"
+#else
+#define _GNUTLS_FEATURE_ "-GNUTLS"
+#endif
+
+#if HAVE_ACL
+#define _ACL_FEATURE_ "+ACL"
+#else
+#define _ACL_FEATURE_ "-ACL"
+#endif
+
+#if HAVE_XZ
+#define _XZ_FEATURE_ "+XZ"
+#else
+#define _XZ_FEATURE_ "-XZ"
+#endif
+
+#if HAVE_LZ4
+#define _LZ4_FEATURE_ "+LZ4"
+#else
+#define _LZ4_FEATURE_ "-LZ4"
+#endif
+
+#if HAVE_SECCOMP
+#define _SECCOMP_FEATURE_ "+SECCOMP"
+#else
+#define _SECCOMP_FEATURE_ "-SECCOMP"
+#endif
+
+#if HAVE_BLKID
+#define _BLKID_FEATURE_ "+BLKID"
+#else
+#define _BLKID_FEATURE_ "-BLKID"
+#endif
+
+#if HAVE_ELFUTILS
+#define _ELFUTILS_FEATURE_ "+ELFUTILS"
+#else
+#define _ELFUTILS_FEATURE_ "-ELFUTILS"
+#endif
+
+#if HAVE_KMOD
+#define _KMOD_FEATURE_ "+KMOD"
+#else
+#define _KMOD_FEATURE_ "-KMOD"
+#endif
+
+#if HAVE_LIBIDN2
+#define _IDN2_FEATURE_ "+IDN2"
+#else
+#define _IDN2_FEATURE_ "-IDN2"
+#endif
+
+#if HAVE_LIBIDN
+#define _IDN_FEATURE_ "+IDN"
+#else
+#define _IDN_FEATURE_ "-IDN"
+#endif
+
+#if HAVE_PCRE2
+#define _PCRE2_FEATURE_ "+PCRE2"
+#else
+#define _PCRE2_FEATURE_ "-PCRE2"
+#endif
+
+#define _CGROUP_HIEARCHY_ "default-hierarchy=" DEFAULT_HIERARCHY_NAME
+
+#define SYSTEMD_FEATURES \
+ _PAM_FEATURE_ " " \
+ _AUDIT_FEATURE_ " " \
+ _SELINUX_FEATURE_ " " \
+ _IMA_FEATURE_ " " \
+ _APPARMOR_FEATURE_ " " \
+ _SMACK_FEATURE_ " " \
+ _SYSVINIT_FEATURE_ " " \
+ _UTMP_FEATURE_ " " \
+ _LIBCRYPTSETUP_FEATURE_ " " \
+ _GCRYPT_FEATURE_ " " \
+ _GNUTLS_FEATURE_ " " \
+ _ACL_FEATURE_ " " \
+ _XZ_FEATURE_ " " \
+ _LZ4_FEATURE_ " " \
+ _SECCOMP_FEATURE_ " " \
+ _BLKID_FEATURE_ " " \
+ _ELFUTILS_FEATURE_ " " \
+ _KMOD_FEATURE_ " " \
+ _IDN2_FEATURE_ " " \
+ _IDN_FEATURE_ " " \
+ _PCRE2_FEATURE_ " " \
+ _CGROUP_HIEARCHY_
diff --git a/src/basic/bus-label.c b/src/basic/bus-label.c
new file mode 100644
index 0000000..1613cf7
--- /dev/null
+++ b/src/basic/bus-label.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "bus-label.h"
+#include "hexdecoct.h"
+#include "macro.h"
+
+char *bus_label_escape(const char *s) {
+ char *r, *t;
+ const char *f;
+
+ assert_return(s, NULL);
+
+ /* Escapes all chars that D-Bus' object path cannot deal
+ * with. Can be reversed with bus_path_unescape(). We special
+ * case the empty string. */
+
+ if (*s == 0)
+ return strdup("_");
+
+ r = new(char, strlen(s)*3 + 1);
+ if (!r)
+ return NULL;
+
+ for (f = s, t = r; *f; f++) {
+
+ /* Escape everything that is not a-zA-Z0-9. We also
+ * escape 0-9 if it's the first character */
+
+ if (!(*f >= 'A' && *f <= 'Z') &&
+ !(*f >= 'a' && *f <= 'z') &&
+ !(f > s && *f >= '0' && *f <= '9')) {
+ *(t++) = '_';
+ *(t++) = hexchar(*f >> 4);
+ *(t++) = hexchar(*f);
+ } else
+ *(t++) = *f;
+ }
+
+ *t = 0;
+
+ return r;
+}
+
+char *bus_label_unescape_n(const char *f, size_t l) {
+ char *r, *t;
+ size_t i;
+
+ assert_return(f, NULL);
+
+ /* Special case for the empty string */
+ if (l == 1 && *f == '_')
+ return strdup("");
+
+ r = new(char, l + 1);
+ if (!r)
+ return NULL;
+
+ for (i = 0, t = r; i < l; ++i) {
+ if (f[i] == '_') {
+ int a, b;
+
+ if (l - i < 3 ||
+ (a = unhexchar(f[i + 1])) < 0 ||
+ (b = unhexchar(f[i + 2])) < 0) {
+ /* Invalid escape code, let's take it literal then */
+ *(t++) = '_';
+ } else {
+ *(t++) = (char) ((a << 4) | b);
+ i += 2;
+ }
+ } else
+ *(t++) = f[i];
+ }
+
+ *t = 0;
+
+ return r;
+}
diff --git a/src/basic/bus-label.h b/src/basic/bus-label.h
new file mode 100644
index 0000000..664cfaf
--- /dev/null
+++ b/src/basic/bus-label.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "string-util.h"
+
+char *bus_label_escape(const char *s);
+char *bus_label_unescape_n(const char *f, size_t l);
+
+static inline char *bus_label_unescape(const char *f) {
+ return bus_label_unescape_n(f, strlen_ptr(f));
+}
diff --git a/src/basic/cap-list.c b/src/basic/cap-list.c
new file mode 100644
index 0000000..29a17d9
--- /dev/null
+++ b/src/basic/cap-list.c
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "cap-list.h"
+#include "extract-word.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "util.h"
+
+static const struct capability_name* lookup_capability(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "cap-from-name.h"
+#include "cap-to-name.h"
+
+const char *capability_to_name(int id) {
+
+ if (id < 0)
+ return NULL;
+
+ if ((size_t) id >= ELEMENTSOF(capability_names))
+ return NULL;
+
+ return capability_names[id];
+}
+
+int capability_from_name(const char *name) {
+ const struct capability_name *sc;
+ int r, i;
+
+ assert(name);
+
+ /* Try to parse numeric capability */
+ r = safe_atoi(name, &i);
+ if (r >= 0) {
+ if (i >= 0 && (size_t) i < ELEMENTSOF(capability_names))
+ return i;
+ else
+ return -EINVAL;
+ }
+
+ /* Try to parse string capability */
+ sc = lookup_capability(name, strlen(name));
+ if (!sc)
+ return -EINVAL;
+
+ return sc->id;
+}
+
+int capability_list_length(void) {
+ return (int) ELEMENTSOF(capability_names);
+}
+
+int capability_set_to_string_alloc(uint64_t set, char **s) {
+ _cleanup_free_ char *str = NULL;
+ unsigned long i;
+ size_t allocated = 0, n = 0;
+
+ assert(s);
+
+ for (i = 0; i < cap_last_cap(); i++)
+ if (set & (UINT64_C(1) << i)) {
+ const char *p;
+ size_t add;
+
+ p = capability_to_name(i);
+ if (!p)
+ return -EINVAL;
+
+ add = strlen(p);
+
+ if (!GREEDY_REALLOC(str, allocated, n + add + 2))
+ return -ENOMEM;
+
+ strcpy(mempcpy(str + n, p, add), " ");
+ n += add + 1;
+ }
+
+ if (!GREEDY_REALLOC(str, allocated, n + 1))
+ return -ENOMEM;
+
+ str[n > 0 ? n - 1 : 0] = '\0'; /* truncate the last space, if it's there */
+
+ *s = TAKE_PTR(str);
+
+ return 0;
+}
+
+int capability_set_from_string(const char *s, uint64_t *set) {
+ uint64_t val = 0;
+ const char *p;
+
+ assert(set);
+
+ for (p = s;;) {
+ _cleanup_free_ char *word = NULL;
+ int r;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+ if (r == -ENOMEM)
+ return r;
+ if (r <= 0)
+ break;
+
+ r = capability_from_name(word);
+ if (r < 0)
+ continue;
+
+ val |= ((uint64_t) UINT64_C(1)) << (uint64_t) r;
+ }
+
+ *set = val;
+
+ return 0;
+}
diff --git a/src/basic/cap-list.h b/src/basic/cap-list.h
new file mode 100644
index 0000000..ab41924
--- /dev/null
+++ b/src/basic/cap-list.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+const char *capability_to_name(int id);
+int capability_from_name(const char *name);
+int capability_list_length(void);
+
+int capability_set_to_string_alloc(uint64_t set, char **s);
+int capability_set_from_string(const char *s, uint64_t *set);
diff --git a/src/basic/cap-to-name.awk b/src/basic/cap-to-name.awk
new file mode 100644
index 0000000..402a782
--- /dev/null
+++ b/src/basic/cap-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{
+ print "static const char* const capability_names[] = { "
+}
+{
+ printf " [%s] = \"%s\",\n", $1, tolower($1)
+}
+END{
+ print "};"
+}
diff --git a/src/basic/capability-util.c b/src/basic/capability-util.c
new file mode 100644
index 0000000..b944ee6
--- /dev/null
+++ b/src/basic/capability-util.c
@@ -0,0 +1,487 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <grp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/capability.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_prctl.h"
+#include "parse-util.h"
+#include "user-util.h"
+#include "util.h"
+
+int have_effective_cap(int value) {
+ _cleanup_cap_free_ cap_t cap;
+ cap_flag_value_t fv;
+
+ cap = cap_get_proc();
+ if (!cap)
+ return -errno;
+
+ if (cap_get_flag(cap, value, CAP_EFFECTIVE, &fv) < 0)
+ return -errno;
+ else
+ return fv == CAP_SET;
+}
+
+unsigned long cap_last_cap(void) {
+ static thread_local unsigned long saved;
+ static thread_local bool valid = false;
+ _cleanup_free_ char *content = NULL;
+ unsigned long p = 0;
+ int r;
+
+ if (valid)
+ return saved;
+
+ /* available since linux-3.2 */
+ r = read_one_line_file("/proc/sys/kernel/cap_last_cap", &content);
+ if (r >= 0) {
+ r = safe_atolu(content, &p);
+ if (r >= 0) {
+ saved = p;
+ valid = true;
+ return p;
+ }
+ }
+
+ /* fall back to syscall-probing for pre linux-3.2 */
+ p = (unsigned long) CAP_LAST_CAP;
+
+ if (prctl(PR_CAPBSET_READ, p) < 0) {
+
+ /* Hmm, look downwards, until we find one that
+ * works */
+ for (p--; p > 0; p --)
+ if (prctl(PR_CAPBSET_READ, p) >= 0)
+ break;
+
+ } else {
+
+ /* Hmm, look upwards, until we find one that doesn't
+ * work */
+ for (;; p++)
+ if (prctl(PR_CAPBSET_READ, p+1) < 0)
+ break;
+ }
+
+ saved = p;
+ valid = true;
+
+ return p;
+}
+
+int capability_update_inherited_set(cap_t caps, uint64_t set) {
+ unsigned long i;
+
+ /* Add capabilities in the set to the inherited caps. Do not apply
+ * them yet. */
+
+ for (i = 0; i < cap_last_cap(); i++) {
+
+ if (set & (UINT64_C(1) << i)) {
+ cap_value_t v;
+
+ v = (cap_value_t) i;
+
+ /* Make the capability inheritable. */
+ if (cap_set_flag(caps, CAP_INHERITABLE, 1, &v, CAP_SET) < 0)
+ return -errno;
+ }
+ }
+
+ return 0;
+}
+
+int capability_ambient_set_apply(uint64_t set, bool also_inherit) {
+ unsigned long i;
+ _cleanup_cap_free_ cap_t caps = NULL;
+
+ /* Add the capabilities to the ambient set. */
+
+ if (also_inherit) {
+ int r;
+ caps = cap_get_proc();
+ if (!caps)
+ return -errno;
+
+ r = capability_update_inherited_set(caps, set);
+ if (r < 0)
+ return -errno;
+
+ if (cap_set_proc(caps) < 0)
+ return -errno;
+ }
+
+ for (i = 0; i < cap_last_cap(); i++) {
+
+ if (set & (UINT64_C(1) << i)) {
+
+ /* Add the capability to the ambient set. */
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i, 0, 0) < 0)
+ return -errno;
+ }
+ }
+
+ return 0;
+}
+
+int capability_bounding_set_drop(uint64_t keep, bool right_now) {
+ _cleanup_cap_free_ cap_t before_cap = NULL, after_cap = NULL;
+ cap_flag_value_t fv;
+ unsigned long i;
+ int r;
+
+ /* If we are run as PID 1 we will lack CAP_SETPCAP by default
+ * in the effective set (yes, the kernel drops that when
+ * executing init!), so get it back temporarily so that we can
+ * call PR_CAPBSET_DROP. */
+
+ before_cap = cap_get_proc();
+ if (!before_cap)
+ return -errno;
+
+ if (cap_get_flag(before_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0)
+ return -errno;
+
+ if (fv != CAP_SET) {
+ _cleanup_cap_free_ cap_t temp_cap = NULL;
+ static const cap_value_t v = CAP_SETPCAP;
+
+ temp_cap = cap_dup(before_cap);
+ if (!temp_cap)
+ return -errno;
+
+ if (cap_set_flag(temp_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0)
+ return -errno;
+
+ if (cap_set_proc(temp_cap) < 0)
+ log_debug_errno(errno, "Can't acquire effective CAP_SETPCAP bit, ignoring: %m");
+
+ /* If we didn't manage to acquire the CAP_SETPCAP bit, we continue anyway, after all this just means
+ * we'll fail later, when we actually intend to drop some capabilities. */
+ }
+
+ after_cap = cap_dup(before_cap);
+ if (!after_cap)
+ return -errno;
+
+ for (i = 0; i <= cap_last_cap(); i++) {
+ cap_value_t v;
+
+ if ((keep & (UINT64_C(1) << i)))
+ continue;
+
+ /* Drop it from the bounding set */
+ if (prctl(PR_CAPBSET_DROP, i) < 0) {
+ r = -errno;
+
+ /* If dropping the capability failed, let's see if we didn't have it in the first place. If so,
+ * continue anyway, as dropping a capability we didn't have in the first place doesn't really
+ * matter anyway. */
+ if (prctl(PR_CAPBSET_READ, i) != 0)
+ goto finish;
+ }
+ v = (cap_value_t) i;
+
+ /* Also drop it from the inheritable set, so
+ * that anything we exec() loses the
+ * capability for good. */
+ if (cap_set_flag(after_cap, CAP_INHERITABLE, 1, &v, CAP_CLEAR) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ /* If we shall apply this right now drop it
+ * also from our own capability sets. */
+ if (right_now) {
+ if (cap_set_flag(after_cap, CAP_PERMITTED, 1, &v, CAP_CLEAR) < 0 ||
+ cap_set_flag(after_cap, CAP_EFFECTIVE, 1, &v, CAP_CLEAR) < 0) {
+ r = -errno;
+ goto finish;
+ }
+ }
+ }
+
+ r = 0;
+
+finish:
+ if (cap_set_proc(after_cap) < 0) {
+ /* If there are no actual changes anyway then let's ignore this error. */
+ if (cap_compare(before_cap, after_cap) != 0)
+ r = -errno;
+ }
+
+ return r;
+}
+
+static int drop_from_file(const char *fn, uint64_t keep) {
+ _cleanup_free_ char *p = NULL;
+ uint64_t current, after;
+ uint32_t hi, lo;
+ int r, k;
+
+ r = read_one_line_file(fn, &p);
+ if (r < 0)
+ return r;
+
+ assert_cc(sizeof(hi) == sizeof(unsigned));
+ assert_cc(sizeof(lo) == sizeof(unsigned));
+
+ k = sscanf(p, "%u %u", &lo, &hi);
+ if (k != 2)
+ return -EIO;
+
+ current = (uint64_t) lo | ((uint64_t) hi << 32ULL);
+ after = current & keep;
+
+ if (current == after)
+ return 0;
+
+ lo = (unsigned) (after & 0xFFFFFFFFULL);
+ hi = (unsigned) ((after >> 32ULL) & 0xFFFFFFFFULL);
+
+ return write_string_filef(fn, WRITE_STRING_FILE_CREATE, "%u %u", lo, hi);
+}
+
+int capability_bounding_set_drop_usermode(uint64_t keep) {
+ int r;
+
+ r = drop_from_file("/proc/sys/kernel/usermodehelper/inheritable", keep);
+ if (r < 0)
+ return r;
+
+ r = drop_from_file("/proc/sys/kernel/usermodehelper/bset", keep);
+ if (r < 0)
+ return r;
+
+ return r;
+}
+
+int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) {
+ _cleanup_cap_free_ cap_t d = NULL;
+ unsigned i, j = 0;
+ int r;
+
+ /* Unfortunately we cannot leave privilege dropping to PID 1
+ * here, since we want to run as user but want to keep some
+ * capabilities. Since file capabilities have been introduced
+ * this cannot be done across exec() anymore, unless our
+ * binary has the capability configured in the file system,
+ * which we want to avoid. */
+
+ if (setresgid(gid, gid, gid) < 0)
+ return log_error_errno(errno, "Failed to change group ID: %m");
+
+ r = maybe_setgroups(0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop auxiliary groups list: %m");
+
+ /* Ensure we keep the permitted caps across the setresuid() */
+ if (prctl(PR_SET_KEEPCAPS, 1) < 0)
+ return log_error_errno(errno, "Failed to enable keep capabilities flag: %m");
+
+ if (setresuid(uid, uid, uid) < 0)
+ return log_error_errno(errno, "Failed to change user ID: %m");
+
+ if (prctl(PR_SET_KEEPCAPS, 0) < 0)
+ return log_error_errno(errno, "Failed to disable keep capabilities flag: %m");
+
+ /* Drop all caps from the bounding set, except the ones we want */
+ r = capability_bounding_set_drop(keep_capabilities, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop capabilities: %m");
+
+ /* Now upgrade the permitted caps we still kept to effective caps */
+ d = cap_init();
+ if (!d)
+ return log_oom();
+
+ if (keep_capabilities) {
+ cap_value_t bits[u64log2(keep_capabilities) + 1];
+
+ for (i = 0; i < ELEMENTSOF(bits); i++)
+ if (keep_capabilities & (1ULL << i))
+ bits[j++] = i;
+
+ /* use enough bits */
+ assert(i == 64 || (keep_capabilities >> i) == 0);
+ /* don't use too many bits */
+ assert(keep_capabilities & (1ULL << (i - 1)));
+
+ if (cap_set_flag(d, CAP_EFFECTIVE, j, bits, CAP_SET) < 0 ||
+ cap_set_flag(d, CAP_PERMITTED, j, bits, CAP_SET) < 0)
+ return log_error_errno(errno, "Failed to enable capabilities bits: %m");
+
+ if (cap_set_proc(d) < 0)
+ return log_error_errno(errno, "Failed to increase capabilities: %m");
+ }
+
+ return 0;
+}
+
+int drop_capability(cap_value_t cv) {
+ _cleanup_cap_free_ cap_t tmp_cap = NULL;
+
+ tmp_cap = cap_get_proc();
+ if (!tmp_cap)
+ return -errno;
+
+ if ((cap_set_flag(tmp_cap, CAP_INHERITABLE, 1, &cv, CAP_CLEAR) < 0) ||
+ (cap_set_flag(tmp_cap, CAP_PERMITTED, 1, &cv, CAP_CLEAR) < 0) ||
+ (cap_set_flag(tmp_cap, CAP_EFFECTIVE, 1, &cv, CAP_CLEAR) < 0))
+ return -errno;
+
+ if (cap_set_proc(tmp_cap) < 0)
+ return -errno;
+
+ return 0;
+}
+
+bool ambient_capabilities_supported(void) {
+ static int cache = -1;
+
+ if (cache >= 0)
+ return cache;
+
+ /* If PR_CAP_AMBIENT returns something valid, or an unexpected error code we assume that ambient caps are
+ * available. */
+
+ cache = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_KILL, 0, 0) >= 0 ||
+ !IN_SET(errno, EINVAL, EOPNOTSUPP, ENOSYS);
+
+ return cache;
+}
+
+int capability_quintet_enforce(const CapabilityQuintet *q) {
+ _cleanup_cap_free_ cap_t c = NULL;
+ int r;
+
+ if (q->ambient != (uint64_t) -1) {
+ unsigned long i;
+ bool changed = false;
+
+ c = cap_get_proc();
+ if (!c)
+ return -errno;
+
+ /* In order to raise the ambient caps set we first need to raise the matching inheritable + permitted
+ * cap */
+ for (i = 0; i <= cap_last_cap(); i++) {
+ uint64_t m = UINT64_C(1) << i;
+ cap_value_t cv = (cap_value_t) i;
+ cap_flag_value_t old_value_inheritable, old_value_permitted;
+
+ if ((q->ambient & m) == 0)
+ continue;
+
+ if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value_inheritable) < 0)
+ return -errno;
+ if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value_permitted) < 0)
+ return -errno;
+
+ if (old_value_inheritable == CAP_SET && old_value_permitted == CAP_SET)
+ continue;
+
+ if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, CAP_SET) < 0)
+ return -errno;
+
+ if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, CAP_SET) < 0)
+ return -errno;
+
+ changed = true;
+ }
+
+ if (changed)
+ if (cap_set_proc(c) < 0)
+ return -errno;
+
+ r = capability_ambient_set_apply(q->ambient, false);
+ if (r < 0)
+ return r;
+ }
+
+ if (q->inheritable != (uint64_t) -1 || q->permitted != (uint64_t) -1 || q->effective != (uint64_t) -1) {
+ bool changed = false;
+ unsigned long i;
+
+ if (!c) {
+ c = cap_get_proc();
+ if (!c)
+ return -errno;
+ }
+
+ for (i = 0; i <= cap_last_cap(); i++) {
+ uint64_t m = UINT64_C(1) << i;
+ cap_value_t cv = (cap_value_t) i;
+
+ if (q->inheritable != (uint64_t) -1) {
+ cap_flag_value_t old_value, new_value;
+
+ if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value) < 0)
+ return -errno;
+
+ new_value = (q->inheritable & m) ? CAP_SET : CAP_CLEAR;
+
+ if (old_value != new_value) {
+ changed = true;
+
+ if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, new_value) < 0)
+ return -errno;
+ }
+ }
+
+ if (q->permitted != (uint64_t) -1) {
+ cap_flag_value_t old_value, new_value;
+
+ if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value) < 0)
+ return -errno;
+
+ new_value = (q->permitted & m) ? CAP_SET : CAP_CLEAR;
+
+ if (old_value != new_value) {
+ changed = true;
+
+ if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, new_value) < 0)
+ return -errno;
+ }
+ }
+
+ if (q->effective != (uint64_t) -1) {
+ cap_flag_value_t old_value, new_value;
+
+ if (cap_get_flag(c, cv, CAP_EFFECTIVE, &old_value) < 0)
+ return -errno;
+
+ new_value = (q->effective & m) ? CAP_SET : CAP_CLEAR;
+
+ if (old_value != new_value) {
+ changed = true;
+
+ if (cap_set_flag(c, CAP_EFFECTIVE, 1, &cv, new_value) < 0)
+ return -errno;
+ }
+ }
+ }
+
+ if (changed)
+ if (cap_set_proc(c) < 0)
+ return -errno;
+ }
+
+ if (q->bounding != (uint64_t) -1) {
+ r = capability_bounding_set_drop(q->bounding, false);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/basic/capability-util.h b/src/basic/capability-util.h
new file mode 100644
index 0000000..02c7d5c
--- /dev/null
+++ b/src/basic/capability-util.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/capability.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "missing_capability.h"
+#include "util.h"
+
+#define CAP_ALL (uint64_t) -1
+
+unsigned long cap_last_cap(void);
+int have_effective_cap(int value);
+int capability_bounding_set_drop(uint64_t keep, bool right_now);
+int capability_bounding_set_drop_usermode(uint64_t keep);
+
+int capability_ambient_set_apply(uint64_t set, bool also_inherit);
+int capability_update_inherited_set(cap_t caps, uint64_t ambient_set);
+
+int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities);
+
+int drop_capability(cap_value_t cv);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(cap_t, cap_free);
+#define _cleanup_cap_free_ _cleanup_(cap_freep)
+
+static inline void cap_free_charpp(char **p) {
+ if (*p)
+ cap_free(*p);
+}
+#define _cleanup_cap_free_charp_ _cleanup_(cap_free_charpp)
+
+static inline bool cap_test_all(uint64_t caps) {
+ uint64_t m;
+ m = (UINT64_C(1) << (cap_last_cap() + 1)) - 1;
+ return FLAGS_SET(caps, m);
+}
+
+bool ambient_capabilities_supported(void);
+
+/* Identical to linux/capability.h's CAP_TO_MASK(), but uses an unsigned 1U instead of a signed 1 for shifting left, in
+ * order to avoid complaints about shifting a signed int left by 31 bits, which would make it negative. */
+#define CAP_TO_MASK_CORRECTED(x) (1U << ((x) & 31U))
+
+typedef struct CapabilityQuintet {
+ /* Stores all five types of capabilities in one go. Note that we use (uint64_t) -1 for unset here. This hence
+ * needs to be updated as soon as Linux learns more than 63 caps. */
+ uint64_t effective;
+ uint64_t bounding;
+ uint64_t inheritable;
+ uint64_t permitted;
+ uint64_t ambient;
+} CapabilityQuintet;
+
+assert_cc(CAP_LAST_CAP < 64);
+
+#define CAPABILITY_QUINTET_NULL { (uint64_t) -1, (uint64_t) -1, (uint64_t) -1, (uint64_t) -1, (uint64_t) -1 }
+
+static inline bool capability_quintet_is_set(const CapabilityQuintet *q) {
+ return q->effective != (uint64_t) -1 ||
+ q->bounding != (uint64_t) -1 ||
+ q->inheritable != (uint64_t) -1 ||
+ q->permitted != (uint64_t) -1 ||
+ q->ambient != (uint64_t) -1;
+}
+
+int capability_quintet_enforce(const CapabilityQuintet *q);
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
new file mode 100644
index 0000000..8ce7ccb
--- /dev/null
+++ b/src/basic/cgroup-util.c
@@ -0,0 +1,2933 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <ftw.h>
+#include <limits.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "login-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "set.h"
+#include "special.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "user-util.h"
+
+int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
+ _cleanup_free_ char *fs = NULL;
+ FILE *f;
+ int r;
+
+ assert(_f);
+
+ r = cg_get_path(controller, path, "cgroup.procs", &fs);
+ if (r < 0)
+ return r;
+
+ f = fopen(fs, "re");
+ if (!f)
+ return -errno;
+
+ *_f = f;
+ return 0;
+}
+
+int cg_read_pid(FILE *f, pid_t *_pid) {
+ unsigned long ul;
+
+ /* Note that the cgroup.procs might contain duplicates! See
+ * cgroups.txt for details. */
+
+ assert(f);
+ assert(_pid);
+
+ errno = 0;
+ if (fscanf(f, "%lu", &ul) != 1) {
+
+ if (feof(f))
+ return 0;
+
+ return errno > 0 ? -errno : -EIO;
+ }
+
+ if (ul <= 0)
+ return -EIO;
+
+ *_pid = (pid_t) ul;
+ return 1;
+}
+
+int cg_read_event(
+ const char *controller,
+ const char *path,
+ const char *event,
+ char **val) {
+
+ _cleanup_free_ char *events = NULL, *content = NULL;
+ char *p, *line;
+ int r;
+
+ r = cg_get_path(controller, path, "cgroup.events", &events);
+ if (r < 0)
+ return r;
+
+ r = read_full_file(events, &content, NULL);
+ if (r < 0)
+ return r;
+
+ p = content;
+ while ((line = strsep(&p, "\n"))) {
+ char *key;
+
+ key = strsep(&line, " ");
+ if (!key || !line)
+ return -EINVAL;
+
+ if (strcmp(key, event))
+ continue;
+
+ *val = strdup(line);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+bool cg_ns_supported(void) {
+ static thread_local int enabled = -1;
+
+ if (enabled >= 0)
+ return enabled;
+
+ if (access("/proc/self/ns/cgroup", F_OK) < 0) {
+ if (errno != ENOENT)
+ log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
+ enabled = false;
+ } else
+ enabled = true;
+
+ return enabled;
+}
+
+int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
+ _cleanup_free_ char *fs = NULL;
+ int r;
+ DIR *d;
+
+ assert(_d);
+
+ /* This is not recursive! */
+
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ d = opendir(fs);
+ if (!d)
+ return -errno;
+
+ *_d = d;
+ return 0;
+}
+
+int cg_read_subgroup(DIR *d, char **fn) {
+ struct dirent *de;
+
+ assert(d);
+ assert(fn);
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ char *b;
+
+ if (de->d_type != DT_DIR)
+ continue;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ b = strdup(de->d_name);
+ if (!b)
+ return -ENOMEM;
+
+ *fn = b;
+ return 1;
+ }
+
+ return 0;
+}
+
+int cg_rmdir(const char *controller, const char *path) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ r = cg_get_path(controller, path, NULL, &p);
+ if (r < 0)
+ return r;
+
+ r = rmdir(p);
+ if (r < 0 && errno != ENOENT)
+ return -errno;
+
+ r = cg_hybrid_unified();
+ if (r <= 0)
+ return r;
+
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
+ if (r < 0)
+ log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
+ }
+
+ return 0;
+}
+
+int cg_kill(
+ const char *controller,
+ const char *path,
+ int sig,
+ CGroupFlags flags,
+ Set *s,
+ cg_kill_log_func_t log_kill,
+ void *userdata) {
+
+ _cleanup_set_free_ Set *allocated_set = NULL;
+ bool done = false;
+ int r, ret = 0;
+ pid_t my_pid;
+
+ assert(sig >= 0);
+
+ /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
+ * SIGCONT on SIGKILL. */
+ if (IN_SET(sig, SIGCONT, SIGKILL))
+ flags &= ~CGROUP_SIGCONT;
+
+ /* This goes through the tasks list and kills them all. This
+ * is repeated until no further processes are added to the
+ * tasks list, to properly handle forking processes */
+
+ if (!s) {
+ s = allocated_set = set_new(NULL);
+ if (!s)
+ return -ENOMEM;
+ }
+
+ my_pid = getpid_cached();
+
+ do {
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t pid = 0;
+ done = true;
+
+ r = cg_enumerate_processes(controller, path, &f);
+ if (r < 0) {
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_pid(f, &pid)) > 0) {
+
+ if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
+ continue;
+
+ if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
+ continue;
+
+ if (log_kill)
+ log_kill(pid, sig, userdata);
+
+ /* If we haven't killed this process yet, kill
+ * it */
+ if (kill(pid, sig) < 0) {
+ if (ret >= 0 && errno != ESRCH)
+ ret = -errno;
+ } else {
+ if (flags & CGROUP_SIGCONT)
+ (void) kill(pid, SIGCONT);
+
+ if (ret == 0)
+ ret = 1;
+ }
+
+ done = false;
+
+ r = set_put(s, PID_TO_PTR(pid));
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+ }
+
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+
+ /* To avoid racing against processes which fork
+ * quicker than we can kill them we repeat this until
+ * no new pids need to be killed. */
+
+ } while (!done);
+
+ return ret;
+}
+
+int cg_kill_recursive(
+ const char *controller,
+ const char *path,
+ int sig,
+ CGroupFlags flags,
+ Set *s,
+ cg_kill_log_func_t log_kill,
+ void *userdata) {
+
+ _cleanup_set_free_ Set *allocated_set = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ int r, ret;
+ char *fn;
+
+ assert(path);
+ assert(sig >= 0);
+
+ if (!s) {
+ s = allocated_set = set_new(NULL);
+ if (!s)
+ return -ENOMEM;
+ }
+
+ ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
+
+ r = cg_enumerate_subgroups(controller, path, &d);
+ if (r < 0) {
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_subgroup(d, &fn)) > 0) {
+ _cleanup_free_ char *p = NULL;
+
+ p = strjoin(path, "/", fn);
+ free(fn);
+ if (!p)
+ return -ENOMEM;
+
+ r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
+ if (r != 0 && ret >= 0)
+ ret = r;
+ }
+ if (ret >= 0 && r < 0)
+ ret = r;
+
+ if (flags & CGROUP_REMOVE) {
+ r = cg_rmdir(controller, path);
+ if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
+ return r;
+ }
+
+ return ret;
+}
+
+int cg_migrate(
+ const char *cfrom,
+ const char *pfrom,
+ const char *cto,
+ const char *pto,
+ CGroupFlags flags) {
+
+ bool done = false;
+ _cleanup_set_free_ Set *s = NULL;
+ int r, ret = 0;
+ pid_t my_pid;
+
+ assert(cfrom);
+ assert(pfrom);
+ assert(cto);
+ assert(pto);
+
+ s = set_new(NULL);
+ if (!s)
+ return -ENOMEM;
+
+ my_pid = getpid_cached();
+
+ do {
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t pid = 0;
+ done = true;
+
+ r = cg_enumerate_processes(cfrom, pfrom, &f);
+ if (r < 0) {
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_pid(f, &pid)) > 0) {
+
+ /* This might do weird stuff if we aren't a
+ * single-threaded program. However, we
+ * luckily know we are not */
+ if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
+ continue;
+
+ if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
+ continue;
+
+ /* Ignore kernel threads. Since they can only
+ * exist in the root cgroup, we only check for
+ * them there. */
+ if (cfrom &&
+ empty_or_root(pfrom) &&
+ is_kernel_thread(pid) > 0)
+ continue;
+
+ r = cg_attach(cto, pto, pid);
+ if (r < 0) {
+ if (ret >= 0 && r != -ESRCH)
+ ret = r;
+ } else if (ret == 0)
+ ret = 1;
+
+ done = false;
+
+ r = set_put(s, PID_TO_PTR(pid));
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+ }
+
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+ } while (!done);
+
+ return ret;
+}
+
+int cg_migrate_recursive(
+ const char *cfrom,
+ const char *pfrom,
+ const char *cto,
+ const char *pto,
+ CGroupFlags flags) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ int r, ret = 0;
+ char *fn;
+
+ assert(cfrom);
+ assert(pfrom);
+ assert(cto);
+ assert(pto);
+
+ ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
+
+ r = cg_enumerate_subgroups(cfrom, pfrom, &d);
+ if (r < 0) {
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_subgroup(d, &fn)) > 0) {
+ _cleanup_free_ char *p = NULL;
+
+ p = strjoin(pfrom, "/", fn);
+ free(fn);
+ if (!p)
+ return -ENOMEM;
+
+ r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
+ if (r != 0 && ret >= 0)
+ ret = r;
+ }
+
+ if (r < 0 && ret >= 0)
+ ret = r;
+
+ if (flags & CGROUP_REMOVE) {
+ r = cg_rmdir(cfrom, pfrom);
+ if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
+ return r;
+ }
+
+ return ret;
+}
+
+int cg_migrate_recursive_fallback(
+ const char *cfrom,
+ const char *pfrom,
+ const char *cto,
+ const char *pto,
+ CGroupFlags flags) {
+
+ int r;
+
+ assert(cfrom);
+ assert(pfrom);
+ assert(cto);
+ assert(pto);
+
+ r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
+ if (r < 0) {
+ char prefix[strlen(pto) + 1];
+
+ /* This didn't work? Then let's try all prefixes of the destination */
+
+ PATH_FOREACH_PREFIX(prefix, pto) {
+ int q;
+
+ q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
+ if (q >= 0)
+ return q;
+ }
+ }
+
+ return r;
+}
+
+static const char *controller_to_dirname(const char *controller) {
+ const char *e;
+
+ assert(controller);
+
+ /* Converts a controller name to the directory name below
+ * /sys/fs/cgroup/ we want to mount it to. Effectively, this
+ * just cuts off the name= prefixed used for named
+ * hierarchies, if it is specified. */
+
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ if (cg_hybrid_unified() > 0)
+ controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
+ else
+ controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
+ }
+
+ e = startswith(controller, "name=");
+ if (e)
+ return e;
+
+ return controller;
+}
+
+static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
+ const char *dn;
+ char *t = NULL;
+
+ assert(fs);
+ assert(controller);
+
+ dn = controller_to_dirname(controller);
+
+ if (isempty(path) && isempty(suffix))
+ t = strappend("/sys/fs/cgroup/", dn);
+ else if (isempty(path))
+ t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
+ else if (isempty(suffix))
+ t = strjoin("/sys/fs/cgroup/", dn, "/", path);
+ else
+ t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
+ if (!t)
+ return -ENOMEM;
+
+ *fs = t;
+ return 0;
+}
+
+static int join_path_unified(const char *path, const char *suffix, char **fs) {
+ char *t;
+
+ assert(fs);
+
+ if (isempty(path) && isempty(suffix))
+ t = strdup("/sys/fs/cgroup");
+ else if (isempty(path))
+ t = strappend("/sys/fs/cgroup/", suffix);
+ else if (isempty(suffix))
+ t = strappend("/sys/fs/cgroup/", path);
+ else
+ t = strjoin("/sys/fs/cgroup/", path, "/", suffix);
+ if (!t)
+ return -ENOMEM;
+
+ *fs = t;
+ return 0;
+}
+
+int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
+ int r;
+
+ assert(fs);
+
+ if (!controller) {
+ char *t;
+
+ /* If no controller is specified, we return the path
+ * *below* the controllers, without any prefix. */
+
+ if (!path && !suffix)
+ return -EINVAL;
+
+ if (!suffix)
+ t = strdup(path);
+ else if (!path)
+ t = strdup(suffix);
+ else
+ t = strjoin(path, "/", suffix);
+ if (!t)
+ return -ENOMEM;
+
+ *fs = path_simplify(t, false);
+ return 0;
+ }
+
+ if (!cg_controller_is_valid(controller))
+ return -EINVAL;
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ r = join_path_unified(path, suffix, fs);
+ else
+ r = join_path_legacy(controller, path, suffix, fs);
+ if (r < 0)
+ return r;
+
+ path_simplify(*fs, false);
+ return 0;
+}
+
+static int controller_is_accessible(const char *controller) {
+ int r;
+
+ assert(controller);
+
+ /* Checks whether a specific controller is accessible,
+ * i.e. its hierarchy mounted. In the unified hierarchy all
+ * controllers are considered accessible, except for the named
+ * hierarchies */
+
+ if (!cg_controller_is_valid(controller))
+ return -EINVAL;
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* We don't support named hierarchies if we are using
+ * the unified hierarchy. */
+
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+ return 0;
+
+ if (startswith(controller, "name="))
+ return -EOPNOTSUPP;
+
+ } else {
+ const char *cc, *dn;
+
+ dn = controller_to_dirname(controller);
+ cc = strjoina("/sys/fs/cgroup/", dn);
+
+ if (laccess(cc, F_OK) < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
+ int r;
+
+ assert(controller);
+ assert(fs);
+
+ /* Check if the specified controller is actually accessible */
+ r = controller_is_accessible(controller);
+ if (r < 0)
+ return r;
+
+ return cg_get_path(controller, path, suffix, fs);
+}
+
+static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
+ assert(path);
+ assert(sb);
+ assert(ftwbuf);
+
+ if (typeflag != FTW_DP)
+ return 0;
+
+ if (ftwbuf->level < 1)
+ return 0;
+
+ (void) rmdir(path);
+ return 0;
+}
+
+int cg_trim(const char *controller, const char *path, bool delete_root) {
+ _cleanup_free_ char *fs = NULL;
+ int r = 0, q;
+
+ assert(path);
+
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ errno = 0;
+ if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
+ if (errno == ENOENT)
+ r = 0;
+ else if (errno > 0)
+ r = -errno;
+ else
+ r = -EIO;
+ }
+
+ if (delete_root) {
+ if (rmdir(fs) < 0 && errno != ENOENT)
+ return -errno;
+ }
+
+ q = cg_hybrid_unified();
+ if (q < 0)
+ return q;
+ if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
+ if (q < 0)
+ log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
+ }
+
+ return r;
+}
+
+/* Create a cgroup in the hierarchy of controller.
+ * Returns 0 if the group already existed, 1 on success, negative otherwise.
+ */
+int cg_create(const char *controller, const char *path) {
+ _cleanup_free_ char *fs = NULL;
+ int r;
+
+ r = cg_get_path_and_check(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ r = mkdir_parents(fs, 0755);
+ if (r < 0)
+ return r;
+
+ r = mkdir_errno_wrapper(fs, 0755);
+ if (r == -EEXIST)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = cg_hybrid_unified();
+ if (r < 0)
+ return r;
+
+ if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
+ if (r < 0)
+ log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
+ }
+
+ return 1;
+}
+
+int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
+ int r, q;
+
+ assert(pid >= 0);
+
+ r = cg_create(controller, path);
+ if (r < 0)
+ return r;
+
+ q = cg_attach(controller, path, pid);
+ if (q < 0)
+ return q;
+
+ /* This does not remove the cgroup on failure */
+ return r;
+}
+
+int cg_attach(const char *controller, const char *path, pid_t pid) {
+ _cleanup_free_ char *fs = NULL;
+ char c[DECIMAL_STR_MAX(pid_t) + 2];
+ int r;
+
+ assert(path);
+ assert(pid >= 0);
+
+ r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
+ if (r < 0)
+ return r;
+
+ if (pid == 0)
+ pid = getpid_cached();
+
+ xsprintf(c, PID_FMT "\n", pid);
+
+ r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+
+ r = cg_hybrid_unified();
+ if (r < 0)
+ return r;
+
+ if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
+ if (r < 0)
+ log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
+ }
+
+ return 0;
+}
+
+int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
+ int r;
+
+ assert(controller);
+ assert(path);
+ assert(pid >= 0);
+
+ r = cg_attach(controller, path, pid);
+ if (r < 0) {
+ char prefix[strlen(path) + 1];
+
+ /* This didn't work? Then let's try all prefixes of
+ * the destination */
+
+ PATH_FOREACH_PREFIX(prefix, path) {
+ int q;
+
+ q = cg_attach(controller, prefix, pid);
+ if (q >= 0)
+ return q;
+ }
+ }
+
+ return r;
+}
+
+int cg_set_access(
+ const char *controller,
+ const char *path,
+ uid_t uid,
+ gid_t gid) {
+
+ struct Attribute {
+ const char *name;
+ bool fatal;
+ };
+
+ /* cgroup v1, aka legacy/non-unified */
+ static const struct Attribute legacy_attributes[] = {
+ { "cgroup.procs", true },
+ { "tasks", false },
+ { "cgroup.clone_children", false },
+ {},
+ };
+
+ /* cgroup v2, aka unified */
+ static const struct Attribute unified_attributes[] = {
+ { "cgroup.procs", true },
+ { "cgroup.subtree_control", true },
+ { "cgroup.threads", false },
+ {},
+ };
+
+ static const struct Attribute* const attributes[] = {
+ [false] = legacy_attributes,
+ [true] = unified_attributes,
+ };
+
+ _cleanup_free_ char *fs = NULL;
+ const struct Attribute *i;
+ int r, unified;
+
+ assert(path);
+
+ if (uid == UID_INVALID && gid == GID_INVALID)
+ return 0;
+
+ unified = cg_unified_controller(controller);
+ if (unified < 0)
+ return unified;
+
+ /* Configure access to the cgroup itself */
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ r = chmod_and_chown(fs, 0755, uid, gid);
+ if (r < 0)
+ return r;
+
+ /* Configure access to the cgroup's attributes */
+ for (i = attributes[unified]; i->name; i++) {
+ fs = mfree(fs);
+
+ r = cg_get_path(controller, path, i->name, &fs);
+ if (r < 0)
+ return r;
+
+ r = chmod_and_chown(fs, 0644, uid, gid);
+ if (r < 0) {
+ if (i->fatal)
+ return r;
+
+ log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
+ }
+ }
+
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ r = cg_hybrid_unified();
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* Always propagate access mode from unified to legacy controller */
+ r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
+ if (r < 0)
+ log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
+ }
+ }
+
+ return 0;
+}
+
+int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
+ _cleanup_free_ char *fs = NULL;
+ int r;
+
+ assert(path);
+ assert(name);
+ assert(value || size <= 0);
+
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ if (setxattr(fs, name, value, size, flags) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
+ _cleanup_free_ char *fs = NULL;
+ ssize_t n;
+ int r;
+
+ assert(path);
+ assert(name);
+
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ n = getxattr(fs, name, value, size);
+ if (n < 0)
+ return -errno;
+
+ return (int) n;
+}
+
+int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *fs, *controller_str;
+ int unified, r;
+ size_t cs = 0;
+
+ assert(path);
+ assert(pid >= 0);
+
+ if (controller) {
+ if (!cg_controller_is_valid(controller))
+ return -EINVAL;
+ } else
+ controller = SYSTEMD_CGROUP_CONTROLLER;
+
+ unified = cg_unified_controller(controller);
+ if (unified < 0)
+ return unified;
+ if (unified == 0) {
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+ controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
+ else
+ controller_str = controller;
+
+ cs = strlen(controller_str);
+ }
+
+ fs = procfs_file_alloca(pid, "cgroup");
+ f = fopen(fs, "re");
+ if (!f)
+ return errno == ENOENT ? -ESRCH : -errno;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *e, *p;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (unified) {
+ e = startswith(line, "0:");
+ if (!e)
+ continue;
+
+ e = strchr(e, ':');
+ if (!e)
+ continue;
+ } else {
+ char *l;
+ size_t k;
+ const char *word, *state;
+ bool found = false;
+
+ l = strchr(line, ':');
+ if (!l)
+ continue;
+
+ l++;
+ e = strchr(l, ':');
+ if (!e)
+ continue;
+
+ *e = 0;
+ FOREACH_WORD_SEPARATOR(word, k, l, ",", state)
+ if (k == cs && memcmp(word, controller_str, cs) == 0) {
+ found = true;
+ break;
+ }
+ if (!found)
+ continue;
+ }
+
+ p = strdup(e + 1);
+ if (!p)
+ return -ENOMEM;
+
+ /* Truncate suffix indicating the process is a zombie */
+ e = endswith(p, " (deleted)");
+ if (e)
+ *e = 0;
+
+ *path = p;
+ return 0;
+ }
+
+ return -ENODATA;
+}
+
+int cg_install_release_agent(const char *controller, const char *agent) {
+ _cleanup_free_ char *fs = NULL, *contents = NULL;
+ const char *sc;
+ int r;
+
+ assert(agent);
+
+ r = cg_unified_controller(controller);
+ if (r < 0)
+ return r;
+ if (r > 0) /* doesn't apply to unified hierarchy */
+ return -EOPNOTSUPP;
+
+ r = cg_get_path(controller, NULL, "release_agent", &fs);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(fs, &contents);
+ if (r < 0)
+ return r;
+
+ sc = strstrip(contents);
+ if (isempty(sc)) {
+ r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+ } else if (!path_equal(sc, agent))
+ return -EEXIST;
+
+ fs = mfree(fs);
+ r = cg_get_path(controller, NULL, "notify_on_release", &fs);
+ if (r < 0)
+ return r;
+
+ contents = mfree(contents);
+ r = read_one_line_file(fs, &contents);
+ if (r < 0)
+ return r;
+
+ sc = strstrip(contents);
+ if (streq(sc, "0")) {
+ r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ if (!streq(sc, "1"))
+ return -EIO;
+
+ return 0;
+}
+
+int cg_uninstall_release_agent(const char *controller) {
+ _cleanup_free_ char *fs = NULL;
+ int r;
+
+ r = cg_unified_controller(controller);
+ if (r < 0)
+ return r;
+ if (r > 0) /* Doesn't apply to unified hierarchy */
+ return -EOPNOTSUPP;
+
+ r = cg_get_path(controller, NULL, "notify_on_release", &fs);
+ if (r < 0)
+ return r;
+
+ r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+
+ fs = mfree(fs);
+
+ r = cg_get_path(controller, NULL, "release_agent", &fs);
+ if (r < 0)
+ return r;
+
+ r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int cg_is_empty(const char *controller, const char *path) {
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t pid;
+ int r;
+
+ assert(path);
+
+ r = cg_enumerate_processes(controller, path, &f);
+ if (r == -ENOENT)
+ return true;
+ if (r < 0)
+ return r;
+
+ r = cg_read_pid(f, &pid);
+ if (r < 0)
+ return r;
+
+ return r == 0;
+}
+
+int cg_is_empty_recursive(const char *controller, const char *path) {
+ int r;
+
+ assert(path);
+
+ /* The root cgroup is always populated */
+ if (controller && empty_or_root(path))
+ return false;
+
+ r = cg_unified_controller(controller);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ _cleanup_free_ char *t = NULL;
+
+ /* On the unified hierarchy we can check empty state
+ * via the "populated" attribute of "cgroup.events". */
+
+ r = cg_read_event(controller, path, "populated", &t);
+ if (r == -ENOENT)
+ return true;
+ if (r < 0)
+ return r;
+
+ return streq(t, "0");
+ } else {
+ _cleanup_closedir_ DIR *d = NULL;
+ char *fn;
+
+ r = cg_is_empty(controller, path);
+ if (r <= 0)
+ return r;
+
+ r = cg_enumerate_subgroups(controller, path, &d);
+ if (r == -ENOENT)
+ return true;
+ if (r < 0)
+ return r;
+
+ while ((r = cg_read_subgroup(d, &fn)) > 0) {
+ _cleanup_free_ char *p = NULL;
+
+ p = strjoin(path, "/", fn);
+ free(fn);
+ if (!p)
+ return -ENOMEM;
+
+ r = cg_is_empty_recursive(controller, p);
+ if (r <= 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ return true;
+ }
+}
+
+int cg_split_spec(const char *spec, char **controller, char **path) {
+ char *t = NULL, *u = NULL;
+ const char *e;
+
+ assert(spec);
+
+ if (*spec == '/') {
+ if (!path_is_normalized(spec))
+ return -EINVAL;
+
+ if (path) {
+ t = strdup(spec);
+ if (!t)
+ return -ENOMEM;
+
+ *path = path_simplify(t, false);
+ }
+
+ if (controller)
+ *controller = NULL;
+
+ return 0;
+ }
+
+ e = strchr(spec, ':');
+ if (!e) {
+ if (!cg_controller_is_valid(spec))
+ return -EINVAL;
+
+ if (controller) {
+ t = strdup(spec);
+ if (!t)
+ return -ENOMEM;
+
+ *controller = t;
+ }
+
+ if (path)
+ *path = NULL;
+
+ return 0;
+ }
+
+ t = strndup(spec, e-spec);
+ if (!t)
+ return -ENOMEM;
+ if (!cg_controller_is_valid(t)) {
+ free(t);
+ return -EINVAL;
+ }
+
+ if (isempty(e+1))
+ u = NULL;
+ else {
+ u = strdup(e+1);
+ if (!u) {
+ free(t);
+ return -ENOMEM;
+ }
+
+ if (!path_is_normalized(u) ||
+ !path_is_absolute(u)) {
+ free(t);
+ free(u);
+ return -EINVAL;
+ }
+
+ path_simplify(u, false);
+ }
+
+ if (controller)
+ *controller = t;
+ else
+ free(t);
+
+ if (path)
+ *path = u;
+ else
+ free(u);
+
+ return 0;
+}
+
+int cg_mangle_path(const char *path, char **result) {
+ _cleanup_free_ char *c = NULL, *p = NULL;
+ char *t;
+ int r;
+
+ assert(path);
+ assert(result);
+
+ /* First, check if it already is a filesystem path */
+ if (path_startswith(path, "/sys/fs/cgroup")) {
+
+ t = strdup(path);
+ if (!t)
+ return -ENOMEM;
+
+ *result = path_simplify(t, false);
+ return 0;
+ }
+
+ /* Otherwise, treat it as cg spec */
+ r = cg_split_spec(path, &c, &p);
+ if (r < 0)
+ return r;
+
+ return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
+}
+
+int cg_get_root_path(char **path) {
+ char *p, *e;
+ int r;
+
+ assert(path);
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
+ if (r < 0)
+ return r;
+
+ e = endswith(p, "/" SPECIAL_INIT_SCOPE);
+ if (!e)
+ e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
+ if (!e)
+ e = endswith(p, "/system"); /* even more legacy */
+ if (e)
+ *e = 0;
+
+ *path = p;
+ return 0;
+}
+
+int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
+ _cleanup_free_ char *rt = NULL;
+ char *p;
+ int r;
+
+ assert(cgroup);
+ assert(shifted);
+
+ if (!root) {
+ /* If the root was specified let's use that, otherwise
+ * let's determine it from PID 1 */
+
+ r = cg_get_root_path(&rt);
+ if (r < 0)
+ return r;
+
+ root = rt;
+ }
+
+ p = path_startswith(cgroup, root);
+ if (p && p > cgroup)
+ *shifted = p - 1;
+ else
+ *shifted = cgroup;
+
+ return 0;
+}
+
+int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
+ _cleanup_free_ char *raw = NULL;
+ const char *c;
+ int r;
+
+ assert(pid >= 0);
+ assert(cgroup);
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
+ if (r < 0)
+ return r;
+
+ r = cg_shift_path(raw, root, &c);
+ if (r < 0)
+ return r;
+
+ if (c == raw)
+ *cgroup = TAKE_PTR(raw);
+ else {
+ char *n;
+
+ n = strdup(c);
+ if (!n)
+ return -ENOMEM;
+
+ *cgroup = n;
+ }
+
+ return 0;
+}
+
+int cg_path_decode_unit(const char *cgroup, char **unit) {
+ char *c, *s;
+ size_t n;
+
+ assert(cgroup);
+ assert(unit);
+
+ n = strcspn(cgroup, "/");
+ if (n < 3)
+ return -ENXIO;
+
+ c = strndupa(cgroup, n);
+ c = cg_unescape(c);
+
+ if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
+ return -ENXIO;
+
+ s = strdup(c);
+ if (!s)
+ return -ENOMEM;
+
+ *unit = s;
+ return 0;
+}
+
+static bool valid_slice_name(const char *p, size_t n) {
+
+ if (!p)
+ return false;
+
+ if (n < STRLEN("x.slice"))
+ return false;
+
+ if (memcmp(p + n - 6, ".slice", 6) == 0) {
+ char buf[n+1], *c;
+
+ memcpy(buf, p, n);
+ buf[n] = 0;
+
+ c = cg_unescape(buf);
+
+ return unit_name_is_valid(c, UNIT_NAME_PLAIN);
+ }
+
+ return false;
+}
+
+static const char *skip_slices(const char *p) {
+ assert(p);
+
+ /* Skips over all slice assignments */
+
+ for (;;) {
+ size_t n;
+
+ p += strspn(p, "/");
+
+ n = strcspn(p, "/");
+ if (!valid_slice_name(p, n))
+ return p;
+
+ p += n;
+ }
+}
+
+int cg_path_get_unit(const char *path, char **ret) {
+ const char *e;
+ char *unit;
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ e = skip_slices(path);
+
+ r = cg_path_decode_unit(e, &unit);
+ if (r < 0)
+ return r;
+
+ /* We skipped over the slices, don't accept any now */
+ if (endswith(unit, ".slice")) {
+ free(unit);
+ return -ENXIO;
+ }
+
+ *ret = unit;
+ return 0;
+}
+
+int cg_pid_get_unit(pid_t pid, char **unit) {
+ _cleanup_free_ char *cgroup = NULL;
+ int r;
+
+ assert(unit);
+
+ r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_unit(cgroup, unit);
+}
+
+/**
+ * Skip session-*.scope, but require it to be there.
+ */
+static const char *skip_session(const char *p) {
+ size_t n;
+
+ if (isempty(p))
+ return NULL;
+
+ p += strspn(p, "/");
+
+ n = strcspn(p, "/");
+ if (n < STRLEN("session-x.scope"))
+ return NULL;
+
+ if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
+ char buf[n - 8 - 6 + 1];
+
+ memcpy(buf, p + 8, n - 8 - 6);
+ buf[n - 8 - 6] = 0;
+
+ /* Note that session scopes never need unescaping,
+ * since they cannot conflict with the kernel's own
+ * names, hence we don't need to call cg_unescape()
+ * here. */
+
+ if (!session_id_valid(buf))
+ return false;
+
+ p += n;
+ p += strspn(p, "/");
+ return p;
+ }
+
+ return NULL;
+}
+
+/**
+ * Skip user@*.service, but require it to be there.
+ */
+static const char *skip_user_manager(const char *p) {
+ size_t n;
+
+ if (isempty(p))
+ return NULL;
+
+ p += strspn(p, "/");
+
+ n = strcspn(p, "/");
+ if (n < STRLEN("user@x.service"))
+ return NULL;
+
+ if (memcmp(p, "user@", 5) == 0 && memcmp(p + n - 8, ".service", 8) == 0) {
+ char buf[n - 5 - 8 + 1];
+
+ memcpy(buf, p + 5, n - 5 - 8);
+ buf[n - 5 - 8] = 0;
+
+ /* Note that user manager services never need unescaping,
+ * since they cannot conflict with the kernel's own
+ * names, hence we don't need to call cg_unescape()
+ * here. */
+
+ if (parse_uid(buf, NULL) < 0)
+ return NULL;
+
+ p += n;
+ p += strspn(p, "/");
+
+ return p;
+ }
+
+ return NULL;
+}
+
+static const char *skip_user_prefix(const char *path) {
+ const char *e, *t;
+
+ assert(path);
+
+ /* Skip slices, if there are any */
+ e = skip_slices(path);
+
+ /* Skip the user manager, if it's in the path now... */
+ t = skip_user_manager(e);
+ if (t)
+ return t;
+
+ /* Alternatively skip the user session if it is in the path... */
+ return skip_session(e);
+}
+
+int cg_path_get_user_unit(const char *path, char **ret) {
+ const char *t;
+
+ assert(path);
+ assert(ret);
+
+ t = skip_user_prefix(path);
+ if (!t)
+ return -ENXIO;
+
+ /* And from here on it looks pretty much the same as for a
+ * system unit, hence let's use the same parser from here
+ * on. */
+ return cg_path_get_unit(t, ret);
+}
+
+int cg_pid_get_user_unit(pid_t pid, char **unit) {
+ _cleanup_free_ char *cgroup = NULL;
+ int r;
+
+ assert(unit);
+
+ r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_user_unit(cgroup, unit);
+}
+
+int cg_path_get_machine_name(const char *path, char **machine) {
+ _cleanup_free_ char *u = NULL;
+ const char *sl;
+ int r;
+
+ r = cg_path_get_unit(path, &u);
+ if (r < 0)
+ return r;
+
+ sl = strjoina("/run/systemd/machines/unit:", u);
+ return readlink_malloc(sl, machine);
+}
+
+int cg_pid_get_machine_name(pid_t pid, char **machine) {
+ _cleanup_free_ char *cgroup = NULL;
+ int r;
+
+ assert(machine);
+
+ r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_machine_name(cgroup, machine);
+}
+
+int cg_path_get_session(const char *path, char **session) {
+ _cleanup_free_ char *unit = NULL;
+ char *start, *end;
+ int r;
+
+ assert(path);
+
+ r = cg_path_get_unit(path, &unit);
+ if (r < 0)
+ return r;
+
+ start = startswith(unit, "session-");
+ if (!start)
+ return -ENXIO;
+ end = endswith(start, ".scope");
+ if (!end)
+ return -ENXIO;
+
+ *end = 0;
+ if (!session_id_valid(start))
+ return -ENXIO;
+
+ if (session) {
+ char *rr;
+
+ rr = strdup(start);
+ if (!rr)
+ return -ENOMEM;
+
+ *session = rr;
+ }
+
+ return 0;
+}
+
+int cg_pid_get_session(pid_t pid, char **session) {
+ _cleanup_free_ char *cgroup = NULL;
+ int r;
+
+ r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_session(cgroup, session);
+}
+
+int cg_path_get_owner_uid(const char *path, uid_t *uid) {
+ _cleanup_free_ char *slice = NULL;
+ char *start, *end;
+ int r;
+
+ assert(path);
+
+ r = cg_path_get_slice(path, &slice);
+ if (r < 0)
+ return r;
+
+ start = startswith(slice, "user-");
+ if (!start)
+ return -ENXIO;
+ end = endswith(start, ".slice");
+ if (!end)
+ return -ENXIO;
+
+ *end = 0;
+ if (parse_uid(start, uid) < 0)
+ return -ENXIO;
+
+ return 0;
+}
+
+int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
+ _cleanup_free_ char *cgroup = NULL;
+ int r;
+
+ r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_owner_uid(cgroup, uid);
+}
+
+int cg_path_get_slice(const char *p, char **slice) {
+ const char *e = NULL;
+
+ assert(p);
+ assert(slice);
+
+ /* Finds the right-most slice unit from the beginning, but
+ * stops before we come to the first non-slice unit. */
+
+ for (;;) {
+ size_t n;
+
+ p += strspn(p, "/");
+
+ n = strcspn(p, "/");
+ if (!valid_slice_name(p, n)) {
+
+ if (!e) {
+ char *s;
+
+ s = strdup(SPECIAL_ROOT_SLICE);
+ if (!s)
+ return -ENOMEM;
+
+ *slice = s;
+ return 0;
+ }
+
+ return cg_path_decode_unit(e, slice);
+ }
+
+ e = p;
+ p += n;
+ }
+}
+
+int cg_pid_get_slice(pid_t pid, char **slice) {
+ _cleanup_free_ char *cgroup = NULL;
+ int r;
+
+ assert(slice);
+
+ r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_slice(cgroup, slice);
+}
+
+int cg_path_get_user_slice(const char *p, char **slice) {
+ const char *t;
+ assert(p);
+ assert(slice);
+
+ t = skip_user_prefix(p);
+ if (!t)
+ return -ENXIO;
+
+ /* And now it looks pretty much the same as for a system
+ * slice, so let's just use the same parser from here on. */
+ return cg_path_get_slice(t, slice);
+}
+
+int cg_pid_get_user_slice(pid_t pid, char **slice) {
+ _cleanup_free_ char *cgroup = NULL;
+ int r;
+
+ assert(slice);
+
+ r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_user_slice(cgroup, slice);
+}
+
+char *cg_escape(const char *p) {
+ bool need_prefix = false;
+
+ /* This implements very minimal escaping for names to be used
+ * as file names in the cgroup tree: any name which might
+ * conflict with a kernel name or is prefixed with '_' is
+ * prefixed with a '_'. That way, when reading cgroup names it
+ * is sufficient to remove a single prefixing underscore if
+ * there is one. */
+
+ /* The return value of this function (unlike cg_unescape())
+ * needs free()! */
+
+ if (IN_SET(p[0], 0, '_', '.') ||
+ STR_IN_SET(p, "notify_on_release", "release_agent", "tasks") ||
+ startswith(p, "cgroup."))
+ need_prefix = true;
+ else {
+ const char *dot;
+
+ dot = strrchr(p, '.');
+ if (dot) {
+ CGroupController c;
+ size_t l = dot - p;
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ const char *n;
+
+ n = cgroup_controller_to_string(c);
+
+ if (l != strlen(n))
+ continue;
+
+ if (memcmp(p, n, l) != 0)
+ continue;
+
+ need_prefix = true;
+ break;
+ }
+ }
+ }
+
+ if (need_prefix)
+ return strappend("_", p);
+
+ return strdup(p);
+}
+
+char *cg_unescape(const char *p) {
+ assert(p);
+
+ /* The return value of this function (unlike cg_escape())
+ * doesn't need free()! */
+
+ if (p[0] == '_')
+ return (char*) p+1;
+
+ return (char*) p;
+}
+
+#define CONTROLLER_VALID \
+ DIGITS LETTERS \
+ "_"
+
+bool cg_controller_is_valid(const char *p) {
+ const char *t, *s;
+
+ if (!p)
+ return false;
+
+ if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
+ return true;
+
+ s = startswith(p, "name=");
+ if (s)
+ p = s;
+
+ if (IN_SET(*p, 0, '_'))
+ return false;
+
+ for (t = p; *t; t++)
+ if (!strchr(CONTROLLER_VALID, *t))
+ return false;
+
+ if (t - p > FILENAME_MAX)
+ return false;
+
+ return true;
+}
+
+int cg_slice_to_path(const char *unit, char **ret) {
+ _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
+ const char *dash;
+ int r;
+
+ assert(unit);
+ assert(ret);
+
+ if (streq(unit, SPECIAL_ROOT_SLICE)) {
+ char *x;
+
+ x = strdup("");
+ if (!x)
+ return -ENOMEM;
+ *ret = x;
+ return 0;
+ }
+
+ if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ if (!endswith(unit, ".slice"))
+ return -EINVAL;
+
+ r = unit_name_to_prefix(unit, &p);
+ if (r < 0)
+ return r;
+
+ dash = strchr(p, '-');
+
+ /* Don't allow initial dashes */
+ if (dash == p)
+ return -EINVAL;
+
+ while (dash) {
+ _cleanup_free_ char *escaped = NULL;
+ char n[dash - p + sizeof(".slice")];
+
+#if HAS_FEATURE_MEMORY_SANITIZER
+ /* msan doesn't instrument stpncpy, so it thinks
+ * n is later used unitialized:
+ * https://github.com/google/sanitizers/issues/926
+ */
+ zero(n);
+#endif
+
+ /* Don't allow trailing or double dashes */
+ if (IN_SET(dash[1], 0, '-'))
+ return -EINVAL;
+
+ strcpy(stpncpy(n, p, dash - p), ".slice");
+ if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ escaped = cg_escape(n);
+ if (!escaped)
+ return -ENOMEM;
+
+ if (!strextend(&s, escaped, "/", NULL))
+ return -ENOMEM;
+
+ dash = strchr(dash+1, '-');
+ }
+
+ e = cg_escape(unit);
+ if (!e)
+ return -ENOMEM;
+
+ if (!strextend(&s, e, NULL))
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ r = cg_get_path(controller, path, attribute, &p);
+ if (r < 0)
+ return r;
+
+ return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
+}
+
+int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ r = cg_get_path(controller, path, attribute, &p);
+ if (r < 0)
+ return r;
+
+ return read_one_line_file(p, ret);
+}
+
+int cg_get_keyed_attribute(
+ const char *controller,
+ const char *path,
+ const char *attribute,
+ char **keys,
+ char **ret_values) {
+
+ _cleanup_free_ char *filename = NULL, *contents = NULL;
+ const char *p;
+ size_t n, i, n_done = 0;
+ char **v;
+ int r;
+
+ /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
+ * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
+ * entries as 'keys'. On success each entry will be set to the value of the matching key.
+ *
+ * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
+
+ r = cg_get_path(controller, path, attribute, &filename);
+ if (r < 0)
+ return r;
+
+ r = read_full_file(filename, &contents, NULL);
+ if (r < 0)
+ return r;
+
+ n = strv_length(keys);
+ if (n == 0) /* No keys to retrieve? That's easy, we are done then */
+ return 0;
+
+ /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
+ v = newa0(char*, n);
+
+ for (p = contents; *p;) {
+ const char *w = NULL;
+
+ for (i = 0; i < n; i++)
+ if (!v[i]) {
+ w = first_word(p, keys[i]);
+ if (w)
+ break;
+ }
+
+ if (w) {
+ size_t l;
+
+ l = strcspn(w, NEWLINE);
+ v[i] = strndup(w, l);
+ if (!v[i]) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ n_done++;
+ if (n_done >= n)
+ goto done;
+
+ p = w + l;
+ } else
+ p += strcspn(p, NEWLINE);
+
+ p += strspn(p, NEWLINE);
+ }
+
+ r = -ENXIO;
+
+fail:
+ for (i = 0; i < n; i++)
+ free(v[i]);
+
+ return r;
+
+done:
+ memcpy(ret_values, v, sizeof(char*) * n);
+ return 0;
+
+}
+
+int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
+ CGroupController c;
+ CGroupMask done;
+ bool created;
+ int r;
+
+ /* This one will create a cgroup in our private tree, but also
+ * duplicate it in the trees specified in mask, and remove it
+ * in all others.
+ *
+ * Returns 0 if the group already existed in the systemd hierarchy,
+ * 1 on success, negative otherwise.
+ */
+
+ /* First create the cgroup in our own hierarchy. */
+ r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
+ if (r < 0)
+ return r;
+ created = r;
+
+ /* If we are in the unified hierarchy, we are done now */
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return created;
+
+ supported &= CGROUP_MASK_V1;
+ mask = CGROUP_MASK_EXTEND_JOINED(mask);
+ done = 0;
+
+ /* Otherwise, do the same in the other hierarchies */
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *n;
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ if (FLAGS_SET(done, bit))
+ continue;
+
+ n = cgroup_controller_to_string(c);
+ if (FLAGS_SET(mask, bit))
+ (void) cg_create(n, path);
+ else
+ (void) cg_trim(n, path, true);
+
+ done |= CGROUP_MASK_EXTEND_JOINED(bit);
+ }
+
+ return created;
+}
+
+int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
+ CGroupController c;
+ CGroupMask done;
+ int r;
+
+ r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
+ if (r < 0)
+ return r;
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ supported &= CGROUP_MASK_V1;
+ done = 0;
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *p = NULL;
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ if (FLAGS_SET(done, bit))
+ continue;
+
+ if (path_callback)
+ p = path_callback(bit, userdata);
+ if (!p)
+ p = path;
+
+ (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
+ done |= CGROUP_MASK_EXTEND_JOINED(bit);
+ }
+
+ return 0;
+}
+
+int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
+ Iterator i;
+ void *pidp;
+ int r = 0;
+
+ SET_FOREACH(pidp, pids, i) {
+ pid_t pid = PTR_TO_PID(pidp);
+ int q;
+
+ q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ return r;
+}
+
+int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
+ CGroupController c;
+ CGroupMask done;
+ int r = 0, q;
+
+ if (!path_equal(from, to)) {
+ r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
+ if (r < 0)
+ return r;
+ }
+
+ q = cg_all_unified();
+ if (q < 0)
+ return q;
+ if (q > 0)
+ return r;
+
+ supported &= CGROUP_MASK_V1;
+ done = 0;
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *p = NULL;
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ if (FLAGS_SET(done, bit))
+ continue;
+
+ if (to_callback)
+ p = to_callback(bit, userdata);
+ if (!p)
+ p = to;
+
+ (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
+ done |= CGROUP_MASK_EXTEND_JOINED(bit);
+ }
+
+ return r;
+}
+
+int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
+ CGroupController c;
+ CGroupMask done;
+ int r, q;
+
+ r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
+ if (r < 0)
+ return r;
+
+ q = cg_all_unified();
+ if (q < 0)
+ return q;
+ if (q > 0)
+ return r;
+
+ supported &= CGROUP_MASK_V1;
+ done = 0;
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ if (FLAGS_SET(done, bit))
+ continue;
+
+ (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
+ done |= CGROUP_MASK_EXTEND_JOINED(bit);
+ }
+
+ return r;
+}
+
+int cg_mask_to_string(CGroupMask mask, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ size_t n = 0, allocated = 0;
+ bool space = false;
+ CGroupController c;
+
+ assert(ret);
+
+ if (mask == 0) {
+ *ret = NULL;
+ return 0;
+ }
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ const char *k;
+ size_t l;
+
+ if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
+ continue;
+
+ k = cgroup_controller_to_string(c);
+ l = strlen(k);
+
+ if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
+ return -ENOMEM;
+
+ if (space)
+ s[n] = ' ';
+ memcpy(s + n + space, k, l);
+ n += space + l;
+
+ space = true;
+ }
+
+ assert(s);
+
+ s[n] = 0;
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+int cg_mask_from_string(const char *value, CGroupMask *ret) {
+ CGroupMask m = 0;
+
+ assert(ret);
+ assert(value);
+
+ for (;;) {
+ _cleanup_free_ char *n = NULL;
+ CGroupController v;
+ int r;
+
+ r = extract_first_word(&value, &n, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ v = cgroup_controller_from_string(n);
+ if (v < 0)
+ continue;
+
+ m |= CGROUP_CONTROLLER_TO_MASK(v);
+ }
+
+ *ret = m;
+ return 0;
+}
+
+int cg_mask_supported(CGroupMask *ret) {
+ CGroupMask mask;
+ int r;
+
+ /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense of and that
+ * are actually accessible. Only covers real controllers, i.e. not the CGROUP_CONTROLLER_BPF_xyz
+ * pseudo-controllers. */
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
+
+ /* In the unified hierarchy we can read the supported
+ * and accessible controllers from a the top-level
+ * cgroup attribute */
+
+ r = cg_get_root_path(&root);
+ if (r < 0)
+ return r;
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(path, &controllers);
+ if (r < 0)
+ return r;
+
+ r = cg_mask_from_string(controllers, &mask);
+ if (r < 0)
+ return r;
+
+ /* Currently, we support the cpu, memory, io and pids controller in the unified hierarchy, mask
+ * everything else off. */
+ mask &= CGROUP_MASK_V2;
+
+ } else {
+ CGroupController c;
+
+ /* In the legacy hierarchy, we check which hierarchies are mounted. */
+
+ mask = 0;
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *n;
+
+ if (!FLAGS_SET(CGROUP_MASK_V1, bit))
+ continue;
+
+ n = cgroup_controller_to_string(c);
+ if (controller_is_accessible(n) >= 0)
+ mask |= bit;
+ }
+ }
+
+ *ret = mask;
+ return 0;
+}
+
+int cg_kernel_controllers(Set **ret) {
+ _cleanup_set_free_free_ Set *controllers = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(ret);
+
+ /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
+ * and controllers that aren't currently accessible (because not mounted). This does not include "name="
+ * pseudo-controllers. */
+
+ controllers = set_new(&string_hash_ops);
+ if (!controllers)
+ return -ENOMEM;
+
+ f = fopen("/proc/cgroups", "re");
+ if (!f) {
+ if (errno == ENOENT) {
+ *ret = NULL;
+ return 0;
+ }
+
+ return -errno;
+ }
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ /* Ignore the header line */
+ (void) read_line(f, (size_t) -1, NULL);
+
+ for (;;) {
+ char *controller;
+ int enabled = 0;
+
+ errno = 0;
+ if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
+
+ if (feof(f))
+ break;
+
+ if (ferror(f) && errno > 0)
+ return -errno;
+
+ return -EBADMSG;
+ }
+
+ if (!enabled) {
+ free(controller);
+ continue;
+ }
+
+ if (!cg_controller_is_valid(controller)) {
+ free(controller);
+ return -EBADMSG;
+ }
+
+ r = set_consume(controllers, controller);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(controllers);
+
+ return 0;
+}
+
+static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
+
+/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on /sys/fs/cgroup/systemd. This
+ * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
+ * /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mountnbs v2 on /sys/fs/cgroup/unified and maintains
+ * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
+ *
+ * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep cgroup v2
+ * process management but disable the compat dual layout, we return %true on
+ * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
+ */
+static thread_local bool unified_systemd_v232;
+
+static int cg_unified_update(void) {
+
+ struct statfs fs;
+
+ /* Checks if we support the unified hierarchy. Returns an
+ * error when the cgroup hierarchies aren't mounted yet or we
+ * have any other trouble determining if the unified hierarchy
+ * is supported. */
+
+ if (unified_cache >= CGROUP_UNIFIED_NONE)
+ return 0;
+
+ if (statfs("/sys/fs/cgroup/", &fs) < 0)
+ return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
+
+ if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+ log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
+ unified_cache = CGROUP_UNIFIED_ALL;
+ } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
+ if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
+ F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+ log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
+ unified_cache = CGROUP_UNIFIED_SYSTEMD;
+ unified_systemd_v232 = false;
+ } else {
+ if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
+ return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
+
+ if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+ log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
+ unified_cache = CGROUP_UNIFIED_SYSTEMD;
+ unified_systemd_v232 = true;
+ } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
+ log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
+ unified_cache = CGROUP_UNIFIED_NONE;
+ } else {
+ log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
+ (unsigned long long) fs.f_type);
+ unified_cache = CGROUP_UNIFIED_NONE;
+ }
+ }
+ } else
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
+ "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
+ (unsigned long long)fs.f_type);
+
+ return 0;
+}
+
+int cg_unified_controller(const char *controller) {
+ int r;
+
+ r = cg_unified_update();
+ if (r < 0)
+ return r;
+
+ if (unified_cache == CGROUP_UNIFIED_NONE)
+ return false;
+
+ if (unified_cache >= CGROUP_UNIFIED_ALL)
+ return true;
+
+ return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
+}
+
+int cg_all_unified(void) {
+ int r;
+
+ r = cg_unified_update();
+ if (r < 0)
+ return r;
+
+ return unified_cache >= CGROUP_UNIFIED_ALL;
+}
+
+int cg_hybrid_unified(void) {
+ int r;
+
+ r = cg_unified_update();
+ if (r < 0)
+ return r;
+
+ return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
+}
+
+int cg_unified_flush(void) {
+ unified_cache = CGROUP_UNIFIED_UNKNOWN;
+
+ return cg_unified_update();
+}
+
+int cg_enable_everywhere(
+ CGroupMask supported,
+ CGroupMask mask,
+ const char *p,
+ CGroupMask *ret_result_mask) {
+
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *fs = NULL;
+ CGroupController c;
+ CGroupMask ret = 0;
+ int r;
+
+ assert(p);
+
+ if (supported == 0) {
+ if (ret_result_mask)
+ *ret_result_mask = 0;
+ return 0;
+ }
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* On the legacy hiearchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
+ * complete success right away. (If you wonder why we return the full mask here, rather than zero: the
+ * caller tends to use the returned mask later on to compare if all controllers where properly joined,
+ * and if not requeues realization. This use is the primary purpose of the return value, hence let's
+ * minimize surprises here and reduce triggers for re-realization by always saying we fully
+ * succeeded.) */
+ if (ret_result_mask)
+ *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
+ * CGROUP_MASK_V2: The 'supported' mask
+ * might contain pure-V1 or BPF
+ * controllers, and we never want to
+ * claim that we could enable those with
+ * cgroup.subtree_control */
+ return 0;
+ }
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
+ if (r < 0)
+ return r;
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *n;
+
+ if (!FLAGS_SET(CGROUP_MASK_V2, bit))
+ continue;
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ n = cgroup_controller_to_string(c);
+ {
+ char s[1 + strlen(n) + 1];
+
+ s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
+ strcpy(s + 1, n);
+
+ if (!f) {
+ f = fopen(fs, "we");
+ if (!f)
+ return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
+ }
+
+ r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
+ FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
+ clearerr(f);
+
+ /* If we can't turn off a controller, leave it on in the reported resulting mask. This
+ * happens for example when we attempt to turn off a controller up in the tree that is
+ * used down in the tree. */
+ if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
+ * only here, and not follow the same logic
+ * for other errors such as EINVAL or
+ * EOPNOTSUPP or anything else. That's
+ * because EBUSY indicates that the
+ * controllers is currently enabled and
+ * cannot be disabled because something down
+ * the hierarchy is still using it. Any other
+ * error most likely means something like "I
+ * never heard of this controller" or
+ * similar. In the former case it's hence
+ * safe to assume the controller is still on
+ * after the failed operation, while in the
+ * latter case it's safer to assume the
+ * controller is unknown and hence certainly
+ * not enabled. */
+ ret |= bit;
+ } else {
+ /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
+ if (FLAGS_SET(mask, bit))
+ ret |= bit;
+ }
+ }
+ }
+
+ /* Let's return the precise set of controllers now enabled for the cgroup. */
+ if (ret_result_mask)
+ *ret_result_mask = ret;
+
+ return 0;
+}
+
+bool cg_is_unified_wanted(void) {
+ static thread_local int wanted = -1;
+ int r;
+ bool b;
+ const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
+ _cleanup_free_ char *c = NULL;
+
+ /* If we have a cached value, return that. */
+ if (wanted >= 0)
+ return wanted;
+
+ /* If the hierarchy is already mounted, then follow whatever
+ * was chosen for it. */
+ if (cg_unified_flush() >= 0)
+ return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
+
+ /* If we were explicitly passed systemd.unified_cgroup_hierarchy,
+ * respect that. */
+ r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
+ if (r > 0)
+ return (wanted = b);
+
+ /* If we passed cgroup_no_v1=all with no other instructions, it seems
+ * highly unlikely that we want to use hybrid or legacy hierarchy. */
+ r = proc_cmdline_get_key("cgroup_no_v1", 0, &c);
+ if (r > 0 && streq_ptr(c, "all"))
+ return (wanted = true);
+
+ return (wanted = is_default);
+}
+
+bool cg_is_legacy_wanted(void) {
+ static thread_local int wanted = -1;
+
+ /* If we have a cached value, return that. */
+ if (wanted >= 0)
+ return wanted;
+
+ /* Check if we have cgroup v2 already mounted. */
+ if (cg_unified_flush() >= 0 &&
+ unified_cache == CGROUP_UNIFIED_ALL)
+ return (wanted = false);
+
+ /* Otherwise, assume that at least partial legacy is wanted,
+ * since cgroup v2 should already be mounted at this point. */
+ return (wanted = true);
+}
+
+bool cg_is_hybrid_wanted(void) {
+ static thread_local int wanted = -1;
+ int r;
+ bool b;
+ const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
+ /* We default to true if the default is "hybrid", obviously,
+ * but also when the default is "unified", because if we get
+ * called, it means that unified hierarchy was not mounted. */
+
+ /* If we have a cached value, return that. */
+ if (wanted >= 0)
+ return wanted;
+
+ /* If the hierarchy is already mounted, then follow whatever
+ * was chosen for it. */
+ if (cg_unified_flush() >= 0 &&
+ unified_cache == CGROUP_UNIFIED_ALL)
+ return (wanted = false);
+
+ /* Otherwise, let's see what the kernel command line has to say.
+ * Since checking is expensive, cache a non-error result. */
+ r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
+
+ /* The meaning of the kernel option is reversed wrt. to the return value
+ * of this function, hence the negation. */
+ return (wanted = r > 0 ? !b : is_default);
+}
+
+int cg_weight_parse(const char *s, uint64_t *ret) {
+ uint64_t u;
+ int r;
+
+ if (isempty(s)) {
+ *ret = CGROUP_WEIGHT_INVALID;
+ return 0;
+ }
+
+ r = safe_atou64(s, &u);
+ if (r < 0)
+ return r;
+
+ if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
+ return -ERANGE;
+
+ *ret = u;
+ return 0;
+}
+
+const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
+ [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
+ [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
+ [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
+ [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
+};
+
+static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
+ [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
+ [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
+ [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
+ [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
+
+int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
+ uint64_t u;
+ int r;
+
+ if (isempty(s)) {
+ *ret = CGROUP_CPU_SHARES_INVALID;
+ return 0;
+ }
+
+ r = safe_atou64(s, &u);
+ if (r < 0)
+ return r;
+
+ if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
+ return -ERANGE;
+
+ *ret = u;
+ return 0;
+}
+
+int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
+ uint64_t u;
+ int r;
+
+ if (isempty(s)) {
+ *ret = CGROUP_BLKIO_WEIGHT_INVALID;
+ return 0;
+ }
+
+ r = safe_atou64(s, &u);
+ if (r < 0)
+ return r;
+
+ if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
+ return -ERANGE;
+
+ *ret = u;
+ return 0;
+}
+
+bool is_cgroup_fs(const struct statfs *s) {
+ return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
+ is_fs_type(s, CGROUP2_SUPER_MAGIC);
+}
+
+bool fd_is_cgroup_fs(int fd) {
+ struct statfs s;
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ return is_cgroup_fs(&s);
+}
+
+static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
+ [CGROUP_CONTROLLER_CPU] = "cpu",
+ [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
+ [CGROUP_CONTROLLER_IO] = "io",
+ [CGROUP_CONTROLLER_BLKIO] = "blkio",
+ [CGROUP_CONTROLLER_MEMORY] = "memory",
+ [CGROUP_CONTROLLER_DEVICES] = "devices",
+ [CGROUP_CONTROLLER_PIDS] = "pids",
+ [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
+ [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
+
+CGroupMask get_cpu_accounting_mask(void) {
+ static CGroupMask needed_mask = (CGroupMask) -1;
+
+ /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
+ * provided externally from the CPU controller, which means we don't
+ * need to enable the CPU controller just to get metrics. This is good,
+ * because enabling the CPU controller comes at a minor performance
+ * hit, especially when it's propagated deep into large hierarchies.
+ * There's also no separate CPU accounting controller available within
+ * a unified hierarchy.
+ *
+ * This combination of factors results in the desired cgroup mask to
+ * enable for CPU accounting varying as follows:
+ *
+ * ╔═════════════════════╤═════════════════════╗
+ * ║ Linux ≥4.15 │ Linux <4.15 ║
+ * ╔═══════════════╬═════════════════════╪═════════════════════╣
+ * ║ Unified ║ nothing │ CGROUP_MASK_CPU ║
+ * ╟───────────────╫─────────────────────┼─────────────────────╢
+ * ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
+ * ╚═══════════════╩═════════════════════╧═════════════════════╝
+ *
+ * We check kernel version here instead of manually checking whether
+ * cpu.stat is present for every cgroup, as that check in itself would
+ * already be fairly expensive.
+ *
+ * Kernels where this patch has been backported will therefore have the
+ * CPU controller enabled unnecessarily. This is more expensive than
+ * necessary, but harmless. ☺️
+ */
+
+ if (needed_mask == (CGroupMask) -1) {
+ if (cg_all_unified()) {
+ struct utsname u;
+ assert_se(uname(&u) >= 0);
+
+ if (str_verscmp(u.release, "4.15") < 0)
+ needed_mask = CGROUP_MASK_CPU;
+ else
+ needed_mask = 0;
+ } else
+ needed_mask = CGROUP_MASK_CPUACCT;
+ }
+
+ return needed_mask;
+}
+
+bool cpu_accounting_is_cheap(void) {
+ return get_cpu_accounting_mask() == 0;
+}
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
new file mode 100644
index 0000000..119b493
--- /dev/null
+++ b/src/basic/cgroup-util.h
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+
+#include "def.h"
+#include "set.h"
+
+#define SYSTEMD_CGROUP_CONTROLLER_LEGACY "name=systemd"
+#define SYSTEMD_CGROUP_CONTROLLER_HYBRID "name=unified"
+#define SYSTEMD_CGROUP_CONTROLLER "_systemd"
+
+/* An enum of well known cgroup controllers */
+typedef enum CGroupController {
+ /* Original cgroup controllers */
+ CGROUP_CONTROLLER_CPU,
+ CGROUP_CONTROLLER_CPUACCT, /* v1 only */
+ CGROUP_CONTROLLER_IO, /* v2 only */
+ CGROUP_CONTROLLER_BLKIO, /* v1 only */
+ CGROUP_CONTROLLER_MEMORY,
+ CGROUP_CONTROLLER_DEVICES, /* v1 only */
+ CGROUP_CONTROLLER_PIDS,
+
+ /* BPF-based pseudo-controllers, v2 only */
+ CGROUP_CONTROLLER_BPF_FIREWALL,
+ CGROUP_CONTROLLER_BPF_DEVICES,
+
+ _CGROUP_CONTROLLER_MAX,
+ _CGROUP_CONTROLLER_INVALID = -1,
+} CGroupController;
+
+#define CGROUP_CONTROLLER_TO_MASK(c) (1U << (c))
+
+/* A bit mask of well known cgroup controllers */
+typedef enum CGroupMask {
+ CGROUP_MASK_CPU = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPU),
+ CGROUP_MASK_CPUACCT = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUACCT),
+ CGROUP_MASK_IO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_IO),
+ CGROUP_MASK_BLKIO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BLKIO),
+ CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY),
+ CGROUP_MASK_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES),
+ CGROUP_MASK_PIDS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS),
+ CGROUP_MASK_BPF_FIREWALL = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FIREWALL),
+ CGROUP_MASK_BPF_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_DEVICES),
+
+ /* All real cgroup v1 controllers */
+ CGROUP_MASK_V1 = CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT|CGROUP_MASK_BLKIO|CGROUP_MASK_MEMORY|CGROUP_MASK_DEVICES|CGROUP_MASK_PIDS,
+
+ /* All real cgroup v2 controllers */
+ CGROUP_MASK_V2 = CGROUP_MASK_CPU|CGROUP_MASK_IO|CGROUP_MASK_MEMORY|CGROUP_MASK_PIDS,
+
+ /* All cgroup v2 BPF pseudo-controllers */
+ CGROUP_MASK_BPF = CGROUP_MASK_BPF_FIREWALL|CGROUP_MASK_BPF_DEVICES,
+
+ _CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1
+} CGroupMask;
+
+static inline CGroupMask CGROUP_MASK_EXTEND_JOINED(CGroupMask mask) {
+ /* We always mount "cpu" and "cpuacct" in the same hierarchy. Hence, when one bit is set also set the other */
+
+ if (mask & (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT))
+ mask |= (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT);
+
+ return mask;
+}
+
+CGroupMask get_cpu_accounting_mask(void);
+bool cpu_accounting_is_cheap(void);
+
+/* Special values for all weight knobs on unified hierarchy */
+#define CGROUP_WEIGHT_INVALID ((uint64_t) -1)
+#define CGROUP_WEIGHT_MIN UINT64_C(1)
+#define CGROUP_WEIGHT_MAX UINT64_C(10000)
+#define CGROUP_WEIGHT_DEFAULT UINT64_C(100)
+
+#define CGROUP_LIMIT_MIN UINT64_C(0)
+#define CGROUP_LIMIT_MAX ((uint64_t) -1)
+
+static inline bool CGROUP_WEIGHT_IS_OK(uint64_t x) {
+ return
+ x == CGROUP_WEIGHT_INVALID ||
+ (x >= CGROUP_WEIGHT_MIN && x <= CGROUP_WEIGHT_MAX);
+}
+
+/* IO limits on unified hierarchy */
+typedef enum CGroupIOLimitType {
+ CGROUP_IO_RBPS_MAX,
+ CGROUP_IO_WBPS_MAX,
+ CGROUP_IO_RIOPS_MAX,
+ CGROUP_IO_WIOPS_MAX,
+
+ _CGROUP_IO_LIMIT_TYPE_MAX,
+ _CGROUP_IO_LIMIT_TYPE_INVALID = -1
+} CGroupIOLimitType;
+
+extern const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX];
+
+const char* cgroup_io_limit_type_to_string(CGroupIOLimitType t) _const_;
+CGroupIOLimitType cgroup_io_limit_type_from_string(const char *s) _pure_;
+
+/* Special values for the cpu.shares attribute */
+#define CGROUP_CPU_SHARES_INVALID ((uint64_t) -1)
+#define CGROUP_CPU_SHARES_MIN UINT64_C(2)
+#define CGROUP_CPU_SHARES_MAX UINT64_C(262144)
+#define CGROUP_CPU_SHARES_DEFAULT UINT64_C(1024)
+
+static inline bool CGROUP_CPU_SHARES_IS_OK(uint64_t x) {
+ return
+ x == CGROUP_CPU_SHARES_INVALID ||
+ (x >= CGROUP_CPU_SHARES_MIN && x <= CGROUP_CPU_SHARES_MAX);
+}
+
+/* Special values for the blkio.weight attribute */
+#define CGROUP_BLKIO_WEIGHT_INVALID ((uint64_t) -1)
+#define CGROUP_BLKIO_WEIGHT_MIN UINT64_C(10)
+#define CGROUP_BLKIO_WEIGHT_MAX UINT64_C(1000)
+#define CGROUP_BLKIO_WEIGHT_DEFAULT UINT64_C(500)
+
+static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) {
+ return
+ x == CGROUP_BLKIO_WEIGHT_INVALID ||
+ (x >= CGROUP_BLKIO_WEIGHT_MIN && x <= CGROUP_BLKIO_WEIGHT_MAX);
+}
+
+/* Default resource limits */
+#define DEFAULT_TASKS_MAX_PERCENTAGE 15U /* 15% of PIDs, 4915 on default settings */
+#define DEFAULT_USER_TASKS_MAX_PERCENTAGE 33U /* 33% of PIDs, 10813 on default settings */
+
+typedef enum CGroupUnified {
+ CGROUP_UNIFIED_UNKNOWN = -1,
+ CGROUP_UNIFIED_NONE = 0, /* Both systemd and controllers on legacy */
+ CGROUP_UNIFIED_SYSTEMD = 1, /* Only systemd on unified */
+ CGROUP_UNIFIED_ALL = 2, /* Both systemd and controllers on unified */
+} CGroupUnified;
+
+/*
+ * General rules:
+ *
+ * We accept named hierarchies in the syntax "foo" and "name=foo".
+ *
+ * We expect that named hierarchies do not conflict in name with a
+ * kernel hierarchy, modulo the "name=" prefix.
+ *
+ * We always generate "normalized" controller names, i.e. without the
+ * "name=" prefix.
+ *
+ * We require absolute cgroup paths. When returning, we will always
+ * generate paths with multiple adjacent / removed.
+ */
+
+int cg_enumerate_processes(const char *controller, const char *path, FILE **_f);
+int cg_read_pid(FILE *f, pid_t *_pid);
+int cg_read_event(const char *controller, const char *path, const char *event,
+ char **val);
+
+int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d);
+int cg_read_subgroup(DIR *d, char **fn);
+
+typedef enum CGroupFlags {
+ CGROUP_SIGCONT = 1 << 0,
+ CGROUP_IGNORE_SELF = 1 << 1,
+ CGROUP_REMOVE = 1 << 2,
+} CGroupFlags;
+
+typedef void (*cg_kill_log_func_t)(pid_t pid, int sig, void *userdata);
+
+int cg_kill(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata);
+int cg_kill_recursive(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata);
+
+int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
+int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
+int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
+
+int cg_split_spec(const char *spec, char **controller, char **path);
+int cg_mangle_path(const char *path, char **result);
+
+int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs);
+int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs);
+
+int cg_pid_get_path(const char *controller, pid_t pid, char **path);
+
+int cg_trim(const char *controller, const char *path, bool delete_root);
+
+int cg_rmdir(const char *controller, const char *path);
+
+int cg_create(const char *controller, const char *path);
+int cg_attach(const char *controller, const char *path, pid_t pid);
+int cg_attach_fallback(const char *controller, const char *path, pid_t pid);
+int cg_create_and_attach(const char *controller, const char *path, pid_t pid);
+
+int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value);
+int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret);
+int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, char **keys, char **values);
+
+int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid);
+
+int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags);
+int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size);
+
+int cg_install_release_agent(const char *controller, const char *agent);
+int cg_uninstall_release_agent(const char *controller);
+
+int cg_is_empty(const char *controller, const char *path);
+int cg_is_empty_recursive(const char *controller, const char *path);
+
+int cg_get_root_path(char **path);
+
+int cg_path_get_session(const char *path, char **session);
+int cg_path_get_owner_uid(const char *path, uid_t *uid);
+int cg_path_get_unit(const char *path, char **unit);
+int cg_path_get_user_unit(const char *path, char **unit);
+int cg_path_get_machine_name(const char *path, char **machine);
+int cg_path_get_slice(const char *path, char **slice);
+int cg_path_get_user_slice(const char *path, char **slice);
+
+int cg_shift_path(const char *cgroup, const char *cached_root, const char **shifted);
+int cg_pid_get_path_shifted(pid_t pid, const char *cached_root, char **cgroup);
+
+int cg_pid_get_session(pid_t pid, char **session);
+int cg_pid_get_owner_uid(pid_t pid, uid_t *uid);
+int cg_pid_get_unit(pid_t pid, char **unit);
+int cg_pid_get_user_unit(pid_t pid, char **unit);
+int cg_pid_get_machine_name(pid_t pid, char **machine);
+int cg_pid_get_slice(pid_t pid, char **slice);
+int cg_pid_get_user_slice(pid_t pid, char **slice);
+
+int cg_path_decode_unit(const char *cgroup, char **unit);
+
+char *cg_escape(const char *p);
+char *cg_unescape(const char *p) _pure_;
+
+bool cg_controller_is_valid(const char *p);
+
+int cg_slice_to_path(const char *unit, char **ret);
+
+typedef const char* (*cg_migrate_callback_t)(CGroupMask mask, void *userdata);
+
+int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path);
+int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata);
+int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata);
+int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata);
+int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root);
+int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask);
+
+int cg_mask_supported(CGroupMask *ret);
+int cg_mask_from_string(const char *s, CGroupMask *ret);
+int cg_mask_to_string(CGroupMask mask, char **ret);
+
+int cg_kernel_controllers(Set **controllers);
+
+bool cg_ns_supported(void);
+
+int cg_all_unified(void);
+int cg_hybrid_unified(void);
+int cg_unified_controller(const char *controller);
+int cg_unified_flush(void);
+
+bool cg_is_unified_wanted(void);
+bool cg_is_legacy_wanted(void);
+bool cg_is_hybrid_wanted(void);
+
+const char* cgroup_controller_to_string(CGroupController c) _const_;
+CGroupController cgroup_controller_from_string(const char *s) _pure_;
+
+int cg_weight_parse(const char *s, uint64_t *ret);
+int cg_cpu_shares_parse(const char *s, uint64_t *ret);
+int cg_blkio_weight_parse(const char *s, uint64_t *ret);
+
+bool is_cgroup_fs(const struct statfs *s);
+bool fd_is_cgroup_fs(int fd);
diff --git a/src/basic/chattr-util.c b/src/basic/chattr-util.c
new file mode 100644
index 0000000..235cfb9
--- /dev/null
+++ b/src/basic/chattr-util.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <linux/fs.h>
+
+#include "chattr-util.h"
+#include "fd-util.h"
+#include "macro.h"
+
+int chattr_fd(int fd, unsigned value, unsigned mask, unsigned *previous) {
+ unsigned old_attr, new_attr;
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* Explicitly check whether this is a regular file or
+ * directory. If it is anything else (such as a device node or
+ * fifo), then the ioctl will not hit the file systems but
+ * possibly drivers, where the ioctl might have different
+ * effects. Notably, DRM is using the same ioctl() number. */
+
+ if (!S_ISDIR(st.st_mode) && !S_ISREG(st.st_mode))
+ return -ENOTTY;
+
+ if (mask == 0 && !previous)
+ return 0;
+
+ if (ioctl(fd, FS_IOC_GETFLAGS, &old_attr) < 0)
+ return -errno;
+
+ new_attr = (old_attr & ~mask) | (value & mask);
+ if (new_attr == old_attr) {
+ if (previous)
+ *previous = old_attr;
+ return 0;
+ }
+
+ if (ioctl(fd, FS_IOC_SETFLAGS, &new_attr) < 0)
+ return -errno;
+
+ if (previous)
+ *previous = old_attr;
+
+ return 1;
+}
+
+int chattr_path(const char *p, unsigned value, unsigned mask, unsigned *previous) {
+ _cleanup_close_ int fd = -1;
+
+ assert(p);
+
+ if (mask == 0)
+ return 0;
+
+ fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return chattr_fd(fd, value, mask, previous);
+}
+
+int read_attr_fd(int fd, unsigned *ret) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode) && !S_ISREG(st.st_mode))
+ return -ENOTTY;
+
+ if (ioctl(fd, FS_IOC_GETFLAGS, ret) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int read_attr_path(const char *p, unsigned *ret) {
+ _cleanup_close_ int fd = -1;
+
+ assert(p);
+ assert(ret);
+
+ fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return read_attr_fd(fd, ret);
+}
diff --git a/src/basic/chattr-util.h b/src/basic/chattr-util.h
new file mode 100644
index 0000000..7570bba
--- /dev/null
+++ b/src/basic/chattr-util.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int chattr_fd(int fd, unsigned value, unsigned mask, unsigned *previous);
+int chattr_path(const char *p, unsigned value, unsigned mask, unsigned *previous);
+
+int read_attr_fd(int fd, unsigned *ret);
+int read_attr_path(const char *p, unsigned *ret);
diff --git a/src/basic/conf-files.c b/src/basic/conf-files.c
new file mode 100644
index 0000000..b70c6e5
--- /dev/null
+++ b/src/basic/conf-files.c
@@ -0,0 +1,325 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "conf-files.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "path-util.h"
+#include "set.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static int files_add(
+ Hashmap *h,
+ Set *masked,
+ const char *suffix,
+ const char *root,
+ unsigned flags,
+ const char *path) {
+
+ _cleanup_closedir_ DIR *dir = NULL;
+ const char *dirpath;
+ struct dirent *de;
+ int r;
+
+ assert(h);
+ assert((flags & CONF_FILES_FILTER_MASKED) == 0 || masked);
+ assert(path);
+
+ dirpath = prefix_roota(root, path);
+
+ dir = opendir(dirpath);
+ if (!dir) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_debug_errno(errno, "Failed to open directory '%s': %m", dirpath);
+ }
+
+ FOREACH_DIRENT(de, dir, return -errno) {
+ struct stat st;
+ char *p, *key;
+
+ /* Does this match the suffix? */
+ if (suffix && !endswith(de->d_name, suffix))
+ continue;
+
+ /* Has this file already been found in an earlier directory? */
+ if (hashmap_contains(h, de->d_name)) {
+ log_debug("Skipping overridden file '%s/%s'.", dirpath, de->d_name);
+ continue;
+ }
+
+ /* Has this been masked in an earlier directory? */
+ if ((flags & CONF_FILES_FILTER_MASKED) && set_contains(masked, de->d_name)) {
+ log_debug("File '%s/%s' is masked by previous entry.", dirpath, de->d_name);
+ continue;
+ }
+
+ /* Read file metadata if we shall validate the check for file masks, for node types or whether the node is marked executable. */
+ if (flags & (CONF_FILES_FILTER_MASKED|CONF_FILES_REGULAR|CONF_FILES_DIRECTORY|CONF_FILES_EXECUTABLE))
+ if (fstatat(dirfd(dir), de->d_name, &st, 0) < 0) {
+ log_debug_errno(errno, "Failed to stat '%s/%s', ignoring: %m", dirpath, de->d_name);
+ continue;
+ }
+
+ /* Is this a masking entry? */
+ if ((flags & CONF_FILES_FILTER_MASKED))
+ if (null_or_empty(&st)) {
+ /* Mark this one as masked */
+ r = set_put_strdup(masked, de->d_name);
+ if (r < 0)
+ return r;
+
+ log_debug("File '%s/%s' is a mask.", dirpath, de->d_name);
+ continue;
+ }
+
+ /* Does this node have the right type? */
+ if (flags & (CONF_FILES_REGULAR|CONF_FILES_DIRECTORY))
+ if (!((flags & CONF_FILES_DIRECTORY) && S_ISDIR(st.st_mode)) &&
+ !((flags & CONF_FILES_REGULAR) && S_ISREG(st.st_mode))) {
+ log_debug("Ignoring '%s/%s', as it is not a of the right type.", dirpath, de->d_name);
+ continue;
+ }
+
+ /* Does this node have the executable bit set? */
+ if (flags & CONF_FILES_EXECUTABLE)
+ /* As requested: check if the file is marked exectuable. Note that we don't check access(X_OK)
+ * here, as we care about whether the file is marked executable at all, and not whether it is
+ * executable for us, because if so, such errors are stuff we should log about. */
+
+ if ((st.st_mode & 0111) == 0) { /* not executable */
+ log_debug("Ignoring '%s/%s', as it is not marked executable.", dirpath, de->d_name);
+ continue;
+ }
+
+ if (flags & CONF_FILES_BASENAME) {
+ p = strdup(de->d_name);
+ if (!p)
+ return -ENOMEM;
+
+ key = p;
+ } else {
+ p = strjoin(dirpath, "/", de->d_name);
+ if (!p)
+ return -ENOMEM;
+
+ key = basename(p);
+ }
+
+ r = hashmap_put(h, key, p);
+ if (r < 0) {
+ free(p);
+ return log_debug_errno(r, "Failed to add item to hashmap: %m");
+ }
+
+ assert(r > 0);
+ }
+
+ return 0;
+}
+
+static int base_cmp(char * const *a, char * const *b) {
+ return strcmp(basename(*a), basename(*b));
+}
+
+static int conf_files_list_strv_internal(char ***strv, const char *suffix, const char *root, unsigned flags, char **dirs) {
+ _cleanup_hashmap_free_ Hashmap *fh = NULL;
+ _cleanup_set_free_free_ Set *masked = NULL;
+ char **files, **p;
+ int r;
+
+ assert(strv);
+
+ /* This alters the dirs string array */
+ if (!path_strv_resolve_uniq(dirs, root))
+ return -ENOMEM;
+
+ fh = hashmap_new(&path_hash_ops);
+ if (!fh)
+ return -ENOMEM;
+
+ if (flags & CONF_FILES_FILTER_MASKED) {
+ masked = set_new(&path_hash_ops);
+ if (!masked)
+ return -ENOMEM;
+ }
+
+ STRV_FOREACH(p, dirs) {
+ r = files_add(fh, masked, suffix, root, flags, *p);
+ if (r == -ENOMEM)
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to search for files in %s, ignoring: %m", *p);
+ }
+
+ files = hashmap_get_strv(fh);
+ if (!files)
+ return -ENOMEM;
+
+ typesafe_qsort(files, hashmap_size(fh), base_cmp);
+ *strv = files;
+
+ return 0;
+}
+
+int conf_files_insert(char ***strv, const char *root, char **dirs, const char *path) {
+ /* Insert a path into strv, at the place honouring the usual sorting rules:
+ * - we first compare by the basename
+ * - and then we compare by dirname, allowing just one file with the given
+ * basename.
+ * This means that we will
+ * - add a new entry if basename(path) was not on the list,
+ * - do nothing if an entry with higher priority was already present,
+ * - do nothing if our new entry matches the existing entry,
+ * - replace the existing entry if our new entry has higher priority.
+ */
+ size_t i, n;
+ char *t;
+ int r;
+
+ n = strv_length(*strv);
+ for (i = 0; i < n; i++) {
+ int c;
+
+ c = base_cmp((char* const*) *strv + i, (char* const*) &path);
+ if (c == 0) {
+ char **dir;
+
+ /* Oh, there already is an entry with a matching name (the last component). */
+
+ STRV_FOREACH(dir, dirs) {
+ _cleanup_free_ char *rdir = NULL;
+ char *p1, *p2;
+
+ rdir = prefix_root(root, *dir);
+ if (!rdir)
+ return -ENOMEM;
+
+ p1 = path_startswith((*strv)[i], rdir);
+ if (p1)
+ /* Existing entry with higher priority
+ * or same priority, no need to do anything. */
+ return 0;
+
+ p2 = path_startswith(path, *dir);
+ if (p2) {
+ /* Our new entry has higher priority */
+
+ t = prefix_root(root, path);
+ if (!t)
+ return log_oom();
+
+ return free_and_replace((*strv)[i], t);
+ }
+ }
+
+ } else if (c > 0)
+ /* Following files have lower priority, let's go insert our
+ * new entry. */
+ break;
+
+ /* … we are not there yet, let's continue */
+ }
+
+ /* The new file has lower priority than all the existing entries */
+ t = prefix_root(root, path);
+ if (!t)
+ return -ENOMEM;
+
+ r = strv_insert(strv, i, t);
+ if (r < 0)
+ free(t);
+
+ return r;
+}
+
+int conf_files_list_strv(char ***strv, const char *suffix, const char *root, unsigned flags, const char* const* dirs) {
+ _cleanup_strv_free_ char **copy = NULL;
+
+ assert(strv);
+
+ copy = strv_copy((char**) dirs);
+ if (!copy)
+ return -ENOMEM;
+
+ return conf_files_list_strv_internal(strv, suffix, root, flags, copy);
+}
+
+int conf_files_list(char ***strv, const char *suffix, const char *root, unsigned flags, const char *dir, ...) {
+ _cleanup_strv_free_ char **dirs = NULL;
+ va_list ap;
+
+ assert(strv);
+
+ va_start(ap, dir);
+ dirs = strv_new_ap(dir, ap);
+ va_end(ap);
+
+ if (!dirs)
+ return -ENOMEM;
+
+ return conf_files_list_strv_internal(strv, suffix, root, flags, dirs);
+}
+
+int conf_files_list_nulstr(char ***strv, const char *suffix, const char *root, unsigned flags, const char *dirs) {
+ _cleanup_strv_free_ char **d = NULL;
+
+ assert(strv);
+
+ d = strv_split_nulstr(dirs);
+ if (!d)
+ return -ENOMEM;
+
+ return conf_files_list_strv_internal(strv, suffix, root, flags, d);
+}
+
+int conf_files_list_with_replacement(
+ const char *root,
+ char **config_dirs,
+ const char *replacement,
+ char ***files,
+ char **replace_file) {
+
+ _cleanup_strv_free_ char **f = NULL;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(config_dirs);
+ assert(files);
+ assert(replace_file || !replacement);
+
+ r = conf_files_list_strv(&f, ".conf", root, 0, (const char* const*) config_dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate config files: %m");
+
+ if (replacement) {
+ r = conf_files_insert(&f, root, config_dirs, replacement);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extend config file list: %m");
+
+ p = prefix_root(root, replacement);
+ if (!p)
+ return log_oom();
+ }
+
+ *files = TAKE_PTR(f);
+ if (replace_file)
+ *replace_file = TAKE_PTR(p);
+ return 0;
+}
diff --git a/src/basic/conf-files.h b/src/basic/conf-files.h
new file mode 100644
index 0000000..55ab326
--- /dev/null
+++ b/src/basic/conf-files.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "macro.h"
+
+enum {
+ CONF_FILES_EXECUTABLE = 1 << 0,
+ CONF_FILES_REGULAR = 1 << 1,
+ CONF_FILES_DIRECTORY = 1 << 2,
+ CONF_FILES_BASENAME = 1 << 3,
+ CONF_FILES_FILTER_MASKED = 1 << 4,
+};
+
+int conf_files_list(char ***ret, const char *suffix, const char *root, unsigned flags, const char *dir, ...) _sentinel_;
+int conf_files_list_strv(char ***ret, const char *suffix, const char *root, unsigned flags, const char* const* dirs);
+int conf_files_list_nulstr(char ***ret, const char *suffix, const char *root, unsigned flags, const char *dirs);
+int conf_files_insert(char ***strv, const char *root, char **dirs, const char *path);
+int conf_files_list_with_replacement(
+ const char *root,
+ char **config_dirs,
+ const char *replacement,
+ char ***files,
+ char **replace_file);
diff --git a/src/basic/copy.c b/src/basic/copy.c
new file mode 100644
index 0000000..46e02a3
--- /dev/null
+++ b/src/basic/copy.c
@@ -0,0 +1,936 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/sendfile.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "mountpoint-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "xattr-util.h"
+
+#define COPY_BUFFER_SIZE (16U*1024U)
+
+/* A safety net for descending recursively into file system trees to copy. On Linux PATH_MAX is 4096, which means the
+ * deepest valid path one can build is around 2048, which we hence use as a safety net here, to not spin endlessly in
+ * case of bind mount cycles and suchlike. */
+#define COPY_DEPTH_MAX 2048U
+
+static ssize_t try_copy_file_range(
+ int fd_in, loff_t *off_in,
+ int fd_out, loff_t *off_out,
+ size_t len,
+ unsigned flags) {
+
+ static int have = -1;
+ ssize_t r;
+
+ if (have == 0)
+ return -ENOSYS;
+
+ r = copy_file_range(fd_in, off_in, fd_out, off_out, len, flags);
+ if (have < 0)
+ have = r >= 0 || errno != ENOSYS;
+ if (r < 0)
+ return -errno;
+
+ return r;
+}
+
+enum {
+ FD_IS_NO_PIPE,
+ FD_IS_BLOCKING_PIPE,
+ FD_IS_NONBLOCKING_PIPE,
+};
+
+static int fd_is_nonblock_pipe(int fd) {
+ struct stat st;
+ int flags;
+
+ /* Checks whether the specified file descriptor refers to a pipe, and if so if O_NONBLOCK is set. */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISFIFO(st.st_mode))
+ return FD_IS_NO_PIPE;
+
+ flags = fcntl(fd, F_GETFL);
+ if (flags < 0)
+ return -errno;
+
+ return FLAGS_SET(flags, O_NONBLOCK) ? FD_IS_NONBLOCKING_PIPE : FD_IS_BLOCKING_PIPE;
+}
+
+int copy_bytes_full(
+ int fdf, int fdt,
+ uint64_t max_bytes,
+ CopyFlags copy_flags,
+ void **ret_remains,
+ size_t *ret_remains_size,
+ copy_progress_bytes_t progress,
+ void *userdata) {
+
+ bool try_cfr = true, try_sendfile = true, try_splice = true;
+ int r, nonblock_pipe = -1;
+ size_t m = SSIZE_MAX; /* that is the maximum that sendfile and c_f_r accept */
+
+ assert(fdf >= 0);
+ assert(fdt >= 0);
+
+ /* Tries to copy bytes from the file descriptor 'fdf' to 'fdt' in the smartest possible way. Copies a maximum
+ * of 'max_bytes', which may be specified as UINT64_MAX, in which no maximum is applied. Returns negative on
+ * error, zero if EOF is hit before the bytes limit is hit and positive otherwise. If the copy fails for some
+ * reason but we read but didn't yet write some data an ret_remains/ret_remains_size is not NULL, then it will
+ * be initialized with an allocated buffer containing this "remaining" data. Note that these two parameters are
+ * initialized with a valid buffer only on failure and only if there's actually data already read. Otherwise
+ * these parameters if non-NULL are set to NULL. */
+
+ if (ret_remains)
+ *ret_remains = NULL;
+ if (ret_remains_size)
+ *ret_remains_size = 0;
+
+ /* Try btrfs reflinks first. This only works on regular, seekable files, hence let's check the file offsets of
+ * source and destination first. */
+ if ((copy_flags & COPY_REFLINK)) {
+ off_t foffset;
+
+ foffset = lseek(fdf, 0, SEEK_CUR);
+ if (foffset >= 0) {
+ off_t toffset;
+
+ toffset = lseek(fdt, 0, SEEK_CUR);
+ if (toffset >= 0) {
+
+ if (foffset == 0 && toffset == 0 && max_bytes == UINT64_MAX)
+ r = btrfs_reflink(fdf, fdt); /* full file reflink */
+ else
+ r = btrfs_clone_range(fdf, foffset, fdt, toffset, max_bytes == UINT64_MAX ? 0 : max_bytes); /* partial reflink */
+ if (r >= 0) {
+ off_t t;
+
+ /* This worked, yay! Now — to be fully correct — let's adjust the file pointers */
+ if (max_bytes == UINT64_MAX) {
+
+ /* We cloned to the end of the source file, let's position the read
+ * pointer there, and query it at the same time. */
+ t = lseek(fdf, 0, SEEK_END);
+ if (t < 0)
+ return -errno;
+ if (t < foffset)
+ return -ESPIPE;
+
+ /* Let's adjust the destination file write pointer by the same number
+ * of bytes. */
+ t = lseek(fdt, toffset + (t - foffset), SEEK_SET);
+ if (t < 0)
+ return -errno;
+
+ return 0; /* we copied the whole thing, hence hit EOF, return 0 */
+ } else {
+ t = lseek(fdf, foffset + max_bytes, SEEK_SET);
+ if (t < 0)
+ return -errno;
+
+ t = lseek(fdt, toffset + max_bytes, SEEK_SET);
+ if (t < 0)
+ return -errno;
+
+ return 1; /* we copied only some number of bytes, which worked, but this means we didn't hit EOF, return 1 */
+ }
+ }
+ }
+ }
+ }
+
+ for (;;) {
+ ssize_t n;
+
+ if (max_bytes <= 0)
+ return 1; /* return > 0 if we hit the max_bytes limit */
+
+ if (max_bytes != UINT64_MAX && m > max_bytes)
+ m = max_bytes;
+
+ /* First try copy_file_range(), unless we already tried */
+ if (try_cfr) {
+ n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u);
+ if (n < 0) {
+ if (!IN_SET(n, -EINVAL, -ENOSYS, -EXDEV, -EBADF))
+ return n;
+
+ try_cfr = false;
+ /* use fallback below */
+ } else if (n == 0) /* EOF */
+ break;
+ else
+ /* Success! */
+ goto next;
+ }
+
+ /* First try sendfile(), unless we already tried */
+ if (try_sendfile) {
+ n = sendfile(fdt, fdf, NULL, m);
+ if (n < 0) {
+ if (!IN_SET(errno, EINVAL, ENOSYS))
+ return -errno;
+
+ try_sendfile = false;
+ /* use fallback below */
+ } else if (n == 0) /* EOF */
+ break;
+ else
+ /* Success! */
+ goto next;
+ }
+
+ /* Then try splice, unless we already tried. */
+ if (try_splice) {
+
+ /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file
+ * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the
+ * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour here, and
+ * check if either of the specified fds are a pipe, and if so, let's pass the flag
+ * automatically, depending on O_NONBLOCK being set.
+ *
+ * Here's a twist though: when we use it to move data between two pipes of which one has
+ * O_NONBLOCK set and the other has not, then we have no individual control over O_NONBLOCK
+ * behaviour. Hence in that case we can't use splice() and still guarantee systematic
+ * O_NONBLOCK behaviour, hence don't. */
+
+ if (nonblock_pipe < 0) {
+ int a, b;
+
+ /* Check if either of these fds is a pipe, and if so non-blocking or not */
+ a = fd_is_nonblock_pipe(fdf);
+ if (a < 0)
+ return a;
+
+ b = fd_is_nonblock_pipe(fdt);
+ if (b < 0)
+ return b;
+
+ if ((a == FD_IS_NO_PIPE && b == FD_IS_NO_PIPE) ||
+ (a == FD_IS_BLOCKING_PIPE && b == FD_IS_NONBLOCKING_PIPE) ||
+ (a == FD_IS_NONBLOCKING_PIPE && b == FD_IS_BLOCKING_PIPE))
+
+ /* splice() only works if one of the fds is a pipe. If neither is, let's skip
+ * this step right-away. As mentioned above, if one of the two fds refers to a
+ * blocking pipe and the other to a non-blocking pipe, we can't use splice()
+ * either, hence don't try either. This hence means we can only use splice() if
+ * either only one of the two fds is a pipe, or if both are pipes with the same
+ * nonblocking flag setting. */
+
+ try_splice = false;
+ else
+ nonblock_pipe = a == FD_IS_NONBLOCKING_PIPE || b == FD_IS_NONBLOCKING_PIPE;
+ }
+ }
+
+ if (try_splice) {
+ n = splice(fdf, NULL, fdt, NULL, m, nonblock_pipe ? SPLICE_F_NONBLOCK : 0);
+ if (n < 0) {
+ if (!IN_SET(errno, EINVAL, ENOSYS))
+ return -errno;
+
+ try_splice = false;
+ /* use fallback below */
+ } else if (n == 0) /* EOF */
+ break;
+ else
+ /* Success! */
+ goto next;
+ }
+
+ /* As a fallback just copy bits by hand */
+ {
+ uint8_t buf[MIN(m, COPY_BUFFER_SIZE)], *p = buf;
+ ssize_t z;
+
+ n = read(fdf, buf, sizeof buf);
+ if (n < 0)
+ return -errno;
+ if (n == 0) /* EOF */
+ break;
+
+ z = (size_t) n;
+ do {
+ ssize_t k;
+
+ k = write(fdt, p, z);
+ if (k < 0) {
+ r = -errno;
+
+ if (ret_remains) {
+ void *copy;
+
+ copy = memdup(p, z);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret_remains = copy;
+ }
+
+ if (ret_remains_size)
+ *ret_remains_size = z;
+
+ return r;
+ }
+
+ assert(k <= z);
+ z -= k;
+ p += k;
+ } while (z > 0);
+ }
+
+ next:
+ if (progress) {
+ r = progress(n, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ if (max_bytes != (uint64_t) -1) {
+ assert(max_bytes >= (uint64_t) n);
+ max_bytes -= n;
+ }
+
+ /* sendfile accepts at most SSIZE_MAX-offset bytes to copy,
+ * so reduce our maximum by the amount we already copied,
+ * but don't go below our copy buffer size, unless we are
+ * close the limit of bytes we are allowed to copy. */
+ m = MAX(MIN(COPY_BUFFER_SIZE, max_bytes), m - n);
+ }
+
+ return 0; /* return 0 if we hit EOF earlier than the size limit */
+}
+
+static int fd_copy_symlink(
+ int df,
+ const char *from,
+ const struct stat *st,
+ int dt,
+ const char *to,
+ uid_t override_uid,
+ gid_t override_gid,
+ CopyFlags copy_flags) {
+
+ _cleanup_free_ char *target = NULL;
+ int r;
+
+ assert(from);
+ assert(st);
+ assert(to);
+
+ r = readlinkat_malloc(df, from, &target);
+ if (r < 0)
+ return r;
+
+ if (symlinkat(target, dt, to) < 0)
+ return -errno;
+
+ if (fchownat(dt, to,
+ uid_is_valid(override_uid) ? override_uid : st->st_uid,
+ gid_is_valid(override_gid) ? override_gid : st->st_gid,
+ AT_SYMLINK_NOFOLLOW) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int fd_copy_regular(
+ int df,
+ const char *from,
+ const struct stat *st,
+ int dt,
+ const char *to,
+ uid_t override_uid,
+ gid_t override_gid,
+ CopyFlags copy_flags,
+ copy_progress_bytes_t progress,
+ void *userdata) {
+
+ _cleanup_close_ int fdf = -1, fdt = -1;
+ struct timespec ts[2];
+ int r, q;
+
+ assert(from);
+ assert(st);
+ assert(to);
+
+ fdf = openat(df, from, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fdf < 0)
+ return -errno;
+
+ fdt = openat(dt, to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, st->st_mode & 07777);
+ if (fdt < 0)
+ return -errno;
+
+ r = copy_bytes_full(fdf, fdt, (uint64_t) -1, copy_flags, NULL, NULL, progress, userdata);
+ if (r < 0) {
+ (void) unlinkat(dt, to, 0);
+ return r;
+ }
+
+ if (fchown(fdt,
+ uid_is_valid(override_uid) ? override_uid : st->st_uid,
+ gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
+ r = -errno;
+
+ if (fchmod(fdt, st->st_mode & 07777) < 0)
+ r = -errno;
+
+ ts[0] = st->st_atim;
+ ts[1] = st->st_mtim;
+ (void) futimens(fdt, ts);
+ (void) copy_xattr(fdf, fdt);
+
+ q = close(fdt);
+ fdt = -1;
+
+ if (q < 0) {
+ r = -errno;
+ (void) unlinkat(dt, to, 0);
+ }
+
+ return r;
+}
+
+static int fd_copy_fifo(
+ int df,
+ const char *from,
+ const struct stat *st,
+ int dt,
+ const char *to,
+ uid_t override_uid,
+ gid_t override_gid,
+ CopyFlags copy_flags) {
+ int r;
+
+ assert(from);
+ assert(st);
+ assert(to);
+
+ r = mkfifoat(dt, to, st->st_mode & 07777);
+ if (r < 0)
+ return -errno;
+
+ if (fchownat(dt, to,
+ uid_is_valid(override_uid) ? override_uid : st->st_uid,
+ gid_is_valid(override_gid) ? override_gid : st->st_gid,
+ AT_SYMLINK_NOFOLLOW) < 0)
+ r = -errno;
+
+ if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
+ r = -errno;
+
+ return r;
+}
+
+static int fd_copy_node(
+ int df,
+ const char *from,
+ const struct stat *st,
+ int dt,
+ const char *to,
+ uid_t override_uid,
+ gid_t override_gid,
+ CopyFlags copy_flags) {
+ int r;
+
+ assert(from);
+ assert(st);
+ assert(to);
+
+ r = mknodat(dt, to, st->st_mode, st->st_rdev);
+ if (r < 0)
+ return -errno;
+
+ if (fchownat(dt, to,
+ uid_is_valid(override_uid) ? override_uid : st->st_uid,
+ gid_is_valid(override_gid) ? override_gid : st->st_gid,
+ AT_SYMLINK_NOFOLLOW) < 0)
+ r = -errno;
+
+ if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
+ r = -errno;
+
+ return r;
+}
+
+static int fd_copy_directory(
+ int df,
+ const char *from,
+ const struct stat *st,
+ int dt,
+ const char *to,
+ dev_t original_device,
+ unsigned depth_left,
+ uid_t override_uid,
+ gid_t override_gid,
+ CopyFlags copy_flags,
+ const char *display_path,
+ copy_progress_path_t progress_path,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ _cleanup_close_ int fdf = -1, fdt = -1;
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ bool exists, created;
+ int r;
+
+ assert(st);
+ assert(to);
+
+ if (depth_left == 0)
+ return -ENAMETOOLONG;
+
+ if (from)
+ fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ else
+ fdf = fcntl(df, F_DUPFD_CLOEXEC, 3);
+ if (fdf < 0)
+ return -errno;
+
+ d = fdopendir(fdf);
+ if (!d)
+ return -errno;
+ fdf = -1;
+
+ exists = false;
+ if (copy_flags & COPY_MERGE_EMPTY) {
+ r = dir_is_empty_at(dt, to);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ else if (r == 1)
+ exists = true;
+ }
+
+ if (exists)
+ created = false;
+ else {
+ r = mkdirat(dt, to, st->st_mode & 07777);
+ if (r >= 0)
+ created = true;
+ else if (errno == EEXIST && (copy_flags & COPY_MERGE))
+ created = false;
+ else
+ return -errno;
+ }
+
+ fdt = openat(dt, to, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fdt < 0)
+ return -errno;
+
+ r = 0;
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ const char *child_display_path = NULL;
+ _cleanup_free_ char *dp = NULL;
+ struct stat buf;
+ int q;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (fstatat(dirfd(d), de->d_name, &buf, AT_SYMLINK_NOFOLLOW) < 0) {
+ r = -errno;
+ continue;
+ }
+
+ if (progress_path) {
+ if (display_path)
+ child_display_path = dp = strjoin(display_path, "/", de->d_name);
+ else
+ child_display_path = de->d_name;
+
+ r = progress_path(child_display_path, &buf, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ if (S_ISDIR(buf.st_mode)) {
+ /*
+ * Don't descend into directories on other file systems, if this is requested. We do a simple
+ * .st_dev check here, which basically comes for free. Note that we do this check only on
+ * directories, not other kind of file system objects, for two reason:
+ *
+ * • The kernel's overlayfs pseudo file system that overlays multiple real file systems
+ * propagates the .st_dev field of the file system a file originates from all the way up
+ * through the stack to stat(). It doesn't do that for directories however. This means that
+ * comparing .st_dev on non-directories suggests that they all are mount points. To avoid
+ * confusion we hence avoid relying on this check for regular files.
+ *
+ * • The main reason we do this check at all is to protect ourselves from bind mount cycles,
+ * where we really want to avoid descending down in all eternity. However the .st_dev check
+ * is usually not sufficient for this protection anyway, as bind mount cycles from the same
+ * file system onto itself can't be detected that way. (Note we also do a recursion depth
+ * check, which is probably the better protection in this regard, which is why
+ * COPY_SAME_MOUNT is optional).
+ */
+
+ if (FLAGS_SET(copy_flags, COPY_SAME_MOUNT)) {
+ if (buf.st_dev != original_device)
+ continue;
+
+ r = fd_is_mount_point(dirfd(d), de->d_name, 0);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+ }
+
+ q = fd_copy_directory(dirfd(d), de->d_name, &buf, fdt, de->d_name, original_device, depth_left-1, override_uid, override_gid, copy_flags, child_display_path, progress_path, progress_bytes, userdata);
+ } else if (S_ISREG(buf.st_mode))
+ q = fd_copy_regular(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, progress_bytes, userdata);
+ else if (S_ISLNK(buf.st_mode))
+ q = fd_copy_symlink(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
+ else if (S_ISFIFO(buf.st_mode))
+ q = fd_copy_fifo(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
+ else if (S_ISBLK(buf.st_mode) || S_ISCHR(buf.st_mode) || S_ISSOCK(buf.st_mode))
+ q = fd_copy_node(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
+ else
+ q = -EOPNOTSUPP;
+
+ if (q == -EEXIST && (copy_flags & COPY_MERGE))
+ q = 0;
+
+ if (q < 0)
+ r = q;
+ }
+
+ if (created) {
+ struct timespec ut[2] = {
+ st->st_atim,
+ st->st_mtim
+ };
+
+ if (fchown(fdt,
+ uid_is_valid(override_uid) ? override_uid : st->st_uid,
+ gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
+ r = -errno;
+
+ if (fchmod(fdt, st->st_mode & 07777) < 0)
+ r = -errno;
+
+ (void) copy_xattr(dirfd(d), fdt);
+ (void) futimens(fdt, ut);
+ }
+
+ return r;
+}
+
+int copy_tree_at_full(
+ int fdf,
+ const char *from,
+ int fdt,
+ const char *to,
+ uid_t override_uid,
+ gid_t override_gid,
+ CopyFlags copy_flags,
+ copy_progress_path_t progress_path,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ struct stat st;
+
+ assert(from);
+ assert(to);
+
+ if (fstatat(fdf, from, &st, AT_SYMLINK_NOFOLLOW) < 0)
+ return -errno;
+
+ if (S_ISREG(st.st_mode))
+ return fd_copy_regular(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags, progress_bytes, userdata);
+ else if (S_ISDIR(st.st_mode))
+ return fd_copy_directory(fdf, from, &st, fdt, to, st.st_dev, COPY_DEPTH_MAX, override_uid, override_gid, copy_flags, NULL, progress_path, progress_bytes, userdata);
+ else if (S_ISLNK(st.st_mode))
+ return fd_copy_symlink(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags);
+ else if (S_ISFIFO(st.st_mode))
+ return fd_copy_fifo(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags);
+ else if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode) || S_ISSOCK(st.st_mode))
+ return fd_copy_node(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags);
+ else
+ return -EOPNOTSUPP;
+}
+
+int copy_directory_fd_full(
+ int dirfd,
+ const char *to,
+ CopyFlags copy_flags,
+ copy_progress_path_t progress_path,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ struct stat st;
+
+ assert(dirfd >= 0);
+ assert(to);
+
+ if (fstat(dirfd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode))
+ return -ENOTDIR;
+
+ return fd_copy_directory(dirfd, NULL, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags, NULL, progress_path, progress_bytes, userdata);
+}
+
+int copy_directory_full(
+ const char *from,
+ const char *to,
+ CopyFlags copy_flags,
+ copy_progress_path_t progress_path,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ struct stat st;
+
+ assert(from);
+ assert(to);
+
+ if (lstat(from, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode))
+ return -ENOTDIR;
+
+ return fd_copy_directory(AT_FDCWD, from, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags, NULL, progress_path, progress_bytes, userdata);
+}
+
+int copy_file_fd_full(
+ const char *from,
+ int fdt,
+ CopyFlags copy_flags,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ _cleanup_close_ int fdf = -1;
+ int r;
+
+ assert(from);
+ assert(fdt >= 0);
+
+ fdf = open(from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fdf < 0)
+ return -errno;
+
+ r = copy_bytes_full(fdf, fdt, (uint64_t) -1, copy_flags, NULL, NULL, progress_bytes, userdata);
+
+ (void) copy_times(fdf, fdt);
+ (void) copy_xattr(fdf, fdt);
+
+ return r;
+}
+
+int copy_file_full(
+ const char *from,
+ const char *to,
+ int flags,
+ mode_t mode,
+ unsigned chattr_flags,
+ CopyFlags copy_flags,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ int fdt = -1, r;
+
+ assert(from);
+ assert(to);
+
+ RUN_WITH_UMASK(0000) {
+ fdt = open(to, flags|O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY, mode);
+ if (fdt < 0)
+ return -errno;
+ }
+
+ if (chattr_flags != 0)
+ (void) chattr_fd(fdt, chattr_flags, (unsigned) -1, NULL);
+
+ r = copy_file_fd_full(from, fdt, copy_flags, progress_bytes, userdata);
+ if (r < 0) {
+ close(fdt);
+ (void) unlink(to);
+ return r;
+ }
+
+ if (close(fdt) < 0) {
+ unlink_noerrno(to);
+ return -errno;
+ }
+
+ return 0;
+}
+
+int copy_file_atomic_full(
+ const char *from,
+ const char *to,
+ mode_t mode,
+ unsigned chattr_flags,
+ CopyFlags copy_flags,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ _cleanup_(unlink_and_freep) char *t = NULL;
+ _cleanup_close_ int fdt = -1;
+ int r;
+
+ assert(from);
+ assert(to);
+
+ /* We try to use O_TMPFILE here to create the file if we can. Note that that only works if COPY_REPLACE is not
+ * set though as we need to use linkat() for linking the O_TMPFILE file into the file system but that system
+ * call can't replace existing files. Hence, if COPY_REPLACE is set we create a temporary name in the file
+ * system right-away and unconditionally which we then can renameat() to the right name after we completed
+ * writing it. */
+
+ if (copy_flags & COPY_REPLACE) {
+ r = tempfn_random(to, NULL, &t);
+ if (r < 0)
+ return r;
+
+ fdt = open(t, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|O_WRONLY|O_CLOEXEC, 0600);
+ if (fdt < 0) {
+ t = mfree(t);
+ return -errno;
+ }
+ } else {
+ fdt = open_tmpfile_linkable(to, O_WRONLY|O_CLOEXEC, &t);
+ if (fdt < 0)
+ return fdt;
+ }
+
+ if (chattr_flags != 0)
+ (void) chattr_fd(fdt, chattr_flags, (unsigned) -1, NULL);
+
+ r = copy_file_fd_full(from, fdt, copy_flags, progress_bytes, userdata);
+ if (r < 0)
+ return r;
+
+ if (fchmod(fdt, mode) < 0)
+ return -errno;
+
+ if (copy_flags & COPY_REPLACE) {
+ if (renameat(AT_FDCWD, t, AT_FDCWD, to) < 0)
+ return -errno;
+ } else {
+ r = link_tmpfile(fdt, t, to);
+ if (r < 0)
+ return r;
+ }
+
+ t = mfree(t);
+ return 0;
+}
+
+int copy_times(int fdf, int fdt) {
+ struct timespec ut[2];
+ struct stat st;
+ usec_t crtime = 0;
+
+ assert(fdf >= 0);
+ assert(fdt >= 0);
+
+ if (fstat(fdf, &st) < 0)
+ return -errno;
+
+ ut[0] = st.st_atim;
+ ut[1] = st.st_mtim;
+
+ if (futimens(fdt, ut) < 0)
+ return -errno;
+
+ if (fd_getcrtime(fdf, &crtime) >= 0)
+ (void) fd_setcrtime(fdt, crtime);
+
+ return 0;
+}
+
+int copy_xattr(int fdf, int fdt) {
+ _cleanup_free_ char *bufa = NULL, *bufb = NULL;
+ size_t sza = 100, szb = 100;
+ ssize_t n;
+ int ret = 0;
+ const char *p;
+
+ for (;;) {
+ bufa = malloc(sza);
+ if (!bufa)
+ return -ENOMEM;
+
+ n = flistxattr(fdf, bufa, sza);
+ if (n == 0)
+ return 0;
+ if (n > 0)
+ break;
+ if (errno != ERANGE)
+ return -errno;
+
+ sza *= 2;
+
+ bufa = mfree(bufa);
+ }
+
+ p = bufa;
+ while (n > 0) {
+ size_t l;
+
+ l = strlen(p);
+ assert(l < (size_t) n);
+
+ if (startswith(p, "user.")) {
+ ssize_t m;
+
+ if (!bufb) {
+ bufb = malloc(szb);
+ if (!bufb)
+ return -ENOMEM;
+ }
+
+ m = fgetxattr(fdf, p, bufb, szb);
+ if (m < 0) {
+ if (errno == ERANGE) {
+ szb *= 2;
+ bufb = mfree(bufb);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ if (fsetxattr(fdt, p, bufb, m, 0) < 0)
+ ret = -errno;
+ }
+
+ p += l + 1;
+ n -= l + 1;
+ }
+
+ return ret;
+}
diff --git a/src/basic/copy.h b/src/basic/copy.h
new file mode 100644
index 0000000..f677021
--- /dev/null
+++ b/src/basic/copy.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+typedef enum CopyFlags {
+ COPY_REFLINK = 1 << 0, /* Try to reflink */
+ COPY_MERGE = 1 << 1, /* Merge existing trees with our new one to copy */
+ COPY_REPLACE = 1 << 2, /* Replace an existing file if there's one */
+ COPY_SAME_MOUNT = 1 << 3, /* Don't descend recursively into other file systems, across mount point boundaries */
+ COPY_MERGE_EMPTY = 1 << 4, /* Merge an existing, empty directory with our new tree to copy */
+} CopyFlags;
+
+typedef int (*copy_progress_bytes_t)(uint64_t n_bytes, void *userdata);
+typedef int (*copy_progress_path_t)(const char *path, const struct stat *st, void *userdata);
+
+int copy_file_fd_full(const char *from, int to, CopyFlags copy_flags, copy_progress_bytes_t progress, void *userdata);
+static inline int copy_file_fd(const char *from, int to, CopyFlags copy_flags) {
+ return copy_file_fd_full(from, to, copy_flags, NULL, NULL);
+}
+
+int copy_file_full(const char *from, const char *to, int open_flags, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags, copy_progress_bytes_t progress, void *userdata);
+static inline int copy_file(const char *from, const char *to, int open_flags, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags) {
+ return copy_file_full(from, to, open_flags, mode, chattr_flags, copy_flags, NULL, NULL);
+}
+
+int copy_file_atomic_full(const char *from, const char *to, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags, copy_progress_bytes_t progress, void *userdata);
+static inline int copy_file_atomic(const char *from, const char *to, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags) {
+ return copy_file_atomic_full(from, to, mode, chattr_flags, copy_flags, NULL, NULL);
+}
+
+int copy_tree_at_full(int fdf, const char *from, int fdt, const char *to, uid_t override_uid, gid_t override_gid, CopyFlags copy_flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+static inline int copy_tree_at(int fdf, const char *from, int fdt, const char *to, uid_t override_uid, gid_t override_gid, CopyFlags copy_flags) {
+ return copy_tree_at_full(fdf, from, fdt, to, override_uid, override_gid, copy_flags, NULL, NULL, NULL);
+}
+static inline int copy_tree(const char *from, const char *to, uid_t override_uid, gid_t override_gid, CopyFlags copy_flags) {
+ return copy_tree_at_full(AT_FDCWD, from, AT_FDCWD, to, override_uid, override_gid, copy_flags, NULL, NULL, NULL);
+}
+
+int copy_directory_fd_full(int dirfd, const char *to, CopyFlags copy_flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+static inline int copy_directory_fd(int dirfd, const char *to, CopyFlags copy_flags) {
+ return copy_directory_fd_full(dirfd, to, copy_flags, NULL, NULL, NULL);
+}
+
+int copy_directory_full(const char *from, const char *to, CopyFlags copy_flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+static inline int copy_directory(const char *from, const char *to, CopyFlags copy_flags) {
+ return copy_directory_full(from, to, copy_flags, NULL, NULL, NULL);
+}
+
+int copy_bytes_full(int fdf, int fdt, uint64_t max_bytes, CopyFlags copy_flags, void **ret_remains, size_t *ret_remains_size, copy_progress_bytes_t progress, void *userdata);
+static inline int copy_bytes(int fdf, int fdt, uint64_t max_bytes, CopyFlags copy_flags) {
+ return copy_bytes_full(fdf, fdt, max_bytes, copy_flags, NULL, NULL, NULL, NULL);
+}
+
+int copy_times(int fdf, int fdt);
+int copy_xattr(int fdf, int fdt);
diff --git a/src/basic/def.h b/src/basic/def.h
new file mode 100644
index 0000000..5be018d
--- /dev/null
+++ b/src/basic/def.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "util.h"
+
+#define DEFAULT_TIMEOUT_USEC (90*USEC_PER_SEC)
+#define DEFAULT_RESTART_USEC (100*USEC_PER_MSEC)
+#define DEFAULT_CONFIRM_USEC (30*USEC_PER_SEC)
+
+#define DEFAULT_START_LIMIT_INTERVAL (10*USEC_PER_SEC)
+#define DEFAULT_START_LIMIT_BURST 5
+
+/* The default time after which exit-on-idle services exit. This
+ * should be kept lower than the watchdog timeout, because otherwise
+ * the watchdog pings will keep the loop busy. */
+#define DEFAULT_EXIT_USEC (30*USEC_PER_SEC)
+
+/* The default value for the net.unix.max_dgram_qlen sysctl */
+#define DEFAULT_UNIX_MAX_DGRAM_QLEN 512UL
+
+#define SIGNALS_CRASH_HANDLER SIGSEGV,SIGILL,SIGFPE,SIGBUS,SIGQUIT,SIGABRT
+#define SIGNALS_IGNORE SIGPIPE
+
+#if HAVE_SPLIT_USR
+#define KBD_KEYMAP_DIRS \
+ "/usr/share/keymaps/\0" \
+ "/usr/share/kbd/keymaps/\0" \
+ "/usr/lib/kbd/keymaps/\0" \
+ "/lib/kbd/keymaps/\0"
+#else
+#define KBD_KEYMAP_DIRS \
+ "/usr/share/keymaps/\0" \
+ "/usr/share/kbd/keymaps/\0" \
+ "/usr/lib/kbd/keymaps/\0"
+#endif
+
+/* Note that we use the new /run prefix here (instead of /var/run) since we require them to be aliases and that way we
+ * become independent of /var being mounted */
+#define DEFAULT_SYSTEM_BUS_ADDRESS "unix:path=/run/dbus/system_bus_socket"
+#define DEFAULT_USER_BUS_ADDRESS_FMT "unix:path=%s/bus"
+
+#define PLYMOUTH_SOCKET { \
+ .un.sun_family = AF_UNIX, \
+ .un.sun_path = "\0/org/freedesktop/plymouthd", \
+ }
+
+#define NOTIFY_FD_MAX 768
+#define NOTIFY_BUFFER_MAX PIPE_BUF
+
+#if HAVE_SPLIT_USR
+# define _CONF_PATHS_SPLIT_USR_NULSTR(n) "/lib/" n "\0"
+# define _CONF_PATHS_SPLIT_USR(n) , "/lib/" n
+#else
+# define _CONF_PATHS_SPLIT_USR_NULSTR(n)
+# define _CONF_PATHS_SPLIT_USR(n)
+#endif
+
+/* Return a nulstr for a standard cascade of configuration paths,
+ * suitable to pass to conf_files_list_nulstr() or config_parse_many_nulstr()
+ * to implement drop-in directories for extending configuration
+ * files. */
+#define CONF_PATHS_NULSTR(n) \
+ "/etc/" n "\0" \
+ "/run/" n "\0" \
+ "/usr/local/lib/" n "\0" \
+ "/usr/lib/" n "\0" \
+ _CONF_PATHS_SPLIT_USR_NULSTR(n)
+
+#define CONF_PATHS_STRV(n) \
+ STRV_MAKE( \
+ "/etc/" n, \
+ "/run/" n, \
+ "/usr/local/lib/" n, \
+ "/usr/lib/" n \
+ _CONF_PATHS_SPLIT_USR(n))
+
+#define HIGH_RLIMIT_MEMLOCK (1024ULL*1024ULL*64ULL)
diff --git a/src/basic/device-nodes.c b/src/basic/device-nodes.c
new file mode 100644
index 0000000..5fcdf24
--- /dev/null
+++ b/src/basic/device-nodes.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "device-nodes.h"
+#include "utf8.h"
+
+int whitelisted_char_for_devnode(char c, const char *white) {
+
+ if ((c >= '0' && c <= '9') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= 'a' && c <= 'z') ||
+ strchr("#+-.:=@_", c) != NULL ||
+ (white != NULL && strchr(white, c) != NULL))
+ return 1;
+
+ return 0;
+}
+
+int encode_devnode_name(const char *str, char *str_enc, size_t len) {
+ size_t i, j;
+
+ if (!str || !str_enc)
+ return -EINVAL;
+
+ for (i = 0, j = 0; str[i] != '\0'; i++) {
+ int seqlen;
+
+ seqlen = utf8_encoded_valid_unichar(&str[i]);
+ if (seqlen > 1) {
+
+ if (len-j < (size_t)seqlen)
+ return -EINVAL;
+
+ memcpy(&str_enc[j], &str[i], seqlen);
+ j += seqlen;
+ i += (seqlen-1);
+
+ } else if (str[i] == '\\' || !whitelisted_char_for_devnode(str[i], NULL)) {
+
+ if (len-j < 4)
+ return -EINVAL;
+
+ sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]);
+ j += 4;
+
+ } else {
+ if (len-j < 1)
+ return -EINVAL;
+
+ str_enc[j] = str[i];
+ j++;
+ }
+ }
+
+ if (len-j < 1)
+ return -EINVAL;
+
+ str_enc[j] = '\0';
+ return 0;
+}
diff --git a/src/basic/device-nodes.h b/src/basic/device-nodes.h
new file mode 100644
index 0000000..3840e6d
--- /dev/null
+++ b/src/basic/device-nodes.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stddef.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "stdio-util.h"
+
+int encode_devnode_name(const char *str, char *str_enc, size_t len);
+int whitelisted_char_for_devnode(char c, const char *additional);
+
+#define DEV_NUM_PATH_MAX \
+ (STRLEN("/dev/block/") + DECIMAL_STR_MAX(dev_t) + 1 + DECIMAL_STR_MAX(dev_t))
+#define xsprintf_dev_num_path(buf, type, devno) \
+ xsprintf(buf, "/dev/%s/%u:%u", type, major(devno), minor(devno))
diff --git a/src/basic/dirent-util.c b/src/basic/dirent-util.c
new file mode 100644
index 0000000..d1d2c0e
--- /dev/null
+++ b/src/basic/dirent-util.c
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include "dirent-util.h"
+#include "path-util.h"
+#include "string-util.h"
+
+int dirent_ensure_type(DIR *d, struct dirent *de) {
+ struct stat st;
+
+ assert(d);
+ assert(de);
+
+ if (de->d_type != DT_UNKNOWN)
+ return 0;
+
+ if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
+ return -errno;
+
+ de->d_type =
+ S_ISREG(st.st_mode) ? DT_REG :
+ S_ISDIR(st.st_mode) ? DT_DIR :
+ S_ISLNK(st.st_mode) ? DT_LNK :
+ S_ISFIFO(st.st_mode) ? DT_FIFO :
+ S_ISSOCK(st.st_mode) ? DT_SOCK :
+ S_ISCHR(st.st_mode) ? DT_CHR :
+ S_ISBLK(st.st_mode) ? DT_BLK :
+ DT_UNKNOWN;
+
+ return 0;
+}
+
+bool dirent_is_file(const struct dirent *de) {
+ assert(de);
+
+ if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
+ return false;
+
+ if (hidden_or_backup_file(de->d_name))
+ return false;
+
+ return true;
+}
+
+bool dirent_is_file_with_suffix(const struct dirent *de, const char *suffix) {
+ assert(de);
+
+ if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
+ return false;
+
+ if (de->d_name[0] == '.')
+ return false;
+
+ if (!suffix)
+ return true;
+
+ return endswith(de->d_name, suffix);
+}
+
+struct dirent* readdir_no_dot(DIR *dirp) {
+ struct dirent* d;
+
+ for (;;) {
+ d = readdir(dirp);
+ if (d && dot_or_dot_dot(d->d_name))
+ continue;
+ return d;
+ }
+}
diff --git a/src/basic/dirent-util.h b/src/basic/dirent-util.h
new file mode 100644
index 0000000..b1b8767
--- /dev/null
+++ b/src/basic/dirent-util.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <dirent.h>
+#include <errno.h>
+#include <stdbool.h>
+
+#include "macro.h"
+#include "path-util.h"
+
+int dirent_ensure_type(DIR *d, struct dirent *de);
+
+bool dirent_is_file(const struct dirent *de) _pure_;
+bool dirent_is_file_with_suffix(const struct dirent *de, const char *suffix) _pure_;
+
+struct dirent* readdir_no_dot(DIR *dirp);
+
+#define FOREACH_DIRENT(de, d, on_error) \
+ for (errno = 0, de = readdir(d);; errno = 0, de = readdir(d)) \
+ if (!de) { \
+ if (errno > 0) { \
+ on_error; \
+ } \
+ break; \
+ } else if (hidden_or_backup_file((de)->d_name)) \
+ continue; \
+ else
+
+#define FOREACH_DIRENT_ALL(de, d, on_error) \
+ for (errno = 0, de = readdir(d);; errno = 0, de = readdir(d)) \
+ if (!de) { \
+ if (errno > 0) { \
+ on_error; \
+ } \
+ break; \
+ } else
diff --git a/src/basic/env-file.c b/src/basic/env-file.c
new file mode 100644
index 0000000..7f10f9a
--- /dev/null
+++ b/src/basic/env-file.c
@@ -0,0 +1,564 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio_ext.h>
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "utf8.h"
+
+static int parse_env_file_internal(
+ FILE *f,
+ const char *fname,
+ int (*push) (const char *filename, unsigned line,
+ const char *key, char *value, void *userdata, int *n_pushed),
+ void *userdata,
+ int *n_pushed) {
+
+ size_t key_alloc = 0, n_key = 0, value_alloc = 0, n_value = 0, last_value_whitespace = (size_t) -1, last_key_whitespace = (size_t) -1;
+ _cleanup_free_ char *contents = NULL, *key = NULL, *value = NULL;
+ unsigned line = 1;
+ char *p;
+ int r;
+
+ enum {
+ PRE_KEY,
+ KEY,
+ PRE_VALUE,
+ VALUE,
+ VALUE_ESCAPE,
+ SINGLE_QUOTE_VALUE,
+ DOUBLE_QUOTE_VALUE,
+ DOUBLE_QUOTE_VALUE_ESCAPE,
+ COMMENT,
+ COMMENT_ESCAPE
+ } state = PRE_KEY;
+
+ if (f)
+ r = read_full_stream(f, &contents, NULL);
+ else
+ r = read_full_file(fname, &contents, NULL);
+ if (r < 0)
+ return r;
+
+ for (p = contents; *p; p++) {
+ char c = *p;
+
+ switch (state) {
+
+ case PRE_KEY:
+ if (strchr(COMMENTS, c))
+ state = COMMENT;
+ else if (!strchr(WHITESPACE, c)) {
+ state = KEY;
+ last_key_whitespace = (size_t) -1;
+
+ if (!GREEDY_REALLOC(key, key_alloc, n_key+2))
+ return -ENOMEM;
+
+ key[n_key++] = c;
+ }
+ break;
+
+ case KEY:
+ if (strchr(NEWLINE, c)) {
+ state = PRE_KEY;
+ line++;
+ n_key = 0;
+ } else if (c == '=') {
+ state = PRE_VALUE;
+ last_value_whitespace = (size_t) -1;
+ } else {
+ if (!strchr(WHITESPACE, c))
+ last_key_whitespace = (size_t) -1;
+ else if (last_key_whitespace == (size_t) -1)
+ last_key_whitespace = n_key;
+
+ if (!GREEDY_REALLOC(key, key_alloc, n_key+2))
+ return -ENOMEM;
+
+ key[n_key++] = c;
+ }
+
+ break;
+
+ case PRE_VALUE:
+ if (strchr(NEWLINE, c)) {
+ state = PRE_KEY;
+ line++;
+ key[n_key] = 0;
+
+ if (value)
+ value[n_value] = 0;
+
+ /* strip trailing whitespace from key */
+ if (last_key_whitespace != (size_t) -1)
+ key[last_key_whitespace] = 0;
+
+ r = push(fname, line, key, value, userdata, n_pushed);
+ if (r < 0)
+ return r;
+
+ n_key = 0;
+ value = NULL;
+ value_alloc = n_value = 0;
+
+ } else if (c == '\'')
+ state = SINGLE_QUOTE_VALUE;
+ else if (c == '"')
+ state = DOUBLE_QUOTE_VALUE;
+ else if (c == '\\')
+ state = VALUE_ESCAPE;
+ else if (!strchr(WHITESPACE, c)) {
+ state = VALUE;
+
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+
+ break;
+
+ case VALUE:
+ if (strchr(NEWLINE, c)) {
+ state = PRE_KEY;
+ line++;
+
+ key[n_key] = 0;
+
+ if (value)
+ value[n_value] = 0;
+
+ /* Chomp off trailing whitespace from value */
+ if (last_value_whitespace != (size_t) -1)
+ value[last_value_whitespace] = 0;
+
+ /* strip trailing whitespace from key */
+ if (last_key_whitespace != (size_t) -1)
+ key[last_key_whitespace] = 0;
+
+ r = push(fname, line, key, value, userdata, n_pushed);
+ if (r < 0)
+ return r;
+
+ n_key = 0;
+ value = NULL;
+ value_alloc = n_value = 0;
+
+ } else if (c == '\\') {
+ state = VALUE_ESCAPE;
+ last_value_whitespace = (size_t) -1;
+ } else {
+ if (!strchr(WHITESPACE, c))
+ last_value_whitespace = (size_t) -1;
+ else if (last_value_whitespace == (size_t) -1)
+ last_value_whitespace = n_value;
+
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+
+ break;
+
+ case VALUE_ESCAPE:
+ state = VALUE;
+
+ if (!strchr(NEWLINE, c)) {
+ /* Escaped newlines we eat up entirely */
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+ break;
+
+ case SINGLE_QUOTE_VALUE:
+ if (c == '\'')
+ state = PRE_VALUE;
+ else {
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+
+ break;
+
+ case DOUBLE_QUOTE_VALUE:
+ if (c == '"')
+ state = PRE_VALUE;
+ else if (c == '\\')
+ state = DOUBLE_QUOTE_VALUE_ESCAPE;
+ else {
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+
+ break;
+
+ case DOUBLE_QUOTE_VALUE_ESCAPE:
+ state = DOUBLE_QUOTE_VALUE;
+
+ if (c == '"') {
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+ value[n_value++] = '"';
+ } else if (!strchr(NEWLINE, c)) {
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+3))
+ return -ENOMEM;
+ value[n_value++] = '\\';
+ value[n_value++] = c;
+ }
+
+ break;
+
+ case COMMENT:
+ if (c == '\\')
+ state = COMMENT_ESCAPE;
+ else if (strchr(NEWLINE, c)) {
+ state = PRE_KEY;
+ line++;
+ }
+ break;
+
+ case COMMENT_ESCAPE:
+ state = COMMENT;
+ break;
+ }
+ }
+
+ if (IN_SET(state,
+ PRE_VALUE,
+ VALUE,
+ VALUE_ESCAPE,
+ SINGLE_QUOTE_VALUE,
+ DOUBLE_QUOTE_VALUE,
+ DOUBLE_QUOTE_VALUE_ESCAPE)) {
+
+ key[n_key] = 0;
+
+ if (value)
+ value[n_value] = 0;
+
+ if (state == VALUE)
+ if (last_value_whitespace != (size_t) -1)
+ value[last_value_whitespace] = 0;
+
+ /* strip trailing whitespace from key */
+ if (last_key_whitespace != (size_t) -1)
+ key[last_key_whitespace] = 0;
+
+ r = push(fname, line, key, value, userdata, n_pushed);
+ if (r < 0)
+ return r;
+
+ value = NULL;
+ }
+
+ return 0;
+}
+
+static int check_utf8ness_and_warn(
+ const char *filename, unsigned line,
+ const char *key, char *value) {
+
+ if (!utf8_is_valid(key)) {
+ _cleanup_free_ char *p = NULL;
+
+ p = utf8_escape_invalid(key);
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s:%u: invalid UTF-8 in key '%s', ignoring.",
+ strna(filename), line, p);
+ }
+
+ if (value && !utf8_is_valid(value)) {
+ _cleanup_free_ char *p = NULL;
+
+ p = utf8_escape_invalid(value);
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s:%u: invalid UTF-8 value for key %s: '%s', ignoring.",
+ strna(filename), line, key, p);
+ }
+
+ return 0;
+}
+
+static int parse_env_file_push(
+ const char *filename, unsigned line,
+ const char *key, char *value,
+ void *userdata,
+ int *n_pushed) {
+
+ const char *k;
+ va_list aq, *ap = userdata;
+ int r;
+
+ r = check_utf8ness_and_warn(filename, line, key, value);
+ if (r < 0)
+ return r;
+
+ va_copy(aq, *ap);
+
+ while ((k = va_arg(aq, const char *))) {
+ char **v;
+
+ v = va_arg(aq, char **);
+
+ if (streq(key, k)) {
+ va_end(aq);
+ free(*v);
+ *v = value;
+
+ if (n_pushed)
+ (*n_pushed)++;
+
+ return 1;
+ }
+ }
+
+ va_end(aq);
+ free(value);
+
+ return 0;
+}
+
+int parse_env_filev(
+ FILE *f,
+ const char *fname,
+ va_list ap) {
+
+ int r, n_pushed = 0;
+ va_list aq;
+
+ va_copy(aq, ap);
+ r = parse_env_file_internal(f, fname, parse_env_file_push, &aq, &n_pushed);
+ va_end(aq);
+ if (r < 0)
+ return r;
+
+ return n_pushed;
+}
+
+int parse_env_file_sentinel(
+ FILE *f,
+ const char *fname,
+ ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, fname);
+ r = parse_env_filev(f, fname, ap);
+ va_end(ap);
+
+ return r;
+}
+
+static int load_env_file_push(
+ const char *filename, unsigned line,
+ const char *key, char *value,
+ void *userdata,
+ int *n_pushed) {
+ char ***m = userdata;
+ char *p;
+ int r;
+
+ r = check_utf8ness_and_warn(filename, line, key, value);
+ if (r < 0)
+ return r;
+
+ p = strjoin(key, "=", value);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_env_replace(m, p);
+ if (r < 0) {
+ free(p);
+ return r;
+ }
+
+ if (n_pushed)
+ (*n_pushed)++;
+
+ free(value);
+ return 0;
+}
+
+int load_env_file(FILE *f, const char *fname, char ***rl) {
+ char **m = NULL;
+ int r;
+
+ r = parse_env_file_internal(f, fname, load_env_file_push, &m, NULL);
+ if (r < 0) {
+ strv_free(m);
+ return r;
+ }
+
+ *rl = m;
+ return 0;
+}
+
+static int load_env_file_push_pairs(
+ const char *filename, unsigned line,
+ const char *key, char *value,
+ void *userdata,
+ int *n_pushed) {
+ char ***m = userdata;
+ int r;
+
+ r = check_utf8ness_and_warn(filename, line, key, value);
+ if (r < 0)
+ return r;
+
+ r = strv_extend(m, key);
+ if (r < 0)
+ return -ENOMEM;
+
+ if (!value) {
+ r = strv_extend(m, "");
+ if (r < 0)
+ return -ENOMEM;
+ } else {
+ r = strv_push(m, value);
+ if (r < 0)
+ return r;
+ }
+
+ if (n_pushed)
+ (*n_pushed)++;
+
+ return 0;
+}
+
+int load_env_file_pairs(FILE *f, const char *fname, char ***rl) {
+ char **m = NULL;
+ int r;
+
+ r = parse_env_file_internal(f, fname, load_env_file_push_pairs, &m, NULL);
+ if (r < 0) {
+ strv_free(m);
+ return r;
+ }
+
+ *rl = m;
+ return 0;
+}
+
+static int merge_env_file_push(
+ const char *filename, unsigned line,
+ const char *key, char *value,
+ void *userdata,
+ int *n_pushed) {
+
+ char ***env = userdata;
+ char *expanded_value;
+
+ assert(env);
+
+ if (!value) {
+ log_error("%s:%u: invalid syntax (around \"%s\"), ignoring.", strna(filename), line, key);
+ return 0;
+ }
+
+ if (!env_name_is_valid(key)) {
+ log_error("%s:%u: invalid variable name \"%s\", ignoring.", strna(filename), line, key);
+ free(value);
+ return 0;
+ }
+
+ expanded_value = replace_env(value, *env,
+ REPLACE_ENV_USE_ENVIRONMENT|
+ REPLACE_ENV_ALLOW_BRACELESS|
+ REPLACE_ENV_ALLOW_EXTENDED);
+ if (!expanded_value)
+ return -ENOMEM;
+
+ free_and_replace(value, expanded_value);
+
+ return load_env_file_push(filename, line, key, value, env, n_pushed);
+}
+
+int merge_env_file(
+ char ***env,
+ FILE *f,
+ const char *fname) {
+
+ /* NOTE: this function supports braceful and braceless variable expansions,
+ * plus "extended" substitutions, unlike other exported parsing functions.
+ */
+
+ return parse_env_file_internal(f, fname, merge_env_file_push, env, NULL);
+}
+
+static void write_env_var(FILE *f, const char *v) {
+ const char *p;
+
+ p = strchr(v, '=');
+ if (!p) {
+ /* Fallback */
+ fputs_unlocked(v, f);
+ fputc_unlocked('\n', f);
+ return;
+ }
+
+ p++;
+ fwrite_unlocked(v, 1, p-v, f);
+
+ if (string_has_cc(p, NULL) || chars_intersect(p, WHITESPACE SHELL_NEED_QUOTES)) {
+ fputc_unlocked('"', f);
+
+ for (; *p; p++) {
+ if (strchr(SHELL_NEED_ESCAPE, *p))
+ fputc_unlocked('\\', f);
+
+ fputc_unlocked(*p, f);
+ }
+
+ fputc_unlocked('"', f);
+ } else
+ fputs_unlocked(p, f);
+
+ fputc_unlocked('\n', f);
+}
+
+int write_env_file(const char *fname, char **l) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ char **i;
+ int r;
+
+ assert(fname);
+
+ r = fopen_temporary(fname, &f, &p);
+ if (r < 0)
+ return r;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+ (void) fchmod_umask(fileno(f), 0644);
+
+ STRV_FOREACH(i, l)
+ write_env_var(f, *i);
+
+ r = fflush_and_check(f);
+ if (r >= 0) {
+ if (rename(p, fname) >= 0)
+ return 0;
+
+ r = -errno;
+ }
+
+ unlink(p);
+ return r;
+}
diff --git a/src/basic/env-file.h b/src/basic/env-file.h
new file mode 100644
index 0000000..e1ca195
--- /dev/null
+++ b/src/basic/env-file.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "macro.h"
+
+int parse_env_filev(FILE *f, const char *fname, va_list ap);
+int parse_env_file_sentinel(FILE *f, const char *fname, ...) _sentinel_;
+#define parse_env_file(f, fname, ...) parse_env_file_sentinel(f, fname, __VA_ARGS__, NULL)
+int load_env_file(FILE *f, const char *fname, char ***l);
+int load_env_file_pairs(FILE *f, const char *fname, char ***l);
+
+int merge_env_file(char ***env, FILE *f, const char *fname);
+
+int write_env_file(const char *fname, char **l);
diff --git a/src/basic/env-util.c b/src/basic/env-util.c
new file mode 100644
index 0000000..fd449dc
--- /dev/null
+++ b/src/basic/env-util.c
@@ -0,0 +1,752 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "env-util.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+
+#define VALID_CHARS_ENV_NAME \
+ DIGITS LETTERS \
+ "_"
+
+static bool env_name_is_valid_n(const char *e, size_t n) {
+ const char *p;
+
+ if (!e)
+ return false;
+
+ if (n <= 0)
+ return false;
+
+ if (e[0] >= '0' && e[0] <= '9')
+ return false;
+
+ /* POSIX says the overall size of the environment block cannot
+ * be > ARG_MAX, an individual assignment hence cannot be
+ * either. Discounting the equal sign and trailing NUL this
+ * hence leaves ARG_MAX-2 as longest possible variable
+ * name. */
+ if (n > (size_t) sysconf(_SC_ARG_MAX) - 2)
+ return false;
+
+ for (p = e; p < e + n; p++)
+ if (!strchr(VALID_CHARS_ENV_NAME, *p))
+ return false;
+
+ return true;
+}
+
+bool env_name_is_valid(const char *e) {
+ if (!e)
+ return false;
+
+ return env_name_is_valid_n(e, strlen(e));
+}
+
+bool env_value_is_valid(const char *e) {
+ if (!e)
+ return false;
+
+ if (!utf8_is_valid(e))
+ return false;
+
+ /* bash allows tabs and newlines in environment variables, and so
+ * should we */
+ if (string_has_cc(e, "\t\n"))
+ return false;
+
+ /* POSIX says the overall size of the environment block cannot
+ * be > ARG_MAX, an individual assignment hence cannot be
+ * either. Discounting the shortest possible variable name of
+ * length 1, the equal sign and trailing NUL this hence leaves
+ * ARG_MAX-3 as longest possible variable value. */
+ if (strlen(e) > (size_t) sysconf(_SC_ARG_MAX) - 3)
+ return false;
+
+ return true;
+}
+
+bool env_assignment_is_valid(const char *e) {
+ const char *eq;
+
+ eq = strchr(e, '=');
+ if (!eq)
+ return false;
+
+ if (!env_name_is_valid_n(e, eq - e))
+ return false;
+
+ if (!env_value_is_valid(eq + 1))
+ return false;
+
+ /* POSIX says the overall size of the environment block cannot
+ * be > ARG_MAX, hence the individual variable assignments
+ * cannot be either, but let's leave room for one trailing NUL
+ * byte. */
+ if (strlen(e) > (size_t) sysconf(_SC_ARG_MAX) - 1)
+ return false;
+
+ return true;
+}
+
+bool strv_env_is_valid(char **e) {
+ char **p, **q;
+
+ STRV_FOREACH(p, e) {
+ size_t k;
+
+ if (!env_assignment_is_valid(*p))
+ return false;
+
+ /* Check if there are duplicate assignments */
+ k = strcspn(*p, "=");
+ STRV_FOREACH(q, p + 1)
+ if (strneq(*p, *q, k) && (*q)[k] == '=')
+ return false;
+ }
+
+ return true;
+}
+
+bool strv_env_name_is_valid(char **l) {
+ char **p;
+
+ STRV_FOREACH(p, l) {
+ if (!env_name_is_valid(*p))
+ return false;
+
+ if (strv_contains(p + 1, *p))
+ return false;
+ }
+
+ return true;
+}
+
+bool strv_env_name_or_assignment_is_valid(char **l) {
+ char **p;
+
+ STRV_FOREACH(p, l) {
+ if (!env_assignment_is_valid(*p) && !env_name_is_valid(*p))
+ return false;
+
+ if (strv_contains(p + 1, *p))
+ return false;
+ }
+
+ return true;
+}
+
+static int env_append(char **r, char ***k, char **a) {
+ assert(r);
+ assert(k);
+ assert(*k >= r);
+
+ if (!a)
+ return 0;
+
+ /* Expects the following arguments: 'r' shall point to the beginning of an strv we are going to append to, 'k'
+ * to a pointer pointing to the NULL entry at the end of the same array. 'a' shall point to another strv.
+ *
+ * This call adds every entry of 'a' to 'r', either overriding an existing matching entry, or appending to it.
+ *
+ * This call assumes 'r' has enough pre-allocated space to grow by all of 'a''s items. */
+
+ for (; *a; a++) {
+ char **j, *c;
+ size_t n;
+
+ n = strcspn(*a, "=");
+ if ((*a)[n] == '=')
+ n++;
+
+ for (j = r; j < *k; j++)
+ if (strneq(*j, *a, n))
+ break;
+
+ c = strdup(*a);
+ if (!c)
+ return -ENOMEM;
+
+ if (j >= *k) { /* Append to the end? */
+ (*k)[0] = c;
+ (*k)[1] = NULL;
+ (*k)++;
+ } else
+ free_and_replace(*j, c); /* Override existing item */
+ }
+
+ return 0;
+}
+
+char **strv_env_merge(size_t n_lists, ...) {
+ _cleanup_strv_free_ char **ret = NULL;
+ size_t n = 0, i;
+ char **l, **k;
+ va_list ap;
+
+ /* Merges an arbitrary number of environment sets */
+
+ va_start(ap, n_lists);
+ for (i = 0; i < n_lists; i++) {
+ l = va_arg(ap, char**);
+ n += strv_length(l);
+ }
+ va_end(ap);
+
+ ret = new(char*, n+1);
+ if (!ret)
+ return NULL;
+
+ *ret = NULL;
+ k = ret;
+
+ va_start(ap, n_lists);
+ for (i = 0; i < n_lists; i++) {
+ l = va_arg(ap, char**);
+ if (env_append(ret, &k, l) < 0) {
+ va_end(ap);
+ return NULL;
+ }
+ }
+ va_end(ap);
+
+ return TAKE_PTR(ret);
+}
+
+static bool env_match(const char *t, const char *pattern) {
+ assert(t);
+ assert(pattern);
+
+ /* pattern a matches string a
+ * a matches a=
+ * a matches a=b
+ * a= matches a=
+ * a=b matches a=b
+ * a= does not match a
+ * a=b does not match a=
+ * a=b does not match a
+ * a=b does not match a=c */
+
+ if (streq(t, pattern))
+ return true;
+
+ if (!strchr(pattern, '=')) {
+ size_t l = strlen(pattern);
+
+ return strneq(t, pattern, l) && t[l] == '=';
+ }
+
+ return false;
+}
+
+static bool env_entry_has_name(const char *entry, const char *name) {
+ const char *t;
+
+ assert(entry);
+ assert(name);
+
+ t = startswith(entry, name);
+ if (!t)
+ return false;
+
+ return *t == '=';
+}
+
+char **strv_env_delete(char **x, size_t n_lists, ...) {
+ size_t n, i = 0;
+ char **k, **r;
+ va_list ap;
+
+ /* Deletes every entry from x that is mentioned in the other
+ * string lists */
+
+ n = strv_length(x);
+
+ r = new(char*, n+1);
+ if (!r)
+ return NULL;
+
+ STRV_FOREACH(k, x) {
+ size_t v;
+
+ va_start(ap, n_lists);
+ for (v = 0; v < n_lists; v++) {
+ char **l, **j;
+
+ l = va_arg(ap, char**);
+ STRV_FOREACH(j, l)
+ if (env_match(*k, *j))
+ goto skip;
+ }
+ va_end(ap);
+
+ r[i] = strdup(*k);
+ if (!r[i]) {
+ strv_free(r);
+ return NULL;
+ }
+
+ i++;
+ continue;
+
+ skip:
+ va_end(ap);
+ }
+
+ r[i] = NULL;
+
+ assert(i <= n);
+
+ return r;
+}
+
+char **strv_env_unset(char **l, const char *p) {
+
+ char **f, **t;
+
+ if (!l)
+ return NULL;
+
+ assert(p);
+
+ /* Drops every occurrence of the env var setting p in the
+ * string list. Edits in-place. */
+
+ for (f = t = l; *f; f++) {
+
+ if (env_match(*f, p)) {
+ free(*f);
+ continue;
+ }
+
+ *(t++) = *f;
+ }
+
+ *t = NULL;
+ return l;
+}
+
+char **strv_env_unset_many(char **l, ...) {
+ char **f, **t;
+
+ if (!l)
+ return NULL;
+
+ /* Like strv_env_unset() but applies many at once. Edits in-place. */
+
+ for (f = t = l; *f; f++) {
+ bool found = false;
+ const char *p;
+ va_list ap;
+
+ va_start(ap, l);
+
+ while ((p = va_arg(ap, const char*))) {
+ if (env_match(*f, p)) {
+ found = true;
+ break;
+ }
+ }
+
+ va_end(ap);
+
+ if (found) {
+ free(*f);
+ continue;
+ }
+
+ *(t++) = *f;
+ }
+
+ *t = NULL;
+ return l;
+}
+
+int strv_env_replace(char ***l, char *p) {
+ const char *t, *name;
+ char **f;
+ int r;
+
+ assert(p);
+
+ /* Replace first occurrence of the env var or add a new one in the string list. Drop other occurrences. Edits
+ * in-place. Does not copy p. p must be a valid key=value assignment.
+ */
+
+ t = strchr(p, '=');
+ if (!t)
+ return -EINVAL;
+
+ name = strndupa(p, t - p);
+
+ STRV_FOREACH(f, *l)
+ if (env_entry_has_name(*f, name)) {
+ free_and_replace(*f, p);
+ strv_env_unset(f + 1, *f);
+ return 0;
+ }
+
+ /* We didn't find a match, we need to append p or create a new strv */
+ r = strv_push(l, p);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+char **strv_env_set(char **x, const char *p) {
+ _cleanup_strv_free_ char **ret = NULL;
+ size_t n, m;
+ char **k;
+
+ /* Overrides the env var setting of p, returns a new copy */
+
+ n = strv_length(x);
+ m = n + 2;
+ if (m < n) /* overflow? */
+ return NULL;
+
+ ret = new(char*, m);
+ if (!ret)
+ return NULL;
+
+ *ret = NULL;
+ k = ret;
+
+ if (env_append(ret, &k, x) < 0)
+ return NULL;
+
+ if (env_append(ret, &k, STRV_MAKE(p)) < 0)
+ return NULL;
+
+ return TAKE_PTR(ret);
+}
+
+char *strv_env_get_n(char **l, const char *name, size_t k, unsigned flags) {
+ char **i;
+
+ assert(name);
+
+ if (k <= 0)
+ return NULL;
+
+ STRV_FOREACH_BACKWARDS(i, l)
+ if (strneq(*i, name, k) &&
+ (*i)[k] == '=')
+ return *i + k + 1;
+
+ if (flags & REPLACE_ENV_USE_ENVIRONMENT) {
+ const char *t;
+
+ t = strndupa(name, k);
+ return getenv(t);
+ };
+
+ return NULL;
+}
+
+char *strv_env_get(char **l, const char *name) {
+ assert(name);
+
+ return strv_env_get_n(l, name, strlen(name), 0);
+}
+
+char **strv_env_clean_with_callback(char **e, void (*invalid_callback)(const char *p, void *userdata), void *userdata) {
+ char **p, **q;
+ int k = 0;
+
+ STRV_FOREACH(p, e) {
+ size_t n;
+ bool duplicate = false;
+
+ if (!env_assignment_is_valid(*p)) {
+ if (invalid_callback)
+ invalid_callback(*p, userdata);
+ free(*p);
+ continue;
+ }
+
+ n = strcspn(*p, "=");
+ STRV_FOREACH(q, p + 1)
+ if (strneq(*p, *q, n) && (*q)[n] == '=') {
+ duplicate = true;
+ break;
+ }
+
+ if (duplicate) {
+ free(*p);
+ continue;
+ }
+
+ e[k++] = *p;
+ }
+
+ if (e)
+ e[k] = NULL;
+
+ return e;
+}
+
+char *replace_env_n(const char *format, size_t n, char **env, unsigned flags) {
+ enum {
+ WORD,
+ CURLY,
+ VARIABLE,
+ VARIABLE_RAW,
+ TEST,
+ DEFAULT_VALUE,
+ ALTERNATE_VALUE,
+ } state = WORD;
+
+ const char *e, *word = format, *test_value;
+ char *k;
+ _cleanup_free_ char *r = NULL;
+ size_t i, len;
+ int nest = 0;
+
+ assert(format);
+
+ for (e = format, i = 0; *e && i < n; e ++, i ++)
+ switch (state) {
+
+ case WORD:
+ if (*e == '$')
+ state = CURLY;
+ break;
+
+ case CURLY:
+ if (*e == '{') {
+ k = strnappend(r, word, e-word-1);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e-1;
+ state = VARIABLE;
+ nest++;
+ } else if (*e == '$') {
+ k = strnappend(r, word, e-word);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e+1;
+ state = WORD;
+
+ } else if (flags & REPLACE_ENV_ALLOW_BRACELESS && strchr(VALID_CHARS_ENV_NAME, *e)) {
+ k = strnappend(r, word, e-word-1);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e-1;
+ state = VARIABLE_RAW;
+
+ } else
+ state = WORD;
+ break;
+
+ case VARIABLE:
+ if (*e == '}') {
+ const char *t;
+
+ t = strv_env_get_n(env, word+2, e-word-2, flags);
+
+ k = strappend(r, t);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e+1;
+ state = WORD;
+ } else if (*e == ':') {
+ if (!(flags & REPLACE_ENV_ALLOW_EXTENDED))
+ /* Treat this as unsupported syntax, i.e. do no replacement */
+ state = WORD;
+ else {
+ len = e-word-2;
+ state = TEST;
+ }
+ }
+ break;
+
+ case TEST:
+ if (*e == '-')
+ state = DEFAULT_VALUE;
+ else if (*e == '+')
+ state = ALTERNATE_VALUE;
+ else {
+ state = WORD;
+ break;
+ }
+
+ test_value = e+1;
+ break;
+
+ case DEFAULT_VALUE: /* fall through */
+ case ALTERNATE_VALUE:
+ assert(flags & REPLACE_ENV_ALLOW_EXTENDED);
+
+ if (*e == '{') {
+ nest++;
+ break;
+ }
+
+ if (*e != '}')
+ break;
+
+ nest--;
+ if (nest == 0) {
+ const char *t;
+ _cleanup_free_ char *v = NULL;
+
+ t = strv_env_get_n(env, word+2, len, flags);
+
+ if (t && state == ALTERNATE_VALUE)
+ t = v = replace_env_n(test_value, e-test_value, env, flags);
+ else if (!t && state == DEFAULT_VALUE)
+ t = v = replace_env_n(test_value, e-test_value, env, flags);
+
+ k = strappend(r, t);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e+1;
+ state = WORD;
+ }
+ break;
+
+ case VARIABLE_RAW:
+ assert(flags & REPLACE_ENV_ALLOW_BRACELESS);
+
+ if (!strchr(VALID_CHARS_ENV_NAME, *e)) {
+ const char *t;
+
+ t = strv_env_get_n(env, word+1, e-word-1, flags);
+
+ k = strappend(r, t);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e--;
+ i--;
+ state = WORD;
+ }
+ break;
+ }
+
+ if (state == VARIABLE_RAW) {
+ const char *t;
+
+ assert(flags & REPLACE_ENV_ALLOW_BRACELESS);
+
+ t = strv_env_get_n(env, word+1, e-word-1, flags);
+ return strappend(r, t);
+ } else
+ return strnappend(r, word, e-word);
+}
+
+char **replace_env_argv(char **argv, char **env) {
+ char **ret, **i;
+ size_t k = 0, l = 0;
+
+ l = strv_length(argv);
+
+ ret = new(char*, l+1);
+ if (!ret)
+ return NULL;
+
+ STRV_FOREACH(i, argv) {
+
+ /* If $FOO appears as single word, replace it by the split up variable */
+ if ((*i)[0] == '$' && !IN_SET((*i)[1], '{', '$')) {
+ char *e;
+ char **w, **m = NULL;
+ size_t q;
+
+ e = strv_env_get(env, *i+1);
+ if (e) {
+ int r;
+
+ r = strv_split_extract(&m, e, WHITESPACE, EXTRACT_RELAX|EXTRACT_QUOTES);
+ if (r < 0) {
+ ret[k] = NULL;
+ strv_free(ret);
+ return NULL;
+ }
+ } else
+ m = NULL;
+
+ q = strv_length(m);
+ l = l + q - 1;
+
+ w = reallocarray(ret, l + 1, sizeof(char *));
+ if (!w) {
+ ret[k] = NULL;
+ strv_free(ret);
+ strv_free(m);
+ return NULL;
+ }
+
+ ret = w;
+ if (m) {
+ memcpy(ret + k, m, q * sizeof(char*));
+ free(m);
+ }
+
+ k += q;
+ continue;
+ }
+
+ /* If ${FOO} appears as part of a word, replace it by the variable as-is */
+ ret[k] = replace_env(*i, env, 0);
+ if (!ret[k]) {
+ strv_free(ret);
+ return NULL;
+ }
+ k++;
+ }
+
+ ret[k] = NULL;
+ return ret;
+}
+
+int getenv_bool(const char *p) {
+ const char *e;
+
+ e = getenv(p);
+ if (!e)
+ return -ENXIO;
+
+ return parse_boolean(e);
+}
+
+int getenv_bool_secure(const char *p) {
+ const char *e;
+
+ e = secure_getenv(p);
+ if (!e)
+ return -ENXIO;
+
+ return parse_boolean(e);
+}
diff --git a/src/basic/env-util.h b/src/basic/env-util.h
new file mode 100644
index 0000000..d54f996
--- /dev/null
+++ b/src/basic/env-util.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include "macro.h"
+#include "string.h"
+
+bool env_name_is_valid(const char *e);
+bool env_value_is_valid(const char *e);
+bool env_assignment_is_valid(const char *e);
+
+enum {
+ REPLACE_ENV_USE_ENVIRONMENT = 1 << 0,
+ REPLACE_ENV_ALLOW_BRACELESS = 1 << 1,
+ REPLACE_ENV_ALLOW_EXTENDED = 1 << 2,
+};
+
+char *replace_env_n(const char *format, size_t n, char **env, unsigned flags);
+char **replace_env_argv(char **argv, char **env);
+
+static inline char *replace_env(const char *format, char **env, unsigned flags) {
+ return replace_env_n(format, strlen(format), env, flags);
+}
+
+bool strv_env_is_valid(char **e);
+#define strv_env_clean(l) strv_env_clean_with_callback(l, NULL, NULL)
+char **strv_env_clean_with_callback(char **l, void (*invalid_callback)(const char *p, void *userdata), void *userdata);
+
+bool strv_env_name_is_valid(char **l);
+bool strv_env_name_or_assignment_is_valid(char **l);
+
+char **strv_env_merge(size_t n_lists, ...);
+char **strv_env_delete(char **x, size_t n_lists, ...); /* New copy */
+
+char **strv_env_set(char **x, const char *p); /* New copy ... */
+char **strv_env_unset(char **l, const char *p); /* In place ... */
+char **strv_env_unset_many(char **l, ...) _sentinel_;
+int strv_env_replace(char ***l, char *p); /* In place ... */
+
+char *strv_env_get_n(char **l, const char *name, size_t k, unsigned flags) _pure_;
+char *strv_env_get(char **x, const char *n) _pure_;
+
+int getenv_bool(const char *p);
+int getenv_bool_secure(const char *p);
diff --git a/src/basic/errno-list.c b/src/basic/errno-list.c
new file mode 100644
index 0000000..44cc570
--- /dev/null
+++ b/src/basic/errno-list.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "errno-list.h"
+#include "macro.h"
+
+static const struct errno_name* lookup_errno(register const char *str,
+ register GPERF_LEN_TYPE len);
+
+#include "errno-from-name.h"
+#include "errno-to-name.h"
+
+const char *errno_to_name(int id) {
+
+ if (id < 0)
+ id = -id;
+
+ if ((size_t) id >= ELEMENTSOF(errno_names))
+ return NULL;
+
+ return errno_names[id];
+}
+
+int errno_from_name(const char *name) {
+ const struct errno_name *sc;
+
+ assert(name);
+
+ sc = lookup_errno(name, strlen(name));
+ if (!sc)
+ return -EINVAL;
+
+ assert(sc->id > 0);
+ return sc->id;
+}
diff --git a/src/basic/errno-list.h b/src/basic/errno-list.h
new file mode 100644
index 0000000..9c639b4
--- /dev/null
+++ b/src/basic/errno-list.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+/*
+ * MAX_ERRNO is defined as 4095 in linux/err.h
+ * We use the same value here.
+ */
+#define ERRNO_MAX 4095
+
+const char *errno_to_name(int id);
+int errno_from_name(const char *name);
+static inline bool errno_is_valid(int n) {
+ return n > 0 && n <= ERRNO_MAX;
+}
diff --git a/src/basic/errno-to-name.awk b/src/basic/errno-to-name.awk
new file mode 100644
index 0000000..0878aba
--- /dev/null
+++ b/src/basic/errno-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{
+ print "static const char* const errno_names[] = { "
+}
+!/EDEADLOCK/ && !/EWOULDBLOCK/ && !/ENOTSUP/ {
+ printf " [%s] = \"%s\",\n", $1, $1
+}
+END{
+ print "};"
+}
diff --git a/src/basic/escape.c b/src/basic/escape.c
new file mode 100644
index 0000000..5f71515
--- /dev/null
+++ b/src/basic/escape.c
@@ -0,0 +1,506 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "utf8.h"
+
+int cescape_char(char c, char *buf) {
+ char *buf_old = buf;
+
+ /* Needs space for 4 characters in the buffer */
+
+ switch (c) {
+
+ case '\a':
+ *(buf++) = '\\';
+ *(buf++) = 'a';
+ break;
+ case '\b':
+ *(buf++) = '\\';
+ *(buf++) = 'b';
+ break;
+ case '\f':
+ *(buf++) = '\\';
+ *(buf++) = 'f';
+ break;
+ case '\n':
+ *(buf++) = '\\';
+ *(buf++) = 'n';
+ break;
+ case '\r':
+ *(buf++) = '\\';
+ *(buf++) = 'r';
+ break;
+ case '\t':
+ *(buf++) = '\\';
+ *(buf++) = 't';
+ break;
+ case '\v':
+ *(buf++) = '\\';
+ *(buf++) = 'v';
+ break;
+ case '\\':
+ *(buf++) = '\\';
+ *(buf++) = '\\';
+ break;
+ case '"':
+ *(buf++) = '\\';
+ *(buf++) = '"';
+ break;
+ case '\'':
+ *(buf++) = '\\';
+ *(buf++) = '\'';
+ break;
+
+ default:
+ /* For special chars we prefer octal over
+ * hexadecimal encoding, simply because glib's
+ * g_strescape() does the same */
+ if ((c < ' ') || (c >= 127)) {
+ *(buf++) = '\\';
+ *(buf++) = octchar((unsigned char) c >> 6);
+ *(buf++) = octchar((unsigned char) c >> 3);
+ *(buf++) = octchar((unsigned char) c);
+ } else
+ *(buf++) = c;
+ break;
+ }
+
+ return buf - buf_old;
+}
+
+char *cescape_length(const char *s, size_t n) {
+ const char *f;
+ char *r, *t;
+
+ assert(s || n == 0);
+
+ /* Does C style string escaping. May be reversed with
+ * cunescape(). */
+
+ r = new(char, n*4 + 1);
+ if (!r)
+ return NULL;
+
+ for (f = s, t = r; f < s + n; f++)
+ t += cescape_char(*f, t);
+
+ *t = 0;
+
+ return r;
+}
+
+char *cescape(const char *s) {
+ assert(s);
+
+ return cescape_length(s, strlen(s));
+}
+
+int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) {
+ int r = 1;
+
+ assert(p);
+ assert(ret);
+
+ /* Unescapes C style. Returns the unescaped character in ret.
+ * Sets *eight_bit to true if the escaped sequence either fits in
+ * one byte in UTF-8 or is a non-unicode literal byte and should
+ * instead be copied directly.
+ */
+
+ if (length != (size_t) -1 && length < 1)
+ return -EINVAL;
+
+ switch (p[0]) {
+
+ case 'a':
+ *ret = '\a';
+ break;
+ case 'b':
+ *ret = '\b';
+ break;
+ case 'f':
+ *ret = '\f';
+ break;
+ case 'n':
+ *ret = '\n';
+ break;
+ case 'r':
+ *ret = '\r';
+ break;
+ case 't':
+ *ret = '\t';
+ break;
+ case 'v':
+ *ret = '\v';
+ break;
+ case '\\':
+ *ret = '\\';
+ break;
+ case '"':
+ *ret = '"';
+ break;
+ case '\'':
+ *ret = '\'';
+ break;
+
+ case 's':
+ /* This is an extension of the XDG syntax files */
+ *ret = ' ';
+ break;
+
+ case 'x': {
+ /* hexadecimal encoding */
+ int a, b;
+
+ if (length != (size_t) -1 && length < 3)
+ return -EINVAL;
+
+ a = unhexchar(p[1]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unhexchar(p[2]);
+ if (b < 0)
+ return -EINVAL;
+
+ /* Don't allow NUL bytes */
+ if (a == 0 && b == 0)
+ return -EINVAL;
+
+ *ret = (a << 4U) | b;
+ *eight_bit = true;
+ r = 3;
+ break;
+ }
+
+ case 'u': {
+ /* C++11 style 16bit unicode */
+
+ int a[4];
+ size_t i;
+ uint32_t c;
+
+ if (length != (size_t) -1 && length < 5)
+ return -EINVAL;
+
+ for (i = 0; i < 4; i++) {
+ a[i] = unhexchar(p[1 + i]);
+ if (a[i] < 0)
+ return a[i];
+ }
+
+ c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];
+
+ /* Don't allow 0 chars */
+ if (c == 0)
+ return -EINVAL;
+
+ *ret = c;
+ r = 5;
+ break;
+ }
+
+ case 'U': {
+ /* C++11 style 32bit unicode */
+
+ int a[8];
+ size_t i;
+ char32_t c;
+
+ if (length != (size_t) -1 && length < 9)
+ return -EINVAL;
+
+ for (i = 0; i < 8; i++) {
+ a[i] = unhexchar(p[1 + i]);
+ if (a[i] < 0)
+ return a[i];
+ }
+
+ c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
+ ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] << 8U) | ((uint32_t) a[6] << 4U) | (uint32_t) a[7];
+
+ /* Don't allow 0 chars */
+ if (c == 0)
+ return -EINVAL;
+
+ /* Don't allow invalid code points */
+ if (!unichar_is_valid(c))
+ return -EINVAL;
+
+ *ret = c;
+ r = 9;
+ break;
+ }
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7': {
+ /* octal encoding */
+ int a, b, c;
+ char32_t m;
+
+ if (length != (size_t) -1 && length < 3)
+ return -EINVAL;
+
+ a = unoctchar(p[0]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unoctchar(p[1]);
+ if (b < 0)
+ return -EINVAL;
+
+ c = unoctchar(p[2]);
+ if (c < 0)
+ return -EINVAL;
+
+ /* don't allow NUL bytes */
+ if (a == 0 && b == 0 && c == 0)
+ return -EINVAL;
+
+ /* Don't allow bytes above 255 */
+ m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c;
+ if (m > 255)
+ return -EINVAL;
+
+ *ret = m;
+ *eight_bit = true;
+ r = 3;
+ break;
+ }
+
+ default:
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
+ char *r, *t;
+ const char *f;
+ size_t pl;
+
+ assert(s);
+ assert(ret);
+
+ /* Undoes C style string escaping, and optionally prefixes it. */
+
+ pl = strlen_ptr(prefix);
+
+ r = new(char, pl+length+1);
+ if (!r)
+ return -ENOMEM;
+
+ if (prefix)
+ memcpy(r, prefix, pl);
+
+ for (f = s, t = r + pl; f < s + length; f++) {
+ size_t remaining;
+ bool eight_bit = false;
+ char32_t u;
+ int k;
+
+ remaining = s + length - f;
+ assert(remaining > 0);
+
+ if (*f != '\\') {
+ /* A literal, copy verbatim */
+ *(t++) = *f;
+ continue;
+ }
+
+ if (remaining == 1) {
+ if (flags & UNESCAPE_RELAX) {
+ /* A trailing backslash, copy verbatim */
+ *(t++) = *f;
+ continue;
+ }
+
+ free(r);
+ return -EINVAL;
+ }
+
+ k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit);
+ if (k < 0) {
+ if (flags & UNESCAPE_RELAX) {
+ /* Invalid escape code, let's take it literal then */
+ *(t++) = '\\';
+ continue;
+ }
+
+ free(r);
+ return k;
+ }
+
+ f += k;
+ if (eight_bit)
+ /* One byte? Set directly as specified */
+ *(t++) = u;
+ else
+ /* Otherwise encode as multi-byte UTF-8 */
+ t += utf8_encode_unichar(t, u);
+ }
+
+ *t = 0;
+
+ *ret = r;
+ return t - r;
+}
+
+int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
+ return cunescape_length_with_prefix(s, length, NULL, flags, ret);
+}
+
+int cunescape(const char *s, UnescapeFlags flags, char **ret) {
+ return cunescape_length(s, strlen(s), flags, ret);
+}
+
+char *xescape(const char *s, const char *bad) {
+ char *r, *t;
+ const char *f;
+
+ /* Escapes all chars in bad, in addition to \ and all special
+ * chars, in \xFF style escaping. May be reversed with
+ * cunescape(). */
+
+ r = new(char, strlen(s) * 4 + 1);
+ if (!r)
+ return NULL;
+
+ for (f = s, t = r; *f; f++) {
+
+ if ((*f < ' ') || (*f >= 127) ||
+ (*f == '\\') || strchr(bad, *f)) {
+ *(t++) = '\\';
+ *(t++) = 'x';
+ *(t++) = hexchar(*f >> 4);
+ *(t++) = hexchar(*f);
+ } else
+ *(t++) = *f;
+ }
+
+ *t = 0;
+
+ return r;
+}
+
+char *octescape(const char *s, size_t len) {
+ char *r, *t;
+ const char *f;
+
+ /* Escapes all chars in bad, in addition to \ and " chars,
+ * in \nnn style escaping. */
+
+ r = new(char, len * 4 + 1);
+ if (!r)
+ return NULL;
+
+ for (f = s, t = r; f < s + len; f++) {
+
+ if (*f < ' ' || *f >= 127 || IN_SET(*f, '\\', '"')) {
+ *(t++) = '\\';
+ *(t++) = '0' + (*f >> 6);
+ *(t++) = '0' + ((*f >> 3) & 8);
+ *(t++) = '0' + (*f & 8);
+ } else
+ *(t++) = *f;
+ }
+
+ *t = 0;
+
+ return r;
+
+}
+
+static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) {
+ assert(bad);
+
+ for (; *s; s++) {
+ if (escape_tab_nl && IN_SET(*s, '\n', '\t')) {
+ *(t++) = '\\';
+ *(t++) = *s == '\n' ? 'n' : 't';
+ continue;
+ }
+
+ if (*s == '\\' || strchr(bad, *s))
+ *(t++) = '\\';
+
+ *(t++) = *s;
+ }
+
+ return t;
+}
+
+char *shell_escape(const char *s, const char *bad) {
+ char *r, *t;
+
+ r = new(char, strlen(s)*2+1);
+ if (!r)
+ return NULL;
+
+ t = strcpy_backslash_escaped(r, s, bad, false);
+ *t = 0;
+
+ return r;
+}
+
+char* shell_maybe_quote(const char *s, EscapeStyle style) {
+ const char *p;
+ char *r, *t;
+
+ assert(s);
+
+ /* Encloses a string in quotes if necessary to make it OK as a shell
+ * string. Note that we treat benign UTF-8 characters as needing
+ * escaping too, but that should be OK. */
+
+ for (p = s; *p; p++)
+ if (*p <= ' ' ||
+ *p >= 127 ||
+ strchr(SHELL_NEED_QUOTES, *p))
+ break;
+
+ if (!*p)
+ return strdup(s);
+
+ r = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1);
+ if (!r)
+ return NULL;
+
+ t = r;
+ if (style == ESCAPE_BACKSLASH)
+ *(t++) = '"';
+ else if (style == ESCAPE_POSIX) {
+ *(t++) = '$';
+ *(t++) = '\'';
+ } else
+ assert_not_reached("Bad EscapeStyle");
+
+ t = mempcpy(t, s, p - s);
+
+ if (style == ESCAPE_BACKSLASH)
+ t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE, false);
+ else
+ t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE_POSIX, true);
+
+ if (style == ESCAPE_BACKSLASH)
+ *(t++) = '"';
+ else
+ *(t++) = '\'';
+ *t = 0;
+
+ return r;
+}
diff --git a/src/basic/escape.h b/src/basic/escape.h
new file mode 100644
index 0000000..5156209
--- /dev/null
+++ b/src/basic/escape.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <uchar.h>
+
+#include "string-util.h"
+#include "missing_type.h"
+
+/* What characters are special in the shell? */
+/* must be escaped outside and inside double-quotes */
+#define SHELL_NEED_ESCAPE "\"\\`$"
+
+/* Those that can be escaped or double-quoted.
+ *
+ * Stricly speaking, ! does not need to be escaped, except in interactive
+ * mode, but let's be extra nice to the user and quote ! in case this
+ * output is ever used in interactive mode. */
+#define SHELL_NEED_QUOTES SHELL_NEED_ESCAPE GLOB_CHARS "'()<>|&;!"
+
+/* Note that we assume control characters would need to be escaped too in
+ * addition to the "special" characters listed here, if they appear in the
+ * string. Current users disallow control characters. Also '"' shall not
+ * be escaped.
+ */
+#define SHELL_NEED_ESCAPE_POSIX "\\\'"
+
+typedef enum UnescapeFlags {
+ UNESCAPE_RELAX = 1,
+} UnescapeFlags;
+
+typedef enum EscapeStyle {
+ ESCAPE_BACKSLASH = 1,
+ ESCAPE_POSIX = 2,
+} EscapeStyle;
+
+char *cescape(const char *s);
+char *cescape_length(const char *s, size_t n);
+int cescape_char(char c, char *buf);
+
+int cunescape(const char *s, UnescapeFlags flags, char **ret);
+int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret);
+int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret);
+int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit);
+
+char *xescape(const char *s, const char *bad);
+char *octescape(const char *s, size_t len);
+
+char *shell_escape(const char *s, const char *bad);
+char* shell_maybe_quote(const char *s, EscapeStyle style);
diff --git a/src/basic/ether-addr-util.c b/src/basic/ether-addr-util.c
new file mode 100644
index 0000000..e875696
--- /dev/null
+++ b/src/basic/ether-addr-util.c
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <net/ethernet.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "ether-addr-util.h"
+#include "macro.h"
+#include "string-util.h"
+
+char* ether_addr_to_string(const struct ether_addr *addr, char buffer[ETHER_ADDR_TO_STRING_MAX]) {
+ assert(addr);
+ assert(buffer);
+
+ /* Like ether_ntoa() but uses %02x instead of %x to print
+ * ethernet addresses, which makes them look less funny. Also,
+ * doesn't use a static buffer. */
+
+ sprintf(buffer, "%02x:%02x:%02x:%02x:%02x:%02x",
+ addr->ether_addr_octet[0],
+ addr->ether_addr_octet[1],
+ addr->ether_addr_octet[2],
+ addr->ether_addr_octet[3],
+ addr->ether_addr_octet[4],
+ addr->ether_addr_octet[5]);
+
+ return buffer;
+}
+
+int ether_addr_compare(const struct ether_addr *a, const struct ether_addr *b) {
+ return memcmp(a, b, ETH_ALEN);
+}
+
+static void ether_addr_hash_func(const struct ether_addr *p, struct siphash *state) {
+ siphash24_compress(p, sizeof(struct ether_addr), state);
+}
+
+DEFINE_HASH_OPS(ether_addr_hash_ops, struct ether_addr, ether_addr_hash_func, ether_addr_compare);
+
+int ether_addr_from_string(const char *s, struct ether_addr *ret) {
+ size_t pos = 0, n, field;
+ char sep = '\0';
+ const char *hex = HEXDIGITS, *hexoff;
+ size_t x;
+ bool touched;
+
+#define parse_fields(v) \
+ for (field = 0; field < ELEMENTSOF(v); field++) { \
+ touched = false; \
+ for (n = 0; n < (2 * sizeof(v[0])); n++) { \
+ if (s[pos] == '\0') \
+ break; \
+ hexoff = strchr(hex, s[pos]); \
+ if (!hexoff) \
+ break; \
+ assert(hexoff >= hex); \
+ x = hexoff - hex; \
+ if (x >= 16) \
+ x -= 6; /* A-F */ \
+ assert(x < 16); \
+ touched = true; \
+ v[field] <<= 4; \
+ v[field] += x; \
+ pos++; \
+ } \
+ if (!touched) \
+ return -EINVAL; \
+ if (field < (ELEMENTSOF(v)-1)) { \
+ if (s[pos] != sep) \
+ return -EINVAL; \
+ else \
+ pos++; \
+ } \
+ }
+
+ assert(s);
+ assert(ret);
+
+ s += strspn(s, WHITESPACE);
+ sep = s[strspn(s, hex)];
+
+ if (sep == '.') {
+ uint16_t shorts[3] = { 0 };
+
+ parse_fields(shorts);
+
+ if (s[pos] != '\0')
+ return -EINVAL;
+
+ for (n = 0; n < ELEMENTSOF(shorts); n++) {
+ ret->ether_addr_octet[2*n] = ((shorts[n] & (uint16_t)0xff00) >> 8);
+ ret->ether_addr_octet[2*n + 1] = (shorts[n] & (uint16_t)0x00ff);
+ }
+
+ } else if (IN_SET(sep, ':', '-')) {
+ struct ether_addr out = ETHER_ADDR_NULL;
+
+ parse_fields(out.ether_addr_octet);
+
+ if (s[pos] != '\0')
+ return -EINVAL;
+
+ for (n = 0; n < ELEMENTSOF(out.ether_addr_octet); n++)
+ ret->ether_addr_octet[n] = out.ether_addr_octet[n];
+
+ } else
+ return -EINVAL;
+
+ return 0;
+}
diff --git a/src/basic/ether-addr-util.h b/src/basic/ether-addr-util.h
new file mode 100644
index 0000000..4e44b30
--- /dev/null
+++ b/src/basic/ether-addr-util.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <net/ethernet.h>
+#include <stdbool.h>
+
+#include "hash-funcs.h"
+
+#define ETHER_ADDR_FORMAT_STR "%02X%02X%02X%02X%02X%02X"
+#define ETHER_ADDR_FORMAT_VAL(x) (x).ether_addr_octet[0], (x).ether_addr_octet[1], (x).ether_addr_octet[2], (x).ether_addr_octet[3], (x).ether_addr_octet[4], (x).ether_addr_octet[5]
+
+#define ETHER_ADDR_TO_STRING_MAX (3*6)
+char* ether_addr_to_string(const struct ether_addr *addr, char buffer[ETHER_ADDR_TO_STRING_MAX]);
+
+int ether_addr_compare(const struct ether_addr *a, const struct ether_addr *b);
+static inline bool ether_addr_equal(const struct ether_addr *a, const struct ether_addr *b) {
+ return ether_addr_compare(a, b) == 0;
+}
+
+#define ETHER_ADDR_NULL ((const struct ether_addr){})
+
+static inline bool ether_addr_is_null(const struct ether_addr *addr) {
+ return ether_addr_equal(addr, &ETHER_ADDR_NULL);
+}
+
+int ether_addr_from_string(const char *s, struct ether_addr *ret);
+
+extern const struct hash_ops ether_addr_hash_ops;
diff --git a/src/basic/extract-word.c b/src/basic/extract-word.c
new file mode 100644
index 0000000..a861b56
--- /dev/null
+++ b/src/basic/extract-word.c
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "utf8.h"
+
+int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
+ _cleanup_free_ char *s = NULL;
+ size_t allocated = 0, sz = 0;
+ char c;
+ int r;
+
+ char quote = 0; /* 0 or ' or " */
+ bool backslash = false; /* whether we've just seen a backslash */
+
+ assert(p);
+ assert(ret);
+
+ /* Bail early if called after last value or with no input */
+ if (!*p)
+ goto finish;
+ c = **p;
+
+ if (!separators)
+ separators = WHITESPACE;
+
+ /* Parses the first word of a string, and returns it in
+ * *ret. Removes all quotes in the process. When parsing fails
+ * (because of an uneven number of quotes or similar), leaves
+ * the pointer *p at the first invalid character. */
+
+ if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
+ if (!GREEDY_REALLOC(s, allocated, sz+1))
+ return -ENOMEM;
+
+ for (;; (*p)++, c = **p) {
+ if (c == 0)
+ goto finish_force_terminate;
+ else if (strchr(separators, c)) {
+ if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
+ (*p)++;
+ goto finish_force_next;
+ }
+ } else {
+ /* We found a non-blank character, so we will always
+ * want to return a string (even if it is empty),
+ * allocate it here. */
+ if (!GREEDY_REALLOC(s, allocated, sz+1))
+ return -ENOMEM;
+ break;
+ }
+ }
+
+ for (;; (*p)++, c = **p) {
+ if (backslash) {
+ if (!GREEDY_REALLOC(s, allocated, sz+7))
+ return -ENOMEM;
+
+ if (c == 0) {
+ if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
+ (!quote || flags & EXTRACT_RELAX)) {
+ /* If we find an unquoted trailing backslash and we're in
+ * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
+ * output.
+ *
+ * Unbalanced quotes will only be allowed in EXTRACT_RELAX
+ * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
+ */
+ s[sz++] = '\\';
+ goto finish_force_terminate;
+ }
+ if (flags & EXTRACT_RELAX)
+ goto finish_force_terminate;
+ return -EINVAL;
+ }
+
+ if (flags & EXTRACT_CUNESCAPE) {
+ bool eight_bit = false;
+ char32_t u;
+
+ r = cunescape_one(*p, (size_t) -1, &u, &eight_bit);
+ if (r < 0) {
+ if (flags & EXTRACT_CUNESCAPE_RELAX) {
+ s[sz++] = '\\';
+ s[sz++] = c;
+ } else
+ return -EINVAL;
+ } else {
+ (*p) += r - 1;
+
+ if (eight_bit)
+ s[sz++] = u;
+ else
+ sz += utf8_encode_unichar(s + sz, u);
+ }
+ } else
+ s[sz++] = c;
+
+ backslash = false;
+
+ } else if (quote) { /* inside either single or double quotes */
+ for (;; (*p)++, c = **p) {
+ if (c == 0) {
+ if (flags & EXTRACT_RELAX)
+ goto finish_force_terminate;
+ return -EINVAL;
+ } else if (c == quote) { /* found the end quote */
+ quote = 0;
+ break;
+ } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
+ backslash = true;
+ break;
+ } else {
+ if (!GREEDY_REALLOC(s, allocated, sz+2))
+ return -ENOMEM;
+
+ s[sz++] = c;
+ }
+ }
+
+ } else {
+ for (;; (*p)++, c = **p) {
+ if (c == 0)
+ goto finish_force_terminate;
+ else if (IN_SET(c, '\'', '"') && (flags & EXTRACT_QUOTES)) {
+ quote = c;
+ break;
+ } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
+ backslash = true;
+ break;
+ } else if (strchr(separators, c)) {
+ if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
+ (*p)++;
+ goto finish_force_next;
+ }
+ /* Skip additional coalesced separators. */
+ for (;; (*p)++, c = **p) {
+ if (c == 0)
+ goto finish_force_terminate;
+ if (!strchr(separators, c))
+ break;
+ }
+ goto finish;
+
+ } else {
+ if (!GREEDY_REALLOC(s, allocated, sz+2))
+ return -ENOMEM;
+
+ s[sz++] = c;
+ }
+ }
+ }
+ }
+
+finish_force_terminate:
+ *p = NULL;
+finish:
+ if (!s) {
+ *p = NULL;
+ *ret = NULL;
+ return 0;
+ }
+
+finish_force_next:
+ s[sz] = 0;
+ *ret = TAKE_PTR(s);
+
+ return 1;
+}
+
+int extract_first_word_and_warn(
+ const char **p,
+ char **ret,
+ const char *separators,
+ ExtractFlags flags,
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *rvalue) {
+
+ /* Try to unquote it, if it fails, warn about it and try again
+ * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
+ * backslashes verbatim in invalid escape sequences. */
+
+ const char *save;
+ int r;
+
+ save = *p;
+ r = extract_first_word(p, ret, separators, flags);
+ if (r >= 0)
+ return r;
+
+ if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
+
+ /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
+ *p = save;
+ r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
+ if (r >= 0) {
+ /* It worked this time, hence it must have been an invalid escape sequence. */
+ log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret);
+ return r;
+ }
+
+ /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
+ if (r == -EINVAL)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
+ }
+
+ /* Can be any error, report it */
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
+}
+
+/* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing
+ * an object that undergoes default argument promotion as an argument to va_start).
+ * Let's make sure that ExtractFlags fits into an unsigned int. */
+assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned));
+
+int extract_many_words(const char **p, const char *separators, unsigned flags, ...) {
+ va_list ap;
+ char **l;
+ int n = 0, i, c, r;
+
+ /* Parses a number of words from a string, stripping any
+ * quotes if necessary. */
+
+ assert(p);
+
+ /* Count how many words are expected */
+ va_start(ap, flags);
+ for (;;) {
+ if (!va_arg(ap, char **))
+ break;
+ n++;
+ }
+ va_end(ap);
+
+ if (n <= 0)
+ return 0;
+
+ /* Read all words into a temporary array */
+ l = newa0(char*, n);
+ for (c = 0; c < n; c++) {
+
+ r = extract_first_word(p, &l[c], separators, flags);
+ if (r < 0) {
+ int j;
+
+ for (j = 0; j < c; j++)
+ free(l[j]);
+
+ return r;
+ }
+
+ if (r == 0)
+ break;
+ }
+
+ /* If we managed to parse all words, return them in the passed
+ * in parameters */
+ va_start(ap, flags);
+ for (i = 0; i < n; i++) {
+ char **v;
+
+ v = va_arg(ap, char **);
+ assert(v);
+
+ *v = l[i];
+ }
+ va_end(ap);
+
+ return c;
+}
diff --git a/src/basic/extract-word.h b/src/basic/extract-word.h
new file mode 100644
index 0000000..705ebbe
--- /dev/null
+++ b/src/basic/extract-word.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "macro.h"
+
+typedef enum ExtractFlags {
+ EXTRACT_RELAX = 1 << 0,
+ EXTRACT_CUNESCAPE = 1 << 1,
+ EXTRACT_CUNESCAPE_RELAX = 1 << 2,
+ EXTRACT_QUOTES = 1 << 3,
+ EXTRACT_DONT_COALESCE_SEPARATORS = 1 << 4,
+ EXTRACT_RETAIN_ESCAPE = 1 << 5,
+} ExtractFlags;
+
+int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags);
+int extract_first_word_and_warn(const char **p, char **ret, const char *separators, ExtractFlags flags, const char *unit, const char *filename, unsigned line, const char *rvalue);
+int extract_many_words(const char **p, const char *separators, unsigned flags, ...) _sentinel_;
diff --git a/src/basic/fd-util.c b/src/basic/fd-util.c
new file mode 100644
index 0000000..3e6ef5a
--- /dev/null
+++ b/src/basic/fd-util.c
@@ -0,0 +1,967 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "memfd-util.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "util.h"
+#include "tmpfile-util.h"
+
+int close_nointr(int fd) {
+ assert(fd >= 0);
+
+ if (close(fd) >= 0)
+ return 0;
+
+ /*
+ * Just ignore EINTR; a retry loop is the wrong thing to do on
+ * Linux.
+ *
+ * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
+ * https://bugzilla.gnome.org/show_bug.cgi?id=682819
+ * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
+ * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
+ */
+ if (errno == EINTR)
+ return 0;
+
+ return -errno;
+}
+
+int safe_close(int fd) {
+
+ /*
+ * Like close_nointr() but cannot fail. Guarantees errno is
+ * unchanged. Is a NOP with negative fds passed, and returns
+ * -1, so that it can be used in this syntax:
+ *
+ * fd = safe_close(fd);
+ */
+
+ if (fd >= 0) {
+ PROTECT_ERRNO;
+
+ /* The kernel might return pretty much any error code
+ * via close(), but the fd will be closed anyway. The
+ * only condition we want to check for here is whether
+ * the fd was invalid at all... */
+
+ assert_se(close_nointr(fd) != -EBADF);
+ }
+
+ return -1;
+}
+
+void safe_close_pair(int p[static 2]) {
+ assert(p);
+
+ if (p[0] == p[1]) {
+ /* Special case pairs which use the same fd in both
+ * directions... */
+ p[0] = p[1] = safe_close(p[0]);
+ return;
+ }
+
+ p[0] = safe_close(p[0]);
+ p[1] = safe_close(p[1]);
+}
+
+void close_many(const int fds[], size_t n_fd) {
+ size_t i;
+
+ assert(fds || n_fd <= 0);
+
+ for (i = 0; i < n_fd; i++)
+ safe_close(fds[i]);
+}
+
+int fclose_nointr(FILE *f) {
+ assert(f);
+
+ /* Same as close_nointr(), but for fclose() */
+
+ if (fclose(f) == 0)
+ return 0;
+
+ if (errno == EINTR)
+ return 0;
+
+ return -errno;
+}
+
+FILE* safe_fclose(FILE *f) {
+
+ /* Same as safe_close(), but for fclose() */
+
+ if (f) {
+ PROTECT_ERRNO;
+
+ assert_se(fclose_nointr(f) != -EBADF);
+ }
+
+ return NULL;
+}
+
+DIR* safe_closedir(DIR *d) {
+
+ if (d) {
+ PROTECT_ERRNO;
+
+ assert_se(closedir(d) >= 0 || errno != EBADF);
+ }
+
+ return NULL;
+}
+
+int fd_nonblock(int fd, bool nonblock) {
+ int flags, nflags;
+
+ assert(fd >= 0);
+
+ flags = fcntl(fd, F_GETFL, 0);
+ if (flags < 0)
+ return -errno;
+
+ if (nonblock)
+ nflags = flags | O_NONBLOCK;
+ else
+ nflags = flags & ~O_NONBLOCK;
+
+ if (nflags == flags)
+ return 0;
+
+ if (fcntl(fd, F_SETFL, nflags) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int fd_cloexec(int fd, bool cloexec) {
+ int flags, nflags;
+
+ assert(fd >= 0);
+
+ flags = fcntl(fd, F_GETFD, 0);
+ if (flags < 0)
+ return -errno;
+
+ if (cloexec)
+ nflags = flags | FD_CLOEXEC;
+ else
+ nflags = flags & ~FD_CLOEXEC;
+
+ if (nflags == flags)
+ return 0;
+
+ if (fcntl(fd, F_SETFD, nflags) < 0)
+ return -errno;
+
+ return 0;
+}
+
+_pure_ static bool fd_in_set(int fd, const int fdset[], size_t n_fdset) {
+ size_t i;
+
+ assert(n_fdset == 0 || fdset);
+
+ for (i = 0; i < n_fdset; i++)
+ if (fdset[i] == fd)
+ return true;
+
+ return false;
+}
+
+static int get_max_fd(void) {
+ struct rlimit rl;
+ rlim_t m;
+
+ /* Return the highest possible fd, based RLIMIT_NOFILE, but enforcing FD_SETSIZE-1 as lower boundary
+ * and INT_MAX as upper boundary. */
+
+ if (getrlimit(RLIMIT_NOFILE, &rl) < 0)
+ return -errno;
+
+ m = MAX(rl.rlim_cur, rl.rlim_max);
+ if (m < FD_SETSIZE) /* Let's always cover at least 1024 fds */
+ return FD_SETSIZE-1;
+
+ if (m == RLIM_INFINITY || m > INT_MAX) /* Saturate on overflow. After all fds are "int", hence can
+ * never be above INT_MAX */
+ return INT_MAX;
+
+ return (int) (m - 1);
+}
+
+int close_all_fds(const int except[], size_t n_except) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(n_except == 0 || except);
+
+ d = opendir("/proc/self/fd");
+ if (!d) {
+ int fd, max_fd;
+
+ /* When /proc isn't available (for example in chroots) the fallback is brute forcing through
+ * the fd table */
+
+ max_fd = get_max_fd();
+ if (max_fd < 0)
+ return max_fd;
+
+ for (fd = 3; fd >= 0; fd = fd < max_fd ? fd + 1 : -1) {
+ int q;
+
+ if (fd_in_set(fd, except, n_except))
+ continue;
+
+ q = close_nointr(fd);
+ if (q < 0 && q != -EBADF && r >= 0)
+ r = q;
+ }
+
+ return r;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ int fd = -1, q;
+
+ if (safe_atoi(de->d_name, &fd) < 0)
+ /* Let's better ignore this, just in case */
+ continue;
+
+ if (fd < 3)
+ continue;
+
+ if (fd == dirfd(d))
+ continue;
+
+ if (fd_in_set(fd, except, n_except))
+ continue;
+
+ q = close_nointr(fd);
+ if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
+ r = q;
+ }
+
+ return r;
+}
+
+int same_fd(int a, int b) {
+ struct stat sta, stb;
+ pid_t pid;
+ int r, fa, fb;
+
+ assert(a >= 0);
+ assert(b >= 0);
+
+ /* Compares two file descriptors. Note that semantics are
+ * quite different depending on whether we have kcmp() or we
+ * don't. If we have kcmp() this will only return true for
+ * dup()ed file descriptors, but not otherwise. If we don't
+ * have kcmp() this will also return true for two fds of the same
+ * file, created by separate open() calls. Since we use this
+ * call mostly for filtering out duplicates in the fd store
+ * this difference hopefully doesn't matter too much. */
+
+ if (a == b)
+ return true;
+
+ /* Try to use kcmp() if we have it. */
+ pid = getpid_cached();
+ r = kcmp(pid, pid, KCMP_FILE, a, b);
+ if (r == 0)
+ return true;
+ if (r > 0)
+ return false;
+ if (!IN_SET(errno, ENOSYS, EACCES, EPERM))
+ return -errno;
+
+ /* We don't have kcmp(), use fstat() instead. */
+ if (fstat(a, &sta) < 0)
+ return -errno;
+
+ if (fstat(b, &stb) < 0)
+ return -errno;
+
+ if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT))
+ return false;
+
+ /* We consider all device fds different, since two device fds
+ * might refer to quite different device contexts even though
+ * they share the same inode and backing dev_t. */
+
+ if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode))
+ return false;
+
+ if (sta.st_dev != stb.st_dev || sta.st_ino != stb.st_ino)
+ return false;
+
+ /* The fds refer to the same inode on disk, let's also check
+ * if they have the same fd flags. This is useful to
+ * distinguish the read and write side of a pipe created with
+ * pipe(). */
+ fa = fcntl(a, F_GETFL);
+ if (fa < 0)
+ return -errno;
+
+ fb = fcntl(b, F_GETFL);
+ if (fb < 0)
+ return -errno;
+
+ return fa == fb;
+}
+
+void cmsg_close_all(struct msghdr *mh) {
+ struct cmsghdr *cmsg;
+
+ assert(mh);
+
+ CMSG_FOREACH(cmsg, mh)
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
+ close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
+}
+
+bool fdname_is_valid(const char *s) {
+ const char *p;
+
+ /* Validates a name for $LISTEN_FDNAMES. We basically allow
+ * everything ASCII that's not a control character. Also, as
+ * special exception the ":" character is not allowed, as we
+ * use that as field separator in $LISTEN_FDNAMES.
+ *
+ * Note that the empty string is explicitly allowed
+ * here. However, we limit the length of the names to 255
+ * characters. */
+
+ if (!s)
+ return false;
+
+ for (p = s; *p; p++) {
+ if (*p < ' ')
+ return false;
+ if (*p >= 127)
+ return false;
+ if (*p == ':')
+ return false;
+ }
+
+ return p - s < 256;
+}
+
+int fd_get_path(int fd, char **ret) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ int r;
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ r = readlink_malloc(procfs_path, ret);
+ if (r == -ENOENT) {
+ /* ENOENT can mean two things: that the fd does not exist or that /proc is not mounted. Let's make
+ * things debuggable and distuingish the two. */
+
+ if (access("/proc/self/fd/", F_OK) < 0)
+ /* /proc is not available or not set up properly, we're most likely in some chroot
+ * environment. */
+ return errno == ENOENT ? -EOPNOTSUPP : -errno;
+
+ return -EBADF; /* The directory exists, hence it's the fd that doesn't. */
+ }
+
+ return r;
+}
+
+int move_fd(int from, int to, int cloexec) {
+ int r;
+
+ /* Move fd 'from' to 'to', make sure FD_CLOEXEC remains equal if requested, and release the old fd. If
+ * 'cloexec' is passed as -1, the original FD_CLOEXEC is inherited for the new fd. If it is 0, it is turned
+ * off, if it is > 0 it is turned on. */
+
+ if (from < 0)
+ return -EBADF;
+ if (to < 0)
+ return -EBADF;
+
+ if (from == to) {
+
+ if (cloexec >= 0) {
+ r = fd_cloexec(to, cloexec);
+ if (r < 0)
+ return r;
+ }
+
+ return to;
+ }
+
+ if (cloexec < 0) {
+ int fl;
+
+ fl = fcntl(from, F_GETFD, 0);
+ if (fl < 0)
+ return -errno;
+
+ cloexec = !!(fl & FD_CLOEXEC);
+ }
+
+ r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
+ if (r < 0)
+ return -errno;
+
+ assert(r == to);
+
+ safe_close(from);
+
+ return to;
+}
+
+int acquire_data_fd(const void *data, size_t size, unsigned flags) {
+
+ _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
+ char pattern[] = "/dev/shm/data-fd-XXXXXX";
+ _cleanup_close_ int fd = -1;
+ int isz = 0, r;
+ ssize_t n;
+ off_t f;
+
+ assert(data || size == 0);
+
+ /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
+ * complex than I wish it was. But here's why:
+ *
+ * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
+ * read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
+ *
+ * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
+ * a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
+ * clients can only bump their size to a system-wide limit, which might be quite low.
+ *
+ * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
+ * earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
+ * /proc/self/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
+ *
+ * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
+ *
+ * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
+ * figure. */
+
+ if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
+ /* As a special case, return /dev/null if we have been called for an empty data block */
+ r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (r < 0)
+ return -errno;
+
+ return r;
+ }
+
+ if ((flags & ACQUIRE_NO_MEMFD) == 0) {
+ fd = memfd_new("data-fd");
+ if (fd < 0)
+ goto try_pipe;
+
+ n = write(fd, data, size);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n != size)
+ return -EIO;
+
+ f = lseek(fd, 0, SEEK_SET);
+ if (f != 0)
+ return -errno;
+
+ r = memfd_set_sealed(fd);
+ if (r < 0)
+ return r;
+
+ return TAKE_FD(fd);
+ }
+
+try_pipe:
+ if ((flags & ACQUIRE_NO_PIPE) == 0) {
+ if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
+ return -errno;
+
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+
+ if ((size_t) isz < size) {
+ isz = (int) size;
+ if (isz < 0 || (size_t) isz != size)
+ return -E2BIG;
+
+ /* Try to bump the pipe size */
+ (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
+
+ /* See if that worked */
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+
+ if ((size_t) isz < size)
+ goto try_dev_shm;
+ }
+
+ n = write(pipefds[1], data, size);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n != size)
+ return -EIO;
+
+ (void) fd_nonblock(pipefds[0], false);
+
+ return TAKE_FD(pipefds[0]);
+ }
+
+try_dev_shm:
+ if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
+ fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
+ if (fd < 0)
+ goto try_dev_shm_without_o_tmpfile;
+
+ n = write(fd, data, size);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n != size)
+ return -EIO;
+
+ /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
+ return fd_reopen(fd, O_RDONLY|O_CLOEXEC);
+ }
+
+try_dev_shm_without_o_tmpfile:
+ if ((flags & ACQUIRE_NO_REGULAR) == 0) {
+ fd = mkostemp_safe(pattern);
+ if (fd < 0)
+ return fd;
+
+ n = write(fd, data, size);
+ if (n < 0) {
+ r = -errno;
+ goto unlink_and_return;
+ }
+ if ((size_t) n != size) {
+ r = -EIO;
+ goto unlink_and_return;
+ }
+
+ /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
+ r = open(pattern, O_RDONLY|O_CLOEXEC);
+ if (r < 0)
+ r = -errno;
+
+ unlink_and_return:
+ (void) unlink(pattern);
+ return r;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+/* When the data is smaller or equal to 64K, try to place the copy in a memfd/pipe */
+#define DATA_FD_MEMORY_LIMIT (64U*1024U)
+
+/* If memfd/pipe didn't work out, then let's use a file in /tmp up to a size of 1M. If it's large than that use /var/tmp instead. */
+#define DATA_FD_TMP_LIMIT (1024U*1024U)
+
+int fd_duplicate_data_fd(int fd) {
+
+ _cleanup_close_ int copy_fd = -1, tmp_fd = -1;
+ _cleanup_free_ void *remains = NULL;
+ size_t remains_size = 0;
+ const char *td;
+ struct stat st;
+ int r;
+
+ /* Creates a 'data' fd from the specified source fd, containing all the same data in a read-only fashion, but
+ * independent of it (i.e. the source fd can be closed and unmounted after this call succeeded). Tries to be
+ * somewhat smart about where to place the data. In the best case uses a memfd(). If memfd() are not supported
+ * uses a pipe instead. For larger data will use an unlinked file in /tmp, and for even larger data one in
+ * /var/tmp. */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* For now, let's only accept regular files, sockets, pipes and char devices */
+ if (S_ISDIR(st.st_mode))
+ return -EISDIR;
+ if (S_ISLNK(st.st_mode))
+ return -ELOOP;
+ if (!S_ISREG(st.st_mode) && !S_ISSOCK(st.st_mode) && !S_ISFIFO(st.st_mode) && !S_ISCHR(st.st_mode))
+ return -EBADFD;
+
+ /* If we have reason to believe the data is bounded in size, then let's use memfds or pipes as backing fd. Note
+ * that we use the reported regular file size only as a hint, given that there are plenty special files in
+ * /proc and /sys which report a zero file size but can be read from. */
+
+ if (!S_ISREG(st.st_mode) || st.st_size < DATA_FD_MEMORY_LIMIT) {
+
+ /* Try a memfd first */
+ copy_fd = memfd_new("data-fd");
+ if (copy_fd >= 0) {
+ off_t f;
+
+ r = copy_bytes(fd, copy_fd, DATA_FD_MEMORY_LIMIT, 0);
+ if (r < 0)
+ return r;
+
+ f = lseek(copy_fd, 0, SEEK_SET);
+ if (f != 0)
+ return -errno;
+
+ if (r == 0) {
+ /* Did it fit into the limit? If so, we are done. */
+ r = memfd_set_sealed(copy_fd);
+ if (r < 0)
+ return r;
+
+ return TAKE_FD(copy_fd);
+ }
+
+ /* Hmm, pity, this didn't fit. Let's fall back to /tmp then, see below */
+
+ } else {
+ _cleanup_(close_pairp) int pipefds[2] = { -1, -1 };
+ int isz;
+
+ /* If memfds aren't available, use a pipe. Set O_NONBLOCK so that we will get EAGAIN rather
+ * then block indefinitely when we hit the pipe size limit */
+
+ if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
+ return -errno;
+
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+
+ /* Try to enlarge the pipe size if necessary */
+ if ((size_t) isz < DATA_FD_MEMORY_LIMIT) {
+
+ (void) fcntl(pipefds[1], F_SETPIPE_SZ, DATA_FD_MEMORY_LIMIT);
+
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+ }
+
+ if ((size_t) isz >= DATA_FD_MEMORY_LIMIT) {
+
+ r = copy_bytes_full(fd, pipefds[1], DATA_FD_MEMORY_LIMIT, 0, &remains, &remains_size, NULL, NULL);
+ if (r < 0 && r != -EAGAIN)
+ return r; /* If we get EAGAIN it could be because of the source or because of
+ * the destination fd, we can't know, as sendfile() and friends won't
+ * tell us. Hence, treat this as reason to fall back, just to be
+ * sure. */
+ if (r == 0) {
+ /* Everything fit in, yay! */
+ (void) fd_nonblock(pipefds[0], false);
+
+ return TAKE_FD(pipefds[0]);
+ }
+
+ /* Things didn't fit in. But we read data into the pipe, let's remember that, so that
+ * when writing the new file we incorporate this first. */
+ copy_fd = TAKE_FD(pipefds[0]);
+ }
+ }
+ }
+
+ /* If we have reason to believe this will fit fine in /tmp, then use that as first fallback. */
+ if ((!S_ISREG(st.st_mode) || st.st_size < DATA_FD_TMP_LIMIT) &&
+ (DATA_FD_MEMORY_LIMIT + remains_size) < DATA_FD_TMP_LIMIT) {
+ off_t f;
+
+ tmp_fd = open_tmpfile_unlinkable(NULL /* NULL as directory means /tmp */, O_RDWR|O_CLOEXEC);
+ if (tmp_fd < 0)
+ return tmp_fd;
+
+ if (copy_fd >= 0) {
+ /* If we tried a memfd/pipe first and it ended up being too large, then copy this into the
+ * temporary file first. */
+
+ r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, 0);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ }
+
+ if (remains_size > 0) {
+ /* If there were remaining bytes (i.e. read into memory, but not written out yet) from the
+ * failed copy operation, let's flush them out next. */
+
+ r = loop_write(tmp_fd, remains, remains_size, false);
+ if (r < 0)
+ return r;
+ }
+
+ r = copy_bytes(fd, tmp_fd, DATA_FD_TMP_LIMIT - DATA_FD_MEMORY_LIMIT - remains_size, COPY_REFLINK);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ goto finish; /* Yay, it fit in */
+
+ /* It didn't fit in. Let's not forget to use what we already used */
+ f = lseek(tmp_fd, 0, SEEK_SET);
+ if (f != 0)
+ return -errno;
+
+ safe_close(copy_fd);
+ copy_fd = TAKE_FD(tmp_fd);
+
+ remains = mfree(remains);
+ remains_size = 0;
+ }
+
+ /* As last fallback use /var/tmp */
+ r = var_tmp_dir(&td);
+ if (r < 0)
+ return r;
+
+ tmp_fd = open_tmpfile_unlinkable(td, O_RDWR|O_CLOEXEC);
+ if (tmp_fd < 0)
+ return tmp_fd;
+
+ if (copy_fd >= 0) {
+ /* If we tried a memfd/pipe first, or a file in /tmp, and it ended up being too large, than copy this
+ * into the temporary file first. */
+ r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ }
+
+ if (remains_size > 0) {
+ /* Then, copy in any read but not yet written bytes. */
+ r = loop_write(tmp_fd, remains, remains_size, false);
+ if (r < 0)
+ return r;
+ }
+
+ /* Copy in the rest */
+ r = copy_bytes(fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+
+finish:
+ /* Now convert the O_RDWR file descriptor into an O_RDONLY one (and as side effect seek to the beginning of the
+ * file again */
+
+ return fd_reopen(tmp_fd, O_RDONLY|O_CLOEXEC);
+}
+
+int fd_move_above_stdio(int fd) {
+ int flags, copy;
+ PROTECT_ERRNO;
+
+ /* Moves the specified file descriptor if possible out of the range [0…2], i.e. the range of
+ * stdin/stdout/stderr. If it can't be moved outside of this range the original file descriptor is
+ * returned. This call is supposed to be used for long-lasting file descriptors we allocate in our code that
+ * might get loaded into foreign code, and where we want ensure our fds are unlikely used accidentally as
+ * stdin/stdout/stderr of unrelated code.
+ *
+ * Note that this doesn't fix any real bugs, it just makes it less likely that our code will be affected by
+ * buggy code from others that mindlessly invokes 'fprintf(stderr, …' or similar in places where stderr has
+ * been closed before.
+ *
+ * This function is written in a "best-effort" and "least-impact" style. This means whenever we encounter an
+ * error we simply return the original file descriptor, and we do not touch errno. */
+
+ if (fd < 0 || fd > 2)
+ return fd;
+
+ flags = fcntl(fd, F_GETFD, 0);
+ if (flags < 0)
+ return fd;
+
+ if (flags & FD_CLOEXEC)
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ else
+ copy = fcntl(fd, F_DUPFD, 3);
+ if (copy < 0)
+ return fd;
+
+ assert(copy > 2);
+
+ (void) close(fd);
+ return copy;
+}
+
+int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) {
+
+ int fd[3] = { /* Put together an array of fds we work on */
+ original_input_fd,
+ original_output_fd,
+ original_error_fd
+ };
+
+ int r, i,
+ null_fd = -1, /* if we open /dev/null, we store the fd to it here */
+ copy_fd[3] = { -1, -1, -1 }; /* This contains all fds we duplicate here temporarily, and hence need to close at the end */
+ bool null_readable, null_writable;
+
+ /* Sets up stdin, stdout, stderr with the three file descriptors passed in. If any of the descriptors is
+ * specified as -1 it will be connected with /dev/null instead. If any of the file descriptors is passed as
+ * itself (e.g. stdin as STDIN_FILENO) it is left unmodified, but the O_CLOEXEC bit is turned off should it be
+ * on.
+ *
+ * Note that if any of the passed file descriptors are > 2 they will be closed — both on success and on
+ * failure! Thus, callers should assume that when this function returns the input fds are invalidated.
+ *
+ * Note that when this function fails stdin/stdout/stderr might remain half set up!
+ *
+ * O_CLOEXEC is turned off for all three file descriptors (which is how it should be for
+ * stdin/stdout/stderr). */
+
+ null_readable = original_input_fd < 0;
+ null_writable = original_output_fd < 0 || original_error_fd < 0;
+
+ /* First step, open /dev/null once, if we need it */
+ if (null_readable || null_writable) {
+
+ /* Let's open this with O_CLOEXEC first, and convert it to non-O_CLOEXEC when we move the fd to the final position. */
+ null_fd = open("/dev/null", (null_readable && null_writable ? O_RDWR :
+ null_readable ? O_RDONLY : O_WRONLY) | O_CLOEXEC);
+ if (null_fd < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ /* If this fd is in the 0…2 range, let's move it out of it */
+ if (null_fd < 3) {
+ int copy;
+
+ copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
+ if (copy < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ safe_close(null_fd);
+ null_fd = copy;
+ }
+ }
+
+ /* Let's assemble fd[] with the fds to install in place of stdin/stdout/stderr */
+ for (i = 0; i < 3; i++) {
+
+ if (fd[i] < 0)
+ fd[i] = null_fd; /* A negative parameter means: connect this one to /dev/null */
+ else if (fd[i] != i && fd[i] < 3) {
+ /* This fd is in the 0…2 territory, but not at its intended place, move it out of there, so that we can work there. */
+ copy_fd[i] = fcntl(fd[i], F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
+ if (copy_fd[i] < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ fd[i] = copy_fd[i];
+ }
+ }
+
+ /* At this point we now have the fds to use in fd[], and they are all above the stdio range, so that we
+ * have freedom to move them around. If the fds already were at the right places then the specific fds are
+ * -1. Let's now move them to the right places. This is the point of no return. */
+ for (i = 0; i < 3; i++) {
+
+ if (fd[i] == i) {
+
+ /* fd is already in place, but let's make sure O_CLOEXEC is off */
+ r = fd_cloexec(i, false);
+ if (r < 0)
+ goto finish;
+
+ } else {
+ assert(fd[i] > 2);
+
+ if (dup2(fd[i], i) < 0) { /* Turns off O_CLOEXEC on the new fd. */
+ r = -errno;
+ goto finish;
+ }
+ }
+ }
+
+ r = 0;
+
+finish:
+ /* Close the original fds, but only if they were outside of the stdio range. Also, properly check for the same
+ * fd passed in multiple times. */
+ safe_close_above_stdio(original_input_fd);
+ if (original_output_fd != original_input_fd)
+ safe_close_above_stdio(original_output_fd);
+ if (original_error_fd != original_input_fd && original_error_fd != original_output_fd)
+ safe_close_above_stdio(original_error_fd);
+
+ /* Close the copies we moved > 2 */
+ for (i = 0; i < 3; i++)
+ safe_close(copy_fd[i]);
+
+ /* Close our null fd, if it's > 2 */
+ safe_close_above_stdio(null_fd);
+
+ return r;
+}
+
+int fd_reopen(int fd, int flags) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ int new_fd;
+
+ /* Reopens the specified fd with new flags. This is useful for convert an O_PATH fd into a regular one, or to
+ * turn O_RDWR fds into O_RDONLY fds.
+ *
+ * This doesn't work on sockets (since they cannot be open()ed, ever).
+ *
+ * This implicitly resets the file read index to 0. */
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ new_fd = open(procfs_path, flags);
+ if (new_fd < 0)
+ return -errno;
+
+ return new_fd;
+}
+
+int read_nr_open(void) {
+ _cleanup_free_ char *nr_open = NULL;
+ int r;
+
+ /* Returns the kernel's current fd limit, either by reading it of /proc/sys if that works, or using the
+ * hard-coded default compiled-in value of current kernels (1M) if not. This call will never fail. */
+
+ r = read_one_line_file("/proc/sys/fs/nr_open", &nr_open);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /proc/sys/fs/nr_open, ignoring: %m");
+ else {
+ int v;
+
+ r = safe_atoi(nr_open, &v);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse /proc/sys/fs/nr_open value '%s', ignoring: %m", nr_open);
+ else
+ return v;
+ }
+
+ /* If we fail, fallback to the hard-coded kernel limit of 1024 * 1024. */
+ return 1024 * 1024;
+}
diff --git a/src/basic/fd-util.h b/src/basic/fd-util.h
new file mode 100644
index 0000000..4085a24
--- /dev/null
+++ b/src/basic/fd-util.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/socket.h>
+
+#include "macro.h"
+
+/* Make sure we can distinguish fd 0 and NULL */
+#define FD_TO_PTR(fd) INT_TO_PTR((fd)+1)
+#define PTR_TO_FD(p) (PTR_TO_INT(p)-1)
+
+int close_nointr(int fd);
+int safe_close(int fd);
+void safe_close_pair(int p[static 2]);
+
+static inline int safe_close_above_stdio(int fd) {
+ if (fd < 3) /* Don't close stdin/stdout/stderr, but still invalidate the fd by returning -1 */
+ return -1;
+
+ return safe_close(fd);
+}
+
+void close_many(const int fds[], size_t n_fd);
+
+int fclose_nointr(FILE *f);
+FILE* safe_fclose(FILE *f);
+DIR* safe_closedir(DIR *f);
+
+static inline void closep(int *fd) {
+ safe_close(*fd);
+}
+
+static inline void close_pairp(int (*p)[2]) {
+ safe_close_pair(*p);
+}
+
+static inline void fclosep(FILE **f) {
+ safe_fclose(*f);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, pclose);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DIR*, closedir);
+
+#define _cleanup_close_ _cleanup_(closep)
+#define _cleanup_fclose_ _cleanup_(fclosep)
+#define _cleanup_pclose_ _cleanup_(pclosep)
+#define _cleanup_closedir_ _cleanup_(closedirp)
+#define _cleanup_close_pair_ _cleanup_(close_pairp)
+
+int fd_nonblock(int fd, bool nonblock);
+int fd_cloexec(int fd, bool cloexec);
+
+int close_all_fds(const int except[], size_t n_except);
+
+int same_fd(int a, int b);
+
+void cmsg_close_all(struct msghdr *mh);
+
+bool fdname_is_valid(const char *s);
+
+int fd_get_path(int fd, char **ret);
+
+int move_fd(int from, int to, int cloexec);
+
+enum {
+ ACQUIRE_NO_DEV_NULL = 1 << 0,
+ ACQUIRE_NO_MEMFD = 1 << 1,
+ ACQUIRE_NO_PIPE = 1 << 2,
+ ACQUIRE_NO_TMPFILE = 1 << 3,
+ ACQUIRE_NO_REGULAR = 1 << 4,
+};
+
+int acquire_data_fd(const void *data, size_t size, unsigned flags);
+
+int fd_duplicate_data_fd(int fd);
+
+/* Hint: ENETUNREACH happens if we try to connect to "non-existing" special IP addresses, such as ::5 */
+/* The kernel sends e.g., EHOSTUNREACH or ENONET to userspace in some ICMP error cases.
+ * See the icmp_err_convert[] in net/ipv4/icmp.c in the kernel sources */
+#define ERRNO_IS_DISCONNECT(r) \
+ IN_SET(r, \
+ ENOTCONN, ECONNRESET, ECONNREFUSED, ECONNABORTED, EPIPE, \
+ ENETUNREACH, EHOSTUNREACH, ENOPROTOOPT, EHOSTDOWN, ENONET)
+
+/* Resource exhaustion, could be our fault or general system trouble */
+#define ERRNO_IS_RESOURCE(r) \
+ IN_SET(r, ENOMEM, EMFILE, ENFILE)
+
+int fd_move_above_stdio(int fd);
+
+int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd);
+
+static inline int make_null_stdio(void) {
+ return rearrange_stdio(-1, -1, -1);
+}
+
+/* Like TAKE_PTR() but for file descriptors, resetting them to -1 */
+#define TAKE_FD(fd) \
+ ({ \
+ int _fd_ = (fd); \
+ (fd) = -1; \
+ _fd_; \
+ })
+
+int fd_reopen(int fd, int flags);
+
+int read_nr_open(void);
diff --git a/src/basic/fileio.c b/src/basic/fileio.c
new file mode 100644
index 0000000..e18b842
--- /dev/null
+++ b/src/basic/fileio.c
@@ -0,0 +1,822 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+
+#define READ_FULL_BYTES_MAX (4U*1024U*1024U)
+
+int write_string_stream_ts(
+ FILE *f,
+ const char *line,
+ WriteStringFileFlags flags,
+ struct timespec *ts) {
+
+ bool needs_nl;
+ int r;
+
+ assert(f);
+ assert(line);
+
+ if (ferror(f))
+ return -EIO;
+
+ needs_nl = !(flags & WRITE_STRING_FILE_AVOID_NEWLINE) && !endswith(line, "\n");
+
+ if (needs_nl && (flags & WRITE_STRING_FILE_DISABLE_BUFFER)) {
+ /* If STDIO buffering was disabled, then let's append the newline character to the string itself, so
+ * that the write goes out in one go, instead of two */
+
+ line = strjoina(line, "\n");
+ needs_nl = false;
+ }
+
+ if (fputs(line, f) == EOF)
+ return -errno;
+
+ if (needs_nl)
+ if (fputc('\n', f) == EOF)
+ return -errno;
+
+ if (flags & WRITE_STRING_FILE_SYNC)
+ r = fflush_sync_and_check(f);
+ else
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ if (ts) {
+ struct timespec twice[2] = {*ts, *ts};
+
+ if (futimens(fileno(f), twice) < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int write_string_file_atomic(
+ const char *fn,
+ const char *line,
+ WriteStringFileFlags flags,
+ struct timespec *ts) {
+
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(fn);
+ assert(line);
+
+ r = fopen_temporary(fn, &f, &p);
+ if (r < 0)
+ return r;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+ (void) fchmod_umask(fileno(f), 0644);
+
+ r = write_string_stream_ts(f, line, flags, ts);
+ if (r < 0)
+ goto fail;
+
+ if (rename(p, fn) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(p);
+ return r;
+}
+
+int write_string_file_ts(
+ const char *fn,
+ const char *line,
+ WriteStringFileFlags flags,
+ struct timespec *ts) {
+
+ _cleanup_fclose_ FILE *f = NULL;
+ int q, r;
+
+ assert(fn);
+ assert(line);
+
+ /* We don't know how to verify whether the file contents was already on-disk. */
+ assert(!((flags & WRITE_STRING_FILE_VERIFY_ON_FAILURE) && (flags & WRITE_STRING_FILE_SYNC)));
+
+ if (flags & WRITE_STRING_FILE_ATOMIC) {
+ assert(flags & WRITE_STRING_FILE_CREATE);
+
+ r = write_string_file_atomic(fn, line, flags, ts);
+ if (r < 0)
+ goto fail;
+
+ return r;
+ } else
+ assert(!ts);
+
+ if (flags & WRITE_STRING_FILE_CREATE) {
+ f = fopen(fn, "we");
+ if (!f) {
+ r = -errno;
+ goto fail;
+ }
+ } else {
+ int fd;
+
+ /* We manually build our own version of fopen(..., "we") that
+ * works without O_CREAT */
+ fd = open(fn, O_WRONLY|O_CLOEXEC|O_NOCTTY | ((flags & WRITE_STRING_FILE_NOFOLLOW) ? O_NOFOLLOW : 0));
+ if (fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ f = fdopen(fd, "w");
+ if (!f) {
+ r = -errno;
+ safe_close(fd);
+ goto fail;
+ }
+ }
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ if (flags & WRITE_STRING_FILE_DISABLE_BUFFER)
+ setvbuf(f, NULL, _IONBF, 0);
+
+ r = write_string_stream_ts(f, line, flags, ts);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ if (!(flags & WRITE_STRING_FILE_VERIFY_ON_FAILURE))
+ return r;
+
+ f = safe_fclose(f);
+
+ /* OK, the operation failed, but let's see if the right
+ * contents in place already. If so, eat up the error. */
+
+ q = verify_file(fn, line, !(flags & WRITE_STRING_FILE_AVOID_NEWLINE));
+ if (q <= 0)
+ return r;
+
+ return 0;
+}
+
+int write_string_filef(
+ const char *fn,
+ WriteStringFileFlags flags,
+ const char *format, ...) {
+
+ _cleanup_free_ char *p = NULL;
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = vasprintf(&p, format, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ return write_string_file(fn, p, flags);
+}
+
+int read_one_line_file(const char *fn, char **line) {
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(fn);
+ assert(line);
+
+ f = fopen(fn, "re");
+ if (!f)
+ return -errno;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ r = read_line(f, LONG_LINE_MAX, line);
+ return r < 0 ? r : 0;
+}
+
+int verify_file(const char *fn, const char *blob, bool accept_extra_nl) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *buf = NULL;
+ size_t l, k;
+
+ assert(fn);
+ assert(blob);
+
+ l = strlen(blob);
+
+ if (accept_extra_nl && endswith(blob, "\n"))
+ accept_extra_nl = false;
+
+ buf = malloc(l + accept_extra_nl + 1);
+ if (!buf)
+ return -ENOMEM;
+
+ f = fopen(fn, "re");
+ if (!f)
+ return -errno;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ /* We try to read one byte more than we need, so that we know whether we hit eof */
+ errno = 0;
+ k = fread(buf, 1, l + accept_extra_nl + 1, f);
+ if (ferror(f))
+ return errno > 0 ? -errno : -EIO;
+
+ if (k != l && k != l + accept_extra_nl)
+ return 0;
+ if (memcmp(buf, blob, l) != 0)
+ return 0;
+ if (k > l && buf[l] != '\n')
+ return 0;
+
+ return 1;
+}
+
+int read_full_stream(
+ FILE *f,
+ char **ret_contents,
+ size_t *ret_size) {
+
+ _cleanup_free_ char *buf = NULL;
+ struct stat st;
+ size_t n, l;
+ int fd;
+
+ assert(f);
+ assert(ret_contents);
+
+ n = LINE_MAX; /* Start size */
+
+ fd = fileno(f);
+ if (fd >= 0) { /* If the FILE* object is backed by an fd (as opposed to memory or such, see fmemopen(), let's
+ * optimize our buffering) */
+
+ if (fstat(fileno(f), &st) < 0)
+ return -errno;
+
+ if (S_ISREG(st.st_mode)) {
+
+ /* Safety check */
+ if (st.st_size > READ_FULL_BYTES_MAX)
+ return -E2BIG;
+
+ /* Start with the right file size, but be prepared for files from /proc which generally report a file
+ * size of 0. Note that we increase the size to read here by one, so that the first read attempt
+ * already makes us notice the EOF. */
+ if (st.st_size > 0)
+ n = st.st_size + 1;
+ }
+ }
+
+ l = 0;
+ for (;;) {
+ char *t;
+ size_t k;
+
+ t = realloc(buf, n + 1);
+ if (!t)
+ return -ENOMEM;
+
+ buf = t;
+ errno = 0;
+ k = fread(buf + l, 1, n - l, f);
+ if (k > 0)
+ l += k;
+
+ if (ferror(f))
+ return errno > 0 ? -errno : -EIO;
+
+ if (feof(f))
+ break;
+
+ /* We aren't expecting fread() to return a short read outside
+ * of (error && eof), assert buffer is full and enlarge buffer.
+ */
+ assert(l == n);
+
+ /* Safety check */
+ if (n >= READ_FULL_BYTES_MAX)
+ return -E2BIG;
+
+ n = MIN(n * 2, READ_FULL_BYTES_MAX);
+ }
+
+ if (!ret_size) {
+ /* Safety check: if the caller doesn't want to know the size of what we just read it will rely on the
+ * trailing NUL byte. But if there's an embedded NUL byte, then we should refuse operation as otherwise
+ * there'd be ambiguity about what we just read. */
+
+ if (memchr(buf, 0, l))
+ return -EBADMSG;
+ }
+
+ buf[l] = 0;
+ *ret_contents = TAKE_PTR(buf);
+
+ if (ret_size)
+ *ret_size = l;
+
+ return 0;
+}
+
+int read_full_file(const char *fn, char **contents, size_t *size) {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ assert(fn);
+ assert(contents);
+
+ f = fopen(fn, "re");
+ if (!f)
+ return -errno;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ return read_full_stream(f, contents, size);
+}
+
+int executable_is_script(const char *path, char **interpreter) {
+ _cleanup_free_ char *line = NULL;
+ size_t len;
+ char *ans;
+ int r;
+
+ assert(path);
+
+ r = read_one_line_file(path, &line);
+ if (r == -ENOBUFS) /* First line overly long? if so, then it's not a script */
+ return 0;
+ if (r < 0)
+ return r;
+
+ if (!startswith(line, "#!"))
+ return 0;
+
+ ans = strstrip(line + 2);
+ len = strcspn(ans, " \t");
+
+ if (len == 0)
+ return 0;
+
+ ans = strndup(ans, len);
+ if (!ans)
+ return -ENOMEM;
+
+ *interpreter = ans;
+ return 1;
+}
+
+/**
+ * Retrieve one field from a file like /proc/self/status. pattern
+ * should not include whitespace or the delimiter (':'). pattern matches only
+ * the beginning of a line. Whitespace before ':' is skipped. Whitespace and
+ * zeros after the ':' will be skipped. field must be freed afterwards.
+ * terminator specifies the terminating characters of the field value (not
+ * included in the value).
+ */
+int get_proc_field(const char *filename, const char *pattern, const char *terminator, char **field) {
+ _cleanup_free_ char *status = NULL;
+ char *t, *f;
+ size_t len;
+ int r;
+
+ assert(terminator);
+ assert(filename);
+ assert(pattern);
+ assert(field);
+
+ r = read_full_file(filename, &status, NULL);
+ if (r < 0)
+ return r;
+
+ t = status;
+
+ do {
+ bool pattern_ok;
+
+ do {
+ t = strstr(t, pattern);
+ if (!t)
+ return -ENOENT;
+
+ /* Check that pattern occurs in beginning of line. */
+ pattern_ok = (t == status || t[-1] == '\n');
+
+ t += strlen(pattern);
+
+ } while (!pattern_ok);
+
+ t += strspn(t, " \t");
+ if (!*t)
+ return -ENOENT;
+
+ } while (*t != ':');
+
+ t++;
+
+ if (*t) {
+ t += strspn(t, " \t");
+
+ /* Also skip zeros, because when this is used for
+ * capabilities, we don't want the zeros. This way the
+ * same capability set always maps to the same string,
+ * irrespective of the total capability set size. For
+ * other numbers it shouldn't matter. */
+ t += strspn(t, "0");
+ /* Back off one char if there's nothing but whitespace
+ and zeros */
+ if (!*t || isspace(*t))
+ t--;
+ }
+
+ len = strcspn(t, terminator);
+
+ f = strndup(t, len);
+ if (!f)
+ return -ENOMEM;
+
+ *field = f;
+ return 0;
+}
+
+DIR *xopendirat(int fd, const char *name, int flags) {
+ int nfd;
+ DIR *d;
+
+ assert(!(flags & O_CREAT));
+
+ nfd = openat(fd, name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|flags, 0);
+ if (nfd < 0)
+ return NULL;
+
+ d = fdopendir(nfd);
+ if (!d) {
+ safe_close(nfd);
+ return NULL;
+ }
+
+ return d;
+}
+
+static int search_and_fopen_internal(const char *path, const char *mode, const char *root, char **search, FILE **_f) {
+ char **i;
+
+ assert(path);
+ assert(mode);
+ assert(_f);
+
+ if (!path_strv_resolve_uniq(search, root))
+ return -ENOMEM;
+
+ STRV_FOREACH(i, search) {
+ _cleanup_free_ char *p = NULL;
+ FILE *f;
+
+ if (root)
+ p = strjoin(root, *i, "/", path);
+ else
+ p = strjoin(*i, "/", path);
+ if (!p)
+ return -ENOMEM;
+
+ f = fopen(p, mode);
+ if (f) {
+ *_f = f;
+ return 0;
+ }
+
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ return -ENOENT;
+}
+
+int search_and_fopen(const char *path, const char *mode, const char *root, const char **search, FILE **_f) {
+ _cleanup_strv_free_ char **copy = NULL;
+
+ assert(path);
+ assert(mode);
+ assert(_f);
+
+ if (path_is_absolute(path)) {
+ FILE *f;
+
+ f = fopen(path, mode);
+ if (f) {
+ *_f = f;
+ return 0;
+ }
+
+ return -errno;
+ }
+
+ copy = strv_copy((char**) search);
+ if (!copy)
+ return -ENOMEM;
+
+ return search_and_fopen_internal(path, mode, root, copy, _f);
+}
+
+int search_and_fopen_nulstr(const char *path, const char *mode, const char *root, const char *search, FILE **_f) {
+ _cleanup_strv_free_ char **s = NULL;
+
+ if (path_is_absolute(path)) {
+ FILE *f;
+
+ f = fopen(path, mode);
+ if (f) {
+ *_f = f;
+ return 0;
+ }
+
+ return -errno;
+ }
+
+ s = strv_split_nulstr(search);
+ if (!s)
+ return -ENOMEM;
+
+ return search_and_fopen_internal(path, mode, root, s, _f);
+}
+
+int fflush_and_check(FILE *f) {
+ assert(f);
+
+ errno = 0;
+ fflush(f);
+
+ if (ferror(f))
+ return errno > 0 ? -errno : -EIO;
+
+ return 0;
+}
+
+int fflush_sync_and_check(FILE *f) {
+ int r;
+
+ assert(f);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ if (fsync(fileno(f)) < 0)
+ return -errno;
+
+ r = fsync_directory_of_file(fileno(f));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int write_timestamp_file_atomic(const char *fn, usec_t n) {
+ char ln[DECIMAL_STR_MAX(n)+2];
+
+ /* Creates a "timestamp" file, that contains nothing but a
+ * usec_t timestamp, formatted in ASCII. */
+
+ if (n <= 0 || n >= USEC_INFINITY)
+ return -ERANGE;
+
+ xsprintf(ln, USEC_FMT "\n", n);
+
+ return write_string_file(fn, ln, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC);
+}
+
+int read_timestamp_file(const char *fn, usec_t *ret) {
+ _cleanup_free_ char *ln = NULL;
+ uint64_t t;
+ int r;
+
+ r = read_one_line_file(fn, &ln);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(ln, &t);
+ if (r < 0)
+ return r;
+
+ if (t <= 0 || t >= (uint64_t) USEC_INFINITY)
+ return -ERANGE;
+
+ *ret = (usec_t) t;
+ return 0;
+}
+
+int fputs_with_space(FILE *f, const char *s, const char *separator, bool *space) {
+ int r;
+
+ assert(s);
+
+ /* Outputs the specified string with fputs(), but optionally prefixes it with a separator. The *space parameter
+ * when specified shall initially point to a boolean variable initialized to false. It is set to true after the
+ * first invocation. This call is supposed to be use in loops, where a separator shall be inserted between each
+ * element, but not before the first one. */
+
+ if (!f)
+ f = stdout;
+
+ if (space) {
+ if (!separator)
+ separator = " ";
+
+ if (*space) {
+ r = fputs(separator, f);
+ if (r < 0)
+ return r;
+ }
+
+ *space = true;
+ }
+
+ return fputs(s, f);
+}
+
+/* A bitmask of the EOL markers we know */
+typedef enum EndOfLineMarker {
+ EOL_NONE = 0,
+ EOL_ZERO = 1 << 0, /* \0 (aka NUL) */
+ EOL_TEN = 1 << 1, /* \n (aka NL, aka LF) */
+ EOL_THIRTEEN = 1 << 2, /* \r (aka CR) */
+} EndOfLineMarker;
+
+static EndOfLineMarker categorize_eol(char c, ReadLineFlags flags) {
+
+ if (!IN_SET(flags, READ_LINE_ONLY_NUL)) {
+ if (c == '\n')
+ return EOL_TEN;
+ if (c == '\r')
+ return EOL_THIRTEEN;
+ }
+
+ if (c == '\0')
+ return EOL_ZERO;
+
+ return EOL_NONE;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, funlockfile);
+
+int read_line_full(FILE *f, size_t limit, ReadLineFlags flags, char **ret) {
+ size_t n = 0, allocated = 0, count = 0;
+ _cleanup_free_ char *buffer = NULL;
+ int r;
+
+ assert(f);
+
+ /* Something like a bounded version of getline().
+ *
+ * Considers EOF, \n, \r and \0 end of line delimiters (or combinations of these), and does not include these
+ * delimiters in the string returned. Specifically, recognizes the following combinations of markers as line
+ * endings:
+ *
+ * • \n (UNIX)
+ * • \r (old MacOS)
+ * • \0 (C strings)
+ * • \n\0
+ * • \r\0
+ * • \r\n (Windows)
+ * • \n\r
+ * • \r\n\0
+ * • \n\r\0
+ *
+ * Returns the number of bytes read from the files (i.e. including delimiters — this hence usually differs from
+ * the number of characters in the returned string). When EOF is hit, 0 is returned.
+ *
+ * The input parameter limit is the maximum numbers of characters in the returned string, i.e. excluding
+ * delimiters. If the limit is hit we fail and return -ENOBUFS.
+ *
+ * If a line shall be skipped ret may be initialized as NULL. */
+
+ if (ret) {
+ if (!GREEDY_REALLOC(buffer, allocated, 1))
+ return -ENOMEM;
+ }
+
+ {
+ _unused_ _cleanup_(funlockfilep) FILE *flocked = f;
+ EndOfLineMarker previous_eol = EOL_NONE;
+ flockfile(f);
+
+ for (;;) {
+ EndOfLineMarker eol;
+ char c;
+
+ if (n >= limit)
+ return -ENOBUFS;
+
+ if (count >= INT_MAX) /* We couldn't return the counter anymore as "int", hence refuse this */
+ return -ENOBUFS;
+
+ r = safe_fgetc(f, &c);
+ if (r < 0)
+ return r;
+ if (r == 0) /* EOF is definitely EOL */
+ break;
+
+ eol = categorize_eol(c, flags);
+
+ if (FLAGS_SET(previous_eol, EOL_ZERO) ||
+ (eol == EOL_NONE && previous_eol != EOL_NONE) ||
+ (eol != EOL_NONE && (previous_eol & eol) != 0)) {
+ /* Previous char was a NUL? This is not an EOL, but the previous char was? This type of
+ * EOL marker has been seen right before? In either of these three cases we are
+ * done. But first, let's put this character back in the queue. (Note that we have to
+ * cast this to (unsigned char) here as ungetc() expects a positive 'int', and if we
+ * are on an architecture where 'char' equals 'signed char' we need to ensure we don't
+ * pass a negative value here. That said, to complicate things further ungetc() is
+ * actually happy with most negative characters and implicitly casts them back to
+ * positive ones as needed, except for \xff (aka -1, aka EOF), which it refuses. What a
+ * godawful API!) */
+ assert_se(ungetc((unsigned char) c, f) != EOF);
+ break;
+ }
+
+ count++;
+
+ if (eol != EOL_NONE) {
+ previous_eol |= eol;
+ continue;
+ }
+
+ if (ret) {
+ if (!GREEDY_REALLOC(buffer, allocated, n + 2))
+ return -ENOMEM;
+
+ buffer[n] = c;
+ }
+
+ n++;
+ }
+ }
+
+ if (ret) {
+ buffer[n] = 0;
+
+ *ret = TAKE_PTR(buffer);
+ }
+
+ return (int) count;
+}
+
+int safe_fgetc(FILE *f, char *ret) {
+ int k;
+
+ assert(f);
+
+ /* A safer version of plain fgetc(): let's propagate the error that happened while reading as such, and
+ * separate the EOF condition from the byte read, to avoid those confusion signed/unsigned issues fgetc()
+ * has. */
+
+ errno = 0;
+ k = fgetc(f);
+ if (k == EOF) {
+ if (ferror(f))
+ return errno > 0 ? -errno : -EIO;
+
+ if (ret)
+ *ret = 0;
+
+ return 0;
+ }
+
+ if (ret)
+ *ret = k;
+
+ return 1;
+}
diff --git a/src/basic/fileio.h b/src/basic/fileio.h
new file mode 100644
index 0000000..53e3f4e
--- /dev/null
+++ b/src/basic/fileio.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <dirent.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "time-util.h"
+
+#define LONG_LINE_MAX (1U*1024U*1024U)
+
+typedef enum {
+ WRITE_STRING_FILE_CREATE = 1 << 0,
+ WRITE_STRING_FILE_ATOMIC = 1 << 1,
+ WRITE_STRING_FILE_AVOID_NEWLINE = 1 << 2,
+ WRITE_STRING_FILE_VERIFY_ON_FAILURE = 1 << 3,
+ WRITE_STRING_FILE_SYNC = 1 << 4,
+ WRITE_STRING_FILE_DISABLE_BUFFER = 1 << 5,
+ WRITE_STRING_FILE_NOFOLLOW = 1 << 6,
+
+ /* And before you wonder, why write_string_file_atomic_label_ts() is a separate function instead of just one
+ more flag here: it's about linking: we don't want to pull -lselinux into all users of write_string_file()
+ and friends. */
+
+} WriteStringFileFlags;
+
+int write_string_stream_ts(FILE *f, const char *line, WriteStringFileFlags flags, struct timespec *ts);
+static inline int write_string_stream(FILE *f, const char *line, WriteStringFileFlags flags) {
+ return write_string_stream_ts(f, line, flags, NULL);
+}
+int write_string_file_ts(const char *fn, const char *line, WriteStringFileFlags flags, struct timespec *ts);
+static inline int write_string_file(const char *fn, const char *line, WriteStringFileFlags flags) {
+ return write_string_file_ts(fn, line, flags, NULL);
+}
+
+int write_string_filef(const char *fn, WriteStringFileFlags flags, const char *format, ...) _printf_(3, 4);
+
+int read_one_line_file(const char *fn, char **line);
+int read_full_file(const char *fn, char **contents, size_t *size);
+int read_full_stream(FILE *f, char **contents, size_t *size);
+
+int verify_file(const char *fn, const char *blob, bool accept_extra_nl);
+
+int executable_is_script(const char *path, char **interpreter);
+
+int get_proc_field(const char *filename, const char *pattern, const char *terminator, char **field);
+
+DIR *xopendirat(int dirfd, const char *name, int flags);
+
+int search_and_fopen(const char *path, const char *mode, const char *root, const char **search, FILE **_f);
+int search_and_fopen_nulstr(const char *path, const char *mode, const char *root, const char *search, FILE **_f);
+
+int fflush_and_check(FILE *f);
+int fflush_sync_and_check(FILE *f);
+
+int write_timestamp_file_atomic(const char *fn, usec_t n);
+int read_timestamp_file(const char *fn, usec_t *ret);
+
+int fputs_with_space(FILE *f, const char *s, const char *separator, bool *space);
+
+typedef enum ReadLineFlags {
+ READ_LINE_ONLY_NUL = 1 << 0,
+} ReadLineFlags;
+
+int read_line_full(FILE *f, size_t limit, ReadLineFlags flags, char **ret);
+
+static inline int read_line(FILE *f, size_t limit, char **ret) {
+ return read_line_full(f, limit, 0, ret);
+}
+
+static inline int read_nul_string(FILE *f, size_t limit, char **ret) {
+ return read_line_full(f, limit, READ_LINE_ONLY_NUL, ret);
+}
+
+int safe_fgetc(FILE *f, char *ret);
diff --git a/src/basic/format-util.h b/src/basic/format-util.h
new file mode 100644
index 0000000..dece5d3
--- /dev/null
+++ b/src/basic/format-util.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+
+#if SIZEOF_PID_T == 4
+# define PID_PRI PRIi32
+#elif SIZEOF_PID_T == 2
+# define PID_PRI PRIi16
+#else
+# error Unknown pid_t size
+#endif
+#define PID_FMT "%" PID_PRI
+
+#if SIZEOF_UID_T == 4
+# define UID_FMT "%" PRIu32
+#elif SIZEOF_UID_T == 2
+# define UID_FMT "%" PRIu16
+#else
+# error Unknown uid_t size
+#endif
+
+#if SIZEOF_GID_T == 4
+# define GID_FMT "%" PRIu32
+#elif SIZEOF_GID_T == 2
+# define GID_FMT "%" PRIu16
+#else
+# error Unknown gid_t size
+#endif
+
+#if SIZEOF_TIME_T == 8
+# define PRI_TIME PRIi64
+#elif SIZEOF_TIME_T == 4
+# define PRI_TIME "li"
+#else
+# error Unknown time_t size
+#endif
+
+#if defined __x86_64__ && defined __ILP32__
+# define PRI_TIMEX PRIi64
+#else
+# define PRI_TIMEX "li"
+#endif
+
+#if SIZEOF_RLIM_T == 8
+# define RLIM_FMT "%" PRIu64
+#elif SIZEOF_RLIM_T == 4
+# define RLIM_FMT "%" PRIu32
+#else
+# error Unknown rlim_t size
+#endif
+
+#if SIZEOF_DEV_T == 8
+# define DEV_FMT "%" PRIu64
+#elif SIZEOF_DEV_T == 4
+# define DEV_FMT "%" PRIu32
+#else
+# error Unknown dev_t size
+#endif
+
+#if SIZEOF_INO_T == 8
+# define INO_FMT "%" PRIu64
+#elif SIZEOF_INO_T == 4
+# define INO_FMT "%" PRIu32
+#else
+# error Unknown ino_t size
+#endif
diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c
new file mode 100644
index 0000000..f25bf2c
--- /dev/null
+++ b/src/basic/fs-util.c
@@ -0,0 +1,1358 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <linux/magic.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+int unlink_noerrno(const char *path) {
+ PROTECT_ERRNO;
+ int r;
+
+ r = unlink(path);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int rmdir_parents(const char *path, const char *stop) {
+ size_t l;
+ int r = 0;
+
+ assert(path);
+ assert(stop);
+
+ l = strlen(path);
+
+ /* Skip trailing slashes */
+ while (l > 0 && path[l-1] == '/')
+ l--;
+
+ while (l > 0) {
+ char *t;
+
+ /* Skip last component */
+ while (l > 0 && path[l-1] != '/')
+ l--;
+
+ /* Skip trailing slashes */
+ while (l > 0 && path[l-1] == '/')
+ l--;
+
+ if (l <= 0)
+ break;
+
+ t = strndup(path, l);
+ if (!t)
+ return -ENOMEM;
+
+ if (path_startswith(stop, t)) {
+ free(t);
+ return 0;
+ }
+
+ r = rmdir(t);
+ free(t);
+
+ if (r < 0)
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) {
+ int r;
+
+ /* Try the ideal approach first */
+ if (renameat2(olddirfd, oldpath, newdirfd, newpath, RENAME_NOREPLACE) >= 0)
+ return 0;
+
+ /* renameat2() exists since Linux 3.15, btrfs and FAT added support for it later. If it is not implemented,
+ * fall back to a different method. */
+ if (!IN_SET(errno, EINVAL, ENOSYS, ENOTTY))
+ return -errno;
+
+ /* Let's try to use linkat()+unlinkat() as fallback. This doesn't work on directories and on some file systems
+ * that do not support hard links (such as FAT, most prominently), but for files it's pretty close to what we
+ * want — though not atomic (i.e. for a short period both the new and the old filename will exist). */
+ if (linkat(olddirfd, oldpath, newdirfd, newpath, 0) >= 0) {
+
+ if (unlinkat(olddirfd, oldpath, 0) < 0) {
+ r = -errno; /* Backup errno before the following unlinkat() alters it */
+ (void) unlinkat(newdirfd, newpath, 0);
+ return r;
+ }
+
+ return 0;
+ }
+
+ if (!IN_SET(errno, EINVAL, ENOSYS, ENOTTY, EPERM)) /* FAT returns EPERM on link()… */
+ return -errno;
+
+ /* OK, neither RENAME_NOREPLACE nor linkat()+unlinkat() worked. Let's then fallback to the racy TOCTOU
+ * vulnerable accessat(F_OK) check followed by classic, replacing renameat(), we have nothing better. */
+
+ if (faccessat(newdirfd, newpath, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
+ return -EEXIST;
+ if (errno != ENOENT)
+ return -errno;
+
+ if (renameat(olddirfd, oldpath, newdirfd, newpath) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int readlinkat_malloc(int fd, const char *p, char **ret) {
+ size_t l = FILENAME_MAX+1;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ for (;;) {
+ char *c;
+ ssize_t n;
+
+ c = new(char, l);
+ if (!c)
+ return -ENOMEM;
+
+ n = readlinkat(fd, p, c, l-1);
+ if (n < 0) {
+ r = -errno;
+ free(c);
+ return r;
+ }
+
+ if ((size_t) n < l-1) {
+ c[n] = 0;
+ *ret = c;
+ return 0;
+ }
+
+ free(c);
+ l *= 2;
+ }
+}
+
+int readlink_malloc(const char *p, char **ret) {
+ return readlinkat_malloc(AT_FDCWD, p, ret);
+}
+
+int readlink_value(const char *p, char **ret) {
+ _cleanup_free_ char *link = NULL;
+ char *value;
+ int r;
+
+ r = readlink_malloc(p, &link);
+ if (r < 0)
+ return r;
+
+ value = basename(link);
+ if (!value)
+ return -ENOENT;
+
+ value = strdup(value);
+ if (!value)
+ return -ENOMEM;
+
+ *ret = value;
+
+ return 0;
+}
+
+int readlink_and_make_absolute(const char *p, char **r) {
+ _cleanup_free_ char *target = NULL;
+ char *k;
+ int j;
+
+ assert(p);
+ assert(r);
+
+ j = readlink_malloc(p, &target);
+ if (j < 0)
+ return j;
+
+ k = file_in_same_dir(p, target);
+ if (!k)
+ return -ENOMEM;
+
+ *r = k;
+ return 0;
+}
+
+int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid) {
+ char fd_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+ _cleanup_close_ int fd = -1;
+ assert(path);
+
+ /* Under the assumption that we are running privileged we first change the access mode and only then hand out
+ * ownership to avoid a window where access is too open. */
+
+ fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW); /* Let's acquire an O_PATH fd, as precaution to change mode/owner
+ * on the same file */
+ if (fd < 0)
+ return -errno;
+
+ xsprintf(fd_path, "/proc/self/fd/%i", fd);
+
+ if (mode != MODE_INVALID) {
+
+ if ((mode & S_IFMT) != 0) {
+ struct stat st;
+
+ if (stat(fd_path, &st) < 0)
+ return -errno;
+
+ if ((mode & S_IFMT) != (st.st_mode & S_IFMT))
+ return -EINVAL;
+ }
+
+ if (chmod(fd_path, mode & 07777) < 0)
+ return -errno;
+ }
+
+ if (uid != UID_INVALID || gid != GID_INVALID)
+ if (chown(fd_path, uid, gid) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int fchmod_and_chown(int fd, mode_t mode, uid_t uid, gid_t gid) {
+ /* Under the assumption that we are running privileged we first change the access mode and only then hand out
+ * ownership to avoid a window where access is too open. */
+
+ if (mode != MODE_INVALID) {
+
+ if ((mode & S_IFMT) != 0) {
+ struct stat st;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if ((mode & S_IFMT) != (st.st_mode & S_IFMT))
+ return -EINVAL;
+ }
+
+ if (fchmod(fd, mode & 0777) < 0)
+ return -errno;
+ }
+
+ if (uid != UID_INVALID || gid != GID_INVALID)
+ if (fchown(fd, uid, gid) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int fchmod_umask(int fd, mode_t m) {
+ mode_t u;
+ int r;
+
+ u = umask(0777);
+ r = fchmod(fd, m & (~u)) < 0 ? -errno : 0;
+ umask(u);
+
+ return r;
+}
+
+int fchmod_opath(int fd, mode_t m) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+
+ /* This function operates also on fd that might have been opened with
+ * O_PATH. Indeed fchmodat() doesn't have the AT_EMPTY_PATH flag like
+ * fchownat() does. */
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ if (chmod(procfs_path, m) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int fd_warn_permissions(const char *path, int fd) {
+ struct stat st;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (st.st_mode & 0111)
+ log_warning("Configuration file %s is marked executable. Please remove executable permission bits. Proceeding anyway.", path);
+
+ if (st.st_mode & 0002)
+ log_warning("Configuration file %s is marked world-writable. Please remove world writability permission bits. Proceeding anyway.", path);
+
+ if (getpid_cached() == 1 && (st.st_mode & 0044) != 0044)
+ log_warning("Configuration file %s is marked world-inaccessible. This has no effect as configuration data is accessible via APIs without restrictions. Proceeding anyway.", path);
+
+ return 0;
+}
+
+int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode) {
+ char fdpath[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ _cleanup_close_ int fd = -1;
+ int r, ret = 0;
+
+ assert(path);
+
+ /* Note that touch_file() does not follow symlinks: if invoked on an existing symlink, then it is the symlink
+ * itself which is updated, not its target
+ *
+ * Returns the first error we encounter, but tries to apply as much as possible. */
+
+ if (parents)
+ (void) mkdir_parents(path, 0755);
+
+ /* Initially, we try to open the node with O_PATH, so that we get a reference to the node. This is useful in
+ * case the path refers to an existing device or socket node, as we can open it successfully in all cases, and
+ * won't trigger any driver magic or so. */
+ fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (fd < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* if the node doesn't exist yet, we create it, but with O_EXCL, so that we only create a regular file
+ * here, and nothing else */
+ fd = open(path, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, IN_SET(mode, 0, MODE_INVALID) ? 0644 : mode);
+ if (fd < 0)
+ return -errno;
+ }
+
+ /* Let's make a path from the fd, and operate on that. With this logic, we can adjust the access mode,
+ * ownership and time of the file node in all cases, even if the fd refers to an O_PATH object — which is
+ * something fchown(), fchmod(), futimensat() don't allow. */
+ xsprintf(fdpath, "/proc/self/fd/%i", fd);
+
+ if (mode != MODE_INVALID)
+ if (chmod(fdpath, mode) < 0)
+ ret = -errno;
+
+ if (uid_is_valid(uid) || gid_is_valid(gid))
+ if (chown(fdpath, uid, gid) < 0 && ret >= 0)
+ ret = -errno;
+
+ if (stamp != USEC_INFINITY) {
+ struct timespec ts[2];
+
+ timespec_store(&ts[0], stamp);
+ ts[1] = ts[0];
+ r = utimensat(AT_FDCWD, fdpath, ts, 0);
+ } else
+ r = utimensat(AT_FDCWD, fdpath, NULL, 0);
+ if (r < 0 && ret >= 0)
+ return -errno;
+
+ return ret;
+}
+
+int touch(const char *path) {
+ return touch_file(path, false, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
+}
+
+int symlink_idempotent(const char *from, const char *to, bool make_relative) {
+ _cleanup_free_ char *relpath = NULL;
+ int r;
+
+ assert(from);
+ assert(to);
+
+ if (make_relative) {
+ _cleanup_free_ char *parent = NULL;
+
+ parent = dirname_malloc(to);
+ if (!parent)
+ return -ENOMEM;
+
+ r = path_make_relative(parent, from, &relpath);
+ if (r < 0)
+ return r;
+
+ from = relpath;
+ }
+
+ if (symlink(from, to) < 0) {
+ _cleanup_free_ char *p = NULL;
+
+ if (errno != EEXIST)
+ return -errno;
+
+ r = readlink_malloc(to, &p);
+ if (r == -EINVAL) /* Not a symlink? In that case return the original error we encountered: -EEXIST */
+ return -EEXIST;
+ if (r < 0) /* Any other error? In that case propagate it as is */
+ return r;
+
+ if (!streq(p, from)) /* Not the symlink we want it to be? In that case, propagate the original -EEXIST */
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+int symlink_atomic(const char *from, const char *to) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(from);
+ assert(to);
+
+ r = tempfn_random(to, NULL, &t);
+ if (r < 0)
+ return r;
+
+ if (symlink(from, t) < 0)
+ return -errno;
+
+ if (rename(t, to) < 0) {
+ unlink_noerrno(t);
+ return -errno;
+ }
+
+ return 0;
+}
+
+int mknod_atomic(const char *path, mode_t mode, dev_t dev) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(path);
+
+ r = tempfn_random(path, NULL, &t);
+ if (r < 0)
+ return r;
+
+ if (mknod(t, mode, dev) < 0)
+ return -errno;
+
+ if (rename(t, path) < 0) {
+ unlink_noerrno(t);
+ return -errno;
+ }
+
+ return 0;
+}
+
+int mkfifo_atomic(const char *path, mode_t mode) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(path);
+
+ r = tempfn_random(path, NULL, &t);
+ if (r < 0)
+ return r;
+
+ if (mkfifo(t, mode) < 0)
+ return -errno;
+
+ if (rename(t, path) < 0) {
+ unlink_noerrno(t);
+ return -errno;
+ }
+
+ return 0;
+}
+
+int mkfifoat_atomic(int dirfd, const char *path, mode_t mode) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(path);
+
+ if (path_is_absolute(path))
+ return mkfifo_atomic(path, mode);
+
+ /* We're only interested in the (random) filename. */
+ r = tempfn_random_child("", NULL, &t);
+ if (r < 0)
+ return r;
+
+ if (mkfifoat(dirfd, t, mode) < 0)
+ return -errno;
+
+ if (renameat(dirfd, t, dirfd, path) < 0) {
+ unlink_noerrno(t);
+ return -errno;
+ }
+
+ return 0;
+}
+
+int get_files_in_directory(const char *path, char ***list) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ size_t bufsize = 0, n = 0;
+ _cleanup_strv_free_ char **l = NULL;
+
+ assert(path);
+
+ /* Returns all files in a directory in *list, and the number
+ * of files as return value. If list is NULL returns only the
+ * number. */
+
+ d = opendir(path);
+ if (!d)
+ return -errno;
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ dirent_ensure_type(d, de);
+
+ if (!dirent_is_file(de))
+ continue;
+
+ if (list) {
+ /* one extra slot is needed for the terminating NULL */
+ if (!GREEDY_REALLOC(l, bufsize, n + 2))
+ return -ENOMEM;
+
+ l[n] = strdup(de->d_name);
+ if (!l[n])
+ return -ENOMEM;
+
+ l[++n] = NULL;
+ } else
+ n++;
+ }
+
+ if (list)
+ *list = TAKE_PTR(l);
+
+ return n;
+}
+
+static int getenv_tmp_dir(const char **ret_path) {
+ const char *n;
+ int r, ret = 0;
+
+ assert(ret_path);
+
+ /* We use the same order of environment variables python uses in tempfile.gettempdir():
+ * https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir */
+ FOREACH_STRING(n, "TMPDIR", "TEMP", "TMP") {
+ const char *e;
+
+ e = secure_getenv(n);
+ if (!e)
+ continue;
+ if (!path_is_absolute(e)) {
+ r = -ENOTDIR;
+ goto next;
+ }
+ if (!path_is_normalized(e)) {
+ r = -EPERM;
+ goto next;
+ }
+
+ r = is_dir(e, true);
+ if (r < 0)
+ goto next;
+ if (r == 0) {
+ r = -ENOTDIR;
+ goto next;
+ }
+
+ *ret_path = e;
+ return 1;
+
+ next:
+ /* Remember first error, to make this more debuggable */
+ if (ret >= 0)
+ ret = r;
+ }
+
+ if (ret < 0)
+ return ret;
+
+ *ret_path = NULL;
+ return ret;
+}
+
+static int tmp_dir_internal(const char *def, const char **ret) {
+ const char *e;
+ int r, k;
+
+ assert(def);
+ assert(ret);
+
+ r = getenv_tmp_dir(&e);
+ if (r > 0) {
+ *ret = e;
+ return 0;
+ }
+
+ k = is_dir(def, true);
+ if (k == 0)
+ k = -ENOTDIR;
+ if (k < 0)
+ return r < 0 ? r : k;
+
+ *ret = def;
+ return 0;
+}
+
+int var_tmp_dir(const char **ret) {
+
+ /* Returns the location for "larger" temporary files, that is backed by physical storage if available, and thus
+ * even might survive a boot: /var/tmp. If $TMPDIR (or related environment variables) are set, its value is
+ * returned preferably however. Note that both this function and tmp_dir() below are affected by $TMPDIR,
+ * making it a variable that overrides all temporary file storage locations. */
+
+ return tmp_dir_internal("/var/tmp", ret);
+}
+
+int tmp_dir(const char **ret) {
+
+ /* Similar to var_tmp_dir() above, but returns the location for "smaller" temporary files, which is usually
+ * backed by an in-memory file system: /tmp. */
+
+ return tmp_dir_internal("/tmp", ret);
+}
+
+int unlink_or_warn(const char *filename) {
+ if (unlink(filename) < 0 && errno != ENOENT)
+ /* If the file doesn't exist and the fs simply was read-only (in which
+ * case unlink() returns EROFS even if the file doesn't exist), don't
+ * complain */
+ if (errno != EROFS || access(filename, F_OK) >= 0)
+ return log_error_errno(errno, "Failed to remove \"%s\": %m", filename);
+
+ return 0;
+}
+
+int inotify_add_watch_fd(int fd, int what, uint32_t mask) {
+ char path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+ int r;
+
+ /* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but by an fd */
+ xsprintf(path, "/proc/self/fd/%i", what);
+
+ r = inotify_add_watch(fd, path, mask);
+ if (r < 0)
+ return -errno;
+
+ return r;
+}
+
+static bool unsafe_transition(const struct stat *a, const struct stat *b) {
+ /* Returns true if the transition from a to b is safe, i.e. that we never transition from unprivileged to
+ * privileged files or directories. Why bother? So that unprivileged code can't symlink to privileged files
+ * making us believe we read something safe even though it isn't safe in the specific context we open it in. */
+
+ if (a->st_uid == 0) /* Transitioning from privileged to unprivileged is always fine */
+ return false;
+
+ return a->st_uid != b->st_uid; /* Otherwise we need to stay within the same UID */
+}
+
+static int log_unsafe_transition(int a, int b, const char *path, unsigned flags) {
+ _cleanup_free_ char *n1 = NULL, *n2 = NULL;
+
+ if (!FLAGS_SET(flags, CHASE_WARN))
+ return -ENOLINK;
+
+ (void) fd_get_path(a, &n1);
+ (void) fd_get_path(b, &n2);
+
+ return log_warning_errno(SYNTHETIC_ERRNO(ENOLINK),
+ "Detected unsafe path transition %s %s %s during canonicalization of %s.",
+ n1, special_glyph(SPECIAL_GLYPH_ARROW), n2, path);
+}
+
+static int log_autofs_mount_point(int fd, const char *path, unsigned flags) {
+ _cleanup_free_ char *n1 = NULL;
+
+ if (!FLAGS_SET(flags, CHASE_WARN))
+ return -EREMOTE;
+
+ (void) fd_get_path(fd, &n1);
+
+ return log_warning_errno(SYNTHETIC_ERRNO(EREMOTE),
+ "Detected autofs mount point %s during canonicalization of %s.",
+ n1, path);
+}
+
+int chase_symlinks(const char *path, const char *original_root, unsigned flags, char **ret) {
+ _cleanup_free_ char *buffer = NULL, *done = NULL, *root = NULL;
+ _cleanup_close_ int fd = -1;
+ unsigned max_follow = CHASE_SYMLINKS_MAX; /* how many symlinks to follow before giving up and returning ELOOP */
+ struct stat previous_stat;
+ bool exists = true;
+ char *todo;
+ int r;
+
+ assert(path);
+
+ /* Either the file may be missing, or we return an fd to the final object, but both make no sense */
+ if (FLAGS_SET(flags, CHASE_NONEXISTENT | CHASE_OPEN))
+ return -EINVAL;
+
+ if (FLAGS_SET(flags, CHASE_STEP | CHASE_OPEN))
+ return -EINVAL;
+
+ if (isempty(path))
+ return -EINVAL;
+
+ /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following
+ * symlinks relative to a root directory, instead of the root of the host.
+ *
+ * Note that "root" primarily matters if we encounter an absolute symlink. It is also used when following
+ * relative symlinks to ensure they cannot be used to "escape" the root directory. The path parameter passed is
+ * assumed to be already prefixed by it, except if the CHASE_PREFIX_ROOT flag is set, in which case it is first
+ * prefixed accordingly.
+ *
+ * Algorithmically this operates on two path buffers: "done" are the components of the path we already
+ * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to
+ * process. On each iteration, we move one component from "todo" to "done", processing it's special meaning
+ * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no
+ * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races
+ * at a minimum.
+ *
+ * Suggested usage: whenever you want to canonicalize a path, use this function. Pass the absolute path you got
+ * as-is: fully qualified and relative to your host's root. Optionally, specify the root parameter to tell this
+ * function what to do when encountering a symlink with an absolute path as directory: prefix it by the
+ * specified path.
+ *
+ * There are three ways to invoke this function:
+ *
+ * 1. Without CHASE_STEP or CHASE_OPEN: in this case the path is resolved and the normalized path is returned
+ * in `ret`. The return value is < 0 on error. If CHASE_NONEXISTENT is also set 0 is returned if the file
+ * doesn't exist, > 0 otherwise. If CHASE_NONEXISTENT is not set >= 0 is returned if the destination was
+ * found, -ENOENT if it doesn't.
+ *
+ * 2. With CHASE_OPEN: in this case the destination is opened after chasing it as O_PATH and this file
+ * descriptor is returned as return value. This is useful to open files relative to some root
+ * directory. Note that the returned O_PATH file descriptors must be converted into a regular one (using
+ * fd_reopen() or such) before it can be used for reading/writing. CHASE_OPEN may not be combined with
+ * CHASE_NONEXISTENT.
+ *
+ * 3. With CHASE_STEP: in this case only a single step of the normalization is executed, i.e. only the first
+ * symlink or ".." component of the path is resolved, and the resulting path is returned. This is useful if
+ * a caller wants to trace the a path through the file system verbosely. Returns < 0 on error, > 0 if the
+ * path is fully normalized, and == 0 for each normalization step. This may be combined with
+ * CHASE_NONEXISTENT, in which case 1 is returned when a component is not found.
+ *
+ * 4. With CHASE_SAFE: in this case the path must not contain unsafe transitions, i.e. transitions from
+ * unprivileged to privileged files or directories. In such cases the return value is -ENOLINK. If
+ * CHASE_WARN is also set a warning describing the unsafe transition is emitted.
+ *
+ * 5. With CHASE_NO_AUTOFS: in this case if an autofs mount point is encountered, the path normalization is
+ * aborted and -EREMOTE is returned. If CHASE_WARN is also set a warning showing the path of the mount point
+ * is emitted.
+ *
+ * */
+
+ /* A root directory of "/" or "" is identical to none */
+ if (empty_or_root(original_root))
+ original_root = NULL;
+
+ if (!original_root && !ret && (flags & (CHASE_NONEXISTENT|CHASE_NO_AUTOFS|CHASE_SAFE|CHASE_OPEN|CHASE_STEP)) == CHASE_OPEN) {
+ /* Shortcut the CHASE_OPEN case if the caller isn't interested in the actual path and has no root set
+ * and doesn't care about any of the other special features we provide either. */
+ r = open(path, O_PATH|O_CLOEXEC|((flags & CHASE_NOFOLLOW) ? O_NOFOLLOW : 0));
+ if (r < 0)
+ return -errno;
+
+ return r;
+ }
+
+ if (original_root) {
+ r = path_make_absolute_cwd(original_root, &root);
+ if (r < 0)
+ return r;
+
+ if (flags & CHASE_PREFIX_ROOT) {
+
+ /* We don't support relative paths in combination with a root directory */
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ path = prefix_roota(root, path);
+ }
+ }
+
+ r = path_make_absolute_cwd(path, &buffer);
+ if (r < 0)
+ return r;
+
+ fd = open("/", O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (fd < 0)
+ return -errno;
+
+ if (flags & CHASE_SAFE) {
+ if (fstat(fd, &previous_stat) < 0)
+ return -errno;
+ }
+
+ todo = buffer;
+ for (;;) {
+ _cleanup_free_ char *first = NULL;
+ _cleanup_close_ int child = -1;
+ struct stat st;
+ size_t n, m;
+
+ /* Determine length of first component in the path */
+ n = strspn(todo, "/"); /* The slashes */
+ m = n + strcspn(todo + n, "/"); /* The entire length of the component */
+
+ /* Extract the first component. */
+ first = strndup(todo, m);
+ if (!first)
+ return -ENOMEM;
+
+ todo += m;
+
+ /* Empty? Then we reached the end. */
+ if (isempty(first))
+ break;
+
+ /* Just a single slash? Then we reached the end. */
+ if (path_equal(first, "/")) {
+ /* Preserve the trailing slash */
+
+ if (flags & CHASE_TRAIL_SLASH)
+ if (!strextend(&done, "/", NULL))
+ return -ENOMEM;
+
+ break;
+ }
+
+ /* Just a dot? Then let's eat this up. */
+ if (path_equal(first, "/."))
+ continue;
+
+ /* Two dots? Then chop off the last bit of what we already found out. */
+ if (path_equal(first, "/..")) {
+ _cleanup_free_ char *parent = NULL;
+ _cleanup_close_ int fd_parent = -1;
+
+ /* If we already are at the top, then going up will not change anything. This is in-line with
+ * how the kernel handles this. */
+ if (empty_or_root(done))
+ continue;
+
+ parent = dirname_malloc(done);
+ if (!parent)
+ return -ENOMEM;
+
+ /* Don't allow this to leave the root dir. */
+ if (root &&
+ path_startswith(done, root) &&
+ !path_startswith(parent, root))
+ continue;
+
+ free_and_replace(done, parent);
+
+ if (flags & CHASE_STEP)
+ goto chased_one;
+
+ fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (fd_parent < 0)
+ return -errno;
+
+ if (flags & CHASE_SAFE) {
+ if (fstat(fd_parent, &st) < 0)
+ return -errno;
+
+ if (unsafe_transition(&previous_stat, &st))
+ return log_unsafe_transition(fd, fd_parent, path, flags);
+
+ previous_stat = st;
+ }
+
+ safe_close(fd);
+ fd = TAKE_FD(fd_parent);
+
+ continue;
+ }
+
+ /* Otherwise let's see what this is. */
+ child = openat(fd, first + n, O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (child < 0) {
+
+ if (errno == ENOENT &&
+ (flags & CHASE_NONEXISTENT) &&
+ (isempty(todo) || path_is_normalized(todo))) {
+
+ /* If CHASE_NONEXISTENT is set, and the path does not exist, then that's OK, return
+ * what we got so far. But don't allow this if the remaining path contains "../ or "./"
+ * or something else weird. */
+
+ /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */
+ if (streq_ptr(done, "/"))
+ *done = '\0';
+
+ if (!strextend(&done, first, todo, NULL))
+ return -ENOMEM;
+
+ exists = false;
+ break;
+ }
+
+ return -errno;
+ }
+
+ if (fstat(child, &st) < 0)
+ return -errno;
+ if ((flags & CHASE_SAFE) &&
+ unsafe_transition(&previous_stat, &st))
+ return log_unsafe_transition(fd, child, path, flags);
+
+ previous_stat = st;
+
+ if ((flags & CHASE_NO_AUTOFS) &&
+ fd_is_fs_type(child, AUTOFS_SUPER_MAGIC) > 0)
+ return log_autofs_mount_point(child, path, flags);
+
+ if (S_ISLNK(st.st_mode) && !((flags & CHASE_NOFOLLOW) && isempty(todo))) {
+ char *joined;
+
+ _cleanup_free_ char *destination = NULL;
+
+ /* This is a symlink, in this case read the destination. But let's make sure we don't follow
+ * symlinks without bounds. */
+ if (--max_follow <= 0)
+ return -ELOOP;
+
+ r = readlinkat_malloc(fd, first + n, &destination);
+ if (r < 0)
+ return r;
+ if (isempty(destination))
+ return -EINVAL;
+
+ if (path_is_absolute(destination)) {
+
+ /* An absolute destination. Start the loop from the beginning, but use the root
+ * directory as base. */
+
+ safe_close(fd);
+ fd = open(root ?: "/", O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (fd < 0)
+ return -errno;
+
+ if (flags & CHASE_SAFE) {
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (unsafe_transition(&previous_stat, &st))
+ return log_unsafe_transition(child, fd, path, flags);
+
+ previous_stat = st;
+ }
+
+ free(done);
+
+ /* Note that we do not revalidate the root, we take it as is. */
+ if (isempty(root))
+ done = NULL;
+ else {
+ done = strdup(root);
+ if (!done)
+ return -ENOMEM;
+ }
+
+ /* Prefix what's left to do with what we just read, and start the loop again, but
+ * remain in the current directory. */
+ joined = strjoin(destination, todo);
+ } else
+ joined = strjoin("/", destination, todo);
+ if (!joined)
+ return -ENOMEM;
+
+ free(buffer);
+ todo = buffer = joined;
+
+ if (flags & CHASE_STEP)
+ goto chased_one;
+
+ continue;
+ }
+
+ /* If this is not a symlink, then let's just add the name we read to what we already verified. */
+ if (!done)
+ done = TAKE_PTR(first);
+ else {
+ /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */
+ if (streq(done, "/"))
+ *done = '\0';
+
+ if (!strextend(&done, first, NULL))
+ return -ENOMEM;
+ }
+
+ /* And iterate again, but go one directory further down. */
+ safe_close(fd);
+ fd = TAKE_FD(child);
+ }
+
+ if (!done) {
+ /* Special case, turn the empty string into "/", to indicate the root directory. */
+ done = strdup("/");
+ if (!done)
+ return -ENOMEM;
+ }
+
+ if (ret)
+ *ret = TAKE_PTR(done);
+
+ if (flags & CHASE_OPEN) {
+ /* Return the O_PATH fd we currently are looking to the caller. It can translate it to a proper fd by
+ * opening /proc/self/fd/xyz. */
+
+ assert(fd >= 0);
+ return TAKE_FD(fd);
+ }
+
+ if (flags & CHASE_STEP)
+ return 1;
+
+ return exists;
+
+chased_one:
+ if (ret) {
+ char *c;
+
+ c = strjoin(strempty(done), todo);
+ if (!c)
+ return -ENOMEM;
+
+ *ret = c;
+ }
+
+ return 0;
+}
+
+int chase_symlinks_and_open(
+ const char *path,
+ const char *root,
+ unsigned chase_flags,
+ int open_flags,
+ char **ret_path) {
+
+ _cleanup_close_ int path_fd = -1;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ if (chase_flags & CHASE_NONEXISTENT)
+ return -EINVAL;
+
+ if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
+ /* Shortcut this call if none of the special features of this call are requested */
+ r = open(path, open_flags);
+ if (r < 0)
+ return -errno;
+
+ return r;
+ }
+
+ path_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &p : NULL);
+ if (path_fd < 0)
+ return path_fd;
+
+ r = fd_reopen(path_fd, open_flags);
+ if (r < 0)
+ return r;
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(p);
+
+ return r;
+}
+
+int chase_symlinks_and_opendir(
+ const char *path,
+ const char *root,
+ unsigned chase_flags,
+ char **ret_path,
+ DIR **ret_dir) {
+
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ _cleanup_close_ int path_fd = -1;
+ _cleanup_free_ char *p = NULL;
+ DIR *d;
+
+ if (!ret_dir)
+ return -EINVAL;
+ if (chase_flags & CHASE_NONEXISTENT)
+ return -EINVAL;
+
+ if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
+ /* Shortcut this call if none of the special features of this call are requested */
+ d = opendir(path);
+ if (!d)
+ return -errno;
+
+ *ret_dir = d;
+ return 0;
+ }
+
+ path_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &p : NULL);
+ if (path_fd < 0)
+ return path_fd;
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", path_fd);
+ d = opendir(procfs_path);
+ if (!d)
+ return -errno;
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(p);
+
+ *ret_dir = d;
+ return 0;
+}
+
+int chase_symlinks_and_stat(
+ const char *path,
+ const char *root,
+ unsigned chase_flags,
+ char **ret_path,
+ struct stat *ret_stat) {
+
+ _cleanup_close_ int path_fd = -1;
+ _cleanup_free_ char *p = NULL;
+
+ assert(path);
+ assert(ret_stat);
+
+ if (chase_flags & CHASE_NONEXISTENT)
+ return -EINVAL;
+
+ if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
+ /* Shortcut this call if none of the special features of this call are requested */
+ if (stat(path, ret_stat) < 0)
+ return -errno;
+
+ return 1;
+ }
+
+ path_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &p : NULL);
+ if (path_fd < 0)
+ return path_fd;
+
+ if (fstat(path_fd, ret_stat) < 0)
+ return -errno;
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(p);
+
+ if (chase_flags & CHASE_OPEN)
+ return TAKE_FD(path_fd);
+
+ return 1;
+}
+
+int access_fd(int fd, int mode) {
+ char p[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(fd) + 1];
+ int r;
+
+ /* Like access() but operates on an already open fd */
+
+ xsprintf(p, "/proc/self/fd/%i", fd);
+ r = access(p, mode);
+ if (r < 0)
+ return -errno;
+
+ return r;
+}
+
+void unlink_tempfilep(char (*p)[]) {
+ /* If the file is created with mkstemp(), it will (almost always)
+ * change the suffix. Treat this as a sign that the file was
+ * successfully created. We ignore both the rare case where the
+ * original suffix is used and unlink failures. */
+ if (!endswith(*p, ".XXXXXX"))
+ (void) unlink_noerrno(*p);
+}
+
+int unlinkat_deallocate(int fd, const char *name, int flags) {
+ _cleanup_close_ int truncate_fd = -1;
+ struct stat st;
+ off_t l, bs;
+
+ /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other
+ * link to it. This is useful to ensure that other processes that might have the file open for reading won't be
+ * able to keep the data pinned on disk forever. This call is particular useful whenever we execute clean-up
+ * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and
+ * returned to the free pool.
+ *
+ * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means
+ * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other
+ * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes
+ * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.)
+ * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file
+ * truncation (🔪), as our goal of deallocating the data space trumps our goal of being nice to readers (💐).
+ *
+ * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the
+ * primary job – to delete the file – is accomplished. */
+
+ if ((flags & AT_REMOVEDIR) == 0) {
+ truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
+ if (truncate_fd < 0) {
+
+ /* If this failed because the file doesn't exist propagate the error right-away. Also,
+ * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is
+ * returned when this is a directory but we are not supposed to delete those, hence propagate
+ * the error right-away too. */
+ if (IN_SET(errno, ENOENT, EISDIR))
+ return -errno;
+
+ if (errno != ELOOP) /* don't complain if this is a symlink */
+ log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name);
+ }
+ }
+
+ if (unlinkat(fd, name, flags) < 0)
+ return -errno;
+
+ if (truncate_fd < 0) /* Don't have a file handle, can't do more ☹️ */
+ return 0;
+
+ if (fstat(truncate_fd, &st) < 0) {
+ log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name);
+ return 0;
+ }
+
+ if (!S_ISREG(st.st_mode) || st.st_blocks == 0 || st.st_nlink > 0)
+ return 0;
+
+ /* If this is a regular file, it actually took up space on disk and there are no other links it's time to
+ * punch-hole/truncate this to release the disk space. */
+
+ bs = MAX(st.st_blksize, 512);
+ l = DIV_ROUND_UP(st.st_size, bs) * bs; /* Round up to next block size */
+
+ if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0)
+ return 0; /* Successfully punched a hole! 😊 */
+
+ /* Fall back to truncation */
+ if (ftruncate(truncate_fd, 0) < 0) {
+ log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m");
+ return 0;
+ }
+
+ return 0;
+}
+
+int fsync_directory_of_file(int fd) {
+ _cleanup_free_ char *path = NULL;
+ _cleanup_close_ int dfd = -1;
+ int r;
+
+ r = fd_verify_regular(fd);
+ if (r < 0)
+ return r;
+
+ r = fd_get_path(fd, &path);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to query /proc/self/fd/%d%s: %m",
+ fd,
+ r == -EOPNOTSUPP ? ", ignoring" : "");
+
+ if (r == -EOPNOTSUPP)
+ /* If /proc is not available, we're most likely running in some
+ * chroot environment, and syncing the directory is not very
+ * important in that case. Let's just silently do nothing. */
+ return 0;
+
+ return r;
+ }
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ dfd = open_parent(path, O_CLOEXEC, 0);
+ if (dfd < 0)
+ return dfd;
+
+ if (fsync(dfd) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int fsync_path_at(int at_fd, const char *path) {
+ _cleanup_close_ int opened_fd = -1;
+ int fd;
+
+ if (isempty(path)) {
+ if (at_fd == AT_FDCWD) {
+ opened_fd = open(".", O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ if (opened_fd < 0)
+ return -errno;
+
+ fd = opened_fd;
+ } else
+ fd = at_fd;
+ } else {
+
+ opened_fd = openat(at_fd, path, O_RDONLY|O_CLOEXEC);
+ if (opened_fd < 0)
+ return -errno;
+
+ fd = opened_fd;
+ }
+
+ if (fsync(fd) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int open_parent(const char *path, int flags, mode_t mode) {
+ _cleanup_free_ char *parent = NULL;
+ int fd;
+
+ if (isempty(path))
+ return -EINVAL;
+ if (path_equal(path, "/")) /* requesting the parent of the root dir is fishy, let's prohibit that */
+ return -EINVAL;
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ /* Let's insist on O_DIRECTORY since the parent of a file or directory is a directory. Except if we open an
+ * O_TMPFILE file, because in that case we are actually create a regular file below the parent directory. */
+
+ if ((flags & O_PATH) == O_PATH)
+ flags |= O_DIRECTORY;
+ else if ((flags & O_TMPFILE) != O_TMPFILE)
+ flags |= O_DIRECTORY|O_RDONLY;
+
+ fd = open(parent, flags, mode);
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h
new file mode 100644
index 0000000..7ad030b
--- /dev/null
+++ b/src/basic/fs-util.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/inotify.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "time-util.h"
+#include "util.h"
+
+int unlink_noerrno(const char *path);
+
+int rmdir_parents(const char *path, const char *stop);
+
+int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath);
+
+int readlinkat_malloc(int fd, const char *p, char **ret);
+int readlink_malloc(const char *p, char **r);
+int readlink_value(const char *p, char **ret);
+int readlink_and_make_absolute(const char *p, char **r);
+
+int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid);
+int fchmod_and_chown(int fd, mode_t mode, uid_t uid, gid_t gid);
+
+int fchmod_umask(int fd, mode_t mode);
+int fchmod_opath(int fd, mode_t m);
+
+int fd_warn_permissions(const char *path, int fd);
+
+#define laccess(path, mode) faccessat(AT_FDCWD, (path), (mode), AT_SYMLINK_NOFOLLOW)
+
+int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode);
+int touch(const char *path);
+
+int symlink_idempotent(const char *from, const char *to, bool make_relative);
+
+int symlink_atomic(const char *from, const char *to);
+int mknod_atomic(const char *path, mode_t mode, dev_t dev);
+int mkfifo_atomic(const char *path, mode_t mode);
+int mkfifoat_atomic(int dir_fd, const char *path, mode_t mode);
+
+int get_files_in_directory(const char *path, char ***list);
+
+int tmp_dir(const char **ret);
+int var_tmp_dir(const char **ret);
+
+int unlink_or_warn(const char *filename);
+
+#define INOTIFY_EVENT_MAX (sizeof(struct inotify_event) + NAME_MAX + 1)
+
+#define FOREACH_INOTIFY_EVENT(e, buffer, sz) \
+ for ((e) = &buffer.ev; \
+ (uint8_t*) (e) < (uint8_t*) (buffer.raw) + (sz); \
+ (e) = (struct inotify_event*) ((uint8_t*) (e) + sizeof(struct inotify_event) + (e)->len))
+
+union inotify_event_buffer {
+ struct inotify_event ev;
+ uint8_t raw[INOTIFY_EVENT_MAX];
+};
+
+int inotify_add_watch_fd(int fd, int what, uint32_t mask);
+
+enum {
+ CHASE_PREFIX_ROOT = 1 << 0, /* If set, the specified path will be prefixed by the specified root before beginning the iteration */
+ CHASE_NONEXISTENT = 1 << 1, /* If set, it's OK if the path doesn't actually exist. */
+ CHASE_NO_AUTOFS = 1 << 2, /* If set, return -EREMOTE if autofs mount point found */
+ CHASE_SAFE = 1 << 3, /* If set, return EPERM if we ever traverse from unprivileged to privileged files or directories */
+ CHASE_OPEN = 1 << 4, /* If set, return an O_PATH object to the final component */
+ CHASE_TRAIL_SLASH = 1 << 5, /* If set, any trailing slash will be preserved */
+ CHASE_STEP = 1 << 6, /* If set, just execute a single step of the normalization */
+ CHASE_NOFOLLOW = 1 << 7, /* Only valid with CHASE_OPEN: when the path's right-most component refers to symlink return O_PATH fd of the symlink, rather than following it. */
+ CHASE_WARN = 1 << 8, /* Emit an appropriate warning when an error is encountered */
+};
+
+/* How many iterations to execute before returning -ELOOP */
+#define CHASE_SYMLINKS_MAX 32
+
+int chase_symlinks(const char *path_with_prefix, const char *root, unsigned flags, char **ret);
+
+int chase_symlinks_and_open(const char *path, const char *root, unsigned chase_flags, int open_flags, char **ret_path);
+int chase_symlinks_and_opendir(const char *path, const char *root, unsigned chase_flags, char **ret_path, DIR **ret_dir);
+int chase_symlinks_and_stat(const char *path, const char *root, unsigned chase_flags, char **ret_path, struct stat *ret_stat);
+
+/* Useful for usage with _cleanup_(), removes a directory and frees the pointer */
+static inline void rmdir_and_free(char *p) {
+ PROTECT_ERRNO;
+ (void) rmdir(p);
+ free(p);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, rmdir_and_free);
+
+static inline void unlink_and_free(char *p) {
+ (void) unlink_noerrno(p);
+ free(p);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, unlink_and_free);
+
+int access_fd(int fd, int mode);
+
+void unlink_tempfilep(char (*p)[]);
+int unlinkat_deallocate(int fd, const char *name, int flags);
+
+int fsync_directory_of_file(int fd);
+int fsync_path_at(int at_fd, const char *path);
+
+int open_parent(const char *path, int flags, mode_t mode);
diff --git a/src/basic/gcrypt-util.c b/src/basic/gcrypt-util.c
new file mode 100644
index 0000000..f304a2b
--- /dev/null
+++ b/src/basic/gcrypt-util.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_GCRYPT
+#include <gcrypt.h>
+
+#include "gcrypt-util.h"
+#include "hexdecoct.h"
+
+void initialize_libgcrypt(bool secmem) {
+ const char *p;
+ if (gcry_control(GCRYCTL_INITIALIZATION_FINISHED_P))
+ return;
+
+ p = gcry_check_version("1.4.5");
+ assert(p);
+
+ /* Turn off "secmem". Clients which wish to make use of this
+ * feature should initialize the library manually */
+ if (!secmem)
+ gcry_control(GCRYCTL_DISABLE_SECMEM);
+ gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0);
+}
+
+int string_hashsum(const char *s, size_t len, int md_algorithm, char **out) {
+ _cleanup_(gcry_md_closep) gcry_md_hd_t md = NULL;
+ size_t hash_size;
+ void *hash;
+ char *enc;
+
+ initialize_libgcrypt(false);
+
+ hash_size = gcry_md_get_algo_dlen(md_algorithm);
+ assert(hash_size > 0);
+
+ gcry_md_open(&md, md_algorithm, 0);
+ if (!md)
+ return -EIO;
+
+ gcry_md_write(md, s, len);
+
+ hash = gcry_md_read(md, 0);
+ if (!hash)
+ return -EIO;
+
+ enc = hexmem(hash, hash_size);
+ if (!enc)
+ return -ENOMEM;
+
+ *out = enc;
+ return 0;
+}
+#endif
diff --git a/src/basic/gcrypt-util.h b/src/basic/gcrypt-util.h
new file mode 100644
index 0000000..87eb606
--- /dev/null
+++ b/src/basic/gcrypt-util.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#pragma once
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#if HAVE_GCRYPT
+#include <gcrypt.h>
+
+#include "macro.h"
+
+void initialize_libgcrypt(bool secmem);
+int string_hashsum(const char *s, size_t len, int md_algorithm, char **out);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(gcry_md_hd_t, gcry_md_close);
+#endif
+
+static inline int string_hashsum_sha224(const char *s, size_t len, char **out) {
+#if HAVE_GCRYPT
+ return string_hashsum(s, len, GCRY_MD_SHA224, out);
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+static inline int string_hashsum_sha256(const char *s, size_t len, char **out) {
+#if HAVE_GCRYPT
+ return string_hashsum(s, len, GCRY_MD_SHA256, out);
+#else
+ return -EOPNOTSUPP;
+#endif
+}
diff --git a/src/basic/generate-af-list.sh b/src/basic/generate-af-list.sh
new file mode 100755
index 0000000..5bf244c
--- /dev/null
+++ b/src/basic/generate-af-list.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -eu
+
+$1 -E -dM -include sys/socket.h -include "$2" -include "$3" - </dev/null | \
+ grep -Ev 'AF_UNSPEC|AF_MAX' | \
+ awk '/^#define[ \t]+AF_[^ \t]+[ \t]+[AP]F_[^ \t]/ { print $2; }'
diff --git a/src/basic/generate-arphrd-list.sh b/src/basic/generate-arphrd-list.sh
new file mode 100755
index 0000000..e6e874a
--- /dev/null
+++ b/src/basic/generate-arphrd-list.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -eu
+
+$1 -dM -include net/if_arp.h -include "$2" -include "$3" - </dev/null | \
+ awk '/^#define[ \t]+ARPHRD_[^ \t]+[ \t]+[^ \t]/ { print $2; }' | \
+ sed -e 's/ARPHRD_//'
diff --git a/src/basic/generate-cap-list.sh b/src/basic/generate-cap-list.sh
new file mode 100755
index 0000000..0628d24
--- /dev/null
+++ b/src/basic/generate-cap-list.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -eu
+
+$1 -dM -include linux/capability.h -include "$2" -include "$3" - </dev/null | \
+ awk '/^#define[ \t]+CAP_[A-Z_]+[ \t]+/ { print $2; }' | \
+ grep -v CAP_LAST_CAP
diff --git a/src/basic/generate-errno-list.sh b/src/basic/generate-errno-list.sh
new file mode 100755
index 0000000..953d5e3
--- /dev/null
+++ b/src/basic/generate-errno-list.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+set -eu
+
+$1 -dM -include errno.h - </dev/null | \
+ awk '/^#define[ \t]+E[^ _]+[ \t]+/ { print $2; }'
diff --git a/src/basic/glob-util.c b/src/basic/glob-util.c
new file mode 100644
index 0000000..9fac676
--- /dev/null
+++ b/src/basic/glob-util.c
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <glob.h>
+#include <sys/types.h>
+
+#include "dirent-util.h"
+#include "glob-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "strv.h"
+
+static void closedir_wrapper(void* v) {
+ (void) closedir(v);
+}
+
+int safe_glob(const char *path, int flags, glob_t *pglob) {
+ int k;
+
+ /* We want to set GLOB_ALTDIRFUNC ourselves, don't allow it to be set. */
+ assert(!(flags & GLOB_ALTDIRFUNC));
+
+ if (!pglob->gl_closedir)
+ pglob->gl_closedir = closedir_wrapper;
+ if (!pglob->gl_readdir)
+ pglob->gl_readdir = (struct dirent *(*)(void *)) readdir_no_dot;
+ if (!pglob->gl_opendir)
+ pglob->gl_opendir = (void *(*)(const char *)) opendir;
+ if (!pglob->gl_lstat)
+ pglob->gl_lstat = lstat;
+ if (!pglob->gl_stat)
+ pglob->gl_stat = stat;
+
+ errno = 0;
+ k = glob(path, flags | GLOB_ALTDIRFUNC, NULL, pglob);
+
+ if (k == GLOB_NOMATCH)
+ return -ENOENT;
+ if (k == GLOB_NOSPACE)
+ return -ENOMEM;
+ if (k != 0)
+ return errno > 0 ? -errno : -EIO;
+ if (strv_isempty(pglob->gl_pathv))
+ return -ENOENT;
+
+ return 0;
+}
+
+int glob_exists(const char *path) {
+ _cleanup_globfree_ glob_t g = {};
+ int k;
+
+ assert(path);
+
+ k = safe_glob(path, GLOB_NOSORT|GLOB_BRACE, &g);
+ if (k == -ENOENT)
+ return false;
+ if (k < 0)
+ return k;
+ return true;
+}
+
+int glob_extend(char ***strv, const char *path) {
+ _cleanup_globfree_ glob_t g = {};
+ int k;
+
+ k = safe_glob(path, GLOB_NOSORT|GLOB_BRACE, &g);
+ if (k < 0)
+ return k;
+
+ return strv_extend_strv(strv, g.gl_pathv, false);
+}
diff --git a/src/basic/glob-util.h b/src/basic/glob-util.h
new file mode 100644
index 0000000..8e226c1
--- /dev/null
+++ b/src/basic/glob-util.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <glob.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "macro.h"
+#include "string-util.h"
+
+/* Note: this function modifies pglob to set various functions. */
+int safe_glob(const char *path, int flags, glob_t *pglob);
+
+int glob_exists(const char *path);
+int glob_extend(char ***strv, const char *path);
+
+#define _cleanup_globfree_ _cleanup_(globfree)
+
+_pure_ static inline bool string_is_glob(const char *p) {
+ /* Check if a string contains any glob patterns. */
+ return !!strpbrk(p, GLOB_CHARS);
+}
diff --git a/src/basic/gunicode.c b/src/basic/gunicode.c
new file mode 100644
index 0000000..c51b1a7
--- /dev/null
+++ b/src/basic/gunicode.c
@@ -0,0 +1,110 @@
+/* gunicode.c - Unicode manipulation functions
+ *
+ * Copyright (C) 1999, 2000 Tom Tromey
+ * Copyright © 2000, 2005 Red Hat, Inc.
+ */
+
+#include "gunicode.h"
+
+#define unichar uint32_t
+
+/**
+ * g_utf8_prev_char:
+ * @p: a pointer to a position within a UTF-8 encoded string
+ *
+ * Finds the previous UTF-8 character in the string before @p.
+ *
+ * @p does not have to be at the beginning of a UTF-8 character. No check
+ * is made to see if the character found is actually valid other than
+ * it starts with an appropriate byte. If @p might be the first
+ * character of the string, you must use g_utf8_find_prev_char() instead.
+ *
+ * Return value: a pointer to the found character.
+ **/
+char *
+utf8_prev_char (const char *p)
+{
+ for (;;)
+ {
+ p--;
+ if ((*p & 0xc0) != 0x80)
+ return (char *)p;
+ }
+}
+
+struct Interval
+{
+ unichar start, end;
+};
+
+static int
+interval_compare (const void *key, const void *elt)
+{
+ unichar c = (unichar) (long) (key);
+ struct Interval *interval = (struct Interval *)elt;
+
+ if (c < interval->start)
+ return -1;
+ if (c > interval->end)
+ return +1;
+
+ return 0;
+}
+
+/*
+ * NOTE:
+ *
+ * The tables for g_unichar_iswide() and g_unichar_iswide_cjk() are
+ * generated from the Unicode Character Database's file
+ * extracted/DerivedEastAsianWidth.txt using the gen-iswide-table.py
+ * in this way:
+ *
+ * ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt | fmt
+ *
+ * Last update for Unicode 6.0.
+ */
+
+/**
+ * g_unichar_iswide:
+ * @c: a Unicode character
+ *
+ * Determines if a character is typically rendered in a double-width
+ * cell.
+ *
+ * Return value: %TRUE if the character is wide
+ **/
+bool
+unichar_iswide (unichar c)
+{
+ /* See NOTE earlier for how to update this table. */
+ static const struct Interval wide[] = {
+ {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3},
+ {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096},
+ {0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA},
+ {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE},
+ {0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C},
+ {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52},
+ {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6},
+ {0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A},
+ {0x1F240, 0x1F248}, {0x1F250, 0x1F251},
+ {0x1F300, 0x1F567}, /* Miscellaneous Symbols and Pictographs */
+ {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD},
+ };
+
+ if (bsearch ((void *)(uintptr_t)c, wide, (sizeof (wide) / sizeof ((wide)[0])), sizeof wide[0],
+ interval_compare))
+ return true;
+
+ return false;
+}
+
+const char utf8_skip_data[256] = {
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
+};
diff --git a/src/basic/gunicode.h b/src/basic/gunicode.h
new file mode 100644
index 0000000..a16b7b6
--- /dev/null
+++ b/src/basic/gunicode.h
@@ -0,0 +1,30 @@
+#pragma once
+
+/* gunicode.h - Unicode manipulation functions
+ *
+ * Copyright (C) 1999, 2000 Tom Tromey
+ * Copyright © 2000, 2005 Red Hat, Inc.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+char *utf8_prev_char (const char *p);
+
+extern const char utf8_skip_data[256];
+
+/**
+ * g_utf8_next_char:
+ * @p: Pointer to the start of a valid UTF-8 character
+ *
+ * Skips to the next character in a UTF-8 string. The string must be
+ * valid; this macro is as fast as possible, and has no error-checking.
+ * You would use this macro to iterate over a string character by
+ * character. The macro returns the start of the next UTF-8 character.
+ * Before using this macro, use g_utf8_validate() to validate strings
+ * that may contain invalid UTF-8.
+ */
+#define utf8_next_char(p) (char *)((p) + utf8_skip_data[*(const unsigned char *)(p)])
+
+bool unichar_iswide (uint32_t c);
diff --git a/src/basic/hash-funcs.c b/src/basic/hash-funcs.c
new file mode 100644
index 0000000..1be43d4
--- /dev/null
+++ b/src/basic/hash-funcs.c
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+
+#include "hash-funcs.h"
+#include "path-util.h"
+
+void string_hash_func(const char *p, struct siphash *state) {
+ siphash24_compress(p, strlen(p) + 1, state);
+}
+
+DEFINE_HASH_OPS(string_hash_ops, char, string_hash_func, string_compare_func);
+
+void path_hash_func(const char *q, struct siphash *state) {
+ size_t n;
+
+ assert(q);
+ assert(state);
+
+ /* Calculates a hash for a path in a way this duplicate inner slashes don't make a differences, and also
+ * whether there's a trailing slash or not. This fits well with the semantics of path_compare(), which does
+ * similar checks and also doesn't care for trailing slashes. Note that relative and absolute paths (i.e. those
+ * which begin in a slash or not) will hash differently though. */
+
+ n = strspn(q, "/");
+ if (n > 0) { /* Eat up initial slashes, and add one "/" to the hash for all of them */
+ siphash24_compress(q, 1, state);
+ q += n;
+ }
+
+ for (;;) {
+ /* Determine length of next component */
+ n = strcspn(q, "/");
+ if (n == 0) /* Reached the end? */
+ break;
+
+ /* Add this component to the hash and skip over it */
+ siphash24_compress(q, n, state);
+ q += n;
+
+ /* How many slashes follow this component? */
+ n = strspn(q, "/");
+ if (q[n] == 0) /* Is this a trailing slash? If so, we are at the end, and don't care about the slashes anymore */
+ break;
+
+ /* We are not add the end yet. Hash exactly one slash for all of the ones we just encountered. */
+ siphash24_compress(q, 1, state);
+ q += n;
+ }
+}
+
+int path_compare_func(const char *a, const char *b) {
+ return path_compare(a, b);
+}
+
+DEFINE_HASH_OPS(path_hash_ops, char, path_hash_func, path_compare_func);
+
+void trivial_hash_func(const void *p, struct siphash *state) {
+ siphash24_compress(&p, sizeof(p), state);
+}
+
+int trivial_compare_func(const void *a, const void *b) {
+ return CMP(a, b);
+}
+
+const struct hash_ops trivial_hash_ops = {
+ .hash = trivial_hash_func,
+ .compare = trivial_compare_func,
+};
+
+void uint64_hash_func(const uint64_t *p, struct siphash *state) {
+ siphash24_compress(p, sizeof(uint64_t), state);
+}
+
+int uint64_compare_func(const uint64_t *a, const uint64_t *b) {
+ return CMP(*a, *b);
+}
+
+DEFINE_HASH_OPS(uint64_hash_ops, uint64_t, uint64_hash_func, uint64_compare_func);
+
+#if SIZEOF_DEV_T != 8
+void devt_hash_func(const dev_t *p, struct siphash *state) {
+ siphash24_compress(p, sizeof(dev_t), state);
+}
+
+int devt_compare_func(const dev_t *a, const dev_t *b) {
+ return CMP(*a, *b);
+}
+
+DEFINE_HASH_OPS(devt_hash_ops, dev_t, devt_hash_func, devt_compare_func);
+#endif
diff --git a/src/basic/hash-funcs.h b/src/basic/hash-funcs.h
new file mode 100644
index 0000000..3d2ae4b
--- /dev/null
+++ b/src/basic/hash-funcs.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "siphash24.h"
+
+typedef void (*hash_func_t)(const void *p, struct siphash *state);
+typedef int (*compare_func_t)(const void *a, const void *b);
+
+struct hash_ops {
+ hash_func_t hash;
+ compare_func_t compare;
+ free_func_t free_key;
+ free_func_t free_value;
+};
+
+#define _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, free_key_func, free_value_func, scope) \
+ _unused_ static void (* UNIQ_T(static_hash_wrapper, uq))(const type *, struct siphash *) = hash_func; \
+ _unused_ static int (* UNIQ_T(static_compare_wrapper, uq))(const type *, const type *) = compare_func; \
+ scope const struct hash_ops name = { \
+ .hash = (hash_func_t) hash_func, \
+ .compare = (compare_func_t) compare_func, \
+ .free_key = free_key_func, \
+ .free_value = free_value_func, \
+ }
+
+#define _DEFINE_FREE_FUNC(uq, type, wrapper_name, func) \
+ /* Type-safe free function */ \
+ static void UNIQ_T(wrapper_name, uq)(void *a) { \
+ type *_a = a; \
+ func(_a); \
+ }
+
+#define _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(uq, name, type, hash_func, compare_func, free_func, scope) \
+ _DEFINE_FREE_FUNC(uq, type, static_free_wrapper, free_func); \
+ _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \
+ UNIQ_T(static_free_wrapper, uq), NULL, scope)
+
+#define _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(uq, name, type, hash_func, compare_func, type_value, free_func, scope) \
+ _DEFINE_FREE_FUNC(uq, type_value, static_free_wrapper, free_func); \
+ _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \
+ NULL, UNIQ_T(static_free_wrapper, uq), scope)
+
+#define _DEFINE_HASH_OPS_FULL(uq, name, type, hash_func, compare_func, free_key_func, type_value, free_value_func, scope) \
+ _DEFINE_FREE_FUNC(uq, type, static_free_key_wrapper, free_key_func); \
+ _DEFINE_FREE_FUNC(uq, type_value, static_free_value_wrapper, free_value_func); \
+ _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \
+ UNIQ_T(static_free_key_wrapper, uq), \
+ UNIQ_T(static_free_value_wrapper, uq), scope)
+
+#define DEFINE_HASH_OPS(name, type, hash_func, compare_func) \
+ _DEFINE_HASH_OPS(UNIQ, name, type, hash_func, compare_func, NULL, NULL,)
+
+#define DEFINE_PRIVATE_HASH_OPS(name, type, hash_func, compare_func) \
+ _DEFINE_HASH_OPS(UNIQ, name, type, hash_func, compare_func, NULL, NULL, static)
+
+#define DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(name, type, hash_func, compare_func, free_func) \
+ _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, free_func,)
+
+#define DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(name, type, hash_func, compare_func, free_func) \
+ _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, free_func, static)
+
+#define DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(name, type, hash_func, compare_func, value_type, free_func) \
+ _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, value_type, free_func,)
+
+#define DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(name, type, hash_func, compare_func, value_type, free_func) \
+ _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, value_type, free_func, static)
+
+#define DEFINE_HASH_OPS_FULL(name, type, hash_func, compare_func, free_key_func, value_type, free_value_func) \
+ _DEFINE_HASH_OPS_FULL(UNIQ, name, type, hash_func, compare_func, free_key_func, value_type, free_value_func,)
+
+#define DEFINE_PRIVATE_HASH_OPS_FULL(name, type, hash_func, compare_func, free_key_func, value_type, free_value_func) \
+ _DEFINE_HASH_OPS_FULL(UNIQ, name, type, hash_func, compare_func, free_key_func, value_type, free_value_func, static)
+
+void string_hash_func(const char *p, struct siphash *state);
+#define string_compare_func strcmp
+extern const struct hash_ops string_hash_ops;
+
+void path_hash_func(const char *p, struct siphash *state);
+int path_compare_func(const char *a, const char *b) _pure_;
+extern const struct hash_ops path_hash_ops;
+
+/* This will compare the passed pointers directly, and will not dereference them. This is hence not useful for strings
+ * or suchlike. */
+void trivial_hash_func(const void *p, struct siphash *state);
+int trivial_compare_func(const void *a, const void *b) _const_;
+extern const struct hash_ops trivial_hash_ops;
+
+/* 32bit values we can always just embed in the pointer itself, but in order to support 32bit archs we need store 64bit
+ * values indirectly, since they don't fit in a pointer. */
+void uint64_hash_func(const uint64_t *p, struct siphash *state);
+int uint64_compare_func(const uint64_t *a, const uint64_t *b) _pure_;
+extern const struct hash_ops uint64_hash_ops;
+
+/* On some archs dev_t is 32bit, and on others 64bit. And sometimes it's 64bit on 32bit archs, and sometimes 32bit on
+ * 64bit archs. Yuck! */
+#if SIZEOF_DEV_T != 8
+void devt_hash_func(const dev_t *p, struct siphash *state) _pure_;
+int devt_compare_func(const dev_t *a, const dev_t *b) _pure_;
+extern const struct hash_ops devt_hash_ops;
+#else
+#define devt_hash_func uint64_hash_func
+#define devt_compare_func uint64_compare_func
+#define devt_hash_ops uint64_hash_ops
+#endif
diff --git a/src/basic/hashmap.c b/src/basic/hashmap.c
new file mode 100644
index 0000000..66e9e00
--- /dev/null
+++ b/src/basic/hashmap.c
@@ -0,0 +1,1911 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "mempool.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "set.h"
+#include "siphash24.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+#if ENABLE_DEBUG_HASHMAP
+#include <pthread.h>
+#include "list.h"
+#endif
+
+/*
+ * Implementation of hashmaps.
+ * Addressing: open
+ * - uses less RAM compared to closed addressing (chaining), because
+ * our entries are small (especially in Sets, which tend to contain
+ * the majority of entries in systemd).
+ * Collision resolution: Robin Hood
+ * - tends to equalize displacement of entries from their optimal buckets.
+ * Probe sequence: linear
+ * - though theoretically worse than random probing/uniform hashing/double
+ * hashing, it is good for cache locality.
+ *
+ * References:
+ * Celis, P. 1986. Robin Hood Hashing.
+ * Ph.D. Dissertation. University of Waterloo, Waterloo, Ont., Canada, Canada.
+ * https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
+ * - The results are derived for random probing. Suggests deletion with
+ * tombstones and two mean-centered search methods. None of that works
+ * well for linear probing.
+ *
+ * Janson, S. 2005. Individual displacements for linear probing hashing with different insertion policies.
+ * ACM Trans. Algorithms 1, 2 (October 2005), 177-213.
+ * DOI=10.1145/1103963.1103964 http://doi.acm.org/10.1145/1103963.1103964
+ * http://www.math.uu.se/~svante/papers/sj157.pdf
+ * - Applies to Robin Hood with linear probing. Contains remarks on
+ * the unsuitability of mean-centered search with linear probing.
+ *
+ * Viola, A. 2005. Exact distribution of individual displacements in linear probing hashing.
+ * ACM Trans. Algorithms 1, 2 (October 2005), 214-242.
+ * DOI=10.1145/1103963.1103965 http://doi.acm.org/10.1145/1103963.1103965
+ * - Similar to Janson. Note that Viola writes about C_{m,n} (number of probes
+ * in a successful search), and Janson writes about displacement. C = d + 1.
+ *
+ * Goossaert, E. 2013. Robin Hood hashing: backward shift deletion.
+ * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
+ * - Explanation of backward shift deletion with pictures.
+ *
+ * Khuong, P. 2013. The Other Robin Hood Hashing.
+ * http://www.pvk.ca/Blog/2013/11/26/the-other-robin-hood-hashing/
+ * - Short summary of random vs. linear probing, and tombstones vs. backward shift.
+ */
+
+/*
+ * XXX Ideas for improvement:
+ * For unordered hashmaps, randomize iteration order, similarly to Perl:
+ * http://blog.booking.com/hardening-perls-hash-function.html
+ */
+
+/* INV_KEEP_FREE = 1 / (1 - max_load_factor)
+ * e.g. 1 / (1 - 0.8) = 5 ... keep one fifth of the buckets free. */
+#define INV_KEEP_FREE 5U
+
+/* Fields common to entries of all hashmap/set types */
+struct hashmap_base_entry {
+ const void *key;
+};
+
+/* Entry types for specific hashmap/set types
+ * hashmap_base_entry must be at the beginning of each entry struct. */
+
+struct plain_hashmap_entry {
+ struct hashmap_base_entry b;
+ void *value;
+};
+
+struct ordered_hashmap_entry {
+ struct plain_hashmap_entry p;
+ unsigned iterate_next, iterate_previous;
+};
+
+struct set_entry {
+ struct hashmap_base_entry b;
+};
+
+/* In several functions it is advantageous to have the hash table extended
+ * virtually by a couple of additional buckets. We reserve special index values
+ * for these "swap" buckets. */
+#define _IDX_SWAP_BEGIN (UINT_MAX - 3)
+#define IDX_PUT (_IDX_SWAP_BEGIN + 0)
+#define IDX_TMP (_IDX_SWAP_BEGIN + 1)
+#define _IDX_SWAP_END (_IDX_SWAP_BEGIN + 2)
+
+#define IDX_FIRST (UINT_MAX - 1) /* special index for freshly initialized iterators */
+#define IDX_NIL UINT_MAX /* special index value meaning "none" or "end" */
+
+assert_cc(IDX_FIRST == _IDX_SWAP_END);
+assert_cc(IDX_FIRST == _IDX_ITERATOR_FIRST);
+
+/* Storage space for the "swap" buckets.
+ * All entry types can fit into a ordered_hashmap_entry. */
+struct swap_entries {
+ struct ordered_hashmap_entry e[_IDX_SWAP_END - _IDX_SWAP_BEGIN];
+};
+
+/* Distance from Initial Bucket */
+typedef uint8_t dib_raw_t;
+#define DIB_RAW_OVERFLOW ((dib_raw_t)0xfdU) /* indicates DIB value is greater than representable */
+#define DIB_RAW_REHASH ((dib_raw_t)0xfeU) /* entry yet to be rehashed during in-place resize */
+#define DIB_RAW_FREE ((dib_raw_t)0xffU) /* a free bucket */
+#define DIB_RAW_INIT ((char)DIB_RAW_FREE) /* a byte to memset a DIB store with when initializing */
+
+#define DIB_FREE UINT_MAX
+
+#if ENABLE_DEBUG_HASHMAP
+struct hashmap_debug_info {
+ LIST_FIELDS(struct hashmap_debug_info, debug_list);
+ unsigned max_entries; /* high watermark of n_entries */
+
+ /* who allocated this hashmap */
+ int line;
+ const char *file;
+ const char *func;
+
+ /* fields to detect modification while iterating */
+ unsigned put_count; /* counts puts into the hashmap */
+ unsigned rem_count; /* counts removals from hashmap */
+ unsigned last_rem_idx; /* remembers last removal index */
+};
+
+/* Tracks all existing hashmaps. Get at it from gdb. See sd_dump_hashmaps.py */
+static LIST_HEAD(struct hashmap_debug_info, hashmap_debug_list);
+static pthread_mutex_t hashmap_debug_list_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+#define HASHMAP_DEBUG_FIELDS struct hashmap_debug_info debug;
+
+#else /* !ENABLE_DEBUG_HASHMAP */
+#define HASHMAP_DEBUG_FIELDS
+#endif /* ENABLE_DEBUG_HASHMAP */
+
+enum HashmapType {
+ HASHMAP_TYPE_PLAIN,
+ HASHMAP_TYPE_ORDERED,
+ HASHMAP_TYPE_SET,
+ _HASHMAP_TYPE_MAX
+};
+
+struct _packed_ indirect_storage {
+ void *storage; /* where buckets and DIBs are stored */
+ uint8_t hash_key[HASH_KEY_SIZE]; /* hash key; changes during resize */
+
+ unsigned n_entries; /* number of stored entries */
+ unsigned n_buckets; /* number of buckets */
+
+ unsigned idx_lowest_entry; /* Index below which all buckets are free.
+ Makes "while(hashmap_steal_first())" loops
+ O(n) instead of O(n^2) for unordered hashmaps. */
+ uint8_t _pad[3]; /* padding for the whole HashmapBase */
+ /* The bitfields in HashmapBase complete the alignment of the whole thing. */
+};
+
+struct direct_storage {
+ /* This gives us 39 bytes on 64bit, or 35 bytes on 32bit.
+ * That's room for 4 set_entries + 4 DIB bytes + 3 unused bytes on 64bit,
+ * or 7 set_entries + 7 DIB bytes + 0 unused bytes on 32bit. */
+ uint8_t storage[sizeof(struct indirect_storage)];
+};
+
+#define DIRECT_BUCKETS(entry_t) \
+ (sizeof(struct direct_storage) / (sizeof(entry_t) + sizeof(dib_raw_t)))
+
+/* We should be able to store at least one entry directly. */
+assert_cc(DIRECT_BUCKETS(struct ordered_hashmap_entry) >= 1);
+
+/* We have 3 bits for n_direct_entries. */
+assert_cc(DIRECT_BUCKETS(struct set_entry) < (1 << 3));
+
+/* Hashmaps with directly stored entries all use this shared hash key.
+ * It's no big deal if the key is guessed, because there can be only
+ * a handful of directly stored entries in a hashmap. When a hashmap
+ * outgrows direct storage, it gets its own key for indirect storage. */
+static uint8_t shared_hash_key[HASH_KEY_SIZE];
+static bool shared_hash_key_initialized;
+
+/* Fields that all hashmap/set types must have */
+struct HashmapBase {
+ const struct hash_ops *hash_ops; /* hash and compare ops to use */
+
+ union _packed_ {
+ struct indirect_storage indirect; /* if has_indirect */
+ struct direct_storage direct; /* if !has_indirect */
+ };
+
+ enum HashmapType type:2; /* HASHMAP_TYPE_* */
+ bool has_indirect:1; /* whether indirect storage is used */
+ unsigned n_direct_entries:3; /* Number of entries in direct storage.
+ * Only valid if !has_indirect. */
+ bool from_pool:1; /* whether was allocated from mempool */
+ bool dirty:1; /* whether dirtied since last iterated_cache_get() */
+ bool cached:1; /* whether this hashmap is being cached */
+ HASHMAP_DEBUG_FIELDS /* optional hashmap_debug_info */
+};
+
+/* Specific hash types
+ * HashmapBase must be at the beginning of each hashmap struct. */
+
+struct Hashmap {
+ struct HashmapBase b;
+};
+
+struct OrderedHashmap {
+ struct HashmapBase b;
+ unsigned iterate_list_head, iterate_list_tail;
+};
+
+struct Set {
+ struct HashmapBase b;
+};
+
+typedef struct CacheMem {
+ const void **ptr;
+ size_t n_populated, n_allocated;
+ bool active:1;
+} CacheMem;
+
+struct IteratedCache {
+ HashmapBase *hashmap;
+ CacheMem keys, values;
+};
+
+DEFINE_MEMPOOL(hashmap_pool, Hashmap, 8);
+DEFINE_MEMPOOL(ordered_hashmap_pool, OrderedHashmap, 8);
+/* No need for a separate Set pool */
+assert_cc(sizeof(Hashmap) == sizeof(Set));
+
+struct hashmap_type_info {
+ size_t head_size;
+ size_t entry_size;
+ struct mempool *mempool;
+ unsigned n_direct_buckets;
+};
+
+static const struct hashmap_type_info hashmap_type_info[_HASHMAP_TYPE_MAX] = {
+ [HASHMAP_TYPE_PLAIN] = {
+ .head_size = sizeof(Hashmap),
+ .entry_size = sizeof(struct plain_hashmap_entry),
+ .mempool = &hashmap_pool,
+ .n_direct_buckets = DIRECT_BUCKETS(struct plain_hashmap_entry),
+ },
+ [HASHMAP_TYPE_ORDERED] = {
+ .head_size = sizeof(OrderedHashmap),
+ .entry_size = sizeof(struct ordered_hashmap_entry),
+ .mempool = &ordered_hashmap_pool,
+ .n_direct_buckets = DIRECT_BUCKETS(struct ordered_hashmap_entry),
+ },
+ [HASHMAP_TYPE_SET] = {
+ .head_size = sizeof(Set),
+ .entry_size = sizeof(struct set_entry),
+ .mempool = &hashmap_pool,
+ .n_direct_buckets = DIRECT_BUCKETS(struct set_entry),
+ },
+};
+
+#if VALGRIND
+_destructor_ static void cleanup_pools(void) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ /* Be nice to valgrind */
+
+ /* The pool is only allocated by the main thread, but the memory can
+ * be passed to other threads. Let's clean up if we are the main thread
+ * and no other threads are live. */
+ if (!is_main_thread())
+ return;
+
+ r = get_proc_field("/proc/self/status", "Threads", WHITESPACE, &t);
+ if (r < 0 || !streq(t, "1"))
+ return;
+
+ mempool_drop(&hashmap_pool);
+ mempool_drop(&ordered_hashmap_pool);
+}
+#endif
+
+static unsigned n_buckets(HashmapBase *h) {
+ return h->has_indirect ? h->indirect.n_buckets
+ : hashmap_type_info[h->type].n_direct_buckets;
+}
+
+static unsigned n_entries(HashmapBase *h) {
+ return h->has_indirect ? h->indirect.n_entries
+ : h->n_direct_entries;
+}
+
+static void n_entries_inc(HashmapBase *h) {
+ if (h->has_indirect)
+ h->indirect.n_entries++;
+ else
+ h->n_direct_entries++;
+}
+
+static void n_entries_dec(HashmapBase *h) {
+ if (h->has_indirect)
+ h->indirect.n_entries--;
+ else
+ h->n_direct_entries--;
+}
+
+static void *storage_ptr(HashmapBase *h) {
+ return h->has_indirect ? h->indirect.storage
+ : h->direct.storage;
+}
+
+static uint8_t *hash_key(HashmapBase *h) {
+ return h->has_indirect ? h->indirect.hash_key
+ : shared_hash_key;
+}
+
+static unsigned base_bucket_hash(HashmapBase *h, const void *p) {
+ struct siphash state;
+ uint64_t hash;
+
+ siphash24_init(&state, hash_key(h));
+
+ h->hash_ops->hash(p, &state);
+
+ hash = siphash24_finalize(&state);
+
+ return (unsigned) (hash % n_buckets(h));
+}
+#define bucket_hash(h, p) base_bucket_hash(HASHMAP_BASE(h), p)
+
+static void base_set_dirty(HashmapBase *h) {
+ h->dirty = true;
+}
+#define hashmap_set_dirty(h) base_set_dirty(HASHMAP_BASE(h))
+
+static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) {
+ static uint8_t current[HASH_KEY_SIZE];
+ static bool current_initialized = false;
+
+ /* Returns a hash function key to use. In order to keep things
+ * fast we will not generate a new key each time we allocate a
+ * new hash table. Instead, we'll just reuse the most recently
+ * generated one, except if we never generated one or when we
+ * are rehashing an entire hash table because we reached a
+ * fill level */
+
+ if (!current_initialized || !reuse_is_ok) {
+ random_bytes(current, sizeof(current));
+ current_initialized = true;
+ }
+
+ memcpy(hash_key, current, sizeof(current));
+}
+
+static struct hashmap_base_entry *bucket_at(HashmapBase *h, unsigned idx) {
+ return (struct hashmap_base_entry*)
+ ((uint8_t*) storage_ptr(h) + idx * hashmap_type_info[h->type].entry_size);
+}
+
+static struct plain_hashmap_entry *plain_bucket_at(Hashmap *h, unsigned idx) {
+ return (struct plain_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
+}
+
+static struct ordered_hashmap_entry *ordered_bucket_at(OrderedHashmap *h, unsigned idx) {
+ return (struct ordered_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
+}
+
+static struct set_entry *set_bucket_at(Set *h, unsigned idx) {
+ return (struct set_entry*) bucket_at(HASHMAP_BASE(h), idx);
+}
+
+static struct ordered_hashmap_entry *bucket_at_swap(struct swap_entries *swap, unsigned idx) {
+ return &swap->e[idx - _IDX_SWAP_BEGIN];
+}
+
+/* Returns a pointer to the bucket at index idx.
+ * Understands real indexes and swap indexes, hence "_virtual". */
+static struct hashmap_base_entry *bucket_at_virtual(HashmapBase *h, struct swap_entries *swap,
+ unsigned idx) {
+ if (idx < _IDX_SWAP_BEGIN)
+ return bucket_at(h, idx);
+
+ if (idx < _IDX_SWAP_END)
+ return &bucket_at_swap(swap, idx)->p.b;
+
+ assert_not_reached("Invalid index");
+}
+
+static dib_raw_t *dib_raw_ptr(HashmapBase *h) {
+ return (dib_raw_t*)
+ ((uint8_t*) storage_ptr(h) + hashmap_type_info[h->type].entry_size * n_buckets(h));
+}
+
+static unsigned bucket_distance(HashmapBase *h, unsigned idx, unsigned from) {
+ return idx >= from ? idx - from
+ : n_buckets(h) + idx - from;
+}
+
+static unsigned bucket_calculate_dib(HashmapBase *h, unsigned idx, dib_raw_t raw_dib) {
+ unsigned initial_bucket;
+
+ if (raw_dib == DIB_RAW_FREE)
+ return DIB_FREE;
+
+ if (_likely_(raw_dib < DIB_RAW_OVERFLOW))
+ return raw_dib;
+
+ /*
+ * Having an overflow DIB value is very unlikely. The hash function
+ * would have to be bad. For example, in a table of size 2^24 filled
+ * to load factor 0.9 the maximum observed DIB is only about 60.
+ * In theory (assuming I used Maxima correctly), for an infinite size
+ * hash table with load factor 0.8 the probability of a given entry
+ * having DIB > 40 is 1.9e-8.
+ * This returns the correct DIB value by recomputing the hash value in
+ * the unlikely case. XXX Hitting this case could be a hint to rehash.
+ */
+ initial_bucket = bucket_hash(h, bucket_at(h, idx)->key);
+ return bucket_distance(h, idx, initial_bucket);
+}
+
+static void bucket_set_dib(HashmapBase *h, unsigned idx, unsigned dib) {
+ dib_raw_ptr(h)[idx] = dib != DIB_FREE ? MIN(dib, DIB_RAW_OVERFLOW) : DIB_RAW_FREE;
+}
+
+static unsigned skip_free_buckets(HashmapBase *h, unsigned idx) {
+ dib_raw_t *dibs;
+
+ dibs = dib_raw_ptr(h);
+
+ for ( ; idx < n_buckets(h); idx++)
+ if (dibs[idx] != DIB_RAW_FREE)
+ return idx;
+
+ return IDX_NIL;
+}
+
+static void bucket_mark_free(HashmapBase *h, unsigned idx) {
+ memzero(bucket_at(h, idx), hashmap_type_info[h->type].entry_size);
+ bucket_set_dib(h, idx, DIB_FREE);
+}
+
+static void bucket_move_entry(HashmapBase *h, struct swap_entries *swap,
+ unsigned from, unsigned to) {
+ struct hashmap_base_entry *e_from, *e_to;
+
+ assert(from != to);
+
+ e_from = bucket_at_virtual(h, swap, from);
+ e_to = bucket_at_virtual(h, swap, to);
+
+ memcpy(e_to, e_from, hashmap_type_info[h->type].entry_size);
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+ struct ordered_hashmap_entry *le, *le_to;
+
+ le_to = (struct ordered_hashmap_entry*) e_to;
+
+ if (le_to->iterate_next != IDX_NIL) {
+ le = (struct ordered_hashmap_entry*)
+ bucket_at_virtual(h, swap, le_to->iterate_next);
+ le->iterate_previous = to;
+ }
+
+ if (le_to->iterate_previous != IDX_NIL) {
+ le = (struct ordered_hashmap_entry*)
+ bucket_at_virtual(h, swap, le_to->iterate_previous);
+ le->iterate_next = to;
+ }
+
+ if (lh->iterate_list_head == from)
+ lh->iterate_list_head = to;
+ if (lh->iterate_list_tail == from)
+ lh->iterate_list_tail = to;
+ }
+}
+
+static unsigned next_idx(HashmapBase *h, unsigned idx) {
+ return (idx + 1U) % n_buckets(h);
+}
+
+static unsigned prev_idx(HashmapBase *h, unsigned idx) {
+ return (n_buckets(h) + idx - 1U) % n_buckets(h);
+}
+
+static void *entry_value(HashmapBase *h, struct hashmap_base_entry *e) {
+ switch (h->type) {
+
+ case HASHMAP_TYPE_PLAIN:
+ case HASHMAP_TYPE_ORDERED:
+ return ((struct plain_hashmap_entry*)e)->value;
+
+ case HASHMAP_TYPE_SET:
+ return (void*) e->key;
+
+ default:
+ assert_not_reached("Unknown hashmap type");
+ }
+}
+
+static void base_remove_entry(HashmapBase *h, unsigned idx) {
+ unsigned left, right, prev, dib;
+ dib_raw_t raw_dib, *dibs;
+
+ dibs = dib_raw_ptr(h);
+ assert(dibs[idx] != DIB_RAW_FREE);
+
+#if ENABLE_DEBUG_HASHMAP
+ h->debug.rem_count++;
+ h->debug.last_rem_idx = idx;
+#endif
+
+ left = idx;
+ /* Find the stop bucket ("right"). It is either free or has DIB == 0. */
+ for (right = next_idx(h, left); ; right = next_idx(h, right)) {
+ raw_dib = dibs[right];
+ if (IN_SET(raw_dib, 0, DIB_RAW_FREE))
+ break;
+
+ /* The buckets are not supposed to be all occupied and with DIB > 0.
+ * That would mean we could make everyone better off by shifting them
+ * backward. This scenario is impossible. */
+ assert(left != right);
+ }
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+ struct ordered_hashmap_entry *le = ordered_bucket_at(lh, idx);
+
+ if (le->iterate_next != IDX_NIL)
+ ordered_bucket_at(lh, le->iterate_next)->iterate_previous = le->iterate_previous;
+ else
+ lh->iterate_list_tail = le->iterate_previous;
+
+ if (le->iterate_previous != IDX_NIL)
+ ordered_bucket_at(lh, le->iterate_previous)->iterate_next = le->iterate_next;
+ else
+ lh->iterate_list_head = le->iterate_next;
+ }
+
+ /* Now shift all buckets in the interval (left, right) one step backwards */
+ for (prev = left, left = next_idx(h, left); left != right;
+ prev = left, left = next_idx(h, left)) {
+ dib = bucket_calculate_dib(h, left, dibs[left]);
+ assert(dib != 0);
+ bucket_move_entry(h, NULL, left, prev);
+ bucket_set_dib(h, prev, dib - 1);
+ }
+
+ bucket_mark_free(h, prev);
+ n_entries_dec(h);
+ base_set_dirty(h);
+}
+#define remove_entry(h, idx) base_remove_entry(HASHMAP_BASE(h), idx)
+
+static unsigned hashmap_iterate_in_insertion_order(OrderedHashmap *h, Iterator *i) {
+ struct ordered_hashmap_entry *e;
+ unsigned idx;
+
+ assert(h);
+ assert(i);
+
+ if (i->idx == IDX_NIL)
+ goto at_end;
+
+ if (i->idx == IDX_FIRST && h->iterate_list_head == IDX_NIL)
+ goto at_end;
+
+ if (i->idx == IDX_FIRST) {
+ idx = h->iterate_list_head;
+ e = ordered_bucket_at(h, idx);
+ } else {
+ idx = i->idx;
+ e = ordered_bucket_at(h, idx);
+ /*
+ * We allow removing the current entry while iterating, but removal may cause
+ * a backward shift. The next entry may thus move one bucket to the left.
+ * To detect when it happens, we remember the key pointer of the entry we were
+ * going to iterate next. If it does not match, there was a backward shift.
+ */
+ if (e->p.b.key != i->next_key) {
+ idx = prev_idx(HASHMAP_BASE(h), idx);
+ e = ordered_bucket_at(h, idx);
+ }
+ assert(e->p.b.key == i->next_key);
+ }
+
+#if ENABLE_DEBUG_HASHMAP
+ i->prev_idx = idx;
+#endif
+
+ if (e->iterate_next != IDX_NIL) {
+ struct ordered_hashmap_entry *n;
+ i->idx = e->iterate_next;
+ n = ordered_bucket_at(h, i->idx);
+ i->next_key = n->p.b.key;
+ } else
+ i->idx = IDX_NIL;
+
+ return idx;
+
+at_end:
+ i->idx = IDX_NIL;
+ return IDX_NIL;
+}
+
+static unsigned hashmap_iterate_in_internal_order(HashmapBase *h, Iterator *i) {
+ unsigned idx;
+
+ assert(h);
+ assert(i);
+
+ if (i->idx == IDX_NIL)
+ goto at_end;
+
+ if (i->idx == IDX_FIRST) {
+ /* fast forward to the first occupied bucket */
+ if (h->has_indirect) {
+ i->idx = skip_free_buckets(h, h->indirect.idx_lowest_entry);
+ h->indirect.idx_lowest_entry = i->idx;
+ } else
+ i->idx = skip_free_buckets(h, 0);
+
+ if (i->idx == IDX_NIL)
+ goto at_end;
+ } else {
+ struct hashmap_base_entry *e;
+
+ assert(i->idx > 0);
+
+ e = bucket_at(h, i->idx);
+ /*
+ * We allow removing the current entry while iterating, but removal may cause
+ * a backward shift. The next entry may thus move one bucket to the left.
+ * To detect when it happens, we remember the key pointer of the entry we were
+ * going to iterate next. If it does not match, there was a backward shift.
+ */
+ if (e->key != i->next_key)
+ e = bucket_at(h, --i->idx);
+
+ assert(e->key == i->next_key);
+ }
+
+ idx = i->idx;
+#if ENABLE_DEBUG_HASHMAP
+ i->prev_idx = idx;
+#endif
+
+ i->idx = skip_free_buckets(h, i->idx + 1);
+ if (i->idx != IDX_NIL)
+ i->next_key = bucket_at(h, i->idx)->key;
+ else
+ i->idx = IDX_NIL;
+
+ return idx;
+
+at_end:
+ i->idx = IDX_NIL;
+ return IDX_NIL;
+}
+
+static unsigned hashmap_iterate_entry(HashmapBase *h, Iterator *i) {
+ if (!h) {
+ i->idx = IDX_NIL;
+ return IDX_NIL;
+ }
+
+#if ENABLE_DEBUG_HASHMAP
+ if (i->idx == IDX_FIRST) {
+ i->put_count = h->debug.put_count;
+ i->rem_count = h->debug.rem_count;
+ } else {
+ /* While iterating, must not add any new entries */
+ assert(i->put_count == h->debug.put_count);
+ /* ... or remove entries other than the current one */
+ assert(i->rem_count == h->debug.rem_count ||
+ (i->rem_count == h->debug.rem_count - 1 &&
+ i->prev_idx == h->debug.last_rem_idx));
+ /* Reset our removals counter */
+ i->rem_count = h->debug.rem_count;
+ }
+#endif
+
+ return h->type == HASHMAP_TYPE_ORDERED ? hashmap_iterate_in_insertion_order((OrderedHashmap*) h, i)
+ : hashmap_iterate_in_internal_order(h, i);
+}
+
+bool internal_hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key) {
+ struct hashmap_base_entry *e;
+ void *data;
+ unsigned idx;
+
+ idx = hashmap_iterate_entry(h, i);
+ if (idx == IDX_NIL) {
+ if (value)
+ *value = NULL;
+ if (key)
+ *key = NULL;
+
+ return false;
+ }
+
+ e = bucket_at(h, idx);
+ data = entry_value(h, e);
+ if (value)
+ *value = data;
+ if (key)
+ *key = e->key;
+
+ return true;
+}
+
+bool set_iterate(Set *s, Iterator *i, void **value) {
+ return internal_hashmap_iterate(HASHMAP_BASE(s), i, value, NULL);
+}
+
+#define HASHMAP_FOREACH_IDX(idx, h, i) \
+ for ((i) = ITERATOR_FIRST, (idx) = hashmap_iterate_entry((h), &(i)); \
+ (idx != IDX_NIL); \
+ (idx) = hashmap_iterate_entry((h), &(i)))
+
+IteratedCache *internal_hashmap_iterated_cache_new(HashmapBase *h) {
+ IteratedCache *cache;
+
+ assert(h);
+ assert(!h->cached);
+
+ if (h->cached)
+ return NULL;
+
+ cache = new0(IteratedCache, 1);
+ if (!cache)
+ return NULL;
+
+ cache->hashmap = h;
+ h->cached = true;
+
+ return cache;
+}
+
+static void reset_direct_storage(HashmapBase *h) {
+ const struct hashmap_type_info *hi = &hashmap_type_info[h->type];
+ void *p;
+
+ assert(!h->has_indirect);
+
+ p = mempset(h->direct.storage, 0, hi->entry_size * hi->n_direct_buckets);
+ memset(p, DIB_RAW_INIT, sizeof(dib_raw_t) * hi->n_direct_buckets);
+}
+
+static struct HashmapBase *hashmap_base_new(const struct hash_ops *hash_ops, enum HashmapType type HASHMAP_DEBUG_PARAMS) {
+ HashmapBase *h;
+ const struct hashmap_type_info *hi = &hashmap_type_info[type];
+ bool up;
+
+ up = mempool_enabled();
+
+ h = up ? mempool_alloc0_tile(hi->mempool) : malloc0(hi->head_size);
+ if (!h)
+ return NULL;
+
+ h->type = type;
+ h->from_pool = up;
+ h->hash_ops = hash_ops ?: &trivial_hash_ops;
+
+ if (type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*)h;
+ lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
+ }
+
+ reset_direct_storage(h);
+
+ if (!shared_hash_key_initialized) {
+ random_bytes(shared_hash_key, sizeof(shared_hash_key));
+ shared_hash_key_initialized= true;
+ }
+
+#if ENABLE_DEBUG_HASHMAP
+ h->debug.func = func;
+ h->debug.file = file;
+ h->debug.line = line;
+ assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0);
+ LIST_PREPEND(debug_list, hashmap_debug_list, &h->debug);
+ assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0);
+#endif
+
+ return h;
+}
+
+Hashmap *internal_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return (Hashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
+}
+
+OrderedHashmap *internal_ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return (OrderedHashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
+}
+
+Set *internal_set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return (Set*) hashmap_base_new(hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
+}
+
+static int hashmap_base_ensure_allocated(HashmapBase **h, const struct hash_ops *hash_ops,
+ enum HashmapType type HASHMAP_DEBUG_PARAMS) {
+ HashmapBase *q;
+
+ assert(h);
+
+ if (*h)
+ return 0;
+
+ q = hashmap_base_new(hash_ops, type HASHMAP_DEBUG_PASS_ARGS);
+ if (!q)
+ return -ENOMEM;
+
+ *h = q;
+ return 0;
+}
+
+int internal_hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
+}
+
+int internal_ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
+}
+
+int internal_set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return hashmap_base_ensure_allocated((HashmapBase**)s, hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
+}
+
+static void hashmap_free_no_clear(HashmapBase *h) {
+ assert(!h->has_indirect);
+ assert(h->n_direct_entries == 0);
+
+#if ENABLE_DEBUG_HASHMAP
+ assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0);
+ LIST_REMOVE(debug_list, hashmap_debug_list, &h->debug);
+ assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0);
+#endif
+
+ if (h->from_pool) {
+ /* Ensure that the object didn't get migrated between threads. */
+ assert_se(is_main_thread());
+ mempool_free_tile(hashmap_type_info[h->type].mempool, h);
+ } else
+ free(h);
+}
+
+HashmapBase *internal_hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) {
+ if (h) {
+ internal_hashmap_clear(h, default_free_key, default_free_value);
+ hashmap_free_no_clear(h);
+ }
+
+ return NULL;
+}
+
+void internal_hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) {
+ free_func_t free_key, free_value;
+ if (!h)
+ return;
+
+ free_key = h->hash_ops->free_key ?: default_free_key;
+ free_value = h->hash_ops->free_value ?: default_free_value;
+
+ if (free_key || free_value) {
+
+ /* If destructor calls are defined, let's destroy things defensively: let's take the item out of the
+ * hash table, and only then call the destructor functions. If these destructors then try to unregister
+ * themselves from our hash table a second time, the entry is already gone. */
+
+ while (internal_hashmap_size(h) > 0) {
+ void *k = NULL;
+ void *v;
+
+ v = internal_hashmap_first_key_and_value(h, true, &k);
+
+ if (free_key)
+ free_key(k);
+
+ if (free_value)
+ free_value(v);
+ }
+ }
+
+ if (h->has_indirect) {
+ free(h->indirect.storage);
+ h->has_indirect = false;
+ }
+
+ h->n_direct_entries = 0;
+ reset_direct_storage(h);
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+ lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
+ }
+
+ base_set_dirty(h);
+}
+
+static int resize_buckets(HashmapBase *h, unsigned entries_add);
+
+/*
+ * Finds an empty bucket to put an entry into, starting the scan at 'idx'.
+ * Performs Robin Hood swaps as it goes. The entry to put must be placed
+ * by the caller into swap slot IDX_PUT.
+ * If used for in-place resizing, may leave a displaced entry in swap slot
+ * IDX_PUT. Caller must rehash it next.
+ * Returns: true if it left a displaced entry to rehash next in IDX_PUT,
+ * false otherwise.
+ */
+static bool hashmap_put_robin_hood(HashmapBase *h, unsigned idx,
+ struct swap_entries *swap) {
+ dib_raw_t raw_dib, *dibs;
+ unsigned dib, distance;
+
+#if ENABLE_DEBUG_HASHMAP
+ h->debug.put_count++;
+#endif
+
+ dibs = dib_raw_ptr(h);
+
+ for (distance = 0; ; distance++) {
+ raw_dib = dibs[idx];
+ if (IN_SET(raw_dib, DIB_RAW_FREE, DIB_RAW_REHASH)) {
+ if (raw_dib == DIB_RAW_REHASH)
+ bucket_move_entry(h, swap, idx, IDX_TMP);
+
+ if (h->has_indirect && h->indirect.idx_lowest_entry > idx)
+ h->indirect.idx_lowest_entry = idx;
+
+ bucket_set_dib(h, idx, distance);
+ bucket_move_entry(h, swap, IDX_PUT, idx);
+ if (raw_dib == DIB_RAW_REHASH) {
+ bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
+ return true;
+ }
+
+ return false;
+ }
+
+ dib = bucket_calculate_dib(h, idx, raw_dib);
+
+ if (dib < distance) {
+ /* Found a wealthier entry. Go Robin Hood! */
+ bucket_set_dib(h, idx, distance);
+
+ /* swap the entries */
+ bucket_move_entry(h, swap, idx, IDX_TMP);
+ bucket_move_entry(h, swap, IDX_PUT, idx);
+ bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
+
+ distance = dib;
+ }
+
+ idx = next_idx(h, idx);
+ }
+}
+
+/*
+ * Puts an entry into a hashmap, boldly - no check whether key already exists.
+ * The caller must place the entry (only its key and value, not link indexes)
+ * in swap slot IDX_PUT.
+ * Caller must ensure: the key does not exist yet in the hashmap.
+ * that resize is not needed if !may_resize.
+ * Returns: 1 if entry was put successfully.
+ * -ENOMEM if may_resize==true and resize failed with -ENOMEM.
+ * Cannot return -ENOMEM if !may_resize.
+ */
+static int hashmap_base_put_boldly(HashmapBase *h, unsigned idx,
+ struct swap_entries *swap, bool may_resize) {
+ struct ordered_hashmap_entry *new_entry;
+ int r;
+
+ assert(idx < n_buckets(h));
+
+ new_entry = bucket_at_swap(swap, IDX_PUT);
+
+ if (may_resize) {
+ r = resize_buckets(h, 1);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ idx = bucket_hash(h, new_entry->p.b.key);
+ }
+ assert(n_entries(h) < n_buckets(h));
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+
+ new_entry->iterate_next = IDX_NIL;
+ new_entry->iterate_previous = lh->iterate_list_tail;
+
+ if (lh->iterate_list_tail != IDX_NIL) {
+ struct ordered_hashmap_entry *old_tail;
+
+ old_tail = ordered_bucket_at(lh, lh->iterate_list_tail);
+ assert(old_tail->iterate_next == IDX_NIL);
+ old_tail->iterate_next = IDX_PUT;
+ }
+
+ lh->iterate_list_tail = IDX_PUT;
+ if (lh->iterate_list_head == IDX_NIL)
+ lh->iterate_list_head = IDX_PUT;
+ }
+
+ assert_se(hashmap_put_robin_hood(h, idx, swap) == false);
+
+ n_entries_inc(h);
+#if ENABLE_DEBUG_HASHMAP
+ h->debug.max_entries = MAX(h->debug.max_entries, n_entries(h));
+#endif
+
+ base_set_dirty(h);
+
+ return 1;
+}
+#define hashmap_put_boldly(h, idx, swap, may_resize) \
+ hashmap_base_put_boldly(HASHMAP_BASE(h), idx, swap, may_resize)
+
+/*
+ * Returns 0 if resize is not needed.
+ * 1 if successfully resized.
+ * -ENOMEM on allocation failure.
+ */
+static int resize_buckets(HashmapBase *h, unsigned entries_add) {
+ struct swap_entries swap;
+ void *new_storage;
+ dib_raw_t *old_dibs, *new_dibs;
+ const struct hashmap_type_info *hi;
+ unsigned idx, optimal_idx;
+ unsigned old_n_buckets, new_n_buckets, n_rehashed, new_n_entries;
+ uint8_t new_shift;
+ bool rehash_next;
+
+ assert(h);
+
+ hi = &hashmap_type_info[h->type];
+ new_n_entries = n_entries(h) + entries_add;
+
+ /* overflow? */
+ if (_unlikely_(new_n_entries < entries_add))
+ return -ENOMEM;
+
+ /* For direct storage we allow 100% load, because it's tiny. */
+ if (!h->has_indirect && new_n_entries <= hi->n_direct_buckets)
+ return 0;
+
+ /*
+ * Load factor = n/m = 1 - (1/INV_KEEP_FREE).
+ * From it follows: m = n + n/(INV_KEEP_FREE - 1)
+ */
+ new_n_buckets = new_n_entries + new_n_entries / (INV_KEEP_FREE - 1);
+ /* overflow? */
+ if (_unlikely_(new_n_buckets < new_n_entries))
+ return -ENOMEM;
+
+ if (_unlikely_(new_n_buckets > UINT_MAX / (hi->entry_size + sizeof(dib_raw_t))))
+ return -ENOMEM;
+
+ old_n_buckets = n_buckets(h);
+
+ if (_likely_(new_n_buckets <= old_n_buckets))
+ return 0;
+
+ new_shift = log2u_round_up(MAX(
+ new_n_buckets * (hi->entry_size + sizeof(dib_raw_t)),
+ 2 * sizeof(struct direct_storage)));
+
+ /* Realloc storage (buckets and DIB array). */
+ new_storage = realloc(h->has_indirect ? h->indirect.storage : NULL,
+ 1U << new_shift);
+ if (!new_storage)
+ return -ENOMEM;
+
+ /* Must upgrade direct to indirect storage. */
+ if (!h->has_indirect) {
+ memcpy(new_storage, h->direct.storage,
+ old_n_buckets * (hi->entry_size + sizeof(dib_raw_t)));
+ h->indirect.n_entries = h->n_direct_entries;
+ h->indirect.idx_lowest_entry = 0;
+ h->n_direct_entries = 0;
+ }
+
+ /* Get a new hash key. If we've just upgraded to indirect storage,
+ * allow reusing a previously generated key. It's still a different key
+ * from the shared one that we used for direct storage. */
+ get_hash_key(h->indirect.hash_key, !h->has_indirect);
+
+ h->has_indirect = true;
+ h->indirect.storage = new_storage;
+ h->indirect.n_buckets = (1U << new_shift) /
+ (hi->entry_size + sizeof(dib_raw_t));
+
+ old_dibs = (dib_raw_t*)((uint8_t*) new_storage + hi->entry_size * old_n_buckets);
+ new_dibs = dib_raw_ptr(h);
+
+ /*
+ * Move the DIB array to the new place, replacing valid DIB values with
+ * DIB_RAW_REHASH to indicate all of the used buckets need rehashing.
+ * Note: Overlap is not possible, because we have at least doubled the
+ * number of buckets and dib_raw_t is smaller than any entry type.
+ */
+ for (idx = 0; idx < old_n_buckets; idx++) {
+ assert(old_dibs[idx] != DIB_RAW_REHASH);
+ new_dibs[idx] = old_dibs[idx] == DIB_RAW_FREE ? DIB_RAW_FREE
+ : DIB_RAW_REHASH;
+ }
+
+ /* Zero the area of newly added entries (including the old DIB area) */
+ memzero(bucket_at(h, old_n_buckets),
+ (n_buckets(h) - old_n_buckets) * hi->entry_size);
+
+ /* The upper half of the new DIB array needs initialization */
+ memset(&new_dibs[old_n_buckets], DIB_RAW_INIT,
+ (n_buckets(h) - old_n_buckets) * sizeof(dib_raw_t));
+
+ /* Rehash entries that need it */
+ n_rehashed = 0;
+ for (idx = 0; idx < old_n_buckets; idx++) {
+ if (new_dibs[idx] != DIB_RAW_REHASH)
+ continue;
+
+ optimal_idx = bucket_hash(h, bucket_at(h, idx)->key);
+
+ /*
+ * Not much to do if by luck the entry hashes to its current
+ * location. Just set its DIB.
+ */
+ if (optimal_idx == idx) {
+ new_dibs[idx] = 0;
+ n_rehashed++;
+ continue;
+ }
+
+ new_dibs[idx] = DIB_RAW_FREE;
+ bucket_move_entry(h, &swap, idx, IDX_PUT);
+ /* bucket_move_entry does not clear the source */
+ memzero(bucket_at(h, idx), hi->entry_size);
+
+ do {
+ /*
+ * Find the new bucket for the current entry. This may make
+ * another entry homeless and load it into IDX_PUT.
+ */
+ rehash_next = hashmap_put_robin_hood(h, optimal_idx, &swap);
+ n_rehashed++;
+
+ /* Did the current entry displace another one? */
+ if (rehash_next)
+ optimal_idx = bucket_hash(h, bucket_at_swap(&swap, IDX_PUT)->p.b.key);
+ } while (rehash_next);
+ }
+
+ assert(n_rehashed == n_entries(h));
+
+ return 1;
+}
+
+/*
+ * Finds an entry with a matching key
+ * Returns: index of the found entry, or IDX_NIL if not found.
+ */
+static unsigned base_bucket_scan(HashmapBase *h, unsigned idx, const void *key) {
+ struct hashmap_base_entry *e;
+ unsigned dib, distance;
+ dib_raw_t *dibs = dib_raw_ptr(h);
+
+ assert(idx < n_buckets(h));
+
+ for (distance = 0; ; distance++) {
+ if (dibs[idx] == DIB_RAW_FREE)
+ return IDX_NIL;
+
+ dib = bucket_calculate_dib(h, idx, dibs[idx]);
+
+ if (dib < distance)
+ return IDX_NIL;
+ if (dib == distance) {
+ e = bucket_at(h, idx);
+ if (h->hash_ops->compare(e->key, key) == 0)
+ return idx;
+ }
+
+ idx = next_idx(h, idx);
+ }
+}
+#define bucket_scan(h, idx, key) base_bucket_scan(HASHMAP_BASE(h), idx, key)
+
+int hashmap_put(Hashmap *h, const void *key, void *value) {
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
+
+ assert(h);
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx != IDX_NIL) {
+ e = plain_bucket_at(h, idx);
+ if (e->value == value)
+ return 0;
+ return -EEXIST;
+ }
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = key;
+ e->value = value;
+ return hashmap_put_boldly(h, hash, &swap, true);
+}
+
+int set_put(Set *s, const void *key) {
+ struct swap_entries swap;
+ struct hashmap_base_entry *e;
+ unsigned hash, idx;
+
+ assert(s);
+
+ hash = bucket_hash(s, key);
+ idx = bucket_scan(s, hash, key);
+ if (idx != IDX_NIL)
+ return 0;
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ e->key = key;
+ return hashmap_put_boldly(s, hash, &swap, true);
+}
+
+int hashmap_replace(Hashmap *h, const void *key, void *value) {
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
+
+ assert(h);
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx != IDX_NIL) {
+ e = plain_bucket_at(h, idx);
+#if ENABLE_DEBUG_HASHMAP
+ /* Although the key is equal, the key pointer may have changed,
+ * and this would break our assumption for iterating. So count
+ * this operation as incompatible with iteration. */
+ if (e->b.key != key) {
+ h->b.debug.put_count++;
+ h->b.debug.rem_count++;
+ h->b.debug.last_rem_idx = idx;
+ }
+#endif
+ e->b.key = key;
+ e->value = value;
+ hashmap_set_dirty(h);
+
+ return 0;
+ }
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = key;
+ e->value = value;
+ return hashmap_put_boldly(h, hash, &swap, true);
+}
+
+int hashmap_update(Hashmap *h, const void *key, void *value) {
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
+
+ assert(h);
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
+
+ e = plain_bucket_at(h, idx);
+ e->value = value;
+ hashmap_set_dirty(h);
+
+ return 0;
+}
+
+void *internal_hashmap_get(HashmapBase *h, const void *key) {
+ struct hashmap_base_entry *e;
+ unsigned hash, idx;
+
+ if (!h)
+ return NULL;
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = bucket_at(h, idx);
+ return entry_value(h, e);
+}
+
+void *hashmap_get2(Hashmap *h, const void *key, void **key2) {
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
+
+ if (!h)
+ return NULL;
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = plain_bucket_at(h, idx);
+ if (key2)
+ *key2 = (void*) e->b.key;
+
+ return e->value;
+}
+
+bool internal_hashmap_contains(HashmapBase *h, const void *key) {
+ unsigned hash;
+
+ if (!h)
+ return false;
+
+ hash = bucket_hash(h, key);
+ return bucket_scan(h, hash, key) != IDX_NIL;
+}
+
+void *internal_hashmap_remove(HashmapBase *h, const void *key) {
+ struct hashmap_base_entry *e;
+ unsigned hash, idx;
+ void *data;
+
+ if (!h)
+ return NULL;
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = bucket_at(h, idx);
+ data = entry_value(h, e);
+ remove_entry(h, idx);
+
+ return data;
+}
+
+void *hashmap_remove2(Hashmap *h, const void *key, void **rkey) {
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
+ void *data;
+
+ if (!h) {
+ if (rkey)
+ *rkey = NULL;
+ return NULL;
+ }
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL) {
+ if (rkey)
+ *rkey = NULL;
+ return NULL;
+ }
+
+ e = plain_bucket_at(h, idx);
+ data = e->value;
+ if (rkey)
+ *rkey = (void*) e->b.key;
+
+ remove_entry(h, idx);
+
+ return data;
+}
+
+int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value) {
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned old_hash, new_hash, idx;
+
+ if (!h)
+ return -ENOENT;
+
+ old_hash = bucket_hash(h, old_key);
+ idx = bucket_scan(h, old_hash, old_key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
+
+ new_hash = bucket_hash(h, new_key);
+ if (bucket_scan(h, new_hash, new_key) != IDX_NIL)
+ return -EEXIST;
+
+ remove_entry(h, idx);
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = new_key;
+ e->value = value;
+ assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
+
+ return 0;
+}
+
+int set_remove_and_put(Set *s, const void *old_key, const void *new_key) {
+ struct swap_entries swap;
+ struct hashmap_base_entry *e;
+ unsigned old_hash, new_hash, idx;
+
+ if (!s)
+ return -ENOENT;
+
+ old_hash = bucket_hash(s, old_key);
+ idx = bucket_scan(s, old_hash, old_key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
+
+ new_hash = bucket_hash(s, new_key);
+ if (bucket_scan(s, new_hash, new_key) != IDX_NIL)
+ return -EEXIST;
+
+ remove_entry(s, idx);
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ e->key = new_key;
+ assert_se(hashmap_put_boldly(s, new_hash, &swap, false) == 1);
+
+ return 0;
+}
+
+int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value) {
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned old_hash, new_hash, idx_old, idx_new;
+
+ if (!h)
+ return -ENOENT;
+
+ old_hash = bucket_hash(h, old_key);
+ idx_old = bucket_scan(h, old_hash, old_key);
+ if (idx_old == IDX_NIL)
+ return -ENOENT;
+
+ old_key = bucket_at(HASHMAP_BASE(h), idx_old)->key;
+
+ new_hash = bucket_hash(h, new_key);
+ idx_new = bucket_scan(h, new_hash, new_key);
+ if (idx_new != IDX_NIL)
+ if (idx_old != idx_new) {
+ remove_entry(h, idx_new);
+ /* Compensate for a possible backward shift. */
+ if (old_key != bucket_at(HASHMAP_BASE(h), idx_old)->key)
+ idx_old = prev_idx(HASHMAP_BASE(h), idx_old);
+ assert(old_key == bucket_at(HASHMAP_BASE(h), idx_old)->key);
+ }
+
+ remove_entry(h, idx_old);
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = new_key;
+ e->value = value;
+ assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
+
+ return 0;
+}
+
+void *internal_hashmap_remove_value(HashmapBase *h, const void *key, void *value) {
+ struct hashmap_base_entry *e;
+ unsigned hash, idx;
+
+ if (!h)
+ return NULL;
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = bucket_at(h, idx);
+ if (entry_value(h, e) != value)
+ return NULL;
+
+ remove_entry(h, idx);
+
+ return value;
+}
+
+static unsigned find_first_entry(HashmapBase *h) {
+ Iterator i = ITERATOR_FIRST;
+
+ if (!h || !n_entries(h))
+ return IDX_NIL;
+
+ return hashmap_iterate_entry(h, &i);
+}
+
+void *internal_hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key) {
+ struct hashmap_base_entry *e;
+ void *key, *data;
+ unsigned idx;
+
+ idx = find_first_entry(h);
+ if (idx == IDX_NIL) {
+ if (ret_key)
+ *ret_key = NULL;
+ return NULL;
+ }
+
+ e = bucket_at(h, idx);
+ key = (void*) e->key;
+ data = entry_value(h, e);
+
+ if (remove)
+ remove_entry(h, idx);
+
+ if (ret_key)
+ *ret_key = key;
+
+ return data;
+}
+
+unsigned internal_hashmap_size(HashmapBase *h) {
+
+ if (!h)
+ return 0;
+
+ return n_entries(h);
+}
+
+unsigned internal_hashmap_buckets(HashmapBase *h) {
+
+ if (!h)
+ return 0;
+
+ return n_buckets(h);
+}
+
+int internal_hashmap_merge(Hashmap *h, Hashmap *other) {
+ Iterator i;
+ unsigned idx;
+
+ assert(h);
+
+ HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
+ struct plain_hashmap_entry *pe = plain_bucket_at(other, idx);
+ int r;
+
+ r = hashmap_put(h, pe->b.key, pe->value);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ return 0;
+}
+
+int set_merge(Set *s, Set *other) {
+ Iterator i;
+ unsigned idx;
+
+ assert(s);
+
+ HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
+ struct set_entry *se = set_bucket_at(other, idx);
+ int r;
+
+ r = set_put(s, se->b.key);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int internal_hashmap_reserve(HashmapBase *h, unsigned entries_add) {
+ int r;
+
+ assert(h);
+
+ r = resize_buckets(h, entries_add);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+/*
+ * The same as hashmap_merge(), but every new item from other is moved to h.
+ * Keys already in h are skipped and stay in other.
+ * Returns: 0 on success.
+ * -ENOMEM on alloc failure, in which case no move has been done.
+ */
+int internal_hashmap_move(HashmapBase *h, HashmapBase *other) {
+ struct swap_entries swap;
+ struct hashmap_base_entry *e, *n;
+ Iterator i;
+ unsigned idx;
+ int r;
+
+ assert(h);
+
+ if (!other)
+ return 0;
+
+ assert(other->type == h->type);
+
+ /*
+ * This reserves buckets for the worst case, where none of other's
+ * entries are yet present in h. This is preferable to risking
+ * an allocation failure in the middle of the moving and having to
+ * rollback or return a partial result.
+ */
+ r = resize_buckets(h, n_entries(other));
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_IDX(idx, other, i) {
+ unsigned h_hash;
+
+ e = bucket_at(other, idx);
+ h_hash = bucket_hash(h, e->key);
+ if (bucket_scan(h, h_hash, e->key) != IDX_NIL)
+ continue;
+
+ n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ n->key = e->key;
+ if (h->type != HASHMAP_TYPE_SET)
+ ((struct plain_hashmap_entry*) n)->value =
+ ((struct plain_hashmap_entry*) e)->value;
+ assert_se(hashmap_put_boldly(h, h_hash, &swap, false) == 1);
+
+ remove_entry(other, idx);
+ }
+
+ return 0;
+}
+
+int internal_hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key) {
+ struct swap_entries swap;
+ unsigned h_hash, other_hash, idx;
+ struct hashmap_base_entry *e, *n;
+ int r;
+
+ assert(h);
+
+ h_hash = bucket_hash(h, key);
+ if (bucket_scan(h, h_hash, key) != IDX_NIL)
+ return -EEXIST;
+
+ if (!other)
+ return -ENOENT;
+
+ assert(other->type == h->type);
+
+ other_hash = bucket_hash(other, key);
+ idx = bucket_scan(other, other_hash, key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
+
+ e = bucket_at(other, idx);
+
+ n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ n->key = e->key;
+ if (h->type != HASHMAP_TYPE_SET)
+ ((struct plain_hashmap_entry*) n)->value =
+ ((struct plain_hashmap_entry*) e)->value;
+ r = hashmap_put_boldly(h, h_hash, &swap, true);
+ if (r < 0)
+ return r;
+
+ remove_entry(other, idx);
+ return 0;
+}
+
+HashmapBase *internal_hashmap_copy(HashmapBase *h) {
+ HashmapBase *copy;
+ int r;
+
+ assert(h);
+
+ copy = hashmap_base_new(h->hash_ops, h->type HASHMAP_DEBUG_SRC_ARGS);
+ if (!copy)
+ return NULL;
+
+ switch (h->type) {
+ case HASHMAP_TYPE_PLAIN:
+ case HASHMAP_TYPE_ORDERED:
+ r = hashmap_merge((Hashmap*)copy, (Hashmap*)h);
+ break;
+ case HASHMAP_TYPE_SET:
+ r = set_merge((Set*)copy, (Set*)h);
+ break;
+ default:
+ assert_not_reached("Unknown hashmap type");
+ }
+
+ if (r < 0) {
+ internal_hashmap_free(copy, false, false);
+ return NULL;
+ }
+
+ return copy;
+}
+
+char **internal_hashmap_get_strv(HashmapBase *h) {
+ char **sv;
+ Iterator i;
+ unsigned idx, n;
+
+ sv = new(char*, n_entries(h)+1);
+ if (!sv)
+ return NULL;
+
+ n = 0;
+ HASHMAP_FOREACH_IDX(idx, h, i)
+ sv[n++] = entry_value(h, bucket_at(h, idx));
+ sv[n] = NULL;
+
+ return sv;
+}
+
+void *ordered_hashmap_next(OrderedHashmap *h, const void *key) {
+ struct ordered_hashmap_entry *e;
+ unsigned hash, idx;
+
+ if (!h)
+ return NULL;
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = ordered_bucket_at(h, idx);
+ if (e->iterate_next == IDX_NIL)
+ return NULL;
+ return ordered_bucket_at(h, e->iterate_next)->p.value;
+}
+
+int set_consume(Set *s, void *value) {
+ int r;
+
+ assert(s);
+ assert(value);
+
+ r = set_put(s, value);
+ if (r <= 0)
+ free(value);
+
+ return r;
+}
+
+int set_put_strdup(Set *s, const char *p) {
+ char *c;
+
+ assert(s);
+ assert(p);
+
+ if (set_contains(s, (char*) p))
+ return 0;
+
+ c = strdup(p);
+ if (!c)
+ return -ENOMEM;
+
+ return set_consume(s, c);
+}
+
+int set_put_strdupv(Set *s, char **l) {
+ int n = 0, r;
+ char **i;
+
+ assert(s);
+
+ STRV_FOREACH(i, l) {
+ r = set_put_strdup(s, *i);
+ if (r < 0)
+ return r;
+
+ n += r;
+ }
+
+ return n;
+}
+
+int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags) {
+ const char *p = v;
+ int r;
+
+ assert(s);
+ assert(v);
+
+ for (;;) {
+ char *word;
+
+ r = extract_first_word(&p, &word, separators, flags);
+ if (r <= 0)
+ return r;
+
+ r = set_consume(s, word);
+ if (r < 0)
+ return r;
+ }
+}
+
+/* expand the cachemem if needed, return true if newly (re)activated. */
+static int cachemem_maintain(CacheMem *mem, unsigned size) {
+ assert(mem);
+
+ if (!GREEDY_REALLOC(mem->ptr, mem->n_allocated, size)) {
+ if (size > 0)
+ return -ENOMEM;
+ }
+
+ if (!mem->active) {
+ mem->active = true;
+ return true;
+ }
+
+ return false;
+}
+
+int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries) {
+ bool sync_keys = false, sync_values = false;
+ unsigned size;
+ int r;
+
+ assert(cache);
+ assert(cache->hashmap);
+
+ size = n_entries(cache->hashmap);
+
+ if (res_keys) {
+ r = cachemem_maintain(&cache->keys, size);
+ if (r < 0)
+ return r;
+
+ sync_keys = r;
+ } else
+ cache->keys.active = false;
+
+ if (res_values) {
+ r = cachemem_maintain(&cache->values, size);
+ if (r < 0)
+ return r;
+
+ sync_values = r;
+ } else
+ cache->values.active = false;
+
+ if (cache->hashmap->dirty) {
+ if (cache->keys.active)
+ sync_keys = true;
+ if (cache->values.active)
+ sync_values = true;
+
+ cache->hashmap->dirty = false;
+ }
+
+ if (sync_keys || sync_values) {
+ unsigned i, idx;
+ Iterator iter;
+
+ i = 0;
+ HASHMAP_FOREACH_IDX(idx, cache->hashmap, iter) {
+ struct hashmap_base_entry *e;
+
+ e = bucket_at(cache->hashmap, idx);
+
+ if (sync_keys)
+ cache->keys.ptr[i] = e->key;
+ if (sync_values)
+ cache->values.ptr[i] = entry_value(cache->hashmap, e);
+ i++;
+ }
+ }
+
+ if (res_keys)
+ *res_keys = cache->keys.ptr;
+ if (res_values)
+ *res_values = cache->values.ptr;
+ if (res_n_entries)
+ *res_n_entries = size;
+
+ return 0;
+}
+
+IteratedCache *iterated_cache_free(IteratedCache *cache) {
+ if (cache) {
+ free(cache->keys.ptr);
+ free(cache->values.ptr);
+ free(cache);
+ }
+
+ return NULL;
+}
diff --git a/src/basic/hashmap.h b/src/basic/hashmap.h
new file mode 100644
index 0000000..e16a9f9
--- /dev/null
+++ b/src/basic/hashmap.h
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "hash-funcs.h"
+#include "macro.h"
+#include "util.h"
+
+/*
+ * A hash table implementation. As a minor optimization a NULL hashmap object
+ * will be treated as empty hashmap for all read operations. That way it is not
+ * necessary to instantiate an object for each Hashmap use.
+ *
+ * If ENABLE_DEBUG_HASHMAP is defined (by configuring with --enable-debug=hashmap),
+ * the implementation will:
+ * - store extra data for debugging and statistics (see tools/gdb-sd_dump_hashmaps.py)
+ * - perform extra checks for invalid use of iterators
+ */
+
+#define HASH_KEY_SIZE 16
+
+typedef void* (*hashmap_destroy_t)(void *p);
+
+/* The base type for all hashmap and set types. Many functions in the
+ * implementation take (HashmapBase*) parameters and are run-time polymorphic,
+ * though the API is not meant to be polymorphic (do not call functions
+ * internal_*() directly). */
+typedef struct HashmapBase HashmapBase;
+
+/* Specific hashmap/set types */
+typedef struct Hashmap Hashmap; /* Maps keys to values */
+typedef struct OrderedHashmap OrderedHashmap; /* Like Hashmap, but also remembers entry insertion order */
+typedef struct Set Set; /* Stores just keys */
+
+typedef struct IteratedCache IteratedCache; /* Caches the iterated order of one of the above */
+
+/* Ideally the Iterator would be an opaque struct, but it is instantiated
+ * by hashmap users, so the definition has to be here. Do not use its fields
+ * directly. */
+typedef struct {
+ unsigned idx; /* index of an entry to be iterated next */
+ const void *next_key; /* expected value of that entry's key pointer */
+#if ENABLE_DEBUG_HASHMAP
+ unsigned put_count; /* hashmap's put_count recorded at start of iteration */
+ unsigned rem_count; /* hashmap's rem_count in previous iteration */
+ unsigned prev_idx; /* idx in previous iteration */
+#endif
+} Iterator;
+
+#define _IDX_ITERATOR_FIRST (UINT_MAX - 1)
+#define ITERATOR_FIRST ((Iterator) { .idx = _IDX_ITERATOR_FIRST, .next_key = NULL })
+
+/* Macros for type checking */
+#define PTR_COMPATIBLE_WITH_HASHMAP_BASE(h) \
+ (__builtin_types_compatible_p(typeof(h), HashmapBase*) || \
+ __builtin_types_compatible_p(typeof(h), Hashmap*) || \
+ __builtin_types_compatible_p(typeof(h), OrderedHashmap*) || \
+ __builtin_types_compatible_p(typeof(h), Set*))
+
+#define PTR_COMPATIBLE_WITH_PLAIN_HASHMAP(h) \
+ (__builtin_types_compatible_p(typeof(h), Hashmap*) || \
+ __builtin_types_compatible_p(typeof(h), OrderedHashmap*)) \
+
+#define HASHMAP_BASE(h) \
+ __builtin_choose_expr(PTR_COMPATIBLE_WITH_HASHMAP_BASE(h), \
+ (HashmapBase*)(h), \
+ (void)0)
+
+#define PLAIN_HASHMAP(h) \
+ __builtin_choose_expr(PTR_COMPATIBLE_WITH_PLAIN_HASHMAP(h), \
+ (Hashmap*)(h), \
+ (void)0)
+
+#if ENABLE_DEBUG_HASHMAP
+# define HASHMAP_DEBUG_PARAMS , const char *func, const char *file, int line
+# define HASHMAP_DEBUG_SRC_ARGS , __func__, __FILE__, __LINE__
+# define HASHMAP_DEBUG_PASS_ARGS , func, file, line
+#else
+# define HASHMAP_DEBUG_PARAMS
+# define HASHMAP_DEBUG_SRC_ARGS
+# define HASHMAP_DEBUG_PASS_ARGS
+#endif
+
+Hashmap *internal_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+OrderedHashmap *internal_ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+#define hashmap_new(ops) internal_hashmap_new(ops HASHMAP_DEBUG_SRC_ARGS)
+#define ordered_hashmap_new(ops) internal_ordered_hashmap_new(ops HASHMAP_DEBUG_SRC_ARGS)
+
+HashmapBase *internal_hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value);
+static inline Hashmap *hashmap_free(Hashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), NULL, NULL);
+}
+static inline OrderedHashmap *ordered_hashmap_free(OrderedHashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), NULL, NULL);
+}
+
+static inline Hashmap *hashmap_free_free(Hashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), NULL, free);
+}
+static inline OrderedHashmap *ordered_hashmap_free_free(OrderedHashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), NULL, free);
+}
+
+static inline Hashmap *hashmap_free_free_key(Hashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), free, NULL);
+}
+static inline OrderedHashmap *ordered_hashmap_free_free_key(OrderedHashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), free, NULL);
+}
+
+static inline Hashmap *hashmap_free_free_free(Hashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), free, free);
+}
+static inline OrderedHashmap *ordered_hashmap_free_free_free(OrderedHashmap *h) {
+ return (void*) internal_hashmap_free(HASHMAP_BASE(h), free, free);
+}
+
+IteratedCache *iterated_cache_free(IteratedCache *cache);
+int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries);
+
+HashmapBase *internal_hashmap_copy(HashmapBase *h);
+static inline Hashmap *hashmap_copy(Hashmap *h) {
+ return (Hashmap*) internal_hashmap_copy(HASHMAP_BASE(h));
+}
+static inline OrderedHashmap *ordered_hashmap_copy(OrderedHashmap *h) {
+ return (OrderedHashmap*) internal_hashmap_copy(HASHMAP_BASE(h));
+}
+
+int internal_hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+int internal_ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+#define hashmap_ensure_allocated(h, ops) internal_hashmap_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS)
+#define ordered_hashmap_ensure_allocated(h, ops) internal_ordered_hashmap_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS)
+
+IteratedCache *internal_hashmap_iterated_cache_new(HashmapBase *h);
+static inline IteratedCache *hashmap_iterated_cache_new(Hashmap *h) {
+ return (IteratedCache*) internal_hashmap_iterated_cache_new(HASHMAP_BASE(h));
+}
+static inline IteratedCache *ordered_hashmap_iterated_cache_new(OrderedHashmap *h) {
+ return (IteratedCache*) internal_hashmap_iterated_cache_new(HASHMAP_BASE(h));
+}
+
+int hashmap_put(Hashmap *h, const void *key, void *value);
+static inline int ordered_hashmap_put(OrderedHashmap *h, const void *key, void *value) {
+ return hashmap_put(PLAIN_HASHMAP(h), key, value);
+}
+
+int hashmap_update(Hashmap *h, const void *key, void *value);
+static inline int ordered_hashmap_update(OrderedHashmap *h, const void *key, void *value) {
+ return hashmap_update(PLAIN_HASHMAP(h), key, value);
+}
+
+int hashmap_replace(Hashmap *h, const void *key, void *value);
+static inline int ordered_hashmap_replace(OrderedHashmap *h, const void *key, void *value) {
+ return hashmap_replace(PLAIN_HASHMAP(h), key, value);
+}
+
+void *internal_hashmap_get(HashmapBase *h, const void *key);
+static inline void *hashmap_get(Hashmap *h, const void *key) {
+ return internal_hashmap_get(HASHMAP_BASE(h), key);
+}
+static inline void *ordered_hashmap_get(OrderedHashmap *h, const void *key) {
+ return internal_hashmap_get(HASHMAP_BASE(h), key);
+}
+
+void *hashmap_get2(Hashmap *h, const void *key, void **rkey);
+static inline void *ordered_hashmap_get2(OrderedHashmap *h, const void *key, void **rkey) {
+ return hashmap_get2(PLAIN_HASHMAP(h), key, rkey);
+}
+
+bool internal_hashmap_contains(HashmapBase *h, const void *key);
+static inline bool hashmap_contains(Hashmap *h, const void *key) {
+ return internal_hashmap_contains(HASHMAP_BASE(h), key);
+}
+static inline bool ordered_hashmap_contains(OrderedHashmap *h, const void *key) {
+ return internal_hashmap_contains(HASHMAP_BASE(h), key);
+}
+
+void *internal_hashmap_remove(HashmapBase *h, const void *key);
+static inline void *hashmap_remove(Hashmap *h, const void *key) {
+ return internal_hashmap_remove(HASHMAP_BASE(h), key);
+}
+static inline void *ordered_hashmap_remove(OrderedHashmap *h, const void *key) {
+ return internal_hashmap_remove(HASHMAP_BASE(h), key);
+}
+
+void *hashmap_remove2(Hashmap *h, const void *key, void **rkey);
+static inline void *ordered_hashmap_remove2(OrderedHashmap *h, const void *key, void **rkey) {
+ return hashmap_remove2(PLAIN_HASHMAP(h), key, rkey);
+}
+
+void *internal_hashmap_remove_value(HashmapBase *h, const void *key, void *value);
+static inline void *hashmap_remove_value(Hashmap *h, const void *key, void *value) {
+ return internal_hashmap_remove_value(HASHMAP_BASE(h), key, value);
+}
+
+static inline void *ordered_hashmap_remove_value(OrderedHashmap *h, const void *key, void *value) {
+ return hashmap_remove_value(PLAIN_HASHMAP(h), key, value);
+}
+
+int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value);
+static inline int ordered_hashmap_remove_and_put(OrderedHashmap *h, const void *old_key, const void *new_key, void *value) {
+ return hashmap_remove_and_put(PLAIN_HASHMAP(h), old_key, new_key, value);
+}
+
+int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value);
+static inline int ordered_hashmap_remove_and_replace(OrderedHashmap *h, const void *old_key, const void *new_key, void *value) {
+ return hashmap_remove_and_replace(PLAIN_HASHMAP(h), old_key, new_key, value);
+}
+
+/* Since merging data from a OrderedHashmap into a Hashmap or vice-versa
+ * should just work, allow this by having looser type-checking here. */
+int internal_hashmap_merge(Hashmap *h, Hashmap *other);
+#define hashmap_merge(h, other) internal_hashmap_merge(PLAIN_HASHMAP(h), PLAIN_HASHMAP(other))
+#define ordered_hashmap_merge(h, other) hashmap_merge(h, other)
+
+int internal_hashmap_reserve(HashmapBase *h, unsigned entries_add);
+static inline int hashmap_reserve(Hashmap *h, unsigned entries_add) {
+ return internal_hashmap_reserve(HASHMAP_BASE(h), entries_add);
+}
+static inline int ordered_hashmap_reserve(OrderedHashmap *h, unsigned entries_add) {
+ return internal_hashmap_reserve(HASHMAP_BASE(h), entries_add);
+}
+
+int internal_hashmap_move(HashmapBase *h, HashmapBase *other);
+/* Unlike hashmap_merge, hashmap_move does not allow mixing the types. */
+static inline int hashmap_move(Hashmap *h, Hashmap *other) {
+ return internal_hashmap_move(HASHMAP_BASE(h), HASHMAP_BASE(other));
+}
+static inline int ordered_hashmap_move(OrderedHashmap *h, OrderedHashmap *other) {
+ return internal_hashmap_move(HASHMAP_BASE(h), HASHMAP_BASE(other));
+}
+
+int internal_hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key);
+static inline int hashmap_move_one(Hashmap *h, Hashmap *other, const void *key) {
+ return internal_hashmap_move_one(HASHMAP_BASE(h), HASHMAP_BASE(other), key);
+}
+static inline int ordered_hashmap_move_one(OrderedHashmap *h, OrderedHashmap *other, const void *key) {
+ return internal_hashmap_move_one(HASHMAP_BASE(h), HASHMAP_BASE(other), key);
+}
+
+unsigned internal_hashmap_size(HashmapBase *h) _pure_;
+static inline unsigned hashmap_size(Hashmap *h) {
+ return internal_hashmap_size(HASHMAP_BASE(h));
+}
+static inline unsigned ordered_hashmap_size(OrderedHashmap *h) {
+ return internal_hashmap_size(HASHMAP_BASE(h));
+}
+
+static inline bool hashmap_isempty(Hashmap *h) {
+ return hashmap_size(h) == 0;
+}
+static inline bool ordered_hashmap_isempty(OrderedHashmap *h) {
+ return ordered_hashmap_size(h) == 0;
+}
+
+unsigned internal_hashmap_buckets(HashmapBase *h) _pure_;
+static inline unsigned hashmap_buckets(Hashmap *h) {
+ return internal_hashmap_buckets(HASHMAP_BASE(h));
+}
+static inline unsigned ordered_hashmap_buckets(OrderedHashmap *h) {
+ return internal_hashmap_buckets(HASHMAP_BASE(h));
+}
+
+bool internal_hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key);
+static inline bool hashmap_iterate(Hashmap *h, Iterator *i, void **value, const void **key) {
+ return internal_hashmap_iterate(HASHMAP_BASE(h), i, value, key);
+}
+static inline bool ordered_hashmap_iterate(OrderedHashmap *h, Iterator *i, void **value, const void **key) {
+ return internal_hashmap_iterate(HASHMAP_BASE(h), i, value, key);
+}
+
+void internal_hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value);
+static inline void hashmap_clear(Hashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), NULL, NULL);
+}
+static inline void ordered_hashmap_clear(OrderedHashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), NULL, NULL);
+}
+
+static inline void hashmap_clear_free(Hashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), NULL, free);
+}
+static inline void ordered_hashmap_clear_free(OrderedHashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), NULL, free);
+}
+
+static inline void hashmap_clear_free_key(Hashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), free, NULL);
+}
+static inline void ordered_hashmap_clear_free_key(OrderedHashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), free, NULL);
+}
+
+static inline void hashmap_clear_free_free(Hashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), free, free);
+}
+static inline void ordered_hashmap_clear_free_free(OrderedHashmap *h) {
+ internal_hashmap_clear(HASHMAP_BASE(h), free, free);
+}
+
+/*
+ * Note about all *_first*() functions
+ *
+ * For plain Hashmaps and Sets the order of entries is undefined.
+ * The functions find whatever entry is first in the implementation
+ * internal order.
+ *
+ * Only for OrderedHashmaps the order is well defined and finding
+ * the first entry is O(1).
+ */
+
+void *internal_hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key);
+static inline void *hashmap_steal_first_key_and_value(Hashmap *h, void **ret) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), true, ret);
+}
+static inline void *ordered_hashmap_steal_first_key_and_value(OrderedHashmap *h, void **ret) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), true, ret);
+}
+static inline void *hashmap_first_key_and_value(Hashmap *h, void **ret) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), false, ret);
+}
+static inline void *ordered_hashmap_first_key_and_value(OrderedHashmap *h, void **ret) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), false, ret);
+}
+
+static inline void *hashmap_steal_first(Hashmap *h) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), true, NULL);
+}
+static inline void *ordered_hashmap_steal_first(OrderedHashmap *h) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), true, NULL);
+}
+static inline void *hashmap_first(Hashmap *h) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), false, NULL);
+}
+static inline void *ordered_hashmap_first(OrderedHashmap *h) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(h), false, NULL);
+}
+
+static inline void *internal_hashmap_first_key(HashmapBase *h, bool remove) {
+ void *key = NULL;
+
+ (void) internal_hashmap_first_key_and_value(HASHMAP_BASE(h), remove, &key);
+ return key;
+}
+static inline void *hashmap_steal_first_key(Hashmap *h) {
+ return internal_hashmap_first_key(HASHMAP_BASE(h), true);
+}
+static inline void *ordered_hashmap_steal_first_key(OrderedHashmap *h) {
+ return internal_hashmap_first_key(HASHMAP_BASE(h), true);
+}
+static inline void *hashmap_first_key(Hashmap *h) {
+ return internal_hashmap_first_key(HASHMAP_BASE(h), false);
+}
+static inline void *ordered_hashmap_first_key(OrderedHashmap *h) {
+ return internal_hashmap_first_key(HASHMAP_BASE(h), false);
+}
+
+#define hashmap_clear_with_destructor(_s, _f) \
+ ({ \
+ void *_item; \
+ while ((_item = hashmap_steal_first(_s))) \
+ _f(_item); \
+ })
+#define hashmap_free_with_destructor(_s, _f) \
+ ({ \
+ hashmap_clear_with_destructor(_s, _f); \
+ hashmap_free(_s); \
+ })
+#define ordered_hashmap_clear_with_destructor(_s, _f) \
+ ({ \
+ void *_item; \
+ while ((_item = ordered_hashmap_steal_first(_s))) \
+ _f(_item); \
+ })
+#define ordered_hashmap_free_with_destructor(_s, _f) \
+ ({ \
+ ordered_hashmap_clear_with_destructor(_s, _f); \
+ ordered_hashmap_free(_s); \
+ })
+
+/* no hashmap_next */
+void *ordered_hashmap_next(OrderedHashmap *h, const void *key);
+
+char **internal_hashmap_get_strv(HashmapBase *h);
+static inline char **hashmap_get_strv(Hashmap *h) {
+ return internal_hashmap_get_strv(HASHMAP_BASE(h));
+}
+static inline char **ordered_hashmap_get_strv(OrderedHashmap *h) {
+ return internal_hashmap_get_strv(HASHMAP_BASE(h));
+}
+
+/*
+ * Hashmaps are iterated in unpredictable order.
+ * OrderedHashmaps are an exception to this. They are iterated in the order
+ * the entries were inserted.
+ * It is safe to remove the current entry.
+ */
+#define HASHMAP_FOREACH(e, h, i) \
+ for ((i) = ITERATOR_FIRST; hashmap_iterate((h), &(i), (void**)&(e), NULL); )
+
+#define ORDERED_HASHMAP_FOREACH(e, h, i) \
+ for ((i) = ITERATOR_FIRST; ordered_hashmap_iterate((h), &(i), (void**)&(e), NULL); )
+
+#define HASHMAP_FOREACH_KEY(e, k, h, i) \
+ for ((i) = ITERATOR_FIRST; hashmap_iterate((h), &(i), (void**)&(e), (const void**) &(k)); )
+
+#define ORDERED_HASHMAP_FOREACH_KEY(e, k, h, i) \
+ for ((i) = ITERATOR_FIRST; ordered_hashmap_iterate((h), &(i), (void**)&(e), (const void**) &(k)); )
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free_free_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free_free);
+
+#define _cleanup_hashmap_free_ _cleanup_(hashmap_freep)
+#define _cleanup_hashmap_free_free_ _cleanup_(hashmap_free_freep)
+#define _cleanup_hashmap_free_free_free_ _cleanup_(hashmap_free_free_freep)
+#define _cleanup_ordered_hashmap_free_ _cleanup_(ordered_hashmap_freep)
+#define _cleanup_ordered_hashmap_free_free_ _cleanup_(ordered_hashmap_free_freep)
+#define _cleanup_ordered_hashmap_free_free_free_ _cleanup_(ordered_hashmap_free_free_freep)
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(IteratedCache*, iterated_cache_free);
+
+#define _cleanup_iterated_cache_free_ _cleanup_(iterated_cache_freep)
diff --git a/src/basic/hexdecoct.c b/src/basic/hexdecoct.c
new file mode 100644
index 0000000..c0f9640
--- /dev/null
+++ b/src/basic/hexdecoct.c
@@ -0,0 +1,819 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "string-util.h"
+#include "util.h"
+
+char octchar(int x) {
+ return '0' + (x & 7);
+}
+
+int unoctchar(char c) {
+
+ if (c >= '0' && c <= '7')
+ return c - '0';
+
+ return -EINVAL;
+}
+
+char decchar(int x) {
+ return '0' + (x % 10);
+}
+
+int undecchar(char c) {
+
+ if (c >= '0' && c <= '9')
+ return c - '0';
+
+ return -EINVAL;
+}
+
+char hexchar(int x) {
+ static const char table[16] = "0123456789abcdef";
+
+ return table[x & 15];
+}
+
+int unhexchar(char c) {
+
+ if (c >= '0' && c <= '9')
+ return c - '0';
+
+ if (c >= 'a' && c <= 'f')
+ return c - 'a' + 10;
+
+ if (c >= 'A' && c <= 'F')
+ return c - 'A' + 10;
+
+ return -EINVAL;
+}
+
+char *hexmem(const void *p, size_t l) {
+ const uint8_t *x;
+ char *r, *z;
+
+ z = r = new(char, l * 2 + 1);
+ if (!r)
+ return NULL;
+
+ for (x = p; x < (const uint8_t*) p + l; x++) {
+ *(z++) = hexchar(*x >> 4);
+ *(z++) = hexchar(*x & 15);
+ }
+
+ *z = 0;
+ return r;
+}
+
+static int unhex_next(const char **p, size_t *l) {
+ int r;
+
+ assert(p);
+ assert(l);
+
+ /* Find the next non-whitespace character, and decode it. We
+ * greedily skip all preceding and all following whitespace. */
+
+ for (;;) {
+ if (*l == 0)
+ return -EPIPE;
+
+ if (!strchr(WHITESPACE, **p))
+ break;
+
+ /* Skip leading whitespace */
+ (*p)++, (*l)--;
+ }
+
+ r = unhexchar(**p);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ (*p)++, (*l)--;
+
+ if (*l == 0 || !strchr(WHITESPACE, **p))
+ break;
+
+ /* Skip following whitespace */
+ }
+
+ return r;
+}
+
+int unhexmem(const char *p, size_t l, void **ret, size_t *ret_len) {
+ _cleanup_free_ uint8_t *buf = NULL;
+ const char *x;
+ uint8_t *z;
+
+ assert(ret);
+ assert(ret_len);
+ assert(p || l == 0);
+
+ if (l == (size_t) -1)
+ l = strlen(p);
+
+ /* Note that the calculation of memory size is an upper boundary, as we ignore whitespace while decoding */
+ buf = malloc((l + 1) / 2 + 1);
+ if (!buf)
+ return -ENOMEM;
+
+ for (x = p, z = buf;;) {
+ int a, b;
+
+ a = unhex_next(&x, &l);
+ if (a == -EPIPE) /* End of string */
+ break;
+ if (a < 0)
+ return a;
+
+ b = unhex_next(&x, &l);
+ if (b < 0)
+ return b;
+
+ *(z++) = (uint8_t) a << 4 | (uint8_t) b;
+ }
+
+ *z = 0;
+
+ *ret_len = (size_t) (z - buf);
+ *ret = TAKE_PTR(buf);
+
+ return 0;
+}
+
+/* https://tools.ietf.org/html/rfc4648#section-6
+ * Notice that base32hex differs from base32 in the alphabet it uses.
+ * The distinction is that the base32hex representation preserves the
+ * order of the underlying data when compared as bytestrings, this is
+ * useful when representing NSEC3 hashes, as one can then verify the
+ * order of hashes directly from their representation. */
+char base32hexchar(int x) {
+ static const char table[32] = "0123456789"
+ "ABCDEFGHIJKLMNOPQRSTUV";
+
+ return table[x & 31];
+}
+
+int unbase32hexchar(char c) {
+ unsigned offset;
+
+ if (c >= '0' && c <= '9')
+ return c - '0';
+
+ offset = '9' - '0' + 1;
+
+ if (c >= 'A' && c <= 'V')
+ return c - 'A' + offset;
+
+ return -EINVAL;
+}
+
+char *base32hexmem(const void *p, size_t l, bool padding) {
+ char *r, *z;
+ const uint8_t *x;
+ size_t len;
+
+ assert(p || l == 0);
+
+ if (padding)
+ /* five input bytes makes eight output bytes, padding is added so we must round up */
+ len = 8 * (l + 4) / 5;
+ else {
+ /* same, but round down as there is no padding */
+ len = 8 * l / 5;
+
+ switch (l % 5) {
+ case 4:
+ len += 7;
+ break;
+ case 3:
+ len += 5;
+ break;
+ case 2:
+ len += 4;
+ break;
+ case 1:
+ len += 2;
+ break;
+ }
+ }
+
+ z = r = malloc(len + 1);
+ if (!r)
+ return NULL;
+
+ for (x = p; x < (const uint8_t*) p + (l / 5) * 5; x += 5) {
+ /* x[0] == XXXXXXXX; x[1] == YYYYYYYY; x[2] == ZZZZZZZZ
+ * x[3] == QQQQQQQQ; x[4] == WWWWWWWW */
+ *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */
+ *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */
+ *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */
+ *(z++) = base32hexchar((x[1] & 1) << 4 | x[2] >> 4); /* 000YZZZZ */
+ *(z++) = base32hexchar((x[2] & 15) << 1 | x[3] >> 7); /* 000ZZZZQ */
+ *(z++) = base32hexchar((x[3] & 127) >> 2); /* 000QQQQQ */
+ *(z++) = base32hexchar((x[3] & 3) << 3 | x[4] >> 5); /* 000QQWWW */
+ *(z++) = base32hexchar((x[4] & 31)); /* 000WWWWW */
+ }
+
+ switch (l % 5) {
+ case 4:
+ *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */
+ *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */
+ *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */
+ *(z++) = base32hexchar((x[1] & 1) << 4 | x[2] >> 4); /* 000YZZZZ */
+ *(z++) = base32hexchar((x[2] & 15) << 1 | x[3] >> 7); /* 000ZZZZQ */
+ *(z++) = base32hexchar((x[3] & 127) >> 2); /* 000QQQQQ */
+ *(z++) = base32hexchar((x[3] & 3) << 3); /* 000QQ000 */
+ if (padding)
+ *(z++) = '=';
+
+ break;
+
+ case 3:
+ *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */
+ *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */
+ *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */
+ *(z++) = base32hexchar((x[1] & 1) << 4 | x[2] >> 4); /* 000YZZZZ */
+ *(z++) = base32hexchar((x[2] & 15) << 1); /* 000ZZZZ0 */
+ if (padding) {
+ *(z++) = '=';
+ *(z++) = '=';
+ *(z++) = '=';
+ }
+
+ break;
+
+ case 2:
+ *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */
+ *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */
+ *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */
+ *(z++) = base32hexchar((x[1] & 1) << 4); /* 000Y0000 */
+ if (padding) {
+ *(z++) = '=';
+ *(z++) = '=';
+ *(z++) = '=';
+ *(z++) = '=';
+ }
+
+ break;
+
+ case 1:
+ *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */
+ *(z++) = base32hexchar((x[0] & 7) << 2); /* 000XXX00 */
+ if (padding) {
+ *(z++) = '=';
+ *(z++) = '=';
+ *(z++) = '=';
+ *(z++) = '=';
+ *(z++) = '=';
+ *(z++) = '=';
+ }
+
+ break;
+ }
+
+ *z = 0;
+ return r;
+}
+
+int unbase32hexmem(const char *p, size_t l, bool padding, void **mem, size_t *_len) {
+ _cleanup_free_ uint8_t *r = NULL;
+ int a, b, c, d, e, f, g, h;
+ uint8_t *z;
+ const char *x;
+ size_t len;
+ unsigned pad = 0;
+
+ assert(p || l == 0);
+ assert(mem);
+ assert(_len);
+
+ if (l == (size_t) -1)
+ l = strlen(p);
+
+ /* padding ensures any base32hex input has input divisible by 8 */
+ if (padding && l % 8 != 0)
+ return -EINVAL;
+
+ if (padding) {
+ /* strip the padding */
+ while (l > 0 && p[l - 1] == '=' && pad < 7) {
+ pad++;
+ l--;
+ }
+ }
+
+ /* a group of eight input bytes needs five output bytes, in case of
+ * padding we need to add some extra bytes */
+ len = (l / 8) * 5;
+
+ switch (l % 8) {
+ case 7:
+ len += 4;
+ break;
+ case 5:
+ len += 3;
+ break;
+ case 4:
+ len += 2;
+ break;
+ case 2:
+ len += 1;
+ break;
+ case 0:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ z = r = malloc(len + 1);
+ if (!r)
+ return -ENOMEM;
+
+ for (x = p; x < p + (l / 8) * 8; x += 8) {
+ /* a == 000XXXXX; b == 000YYYYY; c == 000ZZZZZ; d == 000WWWWW
+ * e == 000SSSSS; f == 000QQQQQ; g == 000VVVVV; h == 000RRRRR */
+ a = unbase32hexchar(x[0]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unbase32hexchar(x[1]);
+ if (b < 0)
+ return -EINVAL;
+
+ c = unbase32hexchar(x[2]);
+ if (c < 0)
+ return -EINVAL;
+
+ d = unbase32hexchar(x[3]);
+ if (d < 0)
+ return -EINVAL;
+
+ e = unbase32hexchar(x[4]);
+ if (e < 0)
+ return -EINVAL;
+
+ f = unbase32hexchar(x[5]);
+ if (f < 0)
+ return -EINVAL;
+
+ g = unbase32hexchar(x[6]);
+ if (g < 0)
+ return -EINVAL;
+
+ h = unbase32hexchar(x[7]);
+ if (h < 0)
+ return -EINVAL;
+
+ *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */
+ *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */
+ *(z++) = (uint8_t) d << 4 | (uint8_t) e >> 1; /* WWWWSSSS */
+ *(z++) = (uint8_t) e << 7 | (uint8_t) f << 2 | (uint8_t) g >> 3; /* SQQQQQVV */
+ *(z++) = (uint8_t) g << 5 | (uint8_t) h; /* VVVRRRRR */
+ }
+
+ switch (l % 8) {
+ case 7:
+ a = unbase32hexchar(x[0]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unbase32hexchar(x[1]);
+ if (b < 0)
+ return -EINVAL;
+
+ c = unbase32hexchar(x[2]);
+ if (c < 0)
+ return -EINVAL;
+
+ d = unbase32hexchar(x[3]);
+ if (d < 0)
+ return -EINVAL;
+
+ e = unbase32hexchar(x[4]);
+ if (e < 0)
+ return -EINVAL;
+
+ f = unbase32hexchar(x[5]);
+ if (f < 0)
+ return -EINVAL;
+
+ g = unbase32hexchar(x[6]);
+ if (g < 0)
+ return -EINVAL;
+
+ /* g == 000VV000 */
+ if (g & 7)
+ return -EINVAL;
+
+ *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */
+ *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */
+ *(z++) = (uint8_t) d << 4 | (uint8_t) e >> 1; /* WWWWSSSS */
+ *(z++) = (uint8_t) e << 7 | (uint8_t) f << 2 | (uint8_t) g >> 3; /* SQQQQQVV */
+
+ break;
+ case 5:
+ a = unbase32hexchar(x[0]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unbase32hexchar(x[1]);
+ if (b < 0)
+ return -EINVAL;
+
+ c = unbase32hexchar(x[2]);
+ if (c < 0)
+ return -EINVAL;
+
+ d = unbase32hexchar(x[3]);
+ if (d < 0)
+ return -EINVAL;
+
+ e = unbase32hexchar(x[4]);
+ if (e < 0)
+ return -EINVAL;
+
+ /* e == 000SSSS0 */
+ if (e & 1)
+ return -EINVAL;
+
+ *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */
+ *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */
+ *(z++) = (uint8_t) d << 4 | (uint8_t) e >> 1; /* WWWWSSSS */
+
+ break;
+ case 4:
+ a = unbase32hexchar(x[0]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unbase32hexchar(x[1]);
+ if (b < 0)
+ return -EINVAL;
+
+ c = unbase32hexchar(x[2]);
+ if (c < 0)
+ return -EINVAL;
+
+ d = unbase32hexchar(x[3]);
+ if (d < 0)
+ return -EINVAL;
+
+ /* d == 000W0000 */
+ if (d & 15)
+ return -EINVAL;
+
+ *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */
+ *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */
+
+ break;
+ case 2:
+ a = unbase32hexchar(x[0]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unbase32hexchar(x[1]);
+ if (b < 0)
+ return -EINVAL;
+
+ /* b == 000YYY00 */
+ if (b & 3)
+ return -EINVAL;
+
+ *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */
+
+ break;
+ case 0:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ *z = 0;
+
+ *mem = TAKE_PTR(r);
+ *_len = len;
+
+ return 0;
+}
+
+/* https://tools.ietf.org/html/rfc4648#section-4 */
+char base64char(int x) {
+ static const char table[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789+/";
+ return table[x & 63];
+}
+
+int unbase64char(char c) {
+ unsigned offset;
+
+ if (c >= 'A' && c <= 'Z')
+ return c - 'A';
+
+ offset = 'Z' - 'A' + 1;
+
+ if (c >= 'a' && c <= 'z')
+ return c - 'a' + offset;
+
+ offset += 'z' - 'a' + 1;
+
+ if (c >= '0' && c <= '9')
+ return c - '0' + offset;
+
+ offset += '9' - '0' + 1;
+
+ if (c == '+')
+ return offset;
+
+ offset++;
+
+ if (c == '/')
+ return offset;
+
+ return -EINVAL;
+}
+
+ssize_t base64mem(const void *p, size_t l, char **out) {
+ char *r, *z;
+ const uint8_t *x;
+
+ assert(p || l == 0);
+ assert(out);
+
+ /* three input bytes makes four output bytes, padding is added so we must round up */
+ z = r = malloc(4 * (l + 2) / 3 + 1);
+ if (!r)
+ return -ENOMEM;
+
+ for (x = p; x < (const uint8_t*) p + (l / 3) * 3; x += 3) {
+ /* x[0] == XXXXXXXX; x[1] == YYYYYYYY; x[2] == ZZZZZZZZ */
+ *(z++) = base64char(x[0] >> 2); /* 00XXXXXX */
+ *(z++) = base64char((x[0] & 3) << 4 | x[1] >> 4); /* 00XXYYYY */
+ *(z++) = base64char((x[1] & 15) << 2 | x[2] >> 6); /* 00YYYYZZ */
+ *(z++) = base64char(x[2] & 63); /* 00ZZZZZZ */
+ }
+
+ switch (l % 3) {
+ case 2:
+ *(z++) = base64char(x[0] >> 2); /* 00XXXXXX */
+ *(z++) = base64char((x[0] & 3) << 4 | x[1] >> 4); /* 00XXYYYY */
+ *(z++) = base64char((x[1] & 15) << 2); /* 00YYYY00 */
+ *(z++) = '=';
+
+ break;
+ case 1:
+ *(z++) = base64char(x[0] >> 2); /* 00XXXXXX */
+ *(z++) = base64char((x[0] & 3) << 4); /* 00XX0000 */
+ *(z++) = '=';
+ *(z++) = '=';
+
+ break;
+ }
+
+ *z = 0;
+ *out = r;
+ return z - r;
+}
+
+static int base64_append_width(
+ char **prefix, int plen,
+ const char *sep, int indent,
+ const void *p, size_t l,
+ int width) {
+
+ _cleanup_free_ char *x = NULL;
+ char *t, *s;
+ ssize_t len, slen, avail, line, lines;
+
+ len = base64mem(p, l, &x);
+ if (len <= 0)
+ return len;
+
+ lines = DIV_ROUND_UP(len, width);
+
+ slen = strlen_ptr(sep);
+ if (lines > (SSIZE_MAX - plen - 1 - slen) / (indent + width + 1))
+ return -ENOMEM;
+
+ t = realloc(*prefix, plen + 1 + slen + (indent + width + 1) * lines);
+ if (!t)
+ return -ENOMEM;
+
+ memcpy_safe(t + plen, sep, slen);
+
+ for (line = 0, s = t + plen + slen, avail = len; line < lines; line++) {
+ int act = MIN(width, avail);
+
+ if (line > 0 || sep) {
+ memset(s, ' ', indent);
+ s += indent;
+ }
+
+ memcpy(s, x + width * line, act);
+ s += act;
+ *(s++) = line < lines - 1 ? '\n' : '\0';
+ avail -= act;
+ }
+ assert(avail == 0);
+
+ *prefix = t;
+ return 0;
+}
+
+int base64_append(
+ char **prefix, int plen,
+ const void *p, size_t l,
+ int indent, int width) {
+
+ if (plen > width / 2 || plen + indent > width)
+ /* leave indent on the left, keep last column free */
+ return base64_append_width(prefix, plen, "\n", indent, p, l, width - indent - 1);
+ else
+ /* leave plen on the left, keep last column free */
+ return base64_append_width(prefix, plen, NULL, plen, p, l, width - plen - 1);
+}
+
+static int unbase64_next(const char **p, size_t *l) {
+ int ret;
+
+ assert(p);
+ assert(l);
+
+ /* Find the next non-whitespace character, and decode it. If we find padding, we return it as INT_MAX. We
+ * greedily skip all preceding and all following whitespace. */
+
+ for (;;) {
+ if (*l == 0)
+ return -EPIPE;
+
+ if (!strchr(WHITESPACE, **p))
+ break;
+
+ /* Skip leading whitespace */
+ (*p)++, (*l)--;
+ }
+
+ if (**p == '=')
+ ret = INT_MAX; /* return padding as INT_MAX */
+ else {
+ ret = unbase64char(**p);
+ if (ret < 0)
+ return ret;
+ }
+
+ for (;;) {
+ (*p)++, (*l)--;
+
+ if (*l == 0)
+ break;
+ if (!strchr(WHITESPACE, **p))
+ break;
+
+ /* Skip following whitespace */
+ }
+
+ return ret;
+}
+
+int unbase64mem(const char *p, size_t l, void **ret, size_t *ret_size) {
+ _cleanup_free_ uint8_t *buf = NULL;
+ const char *x;
+ uint8_t *z;
+ size_t len;
+
+ assert(p || l == 0);
+ assert(ret);
+ assert(ret_size);
+
+ if (l == (size_t) -1)
+ l = strlen(p);
+
+ /* A group of four input bytes needs three output bytes, in case of padding we need to add two or three extra
+ * bytes. Note that this calculation is an upper boundary, as we ignore whitespace while decoding */
+ len = (l / 4) * 3 + (l % 4 != 0 ? (l % 4) - 1 : 0);
+
+ buf = malloc(len + 1);
+ if (!buf)
+ return -ENOMEM;
+
+ for (x = p, z = buf;;) {
+ int a, b, c, d; /* a == 00XXXXXX; b == 00YYYYYY; c == 00ZZZZZZ; d == 00WWWWWW */
+
+ a = unbase64_next(&x, &l);
+ if (a == -EPIPE) /* End of string */
+ break;
+ if (a < 0)
+ return a;
+ if (a == INT_MAX) /* Padding is not allowed at the beginning of a 4ch block */
+ return -EINVAL;
+
+ b = unbase64_next(&x, &l);
+ if (b < 0)
+ return b;
+ if (b == INT_MAX) /* Padding is not allowed at the second character of a 4ch block either */
+ return -EINVAL;
+
+ c = unbase64_next(&x, &l);
+ if (c < 0)
+ return c;
+
+ d = unbase64_next(&x, &l);
+ if (d < 0)
+ return d;
+
+ if (c == INT_MAX) { /* Padding at the third character */
+
+ if (d != INT_MAX) /* If the third character is padding, the fourth must be too */
+ return -EINVAL;
+
+ /* b == 00YY0000 */
+ if (b & 15)
+ return -EINVAL;
+
+ if (l > 0) /* Trailing rubbish? */
+ return -ENAMETOOLONG;
+
+ *(z++) = (uint8_t) a << 2 | (uint8_t) (b >> 4); /* XXXXXXYY */
+ break;
+ }
+
+ if (d == INT_MAX) {
+ /* c == 00ZZZZ00 */
+ if (c & 3)
+ return -EINVAL;
+
+ if (l > 0) /* Trailing rubbish? */
+ return -ENAMETOOLONG;
+
+ *(z++) = (uint8_t) a << 2 | (uint8_t) b >> 4; /* XXXXXXYY */
+ *(z++) = (uint8_t) b << 4 | (uint8_t) c >> 2; /* YYYYZZZZ */
+ break;
+ }
+
+ *(z++) = (uint8_t) a << 2 | (uint8_t) b >> 4; /* XXXXXXYY */
+ *(z++) = (uint8_t) b << 4 | (uint8_t) c >> 2; /* YYYYZZZZ */
+ *(z++) = (uint8_t) c << 6 | (uint8_t) d; /* ZZWWWWWW */
+ }
+
+ *z = 0;
+
+ *ret_size = (size_t) (z - buf);
+ *ret = TAKE_PTR(buf);
+
+ return 0;
+}
+
+void hexdump(FILE *f, const void *p, size_t s) {
+ const uint8_t *b = p;
+ unsigned n = 0;
+
+ assert(b || s == 0);
+
+ if (!f)
+ f = stdout;
+
+ while (s > 0) {
+ size_t i;
+
+ fprintf(f, "%04x ", n);
+
+ for (i = 0; i < 16; i++) {
+
+ if (i >= s)
+ fputs(" ", f);
+ else
+ fprintf(f, "%02x ", b[i]);
+
+ if (i == 7)
+ fputc(' ', f);
+ }
+
+ fputc(' ', f);
+
+ for (i = 0; i < 16; i++) {
+
+ if (i >= s)
+ fputc(' ', f);
+ else
+ fputc(isprint(b[i]) ? (char) b[i] : '.', f);
+ }
+
+ fputc('\n', f);
+
+ if (s < 16)
+ break;
+
+ n += 16;
+ b += 16;
+ s -= 16;
+ }
+}
diff --git a/src/basic/hexdecoct.h b/src/basic/hexdecoct.h
new file mode 100644
index 0000000..9477d16
--- /dev/null
+++ b/src/basic/hexdecoct.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+char octchar(int x) _const_;
+int unoctchar(char c) _const_;
+
+char decchar(int x) _const_;
+int undecchar(char c) _const_;
+
+char hexchar(int x) _const_;
+int unhexchar(char c) _const_;
+
+char *hexmem(const void *p, size_t l);
+int unhexmem(const char *p, size_t l, void **mem, size_t *len);
+
+char base32hexchar(int x) _const_;
+int unbase32hexchar(char c) _const_;
+
+char base64char(int x) _const_;
+int unbase64char(char c) _const_;
+
+char *base32hexmem(const void *p, size_t l, bool padding);
+int unbase32hexmem(const char *p, size_t l, bool padding, void **mem, size_t *len);
+
+ssize_t base64mem(const void *p, size_t l, char **out);
+int base64_append(char **prefix, int plen,
+ const void *p, size_t l,
+ int margin, int width);
+int unbase64mem(const char *p, size_t l, void **mem, size_t *len);
+
+void hexdump(FILE *f, const void *p, size_t s);
diff --git a/src/basic/hostname-util.c b/src/basic/hostname-util.c
new file mode 100644
index 0000000..5bfa028
--- /dev/null
+++ b/src/basic/hostname-util.c
@@ -0,0 +1,308 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "macro.h"
+#include "string-util.h"
+
+bool hostname_is_set(void) {
+ struct utsname u;
+
+ assert_se(uname(&u) >= 0);
+
+ if (isempty(u.nodename))
+ return false;
+
+ /* This is the built-in kernel default host name */
+ if (streq(u.nodename, "(none)"))
+ return false;
+
+ return true;
+}
+
+char* gethostname_malloc(void) {
+ struct utsname u;
+
+ /* This call tries to return something useful, either the actual hostname
+ * or it makes something up. The only reason it might fail is OOM.
+ * It might even return "localhost" if that's set. */
+
+ assert_se(uname(&u) >= 0);
+
+ if (isempty(u.nodename) || streq(u.nodename, "(none)"))
+ return strdup(FALLBACK_HOSTNAME);
+
+ return strdup(u.nodename);
+}
+
+int gethostname_strict(char **ret) {
+ struct utsname u;
+ char *k;
+
+ /* This call will rather fail than make up a name. It will not return "localhost" either. */
+
+ assert_se(uname(&u) >= 0);
+
+ if (isempty(u.nodename))
+ return -ENXIO;
+
+ if (streq(u.nodename, "(none)"))
+ return -ENXIO;
+
+ if (is_localhost(u.nodename))
+ return -ENXIO;
+
+ k = strdup(u.nodename);
+ if (!k)
+ return -ENOMEM;
+
+ *ret = k;
+ return 0;
+}
+
+bool valid_ldh_char(char c) {
+ return
+ (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ c == '-';
+}
+
+/**
+ * Check if s looks like a valid host name or FQDN. This does not do
+ * full DNS validation, but only checks if the name is composed of
+ * allowed characters and the length is not above the maximum allowed
+ * by Linux (c.f. dns_name_is_valid()). Trailing dot is allowed if
+ * allow_trailing_dot is true and at least two components are present
+ * in the name. Note that due to the restricted charset and length
+ * this call is substantially more conservative than
+ * dns_name_is_valid().
+ */
+bool hostname_is_valid(const char *s, bool allow_trailing_dot) {
+ unsigned n_dots = 0;
+ const char *p;
+ bool dot, hyphen;
+
+ if (isempty(s))
+ return false;
+
+ /* Doesn't accept empty hostnames, hostnames with
+ * leading dots, and hostnames with multiple dots in a
+ * sequence. Also ensures that the length stays below
+ * HOST_NAME_MAX. */
+
+ for (p = s, dot = hyphen = true; *p; p++)
+ if (*p == '.') {
+ if (dot || hyphen)
+ return false;
+
+ dot = true;
+ hyphen = false;
+ n_dots++;
+
+ } else if (*p == '-') {
+ if (dot)
+ return false;
+
+ dot = false;
+ hyphen = true;
+
+ } else {
+ if (!valid_ldh_char(*p))
+ return false;
+
+ dot = false;
+ hyphen = false;
+ }
+
+ if (dot && (n_dots < 2 || !allow_trailing_dot))
+ return false;
+ if (hyphen)
+ return false;
+
+ if (p-s > HOST_NAME_MAX) /* Note that HOST_NAME_MAX is 64 on
+ * Linux, but DNS allows domain names
+ * up to 255 characters */
+ return false;
+
+ return true;
+}
+
+char* hostname_cleanup(char *s) {
+ char *p, *d;
+ bool dot, hyphen;
+
+ assert(s);
+
+ for (p = s, d = s, dot = hyphen = true; *p && d - s < HOST_NAME_MAX; p++)
+ if (*p == '.') {
+ if (dot || hyphen)
+ continue;
+
+ *(d++) = '.';
+ dot = true;
+ hyphen = false;
+
+ } else if (*p == '-') {
+ if (dot)
+ continue;
+
+ *(d++) = '-';
+ dot = false;
+ hyphen = true;
+
+ } else if (valid_ldh_char(*p)) {
+ *(d++) = *p;
+ dot = false;
+ hyphen = false;
+ }
+
+ if (d > s && IN_SET(d[-1], '-', '.'))
+ /* The dot can occur at most once, but we might have multiple
+ * hyphens, hence the loop */
+ d--;
+ *d = 0;
+
+ return s;
+}
+
+bool is_localhost(const char *hostname) {
+ assert(hostname);
+
+ /* This tries to identify local host and domain names
+ * described in RFC6761 plus the redhatism of localdomain */
+
+ return strcaseeq(hostname, "localhost") ||
+ strcaseeq(hostname, "localhost.") ||
+ strcaseeq(hostname, "localhost.localdomain") ||
+ strcaseeq(hostname, "localhost.localdomain.") ||
+ endswith_no_case(hostname, ".localhost") ||
+ endswith_no_case(hostname, ".localhost.") ||
+ endswith_no_case(hostname, ".localhost.localdomain") ||
+ endswith_no_case(hostname, ".localhost.localdomain.");
+}
+
+bool is_gateway_hostname(const char *hostname) {
+ assert(hostname);
+
+ /* This tries to identify the valid syntaxes for the our
+ * synthetic "gateway" host. */
+
+ return
+ strcaseeq(hostname, "_gateway") || strcaseeq(hostname, "_gateway.")
+#if ENABLE_COMPAT_GATEWAY_HOSTNAME
+ || strcaseeq(hostname, "gateway") || strcaseeq(hostname, "gateway.")
+#endif
+ ;
+}
+
+int sethostname_idempotent(const char *s) {
+ char buf[HOST_NAME_MAX + 1] = {};
+
+ assert(s);
+
+ if (gethostname(buf, sizeof(buf)) < 0)
+ return -errno;
+
+ if (streq(buf, s))
+ return 0;
+
+ if (sethostname(s, strlen(s)) < 0)
+ return -errno;
+
+ return 1;
+}
+
+int shorten_overlong(const char *s, char **ret) {
+ char *h, *p;
+
+ /* Shorten an overlong name to HOST_NAME_MAX or to the first dot,
+ * whatever comes earlier. */
+
+ assert(s);
+
+ h = strdup(s);
+ if (!h)
+ return -ENOMEM;
+
+ if (hostname_is_valid(h, false)) {
+ *ret = h;
+ return 0;
+ }
+
+ p = strchr(h, '.');
+ if (p)
+ *p = 0;
+
+ strshorten(h, HOST_NAME_MAX);
+
+ if (!hostname_is_valid(h, false)) {
+ free(h);
+ return -EDOM;
+ }
+
+ *ret = h;
+ return 1;
+}
+
+int read_etc_hostname_stream(FILE *f, char **ret) {
+ int r;
+
+ assert(f);
+ assert(ret);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0) /* EOF without any hostname? the file is empty, let's treat that exactly like no file at all: ENOENT */
+ return -ENOENT;
+
+ p = strstrip(line);
+
+ /* File may have empty lines or comments, ignore them */
+ if (!IN_SET(*p, '\0', '#')) {
+ char *copy;
+
+ hostname_cleanup(p); /* normalize the hostname */
+
+ if (!hostname_is_valid(p, true)) /* check that the hostname we return is valid */
+ return -EBADMSG;
+
+ copy = strdup(p);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret = copy;
+ return 0;
+ }
+ }
+}
+
+int read_etc_hostname(const char *path, char **ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ assert(ret);
+
+ if (!path)
+ path = "/etc/hostname";
+
+ f = fopen(path, "re");
+ if (!f)
+ return -errno;
+
+ return read_etc_hostname_stream(f, ret);
+
+}
diff --git a/src/basic/hostname-util.h b/src/basic/hostname-util.h
new file mode 100644
index 0000000..7ba386a
--- /dev/null
+++ b/src/basic/hostname-util.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "macro.h"
+
+bool hostname_is_set(void);
+
+char* gethostname_malloc(void);
+int gethostname_strict(char **ret);
+
+bool valid_ldh_char(char c) _const_;
+bool hostname_is_valid(const char *s, bool allow_trailing_dot) _pure_;
+char* hostname_cleanup(char *s);
+
+#define machine_name_is_valid(s) hostname_is_valid(s, false)
+
+bool is_localhost(const char *hostname);
+bool is_gateway_hostname(const char *hostname);
+
+int sethostname_idempotent(const char *s);
+
+int shorten_overlong(const char *s, char **ret);
+
+int read_etc_hostname_stream(FILE *f, char **ret);
+int read_etc_hostname(const char *path, char **ret);
diff --git a/src/basic/in-addr-util.c b/src/basic/in-addr-util.c
new file mode 100644
index 0000000..2bffe47
--- /dev/null
+++ b/src/basic/in-addr-util.c
@@ -0,0 +1,622 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <endian.h>
+#include <errno.h>
+#include <net/if.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "util.h"
+
+bool in4_addr_is_null(const struct in_addr *a) {
+ assert(a);
+
+ return a->s_addr == 0;
+}
+
+int in_addr_is_null(int family, const union in_addr_union *u) {
+ assert(u);
+
+ if (family == AF_INET)
+ return in4_addr_is_null(&u->in);
+
+ if (family == AF_INET6)
+ return IN6_IS_ADDR_UNSPECIFIED(&u->in6);
+
+ return -EAFNOSUPPORT;
+}
+
+bool in4_addr_is_link_local(const struct in_addr *a) {
+ assert(a);
+
+ return (be32toh(a->s_addr) & UINT32_C(0xFFFF0000)) == (UINT32_C(169) << 24 | UINT32_C(254) << 16);
+}
+
+int in_addr_is_link_local(int family, const union in_addr_union *u) {
+ assert(u);
+
+ if (family == AF_INET)
+ return in4_addr_is_link_local(&u->in);
+
+ if (family == AF_INET6)
+ return IN6_IS_ADDR_LINKLOCAL(&u->in6);
+
+ return -EAFNOSUPPORT;
+}
+
+int in_addr_is_multicast(int family, const union in_addr_union *u) {
+ assert(u);
+
+ if (family == AF_INET)
+ return IN_MULTICAST(be32toh(u->in.s_addr));
+
+ if (family == AF_INET6)
+ return IN6_IS_ADDR_MULTICAST(&u->in6);
+
+ return -EAFNOSUPPORT;
+}
+
+bool in4_addr_is_localhost(const struct in_addr *a) {
+ assert(a);
+
+ /* All of 127.x.x.x is localhost. */
+ return (be32toh(a->s_addr) & UINT32_C(0xFF000000)) == UINT32_C(127) << 24;
+}
+
+int in_addr_is_localhost(int family, const union in_addr_union *u) {
+ assert(u);
+
+ if (family == AF_INET)
+ return in4_addr_is_localhost(&u->in);
+
+ if (family == AF_INET6)
+ return IN6_IS_ADDR_LOOPBACK(&u->in6);
+
+ return -EAFNOSUPPORT;
+}
+
+int in_addr_equal(int family, const union in_addr_union *a, const union in_addr_union *b) {
+ assert(a);
+ assert(b);
+
+ if (family == AF_INET)
+ return a->in.s_addr == b->in.s_addr;
+
+ if (family == AF_INET6)
+ return
+ a->in6.s6_addr32[0] == b->in6.s6_addr32[0] &&
+ a->in6.s6_addr32[1] == b->in6.s6_addr32[1] &&
+ a->in6.s6_addr32[2] == b->in6.s6_addr32[2] &&
+ a->in6.s6_addr32[3] == b->in6.s6_addr32[3];
+
+ return -EAFNOSUPPORT;
+}
+
+int in_addr_prefix_intersect(
+ int family,
+ const union in_addr_union *a,
+ unsigned aprefixlen,
+ const union in_addr_union *b,
+ unsigned bprefixlen) {
+
+ unsigned m;
+
+ assert(a);
+ assert(b);
+
+ /* Checks whether there are any addresses that are in both
+ * networks */
+
+ m = MIN(aprefixlen, bprefixlen);
+
+ if (family == AF_INET) {
+ uint32_t x, nm;
+
+ x = be32toh(a->in.s_addr ^ b->in.s_addr);
+ nm = (m == 0) ? 0 : 0xFFFFFFFFUL << (32 - m);
+
+ return (x & nm) == 0;
+ }
+
+ if (family == AF_INET6) {
+ unsigned i;
+
+ if (m > 128)
+ m = 128;
+
+ for (i = 0; i < 16; i++) {
+ uint8_t x, nm;
+
+ x = a->in6.s6_addr[i] ^ b->in6.s6_addr[i];
+
+ if (m < 8)
+ nm = 0xFF << (8 - m);
+ else
+ nm = 0xFF;
+
+ if ((x & nm) != 0)
+ return 0;
+
+ if (m > 8)
+ m -= 8;
+ else
+ m = 0;
+ }
+
+ return 1;
+ }
+
+ return -EAFNOSUPPORT;
+}
+
+int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen) {
+ assert(u);
+
+ /* Increases the network part of an address by one. Returns
+ * positive it that succeeds, or 0 if this overflows. */
+
+ if (prefixlen <= 0)
+ return 0;
+
+ if (family == AF_INET) {
+ uint32_t c, n;
+
+ if (prefixlen > 32)
+ prefixlen = 32;
+
+ c = be32toh(u->in.s_addr);
+ n = c + (1UL << (32 - prefixlen));
+ if (n < c)
+ return 0;
+ n &= 0xFFFFFFFFUL << (32 - prefixlen);
+
+ u->in.s_addr = htobe32(n);
+ return 1;
+ }
+
+ if (family == AF_INET6) {
+ struct in6_addr add = {}, result;
+ uint8_t overflow = 0;
+ unsigned i;
+
+ if (prefixlen > 128)
+ prefixlen = 128;
+
+ /* First calculate what we have to add */
+ add.s6_addr[(prefixlen-1) / 8] = 1 << (7 - (prefixlen-1) % 8);
+
+ for (i = 16; i > 0; i--) {
+ unsigned j = i - 1;
+
+ result.s6_addr[j] = u->in6.s6_addr[j] + add.s6_addr[j] + overflow;
+ overflow = (result.s6_addr[j] < u->in6.s6_addr[j]);
+ }
+
+ if (overflow)
+ return 0;
+
+ u->in6 = result;
+ return 1;
+ }
+
+ return -EAFNOSUPPORT;
+}
+
+int in_addr_to_string(int family, const union in_addr_union *u, char **ret) {
+ char *x;
+ size_t l;
+
+ assert(u);
+ assert(ret);
+
+ if (family == AF_INET)
+ l = INET_ADDRSTRLEN;
+ else if (family == AF_INET6)
+ l = INET6_ADDRSTRLEN;
+ else
+ return -EAFNOSUPPORT;
+
+ x = new(char, l);
+ if (!x)
+ return -ENOMEM;
+
+ errno = 0;
+ if (!inet_ntop(family, u, x, l)) {
+ free(x);
+ return errno > 0 ? -errno : -EINVAL;
+ }
+
+ *ret = x;
+ return 0;
+}
+
+int in_addr_ifindex_to_string(int family, const union in_addr_union *u, int ifindex, char **ret) {
+ size_t l;
+ char *x;
+ int r;
+
+ assert(u);
+ assert(ret);
+
+ /* Much like in_addr_to_string(), but optionally appends the zone interface index to the address, to properly
+ * handle IPv6 link-local addresses. */
+
+ if (family != AF_INET6)
+ goto fallback;
+ if (ifindex <= 0)
+ goto fallback;
+
+ r = in_addr_is_link_local(family, u);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ goto fallback;
+
+ l = INET6_ADDRSTRLEN + 1 + DECIMAL_STR_MAX(ifindex) + 1;
+ x = new(char, l);
+ if (!x)
+ return -ENOMEM;
+
+ errno = 0;
+ if (!inet_ntop(family, u, x, l)) {
+ free(x);
+ return errno > 0 ? -errno : -EINVAL;
+ }
+
+ sprintf(strchr(x, 0), "%%%i", ifindex);
+ *ret = x;
+
+ return 0;
+
+fallback:
+ return in_addr_to_string(family, u, ret);
+}
+
+int in_addr_from_string(int family, const char *s, union in_addr_union *ret) {
+ union in_addr_union buffer;
+ assert(s);
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return -EAFNOSUPPORT;
+
+ errno = 0;
+ if (inet_pton(family, s, ret ?: &buffer) <= 0)
+ return errno > 0 ? -errno : -EINVAL;
+
+ return 0;
+}
+
+int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret) {
+ int r;
+
+ assert(s);
+
+ r = in_addr_from_string(AF_INET, s, ret);
+ if (r >= 0) {
+ if (ret_family)
+ *ret_family = AF_INET;
+ return 0;
+ }
+
+ r = in_addr_from_string(AF_INET6, s, ret);
+ if (r >= 0) {
+ if (ret_family)
+ *ret_family = AF_INET6;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex) {
+ _cleanup_free_ char *buf = NULL;
+ const char *suffix;
+ int r, ifi = 0;
+
+ assert(s);
+ assert(family);
+ assert(ret);
+
+ /* Similar to in_addr_from_string_auto() but also parses an optionally appended IPv6 zone suffix ("scope id")
+ * if one is found. */
+
+ suffix = strchr(s, '%');
+ if (suffix) {
+
+ if (ifindex) {
+ /* If we shall return the interface index, try to parse it */
+ r = parse_ifindex(suffix + 1, &ifi);
+ if (r < 0) {
+ unsigned u;
+
+ u = if_nametoindex(suffix + 1);
+ if (u <= 0)
+ return -errno;
+
+ ifi = (int) u;
+ }
+ }
+
+ buf = strndup(s, suffix - s);
+ if (!buf)
+ return -ENOMEM;
+
+ s = buf;
+ }
+
+ r = in_addr_from_string_auto(s, family, ret);
+ if (r < 0)
+ return r;
+
+ if (ifindex)
+ *ifindex = ifi;
+
+ return r;
+}
+
+unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr) {
+ assert(addr);
+
+ return 32U - u32ctz(be32toh(addr->s_addr));
+}
+
+struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) {
+ assert(addr);
+ assert(prefixlen <= 32);
+
+ /* Shifting beyond 32 is not defined, handle this specially. */
+ if (prefixlen == 0)
+ addr->s_addr = 0;
+ else
+ addr->s_addr = htobe32((0xffffffff << (32 - prefixlen)) & 0xffffffff);
+
+ return addr;
+}
+
+int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) {
+ uint8_t msb_octet = *(uint8_t*) addr;
+
+ /* addr may not be aligned, so make sure we only access it byte-wise */
+
+ assert(addr);
+ assert(prefixlen);
+
+ if (msb_octet < 128)
+ /* class A, leading bits: 0 */
+ *prefixlen = 8;
+ else if (msb_octet < 192)
+ /* class B, leading bits 10 */
+ *prefixlen = 16;
+ else if (msb_octet < 224)
+ /* class C, leading bits 110 */
+ *prefixlen = 24;
+ else
+ /* class D or E, no default prefixlen */
+ return -ERANGE;
+
+ return 0;
+}
+
+int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) {
+ unsigned char prefixlen;
+ int r;
+
+ assert(addr);
+ assert(mask);
+
+ r = in4_addr_default_prefixlen(addr, &prefixlen);
+ if (r < 0)
+ return r;
+
+ in4_addr_prefixlen_to_netmask(mask, prefixlen);
+ return 0;
+}
+
+int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen) {
+ assert(addr);
+
+ if (family == AF_INET) {
+ struct in_addr mask;
+
+ if (!in4_addr_prefixlen_to_netmask(&mask, prefixlen))
+ return -EINVAL;
+
+ addr->in.s_addr &= mask.s_addr;
+ return 0;
+ }
+
+ if (family == AF_INET6) {
+ unsigned i;
+
+ for (i = 0; i < 16; i++) {
+ uint8_t mask;
+
+ if (prefixlen >= 8) {
+ mask = 0xFF;
+ prefixlen -= 8;
+ } else {
+ mask = 0xFF << (8 - prefixlen);
+ prefixlen = 0;
+ }
+
+ addr->in6.s6_addr[i] &= mask;
+ }
+
+ return 0;
+ }
+
+ return -EAFNOSUPPORT;
+}
+
+int in_addr_prefix_covers(int family,
+ const union in_addr_union *prefix,
+ unsigned char prefixlen,
+ const union in_addr_union *address) {
+
+ union in_addr_union masked_prefix, masked_address;
+ int r;
+
+ assert(prefix);
+ assert(address);
+
+ masked_prefix = *prefix;
+ r = in_addr_mask(family, &masked_prefix, prefixlen);
+ if (r < 0)
+ return r;
+
+ masked_address = *address;
+ r = in_addr_mask(family, &masked_address, prefixlen);
+ if (r < 0)
+ return r;
+
+ return in_addr_equal(family, &masked_prefix, &masked_address);
+}
+
+int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret) {
+ uint8_t u;
+ int r;
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return -EAFNOSUPPORT;
+
+ r = safe_atou8(p, &u);
+ if (r < 0)
+ return r;
+
+ if (u > FAMILY_ADDRESS_SIZE(family) * 8)
+ return -ERANGE;
+
+ *ret = u;
+ return 0;
+}
+
+int in_addr_prefix_from_string(
+ const char *p,
+ int family,
+ union in_addr_union *ret_prefix,
+ unsigned char *ret_prefixlen) {
+
+ _cleanup_free_ char *str = NULL;
+ union in_addr_union buffer;
+ const char *e, *l;
+ unsigned char k;
+ int r;
+
+ assert(p);
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return -EAFNOSUPPORT;
+
+ e = strchr(p, '/');
+ if (e) {
+ str = strndup(p, e - p);
+ if (!str)
+ return -ENOMEM;
+
+ l = str;
+ } else
+ l = p;
+
+ r = in_addr_from_string(family, l, &buffer);
+ if (r < 0)
+ return r;
+
+ if (e) {
+ r = in_addr_parse_prefixlen(family, e+1, &k);
+ if (r < 0)
+ return r;
+ } else
+ k = FAMILY_ADDRESS_SIZE(family) * 8;
+
+ if (ret_prefix)
+ *ret_prefix = buffer;
+ if (ret_prefixlen)
+ *ret_prefixlen = k;
+
+ return 0;
+}
+
+int in_addr_prefix_from_string_auto_internal(
+ const char *p,
+ InAddrPrefixLenMode mode,
+ int *ret_family,
+ union in_addr_union *ret_prefix,
+ unsigned char *ret_prefixlen) {
+
+ _cleanup_free_ char *str = NULL;
+ union in_addr_union buffer;
+ const char *e, *l;
+ unsigned char k;
+ int family, r;
+
+ assert(p);
+
+ e = strchr(p, '/');
+ if (e) {
+ str = strndup(p, e - p);
+ if (!str)
+ return -ENOMEM;
+
+ l = str;
+ } else
+ l = p;
+
+ r = in_addr_from_string_auto(l, &family, &buffer);
+ if (r < 0)
+ return r;
+
+ if (e) {
+ r = in_addr_parse_prefixlen(family, e+1, &k);
+ if (r < 0)
+ return r;
+ } else
+ switch (mode) {
+ case PREFIXLEN_FULL:
+ k = FAMILY_ADDRESS_SIZE(family) * 8;
+ break;
+ case PREFIXLEN_REFUSE:
+ return -ENOANO; /* To distinguish this error from others. */
+ case PREFIXLEN_LEGACY:
+ if (family == AF_INET) {
+ r = in4_addr_default_prefixlen(&buffer.in, &k);
+ if (r < 0)
+ return r;
+ } else
+ k = 0;
+ break;
+ default:
+ assert_not_reached("Invalid prefixlen mode");
+ }
+
+ if (ret_family)
+ *ret_family = family;
+ if (ret_prefix)
+ *ret_prefix = buffer;
+ if (ret_prefixlen)
+ *ret_prefixlen = k;
+
+ return 0;
+
+}
+
+static void in_addr_data_hash_func(const struct in_addr_data *a, struct siphash *state) {
+ siphash24_compress(&a->family, sizeof(a->family), state);
+ siphash24_compress(&a->address, FAMILY_ADDRESS_SIZE(a->family), state);
+}
+
+static int in_addr_data_compare_func(const struct in_addr_data *x, const struct in_addr_data *y) {
+ int r;
+
+ r = CMP(x->family, y->family);
+ if (r != 0)
+ return r;
+
+ return memcmp(&x->address, &y->address, FAMILY_ADDRESS_SIZE(x->family));
+}
+
+DEFINE_HASH_OPS(in_addr_data_hash_ops, struct in_addr_data, in_addr_data_hash_func, in_addr_data_compare_func);
diff --git a/src/basic/in-addr-util.h b/src/basic/in-addr-util.h
new file mode 100644
index 0000000..3069790
--- /dev/null
+++ b/src/basic/in-addr-util.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <netinet/in.h>
+#include <stddef.h>
+#include <sys/socket.h>
+
+#include "hash-funcs.h"
+#include "macro.h"
+#include "util.h"
+
+union in_addr_union {
+ struct in_addr in;
+ struct in6_addr in6;
+};
+
+struct in_addr_data {
+ int family;
+ union in_addr_union address;
+};
+
+bool in4_addr_is_null(const struct in_addr *a);
+int in_addr_is_null(int family, const union in_addr_union *u);
+
+int in_addr_is_multicast(int family, const union in_addr_union *u);
+
+bool in4_addr_is_link_local(const struct in_addr *a);
+int in_addr_is_link_local(int family, const union in_addr_union *u);
+
+bool in4_addr_is_localhost(const struct in_addr *a);
+int in_addr_is_localhost(int family, const union in_addr_union *u);
+
+int in_addr_equal(int family, const union in_addr_union *a, const union in_addr_union *b);
+int in_addr_prefix_intersect(int family, const union in_addr_union *a, unsigned aprefixlen, const union in_addr_union *b, unsigned bprefixlen);
+int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen);
+int in_addr_to_string(int family, const union in_addr_union *u, char **ret);
+int in_addr_ifindex_to_string(int family, const union in_addr_union *u, int ifindex, char **ret);
+int in_addr_from_string(int family, const char *s, union in_addr_union *ret);
+int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret);
+int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex);
+unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr);
+struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen);
+int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen);
+int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask);
+int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen);
+int in_addr_prefix_covers(int family, const union in_addr_union *prefix, unsigned char prefixlen, const union in_addr_union *address);
+int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret);
+int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
+
+typedef enum InAddrPrefixLenMode {
+ PREFIXLEN_FULL, /* Default to prefixlen of address size, 32 for IPv4 or 128 for IPv6, if not specified. */
+ PREFIXLEN_REFUSE, /* Fail with -ENOANO if prefixlen is not specified. */
+ PREFIXLEN_LEGACY, /* Default to legacy default prefixlen calculation from address if not specified. */
+} InAddrPrefixLenMode;
+
+int in_addr_prefix_from_string_auto_internal(const char *p, InAddrPrefixLenMode mode, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
+static inline int in_addr_prefix_from_string_auto(const char *p, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen) {
+ return in_addr_prefix_from_string_auto_internal(p, PREFIXLEN_FULL, ret_family, ret_prefix, ret_prefixlen);
+}
+
+static inline size_t FAMILY_ADDRESS_SIZE(int family) {
+ assert(IN_SET(family, AF_INET, AF_INET6));
+ return family == AF_INET6 ? 16 : 4;
+}
+
+/* Workaround for clang, explicitly specify the maximum-size element here.
+ * See also oss-fuzz#11344. */
+#define IN_ADDR_NULL ((union in_addr_union) { .in6 = {} })
+
+extern const struct hash_ops in_addr_data_hash_ops;
diff --git a/src/basic/io-util.c b/src/basic/io-util.c
new file mode 100644
index 0000000..575398f
--- /dev/null
+++ b/src/basic/io-util.c
@@ -0,0 +1,264 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <poll.h>
+#include <stdio.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "io-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+int flush_fd(int fd) {
+ struct pollfd pollfd = {
+ .fd = fd,
+ .events = POLLIN,
+ };
+ int count = 0;
+
+ /* Read from the specified file descriptor, until POLLIN is not set anymore, throwing away everything
+ * read. Note that some file descriptors (notable IP sockets) will trigger POLLIN even when no data can be read
+ * (due to IP packet checksum mismatches), hence this function is only safe to be non-blocking if the fd used
+ * was set to non-blocking too. */
+
+ for (;;) {
+ char buf[LINE_MAX];
+ ssize_t l;
+ int r;
+
+ r = poll(&pollfd, 1, 0);
+ if (r < 0) {
+ if (errno == EINTR)
+ continue;
+
+ return -errno;
+
+ } else if (r == 0)
+ return count;
+
+ l = read(fd, buf, sizeof(buf));
+ if (l < 0) {
+
+ if (errno == EINTR)
+ continue;
+
+ if (errno == EAGAIN)
+ return count;
+
+ return -errno;
+ } else if (l == 0)
+ return count;
+
+ count += (int) l;
+ }
+}
+
+ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll) {
+ uint8_t *p = buf;
+ ssize_t n = 0;
+
+ assert(fd >= 0);
+ assert(buf);
+
+ /* If called with nbytes == 0, let's call read() at least
+ * once, to validate the operation */
+
+ if (nbytes > (size_t) SSIZE_MAX)
+ return -EINVAL;
+
+ do {
+ ssize_t k;
+
+ k = read(fd, p, nbytes);
+ if (k < 0) {
+ if (errno == EINTR)
+ continue;
+
+ if (errno == EAGAIN && do_poll) {
+
+ /* We knowingly ignore any return value here,
+ * and expect that any error/EOF is reported
+ * via read() */
+
+ (void) fd_wait_for_event(fd, POLLIN, USEC_INFINITY);
+ continue;
+ }
+
+ return n > 0 ? n : -errno;
+ }
+
+ if (k == 0)
+ return n;
+
+ assert((size_t) k <= nbytes);
+
+ p += k;
+ nbytes -= k;
+ n += k;
+ } while (nbytes > 0);
+
+ return n;
+}
+
+int loop_read_exact(int fd, void *buf, size_t nbytes, bool do_poll) {
+ ssize_t n;
+
+ n = loop_read(fd, buf, nbytes, do_poll);
+ if (n < 0)
+ return (int) n;
+ if ((size_t) n != nbytes)
+ return -EIO;
+
+ return 0;
+}
+
+int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll) {
+ const uint8_t *p = buf;
+
+ assert(fd >= 0);
+ assert(buf);
+
+ if (_unlikely_(nbytes > (size_t) SSIZE_MAX))
+ return -EINVAL;
+
+ do {
+ ssize_t k;
+
+ k = write(fd, p, nbytes);
+ if (k < 0) {
+ if (errno == EINTR)
+ continue;
+
+ if (errno == EAGAIN && do_poll) {
+ /* We knowingly ignore any return value here,
+ * and expect that any error/EOF is reported
+ * via write() */
+
+ (void) fd_wait_for_event(fd, POLLOUT, USEC_INFINITY);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ if (_unlikely_(nbytes > 0 && k == 0)) /* Can't really happen */
+ return -EIO;
+
+ assert((size_t) k <= nbytes);
+
+ p += k;
+ nbytes -= k;
+ } while (nbytes > 0);
+
+ return 0;
+}
+
+int pipe_eof(int fd) {
+ struct pollfd pollfd = {
+ .fd = fd,
+ .events = POLLIN|POLLHUP,
+ };
+
+ int r;
+
+ r = poll(&pollfd, 1, 0);
+ if (r < 0)
+ return -errno;
+
+ if (r == 0)
+ return 0;
+
+ return pollfd.revents & POLLHUP;
+}
+
+int fd_wait_for_event(int fd, int event, usec_t t) {
+
+ struct pollfd pollfd = {
+ .fd = fd,
+ .events = event,
+ };
+
+ struct timespec ts;
+ int r;
+
+ r = ppoll(&pollfd, 1, t == USEC_INFINITY ? NULL : timespec_store(&ts, t), NULL);
+ if (r < 0)
+ return -errno;
+ if (r == 0)
+ return 0;
+
+ return pollfd.revents;
+}
+
+static size_t nul_length(const uint8_t *p, size_t sz) {
+ size_t n = 0;
+
+ while (sz > 0) {
+ if (*p != 0)
+ break;
+
+ n++;
+ p++;
+ sz--;
+ }
+
+ return n;
+}
+
+ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length) {
+ const uint8_t *q, *w, *e;
+ ssize_t l;
+
+ q = w = p;
+ e = q + sz;
+ while (q < e) {
+ size_t n;
+
+ n = nul_length(q, e - q);
+
+ /* If there are more than the specified run length of
+ * NUL bytes, or if this is the beginning or the end
+ * of the buffer, then seek instead of write */
+ if ((n > run_length) ||
+ (n > 0 && q == p) ||
+ (n > 0 && q + n >= e)) {
+ if (q > w) {
+ l = write(fd, w, q - w);
+ if (l < 0)
+ return -errno;
+ if (l != q -w)
+ return -EIO;
+ }
+
+ if (lseek(fd, n, SEEK_CUR) == (off_t) -1)
+ return -errno;
+
+ q += n;
+ w = q;
+ } else if (n > 0)
+ q += n;
+ else
+ q++;
+ }
+
+ if (q > w) {
+ l = write(fd, w, q - w);
+ if (l < 0)
+ return -errno;
+ if (l != q - w)
+ return -EIO;
+ }
+
+ return q - (const uint8_t*) p;
+}
+
+char* set_iovec_string_field(struct iovec *iovec, size_t *n_iovec, const char *field, const char *value) {
+ char *x;
+
+ x = strappend(field, value);
+ if (x)
+ iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(x);
+ return x;
+}
diff --git a/src/basic/io-util.h b/src/basic/io-util.h
new file mode 100644
index 0000000..792a64a
--- /dev/null
+++ b/src/basic/io-util.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include "macro.h"
+#include "time-util.h"
+
+int flush_fd(int fd);
+
+ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll);
+int loop_read_exact(int fd, void *buf, size_t nbytes, bool do_poll);
+int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll);
+
+int pipe_eof(int fd);
+
+int fd_wait_for_event(int fd, int event, usec_t timeout);
+
+ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length);
+
+static inline size_t IOVEC_TOTAL_SIZE(const struct iovec *i, size_t n) {
+ size_t j, r = 0;
+
+ for (j = 0; j < n; j++)
+ r += i[j].iov_len;
+
+ return r;
+}
+
+static inline size_t IOVEC_INCREMENT(struct iovec *i, size_t n, size_t k) {
+ size_t j;
+
+ for (j = 0; j < n; j++) {
+ size_t sub;
+
+ if (_unlikely_(k <= 0))
+ break;
+
+ sub = MIN(i[j].iov_len, k);
+ i[j].iov_len -= sub;
+ i[j].iov_base = (uint8_t*) i[j].iov_base + sub;
+ k -= sub;
+ }
+
+ return k;
+}
+
+static inline bool FILE_SIZE_VALID(uint64_t l) {
+ /* ftruncate() and friends take an unsigned file size, but actually cannot deal with file sizes larger than
+ * 2^63 since the kernel internally handles it as signed value. This call allows checking for this early. */
+
+ return (l >> 63) == 0;
+}
+
+static inline bool FILE_SIZE_VALID_OR_INFINITY(uint64_t l) {
+
+ /* Same as above, but allows one extra value: -1 as indication for infinity. */
+
+ if (l == (uint64_t) -1)
+ return true;
+
+ return FILE_SIZE_VALID(l);
+
+}
+
+#define IOVEC_INIT(base, len) { .iov_base = (base), .iov_len = (len) }
+#define IOVEC_MAKE(base, len) (struct iovec) IOVEC_INIT(base, len)
+#define IOVEC_INIT_STRING(string) IOVEC_INIT((char*) string, strlen(string))
+#define IOVEC_MAKE_STRING(string) (struct iovec) IOVEC_INIT_STRING(string)
+
+char* set_iovec_string_field(struct iovec *iovec, size_t *n_iovec, const char *field, const char *value);
diff --git a/src/basic/ioprio.h b/src/basic/ioprio.h
new file mode 100644
index 0000000..3fb168d
--- /dev/null
+++ b/src/basic/ioprio.h
@@ -0,0 +1,56 @@
+#ifndef IOPRIO_H
+#define IOPRIO_H
+
+/* This is minimal version of Linux' linux/ioprio.h header file, which
+ * is licensed GPL2 */
+
+#include <sys/syscall.h>
+#include <unistd.h>
+
+/*
+ * Gives us 8 prio classes with 13-bits of data for each class
+ */
+#define IOPRIO_BITS 16
+#define IOPRIO_N_CLASSES 8
+#define IOPRIO_CLASS_SHIFT 13
+#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1)
+
+#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT)
+#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK)
+#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data)
+
+#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE)
+
+/*
+ * These are the io priority groups as implemented by CFQ. RT is the realtime
+ * class, it always gets premium service. BE is the best-effort scheduling
+ * class, the default for any process. IDLE is the idle scheduling class, it
+ * is only served when no one else is using the disk.
+ */
+enum {
+ IOPRIO_CLASS_NONE,
+ IOPRIO_CLASS_RT,
+ IOPRIO_CLASS_BE,
+ IOPRIO_CLASS_IDLE,
+};
+
+/*
+ * 8 best effort priority levels are supported
+ */
+#define IOPRIO_BE_NR (8)
+
+enum {
+ IOPRIO_WHO_PROCESS = 1,
+ IOPRIO_WHO_PGRP,
+ IOPRIO_WHO_USER,
+};
+
+static inline int ioprio_set(int which, int who, int ioprio) {
+ return syscall(__NR_ioprio_set, which, who, ioprio);
+}
+
+static inline int ioprio_get(int which, int who) {
+ return syscall(__NR_ioprio_get, which, who);
+}
+
+#endif
diff --git a/src/basic/khash.c b/src/basic/khash.c
new file mode 100644
index 0000000..847f257
--- /dev/null
+++ b/src/basic/khash.c
@@ -0,0 +1,322 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <linux/if_alg.h>
+#include <stdbool.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hexdecoct.h"
+#include "khash.h"
+#include "macro.h"
+#include "missing.h"
+#include "string-util.h"
+#include "util.h"
+
+/* On current kernels the maximum digest (according to "grep digestsize /proc/crypto | sort -u") is actually 32, but
+ * let's add some extra room, the few wasted bytes don't really matter... */
+#define LONGEST_DIGEST 128
+
+struct khash {
+ int fd;
+ char *algorithm;
+ uint8_t digest[LONGEST_DIGEST+1];
+ size_t digest_size;
+ bool digest_valid;
+};
+
+int khash_supported(void) {
+ static const union {
+ struct sockaddr sa;
+ struct sockaddr_alg alg;
+ } sa = {
+ .alg.salg_family = AF_ALG,
+ .alg.salg_type = "hash",
+ .alg.salg_name = "sha256", /* a very common algorithm */
+ };
+
+ static int cached = -1;
+
+ if (cached < 0) {
+ _cleanup_close_ int fd1 = -1, fd2 = -1;
+ uint8_t buf[LONGEST_DIGEST+1];
+
+ fd1 = socket(AF_ALG, SOCK_SEQPACKET|SOCK_CLOEXEC, 0);
+ if (fd1 < 0) {
+ /* The kernel returns EAFNOSUPPORT if AF_ALG is not supported at all */
+ if (IN_SET(errno, EAFNOSUPPORT, EOPNOTSUPP))
+ return (cached = false);
+
+ return -errno;
+ }
+
+ if (bind(fd1, &sa.sa, sizeof(sa)) < 0) {
+ /* The kernel returns ENOENT if the selected algorithm is not supported at all. We use a check
+ * for SHA256 as a proxy for whether the whole API is supported at all. After all it's one of
+ * the most common hash functions, and if it isn't supported, that's ample indication that
+ * something is really off. */
+
+ if (IN_SET(errno, ENOENT, EOPNOTSUPP))
+ return (cached = false);
+
+ return -errno;
+ }
+
+ fd2 = accept4(fd1, NULL, 0, SOCK_CLOEXEC);
+ if (fd2 < 0) {
+ if (errno == EOPNOTSUPP)
+ return (cached = false);
+
+ return -errno;
+ }
+
+ if (recv(fd2, buf, sizeof(buf), 0) < 0) {
+ /* On some kernels we get ENOKEY for non-keyed hash functions (such as sha256), let's refuse
+ * using the API in those cases, since the kernel is
+ * broken. https://github.com/systemd/systemd/issues/8278 */
+
+ if (IN_SET(errno, ENOKEY, EOPNOTSUPP))
+ return (cached = false);
+ }
+
+ cached = true;
+ }
+
+ return cached;
+}
+
+int khash_new_with_key(khash **ret, const char *algorithm, const void *key, size_t key_size) {
+ union {
+ struct sockaddr sa;
+ struct sockaddr_alg alg;
+ } sa = {
+ .alg.salg_family = AF_ALG,
+ .alg.salg_type = "hash",
+ };
+
+ _cleanup_(khash_unrefp) khash *h = NULL;
+ _cleanup_close_ int fd = -1;
+ int supported;
+ ssize_t n;
+
+ assert(ret);
+ assert(key || key_size == 0);
+
+ /* Filter out an empty algorithm early, as we do not support an algorithm by that name. */
+ if (isempty(algorithm))
+ return -EINVAL;
+
+ /* Overly long hash algorithm names we definitely do not support */
+ if (strlen(algorithm) >= sizeof(sa.alg.salg_name))
+ return -EOPNOTSUPP;
+
+ supported = khash_supported();
+ if (supported < 0)
+ return supported;
+ if (supported == 0)
+ return -EOPNOTSUPP;
+
+ fd = socket(AF_ALG, SOCK_SEQPACKET|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return -errno;
+
+ strcpy((char*) sa.alg.salg_name, algorithm);
+ if (bind(fd, &sa.sa, sizeof(sa)) < 0) {
+ if (errno == ENOENT)
+ return -EOPNOTSUPP;
+ return -errno;
+ }
+
+ if (key) {
+ if (setsockopt(fd, SOL_ALG, ALG_SET_KEY, key, key_size) < 0)
+ return -errno;
+ }
+
+ h = new0(khash, 1);
+ if (!h)
+ return -ENOMEM;
+
+ h->fd = accept4(fd, NULL, 0, SOCK_CLOEXEC);
+ if (h->fd < 0)
+ return -errno;
+
+ h->algorithm = strdup(algorithm);
+ if (!h->algorithm)
+ return -ENOMEM;
+
+ /* Temporary fix for rc kernel bug: https://bugzilla.redhat.com/show_bug.cgi?id=1395896 */
+ (void) send(h->fd, NULL, 0, 0);
+
+ /* Figure out the digest size */
+ n = recv(h->fd, h->digest, sizeof(h->digest), 0);
+ if (n < 0)
+ return -errno;
+ if (n >= LONGEST_DIGEST) /* longer than what we expected? If so, we don't support this */
+ return -EOPNOTSUPP;
+
+ h->digest_size = (size_t) n;
+ h->digest_valid = true;
+
+ /* Temporary fix for rc kernel bug: https://bugzilla.redhat.com/show_bug.cgi?id=1395896 */
+ (void) send(h->fd, NULL, 0, 0);
+
+ *ret = h;
+ h = NULL;
+
+ return 0;
+}
+
+int khash_new(khash **ret, const char *algorithm) {
+ return khash_new_with_key(ret, algorithm, NULL, 0);
+}
+
+khash* khash_unref(khash *h) {
+ if (!h)
+ return NULL;
+
+ safe_close(h->fd);
+ free(h->algorithm);
+ return mfree(h);
+}
+
+int khash_dup(khash *h, khash **ret) {
+ _cleanup_(khash_unrefp) khash *copy = NULL;
+
+ assert(h);
+ assert(ret);
+
+ copy = newdup(khash, h, 1);
+ if (!copy)
+ return -ENOMEM;
+
+ copy->fd = -1;
+ copy->algorithm = strdup(h->algorithm);
+ if (!copy->algorithm)
+ return -ENOMEM;
+
+ copy->fd = accept4(h->fd, NULL, 0, SOCK_CLOEXEC);
+ if (copy->fd < 0)
+ return -errno;
+
+ *ret = TAKE_PTR(copy);
+
+ return 0;
+}
+
+const char *khash_get_algorithm(khash *h) {
+ assert(h);
+
+ return h->algorithm;
+}
+
+size_t khash_get_size(khash *h) {
+ assert(h);
+
+ return h->digest_size;
+}
+
+int khash_reset(khash *h) {
+ ssize_t n;
+
+ assert(h);
+
+ n = send(h->fd, NULL, 0, 0);
+ if (n < 0)
+ return -errno;
+
+ h->digest_valid = false;
+
+ return 0;
+}
+
+int khash_put(khash *h, const void *buffer, size_t size) {
+ ssize_t n;
+
+ assert(h);
+ assert(buffer || size == 0);
+
+ if (size <= 0)
+ return 0;
+
+ n = send(h->fd, buffer, size, MSG_MORE);
+ if (n < 0)
+ return -errno;
+
+ h->digest_valid = false;
+
+ return 0;
+}
+
+int khash_put_iovec(khash *h, const struct iovec *iovec, size_t n) {
+ struct msghdr mh = {
+ mh.msg_iov = (struct iovec*) iovec,
+ mh.msg_iovlen = n,
+ };
+ ssize_t k;
+
+ assert(h);
+ assert(iovec || n == 0);
+
+ if (n <= 0)
+ return 0;
+
+ k = sendmsg(h->fd, &mh, MSG_MORE);
+ if (k < 0)
+ return -errno;
+
+ h->digest_valid = false;
+
+ return 0;
+}
+
+static int retrieve_digest(khash *h) {
+ ssize_t n;
+
+ assert(h);
+
+ if (h->digest_valid)
+ return 0;
+
+ n = recv(h->fd, h->digest, h->digest_size, 0);
+ if (n < 0)
+ return n;
+ if ((size_t) n != h->digest_size) /* digest size changed? */
+ return -EIO;
+
+ h->digest_valid = true;
+
+ return 0;
+}
+
+int khash_digest_data(khash *h, const void **ret) {
+ int r;
+
+ assert(h);
+ assert(ret);
+
+ r = retrieve_digest(h);
+ if (r < 0)
+ return r;
+
+ *ret = h->digest;
+ return 0;
+}
+
+int khash_digest_string(khash *h, char **ret) {
+ int r;
+ char *p;
+
+ assert(h);
+ assert(ret);
+
+ r = retrieve_digest(h);
+ if (r < 0)
+ return r;
+
+ p = hexmem(h->digest, h->digest_size);
+ if (!p)
+ return -ENOMEM;
+
+ *ret = p;
+ return 0;
+}
diff --git a/src/basic/khash.h b/src/basic/khash.h
new file mode 100644
index 0000000..a3013b9
--- /dev/null
+++ b/src/basic/khash.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include "macro.h"
+
+typedef struct khash khash;
+
+int khash_supported(void);
+
+/* For plain hash functions. Hash functions commonly supported on today's kernels are: crc32c, crct10dif, crc32,
+ * sha224, sha256, sha512, sha384, sha1, md5, md4, sha3-224, sha3-256, sha3-384, sha3-512, and more. */
+int khash_new(khash **ret, const char *algorithm);
+
+/* For keyed hash functions. Hash functions commonly supported on today's kernels are: hmac(sha256), cmac(aes),
+ * cmac(des3_ede), hmac(sha3-512), hmac(sha3-384), hmac(sha3-256), hmac(sha3-224), hmac(rmd160), hmac(rmd128),
+ * hmac(sha224), hmac(sha512), hmac(sha384), hmac(sha1), hmac(md5), and more. */
+int khash_new_with_key(khash **ret, const char *algorithm, const void *key, size_t key_size);
+
+int khash_dup(khash *h, khash **ret);
+khash* khash_unref(khash *h);
+
+const char *khash_get_algorithm(khash *h);
+size_t khash_get_size(khash *h);
+
+int khash_reset(khash *h);
+
+int khash_put(khash *h, const void *buffer, size_t size);
+int khash_put_iovec(khash *h, const struct iovec *iovec, size_t n);
+
+int khash_digest_data(khash *h, const void **ret);
+int khash_digest_string(khash *h, char **ret);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(khash*, khash_unref);
diff --git a/src/basic/label.c b/src/basic/label.c
new file mode 100644
index 0000000..12a7fb0
--- /dev/null
+++ b/src/basic/label.c
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "btrfs-util.h"
+#include "label.h"
+#include "macro.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+
+int label_fix(const char *path, LabelFixFlags flags) {
+ int r, q;
+
+ r = mac_selinux_fix(path, flags);
+ q = mac_smack_fix(path, flags);
+
+ if (r < 0)
+ return r;
+ if (q < 0)
+ return q;
+
+ return 0;
+}
+
+int symlink_label(const char *old_path, const char *new_path) {
+ int r;
+
+ assert(old_path);
+ assert(new_path);
+
+ r = mac_selinux_create_file_prepare(new_path, S_IFLNK);
+ if (r < 0)
+ return r;
+
+ if (symlink(old_path, new_path) < 0)
+ r = -errno;
+
+ mac_selinux_create_file_clear();
+
+ if (r < 0)
+ return r;
+
+ return mac_smack_fix(new_path, 0);
+}
+
+int btrfs_subvol_make_label(const char *path) {
+ int r;
+
+ assert(path);
+
+ r = mac_selinux_create_file_prepare(path, S_IFDIR);
+ if (r < 0)
+ return r;
+
+ r = btrfs_subvol_make(path);
+ mac_selinux_create_file_clear();
+
+ if (r < 0)
+ return r;
+
+ return mac_smack_fix(path, 0);
+}
diff --git a/src/basic/label.h b/src/basic/label.h
new file mode 100644
index 0000000..594fd65
--- /dev/null
+++ b/src/basic/label.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+typedef enum LabelFixFlags {
+ LABEL_IGNORE_ENOENT = 1 << 0,
+ LABEL_IGNORE_EROFS = 1 << 1,
+} LabelFixFlags;
+
+int label_fix(const char *path, LabelFixFlags flags);
+
+int mkdir_label(const char *path, mode_t mode);
+int mkdirat_label(int dirfd, const char *path, mode_t mode);
+int symlink_label(const char *old_path, const char *new_path);
+
+int btrfs_subvol_make_label(const char *path);
diff --git a/src/basic/list.h b/src/basic/list.h
new file mode 100644
index 0000000..f7f9700
--- /dev/null
+++ b/src/basic/list.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "macro.h"
+
+/* The head of the linked list. Use this in the structure that shall
+ * contain the head of the linked list */
+#define LIST_HEAD(t,name) \
+ t *name
+
+/* The pointers in the linked list's items. Use this in the item structure */
+#define LIST_FIELDS(t,name) \
+ t *name##_next, *name##_prev
+
+/* Initialize the list's head */
+#define LIST_HEAD_INIT(head) \
+ do { \
+ (head) = NULL; \
+ } while (false)
+
+/* Initialize a list item */
+#define LIST_INIT(name,item) \
+ do { \
+ typeof(*(item)) *_item = (item); \
+ assert(_item); \
+ _item->name##_prev = _item->name##_next = NULL; \
+ } while (false)
+
+/* Prepend an item to the list */
+#define LIST_PREPEND(name,head,item) \
+ do { \
+ typeof(*(head)) **_head = &(head), *_item = (item); \
+ assert(_item); \
+ if ((_item->name##_next = *_head)) \
+ _item->name##_next->name##_prev = _item; \
+ _item->name##_prev = NULL; \
+ *_head = _item; \
+ } while (false)
+
+/* Append an item to the list */
+#define LIST_APPEND(name,head,item) \
+ do { \
+ typeof(*(head)) **_hhead = &(head), *_tail; \
+ LIST_FIND_TAIL(name, *_hhead, _tail); \
+ LIST_INSERT_AFTER(name, *_hhead, _tail, item); \
+ } while (false)
+
+/* Remove an item from the list */
+#define LIST_REMOVE(name,head,item) \
+ do { \
+ typeof(*(head)) **_head = &(head), *_item = (item); \
+ assert(_item); \
+ if (_item->name##_next) \
+ _item->name##_next->name##_prev = _item->name##_prev; \
+ if (_item->name##_prev) \
+ _item->name##_prev->name##_next = _item->name##_next; \
+ else { \
+ assert(*_head == _item); \
+ *_head = _item->name##_next; \
+ } \
+ _item->name##_next = _item->name##_prev = NULL; \
+ } while (false)
+
+/* Find the head of the list */
+#define LIST_FIND_HEAD(name,item,head) \
+ do { \
+ typeof(*(item)) *_item = (item); \
+ if (!_item) \
+ (head) = NULL; \
+ else { \
+ while (_item->name##_prev) \
+ _item = _item->name##_prev; \
+ (head) = _item; \
+ } \
+ } while (false)
+
+/* Find the tail of the list */
+#define LIST_FIND_TAIL(name,item,tail) \
+ do { \
+ typeof(*(item)) *_item = (item); \
+ if (!_item) \
+ (tail) = NULL; \
+ else { \
+ while (_item->name##_next) \
+ _item = _item->name##_next; \
+ (tail) = _item; \
+ } \
+ } while (false)
+
+/* Insert an item after another one (a = where, b = what) */
+#define LIST_INSERT_AFTER(name,head,a,b) \
+ do { \
+ typeof(*(head)) **_head = &(head), *_a = (a), *_b = (b); \
+ assert(_b); \
+ if (!_a) { \
+ if ((_b->name##_next = *_head)) \
+ _b->name##_next->name##_prev = _b; \
+ _b->name##_prev = NULL; \
+ *_head = _b; \
+ } else { \
+ if ((_b->name##_next = _a->name##_next)) \
+ _b->name##_next->name##_prev = _b; \
+ _b->name##_prev = _a; \
+ _a->name##_next = _b; \
+ } \
+ } while (false)
+
+/* Insert an item before another one (a = where, b = what) */
+#define LIST_INSERT_BEFORE(name,head,a,b) \
+ do { \
+ typeof(*(head)) **_head = &(head), *_a = (a), *_b = (b); \
+ assert(_b); \
+ if (!_a) { \
+ if (!*_head) { \
+ _b->name##_next = NULL; \
+ _b->name##_prev = NULL; \
+ *_head = _b; \
+ } else { \
+ typeof(*(head)) *_tail = (head); \
+ while (_tail->name##_next) \
+ _tail = _tail->name##_next; \
+ _b->name##_next = NULL; \
+ _b->name##_prev = _tail; \
+ _tail->name##_next = _b; \
+ } \
+ } else { \
+ if ((_b->name##_prev = _a->name##_prev)) \
+ _b->name##_prev->name##_next = _b; \
+ else \
+ *_head = _b; \
+ _b->name##_next = _a; \
+ _a->name##_prev = _b; \
+ } \
+ } while (false)
+
+#define LIST_JUST_US(name,item) \
+ (!(item)->name##_prev && !(item)->name##_next) \
+
+#define LIST_FOREACH(name,i,head) \
+ for ((i) = (head); (i); (i) = (i)->name##_next)
+
+#define LIST_FOREACH_SAFE(name,i,n,head) \
+ for ((i) = (head); (i) && (((n) = (i)->name##_next), 1); (i) = (n))
+
+#define LIST_FOREACH_BEFORE(name,i,p) \
+ for ((i) = (p)->name##_prev; (i); (i) = (i)->name##_prev)
+
+#define LIST_FOREACH_AFTER(name,i,p) \
+ for ((i) = (p)->name##_next; (i); (i) = (i)->name##_next)
+
+/* Iterate through all the members of the list p is included in, but skip over p */
+#define LIST_FOREACH_OTHERS(name,i,p) \
+ for (({ \
+ (i) = (p); \
+ while ((i) && (i)->name##_prev) \
+ (i) = (i)->name##_prev; \
+ if ((i) == (p)) \
+ (i) = (p)->name##_next; \
+ }); \
+ (i); \
+ (i) = (i)->name##_next == (p) ? (p)->name##_next : (i)->name##_next)
+
+/* Loop starting from p->next until p->prev.
+ p can be adjusted meanwhile. */
+#define LIST_LOOP_BUT_ONE(name,i,head,p) \
+ for ((i) = (p)->name##_next ? (p)->name##_next : (head); \
+ (i) != (p); \
+ (i) = (i)->name##_next ? (i)->name##_next : (head))
+
+#define LIST_IS_EMPTY(head) \
+ (!(head))
diff --git a/src/basic/locale-util.c b/src/basic/locale-util.c
new file mode 100644
index 0000000..fc1577a
--- /dev/null
+++ b/src/basic/locale-util.c
@@ -0,0 +1,460 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <ftw.h>
+#include <langinfo.h>
+#include <libintl.h>
+#include <locale.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "def.h"
+#include "dirent-util.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "locale-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+
+static int add_locales_from_archive(Set *locales) {
+ /* Stolen from glibc... */
+
+ struct locarhead {
+ uint32_t magic;
+ /* Serial number. */
+ uint32_t serial;
+ /* Name hash table. */
+ uint32_t namehash_offset;
+ uint32_t namehash_used;
+ uint32_t namehash_size;
+ /* String table. */
+ uint32_t string_offset;
+ uint32_t string_used;
+ uint32_t string_size;
+ /* Table with locale records. */
+ uint32_t locrectab_offset;
+ uint32_t locrectab_used;
+ uint32_t locrectab_size;
+ /* MD5 sum hash table. */
+ uint32_t sumhash_offset;
+ uint32_t sumhash_used;
+ uint32_t sumhash_size;
+ };
+
+ struct namehashent {
+ /* Hash value of the name. */
+ uint32_t hashval;
+ /* Offset of the name in the string table. */
+ uint32_t name_offset;
+ /* Offset of the locale record. */
+ uint32_t locrec_offset;
+ };
+
+ const struct locarhead *h;
+ const struct namehashent *e;
+ const void *p = MAP_FAILED;
+ _cleanup_close_ int fd = -1;
+ size_t sz = 0;
+ struct stat st;
+ size_t i;
+ int r;
+
+ fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISREG(st.st_mode))
+ return -EBADMSG;
+
+ if (st.st_size < (off_t) sizeof(struct locarhead))
+ return -EBADMSG;
+
+ p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (p == MAP_FAILED)
+ return -errno;
+
+ h = (const struct locarhead *) p;
+ if (h->magic != 0xde020109 ||
+ h->namehash_offset + h->namehash_size > st.st_size ||
+ h->string_offset + h->string_size > st.st_size ||
+ h->locrectab_offset + h->locrectab_size > st.st_size ||
+ h->sumhash_offset + h->sumhash_size > st.st_size) {
+ r = -EBADMSG;
+ goto finish;
+ }
+
+ e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
+ for (i = 0; i < h->namehash_size; i++) {
+ char *z;
+
+ if (e[i].locrec_offset == 0)
+ continue;
+
+ if (!utf8_is_valid((char*) p + e[i].name_offset))
+ continue;
+
+ z = strdup((char*) p + e[i].name_offset);
+ if (!z) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = set_consume(locales, z);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = 0;
+
+ finish:
+ if (p != MAP_FAILED)
+ munmap((void*) p, sz);
+
+ return r;
+}
+
+static int add_locales_from_libdir (Set *locales) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *entry;
+ int r;
+
+ dir = opendir("/usr/lib/locale");
+ if (!dir)
+ return errno == ENOENT ? 0 : -errno;
+
+ FOREACH_DIRENT(entry, dir, return -errno) {
+ char *z;
+
+ dirent_ensure_type(dir, entry);
+
+ if (entry->d_type != DT_DIR)
+ continue;
+
+ z = strdup(entry->d_name);
+ if (!z)
+ return -ENOMEM;
+
+ r = set_consume(locales, z);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ return 0;
+}
+
+int get_locales(char ***ret) {
+ _cleanup_set_free_ Set *locales = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+ locales = set_new(&string_hash_ops);
+ if (!locales)
+ return -ENOMEM;
+
+ r = add_locales_from_archive(locales);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ r = add_locales_from_libdir(locales);
+ if (r < 0)
+ return r;
+
+ l = set_get_strv(locales);
+ if (!l)
+ return -ENOMEM;
+
+ strv_sort(l);
+
+ *ret = TAKE_PTR(l);
+
+ return 0;
+}
+
+bool locale_is_valid(const char *name) {
+
+ if (isempty(name))
+ return false;
+
+ if (strlen(name) >= 128)
+ return false;
+
+ if (!utf8_is_valid(name))
+ return false;
+
+ if (!filename_is_valid(name))
+ return false;
+
+ if (!string_is_safe(name))
+ return false;
+
+ return true;
+}
+
+void init_gettext(void) {
+ setlocale(LC_ALL, "");
+ textdomain(GETTEXT_PACKAGE);
+}
+
+bool is_locale_utf8(void) {
+ const char *set;
+ static int cached_answer = -1;
+
+ /* Note that we default to 'true' here, since today UTF8 is
+ * pretty much supported everywhere. */
+
+ if (cached_answer >= 0)
+ goto out;
+
+ if (!setlocale(LC_ALL, "")) {
+ cached_answer = true;
+ goto out;
+ }
+
+ set = nl_langinfo(CODESET);
+ if (!set) {
+ cached_answer = true;
+ goto out;
+ }
+
+ if (streq(set, "UTF-8")) {
+ cached_answer = true;
+ goto out;
+ }
+
+ /* For LC_CTYPE=="C" return true, because CTYPE is effectly
+ * unset and everything can do to UTF-8 nowadays. */
+ set = setlocale(LC_CTYPE, NULL);
+ if (!set) {
+ cached_answer = true;
+ goto out;
+ }
+
+ /* Check result, but ignore the result if C was set
+ * explicitly. */
+ cached_answer =
+ STR_IN_SET(set, "C", "POSIX") &&
+ !getenv("LC_ALL") &&
+ !getenv("LC_CTYPE") &&
+ !getenv("LANG");
+
+out:
+ return (bool) cached_answer;
+}
+
+static thread_local Set *keymaps = NULL;
+
+static int nftw_cb(
+ const char *fpath,
+ const struct stat *sb,
+ int tflag,
+ struct FTW *ftwbuf) {
+
+ char *p, *e;
+ int r;
+
+ if (tflag != FTW_F)
+ return 0;
+
+ if (!endswith(fpath, ".map") &&
+ !endswith(fpath, ".map.gz"))
+ return 0;
+
+ p = strdup(basename(fpath));
+ if (!p)
+ return FTW_STOP;
+
+ e = endswith(p, ".map");
+ if (e)
+ *e = 0;
+
+ e = endswith(p, ".map.gz");
+ if (e)
+ *e = 0;
+
+ r = set_consume(keymaps, p);
+ if (r < 0 && r != -EEXIST)
+ return r;
+
+ return 0;
+}
+
+int get_keymaps(char ***ret) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char *dir;
+ int r;
+
+ keymaps = set_new(&string_hash_ops);
+ if (!keymaps)
+ return -ENOMEM;
+
+ NULSTR_FOREACH(dir, KBD_KEYMAP_DIRS) {
+ r = nftw(dir, nftw_cb, 20, FTW_PHYS|FTW_ACTIONRETVAL);
+
+ if (r == FTW_STOP)
+ log_debug("Directory not found %s", dir);
+ else if (r < 0)
+ log_debug_errno(r, "Can't add keymap: %m");
+ }
+
+ l = set_get_strv(keymaps);
+ if (!l) {
+ set_free_free(keymaps);
+ return -ENOMEM;
+ }
+
+ set_free(keymaps);
+
+ if (strv_isempty(l))
+ return -ENOENT;
+
+ strv_sort(l);
+
+ *ret = TAKE_PTR(l);
+
+ return 0;
+}
+
+bool keymap_is_valid(const char *name) {
+
+ if (isempty(name))
+ return false;
+
+ if (strlen(name) >= 128)
+ return false;
+
+ if (!utf8_is_valid(name))
+ return false;
+
+ if (!filename_is_valid(name))
+ return false;
+
+ if (!string_is_safe(name))
+ return false;
+
+ return true;
+}
+
+static bool emoji_enabled(void) {
+ static int cached_emoji_enabled = -1;
+
+ if (cached_emoji_enabled < 0) {
+ int val;
+
+ val = getenv_bool("SYSTEMD_EMOJI");
+ if (val < 0)
+ cached_emoji_enabled =
+ is_locale_utf8() &&
+ !STRPTR_IN_SET(getenv("TERM"), "dumb", "linux");
+ else
+ cached_emoji_enabled = val;
+ }
+
+ return cached_emoji_enabled;
+}
+
+const char *special_glyph(SpecialGlyph code) {
+
+ /* A list of a number of interesting unicode glyphs we can use to decorate our output. It's probably wise to be
+ * conservative here, and primarily stick to the glyphs defined in the eurlatgr font, so that display still
+ * works reasonably well on the Linux console. For details see:
+ *
+ * http://git.altlinux.org/people/legion/packages/kbd.git?p=kbd.git;a=blob;f=data/consolefonts/README.eurlatgr
+ */
+
+ static const char* const draw_table[2][_SPECIAL_GLYPH_MAX] = {
+ /* ASCII fallback */
+ [false] = {
+ [SPECIAL_GLYPH_TREE_VERTICAL] = "| ",
+ [SPECIAL_GLYPH_TREE_BRANCH] = "|-",
+ [SPECIAL_GLYPH_TREE_RIGHT] = "`-",
+ [SPECIAL_GLYPH_TREE_SPACE] = " ",
+ [SPECIAL_GLYPH_TRIANGULAR_BULLET] = ">",
+ [SPECIAL_GLYPH_BLACK_CIRCLE] = "*",
+ [SPECIAL_GLYPH_BULLET] = "*",
+ [SPECIAL_GLYPH_ARROW] = "->",
+ [SPECIAL_GLYPH_MDASH] = "-",
+ [SPECIAL_GLYPH_ELLIPSIS] = "...",
+ [SPECIAL_GLYPH_MU] = "u",
+ [SPECIAL_GLYPH_CHECK_MARK] = "+",
+ [SPECIAL_GLYPH_CROSS_MARK] = "-",
+ [SPECIAL_GLYPH_ECSTATIC_SMILEY] = ":-]",
+ [SPECIAL_GLYPH_HAPPY_SMILEY] = ":-}",
+ [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = ":-)",
+ [SPECIAL_GLYPH_NEUTRAL_SMILEY] = ":-|",
+ [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = ":-(",
+ [SPECIAL_GLYPH_UNHAPPY_SMILEY] = ":-{️",
+ [SPECIAL_GLYPH_DEPRESSED_SMILEY] = ":-[",
+ },
+
+ /* UTF-8 */
+ [true] = {
+ [SPECIAL_GLYPH_TREE_VERTICAL] = "\342\224\202 ", /* │ */
+ [SPECIAL_GLYPH_TREE_BRANCH] = "\342\224\234\342\224\200", /* ├─ */
+ [SPECIAL_GLYPH_TREE_RIGHT] = "\342\224\224\342\224\200", /* └─ */
+ [SPECIAL_GLYPH_TREE_SPACE] = " ", /* */
+ [SPECIAL_GLYPH_TRIANGULAR_BULLET] = "\342\200\243", /* ‣ */
+ [SPECIAL_GLYPH_BLACK_CIRCLE] = "\342\227\217", /* ● */
+ [SPECIAL_GLYPH_BULLET] = "\342\200\242", /* • */
+ [SPECIAL_GLYPH_ARROW] = "\342\206\222", /* → */
+ [SPECIAL_GLYPH_MDASH] = "\342\200\223", /* – */
+ [SPECIAL_GLYPH_ELLIPSIS] = "\342\200\246", /* … */
+ [SPECIAL_GLYPH_MU] = "\316\274", /* μ */
+ [SPECIAL_GLYPH_CHECK_MARK] = "\342\234\223", /* ✓ */
+ [SPECIAL_GLYPH_CROSS_MARK] = "\342\234\227", /* ✗ */
+ [SPECIAL_GLYPH_ECSTATIC_SMILEY] = "\360\237\230\207", /* 😇 */
+ [SPECIAL_GLYPH_HAPPY_SMILEY] = "\360\237\230\200", /* 😀 */
+ [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = "\360\237\231\202", /* 🙂 */
+ [SPECIAL_GLYPH_NEUTRAL_SMILEY] = "\360\237\230\220", /* 😐 */
+ [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = "\360\237\231\201", /* 🙁 */
+ [SPECIAL_GLYPH_UNHAPPY_SMILEY] = "\360\237\230\250", /* 😨️️ */
+ [SPECIAL_GLYPH_DEPRESSED_SMILEY] = "\360\237\244\242", /* 🤢 */
+ },
+ };
+
+ assert(code < _SPECIAL_GLYPH_MAX);
+
+ return draw_table[code >= _SPECIAL_GLYPH_FIRST_SMILEY ? emoji_enabled() : is_locale_utf8()][code];
+}
+
+void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
+ LocaleVariable i;
+
+ if (!l)
+ return;
+
+ for (i = 0; i < _VARIABLE_LC_MAX; i++)
+ l[i] = mfree(l[i]);
+}
+
+static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
+ [VARIABLE_LANG] = "LANG",
+ [VARIABLE_LANGUAGE] = "LANGUAGE",
+ [VARIABLE_LC_CTYPE] = "LC_CTYPE",
+ [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
+ [VARIABLE_LC_TIME] = "LC_TIME",
+ [VARIABLE_LC_COLLATE] = "LC_COLLATE",
+ [VARIABLE_LC_MONETARY] = "LC_MONETARY",
+ [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
+ [VARIABLE_LC_PAPER] = "LC_PAPER",
+ [VARIABLE_LC_NAME] = "LC_NAME",
+ [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
+ [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
+ [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
+ [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);
diff --git a/src/basic/locale-util.h b/src/basic/locale-util.h
new file mode 100644
index 0000000..e64f0ce
--- /dev/null
+++ b/src/basic/locale-util.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <libintl.h>
+#include <stdbool.h>
+#include <locale.h>
+
+#include "macro.h"
+
+typedef enum LocaleVariable {
+ /* We don't list LC_ALL here on purpose. People should be
+ * using LANG instead. */
+
+ VARIABLE_LANG,
+ VARIABLE_LANGUAGE,
+ VARIABLE_LC_CTYPE,
+ VARIABLE_LC_NUMERIC,
+ VARIABLE_LC_TIME,
+ VARIABLE_LC_COLLATE,
+ VARIABLE_LC_MONETARY,
+ VARIABLE_LC_MESSAGES,
+ VARIABLE_LC_PAPER,
+ VARIABLE_LC_NAME,
+ VARIABLE_LC_ADDRESS,
+ VARIABLE_LC_TELEPHONE,
+ VARIABLE_LC_MEASUREMENT,
+ VARIABLE_LC_IDENTIFICATION,
+ _VARIABLE_LC_MAX,
+ _VARIABLE_LC_INVALID = -1
+} LocaleVariable;
+
+int get_locales(char ***l);
+bool locale_is_valid(const char *name);
+
+#define _(String) gettext(String)
+#define N_(String) String
+void init_gettext(void);
+
+bool is_locale_utf8(void);
+
+typedef enum {
+ SPECIAL_GLYPH_TREE_VERTICAL,
+ SPECIAL_GLYPH_TREE_BRANCH,
+ SPECIAL_GLYPH_TREE_RIGHT,
+ SPECIAL_GLYPH_TREE_SPACE,
+ SPECIAL_GLYPH_TRIANGULAR_BULLET,
+ SPECIAL_GLYPH_BLACK_CIRCLE,
+ SPECIAL_GLYPH_BULLET,
+ SPECIAL_GLYPH_ARROW,
+ SPECIAL_GLYPH_MDASH,
+ SPECIAL_GLYPH_ELLIPSIS,
+ SPECIAL_GLYPH_MU,
+ SPECIAL_GLYPH_CHECK_MARK,
+ SPECIAL_GLYPH_CROSS_MARK,
+ _SPECIAL_GLYPH_FIRST_SMILEY,
+ SPECIAL_GLYPH_ECSTATIC_SMILEY = _SPECIAL_GLYPH_FIRST_SMILEY,
+ SPECIAL_GLYPH_HAPPY_SMILEY,
+ SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY,
+ SPECIAL_GLYPH_NEUTRAL_SMILEY,
+ SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY,
+ SPECIAL_GLYPH_UNHAPPY_SMILEY,
+ SPECIAL_GLYPH_DEPRESSED_SMILEY,
+ _SPECIAL_GLYPH_MAX
+} SpecialGlyph;
+
+const char *special_glyph(SpecialGlyph code) _const_;
+
+const char* locale_variable_to_string(LocaleVariable i) _const_;
+LocaleVariable locale_variable_from_string(const char *s) _pure_;
+
+int get_keymaps(char ***l);
+bool keymap_is_valid(const char *name);
+
+static inline void freelocalep(locale_t *p) {
+ if (*p == (locale_t) 0)
+ return;
+
+ freelocale(*p);
+}
+
+void locale_variables_free(char* l[_VARIABLE_LC_MAX]);
+static inline void locale_variables_freep(char*(*l)[_VARIABLE_LC_MAX]) {
+ locale_variables_free(*l);
+}
diff --git a/src/basic/log.c b/src/basic/log.c
new file mode 100644
index 0000000..0486027
--- /dev/null
+++ b/src/basic/log.c
@@ -0,0 +1,1324 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/signalfd.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+
+#define SNDBUF_SIZE (8*1024*1024)
+
+static LogTarget log_target = LOG_TARGET_CONSOLE;
+static int log_max_level[] = {LOG_INFO, LOG_INFO};
+assert_cc(ELEMENTSOF(log_max_level) == _LOG_REALM_MAX);
+static int log_facility = LOG_DAEMON;
+
+static int console_fd = STDERR_FILENO;
+static int syslog_fd = -1;
+static int kmsg_fd = -1;
+static int journal_fd = -1;
+
+static bool syslog_is_stream = false;
+
+static bool show_color = false;
+static bool show_location = false;
+
+static bool upgrade_syslog_to_journal = false;
+static bool always_reopen_console = false;
+static bool open_when_needed = false;
+static bool prohibit_ipc = false;
+
+/* Akin to glibc's __abort_msg; which is private and we hence cannot
+ * use here. */
+static char *log_abort_msg = NULL;
+
+/* An assert to use in logging functions that does not call recursively
+ * into our logging functions (since that might lead to a loop). */
+#define assert_raw(expr) \
+ do { \
+ if (_unlikely_(!(expr))) { \
+ fputs(#expr "\n", stderr); \
+ abort(); \
+ } \
+ } while (false)
+
+static void log_close_console(void) {
+ console_fd = safe_close_above_stdio(console_fd);
+}
+
+static int log_open_console(void) {
+
+ if (!always_reopen_console) {
+ console_fd = STDERR_FILENO;
+ return 0;
+ }
+
+ if (console_fd < 3) {
+ console_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
+ if (console_fd < 0)
+ return console_fd;
+
+ console_fd = fd_move_above_stdio(console_fd);
+ }
+
+ return 0;
+}
+
+static void log_close_kmsg(void) {
+ kmsg_fd = safe_close(kmsg_fd);
+}
+
+static int log_open_kmsg(void) {
+
+ if (kmsg_fd >= 0)
+ return 0;
+
+ kmsg_fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
+ if (kmsg_fd < 0)
+ return -errno;
+
+ kmsg_fd = fd_move_above_stdio(kmsg_fd);
+ return 0;
+}
+
+static void log_close_syslog(void) {
+ syslog_fd = safe_close(syslog_fd);
+}
+
+static int create_log_socket(int type) {
+ struct timeval tv;
+ int fd;
+
+ fd = socket(AF_UNIX, type|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return -errno;
+
+ fd = fd_move_above_stdio(fd);
+ (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
+
+ /* We need a blocking fd here since we'd otherwise lose messages way too early. However, let's not hang forever
+ * in the unlikely case of a deadlock. */
+ if (getpid_cached() == 1)
+ timeval_store(&tv, 10 * USEC_PER_MSEC);
+ else
+ timeval_store(&tv, 10 * USEC_PER_SEC);
+ (void) setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
+
+ return fd;
+}
+
+static int log_open_syslog(void) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/dev/log",
+ };
+
+ int r;
+
+ if (syslog_fd >= 0)
+ return 0;
+
+ syslog_fd = create_log_socket(SOCK_DGRAM);
+ if (syslog_fd < 0) {
+ r = syslog_fd;
+ goto fail;
+ }
+
+ if (connect(syslog_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
+ safe_close(syslog_fd);
+
+ /* Some legacy syslog systems still use stream
+ * sockets. They really shouldn't. But what can we
+ * do... */
+ syslog_fd = create_log_socket(SOCK_STREAM);
+ if (syslog_fd < 0) {
+ r = syslog_fd;
+ goto fail;
+ }
+
+ if (connect(syslog_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ syslog_is_stream = true;
+ } else
+ syslog_is_stream = false;
+
+ return 0;
+
+fail:
+ log_close_syslog();
+ return r;
+}
+
+static void log_close_journal(void) {
+ journal_fd = safe_close(journal_fd);
+}
+
+static int log_open_journal(void) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/socket",
+ };
+
+ int r;
+
+ if (journal_fd >= 0)
+ return 0;
+
+ journal_fd = create_log_socket(SOCK_DGRAM);
+ if (journal_fd < 0) {
+ r = journal_fd;
+ goto fail;
+ }
+
+ if (connect(journal_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ log_close_journal();
+ return r;
+}
+
+int log_open(void) {
+ int r;
+
+ /* Do not call from library code. */
+
+ /* If we don't use the console we close it here, to not get
+ * killed by SAK. If we don't use syslog we close it here so
+ * that we are not confused by somebody deleting the socket in
+ * the fs, and to make sure we don't use it if prohibit_ipc is
+ * set. If we don't use /dev/kmsg we still keep it open,
+ * because there is no reason to close it. */
+
+ if (log_target == LOG_TARGET_NULL) {
+ log_close_journal();
+ log_close_syslog();
+ log_close_console();
+ return 0;
+ }
+
+ if (log_target != LOG_TARGET_AUTO ||
+ getpid_cached() == 1 ||
+ isatty(STDERR_FILENO) <= 0) {
+
+ if (!prohibit_ipc &&
+ IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_JOURNAL)) {
+ r = log_open_journal();
+ if (r >= 0) {
+ log_close_syslog();
+ log_close_console();
+ return r;
+ }
+ }
+
+ if (!prohibit_ipc &&
+ IN_SET(log_target, LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_SYSLOG)) {
+ r = log_open_syslog();
+ if (r >= 0) {
+ log_close_journal();
+ log_close_console();
+ return r;
+ }
+ }
+
+ if (IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_KMSG)) {
+ r = log_open_kmsg();
+ if (r >= 0) {
+ log_close_journal();
+ log_close_syslog();
+ log_close_console();
+ return r;
+ }
+ }
+ }
+
+ log_close_journal();
+ log_close_syslog();
+
+ return log_open_console();
+}
+
+void log_set_target(LogTarget target) {
+ assert(target >= 0);
+ assert(target < _LOG_TARGET_MAX);
+
+ if (upgrade_syslog_to_journal) {
+ if (target == LOG_TARGET_SYSLOG)
+ target = LOG_TARGET_JOURNAL;
+ else if (target == LOG_TARGET_SYSLOG_OR_KMSG)
+ target = LOG_TARGET_JOURNAL_OR_KMSG;
+ }
+
+ log_target = target;
+}
+
+void log_close(void) {
+ /* Do not call from library code. */
+
+ log_close_journal();
+ log_close_syslog();
+ log_close_kmsg();
+ log_close_console();
+}
+
+void log_forget_fds(void) {
+ /* Do not call from library code. */
+
+ console_fd = kmsg_fd = syslog_fd = journal_fd = -1;
+}
+
+void log_set_max_level_realm(LogRealm realm, int level) {
+ assert((level & LOG_PRIMASK) == level);
+ assert(realm < ELEMENTSOF(log_max_level));
+
+ log_max_level[realm] = level;
+}
+
+void log_set_facility(int facility) {
+ log_facility = facility;
+}
+
+static int write_to_console(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *buffer) {
+
+ char location[256], prefix[1 + DECIMAL_STR_MAX(int) + 2];
+ struct iovec iovec[6] = {};
+ bool highlight;
+ size_t n = 0;
+
+ if (console_fd < 0)
+ return 0;
+
+ if (log_target == LOG_TARGET_CONSOLE_PREFIXED) {
+ xsprintf(prefix, "<%i>", level);
+ iovec[n++] = IOVEC_MAKE_STRING(prefix);
+ }
+
+ highlight = LOG_PRI(level) <= LOG_ERR && show_color;
+
+ if (show_location) {
+ (void) snprintf(location, sizeof location, "(%s:%i) ", file, line);
+ iovec[n++] = IOVEC_MAKE_STRING(location);
+ }
+
+ if (highlight)
+ iovec[n++] = IOVEC_MAKE_STRING(ANSI_HIGHLIGHT_RED);
+ iovec[n++] = IOVEC_MAKE_STRING(buffer);
+ if (highlight)
+ iovec[n++] = IOVEC_MAKE_STRING(ANSI_NORMAL);
+ iovec[n++] = IOVEC_MAKE_STRING("\n");
+
+ if (writev(console_fd, iovec, n) < 0) {
+
+ if (errno == EIO && getpid_cached() == 1) {
+
+ /* If somebody tried to kick us from our
+ * console tty (via vhangup() or suchlike),
+ * try to reconnect */
+
+ log_close_console();
+ log_open_console();
+
+ if (console_fd < 0)
+ return 0;
+
+ if (writev(console_fd, iovec, n) < 0)
+ return -errno;
+ } else
+ return -errno;
+ }
+
+ return 1;
+}
+
+static int write_to_syslog(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *buffer) {
+
+ char header_priority[2 + DECIMAL_STR_MAX(int) + 1],
+ header_time[64],
+ header_pid[4 + DECIMAL_STR_MAX(pid_t) + 1];
+ struct iovec iovec[5] = {};
+ struct msghdr msghdr = {
+ .msg_iov = iovec,
+ .msg_iovlen = ELEMENTSOF(iovec),
+ };
+ time_t t;
+ struct tm tm;
+
+ if (syslog_fd < 0)
+ return 0;
+
+ xsprintf(header_priority, "<%i>", level);
+
+ t = (time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC);
+ if (!localtime_r(&t, &tm))
+ return -EINVAL;
+
+ if (strftime(header_time, sizeof(header_time), "%h %e %T ", &tm) <= 0)
+ return -EINVAL;
+
+ xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
+
+ iovec[0] = IOVEC_MAKE_STRING(header_priority);
+ iovec[1] = IOVEC_MAKE_STRING(header_time);
+ iovec[2] = IOVEC_MAKE_STRING(program_invocation_short_name);
+ iovec[3] = IOVEC_MAKE_STRING(header_pid);
+ iovec[4] = IOVEC_MAKE_STRING(buffer);
+
+ /* When using syslog via SOCK_STREAM separate the messages by NUL chars */
+ if (syslog_is_stream)
+ iovec[4].iov_len++;
+
+ for (;;) {
+ ssize_t n;
+
+ n = sendmsg(syslog_fd, &msghdr, MSG_NOSIGNAL);
+ if (n < 0)
+ return -errno;
+
+ if (!syslog_is_stream ||
+ (size_t) n >= IOVEC_TOTAL_SIZE(iovec, ELEMENTSOF(iovec)))
+ break;
+
+ IOVEC_INCREMENT(iovec, ELEMENTSOF(iovec), n);
+ }
+
+ return 1;
+}
+
+static int write_to_kmsg(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *buffer) {
+
+ char header_priority[2 + DECIMAL_STR_MAX(int) + 1],
+ header_pid[4 + DECIMAL_STR_MAX(pid_t) + 1];
+ struct iovec iovec[5] = {};
+
+ if (kmsg_fd < 0)
+ return 0;
+
+ xsprintf(header_priority, "<%i>", level);
+ xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
+
+ iovec[0] = IOVEC_MAKE_STRING(header_priority);
+ iovec[1] = IOVEC_MAKE_STRING(program_invocation_short_name);
+ iovec[2] = IOVEC_MAKE_STRING(header_pid);
+ iovec[3] = IOVEC_MAKE_STRING(buffer);
+ iovec[4] = IOVEC_MAKE_STRING("\n");
+
+ if (writev(kmsg_fd, iovec, ELEMENTSOF(iovec)) < 0)
+ return -errno;
+
+ return 1;
+}
+
+static int log_do_header(
+ char *header,
+ size_t size,
+ int level,
+ int error,
+ const char *file, int line, const char *func,
+ const char *object_field, const char *object,
+ const char *extra_field, const char *extra) {
+ int r;
+
+ error = IS_SYNTHETIC_ERRNO(error) ? 0 : ERRNO_VALUE(error);
+
+ r = snprintf(header, size,
+ "PRIORITY=%i\n"
+ "SYSLOG_FACILITY=%i\n"
+ "%s%.256s%s" /* CODE_FILE */
+ "%s%.*i%s" /* CODE_LINE */
+ "%s%.256s%s" /* CODE_FUNC */
+ "%s%.*i%s" /* ERRNO */
+ "%s%.256s%s" /* object */
+ "%s%.256s%s" /* extra */
+ "SYSLOG_IDENTIFIER=%.256s\n",
+ LOG_PRI(level),
+ LOG_FAC(level),
+ isempty(file) ? "" : "CODE_FILE=",
+ isempty(file) ? "" : file,
+ isempty(file) ? "" : "\n",
+ line ? "CODE_LINE=" : "",
+ line ? 1 : 0, line, /* %.0d means no output too, special case for 0 */
+ line ? "\n" : "",
+ isempty(func) ? "" : "CODE_FUNC=",
+ isempty(func) ? "" : func,
+ isempty(func) ? "" : "\n",
+ error ? "ERRNO=" : "",
+ error ? 1 : 0, error,
+ error ? "\n" : "",
+ isempty(object) ? "" : object_field,
+ isempty(object) ? "" : object,
+ isempty(object) ? "" : "\n",
+ isempty(extra) ? "" : extra_field,
+ isempty(extra) ? "" : extra,
+ isempty(extra) ? "" : "\n",
+ program_invocation_short_name);
+ assert_raw((size_t) r < size);
+
+ return 0;
+}
+
+static int write_to_journal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ const char *buffer) {
+
+ char header[LINE_MAX];
+ struct iovec iovec[4] = {};
+ struct msghdr mh = {};
+
+ if (journal_fd < 0)
+ return 0;
+
+ log_do_header(header, sizeof(header), level, error, file, line, func, object_field, object, extra_field, extra);
+
+ iovec[0] = IOVEC_MAKE_STRING(header);
+ iovec[1] = IOVEC_MAKE_STRING("MESSAGE=");
+ iovec[2] = IOVEC_MAKE_STRING(buffer);
+ iovec[3] = IOVEC_MAKE_STRING("\n");
+
+ mh.msg_iov = iovec;
+ mh.msg_iovlen = ELEMENTSOF(iovec);
+
+ if (sendmsg(journal_fd, &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ return 1;
+}
+
+int log_dispatch_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ char *buffer) {
+
+ assert_raw(buffer);
+
+ if (log_target == LOG_TARGET_NULL)
+ return -ERRNO_VALUE(error);
+
+ /* Patch in LOG_DAEMON facility if necessary */
+ if ((level & LOG_FACMASK) == 0)
+ level |= log_facility;
+
+ if (open_when_needed)
+ log_open();
+
+ do {
+ char *e;
+ int k = 0;
+
+ buffer += strspn(buffer, NEWLINE);
+
+ if (buffer[0] == 0)
+ break;
+
+ if ((e = strpbrk(buffer, NEWLINE)))
+ *(e++) = 0;
+
+ if (IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_JOURNAL)) {
+
+ k = write_to_journal(level, error, file, line, func, object_field, object, extra_field, extra, buffer);
+ if (k < 0 && k != -EAGAIN)
+ log_close_journal();
+ }
+
+ if (IN_SET(log_target, LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_SYSLOG)) {
+
+ k = write_to_syslog(level, error, file, line, func, buffer);
+ if (k < 0 && k != -EAGAIN)
+ log_close_syslog();
+ }
+
+ if (k <= 0 &&
+ IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_KMSG)) {
+
+ if (k < 0)
+ log_open_kmsg();
+
+ k = write_to_kmsg(level, error, file, line, func, buffer);
+ if (k < 0) {
+ log_close_kmsg();
+ log_open_console();
+ }
+ }
+
+ if (k <= 0)
+ (void) write_to_console(level, error, file, line, func, buffer);
+
+ buffer = e;
+ } while (buffer);
+
+ if (open_when_needed)
+ log_close();
+
+ return -ERRNO_VALUE(error);
+}
+
+int log_dump_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ char *buffer) {
+
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+ PROTECT_ERRNO;
+
+ /* This modifies the buffer... */
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]))
+ return -ERRNO_VALUE(error);
+
+ return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buffer);
+}
+
+int log_internalv_realm(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format,
+ va_list ap) {
+
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+ char buffer[LINE_MAX];
+ PROTECT_ERRNO;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]))
+ return -ERRNO_VALUE(error);
+
+ /* Make sure that %m maps to the specified error (or "Success"). */
+ errno = ERRNO_VALUE(error);
+
+ (void) vsnprintf(buffer, sizeof buffer, format, ap);
+
+ return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buffer);
+}
+
+int log_internal_realm(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = log_internalv_realm(level, error, file, line, func, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_printf_(10,0)
+static int log_object_internalv(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ const char *format,
+ va_list ap) {
+
+ PROTECT_ERRNO;
+ char *buffer, *b;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[LOG_REALM_SYSTEMD]))
+ return -ERRNO_VALUE(error);
+
+ /* Make sure that %m maps to the specified error (or "Success"). */
+ errno = ERRNO_VALUE(error);
+
+ /* Prepend the object name before the message */
+ if (object) {
+ size_t n;
+
+ n = strlen(object);
+ buffer = newa(char, n + 2 + LINE_MAX);
+ b = stpcpy(stpcpy(buffer, object), ": ");
+ } else
+ b = buffer = newa(char, LINE_MAX);
+
+ (void) vsnprintf(b, LINE_MAX, format, ap);
+
+ return log_dispatch_internal(level, error, file, line, func,
+ object_field, object, extra_field, extra, buffer);
+}
+
+int log_object_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ const char *format, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = log_object_internalv(level, error, file, line, func, object_field, object, extra_field, extra, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+static void log_assert(
+ int level,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format) {
+
+ static char buffer[LINE_MAX];
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]))
+ return;
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ (void) snprintf(buffer, sizeof buffer, format, text, file, line, func);
+ REENABLE_WARNING;
+
+ log_abort_msg = buffer;
+
+ log_dispatch_internal(level, 0, file, line, func, NULL, NULL, NULL, NULL, buffer);
+}
+
+_noreturn_ void log_assert_failed_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func) {
+ log_open();
+ log_assert(LOG_REALM_PLUS_LEVEL(realm, LOG_CRIT), text, file, line, func,
+ "Assertion '%s' failed at %s:%u, function %s(). Aborting.");
+ abort();
+}
+
+_noreturn_ void log_assert_failed_unreachable_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func) {
+ log_open();
+ log_assert(LOG_REALM_PLUS_LEVEL(realm, LOG_CRIT), text, file, line, func,
+ "Code should not be reached '%s' at %s:%u, function %s(). Aborting.");
+ abort();
+}
+
+void log_assert_failed_return_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func) {
+ PROTECT_ERRNO;
+ log_assert(LOG_REALM_PLUS_LEVEL(realm, LOG_DEBUG), text, file, line, func,
+ "Assertion '%s' failed at %s:%u, function %s(). Ignoring.");
+}
+
+int log_oom_internal(LogRealm realm, const char *file, int line, const char *func) {
+ return log_internal_realm(LOG_REALM_PLUS_LEVEL(realm, LOG_ERR),
+ ENOMEM, file, line, func, "Out of memory.");
+}
+
+int log_format_iovec(
+ struct iovec *iovec,
+ size_t iovec_len,
+ size_t *n,
+ bool newline_separator,
+ int error,
+ const char *format,
+ va_list ap) {
+
+ static const char nl = '\n';
+
+ while (format && *n + 1 < iovec_len) {
+ va_list aq;
+ char *m;
+ int r;
+
+ /* We need to copy the va_list structure,
+ * since vasprintf() leaves it afterwards at
+ * an undefined location */
+
+ errno = ERRNO_VALUE(error);
+
+ va_copy(aq, ap);
+ r = vasprintf(&m, format, aq);
+ va_end(aq);
+ if (r < 0)
+ return -EINVAL;
+
+ /* Now, jump enough ahead, so that we point to
+ * the next format string */
+ VA_FORMAT_ADVANCE(format, ap);
+
+ iovec[(*n)++] = IOVEC_MAKE_STRING(m);
+
+ if (newline_separator) {
+ iovec[*n] = IOVEC_MAKE((char *)&nl, 1);
+ (*n)++;
+ }
+
+ format = va_arg(ap, char *);
+ }
+ return 0;
+}
+
+int log_struct_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) {
+
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+ char buf[LINE_MAX];
+ bool found = false;
+ PROTECT_ERRNO;
+ va_list ap;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]) ||
+ log_target == LOG_TARGET_NULL)
+ return -ERRNO_VALUE(error);
+
+ if ((level & LOG_FACMASK) == 0)
+ level |= log_facility;
+
+ if (IN_SET(log_target,
+ LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_JOURNAL)) {
+
+ if (open_when_needed)
+ log_open_journal();
+
+ if (journal_fd >= 0) {
+ char header[LINE_MAX];
+ struct iovec iovec[17] = {};
+ size_t n = 0, i;
+ int r;
+ struct msghdr mh = {
+ .msg_iov = iovec,
+ };
+ bool fallback = false;
+
+ /* If the journal is available do structured logging.
+ * Do not report the errno if it is synthetic. */
+ log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
+ iovec[n++] = IOVEC_MAKE_STRING(header);
+
+ va_start(ap, format);
+ r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, true, error, format, ap);
+ if (r < 0)
+ fallback = true;
+ else {
+ mh.msg_iovlen = n;
+ (void) sendmsg(journal_fd, &mh, MSG_NOSIGNAL);
+ }
+
+ va_end(ap);
+ for (i = 1; i < n; i += 2)
+ free(iovec[i].iov_base);
+
+ if (!fallback) {
+ if (open_when_needed)
+ log_close();
+
+ return -ERRNO_VALUE(error);
+ }
+ }
+ }
+
+ /* Fallback if journal logging is not available or didn't work. */
+
+ va_start(ap, format);
+ while (format) {
+ va_list aq;
+
+ errno = ERRNO_VALUE(error);
+
+ va_copy(aq, ap);
+ (void) vsnprintf(buf, sizeof buf, format, aq);
+ va_end(aq);
+
+ if (startswith(buf, "MESSAGE=")) {
+ found = true;
+ break;
+ }
+
+ VA_FORMAT_ADVANCE(format, ap);
+
+ format = va_arg(ap, char *);
+ }
+ va_end(ap);
+
+ if (!found) {
+ if (open_when_needed)
+ log_close();
+
+ return -ERRNO_VALUE(error);
+ }
+
+ return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buf + 8);
+}
+
+int log_struct_iovec_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const struct iovec input_iovec[],
+ size_t n_input_iovec) {
+
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+ PROTECT_ERRNO;
+ size_t i;
+ char *m;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]) ||
+ log_target == LOG_TARGET_NULL)
+ return -ERRNO_VALUE(error);
+
+ if ((level & LOG_FACMASK) == 0)
+ level |= log_facility;
+
+ if (IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_JOURNAL) &&
+ journal_fd >= 0) {
+
+ struct iovec iovec[1 + n_input_iovec*2];
+ char header[LINE_MAX];
+ struct msghdr mh = {
+ .msg_iov = iovec,
+ .msg_iovlen = 1 + n_input_iovec*2,
+ };
+
+ log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
+ iovec[0] = IOVEC_MAKE_STRING(header);
+
+ for (i = 0; i < n_input_iovec; i++) {
+ iovec[1+i*2] = input_iovec[i];
+ iovec[1+i*2+1] = IOVEC_MAKE_STRING("\n");
+ }
+
+ if (sendmsg(journal_fd, &mh, MSG_NOSIGNAL) >= 0)
+ return -ERRNO_VALUE(error);
+ }
+
+ for (i = 0; i < n_input_iovec; i++)
+ if (memory_startswith(input_iovec[i].iov_base, input_iovec[i].iov_len, "MESSAGE="))
+ break;
+
+ if (_unlikely_(i >= n_input_iovec)) /* Couldn't find MESSAGE=? */
+ return -ERRNO_VALUE(error);
+
+ m = strndupa(input_iovec[i].iov_base + STRLEN("MESSAGE="),
+ input_iovec[i].iov_len - STRLEN("MESSAGE="));
+
+ return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, m);
+}
+
+int log_set_target_from_string(const char *e) {
+ LogTarget t;
+
+ t = log_target_from_string(e);
+ if (t < 0)
+ return -EINVAL;
+
+ log_set_target(t);
+ return 0;
+}
+
+int log_set_max_level_from_string_realm(LogRealm realm, const char *e) {
+ int t;
+
+ t = log_level_from_string(e);
+ if (t < 0)
+ return -EINVAL;
+
+ log_set_max_level_realm(realm, t);
+ return 0;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+
+ /*
+ * The systemd.log_xyz= settings are parsed by all tools, and
+ * so is "debug".
+ *
+ * However, "quiet" is only parsed by PID 1, and only turns of
+ * status output to /dev/console, but does not alter the log
+ * level.
+ */
+
+ if (streq(key, "debug") && !value)
+ log_set_max_level(LOG_DEBUG);
+
+ else if (proc_cmdline_key_streq(key, "systemd.log_target")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (log_set_target_from_string(value) < 0)
+ log_warning("Failed to parse log target '%s'. Ignoring.", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.log_level")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (log_set_max_level_from_string(value) < 0)
+ log_warning("Failed to parse log level '%s'. Ignoring.", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.log_color")) {
+
+ if (log_show_color_from_string(value ?: "1") < 0)
+ log_warning("Failed to parse log color setting '%s'. Ignoring.", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.log_location")) {
+
+ if (log_show_location_from_string(value ?: "1") < 0)
+ log_warning("Failed to parse log location setting '%s'. Ignoring.", value);
+ }
+
+ return 0;
+}
+
+void log_parse_environment_realm(LogRealm realm) {
+ /* Do not call from library code. */
+
+ const char *e;
+
+ if (get_ctty_devnr(0, NULL) < 0)
+ /* Only try to read the command line in daemons. We assume that anything that has a controlling tty is
+ user stuff. */
+ (void) proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
+
+ e = getenv("SYSTEMD_LOG_TARGET");
+ if (e && log_set_target_from_string(e) < 0)
+ log_warning("Failed to parse log target '%s'. Ignoring.", e);
+
+ e = getenv("SYSTEMD_LOG_LEVEL");
+ if (e && log_set_max_level_from_string_realm(realm, e) < 0)
+ log_warning("Failed to parse log level '%s'. Ignoring.", e);
+
+ e = getenv("SYSTEMD_LOG_COLOR");
+ if (e && log_show_color_from_string(e) < 0)
+ log_warning("Failed to parse bool '%s'. Ignoring.", e);
+
+ e = getenv("SYSTEMD_LOG_LOCATION");
+ if (e && log_show_location_from_string(e) < 0)
+ log_warning("Failed to parse bool '%s'. Ignoring.", e);
+}
+
+LogTarget log_get_target(void) {
+ return log_target;
+}
+
+int log_get_max_level_realm(LogRealm realm) {
+ return log_max_level[realm];
+}
+
+void log_show_color(bool b) {
+ show_color = b;
+}
+
+bool log_get_show_color(void) {
+ return show_color;
+}
+
+void log_show_location(bool b) {
+ show_location = b;
+}
+
+bool log_get_show_location(void) {
+ return show_location;
+}
+
+int log_show_color_from_string(const char *e) {
+ int t;
+
+ t = parse_boolean(e);
+ if (t < 0)
+ return t;
+
+ log_show_color(t);
+ return 0;
+}
+
+int log_show_location_from_string(const char *e) {
+ int t;
+
+ t = parse_boolean(e);
+ if (t < 0)
+ return t;
+
+ log_show_location(t);
+ return 0;
+}
+
+bool log_on_console(void) {
+ if (IN_SET(log_target, LOG_TARGET_CONSOLE,
+ LOG_TARGET_CONSOLE_PREFIXED))
+ return true;
+
+ return syslog_fd < 0 && kmsg_fd < 0 && journal_fd < 0;
+}
+
+static const char *const log_target_table[_LOG_TARGET_MAX] = {
+ [LOG_TARGET_CONSOLE] = "console",
+ [LOG_TARGET_CONSOLE_PREFIXED] = "console-prefixed",
+ [LOG_TARGET_KMSG] = "kmsg",
+ [LOG_TARGET_JOURNAL] = "journal",
+ [LOG_TARGET_JOURNAL_OR_KMSG] = "journal-or-kmsg",
+ [LOG_TARGET_SYSLOG] = "syslog",
+ [LOG_TARGET_SYSLOG_OR_KMSG] = "syslog-or-kmsg",
+ [LOG_TARGET_AUTO] = "auto",
+ [LOG_TARGET_NULL] = "null",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(log_target, LogTarget);
+
+void log_received_signal(int level, const struct signalfd_siginfo *si) {
+ assert(si);
+
+ if (pid_is_valid(si->ssi_pid)) {
+ _cleanup_free_ char *p = NULL;
+
+ (void) get_process_comm(si->ssi_pid, &p);
+
+ log_full(level,
+ "Received SIG%s from PID %"PRIu32" (%s).",
+ signal_to_string(si->ssi_signo),
+ si->ssi_pid, strna(p));
+ } else
+ log_full(level,
+ "Received SIG%s.",
+ signal_to_string(si->ssi_signo));
+}
+
+int log_syntax_internal(
+ const char *unit,
+ int level,
+ const char *config_file,
+ unsigned config_line,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) {
+
+ PROTECT_ERRNO;
+ char buffer[LINE_MAX];
+ va_list ap;
+ const char *unit_fmt = NULL;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[LOG_REALM_SYSTEMD]) ||
+ log_target == LOG_TARGET_NULL)
+ return -ERRNO_VALUE(error);
+
+ errno = error;
+
+ va_start(ap, format);
+ (void) vsnprintf(buffer, sizeof buffer, format, ap);
+ va_end(ap);
+
+ if (unit)
+ unit_fmt = getpid_cached() == 1 ? "UNIT=%s" : "USER_UNIT=%s";
+
+ return log_struct_internal(
+ LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level),
+ error,
+ file, line, func,
+ "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR,
+ "CONFIG_FILE=%s", config_file,
+ "CONFIG_LINE=%u", config_line,
+ LOG_MESSAGE("%s:%u: %s", config_file, config_line, buffer),
+ unit_fmt, unit,
+ NULL);
+}
+
+int log_syntax_invalid_utf8_internal(
+ const char *unit,
+ int level,
+ const char *config_file,
+ unsigned config_line,
+ const char *file,
+ int line,
+ const char *func,
+ const char *rvalue) {
+
+ _cleanup_free_ char *p = NULL;
+
+ if (rvalue)
+ p = utf8_escape_invalid(rvalue);
+
+ log_syntax_internal(unit, level, config_file, config_line, 0, file, line, func,
+ "String is not UTF-8 clean, ignoring assignment: %s", strna(p));
+
+ return -EINVAL;
+}
+
+void log_set_upgrade_syslog_to_journal(bool b) {
+ upgrade_syslog_to_journal = b;
+
+ /* Make the change effective immediately */
+ if (b) {
+ if (log_target == LOG_TARGET_SYSLOG)
+ log_target = LOG_TARGET_JOURNAL;
+ else if (log_target == LOG_TARGET_SYSLOG_OR_KMSG)
+ log_target = LOG_TARGET_JOURNAL_OR_KMSG;
+ }
+}
+
+void log_set_always_reopen_console(bool b) {
+ always_reopen_console = b;
+}
+
+void log_set_open_when_needed(bool b) {
+ open_when_needed = b;
+}
+
+void log_set_prohibit_ipc(bool b) {
+ prohibit_ipc = b;
+}
+
+int log_emergency_level(void) {
+ /* Returns the log level to use for log_emergency() logging. We use LOG_EMERG only when we are PID 1, as only
+ * then the system of the whole system is obviously affected. */
+
+ return getpid_cached() == 1 ? LOG_EMERG : LOG_ERR;
+}
+
+int log_dup_console(void) {
+ int copy;
+
+ /* Duplicate the fd we use for fd logging if it's < 3 and use the copy from now on. This call is useful
+ * whenever we want to continue logging through the original fd, but want to rearrange stderr. */
+
+ if (console_fd >= 3)
+ return 0;
+
+ copy = fcntl(console_fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0)
+ return -errno;
+
+ console_fd = copy;
+ return 0;
+}
+
+void log_setup_service(void) {
+ /* Sets up logging the way it is most appropriate for running a program as a service. Note that using this
+ * doesn't make the binary unsuitable for invocation on the command line, as log output will still go to the
+ * terminal if invoked interactively. */
+
+ log_set_target(LOG_TARGET_AUTO);
+ log_parse_environment();
+ log_open();
+}
diff --git a/src/basic/log.h b/src/basic/log.h
new file mode 100644
index 0000000..17438d7
--- /dev/null
+++ b/src/basic/log.h
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <syslog.h>
+
+#include "macro.h"
+
+/* Some structures we reference but don't want to pull in headers for */
+struct iovec;
+struct signalfd_siginfo;
+
+typedef enum LogRealm {
+ LOG_REALM_SYSTEMD,
+ LOG_REALM_UDEV,
+ _LOG_REALM_MAX,
+} LogRealm;
+
+#ifndef LOG_REALM
+# define LOG_REALM LOG_REALM_SYSTEMD
+#endif
+
+typedef enum LogTarget{
+ LOG_TARGET_CONSOLE,
+ LOG_TARGET_CONSOLE_PREFIXED,
+ LOG_TARGET_KMSG,
+ LOG_TARGET_JOURNAL,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_SYSLOG,
+ LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_AUTO, /* console if stderr is tty, JOURNAL_OR_KMSG otherwise */
+ LOG_TARGET_NULL,
+ _LOG_TARGET_MAX,
+ _LOG_TARGET_INVALID = -1
+} LogTarget;
+
+/* Note to readers: << and >> have lower precedence than & and | */
+#define LOG_REALM_PLUS_LEVEL(realm, level) ((realm) << 10 | (level))
+#define LOG_REALM_REMOVE_LEVEL(realm_level) ((realm_level) >> 10)
+#define SYNTHETIC_ERRNO(num) (1 << 30 | (num))
+#define IS_SYNTHETIC_ERRNO(val) ((val) >> 30 & 1)
+#define ERRNO_VALUE(val) (abs(val) & 255)
+
+void log_set_target(LogTarget target);
+void log_set_max_level_realm(LogRealm realm, int level);
+#define log_set_max_level(level) \
+ log_set_max_level_realm(LOG_REALM, (level))
+
+void log_set_facility(int facility);
+
+int log_set_target_from_string(const char *e);
+int log_set_max_level_from_string_realm(LogRealm realm, const char *e);
+#define log_set_max_level_from_string(e) \
+ log_set_max_level_from_string_realm(LOG_REALM, (e))
+
+void log_show_color(bool b);
+bool log_get_show_color(void) _pure_;
+void log_show_location(bool b);
+bool log_get_show_location(void) _pure_;
+
+int log_show_color_from_string(const char *e);
+int log_show_location_from_string(const char *e);
+
+LogTarget log_get_target(void) _pure_;
+int log_get_max_level_realm(LogRealm realm) _pure_;
+#define log_get_max_level() \
+ log_get_max_level_realm(LOG_REALM)
+
+/* Functions below that open and close logs or configure logging based on the
+ * environment should not be called from library code — this is always a job
+ * for the application itself.
+ */
+
+int log_open(void);
+void log_close(void);
+void log_forget_fds(void);
+
+void log_parse_environment_realm(LogRealm realm);
+#define log_parse_environment() \
+ log_parse_environment_realm(LOG_REALM)
+
+int log_dispatch_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra,
+ const char *extra_field,
+ char *buffer);
+
+int log_internal_realm(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) _printf_(6,7);
+#define log_internal(level, ...) \
+ log_internal_realm(LOG_REALM_PLUS_LEVEL(LOG_REALM, (level)), __VA_ARGS__)
+
+int log_internalv_realm(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format,
+ va_list ap) _printf_(6,0);
+#define log_internalv(level, ...) \
+ log_internalv_realm(LOG_REALM_PLUS_LEVEL(LOG_REALM, (level)), __VA_ARGS__)
+
+/* Realm is fixed to LOG_REALM_SYSTEMD for those */
+int log_object_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ const char *format, ...) _printf_(10,11);
+
+int log_struct_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) _printf_(6,0) _sentinel_;
+
+int log_oom_internal(
+ LogRealm realm,
+ const char *file,
+ int line,
+ const char *func);
+
+int log_format_iovec(
+ struct iovec *iovec,
+ size_t iovec_len,
+ size_t *n,
+ bool newline_separator,
+ int error,
+ const char *format,
+ va_list ap) _printf_(6, 0);
+
+int log_struct_iovec_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const struct iovec *input_iovec,
+ size_t n_input_iovec);
+
+/* This modifies the buffer passed! */
+int log_dump_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ char *buffer);
+
+/* Logging for various assertions */
+_noreturn_ void log_assert_failed_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func);
+#define log_assert_failed(text, ...) \
+ log_assert_failed_realm(LOG_REALM, (text), __VA_ARGS__)
+
+_noreturn_ void log_assert_failed_unreachable_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func);
+#define log_assert_failed_unreachable(text, ...) \
+ log_assert_failed_unreachable_realm(LOG_REALM, (text), __VA_ARGS__)
+
+void log_assert_failed_return_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func);
+#define log_assert_failed_return(text, ...) \
+ log_assert_failed_return_realm(LOG_REALM, (text), __VA_ARGS__)
+
+#define log_dispatch(level, error, buffer) \
+ log_dispatch_internal(level, error, __FILE__, __LINE__, __func__, NULL, NULL, NULL, NULL, buffer)
+
+/* Logging with level */
+#define log_full_errno_realm(realm, level, error, ...) \
+ ({ \
+ int _level = (level), _e = (error), _realm = (realm); \
+ (log_get_max_level_realm(_realm) >= LOG_PRI(_level)) \
+ ? log_internal_realm(LOG_REALM_PLUS_LEVEL(_realm, _level), _e, \
+ __FILE__, __LINE__, __func__, __VA_ARGS__) \
+ : -ERRNO_VALUE(_e); \
+ })
+
+#define log_full_errno(level, error, ...) \
+ log_full_errno_realm(LOG_REALM, (level), (error), __VA_ARGS__)
+
+#define log_full(level, ...) log_full_errno((level), 0, __VA_ARGS__)
+
+int log_emergency_level(void);
+
+/* Normal logging */
+#define log_debug(...) log_full(LOG_DEBUG, __VA_ARGS__)
+#define log_info(...) log_full(LOG_INFO, __VA_ARGS__)
+#define log_notice(...) log_full(LOG_NOTICE, __VA_ARGS__)
+#define log_warning(...) log_full(LOG_WARNING, __VA_ARGS__)
+#define log_error(...) log_full(LOG_ERR, __VA_ARGS__)
+#define log_emergency(...) log_full(log_emergency_level(), __VA_ARGS__)
+
+/* Logging triggered by an errno-like error */
+#define log_debug_errno(error, ...) log_full_errno(LOG_DEBUG, error, __VA_ARGS__)
+#define log_info_errno(error, ...) log_full_errno(LOG_INFO, error, __VA_ARGS__)
+#define log_notice_errno(error, ...) log_full_errno(LOG_NOTICE, error, __VA_ARGS__)
+#define log_warning_errno(error, ...) log_full_errno(LOG_WARNING, error, __VA_ARGS__)
+#define log_error_errno(error, ...) log_full_errno(LOG_ERR, error, __VA_ARGS__)
+#define log_emergency_errno(error, ...) log_full_errno(log_emergency_level(), error, __VA_ARGS__)
+
+#ifdef LOG_TRACE
+# define log_trace(...) log_debug(__VA_ARGS__)
+#else
+# define log_trace(...) do {} while (0)
+#endif
+
+/* Structured logging */
+#define log_struct_errno(level, error, ...) \
+ log_struct_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
+ error, __FILE__, __LINE__, __func__, __VA_ARGS__, NULL)
+#define log_struct(level, ...) log_struct_errno(level, 0, __VA_ARGS__)
+
+#define log_struct_iovec_errno(level, error, iovec, n_iovec) \
+ log_struct_iovec_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
+ error, __FILE__, __LINE__, __func__, iovec, n_iovec)
+#define log_struct_iovec(level, iovec, n_iovec) log_struct_iovec_errno(level, 0, iovec, n_iovec)
+
+/* This modifies the buffer passed! */
+#define log_dump(level, buffer) \
+ log_dump_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
+ 0, __FILE__, __LINE__, __func__, buffer)
+
+#define log_oom() log_oom_internal(LOG_REALM, __FILE__, __LINE__, __func__)
+
+bool log_on_console(void) _pure_;
+
+const char *log_target_to_string(LogTarget target) _const_;
+LogTarget log_target_from_string(const char *s) _pure_;
+
+/* Helper to prepare various field for structured logging */
+#define LOG_MESSAGE(fmt, ...) "MESSAGE=" fmt, ##__VA_ARGS__
+
+void log_received_signal(int level, const struct signalfd_siginfo *si);
+
+/* If turned on, any requests for a log target involving "syslog" will be implicitly upgraded to the equivalent journal target */
+void log_set_upgrade_syslog_to_journal(bool b);
+
+/* If turned on, and log_open() is called, we'll not use STDERR_FILENO for logging ever, but rather open /dev/console */
+void log_set_always_reopen_console(bool b);
+
+/* If turned on, we'll open the log stream implicitly if needed on each individual log call. This is normally not
+ * desired as we want to reuse our logging streams. It is useful however */
+void log_set_open_when_needed(bool b);
+
+/* If turned on, then we'll never use IPC-based logging, i.e. never log to syslog or the journal. We'll only log to
+ * stderr, the console or kmsg */
+void log_set_prohibit_ipc(bool b);
+
+int log_dup_console(void);
+
+int log_syntax_internal(
+ const char *unit,
+ int level,
+ const char *config_file,
+ unsigned config_line,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) _printf_(9, 10);
+
+int log_syntax_invalid_utf8_internal(
+ const char *unit,
+ int level,
+ const char *config_file,
+ unsigned config_line,
+ const char *file,
+ int line,
+ const char *func,
+ const char *rvalue);
+
+#define log_syntax(unit, level, config_file, config_line, error, ...) \
+ ({ \
+ int _level = (level), _e = (error); \
+ (log_get_max_level() >= LOG_PRI(_level)) \
+ ? log_syntax_internal(unit, _level, config_file, config_line, _e, __FILE__, __LINE__, __func__, __VA_ARGS__) \
+ : -abs(_e); \
+ })
+
+#define log_syntax_invalid_utf8(unit, level, config_file, config_line, rvalue) \
+ ({ \
+ int _level = (level); \
+ (log_get_max_level() >= LOG_PRI(_level)) \
+ ? log_syntax_invalid_utf8_internal(unit, _level, config_file, config_line, __FILE__, __LINE__, __func__, rvalue) \
+ : -EINVAL; \
+ })
+
+#define DEBUG_LOGGING _unlikely_(log_get_max_level() >= LOG_DEBUG)
+
+void log_setup_service(void);
diff --git a/src/basic/login-util.c b/src/basic/login-util.c
new file mode 100644
index 0000000..085ccd0
--- /dev/null
+++ b/src/basic/login-util.c
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+
+#include "login-util.h"
+#include "string-util.h"
+
+bool session_id_valid(const char *id) {
+
+ if (isempty(id))
+ return false;
+
+ return id[strspn(id, LETTERS DIGITS)] == '\0';
+}
diff --git a/src/basic/login-util.h b/src/basic/login-util.h
new file mode 100644
index 0000000..e1e62e1
--- /dev/null
+++ b/src/basic/login-util.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <unistd.h>
+
+bool session_id_valid(const char *id);
+
+static inline bool logind_running(void) {
+ return access("/run/systemd/seats/", F_OK) >= 0;
+}
diff --git a/src/basic/macro.h b/src/basic/macro.h
new file mode 100644
index 0000000..1971e91
--- /dev/null
+++ b/src/basic/macro.h
@@ -0,0 +1,549 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#define _printf_(a, b) __attribute__((__format__(printf, a, b)))
+#ifdef __clang__
+# define _alloc_(...)
+#else
+# define _alloc_(...) __attribute__((__alloc_size__(__VA_ARGS__)))
+#endif
+#define _sentinel_ __attribute__((__sentinel__))
+#define _section_(x) __attribute__((__section__(x)))
+#define _used_ __attribute__((__used__))
+#define _unused_ __attribute__((__unused__))
+#define _destructor_ __attribute__((__destructor__))
+#define _pure_ __attribute__((__pure__))
+#define _const_ __attribute__((__const__))
+#define _deprecated_ __attribute__((__deprecated__))
+#define _packed_ __attribute__((__packed__))
+#define _malloc_ __attribute__((__malloc__))
+#define _weak_ __attribute__((__weak__))
+#define _likely_(x) (__builtin_expect(!!(x), 1))
+#define _unlikely_(x) (__builtin_expect(!!(x), 0))
+#define _public_ __attribute__((__visibility__("default")))
+#define _hidden_ __attribute__((__visibility__("hidden")))
+#define _weakref_(x) __attribute__((__weakref__(#x)))
+#define _align_(x) __attribute__((__aligned__(x)))
+#define _alignas_(x) __attribute__((__aligned__(__alignof(x))))
+#define _alignptr_ __attribute__((__aligned__(sizeof(void*))))
+#define _cleanup_(x) __attribute__((__cleanup__(x)))
+#if __GNUC__ >= 7
+#define _fallthrough_ __attribute__((__fallthrough__))
+#else
+#define _fallthrough_
+#endif
+/* Define C11 noreturn without <stdnoreturn.h> and even on older gcc
+ * compiler versions */
+#ifndef _noreturn_
+#if __STDC_VERSION__ >= 201112L
+#define _noreturn_ _Noreturn
+#else
+#define _noreturn_ __attribute__((__noreturn__))
+#endif
+#endif
+
+#if !defined(HAS_FEATURE_MEMORY_SANITIZER)
+# if defined(__has_feature)
+# if __has_feature(memory_sanitizer)
+# define HAS_FEATURE_MEMORY_SANITIZER 1
+# endif
+# endif
+# if !defined(HAS_FEATURE_MEMORY_SANITIZER)
+# define HAS_FEATURE_MEMORY_SANITIZER 0
+# endif
+#endif
+
+#if !defined(HAS_FEATURE_ADDRESS_SANITIZER)
+# ifdef __SANITIZE_ADDRESS__
+# define HAS_FEATURE_ADDRESS_SANITIZER 1
+# elif defined(__has_feature)
+# if __has_feature(address_sanitizer)
+# define HAS_FEATURE_ADDRESS_SANITIZER 1
+# endif
+# endif
+# if !defined(HAS_FEATURE_ADDRESS_SANITIZER)
+# define HAS_FEATURE_ADDRESS_SANITIZER 0
+# endif
+#endif
+
+/* Note: on GCC "no_sanitize_address" is a function attribute only, on llvm it may also be applied to global
+ * variables. We define a specific macro which knows this. Note that on GCC we don't need this decorator so much, since
+ * our primary usecase for this attribute is registration structures placed in named ELF sections which shall not be
+ * padded, but GCC doesn't pad those anyway if AddressSanitizer is enabled. */
+#if HAS_FEATURE_ADDRESS_SANITIZER && defined(__clang__)
+#define _variable_no_sanitize_address_ __attribute__((__no_sanitize_address__))
+#else
+#define _variable_no_sanitize_address_
+#endif
+
+/* Temporarily disable some warnings */
+#define DISABLE_WARNING_FORMAT_NONLITERAL \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wformat-nonliteral\"")
+
+#define DISABLE_WARNING_MISSING_PROTOTYPES \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wmissing-prototypes\"")
+
+#define DISABLE_WARNING_NONNULL \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wnonnull\"")
+
+#define DISABLE_WARNING_SHADOW \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wshadow\"")
+
+#define DISABLE_WARNING_INCOMPATIBLE_POINTER_TYPES \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wincompatible-pointer-types\"")
+
+#define REENABLE_WARNING \
+ _Pragma("GCC diagnostic pop")
+
+/* automake test harness */
+#define EXIT_TEST_SKIP 77
+
+#define XSTRINGIFY(x) #x
+#define STRINGIFY(x) XSTRINGIFY(x)
+
+#define XCONCATENATE(x, y) x ## y
+#define CONCATENATE(x, y) XCONCATENATE(x, y)
+
+#define UNIQ_T(x, uniq) CONCATENATE(__unique_prefix_, CONCATENATE(x, uniq))
+#define UNIQ __COUNTER__
+
+/* builtins */
+#if __SIZEOF_INT__ == 4
+#define BUILTIN_FFS_U32(x) __builtin_ffs(x);
+#elif __SIZEOF_LONG__ == 4
+#define BUILTIN_FFS_U32(x) __builtin_ffsl(x);
+#else
+#error "neither int nor long are four bytes long?!?"
+#endif
+
+/* Rounds up */
+
+#define ALIGN4(l) (((l) + 3) & ~3)
+#define ALIGN8(l) (((l) + 7) & ~7)
+
+#if __SIZEOF_POINTER__ == 8
+#define ALIGN(l) ALIGN8(l)
+#elif __SIZEOF_POINTER__ == 4
+#define ALIGN(l) ALIGN4(l)
+#else
+#error "Wut? Pointers are neither 4 nor 8 bytes long?"
+#endif
+
+#define ALIGN_PTR(p) ((void*) ALIGN((unsigned long) (p)))
+#define ALIGN4_PTR(p) ((void*) ALIGN4((unsigned long) (p)))
+#define ALIGN8_PTR(p) ((void*) ALIGN8((unsigned long) (p)))
+
+static inline size_t ALIGN_TO(size_t l, size_t ali) {
+ return ((l + ali - 1) & ~(ali - 1));
+}
+
+#define ALIGN_TO_PTR(p, ali) ((void*) ALIGN_TO((unsigned long) (p), (ali)))
+
+/* align to next higher power-of-2 (except for: 0 => 0, overflow => 0) */
+static inline unsigned long ALIGN_POWER2(unsigned long u) {
+ /* clz(0) is undefined */
+ if (u == 1)
+ return 1;
+
+ /* left-shift overflow is undefined */
+ if (__builtin_clzl(u - 1UL) < 1)
+ return 0;
+
+ return 1UL << (sizeof(u) * 8 - __builtin_clzl(u - 1UL));
+}
+
+#ifndef __COVERITY__
+# define VOID_0 ((void)0)
+#else
+# define VOID_0 ((void*)0)
+#endif
+
+#define ELEMENTSOF(x) \
+ (__builtin_choose_expr( \
+ !__builtin_types_compatible_p(typeof(x), typeof(&*(x))), \
+ sizeof(x)/sizeof((x)[0]), \
+ VOID_0))
+
+/*
+ * STRLEN - return the length of a string literal, minus the trailing NUL byte.
+ * Contrary to strlen(), this is a constant expression.
+ * @x: a string literal.
+ */
+#define STRLEN(x) (sizeof(""x"") - 1)
+
+/*
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr: the pointer to the member.
+ * @type: the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ */
+#define container_of(ptr, type, member) __container_of(UNIQ, (ptr), type, member)
+#define __container_of(uniq, ptr, type, member) \
+ ({ \
+ const typeof( ((type*)0)->member ) *UNIQ_T(A, uniq) = (ptr); \
+ (type*)( (char *)UNIQ_T(A, uniq) - offsetof(type, member) ); \
+ })
+
+#undef MAX
+#define MAX(a, b) __MAX(UNIQ, (a), UNIQ, (b))
+#define __MAX(aq, a, bq, b) \
+ ({ \
+ const typeof(a) UNIQ_T(A, aq) = (a); \
+ const typeof(b) UNIQ_T(B, bq) = (b); \
+ UNIQ_T(A, aq) > UNIQ_T(B, bq) ? UNIQ_T(A, aq) : UNIQ_T(B, bq); \
+ })
+
+/* evaluates to (void) if _A or _B are not constant or of different types */
+#define CONST_MAX(_A, _B) \
+ (__builtin_choose_expr( \
+ __builtin_constant_p(_A) && \
+ __builtin_constant_p(_B) && \
+ __builtin_types_compatible_p(typeof(_A), typeof(_B)), \
+ ((_A) > (_B)) ? (_A) : (_B), \
+ VOID_0))
+
+/* takes two types and returns the size of the larger one */
+#define MAXSIZE(A, B) (sizeof(union _packed_ { typeof(A) a; typeof(B) b; }))
+
+#define MAX3(x, y, z) \
+ ({ \
+ const typeof(x) _c = MAX(x, y); \
+ MAX(_c, z); \
+ })
+
+#undef MIN
+#define MIN(a, b) __MIN(UNIQ, (a), UNIQ, (b))
+#define __MIN(aq, a, bq, b) \
+ ({ \
+ const typeof(a) UNIQ_T(A, aq) = (a); \
+ const typeof(b) UNIQ_T(B, bq) = (b); \
+ UNIQ_T(A, aq) < UNIQ_T(B, bq) ? UNIQ_T(A, aq) : UNIQ_T(B, bq); \
+ })
+
+#define MIN3(x, y, z) \
+ ({ \
+ const typeof(x) _c = MIN(x, y); \
+ MIN(_c, z); \
+ })
+
+#define LESS_BY(a, b) __LESS_BY(UNIQ, (a), UNIQ, (b))
+#define __LESS_BY(aq, a, bq, b) \
+ ({ \
+ const typeof(a) UNIQ_T(A, aq) = (a); \
+ const typeof(b) UNIQ_T(B, bq) = (b); \
+ UNIQ_T(A, aq) > UNIQ_T(B, bq) ? UNIQ_T(A, aq) - UNIQ_T(B, bq) : 0; \
+ })
+
+#define CMP(a, b) __CMP(UNIQ, (a), UNIQ, (b))
+#define __CMP(aq, a, bq, b) \
+ ({ \
+ const typeof(a) UNIQ_T(A, aq) = (a); \
+ const typeof(b) UNIQ_T(B, bq) = (b); \
+ UNIQ_T(A, aq) < UNIQ_T(B, bq) ? -1 : \
+ UNIQ_T(A, aq) > UNIQ_T(B, bq) ? 1 : 0; \
+ })
+
+#undef CLAMP
+#define CLAMP(x, low, high) __CLAMP(UNIQ, (x), UNIQ, (low), UNIQ, (high))
+#define __CLAMP(xq, x, lowq, low, highq, high) \
+ ({ \
+ const typeof(x) UNIQ_T(X, xq) = (x); \
+ const typeof(low) UNIQ_T(LOW, lowq) = (low); \
+ const typeof(high) UNIQ_T(HIGH, highq) = (high); \
+ UNIQ_T(X, xq) > UNIQ_T(HIGH, highq) ? \
+ UNIQ_T(HIGH, highq) : \
+ UNIQ_T(X, xq) < UNIQ_T(LOW, lowq) ? \
+ UNIQ_T(LOW, lowq) : \
+ UNIQ_T(X, xq); \
+ })
+
+/* [(x + y - 1) / y] suffers from an integer overflow, even though the
+ * computation should be possible in the given type. Therefore, we use
+ * [x / y + !!(x % y)]. Note that on "Real CPUs" a division returns both the
+ * quotient and the remainder, so both should be equally fast. */
+#define DIV_ROUND_UP(x, y) __DIV_ROUND_UP(UNIQ, (x), UNIQ, (y))
+#define __DIV_ROUND_UP(xq, x, yq, y) \
+ ({ \
+ const typeof(x) UNIQ_T(X, xq) = (x); \
+ const typeof(y) UNIQ_T(Y, yq) = (y); \
+ (UNIQ_T(X, xq) / UNIQ_T(Y, yq) + !!(UNIQ_T(X, xq) % UNIQ_T(Y, yq))); \
+ })
+
+#ifdef __COVERITY__
+
+/* Use special definitions of assertion macros in order to prevent
+ * false positives of ASSERT_SIDE_EFFECT on Coverity static analyzer
+ * for uses of assert_se() and assert_return().
+ *
+ * These definitions make expression go through a (trivial) function
+ * call to ensure they are not discarded. Also use ! or !! to ensure
+ * the boolean expressions are seen as such.
+ *
+ * This technique has been described and recommended in:
+ * https://community.synopsys.com/s/question/0D534000046Yuzb/suppressing-assertsideeffect-for-functions-that-allow-for-sideeffects
+ */
+
+extern void __coverity_panic__(void);
+
+static inline int __coverity_check__(int condition) {
+ return condition;
+}
+
+#define assert_message_se(expr, message) \
+ do { \
+ if (__coverity_check__(!(expr))) \
+ __coverity_panic__(); \
+ } while (false)
+
+#define assert_log(expr, message) __coverity_check__(!!(expr))
+
+#else /* ! __COVERITY__ */
+
+#define assert_message_se(expr, message) \
+ do { \
+ if (_unlikely_(!(expr))) \
+ log_assert_failed(message, __FILE__, __LINE__, __PRETTY_FUNCTION__); \
+ } while (false)
+
+#define assert_log(expr, message) ((_likely_(expr)) \
+ ? (true) \
+ : (log_assert_failed_return(message, __FILE__, __LINE__, __PRETTY_FUNCTION__), false))
+
+#endif /* __COVERITY__ */
+
+#define assert_se(expr) assert_message_se(expr, #expr)
+
+/* We override the glibc assert() here. */
+#undef assert
+#ifdef NDEBUG
+#define assert(expr) do {} while (false)
+#else
+#define assert(expr) assert_message_se(expr, #expr)
+#endif
+
+#define assert_not_reached(t) \
+ do { \
+ log_assert_failed_unreachable(t, __FILE__, __LINE__, __PRETTY_FUNCTION__); \
+ } while (false)
+
+#if defined(static_assert)
+#define assert_cc(expr) \
+ static_assert(expr, #expr);
+#else
+#define assert_cc(expr) \
+ struct CONCATENATE(_assert_struct_, __COUNTER__) { \
+ char x[(expr) ? 0 : -1]; \
+ };
+#endif
+
+#define assert_return(expr, r) \
+ do { \
+ if (!assert_log(expr, #expr)) \
+ return (r); \
+ } while (false)
+
+#define assert_return_errno(expr, r, err) \
+ do { \
+ if (!assert_log(expr, #expr)) { \
+ errno = err; \
+ return (r); \
+ } \
+ } while (false)
+
+#define return_with_errno(r, err) \
+ do { \
+ errno = abs(err); \
+ return r; \
+ } while (false)
+
+#define PTR_TO_INT(p) ((int) ((intptr_t) (p)))
+#define INT_TO_PTR(u) ((void *) ((intptr_t) (u)))
+#define PTR_TO_UINT(p) ((unsigned) ((uintptr_t) (p)))
+#define UINT_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define PTR_TO_LONG(p) ((long) ((intptr_t) (p)))
+#define LONG_TO_PTR(u) ((void *) ((intptr_t) (u)))
+#define PTR_TO_ULONG(p) ((unsigned long) ((uintptr_t) (p)))
+#define ULONG_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define PTR_TO_INT32(p) ((int32_t) ((intptr_t) (p)))
+#define INT32_TO_PTR(u) ((void *) ((intptr_t) (u)))
+#define PTR_TO_UINT32(p) ((uint32_t) ((uintptr_t) (p)))
+#define UINT32_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define PTR_TO_INT64(p) ((int64_t) ((intptr_t) (p)))
+#define INT64_TO_PTR(u) ((void *) ((intptr_t) (u)))
+#define PTR_TO_UINT64(p) ((uint64_t) ((uintptr_t) (p)))
+#define UINT64_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define PTR_TO_SIZE(p) ((size_t) ((uintptr_t) (p)))
+#define SIZE_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define CHAR_TO_STR(x) ((char[2]) { x, 0 })
+
+#define char_array_0(x) x[sizeof(x)-1] = 0;
+
+/* Returns the number of chars needed to format variables of the
+ * specified type as a decimal string. Adds in extra space for a
+ * negative '-' prefix (hence works correctly on signed
+ * types). Includes space for the trailing NUL. */
+#define DECIMAL_STR_MAX(type) \
+ (2+(sizeof(type) <= 1 ? 3 : \
+ sizeof(type) <= 2 ? 5 : \
+ sizeof(type) <= 4 ? 10 : \
+ sizeof(type) <= 8 ? 20 : sizeof(int[-2*(sizeof(type) > 8)])))
+
+#define DECIMAL_STR_WIDTH(x) \
+ ({ \
+ typeof(x) _x_ = (x); \
+ unsigned ans = 1; \
+ while ((_x_ /= 10) != 0) \
+ ans++; \
+ ans; \
+ })
+
+#define SET_FLAG(v, flag, b) \
+ (v) = (b) ? ((v) | (flag)) : ((v) & ~(flag))
+#define FLAGS_SET(v, flags) \
+ ((~(v) & (flags)) == 0)
+
+#define CASE_F(X) case X:
+#define CASE_F_1(CASE, X) CASE_F(X)
+#define CASE_F_2(CASE, X, ...) CASE(X) CASE_F_1(CASE, __VA_ARGS__)
+#define CASE_F_3(CASE, X, ...) CASE(X) CASE_F_2(CASE, __VA_ARGS__)
+#define CASE_F_4(CASE, X, ...) CASE(X) CASE_F_3(CASE, __VA_ARGS__)
+#define CASE_F_5(CASE, X, ...) CASE(X) CASE_F_4(CASE, __VA_ARGS__)
+#define CASE_F_6(CASE, X, ...) CASE(X) CASE_F_5(CASE, __VA_ARGS__)
+#define CASE_F_7(CASE, X, ...) CASE(X) CASE_F_6(CASE, __VA_ARGS__)
+#define CASE_F_8(CASE, X, ...) CASE(X) CASE_F_7(CASE, __VA_ARGS__)
+#define CASE_F_9(CASE, X, ...) CASE(X) CASE_F_8(CASE, __VA_ARGS__)
+#define CASE_F_10(CASE, X, ...) CASE(X) CASE_F_9(CASE, __VA_ARGS__)
+#define CASE_F_11(CASE, X, ...) CASE(X) CASE_F_10(CASE, __VA_ARGS__)
+#define CASE_F_12(CASE, X, ...) CASE(X) CASE_F_11(CASE, __VA_ARGS__)
+#define CASE_F_13(CASE, X, ...) CASE(X) CASE_F_12(CASE, __VA_ARGS__)
+#define CASE_F_14(CASE, X, ...) CASE(X) CASE_F_13(CASE, __VA_ARGS__)
+#define CASE_F_15(CASE, X, ...) CASE(X) CASE_F_14(CASE, __VA_ARGS__)
+#define CASE_F_16(CASE, X, ...) CASE(X) CASE_F_15(CASE, __VA_ARGS__)
+#define CASE_F_17(CASE, X, ...) CASE(X) CASE_F_16(CASE, __VA_ARGS__)
+#define CASE_F_18(CASE, X, ...) CASE(X) CASE_F_17(CASE, __VA_ARGS__)
+#define CASE_F_19(CASE, X, ...) CASE(X) CASE_F_18(CASE, __VA_ARGS__)
+#define CASE_F_20(CASE, X, ...) CASE(X) CASE_F_19(CASE, __VA_ARGS__)
+
+#define GET_CASE_F(_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12,_13,_14,_15,_16,_17,_18,_19,_20,NAME,...) NAME
+#define FOR_EACH_MAKE_CASE(...) \
+ GET_CASE_F(__VA_ARGS__,CASE_F_20,CASE_F_19,CASE_F_18,CASE_F_17,CASE_F_16,CASE_F_15,CASE_F_14,CASE_F_13,CASE_F_12,CASE_F_11, \
+ CASE_F_10,CASE_F_9,CASE_F_8,CASE_F_7,CASE_F_6,CASE_F_5,CASE_F_4,CASE_F_3,CASE_F_2,CASE_F_1) \
+ (CASE_F,__VA_ARGS__)
+
+#define IN_SET(x, ...) \
+ ({ \
+ bool _found = false; \
+ /* If the build breaks in the line below, you need to extend the case macros. (We use "long double" as \
+ * type for the array, in the hope that checkers such as ubsan don't complain that the initializers for \
+ * the array are not representable by the base type. Ideally we'd use typeof(x) as base type, but that \
+ * doesn't work, as we want to use this on bitfields and gcc refuses typeof() on bitfields.) */ \
+ assert_cc((sizeof((long double[]){__VA_ARGS__})/sizeof(long double)) <= 20); \
+ switch(x) { \
+ FOR_EACH_MAKE_CASE(__VA_ARGS__) \
+ _found = true; \
+ break; \
+ default: \
+ break; \
+ } \
+ _found; \
+ })
+
+#define SWAP_TWO(x, y) do { \
+ typeof(x) _t = (x); \
+ (x) = (y); \
+ (y) = (_t); \
+ } while (false)
+
+/* Define C11 thread_local attribute even on older gcc compiler
+ * version */
+#ifndef thread_local
+/*
+ * Don't break on glibc < 2.16 that doesn't define __STDC_NO_THREADS__
+ * see http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53769
+ */
+#if __STDC_VERSION__ >= 201112L && !(defined(__STDC_NO_THREADS__) || (defined(__GNU_LIBRARY__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16))
+#define thread_local _Thread_local
+#else
+#define thread_local __thread
+#endif
+#endif
+
+#define DEFINE_TRIVIAL_DESTRUCTOR(name, type, func) \
+ static inline void name(type *p) { \
+ func(p); \
+ }
+
+#define DEFINE_TRIVIAL_CLEANUP_FUNC(type, func) \
+ static inline void func##p(type *p) { \
+ if (*p) \
+ func(*p); \
+ }
+
+#define _DEFINE_TRIVIAL_REF_FUNC(type, name, scope) \
+ scope type *name##_ref(type *p) { \
+ if (!p) \
+ return NULL; \
+ \
+ assert(p->n_ref > 0); \
+ p->n_ref++; \
+ return p; \
+ }
+
+#define _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, scope) \
+ scope type *name##_unref(type *p) { \
+ if (!p) \
+ return NULL; \
+ \
+ assert(p->n_ref > 0); \
+ p->n_ref--; \
+ if (p->n_ref > 0) \
+ return NULL; \
+ \
+ return free_func(p); \
+ }
+
+#define DEFINE_TRIVIAL_REF_FUNC(type, name) \
+ _DEFINE_TRIVIAL_REF_FUNC(type, name,)
+#define DEFINE_PRIVATE_TRIVIAL_REF_FUNC(type, name) \
+ _DEFINE_TRIVIAL_REF_FUNC(type, name, static)
+#define DEFINE_PUBLIC_TRIVIAL_REF_FUNC(type, name) \
+ _DEFINE_TRIVIAL_REF_FUNC(type, name, _public_)
+
+#define DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func) \
+ _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func,)
+#define DEFINE_PRIVATE_TRIVIAL_UNREF_FUNC(type, name, free_func) \
+ _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, static)
+#define DEFINE_PUBLIC_TRIVIAL_UNREF_FUNC(type, name, free_func) \
+ _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, _public_)
+
+#define DEFINE_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \
+ DEFINE_TRIVIAL_REF_FUNC(type, name); \
+ DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func);
+
+#define DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \
+ DEFINE_PRIVATE_TRIVIAL_REF_FUNC(type, name); \
+ DEFINE_PRIVATE_TRIVIAL_UNREF_FUNC(type, name, free_func);
+
+#define DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \
+ DEFINE_PUBLIC_TRIVIAL_REF_FUNC(type, name); \
+ DEFINE_PUBLIC_TRIVIAL_UNREF_FUNC(type, name, free_func);
+
+#include "log.h"
diff --git a/src/basic/memfd-util.c b/src/basic/memfd-util.c
new file mode 100644
index 0000000..f88f0fc
--- /dev/null
+++ b/src/basic/memfd-util.c
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#if HAVE_LINUX_MEMFD_H
+#include <linux/memfd.h>
+#endif
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "memfd-util.h"
+#include "missing.h"
+#include "string-util.h"
+#include "utf8.h"
+
+int memfd_new(const char *name) {
+ _cleanup_free_ char *g = NULL;
+ int fd;
+
+ if (!name) {
+ char pr[17] = {};
+
+ /* If no name is specified we generate one. We include
+ * a hint indicating our library implementation, and
+ * add the thread name to it */
+
+ assert_se(prctl(PR_GET_NAME, (unsigned long) pr) >= 0);
+
+ if (isempty(pr))
+ name = "sd";
+ else {
+ _cleanup_free_ char *e = NULL;
+
+ e = utf8_escape_invalid(pr);
+ if (!e)
+ return -ENOMEM;
+
+ g = strappend("sd-", e);
+ if (!g)
+ return -ENOMEM;
+
+ name = g;
+ }
+ }
+
+ fd = memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
+int memfd_map(int fd, uint64_t offset, size_t size, void **p) {
+ void *q;
+ int sealed;
+
+ assert(fd >= 0);
+ assert(size > 0);
+ assert(p);
+
+ sealed = memfd_get_sealed(fd);
+ if (sealed < 0)
+ return sealed;
+
+ if (sealed)
+ q = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, offset);
+ else
+ q = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset);
+
+ if (q == MAP_FAILED)
+ return -errno;
+
+ *p = q;
+ return 0;
+}
+
+int memfd_set_sealed(int fd) {
+ int r;
+
+ assert(fd >= 0);
+
+ r = fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int memfd_get_sealed(int fd) {
+ int r;
+
+ assert(fd >= 0);
+
+ r = fcntl(fd, F_GET_SEALS);
+ if (r < 0)
+ return -errno;
+
+ return r == (F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL);
+}
+
+int memfd_get_size(int fd, uint64_t *sz) {
+ struct stat stat;
+ int r;
+
+ assert(fd >= 0);
+ assert(sz);
+
+ r = fstat(fd, &stat);
+ if (r < 0)
+ return -errno;
+
+ *sz = stat.st_size;
+ return 0;
+}
+
+int memfd_set_size(int fd, uint64_t sz) {
+ int r;
+
+ assert(fd >= 0);
+
+ r = ftruncate(fd, sz);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int memfd_new_and_map(const char *name, size_t sz, void **p) {
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(sz > 0);
+ assert(p);
+
+ fd = memfd_new(name);
+ if (fd < 0)
+ return fd;
+
+ r = memfd_set_size(fd, sz);
+ if (r < 0)
+ return r;
+
+ r = memfd_map(fd, 0, sz, p);
+ if (r < 0)
+ return r;
+
+ return TAKE_FD(fd);
+}
diff --git a/src/basic/memfd-util.h b/src/basic/memfd-util.h
new file mode 100644
index 0000000..5ebb519
--- /dev/null
+++ b/src/basic/memfd-util.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+int memfd_new(const char *name);
+int memfd_new_and_map(const char *name, size_t sz, void **p);
+
+int memfd_map(int fd, uint64_t offset, size_t size, void **p);
+
+int memfd_set_sealed(int fd);
+int memfd_get_sealed(int fd);
+
+int memfd_get_size(int fd, uint64_t *sz);
+int memfd_set_size(int fd, uint64_t sz);
diff --git a/src/basic/mempool.c b/src/basic/mempool.c
new file mode 100644
index 0000000..159c963
--- /dev/null
+++ b/src/basic/mempool.c
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "env-util.h"
+#include "macro.h"
+#include "mempool.h"
+#include "process-util.h"
+#include "util.h"
+
+struct pool {
+ struct pool *next;
+ size_t n_tiles;
+ size_t n_used;
+};
+
+void* mempool_alloc_tile(struct mempool *mp) {
+ size_t i;
+
+ /* When a tile is released we add it to the list and simply
+ * place the next pointer at its offset 0. */
+
+ assert(mp->tile_size >= sizeof(void*));
+ assert(mp->at_least > 0);
+
+ if (mp->freelist) {
+ void *r;
+
+ r = mp->freelist;
+ mp->freelist = * (void**) mp->freelist;
+ return r;
+ }
+
+ if (_unlikely_(!mp->first_pool) ||
+ _unlikely_(mp->first_pool->n_used >= mp->first_pool->n_tiles)) {
+ size_t size, n;
+ struct pool *p;
+
+ n = mp->first_pool ? mp->first_pool->n_tiles : 0;
+ n = MAX(mp->at_least, n * 2);
+ size = PAGE_ALIGN(ALIGN(sizeof(struct pool)) + n*mp->tile_size);
+ n = (size - ALIGN(sizeof(struct pool))) / mp->tile_size;
+
+ p = malloc(size);
+ if (!p)
+ return NULL;
+
+ p->next = mp->first_pool;
+ p->n_tiles = n;
+ p->n_used = 0;
+
+ mp->first_pool = p;
+ }
+
+ i = mp->first_pool->n_used++;
+
+ return ((uint8_t*) mp->first_pool) + ALIGN(sizeof(struct pool)) + i*mp->tile_size;
+}
+
+void* mempool_alloc0_tile(struct mempool *mp) {
+ void *p;
+
+ p = mempool_alloc_tile(mp);
+ if (p)
+ memzero(p, mp->tile_size);
+ return p;
+}
+
+void mempool_free_tile(struct mempool *mp, void *p) {
+ * (void**) p = mp->freelist;
+ mp->freelist = p;
+}
+
+bool mempool_enabled(void) {
+ static int b = -1;
+
+ if (!is_main_thread())
+ return false;
+
+ if (!mempool_use_allowed)
+ b = false;
+ if (b < 0)
+ b = getenv_bool("SYSTEMD_MEMPOOL") != 0;
+
+ return b;
+}
+
+#if VALGRIND
+void mempool_drop(struct mempool *mp) {
+ struct pool *p = mp->first_pool;
+ while (p) {
+ struct pool *n;
+ n = p->next;
+ free(p);
+ p = n;
+ }
+}
+#endif
diff --git a/src/basic/mempool.h b/src/basic/mempool.h
new file mode 100644
index 0000000..0eecca0
--- /dev/null
+++ b/src/basic/mempool.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+
+struct pool;
+
+struct mempool {
+ struct pool *first_pool;
+ void *freelist;
+ size_t tile_size;
+ unsigned at_least;
+};
+
+void* mempool_alloc_tile(struct mempool *mp);
+void* mempool_alloc0_tile(struct mempool *mp);
+void mempool_free_tile(struct mempool *mp, void *p);
+
+#define DEFINE_MEMPOOL(pool_name, tile_type, alloc_at_least) \
+static struct mempool pool_name = { \
+ .tile_size = sizeof(tile_type), \
+ .at_least = alloc_at_least, \
+}
+
+extern const bool mempool_use_allowed;
+bool mempool_enabled(void);
+
+#if VALGRIND
+void mempool_drop(struct mempool *mp);
+#endif
diff --git a/src/basic/meson.build b/src/basic/meson.build
new file mode 100644
index 0000000..e5852f3
--- /dev/null
+++ b/src/basic/meson.build
@@ -0,0 +1,310 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+basic_sources = files('''
+ MurmurHash2.c
+ MurmurHash2.h
+ af-list.c
+ af-list.h
+ alloc-util.c
+ alloc-util.h
+ architecture.c
+ architecture.h
+ arphrd-list.c
+ arphrd-list.h
+ async.c
+ async.h
+ audit-util.c
+ audit-util.h
+ blockdev-util.c
+ blockdev-util.h
+ btrfs-util.c
+ btrfs-util.h
+ build.h
+ bus-label.c
+ bus-label.h
+ cap-list.c
+ cap-list.h
+ capability-util.c
+ capability-util.h
+ cgroup-util.c
+ cgroup-util.h
+ chattr-util.c
+ chattr-util.h
+ conf-files.c
+ conf-files.h
+ copy.c
+ copy.h
+ def.h
+ device-nodes.c
+ device-nodes.h
+ dirent-util.c
+ dirent-util.h
+ env-file.c
+ env-file.h
+ env-util.c
+ env-util.h
+ errno-list.c
+ errno-list.h
+ escape.c
+ escape.h
+ ether-addr-util.c
+ ether-addr-util.h
+ extract-word.c
+ extract-word.h
+ fd-util.c
+ fd-util.h
+ fileio.c
+ fileio.h
+ format-util.h
+ fs-util.c
+ fs-util.h
+ glob-util.c
+ glob-util.h
+ gunicode.c
+ gunicode.h
+ hash-funcs.c
+ hash-funcs.h
+ hashmap.c
+ hashmap.h
+ hexdecoct.c
+ hexdecoct.h
+ hostname-util.c
+ hostname-util.h
+ in-addr-util.c
+ in-addr-util.h
+ io-util.c
+ io-util.h
+ ioprio.h
+ khash.c
+ khash.h
+ label.c
+ label.h
+ list.h
+ locale-util.c
+ locale-util.h
+ log.c
+ log.h
+ login-util.c
+ login-util.h
+ macro.h
+ memfd-util.c
+ memfd-util.h
+ mempool.c
+ mempool.h
+ missing.h
+ missing_audit.h
+ missing_btrfs.h
+ missing_btrfs_tree.h
+ missing_capability.h
+ missing_drm.h
+ missing_ethtool.h
+ missing_fcntl.h
+ missing_fib_rules.h
+ missing_fou.h
+ missing_fs.h
+ missing_if_bridge.h
+ missing_if_link.h
+ missing_if_tunnel.h
+ missing_input.h
+ missing_keyctl.h
+ missing_magic.h
+ missing_mman.h
+ missing_network.h
+ missing_prctl.h
+ missing_random.h
+ missing_resource.h
+ missing_sched.h
+ missing_securebits.h
+ missing_socket.h
+ missing_stat.h
+ missing_stdlib.h
+ missing_syscall.h
+ missing_timerfd.h
+ missing_type.h
+ missing_vxcan.h
+ mkdir-label.c
+ mkdir.c
+ mkdir.h
+ mountpoint-util.c
+ mountpoint-util.h
+ nss-util.h
+ ordered-set.c
+ ordered-set.h
+ parse-util.c
+ parse-util.h
+ path-util.c
+ path-util.h
+ prioq.c
+ prioq.h
+ proc-cmdline.c
+ proc-cmdline.h
+ process-util.c
+ process-util.h
+ procfs-util.c
+ procfs-util.h
+ random-util.c
+ random-util.h
+ ratelimit.c
+ ratelimit.h
+ raw-clone.h
+ raw-reboot.h
+ refcnt.h
+ replace-var.c
+ replace-var.h
+ rlimit-util.c
+ rlimit-util.h
+ rm-rf.c
+ rm-rf.h
+ selinux-util.c
+ selinux-util.h
+ set.h
+ sigbus.c
+ sigbus.h
+ signal-util.c
+ signal-util.h
+ siphash24.c
+ siphash24.h
+ smack-util.c
+ smack-util.h
+ socket-label.c
+ socket-util.c
+ socket-util.h
+ sparse-endian.h
+ special.h
+ stat-util.c
+ stat-util.h
+ static-destruct.h
+ stdio-util.h
+ strbuf.c
+ strbuf.h
+ string-table.c
+ string-table.h
+ string-util.c
+ string-util.h
+ strv.c
+ strv.h
+ strxcpyx.c
+ strxcpyx.h
+ syslog-util.c
+ syslog-util.h
+ terminal-util.c
+ terminal-util.h
+ time-util.c
+ time-util.h
+ tmpfile-util.c
+ tmpfile-util.h
+ umask-util.h
+ unaligned.h
+ unit-def.c
+ unit-def.h
+ unit-name.c
+ unit-name.h
+ user-util.c
+ user-util.h
+ utf8.c
+ utf8.h
+ util.c
+ util.h
+ virt.c
+ virt.h
+ xattr-util.c
+ xattr-util.h
+'''.split())
+
+missing_audit_h = files('missing_audit.h')
+missing_capability_h = files('missing_capability.h')
+missing_network_h = files('missing_network.h')
+missing_socket_h = files('missing_socket.h')
+
+generate_af_list = find_program('generate-af-list.sh')
+af_list_txt = custom_target(
+ 'af-list.txt',
+ output : 'af-list.txt',
+ command : [generate_af_list, cpp, config_h, missing_socket_h],
+ capture : true)
+
+generate_arphrd_list = find_program('generate-arphrd-list.sh')
+arphrd_list_txt = custom_target(
+ 'arphrd-list.txt',
+ output : 'arphrd-list.txt',
+ command : [generate_arphrd_list, cpp, config_h, missing_network_h],
+ capture : true)
+
+generate_cap_list = find_program('generate-cap-list.sh')
+cap_list_txt = custom_target(
+ 'cap-list.txt',
+ output : 'cap-list.txt',
+ command : [generate_cap_list, cpp, config_h, missing_capability_h],
+ capture : true)
+
+generate_errno_list = find_program('generate-errno-list.sh')
+errno_list_txt = custom_target(
+ 'errno-list.txt',
+ output : 'errno-list.txt',
+ command : [generate_errno_list, cpp],
+ capture : true)
+
+generated_gperf_headers = []
+foreach item : [['af', af_list_txt, 'af', ''],
+ ['arphrd', arphrd_list_txt, 'arphrd', 'ARPHRD_'],
+ ['cap', cap_list_txt, 'capability', ''],
+ ['errno', errno_list_txt, 'errno', '']]
+
+ fname = '@0@-from-name.gperf'.format(item[0])
+ gperf_file = custom_target(
+ fname,
+ input : item[1],
+ output : fname,
+ command : [generate_gperfs, item[2], item[3], '@INPUT@'],
+ capture : true)
+
+ fname = '@0@-from-name.h'.format(item[0])
+ target1 = custom_target(
+ fname,
+ input : gperf_file,
+ output : fname,
+ command : [gperf,
+ '-L', 'ANSI-C', '-t', '--ignore-case',
+ '-N', 'lookup_@0@'.format(item[2]),
+ '-H', 'hash_@0@_name'.format(item[2]),
+ '-p', '-C',
+ '@INPUT@'],
+ capture : true)
+
+ fname = '@0@-to-name.h'.format(item[0])
+ awkscript = '@0@-to-name.awk'.format(item[0])
+ target2 = custom_target(
+ fname,
+ input : [awkscript, item[1]],
+ output : fname,
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+ generated_gperf_headers += [target1, target2]
+endforeach
+
+basic_sources += generated_gperf_headers
+basic_gcrypt_sources = files(
+ 'gcrypt-util.c',
+ 'gcrypt-util.h')
+
+libbasic = static_library(
+ 'basic',
+ basic_sources,
+ include_directories : includes,
+ dependencies : [versiondep,
+ threads,
+ libcap,
+ libselinux,
+ libm],
+ c_args : ['-fvisibility=default'],
+ install : false)
+
+# A convenience library that is separate from libbasic to avoid
+# unnecessary linking to libgcrypt.
+libbasic_gcrypt = static_library(
+ 'basic-gcrypt',
+ basic_gcrypt_sources,
+ include_directories : includes,
+ dependencies : [libgcrypt],
+ c_args : ['-fvisibility=default'])
diff --git a/src/basic/missing.h b/src/basic/missing.h
new file mode 100644
index 0000000..5067c8f
--- /dev/null
+++ b/src/basic/missing.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* Missing glibc definitions to access certain kernel APIs */
+
+#include "missing_audit.h"
+#include "missing_btrfs_tree.h"
+#include "missing_capability.h"
+#include "missing_drm.h"
+#include "missing_fcntl.h"
+#include "missing_fs.h"
+#include "missing_input.h"
+#include "missing_magic.h"
+#include "missing_mman.h"
+#include "missing_network.h"
+#include "missing_prctl.h"
+#include "missing_random.h"
+#include "missing_resource.h"
+#include "missing_sched.h"
+#include "missing_socket.h"
+#include "missing_stdlib.h"
+#include "missing_timerfd.h"
+#include "missing_type.h"
+
+#include "missing_syscall.h"
diff --git a/src/basic/missing_audit.h b/src/basic/missing_audit.h
new file mode 100644
index 0000000..b00d537
--- /dev/null
+++ b/src/basic/missing_audit.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/audit.h>
+
+#if HAVE_AUDIT
+#include <libaudit.h>
+#endif
+
+#ifndef AUDIT_SERVICE_START
+#define AUDIT_SERVICE_START 1130 /* Service (daemon) start */
+#endif
+
+#ifndef AUDIT_SERVICE_STOP
+#define AUDIT_SERVICE_STOP 1131 /* Service (daemon) stop */
+#endif
+
+#ifndef MAX_AUDIT_MESSAGE_LENGTH
+#define MAX_AUDIT_MESSAGE_LENGTH 8970
+#endif
+
+#ifndef AUDIT_NLGRP_MAX
+#define AUDIT_NLGRP_READLOG 1
+#endif
diff --git a/src/basic/missing_btrfs.h b/src/basic/missing_btrfs.h
new file mode 100644
index 0000000..34c382f
--- /dev/null
+++ b/src/basic/missing_btrfs.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* Old btrfs.h requires stddef.h to be included before btrfs.h */
+#include <stddef.h>
+
+#include <linux/btrfs.h>
+
+/* linux@57254b6ebce4ceca02d9c8b615f6059c56c19238 (3.11) */
+#ifndef BTRFS_IOC_QUOTA_RESCAN_WAIT
+#define BTRFS_IOC_QUOTA_RESCAN_WAIT _IO(BTRFS_IOCTL_MAGIC, 46)
+#endif
+
+/* linux@83288b60bf6668933689078973136e0c9d387b38 (4.7) */
+#ifndef BTRFS_QGROUP_LIMIT_MAX_RFER
+#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0)
+#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1)
+#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2)
+#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3)
+#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4)
+#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5)
+#endif
diff --git a/src/basic/missing_btrfs_tree.h b/src/basic/missing_btrfs_tree.h
new file mode 100644
index 0000000..555f90f
--- /dev/null
+++ b/src/basic/missing_btrfs_tree.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/types.h>
+
+#include "missing_btrfs.h"
+
+/* linux@db6711600e27c885aed89751f04e727f3af26715 (4.7) */
+#if HAVE_LINUX_BTRFS_TREE_H
+#include <linux/btrfs_tree.h>
+#else
+#define BTRFS_ROOT_TREE_OBJECTID 1
+#define BTRFS_QUOTA_TREE_OBJECTID 8
+#define BTRFS_FIRST_FREE_OBJECTID 256
+#define BTRFS_LAST_FREE_OBJECTID -256ULL
+
+#define BTRFS_ROOT_ITEM_KEY 132
+#define BTRFS_ROOT_BACKREF_KEY 144
+#define BTRFS_QGROUP_STATUS_KEY 240
+#define BTRFS_QGROUP_INFO_KEY 242
+#define BTRFS_QGROUP_LIMIT_KEY 244
+#define BTRFS_QGROUP_RELATION_KEY 246
+
+struct btrfs_disk_key {
+ __le64 objectid;
+ __u8 type;
+ __le64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_timespec {
+ __le64 sec;
+ __le32 nsec;
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_item {
+ __le64 generation;
+ __le64 transid;
+ __le64 size;
+ __le64 nbytes;
+ __le64 block_group;
+ __le32 nlink;
+ __le32 uid;
+ __le32 gid;
+ __le32 mode;
+ __le64 rdev;
+ __le64 flags;
+ __le64 sequence;
+ __le64 reserved[4];
+ struct btrfs_timespec atime;
+ struct btrfs_timespec ctime;
+ struct btrfs_timespec mtime;
+ struct btrfs_timespec otime;
+} __attribute__ ((__packed__));
+
+#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
+
+struct btrfs_root_item {
+ struct btrfs_inode_item inode;
+ __le64 generation;
+ __le64 root_dirid;
+ __le64 bytenr;
+ __le64 byte_limit;
+ __le64 bytes_used;
+ __le64 last_snapshot;
+ __le64 flags;
+ __le32 refs;
+ struct btrfs_disk_key drop_progress;
+ __u8 drop_level;
+ __u8 level;
+
+ __le64 generation_v2;
+ __u8 uuid[BTRFS_UUID_SIZE];
+ __u8 parent_uuid[BTRFS_UUID_SIZE];
+ __u8 received_uuid[BTRFS_UUID_SIZE];
+ __le64 ctransid; /* updated when an inode changes */
+ __le64 otransid; /* trans when created */
+ __le64 stransid; /* trans when sent. non-zero for received subvol */
+ __le64 rtransid; /* trans when received. non-zero for received subvol */
+ struct btrfs_timespec ctime;
+ struct btrfs_timespec otime;
+ struct btrfs_timespec stime;
+ struct btrfs_timespec rtime;
+ __le64 reserved[8]; /* for future */
+} __attribute__ ((__packed__));
+
+struct btrfs_root_ref {
+ __le64 dirid;
+ __le64 sequence;
+ __le16 name_len;
+} __attribute__ ((__packed__));
+
+#define BTRFS_QGROUP_LEVEL_SHIFT 48
+
+struct btrfs_qgroup_info_item {
+ __le64 generation;
+ __le64 rfer;
+ __le64 rfer_cmpr;
+ __le64 excl;
+ __le64 excl_cmpr;
+} __attribute__ ((__packed__));
+
+struct btrfs_qgroup_limit_item {
+ __le64 flags;
+ __le64 max_rfer;
+ __le64 max_excl;
+ __le64 rsv_rfer;
+ __le64 rsv_excl;
+} __attribute__ ((__packed__));
+#endif
diff --git a/src/basic/missing_capability.h b/src/basic/missing_capability.h
new file mode 100644
index 0000000..1308a3d
--- /dev/null
+++ b/src/basic/missing_capability.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/capability.h>
+
+/* 3a101b8de0d39403b2c7e5c23fd0b005668acf48 (3.16) */
+#ifndef CAP_AUDIT_READ
+#define CAP_AUDIT_READ 37
+
+#undef CAP_LAST_CAP
+#define CAP_LAST_CAP CAP_AUDIT_READ
+#endif
diff --git a/src/basic/missing_drm.h b/src/basic/missing_drm.h
new file mode 100644
index 0000000..a64f74e
--- /dev/null
+++ b/src/basic/missing_drm.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#ifndef DRM_IOCTL_SET_MASTER
+#define DRM_IOCTL_SET_MASTER _IO('d', 0x1e)
+#endif
+
+#ifndef DRM_IOCTL_DROP_MASTER
+#define DRM_IOCTL_DROP_MASTER _IO('d', 0x1f)
+#endif
diff --git a/src/basic/missing_ethtool.h b/src/basic/missing_ethtool.h
new file mode 100644
index 0000000..9ba929c
--- /dev/null
+++ b/src/basic/missing_ethtool.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/types.h>
+
+/* Missing definitions in ethtool.h */
+
+#if !HAVE_ETHTOOL_LINK_MODE_10baseT_Half_BIT /* linux@3f1ac7a700d039c61d8d8b99f28d605d489a60cf (4.6) */
+
+#define ETHTOOL_GLINKSETTINGS 0x0000004c /* Get ethtool_link_settings */
+#define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */
+
+struct ethtool_link_settings {
+ __u32 cmd;
+ __u32 speed;
+ __u8 duplex;
+ __u8 port;
+ __u8 phy_address;
+ __u8 autoneg;
+ __u8 mdio_support;
+ __u8 eth_tp_mdix;
+ __u8 eth_tp_mdix_ctrl;
+ __s8 link_mode_masks_nwords;
+ __u8 transceiver;
+ __u8 reserved1[3];
+ __u32 reserved[7];
+ __u32 link_mode_masks[0];
+ /* layout of link_mode_masks fields:
+ * __u32 map_supported[link_mode_masks_nwords];
+ * __u32 map_advertising[link_mode_masks_nwords];
+ * __u32 map_lp_advertising[link_mode_masks_nwords];
+ */
+};
+
+enum ethtool_link_mode_bit_indices {
+ ETHTOOL_LINK_MODE_10baseT_Half_BIT = 0,
+ ETHTOOL_LINK_MODE_10baseT_Full_BIT = 1,
+ ETHTOOL_LINK_MODE_100baseT_Half_BIT = 2,
+ ETHTOOL_LINK_MODE_100baseT_Full_BIT = 3,
+ ETHTOOL_LINK_MODE_1000baseT_Half_BIT = 4,
+ ETHTOOL_LINK_MODE_1000baseT_Full_BIT = 5,
+ ETHTOOL_LINK_MODE_Autoneg_BIT = 6,
+ ETHTOOL_LINK_MODE_TP_BIT = 7,
+ ETHTOOL_LINK_MODE_AUI_BIT = 8,
+ ETHTOOL_LINK_MODE_MII_BIT = 9,
+ ETHTOOL_LINK_MODE_FIBRE_BIT = 10,
+ ETHTOOL_LINK_MODE_BNC_BIT = 11,
+ ETHTOOL_LINK_MODE_10000baseT_Full_BIT = 12,
+ ETHTOOL_LINK_MODE_Pause_BIT = 13,
+ ETHTOOL_LINK_MODE_Asym_Pause_BIT = 14,
+ ETHTOOL_LINK_MODE_2500baseX_Full_BIT = 15,
+ ETHTOOL_LINK_MODE_Backplane_BIT = 16,
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT = 17,
+ ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT = 18,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT = 19,
+ ETHTOOL_LINK_MODE_10000baseR_FEC_BIT = 20,
+ ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT = 21,
+ ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT = 22,
+ ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT = 23,
+ ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT = 24,
+ ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT = 25,
+ ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT = 26,
+ ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT = 27,
+ ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT = 28,
+ ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT = 29,
+ ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT = 30,
+ ETHTOOL_LINK_MODE_25000baseCR_Full_BIT = 31,
+ ETHTOOL_LINK_MODE_25000baseKR_Full_BIT = 32,
+ ETHTOOL_LINK_MODE_25000baseSR_Full_BIT = 33,
+ ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT = 34,
+ ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT = 35,
+ ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT = 36,
+ ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37,
+ ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38,
+ ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39,
+ ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40,
+ ETHTOOL_LINK_MODE_1000baseX_Full_BIT = 41,
+ ETHTOOL_LINK_MODE_10000baseCR_Full_BIT = 42,
+ ETHTOOL_LINK_MODE_10000baseSR_Full_BIT = 43,
+ ETHTOOL_LINK_MODE_10000baseLR_Full_BIT = 44,
+ ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT = 45,
+ ETHTOOL_LINK_MODE_10000baseER_Full_BIT = 46,
+ ETHTOOL_LINK_MODE_2500baseT_Full_BIT = 47,
+ ETHTOOL_LINK_MODE_5000baseT_Full_BIT = 48,
+
+ ETHTOOL_LINK_MODE_FEC_NONE_BIT = 49,
+ ETHTOOL_LINK_MODE_FEC_RS_BIT = 50,
+ ETHTOOL_LINK_MODE_FEC_BASER_BIT = 51,
+
+ /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
+ * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_*
+ * macro for bits > 31. The only way to use indices > 31 is to
+ * use the new ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API.
+ */
+
+ __ETHTOOL_LINK_MODE_LAST
+ = ETHTOOL_LINK_MODE_FEC_BASER_BIT,
+};
+#else
+#if !HAVE_ETHTOOL_LINK_MODE_25000baseCR_Full_BIT /* linux@3851112e4737cd52aaeda0ce8d084be9ee128106 (4.7) */
+#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
+#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
+#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
+#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
+#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
+#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
+#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
+#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
+#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
+#endif
+#if !HAVE_ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT /* linux@89da45b8b5b2187734a11038b8593714f964ffd1 (4.8) */
+#define ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT 40
+#endif
+#if !HAVE_ETHTOOL_LINK_MODE_1000baseX_Full_BIT /* linux@5711a98221443aec54c4c81ee98c6ae46acccb65 (4.9) */
+#define ETHTOOL_LINK_MODE_1000baseX_Full_BIT 41
+#define ETHTOOL_LINK_MODE_10000baseCR_Full_BIT 42
+#define ETHTOOL_LINK_MODE_10000baseSR_Full_BIT 43
+#define ETHTOOL_LINK_MODE_10000baseLR_Full_BIT 44
+#define ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT 45
+#define ETHTOOL_LINK_MODE_10000baseER_Full_BIT 46
+#endif
+#if !HAVE_ETHTOOL_LINK_MODE_2500baseT_Full_BIT /* linux@94842b4fc4d6b1691cfc86c6f5251f299d27f4ba (4.10) */
+#define ETHTOOL_LINK_MODE_2500baseT_Full_BIT 47
+#define ETHTOOL_LINK_MODE_5000baseT_Full_BIT 48
+#endif
+#if !HAVE_ETHTOOL_LINK_MODE_FEC_NONE_BIT /* linux@1a5f3da20bd966220931239fbd31e6ac6ff42251 (4.14) */
+#define ETHTOOL_LINK_MODE_FEC_NONE_BIT 49
+#define ETHTOOL_LINK_MODE_FEC_RS_BIT 50
+#define ETHTOOL_LINK_MODE_FEC_BASER_BIT 51
+#endif
+#endif
diff --git a/src/basic/missing_fcntl.h b/src/basic/missing_fcntl.h
new file mode 100644
index 0000000..5d1c635
--- /dev/null
+++ b/src/basic/missing_fcntl.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <fcntl.h>
+
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef F_SETPIPE_SZ
+#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
+#endif
+
+#ifndef F_GETPIPE_SZ
+#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
+#endif
+
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+
+#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
+#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
+#define F_SEAL_GROW 0x0004 /* prevent file from growing */
+#define F_SEAL_WRITE 0x0008 /* prevent writes */
+#endif
+
+#ifndef F_OFD_GETLK
+#define F_OFD_GETLK 36
+#define F_OFD_SETLK 37
+#define F_OFD_SETLKW 38
+#endif
+
+#ifndef MAX_HANDLE_SZ
+#define MAX_HANDLE_SZ 128
+#endif
+
+/* The precise definition of __O_TMPFILE is arch specific; use the
+ * values defined by the kernel (note: some are hexa, some are octal,
+ * duplicated as-is from the kernel definitions):
+ * - alpha, parisc, sparc: each has a specific value;
+ * - others: they use the "generic" value.
+ */
+
+#ifndef __O_TMPFILE
+#if defined(__alpha__)
+#define __O_TMPFILE 0100000000
+#elif defined(__parisc__) || defined(__hppa__)
+#define __O_TMPFILE 0400000000
+#elif defined(__sparc__) || defined(__sparc64__)
+#define __O_TMPFILE 0x2000000
+#else
+#define __O_TMPFILE 020000000
+#endif
+#endif
+
+/* a horrid kludge trying to make sure that this will fail on old kernels */
+#ifndef O_TMPFILE
+#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
+#endif
diff --git a/src/basic/missing_fib_rules.h b/src/basic/missing_fib_rules.h
new file mode 100644
index 0000000..df120d7
--- /dev/null
+++ b/src/basic/missing_fib_rules.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/types.h>
+
+#if !HAVE_FRA_TUN_ID /* linux@e7030878fc8448492b6e5cecd574043f63271298 (4.3) */
+#define FRA_TUN_ID 12
+#endif
+
+#if !HAVE_FRA_SUPPRESS_PREFIXLEN /* linux@6ef94cfafba159d6b1a902ccb3349ac6a34ff6ad, 73f5698e77219bfc3ea1903759fe8e20ab5b285e (3.12) */
+#define FRA_SUPPRESS_IFGROUP 13
+#define FRA_SUPPRESS_PREFIXLEN 14
+#endif
+
+#if !HAVE_FRA_PAD /* linux@b46f6ded906ef0be52a4881ba50a084aeca64d7e (4.7) */
+#define FRA_PAD 18
+#endif
+
+#if !HAVE_FRA_L3MDEV /* linux@96c63fa7393d0a346acfe5a91e0c7d4c7782641b (4.8) */
+#define FRA_L3MDEV 19
+#endif
+
+#if !HAVE_FRA_UID_RANGE /* linux@622ec2c9d52405973c9f1ca5116eb1c393adfc7d (4.10) */
+#define FRA_UID_RANGE 20
+
+struct fib_rule_uid_range {
+ __u32 start;
+ __u32 end;
+};
+#endif
+
+#if !HAVE_FRA_DPORT_RANGE /* linux@1b71af6053af1bd2f849e9fda4f71c1e3f145dcf, bfff4862653bb96001ab57c1edd6d03f48e5f035 (4.17) */
+#define FRA_PROTOCOL 21
+#define FRA_IP_PROTO 22
+#define FRA_SPORT_RANGE 23
+#define FRA_DPORT_RANGE 24
+
+#undef FRA_MAX
+#define FRA_MAX 24
+
+struct fib_rule_port_range {
+ __u16 start;
+ __u16 end;
+};
+#endif
diff --git a/src/basic/missing_fou.h b/src/basic/missing_fou.h
new file mode 100644
index 0000000..d8c7435
--- /dev/null
+++ b/src/basic/missing_fou.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if !HAVE_LINUX_FOU_H /* linux@23461551c00628c3f3fe9cf837bf53cf8f212b63 (3.18) */
+
+#define FOU_GENL_NAME "fou"
+#define FOU_GENL_VERSION 0x1
+
+enum {
+ FOU_ATTR_UNSPEC,
+ FOU_ATTR_PORT, /* u16 */
+ FOU_ATTR_AF, /* u8 */
+ FOU_ATTR_IPPROTO, /* u8 */
+ FOU_ATTR_TYPE, /* u8 */
+ FOU_ATTR_REMCSUM_NOPARTIAL, /* flag */
+
+ __FOU_ATTR_MAX,
+};
+
+#define FOU_ATTR_MAX (__FOU_ATTR_MAX - 1)
+
+enum {
+ FOU_CMD_UNSPEC,
+ FOU_CMD_ADD,
+ FOU_CMD_DEL,
+ FOU_CMD_GET,
+
+ __FOU_CMD_MAX,
+};
+
+enum {
+ FOU_ENCAP_UNSPEC,
+ FOU_ENCAP_DIRECT,
+ FOU_ENCAP_GUE,
+};
+
+#define FOU_CMD_MAX (__FOU_CMD_MAX - 1)
+
+#else
+
+#if !HAVE_FOU_ATTR_REMCSUM_NOPARTIAL /* linux@fe881ef11cf0220f118816181930494d484c4883 (4.0) */
+#define FOU_ATTR_REMCSUM_NOPARTIAL 5
+
+#undef FOU_ATTR_MAX
+#define FOU_ATTR_MAX 5
+#endif
+
+#if !HAVE_FOU_CMD_GET /* linux@7a6c8c34e5b71ac50e39588e20b39494a9e1d8e5 (4.1) */
+#define FOU_CMD_GET 3
+
+#undef FOU_CMD_MAX
+#define FOU_CMD_MAX 3
+#endif
+
+#endif
diff --git a/src/basic/missing_fs.h b/src/basic/missing_fs.h
new file mode 100644
index 0000000..48c1af0
--- /dev/null
+++ b/src/basic/missing_fs.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* linux/fs.h */
+#ifndef RENAME_NOREPLACE /* 0a7c3937a1f23f8cb5fc77ae01661e9968a51d0c (3.15) */
+#define RENAME_NOREPLACE (1 << 0)
+#endif
+
+/* linux/fs.h or sys/mount.h */
+#ifndef MS_MOVE
+#define MS_MOVE 8192
+#endif
+
+#ifndef MS_REC
+#define MS_REC 16384
+#endif
+
+#ifndef MS_PRIVATE
+#define MS_PRIVATE (1<<18)
+#endif
+
+#ifndef MS_SLAVE
+#define MS_SLAVE (1<<19)
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1<<20)
+#endif
+
+#ifndef MS_RELATIME
+#define MS_RELATIME (1<<21)
+#endif
+
+#ifndef MS_KERNMOUNT
+#define MS_KERNMOUNT (1<<22)
+#endif
+
+#ifndef MS_I_VERSION
+#define MS_I_VERSION (1<<23)
+#endif
+
+#ifndef MS_STRICTATIME
+#define MS_STRICTATIME (1<<24)
+#endif
+
+#ifndef MS_LAZYTIME
+#define MS_LAZYTIME (1<<25)
+#endif
+
+/* Not exposed yet. Defined at fs/ext4/ext4.h */
+#ifndef EXT4_IOC_RESIZE_FS
+#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
+#endif
+
+/* Not exposed yet. Defined at fs/cifs/cifsglob.h */
+#ifndef CIFS_MAGIC_NUMBER
+#define CIFS_MAGIC_NUMBER 0xFF534D42
+#endif
+
+/* linux/nsfs.h */
+#ifndef NS_GET_NSTYPE /* d95fa3c76a66b6d76b1e109ea505c55e66360f3c (4.11) */
+#define NS_GET_NSTYPE _IO(0xb7, 0x3)
+#endif
diff --git a/src/basic/missing_if_bridge.h b/src/basic/missing_if_bridge.h
new file mode 100644
index 0000000..9306062
--- /dev/null
+++ b/src/basic/missing_if_bridge.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if !HAVE_IFLA_BRIDGE_VLAN_TUNNEL_INFO /* linux@b3c7ef0adadc5768e0baa786213c6bd1ce521a77 (4.11) */
+#define IFLA_BRIDGE_VLAN_TUNNEL_INFO 3
+
+#undef IFLA_BRIDGE_MAX
+#define IFLA_BRIDGE_MAX 3
+#endif
+
+#ifndef BRIDGE_VLAN_INFO_RANGE_BEGIN
+#define BRIDGE_VLAN_INFO_RANGE_BEGIN (1<<3) /* VLAN is start of vlan range */
+#endif
+
+#ifndef BRIDGE_VLAN_INFO_RANGE_END
+#define BRIDGE_VLAN_INFO_RANGE_END (1<<4) /* VLAN is end of vlan range */
+#endif
+
+#ifndef BRIDGE_VLAN_INFO_BRENTRY
+#define BRIDGE_VLAN_INFO_BRENTRY (1<<5) /* Global bridge VLAN entry */
+#endif
diff --git a/src/basic/missing_if_link.h b/src/basic/missing_if_link.h
new file mode 100644
index 0000000..761797f
--- /dev/null
+++ b/src/basic/missing_if_link.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if !HAVE_IFLA_INET6_ADDR_GEN_MODE /* linux@bc91b0f07ada5535427373a4e2050877bcc12218 (3.17) */
+#define IFLA_INET6_ADDR_GEN_MODE 8
+
+#undef IFLA_INET6_MAX
+#define IFLA_INET6_MAX 8
+
+enum in6_addr_gen_mode {
+ IN6_ADDR_GEN_MODE_EUI64,
+ IN6_ADDR_GEN_MODE_NONE,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
+ IN6_ADDR_GEN_MODE_RANDOM,
+};
+#else
+#if !HAVE_IN6_ADDR_GEN_MODE_STABLE_PRIVACY /* linux@622c81d57b392cc9be836670eb464a4dfaa9adfe (4.1) */
+#define IN6_ADDR_GEN_MODE_STABLE_PRIVACY 2
+#endif
+#if !HAVE_IN6_ADDR_GEN_MODE_RANDOM /* linux@cc9da6cc4f56e05cc9e591459fe0192727ff58b3 (4.5) */
+#define IN6_ADDR_GEN_MODE_RANDOM 3
+#endif
+#endif /* !HAVE_IFLA_INET6_ADDR_GEN_MODE */
+
+#if !HAVE_IFLA_IPVLAN_MODE /* linux@2ad7bf3638411cb547f2823df08166c13ab04269 (3.19) */
+enum {
+ IFLA_IPVLAN_UNSPEC,
+ IFLA_IPVLAN_MODE,
+ IFLA_IPVLAN_FLAGS,
+ __IFLA_IPVLAN_MAX
+};
+#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1)
+enum ipvlan_mode {
+ IPVLAN_MODE_L2 = 0,
+ IPVLAN_MODE_L3,
+ IPVLAN_MODE_L3S,
+ IPVLAN_MODE_MAX
+};
+#else
+#if !HAVE_IPVLAN_MODE_L3S /* linux@4fbae7d83c98c30efcf0a2a2ac55fbb75ef5a1a5 (4.9) */
+#define IPVLAN_MODE_L3S 2
+#define IPVLAN_MODE_MAX 3
+#endif
+#if !HAVE_IFLA_IPVLAN_FLAGS /* linux@a190d04db93710ae166749055b6985397c6d13f5 (4.15) */
+#define IFLA_IPVLAN_FLAGS 2
+
+#undef IFLA_IPVLAN_MAX
+#define IFLA_IPVLAN_MAX 2
+#endif
+#endif /* !HAVE_IFLA_IPVLAN_MODE */
+
+/* linux@a190d04db93710ae166749055b6985397c6d13f5 (4.15) */
+#ifndef IPVLAN_F_PRIVATE
+#define IPVLAN_F_PRIVATE 0x01
+#endif
+
+/* linux@fe89aa6b250c1011ccf425fbb7998e96bd54263f (4.15) */
+#ifndef IPVLAN_F_VEPA
+#define IPVLAN_F_VEPA 0x02
+#endif
+
+#if !HAVE_IFLA_PHYS_PORT_ID /* linux@66cae9ed6bc46b8cc57a9693f99f69926f3cc7ef (3.12) */
+#define IFLA_PHYS_PORT_ID 34
+#endif
+#if !HAVE_IFLA_CARRIER_CHANGES /* linux@2d3b479df41a10e2f41f9259fcba775bd34de6e4 (3.15) */
+#define IFLA_CARRIER_CHANGES 35
+#endif
+#if !HAVE_IFLA_PHYS_SWITCH_ID /* linux@82f2841291cfaf4d225aa1766424280254d3e3b2 (3.19) */
+#define IFLA_PHYS_SWITCH_ID 36
+#endif
+#if !HAVE_IFLA_LINK_NETNSID /* linux@d37512a277dfb2cef8a578e25a3246f61399a55a (4.0) */
+#define IFLA_LINK_NETNSID 37
+#endif
+#if !HAVE_IFLA_PHYS_PORT_NAME /* linux@db24a9044ee191c397dcd1c6574f56d67d7c8df5 (4.1) */
+#define IFLA_PHYS_PORT_NAME 38
+#endif
+#if !HAVE_IFLA_PROTO_DOWN /* linux@88d6378bd6c096cb8440face3ae3f33d55a2e6e4 (4.3) */
+#define IFLA_PROTO_DOWN 39
+#endif
+#if !HAVE_IFLA_GSO_MAX_SIZE /* linux@c70ce028e834f8e51306217dbdbd441d851c64d3 (4.6) */
+#define IFLA_GSO_MAX_SEGS 40
+#define IFLA_GSO_MAX_SIZE 41
+#endif
+#if !HAVE_IFLA_PAD /* linux@18402843bf88c2e9674e1a3a05c73b7d9b09ee05 (4.7) */
+#define IFLA_PAD 42
+#endif
+#if !HAVE_IFLA_XDP /* linux@d1fdd9138682e0f272beee0cb08b6328c5478b26 (4.8) */
+#define IFLA_XDP 43
+#endif
+#if !HAVE_IFLA_EVENT /* linux@3d3ea5af5c0b382bc9d9aed378fd814fb5d4a011 (4.13) */
+#define IFLA_EVENT 44
+#endif
+#if !HAVE_IFLA_IF_NETNSID /* linux@6621dd29eb9b5e6774ec7a9a75161352fdea47fc, 79e1ad148c844f5c8b9d76b36b26e3886dca95ae (4.15) */
+#define IFLA_IF_NETNSID 45
+#define IFLA_NEW_NETNSID 46
+#endif
+#if !HAVE_IFLA_TARGET_NETNSID /* linux@19d8f1ad12fd746e60707a58d954980013c7a35a (4.20) */
+#define IFLA_TARGET_NETNSID IFLA_IF_NETNSID
+#endif
+#if !HAVE_IFLA_NEW_IFINDEX /* linux@b2d3bcfa26a7a8de41f358a6cae8b848673b3c6e, 38e01b30563a5b5ade7b54e5d739d16a2b02fe82 (4.16) */
+#define IFLA_CARRIER_UP_COUNT 47
+#define IFLA_CARRIER_DOWN_COUNT 48
+#define IFLA_NEW_IFINDEX 49
+#endif
+#if !HAVE_IFLA_MAX_MTU /* linux@3e7a50ceb11ea75c27e944f1a01e478fd62a2d8d (4.19) */
+#define IFLA_MIN_MTU 50
+#define IFLA_MAX_MTU 51
+
+#undef IFLA_MAX
+#define IFLA_MAX 51
+#endif
+
+#if !HAVE_IFLA_BOND_MODE /* linux@90af231106c0b8d223c27d35464af95cb3d9cacf (3.13) */
+#define IFLA_BOND_MODE 1
+#endif
+#if !HAVE_IFLA_BOND_ACTIVE_SLAVE /* linux@ec76aa49855f6d6fea5e01de179fb57dd47c619d (3.13) */
+#define IFLA_BOND_ACTIVE_SLAVE 2
+#endif
+#if !HAVE_IFLA_BOND_AD_INFO /* linux@4ee7ac7526d4a9413cafa733d824edfe49fdcc46 (3.14) */
+#define IFLA_BOND_MIIMON 3
+#define IFLA_BOND_UPDELAY 4
+#define IFLA_BOND_DOWNDELAY 5
+#define IFLA_BOND_USE_CARRIER 6
+#define IFLA_BOND_ARP_INTERVAL 7
+#define IFLA_BOND_ARP_IP_TARGET 8
+#define IFLA_BOND_ARP_VALIDATE 9
+#define IFLA_BOND_ARP_ALL_TARGETS 10
+#define IFLA_BOND_PRIMARY 11
+#define IFLA_BOND_PRIMARY_RESELECT 12
+#define IFLA_BOND_FAIL_OVER_MAC 13
+#define IFLA_BOND_XMIT_HASH_POLICY 14
+#define IFLA_BOND_RESEND_IGMP 15
+#define IFLA_BOND_NUM_PEER_NOTIF 16
+#define IFLA_BOND_ALL_SLAVES_ACTIVE 17
+#define IFLA_BOND_MIN_LINKS 18
+#define IFLA_BOND_LP_INTERVAL 19
+#define IFLA_BOND_PACKETS_PER_SLAVE 20
+#define IFLA_BOND_AD_LACP_RATE 21
+#define IFLA_BOND_AD_SELECT 22
+#define IFLA_BOND_AD_INFO 23
+#endif
+#if !HAVE_IFLA_BOND_AD_ACTOR_SYSTEM /* linux@171a42c38c6e1a5a076d6276e94e55a0b5b7868c (4.2) */
+#define IFLA_BOND_AD_ACTOR_SYS_PRIO 24
+#define IFLA_BOND_AD_USER_PORT_KEY 25
+#define IFLA_BOND_AD_ACTOR_SYSTEM 26
+#endif
+#if !HAVE_IFLA_BOND_TLB_DYNAMIC_LB /* linux@0f7bffd9e512b77279bbce704fad3cb1d6887958 (4.3) */
+#define IFLA_BOND_TLB_DYNAMIC_LB 27
+
+#undef IFLA_BOND_MAX
+#define IFLA_BOND_MAX 27
+#endif
+
+#if !HAVE_IFLA_VXLAN_UDP_ZERO_CSUM6_RX /* linux@359a0ea9875ef4f32c8425bbe1ae348e1fd2ed2a (3.16) */
+#define IFLA_VXLAN_UDP_CSUM 18
+#define IFLA_VXLAN_UDP_ZERO_CSUM6_TX 19
+#define IFLA_VXLAN_UDP_ZERO_CSUM6_RX 20
+#endif
+#if !HAVE_IFLA_VXLAN_REMCSUM_NOPARTIAL /* linux@dfd8645ea1bd91277f841e74c33e1f4dbbede808..0ace2ca89cbd6bcdf2b9d2df1fa0fa24ea9d1653 (4.0) */
+#define IFLA_VXLAN_REMCSUM_TX 21
+#define IFLA_VXLAN_REMCSUM_RX 22
+#define IFLA_VXLAN_GBP 23
+#define IFLA_VXLAN_REMCSUM_NOPARTIAL 24
+#endif
+#if !HAVE_IFLA_VXLAN_COLLECT_METADATA /* linux@f8a9b1bc1b238eed9987da747a0e52f5bb009980 (4.3) */
+#define IFLA_VXLAN_COLLECT_METADATA 25
+#endif
+#if !HAVE_IFLA_VXLAN_LABEL /* linux@e7f70af111f086a20800ad2e17f544b2e3e0f375 (4.6) */
+#define IFLA_VXLAN_LABEL 26
+#endif
+#if !HAVE_IFLA_VXLAN_GPE /* linux@e1e5314de08ba6003b358125eafc9ad9e75a950c (4.7) */
+#define IFLA_VXLAN_GPE 27
+#endif
+#if !HAVE_IFLA_VXLAN_TTL_INHERIT /* linux@72f6d71e491e6ce269b564865b21fab0a4402dd3 (4.18) */
+#define IFLA_VXLAN_TTL_INHERIT 28
+
+#undef IFLA_VXLAN_MAX
+#define IFLA_VXLAN_MAX 28
+#endif
+
+#if !HAVE_IFLA_GENEVE_TOS /* linux@2d07dc79fe04a43d82a346ced6bbf07bdb523f1b..d89511251f6519599b109dc6cda87a6ab314ed8c (4.2) */
+enum {
+ IFLA_GENEVE_UNSPEC,
+ IFLA_GENEVE_ID,
+ IFLA_GENEVE_REMOTE,
+ IFLA_GENEVE_TTL,
+ IFLA_GENEVE_TOS,
+ IFLA_GENEVE_PORT, /* destination port */
+ IFLA_GENEVE_COLLECT_METADATA,
+ IFLA_GENEVE_REMOTE6,
+ IFLA_GENEVE_UDP_CSUM,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
+ IFLA_GENEVE_LABEL,
+ IFLA_GENEVE_TTL_INHERIT,
+ __IFLA_GENEVE_MAX
+};
+#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
+#else
+#if !HAVE_IFLA_GENEVE_COLLECT_METADATA /* linux@e305ac6cf5a1e1386aedce7ef9cb773635d5845c (4.3) */
+#define IFLA_GENEVE_PORT 5
+#define IFLA_GENEVE_COLLECT_METADATA 6
+#endif
+#if !HAVE_IFLA_GENEVE_REMOTE6 /* linux@8ed66f0e8235118a31720acdab3bbbe9debd0f6a (4.4) */
+#define IFLA_GENEVE_REMOTE6 7
+#endif
+#if !HAVE_IFLA_GENEVE_UDP_ZERO_CSUM6_RX /* linux@abe492b4f50c3ae2ebcfaa2f5c16176aebaa1c68 (4.5) */
+#define IFLA_GENEVE_UDP_CSUM 8
+#define IFLA_GENEVE_UDP_ZERO_CSUM6_TX 9
+#define IFLA_GENEVE_UDP_ZERO_CSUM6_RX 10
+#endif
+#if !HAVE_IFLA_GENEVE_LABEL /* linux@8eb3b99554b82da968d1fbc00df9f3156c5e2d63 (4.6) */
+#define IFLA_GENEVE_LABEL 11
+#endif
+#if !HAVE_IFLA_GENEVE_TTL_INHERIT /* linux@52d0d404d39dd9eac71a181615d6ca15e23d8e38 (4.20) */
+#define IFLA_GENEVE_TTL_INHERIT 12
+
+#undef IFLA_GENEVE_MAX
+#define IFLA_GENEVE_MAX 12
+#endif
+#endif
+
+#if !HAVE_IFLA_BR_MAX_AGE /* linux@e5c3ea5c668033b303e7ac835d7d91da32d97958 (3.18) */
+enum {
+ IFLA_BR_UNSPEC,
+ IFLA_BR_FORWARD_DELAY,
+ IFLA_BR_HELLO_TIME,
+ IFLA_BR_MAX_AGE,
+ IFLA_BR_AGEING_TIME,
+ IFLA_BR_STP_STATE,
+ IFLA_BR_PRIORITY,
+ IFLA_BR_VLAN_FILTERING,
+ IFLA_BR_VLAN_PROTOCOL,
+ IFLA_BR_GROUP_FWD_MASK,
+ IFLA_BR_ROOT_ID,
+ IFLA_BR_BRIDGE_ID,
+ IFLA_BR_ROOT_PORT,
+ IFLA_BR_ROOT_PATH_COST,
+ IFLA_BR_TOPOLOGY_CHANGE,
+ IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
+ IFLA_BR_HELLO_TIMER,
+ IFLA_BR_TCN_TIMER,
+ IFLA_BR_TOPOLOGY_CHANGE_TIMER,
+ IFLA_BR_GC_TIMER,
+ IFLA_BR_GROUP_ADDR,
+ IFLA_BR_FDB_FLUSH,
+ IFLA_BR_MCAST_ROUTER,
+ IFLA_BR_MCAST_SNOOPING,
+ IFLA_BR_MCAST_QUERY_USE_IFADDR,
+ IFLA_BR_MCAST_QUERIER,
+ IFLA_BR_MCAST_HASH_ELASTICITY,
+ IFLA_BR_MCAST_HASH_MAX,
+ IFLA_BR_MCAST_LAST_MEMBER_CNT,
+ IFLA_BR_MCAST_STARTUP_QUERY_CNT,
+ IFLA_BR_MCAST_LAST_MEMBER_INTVL,
+ IFLA_BR_MCAST_MEMBERSHIP_INTVL,
+ IFLA_BR_MCAST_QUERIER_INTVL,
+ IFLA_BR_MCAST_QUERY_INTVL,
+ IFLA_BR_MCAST_QUERY_RESPONSE_INTVL,
+ IFLA_BR_MCAST_STARTUP_QUERY_INTVL,
+ IFLA_BR_NF_CALL_IPTABLES,
+ IFLA_BR_NF_CALL_IP6TABLES,
+ IFLA_BR_NF_CALL_ARPTABLES,
+ IFLA_BR_VLAN_DEFAULT_PVID,
+ IFLA_BR_PAD,
+ IFLA_BR_VLAN_STATS_ENABLED,
+ IFLA_BR_MCAST_STATS_ENABLED,
+ IFLA_BR_MCAST_IGMP_VERSION,
+ IFLA_BR_MCAST_MLD_VERSION,
+ IFLA_BR_VLAN_STATS_PER_PORT,
+ __IFLA_BR_MAX,
+};
+
+#define IFLA_BR_MAX (__IFLA_BR_MAX - 1)
+#else
+#if !HAVE_IFLA_BR_PRIORITY /* linux@af615762e972be0c66cf1d156ca4fac13b93c0b0 (4.1) */
+#define IFLA_BR_AGEING_TIME 4
+#define IFLA_BR_STP_STATE 5
+#define IFLA_BR_PRIORITY 6
+#endif
+#if !HAVE_IFLA_BR_VLAN_PROTOCOL /* linux@a7854037da006a7472c48773e3190db55217ec9b, d2d427b3927bd7a0348fc7f323d0e291f79a2779 (4.3) */
+#define IFLA_BR_VLAN_FILTERING 7
+#define IFLA_BR_VLAN_PROTOCOL 8
+#endif
+#if !HAVE_IFLA_BR_VLAN_DEFAULT_PVID /* linux@7910228b6bb35f3c8e0bc72a8d84c29616cb1b90..0f963b7592ef9e054974b6672b86ec1edd84b4bc (4.4) */
+#define IFLA_BR_GROUP_FWD_MASK 9
+#define IFLA_BR_ROOT_ID 10
+#define IFLA_BR_BRIDGE_ID 11
+#define IFLA_BR_ROOT_PORT 12
+#define IFLA_BR_ROOT_PATH_COST 13
+#define IFLA_BR_TOPOLOGY_CHANGE 14
+#define IFLA_BR_TOPOLOGY_CHANGE_DETECTED 15
+#define IFLA_BR_HELLO_TIMER 16
+#define IFLA_BR_TCN_TIMER 17
+#define IFLA_BR_TOPOLOGY_CHANGE_TIMER 18
+#define IFLA_BR_GC_TIMER 19
+#define IFLA_BR_GROUP_ADDR 20
+#define IFLA_BR_FDB_FLUSH 21
+#define IFLA_BR_MCAST_ROUTER 22
+#define IFLA_BR_MCAST_SNOOPING 23
+#define IFLA_BR_MCAST_QUERY_USE_IFADDR 24
+#define IFLA_BR_MCAST_QUERIER 25
+#define IFLA_BR_MCAST_HASH_ELASTICITY 26
+#define IFLA_BR_MCAST_HASH_MAX 27
+#define IFLA_BR_MCAST_LAST_MEMBER_CNT 28
+#define IFLA_BR_MCAST_STARTUP_QUERY_CNT 29
+#define IFLA_BR_MCAST_LAST_MEMBER_INTVL 30
+#define IFLA_BR_MCAST_MEMBERSHIP_INTVL 31
+#define IFLA_BR_MCAST_QUERIER_INTVL 32
+#define IFLA_BR_MCAST_QUERY_INTVL 33
+#define IFLA_BR_MCAST_QUERY_RESPONSE_INTVL 34
+#define IFLA_BR_MCAST_STARTUP_QUERY_INTVL 35
+#define IFLA_BR_NF_CALL_IPTABLES 36
+#define IFLA_BR_NF_CALL_IP6TABLES 37
+#define IFLA_BR_NF_CALL_ARPTABLES 38
+#define IFLA_BR_VLAN_DEFAULT_PVID 39
+#endif
+#if !HAVE_IFLA_BR_VLAN_STATS_ENABLED /* linux@12a0faa3bd76157b9dc096758d6818ff535e4586, 6dada9b10a0818ba72c249526a742c8c41274a73 (4.7) */
+#define IFLA_BR_PAD 40
+#define IFLA_BR_VLAN_STATS_ENABLED 41
+#endif
+#if !HAVE_IFLA_BR_MCAST_STATS_ENABLED /* linux@1080ab95e3c7bdd77870e209aff83c763fdcf439 (4.8) */
+#define IFLA_BR_MCAST_STATS_ENABLED 42
+#endif
+#if !HAVE_IFLA_BR_MCAST_MLD_VERSION /* linux@5e9235853d652a295d5f56cb8652950b6b5bf56b, aa2ae3e71c74cc00ec22f133dc900b3817415785 (4.10) */
+#define IFLA_BR_MCAST_IGMP_VERSION 43
+#define IFLA_BR_MCAST_MLD_VERSION 44
+#endif
+#if !HAVE_IFLA_BR_VLAN_STATS_PER_PORT /* linux@9163a0fc1f0c0980f117cc25f4fa6ba9b0750a36 (4.20) */
+#define IFLA_BR_VLAN_STATS_PER_PORT 45
+
+#undef IFLA_BR_MAX
+#define IFLA_BR_MAX 45
+#endif
+#endif
+
+#if !HAVE_IFLA_BRPORT_LEARNING_SYNC /* linux@958501163ddd6ea22a98f94fa0e7ce6d4734e5c4, efacacdaf7cb5a0592ed772e3731636b2742e34a (3.19)*/
+#define IFLA_BRPORT_PROXYARP 10
+#define IFLA_BRPORT_LEARNING_SYNC 11
+#endif
+#if !HAVE_IFLA_BRPORT_PROXYARP_WIFI /* linux@842a9ae08a25671db3d4f689eed68b4d64be15b5 (4.1) */
+#define IFLA_BRPORT_PROXYARP_WIFI 12
+#endif
+#if !HAVE_IFLA_BRPORT_MULTICAST_ROUTER /* linux@4ebc7660ab4559cad10b6595e05f70562bb26dc5..5d6ae479ab7ddf77bb22bdf739268581453ff886 (4.4) */
+#define IFLA_BRPORT_ROOT_ID 13
+#define IFLA_BRPORT_BRIDGE_ID 14
+#define IFLA_BRPORT_DESIGNATED_PORT 15
+#define IFLA_BRPORT_DESIGNATED_COST 16
+#define IFLA_BRPORT_ID 17
+#define IFLA_BRPORT_NO 18
+#define IFLA_BRPORT_TOPOLOGY_CHANGE_ACK 19
+#define IFLA_BRPORT_CONFIG_PENDING 20
+#define IFLA_BRPORT_MESSAGE_AGE_TIMER 21
+#define IFLA_BRPORT_FORWARD_DELAY_TIMER 22
+#define IFLA_BRPORT_HOLD_TIMER 23
+#define IFLA_BRPORT_FLUSH 24
+#define IFLA_BRPORT_MULTICAST_ROUTER 25
+#endif
+#if !HAVE_IFLA_BRPORT_PAD /* linux@12a0faa3bd76157b9dc096758d6818ff535e4586 (4.7) */
+#define IFLA_BRPORT_PAD 26
+#endif
+#if !HAVE_IFLA_BRPORT_MCAST_FLOOD /* linux@b6cb5ac8331b6bcfe9ce38c7f7f58db6e1d6270a (4.9) */
+#define IFLA_BRPORT_MCAST_FLOOD 27
+#endif
+#if !HAVE_IFLA_BRPORT_VLAN_TUNNEL /* linux@6db6f0eae6052b70885562e1733896647ec1d807, b3c7ef0adadc5768e0baa786213c6bd1ce521a77 (4.11) */
+#define IFLA_BRPORT_MCAST_TO_UCAST 28
+#define IFLA_BRPORT_VLAN_TUNNEL 29
+#endif
+#if !HAVE_IFLA_BRPORT_BCAST_FLOOD /* linux@99f906e9ad7b6e79ffeda30f45906a8448b9d6a2 (4.12) */
+#define IFLA_BRPORT_BCAST_FLOOD 30
+#endif
+#if !HAVE_IFLA_BRPORT_NEIGH_SUPPRESS /* linux@5af48b59f35cf712793badabe1a574a0d0ce3bd3, 821f1b21cabb46827ce39ddf82e2789680b5042a (4.15) */
+#define IFLA_BRPORT_GROUP_FWD_MASK 31
+#define IFLA_BRPORT_NEIGH_SUPPRESS 32
+#endif
+#if !HAVE_IFLA_BRPORT_ISOLATED /* linux@7d850abd5f4edb1b1ca4b4141a4453305736f564 (4.18) */
+#define IFLA_BRPORT_ISOLATED 33
+#endif
+#if !HAVE_IFLA_BRPORT_BACKUP_PORT /* linux@2756f68c314917d03eb348084edb08bb929139d9 (4.19) */
+#define IFLA_BRPORT_BACKUP_PORT 34
+
+#undef IFLA_BRPORT_MAX
+#define IFLA_BRPORT_MAX 34
+#endif
+
+#if !HAVE_IFLA_VRF_TABLE /* linux@4e3c89920cd3a6cfce22c6f537690747c26128dd (4.3) */
+enum {
+ IFLA_VRF_UNSPEC,
+ IFLA_VRF_TABLE,
+ __IFLA_VRF_MAX
+};
+#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1)
+#endif
diff --git a/src/basic/missing_if_tunnel.h b/src/basic/missing_if_tunnel.h
new file mode 100644
index 0000000..f51fdd1
--- /dev/null
+++ b/src/basic/missing_if_tunnel.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if !HAVE_IFLA_VTI_FWMARK /* linux@0a473b82cb23e7a35c4be6e9765c8487a65e8f55 (4.12) */
+#define IFLA_VTI_FWMARK 6
+
+#undef IFLA_VTI_MAX
+#define IFLA_VTI_MAX 6
+#endif
+
+#if !HAVE_IFLA_IPTUN_ENCAP_DPORT /* linux@56328486539ddd07cbaafec7a542a2c8a3043623 (3.18)*/
+#define IFLA_IPTUN_ENCAP_TYPE 15
+#define IFLA_IPTUN_ENCAP_FLAGS 16
+#define IFLA_IPTUN_ENCAP_SPORT 17
+#define IFLA_IPTUN_ENCAP_DPORT 18
+#endif
+
+#if !HAVE_IFLA_IPTUN_COLLECT_METADATA /* linux@cfc7381b3002756b1dcada32979e942aa3126e31 (4.9) */
+#define IFLA_IPTUN_COLLECT_METADATA 19
+#endif
+
+#if !HAVE_IFLA_IPTUN_FWMARK /* linux@0a473b82cb23e7a35c4be6e9765c8487a65e8f55 (4.12) */
+#define IFLA_IPTUN_FWMARK 20
+
+#undef IFLA_IPTUN_MAX
+#define IFLA_IPTUN_MAX 20
+#endif
+
+#if !HAVE_IFLA_GRE_ENCAP_DPORT /* linux@4565e9919cda747815547e2e5d7b78f15efbffdf (3.18) */
+#define IFLA_GRE_ENCAP_TYPE 14
+#define IFLA_GRE_ENCAP_FLAGS 15
+#define IFLA_GRE_ENCAP_SPORT 16
+#define IFLA_GRE_ENCAP_DPORT 17
+#endif
+
+#if !HAVE_IFLA_GRE_COLLECT_METADATA /* linux@2e15ea390e6f4466655066d97e22ec66870a042c (4.3) */
+#define IFLA_GRE_COLLECT_METADATA 18
+#endif
+
+#if !HAVE_IFLA_GRE_IGNORE_DF /* linux@22a59be8b7693eb2d0897a9638f5991f2f8e4ddd (4.8) */
+#define IFLA_GRE_IGNORE_DF 19
+#endif
+
+#if !HAVE_IFLA_GRE_FWMARK /* linux@0a473b82cb23e7a35c4be6e9765c8487a65e8f55 (4.12) */
+#define IFLA_GRE_FWMARK 20
+#endif
+
+#if !HAVE_IFLA_GRE_ERSPAN_INDEX /* linux@84e54fe0a5eaed696dee4019c396f8396f5a908b (4.14) */
+#define IFLA_GRE_ERSPAN_INDEX 21
+#endif
+
+#if !HAVE_IFLA_GRE_ERSPAN_HWID /* linux@f551c91de262ba36b20c3ac19538afb4f4507441 (4.16) */
+#define IFLA_GRE_ERSPAN_VER 22
+#define IFLA_GRE_ERSPAN_DIR 23
+#define IFLA_GRE_ERSPAN_HWID 24
+
+#undef IFLA_GRE_MAX
+#define IFLA_GRE_MAX 24
+#endif
diff --git a/src/basic/missing_input.h b/src/basic/missing_input.h
new file mode 100644
index 0000000..b91ccb6
--- /dev/null
+++ b/src/basic/missing_input.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/input.h>
+#include <linux/types.h>
+
+/* linux@c7dc65737c9a607d3e6f8478659876074ad129b8 (3.12) */
+#ifndef EVIOCREVOKE
+#define EVIOCREVOKE _IOW('E', 0x91, int)
+#endif
+
+/* linux@06a16293f71927f756dcf37558a79c0b05a91641 (4.4) */
+#ifndef EVIOCSMASK
+struct input_mask {
+ __u32 type;
+ __u32 codes_size;
+ __u64 codes_ptr;
+};
+
+#define EVIOCGMASK _IOR('E', 0x92, struct input_mask)
+#define EVIOCSMASK _IOW('E', 0x93, struct input_mask)
+#endif
+
+/* linux@7611392fe8ff95ecae528b01a815ae3d72ca6b95 (3.17) */
+#ifndef INPUT_PROP_POINTING_STICK
+#define INPUT_PROP_POINTING_STICK 0x05
+#endif
+
+/* linux@500d4160abe9a2e88b12e319c13ae3ebd1e18108 (4.0) */
+#ifndef INPUT_PROP_ACCELEROMETER
+#define INPUT_PROP_ACCELEROMETER 0x06
+#endif
+
+/* linux@d09bbfd2a8408a995419dff0d2ba906013cf4cc9 (3.11) */
+#ifndef BTN_DPAD_UP
+#define BTN_DPAD_UP 0x220
+#define BTN_DPAD_DOWN 0x221
+#define BTN_DPAD_LEFT 0x222
+#define BTN_DPAD_RIGHT 0x223
+#endif
+
+/* linux@358f24704f2f016af7d504b357cdf32606091d07 (3.13) */
+#ifndef KEY_ALS_TOGGLE
+#define KEY_ALS_TOGGLE 0x230
+#endif
diff --git a/src/basic/missing_keyctl.h b/src/basic/missing_keyctl.h
new file mode 100644
index 0000000..7eb7095
--- /dev/null
+++ b/src/basic/missing_keyctl.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/keyctl.h>
+
+#ifndef KEYCTL_JOIN_SESSION_KEYRING
+#define KEYCTL_JOIN_SESSION_KEYRING 1
+#endif
+
+#ifndef KEYCTL_CHOWN
+#define KEYCTL_CHOWN 4
+#endif
+
+#ifndef KEYCTL_SETPERM
+#define KEYCTL_SETPERM 5
+#endif
+
+#ifndef KEYCTL_DESCRIBE
+#define KEYCTL_DESCRIBE 6
+#endif
+
+#ifndef KEYCTL_LINK
+#define KEYCTL_LINK 8
+#endif
+
+#ifndef KEYCTL_READ
+#define KEYCTL_READ 11
+#endif
+
+#ifndef KEYCTL_SET_TIMEOUT
+#define KEYCTL_SET_TIMEOUT 15
+#endif
+
+#ifndef KEY_SPEC_USER_KEYRING
+#define KEY_SPEC_USER_KEYRING -4
+#endif
+
+#ifndef KEY_SPEC_SESSION_KEYRING
+#define KEY_SPEC_SESSION_KEYRING -3
+#endif
+
+/* From linux/key.h */
+#ifndef KEY_POS_VIEW
+
+typedef int32_t key_serial_t;
+
+#define KEY_POS_VIEW 0x01000000
+#define KEY_POS_READ 0x02000000
+#define KEY_POS_WRITE 0x04000000
+#define KEY_POS_SEARCH 0x08000000
+#define KEY_POS_LINK 0x10000000
+#define KEY_POS_SETATTR 0x20000000
+#define KEY_POS_ALL 0x3f000000
+
+#define KEY_USR_VIEW 0x00010000
+#define KEY_USR_READ 0x00020000
+#define KEY_USR_WRITE 0x00040000
+#define KEY_USR_SEARCH 0x00080000
+#define KEY_USR_LINK 0x00100000
+#define KEY_USR_SETATTR 0x00200000
+#define KEY_USR_ALL 0x003f0000
+
+#define KEY_GRP_VIEW 0x00000100
+#define KEY_GRP_READ 0x00000200
+#define KEY_GRP_WRITE 0x00000400
+#define KEY_GRP_SEARCH 0x00000800
+#define KEY_GRP_LINK 0x00001000
+#define KEY_GRP_SETATTR 0x00002000
+#define KEY_GRP_ALL 0x00003f00
+
+#define KEY_OTH_VIEW 0x00000001
+#define KEY_OTH_READ 0x00000002
+#define KEY_OTH_WRITE 0x00000004
+#define KEY_OTH_SEARCH 0x00000008
+#define KEY_OTH_LINK 0x00000010
+#define KEY_OTH_SETATTR 0x00000020
+#define KEY_OTH_ALL 0x0000003f
+#endif
diff --git a/src/basic/missing_magic.h b/src/basic/missing_magic.h
new file mode 100644
index 0000000..4910cd3
--- /dev/null
+++ b/src/basic/missing_magic.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/magic.h>
+
+/* 62aa81d7c4c24b90fdb61da70ac0dbbc414f9939 (4.13) */
+#ifndef OCFS2_SUPER_MAGIC
+#define OCFS2_SUPER_MAGIC 0x7461636f
+#endif
+
+/* 67e9c74b8a873408c27ac9a8e4c1d1c8d72c93ff (4.5) */
+#ifndef CGROUP2_SUPER_MAGIC
+#define CGROUP2_SUPER_MAGIC 0x63677270
+#endif
+
+/* 4282d60689d4f21b40692029080440cc58e8a17d (4.1) */
+#ifndef TRACEFS_MAGIC
+#define TRACEFS_MAGIC 0x74726163
+#endif
+
+/* e149ed2b805fefdccf7ccdfc19eca22fdd4514ac (3.19) */
+#ifndef NSFS_MAGIC
+#define NSFS_MAGIC 0x6e736673
+#endif
+
+/* b2197755b2633e164a439682fb05a9b5ea48f706 (4.4) */
+#ifndef BPF_FS_MAGIC
+#define BPF_FS_MAGIC 0xcafe4a11
+#endif
+
+/* Not exposed yet (4.20). Defined at ipc/mqueue.c */
+#ifndef MQUEUE_MAGIC
+#define MQUEUE_MAGIC 0x19800202
+#endif
diff --git a/src/basic/missing_mman.h b/src/basic/missing_mman.h
new file mode 100644
index 0000000..7ff12f7
--- /dev/null
+++ b/src/basic/missing_mman.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/mman.h>
+
+#ifndef MFD_ALLOW_SEALING
+#define MFD_ALLOW_SEALING 0x0002U
+#endif
+
+#ifndef MFD_CLOEXEC
+#define MFD_CLOEXEC 0x0001U
+#endif
diff --git a/src/basic/missing_network.h b/src/basic/missing_network.h
new file mode 100644
index 0000000..59a8cd2
--- /dev/null
+++ b/src/basic/missing_network.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/loop.h>
+#include <linux/rtnetlink.h>
+#include <net/ethernet.h>
+
+#include "missing_ethtool.h"
+#include "missing_fib_rules.h"
+#include "missing_fou.h"
+#include "missing_if_bridge.h"
+#include "missing_if_link.h"
+#include "missing_if_tunnel.h"
+#include "missing_vxcan.h"
+
+/* if.h */
+/* The following two defines are actually available in the kernel headers for longer, but we define them here anyway,
+ * since that makes it easier to use them in conjunction with the glibc net/if.h header which conflicts with
+ * linux/if.h. */
+#ifndef IF_OPER_UNKNOWN
+#define IF_OPER_UNKNOWN 0
+#endif
+
+#ifndef IF_OPER_UP
+#define IF_OPER_UP 6
+#endif
+
+#ifndef IFF_LOWER_UP
+#define IFF_LOWER_UP 0x10000
+#endif
+
+#ifndef IFF_DORMANT
+#define IFF_DORMANT 0x20000
+#endif
+
+/* if_addr.h */
+#if !HAVE_IFA_FLAGS
+#define IFA_FLAGS 8
+#endif
+
+#ifndef IFA_F_MANAGETEMPADDR
+#define IFA_F_MANAGETEMPADDR 0x100
+#endif
+
+#ifndef IFA_F_NOPREFIXROUTE
+#define IFA_F_NOPREFIXROUTE 0x200
+#endif
+
+#ifndef IFA_F_MCAUTOJOIN
+#define IFA_F_MCAUTOJOIN 0x400
+#endif
+
+/* if_arp.h */
+#ifndef ARPHRD_IP6GRE
+#define ARPHRD_IP6GRE 823
+#endif
+
+/* if_bonding.h */
+#ifndef BOND_XMIT_POLICY_ENCAP23
+#define BOND_XMIT_POLICY_ENCAP23 3
+#endif
+
+#ifndef BOND_XMIT_POLICY_ENCAP34
+#define BOND_XMIT_POLICY_ENCAP34 4
+#endif
+
+/* if_tun.h */
+#ifndef IFF_MULTI_QUEUE
+#define IFF_MULTI_QUEUE 0x100
+#endif
+
+/* in6.h */
+#ifndef IPV6_UNICAST_IF
+#define IPV6_UNICAST_IF 76
+#endif
+
+/* ip.h */
+#ifndef IPV4_MIN_MTU
+#define IPV4_MIN_MTU 68
+#endif
+
+/* ipv6.h */
+#ifndef IPV6_MIN_MTU
+#define IPV6_MIN_MTU 1280
+#endif
+
+/* loop.h */
+#if !HAVE_LO_FLAGS_PARTSCAN
+#define LO_FLAGS_PARTSCAN 8
+#endif
+
+#ifndef LOOP_CTL_REMOVE
+#define LOOP_CTL_REMOVE 0x4C81
+#endif
+
+#ifndef LOOP_CTL_GET_FREE
+#define LOOP_CTL_GET_FREE 0x4C82
+#endif
+
+/* netdevice.h */
+#ifndef NET_ADDR_RANDOM
+#define NET_ADDR_RANDOM 1
+#endif
+
+#ifndef NET_NAME_UNKNOWN
+#define NET_NAME_UNKNOWN 0
+#endif
+
+#ifndef NET_NAME_ENUM
+#define NET_NAME_ENUM 1
+#endif
+
+#ifndef NET_NAME_PREDICTABLE
+#define NET_NAME_PREDICTABLE 2
+#endif
+
+#ifndef NET_NAME_USER
+#define NET_NAME_USER 3
+#endif
+
+#ifndef NET_NAME_RENAMED
+#define NET_NAME_RENAMED 4
+#endif
+
+/* netlink.h */
+#ifndef NETLINK_LIST_MEMBERSHIPS /* b42be38b2778eda2237fc759e55e3b698b05b315 (4.2) */
+#define NETLINK_LIST_MEMBERSHIPS 9
+#endif
+
+/* rtnetlink.h */
+#ifndef RTA_PREF
+#define RTA_PREF 20
+#endif
+
+#ifndef RTAX_QUICKACK
+#define RTAX_QUICKACK 15
+#endif
+
+#ifndef RTA_EXPIRES
+#define RTA_EXPIRES 23
+#endif
+
+/* Note that LOOPBACK_IFINDEX is currently not exported by the
+ * kernel/glibc, but hardcoded internally by the kernel. However, as
+ * it is exported to userspace indirectly via rtnetlink and the
+ * ioctls, and made use of widely we define it here too, in a way that
+ * is compatible with the kernel's internal definition. */
+#ifndef LOOPBACK_IFINDEX
+#define LOOPBACK_IFINDEX 1
+#endif
+
+/* Not exposed yet. Similar values are defined in net/ethernet.h */
+#ifndef ETHERTYPE_LLDP
+#define ETHERTYPE_LLDP 0x88cc
+#endif
diff --git a/src/basic/missing_prctl.h b/src/basic/missing_prctl.h
new file mode 100644
index 0000000..f80cd17
--- /dev/null
+++ b/src/basic/missing_prctl.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/prctl.h>
+
+/* 58319057b7847667f0c9585b9de0e8932b0fdb08 (4.3) */
+#ifndef PR_CAP_AMBIENT
+#define PR_CAP_AMBIENT 47
+
+#define PR_CAP_AMBIENT_IS_SET 1
+#define PR_CAP_AMBIENT_RAISE 2
+#define PR_CAP_AMBIENT_LOWER 3
+#define PR_CAP_AMBIENT_CLEAR_ALL 4
+#endif
diff --git a/src/basic/missing_random.h b/src/basic/missing_random.h
new file mode 100644
index 0000000..2e76031
--- /dev/null
+++ b/src/basic/missing_random.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if USE_SYS_RANDOM_H
+# include <sys/random.h>
+#else
+# include <linux/random.h>
+#endif
+
+#ifndef GRND_NONBLOCK
+#define GRND_NONBLOCK 0x0001
+#endif
+
+#ifndef GRND_RANDOM
+#define GRND_RANDOM 0x0002
+#endif
diff --git a/src/basic/missing_resource.h b/src/basic/missing_resource.h
new file mode 100644
index 0000000..22ba8ab
--- /dev/null
+++ b/src/basic/missing_resource.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/resource.h>
+
+#ifndef RLIMIT_RTTIME
+#define RLIMIT_RTTIME 15
+#endif
+
+/* If RLIMIT_RTTIME is not defined, then we cannot use RLIMIT_NLIMITS as is */
+#define _RLIMIT_MAX (RLIMIT_RTTIME+1 > RLIMIT_NLIMITS ? RLIMIT_RTTIME+1 : RLIMIT_NLIMITS)
diff --git a/src/basic/missing_sched.h b/src/basic/missing_sched.h
new file mode 100644
index 0000000..baa3913
--- /dev/null
+++ b/src/basic/missing_sched.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sched.h>
+
+#ifndef CLONE_NEWCGROUP
+#define CLONE_NEWCGROUP 0x02000000
+#endif
+
+/* Not exposed yet. Defined at include/linux/sched.h */
+#ifndef PF_KTHREAD
+#define PF_KTHREAD 0x00200000
+#endif
+
+/* The maximum thread/process name length including trailing NUL byte. This mimics the kernel definition of the same
+ * name, which we need in userspace at various places but is not defined in userspace currently, neither under this
+ * name nor any other. */
+/* Not exposed yet. Defined at include/linux/sched.h */
+#ifndef TASK_COMM_LEN
+#define TASK_COMM_LEN 16
+#endif
diff --git a/src/basic/missing_securebits.h b/src/basic/missing_securebits.h
new file mode 100644
index 0000000..40d6ec9
--- /dev/null
+++ b/src/basic/missing_securebits.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <linux/securebits.h>
+
+/* 746bf6d64275be0c65b0631d8a72b16f1454cfa1 (4.3) */
+#ifndef SECURE_NO_CAP_AMBIENT_RAISE
+#define SECURE_NO_CAP_AMBIENT_RAISE 6
+#define SECURE_NO_CAP_AMBIENT_RAISE_LOCKED 7 /* make bit-6 immutable */
+#define SECBIT_NO_CAP_AMBIENT_RAISE (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
+#define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED))
+
+#undef SECURE_ALL_BITS
+#define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \
+ issecure_mask(SECURE_NO_SETUID_FIXUP) | \
+ issecure_mask(SECURE_KEEP_CAPS) | \
+ issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
+#endif
diff --git a/src/basic/missing_socket.h b/src/basic/missing_socket.h
new file mode 100644
index 0000000..a5fd457
--- /dev/null
+++ b/src/basic/missing_socket.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/socket.h>
+
+#if HAVE_LINUX_VM_SOCKETS_H
+#include <linux/vm_sockets.h>
+#else
+#define VMADDR_CID_ANY -1U
+struct sockaddr_vm {
+ unsigned short svm_family;
+ unsigned short svm_reserved1;
+ unsigned int svm_port;
+ unsigned int svm_cid;
+ unsigned char svm_zero[sizeof(struct sockaddr) -
+ sizeof(unsigned short) -
+ sizeof(unsigned short) -
+ sizeof(unsigned int) -
+ sizeof(unsigned int)];
+};
+#endif /* !HAVE_LINUX_VM_SOCKETS_H */
+
+#ifndef AF_VSOCK
+#define AF_VSOCK 40
+#endif
+
+#ifndef SO_REUSEPORT
+#define SO_REUSEPORT 15
+#endif
+
+#ifndef SO_PEERGROUPS
+#define SO_PEERGROUPS 59
+#endif
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+#ifndef SOL_ALG
+#define SOL_ALG 279
+#endif
+
+/* Not exposed yet. Defined in include/linux/socket.h. */
+#ifndef SOL_SCTP
+#define SOL_SCTP 132
+#endif
+
+/* Not exposed yet. Defined in include/linux/socket.h */
+#ifndef SCM_SECURITY
+#define SCM_SECURITY 0x03
+#endif
+
+/* netinet/in.h */
+#ifndef IP_FREEBIND
+#define IP_FREEBIND 15
+#endif
+
+#ifndef IP_TRANSPARENT
+#define IP_TRANSPARENT 19
+#endif
diff --git a/src/basic/missing_stat.h b/src/basic/missing_stat.h
new file mode 100644
index 0000000..5116206
--- /dev/null
+++ b/src/basic/missing_stat.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/types.h>
+#include <sys/stat.h>
+
+#if WANT_LINUX_STAT_H
+#include <linux/stat.h>
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#if !HAVE_STRUCT_STATX
+struct statx_timestamp {
+ __s64 tv_sec;
+ __u32 tv_nsec;
+ __s32 __reserved;
+};
+struct statx {
+ __u32 stx_mask;
+ __u32 stx_blksize;
+ __u64 stx_attributes;
+ __u32 stx_nlink;
+ __u32 stx_uid;
+ __u32 stx_gid;
+ __u16 stx_mode;
+ __u16 __spare0[1];
+ __u64 stx_ino;
+ __u64 stx_size;
+ __u64 stx_blocks;
+ __u64 stx_attributes_mask;
+ struct statx_timestamp stx_atime;
+ struct statx_timestamp stx_btime;
+ struct statx_timestamp stx_ctime;
+ struct statx_timestamp stx_mtime;
+ __u32 stx_rdev_major;
+ __u32 stx_rdev_minor;
+ __u32 stx_dev_major;
+ __u32 stx_dev_minor;
+ __u64 __spare2[14];
+};
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_BTIME
+#define STATX_BTIME 0x00000800U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef AT_STATX_DONT_SYNC
+#define AT_STATX_DONT_SYNC 0x4000
+#endif
diff --git a/src/basic/missing_stdlib.h b/src/basic/missing_stdlib.h
new file mode 100644
index 0000000..188a8d4
--- /dev/null
+++ b/src/basic/missing_stdlib.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdlib.h>
+
+/* stdlib.h */
+#if !HAVE_SECURE_GETENV
+# if HAVE___SECURE_GETENV
+# define secure_getenv __secure_getenv
+# else
+# error "neither secure_getenv nor __secure_getenv are available"
+# endif
+#endif
diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h
new file mode 100644
index 0000000..d1aa322
--- /dev/null
+++ b/src/basic/missing_syscall.h
@@ -0,0 +1,446 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* Missing glibc definitions to access certain kernel APIs */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifdef ARCH_MIPS
+#include <asm/sgidefs.h>
+#endif
+
+#include "missing_keyctl.h"
+#include "missing_stat.h"
+
+/* linux/kcmp.h */
+#ifndef KCMP_FILE /* 3f4994cfc15f38a3159c6e3a4b3ab2e1481a6b02 (3.19) */
+#define KCMP_FILE 0
+#endif
+
+#if !HAVE_PIVOT_ROOT
+static inline int missing_pivot_root(const char *new_root, const char *put_old) {
+ return syscall(__NR_pivot_root, new_root, put_old);
+}
+
+# define pivot_root missing_pivot_root
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_MEMFD_CREATE
+# ifndef __NR_memfd_create
+# if defined __x86_64__
+# define __NR_memfd_create 319
+# elif defined __arm__
+# define __NR_memfd_create 385
+# elif defined __aarch64__
+# define __NR_memfd_create 279
+# elif defined __s390__
+# define __NR_memfd_create 350
+# elif defined _MIPS_SIM
+# if _MIPS_SIM == _MIPS_SIM_ABI32
+# define __NR_memfd_create 4354
+# endif
+# if _MIPS_SIM == _MIPS_SIM_NABI32
+# define __NR_memfd_create 6318
+# endif
+# if _MIPS_SIM == _MIPS_SIM_ABI64
+# define __NR_memfd_create 5314
+# endif
+# elif defined __i386__
+# define __NR_memfd_create 356
+# elif defined __arc__
+# define __NR_memfd_create 279
+# else
+# warning "__NR_memfd_create unknown for your architecture"
+# endif
+# endif
+
+static inline int missing_memfd_create(const char *name, unsigned int flags) {
+# ifdef __NR_memfd_create
+ return syscall(__NR_memfd_create, name, flags);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define memfd_create missing_memfd_create
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_GETRANDOM
+# ifndef __NR_getrandom
+# if defined __x86_64__
+# define __NR_getrandom 318
+# elif defined(__i386__)
+# define __NR_getrandom 355
+# elif defined(__arm__)
+# define __NR_getrandom 384
+# elif defined(__aarch64__)
+# define __NR_getrandom 278
+# elif defined(__ia64__)
+# define __NR_getrandom 1339
+# elif defined(__m68k__)
+# define __NR_getrandom 352
+# elif defined(__s390x__)
+# define __NR_getrandom 349
+# elif defined(__powerpc__)
+# define __NR_getrandom 359
+# elif defined _MIPS_SIM
+# if _MIPS_SIM == _MIPS_SIM_ABI32
+# define __NR_getrandom 4353
+# endif
+# if _MIPS_SIM == _MIPS_SIM_NABI32
+# define __NR_getrandom 6317
+# endif
+# if _MIPS_SIM == _MIPS_SIM_ABI64
+# define __NR_getrandom 5313
+# endif
+# elif defined(__arc__)
+# define __NR_getrandom 278
+# else
+# warning "__NR_getrandom unknown for your architecture"
+# endif
+# endif
+
+static inline int missing_getrandom(void *buffer, size_t count, unsigned flags) {
+# ifdef __NR_getrandom
+ return syscall(__NR_getrandom, buffer, count, flags);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define getrandom missing_getrandom
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_GETTID
+static inline pid_t missing_gettid(void) {
+ return (pid_t) syscall(__NR_gettid);
+}
+
+# define gettid missing_gettid
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_NAME_TO_HANDLE_AT
+# ifndef __NR_name_to_handle_at
+# if defined(__x86_64__)
+# define __NR_name_to_handle_at 303
+# elif defined(__i386__)
+# define __NR_name_to_handle_at 341
+# elif defined(__arm__)
+# define __NR_name_to_handle_at 370
+# elif defined(__powerpc__)
+# define __NR_name_to_handle_at 345
+# elif defined(__arc__)
+# define __NR_name_to_handle_at 264
+# else
+# error "__NR_name_to_handle_at is not defined"
+# endif
+# endif
+
+struct file_handle {
+ unsigned int handle_bytes;
+ int handle_type;
+ unsigned char f_handle[0];
+};
+
+static inline int missing_name_to_handle_at(int fd, const char *name, struct file_handle *handle, int *mnt_id, int flags) {
+# ifdef __NR_name_to_handle_at
+ return syscall(__NR_name_to_handle_at, fd, name, handle, mnt_id, flags);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define name_to_handle_at missing_name_to_handle_at
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_SETNS
+# ifndef __NR_setns
+# if defined(__x86_64__)
+# define __NR_setns 308
+# elif defined(__i386__)
+# define __NR_setns 346
+# elif defined(__arc__)
+# define __NR_setns 268
+# else
+# error "__NR_setns is not defined"
+# endif
+# endif
+
+static inline int missing_setns(int fd, int nstype) {
+# ifdef __NR_setns
+ return syscall(__NR_setns, fd, nstype);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define setns missing_setns
+#endif
+
+/* ======================================================================= */
+
+static inline pid_t raw_getpid(void) {
+#if defined(__alpha__)
+ return (pid_t) syscall(__NR_getxpid);
+#else
+ return (pid_t) syscall(__NR_getpid);
+#endif
+}
+
+/* ======================================================================= */
+
+#if !HAVE_RENAMEAT2
+# ifndef __NR_renameat2
+# if defined __x86_64__
+# define __NR_renameat2 316
+# elif defined __arm__
+# define __NR_renameat2 382
+# elif defined __aarch64__
+# define __NR_renameat2 276
+# elif defined _MIPS_SIM
+# if _MIPS_SIM == _MIPS_SIM_ABI32
+# define __NR_renameat2 4351
+# endif
+# if _MIPS_SIM == _MIPS_SIM_NABI32
+# define __NR_renameat2 6315
+# endif
+# if _MIPS_SIM == _MIPS_SIM_ABI64
+# define __NR_renameat2 5311
+# endif
+# elif defined __i386__
+# define __NR_renameat2 353
+# elif defined __powerpc64__
+# define __NR_renameat2 357
+# elif defined __s390__ || defined __s390x__
+# define __NR_renameat2 347
+# elif defined __arc__
+# define __NR_renameat2 276
+# else
+# warning "__NR_renameat2 unknown for your architecture"
+# endif
+# endif
+
+static inline int missing_renameat2(int oldfd, const char *oldname, int newfd, const char *newname, unsigned flags) {
+# ifdef __NR_renameat2
+ return syscall(__NR_renameat2, oldfd, oldname, newfd, newname, flags);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define renameat2 missing_renameat2
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_KCMP
+static inline int missing_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) {
+# ifdef __NR_kcmp
+ return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define kcmp missing_kcmp
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_KEYCTL
+static inline long missing_keyctl(int cmd, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) {
+# ifdef __NR_keyctl
+ return syscall(__NR_keyctl, cmd, arg2, arg3, arg4, arg5);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+
+# define keyctl missing_keyctl
+}
+
+static inline key_serial_t missing_add_key(const char *type, const char *description, const void *payload, size_t plen, key_serial_t ringid) {
+# ifdef __NR_add_key
+ return syscall(__NR_add_key, type, description, payload, plen, ringid);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+
+# define add_key missing_add_key
+}
+
+static inline key_serial_t missing_request_key(const char *type, const char *description, const char * callout_info, key_serial_t destringid) {
+# ifdef __NR_request_key
+ return syscall(__NR_request_key, type, description, callout_info, destringid);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+
+# define request_key missing_request_key
+}
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_COPY_FILE_RANGE
+# ifndef __NR_copy_file_range
+# if defined(__x86_64__)
+# define __NR_copy_file_range 326
+# elif defined(__i386__)
+# define __NR_copy_file_range 377
+# elif defined __s390__
+# define __NR_copy_file_range 375
+# elif defined __arm__
+# define __NR_copy_file_range 391
+# elif defined __aarch64__
+# define __NR_copy_file_range 285
+# elif defined __powerpc__
+# define __NR_copy_file_range 379
+# elif defined __arc__
+# define __NR_copy_file_range 285
+# else
+# warning "__NR_copy_file_range not defined for your architecture"
+# endif
+# endif
+
+static inline ssize_t missing_copy_file_range(int fd_in, loff_t *off_in,
+ int fd_out, loff_t *off_out,
+ size_t len,
+ unsigned int flags) {
+# ifdef __NR_copy_file_range
+ return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, off_out, len, flags);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define copy_file_range missing_copy_file_range
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_BPF
+# ifndef __NR_bpf
+# if defined __i386__
+# define __NR_bpf 357
+# elif defined __x86_64__
+# define __NR_bpf 321
+# elif defined __aarch64__
+# define __NR_bpf 280
+# elif defined __arm__
+# define __NR_bpf 386
+# elif defined __sparc__
+# define __NR_bpf 349
+# elif defined __s390__
+# define __NR_bpf 351
+# elif defined __tilegx__
+# define __NR_bpf 280
+# else
+# warning "__NR_bpf not defined for your architecture"
+# endif
+# endif
+
+union bpf_attr;
+
+static inline int missing_bpf(int cmd, union bpf_attr *attr, size_t size) {
+#ifdef __NR_bpf
+ return (int) syscall(__NR_bpf, cmd, attr, size);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+# define bpf missing_bpf
+#endif
+
+/* ======================================================================= */
+
+#ifndef __IGNORE_pkey_mprotect
+# ifndef __NR_pkey_mprotect
+# if defined __i386__
+# define __NR_pkey_mprotect 380
+# elif defined __x86_64__
+# define __NR_pkey_mprotect 329
+# elif defined __arm__
+# define __NR_pkey_mprotect 394
+# elif defined __aarch64__
+# define __NR_pkey_mprotect 394
+# elif defined __powerpc__
+# define __NR_pkey_mprotect 386
+# elif defined _MIPS_SIM
+# if _MIPS_SIM == _MIPS_SIM_ABI32
+# define __NR_pkey_mprotect 4363
+# endif
+# if _MIPS_SIM == _MIPS_SIM_NABI32
+# define __NR_pkey_mprotect 6327
+# endif
+# if _MIPS_SIM == _MIPS_SIM_ABI64
+# define __NR_pkey_mprotect 5323
+# endif
+# else
+# warning "__NR_pkey_mprotect not defined for your architecture"
+# endif
+# endif
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_STATX
+# ifndef __NR_statx
+# if defined __aarch64__ || defined __arm__
+# define __NR_statx 397
+# elif defined __alpha__
+# define __NR_statx 522
+# elif defined __i386__ || defined __powerpc64__
+# define __NR_statx 383
+# elif defined __sparc__
+# define __NR_statx 360
+# elif defined __x86_64__
+# define __NR_statx 332
+# else
+# warning "__NR_statx not defined for your architecture"
+# endif
+# endif
+
+struct statx;
+#endif
+
+/* This typedef is supposed to be always defined. */
+typedef struct statx struct_statx;
+
+#if !HAVE_STATX
+static inline ssize_t missing_statx(int dfd, const char *filename, unsigned flags, unsigned int mask, struct statx *buffer) {
+# ifdef __NR_statx
+ return syscall(__NR_statx, dfd, filename, flags, mask, buffer);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define statx missing_statx
+#endif
diff --git a/src/basic/missing_timerfd.h b/src/basic/missing_timerfd.h
new file mode 100644
index 0000000..6b04044
--- /dev/null
+++ b/src/basic/missing_timerfd.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/timerfd.h>
+
+#ifndef TFD_TIMER_CANCEL_ON_SET
+#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
+#endif
diff --git a/src/basic/missing_type.h b/src/basic/missing_type.h
new file mode 100644
index 0000000..bf8a6ca
--- /dev/null
+++ b/src/basic/missing_type.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <uchar.h>
+
+#if !HAVE_CHAR32_T
+#define char32_t uint32_t
+#endif
+
+#if !HAVE_CHAR16_T
+#define char16_t uint16_t
+#endif
diff --git a/src/basic/missing_vxcan.h b/src/basic/missing_vxcan.h
new file mode 100644
index 0000000..be430f7
--- /dev/null
+++ b/src/basic/missing_vxcan.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if !HAVE_LINUX_CAN_VXCAN_H /* linux@a8f820a380a2a06fc4fe1a54159067958f800929 (4.12) */
+enum {
+ VXCAN_INFO_UNSPEC,
+ VXCAN_INFO_PEER,
+
+ __VXCAN_INFO_MAX
+#define VXCAN_INFO_MAX (__VXCAN_INFO_MAX - 1)
+};
+#endif
diff --git a/src/basic/mkdir-label.c b/src/basic/mkdir-label.c
new file mode 100644
index 0000000..0eba7fc
--- /dev/null
+++ b/src/basic/mkdir-label.c
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "label.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+
+int mkdir_label(const char *path, mode_t mode) {
+ int r;
+
+ assert(path);
+
+ r = mac_selinux_create_file_prepare(path, S_IFDIR);
+ if (r < 0)
+ return r;
+
+ r = mkdir_errno_wrapper(path, mode);
+ mac_selinux_create_file_clear();
+ if (r < 0)
+ return r;
+
+ return mac_smack_fix(path, 0);
+}
+
+int mkdirat_label(int dirfd, const char *path, mode_t mode) {
+ int r;
+
+ assert(path);
+
+ r = mac_selinux_create_file_prepare_at(dirfd, path, S_IFDIR);
+ if (r < 0)
+ return r;
+
+ r = mkdirat_errno_wrapper(dirfd, path, mode);
+ mac_selinux_create_file_clear();
+ if (r < 0)
+ return r;
+
+ return mac_smack_fix_at(dirfd, path, 0);
+}
+
+int mkdir_safe_label(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) {
+ return mkdir_safe_internal(path, mode, uid, gid, flags, mkdir_label);
+}
+
+int mkdir_parents_label(const char *path, mode_t mode) {
+ return mkdir_parents_internal(NULL, path, mode, mkdir_label);
+}
+
+int mkdir_p_label(const char *path, mode_t mode) {
+ return mkdir_p_internal(NULL, path, mode, mkdir_label);
+}
diff --git a/src/basic/mkdir.c b/src/basic/mkdir.c
new file mode 100644
index 0000000..4bb65d5
--- /dev/null
+++ b/src/basic/mkdir.c
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "user-util.h"
+
+int mkdir_safe_internal(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir) {
+ struct stat st;
+ int r;
+
+ assert(_mkdir != mkdir);
+
+ if (_mkdir(path, mode) >= 0) {
+ r = chmod_and_chown(path, mode, uid, gid);
+ if (r < 0)
+ return r;
+ }
+
+ if (lstat(path, &st) < 0)
+ return -errno;
+
+ if ((flags & MKDIR_FOLLOW_SYMLINK) && S_ISLNK(st.st_mode)) {
+ _cleanup_free_ char *p = NULL;
+
+ r = chase_symlinks(path, NULL, CHASE_NONEXISTENT, &p);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return mkdir_safe_internal(p, mode, uid, gid,
+ flags & ~MKDIR_FOLLOW_SYMLINK,
+ _mkdir);
+
+ if (lstat(p, &st) < 0)
+ return -errno;
+ }
+
+ if (!S_ISDIR(st.st_mode)) {
+ log_full(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG,
+ "Path \"%s\" already exists and is not a directory, refusing.", path);
+ return -ENOTDIR;
+ }
+ if ((st.st_mode & 0007) > (mode & 0007) ||
+ (st.st_mode & 0070) > (mode & 0070) ||
+ (st.st_mode & 0700) > (mode & 0700)) {
+ log_full(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG,
+ "Directory \"%s\" already exists, but has mode %04o that is too permissive (%04o was requested), refusing.",
+ path, st.st_mode & 0777, mode);
+ return -EEXIST;
+ }
+ if ((uid != UID_INVALID && st.st_uid != uid) ||
+ (gid != GID_INVALID && st.st_gid != gid)) {
+ char u[DECIMAL_STR_MAX(uid_t)] = "-", g[DECIMAL_STR_MAX(gid_t)] = "-";
+
+ if (uid != UID_INVALID)
+ xsprintf(u, UID_FMT, uid);
+ if (gid != UID_INVALID)
+ xsprintf(g, GID_FMT, gid);
+ log_full(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG,
+ "Directory \"%s\" already exists, but is owned by "UID_FMT":"GID_FMT" (%s:%s was requested), refusing.",
+ path, st.st_uid, st.st_gid, u, g);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+int mkdir_errno_wrapper(const char *pathname, mode_t mode) {
+ if (mkdir(pathname, mode) < 0)
+ return -errno;
+ return 0;
+}
+
+int mkdirat_errno_wrapper(int dirfd, const char *pathname, mode_t mode) {
+ if (mkdirat(dirfd, pathname, mode) < 0)
+ return -errno;
+ return 0;
+}
+
+int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) {
+ return mkdir_safe_internal(path, mode, uid, gid, flags, mkdir_errno_wrapper);
+}
+
+int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir) {
+ const char *p, *e;
+ int r;
+
+ assert(path);
+ assert(_mkdir != mkdir);
+
+ if (prefix && !path_startswith(path, prefix))
+ return -ENOTDIR;
+
+ /* return immediately if directory exists */
+ e = strrchr(path, '/');
+ if (!e)
+ return -EINVAL;
+
+ if (e == path)
+ return 0;
+
+ p = strndupa(path, e - path);
+ r = is_dir(p, true);
+ if (r > 0)
+ return 0;
+ if (r == 0)
+ return -ENOTDIR;
+
+ /* create every parent directory in the path, except the last component */
+ p = path + strspn(path, "/");
+ for (;;) {
+ char t[strlen(path) + 1];
+
+ e = p + strcspn(p, "/");
+ p = e + strspn(e, "/");
+
+ /* Is this the last component? If so, then we're done */
+ if (*p == 0)
+ return 0;
+
+ memcpy(t, path, e - path);
+ t[e-path] = 0;
+
+ if (prefix && path_startswith(prefix, t))
+ continue;
+
+ r = _mkdir(t, mode);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+}
+
+int mkdir_parents(const char *path, mode_t mode) {
+ return mkdir_parents_internal(NULL, path, mode, mkdir_errno_wrapper);
+}
+
+int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir) {
+ int r;
+
+ /* Like mkdir -p */
+
+ assert(_mkdir != mkdir);
+
+ r = mkdir_parents_internal(prefix, path, mode, _mkdir);
+ if (r < 0)
+ return r;
+
+ r = _mkdir(path, mode);
+ if (r < 0 && (r != -EEXIST || is_dir(path, true) <= 0))
+ return r;
+
+ return 0;
+}
+
+int mkdir_p(const char *path, mode_t mode) {
+ return mkdir_p_internal(NULL, path, mode, mkdir_errno_wrapper);
+}
diff --git a/src/basic/mkdir.h b/src/basic/mkdir.h
new file mode 100644
index 0000000..eb54853
--- /dev/null
+++ b/src/basic/mkdir.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+typedef enum MkdirFlags {
+ MKDIR_FOLLOW_SYMLINK = 1 << 0,
+ MKDIR_WARN_MODE = 1 << 1,
+} MkdirFlags;
+
+int mkdir_errno_wrapper(const char *pathname, mode_t mode);
+int mkdirat_errno_wrapper(int dirfd, const char *pathname, mode_t mode);
+int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
+int mkdir_parents(const char *path, mode_t mode);
+int mkdir_p(const char *path, mode_t mode);
+
+/* mandatory access control(MAC) versions */
+int mkdir_safe_label(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
+int mkdir_parents_label(const char *path, mode_t mode);
+int mkdir_p_label(const char *path, mode_t mode);
+
+/* internally used */
+typedef int (*mkdir_func_t)(const char *pathname, mode_t mode);
+int mkdir_safe_internal(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir);
+int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir);
+int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, mkdir_func_t _mkdir);
diff --git a/src/basic/mountpoint-util.c b/src/basic/mountpoint-util.c
new file mode 100644
index 0000000..1e946a0
--- /dev/null
+++ b/src/basic/mountpoint-util.c
@@ -0,0 +1,444 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio_ext.h>
+#include <sys/mount.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "missing.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+
+/* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
+ * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
+ * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
+ * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
+ * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
+ * with large file handles anyway. */
+#define ORIGINAL_MAX_HANDLE_SZ 128
+
+int name_to_handle_at_loop(
+ int fd,
+ const char *path,
+ struct file_handle **ret_handle,
+ int *ret_mnt_id,
+ int flags) {
+
+ _cleanup_free_ struct file_handle *h = NULL;
+ size_t n = ORIGINAL_MAX_HANDLE_SZ;
+
+ /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
+ * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
+ * start value, it is not an upper bound on the buffer size required.
+ *
+ * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
+ * as NULL if there's no interest in either. */
+
+ for (;;) {
+ int mnt_id = -1;
+
+ h = malloc0(offsetof(struct file_handle, f_handle) + n);
+ if (!h)
+ return -ENOMEM;
+
+ h->handle_bytes = n;
+
+ if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
+
+ if (ret_handle)
+ *ret_handle = TAKE_PTR(h);
+
+ if (ret_mnt_id)
+ *ret_mnt_id = mnt_id;
+
+ return 0;
+ }
+ if (errno != EOVERFLOW)
+ return -errno;
+
+ if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
+
+ /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
+ * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
+ * be filled in, and the caller was interested in only the mount ID an nothing else. */
+
+ *ret_mnt_id = mnt_id;
+ return 0;
+ }
+
+ /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
+ * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
+ * buffer. In that case propagate EOVERFLOW */
+ if (h->handle_bytes <= n)
+ return -EOVERFLOW;
+
+ /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
+ n = h->handle_bytes;
+ if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
+ return -EOVERFLOW;
+
+ h = mfree(h);
+ }
+}
+
+static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
+ char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
+ _cleanup_free_ char *fdinfo = NULL;
+ _cleanup_close_ int subfd = -1;
+ char *p;
+ int r;
+
+ if ((flags & AT_EMPTY_PATH) && isempty(filename))
+ xsprintf(path, "/proc/self/fdinfo/%i", fd);
+ else {
+ subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
+ if (subfd < 0)
+ return -errno;
+
+ xsprintf(path, "/proc/self/fdinfo/%i", subfd);
+ }
+
+ r = read_full_file(path, &fdinfo, NULL);
+ if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
+ return -EOPNOTSUPP;
+ if (r < 0)
+ return r;
+
+ p = startswith(fdinfo, "mnt_id:");
+ if (!p) {
+ p = strstr(fdinfo, "\nmnt_id:");
+ if (!p) /* The mnt_id field is a relatively new addition */
+ return -EOPNOTSUPP;
+
+ p += 8;
+ }
+
+ p += strspn(p, WHITESPACE);
+ p[strcspn(p, WHITESPACE)] = 0;
+
+ return safe_atoi(p, mnt_id);
+}
+
+int fd_is_mount_point(int fd, const char *filename, int flags) {
+ _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
+ int mount_id = -1, mount_id_parent = -1;
+ bool nosupp = false, check_st_dev = true;
+ struct stat a, b;
+ int r;
+
+ assert(fd >= 0);
+ assert(filename);
+
+ /* First we will try the name_to_handle_at() syscall, which
+ * tells us the mount id and an opaque file "handle". It is
+ * not supported everywhere though (kernel compile-time
+ * option, not all file systems are hooked up). If it works
+ * the mount id is usually good enough to tell us whether
+ * something is a mount point.
+ *
+ * If that didn't work we will try to read the mount id from
+ * /proc/self/fdinfo/<fd>. This is almost as good as
+ * name_to_handle_at(), however, does not return the
+ * opaque file handle. The opaque file handle is pretty useful
+ * to detect the root directory, which we should always
+ * consider a mount point. Hence we use this only as
+ * fallback. Exporting the mnt_id in fdinfo is a pretty recent
+ * kernel addition.
+ *
+ * As last fallback we do traditional fstat() based st_dev
+ * comparisons. This is how things were traditionally done,
+ * but unionfs breaks this since it exposes file
+ * systems with a variety of st_dev reported. Also, btrfs
+ * subvolumes have different st_dev, even though they aren't
+ * real mounts of their own. */
+
+ r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
+ if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
+ /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
+ * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
+ * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
+ * (EINVAL): fall back to simpler logic. */
+ goto fallback_fdinfo;
+ else if (r == -EOPNOTSUPP)
+ /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
+ * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
+ * logic */
+ nosupp = true;
+ else if (r < 0)
+ return r;
+
+ r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
+ if (r == -EOPNOTSUPP) {
+ if (nosupp)
+ /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
+ goto fallback_fdinfo;
+ else
+ /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
+ * it must be a mount point. */
+ return 1;
+ } else if (r < 0)
+ return r;
+
+ /* The parent can do name_to_handle_at() but the
+ * directory we are interested in can't? If so, it
+ * must be a mount point. */
+ if (nosupp)
+ return 1;
+
+ /* If the file handle for the directory we are
+ * interested in and its parent are identical, we
+ * assume this is the root directory, which is a mount
+ * point. */
+
+ if (h->handle_bytes == h_parent->handle_bytes &&
+ h->handle_type == h_parent->handle_type &&
+ memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
+ return 1;
+
+ return mount_id != mount_id_parent;
+
+fallback_fdinfo:
+ r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
+ if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
+ goto fallback_fstat;
+ if (r < 0)
+ return r;
+
+ r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
+ if (r < 0)
+ return r;
+
+ if (mount_id != mount_id_parent)
+ return 1;
+
+ /* Hmm, so, the mount ids are the same. This leaves one
+ * special case though for the root file system. For that,
+ * let's see if the parent directory has the same inode as we
+ * are interested in. Hence, let's also do fstat() checks now,
+ * too, but avoid the st_dev comparisons, since they aren't
+ * that useful on unionfs mounts. */
+ check_st_dev = false;
+
+fallback_fstat:
+ /* yay for fstatat() taking a different set of flags than the other
+ * _at() above */
+ if (flags & AT_SYMLINK_FOLLOW)
+ flags &= ~AT_SYMLINK_FOLLOW;
+ else
+ flags |= AT_SYMLINK_NOFOLLOW;
+ if (fstatat(fd, filename, &a, flags) < 0)
+ return -errno;
+
+ if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
+ return -errno;
+
+ /* A directory with same device and inode as its parent? Must
+ * be the root directory */
+ if (a.st_dev == b.st_dev &&
+ a.st_ino == b.st_ino)
+ return 1;
+
+ return check_st_dev && (a.st_dev != b.st_dev);
+}
+
+/* flags can be AT_SYMLINK_FOLLOW or 0 */
+int path_is_mount_point(const char *t, const char *root, int flags) {
+ _cleanup_free_ char *canonical = NULL;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(t);
+ assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
+
+ if (path_equal(t, "/"))
+ return 1;
+
+ /* we need to resolve symlinks manually, we can't just rely on
+ * fd_is_mount_point() to do that for us; if we have a structure like
+ * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
+ * look at needs to be /usr, not /. */
+ if (flags & AT_SYMLINK_FOLLOW) {
+ r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
+ if (r < 0)
+ return r;
+
+ t = canonical;
+ }
+
+ fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
+ if (fd < 0)
+ return -errno;
+
+ return fd_is_mount_point(fd, last_path_component(t), flags);
+}
+
+int path_get_mnt_id(const char *path, int *ret) {
+ int r;
+
+ r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
+ if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
+ return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
+
+ return r;
+}
+
+bool fstype_is_network(const char *fstype) {
+ const char *x;
+
+ x = startswith(fstype, "fuse.");
+ if (x)
+ fstype = x;
+
+ return STR_IN_SET(fstype,
+ "afs",
+ "cifs",
+ "smbfs",
+ "sshfs",
+ "ncpfs",
+ "ncp",
+ "nfs",
+ "nfs4",
+ "gfs",
+ "gfs2",
+ "glusterfs",
+ "pvfs2", /* OrangeFS */
+ "ocfs2",
+ "lustre");
+}
+
+bool fstype_is_api_vfs(const char *fstype) {
+ return STR_IN_SET(fstype,
+ "autofs",
+ "bpf",
+ "cgroup",
+ "cgroup2",
+ "configfs",
+ "cpuset",
+ "debugfs",
+ "devpts",
+ "devtmpfs",
+ "efivarfs",
+ "fusectl",
+ "hugetlbfs",
+ "mqueue",
+ "proc",
+ "pstore",
+ "ramfs",
+ "securityfs",
+ "sysfs",
+ "tmpfs",
+ "tracefs");
+}
+
+bool fstype_is_ro(const char *fstype) {
+ /* All Linux file systems that are necessarily read-only */
+ return STR_IN_SET(fstype,
+ "DM_verity_hash",
+ "iso9660",
+ "squashfs");
+}
+
+bool fstype_can_discard(const char *fstype) {
+ return STR_IN_SET(fstype,
+ "btrfs",
+ "ext4",
+ "vfat",
+ "xfs");
+}
+
+bool fstype_can_uid_gid(const char *fstype) {
+
+ /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
+ * current and future. */
+
+ return STR_IN_SET(fstype,
+ "adfs",
+ "fat",
+ "hfs",
+ "hpfs",
+ "iso9660",
+ "msdos",
+ "ntfs",
+ "vfat");
+}
+
+int dev_is_devtmpfs(void) {
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+ int mount_id, r;
+ char *e;
+
+ r = path_get_mnt_id("/dev", &mount_id);
+ if (r < 0)
+ return r;
+
+ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!proc_self_mountinfo)
+ return -errno;
+
+ (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ int mid;
+
+ r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (sscanf(line, "%i", &mid) != 1)
+ continue;
+
+ if (mid != mount_id)
+ continue;
+
+ e = strstr(line, " - ");
+ if (!e)
+ continue;
+
+ /* accept any name that starts with the currently expected type */
+ if (startswith(e + 3, "devtmpfs"))
+ return true;
+ }
+
+ return false;
+}
+
+const char *mount_propagation_flags_to_string(unsigned long flags) {
+
+ switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) {
+ case 0:
+ return "";
+ case MS_SHARED:
+ return "shared";
+ case MS_SLAVE:
+ return "slave";
+ case MS_PRIVATE:
+ return "private";
+ }
+
+ return NULL;
+}
+
+int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
+
+ if (isempty(name))
+ *ret = 0;
+ else if (streq(name, "shared"))
+ *ret = MS_SHARED;
+ else if (streq(name, "slave"))
+ *ret = MS_SLAVE;
+ else if (streq(name, "private"))
+ *ret = MS_PRIVATE;
+ else
+ return -EINVAL;
+ return 0;
+}
diff --git a/src/basic/mountpoint-util.h b/src/basic/mountpoint-util.h
new file mode 100644
index 0000000..5398836
--- /dev/null
+++ b/src/basic/mountpoint-util.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+int name_to_handle_at_loop(int fd, const char *path, struct file_handle **ret_handle, int *ret_mnt_id, int flags);
+
+int path_get_mnt_id(const char *path, int *ret);
+
+int fd_is_mount_point(int fd, const char *filename, int flags);
+int path_is_mount_point(const char *path, const char *root, int flags);
+
+bool fstype_is_network(const char *fstype);
+bool fstype_is_api_vfs(const char *fstype);
+bool fstype_is_ro(const char *fsype);
+bool fstype_can_discard(const char *fstype);
+bool fstype_can_uid_gid(const char *fstype);
+
+int dev_is_devtmpfs(void);
+
+const char *mount_propagation_flags_to_string(unsigned long flags);
+int mount_propagation_flags_from_string(const char *name, unsigned long *ret);
diff --git a/src/basic/nss-util.h b/src/basic/nss-util.h
new file mode 100644
index 0000000..2045175
--- /dev/null
+++ b/src/basic/nss-util.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <grp.h>
+#include <netdb.h>
+#include <nss.h>
+#include <pwd.h>
+#include <resolv.h>
+
+#define NSS_SIGNALS_BLOCK SIGALRM,SIGVTALRM,SIGPIPE,SIGCHLD,SIGTSTP,SIGIO,SIGHUP,SIGUSR1,SIGUSR2,SIGPROF,SIGURG,SIGWINCH
+
+#ifndef DEPRECATED_RES_USE_INET6
+# define DEPRECATED_RES_USE_INET6 0x00002000
+#endif
+
+#define NSS_GETHOSTBYNAME_PROTOTYPES(module) \
+enum nss_status _nss_##module##_gethostbyname4_r( \
+ const char *name, \
+ struct gaih_addrtuple **pat, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop, \
+ int32_t *ttlp) _public_; \
+enum nss_status _nss_##module##_gethostbyname3_r( \
+ const char *name, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop, \
+ int32_t *ttlp, \
+ char **canonp) _public_; \
+enum nss_status _nss_##module##_gethostbyname2_r( \
+ const char *name, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) _public_; \
+enum nss_status _nss_##module##_gethostbyname_r( \
+ const char *name, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) _public_
+
+#define NSS_GETHOSTBYADDR_PROTOTYPES(module) \
+enum nss_status _nss_##module##_gethostbyaddr2_r( \
+ const void* addr, socklen_t len, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop, \
+ int32_t *ttlp) _public_; \
+enum nss_status _nss_##module##_gethostbyaddr_r( \
+ const void* addr, socklen_t len, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) _public_
+
+#define NSS_GETHOSTBYNAME_FALLBACKS(module) \
+enum nss_status _nss_##module##_gethostbyname2_r( \
+ const char *name, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) { \
+ return _nss_##module##_gethostbyname3_r( \
+ name, \
+ af, \
+ host, \
+ buffer, buflen, \
+ errnop, h_errnop, \
+ NULL, \
+ NULL); \
+} \
+enum nss_status _nss_##module##_gethostbyname_r( \
+ const char *name, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) { \
+ enum nss_status ret = NSS_STATUS_NOTFOUND; \
+ \
+ if (_res.options & DEPRECATED_RES_USE_INET6) \
+ ret = _nss_##module##_gethostbyname3_r( \
+ name, \
+ AF_INET6, \
+ host, \
+ buffer, buflen, \
+ errnop, h_errnop, \
+ NULL, \
+ NULL); \
+ if (ret == NSS_STATUS_NOTFOUND) \
+ ret = _nss_##module##_gethostbyname3_r( \
+ name, \
+ AF_INET, \
+ host, \
+ buffer, buflen, \
+ errnop, h_errnop, \
+ NULL, \
+ NULL); \
+ return ret; \
+}
+
+#define NSS_GETHOSTBYADDR_FALLBACKS(module) \
+enum nss_status _nss_##module##_gethostbyaddr_r( \
+ const void* addr, socklen_t len, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) { \
+ return _nss_##module##_gethostbyaddr2_r( \
+ addr, len, \
+ af, \
+ host, \
+ buffer, buflen, \
+ errnop, h_errnop, \
+ NULL); \
+}
+
+#define NSS_GETPW_PROTOTYPES(module) \
+enum nss_status _nss_##module##_getpwnam_r( \
+ const char *name, \
+ struct passwd *pwd, \
+ char *buffer, size_t buflen, \
+ int *errnop) _public_; \
+enum nss_status _nss_##module##_getpwuid_r( \
+ uid_t uid, \
+ struct passwd *pwd, \
+ char *buffer, size_t buflen, \
+ int *errnop) _public_
+
+#define NSS_GETGR_PROTOTYPES(module) \
+enum nss_status _nss_##module##_getgrnam_r( \
+ const char *name, \
+ struct group *gr, \
+ char *buffer, size_t buflen, \
+ int *errnop) _public_; \
+enum nss_status _nss_##module##_getgrgid_r( \
+ gid_t gid, \
+ struct group *gr, \
+ char *buffer, size_t buflen, \
+ int *errnop) _public_
+
+typedef enum nss_status (*_nss_gethostbyname4_r_t)(
+ const char *name,
+ struct gaih_addrtuple **pat,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp);
+
+typedef enum nss_status (*_nss_gethostbyname3_r_t)(
+ const char *name,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp,
+ char **canonp);
+
+typedef enum nss_status (*_nss_gethostbyname2_r_t)(
+ const char *name,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop);
+
+typedef enum nss_status (*_nss_gethostbyname_r_t)(
+ const char *name,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop);
+
+typedef enum nss_status (*_nss_gethostbyaddr2_r_t)(
+ const void* addr, socklen_t len,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp);
+typedef enum nss_status (*_nss_gethostbyaddr_r_t)(
+ const void* addr, socklen_t len,
+ int af,
+ struct hostent *host,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop);
diff --git a/src/basic/ordered-set.c b/src/basic/ordered-set.c
new file mode 100644
index 0000000..ed9ba77
--- /dev/null
+++ b/src/basic/ordered-set.c
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "ordered-set.h"
+#include "strv.h"
+
+int ordered_set_consume(OrderedSet *s, void *p) {
+ int r;
+
+ r = ordered_set_put(s, p);
+ if (r <= 0)
+ free(p);
+
+ return r;
+}
+
+int ordered_set_put_strdup(OrderedSet *s, const char *p) {
+ char *c;
+ int r;
+
+ assert(s);
+ assert(p);
+
+ c = strdup(p);
+ if (!c)
+ return -ENOMEM;
+
+ r = ordered_set_consume(s, c);
+ if (r == -EEXIST)
+ return 0;
+
+ return r;
+}
+
+int ordered_set_put_strdupv(OrderedSet *s, char **l) {
+ int n = 0, r;
+ char **i;
+
+ STRV_FOREACH(i, l) {
+ r = ordered_set_put_strdup(s, *i);
+ if (r < 0)
+ return r;
+
+ n += r;
+ }
+
+ return n;
+}
diff --git a/src/basic/ordered-set.h b/src/basic/ordered-set.h
new file mode 100644
index 0000000..7cbb718
--- /dev/null
+++ b/src/basic/ordered-set.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "hashmap.h"
+
+typedef struct OrderedSet OrderedSet;
+
+static inline OrderedSet* ordered_set_new(const struct hash_ops *ops) {
+ return (OrderedSet*) ordered_hashmap_new(ops);
+}
+
+static inline int ordered_set_ensure_allocated(OrderedSet **s, const struct hash_ops *ops) {
+ if (*s)
+ return 0;
+
+ *s = ordered_set_new(ops);
+ if (!*s)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static inline OrderedSet* ordered_set_free(OrderedSet *s) {
+ return (OrderedSet*) ordered_hashmap_free((OrderedHashmap*) s);
+}
+
+static inline OrderedSet* ordered_set_free_free(OrderedSet *s) {
+ return (OrderedSet*) ordered_hashmap_free_free((OrderedHashmap*) s);
+}
+
+static inline int ordered_set_put(OrderedSet *s, void *p) {
+ return ordered_hashmap_put((OrderedHashmap*) s, p, p);
+}
+
+static inline bool ordered_set_isempty(OrderedSet *s) {
+ return ordered_hashmap_isempty((OrderedHashmap*) s);
+}
+
+static inline bool ordered_set_iterate(OrderedSet *s, Iterator *i, void **value) {
+ return ordered_hashmap_iterate((OrderedHashmap*) s, i, value, NULL);
+}
+
+static inline void* ordered_set_remove(OrderedSet *s, void *p) {
+ return ordered_hashmap_remove((OrderedHashmap*) s, p);
+}
+
+static inline void* ordered_set_steal_first(OrderedSet *s) {
+ return ordered_hashmap_steal_first((OrderedHashmap*) s);
+}
+
+int ordered_set_consume(OrderedSet *s, void *p);
+int ordered_set_put_strdup(OrderedSet *s, const char *p);
+int ordered_set_put_strdupv(OrderedSet *s, char **l);
+
+#define ORDERED_SET_FOREACH(e, s, i) \
+ for ((i) = ITERATOR_FIRST; ordered_set_iterate((s), &(i), (void**)&(e)); )
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedSet*, ordered_set_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedSet*, ordered_set_free_free);
+
+#define _cleanup_ordered_set_free_ _cleanup_(ordered_set_freep)
+#define _cleanup_ordered_set_free_free_ _cleanup_(ordered_set_free_freep)
diff --git a/src/basic/parse-util.c b/src/basic/parse-util.c
new file mode 100644
index 0000000..87724af
--- /dev/null
+++ b/src/basic/parse-util.c
@@ -0,0 +1,779 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/oom.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "errno-list.h"
+#include "extract-word.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+
+int parse_boolean(const char *v) {
+ if (!v)
+ return -EINVAL;
+
+ if (streq(v, "1") || strcaseeq(v, "yes") || strcaseeq(v, "y") || strcaseeq(v, "true") || strcaseeq(v, "t") || strcaseeq(v, "on"))
+ return 1;
+ else if (streq(v, "0") || strcaseeq(v, "no") || strcaseeq(v, "n") || strcaseeq(v, "false") || strcaseeq(v, "f") || strcaseeq(v, "off"))
+ return 0;
+
+ return -EINVAL;
+}
+
+int parse_pid(const char *s, pid_t* ret_pid) {
+ unsigned long ul = 0;
+ pid_t pid;
+ int r;
+
+ assert(s);
+ assert(ret_pid);
+
+ r = safe_atolu(s, &ul);
+ if (r < 0)
+ return r;
+
+ pid = (pid_t) ul;
+
+ if ((unsigned long) pid != ul)
+ return -ERANGE;
+
+ if (!pid_is_valid(pid))
+ return -ERANGE;
+
+ *ret_pid = pid;
+ return 0;
+}
+
+int parse_mode(const char *s, mode_t *ret) {
+ char *x;
+ long l;
+
+ assert(s);
+ assert(ret);
+
+ s += strspn(s, WHITESPACE);
+ if (s[0] == '-')
+ return -ERANGE;
+
+ errno = 0;
+ l = strtol(s, &x, 8);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+ if (l < 0 || l > 07777)
+ return -ERANGE;
+
+ *ret = (mode_t) l;
+ return 0;
+}
+
+int parse_ifindex(const char *s, int *ret) {
+ int ifi, r;
+
+ r = safe_atoi(s, &ifi);
+ if (r < 0)
+ return r;
+ if (ifi <= 0)
+ return -EINVAL;
+
+ *ret = ifi;
+ return 0;
+}
+
+int parse_mtu(int family, const char *s, uint32_t *ret) {
+ uint64_t u;
+ size_t m;
+ int r;
+
+ r = parse_size(s, 1024, &u);
+ if (r < 0)
+ return r;
+
+ if (u > UINT32_MAX)
+ return -ERANGE;
+
+ if (family == AF_INET6)
+ m = IPV6_MIN_MTU; /* This is 1280 */
+ else
+ m = IPV4_MIN_MTU; /* For all other protocols, including 'unspecified' we assume the IPv4 minimal MTU */
+
+ if (u < m)
+ return -ERANGE;
+
+ *ret = (uint32_t) u;
+ return 0;
+}
+
+int parse_size(const char *t, uint64_t base, uint64_t *size) {
+
+ /* Soo, sometimes we want to parse IEC binary suffixes, and
+ * sometimes SI decimal suffixes. This function can parse
+ * both. Which one is the right way depends on the
+ * context. Wikipedia suggests that SI is customary for
+ * hardware metrics and network speeds, while IEC is
+ * customary for most data sizes used by software and volatile
+ * (RAM) memory. Hence be careful which one you pick!
+ *
+ * In either case we use just K, M, G as suffix, and not Ki,
+ * Mi, Gi or so (as IEC would suggest). That's because that's
+ * frickin' ugly. But this means you really need to make sure
+ * to document which base you are parsing when you use this
+ * call. */
+
+ struct table {
+ const char *suffix;
+ unsigned long long factor;
+ };
+
+ static const struct table iec[] = {
+ { "E", 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL },
+ { "P", 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL },
+ { "T", 1024ULL*1024ULL*1024ULL*1024ULL },
+ { "G", 1024ULL*1024ULL*1024ULL },
+ { "M", 1024ULL*1024ULL },
+ { "K", 1024ULL },
+ { "B", 1ULL },
+ { "", 1ULL },
+ };
+
+ static const struct table si[] = {
+ { "E", 1000ULL*1000ULL*1000ULL*1000ULL*1000ULL*1000ULL },
+ { "P", 1000ULL*1000ULL*1000ULL*1000ULL*1000ULL },
+ { "T", 1000ULL*1000ULL*1000ULL*1000ULL },
+ { "G", 1000ULL*1000ULL*1000ULL },
+ { "M", 1000ULL*1000ULL },
+ { "K", 1000ULL },
+ { "B", 1ULL },
+ { "", 1ULL },
+ };
+
+ const struct table *table;
+ const char *p;
+ unsigned long long r = 0;
+ unsigned n_entries, start_pos = 0;
+
+ assert(t);
+ assert(IN_SET(base, 1000, 1024));
+ assert(size);
+
+ if (base == 1000) {
+ table = si;
+ n_entries = ELEMENTSOF(si);
+ } else {
+ table = iec;
+ n_entries = ELEMENTSOF(iec);
+ }
+
+ p = t;
+ do {
+ unsigned long long l, tmp;
+ double frac = 0;
+ char *e;
+ unsigned i;
+
+ p += strspn(p, WHITESPACE);
+
+ errno = 0;
+ l = strtoull(p, &e, 10);
+ if (errno > 0)
+ return -errno;
+ if (e == p)
+ return -EINVAL;
+ if (*p == '-')
+ return -ERANGE;
+
+ if (*e == '.') {
+ e++;
+
+ /* strtoull() itself would accept space/+/- */
+ if (*e >= '0' && *e <= '9') {
+ unsigned long long l2;
+ char *e2;
+
+ l2 = strtoull(e, &e2, 10);
+ if (errno > 0)
+ return -errno;
+
+ /* Ignore failure. E.g. 10.M is valid */
+ frac = l2;
+ for (; e < e2; e++)
+ frac /= 10;
+ }
+ }
+
+ e += strspn(e, WHITESPACE);
+
+ for (i = start_pos; i < n_entries; i++)
+ if (startswith(e, table[i].suffix))
+ break;
+
+ if (i >= n_entries)
+ return -EINVAL;
+
+ if (l + (frac > 0) > ULLONG_MAX / table[i].factor)
+ return -ERANGE;
+
+ tmp = l * table[i].factor + (unsigned long long) (frac * table[i].factor);
+ if (tmp > ULLONG_MAX - r)
+ return -ERANGE;
+
+ r += tmp;
+ if ((unsigned long long) (uint64_t) r != r)
+ return -ERANGE;
+
+ p = e + strlen(table[i].suffix);
+
+ start_pos = i + 1;
+
+ } while (*p);
+
+ *size = r;
+
+ return 0;
+}
+
+int parse_range(const char *t, unsigned *lower, unsigned *upper) {
+ _cleanup_free_ char *word = NULL;
+ unsigned l, u;
+ int r;
+
+ assert(lower);
+ assert(upper);
+
+ /* Extract the lower bound. */
+ r = extract_first_word(&t, &word, "-", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ r = safe_atou(word, &l);
+ if (r < 0)
+ return r;
+
+ /* Check for the upper bound and extract it if needed */
+ if (!t)
+ /* Single number with no dashes. */
+ u = l;
+ else if (!*t)
+ /* Trailing dash is an error. */
+ return -EINVAL;
+ else {
+ r = safe_atou(t, &u);
+ if (r < 0)
+ return r;
+ }
+
+ *lower = l;
+ *upper = u;
+ return 0;
+}
+
+int parse_errno(const char *t) {
+ int r, e;
+
+ assert(t);
+
+ r = errno_from_name(t);
+ if (r > 0)
+ return r;
+
+ r = safe_atoi(t, &e);
+ if (r < 0)
+ return r;
+
+ /* 0 is also allowed here */
+ if (!errno_is_valid(e) && e != 0)
+ return -ERANGE;
+
+ return e;
+}
+
+int parse_syscall_and_errno(const char *in, char **name, int *error) {
+ _cleanup_free_ char *n = NULL;
+ char *p;
+ int e = -1;
+
+ assert(in);
+ assert(name);
+ assert(error);
+
+ /*
+ * This parse "syscall:errno" like "uname:EILSEQ", "@sync:255".
+ * If errno is omitted, then error is set to -1.
+ * Empty syscall name is not allowed.
+ * Here, we do not check that the syscall name is valid or not.
+ */
+
+ p = strchr(in, ':');
+ if (p) {
+ e = parse_errno(p + 1);
+ if (e < 0)
+ return e;
+
+ n = strndup(in, p - in);
+ } else
+ n = strdup(in);
+
+ if (!n)
+ return -ENOMEM;
+
+ if (isempty(n))
+ return -EINVAL;
+
+ *error = e;
+ *name = TAKE_PTR(n);
+
+ return 0;
+}
+
+char *format_bytes(char *buf, size_t l, uint64_t t) {
+ unsigned i;
+
+ /* This only does IEC units so far */
+
+ static const struct {
+ const char *suffix;
+ uint64_t factor;
+ } table[] = {
+ { "E", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) },
+ { "P", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) },
+ { "T", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) },
+ { "G", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) },
+ { "M", UINT64_C(1024)*UINT64_C(1024) },
+ { "K", UINT64_C(1024) },
+ };
+
+ if (t == (uint64_t) -1)
+ return NULL;
+
+ for (i = 0; i < ELEMENTSOF(table); i++) {
+
+ if (t >= table[i].factor) {
+ snprintf(buf, l,
+ "%" PRIu64 ".%" PRIu64 "%s",
+ t / table[i].factor,
+ ((t*UINT64_C(10)) / table[i].factor) % UINT64_C(10),
+ table[i].suffix);
+
+ goto finish;
+ }
+ }
+
+ snprintf(buf, l, "%" PRIu64 "B", t);
+
+finish:
+ buf[l-1] = 0;
+ return buf;
+
+}
+
+int safe_atou_full(const char *s, unsigned base, unsigned *ret_u) {
+ char *x = NULL;
+ unsigned long l;
+
+ assert(s);
+ assert(ret_u);
+ assert(base <= 16);
+
+ /* strtoul() is happy to parse negative values, and silently
+ * converts them to unsigned values without generating an
+ * error. We want a clean error, hence let's look for the "-"
+ * prefix on our own, and generate an error. But let's do so
+ * only after strtoul() validated that the string is clean
+ * otherwise, so that we return EINVAL preferably over
+ * ERANGE. */
+
+ s += strspn(s, WHITESPACE);
+
+ errno = 0;
+ l = strtoul(s, &x, base);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+ if (s[0] == '-')
+ return -ERANGE;
+ if ((unsigned long) (unsigned) l != l)
+ return -ERANGE;
+
+ *ret_u = (unsigned) l;
+ return 0;
+}
+
+int safe_atoi(const char *s, int *ret_i) {
+ char *x = NULL;
+ long l;
+
+ assert(s);
+ assert(ret_i);
+
+ errno = 0;
+ l = strtol(s, &x, 0);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+ if ((long) (int) l != l)
+ return -ERANGE;
+
+ *ret_i = (int) l;
+ return 0;
+}
+
+int safe_atollu(const char *s, long long unsigned *ret_llu) {
+ char *x = NULL;
+ unsigned long long l;
+
+ assert(s);
+ assert(ret_llu);
+
+ s += strspn(s, WHITESPACE);
+
+ errno = 0;
+ l = strtoull(s, &x, 0);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+ if (*s == '-')
+ return -ERANGE;
+
+ *ret_llu = l;
+ return 0;
+}
+
+int safe_atolli(const char *s, long long int *ret_lli) {
+ char *x = NULL;
+ long long l;
+
+ assert(s);
+ assert(ret_lli);
+
+ errno = 0;
+ l = strtoll(s, &x, 0);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+
+ *ret_lli = l;
+ return 0;
+}
+
+int safe_atou8(const char *s, uint8_t *ret) {
+ char *x = NULL;
+ unsigned long l;
+
+ assert(s);
+ assert(ret);
+
+ s += strspn(s, WHITESPACE);
+
+ errno = 0;
+ l = strtoul(s, &x, 0);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+ if (s[0] == '-')
+ return -ERANGE;
+ if ((unsigned long) (uint8_t) l != l)
+ return -ERANGE;
+
+ *ret = (uint8_t) l;
+ return 0;
+}
+
+int safe_atou16_full(const char *s, unsigned base, uint16_t *ret) {
+ char *x = NULL;
+ unsigned long l;
+
+ assert(s);
+ assert(ret);
+ assert(base <= 16);
+
+ s += strspn(s, WHITESPACE);
+
+ errno = 0;
+ l = strtoul(s, &x, base);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+ if (s[0] == '-')
+ return -ERANGE;
+ if ((unsigned long) (uint16_t) l != l)
+ return -ERANGE;
+
+ *ret = (uint16_t) l;
+ return 0;
+}
+
+int safe_atoi16(const char *s, int16_t *ret) {
+ char *x = NULL;
+ long l;
+
+ assert(s);
+ assert(ret);
+
+ errno = 0;
+ l = strtol(s, &x, 0);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+ if ((long) (int16_t) l != l)
+ return -ERANGE;
+
+ *ret = (int16_t) l;
+ return 0;
+}
+
+int safe_atod(const char *s, double *ret_d) {
+ _cleanup_(freelocalep) locale_t loc = (locale_t) 0;
+ char *x = NULL;
+ double d = 0;
+
+ assert(s);
+ assert(ret_d);
+
+ loc = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0);
+ if (loc == (locale_t) 0)
+ return -errno;
+
+ errno = 0;
+ d = strtod_l(s, &x, loc);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+
+ *ret_d = (double) d;
+ return 0;
+}
+
+int parse_fractional_part_u(const char **p, size_t digits, unsigned *res) {
+ size_t i;
+ unsigned val = 0;
+ const char *s;
+
+ s = *p;
+
+ /* accept any number of digits, strtoull is limited to 19 */
+ for (i=0; i < digits; i++,s++) {
+ if (*s < '0' || *s > '9') {
+ if (i == 0)
+ return -EINVAL;
+
+ /* too few digits, pad with 0 */
+ for (; i < digits; i++)
+ val *= 10;
+
+ break;
+ }
+
+ val *= 10;
+ val += *s - '0';
+ }
+
+ /* maybe round up */
+ if (*s >= '5' && *s <= '9')
+ val++;
+
+ s += strspn(s, DIGITS);
+
+ *p = s;
+ *res = val;
+
+ return 0;
+}
+
+int parse_percent_unbounded(const char *p) {
+ const char *pc, *n;
+ int r, v;
+
+ pc = endswith(p, "%");
+ if (!pc)
+ return -EINVAL;
+
+ n = strndupa(p, pc - p);
+ r = safe_atoi(n, &v);
+ if (r < 0)
+ return r;
+ if (v < 0)
+ return -ERANGE;
+
+ return v;
+}
+
+int parse_percent(const char *p) {
+ int v;
+
+ v = parse_percent_unbounded(p);
+ if (v > 100)
+ return -ERANGE;
+
+ return v;
+}
+
+int parse_permille_unbounded(const char *p) {
+ const char *pc, *pm, *dot, *n;
+ int r, q, v;
+
+ pm = endswith(p, "‰");
+ if (pm) {
+ n = strndupa(p, pm - p);
+ r = safe_atoi(n, &v);
+ if (r < 0)
+ return r;
+ if (v < 0)
+ return -ERANGE;
+ } else {
+ pc = endswith(p, "%");
+ if (!pc)
+ return -EINVAL;
+
+ dot = memchr(p, '.', pc - p);
+ if (dot) {
+ if (dot + 2 != pc)
+ return -EINVAL;
+ if (dot[1] < '0' || dot[1] > '9')
+ return -EINVAL;
+ q = dot[1] - '0';
+ n = strndupa(p, dot - p);
+ } else {
+ q = 0;
+ n = strndupa(p, pc - p);
+ }
+ r = safe_atoi(n, &v);
+ if (r < 0)
+ return r;
+ if (v < 0)
+ return -ERANGE;
+ if (v > (INT_MAX - q) / 10)
+ return -ERANGE;
+
+ v = v * 10 + q;
+ }
+
+ return v;
+}
+
+int parse_permille(const char *p) {
+ int v;
+
+ v = parse_permille_unbounded(p);
+ if (v > 1000)
+ return -ERANGE;
+
+ return v;
+}
+
+int parse_nice(const char *p, int *ret) {
+ int n, r;
+
+ r = safe_atoi(p, &n);
+ if (r < 0)
+ return r;
+
+ if (!nice_is_valid(n))
+ return -ERANGE;
+
+ *ret = n;
+ return 0;
+}
+
+int parse_ip_port(const char *s, uint16_t *ret) {
+ uint16_t l;
+ int r;
+
+ r = safe_atou16(s, &l);
+ if (r < 0)
+ return r;
+
+ if (l == 0)
+ return -EINVAL;
+
+ *ret = (uint16_t) l;
+
+ return 0;
+}
+
+int parse_ip_port_range(const char *s, uint16_t *low, uint16_t *high) {
+ unsigned l, h;
+ int r;
+
+ r = parse_range(s, &l, &h);
+ if (r < 0)
+ return r;
+
+ if (l <= 0 || l > 65535 || h <= 0 || h > 65535)
+ return -EINVAL;
+
+ if (h < l)
+ return -EINVAL;
+
+ *low = l;
+ *high = h;
+
+ return 0;
+}
+
+int parse_dev(const char *s, dev_t *ret) {
+ const char *major;
+ unsigned x, y;
+ size_t n;
+ int r;
+
+ n = strspn(s, DIGITS);
+ if (n == 0)
+ return -EINVAL;
+ if (s[n] != ':')
+ return -EINVAL;
+
+ major = strndupa(s, n);
+ r = safe_atou(major, &x);
+ if (r < 0)
+ return r;
+
+ r = safe_atou(s + n + 1, &y);
+ if (r < 0)
+ return r;
+
+ if (!DEVICE_MAJOR_VALID(x) || !DEVICE_MINOR_VALID(y))
+ return -ERANGE;
+
+ *ret = makedev(x, y);
+ return 0;
+}
+
+int parse_oom_score_adjust(const char *s, int *ret) {
+ int r, v;
+
+ assert(s);
+ assert(ret);
+
+ r = safe_atoi(s, &v);
+ if (r < 0)
+ return r;
+
+ if (v < OOM_SCORE_ADJ_MIN || v > OOM_SCORE_ADJ_MAX)
+ return -ERANGE;
+
+ *ret = v;
+ return 0;
+}
diff --git a/src/basic/parse-util.h b/src/basic/parse-util.h
new file mode 100644
index 0000000..e47641b
--- /dev/null
+++ b/src/basic/parse-util.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+#define MODE_INVALID ((mode_t) -1)
+
+int parse_boolean(const char *v) _pure_;
+int parse_dev(const char *s, dev_t *ret);
+int parse_pid(const char *s, pid_t* ret_pid);
+int parse_mode(const char *s, mode_t *ret);
+int parse_ifindex(const char *s, int *ret);
+int parse_mtu(int family, const char *s, uint32_t *ret);
+
+int parse_size(const char *t, uint64_t base, uint64_t *size);
+int parse_range(const char *t, unsigned *lower, unsigned *upper);
+int parse_errno(const char *t);
+int parse_syscall_and_errno(const char *in, char **name, int *error);
+
+#define FORMAT_BYTES_MAX 8
+char *format_bytes(char *buf, size_t l, uint64_t t);
+
+int safe_atou_full(const char *s, unsigned base, unsigned *ret_u);
+
+static inline int safe_atou(const char *s, unsigned *ret_u) {
+ return safe_atou_full(s, 0, ret_u);
+}
+
+int safe_atoi(const char *s, int *ret_i);
+int safe_atollu(const char *s, unsigned long long *ret_u);
+int safe_atolli(const char *s, long long int *ret_i);
+
+int safe_atou8(const char *s, uint8_t *ret);
+
+int safe_atou16_full(const char *s, unsigned base, uint16_t *ret);
+
+static inline int safe_atou16(const char *s, uint16_t *ret) {
+ return safe_atou16_full(s, 0, ret);
+}
+
+static inline int safe_atoux16(const char *s, uint16_t *ret) {
+ return safe_atou16_full(s, 16, ret);
+}
+
+int safe_atoi16(const char *s, int16_t *ret);
+
+static inline int safe_atou32(const char *s, uint32_t *ret_u) {
+ assert_cc(sizeof(uint32_t) == sizeof(unsigned));
+ return safe_atou(s, (unsigned*) ret_u);
+}
+
+static inline int safe_atoi32(const char *s, int32_t *ret_i) {
+ assert_cc(sizeof(int32_t) == sizeof(int));
+ return safe_atoi(s, (int*) ret_i);
+}
+
+static inline int safe_atou64(const char *s, uint64_t *ret_u) {
+ assert_cc(sizeof(uint64_t) == sizeof(unsigned long long));
+ return safe_atollu(s, (unsigned long long*) ret_u);
+}
+
+static inline int safe_atoi64(const char *s, int64_t *ret_i) {
+ assert_cc(sizeof(int64_t) == sizeof(long long int));
+ return safe_atolli(s, (long long int*) ret_i);
+}
+
+#if LONG_MAX == INT_MAX
+static inline int safe_atolu(const char *s, unsigned long *ret_u) {
+ assert_cc(sizeof(unsigned long) == sizeof(unsigned));
+ return safe_atou(s, (unsigned*) ret_u);
+}
+static inline int safe_atoli(const char *s, long int *ret_u) {
+ assert_cc(sizeof(long int) == sizeof(int));
+ return safe_atoi(s, (int*) ret_u);
+}
+#else
+static inline int safe_atolu(const char *s, unsigned long *ret_u) {
+ assert_cc(sizeof(unsigned long) == sizeof(unsigned long long));
+ return safe_atollu(s, (unsigned long long*) ret_u);
+}
+static inline int safe_atoli(const char *s, long int *ret_u) {
+ assert_cc(sizeof(long int) == sizeof(long long int));
+ return safe_atolli(s, (long long int*) ret_u);
+}
+#endif
+
+#if SIZE_MAX == UINT_MAX
+static inline int safe_atozu(const char *s, size_t *ret_u) {
+ assert_cc(sizeof(size_t) == sizeof(unsigned));
+ return safe_atou(s, (unsigned *) ret_u);
+}
+#else
+static inline int safe_atozu(const char *s, size_t *ret_u) {
+ assert_cc(sizeof(size_t) == sizeof(long unsigned));
+ return safe_atolu(s, ret_u);
+}
+#endif
+
+int safe_atod(const char *s, double *ret_d);
+
+int parse_fractional_part_u(const char **s, size_t digits, unsigned *res);
+
+int parse_percent_unbounded(const char *p);
+int parse_percent(const char *p);
+
+int parse_permille_unbounded(const char *p);
+int parse_permille(const char *p);
+
+int parse_nice(const char *p, int *ret);
+
+int parse_ip_port(const char *s, uint16_t *ret);
+int parse_ip_port_range(const char *s, uint16_t *low, uint16_t *high);
+
+int parse_oom_score_adjust(const char *s, int *ret);
diff --git a/src/basic/path-util.c b/src/basic/path-util.c
new file mode 100644
index 0000000..2215173
--- /dev/null
+++ b/src/basic/path-util.c
@@ -0,0 +1,1150 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/* When we include libgen.h because we need dirname() we immediately
+ * undefine basename() since libgen.h defines it as a macro to the
+ * POSIX version which is really broken. We prefer GNU basename(). */
+#include <libgen.h>
+#undef basename
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "utf8.h"
+
+bool path_is_absolute(const char *p) {
+ return p[0] == '/';
+}
+
+bool is_path(const char *p) {
+ return !!strchr(p, '/');
+}
+
+int path_split_and_make_absolute(const char *p, char ***ret) {
+ char **l;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ l = strv_split(p, ":");
+ if (!l)
+ return -ENOMEM;
+
+ r = path_strv_make_absolute_cwd(l);
+ if (r < 0) {
+ strv_free(l);
+ return r;
+ }
+
+ *ret = l;
+ return r;
+}
+
+char *path_make_absolute(const char *p, const char *prefix) {
+ assert(p);
+
+ /* Makes every item in the list an absolute path by prepending
+ * the prefix, if specified and necessary */
+
+ if (path_is_absolute(p) || isempty(prefix))
+ return strdup(p);
+
+ if (endswith(prefix, "/"))
+ return strjoin(prefix, p);
+ else
+ return strjoin(prefix, "/", p);
+}
+
+int safe_getcwd(char **ret) {
+ char *cwd;
+
+ cwd = get_current_dir_name();
+ if (!cwd)
+ return negative_errno();
+
+ /* Let's make sure the directory is really absolute, to protect us from the logic behind
+ * CVE-2018-1000001 */
+ if (cwd[0] != '/') {
+ free(cwd);
+ return -ENOMEDIUM;
+ }
+
+ *ret = cwd;
+ return 0;
+}
+
+int path_make_absolute_cwd(const char *p, char **ret) {
+ char *c;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ /* Similar to path_make_absolute(), but prefixes with the
+ * current working directory. */
+
+ if (path_is_absolute(p))
+ c = strdup(p);
+ else {
+ _cleanup_free_ char *cwd = NULL;
+
+ r = safe_getcwd(&cwd);
+ if (r < 0)
+ return r;
+
+ c = path_join(cwd, p);
+ }
+ if (!c)
+ return -ENOMEM;
+
+ *ret = c;
+ return 0;
+}
+
+int path_make_relative(const char *from_dir, const char *to_path, char **_r) {
+ char *f, *t, *r, *p;
+ unsigned n_parents = 0;
+
+ assert(from_dir);
+ assert(to_path);
+ assert(_r);
+
+ /* Strips the common part, and adds ".." elements as necessary. */
+
+ if (!path_is_absolute(from_dir) || !path_is_absolute(to_path))
+ return -EINVAL;
+
+ f = strdupa(from_dir);
+ t = strdupa(to_path);
+
+ path_simplify(f, true);
+ path_simplify(t, true);
+
+ /* Skip the common part. */
+ for (;;) {
+ size_t a, b;
+
+ f += *f == '/';
+ t += *t == '/';
+
+ if (!*f) {
+ if (!*t)
+ /* from_dir equals to_path. */
+ r = strdup(".");
+ else
+ /* from_dir is a parent directory of to_path. */
+ r = strdup(t);
+ if (!r)
+ return -ENOMEM;
+
+ *_r = r;
+ return 0;
+ }
+
+ if (!*t)
+ break;
+
+ a = strcspn(f, "/");
+ b = strcspn(t, "/");
+
+ if (a != b || memcmp(f, t, a) != 0)
+ break;
+
+ f += a;
+ t += b;
+ }
+
+ /* If we're here, then "from_dir" has one or more elements that need to
+ * be replaced with "..". */
+
+ /* Count the number of necessary ".." elements. */
+ for (; *f;) {
+ size_t w;
+
+ w = strcspn(f, "/");
+
+ /* If this includes ".." we can't do a simple series of "..", refuse */
+ if (w == 2 && f[0] == '.' && f[1] == '.')
+ return -EINVAL;
+
+ /* Count number of elements */
+ n_parents++;
+
+ f += w;
+ f += *f == '/';
+ }
+
+ r = new(char, n_parents * 3 + strlen(t) + 1);
+ if (!r)
+ return -ENOMEM;
+
+ for (p = r; n_parents > 0; n_parents--)
+ p = mempcpy(p, "../", 3);
+
+ if (*t)
+ strcpy(p, t);
+ else
+ /* Remove trailing slash */
+ *(--p) = 0;
+
+ *_r = r;
+ return 0;
+}
+
+int path_strv_make_absolute_cwd(char **l) {
+ char **s;
+ int r;
+
+ /* Goes through every item in the string list and makes it
+ * absolute. This works in place and won't rollback any
+ * changes on failure. */
+
+ STRV_FOREACH(s, l) {
+ char *t;
+
+ r = path_make_absolute_cwd(*s, &t);
+ if (r < 0)
+ return r;
+
+ path_simplify(t, false);
+ free_and_replace(*s, t);
+ }
+
+ return 0;
+}
+
+char **path_strv_resolve(char **l, const char *root) {
+ char **s;
+ unsigned k = 0;
+ bool enomem = false;
+ int r;
+
+ if (strv_isempty(l))
+ return l;
+
+ /* Goes through every item in the string list and canonicalize
+ * the path. This works in place and won't rollback any
+ * changes on failure. */
+
+ STRV_FOREACH(s, l) {
+ _cleanup_free_ char *orig = NULL;
+ char *t, *u;
+
+ if (!path_is_absolute(*s)) {
+ free(*s);
+ continue;
+ }
+
+ if (root) {
+ orig = *s;
+ t = prefix_root(root, orig);
+ if (!t) {
+ enomem = true;
+ continue;
+ }
+ } else
+ t = *s;
+
+ r = chase_symlinks(t, root, 0, &u);
+ if (r == -ENOENT) {
+ if (root) {
+ u = TAKE_PTR(orig);
+ free(t);
+ } else
+ u = t;
+ } else if (r < 0) {
+ free(t);
+
+ if (r == -ENOMEM)
+ enomem = true;
+
+ continue;
+ } else if (root) {
+ char *x;
+
+ free(t);
+ x = path_startswith(u, root);
+ if (x) {
+ /* restore the slash if it was lost */
+ if (!startswith(x, "/"))
+ *(--x) = '/';
+
+ t = strdup(x);
+ free(u);
+ if (!t) {
+ enomem = true;
+ continue;
+ }
+ u = t;
+ } else {
+ /* canonicalized path goes outside of
+ * prefix, keep the original path instead */
+ free_and_replace(u, orig);
+ }
+ } else
+ free(t);
+
+ l[k++] = u;
+ }
+
+ l[k] = NULL;
+
+ if (enomem)
+ return NULL;
+
+ return l;
+}
+
+char **path_strv_resolve_uniq(char **l, const char *root) {
+
+ if (strv_isempty(l))
+ return l;
+
+ if (!path_strv_resolve(l, root))
+ return NULL;
+
+ return strv_uniq(l);
+}
+
+char *path_simplify(char *path, bool kill_dots) {
+ char *f, *t;
+ bool slash = false, ignore_slash = false, absolute;
+
+ assert(path);
+
+ /* Removes redundant inner and trailing slashes. Also removes unnecessary dots
+ * if kill_dots is true. Modifies the passed string in-place.
+ *
+ * ///foo//./bar/. becomes /foo/./bar/. (if kill_dots is false)
+ * ///foo//./bar/. becomes /foo/bar (if kill_dots is true)
+ * .//./foo//./bar/. becomes ././foo/./bar/. (if kill_dots is false)
+ * .//./foo//./bar/. becomes foo/bar (if kill_dots is true)
+ */
+
+ if (isempty(path))
+ return path;
+
+ absolute = path_is_absolute(path);
+
+ f = path;
+ if (kill_dots && *f == '.' && IN_SET(f[1], 0, '/')) {
+ ignore_slash = true;
+ f++;
+ }
+
+ for (t = path; *f; f++) {
+
+ if (*f == '/') {
+ slash = true;
+ continue;
+ }
+
+ if (slash) {
+ if (kill_dots && *f == '.' && IN_SET(f[1], 0, '/'))
+ continue;
+
+ slash = false;
+ if (ignore_slash)
+ ignore_slash = false;
+ else
+ *(t++) = '/';
+ }
+
+ *(t++) = *f;
+ }
+
+ /* Special rule, if we stripped everything, we either need a "/" (for the root directory)
+ * or "." for the current directory */
+ if (t == path) {
+ if (absolute)
+ *(t++) = '/';
+ else
+ *(t++) = '.';
+ }
+
+ *t = 0;
+ return path;
+}
+
+char* path_startswith(const char *path, const char *prefix) {
+ assert(path);
+ assert(prefix);
+
+ /* Returns a pointer to the start of the first component after the parts matched by
+ * the prefix, iff
+ * - both paths are absolute or both paths are relative,
+ * and
+ * - each component in prefix in turn matches a component in path at the same position.
+ * An empty string will be returned when the prefix and path are equivalent.
+ *
+ * Returns NULL otherwise.
+ */
+
+ if ((path[0] == '/') != (prefix[0] == '/'))
+ return NULL;
+
+ for (;;) {
+ size_t a, b;
+
+ path += strspn(path, "/");
+ prefix += strspn(prefix, "/");
+
+ if (*prefix == 0)
+ return (char*) path;
+
+ if (*path == 0)
+ return NULL;
+
+ a = strcspn(path, "/");
+ b = strcspn(prefix, "/");
+
+ if (a != b)
+ return NULL;
+
+ if (memcmp(path, prefix, a) != 0)
+ return NULL;
+
+ path += a;
+ prefix += b;
+ }
+}
+
+int path_compare(const char *a, const char *b) {
+ int d;
+
+ assert(a);
+ assert(b);
+
+ /* A relative path and an absolute path must not compare as equal.
+ * Which one is sorted before the other does not really matter.
+ * Here a relative path is ordered before an absolute path. */
+ d = (a[0] == '/') - (b[0] == '/');
+ if (d != 0)
+ return d;
+
+ for (;;) {
+ size_t j, k;
+
+ a += strspn(a, "/");
+ b += strspn(b, "/");
+
+ if (*a == 0 && *b == 0)
+ return 0;
+
+ /* Order prefixes first: "/foo" before "/foo/bar" */
+ if (*a == 0)
+ return -1;
+ if (*b == 0)
+ return 1;
+
+ j = strcspn(a, "/");
+ k = strcspn(b, "/");
+
+ /* Alphabetical sort: "/foo/aaa" before "/foo/b" */
+ d = memcmp(a, b, MIN(j, k));
+ if (d != 0)
+ return (d > 0) - (d < 0); /* sign of d */
+
+ /* Sort "/foo/a" before "/foo/aaa" */
+ d = (j > k) - (j < k); /* sign of (j - k) */
+ if (d != 0)
+ return d;
+
+ a += j;
+ b += k;
+ }
+}
+
+bool path_equal(const char *a, const char *b) {
+ return path_compare(a, b) == 0;
+}
+
+bool path_equal_or_files_same(const char *a, const char *b, int flags) {
+ return path_equal(a, b) || files_same(a, b, flags) > 0;
+}
+
+char* path_join_internal(const char *first, ...) {
+ char *joined, *q;
+ const char *p;
+ va_list ap;
+ bool slash;
+ size_t sz;
+
+ /* Joins all listed strings until the sentinel and places a "/" between them unless the strings end/begin
+ * already with one so that it is unnecessary. Note that slashes which are already duplicate won't be
+ * removed. The string returned is hence always equal to or longer than the sum of the lengths of each
+ * individual string.
+ *
+ * Note: any listed empty string is simply skipped. This can be useful for concatenating strings of which some
+ * are optional.
+ *
+ * Examples:
+ *
+ * path_join("foo", "bar") → "foo/bar"
+ * path_join("foo/", "bar") → "foo/bar"
+ * path_join("", "foo", "", "bar", "") → "foo/bar" */
+
+ sz = strlen_ptr(first);
+ va_start(ap, first);
+ while ((p = va_arg(ap, char*)) != (const char*) -1)
+ if (!isempty(p))
+ sz += 1 + strlen(p);
+ va_end(ap);
+
+ joined = new(char, sz + 1);
+ if (!joined)
+ return NULL;
+
+ if (!isempty(first)) {
+ q = stpcpy(joined, first);
+ slash = endswith(first, "/");
+ } else {
+ /* Skip empty items */
+ joined[0] = 0;
+ q = joined;
+ slash = true; /* no need to generate a slash anymore */
+ }
+
+ va_start(ap, first);
+ while ((p = va_arg(ap, char*)) != (const char*) -1) {
+ if (isempty(p))
+ continue;
+
+ if (!slash && p[0] != '/')
+ *(q++) = '/';
+
+ q = stpcpy(q, p);
+ slash = endswith(p, "/");
+ }
+ va_end(ap);
+
+ return joined;
+}
+
+int find_binary(const char *name, char **ret) {
+ int last_error, r;
+ const char *p;
+
+ assert(name);
+
+ if (is_path(name)) {
+ if (access(name, X_OK) < 0)
+ return -errno;
+
+ if (ret) {
+ r = path_make_absolute_cwd(name, ret);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+ }
+
+ /**
+ * Plain getenv, not secure_getenv, because we want
+ * to actually allow the user to pick the binary.
+ */
+ p = getenv("PATH");
+ if (!p)
+ p = DEFAULT_PATH;
+
+ last_error = -ENOENT;
+
+ for (;;) {
+ _cleanup_free_ char *j = NULL, *element = NULL;
+
+ r = extract_first_word(&p, &element, ":", EXTRACT_RELAX|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (!path_is_absolute(element))
+ continue;
+
+ j = strjoin(element, "/", name);
+ if (!j)
+ return -ENOMEM;
+
+ if (access(j, X_OK) >= 0) {
+ /* Found it! */
+
+ if (ret) {
+ *ret = path_simplify(j, false);
+ j = NULL;
+ }
+
+ return 0;
+ }
+
+ last_error = -errno;
+ }
+
+ return last_error;
+}
+
+bool paths_check_timestamp(const char* const* paths, usec_t *timestamp, bool update) {
+ bool changed = false;
+ const char* const* i;
+
+ assert(timestamp);
+
+ if (!paths)
+ return false;
+
+ STRV_FOREACH(i, paths) {
+ struct stat stats;
+ usec_t u;
+
+ if (stat(*i, &stats) < 0)
+ continue;
+
+ u = timespec_load(&stats.st_mtim);
+
+ /* first check */
+ if (*timestamp >= u)
+ continue;
+
+ log_debug("timestamp of '%s' changed", *i);
+
+ /* update timestamp */
+ if (update) {
+ *timestamp = u;
+ changed = true;
+ } else
+ return true;
+ }
+
+ return changed;
+}
+
+static int binary_is_good(const char *binary) {
+ _cleanup_free_ char *p = NULL, *d = NULL;
+ int r;
+
+ r = find_binary(binary, &p);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ /* An fsck that is linked to /bin/true is a non-existent
+ * fsck */
+
+ r = readlink_malloc(p, &d);
+ if (r == -EINVAL) /* not a symlink */
+ return 1;
+ if (r < 0)
+ return r;
+
+ return !PATH_IN_SET(d, "true"
+ "/bin/true",
+ "/usr/bin/true",
+ "/dev/null");
+}
+
+int fsck_exists(const char *fstype) {
+ const char *checker;
+
+ assert(fstype);
+
+ if (streq(fstype, "auto"))
+ return -EINVAL;
+
+ checker = strjoina("fsck.", fstype);
+ return binary_is_good(checker);
+}
+
+int mkfs_exists(const char *fstype) {
+ const char *mkfs;
+
+ assert(fstype);
+
+ if (streq(fstype, "auto"))
+ return -EINVAL;
+
+ mkfs = strjoina("mkfs.", fstype);
+ return binary_is_good(mkfs);
+}
+
+char *prefix_root(const char *root, const char *path) {
+ char *n, *p;
+ size_t l;
+
+ /* If root is passed, prefixes path with it. Otherwise returns
+ * it as is. */
+
+ assert(path);
+
+ /* First, drop duplicate prefixing slashes from the path */
+ while (path[0] == '/' && path[1] == '/')
+ path++;
+
+ if (empty_or_root(root))
+ return strdup(path);
+
+ l = strlen(root) + 1 + strlen(path) + 1;
+
+ n = new(char, l);
+ if (!n)
+ return NULL;
+
+ p = stpcpy(n, root);
+
+ while (p > n && p[-1] == '/')
+ p--;
+
+ if (path[0] != '/')
+ *(p++) = '/';
+
+ strcpy(p, path);
+ return n;
+}
+
+int parse_path_argument_and_warn(const char *path, bool suppress_root, char **arg) {
+ char *p;
+ int r;
+
+ /*
+ * This function is intended to be used in command line
+ * parsers, to handle paths that are passed in. It makes the
+ * path absolute, and reduces it to NULL if omitted or
+ * root (the latter optionally).
+ *
+ * NOTE THAT THIS WILL FREE THE PREVIOUS ARGUMENT POINTER ON
+ * SUCCESS! Hence, do not pass in uninitialized pointers.
+ */
+
+ if (isempty(path)) {
+ *arg = mfree(*arg);
+ return 0;
+ }
+
+ r = path_make_absolute_cwd(path, &p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse path \"%s\" and make it absolute: %m", path);
+
+ path_simplify(p, false);
+ if (suppress_root && empty_or_root(p))
+ p = mfree(p);
+
+ free_and_replace(*arg, p);
+
+ return 0;
+}
+
+char* dirname_malloc(const char *path) {
+ char *d, *dir, *dir2;
+
+ assert(path);
+
+ d = strdup(path);
+ if (!d)
+ return NULL;
+
+ dir = dirname(d);
+ assert(dir);
+
+ if (dir == d)
+ return d;
+
+ dir2 = strdup(dir);
+ free(d);
+
+ return dir2;
+}
+
+const char *last_path_component(const char *path) {
+
+ /* Finds the last component of the path, preserving the optional trailing slash that signifies a directory.
+ *
+ * a/b/c → c
+ * a/b/c/ → c/
+ * x → x
+ * x/ → x/
+ * /y → y
+ * /y/ → y/
+ * / → /
+ * // → /
+ * /foo/a → a
+ * /foo/a/ → a/
+ *
+ * Also, the empty string is mapped to itself.
+ *
+ * This is different than basename(), which returns "" when a trailing slash is present.
+ */
+
+ unsigned l, k;
+
+ if (!path)
+ return NULL;
+
+ l = k = strlen(path);
+ if (l == 0) /* special case — an empty string */
+ return path;
+
+ while (k > 0 && path[k-1] == '/')
+ k--;
+
+ if (k == 0) /* the root directory */
+ return path + l - 1;
+
+ while (k > 0 && path[k-1] != '/')
+ k--;
+
+ return path + k;
+}
+
+int path_extract_filename(const char *p, char **ret) {
+ _cleanup_free_ char *a = NULL;
+ const char *c, *e = NULL, *q;
+
+ /* Extracts the filename part (i.e. right-most component) from a path, i.e. string that passes
+ * filename_is_valid(). A wrapper around last_path_component(), but eats up trailing slashes. */
+
+ if (!p)
+ return -EINVAL;
+
+ c = last_path_component(p);
+
+ for (q = c; *q != 0; q++)
+ if (*q != '/')
+ e = q + 1;
+
+ if (!e) /* no valid character? */
+ return -EINVAL;
+
+ a = strndup(c, e - c);
+ if (!a)
+ return -ENOMEM;
+
+ if (!filename_is_valid(a))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(a);
+
+ return 0;
+}
+
+bool filename_is_valid(const char *p) {
+ const char *e;
+
+ if (isempty(p))
+ return false;
+
+ if (dot_or_dot_dot(p))
+ return false;
+
+ e = strchrnul(p, '/');
+ if (*e != 0)
+ return false;
+
+ if (e - p > FILENAME_MAX) /* FILENAME_MAX is counted *without* the trailing NUL byte */
+ return false;
+
+ return true;
+}
+
+bool path_is_valid(const char *p) {
+
+ if (isempty(p))
+ return false;
+
+ if (strlen(p) >= PATH_MAX) /* PATH_MAX is counted *with* the trailing NUL byte */
+ return false;
+
+ return true;
+}
+
+bool path_is_normalized(const char *p) {
+
+ if (!path_is_valid(p))
+ return false;
+
+ if (dot_or_dot_dot(p))
+ return false;
+
+ if (startswith(p, "../") || endswith(p, "/..") || strstr(p, "/../"))
+ return false;
+
+ if (startswith(p, "./") || endswith(p, "/.") || strstr(p, "/./"))
+ return false;
+
+ if (strstr(p, "//"))
+ return false;
+
+ return true;
+}
+
+char *file_in_same_dir(const char *path, const char *filename) {
+ char *e, *ret;
+ size_t k;
+
+ assert(path);
+ assert(filename);
+
+ /* This removes the last component of path and appends
+ * filename, unless the latter is absolute anyway or the
+ * former isn't */
+
+ if (path_is_absolute(filename))
+ return strdup(filename);
+
+ e = strrchr(path, '/');
+ if (!e)
+ return strdup(filename);
+
+ k = strlen(filename);
+ ret = new(char, (e + 1 - path) + k + 1);
+ if (!ret)
+ return NULL;
+
+ memcpy(mempcpy(ret, path, e + 1 - path), filename, k + 1);
+ return ret;
+}
+
+bool hidden_or_backup_file(const char *filename) {
+ const char *p;
+
+ assert(filename);
+
+ if (filename[0] == '.' ||
+ streq(filename, "lost+found") ||
+ streq(filename, "aquota.user") ||
+ streq(filename, "aquota.group") ||
+ endswith(filename, "~"))
+ return true;
+
+ p = strrchr(filename, '.');
+ if (!p)
+ return false;
+
+ /* Please, let's not add more entries to the list below. If external projects think it's a good idea to come up
+ * with always new suffixes and that everybody else should just adjust to that, then it really should be on
+ * them. Hence, in future, let's not add any more entries. Instead, let's ask those packages to instead adopt
+ * one of the generic suffixes/prefixes for hidden files or backups, possibly augmented with an additional
+ * string. Specifically: there's now:
+ *
+ * The generic suffixes "~" and ".bak" for backup files
+ * The generic prefix "." for hidden files
+ *
+ * Thus, if a new package manager "foopkg" wants its own set of ".foopkg-new", ".foopkg-old", ".foopkg-dist"
+ * or so registered, let's refuse that and ask them to use ".foopkg.new", ".foopkg.old" or ".foopkg~" instead.
+ */
+
+ return STR_IN_SET(p + 1,
+ "rpmnew",
+ "rpmsave",
+ "rpmorig",
+ "dpkg-old",
+ "dpkg-new",
+ "dpkg-tmp",
+ "dpkg-dist",
+ "dpkg-bak",
+ "dpkg-backup",
+ "dpkg-remove",
+ "ucf-new",
+ "ucf-old",
+ "ucf-dist",
+ "swp",
+ "bak",
+ "old",
+ "new");
+}
+
+bool is_device_path(const char *path) {
+
+ /* Returns true on paths that likely refer to a device, either by path in sysfs or to something in /dev */
+
+ return PATH_STARTSWITH_SET(path, "/dev/", "/sys/");
+}
+
+bool valid_device_node_path(const char *path) {
+
+ /* Some superficial checks whether the specified path is a valid device node path, all without looking at the
+ * actual device node. */
+
+ if (!PATH_STARTSWITH_SET(path, "/dev/", "/run/systemd/inaccessible/"))
+ return false;
+
+ if (endswith(path, "/")) /* can't be a device node if it ends in a slash */
+ return false;
+
+ return path_is_normalized(path);
+}
+
+bool valid_device_allow_pattern(const char *path) {
+ assert(path);
+
+ /* Like valid_device_node_path(), but also allows full-subsystem expressions, like DeviceAllow= and DeviceDeny=
+ * accept it */
+
+ if (STARTSWITH_SET(path, "block-", "char-"))
+ return true;
+
+ return valid_device_node_path(path);
+}
+
+int systemd_installation_has_version(const char *root, unsigned minimal_version) {
+ const char *pattern;
+ int r;
+
+ /* Try to guess if systemd installation is later than the specified version. This
+ * is hacky and likely to yield false negatives, particularly if the installation
+ * is non-standard. False positives should be relatively rare.
+ */
+
+ NULSTR_FOREACH(pattern,
+ /* /lib works for systems without usr-merge, and for systems with a sane
+ * usr-merge, where /lib is a symlink to /usr/lib. /usr/lib is necessary
+ * for Gentoo which does a merge without making /lib a symlink.
+ */
+ "lib/systemd/libsystemd-shared-*.so\0"
+ "lib64/systemd/libsystemd-shared-*.so\0"
+ "usr/lib/systemd/libsystemd-shared-*.so\0"
+ "usr/lib64/systemd/libsystemd-shared-*.so\0") {
+
+ _cleanup_strv_free_ char **names = NULL;
+ _cleanup_free_ char *path = NULL;
+ char *c, **name;
+
+ path = prefix_root(root, pattern);
+ if (!path)
+ return -ENOMEM;
+
+ r = glob_extend(&names, path);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+
+ assert_se(c = endswith(path, "*.so"));
+ *c = '\0'; /* truncate the glob part */
+
+ STRV_FOREACH(name, names) {
+ /* This is most likely to run only once, hence let's not optimize anything. */
+ char *t, *t2;
+ unsigned version;
+
+ t = startswith(*name, path);
+ if (!t)
+ continue;
+
+ t2 = endswith(t, ".so");
+ if (!t2)
+ continue;
+
+ t2[0] = '\0'; /* truncate the suffix */
+
+ r = safe_atou(t, &version);
+ if (r < 0) {
+ log_debug_errno(r, "Found libsystemd shared at \"%s.so\", but failed to parse version: %m", *name);
+ continue;
+ }
+
+ log_debug("Found libsystemd shared at \"%s.so\", version %u (%s).",
+ *name, version,
+ version >= minimal_version ? "OK" : "too old");
+ if (version >= minimal_version)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool dot_or_dot_dot(const char *path) {
+ if (!path)
+ return false;
+ if (path[0] != '.')
+ return false;
+ if (path[1] == 0)
+ return true;
+ if (path[1] != '.')
+ return false;
+
+ return path[2] == 0;
+}
+
+bool empty_or_root(const char *root) {
+
+ /* For operations relative to some root directory, returns true if the specified root directory is redundant,
+ * i.e. either / or NULL or the empty string or any equivalent. */
+
+ if (!root)
+ return true;
+
+ return root[strspn(root, "/")] == 0;
+}
+
+int path_simplify_and_warn(
+ char *path,
+ unsigned flag,
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *lvalue) {
+
+ bool absolute, fatal = flag & PATH_CHECK_FATAL;
+
+ assert(!FLAGS_SET(flag, PATH_CHECK_ABSOLUTE | PATH_CHECK_RELATIVE));
+
+ if (!utf8_is_valid(path)) {
+ log_syntax_invalid_utf8(unit, LOG_ERR, filename, line, path);
+ return -EINVAL;
+ }
+
+ if (flag & (PATH_CHECK_ABSOLUTE | PATH_CHECK_RELATIVE)) {
+ absolute = path_is_absolute(path);
+
+ if (!absolute && (flag & PATH_CHECK_ABSOLUTE)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "%s= path is not absolute%s: %s",
+ lvalue, fatal ? "" : ", ignoring", path);
+ return -EINVAL;
+ }
+
+ if (absolute && (flag & PATH_CHECK_RELATIVE)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "%s= path is absolute%s: %s",
+ lvalue, fatal ? "" : ", ignoring", path);
+ return -EINVAL;
+ }
+ }
+
+ path_simplify(path, true);
+
+ if (!path_is_normalized(path)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "%s= path is not normalized%s: %s",
+ lvalue, fatal ? "" : ", ignoring", path);
+ return -EINVAL;
+ }
+
+ if (!path_is_valid(path)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "%s= path has invalid length (%zu bytes)%s.",
+ lvalue, strlen(path), fatal ? "" : ", ignoring");
+ return -EINVAL;
+ }
+
+ return 0;
+}
diff --git a/src/basic/path-util.h b/src/basic/path-util.h
new file mode 100644
index 0000000..86c5a57
--- /dev/null
+++ b/src/basic/path-util.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <alloca.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+#define PATH_SPLIT_SBIN_BIN(x) x "sbin:" x "bin"
+#define PATH_SPLIT_SBIN_BIN_NULSTR(x) x "sbin\0" x "bin\0"
+
+#define PATH_NORMAL_SBIN_BIN(x) x "bin"
+#define PATH_NORMAL_SBIN_BIN_NULSTR(x) x "bin\0"
+
+#if HAVE_SPLIT_BIN
+# define PATH_SBIN_BIN(x) PATH_SPLIT_SBIN_BIN(x)
+# define PATH_SBIN_BIN_NULSTR(x) PATH_SPLIT_SBIN_BIN_NULSTR(x)
+#else
+# define PATH_SBIN_BIN(x) PATH_NORMAL_SBIN_BIN(x)
+# define PATH_SBIN_BIN_NULSTR(x) PATH_NORMAL_SBIN_BIN_NULSTR(x)
+#endif
+
+#define DEFAULT_PATH_NORMAL PATH_SBIN_BIN("/usr/local/") ":" PATH_SBIN_BIN("/usr/")
+#define DEFAULT_PATH_NORMAL_NULSTR PATH_SBIN_BIN_NULSTR("/usr/local/") PATH_SBIN_BIN_NULSTR("/usr/")
+#define DEFAULT_PATH_SPLIT_USR DEFAULT_PATH_NORMAL ":" PATH_SBIN_BIN("/")
+#define DEFAULT_PATH_SPLIT_USR_NULSTR DEFAULT_PATH_NORMAL_NULSTR PATH_SBIN_BIN_NULSTR("/")
+#define DEFAULT_PATH_COMPAT PATH_SPLIT_SBIN_BIN("/usr/local/") ":" PATH_SPLIT_SBIN_BIN("/usr/") ":" PATH_SPLIT_SBIN_BIN("/")
+
+#if HAVE_SPLIT_USR
+# define DEFAULT_PATH DEFAULT_PATH_SPLIT_USR
+# define DEFAULT_PATH_NULSTR DEFAULT_PATH_SPLIT_USR_NULSTR
+#else
+# define DEFAULT_PATH DEFAULT_PATH_NORMAL
+# define DEFAULT_PATH_NULSTR DEFAULT_PATH_NORMAL_NULSTR
+#endif
+
+bool is_path(const char *p) _pure_;
+int path_split_and_make_absolute(const char *p, char ***ret);
+bool path_is_absolute(const char *p) _pure_;
+char* path_make_absolute(const char *p, const char *prefix);
+int safe_getcwd(char **ret);
+int path_make_absolute_cwd(const char *p, char **ret);
+int path_make_relative(const char *from_dir, const char *to_path, char **_r);
+char* path_startswith(const char *path, const char *prefix) _pure_;
+int path_compare(const char *a, const char *b) _pure_;
+bool path_equal(const char *a, const char *b) _pure_;
+bool path_equal_or_files_same(const char *a, const char *b, int flags);
+char* path_join_internal(const char *first, ...);
+#define path_join(x, ...) path_join_internal(x, __VA_ARGS__, (const char*) -1)
+
+char* path_simplify(char *path, bool kill_dots);
+
+static inline bool path_equal_ptr(const char *a, const char *b) {
+ return !!a == !!b && (!a || path_equal(a, b));
+}
+
+/* Note: the search terminates on the first NULL item. */
+#define PATH_IN_SET(p, ...) \
+ ({ \
+ char **_s; \
+ bool _found = false; \
+ STRV_FOREACH(_s, STRV_MAKE(__VA_ARGS__)) \
+ if (path_equal(p, *_s)) { \
+ _found = true; \
+ break; \
+ } \
+ _found; \
+ })
+
+#define PATH_STARTSWITH_SET(p, ...) \
+ ({ \
+ const char *_p = (p); \
+ char *_found = NULL, **_i; \
+ STRV_FOREACH(_i, STRV_MAKE(__VA_ARGS__)) { \
+ _found = path_startswith(_p, *_i); \
+ if (_found) \
+ break; \
+ } \
+ _found; \
+ })
+
+int path_strv_make_absolute_cwd(char **l);
+char** path_strv_resolve(char **l, const char *root);
+char** path_strv_resolve_uniq(char **l, const char *root);
+
+int find_binary(const char *name, char **filename);
+
+bool paths_check_timestamp(const char* const* paths, usec_t *paths_ts_usec, bool update);
+
+int fsck_exists(const char *fstype);
+int mkfs_exists(const char *fstype);
+
+/* Iterates through the path prefixes of the specified path, going up
+ * the tree, to root. Also returns "" (and not "/"!) for the root
+ * directory. Excludes the specified directory itself */
+#define PATH_FOREACH_PREFIX(prefix, path) \
+ for (char *_slash = ({ \
+ path_simplify(strcpy(prefix, path), false); \
+ streq(prefix, "/") ? NULL : strrchr(prefix, '/'); \
+ }); \
+ _slash && ((*_slash = 0), true); \
+ _slash = strrchr((prefix), '/'))
+
+/* Same as PATH_FOREACH_PREFIX but also includes the specified path itself */
+#define PATH_FOREACH_PREFIX_MORE(prefix, path) \
+ for (char *_slash = ({ \
+ path_simplify(strcpy(prefix, path), false); \
+ if (streq(prefix, "/")) \
+ prefix[0] = 0; \
+ strrchr(prefix, 0); \
+ }); \
+ _slash && ((*_slash = 0), true); \
+ _slash = strrchr((prefix), '/'))
+
+char *prefix_root(const char *root, const char *path);
+
+/* Similar to prefix_root(), but returns an alloca() buffer, or
+ * possibly a const pointer into the path parameter */
+#define prefix_roota(root, path) \
+ ({ \
+ const char* _path = (path), *_root = (root), *_ret; \
+ char *_p, *_n; \
+ size_t _l; \
+ while (_path[0] == '/' && _path[1] == '/') \
+ _path ++; \
+ if (empty_or_root(_root)) \
+ _ret = _path; \
+ else { \
+ _l = strlen(_root) + 1 + strlen(_path) + 1; \
+ _n = newa(char, _l); \
+ _p = stpcpy(_n, _root); \
+ while (_p > _n && _p[-1] == '/') \
+ _p--; \
+ if (_path[0] != '/') \
+ *(_p++) = '/'; \
+ strcpy(_p, _path); \
+ _ret = _n; \
+ } \
+ _ret; \
+ })
+
+int parse_path_argument_and_warn(const char *path, bool suppress_root, char **arg);
+
+char* dirname_malloc(const char *path);
+const char *last_path_component(const char *path);
+int path_extract_filename(const char *p, char **ret);
+
+bool filename_is_valid(const char *p) _pure_;
+bool path_is_valid(const char *p) _pure_;
+bool path_is_normalized(const char *p) _pure_;
+
+char *file_in_same_dir(const char *path, const char *filename);
+
+bool hidden_or_backup_file(const char *filename) _pure_;
+
+bool is_device_path(const char *path);
+
+bool valid_device_node_path(const char *path);
+bool valid_device_allow_pattern(const char *path);
+
+int systemd_installation_has_version(const char *root, unsigned minimal_version);
+
+bool dot_or_dot_dot(const char *path);
+
+static inline const char *skip_dev_prefix(const char *p) {
+ const char *e;
+
+ /* Drop any /dev prefix if there is any */
+
+ e = path_startswith(p, "/dev/");
+
+ return e ?: p;
+}
+
+bool empty_or_root(const char *root);
+static inline const char *empty_to_root(const char *path) {
+ return isempty(path) ? "/" : path;
+}
+
+enum {
+ PATH_CHECK_FATAL = 1 << 0, /* If not set, then error message is appended with 'ignoring'. */
+ PATH_CHECK_ABSOLUTE = 1 << 1,
+ PATH_CHECK_RELATIVE = 1 << 2,
+};
+
+int path_simplify_and_warn(char *path, unsigned flag, const char *unit, const char *filename, unsigned line, const char *lvalue);
diff --git a/src/basic/prioq.c b/src/basic/prioq.c
new file mode 100644
index 0000000..76b27fa
--- /dev/null
+++ b/src/basic/prioq.c
@@ -0,0 +1,300 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+/*
+ * Priority Queue
+ * The prioq object implements a priority queue. That is, it orders objects by
+ * their priority and allows O(1) access to the object with the highest
+ * priority. Insertion and removal are Θ(log n). Optionally, the caller can
+ * provide a pointer to an index which will be kept up-to-date by the prioq.
+ *
+ * The underlying algorithm used in this implementation is a Heap.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "hashmap.h"
+#include "prioq.h"
+
+struct prioq_item {
+ void *data;
+ unsigned *idx;
+};
+
+struct Prioq {
+ compare_func_t compare_func;
+ unsigned n_items, n_allocated;
+
+ struct prioq_item *items;
+};
+
+Prioq *prioq_new(compare_func_t compare_func) {
+ Prioq *q;
+
+ q = new(Prioq, 1);
+ if (!q)
+ return q;
+
+ *q = (Prioq) {
+ .compare_func = compare_func,
+ };
+
+ return q;
+}
+
+Prioq* prioq_free(Prioq *q) {
+ if (!q)
+ return NULL;
+
+ free(q->items);
+ return mfree(q);
+}
+
+int prioq_ensure_allocated(Prioq **q, compare_func_t compare_func) {
+ assert(q);
+
+ if (*q)
+ return 0;
+
+ *q = prioq_new(compare_func);
+ if (!*q)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void swap(Prioq *q, unsigned j, unsigned k) {
+ assert(q);
+ assert(j < q->n_items);
+ assert(k < q->n_items);
+
+ assert(!q->items[j].idx || *(q->items[j].idx) == j);
+ assert(!q->items[k].idx || *(q->items[k].idx) == k);
+
+ SWAP_TWO(q->items[j].data, q->items[k].data);
+ SWAP_TWO(q->items[j].idx, q->items[k].idx);
+
+ if (q->items[j].idx)
+ *q->items[j].idx = j;
+
+ if (q->items[k].idx)
+ *q->items[k].idx = k;
+}
+
+static unsigned shuffle_up(Prioq *q, unsigned idx) {
+ assert(q);
+ assert(idx < q->n_items);
+
+ while (idx > 0) {
+ unsigned k;
+
+ k = (idx-1)/2;
+
+ if (q->compare_func(q->items[k].data, q->items[idx].data) <= 0)
+ break;
+
+ swap(q, idx, k);
+ idx = k;
+ }
+
+ return idx;
+}
+
+static unsigned shuffle_down(Prioq *q, unsigned idx) {
+ assert(q);
+
+ for (;;) {
+ unsigned j, k, s;
+
+ k = (idx+1)*2; /* right child */
+ j = k-1; /* left child */
+
+ if (j >= q->n_items)
+ break;
+
+ if (q->compare_func(q->items[j].data, q->items[idx].data) < 0)
+
+ /* So our left child is smaller than we are, let's
+ * remember this fact */
+ s = j;
+ else
+ s = idx;
+
+ if (k < q->n_items &&
+ q->compare_func(q->items[k].data, q->items[s].data) < 0)
+
+ /* So our right child is smaller than we are, let's
+ * remember this fact */
+ s = k;
+
+ /* s now points to the smallest of the three items */
+
+ if (s == idx)
+ /* No swap necessary, we're done */
+ break;
+
+ swap(q, idx, s);
+ idx = s;
+ }
+
+ return idx;
+}
+
+int prioq_put(Prioq *q, void *data, unsigned *idx) {
+ struct prioq_item *i;
+ unsigned k;
+
+ assert(q);
+
+ if (q->n_items >= q->n_allocated) {
+ unsigned n;
+ struct prioq_item *j;
+
+ n = MAX((q->n_items+1) * 2, 16u);
+ j = reallocarray(q->items, n, sizeof(struct prioq_item));
+ if (!j)
+ return -ENOMEM;
+
+ q->items = j;
+ q->n_allocated = n;
+ }
+
+ k = q->n_items++;
+ i = q->items + k;
+ i->data = data;
+ i->idx = idx;
+
+ if (idx)
+ *idx = k;
+
+ shuffle_up(q, k);
+
+ return 0;
+}
+
+static void remove_item(Prioq *q, struct prioq_item *i) {
+ struct prioq_item *l;
+
+ assert(q);
+ assert(i);
+
+ l = q->items + q->n_items - 1;
+
+ if (i == l)
+ /* Last entry, let's just remove it */
+ q->n_items--;
+ else {
+ unsigned k;
+
+ /* Not last entry, let's replace the last entry with
+ * this one, and reshuffle */
+
+ k = i - q->items;
+
+ i->data = l->data;
+ i->idx = l->idx;
+ if (i->idx)
+ *i->idx = k;
+ q->n_items--;
+
+ k = shuffle_down(q, k);
+ shuffle_up(q, k);
+ }
+}
+
+_pure_ static struct prioq_item* find_item(Prioq *q, void *data, unsigned *idx) {
+ struct prioq_item *i;
+
+ assert(q);
+
+ if (q->n_items <= 0)
+ return NULL;
+
+ if (idx) {
+ if (*idx == PRIOQ_IDX_NULL ||
+ *idx >= q->n_items)
+ return NULL;
+
+ i = q->items + *idx;
+ if (i->data != data)
+ return NULL;
+
+ return i;
+ } else {
+ for (i = q->items; i < q->items + q->n_items; i++)
+ if (i->data == data)
+ return i;
+ return NULL;
+ }
+}
+
+int prioq_remove(Prioq *q, void *data, unsigned *idx) {
+ struct prioq_item *i;
+
+ if (!q)
+ return 0;
+
+ i = find_item(q, data, idx);
+ if (!i)
+ return 0;
+
+ remove_item(q, i);
+ return 1;
+}
+
+int prioq_reshuffle(Prioq *q, void *data, unsigned *idx) {
+ struct prioq_item *i;
+ unsigned k;
+
+ assert(q);
+
+ i = find_item(q, data, idx);
+ if (!i)
+ return 0;
+
+ k = i - q->items;
+ k = shuffle_down(q, k);
+ shuffle_up(q, k);
+ return 1;
+}
+
+void *prioq_peek_by_index(Prioq *q, unsigned idx) {
+ if (!q)
+ return NULL;
+
+ if (idx >= q->n_items)
+ return NULL;
+
+ return q->items[idx].data;
+}
+
+void *prioq_pop(Prioq *q) {
+ void *data;
+
+ if (!q)
+ return NULL;
+
+ if (q->n_items <= 0)
+ return NULL;
+
+ data = q->items[0].data;
+ remove_item(q, q->items);
+ return data;
+}
+
+unsigned prioq_size(Prioq *q) {
+
+ if (!q)
+ return 0;
+
+ return q->n_items;
+}
+
+bool prioq_isempty(Prioq *q) {
+
+ if (!q)
+ return true;
+
+ return q->n_items <= 0;
+}
diff --git a/src/basic/prioq.h b/src/basic/prioq.h
new file mode 100644
index 0000000..1fb57bf
--- /dev/null
+++ b/src/basic/prioq.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+
+typedef struct Prioq Prioq;
+
+#define PRIOQ_IDX_NULL ((unsigned) -1)
+
+Prioq *prioq_new(compare_func_t compare);
+Prioq *prioq_free(Prioq *q);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Prioq*, prioq_free);
+int prioq_ensure_allocated(Prioq **q, compare_func_t compare_func);
+
+int prioq_put(Prioq *q, void *data, unsigned *idx);
+int prioq_remove(Prioq *q, void *data, unsigned *idx);
+int prioq_reshuffle(Prioq *q, void *data, unsigned *idx);
+
+void *prioq_peek_by_index(Prioq *q, unsigned idx) _pure_;
+static inline void *prioq_peek(Prioq *q) {
+ return prioq_peek_by_index(q, 0);
+}
+void *prioq_pop(Prioq *q);
+
+#define PRIOQ_FOREACH_ITEM(q, p) \
+ for (unsigned _i = 0; (p = prioq_peek_by_index(q, _i)); _i++)
+
+unsigned prioq_size(Prioq *q) _pure_;
+bool prioq_isempty(Prioq *q) _pure_;
diff --git a/src/basic/proc-cmdline.c b/src/basic/proc-cmdline.c
new file mode 100644
index 0000000..1670001
--- /dev/null
+++ b/src/basic/proc-cmdline.c
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "fileio.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "special.h"
+#include "string-util.h"
+#include "util.h"
+#include "virt.h"
+
+int proc_cmdline(char **ret) {
+ const char *e;
+ assert(ret);
+
+ /* For testing purposes it is sometimes useful to be able to override what we consider /proc/cmdline to be */
+ e = secure_getenv("SYSTEMD_PROC_CMDLINE");
+ if (e) {
+ char *m;
+
+ m = strdup(e);
+ if (!m)
+ return -ENOMEM;
+
+ *ret = m;
+ return 0;
+ }
+
+ if (detect_container() > 0)
+ return get_process_cmdline(1, 0, false, ret);
+ else
+ return read_one_line_file("/proc/cmdline", ret);
+}
+
+static int proc_cmdline_extract_first(const char **p, char **ret_word, ProcCmdlineFlags flags) {
+ const char *q = *p;
+ int r;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ const char *c;
+
+ r = extract_first_word(&q, &word, NULL, EXTRACT_QUOTES|EXTRACT_RELAX);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ /* Filter out arguments that are intended only for the initrd */
+ c = startswith(word, "rd.");
+ if (c) {
+ if (!in_initrd())
+ continue;
+
+ if (FLAGS_SET(flags, PROC_CMDLINE_STRIP_RD_PREFIX)) {
+ r = free_and_strdup(&word, c);
+ if (r < 0)
+ return r;
+ }
+
+ } else if (FLAGS_SET(flags, PROC_CMDLINE_RD_STRICT) && in_initrd())
+ continue; /* And optionally filter out arguments that are intended only for the host */
+
+ *p = q;
+ *ret_word = TAKE_PTR(word);
+ return 1;
+ }
+
+ *p = q;
+ *ret_word = NULL;
+ return 0;
+}
+
+int proc_cmdline_parse_given(const char *line, proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags) {
+ const char *p;
+ int r;
+
+ assert(parse_item);
+
+ /* The PROC_CMDLINE_VALUE_OPTIONAL flag doesn't really make sense for proc_cmdline_parse(), let's make this
+ * clear. */
+ assert(!FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL));
+
+ p = line;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ char *value;
+
+ r = proc_cmdline_extract_first(&p, &word, flags);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ value = strchr(word, '=');
+ if (value)
+ *(value++) = 0;
+
+ r = parse_item(word, value, data);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int proc_cmdline_parse(proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags) {
+ _cleanup_free_ char *line = NULL;
+ int r;
+
+ assert(parse_item);
+
+ r = proc_cmdline(&line);
+ if (r < 0)
+ return r;
+
+ return proc_cmdline_parse_given(line, parse_item, data, flags);
+}
+
+static bool relaxed_equal_char(char a, char b) {
+ return a == b ||
+ (a == '_' && b == '-') ||
+ (a == '-' && b == '_');
+}
+
+char *proc_cmdline_key_startswith(const char *s, const char *prefix) {
+ assert(s);
+ assert(prefix);
+
+ /* Much like startswith(), but considers "-" and "_" the same */
+
+ for (; *prefix != 0; s++, prefix++)
+ if (!relaxed_equal_char(*s, *prefix))
+ return NULL;
+
+ return (char*) s;
+}
+
+bool proc_cmdline_key_streq(const char *x, const char *y) {
+ assert(x);
+ assert(y);
+
+ /* Much like streq(), but considers "-" and "_" the same */
+
+ for (; *x != 0 || *y != 0; x++, y++)
+ if (!relaxed_equal_char(*x, *y))
+ return false;
+
+ return true;
+}
+
+int proc_cmdline_get_key(const char *key, ProcCmdlineFlags flags, char **ret_value) {
+ _cleanup_free_ char *line = NULL, *ret = NULL;
+ bool found = false;
+ const char *p;
+ int r;
+
+ /* Looks for a specific key on the kernel command line. Supports three modes:
+ *
+ * a) The "ret_value" parameter is used. In this case a parameter beginning with the "key" string followed by
+ * "=" is searched for, and the value following it is returned in "ret_value".
+ *
+ * b) as above, but the PROC_CMDLINE_VALUE_OPTIONAL flag is set. In this case if the key is found as a separate
+ * word (i.e. not followed by "=" but instead by whitespace or the end of the command line), then this is
+ * also accepted, and "value" is returned as NULL.
+ *
+ * c) The "ret_value" parameter is NULL. In this case a search for the exact "key" parameter is performed.
+ *
+ * In all three cases, > 0 is returned if the key is found, 0 if not. */
+
+ if (isempty(key))
+ return -EINVAL;
+
+ if (FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL) && !ret_value)
+ return -EINVAL;
+
+ r = proc_cmdline(&line);
+ if (r < 0)
+ return r;
+
+ p = line;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = proc_cmdline_extract_first(&p, &word, flags);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (ret_value) {
+ const char *e;
+
+ e = proc_cmdline_key_startswith(word, key);
+ if (!e)
+ continue;
+
+ if (*e == '=') {
+ r = free_and_strdup(&ret, e+1);
+ if (r < 0)
+ return r;
+
+ found = true;
+
+ } else if (*e == 0 && FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL))
+ found = true;
+
+ } else {
+ if (streq(word, key)) {
+ found = true;
+ break; /* we found what we were looking for */
+ }
+ }
+ }
+
+ if (ret_value)
+ *ret_value = TAKE_PTR(ret);
+
+ return found;
+}
+
+int proc_cmdline_get_bool(const char *key, bool *ret) {
+ _cleanup_free_ char *v = NULL;
+ int r;
+
+ assert(ret);
+
+ r = proc_cmdline_get_key(key, PROC_CMDLINE_VALUE_OPTIONAL, &v);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ *ret = false;
+ return 0;
+ }
+
+ if (v) { /* parameter passed */
+ r = parse_boolean(v);
+ if (r < 0)
+ return r;
+ *ret = r;
+ } else /* no parameter passed */
+ *ret = true;
+
+ return 1;
+}
+
+int proc_cmdline_get_key_many_internal(ProcCmdlineFlags flags, ...) {
+ _cleanup_free_ char *line = NULL;
+ const char *p;
+ va_list ap;
+ int r, ret = 0;
+
+ /* The PROC_CMDLINE_VALUE_OPTIONAL flag doesn't really make sense for proc_cmdline_get_key_many(), let's make
+ * this clear. */
+ assert(!FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL));
+
+ /* This call may clobber arguments on failure! */
+
+ r = proc_cmdline(&line);
+ if (r < 0)
+ return r;
+
+ p = line;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = proc_cmdline_extract_first(&p, &word, flags);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ va_start(ap, flags);
+
+ for (;;) {
+ char **v;
+ const char *k, *e;
+
+ k = va_arg(ap, const char*);
+ if (!k)
+ break;
+
+ assert_se(v = va_arg(ap, char**));
+
+ e = proc_cmdline_key_startswith(word, k);
+ if (e && *e == '=') {
+ r = free_and_strdup(v, e + 1);
+ if (r < 0) {
+ va_end(ap);
+ return r;
+ }
+
+ ret++;
+ }
+ }
+
+ va_end(ap);
+ }
+
+ return ret;
+}
+
+int shall_restore_state(void) {
+ bool ret;
+ int r;
+
+ r = proc_cmdline_get_bool("systemd.restore_state", &ret);
+ if (r < 0)
+ return r;
+
+ return r > 0 ? ret : true;
+}
+
+static const char * const rlmap[] = {
+ "emergency", SPECIAL_EMERGENCY_TARGET,
+ "-b", SPECIAL_EMERGENCY_TARGET,
+ "rescue", SPECIAL_RESCUE_TARGET,
+ "single", SPECIAL_RESCUE_TARGET,
+ "-s", SPECIAL_RESCUE_TARGET,
+ "s", SPECIAL_RESCUE_TARGET,
+ "S", SPECIAL_RESCUE_TARGET,
+ "1", SPECIAL_RESCUE_TARGET,
+ "2", SPECIAL_MULTI_USER_TARGET,
+ "3", SPECIAL_MULTI_USER_TARGET,
+ "4", SPECIAL_MULTI_USER_TARGET,
+ "5", SPECIAL_GRAPHICAL_TARGET,
+ NULL
+};
+
+static const char * const rlmap_initrd[] = {
+ "emergency", SPECIAL_EMERGENCY_TARGET,
+ "rescue", SPECIAL_RESCUE_TARGET,
+ NULL
+};
+
+const char* runlevel_to_target(const char *word) {
+ const char * const *rlmap_ptr;
+ size_t i;
+
+ if (!word)
+ return NULL;
+
+ if (in_initrd()) {
+ word = startswith(word, "rd.");
+ if (!word)
+ return NULL;
+ }
+
+ rlmap_ptr = in_initrd() ? rlmap_initrd : rlmap;
+
+ for (i = 0; rlmap_ptr[i]; i += 2)
+ if (streq(word, rlmap_ptr[i]))
+ return rlmap_ptr[i+1];
+
+ return NULL;
+}
diff --git a/src/basic/proc-cmdline.h b/src/basic/proc-cmdline.h
new file mode 100644
index 0000000..ff04379
--- /dev/null
+++ b/src/basic/proc-cmdline.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "log.h"
+
+typedef enum ProcCmdlineFlags {
+ PROC_CMDLINE_STRIP_RD_PREFIX = 1 << 0,
+ PROC_CMDLINE_VALUE_OPTIONAL = 1 << 1,
+ PROC_CMDLINE_RD_STRICT = 1 << 2,
+} ProcCmdlineFlags;
+
+typedef int (*proc_cmdline_parse_t)(const char *key, const char *value, void *data);
+
+int proc_cmdline(char **ret);
+
+int proc_cmdline_parse_given(const char *line, proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags);
+int proc_cmdline_parse(const proc_cmdline_parse_t parse, void *userdata, ProcCmdlineFlags flags);
+
+int proc_cmdline_get_key(const char *parameter, ProcCmdlineFlags flags, char **value);
+int proc_cmdline_get_bool(const char *key, bool *ret);
+
+int proc_cmdline_get_key_many_internal(ProcCmdlineFlags flags, ...);
+#define proc_cmdline_get_key_many(flags, ...) proc_cmdline_get_key_many_internal(flags, __VA_ARGS__, NULL)
+
+char *proc_cmdline_key_startswith(const char *s, const char *prefix);
+bool proc_cmdline_key_streq(const char *x, const char *y);
+
+int shall_restore_state(void);
+const char* runlevel_to_target(const char *rl);
+
+/* A little helper call, to be used in proc_cmdline_parse_t callbacks */
+static inline bool proc_cmdline_value_missing(const char *key, const char *value) {
+ if (!value) {
+ log_warning("Missing argument for %s= kernel command line switch, ignoring.", key);
+ return true;
+ }
+
+ return false;
+}
diff --git a/src/basic/process-util.c b/src/basic/process-util.c
new file mode 100644
index 0000000..78ce43b
--- /dev/null
+++ b/src/basic/process-util.c
@@ -0,0 +1,1565 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/oom.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/personality.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <syslog.h>
+#include <unistd.h>
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "ioprio.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "process-util.h"
+#include "raw-clone.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "user-util.h"
+#include "util.h"
+
+int get_process_state(pid_t pid) {
+ const char *p;
+ char state;
+ int r;
+ _cleanup_free_ char *line = NULL;
+
+ assert(pid >= 0);
+
+ p = procfs_file_alloca(pid, "stat");
+
+ r = read_one_line_file(p, &line);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ p = strrchr(line, ')');
+ if (!p)
+ return -EIO;
+
+ p++;
+
+ if (sscanf(p, " %c", &state) != 1)
+ return -EIO;
+
+ return (unsigned char) state;
+}
+
+int get_process_comm(pid_t pid, char **ret) {
+ _cleanup_free_ char *escaped = NULL, *comm = NULL;
+ const char *p;
+ int r;
+
+ assert(ret);
+ assert(pid >= 0);
+
+ escaped = new(char, TASK_COMM_LEN);
+ if (!escaped)
+ return -ENOMEM;
+
+ p = procfs_file_alloca(pid, "comm");
+
+ r = read_one_line_file(p, &comm);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ /* Escape unprintable characters, just in case, but don't grow the string beyond the underlying size */
+ cellescape(escaped, TASK_COMM_LEN, comm);
+
+ *ret = TAKE_PTR(escaped);
+ return 0;
+}
+
+int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) {
+ _cleanup_fclose_ FILE *f = NULL;
+ bool space = false;
+ char *k;
+ _cleanup_free_ char *ans = NULL;
+ const char *p;
+ int c;
+
+ assert(line);
+ assert(pid >= 0);
+
+ /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing
+ * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most
+ * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If
+ * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a
+ * command line that resolves to the empty string will return the "comm" name of the process instead.
+ *
+ * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
+ * comm_fallback is false). Returns 0 and sets *line otherwise. */
+
+ p = procfs_file_alloca(pid, "cmdline");
+
+ f = fopen(p, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return -ESRCH;
+ return -errno;
+ }
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ if (max_length == 0) {
+ /* This is supposed to be a safety guard against runaway command lines. */
+ long l = sysconf(_SC_ARG_MAX);
+ assert(l > 0);
+ max_length = l;
+ }
+
+ if (max_length == 1) {
+
+ /* If there's only room for one byte, return the empty string */
+ ans = new0(char, 1);
+ if (!ans)
+ return -ENOMEM;
+
+ *line = TAKE_PTR(ans);
+ return 0;
+
+ } else {
+ bool dotdotdot = false;
+ size_t left;
+
+ ans = new(char, max_length);
+ if (!ans)
+ return -ENOMEM;
+
+ k = ans;
+ left = max_length;
+ while ((c = getc(f)) != EOF) {
+
+ if (isprint(c)) {
+
+ if (space) {
+ if (left <= 2) {
+ dotdotdot = true;
+ break;
+ }
+
+ *(k++) = ' ';
+ left--;
+ space = false;
+ }
+
+ if (left <= 1) {
+ dotdotdot = true;
+ break;
+ }
+
+ *(k++) = (char) c;
+ left--;
+ } else if (k > ans)
+ space = true;
+ }
+
+ if (dotdotdot) {
+ if (max_length <= 4) {
+ k = ans;
+ left = max_length;
+ } else {
+ k = ans + max_length - 4;
+ left = 4;
+
+ /* Eat up final spaces */
+ while (k > ans && isspace(k[-1])) {
+ k--;
+ left++;
+ }
+ }
+
+ strncpy(k, "...", left-1);
+ k[left-1] = 0;
+ } else
+ *k = 0;
+ }
+
+ /* Kernel threads have no argv[] */
+ if (isempty(ans)) {
+ _cleanup_free_ char *t = NULL;
+ int h;
+
+ ans = mfree(ans);
+
+ if (!comm_fallback)
+ return -ENOENT;
+
+ h = get_process_comm(pid, &t);
+ if (h < 0)
+ return h;
+
+ size_t l = strlen(t);
+
+ if (l + 3 <= max_length) {
+ ans = strjoin("[", t, "]");
+ if (!ans)
+ return -ENOMEM;
+
+ } else if (max_length <= 6) {
+ ans = new(char, max_length);
+ if (!ans)
+ return -ENOMEM;
+
+ memcpy(ans, "[...]", max_length-1);
+ ans[max_length-1] = 0;
+ } else {
+ t[max_length - 6] = 0;
+
+ /* Chop off final spaces */
+ delete_trailing_chars(t, WHITESPACE);
+
+ ans = strjoin("[", t, "...]");
+ if (!ans)
+ return -ENOMEM;
+ }
+
+ *line = TAKE_PTR(ans);
+ return 0;
+ }
+
+ k = realloc(ans, strlen(ans) + 1);
+ if (!k)
+ return -ENOMEM;
+
+ ans = NULL;
+ *line = k;
+
+ return 0;
+}
+
+int rename_process(const char name[]) {
+ static size_t mm_size = 0;
+ static char *mm = NULL;
+ bool truncated = false;
+ size_t l;
+
+ /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
+ * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
+ * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded;
+ * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be
+ * truncated.
+ *
+ * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */
+
+ if (isempty(name))
+ return -EINVAL; /* let's not confuse users unnecessarily with an empty name */
+
+ if (!is_main_thread())
+ return -EPERM; /* Let's not allow setting the process name from other threads than the main one, as we
+ * cache things without locking, and we make assumptions that PR_SET_NAME sets the
+ * process name that isn't correct on any other threads */
+
+ l = strlen(name);
+
+ /* First step, change the comm field. The main thread's comm is identical to the process comm. This means we
+ * can use PR_SET_NAME, which sets the thread name for the calling thread. */
+ if (prctl(PR_SET_NAME, name) < 0)
+ log_debug_errno(errno, "PR_SET_NAME failed: %m");
+ if (l >= TASK_COMM_LEN) /* Linux process names can be 15 chars at max */
+ truncated = true;
+
+ /* Second step, change glibc's ID of the process name. */
+ if (program_invocation_name) {
+ size_t k;
+
+ k = strlen(program_invocation_name);
+ strncpy(program_invocation_name, name, k);
+ if (l > k)
+ truncated = true;
+ }
+
+ /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
+ * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
+ * the end. This is the best option for changing /proc/self/cmdline. */
+
+ /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
+ * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
+ * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
+ * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
+ * mmap() is not. */
+ if (geteuid() != 0)
+ log_debug("Skipping PR_SET_MM, as we don't have privileges.");
+ else if (mm_size < l+1) {
+ size_t nn_size;
+ char *nn;
+
+ nn_size = PAGE_ALIGN(l+1);
+ nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (nn == MAP_FAILED) {
+ log_debug_errno(errno, "mmap() failed: %m");
+ goto use_saved_argv;
+ }
+
+ strncpy(nn, name, nn_size);
+
+ /* Now, let's tell the kernel about this new memory */
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
+ /* HACK: prctl() API is kind of dumb on this point. The existing end address may already be
+ * below the desired start address, in which case the kernel may have kicked this back due
+ * to a range-check failure (see linux/kernel/sys.c:validate_prctl_map() to see this in
+ * action). The proper solution would be to have a prctl() API that could set both start+end
+ * simultaneously, or at least let us query the existing address to anticipate this condition
+ * and respond accordingly. For now, we can only guess at the cause of this failure and try
+ * a workaround--which will briefly expand the arg space to something potentially huge before
+ * resizing it to what we want. */
+ log_debug_errno(errno, "PR_SET_MM_ARG_START failed, attempting PR_SET_MM_ARG_END hack: %m");
+
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) {
+ log_debug_errno(errno, "PR_SET_MM_ARG_END hack failed, proceeding without: %m");
+ (void) munmap(nn, nn_size);
+ goto use_saved_argv;
+ }
+
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
+ log_debug_errno(errno, "PR_SET_MM_ARG_START still failed, proceeding without: %m");
+ goto use_saved_argv;
+ }
+ } else {
+ /* And update the end pointer to the new end, too. If this fails, we don't really know what
+ * to do, it's pretty unlikely that we can rollback, hence we'll just accept the failure,
+ * and continue. */
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
+ log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
+ }
+
+ if (mm)
+ (void) munmap(mm, mm_size);
+
+ mm = nn;
+ mm_size = nn_size;
+ } else {
+ strncpy(mm, name, mm_size);
+
+ /* Update the end pointer, continuing regardless of any failure. */
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
+ log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
+ }
+
+use_saved_argv:
+ /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
+ * it still looks here */
+
+ if (saved_argc > 0) {
+ int i;
+
+ if (saved_argv[0]) {
+ size_t k;
+
+ k = strlen(saved_argv[0]);
+ strncpy(saved_argv[0], name, k);
+ if (l > k)
+ truncated = true;
+ }
+
+ for (i = 1; i < saved_argc; i++) {
+ if (!saved_argv[i])
+ break;
+
+ memzero(saved_argv[i], strlen(saved_argv[i]));
+ }
+ }
+
+ return !truncated;
+}
+
+int is_kernel_thread(pid_t pid) {
+ _cleanup_free_ char *line = NULL;
+ unsigned long long flags;
+ size_t l, i;
+ const char *p;
+ char *q;
+ int r;
+
+ if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
+ return 0;
+ if (!pid_is_valid(pid))
+ return -EINVAL;
+
+ p = procfs_file_alloca(pid, "stat");
+ r = read_one_line_file(p, &line);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ /* Skip past the comm field */
+ q = strrchr(line, ')');
+ if (!q)
+ return -EINVAL;
+ q++;
+
+ /* Skip 6 fields to reach the flags field */
+ for (i = 0; i < 6; i++) {
+ l = strspn(q, WHITESPACE);
+ if (l < 1)
+ return -EINVAL;
+ q += l;
+
+ l = strcspn(q, WHITESPACE);
+ if (l < 1)
+ return -EINVAL;
+ q += l;
+ }
+
+ /* Skip preceding whitespace */
+ l = strspn(q, WHITESPACE);
+ if (l < 1)
+ return -EINVAL;
+ q += l;
+
+ /* Truncate the rest */
+ l = strcspn(q, WHITESPACE);
+ if (l < 1)
+ return -EINVAL;
+ q[l] = 0;
+
+ r = safe_atollu(q, &flags);
+ if (r < 0)
+ return r;
+
+ return !!(flags & PF_KTHREAD);
+}
+
+int get_process_capeff(pid_t pid, char **capeff) {
+ const char *p;
+ int r;
+
+ assert(capeff);
+ assert(pid >= 0);
+
+ p = procfs_file_alloca(pid, "status");
+
+ r = get_proc_field(p, "CapEff", WHITESPACE, capeff);
+ if (r == -ENOENT)
+ return -ESRCH;
+
+ return r;
+}
+
+static int get_process_link_contents(const char *proc_file, char **name) {
+ int r;
+
+ assert(proc_file);
+ assert(name);
+
+ r = readlink_malloc(proc_file, name);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int get_process_exe(pid_t pid, char **name) {
+ const char *p;
+ char *d;
+ int r;
+
+ assert(pid >= 0);
+
+ p = procfs_file_alloca(pid, "exe");
+ r = get_process_link_contents(p, name);
+ if (r < 0)
+ return r;
+
+ d = endswith(*name, " (deleted)");
+ if (d)
+ *d = '\0';
+
+ return 0;
+}
+
+static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *p;
+ int r;
+
+ assert(field);
+ assert(uid);
+
+ if (pid < 0)
+ return -EINVAL;
+
+ p = procfs_file_alloca(pid, "status");
+ f = fopen(p, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return -ESRCH;
+ return -errno;
+ }
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+
+ if (startswith(l, field)) {
+ l += strlen(field);
+ l += strspn(l, WHITESPACE);
+
+ l[strcspn(l, WHITESPACE)] = 0;
+
+ return parse_uid(l, uid);
+ }
+ }
+
+ return -EIO;
+}
+
+int get_process_uid(pid_t pid, uid_t *uid) {
+
+ if (pid == 0 || pid == getpid_cached()) {
+ *uid = getuid();
+ return 0;
+ }
+
+ return get_process_id(pid, "Uid:", uid);
+}
+
+int get_process_gid(pid_t pid, gid_t *gid) {
+
+ if (pid == 0 || pid == getpid_cached()) {
+ *gid = getgid();
+ return 0;
+ }
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ return get_process_id(pid, "Gid:", gid);
+}
+
+int get_process_cwd(pid_t pid, char **cwd) {
+ const char *p;
+
+ assert(pid >= 0);
+
+ p = procfs_file_alloca(pid, "cwd");
+
+ return get_process_link_contents(p, cwd);
+}
+
+int get_process_root(pid_t pid, char **root) {
+ const char *p;
+
+ assert(pid >= 0);
+
+ p = procfs_file_alloca(pid, "root");
+
+ return get_process_link_contents(p, root);
+}
+
+#define ENVIRONMENT_BLOCK_MAX (5U*1024U*1024U)
+
+int get_process_environ(pid_t pid, char **env) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *outcome = NULL;
+ size_t allocated = 0, sz = 0;
+ const char *p;
+ int r;
+
+ assert(pid >= 0);
+ assert(env);
+
+ p = procfs_file_alloca(pid, "environ");
+
+ f = fopen(p, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return -ESRCH;
+ return -errno;
+ }
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ for (;;) {
+ char c;
+
+ if (sz >= ENVIRONMENT_BLOCK_MAX)
+ return -ENOBUFS;
+
+ if (!GREEDY_REALLOC(outcome, allocated, sz + 5))
+ return -ENOMEM;
+
+ r = safe_fgetc(f, &c);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (c == '\0')
+ outcome[sz++] = '\n';
+ else
+ sz += cescape_char(c, outcome + sz);
+ }
+
+ outcome[sz] = '\0';
+ *env = TAKE_PTR(outcome);
+
+ return 0;
+}
+
+int get_process_ppid(pid_t pid, pid_t *_ppid) {
+ int r;
+ _cleanup_free_ char *line = NULL;
+ long unsigned ppid;
+ const char *p;
+
+ assert(pid >= 0);
+ assert(_ppid);
+
+ if (pid == 0 || pid == getpid_cached()) {
+ *_ppid = getppid();
+ return 0;
+ }
+
+ p = procfs_file_alloca(pid, "stat");
+ r = read_one_line_file(p, &line);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ /* Let's skip the pid and comm fields. The latter is enclosed
+ * in () but does not escape any () in its value, so let's
+ * skip over it manually */
+
+ p = strrchr(line, ')');
+ if (!p)
+ return -EIO;
+
+ p++;
+
+ if (sscanf(p, " "
+ "%*c " /* state */
+ "%lu ", /* ppid */
+ &ppid) != 1)
+ return -EIO;
+
+ if ((long unsigned) (pid_t) ppid != ppid)
+ return -ERANGE;
+
+ *_ppid = (pid_t) ppid;
+
+ return 0;
+}
+
+int wait_for_terminate(pid_t pid, siginfo_t *status) {
+ siginfo_t dummy;
+
+ assert(pid >= 1);
+
+ if (!status)
+ status = &dummy;
+
+ for (;;) {
+ zero(*status);
+
+ if (waitid(P_PID, pid, status, WEXITED) < 0) {
+
+ if (errno == EINTR)
+ continue;
+
+ return negative_errno();
+ }
+
+ return 0;
+ }
+}
+
+/*
+ * Return values:
+ * < 0 : wait_for_terminate() failed to get the state of the
+ * process, the process was terminated by a signal, or
+ * failed for an unknown reason.
+ * >=0 : The process terminated normally, and its exit code is
+ * returned.
+ *
+ * That is, success is indicated by a return value of zero, and an
+ * error is indicated by a non-zero value.
+ *
+ * A warning is emitted if the process terminates abnormally,
+ * and also if it returns non-zero unless check_exit_code is true.
+ */
+int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags) {
+ _cleanup_free_ char *buffer = NULL;
+ siginfo_t status;
+ int r, prio;
+
+ assert(pid > 1);
+
+ if (!name) {
+ r = get_process_comm(pid, &buffer);
+ if (r < 0)
+ log_debug_errno(r, "Failed to acquire process name of " PID_FMT ", ignoring: %m", pid);
+ else
+ name = buffer;
+ }
+
+ prio = flags & WAIT_LOG_ABNORMAL ? LOG_ERR : LOG_DEBUG;
+
+ r = wait_for_terminate(pid, &status);
+ if (r < 0)
+ return log_full_errno(prio, r, "Failed to wait for %s: %m", strna(name));
+
+ if (status.si_code == CLD_EXITED) {
+ if (status.si_status != EXIT_SUCCESS)
+ log_full(flags & WAIT_LOG_NON_ZERO_EXIT_STATUS ? LOG_ERR : LOG_DEBUG,
+ "%s failed with exit status %i.", strna(name), status.si_status);
+ else
+ log_debug("%s succeeded.", name);
+
+ return status.si_status;
+
+ } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
+
+ log_full(prio, "%s terminated by signal %s.", strna(name), signal_to_string(status.si_status));
+ return -EPROTO;
+ }
+
+ log_full(prio, "%s failed due to unknown reason.", strna(name));
+ return -EPROTO;
+}
+
+/*
+ * Return values:
+ *
+ * < 0 : wait_for_terminate_with_timeout() failed to get the state of the process, the process timed out, the process
+ * was terminated by a signal, or failed for an unknown reason.
+ *
+ * >=0 : The process terminated normally with no failures.
+ *
+ * Success is indicated by a return value of zero, a timeout is indicated by ETIMEDOUT, and all other child failure
+ * states are indicated by error is indicated by a non-zero value.
+ *
+ * This call assumes SIGCHLD has been blocked already, in particular before the child to wait for has been forked off
+ * to remain entirely race-free.
+ */
+int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
+ sigset_t mask;
+ int r;
+ usec_t until;
+
+ assert_se(sigemptyset(&mask) == 0);
+ assert_se(sigaddset(&mask, SIGCHLD) == 0);
+
+ /* Drop into a sigtimewait-based timeout. Waiting for the
+ * pid to exit. */
+ until = now(CLOCK_MONOTONIC) + timeout;
+ for (;;) {
+ usec_t n;
+ siginfo_t status = {};
+ struct timespec ts;
+
+ n = now(CLOCK_MONOTONIC);
+ if (n >= until)
+ break;
+
+ r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
+ /* Assuming we woke due to the child exiting. */
+ if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
+ if (status.si_pid == pid) {
+ /* This is the correct child.*/
+ if (status.si_code == CLD_EXITED)
+ return (status.si_status == 0) ? 0 : -EPROTO;
+ else
+ return -EPROTO;
+ }
+ }
+ /* Not the child, check for errors and proceed appropriately */
+ if (r < 0) {
+ switch (r) {
+ case -EAGAIN:
+ /* Timed out, child is likely hung. */
+ return -ETIMEDOUT;
+ case -EINTR:
+ /* Received a different signal and should retry */
+ continue;
+ default:
+ /* Return any unexpected errors */
+ return r;
+ }
+ }
+ }
+
+ return -EPROTO;
+}
+
+void sigkill_wait(pid_t pid) {
+ assert(pid > 1);
+
+ if (kill(pid, SIGKILL) >= 0)
+ (void) wait_for_terminate(pid, NULL);
+}
+
+void sigkill_waitp(pid_t *pid) {
+ PROTECT_ERRNO;
+
+ if (!pid)
+ return;
+ if (*pid <= 1)
+ return;
+
+ sigkill_wait(*pid);
+}
+
+void sigterm_wait(pid_t pid) {
+ assert(pid > 1);
+
+ if (kill_and_sigcont(pid, SIGTERM) >= 0)
+ (void) wait_for_terminate(pid, NULL);
+}
+
+int kill_and_sigcont(pid_t pid, int sig) {
+ int r;
+
+ r = kill(pid, sig) < 0 ? -errno : 0;
+
+ /* If this worked, also send SIGCONT, unless we already just sent a SIGCONT, or SIGKILL was sent which isn't
+ * affected by a process being suspended anyway. */
+ if (r >= 0 && !IN_SET(sig, SIGCONT, SIGKILL))
+ (void) kill(pid, SIGCONT);
+
+ return r;
+}
+
+int getenv_for_pid(pid_t pid, const char *field, char **ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char *value = NULL;
+ const char *path;
+ size_t l, sum = 0;
+ int r;
+
+ assert(pid >= 0);
+ assert(field);
+ assert(ret);
+
+ if (pid == 0 || pid == getpid_cached()) {
+ const char *e;
+
+ e = getenv(field);
+ if (!e) {
+ *ret = NULL;
+ return 0;
+ }
+
+ value = strdup(e);
+ if (!value)
+ return -ENOMEM;
+
+ *ret = value;
+ return 1;
+ }
+
+ if (!pid_is_valid(pid))
+ return -EINVAL;
+
+ path = procfs_file_alloca(pid, "environ");
+
+ f = fopen(path, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return -ESRCH;
+
+ return -errno;
+ }
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ l = strlen(field);
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ if (sum > ENVIRONMENT_BLOCK_MAX) /* Give up searching eventually */
+ return -ENOBUFS;
+
+ r = read_nul_string(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0) /* EOF */
+ break;
+
+ sum += r;
+
+ if (strneq(line, field, l) && line[l] == '=') {
+ value = strdup(line + l + 1);
+ if (!value)
+ return -ENOMEM;
+
+ *ret = value;
+ return 1;
+ }
+ }
+
+ *ret = NULL;
+ return 0;
+}
+
+bool pid_is_unwaited(pid_t pid) {
+ /* Checks whether a PID is still valid at all, including a zombie */
+
+ if (pid < 0)
+ return false;
+
+ if (pid <= 1) /* If we or PID 1 would be dead and have been waited for, this code would not be running */
+ return true;
+
+ if (pid == getpid_cached())
+ return true;
+
+ if (kill(pid, 0) >= 0)
+ return true;
+
+ return errno != ESRCH;
+}
+
+bool pid_is_alive(pid_t pid) {
+ int r;
+
+ /* Checks whether a PID is still valid and not a zombie */
+
+ if (pid < 0)
+ return false;
+
+ if (pid <= 1) /* If we or PID 1 would be a zombie, this code would not be running */
+ return true;
+
+ if (pid == getpid_cached())
+ return true;
+
+ r = get_process_state(pid);
+ if (IN_SET(r, -ESRCH, 'Z'))
+ return false;
+
+ return true;
+}
+
+int pid_from_same_root_fs(pid_t pid) {
+ const char *root;
+
+ if (pid < 0)
+ return false;
+
+ if (pid == 0 || pid == getpid_cached())
+ return true;
+
+ root = procfs_file_alloca(pid, "root");
+
+ return files_same(root, "/proc/1/root", 0);
+}
+
+bool is_main_thread(void) {
+ static thread_local int cached = 0;
+
+ if (_unlikely_(cached == 0))
+ cached = getpid_cached() == gettid() ? 1 : -1;
+
+ return cached > 0;
+}
+
+_noreturn_ void freeze(void) {
+
+ log_close();
+
+ /* Make sure nobody waits for us on a socket anymore */
+ close_all_fds(NULL, 0);
+
+ sync();
+
+ /* Let's not freeze right away, but keep reaping zombies. */
+ for (;;) {
+ int r;
+ siginfo_t si = {};
+
+ r = waitid(P_ALL, 0, &si, WEXITED);
+ if (r < 0 && errno != EINTR)
+ break;
+ }
+
+ /* waitid() failed with an unexpected error, things are really borked. Freeze now! */
+ for (;;)
+ pause();
+}
+
+bool oom_score_adjust_is_valid(int oa) {
+ return oa >= OOM_SCORE_ADJ_MIN && oa <= OOM_SCORE_ADJ_MAX;
+}
+
+unsigned long personality_from_string(const char *p) {
+ int architecture;
+
+ if (!p)
+ return PERSONALITY_INVALID;
+
+ /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
+ * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
+ * the same register size. */
+
+ architecture = architecture_from_string(p);
+ if (architecture < 0)
+ return PERSONALITY_INVALID;
+
+ if (architecture == native_architecture())
+ return PER_LINUX;
+#ifdef SECONDARY_ARCHITECTURE
+ if (architecture == SECONDARY_ARCHITECTURE)
+ return PER_LINUX32;
+#endif
+
+ return PERSONALITY_INVALID;
+}
+
+const char* personality_to_string(unsigned long p) {
+ int architecture = _ARCHITECTURE_INVALID;
+
+ if (p == PER_LINUX)
+ architecture = native_architecture();
+#ifdef SECONDARY_ARCHITECTURE
+ else if (p == PER_LINUX32)
+ architecture = SECONDARY_ARCHITECTURE;
+#endif
+
+ if (architecture < 0)
+ return NULL;
+
+ return architecture_to_string(architecture);
+}
+
+int safe_personality(unsigned long p) {
+ int ret;
+
+ /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
+ * and in others as negative return value containing an errno-like value. Let's work around this: this is a
+ * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and
+ * the return value indicating the same issue, so that we are definitely on the safe side.
+ *
+ * See https://github.com/systemd/systemd/issues/6737 */
+
+ errno = 0;
+ ret = personality(p);
+ if (ret < 0) {
+ if (errno != 0)
+ return -errno;
+
+ errno = -ret;
+ }
+
+ return ret;
+}
+
+int opinionated_personality(unsigned long *ret) {
+ int current;
+
+ /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
+ * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
+ * two most relevant personalities: PER_LINUX and PER_LINUX32. */
+
+ current = safe_personality(PERSONALITY_INVALID);
+ if (current < 0)
+ return current;
+
+ if (((unsigned long) current & 0xffff) == PER_LINUX32)
+ *ret = PER_LINUX32;
+ else
+ *ret = PER_LINUX;
+
+ return 0;
+}
+
+void valgrind_summary_hack(void) {
+#if HAVE_VALGRIND_VALGRIND_H
+ if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
+ pid_t pid;
+ pid = raw_clone(SIGCHLD);
+ if (pid < 0)
+ log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
+ else if (pid == 0)
+ exit(EXIT_SUCCESS);
+ else {
+ log_info("Spawned valgrind helper as PID "PID_FMT".", pid);
+ (void) wait_for_terminate(pid, NULL);
+ }
+ }
+#endif
+}
+
+int pid_compare_func(const pid_t *a, const pid_t *b) {
+ /* Suitable for usage in qsort() */
+ return CMP(*a, *b);
+}
+
+int ioprio_parse_priority(const char *s, int *ret) {
+ int i, r;
+
+ assert(s);
+ assert(ret);
+
+ r = safe_atoi(s, &i);
+ if (r < 0)
+ return r;
+
+ if (!ioprio_priority_is_valid(i))
+ return -EINVAL;
+
+ *ret = i;
+ return 0;
+}
+
+/* The cached PID, possible values:
+ *
+ * == UNSET [0] → cache not initialized yet
+ * == BUSY [-1] → some thread is initializing it at the moment
+ * any other → the cached PID
+ */
+
+#define CACHED_PID_UNSET ((pid_t) 0)
+#define CACHED_PID_BUSY ((pid_t) -1)
+
+static pid_t cached_pid = CACHED_PID_UNSET;
+
+void reset_cached_pid(void) {
+ /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */
+ cached_pid = CACHED_PID_UNSET;
+}
+
+/* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
+ * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
+ * libpthread, as it is part of glibc anyway. */
+extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void *dso_handle);
+extern void* __dso_handle _weak_;
+
+pid_t getpid_cached(void) {
+ static bool installed = false;
+ pid_t current_value;
+
+ /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
+ * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
+ * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
+ * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
+ *
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
+ * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
+ */
+
+ current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);
+
+ switch (current_value) {
+
+ case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
+ pid_t new_pid;
+
+ new_pid = raw_getpid();
+
+ if (!installed) {
+ /* __register_atfork() either returns 0 or -ENOMEM, in its glibc implementation. Since it's
+ * only half-documented (glibc doesn't document it but LSB does — though only superficially)
+ * we'll check for errors only in the most generic fashion possible. */
+
+ if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
+ /* OOM? Let's try again later */
+ cached_pid = CACHED_PID_UNSET;
+ return new_pid;
+ }
+
+ installed = true;
+ }
+
+ cached_pid = new_pid;
+ return new_pid;
+ }
+
+ case CACHED_PID_BUSY: /* Somebody else is currently initializing */
+ return raw_getpid();
+
+ default: /* Properly initialized */
+ return current_value;
+ }
+}
+
+int must_be_root(void) {
+
+ if (geteuid() == 0)
+ return 0;
+
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Need to be root.");
+}
+
+int safe_fork_full(
+ const char *name,
+ const int except_fds[],
+ size_t n_except_fds,
+ ForkFlags flags,
+ pid_t *ret_pid) {
+
+ pid_t original_pid, pid;
+ sigset_t saved_ss, ss;
+ bool block_signals = false;
+ int prio, r;
+
+ /* A wrapper around fork(), that does a couple of important initializations in addition to mere forking. Always
+ * returns the child's PID in *ret_pid. Returns == 0 in the child, and > 0 in the parent. */
+
+ prio = flags & FORK_LOG ? LOG_ERR : LOG_DEBUG;
+
+ original_pid = getpid_cached();
+
+ if (flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG)) {
+ /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can
+ * be sure that SIGTERMs are not lost we might send to the child. */
+
+ assert_se(sigfillset(&ss) >= 0);
+ block_signals = true;
+
+ } else if (flags & FORK_WAIT) {
+ /* Let's block SIGCHLD at least, so that we can safely watch for the child process */
+
+ assert_se(sigemptyset(&ss) >= 0);
+ assert_se(sigaddset(&ss, SIGCHLD) >= 0);
+ block_signals = true;
+ }
+
+ if (block_signals)
+ if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0)
+ return log_full_errno(prio, errno, "Failed to set signal mask: %m");
+
+ if (flags & FORK_NEW_MOUNTNS)
+ pid = raw_clone(SIGCHLD|CLONE_NEWNS);
+ else
+ pid = fork();
+ if (pid < 0) {
+ r = -errno;
+
+ if (block_signals) /* undo what we did above */
+ (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
+
+ return log_full_errno(prio, r, "Failed to fork: %m");
+ }
+ if (pid > 0) {
+ /* We are in the parent process */
+
+ log_debug("Successfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
+
+ if (flags & FORK_WAIT) {
+ r = wait_for_terminate_and_check(name, pid, (flags & FORK_LOG ? WAIT_LOG : 0));
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS) /* exit status > 0 should be treated as failure, too */
+ return -EPROTO;
+ }
+
+ if (block_signals) /* undo what we did above */
+ (void) sigprocmask(SIG_SETMASK, &saved_ss, NULL);
+
+ if (ret_pid)
+ *ret_pid = pid;
+
+ return 1;
+ }
+
+ /* We are in the child process */
+
+ if (flags & FORK_REOPEN_LOG) {
+ /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
+ log_close();
+ log_set_open_when_needed(true);
+ }
+
+ if (name) {
+ r = rename_process(name);
+ if (r < 0)
+ log_full_errno(flags & FORK_LOG ? LOG_WARNING : LOG_DEBUG,
+ r, "Failed to rename process, ignoring: %m");
+ }
+
+ if (flags & FORK_DEATHSIG)
+ if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) {
+ log_full_errno(prio, errno, "Failed to set death signal: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (flags & FORK_RESET_SIGNALS) {
+ r = reset_all_signal_handlers();
+ if (r < 0) {
+ log_full_errno(prio, r, "Failed to reset signal handlers: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
+ r = reset_signal_mask();
+ if (r < 0) {
+ log_full_errno(prio, r, "Failed to reset signal mask: %m");
+ _exit(EXIT_FAILURE);
+ }
+ } else if (block_signals) { /* undo what we did above */
+ if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
+ log_full_errno(prio, errno, "Failed to restore signal mask: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (flags & FORK_DEATHSIG) {
+ pid_t ppid;
+ /* Let's see if the parent PID is still the one we started from? If not, then the parent
+ * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
+
+ ppid = getppid();
+ if (ppid == 0)
+ /* Parent is in a differn't PID namespace. */;
+ else if (ppid != original_pid) {
+ log_debug("Parent died early, raising SIGTERM.");
+ (void) raise(SIGTERM);
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (FLAGS_SET(flags, FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE)) {
+
+ /* Optionally, make sure we never propagate mounts to the host. */
+
+ if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
+ log_full_errno(prio, errno, "Failed to remount root directory as MS_SLAVE: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (flags & FORK_CLOSE_ALL_FDS) {
+ /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
+ log_close();
+
+ r = close_all_fds(except_fds, n_except_fds);
+ if (r < 0) {
+ log_full_errno(prio, r, "Failed to close all file descriptors: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ /* When we were asked to reopen the logs, do so again now */
+ if (flags & FORK_REOPEN_LOG) {
+ log_open();
+ log_set_open_when_needed(false);
+ }
+
+ if (flags & FORK_NULL_STDIO) {
+ r = make_null_stdio();
+ if (r < 0) {
+ log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (flags & FORK_RLIMIT_NOFILE_SAFE) {
+ r = rlimit_nofile_safe();
+ if (r < 0) {
+ log_full_errno(prio, r, "Failed to lower RLIMIT_NOFILE's soft limit to 1K: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (ret_pid)
+ *ret_pid = getpid_cached();
+
+ return 0;
+}
+
+int namespace_fork(
+ const char *outer_name,
+ const char *inner_name,
+ const int except_fds[],
+ size_t n_except_fds,
+ ForkFlags flags,
+ int pidns_fd,
+ int mntns_fd,
+ int netns_fd,
+ int userns_fd,
+ int root_fd,
+ pid_t *ret_pid) {
+
+ int r;
+
+ /* This is much like safe_fork(), but forks twice, and joins the specified namespaces in the middle
+ * process. This ensures that we are fully a member of the destination namespace, with pidns an all, so that
+ * /proc/self/fd works correctly. */
+
+ r = safe_fork_full(outer_name, except_fds, n_except_fds, (flags|FORK_DEATHSIG) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE), ret_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ pid_t pid;
+
+ /* Child */
+
+ r = namespace_enter(pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd);
+ if (r < 0) {
+ log_full_errno(FLAGS_SET(flags, FORK_LOG) ? LOG_ERR : LOG_DEBUG, r, "Failed to join namespace: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* We mask a few flags here that either make no sense for the grandchild, or that we don't have to do again */
+ r = safe_fork_full(inner_name, except_fds, n_except_fds, flags & ~(FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_NULL_STDIO), &pid);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+ if (r == 0) {
+ /* Child */
+ if (ret_pid)
+ *ret_pid = pid;
+ return 0;
+ }
+
+ r = wait_for_terminate_and_check(inner_name, pid, FLAGS_SET(flags, FORK_LOG) ? WAIT_LOG : 0);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(r);
+ }
+
+ return 1;
+}
+
+int fork_agent(const char *name, const int except[], size_t n_except, pid_t *ret_pid, const char *path, ...) {
+ bool stdout_is_tty, stderr_is_tty;
+ size_t n, i;
+ va_list ap;
+ char **l;
+ int r;
+
+ assert(path);
+
+ /* Spawns a temporary TTY agent, making sure it goes away when we go away */
+
+ r = safe_fork_full(name, except, n_except, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS, ret_pid);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ /* In the child: */
+
+ stdout_is_tty = isatty(STDOUT_FILENO);
+ stderr_is_tty = isatty(STDERR_FILENO);
+
+ if (!stdout_is_tty || !stderr_is_tty) {
+ int fd;
+
+ /* Detach from stdout/stderr. and reopen
+ * /dev/tty for them. This is important to
+ * ensure that when systemctl is started via
+ * popen() or a similar call that expects to
+ * read EOF we actually do generate EOF and
+ * not delay this indefinitely by because we
+ * keep an unused copy of stdin around. */
+ fd = open("/dev/tty", O_WRONLY);
+ if (fd < 0) {
+ log_error_errno(errno, "Failed to open /dev/tty: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
+ log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
+ log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ safe_close_above_stdio(fd);
+ }
+
+ (void) rlimit_nofile_safe();
+
+ /* Count arguments */
+ va_start(ap, path);
+ for (n = 0; va_arg(ap, char*); n++)
+ ;
+ va_end(ap);
+
+ /* Allocate strv */
+ l = newa(char*, n + 1);
+
+ /* Fill in arguments */
+ va_start(ap, path);
+ for (i = 0; i <= n; i++)
+ l[i] = va_arg(ap, char*);
+ va_end(ap);
+
+ execv(path, l);
+ _exit(EXIT_FAILURE);
+}
+
+int set_oom_score_adjust(int value) {
+ char t[DECIMAL_STR_MAX(int)];
+
+ sprintf(t, "%i", value);
+
+ return write_string_file("/proc/self/oom_score_adj", t,
+ WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER);
+}
+
+static const char *const ioprio_class_table[] = {
+ [IOPRIO_CLASS_NONE] = "none",
+ [IOPRIO_CLASS_RT] = "realtime",
+ [IOPRIO_CLASS_BE] = "best-effort",
+ [IOPRIO_CLASS_IDLE] = "idle"
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, IOPRIO_N_CLASSES);
+
+static const char *const sigchld_code_table[] = {
+ [CLD_EXITED] = "exited",
+ [CLD_KILLED] = "killed",
+ [CLD_DUMPED] = "dumped",
+ [CLD_TRAPPED] = "trapped",
+ [CLD_STOPPED] = "stopped",
+ [CLD_CONTINUED] = "continued",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
+
+static const char* const sched_policy_table[] = {
+ [SCHED_OTHER] = "other",
+ [SCHED_BATCH] = "batch",
+ [SCHED_IDLE] = "idle",
+ [SCHED_FIFO] = "fifo",
+ [SCHED_RR] = "rr"
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);
diff --git a/src/basic/process-util.h b/src/basic/process-util.h
new file mode 100644
index 0000000..c85ea30
--- /dev/null
+++ b/src/basic/process-util.h
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <alloca.h>
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+
+#include "format-util.h"
+#include "ioprio.h"
+#include "macro.h"
+#include "time-util.h"
+
+#define procfs_file_alloca(pid, field) \
+ ({ \
+ pid_t _pid_ = (pid); \
+ const char *_r_; \
+ if (_pid_ == 0) { \
+ _r_ = ("/proc/self/" field); \
+ } else { \
+ _r_ = newa(char, STRLEN("/proc/") + DECIMAL_STR_MAX(pid_t) + 1 + sizeof(field)); \
+ sprintf((char*) _r_, "/proc/"PID_FMT"/" field, _pid_); \
+ } \
+ _r_; \
+ })
+
+int get_process_state(pid_t pid);
+int get_process_comm(pid_t pid, char **name);
+int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line);
+int get_process_exe(pid_t pid, char **name);
+int get_process_uid(pid_t pid, uid_t *uid);
+int get_process_gid(pid_t pid, gid_t *gid);
+int get_process_capeff(pid_t pid, char **capeff);
+int get_process_cwd(pid_t pid, char **cwd);
+int get_process_root(pid_t pid, char **root);
+int get_process_environ(pid_t pid, char **environ);
+int get_process_ppid(pid_t pid, pid_t *ppid);
+
+int wait_for_terminate(pid_t pid, siginfo_t *status);
+
+typedef enum WaitFlags {
+ WAIT_LOG_ABNORMAL = 1 << 0,
+ WAIT_LOG_NON_ZERO_EXIT_STATUS = 1 << 1,
+
+ /* A shortcut for requesting the most complete logging */
+ WAIT_LOG = WAIT_LOG_ABNORMAL|WAIT_LOG_NON_ZERO_EXIT_STATUS,
+} WaitFlags;
+
+int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags);
+int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout);
+
+void sigkill_wait(pid_t pid);
+void sigkill_waitp(pid_t *pid);
+void sigterm_wait(pid_t pid);
+
+int kill_and_sigcont(pid_t pid, int sig);
+
+int rename_process(const char name[]);
+int is_kernel_thread(pid_t pid);
+
+int getenv_for_pid(pid_t pid, const char *field, char **_value);
+
+bool pid_is_alive(pid_t pid);
+bool pid_is_unwaited(pid_t pid);
+int pid_from_same_root_fs(pid_t pid);
+
+bool is_main_thread(void);
+
+_noreturn_ void freeze(void);
+
+bool oom_score_adjust_is_valid(int oa);
+
+#ifndef PERSONALITY_INVALID
+/* personality(7) documents that 0xffffffffUL is used for querying the
+ * current personality, hence let's use that here as error
+ * indicator. */
+#define PERSONALITY_INVALID 0xffffffffLU
+#endif
+
+unsigned long personality_from_string(const char *p);
+const char *personality_to_string(unsigned long);
+
+int safe_personality(unsigned long p);
+int opinionated_personality(unsigned long *ret);
+
+int ioprio_class_to_string_alloc(int i, char **s);
+int ioprio_class_from_string(const char *s);
+
+const char *sigchld_code_to_string(int i) _const_;
+int sigchld_code_from_string(const char *s) _pure_;
+
+int sched_policy_to_string_alloc(int i, char **s);
+int sched_policy_from_string(const char *s);
+
+static inline pid_t PTR_TO_PID(const void *p) {
+ return (pid_t) ((uintptr_t) p);
+}
+
+static inline void* PID_TO_PTR(pid_t pid) {
+ return (void*) ((uintptr_t) pid);
+}
+
+void valgrind_summary_hack(void);
+
+int pid_compare_func(const pid_t *a, const pid_t *b);
+
+static inline bool nice_is_valid(int n) {
+ return n >= PRIO_MIN && n < PRIO_MAX;
+}
+
+static inline bool sched_policy_is_valid(int i) {
+ return IN_SET(i, SCHED_OTHER, SCHED_BATCH, SCHED_IDLE, SCHED_FIFO, SCHED_RR);
+}
+
+static inline bool sched_priority_is_valid(int i) {
+ return i >= 0 && i <= sched_get_priority_max(SCHED_RR);
+}
+
+static inline bool ioprio_class_is_valid(int i) {
+ return IN_SET(i, IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE);
+}
+
+static inline bool ioprio_priority_is_valid(int i) {
+ return i >= 0 && i < IOPRIO_BE_NR;
+}
+
+static inline bool pid_is_valid(pid_t p) {
+ return p > 0;
+}
+
+int ioprio_parse_priority(const char *s, int *ret);
+
+pid_t getpid_cached(void);
+void reset_cached_pid(void);
+
+int must_be_root(void);
+
+typedef enum ForkFlags {
+ FORK_RESET_SIGNALS = 1 << 0, /* Reset all signal handlers and signal mask */
+ FORK_CLOSE_ALL_FDS = 1 << 1, /* Close all open file descriptors in the child, except for 0,1,2 */
+ FORK_DEATHSIG = 1 << 2, /* Set PR_DEATHSIG in the child */
+ FORK_NULL_STDIO = 1 << 3, /* Connect 0,1,2 to /dev/null */
+ FORK_REOPEN_LOG = 1 << 4, /* Reopen log connection */
+ FORK_LOG = 1 << 5, /* Log above LOG_DEBUG log level about failures */
+ FORK_WAIT = 1 << 6, /* Wait until child exited */
+ FORK_NEW_MOUNTNS = 1 << 7, /* Run child in its own mount namespace */
+ FORK_MOUNTNS_SLAVE = 1 << 8, /* Make child's mount namespace MS_SLAVE */
+ FORK_RLIMIT_NOFILE_SAFE = 1 << 9, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */
+} ForkFlags;
+
+int safe_fork_full(const char *name, const int except_fds[], size_t n_except_fds, ForkFlags flags, pid_t *ret_pid);
+
+static inline int safe_fork(const char *name, ForkFlags flags, pid_t *ret_pid) {
+ return safe_fork_full(name, NULL, 0, flags, ret_pid);
+}
+
+int namespace_fork(const char *outer_name, const char *inner_name, const int except_fds[], size_t n_except_fds, ForkFlags flags, int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd, pid_t *ret_pid);
+
+int fork_agent(const char *name, const int except[], size_t n_except, pid_t *pid, const char *path, ...) _sentinel_;
+
+int set_oom_score_adjust(int value);
+
+#if SIZEOF_PID_T == 4
+/* The highest possibly (theoretic) pid_t value on this architecture. */
+#define PID_T_MAX ((pid_t) INT32_MAX)
+/* The maximum number of concurrent processes Linux allows on this architecture, as well as the highest valid PID value
+ * the kernel will potentially assign. This reflects a value compiled into the kernel (PID_MAX_LIMIT), and sets the
+ * upper boundary on what may be written to the /proc/sys/kernel/pid_max sysctl (but do note that the sysctl is off by
+ * 1, since PID 0 can never exist and there can hence only be one process less than the limit would suggest). Since
+ * these values are documented in proc(5) we feel quite confident that they are stable enough for the near future at
+ * least to define them here too. */
+#define TASKS_MAX 4194303U
+#elif SIZEOF_PID_T == 2
+#define PID_T_MAX ((pid_t) INT16_MAX)
+#define TASKS_MAX 32767U
+#else
+#error "Unknown pid_t size"
+#endif
+
+assert_cc(TASKS_MAX <= (unsigned long) PID_T_MAX)
+
+/* Like TAKE_PTR() but for child PIDs, resetting them to 0 */
+#define TAKE_PID(pid) \
+ ({ \
+ pid_t _pid_ = (pid); \
+ (pid) = 0; \
+ _pid_; \
+ })
diff --git a/src/basic/procfs-util.c b/src/basic/procfs-util.c
new file mode 100644
index 0000000..7aaf95b
--- /dev/null
+++ b/src/basic/procfs-util.c
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "procfs-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+int procfs_tasks_get_limit(uint64_t *ret) {
+ _cleanup_free_ char *value = NULL;
+ uint64_t pid_max, threads_max;
+ int r;
+
+ assert(ret);
+
+ /* So there are two sysctl files that control the system limit of processes:
+ *
+ * 1. kernel.threads-max: this is probably the sysctl that makes more sense, as it directly puts a limit on
+ * concurrent tasks.
+ *
+ * 2. kernel.pid_max: this limits the numeric range PIDs can take, and thus indirectly also limits the number
+ * of concurrent threads. AFAICS it's primarily a compatibility concept: some crappy old code used a signed
+ * 16bit type for PIDs, hence the kernel provides a way to ensure the PIDs never go beyond INT16_MAX by
+ * default.
+ *
+ * By default #2 is set to much lower values than #1, hence the limit people come into contact with first, as
+ * it's the lowest boundary they need to bump when they want higher number of processes.
+ *
+ * Also note the weird definition of #2: PIDs assigned will be kept below this value, which means the number of
+ * tasks that can be created is one lower, as PID 0 is not a valid process ID. */
+
+ r = read_one_line_file("/proc/sys/kernel/pid_max", &value);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(value, &pid_max);
+ if (r < 0)
+ return r;
+
+ value = mfree(value);
+ r = read_one_line_file("/proc/sys/kernel/threads-max", &value);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(value, &threads_max);
+ if (r < 0)
+ return r;
+
+ /* Subtract one from pid_max, since PID 0 is not a valid PID */
+ *ret = MIN(pid_max-1, threads_max);
+ return 0;
+}
+
+int procfs_tasks_set_limit(uint64_t limit) {
+ char buffer[DECIMAL_STR_MAX(uint64_t)+1];
+ _cleanup_free_ char *value = NULL;
+ uint64_t pid_max;
+ int r;
+
+ if (limit == 0) /* This makes no sense, we are userspace and hence count as tasks too, and we want to live,
+ * hence the limit conceptually has to be above 0. Also, most likely if anyone asks for a zero
+ * limit he/she probably means "no limit", hence let's better refuse this to avoid
+ * confusion. */
+ return -EINVAL;
+
+ /* The Linux kernel doesn't allow this value to go below 20, hence don't allow this either, higher values than
+ * TASKS_MAX are not accepted by the pid_max sysctl. We'll treat anything this high as "unbounded" and hence
+ * set it to the maximum. */
+ limit = CLAMP(limit, 20U, TASKS_MAX);
+
+ r = read_one_line_file("/proc/sys/kernel/pid_max", &value);
+ if (r < 0)
+ return r;
+ r = safe_atou64(value, &pid_max);
+ if (r < 0)
+ return r;
+
+ /* As pid_max is about the numeric pid_t range we'll bump it if necessary, but only ever increase it, never
+ * decrease it, as threads-max is the much more relevant sysctl. */
+ if (limit > pid_max-1) {
+ sprintf(buffer, "%" PRIu64, limit+1); /* Add one, since PID 0 is not a valid PID */
+ r = write_string_file("/proc/sys/kernel/pid_max", buffer, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+ }
+
+ sprintf(buffer, "%" PRIu64, limit);
+ r = write_string_file("/proc/sys/kernel/threads-max", buffer, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0) {
+ uint64_t threads_max;
+
+ /* Hmm, we couldn't write this? If so, maybe it was already set properly? In that case let's not
+ * generate an error */
+
+ value = mfree(value);
+ if (read_one_line_file("/proc/sys/kernel/threads-max", &value) < 0)
+ return r; /* return original error */
+
+ if (safe_atou64(value, &threads_max) < 0)
+ return r; /* return original error */
+
+ if (MIN(pid_max-1, threads_max) != limit)
+ return r; /* return original error */
+
+ /* Yay! Value set already matches what we were trying to set, hence consider this a success. */
+ }
+
+ return 0;
+}
+
+int procfs_tasks_get_current(uint64_t *ret) {
+ _cleanup_free_ char *value = NULL;
+ const char *p, *nr;
+ size_t n;
+ int r;
+
+ assert(ret);
+
+ r = read_one_line_file("/proc/loadavg", &value);
+ if (r < 0)
+ return r;
+
+ /* Look for the second part of the fourth field, which is separated by a slash from the first part. None of the
+ * earlier fields use a slash, hence let's use this to find the right spot. */
+ p = strchr(value, '/');
+ if (!p)
+ return -EINVAL;
+
+ p++;
+ n = strspn(p, DIGITS);
+ nr = strndupa(p, n);
+
+ return safe_atou64(nr, ret);
+}
+
+static uint64_t calc_gcd64(uint64_t a, uint64_t b) {
+
+ while (b > 0) {
+ uint64_t t;
+
+ t = a % b;
+
+ a = b;
+ b = t;
+ }
+
+ return a;
+}
+
+int procfs_cpu_get_usage(nsec_t *ret) {
+ _cleanup_free_ char *first_line = NULL;
+ unsigned long user_ticks, nice_ticks, system_ticks, irq_ticks, softirq_ticks,
+ guest_ticks = 0, guest_nice_ticks = 0;
+ long ticks_per_second;
+ uint64_t sum, gcd, a, b;
+ const char *p;
+ int r;
+
+ assert(ret);
+
+ r = read_one_line_file("/proc/stat", &first_line);
+ if (r < 0)
+ return r;
+
+ p = first_word(first_line, "cpu");
+ if (!p)
+ return -EINVAL;
+
+ if (sscanf(p, "%lu %lu %lu %*u %*u %lu %lu %*u %lu %lu",
+ &user_ticks,
+ &nice_ticks,
+ &system_ticks,
+ &irq_ticks,
+ &softirq_ticks,
+ &guest_ticks,
+ &guest_nice_ticks) < 5) /* we only insist on the first five fields */
+ return -EINVAL;
+
+ ticks_per_second = sysconf(_SC_CLK_TCK);
+ if (ticks_per_second < 0)
+ return -errno;
+ assert(ticks_per_second > 0);
+
+ sum = (uint64_t) user_ticks + (uint64_t) nice_ticks + (uint64_t) system_ticks +
+ (uint64_t) irq_ticks + (uint64_t) softirq_ticks +
+ (uint64_t) guest_ticks + (uint64_t) guest_nice_ticks;
+
+ /* Let's reduce this fraction before we apply it to avoid overflows when converting this to µsec */
+ gcd = calc_gcd64(NSEC_PER_SEC, ticks_per_second);
+
+ a = (uint64_t) NSEC_PER_SEC / gcd;
+ b = (uint64_t) ticks_per_second / gcd;
+
+ *ret = DIV_ROUND_UP((nsec_t) sum * (nsec_t) a, (nsec_t) b);
+ return 0;
+}
+
+int procfs_memory_get(uint64_t *ret_total, uint64_t *ret_used) {
+ uint64_t mem_total = UINT64_MAX, mem_free = UINT64_MAX;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ f = fopen("/proc/meminfo", "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ uint64_t *v;
+ char *p, *e;
+ size_t n;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL; /* EOF: Couldn't find one or both fields? */
+
+ p = first_word(line, "MemTotal:");
+ if (p)
+ v = &mem_total;
+ else {
+ p = first_word(line, "MemFree:");
+ if (p)
+ v = &mem_free;
+ else
+ continue;
+ }
+
+ /* Determine length of numeric value */
+ n = strspn(p, DIGITS);
+ if (n == 0)
+ return -EINVAL;
+ e = p + n;
+
+ /* Ensure the line ends in " kB" */
+ n = strspn(e, WHITESPACE);
+ if (n == 0)
+ return -EINVAL;
+ if (!streq(e + n, "kB"))
+ return -EINVAL;
+
+ *e = 0;
+ r = safe_atou64(p, v);
+ if (r < 0)
+ return r;
+ if (*v == UINT64_MAX)
+ return -EINVAL;
+
+ if (mem_total != UINT64_MAX && mem_free != UINT64_MAX)
+ break;
+ }
+
+ if (mem_free > mem_total)
+ return -EINVAL;
+
+ if (ret_total)
+ *ret_total = mem_total * 1024U;
+ if (ret_used)
+ *ret_used = (mem_total - mem_free) * 1024U;
+ return 0;
+}
diff --git a/src/basic/procfs-util.h b/src/basic/procfs-util.h
new file mode 100644
index 0000000..5a44e9e
--- /dev/null
+++ b/src/basic/procfs-util.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+
+#include "time-util.h"
+
+int procfs_tasks_get_limit(uint64_t *ret);
+int procfs_tasks_set_limit(uint64_t limit);
+int procfs_tasks_get_current(uint64_t *ret);
+
+int procfs_cpu_get_usage(nsec_t *ret);
+
+int procfs_memory_get(uint64_t *ret_total, uint64_t *ret_used);
+static inline int procfs_memory_get_used(uint64_t *ret) {
+ return procfs_memory_get(NULL, ret);
+}
diff --git a/src/basic/random-util.c b/src/basic/random-util.c
new file mode 100644
index 0000000..f7decf6
--- /dev/null
+++ b/src/basic/random-util.c
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+#include <cpuid.h>
+#endif
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+
+#if HAVE_SYS_AUXV_H
+# include <sys/auxv.h>
+#endif
+
+#if USE_SYS_RANDOM_H
+# include <sys/random.h>
+#else
+# include <linux/random.h>
+#endif
+
+#include "fd-util.h"
+#include "io-util.h"
+#include "missing.h"
+#include "random-util.h"
+#include "time-util.h"
+
+#if HAS_FEATURE_MEMORY_SANITIZER
+#include <sanitizer/msan_interface.h>
+#endif
+
+int rdrand(unsigned long *ret) {
+
+#if defined(__i386__) || defined(__x86_64__)
+ static int have_rdrand = -1;
+ unsigned char err;
+
+ if (have_rdrand < 0) {
+ uint32_t eax, ebx, ecx, edx;
+
+ /* Check if RDRAND is supported by the CPU */
+ if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0) {
+ have_rdrand = false;
+ return -EOPNOTSUPP;
+ }
+
+ have_rdrand = !!(ecx & (1U << 30));
+ }
+
+ if (have_rdrand == 0)
+ return -EOPNOTSUPP;
+
+ asm volatile("rdrand %0;"
+ "setc %1"
+ : "=r" (*ret),
+ "=qm" (err));
+
+#if HAS_FEATURE_MEMORY_SANITIZER
+ __msan_unpoison(&err, sizeof(err));
+#endif
+
+ if (!err)
+ return -EAGAIN;
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+int genuine_random_bytes(void *p, size_t n, RandomFlags flags) {
+ static int have_syscall = -1;
+ _cleanup_close_ int fd = -1;
+ bool got_some = false;
+ int r;
+
+ /* Gathers some randomness from the kernel (or the CPU if the RANDOM_ALLOW_RDRAND flag is set). This call won't
+ * block, unless the RANDOM_BLOCK flag is set. If RANDOM_DONT_DRAIN is set, an error is returned if the random
+ * pool is not initialized. Otherwise it will always return some data from the kernel, regardless of whether
+ * the random pool is fully initialized or not. */
+
+ if (n == 0)
+ return 0;
+
+ if (FLAGS_SET(flags, RANDOM_ALLOW_RDRAND))
+ /* Try x86-64' RDRAND intrinsic if we have it. We only use it if high quality randomness is not
+ * required, as we don't trust it (who does?). Note that we only do a single iteration of RDRAND here,
+ * even though the Intel docs suggest calling this in a tight loop of 10 invocations or so. That's
+ * because we don't really care about the quality here. We generally prefer using RDRAND if the caller
+ * allows us too, since this way we won't drain the kernel randomness pool if we don't need it, as the
+ * pool's entropy is scarce. */
+ for (;;) {
+ unsigned long u;
+ size_t m;
+
+ if (rdrand(&u) < 0) {
+ if (got_some && FLAGS_SET(flags, RANDOM_EXTEND_WITH_PSEUDO)) {
+ /* Fill in the remaining bytes using pseudo-random values */
+ pseudo_random_bytes(p, n);
+ return 0;
+ }
+
+ /* OK, this didn't work, let's go to getrandom() + /dev/urandom instead */
+ break;
+ }
+
+ m = MIN(sizeof(u), n);
+ memcpy(p, &u, m);
+
+ p = (uint8_t*) p + m;
+ n -= m;
+
+ if (n == 0)
+ return 0; /* Yay, success! */
+
+ got_some = true;
+ }
+
+ /* Use the getrandom() syscall unless we know we don't have it. */
+ if (have_syscall != 0 && !HAS_FEATURE_MEMORY_SANITIZER) {
+
+ for (;;) {
+ r = getrandom(p, n, FLAGS_SET(flags, RANDOM_BLOCK) ? 0 : GRND_NONBLOCK);
+ if (r > 0) {
+ have_syscall = true;
+
+ if ((size_t) r == n)
+ return 0; /* Yay, success! */
+
+ assert((size_t) r < n);
+ p = (uint8_t*) p + r;
+ n -= r;
+
+ if (FLAGS_SET(flags, RANDOM_EXTEND_WITH_PSEUDO)) {
+ /* Fill in the remaining bytes using pseudo-random values */
+ pseudo_random_bytes(p, n);
+ return 0;
+ }
+
+ got_some = true;
+
+ /* Hmm, we didn't get enough good data but the caller insists on good data? Then try again */
+ if (FLAGS_SET(flags, RANDOM_BLOCK))
+ continue;
+
+ /* Fill in the rest with /dev/urandom */
+ break;
+
+ } else if (r == 0) {
+ have_syscall = true;
+ return -EIO;
+
+ } else if (errno == ENOSYS) {
+ /* We lack the syscall, continue with reading from /dev/urandom. */
+ have_syscall = false;
+ break;
+
+ } else if (errno == EAGAIN) {
+ /* The kernel has no entropy whatsoever. Let's remember to use the syscall the next
+ * time again though.
+ *
+ * If RANDOM_DONT_DRAIN is set, return an error so that random_bytes() can produce some
+ * pseudo-random bytes instead. Otherwise, fall back to /dev/urandom, which we know is empty,
+ * but the kernel will produce some bytes for us on a best-effort basis. */
+ have_syscall = true;
+
+ if (got_some && FLAGS_SET(flags, RANDOM_EXTEND_WITH_PSEUDO)) {
+ /* Fill in the remaining bytes using pseudorandom values */
+ pseudo_random_bytes(p, n);
+ return 0;
+ }
+
+ if (FLAGS_SET(flags, RANDOM_DONT_DRAIN))
+ return -ENODATA;
+
+ /* Use /dev/urandom instead */
+ break;
+ } else
+ return -errno;
+ }
+ }
+
+ fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return errno == ENOENT ? -ENOSYS : -errno;
+
+ return loop_read_exact(fd, p, n, true);
+}
+
+void initialize_srand(void) {
+ static bool srand_called = false;
+ unsigned x;
+#if HAVE_SYS_AUXV_H
+ const void *auxv;
+#endif
+ unsigned long k;
+
+ if (srand_called)
+ return;
+
+#if HAVE_SYS_AUXV_H
+ /* The kernel provides us with 16 bytes of entropy in auxv, so let's
+ * try to make use of that to seed the pseudo-random generator. It's
+ * better than nothing... */
+
+ auxv = (const void*) getauxval(AT_RANDOM);
+ if (auxv) {
+ assert_cc(sizeof(x) <= 16);
+ memcpy(&x, auxv, sizeof(x));
+ } else
+#endif
+ x = 0;
+
+ x ^= (unsigned) now(CLOCK_REALTIME);
+ x ^= (unsigned) gettid();
+
+ if (rdrand(&k) >= 0)
+ x ^= (unsigned) k;
+
+ srand(x);
+ srand_called = true;
+}
+
+/* INT_MAX gives us only 31 bits, so use 24 out of that. */
+#if RAND_MAX >= INT_MAX
+# define RAND_STEP 3
+#else
+/* SHORT_INT_MAX or lower gives at most 15 bits, we just just 8 out of that. */
+# define RAND_STEP 1
+#endif
+
+void pseudo_random_bytes(void *p, size_t n) {
+ uint8_t *q;
+
+ initialize_srand();
+
+ for (q = p; q < (uint8_t*) p + n; q += RAND_STEP) {
+ unsigned rr;
+
+ rr = (unsigned) rand();
+
+#if RAND_STEP >= 3
+ if ((size_t) (q - (uint8_t*) p + 2) < n)
+ q[2] = rr >> 16;
+#endif
+#if RAND_STEP >= 2
+ if ((size_t) (q - (uint8_t*) p + 1) < n)
+ q[1] = rr >> 8;
+#endif
+ q[0] = rr;
+ }
+}
+
+void random_bytes(void *p, size_t n) {
+
+ if (genuine_random_bytes(p, n, RANDOM_EXTEND_WITH_PSEUDO|RANDOM_DONT_DRAIN|RANDOM_ALLOW_RDRAND) >= 0)
+ return;
+
+ /* If for some reason some user made /dev/urandom unavailable to us, or the kernel has no entropy, use a PRNG instead. */
+ pseudo_random_bytes(p, n);
+}
diff --git a/src/basic/random-util.h b/src/basic/random-util.h
new file mode 100644
index 0000000..3e8c288
--- /dev/null
+++ b/src/basic/random-util.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+typedef enum RandomFlags {
+ RANDOM_EXTEND_WITH_PSEUDO = 1 << 0, /* If we can't get enough genuine randomness, but some, fill up the rest with pseudo-randomness */
+ RANDOM_BLOCK = 1 << 1, /* Rather block than return crap randomness (only if the kernel supports that) */
+ RANDOM_DONT_DRAIN = 1 << 2, /* If we can't get any randomness at all, return early with -EAGAIN */
+ RANDOM_ALLOW_RDRAND = 1 << 3, /* Allow usage of the CPU RNG */
+} RandomFlags;
+
+int genuine_random_bytes(void *p, size_t n, RandomFlags flags); /* returns "genuine" randomness, optionally filled upwith pseudo random, if not enough is available */
+void pseudo_random_bytes(void *p, size_t n); /* returns only pseudo-randommess (but possibly seeded from something better) */
+void random_bytes(void *p, size_t n); /* returns genuine randomness if cheaply available, and pseudo randomness if not. */
+
+void initialize_srand(void);
+
+static inline uint64_t random_u64(void) {
+ uint64_t u;
+ random_bytes(&u, sizeof(u));
+ return u;
+}
+
+static inline uint32_t random_u32(void) {
+ uint32_t u;
+ random_bytes(&u, sizeof(u));
+ return u;
+}
+
+int rdrand(unsigned long *ret);
diff --git a/src/basic/ratelimit.c b/src/basic/ratelimit.c
new file mode 100644
index 0000000..4e04e04
--- /dev/null
+++ b/src/basic/ratelimit.c
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/time.h>
+
+#include "macro.h"
+#include "ratelimit.h"
+
+/* Modelled after Linux' lib/ratelimit.c by Dave Young
+ * <hidave.darkstar@gmail.com>, which is licensed GPLv2. */
+
+bool ratelimit_below(RateLimit *r) {
+ usec_t ts;
+
+ assert(r);
+
+ if (r->interval <= 0 || r->burst <= 0)
+ return true;
+
+ ts = now(CLOCK_MONOTONIC);
+
+ if (r->begin <= 0 ||
+ r->begin + r->interval < ts) {
+ r->begin = ts;
+
+ /* Reset counter */
+ r->num = 0;
+ goto good;
+ }
+
+ if (r->num < r->burst)
+ goto good;
+
+ return false;
+
+good:
+ r->num++;
+ return true;
+}
diff --git a/src/basic/ratelimit.h b/src/basic/ratelimit.h
new file mode 100644
index 0000000..de91def
--- /dev/null
+++ b/src/basic/ratelimit.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+#include "util.h"
+
+typedef struct RateLimit {
+ usec_t interval;
+ usec_t begin;
+ unsigned burst;
+ unsigned num;
+} RateLimit;
+
+#define RATELIMIT_DEFINE(_name, _interval, _burst) \
+ RateLimit _name = { \
+ .interval = (_interval), \
+ .burst = (_burst), \
+ .num = 0, \
+ .begin = 0 \
+ }
+
+#define RATELIMIT_INIT(v, _interval, _burst) \
+ do { \
+ RateLimit *_r = &(v); \
+ _r->interval = (_interval); \
+ _r->burst = (_burst); \
+ _r->num = 0; \
+ _r->begin = 0; \
+ } while (false)
+
+#define RATELIMIT_RESET(v) \
+ do { \
+ RateLimit *_r = &(v); \
+ _r->num = 0; \
+ _r->begin = 0; \
+ } while (false)
+
+bool ratelimit_below(RateLimit *r);
diff --git a/src/basic/raw-clone.h b/src/basic/raw-clone.h
new file mode 100644
index 0000000..b8857b0
--- /dev/null
+++ b/src/basic/raw-clone.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2016 Michael Karcher
+***/
+
+#include <errno.h>
+#include <sched.h>
+#include <sys/syscall.h>
+
+#include "log.h"
+#include "macro.h"
+
+/**
+ * raw_clone() - uses clone to create a new process with clone flags
+ * @flags: Flags to pass to the clone system call
+ *
+ * Uses the clone system call to create a new process with the cloning flags and termination signal passed in the flags
+ * parameter. Opposed to glibc's clone funtion, using this function does not set up a separate stack for the child, but
+ * relies on copy-on-write semantics on the one stack at a common virtual address, just as fork does.
+ *
+ * To obtain copy-on-write semantics, flags must not contain CLONE_VM, and thus CLONE_THREAD and CLONE_SIGHAND (which
+ * require CLONE_VM) are not usable.
+ *
+ * Additionally, as this function does not pass the ptid, newtls and ctid parameters to the kernel, flags must not
+ * contain CLONE_PARENT_SETTID, CLONE_CHILD_SETTID, CLONE_CHILD_CLEARTID or CLONE_SETTLS.
+ *
+ * Returns: 0 in the child process and the child process id in the parent.
+ */
+static inline pid_t raw_clone(unsigned long flags) {
+ pid_t ret;
+
+ assert((flags & (CLONE_VM|CLONE_PARENT_SETTID|CLONE_CHILD_SETTID|
+ CLONE_CHILD_CLEARTID|CLONE_SETTLS)) == 0);
+#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
+ /* On s390/s390x and cris the order of the first and second arguments
+ * of the raw clone() system call is reversed. */
+ ret = (pid_t) syscall(__NR_clone, NULL, flags);
+#elif defined(__sparc__)
+ {
+ /**
+ * sparc always returns the other process id in %o0, and
+ * a boolean flag whether this is the child or the parent in
+ * %o1. Inline assembly is needed to get the flag returned
+ * in %o1.
+ */
+ int in_child, child_pid, error;
+
+ asm volatile("mov %3, %%g1\n\t"
+ "mov %4, %%o0\n\t"
+ "mov 0 , %%o1\n\t"
+#if defined(__arch64__)
+ "t 0x6d\n\t"
+#else
+ "t 0x10\n\t"
+#endif
+ "addx %%g0, 0, %2\n\t"
+ "mov %%o1, %0\n\t"
+ "mov %%o0, %1" :
+ "=r"(in_child), "=r"(child_pid), "=r"(error) :
+ "i"(__NR_clone), "r"(flags) :
+ "%o1", "%o0", "%g1", "cc" );
+
+ if (error) {
+ errno = child_pid;
+ ret = -1;
+ } else
+ ret = in_child ? 0 : child_pid;
+ }
+#else
+ ret = (pid_t) syscall(__NR_clone, flags, NULL);
+#endif
+
+ if (ret == 0)
+ reset_cached_pid();
+
+ return ret;
+}
diff --git a/src/basic/raw-reboot.h b/src/basic/raw-reboot.h
new file mode 100644
index 0000000..8ecefe9
--- /dev/null
+++ b/src/basic/raw-reboot.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/reboot.h>
+#include <sys/reboot.h>
+#include <sys/syscall.h>
+
+/* glibc defines the reboot() API call, which is a wrapper around the system call of the same name, but without the
+ * extra "arg" parameter. Since we need that parameter for some calls, let's add a "raw" wrapper that is defined the
+ * same way, except it takes the additional argument. */
+
+static inline int raw_reboot(int cmd, const void *arg) {
+ return (int) syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, arg);
+}
diff --git a/src/basic/refcnt.h b/src/basic/refcnt.h
new file mode 100644
index 0000000..40f9a84
--- /dev/null
+++ b/src/basic/refcnt.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* A type-safe atomic refcounter.
+ *
+ * DO NOT USE THIS UNLESS YOU ACTUALLY CARE ABOUT THREAD SAFETY! */
+
+typedef struct {
+ volatile unsigned _value;
+} RefCount;
+
+#define REFCNT_GET(r) ((r)._value)
+#define REFCNT_INC(r) (__sync_add_and_fetch(&(r)._value, 1))
+#define REFCNT_DEC(r) (__sync_sub_and_fetch(&(r)._value, 1))
+
+#define REFCNT_INIT ((RefCount) { ._value = 1 })
+
+#define _DEFINE_ATOMIC_REF_FUNC(type, name, scope) \
+ scope type *name##_ref(type *p) { \
+ if (!p) \
+ return NULL; \
+ \
+ assert_se(REFCNT_INC(p->n_ref) >= 2); \
+ return p; \
+ }
+
+#define _DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func, scope) \
+ scope type *name##_unref(type *p) { \
+ if (!p) \
+ return NULL; \
+ \
+ if (REFCNT_DEC(p->n_ref) > 0) \
+ return NULL; \
+ \
+ return free_func(p); \
+ }
+
+#define DEFINE_ATOMIC_REF_FUNC(type, name) \
+ _DEFINE_ATOMIC_REF_FUNC(type, name,)
+#define DEFINE_PUBLIC_ATOMIC_REF_FUNC(type, name) \
+ _DEFINE_ATOMIC_REF_FUNC(type, name, _public_)
+
+#define DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func) \
+ _DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func,)
+#define DEFINE_PUBLIC_ATOMIC_UNREF_FUNC(type, name, free_func) \
+ _DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func, _public_)
+
+#define DEFINE_ATOMIC_REF_UNREF_FUNC(type, name, free_func) \
+ DEFINE_ATOMIC_REF_FUNC(type, name); \
+ DEFINE_ATOMIC_UNREF_FUNC(type, name, free_func);
+
+#define DEFINE_PUBLIC_ATOMIC_REF_UNREF_FUNC(type, name, free_func) \
+ DEFINE_PUBLIC_ATOMIC_REF_FUNC(type, name); \
+ DEFINE_PUBLIC_ATOMIC_UNREF_FUNC(type, name, free_func);
diff --git a/src/basic/replace-var.c b/src/basic/replace-var.c
new file mode 100644
index 0000000..fd2b5c1
--- /dev/null
+++ b/src/basic/replace-var.c
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "replace-var.h"
+#include "string-util.h"
+
+/*
+ * Generic infrastructure for replacing @FOO@ style variables in
+ * strings. Will call a callback for each replacement.
+ */
+
+static int get_variable(const char *b, char **r) {
+ size_t k;
+ char *t;
+
+ assert(b);
+ assert(r);
+
+ if (*b != '@')
+ return 0;
+
+ k = strspn(b + 1, UPPERCASE_LETTERS "_");
+ if (k <= 0 || b[k+1] != '@')
+ return 0;
+
+ t = strndup(b + 1, k);
+ if (!t)
+ return -ENOMEM;
+
+ *r = t;
+ return 1;
+}
+
+char *replace_var(const char *text, char *(*lookup)(const char *variable, void *userdata), void *userdata) {
+ char *r, *t;
+ const char *f;
+ size_t l;
+
+ assert(text);
+ assert(lookup);
+
+ l = strlen(text);
+ r = new(char, l+1);
+ if (!r)
+ return NULL;
+
+ f = text;
+ t = r;
+ while (*f) {
+ _cleanup_free_ char *v = NULL, *n = NULL;
+ char *a;
+ int k;
+ size_t skip, d, nl;
+
+ k = get_variable(f, &v);
+ if (k < 0)
+ goto oom;
+ if (k == 0) {
+ *(t++) = *(f++);
+ continue;
+ }
+
+ n = lookup(v, userdata);
+ if (!n)
+ goto oom;
+
+ skip = strlen(v) + 2;
+
+ d = t - r;
+ nl = l - skip + strlen(n);
+ a = realloc(r, nl + 1);
+ if (!a)
+ goto oom;
+
+ l = nl;
+ r = a;
+ t = r + d;
+
+ t = stpcpy(t, n);
+ f += skip;
+ }
+
+ *t = 0;
+ return r;
+
+oom:
+ return mfree(r);
+}
diff --git a/src/basic/replace-var.h b/src/basic/replace-var.h
new file mode 100644
index 0000000..e6a489f
--- /dev/null
+++ b/src/basic/replace-var.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+char *replace_var(const char *text, char *(*lookup)(const char *variable, void *userdata), void *userdata);
diff --git a/src/basic/rlimit-util.c b/src/basic/rlimit-util.c
new file mode 100644
index 0000000..74b3a02
--- /dev/null
+++ b/src/basic/rlimit-util.c
@@ -0,0 +1,410 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sys/resource.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "rlimit-util.h"
+#include "string-table.h"
+#include "time-util.h"
+
+int setrlimit_closest(int resource, const struct rlimit *rlim) {
+ struct rlimit highest, fixed;
+
+ assert(rlim);
+
+ if (setrlimit(resource, rlim) >= 0)
+ return 0;
+
+ if (errno != EPERM)
+ return -errno;
+
+ /* So we failed to set the desired setrlimit, then let's try
+ * to get as close as we can */
+ if (getrlimit(resource, &highest) < 0)
+ return -errno;
+
+ /* If the hard limit is unbounded anyway, then the EPERM had other reasons, let's propagate the original EPERM
+ * then */
+ if (highest.rlim_max == RLIM_INFINITY)
+ return -EPERM;
+
+ fixed = (struct rlimit) {
+ .rlim_cur = MIN(rlim->rlim_cur, highest.rlim_max),
+ .rlim_max = MIN(rlim->rlim_max, highest.rlim_max),
+ };
+
+ /* Shortcut things if we wouldn't change anything. */
+ if (fixed.rlim_cur == highest.rlim_cur &&
+ fixed.rlim_max == highest.rlim_max)
+ return 0;
+
+ if (setrlimit(resource, &fixed) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int setrlimit_closest_all(const struct rlimit *const *rlim, int *which_failed) {
+ int i, r;
+
+ assert(rlim);
+
+ /* On failure returns the limit's index that failed in *which_failed, but only if non-NULL */
+
+ for (i = 0; i < _RLIMIT_MAX; i++) {
+ if (!rlim[i])
+ continue;
+
+ r = setrlimit_closest(i, rlim[i]);
+ if (r < 0) {
+ if (which_failed)
+ *which_failed = i;
+
+ return r;
+ }
+ }
+
+ if (which_failed)
+ *which_failed = -1;
+
+ return 0;
+}
+
+static int rlimit_parse_u64(const char *val, rlim_t *ret) {
+ uint64_t u;
+ int r;
+
+ assert(val);
+ assert(ret);
+
+ if (streq(val, "infinity")) {
+ *ret = RLIM_INFINITY;
+ return 0;
+ }
+
+ /* setrlimit(2) suggests rlim_t is always 64bit on Linux. */
+ assert_cc(sizeof(rlim_t) == sizeof(uint64_t));
+
+ r = safe_atou64(val, &u);
+ if (r < 0)
+ return r;
+ if (u >= (uint64_t) RLIM_INFINITY)
+ return -ERANGE;
+
+ *ret = (rlim_t) u;
+ return 0;
+}
+
+static int rlimit_parse_size(const char *val, rlim_t *ret) {
+ uint64_t u;
+ int r;
+
+ assert(val);
+ assert(ret);
+
+ if (streq(val, "infinity")) {
+ *ret = RLIM_INFINITY;
+ return 0;
+ }
+
+ r = parse_size(val, 1024, &u);
+ if (r < 0)
+ return r;
+ if (u >= (uint64_t) RLIM_INFINITY)
+ return -ERANGE;
+
+ *ret = (rlim_t) u;
+ return 0;
+}
+
+static int rlimit_parse_sec(const char *val, rlim_t *ret) {
+ uint64_t u;
+ usec_t t;
+ int r;
+
+ assert(val);
+ assert(ret);
+
+ if (streq(val, "infinity")) {
+ *ret = RLIM_INFINITY;
+ return 0;
+ }
+
+ r = parse_sec(val, &t);
+ if (r < 0)
+ return r;
+ if (t == USEC_INFINITY) {
+ *ret = RLIM_INFINITY;
+ return 0;
+ }
+
+ u = (uint64_t) DIV_ROUND_UP(t, USEC_PER_SEC);
+ if (u >= (uint64_t) RLIM_INFINITY)
+ return -ERANGE;
+
+ *ret = (rlim_t) u;
+ return 0;
+}
+
+static int rlimit_parse_usec(const char *val, rlim_t *ret) {
+ usec_t t;
+ int r;
+
+ assert(val);
+ assert(ret);
+
+ if (streq(val, "infinity")) {
+ *ret = RLIM_INFINITY;
+ return 0;
+ }
+
+ r = parse_time(val, &t, 1);
+ if (r < 0)
+ return r;
+ if (t == USEC_INFINITY) {
+ *ret = RLIM_INFINITY;
+ return 0;
+ }
+
+ *ret = (rlim_t) t;
+ return 0;
+}
+
+static int rlimit_parse_nice(const char *val, rlim_t *ret) {
+ uint64_t rl;
+ int r;
+
+ /* So, Linux is weird. The range for RLIMIT_NICE is 40..1, mapping to the nice levels -20..19. However, the
+ * RLIMIT_NICE limit defaults to 0 by the kernel, i.e. a value that maps to nice level 20, which of course is
+ * bogus and does not exist. In order to permit parsing the RLIMIT_NICE of 0 here we hence implement a slight
+ * asymmetry: when parsing as positive nice level we permit 0..19. When parsing as negative nice level, we
+ * permit -20..0. But when parsing as raw resource limit value then we also allow the special value 0.
+ *
+ * Yeah, Linux is quality engineering sometimes... */
+
+ if (val[0] == '+') {
+
+ /* Prefixed with "+": Parse as positive user-friendly nice value */
+ r = safe_atou64(val + 1, &rl);
+ if (r < 0)
+ return r;
+
+ if (rl >= PRIO_MAX)
+ return -ERANGE;
+
+ rl = 20 - rl;
+
+ } else if (val[0] == '-') {
+
+ /* Prefixed with "-": Parse as negative user-friendly nice value */
+ r = safe_atou64(val + 1, &rl);
+ if (r < 0)
+ return r;
+
+ if (rl > (uint64_t) (-PRIO_MIN))
+ return -ERANGE;
+
+ rl = 20 + rl;
+ } else {
+
+ /* Not prefixed: parse as raw resource limit value */
+ r = safe_atou64(val, &rl);
+ if (r < 0)
+ return r;
+
+ if (rl > (uint64_t) (20 - PRIO_MIN))
+ return -ERANGE;
+ }
+
+ *ret = (rlim_t) rl;
+ return 0;
+}
+
+static int (*const rlimit_parse_table[_RLIMIT_MAX])(const char *val, rlim_t *ret) = {
+ [RLIMIT_CPU] = rlimit_parse_sec,
+ [RLIMIT_FSIZE] = rlimit_parse_size,
+ [RLIMIT_DATA] = rlimit_parse_size,
+ [RLIMIT_STACK] = rlimit_parse_size,
+ [RLIMIT_CORE] = rlimit_parse_size,
+ [RLIMIT_RSS] = rlimit_parse_size,
+ [RLIMIT_NOFILE] = rlimit_parse_u64,
+ [RLIMIT_AS] = rlimit_parse_size,
+ [RLIMIT_NPROC] = rlimit_parse_u64,
+ [RLIMIT_MEMLOCK] = rlimit_parse_size,
+ [RLIMIT_LOCKS] = rlimit_parse_u64,
+ [RLIMIT_SIGPENDING] = rlimit_parse_u64,
+ [RLIMIT_MSGQUEUE] = rlimit_parse_size,
+ [RLIMIT_NICE] = rlimit_parse_nice,
+ [RLIMIT_RTPRIO] = rlimit_parse_u64,
+ [RLIMIT_RTTIME] = rlimit_parse_usec,
+};
+
+int rlimit_parse_one(int resource, const char *val, rlim_t *ret) {
+ assert(val);
+ assert(ret);
+
+ if (resource < 0)
+ return -EINVAL;
+ if (resource >= _RLIMIT_MAX)
+ return -EINVAL;
+
+ return rlimit_parse_table[resource](val, ret);
+}
+
+int rlimit_parse(int resource, const char *val, struct rlimit *ret) {
+ _cleanup_free_ char *hard = NULL, *soft = NULL;
+ rlim_t hl, sl;
+ int r;
+
+ assert(val);
+ assert(ret);
+
+ r = extract_first_word(&val, &soft, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ r = rlimit_parse_one(resource, soft, &sl);
+ if (r < 0)
+ return r;
+
+ r = extract_first_word(&val, &hard, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (!isempty(val))
+ return -EINVAL;
+ if (r == 0)
+ hl = sl;
+ else {
+ r = rlimit_parse_one(resource, hard, &hl);
+ if (r < 0)
+ return r;
+ if (sl > hl)
+ return -EILSEQ;
+ }
+
+ *ret = (struct rlimit) {
+ .rlim_cur = sl,
+ .rlim_max = hl,
+ };
+
+ return 0;
+}
+
+int rlimit_format(const struct rlimit *rl, char **ret) {
+ char *s = NULL;
+
+ assert(rl);
+ assert(ret);
+
+ if (rl->rlim_cur >= RLIM_INFINITY && rl->rlim_max >= RLIM_INFINITY)
+ s = strdup("infinity");
+ else if (rl->rlim_cur >= RLIM_INFINITY)
+ (void) asprintf(&s, "infinity:" RLIM_FMT, rl->rlim_max);
+ else if (rl->rlim_max >= RLIM_INFINITY)
+ (void) asprintf(&s, RLIM_FMT ":infinity", rl->rlim_cur);
+ else if (rl->rlim_cur == rl->rlim_max)
+ (void) asprintf(&s, RLIM_FMT, rl->rlim_cur);
+ else
+ (void) asprintf(&s, RLIM_FMT ":" RLIM_FMT, rl->rlim_cur, rl->rlim_max);
+
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+static const char* const rlimit_table[_RLIMIT_MAX] = {
+ [RLIMIT_AS] = "AS",
+ [RLIMIT_CORE] = "CORE",
+ [RLIMIT_CPU] = "CPU",
+ [RLIMIT_DATA] = "DATA",
+ [RLIMIT_FSIZE] = "FSIZE",
+ [RLIMIT_LOCKS] = "LOCKS",
+ [RLIMIT_MEMLOCK] = "MEMLOCK",
+ [RLIMIT_MSGQUEUE] = "MSGQUEUE",
+ [RLIMIT_NICE] = "NICE",
+ [RLIMIT_NOFILE] = "NOFILE",
+ [RLIMIT_NPROC] = "NPROC",
+ [RLIMIT_RSS] = "RSS",
+ [RLIMIT_RTPRIO] = "RTPRIO",
+ [RLIMIT_RTTIME] = "RTTIME",
+ [RLIMIT_SIGPENDING] = "SIGPENDING",
+ [RLIMIT_STACK] = "STACK",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(rlimit, int);
+
+int rlimit_from_string_harder(const char *s) {
+ const char *suffix;
+
+ /* The official prefix */
+ suffix = startswith(s, "RLIMIT_");
+ if (suffix)
+ return rlimit_from_string(suffix);
+
+ /* Our own unit file setting prefix */
+ suffix = startswith(s, "Limit");
+ if (suffix)
+ return rlimit_from_string(suffix);
+
+ return rlimit_from_string(s);
+}
+
+void rlimit_free_all(struct rlimit **rl) {
+ int i;
+
+ if (!rl)
+ return;
+
+ for (i = 0; i < _RLIMIT_MAX; i++)
+ rl[i] = mfree(rl[i]);
+}
+
+int rlimit_nofile_bump(int limit) {
+ int r;
+
+ /* Bumps the (soft) RLIMIT_NOFILE resource limit as close as possible to the specified limit. If a negative
+ * limit is specified, bumps it to the maximum the kernel and the hard resource limit allows. This call should
+ * be used by all our programs that might need a lot of fds, and that know how to deal with high fd numbers
+ * (i.e. do not use select() — which chokes on fds >= 1024) */
+
+ if (limit < 0)
+ limit = read_nr_open();
+
+ if (limit < 3)
+ limit = 3;
+
+ r = setrlimit_closest(RLIMIT_NOFILE, &RLIMIT_MAKE_CONST(limit));
+ if (r < 0)
+ return log_debug_errno(r, "Failed to set RLIMIT_NOFILE: %m");
+
+ return 0;
+}
+
+int rlimit_nofile_safe(void) {
+ struct rlimit rl;
+
+ /* Resets RLIMIT_NOFILE's soft limit FD_SETSIZE (i.e. 1024), for compatibility with software still using
+ * select() */
+
+ if (getrlimit(RLIMIT_NOFILE, &rl) < 0)
+ return log_debug_errno(errno, "Failed to query RLIMIT_NOFILE: %m");
+
+ if (rl.rlim_cur <= FD_SETSIZE)
+ return 0;
+
+ rl.rlim_cur = FD_SETSIZE;
+ if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
+ return log_debug_errno(errno, "Failed to lower RLIMIT_NOFILE's soft limit to " RLIM_FMT ": %m", rl.rlim_cur);
+
+ return 1;
+}
diff --git a/src/basic/rlimit-util.h b/src/basic/rlimit-util.h
new file mode 100644
index 0000000..d4fca2b
--- /dev/null
+++ b/src/basic/rlimit-util.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/resource.h>
+
+#include "macro.h"
+
+const char *rlimit_to_string(int i) _const_;
+int rlimit_from_string(const char *s) _pure_;
+int rlimit_from_string_harder(const char *s) _pure_;
+
+int setrlimit_closest(int resource, const struct rlimit *rlim);
+int setrlimit_closest_all(const struct rlimit * const *rlim, int *which_failed);
+
+int rlimit_parse_one(int resource, const char *val, rlim_t *ret);
+int rlimit_parse(int resource, const char *val, struct rlimit *ret);
+
+int rlimit_format(const struct rlimit *rl, char **ret);
+
+void rlimit_free_all(struct rlimit **rl);
+
+#define RLIMIT_MAKE_CONST(lim) ((struct rlimit) { lim, lim })
+
+int rlimit_nofile_bump(int limit);
+int rlimit_nofile_safe(void);
diff --git a/src/basic/rm-rf.c b/src/basic/rm-rf.c
new file mode 100644
index 0000000..0c957c9
--- /dev/null
+++ b/src/basic/rm-rf.c
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "cgroup-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stat-util.h"
+#include "string-util.h"
+
+static bool is_physical_fs(const struct statfs *sfs) {
+ return !is_temporary_fs(sfs) && !is_cgroup_fs(sfs);
+}
+
+int rm_rf_children(int fd, RemoveFlags flags, struct stat *root_dev) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int ret = 0, r;
+ struct statfs sfs;
+
+ assert(fd >= 0);
+
+ /* This returns the first error we run into, but nevertheless
+ * tries to go on. This closes the passed fd. */
+
+ if (!(flags & REMOVE_PHYSICAL)) {
+
+ r = fstatfs(fd, &sfs);
+ if (r < 0) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ if (is_physical_fs(&sfs)) {
+ /* We refuse to clean physical file systems with this call,
+ * unless explicitly requested. This is extra paranoia just
+ * to be sure we never ever remove non-state data. */
+ _cleanup_free_ char *path = NULL;
+
+ (void) fd_get_path(fd, &path);
+ log_error("Attempted to remove disk file system under \"%s\", and we can't allow that.",
+ strna(path));
+
+ safe_close(fd);
+ return -EPERM;
+ }
+ }
+
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return errno == ENOENT ? 0 : -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ bool is_dir;
+ struct stat st;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (de->d_type == DT_UNKNOWN ||
+ (de->d_type == DT_DIR && (root_dev || (flags & REMOVE_SUBVOLUME)))) {
+ if (fstatat(fd, de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ if (ret == 0 && errno != ENOENT)
+ ret = -errno;
+ continue;
+ }
+
+ is_dir = S_ISDIR(st.st_mode);
+ } else
+ is_dir = de->d_type == DT_DIR;
+
+ if (is_dir) {
+ int subdir_fd;
+
+ /* if root_dev is set, remove subdirectories only if device is same */
+ if (root_dev && st.st_dev != root_dev->st_dev)
+ continue;
+
+ subdir_fd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
+ if (subdir_fd < 0) {
+ if (ret == 0 && errno != ENOENT)
+ ret = -errno;
+ continue;
+ }
+
+ /* Stop at mount points */
+ r = fd_is_mount_point(fd, de->d_name, 0);
+ if (r < 0) {
+ if (ret == 0 && r != -ENOENT)
+ ret = r;
+
+ safe_close(subdir_fd);
+ continue;
+ }
+ if (r) {
+ safe_close(subdir_fd);
+ continue;
+ }
+
+ if ((flags & REMOVE_SUBVOLUME) && st.st_ino == 256) {
+
+ /* This could be a subvolume, try to remove it */
+
+ r = btrfs_subvol_remove_fd(fd, de->d_name, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
+ if (r < 0) {
+ if (!IN_SET(r, -ENOTTY, -EINVAL)) {
+ if (ret == 0)
+ ret = r;
+
+ safe_close(subdir_fd);
+ continue;
+ }
+
+ /* ENOTTY, then it wasn't a
+ * btrfs subvolume, continue
+ * below. */
+ } else {
+ /* It was a subvolume, continue. */
+ safe_close(subdir_fd);
+ continue;
+ }
+ }
+
+ /* We pass REMOVE_PHYSICAL here, to avoid
+ * doing the fstatfs() to check the file
+ * system type again for each directory */
+ r = rm_rf_children(subdir_fd, flags | REMOVE_PHYSICAL, root_dev);
+ if (r < 0 && ret == 0)
+ ret = r;
+
+ if (unlinkat(fd, de->d_name, AT_REMOVEDIR) < 0) {
+ if (ret == 0 && errno != ENOENT)
+ ret = -errno;
+ }
+
+ } else if (!(flags & REMOVE_ONLY_DIRECTORIES)) {
+
+ if (unlinkat(fd, de->d_name, 0) < 0) {
+ if (ret == 0 && errno != ENOENT)
+ ret = -errno;
+ }
+ }
+ }
+ return ret;
+}
+
+int rm_rf(const char *path, RemoveFlags flags) {
+ int fd, r;
+ struct statfs s;
+
+ assert(path);
+
+ /* We refuse to clean the root file system with this
+ * call. This is extra paranoia to never cause a really
+ * seriously broken system. */
+ if (path_equal_or_files_same(path, "/", AT_SYMLINK_NOFOLLOW))
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Attempted to remove entire root file system (\"%s\"), and we can't allow that.",
+ path);
+
+ if (FLAGS_SET(flags, REMOVE_SUBVOLUME | REMOVE_ROOT | REMOVE_PHYSICAL)) {
+ /* Try to remove as subvolume first */
+ r = btrfs_subvol_remove(path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
+ if (r >= 0)
+ return r;
+
+ if (!IN_SET(r, -ENOTTY, -EINVAL, -ENOTDIR))
+ return r;
+
+ /* Not btrfs or not a subvolume */
+ }
+
+ fd = open(path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
+ if (fd < 0) {
+ if (!IN_SET(errno, ENOTDIR, ELOOP))
+ return -errno;
+
+ if (!(flags & REMOVE_PHYSICAL)) {
+ if (statfs(path, &s) < 0)
+ return -errno;
+
+ if (is_physical_fs(&s))
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Attempted to remove files from a disk file system under \"%s\", refusing.",
+ path);
+ }
+
+ if ((flags & REMOVE_ROOT) && !(flags & REMOVE_ONLY_DIRECTORIES))
+ if (unlink(path) < 0 && errno != ENOENT)
+ return -errno;
+
+ return 0;
+ }
+
+ r = rm_rf_children(fd, flags, NULL);
+
+ if (flags & REMOVE_ROOT) {
+ if (rmdir(path) < 0) {
+ if (r == 0 && errno != ENOENT)
+ r = -errno;
+ }
+ }
+
+ return r;
+}
diff --git a/src/basic/rm-rf.h b/src/basic/rm-rf.h
new file mode 100644
index 0000000..3ee2b97
--- /dev/null
+++ b/src/basic/rm-rf.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/stat.h>
+
+#include "util.h"
+
+typedef enum RemoveFlags {
+ REMOVE_ONLY_DIRECTORIES = 1 << 0,
+ REMOVE_ROOT = 1 << 1,
+ REMOVE_PHYSICAL = 1 << 2, /* if not set, only removes files on tmpfs, never physical file systems */
+ REMOVE_SUBVOLUME = 1 << 3,
+} RemoveFlags;
+
+int rm_rf_children(int fd, RemoveFlags flags, struct stat *root_dev);
+int rm_rf(const char *path, RemoveFlags flags);
+
+/* Useful for usage with _cleanup_(), destroys a directory and frees the pointer */
+static inline void rm_rf_physical_and_free(char *p) {
+ PROTECT_ERRNO;
+ (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL);
+ free(p);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, rm_rf_physical_and_free);
+
+/* Similar as above, but also has magic btrfs subvolume powers */
+static inline void rm_rf_subvolume_and_free(char *p) {
+ PROTECT_ERRNO;
+ (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ free(p);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, rm_rf_subvolume_and_free);
diff --git a/src/basic/selinux-util.c b/src/basic/selinux-util.c
new file mode 100644
index 0000000..dc06f3d
--- /dev/null
+++ b/src/basic/selinux-util.c
@@ -0,0 +1,518 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <malloc.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/un.h>
+#include <syslog.h>
+
+#if HAVE_SELINUX
+#include <selinux/context.h>
+#include <selinux/label.h>
+#include <selinux/selinux.h>
+#endif
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "stdio-util.h"
+#include "time-util.h"
+#include "util.h"
+
+#if HAVE_SELINUX
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, freecon);
+DEFINE_TRIVIAL_CLEANUP_FUNC(context_t, context_free);
+
+#define _cleanup_freecon_ _cleanup_(freeconp)
+#define _cleanup_context_free_ _cleanup_(context_freep)
+
+static int cached_use = -1;
+static struct selabel_handle *label_hnd = NULL;
+
+#define log_enforcing(...) log_full(security_getenforce() == 1 ? LOG_ERR : LOG_DEBUG, __VA_ARGS__)
+#define log_enforcing_errno(r, ...) log_full_errno(security_getenforce() == 1 ? LOG_ERR : LOG_DEBUG, r, __VA_ARGS__)
+#endif
+
+bool mac_selinux_use(void) {
+#if HAVE_SELINUX
+ if (cached_use < 0)
+ cached_use = is_selinux_enabled() > 0;
+
+ return cached_use;
+#else
+ return false;
+#endif
+}
+
+void mac_selinux_retest(void) {
+#if HAVE_SELINUX
+ cached_use = -1;
+#endif
+}
+
+int mac_selinux_init(void) {
+ int r = 0;
+
+#if HAVE_SELINUX
+ usec_t before_timestamp, after_timestamp;
+ struct mallinfo before_mallinfo, after_mallinfo;
+
+ if (label_hnd)
+ return 0;
+
+ if (!mac_selinux_use())
+ return 0;
+
+ before_mallinfo = mallinfo();
+ before_timestamp = now(CLOCK_MONOTONIC);
+
+ label_hnd = selabel_open(SELABEL_CTX_FILE, NULL, 0);
+ if (!label_hnd) {
+ log_enforcing_errno(errno, "Failed to initialize SELinux context: %m");
+ r = security_getenforce() == 1 ? -errno : 0;
+ } else {
+ char timespan[FORMAT_TIMESPAN_MAX];
+ int l;
+
+ after_timestamp = now(CLOCK_MONOTONIC);
+ after_mallinfo = mallinfo();
+
+ l = after_mallinfo.uordblks > before_mallinfo.uordblks ? after_mallinfo.uordblks - before_mallinfo.uordblks : 0;
+
+ log_debug("Successfully loaded SELinux database in %s, size on heap is %iK.",
+ format_timespan(timespan, sizeof(timespan), after_timestamp - before_timestamp, 0),
+ (l+1023)/1024);
+ }
+#endif
+
+ return r;
+}
+
+void mac_selinux_finish(void) {
+
+#if HAVE_SELINUX
+ if (!label_hnd)
+ return;
+
+ selabel_close(label_hnd);
+ label_hnd = NULL;
+#endif
+}
+
+int mac_selinux_fix(const char *path, LabelFixFlags flags) {
+
+#if HAVE_SELINUX
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ _cleanup_freecon_ char* fcon = NULL;
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ int r;
+
+ assert(path);
+
+ /* if mac_selinux_init() wasn't called before we are a NOOP */
+ if (!label_hnd)
+ return 0;
+
+ /* Open the file as O_PATH, to pin it while we determine and adjust the label */
+ fd = open(path, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (fd < 0) {
+ if ((flags & LABEL_IGNORE_ENOENT) && errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (selabel_lookup_raw(label_hnd, &fcon, path, st.st_mode) < 0) {
+ r = -errno;
+
+ /* If there's no label to set, then exit without warning */
+ if (r == -ENOENT)
+ return 0;
+
+ goto fail;
+ }
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ if (setfilecon_raw(procfs_path, fcon) < 0) {
+ _cleanup_freecon_ char *oldcon = NULL;
+
+ r = -errno;
+
+ /* If the FS doesn't support labels, then exit without warning */
+ if (r == -EOPNOTSUPP)
+ return 0;
+
+ /* It the FS is read-only and we were told to ignore failures caused by that, suppress error */
+ if (r == -EROFS && (flags & LABEL_IGNORE_EROFS))
+ return 0;
+
+ /* If the old label is identical to the new one, suppress any kind of error */
+ if (getfilecon_raw(procfs_path, &oldcon) >= 0 && streq(fcon, oldcon))
+ return 0;
+
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ log_enforcing_errno(r, "Unable to fix SELinux security context of %s: %m", path);
+ if (security_getenforce() == 1)
+ return r;
+#endif
+
+ return 0;
+}
+
+int mac_selinux_apply(const char *path, const char *label) {
+
+#if HAVE_SELINUX
+ if (!mac_selinux_use())
+ return 0;
+
+ assert(path);
+ assert(label);
+
+ if (setfilecon(path, label) < 0) {
+ log_enforcing_errno(errno, "Failed to set SELinux security context %s on path %s: %m", label, path);
+ if (security_getenforce() > 0)
+ return -errno;
+ }
+#endif
+ return 0;
+}
+
+int mac_selinux_get_create_label_from_exe(const char *exe, char **label) {
+ int r = -EOPNOTSUPP;
+
+#if HAVE_SELINUX
+ _cleanup_freecon_ char *mycon = NULL, *fcon = NULL;
+ security_class_t sclass;
+
+ assert(exe);
+ assert(label);
+
+ if (!mac_selinux_use())
+ return -EOPNOTSUPP;
+
+ r = getcon_raw(&mycon);
+ if (r < 0)
+ return -errno;
+
+ r = getfilecon_raw(exe, &fcon);
+ if (r < 0)
+ return -errno;
+
+ sclass = string_to_security_class("process");
+ r = security_compute_create_raw(mycon, fcon, sclass, label);
+ if (r < 0)
+ return -errno;
+#endif
+
+ return r;
+}
+
+int mac_selinux_get_our_label(char **label) {
+ int r = -EOPNOTSUPP;
+
+ assert(label);
+
+#if HAVE_SELINUX
+ if (!mac_selinux_use())
+ return -EOPNOTSUPP;
+
+ r = getcon_raw(label);
+ if (r < 0)
+ return -errno;
+#endif
+
+ return r;
+}
+
+int mac_selinux_get_child_mls_label(int socket_fd, const char *exe, const char *exec_label, char **label) {
+ int r = -EOPNOTSUPP;
+
+#if HAVE_SELINUX
+ _cleanup_freecon_ char *mycon = NULL, *peercon = NULL, *fcon = NULL;
+ _cleanup_context_free_ context_t pcon = NULL, bcon = NULL;
+ security_class_t sclass;
+ const char *range = NULL;
+
+ assert(socket_fd >= 0);
+ assert(exe);
+ assert(label);
+
+ if (!mac_selinux_use())
+ return -EOPNOTSUPP;
+
+ r = getcon_raw(&mycon);
+ if (r < 0)
+ return -errno;
+
+ r = getpeercon_raw(socket_fd, &peercon);
+ if (r < 0)
+ return -errno;
+
+ if (!exec_label) {
+ /* If there is no context set for next exec let's use context
+ of target executable */
+ r = getfilecon_raw(exe, &fcon);
+ if (r < 0)
+ return -errno;
+ }
+
+ bcon = context_new(mycon);
+ if (!bcon)
+ return -ENOMEM;
+
+ pcon = context_new(peercon);
+ if (!pcon)
+ return -ENOMEM;
+
+ range = context_range_get(pcon);
+ if (!range)
+ return -errno;
+
+ r = context_range_set(bcon, range);
+ if (r)
+ return -errno;
+
+ freecon(mycon);
+ mycon = strdup(context_str(bcon));
+ if (!mycon)
+ return -ENOMEM;
+
+ sclass = string_to_security_class("process");
+ r = security_compute_create_raw(mycon, fcon, sclass, label);
+ if (r < 0)
+ return -errno;
+#endif
+
+ return r;
+}
+
+char* mac_selinux_free(char *label) {
+
+#if HAVE_SELINUX
+ if (!label)
+ return NULL;
+
+ if (!mac_selinux_use())
+ return NULL;
+
+ freecon(label);
+#endif
+
+ return NULL;
+}
+
+#if HAVE_SELINUX
+static int selinux_create_file_prepare_abspath(const char *abspath, mode_t mode) {
+ _cleanup_freecon_ char *filecon = NULL;
+ int r;
+
+ assert(abspath);
+ assert(path_is_absolute(abspath));
+
+ r = selabel_lookup_raw(label_hnd, &filecon, abspath, mode);
+ if (r < 0) {
+ /* No context specified by the policy? Proceed without setting it. */
+ if (errno == ENOENT)
+ return 0;
+
+ log_enforcing_errno(errno, "Failed to determine SELinux security context for %s: %m", abspath);
+ } else {
+ if (setfscreatecon_raw(filecon) >= 0)
+ return 0; /* Success! */
+
+ log_enforcing_errno(errno, "Failed to set SELinux security context %s for %s: %m", filecon, abspath);
+ }
+
+ if (security_getenforce() > 0)
+ return -errno;
+
+ return 0;
+}
+#endif
+
+int mac_selinux_create_file_prepare_at(int dirfd, const char *path, mode_t mode) {
+ int r = 0;
+
+#if HAVE_SELINUX
+ _cleanup_free_ char *abspath = NULL;
+
+ assert(path);
+
+ if (!label_hnd)
+ return 0;
+
+ if (!path_is_absolute(path)) {
+ _cleanup_free_ char *p = NULL;
+
+ if (dirfd == AT_FDCWD)
+ r = safe_getcwd(&p);
+ else
+ r = fd_get_path(dirfd, &p);
+ if (r < 0)
+ return r;
+
+ path = abspath = path_join(p, path);
+ if (!path)
+ return -ENOMEM;
+ }
+
+ r = selinux_create_file_prepare_abspath(path, mode);
+#endif
+ return r;
+}
+
+int mac_selinux_create_file_prepare(const char *path, mode_t mode) {
+ int r = 0;
+
+#if HAVE_SELINUX
+ _cleanup_free_ char *abspath = NULL;
+
+ assert(path);
+
+ if (!label_hnd)
+ return 0;
+
+ r = path_make_absolute_cwd(path, &abspath);
+ if (r < 0)
+ return r;
+
+ r = selinux_create_file_prepare_abspath(abspath, mode);
+#endif
+ return r;
+}
+
+void mac_selinux_create_file_clear(void) {
+
+#if HAVE_SELINUX
+ PROTECT_ERRNO;
+
+ if (!mac_selinux_use())
+ return;
+
+ setfscreatecon_raw(NULL);
+#endif
+}
+
+int mac_selinux_create_socket_prepare(const char *label) {
+
+#if HAVE_SELINUX
+ if (!mac_selinux_use())
+ return 0;
+
+ assert(label);
+
+ if (setsockcreatecon(label) < 0) {
+ log_enforcing_errno(errno, "Failed to set SELinux security context %s for sockets: %m", label);
+
+ if (security_getenforce() == 1)
+ return -errno;
+ }
+#endif
+
+ return 0;
+}
+
+void mac_selinux_create_socket_clear(void) {
+
+#if HAVE_SELINUX
+ PROTECT_ERRNO;
+
+ if (!mac_selinux_use())
+ return;
+
+ setsockcreatecon_raw(NULL);
+#endif
+}
+
+int mac_selinux_bind(int fd, const struct sockaddr *addr, socklen_t addrlen) {
+
+ /* Binds a socket and label its file system object according to the SELinux policy */
+
+#if HAVE_SELINUX
+ _cleanup_freecon_ char *fcon = NULL;
+ const struct sockaddr_un *un;
+ bool context_changed = false;
+ char *path;
+ int r;
+
+ assert(fd >= 0);
+ assert(addr);
+ assert(addrlen >= sizeof(sa_family_t));
+
+ if (!label_hnd)
+ goto skipped;
+
+ /* Filter out non-local sockets */
+ if (addr->sa_family != AF_UNIX)
+ goto skipped;
+
+ /* Filter out anonymous sockets */
+ if (addrlen < offsetof(struct sockaddr_un, sun_path) + 1)
+ goto skipped;
+
+ /* Filter out abstract namespace sockets */
+ un = (const struct sockaddr_un*) addr;
+ if (un->sun_path[0] == 0)
+ goto skipped;
+
+ path = strndupa(un->sun_path, addrlen - offsetof(struct sockaddr_un, sun_path));
+
+ if (path_is_absolute(path))
+ r = selabel_lookup_raw(label_hnd, &fcon, path, S_IFSOCK);
+ else {
+ _cleanup_free_ char *newpath = NULL;
+
+ r = path_make_absolute_cwd(path, &newpath);
+ if (r < 0)
+ return r;
+
+ r = selabel_lookup_raw(label_hnd, &fcon, newpath, S_IFSOCK);
+ }
+
+ if (r < 0) {
+ /* No context specified by the policy? Proceed without setting it */
+ if (errno == ENOENT)
+ goto skipped;
+
+ log_enforcing_errno(errno, "Failed to determine SELinux security context for %s: %m", path);
+ if (security_getenforce() > 0)
+ return -errno;
+
+ } else {
+ if (setfscreatecon_raw(fcon) < 0) {
+ log_enforcing_errno(errno, "Failed to set SELinux security context %s for %s: %m", fcon, path);
+ if (security_getenforce() > 0)
+ return -errno;
+ } else
+ context_changed = true;
+ }
+
+ r = bind(fd, addr, addrlen) < 0 ? -errno : 0;
+
+ if (context_changed)
+ setfscreatecon_raw(NULL);
+
+ return r;
+
+skipped:
+#endif
+ if (bind(fd, addr, addrlen) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/basic/selinux-util.h b/src/basic/selinux-util.h
new file mode 100644
index 0000000..bd5207c
--- /dev/null
+++ b/src/basic/selinux-util.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "label.h"
+
+bool mac_selinux_use(void);
+void mac_selinux_retest(void);
+
+int mac_selinux_init(void);
+void mac_selinux_finish(void);
+
+int mac_selinux_fix(const char *path, LabelFixFlags flags);
+int mac_selinux_apply(const char *path, const char *label);
+
+int mac_selinux_get_create_label_from_exe(const char *exe, char **label);
+int mac_selinux_get_our_label(char **label);
+int mac_selinux_get_child_mls_label(int socket_fd, const char *exe, const char *exec_label, char **label);
+char* mac_selinux_free(char *label);
+
+int mac_selinux_create_file_prepare(const char *path, mode_t mode);
+int mac_selinux_create_file_prepare_at(int dirfd, const char *path, mode_t mode);
+void mac_selinux_create_file_clear(void);
+
+int mac_selinux_create_socket_prepare(const char *label);
+void mac_selinux_create_socket_clear(void);
+
+int mac_selinux_bind(int fd, const struct sockaddr *addr, socklen_t addrlen);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, mac_selinux_free);
diff --git a/src/basic/set.h b/src/basic/set.h
new file mode 100644
index 0000000..2a80632
--- /dev/null
+++ b/src/basic/set.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "extract-word.h"
+#include "hashmap.h"
+#include "macro.h"
+
+Set *internal_set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+#define set_new(ops) internal_set_new(ops HASHMAP_DEBUG_SRC_ARGS)
+
+static inline Set *set_free(Set *s) {
+ return (Set*) internal_hashmap_free(HASHMAP_BASE(s), NULL, NULL);
+}
+
+static inline Set *set_free_free(Set *s) {
+ return (Set*) internal_hashmap_free(HASHMAP_BASE(s), free, NULL);
+}
+
+/* no set_free_free_free */
+
+static inline Set *set_copy(Set *s) {
+ return (Set*) internal_hashmap_copy(HASHMAP_BASE(s));
+}
+
+int internal_set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+#define set_ensure_allocated(h, ops) internal_set_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS)
+
+int set_put(Set *s, const void *key);
+/* no set_update */
+/* no set_replace */
+static inline void *set_get(Set *s, void *key) {
+ return internal_hashmap_get(HASHMAP_BASE(s), key);
+}
+/* no set_get2 */
+
+static inline bool set_contains(Set *s, const void *key) {
+ return internal_hashmap_contains(HASHMAP_BASE(s), key);
+}
+
+static inline void *set_remove(Set *s, const void *key) {
+ return internal_hashmap_remove(HASHMAP_BASE(s), key);
+}
+
+/* no set_remove2 */
+/* no set_remove_value */
+int set_remove_and_put(Set *s, const void *old_key, const void *new_key);
+/* no set_remove_and_replace */
+int set_merge(Set *s, Set *other);
+
+static inline int set_reserve(Set *h, unsigned entries_add) {
+ return internal_hashmap_reserve(HASHMAP_BASE(h), entries_add);
+}
+
+static inline int set_move(Set *s, Set *other) {
+ return internal_hashmap_move(HASHMAP_BASE(s), HASHMAP_BASE(other));
+}
+
+static inline int set_move_one(Set *s, Set *other, const void *key) {
+ return internal_hashmap_move_one(HASHMAP_BASE(s), HASHMAP_BASE(other), key);
+}
+
+static inline unsigned set_size(Set *s) {
+ return internal_hashmap_size(HASHMAP_BASE(s));
+}
+
+static inline bool set_isempty(Set *s) {
+ return set_size(s) == 0;
+}
+
+static inline unsigned set_buckets(Set *s) {
+ return internal_hashmap_buckets(HASHMAP_BASE(s));
+}
+
+bool set_iterate(Set *s, Iterator *i, void **value);
+
+static inline void set_clear(Set *s) {
+ internal_hashmap_clear(HASHMAP_BASE(s), NULL, NULL);
+}
+
+static inline void set_clear_free(Set *s) {
+ internal_hashmap_clear(HASHMAP_BASE(s), free, NULL);
+}
+
+/* no set_clear_free_free */
+
+static inline void *set_steal_first(Set *s) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(s), true, NULL);
+}
+
+#define set_clear_with_destructor(_s, _f) \
+ ({ \
+ void *_item; \
+ while ((_item = set_steal_first(_s))) \
+ _f(_item); \
+ })
+#define set_free_with_destructor(_s, _f) \
+ ({ \
+ set_clear_with_destructor(_s, _f); \
+ set_free(_s); \
+ })
+
+/* no set_steal_first_key */
+/* no set_first_key */
+
+static inline void *set_first(Set *s) {
+ return internal_hashmap_first_key_and_value(HASHMAP_BASE(s), false, NULL);
+}
+
+/* no set_next */
+
+static inline char **set_get_strv(Set *s) {
+ return internal_hashmap_get_strv(HASHMAP_BASE(s));
+}
+
+int set_consume(Set *s, void *value);
+int set_put_strdup(Set *s, const char *p);
+int set_put_strdupv(Set *s, char **l);
+int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags);
+
+#define SET_FOREACH(e, s, i) \
+ for ((i) = ITERATOR_FIRST; set_iterate((s), &(i), (void**)&(e)); )
+
+#define SET_FOREACH_MOVE(e, d, s) \
+ for (; ({ e = set_first(s); assert_se(!e || set_move_one(d, s, e) >= 0); e; }); )
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Set*, set_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Set*, set_free_free);
+
+#define _cleanup_set_free_ _cleanup_(set_freep)
+#define _cleanup_set_free_free_ _cleanup_(set_free_freep)
diff --git a/src/basic/sigbus.c b/src/basic/sigbus.c
new file mode 100644
index 0000000..d5254ea
--- /dev/null
+++ b/src/basic/sigbus.c
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stddef.h>
+#include <sys/mman.h>
+
+#include "macro.h"
+#include "sigbus.h"
+#include "util.h"
+
+#define SIGBUS_QUEUE_MAX 64
+
+static struct sigaction old_sigaction;
+static unsigned n_installed = 0;
+
+/* We maintain a fixed size list of page addresses that triggered a
+ SIGBUS. We access with list with atomic operations, so that we
+ don't have to deal with locks between signal handler and main
+ programs in possibly multiple threads. */
+
+static void* volatile sigbus_queue[SIGBUS_QUEUE_MAX];
+static volatile sig_atomic_t n_sigbus_queue = 0;
+
+static void sigbus_push(void *addr) {
+ unsigned u;
+
+ assert(addr);
+
+ /* Find a free place, increase the number of entries and leave, if we can */
+ for (u = 0; u < SIGBUS_QUEUE_MAX; u++)
+ if (__sync_bool_compare_and_swap(&sigbus_queue[u], NULL, addr)) {
+ __sync_fetch_and_add(&n_sigbus_queue, 1);
+ return;
+ }
+
+ /* If we can't, make sure the queue size is out of bounds, to
+ * mark it as overflow */
+ for (;;) {
+ unsigned c;
+
+ __sync_synchronize();
+ c = n_sigbus_queue;
+
+ if (c > SIGBUS_QUEUE_MAX) /* already overflow */
+ return;
+
+ if (__sync_bool_compare_and_swap(&n_sigbus_queue, c, c + SIGBUS_QUEUE_MAX))
+ return;
+ }
+}
+
+int sigbus_pop(void **ret) {
+ assert(ret);
+
+ for (;;) {
+ unsigned u, c;
+
+ __sync_synchronize();
+ c = n_sigbus_queue;
+
+ if (_likely_(c == 0))
+ return 0;
+
+ if (_unlikely_(c >= SIGBUS_QUEUE_MAX))
+ return -EOVERFLOW;
+
+ for (u = 0; u < SIGBUS_QUEUE_MAX; u++) {
+ void *addr;
+
+ addr = sigbus_queue[u];
+ if (!addr)
+ continue;
+
+ if (__sync_bool_compare_and_swap(&sigbus_queue[u], addr, NULL)) {
+ __sync_fetch_and_sub(&n_sigbus_queue, 1);
+ *ret = addr;
+ return 1;
+ }
+ }
+ }
+}
+
+static void sigbus_handler(int sn, siginfo_t *si, void *data) {
+ unsigned long ul;
+ void *aligned;
+
+ assert(sn == SIGBUS);
+ assert(si);
+
+ if (si->si_code != BUS_ADRERR || !si->si_addr) {
+ assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0);
+ raise(SIGBUS);
+ return;
+ }
+
+ ul = (unsigned long) si->si_addr;
+ ul = ul / page_size();
+ ul = ul * page_size();
+ aligned = (void*) ul;
+
+ /* Let's remember which address failed */
+ sigbus_push(aligned);
+
+ /* Replace mapping with an anonymous page, so that the
+ * execution can continue, however with a zeroed out page */
+ assert_se(mmap(aligned, page_size(), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == aligned);
+}
+
+void sigbus_install(void) {
+ struct sigaction sa = {
+ .sa_sigaction = sigbus_handler,
+ .sa_flags = SA_SIGINFO,
+ };
+
+ /* make sure that sysconf() is not called from a signal handler because
+ * it is not guaranteed to be async-signal-safe since POSIX.1-2008 */
+ (void) page_size();
+
+ n_installed++;
+
+ if (n_installed == 1)
+ assert_se(sigaction(SIGBUS, &sa, &old_sigaction) == 0);
+
+ return;
+}
+
+void sigbus_reset(void) {
+
+ if (n_installed <= 0)
+ return;
+
+ n_installed--;
+
+ if (n_installed == 0)
+ assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0);
+
+ return;
+}
diff --git a/src/basic/sigbus.h b/src/basic/sigbus.h
new file mode 100644
index 0000000..459e19f
--- /dev/null
+++ b/src/basic/sigbus.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+void sigbus_install(void);
+void sigbus_reset(void);
+
+int sigbus_pop(void **ret);
diff --git a/src/basic/signal-util.c b/src/basic/signal-util.c
new file mode 100644
index 0000000..fb8a63f
--- /dev/null
+++ b/src/basic/signal-util.c
@@ -0,0 +1,290 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "macro.h"
+#include "parse-util.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+
+int reset_all_signal_handlers(void) {
+ static const struct sigaction sa = {
+ .sa_handler = SIG_DFL,
+ .sa_flags = SA_RESTART,
+ };
+ int sig, r = 0;
+
+ for (sig = 1; sig < _NSIG; sig++) {
+
+ /* These two cannot be caught... */
+ if (IN_SET(sig, SIGKILL, SIGSTOP))
+ continue;
+
+ /* On Linux the first two RT signals are reserved by
+ * glibc, and sigaction() will return EINVAL for them. */
+ if ((sigaction(sig, &sa, NULL) < 0))
+ if (errno != EINVAL && r >= 0)
+ r = -errno;
+ }
+
+ return r;
+}
+
+int reset_signal_mask(void) {
+ sigset_t ss;
+
+ if (sigemptyset(&ss) < 0)
+ return -errno;
+
+ if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int sigaction_many_ap(const struct sigaction *sa, int sig, va_list ap) {
+ int r = 0;
+
+ /* negative signal ends the list. 0 signal is skipped. */
+
+ if (sig < 0)
+ return 0;
+
+ if (sig > 0) {
+ if (sigaction(sig, sa, NULL) < 0)
+ r = -errno;
+ }
+
+ while ((sig = va_arg(ap, int)) >= 0) {
+
+ if (sig == 0)
+ continue;
+
+ if (sigaction(sig, sa, NULL) < 0) {
+ if (r >= 0)
+ r = -errno;
+ }
+ }
+
+ return r;
+}
+
+int sigaction_many(const struct sigaction *sa, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, sa);
+ r = sigaction_many_ap(sa, 0, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int ignore_signals(int sig, ...) {
+
+ static const struct sigaction sa = {
+ .sa_handler = SIG_IGN,
+ .sa_flags = SA_RESTART,
+ };
+
+ va_list ap;
+ int r;
+
+ va_start(ap, sig);
+ r = sigaction_many_ap(&sa, sig, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int default_signals(int sig, ...) {
+
+ static const struct sigaction sa = {
+ .sa_handler = SIG_DFL,
+ .sa_flags = SA_RESTART,
+ };
+
+ va_list ap;
+ int r;
+
+ va_start(ap, sig);
+ r = sigaction_many_ap(&sa, sig, ap);
+ va_end(ap);
+
+ return r;
+}
+
+static int sigset_add_many_ap(sigset_t *ss, va_list ap) {
+ int sig, r = 0;
+
+ assert(ss);
+
+ while ((sig = va_arg(ap, int)) >= 0) {
+
+ if (sig == 0)
+ continue;
+
+ if (sigaddset(ss, sig) < 0) {
+ if (r >= 0)
+ r = -errno;
+ }
+ }
+
+ return r;
+}
+
+int sigset_add_many(sigset_t *ss, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, ss);
+ r = sigset_add_many_ap(ss, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int sigprocmask_many(int how, sigset_t *old, ...) {
+ va_list ap;
+ sigset_t ss;
+ int r;
+
+ if (sigemptyset(&ss) < 0)
+ return -errno;
+
+ va_start(ap, old);
+ r = sigset_add_many_ap(&ss, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ if (sigprocmask(how, &ss, old) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static const char *const __signal_table[] = {
+ [SIGHUP] = "HUP",
+ [SIGINT] = "INT",
+ [SIGQUIT] = "QUIT",
+ [SIGILL] = "ILL",
+ [SIGTRAP] = "TRAP",
+ [SIGABRT] = "ABRT",
+ [SIGBUS] = "BUS",
+ [SIGFPE] = "FPE",
+ [SIGKILL] = "KILL",
+ [SIGUSR1] = "USR1",
+ [SIGSEGV] = "SEGV",
+ [SIGUSR2] = "USR2",
+ [SIGPIPE] = "PIPE",
+ [SIGALRM] = "ALRM",
+ [SIGTERM] = "TERM",
+#ifdef SIGSTKFLT
+ [SIGSTKFLT] = "STKFLT", /* Linux on SPARC doesn't know SIGSTKFLT */
+#endif
+ [SIGCHLD] = "CHLD",
+ [SIGCONT] = "CONT",
+ [SIGSTOP] = "STOP",
+ [SIGTSTP] = "TSTP",
+ [SIGTTIN] = "TTIN",
+ [SIGTTOU] = "TTOU",
+ [SIGURG] = "URG",
+ [SIGXCPU] = "XCPU",
+ [SIGXFSZ] = "XFSZ",
+ [SIGVTALRM] = "VTALRM",
+ [SIGPROF] = "PROF",
+ [SIGWINCH] = "WINCH",
+ [SIGIO] = "IO",
+ [SIGPWR] = "PWR",
+ [SIGSYS] = "SYS"
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(__signal, int);
+
+const char *signal_to_string(int signo) {
+ static thread_local char buf[STRLEN("RTMIN+") + DECIMAL_STR_MAX(int) + 1];
+ const char *name;
+
+ name = __signal_to_string(signo);
+ if (name)
+ return name;
+
+ if (signo >= SIGRTMIN && signo <= SIGRTMAX)
+ xsprintf(buf, "RTMIN+%d", signo - SIGRTMIN);
+ else
+ xsprintf(buf, "%d", signo);
+
+ return buf;
+}
+
+int signal_from_string(const char *s) {
+ const char *p;
+ int signo, r;
+
+ /* Check that the input is a signal number. */
+ if (safe_atoi(s, &signo) >= 0) {
+ if (SIGNAL_VALID(signo))
+ return signo;
+ else
+ return -ERANGE;
+ }
+
+ /* Drop "SIG" prefix. */
+ if (startswith(s, "SIG"))
+ s += 3;
+
+ /* Check that the input is a signal name. */
+ signo = __signal_from_string(s);
+ if (signo > 0)
+ return signo;
+
+ /* Check that the input is RTMIN or
+ * RTMIN+n (0 <= n <= SIGRTMAX-SIGRTMIN). */
+ p = startswith(s, "RTMIN");
+ if (p) {
+ if (*p == '\0')
+ return SIGRTMIN;
+ if (*p != '+')
+ return -EINVAL;
+
+ r = safe_atoi(p, &signo);
+ if (r < 0)
+ return r;
+
+ if (signo < 0 || signo > SIGRTMAX - SIGRTMIN)
+ return -ERANGE;
+
+ return signo + SIGRTMIN;
+ }
+
+ /* Check that the input is RTMAX or
+ * RTMAX-n (0 <= n <= SIGRTMAX-SIGRTMIN). */
+ p = startswith(s, "RTMAX");
+ if (p) {
+ if (*p == '\0')
+ return SIGRTMAX;
+ if (*p != '-')
+ return -EINVAL;
+
+ r = safe_atoi(p, &signo);
+ if (r < 0)
+ return r;
+
+ if (signo > 0 || signo < SIGRTMIN - SIGRTMAX)
+ return -ERANGE;
+
+ return signo + SIGRTMAX;
+ }
+
+ return -EINVAL;
+}
+
+void nop_signal_handler(int sig) {
+ /* nothing here */
+}
diff --git a/src/basic/signal-util.h b/src/basic/signal-util.h
new file mode 100644
index 0000000..92f2804
--- /dev/null
+++ b/src/basic/signal-util.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <signal.h>
+
+#include "macro.h"
+
+int reset_all_signal_handlers(void);
+int reset_signal_mask(void);
+
+int ignore_signals(int sig, ...);
+int default_signals(int sig, ...);
+int sigaction_many(const struct sigaction *sa, ...);
+
+int sigset_add_many(sigset_t *ss, ...);
+int sigprocmask_many(int how, sigset_t *old, ...);
+
+const char *signal_to_string(int i) _const_;
+int signal_from_string(const char *s) _pure_;
+
+void nop_signal_handler(int sig);
+
+static inline void block_signals_reset(sigset_t *ss) {
+ assert_se(sigprocmask(SIG_SETMASK, ss, NULL) >= 0);
+}
+
+#define BLOCK_SIGNALS(...) \
+ _cleanup_(block_signals_reset) _unused_ sigset_t _saved_sigset = ({ \
+ sigset_t _t; \
+ assert_se(sigprocmask_many(SIG_BLOCK, &_t, __VA_ARGS__, -1) >= 0); \
+ _t; \
+ })
+
+static inline bool SIGNAL_VALID(int signo) {
+ return signo > 0 && signo < _NSIG;
+}
+
+static inline const char* signal_to_string_with_check(int n) {
+ if (!SIGNAL_VALID(n))
+ return NULL;
+
+ return signal_to_string(n);
+}
diff --git a/src/basic/siphash24.c b/src/basic/siphash24.c
new file mode 100644
index 0000000..6118081
--- /dev/null
+++ b/src/basic/siphash24.c
@@ -0,0 +1,200 @@
+/*
+ SipHash reference C implementation
+
+ Written in 2012 by
+ Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
+ Daniel J. Bernstein <djb@cr.yp.to>
+
+ To the extent possible under law, the author(s) have dedicated all copyright
+ and related and neighboring rights to this software to the public domain
+ worldwide. This software is distributed without any warranty.
+
+ You should have received a copy of the CC0 Public Domain Dedication along with
+ this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
+
+ (Minimal changes made by Lennart Poettering, to make clean for inclusion in systemd)
+ (Refactored by Tom Gundersen to split up in several functions and follow systemd
+ coding style)
+*/
+
+#include <stdio.h>
+
+#include "macro.h"
+#include "siphash24.h"
+#include "unaligned.h"
+
+static uint64_t rotate_left(uint64_t x, uint8_t b) {
+ assert(b < 64);
+
+ return (x << b) | (x >> (64 - b));
+}
+
+static void sipround(struct siphash *state) {
+ assert(state);
+
+ state->v0 += state->v1;
+ state->v1 = rotate_left(state->v1, 13);
+ state->v1 ^= state->v0;
+ state->v0 = rotate_left(state->v0, 32);
+ state->v2 += state->v3;
+ state->v3 = rotate_left(state->v3, 16);
+ state->v3 ^= state->v2;
+ state->v0 += state->v3;
+ state->v3 = rotate_left(state->v3, 21);
+ state->v3 ^= state->v0;
+ state->v2 += state->v1;
+ state->v1 = rotate_left(state->v1, 17);
+ state->v1 ^= state->v2;
+ state->v2 = rotate_left(state->v2, 32);
+}
+
+void siphash24_init(struct siphash *state, const uint8_t k[static 16]) {
+ uint64_t k0, k1;
+
+ assert(state);
+ assert(k);
+
+ k0 = unaligned_read_le64(k);
+ k1 = unaligned_read_le64(k + 8);
+
+ *state = (struct siphash) {
+ /* "somepseudorandomlygeneratedbytes" */
+ .v0 = 0x736f6d6570736575ULL ^ k0,
+ .v1 = 0x646f72616e646f6dULL ^ k1,
+ .v2 = 0x6c7967656e657261ULL ^ k0,
+ .v3 = 0x7465646279746573ULL ^ k1,
+ .padding = 0,
+ .inlen = 0,
+ };
+}
+
+void siphash24_compress(const void *_in, size_t inlen, struct siphash *state) {
+
+ const uint8_t *in = _in;
+ const uint8_t *end = in + inlen;
+ size_t left = state->inlen & 7;
+ uint64_t m;
+
+ assert(in);
+ assert(state);
+
+ /* Update total length */
+ state->inlen += inlen;
+
+ /* If padding exists, fill it out */
+ if (left > 0) {
+ for ( ; in < end && left < 8; in ++, left ++)
+ state->padding |= ((uint64_t) *in) << (left * 8);
+
+ if (in == end && left < 8)
+ /* We did not have enough input to fill out the padding completely */
+ return;
+
+#if ENABLE_DEBUG_SIPHASH
+ printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0);
+ printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1);
+ printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2);
+ printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3);
+ printf("(%3zu) compress padding %08x %08x\n", state->inlen, (uint32_t) (state->padding >> 32), (uint32_t)state->padding);
+#endif
+
+ state->v3 ^= state->padding;
+ sipround(state);
+ sipround(state);
+ state->v0 ^= state->padding;
+
+ state->padding = 0;
+ }
+
+ end -= (state->inlen % sizeof(uint64_t));
+
+ for ( ; in < end; in += 8) {
+ m = unaligned_read_le64(in);
+#if ENABLE_DEBUG_SIPHASH
+ printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0);
+ printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1);
+ printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2);
+ printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3);
+ printf("(%3zu) compress %08x %08x\n", state->inlen, (uint32_t) (m >> 32), (uint32_t) m);
+#endif
+ state->v3 ^= m;
+ sipround(state);
+ sipround(state);
+ state->v0 ^= m;
+ }
+
+ left = state->inlen & 7;
+ switch (left) {
+ case 7:
+ state->padding |= ((uint64_t) in[6]) << 48;
+ _fallthrough_;
+ case 6:
+ state->padding |= ((uint64_t) in[5]) << 40;
+ _fallthrough_;
+ case 5:
+ state->padding |= ((uint64_t) in[4]) << 32;
+ _fallthrough_;
+ case 4:
+ state->padding |= ((uint64_t) in[3]) << 24;
+ _fallthrough_;
+ case 3:
+ state->padding |= ((uint64_t) in[2]) << 16;
+ _fallthrough_;
+ case 2:
+ state->padding |= ((uint64_t) in[1]) << 8;
+ _fallthrough_;
+ case 1:
+ state->padding |= ((uint64_t) in[0]);
+ _fallthrough_;
+ case 0:
+ break;
+ }
+}
+
+uint64_t siphash24_finalize(struct siphash *state) {
+ uint64_t b;
+
+ assert(state);
+
+ b = state->padding | (((uint64_t) state->inlen) << 56);
+
+#if ENABLE_DEBUG_SIPHASH
+ printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0);
+ printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1);
+ printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2);
+ printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3);
+ printf("(%3zu) padding %08x %08x\n", state->inlen, (uint32_t) (state->padding >> 32), (uint32_t) state->padding);
+#endif
+
+ state->v3 ^= b;
+ sipround(state);
+ sipround(state);
+ state->v0 ^= b;
+
+#if ENABLE_DEBUG_SIPHASH
+ printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0);
+ printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1);
+ printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2);
+ printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3);
+#endif
+ state->v2 ^= 0xff;
+
+ sipround(state);
+ sipround(state);
+ sipround(state);
+ sipround(state);
+
+ return state->v0 ^ state->v1 ^ state->v2 ^ state->v3;
+}
+
+uint64_t siphash24(const void *in, size_t inlen, const uint8_t k[static 16]) {
+ struct siphash state;
+
+ assert(in);
+ assert(k);
+
+ siphash24_init(&state, k);
+ siphash24_compress(in, inlen, &state);
+
+ return siphash24_finalize(&state);
+}
diff --git a/src/basic/siphash24.h b/src/basic/siphash24.h
new file mode 100644
index 0000000..67c4f75
--- /dev/null
+++ b/src/basic/siphash24.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/types.h>
+
+struct siphash {
+ uint64_t v0;
+ uint64_t v1;
+ uint64_t v2;
+ uint64_t v3;
+ uint64_t padding;
+ size_t inlen;
+};
+
+void siphash24_init(struct siphash *state, const uint8_t k[static 16]);
+void siphash24_compress(const void *in, size_t inlen, struct siphash *state);
+#define siphash24_compress_byte(byte, state) siphash24_compress((const uint8_t[]) { (byte) }, 1, (state))
+
+uint64_t siphash24_finalize(struct siphash *state);
+
+uint64_t siphash24(const void *in, size_t inlen, const uint8_t k[static 16]);
+
+static inline uint64_t siphash24_string(const char *s, const uint8_t k[static 16]) {
+ return siphash24(s, strlen(s) + 1, k);
+}
diff --git a/src/basic/smack-util.c b/src/basic/smack-util.c
new file mode 100644
index 0000000..123d00e
--- /dev/null
+++ b/src/basic/smack-util.c
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2013 Intel Corporation
+
+ Author: Auke Kok <auke-jan.h.kok@intel.com>
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "smack-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "xattr-util.h"
+
+#if ENABLE_SMACK
+bool mac_smack_use(void) {
+ static int cached_use = -1;
+
+ if (cached_use < 0)
+ cached_use = access("/sys/fs/smackfs/", F_OK) >= 0;
+
+ return cached_use;
+}
+
+static const char* const smack_attr_table[_SMACK_ATTR_MAX] = {
+ [SMACK_ATTR_ACCESS] = "security.SMACK64",
+ [SMACK_ATTR_EXEC] = "security.SMACK64EXEC",
+ [SMACK_ATTR_MMAP] = "security.SMACK64MMAP",
+ [SMACK_ATTR_TRANSMUTE] = "security.SMACK64TRANSMUTE",
+ [SMACK_ATTR_IPIN] = "security.SMACK64IPIN",
+ [SMACK_ATTR_IPOUT] = "security.SMACK64IPOUT",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(smack_attr, SmackAttr);
+
+int mac_smack_read(const char *path, SmackAttr attr, char **label) {
+ assert(path);
+ assert(attr >= 0 && attr < _SMACK_ATTR_MAX);
+ assert(label);
+
+ if (!mac_smack_use())
+ return 0;
+
+ return getxattr_malloc(path, smack_attr_to_string(attr), label, true);
+}
+
+int mac_smack_read_fd(int fd, SmackAttr attr, char **label) {
+ assert(fd >= 0);
+ assert(attr >= 0 && attr < _SMACK_ATTR_MAX);
+ assert(label);
+
+ if (!mac_smack_use())
+ return 0;
+
+ return fgetxattr_malloc(fd, smack_attr_to_string(attr), label);
+}
+
+int mac_smack_apply(const char *path, SmackAttr attr, const char *label) {
+ int r;
+
+ assert(path);
+ assert(attr >= 0 && attr < _SMACK_ATTR_MAX);
+
+ if (!mac_smack_use())
+ return 0;
+
+ if (label)
+ r = lsetxattr(path, smack_attr_to_string(attr), label, strlen(label), 0);
+ else
+ r = lremovexattr(path, smack_attr_to_string(attr));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int mac_smack_apply_fd(int fd, SmackAttr attr, const char *label) {
+ int r;
+
+ assert(fd >= 0);
+ assert(attr >= 0 && attr < _SMACK_ATTR_MAX);
+
+ if (!mac_smack_use())
+ return 0;
+
+ if (label)
+ r = fsetxattr(fd, smack_attr_to_string(attr), label, strlen(label), 0);
+ else
+ r = fremovexattr(fd, smack_attr_to_string(attr));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int mac_smack_apply_pid(pid_t pid, const char *label) {
+ const char *p;
+ int r = 0;
+
+ assert(label);
+
+ if (!mac_smack_use())
+ return 0;
+
+ p = procfs_file_alloca(pid, "attr/current");
+ r = write_string_file(p, label, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+
+ return r;
+}
+
+static int smack_fix_fd(int fd , const char *abspath, LabelFixFlags flags) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ const char *label;
+ struct stat st;
+ int r;
+
+ /* The caller should have done the sanity checks. */
+ assert(abspath);
+ assert(path_is_absolute(abspath));
+
+ /* Path must be in /dev. */
+ if (!path_startswith(abspath, "/dev"))
+ return 0;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /*
+ * Label directories and character devices "*".
+ * Label symlinks "_".
+ * Don't change anything else.
+ */
+
+ if (S_ISDIR(st.st_mode))
+ label = SMACK_STAR_LABEL;
+ else if (S_ISLNK(st.st_mode))
+ label = SMACK_FLOOR_LABEL;
+ else if (S_ISCHR(st.st_mode))
+ label = SMACK_STAR_LABEL;
+ else
+ return 0;
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ if (setxattr(procfs_path, "security.SMACK64", label, strlen(label), 0) < 0) {
+ _cleanup_free_ char *old_label = NULL;
+
+ r = -errno;
+
+ /* If the FS doesn't support labels, then exit without warning */
+ if (r == -EOPNOTSUPP)
+ return 0;
+
+ /* It the FS is read-only and we were told to ignore failures caused by that, suppress error */
+ if (r == -EROFS && (flags & LABEL_IGNORE_EROFS))
+ return 0;
+
+ /* If the old label is identical to the new one, suppress any kind of error */
+ if (getxattr_malloc(procfs_path, "security.SMACK64", &old_label, false) >= 0 &&
+ streq(old_label, label))
+ return 0;
+
+ return log_debug_errno(r, "Unable to fix SMACK label of %s: %m", abspath);
+ }
+
+ return 0;
+}
+
+int mac_smack_fix_at(int dirfd, const char *path, LabelFixFlags flags) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(path);
+
+ if (!mac_smack_use())
+ return 0;
+
+ fd = openat(dirfd, path, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (fd < 0) {
+ if ((flags & LABEL_IGNORE_ENOENT) && errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ if (!path_is_absolute(path)) {
+ r = fd_get_path(fd, &p);
+ if (r < 0)
+ return r;
+ path = p;
+ }
+
+ return smack_fix_fd(fd, path, flags);
+}
+
+int mac_smack_fix(const char *path, LabelFixFlags flags) {
+ _cleanup_free_ char *abspath = NULL;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(path);
+
+ if (!mac_smack_use())
+ return 0;
+
+ r = path_make_absolute_cwd(path, &abspath);
+ if (r < 0)
+ return r;
+
+ fd = open(abspath, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (fd < 0) {
+ if ((flags & LABEL_IGNORE_ENOENT) && errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ return smack_fix_fd(fd, abspath, flags);
+}
+
+int mac_smack_copy(const char *dest, const char *src) {
+ int r = 0;
+ _cleanup_free_ char *label = NULL;
+
+ assert(dest);
+ assert(src);
+
+ r = mac_smack_read(src, SMACK_ATTR_ACCESS, &label);
+ if (r < 0)
+ return r;
+
+ r = mac_smack_apply(dest, SMACK_ATTR_ACCESS, label);
+ if (r < 0)
+ return r;
+
+ return r;
+}
+
+#else
+bool mac_smack_use(void) {
+ return false;
+}
+
+int mac_smack_read(const char *path, SmackAttr attr, char **label) {
+ return -EOPNOTSUPP;
+}
+
+int mac_smack_read_fd(int fd, SmackAttr attr, char **label) {
+ return -EOPNOTSUPP;
+}
+
+int mac_smack_apply(const char *path, SmackAttr attr, const char *label) {
+ return 0;
+}
+
+int mac_smack_apply_fd(int fd, SmackAttr attr, const char *label) {
+ return 0;
+}
+
+int mac_smack_apply_pid(pid_t pid, const char *label) {
+ return 0;
+}
+
+int mac_smack_fix(const char *path, LabelFixFlags flags) {
+ return 0;
+}
+
+int mac_smack_fix_at(int dirfd, const char *path, LabelFixFlags flags) {
+ return 0;
+}
+
+int mac_smack_copy(const char *dest, const char *src) {
+ return 0;
+}
+#endif
diff --git a/src/basic/smack-util.h b/src/basic/smack-util.h
new file mode 100644
index 0000000..395ec07
--- /dev/null
+++ b/src/basic/smack-util.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2013 Intel Corporation
+
+ Author: Auke Kok <auke-jan.h.kok@intel.com>
+***/
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "label.h"
+#include "macro.h"
+
+#define SMACK_FLOOR_LABEL "_"
+#define SMACK_STAR_LABEL "*"
+
+typedef enum SmackAttr {
+ SMACK_ATTR_ACCESS,
+ SMACK_ATTR_EXEC,
+ SMACK_ATTR_MMAP,
+ SMACK_ATTR_TRANSMUTE,
+ SMACK_ATTR_IPIN,
+ SMACK_ATTR_IPOUT,
+ _SMACK_ATTR_MAX,
+ _SMACK_ATTR_INVALID = -1,
+} SmackAttr;
+
+bool mac_smack_use(void);
+
+int mac_smack_fix(const char *path, LabelFixFlags flags);
+int mac_smack_fix_at(int dirfd, const char *path, LabelFixFlags flags);
+
+const char* smack_attr_to_string(SmackAttr i) _const_;
+SmackAttr smack_attr_from_string(const char *s) _pure_;
+int mac_smack_read(const char *path, SmackAttr attr, char **label);
+int mac_smack_read_fd(int fd, SmackAttr attr, char **label);
+int mac_smack_apply(const char *path, SmackAttr attr, const char *label);
+int mac_smack_apply_fd(int fd, SmackAttr attr, const char *label);
+int mac_smack_apply_pid(pid_t pid, const char *label);
+int mac_smack_copy(const char *dest, const char *src);
diff --git a/src/basic/socket-label.c b/src/basic/socket-label.c
new file mode 100644
index 0000000..4ed19cd
--- /dev/null
+++ b/src/basic/socket-label.c
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "selinux-util.h"
+#include "socket-util.h"
+#include "umask-util.h"
+
+int socket_address_listen(
+ const SocketAddress *a,
+ int flags,
+ int backlog,
+ SocketAddressBindIPv6Only only,
+ const char *bind_to_device,
+ bool reuse_port,
+ bool free_bind,
+ bool transparent,
+ mode_t directory_mode,
+ mode_t socket_mode,
+ const char *label) {
+
+ _cleanup_close_ int fd = -1;
+ const char *p;
+ int r;
+
+ assert(a);
+
+ r = socket_address_verify(a, true);
+ if (r < 0)
+ return r;
+
+ if (socket_address_family(a) == AF_INET6 && !socket_ipv6_is_supported())
+ return -EAFNOSUPPORT;
+
+ if (label) {
+ r = mac_selinux_create_socket_prepare(label);
+ if (r < 0)
+ return r;
+ }
+
+ fd = socket(socket_address_family(a), a->type | flags, a->protocol);
+ r = fd < 0 ? -errno : 0;
+
+ if (label)
+ mac_selinux_create_socket_clear();
+
+ if (r < 0)
+ return r;
+
+ if (socket_address_family(a) == AF_INET6 && only != SOCKET_ADDRESS_DEFAULT) {
+ r = setsockopt_int(fd, IPPROTO_IPV6, IPV6_V6ONLY, only == SOCKET_ADDRESS_IPV6_ONLY);
+ if (r < 0)
+ return r;
+ }
+
+ if (IN_SET(socket_address_family(a), AF_INET, AF_INET6)) {
+ if (bind_to_device)
+ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, bind_to_device, strlen(bind_to_device)+1) < 0)
+ return -errno;
+
+ if (reuse_port) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEPORT, true);
+ if (r < 0)
+ log_warning_errno(r, "SO_REUSEPORT failed: %m");
+ }
+
+ if (free_bind) {
+ r = setsockopt_int(fd, IPPROTO_IP, IP_FREEBIND, true);
+ if (r < 0)
+ log_warning_errno(r, "IP_FREEBIND failed: %m");
+ }
+
+ if (transparent) {
+ r = setsockopt_int(fd, IPPROTO_IP, IP_TRANSPARENT, true);
+ if (r < 0)
+ log_warning_errno(r, "IP_TRANSPARENT failed: %m");
+ }
+ }
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return r;
+
+ p = socket_address_get_path(a);
+ if (p) {
+ /* Create parents */
+ (void) mkdir_parents_label(p, directory_mode);
+
+ /* Enforce the right access mode for the socket */
+ RUN_WITH_UMASK(~socket_mode) {
+ r = mac_selinux_bind(fd, &a->sockaddr.sa, a->size);
+ if (r == -EADDRINUSE) {
+ /* Unlink and try again */
+
+ if (unlink(p) < 0)
+ return r; /* didn't work, return original error */
+
+ r = mac_selinux_bind(fd, &a->sockaddr.sa, a->size);
+ }
+ if (r < 0)
+ return r;
+ }
+ } else {
+ if (bind(fd, &a->sockaddr.sa, a->size) < 0)
+ return -errno;
+ }
+
+ if (socket_address_can_accept(a))
+ if (listen(fd, backlog) < 0)
+ return -errno;
+
+ /* Let's trigger an inotify event on the socket node, so that anyone waiting for this socket to be connectable
+ * gets notified */
+ if (p)
+ (void) touch(p);
+
+ r = fd;
+ fd = -1;
+
+ return r;
+}
+
+int make_socket_fd(int log_level, const char* address, int type, int flags) {
+ SocketAddress a;
+ int fd, r;
+
+ r = socket_address_parse(&a, address);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse socket address \"%s\": %m", address);
+
+ a.type = type;
+
+ fd = socket_address_listen(&a, type | flags, SOMAXCONN, SOCKET_ADDRESS_DEFAULT,
+ NULL, false, false, false, 0755, 0644, NULL);
+ if (fd < 0 || log_get_max_level() >= log_level) {
+ _cleanup_free_ char *p = NULL;
+
+ r = socket_address_print(&a, &p);
+ if (r < 0)
+ return log_error_errno(r, "socket_address_print(): %m");
+
+ if (fd < 0)
+ log_error_errno(fd, "Failed to listen on %s: %m", p);
+ else
+ log_full(log_level, "Listening on %s", p);
+ }
+
+ return fd;
+}
diff --git a/src/basic/socket-util.c b/src/basic/socket-util.c
new file mode 100644
index 0000000..91bf801
--- /dev/null
+++ b/src/basic/socket-util.c
@@ -0,0 +1,1347 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <limits.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <poll.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "utf8.h"
+#include "util.h"
+
+#if ENABLE_IDN
+# define IDN_FLAGS NI_IDN
+#else
+# define IDN_FLAGS 0
+#endif
+
+static const char* const socket_address_type_table[] = {
+ [SOCK_STREAM] = "Stream",
+ [SOCK_DGRAM] = "Datagram",
+ [SOCK_RAW] = "Raw",
+ [SOCK_RDM] = "ReliableDatagram",
+ [SOCK_SEQPACKET] = "SequentialPacket",
+ [SOCK_DCCP] = "DatagramCongestionControl",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_address_type, int);
+
+int socket_address_parse(SocketAddress *a, const char *s) {
+ _cleanup_free_ char *n = NULL;
+ char *e;
+ int r;
+
+ assert(a);
+ assert(s);
+
+ *a = (SocketAddress) {
+ .type = SOCK_STREAM,
+ };
+
+ if (*s == '[') {
+ uint16_t port;
+
+ /* IPv6 in [x:.....:z]:p notation */
+
+ e = strchr(s+1, ']');
+ if (!e)
+ return -EINVAL;
+
+ n = strndup(s+1, e-s-1);
+ if (!n)
+ return -ENOMEM;
+
+ errno = 0;
+ if (inet_pton(AF_INET6, n, &a->sockaddr.in6.sin6_addr) <= 0)
+ return errno > 0 ? -errno : -EINVAL;
+
+ e++;
+ if (*e != ':')
+ return -EINVAL;
+
+ e++;
+ r = parse_ip_port(e, &port);
+ if (r < 0)
+ return r;
+
+ a->sockaddr.in6.sin6_family = AF_INET6;
+ a->sockaddr.in6.sin6_port = htobe16(port);
+ a->size = sizeof(struct sockaddr_in6);
+
+ } else if (*s == '/') {
+ /* AF_UNIX socket */
+
+ size_t l;
+
+ l = strlen(s);
+ if (l >= sizeof(a->sockaddr.un.sun_path)) /* Note that we refuse non-NUL-terminated sockets when
+ * parsing (the kernel itself is less strict here in what it
+ * accepts) */
+ return -EINVAL;
+
+ a->sockaddr.un.sun_family = AF_UNIX;
+ memcpy(a->sockaddr.un.sun_path, s, l);
+ a->size = offsetof(struct sockaddr_un, sun_path) + l + 1;
+
+ } else if (*s == '@') {
+ /* Abstract AF_UNIX socket */
+ size_t l;
+
+ l = strlen(s+1);
+ if (l >= sizeof(a->sockaddr.un.sun_path) - 1) /* Note that we refuse non-NUL-terminated sockets here
+ * when parsing, even though abstract namespace sockets
+ * explicitly allow embedded NUL bytes and don't consider
+ * them special. But it's simply annoying to debug such
+ * sockets. */
+ return -EINVAL;
+
+ a->sockaddr.un.sun_family = AF_UNIX;
+ memcpy(a->sockaddr.un.sun_path+1, s+1, l);
+ a->size = offsetof(struct sockaddr_un, sun_path) + 1 + l;
+
+ } else if (startswith(s, "vsock:")) {
+ /* AF_VSOCK socket in vsock:cid:port notation */
+ const char *cid_start = s + STRLEN("vsock:");
+ unsigned port;
+
+ e = strchr(cid_start, ':');
+ if (!e)
+ return -EINVAL;
+
+ r = safe_atou(e+1, &port);
+ if (r < 0)
+ return r;
+
+ n = strndup(cid_start, e - cid_start);
+ if (!n)
+ return -ENOMEM;
+
+ if (!isempty(n)) {
+ r = safe_atou(n, &a->sockaddr.vm.svm_cid);
+ if (r < 0)
+ return r;
+ } else
+ a->sockaddr.vm.svm_cid = VMADDR_CID_ANY;
+
+ a->sockaddr.vm.svm_family = AF_VSOCK;
+ a->sockaddr.vm.svm_port = port;
+ a->size = sizeof(struct sockaddr_vm);
+
+ } else {
+ uint16_t port;
+
+ e = strchr(s, ':');
+ if (e) {
+ r = parse_ip_port(e + 1, &port);
+ if (r < 0)
+ return r;
+
+ n = strndup(s, e-s);
+ if (!n)
+ return -ENOMEM;
+
+ /* IPv4 in w.x.y.z:p notation? */
+ r = inet_pton(AF_INET, n, &a->sockaddr.in.sin_addr);
+ if (r < 0)
+ return -errno;
+
+ if (r > 0) {
+ /* Gotcha, it's a traditional IPv4 address */
+ a->sockaddr.in.sin_family = AF_INET;
+ a->sockaddr.in.sin_port = htobe16(port);
+ a->size = sizeof(struct sockaddr_in);
+ } else {
+ unsigned idx;
+
+ if (strlen(n) > IF_NAMESIZE-1)
+ return -EINVAL;
+
+ /* Uh, our last resort, an interface name */
+ idx = if_nametoindex(n);
+ if (idx == 0)
+ return -EINVAL;
+
+ a->sockaddr.in6.sin6_family = AF_INET6;
+ a->sockaddr.in6.sin6_port = htobe16(port);
+ a->sockaddr.in6.sin6_scope_id = idx;
+ a->sockaddr.in6.sin6_addr = in6addr_any;
+ a->size = sizeof(struct sockaddr_in6);
+ }
+ } else {
+
+ /* Just a port */
+ r = parse_ip_port(s, &port);
+ if (r < 0)
+ return r;
+
+ if (socket_ipv6_is_supported()) {
+ a->sockaddr.in6.sin6_family = AF_INET6;
+ a->sockaddr.in6.sin6_port = htobe16(port);
+ a->sockaddr.in6.sin6_addr = in6addr_any;
+ a->size = sizeof(struct sockaddr_in6);
+ } else {
+ a->sockaddr.in.sin_family = AF_INET;
+ a->sockaddr.in.sin_port = htobe16(port);
+ a->sockaddr.in.sin_addr.s_addr = INADDR_ANY;
+ a->size = sizeof(struct sockaddr_in);
+ }
+ }
+ }
+
+ return 0;
+}
+
+int socket_address_parse_and_warn(SocketAddress *a, const char *s) {
+ SocketAddress b;
+ int r;
+
+ /* Similar to socket_address_parse() but warns for IPv6 sockets when we don't support them. */
+
+ r = socket_address_parse(&b, s);
+ if (r < 0)
+ return r;
+
+ if (!socket_ipv6_is_supported() && b.sockaddr.sa.sa_family == AF_INET6) {
+ log_warning("Binding to IPv6 address not available since kernel does not support IPv6.");
+ return -EAFNOSUPPORT;
+ }
+
+ *a = b;
+ return 0;
+}
+
+int socket_address_parse_netlink(SocketAddress *a, const char *s) {
+ int family;
+ unsigned group = 0;
+ _cleanup_free_ char *sfamily = NULL;
+ assert(a);
+ assert(s);
+
+ zero(*a);
+ a->type = SOCK_RAW;
+
+ errno = 0;
+ if (sscanf(s, "%ms %u", &sfamily, &group) < 1)
+ return errno > 0 ? -errno : -EINVAL;
+
+ family = netlink_family_from_string(sfamily);
+ if (family < 0)
+ return -EINVAL;
+
+ a->sockaddr.nl.nl_family = AF_NETLINK;
+ a->sockaddr.nl.nl_groups = group;
+
+ a->type = SOCK_RAW;
+ a->size = sizeof(struct sockaddr_nl);
+ a->protocol = family;
+
+ return 0;
+}
+
+int socket_address_verify(const SocketAddress *a, bool strict) {
+ assert(a);
+
+ /* With 'strict' we enforce additional sanity constraints which are not set by the standard,
+ * but should only apply to sockets we create ourselves. */
+
+ switch (socket_address_family(a)) {
+
+ case AF_INET:
+ if (a->size != sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ if (a->sockaddr.in.sin_port == 0)
+ return -EINVAL;
+
+ if (!IN_SET(a->type, SOCK_STREAM, SOCK_DGRAM))
+ return -EINVAL;
+
+ return 0;
+
+ case AF_INET6:
+ if (a->size != sizeof(struct sockaddr_in6))
+ return -EINVAL;
+
+ if (a->sockaddr.in6.sin6_port == 0)
+ return -EINVAL;
+
+ if (!IN_SET(a->type, SOCK_STREAM, SOCK_DGRAM))
+ return -EINVAL;
+
+ return 0;
+
+ case AF_UNIX:
+ if (a->size < offsetof(struct sockaddr_un, sun_path))
+ return -EINVAL;
+ if (a->size > sizeof(struct sockaddr_un) + !strict)
+ /* If !strict, allow one extra byte, since getsockname() on Linux will append
+ * a NUL byte if we have path sockets that are above sun_path's full size. */
+ return -EINVAL;
+
+ if (a->size > offsetof(struct sockaddr_un, sun_path) &&
+ a->sockaddr.un.sun_path[0] != 0 &&
+ strict) {
+ /* Only validate file system sockets here, and only in strict mode */
+ const char *e;
+
+ e = memchr(a->sockaddr.un.sun_path, 0, sizeof(a->sockaddr.un.sun_path));
+ if (e) {
+ /* If there's an embedded NUL byte, make sure the size of the socket address matches it */
+ if (a->size != offsetof(struct sockaddr_un, sun_path) + (e - a->sockaddr.un.sun_path) + 1)
+ return -EINVAL;
+ } else {
+ /* If there's no embedded NUL byte, then then the size needs to match the whole
+ * structure or the structure with one extra NUL byte suffixed. (Yeah, Linux is awful,
+ * and considers both equivalent: getsockname() even extends sockaddr_un beyond its
+ * size if the path is non NUL terminated.)*/
+ if (!IN_SET(a->size, sizeof(a->sockaddr.un.sun_path), sizeof(a->sockaddr.un.sun_path)+1))
+ return -EINVAL;
+ }
+ }
+
+ if (!IN_SET(a->type, SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET))
+ return -EINVAL;
+
+ return 0;
+
+ case AF_NETLINK:
+
+ if (a->size != sizeof(struct sockaddr_nl))
+ return -EINVAL;
+
+ if (!IN_SET(a->type, SOCK_RAW, SOCK_DGRAM))
+ return -EINVAL;
+
+ return 0;
+
+ case AF_VSOCK:
+ if (a->size != sizeof(struct sockaddr_vm))
+ return -EINVAL;
+
+ if (!IN_SET(a->type, SOCK_STREAM, SOCK_DGRAM))
+ return -EINVAL;
+
+ return 0;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+int socket_address_print(const SocketAddress *a, char **ret) {
+ int r;
+
+ assert(a);
+ assert(ret);
+
+ r = socket_address_verify(a, false); /* We do non-strict validation, because we want to be
+ * able to pretty-print any socket the kernel considers
+ * valid. We still need to do validation to know if we
+ * can meaningfully print the address. */
+ if (r < 0)
+ return r;
+
+ if (socket_address_family(a) == AF_NETLINK) {
+ _cleanup_free_ char *sfamily = NULL;
+
+ r = netlink_family_to_string_alloc(a->protocol, &sfamily);
+ if (r < 0)
+ return r;
+
+ r = asprintf(ret, "%s %u", sfamily, a->sockaddr.nl.nl_groups);
+ if (r < 0)
+ return -ENOMEM;
+
+ return 0;
+ }
+
+ return sockaddr_pretty(&a->sockaddr.sa, a->size, false, true, ret);
+}
+
+bool socket_address_can_accept(const SocketAddress *a) {
+ assert(a);
+
+ return
+ IN_SET(a->type, SOCK_STREAM, SOCK_SEQPACKET);
+}
+
+bool socket_address_equal(const SocketAddress *a, const SocketAddress *b) {
+ assert(a);
+ assert(b);
+
+ /* Invalid addresses are unequal to all */
+ if (socket_address_verify(a, false) < 0 ||
+ socket_address_verify(b, false) < 0)
+ return false;
+
+ if (a->type != b->type)
+ return false;
+
+ if (socket_address_family(a) != socket_address_family(b))
+ return false;
+
+ switch (socket_address_family(a)) {
+
+ case AF_INET:
+ if (a->sockaddr.in.sin_addr.s_addr != b->sockaddr.in.sin_addr.s_addr)
+ return false;
+
+ if (a->sockaddr.in.sin_port != b->sockaddr.in.sin_port)
+ return false;
+
+ break;
+
+ case AF_INET6:
+ if (memcmp(&a->sockaddr.in6.sin6_addr, &b->sockaddr.in6.sin6_addr, sizeof(a->sockaddr.in6.sin6_addr)) != 0)
+ return false;
+
+ if (a->sockaddr.in6.sin6_port != b->sockaddr.in6.sin6_port)
+ return false;
+
+ break;
+
+ case AF_UNIX:
+ if (a->size <= offsetof(struct sockaddr_un, sun_path) ||
+ b->size <= offsetof(struct sockaddr_un, sun_path))
+ return false;
+
+ if ((a->sockaddr.un.sun_path[0] == 0) != (b->sockaddr.un.sun_path[0] == 0))
+ return false;
+
+ if (a->sockaddr.un.sun_path[0]) {
+ if (!path_equal_or_files_same(a->sockaddr.un.sun_path, b->sockaddr.un.sun_path, 0))
+ return false;
+ } else {
+ if (a->size != b->size)
+ return false;
+
+ if (memcmp(a->sockaddr.un.sun_path, b->sockaddr.un.sun_path, a->size) != 0)
+ return false;
+ }
+
+ break;
+
+ case AF_NETLINK:
+ if (a->protocol != b->protocol)
+ return false;
+
+ if (a->sockaddr.nl.nl_groups != b->sockaddr.nl.nl_groups)
+ return false;
+
+ break;
+
+ case AF_VSOCK:
+ if (a->sockaddr.vm.svm_cid != b->sockaddr.vm.svm_cid)
+ return false;
+
+ if (a->sockaddr.vm.svm_port != b->sockaddr.vm.svm_port)
+ return false;
+
+ break;
+
+ default:
+ /* Cannot compare, so we assume the addresses are different */
+ return false;
+ }
+
+ return true;
+}
+
+bool socket_address_is(const SocketAddress *a, const char *s, int type) {
+ struct SocketAddress b;
+
+ assert(a);
+ assert(s);
+
+ if (socket_address_parse(&b, s) < 0)
+ return false;
+
+ b.type = type;
+
+ return socket_address_equal(a, &b);
+}
+
+bool socket_address_is_netlink(const SocketAddress *a, const char *s) {
+ struct SocketAddress b;
+
+ assert(a);
+ assert(s);
+
+ if (socket_address_parse_netlink(&b, s) < 0)
+ return false;
+
+ return socket_address_equal(a, &b);
+}
+
+const char* socket_address_get_path(const SocketAddress *a) {
+ assert(a);
+
+ if (socket_address_family(a) != AF_UNIX)
+ return NULL;
+
+ if (a->sockaddr.un.sun_path[0] == 0)
+ return NULL;
+
+ /* Note that this is only safe because we know that there's an extra NUL byte after the sockaddr_un
+ * structure. On Linux AF_UNIX file system socket addresses don't have to be NUL terminated if they take up the
+ * full sun_path space. */
+ assert_cc(sizeof(union sockaddr_union) >= sizeof(struct sockaddr_un)+1);
+ return a->sockaddr.un.sun_path;
+}
+
+bool socket_ipv6_is_supported(void) {
+ if (access("/proc/net/if_inet6", F_OK) != 0)
+ return false;
+
+ return true;
+}
+
+bool socket_address_matches_fd(const SocketAddress *a, int fd) {
+ SocketAddress b;
+ socklen_t solen;
+
+ assert(a);
+ assert(fd >= 0);
+
+ b.size = sizeof(b.sockaddr);
+ if (getsockname(fd, &b.sockaddr.sa, &b.size) < 0)
+ return false;
+
+ if (b.sockaddr.sa.sa_family != a->sockaddr.sa.sa_family)
+ return false;
+
+ solen = sizeof(b.type);
+ if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &b.type, &solen) < 0)
+ return false;
+
+ if (b.type != a->type)
+ return false;
+
+ if (a->protocol != 0) {
+ solen = sizeof(b.protocol);
+ if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &b.protocol, &solen) < 0)
+ return false;
+
+ if (b.protocol != a->protocol)
+ return false;
+ }
+
+ return socket_address_equal(a, &b);
+}
+
+int sockaddr_port(const struct sockaddr *_sa, unsigned *ret_port) {
+ union sockaddr_union *sa = (union sockaddr_union*) _sa;
+
+ /* Note, this returns the port as 'unsigned' rather than 'uint16_t', as AF_VSOCK knows larger ports */
+
+ assert(sa);
+
+ switch (sa->sa.sa_family) {
+
+ case AF_INET:
+ *ret_port = be16toh(sa->in.sin_port);
+ return 0;
+
+ case AF_INET6:
+ *ret_port = be16toh(sa->in6.sin6_port);
+ return 0;
+
+ case AF_VSOCK:
+ *ret_port = sa->vm.svm_port;
+ return 0;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+int sockaddr_pretty(
+ const struct sockaddr *_sa,
+ socklen_t salen,
+ bool translate_ipv6,
+ bool include_port,
+ char **ret) {
+
+ union sockaddr_union *sa = (union sockaddr_union*) _sa;
+ char *p;
+ int r;
+
+ assert(sa);
+ assert(salen >= sizeof(sa->sa.sa_family));
+
+ switch (sa->sa.sa_family) {
+
+ case AF_INET: {
+ uint32_t a;
+
+ a = be32toh(sa->in.sin_addr.s_addr);
+
+ if (include_port)
+ r = asprintf(&p,
+ "%u.%u.%u.%u:%u",
+ a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF,
+ be16toh(sa->in.sin_port));
+ else
+ r = asprintf(&p,
+ "%u.%u.%u.%u",
+ a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF);
+ if (r < 0)
+ return -ENOMEM;
+ break;
+ }
+
+ case AF_INET6: {
+ static const unsigned char ipv4_prefix[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF
+ };
+
+ if (translate_ipv6 &&
+ memcmp(&sa->in6.sin6_addr, ipv4_prefix, sizeof(ipv4_prefix)) == 0) {
+ const uint8_t *a = sa->in6.sin6_addr.s6_addr+12;
+ if (include_port)
+ r = asprintf(&p,
+ "%u.%u.%u.%u:%u",
+ a[0], a[1], a[2], a[3],
+ be16toh(sa->in6.sin6_port));
+ else
+ r = asprintf(&p,
+ "%u.%u.%u.%u",
+ a[0], a[1], a[2], a[3]);
+ if (r < 0)
+ return -ENOMEM;
+ } else {
+ char a[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6, &sa->in6.sin6_addr, a, sizeof(a));
+
+ if (include_port) {
+ r = asprintf(&p,
+ "[%s]:%u",
+ a,
+ be16toh(sa->in6.sin6_port));
+ if (r < 0)
+ return -ENOMEM;
+ } else {
+ p = strdup(a);
+ if (!p)
+ return -ENOMEM;
+ }
+ }
+
+ break;
+ }
+
+ case AF_UNIX:
+ if (salen <= offsetof(struct sockaddr_un, sun_path) ||
+ (sa->un.sun_path[0] == 0 && salen == offsetof(struct sockaddr_un, sun_path) + 1))
+ /* The name must have at least one character (and the leading NUL does not count) */
+ p = strdup("<unnamed>");
+ else {
+ /* Note that we calculate the path pointer here through the .un_buffer[] field, in order to
+ * outtrick bounds checking tools such as ubsan, which are too smart for their own good: on
+ * Linux the kernel may return sun_path[] data one byte longer than the declared size of the
+ * field. */
+ char *path = (char*) sa->un_buffer + offsetof(struct sockaddr_un, sun_path);
+ size_t path_len = salen - offsetof(struct sockaddr_un, sun_path);
+
+ if (path[0] == 0) {
+ /* Abstract socket. When parsing address information from, we
+ * explicitly reject overly long paths and paths with embedded NULs.
+ * But we might get such a socket from the outside. Let's return
+ * something meaningful and printable in this case. */
+
+ _cleanup_free_ char *e = NULL;
+
+ e = cescape_length(path + 1, path_len - 1);
+ if (!e)
+ return -ENOMEM;
+
+ p = strjoin("@", e);
+ } else {
+ if (path[path_len - 1] == '\0')
+ /* We expect a terminating NUL and don't print it */
+ path_len --;
+
+ p = cescape_length(path, path_len);
+ }
+ }
+ if (!p)
+ return -ENOMEM;
+
+ break;
+
+ case AF_VSOCK:
+ if (include_port) {
+ if (sa->vm.svm_cid == VMADDR_CID_ANY)
+ r = asprintf(&p, "vsock::%u", sa->vm.svm_port);
+ else
+ r = asprintf(&p, "vsock:%u:%u", sa->vm.svm_cid, sa->vm.svm_port);
+ } else
+ r = asprintf(&p, "vsock:%u", sa->vm.svm_cid);
+ if (r < 0)
+ return -ENOMEM;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ *ret = p;
+ return 0;
+}
+
+int getpeername_pretty(int fd, bool include_port, char **ret) {
+ union sockaddr_union sa;
+ socklen_t salen = sizeof(sa);
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (getpeername(fd, &sa.sa, &salen) < 0)
+ return -errno;
+
+ if (sa.sa.sa_family == AF_UNIX) {
+ struct ucred ucred = {};
+
+ /* UNIX connection sockets are anonymous, so let's use
+ * PID/UID as pretty credentials instead */
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ if (asprintf(ret, "PID "PID_FMT"/UID "UID_FMT, ucred.pid, ucred.uid) < 0)
+ return -ENOMEM;
+
+ return 0;
+ }
+
+ /* For remote sockets we translate IPv6 addresses back to IPv4
+ * if applicable, since that's nicer. */
+
+ return sockaddr_pretty(&sa.sa, salen, true, include_port, ret);
+}
+
+int getsockname_pretty(int fd, char **ret) {
+ union sockaddr_union sa;
+ socklen_t salen = sizeof(sa);
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (getsockname(fd, &sa.sa, &salen) < 0)
+ return -errno;
+
+ /* For local sockets we do not translate IPv6 addresses back
+ * to IPv6 if applicable, since this is usually used for
+ * listening sockets where the difference between IPv4 and
+ * IPv6 matters. */
+
+ return sockaddr_pretty(&sa.sa, salen, false, true, ret);
+}
+
+int socknameinfo_pretty(union sockaddr_union *sa, socklen_t salen, char **_ret) {
+ int r;
+ char host[NI_MAXHOST], *ret;
+
+ assert(_ret);
+
+ r = getnameinfo(&sa->sa, salen, host, sizeof(host), NULL, 0, IDN_FLAGS);
+ if (r != 0) {
+ int saved_errno = errno;
+
+ r = sockaddr_pretty(&sa->sa, salen, true, true, &ret);
+ if (r < 0)
+ return r;
+
+ log_debug_errno(saved_errno, "getnameinfo(%s) failed: %m", ret);
+ } else {
+ ret = strdup(host);
+ if (!ret)
+ return -ENOMEM;
+ }
+
+ *_ret = ret;
+ return 0;
+}
+
+static const char* const netlink_family_table[] = {
+ [NETLINK_ROUTE] = "route",
+ [NETLINK_FIREWALL] = "firewall",
+ [NETLINK_INET_DIAG] = "inet-diag",
+ [NETLINK_NFLOG] = "nflog",
+ [NETLINK_XFRM] = "xfrm",
+ [NETLINK_SELINUX] = "selinux",
+ [NETLINK_ISCSI] = "iscsi",
+ [NETLINK_AUDIT] = "audit",
+ [NETLINK_FIB_LOOKUP] = "fib-lookup",
+ [NETLINK_CONNECTOR] = "connector",
+ [NETLINK_NETFILTER] = "netfilter",
+ [NETLINK_IP6_FW] = "ip6-fw",
+ [NETLINK_DNRTMSG] = "dnrtmsg",
+ [NETLINK_KOBJECT_UEVENT] = "kobject-uevent",
+ [NETLINK_GENERIC] = "generic",
+ [NETLINK_SCSITRANSPORT] = "scsitransport",
+ [NETLINK_ECRYPTFS] = "ecryptfs",
+ [NETLINK_RDMA] = "rdma",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(netlink_family, int, INT_MAX);
+
+static const char* const socket_address_bind_ipv6_only_table[_SOCKET_ADDRESS_BIND_IPV6_ONLY_MAX] = {
+ [SOCKET_ADDRESS_DEFAULT] = "default",
+ [SOCKET_ADDRESS_BOTH] = "both",
+ [SOCKET_ADDRESS_IPV6_ONLY] = "ipv6-only"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_address_bind_ipv6_only, SocketAddressBindIPv6Only);
+
+SocketAddressBindIPv6Only socket_address_bind_ipv6_only_or_bool_from_string(const char *n) {
+ int r;
+
+ r = parse_boolean(n);
+ if (r > 0)
+ return SOCKET_ADDRESS_IPV6_ONLY;
+ if (r == 0)
+ return SOCKET_ADDRESS_BOTH;
+
+ return socket_address_bind_ipv6_only_from_string(n);
+}
+
+bool sockaddr_equal(const union sockaddr_union *a, const union sockaddr_union *b) {
+ assert(a);
+ assert(b);
+
+ if (a->sa.sa_family != b->sa.sa_family)
+ return false;
+
+ if (a->sa.sa_family == AF_INET)
+ return a->in.sin_addr.s_addr == b->in.sin_addr.s_addr;
+
+ if (a->sa.sa_family == AF_INET6)
+ return memcmp(&a->in6.sin6_addr, &b->in6.sin6_addr, sizeof(a->in6.sin6_addr)) == 0;
+
+ if (a->sa.sa_family == AF_VSOCK)
+ return a->vm.svm_cid == b->vm.svm_cid;
+
+ return false;
+}
+
+int fd_inc_sndbuf(int fd, size_t n) {
+ int r, value;
+ socklen_t l = sizeof(value);
+
+ r = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &value, &l);
+ if (r >= 0 && l == sizeof(value) && (size_t) value >= n*2)
+ return 0;
+
+ /* If we have the privileges we will ignore the kernel limit. */
+
+ if (setsockopt_int(fd, SOL_SOCKET, SO_SNDBUF, n) < 0) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_SNDBUFFORCE, n);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+int fd_inc_rcvbuf(int fd, size_t n) {
+ int r, value;
+ socklen_t l = sizeof(value);
+
+ r = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &value, &l);
+ if (r >= 0 && l == sizeof(value) && (size_t) value >= n*2)
+ return 0;
+
+ /* If we have the privileges we will ignore the kernel limit. */
+
+ if (setsockopt_int(fd, SOL_SOCKET, SO_RCVBUF, n) < 0) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_RCVBUFFORCE, n);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+static const char* const ip_tos_table[] = {
+ [IPTOS_LOWDELAY] = "low-delay",
+ [IPTOS_THROUGHPUT] = "throughput",
+ [IPTOS_RELIABILITY] = "reliability",
+ [IPTOS_LOWCOST] = "low-cost",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ip_tos, int, 0xff);
+
+bool ifname_valid(const char *p) {
+ bool numeric = true;
+
+ /* Checks whether a network interface name is valid. This is inspired by dev_valid_name() in the kernel sources
+ * but slightly stricter, as we only allow non-control, non-space ASCII characters in the interface name. We
+ * also don't permit names that only container numbers, to avoid confusion with numeric interface indexes. */
+
+ if (isempty(p))
+ return false;
+
+ if (strlen(p) >= IFNAMSIZ)
+ return false;
+
+ if (dot_or_dot_dot(p))
+ return false;
+
+ while (*p) {
+ if ((unsigned char) *p >= 127U)
+ return false;
+
+ if ((unsigned char) *p <= 32U)
+ return false;
+
+ if (IN_SET(*p, ':', '/'))
+ return false;
+
+ numeric = numeric && (*p >= '0' && *p <= '9');
+ p++;
+ }
+
+ if (numeric)
+ return false;
+
+ return true;
+}
+
+bool address_label_valid(const char *p) {
+
+ if (isempty(p))
+ return false;
+
+ if (strlen(p) >= IFNAMSIZ)
+ return false;
+
+ while (*p) {
+ if ((uint8_t) *p >= 127U)
+ return false;
+
+ if ((uint8_t) *p <= 31U)
+ return false;
+ p++;
+ }
+
+ return true;
+}
+
+int getpeercred(int fd, struct ucred *ucred) {
+ socklen_t n = sizeof(struct ucred);
+ struct ucred u;
+ int r;
+
+ assert(fd >= 0);
+ assert(ucred);
+
+ r = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &u, &n);
+ if (r < 0)
+ return -errno;
+
+ if (n != sizeof(struct ucred))
+ return -EIO;
+
+ /* Check if the data is actually useful and not suppressed due to namespacing issues */
+ if (!pid_is_valid(u.pid))
+ return -ENODATA;
+
+ /* Note that we don't check UID/GID here, as namespace translation works differently there: instead of
+ * receiving in "invalid" user/group we get the overflow UID/GID. */
+
+ *ucred = u;
+ return 0;
+}
+
+int getpeersec(int fd, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ socklen_t n = 64;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ for (;;) {
+ s = new0(char, n+1);
+ if (!s)
+ return -ENOMEM;
+
+ if (getsockopt(fd, SOL_SOCKET, SO_PEERSEC, s, &n) >= 0)
+ break;
+
+ if (errno != ERANGE)
+ return -errno;
+
+ s = mfree(s);
+ }
+
+ if (isempty(s))
+ return -EOPNOTSUPP;
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+int getpeergroups(int fd, gid_t **ret) {
+ socklen_t n = sizeof(gid_t) * 64;
+ _cleanup_free_ gid_t *d = NULL;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ for (;;) {
+ d = malloc(n);
+ if (!d)
+ return -ENOMEM;
+
+ if (getsockopt(fd, SOL_SOCKET, SO_PEERGROUPS, d, &n) >= 0)
+ break;
+
+ if (errno != ERANGE)
+ return -errno;
+
+ d = mfree(d);
+ }
+
+ assert_se(n % sizeof(gid_t) == 0);
+ n /= sizeof(gid_t);
+
+ if ((socklen_t) (int) n != n)
+ return -E2BIG;
+
+ *ret = TAKE_PTR(d);
+
+ return (int) n;
+}
+
+ssize_t send_one_fd_iov_sa(
+ int transport_fd,
+ int fd,
+ struct iovec *iov, size_t iovlen,
+ const struct sockaddr *sa, socklen_t len,
+ int flags) {
+
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int))];
+ } control = {};
+ struct msghdr mh = {
+ .msg_name = (struct sockaddr*) sa,
+ .msg_namelen = len,
+ .msg_iov = iov,
+ .msg_iovlen = iovlen,
+ };
+ ssize_t k;
+
+ assert(transport_fd >= 0);
+
+ /*
+ * We need either an FD or data to send.
+ * If there's nothing, return an error.
+ */
+ if (fd < 0 && !iov)
+ return -EINVAL;
+
+ if (fd >= 0) {
+ struct cmsghdr *cmsg;
+
+ mh.msg_control = &control;
+ mh.msg_controllen = sizeof(control);
+
+ cmsg = CMSG_FIRSTHDR(&mh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
+
+ mh.msg_controllen = CMSG_SPACE(sizeof(int));
+ }
+ k = sendmsg(transport_fd, &mh, MSG_NOSIGNAL | flags);
+ if (k < 0)
+ return (ssize_t) -errno;
+
+ return k;
+}
+
+int send_one_fd_sa(
+ int transport_fd,
+ int fd,
+ const struct sockaddr *sa, socklen_t len,
+ int flags) {
+
+ assert(fd >= 0);
+
+ return (int) send_one_fd_iov_sa(transport_fd, fd, NULL, 0, sa, len, flags);
+}
+
+ssize_t receive_one_fd_iov(
+ int transport_fd,
+ struct iovec *iov, size_t iovlen,
+ int flags,
+ int *ret_fd) {
+
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int))];
+ } control = {};
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_iov = iov,
+ .msg_iovlen = iovlen,
+ };
+ struct cmsghdr *cmsg, *found = NULL;
+ ssize_t k;
+
+ assert(transport_fd >= 0);
+ assert(ret_fd);
+
+ /*
+ * Receive a single FD via @transport_fd. We don't care for
+ * the transport-type. We retrieve a single FD at most, so for
+ * packet-based transports, the caller must ensure to send
+ * only a single FD per packet. This is best used in
+ * combination with send_one_fd().
+ */
+
+ k = recvmsg(transport_fd, &mh, MSG_CMSG_CLOEXEC | flags);
+ if (k < 0)
+ return (ssize_t) -errno;
+
+ CMSG_FOREACH(cmsg, &mh) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(int))) {
+ assert(!found);
+ found = cmsg;
+ break;
+ }
+ }
+
+ if (!found)
+ cmsg_close_all(&mh);
+
+ /* If didn't receive an FD or any data, return an error. */
+ if (k == 0 && !found)
+ return -EIO;
+
+ if (found)
+ *ret_fd = *(int*) CMSG_DATA(found);
+ else
+ *ret_fd = -1;
+
+ return k;
+}
+
+int receive_one_fd(int transport_fd, int flags) {
+ int fd;
+ ssize_t k;
+
+ k = receive_one_fd_iov(transport_fd, NULL, 0, flags, &fd);
+ if (k == 0)
+ return fd;
+
+ /* k must be negative, since receive_one_fd_iov() only returns
+ * a positive value if data was received through the iov. */
+ assert(k < 0);
+ return (int) k;
+}
+
+ssize_t next_datagram_size_fd(int fd) {
+ ssize_t l;
+ int k;
+
+ /* This is a bit like FIONREAD/SIOCINQ, however a bit more powerful. The difference being: recv(MSG_PEEK) will
+ * actually cause the next datagram in the queue to be validated regarding checksums, which FIONREAD doesn't
+ * do. This difference is actually of major importance as we need to be sure that the size returned here
+ * actually matches what we will read with recvmsg() next, as otherwise we might end up allocating a buffer of
+ * the wrong size. */
+
+ l = recv(fd, NULL, 0, MSG_PEEK|MSG_TRUNC);
+ if (l < 0) {
+ if (IN_SET(errno, EOPNOTSUPP, EFAULT))
+ goto fallback;
+
+ return -errno;
+ }
+ if (l == 0)
+ goto fallback;
+
+ return l;
+
+fallback:
+ k = 0;
+
+ /* Some sockets (AF_PACKET) do not support null-sized recv() with MSG_TRUNC set, let's fall back to FIONREAD
+ * for them. Checksums don't matter for raw sockets anyway, hence this should be fine. */
+
+ if (ioctl(fd, FIONREAD, &k) < 0)
+ return -errno;
+
+ return (ssize_t) k;
+}
+
+int flush_accept(int fd) {
+
+ struct pollfd pollfd = {
+ .fd = fd,
+ .events = POLLIN,
+ };
+ int r;
+
+ /* Similar to flush_fd() but flushes all incoming connection by accepting them and immediately closing them. */
+
+ for (;;) {
+ int cfd;
+
+ r = poll(&pollfd, 1, 0);
+ if (r < 0) {
+ if (errno == EINTR)
+ continue;
+
+ return -errno;
+
+ } else if (r == 0)
+ return 0;
+
+ cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (cfd < 0) {
+ if (errno == EINTR)
+ continue;
+
+ if (errno == EAGAIN)
+ return 0;
+
+ return -errno;
+ }
+
+ close(cfd);
+ }
+}
+
+struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length) {
+ struct cmsghdr *cmsg;
+
+ assert(mh);
+
+ CMSG_FOREACH(cmsg, mh)
+ if (cmsg->cmsg_level == level &&
+ cmsg->cmsg_type == type &&
+ (length == (socklen_t) -1 || length == cmsg->cmsg_len))
+ return cmsg;
+
+ return NULL;
+}
+
+int socket_ioctl_fd(void) {
+ int fd;
+
+ /* Create a socket to invoke the various network interface ioctl()s on. Traditionally only AF_INET was good for
+ * that. Since kernel 4.6 AF_NETLINK works for this too. We first try to use AF_INET hence, but if that's not
+ * available (for example, because it is made unavailable via SECCOMP or such), we'll fall back to the more
+ * generic AF_NETLINK. */
+
+ fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_GENERIC);
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
+int sockaddr_un_unlink(const struct sockaddr_un *sa) {
+ const char *p, * nul;
+
+ assert(sa);
+
+ if (sa->sun_family != AF_UNIX)
+ return -EPROTOTYPE;
+
+ if (sa->sun_path[0] == 0) /* Nothing to do for abstract sockets */
+ return 0;
+
+ /* The path in .sun_path is not necessarily NUL terminated. Let's fix that. */
+ nul = memchr(sa->sun_path, 0, sizeof(sa->sun_path));
+ if (nul)
+ p = sa->sun_path;
+ else
+ p = memdupa_suffix0(sa->sun_path, sizeof(sa->sun_path));
+
+ if (unlink(p) < 0)
+ return -errno;
+
+ return 1;
+}
+
+int sockaddr_un_set_path(struct sockaddr_un *ret, const char *path) {
+ size_t l;
+
+ assert(ret);
+ assert(path);
+
+ /* Initialize ret->sun_path from the specified argument. This will interpret paths starting with '@' as
+ * abstract namespace sockets, and those starting with '/' as regular filesystem sockets. It won't accept
+ * anything else (i.e. no relative paths), to avoid ambiguities. Note that this function cannot be used to
+ * reference paths in the abstract namespace that include NUL bytes in the name. */
+
+ l = strlen(path);
+ if (l == 0)
+ return -EINVAL;
+ if (!IN_SET(path[0], '/', '@'))
+ return -EINVAL;
+ if (path[1] == 0)
+ return -EINVAL;
+
+ /* Don't allow paths larger than the space in sockaddr_un. Note that we are a tiny bit more restrictive than
+ * the kernel is: we insist on NUL termination (both for abstract namespace and regular file system socket
+ * addresses!), which the kernel doesn't. We do this to reduce chance of incompatibility with other apps that
+ * do not expect non-NUL terminated file system path*/
+ if (l+1 > sizeof(ret->sun_path))
+ return -EINVAL;
+
+ *ret = (struct sockaddr_un) {
+ .sun_family = AF_UNIX,
+ };
+
+ if (path[0] == '@') {
+ /* Abstract namespace socket */
+ memcpy(ret->sun_path + 1, path + 1, l); /* copy *with* trailing NUL byte */
+ return (int) (offsetof(struct sockaddr_un, sun_path) + l); /* 🔥 *don't* 🔥 include trailing NUL in size */
+
+ } else {
+ assert(path[0] == '/');
+
+ /* File system socket */
+ memcpy(ret->sun_path, path, l + 1); /* copy *with* trailing NUL byte */
+ return (int) (offsetof(struct sockaddr_un, sun_path) + l + 1); /* include trailing NUL in size */
+ }
+}
diff --git a/src/basic/socket-util.h b/src/basic/socket-util.h
new file mode 100644
index 0000000..574d2b7
--- /dev/null
+++ b/src/basic/socket-util.h
@@ -0,0 +1,200 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <linux/netlink.h>
+#include <linux/if_infiniband.h>
+#include <linux/if_packet.h>
+#include <netinet/ether.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "macro.h"
+#include "missing_socket.h"
+#include "sparse-endian.h"
+
+union sockaddr_union {
+ /* The minimal, abstract version */
+ struct sockaddr sa;
+
+ /* The libc provided version that allocates "enough room" for every protocol */
+ struct sockaddr_storage storage;
+
+ /* Protoctol-specific implementations */
+ struct sockaddr_in in;
+ struct sockaddr_in6 in6;
+ struct sockaddr_un un;
+ struct sockaddr_nl nl;
+ struct sockaddr_ll ll;
+ struct sockaddr_vm vm;
+
+ /* Ensure there is enough space to store Infiniband addresses */
+ uint8_t ll_buffer[offsetof(struct sockaddr_ll, sll_addr) + CONST_MAX(ETH_ALEN, INFINIBAND_ALEN)];
+
+ /* Ensure there is enough space after the AF_UNIX sun_path for one more NUL byte, just to be sure that the path
+ * component is always followed by at least one NUL byte. */
+ uint8_t un_buffer[sizeof(struct sockaddr_un) + 1];
+};
+
+typedef struct SocketAddress {
+ union sockaddr_union sockaddr;
+
+ /* We store the size here explicitly due to the weird
+ * sockaddr_un semantics for abstract sockets */
+ socklen_t size;
+
+ /* Socket type, i.e. SOCK_STREAM, SOCK_DGRAM, ... */
+ int type;
+
+ /* Socket protocol, IPPROTO_xxx, usually 0, except for netlink */
+ int protocol;
+} SocketAddress;
+
+typedef enum SocketAddressBindIPv6Only {
+ SOCKET_ADDRESS_DEFAULT,
+ SOCKET_ADDRESS_BOTH,
+ SOCKET_ADDRESS_IPV6_ONLY,
+ _SOCKET_ADDRESS_BIND_IPV6_ONLY_MAX,
+ _SOCKET_ADDRESS_BIND_IPV6_ONLY_INVALID = -1
+} SocketAddressBindIPv6Only;
+
+#define socket_address_family(a) ((a)->sockaddr.sa.sa_family)
+
+const char* socket_address_type_to_string(int t) _const_;
+int socket_address_type_from_string(const char *s) _pure_;
+
+int socket_address_parse(SocketAddress *a, const char *s);
+int socket_address_parse_and_warn(SocketAddress *a, const char *s);
+int socket_address_parse_netlink(SocketAddress *a, const char *s);
+int socket_address_print(const SocketAddress *a, char **p);
+int socket_address_verify(const SocketAddress *a, bool strict) _pure_;
+
+int sockaddr_un_unlink(const struct sockaddr_un *sa);
+
+static inline int socket_address_unlink(const SocketAddress *a) {
+ return socket_address_family(a) == AF_UNIX ? sockaddr_un_unlink(&a->sockaddr.un) : 0;
+}
+
+bool socket_address_can_accept(const SocketAddress *a) _pure_;
+
+int socket_address_listen(
+ const SocketAddress *a,
+ int flags,
+ int backlog,
+ SocketAddressBindIPv6Only only,
+ const char *bind_to_device,
+ bool reuse_port,
+ bool free_bind,
+ bool transparent,
+ mode_t directory_mode,
+ mode_t socket_mode,
+ const char *label);
+int make_socket_fd(int log_level, const char* address, int type, int flags);
+
+bool socket_address_is(const SocketAddress *a, const char *s, int type);
+bool socket_address_is_netlink(const SocketAddress *a, const char *s);
+
+bool socket_address_matches_fd(const SocketAddress *a, int fd);
+
+bool socket_address_equal(const SocketAddress *a, const SocketAddress *b) _pure_;
+
+const char* socket_address_get_path(const SocketAddress *a);
+
+bool socket_ipv6_is_supported(void);
+
+int sockaddr_port(const struct sockaddr *_sa, unsigned *port);
+
+int sockaddr_pretty(const struct sockaddr *_sa, socklen_t salen, bool translate_ipv6, bool include_port, char **ret);
+int getpeername_pretty(int fd, bool include_port, char **ret);
+int getsockname_pretty(int fd, char **ret);
+
+int socknameinfo_pretty(union sockaddr_union *sa, socklen_t salen, char **_ret);
+
+const char* socket_address_bind_ipv6_only_to_string(SocketAddressBindIPv6Only b) _const_;
+SocketAddressBindIPv6Only socket_address_bind_ipv6_only_from_string(const char *s) _pure_;
+SocketAddressBindIPv6Only socket_address_bind_ipv6_only_or_bool_from_string(const char *s);
+
+int netlink_family_to_string_alloc(int b, char **s);
+int netlink_family_from_string(const char *s) _pure_;
+
+bool sockaddr_equal(const union sockaddr_union *a, const union sockaddr_union *b);
+
+int fd_inc_sndbuf(int fd, size_t n);
+int fd_inc_rcvbuf(int fd, size_t n);
+
+int ip_tos_to_string_alloc(int i, char **s);
+int ip_tos_from_string(const char *s);
+
+bool ifname_valid(const char *p);
+bool address_label_valid(const char *p);
+
+int getpeercred(int fd, struct ucred *ucred);
+int getpeersec(int fd, char **ret);
+int getpeergroups(int fd, gid_t **ret);
+
+ssize_t send_one_fd_iov_sa(
+ int transport_fd,
+ int fd,
+ struct iovec *iov, size_t iovlen,
+ const struct sockaddr *sa, socklen_t len,
+ int flags);
+int send_one_fd_sa(int transport_fd,
+ int fd,
+ const struct sockaddr *sa, socklen_t len,
+ int flags);
+#define send_one_fd_iov(transport_fd, fd, iov, iovlen, flags) send_one_fd_iov_sa(transport_fd, fd, iov, iovlen, NULL, 0, flags)
+#define send_one_fd(transport_fd, fd, flags) send_one_fd_iov_sa(transport_fd, fd, NULL, 0, NULL, 0, flags)
+ssize_t receive_one_fd_iov(int transport_fd, struct iovec *iov, size_t iovlen, int flags, int *ret_fd);
+int receive_one_fd(int transport_fd, int flags);
+
+ssize_t next_datagram_size_fd(int fd);
+
+int flush_accept(int fd);
+
+#define CMSG_FOREACH(cmsg, mh) \
+ for ((cmsg) = CMSG_FIRSTHDR(mh); (cmsg); (cmsg) = CMSG_NXTHDR((mh), (cmsg)))
+
+struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length);
+
+/*
+ * Certain hardware address types (e.g Infiniband) do not fit into sll_addr
+ * (8 bytes) and run over the structure. This macro returns the correct size that
+ * must be passed to kernel.
+ */
+#define SOCKADDR_LL_LEN(sa) \
+ ({ \
+ const struct sockaddr_ll *_sa = &(sa); \
+ size_t _mac_len = sizeof(_sa->sll_addr); \
+ assert(_sa->sll_family == AF_PACKET); \
+ if (be16toh(_sa->sll_hatype) == ARPHRD_ETHER) \
+ _mac_len = MAX(_mac_len, (size_t) ETH_ALEN); \
+ if (be16toh(_sa->sll_hatype) == ARPHRD_INFINIBAND) \
+ _mac_len = MAX(_mac_len, (size_t) INFINIBAND_ALEN); \
+ offsetof(struct sockaddr_ll, sll_addr) + _mac_len; \
+ })
+
+/* Covers only file system and abstract AF_UNIX socket addresses, but not unnamed socket addresses. */
+#define SOCKADDR_UN_LEN(sa) \
+ ({ \
+ const struct sockaddr_un *_sa = &(sa); \
+ assert(_sa->sun_family == AF_UNIX); \
+ offsetof(struct sockaddr_un, sun_path) + \
+ (_sa->sun_path[0] == 0 ? \
+ 1 + strnlen(_sa->sun_path+1, sizeof(_sa->sun_path)-1) : \
+ strnlen(_sa->sun_path, sizeof(_sa->sun_path))+1); \
+ })
+
+int socket_ioctl_fd(void);
+
+int sockaddr_un_set_path(struct sockaddr_un *ret, const char *path);
+
+static inline int setsockopt_int(int fd, int level, int optname, int value) {
+ if (setsockopt(fd, level, optname, &value, sizeof(value)) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/basic/sparse-endian.h b/src/basic/sparse-endian.h
new file mode 100644
index 0000000..9583dda
--- /dev/null
+++ b/src/basic/sparse-endian.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright (c) 2012 Josh Triplett <josh@joshtriplett.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#pragma once
+
+#include <byteswap.h>
+#include <endian.h>
+#include <stdint.h>
+
+#ifdef __CHECKER__
+#define __sd_bitwise __attribute__((__bitwise__))
+#define __sd_force __attribute__((__force__))
+#else
+#define __sd_bitwise
+#define __sd_force
+#endif
+
+typedef uint16_t __sd_bitwise le16_t;
+typedef uint16_t __sd_bitwise be16_t;
+typedef uint32_t __sd_bitwise le32_t;
+typedef uint32_t __sd_bitwise be32_t;
+typedef uint64_t __sd_bitwise le64_t;
+typedef uint64_t __sd_bitwise be64_t;
+
+#undef htobe16
+#undef htole16
+#undef be16toh
+#undef le16toh
+#undef htobe32
+#undef htole32
+#undef be32toh
+#undef le32toh
+#undef htobe64
+#undef htole64
+#undef be64toh
+#undef le64toh
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define bswap_16_on_le(x) __bswap_16(x)
+#define bswap_32_on_le(x) __bswap_32(x)
+#define bswap_64_on_le(x) __bswap_64(x)
+#define bswap_16_on_be(x) (x)
+#define bswap_32_on_be(x) (x)
+#define bswap_64_on_be(x) (x)
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define bswap_16_on_le(x) (x)
+#define bswap_32_on_le(x) (x)
+#define bswap_64_on_le(x) (x)
+#define bswap_16_on_be(x) __bswap_16(x)
+#define bswap_32_on_be(x) __bswap_32(x)
+#define bswap_64_on_be(x) __bswap_64(x)
+#endif
+
+static inline le16_t htole16(uint16_t value) { return (le16_t __sd_force) bswap_16_on_be(value); }
+static inline le32_t htole32(uint32_t value) { return (le32_t __sd_force) bswap_32_on_be(value); }
+static inline le64_t htole64(uint64_t value) { return (le64_t __sd_force) bswap_64_on_be(value); }
+
+static inline be16_t htobe16(uint16_t value) { return (be16_t __sd_force) bswap_16_on_le(value); }
+static inline be32_t htobe32(uint32_t value) { return (be32_t __sd_force) bswap_32_on_le(value); }
+static inline be64_t htobe64(uint64_t value) { return (be64_t __sd_force) bswap_64_on_le(value); }
+
+static inline uint16_t le16toh(le16_t value) { return bswap_16_on_be((uint16_t __sd_force)value); }
+static inline uint32_t le32toh(le32_t value) { return bswap_32_on_be((uint32_t __sd_force)value); }
+static inline uint64_t le64toh(le64_t value) { return bswap_64_on_be((uint64_t __sd_force)value); }
+
+static inline uint16_t be16toh(be16_t value) { return bswap_16_on_le((uint16_t __sd_force)value); }
+static inline uint32_t be32toh(be32_t value) { return bswap_32_on_le((uint32_t __sd_force)value); }
+static inline uint64_t be64toh(be64_t value) { return bswap_64_on_le((uint64_t __sd_force)value); }
+
+#undef __sd_bitwise
+#undef __sd_force
diff --git a/src/basic/special.h b/src/basic/special.h
new file mode 100644
index 0000000..379a3d7
--- /dev/null
+++ b/src/basic/special.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#define SPECIAL_DEFAULT_TARGET "default.target"
+
+/* Shutdown targets */
+#define SPECIAL_UMOUNT_TARGET "umount.target"
+/* This is not really intended to be started by directly. This is
+ * mostly so that other targets (reboot/halt/poweroff) can depend on
+ * it to bring all services down that want to be brought down on
+ * system shutdown. */
+#define SPECIAL_SHUTDOWN_TARGET "shutdown.target"
+#define SPECIAL_HALT_TARGET "halt.target"
+#define SPECIAL_POWEROFF_TARGET "poweroff.target"
+#define SPECIAL_REBOOT_TARGET "reboot.target"
+#define SPECIAL_KEXEC_TARGET "kexec.target"
+#define SPECIAL_EXIT_TARGET "exit.target"
+#define SPECIAL_SUSPEND_TARGET "suspend.target"
+#define SPECIAL_HIBERNATE_TARGET "hibernate.target"
+#define SPECIAL_HYBRID_SLEEP_TARGET "hybrid-sleep.target"
+#define SPECIAL_SUSPEND_THEN_HIBERNATE_TARGET "suspend-then-hibernate.target"
+
+/* Special boot targets */
+#define SPECIAL_RESCUE_TARGET "rescue.target"
+#define SPECIAL_EMERGENCY_TARGET "emergency.target"
+#define SPECIAL_MULTI_USER_TARGET "multi-user.target"
+#define SPECIAL_GRAPHICAL_TARGET "graphical.target"
+
+/* Early boot targets */
+#define SPECIAL_SYSINIT_TARGET "sysinit.target"
+#define SPECIAL_SOCKETS_TARGET "sockets.target"
+#define SPECIAL_TIMERS_TARGET "timers.target"
+#define SPECIAL_PATHS_TARGET "paths.target"
+#define SPECIAL_LOCAL_FS_TARGET "local-fs.target"
+#define SPECIAL_LOCAL_FS_PRE_TARGET "local-fs-pre.target"
+#define SPECIAL_INITRD_FS_TARGET "initrd-fs.target"
+#define SPECIAL_INITRD_ROOT_DEVICE_TARGET "initrd-root-device.target"
+#define SPECIAL_INITRD_ROOT_FS_TARGET "initrd-root-fs.target"
+#define SPECIAL_REMOTE_FS_TARGET "remote-fs.target" /* LSB's $remote_fs */
+#define SPECIAL_REMOTE_FS_PRE_TARGET "remote-fs-pre.target"
+#define SPECIAL_SWAP_TARGET "swap.target"
+#define SPECIAL_NETWORK_ONLINE_TARGET "network-online.target"
+#define SPECIAL_TIME_SYNC_TARGET "time-sync.target" /* LSB's $time */
+#define SPECIAL_BASIC_TARGET "basic.target"
+
+/* LSB compatibility */
+#define SPECIAL_NETWORK_TARGET "network.target" /* LSB's $network */
+#define SPECIAL_NSS_LOOKUP_TARGET "nss-lookup.target" /* LSB's $named */
+#define SPECIAL_RPCBIND_TARGET "rpcbind.target" /* LSB's $portmap */
+
+/*
+ * Rules regarding adding further high level targets like the above:
+ *
+ * - Be conservative, only add more of these when we really need
+ * them. We need strong usecases for further additions.
+ *
+ * - When there can be multiple implementations running side-by-side,
+ * it needs to be a .target unit which can pull in all
+ * implementations.
+ *
+ * - If something can be implemented with socket activation, and
+ * without, it needs to be a .target unit, so that it can pull in
+ * the appropriate unit.
+ *
+ * - Otherwise, it should be a .service unit.
+ *
+ * - In some cases it is OK to have both a .service and a .target
+ * unit, i.e. if there can be multiple parallel implementations, but
+ * only one is the "system" one. Example: syslog.
+ *
+ * Or to put this in other words: .service symlinks can be used to
+ * arbitrate between multiple implementations if there can be only one
+ * of a kind. .target units can be used to support multiple
+ * implementations that can run side-by-side.
+ */
+
+/* Magic early boot services */
+#define SPECIAL_FSCK_SERVICE "systemd-fsck@.service"
+#define SPECIAL_QUOTACHECK_SERVICE "systemd-quotacheck.service"
+#define SPECIAL_QUOTAON_SERVICE "quotaon.service"
+#define SPECIAL_REMOUNT_FS_SERVICE "systemd-remount-fs.service"
+
+/* Services systemd relies on */
+#define SPECIAL_DBUS_SERVICE "dbus.service"
+#define SPECIAL_DBUS_SOCKET "dbus.socket"
+#define SPECIAL_JOURNALD_SOCKET "systemd-journald.socket"
+#define SPECIAL_JOURNALD_SERVICE "systemd-journald.service"
+#define SPECIAL_TMPFILES_SETUP_SERVICE "systemd-tmpfiles-setup.service"
+
+/* Magic init signals */
+#define SPECIAL_KBREQUEST_TARGET "kbrequest.target"
+#define SPECIAL_SIGPWR_TARGET "sigpwr.target"
+#define SPECIAL_CTRL_ALT_DEL_TARGET "ctrl-alt-del.target"
+
+/* Where we add all our system units, users and machines by default */
+#define SPECIAL_SYSTEM_SLICE "system.slice"
+#define SPECIAL_USER_SLICE "user.slice"
+#define SPECIAL_MACHINE_SLICE "machine.slice"
+#define SPECIAL_ROOT_SLICE "-.slice"
+
+/* The scope unit systemd itself lives in. */
+#define SPECIAL_INIT_SCOPE "init.scope"
+
+/* The root directory. */
+#define SPECIAL_ROOT_MOUNT "-.mount"
diff --git a/src/basic/stat-util.c b/src/basic/stat-util.c
new file mode 100644
index 0000000..ea2bbc3
--- /dev/null
+++ b/src/basic/stat-util.c
@@ -0,0 +1,427 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/magic.h>
+#include <sched.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+
+int is_symlink(const char *path) {
+ struct stat info;
+
+ assert(path);
+
+ if (lstat(path, &info) < 0)
+ return -errno;
+
+ return !!S_ISLNK(info.st_mode);
+}
+
+int is_dir(const char* path, bool follow) {
+ struct stat st;
+ int r;
+
+ assert(path);
+
+ if (follow)
+ r = stat(path, &st);
+ else
+ r = lstat(path, &st);
+ if (r < 0)
+ return -errno;
+
+ return !!S_ISDIR(st.st_mode);
+}
+
+int is_dir_fd(int fd) {
+ struct stat st;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ return !!S_ISDIR(st.st_mode);
+}
+
+int is_device_node(const char *path) {
+ struct stat info;
+
+ assert(path);
+
+ if (lstat(path, &info) < 0)
+ return -errno;
+
+ return !!(S_ISBLK(info.st_mode) || S_ISCHR(info.st_mode));
+}
+
+int dir_is_empty_at(int dir_fd, const char *path) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ if (path)
+ fd = openat(dir_fd, path, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ else
+ fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (fd < 0)
+ return -errno;
+
+ d = fdopendir(fd);
+ if (!d)
+ return -errno;
+ fd = -1;
+
+ FOREACH_DIRENT(de, d, return -errno)
+ return 0;
+
+ return 1;
+}
+
+bool null_or_empty(struct stat *st) {
+ assert(st);
+
+ if (S_ISREG(st->st_mode) && st->st_size <= 0)
+ return true;
+
+ /* We don't want to hardcode the major/minor of /dev/null,
+ * hence we do a simpler "is this a device node?" check. */
+
+ if (S_ISCHR(st->st_mode) || S_ISBLK(st->st_mode))
+ return true;
+
+ return false;
+}
+
+int null_or_empty_path(const char *fn) {
+ struct stat st;
+
+ assert(fn);
+
+ if (stat(fn, &st) < 0)
+ return -errno;
+
+ return null_or_empty(&st);
+}
+
+int null_or_empty_fd(int fd) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ return null_or_empty(&st);
+}
+
+int path_is_read_only_fs(const char *path) {
+ struct statvfs st;
+
+ assert(path);
+
+ if (statvfs(path, &st) < 0)
+ return -errno;
+
+ if (st.f_flag & ST_RDONLY)
+ return true;
+
+ /* On NFS, statvfs() might not reflect whether we can actually
+ * write to the remote share. Let's try again with
+ * access(W_OK) which is more reliable, at least sometimes. */
+ if (access(path, W_OK) < 0 && errno == EROFS)
+ return true;
+
+ return false;
+}
+
+int files_same(const char *filea, const char *fileb, int flags) {
+ struct stat a, b;
+
+ assert(filea);
+ assert(fileb);
+
+ if (fstatat(AT_FDCWD, filea, &a, flags) < 0)
+ return -errno;
+
+ if (fstatat(AT_FDCWD, fileb, &b, flags) < 0)
+ return -errno;
+
+ return a.st_dev == b.st_dev &&
+ a.st_ino == b.st_ino;
+}
+
+bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) {
+ assert(s);
+ assert_cc(sizeof(statfs_f_type_t) >= sizeof(s->f_type));
+
+ return F_TYPE_EQUAL(s->f_type, magic_value);
+}
+
+int fd_is_fs_type(int fd, statfs_f_type_t magic_value) {
+ struct statfs s;
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ return is_fs_type(&s, magic_value);
+}
+
+int path_is_fs_type(const char *path, statfs_f_type_t magic_value) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_PATH);
+ if (fd < 0)
+ return -errno;
+
+ return fd_is_fs_type(fd, magic_value);
+}
+
+bool is_temporary_fs(const struct statfs *s) {
+ return is_fs_type(s, TMPFS_MAGIC) ||
+ is_fs_type(s, RAMFS_MAGIC);
+}
+
+bool is_network_fs(const struct statfs *s) {
+ return is_fs_type(s, CIFS_MAGIC_NUMBER) ||
+ is_fs_type(s, CODA_SUPER_MAGIC) ||
+ is_fs_type(s, NCP_SUPER_MAGIC) ||
+ is_fs_type(s, NFS_SUPER_MAGIC) ||
+ is_fs_type(s, SMB_SUPER_MAGIC) ||
+ is_fs_type(s, V9FS_MAGIC) ||
+ is_fs_type(s, AFS_SUPER_MAGIC) ||
+ is_fs_type(s, OCFS2_SUPER_MAGIC);
+}
+
+int fd_is_temporary_fs(int fd) {
+ struct statfs s;
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ return is_temporary_fs(&s);
+}
+
+int fd_is_network_fs(int fd) {
+ struct statfs s;
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ return is_network_fs(&s);
+}
+
+int fd_is_network_ns(int fd) {
+ struct statfs s;
+ int r;
+
+ /* Checks whether the specified file descriptor refers to a network namespace. On old kernels there's no nice
+ * way to detect that, hence on those we'll return a recognizable error (EUCLEAN), so that callers can handle
+ * this somewhat nicely.
+ *
+ * This function returns > 0 if the fd definitely refers to a network namespace, 0 if it definitely does not
+ * refer to a network namespace, -EUCLEAN if we can't determine, and other negative error codes on error. */
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ if (!is_fs_type(&s, NSFS_MAGIC)) {
+ /* On really old kernels, there was no "nsfs", and network namespace sockets belonged to procfs
+ * instead. Handle that in a somewhat smart way. */
+
+ if (is_fs_type(&s, PROC_SUPER_MAGIC)) {
+ struct statfs t;
+
+ /* OK, so it is procfs. Let's see if our own network namespace is procfs, too. If so, then the
+ * passed fd might refer to a network namespace, but we can't know for sure. In that case,
+ * return a recognizable error. */
+
+ if (statfs("/proc/self/ns/net", &t) < 0)
+ return -errno;
+
+ if (s.f_type == t.f_type)
+ return -EUCLEAN; /* It's possible, we simply don't know */
+ }
+
+ return 0; /* No! */
+ }
+
+ r = ioctl(fd, NS_GET_NSTYPE);
+ if (r < 0) {
+ if (errno == ENOTTY) /* Old kernels didn't know this ioctl, let's also return a recognizable error in that case */
+ return -EUCLEAN;
+
+ return -errno;
+ }
+
+ return r == CLONE_NEWNET;
+}
+
+int path_is_temporary_fs(const char *path) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_PATH);
+ if (fd < 0)
+ return -errno;
+
+ return fd_is_temporary_fs(fd);
+}
+
+int stat_verify_regular(const struct stat *st) {
+ assert(st);
+
+ /* Checks whether the specified stat() structure refers to a regular file. If not returns an appropriate error
+ * code. */
+
+ if (S_ISDIR(st->st_mode))
+ return -EISDIR;
+
+ if (S_ISLNK(st->st_mode))
+ return -ELOOP;
+
+ if (!S_ISREG(st->st_mode))
+ return -EBADFD;
+
+ return 0;
+}
+
+int fd_verify_regular(int fd) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ return stat_verify_regular(&st);
+}
+
+int stat_verify_directory(const struct stat *st) {
+ assert(st);
+
+ if (S_ISLNK(st->st_mode))
+ return -ELOOP;
+
+ if (!S_ISDIR(st->st_mode))
+ return -ENOTDIR;
+
+ return 0;
+}
+
+int fd_verify_directory(int fd) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ return stat_verify_directory(&st);
+}
+
+int device_path_make_major_minor(mode_t mode, dev_t devno, char **ret) {
+ const char *t;
+
+ /* Generates the /dev/{char|block}/MAJOR:MINOR path for a dev_t */
+
+ if (S_ISCHR(mode))
+ t = "char";
+ else if (S_ISBLK(mode))
+ t = "block";
+ else
+ return -ENODEV;
+
+ if (asprintf(ret, "/dev/%s/%u:%u", t, major(devno), minor(devno)) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int device_path_make_canonical(mode_t mode, dev_t devno, char **ret) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ /* Finds the canonical path for a device, i.e. resolves the /dev/{char|block}/MAJOR:MINOR path to the end. */
+
+ assert(ret);
+
+ if (major(devno) == 0 && minor(devno) == 0) {
+ char *s;
+
+ /* A special hack to make sure our 'inaccessible' device nodes work. They won't have symlinks in
+ * /dev/block/ and /dev/char/, hence we handle them specially here. */
+
+ if (S_ISCHR(mode))
+ s = strdup("/run/systemd/inaccessible/chr");
+ else if (S_ISBLK(mode))
+ s = strdup("/run/systemd/inaccessible/blk");
+ else
+ return -ENODEV;
+
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+ }
+
+ r = device_path_make_major_minor(mode, devno, &p);
+ if (r < 0)
+ return r;
+
+ return chase_symlinks(p, NULL, 0, ret);
+}
+
+int device_path_parse_major_minor(const char *path, mode_t *ret_mode, dev_t *ret_devno) {
+ mode_t mode;
+ dev_t devno;
+ int r;
+
+ /* Tries to extract the major/minor directly from the device path if we can. Handles /dev/block/ and /dev/char/
+ * paths, as well out synthetic inaccessible device nodes. Never goes to disk. Returns -ENODEV if the device
+ * path cannot be parsed like this. */
+
+ if (path_equal(path, "/run/systemd/inaccessible/chr")) {
+ mode = S_IFCHR;
+ devno = makedev(0, 0);
+ } else if (path_equal(path, "/run/systemd/inaccessible/blk")) {
+ mode = S_IFBLK;
+ devno = makedev(0, 0);
+ } else {
+ const char *w;
+
+ w = path_startswith(path, "/dev/block/");
+ if (w)
+ mode = S_IFBLK;
+ else {
+ w = path_startswith(path, "/dev/char/");
+ if (!w)
+ return -ENODEV;
+
+ mode = S_IFCHR;
+ }
+
+ r = parse_dev(w, &devno);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_mode)
+ *ret_mode = mode;
+ if (ret_devno)
+ *ret_devno = devno;
+
+ return 0;
+}
diff --git a/src/basic/stat-util.h b/src/basic/stat-util.h
new file mode 100644
index 0000000..74fb725
--- /dev/null
+++ b/src/basic/stat-util.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+
+#include "macro.h"
+
+int is_symlink(const char *path);
+int is_dir(const char *path, bool follow);
+int is_dir_fd(int fd);
+int is_device_node(const char *path);
+
+int dir_is_empty_at(int dir_fd, const char *path);
+static inline int dir_is_empty(const char *path) {
+ return dir_is_empty_at(AT_FDCWD, path);
+}
+
+static inline int dir_is_populated(const char *path) {
+ int r;
+ r = dir_is_empty(path);
+ if (r < 0)
+ return r;
+ return !r;
+}
+
+bool null_or_empty(struct stat *st) _pure_;
+int null_or_empty_path(const char *fn);
+int null_or_empty_fd(int fd);
+
+int path_is_read_only_fs(const char *path);
+
+int files_same(const char *filea, const char *fileb, int flags);
+
+/* The .f_type field of struct statfs is really weird defined on
+ * different archs. Let's give its type a name. */
+typedef typeof(((struct statfs*)NULL)->f_type) statfs_f_type_t;
+
+bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) _pure_;
+int fd_is_fs_type(int fd, statfs_f_type_t magic_value);
+int path_is_fs_type(const char *path, statfs_f_type_t magic_value);
+
+bool is_temporary_fs(const struct statfs *s) _pure_;
+bool is_network_fs(const struct statfs *s) _pure_;
+
+int fd_is_temporary_fs(int fd);
+int fd_is_network_fs(int fd);
+
+int fd_is_network_ns(int fd);
+
+int path_is_temporary_fs(const char *path);
+
+/* Because statfs.t_type can be int on some architectures, we have to cast
+ * the const magic to the type, otherwise the compiler warns about
+ * signed/unsigned comparison, because the magic can be 32 bit unsigned.
+ */
+#define F_TYPE_EQUAL(a, b) (a == (typeof(a)) b)
+
+int stat_verify_regular(const struct stat *st);
+int fd_verify_regular(int fd);
+
+int stat_verify_directory(const struct stat *st);
+int fd_verify_directory(int fd);
+
+/* glibc and the Linux kernel have different ideas about the major/minor size. These calls will check whether the
+ * specified major is valid by the Linux kernel's standards, not by glibc's. Linux has 20bits of minor, and 12 bits of
+ * major space. See MINORBITS in linux/kdev_t.h in the kernel sources. (If you wonder why we define _y here, instead of
+ * comparing directly >= 0: it's to trick out -Wtype-limits, which would otherwise complain if the type is unsigned, as
+ * such a test would be pointless in such a case.) */
+
+#define DEVICE_MAJOR_VALID(x) \
+ ({ \
+ typeof(x) _x = (x), _y = 0; \
+ _x >= _y && _x < (UINT32_C(1) << 12); \
+ \
+ })
+
+#define DEVICE_MINOR_VALID(x) \
+ ({ \
+ typeof(x) _x = (x), _y = 0; \
+ _x >= _y && _x < (UINT32_C(1) << 20); \
+ })
+
+int device_path_make_major_minor(mode_t mode, dev_t devno, char **ret);
+int device_path_make_canonical(mode_t mode, dev_t devno, char **ret);
+int device_path_parse_major_minor(const char *path, mode_t *ret_mode, dev_t *ret_devno);
diff --git a/src/basic/static-destruct.h b/src/basic/static-destruct.h
new file mode 100644
index 0000000..443c0e8
--- /dev/null
+++ b/src/basic/static-destruct.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include "alloc-util.h"
+#include "macro.h"
+
+/* A framework for registering static variables that shall be freed on shutdown of a process. It's a bit like gcc's
+ * destructor attribute, but allows us to precisely schedule when we want to free the variables. This is supposed to
+ * feel a bit like the gcc cleanup attribute, but for static variables. Note that this does not work for static
+ * variables declared in .so's, as the list is private to the same linking unit. But maybe that's a good thing. */
+
+typedef struct StaticDestructor {
+ void *data;
+ free_func_t destroy;
+} StaticDestructor;
+
+#define STATIC_DESTRUCTOR_REGISTER(variable, func) \
+ _STATIC_DESTRUCTOR_REGISTER(UNIQ, variable, func)
+
+#define _STATIC_DESTRUCTOR_REGISTER(uq, variable, func) \
+ /* Type-safe destructor */ \
+ static void UNIQ_T(static_destructor_wrapper, uq)(void *p) { \
+ typeof(variable) *q = p; \
+ func(q); \
+ } \
+ /* The actual destructor structure we place in a special section to find it */ \
+ _section_("SYSTEMD_STATIC_DESTRUCT") \
+ /* We pick pointer alignment, since that is apparently what gcc does for static variables */ \
+ _alignptr_ \
+ /* Make sure this is not dropped from the image because not explicitly referenced */ \
+ _used_ \
+ /* Make sure that AddressSanitizer doesn't pad this variable: we want everything in this section packed next to each other so that we can enumerate it. */ \
+ _variable_no_sanitize_address_ \
+ static const StaticDestructor UNIQ_T(static_destructor_entry, uq) = { \
+ .data = &(variable), \
+ .destroy = UNIQ_T(static_destructor_wrapper, uq), \
+ }
+
+/* Beginning and end of our section listing the destructors. We define these as weak as we want this to work even if
+ * there's not a single destructor is defined in which case the section will be missing. */
+extern const struct StaticDestructor _weak_ __start_SYSTEMD_STATIC_DESTRUCT[];
+extern const struct StaticDestructor _weak_ __stop_SYSTEMD_STATIC_DESTRUCT[];
+
+/* The function to destroy everything. (Note that this must be static inline, as it's key that it remains in the same
+ * linking unit as the variables we want to destroy. */
+static inline void static_destruct(void) {
+ const StaticDestructor *d;
+
+ if (!__start_SYSTEMD_STATIC_DESTRUCT)
+ return;
+
+ d = ALIGN_TO_PTR(__start_SYSTEMD_STATIC_DESTRUCT, sizeof(void*));
+ while (d < __stop_SYSTEMD_STATIC_DESTRUCT) {
+ d->destroy(d->data);
+ d = ALIGN_TO_PTR(d + 1, sizeof(void*));
+ }
+}
diff --git a/src/basic/stdio-util.h b/src/basic/stdio-util.h
new file mode 100644
index 0000000..dc67b6e
--- /dev/null
+++ b/src/basic/stdio-util.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <printf.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "util.h"
+
+#define snprintf_ok(buf, len, fmt, ...) \
+ ((size_t) snprintf(buf, len, fmt, __VA_ARGS__) < (len))
+
+#define xsprintf(buf, fmt, ...) \
+ assert_message_se(snprintf_ok(buf, ELEMENTSOF(buf), fmt, __VA_ARGS__), "xsprintf: " #buf "[] must be big enough")
+
+#define VA_FORMAT_ADVANCE(format, ap) \
+do { \
+ int _argtypes[128]; \
+ size_t _i, _k; \
+ /* See https://github.com/google/sanitizers/issues/992 */ \
+ if (HAS_FEATURE_MEMORY_SANITIZER) \
+ zero(_argtypes); \
+ _k = parse_printf_format((format), ELEMENTSOF(_argtypes), _argtypes); \
+ assert(_k < ELEMENTSOF(_argtypes)); \
+ for (_i = 0; _i < _k; _i++) { \
+ if (_argtypes[_i] & PA_FLAG_PTR) { \
+ (void) va_arg(ap, void*); \
+ continue; \
+ } \
+ \
+ switch (_argtypes[_i]) { \
+ case PA_INT: \
+ case PA_INT|PA_FLAG_SHORT: \
+ case PA_CHAR: \
+ (void) va_arg(ap, int); \
+ break; \
+ case PA_INT|PA_FLAG_LONG: \
+ (void) va_arg(ap, long int); \
+ break; \
+ case PA_INT|PA_FLAG_LONG_LONG: \
+ (void) va_arg(ap, long long int); \
+ break; \
+ case PA_WCHAR: \
+ (void) va_arg(ap, wchar_t); \
+ break; \
+ case PA_WSTRING: \
+ case PA_STRING: \
+ case PA_POINTER: \
+ (void) va_arg(ap, void*); \
+ break; \
+ case PA_FLOAT: \
+ case PA_DOUBLE: \
+ (void) va_arg(ap, double); \
+ break; \
+ case PA_DOUBLE|PA_FLAG_LONG_DOUBLE: \
+ (void) va_arg(ap, long double); \
+ break; \
+ default: \
+ assert_not_reached("Unknown format string argument."); \
+ } \
+ } \
+} while (false)
diff --git a/src/basic/strbuf.c b/src/basic/strbuf.c
new file mode 100644
index 0000000..81f4f21
--- /dev/null
+++ b/src/basic/strbuf.c
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "strbuf.h"
+#include "util.h"
+
+/*
+ * Strbuf stores given strings in a single continuous allocated memory
+ * area. Identical strings are de-duplicated and return the same offset
+ * as the first string stored. If the tail of a string already exists
+ * in the buffer, the tail is returned.
+ *
+ * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
+ * information about the stored strings.
+ *
+ * Example of udev rules:
+ * $ ./udevadm test .
+ * ...
+ * read rules file: /usr/lib/udev/rules.d/99-systemd.rules
+ * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
+ * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
+ * ...
+ */
+
+struct strbuf *strbuf_new(void) {
+ struct strbuf *str;
+
+ str = new(struct strbuf, 1);
+ if (!str)
+ return NULL;
+ *str = (struct strbuf) {
+ .buf = new0(char, 1),
+ .root = new0(struct strbuf_node, 1),
+ .len = 1,
+ .nodes_count = 1,
+ };
+ if (!str->buf || !str->root) {
+ free(str->buf);
+ free(str->root);
+ return mfree(str);
+ }
+
+ return str;
+}
+
+static struct strbuf_node* strbuf_node_cleanup(struct strbuf_node *node) {
+ size_t i;
+
+ for (i = 0; i < node->children_count; i++)
+ strbuf_node_cleanup(node->children[i].child);
+ free(node->children);
+ return mfree(node);
+}
+
+/* clean up trie data, leave only the string buffer */
+void strbuf_complete(struct strbuf *str) {
+ if (!str)
+ return;
+ if (str->root)
+ str->root = strbuf_node_cleanup(str->root);
+}
+
+/* clean up everything */
+void strbuf_cleanup(struct strbuf *str) {
+ if (!str)
+ return;
+
+ strbuf_complete(str);
+ free(str->buf);
+ free(str);
+}
+
+static int strbuf_children_cmp(const struct strbuf_child_entry *n1,
+ const struct strbuf_child_entry *n2) {
+ return n1->c - n2->c;
+}
+
+static void bubbleinsert(struct strbuf_node *node,
+ uint8_t c,
+ struct strbuf_node *node_child) {
+
+ struct strbuf_child_entry new = {
+ .c = c,
+ .child = node_child,
+ };
+ int left = 0, right = node->children_count;
+
+ while (right > left) {
+ int middle = (right + left) / 2 ;
+ if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
+ left = middle + 1;
+ else
+ right = middle;
+ }
+
+ memmove(node->children + left + 1, node->children + left,
+ sizeof(struct strbuf_child_entry) * (node->children_count - left));
+ node->children[left] = new;
+
+ node->children_count++;
+}
+
+/* add string, return the index/offset into the buffer */
+ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
+ uint8_t c;
+ struct strbuf_node *node;
+ size_t depth;
+ char *buf_new;
+ struct strbuf_child_entry *child;
+ struct strbuf_node *node_child;
+ ssize_t off;
+
+ if (!str->root)
+ return -EINVAL;
+
+ /* search string; start from last character to find possibly matching tails */
+
+ str->in_count++;
+ if (len == 0) {
+ str->dedup_count++;
+ return 0;
+ }
+ str->in_len += len;
+
+ node = str->root;
+ for (depth = 0; depth <= len; depth++) {
+ struct strbuf_child_entry search;
+
+ /* match against current node */
+ off = node->value_off + node->value_len - len;
+ if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
+ str->dedup_len += len;
+ str->dedup_count++;
+ return off;
+ }
+
+ c = s[len - 1 - depth];
+
+ /* lookup child node */
+ search.c = c;
+ child = typesafe_bsearch(&search, node->children, node->children_count, strbuf_children_cmp);
+ if (!child)
+ break;
+ node = child->child;
+ }
+
+ /* add new string */
+ buf_new = realloc(str->buf, str->len + len+1);
+ if (!buf_new)
+ return -ENOMEM;
+ str->buf = buf_new;
+ off = str->len;
+ memcpy(str->buf + off, s, len);
+ str->len += len;
+ str->buf[str->len++] = '\0';
+
+ /* new node */
+ node_child = new(struct strbuf_node, 1);
+ if (!node_child)
+ return -ENOMEM;
+ *node_child = (struct strbuf_node) {
+ .value_off = off,
+ .value_len = len,
+ };
+
+ /* extend array, add new entry, sort for bisection */
+ child = reallocarray(node->children, node->children_count + 1, sizeof(struct strbuf_child_entry));
+ if (!child) {
+ free(node_child);
+ return -ENOMEM;
+ }
+
+ str->nodes_count++;
+
+ node->children = child;
+ bubbleinsert(node, c, node_child);
+
+ return off;
+}
diff --git a/src/basic/strbuf.h b/src/basic/strbuf.h
new file mode 100644
index 0000000..a36944a
--- /dev/null
+++ b/src/basic/strbuf.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+struct strbuf {
+ char *buf;
+ size_t len;
+ struct strbuf_node *root;
+
+ size_t nodes_count;
+ size_t in_count;
+ size_t in_len;
+ size_t dedup_len;
+ size_t dedup_count;
+};
+
+struct strbuf_node {
+ size_t value_off;
+ size_t value_len;
+
+ struct strbuf_child_entry *children;
+ uint8_t children_count;
+};
+
+struct strbuf_child_entry {
+ uint8_t c;
+ struct strbuf_node *child;
+};
+
+struct strbuf *strbuf_new(void);
+ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len);
+void strbuf_complete(struct strbuf *str);
+void strbuf_cleanup(struct strbuf *str);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct strbuf*, strbuf_cleanup);
diff --git a/src/basic/string-table.c b/src/basic/string-table.c
new file mode 100644
index 0000000..34931b0
--- /dev/null
+++ b/src/basic/string-table.c
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "string-table.h"
+#include "string-util.h"
+
+ssize_t string_table_lookup(const char * const *table, size_t len, const char *key) {
+ size_t i;
+
+ if (!key)
+ return -1;
+
+ for (i = 0; i < len; ++i)
+ if (streq_ptr(table[i], key))
+ return (ssize_t) i;
+
+ return -1;
+}
diff --git a/src/basic/string-table.h b/src/basic/string-table.h
new file mode 100644
index 0000000..228c12a
--- /dev/null
+++ b/src/basic/string-table.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#pragma once
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+ssize_t string_table_lookup(const char * const *table, size_t len, const char *key);
+
+/* For basic lookup tables with strictly enumerated entries */
+#define _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \
+ scope const char *name##_to_string(type i) { \
+ if (i < 0 || i >= (type) ELEMENTSOF(name##_table)) \
+ return NULL; \
+ return name##_table[i]; \
+ }
+
+#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,scope) \
+ scope type name##_from_string(const char *s) { \
+ return (type) string_table_lookup(name##_table, ELEMENTSOF(name##_table), s); \
+ }
+
+#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(name,type,yes,scope) \
+ scope type name##_from_string(const char *s) { \
+ int b; \
+ if (!s) \
+ return -1; \
+ b = parse_boolean(s); \
+ if (b == 0) \
+ return (type) 0; \
+ else if (b > 0) \
+ return yes; \
+ return (type) string_table_lookup(name##_table, ELEMENTSOF(name##_table), s); \
+ }
+
+#define _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,scope) \
+ scope int name##_to_string_alloc(type i, char **str) { \
+ char *s; \
+ if (i < 0 || i > max) \
+ return -ERANGE; \
+ if (i < (type) ELEMENTSOF(name##_table)) { \
+ s = strdup(name##_table[i]); \
+ if (!s) \
+ return -ENOMEM; \
+ } else { \
+ if (asprintf(&s, "%i", i) < 0) \
+ return -ENOMEM; \
+ } \
+ *str = s; \
+ return 0; \
+ }
+
+#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,scope) \
+ scope type name##_from_string(const char *s) { \
+ type i; \
+ unsigned u = 0; \
+ if (!s) \
+ return (type) -1; \
+ for (i = 0; i < (type) ELEMENTSOF(name##_table); i++) \
+ if (streq_ptr(name##_table[i], s)) \
+ return i; \
+ if (safe_atou(s, &u) >= 0 && u <= max) \
+ return (type) u; \
+ return (type) -1; \
+ } \
+
+#define _DEFINE_STRING_TABLE_LOOKUP(name,type,scope) \
+ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \
+ _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,scope)
+
+#define _DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes,scope) \
+ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \
+ _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(name,type,yes,scope)
+
+#define DEFINE_STRING_TABLE_LOOKUP(name,type) _DEFINE_STRING_TABLE_LOOKUP(name,type,)
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP(name,type) _DEFINE_STRING_TABLE_LOOKUP(name,type,static)
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(name,type) _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,static)
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(name,type) _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,static)
+
+#define DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes) _DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes,)
+
+/* For string conversions where numbers are also acceptable */
+#define DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(name,type,max) \
+ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,) \
+ _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,)
+
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max) \
+ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,static)
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max) \
+ _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,static)
+
+#define DUMP_STRING_TABLE(name,type,max) \
+ do { \
+ type _k; \
+ flockfile(stdout); \
+ for (_k = 0; _k < (max); _k++) { \
+ const char *_t; \
+ _t = name##_to_string(_k); \
+ if (!_t) \
+ continue; \
+ fputs_unlocked(_t, stdout); \
+ fputc_unlocked('\n', stdout); \
+ } \
+ funlockfile(stdout); \
+ } while(false)
diff --git a/src/basic/string-util.c b/src/basic/string-util.c
new file mode 100644
index 0000000..93917bc
--- /dev/null
+++ b/src/basic/string-util.c
@@ -0,0 +1,1099 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "gunicode.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "utf8.h"
+#include "util.h"
+#include "fileio.h"
+
+int strcmp_ptr(const char *a, const char *b) {
+
+ /* Like strcmp(), but tries to make sense of NULL pointers */
+ if (a && b)
+ return strcmp(a, b);
+
+ if (!a && b)
+ return -1;
+
+ if (a && !b)
+ return 1;
+
+ return 0;
+}
+
+char* endswith(const char *s, const char *postfix) {
+ size_t sl, pl;
+
+ assert(s);
+ assert(postfix);
+
+ sl = strlen(s);
+ pl = strlen(postfix);
+
+ if (pl == 0)
+ return (char*) s + sl;
+
+ if (sl < pl)
+ return NULL;
+
+ if (memcmp(s + sl - pl, postfix, pl) != 0)
+ return NULL;
+
+ return (char*) s + sl - pl;
+}
+
+char* endswith_no_case(const char *s, const char *postfix) {
+ size_t sl, pl;
+
+ assert(s);
+ assert(postfix);
+
+ sl = strlen(s);
+ pl = strlen(postfix);
+
+ if (pl == 0)
+ return (char*) s + sl;
+
+ if (sl < pl)
+ return NULL;
+
+ if (strcasecmp(s + sl - pl, postfix) != 0)
+ return NULL;
+
+ return (char*) s + sl - pl;
+}
+
+char* first_word(const char *s, const char *word) {
+ size_t sl, wl;
+ const char *p;
+
+ assert(s);
+ assert(word);
+
+ /* Checks if the string starts with the specified word, either
+ * followed by NUL or by whitespace. Returns a pointer to the
+ * NUL or the first character after the whitespace. */
+
+ sl = strlen(s);
+ wl = strlen(word);
+
+ if (sl < wl)
+ return NULL;
+
+ if (wl == 0)
+ return (char*) s;
+
+ if (memcmp(s, word, wl) != 0)
+ return NULL;
+
+ p = s + wl;
+ if (*p == 0)
+ return (char*) p;
+
+ if (!strchr(WHITESPACE, *p))
+ return NULL;
+
+ p += strspn(p, WHITESPACE);
+ return (char*) p;
+}
+
+static size_t strcspn_escaped(const char *s, const char *reject) {
+ bool escaped = false;
+ int n;
+
+ for (n=0; s[n]; n++) {
+ if (escaped)
+ escaped = false;
+ else if (s[n] == '\\')
+ escaped = true;
+ else if (strchr(reject, s[n]))
+ break;
+ }
+
+ /* if s ends in \, return index of previous char */
+ return n - escaped;
+}
+
+/* Split a string into words. */
+const char* split(const char **state, size_t *l, const char *separator, SplitFlags flags) {
+ const char *current;
+
+ current = *state;
+
+ if (!*current) {
+ assert(**state == '\0');
+ return NULL;
+ }
+
+ current += strspn(current, separator);
+ if (!*current) {
+ *state = current;
+ return NULL;
+ }
+
+ if (flags & SPLIT_QUOTES && strchr("\'\"", *current)) {
+ char quotechars[2] = {*current, '\0'};
+
+ *l = strcspn_escaped(current + 1, quotechars);
+ if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] ||
+ (current[*l + 2] && !strchr(separator, current[*l + 2]))) {
+ /* right quote missing or garbage at the end */
+ if (flags & SPLIT_RELAX) {
+ *state = current + *l + 1 + (current[*l + 1] != '\0');
+ return current + 1;
+ }
+ *state = current;
+ return NULL;
+ }
+ *state = current++ + *l + 2;
+ } else if (flags & SPLIT_QUOTES) {
+ *l = strcspn_escaped(current, separator);
+ if (current[*l] && !strchr(separator, current[*l]) && !(flags & SPLIT_RELAX)) {
+ /* unfinished escape */
+ *state = current;
+ return NULL;
+ }
+ *state = current + *l;
+ } else {
+ *l = strcspn(current, separator);
+ *state = current + *l;
+ }
+
+ return current;
+}
+
+char *strnappend(const char *s, const char *suffix, size_t b) {
+ size_t a;
+ char *r;
+
+ if (!s && !suffix)
+ return strdup("");
+
+ if (!s)
+ return strndup(suffix, b);
+
+ if (!suffix)
+ return strdup(s);
+
+ assert(s);
+ assert(suffix);
+
+ a = strlen(s);
+ if (b > ((size_t) -1) - a)
+ return NULL;
+
+ r = new(char, a+b+1);
+ if (!r)
+ return NULL;
+
+ memcpy(r, s, a);
+ memcpy(r+a, suffix, b);
+ r[a+b] = 0;
+
+ return r;
+}
+
+char *strappend(const char *s, const char *suffix) {
+ return strnappend(s, suffix, strlen_ptr(suffix));
+}
+
+char *strjoin_real(const char *x, ...) {
+ va_list ap;
+ size_t l;
+ char *r, *p;
+
+ va_start(ap, x);
+
+ if (x) {
+ l = strlen(x);
+
+ for (;;) {
+ const char *t;
+ size_t n;
+
+ t = va_arg(ap, const char *);
+ if (!t)
+ break;
+
+ n = strlen(t);
+ if (n > ((size_t) -1) - l) {
+ va_end(ap);
+ return NULL;
+ }
+
+ l += n;
+ }
+ } else
+ l = 0;
+
+ va_end(ap);
+
+ r = new(char, l+1);
+ if (!r)
+ return NULL;
+
+ if (x) {
+ p = stpcpy(r, x);
+
+ va_start(ap, x);
+
+ for (;;) {
+ const char *t;
+
+ t = va_arg(ap, const char *);
+ if (!t)
+ break;
+
+ p = stpcpy(p, t);
+ }
+
+ va_end(ap);
+ } else
+ r[0] = 0;
+
+ return r;
+}
+
+char *strstrip(char *s) {
+ if (!s)
+ return NULL;
+
+ /* Drops trailing whitespace. Modifies the string in place. Returns pointer to first non-space character */
+
+ return delete_trailing_chars(skip_leading_chars(s, WHITESPACE), WHITESPACE);
+}
+
+char *delete_chars(char *s, const char *bad) {
+ char *f, *t;
+
+ /* Drops all specified bad characters, regardless where in the string */
+
+ if (!s)
+ return NULL;
+
+ if (!bad)
+ bad = WHITESPACE;
+
+ for (f = s, t = s; *f; f++) {
+ if (strchr(bad, *f))
+ continue;
+
+ *(t++) = *f;
+ }
+
+ *t = 0;
+
+ return s;
+}
+
+char *delete_trailing_chars(char *s, const char *bad) {
+ char *p, *c = s;
+
+ /* Drops all specified bad characters, at the end of the string */
+
+ if (!s)
+ return NULL;
+
+ if (!bad)
+ bad = WHITESPACE;
+
+ for (p = s; *p; p++)
+ if (!strchr(bad, *p))
+ c = p + 1;
+
+ *c = 0;
+
+ return s;
+}
+
+char *truncate_nl(char *s) {
+ assert(s);
+
+ s[strcspn(s, NEWLINE)] = 0;
+ return s;
+}
+
+char ascii_tolower(char x) {
+
+ if (x >= 'A' && x <= 'Z')
+ return x - 'A' + 'a';
+
+ return x;
+}
+
+char ascii_toupper(char x) {
+
+ if (x >= 'a' && x <= 'z')
+ return x - 'a' + 'A';
+
+ return x;
+}
+
+char *ascii_strlower(char *t) {
+ char *p;
+
+ assert(t);
+
+ for (p = t; *p; p++)
+ *p = ascii_tolower(*p);
+
+ return t;
+}
+
+char *ascii_strupper(char *t) {
+ char *p;
+
+ assert(t);
+
+ for (p = t; *p; p++)
+ *p = ascii_toupper(*p);
+
+ return t;
+}
+
+char *ascii_strlower_n(char *t, size_t n) {
+ size_t i;
+
+ if (n <= 0)
+ return t;
+
+ for (i = 0; i < n; i++)
+ t[i] = ascii_tolower(t[i]);
+
+ return t;
+}
+
+int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
+
+ for (; n > 0; a++, b++, n--) {
+ int x, y;
+
+ x = (int) (uint8_t) ascii_tolower(*a);
+ y = (int) (uint8_t) ascii_tolower(*b);
+
+ if (x != y)
+ return x - y;
+ }
+
+ return 0;
+}
+
+int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
+ int r;
+
+ r = ascii_strcasecmp_n(a, b, MIN(n, m));
+ if (r != 0)
+ return r;
+
+ return CMP(n, m);
+}
+
+bool chars_intersect(const char *a, const char *b) {
+ const char *p;
+
+ /* Returns true if any of the chars in a are in b. */
+ for (p = a; *p; p++)
+ if (strchr(b, *p))
+ return true;
+
+ return false;
+}
+
+bool string_has_cc(const char *p, const char *ok) {
+ const char *t;
+
+ assert(p);
+
+ /*
+ * Check if a string contains control characters. If 'ok' is
+ * non-NULL it may be a string containing additional CCs to be
+ * considered OK.
+ */
+
+ for (t = p; *t; t++) {
+ if (ok && strchr(ok, *t))
+ continue;
+
+ if (*t > 0 && *t < ' ')
+ return true;
+
+ if (*t == 127)
+ return true;
+ }
+
+ return false;
+}
+
+static int write_ellipsis(char *buf, bool unicode) {
+ if (unicode || is_locale_utf8()) {
+ buf[0] = 0xe2; /* tri-dot ellipsis: … */
+ buf[1] = 0x80;
+ buf[2] = 0xa6;
+ } else {
+ buf[0] = '.';
+ buf[1] = '.';
+ buf[2] = '.';
+ }
+
+ return 3;
+}
+
+static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
+ size_t x, need_space, suffix_len;
+ char *t;
+
+ assert(s);
+ assert(percent <= 100);
+ assert(new_length != (size_t) -1);
+
+ if (old_length <= new_length)
+ return strndup(s, old_length);
+
+ /* Special case short ellipsations */
+ switch (new_length) {
+
+ case 0:
+ return strdup("");
+
+ case 1:
+ if (is_locale_utf8())
+ return strdup("…");
+ else
+ return strdup(".");
+
+ case 2:
+ if (!is_locale_utf8())
+ return strdup("..");
+
+ break;
+
+ default:
+ break;
+ }
+
+ /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
+ * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
+ * either for the UTF-8 encoded character or for three ASCII characters. */
+ need_space = is_locale_utf8() ? 1 : 3;
+
+ t = new(char, new_length+3);
+ if (!t)
+ return NULL;
+
+ assert(new_length >= need_space);
+
+ x = ((new_length - need_space) * percent + 50) / 100;
+ assert(x <= new_length - need_space);
+
+ memcpy(t, s, x);
+ write_ellipsis(t + x, false);
+ suffix_len = new_length - x - need_space;
+ memcpy(t + x + 3, s + old_length - suffix_len, suffix_len);
+ *(t + x + 3 + suffix_len) = '\0';
+
+ return t;
+}
+
+char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
+ size_t x, k, len, len2;
+ const char *i, *j;
+ char *e;
+ int r;
+
+ /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
+ * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
+ * strings.
+ *
+ * Ellipsation is done in a locale-dependent way:
+ * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
+ * 2. Otherwise, a unicode ellipsis is used ("…")
+ *
+ * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
+ * the current locale is UTF-8.
+ */
+
+ assert(s);
+ assert(percent <= 100);
+
+ if (new_length == (size_t) -1)
+ return strndup(s, old_length);
+
+ if (new_length == 0)
+ return strdup("");
+
+ /* If no multibyte characters use ascii_ellipsize_mem for speed */
+ if (ascii_is_valid_n(s, old_length))
+ return ascii_ellipsize_mem(s, old_length, new_length, percent);
+
+ x = ((new_length - 1) * percent) / 100;
+ assert(x <= new_length - 1);
+
+ k = 0;
+ for (i = s; i < s + old_length; i = utf8_next_char(i)) {
+ char32_t c;
+ int w;
+
+ r = utf8_encoded_to_unichar(i, &c);
+ if (r < 0)
+ return NULL;
+
+ w = unichar_iswide(c) ? 2 : 1;
+ if (k + w <= x)
+ k += w;
+ else
+ break;
+ }
+
+ for (j = s + old_length; j > i; ) {
+ char32_t c;
+ int w;
+ const char *jj;
+
+ jj = utf8_prev_char(j);
+ r = utf8_encoded_to_unichar(jj, &c);
+ if (r < 0)
+ return NULL;
+
+ w = unichar_iswide(c) ? 2 : 1;
+ if (k + w <= new_length) {
+ k += w;
+ j = jj;
+ } else
+ break;
+ }
+ assert(i <= j);
+
+ /* we don't actually need to ellipsize */
+ if (i == j)
+ return memdup_suffix0(s, old_length);
+
+ /* make space for ellipsis, if possible */
+ if (j < s + old_length)
+ j = utf8_next_char(j);
+ else if (i > s)
+ i = utf8_prev_char(i);
+
+ len = i - s;
+ len2 = s + old_length - j;
+ e = new(char, len + 3 + len2 + 1);
+ if (!e)
+ return NULL;
+
+ /*
+ printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
+ old_length, new_length, x, len, len2, k);
+ */
+
+ memcpy(e, s, len);
+ write_ellipsis(e + len, true);
+ memcpy(e + len + 3, j, len2);
+ *(e + len + 3 + len2) = '\0';
+
+ return e;
+}
+
+char *cellescape(char *buf, size_t len, const char *s) {
+ /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII
+ * characters are copied as they are, everything else is escaped. The result
+ * is different then if escaping and ellipsization was performed in two
+ * separate steps, because each sequence is either stored in full or skipped.
+ *
+ * This function should be used for logging about strings which expected to
+ * be plain ASCII in a safe way.
+ *
+ * An ellipsis will be used if s is too long. It was always placed at the
+ * very end.
+ */
+
+ size_t i = 0, last_char_width[4] = {}, k = 0, j;
+
+ assert(len > 0); /* at least a terminating NUL */
+
+ for (;;) {
+ char four[4];
+ int w;
+
+ if (*s == 0) /* terminating NUL detected? then we are done! */
+ goto done;
+
+ w = cescape_char(*s, four);
+ if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? In that case let's
+ * ellipsize at the previous location */
+ break;
+
+ /* OK, there was space, let's add this escaped character to the buffer */
+ memcpy(buf + i, four, w);
+ i += w;
+
+ /* And remember its width in the ring buffer */
+ last_char_width[k] = w;
+ k = (k + 1) % 4;
+
+ s++;
+ }
+
+ /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4
+ * characters ideally, but the buffer is shorter than that in the first place take what we can get */
+ for (j = 0; j < ELEMENTSOF(last_char_width); j++) {
+
+ if (i + 4 <= len) /* nice, we reached our space goal */
+ break;
+
+ k = k == 0 ? 3 : k - 1;
+ if (last_char_width[k] == 0) /* bummer, we reached the beginning of the strings */
+ break;
+
+ assert(i >= last_char_width[k]);
+ i -= last_char_width[k];
+ }
+
+ if (i + 4 <= len) /* yay, enough space */
+ i += write_ellipsis(buf + i, false);
+ else if (i + 3 <= len) { /* only space for ".." */
+ buf[i++] = '.';
+ buf[i++] = '.';
+ } else if (i + 2 <= len) /* only space for a single "." */
+ buf[i++] = '.';
+ else
+ assert(i + 1 <= len);
+
+ done:
+ buf[i] = '\0';
+ return buf;
+}
+
+bool nulstr_contains(const char *nulstr, const char *needle) {
+ const char *i;
+
+ if (!nulstr)
+ return false;
+
+ NULSTR_FOREACH(i, nulstr)
+ if (streq(i, needle))
+ return true;
+
+ return false;
+}
+
+char* strshorten(char *s, size_t l) {
+ assert(s);
+
+ if (strnlen(s, l+1) > l)
+ s[l] = 0;
+
+ return s;
+}
+
+char *strreplace(const char *text, const char *old_string, const char *new_string) {
+ size_t l, old_len, new_len, allocated = 0;
+ char *t, *ret = NULL;
+ const char *f;
+
+ assert(old_string);
+ assert(new_string);
+
+ if (!text)
+ return NULL;
+
+ old_len = strlen(old_string);
+ new_len = strlen(new_string);
+
+ l = strlen(text);
+ if (!GREEDY_REALLOC(ret, allocated, l+1))
+ return NULL;
+
+ f = text;
+ t = ret;
+ while (*f) {
+ size_t d, nl;
+
+ if (!startswith(f, old_string)) {
+ *(t++) = *(f++);
+ continue;
+ }
+
+ d = t - ret;
+ nl = l - old_len + new_len;
+
+ if (!GREEDY_REALLOC(ret, allocated, nl + 1))
+ return mfree(ret);
+
+ l = nl;
+ t = ret + d;
+
+ t = stpcpy(t, new_string);
+ f += old_len;
+ }
+
+ *t = 0;
+ return ret;
+}
+
+static void advance_offsets(ssize_t diff, size_t offsets[static 2], size_t shift[static 2], size_t size) {
+ if (!offsets)
+ return;
+
+ if ((size_t) diff < offsets[0])
+ shift[0] += size;
+ if ((size_t) diff < offsets[1])
+ shift[1] += size;
+}
+
+char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
+ const char *i, *begin = NULL;
+ enum {
+ STATE_OTHER,
+ STATE_ESCAPE,
+ STATE_CSI,
+ STATE_CSO,
+ } state = STATE_OTHER;
+ char *obuf = NULL;
+ size_t osz = 0, isz, shift[2] = {};
+ FILE *f;
+
+ assert(ibuf);
+ assert(*ibuf);
+
+ /* This does three things:
+ *
+ * 1. Replaces TABs by 8 spaces
+ * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences
+ * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences
+ *
+ * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as are any
+ * other special characters. Truncated ANSI sequences are left-as is too. This call is supposed to suppress the
+ * most basic formatting noise, but nothing else.
+ *
+ * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */
+
+ isz = _isz ? *_isz : strlen(*ibuf);
+
+ f = open_memstream(&obuf, &osz);
+ if (!f)
+ return NULL;
+
+ /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we created f here
+ * and it doesn't leave our scope. */
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ for (i = *ibuf; i < *ibuf + isz + 1; i++) {
+
+ switch (state) {
+
+ case STATE_OTHER:
+ if (i >= *ibuf + isz) /* EOT */
+ break;
+ else if (*i == '\x1B')
+ state = STATE_ESCAPE;
+ else if (*i == '\t') {
+ fputs(" ", f);
+ advance_offsets(i - *ibuf, highlight, shift, 7);
+ } else
+ fputc(*i, f);
+
+ break;
+
+ case STATE_ESCAPE:
+ if (i >= *ibuf + isz) { /* EOT */
+ fputc('\x1B', f);
+ advance_offsets(i - *ibuf, highlight, shift, 1);
+ break;
+ } else if (*i == '[') { /* ANSI CSI */
+ state = STATE_CSI;
+ begin = i + 1;
+ } else if (*i == ']') { /* ANSI CSO */
+ state = STATE_CSO;
+ begin = i + 1;
+ } else {
+ fputc('\x1B', f);
+ fputc(*i, f);
+ advance_offsets(i - *ibuf, highlight, shift, 1);
+ state = STATE_OTHER;
+ }
+
+ break;
+
+ case STATE_CSI:
+
+ if (i >= *ibuf + isz || /* EOT … */
+ !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */
+ fputc('\x1B', f);
+ fputc('[', f);
+ advance_offsets(i - *ibuf, highlight, shift, 2);
+ state = STATE_OTHER;
+ i = begin-1;
+ } else if (*i == 'm')
+ state = STATE_OTHER;
+
+ break;
+
+ case STATE_CSO:
+
+ if (i >= *ibuf + isz || /* EOT … */
+ (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */
+ fputc('\x1B', f);
+ fputc(']', f);
+ advance_offsets(i - *ibuf, highlight, shift, 2);
+ state = STATE_OTHER;
+ i = begin-1;
+ } else if (*i == '\a')
+ state = STATE_OTHER;
+
+ break;
+ }
+ }
+
+ if (fflush_and_check(f) < 0) {
+ fclose(f);
+ return mfree(obuf);
+ }
+
+ fclose(f);
+
+ free(*ibuf);
+ *ibuf = obuf;
+
+ if (_isz)
+ *_isz = osz;
+
+ if (highlight) {
+ highlight[0] += shift[0];
+ highlight[1] += shift[1];
+ }
+
+ return obuf;
+}
+
+char *strextend_with_separator(char **x, const char *separator, ...) {
+ bool need_separator;
+ size_t f, l, l_separator;
+ char *r, *p;
+ va_list ap;
+
+ assert(x);
+
+ l = f = strlen_ptr(*x);
+
+ need_separator = !isempty(*x);
+ l_separator = strlen_ptr(separator);
+
+ va_start(ap, separator);
+ for (;;) {
+ const char *t;
+ size_t n;
+
+ t = va_arg(ap, const char *);
+ if (!t)
+ break;
+
+ n = strlen(t);
+
+ if (need_separator)
+ n += l_separator;
+
+ if (n > ((size_t) -1) - l) {
+ va_end(ap);
+ return NULL;
+ }
+
+ l += n;
+ need_separator = true;
+ }
+ va_end(ap);
+
+ need_separator = !isempty(*x);
+
+ r = realloc(*x, l+1);
+ if (!r)
+ return NULL;
+
+ p = r + f;
+
+ va_start(ap, separator);
+ for (;;) {
+ const char *t;
+
+ t = va_arg(ap, const char *);
+ if (!t)
+ break;
+
+ if (need_separator && separator)
+ p = stpcpy(p, separator);
+
+ p = stpcpy(p, t);
+
+ need_separator = true;
+ }
+ va_end(ap);
+
+ assert(p == r + l);
+
+ *p = 0;
+ *x = r;
+
+ return r + l;
+}
+
+char *strrep(const char *s, unsigned n) {
+ size_t l;
+ char *r, *p;
+ unsigned i;
+
+ assert(s);
+
+ l = strlen(s);
+ p = r = malloc(l * n + 1);
+ if (!r)
+ return NULL;
+
+ for (i = 0; i < n; i++)
+ p = stpcpy(p, s);
+
+ *p = 0;
+ return r;
+}
+
+int split_pair(const char *s, const char *sep, char **l, char **r) {
+ char *x, *a, *b;
+
+ assert(s);
+ assert(sep);
+ assert(l);
+ assert(r);
+
+ if (isempty(sep))
+ return -EINVAL;
+
+ x = strstr(s, sep);
+ if (!x)
+ return -EINVAL;
+
+ a = strndup(s, x - s);
+ if (!a)
+ return -ENOMEM;
+
+ b = strdup(x + strlen(sep));
+ if (!b) {
+ free(a);
+ return -ENOMEM;
+ }
+
+ *l = a;
+ *r = b;
+
+ return 0;
+}
+
+int free_and_strdup(char **p, const char *s) {
+ char *t;
+
+ assert(p);
+
+ /* Replaces a string pointer with a strdup()ed new string,
+ * possibly freeing the old one. */
+
+ if (streq_ptr(*p, s))
+ return 0;
+
+ if (s) {
+ t = strdup(s);
+ if (!t)
+ return -ENOMEM;
+ } else
+ t = NULL;
+
+ free(*p);
+ *p = t;
+
+ return 1;
+}
+
+int free_and_strndup(char **p, const char *s, size_t l) {
+ char *t;
+
+ assert(p);
+ assert(s || l == 0);
+
+ /* Replaces a string pointer with a strndup()ed new string,
+ * freeing the old one. */
+
+ if (!*p && !s)
+ return 0;
+
+ if (*p && s && strneq(*p, s, l) && (l > strlen(*p) || (*p)[l] == '\0'))
+ return 0;
+
+ if (s) {
+ t = strndup(s, l);
+ if (!t)
+ return -ENOMEM;
+ } else
+ t = NULL;
+
+ free_and_replace(*p, t);
+ return 1;
+}
+
+#if !HAVE_EXPLICIT_BZERO
+/*
+ * Pointer to memset is volatile so that compiler must de-reference
+ * the pointer and can't assume that it points to any function in
+ * particular (such as memset, which it then might further "optimize")
+ * This approach is inspired by openssl's crypto/mem_clr.c.
+ */
+typedef void *(*memset_t)(void *,int,size_t);
+
+static volatile memset_t memset_func = memset;
+
+void* explicit_bzero_safe(void *p, size_t l) {
+ if (l > 0)
+ memset_func(p, '\0', l);
+
+ return p;
+}
+#endif
+
+char* string_erase(char *x) {
+ if (!x)
+ return NULL;
+
+ /* A delicious drop of snake-oil! To be called on memory where
+ * we stored passphrases or so, after we used them. */
+ explicit_bzero_safe(x, strlen(x));
+ return x;
+}
+
+char *string_free_erase(char *s) {
+ return mfree(string_erase(s));
+}
+
+bool string_is_safe(const char *p) {
+ const char *t;
+
+ if (!p)
+ return false;
+
+ for (t = p; *t; t++) {
+ if (*t > 0 && *t < ' ') /* no control characters */
+ return false;
+
+ if (strchr(QUOTES "\\\x7f", *t))
+ return false;
+ }
+
+ return true;
+}
diff --git a/src/basic/string-util.h b/src/basic/string-util.h
new file mode 100644
index 0000000..38070ab
--- /dev/null
+++ b/src/basic/string-util.h
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <alloca.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+
+/* What is interpreted as whitespace? */
+#define WHITESPACE " \t\n\r"
+#define NEWLINE "\n\r"
+#define QUOTES "\"\'"
+#define COMMENTS "#;"
+#define GLOB_CHARS "*?["
+#define DIGITS "0123456789"
+#define LOWERCASE_LETTERS "abcdefghijklmnopqrstuvwxyz"
+#define UPPERCASE_LETTERS "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+#define LETTERS LOWERCASE_LETTERS UPPERCASE_LETTERS
+#define ALPHANUMERICAL LETTERS DIGITS
+#define HEXDIGITS DIGITS "abcdefABCDEF"
+
+#define streq(a,b) (strcmp((a),(b)) == 0)
+#define strneq(a, b, n) (strncmp((a), (b), (n)) == 0)
+#define strcaseeq(a,b) (strcasecmp((a),(b)) == 0)
+#define strncaseeq(a, b, n) (strncasecmp((a), (b), (n)) == 0)
+
+int strcmp_ptr(const char *a, const char *b) _pure_;
+
+static inline bool streq_ptr(const char *a, const char *b) {
+ return strcmp_ptr(a, b) == 0;
+}
+
+static inline const char* strempty(const char *s) {
+ return s ?: "";
+}
+
+static inline const char* strnull(const char *s) {
+ return s ?: "(null)";
+}
+
+static inline const char *strna(const char *s) {
+ return s ?: "n/a";
+}
+
+static inline bool isempty(const char *p) {
+ return !p || !p[0];
+}
+
+static inline const char *empty_to_null(const char *p) {
+ return isempty(p) ? NULL : p;
+}
+
+static inline const char *empty_to_dash(const char *str) {
+ return isempty(str) ? "-" : str;
+}
+
+static inline char *startswith(const char *s, const char *prefix) {
+ size_t l;
+
+ l = strlen(prefix);
+ if (strncmp(s, prefix, l) == 0)
+ return (char*) s + l;
+
+ return NULL;
+}
+
+static inline char *startswith_no_case(const char *s, const char *prefix) {
+ size_t l;
+
+ l = strlen(prefix);
+ if (strncasecmp(s, prefix, l) == 0)
+ return (char*) s + l;
+
+ return NULL;
+}
+
+char *endswith(const char *s, const char *postfix) _pure_;
+char *endswith_no_case(const char *s, const char *postfix) _pure_;
+
+char *first_word(const char *s, const char *word) _pure_;
+
+typedef enum SplitFlags {
+ SPLIT_QUOTES = 0x01 << 0,
+ SPLIT_RELAX = 0x01 << 1,
+} SplitFlags;
+
+const char* split(const char **state, size_t *l, const char *separator, SplitFlags flags);
+
+#define FOREACH_WORD(word, length, s, state) \
+ _FOREACH_WORD(word, length, s, WHITESPACE, 0, state)
+
+#define FOREACH_WORD_SEPARATOR(word, length, s, separator, state) \
+ _FOREACH_WORD(word, length, s, separator, 0, state)
+
+#define _FOREACH_WORD(word, length, s, separator, flags, state) \
+ for ((state) = (s), (word) = split(&(state), &(length), (separator), (flags)); (word); (word) = split(&(state), &(length), (separator), (flags)))
+
+char *strappend(const char *s, const char *suffix);
+char *strnappend(const char *s, const char *suffix, size_t length);
+
+char *strjoin_real(const char *x, ...) _sentinel_;
+#define strjoin(a, ...) strjoin_real((a), __VA_ARGS__, NULL)
+
+#define strjoina(a, ...) \
+ ({ \
+ const char *_appendees_[] = { a, __VA_ARGS__ }; \
+ char *_d_, *_p_; \
+ size_t _len_ = 0; \
+ size_t _i_; \
+ for (_i_ = 0; _i_ < ELEMENTSOF(_appendees_) && _appendees_[_i_]; _i_++) \
+ _len_ += strlen(_appendees_[_i_]); \
+ _p_ = _d_ = newa(char, _len_ + 1); \
+ for (_i_ = 0; _i_ < ELEMENTSOF(_appendees_) && _appendees_[_i_]; _i_++) \
+ _p_ = stpcpy(_p_, _appendees_[_i_]); \
+ *_p_ = 0; \
+ _d_; \
+ })
+
+char *strstrip(char *s);
+char *delete_chars(char *s, const char *bad);
+char *delete_trailing_chars(char *s, const char *bad);
+char *truncate_nl(char *s);
+
+static inline char *skip_leading_chars(const char *s, const char *bad) {
+
+ if (!s)
+ return NULL;
+
+ if (!bad)
+ bad = WHITESPACE;
+
+ return (char*) s + strspn(s, bad);
+}
+
+char ascii_tolower(char x);
+char *ascii_strlower(char *s);
+char *ascii_strlower_n(char *s, size_t n);
+
+char ascii_toupper(char x);
+char *ascii_strupper(char *s);
+
+int ascii_strcasecmp_n(const char *a, const char *b, size_t n);
+int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m);
+
+bool chars_intersect(const char *a, const char *b) _pure_;
+
+static inline bool _pure_ in_charset(const char *s, const char* charset) {
+ assert(s);
+ assert(charset);
+ return s[strspn(s, charset)] == '\0';
+}
+
+bool string_has_cc(const char *p, const char *ok) _pure_;
+
+char *ellipsize_mem(const char *s, size_t old_length_bytes, size_t new_length_columns, unsigned percent);
+static inline char *ellipsize(const char *s, size_t length, unsigned percent) {
+ return ellipsize_mem(s, strlen(s), length, percent);
+}
+
+char *cellescape(char *buf, size_t len, const char *s);
+
+/* This limit is arbitrary, enough to give some idea what the string contains */
+#define CELLESCAPE_DEFAULT_LENGTH 64
+
+bool nulstr_contains(const char *nulstr, const char *needle);
+
+char* strshorten(char *s, size_t l);
+
+char *strreplace(const char *text, const char *old_string, const char *new_string);
+
+char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]);
+
+char *strextend_with_separator(char **x, const char *separator, ...) _sentinel_;
+
+#define strextend(x, ...) strextend_with_separator(x, NULL, __VA_ARGS__)
+
+char *strrep(const char *s, unsigned n);
+
+int split_pair(const char *s, const char *sep, char **l, char **r);
+
+int free_and_strdup(char **p, const char *s);
+int free_and_strndup(char **p, const char *s, size_t l);
+
+/* Normal memmem() requires haystack to be nonnull, which is annoying for zero-length buffers */
+static inline void *memmem_safe(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen) {
+
+ if (needlelen <= 0)
+ return (void*) haystack;
+
+ if (haystacklen < needlelen)
+ return NULL;
+
+ assert(haystack);
+ assert(needle);
+
+ return memmem(haystack, haystacklen, needle, needlelen);
+}
+
+#if HAVE_EXPLICIT_BZERO
+static inline void* explicit_bzero_safe(void *p, size_t l) {
+ if (l > 0)
+ explicit_bzero(p, l);
+
+ return p;
+}
+#else
+void *explicit_bzero_safe(void *p, size_t l);
+#endif
+
+char *string_erase(char *x);
+
+char *string_free_erase(char *s);
+DEFINE_TRIVIAL_CLEANUP_FUNC(char *, string_free_erase);
+#define _cleanup_string_free_erase_ _cleanup_(string_free_erasep)
+
+bool string_is_safe(const char *p) _pure_;
+
+static inline size_t strlen_ptr(const char *s) {
+ if (!s)
+ return 0;
+
+ return strlen(s);
+}
+
+/* Like startswith(), but operates on arbitrary memory blocks */
+static inline void *memory_startswith(const void *p, size_t sz, const char *token) {
+ size_t n;
+
+ assert(token);
+
+ n = strlen(token);
+ if (sz < n)
+ return NULL;
+
+ assert(p);
+
+ if (memcmp(p, token, n) != 0)
+ return NULL;
+
+ return (uint8_t*) p + n;
+}
+
+/* Like startswith_no_case(), but operates on arbitrary memory blocks.
+ * It works only for ASCII strings.
+ */
+static inline void *memory_startswith_no_case(const void *p, size_t sz, const char *token) {
+ size_t n, i;
+
+ assert(token);
+
+ n = strlen(token);
+ if (sz < n)
+ return NULL;
+
+ assert(p);
+
+ for (i = 0; i < n; i++) {
+ if (ascii_tolower(((char *)p)[i]) != ascii_tolower(token[i]))
+ return NULL;
+ }
+
+ return (uint8_t*) p + n;
+}
diff --git a/src/basic/strv.c b/src/basic/strv.c
new file mode 100644
index 0000000..3a62f25
--- /dev/null
+++ b/src/basic/strv.c
@@ -0,0 +1,889 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fnmatch.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "fileio.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+char *strv_find(char **l, const char *name) {
+ char **i;
+
+ assert(name);
+
+ STRV_FOREACH(i, l)
+ if (streq(*i, name))
+ return *i;
+
+ return NULL;
+}
+
+char *strv_find_prefix(char **l, const char *name) {
+ char **i;
+
+ assert(name);
+
+ STRV_FOREACH(i, l)
+ if (startswith(*i, name))
+ return *i;
+
+ return NULL;
+}
+
+char *strv_find_startswith(char **l, const char *name) {
+ char **i, *e;
+
+ assert(name);
+
+ /* Like strv_find_prefix, but actually returns only the
+ * suffix, not the whole item */
+
+ STRV_FOREACH(i, l) {
+ e = startswith(*i, name);
+ if (e)
+ return e;
+ }
+
+ return NULL;
+}
+
+void strv_clear(char **l) {
+ char **k;
+
+ if (!l)
+ return;
+
+ for (k = l; *k; k++)
+ free(*k);
+
+ *l = NULL;
+}
+
+char **strv_free(char **l) {
+ strv_clear(l);
+ return mfree(l);
+}
+
+char **strv_free_erase(char **l) {
+ char **i;
+
+ STRV_FOREACH(i, l)
+ string_erase(*i);
+
+ return strv_free(l);
+}
+
+char **strv_copy(char * const *l) {
+ char **r, **k;
+
+ k = r = new(char*, strv_length(l) + 1);
+ if (!r)
+ return NULL;
+
+ if (l)
+ for (; *l; k++, l++) {
+ *k = strdup(*l);
+ if (!*k) {
+ strv_free(r);
+ return NULL;
+ }
+ }
+
+ *k = NULL;
+ return r;
+}
+
+size_t strv_length(char * const *l) {
+ size_t n = 0;
+
+ if (!l)
+ return 0;
+
+ for (; *l; l++)
+ n++;
+
+ return n;
+}
+
+char **strv_new_ap(const char *x, va_list ap) {
+ const char *s;
+ _cleanup_strv_free_ char **a = NULL;
+ size_t n = 0, i = 0;
+ va_list aq;
+
+ /* As a special trick we ignore all listed strings that equal
+ * STRV_IGNORE. This is supposed to be used with the
+ * STRV_IFNOTNULL() macro to include possibly NULL strings in
+ * the string list. */
+
+ if (x) {
+ n = x == STRV_IGNORE ? 0 : 1;
+
+ va_copy(aq, ap);
+ while ((s = va_arg(aq, const char*))) {
+ if (s == STRV_IGNORE)
+ continue;
+
+ n++;
+ }
+
+ va_end(aq);
+ }
+
+ a = new(char*, n+1);
+ if (!a)
+ return NULL;
+
+ if (x) {
+ if (x != STRV_IGNORE) {
+ a[i] = strdup(x);
+ if (!a[i])
+ return NULL;
+ i++;
+ }
+
+ while ((s = va_arg(ap, const char*))) {
+
+ if (s == STRV_IGNORE)
+ continue;
+
+ a[i] = strdup(s);
+ if (!a[i])
+ return NULL;
+
+ i++;
+ }
+ }
+
+ a[i] = NULL;
+
+ return TAKE_PTR(a);
+}
+
+char **strv_new_internal(const char *x, ...) {
+ char **r;
+ va_list ap;
+
+ va_start(ap, x);
+ r = strv_new_ap(x, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int strv_extend_strv(char ***a, char **b, bool filter_duplicates) {
+ char **s, **t;
+ size_t p, q, i = 0, j;
+
+ assert(a);
+
+ if (strv_isempty(b))
+ return 0;
+
+ p = strv_length(*a);
+ q = strv_length(b);
+
+ t = reallocarray(*a, p + q + 1, sizeof(char *));
+ if (!t)
+ return -ENOMEM;
+
+ t[p] = NULL;
+ *a = t;
+
+ STRV_FOREACH(s, b) {
+
+ if (filter_duplicates && strv_contains(t, *s))
+ continue;
+
+ t[p+i] = strdup(*s);
+ if (!t[p+i])
+ goto rollback;
+
+ i++;
+ t[p+i] = NULL;
+ }
+
+ assert(i <= q);
+
+ return (int) i;
+
+rollback:
+ for (j = 0; j < i; j++)
+ free(t[p + j]);
+
+ t[p] = NULL;
+ return -ENOMEM;
+}
+
+int strv_extend_strv_concat(char ***a, char **b, const char *suffix) {
+ int r;
+ char **s;
+
+ STRV_FOREACH(s, b) {
+ char *v;
+
+ v = strappend(*s, suffix);
+ if (!v)
+ return -ENOMEM;
+
+ r = strv_push(a, v);
+ if (r < 0) {
+ free(v);
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+char **strv_split_full(const char *s, const char *separator, SplitFlags flags) {
+ const char *word, *state;
+ size_t l;
+ size_t n, i;
+ char **r;
+
+ assert(s);
+
+ if (!separator)
+ separator = WHITESPACE;
+
+ s += strspn(s, separator);
+ if (isempty(s))
+ return new0(char*, 1);
+
+ n = 0;
+ _FOREACH_WORD(word, l, s, separator, flags, state)
+ n++;
+
+ r = new(char*, n+1);
+ if (!r)
+ return NULL;
+
+ i = 0;
+ _FOREACH_WORD(word, l, s, separator, flags, state) {
+ r[i] = strndup(word, l);
+ if (!r[i]) {
+ strv_free(r);
+ return NULL;
+ }
+
+ i++;
+ }
+
+ r[i] = NULL;
+ return r;
+}
+
+char **strv_split_newlines(const char *s) {
+ char **l;
+ size_t n;
+
+ assert(s);
+
+ /* Special version of strv_split() that splits on newlines and
+ * suppresses an empty string at the end */
+
+ l = strv_split(s, NEWLINE);
+ if (!l)
+ return NULL;
+
+ n = strv_length(l);
+ if (n <= 0)
+ return l;
+
+ if (isempty(l[n - 1]))
+ l[n - 1] = mfree(l[n - 1]);
+
+ return l;
+}
+
+int strv_split_extract(char ***t, const char *s, const char *separators, ExtractFlags flags) {
+ _cleanup_strv_free_ char **l = NULL;
+ size_t n = 0, allocated = 0;
+ int r;
+
+ assert(t);
+ assert(s);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&s, &word, separators, flags);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (!GREEDY_REALLOC(l, allocated, n + 2))
+ return -ENOMEM;
+
+ l[n++] = TAKE_PTR(word);
+
+ l[n] = NULL;
+ }
+
+ if (!l) {
+ l = new0(char*, 1);
+ if (!l)
+ return -ENOMEM;
+ }
+
+ *t = TAKE_PTR(l);
+
+ return (int) n;
+}
+
+char *strv_join_prefix(char **l, const char *separator, const char *prefix) {
+ char *r, *e;
+ char **s;
+ size_t n, k, m;
+
+ if (!separator)
+ separator = " ";
+
+ k = strlen(separator);
+ m = strlen_ptr(prefix);
+
+ n = 0;
+ STRV_FOREACH(s, l) {
+ if (s != l)
+ n += k;
+ n += m + strlen(*s);
+ }
+
+ r = new(char, n+1);
+ if (!r)
+ return NULL;
+
+ e = r;
+ STRV_FOREACH(s, l) {
+ if (s != l)
+ e = stpcpy(e, separator);
+
+ if (prefix)
+ e = stpcpy(e, prefix);
+
+ e = stpcpy(e, *s);
+ }
+
+ *e = 0;
+
+ return r;
+}
+
+int strv_push(char ***l, char *value) {
+ char **c;
+ size_t n, m;
+
+ if (!value)
+ return 0;
+
+ n = strv_length(*l);
+
+ /* Increase and check for overflow */
+ m = n + 2;
+ if (m < n)
+ return -ENOMEM;
+
+ c = reallocarray(*l, m, sizeof(char*));
+ if (!c)
+ return -ENOMEM;
+
+ c[n] = value;
+ c[n+1] = NULL;
+
+ *l = c;
+ return 0;
+}
+
+int strv_push_pair(char ***l, char *a, char *b) {
+ char **c;
+ size_t n, m;
+
+ if (!a && !b)
+ return 0;
+
+ n = strv_length(*l);
+
+ /* increase and check for overflow */
+ m = n + !!a + !!b + 1;
+ if (m < n)
+ return -ENOMEM;
+
+ c = reallocarray(*l, m, sizeof(char*));
+ if (!c)
+ return -ENOMEM;
+
+ if (a)
+ c[n++] = a;
+ if (b)
+ c[n++] = b;
+ c[n] = NULL;
+
+ *l = c;
+ return 0;
+}
+
+int strv_insert(char ***l, size_t position, char *value) {
+ char **c;
+ size_t n, m, i;
+
+ if (!value)
+ return 0;
+
+ n = strv_length(*l);
+ position = MIN(position, n);
+
+ /* increase and check for overflow */
+ m = n + 2;
+ if (m < n)
+ return -ENOMEM;
+
+ c = new(char*, m);
+ if (!c)
+ return -ENOMEM;
+
+ for (i = 0; i < position; i++)
+ c[i] = (*l)[i];
+ c[position] = value;
+ for (i = position; i < n; i++)
+ c[i+1] = (*l)[i];
+
+ c[n+1] = NULL;
+
+ free(*l);
+ *l = c;
+
+ return 0;
+}
+
+int strv_consume(char ***l, char *value) {
+ int r;
+
+ r = strv_push(l, value);
+ if (r < 0)
+ free(value);
+
+ return r;
+}
+
+int strv_consume_pair(char ***l, char *a, char *b) {
+ int r;
+
+ r = strv_push_pair(l, a, b);
+ if (r < 0) {
+ free(a);
+ free(b);
+ }
+
+ return r;
+}
+
+int strv_consume_prepend(char ***l, char *value) {
+ int r;
+
+ r = strv_push_prepend(l, value);
+ if (r < 0)
+ free(value);
+
+ return r;
+}
+
+int strv_extend(char ***l, const char *value) {
+ char *v;
+
+ if (!value)
+ return 0;
+
+ v = strdup(value);
+ if (!v)
+ return -ENOMEM;
+
+ return strv_consume(l, v);
+}
+
+int strv_extend_front(char ***l, const char *value) {
+ size_t n, m;
+ char *v, **c;
+
+ assert(l);
+
+ /* Like strv_extend(), but prepends rather than appends the new entry */
+
+ if (!value)
+ return 0;
+
+ n = strv_length(*l);
+
+ /* Increase and overflow check. */
+ m = n + 2;
+ if (m < n)
+ return -ENOMEM;
+
+ v = strdup(value);
+ if (!v)
+ return -ENOMEM;
+
+ c = reallocarray(*l, m, sizeof(char*));
+ if (!c) {
+ free(v);
+ return -ENOMEM;
+ }
+
+ memmove(c+1, c, n * sizeof(char*));
+ c[0] = v;
+ c[n+1] = NULL;
+
+ *l = c;
+ return 0;
+}
+
+char **strv_uniq(char **l) {
+ char **i;
+
+ /* Drops duplicate entries. The first identical string will be
+ * kept, the others dropped */
+
+ STRV_FOREACH(i, l)
+ strv_remove(i+1, *i);
+
+ return l;
+}
+
+bool strv_is_uniq(char **l) {
+ char **i;
+
+ STRV_FOREACH(i, l)
+ if (strv_find(i+1, *i))
+ return false;
+
+ return true;
+}
+
+char **strv_remove(char **l, const char *s) {
+ char **f, **t;
+
+ if (!l)
+ return NULL;
+
+ assert(s);
+
+ /* Drops every occurrence of s in the string list, edits
+ * in-place. */
+
+ for (f = t = l; *f; f++)
+ if (streq(*f, s))
+ free(*f);
+ else
+ *(t++) = *f;
+
+ *t = NULL;
+ return l;
+}
+
+char **strv_parse_nulstr(const char *s, size_t l) {
+ /* l is the length of the input data, which will be split at NULs into
+ * elements of the resulting strv. Hence, the number of items in the resulting strv
+ * will be equal to one plus the number of NUL bytes in the l bytes starting at s,
+ * unless s[l-1] is NUL, in which case the final empty string is not stored in
+ * the resulting strv, and length is equal to the number of NUL bytes.
+ *
+ * Note that contrary to a normal nulstr which cannot contain empty strings, because
+ * the input data is terminated by any two consequent NUL bytes, this parser accepts
+ * empty strings in s.
+ */
+
+ const char *p;
+ size_t c = 0, i = 0;
+ char **v;
+
+ assert(s || l <= 0);
+
+ if (l <= 0)
+ return new0(char*, 1);
+
+ for (p = s; p < s + l; p++)
+ if (*p == 0)
+ c++;
+
+ if (s[l-1] != 0)
+ c++;
+
+ v = new0(char*, c+1);
+ if (!v)
+ return NULL;
+
+ p = s;
+ while (p < s + l) {
+ const char *e;
+
+ e = memchr(p, 0, s + l - p);
+
+ v[i] = strndup(p, e ? e - p : s + l - p);
+ if (!v[i]) {
+ strv_free(v);
+ return NULL;
+ }
+
+ i++;
+
+ if (!e)
+ break;
+
+ p = e + 1;
+ }
+
+ assert(i == c);
+
+ return v;
+}
+
+char **strv_split_nulstr(const char *s) {
+ const char *i;
+ char **r = NULL;
+
+ NULSTR_FOREACH(i, s)
+ if (strv_extend(&r, i) < 0) {
+ strv_free(r);
+ return NULL;
+ }
+
+ if (!r)
+ return strv_new(NULL);
+
+ return r;
+}
+
+int strv_make_nulstr(char **l, char **p, size_t *q) {
+ /* A valid nulstr with two NULs at the end will be created, but
+ * q will be the length without the two trailing NULs. Thus the output
+ * string is a valid nulstr and can be iterated over using NULSTR_FOREACH,
+ * and can also be parsed by strv_parse_nulstr as long as the length
+ * is provided separately.
+ */
+
+ size_t n_allocated = 0, n = 0;
+ _cleanup_free_ char *m = NULL;
+ char **i;
+
+ assert(p);
+ assert(q);
+
+ STRV_FOREACH(i, l) {
+ size_t z;
+
+ z = strlen(*i);
+
+ if (!GREEDY_REALLOC(m, n_allocated, n + z + 2))
+ return -ENOMEM;
+
+ memcpy(m + n, *i, z + 1);
+ n += z + 1;
+ }
+
+ if (!m) {
+ m = new0(char, 1);
+ if (!m)
+ return -ENOMEM;
+ n = 1;
+ } else
+ /* make sure there is a second extra NUL at the end of resulting nulstr */
+ m[n] = '\0';
+
+ assert(n > 0);
+ *p = m;
+ *q = n - 1;
+
+ m = NULL;
+
+ return 0;
+}
+
+bool strv_overlap(char **a, char **b) {
+ char **i;
+
+ STRV_FOREACH(i, a)
+ if (strv_contains(b, *i))
+ return true;
+
+ return false;
+}
+
+static int str_compare(char * const *a, char * const *b) {
+ return strcmp(*a, *b);
+}
+
+char **strv_sort(char **l) {
+ typesafe_qsort(l, strv_length(l), str_compare);
+ return l;
+}
+
+bool strv_equal(char **a, char **b) {
+
+ if (strv_isempty(a))
+ return strv_isempty(b);
+
+ if (strv_isempty(b))
+ return false;
+
+ for ( ; *a || *b; ++a, ++b)
+ if (!streq_ptr(*a, *b))
+ return false;
+
+ return true;
+}
+
+void strv_print(char **l) {
+ char **s;
+
+ STRV_FOREACH(s, l)
+ puts(*s);
+}
+
+int strv_extendf(char ***l, const char *format, ...) {
+ va_list ap;
+ char *x;
+ int r;
+
+ va_start(ap, format);
+ r = vasprintf(&x, format, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ return strv_consume(l, x);
+}
+
+char **strv_reverse(char **l) {
+ size_t n, i;
+
+ n = strv_length(l);
+ if (n <= 1)
+ return l;
+
+ for (i = 0; i < n / 2; i++)
+ SWAP_TWO(l[i], l[n-1-i]);
+
+ return l;
+}
+
+char **strv_shell_escape(char **l, const char *bad) {
+ char **s;
+
+ /* Escapes every character in every string in l that is in bad,
+ * edits in-place, does not roll-back on error. */
+
+ STRV_FOREACH(s, l) {
+ char *v;
+
+ v = shell_escape(*s, bad);
+ if (!v)
+ return NULL;
+
+ free(*s);
+ *s = v;
+ }
+
+ return l;
+}
+
+bool strv_fnmatch(char* const* patterns, const char *s, int flags) {
+ char* const* p;
+
+ STRV_FOREACH(p, patterns)
+ if (fnmatch(*p, s, flags) == 0)
+ return true;
+
+ return false;
+}
+
+char ***strv_free_free(char ***l) {
+ char ***i;
+
+ if (!l)
+ return NULL;
+
+ for (i = l; *i; i++)
+ strv_free(*i);
+
+ return mfree(l);
+}
+
+char **strv_skip(char **l, size_t n) {
+
+ while (n > 0) {
+ if (strv_isempty(l))
+ return l;
+
+ l++, n--;
+ }
+
+ return l;
+}
+
+int strv_extend_n(char ***l, const char *value, size_t n) {
+ size_t i, j, k;
+ char **nl;
+
+ assert(l);
+
+ if (!value)
+ return 0;
+ if (n == 0)
+ return 0;
+
+ /* Adds the value n times to l */
+
+ k = strv_length(*l);
+
+ nl = reallocarray(*l, k + n + 1, sizeof(char *));
+ if (!nl)
+ return -ENOMEM;
+
+ *l = nl;
+
+ for (i = k; i < k + n; i++) {
+ nl[i] = strdup(value);
+ if (!nl[i])
+ goto rollback;
+ }
+
+ nl[i] = NULL;
+ return 0;
+
+rollback:
+ for (j = k; j < i; j++)
+ free(nl[j]);
+
+ nl[k] = NULL;
+ return -ENOMEM;
+}
+
+int fputstrv(FILE *f, char **l, const char *separator, bool *space) {
+ bool b = false;
+ char **s;
+ int r;
+
+ /* Like fputs(), but for strv, and with a less stupid argument order */
+
+ if (!space)
+ space = &b;
+
+ STRV_FOREACH(s, l) {
+ r = fputs_with_space(f, *s, separator, space);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/basic/strv.h b/src/basic/strv.h
new file mode 100644
index 0000000..392cab6
--- /dev/null
+++ b/src/basic/strv.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <fnmatch.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "macro.h"
+#include "string-util.h"
+#include "util.h"
+
+char *strv_find(char **l, const char *name) _pure_;
+char *strv_find_prefix(char **l, const char *name) _pure_;
+char *strv_find_startswith(char **l, const char *name) _pure_;
+
+char **strv_free(char **l);
+DEFINE_TRIVIAL_CLEANUP_FUNC(char**, strv_free);
+#define _cleanup_strv_free_ _cleanup_(strv_freep)
+
+char **strv_free_erase(char **l);
+DEFINE_TRIVIAL_CLEANUP_FUNC(char**, strv_free_erase);
+#define _cleanup_strv_free_erase_ _cleanup_(strv_free_erasep)
+
+void strv_clear(char **l);
+
+char **strv_copy(char * const *l);
+size_t strv_length(char * const *l) _pure_;
+
+int strv_extend_strv(char ***a, char **b, bool filter_duplicates);
+int strv_extend_strv_concat(char ***a, char **b, const char *suffix);
+int strv_extend(char ***l, const char *value);
+int strv_extendf(char ***l, const char *format, ...) _printf_(2,0);
+int strv_extend_front(char ***l, const char *value);
+int strv_push(char ***l, char *value);
+int strv_push_pair(char ***l, char *a, char *b);
+int strv_insert(char ***l, size_t position, char *value);
+
+static inline int strv_push_prepend(char ***l, char *value) {
+ return strv_insert(l, 0, value);
+}
+
+int strv_consume(char ***l, char *value);
+int strv_consume_pair(char ***l, char *a, char *b);
+int strv_consume_prepend(char ***l, char *value);
+
+char **strv_remove(char **l, const char *s);
+char **strv_uniq(char **l);
+bool strv_is_uniq(char **l);
+
+bool strv_equal(char **a, char **b);
+
+#define strv_contains(l, s) (!!strv_find((l), (s)))
+
+char **strv_new_internal(const char *x, ...) _sentinel_;
+char **strv_new_ap(const char *x, va_list ap);
+#define strv_new(...) strv_new_internal(__VA_ARGS__, NULL)
+
+#define STRV_IGNORE ((const char *) -1)
+
+static inline const char* STRV_IFNOTNULL(const char *x) {
+ return x ? x : STRV_IGNORE;
+}
+
+static inline bool strv_isempty(char * const *l) {
+ return !l || !*l;
+}
+
+char **strv_split_full(const char *s, const char *separator, SplitFlags flags);
+static inline char **strv_split(const char *s, const char *separator) {
+ return strv_split_full(s, separator, 0);
+}
+char **strv_split_newlines(const char *s);
+
+int strv_split_extract(char ***t, const char *s, const char *separators, ExtractFlags flags);
+
+char *strv_join_prefix(char **l, const char *separator, const char *prefix);
+static inline char *strv_join(char **l, const char *separator) {
+ return strv_join_prefix(l, separator, NULL);
+}
+
+char **strv_parse_nulstr(const char *s, size_t l);
+char **strv_split_nulstr(const char *s);
+int strv_make_nulstr(char **l, char **p, size_t *n);
+
+bool strv_overlap(char **a, char **b) _pure_;
+
+#define STRV_FOREACH(s, l) \
+ for ((s) = (l); (s) && *(s); (s)++)
+
+#define STRV_FOREACH_BACKWARDS(s, l) \
+ for (s = ({ \
+ char **_l = l; \
+ _l ? _l + strv_length(_l) - 1U : NULL; \
+ }); \
+ (l) && ((s) >= (l)); \
+ (s)--)
+
+#define STRV_FOREACH_PAIR(x, y, l) \
+ for ((x) = (l), (y) = (x+1); (x) && *(x) && *(y); (x) += 2, (y) = (x + 1))
+
+char **strv_sort(char **l);
+void strv_print(char **l);
+
+#define STRV_MAKE(...) ((char**) ((const char*[]) { __VA_ARGS__, NULL }))
+
+#define STRV_MAKE_EMPTY ((char*[1]) { NULL })
+
+#define strv_from_stdarg_alloca(first) \
+ ({ \
+ char **_l; \
+ \
+ if (!first) \
+ _l = (char**) &first; \
+ else { \
+ size_t _n; \
+ va_list _ap; \
+ \
+ _n = 1; \
+ va_start(_ap, first); \
+ while (va_arg(_ap, char*)) \
+ _n++; \
+ va_end(_ap); \
+ \
+ _l = newa(char*, _n+1); \
+ _l[_n = 0] = (char*) first; \
+ va_start(_ap, first); \
+ for (;;) { \
+ _l[++_n] = va_arg(_ap, char*); \
+ if (!_l[_n]) \
+ break; \
+ } \
+ va_end(_ap); \
+ } \
+ _l; \
+ })
+
+#define STR_IN_SET(x, ...) strv_contains(STRV_MAKE(__VA_ARGS__), x)
+#define STRPTR_IN_SET(x, ...) \
+ ({ \
+ const char* _x = (x); \
+ _x && strv_contains(STRV_MAKE(__VA_ARGS__), _x); \
+ })
+
+#define STARTSWITH_SET(p, ...) \
+ ({ \
+ const char *_p = (p); \
+ char *_found = NULL, **_i; \
+ STRV_FOREACH(_i, STRV_MAKE(__VA_ARGS__)) { \
+ _found = startswith(_p, *_i); \
+ if (_found) \
+ break; \
+ } \
+ _found; \
+ })
+
+#define FOREACH_STRING(x, y, ...) \
+ for (char **_l = STRV_MAKE(({ x = y; }), ##__VA_ARGS__); \
+ x; \
+ x = *(++_l))
+
+char **strv_reverse(char **l);
+char **strv_shell_escape(char **l, const char *bad);
+
+bool strv_fnmatch(char* const* patterns, const char *s, int flags);
+
+static inline bool strv_fnmatch_or_empty(char* const* patterns, const char *s, int flags) {
+ assert(s);
+ return strv_isempty(patterns) ||
+ strv_fnmatch(patterns, s, flags);
+}
+
+char ***strv_free_free(char ***l);
+DEFINE_TRIVIAL_CLEANUP_FUNC(char***, strv_free_free);
+
+char **strv_skip(char **l, size_t n);
+
+int strv_extend_n(char ***l, const char *value, size_t n);
+
+int fputstrv(FILE *f, char **l, const char *separator, bool *space);
+
+#define strv_free_and_replace(a, b) \
+ ({ \
+ strv_free(a); \
+ (a) = (b); \
+ (b) = NULL; \
+ 0; \
+ })
diff --git a/src/basic/strxcpyx.c b/src/basic/strxcpyx.c
new file mode 100644
index 0000000..9210277
--- /dev/null
+++ b/src/basic/strxcpyx.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+/*
+ * Concatenates/copies strings. In any case, terminates in all cases
+ * with '\0' and moves the @dest pointer forward to the added '\0'.
+ * Returns the remaining size, and 0 if the string was truncated.
+ *
+ * Due to the intended usage, these helpers silently noop invocations
+ * having zero size. This is technically an exception to the above
+ * statement "terminates in all cases". It's unexpected for such calls to
+ * occur outside of a loop where this is the preferred behavior.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "strxcpyx.h"
+
+size_t strpcpy(char **dest, size_t size, const char *src) {
+ size_t len;
+
+ assert(dest);
+ assert(src);
+
+ if (size == 0)
+ return 0;
+
+ len = strlen(src);
+ if (len >= size) {
+ if (size > 1)
+ *dest = mempcpy(*dest, src, size-1);
+ size = 0;
+ } else if (len > 0) {
+ *dest = mempcpy(*dest, src, len);
+ size -= len;
+ }
+
+ *dest[0] = '\0';
+ return size;
+}
+
+size_t strpcpyf(char **dest, size_t size, const char *src, ...) {
+ va_list va;
+ int i;
+
+ assert(dest);
+ assert(src);
+
+ if (size == 0)
+ return 0;
+
+ va_start(va, src);
+ i = vsnprintf(*dest, size, src, va);
+ if (i < (int)size) {
+ *dest += i;
+ size -= i;
+ } else
+ size = 0;
+ va_end(va);
+ return size;
+}
+
+size_t strpcpyl(char **dest, size_t size, const char *src, ...) {
+ va_list va;
+
+ assert(dest);
+ assert(src);
+
+ va_start(va, src);
+ do {
+ size = strpcpy(dest, size, src);
+ src = va_arg(va, char *);
+ } while (src);
+ va_end(va);
+ return size;
+}
+
+size_t strscpy(char *dest, size_t size, const char *src) {
+ char *s;
+
+ assert(dest);
+ assert(src);
+
+ s = dest;
+ return strpcpy(&s, size, src);
+}
+
+size_t strscpyl(char *dest, size_t size, const char *src, ...) {
+ va_list va;
+ char *s;
+
+ assert(dest);
+ assert(src);
+
+ va_start(va, src);
+ s = dest;
+ do {
+ size = strpcpy(&s, size, src);
+ src = va_arg(va, char *);
+ } while (src);
+ va_end(va);
+
+ return size;
+}
diff --git a/src/basic/strxcpyx.h b/src/basic/strxcpyx.h
new file mode 100644
index 0000000..0f2b749
--- /dev/null
+++ b/src/basic/strxcpyx.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stddef.h>
+
+#include "macro.h"
+
+size_t strpcpy(char **dest, size_t size, const char *src);
+size_t strpcpyf(char **dest, size_t size, const char *src, ...) _printf_(3, 4);
+size_t strpcpyl(char **dest, size_t size, const char *src, ...) _sentinel_;
+size_t strscpy(char *dest, size_t size, const char *src);
+size_t strscpyl(char *dest, size_t size, const char *src, ...) _sentinel_;
diff --git a/src/basic/syslog-util.c b/src/basic/syslog-util.c
new file mode 100644
index 0000000..fe12948
--- /dev/null
+++ b/src/basic/syslog-util.c
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+#include <syslog.h>
+
+#include "hexdecoct.h"
+#include "macro.h"
+#include "string-table.h"
+#include "syslog-util.h"
+
+int syslog_parse_priority(const char **p, int *priority, bool with_facility) {
+ int a = 0, b = 0, c = 0;
+ const char *end;
+ size_t k;
+
+ assert(p);
+ assert(*p);
+ assert(priority);
+
+ if ((*p)[0] != '<')
+ return 0;
+
+ end = strchr(*p, '>');
+ if (!end)
+ return 0;
+
+ k = end - *p;
+ assert(k > 0);
+
+ if (k == 2)
+ c = undecchar((*p)[1]);
+ else if (k == 3) {
+ b = undecchar((*p)[1]);
+ c = undecchar((*p)[2]);
+ } else if (k == 4) {
+ a = undecchar((*p)[1]);
+ b = undecchar((*p)[2]);
+ c = undecchar((*p)[3]);
+ } else
+ return 0;
+
+ if (a < 0 || b < 0 || c < 0 ||
+ (!with_facility && (a || b || c > 7)))
+ return 0;
+
+ if (with_facility)
+ *priority = a*100 + b*10 + c;
+ else
+ *priority = (*priority & LOG_FACMASK) | c;
+
+ *p += k + 1;
+ return 1;
+}
+
+static const char *const log_facility_unshifted_table[LOG_NFACILITIES] = {
+ [LOG_FAC(LOG_KERN)] = "kern",
+ [LOG_FAC(LOG_USER)] = "user",
+ [LOG_FAC(LOG_MAIL)] = "mail",
+ [LOG_FAC(LOG_DAEMON)] = "daemon",
+ [LOG_FAC(LOG_AUTH)] = "auth",
+ [LOG_FAC(LOG_SYSLOG)] = "syslog",
+ [LOG_FAC(LOG_LPR)] = "lpr",
+ [LOG_FAC(LOG_NEWS)] = "news",
+ [LOG_FAC(LOG_UUCP)] = "uucp",
+ [LOG_FAC(LOG_CRON)] = "cron",
+ [LOG_FAC(LOG_AUTHPRIV)] = "authpriv",
+ [LOG_FAC(LOG_FTP)] = "ftp",
+ [LOG_FAC(LOG_LOCAL0)] = "local0",
+ [LOG_FAC(LOG_LOCAL1)] = "local1",
+ [LOG_FAC(LOG_LOCAL2)] = "local2",
+ [LOG_FAC(LOG_LOCAL3)] = "local3",
+ [LOG_FAC(LOG_LOCAL4)] = "local4",
+ [LOG_FAC(LOG_LOCAL5)] = "local5",
+ [LOG_FAC(LOG_LOCAL6)] = "local6",
+ [LOG_FAC(LOG_LOCAL7)] = "local7"
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(log_facility_unshifted, int, LOG_FAC(~0));
+
+bool log_facility_unshifted_is_valid(int facility) {
+ return facility >= 0 && facility <= LOG_FAC(~0);
+}
+
+static const char *const log_level_table[] = {
+ [LOG_EMERG] = "emerg",
+ [LOG_ALERT] = "alert",
+ [LOG_CRIT] = "crit",
+ [LOG_ERR] = "err",
+ [LOG_WARNING] = "warning",
+ [LOG_NOTICE] = "notice",
+ [LOG_INFO] = "info",
+ [LOG_DEBUG] = "debug"
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(log_level, int, LOG_DEBUG);
+
+bool log_level_is_valid(int level) {
+ return level >= 0 && level <= LOG_DEBUG;
+}
diff --git a/src/basic/syslog-util.h b/src/basic/syslog-util.h
new file mode 100644
index 0000000..8f419e8
--- /dev/null
+++ b/src/basic/syslog-util.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+int log_facility_unshifted_to_string_alloc(int i, char **s);
+int log_facility_unshifted_from_string(const char *s);
+bool log_facility_unshifted_is_valid(int faciliy);
+
+int log_level_to_string_alloc(int i, char **s);
+int log_level_from_string(const char *s);
+bool log_level_is_valid(int level);
+
+int syslog_parse_priority(const char **p, int *priority, bool with_facility);
diff --git a/src/basic/terminal-util.c b/src/basic/terminal-util.c
new file mode 100644
index 0000000..0f38120
--- /dev/null
+++ b/src/basic/terminal-util.c
@@ -0,0 +1,1318 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/kd.h>
+#include <linux/tiocl.h>
+#include <linux/vt.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/inotify.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/sysmacros.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "copy.h"
+#include "def.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "util.h"
+
+static volatile unsigned cached_columns = 0;
+static volatile unsigned cached_lines = 0;
+
+static volatile int cached_on_tty = -1;
+static volatile int cached_colors_enabled = -1;
+static volatile int cached_underline_enabled = -1;
+
+int chvt(int vt) {
+ _cleanup_close_ int fd;
+
+ /* Switch to the specified vt number. If the VT is specified <= 0 switch to the VT the kernel log messages go,
+ * if that's configured. */
+
+ fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0)
+ return -errno;
+
+ if (vt <= 0) {
+ int tiocl[2] = {
+ TIOCL_GETKMSGREDIRECT,
+ 0
+ };
+
+ if (ioctl(fd, TIOCLINUX, tiocl) < 0)
+ return -errno;
+
+ vt = tiocl[0] <= 0 ? 1 : tiocl[0];
+ }
+
+ if (ioctl(fd, VT_ACTIVATE, vt) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int read_one_char(FILE *f, char *ret, usec_t t, bool *need_nl) {
+ _cleanup_free_ char *line = NULL;
+ struct termios old_termios;
+ int r;
+
+ assert(f);
+ assert(ret);
+
+ /* If this is a terminal, then switch canonical mode off, so that we can read a single character */
+ if (tcgetattr(fileno(f), &old_termios) >= 0) {
+ struct termios new_termios = old_termios;
+
+ new_termios.c_lflag &= ~ICANON;
+ new_termios.c_cc[VMIN] = 1;
+ new_termios.c_cc[VTIME] = 0;
+
+ if (tcsetattr(fileno(f), TCSADRAIN, &new_termios) >= 0) {
+ char c;
+
+ if (t != USEC_INFINITY) {
+ if (fd_wait_for_event(fileno(f), POLLIN, t) <= 0) {
+ (void) tcsetattr(fileno(f), TCSADRAIN, &old_termios);
+ return -ETIMEDOUT;
+ }
+ }
+
+ r = safe_fgetc(f, &c);
+ (void) tcsetattr(fileno(f), TCSADRAIN, &old_termios);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EIO;
+
+ if (need_nl)
+ *need_nl = c != '\n';
+
+ *ret = c;
+ return 0;
+ }
+ }
+
+ if (t != USEC_INFINITY) {
+ if (fd_wait_for_event(fileno(f), POLLIN, t) <= 0)
+ return -ETIMEDOUT;
+ }
+
+ /* If this is not a terminal, then read a full line instead */
+
+ r = read_line(f, 16, &line); /* longer than necessary, to eat up UTF-8 chars/vt100 key sequences */
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EIO;
+
+ if (strlen(line) != 1)
+ return -EBADMSG;
+
+ if (need_nl)
+ *need_nl = false;
+
+ *ret = line[0];
+ return 0;
+}
+
+#define DEFAULT_ASK_REFRESH_USEC (2*USEC_PER_SEC)
+
+int ask_char(char *ret, const char *replies, const char *fmt, ...) {
+ int r;
+
+ assert(ret);
+ assert(replies);
+ assert(fmt);
+
+ for (;;) {
+ va_list ap;
+ char c;
+ bool need_nl = true;
+
+ if (colors_enabled())
+ fputs(ANSI_HIGHLIGHT, stdout);
+
+ putchar('\r');
+
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+
+ if (colors_enabled())
+ fputs(ANSI_NORMAL, stdout);
+
+ fflush(stdout);
+
+ r = read_one_char(stdin, &c, DEFAULT_ASK_REFRESH_USEC, &need_nl);
+ if (r < 0) {
+
+ if (r == -ETIMEDOUT)
+ continue;
+
+ if (r == -EBADMSG) {
+ puts("Bad input, please try again.");
+ continue;
+ }
+
+ putchar('\n');
+ return r;
+ }
+
+ if (need_nl)
+ putchar('\n');
+
+ if (strchr(replies, c)) {
+ *ret = c;
+ return 0;
+ }
+
+ puts("Read unexpected character, please try again.");
+ }
+}
+
+int ask_string(char **ret, const char *text, ...) {
+ int r;
+
+ assert(ret);
+ assert(text);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ va_list ap;
+
+ if (colors_enabled())
+ fputs(ANSI_HIGHLIGHT, stdout);
+
+ va_start(ap, text);
+ vprintf(text, ap);
+ va_end(ap);
+
+ if (colors_enabled())
+ fputs(ANSI_NORMAL, stdout);
+
+ fflush(stdout);
+
+ r = read_line(stdin, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EIO;
+
+ if (!isempty(line)) {
+ *ret = TAKE_PTR(line);
+ return 0;
+ }
+ }
+}
+
+int reset_terminal_fd(int fd, bool switch_to_text) {
+ struct termios termios;
+ int r = 0;
+
+ /* Set terminal to some sane defaults */
+
+ assert(fd >= 0);
+
+ /* We leave locked terminal attributes untouched, so that
+ * Plymouth may set whatever it wants to set, and we don't
+ * interfere with that. */
+
+ /* Disable exclusive mode, just in case */
+ (void) ioctl(fd, TIOCNXCL);
+
+ /* Switch to text mode */
+ if (switch_to_text)
+ (void) ioctl(fd, KDSETMODE, KD_TEXT);
+
+ /* Set default keyboard mode */
+ (void) vt_reset_keyboard(fd);
+
+ if (tcgetattr(fd, &termios) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ /* We only reset the stuff that matters to the software. How
+ * hardware is set up we don't touch assuming that somebody
+ * else will do that for us */
+
+ termios.c_iflag &= ~(IGNBRK | BRKINT | ISTRIP | INLCR | IGNCR | IUCLC);
+ termios.c_iflag |= ICRNL | IMAXBEL | IUTF8;
+ termios.c_oflag |= ONLCR;
+ termios.c_cflag |= CREAD;
+ termios.c_lflag = ISIG | ICANON | IEXTEN | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOPRT | ECHOKE;
+
+ termios.c_cc[VINTR] = 03; /* ^C */
+ termios.c_cc[VQUIT] = 034; /* ^\ */
+ termios.c_cc[VERASE] = 0177;
+ termios.c_cc[VKILL] = 025; /* ^X */
+ termios.c_cc[VEOF] = 04; /* ^D */
+ termios.c_cc[VSTART] = 021; /* ^Q */
+ termios.c_cc[VSTOP] = 023; /* ^S */
+ termios.c_cc[VSUSP] = 032; /* ^Z */
+ termios.c_cc[VLNEXT] = 026; /* ^V */
+ termios.c_cc[VWERASE] = 027; /* ^W */
+ termios.c_cc[VREPRINT] = 022; /* ^R */
+ termios.c_cc[VEOL] = 0;
+ termios.c_cc[VEOL2] = 0;
+
+ termios.c_cc[VTIME] = 0;
+ termios.c_cc[VMIN] = 1;
+
+ if (tcsetattr(fd, TCSANOW, &termios) < 0)
+ r = -errno;
+
+finish:
+ /* Just in case, flush all crap out */
+ (void) tcflush(fd, TCIOFLUSH);
+
+ return r;
+}
+
+int reset_terminal(const char *name) {
+ _cleanup_close_ int fd = -1;
+
+ /* We open the terminal with O_NONBLOCK here, to ensure we
+ * don't block on carrier if this is a terminal with carrier
+ * configured. */
+
+ fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0)
+ return fd;
+
+ return reset_terminal_fd(fd, true);
+}
+
+int open_terminal(const char *name, int mode) {
+ unsigned c = 0;
+ int fd;
+
+ /*
+ * If a TTY is in the process of being closed opening it might
+ * cause EIO. This is horribly awful, but unlikely to be
+ * changed in the kernel. Hence we work around this problem by
+ * retrying a couple of times.
+ *
+ * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/554172/comments/245
+ */
+
+ if (mode & O_CREAT)
+ return -EINVAL;
+
+ for (;;) {
+ fd = open(name, mode, 0);
+ if (fd >= 0)
+ break;
+
+ if (errno != EIO)
+ return -errno;
+
+ /* Max 1s in total */
+ if (c >= 20)
+ return -errno;
+
+ usleep(50 * USEC_PER_MSEC);
+ c++;
+ }
+
+ if (isatty(fd) <= 0) {
+ safe_close(fd);
+ return -ENOTTY;
+ }
+
+ return fd;
+}
+
+int acquire_terminal(
+ const char *name,
+ AcquireTerminalFlags flags,
+ usec_t timeout) {
+
+ _cleanup_close_ int notify = -1, fd = -1;
+ usec_t ts = USEC_INFINITY;
+ int r, wd = -1;
+
+ assert(name);
+ assert(IN_SET(flags & ~ACQUIRE_TERMINAL_PERMISSIVE, ACQUIRE_TERMINAL_TRY, ACQUIRE_TERMINAL_FORCE, ACQUIRE_TERMINAL_WAIT));
+
+ /* We use inotify to be notified when the tty is closed. We create the watch before checking if we can actually
+ * acquire it, so that we don't lose any event.
+ *
+ * Note: strictly speaking this actually watches for the device being closed, it does *not* really watch
+ * whether a tty loses its controlling process. However, unless some rogue process uses TIOCNOTTY on /dev/tty
+ * *after* closing its tty otherwise this will not become a problem. As long as the administrator makes sure to
+ * not configure any service on the same tty as an untrusted user this should not be a problem. (Which they
+ * probably should not do anyway.) */
+
+ if ((flags & ~ACQUIRE_TERMINAL_PERMISSIVE) == ACQUIRE_TERMINAL_WAIT) {
+ notify = inotify_init1(IN_CLOEXEC | (timeout != USEC_INFINITY ? IN_NONBLOCK : 0));
+ if (notify < 0)
+ return -errno;
+
+ wd = inotify_add_watch(notify, name, IN_CLOSE);
+ if (wd < 0)
+ return -errno;
+
+ if (timeout != USEC_INFINITY)
+ ts = now(CLOCK_MONOTONIC);
+ }
+
+ for (;;) {
+ struct sigaction sa_old, sa_new = {
+ .sa_handler = SIG_IGN,
+ .sa_flags = SA_RESTART,
+ };
+
+ if (notify >= 0) {
+ r = flush_fd(notify);
+ if (r < 0)
+ return r;
+ }
+
+ /* We pass here O_NOCTTY only so that we can check the return value TIOCSCTTY and have a reliable way
+ * to figure out if we successfully became the controlling process of the tty */
+ fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ /* Temporarily ignore SIGHUP, so that we don't get SIGHUP'ed if we already own the tty. */
+ assert_se(sigaction(SIGHUP, &sa_new, &sa_old) == 0);
+
+ /* First, try to get the tty */
+ r = ioctl(fd, TIOCSCTTY,
+ (flags & ~ACQUIRE_TERMINAL_PERMISSIVE) == ACQUIRE_TERMINAL_FORCE) < 0 ? -errno : 0;
+
+ /* Reset signal handler to old value */
+ assert_se(sigaction(SIGHUP, &sa_old, NULL) == 0);
+
+ /* Success? Exit the loop now! */
+ if (r >= 0)
+ break;
+
+ /* Any failure besides -EPERM? Fail, regardless of the mode. */
+ if (r != -EPERM)
+ return r;
+
+ if (flags & ACQUIRE_TERMINAL_PERMISSIVE) /* If we are in permissive mode, then EPERM is fine, turn this
+ * into a success. Note that EPERM is also returned if we
+ * already are the owner of the TTY. */
+ break;
+
+ if (flags != ACQUIRE_TERMINAL_WAIT) /* If we are in TRY or FORCE mode, then propagate EPERM as EPERM */
+ return r;
+
+ assert(notify >= 0);
+ assert(wd >= 0);
+
+ for (;;) {
+ union inotify_event_buffer buffer;
+ struct inotify_event *e;
+ ssize_t l;
+
+ if (timeout != USEC_INFINITY) {
+ usec_t n;
+
+ assert(ts != USEC_INFINITY);
+
+ n = now(CLOCK_MONOTONIC);
+ if (ts + timeout < n)
+ return -ETIMEDOUT;
+
+ r = fd_wait_for_event(notify, POLLIN, ts + timeout - n);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ETIMEDOUT;
+ }
+
+ l = read(notify, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ continue;
+
+ return -errno;
+ }
+
+ FOREACH_INOTIFY_EVENT(e, buffer, l) {
+ if (e->mask & IN_Q_OVERFLOW) /* If we hit an inotify queue overflow, simply check if the terminal is up for grabs now. */
+ break;
+
+ if (e->wd != wd || !(e->mask & IN_CLOSE)) /* Safety checks */
+ return -EIO;
+ }
+
+ break;
+ }
+
+ /* We close the tty fd here since if the old session ended our handle will be dead. It's important that
+ * we do this after sleeping, so that we don't enter an endless loop. */
+ fd = safe_close(fd);
+ }
+
+ return TAKE_FD(fd);
+}
+
+int release_terminal(void) {
+ static const struct sigaction sa_new = {
+ .sa_handler = SIG_IGN,
+ .sa_flags = SA_RESTART,
+ };
+
+ _cleanup_close_ int fd = -1;
+ struct sigaction sa_old;
+ int r;
+
+ fd = open("/dev/tty", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0)
+ return -errno;
+
+ /* Temporarily ignore SIGHUP, so that we don't get SIGHUP'ed
+ * by our own TIOCNOTTY */
+ assert_se(sigaction(SIGHUP, &sa_new, &sa_old) == 0);
+
+ r = ioctl(fd, TIOCNOTTY) < 0 ? -errno : 0;
+
+ assert_se(sigaction(SIGHUP, &sa_old, NULL) == 0);
+
+ return r;
+}
+
+int terminal_vhangup_fd(int fd) {
+ assert(fd >= 0);
+
+ if (ioctl(fd, TIOCVHANGUP) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int terminal_vhangup(const char *name) {
+ _cleanup_close_ int fd;
+
+ fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0)
+ return fd;
+
+ return terminal_vhangup_fd(fd);
+}
+
+int vt_disallocate(const char *name) {
+ _cleanup_close_ int fd = -1;
+ const char *e, *n;
+ unsigned u;
+ int r;
+
+ /* Deallocate the VT if possible. If not possible
+ * (i.e. because it is the active one), at least clear it
+ * entirely (including the scrollback buffer) */
+
+ e = path_startswith(name, "/dev/");
+ if (!e)
+ return -EINVAL;
+
+ if (!tty_is_vc(name)) {
+ /* So this is not a VT. I guess we cannot deallocate
+ * it then. But let's at least clear the screen */
+
+ fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ loop_write(fd,
+ "\033[r" /* clear scrolling region */
+ "\033[H" /* move home */
+ "\033[2J", /* clear screen */
+ 10, false);
+ return 0;
+ }
+
+ n = startswith(e, "tty");
+ if (!n)
+ return -EINVAL;
+
+ r = safe_atou(n, &u);
+ if (r < 0)
+ return r;
+
+ if (u <= 0)
+ return -EINVAL;
+
+ /* Try to deallocate */
+ fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0)
+ return fd;
+
+ r = ioctl(fd, VT_DISALLOCATE, u);
+ fd = safe_close(fd);
+
+ if (r >= 0)
+ return 0;
+
+ if (errno != EBUSY)
+ return -errno;
+
+ /* Couldn't deallocate, so let's clear it fully with
+ * scrollback */
+ fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ loop_write(fd,
+ "\033[r" /* clear scrolling region */
+ "\033[H" /* move home */
+ "\033[3J", /* clear screen including scrollback, requires Linux 2.6.40 */
+ 10, false);
+ return 0;
+}
+
+int make_console_stdio(void) {
+ int fd, r;
+
+ /* Make /dev/console the controlling terminal and stdin/stdout/stderr */
+
+ fd = acquire_terminal("/dev/console", ACQUIRE_TERMINAL_FORCE|ACQUIRE_TERMINAL_PERMISSIVE, USEC_INFINITY);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to acquire terminal: %m");
+
+ r = reset_terminal_fd(fd, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to reset terminal, ignoring: %m");
+
+ r = rearrange_stdio(fd, fd, fd); /* This invalidates 'fd' both on success and on failure. */
+ if (r < 0)
+ return log_error_errno(r, "Failed to make terminal stdin/stdout/stderr: %m");
+
+ reset_terminal_feature_caches();
+
+ return 0;
+}
+
+bool tty_is_vc(const char *tty) {
+ assert(tty);
+
+ return vtnr_from_tty(tty) >= 0;
+}
+
+bool tty_is_console(const char *tty) {
+ assert(tty);
+
+ return streq(skip_dev_prefix(tty), "console");
+}
+
+int vtnr_from_tty(const char *tty) {
+ int i, r;
+
+ assert(tty);
+
+ tty = skip_dev_prefix(tty);
+
+ if (!startswith(tty, "tty") )
+ return -EINVAL;
+
+ if (tty[3] < '0' || tty[3] > '9')
+ return -EINVAL;
+
+ r = safe_atoi(tty+3, &i);
+ if (r < 0)
+ return r;
+
+ if (i < 0 || i > 63)
+ return -EINVAL;
+
+ return i;
+}
+
+ int resolve_dev_console(char **ret) {
+ _cleanup_free_ char *active = NULL;
+ char *tty;
+ int r;
+
+ assert(ret);
+
+ /* Resolve where /dev/console is pointing to, if /sys is actually ours (i.e. not read-only-mounted which is a
+ * sign for container setups) */
+
+ if (path_is_read_only_fs("/sys") > 0)
+ return -ENOMEDIUM;
+
+ r = read_one_line_file("/sys/class/tty/console/active", &active);
+ if (r < 0)
+ return r;
+
+ /* If multiple log outputs are configured the last one is what /dev/console points to */
+ tty = strrchr(active, ' ');
+ if (tty)
+ tty++;
+ else
+ tty = active;
+
+ if (streq(tty, "tty0")) {
+ active = mfree(active);
+
+ /* Get the active VC (e.g. tty1) */
+ r = read_one_line_file("/sys/class/tty/tty0/active", &active);
+ if (r < 0)
+ return r;
+
+ tty = active;
+ }
+
+ if (tty == active)
+ *ret = TAKE_PTR(active);
+ else {
+ char *tmp;
+
+ tmp = strdup(tty);
+ if (!tmp)
+ return -ENOMEM;
+
+ *ret = tmp;
+ }
+
+ return 0;
+}
+
+int get_kernel_consoles(char ***ret) {
+ _cleanup_strv_free_ char **l = NULL;
+ _cleanup_free_ char *line = NULL;
+ const char *p;
+ int r;
+
+ assert(ret);
+
+ /* If /sys is mounted read-only this means we are running in some kind of container environment. In that
+ * case /sys would reflect the host system, not us, hence ignore the data we can read from it. */
+ if (path_is_read_only_fs("/sys") > 0)
+ goto fallback;
+
+ r = read_one_line_file("/sys/class/tty/console/active", &line);
+ if (r < 0)
+ return r;
+
+ p = line;
+ for (;;) {
+ _cleanup_free_ char *tty = NULL;
+ char *path;
+
+ r = extract_first_word(&p, &tty, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (streq(tty, "tty0")) {
+ tty = mfree(tty);
+ r = read_one_line_file("/sys/class/tty/tty0/active", &tty);
+ if (r < 0)
+ return r;
+ }
+
+ path = strappend("/dev/", tty);
+ if (!path)
+ return -ENOMEM;
+
+ if (access(path, F_OK) < 0) {
+ log_debug_errno(errno, "Console device %s is not accessible, skipping: %m", path);
+ free(path);
+ continue;
+ }
+
+ r = strv_consume(&l, path);
+ if (r < 0)
+ return r;
+ }
+
+ if (strv_isempty(l)) {
+ log_debug("No devices found for system console");
+ goto fallback;
+ }
+
+ *ret = TAKE_PTR(l);
+
+ return 0;
+
+fallback:
+ r = strv_extend(&l, "/dev/console");
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(l);
+
+ return 0;
+}
+
+bool tty_is_vc_resolve(const char *tty) {
+ _cleanup_free_ char *resolved = NULL;
+
+ assert(tty);
+
+ tty = skip_dev_prefix(tty);
+
+ if (streq(tty, "console")) {
+ if (resolve_dev_console(&resolved) < 0)
+ return false;
+
+ tty = resolved;
+ }
+
+ return tty_is_vc(tty);
+}
+
+const char *default_term_for_tty(const char *tty) {
+ return tty && tty_is_vc_resolve(tty) ? "linux" : "vt220";
+}
+
+int fd_columns(int fd) {
+ struct winsize ws = {};
+
+ if (ioctl(fd, TIOCGWINSZ, &ws) < 0)
+ return -errno;
+
+ if (ws.ws_col <= 0)
+ return -EIO;
+
+ return ws.ws_col;
+}
+
+unsigned columns(void) {
+ const char *e;
+ int c;
+
+ if (cached_columns > 0)
+ return cached_columns;
+
+ c = 0;
+ e = getenv("COLUMNS");
+ if (e)
+ (void) safe_atoi(e, &c);
+
+ if (c <= 0 || c > USHRT_MAX) {
+ c = fd_columns(STDOUT_FILENO);
+ if (c <= 0)
+ c = 80;
+ }
+
+ cached_columns = c;
+ return cached_columns;
+}
+
+int fd_lines(int fd) {
+ struct winsize ws = {};
+
+ if (ioctl(fd, TIOCGWINSZ, &ws) < 0)
+ return -errno;
+
+ if (ws.ws_row <= 0)
+ return -EIO;
+
+ return ws.ws_row;
+}
+
+unsigned lines(void) {
+ const char *e;
+ int l;
+
+ if (cached_lines > 0)
+ return cached_lines;
+
+ l = 0;
+ e = getenv("LINES");
+ if (e)
+ (void) safe_atoi(e, &l);
+
+ if (l <= 0 || l > USHRT_MAX) {
+ l = fd_lines(STDOUT_FILENO);
+ if (l <= 0)
+ l = 24;
+ }
+
+ cached_lines = l;
+ return cached_lines;
+}
+
+/* intended to be used as a SIGWINCH sighandler */
+void columns_lines_cache_reset(int signum) {
+ cached_columns = 0;
+ cached_lines = 0;
+}
+
+void reset_terminal_feature_caches(void) {
+ cached_columns = 0;
+ cached_lines = 0;
+
+ cached_colors_enabled = -1;
+ cached_underline_enabled = -1;
+ cached_on_tty = -1;
+}
+
+bool on_tty(void) {
+
+ /* We check both stdout and stderr, so that situations where pipes on the shell are used are reliably
+ * recognized, regardless if only the output or the errors are piped to some place. Since on_tty() is generally
+ * used to default to a safer, non-interactive, non-color mode of operation it's probably good to be defensive
+ * here, and check for both. Note that we don't check for STDIN_FILENO, because it should fine to use fancy
+ * terminal functionality when outputting stuff, even if the input is piped to us. */
+
+ if (cached_on_tty < 0)
+ cached_on_tty =
+ isatty(STDOUT_FILENO) > 0 &&
+ isatty(STDERR_FILENO) > 0;
+
+ return cached_on_tty;
+}
+
+int getttyname_malloc(int fd, char **ret) {
+ size_t l = 100;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ for (;;) {
+ char path[l];
+
+ r = ttyname_r(fd, path, sizeof(path));
+ if (r == 0) {
+ char *c;
+
+ c = strdup(skip_dev_prefix(path));
+ if (!c)
+ return -ENOMEM;
+
+ *ret = c;
+ return 0;
+ }
+
+ if (r != ERANGE)
+ return -r;
+
+ l *= 2;
+ }
+
+ return 0;
+}
+
+int getttyname_harder(int fd, char **r) {
+ int k;
+ char *s = NULL;
+
+ k = getttyname_malloc(fd, &s);
+ if (k < 0)
+ return k;
+
+ if (streq(s, "tty")) {
+ free(s);
+ return get_ctty(0, NULL, r);
+ }
+
+ *r = s;
+ return 0;
+}
+
+int get_ctty_devnr(pid_t pid, dev_t *d) {
+ int r;
+ _cleanup_free_ char *line = NULL;
+ const char *p;
+ unsigned long ttynr;
+
+ assert(pid >= 0);
+
+ p = procfs_file_alloca(pid, "stat");
+ r = read_one_line_file(p, &line);
+ if (r < 0)
+ return r;
+
+ p = strrchr(line, ')');
+ if (!p)
+ return -EIO;
+
+ p++;
+
+ if (sscanf(p, " "
+ "%*c " /* state */
+ "%*d " /* ppid */
+ "%*d " /* pgrp */
+ "%*d " /* session */
+ "%lu ", /* ttynr */
+ &ttynr) != 1)
+ return -EIO;
+
+ if (major(ttynr) == 0 && minor(ttynr) == 0)
+ return -ENXIO;
+
+ if (d)
+ *d = (dev_t) ttynr;
+
+ return 0;
+}
+
+int get_ctty(pid_t pid, dev_t *ret_devnr, char **ret) {
+ _cleanup_free_ char *fn = NULL, *b = NULL;
+ dev_t devnr;
+ int r;
+
+ r = get_ctty_devnr(pid, &devnr);
+ if (r < 0)
+ return r;
+
+ r = device_path_make_canonical(S_IFCHR, devnr, &fn);
+ if (r < 0) {
+ if (r != -ENOENT) /* No symlink for this in /dev/char/? */
+ return r;
+
+ if (major(devnr) == 136) {
+ /* This is an ugly hack: PTY devices are not listed in /dev/char/, as they don't follow the
+ * Linux device model. This means we have no nice way to match them up against their actual
+ * device node. Let's hence do the check by the fixed, assigned major number. Normally we try
+ * to avoid such fixed major/minor matches, but there appears to nother nice way to handle
+ * this. */
+
+ if (asprintf(&b, "pts/%u", minor(devnr)) < 0)
+ return -ENOMEM;
+ } else {
+ /* Probably something similar to the ptys which have no symlink in /dev/char/. Let's return
+ * something vaguely useful. */
+
+ r = device_path_make_major_minor(S_IFCHR, devnr, &fn);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!b) {
+ const char *w;
+
+ w = path_startswith(fn, "/dev/");
+ if (w) {
+ b = strdup(w);
+ if (!b)
+ return -ENOMEM;
+ } else
+ b = TAKE_PTR(fn);
+ }
+
+ if (ret)
+ *ret = TAKE_PTR(b);
+
+ if (ret_devnr)
+ *ret_devnr = devnr;
+
+ return 0;
+}
+
+int ptsname_malloc(int fd, char **ret) {
+ size_t l = 100;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ for (;;) {
+ char *c;
+
+ c = new(char, l);
+ if (!c)
+ return -ENOMEM;
+
+ if (ptsname_r(fd, c, l) == 0) {
+ *ret = c;
+ return 0;
+ }
+ if (errno != ERANGE) {
+ free(c);
+ return -errno;
+ }
+
+ free(c);
+ l *= 2;
+ }
+}
+
+int ptsname_namespace(int pty, char **ret) {
+ int no = -1, r;
+
+ /* Like ptsname(), but doesn't assume that the path is
+ * accessible in the local namespace. */
+
+ r = ioctl(pty, TIOCGPTN, &no);
+ if (r < 0)
+ return -errno;
+
+ if (no < 0)
+ return -EIO;
+
+ if (asprintf(ret, "/dev/pts/%i", no) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int openpt_in_namespace(pid_t pid, int flags) {
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1;
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ pid_t child;
+ int r;
+
+ assert(pid > 0);
+
+ r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-openptns)", "(sd-openpt)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ int master;
+
+ pair[0] = safe_close(pair[0]);
+
+ master = posix_openpt(flags|O_NOCTTY|O_CLOEXEC);
+ if (master < 0)
+ _exit(EXIT_FAILURE);
+
+ if (unlockpt(master) < 0)
+ _exit(EXIT_FAILURE);
+
+ if (send_one_fd(pair[1], master, 0) < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ r = wait_for_terminate_and_check("(sd-openptns)", child, 0);
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EIO;
+
+ return receive_one_fd(pair[0], 0);
+}
+
+int open_terminal_in_namespace(pid_t pid, const char *name, int mode) {
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1;
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ pid_t child;
+ int r;
+
+ r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-terminalns)", "(sd-terminal)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ int master;
+
+ pair[0] = safe_close(pair[0]);
+
+ master = open_terminal(name, mode|O_NOCTTY|O_CLOEXEC);
+ if (master < 0)
+ _exit(EXIT_FAILURE);
+
+ if (send_one_fd(pair[1], master, 0) < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ r = wait_for_terminate_and_check("(sd-terminalns)", child, 0);
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EIO;
+
+ return receive_one_fd(pair[0], 0);
+}
+
+static bool getenv_terminal_is_dumb(void) {
+ const char *e;
+
+ e = getenv("TERM");
+ if (!e)
+ return true;
+
+ return streq(e, "dumb");
+}
+
+bool terminal_is_dumb(void) {
+ if (!on_tty())
+ return true;
+
+ return getenv_terminal_is_dumb();
+}
+
+bool colors_enabled(void) {
+
+ /* Returns true if colors are considered supported on our stdout. For that we check $SYSTEMD_COLORS first
+ * (which is the explicit way to turn colors on/off). If that didn't work we turn colors off unless we are on a
+ * TTY. And if we are on a TTY we turn it off if $TERM is set to "dumb". There's one special tweak though: if
+ * we are PID 1 then we do not check whether we are connected to a TTY, because we don't keep /dev/console open
+ * continously due to fear of SAK, and hence things are a bit weird. */
+
+ if (cached_colors_enabled < 0) {
+ int val;
+
+ val = getenv_bool("SYSTEMD_COLORS");
+ if (val >= 0)
+ cached_colors_enabled = val;
+ else if (getpid_cached() == 1)
+ /* PID1 outputs to the console without holding it open all the time */
+ cached_colors_enabled = !getenv_terminal_is_dumb();
+ else
+ cached_colors_enabled = !terminal_is_dumb();
+ }
+
+ return cached_colors_enabled;
+}
+
+bool dev_console_colors_enabled(void) {
+ _cleanup_free_ char *s = NULL;
+ int b;
+
+ /* Returns true if we assume that color is supported on /dev/console.
+ *
+ * For that we first check if we explicitly got told to use colors or not, by checking $SYSTEMD_COLORS. If that
+ * isn't set we check whether PID 1 has $TERM set, and if not, whether TERM is set on the kernel command
+ * line. If we find $TERM set we assume color if it's not set to "dumb", similarly to how regular
+ * colors_enabled() operates. */
+
+ b = getenv_bool("SYSTEMD_COLORS");
+ if (b >= 0)
+ return b;
+
+ if (getenv_for_pid(1, "TERM", &s) <= 0)
+ (void) proc_cmdline_get_key("TERM", 0, &s);
+
+ return !streq_ptr(s, "dumb");
+}
+
+bool underline_enabled(void) {
+
+ if (cached_underline_enabled < 0) {
+
+ /* The Linux console doesn't support underlining, turn it off, but only there. */
+
+ if (colors_enabled())
+ cached_underline_enabled = !streq_ptr(getenv("TERM"), "linux");
+ else
+ cached_underline_enabled = false;
+ }
+
+ return cached_underline_enabled;
+}
+
+int vt_default_utf8(void) {
+ _cleanup_free_ char *b = NULL;
+ int r;
+
+ /* Read the default VT UTF8 setting from the kernel */
+
+ r = read_one_line_file("/sys/module/vt/parameters/default_utf8", &b);
+ if (r < 0)
+ return r;
+
+ return parse_boolean(b);
+}
+
+int vt_reset_keyboard(int fd) {
+ int kb;
+
+ /* If we can't read the default, then default to unicode. It's 2017 after all. */
+ kb = vt_default_utf8() != 0 ? K_UNICODE : K_XLATE;
+
+ if (ioctl(fd, KDSKBMODE, kb) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int vt_restore(int fd) {
+ static const struct vt_mode mode = {
+ .mode = VT_AUTO,
+ };
+ int r, q = 0;
+
+ r = ioctl(fd, KDSETMODE, KD_TEXT);
+ if (r < 0)
+ q = log_debug_errno(errno, "Failed to set VT in text mode, ignoring: %m");
+
+ r = vt_reset_keyboard(fd);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to reset keyboard mode, ignoring: %m");
+ if (q >= 0)
+ q = r;
+ }
+
+ r = ioctl(fd, VT_SETMODE, &mode);
+ if (r < 0) {
+ log_debug_errno(errno, "Failed to set VT_AUTO mode, ignoring: %m");
+ if (q >= 0)
+ q = -errno;
+ }
+
+ r = fchown(fd, 0, (gid_t) -1);
+ if (r < 0) {
+ log_debug_errno(errno, "Failed to chown VT, ignoring: %m");
+ if (q >= 0)
+ q = -errno;
+ }
+
+ return q;
+}
+
+int vt_release(int fd, bool restore) {
+ assert(fd >= 0);
+
+ /* This function releases the VT by acknowledging the VT-switch signal
+ * sent by the kernel and optionally reset the VT in text and auto
+ * VT-switching modes. */
+
+ if (ioctl(fd, VT_RELDISP, 1) < 0)
+ return -errno;
+
+ if (restore)
+ return vt_restore(fd);
+
+ return 0;
+}
diff --git a/src/basic/terminal-util.h b/src/basic/terminal-util.h
new file mode 100644
index 0000000..c885e0a
--- /dev/null
+++ b/src/basic/terminal-util.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "time-util.h"
+
+/* Regular colors */
+#define ANSI_BLACK "\x1B[0;30m"
+#define ANSI_RED "\x1B[0;31m"
+#define ANSI_GREEN "\x1B[0;32m"
+#define ANSI_YELLOW "\x1B[0;33m"
+#define ANSI_BLUE "\x1B[0;34m"
+#define ANSI_MAGENTA "\x1B[0;35m"
+#define ANSI_CYAN "\x1B[0;36m"
+#define ANSI_WHITE "\x1B[0;37m"
+#define ANSI_GREY "\x1B[0;2;37m"
+
+/* Bold/highlighted */
+#define ANSI_HIGHLIGHT_BLACK "\x1B[0;1;30m"
+#define ANSI_HIGHLIGHT_RED "\x1B[0;1;31m"
+#define ANSI_HIGHLIGHT_GREEN "\x1B[0;1;32m"
+#define ANSI_HIGHLIGHT_YELLOW "\x1B[0;1;33m"
+#define ANSI_HIGHLIGHT_BLUE "\x1B[0;1;34m"
+#define ANSI_HIGHLIGHT_MAGENTA "\x1B[0;1;35m"
+#define ANSI_HIGHLIGHT_CYAN "\x1B[0;1;36m"
+#define ANSI_HIGHLIGHT_WHITE "\x1B[0;1;37m"
+
+/* Underlined */
+#define ANSI_HIGHLIGHT_BLACK_UNDERLINE "\x1B[0;1;4;30m"
+#define ANSI_HIGHLIGHT_RED_UNDERLINE "\x1B[0;1;4;31m"
+#define ANSI_HIGHLIGHT_GREEN_UNDERLINE "\x1B[0;1;4;32m"
+#define ANSI_HIGHLIGHT_YELLOW_UNDERLINE "\x1B[0;1;4;33m"
+#define ANSI_HIGHLIGHT_BLUE_UNDERLINE "\x1B[0;1;4;34m"
+#define ANSI_HIGHLIGHT_MAGENTA_UNDERLINE "\x1B[0;1;4;35m"
+#define ANSI_HIGHLIGHT_CYAN_UNDERLINE "\x1B[0;1;4;36m"
+#define ANSI_HIGHLIGHT_WHITE_UNDERLINE "\x1B[0;1;4;37m"
+
+/* Other ANSI codes */
+#define ANSI_UNDERLINE "\x1B[0;4m"
+#define ANSI_HIGHLIGHT "\x1B[0;1;39m"
+#define ANSI_HIGHLIGHT_UNDERLINE "\x1B[0;1;4m"
+
+/* Reset/clear ANSI styles */
+#define ANSI_NORMAL "\x1B[0m"
+
+/* Erase characters until the end of the line */
+#define ANSI_ERASE_TO_END_OF_LINE "\x1B[K"
+
+/* Move cursor up one line */
+#define ANSI_REVERSE_LINEFEED "\x1BM"
+
+/* Set cursor to top left corner and clear screen */
+#define ANSI_HOME_CLEAR "\x1B[H\x1B[2J"
+
+int reset_terminal_fd(int fd, bool switch_to_text);
+int reset_terminal(const char *name);
+
+int open_terminal(const char *name, int mode);
+
+/* Flags for tweaking the way we become the controlling process of a terminal. */
+typedef enum AcquireTerminalFlags {
+ /* Try to become the controlling process of the TTY. If we can't return -EPERM. */
+ ACQUIRE_TERMINAL_TRY = 0,
+
+ /* Tell the kernel to forcibly make us the controlling process of the TTY. Returns -EPERM if the kernel doesn't allow that. */
+ ACQUIRE_TERMINAL_FORCE = 1,
+
+ /* If we can't become the controlling process of the TTY right-away, then wait until we can. */
+ ACQUIRE_TERMINAL_WAIT = 2,
+
+ /* Pick one of the above, and then OR this flag in, in order to request permissive behaviour, if we can't become controlling process then don't mind */
+ ACQUIRE_TERMINAL_PERMISSIVE = 1 << 2,
+} AcquireTerminalFlags;
+
+int acquire_terminal(const char *name, AcquireTerminalFlags flags, usec_t timeout);
+int release_terminal(void);
+
+int terminal_vhangup_fd(int fd);
+int terminal_vhangup(const char *name);
+
+int chvt(int vt);
+
+int read_one_char(FILE *f, char *ret, usec_t timeout, bool *need_nl);
+int ask_char(char *ret, const char *replies, const char *text, ...) _printf_(3, 4);
+int ask_string(char **ret, const char *text, ...) _printf_(2, 3);
+
+int vt_disallocate(const char *name);
+
+int resolve_dev_console(char **ret);
+int get_kernel_consoles(char ***ret);
+bool tty_is_vc(const char *tty);
+bool tty_is_vc_resolve(const char *tty);
+bool tty_is_console(const char *tty) _pure_;
+int vtnr_from_tty(const char *tty);
+const char *default_term_for_tty(const char *tty);
+
+int make_console_stdio(void);
+
+int fd_columns(int fd);
+unsigned columns(void);
+int fd_lines(int fd);
+unsigned lines(void);
+
+void columns_lines_cache_reset(int _unused_ signum);
+void reset_terminal_feature_caches(void);
+
+bool on_tty(void);
+bool terminal_is_dumb(void);
+bool colors_enabled(void);
+bool underline_enabled(void);
+bool dev_console_colors_enabled(void);
+
+#define DEFINE_ANSI_FUNC(name, NAME) \
+ static inline const char *ansi_##name(void) { \
+ return colors_enabled() ? ANSI_##NAME : ""; \
+ }
+
+#define DEFINE_ANSI_FUNC_UNDERLINE(name, NAME, REPLACEMENT) \
+ static inline const char *ansi_##name(void) { \
+ return underline_enabled() ? ANSI_##NAME : \
+ colors_enabled() ? ANSI_##REPLACEMENT : ""; \
+ }
+
+DEFINE_ANSI_FUNC(highlight, HIGHLIGHT);
+DEFINE_ANSI_FUNC(highlight_red, HIGHLIGHT_RED);
+DEFINE_ANSI_FUNC(highlight_green, HIGHLIGHT_GREEN);
+DEFINE_ANSI_FUNC(highlight_yellow, HIGHLIGHT_YELLOW);
+DEFINE_ANSI_FUNC(highlight_blue, HIGHLIGHT_BLUE);
+DEFINE_ANSI_FUNC(highlight_magenta, HIGHLIGHT_MAGENTA);
+DEFINE_ANSI_FUNC(normal, NORMAL);
+DEFINE_ANSI_FUNC(grey, GREY);
+
+DEFINE_ANSI_FUNC_UNDERLINE(underline, UNDERLINE, NORMAL);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_underline, HIGHLIGHT_UNDERLINE, HIGHLIGHT);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_red_underline, HIGHLIGHT_RED_UNDERLINE, HIGHLIGHT_RED);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_green_underline, HIGHLIGHT_GREEN_UNDERLINE, HIGHLIGHT_GREEN);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_yellow_underline, HIGHLIGHT_YELLOW_UNDERLINE, HIGHLIGHT_YELLOW);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_blue_underline, HIGHLIGHT_BLUE_UNDERLINE, HIGHLIGHT_BLUE);
+
+int get_ctty_devnr(pid_t pid, dev_t *d);
+int get_ctty(pid_t, dev_t *_devnr, char **r);
+
+int getttyname_malloc(int fd, char **r);
+int getttyname_harder(int fd, char **r);
+
+int ptsname_malloc(int fd, char **ret);
+int ptsname_namespace(int pty, char **ret);
+
+int openpt_in_namespace(pid_t pid, int flags);
+int open_terminal_in_namespace(pid_t pid, const char *name, int mode);
+
+int vt_default_utf8(void);
+int vt_reset_keyboard(int fd);
+int vt_restore(int fd);
+int vt_release(int fd, bool restore_vt);
diff --git a/src/basic/time-util.c b/src/basic/time-util.c
new file mode 100644
index 0000000..62cdc30
--- /dev/null
+++ b/src/basic/time-util.c
@@ -0,0 +1,1471 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/timerfd.h>
+#include <sys/timex.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_timerfd.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+static clockid_t map_clock_id(clockid_t c) {
+
+ /* Some more exotic archs (s390, ppc, …) lack the "ALARM" flavour of the clocks. Thus, clock_gettime() will
+ * fail for them. Since they are essentially the same as their non-ALARM pendants (their only difference is
+ * when timers are set on them), let's just map them accordingly. This way, we can get the correct time even on
+ * those archs. */
+
+ switch (c) {
+
+ case CLOCK_BOOTTIME_ALARM:
+ return CLOCK_BOOTTIME;
+
+ case CLOCK_REALTIME_ALARM:
+ return CLOCK_REALTIME;
+
+ default:
+ return c;
+ }
+}
+
+usec_t now(clockid_t clock_id) {
+ struct timespec ts;
+
+ assert_se(clock_gettime(map_clock_id(clock_id), &ts) == 0);
+
+ return timespec_load(&ts);
+}
+
+nsec_t now_nsec(clockid_t clock_id) {
+ struct timespec ts;
+
+ assert_se(clock_gettime(map_clock_id(clock_id), &ts) == 0);
+
+ return timespec_load_nsec(&ts);
+}
+
+dual_timestamp* dual_timestamp_get(dual_timestamp *ts) {
+ assert(ts);
+
+ ts->realtime = now(CLOCK_REALTIME);
+ ts->monotonic = now(CLOCK_MONOTONIC);
+
+ return ts;
+}
+
+triple_timestamp* triple_timestamp_get(triple_timestamp *ts) {
+ assert(ts);
+
+ ts->realtime = now(CLOCK_REALTIME);
+ ts->monotonic = now(CLOCK_MONOTONIC);
+ ts->boottime = clock_boottime_supported() ? now(CLOCK_BOOTTIME) : USEC_INFINITY;
+
+ return ts;
+}
+
+dual_timestamp* dual_timestamp_from_realtime(dual_timestamp *ts, usec_t u) {
+ int64_t delta;
+ assert(ts);
+
+ if (u == USEC_INFINITY || u <= 0) {
+ ts->realtime = ts->monotonic = u;
+ return ts;
+ }
+
+ ts->realtime = u;
+
+ delta = (int64_t) now(CLOCK_REALTIME) - (int64_t) u;
+ ts->monotonic = usec_sub_signed(now(CLOCK_MONOTONIC), delta);
+
+ return ts;
+}
+
+triple_timestamp* triple_timestamp_from_realtime(triple_timestamp *ts, usec_t u) {
+ int64_t delta;
+
+ assert(ts);
+
+ if (u == USEC_INFINITY || u <= 0) {
+ ts->realtime = ts->monotonic = ts->boottime = u;
+ return ts;
+ }
+
+ ts->realtime = u;
+ delta = (int64_t) now(CLOCK_REALTIME) - (int64_t) u;
+ ts->monotonic = usec_sub_signed(now(CLOCK_MONOTONIC), delta);
+ ts->boottime = clock_boottime_supported() ? usec_sub_signed(now(CLOCK_BOOTTIME), delta) : USEC_INFINITY;
+
+ return ts;
+}
+
+dual_timestamp* dual_timestamp_from_monotonic(dual_timestamp *ts, usec_t u) {
+ int64_t delta;
+ assert(ts);
+
+ if (u == USEC_INFINITY) {
+ ts->realtime = ts->monotonic = USEC_INFINITY;
+ return ts;
+ }
+
+ ts->monotonic = u;
+ delta = (int64_t) now(CLOCK_MONOTONIC) - (int64_t) u;
+ ts->realtime = usec_sub_signed(now(CLOCK_REALTIME), delta);
+
+ return ts;
+}
+
+dual_timestamp* dual_timestamp_from_boottime_or_monotonic(dual_timestamp *ts, usec_t u) {
+ int64_t delta;
+
+ if (u == USEC_INFINITY) {
+ ts->realtime = ts->monotonic = USEC_INFINITY;
+ return ts;
+ }
+
+ dual_timestamp_get(ts);
+ delta = (int64_t) now(clock_boottime_or_monotonic()) - (int64_t) u;
+ ts->realtime = usec_sub_signed(ts->realtime, delta);
+ ts->monotonic = usec_sub_signed(ts->monotonic, delta);
+
+ return ts;
+}
+
+usec_t triple_timestamp_by_clock(triple_timestamp *ts, clockid_t clock) {
+
+ switch (clock) {
+
+ case CLOCK_REALTIME:
+ case CLOCK_REALTIME_ALARM:
+ return ts->realtime;
+
+ case CLOCK_MONOTONIC:
+ return ts->monotonic;
+
+ case CLOCK_BOOTTIME:
+ case CLOCK_BOOTTIME_ALARM:
+ return ts->boottime;
+
+ default:
+ return USEC_INFINITY;
+ }
+}
+
+usec_t timespec_load(const struct timespec *ts) {
+ assert(ts);
+
+ if (ts->tv_sec < 0 || ts->tv_nsec < 0)
+ return USEC_INFINITY;
+
+ if ((usec_t) ts->tv_sec > (UINT64_MAX - (ts->tv_nsec / NSEC_PER_USEC)) / USEC_PER_SEC)
+ return USEC_INFINITY;
+
+ return
+ (usec_t) ts->tv_sec * USEC_PER_SEC +
+ (usec_t) ts->tv_nsec / NSEC_PER_USEC;
+}
+
+nsec_t timespec_load_nsec(const struct timespec *ts) {
+ assert(ts);
+
+ if (ts->tv_sec < 0 || ts->tv_nsec < 0)
+ return NSEC_INFINITY;
+
+ if ((nsec_t) ts->tv_sec >= (UINT64_MAX - ts->tv_nsec) / NSEC_PER_SEC)
+ return NSEC_INFINITY;
+
+ return (nsec_t) ts->tv_sec * NSEC_PER_SEC + (nsec_t) ts->tv_nsec;
+}
+
+struct timespec *timespec_store(struct timespec *ts, usec_t u) {
+ assert(ts);
+
+ if (u == USEC_INFINITY ||
+ u / USEC_PER_SEC >= TIME_T_MAX) {
+ ts->tv_sec = (time_t) -1;
+ ts->tv_nsec = (long) -1;
+ return ts;
+ }
+
+ ts->tv_sec = (time_t) (u / USEC_PER_SEC);
+ ts->tv_nsec = (long int) ((u % USEC_PER_SEC) * NSEC_PER_USEC);
+
+ return ts;
+}
+
+usec_t timeval_load(const struct timeval *tv) {
+ assert(tv);
+
+ if (tv->tv_sec < 0 || tv->tv_usec < 0)
+ return USEC_INFINITY;
+
+ if ((usec_t) tv->tv_sec > (UINT64_MAX - tv->tv_usec) / USEC_PER_SEC)
+ return USEC_INFINITY;
+
+ return
+ (usec_t) tv->tv_sec * USEC_PER_SEC +
+ (usec_t) tv->tv_usec;
+}
+
+struct timeval *timeval_store(struct timeval *tv, usec_t u) {
+ assert(tv);
+
+ if (u == USEC_INFINITY ||
+ u / USEC_PER_SEC > TIME_T_MAX) {
+ tv->tv_sec = (time_t) -1;
+ tv->tv_usec = (suseconds_t) -1;
+ } else {
+ tv->tv_sec = (time_t) (u / USEC_PER_SEC);
+ tv->tv_usec = (suseconds_t) (u % USEC_PER_SEC);
+ }
+
+ return tv;
+}
+
+static char *format_timestamp_internal(
+ char *buf,
+ size_t l,
+ usec_t t,
+ bool utc,
+ bool us) {
+
+ /* The weekdays in non-localized (English) form. We use this instead of the localized form, so that our
+ * generated timestamps may be parsed with parse_timestamp(), and always read the same. */
+ static const char * const weekdays[] = {
+ [0] = "Sun",
+ [1] = "Mon",
+ [2] = "Tue",
+ [3] = "Wed",
+ [4] = "Thu",
+ [5] = "Fri",
+ [6] = "Sat",
+ };
+
+ struct tm tm;
+ time_t sec;
+ size_t n;
+
+ assert(buf);
+
+ if (l <
+ 3 + /* week day */
+ 1 + 10 + /* space and date */
+ 1 + 8 + /* space and time */
+ (us ? 1 + 6 : 0) + /* "." and microsecond part */
+ 1 + 1 + /* space and shortest possible zone */
+ 1)
+ return NULL; /* Not enough space even for the shortest form. */
+ if (t <= 0 || t == USEC_INFINITY)
+ return NULL; /* Timestamp is unset */
+
+ /* Let's not format times with years > 9999 */
+ if (t > USEC_TIMESTAMP_FORMATTABLE_MAX) {
+ assert(l >= STRLEN("--- XXXX-XX-XX XX:XX:XX") + 1);
+ strcpy(buf, "--- XXXX-XX-XX XX:XX:XX");
+ return buf;
+ }
+
+ sec = (time_t) (t / USEC_PER_SEC); /* Round down */
+
+ if (!localtime_or_gmtime_r(&sec, &tm, utc))
+ return NULL;
+
+ /* Start with the week day */
+ assert((size_t) tm.tm_wday < ELEMENTSOF(weekdays));
+ memcpy(buf, weekdays[tm.tm_wday], 4);
+
+ /* Add the main components */
+ if (strftime(buf + 3, l - 3, " %Y-%m-%d %H:%M:%S", &tm) <= 0)
+ return NULL; /* Doesn't fit */
+
+ /* Append the microseconds part, if that's requested */
+ if (us) {
+ n = strlen(buf);
+ if (n + 8 > l)
+ return NULL; /* Microseconds part doesn't fit. */
+
+ sprintf(buf + n, ".%06"PRI_USEC, t % USEC_PER_SEC);
+ }
+
+ /* Append the timezone */
+ n = strlen(buf);
+ if (utc) {
+ /* If this is UTC then let's explicitly use the "UTC" string here, because gmtime_r() normally uses the
+ * obsolete "GMT" instead. */
+ if (n + 5 > l)
+ return NULL; /* "UTC" doesn't fit. */
+
+ strcpy(buf + n, " UTC");
+
+ } else if (!isempty(tm.tm_zone)) {
+ size_t tn;
+
+ /* An explicit timezone is specified, let's use it, if it fits */
+ tn = strlen(tm.tm_zone);
+ if (n + 1 + tn + 1 > l) {
+ /* The full time zone does not fit in. Yuck. */
+
+ if (n + 1 + _POSIX_TZNAME_MAX + 1 > l)
+ return NULL; /* Not even enough space for the POSIX minimum (of 6)? In that case, complain that it doesn't fit */
+
+ /* So the time zone doesn't fit in fully, but the caller passed enough space for the POSIX
+ * minimum time zone length. In this case suppress the timezone entirely, in order not to dump
+ * an overly long, hard to read string on the user. This should be safe, because the user will
+ * assume the local timezone anyway if none is shown. And so does parse_timestamp(). */
+ } else {
+ buf[n++] = ' ';
+ strcpy(buf + n, tm.tm_zone);
+ }
+ }
+
+ return buf;
+}
+
+char *format_timestamp(char *buf, size_t l, usec_t t) {
+ return format_timestamp_internal(buf, l, t, false, false);
+}
+
+char *format_timestamp_utc(char *buf, size_t l, usec_t t) {
+ return format_timestamp_internal(buf, l, t, true, false);
+}
+
+char *format_timestamp_us(char *buf, size_t l, usec_t t) {
+ return format_timestamp_internal(buf, l, t, false, true);
+}
+
+char *format_timestamp_us_utc(char *buf, size_t l, usec_t t) {
+ return format_timestamp_internal(buf, l, t, true, true);
+}
+
+char *format_timestamp_relative(char *buf, size_t l, usec_t t) {
+ const char *s;
+ usec_t n, d;
+
+ if (t <= 0 || t == USEC_INFINITY)
+ return NULL;
+
+ n = now(CLOCK_REALTIME);
+ if (n > t) {
+ d = n - t;
+ s = "ago";
+ } else {
+ d = t - n;
+ s = "left";
+ }
+
+ if (d >= USEC_PER_YEAR)
+ snprintf(buf, l, USEC_FMT " years " USEC_FMT " months %s",
+ d / USEC_PER_YEAR,
+ (d % USEC_PER_YEAR) / USEC_PER_MONTH, s);
+ else if (d >= USEC_PER_MONTH)
+ snprintf(buf, l, USEC_FMT " months " USEC_FMT " days %s",
+ d / USEC_PER_MONTH,
+ (d % USEC_PER_MONTH) / USEC_PER_DAY, s);
+ else if (d >= USEC_PER_WEEK)
+ snprintf(buf, l, USEC_FMT " weeks " USEC_FMT " days %s",
+ d / USEC_PER_WEEK,
+ (d % USEC_PER_WEEK) / USEC_PER_DAY, s);
+ else if (d >= 2*USEC_PER_DAY)
+ snprintf(buf, l, USEC_FMT " days %s", d / USEC_PER_DAY, s);
+ else if (d >= 25*USEC_PER_HOUR)
+ snprintf(buf, l, "1 day " USEC_FMT "h %s",
+ (d - USEC_PER_DAY) / USEC_PER_HOUR, s);
+ else if (d >= 6*USEC_PER_HOUR)
+ snprintf(buf, l, USEC_FMT "h %s",
+ d / USEC_PER_HOUR, s);
+ else if (d >= USEC_PER_HOUR)
+ snprintf(buf, l, USEC_FMT "h " USEC_FMT "min %s",
+ d / USEC_PER_HOUR,
+ (d % USEC_PER_HOUR) / USEC_PER_MINUTE, s);
+ else if (d >= 5*USEC_PER_MINUTE)
+ snprintf(buf, l, USEC_FMT "min %s",
+ d / USEC_PER_MINUTE, s);
+ else if (d >= USEC_PER_MINUTE)
+ snprintf(buf, l, USEC_FMT "min " USEC_FMT "s %s",
+ d / USEC_PER_MINUTE,
+ (d % USEC_PER_MINUTE) / USEC_PER_SEC, s);
+ else if (d >= USEC_PER_SEC)
+ snprintf(buf, l, USEC_FMT "s %s",
+ d / USEC_PER_SEC, s);
+ else if (d >= USEC_PER_MSEC)
+ snprintf(buf, l, USEC_FMT "ms %s",
+ d / USEC_PER_MSEC, s);
+ else if (d > 0)
+ snprintf(buf, l, USEC_FMT"us %s",
+ d, s);
+ else
+ snprintf(buf, l, "now");
+
+ buf[l-1] = 0;
+ return buf;
+}
+
+char *format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy) {
+ static const struct {
+ const char *suffix;
+ usec_t usec;
+ } table[] = {
+ { "y", USEC_PER_YEAR },
+ { "month", USEC_PER_MONTH },
+ { "w", USEC_PER_WEEK },
+ { "d", USEC_PER_DAY },
+ { "h", USEC_PER_HOUR },
+ { "min", USEC_PER_MINUTE },
+ { "s", USEC_PER_SEC },
+ { "ms", USEC_PER_MSEC },
+ { "us", 1 },
+ };
+
+ size_t i;
+ char *p = buf;
+ bool something = false;
+
+ assert(buf);
+ assert(l > 0);
+
+ if (t == USEC_INFINITY) {
+ strncpy(p, "infinity", l-1);
+ p[l-1] = 0;
+ return p;
+ }
+
+ if (t <= 0) {
+ strncpy(p, "0", l-1);
+ p[l-1] = 0;
+ return p;
+ }
+
+ /* The result of this function can be parsed with parse_sec */
+
+ for (i = 0; i < ELEMENTSOF(table); i++) {
+ int k = 0;
+ size_t n;
+ bool done = false;
+ usec_t a, b;
+
+ if (t <= 0)
+ break;
+
+ if (t < accuracy && something)
+ break;
+
+ if (t < table[i].usec)
+ continue;
+
+ if (l <= 1)
+ break;
+
+ a = t / table[i].usec;
+ b = t % table[i].usec;
+
+ /* Let's see if we should shows this in dot notation */
+ if (t < USEC_PER_MINUTE && b > 0) {
+ usec_t cc;
+ signed char j;
+
+ j = 0;
+ for (cc = table[i].usec; cc > 1; cc /= 10)
+ j++;
+
+ for (cc = accuracy; cc > 1; cc /= 10) {
+ b /= 10;
+ j--;
+ }
+
+ if (j > 0) {
+ k = snprintf(p, l,
+ "%s"USEC_FMT".%0*"PRI_USEC"%s",
+ p > buf ? " " : "",
+ a,
+ j,
+ b,
+ table[i].suffix);
+
+ t = 0;
+ done = true;
+ }
+ }
+
+ /* No? Then let's show it normally */
+ if (!done) {
+ k = snprintf(p, l,
+ "%s"USEC_FMT"%s",
+ p > buf ? " " : "",
+ a,
+ table[i].suffix);
+
+ t = b;
+ }
+
+ n = MIN((size_t) k, l);
+
+ l -= n;
+ p += n;
+
+ something = true;
+ }
+
+ *p = 0;
+
+ return buf;
+}
+
+static int parse_timestamp_impl(const char *t, usec_t *usec, bool with_tz) {
+ static const struct {
+ const char *name;
+ const int nr;
+ } day_nr[] = {
+ { "Sunday", 0 },
+ { "Sun", 0 },
+ { "Monday", 1 },
+ { "Mon", 1 },
+ { "Tuesday", 2 },
+ { "Tue", 2 },
+ { "Wednesday", 3 },
+ { "Wed", 3 },
+ { "Thursday", 4 },
+ { "Thu", 4 },
+ { "Friday", 5 },
+ { "Fri", 5 },
+ { "Saturday", 6 },
+ { "Sat", 6 },
+ };
+
+ const char *k, *utc = NULL, *tzn = NULL;
+ struct tm tm, copy;
+ time_t x;
+ usec_t x_usec, plus = 0, minus = 0, ret;
+ int r, weekday = -1, dst = -1;
+ size_t i;
+
+ /* Allowed syntaxes:
+ *
+ * 2012-09-22 16:34:22
+ * 2012-09-22 16:34 (seconds will be set to 0)
+ * 2012-09-22 (time will be set to 00:00:00)
+ * 16:34:22 (date will be set to today)
+ * 16:34 (date will be set to today, seconds to 0)
+ * now
+ * yesterday (time is set to 00:00:00)
+ * today (time is set to 00:00:00)
+ * tomorrow (time is set to 00:00:00)
+ * +5min
+ * -5days
+ * @2147483647 (seconds since epoch)
+ */
+
+ assert(t);
+ assert(usec);
+
+ if (t[0] == '@' && !with_tz)
+ return parse_sec(t + 1, usec);
+
+ ret = now(CLOCK_REALTIME);
+
+ if (!with_tz) {
+ if (streq(t, "now"))
+ goto finish;
+
+ else if (t[0] == '+') {
+ r = parse_sec(t+1, &plus);
+ if (r < 0)
+ return r;
+
+ goto finish;
+
+ } else if (t[0] == '-') {
+ r = parse_sec(t+1, &minus);
+ if (r < 0)
+ return r;
+
+ goto finish;
+
+ } else if ((k = endswith(t, " ago"))) {
+ t = strndupa(t, k - t);
+
+ r = parse_sec(t, &minus);
+ if (r < 0)
+ return r;
+
+ goto finish;
+
+ } else if ((k = endswith(t, " left"))) {
+ t = strndupa(t, k - t);
+
+ r = parse_sec(t, &plus);
+ if (r < 0)
+ return r;
+
+ goto finish;
+ }
+
+ /* See if the timestamp is suffixed with UTC */
+ utc = endswith_no_case(t, " UTC");
+ if (utc)
+ t = strndupa(t, utc - t);
+ else {
+ const char *e = NULL;
+ int j;
+
+ tzset();
+
+ /* See if the timestamp is suffixed by either the DST or non-DST local timezone. Note that we only
+ * support the local timezones here, nothing else. Not because we wouldn't want to, but simply because
+ * there are no nice APIs available to cover this. By accepting the local time zone strings, we make
+ * sure that all timestamps written by format_timestamp() can be parsed correctly, even though we don't
+ * support arbitrary timezone specifications. */
+
+ for (j = 0; j <= 1; j++) {
+
+ if (isempty(tzname[j]))
+ continue;
+
+ e = endswith_no_case(t, tzname[j]);
+ if (!e)
+ continue;
+ if (e == t)
+ continue;
+ if (e[-1] != ' ')
+ continue;
+
+ break;
+ }
+
+ if (IN_SET(j, 0, 1)) {
+ /* Found one of the two timezones specified. */
+ t = strndupa(t, e - t - 1);
+ dst = j;
+ tzn = tzname[j];
+ }
+ }
+ }
+
+ x = (time_t) (ret / USEC_PER_SEC);
+ x_usec = 0;
+
+ if (!localtime_or_gmtime_r(&x, &tm, utc))
+ return -EINVAL;
+
+ tm.tm_isdst = dst;
+ if (!with_tz && tzn)
+ tm.tm_zone = tzn;
+
+ if (streq(t, "today")) {
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+
+ } else if (streq(t, "yesterday")) {
+ tm.tm_mday--;
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+
+ } else if (streq(t, "tomorrow")) {
+ tm.tm_mday++;
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+ }
+
+ for (i = 0; i < ELEMENTSOF(day_nr); i++) {
+ size_t skip;
+
+ if (!startswith_no_case(t, day_nr[i].name))
+ continue;
+
+ skip = strlen(day_nr[i].name);
+ if (t[skip] != ' ')
+ continue;
+
+ weekday = day_nr[i].nr;
+ t += skip + 1;
+ break;
+ }
+
+ copy = tm;
+ k = strptime(t, "%y-%m-%d %H:%M:%S", &tm);
+ if (k) {
+ if (*k == '.')
+ goto parse_usec;
+ else if (*k == 0)
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%Y-%m-%d %H:%M:%S", &tm);
+ if (k) {
+ if (*k == '.')
+ goto parse_usec;
+ else if (*k == 0)
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%y-%m-%d %H:%M", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = 0;
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%Y-%m-%d %H:%M", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = 0;
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%y-%m-%d", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%Y-%m-%d", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%H:%M:%S", &tm);
+ if (k) {
+ if (*k == '.')
+ goto parse_usec;
+ else if (*k == 0)
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%H:%M", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = 0;
+ goto from_tm;
+ }
+
+ return -EINVAL;
+
+parse_usec:
+ {
+ unsigned add;
+
+ k++;
+ r = parse_fractional_part_u(&k, 6, &add);
+ if (r < 0)
+ return -EINVAL;
+
+ if (*k)
+ return -EINVAL;
+
+ x_usec = add;
+ }
+
+from_tm:
+ if (weekday >= 0 && tm.tm_wday != weekday)
+ return -EINVAL;
+
+ x = mktime_or_timegm(&tm, utc);
+ if (x < 0)
+ return -EINVAL;
+
+ ret = (usec_t) x * USEC_PER_SEC + x_usec;
+ if (ret > USEC_TIMESTAMP_FORMATTABLE_MAX)
+ return -EINVAL;
+
+finish:
+ if (ret + plus < ret) /* overflow? */
+ return -EINVAL;
+ ret += plus;
+ if (ret > USEC_TIMESTAMP_FORMATTABLE_MAX)
+ return -EINVAL;
+
+ if (ret >= minus)
+ ret -= minus;
+ else
+ return -EINVAL;
+
+ *usec = ret;
+
+ return 0;
+}
+
+typedef struct ParseTimestampResult {
+ usec_t usec;
+ int return_value;
+} ParseTimestampResult;
+
+int parse_timestamp(const char *t, usec_t *usec) {
+ char *last_space, *tz = NULL;
+ ParseTimestampResult *shared, tmp;
+ int r;
+
+ last_space = strrchr(t, ' ');
+ if (last_space != NULL && timezone_is_valid(last_space + 1, LOG_DEBUG))
+ tz = last_space + 1;
+
+ if (!tz || endswith_no_case(t, " UTC"))
+ return parse_timestamp_impl(t, usec, false);
+
+ shared = mmap(NULL, sizeof *shared, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+ if (shared == MAP_FAILED)
+ return negative_errno();
+
+ r = safe_fork("(sd-timestamp)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_WAIT, NULL);
+ if (r < 0) {
+ (void) munmap(shared, sizeof *shared);
+ return r;
+ }
+ if (r == 0) {
+ bool with_tz = true;
+
+ if (setenv("TZ", tz, 1) != 0) {
+ shared->return_value = negative_errno();
+ _exit(EXIT_FAILURE);
+ }
+
+ tzset();
+
+ /* If there is a timezone that matches the tzname fields, leave the parsing to the implementation.
+ * Otherwise just cut it off. */
+ with_tz = !STR_IN_SET(tz, tzname[0], tzname[1]);
+
+ /* Cut off the timezone if we don't need it. */
+ if (with_tz)
+ t = strndupa(t, last_space - t);
+
+ shared->return_value = parse_timestamp_impl(t, &shared->usec, with_tz);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ tmp = *shared;
+ if (munmap(shared, sizeof *shared) != 0)
+ return negative_errno();
+
+ if (tmp.return_value == 0)
+ *usec = tmp.usec;
+
+ return tmp.return_value;
+}
+
+static const char* extract_multiplier(const char *p, usec_t *multiplier) {
+ static const struct {
+ const char *suffix;
+ usec_t usec;
+ } table[] = {
+ { "seconds", USEC_PER_SEC },
+ { "second", USEC_PER_SEC },
+ { "sec", USEC_PER_SEC },
+ { "s", USEC_PER_SEC },
+ { "minutes", USEC_PER_MINUTE },
+ { "minute", USEC_PER_MINUTE },
+ { "min", USEC_PER_MINUTE },
+ { "months", USEC_PER_MONTH },
+ { "month", USEC_PER_MONTH },
+ { "M", USEC_PER_MONTH },
+ { "msec", USEC_PER_MSEC },
+ { "ms", USEC_PER_MSEC },
+ { "m", USEC_PER_MINUTE },
+ { "hours", USEC_PER_HOUR },
+ { "hour", USEC_PER_HOUR },
+ { "hr", USEC_PER_HOUR },
+ { "h", USEC_PER_HOUR },
+ { "days", USEC_PER_DAY },
+ { "day", USEC_PER_DAY },
+ { "d", USEC_PER_DAY },
+ { "weeks", USEC_PER_WEEK },
+ { "week", USEC_PER_WEEK },
+ { "w", USEC_PER_WEEK },
+ { "years", USEC_PER_YEAR },
+ { "year", USEC_PER_YEAR },
+ { "y", USEC_PER_YEAR },
+ { "usec", 1ULL },
+ { "us", 1ULL },
+ { "µs", 1ULL },
+ };
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(table); i++) {
+ char *e;
+
+ e = startswith(p, table[i].suffix);
+ if (e) {
+ *multiplier = table[i].usec;
+ return e;
+ }
+ }
+
+ return p;
+}
+
+int parse_time(const char *t, usec_t *usec, usec_t default_unit) {
+ const char *p, *s;
+ usec_t r = 0;
+ bool something = false;
+
+ assert(t);
+ assert(usec);
+ assert(default_unit > 0);
+
+ p = t;
+
+ p += strspn(p, WHITESPACE);
+ s = startswith(p, "infinity");
+ if (s) {
+ s += strspn(s, WHITESPACE);
+ if (*s != 0)
+ return -EINVAL;
+
+ *usec = USEC_INFINITY;
+ return 0;
+ }
+
+ for (;;) {
+ usec_t multiplier = default_unit, k;
+ long long l;
+ char *e;
+
+ p += strspn(p, WHITESPACE);
+
+ if (*p == 0) {
+ if (!something)
+ return -EINVAL;
+
+ break;
+ }
+
+ if (*p == '-') /* Don't allow "-0" */
+ return -ERANGE;
+
+ errno = 0;
+ l = strtoll(p, &e, 10);
+ if (errno > 0)
+ return -errno;
+ if (l < 0)
+ return -ERANGE;
+
+ if (*e == '.') {
+ p = e + 1;
+ p += strspn(p, DIGITS);
+ } else if (e == p)
+ return -EINVAL;
+ else
+ p = e;
+
+ s = extract_multiplier(p + strspn(p, WHITESPACE), &multiplier);
+ if (s == p && *s != '\0')
+ /* Don't allow '12.34.56', but accept '12.34 .56' or '12.34s.56'*/
+ return -EINVAL;
+
+ p = s;
+
+ if ((usec_t) l >= USEC_INFINITY / multiplier)
+ return -ERANGE;
+
+ k = (usec_t) l * multiplier;
+ if (k >= USEC_INFINITY - r)
+ return -ERANGE;
+
+ r += k;
+
+ something = true;
+
+ if (*e == '.') {
+ usec_t m = multiplier / 10;
+ const char *b;
+
+ for (b = e + 1; *b >= '0' && *b <= '9'; b++, m /= 10) {
+ k = (usec_t) (*b - '0') * m;
+ if (k >= USEC_INFINITY - r)
+ return -ERANGE;
+
+ r += k;
+ }
+
+ /* Don't allow "0.-0", "3.+1", "3. 1", "3.sec" or "3.hoge"*/
+ if (b == e + 1)
+ return -EINVAL;
+ }
+ }
+
+ *usec = r;
+
+ return 0;
+}
+
+int parse_sec(const char *t, usec_t *usec) {
+ return parse_time(t, usec, USEC_PER_SEC);
+}
+
+int parse_sec_fix_0(const char *t, usec_t *ret) {
+ usec_t k;
+ int r;
+
+ assert(t);
+ assert(ret);
+
+ r = parse_sec(t, &k);
+ if (r < 0)
+ return r;
+
+ *ret = k == 0 ? USEC_INFINITY : k;
+ return r;
+}
+
+static const char* extract_nsec_multiplier(const char *p, nsec_t *multiplier) {
+ static const struct {
+ const char *suffix;
+ nsec_t nsec;
+ } table[] = {
+ { "seconds", NSEC_PER_SEC },
+ { "second", NSEC_PER_SEC },
+ { "sec", NSEC_PER_SEC },
+ { "s", NSEC_PER_SEC },
+ { "minutes", NSEC_PER_MINUTE },
+ { "minute", NSEC_PER_MINUTE },
+ { "min", NSEC_PER_MINUTE },
+ { "months", NSEC_PER_MONTH },
+ { "month", NSEC_PER_MONTH },
+ { "M", NSEC_PER_MONTH },
+ { "msec", NSEC_PER_MSEC },
+ { "ms", NSEC_PER_MSEC },
+ { "m", NSEC_PER_MINUTE },
+ { "hours", NSEC_PER_HOUR },
+ { "hour", NSEC_PER_HOUR },
+ { "hr", NSEC_PER_HOUR },
+ { "h", NSEC_PER_HOUR },
+ { "days", NSEC_PER_DAY },
+ { "day", NSEC_PER_DAY },
+ { "d", NSEC_PER_DAY },
+ { "weeks", NSEC_PER_WEEK },
+ { "week", NSEC_PER_WEEK },
+ { "w", NSEC_PER_WEEK },
+ { "years", NSEC_PER_YEAR },
+ { "year", NSEC_PER_YEAR },
+ { "y", NSEC_PER_YEAR },
+ { "usec", NSEC_PER_USEC },
+ { "us", NSEC_PER_USEC },
+ { "µs", NSEC_PER_USEC },
+ { "nsec", 1ULL },
+ { "ns", 1ULL },
+ { "", 1ULL }, /* default is nsec */
+ };
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(table); i++) {
+ char *e;
+
+ e = startswith(p, table[i].suffix);
+ if (e) {
+ *multiplier = table[i].nsec;
+ return e;
+ }
+ }
+
+ return p;
+}
+
+int parse_nsec(const char *t, nsec_t *nsec) {
+ const char *p, *s;
+ nsec_t r = 0;
+ bool something = false;
+
+ assert(t);
+ assert(nsec);
+
+ p = t;
+
+ p += strspn(p, WHITESPACE);
+ s = startswith(p, "infinity");
+ if (s) {
+ s += strspn(s, WHITESPACE);
+ if (*s != 0)
+ return -EINVAL;
+
+ *nsec = NSEC_INFINITY;
+ return 0;
+ }
+
+ for (;;) {
+ nsec_t multiplier = 1, k;
+ long long l;
+ char *e;
+
+ p += strspn(p, WHITESPACE);
+
+ if (*p == 0) {
+ if (!something)
+ return -EINVAL;
+
+ break;
+ }
+
+ if (*p == '-') /* Don't allow "-0" */
+ return -ERANGE;
+
+ errno = 0;
+ l = strtoll(p, &e, 10);
+ if (errno > 0)
+ return -errno;
+ if (l < 0)
+ return -ERANGE;
+
+ if (*e == '.') {
+ p = e + 1;
+ p += strspn(p, DIGITS);
+ } else if (e == p)
+ return -EINVAL;
+ else
+ p = e;
+
+ s = extract_nsec_multiplier(p + strspn(p, WHITESPACE), &multiplier);
+ if (s == p && *s != '\0')
+ /* Don't allow '12.34.56', but accept '12.34 .56' or '12.34s.56'*/
+ return -EINVAL;
+
+ p = s;
+
+ if ((nsec_t) l >= NSEC_INFINITY / multiplier)
+ return -ERANGE;
+
+ k = (nsec_t) l * multiplier;
+ if (k >= NSEC_INFINITY - r)
+ return -ERANGE;
+
+ r += k;
+
+ something = true;
+
+ if (*e == '.') {
+ nsec_t m = multiplier / 10;
+ const char *b;
+
+ for (b = e + 1; *b >= '0' && *b <= '9'; b++, m /= 10) {
+ k = (nsec_t) (*b - '0') * m;
+ if (k >= NSEC_INFINITY - r)
+ return -ERANGE;
+
+ r += k;
+ }
+
+ /* Don't allow "0.-0", "3.+1", "3. 1", "3.sec" or "3.hoge"*/
+ if (b == e + 1)
+ return -EINVAL;
+ }
+ }
+
+ *nsec = r;
+
+ return 0;
+}
+
+bool ntp_synced(void) {
+ struct timex txc = {};
+
+ if (adjtimex(&txc) < 0)
+ return false;
+
+ if (txc.status & STA_UNSYNC)
+ return false;
+
+ return true;
+}
+
+int get_timezones(char ***ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **zones = NULL;
+ size_t n_zones = 0, n_allocated = 0;
+ int r;
+
+ assert(ret);
+
+ zones = strv_new("UTC");
+ if (!zones)
+ return -ENOMEM;
+
+ n_allocated = 2;
+ n_zones = 1;
+
+ f = fopen("/usr/share/zoneinfo/zone.tab", "re");
+ if (f) {
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *p, *w;
+ size_t k;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ p = strstrip(line);
+
+ if (isempty(p) || *p == '#')
+ continue;
+
+ /* Skip over country code */
+ p += strcspn(p, WHITESPACE);
+ p += strspn(p, WHITESPACE);
+
+ /* Skip over coordinates */
+ p += strcspn(p, WHITESPACE);
+ p += strspn(p, WHITESPACE);
+
+ /* Found timezone name */
+ k = strcspn(p, WHITESPACE);
+ if (k <= 0)
+ continue;
+
+ w = strndup(p, k);
+ if (!w)
+ return -ENOMEM;
+
+ if (!GREEDY_REALLOC(zones, n_allocated, n_zones + 2)) {
+ free(w);
+ return -ENOMEM;
+ }
+
+ zones[n_zones++] = w;
+ zones[n_zones] = NULL;
+ }
+
+ strv_sort(zones);
+
+ } else if (errno != ENOENT)
+ return -errno;
+
+ *ret = TAKE_PTR(zones);
+
+ return 0;
+}
+
+bool timezone_is_valid(const char *name, int log_level) {
+ bool slash = false;
+ const char *p, *t;
+ _cleanup_close_ int fd = -1;
+ char buf[4];
+ int r;
+
+ if (isempty(name))
+ return false;
+
+ if (name[0] == '/')
+ return false;
+
+ for (p = name; *p; p++) {
+ if (!(*p >= '0' && *p <= '9') &&
+ !(*p >= 'a' && *p <= 'z') &&
+ !(*p >= 'A' && *p <= 'Z') &&
+ !IN_SET(*p, '-', '_', '+', '/'))
+ return false;
+
+ if (*p == '/') {
+
+ if (slash)
+ return false;
+
+ slash = true;
+ } else
+ slash = false;
+ }
+
+ if (slash)
+ return false;
+
+ if (p - name >= PATH_MAX)
+ return false;
+
+ t = strjoina("/usr/share/zoneinfo/", name);
+
+ fd = open(t, O_RDONLY|O_CLOEXEC);
+ if (fd < 0) {
+ log_full_errno(log_level, errno, "Failed to open timezone file '%s': %m", t);
+ return false;
+ }
+
+ r = fd_verify_regular(fd);
+ if (r < 0) {
+ log_full_errno(log_level, r, "Timezone file '%s' is not a regular file: %m", t);
+ return false;
+ }
+
+ r = loop_read_exact(fd, buf, 4, false);
+ if (r < 0) {
+ log_full_errno(log_level, r, "Failed to read from timezone file '%s': %m", t);
+ return false;
+ }
+
+ /* Magic from tzfile(5) */
+ if (memcmp(buf, "TZif", 4) != 0) {
+ log_full(log_level, "Timezone file '%s' has wrong magic bytes", t);
+ return false;
+ }
+
+ return true;
+}
+
+bool clock_boottime_supported(void) {
+ static int supported = -1;
+
+ /* Note that this checks whether CLOCK_BOOTTIME is available in general as well as available for timerfds()! */
+
+ if (supported < 0) {
+ int fd;
+
+ fd = timerfd_create(CLOCK_BOOTTIME, TFD_NONBLOCK|TFD_CLOEXEC);
+ if (fd < 0)
+ supported = false;
+ else {
+ safe_close(fd);
+ supported = true;
+ }
+ }
+
+ return supported;
+}
+
+clockid_t clock_boottime_or_monotonic(void) {
+ if (clock_boottime_supported())
+ return CLOCK_BOOTTIME;
+ else
+ return CLOCK_MONOTONIC;
+}
+
+bool clock_supported(clockid_t clock) {
+ struct timespec ts;
+
+ switch (clock) {
+
+ case CLOCK_MONOTONIC:
+ case CLOCK_REALTIME:
+ return true;
+
+ case CLOCK_BOOTTIME:
+ return clock_boottime_supported();
+
+ case CLOCK_BOOTTIME_ALARM:
+ if (!clock_boottime_supported())
+ return false;
+
+ _fallthrough_;
+ default:
+ /* For everything else, check properly */
+ return clock_gettime(clock, &ts) >= 0;
+ }
+}
+
+int get_timezone(char **tz) {
+ _cleanup_free_ char *t = NULL;
+ const char *e;
+ char *z;
+ int r;
+
+ r = readlink_malloc("/etc/localtime", &t);
+ if (r < 0)
+ return r; /* returns EINVAL if not a symlink */
+
+ e = PATH_STARTSWITH_SET(t, "/usr/share/zoneinfo/", "../usr/share/zoneinfo/");
+ if (!e)
+ return -EINVAL;
+
+ if (!timezone_is_valid(e, LOG_DEBUG))
+ return -EINVAL;
+
+ z = strdup(e);
+ if (!z)
+ return -ENOMEM;
+
+ *tz = z;
+ return 0;
+}
+
+time_t mktime_or_timegm(struct tm *tm, bool utc) {
+ return utc ? timegm(tm) : mktime(tm);
+}
+
+struct tm *localtime_or_gmtime_r(const time_t *t, struct tm *tm, bool utc) {
+ return utc ? gmtime_r(t, tm) : localtime_r(t, tm);
+}
+
+unsigned long usec_to_jiffies(usec_t u) {
+ static thread_local unsigned long hz = 0;
+ long r;
+
+ if (hz == 0) {
+ r = sysconf(_SC_CLK_TCK);
+
+ assert(r > 0);
+ hz = r;
+ }
+
+ return DIV_ROUND_UP(u , USEC_PER_SEC / hz);
+}
+
+usec_t usec_shift_clock(usec_t x, clockid_t from, clockid_t to) {
+ usec_t a, b;
+
+ if (x == USEC_INFINITY)
+ return USEC_INFINITY;
+ if (map_clock_id(from) == map_clock_id(to))
+ return x;
+
+ a = now(from);
+ b = now(to);
+
+ if (x > a)
+ /* x lies in the future */
+ return usec_add(b, usec_sub_unsigned(x, a));
+ else
+ /* x lies in the past */
+ return usec_sub_unsigned(b, usec_sub_unsigned(a, x));
+}
+
+bool in_utc_timezone(void) {
+ tzset();
+
+ return timezone == 0 && daylight == 0;
+}
+
+int time_change_fd(void) {
+
+ /* We only care for the cancellation event, hence we set the timeout to the latest possible value. */
+ static const struct itimerspec its = {
+ .it_value.tv_sec = TIME_T_MAX,
+ };
+
+ _cleanup_close_ int fd;
+
+ assert_cc(sizeof(time_t) == sizeof(TIME_T_MAX));
+
+ /* Uses TFD_TIMER_CANCEL_ON_SET to get notifications whenever CLOCK_REALTIME makes a jump relative to
+ * CLOCK_MONOTONIC. */
+
+ fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK|TFD_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (timerfd_settime(fd, TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET, &its, NULL) < 0)
+ return -errno;
+
+ return TAKE_FD(fd);
+}
diff --git a/src/basic/time-util.h b/src/basic/time-util.h
new file mode 100644
index 0000000..5316305
--- /dev/null
+++ b/src/basic/time-util.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+
+typedef uint64_t usec_t;
+typedef uint64_t nsec_t;
+
+#define PRI_NSEC PRIu64
+#define PRI_USEC PRIu64
+#define NSEC_FMT "%" PRI_NSEC
+#define USEC_FMT "%" PRI_USEC
+
+#include "macro.h"
+
+typedef struct dual_timestamp {
+ usec_t realtime;
+ usec_t monotonic;
+} dual_timestamp;
+
+typedef struct triple_timestamp {
+ usec_t realtime;
+ usec_t monotonic;
+ usec_t boottime;
+} triple_timestamp;
+
+#define USEC_INFINITY ((usec_t) -1)
+#define NSEC_INFINITY ((nsec_t) -1)
+
+#define MSEC_PER_SEC 1000ULL
+#define USEC_PER_SEC ((usec_t) 1000000ULL)
+#define USEC_PER_MSEC ((usec_t) 1000ULL)
+#define NSEC_PER_SEC ((nsec_t) 1000000000ULL)
+#define NSEC_PER_MSEC ((nsec_t) 1000000ULL)
+#define NSEC_PER_USEC ((nsec_t) 1000ULL)
+
+#define USEC_PER_MINUTE ((usec_t) (60ULL*USEC_PER_SEC))
+#define NSEC_PER_MINUTE ((nsec_t) (60ULL*NSEC_PER_SEC))
+#define USEC_PER_HOUR ((usec_t) (60ULL*USEC_PER_MINUTE))
+#define NSEC_PER_HOUR ((nsec_t) (60ULL*NSEC_PER_MINUTE))
+#define USEC_PER_DAY ((usec_t) (24ULL*USEC_PER_HOUR))
+#define NSEC_PER_DAY ((nsec_t) (24ULL*NSEC_PER_HOUR))
+#define USEC_PER_WEEK ((usec_t) (7ULL*USEC_PER_DAY))
+#define NSEC_PER_WEEK ((nsec_t) (7ULL*NSEC_PER_DAY))
+#define USEC_PER_MONTH ((usec_t) (2629800ULL*USEC_PER_SEC))
+#define NSEC_PER_MONTH ((nsec_t) (2629800ULL*NSEC_PER_SEC))
+#define USEC_PER_YEAR ((usec_t) (31557600ULL*USEC_PER_SEC))
+#define NSEC_PER_YEAR ((nsec_t) (31557600ULL*NSEC_PER_SEC))
+
+/* We assume a maximum timezone length of 6. TZNAME_MAX is not defined on Linux, but glibc internally initializes this
+ * to 6. Let's rely on that. */
+#define FORMAT_TIMESTAMP_MAX (3+1+10+1+8+1+6+1+6+1)
+#define FORMAT_TIMESTAMP_WIDTH 28 /* when outputting, assume this width */
+#define FORMAT_TIMESTAMP_RELATIVE_MAX 256
+#define FORMAT_TIMESPAN_MAX 64
+
+#define TIME_T_MAX (time_t)((UINTMAX_C(1) << ((sizeof(time_t) << 3) - 1)) - 1)
+
+#define DUAL_TIMESTAMP_NULL ((struct dual_timestamp) {})
+#define TRIPLE_TIMESTAMP_NULL ((struct triple_timestamp) {})
+
+usec_t now(clockid_t clock);
+nsec_t now_nsec(clockid_t clock);
+
+dual_timestamp* dual_timestamp_get(dual_timestamp *ts);
+dual_timestamp* dual_timestamp_from_realtime(dual_timestamp *ts, usec_t u);
+dual_timestamp* dual_timestamp_from_monotonic(dual_timestamp *ts, usec_t u);
+dual_timestamp* dual_timestamp_from_boottime_or_monotonic(dual_timestamp *ts, usec_t u);
+
+triple_timestamp* triple_timestamp_get(triple_timestamp *ts);
+triple_timestamp* triple_timestamp_from_realtime(triple_timestamp *ts, usec_t u);
+
+#define DUAL_TIMESTAMP_HAS_CLOCK(clock) \
+ IN_SET(clock, CLOCK_REALTIME, CLOCK_REALTIME_ALARM, CLOCK_MONOTONIC)
+
+#define TRIPLE_TIMESTAMP_HAS_CLOCK(clock) \
+ IN_SET(clock, CLOCK_REALTIME, CLOCK_REALTIME_ALARM, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM)
+
+static inline bool dual_timestamp_is_set(const dual_timestamp *ts) {
+ return ((ts->realtime > 0 && ts->realtime != USEC_INFINITY) ||
+ (ts->monotonic > 0 && ts->monotonic != USEC_INFINITY));
+}
+
+static inline bool triple_timestamp_is_set(const triple_timestamp *ts) {
+ return ((ts->realtime > 0 && ts->realtime != USEC_INFINITY) ||
+ (ts->monotonic > 0 && ts->monotonic != USEC_INFINITY) ||
+ (ts->boottime > 0 && ts->boottime != USEC_INFINITY));
+}
+
+usec_t triple_timestamp_by_clock(triple_timestamp *ts, clockid_t clock);
+
+usec_t timespec_load(const struct timespec *ts) _pure_;
+nsec_t timespec_load_nsec(const struct timespec *ts) _pure_;
+struct timespec *timespec_store(struct timespec *ts, usec_t u);
+
+usec_t timeval_load(const struct timeval *tv) _pure_;
+struct timeval *timeval_store(struct timeval *tv, usec_t u);
+
+char *format_timestamp(char *buf, size_t l, usec_t t);
+char *format_timestamp_utc(char *buf, size_t l, usec_t t);
+char *format_timestamp_us(char *buf, size_t l, usec_t t);
+char *format_timestamp_us_utc(char *buf, size_t l, usec_t t);
+char *format_timestamp_relative(char *buf, size_t l, usec_t t);
+char *format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy);
+
+int parse_timestamp(const char *t, usec_t *usec);
+
+int parse_sec(const char *t, usec_t *usec);
+int parse_sec_fix_0(const char *t, usec_t *usec);
+int parse_time(const char *t, usec_t *usec, usec_t default_unit);
+int parse_nsec(const char *t, nsec_t *nsec);
+
+bool ntp_synced(void);
+
+int get_timezones(char ***l);
+bool timezone_is_valid(const char *name, int log_level);
+
+bool clock_boottime_supported(void);
+bool clock_supported(clockid_t clock);
+clockid_t clock_boottime_or_monotonic(void);
+
+usec_t usec_shift_clock(usec_t, clockid_t from, clockid_t to);
+
+int get_timezone(char **timezone);
+
+time_t mktime_or_timegm(struct tm *tm, bool utc);
+struct tm *localtime_or_gmtime_r(const time_t *t, struct tm *tm, bool utc);
+
+unsigned long usec_to_jiffies(usec_t usec);
+
+bool in_utc_timezone(void);
+
+static inline usec_t usec_add(usec_t a, usec_t b) {
+ usec_t c;
+
+ /* Adds two time values, and makes sure USEC_INFINITY as input results as USEC_INFINITY in output, and doesn't
+ * overflow. */
+
+ c = a + b;
+ if (c < a || c < b) /* overflow check */
+ return USEC_INFINITY;
+
+ return c;
+}
+
+static inline usec_t usec_sub_unsigned(usec_t timestamp, usec_t delta) {
+
+ if (timestamp == USEC_INFINITY) /* Make sure infinity doesn't degrade */
+ return USEC_INFINITY;
+ if (timestamp < delta)
+ return 0;
+
+ return timestamp - delta;
+}
+
+static inline usec_t usec_sub_signed(usec_t timestamp, int64_t delta) {
+ if (delta < 0)
+ return usec_add(timestamp, (usec_t) (-delta));
+ else
+ return usec_sub_unsigned(timestamp, (usec_t) delta);
+}
+
+#if SIZEOF_TIME_T == 8
+/* The last second we can format is 31. Dec 9999, 1s before midnight, because otherwise we'd enter 5 digit year
+ * territory. However, since we want to stay away from this in all timezones we take one day off. */
+#define USEC_TIMESTAMP_FORMATTABLE_MAX ((usec_t) 253402214399000000)
+#elif SIZEOF_TIME_T == 4
+/* With a 32bit time_t we can't go beyond 2038... */
+#define USEC_TIMESTAMP_FORMATTABLE_MAX ((usec_t) 2147483647000000)
+#else
+#error "Yuck, time_t is neither 4 nor 8 bytes wide?"
+#endif
+
+int time_change_fd(void);
diff --git a/src/basic/tmpfile-util.c b/src/basic/tmpfile-util.c
new file mode 100644
index 0000000..bc92d6a
--- /dev/null
+++ b/src/basic/tmpfile-util.c
@@ -0,0 +1,330 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/mman.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "memfd-util.h"
+#include "missing_fcntl.h"
+#include "missing_syscall.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+
+int fopen_temporary(const char *path, FILE **_f, char **_temp_path) {
+ FILE *f;
+ char *t;
+ int r, fd;
+
+ assert(path);
+ assert(_f);
+ assert(_temp_path);
+
+ r = tempfn_xxxxxx(path, NULL, &t);
+ if (r < 0)
+ return r;
+
+ fd = mkostemp_safe(t);
+ if (fd < 0) {
+ free(t);
+ return -errno;
+ }
+
+ f = fdopen(fd, "w");
+ if (!f) {
+ unlink_noerrno(t);
+ free(t);
+ safe_close(fd);
+ return -errno;
+ }
+
+ *_f = f;
+ *_temp_path = t;
+
+ return 0;
+}
+
+/* This is much like mkostemp() but is subject to umask(). */
+int mkostemp_safe(char *pattern) {
+ _cleanup_umask_ mode_t u = 0;
+ int fd;
+
+ assert(pattern);
+
+ u = umask(077);
+
+ fd = mkostemp(pattern, O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
+int fmkostemp_safe(char *pattern, const char *mode, FILE **ret_f) {
+ int fd;
+ FILE *f;
+
+ fd = mkostemp_safe(pattern);
+ if (fd < 0)
+ return fd;
+
+ f = fdopen(fd, mode);
+ if (!f) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ *ret_f = f;
+ return 0;
+}
+
+int tempfn_xxxxxx(const char *p, const char *extra, char **ret) {
+ const char *fn;
+ char *t;
+
+ assert(ret);
+
+ if (isempty(p))
+ return -EINVAL;
+ if (path_equal(p, "/"))
+ return -EINVAL;
+
+ /*
+ * Turns this:
+ * /foo/bar/waldo
+ *
+ * Into this:
+ * /foo/bar/.#<extra>waldoXXXXXX
+ */
+
+ fn = basename(p);
+ if (!filename_is_valid(fn))
+ return -EINVAL;
+
+ extra = strempty(extra);
+
+ t = new(char, strlen(p) + 2 + strlen(extra) + 6 + 1);
+ if (!t)
+ return -ENOMEM;
+
+ strcpy(stpcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), ".#"), extra), fn), "XXXXXX");
+
+ *ret = path_simplify(t, false);
+ return 0;
+}
+
+int tempfn_random(const char *p, const char *extra, char **ret) {
+ const char *fn;
+ char *t, *x;
+ uint64_t u;
+ unsigned i;
+
+ assert(ret);
+
+ if (isempty(p))
+ return -EINVAL;
+ if (path_equal(p, "/"))
+ return -EINVAL;
+
+ /*
+ * Turns this:
+ * /foo/bar/waldo
+ *
+ * Into this:
+ * /foo/bar/.#<extra>waldobaa2a261115984a9
+ */
+
+ fn = basename(p);
+ if (!filename_is_valid(fn))
+ return -EINVAL;
+
+ extra = strempty(extra);
+
+ t = new(char, strlen(p) + 2 + strlen(extra) + 16 + 1);
+ if (!t)
+ return -ENOMEM;
+
+ x = stpcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), ".#"), extra), fn);
+
+ u = random_u64();
+ for (i = 0; i < 16; i++) {
+ *(x++) = hexchar(u & 0xF);
+ u >>= 4;
+ }
+
+ *x = 0;
+
+ *ret = path_simplify(t, false);
+ return 0;
+}
+
+int tempfn_random_child(const char *p, const char *extra, char **ret) {
+ char *t, *x;
+ uint64_t u;
+ unsigned i;
+ int r;
+
+ assert(ret);
+
+ /* Turns this:
+ * /foo/bar/waldo
+ * Into this:
+ * /foo/bar/waldo/.#<extra>3c2b6219aa75d7d0
+ */
+
+ if (!p) {
+ r = tmp_dir(&p);
+ if (r < 0)
+ return r;
+ }
+
+ extra = strempty(extra);
+
+ t = new(char, strlen(p) + 3 + strlen(extra) + 16 + 1);
+ if (!t)
+ return -ENOMEM;
+
+ if (isempty(p))
+ x = stpcpy(stpcpy(t, ".#"), extra);
+ else
+ x = stpcpy(stpcpy(stpcpy(t, p), "/.#"), extra);
+
+ u = random_u64();
+ for (i = 0; i < 16; i++) {
+ *(x++) = hexchar(u & 0xF);
+ u >>= 4;
+ }
+
+ *x = 0;
+
+ *ret = path_simplify(t, false);
+ return 0;
+}
+
+int open_tmpfile_unlinkable(const char *directory, int flags) {
+ char *p;
+ int fd, r;
+
+ if (!directory) {
+ r = tmp_dir(&directory);
+ if (r < 0)
+ return r;
+ } else if (isempty(directory))
+ return -EINVAL;
+
+ /* Returns an unlinked temporary file that cannot be linked into the file system anymore */
+
+ /* Try O_TMPFILE first, if it is supported */
+ fd = open(directory, flags|O_TMPFILE|O_EXCL, S_IRUSR|S_IWUSR);
+ if (fd >= 0)
+ return fd;
+
+ /* Fall back to unguessable name + unlinking */
+ p = strjoina(directory, "/systemd-tmp-XXXXXX");
+
+ fd = mkostemp_safe(p);
+ if (fd < 0)
+ return fd;
+
+ (void) unlink(p);
+
+ return fd;
+}
+
+int open_tmpfile_linkable(const char *target, int flags, char **ret_path) {
+ _cleanup_free_ char *tmp = NULL;
+ int r, fd;
+
+ assert(target);
+ assert(ret_path);
+
+ /* Don't allow O_EXCL, as that has a special meaning for O_TMPFILE */
+ assert((flags & O_EXCL) == 0);
+
+ /* Creates a temporary file, that shall be renamed to "target" later. If possible, this uses O_TMPFILE – in
+ * which case "ret_path" will be returned as NULL. If not possible a the tempoary path name used is returned in
+ * "ret_path". Use link_tmpfile() below to rename the result after writing the file in full. */
+
+ fd = open_parent(target, O_TMPFILE|flags, 0640);
+ if (fd >= 0) {
+ *ret_path = NULL;
+ return fd;
+ }
+
+ log_debug_errno(fd, "Failed to use O_TMPFILE for %s: %m", target);
+
+ r = tempfn_random(target, NULL, &tmp);
+ if (r < 0)
+ return r;
+
+ fd = open(tmp, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|flags, 0640);
+ if (fd < 0)
+ return -errno;
+
+ *ret_path = TAKE_PTR(tmp);
+
+ return fd;
+}
+
+int link_tmpfile(int fd, const char *path, const char *target) {
+ int r;
+
+ assert(fd >= 0);
+ assert(target);
+
+ /* Moves a temporary file created with open_tmpfile() above into its final place. if "path" is NULL an fd
+ * created with O_TMPFILE is assumed, and linkat() is used. Otherwise it is assumed O_TMPFILE is not supported
+ * on the directory, and renameat2() is used instead.
+ *
+ * Note that in both cases we will not replace existing files. This is because linkat() does not support this
+ * operation currently (renameat2() does), and there is no nice way to emulate this. */
+
+ if (path) {
+ r = rename_noreplace(AT_FDCWD, path, AT_FDCWD, target);
+ if (r < 0)
+ return r;
+ } else {
+ char proc_fd_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(fd) + 1];
+
+ xsprintf(proc_fd_path, "/proc/self/fd/%i", fd);
+
+ if (linkat(AT_FDCWD, proc_fd_path, AT_FDCWD, target, AT_SYMLINK_FOLLOW) < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int mkdtemp_malloc(const char *template, char **ret) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(ret);
+
+ if (template)
+ p = strdup(template);
+ else {
+ const char *tmp;
+
+ r = tmp_dir(&tmp);
+ if (r < 0)
+ return r;
+
+ p = strjoin(tmp, "/XXXXXX");
+ }
+ if (!p)
+ return -ENOMEM;
+
+ if (!mkdtemp(p))
+ return -errno;
+
+ *ret = TAKE_PTR(p);
+ return 0;
+}
diff --git a/src/basic/tmpfile-util.h b/src/basic/tmpfile-util.h
new file mode 100644
index 0000000..802c85d
--- /dev/null
+++ b/src/basic/tmpfile-util.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdio.h>
+
+int fopen_temporary(const char *path, FILE **_f, char **_temp_path);
+int mkostemp_safe(char *pattern);
+int fmkostemp_safe(char *pattern, const char *mode, FILE**_f);
+
+int tempfn_xxxxxx(const char *p, const char *extra, char **ret);
+int tempfn_random(const char *p, const char *extra, char **ret);
+int tempfn_random_child(const char *p, const char *extra, char **ret);
+
+int open_tmpfile_unlinkable(const char *directory, int flags);
+int open_tmpfile_linkable(const char *target, int flags, char **ret_path);
+
+int link_tmpfile(int fd, const char *path, const char *target);
+
+int mkdtemp_malloc(const char *template, char **ret);
diff --git a/src/basic/umask-util.h b/src/basic/umask-util.h
new file mode 100644
index 0000000..e964292
--- /dev/null
+++ b/src/basic/umask-util.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+static inline void umaskp(mode_t *u) {
+ umask(*u);
+}
+
+#define _cleanup_umask_ _cleanup_(umaskp)
+
+struct _umask_struct_ {
+ mode_t mask;
+ bool quit;
+};
+
+static inline void _reset_umask_(struct _umask_struct_ *s) {
+ umask(s->mask);
+};
+
+#define RUN_WITH_UMASK(mask) \
+ for (_cleanup_(_reset_umask_) struct _umask_struct_ _saved_umask_ = { umask(mask), false }; \
+ !_saved_umask_.quit ; \
+ _saved_umask_.quit = true)
diff --git a/src/basic/unaligned.h b/src/basic/unaligned.h
new file mode 100644
index 0000000..00c17f8
--- /dev/null
+++ b/src/basic/unaligned.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <endian.h>
+#include <stdint.h>
+
+/* BE */
+
+static inline uint16_t unaligned_read_be16(const void *_u) {
+ const struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+ return be16toh(u->x);
+}
+
+static inline uint32_t unaligned_read_be32(const void *_u) {
+ const struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+ return be32toh(u->x);
+}
+
+static inline uint64_t unaligned_read_be64(const void *_u) {
+ const struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+ return be64toh(u->x);
+}
+
+static inline void unaligned_write_be16(void *_u, uint16_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+ u->x = be16toh(a);
+}
+
+static inline void unaligned_write_be32(void *_u, uint32_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+ u->x = be32toh(a);
+}
+
+static inline void unaligned_write_be64(void *_u, uint64_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+ u->x = be64toh(a);
+}
+
+/* LE */
+
+static inline uint16_t unaligned_read_le16(const void *_u) {
+ const struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+ return le16toh(u->x);
+}
+
+static inline uint32_t unaligned_read_le32(const void *_u) {
+ const struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+ return le32toh(u->x);
+}
+
+static inline uint64_t unaligned_read_le64(const void *_u) {
+ const struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+ return le64toh(u->x);
+}
+
+static inline void unaligned_write_le16(void *_u, uint16_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+ u->x = le16toh(a);
+}
+
+static inline void unaligned_write_le32(void *_u, uint32_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+ u->x = le32toh(a);
+}
+
+static inline void unaligned_write_le64(void *_u, uint64_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+ u->x = le64toh(a);
+}
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define unaligned_read_ne16 unaligned_read_be16
+#define unaligned_read_ne32 unaligned_read_be32
+#define unaligned_read_ne64 unaligned_read_be64
+
+#define unaligned_write_ne16 unaligned_write_be16
+#define unaligned_write_ne32 unaligned_write_be32
+#define unaligned_write_ne64 unaligned_write_be64
+#else
+#define unaligned_read_ne16 unaligned_read_le16
+#define unaligned_read_ne32 unaligned_read_le32
+#define unaligned_read_ne64 unaligned_read_le64
+
+#define unaligned_write_ne16 unaligned_write_le16
+#define unaligned_write_ne32 unaligned_write_le32
+#define unaligned_write_ne64 unaligned_write_le64
+#endif
diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c
new file mode 100644
index 0000000..245daab
--- /dev/null
+++ b/src/basic/unit-def.c
@@ -0,0 +1,273 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-label.h"
+#include "string-table.h"
+#include "unit-def.h"
+#include "unit-name.h"
+
+char *unit_dbus_path_from_name(const char *name) {
+ _cleanup_free_ char *e = NULL;
+
+ assert(name);
+
+ e = bus_label_escape(name);
+ if (!e)
+ return NULL;
+
+ return strappend("/org/freedesktop/systemd1/unit/", e);
+}
+
+int unit_name_from_dbus_path(const char *path, char **name) {
+ const char *e;
+ char *n;
+
+ e = startswith(path, "/org/freedesktop/systemd1/unit/");
+ if (!e)
+ return -EINVAL;
+
+ n = bus_label_unescape(e);
+ if (!n)
+ return -ENOMEM;
+
+ *name = n;
+ return 0;
+}
+
+const char* unit_dbus_interface_from_type(UnitType t) {
+
+ static const char *const table[_UNIT_TYPE_MAX] = {
+ [UNIT_SERVICE] = "org.freedesktop.systemd1.Service",
+ [UNIT_SOCKET] = "org.freedesktop.systemd1.Socket",
+ [UNIT_TARGET] = "org.freedesktop.systemd1.Target",
+ [UNIT_DEVICE] = "org.freedesktop.systemd1.Device",
+ [UNIT_MOUNT] = "org.freedesktop.systemd1.Mount",
+ [UNIT_AUTOMOUNT] = "org.freedesktop.systemd1.Automount",
+ [UNIT_SWAP] = "org.freedesktop.systemd1.Swap",
+ [UNIT_TIMER] = "org.freedesktop.systemd1.Timer",
+ [UNIT_PATH] = "org.freedesktop.systemd1.Path",
+ [UNIT_SLICE] = "org.freedesktop.systemd1.Slice",
+ [UNIT_SCOPE] = "org.freedesktop.systemd1.Scope",
+ };
+
+ if (t < 0)
+ return NULL;
+ if (t >= _UNIT_TYPE_MAX)
+ return NULL;
+
+ return table[t];
+}
+
+const char *unit_dbus_interface_from_name(const char *name) {
+ UnitType t;
+
+ t = unit_name_to_type(name);
+ if (t < 0)
+ return NULL;
+
+ return unit_dbus_interface_from_type(t);
+}
+
+static const char* const unit_type_table[_UNIT_TYPE_MAX] = {
+ [UNIT_SERVICE] = "service",
+ [UNIT_SOCKET] = "socket",
+ [UNIT_TARGET] = "target",
+ [UNIT_DEVICE] = "device",
+ [UNIT_MOUNT] = "mount",
+ [UNIT_AUTOMOUNT] = "automount",
+ [UNIT_SWAP] = "swap",
+ [UNIT_TIMER] = "timer",
+ [UNIT_PATH] = "path",
+ [UNIT_SLICE] = "slice",
+ [UNIT_SCOPE] = "scope",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_type, UnitType);
+
+static const char* const unit_load_state_table[_UNIT_LOAD_STATE_MAX] = {
+ [UNIT_STUB] = "stub",
+ [UNIT_LOADED] = "loaded",
+ [UNIT_NOT_FOUND] = "not-found",
+ [UNIT_BAD_SETTING] = "bad-setting",
+ [UNIT_ERROR] = "error",
+ [UNIT_MERGED] = "merged",
+ [UNIT_MASKED] = "masked"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_load_state, UnitLoadState);
+
+static const char* const unit_active_state_table[_UNIT_ACTIVE_STATE_MAX] = {
+ [UNIT_ACTIVE] = "active",
+ [UNIT_RELOADING] = "reloading",
+ [UNIT_INACTIVE] = "inactive",
+ [UNIT_FAILED] = "failed",
+ [UNIT_ACTIVATING] = "activating",
+ [UNIT_DEACTIVATING] = "deactivating"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_active_state, UnitActiveState);
+
+static const char* const automount_state_table[_AUTOMOUNT_STATE_MAX] = {
+ [AUTOMOUNT_DEAD] = "dead",
+ [AUTOMOUNT_WAITING] = "waiting",
+ [AUTOMOUNT_RUNNING] = "running",
+ [AUTOMOUNT_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(automount_state, AutomountState);
+
+static const char* const device_state_table[_DEVICE_STATE_MAX] = {
+ [DEVICE_DEAD] = "dead",
+ [DEVICE_TENTATIVE] = "tentative",
+ [DEVICE_PLUGGED] = "plugged",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(device_state, DeviceState);
+
+static const char* const mount_state_table[_MOUNT_STATE_MAX] = {
+ [MOUNT_DEAD] = "dead",
+ [MOUNT_MOUNTING] = "mounting",
+ [MOUNT_MOUNTING_DONE] = "mounting-done",
+ [MOUNT_MOUNTED] = "mounted",
+ [MOUNT_REMOUNTING] = "remounting",
+ [MOUNT_UNMOUNTING] = "unmounting",
+ [MOUNT_REMOUNTING_SIGTERM] = "remounting-sigterm",
+ [MOUNT_REMOUNTING_SIGKILL] = "remounting-sigkill",
+ [MOUNT_UNMOUNTING_SIGTERM] = "unmounting-sigterm",
+ [MOUNT_UNMOUNTING_SIGKILL] = "unmounting-sigkill",
+ [MOUNT_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(mount_state, MountState);
+
+static const char* const path_state_table[_PATH_STATE_MAX] = {
+ [PATH_DEAD] = "dead",
+ [PATH_WAITING] = "waiting",
+ [PATH_RUNNING] = "running",
+ [PATH_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(path_state, PathState);
+
+static const char* const scope_state_table[_SCOPE_STATE_MAX] = {
+ [SCOPE_DEAD] = "dead",
+ [SCOPE_RUNNING] = "running",
+ [SCOPE_ABANDONED] = "abandoned",
+ [SCOPE_STOP_SIGTERM] = "stop-sigterm",
+ [SCOPE_STOP_SIGKILL] = "stop-sigkill",
+ [SCOPE_FAILED] = "failed",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(scope_state, ScopeState);
+
+static const char* const service_state_table[_SERVICE_STATE_MAX] = {
+ [SERVICE_DEAD] = "dead",
+ [SERVICE_START_PRE] = "start-pre",
+ [SERVICE_START] = "start",
+ [SERVICE_START_POST] = "start-post",
+ [SERVICE_RUNNING] = "running",
+ [SERVICE_EXITED] = "exited",
+ [SERVICE_RELOAD] = "reload",
+ [SERVICE_STOP] = "stop",
+ [SERVICE_STOP_WATCHDOG] = "stop-watchdog",
+ [SERVICE_STOP_SIGTERM] = "stop-sigterm",
+ [SERVICE_STOP_SIGKILL] = "stop-sigkill",
+ [SERVICE_STOP_POST] = "stop-post",
+ [SERVICE_FINAL_SIGTERM] = "final-sigterm",
+ [SERVICE_FINAL_SIGKILL] = "final-sigkill",
+ [SERVICE_FAILED] = "failed",
+ [SERVICE_AUTO_RESTART] = "auto-restart",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_state, ServiceState);
+
+static const char* const slice_state_table[_SLICE_STATE_MAX] = {
+ [SLICE_DEAD] = "dead",
+ [SLICE_ACTIVE] = "active"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(slice_state, SliceState);
+
+static const char* const socket_state_table[_SOCKET_STATE_MAX] = {
+ [SOCKET_DEAD] = "dead",
+ [SOCKET_START_PRE] = "start-pre",
+ [SOCKET_START_CHOWN] = "start-chown",
+ [SOCKET_START_POST] = "start-post",
+ [SOCKET_LISTENING] = "listening",
+ [SOCKET_RUNNING] = "running",
+ [SOCKET_STOP_PRE] = "stop-pre",
+ [SOCKET_STOP_PRE_SIGTERM] = "stop-pre-sigterm",
+ [SOCKET_STOP_PRE_SIGKILL] = "stop-pre-sigkill",
+ [SOCKET_STOP_POST] = "stop-post",
+ [SOCKET_FINAL_SIGTERM] = "final-sigterm",
+ [SOCKET_FINAL_SIGKILL] = "final-sigkill",
+ [SOCKET_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_state, SocketState);
+
+static const char* const swap_state_table[_SWAP_STATE_MAX] = {
+ [SWAP_DEAD] = "dead",
+ [SWAP_ACTIVATING] = "activating",
+ [SWAP_ACTIVATING_DONE] = "activating-done",
+ [SWAP_ACTIVE] = "active",
+ [SWAP_DEACTIVATING] = "deactivating",
+ [SWAP_DEACTIVATING_SIGTERM] = "deactivating-sigterm",
+ [SWAP_DEACTIVATING_SIGKILL] = "deactivating-sigkill",
+ [SWAP_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(swap_state, SwapState);
+
+static const char* const target_state_table[_TARGET_STATE_MAX] = {
+ [TARGET_DEAD] = "dead",
+ [TARGET_ACTIVE] = "active"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(target_state, TargetState);
+
+static const char* const timer_state_table[_TIMER_STATE_MAX] = {
+ [TIMER_DEAD] = "dead",
+ [TIMER_WAITING] = "waiting",
+ [TIMER_RUNNING] = "running",
+ [TIMER_ELAPSED] = "elapsed",
+ [TIMER_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(timer_state, TimerState);
+
+static const char* const unit_dependency_table[_UNIT_DEPENDENCY_MAX] = {
+ [UNIT_REQUIRES] = "Requires",
+ [UNIT_REQUISITE] = "Requisite",
+ [UNIT_WANTS] = "Wants",
+ [UNIT_BINDS_TO] = "BindsTo",
+ [UNIT_PART_OF] = "PartOf",
+ [UNIT_REQUIRED_BY] = "RequiredBy",
+ [UNIT_REQUISITE_OF] = "RequisiteOf",
+ [UNIT_WANTED_BY] = "WantedBy",
+ [UNIT_BOUND_BY] = "BoundBy",
+ [UNIT_CONSISTS_OF] = "ConsistsOf",
+ [UNIT_CONFLICTS] = "Conflicts",
+ [UNIT_CONFLICTED_BY] = "ConflictedBy",
+ [UNIT_BEFORE] = "Before",
+ [UNIT_AFTER] = "After",
+ [UNIT_ON_FAILURE] = "OnFailure",
+ [UNIT_TRIGGERS] = "Triggers",
+ [UNIT_TRIGGERED_BY] = "TriggeredBy",
+ [UNIT_PROPAGATES_RELOAD_TO] = "PropagatesReloadTo",
+ [UNIT_RELOAD_PROPAGATED_FROM] = "ReloadPropagatedFrom",
+ [UNIT_JOINS_NAMESPACE_OF] = "JoinsNamespaceOf",
+ [UNIT_REFERENCES] = "References",
+ [UNIT_REFERENCED_BY] = "ReferencedBy",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_dependency, UnitDependency);
+
+static const char* const notify_access_table[_NOTIFY_ACCESS_MAX] = {
+ [NOTIFY_NONE] = "none",
+ [NOTIFY_MAIN] = "main",
+ [NOTIFY_EXEC] = "exec",
+ [NOTIFY_ALL] = "all"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(notify_access, NotifyAccess);
diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h
new file mode 100644
index 0000000..85f3e42
--- /dev/null
+++ b/src/basic/unit-def.h
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+typedef enum UnitType {
+ UNIT_SERVICE = 0,
+ UNIT_SOCKET,
+ UNIT_TARGET,
+ UNIT_DEVICE,
+ UNIT_MOUNT,
+ UNIT_AUTOMOUNT,
+ UNIT_SWAP,
+ UNIT_TIMER,
+ UNIT_PATH,
+ UNIT_SLICE,
+ UNIT_SCOPE,
+ _UNIT_TYPE_MAX,
+ _UNIT_TYPE_INVALID = -1
+} UnitType;
+
+typedef enum UnitLoadState {
+ UNIT_STUB = 0,
+ UNIT_LOADED,
+ UNIT_NOT_FOUND, /* error condition #1: unit file not found */
+ UNIT_BAD_SETTING, /* error condition #2: we couldn't parse some essential unit file setting */
+ UNIT_ERROR, /* error condition #3: other "system" error, catchall for the rest */
+ UNIT_MERGED,
+ UNIT_MASKED,
+ _UNIT_LOAD_STATE_MAX,
+ _UNIT_LOAD_STATE_INVALID = -1
+} UnitLoadState;
+
+typedef enum UnitActiveState {
+ UNIT_ACTIVE,
+ UNIT_RELOADING,
+ UNIT_INACTIVE,
+ UNIT_FAILED,
+ UNIT_ACTIVATING,
+ UNIT_DEACTIVATING,
+ _UNIT_ACTIVE_STATE_MAX,
+ _UNIT_ACTIVE_STATE_INVALID = -1
+} UnitActiveState;
+
+typedef enum AutomountState {
+ AUTOMOUNT_DEAD,
+ AUTOMOUNT_WAITING,
+ AUTOMOUNT_RUNNING,
+ AUTOMOUNT_FAILED,
+ _AUTOMOUNT_STATE_MAX,
+ _AUTOMOUNT_STATE_INVALID = -1
+} AutomountState;
+
+/* We simply watch devices, we cannot plug/unplug them. That
+ * simplifies the state engine greatly */
+typedef enum DeviceState {
+ DEVICE_DEAD,
+ DEVICE_TENTATIVE, /* mounted or swapped, but not (yet) announced by udev */
+ DEVICE_PLUGGED, /* announced by udev */
+ _DEVICE_STATE_MAX,
+ _DEVICE_STATE_INVALID = -1
+} DeviceState;
+
+typedef enum MountState {
+ MOUNT_DEAD,
+ MOUNT_MOUNTING, /* /usr/bin/mount is running, but the mount is not done yet. */
+ MOUNT_MOUNTING_DONE, /* /usr/bin/mount is running, and the mount is done. */
+ MOUNT_MOUNTED,
+ MOUNT_REMOUNTING,
+ MOUNT_UNMOUNTING,
+ MOUNT_REMOUNTING_SIGTERM,
+ MOUNT_REMOUNTING_SIGKILL,
+ MOUNT_UNMOUNTING_SIGTERM,
+ MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_FAILED,
+ _MOUNT_STATE_MAX,
+ _MOUNT_STATE_INVALID = -1
+} MountState;
+
+typedef enum PathState {
+ PATH_DEAD,
+ PATH_WAITING,
+ PATH_RUNNING,
+ PATH_FAILED,
+ _PATH_STATE_MAX,
+ _PATH_STATE_INVALID = -1
+} PathState;
+
+typedef enum ScopeState {
+ SCOPE_DEAD,
+ SCOPE_RUNNING,
+ SCOPE_ABANDONED,
+ SCOPE_STOP_SIGTERM,
+ SCOPE_STOP_SIGKILL,
+ SCOPE_FAILED,
+ _SCOPE_STATE_MAX,
+ _SCOPE_STATE_INVALID = -1
+} ScopeState;
+
+typedef enum ServiceState {
+ SERVICE_DEAD,
+ SERVICE_START_PRE,
+ SERVICE_START,
+ SERVICE_START_POST,
+ SERVICE_RUNNING,
+ SERVICE_EXITED, /* Nothing is running anymore, but RemainAfterExit is true hence this is OK */
+ SERVICE_RELOAD,
+ SERVICE_STOP, /* No STOP_PRE state, instead just register multiple STOP executables */
+ SERVICE_STOP_WATCHDOG,
+ SERVICE_STOP_SIGTERM,
+ SERVICE_STOP_SIGKILL,
+ SERVICE_STOP_POST,
+ SERVICE_FINAL_SIGTERM, /* In case the STOP_POST executable hangs, we shoot that down, too */
+ SERVICE_FINAL_SIGKILL,
+ SERVICE_FAILED,
+ SERVICE_AUTO_RESTART,
+ _SERVICE_STATE_MAX,
+ _SERVICE_STATE_INVALID = -1
+} ServiceState;
+
+typedef enum SliceState {
+ SLICE_DEAD,
+ SLICE_ACTIVE,
+ _SLICE_STATE_MAX,
+ _SLICE_STATE_INVALID = -1
+} SliceState;
+
+typedef enum SocketState {
+ SOCKET_DEAD,
+ SOCKET_START_PRE,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST,
+ SOCKET_LISTENING,
+ SOCKET_RUNNING,
+ SOCKET_STOP_PRE,
+ SOCKET_STOP_PRE_SIGTERM,
+ SOCKET_STOP_PRE_SIGKILL,
+ SOCKET_STOP_POST,
+ SOCKET_FINAL_SIGTERM,
+ SOCKET_FINAL_SIGKILL,
+ SOCKET_FAILED,
+ _SOCKET_STATE_MAX,
+ _SOCKET_STATE_INVALID = -1
+} SocketState;
+
+typedef enum SwapState {
+ SWAP_DEAD,
+ SWAP_ACTIVATING, /* /sbin/swapon is running, but the swap not yet enabled. */
+ SWAP_ACTIVATING_DONE, /* /sbin/swapon is running, and the swap is done. */
+ SWAP_ACTIVE,
+ SWAP_DEACTIVATING,
+ SWAP_DEACTIVATING_SIGTERM,
+ SWAP_DEACTIVATING_SIGKILL,
+ SWAP_FAILED,
+ _SWAP_STATE_MAX,
+ _SWAP_STATE_INVALID = -1
+} SwapState;
+
+typedef enum TargetState {
+ TARGET_DEAD,
+ TARGET_ACTIVE,
+ _TARGET_STATE_MAX,
+ _TARGET_STATE_INVALID = -1
+} TargetState;
+
+typedef enum TimerState {
+ TIMER_DEAD,
+ TIMER_WAITING,
+ TIMER_RUNNING,
+ TIMER_ELAPSED,
+ TIMER_FAILED,
+ _TIMER_STATE_MAX,
+ _TIMER_STATE_INVALID = -1
+} TimerState;
+
+typedef enum UnitDependency {
+ /* Positive dependencies */
+ UNIT_REQUIRES,
+ UNIT_REQUISITE,
+ UNIT_WANTS,
+ UNIT_BINDS_TO,
+ UNIT_PART_OF,
+
+ /* Inverse of the above */
+ UNIT_REQUIRED_BY, /* inverse of 'requires' is 'required_by' */
+ UNIT_REQUISITE_OF, /* inverse of 'requisite' is 'requisite_of' */
+ UNIT_WANTED_BY, /* inverse of 'wants' */
+ UNIT_BOUND_BY, /* inverse of 'binds_to' */
+ UNIT_CONSISTS_OF, /* inverse of 'part_of' */
+
+ /* Negative dependencies */
+ UNIT_CONFLICTS, /* inverse of 'conflicts' is 'conflicted_by' */
+ UNIT_CONFLICTED_BY,
+
+ /* Order */
+ UNIT_BEFORE, /* inverse of 'before' is 'after' and vice versa */
+ UNIT_AFTER,
+
+ /* On Failure */
+ UNIT_ON_FAILURE,
+
+ /* Triggers (i.e. a socket triggers a service) */
+ UNIT_TRIGGERS,
+ UNIT_TRIGGERED_BY,
+
+ /* Propagate reloads */
+ UNIT_PROPAGATES_RELOAD_TO,
+ UNIT_RELOAD_PROPAGATED_FROM,
+
+ /* Joins namespace of */
+ UNIT_JOINS_NAMESPACE_OF,
+
+ /* Reference information for GC logic */
+ UNIT_REFERENCES, /* Inverse of 'references' is 'referenced_by' */
+ UNIT_REFERENCED_BY,
+
+ _UNIT_DEPENDENCY_MAX,
+ _UNIT_DEPENDENCY_INVALID = -1
+} UnitDependency;
+
+typedef enum NotifyAccess {
+ NOTIFY_NONE,
+ NOTIFY_ALL,
+ NOTIFY_MAIN,
+ NOTIFY_EXEC,
+ _NOTIFY_ACCESS_MAX,
+ _NOTIFY_ACCESS_INVALID = -1
+} NotifyAccess;
+
+char *unit_dbus_path_from_name(const char *name);
+int unit_name_from_dbus_path(const char *path, char **name);
+
+const char* unit_dbus_interface_from_type(UnitType t);
+const char *unit_dbus_interface_from_name(const char *name);
+
+const char *unit_type_to_string(UnitType i) _const_;
+UnitType unit_type_from_string(const char *s) _pure_;
+
+const char *unit_load_state_to_string(UnitLoadState i) _const_;
+UnitLoadState unit_load_state_from_string(const char *s) _pure_;
+
+const char *unit_active_state_to_string(UnitActiveState i) _const_;
+UnitActiveState unit_active_state_from_string(const char *s) _pure_;
+
+const char* automount_state_to_string(AutomountState i) _const_;
+AutomountState automount_state_from_string(const char *s) _pure_;
+
+const char* device_state_to_string(DeviceState i) _const_;
+DeviceState device_state_from_string(const char *s) _pure_;
+
+const char* mount_state_to_string(MountState i) _const_;
+MountState mount_state_from_string(const char *s) _pure_;
+
+const char* path_state_to_string(PathState i) _const_;
+PathState path_state_from_string(const char *s) _pure_;
+
+const char* scope_state_to_string(ScopeState i) _const_;
+ScopeState scope_state_from_string(const char *s) _pure_;
+
+const char* service_state_to_string(ServiceState i) _const_;
+ServiceState service_state_from_string(const char *s) _pure_;
+
+const char* slice_state_to_string(SliceState i) _const_;
+SliceState slice_state_from_string(const char *s) _pure_;
+
+const char* socket_state_to_string(SocketState i) _const_;
+SocketState socket_state_from_string(const char *s) _pure_;
+
+const char* swap_state_to_string(SwapState i) _const_;
+SwapState swap_state_from_string(const char *s) _pure_;
+
+const char* target_state_to_string(TargetState i) _const_;
+TargetState target_state_from_string(const char *s) _pure_;
+
+const char *timer_state_to_string(TimerState i) _const_;
+TimerState timer_state_from_string(const char *s) _pure_;
+
+const char *unit_dependency_to_string(UnitDependency i) _const_;
+UnitDependency unit_dependency_from_string(const char *s) _pure_;
+
+const char* notify_access_to_string(NotifyAccess i) _const_;
+NotifyAccess notify_access_from_string(const char *s) _pure_;
diff --git a/src/basic/unit-name.c b/src/basic/unit-name.c
new file mode 100644
index 0000000..1b81fe2
--- /dev/null
+++ b/src/basic/unit-name.c
@@ -0,0 +1,775 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "glob-util.h"
+#include "hexdecoct.h"
+#include "path-util.h"
+#include "special.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+
+/* Characters valid in a unit name. */
+#define VALID_CHARS \
+ DIGITS \
+ LETTERS \
+ ":-_.\\"
+
+/* The same, but also permits the single @ character that may appear */
+#define VALID_CHARS_WITH_AT \
+ "@" \
+ VALID_CHARS
+
+/* All chars valid in a unit name glob */
+#define VALID_CHARS_GLOB \
+ VALID_CHARS_WITH_AT \
+ "[]!-*?"
+
+bool unit_name_is_valid(const char *n, UnitNameFlags flags) {
+ const char *e, *i, *at;
+
+ assert((flags & ~(UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE)) == 0);
+
+ if (_unlikely_(flags == 0))
+ return false;
+
+ if (isempty(n))
+ return false;
+
+ if (strlen(n) >= UNIT_NAME_MAX)
+ return false;
+
+ e = strrchr(n, '.');
+ if (!e || e == n)
+ return false;
+
+ if (unit_type_from_string(e + 1) < 0)
+ return false;
+
+ for (i = n, at = NULL; i < e; i++) {
+
+ if (*i == '@' && !at)
+ at = i;
+
+ if (!strchr("@" VALID_CHARS, *i))
+ return false;
+ }
+
+ if (at == n)
+ return false;
+
+ if (flags & UNIT_NAME_PLAIN)
+ if (!at)
+ return true;
+
+ if (flags & UNIT_NAME_INSTANCE)
+ if (at && e > at + 1)
+ return true;
+
+ if (flags & UNIT_NAME_TEMPLATE)
+ if (at && e == at + 1)
+ return true;
+
+ return false;
+}
+
+bool unit_prefix_is_valid(const char *p) {
+
+ /* We don't allow additional @ in the prefix string */
+
+ if (isempty(p))
+ return false;
+
+ return in_charset(p, VALID_CHARS);
+}
+
+bool unit_instance_is_valid(const char *i) {
+
+ /* The max length depends on the length of the string, so we
+ * don't really check this here. */
+
+ if (isempty(i))
+ return false;
+
+ /* We allow additional @ in the instance string, we do not
+ * allow them in the prefix! */
+
+ return in_charset(i, "@" VALID_CHARS);
+}
+
+bool unit_suffix_is_valid(const char *s) {
+ if (isempty(s))
+ return false;
+
+ if (s[0] != '.')
+ return false;
+
+ if (unit_type_from_string(s + 1) < 0)
+ return false;
+
+ return true;
+}
+
+int unit_name_to_prefix(const char *n, char **ret) {
+ const char *p;
+ char *s;
+
+ assert(n);
+ assert(ret);
+
+ if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ p = strchr(n, '@');
+ if (!p)
+ p = strrchr(n, '.');
+
+ assert_se(p);
+
+ s = strndup(n, p - n);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int unit_name_to_instance(const char *n, char **instance) {
+ const char *p, *d;
+ char *i;
+
+ assert(n);
+ assert(instance);
+
+ if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ /* Everything past the first @ and before the last . is the instance */
+ p = strchr(n, '@');
+ if (!p) {
+ *instance = NULL;
+ return 0;
+ }
+
+ p++;
+
+ d = strrchr(p, '.');
+ if (!d)
+ return -EINVAL;
+
+ i = strndup(p, d-p);
+ if (!i)
+ return -ENOMEM;
+
+ *instance = i;
+ return 1;
+}
+
+int unit_name_to_prefix_and_instance(const char *n, char **ret) {
+ const char *d;
+ char *s;
+
+ assert(n);
+ assert(ret);
+
+ if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ d = strrchr(n, '.');
+ if (!d)
+ return -EINVAL;
+
+ s = strndup(n, d - n);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+UnitType unit_name_to_type(const char *n) {
+ const char *e;
+
+ assert(n);
+
+ if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+ return _UNIT_TYPE_INVALID;
+
+ assert_se(e = strrchr(n, '.'));
+
+ return unit_type_from_string(e + 1);
+}
+
+int unit_name_change_suffix(const char *n, const char *suffix, char **ret) {
+ char *e, *s;
+ size_t a, b;
+
+ assert(n);
+ assert(suffix);
+ assert(ret);
+
+ if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ if (!unit_suffix_is_valid(suffix))
+ return -EINVAL;
+
+ assert_se(e = strrchr(n, '.'));
+
+ a = e - n;
+ b = strlen(suffix);
+
+ s = new(char, a + b + 1);
+ if (!s)
+ return -ENOMEM;
+
+ strcpy(mempcpy(s, n, a), suffix);
+ *ret = s;
+
+ return 0;
+}
+
+int unit_name_build(const char *prefix, const char *instance, const char *suffix, char **ret) {
+ UnitType type;
+
+ assert(prefix);
+ assert(suffix);
+ assert(ret);
+
+ if (suffix[0] != '.')
+ return -EINVAL;
+
+ type = unit_type_from_string(suffix + 1);
+ if (type < 0)
+ return -EINVAL;
+
+ return unit_name_build_from_type(prefix, instance, type, ret);
+}
+
+int unit_name_build_from_type(const char *prefix, const char *instance, UnitType type, char **ret) {
+ const char *ut;
+ char *s;
+
+ assert(prefix);
+ assert(type >= 0);
+ assert(type < _UNIT_TYPE_MAX);
+ assert(ret);
+
+ if (!unit_prefix_is_valid(prefix))
+ return -EINVAL;
+
+ if (instance && !unit_instance_is_valid(instance))
+ return -EINVAL;
+
+ ut = unit_type_to_string(type);
+
+ if (!instance)
+ s = strjoin(prefix, ".", ut);
+ else
+ s = strjoin(prefix, "@", instance, ".", ut);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+static char *do_escape_char(char c, char *t) {
+ assert(t);
+
+ *(t++) = '\\';
+ *(t++) = 'x';
+ *(t++) = hexchar(c >> 4);
+ *(t++) = hexchar(c);
+
+ return t;
+}
+
+static char *do_escape(const char *f, char *t) {
+ assert(f);
+ assert(t);
+
+ /* do not create units with a leading '.', like for "/.dotdir" mount points */
+ if (*f == '.') {
+ t = do_escape_char(*f, t);
+ f++;
+ }
+
+ for (; *f; f++) {
+ if (*f == '/')
+ *(t++) = '-';
+ else if (IN_SET(*f, '-', '\\') || !strchr(VALID_CHARS, *f))
+ t = do_escape_char(*f, t);
+ else
+ *(t++) = *f;
+ }
+
+ return t;
+}
+
+char *unit_name_escape(const char *f) {
+ char *r, *t;
+
+ assert(f);
+
+ r = new(char, strlen(f)*4+1);
+ if (!r)
+ return NULL;
+
+ t = do_escape(f, r);
+ *t = 0;
+
+ return r;
+}
+
+int unit_name_unescape(const char *f, char **ret) {
+ _cleanup_free_ char *r = NULL;
+ char *t;
+
+ assert(f);
+
+ r = strdup(f);
+ if (!r)
+ return -ENOMEM;
+
+ for (t = r; *f; f++) {
+ if (*f == '-')
+ *(t++) = '/';
+ else if (*f == '\\') {
+ int a, b;
+
+ if (f[1] != 'x')
+ return -EINVAL;
+
+ a = unhexchar(f[2]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unhexchar(f[3]);
+ if (b < 0)
+ return -EINVAL;
+
+ *(t++) = (char) (((uint8_t) a << 4U) | (uint8_t) b);
+ f += 3;
+ } else
+ *(t++) = *f;
+ }
+
+ *t = 0;
+
+ *ret = TAKE_PTR(r);
+
+ return 0;
+}
+
+int unit_name_path_escape(const char *f, char **ret) {
+ char *p, *s;
+
+ assert(f);
+ assert(ret);
+
+ p = strdupa(f);
+ if (!p)
+ return -ENOMEM;
+
+ path_simplify(p, false);
+
+ if (empty_or_root(p))
+ s = strdup("-");
+ else {
+ if (!path_is_normalized(p))
+ return -EINVAL;
+
+ /* Truncate trailing slashes */
+ delete_trailing_chars(p, "/");
+
+ /* Truncate leading slashes */
+ p = skip_leading_chars(p, "/");
+
+ s = unit_name_escape(p);
+ }
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int unit_name_path_unescape(const char *f, char **ret) {
+ char *s;
+ int r;
+
+ assert(f);
+
+ if (isempty(f))
+ return -EINVAL;
+
+ if (streq(f, "-")) {
+ s = strdup("/");
+ if (!s)
+ return -ENOMEM;
+ } else {
+ char *w;
+
+ r = unit_name_unescape(f, &w);
+ if (r < 0)
+ return r;
+
+ /* Don't accept trailing or leading slashes */
+ if (startswith(w, "/") || endswith(w, "/")) {
+ free(w);
+ return -EINVAL;
+ }
+
+ /* Prefix a slash again */
+ s = strappend("/", w);
+ free(w);
+ if (!s)
+ return -ENOMEM;
+
+ if (!path_is_normalized(s)) {
+ free(s);
+ return -EINVAL;
+ }
+ }
+
+ if (ret)
+ *ret = s;
+ else
+ free(s);
+
+ return 0;
+}
+
+int unit_name_replace_instance(const char *f, const char *i, char **ret) {
+ const char *p, *e;
+ char *s;
+ size_t a, b;
+
+ assert(f);
+ assert(i);
+ assert(ret);
+
+ if (!unit_name_is_valid(f, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+ if (!unit_instance_is_valid(i))
+ return -EINVAL;
+
+ assert_se(p = strchr(f, '@'));
+ assert_se(e = strrchr(f, '.'));
+
+ a = p - f;
+ b = strlen(i);
+
+ s = new(char, a + 1 + b + strlen(e) + 1);
+ if (!s)
+ return -ENOMEM;
+
+ strcpy(mempcpy(mempcpy(s, f, a + 1), i, b), e);
+
+ *ret = s;
+ return 0;
+}
+
+int unit_name_template(const char *f, char **ret) {
+ const char *p, *e;
+ char *s;
+ size_t a;
+
+ assert(f);
+ assert(ret);
+
+ if (!unit_name_is_valid(f, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+
+ assert_se(p = strchr(f, '@'));
+ assert_se(e = strrchr(f, '.'));
+
+ a = p - f;
+
+ s = new(char, a + 1 + strlen(e) + 1);
+ if (!s)
+ return -ENOMEM;
+
+ strcpy(mempcpy(s, f, a + 1), e);
+
+ *ret = s;
+ return 0;
+}
+
+int unit_name_from_path(const char *path, const char *suffix, char **ret) {
+ _cleanup_free_ char *p = NULL;
+ char *s = NULL;
+ int r;
+
+ assert(path);
+ assert(suffix);
+ assert(ret);
+
+ if (!unit_suffix_is_valid(suffix))
+ return -EINVAL;
+
+ r = unit_name_path_escape(path, &p);
+ if (r < 0)
+ return r;
+
+ s = strappend(p, suffix);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int unit_name_from_path_instance(const char *prefix, const char *path, const char *suffix, char **ret) {
+ _cleanup_free_ char *p = NULL;
+ char *s;
+ int r;
+
+ assert(prefix);
+ assert(path);
+ assert(suffix);
+ assert(ret);
+
+ if (!unit_prefix_is_valid(prefix))
+ return -EINVAL;
+
+ if (!unit_suffix_is_valid(suffix))
+ return -EINVAL;
+
+ r = unit_name_path_escape(path, &p);
+ if (r < 0)
+ return r;
+
+ s = strjoin(prefix, "@", p, suffix);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int unit_name_to_path(const char *name, char **ret) {
+ _cleanup_free_ char *prefix = NULL;
+ int r;
+
+ assert(name);
+
+ r = unit_name_to_prefix(name, &prefix);
+ if (r < 0)
+ return r;
+
+ return unit_name_path_unescape(prefix, ret);
+}
+
+static bool do_escape_mangle(const char *f, bool allow_globs, char *t) {
+ const char *valid_chars;
+ bool mangled = false;
+
+ assert(f);
+ assert(t);
+
+ /* We'll only escape the obvious characters here, to play safe.
+ *
+ * Returns true if any characters were mangled, false otherwise.
+ */
+
+ valid_chars = allow_globs ? VALID_CHARS_GLOB : VALID_CHARS_WITH_AT;
+
+ for (; *f; f++)
+ if (*f == '/') {
+ *(t++) = '-';
+ mangled = true;
+ } else if (!strchr(valid_chars, *f)) {
+ t = do_escape_char(*f, t);
+ mangled = true;
+ } else
+ *(t++) = *f;
+ *t = 0;
+
+ return mangled;
+}
+
+/**
+ * Convert a string to a unit name. /dev/blah is converted to dev-blah.device,
+ * /blah/blah is converted to blah-blah.mount, anything else is left alone,
+ * except that @suffix is appended if a valid unit suffix is not present.
+ *
+ * If @allow_globs, globs characters are preserved. Otherwise, they are escaped.
+ */
+int unit_name_mangle_with_suffix(const char *name, UnitNameMangle flags, const char *suffix, char **ret) {
+ char *s;
+ int r;
+ bool mangled;
+
+ assert(name);
+ assert(suffix);
+ assert(ret);
+
+ if (isempty(name)) /* We cannot mangle empty unit names to become valid, sorry. */
+ return -EINVAL;
+
+ if (!unit_suffix_is_valid(suffix))
+ return -EINVAL;
+
+ /* Already a fully valid unit name? If so, no mangling is necessary... */
+ if (unit_name_is_valid(name, UNIT_NAME_ANY))
+ goto good;
+
+ /* Already a fully valid globbing expression? If so, no mangling is necessary either... */
+ if ((flags & UNIT_NAME_MANGLE_GLOB) &&
+ string_is_glob(name) &&
+ in_charset(name, VALID_CHARS_GLOB))
+ goto good;
+
+ if (is_device_path(name)) {
+ r = unit_name_from_path(name, ".device", ret);
+ if (r >= 0)
+ return 1;
+ if (r != -EINVAL)
+ return r;
+ }
+
+ if (path_is_absolute(name)) {
+ r = unit_name_from_path(name, ".mount", ret);
+ if (r >= 0)
+ return 1;
+ if (r != -EINVAL)
+ return r;
+ }
+
+ s = new(char, strlen(name) * 4 + strlen(suffix) + 1);
+ if (!s)
+ return -ENOMEM;
+
+ mangled = do_escape_mangle(name, flags & UNIT_NAME_MANGLE_GLOB, s);
+ if (mangled)
+ log_full(flags & UNIT_NAME_MANGLE_WARN ? LOG_NOTICE : LOG_DEBUG,
+ "Invalid unit name \"%s\" was escaped as \"%s\" (maybe you should use systemd-escape?)",
+ name, s);
+
+ /* Append a suffix if it doesn't have any, but only if this is not a glob, so that we can allow "foo.*" as a
+ * valid glob. */
+ if ((!(flags & UNIT_NAME_MANGLE_GLOB) || !string_is_glob(s)) && unit_name_to_type(s) < 0)
+ strcat(s, suffix);
+
+ *ret = s;
+ return 1;
+
+good:
+ s = strdup(name);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int slice_build_parent_slice(const char *slice, char **ret) {
+ char *s, *dash;
+ int r;
+
+ assert(slice);
+ assert(ret);
+
+ if (!slice_name_is_valid(slice))
+ return -EINVAL;
+
+ if (streq(slice, SPECIAL_ROOT_SLICE)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ s = strdup(slice);
+ if (!s)
+ return -ENOMEM;
+
+ dash = strrchr(s, '-');
+ if (dash)
+ strcpy(dash, ".slice");
+ else {
+ r = free_and_strdup(&s, SPECIAL_ROOT_SLICE);
+ if (r < 0) {
+ free(s);
+ return r;
+ }
+ }
+
+ *ret = s;
+ return 1;
+}
+
+int slice_build_subslice(const char *slice, const char *name, char **ret) {
+ char *subslice;
+
+ assert(slice);
+ assert(name);
+ assert(ret);
+
+ if (!slice_name_is_valid(slice))
+ return -EINVAL;
+
+ if (!unit_prefix_is_valid(name))
+ return -EINVAL;
+
+ if (streq(slice, SPECIAL_ROOT_SLICE))
+ subslice = strappend(name, ".slice");
+ else {
+ char *e;
+
+ assert_se(e = endswith(slice, ".slice"));
+
+ subslice = new(char, (e - slice) + 1 + strlen(name) + 6 + 1);
+ if (!subslice)
+ return -ENOMEM;
+
+ stpcpy(stpcpy(stpcpy(mempcpy(subslice, slice, e - slice), "-"), name), ".slice");
+ }
+
+ *ret = subslice;
+ return 0;
+}
+
+bool slice_name_is_valid(const char *name) {
+ const char *p, *e;
+ bool dash = false;
+
+ if (!unit_name_is_valid(name, UNIT_NAME_PLAIN))
+ return false;
+
+ if (streq(name, SPECIAL_ROOT_SLICE))
+ return true;
+
+ e = endswith(name, ".slice");
+ if (!e)
+ return false;
+
+ for (p = name; p < e; p++) {
+
+ if (*p == '-') {
+
+ /* Don't allow initial dash */
+ if (p == name)
+ return false;
+
+ /* Don't allow multiple dashes */
+ if (dash)
+ return false;
+
+ dash = true;
+ } else
+ dash = false;
+ }
+
+ /* Don't allow trailing hash */
+ if (dash)
+ return false;
+
+ return true;
+}
diff --git a/src/basic/unit-name.h b/src/basic/unit-name.h
new file mode 100644
index 0000000..0629db3
--- /dev/null
+++ b/src/basic/unit-name.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+#include "unit-def.h"
+
+#define UNIT_NAME_MAX 256
+
+typedef enum UnitNameFlags {
+ UNIT_NAME_PLAIN = 1 << 0, /* Allow foo.service */
+ UNIT_NAME_INSTANCE = 1 << 1, /* Allow foo@bar.service */
+ UNIT_NAME_TEMPLATE = 1 << 2, /* Allow foo@.service */
+ UNIT_NAME_ANY = UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE,
+} UnitNameFlags;
+
+bool unit_name_is_valid(const char *n, UnitNameFlags flags) _pure_;
+bool unit_prefix_is_valid(const char *p) _pure_;
+bool unit_instance_is_valid(const char *i) _pure_;
+bool unit_suffix_is_valid(const char *s) _pure_;
+
+static inline int unit_prefix_and_instance_is_valid(const char *p) {
+ /* For prefix+instance and instance the same rules apply */
+ return unit_instance_is_valid(p);
+}
+
+int unit_name_to_prefix(const char *n, char **prefix);
+int unit_name_to_instance(const char *n, char **instance);
+int unit_name_to_prefix_and_instance(const char *n, char **ret);
+
+UnitType unit_name_to_type(const char *n) _pure_;
+
+int unit_name_change_suffix(const char *n, const char *suffix, char **ret);
+
+int unit_name_build(const char *prefix, const char *instance, const char *suffix, char **ret);
+int unit_name_build_from_type(const char *prefix, const char *instance, UnitType, char **ret);
+
+char *unit_name_escape(const char *f);
+int unit_name_unescape(const char *f, char **ret);
+int unit_name_path_escape(const char *f, char **ret);
+int unit_name_path_unescape(const char *f, char **ret);
+
+int unit_name_replace_instance(const char *f, const char *i, char **ret);
+
+int unit_name_template(const char *f, char **ret);
+
+int unit_name_from_path(const char *path, const char *suffix, char **ret);
+int unit_name_from_path_instance(const char *prefix, const char *path, const char *suffix, char **ret);
+int unit_name_to_path(const char *name, char **ret);
+
+typedef enum UnitNameMangle {
+ UNIT_NAME_MANGLE_GLOB = 1 << 0,
+ UNIT_NAME_MANGLE_WARN = 1 << 1,
+} UnitNameMangle;
+
+int unit_name_mangle_with_suffix(const char *name, UnitNameMangle flags, const char *suffix, char **ret);
+
+static inline int unit_name_mangle(const char *name, UnitNameMangle flags, char **ret) {
+ return unit_name_mangle_with_suffix(name, flags, ".service", ret);
+}
+
+int slice_build_parent_slice(const char *slice, char **ret);
+int slice_build_subslice(const char *slice, const char *name, char **subslice);
+bool slice_name_is_valid(const char *name);
diff --git a/src/basic/user-util.c b/src/basic/user-util.c
new file mode 100644
index 0000000..260f3d2
--- /dev/null
+++ b/src/basic/user-util.c
@@ -0,0 +1,857 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <alloca.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <pwd.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <utmp.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "utf8.h"
+
+bool uid_is_valid(uid_t uid) {
+
+ /* Also see POSIX IEEE Std 1003.1-2008, 2016 Edition, 3.436. */
+
+ /* Some libc APIs use UID_INVALID as special placeholder */
+ if (uid == (uid_t) UINT32_C(0xFFFFFFFF))
+ return false;
+
+ /* A long time ago UIDs where 16bit, hence explicitly avoid the 16bit -1 too */
+ if (uid == (uid_t) UINT32_C(0xFFFF))
+ return false;
+
+ return true;
+}
+
+int parse_uid(const char *s, uid_t *ret) {
+ uint32_t uid = 0;
+ int r;
+
+ assert(s);
+
+ assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+ r = safe_atou32(s, &uid);
+ if (r < 0)
+ return r;
+
+ if (!uid_is_valid(uid))
+ return -ENXIO; /* we return ENXIO instead of EINVAL
+ * here, to make it easy to distuingish
+ * invalid numeric uids from invalid
+ * strings. */
+
+ if (ret)
+ *ret = uid;
+
+ return 0;
+}
+
+char* getlogname_malloc(void) {
+ uid_t uid;
+ struct stat st;
+
+ if (isatty(STDIN_FILENO) && fstat(STDIN_FILENO, &st) >= 0)
+ uid = st.st_uid;
+ else
+ uid = getuid();
+
+ return uid_to_name(uid);
+}
+
+char *getusername_malloc(void) {
+ const char *e;
+
+ e = getenv("USER");
+ if (e)
+ return strdup(e);
+
+ return uid_to_name(getuid());
+}
+
+static bool is_nologin_shell(const char *shell) {
+
+ return PATH_IN_SET(shell,
+ /* 'nologin' is the friendliest way to disable logins for a user account. It prints a nice
+ * message and exits. Different distributions place the binary at different places though,
+ * hence let's list them all. */
+ "/bin/nologin",
+ "/sbin/nologin",
+ "/usr/bin/nologin",
+ "/usr/sbin/nologin",
+ /* 'true' and 'false' work too for the same purpose, but are less friendly as they don't do
+ * any message printing. Different distributions place the binary at various places but at
+ * least not in the 'sbin' directory. */
+ "/bin/false",
+ "/usr/bin/false",
+ "/bin/true",
+ "/usr/bin/true");
+}
+
+static int synthesize_user_creds(
+ const char **username,
+ uid_t *uid, gid_t *gid,
+ const char **home,
+ const char **shell,
+ UserCredsFlags flags) {
+
+ /* We enforce some special rules for uid=0 and uid=65534: in order to avoid NSS lookups for root we hardcode
+ * their user record data. */
+
+ if (STR_IN_SET(*username, "root", "0")) {
+ *username = "root";
+
+ if (uid)
+ *uid = 0;
+ if (gid)
+ *gid = 0;
+
+ if (home)
+ *home = "/root";
+
+ if (shell)
+ *shell = "/bin/sh";
+
+ return 0;
+ }
+
+ if (synthesize_nobody() &&
+ STR_IN_SET(*username, NOBODY_USER_NAME, "65534")) {
+ *username = NOBODY_USER_NAME;
+
+ if (uid)
+ *uid = UID_NOBODY;
+ if (gid)
+ *gid = GID_NOBODY;
+
+ if (home)
+ *home = FLAGS_SET(flags, USER_CREDS_CLEAN) ? NULL : "/";
+
+ if (shell)
+ *shell = FLAGS_SET(flags, USER_CREDS_CLEAN) ? NULL : "/sbin/nologin";
+
+ return 0;
+ }
+
+ return -ENOMEDIUM;
+}
+
+int get_user_creds(
+ const char **username,
+ uid_t *uid, gid_t *gid,
+ const char **home,
+ const char **shell,
+ UserCredsFlags flags) {
+
+ uid_t u = UID_INVALID;
+ struct passwd *p;
+ int r;
+
+ assert(username);
+ assert(*username);
+
+ if (!FLAGS_SET(flags, USER_CREDS_PREFER_NSS) ||
+ (!home && !shell)) {
+
+ /* So here's the deal: normally, we'll try to synthesize all records we can synthesize, and override
+ * the user database with that. However, if the user specifies USER_CREDS_PREFER_NSS then the
+ * user database will override the synthetic records instead — except if the user is only interested in
+ * the UID and/or GID (but not the home directory, or the shell), in which case we'll always override
+ * the user database (i.e. the USER_CREDS_PREFER_NSS flag has no effect in this case). Why?
+ * Simply because there are valid usecase where the user might change the home directory or the shell
+ * of the relevant users, but changing the UID/GID mappings for them is something we explicitly don't
+ * support. */
+
+ r = synthesize_user_creds(username, uid, gid, home, shell, flags);
+ if (r >= 0)
+ return 0;
+ if (r != -ENOMEDIUM) /* not a username we can synthesize */
+ return r;
+ }
+
+ if (parse_uid(*username, &u) >= 0) {
+ errno = 0;
+ p = getpwuid(u);
+
+ /* If there are multiple users with the same id, make sure to leave $USER to the configured value
+ * instead of the first occurrence in the database. However if the uid was configured by a numeric uid,
+ * then let's pick the real username from /etc/passwd. */
+ if (p)
+ *username = p->pw_name;
+ else if (FLAGS_SET(flags, USER_CREDS_ALLOW_MISSING) && !gid && !home && !shell) {
+
+ /* If the specified user is a numeric UID and it isn't in the user database, and the caller
+ * passed USER_CREDS_ALLOW_MISSING and was only interested in the UID, then juts return that
+ * and don't complain. */
+
+ if (uid)
+ *uid = u;
+
+ return 0;
+ }
+ } else {
+ errno = 0;
+ p = getpwnam(*username);
+ }
+ if (!p) {
+ r = errno > 0 ? -errno : -ESRCH;
+
+ /* If the user requested that we only synthesize as fallback, do so now */
+ if (FLAGS_SET(flags, USER_CREDS_PREFER_NSS)) {
+ if (synthesize_user_creds(username, uid, gid, home, shell, flags) >= 0)
+ return 0;
+ }
+
+ return r;
+ }
+
+ if (uid) {
+ if (!uid_is_valid(p->pw_uid))
+ return -EBADMSG;
+
+ *uid = p->pw_uid;
+ }
+
+ if (gid) {
+ if (!gid_is_valid(p->pw_gid))
+ return -EBADMSG;
+
+ *gid = p->pw_gid;
+ }
+
+ if (home) {
+ if (FLAGS_SET(flags, USER_CREDS_CLEAN) && empty_or_root(p->pw_dir))
+ *home = NULL;
+ else
+ *home = p->pw_dir;
+ }
+
+ if (shell) {
+ if (FLAGS_SET(flags, USER_CREDS_CLEAN) && (isempty(p->pw_shell) || is_nologin_shell(p->pw_shell)))
+ *shell = NULL;
+ else
+ *shell = p->pw_shell;
+ }
+
+ return 0;
+}
+
+int get_group_creds(const char **groupname, gid_t *gid, UserCredsFlags flags) {
+ struct group *g;
+ gid_t id;
+
+ assert(groupname);
+
+ /* We enforce some special rules for gid=0: in order to avoid NSS lookups for root we hardcode its data. */
+
+ if (STR_IN_SET(*groupname, "root", "0")) {
+ *groupname = "root";
+
+ if (gid)
+ *gid = 0;
+
+ return 0;
+ }
+
+ if (synthesize_nobody() &&
+ STR_IN_SET(*groupname, NOBODY_GROUP_NAME, "65534")) {
+ *groupname = NOBODY_GROUP_NAME;
+
+ if (gid)
+ *gid = GID_NOBODY;
+
+ return 0;
+ }
+
+ if (parse_gid(*groupname, &id) >= 0) {
+ errno = 0;
+ g = getgrgid(id);
+
+ if (g)
+ *groupname = g->gr_name;
+ else if (FLAGS_SET(flags, USER_CREDS_ALLOW_MISSING)) {
+ if (gid)
+ *gid = id;
+
+ return 0;
+ }
+ } else {
+ errno = 0;
+ g = getgrnam(*groupname);
+ }
+
+ if (!g)
+ return errno > 0 ? -errno : -ESRCH;
+
+ if (gid) {
+ if (!gid_is_valid(g->gr_gid))
+ return -EBADMSG;
+
+ *gid = g->gr_gid;
+ }
+
+ return 0;
+}
+
+char* uid_to_name(uid_t uid) {
+ char *ret;
+ int r;
+
+ /* Shortcut things to avoid NSS lookups */
+ if (uid == 0)
+ return strdup("root");
+ if (synthesize_nobody() &&
+ uid == UID_NOBODY)
+ return strdup(NOBODY_USER_NAME);
+
+ if (uid_is_valid(uid)) {
+ long bufsize;
+
+ bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
+ if (bufsize <= 0)
+ bufsize = 4096;
+
+ for (;;) {
+ struct passwd pwbuf, *pw = NULL;
+ _cleanup_free_ char *buf = NULL;
+
+ buf = malloc(bufsize);
+ if (!buf)
+ return NULL;
+
+ r = getpwuid_r(uid, &pwbuf, buf, (size_t) bufsize, &pw);
+ if (r == 0 && pw)
+ return strdup(pw->pw_name);
+ if (r != ERANGE)
+ break;
+
+ bufsize *= 2;
+ }
+ }
+
+ if (asprintf(&ret, UID_FMT, uid) < 0)
+ return NULL;
+
+ return ret;
+}
+
+char* gid_to_name(gid_t gid) {
+ char *ret;
+ int r;
+
+ if (gid == 0)
+ return strdup("root");
+ if (synthesize_nobody() &&
+ gid == GID_NOBODY)
+ return strdup(NOBODY_GROUP_NAME);
+
+ if (gid_is_valid(gid)) {
+ long bufsize;
+
+ bufsize = sysconf(_SC_GETGR_R_SIZE_MAX);
+ if (bufsize <= 0)
+ bufsize = 4096;
+
+ for (;;) {
+ struct group grbuf, *gr = NULL;
+ _cleanup_free_ char *buf = NULL;
+
+ buf = malloc(bufsize);
+ if (!buf)
+ return NULL;
+
+ r = getgrgid_r(gid, &grbuf, buf, (size_t) bufsize, &gr);
+ if (r == 0 && gr)
+ return strdup(gr->gr_name);
+ if (r != ERANGE)
+ break;
+
+ bufsize *= 2;
+ }
+ }
+
+ if (asprintf(&ret, GID_FMT, gid) < 0)
+ return NULL;
+
+ return ret;
+}
+
+int in_gid(gid_t gid) {
+ long ngroups_max;
+ gid_t *gids;
+ int r, i;
+
+ if (getgid() == gid)
+ return 1;
+
+ if (getegid() == gid)
+ return 1;
+
+ if (!gid_is_valid(gid))
+ return -EINVAL;
+
+ ngroups_max = sysconf(_SC_NGROUPS_MAX);
+ assert(ngroups_max > 0);
+
+ gids = newa(gid_t, ngroups_max);
+
+ r = getgroups(ngroups_max, gids);
+ if (r < 0)
+ return -errno;
+
+ for (i = 0; i < r; i++)
+ if (gids[i] == gid)
+ return 1;
+
+ return 0;
+}
+
+int in_group(const char *name) {
+ int r;
+ gid_t gid;
+
+ r = get_group_creds(&name, &gid, 0);
+ if (r < 0)
+ return r;
+
+ return in_gid(gid);
+}
+
+int get_home_dir(char **_h) {
+ struct passwd *p;
+ const char *e;
+ char *h;
+ uid_t u;
+
+ assert(_h);
+
+ /* Take the user specified one */
+ e = secure_getenv("HOME");
+ if (e && path_is_absolute(e)) {
+ h = strdup(e);
+ if (!h)
+ return -ENOMEM;
+
+ *_h = h;
+ return 0;
+ }
+
+ /* Hardcode home directory for root and nobody to avoid NSS */
+ u = getuid();
+ if (u == 0) {
+ h = strdup("/root");
+ if (!h)
+ return -ENOMEM;
+
+ *_h = h;
+ return 0;
+ }
+ if (synthesize_nobody() &&
+ u == UID_NOBODY) {
+ h = strdup("/");
+ if (!h)
+ return -ENOMEM;
+
+ *_h = h;
+ return 0;
+ }
+
+ /* Check the database... */
+ errno = 0;
+ p = getpwuid(u);
+ if (!p)
+ return errno > 0 ? -errno : -ESRCH;
+
+ if (!path_is_absolute(p->pw_dir))
+ return -EINVAL;
+
+ h = strdup(p->pw_dir);
+ if (!h)
+ return -ENOMEM;
+
+ *_h = h;
+ return 0;
+}
+
+int get_shell(char **_s) {
+ struct passwd *p;
+ const char *e;
+ char *s;
+ uid_t u;
+
+ assert(_s);
+
+ /* Take the user specified one */
+ e = getenv("SHELL");
+ if (e) {
+ s = strdup(e);
+ if (!s)
+ return -ENOMEM;
+
+ *_s = s;
+ return 0;
+ }
+
+ /* Hardcode shell for root and nobody to avoid NSS */
+ u = getuid();
+ if (u == 0) {
+ s = strdup("/bin/sh");
+ if (!s)
+ return -ENOMEM;
+
+ *_s = s;
+ return 0;
+ }
+ if (synthesize_nobody() &&
+ u == UID_NOBODY) {
+ s = strdup("/sbin/nologin");
+ if (!s)
+ return -ENOMEM;
+
+ *_s = s;
+ return 0;
+ }
+
+ /* Check the database... */
+ errno = 0;
+ p = getpwuid(u);
+ if (!p)
+ return errno > 0 ? -errno : -ESRCH;
+
+ if (!path_is_absolute(p->pw_shell))
+ return -EINVAL;
+
+ s = strdup(p->pw_shell);
+ if (!s)
+ return -ENOMEM;
+
+ *_s = s;
+ return 0;
+}
+
+int reset_uid_gid(void) {
+ int r;
+
+ r = maybe_setgroups(0, NULL);
+ if (r < 0)
+ return r;
+
+ if (setresgid(0, 0, 0) < 0)
+ return -errno;
+
+ if (setresuid(0, 0, 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int take_etc_passwd_lock(const char *root) {
+
+ struct flock flock = {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ .l_start = 0,
+ .l_len = 0,
+ };
+
+ const char *path;
+ int fd, r;
+
+ /* This is roughly the same as lckpwdf(), but not as awful. We
+ * don't want to use alarm() and signals, hence we implement
+ * our own trivial version of this.
+ *
+ * Note that shadow-utils also takes per-database locks in
+ * addition to lckpwdf(). However, we don't given that they
+ * are redundant as they invoke lckpwdf() first and keep
+ * it during everything they do. The per-database locks are
+ * awfully racy, and thus we just won't do them. */
+
+ if (root)
+ path = prefix_roota(root, ETC_PASSWD_LOCK_PATH);
+ else
+ path = ETC_PASSWD_LOCK_PATH;
+
+ fd = open(path, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0600);
+ if (fd < 0)
+ return log_debug_errno(errno, "Cannot open %s: %m", path);
+
+ r = fcntl(fd, F_SETLKW, &flock);
+ if (r < 0) {
+ safe_close(fd);
+ return log_debug_errno(errno, "Locking %s failed: %m", path);
+ }
+
+ return fd;
+}
+
+bool valid_user_group_name(const char *u) {
+ const char *i;
+ long sz;
+
+ /* Checks if the specified name is a valid user/group name. Also see POSIX IEEE Std 1003.1-2008, 2016 Edition,
+ * 3.437. We are a bit stricter here however. Specifically we deviate from POSIX rules:
+ *
+ * - We don't allow any dots (this would break chown syntax which permits dots as user/group name separator)
+ * - We require that names fit into the appropriate utmp field
+ * - We don't allow empty user names
+ *
+ * Note that other systems are even more restrictive, and don't permit underscores or uppercase characters.
+ */
+
+ if (isempty(u))
+ return false;
+
+ if (!(u[0] >= 'a' && u[0] <= 'z') &&
+ !(u[0] >= 'A' && u[0] <= 'Z') &&
+ u[0] != '_')
+ return false;
+
+ for (i = u+1; *i; i++) {
+ if (!(*i >= 'a' && *i <= 'z') &&
+ !(*i >= 'A' && *i <= 'Z') &&
+ !(*i >= '0' && *i <= '9') &&
+ !IN_SET(*i, '_', '-'))
+ return false;
+ }
+
+ sz = sysconf(_SC_LOGIN_NAME_MAX);
+ assert_se(sz > 0);
+
+ if ((size_t) (i-u) > (size_t) sz)
+ return false;
+
+ if ((size_t) (i-u) > UT_NAMESIZE - 1)
+ return false;
+
+ return true;
+}
+
+bool valid_user_group_name_or_id(const char *u) {
+
+ /* Similar as above, but is also fine with numeric UID/GID specifications, as long as they are in the right
+ * range, and not the invalid user ids. */
+
+ if (isempty(u))
+ return false;
+
+ if (valid_user_group_name(u))
+ return true;
+
+ return parse_uid(u, NULL) >= 0;
+}
+
+bool valid_gecos(const char *d) {
+
+ if (!d)
+ return false;
+
+ if (!utf8_is_valid(d))
+ return false;
+
+ if (string_has_cc(d, NULL))
+ return false;
+
+ /* Colons are used as field separators, and hence not OK */
+ if (strchr(d, ':'))
+ return false;
+
+ return true;
+}
+
+bool valid_home(const char *p) {
+ /* Note that this function is also called by valid_shell(), any
+ * changes must account for that. */
+
+ if (isempty(p))
+ return false;
+
+ if (!utf8_is_valid(p))
+ return false;
+
+ if (string_has_cc(p, NULL))
+ return false;
+
+ if (!path_is_absolute(p))
+ return false;
+
+ if (!path_is_normalized(p))
+ return false;
+
+ /* Colons are used as field separators, and hence not OK */
+ if (strchr(p, ':'))
+ return false;
+
+ return true;
+}
+
+int maybe_setgroups(size_t size, const gid_t *list) {
+ int r;
+
+ /* Check if setgroups is allowed before we try to drop all the auxiliary groups */
+ if (size == 0) { /* Dropping all aux groups? */
+ _cleanup_free_ char *setgroups_content = NULL;
+ bool can_setgroups;
+
+ r = read_one_line_file("/proc/self/setgroups", &setgroups_content);
+ if (r == -ENOENT)
+ /* Old kernels don't have /proc/self/setgroups, so assume we can use setgroups */
+ can_setgroups = true;
+ else if (r < 0)
+ return r;
+ else
+ can_setgroups = streq(setgroups_content, "allow");
+
+ if (!can_setgroups) {
+ log_debug("Skipping setgroups(), /proc/self/setgroups is set to 'deny'");
+ return 0;
+ }
+ }
+
+ if (setgroups(size, list) < 0)
+ return -errno;
+
+ return 0;
+}
+
+bool synthesize_nobody(void) {
+ /* Returns true when we shall synthesize the "nobody" user (which we do by default). This can be turned off by
+ * touching /etc/systemd/dont-synthesize-nobody in order to provide upgrade compatibility with legacy systems
+ * that used the "nobody" user name and group name for other UIDs/GIDs than 65534.
+ *
+ * Note that we do not employ any kind of synchronization on the following caching variable. If the variable is
+ * accessed in multi-threaded programs in the worst case it might happen that we initialize twice, but that
+ * shouldn't matter as each initialization should come to the same result. */
+ static int cache = -1;
+
+ if (cache < 0)
+ cache = access("/etc/systemd/dont-synthesize-nobody", F_OK) < 0;
+
+ return cache;
+}
+
+int putpwent_sane(const struct passwd *pw, FILE *stream) {
+ assert(pw);
+ assert(stream);
+
+ errno = 0;
+ if (putpwent(pw, stream) != 0)
+ return errno > 0 ? -errno : -EIO;
+
+ return 0;
+}
+
+int putspent_sane(const struct spwd *sp, FILE *stream) {
+ assert(sp);
+ assert(stream);
+
+ errno = 0;
+ if (putspent(sp, stream) != 0)
+ return errno > 0 ? -errno : -EIO;
+
+ return 0;
+}
+
+int putgrent_sane(const struct group *gr, FILE *stream) {
+ assert(gr);
+ assert(stream);
+
+ errno = 0;
+ if (putgrent(gr, stream) != 0)
+ return errno > 0 ? -errno : -EIO;
+
+ return 0;
+}
+
+#if ENABLE_GSHADOW
+int putsgent_sane(const struct sgrp *sg, FILE *stream) {
+ assert(sg);
+ assert(stream);
+
+ errno = 0;
+ if (putsgent(sg, stream) != 0)
+ return errno > 0 ? -errno : -EIO;
+
+ return 0;
+}
+#endif
+
+int fgetpwent_sane(FILE *stream, struct passwd **pw) {
+ struct passwd *p;
+
+ assert(pw);
+ assert(stream);
+
+ errno = 0;
+ p = fgetpwent(stream);
+ if (!p && errno != ENOENT)
+ return errno > 0 ? -errno : -EIO;
+
+ *pw = p;
+ return !!p;
+}
+
+int fgetspent_sane(FILE *stream, struct spwd **sp) {
+ struct spwd *s;
+
+ assert(sp);
+ assert(stream);
+
+ errno = 0;
+ s = fgetspent(stream);
+ if (!s && errno != ENOENT)
+ return errno > 0 ? -errno : -EIO;
+
+ *sp = s;
+ return !!s;
+}
+
+int fgetgrent_sane(FILE *stream, struct group **gr) {
+ struct group *g;
+
+ assert(gr);
+ assert(stream);
+
+ errno = 0;
+ g = fgetgrent(stream);
+ if (!g && errno != ENOENT)
+ return errno > 0 ? -errno : -EIO;
+
+ *gr = g;
+ return !!g;
+}
+
+#if ENABLE_GSHADOW
+int fgetsgent_sane(FILE *stream, struct sgrp **sg) {
+ struct sgrp *s;
+
+ assert(sg);
+ assert(stream);
+
+ errno = 0;
+ s = fgetsgent(stream);
+ if (!s && errno != ENOENT)
+ return errno > 0 ? -errno : -EIO;
+
+ *sg = s;
+ return !!s;
+}
+#endif
diff --git a/src/basic/user-util.h b/src/basic/user-util.h
new file mode 100644
index 0000000..cc899ee
--- /dev/null
+++ b/src/basic/user-util.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <grp.h>
+#if ENABLE_GSHADOW
+#include <gshadow.h>
+#endif
+#include <pwd.h>
+#include <shadow.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+bool uid_is_valid(uid_t uid);
+
+static inline bool gid_is_valid(gid_t gid) {
+ return uid_is_valid((uid_t) gid);
+}
+
+int parse_uid(const char *s, uid_t* ret_uid);
+
+static inline int parse_gid(const char *s, gid_t *ret_gid) {
+ return parse_uid(s, (uid_t*) ret_gid);
+}
+
+char* getlogname_malloc(void);
+char* getusername_malloc(void);
+
+typedef enum UserCredsFlags {
+ USER_CREDS_PREFER_NSS = 1 << 0, /* if set, only synthesize user records if database lacks them. Normally we bypass the userdb entirely for the records we can synthesize */
+ USER_CREDS_ALLOW_MISSING = 1 << 1, /* if a numeric UID string is resolved, be OK if there's no record for it */
+ USER_CREDS_CLEAN = 1 << 2, /* try to clean up shell and home fields with invalid data */
+} UserCredsFlags;
+
+int get_user_creds(const char **username, uid_t *uid, gid_t *gid, const char **home, const char **shell, UserCredsFlags flags);
+int get_group_creds(const char **groupname, gid_t *gid, UserCredsFlags flags);
+
+char* uid_to_name(uid_t uid);
+char* gid_to_name(gid_t gid);
+
+int in_gid(gid_t gid);
+int in_group(const char *name);
+
+int get_home_dir(char **ret);
+int get_shell(char **_ret);
+
+int reset_uid_gid(void);
+
+int take_etc_passwd_lock(const char *root);
+
+#define UID_INVALID ((uid_t) -1)
+#define GID_INVALID ((gid_t) -1)
+
+#define UID_NOBODY ((uid_t) 65534U)
+#define GID_NOBODY ((gid_t) 65534U)
+
+#define ETC_PASSWD_LOCK_PATH "/etc/.pwd.lock"
+
+static inline bool uid_is_dynamic(uid_t uid) {
+ return DYNAMIC_UID_MIN <= uid && uid <= DYNAMIC_UID_MAX;
+}
+
+static inline bool gid_is_dynamic(gid_t gid) {
+ return uid_is_dynamic((uid_t) gid);
+}
+
+static inline bool uid_is_system(uid_t uid) {
+ return uid <= SYSTEM_UID_MAX;
+}
+
+static inline bool gid_is_system(gid_t gid) {
+ return gid <= SYSTEM_GID_MAX;
+}
+
+/* The following macros add 1 when converting things, since UID 0 is a valid UID, while the pointer
+ * NULL is special */
+#define PTR_TO_UID(p) ((uid_t) (((uintptr_t) (p))-1))
+#define UID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1))
+
+#define PTR_TO_GID(p) ((gid_t) (((uintptr_t) (p))-1))
+#define GID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1))
+
+static inline bool userns_supported(void) {
+ return access("/proc/self/uid_map", F_OK) >= 0;
+}
+
+bool valid_user_group_name(const char *u);
+bool valid_user_group_name_or_id(const char *u);
+bool valid_gecos(const char *d);
+bool valid_home(const char *p);
+
+static inline bool valid_shell(const char *p) {
+ /* We have the same requirements, so just piggy-back on the home check.
+ *
+ * Let's ignore /etc/shells because this is only applicable to real and
+ * not system users. It is also incompatible with the idea of empty /etc.
+ */
+ return valid_home(p);
+}
+
+int maybe_setgroups(size_t size, const gid_t *list);
+
+bool synthesize_nobody(void);
+
+int fgetpwent_sane(FILE *stream, struct passwd **pw);
+int fgetspent_sane(FILE *stream, struct spwd **sp);
+int fgetgrent_sane(FILE *stream, struct group **gr);
+int putpwent_sane(const struct passwd *pw, FILE *stream);
+int putspent_sane(const struct spwd *sp, FILE *stream);
+int putgrent_sane(const struct group *gr, FILE *stream);
+#if ENABLE_GSHADOW
+int fgetsgent_sane(FILE *stream, struct sgrp **sg);
+int putsgent_sane(const struct sgrp *sg, FILE *stream);
+#endif
diff --git a/src/basic/utf8.c b/src/basic/utf8.c
new file mode 100644
index 0000000..e0d1949
--- /dev/null
+++ b/src/basic/utf8.c
@@ -0,0 +1,532 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+/* Parts of this file are based on the GLIB utf8 validation functions. The
+ * original license text follows. */
+
+/* gutf8.c - Operations on UTF-8 strings.
+ *
+ * Copyright (C) 1999 Tom Tromey
+ * Copyright (C) 2000 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "gunicode.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "utf8.h"
+
+bool unichar_is_valid(char32_t ch) {
+
+ if (ch >= 0x110000) /* End of unicode space */
+ return false;
+ if ((ch & 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16 */
+ return false;
+ if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) /* Reserved */
+ return false;
+ if ((ch & 0xFFFE) == 0xFFFE) /* BOM (Byte Order Mark) */
+ return false;
+
+ return true;
+}
+
+static bool unichar_is_control(char32_t ch) {
+
+ /*
+ 0 to ' '-1 is the C0 range.
+ DEL=0x7F, and DEL+1 to 0x9F is C1 range.
+ '\t' is in C0 range, but more or less harmless and commonly used.
+ */
+
+ return (ch < ' ' && !IN_SET(ch, '\t', '\n')) ||
+ (0x7F <= ch && ch <= 0x9F);
+}
+
+/* count of characters used to encode one unicode char */
+static size_t utf8_encoded_expected_len(const char *str) {
+ uint8_t c;
+
+ assert(str);
+
+ c = (uint8_t) str[0];
+ if (c < 0x80)
+ return 1;
+ if ((c & 0xe0) == 0xc0)
+ return 2;
+ if ((c & 0xf0) == 0xe0)
+ return 3;
+ if ((c & 0xf8) == 0xf0)
+ return 4;
+ if ((c & 0xfc) == 0xf8)
+ return 5;
+ if ((c & 0xfe) == 0xfc)
+ return 6;
+
+ return 0;
+}
+
+/* decode one unicode char */
+int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar) {
+ char32_t unichar;
+ size_t len, i;
+
+ assert(str);
+
+ len = utf8_encoded_expected_len(str);
+
+ switch (len) {
+ case 1:
+ *ret_unichar = (char32_t)str[0];
+ return 0;
+ case 2:
+ unichar = str[0] & 0x1f;
+ break;
+ case 3:
+ unichar = (char32_t)str[0] & 0x0f;
+ break;
+ case 4:
+ unichar = (char32_t)str[0] & 0x07;
+ break;
+ case 5:
+ unichar = (char32_t)str[0] & 0x03;
+ break;
+ case 6:
+ unichar = (char32_t)str[0] & 0x01;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ for (i = 1; i < len; i++) {
+ if (((char32_t)str[i] & 0xc0) != 0x80)
+ return -EINVAL;
+
+ unichar <<= 6;
+ unichar |= (char32_t)str[i] & 0x3f;
+ }
+
+ *ret_unichar = unichar;
+
+ return 0;
+}
+
+bool utf8_is_printable_newline(const char* str, size_t length, bool newline) {
+ const char *p;
+
+ assert(str);
+
+ for (p = str; length;) {
+ int encoded_len, r;
+ char32_t val;
+
+ encoded_len = utf8_encoded_valid_unichar(p);
+ if (encoded_len < 0 ||
+ (size_t) encoded_len > length)
+ return false;
+
+ r = utf8_encoded_to_unichar(p, &val);
+ if (r < 0 ||
+ unichar_is_control(val) ||
+ (!newline && val == '\n'))
+ return false;
+
+ length -= encoded_len;
+ p += encoded_len;
+ }
+
+ return true;
+}
+
+char *utf8_is_valid(const char *str) {
+ const char *p;
+
+ assert(str);
+
+ p = str;
+ while (*p) {
+ int len;
+
+ len = utf8_encoded_valid_unichar(p);
+ if (len < 0)
+ return NULL;
+
+ p += len;
+ }
+
+ return (char*) str;
+}
+
+char *utf8_escape_invalid(const char *str) {
+ char *p, *s;
+
+ assert(str);
+
+ p = s = malloc(strlen(str) * 4 + 1);
+ if (!p)
+ return NULL;
+
+ while (*str) {
+ int len;
+
+ len = utf8_encoded_valid_unichar(str);
+ if (len > 0) {
+ s = mempcpy(s, str, len);
+ str += len;
+ } else {
+ s = stpcpy(s, UTF8_REPLACEMENT_CHARACTER);
+ str += 1;
+ }
+ }
+
+ *s = '\0';
+
+ return p;
+}
+
+char *utf8_escape_non_printable(const char *str) {
+ char *p, *s;
+
+ assert(str);
+
+ p = s = malloc(strlen(str) * 4 + 1);
+ if (!p)
+ return NULL;
+
+ while (*str) {
+ int len;
+
+ len = utf8_encoded_valid_unichar(str);
+ if (len > 0) {
+ if (utf8_is_printable(str, len)) {
+ s = mempcpy(s, str, len);
+ str += len;
+ } else {
+ while (len > 0) {
+ *(s++) = '\\';
+ *(s++) = 'x';
+ *(s++) = hexchar((int) *str >> 4);
+ *(s++) = hexchar((int) *str);
+
+ str += 1;
+ len--;
+ }
+ }
+ } else {
+ s = stpcpy(s, UTF8_REPLACEMENT_CHARACTER);
+ str += 1;
+ }
+ }
+
+ *s = '\0';
+
+ return p;
+}
+
+char *ascii_is_valid(const char *str) {
+ const char *p;
+
+ /* Check whether the string consists of valid ASCII bytes,
+ * i.e values between 0 and 127, inclusive. */
+
+ assert(str);
+
+ for (p = str; *p; p++)
+ if ((unsigned char) *p >= 128)
+ return NULL;
+
+ return (char*) str;
+}
+
+char *ascii_is_valid_n(const char *str, size_t len) {
+ size_t i;
+
+ /* Very similar to ascii_is_valid(), but checks exactly len
+ * bytes and rejects any NULs in that range. */
+
+ assert(str);
+
+ for (i = 0; i < len; i++)
+ if ((unsigned char) str[i] >= 128 || str[i] == 0)
+ return NULL;
+
+ return (char*) str;
+}
+
+/**
+ * utf8_encode_unichar() - Encode single UCS-4 character as UTF-8
+ * @out_utf8: output buffer of at least 4 bytes or NULL
+ * @g: UCS-4 character to encode
+ *
+ * This encodes a single UCS-4 character as UTF-8 and writes it into @out_utf8.
+ * The length of the character is returned. It is not zero-terminated! If the
+ * output buffer is NULL, only the length is returned.
+ *
+ * Returns: The length in bytes that the UTF-8 representation does or would
+ * occupy.
+ */
+size_t utf8_encode_unichar(char *out_utf8, char32_t g) {
+
+ if (g < (1 << 7)) {
+ if (out_utf8)
+ out_utf8[0] = g & 0x7f;
+ return 1;
+ } else if (g < (1 << 11)) {
+ if (out_utf8) {
+ out_utf8[0] = 0xc0 | ((g >> 6) & 0x1f);
+ out_utf8[1] = 0x80 | (g & 0x3f);
+ }
+ return 2;
+ } else if (g < (1 << 16)) {
+ if (out_utf8) {
+ out_utf8[0] = 0xe0 | ((g >> 12) & 0x0f);
+ out_utf8[1] = 0x80 | ((g >> 6) & 0x3f);
+ out_utf8[2] = 0x80 | (g & 0x3f);
+ }
+ return 3;
+ } else if (g < (1 << 21)) {
+ if (out_utf8) {
+ out_utf8[0] = 0xf0 | ((g >> 18) & 0x07);
+ out_utf8[1] = 0x80 | ((g >> 12) & 0x3f);
+ out_utf8[2] = 0x80 | ((g >> 6) & 0x3f);
+ out_utf8[3] = 0x80 | (g & 0x3f);
+ }
+ return 4;
+ }
+
+ return 0;
+}
+
+char *utf16_to_utf8(const char16_t *s, size_t length /* bytes! */) {
+ const uint8_t *f;
+ char *r, *t;
+
+ assert(s);
+
+ /* Input length is in bytes, i.e. the shortest possible character takes 2 bytes. Each unicode character may
+ * take up to 4 bytes in UTF-8. Let's also account for a trailing NUL byte. */
+ if (length * 2 < length)
+ return NULL; /* overflow */
+
+ r = new(char, length * 2 + 1);
+ if (!r)
+ return NULL;
+
+ f = (const uint8_t*) s;
+ t = r;
+
+ while (f + 1 < (const uint8_t*) s + length) {
+ char16_t w1, w2;
+
+ /* see RFC 2781 section 2.2 */
+
+ w1 = f[1] << 8 | f[0];
+ f += 2;
+
+ if (!utf16_is_surrogate(w1)) {
+ t += utf8_encode_unichar(t, w1);
+ continue;
+ }
+
+ if (utf16_is_trailing_surrogate(w1))
+ continue; /* spurious trailing surrogate, ignore */
+
+ if (f + 1 >= (const uint8_t*) s + length)
+ break;
+
+ w2 = f[1] << 8 | f[0];
+ f += 2;
+
+ if (!utf16_is_trailing_surrogate(w2)) {
+ f -= 2;
+ continue; /* surrogate missing its trailing surrogate, ignore */
+ }
+
+ t += utf8_encode_unichar(t, utf16_surrogate_pair_to_unichar(w1, w2));
+ }
+
+ *t = 0;
+ return r;
+}
+
+size_t utf16_encode_unichar(char16_t *out, char32_t c) {
+
+ /* Note that this encodes as little-endian. */
+
+ switch (c) {
+
+ case 0 ... 0xd7ffU:
+ case 0xe000U ... 0xffffU:
+ out[0] = htole16(c);
+ return 1;
+
+ case 0x10000U ... 0x10ffffU:
+ c -= 0x10000U;
+ out[0] = htole16((c >> 10) + 0xd800U);
+ out[1] = htole16((c & 0x3ffU) + 0xdc00U);
+ return 2;
+
+ default: /* A surrogate (invalid) */
+ return 0;
+ }
+}
+
+char16_t *utf8_to_utf16(const char *s, size_t length) {
+ char16_t *n, *p;
+ size_t i;
+ int r;
+
+ assert(s);
+
+ n = new(char16_t, length + 1);
+ if (!n)
+ return NULL;
+
+ p = n;
+
+ for (i = 0; i < length;) {
+ char32_t unichar;
+ size_t e;
+
+ e = utf8_encoded_expected_len(s + i);
+ if (e <= 1) /* Invalid and single byte characters are copied as they are */
+ goto copy;
+
+ if (i + e > length) /* sequence longer than input buffer, then copy as-is */
+ goto copy;
+
+ r = utf8_encoded_to_unichar(s + i, &unichar);
+ if (r < 0) /* sequence invalid, then copy as-is */
+ goto copy;
+
+ p += utf16_encode_unichar(p, unichar);
+ i += e;
+ continue;
+
+ copy:
+ *(p++) = htole16(s[i++]);
+ }
+
+ *p = 0;
+ return n;
+}
+
+size_t char16_strlen(const char16_t *s) {
+ size_t n = 0;
+
+ assert(s);
+
+ while (*s != 0)
+ n++, s++;
+
+ return n;
+}
+
+/* expected size used to encode one unicode char */
+static int utf8_unichar_to_encoded_len(char32_t unichar) {
+
+ if (unichar < 0x80)
+ return 1;
+ if (unichar < 0x800)
+ return 2;
+ if (unichar < 0x10000)
+ return 3;
+ if (unichar < 0x200000)
+ return 4;
+ if (unichar < 0x4000000)
+ return 5;
+
+ return 6;
+}
+
+/* validate one encoded unicode char and return its length */
+int utf8_encoded_valid_unichar(const char *str) {
+ char32_t unichar;
+ size_t len, i;
+ int r;
+
+ assert(str);
+
+ len = utf8_encoded_expected_len(str);
+ if (len == 0)
+ return -EINVAL;
+
+ /* ascii is valid */
+ if (len == 1)
+ return 1;
+
+ /* check if expected encoded chars are available */
+ for (i = 0; i < len; i++)
+ if ((str[i] & 0x80) != 0x80)
+ return -EINVAL;
+
+ r = utf8_encoded_to_unichar(str, &unichar);
+ if (r < 0)
+ return r;
+
+ /* check if encoded length matches encoded value */
+ if (utf8_unichar_to_encoded_len(unichar) != (int) len)
+ return -EINVAL;
+
+ /* check if value has valid range */
+ if (!unichar_is_valid(unichar))
+ return -EINVAL;
+
+ return (int) len;
+}
+
+size_t utf8_n_codepoints(const char *str) {
+ size_t n = 0;
+
+ /* Returns the number of UTF-8 codepoints in this string, or (size_t) -1 if the string is not valid UTF-8. */
+
+ while (*str != 0) {
+ int k;
+
+ k = utf8_encoded_valid_unichar(str);
+ if (k < 0)
+ return (size_t) -1;
+
+ str += k;
+ n++;
+ }
+
+ return n;
+}
+
+size_t utf8_console_width(const char *str) {
+ size_t n = 0;
+
+ /* Returns the approximate width a string will take on screen when printed on a character cell
+ * terminal/console. */
+
+ while (*str != 0) {
+ char32_t c;
+
+ if (utf8_encoded_to_unichar(str, &c) < 0)
+ return (size_t) -1;
+
+ str = utf8_next_char(str);
+
+ n += unichar_iswide(c) ? 2 : 1;
+ }
+
+ return n;
+}
diff --git a/src/basic/utf8.h b/src/basic/utf8.h
new file mode 100644
index 0000000..6284569
--- /dev/null
+++ b/src/basic/utf8.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <uchar.h>
+
+#include "macro.h"
+#include "missing_type.h"
+
+#define UTF8_REPLACEMENT_CHARACTER "\xef\xbf\xbd"
+#define UTF8_BYTE_ORDER_MARK "\xef\xbb\xbf"
+
+bool unichar_is_valid(char32_t c);
+
+char *utf8_is_valid(const char *s) _pure_;
+char *ascii_is_valid(const char *s) _pure_;
+char *ascii_is_valid_n(const char *str, size_t len);
+
+bool utf8_is_printable_newline(const char* str, size_t length, bool newline) _pure_;
+#define utf8_is_printable(str, length) utf8_is_printable_newline(str, length, true)
+
+char *utf8_escape_invalid(const char *s);
+char *utf8_escape_non_printable(const char *str);
+
+size_t utf8_encode_unichar(char *out_utf8, char32_t g);
+size_t utf16_encode_unichar(char16_t *out, char32_t c);
+
+char *utf16_to_utf8(const char16_t *s, size_t length /* bytes! */);
+char16_t *utf8_to_utf16(const char *s, size_t length);
+
+size_t char16_strlen(const char16_t *s); /* returns the number of 16bit words in the string (not bytes!) */
+
+int utf8_encoded_valid_unichar(const char *str);
+int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar);
+
+static inline bool utf16_is_surrogate(char16_t c) {
+ return c >= 0xd800U && c <= 0xdfffU;
+}
+
+static inline bool utf16_is_trailing_surrogate(char16_t c) {
+ return c >= 0xdc00U && c <= 0xdfffU;
+}
+
+static inline char32_t utf16_surrogate_pair_to_unichar(char16_t lead, char16_t trail) {
+ return ((((char32_t) lead - 0xd800U) << 10) + ((char32_t) trail - 0xdc00U) + 0x10000U);
+}
+
+size_t utf8_n_codepoints(const char *str);
+size_t utf8_console_width(const char *str);
diff --git a/src/basic/util.c b/src/basic/util.c
new file mode 100644
index 0000000..e577c93
--- /dev/null
+++ b/src/basic/util.c
@@ -0,0 +1,637 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <alloca.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/statfs.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "build.h"
+#include "cgroup-util.h"
+#include "def.h"
+#include "device-nodes.h"
+#include "dirent-util.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "procfs-util.h"
+#include "set.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+int saved_argc = 0;
+char **saved_argv = NULL;
+static int saved_in_initrd = -1;
+
+size_t page_size(void) {
+ static thread_local size_t pgsz = 0;
+ long r;
+
+ if (_likely_(pgsz > 0))
+ return pgsz;
+
+ r = sysconf(_SC_PAGESIZE);
+ assert(r > 0);
+
+ pgsz = (size_t) r;
+ return pgsz;
+}
+
+bool plymouth_running(void) {
+ return access("/run/plymouth/pid", F_OK) >= 0;
+}
+
+bool display_is_local(const char *display) {
+ assert(display);
+
+ return
+ display[0] == ':' &&
+ display[1] >= '0' &&
+ display[1] <= '9';
+}
+
+bool kexec_loaded(void) {
+ _cleanup_free_ char *s = NULL;
+
+ if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
+ return false;
+
+ return s[0] == '1';
+}
+
+int prot_from_flags(int flags) {
+
+ switch (flags & O_ACCMODE) {
+
+ case O_RDONLY:
+ return PROT_READ;
+
+ case O_WRONLY:
+ return PROT_WRITE;
+
+ case O_RDWR:
+ return PROT_READ|PROT_WRITE;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+bool in_initrd(void) {
+ struct statfs s;
+ int r;
+
+ if (saved_in_initrd >= 0)
+ return saved_in_initrd;
+
+ /* We make two checks here:
+ *
+ * 1. the flag file /etc/initrd-release must exist
+ * 2. the root file system must be a memory file system
+ *
+ * The second check is extra paranoia, since misdetecting an
+ * initrd can have bad consequences due the initrd
+ * emptying when transititioning to the main systemd.
+ */
+
+ r = getenv_bool_secure("SYSTEMD_IN_INITRD");
+ if (r < 0 && r != -ENXIO)
+ log_debug_errno(r, "Failed to parse $SYSTEMD_IN_INITRD, ignoring: %m");
+
+ if (r >= 0)
+ saved_in_initrd = r > 0;
+ else
+ saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
+ statfs("/", &s) >= 0 &&
+ is_temporary_fs(&s);
+
+ return saved_in_initrd;
+}
+
+void in_initrd_force(bool value) {
+ saved_in_initrd = value;
+}
+
+/* hey glibc, APIs with callbacks without a user pointer are so useless */
+void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
+ __compar_d_fn_t compar, void *arg) {
+ size_t l, u, idx;
+ const void *p;
+ int comparison;
+
+ assert(!size_multiply_overflow(nmemb, size));
+
+ l = 0;
+ u = nmemb;
+ while (l < u) {
+ idx = (l + u) / 2;
+ p = (const uint8_t*) base + idx * size;
+ comparison = compar(key, p, arg);
+ if (comparison < 0)
+ u = idx;
+ else if (comparison > 0)
+ l = idx + 1;
+ else
+ return (void *)p;
+ }
+ return NULL;
+}
+
+bool memeqzero(const void *data, size_t length) {
+ /* Does the buffer consist entirely of NULs?
+ * Copied from https://github.com/systemd/casync/, copied in turn from
+ * https://github.com/rustyrussell/ccan/blob/master/ccan/mem/mem.c#L92,
+ * which is licensed CC-0.
+ */
+
+ const uint8_t *p = data;
+ size_t i;
+
+ /* Check first 16 bytes manually */
+ for (i = 0; i < 16; i++, length--) {
+ if (length == 0)
+ return true;
+ if (p[i])
+ return false;
+ }
+
+ /* Now we know first 16 bytes are NUL, memcmp with self. */
+ return memcmp(data, p + i, length) == 0;
+}
+
+int on_ac_power(void) {
+ bool found_offline = false, found_online = false;
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir("/sys/class/power_supply");
+ if (!d)
+ return errno == ENOENT ? true : -errno;
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ _cleanup_close_ int fd = -1, device = -1;
+ char contents[6];
+ ssize_t n;
+
+ device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (device < 0) {
+ if (IN_SET(errno, ENOENT, ENOTDIR))
+ continue;
+
+ return -errno;
+ }
+
+ fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ n = read(fd, contents, sizeof(contents));
+ if (n < 0)
+ return -errno;
+
+ if (n != 6 || memcmp(contents, "Mains\n", 6))
+ continue;
+
+ safe_close(fd);
+ fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ n = read(fd, contents, sizeof(contents));
+ if (n < 0)
+ return -errno;
+
+ if (n != 2 || contents[1] != '\n')
+ return -EIO;
+
+ if (contents[0] == '1') {
+ found_online = true;
+ break;
+ } else if (contents[0] == '0')
+ found_offline = true;
+ else
+ return -EIO;
+ }
+
+ return found_online || !found_offline;
+}
+
+int container_get_leader(const char *machine, pid_t *pid) {
+ _cleanup_free_ char *s = NULL, *class = NULL;
+ const char *p;
+ pid_t leader;
+ int r;
+
+ assert(machine);
+ assert(pid);
+
+ if (streq(machine, ".host")) {
+ *pid = 1;
+ return 0;
+ }
+
+ if (!machine_name_is_valid(machine))
+ return -EINVAL;
+
+ p = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, p,
+ "LEADER", &s,
+ "CLASS", &class);
+ if (r == -ENOENT)
+ return -EHOSTDOWN;
+ if (r < 0)
+ return r;
+ if (!s)
+ return -EIO;
+
+ if (!streq_ptr(class, "container"))
+ return -EIO;
+
+ r = parse_pid(s, &leader);
+ if (r < 0)
+ return r;
+ if (leader <= 1)
+ return -EIO;
+
+ *pid = leader;
+ return 0;
+}
+
+int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
+ int rfd = -1;
+
+ assert(pid >= 0);
+
+ if (mntns_fd) {
+ const char *mntns;
+
+ mntns = procfs_file_alloca(pid, "ns/mnt");
+ mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (mntnsfd < 0)
+ return -errno;
+ }
+
+ if (pidns_fd) {
+ const char *pidns;
+
+ pidns = procfs_file_alloca(pid, "ns/pid");
+ pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (pidnsfd < 0)
+ return -errno;
+ }
+
+ if (netns_fd) {
+ const char *netns;
+
+ netns = procfs_file_alloca(pid, "ns/net");
+ netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (netnsfd < 0)
+ return -errno;
+ }
+
+ if (userns_fd) {
+ const char *userns;
+
+ userns = procfs_file_alloca(pid, "ns/user");
+ usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (usernsfd < 0 && errno != ENOENT)
+ return -errno;
+ }
+
+ if (root_fd) {
+ const char *root;
+
+ root = procfs_file_alloca(pid, "root");
+ rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (rfd < 0)
+ return -errno;
+ }
+
+ if (pidns_fd)
+ *pidns_fd = pidnsfd;
+
+ if (mntns_fd)
+ *mntns_fd = mntnsfd;
+
+ if (netns_fd)
+ *netns_fd = netnsfd;
+
+ if (userns_fd)
+ *userns_fd = usernsfd;
+
+ if (root_fd)
+ *root_fd = rfd;
+
+ pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
+
+ return 0;
+}
+
+int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
+ if (userns_fd >= 0) {
+ /* Can't setns to your own userns, since then you could
+ * escalate from non-root to root in your own namespace, so
+ * check if namespaces equal before attempting to enter. */
+ _cleanup_free_ char *userns_fd_path = NULL;
+ int r;
+ if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
+ return -ENOMEM;
+
+ r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
+ if (r < 0)
+ return r;
+ if (r)
+ userns_fd = -1;
+ }
+
+ if (pidns_fd >= 0)
+ if (setns(pidns_fd, CLONE_NEWPID) < 0)
+ return -errno;
+
+ if (mntns_fd >= 0)
+ if (setns(mntns_fd, CLONE_NEWNS) < 0)
+ return -errno;
+
+ if (netns_fd >= 0)
+ if (setns(netns_fd, CLONE_NEWNET) < 0)
+ return -errno;
+
+ if (userns_fd >= 0)
+ if (setns(userns_fd, CLONE_NEWUSER) < 0)
+ return -errno;
+
+ if (root_fd >= 0) {
+ if (fchdir(root_fd) < 0)
+ return -errno;
+
+ if (chroot(".") < 0)
+ return -errno;
+ }
+
+ return reset_uid_gid();
+}
+
+uint64_t physical_memory(void) {
+ _cleanup_free_ char *root = NULL, *value = NULL;
+ uint64_t mem, lim;
+ size_t ps;
+ long sc;
+ int r;
+
+ /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
+ * memory.
+ *
+ * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
+ * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
+
+ sc = sysconf(_SC_PHYS_PAGES);
+ assert(sc > 0);
+
+ ps = page_size();
+ mem = (uint64_t) sc * (uint64_t) ps;
+
+ r = cg_get_root_path(&root);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m");
+ return mem;
+ }
+
+ r = cg_all_unified();
+ if (r < 0) {
+ log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m");
+ return mem;
+ }
+ if (r > 0) {
+ r = cg_get_attribute("memory", root, "memory.max", &value);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m");
+ return mem;
+ }
+
+ if (streq(value, "max"))
+ return mem;
+ } else {
+ r = cg_get_attribute("memory", root, "memory.limit_in_bytes", &value);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m");
+ return mem;
+ }
+ }
+
+ r = safe_atou64(value, &lim);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", value);
+ return mem;
+ }
+ if (lim == UINT64_MAX)
+ return mem;
+
+ /* Make sure the limit is a multiple of our own page size */
+ lim /= ps;
+ lim *= ps;
+
+ return MIN(mem, lim);
+}
+
+uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
+ uint64_t p, m, ps, r;
+
+ assert(max > 0);
+
+ /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
+ * the result is a multiple of the page size (rounds down). */
+
+ ps = page_size();
+ assert(ps > 0);
+
+ p = physical_memory() / ps;
+ assert(p > 0);
+
+ m = p * v;
+ if (m / p != v)
+ return UINT64_MAX;
+
+ m /= max;
+
+ r = m * ps;
+ if (r / ps != m)
+ return UINT64_MAX;
+
+ return r;
+}
+
+uint64_t system_tasks_max(void) {
+
+ uint64_t a = TASKS_MAX, b = TASKS_MAX;
+ _cleanup_free_ char *root = NULL;
+ int r;
+
+ /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
+ * limit:
+ *
+ * a) the maximum tasks value the kernel allows on this architecture
+ * b) the cgroups pids_max attribute for the system
+ * c) the kernel's configured maximum PID value
+ *
+ * And then pick the smallest of the three */
+
+ r = procfs_tasks_get_limit(&a);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read maximum number of tasks from /proc, ignoring: %m");
+
+ r = cg_get_root_path(&root);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine cgroup root path, ignoring: %m");
+ else {
+ _cleanup_free_ char *value = NULL;
+
+ r = cg_get_attribute("pids", root, "pids.max", &value);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
+ else if (!streq(value, "max")) {
+ r = safe_atou64(value, &b);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse pids.max attribute of cgroup root, ignoring: %m");
+ }
+ }
+
+ return MIN3(TASKS_MAX,
+ a <= 0 ? TASKS_MAX : a,
+ b <= 0 ? TASKS_MAX : b);
+}
+
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
+ uint64_t t, m;
+
+ assert(max > 0);
+
+ /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
+ * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
+
+ t = system_tasks_max();
+ assert(t > 0);
+
+ m = t * v;
+ if (m / t != v) /* overflow? */
+ return UINT64_MAX;
+
+ return m / max;
+}
+
+int version(void) {
+ puts("systemd " STRINGIFY(PROJECT_VERSION) " (" GIT_VERSION ")\n"
+ SYSTEMD_FEATURES);
+ return 0;
+}
+
+/* This is a direct translation of str_verscmp from boot.c */
+static bool is_digit(int c) {
+ return c >= '0' && c <= '9';
+}
+
+static int c_order(int c) {
+ if (c == 0 || is_digit(c))
+ return 0;
+
+ if ((c >= 'a') && (c <= 'z'))
+ return c;
+
+ return c + 0x10000;
+}
+
+int str_verscmp(const char *s1, const char *s2) {
+ const char *os1, *os2;
+
+ assert(s1);
+ assert(s2);
+
+ os1 = s1;
+ os2 = s2;
+
+ while (*s1 || *s2) {
+ int first;
+
+ while ((*s1 && !is_digit(*s1)) || (*s2 && !is_digit(*s2))) {
+ int order;
+
+ order = c_order(*s1) - c_order(*s2);
+ if (order != 0)
+ return order;
+ s1++;
+ s2++;
+ }
+
+ while (*s1 == '0')
+ s1++;
+ while (*s2 == '0')
+ s2++;
+
+ first = 0;
+ while (is_digit(*s1) && is_digit(*s2)) {
+ if (first == 0)
+ first = *s1 - *s2;
+ s1++;
+ s2++;
+ }
+
+ if (is_digit(*s1))
+ return 1;
+ if (is_digit(*s2))
+ return -1;
+
+ if (first != 0)
+ return first;
+ }
+
+ return strcmp(os1, os2);
+}
+
+/* Turn off core dumps but only if we're running outside of a container. */
+void disable_coredumps(void) {
+ int r;
+
+ if (detect_container() > 0)
+ return;
+
+ r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m");
+}
diff --git a/src/basic/util.h b/src/basic/util.h
new file mode 100644
index 0000000..dc33d66
--- /dev/null
+++ b/src/basic/util.h
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <alloca.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/inotify.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "format-util.h"
+#include "macro.h"
+#include "time-util.h"
+
+size_t page_size(void) _pure_;
+#define PAGE_ALIGN(l) ALIGN_TO((l), page_size())
+
+static inline const char* yes_no(bool b) {
+ return b ? "yes" : "no";
+}
+
+static inline const char* true_false(bool b) {
+ return b ? "true" : "false";
+}
+
+static inline const char* one_zero(bool b) {
+ return b ? "1" : "0";
+}
+
+static inline const char* enable_disable(bool b) {
+ return b ? "enable" : "disable";
+}
+
+bool plymouth_running(void);
+
+bool display_is_local(const char *display) _pure_;
+
+#define NULSTR_FOREACH(i, l) \
+ for ((i) = (l); (i) && *(i); (i) = strchr((i), 0)+1)
+
+#define NULSTR_FOREACH_PAIR(i, j, l) \
+ for ((i) = (l), (j) = strchr((i), 0)+1; (i) && *(i); (i) = strchr((j), 0)+1, (j) = *(i) ? strchr((i), 0)+1 : (i))
+
+extern int saved_argc;
+extern char **saved_argv;
+
+bool kexec_loaded(void);
+
+int prot_from_flags(int flags) _const_;
+
+bool in_initrd(void);
+void in_initrd_force(bool value);
+
+void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
+ __compar_d_fn_t compar, void *arg);
+
+#define typesafe_bsearch_r(k, b, n, func, userdata) \
+ ({ \
+ const typeof(b[0]) *_k = k; \
+ int (*_func_)(const typeof(b[0])*, const typeof(b[0])*, typeof(userdata)) = func; \
+ xbsearch_r((const void*) _k, (b), (n), sizeof((b)[0]), (__compar_d_fn_t) _func_, userdata); \
+ })
+
+/**
+ * Normal bsearch requires base to be nonnull. Here were require
+ * that only if nmemb > 0.
+ */
+static inline void* bsearch_safe(const void *key, const void *base,
+ size_t nmemb, size_t size, __compar_fn_t compar) {
+ if (nmemb <= 0)
+ return NULL;
+
+ assert(base);
+ return bsearch(key, base, nmemb, size, compar);
+}
+
+#define typesafe_bsearch(k, b, n, func) \
+ ({ \
+ const typeof(b[0]) *_k = k; \
+ int (*_func_)(const typeof(b[0])*, const typeof(b[0])*) = func; \
+ bsearch_safe((const void*) _k, (b), (n), sizeof((b)[0]), (__compar_fn_t) _func_); \
+ })
+
+/**
+ * Normal qsort requires base to be nonnull. Here were require
+ * that only if nmemb > 0.
+ */
+static inline void qsort_safe(void *base, size_t nmemb, size_t size, __compar_fn_t compar) {
+ if (nmemb <= 1)
+ return;
+
+ assert(base);
+ qsort(base, nmemb, size, compar);
+}
+
+/* A wrapper around the above, but that adds typesafety: the element size is automatically derived from the type and so
+ * is the prototype for the comparison function */
+#define typesafe_qsort(p, n, func) \
+ ({ \
+ int (*_func_)(const typeof(p[0])*, const typeof(p[0])*) = func; \
+ qsort_safe((p), (n), sizeof((p)[0]), (__compar_fn_t) _func_); \
+ })
+
+static inline void qsort_r_safe(void *base, size_t nmemb, size_t size, __compar_d_fn_t compar, void *userdata) {
+ if (nmemb <= 1)
+ return;
+
+ assert(base);
+ qsort_r(base, nmemb, size, compar, userdata);
+}
+
+#define typesafe_qsort_r(p, n, func, userdata) \
+ ({ \
+ int (*_func_)(const typeof(p[0])*, const typeof(p[0])*, typeof(userdata)) = func; \
+ qsort_r_safe((p), (n), sizeof((p)[0]), (__compar_d_fn_t) _func_, userdata); \
+ })
+
+/* Normal memcpy requires src to be nonnull. We do nothing if n is 0. */
+static inline void memcpy_safe(void *dst, const void *src, size_t n) {
+ if (n == 0)
+ return;
+ assert(src);
+ memcpy(dst, src, n);
+}
+
+/* Normal memcmp requires s1 and s2 to be nonnull. We do nothing if n is 0. */
+static inline int memcmp_safe(const void *s1, const void *s2, size_t n) {
+ if (n == 0)
+ return 0;
+ assert(s1);
+ assert(s2);
+ return memcmp(s1, s2, n);
+}
+
+/* Compare s1 (length n1) with s2 (length n2) in lexicographic order. */
+static inline int memcmp_nn(const void *s1, size_t n1, const void *s2, size_t n2) {
+ return memcmp_safe(s1, s2, MIN(n1, n2))
+ ?: CMP(n1, n2);
+}
+
+int on_ac_power(void);
+
+#define memzero(x,l) \
+ ({ \
+ size_t _l_ = (l); \
+ void *_x_ = (x); \
+ _l_ == 0 ? _x_ : memset(_x_, 0, _l_); \
+ })
+
+#define zero(x) (memzero(&(x), sizeof(x)))
+
+bool memeqzero(const void *data, size_t length);
+
+#define eqzero(x) memeqzero(x, sizeof(x))
+
+static inline void *mempset(void *s, int c, size_t n) {
+ memset(s, c, n);
+ return (uint8_t*)s + n;
+}
+
+static inline void _reset_errno_(int *saved_errno) {
+ if (*saved_errno < 0) /* Invalidated by UNPROTECT_ERRNO? */
+ return;
+
+ errno = *saved_errno;
+}
+
+#define PROTECT_ERRNO \
+ _cleanup_(_reset_errno_) _unused_ int _saved_errno_ = errno
+
+#define UNPROTECT_ERRNO \
+ do { \
+ errno = _saved_errno_; \
+ _saved_errno_ = -1; \
+ } while (false)
+
+static inline int negative_errno(void) {
+ /* This helper should be used to shut up gcc if you know 'errno' is
+ * negative. Instead of "return -errno;", use "return negative_errno();"
+ * It will suppress bogus gcc warnings in case it assumes 'errno' might
+ * be 0 and thus the caller's error-handling might not be triggered. */
+ assert_return(errno > 0, -EINVAL);
+ return -errno;
+}
+
+static inline unsigned u64log2(uint64_t n) {
+#if __SIZEOF_LONG_LONG__ == 8
+ return (n > 1) ? (unsigned) __builtin_clzll(n) ^ 63U : 0;
+#else
+#error "Wut?"
+#endif
+}
+
+static inline unsigned u32ctz(uint32_t n) {
+#if __SIZEOF_INT__ == 4
+ return n != 0 ? __builtin_ctz(n) : 32;
+#else
+#error "Wut?"
+#endif
+}
+
+static inline unsigned log2i(int x) {
+ assert(x > 0);
+
+ return __SIZEOF_INT__ * 8 - __builtin_clz(x) - 1;
+}
+
+static inline unsigned log2u(unsigned x) {
+ assert(x > 0);
+
+ return sizeof(unsigned) * 8 - __builtin_clz(x) - 1;
+}
+
+static inline unsigned log2u_round_up(unsigned x) {
+ assert(x > 0);
+
+ if (x == 1)
+ return 0;
+
+ return log2u(x - 1) + 1;
+}
+
+int container_get_leader(const char *machine, pid_t *pid);
+
+int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd);
+int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd);
+
+uint64_t physical_memory(void);
+uint64_t physical_memory_scale(uint64_t v, uint64_t max);
+
+uint64_t system_tasks_max(void);
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max);
+
+int version(void);
+
+int str_verscmp(const char *s1, const char *s2);
+
+void disable_coredumps(void);
diff --git a/src/basic/virt.c b/src/basic/virt.c
new file mode 100644
index 0000000..f63f15f
--- /dev/null
+++ b/src/basic/virt.c
@@ -0,0 +1,642 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+#include <cpuid.h>
+#endif
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "process-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "virt.h"
+
+static int detect_vm_cpuid(void) {
+
+ /* CPUID is an x86 specific interface. */
+#if defined(__i386__) || defined(__x86_64__)
+
+ static const struct {
+ const char *cpuid;
+ int id;
+ } cpuid_vendor_table[] = {
+ { "XenVMMXenVMM", VIRTUALIZATION_XEN },
+ { "KVMKVMKVM", VIRTUALIZATION_KVM },
+ { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU },
+ /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
+ { "VMwareVMware", VIRTUALIZATION_VMWARE },
+ /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
+ { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
+ /* https://wiki.freebsd.org/bhyve */
+ { "bhyve bhyve ", VIRTUALIZATION_BHYVE },
+ { "QNXQVMBSQG", VIRTUALIZATION_QNX },
+ };
+
+ uint32_t eax, ebx, ecx, edx;
+ bool hypervisor;
+
+ /* http://lwn.net/Articles/301888/ */
+
+ /* First detect whether there is a hypervisor */
+ if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
+ return VIRTUALIZATION_NONE;
+
+ hypervisor = ecx & 0x80000000U;
+
+ if (hypervisor) {
+ union {
+ uint32_t sig32[3];
+ char text[13];
+ } sig = {};
+ unsigned j;
+
+ /* There is a hypervisor, see what it is */
+ __cpuid(0x40000000U, eax, ebx, ecx, edx);
+
+ sig.sig32[0] = ebx;
+ sig.sig32[1] = ecx;
+ sig.sig32[2] = edx;
+
+ log_debug("Virtualization found, CPUID=%s", sig.text);
+
+ for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++)
+ if (streq(sig.text, cpuid_vendor_table[j].cpuid))
+ return cpuid_vendor_table[j].id;
+
+ return VIRTUALIZATION_VM_OTHER;
+ }
+#endif
+ log_debug("No virtualization found in CPUID");
+
+ return VIRTUALIZATION_NONE;
+}
+
+static int detect_vm_device_tree(void) {
+#if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
+ _cleanup_free_ char *hvtype = NULL;
+ int r;
+
+ r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
+ if (r == -ENOENT) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *dent;
+
+ dir = opendir("/proc/device-tree");
+ if (!dir) {
+ if (errno == ENOENT) {
+ log_debug_errno(errno, "/proc/device-tree: %m");
+ return VIRTUALIZATION_NONE;
+ }
+ return -errno;
+ }
+
+ FOREACH_DIRENT(dent, dir, return -errno)
+ if (strstr(dent->d_name, "fw-cfg")) {
+ log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name);
+ return VIRTUALIZATION_QEMU;
+ }
+
+ log_debug("No virtualization found in /proc/device-tree/*");
+ return VIRTUALIZATION_NONE;
+ } else if (r < 0)
+ return r;
+
+ log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
+ if (streq(hvtype, "linux,kvm"))
+ return VIRTUALIZATION_KVM;
+ else if (strstr(hvtype, "xen"))
+ return VIRTUALIZATION_XEN;
+ else
+ return VIRTUALIZATION_VM_OTHER;
+#else
+ log_debug("This platform does not support /proc/device-tree");
+ return VIRTUALIZATION_NONE;
+#endif
+}
+
+static int detect_vm_dmi(void) {
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
+
+ static const char *const dmi_vendors[] = {
+ "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
+ "/sys/class/dmi/id/sys_vendor",
+ "/sys/class/dmi/id/board_vendor",
+ "/sys/class/dmi/id/bios_vendor"
+ };
+
+ static const struct {
+ const char *vendor;
+ int id;
+ } dmi_vendor_table[] = {
+ { "KVM", VIRTUALIZATION_KVM },
+ { "QEMU", VIRTUALIZATION_QEMU },
+ /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
+ { "VMware", VIRTUALIZATION_VMWARE },
+ { "VMW", VIRTUALIZATION_VMWARE },
+ { "innotek GmbH", VIRTUALIZATION_ORACLE },
+ { "Xen", VIRTUALIZATION_XEN },
+ { "Bochs", VIRTUALIZATION_BOCHS },
+ { "Parallels", VIRTUALIZATION_PARALLELS },
+ /* https://wiki.freebsd.org/bhyve */
+ { "BHYVE", VIRTUALIZATION_BHYVE },
+ };
+ unsigned i;
+ int r;
+
+ for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
+ _cleanup_free_ char *s = NULL;
+ unsigned j;
+
+ r = read_one_line_file(dmi_vendors[i], &s);
+ if (r < 0) {
+ if (r == -ENOENT)
+ continue;
+
+ return r;
+ }
+
+ for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
+ if (startswith(s, dmi_vendor_table[j].vendor)) {
+ log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
+ return dmi_vendor_table[j].id;
+ }
+ }
+#endif
+
+ log_debug("No virtualization found in DMI");
+
+ return VIRTUALIZATION_NONE;
+}
+
+static int detect_vm_xen(void) {
+
+ /* Check for Dom0 will be executed later in detect_vm_xen_dom0
+ The presence of /proc/xen indicates some form of a Xen domain */
+ if (access("/proc/xen", F_OK) < 0) {
+ log_debug("Virtualization XEN not found, /proc/xen does not exist");
+ return VIRTUALIZATION_NONE;
+ }
+
+ log_debug("Virtualization XEN found (/proc/xen exists)");
+ return VIRTUALIZATION_XEN;
+}
+
+#define XENFEAT_dom0 11 /* xen/include/public/features.h */
+#define PATH_FEATURES "/sys/hypervisor/properties/features"
+/* Returns -errno, or 0 for domU, or 1 for dom0 */
+static int detect_vm_xen_dom0(void) {
+ _cleanup_free_ char *domcap = NULL;
+ char *cap, *i;
+ int r;
+
+ r = read_one_line_file(PATH_FEATURES, &domcap);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ if (r == 0) {
+ unsigned long features;
+
+ /* Here, we need to use sscanf() instead of safe_atoul()
+ * as the string lacks the leading "0x". */
+ r = sscanf(domcap, "%lx", &features);
+ if (r == 1) {
+ r = !!(features & (1U << XENFEAT_dom0));
+ log_debug("Virtualization XEN, found %s with value %08lx, "
+ "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
+ PATH_FEATURES, features, r ? "" : " not");
+ return r;
+ }
+ log_debug("Virtualization XEN, found %s, unhandled content '%s'",
+ PATH_FEATURES, domcap);
+ }
+
+ r = read_one_line_file("/proc/xen/capabilities", &domcap);
+ if (r == -ENOENT) {
+ log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ i = domcap;
+ while ((cap = strsep(&i, ",")))
+ if (streq(cap, "control_d"))
+ break;
+ if (!cap) {
+ log_debug("Virtualization XEN DomU found (/proc/xen/capabilites)");
+ return 0;
+ }
+
+ log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
+ return 1;
+}
+
+static int detect_vm_hypervisor(void) {
+ _cleanup_free_ char *hvtype = NULL;
+ int r;
+
+ r = read_one_line_file("/sys/hypervisor/type", &hvtype);
+ if (r == -ENOENT)
+ return VIRTUALIZATION_NONE;
+ if (r < 0)
+ return r;
+
+ log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
+
+ if (streq(hvtype, "xen"))
+ return VIRTUALIZATION_XEN;
+ else
+ return VIRTUALIZATION_VM_OTHER;
+}
+
+static int detect_vm_uml(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ /* Detect User-Mode Linux by reading /proc/cpuinfo */
+ f = fopen("/proc/cpuinfo", "re");
+ if (!f) {
+ if (errno == ENOENT) {
+ log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
+ return VIRTUALIZATION_NONE;
+ }
+ return -errno;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *t;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ t = startswith(line, "vendor_id\t: ");
+ if (t) {
+ if (startswith(t, "User Mode Linux")) {
+ log_debug("UML virtualization found in /proc/cpuinfo");
+ return VIRTUALIZATION_UML;
+ }
+
+ break;
+ }
+ }
+
+ log_debug("UML virtualization not found in /proc/cpuinfo.");
+ return VIRTUALIZATION_NONE;
+}
+
+static int detect_vm_zvm(void) {
+
+#if defined(__s390__)
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
+ if (r == -ENOENT)
+ return VIRTUALIZATION_NONE;
+ if (r < 0)
+ return r;
+
+ log_debug("Virtualization %s found in /proc/sysinfo", t);
+ if (streq(t, "z/VM"))
+ return VIRTUALIZATION_ZVM;
+ else
+ return VIRTUALIZATION_KVM;
+#else
+ log_debug("This platform does not support /proc/sysinfo");
+ return VIRTUALIZATION_NONE;
+#endif
+}
+
+/* Returns a short identifier for the various VM implementations */
+int detect_vm(void) {
+ static thread_local int cached_found = _VIRTUALIZATION_INVALID;
+ bool other = false;
+ int r, dmi;
+
+ if (cached_found >= 0)
+ return cached_found;
+
+ /* We have to use the correct order here:
+ *
+ * → First, try to detect Oracle Virtualbox, even if it uses KVM, as well as Xen even if it cloaks as Microsoft
+ * Hyper-V.
+ *
+ * → Second, try to detect from CPUID, this will report KVM for whatever software is used even if info in DMI is
+ * overwritten.
+ *
+ * → Third, try to detect from DMI. */
+
+ dmi = detect_vm_dmi();
+ if (IN_SET(dmi, VIRTUALIZATION_ORACLE, VIRTUALIZATION_XEN)) {
+ r = dmi;
+ goto finish;
+ }
+
+ r = detect_vm_cpuid();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ /* Now, let's get back to DMI */
+ if (dmi < 0)
+ return dmi;
+ if (dmi == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (dmi != VIRTUALIZATION_NONE) {
+ r = dmi;
+ goto finish;
+ }
+
+ /* x86 xen will most likely be detected by cpuid. If not (most likely
+ * because we're not an x86 guest), then we should try the /proc/xen
+ * directory next. If that's not found, then we check for the high-level
+ * hypervisor sysfs file.
+ */
+
+ r = detect_vm_xen();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ r = detect_vm_hypervisor();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ r = detect_vm_device_tree();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ r = detect_vm_uml();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ r = detect_vm_zvm();
+ if (r < 0)
+ return r;
+
+finish:
+ /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others.
+ * In order to detect the Dom0 as not virtualization we need to
+ * double-check it */
+ if (r == VIRTUALIZATION_XEN) {
+ int dom0;
+
+ dom0 = detect_vm_xen_dom0();
+ if (dom0 < 0)
+ return dom0;
+ if (dom0 > 0)
+ r = VIRTUALIZATION_NONE;
+ } else if (r == VIRTUALIZATION_NONE && other)
+ r = VIRTUALIZATION_VM_OTHER;
+
+ cached_found = r;
+ log_debug("Found VM virtualization %s", virtualization_to_string(r));
+ return r;
+}
+
+int detect_container(void) {
+
+ static const struct {
+ const char *value;
+ int id;
+ } value_table[] = {
+ { "lxc", VIRTUALIZATION_LXC },
+ { "lxc-libvirt", VIRTUALIZATION_LXC_LIBVIRT },
+ { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN },
+ { "docker", VIRTUALIZATION_DOCKER },
+ { "rkt", VIRTUALIZATION_RKT },
+ };
+
+ static thread_local int cached_found = _VIRTUALIZATION_INVALID;
+ _cleanup_free_ char *m = NULL;
+ const char *e = NULL;
+ unsigned j;
+ int r;
+
+ if (cached_found >= 0)
+ return cached_found;
+
+ /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
+ if (access("/proc/vz", F_OK) >= 0 &&
+ access("/proc/bc", F_OK) < 0) {
+ r = VIRTUALIZATION_OPENVZ;
+ goto finish;
+ }
+
+ if (getpid_cached() == 1) {
+ /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */
+
+ e = getenv("container");
+ if (isempty(e)) {
+ r = VIRTUALIZATION_NONE;
+ goto finish;
+ }
+
+ goto translate_name;
+ }
+
+ /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
+ * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
+ r = read_one_line_file("/run/systemd/container", &m);
+ if (r >= 0) {
+ e = m;
+ goto translate_name;
+ }
+ if (r != -ENOENT)
+ return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
+
+ /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
+ r = getenv_for_pid(1, "container", &m);
+ if (r > 0) {
+ e = m;
+ goto translate_name;
+ }
+ if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
+ log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
+
+ /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown
+ * there is not 1, we know we are in a PID namespace. and hence a container. */
+ r = read_one_line_file("/proc/1/sched", &m);
+ if (r >= 0) {
+ const char *t;
+
+ t = strrchr(m, '(');
+ if (!t)
+ return -EIO;
+
+ if (!startswith(t, "(1,")) {
+ r = VIRTUALIZATION_CONTAINER_OTHER;
+ goto finish;
+ }
+ } else if (r != -ENOENT)
+ return r;
+
+ /* If that didn't work, give up, assume no container manager. */
+ r = VIRTUALIZATION_NONE;
+ goto finish;
+
+translate_name:
+ for (j = 0; j < ELEMENTSOF(value_table); j++)
+ if (streq(e, value_table[j].value)) {
+ r = value_table[j].id;
+ goto finish;
+ }
+
+ r = VIRTUALIZATION_CONTAINER_OTHER;
+
+finish:
+ log_debug("Found container virtualization %s.", virtualization_to_string(r));
+ cached_found = r;
+ return r;
+}
+
+int detect_virtualization(void) {
+ int r;
+
+ r = detect_container();
+ if (r == 0)
+ r = detect_vm();
+
+ return r;
+}
+
+static int userns_has_mapping(const char *name) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *buf = NULL;
+ size_t n_allocated = 0;
+ ssize_t n;
+ uint32_t a, b, c;
+ int r;
+
+ f = fopen(name, "re");
+ if (!f) {
+ log_debug_errno(errno, "Failed to open %s: %m", name);
+ return errno == ENOENT ? false : -errno;
+ }
+
+ n = getline(&buf, &n_allocated, f);
+ if (n < 0) {
+ if (feof(f)) {
+ log_debug("%s is empty, we're in an uninitialized user namespace", name);
+ return true;
+ }
+
+ return log_debug_errno(errno, "Failed to read %s: %m", name);
+ }
+
+ r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
+ if (r < 3)
+ return log_debug_errno(errno, "Failed to parse %s: %m", name);
+
+ if (a == 0 && b == 0 && c == UINT32_MAX) {
+ /* The kernel calls mappings_overlap() and does not allow overlaps */
+ log_debug("%s has a full 1:1 mapping", name);
+ return false;
+ }
+
+ /* Anything else implies that we are in a user namespace */
+ log_debug("Mapping found in %s, we're in a user namespace", name);
+ return true;
+}
+
+int running_in_userns(void) {
+ _cleanup_free_ char *line = NULL;
+ int r;
+
+ r = userns_has_mapping("/proc/self/uid_map");
+ if (r != 0)
+ return r;
+
+ r = userns_has_mapping("/proc/self/gid_map");
+ if (r != 0)
+ return r;
+
+ /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
+ * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
+ * also does not exist. We cannot distinguish those two cases, so assume that
+ * we're running on a stripped-down recent kernel, rather than on an old one,
+ * and if the file is not found, return false.
+ */
+ r = read_one_line_file("/proc/self/setgroups", &line);
+ if (r < 0) {
+ log_debug_errno(r, "/proc/self/setgroups: %m");
+ return r == -ENOENT ? false : r;
+ }
+
+ truncate_nl(line);
+ r = streq(line, "deny");
+ /* See user_namespaces(7) for a description of this "setgroups" contents. */
+ log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
+ return r;
+}
+
+int running_in_chroot(void) {
+ int r;
+
+ if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
+ return 0;
+
+ r = files_same("/proc/1/root", "/", 0);
+ if (r < 0)
+ return r;
+
+ return r == 0;
+}
+
+static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
+ [VIRTUALIZATION_NONE] = "none",
+ [VIRTUALIZATION_KVM] = "kvm",
+ [VIRTUALIZATION_QEMU] = "qemu",
+ [VIRTUALIZATION_BOCHS] = "bochs",
+ [VIRTUALIZATION_XEN] = "xen",
+ [VIRTUALIZATION_UML] = "uml",
+ [VIRTUALIZATION_VMWARE] = "vmware",
+ [VIRTUALIZATION_ORACLE] = "oracle",
+ [VIRTUALIZATION_MICROSOFT] = "microsoft",
+ [VIRTUALIZATION_ZVM] = "zvm",
+ [VIRTUALIZATION_PARALLELS] = "parallels",
+ [VIRTUALIZATION_BHYVE] = "bhyve",
+ [VIRTUALIZATION_QNX] = "qnx",
+ [VIRTUALIZATION_VM_OTHER] = "vm-other",
+
+ [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
+ [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
+ [VIRTUALIZATION_LXC] = "lxc",
+ [VIRTUALIZATION_OPENVZ] = "openvz",
+ [VIRTUALIZATION_DOCKER] = "docker",
+ [VIRTUALIZATION_RKT] = "rkt",
+ [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(virtualization, int);
diff --git a/src/basic/virt.h b/src/basic/virt.h
new file mode 100644
index 0000000..c4cf4bf
--- /dev/null
+++ b/src/basic/virt.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+enum {
+ VIRTUALIZATION_NONE = 0,
+
+ VIRTUALIZATION_VM_FIRST,
+ VIRTUALIZATION_KVM = VIRTUALIZATION_VM_FIRST,
+ VIRTUALIZATION_QEMU,
+ VIRTUALIZATION_BOCHS,
+ VIRTUALIZATION_XEN,
+ VIRTUALIZATION_UML,
+ VIRTUALIZATION_VMWARE,
+ VIRTUALIZATION_ORACLE,
+ VIRTUALIZATION_MICROSOFT,
+ VIRTUALIZATION_ZVM,
+ VIRTUALIZATION_PARALLELS,
+ VIRTUALIZATION_BHYVE,
+ VIRTUALIZATION_QNX,
+ VIRTUALIZATION_VM_OTHER,
+ VIRTUALIZATION_VM_LAST = VIRTUALIZATION_VM_OTHER,
+
+ VIRTUALIZATION_CONTAINER_FIRST,
+ VIRTUALIZATION_SYSTEMD_NSPAWN = VIRTUALIZATION_CONTAINER_FIRST,
+ VIRTUALIZATION_LXC_LIBVIRT,
+ VIRTUALIZATION_LXC,
+ VIRTUALIZATION_OPENVZ,
+ VIRTUALIZATION_DOCKER,
+ VIRTUALIZATION_RKT,
+ VIRTUALIZATION_CONTAINER_OTHER,
+ VIRTUALIZATION_CONTAINER_LAST = VIRTUALIZATION_CONTAINER_OTHER,
+
+ _VIRTUALIZATION_MAX,
+ _VIRTUALIZATION_INVALID = -1
+};
+
+static inline bool VIRTUALIZATION_IS_VM(int x) {
+ return x >= VIRTUALIZATION_VM_FIRST && x <= VIRTUALIZATION_VM_LAST;
+}
+
+static inline bool VIRTUALIZATION_IS_CONTAINER(int x) {
+ return x >= VIRTUALIZATION_CONTAINER_FIRST && x <= VIRTUALIZATION_CONTAINER_LAST;
+}
+
+int detect_vm(void);
+int detect_container(void);
+int detect_virtualization(void);
+
+int running_in_userns(void);
+int running_in_chroot(void);
+
+const char *virtualization_to_string(int v) _const_;
+int virtualization_from_string(const char *s) _pure_;
diff --git a/src/basic/xattr-util.c b/src/basic/xattr-util.c
new file mode 100644
index 0000000..0ee0979
--- /dev/null
+++ b/src/basic/xattr-util.c
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/xattr.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "sparse-endian.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "xattr-util.h"
+
+int getxattr_malloc(const char *path, const char *name, char **value, bool allow_symlink) {
+ char *v;
+ size_t l;
+ ssize_t n;
+
+ assert(path);
+ assert(name);
+ assert(value);
+
+ for (l = 100; ; l = (size_t) n + 1) {
+ v = new0(char, l);
+ if (!v)
+ return -ENOMEM;
+
+ if (allow_symlink)
+ n = lgetxattr(path, name, v, l);
+ else
+ n = getxattr(path, name, v, l);
+
+ if (n >= 0 && (size_t) n < l) {
+ *value = v;
+ return n;
+ }
+
+ free(v);
+
+ if (n < 0 && errno != ERANGE)
+ return -errno;
+
+ if (allow_symlink)
+ n = lgetxattr(path, name, NULL, 0);
+ else
+ n = getxattr(path, name, NULL, 0);
+ if (n < 0)
+ return -errno;
+ }
+}
+
+int fgetxattr_malloc(int fd, const char *name, char **value) {
+ char *v;
+ size_t l;
+ ssize_t n;
+
+ assert(fd >= 0);
+ assert(name);
+ assert(value);
+
+ for (l = 100; ; l = (size_t) n + 1) {
+ v = new0(char, l);
+ if (!v)
+ return -ENOMEM;
+
+ n = fgetxattr(fd, name, v, l);
+
+ if (n >= 0 && (size_t) n < l) {
+ *value = v;
+ return n;
+ }
+
+ free(v);
+
+ if (n < 0 && errno != ERANGE)
+ return -errno;
+
+ n = fgetxattr(fd, name, NULL, 0);
+ if (n < 0)
+ return -errno;
+ }
+}
+
+int fgetxattrat_fake(
+ int dirfd,
+ const char *filename,
+ const char *attribute,
+ void *value, size_t size,
+ int flags,
+ size_t *ret_size) {
+
+ char fn[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+ _cleanup_close_ int fd = -1;
+ ssize_t l;
+
+ /* The kernel doesn't have a fgetxattrat() command, hence let's emulate one */
+
+ if (flags & ~(AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH))
+ return -EINVAL;
+
+ if (isempty(filename)) {
+ if (!(flags & AT_EMPTY_PATH))
+ return -EINVAL;
+
+ xsprintf(fn, "/proc/self/fd/%i", dirfd);
+ } else {
+ fd = openat(dirfd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_NOFOLLOW ? O_NOFOLLOW : 0));
+ if (fd < 0)
+ return -errno;
+
+ xsprintf(fn, "/proc/self/fd/%i", fd);
+ }
+
+ l = getxattr(fn, attribute, value, size);
+ if (l < 0)
+ return -errno;
+
+ *ret_size = l;
+ return 0;
+}
+
+static int parse_crtime(le64_t le, usec_t *usec) {
+ uint64_t u;
+
+ assert(usec);
+
+ u = le64toh(le);
+ if (IN_SET(u, 0, (uint64_t) -1))
+ return -EIO;
+
+ *usec = (usec_t) u;
+ return 0;
+}
+
+int fd_getcrtime_at(int dirfd, const char *name, usec_t *ret, int flags) {
+ struct_statx sx;
+ usec_t a, b;
+ le64_t le;
+ size_t n;
+ int r;
+
+ assert(ret);
+
+ if (flags & ~(AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW))
+ return -EINVAL;
+
+ /* So here's the deal: the creation/birth time (crtime/btime) of a file is a relatively newly supported concept
+ * on Linux (or more strictly speaking: a concept that only recently got supported in the API, it was
+ * implemented on various file systems on the lower level since a while, but never was accessible). However, we
+ * needed a concept like that for vaccuuming algorithms and such, hence we emulated it via a user xattr for a
+ * long time. Starting with Linux 4.11 there's statx() which exposes the timestamp to userspace for the first
+ * time, where it is available. Thius function will read it, but it tries to keep some compatibility with older
+ * systems: we try to read both the crtime/btime and the xattr, and then use whatever is older. After all the
+ * concept is useful for determining how "old" a file really is, and hence using the older of the two makes
+ * most sense. */
+
+ if (statx(dirfd, strempty(name), flags|AT_STATX_DONT_SYNC, STATX_BTIME, &sx) >= 0 &&
+ (sx.stx_mask & STATX_BTIME) &&
+ sx.stx_btime.tv_sec != 0)
+ a = (usec_t) sx.stx_btime.tv_sec * USEC_PER_SEC +
+ (usec_t) sx.stx_btime.tv_nsec / NSEC_PER_USEC;
+ else
+ a = USEC_INFINITY;
+
+ r = fgetxattrat_fake(dirfd, name, "user.crtime_usec", &le, sizeof(le), flags, &n);
+ if (r >= 0) {
+ if (n != sizeof(le))
+ r = -EIO;
+ else
+ r = parse_crtime(le, &b);
+ }
+ if (r < 0) {
+ if (a != USEC_INFINITY) {
+ *ret = a;
+ return 0;
+ }
+
+ return r;
+ }
+
+ if (a != USEC_INFINITY)
+ *ret = MIN(a, b);
+ else
+ *ret = b;
+
+ return 0;
+}
+
+int fd_getcrtime(int fd, usec_t *ret) {
+ return fd_getcrtime_at(fd, NULL, ret, AT_EMPTY_PATH);
+}
+
+int path_getcrtime(const char *p, usec_t *ret) {
+ return fd_getcrtime_at(AT_FDCWD, p, ret, 0);
+}
+
+int fd_setcrtime(int fd, usec_t usec) {
+ le64_t le;
+
+ assert(fd >= 0);
+
+ if (IN_SET(usec, 0, USEC_INFINITY))
+ usec = now(CLOCK_REALTIME);
+
+ le = htole64((uint64_t) usec);
+ if (fsetxattr(fd, "user.crtime_usec", &le, sizeof(le), 0) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/basic/xattr-util.h b/src/basic/xattr-util.h
new file mode 100644
index 0000000..9fa85d7
--- /dev/null
+++ b/src/basic/xattr-util.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/types.h>
+
+#include "time-util.h"
+
+int getxattr_malloc(const char *path, const char *name, char **value, bool allow_symlink);
+int fgetxattr_malloc(int fd, const char *name, char **value);
+
+int fgetxattrat_fake(
+ int dirfd,
+ const char *filename,
+ const char *attribute,
+ void *value, size_t size,
+ int flags,
+ size_t *ret_size);
+
+int fd_setcrtime(int fd, usec_t usec);
+
+int fd_getcrtime(int fd, usec_t *usec);
+int path_getcrtime(const char *p, usec_t *usec);
+int fd_getcrtime_at(int dirfd, const char *name, usec_t *usec, int flags);
diff --git a/src/binfmt/binfmt.c b/src/binfmt/binfmt.c
new file mode 100644
index 0000000..af31f09
--- /dev/null
+++ b/src/binfmt/binfmt.c
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "main-func.h"
+#include "pager.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static bool arg_cat_config = false;
+static PagerFlags arg_pager_flags = 0;
+
+static int delete_rule(const char *rule) {
+ _cleanup_free_ char *x = NULL, *fn = NULL;
+ char *e;
+
+ assert(rule);
+ assert(rule[0]);
+
+ x = strdup(rule);
+ if (!x)
+ return log_oom();
+
+ e = strchrnul(x+1, x[0]);
+ *e = 0;
+
+ if (!filename_is_valid(x + 1))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Rule file name '%s' is not valid, refusing.", x + 1);
+
+ fn = strappend("/proc/sys/fs/binfmt_misc/", x+1);
+ if (!fn)
+ return log_oom();
+
+ return write_string_file(fn, "-1", WRITE_STRING_FILE_DISABLE_BUFFER);
+}
+
+static int apply_rule(const char *rule) {
+ int r;
+
+ (void) delete_rule(rule);
+
+ r = write_string_file("/proc/sys/fs/binfmt_misc/register", rule, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add binary format: %m");
+
+ return 0;
+}
+
+static int apply_file(const char *path, bool ignore_enoent) {
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(path);
+
+ r = search_and_fopen(path, "re", NULL, (const char**) CONF_PATHS_STRV("binfmt.d"), &f);
+ if (r < 0) {
+ if (ignore_enoent && r == -ENOENT)
+ return 0;
+
+ return log_error_errno(r, "Failed to open file '%s': %m", path);
+ }
+
+ log_debug("apply: %s", path);
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *p;
+ int k;
+
+ k = read_line(f, LONG_LINE_MAX, &line);
+ if (k < 0)
+ return log_error_errno(k, "Failed to read file '%s': %m", path);
+ if (k == 0)
+ break;
+
+ p = strstrip(line);
+ if (isempty(p))
+ continue;
+ if (strchr(COMMENTS, p[0]))
+ continue;
+
+ k = apply_rule(p);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-binfmt.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [CONFIGURATION FILE...]\n\n"
+ "Registers binary formats with the kernel.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --cat-config Show configuration files\n"
+ " --no-pager Do not pipe output into a pager\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_CAT_CONFIG,
+ ARG_NO_PAGER,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "cat-config", no_argument, NULL, ARG_CAT_CONFIG },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_CAT_CONFIG:
+ arg_cat_config = true;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_cat_config && argc > optind)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Positional arguments are not allowed with --cat-config");
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ int r, k;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+
+ log_setup_service();
+
+ umask(0022);
+
+ r = 0;
+
+ if (argc > optind) {
+ int i;
+
+ for (i = optind; i < argc; i++) {
+ k = apply_file(argv[i], false);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+ } else {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+
+ r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char**) CONF_PATHS_STRV("binfmt.d"));
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate binfmt.d files: %m");
+
+ if (arg_cat_config) {
+ (void) pager_open(arg_pager_flags);
+
+ return cat_files(NULL, files, 0);
+ }
+
+ /* Flush out all rules */
+ write_string_file("/proc/sys/fs/binfmt_misc/status", "-1", WRITE_STRING_FILE_DISABLE_BUFFER);
+
+ STRV_FOREACH(f, files) {
+ k = apply_file(*f, true);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+ }
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/boot/bless-boot-generator.c b/src/boot/bless-boot-generator.c
new file mode 100644
index 0000000..e28cccd
--- /dev/null
+++ b/src/boot/bless-boot-generator.c
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "efivars.h"
+#include "generator.h"
+#include "log.h"
+#include "mkdir.h"
+#include "special.h"
+#include "string-util.h"
+#include "util.h"
+#include "virt.h"
+
+/* This generator pulls systemd-bless-boot.service into the initial transaction if the "LoaderBootCountPath" EFI
+ * variable is set, i.e. the system boots up with boot counting in effect, which means we should mark the boot as
+ * "good" if we manage to boot up far enough. */
+
+static const char *arg_dest = "/tmp";
+
+int main(int argc, char *argv[]) {
+ const char *p;
+
+ log_setup_generator();
+
+ if (argc > 1 && argc != 4) {
+ log_error("This program takes three or no arguments.");
+ return EXIT_FAILURE;
+ }
+
+ if (argc > 1)
+ arg_dest = argv[2];
+
+ if (in_initrd() > 0) {
+ log_debug("Skipping generator, running in the initrd.");
+ return EXIT_SUCCESS;
+ }
+
+ if (detect_container() > 0) {
+ log_debug("Skipping generator, running in a container.");
+ return EXIT_SUCCESS;
+ }
+
+ if (!is_efi_boot()) {
+ log_debug("Skipping generator, not an EFI boot.");
+ return EXIT_SUCCESS;
+ }
+
+ if (access("/sys/firmware/efi/efivars/LoaderBootCountPath-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f", F_OK) < 0) {
+
+ if (errno == ENOENT) {
+ log_debug_errno(errno, "Skipping generator, not booted with boot counting in effect.");
+ return EXIT_SUCCESS;
+ }
+
+ log_error_errno(errno, "Failed to check if LoaderBootCountPath EFI variable exists: %m");
+ return EXIT_FAILURE;
+ }
+
+ /* We pull this in from basic.target so that it ends up in all "regular" boot ups, but not in rescue.target or
+ * even emergency.target. */
+ p = strjoina(arg_dest, "/" SPECIAL_BASIC_TARGET ".wants/systemd-bless-boot.service");
+ (void) mkdir_parents(p, 0755);
+ if (symlink(SYSTEM_DATA_UNIT_PATH "/systemd-bless-boot.service", p) < 0) {
+ log_error_errno(errno, "Failed to create symlink '%s': %m", p);
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/boot/bless-boot.c b/src/boot/bless-boot.c
new file mode 100644
index 0000000..42b9618
--- /dev/null
+++ b/src/boot/bless-boot.c
@@ -0,0 +1,472 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "bootspec.h"
+#include "efivars.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "util.h"
+#include "verbs.h"
+#include "virt.h"
+
+static char *arg_path = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_path, freep);
+
+static int help(int argc, char *argv[], void *userdata) {
+
+ printf("%s [COMMAND] [OPTIONS...]\n"
+ "\n"
+ "Mark the boot process as good or bad.\n\n"
+ " -h --help Show this help\n"
+ " --version Print version\n"
+ " --path=PATH Path to the EFI System Partition (ESP)\n"
+ "\n"
+ "Commands:\n"
+ " good Mark this boot as good\n"
+ " bad Mark this boot as bad\n"
+ " indeterminate Undo any marking as good or bad\n",
+ program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_PATH = 0x100,
+ ARG_VERSION,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "path", required_argument, NULL, ARG_PATH },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ help(0, NULL, NULL);
+ return 0;
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_PATH:
+ r = free_and_strdup(&arg_path, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ return 1;
+}
+
+static int acquire_esp(void) {
+ _cleanup_free_ char *np = NULL;
+ int r;
+
+ r = find_esp_and_warn(arg_path, false, &np, NULL, NULL, NULL, NULL);
+ if (r == -ENOKEY) /* find_esp_and_warn() doesn't warn in this one error case, but in all others */
+ return log_error_errno(r,
+ "Couldn't find EFI system partition. It is recommended to mount it to /boot or /efi.\n"
+ "Alternatively, use --path= to specify path to mount point.");
+ if (r < 0)
+ return r;
+
+ free_and_replace(arg_path, np);
+ log_debug("Using EFI System Partition at %s.", arg_path);
+
+ return 0;
+}
+
+static int parse_counter(
+ const char *path,
+ const char **p,
+ uint64_t *ret_left,
+ uint64_t *ret_done) {
+
+ uint64_t left, done;
+ const char *z, *e;
+ size_t k;
+ int r;
+
+ assert(path);
+ assert(p);
+
+ e = *p;
+ assert(e);
+ assert(*e == '+');
+
+ e++;
+
+ k = strspn(e, DIGITS);
+ if (k == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Can't parse empty 'tries left' counter from LoaderBootCountPath: %s",
+ path);
+
+ z = strndupa(e, k);
+ r = safe_atou64(z, &left);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse 'tries left' counter from LoaderBootCountPath: %s", path);
+
+ e += k;
+
+ if (*e == '-') {
+ e++;
+
+ k = strspn(e, DIGITS);
+ if (k == 0) /* If there's a "-" there also needs to be at least one digit */
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Can't parse empty 'tries done' counter from LoaderBootCountPath: %s",
+ path);
+
+ z = strndupa(e, k);
+ r = safe_atou64(z, &done);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse 'tries done' counter from LoaderBootCountPath: %s", path);
+
+ e += k;
+ } else
+ done = 0;
+
+ if (done == 0)
+ log_warning("The 'tries done' counter is currently at zero. This can't really be, after all we are running, and this boot must hence count as one. Proceeding anyway.");
+
+ *p = e;
+
+ if (ret_left)
+ *ret_left = left;
+
+ if (ret_done)
+ *ret_done = done;
+
+ return 0;
+}
+
+static int acquire_boot_count_path(
+ char **ret_path,
+ char **ret_prefix,
+ uint64_t *ret_left,
+ uint64_t *ret_done,
+ char **ret_suffix) {
+
+ _cleanup_free_ char *path = NULL, *prefix = NULL, *suffix = NULL;
+ const char *last, *e;
+ uint64_t left, done;
+ int r;
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderBootCountPath", &path);
+ if (r == -ENOENT)
+ return -EUNATCH; /* in this case, let the caller print a message */
+ if (r < 0)
+ return log_error_errno(r, "Failed to read LoaderBootCountPath EFI variable: %m");
+
+ efi_tilt_backslashes(path);
+
+ if (!path_is_normalized(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path read from LoaderBootCountPath is not normalized, refusing: %s",
+ path);
+
+ if (!path_is_absolute(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path read from LoaderBootCountPath is not absolute, refusing: %s",
+ path);
+
+ last = last_path_component(path);
+ e = strrchr(last, '+');
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path read from LoaderBootCountPath does not contain a counter, refusing: %s",
+ path);
+
+ if (ret_prefix) {
+ prefix = strndup(path, e - path);
+ if (!prefix)
+ return log_oom();
+ }
+
+ r = parse_counter(path, &e, &left, &done);
+ if (r < 0)
+ return r;
+
+ if (ret_suffix) {
+ suffix = strdup(e);
+ if (!suffix)
+ return log_oom();
+
+ *ret_suffix = TAKE_PTR(suffix);
+ }
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(path);
+ if (ret_prefix)
+ *ret_prefix = TAKE_PTR(prefix);
+ if (ret_left)
+ *ret_left = left;
+ if (ret_done)
+ *ret_done = done;
+
+ return 0;
+}
+
+static int make_good(const char *prefix, const char *suffix, char **ret) {
+ _cleanup_free_ char *good = NULL;
+
+ assert(prefix);
+ assert(suffix);
+ assert(ret);
+
+ /* Generate the path we'd use on good boots. This one is easy. If we are successful, we simple drop the counter
+ * pair entirely from the name. After all, we know all is good, and the logs will contain information about the
+ * tries we needed to come here, hence it's safe to drop the counters from the name. */
+
+ good = strjoin(prefix, suffix);
+ if (!good)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(good);
+ return 0;
+}
+
+static int make_bad(const char *prefix, uint64_t done, const char *suffix, char **ret) {
+ _cleanup_free_ char *bad = NULL;
+
+ assert(prefix);
+ assert(suffix);
+ assert(ret);
+
+ /* Generate the path we'd use on bad boots. Let's simply set the 'left' counter to zero, and keep the 'done'
+ * counter. The information might be interesting to boot loaders, after all. */
+
+ if (done == 0) {
+ bad = strjoin(prefix, "+0", suffix);
+ if (!bad)
+ return -ENOMEM;
+ } else {
+ if (asprintf(&bad, "%s+0-%" PRIu64 "%s", prefix, done, suffix) < 0)
+ return -ENOMEM;
+ }
+
+ *ret = TAKE_PTR(bad);
+ return 0;
+}
+
+static const char *skip_slash(const char *path) {
+ assert(path);
+ assert(path[0] == '/');
+
+ return path + 1;
+}
+
+static int verb_status(int argc, char *argv[], void *userdata) {
+
+ _cleanup_free_ char *path = NULL, *prefix = NULL, *suffix = NULL, *good = NULL, *bad = NULL;
+ _cleanup_close_ int fd = -1;
+ uint64_t left, done;
+ int r;
+
+ r = acquire_boot_count_path(&path, &prefix, &left, &done, &suffix);
+ if (r == -EUNATCH) { /* No boot count in place, then let's consider this a "clean" boot, as "good", "bad" or "indeterminate" don't apply. */
+ puts("clean");
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ r = acquire_esp();
+ if (r < 0)
+ return r;
+
+ r = make_good(prefix, suffix, &good);
+ if (r < 0)
+ return log_oom();
+
+ r = make_bad(prefix, done, suffix, &bad);
+ if (r < 0)
+ return log_oom();
+
+ log_debug("Booted file: %s%s\n"
+ "The same modified for 'good': %s%s\n"
+ "The same modified for 'bad': %s%s\n",
+ arg_path, path,
+ arg_path, good,
+ arg_path, bad);
+
+ log_debug("Tries left: %" PRIu64"\n"
+ "Tries done: %" PRIu64"\n",
+ left, done);
+
+ fd = open(arg_path, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open ESP '%s': %m", arg_path);
+
+ if (faccessat(fd, skip_slash(path), F_OK, 0) >= 0) {
+ puts("indeterminate");
+ return 0;
+ }
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to check if '%s' exists: %m", path);
+
+ if (faccessat(fd, skip_slash(good), F_OK, 0) >= 0) {
+ puts("good");
+ return 0;
+ }
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to check if '%s' exists: %m", good);
+
+ if (faccessat(fd, skip_slash(bad), F_OK, 0) >= 0) {
+ puts("bad");
+ return 0;
+ }
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to check if '%s' exists: %m", bad);
+
+ return log_error_errno(errno, "Couldn't determine boot state: %m");
+}
+
+static int verb_set(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *path = NULL, *prefix = NULL, *suffix = NULL, *good = NULL, *bad = NULL, *parent = NULL;
+ const char *target, *source1, *source2;
+ _cleanup_close_ int fd = -1;
+ uint64_t done;
+ int r;
+
+ r = acquire_boot_count_path(&path, &prefix, NULL, &done, &suffix);
+ if (r == -EUNATCH) /* acquire_boot_count_path() won't log on its own for this specific error */
+ return log_error_errno(r, "Not booted with boot counting in effect.");
+ if (r < 0)
+ return r;
+
+ r = acquire_esp();
+ if (r < 0)
+ return r;
+
+ r = make_good(prefix, suffix, &good);
+ if (r < 0)
+ return log_oom();
+
+ r = make_bad(prefix, done, suffix, &bad);
+ if (r < 0)
+ return log_oom();
+
+ fd = open(arg_path, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open ESP '%s': %m", arg_path);
+
+ /* Figure out what rename to what */
+ if (streq(argv[0], "good")) {
+ target = good;
+ source1 = path;
+ source2 = bad; /* Maybe this boot was previously marked as 'bad'? */
+ } else if (streq(argv[0], "bad")) {
+ target = bad;
+ source1 = path;
+ source2 = good; /* Maybe this boot was previously marked as 'good'? */
+ } else {
+ assert(streq(argv[0], "indeterminate"));
+ target = path;
+ source1 = good;
+ source2 = bad;
+ }
+
+ r = rename_noreplace(fd, skip_slash(source1), fd, skip_slash(target));
+ if (r == -EEXIST)
+ goto exists;
+ else if (r == -ENOENT) {
+
+ r = rename_noreplace(fd, skip_slash(source2), fd, skip_slash(target));
+ if (r == -EEXIST)
+ goto exists;
+ else if (r == -ENOENT) {
+
+ if (access(target, F_OK) >= 0) /* Hmm, if we can't find either source file, maybe the destination already exists? */
+ goto exists;
+
+ return log_error_errno(r, "Can't find boot counter source file for '%s': %m", target);
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to rename '%s' to '%s': %m", source2, target);
+ else
+ log_debug("Successfully renamed '%s' to '%s'.", source2, target);
+
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to rename '%s' to '%s': %m", source1, target);
+ else
+ log_debug("Successfully renamed '%s' to '%s'.", source1, target);
+
+ /* First, fsync() the directory these files are located in */
+ parent = dirname_malloc(path);
+ if (!parent)
+ return log_oom();
+
+ r = fsync_path_at(fd, skip_slash(parent));
+ if (r < 0)
+ log_debug_errno(errno, "Failed to synchronize image directory, ignoring: %m");
+
+ /* Secondly, syncfs() the whole file system these files are located in */
+ if (syncfs(fd) < 0)
+ log_debug_errno(errno, "Failed to synchronize ESP, ignoring: %m");
+
+ log_info("Marked boot as '%s'. (Boot attempt counter is at %" PRIu64".)", argv[0], done);
+
+ return 1;
+
+exists:
+ log_debug("Operation already executed before, not doing anything.");
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "status", VERB_ANY, 1, VERB_DEFAULT, verb_status },
+ { "good", VERB_ANY, 1, VERB_MUST_BE_ROOT, verb_set },
+ { "bad", VERB_ANY, 1, VERB_MUST_BE_ROOT, verb_set },
+ { "indeterminate", VERB_ANY, 1, VERB_MUST_BE_ROOT, verb_set },
+ {}
+ };
+
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (detect_container() > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Marking a boot is not supported in containers.");
+
+ if (!is_efi_boot())
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Marking a boot is only supported on EFI systems.");
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/boot/boot-check-no-failures.c b/src/boot/boot-check-no-failures.c
new file mode 100644
index 0000000..3284a04
--- /dev/null
+++ b/src/boot/boot-check-no-failures.c
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "log.h"
+#include "main-func.h"
+#include "util.h"
+
+static int help(void) {
+
+ printf("%s [COMMAND] [OPTIONS...]\n"
+ "\n"
+ "Verify system operational state.\n\n"
+ " -h --help Show this help\n"
+ " --version Print version\n",
+ program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_PATH = 0x100,
+ ARG_VERSION,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ help();
+ return 0;
+
+ case ARG_VERSION:
+ return version();
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ uint32_t n;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = sd_bus_get_property_trivial(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "NFailedUnits",
+ &error,
+ 'u',
+ &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get failed units counter: %s", bus_error_message(&error, r));
+
+ if (n > 0)
+ log_notice("Health check: %" PRIu32 " units have failed.", n);
+ else
+ log_info("Health check: no failed units.");
+
+ return n > 0;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/boot/bootctl.c b/src/boot/bootctl.c
new file mode 100644
index 0000000..dc2fd96
--- /dev/null
+++ b/src/boot/bootctl.c
@@ -0,0 +1,1275 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <blkid.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <ftw.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/magic.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "blkid-util.h"
+#include "bootspec.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "efivars.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "locale-util.h"
+#include "main-func.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "rm-rf.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "utf8.h"
+#include "util.h"
+#include "verbs.h"
+#include "virt.h"
+
+static char *arg_path = NULL;
+static bool arg_print_path = false;
+static bool arg_touch_variables = true;
+static PagerFlags arg_pager_flags = 0;
+
+STATIC_DESTRUCTOR_REGISTER(arg_path, freep);
+
+static int acquire_esp(
+ bool unprivileged_mode,
+ uint32_t *ret_part,
+ uint64_t *ret_pstart,
+ uint64_t *ret_psize,
+ sd_id128_t *ret_uuid) {
+
+ char *np;
+ int r;
+
+ /* Find the ESP, and log about errors. Note that find_esp_and_warn() will log in all error cases on its own,
+ * except for ENOKEY (which is good, we want to show our own message in that case, suggesting use of --path=)
+ * and EACCESS (only when we request unprivileged mode; in this case we simply eat up the error here, so that
+ * --list and --status work too, without noise about this). */
+
+ r = find_esp_and_warn(arg_path, unprivileged_mode, &np, ret_part, ret_pstart, ret_psize, ret_uuid);
+ if (r == -ENOKEY)
+ return log_error_errno(r,
+ "Couldn't find EFI system partition. It is recommended to mount it to /boot or /efi.\n"
+ "Alternatively, use --path= to specify path to mount point.");
+ if (r < 0)
+ return r;
+
+ free_and_replace(arg_path, np);
+
+ log_debug("Using EFI System Partition at %s.", arg_path);
+
+ return 0;
+}
+
+/* search for "#### LoaderInfo: systemd-boot 218 ####" string inside the binary */
+static int get_file_version(int fd, char **v) {
+ struct stat st;
+ char *buf;
+ const char *s, *e;
+ char *x = NULL;
+ int r = 0;
+
+ assert(fd >= 0);
+ assert(v);
+
+ if (fstat(fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat EFI binary: %m");
+
+ if (st.st_size < 27) {
+ *v = NULL;
+ return 0;
+ }
+
+ buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (buf == MAP_FAILED)
+ return log_error_errno(errno, "Failed to memory map EFI binary: %m");
+
+ s = memmem(buf, st.st_size - 8, "#### LoaderInfo: ", 17);
+ if (!s)
+ goto finish;
+ s += 17;
+
+ e = memmem(s, st.st_size - (s - buf), " ####", 5);
+ if (!e || e - s < 3) {
+ log_error("Malformed version string.");
+ r = -EINVAL;
+ goto finish;
+ }
+
+ x = strndup(s, e - s);
+ if (!x) {
+ r = log_oom();
+ goto finish;
+ }
+ r = 1;
+
+finish:
+ (void) munmap(buf, st.st_size);
+ *v = x;
+ return r;
+}
+
+static int enumerate_binaries(const char *esp_path, const char *path, const char *prefix) {
+ char *p;
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0, c = 0;
+
+ p = strjoina(esp_path, "/", path);
+ d = opendir(p);
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to read \"%s\": %m", p);
+ }
+
+ FOREACH_DIRENT(de, d, break) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *v = NULL;
+
+ if (!endswith_no_case(de->d_name, ".efi"))
+ continue;
+
+ if (prefix && !startswith_no_case(de->d_name, prefix))
+ continue;
+
+ fd = openat(dirfd(d), de->d_name, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open \"%s/%s\" for reading: %m", p, de->d_name);
+
+ r = get_file_version(fd, &v);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ printf(" File: %s/%s/%s (%s%s%s)\n", special_glyph(SPECIAL_GLYPH_TREE_RIGHT), path, de->d_name, ansi_highlight(), v, ansi_normal());
+ else
+ printf(" File: %s/%s/%s\n", special_glyph(SPECIAL_GLYPH_TREE_RIGHT), path, de->d_name);
+ c++;
+ }
+
+ return c;
+}
+
+static int status_binaries(const char *esp_path, sd_id128_t partition) {
+ int r;
+
+ printf("Available Boot Loaders on ESP:\n");
+
+ if (!esp_path) {
+ printf(" ESP: Cannot find or access mount point of ESP.\n\n");
+ return -ENOENT;
+ }
+
+ printf(" ESP: %s", esp_path);
+ if (!sd_id128_is_null(partition))
+ printf(" (/dev/disk/by-partuuid/%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x)", SD_ID128_FORMAT_VAL(partition));
+ printf("\n");
+
+ r = enumerate_binaries(esp_path, "EFI/systemd", NULL);
+ if (r == 0)
+ log_info("systemd-boot not installed in ESP.");
+ else if (r < 0)
+ return r;
+
+ r = enumerate_binaries(esp_path, "EFI/BOOT", "boot");
+ if (r == 0)
+ log_info("No default/fallback boot loader installed in ESP.");
+ else if (r < 0)
+ return r;
+
+ printf("\n");
+
+ return 0;
+}
+
+static int print_efi_option(uint16_t id, bool in_order) {
+ _cleanup_free_ char *title = NULL;
+ _cleanup_free_ char *path = NULL;
+ sd_id128_t partition;
+ bool active;
+ int r = 0;
+
+ r = efi_get_boot_option(id, &title, &partition, &path, &active);
+ if (r < 0)
+ return r;
+
+ /* print only configured entries with partition information */
+ if (!path || sd_id128_is_null(partition))
+ return 0;
+
+ efi_tilt_backslashes(path);
+
+ printf(" Title: %s%s%s\n", ansi_highlight(), strna(title), ansi_normal());
+ printf(" ID: 0x%04X\n", id);
+ printf(" Status: %sactive%s\n", active ? "" : "in", in_order ? ", boot-order" : "");
+ printf(" Partition: /dev/disk/by-partuuid/%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n", SD_ID128_FORMAT_VAL(partition));
+ printf(" File: %s%s\n", special_glyph(SPECIAL_GLYPH_TREE_RIGHT), path);
+ printf("\n");
+
+ return 0;
+}
+
+static int status_variables(void) {
+ _cleanup_free_ uint16_t *options = NULL, *order = NULL;
+ int n_options, n_order, i;
+
+ n_options = efi_get_boot_options(&options);
+ if (n_options == -ENOENT)
+ return log_error_errno(n_options,
+ "Failed to access EFI variables, efivarfs"
+ " needs to be available at /sys/firmware/efi/efivars/.");
+ if (n_options < 0)
+ return log_error_errno(n_options, "Failed to read EFI boot entries: %m");
+
+ n_order = efi_get_boot_order(&order);
+ if (n_order == -ENOENT)
+ n_order = 0;
+ else if (n_order < 0)
+ return log_error_errno(n_order, "Failed to read EFI boot order: %m");
+
+ /* print entries in BootOrder first */
+ printf("Boot Loaders Listed in EFI Variables:\n");
+ for (i = 0; i < n_order; i++)
+ print_efi_option(order[i], true);
+
+ /* print remaining entries */
+ for (i = 0; i < n_options; i++) {
+ int j;
+
+ for (j = 0; j < n_order; j++)
+ if (options[i] == order[j])
+ goto next_option;
+
+ print_efi_option(options[i], false);
+
+ next_option:
+ continue;
+ }
+
+ return 0;
+}
+
+static int boot_entry_show(const BootEntry *e, bool show_as_default) {
+ assert(e);
+
+ printf(" title: %s%s%s%s%s%s\n",
+ ansi_highlight(),
+ boot_entry_title(e),
+ ansi_normal(),
+ ansi_highlight_green(),
+ show_as_default ? " (default)" : "",
+ ansi_normal());
+
+ if (e->id)
+ printf(" id: %s\n", e->id);
+ if (e->version)
+ printf(" version: %s\n", e->version);
+ if (e->machine_id)
+ printf(" machine-id: %s\n", e->machine_id);
+ if (e->architecture)
+ printf(" architecture: %s\n", e->architecture);
+ if (e->kernel)
+ printf(" linux: %s\n", e->kernel);
+ if (!strv_isempty(e->initrd)) {
+ _cleanup_free_ char *t;
+
+ t = strv_join(e->initrd, " ");
+ if (!t)
+ return log_oom();
+
+ printf(" initrd: %s\n", t);
+ }
+ if (!strv_isempty(e->options)) {
+ _cleanup_free_ char *t;
+
+ t = strv_join(e->options, " ");
+ if (!t)
+ return log_oom();
+
+ printf(" options: %s\n", t);
+ }
+ if (e->device_tree)
+ printf(" devicetree: %s\n", e->device_tree);
+
+ return 0;
+}
+
+static int status_entries(const char *esp_path, sd_id128_t partition) {
+ _cleanup_(boot_config_free) BootConfig config = {};
+ int r;
+
+ r = boot_entries_load_config(esp_path, &config);
+ if (r < 0)
+ return r;
+
+ if (config.default_entry < 0)
+ printf("%zu entries, no entry could be determined as default.\n", config.n_entries);
+ else {
+ printf("Default Boot Loader Entry:\n");
+
+ boot_entry_show(config.entries + config.default_entry, false);
+ }
+
+ return 0;
+}
+
+static int compare_product(const char *a, const char *b) {
+ size_t x, y;
+
+ assert(a);
+ assert(b);
+
+ x = strcspn(a, " ");
+ y = strcspn(b, " ");
+ if (x != y)
+ return x < y ? -1 : x > y ? 1 : 0;
+
+ return strncmp(a, b, x);
+}
+
+static int compare_version(const char *a, const char *b) {
+ assert(a);
+ assert(b);
+
+ a += strcspn(a, " ");
+ a += strspn(a, " ");
+ b += strcspn(b, " ");
+ b += strspn(b, " ");
+
+ return strverscmp(a, b);
+}
+
+static int version_check(int fd_from, const char *from, int fd_to, const char *to) {
+ _cleanup_free_ char *a = NULL, *b = NULL;
+ int r;
+
+ assert(fd_from >= 0);
+ assert(from);
+ assert(fd_to >= 0);
+ assert(to);
+
+ r = get_file_version(fd_from, &a);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Source file \"%s\" does not carry version information!",
+ from);
+
+ r = get_file_version(fd_to, &b);
+ if (r < 0)
+ return r;
+ if (r == 0 || compare_product(a, b) != 0)
+ return log_notice_errno(SYNTHETIC_ERRNO(EEXIST),
+ "Skipping \"%s\", since it's owned by another boot loader.",
+ to);
+
+ if (compare_version(a, b) < 0) {
+ log_warning("Skipping \"%s\", since a newer boot loader version exists already.", to);
+ return -ESTALE;
+ }
+
+ return 0;
+}
+
+static int copy_file_with_version_check(const char *from, const char *to, bool force) {
+ _cleanup_close_ int fd_from = -1, fd_to = -1;
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ fd_from = open(from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd_from < 0)
+ return log_error_errno(errno, "Failed to open \"%s\" for reading: %m", from);
+
+ if (!force) {
+ fd_to = open(to, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd_to < 0) {
+ if (errno != -ENOENT)
+ return log_error_errno(errno, "Failed to open \"%s\" for reading: %m", to);
+ } else {
+ r = version_check(fd_from, from, fd_to, to);
+ if (r < 0)
+ return r;
+
+ if (lseek(fd_from, 0, SEEK_SET) == (off_t) -1)
+ return log_error_errno(errno, "Failed to seek in \"%s\": %m", from);
+
+ fd_to = safe_close(fd_to);
+ }
+ }
+
+ r = tempfn_random(to, NULL, &t);
+ if (r < 0)
+ return log_oom();
+
+ RUN_WITH_UMASK(0000) {
+ fd_to = open(t, O_WRONLY|O_CREAT|O_CLOEXEC|O_EXCL|O_NOFOLLOW, 0644);
+ if (fd_to < 0)
+ return log_error_errno(errno, "Failed to open \"%s\" for writing: %m", t);
+ }
+
+ r = copy_bytes(fd_from, fd_to, (uint64_t) -1, COPY_REFLINK);
+ if (r < 0) {
+ (void) unlink(t);
+ return log_error_errno(r, "Failed to copy data from \"%s\" to \"%s\": %m", from, t);
+ }
+
+ (void) copy_times(fd_from, fd_to);
+
+ if (fsync(fd_to) < 0) {
+ (void) unlink_noerrno(t);
+ return log_error_errno(errno, "Failed to copy data from \"%s\" to \"%s\": %m", from, t);
+ }
+
+ (void) fsync_directory_of_file(fd_to);
+
+ if (renameat(AT_FDCWD, t, AT_FDCWD, to) < 0) {
+ (void) unlink_noerrno(t);
+ return log_error_errno(errno, "Failed to rename \"%s\" to \"%s\": %m", t, to);
+ }
+
+ log_info("Copied \"%s\" to \"%s\".", from, to);
+
+ return 0;
+}
+
+static int mkdir_one(const char *prefix, const char *suffix) {
+ char *p;
+
+ p = strjoina(prefix, "/", suffix);
+ if (mkdir(p, 0700) < 0) {
+ if (errno != EEXIST)
+ return log_error_errno(errno, "Failed to create \"%s\": %m", p);
+ } else
+ log_info("Created \"%s\".", p);
+
+ return 0;
+}
+
+static const char *efi_subdirs[] = {
+ "EFI",
+ "EFI/systemd",
+ "EFI/BOOT",
+ "loader",
+ "loader/entries",
+ NULL
+};
+
+static int create_dirs(const char *esp_path) {
+ const char **i;
+ int r;
+
+ STRV_FOREACH(i, efi_subdirs) {
+ r = mkdir_one(esp_path, *i);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int copy_one_file(const char *esp_path, const char *name, bool force) {
+ char *p, *q;
+ int r;
+
+ p = strjoina(BOOTLIBDIR "/", name);
+ q = strjoina(esp_path, "/EFI/systemd/", name);
+ r = copy_file_with_version_check(p, q, force);
+
+ if (startswith(name, "systemd-boot")) {
+ int k;
+ char *v;
+
+ /* Create the EFI default boot loader name (specified for removable devices) */
+ v = strjoina(esp_path, "/EFI/BOOT/BOOT",
+ name + STRLEN("systemd-boot"));
+ ascii_strupper(strrchr(v, '/') + 1);
+
+ k = copy_file_with_version_check(p, v, force);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int install_binaries(const char *esp_path, bool force) {
+ struct dirent *de;
+ _cleanup_closedir_ DIR *d = NULL;
+ int r = 0;
+
+ if (force) {
+ /* Don't create any of these directories when we are
+ * just updating. When we update we'll drop-in our
+ * files (unless there are newer ones already), but we
+ * won't create the directories for them in the first
+ * place. */
+ r = create_dirs(esp_path);
+ if (r < 0)
+ return r;
+ }
+
+ d = opendir(BOOTLIBDIR);
+ if (!d)
+ return log_error_errno(errno, "Failed to open \""BOOTLIBDIR"\": %m");
+
+ FOREACH_DIRENT(de, d, break) {
+ int k;
+
+ if (!endswith_no_case(de->d_name, ".efi"))
+ continue;
+
+ k = copy_one_file(esp_path, de->d_name, force);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static bool same_entry(uint16_t id, const sd_id128_t uuid, const char *path) {
+ _cleanup_free_ char *opath = NULL;
+ sd_id128_t ouuid;
+ int r;
+
+ r = efi_get_boot_option(id, NULL, &ouuid, &opath, NULL);
+ if (r < 0)
+ return false;
+ if (!sd_id128_equal(uuid, ouuid))
+ return false;
+ if (!streq_ptr(path, opath))
+ return false;
+
+ return true;
+}
+
+static int find_slot(sd_id128_t uuid, const char *path, uint16_t *id) {
+ _cleanup_free_ uint16_t *options = NULL;
+ int n, i;
+
+ n = efi_get_boot_options(&options);
+ if (n < 0)
+ return n;
+
+ /* find already existing systemd-boot entry */
+ for (i = 0; i < n; i++)
+ if (same_entry(options[i], uuid, path)) {
+ *id = options[i];
+ return 1;
+ }
+
+ /* find free slot in the sorted BootXXXX variable list */
+ for (i = 0; i < n; i++)
+ if (i != options[i]) {
+ *id = i;
+ return 1;
+ }
+
+ /* use the next one */
+ if (i == 0xffff)
+ return -ENOSPC;
+ *id = i;
+ return 0;
+}
+
+static int insert_into_order(uint16_t slot, bool first) {
+ _cleanup_free_ uint16_t *order = NULL;
+ uint16_t *t;
+ int n, i;
+
+ n = efi_get_boot_order(&order);
+ if (n <= 0)
+ /* no entry, add us */
+ return efi_set_boot_order(&slot, 1);
+
+ /* are we the first and only one? */
+ if (n == 1 && order[0] == slot)
+ return 0;
+
+ /* are we already in the boot order? */
+ for (i = 0; i < n; i++) {
+ if (order[i] != slot)
+ continue;
+
+ /* we do not require to be the first one, all is fine */
+ if (!first)
+ return 0;
+
+ /* move us to the first slot */
+ memmove(order + 1, order, i * sizeof(uint16_t));
+ order[0] = slot;
+ return efi_set_boot_order(order, n);
+ }
+
+ /* extend array */
+ t = realloc(order, (n + 1) * sizeof(uint16_t));
+ if (!t)
+ return -ENOMEM;
+ order = t;
+
+ /* add us to the top or end of the list */
+ if (first) {
+ memmove(order + 1, order, n * sizeof(uint16_t));
+ order[0] = slot;
+ } else
+ order[n] = slot;
+
+ return efi_set_boot_order(order, n + 1);
+}
+
+static int remove_from_order(uint16_t slot) {
+ _cleanup_free_ uint16_t *order = NULL;
+ int n, i;
+
+ n = efi_get_boot_order(&order);
+ if (n <= 0)
+ return n;
+
+ for (i = 0; i < n; i++) {
+ if (order[i] != slot)
+ continue;
+
+ if (i + 1 < n)
+ memmove(order + i, order + i+1, (n - i) * sizeof(uint16_t));
+ return efi_set_boot_order(order, n - 1);
+ }
+
+ return 0;
+}
+
+static int install_variables(const char *esp_path,
+ uint32_t part, uint64_t pstart, uint64_t psize,
+ sd_id128_t uuid, const char *path,
+ bool first) {
+ char *p;
+ uint16_t slot;
+ int r;
+
+ if (!is_efi_boot()) {
+ log_warning("Not booted with EFI, skipping EFI variable setup.");
+ return 0;
+ }
+
+ p = strjoina(esp_path, path);
+ if (access(p, F_OK) < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Cannot access \"%s\": %m", p);
+ }
+
+ r = find_slot(uuid, path, &slot);
+ if (r < 0)
+ return log_error_errno(r,
+ r == -ENOENT ?
+ "Failed to access EFI variables. Is the \"efivarfs\" filesystem mounted?" :
+ "Failed to determine current boot order: %m");
+
+ if (first || r == 0) {
+ r = efi_add_boot_option(slot, "Linux Boot Manager",
+ part, pstart, psize,
+ uuid, path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create EFI Boot variable entry: %m");
+
+ log_info("Created EFI boot entry \"Linux Boot Manager\".");
+ }
+
+ return insert_into_order(slot, first);
+}
+
+static int remove_boot_efi(const char *esp_path) {
+ char *p;
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r, c = 0;
+
+ p = strjoina(esp_path, "/EFI/BOOT");
+ d = opendir(p);
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open directory \"%s\": %m", p);
+ }
+
+ FOREACH_DIRENT(de, d, break) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *v = NULL;
+
+ if (!endswith_no_case(de->d_name, ".efi"))
+ continue;
+
+ if (!startswith_no_case(de->d_name, "boot"))
+ continue;
+
+ fd = openat(dirfd(d), de->d_name, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open \"%s/%s\" for reading: %m", p, de->d_name);
+
+ r = get_file_version(fd, &v);
+ if (r < 0)
+ return r;
+ if (r > 0 && startswith(v, "systemd-boot ")) {
+ r = unlinkat(dirfd(d), de->d_name, 0);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to remove \"%s/%s\": %m", p, de->d_name);
+
+ log_info("Removed \"%s/%s\".", p, de->d_name);
+ }
+
+ c++;
+ }
+
+ return c;
+}
+
+static int rmdir_one(const char *prefix, const char *suffix) {
+ char *p;
+
+ p = strjoina(prefix, "/", suffix);
+ if (rmdir(p) < 0) {
+ if (!IN_SET(errno, ENOENT, ENOTEMPTY))
+ return log_error_errno(errno, "Failed to remove \"%s\": %m", p);
+ } else
+ log_info("Removed \"%s\".", p);
+
+ return 0;
+}
+
+static int remove_binaries(const char *esp_path) {
+ char *p;
+ int r, q;
+ unsigned i;
+
+ p = strjoina(esp_path, "/EFI/systemd");
+ r = rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL);
+
+ q = remove_boot_efi(esp_path);
+ if (q < 0 && r == 0)
+ r = q;
+
+ for (i = ELEMENTSOF(efi_subdirs)-1; i > 0; i--) {
+ q = rmdir_one(esp_path, efi_subdirs[i-1]);
+ if (q < 0 && r == 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int remove_variables(sd_id128_t uuid, const char *path, bool in_order) {
+ uint16_t slot;
+ int r;
+
+ if (!is_efi_boot())
+ return 0;
+
+ r = find_slot(uuid, path, &slot);
+ if (r != 1)
+ return 0;
+
+ r = efi_remove_boot_option(slot);
+ if (r < 0)
+ return r;
+
+ if (in_order)
+ return remove_from_order(slot);
+
+ return 0;
+}
+
+static int install_loader_config(const char *esp_path) {
+
+ char machine_string[SD_ID128_STRING_MAX];
+ _cleanup_(unlink_and_freep) char *t = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ sd_id128_t machine_id;
+ const char *p;
+ int r, fd;
+
+ r = sd_id128_get_machine(&machine_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get machine id: %m");
+
+ p = strjoina(esp_path, "/loader/loader.conf");
+
+ if (access(p, F_OK) >= 0) /* Silently skip creation if the file already exists (early check) */
+ return 0;
+
+ fd = open_tmpfile_linkable(p, O_WRONLY|O_CLOEXEC, &t);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to open \"%s\" for writing: %m", p);
+
+ f = fdopen(fd, "w");
+ if (!f) {
+ safe_close(fd);
+ return log_oom();
+ }
+
+ fprintf(f, "#timeout 3\n"
+ "#console-mode keep\n"
+ "default %s-*\n", sd_id128_to_string(machine_id, machine_string));
+
+ r = fflush_sync_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write \"%s\": %m", p);
+
+ r = link_tmpfile(fd, t, p);
+ if (r == -EEXIST)
+ return 0; /* Silently skip creation if the file exists now (recheck) */
+ if (r < 0)
+ return log_error_errno(r, "Failed to move \"%s\" into place: %m", p);
+
+ t = mfree(t);
+
+ return 1;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("bootctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [COMMAND] [OPTIONS...]\n\n"
+ "Install, update or remove the systemd-boot EFI boot manager.\n\n"
+ " -h --help Show this help\n"
+ " --version Print version\n"
+ " --path=PATH Path to the EFI System Partition (ESP)\n"
+ " -p --print-path Print path to the EFI partition\n"
+ " --no-variables Don't touch EFI variables\n"
+ " --no-pager Do not pipe output into a pager\n"
+ "\nBoot Loader Commands:\n"
+ " status Show status of installed systemd-boot and EFI variables\n"
+ " install Install systemd-boot to the ESP and EFI variables\n"
+ " update Update systemd-boot in the ESP and EFI variables\n"
+ " remove Remove systemd-boot from the ESP and EFI variables\n"
+ "\nBoot Loader Entries Commands:\n"
+ " list List boot loader entries\n"
+ " set-default ID Set default boot loader entry\n"
+ " set-oneshot ID Set default boot loader entry, for next boot only\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_PATH = 0x100,
+ ARG_VERSION,
+ ARG_NO_VARIABLES,
+ ARG_NO_PAGER,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "path", required_argument, NULL, ARG_PATH },
+ { "print-path", no_argument, NULL, 'p' },
+ { "no-variables", no_argument, NULL, ARG_NO_VARIABLES },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hp", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ help(0, NULL, NULL);
+ return 0;
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_PATH:
+ r = free_and_strdup(&arg_path, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case 'p':
+ arg_print_path = true;
+ break;
+
+ case ARG_NO_VARIABLES:
+ arg_touch_variables = false;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ return 1;
+}
+
+static void read_loader_efi_var(const char *name, char **var) {
+ int r;
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, name, var);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to read EFI variable %s: %m", name);
+}
+
+static int verb_status(int argc, char *argv[], void *userdata) {
+
+ sd_id128_t uuid = SD_ID128_NULL;
+ int r, k;
+
+ r = acquire_esp(geteuid() != 0, NULL, NULL, NULL, &uuid);
+
+ if (arg_print_path) {
+ if (r == -EACCES) /* If we couldn't acquire the ESP path, log about access errors (which is the only
+ * error the find_esp_and_warn() won't log on its own) */
+ return log_error_errno(r, "Failed to determine ESP: %m");
+ if (r < 0)
+ return r;
+
+ puts(arg_path);
+ return 0;
+ }
+
+ r = 0; /* If we couldn't determine the path, then don't consider that a problem from here on, just show what we
+ * can show */
+
+ (void) pager_open(arg_pager_flags);
+
+ if (is_efi_boot()) {
+ static const struct {
+ uint64_t flag;
+ const char *name;
+ } flags[] = {
+ { EFI_LOADER_FEATURE_BOOT_COUNTING, "Boot counting" },
+ { EFI_LOADER_FEATURE_CONFIG_TIMEOUT, "Menu timeout control" },
+ { EFI_LOADER_FEATURE_CONFIG_TIMEOUT_ONE_SHOT, "One-shot menu timeout control" },
+ { EFI_LOADER_FEATURE_ENTRY_DEFAULT, "Default entry control" },
+ { EFI_LOADER_FEATURE_ENTRY_ONESHOT, "One-shot entry control" },
+ };
+
+ _cleanup_free_ char *fw_type = NULL, *fw_info = NULL, *loader = NULL, *loader_path = NULL, *stub = NULL;
+ sd_id128_t loader_part_uuid = SD_ID128_NULL;
+ uint64_t loader_features = 0;
+ size_t i;
+
+ read_loader_efi_var("LoaderFirmwareType", &fw_type);
+ read_loader_efi_var("LoaderFirmwareInfo", &fw_info);
+ read_loader_efi_var("LoaderInfo", &loader);
+ read_loader_efi_var("StubInfo", &stub);
+ read_loader_efi_var("LoaderImageIdentifier", &loader_path);
+ (void) efi_loader_get_features(&loader_features);
+
+ if (loader_path)
+ efi_tilt_backslashes(loader_path);
+
+ k = efi_loader_get_device_part_uuid(&loader_part_uuid);
+ if (k < 0 && k != -ENOENT)
+ r = log_warning_errno(k, "Failed to read EFI variable LoaderDevicePartUUID: %m");
+
+ printf("System:\n");
+ printf(" Firmware: %s%s (%s)%s\n", ansi_highlight(), strna(fw_type), strna(fw_info), ansi_normal());
+ printf(" Secure Boot: %sd\n", enable_disable(is_efi_secure_boot()));
+ printf(" Setup Mode: %s\n", is_efi_secure_boot_setup_mode() ? "setup" : "user");
+ printf("\n");
+
+ printf("Current Boot Loader:\n");
+ printf(" Product: %s%s%s\n", ansi_highlight(), strna(loader), ansi_normal());
+
+ for (i = 0; i < ELEMENTSOF(flags); i++) {
+
+ if (i == 0)
+ printf(" Features: ");
+ else
+ printf(" ");
+
+ if (FLAGS_SET(loader_features, flags[i].flag))
+ printf("%s%s%s %s\n", ansi_highlight_green(), special_glyph(SPECIAL_GLYPH_CHECK_MARK), ansi_normal(), flags[i].name);
+ else
+ printf("%s%s%s %s\n", ansi_highlight_red(), special_glyph(SPECIAL_GLYPH_CROSS_MARK), ansi_normal(), flags[i].name);
+ }
+
+ if (stub)
+ printf(" Stub: %s\n", stub);
+ if (!sd_id128_is_null(loader_part_uuid))
+ printf(" ESP: /dev/disk/by-partuuid/%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n",
+ SD_ID128_FORMAT_VAL(loader_part_uuid));
+ else
+ printf(" ESP: n/a\n");
+ printf(" File: %s%s\n", special_glyph(SPECIAL_GLYPH_TREE_RIGHT), strna(loader_path));
+ printf("\n");
+ } else
+ printf("System:\n Not booted with EFI\n\n");
+
+ if (arg_path) {
+ k = status_binaries(arg_path, uuid);
+ if (k < 0)
+ r = k;
+ }
+
+ if (is_efi_boot()) {
+ k = status_variables();
+ if (k < 0)
+ r = k;
+ }
+
+ if (arg_path) {
+ k = status_entries(arg_path, uuid);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int verb_list(int argc, char *argv[], void *userdata) {
+ _cleanup_(boot_config_free) BootConfig config = {};
+ _cleanup_free_ char **found_by_loader = NULL;
+ sd_id128_t uuid = SD_ID128_NULL;
+ int r;
+
+ /* If we lack privileges we invoke find_esp_and_warn() in "unprivileged mode" here, which does two things: turn
+ * off logging about access errors and turn off potentially privileged device probing. Here we're interested in
+ * the latter but not the former, hence request the mode, and log about EACCES. */
+
+ r = acquire_esp(geteuid() != 0, NULL, NULL, NULL, &uuid);
+ if (r == -EACCES) /* We really need the ESP path for this call, hence also log about access errors */
+ return log_error_errno(r, "Failed to determine ESP: %m");
+ if (r < 0)
+ return r;
+
+ r = boot_entries_load_config(arg_path, &config);
+ if (r < 0)
+ return r;
+
+ r = efi_loader_get_entries(&found_by_loader);
+ if (r < 0 && !IN_SET(r, -ENOENT, -EOPNOTSUPP))
+ log_debug_errno(r, "Failed to acquire boot loader discovered entries: %m");
+
+ if (config.n_entries == 0)
+ log_info("No boot loader entries found.");
+ else {
+ size_t n;
+
+ (void) pager_open(arg_pager_flags);
+
+ printf("Boot Loader Entries:\n");
+
+ for (n = 0; n < config.n_entries; n++) {
+ r = boot_entry_show(config.entries + n, n == (size_t) config.default_entry);
+ if (r < 0)
+ return r;
+
+ puts("");
+
+ strv_remove(found_by_loader, config.entries[n].id);
+ }
+ }
+
+ if (!strv_isempty(found_by_loader)) {
+ char **i;
+
+ printf("Automatic/Other Entries Found by Boot Loader:\n\n");
+
+ STRV_FOREACH(i, found_by_loader)
+ puts(*i);
+ }
+
+ return 0;
+}
+
+static int sync_esp(void) {
+ _cleanup_close_ int fd = -1;
+
+ if (!arg_path)
+ return 0;
+
+ fd = open(arg_path, O_CLOEXEC|O_DIRECTORY|O_RDONLY);
+ if (fd < 0)
+ return log_error_errno(errno, "Couldn't open ESP '%s' for synchronization: %m", arg_path);
+
+ if (syncfs(fd) < 0)
+ return log_error_errno(errno, "Failed to synchronize the ESP '%s': %m", arg_path);
+
+ return 1;
+}
+
+static int verb_install(int argc, char *argv[], void *userdata) {
+
+ sd_id128_t uuid = SD_ID128_NULL;
+ uint64_t pstart = 0, psize = 0;
+ uint32_t part = 0;
+ bool install;
+ int r;
+
+ r = acquire_esp(false, &part, &pstart, &psize, &uuid);
+ if (r < 0)
+ return r;
+
+ install = streq(argv[0], "install");
+
+ RUN_WITH_UMASK(0002) {
+ r = install_binaries(arg_path, install);
+ if (r < 0)
+ return r;
+
+ if (install) {
+ r = install_loader_config(arg_path);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ (void) sync_esp();
+
+ if (arg_touch_variables)
+ r = install_variables(arg_path,
+ part, pstart, psize, uuid,
+ "/EFI/systemd/systemd-boot" EFI_MACHINE_TYPE_NAME ".efi",
+ install);
+
+ return r;
+}
+
+static int verb_remove(int argc, char *argv[], void *userdata) {
+ sd_id128_t uuid = SD_ID128_NULL;
+ int r;
+
+ r = acquire_esp(false, NULL, NULL, NULL, &uuid);
+ if (r < 0)
+ return r;
+
+ r = remove_binaries(arg_path);
+
+ (void) sync_esp();
+
+ if (arg_touch_variables) {
+ int q;
+
+ q = remove_variables(uuid, "/EFI/systemd/systemd-boot" EFI_MACHINE_TYPE_NAME ".efi", true);
+ if (q < 0 && r == 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int verb_set_default(int argc, char *argv[], void *userdata) {
+ const char *name;
+ int r;
+
+ if (!is_efi_boot())
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Not booted with UEFI.");
+
+ if (access("/sys/firmware/efi/efivars/LoaderInfo-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f", F_OK) < 0) {
+ if (errno == ENOENT) {
+ log_error_errno(errno, "Not booted with a supported boot loader.");
+ return -EOPNOTSUPP;
+ }
+
+ return log_error_errno(errno, "Failed to detect whether boot loader supports '%s' operation: %m", argv[0]);
+ }
+
+ if (detect_container() > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "'%s' operation not supported in a container.",
+ argv[0]);
+
+ if (!arg_touch_variables)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "'%s' operation cannot be combined with --touch-variables=no.",
+ argv[0]);
+
+ name = streq(argv[0], "set-default") ? "LoaderEntryDefault" : "LoaderEntryOneShot";
+
+ if (isempty(argv[1])) {
+ r = efi_set_variable(EFI_VENDOR_LOADER, name, NULL, 0);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to remove EFI variale: %m");
+ } else {
+ _cleanup_free_ char16_t *encoded = NULL;
+
+ encoded = utf8_to_utf16(argv[1], strlen(argv[1]));
+ if (!encoded)
+ return log_oom();
+
+ r = efi_set_variable(EFI_VENDOR_LOADER, name, encoded, char16_strlen(encoded) * 2 + 2);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update EFI variable: %m");
+ }
+
+ return 0;
+}
+
+static int bootctl_main(int argc, char *argv[]) {
+
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "status", VERB_ANY, 1, VERB_DEFAULT, verb_status },
+ { "install", VERB_ANY, 1, VERB_MUST_BE_ROOT, verb_install },
+ { "update", VERB_ANY, 1, VERB_MUST_BE_ROOT, verb_install },
+ { "remove", VERB_ANY, 1, VERB_MUST_BE_ROOT, verb_remove },
+ { "list", VERB_ANY, 1, 0, verb_list },
+ { "set-default", 2, 2, VERB_MUST_BE_ROOT, verb_set_default },
+ { "set-oneshot", 2, 2, VERB_MUST_BE_ROOT, verb_set_default },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ /* If we run in a container, automatically turn of EFI file system access */
+ if (detect_container() > 0)
+ arg_touch_variables = false;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ return bootctl_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/boot/efi/boot.c b/src/boot/efi/boot.c
new file mode 100644
index 0000000..9bf6895
--- /dev/null
+++ b/src/boot/efi/boot.c
@@ -0,0 +1,2250 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "console.h"
+#include "disk.h"
+#include "graphics.h"
+#include "linux.h"
+#include "measure.h"
+#include "pe.h"
+#include "shim.h"
+#include "util.h"
+
+#ifndef EFI_OS_INDICATIONS_BOOT_TO_FW_UI
+#define EFI_OS_INDICATIONS_BOOT_TO_FW_UI 0x0000000000000001ULL
+#endif
+
+/* magic string to find in the binary image */
+static const char __attribute__((used)) magic[] = "#### LoaderInfo: systemd-boot " GIT_VERSION " ####";
+
+static const EFI_GUID global_guid = EFI_GLOBAL_VARIABLE;
+
+enum loader_type {
+ LOADER_UNDEFINED,
+ LOADER_EFI,
+ LOADER_LINUX,
+};
+
+typedef struct {
+ CHAR16 *id; /* The identifier for this entry (note that this id is not necessarily unique though!) */
+ CHAR16 *title_show;
+ CHAR16 *title;
+ CHAR16 *version;
+ CHAR16 *machine_id;
+ EFI_HANDLE *device;
+ enum loader_type type;
+ CHAR16 *loader;
+ CHAR16 *options;
+ CHAR16 key;
+ EFI_STATUS (*call)(VOID);
+ BOOLEAN no_autoselect;
+ BOOLEAN non_unique;
+ UINTN tries_done;
+ UINTN tries_left;
+ CHAR16 *path;
+ CHAR16 *current_name;
+ CHAR16 *next_name;
+} ConfigEntry;
+
+typedef struct {
+ ConfigEntry **entries;
+ UINTN entry_count;
+ INTN idx_default;
+ INTN idx_default_efivar;
+ UINTN timeout_sec;
+ UINTN timeout_sec_config;
+ INTN timeout_sec_efivar;
+ CHAR16 *entry_default_pattern;
+ CHAR16 *entry_oneshot;
+ CHAR16 *options_edit;
+ BOOLEAN editor;
+ BOOLEAN auto_entries;
+ BOOLEAN auto_firmware;
+ BOOLEAN force_menu;
+ UINTN console_mode;
+ enum console_mode_change_type console_mode_change;
+} Config;
+
+static VOID cursor_left(UINTN *cursor, UINTN *first) {
+ if ((*cursor) > 0)
+ (*cursor)--;
+ else if ((*first) > 0)
+ (*first)--;
+}
+
+static VOID cursor_right(
+ UINTN *cursor,
+ UINTN *first,
+ UINTN x_max,
+ UINTN len) {
+
+ if ((*cursor)+1 < x_max)
+ (*cursor)++;
+ else if ((*first) + (*cursor) < len)
+ (*first)++;
+}
+
+static BOOLEAN line_edit(
+ CHAR16 *line_in,
+ CHAR16 **line_out,
+ UINTN x_max,
+ UINTN y_pos) {
+
+ _cleanup_freepool_ CHAR16 *line = NULL, *print = NULL;
+ UINTN size, len, first, cursor, clear;
+ BOOLEAN exit, enter;
+
+ if (!line_in)
+ line_in = L"";
+ size = StrLen(line_in) + 1024;
+ line = AllocatePool(size * sizeof(CHAR16));
+ StrCpy(line, line_in);
+ len = StrLen(line);
+ print = AllocatePool((x_max+1) * sizeof(CHAR16));
+
+ uefi_call_wrapper(ST->ConOut->EnableCursor, 2, ST->ConOut, TRUE);
+
+ first = 0;
+ cursor = 0;
+ clear = 0;
+ enter = FALSE;
+ exit = FALSE;
+ while (!exit) {
+ EFI_STATUS err;
+ UINT64 key;
+ UINTN i;
+
+ i = len - first;
+ if (i >= x_max-1)
+ i = x_max-1;
+ CopyMem(print, line + first, i * sizeof(CHAR16));
+ while (clear > 0 && i < x_max-1) {
+ clear--;
+ print[i++] = ' ';
+ }
+ print[i] = '\0';
+
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_pos);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, print);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, cursor, y_pos);
+
+ err = console_key_read(&key, TRUE);
+ if (EFI_ERROR(err))
+ continue;
+
+ switch (key) {
+ case KEYPRESS(0, SCAN_ESC, 0):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'c'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'g'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('c')):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('g')):
+ exit = TRUE;
+ break;
+
+ case KEYPRESS(0, SCAN_HOME, 0):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'a'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('a')):
+ /* beginning-of-line */
+ cursor = 0;
+ first = 0;
+ continue;
+
+ case KEYPRESS(0, SCAN_END, 0):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'e'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('e')):
+ /* end-of-line */
+ cursor = len - first;
+ if (cursor+1 >= x_max) {
+ cursor = x_max-1;
+ first = len - (x_max-1);
+ }
+ continue;
+
+ case KEYPRESS(0, SCAN_DOWN, 0):
+ case KEYPRESS(EFI_ALT_PRESSED, 0, 'f'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, SCAN_RIGHT, 0):
+ /* forward-word */
+ while (line[first + cursor] == ' ')
+ cursor_right(&cursor, &first, x_max, len);
+ while (line[first + cursor] && line[first + cursor] != ' ')
+ cursor_right(&cursor, &first, x_max, len);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, cursor, y_pos);
+ continue;
+
+ case KEYPRESS(0, SCAN_UP, 0):
+ case KEYPRESS(EFI_ALT_PRESSED, 0, 'b'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, SCAN_LEFT, 0):
+ /* backward-word */
+ if ((first + cursor) > 0 && line[first + cursor-1] == ' ') {
+ cursor_left(&cursor, &first);
+ while ((first + cursor) > 0 && line[first + cursor] == ' ')
+ cursor_left(&cursor, &first);
+ }
+ while ((first + cursor) > 0 && line[first + cursor-1] != ' ')
+ cursor_left(&cursor, &first);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, cursor, y_pos);
+ continue;
+
+ case KEYPRESS(0, SCAN_RIGHT, 0):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'f'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('f')):
+ /* forward-char */
+ if (first + cursor == len)
+ continue;
+ cursor_right(&cursor, &first, x_max, len);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, cursor, y_pos);
+ continue;
+
+ case KEYPRESS(0, SCAN_LEFT, 0):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'b'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('b')):
+ /* backward-char */
+ cursor_left(&cursor, &first);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, cursor, y_pos);
+ continue;
+
+ case KEYPRESS(EFI_ALT_PRESSED, 0, 'd'):
+ /* kill-word */
+ clear = 0;
+ for (i = first + cursor; i < len && line[i] == ' '; i++)
+ clear++;
+ for (; i < len && line[i] != ' '; i++)
+ clear++;
+
+ for (i = first + cursor; i + clear < len; i++)
+ line[i] = line[i + clear];
+ len -= clear;
+ line[len] = '\0';
+ continue;
+
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'w'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('w')):
+ case KEYPRESS(EFI_ALT_PRESSED, 0, CHAR_BACKSPACE):
+ /* backward-kill-word */
+ clear = 0;
+ if ((first + cursor) > 0 && line[first + cursor-1] == ' ') {
+ cursor_left(&cursor, &first);
+ clear++;
+ while ((first + cursor) > 0 && line[first + cursor] == ' ') {
+ cursor_left(&cursor, &first);
+ clear++;
+ }
+ }
+ while ((first + cursor) > 0 && line[first + cursor-1] != ' ') {
+ cursor_left(&cursor, &first);
+ clear++;
+ }
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, cursor, y_pos);
+
+ for (i = first + cursor; i + clear < len; i++)
+ line[i] = line[i + clear];
+ len -= clear;
+ line[len] = '\0';
+ continue;
+
+ case KEYPRESS(0, SCAN_DELETE, 0):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'd'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('d')):
+ if (len == 0)
+ continue;
+ if (first + cursor == len)
+ continue;
+ for (i = first + cursor; i < len; i++)
+ line[i] = line[i+1];
+ clear = 1;
+ len--;
+ continue;
+
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'k'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('k')):
+ /* kill-line */
+ line[first + cursor] = '\0';
+ clear = len - (first + cursor);
+ len = first + cursor;
+ continue;
+
+ case KEYPRESS(0, 0, CHAR_LINEFEED):
+ case KEYPRESS(0, 0, CHAR_CARRIAGE_RETURN):
+ if (StrCmp(line, line_in) != 0) {
+ *line_out = line;
+ line = NULL;
+ }
+ enter = TRUE;
+ exit = TRUE;
+ break;
+
+ case KEYPRESS(0, 0, CHAR_BACKSPACE):
+ if (len == 0)
+ continue;
+ if (first == 0 && cursor == 0)
+ continue;
+ for (i = first + cursor-1; i < len; i++)
+ line[i] = line[i+1];
+ clear = 1;
+ len--;
+ if (cursor > 0)
+ cursor--;
+ if (cursor > 0 || first == 0)
+ continue;
+ /* show full line if it fits */
+ if (len < x_max) {
+ cursor = first;
+ first = 0;
+ continue;
+ }
+ /* jump left to see what we delete */
+ if (first > 10) {
+ first -= 10;
+ cursor = 10;
+ } else {
+ cursor = first;
+ first = 0;
+ }
+ continue;
+
+ case KEYPRESS(0, 0, ' ') ... KEYPRESS(0, 0, '~'):
+ case KEYPRESS(0, 0, 0x80) ... KEYPRESS(0, 0, 0xffff):
+ if (len+1 == size)
+ continue;
+ for (i = len; i > first + cursor; i--)
+ line[i] = line[i-1];
+ line[first + cursor] = KEYCHAR(key);
+ len++;
+ line[len] = '\0';
+ if (cursor+1 < x_max)
+ cursor++;
+ else if (first + cursor < len)
+ first++;
+ continue;
+ }
+ }
+
+ uefi_call_wrapper(ST->ConOut->EnableCursor, 2, ST->ConOut, FALSE);
+ return enter;
+}
+
+static UINTN entry_lookup_key(Config *config, UINTN start, CHAR16 key) {
+ UINTN i;
+
+ if (key == 0)
+ return -1;
+
+ /* select entry by number key */
+ if (key >= '1' && key <= '9') {
+ i = key - '0';
+ if (i > config->entry_count)
+ i = config->entry_count;
+ return i-1;
+ }
+
+ /* find matching key in config entries */
+ for (i = start; i < config->entry_count; i++)
+ if (config->entries[i]->key == key)
+ return i;
+
+ for (i = 0; i < start; i++)
+ if (config->entries[i]->key == key)
+ return i;
+
+ return -1;
+}
+
+static VOID print_status(Config *config, CHAR16 *loaded_image_path) {
+ UINT64 key;
+ UINTN i;
+ _cleanup_freepool_ CHAR8 *bootvar = NULL, *modevar = NULL, *indvar = NULL;
+ _cleanup_freepool_ CHAR16 *partstr = NULL, *defaultstr = NULL;
+ UINTN x, y, size;
+
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_LIGHTGRAY|EFI_BACKGROUND_BLACK);
+ uefi_call_wrapper(ST->ConOut->ClearScreen, 1, ST->ConOut);
+
+ Print(L"systemd-boot version: " GIT_VERSION "\n");
+ Print(L"architecture: " EFI_MACHINE_TYPE_NAME "\n");
+ Print(L"loaded image: %s\n", loaded_image_path);
+ Print(L"UEFI specification: %d.%02d\n", ST->Hdr.Revision >> 16, ST->Hdr.Revision & 0xffff);
+ Print(L"firmware vendor: %s\n", ST->FirmwareVendor);
+ Print(L"firmware version: %d.%02d\n", ST->FirmwareRevision >> 16, ST->FirmwareRevision & 0xffff);
+
+ if (uefi_call_wrapper(ST->ConOut->QueryMode, 4, ST->ConOut, ST->ConOut->Mode->Mode, &x, &y) == EFI_SUCCESS)
+ Print(L"console size: %d x %d\n", x, y);
+
+ if (efivar_get_raw(&global_guid, L"SecureBoot", &bootvar, &size) == EFI_SUCCESS)
+ Print(L"SecureBoot: %s\n", yes_no(*bootvar > 0));
+
+ if (efivar_get_raw(&global_guid, L"SetupMode", &modevar, &size) == EFI_SUCCESS)
+ Print(L"SetupMode: %s\n", *modevar > 0 ? L"setup" : L"user");
+
+ if (shim_loaded())
+ Print(L"Shim: present\n");
+
+ if (efivar_get_raw(&global_guid, L"OsIndicationsSupported", &indvar, &size) == EFI_SUCCESS)
+ Print(L"OsIndicationsSupported: %d\n", (UINT64)*indvar);
+
+ Print(L"\n--- press key ---\n\n");
+ console_key_read(&key, TRUE);
+
+ Print(L"timeout: %u\n", config->timeout_sec);
+ if (config->timeout_sec_efivar >= 0)
+ Print(L"timeout (EFI var): %d\n", config->timeout_sec_efivar);
+ Print(L"timeout (config): %u\n", config->timeout_sec_config);
+ if (config->entry_default_pattern)
+ Print(L"default pattern: '%s'\n", config->entry_default_pattern);
+ Print(L"editor: %s\n", yes_no(config->editor));
+ Print(L"auto-entries: %s\n", yes_no(config->auto_entries));
+ Print(L"auto-firmware: %s\n", yes_no(config->auto_firmware));
+ Print(L"\n");
+
+ Print(L"config entry count: %d\n", config->entry_count);
+ Print(L"entry selected idx: %d\n", config->idx_default);
+ if (config->idx_default_efivar >= 0)
+ Print(L"entry EFI var idx: %d\n", config->idx_default_efivar);
+ Print(L"\n");
+
+ if (efivar_get_int(L"LoaderConfigTimeout", &i) == EFI_SUCCESS)
+ Print(L"LoaderConfigTimeout: %u\n", i);
+
+ if (config->entry_oneshot)
+ Print(L"LoaderEntryOneShot: %s\n", config->entry_oneshot);
+ if (efivar_get(L"LoaderDevicePartUUID", &partstr) == EFI_SUCCESS)
+ Print(L"LoaderDevicePartUUID: %s\n", partstr);
+ if (efivar_get(L"LoaderEntryDefault", &defaultstr) == EFI_SUCCESS)
+ Print(L"LoaderEntryDefault: %s\n", defaultstr);
+
+ Print(L"\n--- press key ---\n\n");
+ console_key_read(&key, TRUE);
+
+ for (i = 0; i < config->entry_count; i++) {
+ ConfigEntry *entry;
+
+ if (key == KEYPRESS(0, SCAN_ESC, 0) || key == KEYPRESS(0, 0, 'q'))
+ break;
+
+ entry = config->entries[i];
+ Print(L"config entry: %d/%d\n", i+1, config->entry_count);
+ if (entry->id)
+ Print(L"id '%s'\n", entry->id);
+ Print(L"title show '%s'\n", entry->title_show);
+ if (entry->title)
+ Print(L"title '%s'\n", entry->title);
+ if (entry->version)
+ Print(L"version '%s'\n", entry->version);
+ if (entry->machine_id)
+ Print(L"machine-id '%s'\n", entry->machine_id);
+ if (entry->device) {
+ EFI_DEVICE_PATH *device_path;
+
+ device_path = DevicePathFromHandle(entry->device);
+ if (device_path) {
+ _cleanup_freepool_ CHAR16 *str;
+
+ str = DevicePathToStr(device_path);
+ Print(L"device handle '%s'\n", str);
+ }
+ }
+ if (entry->loader)
+ Print(L"loader '%s'\n", entry->loader);
+ if (entry->options)
+ Print(L"options '%s'\n", entry->options);
+ Print(L"auto-select %s\n", yes_no(!entry->no_autoselect));
+ if (entry->call)
+ Print(L"internal call yes\n");
+
+ if (entry->tries_left != (UINTN) -1)
+ Print(L"counting boots yes\n"
+ "tries done %u\n"
+ "tries left %u\n"
+ "current path %s\\%s\n"
+ "next path %s\\%s\n",
+ entry->tries_done,
+ entry->tries_left,
+ entry->path, entry->current_name,
+ entry->path, entry->next_name);
+
+ Print(L"\n--- press key ---\n\n");
+ console_key_read(&key, TRUE);
+ }
+
+ uefi_call_wrapper(ST->ConOut->ClearScreen, 1, ST->ConOut);
+}
+
+static BOOLEAN menu_run(
+ Config *config,
+ ConfigEntry **chosen_entry,
+ CHAR16 *loaded_image_path) {
+
+ EFI_STATUS err;
+ UINTN visible_max;
+ UINTN idx_highlight;
+ UINTN idx_highlight_prev;
+ UINTN idx_first;
+ UINTN idx_last;
+ BOOLEAN refresh;
+ BOOLEAN highlight;
+ UINTN i;
+ UINTN line_width;
+ CHAR16 **lines;
+ UINTN x_start;
+ UINTN y_start;
+ UINTN x_max;
+ UINTN y_max;
+ CHAR16 *status;
+ CHAR16 *clearline;
+ INTN timeout_remain;
+ INT16 idx;
+ BOOLEAN exit = FALSE;
+ BOOLEAN run = TRUE;
+ BOOLEAN wait = FALSE;
+ BOOLEAN cleared_screen = FALSE;
+
+ graphics_mode(FALSE);
+ uefi_call_wrapper(ST->ConIn->Reset, 2, ST->ConIn, FALSE);
+ uefi_call_wrapper(ST->ConOut->EnableCursor, 2, ST->ConOut, FALSE);
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_LIGHTGRAY|EFI_BACKGROUND_BLACK);
+
+ /* draw a single character to make ClearScreen work on some firmware */
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, L" ");
+
+ if (config->console_mode_change != CONSOLE_MODE_KEEP) {
+ err = console_set_mode(&config->console_mode, config->console_mode_change);
+ if (!EFI_ERROR(err))
+ cleared_screen = TRUE;
+ }
+
+ if (!cleared_screen)
+ uefi_call_wrapper(ST->ConOut->ClearScreen, 1, ST->ConOut);
+
+ if (config->console_mode_change != CONSOLE_MODE_KEEP && EFI_ERROR(err))
+ Print(L"Error switching console mode to %ld: %r.\r", (UINT64)config->console_mode, err);
+
+ err = uefi_call_wrapper(ST->ConOut->QueryMode, 4, ST->ConOut, ST->ConOut->Mode->Mode, &x_max, &y_max);
+ if (EFI_ERROR(err)) {
+ x_max = 80;
+ y_max = 25;
+ }
+
+ /* we check 10 times per second for a keystroke */
+ if (config->timeout_sec > 0)
+ timeout_remain = config->timeout_sec * 10;
+ else
+ timeout_remain = -1;
+
+ idx_highlight = config->idx_default;
+ idx_highlight_prev = 0;
+
+ visible_max = y_max - 2;
+
+ if ((UINTN)config->idx_default >= visible_max)
+ idx_first = config->idx_default-1;
+ else
+ idx_first = 0;
+
+ idx_last = idx_first + visible_max-1;
+
+ refresh = TRUE;
+ highlight = FALSE;
+
+ /* length of the longest entry */
+ line_width = 5;
+ for (i = 0; i < config->entry_count; i++) {
+ UINTN entry_len;
+
+ entry_len = StrLen(config->entries[i]->title_show);
+ if (line_width < entry_len)
+ line_width = entry_len;
+ }
+ if (line_width > x_max-6)
+ line_width = x_max-6;
+
+ /* offsets to center the entries on the screen */
+ x_start = (x_max - (line_width)) / 2;
+ if (config->entry_count < visible_max)
+ y_start = ((visible_max - config->entry_count) / 2) + 1;
+ else
+ y_start = 0;
+
+ /* menu entries title lines */
+ lines = AllocatePool(sizeof(CHAR16 *) * config->entry_count);
+ for (i = 0; i < config->entry_count; i++) {
+ UINTN j, k;
+
+ lines[i] = AllocatePool(((x_max+1) * sizeof(CHAR16)));
+ for (j = 0; j < x_start; j++)
+ lines[i][j] = ' ';
+
+ for (k = 0; config->entries[i]->title_show[k] != '\0' && j < x_max; j++, k++)
+ lines[i][j] = config->entries[i]->title_show[k];
+
+ for (; j < x_max; j++)
+ lines[i][j] = ' ';
+ lines[i][x_max] = '\0';
+ }
+
+ status = NULL;
+ clearline = AllocatePool((x_max+1) * sizeof(CHAR16));
+ for (i = 0; i < x_max; i++)
+ clearline[i] = ' ';
+ clearline[i] = 0;
+
+ while (!exit) {
+ UINT64 key;
+
+ if (refresh) {
+ for (i = 0; i < config->entry_count; i++) {
+ if (i < idx_first || i > idx_last)
+ continue;
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_start + i - idx_first);
+ if (i == idx_highlight)
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut,
+ EFI_BLACK|EFI_BACKGROUND_LIGHTGRAY);
+ else
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut,
+ EFI_LIGHTGRAY|EFI_BACKGROUND_BLACK);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, lines[i]);
+ if ((INTN)i == config->idx_default_efivar) {
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, x_start-3, y_start + i - idx_first);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, L"=>");
+ }
+ }
+ refresh = FALSE;
+ } else if (highlight) {
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_start + idx_highlight_prev - idx_first);
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_LIGHTGRAY|EFI_BACKGROUND_BLACK);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, lines[idx_highlight_prev]);
+ if ((INTN)idx_highlight_prev == config->idx_default_efivar) {
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, x_start-3, y_start + idx_highlight_prev - idx_first);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, L"=>");
+ }
+
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_start + idx_highlight - idx_first);
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_BLACK|EFI_BACKGROUND_LIGHTGRAY);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, lines[idx_highlight]);
+ if ((INTN)idx_highlight == config->idx_default_efivar) {
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, x_start-3, y_start + idx_highlight - idx_first);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, L"=>");
+ }
+ highlight = FALSE;
+ }
+
+ if (timeout_remain > 0) {
+ FreePool(status);
+ status = PoolPrint(L"Boot in %d sec.", (timeout_remain + 5) / 10);
+ }
+
+ /* print status at last line of screen */
+ if (status) {
+ UINTN len;
+ UINTN x;
+
+ /* center line */
+ len = StrLen(status);
+ if (len < x_max)
+ x = (x_max - len) / 2;
+ else
+ x = 0;
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_LIGHTGRAY|EFI_BACKGROUND_BLACK);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_max-1);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, clearline + (x_max - x));
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, status);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, clearline+1 + x + len);
+ }
+
+ err = console_key_read(&key, wait);
+ if (EFI_ERROR(err)) {
+ /* timeout reached */
+ if (timeout_remain == 0) {
+ exit = TRUE;
+ break;
+ }
+
+ /* sleep and update status */
+ if (timeout_remain > 0) {
+ uefi_call_wrapper(BS->Stall, 1, 100 * 1000);
+ timeout_remain--;
+ continue;
+ }
+
+ /* timeout disabled, wait for next key */
+ wait = TRUE;
+ continue;
+ }
+
+ timeout_remain = -1;
+
+ /* clear status after keystroke */
+ if (status) {
+ FreePool(status);
+ status = NULL;
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_LIGHTGRAY|EFI_BACKGROUND_BLACK);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_max-1);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, clearline+1);
+ }
+
+ idx_highlight_prev = idx_highlight;
+
+ switch (key) {
+ case KEYPRESS(0, SCAN_UP, 0):
+ case KEYPRESS(0, 0, 'k'):
+ if (idx_highlight > 0)
+ idx_highlight--;
+ break;
+
+ case KEYPRESS(0, SCAN_DOWN, 0):
+ case KEYPRESS(0, 0, 'j'):
+ if (idx_highlight < config->entry_count-1)
+ idx_highlight++;
+ break;
+
+ case KEYPRESS(0, SCAN_HOME, 0):
+ case KEYPRESS(EFI_ALT_PRESSED, 0, '<'):
+ if (idx_highlight > 0) {
+ refresh = TRUE;
+ idx_highlight = 0;
+ }
+ break;
+
+ case KEYPRESS(0, SCAN_END, 0):
+ case KEYPRESS(EFI_ALT_PRESSED, 0, '>'):
+ if (idx_highlight < config->entry_count-1) {
+ refresh = TRUE;
+ idx_highlight = config->entry_count-1;
+ }
+ break;
+
+ case KEYPRESS(0, SCAN_PAGE_UP, 0):
+ if (idx_highlight > visible_max)
+ idx_highlight -= visible_max;
+ else
+ idx_highlight = 0;
+ break;
+
+ case KEYPRESS(0, SCAN_PAGE_DOWN, 0):
+ idx_highlight += visible_max;
+ if (idx_highlight > config->entry_count-1)
+ idx_highlight = config->entry_count-1;
+ break;
+
+ case KEYPRESS(0, 0, CHAR_LINEFEED):
+ case KEYPRESS(0, 0, CHAR_CARRIAGE_RETURN):
+ exit = TRUE;
+ break;
+
+ case KEYPRESS(0, SCAN_F1, 0):
+ case KEYPRESS(0, 0, 'h'):
+ case KEYPRESS(0, 0, '?'):
+ status = StrDuplicate(L"(d)efault, (t/T)timeout, (e)dit, (v)ersion (Q)uit (P)rint (h)elp");
+ break;
+
+ case KEYPRESS(0, 0, 'Q'):
+ exit = TRUE;
+ run = FALSE;
+ break;
+
+ case KEYPRESS(0, 0, 'd'):
+ if (config->idx_default_efivar != (INTN)idx_highlight) {
+ /* store the selected entry in a persistent EFI variable */
+ efivar_set(L"LoaderEntryDefault", config->entries[idx_highlight]->id, TRUE);
+ config->idx_default_efivar = idx_highlight;
+ status = StrDuplicate(L"Default boot entry selected.");
+ } else {
+ /* clear the default entry EFI variable */
+ efivar_set(L"LoaderEntryDefault", NULL, TRUE);
+ config->idx_default_efivar = -1;
+ status = StrDuplicate(L"Default boot entry cleared.");
+ }
+ refresh = TRUE;
+ break;
+
+ case KEYPRESS(0, 0, '-'):
+ case KEYPRESS(0, 0, 'T'):
+ if (config->timeout_sec_efivar > 0) {
+ config->timeout_sec_efivar--;
+ efivar_set_int(L"LoaderConfigTimeout", config->timeout_sec_efivar, TRUE);
+ if (config->timeout_sec_efivar > 0)
+ status = PoolPrint(L"Menu timeout set to %d sec.", config->timeout_sec_efivar);
+ else
+ status = StrDuplicate(L"Menu disabled. Hold down key at bootup to show menu.");
+ } else if (config->timeout_sec_efivar <= 0){
+ config->timeout_sec_efivar = -1;
+ efivar_set(L"LoaderConfigTimeout", NULL, TRUE);
+ if (config->timeout_sec_config > 0)
+ status = PoolPrint(L"Menu timeout of %d sec is defined by configuration file.",
+ config->timeout_sec_config);
+ else
+ status = StrDuplicate(L"Menu disabled. Hold down key at bootup to show menu.");
+ }
+ break;
+
+ case KEYPRESS(0, 0, '+'):
+ case KEYPRESS(0, 0, 't'):
+ if (config->timeout_sec_efivar == -1 && config->timeout_sec_config == 0)
+ config->timeout_sec_efivar++;
+ config->timeout_sec_efivar++;
+ efivar_set_int(L"LoaderConfigTimeout", config->timeout_sec_efivar, TRUE);
+ if (config->timeout_sec_efivar > 0)
+ status = PoolPrint(L"Menu timeout set to %d sec.",
+ config->timeout_sec_efivar);
+ else
+ status = StrDuplicate(L"Menu disabled. Hold down key at bootup to show menu.");
+ break;
+
+ case KEYPRESS(0, 0, 'e'):
+ /* only the options of configured entries can be edited */
+ if (!config->editor || config->entries[idx_highlight]->type == LOADER_UNDEFINED)
+ break;
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_LIGHTGRAY|EFI_BACKGROUND_BLACK);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_max-1);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, clearline+1);
+ if (line_edit(config->entries[idx_highlight]->options, &config->options_edit, x_max-1, y_max-1))
+ exit = TRUE;
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_max-1);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, clearline+1);
+ break;
+
+ case KEYPRESS(0, 0, 'v'):
+ status = PoolPrint(L"systemd-boot " GIT_VERSION " (" EFI_MACHINE_TYPE_NAME "), UEFI Specification %d.%02d, Vendor %s %d.%02d",
+ ST->Hdr.Revision >> 16, ST->Hdr.Revision & 0xffff,
+ ST->FirmwareVendor, ST->FirmwareRevision >> 16, ST->FirmwareRevision & 0xffff);
+ break;
+
+ case KEYPRESS(0, 0, 'P'):
+ print_status(config, loaded_image_path);
+ refresh = TRUE;
+ break;
+
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'l'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('l')):
+ refresh = TRUE;
+ break;
+
+ default:
+ /* jump with a hotkey directly to a matching entry */
+ idx = entry_lookup_key(config, idx_highlight+1, KEYCHAR(key));
+ if (idx < 0)
+ break;
+ idx_highlight = idx;
+ refresh = TRUE;
+ }
+
+ if (idx_highlight > idx_last) {
+ idx_last = idx_highlight;
+ idx_first = 1 + idx_highlight - visible_max;
+ refresh = TRUE;
+ } else if (idx_highlight < idx_first) {
+ idx_first = idx_highlight;
+ idx_last = idx_highlight + visible_max-1;
+ refresh = TRUE;
+ }
+
+ if (!refresh && idx_highlight != idx_highlight_prev)
+ highlight = TRUE;
+ }
+
+ *chosen_entry = config->entries[idx_highlight];
+
+ for (i = 0; i < config->entry_count; i++)
+ FreePool(lines[i]);
+ FreePool(lines);
+ FreePool(clearline);
+
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_WHITE|EFI_BACKGROUND_BLACK);
+ uefi_call_wrapper(ST->ConOut->ClearScreen, 1, ST->ConOut);
+ return run;
+}
+
+static VOID config_add_entry(Config *config, ConfigEntry *entry) {
+ if ((config->entry_count & 15) == 0) {
+ UINTN i;
+
+ i = config->entry_count + 16;
+ if (config->entry_count == 0)
+ config->entries = AllocatePool(sizeof(VOID *) * i);
+ else
+ config->entries = ReallocatePool(config->entries,
+ sizeof(VOID *) * config->entry_count, sizeof(VOID *) * i);
+ }
+ config->entries[config->entry_count++] = entry;
+}
+
+static VOID config_entry_free(ConfigEntry *entry) {
+ if (!entry)
+ return;
+
+ FreePool(entry->id);
+ FreePool(entry->title_show);
+ FreePool(entry->title);
+ FreePool(entry->version);
+ FreePool(entry->machine_id);
+ FreePool(entry->loader);
+ FreePool(entry->options);
+ FreePool(entry->path);
+ FreePool(entry->current_name);
+ FreePool(entry->next_name);
+ FreePool(entry);
+}
+
+static BOOLEAN is_digit(CHAR16 c) {
+ return (c >= '0') && (c <= '9');
+}
+
+static UINTN c_order(CHAR16 c) {
+ if (c == '\0')
+ return 0;
+ if (is_digit(c))
+ return 0;
+ else if ((c >= 'a') && (c <= 'z'))
+ return c;
+ else
+ return c + 0x10000;
+}
+
+static INTN str_verscmp(CHAR16 *s1, CHAR16 *s2) {
+ CHAR16 *os1 = s1;
+ CHAR16 *os2 = s2;
+
+ while (*s1 || *s2) {
+ INTN first;
+
+ while ((*s1 && !is_digit(*s1)) || (*s2 && !is_digit(*s2))) {
+ INTN order;
+
+ order = c_order(*s1) - c_order(*s2);
+ if (order != 0)
+ return order;
+ s1++;
+ s2++;
+ }
+
+ while (*s1 == '0')
+ s1++;
+ while (*s2 == '0')
+ s2++;
+
+ first = 0;
+ while (is_digit(*s1) && is_digit(*s2)) {
+ if (first == 0)
+ first = *s1 - *s2;
+ s1++;
+ s2++;
+ }
+
+ if (is_digit(*s1))
+ return 1;
+ if (is_digit(*s2))
+ return -1;
+
+ if (first != 0)
+ return first;
+ }
+
+ return StrCmp(os1, os2);
+}
+
+static CHAR8 *line_get_key_value(
+ CHAR8 *content,
+ CHAR8 *sep,
+ UINTN *pos,
+ CHAR8 **key_ret,
+ CHAR8 **value_ret) {
+
+ CHAR8 *line;
+ UINTN linelen;
+ CHAR8 *value;
+
+skip:
+ line = content + *pos;
+ if (*line == '\0')
+ return NULL;
+
+ linelen = 0;
+ while (line[linelen] && !strchra((CHAR8 *)"\n\r", line[linelen]))
+ linelen++;
+
+ /* move pos to next line */
+ *pos += linelen;
+ if (content[*pos])
+ (*pos)++;
+
+ /* empty line */
+ if (linelen == 0)
+ goto skip;
+
+ /* terminate line */
+ line[linelen] = '\0';
+
+ /* remove leading whitespace */
+ while (strchra((CHAR8 *)" \t", *line)) {
+ line++;
+ linelen--;
+ }
+
+ /* remove trailing whitespace */
+ while (linelen > 0 && strchra((CHAR8 *)" \t", line[linelen-1]))
+ linelen--;
+ line[linelen] = '\0';
+
+ if (*line == '#')
+ goto skip;
+
+ /* split key/value */
+ value = line;
+ while (*value && !strchra(sep, *value))
+ value++;
+ if (*value == '\0')
+ goto skip;
+ *value = '\0';
+ value++;
+ while (*value && strchra(sep, *value))
+ value++;
+
+ /* unquote */
+ if (value[0] == '"' && line[linelen-1] == '"') {
+ value++;
+ line[linelen-1] = '\0';
+ }
+
+ *key_ret = line;
+ *value_ret = value;
+ return line;
+}
+
+static VOID config_defaults_load_from_file(Config *config, CHAR8 *content) {
+ CHAR8 *line;
+ UINTN pos = 0;
+ CHAR8 *key, *value;
+
+ while ((line = line_get_key_value(content, (CHAR8 *)" \t", &pos, &key, &value))) {
+ if (strcmpa((CHAR8 *)"timeout", key) == 0) {
+ _cleanup_freepool_ CHAR16 *s = NULL;
+
+ s = stra_to_str(value);
+ config->timeout_sec_config = Atoi(s);
+ config->timeout_sec = config->timeout_sec_config;
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"default", key) == 0) {
+ FreePool(config->entry_default_pattern);
+ config->entry_default_pattern = stra_to_str(value);
+ StrLwr(config->entry_default_pattern);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"editor", key) == 0) {
+ BOOLEAN on;
+
+ if (EFI_ERROR(parse_boolean(value, &on)))
+ continue;
+ config->editor = on;
+ }
+
+ if (strcmpa((CHAR8 *)"auto-entries", key) == 0) {
+ BOOLEAN on;
+
+ if (EFI_ERROR(parse_boolean(value, &on)))
+ continue;
+ config->auto_entries = on;
+ }
+
+ if (strcmpa((CHAR8 *)"auto-firmware", key) == 0) {
+ BOOLEAN on;
+
+ if (EFI_ERROR(parse_boolean(value, &on)))
+ continue;
+ config->auto_firmware = on;
+ }
+
+ if (strcmpa((CHAR8 *)"console-mode", key) == 0) {
+ if (strcmpa((CHAR8 *)"auto", value) == 0)
+ config->console_mode_change = CONSOLE_MODE_AUTO;
+ else if (strcmpa((CHAR8 *)"max", value) == 0)
+ config->console_mode_change = CONSOLE_MODE_MAX;
+ else if (strcmpa((CHAR8 *)"keep", value) == 0)
+ config->console_mode_change = CONSOLE_MODE_KEEP;
+ else {
+ _cleanup_freepool_ CHAR16 *s = NULL;
+
+ s = stra_to_str(value);
+ config->console_mode = Atoi(s);
+ config->console_mode_change = CONSOLE_MODE_SET;
+ }
+
+ continue;
+ }
+ }
+}
+
+static VOID config_entry_parse_tries(
+ ConfigEntry *entry,
+ CHAR16 *path,
+ CHAR16 *file,
+ CHAR16 *suffix) {
+
+ UINTN left = (UINTN) -1, done = (UINTN) -1, factor = 1, i, next_left, next_done;
+ _cleanup_freepool_ CHAR16 *prefix = NULL;
+
+ /*
+ * Parses a suffix of two counters (one going down, one going up) in the form "+LEFT-DONE" from the end of the
+ * filename (but before the .efi/.conf suffix), where the "-DONE" part is optional and may be left out (in
+ * which case that counter as assumed to be zero, i.e. the missing part is synonymous to "-0").
+ *
+ * Names we grok, and the series they result in:
+ *
+ * foobar+3.efi → foobar+2-1.efi → foobar+1-2.efi → foobar+0-3.efi → STOP!
+ * foobar+4-0.efi → foobar+3-1.efi → foobar+2-2.efi → foobar+1-3.efi → foobar+0-4.efi → STOP!
+ */
+
+ i = StrLen(file);
+
+ /* Chop off any suffix such as ".conf" or ".efi" */
+ if (suffix) {
+ UINTN suffix_length;
+
+ suffix_length = StrLen(suffix);
+ if (i < suffix_length)
+ return;
+
+ i -= suffix_length;
+ }
+
+ /* Go backwards through the string and parse everything we encounter */
+ for (;;) {
+ if (i == 0)
+ return;
+
+ i--;
+
+ switch (file[i]) {
+
+ case '+':
+ if (left == (UINTN) -1) /* didn't read at least one digit for 'left'? */
+ return;
+
+ if (done == (UINTN) -1) /* no 'done' counter? If so, it's equivalent to 0 */
+ done = 0;
+
+ goto good;
+
+ case '-':
+ if (left == (UINTN) -1) /* didn't parse any digit yet? */
+ return;
+
+ if (done != (UINTN) -1) /* already encountered a dash earlier? */
+ return;
+
+ /* So we encountered a dash. This means this counter is of the form +LEFT-DONE. Let's assign
+ * what we already parsed to 'done', and start fresh for the 'left' part. */
+
+ done = left;
+ left = (UINTN) -1;
+ factor = 1;
+ break;
+
+ case '0'...'9': {
+ UINTN new_factor;
+
+ if (left == (UINTN) -1)
+ left = file[i] - '0';
+ else {
+ UINTN new_left, digit;
+
+ digit = file[i] - '0';
+ if (digit > (UINTN) -1 / factor) /* overflow check */
+ return;
+
+ new_left = left + digit * factor;
+ if (new_left < left) /* overflow check */
+ return;
+
+ if (new_left == (UINTN) -1) /* don't allow us to be confused */
+ return;
+ }
+
+ new_factor = factor * 10;
+ if (new_factor < factor) /* overflow chck */
+ return;
+
+ factor = new_factor;
+ break;
+ }
+
+ default:
+ return;
+ }
+ }
+
+good:
+ entry->tries_left = left;
+ entry->tries_done = done;
+
+ entry->path = StrDuplicate(path);
+ entry->current_name = StrDuplicate(file);
+
+ next_left = left <= 0 ? 0 : left - 1;
+ next_done = done >= (UINTN) -2 ? (UINTN) -2 : done + 1;
+
+ prefix = StrDuplicate(file);
+ prefix[i] = 0;
+
+ entry->next_name = PoolPrint(L"%s+%u-%u%s", prefix, next_left, next_done, suffix ?: L"");
+}
+
+static VOID config_entry_bump_counters(
+ ConfigEntry *entry,
+ EFI_FILE_HANDLE root_dir) {
+
+ _cleanup_freepool_ CHAR16* old_path = NULL, *new_path = NULL;
+ _cleanup_(FileHandleClosep) EFI_FILE_HANDLE handle = NULL;
+ static EFI_GUID EfiFileInfoGuid = EFI_FILE_INFO_ID;
+ _cleanup_freepool_ EFI_FILE_INFO *file_info = NULL;
+ UINTN file_info_size, a, b;
+ EFI_STATUS r;
+
+ if (entry->tries_left == (UINTN) -1)
+ return;
+
+ if (!entry->path || !entry->current_name || !entry->next_name)
+ return;
+
+ old_path = PoolPrint(L"%s\\%s", entry->path, entry->current_name);
+
+ r = uefi_call_wrapper(root_dir->Open, 5, root_dir, &handle, old_path, EFI_FILE_MODE_READ|EFI_FILE_MODE_WRITE, 0ULL);
+ if (EFI_ERROR(r))
+ return;
+
+ a = StrLen(entry->current_name);
+ b = StrLen(entry->next_name);
+
+ file_info_size = OFFSETOF(EFI_FILE_INFO, FileName) + (a > b ? a : b) + 1;
+
+ for (;;) {
+ file_info = AllocatePool(file_info_size);
+
+ r = uefi_call_wrapper(handle->GetInfo, 4, handle, &EfiFileInfoGuid, &file_info_size, file_info);
+ if (!EFI_ERROR(r))
+ break;
+
+ if (r != EFI_BUFFER_TOO_SMALL || file_info_size * 2 < file_info_size) {
+ Print(L"\nFailed to get file info for '%s': %r\n", old_path, r);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return;
+ }
+
+ file_info_size *= 2;
+ FreePool(file_info);
+ }
+
+ /* And rename the file */
+ StrCpy(file_info->FileName, entry->next_name);
+ r = uefi_call_wrapper(handle->SetInfo, 4, handle, &EfiFileInfoGuid, file_info_size, file_info);
+ if (EFI_ERROR(r)) {
+ Print(L"\nFailed to rename '%s' to '%s', ignoring: %r\n", old_path, entry->next_name, r);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return;
+ }
+
+ /* Flush everything to disk, just in case… */
+ (void) uefi_call_wrapper(handle->Flush, 1, handle);
+
+ /* Let's tell the OS that we renamed this file, so that it knows what to rename to the counter-less name on
+ * success */
+ new_path = PoolPrint(L"%s\\%s", entry->path, entry->next_name);
+ efivar_set(L"LoaderBootCountPath", new_path, FALSE);
+
+ /* If the file we just renamed is the loader path, then let's update that. */
+ if (StrCmp(entry->loader, old_path) == 0) {
+ FreePool(entry->loader);
+ entry->loader = new_path;
+ new_path = NULL;
+ }
+}
+
+static VOID config_entry_add_from_file(
+ Config *config,
+ EFI_HANDLE *device,
+ CHAR16 *path,
+ CHAR16 *file,
+ CHAR8 *content,
+ CHAR16 *loaded_image_path) {
+
+ ConfigEntry *entry;
+ CHAR8 *line;
+ UINTN pos = 0;
+ CHAR8 *key, *value;
+ UINTN len;
+ _cleanup_freepool_ CHAR16 *initrd = NULL;
+
+ entry = AllocatePool(sizeof(ConfigEntry));
+
+ *entry = (ConfigEntry) {
+ .tries_done = (UINTN) -1,
+ .tries_left = (UINTN) -1,
+ };
+
+ while ((line = line_get_key_value(content, (CHAR8 *)" \t", &pos, &key, &value))) {
+ if (strcmpa((CHAR8 *)"title", key) == 0) {
+ FreePool(entry->title);
+ entry->title = stra_to_str(value);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"version", key) == 0) {
+ FreePool(entry->version);
+ entry->version = stra_to_str(value);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"machine-id", key) == 0) {
+ FreePool(entry->machine_id);
+ entry->machine_id = stra_to_str(value);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"linux", key) == 0) {
+ FreePool(entry->loader);
+ entry->type = LOADER_LINUX;
+ entry->loader = stra_to_path(value);
+ entry->key = 'l';
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"efi", key) == 0) {
+ entry->type = LOADER_EFI;
+ FreePool(entry->loader);
+ entry->loader = stra_to_path(value);
+
+ /* do not add an entry for ourselves */
+ if (StriCmp(entry->loader, loaded_image_path) == 0) {
+ entry->type = LOADER_UNDEFINED;
+ break;
+ }
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"architecture", key) == 0) {
+ /* do not add an entry for an EFI image of architecture not matching with that of the image */
+ if (strcmpa((CHAR8 *)EFI_MACHINE_TYPE_NAME, value) != 0) {
+ entry->type = LOADER_UNDEFINED;
+ break;
+ }
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"initrd", key) == 0) {
+ _cleanup_freepool_ CHAR16 *new = NULL;
+
+ new = stra_to_path(value);
+ if (initrd) {
+ CHAR16 *s;
+
+ s = PoolPrint(L"%s initrd=%s", initrd, new);
+ FreePool(initrd);
+ initrd = s;
+ } else
+ initrd = PoolPrint(L"initrd=%s", new);
+
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"options", key) == 0) {
+ _cleanup_freepool_ CHAR16 *new = NULL;
+
+ new = stra_to_str(value);
+ if (entry->options) {
+ CHAR16 *s;
+
+ s = PoolPrint(L"%s %s", entry->options, new);
+ FreePool(entry->options);
+ entry->options = s;
+ } else {
+ entry->options = new;
+ new = NULL;
+ }
+
+ continue;
+ }
+ }
+
+ if (entry->type == LOADER_UNDEFINED) {
+ config_entry_free(entry);
+ return;
+ }
+
+ /* add initrd= to options */
+ if (entry->type == LOADER_LINUX && initrd) {
+ if (entry->options) {
+ CHAR16 *s;
+
+ s = PoolPrint(L"%s %s", initrd, entry->options);
+ FreePool(entry->options);
+ entry->options = s;
+ } else {
+ entry->options = initrd;
+ initrd = NULL;
+ }
+ }
+
+ entry->device = device;
+ entry->id = StrDuplicate(file);
+ len = StrLen(entry->id);
+ /* remove ".conf" */
+ if (len > 5)
+ entry->id[len - 5] = '\0';
+ StrLwr(entry->id);
+
+ config_add_entry(config, entry);
+
+ config_entry_parse_tries(entry, path, file, L".conf");
+}
+
+static VOID config_load_defaults(Config *config, EFI_FILE *root_dir) {
+ _cleanup_freepool_ CHAR8 *content = NULL;
+ UINTN sec;
+ EFI_STATUS err;
+
+ *config = (Config) {
+ .editor = TRUE,
+ .auto_entries = TRUE,
+ .auto_firmware = TRUE,
+ };
+
+ err = file_read(root_dir, L"\\loader\\loader.conf", 0, 0, &content, NULL);
+ if (!EFI_ERROR(err))
+ config_defaults_load_from_file(config, content);
+
+ err = efivar_get_int(L"LoaderConfigTimeout", &sec);
+ if (!EFI_ERROR(err)) {
+ config->timeout_sec_efivar = sec > INTN_MAX ? INTN_MAX : sec;
+ config->timeout_sec = sec;
+ } else
+ config->timeout_sec_efivar = -1;
+
+ err = efivar_get_int(L"LoaderConfigTimeoutOneShot", &sec);
+ if (!EFI_ERROR(err)) {
+ /* Unset variable now, after all it's "one shot". */
+ (void) efivar_set(L"LoaderConfigTimeoutOneShot", NULL, TRUE);
+
+ config->timeout_sec = sec;
+ config->force_menu = TRUE; /* force the menu when this is set */
+ }
+}
+
+static VOID config_load_entries(
+ Config *config,
+ EFI_HANDLE *device,
+ EFI_FILE *root_dir,
+ CHAR16 *loaded_image_path) {
+
+ EFI_FILE_HANDLE entries_dir;
+ EFI_STATUS err;
+
+ err = uefi_call_wrapper(root_dir->Open, 5, root_dir, &entries_dir, L"\\loader\\entries", EFI_FILE_MODE_READ, 0ULL);
+ if (!EFI_ERROR(err)) {
+ for (;;) {
+ CHAR16 buf[256];
+ UINTN bufsize;
+ EFI_FILE_INFO *f;
+ _cleanup_freepool_ CHAR8 *content = NULL;
+ UINTN len;
+
+ bufsize = sizeof(buf);
+ err = uefi_call_wrapper(entries_dir->Read, 3, entries_dir, &bufsize, buf);
+ if (bufsize == 0 || EFI_ERROR(err))
+ break;
+
+ f = (EFI_FILE_INFO *) buf;
+ if (f->FileName[0] == '.')
+ continue;
+ if (f->Attribute & EFI_FILE_DIRECTORY)
+ continue;
+
+ len = StrLen(f->FileName);
+ if (len < 6)
+ continue;
+ if (StriCmp(f->FileName + len - 5, L".conf") != 0)
+ continue;
+ if (StrnCmp(f->FileName, L"auto-", 5) == 0)
+ continue;
+
+ err = file_read(entries_dir, f->FileName, 0, 0, &content, NULL);
+ if (!EFI_ERROR(err))
+ config_entry_add_from_file(config, device, L"\\loader\\entries", f->FileName, content, loaded_image_path);
+ }
+ uefi_call_wrapper(entries_dir->Close, 1, entries_dir);
+ }
+}
+
+static INTN config_entry_compare(ConfigEntry *a, ConfigEntry *b) {
+ INTN r;
+
+ /* Order entries that have no tries left to the end of the list */
+ if (a->tries_left != 0 && b->tries_left == 0)
+ return -1;
+ if (a->tries_left == 0 && b->tries_left != 0)
+ return 1;
+
+ r = str_verscmp(a->id, b->id);
+ if (r != 0)
+ return r;
+
+ if (a->tries_left == (UINTN) -1 ||
+ b->tries_left == (UINTN) -1)
+ return 0;
+
+ /* If both items have boot counting, and otherwise are identical, put the entry with more tries left first */
+ if (a->tries_left > b->tries_left)
+ return -1;
+ if (a->tries_left < b->tries_left)
+ return 1;
+
+ /* If they have the same number of tries left, then let the one win which was tried fewer times so far */
+ if (a->tries_done < b->tries_done)
+ return -1;
+ if (a->tries_done > b->tries_done)
+ return 1;
+
+ return 0;
+}
+
+static VOID config_sort_entries(Config *config) {
+ UINTN i;
+
+ for (i = 1; i < config->entry_count; i++) {
+ BOOLEAN more;
+ UINTN k;
+
+ more = FALSE;
+ for (k = 0; k < config->entry_count - i; k++) {
+ ConfigEntry *entry;
+
+ if (config_entry_compare(config->entries[k], config->entries[k+1]) <= 0)
+ continue;
+
+ entry = config->entries[k];
+ config->entries[k] = config->entries[k+1];
+ config->entries[k+1] = entry;
+ more = TRUE;
+ }
+ if (!more)
+ break;
+ }
+}
+
+static INTN config_entry_find(Config *config, CHAR16 *id) {
+ UINTN i;
+
+ for (i = 0; i < config->entry_count; i++)
+ if (StrCmp(config->entries[i]->id, id) == 0)
+ return (INTN) i;
+
+ return -1;
+}
+
+static VOID config_default_entry_select(Config *config) {
+ _cleanup_freepool_ CHAR16 *entry_oneshot = NULL, *entry_default = NULL;
+ EFI_STATUS err;
+ INTN i;
+
+ /*
+ * The EFI variable to specify a boot entry for the next, and only the
+ * next reboot. The variable is always cleared directly after it is read.
+ */
+ err = efivar_get(L"LoaderEntryOneShot", &entry_oneshot);
+ if (!EFI_ERROR(err)) {
+
+ config->entry_oneshot = StrDuplicate(entry_oneshot);
+ efivar_set(L"LoaderEntryOneShot", NULL, TRUE);
+
+ i = config_entry_find(config, entry_oneshot);
+ if (i >= 0) {
+ config->idx_default = i;
+ return;
+ }
+ }
+
+ /*
+ * The EFI variable to select the default boot entry overrides the
+ * configured pattern. The variable can be set and cleared by pressing
+ * the 'd' key in the loader selection menu, the entry is marked with
+ * an '*'.
+ */
+ err = efivar_get(L"LoaderEntryDefault", &entry_default);
+ if (!EFI_ERROR(err)) {
+
+ i = config_entry_find(config, entry_default);
+ if (i >= 0) {
+ config->idx_default = i;
+ config->idx_default_efivar = i;
+ return;
+ }
+ }
+ config->idx_default_efivar = -1;
+
+ if (config->entry_count == 0)
+ return;
+
+ /*
+ * Match the pattern from the end of the list to the start, find last
+ * entry (largest number) matching the given pattern.
+ */
+ if (config->entry_default_pattern) {
+ i = config->entry_count;
+ while (i--) {
+ if (config->entries[i]->no_autoselect)
+ continue;
+ if (MetaiMatch(config->entries[i]->id, config->entry_default_pattern)) {
+ config->idx_default = i;
+ return;
+ }
+ }
+ }
+
+ /* select the last suitable entry */
+ i = config->entry_count;
+ while (i--) {
+ if (config->entries[i]->no_autoselect)
+ continue;
+ config->idx_default = i;
+ return;
+ }
+
+ /* no entry found */
+ config->idx_default = -1;
+}
+
+static BOOLEAN find_nonunique(ConfigEntry **entries, UINTN entry_count) {
+ BOOLEAN non_unique = FALSE;
+ UINTN i, k;
+
+ for (i = 0; i < entry_count; i++)
+ entries[i]->non_unique = FALSE;
+
+ for (i = 0; i < entry_count; i++)
+ for (k = 0; k < entry_count; k++) {
+ if (i == k)
+ continue;
+ if (StrCmp(entries[i]->title_show, entries[k]->title_show) != 0)
+ continue;
+
+ non_unique = entries[i]->non_unique = entries[k]->non_unique = TRUE;
+ }
+
+ return non_unique;
+}
+
+/* generate a unique title, avoiding non-distinguishable menu entries */
+static VOID config_title_generate(Config *config) {
+ UINTN i;
+
+ /* set title */
+ for (i = 0; i < config->entry_count; i++) {
+ CHAR16 *title;
+
+ FreePool(config->entries[i]->title_show);
+ title = config->entries[i]->title;
+ if (!title)
+ title = config->entries[i]->id;
+ config->entries[i]->title_show = StrDuplicate(title);
+ }
+
+ if (!find_nonunique(config->entries, config->entry_count))
+ return;
+
+ /* add version to non-unique titles */
+ for (i = 0; i < config->entry_count; i++) {
+ CHAR16 *s;
+
+ if (!config->entries[i]->non_unique)
+ continue;
+ if (!config->entries[i]->version)
+ continue;
+
+ s = PoolPrint(L"%s (%s)", config->entries[i]->title_show, config->entries[i]->version);
+ FreePool(config->entries[i]->title_show);
+ config->entries[i]->title_show = s;
+ }
+
+ if (!find_nonunique(config->entries, config->entry_count))
+ return;
+
+ /* add machine-id to non-unique titles */
+ for (i = 0; i < config->entry_count; i++) {
+ CHAR16 *s;
+ _cleanup_freepool_ CHAR16 *m = NULL;
+
+ if (!config->entries[i]->non_unique)
+ continue;
+ if (!config->entries[i]->machine_id)
+ continue;
+
+ m = StrDuplicate(config->entries[i]->machine_id);
+ m[8] = '\0';
+ s = PoolPrint(L"%s (%s)", config->entries[i]->title_show, m);
+ FreePool(config->entries[i]->title_show);
+ config->entries[i]->title_show = s;
+ }
+
+ if (!find_nonunique(config->entries, config->entry_count))
+ return;
+
+ /* add file name to non-unique titles */
+ for (i = 0; i < config->entry_count; i++) {
+ CHAR16 *s;
+
+ if (!config->entries[i]->non_unique)
+ continue;
+ s = PoolPrint(L"%s (%s)", config->entries[i]->title_show, config->entries[i]->id);
+ FreePool(config->entries[i]->title_show);
+ config->entries[i]->title_show = s;
+ config->entries[i]->non_unique = FALSE;
+ }
+}
+
+static BOOLEAN config_entry_add_call(
+ Config *config,
+ CHAR16 *id,
+ CHAR16 *title,
+ EFI_STATUS (*call)(VOID)) {
+
+ ConfigEntry *entry;
+
+ entry = AllocatePool(sizeof(ConfigEntry));
+ *entry = (ConfigEntry) {
+ .id = StrDuplicate(id),
+ .title = StrDuplicate(title),
+ .call = call,
+ .no_autoselect = TRUE,
+ .tries_done = (UINTN) -1,
+ .tries_left = (UINTN) -1,
+ };
+
+ config_add_entry(config, entry);
+ return TRUE;
+}
+
+static ConfigEntry *config_entry_add_loader(
+ Config *config,
+ EFI_HANDLE *device,
+ enum loader_type type,
+ CHAR16 *id,
+ CHAR16 key,
+ CHAR16 *title,
+ CHAR16 *loader) {
+
+ ConfigEntry *entry;
+
+ entry = AllocatePool(sizeof(ConfigEntry));
+ *entry = (ConfigEntry) {
+ .type = type,
+ .title = StrDuplicate(title),
+ .device = device,
+ .loader = StrDuplicate(loader),
+ .id = StrDuplicate(id),
+ .key = key,
+ .tries_done = (UINTN) -1,
+ .tries_left = (UINTN) -1,
+ };
+
+ StrLwr(entry->id);
+
+ config_add_entry(config, entry);
+ return entry;
+}
+
+static BOOLEAN config_entry_add_loader_auto(
+ Config *config,
+ EFI_HANDLE *device,
+ EFI_FILE *root_dir,
+ CHAR16 *loaded_image_path,
+ CHAR16 *id,
+ CHAR16 key,
+ CHAR16 *title,
+ CHAR16 *loader) {
+
+ EFI_FILE_HANDLE handle;
+ ConfigEntry *entry;
+ EFI_STATUS err;
+
+ if (!config->auto_entries)
+ return FALSE;
+
+ /* do not add an entry for ourselves */
+ if (loaded_image_path) {
+ UINTN len;
+ _cleanup_freepool_ CHAR8 *content = NULL;
+
+ if (StriCmp(loader, loaded_image_path) == 0)
+ return FALSE;
+
+ /* look for systemd-boot magic string */
+ err = file_read(root_dir, loader, 0, 100*1024, &content, &len);
+ if (!EFI_ERROR(err)) {
+ CHAR8 *start = content;
+ CHAR8 *last = content + len - sizeof(magic) - 1;
+
+ for (; start <= last; start++)
+ if (start[0] == magic[0] && CompareMem(start, magic, sizeof(magic) - 1) == 0)
+ return FALSE;
+ }
+ }
+
+ /* check existence */
+ err = uefi_call_wrapper(root_dir->Open, 5, root_dir, &handle, loader, EFI_FILE_MODE_READ, 0ULL);
+ if (EFI_ERROR(err))
+ return FALSE;
+ uefi_call_wrapper(handle->Close, 1, handle);
+
+ entry = config_entry_add_loader(config, device, LOADER_UNDEFINED, id, key, title, loader);
+ if (!entry)
+ return FALSE;
+
+ /* do not boot right away into auto-detected entries */
+ entry->no_autoselect = TRUE;
+
+ return TRUE;
+}
+
+static VOID config_entry_add_osx(Config *config) {
+ EFI_STATUS err;
+ UINTN handle_count = 0;
+ _cleanup_freepool_ EFI_HANDLE *handles = NULL;
+
+ if (!config->auto_entries)
+ return;
+
+ err = LibLocateHandle(ByProtocol, &FileSystemProtocol, NULL, &handle_count, &handles);
+ if (!EFI_ERROR(err)) {
+ UINTN i;
+
+ for (i = 0; i < handle_count; i++) {
+ EFI_FILE *root;
+ BOOLEAN found;
+
+ root = LibOpenRoot(handles[i]);
+ if (!root)
+ continue;
+ found = config_entry_add_loader_auto(config, handles[i], root, NULL, L"auto-osx", 'a', L"macOS",
+ L"\\System\\Library\\CoreServices\\boot.efi");
+ uefi_call_wrapper(root->Close, 1, root);
+ if (found)
+ break;
+ }
+ }
+}
+
+static VOID config_entry_add_linux(
+ Config *config,
+ EFI_LOADED_IMAGE *loaded_image,
+ EFI_FILE *root_dir) {
+
+ EFI_FILE_HANDLE linux_dir;
+ EFI_STATUS err;
+ ConfigEntry *entry;
+
+ err = uefi_call_wrapper(root_dir->Open, 5, root_dir, &linux_dir, L"\\EFI\\Linux", EFI_FILE_MODE_READ, 0ULL);
+ if (EFI_ERROR(err))
+ return;
+
+ for (;;) {
+ CHAR16 buf[256];
+ UINTN bufsize = sizeof buf;
+ EFI_FILE_INFO *f;
+ CHAR8 *sections[] = {
+ (UINT8 *)".osrel",
+ (UINT8 *)".cmdline",
+ NULL
+ };
+ UINTN offs[ELEMENTSOF(sections)-1] = {};
+ UINTN szs[ELEMENTSOF(sections)-1] = {};
+ UINTN addrs[ELEMENTSOF(sections)-1] = {};
+ CHAR8 *content = NULL;
+ UINTN len;
+ CHAR8 *line;
+ UINTN pos = 0;
+ CHAR8 *key, *value;
+ CHAR16 *os_name = NULL;
+ CHAR16 *os_id = NULL;
+ CHAR16 *os_version = NULL;
+ CHAR16 *os_build = NULL;
+
+ err = uefi_call_wrapper(linux_dir->Read, 3, linux_dir, &bufsize, buf);
+ if (bufsize == 0 || EFI_ERROR(err))
+ break;
+
+ f = (EFI_FILE_INFO *) buf;
+ if (f->FileName[0] == '.')
+ continue;
+ if (f->Attribute & EFI_FILE_DIRECTORY)
+ continue;
+ len = StrLen(f->FileName);
+ if (len < 5)
+ continue;
+ if (StriCmp(f->FileName + len - 4, L".efi") != 0)
+ continue;
+
+ /* look for .osrel and .cmdline sections in the .efi binary */
+ err = pe_file_locate_sections(linux_dir, f->FileName, sections, addrs, offs, szs);
+ if (EFI_ERROR(err))
+ continue;
+
+ err = file_read(linux_dir, f->FileName, offs[0], szs[0], &content, NULL);
+ if (EFI_ERROR(err))
+ continue;
+
+ /* read properties from the embedded os-release file */
+ while ((line = line_get_key_value(content, (CHAR8 *)"=", &pos, &key, &value))) {
+ if (strcmpa((CHAR8 *)"PRETTY_NAME", key) == 0) {
+ FreePool(os_name);
+ os_name = stra_to_str(value);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"ID", key) == 0) {
+ FreePool(os_id);
+ os_id = stra_to_str(value);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"VERSION_ID", key) == 0) {
+ FreePool(os_version);
+ os_version = stra_to_str(value);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"BUILD_ID", key) == 0) {
+ FreePool(os_build);
+ os_build = stra_to_str(value);
+ continue;
+ }
+ }
+
+ if (os_name && os_id && (os_version || os_build)) {
+ _cleanup_freepool_ CHAR16 *conf = NULL, *path = NULL;
+
+ conf = PoolPrint(L"%s-%s", os_id, os_version ? : os_build);
+ path = PoolPrint(L"\\EFI\\Linux\\%s", f->FileName);
+
+ entry = config_entry_add_loader(config, loaded_image->DeviceHandle, LOADER_LINUX, conf, 'l', os_name, path);
+
+ FreePool(content);
+ content = NULL;
+
+ /* read the embedded cmdline file */
+ err = file_read(linux_dir, f->FileName, offs[1], szs[1], &content, NULL);
+ if (!EFI_ERROR(err)) {
+
+ /* chomp the newline */
+ if (content[szs[1]-1] == '\n')
+ content[szs[1]-1] = '\0';
+
+ entry->options = stra_to_str(content);
+ }
+
+ config_entry_parse_tries(entry, L"\\EFI\\Linux", f->FileName, L".efi");
+ }
+
+ FreePool(os_name);
+ FreePool(os_id);
+ FreePool(os_version);
+ FreePool(os_build);
+ FreePool(content);
+ }
+
+ uefi_call_wrapper(linux_dir->Close, 1, linux_dir);
+}
+
+static EFI_STATUS image_start(
+ EFI_HANDLE parent_image,
+ const Config *config,
+ const ConfigEntry *entry) {
+
+ EFI_HANDLE image;
+ _cleanup_freepool_ EFI_DEVICE_PATH *path = NULL;
+ CHAR16 *options;
+ EFI_STATUS err;
+
+ path = FileDevicePath(entry->device, entry->loader);
+ if (!path) {
+ Print(L"Error getting device path.");
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return EFI_INVALID_PARAMETER;
+ }
+
+ err = uefi_call_wrapper(BS->LoadImage, 6, FALSE, parent_image, path, NULL, 0, &image);
+ if (EFI_ERROR(err)) {
+ Print(L"Error loading %s: %r", entry->loader, err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return err;
+ }
+
+ if (config->options_edit)
+ options = config->options_edit;
+ else if (entry->options)
+ options = entry->options;
+ else
+ options = NULL;
+ if (options) {
+ EFI_LOADED_IMAGE *loaded_image;
+
+ err = uefi_call_wrapper(BS->OpenProtocol, 6, image, &LoadedImageProtocol, (VOID **)&loaded_image,
+ parent_image, NULL, EFI_OPEN_PROTOCOL_GET_PROTOCOL);
+ if (EFI_ERROR(err)) {
+ Print(L"Error getting LoadedImageProtocol handle: %r", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ goto out_unload;
+ }
+ loaded_image->LoadOptions = options;
+ loaded_image->LoadOptionsSize = (StrLen(loaded_image->LoadOptions)+1) * sizeof(CHAR16);
+
+#if ENABLE_TPM
+ /* Try to log any options to the TPM, especially to catch manually edited options */
+ err = tpm_log_event(SD_TPM_PCR,
+ (EFI_PHYSICAL_ADDRESS) (UINTN) loaded_image->LoadOptions,
+ loaded_image->LoadOptionsSize, loaded_image->LoadOptions);
+ if (EFI_ERROR(err)) {
+ Print(L"Unable to add image options measurement: %r", err);
+ uefi_call_wrapper(BS->Stall, 1, 200 * 1000);
+ }
+#endif
+ }
+
+ efivar_set_time_usec(L"LoaderTimeExecUSec", 0);
+ err = uefi_call_wrapper(BS->StartImage, 3, image, NULL, NULL);
+out_unload:
+ uefi_call_wrapper(BS->UnloadImage, 1, image);
+ return err;
+}
+
+static EFI_STATUS reboot_into_firmware(VOID) {
+ _cleanup_freepool_ CHAR8 *b = NULL;
+ UINTN size;
+ UINT64 osind;
+ EFI_STATUS err;
+
+ osind = EFI_OS_INDICATIONS_BOOT_TO_FW_UI;
+
+ err = efivar_get_raw(&global_guid, L"OsIndications", &b, &size);
+ if (!EFI_ERROR(err))
+ osind |= (UINT64)*b;
+
+ err = efivar_set_raw(&global_guid, L"OsIndications", &osind, sizeof(UINT64), TRUE);
+ if (EFI_ERROR(err))
+ return err;
+
+ err = uefi_call_wrapper(RT->ResetSystem, 4, EfiResetCold, EFI_SUCCESS, 0, NULL);
+ Print(L"Error calling ResetSystem: %r", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return err;
+}
+
+static VOID config_free(Config *config) {
+ UINTN i;
+
+ for (i = 0; i < config->entry_count; i++)
+ config_entry_free(config->entries[i]);
+ FreePool(config->entries);
+ FreePool(config->entry_default_pattern);
+ FreePool(config->options_edit);
+ FreePool(config->entry_oneshot);
+}
+
+static VOID config_write_entries_to_variable(Config *config) {
+ _cleanup_freepool_ CHAR16 *buffer = NULL;
+ UINTN i, sz = 0;
+ CHAR16 *p;
+
+ for (i = 0; i < config->entry_count; i++)
+ sz += StrLen(config->entries[i]->id) + 1;
+
+ p = buffer = AllocatePool(sz * sizeof(CHAR16));
+
+ for (i = 0; i < config->entry_count; i++) {
+ UINTN l;
+
+ l = StrLen(config->entries[i]->id) + 1;
+ CopyMem(p, config->entries[i]->id, l * sizeof(CHAR16));
+
+ p += l;
+ }
+
+ /* Store the full list of discovered entries. */
+ (void) efivar_set_raw(&loader_guid, L"LoaderEntries", buffer, (UINT8*) p - (UINT8*) buffer, FALSE);
+}
+
+EFI_STATUS efi_main(EFI_HANDLE image, EFI_SYSTEM_TABLE *sys_table) {
+ static const UINT64 loader_features =
+ (1ULL << 0) | /* I honour the LoaderConfigTimeout variable */
+ (1ULL << 1) | /* I honour the LoaderConfigTimeoutOneShot variable */
+ (1ULL << 2) | /* I honour the LoaderEntryDefault variable */
+ (1ULL << 3) | /* I honour the LoaderEntryOneShot variable */
+ (1ULL << 4) | /* I support boot counting */
+ 0;
+
+ _cleanup_freepool_ CHAR16 *infostr = NULL, *typestr = NULL;
+ CHAR8 *b;
+ UINTN size;
+ EFI_LOADED_IMAGE *loaded_image;
+ EFI_FILE *root_dir;
+ CHAR16 *loaded_image_path;
+ EFI_STATUS err;
+ Config config;
+ UINT64 init_usec;
+ BOOLEAN menu = FALSE;
+ CHAR16 uuid[37];
+
+ InitializeLib(image, sys_table);
+ init_usec = time_usec();
+ efivar_set_time_usec(L"LoaderTimeInitUSec", init_usec);
+ efivar_set(L"LoaderInfo", L"systemd-boot " GIT_VERSION, FALSE);
+
+ infostr = PoolPrint(L"%s %d.%02d", ST->FirmwareVendor, ST->FirmwareRevision >> 16, ST->FirmwareRevision & 0xffff);
+ efivar_set(L"LoaderFirmwareInfo", infostr, FALSE);
+
+ typestr = PoolPrint(L"UEFI %d.%02d", ST->Hdr.Revision >> 16, ST->Hdr.Revision & 0xffff);
+ efivar_set(L"LoaderFirmwareType", typestr, FALSE);
+
+ (void) efivar_set_raw(&loader_guid, L"LoaderFeatures", &loader_features, sizeof(loader_features), FALSE);
+
+ err = uefi_call_wrapper(BS->OpenProtocol, 6, image, &LoadedImageProtocol, (VOID **)&loaded_image,
+ image, NULL, EFI_OPEN_PROTOCOL_GET_PROTOCOL);
+ if (EFI_ERROR(err)) {
+ Print(L"Error getting a LoadedImageProtocol handle: %r ", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return err;
+ }
+
+ /* export the device path this image is started from */
+ if (disk_get_part_uuid(loaded_image->DeviceHandle, uuid) == EFI_SUCCESS)
+ efivar_set(L"LoaderDevicePartUUID", uuid, FALSE);
+
+ root_dir = LibOpenRoot(loaded_image->DeviceHandle);
+ if (!root_dir) {
+ Print(L"Unable to open root directory: %r ", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return EFI_LOAD_ERROR;
+ }
+
+ if (secure_boot_enabled() && shim_loaded()) {
+ err = security_policy_install();
+ if (EFI_ERROR(err)) {
+ Print(L"Error installing security policy: %r ", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return err;
+ }
+ }
+
+ /* the filesystem path to this image, to prevent adding ourselves to the menu */
+ loaded_image_path = DevicePathToStr(loaded_image->FilePath);
+ efivar_set(L"LoaderImageIdentifier", loaded_image_path, FALSE);
+
+ config_load_defaults(&config, root_dir);
+
+ /* scan /EFI/Linux/ directory */
+ config_entry_add_linux(&config, loaded_image, root_dir);
+
+ /* scan /loader/entries/\*.conf files */
+ config_load_entries(&config, loaded_image->DeviceHandle, root_dir, loaded_image_path);
+
+ /* sort entries after version number */
+ config_sort_entries(&config);
+
+ /* if we find some well-known loaders, add them to the end of the list */
+ config_entry_add_loader_auto(&config, loaded_image->DeviceHandle, root_dir, NULL,
+ L"auto-windows", 'w', L"Windows Boot Manager", L"\\EFI\\Microsoft\\Boot\\bootmgfw.efi");
+ config_entry_add_loader_auto(&config, loaded_image->DeviceHandle, root_dir, NULL,
+ L"auto-efi-shell", 's', L"EFI Shell", L"\\shell" EFI_MACHINE_TYPE_NAME ".efi");
+ config_entry_add_loader_auto(&config, loaded_image->DeviceHandle, root_dir, loaded_image_path,
+ L"auto-efi-default", '\0', L"EFI Default Loader", L"\\EFI\\Boot\\boot" EFI_MACHINE_TYPE_NAME ".efi");
+ config_entry_add_osx(&config);
+
+ if (config.auto_firmware && efivar_get_raw(&global_guid, L"OsIndicationsSupported", &b, &size) == EFI_SUCCESS) {
+ UINT64 osind = (UINT64)*b;
+
+ if (osind & EFI_OS_INDICATIONS_BOOT_TO_FW_UI)
+ config_entry_add_call(&config, L"auto-reboot-to-firmware-setup", L"Reboot Into Firmware Interface", reboot_into_firmware);
+ FreePool(b);
+ }
+
+ if (config.entry_count == 0) {
+ Print(L"No loader found. Configuration files in \\loader\\entries\\*.conf are needed.");
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ goto out;
+ }
+
+ config_write_entries_to_variable(&config);
+
+ config_title_generate(&config);
+
+ /* select entry by configured pattern or EFI LoaderDefaultEntry= variable */
+ config_default_entry_select(&config);
+
+ /* if no configured entry to select from was found, enable the menu */
+ if (config.idx_default == -1) {
+ config.idx_default = 0;
+ if (config.timeout_sec == 0)
+ config.timeout_sec = 10;
+ }
+
+ /* select entry or show menu when key is pressed or timeout is set */
+ if (config.force_menu || config.timeout_sec > 0)
+ menu = TRUE;
+ else {
+ UINT64 key;
+
+ err = console_key_read(&key, FALSE);
+ if (!EFI_ERROR(err)) {
+ INT16 idx;
+
+ /* find matching key in config entries */
+ idx = entry_lookup_key(&config, config.idx_default, KEYCHAR(key));
+ if (idx >= 0)
+ config.idx_default = idx;
+ else
+ menu = TRUE;
+ }
+ }
+
+ for (;;) {
+ ConfigEntry *entry;
+
+ entry = config.entries[config.idx_default];
+ if (menu) {
+ efivar_set_time_usec(L"LoaderTimeMenuUSec", 0);
+ uefi_call_wrapper(BS->SetWatchdogTimer, 4, 0, 0x10000, 0, NULL);
+ if (!menu_run(&config, &entry, loaded_image_path))
+ break;
+ }
+
+ /* run special entry like "reboot" */
+ if (entry->call) {
+ entry->call();
+ continue;
+ }
+
+ config_entry_bump_counters(entry, root_dir);
+
+ /* export the selected boot entry to the system */
+ efivar_set(L"LoaderEntrySelected", entry->id, FALSE);
+
+ uefi_call_wrapper(BS->SetWatchdogTimer, 4, 5 * 60, 0x10000, 0, NULL);
+ err = image_start(image, &config, entry);
+ if (EFI_ERROR(err)) {
+ graphics_mode(FALSE);
+ Print(L"\nFailed to execute %s (%s): %r\n", entry->title, entry->loader, err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ goto out;
+ }
+
+ menu = TRUE;
+ config.timeout_sec = 0;
+ }
+ err = EFI_SUCCESS;
+out:
+ FreePool(loaded_image_path);
+ config_free(&config);
+ uefi_call_wrapper(root_dir->Close, 1, root_dir);
+ uefi_call_wrapper(BS->CloseProtocol, 4, image, &LoadedImageProtocol, image, NULL);
+ return err;
+}
diff --git a/src/boot/efi/console.c b/src/boot/efi/console.c
new file mode 100644
index 0000000..4d72bad
--- /dev/null
+++ b/src/boot/efi/console.c
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "console.h"
+#include "util.h"
+
+#define SYSTEM_FONT_WIDTH 8
+#define SYSTEM_FONT_HEIGHT 19
+
+#define EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL_GUID \
+ { 0xdd9e7534, 0x7762, 0x4698, { 0x8c, 0x14, 0xf5, 0x85, 0x17, 0xa6, 0x25, 0xaa } }
+
+struct _EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL;
+
+typedef EFI_STATUS (EFIAPI *EFI_INPUT_RESET_EX)(
+ struct _EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL *This,
+ BOOLEAN ExtendedVerification
+);
+
+typedef UINT8 EFI_KEY_TOGGLE_STATE;
+
+typedef struct {
+ UINT32 KeyShiftState;
+ EFI_KEY_TOGGLE_STATE KeyToggleState;
+} EFI_KEY_STATE;
+
+typedef struct {
+ EFI_INPUT_KEY Key;
+ EFI_KEY_STATE KeyState;
+} EFI_KEY_DATA;
+
+typedef EFI_STATUS (EFIAPI *EFI_INPUT_READ_KEY_EX)(
+ struct _EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL *This,
+ EFI_KEY_DATA *KeyData
+);
+
+typedef EFI_STATUS (EFIAPI *EFI_SET_STATE)(
+ struct _EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL *This,
+ EFI_KEY_TOGGLE_STATE *KeyToggleState
+);
+
+typedef EFI_STATUS (EFIAPI *EFI_KEY_NOTIFY_FUNCTION)(
+ EFI_KEY_DATA *KeyData
+);
+
+typedef EFI_STATUS (EFIAPI *EFI_REGISTER_KEYSTROKE_NOTIFY)(
+ struct _EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL *This,
+ EFI_KEY_DATA KeyData,
+ EFI_KEY_NOTIFY_FUNCTION KeyNotificationFunction,
+ VOID **NotifyHandle
+);
+
+typedef EFI_STATUS (EFIAPI *EFI_UNREGISTER_KEYSTROKE_NOTIFY)(
+ struct _EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL *This,
+ VOID *NotificationHandle
+);
+
+typedef struct _EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL {
+ EFI_INPUT_RESET_EX Reset;
+ EFI_INPUT_READ_KEY_EX ReadKeyStrokeEx;
+ EFI_EVENT WaitForKeyEx;
+ EFI_SET_STATE SetState;
+ EFI_REGISTER_KEYSTROKE_NOTIFY RegisterKeyNotify;
+ EFI_UNREGISTER_KEYSTROKE_NOTIFY UnregisterKeyNotify;
+} EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL;
+
+EFI_STATUS console_key_read(UINT64 *key, BOOLEAN wait) {
+ EFI_GUID EfiSimpleTextInputExProtocolGuid = EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL_GUID;
+ static EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL *TextInputEx;
+ static BOOLEAN checked;
+ UINTN index;
+ EFI_INPUT_KEY k;
+ EFI_STATUS err;
+
+ if (!checked) {
+ err = LibLocateProtocol(&EfiSimpleTextInputExProtocolGuid, (VOID **)&TextInputEx);
+ if (EFI_ERROR(err))
+ TextInputEx = NULL;
+
+ checked = TRUE;
+ }
+
+ /* wait until key is pressed */
+ if (wait)
+ uefi_call_wrapper(BS->WaitForEvent, 3, 1, &ST->ConIn->WaitForKey, &index);
+
+ if (TextInputEx) {
+ EFI_KEY_DATA keydata;
+ UINT64 keypress;
+
+ err = uefi_call_wrapper(TextInputEx->ReadKeyStrokeEx, 2, TextInputEx, &keydata);
+ if (!EFI_ERROR(err)) {
+ UINT32 shift = 0;
+
+ /* do not distinguish between left and right keys */
+ if (keydata.KeyState.KeyShiftState & EFI_SHIFT_STATE_VALID) {
+ if (keydata.KeyState.KeyShiftState & (EFI_RIGHT_CONTROL_PRESSED|EFI_LEFT_CONTROL_PRESSED))
+ shift |= EFI_CONTROL_PRESSED;
+ if (keydata.KeyState.KeyShiftState & (EFI_RIGHT_ALT_PRESSED|EFI_LEFT_ALT_PRESSED))
+ shift |= EFI_ALT_PRESSED;
+ };
+
+ /* 32 bit modifier keys + 16 bit scan code + 16 bit unicode */
+ keypress = KEYPRESS(shift, keydata.Key.ScanCode, keydata.Key.UnicodeChar);
+ if (keypress > 0) {
+ *key = keypress;
+ return 0;
+ }
+ }
+ }
+
+ /* fallback for firmware which does not support SimpleTextInputExProtocol
+ *
+ * This is also called in case ReadKeyStrokeEx did not return a key, because
+ * some broken firmwares offer SimpleTextInputExProtocol, but never acually
+ * handle any key. */
+ err = uefi_call_wrapper(ST->ConIn->ReadKeyStroke, 2, ST->ConIn, &k);
+ if (EFI_ERROR(err))
+ return err;
+
+ *key = KEYPRESS(0, k.ScanCode, k.UnicodeChar);
+ return 0;
+}
+
+static EFI_STATUS change_mode(UINTN mode) {
+ EFI_STATUS err;
+
+ err = uefi_call_wrapper(ST->ConOut->SetMode, 2, ST->ConOut, mode);
+
+ /* Special case mode 1: when using OVMF and qemu, setting it returns error
+ * and breaks console output. */
+ if (EFI_ERROR(err) && mode == 1)
+ uefi_call_wrapper(ST->ConOut->SetMode, 2, ST->ConOut, (UINTN)0);
+
+ return err;
+}
+
+static UINT64 text_area_from_font_size(void) {
+ EFI_STATUS err;
+ UINT64 text_area;
+ UINTN rows, columns;
+
+ err = uefi_call_wrapper(ST->ConOut->QueryMode, 4, ST->ConOut, ST->ConOut->Mode->Mode, &columns, &rows);
+ if (EFI_ERROR(err)) {
+ columns = 80;
+ rows = 25;
+ }
+
+ text_area = SYSTEM_FONT_WIDTH * SYSTEM_FONT_HEIGHT * (UINT64)rows * (UINT64)columns;
+
+ return text_area;
+}
+
+static EFI_STATUS mode_auto(UINTN *mode) {
+ const UINT32 HORIZONTAL_MAX_OK = 1920;
+ const UINT32 VERTICAL_MAX_OK = 1080;
+ const UINT64 VIEWPORT_RATIO = 10;
+ UINT64 screen_area, text_area;
+ EFI_GUID GraphicsOutputProtocolGuid = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+ EFI_GRAPHICS_OUTPUT_PROTOCOL *GraphicsOutput;
+ EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *Info;
+ EFI_STATUS err;
+ BOOLEAN keep = FALSE;
+
+ err = LibLocateProtocol(&GraphicsOutputProtocolGuid, (VOID **)&GraphicsOutput);
+ if (!EFI_ERROR(err) && GraphicsOutput->Mode && GraphicsOutput->Mode->Info) {
+ Info = GraphicsOutput->Mode->Info;
+
+ /* Start verifying if we are in a resolution larger than Full HD
+ * (1920x1080). If we're not, assume we're in a good mode and do not
+ * try to change it. */
+ if (Info->HorizontalResolution <= HORIZONTAL_MAX_OK && Info->VerticalResolution <= VERTICAL_MAX_OK)
+ keep = TRUE;
+ /* For larger resolutions, calculate the ratio of the total screen
+ * area to the text viewport area. If it's less than 10 times bigger,
+ * then assume the text is readable and keep the text mode. */
+ else {
+ screen_area = (UINT64)Info->HorizontalResolution * (UINT64)Info->VerticalResolution;
+ text_area = text_area_from_font_size();
+
+ if (text_area != 0 && screen_area/text_area < VIEWPORT_RATIO)
+ keep = TRUE;
+ }
+ }
+
+ if (keep) {
+ /* Just clear the screen instead of changing the mode and return. */
+ *mode = ST->ConOut->Mode->Mode;
+ uefi_call_wrapper(ST->ConOut->ClearScreen, 1, ST->ConOut);
+ return EFI_SUCCESS;
+ }
+
+ /* If we reached here, then we have a high resolution screen and the text
+ * viewport is less than 10% the screen area, so the firmware developer
+ * screwed up. Try to switch to a better mode. Mode number 2 is first non
+ * standard mode, which is provided by the device manufacturer, so it should
+ * be a good mode.
+ * Note: MaxMode is the number of modes, not the last mode. */
+ if (ST->ConOut->Mode->MaxMode > 2)
+ *mode = 2;
+ /* Try again with mode different than zero (assume user requests
+ * auto mode due to some problem with mode zero). */
+ else if (ST->ConOut->Mode->MaxMode == 2)
+ *mode = 1;
+ /* Else force mode change to zero. */
+ else
+ *mode = 0;
+
+ return change_mode(*mode);
+}
+
+EFI_STATUS console_set_mode(UINTN *mode, enum console_mode_change_type how) {
+ if (how == CONSOLE_MODE_AUTO)
+ return mode_auto(mode);
+
+ if (how == CONSOLE_MODE_MAX) {
+ /* Note: MaxMode is the number of modes, not the last mode. */
+ if (ST->ConOut->Mode->MaxMode > 0)
+ *mode = ST->ConOut->Mode->MaxMode-1;
+ else
+ *mode = 0;
+ }
+
+ return change_mode(*mode);
+}
diff --git a/src/boot/efi/console.h b/src/boot/efi/console.h
new file mode 100644
index 0000000..b9ed6c7
--- /dev/null
+++ b/src/boot/efi/console.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#define EFI_SHIFT_STATE_VALID 0x80000000
+#define EFI_RIGHT_CONTROL_PRESSED 0x00000004
+#define EFI_LEFT_CONTROL_PRESSED 0x00000008
+#define EFI_RIGHT_ALT_PRESSED 0x00000010
+#define EFI_LEFT_ALT_PRESSED 0x00000020
+
+#define EFI_CONTROL_PRESSED (EFI_RIGHT_CONTROL_PRESSED|EFI_LEFT_CONTROL_PRESSED)
+#define EFI_ALT_PRESSED (EFI_RIGHT_ALT_PRESSED|EFI_LEFT_ALT_PRESSED)
+#define KEYPRESS(keys, scan, uni) ((((UINT64)keys) << 32) | (((UINT64)scan) << 16) | (uni))
+#define KEYCHAR(k) ((k) & 0xffff)
+#define CHAR_CTRL(c) ((c) - 'a' + 1)
+
+enum console_mode_change_type {
+ CONSOLE_MODE_KEEP = 0,
+ CONSOLE_MODE_SET,
+ CONSOLE_MODE_AUTO,
+ CONSOLE_MODE_MAX,
+};
+
+EFI_STATUS console_key_read(UINT64 *key, BOOLEAN wait);
+EFI_STATUS console_set_mode(UINTN *mode, enum console_mode_change_type how);
diff --git a/src/boot/efi/disk.c b/src/boot/efi/disk.c
new file mode 100644
index 0000000..49ee81b
--- /dev/null
+++ b/src/boot/efi/disk.c
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "util.h"
+
+EFI_STATUS disk_get_part_uuid(EFI_HANDLE *handle, CHAR16 uuid[static 37]) {
+ EFI_DEVICE_PATH *device_path;
+
+ /* export the device path this image is started from */
+ device_path = DevicePathFromHandle(handle);
+ if (device_path) {
+ _cleanup_freepool_ EFI_DEVICE_PATH *paths = NULL;
+ EFI_DEVICE_PATH *path;
+
+ paths = UnpackDevicePath(device_path);
+ for (path = paths; !IsDevicePathEnd(path); path = NextDevicePathNode(path)) {
+ HARDDRIVE_DEVICE_PATH *drive;
+
+ if (DevicePathType(path) != MEDIA_DEVICE_PATH)
+ continue;
+ if (DevicePathSubType(path) != MEDIA_HARDDRIVE_DP)
+ continue;
+ drive = (HARDDRIVE_DEVICE_PATH *)path;
+ if (drive->SignatureType != SIGNATURE_TYPE_GUID)
+ continue;
+
+ GuidToString(uuid, (EFI_GUID *)&drive->Signature);
+ return EFI_SUCCESS;
+ }
+ }
+
+ return EFI_NOT_FOUND;
+}
diff --git a/src/boot/efi/disk.h b/src/boot/efi/disk.h
new file mode 100644
index 0000000..41c4cce
--- /dev/null
+++ b/src/boot/efi/disk.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+EFI_STATUS disk_get_part_uuid(EFI_HANDLE *handle, CHAR16 uuid[static 37]);
diff --git a/src/boot/efi/graphics.c b/src/boot/efi/graphics.c
new file mode 100644
index 0000000..9b5003a
--- /dev/null
+++ b/src/boot/efi/graphics.c
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * Copyright © 2013 Intel Corporation
+ * Authored by Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+ */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "graphics.h"
+#include "util.h"
+
+EFI_STATUS graphics_mode(BOOLEAN on) {
+ #define EFI_CONSOLE_CONTROL_PROTOCOL_GUID \
+ { 0xf42f7782, 0x12e, 0x4c12, { 0x99, 0x56, 0x49, 0xf9, 0x43, 0x4, 0xf7, 0x21 } };
+
+ struct _EFI_CONSOLE_CONTROL_PROTOCOL;
+
+ typedef enum {
+ EfiConsoleControlScreenText,
+ EfiConsoleControlScreenGraphics,
+ EfiConsoleControlScreenMaxValue,
+ } EFI_CONSOLE_CONTROL_SCREEN_MODE;
+
+ typedef EFI_STATUS (EFIAPI *EFI_CONSOLE_CONTROL_PROTOCOL_GET_MODE)(
+ struct _EFI_CONSOLE_CONTROL_PROTOCOL *This,
+ EFI_CONSOLE_CONTROL_SCREEN_MODE *Mode,
+ BOOLEAN *UgaExists,
+ BOOLEAN *StdInLocked
+ );
+
+ typedef EFI_STATUS (EFIAPI *EFI_CONSOLE_CONTROL_PROTOCOL_SET_MODE)(
+ struct _EFI_CONSOLE_CONTROL_PROTOCOL *This,
+ EFI_CONSOLE_CONTROL_SCREEN_MODE Mode
+ );
+
+ typedef EFI_STATUS (EFIAPI *EFI_CONSOLE_CONTROL_PROTOCOL_LOCK_STD_IN)(
+ struct _EFI_CONSOLE_CONTROL_PROTOCOL *This,
+ CHAR16 *Password
+ );
+
+ typedef struct _EFI_CONSOLE_CONTROL_PROTOCOL {
+ EFI_CONSOLE_CONTROL_PROTOCOL_GET_MODE GetMode;
+ EFI_CONSOLE_CONTROL_PROTOCOL_SET_MODE SetMode;
+ EFI_CONSOLE_CONTROL_PROTOCOL_LOCK_STD_IN LockStdIn;
+ } EFI_CONSOLE_CONTROL_PROTOCOL;
+
+ EFI_GUID ConsoleControlProtocolGuid = EFI_CONSOLE_CONTROL_PROTOCOL_GUID;
+ EFI_CONSOLE_CONTROL_PROTOCOL *ConsoleControl = NULL;
+ EFI_CONSOLE_CONTROL_SCREEN_MODE new;
+ EFI_CONSOLE_CONTROL_SCREEN_MODE current;
+ BOOLEAN uga_exists;
+ BOOLEAN stdin_locked;
+ EFI_STATUS err;
+
+ err = LibLocateProtocol(&ConsoleControlProtocolGuid, (VOID **)&ConsoleControl);
+ if (EFI_ERROR(err))
+ /* console control protocol is nonstandard and might not exist. */
+ return err == EFI_NOT_FOUND ? EFI_SUCCESS : err;
+
+ /* check current mode */
+ err = uefi_call_wrapper(ConsoleControl->GetMode, 4, ConsoleControl, &current, &uga_exists, &stdin_locked);
+ if (EFI_ERROR(err))
+ return err;
+
+ /* do not touch the mode */
+ new = on ? EfiConsoleControlScreenGraphics : EfiConsoleControlScreenText;
+ if (new == current)
+ return EFI_SUCCESS;
+
+ err = uefi_call_wrapper(ConsoleControl->SetMode, 2, ConsoleControl, new);
+
+ /* some firmware enables the cursor when switching modes */
+ uefi_call_wrapper(ST->ConOut->EnableCursor, 2, ST->ConOut, FALSE);
+
+ return err;
+}
diff --git a/src/boot/efi/graphics.h b/src/boot/efi/graphics.h
new file mode 100644
index 0000000..809e878
--- /dev/null
+++ b/src/boot/efi/graphics.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * Copyright © 2013 Intel Corporation
+ * Authored by Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+ */
+#pragma once
+
+EFI_STATUS graphics_mode(BOOLEAN on);
diff --git a/src/boot/efi/linux.c b/src/boot/efi/linux.c
new file mode 100644
index 0000000..5b4c085
--- /dev/null
+++ b/src/boot/efi/linux.c
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "linux.h"
+#include "util.h"
+
+#define SETUP_MAGIC 0x53726448 /* "HdrS" */
+struct SetupHeader {
+ UINT8 boot_sector[0x01f1];
+ UINT8 setup_secs;
+ UINT16 root_flags;
+ UINT32 sys_size;
+ UINT16 ram_size;
+ UINT16 video_mode;
+ UINT16 root_dev;
+ UINT16 signature;
+ UINT16 jump;
+ UINT32 header;
+ UINT16 version;
+ UINT16 su_switch;
+ UINT16 setup_seg;
+ UINT16 start_sys;
+ UINT16 kernel_ver;
+ UINT8 loader_id;
+ UINT8 load_flags;
+ UINT16 movesize;
+ UINT32 code32_start;
+ UINT32 ramdisk_start;
+ UINT32 ramdisk_len;
+ UINT32 bootsect_kludge;
+ UINT16 heap_end;
+ UINT8 ext_loader_ver;
+ UINT8 ext_loader_type;
+ UINT32 cmd_line_ptr;
+ UINT32 ramdisk_max;
+ UINT32 kernel_alignment;
+ UINT8 relocatable_kernel;
+ UINT8 min_alignment;
+ UINT16 xloadflags;
+ UINT32 cmdline_size;
+ UINT32 hardware_subarch;
+ UINT64 hardware_subarch_data;
+ UINT32 payload_offset;
+ UINT32 payload_length;
+ UINT64 setup_data;
+ UINT64 pref_address;
+ UINT32 init_size;
+ UINT32 handover_offset;
+} __attribute__((packed));
+
+#ifdef __x86_64__
+typedef VOID(*handover_f)(VOID *image, EFI_SYSTEM_TABLE *table, struct SetupHeader *setup);
+static VOID linux_efi_handover(EFI_HANDLE image, struct SetupHeader *setup) {
+ handover_f handover;
+
+ asm volatile ("cli");
+ handover = (handover_f)((UINTN)setup->code32_start + 512 + setup->handover_offset);
+ handover(image, ST, setup);
+}
+#else
+typedef VOID(*handover_f)(VOID *image, EFI_SYSTEM_TABLE *table, struct SetupHeader *setup) __attribute__((regparm(0)));
+static VOID linux_efi_handover(EFI_HANDLE image, struct SetupHeader *setup) {
+ handover_f handover;
+
+ handover = (handover_f)((UINTN)setup->code32_start + setup->handover_offset);
+ handover(image, ST, setup);
+}
+#endif
+
+EFI_STATUS linux_exec(EFI_HANDLE *image,
+ CHAR8 *cmdline, UINTN cmdline_len,
+ UINTN linux_addr,
+ UINTN initrd_addr, UINTN initrd_size, BOOLEAN secure) {
+ struct SetupHeader *image_setup;
+ struct SetupHeader *boot_setup;
+ EFI_PHYSICAL_ADDRESS addr;
+ EFI_STATUS err;
+
+ image_setup = (struct SetupHeader *)(linux_addr);
+ if (image_setup->signature != 0xAA55 || image_setup->header != SETUP_MAGIC)
+ return EFI_LOAD_ERROR;
+
+ if (image_setup->version < 0x20b || !image_setup->relocatable_kernel)
+ return EFI_LOAD_ERROR;
+
+ addr = 0x3fffffff;
+ err = uefi_call_wrapper(BS->AllocatePages, 4, AllocateMaxAddress, EfiLoaderData,
+ EFI_SIZE_TO_PAGES(0x4000), &addr);
+ if (EFI_ERROR(err))
+ return err;
+ boot_setup = (struct SetupHeader *)(UINTN)addr;
+ ZeroMem(boot_setup, 0x4000);
+ CopyMem(boot_setup, image_setup, sizeof(struct SetupHeader));
+ boot_setup->loader_id = 0xff;
+
+ if (secure) {
+ /* set secure boot flag in linux kernel zero page, see
+ - Documentation/x86/zero-page.txt
+ - arch/x86/include/uapi/asm/bootparam.h
+ - drivers/firmware/efi/libstub/secureboot.c
+ in the linux kernel source tree
+ Possible values: 0 (unassigned), 1 (undetected), 2 (disabled), 3 (enabled)
+ */
+ boot_setup->boot_sector[0x1ec] = 3;
+ }
+
+ boot_setup->code32_start = (UINT32)linux_addr + (image_setup->setup_secs+1) * 512;
+
+ if (cmdline) {
+ addr = 0xA0000;
+ err = uefi_call_wrapper(BS->AllocatePages, 4, AllocateMaxAddress, EfiLoaderData,
+ EFI_SIZE_TO_PAGES(cmdline_len + 1), &addr);
+ if (EFI_ERROR(err))
+ return err;
+ CopyMem((VOID *)(UINTN)addr, cmdline, cmdline_len);
+ ((CHAR8 *)(UINTN)addr)[cmdline_len] = 0;
+ boot_setup->cmd_line_ptr = (UINT32)addr;
+ }
+
+ boot_setup->ramdisk_start = (UINT32)initrd_addr;
+ boot_setup->ramdisk_len = (UINT32)initrd_size;
+
+ linux_efi_handover(image, boot_setup);
+ return EFI_LOAD_ERROR;
+}
diff --git a/src/boot/efi/linux.h b/src/boot/efi/linux.h
new file mode 100644
index 0000000..2458a2f
--- /dev/null
+++ b/src/boot/efi/linux.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+EFI_STATUS linux_exec(EFI_HANDLE *image,
+ CHAR8 *cmdline, UINTN cmdline_size,
+ UINTN linux_addr,
+ UINTN initrd_addr, UINTN initrd_size, BOOLEAN secure);
diff --git a/src/boot/efi/measure.c b/src/boot/efi/measure.c
new file mode 100644
index 0000000..243c80a
--- /dev/null
+++ b/src/boot/efi/measure.c
@@ -0,0 +1,360 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if ENABLE_TPM
+
+#include <efi.h>
+#include <efilib.h>
+#include "measure.h"
+
+#define EFI_TCG_PROTOCOL_GUID { 0xf541796d, 0xa62e, 0x4954, {0xa7, 0x75, 0x95, 0x84, 0xf6, 0x1b, 0x9c, 0xdd} }
+
+typedef struct _TCG_VERSION {
+ UINT8 Major;
+ UINT8 Minor;
+ UINT8 RevMajor;
+ UINT8 RevMinor;
+} TCG_VERSION;
+
+typedef struct tdEFI_TCG2_VERSION {
+ UINT8 Major;
+ UINT8 Minor;
+} EFI_TCG2_VERSION;
+
+typedef struct _TCG_BOOT_SERVICE_CAPABILITY {
+ UINT8 Size;
+ struct _TCG_VERSION StructureVersion;
+ struct _TCG_VERSION ProtocolSpecVersion;
+ UINT8 HashAlgorithmBitmap;
+ BOOLEAN TPMPresentFlag;
+ BOOLEAN TPMDeactivatedFlag;
+} TCG_BOOT_SERVICE_CAPABILITY;
+
+typedef struct tdTREE_BOOT_SERVICE_CAPABILITY {
+ UINT8 Size;
+ EFI_TCG2_VERSION StructureVersion;
+ EFI_TCG2_VERSION ProtocolVersion;
+ UINT32 HashAlgorithmBitmap;
+ UINT32 SupportedEventLogs;
+ BOOLEAN TrEEPresentFlag;
+ UINT16 MaxCommandSize;
+ UINT16 MaxResponseSize;
+ UINT32 ManufacturerID;
+} TREE_BOOT_SERVICE_CAPABILITY;
+
+typedef UINT32 TCG_ALGORITHM_ID;
+#define TCG_ALG_SHA 0x00000004 // The SHA1 algorithm
+
+#define SHA1_DIGEST_SIZE 20
+
+typedef struct _TCG_DIGEST {
+ UINT8 Digest[SHA1_DIGEST_SIZE];
+} TCG_DIGEST;
+
+#define EV_IPL 13
+
+typedef struct _TCG_PCR_EVENT {
+ UINT32 PCRIndex;
+ UINT32 EventType;
+ struct _TCG_DIGEST digest;
+ UINT32 EventSize;
+ UINT8 Event[1];
+} TCG_PCR_EVENT;
+
+INTERFACE_DECL(_EFI_TCG);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG_STATUS_CHECK) (IN struct _EFI_TCG * This,
+ OUT struct _TCG_BOOT_SERVICE_CAPABILITY * ProtocolCapability,
+ OUT UINT32 * TCGFeatureFlags,
+ OUT EFI_PHYSICAL_ADDRESS * EventLogLocation,
+ OUT EFI_PHYSICAL_ADDRESS * EventLogLastEntry);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG_HASH_ALL) (IN struct _EFI_TCG * This,
+ IN UINT8 * HashData,
+ IN UINT64 HashDataLen,
+ IN TCG_ALGORITHM_ID AlgorithmId,
+ IN OUT UINT64 * HashedDataLen, IN OUT UINT8 ** HashedDataResult);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG_LOG_EVENT) (IN struct _EFI_TCG * This,
+ IN struct _TCG_PCR_EVENT * TCGLogData,
+ IN OUT UINT32 * EventNumber, IN UINT32 Flags);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG_PASS_THROUGH_TO_TPM) (IN struct _EFI_TCG * This,
+ IN UINT32 TpmInputParameterBlockSize,
+ IN UINT8 * TpmInputParameterBlock,
+ IN UINT32 TpmOutputParameterBlockSize,
+ IN UINT8 * TpmOutputParameterBlock);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG_HASH_LOG_EXTEND_EVENT) (IN struct _EFI_TCG * This,
+ IN EFI_PHYSICAL_ADDRESS HashData,
+ IN UINT64 HashDataLen,
+ IN TCG_ALGORITHM_ID AlgorithmId,
+ IN struct _TCG_PCR_EVENT * TCGLogData,
+ IN OUT UINT32 * EventNumber,
+ OUT EFI_PHYSICAL_ADDRESS * EventLogLastEntry);
+
+typedef struct _EFI_TCG {
+ EFI_TCG_STATUS_CHECK StatusCheck;
+ EFI_TCG_HASH_ALL HashAll;
+ EFI_TCG_LOG_EVENT LogEvent;
+ EFI_TCG_PASS_THROUGH_TO_TPM PassThroughToTPM;
+ EFI_TCG_HASH_LOG_EXTEND_EVENT HashLogExtendEvent;
+} EFI_TCG;
+
+#define EFI_TCG2_PROTOCOL_GUID {0x607f766c, 0x7455, 0x42be, { 0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f }}
+
+typedef struct tdEFI_TCG2_PROTOCOL EFI_TCG2_PROTOCOL;
+
+typedef UINT32 EFI_TCG2_EVENT_LOG_BITMAP;
+typedef UINT32 EFI_TCG2_EVENT_LOG_FORMAT;
+typedef UINT32 EFI_TCG2_EVENT_ALGORITHM_BITMAP;
+
+#define EFI_TCG2_EVENT_LOG_FORMAT_TCG_1_2 0x00000001
+#define EFI_TCG2_EVENT_LOG_FORMAT_TCG_2 0x00000002
+
+typedef struct tdEFI_TCG2_BOOT_SERVICE_CAPABILITY {
+ UINT8 Size;
+ EFI_TCG2_VERSION StructureVersion;
+ EFI_TCG2_VERSION ProtocolVersion;
+ EFI_TCG2_EVENT_ALGORITHM_BITMAP HashAlgorithmBitmap;
+ EFI_TCG2_EVENT_LOG_BITMAP SupportedEventLogs;
+ BOOLEAN TPMPresentFlag;
+ UINT16 MaxCommandSize;
+ UINT16 MaxResponseSize;
+ UINT32 ManufacturerID;
+ UINT32 NumberOfPCRBanks;
+ EFI_TCG2_EVENT_ALGORITHM_BITMAP ActivePcrBanks;
+} EFI_TCG2_BOOT_SERVICE_CAPABILITY;
+
+#define EFI_TCG2_EVENT_HEADER_VERSION 1
+
+typedef struct {
+ UINT32 HeaderSize;
+ UINT16 HeaderVersion;
+ UINT32 PCRIndex;
+ UINT32 EventType;
+} __attribute__((packed)) EFI_TCG2_EVENT_HEADER;
+
+typedef struct tdEFI_TCG2_EVENT {
+ UINT32 Size;
+ EFI_TCG2_EVENT_HEADER Header;
+ UINT8 Event[1];
+} __attribute__((packed)) EFI_TCG2_EVENT;
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG2_GET_CAPABILITY) (IN EFI_TCG2_PROTOCOL * This,
+ IN OUT EFI_TCG2_BOOT_SERVICE_CAPABILITY * ProtocolCapability);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG2_GET_EVENT_LOG) (IN EFI_TCG2_PROTOCOL * This,
+ IN EFI_TCG2_EVENT_LOG_FORMAT EventLogFormat,
+ OUT EFI_PHYSICAL_ADDRESS * EventLogLocation,
+ OUT EFI_PHYSICAL_ADDRESS * EventLogLastEntry,
+ OUT BOOLEAN * EventLogTruncated);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG2_HASH_LOG_EXTEND_EVENT) (IN EFI_TCG2_PROTOCOL * This,
+ IN UINT64 Flags,
+ IN EFI_PHYSICAL_ADDRESS DataToHash,
+ IN UINT64 DataToHashLen, IN EFI_TCG2_EVENT * EfiTcgEvent);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG2_SUBMIT_COMMAND) (IN EFI_TCG2_PROTOCOL * This,
+ IN UINT32 InputParameterBlockSize,
+ IN UINT8 * InputParameterBlock,
+ IN UINT32 OutputParameterBlockSize, IN UINT8 * OutputParameterBlock);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG2_GET_ACTIVE_PCR_BANKS) (IN EFI_TCG2_PROTOCOL * This, OUT UINT32 * ActivePcrBanks);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG2_SET_ACTIVE_PCR_BANKS) (IN EFI_TCG2_PROTOCOL * This, IN UINT32 ActivePcrBanks);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG2_GET_RESULT_OF_SET_ACTIVE_PCR_BANKS) (IN EFI_TCG2_PROTOCOL * This,
+ OUT UINT32 * OperationPresent, OUT UINT32 * Response);
+
+typedef struct tdEFI_TCG2_PROTOCOL {
+ EFI_TCG2_GET_CAPABILITY GetCapability;
+ EFI_TCG2_GET_EVENT_LOG GetEventLog;
+ EFI_TCG2_HASH_LOG_EXTEND_EVENT HashLogExtendEvent;
+ EFI_TCG2_SUBMIT_COMMAND SubmitCommand;
+ EFI_TCG2_GET_ACTIVE_PCR_BANKS GetActivePcrBanks;
+ EFI_TCG2_SET_ACTIVE_PCR_BANKS SetActivePcrBanks;
+ EFI_TCG2_GET_RESULT_OF_SET_ACTIVE_PCR_BANKS GetResultOfSetActivePcrBanks;
+} EFI_TCG2;
+
+static EFI_STATUS tpm1_measure_to_pcr_and_event_log(const EFI_TCG *tcg, UINT32 pcrindex, const EFI_PHYSICAL_ADDRESS buffer,
+ UINTN buffer_size, const CHAR16 *description) {
+ EFI_STATUS status;
+ TCG_PCR_EVENT *tcg_event;
+ UINT32 event_number;
+ EFI_PHYSICAL_ADDRESS event_log_last;
+ UINTN desc_len;
+
+ desc_len = (StrLen(description) + 1) * sizeof(CHAR16);
+
+ tcg_event = AllocateZeroPool(desc_len + sizeof(TCG_PCR_EVENT));
+
+ if (!tcg_event)
+ return EFI_OUT_OF_RESOURCES;
+
+ tcg_event->EventSize = desc_len;
+ CopyMem((VOID *) & tcg_event->Event[0], (VOID *) description, desc_len);
+
+ tcg_event->PCRIndex = pcrindex;
+ tcg_event->EventType = EV_IPL;
+
+ event_number = 1;
+ status = uefi_call_wrapper(tcg->HashLogExtendEvent, 7,
+ (EFI_TCG *) tcg, buffer, buffer_size, TCG_ALG_SHA, tcg_event, &event_number, &event_log_last);
+
+ if (EFI_ERROR(status))
+ return status;
+
+ uefi_call_wrapper(BS->FreePool, 1, tcg_event);
+
+ return EFI_SUCCESS;
+}
+
+/*
+ * According to TCG EFI Protocol Specification for TPM 2.0 family,
+ * all events generated after the invocation of EFI_TCG2_GET_EVENT_LOG
+ * shall be stored in an instance of an EFI_CONFIGURATION_TABLE aka
+ * EFI TCG 2.0 final events table. Hence, it is necessary to trigger the
+ * internal switch through calling get_event_log() in order to allow
+ * to retrieve the logs from OS runtime.
+ */
+static EFI_STATUS trigger_tcg2_final_events_table(const EFI_TCG2 *tcg, EFI_TCG2_EVENT_LOG_FORMAT log_fmt)
+{
+ EFI_PHYSICAL_ADDRESS loc;
+ EFI_PHYSICAL_ADDRESS last_loc;
+ BOOLEAN truncated;
+ return uefi_call_wrapper(tcg->GetEventLog, 5, (EFI_TCG2 *) tcg,
+ log_fmt, &loc, &last_loc, &truncated);
+}
+
+static EFI_STATUS tpm2_measure_to_pcr_and_event_log(const EFI_TCG2 *tcg, UINT32 pcrindex, const EFI_PHYSICAL_ADDRESS buffer,
+ UINT64 buffer_size, const CHAR16 *description, EFI_TCG2_EVENT_LOG_FORMAT log_fmt) {
+ EFI_STATUS status;
+ EFI_TCG2_EVENT *tcg_event;
+ UINTN desc_len;
+ static BOOLEAN triggered = FALSE;
+
+ if (triggered == FALSE) {
+ status = trigger_tcg2_final_events_table(tcg, log_fmt);
+ if (EFI_ERROR(status))
+ return status;
+
+ triggered = TRUE;
+ }
+
+ desc_len = StrLen(description) * sizeof(CHAR16);
+
+ tcg_event = AllocateZeroPool(sizeof(*tcg_event) - sizeof(tcg_event->Event) + desc_len + 1);
+
+ if (!tcg_event)
+ return EFI_OUT_OF_RESOURCES;
+
+ tcg_event->Size = sizeof(*tcg_event) - sizeof(tcg_event->Event) + desc_len + 1;
+ tcg_event->Header.HeaderSize = sizeof(EFI_TCG2_EVENT_HEADER);
+ tcg_event->Header.HeaderVersion = EFI_TCG2_EVENT_HEADER_VERSION;
+ tcg_event->Header.PCRIndex = pcrindex;
+ tcg_event->Header.EventType = EV_IPL;
+
+ CopyMem((VOID *) tcg_event->Event, (VOID *) description, desc_len);
+
+ status = uefi_call_wrapper(tcg->HashLogExtendEvent, 5, (EFI_TCG2 *) tcg, 0, buffer, (UINT64) buffer_size, tcg_event);
+
+ uefi_call_wrapper(BS->FreePool, 1, tcg_event);
+
+ if (EFI_ERROR(status))
+ return status;
+
+ return EFI_SUCCESS;
+}
+
+static EFI_TCG * tcg1_interface_check(void) {
+ EFI_GUID tpm_guid = EFI_TCG_PROTOCOL_GUID;
+ EFI_STATUS status;
+ EFI_TCG *tcg;
+ TCG_BOOT_SERVICE_CAPABILITY capability;
+ UINT32 features;
+ EFI_PHYSICAL_ADDRESS event_log_location;
+ EFI_PHYSICAL_ADDRESS event_log_last_entry;
+
+ status = LibLocateProtocol(&tpm_guid, (void **) &tcg);
+
+ if (EFI_ERROR(status))
+ return NULL;
+
+ capability.Size = (UINT8) sizeof(capability);
+ status = uefi_call_wrapper(tcg->StatusCheck, 5, tcg, &capability, &features, &event_log_location, &event_log_last_entry);
+
+ if (EFI_ERROR(status))
+ return NULL;
+
+ if (capability.TPMDeactivatedFlag)
+ return NULL;
+
+ if (!capability.TPMPresentFlag)
+ return NULL;
+
+ return tcg;
+}
+
+static EFI_TCG2 * tcg2_interface_check(EFI_TCG2_BOOT_SERVICE_CAPABILITY *caps) {
+ EFI_GUID tpm2_guid = EFI_TCG2_PROTOCOL_GUID;
+ EFI_STATUS status;
+ EFI_TCG2 *tcg;
+
+ status = LibLocateProtocol(&tpm2_guid, (void **) &tcg);
+
+ if (EFI_ERROR(status))
+ return NULL;
+
+ caps->Size = (UINT8) sizeof(EFI_TCG2_BOOT_SERVICE_CAPABILITY);
+ status = uefi_call_wrapper(tcg->GetCapability, 2, tcg, caps);
+
+ if (EFI_ERROR(status))
+ return NULL;
+
+ if (caps->StructureVersion.Major == 1 &&
+ caps->StructureVersion.Minor == 0) {
+ TCG_BOOT_SERVICE_CAPABILITY *caps_1_0;
+ caps_1_0 = (TCG_BOOT_SERVICE_CAPABILITY *)caps;
+ if (caps_1_0->TPMPresentFlag)
+ return tcg;
+ }
+
+ if (!caps->TPMPresentFlag)
+ return NULL;
+
+ return tcg;
+}
+
+EFI_STATUS tpm_log_event(UINT32 pcrindex, const EFI_PHYSICAL_ADDRESS buffer, UINTN buffer_size, const CHAR16 *description) {
+ EFI_TCG *tpm1;
+ EFI_TCG2 *tpm2;
+ EFI_TCG2_BOOT_SERVICE_CAPABILITY caps;
+
+ tpm2 = tcg2_interface_check(&caps);
+ if (tpm2) {
+ EFI_TCG2_EVENT_LOG_BITMAP supported_logs;
+ EFI_TCG2_EVENT_LOG_FORMAT log_fmt;
+
+ if (caps.StructureVersion.Major == 1 &&
+ caps.StructureVersion.Minor == 0)
+ supported_logs = ((TREE_BOOT_SERVICE_CAPABILITY *)&caps)->SupportedEventLogs;
+ else
+ supported_logs = caps.SupportedEventLogs;
+
+ if (supported_logs & EFI_TCG2_EVENT_LOG_FORMAT_TCG_2)
+ log_fmt = EFI_TCG2_EVENT_LOG_FORMAT_TCG_2;
+ else
+ log_fmt = EFI_TCG2_EVENT_LOG_FORMAT_TCG_1_2;
+
+ return tpm2_measure_to_pcr_and_event_log(tpm2, pcrindex, buffer, buffer_size, description, log_fmt);
+ }
+
+ tpm1 = tcg1_interface_check();
+ if (tpm1)
+ return tpm1_measure_to_pcr_and_event_log(tpm1, pcrindex, buffer, buffer_size, description);
+
+ /* No active TPM found, so don't return an error */
+ return EFI_SUCCESS;
+}
+
+#endif
diff --git a/src/boot/efi/measure.h b/src/boot/efi/measure.h
new file mode 100644
index 0000000..ebb6406
--- /dev/null
+++ b/src/boot/efi/measure.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+EFI_STATUS tpm_log_event(UINT32 pcrindex, const EFI_PHYSICAL_ADDRESS buffer, UINTN buffer_size, const CHAR16 *description);
diff --git a/src/boot/efi/meson.build b/src/boot/efi/meson.build
new file mode 100644
index 0000000..2140151
--- /dev/null
+++ b/src/boot/efi/meson.build
@@ -0,0 +1,217 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+efi_headers = files('''
+ console.h
+ disk.h
+ graphics.h
+ linux.h
+ measure.h
+ pe.h
+ splash.h
+ util.h
+ shim.h
+'''.split())
+
+common_sources = '''
+ disk.c
+ graphics.c
+ measure.c
+ pe.c
+ util.c
+'''.split()
+
+systemd_boot_sources = '''
+ boot.c
+ console.c
+ shim.c
+'''.split()
+
+stub_sources = '''
+ linux.c
+ splash.c
+ stub.c
+'''.split()
+
+if conf.get('ENABLE_EFI') == 1 and get_option('gnu-efi') != 'false'
+ efi_cc = get_option('efi-cc')
+ if efi_cc.length() == 0
+ efi_cc = cc.cmd_array()
+ endif
+ efi_ld = get_option('efi-ld')
+ if efi_ld == ''
+ efi_ld = find_program('ld', required: true)
+ endif
+ efi_incdir = get_option('efi-includedir')
+
+ gnu_efi_path_arch = ''
+ foreach name : [gnu_efi_arch, EFI_MACHINE_TYPE_NAME]
+ if (gnu_efi_path_arch == '' and name != '' and
+ cc.has_header('@0@/@1@/efibind.h'.format(efi_incdir, name)))
+ gnu_efi_path_arch = name
+ endif
+ endforeach
+
+ if gnu_efi_path_arch != '' and EFI_MACHINE_TYPE_NAME == ''
+ error('gnu-efi is available, but EFI_MACHINE_TYPE_NAME is unknown')
+ endif
+
+ efi_libdir = get_option('efi-libdir')
+ if efi_libdir == ''
+ ret = run_command(efi_cc + ['-print-multi-os-directory'])
+ if ret.returncode() == 0
+ path = join_paths('/usr/lib', ret.stdout().strip())
+ ret = run_command('realpath', '-e', path)
+ if ret.returncode() == 0
+ efi_libdir = ret.stdout().strip()
+ endif
+ endif
+ endif
+
+ have_gnu_efi = gnu_efi_path_arch != '' and efi_libdir != ''
+else
+ have_gnu_efi = false
+endif
+
+if get_option('gnu-efi') == 'true' and not have_gnu_efi
+ error('gnu-efi support requested, but headers were not found')
+endif
+
+if have_gnu_efi
+ efi_conf = configuration_data()
+ efi_conf.set_quoted('EFI_MACHINE_TYPE_NAME', EFI_MACHINE_TYPE_NAME)
+ efi_conf.set10('ENABLE_TPM', get_option('tpm'))
+ efi_conf.set('SD_TPM_PCR', get_option('tpm-pcrindex'))
+
+ efi_config_h = configure_file(
+ output : 'efi_config.h',
+ configuration : efi_conf)
+
+ objcopy = find_program('objcopy')
+
+ efi_ldsdir = get_option('efi-ldsdir')
+ arch_lds = 'elf_@0@_efi.lds'.format(gnu_efi_path_arch)
+ if efi_ldsdir == ''
+ efi_ldsdir = join_paths(efi_libdir, 'gnuefi')
+ cmd = run_command('test', '-f', join_paths(efi_ldsdir, arch_lds))
+ if cmd.returncode() != 0
+ efi_ldsdir = efi_libdir
+ cmd = run_command('test', '-f', join_paths(efi_ldsdir, arch_lds))
+ if cmd.returncode() != 0
+ error('Cannot find @0@'.format(arch_lds))
+ endif
+ endif
+ endif
+
+ compile_args = ['-Wall',
+ '-Wextra',
+ '-std=gnu90',
+ '-nostdinc',
+ '-ggdb', '-O0',
+ '-fpic',
+ '-fshort-wchar',
+ '-ffreestanding',
+ '-fno-strict-aliasing',
+ '-fno-stack-protector',
+ '-Wsign-compare',
+ '-Wno-missing-field-initializers',
+ '-isystem', efi_incdir,
+ '-isystem', join_paths(efi_incdir, gnu_efi_path_arch),
+ '-include', efi_config_h,
+ '-include', version_h]
+ if efi_arch == 'x86_64'
+ compile_args += ['-mno-red-zone',
+ '-mno-sse',
+ '-mno-mmx',
+ '-DEFI_FUNCTION_WRAPPER',
+ '-DGNU_EFI_USE_MS_ABI']
+ elif efi_arch == 'ia32'
+ compile_args += ['-mno-sse',
+ '-mno-mmx']
+ endif
+
+ efi_ldflags = ['-T',
+ join_paths(efi_ldsdir, arch_lds),
+ '-shared',
+ '-Bsymbolic',
+ '-nostdlib',
+ '-znocombreloc',
+ '-L', efi_libdir,
+ join_paths(efi_ldsdir, 'crt0-efi-@0@.o'.format(gnu_efi_path_arch))]
+ if efi_arch == 'aarch64' or efi_arch == 'arm'
+ # Aarch64 and ARM32 don't have an EFI capable objcopy. Use 'binary'
+ # instead, and add required symbols manually.
+ efi_ldflags += ['--defsym=EFI_SUBSYSTEM=0xa']
+ efi_format = ['-O', 'binary']
+ else
+ efi_format = ['--target=efi-app-@0@'.format(gnu_efi_arch)]
+ endif
+
+ systemd_boot_objects = []
+ stub_objects = []
+ foreach file : common_sources + systemd_boot_sources + stub_sources
+ o_file = custom_target(file + '.o',
+ input : file,
+ output : file + '.o',
+ command : efi_cc + ['-c', '@INPUT@', '-o', '@OUTPUT@']
+ + compile_args,
+ depend_files : efi_headers)
+ if (common_sources + systemd_boot_sources).contains(file)
+ systemd_boot_objects += o_file
+ endif
+ if (common_sources + stub_sources).contains(file)
+ stub_objects += o_file
+ endif
+ endforeach
+
+ libgcc_file_name = run_command(efi_cc + ['-print-libgcc-file-name']).stdout().strip()
+ systemd_boot_efi_name = 'systemd-boot@0@.efi'.format(EFI_MACHINE_TYPE_NAME)
+ stub_efi_name = 'linux@0@.efi.stub'.format(EFI_MACHINE_TYPE_NAME)
+ no_undefined_symbols = find_program('no-undefined-symbols.sh')
+
+ foreach tuple : [['systemd_boot.so', systemd_boot_efi_name, systemd_boot_objects],
+ ['stub.so', stub_efi_name, stub_objects]]
+ so = custom_target(
+ tuple[0],
+ input : tuple[2],
+ output : tuple[0],
+ command : [efi_ld, '-o', '@OUTPUT@'] +
+ efi_ldflags + tuple[2] +
+ ['-lefi', '-lgnuefi', libgcc_file_name])
+
+ if want_tests != 'false'
+ test('no-undefined-symbols-' + tuple[0],
+ no_undefined_symbols,
+ args : [so])
+ endif
+
+ stub = custom_target(
+ tuple[1],
+ input : so,
+ output : tuple[1],
+ command : [objcopy,
+ '-j', '.text',
+ '-j', '.sdata',
+ '-j', '.data',
+ '-j', '.dynamic',
+ '-j', '.dynsym',
+ '-j', '.rel*']
+ + efi_format +
+ ['@INPUT@', '@OUTPUT@'],
+ install : true,
+ install_dir : bootlibdir)
+
+ set_variable(tuple[0].underscorify(), so)
+ set_variable(tuple[0].underscorify() + '_stub', stub)
+ endforeach
+endif
+
+############################################################
+
+if have_gnu_efi
+ test_efi_disk_img = custom_target(
+ 'test-efi-disk.img',
+ input : [systemd_boot_so, stub_so_stub],
+ output : 'test-efi-disk.img',
+ command : [test_efi_create_disk_sh, '@OUTPUT@',
+ '@INPUT0@', '@INPUT1@', splash_bmp])
+endif
diff --git a/src/boot/efi/no-undefined-symbols.sh b/src/boot/efi/no-undefined-symbols.sh
new file mode 100755
index 0000000..8572cee
--- /dev/null
+++ b/src/boot/efi/no-undefined-symbols.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -eu
+
+if nm -D -u "$1" | grep ' U '; then
+ echo "Undefined symbols detected!"
+ exit 1
+fi
diff --git a/src/boot/efi/pe.c b/src/boot/efi/pe.c
new file mode 100644
index 0000000..f1f947b
--- /dev/null
+++ b/src/boot/efi/pe.c
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "pe.h"
+#include "util.h"
+
+struct DosFileHeader {
+ UINT8 Magic[2];
+ UINT16 LastSize;
+ UINT16 nBlocks;
+ UINT16 nReloc;
+ UINT16 HdrSize;
+ UINT16 MinAlloc;
+ UINT16 MaxAlloc;
+ UINT16 ss;
+ UINT16 sp;
+ UINT16 Checksum;
+ UINT16 ip;
+ UINT16 cs;
+ UINT16 RelocPos;
+ UINT16 nOverlay;
+ UINT16 reserved[4];
+ UINT16 OEMId;
+ UINT16 OEMInfo;
+ UINT16 reserved2[10];
+ UINT32 ExeHeader;
+} __attribute__((packed));
+
+#define PE_HEADER_MACHINE_I386 0x014c
+#define PE_HEADER_MACHINE_X64 0x8664
+struct PeFileHeader {
+ UINT16 Machine;
+ UINT16 NumberOfSections;
+ UINT32 TimeDateStamp;
+ UINT32 PointerToSymbolTable;
+ UINT32 NumberOfSymbols;
+ UINT16 SizeOfOptionalHeader;
+ UINT16 Characteristics;
+} __attribute__((packed));
+
+struct PeHeader {
+ UINT8 Magic[4];
+ struct PeFileHeader FileHeader;
+} __attribute__((packed));
+
+struct PeSectionHeader {
+ UINT8 Name[8];
+ UINT32 VirtualSize;
+ UINT32 VirtualAddress;
+ UINT32 SizeOfRawData;
+ UINT32 PointerToRawData;
+ UINT32 PointerToRelocations;
+ UINT32 PointerToLinenumbers;
+ UINT16 NumberOfRelocations;
+ UINT16 NumberOfLinenumbers;
+ UINT32 Characteristics;
+} __attribute__((packed));
+
+EFI_STATUS pe_memory_locate_sections(CHAR8 *base, CHAR8 **sections, UINTN *addrs, UINTN *offsets, UINTN *sizes) {
+ struct DosFileHeader *dos;
+ struct PeHeader *pe;
+ UINTN i;
+ UINTN offset;
+
+ dos = (struct DosFileHeader *)base;
+
+ if (CompareMem(dos->Magic, "MZ", 2) != 0)
+ return EFI_LOAD_ERROR;
+
+ pe = (struct PeHeader *)&base[dos->ExeHeader];
+ if (CompareMem(pe->Magic, "PE\0\0", 4) != 0)
+ return EFI_LOAD_ERROR;
+
+ /* PE32+ Subsystem type */
+ if (pe->FileHeader.Machine != PE_HEADER_MACHINE_X64 &&
+ pe->FileHeader.Machine != PE_HEADER_MACHINE_I386)
+ return EFI_LOAD_ERROR;
+
+ if (pe->FileHeader.NumberOfSections > 96)
+ return EFI_LOAD_ERROR;
+
+ offset = dos->ExeHeader + sizeof(*pe) + pe->FileHeader.SizeOfOptionalHeader;
+
+ for (i = 0; i < pe->FileHeader.NumberOfSections; i++) {
+ struct PeSectionHeader *sect;
+ UINTN j;
+
+ sect = (struct PeSectionHeader *)&base[offset];
+ for (j = 0; sections[j]; j++) {
+ if (CompareMem(sect->Name, sections[j], strlena(sections[j])) != 0)
+ continue;
+
+ if (addrs)
+ addrs[j] = (UINTN)sect->VirtualAddress;
+ if (offsets)
+ offsets[j] = (UINTN)sect->PointerToRawData;
+ if (sizes)
+ sizes[j] = (UINTN)sect->VirtualSize;
+ }
+ offset += sizeof(*sect);
+ }
+
+ return EFI_SUCCESS;
+}
+
+EFI_STATUS pe_file_locate_sections(EFI_FILE *dir, CHAR16 *path, CHAR8 **sections, UINTN *addrs, UINTN *offsets, UINTN *sizes) {
+ EFI_FILE_HANDLE handle;
+ struct DosFileHeader dos;
+ struct PeHeader pe;
+ UINTN len;
+ UINTN headerlen;
+ EFI_STATUS err;
+ _cleanup_freepool_ CHAR8 *header = NULL;
+
+ err = uefi_call_wrapper(dir->Open, 5, dir, &handle, path, EFI_FILE_MODE_READ, 0ULL);
+ if (EFI_ERROR(err))
+ return err;
+
+ /* MS-DOS stub */
+ len = sizeof(dos);
+ err = uefi_call_wrapper(handle->Read, 3, handle, &len, &dos);
+ if (EFI_ERROR(err))
+ goto out;
+ if (len != sizeof(dos)) {
+ err = EFI_LOAD_ERROR;
+ goto out;
+ }
+
+ err = uefi_call_wrapper(handle->SetPosition, 2, handle, dos.ExeHeader);
+ if (EFI_ERROR(err))
+ goto out;
+
+ len = sizeof(pe);
+ err = uefi_call_wrapper(handle->Read, 3, handle, &len, &pe);
+ if (EFI_ERROR(err))
+ goto out;
+ if (len != sizeof(pe)) {
+ err = EFI_LOAD_ERROR;
+ goto out;
+ }
+
+ headerlen = sizeof(dos) + sizeof(pe) + pe.FileHeader.SizeOfOptionalHeader + pe.FileHeader.NumberOfSections * sizeof(struct PeSectionHeader);
+ header = AllocatePool(headerlen);
+ if (!header) {
+ err = EFI_OUT_OF_RESOURCES;
+ goto out;
+ }
+ len = headerlen;
+ err = uefi_call_wrapper(handle->SetPosition, 2, handle, 0);
+ if (EFI_ERROR(err))
+ goto out;
+
+ err = uefi_call_wrapper(handle->Read, 3, handle, &len, header);
+ if (EFI_ERROR(err))
+ goto out;
+
+ if (len != headerlen) {
+ err = EFI_LOAD_ERROR;
+ goto out;
+ }
+
+ err = pe_memory_locate_sections(header, sections, addrs, offsets, sizes);
+out:
+ uefi_call_wrapper(handle->Close, 1, handle);
+ return err;
+}
diff --git a/src/boot/efi/pe.h b/src/boot/efi/pe.h
new file mode 100644
index 0000000..bfbb8d9
--- /dev/null
+++ b/src/boot/efi/pe.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+EFI_STATUS pe_memory_locate_sections(CHAR8 *base,
+ CHAR8 **sections, UINTN *addrs, UINTN *offsets, UINTN *sizes);
+EFI_STATUS pe_file_locate_sections(EFI_FILE *dir, CHAR16 *path,
+ CHAR8 **sections, UINTN *addrs, UINTN *offsets, UINTN *sizes);
diff --git a/src/boot/efi/shim.c b/src/boot/efi/shim.c
new file mode 100644
index 0000000..f6ffed1
--- /dev/null
+++ b/src/boot/efi/shim.c
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * Port to systemd-boot
+ * Copyright © 2017 Max Resch <resch.max@gmail.com>
+ *
+ * Security Policy Handling
+ * Copyright © 2012 <James.Bottomley@HansenPartnership.com>
+ * https://github.com/mjg59/efitools
+ */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "util.h"
+#include "shim.h"
+
+/* well known shim lock guid */
+#define SHIM_LOCK_GUID
+
+struct ShimLock {
+ EFI_STATUS __attribute__((sysv_abi)) (*shim_verify) (VOID *buffer, UINT32 size);
+
+ /* context is actually a struct for the PE header, but it isn't needed so void is sufficient just do define the interface
+ * see shim.c/shim.h and PeHeader.h in the github shim repo */
+ EFI_STATUS __attribute__((sysv_abi)) (*generate_hash) (VOID *data, UINT32 datasize, VOID *context, UINT8 *sha256hash, UINT8 *sha1hash);
+
+ EFI_STATUS __attribute__((sysv_abi)) (*read_header) (VOID *data, UINT32 datasize, VOID *context);
+};
+
+static const EFI_GUID simple_fs_guid = SIMPLE_FILE_SYSTEM_PROTOCOL;
+static const EFI_GUID global_guid = EFI_GLOBAL_VARIABLE;
+
+static const EFI_GUID security_protocol_guid = { 0xa46423e3, 0x4617, 0x49f1, {0xb9, 0xff, 0xd1, 0xbf, 0xa9, 0x11, 0x58, 0x39 } };
+static const EFI_GUID security2_protocol_guid = { 0x94ab2f58, 0x1438, 0x4ef1, {0x91, 0x52, 0x18, 0x94, 0x1a, 0x3a, 0x0e, 0x68 } };
+static const EFI_GUID shim_lock_guid = { 0x605dab50, 0xe046, 0x4300, {0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23} };
+
+BOOLEAN shim_loaded(void) {
+ struct ShimLock *shim_lock;
+
+ return uefi_call_wrapper(BS->LocateProtocol, 3, (EFI_GUID*) &shim_lock_guid, NULL, (VOID**) &shim_lock) == EFI_SUCCESS;
+}
+
+static BOOLEAN shim_validate(VOID *data, UINT32 size) {
+ struct ShimLock *shim_lock;
+
+ if (!data)
+ return FALSE;
+
+ if (uefi_call_wrapper(BS->LocateProtocol, 3, (EFI_GUID*) &shim_lock_guid, NULL, (VOID**) &shim_lock) != EFI_SUCCESS)
+ return FALSE;
+
+ if (!shim_lock)
+ return FALSE;
+
+ return shim_lock->shim_verify(data, size) == EFI_SUCCESS;
+}
+
+BOOLEAN secure_boot_enabled(void) {
+ _cleanup_freepool_ CHAR8 *b = NULL;
+ UINTN size;
+
+ if (efivar_get_raw(&global_guid, L"SecureBoot", &b, &size) == EFI_SUCCESS)
+ return *b > 0;
+
+ return FALSE;
+}
+
+/*
+ * See the UEFI Platform Initialization manual (Vol2: DXE) for this
+ */
+struct _EFI_SECURITY2_PROTOCOL;
+struct _EFI_SECURITY_PROTOCOL;
+struct _EFI_DEVICE_PATH_PROTOCOL;
+
+typedef struct _EFI_SECURITY2_PROTOCOL EFI_SECURITY2_PROTOCOL;
+typedef struct _EFI_SECURITY_PROTOCOL EFI_SECURITY_PROTOCOL;
+typedef struct _EFI_DEVICE_PATH_PROTOCOL EFI_DEVICE_PATH_PROTOCOL;
+
+typedef EFI_STATUS (EFIAPI *EFI_SECURITY_FILE_AUTHENTICATION_STATE) (
+ const EFI_SECURITY_PROTOCOL *This,
+ UINT32 AuthenticationStatus,
+ const EFI_DEVICE_PATH_PROTOCOL *File
+);
+
+typedef EFI_STATUS (EFIAPI *EFI_SECURITY2_FILE_AUTHENTICATION) (
+ const EFI_SECURITY2_PROTOCOL *This,
+ const EFI_DEVICE_PATH_PROTOCOL *DevicePath,
+ VOID *FileBuffer,
+ UINTN FileSize,
+ BOOLEAN BootPolicy
+);
+
+struct _EFI_SECURITY2_PROTOCOL {
+ EFI_SECURITY2_FILE_AUTHENTICATION FileAuthentication;
+};
+
+struct _EFI_SECURITY_PROTOCOL {
+ EFI_SECURITY_FILE_AUTHENTICATION_STATE FileAuthenticationState;
+};
+
+/* Handle to the original authenticator for security1 protocol */
+static EFI_SECURITY_FILE_AUTHENTICATION_STATE esfas = NULL;
+
+/* Handle to the original authenticator for security2 protocol */
+static EFI_SECURITY2_FILE_AUTHENTICATION es2fa = NULL;
+
+/*
+ * Perform shim/MOK and Secure Boot authentication on a binary that's already been
+ * loaded into memory. This function does the platform SB authentication first
+ * but preserves its return value in case of its failure, so that it can be
+ * returned in case of a shim/MOK authentication failure. This is done because
+ * the SB failure code seems to vary from one implementation to another, and I
+ * don't want to interfere with that at this time.
+ */
+static EFIAPI EFI_STATUS security2_policy_authentication (const EFI_SECURITY2_PROTOCOL *this,
+ const EFI_DEVICE_PATH_PROTOCOL *device_path,
+ VOID *file_buffer, UINTN file_size, BOOLEAN boot_policy) {
+ EFI_STATUS status;
+
+ /* Chain original security policy */
+ status = uefi_call_wrapper(es2fa, 5, this, device_path, file_buffer, file_size, boot_policy);
+
+ /* if OK, don't bother with MOK check */
+ if (status == EFI_SUCCESS)
+ return status;
+
+ if (shim_validate(file_buffer, file_size))
+ return EFI_SUCCESS;
+
+ return status;
+}
+
+/*
+ * Perform both shim/MOK and platform Secure Boot authentication. This function loads
+ * the file and performs shim/MOK authentication first simply to avoid double loads
+ * of Linux kernels, which are much more likely to be shim/MOK-signed than platform-signed,
+ * since kernels are big and can take several seconds to load on some computers and
+ * filesystems. This also has the effect of returning whatever the platform code is for
+ * authentication failure, be it EFI_ACCESS_DENIED, EFI_SECURITY_VIOLATION, or something
+ * else. (This seems to vary between implementations.)
+ */
+static EFIAPI EFI_STATUS security_policy_authentication (const EFI_SECURITY_PROTOCOL *this, UINT32 authentication_status,
+ const EFI_DEVICE_PATH_PROTOCOL *device_path_const) {
+ EFI_STATUS status;
+ _cleanup_freepool_ EFI_DEVICE_PATH *dev_path = NULL;
+ _cleanup_freepool_ CHAR16 *dev_path_str = NULL;
+ EFI_HANDLE h;
+ EFI_FILE *root;
+ _cleanup_freepool_ CHAR8 *file_buffer = NULL;
+ UINTN file_size;
+
+ if (!device_path_const)
+ return EFI_INVALID_PARAMETER;
+
+ dev_path = DuplicateDevicePath((EFI_DEVICE_PATH*) device_path_const);
+
+ status = uefi_call_wrapper(BS->LocateDevicePath, 3, (EFI_GUID*) &simple_fs_guid, &dev_path, &h);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ /* No need to check return value, this already happend in efi_main() */
+ root = LibOpenRoot(h);
+ dev_path_str = DevicePathToStr(dev_path);
+
+ status = file_read(root, dev_path_str, 0, 0, &file_buffer, &file_size);
+ if (EFI_ERROR(status))
+ return status;
+ uefi_call_wrapper(root->Close, 1, root);
+
+ if (shim_validate(file_buffer, file_size))
+ return EFI_SUCCESS;
+
+ /* Try using the platform's native policy.... */
+ return uefi_call_wrapper(esfas, 3, this, authentication_status, device_path_const);
+}
+
+EFI_STATUS security_policy_install(void) {
+ EFI_SECURITY_PROTOCOL *security_protocol;
+ EFI_SECURITY2_PROTOCOL *security2_protocol = NULL;
+ EFI_STATUS status;
+
+ /* Already Installed */
+ if (esfas)
+ return EFI_ALREADY_STARTED;
+
+ /*
+ * Don't bother with status here. The call is allowed
+ * to fail, since SECURITY2 was introduced in PI 1.2.1.
+ * Use security2_protocol == NULL as indicator.
+ */
+ uefi_call_wrapper(BS->LocateProtocol, 3, (EFI_GUID*) &security2_protocol_guid, NULL, (VOID**) &security2_protocol);
+
+ status = uefi_call_wrapper(BS->LocateProtocol, 3, (EFI_GUID*) &security_protocol_guid, NULL, (VOID**) &security_protocol);
+ /* This one is mandatory, so there's a serious problem */
+ if (status != EFI_SUCCESS)
+ return status;
+
+ esfas = security_protocol->FileAuthenticationState;
+ security_protocol->FileAuthenticationState = security_policy_authentication;
+
+ if (security2_protocol) {
+ es2fa = security2_protocol->FileAuthentication;
+ security2_protocol->FileAuthentication = security2_policy_authentication;
+ }
+
+ return EFI_SUCCESS;
+}
diff --git a/src/boot/efi/shim.h b/src/boot/efi/shim.h
new file mode 100644
index 0000000..209c9d4
--- /dev/null
+++ b/src/boot/efi/shim.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * Port to systemd-boot
+ * Copyright © 2017 Max Resch <resch.max@gmail.com>
+ *
+ * Security Policy Handling
+ * Copyright © 2012 <James.Bottomley@HansenPartnership.com>
+ * https://github.com/mjg59/efitools
+ */
+#pragma once
+
+BOOLEAN shim_loaded(void);
+
+BOOLEAN secure_boot_enabled(void);
+
+EFI_STATUS security_policy_install(void);
diff --git a/src/boot/efi/splash.c b/src/boot/efi/splash.c
new file mode 100644
index 0000000..ba4a2c5
--- /dev/null
+++ b/src/boot/efi/splash.c
@@ -0,0 +1,305 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "graphics.h"
+#include "splash.h"
+#include "util.h"
+
+struct bmp_file {
+ CHAR8 signature[2];
+ UINT32 size;
+ UINT16 reserved[2];
+ UINT32 offset;
+} __attribute__((packed));
+
+/* we require at least BITMAPINFOHEADER, later versions are
+ accepted, but their features ignored */
+struct bmp_dib {
+ UINT32 size;
+ UINT32 x;
+ UINT32 y;
+ UINT16 planes;
+ UINT16 depth;
+ UINT32 compression;
+ UINT32 image_size;
+ INT32 x_pixel_meter;
+ INT32 y_pixel_meter;
+ UINT32 colors_used;
+ UINT32 colors_important;
+} __attribute__((packed));
+
+struct bmp_map {
+ UINT8 blue;
+ UINT8 green;
+ UINT8 red;
+ UINT8 reserved;
+} __attribute__((packed));
+
+EFI_STATUS bmp_parse_header(UINT8 *bmp, UINTN size, struct bmp_dib **ret_dib,
+ struct bmp_map **ret_map, UINT8 **pixmap) {
+ struct bmp_file *file;
+ struct bmp_dib *dib;
+ struct bmp_map *map;
+ UINTN row_size;
+
+ if (size < sizeof(struct bmp_file) + sizeof(struct bmp_dib))
+ return EFI_INVALID_PARAMETER;
+
+ /* check file header */
+ file = (struct bmp_file *)bmp;
+ if (file->signature[0] != 'B' || file->signature[1] != 'M')
+ return EFI_INVALID_PARAMETER;
+ if (file->size != size)
+ return EFI_INVALID_PARAMETER;
+ if (file->size < file->offset)
+ return EFI_INVALID_PARAMETER;
+
+ /* check device-independent bitmap */
+ dib = (struct bmp_dib *)(bmp + sizeof(struct bmp_file));
+ if (dib->size < sizeof(struct bmp_dib))
+ return EFI_UNSUPPORTED;
+
+ switch (dib->depth) {
+ case 1:
+ case 4:
+ case 8:
+ case 24:
+ if (dib->compression != 0)
+ return EFI_UNSUPPORTED;
+
+ break;
+
+ case 16:
+ case 32:
+ if (dib->compression != 0 && dib->compression != 3)
+ return EFI_UNSUPPORTED;
+
+ break;
+
+ default:
+ return EFI_UNSUPPORTED;
+ }
+
+ row_size = ((UINTN) dib->depth * dib->x + 31) / 32 * 4;
+ if (file->size - file->offset < dib->y * row_size)
+ return EFI_INVALID_PARAMETER;
+ if (row_size * dib->y > 64 * 1024 * 1024)
+ return EFI_INVALID_PARAMETER;
+
+ /* check color table */
+ map = (struct bmp_map *)(bmp + sizeof(struct bmp_file) + dib->size);
+ if (file->offset < sizeof(struct bmp_file) + dib->size)
+ return EFI_INVALID_PARAMETER;
+
+ if (file->offset > sizeof(struct bmp_file) + dib->size) {
+ UINT32 map_count;
+ UINTN map_size;
+
+ if (dib->colors_used)
+ map_count = dib->colors_used;
+ else {
+ switch (dib->depth) {
+ case 1:
+ case 4:
+ case 8:
+ map_count = 1 << dib->depth;
+ break;
+
+ default:
+ map_count = 0;
+ break;
+ }
+ }
+
+ map_size = file->offset - (sizeof(struct bmp_file) + dib->size);
+ if (map_size != sizeof(struct bmp_map) * map_count)
+ return EFI_INVALID_PARAMETER;
+ }
+
+ *ret_map = map;
+ *ret_dib = dib;
+ *pixmap = bmp + file->offset;
+
+ return EFI_SUCCESS;
+}
+
+static VOID pixel_blend(UINT32 *dst, const UINT32 source) {
+ UINT32 alpha, src, src_rb, src_g, dst_rb, dst_g, rb, g;
+
+ alpha = (source & 0xff);
+
+ /* convert src from RGBA to XRGB */
+ src = source >> 8;
+
+ /* decompose into RB and G components */
+ src_rb = (src & 0xff00ff);
+ src_g = (src & 0x00ff00);
+
+ dst_rb = (*dst & 0xff00ff);
+ dst_g = (*dst & 0x00ff00);
+
+ /* blend */
+ rb = ((((src_rb - dst_rb) * alpha + 0x800080) >> 8) + dst_rb) & 0xff00ff;
+ g = ((((src_g - dst_g) * alpha + 0x008000) >> 8) + dst_g) & 0x00ff00;
+
+ *dst = (rb | g);
+}
+
+EFI_STATUS bmp_to_blt(EFI_GRAPHICS_OUTPUT_BLT_PIXEL *buf,
+ struct bmp_dib *dib, struct bmp_map *map,
+ UINT8 *pixmap) {
+ UINT8 *in;
+ UINTN y;
+
+ /* transform and copy pixels */
+ in = pixmap;
+ for (y = 0; y < dib->y; y++) {
+ EFI_GRAPHICS_OUTPUT_BLT_PIXEL *out;
+ UINTN row_size;
+ UINTN x;
+
+ out = &buf[(dib->y - y - 1) * dib->x];
+ for (x = 0; x < dib->x; x++, in++, out++) {
+ switch (dib->depth) {
+ case 1: {
+ UINTN i;
+
+ for (i = 0; i < 8 && x < dib->x; i++) {
+ out->Red = map[((*in) >> (7 - i)) & 1].red;
+ out->Green = map[((*in) >> (7 - i)) & 1].green;
+ out->Blue = map[((*in) >> (7 - i)) & 1].blue;
+ out++;
+ x++;
+ }
+ out--;
+ x--;
+ break;
+ }
+
+ case 4: {
+ UINTN i;
+
+ i = (*in) >> 4;
+ out->Red = map[i].red;
+ out->Green = map[i].green;
+ out->Blue = map[i].blue;
+ if (x < (dib->x - 1)) {
+ out++;
+ x++;
+ i = (*in) & 0x0f;
+ out->Red = map[i].red;
+ out->Green = map[i].green;
+ out->Blue = map[i].blue;
+ }
+ break;
+ }
+
+ case 8:
+ out->Red = map[*in].red;
+ out->Green = map[*in].green;
+ out->Blue = map[*in].blue;
+ break;
+
+ case 16: {
+ UINT16 i = *(UINT16 *) in;
+
+ out->Red = (i & 0x7c00) >> 7;
+ out->Green = (i & 0x3e0) >> 2;
+ out->Blue = (i & 0x1f) << 3;
+ in += 1;
+ break;
+ }
+
+ case 24:
+ out->Red = in[2];
+ out->Green = in[1];
+ out->Blue = in[0];
+ in += 2;
+ break;
+
+ case 32: {
+ UINT32 i = *(UINT32 *) in;
+
+ pixel_blend((UINT32 *)out, i);
+
+ in += 3;
+ break;
+ }
+ }
+ }
+
+ /* add row padding; new lines always start at 32 bit boundary */
+ row_size = in - pixmap;
+ in += ((row_size + 3) & ~3) - row_size;
+ }
+
+ return EFI_SUCCESS;
+}
+
+EFI_STATUS graphics_splash(UINT8 *content, UINTN len, const EFI_GRAPHICS_OUTPUT_BLT_PIXEL *background) {
+ EFI_GRAPHICS_OUTPUT_BLT_PIXEL pixel = {};
+ EFI_GUID GraphicsOutputProtocolGuid = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+ EFI_GRAPHICS_OUTPUT_PROTOCOL *GraphicsOutput = NULL;
+ struct bmp_dib *dib;
+ struct bmp_map *map;
+ UINT8 *pixmap;
+ UINT64 blt_size;
+ _cleanup_freepool_ VOID *blt = NULL;
+ UINTN x_pos = 0;
+ UINTN y_pos = 0;
+ EFI_STATUS err;
+
+ if (!background) {
+ if (StriCmp(L"Apple", ST->FirmwareVendor) == 0) {
+ pixel.Red = 0xc0;
+ pixel.Green = 0xc0;
+ pixel.Blue = 0xc0;
+ }
+ background = &pixel;
+ }
+
+ err = LibLocateProtocol(&GraphicsOutputProtocolGuid, (VOID **)&GraphicsOutput);
+ if (EFI_ERROR(err))
+ return err;
+
+ err = bmp_parse_header(content, len, &dib, &map, &pixmap);
+ if (EFI_ERROR(err))
+ return err;
+
+ if (dib->x < GraphicsOutput->Mode->Info->HorizontalResolution)
+ x_pos = (GraphicsOutput->Mode->Info->HorizontalResolution - dib->x) / 2;
+ if (dib->y < GraphicsOutput->Mode->Info->VerticalResolution)
+ y_pos = (GraphicsOutput->Mode->Info->VerticalResolution - dib->y) / 2;
+
+ uefi_call_wrapper(GraphicsOutput->Blt, 10, GraphicsOutput,
+ (EFI_GRAPHICS_OUTPUT_BLT_PIXEL *)background,
+ EfiBltVideoFill, 0, 0, 0, 0,
+ GraphicsOutput->Mode->Info->HorizontalResolution,
+ GraphicsOutput->Mode->Info->VerticalResolution, 0);
+
+ /* EFI buffer */
+ blt_size = sizeof(EFI_GRAPHICS_OUTPUT_BLT_PIXEL) * dib->x * dib->y;
+ blt = AllocatePool(blt_size);
+ if (!blt)
+ return EFI_OUT_OF_RESOURCES;
+
+ err = uefi_call_wrapper(GraphicsOutput->Blt, 10, GraphicsOutput,
+ blt, EfiBltVideoToBltBuffer, x_pos, y_pos, 0, 0,
+ dib->x, dib->y, 0);
+ if (EFI_ERROR(err))
+ return err;
+
+ err = bmp_to_blt(blt, dib, map, pixmap);
+ if (EFI_ERROR(err))
+ return err;
+
+ err = graphics_mode(TRUE);
+ if (EFI_ERROR(err))
+ return err;
+
+ return uefi_call_wrapper(GraphicsOutput->Blt, 10, GraphicsOutput,
+ blt, EfiBltBufferToVideo, 0, 0, x_pos, y_pos,
+ dib->x, dib->y, 0);
+}
diff --git a/src/boot/efi/splash.h b/src/boot/efi/splash.h
new file mode 100644
index 0000000..8928b06
--- /dev/null
+++ b/src/boot/efi/splash.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+EFI_STATUS graphics_splash(UINT8 *content, UINTN len, const EFI_GRAPHICS_OUTPUT_BLT_PIXEL *background);
diff --git a/src/boot/efi/stub.c b/src/boot/efi/stub.c
new file mode 100644
index 0000000..6b07879
--- /dev/null
+++ b/src/boot/efi/stub.c
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "disk.h"
+#include "graphics.h"
+#include "linux.h"
+#include "measure.h"
+#include "pe.h"
+#include "splash.h"
+#include "util.h"
+
+/* magic string to find in the binary image */
+static const char __attribute__((used)) magic[] = "#### LoaderInfo: systemd-stub " GIT_VERSION " ####";
+
+static const EFI_GUID global_guid = EFI_GLOBAL_VARIABLE;
+
+EFI_STATUS efi_main(EFI_HANDLE image, EFI_SYSTEM_TABLE *sys_table) {
+ EFI_LOADED_IMAGE *loaded_image;
+ _cleanup_freepool_ CHAR8 *b = NULL;
+ UINTN size;
+ BOOLEAN secure = FALSE;
+ CHAR8 *sections[] = {
+ (UINT8 *)".cmdline",
+ (UINT8 *)".linux",
+ (UINT8 *)".initrd",
+ (UINT8 *)".splash",
+ NULL
+ };
+ UINTN addrs[ELEMENTSOF(sections)-1] = {};
+ UINTN offs[ELEMENTSOF(sections)-1] = {};
+ UINTN szs[ELEMENTSOF(sections)-1] = {};
+ CHAR8 *cmdline = NULL;
+ UINTN cmdline_len;
+ CHAR16 uuid[37];
+ EFI_STATUS err;
+
+ InitializeLib(image, sys_table);
+
+ err = uefi_call_wrapper(BS->OpenProtocol, 6, image, &LoadedImageProtocol, (VOID **)&loaded_image,
+ image, NULL, EFI_OPEN_PROTOCOL_GET_PROTOCOL);
+ if (EFI_ERROR(err)) {
+ Print(L"Error getting a LoadedImageProtocol handle: %r ", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return err;
+ }
+
+ if (efivar_get_raw(&global_guid, L"SecureBoot", &b, &size) == EFI_SUCCESS)
+ if (*b > 0)
+ secure = TRUE;
+
+ err = pe_memory_locate_sections(loaded_image->ImageBase, sections, addrs, offs, szs);
+ if (EFI_ERROR(err)) {
+ Print(L"Unable to locate embedded .linux section: %r ", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return err;
+ }
+
+ if (szs[0] > 0)
+ cmdline = (CHAR8 *)(loaded_image->ImageBase + addrs[0]);
+
+ cmdline_len = szs[0];
+
+ /* if we are not in secure boot mode, accept a custom command line and replace the built-in one */
+ if (!secure && loaded_image->LoadOptionsSize > 0 && *(CHAR16 *)loaded_image->LoadOptions > 0x1F) {
+ CHAR16 *options;
+ CHAR8 *line;
+ UINTN i;
+
+ options = (CHAR16 *)loaded_image->LoadOptions;
+ cmdline_len = (loaded_image->LoadOptionsSize / sizeof(CHAR16)) * sizeof(CHAR8);
+ line = AllocatePool(cmdline_len);
+ for (i = 0; i < cmdline_len; i++)
+ line[i] = options[i];
+ cmdline = line;
+
+#if ENABLE_TPM
+ /* Try to log any options to the TPM, especially manually edited options */
+ err = tpm_log_event(SD_TPM_PCR,
+ (EFI_PHYSICAL_ADDRESS) (UINTN) loaded_image->LoadOptions,
+ loaded_image->LoadOptionsSize, loaded_image->LoadOptions);
+ if (EFI_ERROR(err)) {
+ Print(L"Unable to add image options measurement: %r", err);
+ uefi_call_wrapper(BS->Stall, 1, 200 * 1000);
+ }
+#endif
+ }
+
+ /* export the device path this image is started from */
+ if (disk_get_part_uuid(loaded_image->DeviceHandle, uuid) == EFI_SUCCESS)
+ efivar_set(L"LoaderDevicePartUUID", uuid, FALSE);
+
+ /* if LoaderImageIdentifier is not set, assume the image with this stub was loaded directly from UEFI */
+ if (efivar_get_raw(&global_guid, L"LoaderImageIdentifier", &b, &size) != EFI_SUCCESS) {
+ _cleanup_freepool_ CHAR16 *s;
+
+ s = DevicePathToStr(loaded_image->FilePath);
+ efivar_set(L"LoaderImageIdentifier", s, FALSE);
+ }
+
+ /* if LoaderFirmwareInfo is not set, let's set it */
+ if (efivar_get_raw(&global_guid, L"LoaderFirmwareInfo", &b, &size) != EFI_SUCCESS) {
+ _cleanup_freepool_ CHAR16 *s;
+
+ s = PoolPrint(L"%s %d.%02d", ST->FirmwareVendor, ST->FirmwareRevision >> 16, ST->FirmwareRevision & 0xffff);
+ efivar_set(L"LoaderFirmwareInfo", s, FALSE);
+ }
+
+ /* ditto for LoaderFirmwareType */
+ if (efivar_get_raw(&global_guid, L"LoaderFirmwareType", &b, &size) != EFI_SUCCESS) {
+ _cleanup_freepool_ CHAR16 *s;
+
+ s = PoolPrint(L"UEFI %d.%02d", ST->Hdr.Revision >> 16, ST->Hdr.Revision & 0xffff);
+ efivar_set(L"LoaderFirmwareType", s, FALSE);
+ }
+
+ /* add StubInfo */
+ if (efivar_get_raw(&global_guid, L"StubInfo", &b, &size) != EFI_SUCCESS)
+ efivar_set(L"StubInfo", L"systemd-stub " GIT_VERSION, FALSE);
+
+ if (szs[3] > 0)
+ graphics_splash((UINT8 *)((UINTN)loaded_image->ImageBase + addrs[3]), szs[3], NULL);
+
+ err = linux_exec(image, cmdline, cmdline_len,
+ (UINTN)loaded_image->ImageBase + addrs[1],
+ (UINTN)loaded_image->ImageBase + addrs[2], szs[2], secure);
+
+ graphics_mode(FALSE);
+ Print(L"Execution of embedded linux image failed: %r\n", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return err;
+}
diff --git a/src/boot/efi/util.c b/src/boot/efi/util.c
new file mode 100644
index 0000000..f1f1674
--- /dev/null
+++ b/src/boot/efi/util.c
@@ -0,0 +1,342 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "util.h"
+
+/*
+ * Allocated random UUID, intended to be shared across tools that implement
+ * the (ESP)\loader\entries\<vendor>-<revision>.conf convention and the
+ * associated EFI variables.
+ */
+const EFI_GUID loader_guid = { 0x4a67b082, 0x0a4c, 0x41cf, {0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f} };
+
+#ifdef __x86_64__
+UINT64 ticks_read(VOID) {
+ UINT64 a, d;
+ __asm__ volatile ("rdtsc" : "=a" (a), "=d" (d));
+ return (d << 32) | a;
+}
+#elif defined(__i386__)
+UINT64 ticks_read(VOID) {
+ UINT64 val;
+ __asm__ volatile ("rdtsc" : "=A" (val));
+ return val;
+}
+#else
+UINT64 ticks_read(VOID) {
+ UINT64 val = 1;
+ return val;
+}
+#endif
+
+/* count TSC ticks during a millisecond delay */
+UINT64 ticks_freq(VOID) {
+ UINT64 ticks_start, ticks_end;
+
+ ticks_start = ticks_read();
+ uefi_call_wrapper(BS->Stall, 1, 1000);
+ ticks_end = ticks_read();
+
+ return (ticks_end - ticks_start) * 1000UL;
+}
+
+UINT64 time_usec(VOID) {
+ UINT64 ticks;
+ static UINT64 freq;
+
+ ticks = ticks_read();
+ if (ticks == 0)
+ return 0;
+
+ if (freq == 0) {
+ freq = ticks_freq();
+ if (freq == 0)
+ return 0;
+ }
+
+ return 1000UL * 1000UL * ticks / freq;
+}
+
+EFI_STATUS parse_boolean(const CHAR8 *v, BOOLEAN *b) {
+ if (!v)
+ return EFI_INVALID_PARAMETER;
+
+ if (strcmpa(v, (CHAR8 *)"1") == 0 ||
+ strcmpa(v, (CHAR8 *)"yes") == 0 ||
+ strcmpa(v, (CHAR8 *)"y") == 0 ||
+ strcmpa(v, (CHAR8 *)"true") == 0) {
+ *b = TRUE;
+ return EFI_SUCCESS;
+ }
+
+ if (strcmpa(v, (CHAR8 *)"0") == 0 ||
+ strcmpa(v, (CHAR8 *)"no") == 0 ||
+ strcmpa(v, (CHAR8 *)"n") == 0 ||
+ strcmpa(v, (CHAR8 *)"false") == 0) {
+ *b = FALSE;
+ return EFI_SUCCESS;
+ }
+
+ return EFI_INVALID_PARAMETER;
+}
+
+EFI_STATUS efivar_set_raw(const EFI_GUID *vendor, const CHAR16 *name, const VOID *buf, UINTN size, BOOLEAN persistent) {
+ UINT32 flags;
+
+ flags = EFI_VARIABLE_BOOTSERVICE_ACCESS|EFI_VARIABLE_RUNTIME_ACCESS;
+ if (persistent)
+ flags |= EFI_VARIABLE_NON_VOLATILE;
+
+ return uefi_call_wrapper(RT->SetVariable, 5, (CHAR16*) name, (EFI_GUID *)vendor, flags, size, (VOID*) buf);
+}
+
+EFI_STATUS efivar_set(const CHAR16 *name, const CHAR16 *value, BOOLEAN persistent) {
+ return efivar_set_raw(&loader_guid, name, value, value ? (StrLen(value)+1) * sizeof(CHAR16) : 0, persistent);
+}
+
+EFI_STATUS efivar_set_int(CHAR16 *name, UINTN i, BOOLEAN persistent) {
+ CHAR16 str[32];
+
+ SPrint(str, 32, L"%u", i);
+ return efivar_set(name, str, persistent);
+}
+
+EFI_STATUS efivar_get(const CHAR16 *name, CHAR16 **value) {
+ _cleanup_freepool_ CHAR8 *buf = NULL;
+ EFI_STATUS err;
+ CHAR16 *val;
+ UINTN size;
+
+ err = efivar_get_raw(&loader_guid, name, &buf, &size);
+ if (EFI_ERROR(err))
+ return err;
+
+ /* Make sure there are no incomplete characters in the buffer */
+ if ((size % 2) != 0)
+ return EFI_INVALID_PARAMETER;
+
+ /* Return buffer directly if it happens to be NUL terminated already */
+ if (size >= 2 && buf[size-2] == 0 && buf[size-1] == 0) {
+ *value = (CHAR16*) buf;
+ buf = NULL;
+ return EFI_SUCCESS;
+ }
+
+ /* Make sure a terminating NUL is available at the end */
+ val = AllocatePool(size + 2);
+ if (!val)
+ return EFI_OUT_OF_RESOURCES;
+
+ CopyMem(val, buf, size);
+ val[size/2] = 0; /* NUL terminate */
+
+ *value = val;
+ return EFI_SUCCESS;
+}
+
+EFI_STATUS efivar_get_int(const CHAR16 *name, UINTN *i) {
+ _cleanup_freepool_ CHAR16 *val = NULL;
+ EFI_STATUS err;
+
+ err = efivar_get(name, &val);
+ if (!EFI_ERROR(err))
+ *i = Atoi(val);
+
+ return err;
+}
+
+EFI_STATUS efivar_get_raw(const EFI_GUID *vendor, const CHAR16 *name, CHAR8 **buffer, UINTN *size) {
+ _cleanup_freepool_ CHAR8 *buf = NULL;
+ UINTN l;
+ EFI_STATUS err;
+
+ l = sizeof(CHAR16 *) * EFI_MAXIMUM_VARIABLE_SIZE;
+ buf = AllocatePool(l);
+ if (!buf)
+ return EFI_OUT_OF_RESOURCES;
+
+ err = uefi_call_wrapper(RT->GetVariable, 5, (CHAR16*) name, (EFI_GUID *)vendor, NULL, &l, buf);
+ if (!EFI_ERROR(err)) {
+ *buffer = buf;
+ buf = NULL;
+ if (size)
+ *size = l;
+ }
+
+ return err;
+}
+
+VOID efivar_set_time_usec(CHAR16 *name, UINT64 usec) {
+ CHAR16 str[32];
+
+ if (usec == 0)
+ usec = time_usec();
+ if (usec == 0)
+ return;
+
+ SPrint(str, 32, L"%ld", usec);
+ efivar_set(name, str, FALSE);
+}
+
+static INTN utf8_to_16(CHAR8 *stra, CHAR16 *c) {
+ CHAR16 unichar;
+ UINTN len;
+ UINTN i;
+
+ if (stra[0] < 0x80)
+ len = 1;
+ else if ((stra[0] & 0xe0) == 0xc0)
+ len = 2;
+ else if ((stra[0] & 0xf0) == 0xe0)
+ len = 3;
+ else if ((stra[0] & 0xf8) == 0xf0)
+ len = 4;
+ else if ((stra[0] & 0xfc) == 0xf8)
+ len = 5;
+ else if ((stra[0] & 0xfe) == 0xfc)
+ len = 6;
+ else
+ return -1;
+
+ switch (len) {
+ case 1:
+ unichar = stra[0];
+ break;
+ case 2:
+ unichar = stra[0] & 0x1f;
+ break;
+ case 3:
+ unichar = stra[0] & 0x0f;
+ break;
+ case 4:
+ unichar = stra[0] & 0x07;
+ break;
+ case 5:
+ unichar = stra[0] & 0x03;
+ break;
+ case 6:
+ unichar = stra[0] & 0x01;
+ break;
+ }
+
+ for (i = 1; i < len; i++) {
+ if ((stra[i] & 0xc0) != 0x80)
+ return -1;
+ unichar <<= 6;
+ unichar |= stra[i] & 0x3f;
+ }
+
+ *c = unichar;
+ return len;
+}
+
+CHAR16 *stra_to_str(CHAR8 *stra) {
+ UINTN strlen;
+ UINTN len;
+ UINTN i;
+ CHAR16 *str;
+
+ len = strlena(stra);
+ str = AllocatePool((len + 1) * sizeof(CHAR16));
+
+ strlen = 0;
+ i = 0;
+ while (i < len) {
+ INTN utf8len;
+
+ utf8len = utf8_to_16(stra + i, str + strlen);
+ if (utf8len <= 0) {
+ /* invalid utf8 sequence, skip the garbage */
+ i++;
+ continue;
+ }
+
+ strlen++;
+ i += utf8len;
+ }
+ str[strlen] = '\0';
+ return str;
+}
+
+CHAR16 *stra_to_path(CHAR8 *stra) {
+ CHAR16 *str;
+ UINTN strlen;
+ UINTN len;
+ UINTN i;
+
+ len = strlena(stra);
+ str = AllocatePool((len + 2) * sizeof(CHAR16));
+
+ str[0] = '\\';
+ strlen = 1;
+ i = 0;
+ while (i < len) {
+ INTN utf8len;
+
+ utf8len = utf8_to_16(stra + i, str + strlen);
+ if (utf8len <= 0) {
+ /* invalid utf8 sequence, skip the garbage */
+ i++;
+ continue;
+ }
+
+ if (str[strlen] == '/')
+ str[strlen] = '\\';
+ if (str[strlen] == '\\' && str[strlen-1] == '\\') {
+ /* skip double slashes */
+ i += utf8len;
+ continue;
+ }
+
+ strlen++;
+ i += utf8len;
+ }
+ str[strlen] = '\0';
+ return str;
+}
+
+CHAR8 *strchra(CHAR8 *s, CHAR8 c) {
+ do {
+ if (*s == c)
+ return s;
+ } while (*s++);
+ return NULL;
+}
+
+EFI_STATUS file_read(EFI_FILE_HANDLE dir, const CHAR16 *name, UINTN off, UINTN size, CHAR8 **content, UINTN *content_size) {
+ EFI_FILE_HANDLE handle;
+ _cleanup_freepool_ CHAR8 *buf = NULL;
+ EFI_STATUS err;
+
+ err = uefi_call_wrapper(dir->Open, 5, dir, &handle, (CHAR16*) name, EFI_FILE_MODE_READ, 0ULL);
+ if (EFI_ERROR(err))
+ return err;
+
+ if (size == 0) {
+ _cleanup_freepool_ EFI_FILE_INFO *info;
+
+ info = LibFileInfo(handle);
+ size = info->FileSize+1;
+ }
+
+ if (off > 0) {
+ err = uefi_call_wrapper(handle->SetPosition, 2, handle, off);
+ if (EFI_ERROR(err))
+ return err;
+ }
+
+ buf = AllocatePool(size + 1);
+ err = uefi_call_wrapper(handle->Read, 3, handle, &size, buf);
+ if (!EFI_ERROR(err)) {
+ buf[size] = '\0';
+ *content = buf;
+ buf = NULL;
+ if (content_size)
+ *content_size = size;
+ }
+ uefi_call_wrapper(handle->Close, 1, handle);
+
+ return err;
+}
diff --git a/src/boot/efi/util.h b/src/boot/efi/util.h
new file mode 100644
index 0000000..4ba7050
--- /dev/null
+++ b/src/boot/efi/util.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <efi.h>
+#include <efilib.h>
+
+#define ELEMENTSOF(x) (sizeof(x)/sizeof((x)[0]))
+#define OFFSETOF(x,y) __builtin_offsetof(x,y)
+
+static inline const CHAR16 *yes_no(BOOLEAN b) {
+ return b ? L"yes" : L"no";
+}
+
+EFI_STATUS parse_boolean(const CHAR8 *v, BOOLEAN *b);
+
+UINT64 ticks_read(void);
+UINT64 ticks_freq(void);
+UINT64 time_usec(void);
+
+EFI_STATUS efivar_set(const CHAR16 *name, const CHAR16 *value, BOOLEAN persistent);
+EFI_STATUS efivar_set_raw(const EFI_GUID *vendor, const CHAR16 *name, const VOID *buf, UINTN size, BOOLEAN persistent);
+EFI_STATUS efivar_set_int(CHAR16 *name, UINTN i, BOOLEAN persistent);
+VOID efivar_set_time_usec(CHAR16 *name, UINT64 usec);
+
+EFI_STATUS efivar_get(const CHAR16 *name, CHAR16 **value);
+EFI_STATUS efivar_get_raw(const EFI_GUID *vendor, const CHAR16 *name, CHAR8 **buffer, UINTN *size);
+EFI_STATUS efivar_get_int(const CHAR16 *name, UINTN *i);
+
+CHAR8 *strchra(CHAR8 *s, CHAR8 c);
+CHAR16 *stra_to_path(CHAR8 *stra);
+CHAR16 *stra_to_str(CHAR8 *stra);
+
+EFI_STATUS file_read(EFI_FILE_HANDLE dir, const CHAR16 *name, UINTN off, UINTN size, CHAR8 **content, UINTN *content_size);
+
+static inline void FreePoolp(void *p) {
+ void *q = *(void**) p;
+
+ if (!q)
+ return;
+
+ FreePool(q);
+}
+
+#define _cleanup_(x) __attribute__((__cleanup__(x)))
+#define _cleanup_freepool_ _cleanup_(FreePoolp)
+
+static inline void FileHandleClosep(EFI_FILE_HANDLE *handle) {
+ if (!*handle)
+ return;
+
+ uefi_call_wrapper((*handle)->Close, 1, *handle);
+}
+
+const EFI_GUID loader_guid;
+
+#define UINTN_MAX (~(UINTN)0)
+#define INTN_MAX ((INTN)(UINTN_MAX>>1))
diff --git a/src/busctl/busctl-introspect.c b/src/busctl/busctl-introspect.c
new file mode 100644
index 0000000..18254ef
--- /dev/null
+++ b/src/busctl/busctl-introspect.c
@@ -0,0 +1,732 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "busctl-introspect.h"
+#include "string-util.h"
+#include "util.h"
+#include "xml.h"
+
+#define NODE_DEPTH_MAX 16
+
+typedef struct Context {
+ const XMLIntrospectOps *ops;
+ void *userdata;
+
+ char *interface_name;
+ uint64_t interface_flags;
+
+ char *member_name;
+ char *member_signature;
+ char *member_result;
+ uint64_t member_flags;
+ bool member_writable;
+
+ const char *current;
+ void *xml_state;
+} Context;
+
+static void context_reset_member(Context *c) {
+ free(c->member_name);
+ free(c->member_signature);
+ free(c->member_result);
+
+ c->member_name = c->member_signature = c->member_result = NULL;
+ c->member_flags = 0;
+ c->member_writable = false;
+}
+
+static void context_reset_interface(Context *c) {
+ c->interface_name = mfree(c->interface_name);
+ c->interface_flags = 0;
+
+ context_reset_member(c);
+}
+
+static int parse_xml_annotation(Context *context, uint64_t *flags) {
+
+ enum {
+ STATE_ANNOTATION,
+ STATE_NAME,
+ STATE_VALUE
+ } state = STATE_ANNOTATION;
+
+ _cleanup_free_ char *field = NULL, *value = NULL;
+
+ assert(context);
+
+ for (;;) {
+ _cleanup_free_ char *name = NULL;
+
+ int t;
+
+ t = xml_tokenize(&context->current, &name, &context->xml_state, NULL);
+ if (t < 0) {
+ log_error("XML parse error.");
+ return t;
+ }
+
+ if (t == XML_END)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Premature end of XML data.");
+
+ switch (state) {
+
+ case STATE_ANNOTATION:
+
+ if (t == XML_ATTRIBUTE_NAME) {
+
+ if (streq_ptr(name, "name"))
+ state = STATE_NAME;
+
+ else if (streq_ptr(name, "value"))
+ state = STATE_VALUE;
+
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected <annotation> attribute %s.",
+ name);
+
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "annotation"))) {
+
+ if (flags) {
+ if (streq_ptr(field, "org.freedesktop.DBus.Deprecated")) {
+
+ if (streq_ptr(value, "true"))
+ *flags |= SD_BUS_VTABLE_DEPRECATED;
+
+ } else if (streq_ptr(field, "org.freedesktop.DBus.Method.NoReply")) {
+
+ if (streq_ptr(value, "true"))
+ *flags |= SD_BUS_VTABLE_METHOD_NO_REPLY;
+
+ } else if (streq_ptr(field, "org.freedesktop.DBus.Property.EmitsChangedSignal")) {
+
+ if (streq_ptr(value, "const"))
+ *flags = (*flags & ~(SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION|SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE)) | SD_BUS_VTABLE_PROPERTY_CONST;
+ else if (streq_ptr(value, "invalidates"))
+ *flags = (*flags & ~(SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE|SD_BUS_VTABLE_PROPERTY_CONST)) | SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION;
+ else if (streq_ptr(value, "false"))
+ *flags = *flags & ~(SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE|SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION);
+ }
+ }
+
+ return 0;
+
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <annotation>. (1)");
+
+ break;
+
+ case STATE_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ free_and_replace(field, name);
+
+ state = STATE_ANNOTATION;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <annotation>. (2)");
+
+ break;
+
+ case STATE_VALUE:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ free_and_replace(value, name);
+
+ state = STATE_ANNOTATION;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <annotation>. (3)");
+
+ break;
+
+ default:
+ assert_not_reached("Bad state");
+ }
+ }
+}
+
+static int parse_xml_node(Context *context, const char *prefix, unsigned n_depth) {
+
+ enum {
+ STATE_NODE,
+ STATE_NODE_NAME,
+ STATE_INTERFACE,
+ STATE_INTERFACE_NAME,
+ STATE_METHOD,
+ STATE_METHOD_NAME,
+ STATE_METHOD_ARG,
+ STATE_METHOD_ARG_NAME,
+ STATE_METHOD_ARG_TYPE,
+ STATE_METHOD_ARG_DIRECTION,
+ STATE_SIGNAL,
+ STATE_SIGNAL_NAME,
+ STATE_SIGNAL_ARG,
+ STATE_SIGNAL_ARG_NAME,
+ STATE_SIGNAL_ARG_TYPE,
+ STATE_SIGNAL_ARG_DIRECTION,
+ STATE_PROPERTY,
+ STATE_PROPERTY_NAME,
+ STATE_PROPERTY_TYPE,
+ STATE_PROPERTY_ACCESS,
+ } state = STATE_NODE;
+
+ _cleanup_free_ char *node_path = NULL, *argument_type = NULL, *argument_direction = NULL;
+ const char *np = prefix;
+ int r;
+
+ assert(context);
+ assert(prefix);
+
+ if (n_depth > NODE_DEPTH_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "<node> depth too high.");
+
+ for (;;) {
+ _cleanup_free_ char *name = NULL;
+ int t;
+
+ t = xml_tokenize(&context->current, &name, &context->xml_state, NULL);
+ if (t < 0) {
+ log_error("XML parse error.");
+ return t;
+ }
+
+ if (t == XML_END)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Premature end of XML data.");
+
+ switch (state) {
+
+ case STATE_NODE:
+ if (t == XML_ATTRIBUTE_NAME) {
+
+ if (streq_ptr(name, "name"))
+ state = STATE_NODE_NAME;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected <node> attribute %s.", name);
+
+ } else if (t == XML_TAG_OPEN) {
+
+ if (streq_ptr(name, "interface"))
+ state = STATE_INTERFACE;
+ else if (streq_ptr(name, "node")) {
+
+ r = parse_xml_node(context, np, n_depth+1);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected <node> tag %s.", name);
+
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "node"))) {
+
+ if (context->ops->on_path) {
+ r = context->ops->on_path(node_path ? node_path : np, context->userdata);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <node>. (1)");
+
+ break;
+
+ case STATE_NODE_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+
+ free(node_path);
+
+ if (name[0] == '/')
+ node_path = TAKE_PTR(name);
+ else {
+
+ if (endswith(prefix, "/"))
+ node_path = strappend(prefix, name);
+ else
+ node_path = strjoin(prefix, "/", name);
+ if (!node_path)
+ return log_oom();
+ }
+
+ np = node_path;
+ state = STATE_NODE;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <node>. (2)");
+
+ break;
+
+ case STATE_INTERFACE:
+
+ if (t == XML_ATTRIBUTE_NAME) {
+ if (streq_ptr(name, "name"))
+ state = STATE_INTERFACE_NAME;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected <interface> attribute %s.",
+ name);
+
+ } else if (t == XML_TAG_OPEN) {
+ if (streq_ptr(name, "method"))
+ state = STATE_METHOD;
+ else if (streq_ptr(name, "signal"))
+ state = STATE_SIGNAL;
+ else if (streq_ptr(name, "property")) {
+ context->member_flags |= SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE;
+ state = STATE_PROPERTY;
+ } else if (streq_ptr(name, "annotation")) {
+ r = parse_xml_annotation(context, &context->interface_flags);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected <interface> tag %s.", name);
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "interface"))) {
+
+ if (n_depth == 0) {
+ if (context->ops->on_interface) {
+ r = context->ops->on_interface(context->interface_name, context->interface_flags, context->userdata);
+ if (r < 0)
+ return r;
+ }
+
+ context_reset_interface(context);
+ }
+
+ state = STATE_NODE;
+
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <interface>. (1)");
+
+ break;
+
+ case STATE_INTERFACE_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ if (n_depth == 0)
+ free_and_replace(context->interface_name, name);
+
+ state = STATE_INTERFACE;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <interface>. (2)");
+
+ break;
+
+ case STATE_METHOD:
+
+ if (t == XML_ATTRIBUTE_NAME) {
+ if (streq_ptr(name, "name"))
+ state = STATE_METHOD_NAME;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected <method> attribute %s",
+ name);
+ } else if (t == XML_TAG_OPEN) {
+ if (streq_ptr(name, "arg"))
+ state = STATE_METHOD_ARG;
+ else if (streq_ptr(name, "annotation")) {
+ r = parse_xml_annotation(context, &context->member_flags);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected <method> tag %s.",
+ name);
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "method"))) {
+
+ if (n_depth == 0) {
+ if (context->ops->on_method) {
+ r = context->ops->on_method(context->interface_name, context->member_name, context->member_signature, context->member_result, context->member_flags, context->userdata);
+ if (r < 0)
+ return r;
+ }
+
+ context_reset_member(context);
+ }
+
+ state = STATE_INTERFACE;
+
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <method> (1).");
+
+ break;
+
+ case STATE_METHOD_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ if (n_depth == 0)
+ free_and_replace(context->member_name, name);
+
+ state = STATE_METHOD;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <method> (2).");
+
+ break;
+
+ case STATE_METHOD_ARG:
+
+ if (t == XML_ATTRIBUTE_NAME) {
+ if (streq_ptr(name, "name"))
+ state = STATE_METHOD_ARG_NAME;
+ else if (streq_ptr(name, "type"))
+ state = STATE_METHOD_ARG_TYPE;
+ else if (streq_ptr(name, "direction"))
+ state = STATE_METHOD_ARG_DIRECTION;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected method <arg> attribute %s.",
+ name);
+ } else if (t == XML_TAG_OPEN) {
+ if (streq_ptr(name, "annotation")) {
+ r = parse_xml_annotation(context, NULL);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected method <arg> tag %s.",
+ name);
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "arg"))) {
+
+ if (n_depth == 0) {
+
+ if (argument_type) {
+ if (!argument_direction || streq(argument_direction, "in")) {
+ if (!strextend(&context->member_signature, argument_type, NULL))
+ return log_oom();
+ } else if (streq(argument_direction, "out")) {
+ if (!strextend(&context->member_result, argument_type, NULL))
+ return log_oom();
+ } else
+ log_error("Unexpected method <arg> direction value '%s'.", argument_direction);
+ }
+
+ argument_type = mfree(argument_type);
+ argument_direction = mfree(argument_direction);
+ }
+
+ state = STATE_METHOD;
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in method <arg>. (1)");
+
+ break;
+
+ case STATE_METHOD_ARG_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE)
+ state = STATE_METHOD_ARG;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in method <arg>. (2)");
+
+ break;
+
+ case STATE_METHOD_ARG_TYPE:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ free_and_replace(argument_type, name);
+
+ state = STATE_METHOD_ARG;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in method <arg>. (3)");
+
+ break;
+
+ case STATE_METHOD_ARG_DIRECTION:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ free_and_replace(argument_direction, name);
+
+ state = STATE_METHOD_ARG;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in method <arg>. (4)");
+
+ break;
+
+ case STATE_SIGNAL:
+
+ if (t == XML_ATTRIBUTE_NAME) {
+ if (streq_ptr(name, "name"))
+ state = STATE_SIGNAL_NAME;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected <signal> attribute %s.",
+ name);
+ } else if (t == XML_TAG_OPEN) {
+ if (streq_ptr(name, "arg"))
+ state = STATE_SIGNAL_ARG;
+ else if (streq_ptr(name, "annotation")) {
+ r = parse_xml_annotation(context, &context->member_flags);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected <signal> tag %s.",
+ name);
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "signal"))) {
+
+ if (n_depth == 0) {
+ if (context->ops->on_signal) {
+ r = context->ops->on_signal(context->interface_name, context->member_name, context->member_signature, context->member_flags, context->userdata);
+ if (r < 0)
+ return r;
+ }
+
+ context_reset_member(context);
+ }
+
+ state = STATE_INTERFACE;
+
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <signal>. (1)");
+
+ break;
+
+ case STATE_SIGNAL_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ if (n_depth == 0)
+ free_and_replace(context->member_name, name);
+
+ state = STATE_SIGNAL;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <signal>. (2)");
+
+ break;
+
+ case STATE_SIGNAL_ARG:
+
+ if (t == XML_ATTRIBUTE_NAME) {
+ if (streq_ptr(name, "name"))
+ state = STATE_SIGNAL_ARG_NAME;
+ else if (streq_ptr(name, "type"))
+ state = STATE_SIGNAL_ARG_TYPE;
+ else if (streq_ptr(name, "direction"))
+ state = STATE_SIGNAL_ARG_DIRECTION;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected signal <arg> attribute %s.",
+ name);
+ } else if (t == XML_TAG_OPEN) {
+ if (streq_ptr(name, "annotation")) {
+ r = parse_xml_annotation(context, NULL);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected signal <arg> tag %s.",
+ name);
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "arg"))) {
+
+ if (argument_type) {
+ if (!argument_direction || streq(argument_direction, "out")) {
+ if (!strextend(&context->member_signature, argument_type, NULL))
+ return log_oom();
+ } else
+ log_error("Unexpected signal <arg> direction value '%s'.", argument_direction);
+
+ argument_type = mfree(argument_type);
+ }
+
+ state = STATE_SIGNAL;
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in signal <arg> (1).");
+
+ break;
+
+ case STATE_SIGNAL_ARG_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE)
+ state = STATE_SIGNAL_ARG;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in signal <arg> (2).");
+
+ break;
+
+ case STATE_SIGNAL_ARG_TYPE:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ free_and_replace(argument_type, name);
+
+ state = STATE_SIGNAL_ARG;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in signal <arg> (3).");
+
+ break;
+
+ case STATE_SIGNAL_ARG_DIRECTION:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ free_and_replace(argument_direction, name);
+
+ state = STATE_SIGNAL_ARG;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in signal <arg>. (4)");
+
+ break;
+
+ case STATE_PROPERTY:
+
+ if (t == XML_ATTRIBUTE_NAME) {
+ if (streq_ptr(name, "name"))
+ state = STATE_PROPERTY_NAME;
+ else if (streq_ptr(name, "type"))
+ state = STATE_PROPERTY_TYPE;
+ else if (streq_ptr(name, "access"))
+ state = STATE_PROPERTY_ACCESS;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected <property> attribute %s.",
+ name);
+ } else if (t == XML_TAG_OPEN) {
+
+ if (streq_ptr(name, "annotation")) {
+ r = parse_xml_annotation(context, &context->member_flags);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected <property> tag %s.",
+ name);
+
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "property"))) {
+
+ if (n_depth == 0) {
+ if (context->ops->on_property) {
+ r = context->ops->on_property(context->interface_name, context->member_name, context->member_signature, context->member_writable, context->member_flags, context->userdata);
+ if (r < 0)
+ return r;
+ }
+
+ context_reset_member(context);
+ }
+
+ state = STATE_INTERFACE;
+
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <property>. (1)");
+
+ break;
+
+ case STATE_PROPERTY_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ if (n_depth == 0)
+ free_and_replace(context->member_name, name);
+
+ state = STATE_PROPERTY;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <property>. (2)");
+
+ break;
+
+ case STATE_PROPERTY_TYPE:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ if (n_depth == 0)
+ free_and_replace(context->member_signature, name);
+
+ state = STATE_PROPERTY;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <property>. (3)");
+
+ break;
+
+ case STATE_PROPERTY_ACCESS:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+
+ if (streq(name, "readwrite") || streq(name, "write"))
+ context->member_writable = true;
+
+ state = STATE_PROPERTY;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <property>. (4)");
+
+ break;
+ }
+ }
+}
+
+int parse_xml_introspect(const char *prefix, const char *xml, const XMLIntrospectOps *ops, void *userdata) {
+ Context context = {
+ .ops = ops,
+ .userdata = userdata,
+ .current = xml,
+ };
+
+ int r;
+
+ assert(prefix);
+ assert(xml);
+ assert(ops);
+
+ for (;;) {
+ _cleanup_free_ char *name = NULL;
+
+ r = xml_tokenize(&context.current, &name, &context.xml_state, NULL);
+ if (r < 0) {
+ log_error("XML parse error");
+ goto finish;
+ }
+
+ if (r == XML_END) {
+ r = 0;
+ break;
+ }
+
+ if (r == XML_TAG_OPEN) {
+
+ if (streq(name, "node")) {
+ r = parse_xml_node(&context, prefix, 0);
+ if (r < 0)
+ goto finish;
+ } else {
+ log_error("Unexpected tag '%s' in introspection data.", name);
+ r = -EBADMSG;
+ goto finish;
+ }
+ } else if (r != XML_TEXT || !in_charset(name, WHITESPACE)) {
+ log_error("Unexpected token.");
+ r = -EBADMSG;
+ goto finish;
+ }
+ }
+
+finish:
+ context_reset_interface(&context);
+
+ return r;
+}
diff --git a/src/busctl/busctl-introspect.h b/src/busctl/busctl-introspect.h
new file mode 100644
index 0000000..b17800e
--- /dev/null
+++ b/src/busctl/busctl-introspect.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+typedef struct XMLIntrospectOps {
+ int (*on_path)(const char *path, void *userdata);
+ int (*on_interface)(const char *name, uint64_t flags, void *userdata);
+ int (*on_method)(const char *interface, const char *name, const char *signature, const char *result, uint64_t flags, void *userdata);
+ int (*on_signal)(const char *interface, const char *name, const char *signature, uint64_t flags, void *userdata);
+ int (*on_property)(const char *interface, const char *name, const char *signature, bool writable, uint64_t flags, void *userdata);
+} XMLIntrospectOps;
+
+int parse_xml_introspect(const char *prefix, const char *xml, const XMLIntrospectOps *ops, void *userdata);
diff --git a/src/busctl/busctl.c b/src/busctl/busctl.c
new file mode 100644
index 0000000..08d9e70
--- /dev/null
+++ b/src/busctl/busctl.c
@@ -0,0 +1,2471 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+#include <stdio_ext.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-signature.h"
+#include "bus-type.h"
+#include "bus-util.h"
+#include "busctl-introspect.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "json.h"
+#include "locale-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "set.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "verbs.h"
+
+static enum {
+ JSON_OFF,
+ JSON_SHORT,
+ JSON_PRETTY,
+} arg_json = JSON_OFF;
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+static const char *arg_address = NULL;
+static bool arg_unique = false;
+static bool arg_acquired = false;
+static bool arg_activatable = false;
+static bool arg_show_machine = false;
+static char **arg_matches = NULL;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static const char *arg_host = NULL;
+static bool arg_user = false;
+static size_t arg_snaplen = 4096;
+static bool arg_list = false;
+static bool arg_quiet = false;
+static bool arg_verbose = false;
+static bool arg_expect_reply = true;
+static bool arg_auto_start = true;
+static bool arg_allow_interactive_authorization = true;
+static bool arg_augment_creds = true;
+static bool arg_watch_bind = false;
+static usec_t arg_timeout = 0;
+
+STATIC_DESTRUCTOR_REGISTER(arg_matches, strv_freep);
+
+#define NAME_IS_ACQUIRED INT_TO_PTR(1)
+#define NAME_IS_ACTIVATABLE INT_TO_PTR(2)
+
+static int json_transform_message(sd_bus_message *m, JsonVariant **ret);
+static void json_dump_with_flags(JsonVariant *v, FILE *f);
+
+static int acquire_bus(bool set_monitor, sd_bus **ret) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ r = sd_bus_new(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate bus: %m");
+
+ if (set_monitor) {
+ r = sd_bus_set_monitor(bus, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set monitor mode: %m");
+
+ r = sd_bus_negotiate_creds(bus, true, _SD_BUS_CREDS_ALL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable credentials: %m");
+
+ r = sd_bus_negotiate_timestamp(bus, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable timestamps: %m");
+
+ r = sd_bus_negotiate_fds(bus, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable fds: %m");
+ }
+
+ r = sd_bus_set_bus_client(bus, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set bus client: %m");
+
+ r = sd_bus_set_watch_bind(bus, arg_watch_bind);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set watch-bind setting to '%s': %m", yes_no(arg_watch_bind));
+
+ if (arg_address)
+ r = sd_bus_set_address(bus, arg_address);
+ else {
+ switch (arg_transport) {
+
+ case BUS_TRANSPORT_LOCAL:
+ if (arg_user) {
+ bus->is_user = true;
+ r = bus_set_address_user(bus);
+ } else {
+ bus->is_system = true;
+ r = bus_set_address_system(bus);
+ }
+ break;
+
+ case BUS_TRANSPORT_REMOTE:
+ r = bus_set_address_system_remote(bus, arg_host);
+ break;
+
+ case BUS_TRANSPORT_MACHINE:
+ r = bus_set_address_system_machine(bus, arg_host);
+ break;
+
+ default:
+ assert_not_reached("Hmm, unknown transport type.");
+ }
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to set address: %m");
+
+ r = sd_bus_start(bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to bus: %m");
+
+ *ret = TAKE_PTR(bus);
+
+ return 0;
+}
+
+static int list_bus_names(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_strv_free_ char **acquired = NULL, **activatable = NULL;
+ _cleanup_free_ char **merged = NULL;
+ _cleanup_hashmap_free_ Hashmap *names = NULL;
+ char **i;
+ int r;
+ size_t max_i = 0;
+ unsigned n = 0;
+ void *v;
+ char *k;
+ Iterator iterator;
+
+ if (!arg_unique && !arg_acquired && !arg_activatable)
+ arg_unique = arg_acquired = arg_activatable = true;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_list_names(bus, (arg_acquired || arg_unique) ? &acquired : NULL, arg_activatable ? &activatable : NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list names: %m");
+
+ (void) pager_open(arg_pager_flags);
+
+ names = hashmap_new(&string_hash_ops);
+ if (!names)
+ return log_oom();
+
+ STRV_FOREACH(i, acquired) {
+ max_i = MAX(max_i, strlen(*i));
+
+ r = hashmap_put(names, *i, NAME_IS_ACQUIRED);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add to hashmap: %m");
+ }
+
+ STRV_FOREACH(i, activatable) {
+ max_i = MAX(max_i, strlen(*i));
+
+ r = hashmap_put(names, *i, NAME_IS_ACTIVATABLE);
+ if (r < 0 && r != -EEXIST)
+ return log_error_errno(r, "Failed to add to hashmap: %m");
+ }
+
+ merged = new(char*, hashmap_size(names) + 1);
+ if (!merged)
+ return log_oom();
+
+ HASHMAP_FOREACH_KEY(v, k, names, iterator)
+ merged[n++] = k;
+
+ merged[n] = NULL;
+ strv_sort(merged);
+
+ if (arg_legend) {
+ printf("%-*s %*s %-*s %-*s %-*s %-*s %-*s %-*s",
+ (int) max_i, "NAME", 10, "PID", 15, "PROCESS", 16, "USER", 13, "CONNECTION", 25, "UNIT", 10, "SESSION", 19, "DESCRIPTION");
+
+ if (arg_show_machine)
+ puts(" MACHINE");
+ else
+ putchar('\n');
+ }
+
+ STRV_FOREACH(i, merged) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ sd_id128_t mid;
+
+ if (hashmap_get(names, *i) == NAME_IS_ACTIVATABLE) {
+ /* Activatable */
+
+ printf("%-*s", (int) max_i, *i);
+ printf(" - - - (activatable) - - ");
+ if (arg_show_machine)
+ puts(" -");
+ else
+ putchar('\n');
+ continue;
+
+ }
+
+ if (!arg_unique && (*i)[0] == ':')
+ continue;
+
+ if (!arg_acquired && (*i)[0] != ':')
+ continue;
+
+ printf("%-*s", (int) max_i, *i);
+
+ r = sd_bus_get_name_creds(
+ bus, *i,
+ (arg_augment_creds ? SD_BUS_CREDS_AUGMENT : 0) |
+ SD_BUS_CREDS_EUID|SD_BUS_CREDS_PID|SD_BUS_CREDS_COMM|
+ SD_BUS_CREDS_UNIQUE_NAME|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_SESSION|
+ SD_BUS_CREDS_DESCRIPTION, &creds);
+ if (r >= 0) {
+ const char *unique, *session, *unit, *cn;
+ pid_t pid;
+ uid_t uid;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r >= 0) {
+ const char *comm = NULL;
+
+ sd_bus_creds_get_comm(creds, &comm);
+
+ printf(" %10lu %-15s", (unsigned long) pid, strna(comm));
+ } else
+ fputs(" - - ", stdout);
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r >= 0) {
+ _cleanup_free_ char *u = NULL;
+
+ u = uid_to_name(uid);
+ if (!u)
+ return log_oom();
+
+ if (strlen(u) > 16)
+ u[16] = 0;
+
+ printf(" %-16s", u);
+ } else
+ fputs(" - ", stdout);
+
+ r = sd_bus_creds_get_unique_name(creds, &unique);
+ if (r >= 0)
+ printf(" %-13s", unique);
+ else
+ fputs(" - ", stdout);
+
+ r = sd_bus_creds_get_unit(creds, &unit);
+ if (r >= 0) {
+ _cleanup_free_ char *e;
+
+ e = ellipsize(unit, 25, 100);
+ if (!e)
+ return log_oom();
+
+ printf(" %-25s", e);
+ } else
+ fputs(" - ", stdout);
+
+ r = sd_bus_creds_get_session(creds, &session);
+ if (r >= 0)
+ printf(" %-10s", session);
+ else
+ fputs(" - ", stdout);
+
+ r = sd_bus_creds_get_description(creds, &cn);
+ if (r >= 0)
+ printf(" %-19s", cn);
+ else
+ fputs(" - ", stdout);
+
+ } else
+ printf(" - - - - - - - ");
+
+ if (arg_show_machine) {
+ r = sd_bus_get_name_machine_id(bus, *i, &mid);
+ if (r >= 0) {
+ char m[SD_ID128_STRING_MAX];
+ printf(" %s\n", sd_id128_to_string(mid, m));
+ } else
+ puts(" -");
+ } else
+ putchar('\n');
+ }
+
+ return 0;
+}
+
+static void print_subtree(const char *prefix, const char *path, char **l) {
+ const char *vertical, *space;
+ char **n;
+
+ /* We assume the list is sorted. Let's first skip over the
+ * entry we are looking at. */
+ for (;;) {
+ if (!*l)
+ return;
+
+ if (!streq(*l, path))
+ break;
+
+ l++;
+ }
+
+ vertical = strjoina(prefix, special_glyph(SPECIAL_GLYPH_TREE_VERTICAL));
+ space = strjoina(prefix, special_glyph(SPECIAL_GLYPH_TREE_SPACE));
+
+ for (;;) {
+ bool has_more = false;
+
+ if (!*l || !path_startswith(*l, path))
+ break;
+
+ n = l + 1;
+ for (;;) {
+ if (!*n || !path_startswith(*n, path))
+ break;
+
+ if (!path_startswith(*n, *l)) {
+ has_more = true;
+ break;
+ }
+
+ n++;
+ }
+
+ printf("%s%s%s\n", prefix, special_glyph(has_more ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT), *l);
+
+ print_subtree(has_more ? vertical : space, *l, l);
+ l = n;
+ }
+}
+
+static void print_tree(const char *prefix, char **l) {
+
+ prefix = strempty(prefix);
+
+ if (arg_list) {
+ char **i;
+
+ STRV_FOREACH(i, l)
+ printf("%s%s\n", prefix, *i);
+ return;
+ }
+
+ if (strv_isempty(l)) {
+ printf("No objects discovered.\n");
+ return;
+ }
+
+ if (streq(l[0], "/") && !l[1]) {
+ printf("Only root object discovered.\n");
+ return;
+ }
+
+ print_subtree(prefix, "/", l);
+}
+
+static int on_path(const char *path, void *userdata) {
+ Set *paths = userdata;
+ int r;
+
+ assert(paths);
+
+ r = set_put_strdup(paths, path);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static int find_nodes(sd_bus *bus, const char *service, const char *path, Set *paths, bool many) {
+ static const XMLIntrospectOps ops = {
+ .on_path = on_path,
+ };
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *xml;
+ int r;
+
+ r = sd_bus_call_method(bus, service, path, "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, "");
+ if (r < 0) {
+ if (many)
+ printf("Failed to introspect object %s of service %s: %s\n", path, service, bus_error_message(&error, r));
+ else
+ log_error_errno(r, "Failed to introspect object %s of service %s: %s", path, service, bus_error_message(&error, r));
+ return r;
+ }
+
+ r = sd_bus_message_read(reply, "s", &xml);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return parse_xml_introspect(path, xml, &ops, paths);
+}
+
+static int tree_one(sd_bus *bus, const char *service, const char *prefix, bool many) {
+ _cleanup_set_free_free_ Set *paths = NULL, *done = NULL, *failed = NULL;
+ _cleanup_free_ char **l = NULL;
+ char *m;
+ int r;
+
+ paths = set_new(&string_hash_ops);
+ if (!paths)
+ return log_oom();
+
+ done = set_new(&string_hash_ops);
+ if (!done)
+ return log_oom();
+
+ failed = set_new(&string_hash_ops);
+ if (!failed)
+ return log_oom();
+
+ m = strdup("/");
+ if (!m)
+ return log_oom();
+
+ r = set_put(paths, m);
+ if (r < 0) {
+ free(m);
+ return log_oom();
+ }
+
+ for (;;) {
+ _cleanup_free_ char *p = NULL;
+ int q;
+
+ p = set_steal_first(paths);
+ if (!p)
+ break;
+
+ if (set_contains(done, p) ||
+ set_contains(failed, p))
+ continue;
+
+ q = find_nodes(bus, service, p, paths, many);
+ if (q < 0) {
+ if (r >= 0)
+ r = q;
+
+ q = set_put(failed, p);
+ } else
+ q = set_put(done, p);
+
+ if (q < 0)
+ return log_oom();
+
+ assert(q != 0);
+ p = NULL;
+ }
+
+ (void) pager_open(arg_pager_flags);
+
+ l = set_get_strv(done);
+ if (!l)
+ return log_oom();
+
+ strv_sort(l);
+ print_tree(prefix, l);
+
+ fflush(stdout);
+
+ return r;
+}
+
+static int tree(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ char **i;
+ int r = 0;
+
+ if (!arg_unique && !arg_acquired)
+ arg_acquired = true;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ if (argc <= 1) {
+ _cleanup_strv_free_ char **names = NULL;
+ bool not_first = false;
+
+ r = sd_bus_list_names(bus, &names, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get name list: %m");
+
+ (void) pager_open(arg_pager_flags);
+
+ STRV_FOREACH(i, names) {
+ int q;
+
+ if (!arg_unique && (*i)[0] == ':')
+ continue;
+
+ if (!arg_acquired && (*i)[0] == ':')
+ continue;
+
+ if (not_first)
+ printf("\n");
+
+ printf("Service %s%s%s:\n", ansi_highlight(), *i, ansi_normal());
+
+ q = tree_one(bus, *i, NULL, true);
+ if (q < 0 && r >= 0)
+ r = q;
+
+ not_first = true;
+ }
+ } else {
+ STRV_FOREACH(i, argv+1) {
+ int q;
+
+ if (i > argv+1)
+ printf("\n");
+
+ if (argv[2]) {
+ (void) pager_open(arg_pager_flags);
+ printf("Service %s%s%s:\n", ansi_highlight(), *i, ansi_normal());
+ }
+
+ q = tree_one(bus, *i, NULL, !!argv[2]);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+ }
+
+ return r;
+}
+
+static int format_cmdline(sd_bus_message *m, FILE *f, bool needs_space) {
+ int r;
+
+ for (;;) {
+ const char *contents = NULL;
+ char type;
+ union {
+ uint8_t u8;
+ uint16_t u16;
+ int16_t s16;
+ uint32_t u32;
+ int32_t s32;
+ uint64_t u64;
+ int64_t s64;
+ double d64;
+ const char *string;
+ int i;
+ } basic;
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return needs_space;
+
+ if (bus_type_is_container(type) > 0) {
+
+ r = sd_bus_message_enter_container(m, type, contents);
+ if (r < 0)
+ return r;
+
+ if (type == SD_BUS_TYPE_ARRAY) {
+ unsigned n = 0;
+
+ /* count array entries */
+ for (;;) {
+
+ r = sd_bus_message_skip(m, contents);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ n++;
+ }
+
+ r = sd_bus_message_rewind(m, false);
+ if (r < 0)
+ return r;
+
+ if (needs_space)
+ fputc(' ', f);
+
+ fprintf(f, "%u", n);
+ needs_space = true;
+
+ } else if (type == SD_BUS_TYPE_VARIANT) {
+
+ if (needs_space)
+ fputc(' ', f);
+
+ fprintf(f, "%s", contents);
+ needs_space = true;
+ }
+
+ r = format_cmdline(m, f, needs_space);
+ if (r < 0)
+ return r;
+
+ needs_space = r > 0;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ r = sd_bus_message_read_basic(m, type, &basic);
+ if (r < 0)
+ return r;
+
+ if (needs_space)
+ fputc(' ', f);
+
+ switch (type) {
+ case SD_BUS_TYPE_BYTE:
+ fprintf(f, "%u", basic.u8);
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ fputs(true_false(basic.i), f);
+ break;
+
+ case SD_BUS_TYPE_INT16:
+ fprintf(f, "%i", basic.s16);
+ break;
+
+ case SD_BUS_TYPE_UINT16:
+ fprintf(f, "%u", basic.u16);
+ break;
+
+ case SD_BUS_TYPE_INT32:
+ fprintf(f, "%i", basic.s32);
+ break;
+
+ case SD_BUS_TYPE_UINT32:
+ fprintf(f, "%u", basic.u32);
+ break;
+
+ case SD_BUS_TYPE_INT64:
+ fprintf(f, "%" PRIi64, basic.s64);
+ break;
+
+ case SD_BUS_TYPE_UINT64:
+ fprintf(f, "%" PRIu64, basic.u64);
+ break;
+
+ case SD_BUS_TYPE_DOUBLE:
+ fprintf(f, "%g", basic.d64);
+ break;
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE: {
+ _cleanup_free_ char *b = NULL;
+
+ b = cescape(basic.string);
+ if (!b)
+ return -ENOMEM;
+
+ fprintf(f, "\"%s\"", b);
+ break;
+ }
+
+ case SD_BUS_TYPE_UNIX_FD:
+ fprintf(f, "%i", basic.i);
+ break;
+
+ default:
+ assert_not_reached("Unknown basic type.");
+ }
+
+ needs_space = true;
+ }
+}
+
+typedef struct Member {
+ const char *type;
+ char *interface;
+ char *name;
+ char *signature;
+ char *result;
+ char *value;
+ bool writable;
+ uint64_t flags;
+} Member;
+
+static void member_hash_func(const Member *m, struct siphash *state) {
+ uint64_t arity = 1;
+
+ assert(m);
+ assert(m->type);
+
+ string_hash_func(m->type, state);
+
+ arity += !!m->name + !!m->interface;
+
+ uint64_hash_func(&arity, state);
+
+ if (m->name)
+ string_hash_func(m->name, state);
+
+ if (m->interface)
+ string_hash_func(m->interface, state);
+}
+
+static int member_compare_func(const Member *x, const Member *y) {
+ int d;
+
+ assert(x);
+ assert(y);
+ assert(x->type);
+ assert(y->type);
+
+ d = strcmp_ptr(x->interface, y->interface);
+ if (d != 0)
+ return d;
+
+ d = strcmp(x->type, y->type);
+ if (d != 0)
+ return d;
+
+ return strcmp_ptr(x->name, y->name);
+}
+
+static int member_compare_funcp(Member * const *a, Member * const *b) {
+ return member_compare_func(*a, *b);
+}
+
+static void member_free(Member *m) {
+ if (!m)
+ return;
+
+ free(m->interface);
+ free(m->name);
+ free(m->signature);
+ free(m->result);
+ free(m->value);
+ free(m);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Member*, member_free);
+
+static void member_set_free(Set *s) {
+ set_free_with_destructor(s, member_free);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Set*, member_set_free);
+
+static int on_interface(const char *interface, uint64_t flags, void *userdata) {
+ _cleanup_(member_freep) Member *m;
+ Set *members = userdata;
+ int r;
+
+ assert(interface);
+ assert(members);
+
+ m = new0(Member, 1);
+ if (!m)
+ return log_oom();
+
+ m->type = "interface";
+ m->flags = flags;
+
+ r = free_and_strdup(&m->interface, interface);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(members, m);
+ if (r <= 0) {
+ log_error("Duplicate interface");
+ return -EINVAL;
+ }
+
+ m = NULL;
+ return 0;
+}
+
+static int on_method(const char *interface, const char *name, const char *signature, const char *result, uint64_t flags, void *userdata) {
+ _cleanup_(member_freep) Member *m;
+ Set *members = userdata;
+ int r;
+
+ assert(interface);
+ assert(name);
+
+ m = new0(Member, 1);
+ if (!m)
+ return log_oom();
+
+ m->type = "method";
+ m->flags = flags;
+
+ r = free_and_strdup(&m->interface, interface);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&m->name, name);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&m->signature, signature);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&m->result, result);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(members, m);
+ if (r <= 0) {
+ log_error("Duplicate method");
+ return -EINVAL;
+ }
+
+ m = NULL;
+ return 0;
+}
+
+static int on_signal(const char *interface, const char *name, const char *signature, uint64_t flags, void *userdata) {
+ _cleanup_(member_freep) Member *m;
+ Set *members = userdata;
+ int r;
+
+ assert(interface);
+ assert(name);
+
+ m = new0(Member, 1);
+ if (!m)
+ return log_oom();
+
+ m->type = "signal";
+ m->flags = flags;
+
+ r = free_and_strdup(&m->interface, interface);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&m->name, name);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&m->signature, signature);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(members, m);
+ if (r <= 0) {
+ log_error("Duplicate signal");
+ return -EINVAL;
+ }
+
+ m = NULL;
+ return 0;
+}
+
+static int on_property(const char *interface, const char *name, const char *signature, bool writable, uint64_t flags, void *userdata) {
+ _cleanup_(member_freep) Member *m;
+ Set *members = userdata;
+ int r;
+
+ assert(interface);
+ assert(name);
+
+ m = new0(Member, 1);
+ if (!m)
+ return log_oom();
+
+ m->type = "property";
+ m->flags = flags;
+ m->writable = writable;
+
+ r = free_and_strdup(&m->interface, interface);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&m->name, name);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&m->signature, signature);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(members, m);
+ if (r <= 0) {
+ log_error("Duplicate property");
+ return -EINVAL;
+ }
+
+ m = NULL;
+ return 0;
+}
+
+DEFINE_PRIVATE_HASH_OPS(member_hash_ops, Member, member_hash_func, member_compare_func);
+
+static int introspect(int argc, char **argv, void *userdata) {
+ static const XMLIntrospectOps ops = {
+ .on_interface = on_interface,
+ .on_method = on_method,
+ .on_signal = on_signal,
+ .on_property = on_property,
+ };
+
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply_xml = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(member_set_freep) Set *members = NULL;
+ unsigned name_width, type_width, signature_width, result_width, j, k = 0;
+ Member *m, **sorted = NULL;
+ Iterator i;
+ const char *xml;
+ int r;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ members = set_new(&member_hash_ops);
+ if (!members)
+ return log_oom();
+
+ r = sd_bus_call_method(bus, argv[1], argv[2], "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply_xml, "");
+ if (r < 0)
+ return log_error_errno(r, "Failed to introspect object %s of service %s: %s", argv[2], argv[1], bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply_xml, "s", &xml);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ /* First, get list of all properties */
+ r = parse_xml_introspect(argv[2], xml, &ops, members);
+ if (r < 0)
+ return r;
+
+ /* Second, find the current values for them */
+ SET_FOREACH(m, members, i) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+
+ if (!streq(m->type, "property"))
+ continue;
+
+ if (m->value)
+ continue;
+
+ if (argv[3] && !streq(argv[3], m->interface))
+ continue;
+
+ r = sd_bus_call_method(bus, argv[1], argv[2], "org.freedesktop.DBus.Properties", "GetAll", &error, &reply, "s", m->interface);
+ if (r < 0)
+ return log_error_errno(r, "%s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "{sv}");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ Member *z;
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *mf = NULL;
+ size_t sz = 0;
+ const char *name;
+
+ r = sd_bus_message_enter_container(reply, 'e', "sv");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(reply, "s", &name);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_enter_container(reply, 'v', NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ mf = open_memstream(&buf, &sz);
+ if (!mf)
+ return log_oom();
+
+ (void) __fsetlocking(mf, FSETLOCKING_BYCALLER);
+
+ r = format_cmdline(reply, mf, false);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ mf = safe_fclose(mf);
+
+ z = set_get(members, &((Member) {
+ .type = "property",
+ .interface = m->interface,
+ .name = (char*) name }));
+ if (z)
+ free_and_replace(z->value, buf);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ (void) pager_open(arg_pager_flags);
+
+ name_width = STRLEN("NAME");
+ type_width = STRLEN("TYPE");
+ signature_width = STRLEN("SIGNATURE");
+ result_width = STRLEN("RESULT/VALUE");
+
+ sorted = newa(Member*, set_size(members));
+
+ SET_FOREACH(m, members, i) {
+
+ if (argv[3] && !streq(argv[3], m->interface))
+ continue;
+
+ if (m->interface)
+ name_width = MAX(name_width, strlen(m->interface));
+ if (m->name)
+ name_width = MAX(name_width, strlen(m->name) + 1);
+ if (m->type)
+ type_width = MAX(type_width, strlen(m->type));
+ if (m->signature)
+ signature_width = MAX(signature_width, strlen(m->signature));
+ if (m->result)
+ result_width = MAX(result_width, strlen(m->result));
+ if (m->value)
+ result_width = MAX(result_width, strlen(m->value));
+
+ sorted[k++] = m;
+ }
+
+ if (result_width > 40)
+ result_width = 40;
+
+ typesafe_qsort(sorted, k, member_compare_funcp);
+
+ if (arg_legend) {
+ printf("%-*s %-*s %-*s %-*s %s\n",
+ (int) name_width, "NAME",
+ (int) type_width, "TYPE",
+ (int) signature_width, "SIGNATURE",
+ (int) result_width, "RESULT/VALUE",
+ "FLAGS");
+ }
+
+ for (j = 0; j < k; j++) {
+ _cleanup_free_ char *ellipsized = NULL;
+ const char *rv;
+ bool is_interface;
+
+ m = sorted[j];
+
+ if (argv[3] && !streq(argv[3], m->interface))
+ continue;
+
+ is_interface = streq(m->type, "interface");
+
+ if (argv[3] && is_interface)
+ continue;
+
+ if (m->value) {
+ ellipsized = ellipsize(m->value, result_width, 100);
+ if (!ellipsized)
+ return log_oom();
+
+ rv = ellipsized;
+ } else
+ rv = empty_to_dash(m->result);
+
+ printf("%s%s%-*s%s %-*s %-*s %-*s%s%s%s%s%s%s\n",
+ is_interface ? ansi_highlight() : "",
+ is_interface ? "" : ".",
+ - !is_interface + (int) name_width, empty_to_dash(streq_ptr(m->type, "interface") ? m->interface : m->name),
+ is_interface ? ansi_normal() : "",
+ (int) type_width, empty_to_dash(m->type),
+ (int) signature_width, empty_to_dash(m->signature),
+ (int) result_width, rv,
+ (m->flags & SD_BUS_VTABLE_DEPRECATED) ? " deprecated" : (m->flags || m->writable ? "" : " -"),
+ (m->flags & SD_BUS_VTABLE_METHOD_NO_REPLY) ? " no-reply" : "",
+ (m->flags & SD_BUS_VTABLE_PROPERTY_CONST) ? " const" : "",
+ (m->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE) ? " emits-change" : "",
+ (m->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION) ? " emits-invalidation" : "",
+ m->writable ? " writable" : "");
+ }
+
+ return 0;
+}
+
+static int message_dump(sd_bus_message *m, FILE *f) {
+ return bus_message_dump(m, f, BUS_MESSAGE_DUMP_WITH_HEADER);
+}
+
+static int message_pcap(sd_bus_message *m, FILE *f) {
+ return bus_message_pcap_frame(m, arg_snaplen, f);
+}
+
+static int message_json(sd_bus_message *m, FILE *f) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *w = NULL;
+ char e[2];
+ int r;
+
+ r = json_transform_message(m, &v);
+ if (r < 0)
+ return r;
+
+ e[0] = m->header->endian;
+ e[1] = 0;
+
+ r = json_build(&w, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("type", JSON_BUILD_STRING(bus_message_type_to_string(m->header->type))),
+ JSON_BUILD_PAIR("endian", JSON_BUILD_STRING(e)),
+ JSON_BUILD_PAIR("flags", JSON_BUILD_INTEGER(m->header->flags)),
+ JSON_BUILD_PAIR("version", JSON_BUILD_INTEGER(m->header->version)),
+ JSON_BUILD_PAIR_CONDITION(m->priority != 0, "priority", JSON_BUILD_INTEGER(m->priority)),
+ JSON_BUILD_PAIR("cookie", JSON_BUILD_INTEGER(BUS_MESSAGE_COOKIE(m))),
+ JSON_BUILD_PAIR_CONDITION(m->reply_cookie != 0, "reply_cookie", JSON_BUILD_INTEGER(m->reply_cookie)),
+ JSON_BUILD_PAIR_CONDITION(m->sender, "sender", JSON_BUILD_STRING(m->sender)),
+ JSON_BUILD_PAIR_CONDITION(m->destination, "destination", JSON_BUILD_STRING(m->destination)),
+ JSON_BUILD_PAIR_CONDITION(m->path, "path", JSON_BUILD_STRING(m->path)),
+ JSON_BUILD_PAIR_CONDITION(m->interface, "interface", JSON_BUILD_STRING(m->interface)),
+ JSON_BUILD_PAIR_CONDITION(m->member, "member", JSON_BUILD_STRING(m->member)),
+ JSON_BUILD_PAIR_CONDITION(m->monotonic != 0, "monotonic", JSON_BUILD_INTEGER(m->monotonic)),
+ JSON_BUILD_PAIR_CONDITION(m->realtime != 0, "realtime", JSON_BUILD_INTEGER(m->realtime)),
+ JSON_BUILD_PAIR_CONDITION(m->seqnum != 0, "seqnum", JSON_BUILD_INTEGER(m->seqnum)),
+ JSON_BUILD_PAIR_CONDITION(m->error.name, "error_name", JSON_BUILD_STRING(m->error.name)),
+ JSON_BUILD_PAIR("payload", JSON_BUILD_VARIANT(v))));
+ if (r < 0)
+ return log_error_errno(r, "Failed to build JSON object: %m");
+
+ json_dump_with_flags(w, f);
+ return 0;
+}
+
+static int monitor(int argc, char **argv, int (*dump)(sd_bus_message *m, FILE *f)) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *message = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char **i;
+ uint32_t flags = 0;
+ const char *unique_name;
+ bool is_monitor = false;
+ int r;
+
+ r = acquire_bus(true, &bus);
+ if (r < 0)
+ return r;
+
+ /* upgrade connection; it's not used for anything else after this call */
+ r = sd_bus_message_new_method_call(bus, &message, "org.freedesktop.DBus", "/org/freedesktop/DBus", "org.freedesktop.DBus.Monitoring", "BecomeMonitor");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(message, 'a', "s");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ STRV_FOREACH(i, argv+1) {
+ _cleanup_free_ char *m = NULL;
+
+ if (!service_name_is_valid(*i)) {
+ log_error("Invalid service name '%s'", *i);
+ return -EINVAL;
+ }
+
+ m = strjoin("sender='", *i, "'");
+ if (!m)
+ return log_oom();
+
+ r = sd_bus_message_append_basic(message, 's', m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ free(m);
+ m = strjoin("destination='", *i, "'");
+ if (!m)
+ return log_oom();
+
+ r = sd_bus_message_append_basic(message, 's', m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ STRV_FOREACH(i, arg_matches) {
+ r = sd_bus_message_append_basic(message, 's', *i);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(message);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(message, 'u', &flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, message, arg_timeout, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "%s", bus_error_message(&error, r));
+
+ r = sd_bus_get_unique_name(bus, &unique_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unique name: %m");
+
+ log_info("Monitoring bus message stream.");
+
+ for (;;) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = sd_bus_process(bus, &m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to process bus: %m");
+
+ if (!is_monitor) {
+ const char *name;
+
+ /* wait until we lose our unique name */
+ if (sd_bus_message_is_signal(m, "org.freedesktop.DBus", "NameLost") <= 0)
+ continue;
+
+ r = sd_bus_message_read(m, "s", &name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read lost name: %m");
+
+ if (streq(name, unique_name))
+ is_monitor = true;
+
+ continue;
+ }
+
+ if (m) {
+ dump(m, stdout);
+ fflush(stdout);
+
+ if (sd_bus_message_is_signal(m, "org.freedesktop.DBus.Local", "Disconnected") > 0) {
+ log_info("Connection terminated, exiting.");
+ return 0;
+ }
+
+ continue;
+ }
+
+ if (r > 0)
+ continue;
+
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for bus: %m");
+ }
+}
+
+static int verb_monitor(int argc, char **argv, void *userdata) {
+ return monitor(argc, argv, arg_json != JSON_OFF ? message_json : message_dump);
+}
+
+static int verb_capture(int argc, char **argv, void *userdata) {
+ int r;
+
+ if (isatty(fileno(stdout)) > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Refusing to write message data to console, please redirect output to a file.");
+
+ bus_pcap_header(arg_snaplen, stdout);
+
+ r = monitor(argc, argv, message_pcap);
+ if (r < 0)
+ return r;
+
+ r = fflush_and_check(stdout);
+ if (r < 0)
+ return log_error_errno(r, "Couldn't write capture file: %m");
+
+ return r;
+}
+
+static int status(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ pid_t pid;
+ int r;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ if (!isempty(argv[1])) {
+ r = parse_pid(argv[1], &pid);
+ if (r < 0)
+ r = sd_bus_get_name_creds(
+ bus,
+ argv[1],
+ (arg_augment_creds ? SD_BUS_CREDS_AUGMENT : 0) | _SD_BUS_CREDS_ALL,
+ &creds);
+ else
+ r = sd_bus_creds_new_from_pid(
+ &creds,
+ pid,
+ _SD_BUS_CREDS_ALL);
+ } else {
+ const char *scope, *address;
+ sd_id128_t bus_id;
+
+ r = sd_bus_get_address(bus, &address);
+ if (r >= 0)
+ printf("BusAddress=%s%s%s\n", ansi_highlight(), address, ansi_normal());
+
+ r = sd_bus_get_scope(bus, &scope);
+ if (r >= 0)
+ printf("BusScope=%s%s%s\n", ansi_highlight(), scope, ansi_normal());
+
+ r = sd_bus_get_bus_id(bus, &bus_id);
+ if (r >= 0)
+ printf("BusID=%s" SD_ID128_FORMAT_STR "%s\n", ansi_highlight(), SD_ID128_FORMAT_VAL(bus_id), ansi_normal());
+
+ r = sd_bus_get_owner_creds(
+ bus,
+ (arg_augment_creds ? SD_BUS_CREDS_AUGMENT : 0) | _SD_BUS_CREDS_ALL,
+ &creds);
+ }
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to get credentials: %m");
+
+ bus_creds_dump(creds, NULL, false);
+ return 0;
+}
+
+static int message_append_cmdline(sd_bus_message *m, const char *signature, char ***x) {
+ char **p;
+ int r;
+
+ assert(m);
+ assert(signature);
+ assert(x);
+
+ p = *x;
+
+ for (;;) {
+ const char *v;
+ char t;
+
+ t = *signature;
+ v = *p;
+
+ if (t == 0)
+ break;
+ if (!v)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too few parameters for signature.");
+
+ signature++;
+ p++;
+
+ switch (t) {
+
+ case SD_BUS_TYPE_BOOLEAN:
+
+ r = parse_boolean(v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as boolean: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &r);
+ break;
+
+ case SD_BUS_TYPE_BYTE: {
+ uint8_t z;
+
+ r = safe_atou8(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as byte (unsigned 8bit integer): %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_INT16: {
+ int16_t z;
+
+ r = safe_atoi16(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as signed 16bit integer: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_UINT16: {
+ uint16_t z;
+
+ r = safe_atou16(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as unsigned 16bit integer: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_INT32: {
+ int32_t z;
+
+ r = safe_atoi32(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as signed 32bit integer: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_UINT32: {
+ uint32_t z;
+
+ r = safe_atou32(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as unsigned 32bit integer: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_INT64: {
+ int64_t z;
+
+ r = safe_atoi64(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as signed 64bit integer: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_UINT64: {
+ uint64_t z;
+
+ r = safe_atou64(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as unsigned 64bit integer: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_DOUBLE: {
+ double z;
+
+ r = safe_atod(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as double precision floating point: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE:
+
+ r = sd_bus_message_append_basic(m, t, v);
+ break;
+
+ case SD_BUS_TYPE_ARRAY: {
+ uint32_t n;
+ size_t k;
+
+ r = safe_atou32(v, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' number of array entries: %m", v);
+
+ r = signature_element_length(signature, &k);
+ if (r < 0)
+ return log_error_errno(r, "Invalid array signature: %m");
+
+ {
+ unsigned i;
+ char s[k + 1];
+ memcpy(s, signature, k);
+ s[k] = 0;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, s);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (i = 0; i < n; i++) {
+ r = message_append_cmdline(m, s, &p);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ signature += k;
+
+ r = sd_bus_message_close_container(m);
+ break;
+ }
+
+ case SD_BUS_TYPE_VARIANT:
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_VARIANT, v);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = message_append_cmdline(m, v, &p);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ break;
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ size_t k;
+
+ signature--;
+ p--;
+
+ r = signature_element_length(signature, &k);
+ if (r < 0)
+ return log_error_errno(r, "Invalid struct/dict entry signature: %m");
+
+ {
+ char s[k-1];
+ memcpy(s, signature + 1, k - 2);
+ s[k - 2] = 0;
+
+ r = sd_bus_message_open_container(m, t == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY, s);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = message_append_cmdline(m, s, &p);
+ if (r < 0)
+ return r;
+ }
+
+ signature += k;
+
+ r = sd_bus_message_close_container(m);
+ break;
+ }
+
+ case SD_BUS_TYPE_UNIX_FD:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "UNIX file descriptor not supported as type.");
+
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown signature type %c.", t);
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ *x = p;
+ return 0;
+}
+
+static int json_transform_one(sd_bus_message *m, JsonVariant **ret);
+
+static int json_transform_array_or_struct(sd_bus_message *m, JsonVariant **ret) {
+ size_t n_elements = 0, n_allocated = 0;
+ JsonVariant **elements = NULL;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ for (;;) {
+ r = sd_bus_message_at_end(m, false);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ goto finish;
+ }
+ if (r > 0)
+ break;
+
+ if (!GREEDY_REALLOC(elements, n_allocated, n_elements + 1)) {
+ r = log_oom();
+ goto finish;
+ }
+
+ r = json_transform_one(m, elements + n_elements);
+ if (r < 0)
+ goto finish;
+
+ n_elements++;
+ }
+
+ r = json_variant_new_array(ret, elements, n_elements);
+
+finish:
+ json_variant_unref_many(elements, n_elements);
+ free(elements);
+
+ return r;
+}
+
+static int json_transform_variant(sd_bus_message *m, const char *contents, JsonVariant **ret) {
+ _cleanup_(json_variant_unrefp) JsonVariant *value = NULL;
+ int r;
+
+ assert(m);
+ assert(contents);
+ assert(ret);
+
+ r = json_transform_one(m, &value);
+ if (r < 0)
+ return r;
+
+ r = json_build(ret, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("type", JSON_BUILD_STRING(contents)),
+ JSON_BUILD_PAIR("data", JSON_BUILD_VARIANT(value))));
+ if (r < 0)
+ return log_oom();
+
+ return r;
+}
+
+static int json_transform_dict_array(sd_bus_message *m, JsonVariant **ret) {
+ size_t n_elements = 0, n_allocated = 0;
+ JsonVariant **elements = NULL;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ for (;;) {
+ const char *contents;
+ char type;
+
+ r = sd_bus_message_at_end(m, false);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ goto finish;
+ }
+ if (r > 0)
+ break;
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+
+ assert(type == 'e');
+
+ if (!GREEDY_REALLOC(elements, n_allocated, n_elements + 2)) {
+ r = log_oom();
+ goto finish;
+ }
+
+ r = sd_bus_message_enter_container(m, type, contents);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ goto finish;
+ }
+
+ r = json_transform_one(m, elements + n_elements);
+ if (r < 0)
+ goto finish;
+
+ n_elements++;
+
+ r = json_transform_one(m, elements + n_elements);
+ if (r < 0)
+ goto finish;
+
+ n_elements++;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ goto finish;
+ }
+ }
+
+ r = json_variant_new_object(ret, elements, n_elements);
+
+finish:
+ json_variant_unref_many(elements, n_elements);
+ free(elements);
+
+ return r;
+}
+
+static int json_transform_one(sd_bus_message *m, JsonVariant **ret) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ const char *contents;
+ char type;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ switch (type) {
+
+ case SD_BUS_TYPE_BYTE: {
+ uint8_t b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_unsigned(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform byte: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_BOOLEAN: {
+ int b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_boolean(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform boolean: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_INT16: {
+ int16_t b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_integer(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform int16: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_UINT16: {
+ uint16_t b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_unsigned(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform uint16: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_INT32: {
+ int32_t b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_integer(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform int32: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_UINT32: {
+ uint32_t b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_unsigned(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform uint32: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_INT64: {
+ int64_t b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_integer(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform int64: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_UINT64: {
+ uint64_t b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_unsigned(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform uint64: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_DOUBLE: {
+ double d;
+
+ r = sd_bus_message_read_basic(m, type, &d);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_real(&v, d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform double: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE: {
+ const char *s;
+
+ r = sd_bus_message_read_basic(m, type, &s);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_string(&v, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform double: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_UNIX_FD:
+ r = sd_bus_message_read_basic(m, type, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_null(&v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform fd: %m");
+
+ break;
+
+ case SD_BUS_TYPE_ARRAY:
+ case SD_BUS_TYPE_VARIANT:
+ case SD_BUS_TYPE_STRUCT:
+ r = sd_bus_message_enter_container(m, type, contents);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (type == SD_BUS_TYPE_VARIANT)
+ r = json_transform_variant(m, contents, &v);
+ else if (type == SD_BUS_TYPE_ARRAY && contents[0] == '{')
+ r = json_transform_dict_array(m, &v);
+ else
+ r = json_transform_array_or_struct(m, &v);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ break;
+
+ default:
+ assert_not_reached("Unexpected element type");
+ }
+
+ *ret = TAKE_PTR(v);
+ return 0;
+}
+
+static int json_transform_message(sd_bus_message *m, JsonVariant **ret) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ const char *type;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ assert_se(type = sd_bus_message_get_signature(m, false));
+
+ r = json_transform_array_or_struct(m, &v);
+ if (r < 0)
+ return r;
+
+ r = json_build(ret, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("type", JSON_BUILD_STRING(type)),
+ JSON_BUILD_PAIR("data", JSON_BUILD_VARIANT(v))));
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static void json_dump_with_flags(JsonVariant *v, FILE *f) {
+
+ json_variant_dump(v,
+ (arg_json == JSON_PRETTY ? JSON_FORMAT_PRETTY : JSON_FORMAT_NEWLINE) |
+ JSON_FORMAT_COLOR_AUTO,
+ f, NULL);
+}
+
+static int call(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ int r;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_call(bus, &m, argv[1], argv[2], argv[3], argv[4]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_expect_reply(m, arg_expect_reply);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_auto_start(m, arg_auto_start);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_allow_interactive_authorization(m, arg_allow_interactive_authorization);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (!isempty(argv[5])) {
+ char **p;
+
+ p = argv+6;
+
+ r = message_append_cmdline(m, argv[5], &p);
+ if (r < 0)
+ return r;
+
+ if (*p) {
+ log_error("Too many parameters for signature.");
+ return -EINVAL;
+ }
+ }
+
+ if (!arg_expect_reply) {
+ r = sd_bus_send(bus, m, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send message: %m");
+
+ return 0;
+ }
+
+ r = sd_bus_call(bus, m, arg_timeout, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "%s", bus_error_message(&error, r));
+
+ r = sd_bus_message_is_empty(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (r == 0 && !arg_quiet) {
+
+ if (arg_json != JSON_OFF) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ if (arg_json != JSON_SHORT)
+ (void) pager_open(arg_pager_flags);
+
+ r = json_transform_message(reply, &v);
+ if (r < 0)
+ return r;
+
+ json_dump_with_flags(v, stdout);
+
+ } else if (arg_verbose) {
+ (void) pager_open(arg_pager_flags);
+
+ r = bus_message_dump(reply, stdout, 0);
+ if (r < 0)
+ return r;
+ } else {
+
+ fputs(sd_bus_message_get_signature(reply, true), stdout);
+ fputc(' ', stdout);
+
+ r = format_cmdline(reply, stdout, false);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ fputc('\n', stdout);
+ }
+ }
+
+ return 0;
+}
+
+static int get_property(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char **i;
+ int r;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, argv + 4) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *contents = NULL;
+ char type;
+
+ r = sd_bus_call_method(bus, argv[1], argv[2], "org.freedesktop.DBus.Properties", "Get", &error, &reply, "ss", argv[3], *i);
+ if (r < 0)
+ return log_error_errno(r, "%s", bus_error_message(&error, r));
+
+ r = sd_bus_message_peek_type(reply, &type, &contents);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_enter_container(reply, 'v', contents);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (arg_json != JSON_OFF) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ if (arg_json != JSON_SHORT)
+ (void) pager_open(arg_pager_flags);
+
+ r = json_transform_variant(reply, contents, &v);
+ if (r < 0)
+ return r;
+
+ json_dump_with_flags(v, stdout);
+
+ } else if (arg_verbose) {
+ (void) pager_open(arg_pager_flags);
+
+ r = bus_message_dump(reply, stdout, BUS_MESSAGE_DUMP_SUBTREE_ONLY);
+ if (r < 0)
+ return r;
+ } else {
+ fputs(contents, stdout);
+ fputc(' ', stdout);
+
+ r = format_cmdline(reply, stdout, false);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ fputc('\n', stdout);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ return 0;
+}
+
+static int set_property(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char **p;
+ int r;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_call(bus, &m, argv[1], argv[2], "org.freedesktop.DBus.Properties", "Set");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "ss", argv[3], argv[4]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', argv[5]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ p = argv + 6;
+ r = message_append_cmdline(m, argv[5], &p);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (*p) {
+ log_error("Too many parameters for signature.");
+ return -EINVAL;
+ }
+
+ r = sd_bus_call(bus, m, arg_timeout, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "%s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("busctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Introspect the bus.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers and footers\n"
+ " --system Connect to system bus\n"
+ " --user Connect to user bus\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --address=ADDRESS Connect to bus specified by address\n"
+ " --show-machine Show machine ID column in list\n"
+ " --unique Only show unique names\n"
+ " --acquired Only show acquired names\n"
+ " --activatable Only show activatable names\n"
+ " --match=MATCH Only show matching messages\n"
+ " --size=SIZE Maximum length of captured packet\n"
+ " --list Don't show tree, but simple object path list\n"
+ " -q --quiet Don't show method call reply\n"
+ " --verbose Show result values in long format\n"
+ " --json=MODE Output as JSON\n"
+ " -j Same as --json=pretty on tty, --json=short otherwise\n"
+ " --expect-reply=BOOL Expect a method call reply\n"
+ " --auto-start=BOOL Auto-start destination service\n"
+ " --allow-interactive-authorization=BOOL\n"
+ " Allow interactive authorization for operation\n"
+ " --timeout=SECS Maximum time to wait for method call completion\n"
+ " --augment-creds=BOOL Extend credential data with data read from /proc/$PID\n"
+ " --watch-bind=BOOL Wait for bus AF_UNIX socket to be bound in the file\n"
+ " system\n\n"
+ "Commands:\n"
+ " list List bus names\n"
+ " status [SERVICE] Show bus service, process or bus owner credentials\n"
+ " monitor [SERVICE...] Show bus traffic\n"
+ " capture [SERVICE...] Capture bus traffic as pcap\n"
+ " tree [SERVICE...] Show object tree of service\n"
+ " introspect SERVICE OBJECT [INTERFACE]\n"
+ " call SERVICE OBJECT INTERFACE METHOD [SIGNATURE [ARGUMENT...]]\n"
+ " Call a method\n"
+ " get-property SERVICE OBJECT INTERFACE PROPERTY...\n"
+ " Get property value\n"
+ " set-property SERVICE OBJECT INTERFACE PROPERTY SIGNATURE ARGUMENT...\n"
+ " Set property value\n"
+ " help Show this help\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int verb_help(int argc, char **argv, void *userdata) {
+ return help();
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ ARG_SYSTEM,
+ ARG_USER,
+ ARG_ADDRESS,
+ ARG_MATCH,
+ ARG_SHOW_MACHINE,
+ ARG_UNIQUE,
+ ARG_ACQUIRED,
+ ARG_ACTIVATABLE,
+ ARG_SIZE,
+ ARG_LIST,
+ ARG_VERBOSE,
+ ARG_EXPECT_REPLY,
+ ARG_AUTO_START,
+ ARG_ALLOW_INTERACTIVE_AUTHORIZATION,
+ ARG_TIMEOUT,
+ ARG_AUGMENT_CREDS,
+ ARG_WATCH_BIND,
+ ARG_JSON,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "user", no_argument, NULL, ARG_USER },
+ { "address", required_argument, NULL, ARG_ADDRESS },
+ { "show-machine", no_argument, NULL, ARG_SHOW_MACHINE },
+ { "unique", no_argument, NULL, ARG_UNIQUE },
+ { "acquired", no_argument, NULL, ARG_ACQUIRED },
+ { "activatable", no_argument, NULL, ARG_ACTIVATABLE },
+ { "match", required_argument, NULL, ARG_MATCH },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "size", required_argument, NULL, ARG_SIZE },
+ { "list", no_argument, NULL, ARG_LIST },
+ { "quiet", no_argument, NULL, 'q' },
+ { "verbose", no_argument, NULL, ARG_VERBOSE },
+ { "expect-reply", required_argument, NULL, ARG_EXPECT_REPLY },
+ { "auto-start", required_argument, NULL, ARG_AUTO_START },
+ { "allow-interactive-authorization", required_argument, NULL, ARG_ALLOW_INTERACTIVE_AUTHORIZATION },
+ { "timeout", required_argument, NULL, ARG_TIMEOUT },
+ { "augment-creds", required_argument, NULL, ARG_AUGMENT_CREDS },
+ { "watch-bind", required_argument, NULL, ARG_WATCH_BIND },
+ { "json", required_argument, NULL, ARG_JSON },
+ {},
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hH:M:qj", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case ARG_USER:
+ arg_user = true;
+ break;
+
+ case ARG_SYSTEM:
+ arg_user = false;
+ break;
+
+ case ARG_ADDRESS:
+ arg_address = optarg;
+ break;
+
+ case ARG_SHOW_MACHINE:
+ arg_show_machine = true;
+ break;
+
+ case ARG_UNIQUE:
+ arg_unique = true;
+ break;
+
+ case ARG_ACQUIRED:
+ arg_acquired = true;
+ break;
+
+ case ARG_ACTIVATABLE:
+ arg_activatable = true;
+ break;
+
+ case ARG_MATCH:
+ if (strv_extend(&arg_matches, optarg) < 0)
+ return log_oom();
+ break;
+
+ case ARG_SIZE: {
+ uint64_t sz;
+
+ r = parse_size(optarg, 1024, &sz);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse size '%s': %m", optarg);
+
+ if ((uint64_t) (size_t) sz != sz)
+ return log_error_errno(SYNTHETIC_ERRNO(E2BIG),
+ "Size out of range.");
+
+ arg_snaplen = (size_t) sz;
+ break;
+ }
+
+ case ARG_LIST:
+ arg_list = true;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case ARG_VERBOSE:
+ arg_verbose = true;
+ break;
+
+ case ARG_EXPECT_REPLY:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --expect-reply= parameter '%s': %m", optarg);
+
+ arg_expect_reply = r;
+ break;
+
+ case ARG_AUTO_START:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --auto-start= parameter '%s': %m", optarg);
+
+ arg_auto_start = r;
+ break;
+
+ case ARG_ALLOW_INTERACTIVE_AUTHORIZATION:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --allow-interactive-authorization= parameter '%s': %m", optarg);
+
+ arg_allow_interactive_authorization = r;
+ break;
+
+ case ARG_TIMEOUT:
+ r = parse_sec(optarg, &arg_timeout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --timeout= parameter '%s': %m", optarg);
+
+ break;
+
+ case ARG_AUGMENT_CREDS:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --augment-creds= parameter '%s': %m", optarg);
+
+ arg_augment_creds = r;
+ break;
+
+ case ARG_WATCH_BIND:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --watch-bind= parameter '%s': %m", optarg);
+
+ arg_watch_bind = r;
+ break;
+
+ case 'j':
+ if (on_tty())
+ arg_json = JSON_PRETTY;
+ else
+ arg_json = JSON_SHORT;
+ break;
+
+ case ARG_JSON:
+ if (streq(optarg, "short"))
+ arg_json = JSON_SHORT;
+ else if (streq(optarg, "pretty"))
+ arg_json = JSON_PRETTY;
+ else if (streq(optarg, "help")) {
+ fputs("short\n"
+ "pretty\n", stdout);
+ return 0;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown JSON out mode: %s",
+ optarg);
+
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int busctl_main(int argc, char *argv[]) {
+
+ static const Verb verbs[] = {
+ { "list", VERB_ANY, 1, VERB_DEFAULT, list_bus_names },
+ { "status", VERB_ANY, 2, 0, status },
+ { "monitor", VERB_ANY, VERB_ANY, 0, verb_monitor },
+ { "capture", VERB_ANY, VERB_ANY, 0, verb_capture },
+ { "tree", VERB_ANY, VERB_ANY, 0, tree },
+ { "introspect", 3, 4, 0, introspect },
+ { "call", 5, VERB_ANY, 0, call },
+ { "get-property", 5, VERB_ANY, 0, get_property },
+ { "set-property", 6, VERB_ANY, 0, set_property },
+ { "help", VERB_ANY, VERB_ANY, 0, verb_help },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ return busctl_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/cgls/cgls.c b/src/cgls/cgls.c
new file mode 100644
index 0000000..b6b15cf
--- /dev/null
+++ b/src/cgls/cgls.c
@@ -0,0 +1,302 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "cgroup-show.h"
+#include "cgroup-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "main-func.h"
+#include "output-mode.h"
+#include "pager.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "util.h"
+
+static PagerFlags arg_pager_flags = 0;
+static bool arg_kernel_threads = false;
+static bool arg_all = false;
+
+static enum {
+ SHOW_UNIT_NONE,
+ SHOW_UNIT_SYSTEM,
+ SHOW_UNIT_USER,
+} arg_show_unit = SHOW_UNIT_NONE;
+static char **arg_names = NULL;
+
+static int arg_full = -1;
+static const char* arg_machine = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_names, freep); /* don't free the strings */
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-cgls", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [CGROUP...]\n\n"
+ "Recursively show control group contents.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " -a --all Show all groups, including empty\n"
+ " -u --unit Show the subtrees of specifified system units\n"
+ " --user-unit Show the subtrees of specifified user units\n"
+ " -l --full Do not ellipsize output\n"
+ " -k Include kernel threads in output\n"
+ " -M --machine= Show container\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_NO_PAGER = 0x100,
+ ARG_VERSION,
+ ARG_USER_UNIT,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "all", no_argument, NULL, 'a' },
+ { "full", no_argument, NULL, 'l' },
+ { "machine", required_argument, NULL, 'M' },
+ { "unit", optional_argument, NULL, 'u' },
+ { "user-unit", optional_argument, NULL, ARG_USER_UNIT },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 1);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "-hkalM:u::", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case 'a':
+ arg_all = true;
+ break;
+
+ case 'u':
+ arg_show_unit = SHOW_UNIT_SYSTEM;
+ if (strv_push(&arg_names, optarg) < 0) /* push optarg if not empty */
+ return log_oom();
+ break;
+
+ case ARG_USER_UNIT:
+ arg_show_unit = SHOW_UNIT_USER;
+ if (strv_push(&arg_names, optarg) < 0) /* push optarg if not empty */
+ return log_oom();
+ break;
+
+ case 1:
+ /* positional argument */
+ if (strv_push(&arg_names, optarg) < 0)
+ return log_oom();
+ break;
+
+ case 'l':
+ arg_full = true;
+ break;
+
+ case 'k':
+ arg_kernel_threads = true;
+ break;
+
+ case 'M':
+ arg_machine = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_machine && arg_show_unit != SHOW_UNIT_NONE)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot combine --unit or --user-unit with --machine=.");
+
+ return 1;
+}
+
+static void show_cg_info(const char *controller, const char *path) {
+
+ if (cg_all_unified() == 0 && controller && !streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+ printf("Controller %s; ", controller);
+
+ printf("Control group %s:\n", empty_to_root(path));
+ fflush(stdout);
+}
+
+static int run(int argc, char *argv[]) {
+ int r, output_flags;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = pager_open(arg_pager_flags);
+ if (r > 0 && arg_full < 0)
+ arg_full = true;
+
+ output_flags =
+ arg_all * OUTPUT_SHOW_ALL |
+ (arg_full > 0) * OUTPUT_FULL_WIDTH |
+ arg_kernel_threads * OUTPUT_KERNEL_THREADS;
+
+ if (arg_names) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *root = NULL;
+ char **name;
+
+ STRV_FOREACH(name, arg_names) {
+ int q;
+
+ if (arg_show_unit != SHOW_UNIT_NONE) {
+ /* Command line arguments are unit names */
+ _cleanup_free_ char *cgroup = NULL;
+
+ if (!bus) {
+ /* Connect to the bus only if necessary */
+ r = bus_connect_transport_systemd(BUS_TRANSPORT_LOCAL, NULL,
+ arg_show_unit == SHOW_UNIT_USER,
+ &bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+ }
+
+ q = show_cgroup_get_unit_path_and_warn(bus, *name, &cgroup);
+ if (q < 0)
+ goto failed;
+
+ if (isempty(cgroup)) {
+ log_warning("Unit %s not found.", *name);
+ q = -ENOENT;
+ goto failed;
+ }
+
+ printf("Unit %s (%s):\n", *name, cgroup);
+ fflush(stdout);
+
+ q = show_cgroup_by_path(cgroup, NULL, 0, output_flags);
+
+ } else if (path_startswith(*name, "/sys/fs/cgroup")) {
+
+ printf("Directory %s:\n", *name);
+ fflush(stdout);
+
+ q = show_cgroup_by_path(*name, NULL, 0, output_flags);
+ } else {
+ _cleanup_free_ char *c = NULL, *p = NULL, *j = NULL;
+ const char *controller, *path;
+
+ if (!root) {
+ /* Query root only if needed, treat error as fatal */
+ r = show_cgroup_get_path_and_warn(arg_machine, NULL, &root);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list cgroup tree: %m");
+ }
+
+ q = cg_split_spec(*name, &c, &p);
+ if (q < 0) {
+ log_error_errno(q, "Failed to split argument %s: %m", *name);
+ goto failed;
+ }
+
+ controller = c ?: SYSTEMD_CGROUP_CONTROLLER;
+ if (p) {
+ j = strjoin(root, "/", p);
+ if (!j)
+ return log_oom();
+
+ path_simplify(j, false);
+ path = j;
+ } else
+ path = root;
+
+ show_cg_info(controller, path);
+
+ q = show_cgroup(controller, path, NULL, 0, output_flags);
+ }
+
+ failed:
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ } else {
+ bool done = false;
+
+ if (!arg_machine) {
+ _cleanup_free_ char *cwd = NULL;
+
+ r = safe_getcwd(&cwd);
+ if (r < 0)
+ return log_error_errno(r, "Cannot determine current working directory: %m");
+
+ if (path_startswith(cwd, "/sys/fs/cgroup")) {
+ printf("Working directory %s:\n", cwd);
+ fflush(stdout);
+
+ r = show_cgroup_by_path(cwd, NULL, 0, output_flags);
+ done = true;
+ }
+ }
+
+ if (!done) {
+ _cleanup_free_ char *root = NULL;
+
+ r = show_cgroup_get_path_and_warn(arg_machine, NULL, &root);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list cgroup tree: %m");
+
+ show_cg_info(SYSTEMD_CGROUP_CONTROLLER, root);
+
+ printf("-.slice\n");
+ r = show_cgroup(SYSTEMD_CGROUP_CONTROLLER, root, NULL, 0, output_flags);
+ }
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to list cgroup tree: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/cgroups-agent/cgroups-agent.c b/src/cgroups-agent/cgroups-agent.c
new file mode 100644
index 0000000..9721a32
--- /dev/null
+++ b/src/cgroups-agent/cgroups-agent.c
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdlib.h>
+#include <sys/socket.h>
+
+#include "fd-util.h"
+#include "log.h"
+#include "socket-util.h"
+
+int main(int argc, char *argv[]) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/cgroups-agent",
+ };
+
+ _cleanup_close_ int fd = -1;
+ ssize_t n;
+ size_t l;
+
+ if (argc != 2) {
+ log_error("Incorrect number of arguments.");
+ return EXIT_FAILURE;
+ }
+
+ log_setup_service();
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
+ if (fd < 0) {
+ log_debug_errno(errno, "Failed to allocate socket: %m");
+ return EXIT_FAILURE;
+ }
+
+ l = strlen(argv[1]);
+
+ n = sendto(fd, argv[1], l, 0, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (n < 0) {
+ log_debug_errno(errno, "Failed to send cgroups agent message: %m");
+ return EXIT_FAILURE;
+ }
+
+ if ((size_t) n != l) {
+ log_debug("Datagram size mismatch");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c
new file mode 100644
index 0000000..b3bda30
--- /dev/null
+++ b/src/cgtop/cgtop.c
@@ -0,0 +1,1104 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <alloca.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cgroup-show.h"
+#include "cgroup-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "procfs-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "unit-name.h"
+#include "util.h"
+#include "virt.h"
+
+typedef struct Group {
+ char *path;
+
+ bool n_tasks_valid:1;
+ bool cpu_valid:1;
+ bool memory_valid:1;
+ bool io_valid:1;
+
+ uint64_t n_tasks;
+
+ unsigned cpu_iteration;
+ nsec_t cpu_usage;
+ nsec_t cpu_timestamp;
+ double cpu_fraction;
+
+ uint64_t memory;
+
+ unsigned io_iteration;
+ uint64_t io_input, io_output;
+ nsec_t io_timestamp;
+ uint64_t io_input_bps, io_output_bps;
+} Group;
+
+static unsigned arg_depth = 3;
+static unsigned arg_iterations = (unsigned) -1;
+static bool arg_batch = false;
+static bool arg_raw = false;
+static usec_t arg_delay = 1*USEC_PER_SEC;
+static char* arg_machine = NULL;
+static char* arg_root = NULL;
+static bool arg_recursive = true;
+static bool arg_recursive_unset = false;
+
+static enum {
+ COUNT_PIDS,
+ COUNT_USERSPACE_PROCESSES,
+ COUNT_ALL_PROCESSES,
+} arg_count = COUNT_PIDS;
+
+static enum {
+ ORDER_PATH,
+ ORDER_TASKS,
+ ORDER_CPU,
+ ORDER_MEMORY,
+ ORDER_IO,
+} arg_order = ORDER_CPU;
+
+static enum {
+ CPU_PERCENT,
+ CPU_TIME,
+} arg_cpu_type = CPU_PERCENT;
+
+static Group *group_free(Group *g) {
+ if (!g)
+ return NULL;
+
+ free(g->path);
+ return mfree(g);
+}
+
+static const char *maybe_format_bytes(char *buf, size_t l, bool is_valid, uint64_t t) {
+ if (!is_valid)
+ return "-";
+ if (arg_raw) {
+ snprintf(buf, l, "%" PRIu64, t);
+ return buf;
+ }
+ return format_bytes(buf, l, t);
+}
+
+static bool is_root_cgroup(const char *path) {
+
+ /* Returns true if the specified path belongs to the root cgroup. The root cgroup is special on cgroup v2 as it
+ * carries only very few attributes in order not to export multiple truth about system state as most
+ * information is available elsewhere in /proc anyway. We need to be able to deal with that, and need to get
+ * our data from different sources in that case.
+ *
+ * There's one extra complication in all of this, though 😣: if the path to the cgroup indicates we are in the
+ * root cgroup this might actually not be the case, because cgroup namespacing might be in effect
+ * (CLONE_NEWCGROUP). Since there's no nice way to distuingish a real cgroup root from a fake namespaced one we
+ * do an explicit container check here, under the assumption that CLONE_NEWCGROUP is generally used when
+ * container managers are used too.
+ *
+ * Note that checking for a container environment is kinda ugly, since in theory people could use cgtop from
+ * inside a container where cgroup namespacing is turned off to watch the host system. However, that's mostly a
+ * theoretic usecase, and if people actually try all they'll lose is accounting for the top-level cgroup. Which
+ * isn't too bad. */
+
+ if (detect_container() > 0)
+ return false;
+
+ return empty_or_root(path);
+}
+
+static int process(
+ const char *controller,
+ const char *path,
+ Hashmap *a,
+ Hashmap *b,
+ unsigned iteration,
+ Group **ret) {
+
+ Group *g;
+ int r, all_unified;
+
+ assert(controller);
+ assert(path);
+ assert(a);
+
+ all_unified = cg_all_unified();
+ if (all_unified < 0)
+ return all_unified;
+
+ g = hashmap_get(a, path);
+ if (!g) {
+ g = hashmap_get(b, path);
+ if (!g) {
+ g = new0(Group, 1);
+ if (!g)
+ return -ENOMEM;
+
+ g->path = strdup(path);
+ if (!g->path) {
+ group_free(g);
+ return -ENOMEM;
+ }
+
+ r = hashmap_put(a, g->path, g);
+ if (r < 0) {
+ group_free(g);
+ return r;
+ }
+ } else {
+ r = hashmap_move_one(a, b, path);
+ if (r < 0)
+ return r;
+
+ g->cpu_valid = g->memory_valid = g->io_valid = g->n_tasks_valid = false;
+ }
+ }
+
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) &&
+ IN_SET(arg_count, COUNT_ALL_PROCESSES, COUNT_USERSPACE_PROCESSES)) {
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t pid;
+
+ r = cg_enumerate_processes(controller, path, &f);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ g->n_tasks = 0;
+ while (cg_read_pid(f, &pid) > 0) {
+
+ if (arg_count == COUNT_USERSPACE_PROCESSES && is_kernel_thread(pid) > 0)
+ continue;
+
+ g->n_tasks++;
+ }
+
+ if (g->n_tasks > 0)
+ g->n_tasks_valid = true;
+
+ } else if (streq(controller, "pids") && arg_count == COUNT_PIDS) {
+
+ if (is_root_cgroup(path)) {
+ r = procfs_tasks_get_current(&g->n_tasks);
+ if (r < 0)
+ return r;
+ } else {
+ _cleanup_free_ char *p = NULL, *v = NULL;
+
+ r = cg_get_path(controller, path, "pids.current", &p);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(p, &v);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &g->n_tasks);
+ if (r < 0)
+ return r;
+ }
+
+ if (g->n_tasks > 0)
+ g->n_tasks_valid = true;
+
+ } else if (STR_IN_SET(controller, "cpu", "cpuacct") || cpu_accounting_is_cheap()) {
+ _cleanup_free_ char *p = NULL, *v = NULL;
+ uint64_t new_usage;
+ nsec_t timestamp;
+
+ if (is_root_cgroup(path)) {
+ r = procfs_cpu_get_usage(&new_usage);
+ if (r < 0)
+ return r;
+ } else if (all_unified) {
+ _cleanup_free_ char *val = NULL;
+
+ if (!streq(controller, "cpu"))
+ return 0;
+
+ r = cg_get_keyed_attribute("cpu", path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
+ if (IN_SET(r, -ENOENT, -ENXIO))
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(val, &new_usage);
+ if (r < 0)
+ return r;
+
+ new_usage *= NSEC_PER_USEC;
+ } else {
+ if (!streq(controller, "cpuacct"))
+ return 0;
+
+ r = cg_get_path(controller, path, "cpuacct.usage", &p);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(p, &v);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &new_usage);
+ if (r < 0)
+ return r;
+ }
+
+ timestamp = now_nsec(CLOCK_MONOTONIC);
+
+ if (g->cpu_iteration == iteration - 1 &&
+ (nsec_t) new_usage > g->cpu_usage) {
+
+ nsec_t x, y;
+
+ x = timestamp - g->cpu_timestamp;
+ if (x < 1)
+ x = 1;
+
+ y = (nsec_t) new_usage - g->cpu_usage;
+ g->cpu_fraction = (double) y / (double) x;
+ g->cpu_valid = true;
+ }
+
+ g->cpu_usage = (nsec_t) new_usage;
+ g->cpu_timestamp = timestamp;
+ g->cpu_iteration = iteration;
+
+ } else if (streq(controller, "memory")) {
+
+ if (is_root_cgroup(path)) {
+ r = procfs_memory_get_used(&g->memory);
+ if (r < 0)
+ return r;
+ } else {
+ _cleanup_free_ char *p = NULL, *v = NULL;
+
+ if (all_unified)
+ r = cg_get_path(controller, path, "memory.current", &p);
+ else
+ r = cg_get_path(controller, path, "memory.usage_in_bytes", &p);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(p, &v);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &g->memory);
+ if (r < 0)
+ return r;
+ }
+
+ if (g->memory > 0)
+ g->memory_valid = true;
+
+ } else if ((streq(controller, "io") && all_unified) ||
+ (streq(controller, "blkio") && !all_unified)) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ uint64_t wr = 0, rd = 0;
+ nsec_t timestamp;
+
+ r = cg_get_path(controller, path, all_unified ? "io.stat" : "blkio.io_service_bytes", &p);
+ if (r < 0)
+ return r;
+
+ f = fopen(p, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+ return -errno;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ uint64_t k, *q;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ /* Trim and skip the device */
+ l = strstrip(line);
+ l += strcspn(l, WHITESPACE);
+ l += strspn(l, WHITESPACE);
+
+ if (all_unified) {
+ while (!isempty(l)) {
+ if (sscanf(l, "rbytes=%" SCNu64, &k))
+ rd += k;
+ else if (sscanf(l, "wbytes=%" SCNu64, &k))
+ wr += k;
+
+ l += strcspn(l, WHITESPACE);
+ l += strspn(l, WHITESPACE);
+ }
+ } else {
+ if (first_word(l, "Read")) {
+ l += 4;
+ q = &rd;
+ } else if (first_word(l, "Write")) {
+ l += 5;
+ q = &wr;
+ } else
+ continue;
+
+ l += strspn(l, WHITESPACE);
+ r = safe_atou64(l, &k);
+ if (r < 0)
+ continue;
+
+ *q += k;
+ }
+ }
+
+ timestamp = now_nsec(CLOCK_MONOTONIC);
+
+ if (g->io_iteration == iteration - 1) {
+ uint64_t x, yr, yw;
+
+ x = (uint64_t) (timestamp - g->io_timestamp);
+ if (x < 1)
+ x = 1;
+
+ if (rd > g->io_input)
+ yr = rd - g->io_input;
+ else
+ yr = 0;
+
+ if (wr > g->io_output)
+ yw = wr - g->io_output;
+ else
+ yw = 0;
+
+ if (yr > 0 || yw > 0) {
+ g->io_input_bps = (yr * 1000000000ULL) / x;
+ g->io_output_bps = (yw * 1000000000ULL) / x;
+ g->io_valid = true;
+ }
+ }
+
+ g->io_input = rd;
+ g->io_output = wr;
+ g->io_timestamp = timestamp;
+ g->io_iteration = iteration;
+ }
+
+ if (ret)
+ *ret = g;
+
+ return 0;
+}
+
+static int refresh_one(
+ const char *controller,
+ const char *path,
+ Hashmap *a,
+ Hashmap *b,
+ unsigned iteration,
+ unsigned depth,
+ Group **ret) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ Group *ours = NULL;
+ int r;
+
+ assert(controller);
+ assert(path);
+ assert(a);
+
+ if (depth > arg_depth)
+ return 0;
+
+ r = process(controller, path, a, b, iteration, &ours);
+ if (r < 0)
+ return r;
+
+ r = cg_enumerate_subgroups(controller, path, &d);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *fn = NULL, *p = NULL;
+ Group *child = NULL;
+
+ r = cg_read_subgroup(d, &fn);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ p = strjoin(path, "/", fn);
+ if (!p)
+ return -ENOMEM;
+
+ path_simplify(p, false);
+
+ r = refresh_one(controller, p, a, b, iteration, depth + 1, &child);
+ if (r < 0)
+ return r;
+
+ if (arg_recursive &&
+ IN_SET(arg_count, COUNT_ALL_PROCESSES, COUNT_USERSPACE_PROCESSES) &&
+ child &&
+ child->n_tasks_valid &&
+ streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+
+ /* Recursively sum up processes */
+
+ if (ours->n_tasks_valid)
+ ours->n_tasks += child->n_tasks;
+ else {
+ ours->n_tasks = child->n_tasks;
+ ours->n_tasks_valid = true;
+ }
+ }
+ }
+
+ if (ret)
+ *ret = ours;
+
+ return 1;
+}
+
+static int refresh(const char *root, Hashmap *a, Hashmap *b, unsigned iteration) {
+ const char *c;
+ int r;
+
+ FOREACH_STRING(c, SYSTEMD_CGROUP_CONTROLLER, "cpu", "cpuacct", "memory", "io", "blkio", "pids") {
+ r = refresh_one(c, root, a, b, iteration, 0, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int group_compare(Group * const *a, Group * const *b) {
+ const Group *x = *a, *y = *b;
+ int r;
+
+ if (arg_order != ORDER_TASKS || arg_recursive) {
+ /* Let's make sure that the parent is always before
+ * the child. Except when ordering by tasks and
+ * recursive summing is off, since that is actually
+ * not accumulative for all children. */
+
+ if (path_startswith(empty_to_root(y->path), empty_to_root(x->path)))
+ return -1;
+ if (path_startswith(empty_to_root(x->path), empty_to_root(y->path)))
+ return 1;
+ }
+
+ switch (arg_order) {
+
+ case ORDER_PATH:
+ break;
+
+ case ORDER_CPU:
+ if (arg_cpu_type == CPU_PERCENT) {
+ if (x->cpu_valid && y->cpu_valid) {
+ r = CMP(y->cpu_fraction, x->cpu_fraction);
+ if (r != 0)
+ return r;
+ } else if (x->cpu_valid)
+ return -1;
+ else if (y->cpu_valid)
+ return 1;
+ } else {
+ r = CMP(y->cpu_usage, x->cpu_usage);
+ if (r != 0)
+ return r;
+ }
+
+ break;
+
+ case ORDER_TASKS:
+ if (x->n_tasks_valid && y->n_tasks_valid) {
+ r = CMP(y->n_tasks, x->n_tasks);
+ if (r != 0)
+ return r;
+ } else if (x->n_tasks_valid)
+ return -1;
+ else if (y->n_tasks_valid)
+ return 1;
+
+ break;
+
+ case ORDER_MEMORY:
+ if (x->memory_valid && y->memory_valid) {
+ r = CMP(y->memory, x->memory);
+ if (r != 0)
+ return r;
+ } else if (x->memory_valid)
+ return -1;
+ else if (y->memory_valid)
+ return 1;
+
+ break;
+
+ case ORDER_IO:
+ if (x->io_valid && y->io_valid) {
+ r = CMP(y->io_input_bps + y->io_output_bps, x->io_input_bps + x->io_output_bps);
+ if (r != 0)
+ return r;
+ } else if (x->io_valid)
+ return -1;
+ else if (y->io_valid)
+ return 1;
+ }
+
+ return path_compare(x->path, y->path);
+}
+
+static void display(Hashmap *a) {
+ Iterator i;
+ Group *g;
+ Group **array;
+ signed path_columns;
+ unsigned rows, n = 0, j, maxtcpu = 0, maxtpath = 3; /* 3 for ellipsize() to work properly */
+ char buffer[MAX3(21, FORMAT_BYTES_MAX, FORMAT_TIMESPAN_MAX)];
+
+ assert(a);
+
+ if (!terminal_is_dumb())
+ fputs(ANSI_HOME_CLEAR, stdout);
+
+ array = newa(Group*, hashmap_size(a));
+
+ HASHMAP_FOREACH(g, a, i)
+ if (g->n_tasks_valid || g->cpu_valid || g->memory_valid || g->io_valid)
+ array[n++] = g;
+
+ typesafe_qsort(array, n, group_compare);
+
+ /* Find the longest names in one run */
+ for (j = 0; j < n; j++) {
+ unsigned cputlen, pathtlen;
+
+ format_timespan(buffer, sizeof(buffer), (usec_t) (array[j]->cpu_usage / NSEC_PER_USEC), 0);
+ cputlen = strlen(buffer);
+ maxtcpu = MAX(maxtcpu, cputlen);
+
+ pathtlen = strlen(array[j]->path);
+ maxtpath = MAX(maxtpath, pathtlen);
+ }
+
+ if (arg_cpu_type == CPU_PERCENT)
+ xsprintf(buffer, "%6s", "%CPU");
+ else
+ xsprintf(buffer, "%*s", maxtcpu, "CPU Time");
+
+ rows = lines();
+ if (rows <= 10)
+ rows = 10;
+
+ if (on_tty()) {
+ const char *on, *off;
+
+ path_columns = columns() - 36 - strlen(buffer);
+ if (path_columns < 10)
+ path_columns = 10;
+
+ on = ansi_highlight_underline();
+ off = ansi_underline();
+
+ printf("%s%s%-*s%s %s%7s%s %s%s%s %s%8s%s %s%8s%s %s%8s%s%s\n",
+ ansi_underline(),
+ arg_order == ORDER_PATH ? on : "", path_columns, "Control Group",
+ arg_order == ORDER_PATH ? off : "",
+ arg_order == ORDER_TASKS ? on : "", arg_count == COUNT_PIDS ? "Tasks" : arg_count == COUNT_USERSPACE_PROCESSES ? "Procs" : "Proc+",
+ arg_order == ORDER_TASKS ? off : "",
+ arg_order == ORDER_CPU ? on : "", buffer,
+ arg_order == ORDER_CPU ? off : "",
+ arg_order == ORDER_MEMORY ? on : "", "Memory",
+ arg_order == ORDER_MEMORY ? off : "",
+ arg_order == ORDER_IO ? on : "", "Input/s",
+ arg_order == ORDER_IO ? off : "",
+ arg_order == ORDER_IO ? on : "", "Output/s",
+ arg_order == ORDER_IO ? off : "",
+ ansi_normal());
+ } else
+ path_columns = maxtpath;
+
+ for (j = 0; j < n; j++) {
+ _cleanup_free_ char *ellipsized = NULL;
+ const char *path;
+
+ if (on_tty() && j + 6 > rows)
+ break;
+
+ g = array[j];
+
+ path = empty_to_root(g->path);
+ ellipsized = ellipsize(path, path_columns, 33);
+ printf("%-*s", path_columns, ellipsized ?: path);
+
+ if (g->n_tasks_valid)
+ printf(" %7" PRIu64, g->n_tasks);
+ else
+ fputs(" -", stdout);
+
+ if (arg_cpu_type == CPU_PERCENT) {
+ if (g->cpu_valid)
+ printf(" %6.1f", g->cpu_fraction*100);
+ else
+ fputs(" -", stdout);
+ } else
+ printf(" %*s", maxtcpu, format_timespan(buffer, sizeof(buffer), (usec_t) (g->cpu_usage / NSEC_PER_USEC), 0));
+
+ printf(" %8s", maybe_format_bytes(buffer, sizeof(buffer), g->memory_valid, g->memory));
+ printf(" %8s", maybe_format_bytes(buffer, sizeof(buffer), g->io_valid, g->io_input_bps));
+ printf(" %8s", maybe_format_bytes(buffer, sizeof(buffer), g->io_valid, g->io_output_bps));
+
+ putchar('\n');
+ }
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-cgtop", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [CGROUP]\n\n"
+ "Show top control groups by their resource usage.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -p --order=path Order by path\n"
+ " -t --order=tasks Order by number of tasks/processes\n"
+ " -c --order=cpu Order by CPU load (default)\n"
+ " -m --order=memory Order by memory load\n"
+ " -i --order=io Order by IO load\n"
+ " -r --raw Provide raw (not human-readable) numbers\n"
+ " --cpu=percentage Show CPU usage as percentage (default)\n"
+ " --cpu=time Show CPU usage as time\n"
+ " -P Count userspace processes instead of tasks (excl. kernel)\n"
+ " -k Count all processes instead of tasks (incl. kernel)\n"
+ " --recursive=BOOL Sum up process count recursively\n"
+ " -d --delay=DELAY Delay between updates\n"
+ " -n --iterations=N Run for N iterations before exiting\n"
+ " -1 Shortcut for --iterations=1\n"
+ " -b --batch Run in batch mode, accepting no input\n"
+ " --depth=DEPTH Maximum traversal depth (default: %u)\n"
+ " -M --machine= Show container\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , arg_depth
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_DEPTH,
+ ARG_CPU_TYPE,
+ ARG_ORDER,
+ ARG_RECURSIVE,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "delay", required_argument, NULL, 'd' },
+ { "iterations", required_argument, NULL, 'n' },
+ { "batch", no_argument, NULL, 'b' },
+ { "raw", no_argument, NULL, 'r' },
+ { "depth", required_argument, NULL, ARG_DEPTH },
+ { "cpu", optional_argument, NULL, ARG_CPU_TYPE },
+ { "order", required_argument, NULL, ARG_ORDER },
+ { "recursive", required_argument, NULL, ARG_RECURSIVE },
+ { "machine", required_argument, NULL, 'M' },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 1);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hptcmin:brd:kPM:1", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_CPU_TYPE:
+ if (optarg) {
+ if (streq(optarg, "time"))
+ arg_cpu_type = CPU_TIME;
+ else if (streq(optarg, "percentage"))
+ arg_cpu_type = CPU_PERCENT;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown argument to --cpu=: %s",
+ optarg);
+ } else
+ arg_cpu_type = CPU_TIME;
+
+ break;
+
+ case ARG_DEPTH:
+ r = safe_atou(optarg, &arg_depth);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse depth parameter '%s': %m", optarg);
+
+ break;
+
+ case 'd':
+ r = parse_sec(optarg, &arg_delay);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse delay parameter '%s': %m", optarg);
+ if (arg_delay <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid delay parameter '%s'",
+ optarg);
+
+ break;
+
+ case 'n':
+ r = safe_atou(optarg, &arg_iterations);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse iterations parameter '%s': %m", optarg);
+
+ break;
+
+ case '1':
+ arg_iterations = 1;
+ break;
+
+ case 'b':
+ arg_batch = true;
+ break;
+
+ case 'r':
+ arg_raw = true;
+ break;
+
+ case 'p':
+ arg_order = ORDER_PATH;
+ break;
+
+ case 't':
+ arg_order = ORDER_TASKS;
+ break;
+
+ case 'c':
+ arg_order = ORDER_CPU;
+ break;
+
+ case 'm':
+ arg_order = ORDER_MEMORY;
+ break;
+
+ case 'i':
+ arg_order = ORDER_IO;
+ break;
+
+ case ARG_ORDER:
+ if (streq(optarg, "path"))
+ arg_order = ORDER_PATH;
+ else if (streq(optarg, "tasks"))
+ arg_order = ORDER_TASKS;
+ else if (streq(optarg, "cpu"))
+ arg_order = ORDER_CPU;
+ else if (streq(optarg, "memory"))
+ arg_order = ORDER_MEMORY;
+ else if (streq(optarg, "io"))
+ arg_order = ORDER_IO;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid argument to --order=: %s",
+ optarg);
+ break;
+
+ case 'k':
+ arg_count = COUNT_ALL_PROCESSES;
+ break;
+
+ case 'P':
+ arg_count = COUNT_USERSPACE_PROCESSES;
+ break;
+
+ case ARG_RECURSIVE:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --recursive= argument '%s': %m", optarg);
+
+ arg_recursive = r;
+ arg_recursive_unset = r == 0;
+ break;
+
+ case 'M':
+ arg_machine = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind == argc - 1)
+ arg_root = argv[optind];
+ else if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments.");
+
+ return 1;
+}
+
+static const char* counting_what(void) {
+ if (arg_count == COUNT_PIDS)
+ return "tasks";
+ else if (arg_count == COUNT_ALL_PROCESSES)
+ return "all processes (incl. kernel)";
+ else
+ return "userspace processes (excl. kernel)";
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(group_hash_ops, char, path_hash_func, path_compare_func, Group, group_free);
+
+static int run(int argc, char *argv[]) {
+ _cleanup_hashmap_free_ Hashmap *a = NULL, *b = NULL;
+ unsigned iteration = 0;
+ usec_t last_refresh = 0;
+ bool quit = false, immediate_refresh = false;
+ _cleanup_free_ char *root = NULL;
+ CGroupMask mask;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = cg_mask_supported(&mask);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine supported controllers: %m");
+
+ arg_count = (mask & CGROUP_MASK_PIDS) ? COUNT_PIDS : COUNT_USERSPACE_PROCESSES;
+
+ if (arg_recursive_unset && arg_count == COUNT_PIDS) {
+ log_error("Non-recursive counting is only supported when counting processes, not tasks. Use -P or -k.");
+ return -EINVAL;
+ }
+
+ r = show_cgroup_get_path_and_warn(arg_machine, arg_root, &root);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get root control group path: %m");
+ log_debug("Cgroup path: %s", root);
+
+ a = hashmap_new(&group_hash_ops);
+ b = hashmap_new(&group_hash_ops);
+ if (!a || !b)
+ return log_oom();
+
+ signal(SIGWINCH, columns_lines_cache_reset);
+
+ if (arg_iterations == (unsigned) -1)
+ arg_iterations = on_tty() ? 0 : 1;
+
+ while (!quit) {
+ usec_t t;
+ char key;
+ char h[FORMAT_TIMESPAN_MAX];
+
+ t = now(CLOCK_MONOTONIC);
+
+ if (t >= last_refresh + arg_delay || immediate_refresh) {
+
+ r = refresh(root, a, b, iteration++);
+ if (r < 0)
+ return log_error_errno(r, "Failed to refresh: %m");
+
+ hashmap_clear(b);
+ SWAP_TWO(a, b);
+
+ last_refresh = t;
+ immediate_refresh = false;
+ }
+
+ display(b);
+
+ if (arg_iterations && iteration >= arg_iterations)
+ break;
+
+ if (!on_tty()) /* non-TTY: Empty newline as delimiter between polls */
+ fputs("\n", stdout);
+ fflush(stdout);
+
+ if (arg_batch)
+ (void) usleep(last_refresh + arg_delay - t);
+ else {
+ r = read_one_char(stdin, &key, last_refresh + arg_delay - t, NULL);
+ if (r == -ETIMEDOUT)
+ continue;
+ if (r < 0)
+ return log_error_errno(r, "Couldn't read key: %m");
+ }
+
+ if (on_tty()) { /* TTY: Clear any user keystroke */
+ fputs("\r \r", stdout);
+ fflush(stdout);
+ }
+
+ if (arg_batch)
+ continue;
+
+ switch (key) {
+
+ case ' ':
+ immediate_refresh = true;
+ break;
+
+ case 'q':
+ quit = true;
+ break;
+
+ case 'p':
+ arg_order = ORDER_PATH;
+ break;
+
+ case 't':
+ arg_order = ORDER_TASKS;
+ break;
+
+ case 'c':
+ arg_order = ORDER_CPU;
+ break;
+
+ case 'm':
+ arg_order = ORDER_MEMORY;
+ break;
+
+ case 'i':
+ arg_order = ORDER_IO;
+ break;
+
+ case '%':
+ arg_cpu_type = arg_cpu_type == CPU_TIME ? CPU_PERCENT : CPU_TIME;
+ break;
+
+ case 'k':
+ arg_count = arg_count != COUNT_ALL_PROCESSES ? COUNT_ALL_PROCESSES : COUNT_PIDS;
+ fprintf(stdout, "\nCounting: %s.", counting_what());
+ fflush(stdout);
+ sleep(1);
+ break;
+
+ case 'P':
+ arg_count = arg_count != COUNT_USERSPACE_PROCESSES ? COUNT_USERSPACE_PROCESSES : COUNT_PIDS;
+ fprintf(stdout, "\nCounting: %s.", counting_what());
+ fflush(stdout);
+ sleep(1);
+ break;
+
+ case 'r':
+ if (arg_count == COUNT_PIDS)
+ fprintf(stdout, "\n\aCannot toggle recursive counting, not available in task counting mode.");
+ else {
+ arg_recursive = !arg_recursive;
+ fprintf(stdout, "\nRecursive process counting: %s", yes_no(arg_recursive));
+ }
+ fflush(stdout);
+ sleep(1);
+ break;
+
+ case '+':
+ if (arg_delay < USEC_PER_SEC)
+ arg_delay += USEC_PER_MSEC*250;
+ else
+ arg_delay += USEC_PER_SEC;
+
+ fprintf(stdout, "\nIncreased delay to %s.", format_timespan(h, sizeof(h), arg_delay, 0));
+ fflush(stdout);
+ sleep(1);
+ break;
+
+ case '-':
+ if (arg_delay <= USEC_PER_MSEC*500)
+ arg_delay = USEC_PER_MSEC*250;
+ else if (arg_delay < USEC_PER_MSEC*1250)
+ arg_delay -= USEC_PER_MSEC*250;
+ else
+ arg_delay -= USEC_PER_SEC;
+
+ fprintf(stdout, "\nDecreased delay to %s.", format_timespan(h, sizeof(h), arg_delay, 0));
+ fflush(stdout);
+ sleep(1);
+ break;
+
+ case '?':
+ case 'h':
+
+#define ON ANSI_HIGHLIGHT
+#define OFF ANSI_NORMAL
+
+ fprintf(stdout,
+ "\t<" ON "p" OFF "> By path; <" ON "t" OFF "> By tasks/procs; <" ON "c" OFF "> By CPU; <" ON "m" OFF "> By memory; <" ON "i" OFF "> By I/O\n"
+ "\t<" ON "+" OFF "> Inc. delay; <" ON "-" OFF "> Dec. delay; <" ON "%%" OFF "> Toggle time; <" ON "SPACE" OFF "> Refresh\n"
+ "\t<" ON "P" OFF "> Toggle count userspace processes; <" ON "k" OFF "> Toggle count all processes\n"
+ "\t<" ON "r" OFF "> Count processes recursively; <" ON "q" OFF "> Quit");
+ fflush(stdout);
+ sleep(3);
+ break;
+
+ default:
+ if (key < ' ')
+ fprintf(stdout, "\nUnknown key '\\x%x'. Ignoring.", key);
+ else
+ fprintf(stdout, "\nUnknown key '%c'. Ignoring.", key);
+ fflush(stdout);
+ sleep(1);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/core/all-units.h b/src/core/all-units.h
new file mode 100644
index 0000000..ed8350e
--- /dev/null
+++ b/src/core/all-units.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "unit.h"
+
+#include "automount.h"
+#include "device.h"
+#include "path.h"
+#include "scope.h"
+#include "service.h"
+#include "slice.h"
+#include "socket.h"
+#include "swap.h"
+#include "target.h"
+#include "timer.h"
diff --git a/src/core/audit-fd.c b/src/core/audit-fd.c
new file mode 100644
index 0000000..fdef433
--- /dev/null
+++ b/src/core/audit-fd.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "audit-fd.h"
+
+#if HAVE_AUDIT
+
+#include <libaudit.h>
+#include <stdbool.h>
+
+#include "capability-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "util.h"
+
+static bool initialized = false;
+static int audit_fd;
+
+int get_audit_fd(void) {
+
+ if (!initialized) {
+ if (have_effective_cap(CAP_AUDIT_WRITE) == 0) {
+ audit_fd = -EPERM;
+ initialized = true;
+
+ return audit_fd;
+ }
+
+ audit_fd = audit_open();
+
+ if (audit_fd < 0) {
+ if (!IN_SET(errno, EAFNOSUPPORT, EPROTONOSUPPORT))
+ log_error_errno(errno, "Failed to connect to audit log: %m");
+
+ audit_fd = errno ? -errno : -EINVAL;
+ }
+
+ initialized = true;
+ }
+
+ return audit_fd;
+}
+
+void close_audit_fd(void) {
+
+ if (initialized && audit_fd >= 0)
+ safe_close(audit_fd);
+
+ initialized = true;
+ audit_fd = -ECONNRESET;
+}
+
+#else
+
+int get_audit_fd(void) {
+ return -EAFNOSUPPORT;
+}
+
+void close_audit_fd(void) {
+}
+
+#endif
diff --git a/src/core/audit-fd.h b/src/core/audit-fd.h
new file mode 100644
index 0000000..8c1e471
--- /dev/null
+++ b/src/core/audit-fd.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int get_audit_fd(void);
+void close_audit_fd(void);
diff --git a/src/core/automount.c b/src/core/automount.c
new file mode 100644
index 0000000..6a83739
--- /dev/null
+++ b/src/core/automount.c
@@ -0,0 +1,1144 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/auto_dev-ioctl.h>
+#include <linux/auto_fs4.h>
+#include <sys/epoll.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "async.h"
+#include "automount.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "dbus-automount.h"
+#include "dbus-unit.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "label.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mount.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "unit-name.h"
+#include "unit.h"
+
+static const UnitActiveState state_translation_table[_AUTOMOUNT_STATE_MAX] = {
+ [AUTOMOUNT_DEAD] = UNIT_INACTIVE,
+ [AUTOMOUNT_WAITING] = UNIT_ACTIVE,
+ [AUTOMOUNT_RUNNING] = UNIT_ACTIVE,
+ [AUTOMOUNT_FAILED] = UNIT_FAILED
+};
+
+struct expire_data {
+ int dev_autofs_fd;
+ int ioctl_fd;
+};
+
+static void expire_data_free(struct expire_data *data) {
+ if (!data)
+ return;
+
+ safe_close(data->dev_autofs_fd);
+ safe_close(data->ioctl_fd);
+ free(data);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct expire_data*, expire_data_free);
+
+static int open_dev_autofs(Manager *m);
+static int automount_dispatch_io(sd_event_source *s, int fd, uint32_t events, void *userdata);
+static int automount_start_expire(Automount *a);
+static void automount_stop_expire(Automount *a);
+static int automount_send_ready(Automount *a, Set *tokens, int status);
+
+static void automount_init(Unit *u) {
+ Automount *a = AUTOMOUNT(u);
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ a->pipe_fd = -1;
+ a->directory_mode = 0755;
+ UNIT(a)->ignore_on_isolate = true;
+}
+
+static void unmount_autofs(Automount *a) {
+ int r;
+
+ assert(a);
+
+ if (a->pipe_fd < 0)
+ return;
+
+ a->pipe_event_source = sd_event_source_unref(a->pipe_event_source);
+ a->pipe_fd = safe_close(a->pipe_fd);
+
+ /* If we reload/reexecute things we keep the mount point around */
+ if (!IN_SET(UNIT(a)->manager->objective, MANAGER_RELOAD, MANAGER_REEXECUTE)) {
+
+ automount_send_ready(a, a->tokens, -EHOSTDOWN);
+ automount_send_ready(a, a->expire_tokens, -EHOSTDOWN);
+
+ if (a->where) {
+ r = repeat_unmount(a->where, MNT_DETACH);
+ if (r < 0)
+ log_error_errno(r, "Failed to unmount: %m");
+ }
+ }
+}
+
+static void automount_done(Unit *u) {
+ Automount *a = AUTOMOUNT(u);
+
+ assert(a);
+
+ unmount_autofs(a);
+
+ a->where = mfree(a->where);
+
+ a->tokens = set_free(a->tokens);
+ a->expire_tokens = set_free(a->expire_tokens);
+
+ a->expire_event_source = sd_event_source_unref(a->expire_event_source);
+}
+
+static int automount_add_trigger_dependencies(Automount *a) {
+ Unit *x;
+ int r;
+
+ assert(a);
+
+ r = unit_load_related_unit(UNIT(a), ".mount", &x);
+ if (r < 0)
+ return r;
+
+ return unit_add_two_dependencies(UNIT(a), UNIT_BEFORE, UNIT_TRIGGERS, x, true, UNIT_DEPENDENCY_IMPLICIT);
+}
+
+static int automount_add_mount_dependencies(Automount *a) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(a);
+
+ parent = dirname_malloc(a->where);
+ if (!parent)
+ return -ENOMEM;
+
+ return unit_require_mounts_for(UNIT(a), parent, UNIT_DEPENDENCY_IMPLICIT);
+}
+
+static int automount_add_default_dependencies(Automount *a) {
+ int r;
+
+ assert(a);
+
+ if (!UNIT(a)->default_dependencies)
+ return 0;
+
+ if (!MANAGER_IS_SYSTEM(UNIT(a)->manager))
+ return 0;
+
+ r = unit_add_two_dependencies_by_name(UNIT(a), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int automount_verify(Automount *a) {
+ _cleanup_free_ char *e = NULL;
+ int r;
+
+ assert(a);
+
+ if (UNIT(a)->load_state != UNIT_LOADED)
+ return 0;
+
+ if (path_equal(a->where, "/")) {
+ log_unit_error(UNIT(a), "Cannot have an automount unit for the root directory. Refusing.");
+ return -ENOEXEC;
+ }
+
+ r = unit_name_from_path(a->where, ".automount", &e);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(a), r, "Failed to generate unit name from path: %m");
+
+ if (!unit_has_name(UNIT(a), e)) {
+ log_unit_error(UNIT(a), "Where= setting doesn't match unit name. Refusing.");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static int automount_set_where(Automount *a) {
+ int r;
+
+ assert(a);
+
+ if (a->where)
+ return 0;
+
+ r = unit_name_to_path(UNIT(a)->id, &a->where);
+ if (r < 0)
+ return r;
+
+ path_simplify(a->where, false);
+ return 1;
+}
+
+static int automount_load(Unit *u) {
+ Automount *a = AUTOMOUNT(u);
+ int r;
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ /* Load a .automount file */
+ r = unit_load_fragment_and_dropin(u);
+ if (r < 0)
+ return r;
+
+ if (u->load_state == UNIT_LOADED) {
+ r = automount_set_where(a);
+ if (r < 0)
+ return r;
+
+ r = automount_add_trigger_dependencies(a);
+ if (r < 0)
+ return r;
+
+ r = automount_add_mount_dependencies(a);
+ if (r < 0)
+ return r;
+
+ r = automount_add_default_dependencies(a);
+ if (r < 0)
+ return r;
+ }
+
+ return automount_verify(a);
+}
+
+static void automount_set_state(Automount *a, AutomountState state) {
+ AutomountState old_state;
+ assert(a);
+
+ if (a->state != state)
+ bus_unit_send_pending_change_signal(UNIT(a), false);
+
+ old_state = a->state;
+ a->state = state;
+
+ if (state != AUTOMOUNT_RUNNING)
+ automount_stop_expire(a);
+
+ if (!IN_SET(state, AUTOMOUNT_WAITING, AUTOMOUNT_RUNNING))
+ unmount_autofs(a);
+
+ if (state != old_state)
+ log_unit_debug(UNIT(a), "Changed %s -> %s", automount_state_to_string(old_state), automount_state_to_string(state));
+
+ unit_notify(UNIT(a), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static int automount_coldplug(Unit *u) {
+ Automount *a = AUTOMOUNT(u);
+ int r;
+
+ assert(a);
+ assert(a->state == AUTOMOUNT_DEAD);
+
+ if (a->deserialized_state == a->state)
+ return 0;
+
+ if (IN_SET(a->deserialized_state, AUTOMOUNT_WAITING, AUTOMOUNT_RUNNING)) {
+
+ r = automount_set_where(a);
+ if (r < 0)
+ return r;
+
+ r = open_dev_autofs(u->manager);
+ if (r < 0)
+ return r;
+
+ assert(a->pipe_fd >= 0);
+
+ r = sd_event_add_io(u->manager->event, &a->pipe_event_source, a->pipe_fd, EPOLLIN, automount_dispatch_io, u);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(a->pipe_event_source, "automount-io");
+ if (a->deserialized_state == AUTOMOUNT_RUNNING) {
+ r = automount_start_expire(a);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(a), r, "Failed to start expiration timer, ignoring: %m");
+ }
+
+ automount_set_state(a, a->deserialized_state);
+ }
+
+ return 0;
+}
+
+static void automount_dump(Unit *u, FILE *f, const char *prefix) {
+ char time_string[FORMAT_TIMESPAN_MAX];
+ Automount *a = AUTOMOUNT(u);
+
+ assert(a);
+
+ fprintf(f,
+ "%sAutomount State: %s\n"
+ "%sResult: %s\n"
+ "%sWhere: %s\n"
+ "%sDirectoryMode: %04o\n"
+ "%sTimeoutIdleUSec: %s\n",
+ prefix, automount_state_to_string(a->state),
+ prefix, automount_result_to_string(a->result),
+ prefix, a->where,
+ prefix, a->directory_mode,
+ prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX, a->timeout_idle_usec, USEC_PER_SEC));
+}
+
+static void automount_enter_dead(Automount *a, AutomountResult f) {
+ assert(a);
+
+ if (a->result == AUTOMOUNT_SUCCESS)
+ a->result = f;
+
+ unit_log_result(UNIT(a), a->result == AUTOMOUNT_SUCCESS, automount_result_to_string(a->result));
+ automount_set_state(a, a->result != AUTOMOUNT_SUCCESS ? AUTOMOUNT_FAILED : AUTOMOUNT_DEAD);
+}
+
+static int open_dev_autofs(Manager *m) {
+ struct autofs_dev_ioctl param;
+
+ assert(m);
+
+ if (m->dev_autofs_fd >= 0)
+ return m->dev_autofs_fd;
+
+ (void) label_fix("/dev/autofs", 0);
+
+ m->dev_autofs_fd = open("/dev/autofs", O_CLOEXEC|O_RDONLY);
+ if (m->dev_autofs_fd < 0)
+ return log_error_errno(errno, "Failed to open /dev/autofs: %m");
+
+ init_autofs_dev_ioctl(&param);
+ if (ioctl(m->dev_autofs_fd, AUTOFS_DEV_IOCTL_VERSION, &param) < 0) {
+ m->dev_autofs_fd = safe_close(m->dev_autofs_fd);
+ return -errno;
+ }
+
+ log_debug("Autofs kernel version %i.%i", param.ver_major, param.ver_minor);
+
+ return m->dev_autofs_fd;
+}
+
+static int open_ioctl_fd(int dev_autofs_fd, const char *where, dev_t devid) {
+ struct autofs_dev_ioctl *param;
+ size_t l;
+
+ assert(dev_autofs_fd >= 0);
+ assert(where);
+
+ l = sizeof(struct autofs_dev_ioctl) + strlen(where) + 1;
+ param = alloca(l);
+
+ init_autofs_dev_ioctl(param);
+ param->size = l;
+ param->ioctlfd = -1;
+ param->openmount.devid = devid;
+ strcpy(param->path, where);
+
+ if (ioctl(dev_autofs_fd, AUTOFS_DEV_IOCTL_OPENMOUNT, param) < 0)
+ return -errno;
+
+ if (param->ioctlfd < 0)
+ return -EIO;
+
+ (void) fd_cloexec(param->ioctlfd, true);
+ return param->ioctlfd;
+}
+
+static int autofs_protocol(int dev_autofs_fd, int ioctl_fd) {
+ uint32_t major, minor;
+ struct autofs_dev_ioctl param;
+
+ assert(dev_autofs_fd >= 0);
+ assert(ioctl_fd >= 0);
+
+ init_autofs_dev_ioctl(&param);
+ param.ioctlfd = ioctl_fd;
+
+ if (ioctl(dev_autofs_fd, AUTOFS_DEV_IOCTL_PROTOVER, &param) < 0)
+ return -errno;
+
+ major = param.protover.version;
+
+ init_autofs_dev_ioctl(&param);
+ param.ioctlfd = ioctl_fd;
+
+ if (ioctl(dev_autofs_fd, AUTOFS_DEV_IOCTL_PROTOSUBVER, &param) < 0)
+ return -errno;
+
+ minor = param.protosubver.sub_version;
+
+ log_debug("Autofs protocol version %i.%i", major, minor);
+ return 0;
+}
+
+static int autofs_set_timeout(int dev_autofs_fd, int ioctl_fd, usec_t usec) {
+ struct autofs_dev_ioctl param;
+
+ assert(dev_autofs_fd >= 0);
+ assert(ioctl_fd >= 0);
+
+ init_autofs_dev_ioctl(&param);
+ param.ioctlfd = ioctl_fd;
+
+ if (usec == USEC_INFINITY)
+ param.timeout.timeout = 0;
+ else
+ /* Convert to seconds, rounding up. */
+ param.timeout.timeout = DIV_ROUND_UP(usec, USEC_PER_SEC);
+
+ if (ioctl(dev_autofs_fd, AUTOFS_DEV_IOCTL_TIMEOUT, &param) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int autofs_send_ready(int dev_autofs_fd, int ioctl_fd, uint32_t token, int status) {
+ struct autofs_dev_ioctl param;
+
+ assert(dev_autofs_fd >= 0);
+ assert(ioctl_fd >= 0);
+
+ init_autofs_dev_ioctl(&param);
+ param.ioctlfd = ioctl_fd;
+
+ if (status != 0) {
+ param.fail.token = token;
+ param.fail.status = status;
+ } else
+ param.ready.token = token;
+
+ if (ioctl(dev_autofs_fd, status ? AUTOFS_DEV_IOCTL_FAIL : AUTOFS_DEV_IOCTL_READY, &param) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int automount_send_ready(Automount *a, Set *tokens, int status) {
+ _cleanup_close_ int ioctl_fd = -1;
+ unsigned token;
+ int r;
+
+ assert(a);
+ assert(status <= 0);
+
+ if (set_isempty(tokens))
+ return 0;
+
+ ioctl_fd = open_ioctl_fd(UNIT(a)->manager->dev_autofs_fd, a->where, a->dev_id);
+ if (ioctl_fd < 0)
+ return ioctl_fd;
+
+ if (status != 0)
+ log_unit_debug_errno(UNIT(a), status, "Sending failure: %m");
+ else
+ log_unit_debug(UNIT(a), "Sending success.");
+
+ r = 0;
+
+ /* Autofs thankfully does not hand out 0 as a token */
+ while ((token = PTR_TO_UINT(set_steal_first(tokens)))) {
+ int k;
+
+ /* Autofs fun fact:
+ *
+ * if you pass a positive status code here, kernels
+ * prior to 4.12 will freeze! Yay! */
+
+ k = autofs_send_ready(UNIT(a)->manager->dev_autofs_fd,
+ ioctl_fd,
+ token,
+ status);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static void automount_trigger_notify(Unit *u, Unit *other) {
+ Automount *a = AUTOMOUNT(u);
+ int r;
+
+ assert(a);
+ assert(other);
+
+ /* Filter out invocations with bogus state */
+ if (other->load_state != UNIT_LOADED || other->type != UNIT_MOUNT)
+ return;
+
+ /* Don't propagate state changes from the mount if we are already down */
+ if (!IN_SET(a->state, AUTOMOUNT_WAITING, AUTOMOUNT_RUNNING))
+ return;
+
+ /* Propagate start limit hit state */
+ if (other->start_limit_hit) {
+ automount_enter_dead(a, AUTOMOUNT_FAILURE_MOUNT_START_LIMIT_HIT);
+ return;
+ }
+
+ /* Don't propagate anything if there's still a job queued */
+ if (other->job)
+ return;
+
+ /* The mount is successfully established */
+ if (IN_SET(MOUNT(other)->state, MOUNT_MOUNTED, MOUNT_REMOUNTING)) {
+ (void) automount_send_ready(a, a->tokens, 0);
+
+ r = automount_start_expire(a);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(a), r, "Failed to start expiration timer, ignoring: %m");
+
+ automount_set_state(a, AUTOMOUNT_RUNNING);
+ }
+
+ if (IN_SET(MOUNT(other)->state,
+ MOUNT_MOUNTING, MOUNT_MOUNTING_DONE,
+ MOUNT_MOUNTED, MOUNT_REMOUNTING,
+ MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL,
+ MOUNT_UNMOUNTING_SIGTERM, MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_FAILED)) {
+
+ (void) automount_send_ready(a, a->expire_tokens, -ENODEV);
+ }
+
+ if (MOUNT(other)->state == MOUNT_DEAD)
+ (void) automount_send_ready(a, a->expire_tokens, 0);
+
+ /* The mount is in some unhappy state now, let's unfreeze any waiting clients */
+ if (IN_SET(MOUNT(other)->state,
+ MOUNT_DEAD, MOUNT_UNMOUNTING,
+ MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL,
+ MOUNT_UNMOUNTING_SIGTERM, MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_FAILED)) {
+
+ (void) automount_send_ready(a, a->tokens, -ENODEV);
+
+ automount_set_state(a, AUTOMOUNT_WAITING);
+ }
+}
+
+static void automount_enter_waiting(Automount *a) {
+ _cleanup_close_ int ioctl_fd = -1;
+ int p[2] = { -1, -1 };
+ char name[STRLEN("systemd-") + DECIMAL_STR_MAX(pid_t) + 1];
+ char options[STRLEN("fd=,pgrp=,minproto=5,maxproto=5,direct")
+ + DECIMAL_STR_MAX(int) + DECIMAL_STR_MAX(gid_t) + 1];
+ bool mounted = false;
+ int r, dev_autofs_fd;
+ struct stat st;
+
+ assert(a);
+ assert(a->pipe_fd < 0);
+ assert(a->where);
+
+ set_clear(a->tokens);
+
+ r = unit_fail_if_noncanonical(UNIT(a), a->where);
+ if (r < 0)
+ goto fail;
+
+ (void) mkdir_p_label(a->where, 0555);
+
+ unit_warn_if_dir_nonempty(UNIT(a), a->where);
+
+ dev_autofs_fd = open_dev_autofs(UNIT(a)->manager);
+ if (dev_autofs_fd < 0) {
+ r = dev_autofs_fd;
+ goto fail;
+ }
+
+ if (pipe2(p, O_CLOEXEC) < 0) {
+ r = -errno;
+ goto fail;
+ }
+ r = fd_nonblock(p[0], true);
+ if (r < 0)
+ goto fail;
+
+ xsprintf(options, "fd=%i,pgrp="PID_FMT",minproto=5,maxproto=5,direct", p[1], getpgrp());
+ xsprintf(name, "systemd-"PID_FMT, getpid_cached());
+ if (mount(name, a->where, "autofs", 0, options) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ mounted = true;
+
+ p[1] = safe_close(p[1]);
+
+ if (stat(a->where, &st) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ ioctl_fd = open_ioctl_fd(dev_autofs_fd, a->where, st.st_dev);
+ if (ioctl_fd < 0) {
+ r = ioctl_fd;
+ goto fail;
+ }
+
+ r = autofs_protocol(dev_autofs_fd, ioctl_fd);
+ if (r < 0)
+ goto fail;
+
+ r = autofs_set_timeout(dev_autofs_fd, ioctl_fd, a->timeout_idle_usec);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_add_io(UNIT(a)->manager->event, &a->pipe_event_source, p[0], EPOLLIN, automount_dispatch_io, a);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(a->pipe_event_source, "automount-io");
+
+ a->pipe_fd = p[0];
+ a->dev_id = st.st_dev;
+
+ automount_set_state(a, AUTOMOUNT_WAITING);
+
+ return;
+
+fail:
+ log_unit_error_errno(UNIT(a), r, "Failed to initialize automounter: %m");
+
+ safe_close_pair(p);
+
+ if (mounted) {
+ r = repeat_unmount(a->where, MNT_DETACH);
+ if (r < 0)
+ log_error_errno(r, "Failed to unmount, ignoring: %m");
+ }
+
+ automount_enter_dead(a, AUTOMOUNT_FAILURE_RESOURCES);
+}
+
+static void *expire_thread(void *p) {
+ struct autofs_dev_ioctl param;
+ _cleanup_(expire_data_freep) struct expire_data *data = (struct expire_data*)p;
+ int r;
+
+ assert(data->dev_autofs_fd >= 0);
+ assert(data->ioctl_fd >= 0);
+
+ init_autofs_dev_ioctl(&param);
+ param.ioctlfd = data->ioctl_fd;
+
+ do {
+ r = ioctl(data->dev_autofs_fd, AUTOFS_DEV_IOCTL_EXPIRE, &param);
+ } while (r >= 0);
+
+ if (errno != EAGAIN)
+ log_warning_errno(errno, "Failed to expire automount, ignoring: %m");
+
+ return NULL;
+}
+
+static int automount_dispatch_expire(sd_event_source *source, usec_t usec, void *userdata) {
+ Automount *a = AUTOMOUNT(userdata);
+ _cleanup_(expire_data_freep) struct expire_data *data = NULL;
+ int r;
+
+ assert(a);
+ assert(source == a->expire_event_source);
+
+ data = new0(struct expire_data, 1);
+ if (!data)
+ return log_oom();
+
+ data->ioctl_fd = -1;
+
+ data->dev_autofs_fd = fcntl(UNIT(a)->manager->dev_autofs_fd, F_DUPFD_CLOEXEC, 3);
+ if (data->dev_autofs_fd < 0)
+ return log_unit_error_errno(UNIT(a), errno, "Failed to duplicate autofs fd: %m");
+
+ data->ioctl_fd = open_ioctl_fd(UNIT(a)->manager->dev_autofs_fd, a->where, a->dev_id);
+ if (data->ioctl_fd < 0)
+ return log_unit_error_errno(UNIT(a), data->ioctl_fd, "Couldn't open autofs ioctl fd: %m");
+
+ r = asynchronous_job(expire_thread, data);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(a), r, "Failed to start expire job: %m");
+
+ data = NULL;
+
+ return automount_start_expire(a);
+}
+
+static int automount_start_expire(Automount *a) {
+ int r;
+ usec_t timeout;
+
+ assert(a);
+
+ if (a->timeout_idle_usec == 0)
+ return 0;
+
+ timeout = now(CLOCK_MONOTONIC) + MAX(a->timeout_idle_usec/3, USEC_PER_SEC);
+
+ if (a->expire_event_source) {
+ r = sd_event_source_set_time(a->expire_event_source, timeout);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(a->expire_event_source, SD_EVENT_ONESHOT);
+ }
+
+ r = sd_event_add_time(
+ UNIT(a)->manager->event,
+ &a->expire_event_source,
+ CLOCK_MONOTONIC, timeout, 0,
+ automount_dispatch_expire, a);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(a->expire_event_source, "automount-expire");
+
+ return 0;
+}
+
+static void automount_stop_expire(Automount *a) {
+ assert(a);
+
+ if (!a->expire_event_source)
+ return;
+
+ (void) sd_event_source_set_enabled(a->expire_event_source, SD_EVENT_OFF);
+}
+
+static void automount_enter_running(Automount *a) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Unit *trigger;
+ struct stat st;
+ int r;
+
+ assert(a);
+
+ /* If the user masked our unit in the meantime, fail */
+ if (UNIT(a)->load_state != UNIT_LOADED) {
+ log_unit_error(UNIT(a), "Suppressing automount event since unit is no longer loaded.");
+ goto fail;
+ }
+
+ /* We don't take mount requests anymore if we are supposed to
+ * shut down anyway */
+ if (unit_stop_pending(UNIT(a))) {
+ log_unit_debug(UNIT(a), "Suppressing automount request since unit stop is scheduled.");
+ automount_send_ready(a, a->tokens, -EHOSTDOWN);
+ automount_send_ready(a, a->expire_tokens, -EHOSTDOWN);
+ return;
+ }
+
+ mkdir_p_label(a->where, a->directory_mode);
+
+ /* Before we do anything, let's see if somebody is playing games with us? */
+ if (lstat(a->where, &st) < 0) {
+ log_unit_warning_errno(UNIT(a), errno, "Failed to stat automount point: %m");
+ goto fail;
+ }
+
+ /* The mount unit may have been explicitly started before we got the
+ * autofs request. Ack it to unblock anything waiting on the mount point. */
+ if (!S_ISDIR(st.st_mode) || st.st_dev != a->dev_id) {
+ log_unit_info(UNIT(a), "Automount point already active?");
+ automount_send_ready(a, a->tokens, 0);
+ return;
+ }
+
+ trigger = UNIT_TRIGGER(UNIT(a));
+ if (!trigger) {
+ log_unit_error(UNIT(a), "Unit to trigger vanished.");
+ goto fail;
+ }
+
+ r = manager_add_job(UNIT(a)->manager, JOB_START, trigger, JOB_REPLACE, &error, NULL);
+ if (r < 0) {
+ log_unit_warning(UNIT(a), "Failed to queue mount startup job: %s", bus_error_message(&error, r));
+ goto fail;
+ }
+
+ automount_set_state(a, AUTOMOUNT_RUNNING);
+ return;
+
+fail:
+ automount_enter_dead(a, AUTOMOUNT_FAILURE_RESOURCES);
+}
+
+static int automount_start(Unit *u) {
+ Automount *a = AUTOMOUNT(u);
+ Unit *trigger;
+ int r;
+
+ assert(a);
+ assert(IN_SET(a->state, AUTOMOUNT_DEAD, AUTOMOUNT_FAILED));
+
+ if (path_is_mount_point(a->where, NULL, 0) > 0) {
+ log_unit_error(u, "Path %s is already a mount point, refusing start.", a->where);
+ return -EEXIST;
+ }
+
+ trigger = UNIT_TRIGGER(u);
+ if (!trigger || trigger->load_state != UNIT_LOADED) {
+ log_unit_error(u, "Refusing to start, unit to trigger not loaded.");
+ return -ENOENT;
+ }
+
+ r = unit_start_limit_test(u);
+ if (r < 0) {
+ automount_enter_dead(a, AUTOMOUNT_FAILURE_START_LIMIT_HIT);
+ return r;
+ }
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ a->result = AUTOMOUNT_SUCCESS;
+ automount_enter_waiting(a);
+ return 1;
+}
+
+static int automount_stop(Unit *u) {
+ Automount *a = AUTOMOUNT(u);
+
+ assert(a);
+ assert(IN_SET(a->state, AUTOMOUNT_WAITING, AUTOMOUNT_RUNNING));
+
+ automount_enter_dead(a, AUTOMOUNT_SUCCESS);
+ return 1;
+}
+
+static int automount_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Automount *a = AUTOMOUNT(u);
+ Iterator i;
+ void *p;
+ int r;
+
+ assert(a);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", automount_state_to_string(a->state));
+ (void) serialize_item(f, "result", automount_result_to_string(a->result));
+ (void) serialize_item_format(f, "dev-id", "%lu", (unsigned long) a->dev_id);
+
+ SET_FOREACH(p, a->tokens, i)
+ (void) serialize_item_format(f, "token", "%u", PTR_TO_UINT(p));
+ SET_FOREACH(p, a->expire_tokens, i)
+ (void) serialize_item_format(f, "expire-token", "%u", PTR_TO_UINT(p));
+
+ r = serialize_fd(f, fds, "pipe-fd", a->pipe_fd);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int automount_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Automount *a = AUTOMOUNT(u);
+ int r;
+
+ assert(a);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ AutomountState state;
+
+ state = automount_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ a->deserialized_state = state;
+ } else if (streq(key, "result")) {
+ AutomountResult f;
+
+ f = automount_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse result value: %s", value);
+ else if (f != AUTOMOUNT_SUCCESS)
+ a->result = f;
+
+ } else if (streq(key, "dev-id")) {
+ unsigned long d;
+
+ if (safe_atolu(value, &d) < 0)
+ log_unit_debug(u, "Failed to parse dev-id value: %s", value);
+ else
+ a->dev_id = (dev_t) d;
+
+ } else if (streq(key, "token")) {
+ unsigned token;
+
+ if (safe_atou(value, &token) < 0)
+ log_unit_debug(u, "Failed to parse token value: %s", value);
+ else {
+ r = set_ensure_allocated(&a->tokens, NULL);
+ if (r < 0) {
+ log_oom();
+ return 0;
+ }
+
+ r = set_put(a->tokens, UINT_TO_PTR(token));
+ if (r < 0)
+ log_unit_error_errno(u, r, "Failed to add token to set: %m");
+ }
+ } else if (streq(key, "expire-token")) {
+ unsigned token;
+
+ if (safe_atou(value, &token) < 0)
+ log_unit_debug(u, "Failed to parse token value: %s", value);
+ else {
+ r = set_ensure_allocated(&a->expire_tokens, NULL);
+ if (r < 0) {
+ log_oom();
+ return 0;
+ }
+
+ r = set_put(a->expire_tokens, UINT_TO_PTR(token));
+ if (r < 0)
+ log_unit_error_errno(u, r, "Failed to add expire token to set: %m");
+ }
+ } else if (streq(key, "pipe-fd")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse pipe-fd value: %s", value);
+ else {
+ safe_close(a->pipe_fd);
+ a->pipe_fd = fdset_remove(fds, fd);
+ }
+ } else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+static UnitActiveState automount_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[AUTOMOUNT(u)->state];
+}
+
+static const char *automount_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return automount_state_to_string(AUTOMOUNT(u)->state);
+}
+
+static bool automount_may_gc(Unit *u) {
+ Unit *t;
+
+ assert(u);
+
+ t = UNIT_TRIGGER(u);
+ if (!t)
+ return true;
+
+ return UNIT_VTABLE(t)->may_gc(t);
+}
+
+static int automount_dispatch_io(sd_event_source *s, int fd, uint32_t events, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ union autofs_v5_packet_union packet;
+ Automount *a = AUTOMOUNT(userdata);
+ Unit *trigger;
+ int r;
+
+ assert(a);
+ assert(fd == a->pipe_fd);
+
+ if (events != EPOLLIN) {
+ log_unit_error(UNIT(a), "Got invalid poll event %"PRIu32" on pipe (fd=%d)", events, fd);
+ goto fail;
+ }
+
+ r = loop_read_exact(a->pipe_fd, &packet, sizeof(packet), true);
+ if (r < 0) {
+ log_unit_error_errno(UNIT(a), r, "Invalid read from pipe: %m");
+ goto fail;
+ }
+
+ switch (packet.hdr.type) {
+
+ case autofs_ptype_missing_direct:
+
+ if (packet.v5_packet.pid > 0) {
+ _cleanup_free_ char *p = NULL;
+
+ get_process_comm(packet.v5_packet.pid, &p);
+ log_unit_info(UNIT(a), "Got automount request for %s, triggered by %"PRIu32" (%s)", a->where, packet.v5_packet.pid, strna(p));
+ } else
+ log_unit_debug(UNIT(a), "Got direct mount request on %s", a->where);
+
+ r = set_ensure_allocated(&a->tokens, NULL);
+ if (r < 0) {
+ log_unit_error(UNIT(a), "Failed to allocate token set.");
+ goto fail;
+ }
+
+ r = set_put(a->tokens, UINT_TO_PTR(packet.v5_packet.wait_queue_token));
+ if (r < 0) {
+ log_unit_error_errno(UNIT(a), r, "Failed to remember token: %m");
+ goto fail;
+ }
+
+ automount_enter_running(a);
+ break;
+
+ case autofs_ptype_expire_direct:
+ log_unit_debug(UNIT(a), "Got direct umount request on %s", a->where);
+
+ automount_stop_expire(a);
+
+ r = set_ensure_allocated(&a->expire_tokens, NULL);
+ if (r < 0) {
+ log_unit_error(UNIT(a), "Failed to allocate token set.");
+ goto fail;
+ }
+
+ r = set_put(a->expire_tokens, UINT_TO_PTR(packet.v5_packet.wait_queue_token));
+ if (r < 0) {
+ log_unit_error_errno(UNIT(a), r, "Failed to remember token: %m");
+ goto fail;
+ }
+
+ trigger = UNIT_TRIGGER(UNIT(a));
+ if (!trigger) {
+ log_unit_error(UNIT(a), "Unit to trigger vanished.");
+ goto fail;
+ }
+
+ r = manager_add_job(UNIT(a)->manager, JOB_STOP, trigger, JOB_REPLACE, &error, NULL);
+ if (r < 0) {
+ log_unit_warning(UNIT(a), "Failed to queue umount startup job: %s", bus_error_message(&error, r));
+ goto fail;
+ }
+ break;
+
+ default:
+ log_unit_error(UNIT(a), "Received unknown automount request %i", packet.hdr.type);
+ break;
+ }
+
+ return 0;
+
+fail:
+ automount_enter_dead(a, AUTOMOUNT_FAILURE_RESOURCES);
+ return 0;
+}
+
+static void automount_shutdown(Manager *m) {
+ assert(m);
+
+ m->dev_autofs_fd = safe_close(m->dev_autofs_fd);
+}
+
+static void automount_reset_failed(Unit *u) {
+ Automount *a = AUTOMOUNT(u);
+
+ assert(a);
+
+ if (a->state == AUTOMOUNT_FAILED)
+ automount_set_state(a, AUTOMOUNT_DEAD);
+
+ a->result = AUTOMOUNT_SUCCESS;
+}
+
+static bool automount_supported(void) {
+ static int supported = -1;
+
+ if (supported < 0)
+ supported = access("/dev/autofs", F_OK) >= 0;
+
+ return supported;
+}
+
+static const char* const automount_result_table[_AUTOMOUNT_RESULT_MAX] = {
+ [AUTOMOUNT_SUCCESS] = "success",
+ [AUTOMOUNT_FAILURE_RESOURCES] = "resources",
+ [AUTOMOUNT_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+ [AUTOMOUNT_FAILURE_MOUNT_START_LIMIT_HIT] = "mount-start-limit-hit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(automount_result, AutomountResult);
+
+const UnitVTable automount_vtable = {
+ .object_size = sizeof(Automount),
+
+ .sections =
+ "Unit\0"
+ "Automount\0"
+ "Install\0",
+
+ .init = automount_init,
+ .load = automount_load,
+ .done = automount_done,
+
+ .coldplug = automount_coldplug,
+
+ .dump = automount_dump,
+
+ .start = automount_start,
+ .stop = automount_stop,
+
+ .serialize = automount_serialize,
+ .deserialize_item = automount_deserialize_item,
+
+ .active_state = automount_active_state,
+ .sub_state_to_string = automount_sub_state_to_string,
+
+ .may_gc = automount_may_gc,
+
+ .trigger_notify = automount_trigger_notify,
+
+ .reset_failed = automount_reset_failed,
+
+ .bus_vtable = bus_automount_vtable,
+ .bus_set_property = bus_automount_set_property,
+
+ .can_transient = true,
+
+ .shutdown = automount_shutdown,
+ .supported = automount_supported,
+
+ .status_message_formats = {
+ .finished_start_job = {
+ [JOB_DONE] = "Set up automount %s.",
+ [JOB_FAILED] = "Failed to set up automount %s.",
+ },
+ .finished_stop_job = {
+ [JOB_DONE] = "Unset automount %s.",
+ [JOB_FAILED] = "Failed to unset automount %s.",
+ },
+ },
+};
diff --git a/src/core/automount.h b/src/core/automount.h
new file mode 100644
index 0000000..21dd1c0
--- /dev/null
+++ b/src/core/automount.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Automount Automount;
+
+#include "unit.h"
+
+typedef enum AutomountResult {
+ AUTOMOUNT_SUCCESS,
+ AUTOMOUNT_FAILURE_RESOURCES,
+ AUTOMOUNT_FAILURE_START_LIMIT_HIT,
+ AUTOMOUNT_FAILURE_MOUNT_START_LIMIT_HIT,
+ _AUTOMOUNT_RESULT_MAX,
+ _AUTOMOUNT_RESULT_INVALID = -1
+} AutomountResult;
+
+struct Automount {
+ Unit meta;
+
+ AutomountState state, deserialized_state;
+
+ char *where;
+ usec_t timeout_idle_usec;
+
+ int pipe_fd;
+ sd_event_source *pipe_event_source;
+ mode_t directory_mode;
+ dev_t dev_id;
+
+ Set *tokens;
+ Set *expire_tokens;
+
+ sd_event_source *expire_event_source;
+
+ AutomountResult result;
+};
+
+extern const UnitVTable automount_vtable;
+
+const char* automount_result_to_string(AutomountResult i) _const_;
+AutomountResult automount_result_from_string(const char *s) _pure_;
+
+DEFINE_CAST(AUTOMOUNT, Automount);
diff --git a/src/core/bpf-devices.c b/src/core/bpf-devices.c
new file mode 100644
index 0000000..81e91fc
--- /dev/null
+++ b/src/core/bpf-devices.c
@@ -0,0 +1,270 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#include <linux/libbpf.h>
+
+#include "bpf-devices.h"
+#include "bpf-program.h"
+
+#define PASS_JUMP_OFF 4096
+
+static int bpf_access_type(const char *acc) {
+ int r = 0;
+
+ assert(acc);
+
+ for (; *acc; acc++)
+ switch(*acc) {
+ case 'r':
+ r |= BPF_DEVCG_ACC_READ;
+ break;
+ case 'w':
+ r |= BPF_DEVCG_ACC_WRITE;
+ break;
+ case 'm':
+ r |= BPF_DEVCG_ACC_MKNOD;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+int cgroup_bpf_whitelist_device(BPFProgram *prog, int type, int major, int minor, const char *acc) {
+ struct bpf_insn insn[] = {
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 6), /* compare device type */
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3), /* compare access type */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2), /* compare major */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1), /* compare minor */
+ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
+ };
+ int r, access;
+
+ assert(prog);
+ assert(acc);
+
+ access = bpf_access_type(acc);
+ if (access <= 0)
+ return -EINVAL;
+
+ insn[2].imm = access;
+
+ r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ return r;
+}
+
+int cgroup_bpf_whitelist_major(BPFProgram *prog, int type, int major, const char *acc) {
+ struct bpf_insn insn[] = {
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 5), /* compare device type */
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2), /* compare access type */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1), /* compare major */
+ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
+ };
+ int r, access;
+
+ assert(prog);
+ assert(acc);
+
+ access = bpf_access_type(acc);
+ if (access <= 0)
+ return -EINVAL;
+
+ insn[2].imm = access;
+
+ r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ return r;
+}
+
+int cgroup_bpf_whitelist_class(BPFProgram *prog, int type, const char *acc) {
+ struct bpf_insn insn[] = {
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 5), /* compare device type */
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 1), /* compare access type */
+ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
+ };
+ int r, access;
+
+ assert(prog);
+ assert(acc);
+
+ access = bpf_access_type(acc);
+ if (access <= 0)
+ return -EINVAL;
+
+ insn[2].imm = access;
+
+ r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ return r;
+}
+
+int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist) {
+ struct bpf_insn pre_insn[] = {
+ /* load device type to r2 */
+ BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, access_type)),
+
+ /* load access type to r3 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, access_type)),
+ BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+
+ /* load major number to r4 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, major)),
+
+ /* load minor number to r5 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, minor)),
+ };
+
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+ int r;
+
+ assert(ret);
+
+ if (policy == CGROUP_AUTO && !whitelist)
+ return 0;
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &prog);
+ if (r < 0)
+ return log_error_errno(r, "Loading device control BPF program failed: %m");
+
+ if (policy == CGROUP_CLOSED || whitelist) {
+ r = bpf_program_add_instructions(prog, pre_insn, ELEMENTSOF(pre_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+ }
+
+ *ret = TAKE_PTR(prog);
+
+ return 0;
+}
+
+int cgroup_apply_device_bpf(Unit *u, BPFProgram *prog, CGroupDevicePolicy policy, bool whitelist) {
+ struct bpf_insn post_insn[] = {
+ /* return DENY */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ };
+
+ struct bpf_insn exit_insn[] = {
+ /* else return ALLOW */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ if (!prog) {
+ /* Remove existing program. */
+ u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
+ return 0;
+ }
+
+ if (policy != CGROUP_STRICT || whitelist) {
+ size_t off;
+
+ r = bpf_program_add_instructions(prog, post_insn, ELEMENTSOF(post_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ /* Fixup PASS_JUMP_OFF jump offsets. */
+ for (off = 0; off < prog->n_instructions; off++) {
+ struct bpf_insn *ins = &prog->instructions[off];
+
+ if (ins->code == (BPF_JMP | BPF_JA) && ins->off == PASS_JUMP_OFF)
+ ins->off = prog->n_instructions - off - 1;
+ }
+ } else
+ /* Explicitly forbid everything. */
+ exit_insn[0].imm = 0;
+
+ r = bpf_program_add_instructions(prog, exit_insn, ELEMENTSOF(exit_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine cgroup path: %m");
+
+ r = bpf_program_cgroup_attach(prog, BPF_CGROUP_DEVICE, path, BPF_F_ALLOW_MULTI);
+ if (r < 0)
+ return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m", path);
+
+ /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program. */
+ u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
+
+ /* Remember that this BPF program is installed now. */
+ u->bpf_device_control_installed = bpf_program_ref(prog);
+
+ return 0;
+}
+
+int bpf_devices_supported(void) {
+ struct bpf_insn trivial[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
+ static int supported = -1;
+ int r;
+
+ /* Checks whether BPF device controller is supported. For this, we check five things:
+ *
+ * a) whether we are privileged
+ * b) whether the unified hierarchy is being used
+ * c) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require
+ */
+
+ if (supported >= 0)
+ return supported;
+
+ if (geteuid() != 0) {
+ log_debug("Not enough privileges, BPF device control is not supported.");
+ return supported = 0;
+ }
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
+ if (r == 0) {
+ log_debug("Not running with unified cgroups, BPF device control is not supported.");
+ return supported = 0;
+ }
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &program);
+ if (r < 0) {
+ log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+ return supported = 0;
+ }
+
+ r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
+ if (r < 0) {
+ log_debug_errno(r, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+ return supported = 0;
+ }
+
+ r = bpf_program_load_kernel(program, NULL, 0);
+ if (r < 0) {
+ log_debug_errno(r, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+ return supported = 0;
+ }
+
+ return supported = 1;
+}
diff --git a/src/core/bpf-devices.h b/src/core/bpf-devices.h
new file mode 100644
index 0000000..8d3de3b
--- /dev/null
+++ b/src/core/bpf-devices.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+
+#include "unit.h"
+
+struct BPFProgram;
+
+int bpf_devices_supported(void);
+
+int cgroup_bpf_whitelist_device(BPFProgram *p, int type, int major, int minor, const char *acc);
+int cgroup_bpf_whitelist_major(BPFProgram *p, int type, int major, const char *acc);
+int cgroup_bpf_whitelist_class(BPFProgram *prog, int type, const char *acc);
+
+int cgroup_init_device_bpf(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist);
+int cgroup_apply_device_bpf(Unit *u, BPFProgram *p, CGroupDevicePolicy policy, bool whitelist);
diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c
new file mode 100644
index 0000000..b9a611f
--- /dev/null
+++ b/src/core/bpf-firewall.c
@@ -0,0 +1,767 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/libbpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "bpf-program.h"
+#include "fd-util.h"
+#include "ip-address-access.h"
+#include "missing_syscall.h"
+#include "unit.h"
+
+enum {
+ MAP_KEY_PACKETS,
+ MAP_KEY_BYTES,
+};
+
+enum {
+ ACCESS_ALLOWED = 1,
+ ACCESS_DENIED = 2,
+};
+
+/* Compile instructions for one list of addresses, one direction and one specific verdict on matches. */
+
+static int add_lookup_instructions(
+ BPFProgram *p,
+ int map_fd,
+ int protocol,
+ bool is_ingress,
+ int verdict) {
+
+ int r, addr_offset, addr_size;
+
+ assert(p);
+ assert(map_fd >= 0);
+
+ switch (protocol) {
+
+ case ETH_P_IP:
+ addr_size = sizeof(uint32_t);
+ addr_offset = is_ingress ?
+ offsetof(struct iphdr, saddr) :
+ offsetof(struct iphdr, daddr);
+ break;
+
+ case ETH_P_IPV6:
+ addr_size = 4 * sizeof(uint32_t);
+ addr_offset = is_ingress ?
+ offsetof(struct ip6_hdr, ip6_src.s6_addr) :
+ offsetof(struct ip6_hdr, ip6_dst.s6_addr);
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ do {
+ /* Compare IPv4 with one word instruction (32bit) */
+ struct bpf_insn insn[] = {
+ /* If skb->protocol != ETH_P_IP, skip this whole block. The offset will be set later. */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, htobe16(protocol), 0),
+
+ /*
+ * Call into BPF_FUNC_skb_load_bytes to load the dst/src IP address
+ *
+ * R1: Pointer to the skb
+ * R2: Data offset
+ * R3: Destination buffer on the stack (r10 - 4)
+ * R4: Number of bytes to read (4)
+ */
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV32_IMM(BPF_REG_2, addr_offset),
+
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -addr_size),
+
+ BPF_MOV32_IMM(BPF_REG_4, addr_size),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
+
+ /*
+ * Call into BPF_FUNC_map_lookup_elem to see if the address matches any entry in the
+ * LPM trie map. For this to work, the prefixlen field of 'struct bpf_lpm_trie_key'
+ * has to be set to the maximum possible value.
+ *
+ * On success, the looked up value is stored in R0. For this application, the actual
+ * value doesn't matter, however; we just set the bit in @verdict in R8 if we found any
+ * matching value.
+ */
+
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -addr_size - sizeof(uint32_t)),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, addr_size * 8),
+
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
+ };
+
+ /* Jump label fixup */
+ insn[0].off = ELEMENTSOF(insn) - 1;
+
+ r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ return r;
+
+ } while (false);
+
+ return 0;
+}
+
+static int bpf_firewall_compile_bpf(
+ Unit *u,
+ bool is_ingress,
+ BPFProgram **ret) {
+
+ struct bpf_insn pre_insn[] = {
+ /*
+ * When the eBPF program is entered, R1 contains the address of the skb.
+ * However, R1-R5 are scratch registers that are not preserved when calling
+ * into kernel functions, so we need to save anything that's supposed to
+ * stay around to R6-R9. Save the skb to R6.
+ */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /*
+ * Although we cannot access the skb data directly from eBPF programs used in this
+ * scenario, the kernel has prepared some fields for us to access through struct __sk_buff.
+ * Load the protocol (IPv4, IPv6) used by the packet in flight once and cache it in R7
+ * for later use.
+ */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct __sk_buff, protocol)),
+
+ /*
+ * R8 is used to keep track of whether any address check has explicitly allowed or denied the packet
+ * through ACCESS_DENIED or ACCESS_ALLOWED bits. Reset them both to 0 in the beginning.
+ */
+ BPF_MOV32_IMM(BPF_REG_8, 0),
+ };
+
+ /*
+ * The access checkers compiled for the configured allowance and denial lists
+ * write to R8 at runtime. The following code prepares for an early exit that
+ * skip the accounting if the packet is denied.
+ *
+ * R0 = 1
+ * if (R8 == ACCESS_DENIED)
+ * R0 = 0
+ *
+ * This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
+ * is allowed to pass.
+ */
+ struct bpf_insn post_insn[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ };
+
+ _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+ int accounting_map_fd, r;
+ bool access_enabled;
+
+ assert(u);
+ assert(ret);
+
+ accounting_map_fd = is_ingress ?
+ u->ip_accounting_ingress_map_fd :
+ u->ip_accounting_egress_map_fd;
+
+ access_enabled =
+ u->ipv4_allow_map_fd >= 0 ||
+ u->ipv6_allow_map_fd >= 0 ||
+ u->ipv4_deny_map_fd >= 0 ||
+ u->ipv6_deny_map_fd >= 0;
+
+ if (accounting_map_fd < 0 && !access_enabled) {
+ *ret = NULL;
+ return 0;
+ }
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
+ if (r < 0)
+ return r;
+
+ r = bpf_program_add_instructions(p, pre_insn, ELEMENTSOF(pre_insn));
+ if (r < 0)
+ return r;
+
+ if (access_enabled) {
+ /*
+ * The simple rule this function translates into eBPF instructions is:
+ *
+ * - Access will be granted when an address matches an entry in @list_allow
+ * - Otherwise, access will be denied when an address matches an entry in @list_deny
+ * - Otherwise, access will be granted
+ */
+
+ if (u->ipv4_deny_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
+ if (r < 0)
+ return r;
+ }
+
+ if (u->ipv6_deny_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
+ if (r < 0)
+ return r;
+ }
+
+ if (u->ipv4_allow_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
+ if (r < 0)
+ return r;
+ }
+
+ if (u->ipv6_allow_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ r = bpf_program_add_instructions(p, post_insn, ELEMENTSOF(post_insn));
+ if (r < 0)
+ return r;
+
+ if (accounting_map_fd >= 0) {
+ struct bpf_insn insn[] = {
+ /*
+ * If R0 == 0, the packet will be denied; skip the accounting instructions in this case.
+ * The jump label will be fixed up later.
+ */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),
+
+ /* Count packets */
+ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd), /* load map fd to r1 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ /* Count bytes */
+ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ /* Allow the packet to pass */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ };
+
+ /* Jump label fixup */
+ insn[0].off = ELEMENTSOF(insn) - 1;
+
+ r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ return r;
+ }
+
+ do {
+ /*
+ * Exit from the eBPF program, R0 contains the verdict.
+ * 0 means the packet is denied, 1 means the packet may pass.
+ */
+ struct bpf_insn insn[] = {
+ BPF_EXIT_INSN()
+ };
+
+ r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ return r;
+ } while (false);
+
+ *ret = TAKE_PTR(p);
+
+ return 0;
+}
+
+static int bpf_firewall_count_access_items(IPAddressAccessItem *list, size_t *n_ipv4, size_t *n_ipv6) {
+ IPAddressAccessItem *a;
+
+ assert(n_ipv4);
+ assert(n_ipv6);
+
+ LIST_FOREACH(items, a, list) {
+ switch (a->family) {
+
+ case AF_INET:
+ (*n_ipv4)++;
+ break;
+
+ case AF_INET6:
+ (*n_ipv6)++;
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+ }
+
+ return 0;
+}
+
+static int bpf_firewall_add_access_items(
+ IPAddressAccessItem *list,
+ int ipv4_map_fd,
+ int ipv6_map_fd,
+ int verdict) {
+
+ struct bpf_lpm_trie_key *key_ipv4, *key_ipv6;
+ uint64_t value = verdict;
+ IPAddressAccessItem *a;
+ int r;
+
+ key_ipv4 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t));
+ key_ipv6 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t) * 4);
+
+ LIST_FOREACH(items, a, list) {
+ switch (a->family) {
+
+ case AF_INET:
+ key_ipv4->prefixlen = a->prefixlen;
+ memcpy(key_ipv4->data, &a->address, sizeof(uint32_t));
+
+ r = bpf_map_update_element(ipv4_map_fd, key_ipv4, &value);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case AF_INET6:
+ key_ipv6->prefixlen = a->prefixlen;
+ memcpy(key_ipv6->data, &a->address, 4 * sizeof(uint32_t));
+
+ r = bpf_map_update_element(ipv6_map_fd, key_ipv6, &value);
+ if (r < 0)
+ return r;
+
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+ }
+
+ return 0;
+}
+
+static int bpf_firewall_prepare_access_maps(
+ Unit *u,
+ int verdict,
+ int *ret_ipv4_map_fd,
+ int *ret_ipv6_map_fd) {
+
+ _cleanup_close_ int ipv4_map_fd = -1, ipv6_map_fd = -1;
+ size_t n_ipv4 = 0, n_ipv6 = 0;
+ Unit *p;
+ int r;
+
+ assert(ret_ipv4_map_fd);
+ assert(ret_ipv6_map_fd);
+
+ for (p = u; p; p = UNIT_DEREF(p->slice)) {
+ CGroupContext *cc;
+
+ cc = unit_get_cgroup_context(p);
+ if (!cc)
+ continue;
+
+ bpf_firewall_count_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny, &n_ipv4, &n_ipv6);
+ }
+
+ if (n_ipv4 > 0) {
+ ipv4_map_fd = bpf_map_new(
+ BPF_MAP_TYPE_LPM_TRIE,
+ offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t),
+ sizeof(uint64_t),
+ n_ipv4,
+ BPF_F_NO_PREALLOC);
+ if (ipv4_map_fd < 0)
+ return ipv4_map_fd;
+ }
+
+ if (n_ipv6 > 0) {
+ ipv6_map_fd = bpf_map_new(
+ BPF_MAP_TYPE_LPM_TRIE,
+ offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t)*4,
+ sizeof(uint64_t),
+ n_ipv6,
+ BPF_F_NO_PREALLOC);
+ if (ipv6_map_fd < 0)
+ return ipv6_map_fd;
+ }
+
+ for (p = u; p; p = UNIT_DEREF(p->slice)) {
+ CGroupContext *cc;
+
+ cc = unit_get_cgroup_context(p);
+ if (!cc)
+ continue;
+
+ r = bpf_firewall_add_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny,
+ ipv4_map_fd, ipv6_map_fd, verdict);
+ if (r < 0)
+ return r;
+ }
+
+ *ret_ipv4_map_fd = ipv4_map_fd;
+ *ret_ipv6_map_fd = ipv6_map_fd;
+
+ ipv4_map_fd = ipv6_map_fd = -1;
+ return 0;
+}
+
+static int bpf_firewall_prepare_accounting_maps(Unit *u, bool enabled, int *fd_ingress, int *fd_egress) {
+ int r;
+
+ assert(u);
+ assert(fd_ingress);
+ assert(fd_egress);
+
+ if (enabled) {
+ if (*fd_ingress < 0) {
+ r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
+ if (r < 0)
+ return r;
+
+ *fd_ingress = r;
+ }
+
+ if (*fd_egress < 0) {
+
+ r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
+ if (r < 0)
+ return r;
+
+ *fd_egress = r;
+ }
+
+ } else {
+ *fd_ingress = safe_close(*fd_ingress);
+ *fd_egress = safe_close(*fd_egress);
+
+ zero(u->ip_accounting_extra);
+ }
+
+ return 0;
+}
+
+int bpf_firewall_compile(Unit *u) {
+ CGroupContext *cc;
+ int r, supported;
+
+ assert(u);
+
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return -EINVAL;
+
+ supported = bpf_firewall_supported();
+ if (supported < 0)
+ return supported;
+ if (supported == BPF_FIREWALL_UNSUPPORTED) {
+ log_unit_debug(u, "BPF firewalling not supported on this manager, proceeding without.");
+ return -EOPNOTSUPP;
+ }
+ if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE) {
+ /* If BPF_F_ALLOW_MULTI is not supported we don't support any BPF magic on inner nodes (i.e. on slice
+ * units), since that would mean leaf nodes couldn't do any BPF anymore at all. Under the assumption
+ * that BPF is more interesting on leaf nodes we hence avoid it on inner nodes in that case. This is
+ * consistent with old systemd behaviour from before v238, where BPF wasn't supported in inner nodes at
+ * all, either. */
+ log_unit_debug(u, "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
+ return -EOPNOTSUPP;
+ }
+
+ /* Note that when we compile a new firewall we first flush out the access maps and the BPF programs themselves,
+ * but we reuse the the accounting maps. That way the firewall in effect always maps to the actual
+ * configuration, but we don't flush out the accounting unnecessarily */
+
+ u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
+ u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);
+
+ u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
+ u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
+
+ u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
+ u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
+
+ if (u->type != UNIT_SLICE) {
+ /* In inner nodes we only do accounting, we do not actually bother with access control. However, leaf
+ * nodes will incorporate all IP access rules set on all their parent nodes. This has the benefit that
+ * they can optionally cancel out system-wide rules. Since inner nodes can't contain processes this
+ * means that all configure IP access rules *will* take effect on processes, even though we never
+ * compile them for inner nodes. */
+
+ r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Preparation of eBPF allow maps failed: %m");
+
+ r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Preparation of eBPF deny maps failed: %m");
+ }
+
+ r = bpf_firewall_prepare_accounting_maps(u, cc->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Preparation of eBPF accounting maps failed: %m");
+
+ r = bpf_firewall_compile_bpf(u, true, &u->ip_bpf_ingress);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Compilation for ingress BPF program failed: %m");
+
+ r = bpf_firewall_compile_bpf(u, false, &u->ip_bpf_egress);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Compilation for egress BPF program failed: %m");
+
+ return 0;
+}
+
+int bpf_firewall_install(Unit *u) {
+ _cleanup_free_ char *path = NULL;
+ CGroupContext *cc;
+ int r, supported;
+ uint32_t flags;
+
+ assert(u);
+
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return -EINVAL;
+ if (!u->cgroup_path)
+ return -EINVAL;
+ if (!u->cgroup_realized)
+ return -EINVAL;
+
+ supported = bpf_firewall_supported();
+ if (supported < 0)
+ return supported;
+ if (supported == BPF_FIREWALL_UNSUPPORTED) {
+ log_unit_debug(u, "BPF firewalling not supported on this manager, proceeding without.");
+ return -EOPNOTSUPP;
+ }
+ if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE) {
+ log_unit_debug(u, "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
+ return -EOPNOTSUPP;
+ }
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to determine cgroup path: %m");
+
+ flags = (supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
+ (u->type == UNIT_SLICE || unit_cgroup_delegate(u))) ? BPF_F_ALLOW_MULTI : 0;
+
+ /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program, to
+ * minimize the time window when we don't account for IP traffic. */
+ u->ip_bpf_egress_installed = bpf_program_unref(u->ip_bpf_egress_installed);
+ u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
+
+ if (u->ip_bpf_egress) {
+ r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Attaching egress BPF program to cgroup %s failed: %m", path);
+
+ /* Remember that this BPF program is installed now. */
+ u->ip_bpf_egress_installed = bpf_program_ref(u->ip_bpf_egress);
+ }
+
+ if (u->ip_bpf_ingress) {
+ r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
+
+ u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress);
+ }
+
+ return 0;
+}
+
+int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets) {
+ uint64_t key, packets;
+ int r;
+
+ if (map_fd < 0)
+ return -EBADF;
+
+ if (ret_packets) {
+ key = MAP_KEY_PACKETS;
+ r = bpf_map_lookup_element(map_fd, &key, &packets);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_bytes) {
+ key = MAP_KEY_BYTES;
+ r = bpf_map_lookup_element(map_fd, &key, ret_bytes);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_packets)
+ *ret_packets = packets;
+
+ return 0;
+}
+
+int bpf_firewall_reset_accounting(int map_fd) {
+ uint64_t key, value = 0;
+ int r;
+
+ if (map_fd < 0)
+ return -EBADF;
+
+ key = MAP_KEY_PACKETS;
+ r = bpf_map_update_element(map_fd, &key, &value);
+ if (r < 0)
+ return r;
+
+ key = MAP_KEY_BYTES;
+ return bpf_map_update_element(map_fd, &key, &value);
+}
+
+int bpf_firewall_supported(void) {
+ struct bpf_insn trivial[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
+ static int supported = -1;
+ union bpf_attr attr;
+ int fd, r;
+
+ /* Checks whether BPF firewalling is supported. For this, we check five things:
+ *
+ * a) whether we are privileged
+ * b) whether the unified hierarchy is being used
+ * c) the BPF implementation in the kernel supports BPF LPM TRIE maps, which we require
+ * d) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_SKB programs, which we require
+ * e) the BPF implementation in the kernel supports the BPF_PROG_DETACH call, which we require
+ */
+
+ if (supported >= 0)
+ return supported;
+
+ if (geteuid() != 0) {
+ log_debug("Not enough privileges, BPF firewalling is not supported.");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
+ if (r == 0) {
+ log_debug("Not running with unified cgroups, BPF firewalling is not supported.");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ fd = bpf_map_new(BPF_MAP_TYPE_LPM_TRIE,
+ offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint64_t),
+ sizeof(uint64_t),
+ 1,
+ BPF_F_NO_PREALLOC);
+ if (fd < 0) {
+ log_debug_errno(fd, "Can't allocate BPF LPM TRIE map, BPF firewalling is not supported: %m");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ safe_close(fd);
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &program);
+ if (r < 0) {
+ log_debug_errno(r, "Can't allocate CGROUP SKB BPF program, BPF firewalling is not supported: %m");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
+ if (r < 0) {
+ log_debug_errno(r, "Can't add trivial instructions to CGROUP SKB BPF program, BPF firewalling is not supported: %m");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ r = bpf_program_load_kernel(program, NULL, 0);
+ if (r < 0) {
+ log_debug_errno(r, "Can't load kernel CGROUP SKB BPF program, BPF firewalling is not supported: %m");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ /* Unfortunately the kernel allows us to create BPF_PROG_TYPE_CGROUP_SKB programs even when CONFIG_CGROUP_BPF
+ * is turned off at kernel compilation time. This sucks of course: why does it allow us to create a cgroup BPF
+ * program if we can't do a thing with it later?
+ *
+ * We detect this case by issuing the BPF_PROG_DETACH bpf() call with invalid file descriptors: if
+ * CONFIG_CGROUP_BPF is turned off, then the call will fail early with EINVAL. If it is turned on the
+ * parameters are validated however, and that'll fail with EBADF then. */
+
+ attr = (union bpf_attr) {
+ .attach_type = BPF_CGROUP_INET_EGRESS,
+ .target_fd = -1,
+ .attach_bpf_fd = -1,
+ };
+
+ if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0) {
+ if (errno != EBADF) {
+ log_debug_errno(errno, "Didn't get EBADF from BPF_PROG_DETACH, BPF firewalling is not supported: %m");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ /* YAY! */
+ } else {
+ log_debug("Wut? Kernel accepted our invalid BPF_PROG_DETACH call? Something is weird, assuming BPF firewalling is broken and hence not supported.");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ /* So now we know that the BPF program is generally available, let's see if BPF_F_ALLOW_MULTI is also supported
+ * (which was added in kernel 4.15). We use a similar logic as before, but this time we use the BPF_PROG_ATTACH
+ * bpf() call and the BPF_F_ALLOW_MULTI flags value. Since the flags are checked early in the system call we'll
+ * get EINVAL if it's not supported, and EBADF as before if it is available. */
+
+ attr = (union bpf_attr) {
+ .attach_type = BPF_CGROUP_INET_EGRESS,
+ .target_fd = -1,
+ .attach_bpf_fd = -1,
+ .attach_flags = BPF_F_ALLOW_MULTI,
+ };
+
+ if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0) {
+ if (errno == EBADF) {
+ log_debug_errno(errno, "Got EBADF when using BPF_F_ALLOW_MULTI, which indicates it is supported. Yay!");
+ return supported = BPF_FIREWALL_SUPPORTED_WITH_MULTI;
+ }
+
+ if (errno == EINVAL)
+ log_debug_errno(errno, "Got EINVAL error when using BPF_F_ALLOW_MULTI, which indicates it's not supported.");
+ else
+ log_debug_errno(errno, "Got unexpected error when using BPF_F_ALLOW_MULTI, assuming it's not supported: %m");
+
+ return supported = BPF_FIREWALL_SUPPORTED;
+ } else {
+ log_debug("Wut? Kernel accepted our invalid BPF_PROG_ATTACH+BPF_F_ALLOW_MULTI call? Something is weird, assuming BPF firewalling is broken and hence not supported.");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+}
diff --git a/src/core/bpf-firewall.h b/src/core/bpf-firewall.h
new file mode 100644
index 0000000..7d38483
--- /dev/null
+++ b/src/core/bpf-firewall.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+
+#include "unit.h"
+
+enum {
+ BPF_FIREWALL_UNSUPPORTED = 0,
+ BPF_FIREWALL_SUPPORTED = 1,
+ BPF_FIREWALL_SUPPORTED_WITH_MULTI = 2,
+};
+
+int bpf_firewall_supported(void);
+
+int bpf_firewall_compile(Unit *u);
+int bpf_firewall_install(Unit *u);
+
+int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets);
+int bpf_firewall_reset_accounting(int map_fd);
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
new file mode 100644
index 0000000..18d470b
--- /dev/null
+++ b/src/core/cgroup.c
@@ -0,0 +1,3074 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <fnmatch.h>
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "bpf-firewall.h"
+#include "btrfs-util.h"
+#include "bpf-devices.h"
+#include "bus-error.h"
+#include "cgroup-util.h"
+#include "cgroup.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "procfs-util.h"
+#include "special.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "virt.h"
+
+#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
+
+/* Returns the log level to use when cgroup attribute writes fail. When an attribute is missing or we have access
+ * problems we downgrade to LOG_DEBUG. This is supposed to be nice to container managers and kernels which want to mask
+ * out specific attributes from us. */
+#define LOG_LEVEL_CGROUP_WRITE(r) (IN_SET(abs(r), ENOENT, EROFS, EACCES, EPERM) ? LOG_DEBUG : LOG_WARNING)
+
+bool manager_owns_host_root_cgroup(Manager *m) {
+ assert(m);
+
+ /* Returns true if we are managing the root cgroup. Note that it isn't sufficient to just check whether the
+ * group root path equals "/" since that will also be the case if CLONE_NEWCGROUP is in the mix. Since there's
+ * appears to be no nice way to detect whether we are in a CLONE_NEWCGROUP namespace we instead just check if
+ * we run in any kind of container virtualization. */
+
+ if (MANAGER_IS_USER(m))
+ return false;
+
+ if (detect_container() > 0)
+ return false;
+
+ return empty_or_root(m->cgroup_root);
+}
+
+bool unit_has_host_root_cgroup(Unit *u) {
+ assert(u);
+
+ /* Returns whether this unit manages the root cgroup. This will return true if this unit is the root slice and
+ * the manager manages the root cgroup. */
+
+ if (!manager_owns_host_root_cgroup(u->manager))
+ return false;
+
+ return unit_has_name(u, SPECIAL_ROOT_SLICE);
+}
+
+static int set_attribute_and_warn(Unit *u, const char *controller, const char *attribute, const char *value) {
+ int r;
+
+ r = cg_set_attribute(controller, u->cgroup_path, attribute, value);
+ if (r < 0)
+ log_unit_full(u, LOG_LEVEL_CGROUP_WRITE(r), r, "Failed to set '%s' attribute on '%s' to '%.*s': %m",
+ strna(attribute), isempty(u->cgroup_path) ? "/" : u->cgroup_path, (int) strcspn(value, NEWLINE), value);
+
+ return r;
+}
+
+static void cgroup_compat_warn(void) {
+ static bool cgroup_compat_warned = false;
+
+ if (cgroup_compat_warned)
+ return;
+
+ log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. "
+ "See cgroup-compat debug messages for details.");
+
+ cgroup_compat_warned = true;
+}
+
+#define log_cgroup_compat(unit, fmt, ...) do { \
+ cgroup_compat_warn(); \
+ log_unit_debug(unit, "cgroup-compat: " fmt, ##__VA_ARGS__); \
+ } while (false)
+
+void cgroup_context_init(CGroupContext *c) {
+ assert(c);
+
+ /* Initialize everything to the kernel defaults. */
+
+ *c = (CGroupContext) {
+ .cpu_weight = CGROUP_WEIGHT_INVALID,
+ .startup_cpu_weight = CGROUP_WEIGHT_INVALID,
+ .cpu_quota_per_sec_usec = USEC_INFINITY,
+
+ .cpu_shares = CGROUP_CPU_SHARES_INVALID,
+ .startup_cpu_shares = CGROUP_CPU_SHARES_INVALID,
+
+ .memory_high = CGROUP_LIMIT_MAX,
+ .memory_max = CGROUP_LIMIT_MAX,
+ .memory_swap_max = CGROUP_LIMIT_MAX,
+
+ .memory_limit = CGROUP_LIMIT_MAX,
+
+ .io_weight = CGROUP_WEIGHT_INVALID,
+ .startup_io_weight = CGROUP_WEIGHT_INVALID,
+
+ .blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID,
+ .startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID,
+
+ .tasks_max = CGROUP_LIMIT_MAX,
+ };
+}
+
+void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
+ assert(c);
+ assert(a);
+
+ LIST_REMOVE(device_allow, c->device_allow, a);
+ free(a->path);
+ free(a);
+}
+
+void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w) {
+ assert(c);
+ assert(w);
+
+ LIST_REMOVE(device_weights, c->io_device_weights, w);
+ free(w->path);
+ free(w);
+}
+
+void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l) {
+ assert(c);
+ assert(l);
+
+ LIST_REMOVE(device_latencies, c->io_device_latencies, l);
+ free(l->path);
+ free(l);
+}
+
+void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l) {
+ assert(c);
+ assert(l);
+
+ LIST_REMOVE(device_limits, c->io_device_limits, l);
+ free(l->path);
+ free(l);
+}
+
+void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
+ assert(c);
+ assert(w);
+
+ LIST_REMOVE(device_weights, c->blockio_device_weights, w);
+ free(w->path);
+ free(w);
+}
+
+void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
+ assert(c);
+ assert(b);
+
+ LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
+ free(b->path);
+ free(b);
+}
+
+void cgroup_context_done(CGroupContext *c) {
+ assert(c);
+
+ while (c->io_device_weights)
+ cgroup_context_free_io_device_weight(c, c->io_device_weights);
+
+ while (c->io_device_latencies)
+ cgroup_context_free_io_device_latency(c, c->io_device_latencies);
+
+ while (c->io_device_limits)
+ cgroup_context_free_io_device_limit(c, c->io_device_limits);
+
+ while (c->blockio_device_weights)
+ cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
+
+ while (c->blockio_device_bandwidths)
+ cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
+
+ while (c->device_allow)
+ cgroup_context_free_device_allow(c, c->device_allow);
+
+ c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow);
+ c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny);
+}
+
+void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
+ CGroupIODeviceLimit *il;
+ CGroupIODeviceWeight *iw;
+ CGroupIODeviceLatency *l;
+ CGroupBlockIODeviceBandwidth *b;
+ CGroupBlockIODeviceWeight *w;
+ CGroupDeviceAllow *a;
+ IPAddressAccessItem *iaai;
+ char u[FORMAT_TIMESPAN_MAX];
+
+ assert(c);
+ assert(f);
+
+ prefix = strempty(prefix);
+
+ fprintf(f,
+ "%sCPUAccounting=%s\n"
+ "%sIOAccounting=%s\n"
+ "%sBlockIOAccounting=%s\n"
+ "%sMemoryAccounting=%s\n"
+ "%sTasksAccounting=%s\n"
+ "%sIPAccounting=%s\n"
+ "%sCPUWeight=%" PRIu64 "\n"
+ "%sStartupCPUWeight=%" PRIu64 "\n"
+ "%sCPUShares=%" PRIu64 "\n"
+ "%sStartupCPUShares=%" PRIu64 "\n"
+ "%sCPUQuotaPerSecSec=%s\n"
+ "%sIOWeight=%" PRIu64 "\n"
+ "%sStartupIOWeight=%" PRIu64 "\n"
+ "%sBlockIOWeight=%" PRIu64 "\n"
+ "%sStartupBlockIOWeight=%" PRIu64 "\n"
+ "%sMemoryMin=%" PRIu64 "\n"
+ "%sMemoryLow=%" PRIu64 "\n"
+ "%sMemoryHigh=%" PRIu64 "\n"
+ "%sMemoryMax=%" PRIu64 "\n"
+ "%sMemorySwapMax=%" PRIu64 "\n"
+ "%sMemoryLimit=%" PRIu64 "\n"
+ "%sTasksMax=%" PRIu64 "\n"
+ "%sDevicePolicy=%s\n"
+ "%sDelegate=%s\n",
+ prefix, yes_no(c->cpu_accounting),
+ prefix, yes_no(c->io_accounting),
+ prefix, yes_no(c->blockio_accounting),
+ prefix, yes_no(c->memory_accounting),
+ prefix, yes_no(c->tasks_accounting),
+ prefix, yes_no(c->ip_accounting),
+ prefix, c->cpu_weight,
+ prefix, c->startup_cpu_weight,
+ prefix, c->cpu_shares,
+ prefix, c->startup_cpu_shares,
+ prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
+ prefix, c->io_weight,
+ prefix, c->startup_io_weight,
+ prefix, c->blockio_weight,
+ prefix, c->startup_blockio_weight,
+ prefix, c->memory_min,
+ prefix, c->memory_low,
+ prefix, c->memory_high,
+ prefix, c->memory_max,
+ prefix, c->memory_swap_max,
+ prefix, c->memory_limit,
+ prefix, c->tasks_max,
+ prefix, cgroup_device_policy_to_string(c->device_policy),
+ prefix, yes_no(c->delegate));
+
+ if (c->delegate) {
+ _cleanup_free_ char *t = NULL;
+
+ (void) cg_mask_to_string(c->delegate_controllers, &t);
+
+ fprintf(f, "%sDelegateControllers=%s\n",
+ prefix,
+ strempty(t));
+ }
+
+ LIST_FOREACH(device_allow, a, c->device_allow)
+ fprintf(f,
+ "%sDeviceAllow=%s %s%s%s\n",
+ prefix,
+ a->path,
+ a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
+
+ LIST_FOREACH(device_weights, iw, c->io_device_weights)
+ fprintf(f,
+ "%sIODeviceWeight=%s %" PRIu64 "\n",
+ prefix,
+ iw->path,
+ iw->weight);
+
+ LIST_FOREACH(device_latencies, l, c->io_device_latencies)
+ fprintf(f,
+ "%sIODeviceLatencyTargetSec=%s %s\n",
+ prefix,
+ l->path,
+ format_timespan(u, sizeof(u), l->target_usec, 1));
+
+ LIST_FOREACH(device_limits, il, c->io_device_limits) {
+ char buf[FORMAT_BYTES_MAX];
+ CGroupIOLimitType type;
+
+ for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++)
+ if (il->limits[type] != cgroup_io_limit_defaults[type])
+ fprintf(f,
+ "%s%s=%s %s\n",
+ prefix,
+ cgroup_io_limit_type_to_string(type),
+ il->path,
+ format_bytes(buf, sizeof(buf), il->limits[type]));
+ }
+
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights)
+ fprintf(f,
+ "%sBlockIODeviceWeight=%s %" PRIu64,
+ prefix,
+ w->path,
+ w->weight);
+
+ LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
+ char buf[FORMAT_BYTES_MAX];
+
+ if (b->rbps != CGROUP_LIMIT_MAX)
+ fprintf(f,
+ "%sBlockIOReadBandwidth=%s %s\n",
+ prefix,
+ b->path,
+ format_bytes(buf, sizeof(buf), b->rbps));
+ if (b->wbps != CGROUP_LIMIT_MAX)
+ fprintf(f,
+ "%sBlockIOWriteBandwidth=%s %s\n",
+ prefix,
+ b->path,
+ format_bytes(buf, sizeof(buf), b->wbps));
+ }
+
+ LIST_FOREACH(items, iaai, c->ip_address_allow) {
+ _cleanup_free_ char *k = NULL;
+
+ (void) in_addr_to_string(iaai->family, &iaai->address, &k);
+ fprintf(f, "%sIPAddressAllow=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
+ }
+
+ LIST_FOREACH(items, iaai, c->ip_address_deny) {
+ _cleanup_free_ char *k = NULL;
+
+ (void) in_addr_to_string(iaai->family, &iaai->address, &k);
+ fprintf(f, "%sIPAddressDeny=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
+ }
+}
+
+int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) {
+ _cleanup_free_ CGroupDeviceAllow *a = NULL;
+ _cleanup_free_ char *d = NULL;
+
+ assert(c);
+ assert(dev);
+ assert(isempty(mode) || in_charset(mode, "rwm"));
+
+ a = new(CGroupDeviceAllow, 1);
+ if (!a)
+ return -ENOMEM;
+
+ d = strdup(dev);
+ if (!d)
+ return -ENOMEM;
+
+ *a = (CGroupDeviceAllow) {
+ .path = TAKE_PTR(d),
+ .r = isempty(mode) || strchr(mode, 'r'),
+ .w = isempty(mode) || strchr(mode, 'w'),
+ .m = isempty(mode) || strchr(mode, 'm'),
+ };
+
+ LIST_PREPEND(device_allow, c->device_allow, a);
+ TAKE_PTR(a);
+
+ return 0;
+}
+
+static void cgroup_xattr_apply(Unit *u) {
+ char ids[SD_ID128_STRING_MAX];
+ int r;
+
+ assert(u);
+
+ if (!MANAGER_IS_SYSTEM(u->manager))
+ return;
+
+ if (sd_id128_is_null(u->invocation_id))
+ return;
+
+ r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ "trusted.invocation_id",
+ sd_id128_to_string(u->invocation_id, ids), 32,
+ 0);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
+}
+
+static int lookup_block_device(const char *p, dev_t *ret) {
+ dev_t rdev, dev = 0;
+ mode_t mode;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ r = device_path_parse_major_minor(p, &mode, &rdev);
+ if (r == -ENODEV) { /* not a parsable device node, need to go to disk */
+ struct stat st;
+ if (stat(p, &st) < 0)
+ return log_warning_errno(errno, "Couldn't stat device '%s': %m", p);
+ rdev = (dev_t)st.st_rdev;
+ dev = (dev_t)st.st_dev;
+ mode = st.st_mode;
+ } else if (r < 0)
+ return log_warning_errno(r, "Failed to parse major/minor from path '%s': %m", p);
+
+ if (S_ISCHR(mode)) {
+ log_warning("Device node '%s' is a character device, but block device needed.", p);
+ return -ENOTBLK;
+ } else if (S_ISBLK(mode))
+ *ret = rdev;
+ else if (major(dev) != 0)
+ *ret = dev; /* If this is not a device node then use the block device this file is stored on */
+ else {
+ /* If this is btrfs, getting the backing block device is a bit harder */
+ r = btrfs_get_block_device(p, ret);
+ if (r < 0 && r != -ENOTTY)
+ return log_warning_errno(r, "Failed to determine block device backing btrfs file system '%s': %m", p);
+ if (r == -ENOTTY) {
+ log_warning("'%s' is not a block device node, and file system block device cannot be determined or is not local.", p);
+ return -ENODEV;
+ }
+ }
+
+ /* If this is a LUKS device, try to get the originating block device */
+ (void) block_get_originating(*ret, ret);
+
+ /* If this is a partition, try to get the originating block device */
+ (void) block_get_whole_disk(*ret, ret);
+ return 0;
+}
+
+static int whitelist_device(BPFProgram *prog, const char *path, const char *node, const char *acc) {
+ dev_t rdev;
+ mode_t mode;
+ int r;
+
+ assert(path);
+ assert(acc);
+
+ /* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
+ * /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This
+ * means clients can use these path without the device node actually around */
+ r = device_path_parse_major_minor(node, &mode, &rdev);
+ if (r < 0) {
+ if (r != -ENODEV)
+ return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);
+
+ struct stat st;
+ if (stat(node, &st) < 0)
+ return log_warning_errno(errno, "Couldn't stat device %s: %m", node);
+
+ if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
+ log_warning("%s is not a device.", node);
+ return -ENODEV;
+ }
+ rdev = (dev_t) st.st_rdev;
+ mode = st.st_mode;
+ }
+
+ if (cg_all_unified() > 0) {
+ if (!prog)
+ return 0;
+
+ return cgroup_bpf_whitelist_device(prog, S_ISCHR(mode) ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
+ major(rdev), minor(rdev), acc);
+
+ } else {
+ char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
+
+ sprintf(buf,
+ "%c %u:%u %s",
+ S_ISCHR(mode) ? 'c' : 'b',
+ major(rdev), minor(rdev),
+ acc);
+
+ /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL here. */
+
+ r = cg_set_attribute("devices", path, "devices.allow", buf);
+ if (r < 0)
+ return log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
+ r, "Failed to set devices.allow on %s: %m", path);
+
+ return 0;
+ }
+}
+
+static int whitelist_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char buf[2+DECIMAL_STR_MAX(unsigned)+3+4];
+ bool good = false;
+ unsigned maj;
+ int r;
+
+ assert(path);
+ assert(acc);
+ assert(IN_SET(type, 'b', 'c'));
+
+ if (streq(name, "*")) {
+ /* If the name is a wildcard, then apply this list to all devices of this type */
+
+ if (cg_all_unified() > 0) {
+ if (!prog)
+ return 0;
+
+ (void) cgroup_bpf_whitelist_class(prog, type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK, acc);
+ } else {
+ xsprintf(buf, "%c *:* %s", type, acc);
+
+ r = cg_set_attribute("devices", path, "devices.allow", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set devices.allow on %s: %m", path);
+ return 0;
+ }
+ }
+
+ if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj)) {
+ /* The name is numeric and suitable as major. In that case, let's take is major, and create the entry
+ * directly */
+
+ if (cg_all_unified() > 0) {
+ if (!prog)
+ return 0;
+
+ (void) cgroup_bpf_whitelist_major(prog,
+ type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
+ maj, acc);
+ } else {
+ xsprintf(buf, "%c %u:* %s", type, maj, acc);
+
+ r = cg_set_attribute("devices", path, "devices.allow", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set devices.allow on %s: %m", path);
+ }
+
+ return 0;
+ }
+
+ f = fopen("/proc/devices", "re");
+ if (!f)
+ return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s (%c): %m", name, type);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *w, *p;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read /proc/devices: %m");
+ if (r == 0)
+ break;
+
+ if (type == 'c' && streq(line, "Character devices:")) {
+ good = true;
+ continue;
+ }
+
+ if (type == 'b' && streq(line, "Block devices:")) {
+ good = true;
+ continue;
+ }
+
+ if (isempty(line)) {
+ good = false;
+ continue;
+ }
+
+ if (!good)
+ continue;
+
+ p = strstrip(line);
+
+ w = strpbrk(p, WHITESPACE);
+ if (!w)
+ continue;
+ *w = 0;
+
+ r = safe_atou(p, &maj);
+ if (r < 0)
+ continue;
+ if (maj <= 0)
+ continue;
+
+ w++;
+ w += strspn(w, WHITESPACE);
+
+ if (fnmatch(name, w, 0) != 0)
+ continue;
+
+ if (cg_all_unified() > 0) {
+ if (!prog)
+ continue;
+
+ (void) cgroup_bpf_whitelist_major(prog,
+ type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
+ maj, acc);
+ } else {
+ sprintf(buf,
+ "%c %u:* %s",
+ type,
+ maj,
+ acc);
+
+ /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL
+ * here. */
+
+ r = cg_set_attribute("devices", path, "devices.allow", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
+ r, "Failed to set devices.allow on %s: %m", path);
+ }
+ }
+
+ return 0;
+}
+
+static bool cgroup_context_has_cpu_weight(CGroupContext *c) {
+ return c->cpu_weight != CGROUP_WEIGHT_INVALID ||
+ c->startup_cpu_weight != CGROUP_WEIGHT_INVALID;
+}
+
+static bool cgroup_context_has_cpu_shares(CGroupContext *c) {
+ return c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+ c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID;
+}
+
+static uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_cpu_weight != CGROUP_WEIGHT_INVALID)
+ return c->startup_cpu_weight;
+ else if (c->cpu_weight != CGROUP_WEIGHT_INVALID)
+ return c->cpu_weight;
+ else
+ return CGROUP_WEIGHT_DEFAULT;
+}
+
+static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID)
+ return c->startup_cpu_shares;
+ else if (c->cpu_shares != CGROUP_CPU_SHARES_INVALID)
+ return c->cpu_shares;
+ else
+ return CGROUP_CPU_SHARES_DEFAULT;
+}
+
+static void cgroup_apply_unified_cpu_weight(Unit *u, uint64_t weight) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 2];
+
+ xsprintf(buf, "%" PRIu64 "\n", weight);
+ (void) set_attribute_and_warn(u, "cpu", "cpu.weight", buf);
+}
+
+static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota) {
+ char buf[(DECIMAL_STR_MAX(usec_t) + 1) * 2 + 1];
+
+ if (quota != USEC_INFINITY)
+ xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
+ quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC);
+ else
+ xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+ (void) set_attribute_and_warn(u, "cpu", "cpu.max", buf);
+}
+
+static void cgroup_apply_legacy_cpu_shares(Unit *u, uint64_t shares) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 2];
+
+ xsprintf(buf, "%" PRIu64 "\n", shares);
+ (void) set_attribute_and_warn(u, "cpu", "cpu.shares", buf);
+}
+
+static void cgroup_apply_legacy_cpu_quota(Unit *u, usec_t quota) {
+ char buf[DECIMAL_STR_MAX(usec_t) + 2];
+
+ xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+ (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_period_us", buf);
+
+ if (quota != USEC_INFINITY) {
+ xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
+ (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", buf);
+ } else
+ (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", "-1\n");
+}
+
+static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) {
+ return CLAMP(shares * CGROUP_WEIGHT_DEFAULT / CGROUP_CPU_SHARES_DEFAULT,
+ CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
+}
+
+static uint64_t cgroup_cpu_weight_to_shares(uint64_t weight) {
+ return CLAMP(weight * CGROUP_CPU_SHARES_DEFAULT / CGROUP_WEIGHT_DEFAULT,
+ CGROUP_CPU_SHARES_MIN, CGROUP_CPU_SHARES_MAX);
+}
+
+static bool cgroup_context_has_io_config(CGroupContext *c) {
+ return c->io_accounting ||
+ c->io_weight != CGROUP_WEIGHT_INVALID ||
+ c->startup_io_weight != CGROUP_WEIGHT_INVALID ||
+ c->io_device_weights ||
+ c->io_device_latencies ||
+ c->io_device_limits;
+}
+
+static bool cgroup_context_has_blockio_config(CGroupContext *c) {
+ return c->blockio_accounting ||
+ c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
+ c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
+ c->blockio_device_weights ||
+ c->blockio_device_bandwidths;
+}
+
+static uint64_t cgroup_context_io_weight(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_io_weight != CGROUP_WEIGHT_INVALID)
+ return c->startup_io_weight;
+ else if (c->io_weight != CGROUP_WEIGHT_INVALID)
+ return c->io_weight;
+ else
+ return CGROUP_WEIGHT_DEFAULT;
+}
+
+static uint64_t cgroup_context_blkio_weight(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID)
+ return c->startup_blockio_weight;
+ else if (c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID)
+ return c->blockio_weight;
+ else
+ return CGROUP_BLKIO_WEIGHT_DEFAULT;
+}
+
+static uint64_t cgroup_weight_blkio_to_io(uint64_t blkio_weight) {
+ return CLAMP(blkio_weight * CGROUP_WEIGHT_DEFAULT / CGROUP_BLKIO_WEIGHT_DEFAULT,
+ CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
+}
+
+static uint64_t cgroup_weight_io_to_blkio(uint64_t io_weight) {
+ return CLAMP(io_weight * CGROUP_BLKIO_WEIGHT_DEFAULT / CGROUP_WEIGHT_DEFAULT,
+ CGROUP_BLKIO_WEIGHT_MIN, CGROUP_BLKIO_WEIGHT_MAX);
+}
+
+static void cgroup_apply_io_device_weight(Unit *u, const char *dev_path, uint64_t io_weight) {
+ char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
+ dev_t dev;
+ int r;
+
+ r = lookup_block_device(dev_path, &dev);
+ if (r < 0)
+ return;
+
+ xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), io_weight);
+ (void) set_attribute_and_warn(u, "io", "io.weight", buf);
+}
+
+static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint64_t blkio_weight) {
+ char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
+ dev_t dev;
+ int r;
+
+ r = lookup_block_device(dev_path, &dev);
+ if (r < 0)
+ return;
+
+ xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), blkio_weight);
+ (void) set_attribute_and_warn(u, "blkio", "blkio.weight_device", buf);
+}
+
+static void cgroup_apply_io_device_latency(Unit *u, const char *dev_path, usec_t target) {
+ char buf[DECIMAL_STR_MAX(dev_t)*2+2+7+DECIMAL_STR_MAX(uint64_t)+1];
+ dev_t dev;
+ int r;
+
+ r = lookup_block_device(dev_path, &dev);
+ if (r < 0)
+ return;
+
+ if (target != USEC_INFINITY)
+ xsprintf(buf, "%u:%u target=%" PRIu64 "\n", major(dev), minor(dev), target);
+ else
+ xsprintf(buf, "%u:%u target=max\n", major(dev), minor(dev));
+
+ (void) set_attribute_and_warn(u, "io", "io.latency", buf);
+}
+
+static void cgroup_apply_io_device_limit(Unit *u, const char *dev_path, uint64_t *limits) {
+ char limit_bufs[_CGROUP_IO_LIMIT_TYPE_MAX][DECIMAL_STR_MAX(uint64_t)];
+ char buf[DECIMAL_STR_MAX(dev_t)*2+2+(6+DECIMAL_STR_MAX(uint64_t)+1)*4];
+ CGroupIOLimitType type;
+ dev_t dev;
+ int r;
+
+ r = lookup_block_device(dev_path, &dev);
+ if (r < 0)
+ return;
+
+ for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++)
+ if (limits[type] != cgroup_io_limit_defaults[type])
+ xsprintf(limit_bufs[type], "%" PRIu64, limits[type]);
+ else
+ xsprintf(limit_bufs[type], "%s", limits[type] == CGROUP_LIMIT_MAX ? "max" : "0");
+
+ xsprintf(buf, "%u:%u rbps=%s wbps=%s riops=%s wiops=%s\n", major(dev), minor(dev),
+ limit_bufs[CGROUP_IO_RBPS_MAX], limit_bufs[CGROUP_IO_WBPS_MAX],
+ limit_bufs[CGROUP_IO_RIOPS_MAX], limit_bufs[CGROUP_IO_WIOPS_MAX]);
+ (void) set_attribute_and_warn(u, "io", "io.max", buf);
+}
+
+static void cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, uint64_t rbps, uint64_t wbps) {
+ char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
+ dev_t dev;
+ int r;
+
+ r = lookup_block_device(dev_path, &dev);
+ if (r < 0)
+ return;
+
+ sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), rbps);
+ (void) set_attribute_and_warn(u, "blkio", "blkio.throttle.read_bps_device", buf);
+
+ sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), wbps);
+ (void) set_attribute_and_warn(u, "blkio", "blkio.throttle.write_bps_device", buf);
+}
+
+static bool cgroup_context_has_unified_memory_config(CGroupContext *c) {
+ return c->memory_min > 0 || c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX || c->memory_swap_max != CGROUP_LIMIT_MAX;
+}
+
+static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max\n";
+
+ if (v != CGROUP_LIMIT_MAX)
+ xsprintf(buf, "%" PRIu64 "\n", v);
+
+ (void) set_attribute_and_warn(u, "memory", file, buf);
+}
+
+static void cgroup_apply_firewall(Unit *u) {
+ assert(u);
+
+ /* Best-effort: let's apply IP firewalling and/or accounting if that's enabled */
+
+ if (bpf_firewall_compile(u) < 0)
+ return;
+
+ (void) bpf_firewall_install(u);
+}
+
+static void cgroup_context_apply(
+ Unit *u,
+ CGroupMask apply_mask,
+ ManagerState state) {
+
+ const char *path;
+ CGroupContext *c;
+ bool is_host_root, is_local_root;
+ int r;
+
+ assert(u);
+
+ /* Nothing to do? Exit early! */
+ if (apply_mask == 0)
+ return;
+
+ /* Some cgroup attributes are not supported on the host root cgroup, hence silently ignore them here. And other
+ * attributes should only be managed for cgroups further down the tree. */
+ is_local_root = unit_has_name(u, SPECIAL_ROOT_SLICE);
+ is_host_root = unit_has_host_root_cgroup(u);
+
+ assert_se(c = unit_get_cgroup_context(u));
+ assert_se(path = u->cgroup_path);
+
+ if (is_local_root) /* Make sure we don't try to display messages with an empty path. */
+ path = "/";
+
+ /* We generally ignore errors caused by read-only mounted cgroup trees (assuming we are running in a container
+ * then), and missing cgroups, i.e. EROFS and ENOENT. */
+
+ /* In fully unified mode these attributes don't exist on the host cgroup root. On legacy the weights exist, but
+ * setting the weight makes very little sense on the host root cgroup, as there are no other cgroups at this
+ * level. The quota exists there too, but any attempt to write to it is refused with EINVAL. Inside of
+ * containers we want to leave control of these to the container manager (and if cgroup v2 delegation is used
+ * we couldn't even write to them if we wanted to). */
+ if ((apply_mask & CGROUP_MASK_CPU) && !is_local_root) {
+
+ if (cg_all_unified() > 0) {
+ uint64_t weight;
+
+ if (cgroup_context_has_cpu_weight(c))
+ weight = cgroup_context_cpu_weight(c, state);
+ else if (cgroup_context_has_cpu_shares(c)) {
+ uint64_t shares;
+
+ shares = cgroup_context_cpu_shares(c, state);
+ weight = cgroup_cpu_shares_to_weight(shares);
+
+ log_cgroup_compat(u, "Applying [Startup]CPUShares=%" PRIu64 " as [Startup]CPUWeight=%" PRIu64 " on %s",
+ shares, weight, path);
+ } else
+ weight = CGROUP_WEIGHT_DEFAULT;
+
+ cgroup_apply_unified_cpu_weight(u, weight);
+ cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec);
+
+ } else {
+ uint64_t shares;
+
+ if (cgroup_context_has_cpu_weight(c)) {
+ uint64_t weight;
+
+ weight = cgroup_context_cpu_weight(c, state);
+ shares = cgroup_cpu_weight_to_shares(weight);
+
+ log_cgroup_compat(u, "Applying [Startup]CPUWeight=%" PRIu64 " as [Startup]CPUShares=%" PRIu64 " on %s",
+ weight, shares, path);
+ } else if (cgroup_context_has_cpu_shares(c))
+ shares = cgroup_context_cpu_shares(c, state);
+ else
+ shares = CGROUP_CPU_SHARES_DEFAULT;
+
+ cgroup_apply_legacy_cpu_shares(u, shares);
+ cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec);
+ }
+ }
+
+ /* The 'io' controller attributes are not exported on the host's root cgroup (being a pure cgroup v2
+ * controller), and in case of containers we want to leave control of these attributes to the container manager
+ * (and we couldn't access that stuff anyway, even if we tried if proper delegation is used). */
+ if ((apply_mask & CGROUP_MASK_IO) && !is_local_root) {
+ char buf[8+DECIMAL_STR_MAX(uint64_t)+1];
+ bool has_io, has_blockio;
+ uint64_t weight;
+
+ has_io = cgroup_context_has_io_config(c);
+ has_blockio = cgroup_context_has_blockio_config(c);
+
+ if (has_io)
+ weight = cgroup_context_io_weight(c, state);
+ else if (has_blockio) {
+ uint64_t blkio_weight;
+
+ blkio_weight = cgroup_context_blkio_weight(c, state);
+ weight = cgroup_weight_blkio_to_io(blkio_weight);
+
+ log_cgroup_compat(u, "Applying [Startup]BlockIOWeight=%" PRIu64 " as [Startup]IOWeight=%" PRIu64,
+ blkio_weight, weight);
+ } else
+ weight = CGROUP_WEIGHT_DEFAULT;
+
+ xsprintf(buf, "default %" PRIu64 "\n", weight);
+ (void) set_attribute_and_warn(u, "io", "io.weight", buf);
+
+ if (has_io) {
+ CGroupIODeviceLatency *latency;
+ CGroupIODeviceLimit *limit;
+ CGroupIODeviceWeight *w;
+
+ LIST_FOREACH(device_weights, w, c->io_device_weights)
+ cgroup_apply_io_device_weight(u, w->path, w->weight);
+
+ LIST_FOREACH(device_limits, limit, c->io_device_limits)
+ cgroup_apply_io_device_limit(u, limit->path, limit->limits);
+
+ LIST_FOREACH(device_latencies, latency, c->io_device_latencies)
+ cgroup_apply_io_device_latency(u, latency->path, latency->target_usec);
+
+ } else if (has_blockio) {
+ CGroupBlockIODeviceWeight *w;
+ CGroupBlockIODeviceBandwidth *b;
+
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
+ weight = cgroup_weight_blkio_to_io(w->weight);
+
+ log_cgroup_compat(u, "Applying BlockIODeviceWeight=%" PRIu64 " as IODeviceWeight=%" PRIu64 " for %s",
+ w->weight, weight, w->path);
+
+ cgroup_apply_io_device_weight(u, w->path, weight);
+ }
+
+ LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
+ uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX];
+ CGroupIOLimitType type;
+
+ for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++)
+ limits[type] = cgroup_io_limit_defaults[type];
+
+ limits[CGROUP_IO_RBPS_MAX] = b->rbps;
+ limits[CGROUP_IO_WBPS_MAX] = b->wbps;
+
+ log_cgroup_compat(u, "Applying BlockIO{Read|Write}Bandwidth=%" PRIu64 " %" PRIu64 " as IO{Read|Write}BandwidthMax= for %s",
+ b->rbps, b->wbps, b->path);
+
+ cgroup_apply_io_device_limit(u, b->path, limits);
+ }
+ }
+ }
+
+ if (apply_mask & CGROUP_MASK_BLKIO) {
+ bool has_io, has_blockio;
+
+ has_io = cgroup_context_has_io_config(c);
+ has_blockio = cgroup_context_has_blockio_config(c);
+
+ /* Applying a 'weight' never makes sense for the host root cgroup, and for containers this should be
+ * left to our container manager, too. */
+ if (!is_local_root) {
+ char buf[DECIMAL_STR_MAX(uint64_t)+1];
+ uint64_t weight;
+
+ if (has_io) {
+ uint64_t io_weight;
+
+ io_weight = cgroup_context_io_weight(c, state);
+ weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state));
+
+ log_cgroup_compat(u, "Applying [Startup]IOWeight=%" PRIu64 " as [Startup]BlockIOWeight=%" PRIu64,
+ io_weight, weight);
+ } else if (has_blockio)
+ weight = cgroup_context_blkio_weight(c, state);
+ else
+ weight = CGROUP_BLKIO_WEIGHT_DEFAULT;
+
+ xsprintf(buf, "%" PRIu64 "\n", weight);
+ (void) set_attribute_and_warn(u, "blkio", "blkio.weight", buf);
+
+ if (has_io) {
+ CGroupIODeviceWeight *w;
+
+ LIST_FOREACH(device_weights, w, c->io_device_weights) {
+ weight = cgroup_weight_io_to_blkio(w->weight);
+
+ log_cgroup_compat(u, "Applying IODeviceWeight=%" PRIu64 " as BlockIODeviceWeight=%" PRIu64 " for %s",
+ w->weight, weight, w->path);
+
+ cgroup_apply_blkio_device_weight(u, w->path, weight);
+ }
+ } else if (has_blockio) {
+ CGroupBlockIODeviceWeight *w;
+
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights)
+ cgroup_apply_blkio_device_weight(u, w->path, w->weight);
+ }
+ }
+
+ /* The bandwith limits are something that make sense to be applied to the host's root but not container
+ * roots, as there we want the container manager to handle it */
+ if (is_host_root || !is_local_root) {
+ if (has_io) {
+ CGroupIODeviceLimit *l;
+
+ LIST_FOREACH(device_limits, l, c->io_device_limits) {
+ log_cgroup_compat(u, "Applying IO{Read|Write}Bandwidth=%" PRIu64 " %" PRIu64 " as BlockIO{Read|Write}BandwidthMax= for %s",
+ l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX], l->path);
+
+ cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]);
+ }
+ } else if (has_blockio) {
+ CGroupBlockIODeviceBandwidth *b;
+
+ LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths)
+ cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps);
+ }
+ }
+ }
+
+ /* In unified mode 'memory' attributes do not exist on the root cgroup. In legacy mode 'memory.limit_in_bytes'
+ * exists on the root cgroup, but any writes to it are refused with EINVAL. And if we run in a container we
+ * want to leave control to the container manager (and if proper cgroup v2 delegation is used we couldn't even
+ * write to this if we wanted to.) */
+ if ((apply_mask & CGROUP_MASK_MEMORY) && !is_local_root) {
+
+ if (cg_all_unified() > 0) {
+ uint64_t max, swap_max = CGROUP_LIMIT_MAX;
+
+ if (cgroup_context_has_unified_memory_config(c)) {
+ max = c->memory_max;
+ swap_max = c->memory_swap_max;
+ } else {
+ max = c->memory_limit;
+
+ if (max != CGROUP_LIMIT_MAX)
+ log_cgroup_compat(u, "Applying MemoryLimit=%" PRIu64 " as MemoryMax=", max);
+ }
+
+ cgroup_apply_unified_memory_limit(u, "memory.min", c->memory_min);
+ cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low);
+ cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high);
+ cgroup_apply_unified_memory_limit(u, "memory.max", max);
+ cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max);
+
+ } else {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1];
+ uint64_t val;
+
+ if (cgroup_context_has_unified_memory_config(c)) {
+ val = c->memory_max;
+ log_cgroup_compat(u, "Applying MemoryMax=%" PRIi64 " as MemoryLimit=", val);
+ } else
+ val = c->memory_limit;
+
+ if (val == CGROUP_LIMIT_MAX)
+ strncpy(buf, "-1\n", sizeof(buf));
+ else
+ xsprintf(buf, "%" PRIu64 "\n", val);
+
+ (void) set_attribute_and_warn(u, "memory", "memory.limit_in_bytes", buf);
+ }
+ }
+
+ /* On cgroup v2 we can apply BPF everywhere. On cgroup v1 we apply it everywhere except for the root of
+ * containers, where we leave this to the manager */
+ if ((apply_mask & (CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES)) &&
+ (is_host_root || cg_all_unified() > 0 || !is_local_root)) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+ CGroupDeviceAllow *a;
+
+ if (cg_all_unified() > 0) {
+ r = cgroup_init_device_bpf(&prog, c->device_policy, c->device_allow);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to initialize device control bpf program: %m");
+ } else {
+ /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL
+ * here. */
+
+ if (c->device_allow || c->device_policy != CGROUP_AUTO)
+ r = cg_set_attribute("devices", path, "devices.deny", "a");
+ else
+ r = cg_set_attribute("devices", path, "devices.allow", "a");
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to reset devices.allow/devices.deny: %m");
+ }
+
+ if (c->device_policy == CGROUP_CLOSED ||
+ (c->device_policy == CGROUP_AUTO && c->device_allow)) {
+ static const char auto_devices[] =
+ "/dev/null\0" "rwm\0"
+ "/dev/zero\0" "rwm\0"
+ "/dev/full\0" "rwm\0"
+ "/dev/random\0" "rwm\0"
+ "/dev/urandom\0" "rwm\0"
+ "/dev/tty\0" "rwm\0"
+ "/dev/ptmx\0" "rwm\0"
+ /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
+ "/run/systemd/inaccessible/chr\0" "rwm\0"
+ "/run/systemd/inaccessible/blk\0" "rwm\0";
+
+ const char *x, *y;
+
+ NULSTR_FOREACH_PAIR(x, y, auto_devices)
+ (void) whitelist_device(prog, path, x, y);
+
+ /* PTS (/dev/pts) devices may not be duplicated, but accessed */
+ (void) whitelist_major(prog, path, "pts", 'c', "rw");
+ }
+
+ LIST_FOREACH(device_allow, a, c->device_allow) {
+ char acc[4], *val;
+ unsigned k = 0;
+
+ if (a->r)
+ acc[k++] = 'r';
+ if (a->w)
+ acc[k++] = 'w';
+ if (a->m)
+ acc[k++] = 'm';
+
+ if (k == 0)
+ continue;
+
+ acc[k++] = 0;
+
+ if (path_startswith(a->path, "/dev/"))
+ (void) whitelist_device(prog, path, a->path, acc);
+ else if ((val = startswith(a->path, "block-")))
+ (void) whitelist_major(prog, path, val, 'b', acc);
+ else if ((val = startswith(a->path, "char-")))
+ (void) whitelist_major(prog, path, val, 'c', acc);
+ else
+ log_unit_debug(u, "Ignoring device '%s' while writing cgroup attribute.", a->path);
+ }
+
+ r = cgroup_apply_device_bpf(u, prog, c->device_policy, c->device_allow);
+ if (r < 0) {
+ static bool warned = false;
+
+ log_full_errno(warned ? LOG_DEBUG : LOG_WARNING, r,
+ "Unit %s configures device ACL, but the local system doesn't seem to support the BPF-based device controller.\n"
+ "Proceeding WITHOUT applying ACL (all devices will be accessible)!\n"
+ "(This warning is only shown for the first loaded unit using device ACL.)", u->id);
+
+ warned = true;
+ }
+ }
+
+ if (apply_mask & CGROUP_MASK_PIDS) {
+
+ if (is_host_root) {
+ /* So, the "pids" controller does not expose anything on the root cgroup, in order not to
+ * replicate knobs exposed elsewhere needlessly. We abstract this away here however, and when
+ * the knobs of the root cgroup are modified propagate this to the relevant sysctls. There's a
+ * non-obvious asymmetry however: unlike the cgroup properties we don't really want to take
+ * exclusive ownership of the sysctls, but we still want to honour things if the user sets
+ * limits. Hence we employ sort of a one-way strategy: when the user sets a bounded limit
+ * through us it counts. When the user afterwards unsets it again (i.e. sets it to unbounded)
+ * it also counts. But if the user never set a limit through us (i.e. we are the default of
+ * "unbounded") we leave things unmodified. For this we manage a global boolean that we turn on
+ * the first time we set a limit. Note that this boolean is flushed out on manager reload,
+ * which is desirable so that there's an offical way to release control of the sysctl from
+ * systemd: set the limit to unbounded and reload. */
+
+ if (c->tasks_max != CGROUP_LIMIT_MAX) {
+ u->manager->sysctl_pid_max_changed = true;
+ r = procfs_tasks_set_limit(c->tasks_max);
+ } else if (u->manager->sysctl_pid_max_changed)
+ r = procfs_tasks_set_limit(TASKS_MAX);
+ else
+ r = 0;
+ if (r < 0)
+ log_unit_full(u, LOG_LEVEL_CGROUP_WRITE(r), r,
+ "Failed to write to tasks limit sysctls: %m");
+ }
+
+ /* The attribute itself is not available on the host root cgroup, and in the container case we want to
+ * leave it for the container manager. */
+ if (!is_local_root) {
+ if (c->tasks_max != CGROUP_LIMIT_MAX) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 2];
+
+ sprintf(buf, "%" PRIu64 "\n", c->tasks_max);
+ (void) set_attribute_and_warn(u, "pids", "pids.max", buf);
+ } else
+ (void) set_attribute_and_warn(u, "pids", "pids.max", "max\n");
+ }
+ }
+
+ if (apply_mask & CGROUP_MASK_BPF_FIREWALL)
+ cgroup_apply_firewall(u);
+}
+
+static bool unit_get_needs_bpf_firewall(Unit *u) {
+ CGroupContext *c;
+ Unit *p;
+ assert(u);
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return false;
+
+ if (c->ip_accounting ||
+ c->ip_address_allow ||
+ c->ip_address_deny)
+ return true;
+
+ /* If any parent slice has an IP access list defined, it applies too */
+ for (p = UNIT_DEREF(u->slice); p; p = UNIT_DEREF(p->slice)) {
+ c = unit_get_cgroup_context(p);
+ if (!c)
+ return false;
+
+ if (c->ip_address_allow ||
+ c->ip_address_deny)
+ return true;
+ }
+
+ return false;
+}
+
+static CGroupMask cgroup_context_get_mask(CGroupContext *c) {
+ CGroupMask mask = 0;
+
+ /* Figure out which controllers we need, based on the cgroup context object */
+
+ if (c->cpu_accounting)
+ mask |= get_cpu_accounting_mask();
+
+ if (cgroup_context_has_cpu_weight(c) ||
+ cgroup_context_has_cpu_shares(c) ||
+ c->cpu_quota_per_sec_usec != USEC_INFINITY)
+ mask |= CGROUP_MASK_CPU;
+
+ if (cgroup_context_has_io_config(c) || cgroup_context_has_blockio_config(c))
+ mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
+
+ if (c->memory_accounting ||
+ c->memory_limit != CGROUP_LIMIT_MAX ||
+ cgroup_context_has_unified_memory_config(c))
+ mask |= CGROUP_MASK_MEMORY;
+
+ if (c->device_allow ||
+ c->device_policy != CGROUP_AUTO)
+ mask |= CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES;
+
+ if (c->tasks_accounting ||
+ c->tasks_max != CGROUP_LIMIT_MAX)
+ mask |= CGROUP_MASK_PIDS;
+
+ return CGROUP_MASK_EXTEND_JOINED(mask);
+}
+
+static CGroupMask unit_get_bpf_mask(Unit *u) {
+ CGroupMask mask = 0;
+
+ /* Figure out which controllers we need, based on the cgroup context, possibly taking into account children
+ * too. */
+
+ if (unit_get_needs_bpf_firewall(u))
+ mask |= CGROUP_MASK_BPF_FIREWALL;
+
+ return mask;
+}
+
+CGroupMask unit_get_own_mask(Unit *u) {
+ CGroupContext *c;
+
+ /* Returns the mask of controllers the unit needs for itself. If a unit is not properly loaded, return an empty
+ * mask, as we shouldn't reflect it in the cgroup hierarchy then. */
+
+ if (u->load_state != UNIT_LOADED)
+ return 0;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return 0;
+
+ return (cgroup_context_get_mask(c) | unit_get_bpf_mask(u) | unit_get_delegate_mask(u)) & ~unit_get_ancestor_disable_mask(u);
+}
+
+CGroupMask unit_get_delegate_mask(Unit *u) {
+ CGroupContext *c;
+
+ /* If delegation is turned on, then turn on selected controllers, unless we are on the legacy hierarchy and the
+ * process we fork into is known to drop privileges, and hence shouldn't get access to the controllers.
+ *
+ * Note that on the unified hierarchy it is safe to delegate controllers to unprivileged services. */
+
+ if (!unit_cgroup_delegate(u))
+ return 0;
+
+ if (cg_all_unified() <= 0) {
+ ExecContext *e;
+
+ e = unit_get_exec_context(u);
+ if (e && !exec_context_maintains_privileges(e))
+ return 0;
+ }
+
+ assert_se(c = unit_get_cgroup_context(u));
+ return CGROUP_MASK_EXTEND_JOINED(c->delegate_controllers);
+}
+
+CGroupMask unit_get_members_mask(Unit *u) {
+ assert(u);
+
+ /* Returns the mask of controllers all of the unit's children require, merged */
+
+ if (u->cgroup_members_mask_valid)
+ return u->cgroup_members_mask; /* Use cached value if possible */
+
+ u->cgroup_members_mask = 0;
+
+ if (u->type == UNIT_SLICE) {
+ void *v;
+ Unit *member;
+ Iterator i;
+
+ HASHMAP_FOREACH_KEY(v, member, u->dependencies[UNIT_BEFORE], i) {
+ if (UNIT_DEREF(member->slice) == u)
+ u->cgroup_members_mask |= unit_get_subtree_mask(member); /* note that this calls ourselves again, for the children */
+ }
+ }
+
+ u->cgroup_members_mask_valid = true;
+ return u->cgroup_members_mask;
+}
+
+CGroupMask unit_get_siblings_mask(Unit *u) {
+ assert(u);
+
+ /* Returns the mask of controllers all of the unit's siblings
+ * require, i.e. the members mask of the unit's parent slice
+ * if there is one. */
+
+ if (UNIT_ISSET(u->slice))
+ return unit_get_members_mask(UNIT_DEREF(u->slice));
+
+ return unit_get_subtree_mask(u); /* we are the top-level slice */
+}
+
+CGroupMask unit_get_disable_mask(Unit *u) {
+ CGroupContext *c;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return 0;
+
+ return c->disable_controllers;
+}
+
+CGroupMask unit_get_ancestor_disable_mask(Unit *u) {
+ CGroupMask mask;
+
+ assert(u);
+ mask = unit_get_disable_mask(u);
+
+ /* Returns the mask of controllers which are marked as forcibly
+ * disabled in any ancestor unit or the unit in question. */
+
+ if (UNIT_ISSET(u->slice))
+ mask |= unit_get_ancestor_disable_mask(UNIT_DEREF(u->slice));
+
+ return mask;
+}
+
+CGroupMask unit_get_subtree_mask(Unit *u) {
+
+ /* Returns the mask of this subtree, meaning of the group
+ * itself and its children. */
+
+ return unit_get_own_mask(u) | unit_get_members_mask(u);
+}
+
+CGroupMask unit_get_target_mask(Unit *u) {
+ CGroupMask mask;
+
+ /* This returns the cgroup mask of all controllers to enable
+ * for a specific cgroup, i.e. everything it needs itself,
+ * plus all that its children need, plus all that its siblings
+ * need. This is primarily useful on the legacy cgroup
+ * hierarchy, where we need to duplicate each cgroup in each
+ * hierarchy that shall be enabled for it. */
+
+ mask = unit_get_own_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
+ mask &= u->manager->cgroup_supported;
+ mask &= ~unit_get_ancestor_disable_mask(u);
+
+ return mask;
+}
+
+CGroupMask unit_get_enable_mask(Unit *u) {
+ CGroupMask mask;
+
+ /* This returns the cgroup mask of all controllers to enable
+ * for the children of a specific cgroup. This is primarily
+ * useful for the unified cgroup hierarchy, where each cgroup
+ * controls which controllers are enabled for its children. */
+
+ mask = unit_get_members_mask(u);
+ mask &= u->manager->cgroup_supported;
+ mask &= ~unit_get_ancestor_disable_mask(u);
+
+ return mask;
+}
+
+void unit_invalidate_cgroup_members_masks(Unit *u) {
+ assert(u);
+
+ /* Recurse invalidate the member masks cache all the way up the tree */
+ u->cgroup_members_mask_valid = false;
+
+ if (UNIT_ISSET(u->slice))
+ unit_invalidate_cgroup_members_masks(UNIT_DEREF(u->slice));
+}
+
+const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask) {
+
+ /* Returns the realized cgroup path of the specified unit where all specified controllers are available. */
+
+ while (u) {
+
+ if (u->cgroup_path &&
+ u->cgroup_realized &&
+ FLAGS_SET(u->cgroup_realized_mask, mask))
+ return u->cgroup_path;
+
+ u = UNIT_DEREF(u->slice);
+ }
+
+ return NULL;
+}
+
+static const char *migrate_callback(CGroupMask mask, void *userdata) {
+ return unit_get_realized_cgroup_path(userdata, mask);
+}
+
+char *unit_default_cgroup_path(const Unit *u) {
+ _cleanup_free_ char *escaped = NULL, *slice = NULL;
+ int r;
+
+ assert(u);
+
+ if (unit_has_name(u, SPECIAL_ROOT_SLICE))
+ return strdup(u->manager->cgroup_root);
+
+ if (UNIT_ISSET(u->slice) && !unit_has_name(UNIT_DEREF(u->slice), SPECIAL_ROOT_SLICE)) {
+ r = cg_slice_to_path(UNIT_DEREF(u->slice)->id, &slice);
+ if (r < 0)
+ return NULL;
+ }
+
+ escaped = cg_escape(u->id);
+ if (!escaped)
+ return NULL;
+
+ if (slice)
+ return strjoin(u->manager->cgroup_root, "/", slice, "/",
+ escaped);
+ else
+ return strjoin(u->manager->cgroup_root, "/", escaped);
+}
+
+int unit_set_cgroup_path(Unit *u, const char *path) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(u);
+
+ if (path) {
+ p = strdup(path);
+ if (!p)
+ return -ENOMEM;
+ } else
+ p = NULL;
+
+ if (streq_ptr(u->cgroup_path, p))
+ return 0;
+
+ if (p) {
+ r = hashmap_put(u->manager->cgroup_unit, p, u);
+ if (r < 0)
+ return r;
+ }
+
+ unit_release_cgroup(u);
+
+ u->cgroup_path = TAKE_PTR(p);
+
+ return 1;
+}
+
+int unit_watch_cgroup(Unit *u) {
+ _cleanup_free_ char *events = NULL;
+ int r;
+
+ assert(u);
+
+ if (!u->cgroup_path)
+ return 0;
+
+ if (u->cgroup_inotify_wd >= 0)
+ return 0;
+
+ /* Only applies to the unified hierarchy */
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether the name=systemd hierarchy is unified: %m");
+ if (r == 0)
+ return 0;
+
+ /* Don't watch the root slice, it's pointless. */
+ if (unit_has_name(u, SPECIAL_ROOT_SLICE))
+ return 0;
+
+ r = hashmap_ensure_allocated(&u->manager->cgroup_inotify_wd_unit, &trivial_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.events", &events);
+ if (r < 0)
+ return log_oom();
+
+ u->cgroup_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY);
+ if (u->cgroup_inotify_wd < 0) {
+
+ if (errno == ENOENT) /* If the directory is already
+ * gone we don't need to track
+ * it, so this is not an error */
+ return 0;
+
+ return log_unit_error_errno(u, errno, "Failed to add inotify watch descriptor for control group %s: %m", u->cgroup_path);
+ }
+
+ r = hashmap_put(u->manager->cgroup_inotify_wd_unit, INT_TO_PTR(u->cgroup_inotify_wd), u);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to add inotify watch descriptor to hash map: %m");
+
+ return 0;
+}
+
+int unit_pick_cgroup_path(Unit *u) {
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ assert(u);
+
+ if (u->cgroup_path)
+ return 0;
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return -EINVAL;
+
+ path = unit_default_cgroup_path(u);
+ if (!path)
+ return log_oom();
+
+ r = unit_set_cgroup_path(u, path);
+ if (r == -EEXIST)
+ return log_unit_error_errno(u, r, "Control group %s exists already.", path);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to set unit's control group path to %s: %m", path);
+
+ return 0;
+}
+
+static int unit_create_cgroup(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask,
+ ManagerState state) {
+
+ bool created;
+ int r;
+
+ assert(u);
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return 0;
+
+ /* Figure out our cgroup path */
+ r = unit_pick_cgroup_path(u);
+ if (r < 0)
+ return r;
+
+ /* First, create our own group */
+ r = cg_create_everywhere(u->manager->cgroup_supported, target_mask, u->cgroup_path);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to create cgroup %s: %m", u->cgroup_path);
+ created = r;
+
+ /* Start watching it */
+ (void) unit_watch_cgroup(u);
+
+ /* Preserve enabled controllers in delegated units, adjust others. */
+ if (created || !u->cgroup_realized || !unit_cgroup_delegate(u)) {
+ CGroupMask result_mask = 0;
+
+ /* Enable all controllers we need */
+ r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path, &result_mask);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to enable/disable controllers on cgroup %s, ignoring: %m", u->cgroup_path);
+
+ /* If we just turned off a controller, this might release the controller for our parent too, let's
+ * enqueue the parent for re-realization in that case again. */
+ if (UNIT_ISSET(u->slice)) {
+ CGroupMask turned_off;
+
+ turned_off = (u->cgroup_realized ? u->cgroup_enabled_mask & ~result_mask : 0);
+ if (turned_off != 0) {
+ Unit *parent;
+
+ /* Force the parent to propagate the enable mask to the kernel again, by invalidating
+ * the controller we just turned off. */
+
+ for (parent = UNIT_DEREF(u->slice); parent; parent = UNIT_DEREF(parent->slice))
+ unit_invalidate_cgroup(parent, turned_off);
+ }
+ }
+
+ /* Remember what's actually enabled now */
+ u->cgroup_enabled_mask = result_mask;
+ }
+
+ /* Keep track that this is now realized */
+ u->cgroup_realized = true;
+ u->cgroup_realized_mask = target_mask;
+
+ if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) {
+
+ /* Then, possibly move things over, but not if
+ * subgroups may contain processes, which is the case
+ * for slice and delegation units. */
+ r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to migrate cgroup from to %s, ignoring: %m", u->cgroup_path);
+ }
+
+ /* Set attributes */
+ cgroup_context_apply(u, target_mask, state);
+ cgroup_xattr_apply(u);
+
+ return 0;
+}
+
+static int unit_attach_pid_to_cgroup_via_bus(Unit *u, pid_t pid, const char *suffix_path) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char *pp;
+ int r;
+
+ assert(u);
+
+ if (MANAGER_IS_SYSTEM(u->manager))
+ return -EINVAL;
+
+ if (!u->manager->system_bus)
+ return -EIO;
+
+ if (!u->cgroup_path)
+ return -EINVAL;
+
+ /* Determine this unit's cgroup path relative to our cgroup root */
+ pp = path_startswith(u->cgroup_path, u->manager->cgroup_root);
+ if (!pp)
+ return -EINVAL;
+
+ pp = strjoina("/", pp, suffix_path);
+ path_simplify(pp, false);
+
+ r = sd_bus_call_method(u->manager->system_bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "AttachProcessesToUnit",
+ &error, NULL,
+ "ssau",
+ NULL /* empty unit name means client's unit, i.e. us */, pp, 1, (uint32_t) pid);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to attach unit process " PID_FMT " via the bus: %s", pid, bus_error_message(&error, r));
+
+ return 0;
+}
+
+int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
+ CGroupMask delegated_mask;
+ const char *p;
+ Iterator i;
+ void *pidp;
+ int r, q;
+
+ assert(u);
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return -EINVAL;
+
+ if (set_isempty(pids))
+ return 0;
+
+ r = unit_realize_cgroup(u);
+ if (r < 0)
+ return r;
+
+ if (isempty(suffix_path))
+ p = u->cgroup_path;
+ else
+ p = strjoina(u->cgroup_path, "/", suffix_path);
+
+ delegated_mask = unit_get_delegate_mask(u);
+
+ r = 0;
+ SET_FOREACH(pidp, pids, i) {
+ pid_t pid = PTR_TO_PID(pidp);
+ CGroupController c;
+
+ /* First, attach the PID to the main cgroup hierarchy */
+ q = cg_attach(SYSTEMD_CGROUP_CONTROLLER, p, pid);
+ if (q < 0) {
+ log_unit_debug_errno(u, q, "Couldn't move process " PID_FMT " to requested cgroup '%s': %m", pid, p);
+
+ if (MANAGER_IS_USER(u->manager) && IN_SET(q, -EPERM, -EACCES)) {
+ int z;
+
+ /* If we are in a user instance, and we can't move the process ourselves due to
+ * permission problems, let's ask the system instance about it instead. Since it's more
+ * privileged it might be able to move the process across the leaves of a subtree who's
+ * top node is not owned by us. */
+
+ z = unit_attach_pid_to_cgroup_via_bus(u, pid, suffix_path);
+ if (z < 0)
+ log_unit_debug_errno(u, z, "Couldn't move process " PID_FMT " to requested cgroup '%s' via the system bus either: %m", pid, p);
+ else
+ continue; /* When the bus thing worked via the bus we are fully done for this PID. */
+ }
+
+ if (r >= 0)
+ r = q; /* Remember first error */
+
+ continue;
+ }
+
+ q = cg_all_unified();
+ if (q < 0)
+ return q;
+ if (q > 0)
+ continue;
+
+ /* In the legacy hierarchy, attach the process to the request cgroup if possible, and if not to the
+ * innermost realized one */
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *realized;
+
+ if (!(u->manager->cgroup_supported & bit))
+ continue;
+
+ /* If this controller is delegated and realized, honour the caller's request for the cgroup suffix. */
+ if (delegated_mask & u->cgroup_realized_mask & bit) {
+ q = cg_attach(cgroup_controller_to_string(c), p, pid);
+ if (q >= 0)
+ continue; /* Success! */
+
+ log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to requested cgroup %s in controller %s, falling back to unit's cgroup: %m",
+ pid, p, cgroup_controller_to_string(c));
+ }
+
+ /* So this controller is either not delegate or realized, or something else weird happened. In
+ * that case let's attach the PID at least to the closest cgroup up the tree that is
+ * realized. */
+ realized = unit_get_realized_cgroup_path(u, bit);
+ if (!realized)
+ continue; /* Not even realized in the root slice? Then let's not bother */
+
+ q = cg_attach(cgroup_controller_to_string(c), realized, pid);
+ if (q < 0)
+ log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to realized cgroup %s in controller %s, ignoring: %m",
+ pid, realized, cgroup_controller_to_string(c));
+ }
+ }
+
+ return r;
+}
+
+static bool unit_has_mask_realized(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask) {
+
+ assert(u);
+
+ /* Returns true if this unit is fully realized. We check four things:
+ *
+ * 1. Whether the cgroup was created at all
+ * 2. Whether the cgroup was created in all the hierarchies we need it to be created in (in case of cgroup v1)
+ * 3. Whether the cgroup has all the right controllers enabled (in case of cgroup v2)
+ * 4. Whether the invalidation mask is currently zero
+ *
+ * If you wonder why we mask the target realization and enable mask with CGROUP_MASK_V1/CGROUP_MASK_V2: note
+ * that there are three sets of bitmasks: CGROUP_MASK_V1 (for real cgroup v1 controllers), CGROUP_MASK_V2 (for
+ * real cgroup v2 controllers) and CGROUP_MASK_BPF (for BPF-based pseudo-controllers). Now, cgroup_realized_mask
+ * is only matters for cgroup v1 controllers, and cgroup_enabled_mask only used for cgroup v2, and if they
+ * differ in the others, we don't really care. (After all, the cgroup_enabled_mask tracks with controllers are
+ * enabled through cgroup.subtree_control, and since the BPF pseudo-controllers don't show up there, they
+ * simply don't matter. */
+
+ return u->cgroup_realized &&
+ ((u->cgroup_realized_mask ^ target_mask) & CGROUP_MASK_V1) == 0 &&
+ ((u->cgroup_enabled_mask ^ enable_mask) & CGROUP_MASK_V2) == 0 &&
+ u->cgroup_invalidated_mask == 0;
+}
+
+static bool unit_has_mask_disables_realized(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask) {
+
+ assert(u);
+
+ /* Returns true if all controllers which should be disabled are indeed disabled.
+ *
+ * Unlike unit_has_mask_realized, we don't care what was enabled, only that anything we want to remove is
+ * already removed. */
+
+ return !u->cgroup_realized ||
+ (FLAGS_SET(u->cgroup_realized_mask, target_mask & CGROUP_MASK_V1) &&
+ FLAGS_SET(u->cgroup_enabled_mask, enable_mask & CGROUP_MASK_V2));
+}
+
+static bool unit_has_mask_enables_realized(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask) {
+
+ assert(u);
+
+ /* Returns true if all controllers which should be enabled are indeed enabled.
+ *
+ * Unlike unit_has_mask_realized, we don't care about the controllers that are not present, only that anything
+ * we want to add is already added. */
+
+ return u->cgroup_realized &&
+ ((u->cgroup_realized_mask | target_mask) & CGROUP_MASK_V1) == (u->cgroup_realized_mask & CGROUP_MASK_V1) &&
+ ((u->cgroup_enabled_mask | enable_mask) & CGROUP_MASK_V2) == (u->cgroup_enabled_mask & CGROUP_MASK_V2);
+}
+
+void unit_add_to_cgroup_realize_queue(Unit *u) {
+ assert(u);
+
+ if (u->in_cgroup_realize_queue)
+ return;
+
+ LIST_PREPEND(cgroup_realize_queue, u->manager->cgroup_realize_queue, u);
+ u->in_cgroup_realize_queue = true;
+}
+
+static void unit_remove_from_cgroup_realize_queue(Unit *u) {
+ assert(u);
+
+ if (!u->in_cgroup_realize_queue)
+ return;
+
+ LIST_REMOVE(cgroup_realize_queue, u->manager->cgroup_realize_queue, u);
+ u->in_cgroup_realize_queue = false;
+}
+
+/* Controllers can only be enabled breadth-first, from the root of the
+ * hierarchy downwards to the unit in question. */
+static int unit_realize_cgroup_now_enable(Unit *u, ManagerState state) {
+ CGroupMask target_mask, enable_mask, new_target_mask, new_enable_mask;
+ int r;
+
+ assert(u);
+
+ /* First go deal with this unit's parent, or we won't be able to enable
+ * any new controllers at this layer. */
+ if (UNIT_ISSET(u->slice)) {
+ r = unit_realize_cgroup_now_enable(UNIT_DEREF(u->slice), state);
+ if (r < 0)
+ return r;
+ }
+
+ target_mask = unit_get_target_mask(u);
+ enable_mask = unit_get_enable_mask(u);
+
+ /* We can only enable in this direction, don't try to disable anything.
+ */
+ if (unit_has_mask_enables_realized(u, target_mask, enable_mask))
+ return 0;
+
+ new_target_mask = u->cgroup_realized_mask | target_mask;
+ new_enable_mask = u->cgroup_enabled_mask | enable_mask;
+
+ return unit_create_cgroup(u, new_target_mask, new_enable_mask, state);
+}
+
+/* Controllers can only be disabled depth-first, from the leaves of the
+ * hierarchy upwards to the unit in question. */
+static int unit_realize_cgroup_now_disable(Unit *u, ManagerState state) {
+ Iterator i;
+ Unit *m;
+ void *v;
+
+ assert(u);
+
+ if (u->type != UNIT_SLICE)
+ return 0;
+
+ HASHMAP_FOREACH_KEY(v, m, u->dependencies[UNIT_BEFORE], i) {
+ CGroupMask target_mask, enable_mask, new_target_mask, new_enable_mask;
+ int r;
+
+ if (UNIT_DEREF(m->slice) != u)
+ continue;
+
+ /* The cgroup for this unit might not actually be fully
+ * realised yet, in which case it isn't holding any controllers
+ * open anyway. */
+ if (!m->cgroup_path)
+ continue;
+
+ /* We must disable those below us first in order to release the
+ * controller. */
+ if (m->type == UNIT_SLICE)
+ (void) unit_realize_cgroup_now_disable(m, state);
+
+ target_mask = unit_get_target_mask(m);
+ enable_mask = unit_get_enable_mask(m);
+
+ /* We can only disable in this direction, don't try to enable
+ * anything. */
+ if (unit_has_mask_disables_realized(m, target_mask, enable_mask))
+ continue;
+
+ new_target_mask = m->cgroup_realized_mask & target_mask;
+ new_enable_mask = m->cgroup_enabled_mask & enable_mask;
+
+ r = unit_create_cgroup(m, new_target_mask, new_enable_mask, state);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+/* Check if necessary controllers and attributes for a unit are in place.
+ *
+ * - If so, do nothing.
+ * - If not, create paths, move processes over, and set attributes.
+ *
+ * Controllers can only be *enabled* in a breadth-first way, and *disabled* in
+ * a depth-first way. As such the process looks like this:
+ *
+ * Suppose we have a cgroup hierarchy which looks like this:
+ *
+ * root
+ * / \
+ * / \
+ * / \
+ * a b
+ * / \ / \
+ * / \ / \
+ * c d e f
+ * / \ / \ / \ / \
+ * h i j k l m n o
+ *
+ * 1. We want to realise cgroup "d" now.
+ * 2. cgroup "a" has DisableControllers=cpu in the associated unit.
+ * 3. cgroup "k" just started requesting the memory controller.
+ *
+ * To make this work we must do the following in order:
+ *
+ * 1. Disable CPU controller in k, j
+ * 2. Disable CPU controller in d
+ * 3. Enable memory controller in root
+ * 4. Enable memory controller in a
+ * 5. Enable memory controller in d
+ * 6. Enable memory controller in k
+ *
+ * Notice that we need to touch j in one direction, but not the other. We also
+ * don't go beyond d when disabling -- it's up to "a" to get realized if it
+ * wants to disable further. The basic rules are therefore:
+ *
+ * - If you're disabling something, you need to realise all of the cgroups from
+ * your recursive descendants to the root. This starts from the leaves.
+ * - If you're enabling something, you need to realise from the root cgroup
+ * downwards, but you don't need to iterate your recursive descendants.
+ *
+ * Returns 0 on success and < 0 on failure. */
+static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
+ CGroupMask target_mask, enable_mask;
+ int r;
+
+ assert(u);
+
+ unit_remove_from_cgroup_realize_queue(u);
+
+ target_mask = unit_get_target_mask(u);
+ enable_mask = unit_get_enable_mask(u);
+
+ if (unit_has_mask_realized(u, target_mask, enable_mask))
+ return 0;
+
+ /* Disable controllers below us, if there are any */
+ r = unit_realize_cgroup_now_disable(u, state);
+ if (r < 0)
+ return r;
+
+ /* Enable controllers above us, if there are any */
+ if (UNIT_ISSET(u->slice)) {
+ r = unit_realize_cgroup_now_enable(UNIT_DEREF(u->slice), state);
+ if (r < 0)
+ return r;
+ }
+
+ /* Now actually deal with the cgroup we were trying to realise and set attributes */
+ r = unit_create_cgroup(u, target_mask, enable_mask, state);
+ if (r < 0)
+ return r;
+
+ /* Now, reset the invalidation mask */
+ u->cgroup_invalidated_mask = 0;
+ return 0;
+}
+
+unsigned manager_dispatch_cgroup_realize_queue(Manager *m) {
+ ManagerState state;
+ unsigned n = 0;
+ Unit *i;
+ int r;
+
+ assert(m);
+
+ state = manager_state(m);
+
+ while ((i = m->cgroup_realize_queue)) {
+ assert(i->in_cgroup_realize_queue);
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(i))) {
+ /* Maybe things changed, and the unit is not actually active anymore? */
+ unit_remove_from_cgroup_realize_queue(i);
+ continue;
+ }
+
+ r = unit_realize_cgroup_now(i, state);
+ if (r < 0)
+ log_warning_errno(r, "Failed to realize cgroups for queued unit %s, ignoring: %m", i->id);
+
+ n++;
+ }
+
+ return n;
+}
+
+static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) {
+ Unit *slice;
+
+ /* This adds the siblings of the specified unit and the
+ * siblings of all parent units to the cgroup queue. (But
+ * neither the specified unit itself nor the parents.) */
+
+ while ((slice = UNIT_DEREF(u->slice))) {
+ Iterator i;
+ Unit *m;
+ void *v;
+
+ HASHMAP_FOREACH_KEY(v, m, u->dependencies[UNIT_BEFORE], i) {
+ /* Skip units that have a dependency on the slice
+ * but aren't actually in it. */
+ if (UNIT_DEREF(m->slice) != slice)
+ continue;
+
+ /* No point in doing cgroup application for units
+ * without active processes. */
+ if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
+ continue;
+
+ /* If the unit doesn't need any new controllers
+ * and has current ones realized, it doesn't need
+ * any changes. */
+ if (unit_has_mask_realized(m,
+ unit_get_target_mask(m),
+ unit_get_enable_mask(m)))
+ continue;
+
+ unit_add_to_cgroup_realize_queue(m);
+ }
+
+ u = slice;
+ }
+}
+
+int unit_realize_cgroup(Unit *u) {
+ assert(u);
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return 0;
+
+ /* So, here's the deal: when realizing the cgroups for this
+ * unit, we need to first create all parents, but there's more
+ * actually: for the weight-based controllers we also need to
+ * make sure that all our siblings (i.e. units that are in the
+ * same slice as we are) have cgroups, too. Otherwise, things
+ * would become very uneven as each of their processes would
+ * get as much resources as all our group together. This call
+ * will synchronously create the parent cgroups, but will
+ * defer work on the siblings to the next event loop
+ * iteration. */
+
+ /* Add all sibling slices to the cgroup queue. */
+ unit_add_siblings_to_cgroup_realize_queue(u);
+
+ /* And realize this one now (and apply the values) */
+ return unit_realize_cgroup_now(u, manager_state(u->manager));
+}
+
+void unit_release_cgroup(Unit *u) {
+ assert(u);
+
+ /* Forgets all cgroup details for this cgroup — but does *not* destroy the cgroup. This is hence OK to call
+ * when we close down everything for reexecution, where we really want to leave the cgroup in place. */
+
+ if (u->cgroup_path) {
+ (void) hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
+ u->cgroup_path = mfree(u->cgroup_path);
+ }
+
+ if (u->cgroup_inotify_wd >= 0) {
+ if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_inotify_wd) < 0)
+ log_unit_debug_errno(u, errno, "Failed to remove cgroup inotify watch %i for %s, ignoring: %m", u->cgroup_inotify_wd, u->id);
+
+ (void) hashmap_remove(u->manager->cgroup_inotify_wd_unit, INT_TO_PTR(u->cgroup_inotify_wd));
+ u->cgroup_inotify_wd = -1;
+ }
+}
+
+void unit_prune_cgroup(Unit *u) {
+ int r;
+ bool is_root_slice;
+
+ assert(u);
+
+ /* Removes the cgroup, if empty and possible, and stops watching it. */
+
+ if (!u->cgroup_path)
+ return;
+
+ (void) unit_get_cpu_usage(u, NULL); /* Cache the last CPU usage value before we destroy the cgroup */
+
+ is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
+
+ r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
+ if (r < 0) {
+ log_unit_debug_errno(u, r, "Failed to destroy cgroup %s, ignoring: %m", u->cgroup_path);
+ return;
+ }
+
+ if (is_root_slice)
+ return;
+
+ unit_release_cgroup(u);
+
+ u->cgroup_realized = false;
+ u->cgroup_realized_mask = 0;
+ u->cgroup_enabled_mask = 0;
+
+ u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
+}
+
+int unit_search_main_pid(Unit *u, pid_t *ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t pid = 0, npid, mypid;
+ int r;
+
+ assert(u);
+ assert(ret);
+
+ if (!u->cgroup_path)
+ return -ENXIO;
+
+ r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f);
+ if (r < 0)
+ return r;
+
+ mypid = getpid_cached();
+ while (cg_read_pid(f, &npid) > 0) {
+ pid_t ppid;
+
+ if (npid == pid)
+ continue;
+
+ /* Ignore processes that aren't our kids */
+ if (get_process_ppid(npid, &ppid) >= 0 && ppid != mypid)
+ continue;
+
+ if (pid != 0)
+ /* Dang, there's more than one daemonized PID
+ in this group, so we don't know what process
+ is the main process. */
+
+ return -ENODATA;
+
+ pid = npid;
+ }
+
+ *ret = pid;
+ return 0;
+}
+
+static int unit_watch_pids_in_path(Unit *u, const char *path) {
+ _cleanup_closedir_ DIR *d = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int ret = 0, r;
+
+ assert(u);
+ assert(path);
+
+ r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f);
+ if (r < 0)
+ ret = r;
+ else {
+ pid_t pid;
+
+ while ((r = cg_read_pid(f, &pid)) > 0) {
+ r = unit_watch_pid(u, pid);
+ if (r < 0 && ret >= 0)
+ ret = r;
+ }
+
+ if (r < 0 && ret >= 0)
+ ret = r;
+ }
+
+ r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
+ if (r < 0) {
+ if (ret >= 0)
+ ret = r;
+ } else {
+ char *fn;
+
+ while ((r = cg_read_subgroup(d, &fn)) > 0) {
+ _cleanup_free_ char *p = NULL;
+
+ p = strjoin(path, "/", fn);
+ free(fn);
+
+ if (!p)
+ return -ENOMEM;
+
+ r = unit_watch_pids_in_path(u, p);
+ if (r < 0 && ret >= 0)
+ ret = r;
+ }
+
+ if (r < 0 && ret >= 0)
+ ret = r;
+ }
+
+ return ret;
+}
+
+int unit_synthesize_cgroup_empty_event(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Enqueue a synthetic cgroup empty event if this unit doesn't watch any PIDs anymore. This is compatibility
+ * support for non-unified systems where notifications aren't reliable, and hence need to take whatever we can
+ * get as notification source as soon as we stopped having any useful PIDs to watch for. */
+
+ if (!u->cgroup_path)
+ return -ENOENT;
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return r;
+ if (r > 0) /* On unified we have reliable notifications, and don't need this */
+ return 0;
+
+ if (!set_isempty(u->pids))
+ return 0;
+
+ unit_add_to_cgroup_empty_queue(u);
+ return 0;
+}
+
+int unit_watch_all_pids(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Adds all PIDs from our cgroup to the set of PIDs we
+ * watch. This is a fallback logic for cases where we do not
+ * get reliable cgroup empty notifications: we try to use
+ * SIGCHLD as replacement. */
+
+ if (!u->cgroup_path)
+ return -ENOENT;
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return r;
+ if (r > 0) /* On unified we can use proper notifications */
+ return 0;
+
+ return unit_watch_pids_in_path(u, u->cgroup_path);
+}
+
+static int on_cgroup_empty_event(sd_event_source *s, void *userdata) {
+ Manager *m = userdata;
+ Unit *u;
+ int r;
+
+ assert(s);
+ assert(m);
+
+ u = m->cgroup_empty_queue;
+ if (!u)
+ return 0;
+
+ assert(u->in_cgroup_empty_queue);
+ u->in_cgroup_empty_queue = false;
+ LIST_REMOVE(cgroup_empty_queue, m->cgroup_empty_queue, u);
+
+ if (m->cgroup_empty_queue) {
+ /* More stuff queued, let's make sure we remain enabled */
+ r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
+ if (r < 0)
+ log_debug_errno(r, "Failed to reenable cgroup empty event source, ignoring: %m");
+ }
+
+ unit_add_to_gc_queue(u);
+
+ if (UNIT_VTABLE(u)->notify_cgroup_empty)
+ UNIT_VTABLE(u)->notify_cgroup_empty(u);
+
+ return 0;
+}
+
+void unit_add_to_cgroup_empty_queue(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Note that there are four different ways how cgroup empty events reach us:
+ *
+ * 1. On the unified hierarchy we get an inotify event on the cgroup
+ *
+ * 2. On the legacy hierarchy, when running in system mode, we get a datagram on the cgroup agent socket
+ *
+ * 3. On the legacy hierarchy, when running in user mode, we get a D-Bus signal on the system bus
+ *
+ * 4. On the legacy hierarchy, in service units we start watching all processes of the cgroup for SIGCHLD as
+ * soon as we get one SIGCHLD, to deal with unreliable cgroup notifications.
+ *
+ * Regardless which way we got the notification, we'll verify it here, and then add it to a separate
+ * queue. This queue will be dispatched at a lower priority than the SIGCHLD handler, so that we always use
+ * SIGCHLD if we can get it first, and only use the cgroup empty notifications if there's no SIGCHLD pending
+ * (which might happen if the cgroup doesn't contain processes that are our own child, which is typically the
+ * case for scope units). */
+
+ if (u->in_cgroup_empty_queue)
+ return;
+
+ /* Let's verify that the cgroup is really empty */
+ if (!u->cgroup_path)
+ return;
+ r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
+ if (r < 0) {
+ log_unit_debug_errno(u, r, "Failed to determine whether cgroup %s is empty: %m", u->cgroup_path);
+ return;
+ }
+ if (r == 0)
+ return;
+
+ LIST_PREPEND(cgroup_empty_queue, u->manager->cgroup_empty_queue, u);
+ u->in_cgroup_empty_queue = true;
+
+ /* Trigger the defer event */
+ r = sd_event_source_set_enabled(u->manager->cgroup_empty_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ log_debug_errno(r, "Failed to enable cgroup empty event source: %m");
+}
+
+static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+
+ assert(s);
+ assert(fd >= 0);
+ assert(m);
+
+ for (;;) {
+ union inotify_event_buffer buffer;
+ struct inotify_event *e;
+ ssize_t l;
+
+ l = read(fd, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ return 0;
+
+ return log_error_errno(errno, "Failed to read control group inotify events: %m");
+ }
+
+ FOREACH_INOTIFY_EVENT(e, buffer, l) {
+ Unit *u;
+
+ if (e->wd < 0)
+ /* Queue overflow has no watch descriptor */
+ continue;
+
+ if (e->mask & IN_IGNORED)
+ /* The watch was just removed */
+ continue;
+
+ u = hashmap_get(m->cgroup_inotify_wd_unit, INT_TO_PTR(e->wd));
+ if (!u) /* Not that inotify might deliver
+ * events for a watch even after it
+ * was removed, because it was queued
+ * before the removal. Let's ignore
+ * this here safely. */
+ continue;
+
+ unit_add_to_cgroup_empty_queue(u);
+ }
+ }
+}
+
+static int cg_bpf_mask_supported(CGroupMask *ret) {
+ CGroupMask mask = 0;
+ int r;
+
+ /* BPF-based firewall */
+ r = bpf_firewall_supported();
+ if (r > 0)
+ mask |= CGROUP_MASK_BPF_FIREWALL;
+
+ /* BPF-based device access control */
+ r = bpf_devices_supported();
+ if (r > 0)
+ mask |= CGROUP_MASK_BPF_DEVICES;
+
+ *ret = mask;
+ return 0;
+}
+
+int manager_setup_cgroup(Manager *m) {
+ _cleanup_free_ char *path = NULL;
+ const char *scope_path;
+ CGroupController c;
+ int r, all_unified;
+ CGroupMask mask;
+ char *e;
+
+ assert(m);
+
+ /* 1. Determine hierarchy */
+ m->cgroup_root = mfree(m->cgroup_root);
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
+ if (r < 0)
+ return log_error_errno(r, "Cannot determine cgroup we are running in: %m");
+
+ /* Chop off the init scope, if we are already located in it */
+ e = endswith(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
+
+ /* LEGACY: Also chop off the system slice if we are in
+ * it. This is to support live upgrades from older systemd
+ * versions where PID 1 was moved there. Also see
+ * cg_get_root_path(). */
+ if (!e && MANAGER_IS_SYSTEM(m)) {
+ e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
+ if (!e)
+ e = endswith(m->cgroup_root, "/system"); /* even more legacy */
+ }
+ if (e)
+ *e = 0;
+
+ /* And make sure to store away the root value without trailing slash, even for the root dir, so that we can
+ * easily prepend it everywhere. */
+ delete_trailing_chars(m->cgroup_root, "/");
+
+ /* 2. Show data */
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
+ if (r < 0)
+ return log_error_errno(r, "Cannot find cgroup mount point: %m");
+
+ r = cg_unified_flush();
+ if (r < 0)
+ return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m");
+
+ all_unified = cg_all_unified();
+ if (all_unified < 0)
+ return log_error_errno(all_unified, "Couldn't determine whether we are in all unified mode: %m");
+ if (all_unified > 0)
+ log_debug("Unified cgroup hierarchy is located at %s.", path);
+ else {
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether systemd's own controller is in unified mode: %m");
+ if (r > 0)
+ log_debug("Unified cgroup hierarchy is located at %s. Controllers are on legacy hierarchies.", path);
+ else
+ log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER_LEGACY ". File system hierarchy is at %s.", path);
+ }
+
+ /* 3. Allocate cgroup empty defer event source */
+ m->cgroup_empty_event_source = sd_event_source_unref(m->cgroup_empty_event_source);
+ r = sd_event_add_defer(m->event, &m->cgroup_empty_event_source, on_cgroup_empty_event, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create cgroup empty event source: %m");
+
+ r = sd_event_source_set_priority(m->cgroup_empty_event_source, SD_EVENT_PRIORITY_NORMAL-5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of cgroup empty event source: %m");
+
+ r = sd_event_source_set_enabled(m->cgroup_empty_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ return log_error_errno(r, "Failed to disable cgroup empty event source: %m");
+
+ (void) sd_event_source_set_description(m->cgroup_empty_event_source, "cgroup-empty");
+
+ /* 4. Install notifier inotify object, or agent */
+ if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
+
+ /* In the unified hierarchy we can get cgroup empty notifications via inotify. */
+
+ m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
+ safe_close(m->cgroup_inotify_fd);
+
+ m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (m->cgroup_inotify_fd < 0)
+ return log_error_errno(errno, "Failed to create control group inotify object: %m");
+
+ r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch control group inotify object: %m");
+
+ /* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also
+ * see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
+ r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-4);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of inotify event source: %m");
+
+ (void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");
+
+ } else if (MANAGER_IS_SYSTEM(m) && manager_owns_host_root_cgroup(m) && !MANAGER_IS_TEST_RUN(m)) {
+
+ /* On the legacy hierarchy we only get notifications via cgroup agents. (Which isn't really reliable,
+ * since it does not generate events when control groups with children run empty. */
+
+ r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
+ if (r < 0)
+ log_warning_errno(r, "Failed to install release agent, ignoring: %m");
+ else if (r > 0)
+ log_debug("Installed release agent.");
+ else if (r == 0)
+ log_debug("Release agent already installed.");
+ }
+
+ /* 5. Make sure we are in the special "init.scope" unit in the root slice. */
+ scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
+ r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
+ if (r >= 0) {
+ /* Also, move all other userspace processes remaining in the root cgroup into that scope. */
+ r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
+ if (r < 0)
+ log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
+
+ /* 6. And pin it, so that it cannot be unmounted */
+ safe_close(m->pin_cgroupfs_fd);
+ m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
+ if (m->pin_cgroupfs_fd < 0)
+ return log_error_errno(errno, "Failed to open pin file: %m");
+
+ } else if (!MANAGER_IS_TEST_RUN(m))
+ return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
+
+ /* 7. Always enable hierarchical support if it exists... */
+ if (!all_unified && !MANAGER_IS_TEST_RUN(m))
+ (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
+
+ /* 8. Figure out which controllers are supported */
+ r = cg_mask_supported(&m->cgroup_supported);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine supported controllers: %m");
+
+ /* 9. Figure out which bpf-based pseudo-controllers are supported */
+ r = cg_bpf_mask_supported(&mask);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine supported bpf-based pseudo-controllers: %m");
+ m->cgroup_supported |= mask;
+
+ /* 10. Log which controllers are supported */
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++)
+ log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & CGROUP_CONTROLLER_TO_MASK(c)));
+
+ return 0;
+}
+
+void manager_shutdown_cgroup(Manager *m, bool delete) {
+ assert(m);
+
+ /* We can't really delete the group, since we are in it. But
+ * let's trim it. */
+ if (delete && m->cgroup_root && m->test_run_flags != MANAGER_TEST_RUN_MINIMAL)
+ (void) cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
+
+ m->cgroup_empty_event_source = sd_event_source_unref(m->cgroup_empty_event_source);
+
+ m->cgroup_inotify_wd_unit = hashmap_free(m->cgroup_inotify_wd_unit);
+
+ m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
+ m->cgroup_inotify_fd = safe_close(m->cgroup_inotify_fd);
+
+ m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd);
+
+ m->cgroup_root = mfree(m->cgroup_root);
+}
+
+Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
+ char *p;
+ Unit *u;
+
+ assert(m);
+ assert(cgroup);
+
+ u = hashmap_get(m->cgroup_unit, cgroup);
+ if (u)
+ return u;
+
+ p = strdupa(cgroup);
+ for (;;) {
+ char *e;
+
+ e = strrchr(p, '/');
+ if (!e || e == p)
+ return hashmap_get(m->cgroup_unit, SPECIAL_ROOT_SLICE);
+
+ *e = 0;
+
+ u = hashmap_get(m->cgroup_unit, p);
+ if (u)
+ return u;
+ }
+}
+
+Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid) {
+ _cleanup_free_ char *cgroup = NULL;
+
+ assert(m);
+
+ if (!pid_is_valid(pid))
+ return NULL;
+
+ if (cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup) < 0)
+ return NULL;
+
+ return manager_get_unit_by_cgroup(m, cgroup);
+}
+
+Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
+ Unit *u, **array;
+
+ assert(m);
+
+ /* Note that a process might be owned by multiple units, we return only one here, which is good enough for most
+ * cases, though not strictly correct. We prefer the one reported by cgroup membership, as that's the most
+ * relevant one as children of the process will be assigned to that one, too, before all else. */
+
+ if (!pid_is_valid(pid))
+ return NULL;
+
+ if (pid == getpid_cached())
+ return hashmap_get(m->units, SPECIAL_INIT_SCOPE);
+
+ u = manager_get_unit_by_pid_cgroup(m, pid);
+ if (u)
+ return u;
+
+ u = hashmap_get(m->watch_pids, PID_TO_PTR(pid));
+ if (u)
+ return u;
+
+ array = hashmap_get(m->watch_pids, PID_TO_PTR(-pid));
+ if (array)
+ return array[0];
+
+ return NULL;
+}
+
+int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
+ Unit *u;
+
+ assert(m);
+ assert(cgroup);
+
+ /* Called on the legacy hierarchy whenever we get an explicit cgroup notification from the cgroup agent process
+ * or from the --system instance */
+
+ log_debug("Got cgroup empty notification for: %s", cgroup);
+
+ u = manager_get_unit_by_cgroup(m, cgroup);
+ if (!u)
+ return 0;
+
+ unit_add_to_cgroup_empty_queue(u);
+ return 1;
+}
+
+int unit_get_memory_current(Unit *u, uint64_t *ret) {
+ _cleanup_free_ char *v = NULL;
+ int r;
+
+ assert(u);
+ assert(ret);
+
+ if (!UNIT_CGROUP_BOOL(u, memory_accounting))
+ return -ENODATA;
+
+ if (!u->cgroup_path)
+ return -ENODATA;
+
+ /* The root cgroup doesn't expose this information, let's get it from /proc instead */
+ if (unit_has_host_root_cgroup(u))
+ return procfs_memory_get_used(ret);
+
+ if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
+ return -ENODATA;
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v);
+ else
+ r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ return safe_atou64(v, ret);
+}
+
+int unit_get_tasks_current(Unit *u, uint64_t *ret) {
+ _cleanup_free_ char *v = NULL;
+ int r;
+
+ assert(u);
+ assert(ret);
+
+ if (!UNIT_CGROUP_BOOL(u, tasks_accounting))
+ return -ENODATA;
+
+ if (!u->cgroup_path)
+ return -ENODATA;
+
+ /* The root cgroup doesn't expose this information, let's get it from /proc instead */
+ if (unit_has_host_root_cgroup(u))
+ return procfs_tasks_get_current(ret);
+
+ if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0)
+ return -ENODATA;
+
+ r = cg_get_attribute("pids", u->cgroup_path, "pids.current", &v);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ return safe_atou64(v, ret);
+}
+
+static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
+ _cleanup_free_ char *v = NULL;
+ uint64_t ns;
+ int r;
+
+ assert(u);
+ assert(ret);
+
+ if (!u->cgroup_path)
+ return -ENODATA;
+
+ /* The root cgroup doesn't expose this information, let's get it from /proc instead */
+ if (unit_has_host_root_cgroup(u))
+ return procfs_cpu_get_usage(ret);
+
+ /* Requisite controllers for CPU accounting are not enabled */
+ if ((get_cpu_accounting_mask() & ~u->cgroup_realized_mask) != 0)
+ return -ENODATA;
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ _cleanup_free_ char *val = NULL;
+ uint64_t us;
+
+ r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
+ if (IN_SET(r, -ENOENT, -ENXIO))
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(val, &us);
+ if (r < 0)
+ return r;
+
+ ns = us * NSEC_PER_USEC;
+ } else {
+ r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &ns);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = ns;
+ return 0;
+}
+
+int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
+ nsec_t ns;
+ int r;
+
+ assert(u);
+
+ /* Retrieve the current CPU usage counter. This will subtract the CPU counter taken when the unit was
+ * started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply
+ * call this function with a NULL return value. */
+
+ if (!UNIT_CGROUP_BOOL(u, cpu_accounting))
+ return -ENODATA;
+
+ r = unit_get_cpu_usage_raw(u, &ns);
+ if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) {
+ /* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our
+ * cached value. */
+
+ if (ret)
+ *ret = u->cpu_usage_last;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ if (ns > u->cpu_usage_base)
+ ns -= u->cpu_usage_base;
+ else
+ ns = 0;
+
+ u->cpu_usage_last = ns;
+ if (ret)
+ *ret = ns;
+
+ return 0;
+}
+
+int unit_get_ip_accounting(
+ Unit *u,
+ CGroupIPAccountingMetric metric,
+ uint64_t *ret) {
+
+ uint64_t value;
+ int fd, r;
+
+ assert(u);
+ assert(metric >= 0);
+ assert(metric < _CGROUP_IP_ACCOUNTING_METRIC_MAX);
+ assert(ret);
+
+ if (!UNIT_CGROUP_BOOL(u, ip_accounting))
+ return -ENODATA;
+
+ fd = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_INGRESS_PACKETS) ?
+ u->ip_accounting_ingress_map_fd :
+ u->ip_accounting_egress_map_fd;
+ if (fd < 0)
+ return -ENODATA;
+
+ if (IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_EGRESS_BYTES))
+ r = bpf_firewall_read_accounting(fd, &value, NULL);
+ else
+ r = bpf_firewall_read_accounting(fd, NULL, &value);
+ if (r < 0)
+ return r;
+
+ /* Add in additional metrics from a previous runtime. Note that when reexecing/reloading the daemon we compile
+ * all BPF programs and maps anew, but serialize the old counters. When deserializing we store them in the
+ * ip_accounting_extra[] field, and add them in here transparently. */
+
+ *ret = value + u->ip_accounting_extra[metric];
+
+ return r;
+}
+
+int unit_reset_cpu_accounting(Unit *u) {
+ nsec_t ns;
+ int r;
+
+ assert(u);
+
+ u->cpu_usage_last = NSEC_INFINITY;
+
+ r = unit_get_cpu_usage_raw(u, &ns);
+ if (r < 0) {
+ u->cpu_usage_base = 0;
+ return r;
+ }
+
+ u->cpu_usage_base = ns;
+ return 0;
+}
+
+int unit_reset_ip_accounting(Unit *u) {
+ int r = 0, q = 0;
+
+ assert(u);
+
+ if (u->ip_accounting_ingress_map_fd >= 0)
+ r = bpf_firewall_reset_accounting(u->ip_accounting_ingress_map_fd);
+
+ if (u->ip_accounting_egress_map_fd >= 0)
+ q = bpf_firewall_reset_accounting(u->ip_accounting_egress_map_fd);
+
+ zero(u->ip_accounting_extra);
+
+ return r < 0 ? r : q;
+}
+
+void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
+ assert(u);
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return;
+
+ if (m == 0)
+ return;
+
+ /* always invalidate compat pairs together */
+ if (m & (CGROUP_MASK_IO | CGROUP_MASK_BLKIO))
+ m |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
+
+ if (m & (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT))
+ m |= CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT;
+
+ if (FLAGS_SET(u->cgroup_invalidated_mask, m)) /* NOP? */
+ return;
+
+ u->cgroup_invalidated_mask |= m;
+ unit_add_to_cgroup_realize_queue(u);
+}
+
+void unit_invalidate_cgroup_bpf(Unit *u) {
+ assert(u);
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return;
+
+ if (u->cgroup_invalidated_mask & CGROUP_MASK_BPF_FIREWALL) /* NOP? */
+ return;
+
+ u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
+ unit_add_to_cgroup_realize_queue(u);
+
+ /* If we are a slice unit, we also need to put compile a new BPF program for all our children, as the IP access
+ * list of our children includes our own. */
+ if (u->type == UNIT_SLICE) {
+ Unit *member;
+ Iterator i;
+ void *v;
+
+ HASHMAP_FOREACH_KEY(v, member, u->dependencies[UNIT_BEFORE], i) {
+ if (UNIT_DEREF(member->slice) == u)
+ unit_invalidate_cgroup_bpf(member);
+ }
+ }
+}
+
+bool unit_cgroup_delegate(Unit *u) {
+ CGroupContext *c;
+
+ assert(u);
+
+ if (!UNIT_VTABLE(u)->can_delegate)
+ return false;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return false;
+
+ return c->delegate;
+}
+
+void manager_invalidate_startup_units(Manager *m) {
+ Iterator i;
+ Unit *u;
+
+ assert(m);
+
+ SET_FOREACH(u, m->startup_units, i)
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU|CGROUP_MASK_IO|CGROUP_MASK_BLKIO);
+}
+
+static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
+ [CGROUP_AUTO] = "auto",
+ [CGROUP_CLOSED] = "closed",
+ [CGROUP_STRICT] = "strict",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
new file mode 100644
index 0000000..266daa2
--- /dev/null
+++ b/src/core/cgroup.h
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "cgroup-util.h"
+#include "ip-address-access.h"
+#include "list.h"
+#include "time-util.h"
+
+typedef struct CGroupContext CGroupContext;
+typedef struct CGroupDeviceAllow CGroupDeviceAllow;
+typedef struct CGroupIODeviceWeight CGroupIODeviceWeight;
+typedef struct CGroupIODeviceLimit CGroupIODeviceLimit;
+typedef struct CGroupIODeviceLatency CGroupIODeviceLatency;
+typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
+typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
+
+typedef enum CGroupDevicePolicy {
+
+ /* When devices listed, will allow those, plus built-in ones,
+ if none are listed will allow everything. */
+ CGROUP_AUTO,
+
+ /* Everything forbidden, except built-in ones and listed ones. */
+ CGROUP_CLOSED,
+
+ /* Everythings forbidden, except for the listed devices */
+ CGROUP_STRICT,
+
+ _CGROUP_DEVICE_POLICY_MAX,
+ _CGROUP_DEVICE_POLICY_INVALID = -1
+} CGroupDevicePolicy;
+
+struct CGroupDeviceAllow {
+ LIST_FIELDS(CGroupDeviceAllow, device_allow);
+ char *path;
+ bool r:1;
+ bool w:1;
+ bool m:1;
+};
+
+struct CGroupIODeviceWeight {
+ LIST_FIELDS(CGroupIODeviceWeight, device_weights);
+ char *path;
+ uint64_t weight;
+};
+
+struct CGroupIODeviceLimit {
+ LIST_FIELDS(CGroupIODeviceLimit, device_limits);
+ char *path;
+ uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX];
+};
+
+struct CGroupIODeviceLatency {
+ LIST_FIELDS(CGroupIODeviceLatency, device_latencies);
+ char *path;
+ usec_t target_usec;
+};
+
+struct CGroupBlockIODeviceWeight {
+ LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights);
+ char *path;
+ uint64_t weight;
+};
+
+struct CGroupBlockIODeviceBandwidth {
+ LIST_FIELDS(CGroupBlockIODeviceBandwidth, device_bandwidths);
+ char *path;
+ uint64_t rbps;
+ uint64_t wbps;
+};
+
+struct CGroupContext {
+ bool cpu_accounting;
+ bool io_accounting;
+ bool blockio_accounting;
+ bool memory_accounting;
+ bool tasks_accounting;
+ bool ip_accounting;
+
+ /* For unified hierarchy */
+ uint64_t cpu_weight;
+ uint64_t startup_cpu_weight;
+ usec_t cpu_quota_per_sec_usec;
+
+ uint64_t io_weight;
+ uint64_t startup_io_weight;
+ LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
+ LIST_HEAD(CGroupIODeviceLimit, io_device_limits);
+ LIST_HEAD(CGroupIODeviceLatency, io_device_latencies);
+
+ uint64_t memory_min;
+ uint64_t memory_low;
+ uint64_t memory_high;
+ uint64_t memory_max;
+ uint64_t memory_swap_max;
+
+ LIST_HEAD(IPAddressAccessItem, ip_address_allow);
+ LIST_HEAD(IPAddressAccessItem, ip_address_deny);
+
+ /* For legacy hierarchies */
+ uint64_t cpu_shares;
+ uint64_t startup_cpu_shares;
+
+ uint64_t blockio_weight;
+ uint64_t startup_blockio_weight;
+ LIST_HEAD(CGroupBlockIODeviceWeight, blockio_device_weights);
+ LIST_HEAD(CGroupBlockIODeviceBandwidth, blockio_device_bandwidths);
+
+ uint64_t memory_limit;
+
+ CGroupDevicePolicy device_policy;
+ LIST_HEAD(CGroupDeviceAllow, device_allow);
+
+ /* Common */
+ uint64_t tasks_max;
+
+ bool delegate;
+ CGroupMask delegate_controllers;
+
+ CGroupMask disable_controllers;
+};
+
+/* Used when querying IP accounting data */
+typedef enum CGroupIPAccountingMetric {
+ CGROUP_IP_INGRESS_BYTES,
+ CGROUP_IP_INGRESS_PACKETS,
+ CGROUP_IP_EGRESS_BYTES,
+ CGROUP_IP_EGRESS_PACKETS,
+ _CGROUP_IP_ACCOUNTING_METRIC_MAX,
+ _CGROUP_IP_ACCOUNTING_METRIC_INVALID = -1,
+} CGroupIPAccountingMetric;
+
+typedef struct Unit Unit;
+typedef struct Manager Manager;
+
+void cgroup_context_init(CGroupContext *c);
+void cgroup_context_done(CGroupContext *c);
+void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix);
+
+void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a);
+void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w);
+void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l);
+void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l);
+void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w);
+void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b);
+
+int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode);
+
+CGroupMask unit_get_own_mask(Unit *u);
+CGroupMask unit_get_delegate_mask(Unit *u);
+CGroupMask unit_get_members_mask(Unit *u);
+CGroupMask unit_get_siblings_mask(Unit *u);
+CGroupMask unit_get_subtree_mask(Unit *u);
+CGroupMask unit_get_disable_mask(Unit *u);
+CGroupMask unit_get_ancestor_disable_mask(Unit *u);
+
+CGroupMask unit_get_target_mask(Unit *u);
+CGroupMask unit_get_enable_mask(Unit *u);
+
+void unit_invalidate_cgroup_members_masks(Unit *u);
+
+void unit_add_to_cgroup_realize_queue(Unit *u);
+
+const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask);
+char *unit_default_cgroup_path(const Unit *u);
+int unit_set_cgroup_path(Unit *u, const char *path);
+int unit_pick_cgroup_path(Unit *u);
+
+int unit_realize_cgroup(Unit *u);
+void unit_release_cgroup(Unit *u);
+void unit_prune_cgroup(Unit *u);
+int unit_watch_cgroup(Unit *u);
+
+void unit_add_to_cgroup_empty_queue(Unit *u);
+
+int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path);
+
+int manager_setup_cgroup(Manager *m);
+void manager_shutdown_cgroup(Manager *m, bool delete);
+
+unsigned manager_dispatch_cgroup_realize_queue(Manager *m);
+
+Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup);
+Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid);
+Unit* manager_get_unit_by_pid(Manager *m, pid_t pid);
+
+int unit_search_main_pid(Unit *u, pid_t *ret);
+int unit_watch_all_pids(Unit *u);
+
+int unit_synthesize_cgroup_empty_event(Unit *u);
+
+int unit_get_memory_current(Unit *u, uint64_t *ret);
+int unit_get_tasks_current(Unit *u, uint64_t *ret);
+int unit_get_cpu_usage(Unit *u, nsec_t *ret);
+int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret);
+
+int unit_reset_cpu_accounting(Unit *u);
+int unit_reset_ip_accounting(Unit *u);
+
+#define UNIT_CGROUP_BOOL(u, name) \
+ ({ \
+ CGroupContext *cc = unit_get_cgroup_context(u); \
+ cc ? cc->name : false; \
+ })
+
+bool manager_owns_host_root_cgroup(Manager *m);
+bool unit_has_host_root_cgroup(Unit *u);
+
+int manager_notify_cgroup_empty(Manager *m, const char *group);
+
+void unit_invalidate_cgroup(Unit *u, CGroupMask m);
+void unit_invalidate_cgroup_bpf(Unit *u);
+
+void manager_invalidate_startup_units(Manager *m);
+
+const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_;
+CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_;
+
+bool unit_cgroup_delegate(Unit *u);
diff --git a/src/core/chown-recursive.c b/src/core/chown-recursive.c
new file mode 100644
index 0000000..7767301
--- /dev/null
+++ b/src/core/chown-recursive.c
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+
+#include "chown-recursive.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int chown_one(int fd, const struct stat *st, uid_t uid, gid_t gid) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+ const char *n;
+
+ assert(fd >= 0);
+ assert(st);
+
+ if ((!uid_is_valid(uid) || st->st_uid == uid) &&
+ (!gid_is_valid(gid) || st->st_gid == gid))
+ return 0;
+
+ /* We change ownership through the /proc/self/fd/%i path, so that we have a stable reference that works with
+ * O_PATH. (Note: fchown() and fchmod() do not work with O_PATH, the kernel refuses that. */
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+
+ /* Drop any ACL if there is one */
+ FOREACH_STRING(n, "system.posix_acl_access", "system.posix_acl_default")
+ if (removexattr(procfs_path, n) < 0)
+ if (!IN_SET(errno, ENODATA, EOPNOTSUPP, ENOSYS, ENOTTY))
+ return -errno;
+
+ if (chown(procfs_path, uid, gid) < 0)
+ return -errno;
+
+ /* The linux kernel alters the mode in some cases of chown(), as well when we change ACLs. Let's undo this. We
+ * do this only for non-symlinks however. That's because for symlinks the access mode is ignored anyway and
+ * because on some kernels/file systems trying to change the access mode will succeed but has no effect while
+ * on others it actively fails. */
+ if (!S_ISLNK(st->st_mode))
+ if (chmod(procfs_path, st->st_mode & 07777) < 0)
+ return -errno;
+
+ return 1;
+}
+
+static int chown_recursive_internal(int fd, const struct stat *st, uid_t uid, gid_t gid) {
+ _cleanup_closedir_ DIR *d = NULL;
+ bool changed = false;
+ struct dirent *de;
+ int r;
+
+ assert(fd >= 0);
+ assert(st);
+
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ _cleanup_close_ int path_fd = -1;
+ struct stat fst;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ /* Let's pin the child inode we want to fix now with an O_PATH fd, so that it cannot be swapped out
+ * while we manipulate it. */
+ path_fd = openat(dirfd(d), de->d_name, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (path_fd < 0)
+ return -errno;
+
+ if (fstat(path_fd, &fst) < 0)
+ return -errno;
+
+ if (S_ISDIR(fst.st_mode)) {
+ int subdir_fd;
+
+ /* Convert it to a "real" (i.e. non-O_PATH) fd now */
+ subdir_fd = fd_reopen(path_fd, O_RDONLY|O_CLOEXEC|O_NOATIME);
+ if (subdir_fd < 0)
+ return subdir_fd;
+
+ r = chown_recursive_internal(subdir_fd, &fst, uid, gid); /* takes possession of subdir_fd even on failure */
+ if (r < 0)
+ return r;
+ if (r > 0)
+ changed = true;
+ } else {
+ r = chown_one(path_fd, &fst, uid, gid);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ changed = true;
+ }
+ }
+
+ r = chown_one(dirfd(d), st, uid, gid);
+ if (r < 0)
+ return r;
+
+ return r > 0 || changed;
+}
+
+int path_chown_recursive(const char *path, uid_t uid, gid_t gid) {
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+
+ fd = open(path, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
+ if (fd < 0)
+ return -errno;
+
+ if (!uid_is_valid(uid) && !gid_is_valid(gid))
+ return 0; /* nothing to do */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* Let's take a shortcut: if the top-level directory is properly owned, we don't descend into the whole tree,
+ * under the assumption that all is OK anyway. */
+
+ if ((!uid_is_valid(uid) || st.st_uid == uid) &&
+ (!gid_is_valid(gid) || st.st_gid == gid))
+ return 0;
+
+ return chown_recursive_internal(TAKE_FD(fd), &st, uid, gid); /* we donate the fd to the call, regardless if it succeeded or failed */
+}
diff --git a/src/core/chown-recursive.h b/src/core/chown-recursive.h
new file mode 100644
index 0000000..f3fa40a
--- /dev/null
+++ b/src/core/chown-recursive.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+int path_chown_recursive(const char *path, uid_t uid, gid_t gid);
diff --git a/src/core/dbus-automount.c b/src/core/dbus-automount.c
new file mode 100644
index 0000000..bd6e6a9
--- /dev/null
+++ b/src/core/dbus-automount.c
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "automount.h"
+#include "bus-util.h"
+#include "dbus-automount.h"
+#include "dbus-util.h"
+#include "string-util.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, automount_result, AutomountResult);
+
+const sd_bus_vtable bus_automount_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Where", "s", NULL, offsetof(Automount, where), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Automount, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Automount, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("TimeoutIdleUSec", "t", bus_property_get_usec, offsetof(Automount, timeout_idle_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_VTABLE_END
+};
+
+static int bus_automount_set_transient_property(
+ Automount *a,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Unit *u = UNIT(a);
+
+ assert(a);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "Where"))
+ return bus_set_transient_path(u, name, &a->where, message, flags, error);
+
+ if (streq(name, "TimeoutIdleUSec"))
+ return bus_set_transient_usec_fix_0(u, name, &a->timeout_idle_usec, message, flags, error);
+
+ if (streq(name, "DirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &a->directory_mode, message, flags, error);
+
+ return 0;
+}
+
+int bus_automount_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Automount *a = AUTOMOUNT(u);
+
+ assert(a);
+ assert(name);
+ assert(message);
+
+ if (u->transient && u->load_state == UNIT_STUB) /* This is a transient unit? let's load a little more */
+ return bus_automount_set_transient_property(a, name, message, flags, error);
+
+ return 0;
+}
diff --git a/src/core/dbus-automount.h b/src/core/dbus-automount.h
new file mode 100644
index 0000000..3e165b0
--- /dev/null
+++ b/src/core/dbus-automount.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_automount_vtable[];
+
+int bus_automount_set_property(Unit *u, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
new file mode 100644
index 0000000..53890bc
--- /dev/null
+++ b/src/core/dbus-cgroup.c
@@ -0,0 +1,1415 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <stdio_ext.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "cgroup.h"
+#include "dbus-cgroup.h"
+#include "dbus-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "path-util.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy);
+
+static int property_get_cgroup_mask(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupMask *mask = userdata;
+ CGroupController ctrl;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (ctrl = 0; ctrl < _CGROUP_CONTROLLER_MAX; ctrl++) {
+ if ((*mask & CGROUP_CONTROLLER_TO_MASK(ctrl)) == 0)
+ continue;
+
+ r = sd_bus_message_append(reply, "s", cgroup_controller_to_string(ctrl));
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_delegate_controllers(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (!c->delegate)
+ return sd_bus_message_append(reply, "as", 0);
+
+ return property_get_cgroup_mask(bus, path, interface, property, reply, &c->delegate_controllers, error);
+}
+
+static int property_get_io_device_weight(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupIODeviceWeight *w;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_weights, w, c->io_device_weights) {
+ r = sd_bus_message_append(reply, "(st)", w->path, w->weight);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_io_device_limits(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupIODeviceLimit *l;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_limits, l, c->io_device_limits) {
+ CGroupIOLimitType type;
+
+ type = cgroup_io_limit_type_from_string(property);
+ if (type < 0 || l->limits[type] == cgroup_io_limit_defaults[type])
+ continue;
+
+ r = sd_bus_message_append(reply, "(st)", l->path, l->limits[type]);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_io_device_latency(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupIODeviceLatency *l;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_latencies, l, c->io_device_latencies) {
+ r = sd_bus_message_append(reply, "(st)", l->path, l->target_usec);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_blockio_device_weight(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupBlockIODeviceWeight *w;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
+ r = sd_bus_message_append(reply, "(st)", w->path, w->weight);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_blockio_device_bandwidths(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupBlockIODeviceBandwidth *b;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
+ uint64_t v;
+
+ if (streq(property, "BlockIOReadBandwidth"))
+ v = b->rbps;
+ else
+ v = b->wbps;
+
+ if (v == CGROUP_LIMIT_MAX)
+ continue;
+
+ r = sd_bus_message_append(reply, "(st)", b->path, v);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_device_allow(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupDeviceAllow *a;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_allow, a, c->device_allow) {
+ unsigned k = 0;
+ char rwm[4];
+
+ if (a->r)
+ rwm[k++] = 'r';
+ if (a->w)
+ rwm[k++] = 'w';
+ if (a->m)
+ rwm[k++] = 'm';
+
+ rwm[k] = 0;
+
+ r = sd_bus_message_append(reply, "(ss)", a->path, rwm);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_ip_address_access(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ IPAddressAccessItem** items = userdata, *i;
+ int r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(iayu)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(items, i, *items) {
+
+ r = sd_bus_message_open_container(reply, 'r', "iayu");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "i", i->family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &i->address, FAMILY_ADDRESS_SIZE(i->family));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "u", (uint32_t) i->prefixlen);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+const sd_bus_vtable bus_cgroup_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
+ SD_BUS_PROPERTY("DelegateControllers", "as", property_get_delegate_controllers, 0, 0),
+ SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
+ SD_BUS_PROPERTY("CPUWeight", "t", NULL, offsetof(CGroupContext, cpu_weight), 0),
+ SD_BUS_PROPERTY("StartupCPUWeight", "t", NULL, offsetof(CGroupContext, startup_cpu_weight), 0),
+ SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0),
+ SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0),
+ SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0),
+ SD_BUS_PROPERTY("IOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, io_accounting), 0),
+ SD_BUS_PROPERTY("IOWeight", "t", NULL, offsetof(CGroupContext, io_weight), 0),
+ SD_BUS_PROPERTY("StartupIOWeight", "t", NULL, offsetof(CGroupContext, startup_io_weight), 0),
+ SD_BUS_PROPERTY("IODeviceWeight", "a(st)", property_get_io_device_weight, 0, 0),
+ SD_BUS_PROPERTY("IOReadBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0),
+ SD_BUS_PROPERTY("IOWriteBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0),
+ SD_BUS_PROPERTY("IOReadIOPSMax", "a(st)", property_get_io_device_limits, 0, 0),
+ SD_BUS_PROPERTY("IOWriteIOPSMax", "a(st)", property_get_io_device_limits, 0, 0),
+ SD_BUS_PROPERTY("IODeviceLatencyTargetUSec", "a(st)", property_get_io_device_latency, 0, 0),
+ SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0),
+ SD_BUS_PROPERTY("BlockIOWeight", "t", NULL, offsetof(CGroupContext, blockio_weight), 0),
+ SD_BUS_PROPERTY("StartupBlockIOWeight", "t", NULL, offsetof(CGroupContext, startup_blockio_weight), 0),
+ SD_BUS_PROPERTY("BlockIODeviceWeight", "a(st)", property_get_blockio_device_weight, 0, 0),
+ SD_BUS_PROPERTY("BlockIOReadBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
+ SD_BUS_PROPERTY("BlockIOWriteBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
+ SD_BUS_PROPERTY("MemoryAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, memory_accounting), 0),
+ SD_BUS_PROPERTY("MemoryMin", "t", NULL, offsetof(CGroupContext, memory_min), 0),
+ SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0),
+ SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0),
+ SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0),
+ SD_BUS_PROPERTY("MemorySwapMax", "t", NULL, offsetof(CGroupContext, memory_swap_max), 0),
+ SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0),
+ SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0),
+ SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
+ SD_BUS_PROPERTY("TasksAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, tasks_accounting), 0),
+ SD_BUS_PROPERTY("TasksMax", "t", NULL, offsetof(CGroupContext, tasks_max), 0),
+ SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0),
+ SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0),
+ SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0),
+ SD_BUS_PROPERTY("DisableControllers", "as", property_get_cgroup_mask, offsetof(CGroupContext, disable_controllers), 0),
+ SD_BUS_VTABLE_END
+};
+
+static int bus_cgroup_set_transient_property(
+ Unit *u,
+ CGroupContext *c,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(u);
+ assert(c);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "Delegate")) {
+ int b;
+
+ if (!UNIT_VTABLE(u)->can_delegate)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type");
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->delegate = b;
+ c->delegate_controllers = b ? _CGROUP_MASK_ALL : 0;
+
+ unit_write_settingf(u, flags, name, "Delegate=%s", yes_no(b));
+ }
+
+ return 1;
+
+ } else if (streq(name, "DelegateControllers")) {
+ CGroupMask mask = 0;
+
+ if (!UNIT_VTABLE(u)->can_delegate)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type");
+
+ r = sd_bus_message_enter_container(message, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ CGroupController cc;
+ const char *t;
+
+ r = sd_bus_message_read(message, "s", &t);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ cc = cgroup_controller_from_string(t);
+ if (cc < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown cgroup controller '%s'", t);
+
+ mask |= CGROUP_CONTROLLER_TO_MASK(cc);
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *t = NULL;
+
+ r = cg_mask_to_string(mask, &t);
+ if (r < 0)
+ return r;
+
+ c->delegate = true;
+ if (mask == 0)
+ c->delegate_controllers = 0;
+ else
+ c->delegate_controllers |= mask;
+
+ unit_write_settingf(u, flags, name, "Delegate=%s", strempty(t));
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_cgroup_set_boolean(
+ Unit *u,
+ const char *name,
+ bool *p,
+ CGroupMask mask,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int b, r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = b;
+ unit_invalidate_cgroup(u, mask);
+ unit_write_settingf(u, flags, name, "%s=%s", name, yes_no(b));
+ }
+
+ return 1;
+}
+
+#define BUS_DEFINE_SET_CGROUP_WEIGHT(function, mask, check, val) \
+ static int bus_cgroup_set_##function( \
+ Unit *u, \
+ const char *name, \
+ uint64_t *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ uint64_t v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, "t", &v); \
+ if (r < 0) \
+ return r; \
+ \
+ if (!check(v)) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Value specified in %s is out of range", name); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = v; \
+ unit_invalidate_cgroup(u, (mask)); \
+ \
+ if (v == (val)) \
+ unit_write_settingf(u, flags, name, \
+ "%s=", name); \
+ else \
+ unit_write_settingf(u, flags, name, \
+ "%s=%" PRIu64, name, v); \
+ } \
+ \
+ return 1; \
+ }
+
+#define BUS_DEFINE_SET_CGROUP_LIMIT(function, mask, scale, minimum) \
+ static int bus_cgroup_set_##function( \
+ Unit *u, \
+ const char *name, \
+ uint64_t *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ uint64_t v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, "t", &v); \
+ if (r < 0) \
+ return r; \
+ \
+ if (v < minimum) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Value specified in %s is out of range", name); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = v; \
+ unit_invalidate_cgroup(u, (mask)); \
+ \
+ if (v == CGROUP_LIMIT_MAX) \
+ unit_write_settingf(u, flags, name, \
+ "%s=infinity", name); \
+ else \
+ unit_write_settingf(u, flags, name, \
+ "%s=%" PRIu64, name, v); \
+ } \
+ \
+ return 1; \
+ } \
+ static int bus_cgroup_set_##function##_scale( \
+ Unit *u, \
+ const char *name, \
+ uint64_t *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ uint64_t v; \
+ uint32_t raw; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, "u", &raw); \
+ if (r < 0) \
+ return r; \
+ \
+ v = scale(raw, UINT32_MAX); \
+ if (v < minimum || v >= UINT64_MAX) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Value specified in %s is out of range", name); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ const char *e; \
+ \
+ *p = v; \
+ unit_invalidate_cgroup(u, (mask)); \
+ \
+ /* Chop off suffix */ \
+ assert_se(e = endswith(name, "Scale")); \
+ name = strndupa(name, e - name); \
+ \
+ unit_write_settingf(u, flags, name, "%s=%" PRIu32 "%%", name, \
+ (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX))); \
+ } \
+ \
+ return 1; \
+ }
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wtype-limits"
+BUS_DEFINE_SET_CGROUP_WEIGHT(cpu_weight, CGROUP_MASK_CPU, CGROUP_WEIGHT_IS_OK, CGROUP_WEIGHT_INVALID);
+BUS_DEFINE_SET_CGROUP_WEIGHT(cpu_shares, CGROUP_MASK_CPU, CGROUP_CPU_SHARES_IS_OK, CGROUP_CPU_SHARES_INVALID);
+BUS_DEFINE_SET_CGROUP_WEIGHT(io_weight, CGROUP_MASK_IO, CGROUP_WEIGHT_IS_OK, CGROUP_WEIGHT_INVALID);
+BUS_DEFINE_SET_CGROUP_WEIGHT(blockio_weight, CGROUP_MASK_BLKIO, CGROUP_BLKIO_WEIGHT_IS_OK, CGROUP_BLKIO_WEIGHT_INVALID);
+BUS_DEFINE_SET_CGROUP_LIMIT(memory, CGROUP_MASK_MEMORY, physical_memory_scale, 1);
+BUS_DEFINE_SET_CGROUP_LIMIT(swap, CGROUP_MASK_MEMORY, physical_memory_scale, 0);
+BUS_DEFINE_SET_CGROUP_LIMIT(tasks_max, CGROUP_MASK_PIDS, system_tasks_max_scale, 1);
+#pragma GCC diagnostic pop
+
+int bus_cgroup_set_property(
+ Unit *u,
+ CGroupContext *c,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ CGroupIOLimitType iol_type;
+ int r;
+
+ assert(u);
+ assert(c);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "CPUAccounting"))
+ return bus_cgroup_set_boolean(u, name, &c->cpu_accounting, get_cpu_accounting_mask(), message, flags, error);
+
+ if (streq(name, "CPUWeight"))
+ return bus_cgroup_set_cpu_weight(u, name, &c->cpu_weight, message, flags, error);
+
+ if (streq(name, "StartupCPUWeight"))
+ return bus_cgroup_set_cpu_weight(u, name, &c->startup_cpu_weight, message, flags, error);
+
+ if (streq(name, "CPUShares"))
+ return bus_cgroup_set_cpu_shares(u, name, &c->cpu_shares, message, flags, error);
+
+ if (streq(name, "StartupCPUShares"))
+ return bus_cgroup_set_cpu_shares(u, name, &c->startup_cpu_shares, message, flags, error);
+
+ if (streq(name, "IOAccounting"))
+ return bus_cgroup_set_boolean(u, name, &c->io_accounting, CGROUP_MASK_IO, message, flags, error);
+
+ if (streq(name, "IOWeight"))
+ return bus_cgroup_set_io_weight(u, name, &c->io_weight, message, flags, error);
+
+ if (streq(name, "StartupIOWeight"))
+ return bus_cgroup_set_io_weight(u, name, &c->startup_io_weight, message, flags, error);
+
+ if (streq(name, "BlockIOAccounting"))
+ return bus_cgroup_set_boolean(u, name, &c->blockio_accounting, CGROUP_MASK_BLKIO, message, flags, error);
+
+ if (streq(name, "BlockIOWeight"))
+ return bus_cgroup_set_blockio_weight(u, name, &c->blockio_weight, message, flags, error);
+
+ if (streq(name, "StartupBlockIOWeight"))
+ return bus_cgroup_set_blockio_weight(u, name, &c->startup_blockio_weight, message, flags, error);
+
+ if (streq(name, "MemoryAccounting"))
+ return bus_cgroup_set_boolean(u, name, &c->memory_accounting, CGROUP_MASK_MEMORY, message, flags, error);
+
+ if (streq(name, "MemoryMin"))
+ return bus_cgroup_set_memory(u, name, &c->memory_min, message, flags, error);
+
+ if (streq(name, "MemoryLow"))
+ return bus_cgroup_set_memory(u, name, &c->memory_low, message, flags, error);
+
+ if (streq(name, "MemoryHigh"))
+ return bus_cgroup_set_memory(u, name, &c->memory_high, message, flags, error);
+
+ if (streq(name, "MemorySwapMax"))
+ return bus_cgroup_set_swap(u, name, &c->memory_swap_max, message, flags, error);
+
+ if (streq(name, "MemoryMax"))
+ return bus_cgroup_set_memory(u, name, &c->memory_max, message, flags, error);
+
+ if (streq(name, "MemoryLimit"))
+ return bus_cgroup_set_memory(u, name, &c->memory_limit, message, flags, error);
+
+ if (streq(name, "MemoryMinScale"))
+ return bus_cgroup_set_memory_scale(u, name, &c->memory_min, message, flags, error);
+
+ if (streq(name, "MemoryLowScale"))
+ return bus_cgroup_set_memory_scale(u, name, &c->memory_low, message, flags, error);
+
+ if (streq(name, "MemoryHighScale"))
+ return bus_cgroup_set_memory_scale(u, name, &c->memory_high, message, flags, error);
+
+ if (streq(name, "MemorySwapMaxScale"))
+ return bus_cgroup_set_swap_scale(u, name, &c->memory_swap_max, message, flags, error);
+
+ if (streq(name, "MemoryMaxScale"))
+ return bus_cgroup_set_memory_scale(u, name, &c->memory_max, message, flags, error);
+
+ if (streq(name, "MemoryLimitScale"))
+ return bus_cgroup_set_memory_scale(u, name, &c->memory_limit, message, flags, error);
+
+ if (streq(name, "TasksAccounting"))
+ return bus_cgroup_set_boolean(u, name, &c->tasks_accounting, CGROUP_MASK_PIDS, message, flags, error);
+
+ if (streq(name, "TasksMax"))
+ return bus_cgroup_set_tasks_max(u, name, &c->tasks_max, message, flags, error);
+
+ if (streq(name, "TasksMaxScale"))
+ return bus_cgroup_set_tasks_max_scale(u, name, &c->tasks_max, message, flags, error);
+
+ if (streq(name, "CPUQuotaPerSecUSec")) {
+ uint64_t u64;
+
+ r = sd_bus_message_read(message, "t", &u64);
+ if (r < 0)
+ return r;
+
+ if (u64 <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "CPUQuotaPerSecUSec= value out of range");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->cpu_quota_per_sec_usec = u64;
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+
+ if (c->cpu_quota_per_sec_usec == USEC_INFINITY)
+ unit_write_setting(u, flags, "CPUQuota", "CPUQuota=");
+ else
+ /* config_parse_cpu_quota() requires an integer, so truncating division is used on
+ * purpose here. */
+ unit_write_settingf(u, flags, "CPUQuota",
+ "CPUQuota=%0.f%%",
+ (double) (c->cpu_quota_per_sec_usec / 10000));
+ }
+
+ return 1;
+
+ } else if ((iol_type = cgroup_io_limit_type_from_string(name)) >= 0) {
+ const char *path;
+ unsigned n = 0;
+ uint64_t u64;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &path, &u64)) > 0) {
+
+ if (!path_is_normalized(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path '%s' specified in %s= is not normalized.", name, path);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupIODeviceLimit *a = NULL, *b;
+
+ LIST_FOREACH(device_limits, b, c->io_device_limits) {
+ if (path_equal(path, b->path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ CGroupIOLimitType type;
+
+ a = new0(CGroupIODeviceLimit, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+
+ for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++)
+ a->limits[type] = cgroup_io_limit_defaults[type];
+
+ LIST_PREPEND(device_limits, c->io_device_limits, a);
+ }
+
+ a->limits[iol_type] = u64;
+ }
+
+ n++;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupIODeviceLimit *a;
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t size = 0;
+
+ if (n == 0) {
+ LIST_FOREACH(device_limits, a, c->io_device_limits)
+ a->limits[iol_type] = cgroup_io_limit_defaults[iol_type];
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_IO);
+
+ f = open_memstream(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ fprintf(f, "%s=\n", name);
+ LIST_FOREACH(device_limits, a, c->io_device_limits)
+ if (a->limits[iol_type] != cgroup_io_limit_defaults[iol_type])
+ fprintf(f, "%s=%s %" PRIu64 "\n", name, a->path, a->limits[iol_type]);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+
+ } else if (streq(name, "IODeviceWeight")) {
+ const char *path;
+ uint64_t weight;
+ unsigned n = 0;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &path, &weight)) > 0) {
+
+ if (!path_is_normalized(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path '%s' specified in %s= is not normalized.", name, path);
+
+ if (!CGROUP_WEIGHT_IS_OK(weight) || weight == CGROUP_WEIGHT_INVALID)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "IODeviceWeight= value out of range");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupIODeviceWeight *a = NULL, *b;
+
+ LIST_FOREACH(device_weights, b, c->io_device_weights) {
+ if (path_equal(b->path, path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ a = new0(CGroupIODeviceWeight, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+ LIST_PREPEND(device_weights, c->io_device_weights, a);
+ }
+
+ a->weight = weight;
+ }
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ CGroupIODeviceWeight *a;
+ size_t size = 0;
+
+ if (n == 0) {
+ while (c->io_device_weights)
+ cgroup_context_free_io_device_weight(c, c->io_device_weights);
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_IO);
+
+ f = open_memstream(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ fputs("IODeviceWeight=\n", f);
+ LIST_FOREACH(device_weights, a, c->io_device_weights)
+ fprintf(f, "IODeviceWeight=%s %" PRIu64 "\n", a->path, a->weight);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+
+ } else if (streq(name, "IODeviceLatencyTargetUSec")) {
+ const char *path;
+ uint64_t target;
+ unsigned n = 0;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &path, &target)) > 0) {
+
+ if (!path_is_normalized(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path '%s' specified in %s= is not normalized.", name, path);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupIODeviceLatency *a = NULL, *b;
+
+ LIST_FOREACH(device_latencies, b, c->io_device_latencies) {
+ if (path_equal(b->path, path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ a = new0(CGroupIODeviceLatency, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+ LIST_PREPEND(device_latencies, c->io_device_latencies, a);
+ }
+
+ a->target_usec = target;
+ }
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ char ts[FORMAT_TIMESPAN_MAX];
+ CGroupIODeviceLatency *a;
+ size_t size = 0;
+
+ if (n == 0) {
+ while (c->io_device_latencies)
+ cgroup_context_free_io_device_latency(c, c->io_device_latencies);
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_IO);
+
+ f = open_memstream(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ fputs("IODeviceLatencyTargetSec=\n", f);
+ LIST_FOREACH(device_latencies, a, c->io_device_latencies)
+ fprintf(f, "IODeviceLatencyTargetSec=%s %s\n",
+ a->path, format_timespan(ts, sizeof(ts), a->target_usec, 1));
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) {
+ const char *path;
+ bool read = true;
+ unsigned n = 0;
+ uint64_t u64;
+
+ if (streq(name, "BlockIOWriteBandwidth"))
+ read = false;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &path, &u64)) > 0) {
+
+ if (!path_is_normalized(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path '%s' specified in %s= is not normalized.", name, path);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupBlockIODeviceBandwidth *a = NULL, *b;
+
+ LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
+ if (path_equal(path, b->path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ a = new0(CGroupBlockIODeviceBandwidth, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->rbps = CGROUP_LIMIT_MAX;
+ a->wbps = CGROUP_LIMIT_MAX;
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+
+ LIST_PREPEND(device_bandwidths, c->blockio_device_bandwidths, a);
+ }
+
+ if (read)
+ a->rbps = u64;
+ else
+ a->wbps = u64;
+ }
+
+ n++;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupBlockIODeviceBandwidth *a;
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t size = 0;
+
+ if (n == 0) {
+ LIST_FOREACH(device_bandwidths, a, c->blockio_device_bandwidths) {
+ if (read)
+ a->rbps = CGROUP_LIMIT_MAX;
+ else
+ a->wbps = CGROUP_LIMIT_MAX;
+ }
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_BLKIO);
+
+ f = open_memstream(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ if (read) {
+ fputs("BlockIOReadBandwidth=\n", f);
+ LIST_FOREACH(device_bandwidths, a, c->blockio_device_bandwidths)
+ if (a->rbps != CGROUP_LIMIT_MAX)
+ fprintf(f, "BlockIOReadBandwidth=%s %" PRIu64 "\n", a->path, a->rbps);
+ } else {
+ fputs("BlockIOWriteBandwidth=\n", f);
+ LIST_FOREACH(device_bandwidths, a, c->blockio_device_bandwidths)
+ if (a->wbps != CGROUP_LIMIT_MAX)
+ fprintf(f, "BlockIOWriteBandwidth=%s %" PRIu64 "\n", a->path, a->wbps);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+
+ } else if (streq(name, "BlockIODeviceWeight")) {
+ const char *path;
+ uint64_t weight;
+ unsigned n = 0;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &path, &weight)) > 0) {
+
+ if (!path_is_normalized(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path '%s' specified in %s= is not normalized.", name, path);
+
+ if (!CGROUP_BLKIO_WEIGHT_IS_OK(weight) || weight == CGROUP_BLKIO_WEIGHT_INVALID)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "BlockIODeviceWeight= out of range");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupBlockIODeviceWeight *a = NULL, *b;
+
+ LIST_FOREACH(device_weights, b, c->blockio_device_weights) {
+ if (path_equal(b->path, path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ a = new0(CGroupBlockIODeviceWeight, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+ LIST_PREPEND(device_weights, c->blockio_device_weights, a);
+ }
+
+ a->weight = weight;
+ }
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ CGroupBlockIODeviceWeight *a;
+ size_t size = 0;
+
+ if (n == 0) {
+ while (c->blockio_device_weights)
+ cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_BLKIO);
+
+ f = open_memstream(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ fputs("BlockIODeviceWeight=\n", f);
+ LIST_FOREACH(device_weights, a, c->blockio_device_weights)
+ fprintf(f, "BlockIODeviceWeight=%s %" PRIu64 "\n", a->path, a->weight);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+
+ } else if (streq(name, "DevicePolicy")) {
+ const char *policy;
+ CGroupDevicePolicy p;
+
+ r = sd_bus_message_read(message, "s", &policy);
+ if (r < 0)
+ return r;
+
+ p = cgroup_device_policy_from_string(policy);
+ if (p < 0)
+ return -EINVAL;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->device_policy = p;
+ unit_invalidate_cgroup(u, CGROUP_MASK_DEVICES);
+ unit_write_settingf(u, flags, name, "DevicePolicy=%s", policy);
+ }
+
+ return 1;
+
+ } else if (streq(name, "DeviceAllow")) {
+ const char *path, *rwm;
+ unsigned n = 0;
+
+ r = sd_bus_message_enter_container(message, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(ss)", &path, &rwm)) > 0) {
+
+ if (!valid_device_allow_pattern(path) || strpbrk(path, WHITESPACE))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "DeviceAllow= requires device node or pattern");
+
+ if (isempty(rwm))
+ rwm = "rwm";
+ else if (!in_charset(rwm, "rwm"))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "DeviceAllow= requires combination of rwm flags");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupDeviceAllow *a = NULL, *b;
+
+ LIST_FOREACH(device_allow, b, c->device_allow) {
+ if (path_equal(b->path, path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ a = new0(CGroupDeviceAllow, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+
+ LIST_PREPEND(device_allow, c->device_allow, a);
+ }
+
+ a->r = !!strchr(rwm, 'r');
+ a->w = !!strchr(rwm, 'w');
+ a->m = !!strchr(rwm, 'm');
+ }
+
+ n++;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ CGroupDeviceAllow *a;
+ size_t size = 0;
+
+ if (n == 0) {
+ while (c->device_allow)
+ cgroup_context_free_device_allow(c, c->device_allow);
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_DEVICES);
+
+ f = open_memstream(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ fputs("DeviceAllow=\n", f);
+ LIST_FOREACH(device_allow, a, c->device_allow)
+ fprintf(f, "DeviceAllow=%s %s%s%s\n", a->path, a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+
+ } else if (streq(name, "IPAccounting")) {
+ int b;
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->ip_accounting = b;
+
+ unit_invalidate_cgroup_bpf(u);
+ unit_write_settingf(u, flags, name, "IPAccounting=%s", yes_no(b));
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "IPAddressAllow", "IPAddressDeny")) {
+ IPAddressAccessItem **list;
+ size_t n = 0;
+
+ list = streq(name, "IPAddressAllow") ? &c->ip_address_allow : &c->ip_address_deny;
+
+ r = sd_bus_message_enter_container(message, 'a', "(iayu)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const void *ap;
+ int32_t family;
+ uint32_t prefixlen;
+ size_t an;
+
+ r = sd_bus_message_enter_container(message, 'r', "iayu");
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(message, "i", &family);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s= expects IPv4 or IPv6 addresses only.", name);
+
+ r = sd_bus_message_read_array(message, 'y', &ap, &an);
+ if (r < 0)
+ return r;
+
+ if (an != FAMILY_ADDRESS_SIZE(family))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "IP address has wrong size for family (%s, expected %zu, got %zu)",
+ af_to_name(family), FAMILY_ADDRESS_SIZE(family), an);
+
+ r = sd_bus_message_read(message, "u", &prefixlen);
+ if (r < 0)
+ return r;
+
+ if (prefixlen > FAMILY_ADDRESS_SIZE(family)*8)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Prefix length %" PRIu32 " too large for address family %s.", prefixlen, af_to_name(family));
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ IPAddressAccessItem *item;
+
+ item = new0(IPAddressAccessItem, 1);
+ if (!item)
+ return -ENOMEM;
+
+ item->family = family;
+ item->prefixlen = prefixlen;
+ memcpy(&item->address, ap, an);
+
+ LIST_PREPEND(items, *list, item);
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ *list = ip_address_access_reduce(*list);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ IPAddressAccessItem *item;
+ size_t size = 0;
+
+ if (n == 0)
+ *list = ip_address_access_free_all(*list);
+
+ unit_invalidate_cgroup_bpf(u);
+ f = open_memstream(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ fputs(name, f);
+ fputs("=\n", f);
+
+ LIST_FOREACH(items, item, *list) {
+ char buffer[CONST_MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
+
+ errno = 0;
+ if (!inet_ntop(item->family, &item->address, buffer, sizeof(buffer)))
+ return errno > 0 ? -errno : -EINVAL;
+
+ fprintf(f, "%s=%s/%u\n", name, buffer, item->prefixlen);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ unit_write_setting(u, flags, name, buf);
+
+ if (*list) {
+ r = bpf_firewall_supported();
+ if (r < 0)
+ return r;
+ if (r == BPF_FIREWALL_UNSUPPORTED) {
+ static bool warned = false;
+
+ log_full(warned ? LOG_DEBUG : LOG_WARNING,
+ "Transient unit %s configures an IP firewall, but the local system does not support BPF/cgroup firewalling.\n"
+ "Proceeding WITHOUT firewalling in effect! (This warning is only shown for the first started transient unit using IP firewalling.)", u->id);
+
+ warned = true;
+ }
+ }
+ }
+
+ return 1;
+ }
+
+ if (u->transient && u->load_state == UNIT_STUB)
+ return bus_cgroup_set_transient_property(u, c, name, message, flags, error);
+
+ return 0;
+}
diff --git a/src/core/dbus-cgroup.h b/src/core/dbus-cgroup.h
new file mode 100644
index 0000000..188baa9
--- /dev/null
+++ b/src/core/dbus-cgroup.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+#include "cgroup.h"
+
+extern const sd_bus_vtable bus_cgroup_vtable[];
+
+int bus_cgroup_set_property(Unit *u, CGroupContext *c, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
diff --git a/src/core/dbus-device.c b/src/core/dbus-device.c
new file mode 100644
index 0000000..6cf7f58
--- /dev/null
+++ b/src/core/dbus-device.c
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "dbus-device.h"
+#include "device.h"
+#include "unit.h"
+
+const sd_bus_vtable bus_device_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("SysFSPath", "s", NULL, offsetof(Device, sysfs), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_VTABLE_END
+};
diff --git a/src/core/dbus-device.h b/src/core/dbus-device.h
new file mode 100644
index 0000000..077a2bf
--- /dev/null
+++ b/src/core/dbus-device.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus-vtable.h"
+
+extern const sd_bus_vtable bus_device_vtable[];
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
new file mode 100644
index 0000000..11301e4
--- /dev/null
+++ b/src/core/dbus-execute.c
@@ -0,0 +1,2423 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <stdio_ext.h>
+
+#if HAVE_SECCOMP
+#include <seccomp.h>
+#endif
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "cap-list.h"
+#include "capability-util.h"
+#include "cpu-set-util.h"
+#include "dbus-execute.h"
+#include "dbus-util.h"
+#include "env-util.h"
+#include "errno-list.h"
+#include "escape.h"
+#include "execute.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hexdecoct.h"
+#include "io-util.h"
+#include "ioprio.h"
+#include "journal-util.h"
+#include "missing.h"
+#include "mountpoint-util.h"
+#include "namespace.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "securebits-util.h"
+#include "specifier.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "unit-printf.h"
+#include "user-util.h"
+#include "utf8.h"
+
+BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_exec_output, exec_output, ExecOutput);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_input, exec_input, ExecInput);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_preserve_mode, exec_preserve_mode, ExecPreserveMode);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_keyring_mode, exec_keyring_mode, ExecKeyringMode);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_home, protect_home, ProtectHome);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_system, protect_system, ProtectSystem);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_personality, personality, unsigned long);
+static BUS_DEFINE_PROPERTY_GET(property_get_ioprio, "i", ExecContext, exec_context_get_effective_ioprio);
+static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_class, "i", ExecContext, exec_context_get_effective_ioprio, IOPRIO_PRIO_CLASS);
+static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_priority, "i", ExecContext, exec_context_get_effective_ioprio, IOPRIO_PRIO_DATA);
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_string, "s", NULL);
+static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_level, "i", int, LOG_PRI);
+static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_facility, "i", int, LOG_FAC);
+
+static int property_get_environment_files(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ char **j;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(sb)");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(j, c->environment_files) {
+ const char *fn = *j;
+
+ r = sd_bus_message_append(reply, "(sb)", fn[0] == '-' ? fn + 1 : fn, fn[0] == '-');
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_oom_score_adjust(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ int32_t n;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (c->oom_score_adjust_set)
+ n = c->oom_score_adjust;
+ else {
+ _cleanup_free_ char *t = NULL;
+
+ n = 0;
+ if (read_one_line_file("/proc/self/oom_score_adj", &t) >= 0)
+ safe_atoi32(t, &n);
+ }
+
+ return sd_bus_message_append(reply, "i", n);
+}
+
+static int property_get_nice(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ int32_t n;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (c->nice_set)
+ n = c->nice;
+ else {
+ errno = 0;
+ n = getpriority(PRIO_PROCESS, 0);
+ if (errno > 0)
+ n = 0;
+ }
+
+ return sd_bus_message_append(reply, "i", n);
+}
+
+static int property_get_cpu_sched_policy(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ int32_t n;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (c->cpu_sched_set)
+ n = c->cpu_sched_policy;
+ else {
+ n = sched_getscheduler(0);
+ if (n < 0)
+ n = SCHED_OTHER;
+ }
+
+ return sd_bus_message_append(reply, "i", n);
+}
+
+static int property_get_cpu_sched_priority(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ int32_t n;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (c->cpu_sched_set)
+ n = c->cpu_sched_priority;
+ else {
+ struct sched_param p = {};
+
+ if (sched_getparam(0, &p) >= 0)
+ n = p.sched_priority;
+ else
+ n = 0;
+ }
+
+ return sd_bus_message_append(reply, "i", n);
+}
+
+static int property_get_cpu_affinity(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ return sd_bus_message_append_array(reply, 'y', c->cpuset, CPU_ALLOC_SIZE(c->cpuset_ncpus));
+}
+
+static int property_get_timer_slack_nsec(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ uint64_t u;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (c->timer_slack_nsec != NSEC_INFINITY)
+ u = (uint64_t) c->timer_slack_nsec;
+ else
+ u = (uint64_t) prctl(PR_GET_TIMERSLACK);
+
+ return sd_bus_message_append(reply, "t", u);
+}
+
+static int property_get_syscall_filter(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+#if HAVE_SECCOMP
+ Iterator i;
+ void *id, *val;
+#endif
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "b", c->syscall_whitelist);
+ if (r < 0)
+ return r;
+
+#if HAVE_SECCOMP
+ HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, i) {
+ _cleanup_free_ char *name = NULL;
+ const char *e = NULL;
+ char *s;
+ int num = PTR_TO_INT(val);
+
+ name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
+ if (!name)
+ continue;
+
+ if (num >= 0) {
+ e = errno_to_name(num);
+ if (e) {
+ s = strjoin(name, ":", e);
+ if (!s)
+ return -ENOMEM;
+ } else {
+ r = asprintf(&s, "%s:%d", name, num);
+ if (r < 0)
+ return -ENOMEM;
+ }
+ } else
+ s = TAKE_PTR(name);
+
+ r = strv_consume(&l, s);
+ if (r < 0)
+ return r;
+ }
+#endif
+
+ strv_sort(l);
+
+ r = sd_bus_message_append_strv(reply, l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_syscall_archs(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+#if HAVE_SECCOMP
+ Iterator i;
+ void *id;
+#endif
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+#if HAVE_SECCOMP
+ SET_FOREACH(id, c->syscall_archs, i) {
+ const char *name;
+
+ name = seccomp_arch_to_string(PTR_TO_UINT32(id) - 1);
+ if (!name)
+ continue;
+
+ r = strv_extend(&l, name);
+ if (r < 0)
+ return -ENOMEM;
+ }
+#endif
+
+ strv_sort(l);
+
+ r = sd_bus_message_append_strv(reply, l);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int property_get_selinux_context(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ return sd_bus_message_append(reply, "(bs)", c->selinux_context_ignore, c->selinux_context);
+}
+
+static int property_get_apparmor_profile(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ return sd_bus_message_append(reply, "(bs)", c->apparmor_profile_ignore, c->apparmor_profile);
+}
+
+static int property_get_smack_process_label(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ return sd_bus_message_append(reply, "(bs)", c->smack_process_label_ignore, c->smack_process_label);
+}
+
+static int property_get_address_families(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ _cleanup_strv_free_ char **l = NULL;
+ Iterator i;
+ void *af;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "b", c->address_families_whitelist);
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(af, c->address_families, i) {
+ const char *name;
+
+ name = af_to_name(PTR_TO_INT(af));
+ if (!name)
+ continue;
+
+ r = strv_extend(&l, name);
+ if (r < 0)
+ return -ENOMEM;
+ }
+
+ strv_sort(l);
+
+ r = sd_bus_message_append_strv(reply, l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_working_directory(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ const char *wd;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (c->working_directory_home)
+ wd = "~";
+ else
+ wd = c->working_directory;
+
+ if (c->working_directory_missing_ok)
+ wd = strjoina("!", wd);
+
+ return sd_bus_message_append(reply, "s", wd);
+}
+
+static int property_get_stdio_fdname(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ int fileno;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ if (streq(property, "StandardInputFileDescriptorName"))
+ fileno = STDIN_FILENO;
+ else if (streq(property, "StandardOutputFileDescriptorName"))
+ fileno = STDOUT_FILENO;
+ else {
+ assert(streq(property, "StandardErrorFileDescriptorName"));
+ fileno = STDERR_FILENO;
+ }
+
+ return sd_bus_message_append(reply, "s", exec_context_fdname(c, fileno));
+}
+
+static int property_get_input_data(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ return sd_bus_message_append_array(reply, 'y', c->stdin_data, c->stdin_data_size);
+}
+
+static int property_get_bind_paths(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ unsigned i;
+ bool ro;
+ int r;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ ro = strstr(property, "ReadOnly");
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssbt)");
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < c->n_bind_mounts; i++) {
+
+ if (ro != c->bind_mounts[i].read_only)
+ continue;
+
+ r = sd_bus_message_append(
+ reply, "(ssbt)",
+ c->bind_mounts[i].source,
+ c->bind_mounts[i].destination,
+ c->bind_mounts[i].ignore_enoent,
+ c->bind_mounts[i].recursive ? (uint64_t) MS_REC : (uint64_t) 0);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_temporary_filesystems(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ unsigned i;
+ int r;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < c->n_temporary_filesystems; i++) {
+ TemporaryFileSystem *t = c->temporary_filesystems + i;
+
+ r = sd_bus_message_append(
+ reply, "(ss)",
+ t->path,
+ t->options);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_log_extra_fields(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ size_t i;
+ int r;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "ay");
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < c->n_log_extra_fields; i++) {
+ r = sd_bus_message_append_array(reply, 'y', c->log_extra_fields[i].iov_base, c->log_extra_fields[i].iov_len);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+const sd_bus_vtable bus_exec_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("EnvironmentFiles", "a(sb)", property_get_environment_files, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PassEnvironment", "as", NULL, offsetof(ExecContext, pass_environment), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UnsetEnvironment", "as", NULL, offsetof(ExecContext, unset_environment), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UMask", "u", bus_property_get_mode, offsetof(ExecContext, umask), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitCPU", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_CPU]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitCPUSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_CPU]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitFSIZE", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_FSIZE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitFSIZESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_FSIZE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitDATA", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_DATA]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitDATASoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_DATA]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitSTACK", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_STACK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitSTACKSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_STACK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitCORE", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_CORE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitCORESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_CORE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitRSS", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RSS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitRSSSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RSS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitNOFILE", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_NOFILE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitNOFILESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_NOFILE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitAS", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_AS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitASSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_AS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitNPROC", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_NPROC]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitNPROCSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_NPROC]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitMEMLOCK", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_MEMLOCK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitMEMLOCKSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_MEMLOCK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitLOCKS", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_LOCKS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitLOCKSSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_LOCKS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitSIGPENDING", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_SIGPENDING]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitSIGPENDINGSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_SIGPENDING]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitMSGQUEUE", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_MSGQUEUE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitMSGQUEUESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_MSGQUEUE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitNICE", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_NICE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitNICESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_NICE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitRTPRIO", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RTPRIO]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitRTPRIOSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RTPRIO]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitRTTIME", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitRTTIMESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("WorkingDirectory", "s", property_get_working_directory, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RootDirectory", "s", NULL, offsetof(ExecContext, root_directory), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RootImage", "s", NULL, offsetof(ExecContext, root_image), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("OOMScoreAdjust", "i", property_get_oom_score_adjust, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Nice", "i", property_get_nice, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IOSchedulingClass", "i", property_get_ioprio_class, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IOSchedulingPriority", "i", property_get_ioprio_priority, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CPUSchedulingPolicy", "i", property_get_cpu_sched_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CPUSchedulingPriority", "i", property_get_cpu_sched_priority, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CPUAffinity", "ay", property_get_cpu_affinity, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CPUSchedulingResetOnFork", "b", bus_property_get_bool, offsetof(ExecContext, cpu_sched_reset_on_fork), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NonBlocking", "b", bus_property_get_bool, offsetof(ExecContext, non_blocking), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardInput", "s", property_get_exec_input, offsetof(ExecContext, std_input), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardInputFileDescriptorName", "s", property_get_stdio_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardInputData", "ay", property_get_input_data, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardOutput", "s", bus_property_get_exec_output, offsetof(ExecContext, std_output), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardOutputFileDescriptorName", "s", property_get_stdio_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardError", "s", bus_property_get_exec_output, offsetof(ExecContext, std_error), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardErrorFileDescriptorName", "s", property_get_stdio_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TTYPath", "s", NULL, offsetof(ExecContext, tty_path), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TTYReset", "b", bus_property_get_bool, offsetof(ExecContext, tty_reset), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TTYVHangup", "b", bus_property_get_bool, offsetof(ExecContext, tty_vhangup), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TTYVTDisallocate", "b", bus_property_get_bool, offsetof(ExecContext, tty_vt_disallocate), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SyslogPriority", "i", bus_property_get_int, offsetof(ExecContext, syslog_priority), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SyslogIdentifier", "s", NULL, offsetof(ExecContext, syslog_identifier), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SyslogLevelPrefix", "b", bus_property_get_bool, offsetof(ExecContext, syslog_level_prefix), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SyslogLevel", "i", property_get_syslog_level, offsetof(ExecContext, syslog_priority), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SyslogFacility", "i", property_get_syslog_facility, offsetof(ExecContext, syslog_priority), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogLevelMax", "i", bus_property_get_int, offsetof(ExecContext, log_level_max), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogRateLimitIntervalUSec", "t", bus_property_get_usec, offsetof(ExecContext, log_rate_limit_interval_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogRateLimitBurst", "u", bus_property_get_unsigned, offsetof(ExecContext, log_rate_limit_burst), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogExtraFields", "aay", property_get_log_extra_fields, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SecureBits", "i", bus_property_get_int, offsetof(ExecContext, secure_bits), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CapabilityBoundingSet", "t", NULL, offsetof(ExecContext, capability_bounding_set), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("AmbientCapabilities", "t", NULL, offsetof(ExecContext, capability_ambient_set), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("User", "s", NULL, offsetof(ExecContext, user), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Group", "s", NULL, offsetof(ExecContext, group), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DynamicUser", "b", bus_property_get_bool, offsetof(ExecContext, dynamic_user), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RemoveIPC", "b", bus_property_get_bool, offsetof(ExecContext, remove_ipc), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SupplementaryGroups", "as", NULL, offsetof(ExecContext, supplementary_groups), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PAMName", "s", NULL, offsetof(ExecContext, pam_name), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReadWritePaths", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReadOnlyPaths", "as", NULL, offsetof(ExecContext, read_only_paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("InaccessiblePaths", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectKernelModules", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_modules), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateMounts", "b", bus_property_get_bool, offsetof(ExecContext, private_mounts), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectHome", "s", property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectSystem", "s", property_get_protect_system, offsetof(ExecContext, protect_system), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UtmpIdentifier", "s", NULL, offsetof(ExecContext, utmp_id), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UtmpMode", "s", property_get_exec_utmp_mode, offsetof(ExecContext, utmp_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SELinuxContext", "(bs)", property_get_selinux_context, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("AppArmorProfile", "(bs)", property_get_apparmor_profile, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SmackProcessLabel", "(bs)", property_get_smack_process_label, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IgnoreSIGPIPE", "b", bus_property_get_bool, offsetof(ExecContext, ignore_sigpipe), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NoNewPrivileges", "b", bus_property_get_bool, offsetof(ExecContext, no_new_privileges), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SystemCallFilter", "(bas)", property_get_syscall_filter, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SystemCallErrorNumber", "i", bus_property_get_int, offsetof(ExecContext, syscall_errno), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Personality", "s", property_get_personality, offsetof(ExecContext, personality), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LockPersonality", "b", bus_property_get_bool, offsetof(ExecContext, lock_personality), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RuntimeDirectoryPreserve", "s", property_get_exec_preserve_mode, offsetof(ExecContext, runtime_directory_preserve_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RuntimeDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_RUNTIME].mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RuntimeDirectory", "as", NULL, offsetof(ExecContext, directories[EXEC_DIRECTORY_RUNTIME].paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StateDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_STATE].mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StateDirectory", "as", NULL, offsetof(ExecContext, directories[EXEC_DIRECTORY_STATE].paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CacheDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_CACHE].mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CacheDirectory", "as", NULL, offsetof(ExecContext, directories[EXEC_DIRECTORY_CACHE].paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogsDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_LOGS].mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogsDirectory", "as", NULL, offsetof(ExecContext, directories[EXEC_DIRECTORY_LOGS].paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ConfigurationDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_CONFIGURATION].mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ConfigurationDirectory", "as", NULL, offsetof(ExecContext, directories[EXEC_DIRECTORY_CONFIGURATION].paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MemoryDenyWriteExecute", "b", bus_property_get_bool, offsetof(ExecContext, memory_deny_write_execute), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestrictRealtime", "b", bus_property_get_bool, offsetof(ExecContext, restrict_realtime), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestrictNamespaces", "t", bus_property_get_ulong, offsetof(ExecContext, restrict_namespaces), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("BindPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("BindReadOnlyPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TemporaryFileSystem", "a(ss)", property_get_temporary_filesystems, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MountAPIVFS", "b", bus_property_get_bool, offsetof(ExecContext, mount_apivfs), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KeyringMode", "s", property_get_exec_keyring_mode, offsetof(ExecContext, keyring_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+
+ /* Obsolete/redundant properties: */
+ SD_BUS_PROPERTY("Capabilities", "s", property_get_empty_string, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("ReadOnlyDirectories", "as", NULL, offsetof(ExecContext, read_only_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("InaccessibleDirectories", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("IOScheduling", "i", property_get_ioprio, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+
+ SD_BUS_VTABLE_END
+};
+
+static int append_exec_command(sd_bus_message *reply, ExecCommand *c) {
+ int r;
+
+ assert(reply);
+ assert(c);
+
+ if (!c->path)
+ return 0;
+
+ r = sd_bus_message_open_container(reply, 'r', "sasbttttuii");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", c->path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(reply, c->argv);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "bttttuii",
+ !!(c->flags & EXEC_COMMAND_IGNORE_FAILURE),
+ c->exec_status.start_timestamp.realtime,
+ c->exec_status.start_timestamp.monotonic,
+ c->exec_status.exit_timestamp.realtime,
+ c->exec_status.exit_timestamp.monotonic,
+ (uint32_t) c->exec_status.pid,
+ (int32_t) c->exec_status.code,
+ (int32_t) c->exec_status.status);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+int bus_property_get_exec_command(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *ret_error) {
+
+ ExecCommand *c = (ExecCommand*) userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "(sasbttttuii)");
+ if (r < 0)
+ return r;
+
+ r = append_exec_command(reply, c);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+int bus_property_get_exec_command_list(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *ret_error) {
+
+ ExecCommand *c = *(ExecCommand**) userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "(sasbttttuii)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(command, c, c) {
+ r = append_exec_command(reply, c);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+int bus_set_transient_exec_command(
+ Unit *u,
+ const char *name,
+ ExecCommand **exec_command,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+ unsigned n = 0;
+ int r;
+
+ r = sd_bus_message_enter_container(message, 'a', "(sasb)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_enter_container(message, 'r', "sasb")) > 0) {
+ _cleanup_strv_free_ char **argv = NULL;
+ const char *path;
+ int b;
+
+ r = sd_bus_message_read(message, "s", &path);
+ if (r < 0)
+ return r;
+
+ if (!path_is_absolute(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not absolute.", path);
+
+ r = sd_bus_message_read_strv(message, &argv);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ ExecCommand *c;
+
+ c = new0(ExecCommand, 1);
+ if (!c)
+ return -ENOMEM;
+
+ c->path = strdup(path);
+ if (!c->path) {
+ free(c);
+ return -ENOMEM;
+ }
+
+ c->argv = TAKE_PTR(argv);
+
+ c->flags = b ? EXEC_COMMAND_IGNORE_FAILURE : 0;
+
+ path_simplify(c->path, false);
+ exec_command_append_list(exec_command, c);
+ }
+
+ n++;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ ExecCommand *c;
+ size_t size = 0;
+
+ if (n == 0)
+ *exec_command = exec_command_free_list(*exec_command);
+
+ f = open_memstream(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ fputs("ExecStart=\n", f);
+
+ LIST_FOREACH(command, c, *exec_command) {
+ _cleanup_free_ char *a = NULL, *t = NULL;
+ const char *p;
+
+ p = unit_escape_setting(c->path, UNIT_ESCAPE_C|UNIT_ESCAPE_SPECIFIERS, &t);
+ if (!p)
+ return -ENOMEM;
+
+ a = unit_concat_strv(c->argv, UNIT_ESCAPE_C|UNIT_ESCAPE_SPECIFIERS);
+ if (!a)
+ return -ENOMEM;
+
+ fprintf(f, "%s=%s@%s %s\n",
+ name,
+ c->flags & EXEC_COMMAND_IGNORE_FAILURE ? "-" : "",
+ p,
+ a);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+}
+
+static int parse_personality(const char *s, unsigned long *p) {
+ unsigned long v;
+
+ assert(p);
+
+ v = personality_from_string(s);
+ if (v == PERSONALITY_INVALID)
+ return -EINVAL;
+
+ *p = v;
+ return 0;
+}
+
+static const char* mount_propagation_flags_to_string_with_check(unsigned long n) {
+ if (!IN_SET(n, 0, MS_SHARED, MS_PRIVATE, MS_SLAVE))
+ return NULL;
+
+ return mount_propagation_flags_to_string(n);
+}
+
+static BUS_DEFINE_SET_TRANSIENT(nsec, "t", uint64_t, nsec_t, NSEC_FMT);
+static BUS_DEFINE_SET_TRANSIENT_IS_VALID(log_level, "i", int32_t, int, "%" PRIi32, log_level_is_valid);
+#if HAVE_SECCOMP
+static BUS_DEFINE_SET_TRANSIENT_IS_VALID(errno, "i", int32_t, int, "%" PRIi32, errno_is_valid);
+#endif
+static BUS_DEFINE_SET_TRANSIENT_PARSE(std_input, ExecInput, exec_input_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(std_output, ExecOutput, exec_output_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(utmp_mode, ExecUtmpMode, exec_utmp_mode_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(protect_system, ProtectSystem, protect_system_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(protect_home, ProtectHome, protect_home_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(keyring_mode, ExecKeyringMode, exec_keyring_mode_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(preserve_mode, ExecPreserveMode, exec_preserve_mode_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(personality, unsigned long, parse_personality);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(secure_bits, "i", int32_t, int, "%" PRIi32, secure_bits_to_string_alloc_with_check);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(capability, "t", uint64_t, uint64_t, "%" PRIu64, capability_set_to_string_alloc);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(namespace_flag, "t", uint64_t, unsigned long, "%" PRIu64, namespace_flags_to_string);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING(mount_flags, "t", uint64_t, unsigned long, "%" PRIu64, mount_propagation_flags_to_string_with_check);
+
+int bus_exec_context_set_transient_property(
+ Unit *u,
+ ExecContext *c,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ const char *suffix;
+ int r;
+
+ assert(u);
+ assert(c);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "User"))
+ return bus_set_transient_user(u, name, &c->user, message, flags, error);
+
+ if (streq(name, "Group"))
+ return bus_set_transient_user(u, name, &c->group, message, flags, error);
+
+ if (streq(name, "TTYPath"))
+ return bus_set_transient_path(u, name, &c->tty_path, message, flags, error);
+
+ if (streq(name, "RootImage"))
+ return bus_set_transient_path(u, name, &c->root_image, message, flags, error);
+
+ if (streq(name, "RootDirectory"))
+ return bus_set_transient_path(u, name, &c->root_directory, message, flags, error);
+
+ if (streq(name, "SyslogIdentifier"))
+ return bus_set_transient_string(u, name, &c->syslog_identifier, message, flags, error);
+
+ if (streq(name, "LogLevelMax"))
+ return bus_set_transient_log_level(u, name, &c->log_level_max, message, flags, error);
+
+ if (streq(name, "LogRateLimitIntervalUSec"))
+ return bus_set_transient_usec(u, name, &c->log_rate_limit_interval_usec, message, flags, error);
+
+ if (streq(name, "LogRateLimitBurst"))
+ return bus_set_transient_unsigned(u, name, &c->log_rate_limit_burst, message, flags, error);
+
+ if (streq(name, "Personality"))
+ return bus_set_transient_personality(u, name, &c->personality, message, flags, error);
+
+ if (streq(name, "StandardInput"))
+ return bus_set_transient_std_input(u, name, &c->std_input, message, flags, error);
+
+ if (streq(name, "StandardOutput"))
+ return bus_set_transient_std_output(u, name, &c->std_output, message, flags, error);
+
+ if (streq(name, "StandardError"))
+ return bus_set_transient_std_output(u, name, &c->std_error, message, flags, error);
+
+ if (streq(name, "IgnoreSIGPIPE"))
+ return bus_set_transient_bool(u, name, &c->ignore_sigpipe, message, flags, error);
+
+ if (streq(name, "TTYVHangup"))
+ return bus_set_transient_bool(u, name, &c->tty_vhangup, message, flags, error);
+
+ if (streq(name, "TTYReset"))
+ return bus_set_transient_bool(u, name, &c->tty_reset, message, flags, error);
+
+ if (streq(name, "TTYVTDisallocate"))
+ return bus_set_transient_bool(u, name, &c->tty_vt_disallocate, message, flags, error);
+
+ if (streq(name, "PrivateTmp"))
+ return bus_set_transient_bool(u, name, &c->private_tmp, message, flags, error);
+
+ if (streq(name, "PrivateDevices"))
+ return bus_set_transient_bool(u, name, &c->private_devices, message, flags, error);
+
+ if (streq(name, "PrivateMounts"))
+ return bus_set_transient_bool(u, name, &c->private_mounts, message, flags, error);
+
+ if (streq(name, "PrivateNetwork"))
+ return bus_set_transient_bool(u, name, &c->private_network, message, flags, error);
+
+ if (streq(name, "PrivateUsers"))
+ return bus_set_transient_bool(u, name, &c->private_users, message, flags, error);
+
+ if (streq(name, "NoNewPrivileges"))
+ return bus_set_transient_bool(u, name, &c->no_new_privileges, message, flags, error);
+
+ if (streq(name, "SyslogLevelPrefix"))
+ return bus_set_transient_bool(u, name, &c->syslog_level_prefix, message, flags, error);
+
+ if (streq(name, "MemoryDenyWriteExecute"))
+ return bus_set_transient_bool(u, name, &c->memory_deny_write_execute, message, flags, error);
+
+ if (streq(name, "RestrictRealtime"))
+ return bus_set_transient_bool(u, name, &c->restrict_realtime, message, flags, error);
+
+ if (streq(name, "DynamicUser"))
+ return bus_set_transient_bool(u, name, &c->dynamic_user, message, flags, error);
+
+ if (streq(name, "RemoveIPC"))
+ return bus_set_transient_bool(u, name, &c->remove_ipc, message, flags, error);
+
+ if (streq(name, "ProtectKernelTunables"))
+ return bus_set_transient_bool(u, name, &c->protect_kernel_tunables, message, flags, error);
+
+ if (streq(name, "ProtectKernelModules"))
+ return bus_set_transient_bool(u, name, &c->protect_kernel_modules, message, flags, error);
+
+ if (streq(name, "ProtectControlGroups"))
+ return bus_set_transient_bool(u, name, &c->protect_control_groups, message, flags, error);
+
+ if (streq(name, "MountAPIVFS"))
+ return bus_set_transient_bool(u, name, &c->mount_apivfs, message, flags, error);
+
+ if (streq(name, "CPUSchedulingResetOnFork"))
+ return bus_set_transient_bool(u, name, &c->cpu_sched_reset_on_fork, message, flags, error);
+
+ if (streq(name, "NonBlocking"))
+ return bus_set_transient_bool(u, name, &c->non_blocking, message, flags, error);
+
+ if (streq(name, "LockPersonality"))
+ return bus_set_transient_bool(u, name, &c->lock_personality, message, flags, error);
+
+ if (streq(name, "UtmpIdentifier"))
+ return bus_set_transient_string(u, name, &c->utmp_id, message, flags, error);
+
+ if (streq(name, "UtmpMode"))
+ return bus_set_transient_utmp_mode(u, name, &c->utmp_mode, message, flags, error);
+
+ if (streq(name, "PAMName"))
+ return bus_set_transient_string(u, name, &c->pam_name, message, flags, error);
+
+ if (streq(name, "TimerSlackNSec"))
+ return bus_set_transient_nsec(u, name, &c->timer_slack_nsec, message, flags, error);
+
+ if (streq(name, "ProtectSystem"))
+ return bus_set_transient_protect_system(u, name, &c->protect_system, message, flags, error);
+
+ if (streq(name, "ProtectHome"))
+ return bus_set_transient_protect_home(u, name, &c->protect_home, message, flags, error);
+
+ if (streq(name, "KeyringMode"))
+ return bus_set_transient_keyring_mode(u, name, &c->keyring_mode, message, flags, error);
+
+ if (streq(name, "RuntimeDirectoryPreserve"))
+ return bus_set_transient_preserve_mode(u, name, &c->runtime_directory_preserve_mode, message, flags, error);
+
+ if (streq(name, "UMask"))
+ return bus_set_transient_mode_t(u, name, &c->umask, message, flags, error);
+
+ if (streq(name, "RuntimeDirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &c->directories[EXEC_DIRECTORY_RUNTIME].mode, message, flags, error);
+
+ if (streq(name, "StateDirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &c->directories[EXEC_DIRECTORY_STATE].mode, message, flags, error);
+
+ if (streq(name, "CacheDirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &c->directories[EXEC_DIRECTORY_CACHE].mode, message, flags, error);
+
+ if (streq(name, "LogsDirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &c->directories[EXEC_DIRECTORY_LOGS].mode, message, flags, error);
+
+ if (streq(name, "ConfigurationDirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &c->directories[EXEC_DIRECTORY_CONFIGURATION].mode, message, flags, error);
+
+ if (streq(name, "SELinuxContext"))
+ return bus_set_transient_string(u, name, &c->selinux_context, message, flags, error);
+
+ if (streq(name, "SecureBits"))
+ return bus_set_transient_secure_bits(u, name, &c->secure_bits, message, flags, error);
+
+ if (streq(name, "CapabilityBoundingSet"))
+ return bus_set_transient_capability(u, name, &c->capability_bounding_set, message, flags, error);
+
+ if (streq(name, "AmbientCapabilities"))
+ return bus_set_transient_capability(u, name, &c->capability_ambient_set, message, flags, error);
+
+ if (streq(name, "RestrictNamespaces"))
+ return bus_set_transient_namespace_flag(u, name, &c->restrict_namespaces, message, flags, error);
+
+ if (streq(name, "MountFlags"))
+ return bus_set_transient_mount_flags(u, name, &c->mount_flags, message, flags, error);
+
+ if (streq(name, "SupplementaryGroups")) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **p;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, l) {
+ if (!isempty(*p) && !valid_user_group_name_or_id(*p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid supplementary group names");
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (strv_isempty(l)) {
+ c->supplementary_groups = strv_free(c->supplementary_groups);
+ unit_write_settingf(u, flags, name, "%s=", name);
+ } else {
+ _cleanup_free_ char *joined = NULL;
+
+ r = strv_extend_strv(&c->supplementary_groups, l, true);
+ if (r < 0)
+ return -ENOMEM;
+
+ joined = strv_join(c->supplementary_groups, " ");
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=%s", name, joined);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "SyslogLevel")) {
+ int32_t level;
+
+ r = sd_bus_message_read(message, "i", &level);
+ if (r < 0)
+ return r;
+
+ if (!log_level_is_valid(level))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Log level value out of range");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->syslog_priority = (c->syslog_priority & LOG_FACMASK) | level;
+ unit_write_settingf(u, flags, name, "SyslogLevel=%i", level);
+ }
+
+ return 1;
+
+ } else if (streq(name, "SyslogFacility")) {
+ int32_t facility;
+
+ r = sd_bus_message_read(message, "i", &facility);
+ if (r < 0)
+ return r;
+
+ if (!log_facility_unshifted_is_valid(facility))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Log facility value out of range");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->syslog_priority = (facility << 3) | LOG_PRI(c->syslog_priority);
+ unit_write_settingf(u, flags, name, "SyslogFacility=%i", facility);
+ }
+
+ return 1;
+
+ } else if (streq(name, "LogExtraFields")) {
+ size_t n = 0;
+
+ r = sd_bus_message_enter_container(message, 'a', "ay");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ void *copy = NULL;
+ struct iovec *t;
+ const char *eq;
+ const void *p;
+ size_t sz;
+
+ /* Note that we expect a byte array for each field, instead of a string. That's because on the
+ * lower-level journal fields can actually contain binary data and are not restricted to text,
+ * and we should not "lose precision" in our types on the way. That said, I am pretty sure
+ * actually encoding binary data as unit metadata is not a good idea. Hence we actually refuse
+ * any actual binary data, and only accept UTF-8. This allows us to eventually lift this
+ * limitation, should a good, valid usecase arise. */
+
+ r = sd_bus_message_read_array(message, 'y', &p, &sz);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (memchr(p, 0, sz))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Journal field contains zero byte");
+
+ eq = memchr(p, '=', sz);
+ if (!eq)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Journal field contains no '=' character");
+ if (!journal_field_valid(p, eq - (const char*) p, false))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Journal field invalid");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ t = reallocarray(c->log_extra_fields, c->n_log_extra_fields+1, sizeof(struct iovec));
+ if (!t)
+ return -ENOMEM;
+ c->log_extra_fields = t;
+ }
+
+ copy = malloc(sz + 1);
+ if (!copy)
+ return -ENOMEM;
+
+ memcpy(copy, p, sz);
+ ((uint8_t*) copy)[sz] = 0;
+
+ if (!utf8_is_valid(copy))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Journal field is not valid UTF-8");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->log_extra_fields[c->n_log_extra_fields++] = IOVEC_MAKE(copy, sz);
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS|UNIT_ESCAPE_C, name, "LogExtraFields=%s", (char*) copy);
+
+ copy = NULL;
+ }
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && n == 0) {
+ exec_context_free_log_extra_fields(c);
+ unit_write_setting(u, flags, name, "LogExtraFields=");
+ }
+
+ return 1;
+ }
+
+#if HAVE_SECCOMP
+
+ if (streq(name, "SystemCallErrorNumber"))
+ return bus_set_transient_errno(u, name, &c->syscall_errno, message, flags, error);
+
+ if (streq(name, "SystemCallFilter")) {
+ int whitelist;
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_bus_message_enter_container(message, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "b", &whitelist);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *joined = NULL;
+ bool invert = !whitelist;
+ char **s;
+
+ if (strv_isempty(l)) {
+ c->syscall_whitelist = false;
+ c->syscall_filter = hashmap_free(c->syscall_filter);
+
+ unit_write_settingf(u, flags, name, "SystemCallFilter=");
+ return 1;
+ }
+
+ if (!c->syscall_filter) {
+ c->syscall_filter = hashmap_new(NULL);
+ if (!c->syscall_filter)
+ return log_oom();
+
+ c->syscall_whitelist = whitelist;
+
+ if (c->syscall_whitelist) {
+ r = seccomp_parse_syscall_filter("@default", -1, c->syscall_filter, SECCOMP_PARSE_WHITELIST | (invert ? SECCOMP_PARSE_INVERT : 0));
+ if (r < 0)
+ return r;
+ }
+ }
+
+ STRV_FOREACH(s, l) {
+ _cleanup_free_ char *n = NULL;
+ int e;
+
+ r = parse_syscall_and_errno(*s, &n, &e);
+ if (r < 0)
+ return r;
+
+ r = seccomp_parse_syscall_filter(n, e, c->syscall_filter, (invert ? SECCOMP_PARSE_INVERT : 0) | (c->syscall_whitelist ? SECCOMP_PARSE_WHITELIST : 0));
+ if (r < 0)
+ return r;
+ }
+
+ joined = strv_join(l, " ");
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, name, "SystemCallFilter=%s%s", whitelist ? "" : "~", joined);
+ }
+
+ return 1;
+
+ } else if (streq(name, "SystemCallArchitectures")) {
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *joined = NULL;
+
+ if (strv_isempty(l))
+ c->syscall_archs = set_free(c->syscall_archs);
+ else {
+ char **s;
+
+ r = set_ensure_allocated(&c->syscall_archs, NULL);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(s, l) {
+ uint32_t a;
+
+ r = seccomp_arch_from_string(*s, &a);
+ if (r < 0)
+ return r;
+
+ r = set_put(c->syscall_archs, UINT32_TO_PTR(a + 1));
+ if (r < 0)
+ return r;
+ }
+
+ }
+
+ joined = strv_join(l, " ");
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, joined);
+ }
+
+ return 1;
+
+ } else if (streq(name, "RestrictAddressFamilies")) {
+ int whitelist;
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_bus_message_enter_container(message, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "b", &whitelist);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *joined = NULL;
+ bool invert = !whitelist;
+ char **s;
+
+ if (strv_isempty(l)) {
+ c->address_families_whitelist = false;
+ c->address_families = set_free(c->address_families);
+
+ unit_write_settingf(u, flags, name, "RestrictAddressFamilies=");
+ return 1;
+ }
+
+ if (!c->address_families) {
+ c->address_families = set_new(NULL);
+ if (!c->address_families)
+ return log_oom();
+
+ c->address_families_whitelist = whitelist;
+ }
+
+ STRV_FOREACH(s, l) {
+ int af;
+
+ af = af_from_name(*s);
+ if (af < 0)
+ return af;
+
+ if (!invert == c->address_families_whitelist) {
+ r = set_put(c->address_families, INT_TO_PTR(af));
+ if (r < 0)
+ return r;
+ } else
+ (void) set_remove(c->address_families, INT_TO_PTR(af));
+ }
+
+ joined = strv_join(l, " ");
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, name, "RestrictAddressFamilies=%s%s", whitelist ? "" : "~", joined);
+ }
+
+ return 1;
+ }
+#endif
+ if (streq(name, "CPUAffinity")) {
+ const void *a;
+ size_t n = 0;
+
+ r = sd_bus_message_read_array(message, 'y', &a, &n);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (n == 0) {
+ c->cpuset = cpu_set_mfree(c->cpuset);
+ c->cpuset_ncpus = 0;
+ unit_write_settingf(u, flags, name, "%s=", name);
+ } else {
+ _cleanup_free_ char *str = NULL;
+ size_t allocated = 0, len = 0, i, ncpus;
+
+ ncpus = CPU_SIZE_TO_NUM(n);
+
+ for (i = 0; i < ncpus; i++) {
+ _cleanup_free_ char *p = NULL;
+ size_t add;
+
+ if (!CPU_ISSET_S(i, n, (cpu_set_t*) a))
+ continue;
+
+ r = asprintf(&p, "%zu", i);
+ if (r < 0)
+ return -ENOMEM;
+
+ add = strlen(p);
+
+ if (!GREEDY_REALLOC(str, allocated, len + add + 2))
+ return -ENOMEM;
+
+ strcpy(mempcpy(str + len, p, add), " ");
+ len += add + 1;
+ }
+
+ if (len != 0)
+ str[len - 1] = '\0';
+
+ if (!c->cpuset || c->cpuset_ncpus < ncpus) {
+ cpu_set_t *cpuset;
+
+ cpuset = CPU_ALLOC(ncpus);
+ if (!cpuset)
+ return -ENOMEM;
+
+ CPU_ZERO_S(n, cpuset);
+ if (c->cpuset) {
+ CPU_OR_S(CPU_ALLOC_SIZE(c->cpuset_ncpus), cpuset, c->cpuset, (cpu_set_t*) a);
+ CPU_FREE(c->cpuset);
+ } else
+ CPU_OR_S(n, cpuset, cpuset, (cpu_set_t*) a);
+
+ c->cpuset = cpuset;
+ c->cpuset_ncpus = ncpus;
+ } else
+ CPU_OR_S(n, c->cpuset, c->cpuset, (cpu_set_t*) a);
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, str);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "Nice")) {
+ int32_t q;
+
+ r = sd_bus_message_read(message, "i", &q);
+ if (r < 0)
+ return r;
+
+ if (!nice_is_valid(q))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid Nice value: %i", q);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->nice = q;
+ c->nice_set = true;
+
+ unit_write_settingf(u, flags, name, "Nice=%i", q);
+ }
+
+ return 1;
+
+ } else if (streq(name, "CPUSchedulingPolicy")) {
+ int32_t q;
+
+ r = sd_bus_message_read(message, "i", &q);
+ if (r < 0)
+ return r;
+
+ if (!sched_policy_is_valid(q))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid CPU scheduling policy: %i", q);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *s = NULL;
+
+ r = sched_policy_to_string_alloc(q, &s);
+ if (r < 0)
+ return r;
+
+ c->cpu_sched_policy = q;
+ c->cpu_sched_priority = CLAMP(c->cpu_sched_priority, sched_get_priority_min(q), sched_get_priority_max(q));
+ c->cpu_sched_set = true;
+
+ unit_write_settingf(u, flags, name, "CPUSchedulingPolicy=%s", s);
+ }
+
+ return 1;
+
+ } else if (streq(name, "CPUSchedulingPriority")) {
+ int32_t p, min, max;
+
+ r = sd_bus_message_read(message, "i", &p);
+ if (r < 0)
+ return r;
+
+ min = sched_get_priority_min(c->cpu_sched_policy);
+ max = sched_get_priority_max(c->cpu_sched_policy);
+ if (p < min || p > max)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid CPU scheduling priority: %i", p);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->cpu_sched_priority = p;
+ c->cpu_sched_set = true;
+
+ unit_write_settingf(u, flags, name, "CPUSchedulingPriority=%i", p);
+ }
+
+ return 1;
+
+ } else if (streq(name, "IOSchedulingClass")) {
+ int32_t q;
+
+ r = sd_bus_message_read(message, "i", &q);
+ if (r < 0)
+ return r;
+
+ if (!ioprio_class_is_valid(q))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid IO scheduling class: %i", q);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *s = NULL;
+
+ r = ioprio_class_to_string_alloc(q, &s);
+ if (r < 0)
+ return r;
+
+ c->ioprio = IOPRIO_PRIO_VALUE(q, IOPRIO_PRIO_DATA(c->ioprio));
+ c->ioprio_set = true;
+
+ unit_write_settingf(u, flags, name, "IOSchedulingClass=%s", s);
+ }
+
+ return 1;
+
+ } else if (streq(name, "IOSchedulingPriority")) {
+ int32_t p;
+
+ r = sd_bus_message_read(message, "i", &p);
+ if (r < 0)
+ return r;
+
+ if (!ioprio_priority_is_valid(p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid IO scheduling priority: %i", p);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_PRIO_CLASS(c->ioprio), p);
+ c->ioprio_set = true;
+
+ unit_write_settingf(u, flags, name, "IOSchedulingPriority=%i", p);
+ }
+
+ return 1;
+
+ } else if (streq(name, "WorkingDirectory")) {
+ const char *s;
+ bool missing_ok;
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ if (s[0] == '-') {
+ missing_ok = true;
+ s++;
+ } else
+ missing_ok = false;
+
+ if (!isempty(s) && !streq(s, "~") && !path_is_absolute(s))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "WorkingDirectory= expects an absolute path or '~'");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (streq(s, "~")) {
+ c->working_directory = mfree(c->working_directory);
+ c->working_directory_home = true;
+ } else {
+ r = free_and_strdup(&c->working_directory, empty_to_null(s));
+ if (r < 0)
+ return r;
+
+ c->working_directory_home = false;
+ }
+
+ c->working_directory_missing_ok = missing_ok;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "WorkingDirectory=%s%s", missing_ok ? "-" : "", s);
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name,
+ "StandardInputFileDescriptorName", "StandardOutputFileDescriptorName", "StandardErrorFileDescriptorName")) {
+ const char *s;
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ if (!isempty(s) && !fdname_is_valid(s))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid file descriptor name");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+
+ if (streq(name, "StandardInputFileDescriptorName")) {
+ r = free_and_strdup(c->stdio_fdname + STDIN_FILENO, empty_to_null(s));
+ if (r < 0)
+ return r;
+
+ c->std_input = EXEC_INPUT_NAMED_FD;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardInput=fd:%s", exec_context_fdname(c, STDIN_FILENO));
+
+ } else if (streq(name, "StandardOutputFileDescriptorName")) {
+ r = free_and_strdup(c->stdio_fdname + STDOUT_FILENO, empty_to_null(s));
+ if (r < 0)
+ return r;
+
+ c->std_output = EXEC_OUTPUT_NAMED_FD;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardOutput=fd:%s", exec_context_fdname(c, STDOUT_FILENO));
+
+ } else {
+ assert(streq(name, "StandardErrorFileDescriptorName"));
+
+ r = free_and_strdup(&c->stdio_fdname[STDERR_FILENO], empty_to_null(s));
+ if (r < 0)
+ return r;
+
+ c->std_error = EXEC_OUTPUT_NAMED_FD;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardError=fd:%s", exec_context_fdname(c, STDERR_FILENO));
+ }
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name,
+ "StandardInputFile",
+ "StandardOutputFile", "StandardOutputFileToAppend",
+ "StandardErrorFile", "StandardErrorFileToAppend")) {
+ const char *s;
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ if (!isempty(s)) {
+ if (!path_is_absolute(s))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not absolute", s);
+ if (!path_is_normalized(s))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not normalized", s);
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+
+ if (streq(name, "StandardInputFile")) {
+ r = free_and_strdup(&c->stdio_file[STDIN_FILENO], empty_to_null(s));
+ if (r < 0)
+ return r;
+
+ c->std_input = EXEC_INPUT_FILE;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardInput=file:%s", s);
+
+ } else if (STR_IN_SET(name, "StandardOutputFile", "StandardOutputFileToAppend")) {
+ r = free_and_strdup(&c->stdio_file[STDOUT_FILENO], empty_to_null(s));
+ if (r < 0)
+ return r;
+
+ if (streq(name, "StandardOutputFile")) {
+ c->std_output = EXEC_OUTPUT_FILE;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardOutput=file:%s", s);
+ } else {
+ assert(streq(name, "StandardOutputFileToAppend"));
+ c->std_output = EXEC_OUTPUT_FILE_APPEND;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardOutput=append:%s", s);
+ }
+ } else {
+ assert(STR_IN_SET(name, "StandardErrorFile", "StandardErrorFileToAppend"));
+
+ r = free_and_strdup(&c->stdio_file[STDERR_FILENO], empty_to_null(s));
+ if (r < 0)
+ return r;
+
+ if (streq(name, "StandardErrorFile")) {
+ c->std_error = EXEC_OUTPUT_FILE;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardError=file:%s", s);
+ } else {
+ assert(streq(name, "StandardErrorFileToAppend"));
+ c->std_error = EXEC_OUTPUT_FILE_APPEND;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardError=append:%s", s);
+ }
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "StandardInputData")) {
+ const void *p;
+ size_t sz;
+
+ r = sd_bus_message_read_array(message, 'y', &p, &sz);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *encoded = NULL;
+
+ if (sz == 0) {
+ c->stdin_data = mfree(c->stdin_data);
+ c->stdin_data_size = 0;
+
+ unit_write_settingf(u, flags, name, "StandardInputData=");
+ } else {
+ void *q;
+ ssize_t n;
+
+ if (c->stdin_data_size + sz < c->stdin_data_size || /* check for overflow */
+ c->stdin_data_size + sz > EXEC_STDIN_DATA_MAX)
+ return -E2BIG;
+
+ n = base64mem(p, sz, &encoded);
+ if (n < 0)
+ return (int) n;
+
+ q = realloc(c->stdin_data, c->stdin_data_size + sz);
+ if (!q)
+ return -ENOMEM;
+
+ memcpy((uint8_t*) q + c->stdin_data_size, p, sz);
+
+ c->stdin_data = q;
+ c->stdin_data_size += sz;
+
+ unit_write_settingf(u, flags, name, "StandardInputData=%s", encoded);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "Environment")) {
+
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ if (!strv_env_is_valid(l))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment block.");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (strv_isempty(l)) {
+ c->environment = strv_free(c->environment);
+ unit_write_setting(u, flags, name, "Environment=");
+ } else {
+ _cleanup_free_ char *joined = NULL;
+ char **e;
+
+ joined = unit_concat_strv(l, UNIT_ESCAPE_SPECIFIERS|UNIT_ESCAPE_C);
+ if (!joined)
+ return -ENOMEM;
+
+ e = strv_env_merge(2, c->environment, l);
+ if (!e)
+ return -ENOMEM;
+
+ strv_free_and_replace(c->environment, e);
+ unit_write_settingf(u, flags, name, "Environment=%s", joined);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "UnsetEnvironment")) {
+
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ if (!strv_env_name_or_assignment_is_valid(l))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid UnsetEnvironment= list.");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (strv_isempty(l)) {
+ c->unset_environment = strv_free(c->unset_environment);
+ unit_write_setting(u, flags, name, "UnsetEnvironment=");
+ } else {
+ _cleanup_free_ char *joined = NULL;
+ char **e;
+
+ joined = unit_concat_strv(l, UNIT_ESCAPE_SPECIFIERS|UNIT_ESCAPE_C);
+ if (!joined)
+ return -ENOMEM;
+
+ e = strv_env_merge(2, c->unset_environment, l);
+ if (!e)
+ return -ENOMEM;
+
+ strv_free_and_replace(c->unset_environment, e);
+ unit_write_settingf(u, flags, name, "UnsetEnvironment=%s", joined);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "OOMScoreAdjust")) {
+ int oa;
+
+ r = sd_bus_message_read(message, "i", &oa);
+ if (r < 0)
+ return r;
+
+ if (!oom_score_adjust_is_valid(oa))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "OOM score adjust value out of range");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->oom_score_adjust = oa;
+ c->oom_score_adjust_set = true;
+ unit_write_settingf(u, flags, name, "OOMScoreAdjust=%i", oa);
+ }
+
+ return 1;
+
+ } else if (streq(name, "EnvironmentFiles")) {
+
+ _cleanup_free_ char *joined = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ size_t size = 0;
+ char **i;
+
+ r = sd_bus_message_enter_container(message, 'a', "(sb)");
+ if (r < 0)
+ return r;
+
+ f = open_memstream(&joined, &size);
+ if (!f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ fputs("EnvironmentFile=\n", f);
+
+ STRV_FOREACH(i, c->environment_files) {
+ _cleanup_free_ char *q = NULL;
+
+ q = specifier_escape(*i);
+ if (!q)
+ return -ENOMEM;
+
+ fprintf(f, "EnvironmentFile=%s\n", q);
+ }
+
+ while ((r = sd_bus_message_enter_container(message, 'r', "sb")) > 0) {
+ const char *path;
+ int b;
+
+ r = sd_bus_message_read(message, "sb", &path, &b);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!path_is_absolute(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not absolute.", path);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *q = NULL;
+ char *buf;
+
+ buf = strjoin(b ? "-" : "", path);
+ if (!buf)
+ return -ENOMEM;
+
+ q = specifier_escape(buf);
+ if (!q) {
+ free(buf);
+ return -ENOMEM;
+ }
+
+ fprintf(f, "EnvironmentFile=%s\n", q);
+
+ r = strv_consume(&l, buf);
+ if (r < 0)
+ return r;
+ }
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (strv_isempty(l)) {
+ c->environment_files = strv_free(c->environment_files);
+ unit_write_setting(u, flags, name, "EnvironmentFile=");
+ } else {
+ r = strv_extend_strv(&c->environment_files, l, true);
+ if (r < 0)
+ return r;
+
+ unit_write_setting(u, flags, name, joined);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "PassEnvironment")) {
+
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ if (!strv_env_name_is_valid(l))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid PassEnvironment= block.");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (strv_isempty(l)) {
+ c->pass_environment = strv_free(c->pass_environment);
+ unit_write_setting(u, flags, name, "PassEnvironment=");
+ } else {
+ _cleanup_free_ char *joined = NULL;
+
+ r = strv_extend_strv(&c->pass_environment, l, true);
+ if (r < 0)
+ return r;
+
+ /* We write just the new settings out to file, with unresolved specifiers. */
+ joined = unit_concat_strv(l, UNIT_ESCAPE_SPECIFIERS);
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, name, "PassEnvironment=%s", joined);
+ }
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "ReadWriteDirectories", "ReadOnlyDirectories", "InaccessibleDirectories",
+ "ReadWritePaths", "ReadOnlyPaths", "InaccessiblePaths")) {
+ _cleanup_strv_free_ char **l = NULL;
+ char ***dirs;
+ char **p;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, l) {
+ char *i = *p;
+ size_t offset;
+
+ offset = i[0] == '-';
+ offset += i[offset] == '+';
+ if (!path_is_absolute(i + offset))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid %s", name);
+
+ path_simplify(i + offset, false);
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (STR_IN_SET(name, "ReadWriteDirectories", "ReadWritePaths"))
+ dirs = &c->read_write_paths;
+ else if (STR_IN_SET(name, "ReadOnlyDirectories", "ReadOnlyPaths"))
+ dirs = &c->read_only_paths;
+ else /* "InaccessiblePaths" */
+ dirs = &c->inaccessible_paths;
+
+ if (strv_isempty(l)) {
+ *dirs = strv_free(*dirs);
+ unit_write_settingf(u, flags, name, "%s=", name);
+ } else {
+ _cleanup_free_ char *joined = NULL;
+
+ joined = unit_concat_strv(l, UNIT_ESCAPE_SPECIFIERS);
+ if (!joined)
+ return -ENOMEM;
+
+ r = strv_extend_strv(dirs, l, true);
+ if (r < 0)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, joined);
+ }
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "RuntimeDirectory", "StateDirectory", "CacheDirectory", "LogsDirectory", "ConfigurationDirectory")) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **p;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, l) {
+ if (!path_is_normalized(*p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s= path is not normalized: %s", name, *p);
+
+ if (path_is_absolute(*p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s= path is absolute: %s", name, *p);
+
+ if (path_startswith(*p, "private"))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s= path can't be 'private': %s", name, *p);
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ char ***dirs = NULL;
+ ExecDirectoryType i;
+
+ for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
+ if (streq(name, exec_directory_type_to_string(i))) {
+ dirs = &c->directories[i].paths;
+ break;
+ }
+
+ assert(dirs);
+
+ if (strv_isempty(l)) {
+ *dirs = strv_free(*dirs);
+ unit_write_settingf(u, flags, name, "%s=", name);
+ } else {
+ _cleanup_free_ char *joined = NULL;
+
+ r = strv_extend_strv(dirs, l, true);
+ if (r < 0)
+ return -ENOMEM;
+
+ joined = unit_concat_strv(l, UNIT_ESCAPE_SPECIFIERS);
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, joined);
+ }
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "AppArmorProfile", "SmackProcessLabel")) {
+ int ignore;
+ const char *s;
+
+ r = sd_bus_message_read(message, "(bs)", &ignore, &s);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ char **p;
+ bool *b;
+
+ if (streq(name, "AppArmorProfile")) {
+ p = &c->apparmor_profile;
+ b = &c->apparmor_profile_ignore;
+ } else { /* "SmackProcessLabel" */
+ p = &c->smack_process_label;
+ b = &c->smack_process_label_ignore;
+ }
+
+ if (isempty(s)) {
+ *p = mfree(*p);
+ *b = false;
+ } else {
+ if (free_and_strdup(p, s) < 0)
+ return -ENOMEM;
+ *b = ignore;
+ }
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=%s%s", name, ignore ? "-" : "", strempty(s));
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "BindPaths", "BindReadOnlyPaths")) {
+ const char *source, *destination;
+ int ignore_enoent;
+ uint64_t mount_flags;
+ bool empty = true;
+
+ r = sd_bus_message_enter_container(message, 'a', "(ssbt)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(ssbt)", &source, &destination, &ignore_enoent, &mount_flags)) > 0) {
+
+ if (!path_is_absolute(source))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path %s is not absolute.", source);
+ if (!path_is_absolute(destination))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path %s is not absolute.", destination);
+ if (!IN_SET(mount_flags, 0, MS_REC))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown mount flags.");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = bind_mount_add(&c->bind_mounts, &c->n_bind_mounts,
+ &(BindMount) {
+ .source = strdup(source),
+ .destination = strdup(destination),
+ .read_only = !!strstr(name, "ReadOnly"),
+ .recursive = !!(mount_flags & MS_REC),
+ .ignore_enoent = ignore_enoent,
+ });
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(
+ u, flags|UNIT_ESCAPE_SPECIFIERS, name,
+ "%s=%s%s:%s:%s",
+ name,
+ ignore_enoent ? "-" : "",
+ source,
+ destination,
+ (mount_flags & MS_REC) ? "rbind" : "norbind");
+ }
+
+ empty = false;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (empty) {
+ bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
+ c->bind_mounts = NULL;
+ c->n_bind_mounts = 0;
+
+ unit_write_settingf(u, flags, name, "%s=", name);
+ }
+
+ return 1;
+
+ } else if (streq(name, "TemporaryFileSystem")) {
+ const char *path, *options;
+ bool empty = true;
+
+ r = sd_bus_message_enter_container(message, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(ss)", &path, &options)) > 0) {
+
+ if (!path_is_absolute(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Mount point %s is not absolute.", path);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = temporary_filesystem_add(&c->temporary_filesystems, &c->n_temporary_filesystems, path, options);
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(
+ u, flags|UNIT_ESCAPE_SPECIFIERS, name,
+ "%s=%s:%s",
+ name,
+ path,
+ options);
+ }
+
+ empty = false;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (empty) {
+ temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
+ c->temporary_filesystems = NULL;
+ c->n_temporary_filesystems = 0;
+
+ unit_write_settingf(u, flags, name, "%s=", name);
+ }
+
+ return 1;
+
+ } else if ((suffix = startswith(name, "Limit"))) {
+ const char *soft = NULL;
+ int ri;
+
+ ri = rlimit_from_string(suffix);
+ if (ri < 0) {
+ soft = endswith(suffix, "Soft");
+ if (soft) {
+ const char *n;
+
+ n = strndupa(suffix, soft - suffix);
+ ri = rlimit_from_string(n);
+ if (ri >= 0)
+ name = strjoina("Limit", n);
+ }
+ }
+
+ if (ri >= 0) {
+ uint64_t rl;
+ rlim_t x;
+
+ r = sd_bus_message_read(message, "t", &rl);
+ if (r < 0)
+ return r;
+
+ if (rl == (uint64_t) -1)
+ x = RLIM_INFINITY;
+ else {
+ x = (rlim_t) rl;
+
+ if ((uint64_t) x != rl)
+ return -ERANGE;
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *f = NULL;
+ struct rlimit nl;
+
+ if (c->rlimit[ri]) {
+ nl = *c->rlimit[ri];
+
+ if (soft)
+ nl.rlim_cur = x;
+ else
+ nl.rlim_max = x;
+ } else
+ /* When the resource limit is not initialized yet, then assign the value to both fields */
+ nl = (struct rlimit) {
+ .rlim_cur = x,
+ .rlim_max = x,
+ };
+
+ r = rlimit_format(&nl, &f);
+ if (r < 0)
+ return r;
+
+ if (c->rlimit[ri])
+ *c->rlimit[ri] = nl;
+ else {
+ c->rlimit[ri] = newdup(struct rlimit, &nl, 1);
+ if (!c->rlimit[ri])
+ return -ENOMEM;
+ }
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, f);
+ }
+
+ return 1;
+ }
+
+ }
+
+ return 0;
+}
diff --git a/src/core/dbus-execute.h b/src/core/dbus-execute.h
new file mode 100644
index 0000000..8405170
--- /dev/null
+++ b/src/core/dbus-execute.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "execute.h"
+
+#define BUS_EXEC_STATUS_VTABLE(prefix, offset, flags) \
+ BUS_PROPERTY_DUAL_TIMESTAMP(prefix "StartTimestamp", (offset) + offsetof(ExecStatus, start_timestamp), flags), \
+ BUS_PROPERTY_DUAL_TIMESTAMP(prefix "ExitTimestamp", (offset) + offsetof(ExecStatus, exit_timestamp), flags), \
+ SD_BUS_PROPERTY(prefix "PID", "u", bus_property_get_pid, (offset) + offsetof(ExecStatus, pid), flags), \
+ SD_BUS_PROPERTY(prefix "Code", "i", bus_property_get_int, (offset) + offsetof(ExecStatus, code), flags), \
+ SD_BUS_PROPERTY(prefix "Status", "i", bus_property_get_int, (offset) + offsetof(ExecStatus, status), flags)
+
+#define BUS_EXEC_COMMAND_VTABLE(name, offset, flags) \
+ SD_BUS_PROPERTY(name, "a(sasbttttuii)", bus_property_get_exec_command, offset, flags)
+
+#define BUS_EXEC_COMMAND_LIST_VTABLE(name, offset, flags) \
+ SD_BUS_PROPERTY(name, "a(sasbttttuii)", bus_property_get_exec_command_list, offset, flags)
+
+extern const sd_bus_vtable bus_exec_vtable[];
+
+int bus_property_get_exec_output(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
+int bus_property_get_exec_command(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
+int bus_property_get_exec_command_list(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
+
+int bus_exec_context_set_transient_property(Unit *u, ExecContext *c, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_exec_command(Unit *u, const char *name, ExecCommand **exec_command, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
diff --git a/src/core/dbus-job.c b/src/core/dbus-job.c
new file mode 100644
index 0000000..d11e58b
--- /dev/null
+++ b/src/core/dbus-job.c
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "dbus-job.h"
+#include "dbus-unit.h"
+#include "dbus.h"
+#include "job.h"
+#include "log.h"
+#include "selinux-access.h"
+#include "string-util.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, job_type, JobType);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_state, job_state, JobState);
+
+static int property_get_unit(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *p = NULL;
+ Job *j = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(j);
+
+ p = unit_dbus_path(j->unit);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_message_append(reply, "(so)", j->unit->id, p);
+}
+
+int bus_job_method_cancel(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Job *j = userdata;
+ int r;
+
+ assert(message);
+ assert(j);
+
+ r = mac_selinux_unit_access_check(j->unit, message, "stop", error);
+ if (r < 0)
+ return r;
+
+ /* Access is granted to the job owner */
+ if (!sd_bus_track_contains(j->bus_track, sd_bus_message_get_sender(message))) {
+
+ /* And for everybody else consult polkit */
+ r = bus_verify_manage_units_async(j->unit->manager, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+ }
+
+ job_finish_and_invalidate(j, JOB_CANCELED, true, false);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_job_method_get_waiting_jobs(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ Job **list = NULL;
+ Job *j = userdata;
+ int r, i, n;
+
+ if (strstr(sd_bus_message_get_member(message), "After"))
+ n = job_get_after(j, &list);
+ else
+ n = job_get_before(j, &list);
+ if (n < 0)
+ return n;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(usssoo)");
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < n; i ++) {
+ _cleanup_free_ char *unit_path = NULL, *job_path = NULL;
+
+ job_path = job_dbus_path(list[i]);
+ if (!job_path)
+ return -ENOMEM;
+
+ unit_path = unit_dbus_path(list[i]->unit);
+ if (!unit_path)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(usssoo)",
+ list[i]->id,
+ list[i]->unit->id,
+ job_type_to_string(list[i]->type),
+ job_state_to_string(list[i]->state),
+ job_path,
+ unit_path);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+const sd_bus_vtable bus_job_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("Cancel", NULL, NULL, bus_job_method_cancel, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetAfter", NULL, "a(usssoo)", bus_job_method_get_waiting_jobs, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetBefore", NULL, "a(usssoo)", bus_job_method_get_waiting_jobs, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_PROPERTY("Id", "u", NULL, offsetof(Job, id), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Unit", "(so)", property_get_unit, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("JobType", "s", property_get_type, offsetof(Job, type), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("State", "s", property_get_state, offsetof(Job, state), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_VTABLE_END
+};
+
+static int send_new_signal(sd_bus *bus, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *p = NULL;
+ Job *j = userdata;
+ int r;
+
+ assert(bus);
+ assert(j);
+
+ p = job_dbus_path(j);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "JobNew");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "uos", j->id, p, j->unit->id);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+static int send_changed_signal(sd_bus *bus, void *userdata) {
+ _cleanup_free_ char *p = NULL;
+ Job *j = userdata;
+
+ assert(bus);
+ assert(j);
+
+ p = job_dbus_path(j);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_emit_properties_changed(bus, p, "org.freedesktop.systemd1.Job", "State", NULL);
+}
+
+void bus_job_send_change_signal(Job *j) {
+ int r;
+
+ assert(j);
+
+ /* Make sure that any change signal on the unit is reflected before we send out the change signal on the job */
+ bus_unit_send_pending_change_signal(j->unit, true);
+
+ if (j->in_dbus_queue) {
+ LIST_REMOVE(dbus_queue, j->manager->dbus_job_queue, j);
+ j->in_dbus_queue = false;
+ }
+
+ r = bus_foreach_bus(j->manager, j->bus_track, j->sent_dbus_new_signal ? send_changed_signal : send_new_signal, j);
+ if (r < 0)
+ log_debug_errno(r, "Failed to send job change signal for %u: %m", j->id);
+
+ j->sent_dbus_new_signal = true;
+}
+
+void bus_job_send_pending_change_signal(Job *j, bool including_new) {
+ assert(j);
+
+ if (!j->in_dbus_queue)
+ return;
+
+ if (!j->sent_dbus_new_signal && !including_new)
+ return;
+
+ if (MANAGER_IS_RELOADING(j->unit->manager))
+ return;
+
+ bus_job_send_change_signal(j);
+}
+
+static int send_removed_signal(sd_bus *bus, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *p = NULL;
+ Job *j = userdata;
+ int r;
+
+ assert(bus);
+ assert(j);
+
+ p = job_dbus_path(j);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "JobRemoved");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "uoss", j->id, p, j->unit->id, job_result_to_string(j->result));
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+void bus_job_send_removed_signal(Job *j) {
+ int r;
+
+ assert(j);
+
+ if (!j->sent_dbus_new_signal)
+ bus_job_send_change_signal(j);
+
+ /* Make sure that any change signal on the unit is reflected before we send out the change signal on the job */
+ bus_unit_send_pending_change_signal(j->unit, true);
+
+ r = bus_foreach_bus(j->manager, j->bus_track, send_removed_signal, j);
+ if (r < 0)
+ log_debug_errno(r, "Failed to send job remove signal for %u: %m", j->id);
+}
+
+static int bus_job_track_handler(sd_bus_track *t, void *userdata) {
+ Job *j = userdata;
+
+ assert(t);
+ assert(j);
+
+ j->bus_track = sd_bus_track_unref(j->bus_track); /* make sure we aren't called again */
+
+ /* Last client dropped off the bus, maybe we should GC this now? */
+ job_add_to_gc_queue(j);
+ return 0;
+}
+
+static int bus_job_allocate_bus_track(Job *j) {
+
+ assert(j);
+
+ if (j->bus_track)
+ return 0;
+
+ return sd_bus_track_new(j->unit->manager->api_bus, &j->bus_track, bus_job_track_handler, j);
+}
+
+int bus_job_coldplug_bus_track(Job *j) {
+ int r = 0;
+ _cleanup_strv_free_ char **deserialized_clients = NULL;
+
+ assert(j);
+
+ deserialized_clients = TAKE_PTR(j->deserialized_clients);
+
+ if (strv_isempty(deserialized_clients))
+ return 0;
+
+ if (!j->manager->api_bus)
+ return 0;
+
+ r = bus_job_allocate_bus_track(j);
+ if (r < 0)
+ return r;
+
+ return bus_track_add_name_many(j->bus_track, deserialized_clients);
+}
+
+int bus_job_track_sender(Job *j, sd_bus_message *m) {
+ int r;
+
+ assert(j);
+ assert(m);
+
+ if (sd_bus_message_get_bus(m) != j->unit->manager->api_bus) {
+ j->ref_by_private_bus = true;
+ return 0;
+ }
+
+ r = bus_job_allocate_bus_track(j);
+ if (r < 0)
+ return r;
+
+ return sd_bus_track_add_sender(j->bus_track, m);
+}
diff --git a/src/core/dbus-job.h b/src/core/dbus-job.h
new file mode 100644
index 0000000..c9f6fc7
--- /dev/null
+++ b/src/core/dbus-job.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "job.h"
+
+extern const sd_bus_vtable bus_job_vtable[];
+
+int bus_job_method_cancel(sd_bus_message *message, void *job, sd_bus_error *error);
+int bus_job_method_get_waiting_jobs(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+void bus_job_send_change_signal(Job *j);
+void bus_job_send_pending_change_signal(Job *j, bool including_new);
+void bus_job_send_removed_signal(Job *j);
+
+int bus_job_coldplug_bus_track(Job *j);
+int bus_job_track_sender(Job *j, sd_bus_message *m);
diff --git a/src/core/dbus-kill.c b/src/core/dbus-kill.c
new file mode 100644
index 0000000..e2b3a0d
--- /dev/null
+++ b/src/core/dbus-kill.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "bus-util.h"
+#include "dbus-kill.h"
+#include "dbus-util.h"
+#include "kill.h"
+#include "signal-util.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_kill_mode, kill_mode, KillMode);
+
+const sd_bus_vtable bus_kill_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("KillMode", "s", property_get_kill_mode, offsetof(KillContext, kill_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KillSignal", "i", bus_property_get_int, offsetof(KillContext, kill_signal), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FinalKillSignal", "i", bus_property_get_int, offsetof(KillContext, final_kill_signal), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SendSIGKILL", "b", bus_property_get_bool, offsetof(KillContext, send_sigkill), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SendSIGHUP", "b", bus_property_get_bool, offsetof(KillContext, send_sighup), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("WatchdogSignal", "i", bus_property_get_int, offsetof(KillContext, watchdog_signal), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_VTABLE_END
+};
+
+static BUS_DEFINE_SET_TRANSIENT_PARSE(kill_mode, KillMode, kill_mode_from_string);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING(kill_signal, "i", int32_t, int, "%" PRIi32, signal_to_string_with_check);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING(final_kill_signal, "i", int32_t, int, "%" PRIi32, signal_to_string_with_check);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING(watchdog_signal, "i", int32_t, int, "%" PRIi32, signal_to_string_with_check);
+
+int bus_kill_context_set_transient_property(
+ Unit *u,
+ KillContext *c,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ assert(u);
+ assert(c);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "KillMode"))
+ return bus_set_transient_kill_mode(u, name, &c->kill_mode, message, flags, error);
+
+ if (streq(name, "SendSIGHUP"))
+ return bus_set_transient_bool(u, name, &c->send_sighup, message, flags, error);
+
+ if (streq(name, "SendSIGKILL"))
+ return bus_set_transient_bool(u, name, &c->send_sigkill, message, flags, error);
+
+ if (streq(name, "KillSignal"))
+ return bus_set_transient_kill_signal(u, name, &c->kill_signal, message, flags, error);
+
+ if (streq(name, "FinalKillSignal"))
+ return bus_set_transient_final_kill_signal(u, name, &c->final_kill_signal, message, flags, error);
+
+ if (streq(name, "WatchdogSignal"))
+ return bus_set_transient_watchdog_signal(u, name, &c->watchdog_signal, message, flags, error);
+
+ return 0;
+}
diff --git a/src/core/dbus-kill.h b/src/core/dbus-kill.h
new file mode 100644
index 0000000..8192e94
--- /dev/null
+++ b/src/core/dbus-kill.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "kill.h"
+#include "unit.h"
+
+extern const sd_bus_vtable bus_kill_vtable[];
+
+int bus_kill_context_set_transient_property(Unit *u, KillContext *c, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c
new file mode 100644
index 0000000..88e4c6b
--- /dev/null
+++ b/src/core/dbus-manager.c
@@ -0,0 +1,2718 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sys/prctl.h>
+#include <sys/statvfs.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "build.h"
+#include "bus-common-errors.h"
+#include "dbus-execute.h"
+#include "dbus-job.h"
+#include "dbus-manager.h"
+#include "dbus-scope.h"
+#include "dbus-unit.h"
+#include "dbus.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "install.h"
+#include "log.h"
+#include "os-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "selinux-access.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "user-util.h"
+#include "virt.h"
+#include "watchdog.h"
+
+/* Require 16MiB free in /run/systemd for reloading/reexecing. After all we need to serialize our state there, and if
+ * we can't we'll fail badly. */
+#define RELOAD_DISK_SPACE_MIN (UINT64_C(16) * UINT64_C(1024) * UINT64_C(1024))
+
+static UnitFileFlags unit_file_bools_to_flags(bool runtime, bool force) {
+ return (runtime ? UNIT_FILE_RUNTIME : 0) |
+ (force ? UNIT_FILE_FORCE : 0);
+}
+
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_version, "s", GIT_VERSION);
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_features, "s", SYSTEMD_FEATURES);
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_architecture, "s", architecture_to_string(uname_architecture()));
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_log_target, "s", log_target_to_string(log_get_target()));
+static BUS_DEFINE_PROPERTY_GET2(property_get_system_state, "s", Manager, manager_state, manager_state_to_string);
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_timer_slack_nsec, "t", (uint64_t) prctl(PR_GET_TIMERSLACK));
+static BUS_DEFINE_PROPERTY_GET_REF(property_get_hashmap_size, "u", Hashmap *, hashmap_size);
+static BUS_DEFINE_PROPERTY_GET_REF(property_get_set_size, "u", Set *, set_size);
+
+static int property_get_virtualization(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int v;
+
+ assert(bus);
+ assert(reply);
+
+ v = detect_virtualization();
+
+ /* Make sure to return the empty string when we detect no virtualization, as that is the API.
+ *
+ * https://github.com/systemd/systemd/issues/1423
+ */
+
+ return sd_bus_message_append(
+ reply, "s",
+ v == VIRTUALIZATION_NONE ? NULL : virtualization_to_string(v));
+}
+
+static int property_get_tainted(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *s = NULL;
+ Manager *m = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ s = manager_taint_string(m);
+ if (!s)
+ return log_oom();
+
+ return sd_bus_message_append(reply, "s", s);
+}
+
+static int property_set_log_target(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ const char *t;
+ int r;
+
+ assert(bus);
+ assert(value);
+
+ r = sd_bus_message_read(value, "s", &t);
+ if (r < 0)
+ return r;
+
+ if (isempty(t))
+ manager_restore_original_log_target(m);
+ else {
+ LogTarget target;
+
+ target = log_target_from_string(t);
+ if (target < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid log target '%s'", t);
+
+ manager_override_log_target(m, target);
+ }
+
+ return 0;
+}
+
+static int property_get_log_level(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = log_level_to_string_alloc(log_get_max_level(), &t);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append(reply, "s", t);
+}
+
+static int property_set_log_level(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ const char *t;
+ int r;
+
+ assert(bus);
+ assert(value);
+
+ r = sd_bus_message_read(value, "s", &t);
+ if (r < 0)
+ return r;
+
+ if (isempty(t))
+ manager_restore_original_log_level(m);
+ else {
+ int level;
+
+ level = log_level_from_string(t);
+ if (level < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid log level '%s'", t);
+
+ manager_override_log_level(m, level);
+ }
+
+ return 0;
+}
+
+static int property_get_progress(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ double d;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ if (MANAGER_IS_FINISHED(m))
+ d = 1.0;
+ else
+ d = 1.0 - ((double) hashmap_size(m->jobs) / (double) m->n_installed_jobs);
+
+ return sd_bus_message_append(reply, "d", d);
+}
+
+static int property_get_environment(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ r = manager_get_effective_environment(m, &l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append_strv(reply, l);
+}
+
+static int property_get_show_status(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ int b;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ b = IN_SET(m->show_status, SHOW_STATUS_TEMPORARY, SHOW_STATUS_YES);
+ return sd_bus_message_append_basic(reply, 'b', &b);
+}
+
+static int property_set_runtime_watchdog(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ usec_t *t = userdata;
+ int r;
+
+ assert(bus);
+ assert(value);
+
+ assert_cc(sizeof(usec_t) == sizeof(uint64_t));
+
+ r = sd_bus_message_read(value, "t", t);
+ if (r < 0)
+ return r;
+
+ return watchdog_set_timeout(t);
+}
+
+static int bus_get_unit_by_name(Manager *m, sd_bus_message *message, const char *name, Unit **ret_unit, sd_bus_error *error) {
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(ret_unit);
+
+ /* More or less a wrapper around manager_get_unit() that generates nice errors and has one trick up its sleeve:
+ * if the name is specified empty we use the client's unit. */
+
+ if (isempty(name)) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ pid_t pid;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+
+ u = manager_get_unit_by_pid(m, pid);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Client not member of any unit.");
+ } else {
+ u = manager_get_unit(m, name);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s not loaded.", name);
+ }
+
+ *ret_unit = u;
+ return 0;
+}
+
+static int bus_load_unit_by_name(Manager *m, sd_bus_message *message, const char *name, Unit **ret_unit, sd_bus_error *error) {
+ assert(m);
+ assert(message);
+ assert(ret_unit);
+
+ /* Pretty much the same as bus_get_unit_by_name(), but we also load the unit if necessary. */
+
+ if (isempty(name))
+ return bus_get_unit_by_name(m, message, name, ret_unit, error);
+
+ return manager_load_unit(m, name, NULL, error, ret_unit);
+}
+
+static int method_get_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *path = NULL;
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_get_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ r = mac_selinux_unit_access_check(u, message, "status", error);
+ if (r < 0)
+ return r;
+
+ path = unit_dbus_path(u);
+ if (!path)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", path);
+}
+
+static int method_get_unit_by_pid(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *path = NULL;
+ Manager *m = userdata;
+ pid_t pid;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(pid_t) == sizeof(uint32_t));
+
+ /* Anyone can call this method */
+
+ r = sd_bus_message_read(message, "u", &pid);
+ if (r < 0)
+ return r;
+ if (pid < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid PID " PID_FMT, pid);
+
+ if (pid == 0) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+ }
+
+ u = manager_get_unit_by_pid(m, pid);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_UNIT_FOR_PID, "PID "PID_FMT" does not belong to any loaded unit.", pid);
+
+ r = mac_selinux_unit_access_check(u, message, "status", error);
+ if (r < 0)
+ return r;
+
+ path = unit_dbus_path(u);
+ if (!path)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", path);
+}
+
+static int method_get_unit_by_invocation_id(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *path = NULL;
+ Manager *m = userdata;
+ sd_id128_t id;
+ const void *a;
+ Unit *u;
+ size_t sz;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = sd_bus_message_read_array(message, 'y', &a, &sz);
+ if (r < 0)
+ return r;
+ if (sz == 0)
+ id = SD_ID128_NULL;
+ else if (sz == 16)
+ memcpy(&id, a, sz);
+ else
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid invocation ID");
+
+ if (sd_id128_is_null(id)) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ pid_t pid;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+
+ u = manager_get_unit_by_pid(m, pid);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Client " PID_FMT " not member of any unit.", pid);
+ } else {
+ u = hashmap_get(m->units_by_invocation_id, &id);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(id));
+ }
+
+ r = mac_selinux_unit_access_check(u, message, "status", error);
+ if (r < 0)
+ return r;
+
+ /* So here's a special trick: the bus path we return actually references the unit by its invocation ID instead
+ * of the unit name. This means it stays valid only as long as the invocation ID stays the same. */
+ path = unit_dbus_path_invocation_id(u);
+ if (!path)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", path);
+}
+
+static int method_get_unit_by_control_group(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *path = NULL;
+ Manager *m = userdata;
+ const char *cgroup;
+ Unit *u;
+ int r;
+
+ r = sd_bus_message_read(message, "s", &cgroup);
+ if (r < 0)
+ return r;
+
+ u = manager_get_unit_by_cgroup(m, cgroup);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Control group '%s' is not valid or not managed by this instance", cgroup);
+
+ r = mac_selinux_unit_access_check(u, message, "status", error);
+ if (r < 0)
+ return r;
+
+ path = unit_dbus_path(u);
+ if (!path)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", path);
+}
+
+static int method_load_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *path = NULL;
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_load_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ r = mac_selinux_unit_access_check(u, message, "status", error);
+ if (r < 0)
+ return r;
+
+ path = unit_dbus_path(u);
+ if (!path)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", path);
+}
+
+static int method_start_unit_generic(sd_bus_message *message, Manager *m, JobType job_type, bool reload_if_possible, sd_bus_error *error) {
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_load_unit(m, name, NULL, error, &u);
+ if (r < 0)
+ return r;
+
+ return bus_unit_method_start_generic(message, u, job_type, reload_if_possible, error);
+}
+
+static int method_start_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_start_unit_generic(message, userdata, JOB_START, false, error);
+}
+
+static int method_stop_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_start_unit_generic(message, userdata, JOB_STOP, false, error);
+}
+
+static int method_reload_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_start_unit_generic(message, userdata, JOB_RELOAD, false, error);
+}
+
+static int method_restart_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_start_unit_generic(message, userdata, JOB_RESTART, false, error);
+}
+
+static int method_try_restart_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_start_unit_generic(message, userdata, JOB_TRY_RESTART, false, error);
+}
+
+static int method_reload_or_restart_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_start_unit_generic(message, userdata, JOB_RESTART, true, error);
+}
+
+static int method_reload_or_try_restart_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_start_unit_generic(message, userdata, JOB_TRY_RESTART, true, error);
+}
+
+static int method_start_unit_replace(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *old_name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &old_name);
+ if (r < 0)
+ return r;
+
+ r = bus_get_unit_by_name(m, message, old_name, &u, error);
+ if (r < 0)
+ return r;
+ if (!u->job || u->job->type != JOB_START)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_JOB, "No job queued for unit %s", old_name);
+
+ return method_start_unit_generic(message, m, JOB_START, false, error);
+}
+
+static int method_kill_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_get_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ return bus_unit_method_kill(message, u, error);
+}
+
+static int method_reset_failed_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_get_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ return bus_unit_method_reset_failed(message, u, error);
+}
+
+static int method_set_unit_properties(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_load_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ r = bus_unit_validate_load_state(u, error);
+ if (r < 0)
+ return r;
+
+ return bus_unit_method_set_properties(message, u, error);
+}
+
+static int method_ref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_load_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ r = bus_unit_validate_load_state(u, error);
+ if (r < 0)
+ return r;
+
+ return bus_unit_method_ref(message, u, error);
+}
+
+static int method_unref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_load_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ r = bus_unit_validate_load_state(u, error);
+ if (r < 0)
+ return r;
+
+ return bus_unit_method_unref(message, u, error);
+}
+
+static int reply_unit_info(sd_bus_message *reply, Unit *u) {
+ _cleanup_free_ char *unit_path = NULL, *job_path = NULL;
+ Unit *following;
+
+ following = unit_following(u);
+
+ unit_path = unit_dbus_path(u);
+ if (!unit_path)
+ return -ENOMEM;
+
+ if (u->job) {
+ job_path = job_dbus_path(u->job);
+ if (!job_path)
+ return -ENOMEM;
+ }
+
+ return sd_bus_message_append(
+ reply, "(ssssssouso)",
+ u->id,
+ unit_description(u),
+ unit_load_state_to_string(u->load_state),
+ unit_active_state_to_string(unit_active_state(u)),
+ unit_sub_state_to_string(u),
+ following ? following->id : "",
+ unit_path,
+ u->job ? u->job->id : 0,
+ u->job ? job_type_to_string(u->job->type) : "",
+ job_path ? job_path : "/");
+}
+
+static int method_list_units_by_names(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ int r;
+ char **unit;
+ _cleanup_strv_free_ char **units = NULL;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_strv(message, &units);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssssssouso)");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(unit, units) {
+ Unit *u;
+
+ if (!unit_name_is_valid(*unit, UNIT_NAME_ANY))
+ continue;
+
+ r = bus_load_unit_by_name(m, message, *unit, &u, error);
+ if (r < 0)
+ return r;
+
+ r = reply_unit_info(reply, u);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_get_unit_processes(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_get_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ return bus_unit_method_get_processes(message, u, error);
+}
+
+static int method_attach_processes_to_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_get_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ return bus_unit_method_attach_processes(message, u, error);
+}
+
+static int transient_unit_from_message(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name,
+ Unit **unit,
+ sd_bus_error *error) {
+
+ UnitType t;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(name);
+
+ t = unit_name_to_type(name);
+ if (t < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid unit name or type.");
+
+ if (!unit_vtable[t]->can_transient)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit type %s does not support transient units.", unit_type_to_string(t));
+
+ r = manager_load_unit(m, name, NULL, error, &u);
+ if (r < 0)
+ return r;
+
+ if (!unit_is_pristine(u))
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS, "Unit %s already exists.", name);
+
+ /* OK, the unit failed to load and is unreferenced, now let's
+ * fill in the transient data instead */
+ r = unit_make_transient(u);
+ if (r < 0)
+ return r;
+
+ /* Set our properties */
+ r = bus_unit_set_properties(u, message, UNIT_RUNTIME, false, error);
+ if (r < 0)
+ return r;
+
+ /* If the client asked for it, automatically add a reference to this unit. */
+ if (u->bus_track_add) {
+ r = bus_unit_track_add_sender(u, message);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch sender: %m");
+ }
+
+ /* Now load the missing bits of the unit we just created */
+ unit_add_to_load_queue(u);
+ manager_dispatch_load_queue(m);
+
+ *unit = u;
+
+ return 0;
+}
+
+static int transient_aux_units_from_message(
+ Manager *m,
+ sd_bus_message *message,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(m);
+ assert(message);
+
+ r = sd_bus_message_enter_container(message, 'a', "(sa(sv))");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_enter_container(message, 'r', "sa(sv)")) > 0) {
+ const char *name = NULL;
+ Unit *u;
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = transient_unit_from_message(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int method_start_transient_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *name, *smode;
+ Manager *m = userdata;
+ JobMode mode;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "start", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "ss", &name, &smode);
+ if (r < 0)
+ return r;
+
+ mode = job_mode_from_string(smode);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Job mode %s is invalid.", smode);
+
+ r = bus_verify_manage_units_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = transient_unit_from_message(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ r = transient_aux_units_from_message(m, message, error);
+ if (r < 0)
+ return r;
+
+ /* Finally, start it */
+ return bus_unit_queue_job(message, u, JOB_START, mode, false, error);
+}
+
+static int method_get_job(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *path = NULL;
+ Manager *m = userdata;
+ uint32_t id;
+ Job *j;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = sd_bus_message_read(message, "u", &id);
+ if (r < 0)
+ return r;
+
+ j = manager_get_job(m, id);
+ if (!j)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_JOB, "Job %u does not exist.", (unsigned) id);
+
+ r = mac_selinux_unit_access_check(j->unit, message, "status", error);
+ if (r < 0)
+ return r;
+
+ path = job_dbus_path(j);
+ if (!path)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", path);
+}
+
+static int method_cancel_job(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ uint32_t id;
+ Job *j;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "u", &id);
+ if (r < 0)
+ return r;
+
+ j = manager_get_job(m, id);
+ if (!j)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_JOB, "Job %u does not exist.", (unsigned) id);
+
+ return bus_job_method_cancel(message, j, error);
+}
+
+static int method_clear_jobs(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ manager_clear_jobs(m);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_reset_failed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ manager_reset_failed(m);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int list_units_filtered(sd_bus_message *message, void *userdata, sd_bus_error *error, char **states, char **patterns) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ const char *k;
+ Iterator i;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssssssouso)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_KEY(u, k, m->units, i) {
+ if (k != u->id)
+ continue;
+
+ if (!strv_isempty(states) &&
+ !strv_contains(states, unit_load_state_to_string(u->load_state)) &&
+ !strv_contains(states, unit_active_state_to_string(unit_active_state(u))) &&
+ !strv_contains(states, unit_sub_state_to_string(u)))
+ continue;
+
+ if (!strv_isempty(patterns) &&
+ !strv_fnmatch_or_empty(patterns, u->id, FNM_NOESCAPE))
+ continue;
+
+ r = reply_unit_info(reply, u);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_list_units(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return list_units_filtered(message, userdata, error, NULL, NULL);
+}
+
+static int method_list_units_filtered(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **states = NULL;
+ int r;
+
+ r = sd_bus_message_read_strv(message, &states);
+ if (r < 0)
+ return r;
+
+ return list_units_filtered(message, userdata, error, states, NULL);
+}
+
+static int method_list_units_by_patterns(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **states = NULL;
+ _cleanup_strv_free_ char **patterns = NULL;
+ int r;
+
+ r = sd_bus_message_read_strv(message, &states);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &patterns);
+ if (r < 0)
+ return r;
+
+ return list_units_filtered(message, userdata, error, states, patterns);
+}
+
+static int method_list_jobs(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ Iterator i;
+ Job *j;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(usssoo)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(j, m->jobs, i) {
+ _cleanup_free_ char *unit_path = NULL, *job_path = NULL;
+
+ job_path = job_dbus_path(j);
+ if (!job_path)
+ return -ENOMEM;
+
+ unit_path = unit_dbus_path(j->unit);
+ if (!unit_path)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(
+ reply, "(usssoo)",
+ j->id,
+ j->unit->id,
+ job_type_to_string(j->type),
+ job_state_to_string(j->state),
+ job_path,
+ unit_path);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_subscribe(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ if (sd_bus_message_get_bus(message) == m->api_bus) {
+
+ /* Note that direct bus connection subscribe by
+ * default, we only track peers on the API bus here */
+
+ if (!m->subscribed) {
+ r = sd_bus_track_new(sd_bus_message_get_bus(message), &m->subscribed, NULL, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_track_add_sender(m->subscribed, message);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, BUS_ERROR_ALREADY_SUBSCRIBED, "Client is already subscribed.");
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_unsubscribe(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ if (sd_bus_message_get_bus(message) == m->api_bus) {
+ r = sd_bus_track_remove_sender(m->subscribed, message);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_SUBSCRIBED, "Client is not subscribed.");
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int dump_impl(sd_bus_message *message, void *userdata, sd_bus_error *error, int (*reply)(sd_bus_message *, char *)) {
+ _cleanup_free_ char *dump = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ r = manager_get_dump_string(m, &dump);
+ if (r < 0)
+ return r;
+
+ return reply(message, dump);
+}
+
+static int reply_dump(sd_bus_message *message, char *dump) {
+ return sd_bus_reply_method_return(message, "s", dump);
+}
+
+static int method_dump(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return dump_impl(message, userdata, error, reply_dump);
+}
+
+static int reply_dump_by_fd(sd_bus_message *message, char *dump) {
+ _cleanup_close_ int fd = -1;
+
+ fd = acquire_data_fd(dump, strlen(dump), 0);
+ if (fd < 0)
+ return fd;
+
+ return sd_bus_reply_method_return(message, "h", fd);
+}
+
+static int method_dump_by_fd(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return dump_impl(message, userdata, error, reply_dump_by_fd);
+}
+
+static int method_refuse_snapshot(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Support for snapshots has been removed.");
+}
+
+static int verify_run_space(const char *message, sd_bus_error *error) {
+ struct statvfs svfs;
+ uint64_t available;
+
+ if (statvfs("/run/systemd", &svfs) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to statvfs(/run/systemd): %m");
+
+ available = (uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize;
+
+ if (available < RELOAD_DISK_SPACE_MIN) {
+ char fb_available[FORMAT_BYTES_MAX], fb_need[FORMAT_BYTES_MAX];
+ return sd_bus_error_setf(error,
+ BUS_ERROR_DISK_FULL,
+ "%s, not enough space available on /run/systemd. "
+ "Currently, %s are free, but a safety buffer of %s is enforced.",
+ message,
+ format_bytes(fb_available, sizeof(fb_available), available),
+ format_bytes(fb_need, sizeof(fb_need), RELOAD_DISK_SPACE_MIN));
+ }
+
+ return 0;
+}
+
+int verify_run_space_and_log(const char *message) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ r = verify_run_space(message, &error);
+ if (r < 0)
+ return log_error_errno(r, "%s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int method_reload(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = verify_run_space("Refusing to reload", error);
+ if (r < 0)
+ return r;
+
+ r = mac_selinux_access_check(message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_reload_daemon_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ /* Instead of sending the reply back right away, we just
+ * remember that we need to and then send it after the reload
+ * is finished. That way the caller knows when the reload
+ * finished. */
+
+ assert(!m->pending_reload_message);
+ r = sd_bus_message_new_method_return(message, &m->pending_reload_message);
+ if (r < 0)
+ return r;
+
+ m->objective = MANAGER_RELOAD;
+
+ return 1;
+}
+
+static int method_reexecute(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = verify_run_space("Refusing to reexecute", error);
+ if (r < 0)
+ return r;
+
+ r = mac_selinux_access_check(message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_reload_daemon_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ /* We don't send a reply back here, the client should
+ * just wait for us disconnecting. */
+
+ m->objective = MANAGER_REEXECUTE;
+ return 1;
+}
+
+static int method_exit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "halt", error);
+ if (r < 0)
+ return r;
+
+ /* Exit() (in contrast to SetExitCode()) is actually allowed even if
+ * we are running on the host. It will fall back on reboot() in
+ * systemd-shutdown if it cannot do the exit() because it isn't a
+ * container. */
+
+ m->objective = MANAGER_EXIT;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_reboot(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "reboot", error);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Reboot is only supported for system managers.");
+
+ m->objective = MANAGER_REBOOT;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_poweroff(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "halt", error);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Powering off is only supported for system managers.");
+
+ m->objective = MANAGER_POWEROFF;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_halt(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "halt", error);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Halt is only supported for system managers.");
+
+ m->objective = MANAGER_HALT;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_kexec(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "reboot", error);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "KExec is only supported for system managers.");
+
+ m->objective = MANAGER_KEXEC;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_switch_root(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ char *ri = NULL, *rt = NULL;
+ const char *root, *init;
+ Manager *m = userdata;
+ struct statvfs svfs;
+ uint64_t available;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ if (statvfs("/run/systemd", &svfs) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to statvfs(/run/systemd): %m");
+
+ available = (uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize;
+
+ if (available < RELOAD_DISK_SPACE_MIN) {
+ char fb_available[FORMAT_BYTES_MAX], fb_need[FORMAT_BYTES_MAX];
+ log_warning("Dangerously low amount of free space on /run/systemd, root switching operation might not complete successfully. "
+ "Currently, %s are free, but %s are suggested. Proceeding anyway.",
+ format_bytes(fb_available, sizeof(fb_available), available),
+ format_bytes(fb_need, sizeof(fb_need), RELOAD_DISK_SPACE_MIN));
+ }
+
+ r = mac_selinux_access_check(message, "reboot", error);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Root switching is only supported by system manager.");
+
+ r = sd_bus_message_read(message, "ss", &root, &init);
+ if (r < 0)
+ return r;
+
+ if (isempty(root))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "New root directory may not be the empty string.");
+ if (!path_is_absolute(root))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "New root path '%s' is not absolute.", root);
+ if (path_equal(root, "/"))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "New root directory cannot be the old root directory.");
+
+ /* Safety check */
+ if (isempty(init)) {
+ r = path_is_os_tree(root);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to determine whether root path '%s' contains an OS tree: %m", root);
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Specified switch root path '%s' does not seem to be an OS tree. os-release file is missing.", root);
+ } else {
+ _cleanup_free_ char *chased = NULL;
+
+ if (!path_is_absolute(init))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path to init binary '%s' not absolute.", init);
+
+ r = chase_symlinks(init, root, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &chased);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Could not resolve init executable %s: %m", init);
+
+ if (laccess(chased, X_OK) < 0) {
+ if (errno == EACCES)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Init binary %s is not executable.", init);
+
+ return sd_bus_error_set_errnof(error, r, "Could not check whether init binary %s is executable: %m", init);
+ }
+ }
+
+ rt = strdup(root);
+ if (!rt)
+ return -ENOMEM;
+
+ if (!isempty(init)) {
+ ri = strdup(init);
+ if (!ri) {
+ free(rt);
+ return -ENOMEM;
+ }
+ }
+
+ free(m->switch_root);
+ m->switch_root = rt;
+
+ free(m->switch_root_init);
+ m->switch_root_init = ri;
+
+ m->objective = MANAGER_SWITCH_ROOT;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_set_environment(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **plus = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &plus);
+ if (r < 0)
+ return r;
+ if (!strv_env_is_valid(plus))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment assignments");
+
+ r = bus_verify_set_environment_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = manager_client_environment_modify(m, NULL, plus);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_unset_environment(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **minus = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &minus);
+ if (r < 0)
+ return r;
+
+ if (!strv_env_name_or_assignment_is_valid(minus))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment variable names or assignments");
+
+ r = bus_verify_set_environment_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = manager_client_environment_modify(m, minus, NULL);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_unset_and_set_environment(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **minus = NULL, **plus = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &minus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &plus);
+ if (r < 0)
+ return r;
+
+ if (!strv_env_name_or_assignment_is_valid(minus))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment variable names or assignments");
+ if (!strv_env_is_valid(plus))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment assignments");
+
+ r = bus_verify_set_environment_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = manager_client_environment_modify(m, minus, plus);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_set_exit_code(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ uint8_t code;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "exit", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_basic(message, 'y', &code);
+ if (r < 0)
+ return r;
+
+ if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "ExitCode can only be set for user service managers or in containers.");
+
+ m->return_value = code;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_lookup_dynamic_user_by_name(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_basic(message, 's', &name);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance.");
+ if (!valid_user_group_name(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User name invalid: %s", name);
+
+ r = dynamic_user_lookup_name(m, name, &uid);
+ if (r == -ESRCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DYNAMIC_USER, "Dynamic user %s does not exist.", name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "u", (uint32_t) uid);
+}
+
+static int method_lookup_dynamic_user_by_uid(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *name = NULL;
+ Manager *m = userdata;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(uid) == sizeof(uint32_t));
+ r = sd_bus_message_read_basic(message, 'u', &uid);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance.");
+ if (!uid_is_valid(uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User ID invalid: " UID_FMT, uid);
+
+ r = dynamic_user_lookup_uid(m, uid, &name);
+ if (r == -ESRCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DYNAMIC_USER, "Dynamic user ID " UID_FMT " does not exist.", uid);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "s", name);
+}
+
+static int method_get_dynamic_users(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ DynamicUser *d;
+ Iterator i;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance.");
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(us)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(d, m->dynamic_users, i) {
+ uid_t uid;
+
+ r = dynamic_user_current(d, &uid);
+ if (r == -EAGAIN) /* not realized yet? */
+ continue;
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Failed to lookup a dynamic user.");
+
+ r = sd_bus_message_append(reply, "(us)", uid, d->name);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int list_unit_files_by_patterns(sd_bus_message *message, void *userdata, sd_bus_error *error, char **states, char **patterns) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ UnitFileList *item;
+ Hashmap *h;
+ Iterator i;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ h = hashmap_new(&string_hash_ops);
+ if (!h)
+ return -ENOMEM;
+
+ r = unit_file_get_list(m->unit_file_scope, NULL, h, states, patterns);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_open_container(reply, 'a', "(ss)");
+ if (r < 0)
+ goto fail;
+
+ HASHMAP_FOREACH(item, h, i) {
+
+ r = sd_bus_message_append(reply, "(ss)", item->path, unit_file_state_to_string(item->state));
+ if (r < 0)
+ goto fail;
+ }
+
+ unit_file_list_free(h);
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+
+fail:
+ unit_file_list_free(h);
+ return r;
+}
+
+static int method_list_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return list_unit_files_by_patterns(message, userdata, error, NULL, NULL);
+}
+
+static int method_list_unit_files_by_patterns(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **states = NULL;
+ _cleanup_strv_free_ char **patterns = NULL;
+ int r;
+
+ r = sd_bus_message_read_strv(message, &states);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &patterns);
+ if (r < 0)
+ return r;
+
+ return list_unit_files_by_patterns(message, userdata, error, states, patterns);
+}
+
+static int method_get_unit_file_state(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ UnitFileState state;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = unit_file_get_state(m->unit_file_scope, NULL, name, &state);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "s", unit_file_state_to_string(state));
+}
+
+static int method_get_default_target(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *default_target = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ r = unit_file_get_default(m->unit_file_scope, NULL, &default_target);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "s", default_target);
+}
+
+static int send_unit_files_changed(sd_bus *bus, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *message = NULL;
+ int r;
+
+ assert(bus);
+
+ r = sd_bus_message_new_signal(bus, &message, "/org/freedesktop/systemd1", "org.freedesktop.systemd1.Manager", "UnitFilesChanged");
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, message, NULL);
+}
+
+/* Create an error reply, using the error information from changes[]
+ * if possible, and fall back to generating an error from error code c.
+ * The error message only describes the first error.
+ *
+ * Coordinate with unit_file_dump_changes() in install.c.
+ */
+static int install_error(
+ sd_bus_error *error,
+ int c,
+ UnitFileChange *changes,
+ size_t n_changes) {
+
+ size_t i;
+ int r;
+
+ for (i = 0; i < n_changes; i++)
+
+ switch(changes[i].type) {
+
+ case 0 ... INT_MAX:
+ continue;
+
+ case -EEXIST:
+ if (changes[i].source)
+ r = sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS,
+ "File %s already exists and is a symlink to %s.",
+ changes[i].path, changes[i].source);
+ else
+ r = sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS,
+ "File %s already exists.",
+ changes[i].path);
+ goto found;
+
+ case -ERFKILL:
+ r = sd_bus_error_setf(error, BUS_ERROR_UNIT_MASKED,
+ "Unit file %s is masked.", changes[i].path);
+ goto found;
+
+ case -EADDRNOTAVAIL:
+ r = sd_bus_error_setf(error, BUS_ERROR_UNIT_GENERATED,
+ "Unit %s is transient or generated.", changes[i].path);
+ goto found;
+
+ case -ELOOP:
+ r = sd_bus_error_setf(error, BUS_ERROR_UNIT_LINKED,
+ "Refusing to operate on linked unit file %s", changes[i].path);
+ goto found;
+
+ case -ENOENT:
+ r = sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Unit file %s does not exist.", changes[i].path);
+ goto found;
+
+ default:
+ r = sd_bus_error_set_errnof(error, changes[i].type, "File %s: %m", changes[i].path);
+ goto found;
+ }
+
+ r = c < 0 ? c : -EINVAL;
+
+ found:
+ unit_file_changes_free(changes, n_changes);
+ return r;
+}
+
+static int reply_unit_file_changes_and_free(
+ Manager *m,
+ sd_bus_message *message,
+ int carries_install_info,
+ UnitFileChange *changes,
+ size_t n_changes,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ bool bad = false, good = false;
+ size_t i;
+ int r;
+
+ if (unit_file_changes_have_modification(changes, n_changes)) {
+ r = bus_foreach_bus(m, NULL, send_unit_files_changed, NULL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to send UnitFilesChanged signal: %m");
+ }
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ goto fail;
+
+ if (carries_install_info >= 0) {
+ r = sd_bus_message_append(reply, "b", carries_install_info);
+ if (r < 0)
+ goto fail;
+ }
+
+ r = sd_bus_message_open_container(reply, 'a', "(sss)");
+ if (r < 0)
+ goto fail;
+
+ for (i = 0; i < n_changes; i++) {
+
+ if (changes[i].type < 0) {
+ bad = true;
+ continue;
+ }
+
+ r = sd_bus_message_append(
+ reply, "(sss)",
+ unit_file_change_type_to_string(changes[i].type),
+ changes[i].path,
+ changes[i].source);
+ if (r < 0)
+ goto fail;
+
+ good = true;
+ }
+
+ /* If there was a failed change, and no successful change, then return the first failure as proper method call
+ * error. */
+ if (bad && !good)
+ return install_error(error, 0, changes, n_changes);
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ goto fail;
+
+ unit_file_changes_free(changes, n_changes);
+ return sd_bus_send(NULL, reply, NULL);
+
+fail:
+ unit_file_changes_free(changes, n_changes);
+ return r;
+}
+
+static int method_enable_unit_files_generic(
+ sd_bus_message *message,
+ Manager *m,
+ int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, size_t *n_changes),
+ bool carries_install_info,
+ sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ UnitFileFlags flags;
+ int runtime, force, r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "bb", &runtime, &force);
+ if (r < 0)
+ return r;
+
+ flags = unit_file_bools_to_flags(runtime, force);
+
+ r = bus_verify_manage_unit_files_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = call(m->unit_file_scope, flags, NULL, l, &changes, &n_changes);
+ if (r < 0)
+ return install_error(error, r, changes, n_changes);
+
+ return reply_unit_file_changes_and_free(m, message, carries_install_info ? r : -1, changes, n_changes, error);
+}
+
+static int method_enable_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_enable_unit_files_generic(message, userdata, unit_file_enable, true, error);
+}
+
+static int method_reenable_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_enable_unit_files_generic(message, userdata, unit_file_reenable, true, error);
+}
+
+static int method_link_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_enable_unit_files_generic(message, userdata, unit_file_link, false, error);
+}
+
+static int unit_file_preset_without_mode(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char **files, UnitFileChange **changes, size_t *n_changes) {
+ return unit_file_preset(scope, flags, root_dir, files, UNIT_FILE_PRESET_FULL, changes, n_changes);
+}
+
+static int method_preset_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_enable_unit_files_generic(message, userdata, unit_file_preset_without_mode, true, error);
+}
+
+static int method_mask_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_enable_unit_files_generic(message, userdata, unit_file_mask, false, error);
+}
+
+static int method_preset_unit_files_with_mode(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ Manager *m = userdata;
+ UnitFilePresetMode mm;
+ int runtime, force, r;
+ UnitFileFlags flags;
+ const char *mode;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "sbb", &mode, &runtime, &force);
+ if (r < 0)
+ return r;
+
+ flags = unit_file_bools_to_flags(runtime, force);
+
+ if (isempty(mode))
+ mm = UNIT_FILE_PRESET_FULL;
+ else {
+ mm = unit_file_preset_mode_from_string(mode);
+ if (mm < 0)
+ return -EINVAL;
+ }
+
+ r = bus_verify_manage_unit_files_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = unit_file_preset(m->unit_file_scope, flags, NULL, l, mm, &changes, &n_changes);
+ if (r < 0)
+ return install_error(error, r, changes, n_changes);
+
+ return reply_unit_file_changes_and_free(m, message, r, changes, n_changes, error);
+}
+
+static int method_disable_unit_files_generic(
+ sd_bus_message *message,
+ Manager *m,
+ int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, size_t *n_changes),
+ sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ int r, runtime;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "b", &runtime);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_unit_files_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = call(m->unit_file_scope, runtime ? UNIT_FILE_RUNTIME : 0, NULL, l, &changes, &n_changes);
+ if (r < 0)
+ return install_error(error, r, changes, n_changes);
+
+ return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes, error);
+}
+
+static int method_disable_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_disable_unit_files_generic(message, userdata, unit_file_disable, error);
+}
+
+static int method_unmask_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_disable_unit_files_generic(message, userdata, unit_file_unmask, error);
+}
+
+static int method_revert_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_unit_files_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = unit_file_revert(m->unit_file_scope, NULL, l, &changes, &n_changes);
+ if (r < 0)
+ return install_error(error, r, changes, n_changes);
+
+ return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes, error);
+}
+
+static int method_set_default_target(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ Manager *m = userdata;
+ const char *name;
+ int force, r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "enable", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "sb", &name, &force);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_unit_files_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = unit_file_set_default(m->unit_file_scope, force ? UNIT_FILE_FORCE : 0, NULL, name, &changes, &n_changes);
+ if (r < 0)
+ return install_error(error, r, changes, n_changes);
+
+ return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes, error);
+}
+
+static int method_preset_all_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ Manager *m = userdata;
+ UnitFilePresetMode mm;
+ const char *mode;
+ UnitFileFlags flags;
+ int force, runtime, r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "enable", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "sbb", &mode, &runtime, &force);
+ if (r < 0)
+ return r;
+
+ flags = unit_file_bools_to_flags(runtime, force);
+
+ if (isempty(mode))
+ mm = UNIT_FILE_PRESET_FULL;
+ else {
+ mm = unit_file_preset_mode_from_string(mode);
+ if (mm < 0)
+ return -EINVAL;
+ }
+
+ r = bus_verify_manage_unit_files_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = unit_file_preset_all(m->unit_file_scope, flags, NULL, mm, &changes, &n_changes);
+ if (r < 0)
+ return install_error(error, r, changes, n_changes);
+
+ return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes, error);
+}
+
+static int method_add_dependency_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ int runtime, force, r;
+ char *target, *type;
+ UnitDependency dep;
+ UnitFileFlags flags;
+
+ assert(message);
+ assert(m);
+
+ r = bus_verify_manage_unit_files_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "ssbb", &target, &type, &runtime, &force);
+ if (r < 0)
+ return r;
+
+ flags = unit_file_bools_to_flags(runtime, force);
+
+ dep = unit_dependency_from_string(type);
+ if (dep < 0)
+ return -EINVAL;
+
+ r = unit_file_add_dependency(m->unit_file_scope, flags, NULL, l, target, dep, &changes, &n_changes);
+ if (r < 0)
+ return install_error(error, r, changes, n_changes);
+
+ return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes, error);
+}
+
+static int method_get_unit_file_links(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0, i;
+ UnitFileFlags flags;
+ const char *name;
+ char **p;
+ int runtime, r;
+
+ r = sd_bus_message_read(message, "sb", &name, &runtime);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, SD_BUS_TYPE_ARRAY, "s");
+ if (r < 0)
+ return r;
+
+ p = STRV_MAKE(name);
+ flags = UNIT_FILE_DRY_RUN |
+ (runtime ? UNIT_FILE_RUNTIME : 0);
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, flags, NULL, p, &changes, &n_changes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get file links for %s: %m", name);
+
+ for (i = 0; i < n_changes; i++)
+ if (changes[i].type == UNIT_FILE_UNLINK) {
+ r = sd_bus_message_append(reply, "s", changes[i].path);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_get_job_waiting(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ uint32_t id;
+ Job *j;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "u", &id);
+ if (r < 0)
+ return r;
+
+ j = manager_get_job(m, id);
+ if (!j)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_JOB, "Job %u does not exist.", (unsigned) id);
+
+ return bus_job_method_get_waiting_jobs(message, j, error);
+}
+
+static int method_abandon_scope(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_get_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ if (u->type != UNIT_SCOPE)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit '%s' is not a scope unit, refusing.", name);
+
+ return bus_scope_method_abandon(message, u, error);
+}
+
+const sd_bus_vtable bus_manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("Version", "s", property_get_version, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Features", "s", property_get_features, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Virtualization", "s", property_get_virtualization, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Architecture", "s", property_get_architecture, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Tainted", "s", property_get_tainted, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("FirmwareTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_FIRMWARE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("LoaderTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_LOADER]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("KernelTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_KERNEL]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("UserspaceTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_USERSPACE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("FinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("SecurityStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_SECURITY_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("SecurityFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_SECURITY_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("GeneratorsStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_GENERATORS_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("GeneratorsFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_GENERATORS_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("UnitsLoadStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_UNITS_LOAD_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("UnitsLoadFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_UNITS_LOAD_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDSecurityStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_SECURITY_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDSecurityFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDGeneratorsStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_GENERATORS_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDGeneratorsFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDUnitsLoadStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDUnitsLoadFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_WRITABLE_PROPERTY("LogLevel", "s", property_get_log_level, property_set_log_level, 0, 0),
+ SD_BUS_WRITABLE_PROPERTY("LogTarget", "s", property_get_log_target, property_set_log_target, 0, 0),
+ SD_BUS_PROPERTY("NNames", "u", property_get_hashmap_size, offsetof(Manager, units), 0),
+ SD_BUS_PROPERTY("NFailedUnits", "u", property_get_set_size, offsetof(Manager, failed_units), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("NJobs", "u", property_get_hashmap_size, offsetof(Manager, jobs), 0),
+ SD_BUS_PROPERTY("NInstalledJobs", "u", bus_property_get_unsigned, offsetof(Manager, n_installed_jobs), 0),
+ SD_BUS_PROPERTY("NFailedJobs", "u", bus_property_get_unsigned, offsetof(Manager, n_failed_jobs), 0),
+ SD_BUS_PROPERTY("Progress", "d", property_get_progress, 0, 0),
+ SD_BUS_PROPERTY("Environment", "as", property_get_environment, 0, 0),
+ SD_BUS_PROPERTY("ConfirmSpawn", "b", bus_property_get_bool, offsetof(Manager, confirm_spawn), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ShowStatus", "b", property_get_show_status, 0, 0),
+ SD_BUS_PROPERTY("UnitPath", "as", NULL, offsetof(Manager, lookup_paths.search_path), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultStandardOutput", "s", bus_property_get_exec_output, offsetof(Manager, default_std_output), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultStandardError", "s", bus_property_get_exec_output, offsetof(Manager, default_std_output), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_WRITABLE_PROPERTY("RuntimeWatchdogUSec", "t", bus_property_get_usec, property_set_runtime_watchdog, offsetof(Manager, runtime_watchdog), 0),
+ SD_BUS_WRITABLE_PROPERTY("ShutdownWatchdogUSec", "t", bus_property_get_usec, bus_property_set_usec, offsetof(Manager, shutdown_watchdog), 0),
+ SD_BUS_WRITABLE_PROPERTY("ServiceWatchdogs", "b", bus_property_get_bool, bus_property_set_bool, offsetof(Manager, service_watchdogs), 0),
+ SD_BUS_PROPERTY("ControlGroup", "s", NULL, offsetof(Manager, cgroup_root), 0),
+ SD_BUS_PROPERTY("SystemState", "s", property_get_system_state, 0, 0),
+ SD_BUS_PROPERTY("ExitCode", "y", bus_property_get_unsigned, offsetof(Manager, return_value), 0),
+ SD_BUS_PROPERTY("DefaultTimerAccuracyUSec", "t", bus_property_get_usec, offsetof(Manager, default_timer_accuracy_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultTimeoutStartUSec", "t", bus_property_get_usec, offsetof(Manager, default_timeout_start_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultTimeoutStopUSec", "t", bus_property_get_usec, offsetof(Manager, default_timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultRestartUSec", "t", bus_property_get_usec, offsetof(Manager, default_restart_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultStartLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Manager, default_start_limit_interval), SD_BUS_VTABLE_PROPERTY_CONST),
+ /* The following two items are obsolete alias */
+ SD_BUS_PROPERTY("DefaultStartLimitIntervalSec", "t", bus_property_get_usec, offsetof(Manager, default_start_limit_interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("DefaultStartLimitInterval", "t", bus_property_get_usec, offsetof(Manager, default_start_limit_interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("DefaultStartLimitBurst", "u", bus_property_get_unsigned, offsetof(Manager, default_start_limit_burst), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultCPUAccounting", "b", bus_property_get_bool, offsetof(Manager, default_cpu_accounting), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultBlockIOAccounting", "b", bus_property_get_bool, offsetof(Manager, default_blockio_accounting), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultMemoryAccounting", "b", bus_property_get_bool, offsetof(Manager, default_memory_accounting), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultTasksAccounting", "b", bus_property_get_bool, offsetof(Manager, default_tasks_accounting), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitCPU", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_CPU]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitCPUSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_CPU]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitFSIZE", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_FSIZE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitFSIZESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_FSIZE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitDATA", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_DATA]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitDATASoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_DATA]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitSTACK", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_STACK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitSTACKSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_STACK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitCORE", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_CORE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitCORESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_CORE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitRSS", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RSS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitRSSSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RSS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitNOFILE", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_NOFILE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitNOFILESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_NOFILE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitAS", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_AS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitASSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_AS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitNPROC", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_NPROC]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitNPROCSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_NPROC]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitMEMLOCK", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_MEMLOCK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitMEMLOCKSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_MEMLOCK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitLOCKS", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_LOCKS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitLOCKSSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_LOCKS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitSIGPENDING", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_SIGPENDING]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitSIGPENDINGSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_SIGPENDING]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitMSGQUEUE", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_MSGQUEUE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitMSGQUEUESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_MSGQUEUE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitNICE", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_NICE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitNICESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_NICE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitRTPRIO", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTPRIO]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitRTPRIOSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTPRIO]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitRTTIME", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitRTTIMESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultTasksMax", "t", NULL, offsetof(Manager, default_tasks_max), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+
+ SD_BUS_METHOD("GetUnit", "s", "o", method_get_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetUnitByPID", "u", "o", method_get_unit_by_pid, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetUnitByInvocationID", "ay", "o", method_get_unit_by_invocation_id, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetUnitByControlGroup", "s", "o", method_get_unit_by_control_group, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("LoadUnit", "s", "o", method_load_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("StartUnit", "ss", "o", method_start_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("StartUnitReplace", "sss", "o", method_start_unit_replace, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("StopUnit", "ss", "o", method_stop_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ReloadUnit", "ss", "o", method_reload_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("RestartUnit", "ss", "o", method_restart_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("TryRestartUnit", "ss", "o", method_try_restart_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ReloadOrRestartUnit", "ss", "o", method_reload_or_restart_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ReloadOrTryRestartUnit", "ss", "o", method_reload_or_try_restart_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("KillUnit", "ssi", NULL, method_kill_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ResetFailedUnit", "s", NULL, method_reset_failed_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetUnitProperties", "sba(sv)", NULL, method_set_unit_properties, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("RefUnit", "s", NULL, method_ref_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("UnrefUnit", "s", NULL, method_unref_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("StartTransientUnit", "ssa(sv)a(sa(sv))", "o", method_start_transient_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetUnitProcesses", "s", "a(sus)", method_get_unit_processes, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("AttachProcessesToUnit", "ssau", NULL, method_attach_processes_to_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("AbandonScope", "s", NULL, method_abandon_scope, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetJob", "u", "o", method_get_job, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetJobAfter", "u", "a(usssoo)", method_get_job_waiting, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetJobBefore", "u", "a(usssoo)", method_get_job_waiting, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CancelJob", "u", NULL, method_cancel_job, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ClearJobs", NULL, NULL, method_clear_jobs, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ResetFailed", NULL, NULL, method_reset_failed, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListUnits", NULL, "a(ssssssouso)", method_list_units, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListUnitsFiltered", "as", "a(ssssssouso)", method_list_units_filtered, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListUnitsByPatterns", "asas", "a(ssssssouso)", method_list_units_by_patterns, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListUnitsByNames", "as", "a(ssssssouso)", method_list_units_by_names, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListJobs", NULL, "a(usssoo)", method_list_jobs, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Subscribe", NULL, NULL, method_subscribe, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Unsubscribe", NULL, NULL, method_unsubscribe, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Dump", NULL, "s", method_dump, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("DumpByFileDescriptor", NULL, "h", method_dump_by_fd, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CreateSnapshot", "sb", "o", method_refuse_snapshot, SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_METHOD("RemoveSnapshot", "s", NULL, method_refuse_snapshot, SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_METHOD("Reload", NULL, NULL, method_reload, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Reexecute", NULL, NULL, method_reexecute, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Exit", NULL, NULL, method_exit, 0),
+ SD_BUS_METHOD("Reboot", NULL, NULL, method_reboot, SD_BUS_VTABLE_CAPABILITY(CAP_SYS_BOOT)),
+ SD_BUS_METHOD("PowerOff", NULL, NULL, method_poweroff, SD_BUS_VTABLE_CAPABILITY(CAP_SYS_BOOT)),
+ SD_BUS_METHOD("Halt", NULL, NULL, method_halt, SD_BUS_VTABLE_CAPABILITY(CAP_SYS_BOOT)),
+ SD_BUS_METHOD("KExec", NULL, NULL, method_kexec, SD_BUS_VTABLE_CAPABILITY(CAP_SYS_BOOT)),
+ SD_BUS_METHOD("SwitchRoot", "ss", NULL, method_switch_root, SD_BUS_VTABLE_CAPABILITY(CAP_SYS_BOOT)),
+ SD_BUS_METHOD("SetEnvironment", "as", NULL, method_set_environment, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("UnsetEnvironment", "as", NULL, method_unset_environment, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("UnsetAndSetEnvironment", "asas", NULL, method_unset_and_set_environment, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListUnitFiles", NULL, "a(ss)", method_list_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListUnitFilesByPatterns", "asas", "a(ss)", method_list_unit_files_by_patterns, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetUnitFileState", "s", "s", method_get_unit_file_state, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("EnableUnitFiles", "asbb", "ba(sss)", method_enable_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("DisableUnitFiles", "asb", "a(sss)", method_disable_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ReenableUnitFiles", "asbb", "ba(sss)", method_reenable_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("LinkUnitFiles", "asbb", "a(sss)", method_link_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("PresetUnitFiles", "asbb", "ba(sss)", method_preset_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("PresetUnitFilesWithMode", "assbb", "ba(sss)", method_preset_unit_files_with_mode, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("MaskUnitFiles", "asbb", "a(sss)", method_mask_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("UnmaskUnitFiles", "asb", "a(sss)", method_unmask_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("RevertUnitFiles", "as", "a(sss)", method_revert_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetDefaultTarget", "sb", "a(sss)", method_set_default_target, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetDefaultTarget", NULL, "s", method_get_default_target, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("PresetAllUnitFiles", "sbb", "a(sss)", method_preset_all_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("AddDependencyUnitFiles", "asssbb", "a(sss)", method_add_dependency_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetUnitFileLinks", "sb", "as", method_get_unit_file_links, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetExitCode", "y", NULL, method_set_exit_code, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("LookupDynamicUserByName", "s", "u", method_lookup_dynamic_user_by_name, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("LookupDynamicUserByUID", "u", "s", method_lookup_dynamic_user_by_uid, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetDynamicUsers", NULL, "a(us)", method_get_dynamic_users, SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_SIGNAL("UnitNew", "so", 0),
+ SD_BUS_SIGNAL("UnitRemoved", "so", 0),
+ SD_BUS_SIGNAL("JobNew", "uos", 0),
+ SD_BUS_SIGNAL("JobRemoved", "uoss", 0),
+ SD_BUS_SIGNAL("StartupFinished", "tttttt", 0),
+ SD_BUS_SIGNAL("UnitFilesChanged", NULL, 0),
+ SD_BUS_SIGNAL("Reloading", "b", 0),
+
+ SD_BUS_VTABLE_END
+};
+
+static int send_finished(sd_bus *bus, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *message = NULL;
+ usec_t *times = userdata;
+ int r;
+
+ assert(bus);
+ assert(times);
+
+ r = sd_bus_message_new_signal(bus, &message, "/org/freedesktop/systemd1", "org.freedesktop.systemd1.Manager", "StartupFinished");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(message, "tttttt", times[0], times[1], times[2], times[3], times[4], times[5]);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, message, NULL);
+}
+
+void bus_manager_send_finished(
+ Manager *m,
+ usec_t firmware_usec,
+ usec_t loader_usec,
+ usec_t kernel_usec,
+ usec_t initrd_usec,
+ usec_t userspace_usec,
+ usec_t total_usec) {
+
+ int r;
+
+ assert(m);
+
+ r = bus_foreach_bus(
+ m,
+ NULL,
+ send_finished,
+ (usec_t[6]) {
+ firmware_usec,
+ loader_usec,
+ kernel_usec,
+ initrd_usec,
+ userspace_usec,
+ total_usec
+ });
+ if (r < 0)
+ log_debug_errno(r, "Failed to send finished signal: %m");
+}
+
+static int send_reloading(sd_bus *bus, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *message = NULL;
+ int r;
+
+ assert(bus);
+
+ r = sd_bus_message_new_signal(bus, &message, "/org/freedesktop/systemd1", "org.freedesktop.systemd1.Manager", "Reloading");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(message, "b", PTR_TO_INT(userdata));
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, message, NULL);
+}
+
+void bus_manager_send_reloading(Manager *m, bool active) {
+ int r;
+
+ assert(m);
+
+ r = bus_foreach_bus(m, NULL, send_reloading, INT_TO_PTR(active));
+ if (r < 0)
+ log_debug_errno(r, "Failed to send reloading signal: %m");
+}
+
+static int send_changed_signal(sd_bus *bus, void *userdata) {
+ assert(bus);
+
+ return sd_bus_emit_properties_changed_strv(bus,
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ NULL);
+}
+
+void bus_manager_send_change_signal(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = bus_foreach_bus(m, NULL, send_changed_signal, NULL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to send manager change signal: %m");
+}
diff --git a/src/core/dbus-manager.h b/src/core/dbus-manager.h
new file mode 100644
index 0000000..d0306ad
--- /dev/null
+++ b/src/core/dbus-manager.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus-vtable.h"
+
+#include "manager.h"
+
+extern const sd_bus_vtable bus_manager_vtable[];
+
+void bus_manager_send_finished(Manager *m, usec_t firmware_usec, usec_t loader_usec, usec_t kernel_usec, usec_t initrd_usec, usec_t userspace_usec, usec_t total_usec);
+void bus_manager_send_reloading(Manager *m, bool active);
+void bus_manager_send_change_signal(Manager *m);
+
+int verify_run_space_and_log(const char *message);
diff --git a/src/core/dbus-mount.c b/src/core/dbus-mount.c
new file mode 100644
index 0000000..b6d6162
--- /dev/null
+++ b/src/core/dbus-mount.c
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "bus-util.h"
+#include "dbus-cgroup.h"
+#include "dbus-execute.h"
+#include "dbus-kill.h"
+#include "dbus-mount.h"
+#include "dbus-util.h"
+#include "mount.h"
+#include "string-util.h"
+#include "unit.h"
+
+static const char *mount_get_what(const Mount *m) {
+ if (m->from_proc_self_mountinfo && m->parameters_proc_self_mountinfo.what)
+ return m->parameters_proc_self_mountinfo.what;
+ if (m->from_fragment && m->parameters_fragment.what)
+ return m->parameters_fragment.what;
+ return NULL;
+}
+
+static const char *mount_get_options(const Mount *m) {
+ if (m->from_proc_self_mountinfo && m->parameters_proc_self_mountinfo.options)
+ return m->parameters_proc_self_mountinfo.options;
+ if (m->from_fragment && m->parameters_fragment.options)
+ return m->parameters_fragment.options;
+ return NULL;
+}
+
+static const char *mount_get_fstype(const Mount *m) {
+ if (m->from_proc_self_mountinfo && m->parameters_proc_self_mountinfo.fstype)
+ return m->parameters_proc_self_mountinfo.fstype;
+ else if (m->from_fragment && m->parameters_fragment.fstype)
+ return m->parameters_fragment.fstype;
+ return NULL;
+}
+
+static BUS_DEFINE_PROPERTY_GET(property_get_what, "s", Mount, mount_get_what);
+static BUS_DEFINE_PROPERTY_GET(property_get_options, "s", Mount, mount_get_options);
+static BUS_DEFINE_PROPERTY_GET(property_get_type, "s", Mount, mount_get_fstype);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, mount_result, MountResult);
+
+const sd_bus_vtable bus_mount_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Where", "s", NULL, offsetof(Mount, where), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("What", "s", property_get_what, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Options","s", property_get_options, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Type", "s", property_get_type, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("TimeoutUSec", "t", bus_property_get_usec, offsetof(Mount, timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Mount, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Mount, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SloppyOptions", "b", bus_property_get_bool, offsetof(Mount, sloppy_options), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LazyUnmount", "b", bus_property_get_bool, offsetof(Mount, lazy_unmount), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ForceUnmount", "b", bus_property_get_bool, offsetof(Mount, force_unmount), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Mount, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_EXEC_COMMAND_VTABLE("ExecMount", offsetof(Mount, exec_command[MOUNT_EXEC_MOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_VTABLE("ExecUnmount", offsetof(Mount, exec_command[MOUNT_EXEC_UNMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_VTABLE("ExecRemount", offsetof(Mount, exec_command[MOUNT_EXEC_REMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_VTABLE_END
+};
+
+static int bus_mount_set_transient_property(
+ Mount *m,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Unit *u = UNIT(m);
+
+ assert(m);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "Where"))
+ return bus_set_transient_path(u, name, &m->where, message, flags, error);
+
+ if (streq(name, "What"))
+ return bus_set_transient_string(u, name, &m->parameters_fragment.what, message, flags, error);
+
+ if (streq(name, "Options"))
+ return bus_set_transient_string(u, name, &m->parameters_fragment.options, message, flags, error);
+
+ if (streq(name, "Type"))
+ return bus_set_transient_string(u, name, &m->parameters_fragment.fstype, message, flags, error);
+
+ if (streq(name, "TimeoutUSec"))
+ return bus_set_transient_usec_fix_0(u, name, &m->timeout_usec, message, flags, error);
+
+ if (streq(name, "DirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &m->directory_mode, message, flags, error);
+
+ if (streq(name, "SloppyOptions"))
+ return bus_set_transient_bool(u, name, &m->sloppy_options, message, flags, error);
+
+ if (streq(name, "LazyUnmount"))
+ return bus_set_transient_bool(u, name, &m->lazy_unmount, message, flags, error);
+
+ if (streq(name, "ForceUnmount"))
+ return bus_set_transient_bool(u, name, &m->force_unmount, message, flags, error);
+
+ return 0;
+}
+
+int bus_mount_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Mount *m = MOUNT(u);
+ int r;
+
+ assert(m);
+ assert(name);
+ assert(message);
+
+ r = bus_cgroup_set_property(u, &m->cgroup_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ if (u->transient && u->load_state == UNIT_STUB) {
+ /* This is a transient unit, let's load a little more */
+
+ r = bus_mount_set_transient_property(m, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ r = bus_exec_context_set_transient_property(u, &m->exec_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ r = bus_kill_context_set_transient_property(u, &m->kill_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_mount_commit_properties(Unit *u) {
+ assert(u);
+
+ unit_invalidate_cgroup_members_masks(u);
+ unit_realize_cgroup(u);
+
+ return 0;
+}
diff --git a/src/core/dbus-mount.h b/src/core/dbus-mount.h
new file mode 100644
index 0000000..f7112a9
--- /dev/null
+++ b/src/core/dbus-mount.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_mount_vtable[];
+
+int bus_mount_set_property(Unit *u, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_mount_commit_properties(Unit *u);
diff --git a/src/core/dbus-path.c b/src/core/dbus-path.c
new file mode 100644
index 0000000..1a97d62
--- /dev/null
+++ b/src/core/dbus-path.c
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "dbus-path.h"
+#include "dbus-util.h"
+#include "list.h"
+#include "path.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "unit.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, path_result, PathResult);
+
+static int property_get_paths(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Path *p = userdata;
+ PathSpec *k;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(p);
+
+ r = sd_bus_message_open_container(reply, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(spec, k, p->specs) {
+ r = sd_bus_message_append(reply, "(ss)", path_type_to_string(k->type), k->path);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+const sd_bus_vtable bus_path_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Unit", "s", bus_property_get_triggered_unit, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Paths", "a(ss)", property_get_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MakeDirectory", "b", bus_property_get_bool, offsetof(Path, make_directory), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Path, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Path, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_VTABLE_END
+};
+
+static int bus_path_set_transient_property(
+ Path *p,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Unit *u = UNIT(p);
+ int r;
+
+ assert(p);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "MakeDirectory"))
+ return bus_set_transient_bool(u, name, &p->make_directory, message, flags, error);
+
+ if (streq(name, "DirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &p->directory_mode, message, flags, error);
+
+ if (streq(name, "Paths")) {
+ const char *type_name, *path;
+ bool empty = true;
+
+ r = sd_bus_message_enter_container(message, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(ss)", &type_name, &path)) > 0) {
+ PathType t;
+
+ t = path_type_from_string(type_name);
+ if (t < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown path type: %s", type_name);
+
+ if (isempty(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path in %s is empty", type_name);
+
+ if (!path_is_absolute(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path in %s is not absolute: %s", type_name, path);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *k;
+ PathSpec *s;
+
+ k = strdup(path);
+ if (!k)
+ return -ENOMEM;
+
+ path_simplify(k, false);
+
+ s = new0(PathSpec, 1);
+ if (!s)
+ return -ENOMEM;
+
+ s->unit = u;
+ s->path = TAKE_PTR(k);
+ s->type = t;
+ s->inotify_fd = -1;
+
+ LIST_PREPEND(spec, p->specs, s);
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=%s", type_name, path);
+ }
+
+ empty = false;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && empty) {
+ path_free_specs(p);
+ unit_write_settingf(u, flags, name, "PathExists=");
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int bus_path_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags mode,
+ sd_bus_error *error) {
+
+ Path *p = PATH(u);
+
+ assert(p);
+ assert(name);
+ assert(message);
+
+ if (u->transient && u->load_state == UNIT_STUB)
+ return bus_path_set_transient_property(p, name, message, mode, error);
+
+ return 0;
+}
diff --git a/src/core/dbus-path.h b/src/core/dbus-path.h
new file mode 100644
index 0000000..ad42b23
--- /dev/null
+++ b/src/core/dbus-path.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_path_vtable[];
+
+int bus_path_set_property(Unit *u, const char *name, sd_bus_message *i, UnitWriteFlags flags, sd_bus_error *error);
diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
new file mode 100644
index 0000000..bb807df
--- /dev/null
+++ b/src/core/dbus-scope.c
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-internal.h"
+#include "bus-util.h"
+#include "dbus-cgroup.h"
+#include "dbus-kill.h"
+#include "dbus-scope.h"
+#include "dbus-unit.h"
+#include "dbus-util.h"
+#include "dbus.h"
+#include "scope.h"
+#include "selinux-access.h"
+#include "unit.h"
+
+int bus_scope_method_abandon(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Scope *s = userdata;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = mac_selinux_unit_access_check(UNIT(s), message, "stop", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async(UNIT(s)->manager, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = scope_abandon(s);
+ if (r == -ESTALE)
+ return sd_bus_error_setf(error, BUS_ERROR_SCOPE_NOT_RUNNING, "Scope %s is not running, cannot abandon.", UNIT(s)->id);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, scope_result, ScopeResult);
+
+const sd_bus_vtable bus_scope_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Controller", "s", NULL, offsetof(Scope, controller), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("TimeoutStopUSec", "t", bus_property_get_usec, offsetof(Scope, timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Scope, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_SIGNAL("RequestStop", NULL, 0),
+ SD_BUS_METHOD("Abandon", NULL, NULL, bus_scope_method_abandon, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END
+};
+
+static int bus_scope_set_transient_property(
+ Scope *s,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "TimeoutStopUSec"))
+ return bus_set_transient_usec(UNIT(s), name, &s->timeout_stop_usec, message, flags, error);
+
+ if (streq(name, "PIDs")) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ unsigned n = 0;
+
+ r = sd_bus_message_enter_container(message, 'a', "u");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ uint32_t upid;
+ pid_t pid;
+
+ r = sd_bus_message_read(message, "u", &upid);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (upid == 0) {
+ if (!creds) {
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+ } else
+ pid = (uid_t) upid;
+
+ r = unit_pid_attachable(UNIT(s), pid, error);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = unit_watch_pid(UNIT(s), pid);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (n <= 0)
+ return -EINVAL;
+
+ return 1;
+
+ } else if (streq(name, "Controller")) {
+ const char *controller;
+
+ /* We can't support direct connections with this, as direct connections know no service or unique name
+ * concept, but the Controller field stores exactly that. */
+ if (sd_bus_message_get_bus(message) != UNIT(s)->manager->api_bus)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Sorry, Controller= logic only supported via the bus.");
+
+ r = sd_bus_message_read(message, "s", &controller);
+ if (r < 0)
+ return r;
+
+ if (!isempty(controller) && !service_name_is_valid(controller))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Controller '%s' is not a valid bus name.", controller);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = free_and_strdup(&s->controller, empty_to_null(controller));
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int bus_scope_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Scope *s = SCOPE(u);
+ int r;
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ r = bus_cgroup_set_property(u, &s->cgroup_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ if (u->load_state == UNIT_STUB) {
+ /* While we are created we still accept PIDs */
+
+ r = bus_scope_set_transient_property(s, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ r = bus_kill_context_set_transient_property(u, &s->kill_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_scope_commit_properties(Unit *u) {
+ assert(u);
+
+ unit_invalidate_cgroup_members_masks(u);
+ unit_realize_cgroup(u);
+
+ return 0;
+}
+
+int bus_scope_send_request_stop(Scope *s) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(s);
+
+ if (!s->controller)
+ return 0;
+
+ p = unit_dbus_path(UNIT(s));
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_new_signal(
+ UNIT(s)->manager->api_bus,
+ &m,
+ p,
+ "org.freedesktop.systemd1.Scope",
+ "RequestStop");
+ if (r < 0)
+ return r;
+
+ return sd_bus_send_to(UNIT(s)->manager->api_bus, m, s->controller, NULL);
+}
+
+static int on_controller_gone(sd_bus_track *track, void *userdata) {
+ Scope *s = userdata;
+
+ assert(track);
+
+ if (s->controller) {
+ log_unit_debug(UNIT(s), "Controller %s disappeared from bus.", s->controller);
+ unit_add_to_dbus_queue(UNIT(s));
+ s->controller = mfree(s->controller);
+ }
+
+ s->controller_track = sd_bus_track_unref(s->controller_track);
+
+ return 0;
+}
+
+int bus_scope_track_controller(Scope *s) {
+ int r;
+
+ assert(s);
+
+ if (!s->controller || s->controller_track)
+ return 0;
+
+ r = sd_bus_track_new(UNIT(s)->manager->api_bus, &s->controller_track, on_controller_gone, s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_track_add_name(s->controller_track, s->controller);
+ if (r < 0) {
+ s->controller_track = sd_bus_track_unref(s->controller_track);
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/core/dbus-scope.h b/src/core/dbus-scope.h
new file mode 100644
index 0000000..702f558
--- /dev/null
+++ b/src/core/dbus-scope.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "scope.h"
+#include "unit.h"
+
+extern const sd_bus_vtable bus_scope_vtable[];
+
+int bus_scope_set_property(Unit *u, const char *name, sd_bus_message *i, UnitWriteFlags flags, sd_bus_error *error);
+int bus_scope_commit_properties(Unit *u);
+
+int bus_scope_send_request_stop(Scope *s);
+
+int bus_scope_method_abandon(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+int bus_scope_track_controller(Scope *s);
diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c
new file mode 100644
index 0000000..0904cc0
--- /dev/null
+++ b/src/core/dbus-service.c
@@ -0,0 +1,437 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio_ext.h>
+
+#include "alloc-util.h"
+#include "async.h"
+#include "bus-internal.h"
+#include "bus-util.h"
+#include "dbus-cgroup.h"
+#include "dbus-execute.h"
+#include "dbus-kill.h"
+#include "dbus-service.h"
+#include "dbus-util.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "service.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, service_type, ServiceType);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, service_result, ServiceResult);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_restart, service_restart, ServiceRestart);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_notify_access, notify_access, NotifyAccess);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction);
+
+static int property_get_exit_status_set(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExitStatusSet *status_set = userdata;
+ Iterator i;
+ void *id;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(status_set);
+
+ r = sd_bus_message_open_container(reply, 'r', "aiai");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "i");
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(id, status_set->status, i) {
+ int val = PTR_TO_INT(id);
+
+ if (val < 0 || val > 255)
+ continue;
+
+ r = sd_bus_message_append_basic(reply, 'i', &val);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "i");
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(id, status_set->signal, i) {
+ int val = PTR_TO_INT(id);
+ const char *str;
+
+ str = signal_to_string(val);
+ if (!str)
+ continue;
+
+ r = sd_bus_message_append_basic(reply, 'i', &val);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+const sd_bus_vtable bus_service_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Service, type), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Restart", "s", property_get_restart, offsetof(Service, restart), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PIDFile", "s", NULL, offsetof(Service, pid_file), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NotifyAccess", "s", property_get_notify_access, offsetof(Service, notify_access), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestartUSec", "t", bus_property_get_usec, offsetof(Service, restart_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimeoutStartUSec", "t", bus_property_get_usec, offsetof(Service, timeout_start_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimeoutStopUSec", "t", bus_property_get_usec, offsetof(Service, timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Service, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("WatchdogUSec", "t", bus_property_get_usec, offsetof(Service, watchdog_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("WatchdogTimestamp", offsetof(Service, watchdog_timestamp), 0),
+ SD_BUS_PROPERTY("PermissionsStartOnly", "b", bus_property_get_bool, offsetof(Service, permissions_start_only), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), /* 😷 deprecated */
+ SD_BUS_PROPERTY("RootDirectoryStartOnly", "b", bus_property_get_bool, offsetof(Service, root_directory_start_only), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RemainAfterExit", "b", bus_property_get_bool, offsetof(Service, remain_after_exit), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("GuessMainPID", "b", bus_property_get_bool, offsetof(Service, guess_main_pid), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestartPreventExitStatus", "(aiai)", property_get_exit_status_set, offsetof(Service, restart_prevent_status), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestartForceExitStatus", "(aiai)", property_get_exit_status_set, offsetof(Service, restart_force_status), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SuccessExitStatus", "(aiai)", property_get_exit_status_set, offsetof(Service, success_status), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MainPID", "u", bus_property_get_pid, offsetof(Service, main_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Service, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("BusName", "s", NULL, offsetof(Service, bus_name), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FileDescriptorStoreMax", "u", bus_property_get_unsigned, offsetof(Service, n_fd_store_max), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NFileDescriptorStore", "u", bus_property_get_unsigned, offsetof(Service, n_fd_store), 0),
+ SD_BUS_PROPERTY("StatusText", "s", NULL, offsetof(Service, status_text), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("StatusErrno", "i", bus_property_get_int, offsetof(Service, status_errno), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("USBFunctionDescriptors", "s", NULL, offsetof(Service, usb_function_descriptors), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("NRestarts", "u", bus_property_get_unsigned, offsetof(Service, n_restarts), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
+ BUS_EXEC_STATUS_VTABLE("ExecMain", offsetof(Service, main_exec_status), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStart", offsetof(Service, exec_command[SERVICE_EXEC_START]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPost", offsetof(Service, exec_command[SERVICE_EXEC_START_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecReload", offsetof(Service, exec_command[SERVICE_EXEC_RELOAD]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStop", offsetof(Service, exec_command[SERVICE_EXEC_STOP]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPost", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+
+ /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */
+ SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("FailureAction", "s", property_get_emergency_action, offsetof(Unit, failure_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_VTABLE_END
+};
+
+static int bus_set_transient_exit_status(
+ Unit *u,
+ const char *name,
+ ExitStatusSet *status_set,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ const int *status, *signal;
+ size_t sz_status, sz_signal, i;
+ int r;
+
+ r = sd_bus_message_enter_container(message, 'r', "aiai");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(message, 'i', (const void **) &status, &sz_status);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(message, 'i', (const void **) &signal, &sz_signal);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (sz_status == 0 && sz_signal == 0 && !UNIT_WRITE_FLAGS_NOOP(flags)) {
+ exit_status_set_free(status_set);
+ unit_write_settingf(u, flags, name, "%s=", name);
+ return 1;
+ }
+
+ for (i = 0; i < sz_status; i++) {
+ if (status[i] < 0 || status[i] > 255)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid status code in %s: %i", name, status[i]);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = set_ensure_allocated(&status_set->status, NULL);
+ if (r < 0)
+ return r;
+
+ r = set_put(status_set->status, INT_TO_PTR(status[i]));
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(u, flags, name, "%s=%i", name, status[i]);
+ }
+ }
+
+ for (i = 0; i < sz_signal; i++) {
+ const char *str;
+
+ str = signal_to_string(signal[i]);
+ if (!str)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid signal in %s: %i", name, signal[i]);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = set_ensure_allocated(&status_set->signal, NULL);
+ if (r < 0)
+ return r;
+
+ r = set_put(status_set->signal, INT_TO_PTR(signal[i]));
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, str);
+ }
+ }
+
+ return 1;
+}
+
+static int bus_set_transient_std_fd(
+ Unit *u,
+ const char *name,
+ int *p,
+ bool *b,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int fd, r;
+
+ assert(p);
+ assert(b);
+
+ r = sd_bus_message_read(message, "h", &fd);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ int copy;
+
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0)
+ return -errno;
+
+ asynchronous_close(*p);
+ *p = copy;
+ *b = true;
+ }
+
+ return 1;
+}
+static BUS_DEFINE_SET_TRANSIENT_PARSE(notify_access, NotifyAccess, notify_access_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(service_type, ServiceType, service_type_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(service_restart, ServiceRestart, service_restart_from_string);
+static BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(bus_name, service_name_is_valid);
+
+static int bus_service_set_transient_property(
+ Service *s,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Unit *u = UNIT(s);
+ ServiceExecCommand ci;
+ int r;
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "PermissionsStartOnly"))
+ return bus_set_transient_bool(u, name, &s->permissions_start_only, message, flags, error);
+
+ if (streq(name, "RootDirectoryStartOnly"))
+ return bus_set_transient_bool(u, name, &s->root_directory_start_only, message, flags, error);
+
+ if (streq(name, "RemainAfterExit"))
+ return bus_set_transient_bool(u, name, &s->remain_after_exit, message, flags, error);
+
+ if (streq(name, "GuessMainPID"))
+ return bus_set_transient_bool(u, name, &s->guess_main_pid, message, flags, error);
+
+ if (streq(name, "Type"))
+ return bus_set_transient_service_type(u, name, &s->type, message, flags, error);
+
+ if (streq(name, "RestartUSec"))
+ return bus_set_transient_usec(u, name, &s->restart_usec, message, flags, error);
+
+ if (streq(name, "TimeoutStartUSec")) {
+ r = bus_set_transient_usec(u, name, &s->timeout_start_usec, message, flags, error);
+ if (r >= 0 && !UNIT_WRITE_FLAGS_NOOP(flags))
+ s->start_timeout_defined = true;
+
+ return r;
+ }
+
+ if (streq(name, "TimeoutStopUSec"))
+ return bus_set_transient_usec(u, name, &s->timeout_stop_usec, message, flags, error);
+
+ if (streq(name, "RuntimeMaxUSec"))
+ return bus_set_transient_usec(u, name, &s->runtime_max_usec, message, flags, error);
+
+ if (streq(name, "WatchdogUSec"))
+ return bus_set_transient_usec(u, name, &s->watchdog_usec, message, flags, error);
+
+ if (streq(name, "FileDescriptorStoreMax"))
+ return bus_set_transient_unsigned(u, name, &s->n_fd_store_max, message, flags, error);
+
+ if (streq(name, "NotifyAccess"))
+ return bus_set_transient_notify_access(u, name, &s->notify_access, message, flags, error);
+
+ if (streq(name, "PIDFile")) {
+ _cleanup_free_ char *n = NULL;
+ const char *v, *e;
+
+ r = sd_bus_message_read(message, "s", &v);
+ if (r < 0)
+ return r;
+
+ if (!isempty(v)) {
+ n = path_make_absolute(v, u->manager->prefix[EXEC_DIRECTORY_RUNTIME]);
+ if (!n)
+ return -ENOMEM;
+
+ path_simplify(n, true);
+
+ if (!path_is_normalized(n))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "PIDFile= path '%s' is not valid", n);
+
+ e = path_startswith(n, "/var/run/");
+ if (e) {
+ char *z;
+
+ z = strjoin("/run/", e);
+ if (!z)
+ return log_oom();
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags))
+ log_unit_notice(u, "Transient unit's PIDFile= property references path below legacy directory /var/run, updating %s → %s; please update client accordingly.", n, z);
+
+ free_and_replace(n, z);
+ }
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ free_and_replace(s->pid_file, n);
+ unit_write_settingf(u, flags, name, "%s=%s", name, strempty(s->pid_file));
+ }
+
+ return 1;
+ }
+
+ if (streq(name, "USBFunctionDescriptors"))
+ return bus_set_transient_path(u, name, &s->usb_function_descriptors, message, flags, error);
+
+ if (streq(name, "USBFunctionStrings"))
+ return bus_set_transient_path(u, name, &s->usb_function_strings, message, flags, error);
+
+ if (streq(name, "BusName"))
+ return bus_set_transient_bus_name(u, name, &s->bus_name, message, flags, error);
+
+ if (streq(name, "Restart"))
+ return bus_set_transient_service_restart(u, name, &s->restart, message, flags, error);
+
+ if (streq(name, "RestartPreventExitStatus"))
+ return bus_set_transient_exit_status(u, name, &s->restart_prevent_status, message, flags, error);
+
+ if (streq(name, "RestartForceExitStatus"))
+ return bus_set_transient_exit_status(u, name, &s->restart_force_status, message, flags, error);
+
+ if (streq(name, "SuccessExitStatus"))
+ return bus_set_transient_exit_status(u, name, &s->success_status, message, flags, error);
+
+ ci = service_exec_command_from_string(name);
+ if (ci >= 0)
+ return bus_set_transient_exec_command(u, name, &s->exec_command[ci], message, flags, error);
+
+ if (streq(name, "StandardInputFileDescriptor"))
+ return bus_set_transient_std_fd(u, name, &s->stdin_fd, &s->exec_context.stdio_as_fds, message, flags, error);
+
+ if (streq(name, "StandardOutputFileDescriptor"))
+ return bus_set_transient_std_fd(u, name, &s->stdout_fd, &s->exec_context.stdio_as_fds, message, flags, error);
+
+ if (streq(name, "StandardErrorFileDescriptor"))
+ return bus_set_transient_std_fd(u, name, &s->stderr_fd, &s->exec_context.stdio_as_fds, message, flags, error);
+
+ return 0;
+}
+
+int bus_service_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Service *s = SERVICE(u);
+ int r;
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ r = bus_cgroup_set_property(u, &s->cgroup_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ if (u->transient && u->load_state == UNIT_STUB) {
+ /* This is a transient unit, let's load a little more */
+
+ r = bus_service_set_transient_property(s, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ r = bus_exec_context_set_transient_property(u, &s->exec_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ r = bus_kill_context_set_transient_property(u, &s->kill_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_service_commit_properties(Unit *u) {
+ assert(u);
+
+ unit_invalidate_cgroup_members_masks(u);
+ unit_realize_cgroup(u);
+
+ return 0;
+}
diff --git a/src/core/dbus-service.h b/src/core/dbus-service.h
new file mode 100644
index 0000000..22d2b88
--- /dev/null
+++ b/src/core/dbus-service.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_service_vtable[];
+
+int bus_service_set_property(Unit *u, const char *name, sd_bus_message *i, UnitWriteFlags flags, sd_bus_error *error);
+int bus_service_commit_properties(Unit *u);
diff --git a/src/core/dbus-slice.c b/src/core/dbus-slice.c
new file mode 100644
index 0000000..effd5fa
--- /dev/null
+++ b/src/core/dbus-slice.c
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "dbus-cgroup.h"
+#include "dbus-slice.h"
+#include "slice.h"
+#include "unit.h"
+
+const sd_bus_vtable bus_slice_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_VTABLE_END
+};
+
+int bus_slice_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Slice *s = SLICE(u);
+
+ assert(name);
+ assert(u);
+
+ return bus_cgroup_set_property(u, &s->cgroup_context, name, message, flags, error);
+}
+
+int bus_slice_commit_properties(Unit *u) {
+ assert(u);
+
+ unit_invalidate_cgroup_members_masks(u);
+ unit_realize_cgroup(u);
+
+ return 0;
+}
diff --git a/src/core/dbus-slice.h b/src/core/dbus-slice.h
new file mode 100644
index 0000000..88cc48c
--- /dev/null
+++ b/src/core/dbus-slice.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_slice_vtable[];
+
+int bus_slice_set_property(Unit *u, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_slice_commit_properties(Unit *u);
diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c
new file mode 100644
index 0000000..83ac7ad
--- /dev/null
+++ b/src/core/dbus-socket.c
@@ -0,0 +1,468 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "dbus-cgroup.h"
+#include "dbus-execute.h"
+#include "dbus-kill.h"
+#include "dbus-socket.h"
+#include "dbus-util.h"
+#include "fd-util.h"
+#include "ip-protocol-list.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "socket.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "unit.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, socket_result, SocketResult);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_bind_ipv6_only, socket_address_bind_ipv6_only, SocketAddressBindIPv6Only);
+static BUS_DEFINE_PROPERTY_GET(property_get_fdname, "s", Socket, socket_fdname);
+
+static int property_get_listen(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Socket *s = SOCKET(userdata);
+ SocketPort *p;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ r = sd_bus_message_open_container(reply, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(port, p, s->ports) {
+ _cleanup_free_ char *address = NULL;
+ const char *a;
+
+ switch (p->type) {
+ case SOCKET_SOCKET: {
+ r = socket_address_print(&p->address, &address);
+ if (r)
+ return r;
+
+ a = address;
+ break;
+ }
+
+ case SOCKET_SPECIAL:
+ case SOCKET_MQUEUE:
+ case SOCKET_FIFO:
+ case SOCKET_USB_FUNCTION:
+ a = p->path;
+ break;
+
+ default:
+ assert_not_reached("Unknown socket type");
+ }
+
+ r = sd_bus_message_append(reply, "(ss)", socket_port_type_to_string(p), a);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+const sd_bus_vtable bus_socket_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("BindIPv6Only", "s", property_get_bind_ipv6_only, offsetof(Socket, bind_ipv6_only), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Backlog", "u", bus_property_get_unsigned, offsetof(Socket, backlog), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimeoutUSec", "t", bus_property_get_usec, offsetof(Socket, timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("BindToDevice", "s", NULL, offsetof(Socket, bind_to_device), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SocketUser", "s", NULL, offsetof(Socket, user), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SocketGroup", "s", NULL, offsetof(Socket, group), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SocketMode", "u", bus_property_get_mode, offsetof(Socket, socket_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Socket, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Accept", "b", bus_property_get_bool, offsetof(Socket, accept), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Writable", "b", bus_property_get_bool, offsetof(Socket, writable), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KeepAlive", "b", bus_property_get_bool, offsetof(Socket, keep_alive), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KeepAliveTimeUSec", "t", bus_property_get_usec, offsetof(Socket, keep_alive_time), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KeepAliveIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, keep_alive_interval), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KeepAliveProbes", "u", bus_property_get_unsigned, offsetof(Socket, keep_alive_cnt), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DeferAcceptUSec" , "t", bus_property_get_usec, offsetof(Socket, defer_accept), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NoDelay", "b", bus_property_get_bool, offsetof(Socket, no_delay), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Priority", "i", bus_property_get_int, offsetof(Socket, priority), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReceiveBuffer", "t", bus_property_get_size, offsetof(Socket, receive_buffer), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SendBuffer", "t", bus_property_get_size, offsetof(Socket, send_buffer), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IPTOS", "i", bus_property_get_int, offsetof(Socket, ip_tos), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IPTTL", "i", bus_property_get_int, offsetof(Socket, ip_ttl), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PipeSize", "t", bus_property_get_size, offsetof(Socket, pipe_size), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FreeBind", "b", bus_property_get_bool, offsetof(Socket, free_bind), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Transparent", "b", bus_property_get_bool, offsetof(Socket, transparent), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Broadcast", "b", bus_property_get_bool, offsetof(Socket, broadcast), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PassCredentials", "b", bus_property_get_bool, offsetof(Socket, pass_cred), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PassSecurity", "b", bus_property_get_bool, offsetof(Socket, pass_sec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RemoveOnStop", "b", bus_property_get_bool, offsetof(Socket, remove_on_stop), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Listen", "a(ss)", property_get_listen, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Symlinks", "as", NULL, offsetof(Socket, symlinks), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Mark", "i", bus_property_get_int, offsetof(Socket, mark), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MaxConnections", "u", bus_property_get_unsigned, offsetof(Socket, max_connections), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MaxConnectionsPerSource", "u", bus_property_get_unsigned, offsetof(Socket, max_connections_per_source), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MessageQueueMaxMessages", "x", bus_property_get_long, offsetof(Socket, mq_maxmsg), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MessageQueueMessageSize", "x", bus_property_get_long, offsetof(Socket, mq_msgsize), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TCPCongestion", "s", NULL, offsetof(Socket, tcp_congestion), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReusePort", "b", bus_property_get_bool, offsetof(Socket, reuse_port), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SmackLabel", "s", NULL, offsetof(Socket, smack), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SmackLabelIPIn", "s", NULL, offsetof(Socket, smack_ip_in), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SmackLabelIPOut", "s", NULL, offsetof(Socket, smack_ip_out), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Socket, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Socket, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("NConnections", "u", bus_property_get_unsigned, offsetof(Socket, n_connections), 0),
+ SD_BUS_PROPERTY("NAccepted", "u", bus_property_get_unsigned, offsetof(Socket, n_accepted), 0),
+ SD_BUS_PROPERTY("NRefused", "u", bus_property_get_unsigned, offsetof(Socket, n_refused), 0),
+ SD_BUS_PROPERTY("FileDescriptorName", "s", property_get_fdname, 0, 0),
+ SD_BUS_PROPERTY("SocketProtocol", "i", bus_property_get_int, offsetof(Socket, socket_protocol), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TriggerLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, trigger_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TriggerLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, trigger_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Socket, exec_command[SOCKET_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPost", offsetof(Socket, exec_command[SOCKET_EXEC_START_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPre", offsetof(Socket, exec_command[SOCKET_EXEC_STOP_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPost", offsetof(Socket, exec_command[SOCKET_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_VTABLE_END
+};
+
+static bool check_size_t_truncation(uint64_t t) {
+ return (size_t) t == t;
+}
+
+static const char* socket_protocol_to_string(int32_t i) {
+ if (i == IPPROTO_IP)
+ return "";
+
+ if (!IN_SET(i, IPPROTO_UDPLITE, IPPROTO_SCTP))
+ return NULL;
+
+ return ip_protocol_to_name(i);
+}
+
+static BUS_DEFINE_SET_TRANSIENT(int, "i", int32_t, int, "%" PRIi32);
+static BUS_DEFINE_SET_TRANSIENT(message_queue, "x", int64_t, long, "%" PRIi64);
+static BUS_DEFINE_SET_TRANSIENT_IS_VALID(size_t_check_truncation, "t", uint64_t, size_t, "%" PRIu64, check_size_t_truncation);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(bind_ipv6_only, SocketAddressBindIPv6Only, socket_address_bind_ipv6_only_or_bool_from_string);
+static BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(fdname, fdname_is_valid);
+static BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(ifname, ifname_valid);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(ip_tos, "i", int32_t, int, "%" PRIi32, ip_tos_to_string_alloc);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING(socket_protocol, "i", int32_t, int, "%" PRIi32, socket_protocol_to_string);
+
+static int bus_socket_set_transient_property(
+ Socket *s,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ SocketExecCommand ci;
+ Unit *u = UNIT(s);
+ int r;
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "Accept"))
+ return bus_set_transient_bool(u, name, &s->accept, message, flags, error);
+
+ if (streq(name, "Writable"))
+ return bus_set_transient_bool(u, name, &s->writable, message, flags, error);
+
+ if (streq(name, "KeepAlive"))
+ return bus_set_transient_bool(u, name, &s->keep_alive, message, flags, error);
+
+ if (streq(name, "NoDelay"))
+ return bus_set_transient_bool(u, name, &s->no_delay, message, flags, error);
+
+ if (streq(name, "FreeBind"))
+ return bus_set_transient_bool(u, name, &s->free_bind, message, flags, error);
+
+ if (streq(name, "Transparent"))
+ return bus_set_transient_bool(u, name, &s->transparent, message, flags, error);
+
+ if (streq(name, "Broadcast"))
+ return bus_set_transient_bool(u, name, &s->broadcast, message, flags, error);
+
+ if (streq(name, "PassCredentials"))
+ return bus_set_transient_bool(u, name, &s->pass_cred, message, flags, error);
+
+ if (streq(name, "PassSecurity"))
+ return bus_set_transient_bool(u, name, &s->pass_sec, message, flags, error);
+
+ if (streq(name, "ReusePort"))
+ return bus_set_transient_bool(u, name, &s->reuse_port, message, flags, error);
+
+ if (streq(name, "RemoveOnStop"))
+ return bus_set_transient_bool(u, name, &s->remove_on_stop, message, flags, error);
+
+ if (streq(name, "SELinuxContextFromNet"))
+ return bus_set_transient_bool(u, name, &s->selinux_context_from_net, message, flags, error);
+
+ if (streq(name, "Priority"))
+ return bus_set_transient_int(u, name, &s->priority, message, flags, error);
+
+ if (streq(name, "IPTTL"))
+ return bus_set_transient_int(u, name, &s->ip_ttl, message, flags, error);
+
+ if (streq(name, "Mark"))
+ return bus_set_transient_int(u, name, &s->mark, message, flags, error);
+
+ if (streq(name, "Backlog"))
+ return bus_set_transient_unsigned(u, name, &s->backlog, message, flags, error);
+
+ if (streq(name, "MaxConnections"))
+ return bus_set_transient_unsigned(u, name, &s->max_connections, message, flags, error);
+
+ if (streq(name, "MaxConnectionsPerSource"))
+ return bus_set_transient_unsigned(u, name, &s->max_connections_per_source, message, flags, error);
+
+ if (streq(name, "KeepAliveProbes"))
+ return bus_set_transient_unsigned(u, name, &s->keep_alive_cnt, message, flags, error);
+
+ if (streq(name, "TriggerLimitBurst"))
+ return bus_set_transient_unsigned(u, name, &s->trigger_limit.burst, message, flags, error);
+
+ if (streq(name, "SocketMode"))
+ return bus_set_transient_mode_t(u, name, &s->socket_mode, message, flags, error);
+
+ if (streq(name, "DirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &s->directory_mode, message, flags, error);
+
+ if (streq(name, "MessageQueueMaxMessages"))
+ return bus_set_transient_message_queue(u, name, &s->mq_maxmsg, message, flags, error);
+
+ if (streq(name, "MessageQueueMessageSize"))
+ return bus_set_transient_message_queue(u, name, &s->mq_msgsize, message, flags, error);
+
+ if (streq(name, "TimeoutUSec"))
+ return bus_set_transient_usec_fix_0(u, name, &s->timeout_usec, message, flags, error);
+
+ if (streq(name, "KeepAliveTimeUSec"))
+ return bus_set_transient_usec(u, name, &s->keep_alive_time, message, flags, error);
+
+ if (streq(name, "KeepAliveIntervalUSec"))
+ return bus_set_transient_usec(u, name, &s->keep_alive_interval, message, flags, error);
+
+ if (streq(name, "DeferAcceptUSec"))
+ return bus_set_transient_usec(u, name, &s->defer_accept, message, flags, error);
+
+ if (streq(name, "TriggerLimitIntervalUSec"))
+ return bus_set_transient_usec(u, name, &s->trigger_limit.interval, message, flags, error);
+
+ if (streq(name, "SmackLabel"))
+ return bus_set_transient_string(u, name, &s->smack, message, flags, error);
+
+ if (streq(name, "SmackLabelIPin"))
+ return bus_set_transient_string(u, name, &s->smack_ip_in, message, flags, error);
+
+ if (streq(name, "SmackLabelIPOut"))
+ return bus_set_transient_string(u, name, &s->smack_ip_out, message, flags, error);
+
+ if (streq(name, "TCPCongestion"))
+ return bus_set_transient_string(u, name, &s->tcp_congestion, message, flags, error);
+
+ if (streq(name, "FileDescriptorName"))
+ return bus_set_transient_fdname(u, name, &s->fdname, message, flags, error);
+
+ if (streq(name, "SocketUser"))
+ return bus_set_transient_user(u, name, &s->user, message, flags, error);
+
+ if (streq(name, "SocketGroup"))
+ return bus_set_transient_user(u, name, &s->group, message, flags, error);
+
+ if (streq(name, "BindIPv6Only"))
+ return bus_set_transient_bind_ipv6_only(u, name, &s->bind_ipv6_only, message, flags, error);
+
+ if (streq(name, "ReceiveBuffer"))
+ return bus_set_transient_size_t_check_truncation(u, name, &s->receive_buffer, message, flags, error);
+
+ if (streq(name, "SendBuffer"))
+ return bus_set_transient_size_t_check_truncation(u, name, &s->send_buffer, message, flags, error);
+
+ if (streq(name, "PipeSize"))
+ return bus_set_transient_size_t_check_truncation(u, name, &s->pipe_size, message, flags, error);
+
+ if (streq(name, "BindToDevice"))
+ return bus_set_transient_ifname(u, name, &s->bind_to_device, message, flags, error);
+
+ if (streq(name, "IPTOS"))
+ return bus_set_transient_ip_tos(u, name, &s->ip_tos, message, flags, error);
+
+ if (streq(name, "SocketProtocol"))
+ return bus_set_transient_socket_protocol(u, name, &s->socket_protocol, message, flags, error);
+
+ if ((ci = socket_exec_command_from_string(name)) >= 0)
+ return bus_set_transient_exec_command(u, name, &s->exec_command[ci], message, flags, error);
+
+ if (streq(name, "Symlinks")) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **p;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, l) {
+ if (!path_is_absolute(*p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Symlink path is not absolute: %s", *p);
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (strv_isempty(l)) {
+ s->symlinks = strv_free(s->symlinks);
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=", name);
+ } else {
+ _cleanup_free_ char *joined = NULL;
+
+ r = strv_extend_strv(&s->symlinks, l, true);
+ if (r < 0)
+ return -ENOMEM;
+
+ joined = strv_join(l, " ");
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=%s", name, joined);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "Listen")) {
+ const char *t, *a;
+ bool empty = true;
+
+ r = sd_bus_message_enter_container(message, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(ss)", &t, &a)) > 0) {
+ _cleanup_free_ SocketPort *p = NULL;
+
+ p = new(SocketPort, 1);
+ if (!p)
+ return log_oom();
+
+ *p = (SocketPort) {
+ .fd = -1,
+ .socket = s,
+ };
+
+ p->type = socket_port_type_from_string(t);
+ if (p->type < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown Socket type: %s", t);
+
+ if (p->type != SOCKET_SOCKET) {
+ if (!path_is_valid(p->path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid socket path: %s", t);
+
+ p->path = strdup(a);
+ if (!p->path)
+ return log_oom();
+
+ path_simplify(p->path, false);
+
+ } else if (streq(t, "Netlink")) {
+ r = socket_address_parse_netlink(&p->address, a);
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid netlink address: %s", a);
+
+ } else {
+ r = socket_address_parse(&p->address, a);
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid address: %s", a);
+
+ p->address.type = socket_address_type_from_string(t);
+ if (p->address.type < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid address type: %s", t);
+
+ if (socket_address_family(&p->address) != AF_LOCAL && p->address.type == SOCK_SEQPACKET)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Address family not supported: %s", a);
+ }
+
+ empty = false;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ LIST_APPEND(port, s->ports, TAKE_PTR(p));
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "Listen%s=%s", t, a);
+ }
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && empty) {
+ socket_free_ports(s);
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "ListenStream=");
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int bus_socket_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Socket *s = SOCKET(u);
+ int r;
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ r = bus_cgroup_set_property(u, &s->cgroup_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ if (u->transient && u->load_state == UNIT_STUB) {
+ /* This is a transient unit, let's load a little more */
+
+ r = bus_socket_set_transient_property(s, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ r = bus_exec_context_set_transient_property(u, &s->exec_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ r = bus_kill_context_set_transient_property(u, &s->kill_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_socket_commit_properties(Unit *u) {
+ assert(u);
+
+ unit_invalidate_cgroup_members_masks(u);
+ unit_realize_cgroup(u);
+
+ return 0;
+}
diff --git a/src/core/dbus-socket.h b/src/core/dbus-socket.h
new file mode 100644
index 0000000..9aa8133
--- /dev/null
+++ b/src/core/dbus-socket.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_socket_vtable[];
+
+int bus_socket_set_property(Unit *u, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_socket_commit_properties(Unit *u);
diff --git a/src/core/dbus-swap.c b/src/core/dbus-swap.c
new file mode 100644
index 0000000..353fa20
--- /dev/null
+++ b/src/core/dbus-swap.c
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2010 Maarten Lankhorst
+***/
+
+#include "bus-util.h"
+#include "dbus-cgroup.h"
+#include "dbus-execute.h"
+#include "dbus-swap.h"
+#include "string-util.h"
+#include "swap.h"
+#include "unit.h"
+
+static int swap_get_priority(Swap *s) {
+ if (s->from_proc_swaps)
+ return s->parameters_proc_swaps.priority;
+ if (s->from_fragment)
+ return s->parameters_fragment.priority;
+ return -1;
+}
+
+static const char *swap_get_options(Swap *s) {
+ if (s->from_fragment)
+ return s->parameters_fragment.options;
+ return NULL;
+}
+
+static BUS_DEFINE_PROPERTY_GET(property_get_priority, "i", Swap, swap_get_priority);
+static BUS_DEFINE_PROPERTY_GET(property_get_options, "s", Swap, swap_get_options);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, swap_result, SwapResult);
+
+const sd_bus_vtable bus_swap_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("What", "s", NULL, offsetof(Swap, what), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Priority", "i", property_get_priority, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Options", "s", property_get_options, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("TimeoutUSec", "t", bus_property_get_usec, offsetof(Swap, timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Swap, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Swap, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_EXEC_COMMAND_VTABLE("ExecActivate", offsetof(Swap, exec_command[SWAP_EXEC_ACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_VTABLE("ExecDeactivate", offsetof(Swap, exec_command[SWAP_EXEC_DEACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_VTABLE_END
+};
+
+int bus_swap_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Swap *s = SWAP(u);
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ return bus_cgroup_set_property(u, &s->cgroup_context, name, message, flags, error);
+}
+
+int bus_swap_commit_properties(Unit *u) {
+ assert(u);
+
+ unit_invalidate_cgroup_members_masks(u);
+ unit_realize_cgroup(u);
+
+ return 0;
+}
diff --git a/src/core/dbus-swap.h b/src/core/dbus-swap.h
new file mode 100644
index 0000000..b114fe0
--- /dev/null
+++ b/src/core/dbus-swap.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2010 Maarten Lankhorst
+***/
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_swap_vtable[];
+
+int bus_swap_set_property(Unit *u, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_swap_commit_properties(Unit *u);
diff --git a/src/core/dbus-target.c b/src/core/dbus-target.c
new file mode 100644
index 0000000..ba50113
--- /dev/null
+++ b/src/core/dbus-target.c
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "dbus-target.h"
+#include "unit.h"
+
+const sd_bus_vtable bus_target_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_VTABLE_END
+};
diff --git a/src/core/dbus-target.h b/src/core/dbus-target.h
new file mode 100644
index 0000000..ad02a1d
--- /dev/null
+++ b/src/core/dbus-target.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus-vtable.h"
+
+extern const sd_bus_vtable bus_target_vtable[];
diff --git a/src/core/dbus-timer.c b/src/core/dbus-timer.c
new file mode 100644
index 0000000..b9d2f3d
--- /dev/null
+++ b/src/core/dbus-timer.c
@@ -0,0 +1,369 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "dbus-timer.h"
+#include "dbus-util.h"
+#include "strv.h"
+#include "timer.h"
+#include "unit.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, timer_result, TimerResult);
+
+static int property_get_monotonic_timers(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Timer *t = userdata;
+ TimerValue *v;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(t);
+
+ r = sd_bus_message_open_container(reply, 'a', "(stt)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(value, v, t->values) {
+ _cleanup_free_ char *buf = NULL;
+ const char *s;
+ size_t l;
+
+ if (v->base == TIMER_CALENDAR)
+ continue;
+
+ s = timer_base_to_string(v->base);
+ assert(endswith(s, "Sec"));
+
+ /* s/Sec/USec/ */
+ l = strlen(s);
+ buf = new(char, l+2);
+ if (!buf)
+ return -ENOMEM;
+
+ memcpy(buf, s, l-3);
+ memcpy(buf+l-3, "USec", 5);
+
+ r = sd_bus_message_append(reply, "(stt)", buf, v->value, v->next_elapse);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_calendar_timers(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Timer *t = userdata;
+ TimerValue *v;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(t);
+
+ r = sd_bus_message_open_container(reply, 'a', "(sst)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(value, v, t->values) {
+ _cleanup_free_ char *buf = NULL;
+
+ if (v->base != TIMER_CALENDAR)
+ continue;
+
+ r = calendar_spec_to_string(v->calendar_spec, &buf);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "(sst)", timer_base_to_string(v->base), buf, v->next_elapse);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_next_elapse_monotonic(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Timer *t = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(t);
+
+ return sd_bus_message_append(reply, "t",
+ (uint64_t) usec_shift_clock(t->next_elapse_monotonic_or_boottime,
+ TIMER_MONOTONIC_CLOCK(t), CLOCK_MONOTONIC));
+}
+
+const sd_bus_vtable bus_timer_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Unit", "s", bus_property_get_triggered_unit, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimersMonotonic", "a(stt)", property_get_monotonic_timers, 0, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("TimersCalendar", "a(sst)", property_get_calendar_timers, 0, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("NextElapseUSecRealtime", "t", bus_property_get_usec, offsetof(Timer, next_elapse_realtime), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("NextElapseUSecMonotonic", "t", property_get_next_elapse_monotonic, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_PROPERTY_DUAL_TIMESTAMP("LastTriggerUSec", offsetof(Timer, last_trigger), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Timer, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("AccuracyUSec", "t", bus_property_get_usec, offsetof(Timer, accuracy_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RandomizedDelayUSec", "t", bus_property_get_usec, offsetof(Timer, random_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Persistent", "b", bus_property_get_bool, offsetof(Timer, persistent), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("WakeSystem", "b", bus_property_get_bool, offsetof(Timer, wake_system), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RemainAfterElapse", "b", bus_property_get_bool, offsetof(Timer, remain_after_elapse), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_VTABLE_END
+};
+
+static int bus_timer_set_transient_property(
+ Timer *t,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Unit *u = UNIT(t);
+ int r;
+
+ assert(t);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "AccuracyUSec"))
+ return bus_set_transient_usec(u, name, &t->accuracy_usec, message, flags, error);
+
+ if (streq(name, "AccuracySec")) {
+ log_notice("Client is using obsolete AccuracySec= transient property, please use AccuracyUSec= instead.");
+ return bus_set_transient_usec(u, "AccuracyUSec", &t->accuracy_usec, message, flags, error);
+ }
+
+ if (streq(name, "RandomizedDelayUSec"))
+ return bus_set_transient_usec(u, name, &t->random_usec, message, flags, error);
+
+ if (streq(name, "WakeSystem"))
+ return bus_set_transient_bool(u, name, &t->wake_system, message, flags, error);
+
+ if (streq(name, "Persistent"))
+ return bus_set_transient_bool(u, name, &t->persistent, message, flags, error);
+
+ if (streq(name, "RemainAfterElapse"))
+ return bus_set_transient_bool(u, name, &t->remain_after_elapse, message, flags, error);
+
+ if (streq(name, "TimersMonotonic")) {
+ const char *base_name;
+ usec_t usec = 0;
+ bool empty = true;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &base_name, &usec)) > 0) {
+ TimerBase b;
+
+ b = timer_base_from_string(base_name);
+ if (b < 0 || b == TIMER_CALENDAR)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid timer base: %s", base_name);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ TimerValue *v;
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=%s", base_name,
+ format_timespan(ts, sizeof(ts), usec, USEC_PER_MSEC));
+
+ v = new0(TimerValue, 1);
+ if (!v)
+ return -ENOMEM;
+
+ v->base = b;
+ v->value = usec;
+
+ LIST_PREPEND(value, t->values, v);
+ }
+
+ empty = false;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && empty) {
+ timer_free_values(t);
+ unit_write_setting(u, flags, name, "OnActiveSec=");
+ }
+
+ return 1;
+
+ } else if (streq(name, "TimersCalendar")) {
+ const char *base_name, *str;
+ bool empty = true;
+
+ r = sd_bus_message_enter_container(message, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(ss)", &base_name, &str)) > 0) {
+ _cleanup_(calendar_spec_freep) CalendarSpec *c = NULL;
+ TimerBase b;
+
+ b = timer_base_from_string(base_name);
+ if (b != TIMER_CALENDAR)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid timer base: %s", base_name);
+
+ r = calendar_spec_from_string(str, &c);
+ if (r == -EINVAL)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid calendar spec: %s", str);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ TimerValue *v;
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=%s", base_name, str);
+
+ v = new0(TimerValue, 1);
+ if (!v)
+ return -ENOMEM;
+
+ v->base = b;
+ v->calendar_spec = TAKE_PTR(c);
+
+ LIST_PREPEND(value, t->values, v);
+ }
+
+ empty = false;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && empty) {
+ timer_free_values(t);
+ unit_write_setting(u, flags, name, "OnCalendar=");
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name,
+ "OnActiveSec",
+ "OnBootSec",
+ "OnStartupSec",
+ "OnUnitActiveSec",
+ "OnUnitInactiveSec")) {
+
+ TimerValue *v;
+ TimerBase b = _TIMER_BASE_INVALID;
+ usec_t usec = 0;
+
+ log_notice("Client is using obsolete %s= transient property, please use TimersMonotonic= instead.", name);
+
+ b = timer_base_from_string(name);
+ if (b < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown timer base");
+
+ r = sd_bus_message_read(message, "t", &usec);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ char time[FORMAT_TIMESPAN_MAX];
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=%s", name,
+ format_timespan(time, sizeof(time), usec, USEC_PER_MSEC));
+
+ v = new0(TimerValue, 1);
+ if (!v)
+ return -ENOMEM;
+
+ v->base = b;
+ v->value = usec;
+
+ LIST_PREPEND(value, t->values, v);
+ }
+
+ return 1;
+
+ } else if (streq(name, "OnCalendar")) {
+
+ TimerValue *v;
+ _cleanup_(calendar_spec_freep) CalendarSpec *c = NULL;
+ const char *str;
+
+ log_notice("Client is using obsolete %s= transient property, please use TimersCalendar= instead.", name);
+
+ r = sd_bus_message_read(message, "s", &str);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = calendar_spec_from_string(str, &c);
+ if (r == -EINVAL)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid calendar spec");
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=%s", name, str);
+
+ v = new0(TimerValue, 1);
+ if (!v)
+ return -ENOMEM;
+
+ v->base = TIMER_CALENDAR;
+ v->calendar_spec = TAKE_PTR(c);
+
+ LIST_PREPEND(value, t->values, v);
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int bus_timer_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags mode,
+ sd_bus_error *error) {
+
+ Timer *t = TIMER(u);
+
+ assert(t);
+ assert(name);
+ assert(message);
+
+ if (u->transient && u->load_state == UNIT_STUB)
+ return bus_timer_set_transient_property(t, name, message, mode, error);
+
+ return 0;
+}
diff --git a/src/core/dbus-timer.h b/src/core/dbus-timer.h
new file mode 100644
index 0000000..bb126b2
--- /dev/null
+++ b/src/core/dbus-timer.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_timer_vtable[];
+
+int bus_timer_set_property(Unit *u, const char *name, sd_bus_message *i, UnitWriteFlags flags, sd_bus_error *error);
diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c
new file mode 100644
index 0000000..17c2003
--- /dev/null
+++ b/src/core/dbus-unit.c
@@ -0,0 +1,1951 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "bus-common-errors.h"
+#include "cgroup-util.h"
+#include "condition.h"
+#include "dbus-job.h"
+#include "dbus-unit.h"
+#include "dbus-util.h"
+#include "dbus.h"
+#include "fd-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "selinux-access.h"
+#include "signal-util.h"
+#include "special.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "web-util.h"
+
+static bool unit_can_start_refuse_manual(Unit *u) {
+ return unit_can_start(u) && !u->refuse_manual_start;
+}
+
+static bool unit_can_stop_refuse_manual(Unit *u) {
+ return unit_can_stop(u) && !u->refuse_manual_stop;
+}
+
+static bool unit_can_isolate_refuse_manual(Unit *u) {
+ return unit_can_isolate(u) && !u->refuse_manual_start;
+}
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_collect_mode, collect_mode, CollectMode);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_load_state, unit_load_state, UnitLoadState);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_job_mode, job_mode, JobMode);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction);
+static BUS_DEFINE_PROPERTY_GET(property_get_description, "s", Unit, unit_description);
+static BUS_DEFINE_PROPERTY_GET2(property_get_active_state, "s", Unit, unit_active_state, unit_active_state_to_string);
+static BUS_DEFINE_PROPERTY_GET(property_get_sub_state, "s", Unit, unit_sub_state_to_string);
+static BUS_DEFINE_PROPERTY_GET2(property_get_unit_file_state, "s", Unit, unit_get_unit_file_state, unit_file_state_to_string);
+static BUS_DEFINE_PROPERTY_GET(property_get_can_reload, "b", Unit, unit_can_reload);
+static BUS_DEFINE_PROPERTY_GET(property_get_can_start, "b", Unit, unit_can_start_refuse_manual);
+static BUS_DEFINE_PROPERTY_GET(property_get_can_stop, "b", Unit, unit_can_stop_refuse_manual);
+static BUS_DEFINE_PROPERTY_GET(property_get_can_isolate, "b", Unit, unit_can_isolate_refuse_manual);
+static BUS_DEFINE_PROPERTY_GET(property_get_need_daemon_reload, "b", Unit, unit_need_daemon_reload);
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_strv, "as", 0);
+
+static int property_get_names(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Set **s = userdata;
+ Iterator i;
+ const char *t;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(t, *s, i) {
+ r = sd_bus_message_append(reply, "s", t);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_following(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata, *f;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ f = unit_following(u);
+ return sd_bus_message_append(reply, "s", f ? f->id : NULL);
+}
+
+static int property_get_dependencies(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Hashmap **h = userdata;
+ Iterator j;
+ Unit *u;
+ void *v;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(h);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_KEY(v, u, *h, j) {
+ r = sd_bus_message_append(reply, "s", u->id);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_requires_mounts_for(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Hashmap **h = userdata;
+ const char *p;
+ Iterator j;
+ void *v;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(h);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_KEY(v, p, *h, j) {
+ r = sd_bus_message_append(reply, "s", p);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_unit_file_preset(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = unit_get_unit_file_preset(u);
+
+ return sd_bus_message_append(reply, "s",
+ r < 0 ? NULL:
+ r > 0 ? "enabled" : "disabled");
+}
+
+static int property_get_job(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *p = NULL;
+ Job **j = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(j);
+
+ if (!*j)
+ return sd_bus_message_append(reply, "(uo)", 0, "/");
+
+ p = job_dbus_path(*j);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_message_append(reply, "(uo)", (*j)->id, p);
+}
+
+static int property_get_conditions(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ const char *(*to_string)(ConditionType type) = NULL;
+ Condition **list = userdata, *c;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(list);
+
+ to_string = streq(property, "Asserts") ? assert_type_to_string : condition_type_to_string;
+
+ r = sd_bus_message_open_container(reply, 'a', "(sbbsi)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(conditions, c, *list) {
+ int tristate;
+
+ tristate =
+ c->result == CONDITION_UNTESTED ? 0 :
+ c->result == CONDITION_SUCCEEDED ? 1 : -1;
+
+ r = sd_bus_message_append(reply, "(sbbsi)",
+ to_string(c->type),
+ c->trigger, c->negate,
+ c->parameter, tristate);
+ if (r < 0)
+ return r;
+
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_load_error(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error e = SD_BUS_ERROR_NULL;
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = bus_unit_validate_load_state(u, &e);
+ if (r < 0)
+ return sd_bus_message_append(reply, "(ss)", e.name, e.message);
+
+ return sd_bus_message_append(reply, "(ss)", NULL, NULL);
+}
+
+static int bus_verify_manage_units_async_full(
+ Unit *u,
+ const char *verb,
+ int capability,
+ const char *polkit_message,
+ bool interactive,
+ sd_bus_message *call,
+ sd_bus_error *error) {
+
+ const char *details[9] = {
+ "unit", u->id,
+ "verb", verb,
+ };
+
+ if (polkit_message) {
+ details[4] = "polkit.message";
+ details[5] = polkit_message;
+ details[6] = "polkit.gettext_domain";
+ details[7] = GETTEXT_PACKAGE;
+ }
+
+ return bus_verify_polkit_async(
+ call,
+ capability,
+ "org.freedesktop.systemd1.manage-units",
+ details,
+ interactive,
+ UID_INVALID,
+ &u->manager->polkit_registry,
+ error);
+}
+
+int bus_unit_method_start_generic(
+ sd_bus_message *message,
+ Unit *u,
+ JobType job_type,
+ bool reload_if_possible,
+ sd_bus_error *error) {
+
+ const char *smode;
+ JobMode mode;
+ _cleanup_free_ char *verb = NULL;
+ static const char *const polkit_message_for_job[_JOB_TYPE_MAX] = {
+ [JOB_START] = N_("Authentication is required to start '$(unit)'."),
+ [JOB_STOP] = N_("Authentication is required to stop '$(unit)'."),
+ [JOB_RELOAD] = N_("Authentication is required to reload '$(unit)'."),
+ [JOB_RESTART] = N_("Authentication is required to restart '$(unit)'."),
+ [JOB_TRY_RESTART] = N_("Authentication is required to restart '$(unit)'."),
+ };
+ int r;
+
+ assert(message);
+ assert(u);
+ assert(job_type >= 0 && job_type < _JOB_TYPE_MAX);
+
+ r = mac_selinux_unit_access_check(
+ u, message,
+ job_type_to_access_method(job_type),
+ error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &smode);
+ if (r < 0)
+ return r;
+
+ mode = job_mode_from_string(smode);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Job mode %s invalid", smode);
+
+ if (reload_if_possible)
+ verb = strjoin("reload-or-", job_type_to_string(job_type));
+ else
+ verb = strdup(job_type_to_string(job_type));
+ if (!verb)
+ return -ENOMEM;
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ verb,
+ CAP_SYS_ADMIN,
+ job_type < _JOB_TYPE_MAX ? polkit_message_for_job[job_type] : NULL,
+ true,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ return bus_unit_queue_job(message, u, job_type, mode, reload_if_possible, error);
+}
+
+static int method_start(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_start_generic(message, userdata, JOB_START, false, error);
+}
+
+static int method_stop(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_start_generic(message, userdata, JOB_STOP, false, error);
+}
+
+static int method_reload(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_start_generic(message, userdata, JOB_RELOAD, false, error);
+}
+
+static int method_restart(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_start_generic(message, userdata, JOB_RESTART, false, error);
+}
+
+static int method_try_restart(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_start_generic(message, userdata, JOB_TRY_RESTART, false, error);
+}
+
+static int method_reload_or_restart(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_start_generic(message, userdata, JOB_RESTART, true, error);
+}
+
+static int method_reload_or_try_restart(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_start_generic(message, userdata, JOB_TRY_RESTART, true, error);
+}
+
+int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Unit *u = userdata;
+ const char *swho;
+ int32_t signo;
+ KillWho who;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = mac_selinux_unit_access_check(u, message, "stop", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "si", &swho, &signo);
+ if (r < 0)
+ return r;
+
+ if (isempty(swho))
+ who = KILL_ALL;
+ else {
+ who = kill_who_from_string(swho);
+ if (who < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid who argument %s", swho);
+ }
+
+ if (!SIGNAL_VALID(signo))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Signal number out of range.");
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ "kill",
+ CAP_KILL,
+ N_("Authentication is required to send a UNIX signal to the processes of '$(unit)'."),
+ true,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = unit_kill(u, who, signo, error);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Unit *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = mac_selinux_unit_access_check(u, message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ "reset-failed",
+ CAP_SYS_ADMIN,
+ N_("Authentication is required to reset the \"failed\" state of '$(unit)'."),
+ true,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ unit_reset_failed(u);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Unit *u = userdata;
+ int runtime, r;
+
+ assert(message);
+ assert(u);
+
+ r = mac_selinux_unit_access_check(u, message, "start", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "b", &runtime);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ "set-property",
+ CAP_SYS_ADMIN,
+ N_("Authentication is required to set properties on '$(unit)'."),
+ true,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = bus_unit_set_properties(u, message, runtime ? UNIT_RUNTIME : UNIT_PERSISTENT, true, error);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Unit *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = mac_selinux_unit_access_check(u, message, "start", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ "ref",
+ CAP_SYS_ADMIN,
+ NULL,
+ false,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = bus_unit_track_add_sender(u, message);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Unit *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = bus_unit_track_remove_sender(u, message);
+ if (r == -EUNATCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_REFERENCED, "Unit has not been referenced yet.");
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int property_get_refs(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata;
+ const char *i;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (i = sd_bus_track_first(u->bus_track); i; i = sd_bus_track_next(u->bus_track)) {
+ int c, k;
+
+ c = sd_bus_track_count_name(u->bus_track, i);
+ if (c < 0)
+ return c;
+
+ /* Add the item multiple times if the ref count for each is above 1 */
+ for (k = 0; k < c; k++) {
+ r = sd_bus_message_append(reply, "s", i);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+const sd_bus_vtable bus_unit_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("Id", "s", NULL, offsetof(Unit, id), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Names", "as", property_get_names, offsetof(Unit, names), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Following", "s", property_get_following, 0, 0),
+ SD_BUS_PROPERTY("Requires", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_REQUIRES]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Requisite", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_REQUISITE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Wants", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_WANTS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("BindsTo", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_BINDS_TO]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PartOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_PART_OF]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RequiredBy", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_REQUIRED_BY]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RequisiteOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_REQUISITE_OF]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("WantedBy", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_WANTED_BY]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("BoundBy", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_BOUND_BY]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ConsistsOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_CONSISTS_OF]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Conflicts", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_CONFLICTS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ConflictedBy", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_CONFLICTED_BY]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Before", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_BEFORE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("After", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_AFTER]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("OnFailure", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_ON_FAILURE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Triggers", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_TRIGGERS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TriggeredBy", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_TRIGGERED_BY]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PropagatesReloadTo", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_PROPAGATES_RELOAD_TO]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReloadPropagatedFrom", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_RELOAD_PROPAGATED_FROM]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("JoinsNamespaceOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_JOINS_NAMESPACE_OF]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RequiresMountsFor", "as", property_get_requires_mounts_for, offsetof(Unit, requires_mounts_for), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Documentation", "as", NULL, offsetof(Unit, documentation), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Description", "s", property_get_description, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LoadState", "s", property_get_load_state, offsetof(Unit, load_state), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ActiveState", "s", property_get_active_state, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("SubState", "s", property_get_sub_state, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("FragmentPath", "s", NULL, offsetof(Unit, fragment_path), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SourcePath", "s", NULL, offsetof(Unit, source_path), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DropInPaths", "as", NULL, offsetof(Unit, dropin_paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UnitFileState", "s", property_get_unit_file_state, 0, 0),
+ SD_BUS_PROPERTY("UnitFilePreset", "s", property_get_unit_file_preset, 0, 0),
+ BUS_PROPERTY_DUAL_TIMESTAMP("StateChangeTimestamp", offsetof(Unit, state_change_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InactiveExitTimestamp", offsetof(Unit, inactive_exit_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_PROPERTY_DUAL_TIMESTAMP("ActiveEnterTimestamp", offsetof(Unit, active_enter_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_PROPERTY_DUAL_TIMESTAMP("ActiveExitTimestamp", offsetof(Unit, active_exit_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InactiveEnterTimestamp", offsetof(Unit, inactive_enter_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("CanStart", "b", property_get_can_start, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CanStop", "b", property_get_can_stop, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CanReload", "b", property_get_can_reload, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CanIsolate", "b", property_get_can_isolate, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Job", "(uo)", property_get_job, offsetof(Unit, job), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("StopWhenUnneeded", "b", bus_property_get_bool, offsetof(Unit, stop_when_unneeded), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RefuseManualStart", "b", bus_property_get_bool, offsetof(Unit, refuse_manual_start), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RefuseManualStop", "b", bus_property_get_bool, offsetof(Unit, refuse_manual_stop), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("AllowIsolate", "b", bus_property_get_bool, offsetof(Unit, allow_isolate), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultDependencies", "b", bus_property_get_bool, offsetof(Unit, default_dependencies), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("OnFailureJobMode", "s", property_get_job_mode, offsetof(Unit, on_failure_job_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IgnoreOnIsolate", "b", bus_property_get_bool, offsetof(Unit, ignore_on_isolate), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NeedDaemonReload", "b", property_get_need_daemon_reload, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("JobTimeoutUSec", "t", bus_property_get_usec, offsetof(Unit, job_timeout), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("JobRunningTimeoutUSec", "t", bus_property_get_usec, offsetof(Unit, job_running_timeout), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_emergency_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("JobTimeoutRebootArgument", "s", NULL, offsetof(Unit, job_timeout_reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ConditionResult", "b", bus_property_get_bool, offsetof(Unit, condition_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("AssertResult", "b", bus_property_get_bool, offsetof(Unit, assert_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_PROPERTY_DUAL_TIMESTAMP("ConditionTimestamp", offsetof(Unit, condition_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_PROPERTY_DUAL_TIMESTAMP("AssertTimestamp", offsetof(Unit, assert_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Conditions", "a(sbbsi)", property_get_conditions, offsetof(Unit, conditions), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("Asserts", "a(sbbsi)", property_get_conditions, offsetof(Unit, asserts), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("LoadError", "(ss)", property_get_load_error, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Transient", "b", bus_property_get_bool, offsetof(Unit, transient), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Perpetual", "b", bus_property_get_bool, offsetof(Unit, perpetual), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StartLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FailureAction", "s", property_get_emergency_action, offsetof(Unit, failure_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FailureActionExitStatus", "i", bus_property_get_int, offsetof(Unit, failure_action_exit_status), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SuccessAction", "s", property_get_emergency_action, offsetof(Unit, success_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SuccessActionExitStatus", "i", bus_property_get_int, offsetof(Unit, success_action_exit_status), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("InvocationID", "ay", bus_property_get_id128, offsetof(Unit, invocation_id), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("CollectMode", "s", property_get_collect_mode, offsetof(Unit, collect_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Refs", "as", property_get_refs, 0, 0),
+
+ SD_BUS_METHOD("Start", "s", "o", method_start, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Stop", "s", "o", method_stop, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Reload", "s", "o", method_reload, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Restart", "s", "o", method_restart, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("TryRestart", "s", "o", method_try_restart, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ReloadOrRestart", "s", "o", method_reload_or_restart, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ReloadOrTryRestart", "s", "o", method_reload_or_try_restart, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Kill", "si", NULL, bus_unit_method_kill, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ResetFailed", NULL, NULL, bus_unit_method_reset_failed, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetProperties", "ba(sv)", NULL, bus_unit_method_set_properties, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Ref", NULL, NULL, bus_unit_method_ref, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Unref", NULL, NULL, bus_unit_method_unref, SD_BUS_VTABLE_UNPRIVILEGED),
+
+ /* For dependency types we don't support anymore always return an empty array */
+ SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_empty_strv, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RequisiteOverridable", "as", property_get_empty_strv, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RequiredByOverridable", "as", property_get_empty_strv, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RequisiteOfOverridable", "as", property_get_empty_strv, 0, SD_BUS_VTABLE_HIDDEN),
+ /* Obsolete alias names */
+ SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("StartLimitIntervalSec", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_VTABLE_END
+};
+
+static int property_get_slice(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ return sd_bus_message_append(reply, "s", unit_slice_name(u));
+}
+
+static int property_get_current_memory(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ uint64_t sz = (uint64_t) -1;
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = unit_get_memory_current(u, &sz);
+ if (r < 0 && r != -ENODATA)
+ log_unit_warning_errno(u, r, "Failed to get memory.usage_in_bytes attribute: %m");
+
+ return sd_bus_message_append(reply, "t", sz);
+}
+
+static int property_get_current_tasks(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ uint64_t cn = (uint64_t) -1;
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = unit_get_tasks_current(u, &cn);
+ if (r < 0 && r != -ENODATA)
+ log_unit_warning_errno(u, r, "Failed to get pids.current attribute: %m");
+
+ return sd_bus_message_append(reply, "t", cn);
+}
+
+static int property_get_cpu_usage(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ nsec_t ns = (nsec_t) -1;
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = unit_get_cpu_usage(u, &ns);
+ if (r < 0 && r != -ENODATA)
+ log_unit_warning_errno(u, r, "Failed to get cpuacct.usage attribute: %m");
+
+ return sd_bus_message_append(reply, "t", ns);
+}
+
+static int property_get_cgroup(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata;
+ const char *t = NULL;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ /* Three cases: a) u->cgroup_path is NULL, in which case the
+ * unit has no control group, which we report as the empty
+ * string. b) u->cgroup_path is the empty string, which
+ * indicates the root cgroup, which we report as "/". c) all
+ * other cases we report as-is. */
+
+ if (u->cgroup_path)
+ t = empty_to_root(u->cgroup_path);
+
+ return sd_bus_message_append(reply, "s", t);
+}
+
+static int append_process(sd_bus_message *reply, const char *p, pid_t pid, Set *pids) {
+ _cleanup_free_ char *buf = NULL, *cmdline = NULL;
+ int r;
+
+ assert(reply);
+ assert(pid > 0);
+
+ r = set_put(pids, PID_TO_PTR(pid));
+ if (IN_SET(r, 0, -EEXIST))
+ return 0;
+ if (r < 0)
+ return r;
+
+ if (!p) {
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &buf);
+ if (r == -ESRCH)
+ return 0;
+ if (r < 0)
+ return r;
+
+ p = buf;
+ }
+
+ (void) get_process_cmdline(pid, 0, true, &cmdline);
+
+ return sd_bus_message_append(reply,
+ "(sus)",
+ p,
+ (uint32_t) pid,
+ cmdline);
+}
+
+static int append_cgroup(sd_bus_message *reply, const char *p, Set *pids) {
+ _cleanup_closedir_ DIR *d = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(reply);
+ assert(p);
+
+ r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, p, &f);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ pid_t pid;
+
+ r = cg_read_pid(f, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (is_kernel_thread(pid) > 0)
+ continue;
+
+ r = append_process(reply, p, pid, pids);
+ if (r < 0)
+ return r;
+ }
+
+ r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, p, &d);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *g = NULL, *j = NULL;
+
+ r = cg_read_subgroup(d, &g);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ j = strjoin(p, "/", g);
+ if (!j)
+ return -ENOMEM;
+
+ r = append_cgroup(reply, j, pids);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_set_free_ Set *pids = NULL;
+ Unit *u = userdata;
+ pid_t pid;
+ int r;
+
+ assert(message);
+
+ r = mac_selinux_unit_access_check(u, message, "status", error);
+ if (r < 0)
+ return r;
+
+ pids = set_new(NULL);
+ if (!pids)
+ return -ENOMEM;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(sus)");
+ if (r < 0)
+ return r;
+
+ if (u->cgroup_path) {
+ r = append_cgroup(reply, u->cgroup_path, pids);
+ if (r < 0)
+ return r;
+ }
+
+ /* The main and control pids might live outside of the cgroup, hence fetch them separately */
+ pid = unit_main_pid(u);
+ if (pid > 0) {
+ r = append_process(reply, NULL, pid, pids);
+ if (r < 0)
+ return r;
+ }
+
+ pid = unit_control_pid(u);
+ if (pid > 0) {
+ r = append_process(reply, NULL, pid, pids);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int property_get_ip_counter(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupIPAccountingMetric metric;
+ uint64_t value = (uint64_t) -1;
+ Unit *u = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(property);
+ assert(u);
+
+ if (streq(property, "IPIngressBytes"))
+ metric = CGROUP_IP_INGRESS_BYTES;
+ else if (streq(property, "IPIngressPackets"))
+ metric = CGROUP_IP_INGRESS_PACKETS;
+ else if (streq(property, "IPEgressBytes"))
+ metric = CGROUP_IP_EGRESS_BYTES;
+ else {
+ assert(streq(property, "IPEgressPackets"));
+ metric = CGROUP_IP_EGRESS_PACKETS;
+ }
+
+ (void) unit_get_ip_accounting(u, metric, &value);
+ return sd_bus_message_append(reply, "t", value);
+}
+
+int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ _cleanup_set_free_ Set *pids = NULL;
+ Unit *u = userdata;
+ const char *path;
+ int r;
+
+ assert(message);
+
+ /* This migrates the processes with the specified PIDs into the cgroup of this unit, optionally below a
+ * specified cgroup path. Obviously this only works for units that actually maintain a cgroup
+ * representation. If a process is already in the cgroup no operation is executed – in this case the specified
+ * subcgroup path has no effect! */
+
+ r = mac_selinux_unit_access_check(u, message, "start", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &path);
+ if (r < 0)
+ return r;
+
+ path = empty_to_null(path);
+ if (path) {
+ if (!path_is_absolute(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Control group path is not absolute: %s", path);
+
+ if (!path_is_normalized(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Control group path is not normalized: %s", path);
+ }
+
+ if (!unit_cgroup_delegate(u))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process migration not available on non-delegated units.");
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(u)))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit is not active, refusing.");
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID|SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(message, 'a', "u");
+ if (r < 0)
+ return r;
+ for (;;) {
+ uid_t process_uid, sender_uid;
+ uint32_t upid;
+ pid_t pid;
+
+ r = sd_bus_message_read(message, "u", &upid);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (upid == 0) {
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+ } else
+ pid = (uid_t) upid;
+
+ /* Filter out duplicates */
+ if (set_contains(pids, PID_TO_PTR(pid)))
+ continue;
+
+ /* Check if this process is suitable for attaching to this unit */
+ r = unit_pid_attachable(u, pid, error);
+ if (r < 0)
+ return r;
+
+ /* Let's query the sender's UID, so that we can make our security decisions */
+ r = sd_bus_creds_get_euid(creds, &sender_uid);
+ if (r < 0)
+ return r;
+
+ /* Let's validate security: if the sender is root, then all is OK. If the sender is any other unit,
+ * then the process' UID and the target unit's UID have to match the sender's UID */
+ if (sender_uid != 0 && sender_uid != getuid()) {
+ r = get_process_uid(pid, &process_uid);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to retrieve process UID: %m");
+
+ if (process_uid != sender_uid)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Process " PID_FMT " not owned by client's UID. Refusing.", pid);
+ if (process_uid != u->ref_uid)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Process " PID_FMT " not owned by target unit's UID. Refusing.", pid);
+ }
+
+ if (!pids) {
+ pids = set_new(NULL);
+ if (!pids)
+ return -ENOMEM;
+ }
+
+ r = set_put(pids, PID_TO_PTR(pid));
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ r = unit_attach_pids_to_cgroup(u, pids, path);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to attach processes to control group: %m");
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+const sd_bus_vtable bus_unit_cgroup_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Slice", "s", property_get_slice, 0, 0),
+ SD_BUS_PROPERTY("ControlGroup", "s", property_get_cgroup, 0, 0),
+ SD_BUS_PROPERTY("MemoryCurrent", "t", property_get_current_memory, 0, 0),
+ SD_BUS_PROPERTY("CPUUsageNSec", "t", property_get_cpu_usage, 0, 0),
+ SD_BUS_PROPERTY("TasksCurrent", "t", property_get_current_tasks, 0, 0),
+ SD_BUS_PROPERTY("IPIngressBytes", "t", property_get_ip_counter, 0, 0),
+ SD_BUS_PROPERTY("IPIngressPackets", "t", property_get_ip_counter, 0, 0),
+ SD_BUS_PROPERTY("IPEgressBytes", "t", property_get_ip_counter, 0, 0),
+ SD_BUS_PROPERTY("IPEgressPackets", "t", property_get_ip_counter, 0, 0),
+ SD_BUS_METHOD("GetProcesses", NULL, "a(sus)", bus_unit_method_get_processes, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("AttachProcesses", "sau", NULL, bus_unit_method_attach_processes, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END
+};
+
+static int send_new_signal(sd_bus *bus, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *p = NULL;
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(u);
+
+ p = unit_dbus_path(u);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "UnitNew");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "so", u->id, p);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+static int send_changed_signal(sd_bus *bus, void *userdata) {
+ _cleanup_free_ char *p = NULL;
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(u);
+
+ p = unit_dbus_path(u);
+ if (!p)
+ return -ENOMEM;
+
+ /* Send a properties changed signal. First for the specific
+ * type, then for the generic unit. The clients may rely on
+ * this order to get atomic behavior if needed. */
+
+ r = sd_bus_emit_properties_changed_strv(
+ bus, p,
+ unit_dbus_interface_from_type(u->type),
+ NULL);
+ if (r < 0)
+ return r;
+
+ return sd_bus_emit_properties_changed_strv(
+ bus, p,
+ "org.freedesktop.systemd1.Unit",
+ NULL);
+}
+
+void bus_unit_send_change_signal(Unit *u) {
+ int r;
+ assert(u);
+
+ if (u->in_dbus_queue) {
+ LIST_REMOVE(dbus_queue, u->manager->dbus_unit_queue, u);
+ u->in_dbus_queue = false;
+ }
+
+ if (!u->id)
+ return;
+
+ r = bus_foreach_bus(u->manager, u->bus_track, u->sent_dbus_new_signal ? send_changed_signal : send_new_signal, u);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to send unit change signal for %s: %m", u->id);
+
+ u->sent_dbus_new_signal = true;
+}
+
+void bus_unit_send_pending_change_signal(Unit *u, bool including_new) {
+
+ /* Sends out any pending change signals, but only if they really are pending. This call is used when we are
+ * about to change state in order to force out a PropertiesChanged signal beforehand if there was one pending
+ * so that clients can follow the full state transition */
+
+ if (!u->in_dbus_queue) /* If not enqueued, don't bother */
+ return;
+
+ if (!u->sent_dbus_new_signal && !including_new) /* If the unit was never announced, don't bother, it's fine if
+ * the unit appears in the new state right-away (except if the
+ * caller explicitly asked us to send it anyway) */
+ return;
+
+ if (MANAGER_IS_RELOADING(u->manager)) /* Don't generate unnecessary PropertiesChanged signals for the same unit
+ * when we are reloading. */
+ return;
+
+ bus_unit_send_change_signal(u);
+}
+
+static int send_removed_signal(sd_bus *bus, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *p = NULL;
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(u);
+
+ p = unit_dbus_path(u);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "UnitRemoved");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "so", u->id, p);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+void bus_unit_send_removed_signal(Unit *u) {
+ int r;
+ assert(u);
+
+ if (!u->sent_dbus_new_signal || u->in_dbus_queue)
+ bus_unit_send_change_signal(u);
+
+ if (!u->id)
+ return;
+
+ r = bus_foreach_bus(u->manager, u->bus_track, send_removed_signal, u);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to send unit remove signal for %s: %m", u->id);
+}
+
+int bus_unit_queue_job(
+ sd_bus_message *message,
+ Unit *u,
+ JobType type,
+ JobMode mode,
+ bool reload_if_possible,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *path = NULL;
+ Job *j;
+ int r;
+
+ assert(message);
+ assert(u);
+ assert(type >= 0 && type < _JOB_TYPE_MAX);
+ assert(mode >= 0 && mode < _JOB_MODE_MAX);
+
+ r = mac_selinux_unit_access_check(
+ u, message,
+ job_type_to_access_method(type),
+ error);
+ if (r < 0)
+ return r;
+
+ if (reload_if_possible && unit_can_reload(u)) {
+ if (type == JOB_RESTART)
+ type = JOB_RELOAD_OR_START;
+ else if (type == JOB_TRY_RESTART)
+ type = JOB_TRY_RELOAD;
+ }
+
+ if (type == JOB_STOP &&
+ IN_SET(u->load_state, UNIT_NOT_FOUND, UNIT_ERROR, UNIT_BAD_SETTING) &&
+ unit_active_state(u) == UNIT_INACTIVE)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s not loaded.", u->id);
+
+ if ((type == JOB_START && u->refuse_manual_start) ||
+ (type == JOB_STOP && u->refuse_manual_stop) ||
+ (IN_SET(type, JOB_RESTART, JOB_TRY_RESTART) && (u->refuse_manual_start || u->refuse_manual_stop)) ||
+ (type == JOB_RELOAD_OR_START && job_type_collapse(type, u) == JOB_START && u->refuse_manual_start))
+ return sd_bus_error_setf(error, BUS_ERROR_ONLY_BY_DEPENDENCY, "Operation refused, unit %s may be requested by dependency only (it is configured to refuse manual start/stop).", u->id);
+
+ r = manager_add_job(u->manager, type, u, mode, error, &j);
+ if (r < 0)
+ return r;
+
+ r = bus_job_track_sender(j, message);
+ if (r < 0)
+ return r;
+
+ path = job_dbus_path(j);
+ if (!path)
+ return -ENOMEM;
+
+ /* Before we send the method reply, force out the announcement JobNew for this job */
+ bus_job_send_pending_change_signal(j, true);
+
+ return sd_bus_reply_method_return(message, "o", path);
+}
+
+static int bus_unit_set_live_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(u);
+ assert(name);
+ assert(message);
+
+ /* Handles setting properties both "live" (i.e. at any time during runtime), and during creation (for transient
+ * units that are being created). */
+
+ if (streq(name, "Description")) {
+ const char *d;
+
+ r = sd_bus_message_read(message, "s", &d);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = unit_set_description(u, d);
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "Description=%s", d);
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_set_transient_emergency_action(
+ Unit *u,
+ const char *name,
+ EmergencyAction *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ const char *s;
+ EmergencyAction v;
+ int r;
+ bool system;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ system = MANAGER_IS_SYSTEM(u->manager);
+ r = parse_emergency_action(s, system, &v);
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ r == -EOPNOTSUPP ? "%s setting invalid for manager type: %s"
+ : "Invalid %s setting: %s",
+ name, s);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = v;
+ unit_write_settingf(u, flags, name,
+ "%s=%s", name, s);
+ }
+
+ return 1;
+}
+
+static int bus_set_transient_exit_status(
+ Unit *u,
+ const char *name,
+ int *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int32_t k;
+ int r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "i", &k);
+ if (r < 0)
+ return r;
+
+ if (k > 255)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Exit status must be in range 0…255 or negative.");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = k < 0 ? -1 : k;
+
+ if (k < 0)
+ unit_write_settingf(u, flags, name, "%s=", name);
+ else
+ unit_write_settingf(u, flags, name, "%s=%i", name, k);
+ }
+
+ return 1;
+}
+
+static BUS_DEFINE_SET_TRANSIENT_PARSE(collect_mode, CollectMode, collect_mode_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(job_mode, JobMode, job_mode_from_string);
+
+static int bus_set_transient_conditions(
+ Unit *u,
+ const char *name,
+ Condition **list,
+ bool is_condition,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ const char *type_name, *param;
+ int trigger, negate, r;
+ bool empty = true;
+
+ assert(list);
+
+ r = sd_bus_message_enter_container(message, 'a', "(sbbs)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(sbbs)", &type_name, &trigger, &negate, &param)) > 0) {
+ ConditionType t;
+
+ t = is_condition ? condition_type_from_string(type_name) : assert_type_from_string(type_name);
+ if (t < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid condition type: %s", type_name);
+
+ if (t != CONDITION_NULL) {
+ if (isempty(param))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Condition parameter in %s is empty", type_name);
+
+ if (condition_takes_path(t) && !path_is_absolute(param))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path in condition %s is not absolute: %s", type_name, param);
+ } else
+ param = NULL;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ Condition *c;
+
+ c = condition_new(t, param, trigger, negate);
+ if (!c)
+ return -ENOMEM;
+
+ LIST_PREPEND(conditions, *list, c);
+
+ if (t != CONDITION_NULL)
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name,
+ "%s=%s%s%s", type_name,
+ trigger ? "|" : "", negate ? "!" : "", param);
+ else
+ unit_write_settingf(u, flags, name,
+ "%s=%s%s", type_name,
+ trigger ? "|" : "", yes_no(!negate));
+ }
+
+ empty = false;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && empty) {
+ *list = condition_free_list(*list);
+ unit_write_settingf(u, flags, name, "%sNull=", is_condition ? "Condition" : "Assert");
+ }
+
+ return 1;
+}
+
+static int bus_unit_set_transient_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ UnitDependency d = _UNIT_DEPENDENCY_INVALID;
+ int r;
+
+ assert(u);
+ assert(name);
+ assert(message);
+
+ /* Handles settings when transient units are created. This settings cannot be altered anymore after the unit
+ * has been created. */
+
+ if (streq(name, "SourcePath"))
+ return bus_set_transient_path(u, name, &u->source_path, message, flags, error);
+
+ if (streq(name, "StopWhenUnneeded"))
+ return bus_set_transient_bool(u, name, &u->stop_when_unneeded, message, flags, error);
+
+ if (streq(name, "RefuseManualStart"))
+ return bus_set_transient_bool(u, name, &u->refuse_manual_start, message, flags, error);
+
+ if (streq(name, "RefuseManualStop"))
+ return bus_set_transient_bool(u, name, &u->refuse_manual_stop, message, flags, error);
+
+ if (streq(name, "AllowIsolate"))
+ return bus_set_transient_bool(u, name, &u->allow_isolate, message, flags, error);
+
+ if (streq(name, "DefaultDependencies"))
+ return bus_set_transient_bool(u, name, &u->default_dependencies, message, flags, error);
+
+ if (streq(name, "OnFailureJobMode"))
+ return bus_set_transient_job_mode(u, name, &u->on_failure_job_mode, message, flags, error);
+
+ if (streq(name, "IgnoreOnIsolate"))
+ return bus_set_transient_bool(u, name, &u->ignore_on_isolate, message, flags, error);
+
+ if (streq(name, "JobTimeoutUSec")) {
+ r = bus_set_transient_usec_fix_0(u, name, &u->job_timeout, message, flags, error);
+ if (r >= 0 && !UNIT_WRITE_FLAGS_NOOP(flags) && !u->job_running_timeout_set)
+ u->job_running_timeout = u->job_timeout;
+ }
+
+ if (streq(name, "JobRunningTimeoutUSec")) {
+ r = bus_set_transient_usec_fix_0(u, name, &u->job_running_timeout, message, flags, error);
+ if (r >= 0 && !UNIT_WRITE_FLAGS_NOOP(flags))
+ u->job_running_timeout_set = true;
+
+ return r;
+ }
+
+ if (streq(name, "JobTimeoutAction"))
+ return bus_set_transient_emergency_action(u, name, &u->job_timeout_action, message, flags, error);
+
+ if (streq(name, "JobTimeoutRebootArgument"))
+ return bus_set_transient_string(u, name, &u->job_timeout_reboot_arg, message, flags, error);
+
+ if (streq(name, "StartLimitIntervalUSec"))
+ return bus_set_transient_usec(u, name, &u->start_limit.interval, message, flags, error);
+
+ if (streq(name, "StartLimitBurst"))
+ return bus_set_transient_unsigned(u, name, &u->start_limit.burst, message, flags, error);
+
+ if (streq(name, "StartLimitAction"))
+ return bus_set_transient_emergency_action(u, name, &u->start_limit_action, message, flags, error);
+
+ if (streq(name, "FailureAction"))
+ return bus_set_transient_emergency_action(u, name, &u->failure_action, message, flags, error);
+
+ if (streq(name, "SuccessAction"))
+ return bus_set_transient_emergency_action(u, name, &u->success_action, message, flags, error);
+
+ if (streq(name, "FailureActionExitStatus"))
+ return bus_set_transient_exit_status(u, name, &u->failure_action_exit_status, message, flags, error);
+
+ if (streq(name, "SuccessActionExitStatus"))
+ return bus_set_transient_exit_status(u, name, &u->success_action_exit_status, message, flags, error);
+
+ if (streq(name, "RebootArgument"))
+ return bus_set_transient_string(u, name, &u->reboot_arg, message, flags, error);
+
+ if (streq(name, "CollectMode"))
+ return bus_set_transient_collect_mode(u, name, &u->collect_mode, message, flags, error);
+
+ if (streq(name, "Conditions"))
+ return bus_set_transient_conditions(u, name, &u->conditions, true, message, flags, error);
+
+ if (streq(name, "Asserts"))
+ return bus_set_transient_conditions(u, name, &u->asserts, false, message, flags, error);
+
+ if (streq(name, "Documentation")) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **p;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, l) {
+ if (!documentation_url_is_valid(*p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid URL in %s: %s", name, *p);
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (strv_isempty(l)) {
+ u->documentation = strv_free(u->documentation);
+ unit_write_settingf(u, flags, name, "%s=", name);
+ } else {
+ strv_extend_strv(&u->documentation, l, false);
+
+ STRV_FOREACH(p, l)
+ unit_write_settingf(u, flags, name, "%s=%s", name, *p);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "Slice")) {
+ Unit *slice;
+ const char *s;
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "The slice property is only available for units with control groups.");
+ if (u->type == UNIT_SLICE)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Slice may not be set for slice units.");
+ if (unit_has_name(u, SPECIAL_INIT_SCOPE))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Cannot set slice for init.scope");
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ if (!unit_name_is_valid(s, UNIT_NAME_PLAIN))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid unit name '%s'", s);
+
+ /* Note that we do not dispatch the load queue here yet, as we don't want our own transient unit to be
+ * loaded while we are still setting it up. Or in other words, we use manager_load_unit_prepare()
+ * instead of manager_load_unit() on purpose, here. */
+ r = manager_load_unit_prepare(u->manager, s, NULL, error, &slice);
+ if (r < 0)
+ return r;
+
+ if (slice->type != UNIT_SLICE)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit name '%s' is not a slice", s);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = unit_set_slice(u, slice);
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(u, flags|UNIT_PRIVATE, name, "Slice=%s", s);
+ }
+
+ return 1;
+
+ } else if (streq(name, "RequiresMountsFor")) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **p;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, l) {
+ if (!path_is_absolute(*p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path specified in %s is not absolute: %s", name, *p);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = unit_require_mounts_for(u, *p, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Failed to add required mount \"%s\": %m", *p);
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, *p);
+ }
+ }
+
+ return 1;
+ }
+
+ if (streq(name, "RequiresOverridable"))
+ d = UNIT_REQUIRES; /* redirect for obsolete unit dependency type */
+ else if (streq(name, "RequisiteOverridable"))
+ d = UNIT_REQUISITE; /* same here */
+ else
+ d = unit_dependency_from_string(name);
+
+ if (d >= 0) {
+ const char *other;
+
+ r = sd_bus_message_enter_container(message, 'a', "s");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "s", &other)) > 0) {
+ if (!unit_name_is_valid(other, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid unit name %s", other);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *label = NULL;
+
+ r = unit_add_dependency_by_name(u, d, other, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+
+ label = strjoin(name, "-", other);
+ if (!label)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, label, "%s=%s", unit_dependency_to_string(d), other);
+ }
+
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ return 1;
+
+ } else if (streq(name, "AddRef")) {
+
+ int b;
+
+ /* Why is this called "AddRef" rather than just "Ref", or "Reference"? There's already a "Ref()" method
+ * on the Unit interface, and it's probably not a good idea to expose a property and a method on the
+ * same interface (well, strictly speaking AddRef isn't exposed as full property, we just read it for
+ * transient units, but still). And "References" and "ReferencedBy" is already used as unit reference
+ * dependency type, hence let's not confuse things with that.
+ *
+ * Note that we don't acually add the reference to the bus track. We do that only after the setup of
+ * the transient unit is complete, so that setting this property multiple times in the same transient
+ * unit creation call doesn't count as individual references. */
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags))
+ u->bus_track_add = b;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int bus_unit_set_properties(
+ Unit *u,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ bool commit,
+ sd_bus_error *error) {
+
+ bool for_real = false;
+ unsigned n = 0;
+ int r;
+
+ assert(u);
+ assert(message);
+
+ /* We iterate through the array twice. First run we just check
+ * if all passed data is valid, second run actually applies
+ * it. This is to implement transaction-like behaviour without
+ * actually providing full transactions. */
+
+ r = sd_bus_message_enter_container(message, 'a', "(sv)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *name;
+ UnitWriteFlags f;
+
+ r = sd_bus_message_enter_container(message, 'r', "sv");
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (for_real || UNIT_WRITE_FLAGS_NOOP(flags))
+ break;
+
+ /* Reached EOF. Let's try again, and this time for realz... */
+ r = sd_bus_message_rewind(message, false);
+ if (r < 0)
+ return r;
+
+ for_real = true;
+ continue;
+ }
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_VTABLE(u)->bus_set_property)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_PROPERTY_READ_ONLY, "Objects of this type do not support setting properties.");
+
+ r = sd_bus_message_enter_container(message, 'v', NULL);
+ if (r < 0)
+ return r;
+
+ /* If not for real, then mask out the two target flags */
+ f = for_real ? flags : (flags & ~(UNIT_RUNTIME|UNIT_PERSISTENT));
+
+ r = UNIT_VTABLE(u)->bus_set_property(u, name, message, f, error);
+ if (r == 0 && u->transient && u->load_state == UNIT_STUB)
+ r = bus_unit_set_transient_property(u, name, message, f, error);
+ if (r == 0)
+ r = bus_unit_set_live_property(u, name, message, f, error);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_PROPERTY_READ_ONLY, "Cannot set property %s, or unknown property.", name);
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ n += for_real;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (commit && n > 0 && UNIT_VTABLE(u)->bus_commit_properties)
+ UNIT_VTABLE(u)->bus_commit_properties(u);
+
+ return n;
+}
+
+int bus_unit_validate_load_state(Unit *u, sd_bus_error *error) {
+ assert(u);
+
+ /* Generates a pretty error if a unit isn't properly loaded. */
+
+ switch (u->load_state) {
+
+ case UNIT_LOADED:
+ return 0;
+
+ case UNIT_NOT_FOUND:
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s not found.", u->id);
+
+ case UNIT_BAD_SETTING:
+ return sd_bus_error_setf(error, BUS_ERROR_BAD_UNIT_SETTING, "Unit %s has a bad unit file setting.", u->id);
+
+ case UNIT_ERROR: /* Only show .load_error in UNIT_ERROR state */
+ return sd_bus_error_set_errnof(error, u->load_error, "Unit %s failed to load properly: %m.", u->id);
+
+ case UNIT_MASKED:
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_MASKED, "Unit %s is masked.", u->id);
+
+ case UNIT_STUB:
+ case UNIT_MERGED:
+ default:
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Unexpected load state of unit %s", u->id);
+ }
+}
+
+static int bus_unit_track_handler(sd_bus_track *t, void *userdata) {
+ Unit *u = userdata;
+
+ assert(t);
+ assert(u);
+
+ u->bus_track = sd_bus_track_unref(u->bus_track); /* make sure we aren't called again */
+
+ /* If the client that tracks us disappeared, then there's reason to believe that the cgroup is empty now too,
+ * let's see */
+ unit_add_to_cgroup_empty_queue(u);
+
+ /* Also add the unit to the GC queue, after all if the client left it might be time to GC this unit */
+ unit_add_to_gc_queue(u);
+
+ return 0;
+}
+
+static int bus_unit_allocate_bus_track(Unit *u) {
+ int r;
+
+ assert(u);
+
+ if (u->bus_track)
+ return 0;
+
+ r = sd_bus_track_new(u->manager->api_bus, &u->bus_track, bus_unit_track_handler, u);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_track_set_recursive(u->bus_track, true);
+ if (r < 0) {
+ u->bus_track = sd_bus_track_unref(u->bus_track);
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_unit_track_add_name(Unit *u, const char *name) {
+ int r;
+
+ assert(u);
+
+ r = bus_unit_allocate_bus_track(u);
+ if (r < 0)
+ return r;
+
+ return sd_bus_track_add_name(u->bus_track, name);
+}
+
+int bus_unit_track_add_sender(Unit *u, sd_bus_message *m) {
+ int r;
+
+ assert(u);
+
+ r = bus_unit_allocate_bus_track(u);
+ if (r < 0)
+ return r;
+
+ return sd_bus_track_add_sender(u->bus_track, m);
+}
+
+int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m) {
+ assert(u);
+
+ /* If we haven't allocated the bus track object yet, then there's definitely no reference taken yet, return an
+ * error */
+ if (!u->bus_track)
+ return -EUNATCH;
+
+ return sd_bus_track_remove_sender(u->bus_track, m);
+}
diff --git a/src/core/dbus-unit.h b/src/core/dbus-unit.h
new file mode 100644
index 0000000..345345e
--- /dev/null
+++ b/src/core/dbus-unit.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "job.h"
+#include "unit.h"
+
+extern const sd_bus_vtable bus_unit_vtable[];
+extern const sd_bus_vtable bus_unit_cgroup_vtable[];
+
+void bus_unit_send_change_signal(Unit *u);
+void bus_unit_send_pending_change_signal(Unit *u, bool including_new);
+void bus_unit_send_removed_signal(Unit *u);
+
+int bus_unit_method_start_generic(sd_bus_message *message, Unit *u, JobType job_type, bool reload_if_possible, sd_bus_error *error);
+int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+int bus_unit_set_properties(Unit *u, sd_bus_message *message, UnitWriteFlags flags, bool commit, sd_bus_error *error);
+int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+int bus_unit_queue_job(sd_bus_message *message, Unit *u, JobType type, JobMode mode, bool reload_if_possible, sd_bus_error *error);
+int bus_unit_validate_load_state(Unit *u, sd_bus_error *error);
+
+int bus_unit_track_add_name(Unit *u, const char *name);
+int bus_unit_track_add_sender(Unit *u, sd_bus_message *m);
+int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m);
diff --git a/src/core/dbus-util.c b/src/core/dbus-util.c
new file mode 100644
index 0000000..f4fbb72
--- /dev/null
+++ b/src/core/dbus-util.c
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "bus-util.h"
+#include "dbus-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "unit-printf.h"
+#include "user-util.h"
+#include "unit.h"
+
+int bus_property_get_triggered_unit(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata, *trigger;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ trigger = UNIT_TRIGGER(u);
+
+ return sd_bus_message_append(reply, "s", trigger ? trigger->id : NULL);
+}
+
+BUS_DEFINE_SET_TRANSIENT(mode_t, "u", uint32_t, mode_t, "%040o");
+BUS_DEFINE_SET_TRANSIENT(unsigned, "u", uint32_t, unsigned, "%" PRIu32);
+BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(user, valid_user_group_name_or_id);
+BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(path, path_is_absolute);
+
+int bus_set_transient_string(
+ Unit *u,
+ const char *name,
+ char **p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ const char *v;
+ int r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "s", &v);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = free_and_strdup(p, empty_to_null(v));
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name,
+ "%s=%s", name, strempty(v));
+ }
+
+ return 1;
+}
+
+int bus_set_transient_bool(
+ Unit *u,
+ const char *name,
+ bool *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int v, r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "b", &v);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = v;
+ unit_write_settingf(u, flags, name, "%s=%s", name, yes_no(v));
+ }
+
+ return 1;
+}
+
+int bus_set_transient_usec_internal(
+ Unit *u,
+ const char *name,
+ usec_t *p,
+ bool fix_0,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ uint64_t v;
+ int r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "t", &v);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ char *n, ts[FORMAT_TIMESPAN_MAX];
+
+ if (fix_0)
+ *p = v != 0 ? v: USEC_INFINITY;
+ else
+ *p = v;
+
+ n = strndupa(name, strlen(name) - 4);
+ unit_write_settingf(u, flags, name, "%sSec=%s", n,
+ format_timespan(ts, sizeof(ts), v, USEC_PER_MSEC));
+ }
+
+ return 1;
+}
diff --git a/src/core/dbus-util.h b/src/core/dbus-util.h
new file mode 100644
index 0000000..12b055e
--- /dev/null
+++ b/src/core/dbus-util.h
@@ -0,0 +1,248 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "unit.h"
+
+int bus_property_get_triggered_unit(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+
+#define BUS_DEFINE_SET_TRANSIENT(function, bus_type, type, cast_type, fmt) \
+ int bus_set_transient_##function( \
+ Unit *u, \
+ const char *name, \
+ cast_type *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ type v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, bus_type, &v); \
+ if (r < 0) \
+ return r; \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = (cast_type) v; \
+ unit_write_settingf(u, flags, name, \
+ "%s=" fmt, name, v); \
+ } \
+ \
+ return 1; \
+ }
+
+#define BUS_DEFINE_SET_TRANSIENT_IS_VALID(function, bus_type, type, cast_type, fmt, check) \
+ int bus_set_transient_##function( \
+ Unit *u, \
+ const char *name, \
+ cast_type *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ type v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, bus_type, &v); \
+ if (r < 0) \
+ return r; \
+ \
+ if (!check(v)) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Invalid %s setting: " fmt, name, v); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = (cast_type) v; \
+ unit_write_settingf(u, flags, name, \
+ "%s=" fmt, name, v); \
+ } \
+ \
+ return 1; \
+ }
+
+#define BUS_DEFINE_SET_TRANSIENT_TO_STRING(function, bus_type, type, cast_type, fmt, to_string) \
+ int bus_set_transient_##function( \
+ Unit *u, \
+ const char *name, \
+ cast_type *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ const char *s; \
+ type v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, bus_type, &v); \
+ if (r < 0) \
+ return r; \
+ \
+ s = to_string(v); \
+ if (!s) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Invalid %s setting: " fmt, name, v); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = (cast_type) v; \
+ unit_write_settingf(u, flags, name, \
+ "%s=%s", name, s); \
+ } \
+ \
+ return 1; \
+ }
+
+#define BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(function, bus_type, type, cast_type, fmt, to_string) \
+ int bus_set_transient_##function( \
+ Unit *u, \
+ const char *name, \
+ cast_type *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ _cleanup_free_ char *s = NULL; \
+ type v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, bus_type, &v); \
+ if (r < 0) \
+ return r; \
+ \
+ r = to_string(v, &s); \
+ if (r == -EINVAL) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Invalid %s setting: " fmt, name, v); \
+ if (r < 0) \
+ return r; \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = (cast_type) v; \
+ unit_write_settingf(u, flags, name, \
+ "%s=%s", name, s); \
+ } \
+ \
+ return 1; \
+ }
+
+#define BUS_DEFINE_SET_TRANSIENT_PARSE(function, type, parse) \
+ int bus_set_transient_##function( \
+ Unit *u, \
+ const char *name, \
+ type *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ const char *s; \
+ type v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, "s", &s); \
+ if (r < 0) \
+ return r; \
+ \
+ v = parse(s); \
+ if (v < 0) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Invalid %s setting: %s", name, s); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = v; \
+ unit_write_settingf(u, flags, name, \
+ "%s=%s", name, s); \
+ } \
+ \
+ return 1; \
+ }
+
+#define BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(function, type, parse) \
+ int bus_set_transient_##function( \
+ Unit *u, \
+ const char *name, \
+ type *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ const char *s; \
+ type v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, "s", &s); \
+ if (r < 0) \
+ return r; \
+ \
+ r = parse(s, &v); \
+ if (r < 0) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Invalid %s setting: %s", name, s); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = v; \
+ unit_write_settingf(u, flags, name, \
+ "%s=%s", name, strempty(s)); \
+ } \
+ \
+ return 1; \
+ }
+
+#define BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(function, check) \
+ int bus_set_transient_##function( \
+ Unit *u, \
+ const char *name, \
+ char **p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ const char *v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, "s", &v); \
+ if (r < 0) \
+ return r; \
+ \
+ if (!isempty(v) && !check(v)) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Invalid %s setting: %s", name, v); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ r = free_and_strdup(p, empty_to_null(v)); \
+ if (r < 0) \
+ return r; \
+ \
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, \
+ "%s=%s", name, strempty(v)); \
+ } \
+ \
+ return 1; \
+ }
+
+int bus_set_transient_mode_t(Unit *u, const char *name, mode_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_unsigned(Unit *u, const char *name, unsigned *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_user(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_path(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_string(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_bool(Unit *u, const char *name, bool *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_usec_internal(Unit *u, const char *name, usec_t *p, bool fix_0, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+static inline int bus_set_transient_usec(Unit *u, const char *name, usec_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error) {
+ return bus_set_transient_usec_internal(u, name, p, false, message, flags, error);
+}
+static inline int bus_set_transient_usec_fix_0(Unit *u, const char *name, usec_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error) {
+ return bus_set_transient_usec_internal(u, name, p, true, message, flags, error);
+}
diff --git a/src/core/dbus.c b/src/core/dbus.c
new file mode 100644
index 0000000..255b86e
--- /dev/null
+++ b/src/core/dbus.c
@@ -0,0 +1,1315 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-internal.h"
+#include "bus-util.h"
+#include "dbus-automount.h"
+#include "dbus-cgroup.h"
+#include "dbus-device.h"
+#include "dbus-execute.h"
+#include "dbus-job.h"
+#include "dbus-kill.h"
+#include "dbus-manager.h"
+#include "dbus-mount.h"
+#include "dbus-path.h"
+#include "dbus-scope.h"
+#include "dbus-service.h"
+#include "dbus-slice.h"
+#include "dbus-socket.h"
+#include "dbus-swap.h"
+#include "dbus-target.h"
+#include "dbus-timer.h"
+#include "dbus-unit.h"
+#include "dbus.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "process-util.h"
+#include "selinux-access.h"
+#include "serialize.h"
+#include "service.h"
+#include "special.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "user-util.h"
+
+#define CONNECTIONS_MAX 4096
+
+static void destroy_bus(Manager *m, sd_bus **bus);
+
+int bus_send_pending_reload_message(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (!m->pending_reload_message)
+ return 0;
+
+ /* If we cannot get rid of this message we won't dispatch any D-Bus messages, so that we won't end up wanting
+ * to queue another message. */
+
+ r = sd_bus_send(NULL, m->pending_reload_message, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to send queued message, ignoring: %m");
+
+ m->pending_reload_message = sd_bus_message_unref(m->pending_reload_message);
+
+ return 0;
+}
+
+int bus_forward_agent_released(Manager *m, const char *path) {
+ int r;
+
+ assert(m);
+ assert(path);
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return 0;
+
+ if (!m->system_bus)
+ return 0;
+
+ /* If we are running a system instance we forward the agent message on the system bus, so that the user
+ * instances get notified about this, too */
+
+ r = sd_bus_emit_signal(m->system_bus,
+ "/org/freedesktop/systemd1/agent",
+ "org.freedesktop.systemd1.Agent",
+ "Released",
+ "s", path);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to propagate agent release message: %m");
+
+ return 1;
+}
+
+static int signal_agent_released(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ Manager *m = userdata;
+ const char *cgroup;
+ uid_t sender_uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* only accept org.freedesktop.systemd1.Agent from UID=0 */
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &sender_uid);
+ if (r < 0 || sender_uid != 0)
+ return 0;
+
+ /* parse 'cgroup-empty' notification */
+ r = sd_bus_message_read(message, "s", &cgroup);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ manager_notify_cgroup_empty(m, cgroup);
+ return 0;
+}
+
+static int signal_disconnected(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ sd_bus *bus;
+
+ assert(message);
+ assert(m);
+ assert_se(bus = sd_bus_message_get_bus(message));
+
+ if (bus == m->api_bus)
+ bus_done_api(m);
+ if (bus == m->system_bus)
+ bus_done_system(m);
+
+ if (set_remove(m->private_buses, bus)) {
+ log_debug("Got disconnect on private connection.");
+ destroy_bus(m, &bus);
+ }
+
+ return 0;
+}
+
+static int signal_activation_request(sd_bus_message *message, void *userdata, sd_bus_error *ret_error) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ if (manager_unit_inactive_or_pending(m, SPECIAL_DBUS_SERVICE) ||
+ manager_unit_inactive_or_pending(m, SPECIAL_DBUS_SOCKET)) {
+ r = sd_bus_error_setf(&error, BUS_ERROR_SHUTTING_DOWN, "Refusing activation, D-Bus is shutting down.");
+ goto failed;
+ }
+
+ r = manager_load_unit(m, name, NULL, &error, &u);
+ if (r < 0)
+ goto failed;
+
+ if (u->refuse_manual_start) {
+ r = sd_bus_error_setf(&error, BUS_ERROR_ONLY_BY_DEPENDENCY, "Operation refused, %s may be requested by dependency only (it is configured to refuse manual start/stop).", u->id);
+ goto failed;
+ }
+
+ r = manager_add_job(m, JOB_START, u, JOB_REPLACE, &error, NULL);
+ if (r < 0)
+ goto failed;
+
+ /* Successfully queued, that's it for us */
+ return 0;
+
+failed:
+ if (!sd_bus_error_is_set(&error))
+ sd_bus_error_set_errno(&error, r);
+
+ log_debug("D-Bus activation failed for %s: %s", name, bus_error_message(&error, r));
+
+ r = sd_bus_message_new_signal(sd_bus_message_get_bus(message), &reply, "/org/freedesktop/systemd1", "org.freedesktop.systemd1.Activator", "ActivationFailure");
+ if (r < 0) {
+ bus_log_create_error(r);
+ return 0;
+ }
+
+ r = sd_bus_message_append(reply, "sss", name, error.name, error.message);
+ if (r < 0) {
+ bus_log_create_error(r);
+ return 0;
+ }
+
+ r = sd_bus_send_to(NULL, reply, "org.freedesktop.DBus", NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to respond with to bus activation request: %m");
+
+ return 0;
+}
+
+#if HAVE_SELINUX
+static int mac_selinux_filter(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *verb, *path;
+ Unit *u = NULL;
+ Job *j;
+ int r;
+
+ assert(message);
+
+ /* Our own method calls are all protected individually with
+ * selinux checks, but the built-in interfaces need to be
+ * protected too. */
+
+ if (sd_bus_message_is_method_call(message, "org.freedesktop.DBus.Properties", "Set"))
+ verb = "reload";
+ else if (sd_bus_message_is_method_call(message, "org.freedesktop.DBus.Introspectable", NULL) ||
+ sd_bus_message_is_method_call(message, "org.freedesktop.DBus.Properties", NULL) ||
+ sd_bus_message_is_method_call(message, "org.freedesktop.DBus.ObjectManager", NULL) ||
+ sd_bus_message_is_method_call(message, "org.freedesktop.DBus.Peer", NULL))
+ verb = "status";
+ else
+ return 0;
+
+ path = sd_bus_message_get_path(message);
+
+ if (object_path_startswith("/org/freedesktop/systemd1", path)) {
+ r = mac_selinux_access_check(message, verb, error);
+ if (r < 0)
+ return r;
+
+ return 0;
+ }
+
+ if (streq_ptr(path, "/org/freedesktop/systemd1/unit/self")) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ pid_t pid;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return 0;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return 0;
+
+ u = manager_get_unit_by_pid(m, pid);
+ } else {
+ r = manager_get_job_from_dbus_path(m, path, &j);
+ if (r >= 0)
+ u = j->unit;
+ else
+ manager_load_unit_from_dbus_path(m, path, NULL, &u);
+ }
+ if (!u)
+ return 0;
+
+ r = mac_selinux_unit_access_check(u, message, verb, error);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+#endif
+
+static int bus_job_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ Job *j;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = manager_get_job_from_dbus_path(m, path, &j);
+ if (r < 0)
+ return 0;
+
+ *found = j;
+ return 1;
+}
+
+static int find_unit(Manager *m, sd_bus *bus, const char *path, Unit **unit, sd_bus_error *error) {
+ Unit *u = NULL; /* just to appease gcc, initialization is not really necessary */
+ int r;
+
+ assert(m);
+ assert(bus);
+ assert(path);
+
+ if (streq_ptr(path, "/org/freedesktop/systemd1/unit/self")) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ sd_bus_message *message;
+ pid_t pid;
+
+ message = sd_bus_get_current_message(bus);
+ if (!message)
+ return 0;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+
+ u = manager_get_unit_by_pid(m, pid);
+ if (!u)
+ return 0;
+ } else {
+ r = manager_load_unit_from_dbus_path(m, path, error, &u);
+ if (r < 0)
+ return 0;
+ assert(u);
+ }
+
+ *unit = u;
+ return 1;
+}
+
+static int bus_unit_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ return find_unit(m, bus, path, (Unit**) found, error);
+}
+
+static int bus_unit_interface_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ Unit *u;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = find_unit(m, bus, path, &u, error);
+ if (r <= 0)
+ return r;
+
+ if (!streq_ptr(interface, unit_dbus_interface_from_type(u->type)))
+ return 0;
+
+ *found = u;
+ return 1;
+}
+
+static int bus_unit_cgroup_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ Unit *u;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = find_unit(m, bus, path, &u, error);
+ if (r <= 0)
+ return r;
+
+ if (!streq_ptr(interface, unit_dbus_interface_from_type(u->type)))
+ return 0;
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return 0;
+
+ *found = u;
+ return 1;
+}
+
+static int bus_cgroup_context_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ CGroupContext *c;
+ Unit *u;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = find_unit(m, bus, path, &u, error);
+ if (r <= 0)
+ return r;
+
+ if (!streq_ptr(interface, unit_dbus_interface_from_type(u->type)))
+ return 0;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return 0;
+
+ *found = c;
+ return 1;
+}
+
+static int bus_exec_context_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ ExecContext *c;
+ Unit *u;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = find_unit(m, bus, path, &u, error);
+ if (r <= 0)
+ return r;
+
+ if (!streq_ptr(interface, unit_dbus_interface_from_type(u->type)))
+ return 0;
+
+ c = unit_get_exec_context(u);
+ if (!c)
+ return 0;
+
+ *found = c;
+ return 1;
+}
+
+static int bus_kill_context_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ KillContext *c;
+ Unit *u;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = find_unit(m, bus, path, &u, error);
+ if (r <= 0)
+ return r;
+
+ if (!streq_ptr(interface, unit_dbus_interface_from_type(u->type)))
+ return 0;
+
+ c = unit_get_kill_context(u);
+ if (!c)
+ return 0;
+
+ *found = c;
+ return 1;
+}
+
+static int bus_job_enumerate(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ unsigned k = 0;
+ Iterator i;
+ Job *j;
+
+ l = new0(char*, hashmap_size(m->jobs)+1);
+ if (!l)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(j, m->jobs, i) {
+ l[k] = job_dbus_path(j);
+ if (!l[k])
+ return -ENOMEM;
+
+ k++;
+ }
+
+ assert(hashmap_size(m->jobs) == k);
+
+ *nodes = TAKE_PTR(l);
+
+ return k;
+}
+
+static int bus_unit_enumerate(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ unsigned k = 0;
+ Iterator i;
+ Unit *u;
+
+ l = new0(char*, hashmap_size(m->units)+1);
+ if (!l)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(u, m->units, i) {
+ l[k] = unit_dbus_path(u);
+ if (!l[k])
+ return -ENOMEM;
+
+ k++;
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return k;
+}
+
+static int bus_setup_api_vtables(Manager *m, sd_bus *bus) {
+ UnitType t;
+ int r;
+
+ assert(m);
+ assert(bus);
+
+#if HAVE_SELINUX
+ r = sd_bus_add_filter(bus, NULL, mac_selinux_filter, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add SELinux access filter: %m");
+#endif
+
+ r = sd_bus_add_object_vtable(bus, NULL, "/org/freedesktop/systemd1", "org.freedesktop.systemd1.Manager", bus_manager_vtable, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register Manager vtable: %m");
+
+ r = sd_bus_add_fallback_vtable(bus, NULL, "/org/freedesktop/systemd1/job", "org.freedesktop.systemd1.Job", bus_job_vtable, bus_job_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register Job vtable: %m");
+
+ r = sd_bus_add_node_enumerator(bus, NULL, "/org/freedesktop/systemd1/job", bus_job_enumerate, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add job enumerator: %m");
+
+ r = sd_bus_add_fallback_vtable(bus, NULL, "/org/freedesktop/systemd1/unit", "org.freedesktop.systemd1.Unit", bus_unit_vtable, bus_unit_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register Unit vtable: %m");
+
+ r = sd_bus_add_node_enumerator(bus, NULL, "/org/freedesktop/systemd1/unit", bus_unit_enumerate, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add job enumerator: %m");
+
+ for (t = 0; t < _UNIT_TYPE_MAX; t++) {
+ const char *interface;
+
+ assert_se(interface = unit_dbus_interface_from_type(t));
+
+ r = sd_bus_add_fallback_vtable(bus, NULL, "/org/freedesktop/systemd1/unit", interface, unit_vtable[t]->bus_vtable, bus_unit_interface_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register type specific vtable for %s: %m", interface);
+
+ if (unit_vtable[t]->cgroup_context_offset > 0) {
+ r = sd_bus_add_fallback_vtable(bus, NULL, "/org/freedesktop/systemd1/unit", interface, bus_unit_cgroup_vtable, bus_unit_cgroup_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register control group unit vtable for %s: %m", interface);
+
+ r = sd_bus_add_fallback_vtable(bus, NULL, "/org/freedesktop/systemd1/unit", interface, bus_cgroup_vtable, bus_cgroup_context_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register control group vtable for %s: %m", interface);
+ }
+
+ if (unit_vtable[t]->exec_context_offset > 0) {
+ r = sd_bus_add_fallback_vtable(bus, NULL, "/org/freedesktop/systemd1/unit", interface, bus_exec_vtable, bus_exec_context_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register execute vtable for %s: %m", interface);
+ }
+
+ if (unit_vtable[t]->kill_context_offset > 0) {
+ r = sd_bus_add_fallback_vtable(bus, NULL, "/org/freedesktop/systemd1/unit", interface, bus_kill_vtable, bus_kill_context_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register kill vtable for %s: %m", interface);
+ }
+ }
+
+ return 0;
+}
+
+static int bus_setup_disconnected_match(Manager *m, sd_bus *bus) {
+ int r;
+
+ assert(m);
+ assert(bus);
+
+ r = sd_bus_match_signal_async(
+ bus,
+ NULL,
+ "org.freedesktop.DBus.Local",
+ "/org/freedesktop/DBus/Local",
+ "org.freedesktop.DBus.Local",
+ "Disconnected",
+ signal_disconnected, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for Disconnected message: %m");
+
+ return 0;
+}
+
+static int bus_on_connection(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_close_ int nfd = -1;
+ Manager *m = userdata;
+ sd_id128_t id;
+ int r;
+
+ assert(s);
+ assert(m);
+
+ nfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (nfd < 0) {
+ log_warning_errno(errno, "Failed to accept private connection, ignoring: %m");
+ return 0;
+ }
+
+ if (set_size(m->private_buses) >= CONNECTIONS_MAX) {
+ log_warning("Too many concurrent connections, refusing");
+ return 0;
+ }
+
+ r = set_ensure_allocated(&m->private_buses, NULL);
+ if (r < 0) {
+ log_oom();
+ return 0;
+ }
+
+ r = sd_bus_new(&bus);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to allocate new private connection bus: %m");
+ return 0;
+ }
+
+ (void) sd_bus_set_description(bus, "private-bus-connection");
+
+ r = sd_bus_set_fd(bus, nfd, nfd);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to set fd on new connection bus: %m");
+ return 0;
+ }
+
+ nfd = -1;
+
+ r = bus_check_peercred(bus);
+ if (r < 0) {
+ log_warning_errno(r, "Incoming private connection from unprivileged client, refusing: %m");
+ return 0;
+ }
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+
+ r = sd_bus_set_server(bus, 1, id);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to enable server support for new connection bus: %m");
+ return 0;
+ }
+
+ r = sd_bus_negotiate_creds(bus, 1,
+ SD_BUS_CREDS_PID|SD_BUS_CREDS_UID|
+ SD_BUS_CREDS_EUID|SD_BUS_CREDS_EFFECTIVE_CAPS|
+ SD_BUS_CREDS_SELINUX_CONTEXT);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to enable credentials for new connection: %m");
+ return 0;
+ }
+
+ r = sd_bus_set_sender(bus, "org.freedesktop.systemd1");
+ if (r < 0) {
+ log_warning_errno(r, "Failed to set direct connection sender: %m");
+ return 0;
+ }
+
+ r = sd_bus_start(bus);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to start new connection bus: %m");
+ return 0;
+ }
+
+ r = sd_bus_attach_event(bus, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to attach new connection bus to event loop: %m");
+ return 0;
+ }
+
+ r = bus_setup_disconnected_match(m, bus);
+ if (r < 0)
+ return 0;
+
+ r = bus_setup_api_vtables(m, bus);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to set up API vtables on new connection bus: %m");
+ return 0;
+ }
+
+ r = set_put(m->private_buses, bus);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to add new connection bus to set: %m");
+ return 0;
+ }
+
+ bus = NULL;
+
+ log_debug("Accepted new private connection.");
+
+ return 0;
+}
+
+static int manager_dispatch_sync_bus_names(sd_event_source *es, void *userdata) {
+ _cleanup_strv_free_ char **names = NULL;
+ Manager *m = userdata;
+ const char *name;
+ Iterator i;
+ Unit *u;
+ int r;
+
+ assert(es);
+ assert(m);
+ assert(m->sync_bus_names_event_source == es);
+
+ /* First things first, destroy the defer event so that we aren't triggered again */
+ m->sync_bus_names_event_source = sd_event_source_unref(m->sync_bus_names_event_source);
+
+ /* Let's see if there's anything to do still? */
+ if (!m->api_bus)
+ return 0;
+ if (hashmap_isempty(m->watch_bus))
+ return 0;
+
+ /* OK, let's sync up the names. Let's see which names are currently on the bus. */
+ r = sd_bus_list_names(m->api_bus, &names, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get initial list of names: %m");
+
+ /* We have to synchronize the current bus names with the
+ * list of active services. To do this, walk the list of
+ * all units with bus names. */
+ HASHMAP_FOREACH_KEY(u, name, m->watch_bus, i) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ if (!streq_ptr(s->bus_name, name)) {
+ log_unit_warning(u, "Bus name has changed from %s → %s, ignoring.", s->bus_name, name);
+ continue;
+ }
+
+ /* Check if a service's bus name is in the list of currently
+ * active names */
+ if (strv_contains(names, name)) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ const char *unique;
+
+ /* If it is, determine its current owner */
+ r = sd_bus_get_name_creds(m->api_bus, name, SD_BUS_CREDS_UNIQUE_NAME, &creds);
+ if (r < 0) {
+ log_full_errno(r == -ENXIO ? LOG_DEBUG : LOG_ERR, r, "Failed to get bus name owner %s: %m", name);
+ continue;
+ }
+
+ r = sd_bus_creds_get_unique_name(creds, &unique);
+ if (r < 0) {
+ log_full_errno(r == -ENXIO ? LOG_DEBUG : LOG_ERR, r, "Failed to get unique name for %s: %m", name);
+ continue;
+ }
+
+ /* Now, let's compare that to the previous bus owner, and
+ * if it's still the same, all is fine, so just don't
+ * bother the service. Otherwise, the name has apparently
+ * changed, so synthesize a name owner changed signal. */
+
+ if (!streq_ptr(unique, s->bus_name_owner))
+ UNIT_VTABLE(u)->bus_name_owner_change(u, name, s->bus_name_owner, unique);
+ } else {
+ /* So, the name we're watching is not on the bus.
+ * This either means it simply hasn't appeared yet,
+ * or it was lost during the daemon reload.
+ * Check if the service has a stored name owner,
+ * and synthesize a name loss signal in this case. */
+
+ if (s->bus_name_owner)
+ UNIT_VTABLE(u)->bus_name_owner_change(u, name, s->bus_name_owner, NULL);
+ }
+ }
+
+ return 0;
+}
+
+int manager_enqueue_sync_bus_names(Manager *m) {
+ int r;
+
+ assert(m);
+
+ /* Enqueues a request to synchronize the bus names in a later event loop iteration. The callers generally don't
+ * want us to invoke ->bus_name_owner_change() unit calls from their stack frames as this might result in event
+ * dispatching on its own creating loops, hence we simply create a defer event for the event loop and exit. */
+
+ if (m->sync_bus_names_event_source)
+ return 0;
+
+ r = sd_event_add_defer(m->event, &m->sync_bus_names_event_source, manager_dispatch_sync_bus_names, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus name synchronization event: %m");
+
+ r = sd_event_source_set_priority(m->sync_bus_names_event_source, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set event priority: %m");
+
+ r = sd_event_source_set_enabled(m->sync_bus_names_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set even to oneshot: %m");
+
+ (void) sd_event_source_set_description(m->sync_bus_names_event_source, "manager-sync-bus-names");
+ return 0;
+}
+
+static int bus_setup_api(Manager *m, sd_bus *bus) {
+ Iterator i;
+ char *name;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(bus);
+
+ /* Let's make sure we have enough credential bits so that we can make security and selinux decisions */
+ r = sd_bus_negotiate_creds(bus, 1,
+ SD_BUS_CREDS_PID|SD_BUS_CREDS_UID|
+ SD_BUS_CREDS_EUID|SD_BUS_CREDS_EFFECTIVE_CAPS|
+ SD_BUS_CREDS_SELINUX_CONTEXT);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enable credential passing, ignoring: %m");
+
+ r = bus_setup_api_vtables(m, bus);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_KEY(u, name, m->watch_bus, i) {
+ r = unit_install_bus_match(u, bus, name);
+ if (r < 0)
+ log_error_errno(r, "Failed to subscribe to NameOwnerChanged signal for '%s': %m", name);
+ }
+
+ r = sd_bus_match_signal_async(
+ bus,
+ NULL,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.systemd1.Activator",
+ "ActivationRequest",
+ signal_activation_request, NULL, m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to subscribe to activation signal: %m");
+
+ /* Allow replacing of our name, to ease implementation of reexecution, where we keep the old connection open
+ * until after the new connection is set up and the name installed to allow clients to synchronously wait for
+ * reexecution to finish */
+ r = sd_bus_request_name_async(bus, NULL, "org.freedesktop.systemd1", SD_BUS_NAME_REPLACE_EXISTING|SD_BUS_NAME_ALLOW_REPLACEMENT, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ log_debug("Successfully connected to API bus.");
+
+ return 0;
+}
+
+int bus_init_api(Manager *m) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ if (m->api_bus)
+ return 0;
+
+ /* The API and system bus is the same if we are running in system mode */
+ if (MANAGER_IS_SYSTEM(m) && m->system_bus)
+ bus = sd_bus_ref(m->system_bus);
+ else {
+ if (MANAGER_IS_SYSTEM(m))
+ r = sd_bus_open_system_with_description(&bus, "bus-api-system");
+ else
+ r = sd_bus_open_user_with_description(&bus, "bus-api-user");
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to API bus: %m");
+
+ r = sd_bus_attach_event(bus, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach API bus to event loop: %m");
+
+ r = bus_setup_disconnected_match(m, bus);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_setup_api(m, bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up API bus: %m");
+
+ m->api_bus = TAKE_PTR(bus);
+
+ r = manager_enqueue_sync_bus_names(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int bus_setup_system(Manager *m, sd_bus *bus) {
+ int r;
+
+ assert(m);
+ assert(bus);
+
+ /* if we are a user instance we get the Released message via the system bus */
+ if (MANAGER_IS_USER(m)) {
+ r = sd_bus_match_signal_async(
+ bus,
+ NULL,
+ NULL,
+ "/org/freedesktop/systemd1/agent",
+ "org.freedesktop.systemd1.Agent",
+ "Released",
+ signal_agent_released, NULL, m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to request Released match on system bus: %m");
+ }
+
+ log_debug("Successfully connected to system bus.");
+ return 0;
+}
+
+int bus_init_system(Manager *m) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ if (m->system_bus)
+ return 0;
+
+ /* The API and system bus is the same if we are running in system mode */
+ if (MANAGER_IS_SYSTEM(m) && m->api_bus)
+ bus = sd_bus_ref(m->api_bus);
+ else {
+ r = sd_bus_open_system_with_description(&bus, "bus-system");
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = sd_bus_attach_event(bus, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach system bus to event loop: %m");
+
+ r = bus_setup_disconnected_match(m, bus);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_setup_system(m, bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up system bus: %m");
+
+ m->system_bus = TAKE_PTR(bus);
+
+ return 0;
+}
+
+int bus_init_private(Manager *m) {
+ _cleanup_close_ int fd = -1;
+ union sockaddr_union sa = {};
+ sd_event_source *s;
+ int r, salen;
+
+ assert(m);
+
+ if (m->private_listen_fd >= 0)
+ return 0;
+
+ if (MANAGER_IS_SYSTEM(m)) {
+
+ /* We want the private bus only when running as init */
+ if (getpid_cached() != 1)
+ return 0;
+
+ salen = sockaddr_un_set_path(&sa.un, "/run/systemd/private");
+ } else {
+ const char *e, *joined;
+
+ e = secure_getenv("XDG_RUNTIME_DIR");
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
+ "XDG_RUNTIME_DIR is not set, refusing.");
+
+ joined = strjoina(e, "/systemd/private");
+ salen = sockaddr_un_set_path(&sa.un, joined);
+ }
+ if (salen < 0)
+ return log_error_errno(salen, "Can't set path for AF_UNIX socket to bind to: %m");
+
+ (void) mkdir_parents_label(sa.un.sun_path, 0755);
+ (void) sockaddr_un_unlink(&sa.un);
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to allocate private socket: %m");
+
+ r = bind(fd, &sa.sa, salen);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to bind private socket: %m");
+
+ r = listen(fd, SOMAXCONN);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to make private socket listening: %m");
+
+ /* Generate an inotify event in case somebody waits for this socket to appear using inotify() */
+ (void) touch(sa.un.sun_path);
+
+ r = sd_event_add_io(m->event, &s, fd, EPOLLIN, bus_on_connection, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event source: %m");
+
+ (void) sd_event_source_set_description(s, "bus-connection");
+
+ m->private_listen_fd = TAKE_FD(fd);
+ m->private_listen_event_source = s;
+
+ log_debug("Successfully created private D-Bus server.");
+
+ return 0;
+}
+
+static void destroy_bus(Manager *m, sd_bus **bus) {
+ Iterator i;
+ Unit *u;
+ Job *j;
+
+ assert(m);
+ assert(bus);
+
+ if (!*bus)
+ return;
+
+ /* Make sure all bus slots watching names are released. */
+ HASHMAP_FOREACH(u, m->watch_bus, i) {
+ if (!u->match_bus_slot)
+ continue;
+
+ if (sd_bus_slot_get_bus(u->match_bus_slot) != *bus)
+ continue;
+
+ u->match_bus_slot = sd_bus_slot_unref(u->match_bus_slot);
+ }
+
+ /* Get rid of tracked clients on this bus */
+ if (m->subscribed && sd_bus_track_get_bus(m->subscribed) == *bus)
+ m->subscribed = sd_bus_track_unref(m->subscribed);
+
+ HASHMAP_FOREACH(j, m->jobs, i)
+ if (j->bus_track && sd_bus_track_get_bus(j->bus_track) == *bus)
+ j->bus_track = sd_bus_track_unref(j->bus_track);
+
+ HASHMAP_FOREACH(u, m->units, i)
+ if (u->bus_track && sd_bus_track_get_bus(u->bus_track) == *bus)
+ u->bus_track = sd_bus_track_unref(u->bus_track);
+
+ /* Get rid of queued message on this bus */
+ if (m->pending_reload_message && sd_bus_message_get_bus(m->pending_reload_message) == *bus)
+ m->pending_reload_message = sd_bus_message_unref(m->pending_reload_message);
+
+ /* Possibly flush unwritten data, but only if we are
+ * unprivileged, since we don't want to sync here */
+ if (!MANAGER_IS_SYSTEM(m))
+ sd_bus_flush(*bus);
+
+ /* And destroy the object */
+ *bus = sd_bus_close_unref(*bus);
+}
+
+void bus_done_api(Manager *m) {
+ destroy_bus(m, &m->api_bus);
+}
+
+void bus_done_system(Manager *m) {
+ destroy_bus(m, &m->system_bus);
+}
+
+void bus_done_private(Manager *m) {
+ sd_bus *b;
+
+ assert(m);
+
+ while ((b = set_steal_first(m->private_buses)))
+ destroy_bus(m, &b);
+
+ m->private_buses = set_free(m->private_buses);
+
+ m->private_listen_event_source = sd_event_source_unref(m->private_listen_event_source);
+ m->private_listen_fd = safe_close(m->private_listen_fd);
+}
+
+void bus_done(Manager *m) {
+ assert(m);
+
+ bus_done_api(m);
+ bus_done_system(m);
+ bus_done_private(m);
+
+ assert(!m->subscribed);
+
+ m->deserialized_subscribed = strv_free(m->deserialized_subscribed);
+ bus_verify_polkit_async_registry_free(m->polkit_registry);
+}
+
+int bus_fdset_add_all(Manager *m, FDSet *fds) {
+ Iterator i;
+ sd_bus *b;
+ int fd;
+
+ assert(m);
+ assert(fds);
+
+ /* When we are about to reexecute we add all D-Bus fds to the
+ * set to pass over to the newly executed systemd. They won't
+ * be used there however, except thatt they are closed at the
+ * very end of deserialization, those making it possible for
+ * clients to synchronously wait for systemd to reexec by
+ * simply waiting for disconnection */
+
+ if (m->api_bus) {
+ fd = sd_bus_get_fd(m->api_bus);
+ if (fd >= 0) {
+ fd = fdset_put_dup(fds, fd);
+ if (fd < 0)
+ return fd;
+ }
+ }
+
+ SET_FOREACH(b, m->private_buses, i) {
+ fd = sd_bus_get_fd(b);
+ if (fd >= 0) {
+ fd = fdset_put_dup(fds, fd);
+ if (fd < 0)
+ return fd;
+ }
+ }
+
+ /* We don't offer any APIs on the system bus (well, unless it
+ * is the same as the API bus) hence we don't bother with it
+ * here */
+
+ return 0;
+}
+
+int bus_foreach_bus(
+ Manager *m,
+ sd_bus_track *subscribed2,
+ int (*send_message)(sd_bus *bus, void *userdata),
+ void *userdata) {
+
+ Iterator i;
+ sd_bus *b;
+ int r, ret = 0;
+
+ /* Send to all direct buses, unconditionally */
+ SET_FOREACH(b, m->private_buses, i) {
+
+ /* Don't bother with enqueing these messages to clients that haven't started yet */
+ if (sd_bus_is_ready(b) <= 0)
+ continue;
+
+ r = send_message(b, userdata);
+ if (r < 0)
+ ret = r;
+ }
+
+ /* Send to API bus, but only if somebody is subscribed */
+ if (m->api_bus &&
+ (sd_bus_track_count(m->subscribed) > 0 ||
+ sd_bus_track_count(subscribed2) > 0)) {
+ r = send_message(m->api_bus, userdata);
+ if (r < 0)
+ ret = r;
+ }
+
+ return ret;
+}
+
+void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix) {
+ const char *n;
+
+ assert(f);
+ assert(prefix);
+
+ for (n = sd_bus_track_first(t); n; n = sd_bus_track_next(t)) {
+ int c, j;
+
+ c = sd_bus_track_count_name(t, n);
+ for (j = 0; j < c; j++)
+ (void) serialize_item(f, prefix, n);
+ }
+}
+
+int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l) {
+ int r = 0;
+
+ assert(m);
+ assert(t);
+
+ if (strv_isempty(l))
+ return 0;
+
+ if (!m->api_bus)
+ return 0;
+
+ if (!*t) {
+ r = sd_bus_track_new(m->api_bus, t, NULL, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_track_set_recursive(*t, recursive);
+ if (r < 0)
+ return r;
+
+ return bus_track_add_name_many(*t, l);
+}
+
+int bus_verify_manage_units_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
+ return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.manage-units", NULL, false, UID_INVALID, &m->polkit_registry, error);
+}
+
+int bus_verify_manage_unit_files_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
+ return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.manage-unit-files", NULL, false, UID_INVALID, &m->polkit_registry, error);
+}
+
+int bus_verify_reload_daemon_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
+ return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.reload-daemon", NULL, false, UID_INVALID, &m->polkit_registry, error);
+}
+
+int bus_verify_set_environment_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
+ return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.set-environment", NULL, false, UID_INVALID, &m->polkit_registry, error);
+}
+
+uint64_t manager_bus_n_queued_write(Manager *m) {
+ uint64_t c = 0;
+ Iterator i;
+ sd_bus *b;
+ int r;
+
+ /* Returns the total number of messages queued for writing on all our direct and API busses. */
+
+ SET_FOREACH(b, m->private_buses, i) {
+ uint64_t k;
+
+ r = sd_bus_get_n_queued_write(b, &k);
+ if (r < 0)
+ log_debug_errno(r, "Failed to query queued messages for private bus: %m");
+ else
+ c += k;
+ }
+
+ if (m->api_bus) {
+ uint64_t k;
+
+ r = sd_bus_get_n_queued_write(m->api_bus, &k);
+ if (r < 0)
+ log_debug_errno(r, "Failed to query queued messages for API bus: %m");
+ else
+ c += k;
+ }
+
+ return c;
+}
+
+static void vtable_dump_bus_properties(FILE *f, const sd_bus_vtable *table) {
+ const sd_bus_vtable *i;
+
+ for (i = table; i->type != _SD_BUS_VTABLE_END; i++) {
+ if (!IN_SET(i->type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY) ||
+ (i->flags & (SD_BUS_VTABLE_DEPRECATED | SD_BUS_VTABLE_HIDDEN)) != 0)
+ continue;
+
+ fprintf(f, "%s\n", i->x.property.member);
+ }
+}
+
+void dump_bus_properties(FILE *f) {
+ assert(f);
+
+ vtable_dump_bus_properties(f, bus_automount_vtable);
+ vtable_dump_bus_properties(f, bus_cgroup_vtable);
+ vtable_dump_bus_properties(f, bus_device_vtable);
+ vtable_dump_bus_properties(f, bus_exec_vtable);
+ vtable_dump_bus_properties(f, bus_job_vtable);
+ vtable_dump_bus_properties(f, bus_kill_vtable);
+ vtable_dump_bus_properties(f, bus_manager_vtable);
+ vtable_dump_bus_properties(f, bus_mount_vtable);
+ vtable_dump_bus_properties(f, bus_path_vtable);
+ vtable_dump_bus_properties(f, bus_scope_vtable);
+ vtable_dump_bus_properties(f, bus_service_vtable);
+ vtable_dump_bus_properties(f, bus_slice_vtable);
+ vtable_dump_bus_properties(f, bus_socket_vtable);
+ vtable_dump_bus_properties(f, bus_swap_vtable);
+ vtable_dump_bus_properties(f, bus_target_vtable);
+ vtable_dump_bus_properties(f, bus_timer_vtable);
+ vtable_dump_bus_properties(f, bus_unit_vtable);
+ vtable_dump_bus_properties(f, bus_unit_cgroup_vtable);
+}
diff --git a/src/core/dbus.h b/src/core/dbus.h
new file mode 100644
index 0000000..f1c0fa8
--- /dev/null
+++ b/src/core/dbus.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "manager.h"
+
+int bus_send_pending_reload_message(Manager *m);
+
+int bus_init_private(Manager *m);
+int bus_init_api(Manager *m);
+int bus_init_system(Manager *m);
+
+void bus_done_private(Manager *m);
+void bus_done_api(Manager *m);
+void bus_done_system(Manager *m);
+void bus_done(Manager *m);
+
+int bus_fdset_add_all(Manager *m, FDSet *fds);
+
+void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix);
+int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l);
+
+int manager_enqueue_sync_bus_names(Manager *m);
+
+int bus_foreach_bus(Manager *m, sd_bus_track *subscribed2, int (*send_message)(sd_bus *bus, void *userdata), void *userdata);
+
+int bus_verify_manage_units_async(Manager *m, sd_bus_message *call, sd_bus_error *error);
+int bus_verify_manage_unit_files_async(Manager *m, sd_bus_message *call, sd_bus_error *error);
+int bus_verify_reload_daemon_async(Manager *m, sd_bus_message *call, sd_bus_error *error);
+int bus_verify_set_environment_async(Manager *m, sd_bus_message *call, sd_bus_error *error);
+
+int bus_forward_agent_released(Manager *m, const char *path);
+
+uint64_t manager_bus_n_queued_write(Manager *m);
+
+void dump_bus_properties(FILE *f);
diff --git a/src/core/device.c b/src/core/device.c
new file mode 100644
index 0000000..b006add
--- /dev/null
+++ b/src/core/device.c
@@ -0,0 +1,1093 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sys/epoll.h>
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "dbus-device.h"
+#include "dbus-unit.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "device.h"
+#include "log.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "serialize.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "swap.h"
+#include "unit-name.h"
+#include "unit.h"
+
+static const UnitActiveState state_translation_table[_DEVICE_STATE_MAX] = {
+ [DEVICE_DEAD] = UNIT_INACTIVE,
+ [DEVICE_TENTATIVE] = UNIT_ACTIVATING,
+ [DEVICE_PLUGGED] = UNIT_ACTIVE,
+};
+
+static int device_dispatch_io(sd_device_monitor *monitor, sd_device *dev, void *userdata);
+static void device_update_found_one(Device *d, DeviceFound found, DeviceFound mask);
+
+static void device_unset_sysfs(Device *d) {
+ Hashmap *devices;
+ Device *first;
+
+ assert(d);
+
+ if (!d->sysfs)
+ return;
+
+ /* Remove this unit from the chain of devices which share the
+ * same sysfs path. */
+ devices = UNIT(d)->manager->devices_by_sysfs;
+ first = hashmap_get(devices, d->sysfs);
+ LIST_REMOVE(same_sysfs, first, d);
+
+ if (first)
+ hashmap_remove_and_replace(devices, d->sysfs, first->sysfs, first);
+ else
+ hashmap_remove(devices, d->sysfs);
+
+ d->sysfs = mfree(d->sysfs);
+}
+
+static int device_set_sysfs(Device *d, const char *sysfs) {
+ _cleanup_free_ char *copy = NULL;
+ Device *first;
+ int r;
+
+ assert(d);
+
+ if (streq_ptr(d->sysfs, sysfs))
+ return 0;
+
+ r = hashmap_ensure_allocated(&UNIT(d)->manager->devices_by_sysfs, &path_hash_ops);
+ if (r < 0)
+ return r;
+
+ copy = strdup(sysfs);
+ if (!copy)
+ return -ENOMEM;
+
+ device_unset_sysfs(d);
+
+ first = hashmap_get(UNIT(d)->manager->devices_by_sysfs, sysfs);
+ LIST_PREPEND(same_sysfs, first, d);
+
+ r = hashmap_replace(UNIT(d)->manager->devices_by_sysfs, copy, first);
+ if (r < 0) {
+ LIST_REMOVE(same_sysfs, first, d);
+ return r;
+ }
+
+ d->sysfs = TAKE_PTR(copy);
+ return 0;
+}
+
+static void device_init(Unit *u) {
+ Device *d = DEVICE(u);
+
+ assert(d);
+ assert(UNIT(d)->load_state == UNIT_STUB);
+
+ /* In contrast to all other unit types we timeout jobs waiting
+ * for devices by default. This is because they otherwise wait
+ * indefinitely for plugged in devices, something which cannot
+ * happen for the other units since their operations time out
+ * anyway. */
+ u->job_running_timeout = u->manager->default_timeout_start_usec;
+
+ u->ignore_on_isolate = true;
+
+ d->deserialized_state = _DEVICE_STATE_INVALID;
+}
+
+static void device_done(Unit *u) {
+ Device *d = DEVICE(u);
+
+ assert(d);
+
+ device_unset_sysfs(d);
+ d->wants_property = strv_free(d->wants_property);
+}
+
+static int device_load(Unit *u) {
+ int r;
+
+ r = unit_load_fragment_and_dropin_optional(u);
+ if (r < 0)
+ return r;
+
+ if (!u->description) {
+ /* Generate a description based on the path, to be used until the
+ device is initialized properly */
+ r = unit_name_to_path(u->id, &u->description);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to unescape name: %m");
+ }
+
+ return 0;
+}
+
+static void device_set_state(Device *d, DeviceState state) {
+ DeviceState old_state;
+ assert(d);
+
+ if (d->state != state)
+ bus_unit_send_pending_change_signal(UNIT(d), false);
+
+ old_state = d->state;
+ d->state = state;
+
+ if (state == DEVICE_DEAD)
+ device_unset_sysfs(d);
+
+ if (state != old_state)
+ log_unit_debug(UNIT(d), "Changed %s -> %s", device_state_to_string(old_state), device_state_to_string(state));
+
+ unit_notify(UNIT(d), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static int device_coldplug(Unit *u) {
+ Device *d = DEVICE(u);
+
+ assert(d);
+ assert(d->state == DEVICE_DEAD);
+
+ /* First, let's put the deserialized state and found mask into effect, if we have it. */
+
+ if (d->deserialized_state < 0 ||
+ (d->deserialized_state == d->state &&
+ d->deserialized_found == d->found))
+ return 0;
+
+ d->found = d->deserialized_found;
+ device_set_state(d, d->deserialized_state);
+ return 0;
+}
+
+static void device_catchup(Unit *u) {
+ Device *d = DEVICE(u);
+
+ assert(d);
+
+ /* Second, let's update the state with the enumerated state if it's different */
+ if (d->enumerated_found == d->found)
+ return;
+
+ device_update_found_one(d, d->enumerated_found, DEVICE_FOUND_MASK);
+}
+
+static const struct {
+ DeviceFound flag;
+ const char *name;
+} device_found_map[] = {
+ { DEVICE_FOUND_UDEV, "found-udev" },
+ { DEVICE_FOUND_MOUNT, "found-mount" },
+ { DEVICE_FOUND_SWAP, "found-swap" },
+};
+
+static int device_found_to_string_many(DeviceFound flags, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ unsigned i;
+
+ assert(ret);
+
+ for (i = 0; i < ELEMENTSOF(device_found_map); i++) {
+ if (!FLAGS_SET(flags, device_found_map[i].flag))
+ continue;
+
+ if (!strextend_with_separator(&s, ",", device_found_map[i].name, NULL))
+ return -ENOMEM;
+ }
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+static int device_found_from_string_many(const char *name, DeviceFound *ret) {
+ DeviceFound flags = 0;
+ int r;
+
+ assert(ret);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ DeviceFound f = 0;
+ unsigned i;
+
+ r = extract_first_word(&name, &word, ",", 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ for (i = 0; i < ELEMENTSOF(device_found_map); i++)
+ if (streq(word, device_found_map[i].name)) {
+ f = device_found_map[i].flag;
+ break;
+ }
+
+ if (f == 0)
+ return -EINVAL;
+
+ flags |= f;
+ }
+
+ *ret = flags;
+ return 0;
+}
+
+static int device_serialize(Unit *u, FILE *f, FDSet *fds) {
+ _cleanup_free_ char *s = NULL;
+ Device *d = DEVICE(u);
+
+ assert(u);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", device_state_to_string(d->state));
+
+ if (device_found_to_string_many(d->found, &s) >= 0)
+ (void) serialize_item(f, "found", s);
+
+ return 0;
+}
+
+static int device_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Device *d = DEVICE(u);
+ int r;
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ DeviceState state;
+
+ state = device_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value, ignoring: %s", value);
+ else
+ d->deserialized_state = state;
+
+ } else if (streq(key, "found")) {
+ r = device_found_from_string_many(value, &d->deserialized_found);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to parse found value '%s', ignoring: %m", value);
+
+ } else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+static void device_dump(Unit *u, FILE *f, const char *prefix) {
+ Device *d = DEVICE(u);
+ _cleanup_free_ char *s = NULL;
+
+ assert(d);
+
+ (void) device_found_to_string_many(d->found, &s);
+
+ fprintf(f,
+ "%sDevice State: %s\n"
+ "%sSysfs Path: %s\n"
+ "%sFound: %s\n",
+ prefix, device_state_to_string(d->state),
+ prefix, strna(d->sysfs),
+ prefix, strna(s));
+
+ if (!strv_isempty(d->wants_property)) {
+ char **i;
+
+ STRV_FOREACH(i, d->wants_property)
+ fprintf(f, "%sudev SYSTEMD_WANTS: %s\n",
+ prefix, *i);
+ }
+}
+
+_pure_ static UnitActiveState device_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[DEVICE(u)->state];
+}
+
+_pure_ static const char *device_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return device_state_to_string(DEVICE(u)->state);
+}
+
+static int device_update_description(Unit *u, sd_device *dev, const char *path) {
+ _cleanup_free_ char *j = NULL;
+ const char *model, *label, *desc;
+ int r;
+
+ assert(u);
+ assert(path);
+
+ desc = path;
+
+ if (dev &&
+ (sd_device_get_property_value(dev, "ID_MODEL_FROM_DATABASE", &model) >= 0 ||
+ sd_device_get_property_value(dev, "ID_MODEL", &model) >= 0)) {
+ desc = model;
+
+ /* Try to concatenate the device model string with a label, if there is one */
+ if (sd_device_get_property_value(dev, "ID_FS_LABEL", &label) >= 0 ||
+ sd_device_get_property_value(dev, "ID_PART_ENTRY_NAME", &label) >= 0 ||
+ sd_device_get_property_value(dev, "ID_PART_ENTRY_NUMBER", &label) >= 0) {
+
+ desc = j = strjoin(model, " ", label);
+ if (!j)
+ return log_oom();
+ }
+ }
+
+ r = unit_set_description(u, desc);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to set device description: %m");
+
+ return 0;
+}
+
+static int device_add_udev_wants(Unit *u, sd_device *dev) {
+ _cleanup_strv_free_ char **added = NULL;
+ const char *wants, *property;
+ Device *d = DEVICE(u);
+ int r;
+
+ assert(d);
+ assert(dev);
+
+ property = MANAGER_IS_USER(u->manager) ? "SYSTEMD_USER_WANTS" : "SYSTEMD_WANTS";
+
+ r = sd_device_get_property_value(dev, property, &wants);
+ if (r < 0)
+ return 0;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&wants, &word, NULL, EXTRACT_QUOTES);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to parse property %s with value %s: %m", property, wants);
+
+ if (unit_name_is_valid(word, UNIT_NAME_TEMPLATE) && d->sysfs) {
+ _cleanup_free_ char *escaped = NULL;
+
+ /* If the unit name is specified as template, then automatically fill in the sysfs path of the
+ * device as instance name, properly escaped. */
+
+ r = unit_name_path_escape(d->sysfs, &escaped);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to escape %s: %m", d->sysfs);
+
+ r = unit_name_replace_instance(word, escaped, &k);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to build %s instance of template %s: %m", escaped, word);
+ } else {
+ /* If this is not a template, then let's mangle it so, that it becomes a valid unit name. */
+
+ r = unit_name_mangle(word, UNIT_NAME_MANGLE_WARN, &k);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to mangle unit name \"%s\": %m", word);
+ }
+
+ r = unit_add_dependency_by_name(u, UNIT_WANTS, k, true, UNIT_DEPENDENCY_UDEV);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to add Wants= dependency: %m");
+
+ r = strv_push(&added, k);
+ if (r < 0)
+ return log_oom();
+
+ k = NULL;
+ }
+
+ if (d->state != DEVICE_DEAD) {
+ char **i;
+
+ /* So here's a special hack, to compensate for the fact that the udev database's reload cycles are not
+ * synchronized with our own reload cycles: when we detect that the SYSTEMD_WANTS property of a device
+ * changes while the device unit is already up, let's manually trigger any new units listed in it not
+ * seen before. This typically appens during the boot-time switch root transition, as udev devices
+ * will generally already be up in the initrd, but SYSTEMD_WANTS properties get then added through udev
+ * rules only available on the host system, and thus only when the initial udev coldplug trigger runs.
+ *
+ * We do this only if the device has been up already when we parse this, as otherwise the usual
+ * dependency logic that is run from the dead → plugged transition will trigger these deps. */
+
+ STRV_FOREACH(i, added) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ if (strv_contains(d->wants_property, *i)) /* Was this unit already listed before? */
+ continue;
+
+ r = manager_add_job_by_name(u->manager, JOB_START, *i, JOB_FAIL, &error, NULL);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to enqueue SYSTEMD_WANTS= job, ignoring: %s", bus_error_message(&error, r));
+ }
+ }
+
+ strv_free(d->wants_property);
+ d->wants_property = TAKE_PTR(added);
+
+ return 0;
+}
+
+static bool device_is_bound_by_mounts(Device *d, sd_device *dev) {
+ const char *bound_by;
+ int r;
+
+ assert(d);
+ assert(dev);
+
+ if (sd_device_get_property_value(dev, "SYSTEMD_MOUNT_DEVICE_BOUND", &bound_by) >= 0) {
+ r = parse_boolean(bound_by);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Failed to parse SYSTEMD_MOUNT_DEVICE_BOUND='%s' udev property, ignoring: %m", bound_by);
+
+ d->bind_mounts = r > 0;
+ } else
+ d->bind_mounts = false;
+
+ return d->bind_mounts;
+}
+
+static void device_upgrade_mount_deps(Unit *u) {
+ Unit *other;
+ Iterator i;
+ void *v;
+ int r;
+
+ /* Let's upgrade Requires= to BindsTo= on us. (Used when SYSTEMD_MOUNT_DEVICE_BOUND is set) */
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REQUIRED_BY], i) {
+ if (other->type != UNIT_MOUNT)
+ continue;
+
+ r = unit_add_dependency(other, UNIT_BINDS_TO, u, true, UNIT_DEPENDENCY_UDEV);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to add BindsTo= dependency between device and mount unit, ignoring: %m");
+ }
+}
+
+static int device_setup_unit(Manager *m, sd_device *dev, const char *path, bool main) {
+ _cleanup_free_ char *e = NULL;
+ const char *sysfs = NULL;
+ Unit *u = NULL;
+ bool delete;
+ int r;
+
+ assert(m);
+ assert(path);
+
+ if (dev) {
+ r = sd_device_get_syspath(dev, &sysfs);
+ if (r < 0) {
+ log_device_debug_errno(dev, r, "Couldn't get syspath from device, ignoring: %m");
+ return 0;
+ }
+ }
+
+ r = unit_name_from_path(path, ".device", &e);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to generate unit name from device path: %m");
+
+ u = manager_get_unit(m, e);
+ if (u) {
+ /* The device unit can still be present even if the device was unplugged: a mount unit can reference it
+ * hence preventing the GC to have garbaged it. That's desired since the device unit may have a
+ * dependency on the mount unit which was added during the loading of the later. When the device is
+ * plugged the sysfs might not be initialized yet, as we serialize the device's state but do not
+ * serialize the sysfs path across reloads/reexecs. Hence, when coming back from a reload/restart we
+ * might have the state valid, but not the sysfs path. Hence, let's filter out conflicting devices, but
+ * let's accept devices in any state with no sysfs path set. */
+
+ if (DEVICE(u)->state == DEVICE_PLUGGED &&
+ DEVICE(u)->sysfs &&
+ sysfs &&
+ !path_equal(DEVICE(u)->sysfs, sysfs)) {
+ log_unit_debug(u, "Device %s appeared twice with different sysfs paths %s and %s, ignoring the latter.",
+ e, DEVICE(u)->sysfs, sysfs);
+ return -EEXIST;
+ }
+
+ delete = false;
+
+ /* Let's remove all dependencies generated due to udev properties. We'll readd whatever is configured
+ * now below. */
+ unit_remove_dependencies(u, UNIT_DEPENDENCY_UDEV);
+ } else {
+ delete = true;
+
+ r = unit_new_for_name(m, sizeof(Device), e, &u);
+ if (r < 0) {
+ log_device_error_errno(dev, r, "Failed to allocate device unit %s: %m", e);
+ goto fail;
+ }
+
+ unit_add_to_load_queue(u);
+ }
+
+ /* If this was created via some dependency and has not actually been seen yet ->sysfs will not be
+ * initialized. Hence initialize it if necessary. */
+ if (sysfs) {
+ r = device_set_sysfs(DEVICE(u), sysfs);
+ if (r < 0) {
+ log_unit_error_errno(u, r, "Failed to set sysfs path %s: %m", sysfs);
+ goto fail;
+ }
+
+ /* The additional systemd udev properties we only interpret for the main object */
+ if (main)
+ (void) device_add_udev_wants(u, dev);
+ }
+
+ (void) device_update_description(u, dev, path);
+
+ /* So the user wants the mount units to be bound to the device but a mount unit might has been seen by systemd
+ * before the device appears on its radar. In this case the device unit is partially initialized and includes
+ * the deps on the mount unit but at that time the "bind mounts" flag wasn't not present. Fix this up now. */
+ if (dev && device_is_bound_by_mounts(DEVICE(u), dev))
+ device_upgrade_mount_deps(u);
+
+ /* Note that this won't dispatch the load queue, the caller has to do that if needed and appropriate */
+ unit_add_to_dbus_queue(u);
+
+ return 0;
+
+fail:
+ if (delete)
+ unit_free(u);
+
+ return r;
+}
+
+static int device_process_new(Manager *m, sd_device *dev) {
+ const char *sysfs, *dn, *alias;
+ dev_t devnum;
+ int r;
+
+ assert(m);
+
+ if (sd_device_get_syspath(dev, &sysfs) < 0)
+ return 0;
+
+ /* Add the main unit named after the sysfs path */
+ r = device_setup_unit(m, dev, sysfs, true);
+ if (r < 0)
+ return r;
+
+ /* Add an additional unit for the device node */
+ if (sd_device_get_devname(dev, &dn) >= 0)
+ (void) device_setup_unit(m, dev, dn, false);
+
+ /* Add additional units for all symlinks */
+ if (sd_device_get_devnum(dev, &devnum) >= 0) {
+ const char *p;
+
+ FOREACH_DEVICE_DEVLINK(dev, p) {
+ struct stat st;
+
+ if (PATH_STARTSWITH_SET(p, "/dev/block/", "/dev/char/"))
+ continue;
+
+ /* Verify that the symlink in the FS actually belongs
+ * to this device. This is useful to deal with
+ * conflicting devices, e.g. when two disks want the
+ * same /dev/disk/by-label/xxx link because they have
+ * the same label. We want to make sure that the same
+ * device that won the symlink wins in systemd, so we
+ * check the device node major/minor */
+ if (stat(p, &st) >= 0 &&
+ ((!S_ISBLK(st.st_mode) && !S_ISCHR(st.st_mode)) ||
+ st.st_rdev != devnum))
+ continue;
+
+ (void) device_setup_unit(m, dev, p, false);
+ }
+ }
+
+ /* Add additional units for all explicitly configured aliases */
+ if (sd_device_get_property_value(dev, "SYSTEMD_ALIAS", &alias) < 0)
+ return 0;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&alias, &word, NULL, EXTRACT_QUOTES);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_device_warning_errno(dev, r, "Failed to parse SYSTEMD_ALIAS property: %m");
+
+ if (!path_is_absolute(word))
+ log_device_warning(dev, "SYSTEMD_ALIAS is not an absolute path, ignoring: %s", word);
+ else if (!path_is_normalized(word))
+ log_device_warning(dev, "SYSTEMD_ALIAS is not a normalized path, ignoring: %s", word);
+ else
+ (void) device_setup_unit(m, dev, word, false);
+ }
+
+ return 0;
+}
+
+static void device_found_changed(Device *d, DeviceFound previous, DeviceFound now) {
+ assert(d);
+
+ /* Didn't exist before, but does now? if so, generate a new invocation ID for it */
+ if (previous == DEVICE_NOT_FOUND && now != DEVICE_NOT_FOUND)
+ (void) unit_acquire_invocation_id(UNIT(d));
+
+ if (FLAGS_SET(now, DEVICE_FOUND_UDEV))
+ /* When the device is known to udev we consider it plugged. */
+ device_set_state(d, DEVICE_PLUGGED);
+ else if (now != DEVICE_NOT_FOUND && !FLAGS_SET(previous, DEVICE_FOUND_UDEV))
+ /* If the device has not been seen by udev yet, but is now referenced by the kernel, then we assume the
+ * kernel knows it now, and udev might soon too. */
+ device_set_state(d, DEVICE_TENTATIVE);
+ else
+ /* If nobody sees the device, or if the device was previously seen by udev and now is only referenced
+ * from the kernel, then we consider the device is gone, the kernel just hasn't noticed it yet. */
+ device_set_state(d, DEVICE_DEAD);
+}
+
+static void device_update_found_one(Device *d, DeviceFound found, DeviceFound mask) {
+ assert(d);
+
+ if (MANAGER_IS_RUNNING(UNIT(d)->manager)) {
+ DeviceFound n, previous;
+
+ /* When we are already running, then apply the new mask right-away, and trigger state changes
+ * right-away */
+
+ n = (d->found & ~mask) | (found & mask);
+ if (n == d->found)
+ return;
+
+ previous = d->found;
+ d->found = n;
+
+ device_found_changed(d, previous, n);
+ } else
+ /* We aren't running yet, let's apply the new mask to the shadow variable instead, which we'll apply as
+ * soon as we catch-up with the state. */
+ d->enumerated_found = (d->enumerated_found & ~mask) | (found & mask);
+}
+
+static void device_update_found_by_sysfs(Manager *m, const char *sysfs, DeviceFound found, DeviceFound mask) {
+ Device *d, *l, *n;
+
+ assert(m);
+ assert(sysfs);
+
+ if (mask == 0)
+ return;
+
+ l = hashmap_get(m->devices_by_sysfs, sysfs);
+ LIST_FOREACH_SAFE(same_sysfs, d, n, l)
+ device_update_found_one(d, found, mask);
+}
+
+static int device_update_found_by_name(Manager *m, const char *path, DeviceFound found, DeviceFound mask) {
+ _cleanup_free_ char *e = NULL;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(path);
+
+ if (mask == 0)
+ return 0;
+
+ r = unit_name_from_path(path, ".device", &e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name from device path: %m");
+
+ u = manager_get_unit(m, e);
+ if (!u)
+ return 0;
+
+ device_update_found_one(DEVICE(u), found, mask);
+ return 0;
+}
+
+static bool device_is_ready(sd_device *dev) {
+ const char *ready;
+
+ assert(dev);
+
+ if (sd_device_get_property_value(dev, "SYSTEMD_READY", &ready) < 0)
+ return true;
+
+ return parse_boolean(ready) != 0;
+}
+
+static Unit *device_following(Unit *u) {
+ Device *d = DEVICE(u);
+ Device *other, *first = NULL;
+
+ assert(d);
+
+ if (startswith(u->id, "sys-"))
+ return NULL;
+
+ /* Make everybody follow the unit that's named after the sysfs path */
+ LIST_FOREACH_AFTER(same_sysfs, other, d)
+ if (startswith(UNIT(other)->id, "sys-"))
+ return UNIT(other);
+
+ LIST_FOREACH_BEFORE(same_sysfs, other, d) {
+ if (startswith(UNIT(other)->id, "sys-"))
+ return UNIT(other);
+
+ first = other;
+ }
+
+ return UNIT(first);
+}
+
+static int device_following_set(Unit *u, Set **_set) {
+ Device *d = DEVICE(u), *other;
+ _cleanup_set_free_ Set *set = NULL;
+ int r;
+
+ assert(d);
+ assert(_set);
+
+ if (LIST_JUST_US(same_sysfs, d)) {
+ *_set = NULL;
+ return 0;
+ }
+
+ set = set_new(NULL);
+ if (!set)
+ return -ENOMEM;
+
+ LIST_FOREACH_AFTER(same_sysfs, other, d) {
+ r = set_put(set, other);
+ if (r < 0)
+ return r;
+ }
+
+ LIST_FOREACH_BEFORE(same_sysfs, other, d) {
+ r = set_put(set, other);
+ if (r < 0)
+ return r;
+ }
+
+ *_set = TAKE_PTR(set);
+ return 1;
+}
+
+static void device_shutdown(Manager *m) {
+ assert(m);
+
+ m->device_monitor = sd_device_monitor_unref(m->device_monitor);
+ m->devices_by_sysfs = hashmap_free(m->devices_by_sysfs);
+}
+
+static void device_enumerate(Manager *m) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *dev;
+ int r;
+
+ assert(m);
+
+ if (!m->device_monitor) {
+ r = sd_device_monitor_new(&m->device_monitor);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate device monitor: %m");
+ goto fail;
+ }
+
+ /* This will fail if we are unprivileged, but that
+ * should not matter much, as user instances won't run
+ * during boot. */
+ (void) sd_device_monitor_set_receive_buffer_size(m->device_monitor, 128*1024*1024);
+
+ r = sd_device_monitor_filter_add_match_tag(m->device_monitor, "systemd");
+ if (r < 0) {
+ log_error_errno(r, "Failed to add udev tag match: %m");
+ goto fail;
+ }
+
+ r = sd_device_monitor_attach_event(m->device_monitor, m->event);
+ if (r < 0) {
+ log_error_errno(r, "Failed to attach event to device monitor: %m");
+ goto fail;
+ }
+
+ r = sd_device_monitor_start(m->device_monitor, device_dispatch_io, m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to start device monitor: %m");
+ goto fail;
+ }
+ }
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate device enumerator: %m");
+ goto fail;
+ }
+
+ r = sd_device_enumerator_add_match_tag(e, "systemd");
+ if (r < 0) {
+ log_error_errno(r, "Failed to set tag for device enumeration: %m");
+ goto fail;
+ }
+
+ FOREACH_DEVICE(e, dev) {
+ const char *sysfs;
+
+ if (!device_is_ready(dev))
+ continue;
+
+ (void) device_process_new(m, dev);
+
+ if (sd_device_get_syspath(dev, &sysfs) < 0)
+ continue;
+
+ device_update_found_by_sysfs(m, sysfs, DEVICE_FOUND_UDEV, DEVICE_FOUND_UDEV);
+ }
+
+ return;
+
+fail:
+ device_shutdown(m);
+}
+
+static void device_propagate_reload_by_sysfs(Manager *m, const char *sysfs) {
+ Device *d, *l, *n;
+ int r;
+
+ assert(m);
+ assert(sysfs);
+
+ l = hashmap_get(m->devices_by_sysfs, sysfs);
+ LIST_FOREACH_SAFE(same_sysfs, d, n, l) {
+ if (d->state == DEVICE_DEAD)
+ continue;
+
+ r = manager_propagate_reload(m, UNIT(d), JOB_REPLACE, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to propagate reload, ignoring: %m");
+ }
+}
+
+static int device_dispatch_io(sd_device_monitor *monitor, sd_device *dev, void *userdata) {
+ Manager *m = userdata;
+ const char *action, *sysfs;
+ int r;
+
+ assert(m);
+ assert(dev);
+
+ r = sd_device_get_syspath(dev, &sysfs);
+ if (r < 0) {
+ log_device_error_errno(dev, r, "Failed to get device sys path: %m");
+ return 0;
+ }
+
+ r = sd_device_get_property_value(dev, "ACTION", &action);
+ if (r < 0) {
+ log_device_error_errno(dev, r, "Failed to get udev action string: %m");
+ return 0;
+ }
+
+ if (streq(action, "change"))
+ device_propagate_reload_by_sysfs(m, sysfs);
+
+ /* A change event can signal that a device is becoming ready, in particular if
+ * the device is using the SYSTEMD_READY logic in udev
+ * so we need to reach the else block of the follwing if, even for change events */
+ if (streq(action, "remove")) {
+ r = swap_process_device_remove(m, dev);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Failed to process swap device remove event, ignoring: %m");
+
+ /* If we get notified that a device was removed by
+ * udev, then it's completely gone, hence unset all
+ * found bits */
+ device_update_found_by_sysfs(m, sysfs, 0, DEVICE_FOUND_UDEV|DEVICE_FOUND_MOUNT|DEVICE_FOUND_SWAP);
+
+ } else if (device_is_ready(dev)) {
+
+ (void) device_process_new(m, dev);
+
+ r = swap_process_device_new(m, dev);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Failed to process swap device new event, ignoring: %m");
+
+ manager_dispatch_load_queue(m);
+
+ /* The device is found now, set the udev found bit */
+ device_update_found_by_sysfs(m, sysfs, DEVICE_FOUND_UDEV, DEVICE_FOUND_UDEV);
+
+ } else {
+ /* The device is nominally around, but not ready for
+ * us. Hence unset the udev bit, but leave the rest
+ * around. */
+
+ device_update_found_by_sysfs(m, sysfs, 0, DEVICE_FOUND_UDEV);
+ }
+
+ return 0;
+}
+
+static bool device_supported(void) {
+ static int read_only = -1;
+
+ /* If /sys is read-only we don't support device units, and any
+ * attempts to start one should fail immediately. */
+
+ if (read_only < 0)
+ read_only = path_is_read_only_fs("/sys");
+
+ return read_only <= 0;
+}
+
+static int validate_node(Manager *m, const char *node, sd_device **ret) {
+ struct stat st;
+ int r;
+
+ assert(m);
+ assert(node);
+ assert(ret);
+
+ /* Validates a device node that showed up in /proc/swaps or /proc/self/mountinfo if it makes sense for us to
+ * track. Note that this validator is fine within missing device nodes, but not with badly set up ones! */
+
+ if (!path_startswith(node, "/dev")) {
+ *ret = NULL;
+ return 0; /* bad! */
+ }
+
+ if (stat(node, &st) < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to stat() device node file %s: %m", node);
+
+ *ret = NULL;
+ return 1; /* good! (though missing) */
+
+ } else {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+
+ r = device_new_from_stat_rdev(&dev, &st);
+ if (r == -ENOENT) {
+ *ret = NULL;
+ return 1; /* good! (though missing) */
+ } else if (r == -ENOTTY) {
+ *ret = NULL;
+ return 0; /* bad! (not a device node but some other kind of file system node) */
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to get udev device from devnum %u:%u: %m", major(st.st_rdev), minor(st.st_rdev));
+
+ *ret = TAKE_PTR(dev);
+ return 1; /* good! */
+ }
+}
+
+void device_found_node(Manager *m, const char *node, DeviceFound found, DeviceFound mask) {
+ int r;
+
+ assert(m);
+ assert(node);
+
+ if (!device_supported())
+ return;
+
+ if (mask == 0)
+ return;
+
+ /* This is called whenever we find a device referenced in /proc/swaps or /proc/self/mounts. Such a device might
+ * be mounted/enabled at a time where udev has not finished probing it yet, and we thus haven't learned about
+ * it yet. In this case we will set the device unit to "tentative" state.
+ *
+ * This takes a pair of DeviceFound flags parameters. The 'mask' parameter is a bit mask that indicates which
+ * bits of 'found' to copy into the per-device DeviceFound flags field. Thus, this function may be used to set
+ * and unset individual bits in a single call, while merging partially with previous state. */
+
+ if ((found & mask) != 0) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+
+ /* If the device is known in the kernel and newly appeared, then we'll create a device unit for it,
+ * under the name referenced in /proc/swaps or /proc/self/mountinfo. But first, let's validate if
+ * everything is alright with the device node. */
+
+ r = validate_node(m, node, &dev);
+ if (r <= 0)
+ return; /* Don't create a device unit for this if the device node is borked. */
+
+ (void) device_setup_unit(m, dev, node, false);
+ }
+
+ /* Update the device unit's state, should it exist */
+ (void) device_update_found_by_name(m, node, found, mask);
+}
+
+bool device_shall_be_bound_by(Unit *device, Unit *u) {
+ assert(device);
+ assert(u);
+
+ if (u->type != UNIT_MOUNT)
+ return false;
+
+ return DEVICE(device)->bind_mounts;
+}
+
+const UnitVTable device_vtable = {
+ .object_size = sizeof(Device),
+ .sections =
+ "Unit\0"
+ "Device\0"
+ "Install\0",
+
+ .gc_jobs = true,
+
+ .init = device_init,
+ .done = device_done,
+ .load = device_load,
+
+ .coldplug = device_coldplug,
+ .catchup = device_catchup,
+
+ .serialize = device_serialize,
+ .deserialize_item = device_deserialize_item,
+
+ .dump = device_dump,
+
+ .active_state = device_active_state,
+ .sub_state_to_string = device_sub_state_to_string,
+
+ .bus_vtable = bus_device_vtable,
+
+ .following = device_following,
+ .following_set = device_following_set,
+
+ .enumerate = device_enumerate,
+ .shutdown = device_shutdown,
+ .supported = device_supported,
+
+ .status_message_formats = {
+ .starting_stopping = {
+ [0] = "Expecting device %s...",
+ },
+ .finished_start_job = {
+ [JOB_DONE] = "Found device %s.",
+ [JOB_TIMEOUT] = "Timed out waiting for device %s.",
+ },
+ },
+};
diff --git a/src/core/device.h b/src/core/device.h
new file mode 100644
index 0000000..3062be7
--- /dev/null
+++ b/src/core/device.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "unit.h"
+
+typedef struct Device Device;
+
+/* A mask specifying where we have seen the device currently. This is a bitmask because the device might show up
+ * asynchronously from each other at various places. For example, in very common case a device might already be mounted
+ * before udev finished probing it (think: a script setting up a loopback block device, formatting it and mounting it
+ * in quick succession). Hence we need to track precisely where it is already visible and where not. */
+typedef enum DeviceFound {
+ DEVICE_NOT_FOUND = 0,
+ DEVICE_FOUND_UDEV = 1 << 0, /* The device has shown up in the udev database */
+ DEVICE_FOUND_MOUNT = 1 << 1, /* The device has shown up in /proc/self/mountinfo */
+ DEVICE_FOUND_SWAP = 1 << 2, /* The device has shown up in /proc/swaps */
+ DEVICE_FOUND_MASK = DEVICE_FOUND_UDEV|DEVICE_FOUND_MOUNT|DEVICE_FOUND_SWAP,
+} DeviceFound;
+
+struct Device {
+ Unit meta;
+
+ char *sysfs;
+
+ /* In order to be able to distinguish dependencies on different device nodes we might end up creating multiple
+ * devices for the same sysfs path. We chain them up here. */
+ LIST_FIELDS(struct Device, same_sysfs);
+
+ DeviceState state, deserialized_state;
+ DeviceFound found, deserialized_found, enumerated_found;
+
+ bool bind_mounts;
+
+ /* The SYSTEMD_WANTS udev property for this device the last time we saw it */
+ char **wants_property;
+};
+
+extern const UnitVTable device_vtable;
+
+void device_found_node(Manager *m, const char *node, DeviceFound found, DeviceFound mask);
+bool device_shall_be_bound_by(Unit *device, Unit *u);
+
+DEFINE_CAST(DEVICE, Device);
diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c
new file mode 100644
index 0000000..530df70
--- /dev/null
+++ b/src/core/dynamic-user.c
@@ -0,0 +1,824 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <grp.h>
+#include <pwd.h>
+#include <sys/file.h>
+
+#include "clean-ipc.h"
+#include "dynamic-user.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "nscd-flush.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "serialize.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "user-util.h"
+
+/* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
+#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN)
+
+DEFINE_PRIVATE_TRIVIAL_REF_FUNC(DynamicUser, dynamic_user);
+
+static DynamicUser* dynamic_user_free(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ if (d->manager)
+ (void) hashmap_remove(d->manager->dynamic_users, d->name);
+
+ safe_close_pair(d->storage_socket);
+ return mfree(d);
+}
+
+static int dynamic_user_add(Manager *m, const char *name, int storage_socket[static 2], DynamicUser **ret) {
+ DynamicUser *d;
+ int r;
+
+ assert(m);
+ assert(name);
+ assert(storage_socket);
+
+ r = hashmap_ensure_allocated(&m->dynamic_users, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ d = malloc0(offsetof(DynamicUser, name) + strlen(name) + 1);
+ if (!d)
+ return -ENOMEM;
+
+ strcpy(d->name, name);
+
+ d->storage_socket[0] = storage_socket[0];
+ d->storage_socket[1] = storage_socket[1];
+
+ r = hashmap_put(m->dynamic_users, d->name, d);
+ if (r < 0) {
+ free(d);
+ return r;
+ }
+
+ d->manager = m;
+
+ if (ret)
+ *ret = d;
+
+ return 0;
+}
+
+static int dynamic_user_acquire(Manager *m, const char *name, DynamicUser** ret) {
+ _cleanup_close_pair_ int storage_socket[2] = { -1, -1 };
+ DynamicUser *d;
+ int r;
+
+ assert(m);
+ assert(name);
+
+ /* Return the DynamicUser structure for a specific user name. Note that this won't actually allocate a UID for
+ * it, but just prepare the data structure for it. The UID is allocated only on demand, when it's really
+ * needed, and in the child process we fork off, since allocation involves NSS checks which are not OK to do
+ * from PID 1. To allow the children and PID 1 share information about allocated UIDs we use an anonymous
+ * AF_UNIX/SOCK_DGRAM socket (called the "storage socket") that contains at most one datagram with the
+ * allocated UID number, plus an fd referencing the lock file for the UID
+ * (i.e. /run/systemd/dynamic-uid/$UID). Why involve the socket pair? So that PID 1 and all its children can
+ * share the same storage for the UID and lock fd, simply by inheriting the storage socket fds. The socket pair
+ * may exist in three different states:
+ *
+ * a) no datagram stored. This is the initial state. In this case the dynamic user was never realized.
+ *
+ * b) a datagram containing a UID stored, but no lock fd attached to it. In this case there was already a
+ * statically assigned UID by the same name, which we are reusing.
+ *
+ * c) a datagram containing a UID stored, and a lock fd is attached to it. In this case we allocated a dynamic
+ * UID and locked it in the file system, using the lock fd.
+ *
+ * As PID 1 and various children might access the socket pair simultaneously, and pop the datagram or push it
+ * back in any time, we also maintain a lock on the socket pair. Note one peculiarity regarding locking here:
+ * the UID lock on disk is protected via a BSD file lock (i.e. an fd-bound lock), so that the lock is kept in
+ * place as long as there's a reference to the fd open. The lock on the storage socket pair however is a POSIX
+ * file lock (i.e. a process-bound lock), as all users share the same fd of this (after all it is anonymous,
+ * nobody else could get any access to it except via our own fd) and we want to synchronize access between all
+ * processes that have access to it. */
+
+ d = hashmap_get(m->dynamic_users, name);
+ if (d) {
+ if (ret) {
+ /* We already have a structure for the dynamic user, let's increase the ref count and reuse it */
+ d->n_ref++;
+ *ret = d;
+ }
+ return 0;
+ }
+
+ if (!valid_user_group_name_or_id(name))
+ return -EINVAL;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, storage_socket) < 0)
+ return -errno;
+
+ r = dynamic_user_add(m, name, storage_socket, &d);
+ if (r < 0)
+ return r;
+
+ storage_socket[0] = storage_socket[1] = -1;
+
+ if (ret) {
+ d->n_ref++;
+ *ret = d;
+ }
+
+ return 1;
+}
+
+static int make_uid_symlinks(uid_t uid, const char *name, bool b) {
+
+ char path1[STRLEN("/run/systemd/dynamic-uid/direct:") + DECIMAL_STR_MAX(uid_t) + 1];
+ const char *path2;
+ int r = 0, k;
+
+ /* Add direct additional symlinks for direct lookups of dynamic UIDs and their names by userspace code. The
+ * only reason we have this is because dbus-daemon cannot use D-Bus for resolving users and groups (since it
+ * would be its own client then). We hence keep these world-readable symlinks in place, so that the
+ * unprivileged dbus user can read the mappings when it needs them via these symlinks instead of having to go
+ * via the bus. Ideally, we'd use the lock files we keep for this anyway, but we can't since we use BSD locks
+ * on them and as those may be taken by any user with read access we can't make them world-readable. */
+
+ xsprintf(path1, "/run/systemd/dynamic-uid/direct:" UID_FMT, uid);
+ if (unlink(path1) < 0 && errno != ENOENT)
+ r = -errno;
+
+ if (b && symlink(name, path1) < 0) {
+ k = log_warning_errno(errno, "Failed to symlink \"%s\": %m", path1);
+ if (r == 0)
+ r = k;
+ }
+
+ path2 = strjoina("/run/systemd/dynamic-uid/direct:", name);
+ if (unlink(path2) < 0 && errno != ENOENT) {
+ k = -errno;
+ if (r == 0)
+ r = k;
+ }
+
+ if (b && symlink(path1 + STRLEN("/run/systemd/dynamic-uid/direct:"), path2) < 0) {
+ k = log_warning_errno(errno, "Failed to symlink \"%s\": %m", path2);
+ if (r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int pick_uid(char **suggested_paths, const char *name, uid_t *ret_uid) {
+
+ /* Find a suitable free UID. We use the following strategy to find a suitable UID:
+ *
+ * 1. Initially, we try to read the UID of a number of specified paths. If any of these UIDs works, we use
+ * them. We use in order to increase the chance of UID reuse, if StateDirectory=, CacheDirectory= or
+ * LogsDirectory= are used, as reusing the UID these directories are owned by saves us from having to
+ * recursively chown() them to new users.
+ *
+ * 2. If that didn't yield a currently unused UID, we hash the user name, and try to use that. This should be
+ * pretty good, as the use ris by default derived from the unit name, and hence the same service and same
+ * user should usually get the same UID as long as our hashing doesn't clash.
+ *
+ * 3. Finally, if that didn't work, we randomly pick UIDs, until we find one that is empty.
+ *
+ * Since the dynamic UID space is relatively small we'll stop trying after 100 iterations, giving up. */
+
+ enum {
+ PHASE_SUGGESTED, /* the first phase, reusing directory ownership UIDs */
+ PHASE_HASHED, /* the second phase, deriving a UID from the username by hashing */
+ PHASE_RANDOM, /* the last phase, randomly picking UIDs */
+ } phase = PHASE_SUGGESTED;
+
+ static const uint8_t hash_key[] = {
+ 0x37, 0x53, 0x7e, 0x31, 0xcf, 0xce, 0x48, 0xf5,
+ 0x8a, 0xbb, 0x39, 0x57, 0x8d, 0xd9, 0xec, 0x59
+ };
+
+ unsigned n_tries = 100, current_suggested = 0;
+ int r;
+
+ (void) mkdir("/run/systemd/dynamic-uid", 0755);
+
+ for (;;) {
+ char lock_path[STRLEN("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+ _cleanup_close_ int lock_fd = -1;
+ uid_t candidate;
+ ssize_t l;
+
+ if (--n_tries <= 0) /* Give up retrying eventually */
+ return -EBUSY;
+
+ switch (phase) {
+
+ case PHASE_SUGGESTED: {
+ struct stat st;
+
+ if (!suggested_paths || !suggested_paths[current_suggested]) {
+ /* We reached the end of the suggested paths list, let's try by hashing the name */
+ phase = PHASE_HASHED;
+ continue;
+ }
+
+ if (stat(suggested_paths[current_suggested++], &st) < 0)
+ continue; /* We can't read the UID of this path, but that doesn't matter, just try the next */
+
+ candidate = st.st_uid;
+ break;
+ }
+
+ case PHASE_HASHED:
+ /* A static user by this name does not exist yet. Let's find a free ID then, and use that. We
+ * start with a UID generated as hash from the user name. */
+ candidate = UID_CLAMP_INTO_RANGE(siphash24(name, strlen(name), hash_key));
+
+ /* If this one fails, we should proceed with random tries */
+ phase = PHASE_RANDOM;
+ break;
+
+ case PHASE_RANDOM:
+
+ /* Pick another random UID, and see if that works for us. */
+ random_bytes(&candidate, sizeof(candidate));
+ candidate = UID_CLAMP_INTO_RANGE(candidate);
+ break;
+
+ default:
+ assert_not_reached("unknown phase");
+ }
+
+ /* Make sure whatever we picked here actually is in the right range */
+ if (!uid_is_dynamic(candidate))
+ continue;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, candidate);
+
+ for (;;) {
+ struct stat st;
+
+ lock_fd = open(lock_path, O_CREAT|O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NOCTTY, 0600);
+ if (lock_fd < 0)
+ return -errno;
+
+ r = flock(lock_fd, LOCK_EX|LOCK_NB); /* Try to get a BSD file lock on the UID lock file */
+ if (r < 0) {
+ if (IN_SET(errno, EBUSY, EAGAIN))
+ goto next; /* already in use */
+
+ return -errno;
+ }
+
+ if (fstat(lock_fd, &st) < 0)
+ return -errno;
+ if (st.st_nlink > 0)
+ break;
+
+ /* Oh, bummer, we got the lock, but the file was unlinked between the time we opened it and
+ * got the lock. Close it, and try again. */
+ lock_fd = safe_close(lock_fd);
+ }
+
+ /* Some superficial check whether this UID/GID might already be taken by some static user */
+ if (getpwuid(candidate) ||
+ getgrgid((gid_t) candidate) ||
+ search_ipc(candidate, (gid_t) candidate) != 0) {
+ (void) unlink(lock_path);
+ continue;
+ }
+
+ /* Let's store the user name in the lock file, so that we can use it for looking up the username for a UID */
+ l = pwritev(lock_fd,
+ (struct iovec[2]) {
+ IOVEC_INIT_STRING(name),
+ IOVEC_INIT((char[1]) { '\n' }, 1),
+ }, 2, 0);
+ if (l < 0) {
+ r = -errno;
+ (void) unlink(lock_path);
+ return r;
+ }
+
+ (void) ftruncate(lock_fd, l);
+ (void) make_uid_symlinks(candidate, name, true); /* also add direct lookup symlinks */
+
+ *ret_uid = candidate;
+ return TAKE_FD(lock_fd);
+
+ next:
+ ;
+ }
+}
+
+static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
+ uid_t uid = UID_INVALID;
+ struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
+ int lock_fd;
+ ssize_t k;
+
+ assert(d);
+ assert(ret_uid);
+ assert(ret_lock_fd);
+
+ /* Read the UID and lock fd that is stored in the storage AF_UNIX socket. This should be called with the lock
+ * on the socket taken. */
+
+ k = receive_one_fd_iov(d->storage_socket[0], &iov, 1, MSG_DONTWAIT, &lock_fd);
+ if (k < 0)
+ return (int) k;
+
+ *ret_uid = uid;
+ *ret_lock_fd = lock_fd;
+
+ return 0;
+}
+
+static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) {
+ struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
+
+ assert(d);
+
+ /* Store the UID and lock_fd in the storage socket. This should be called with the socket pair lock taken. */
+ return send_one_fd_iov(d->storage_socket[1], lock_fd, &iov, 1, MSG_DONTWAIT);
+}
+
+static void unlink_uid_lock(int lock_fd, uid_t uid, const char *name) {
+ char lock_path[STRLEN("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+
+ if (lock_fd < 0)
+ return;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid);
+ (void) unlink(lock_path);
+
+ (void) make_uid_symlinks(uid, name, false); /* remove direct lookup symlinks */
+}
+
+static int lockfp(int fd, int *fd_lock) {
+ if (lockf(fd, F_LOCK, 0) < 0)
+ return -errno;
+ *fd_lock = fd;
+ return 0;
+}
+
+static void unlockfp(int *fd_lock) {
+ if (*fd_lock < 0)
+ return;
+ lockf(*fd_lock, F_ULOCK, 0);
+ *fd_lock = -1;
+}
+
+static int dynamic_user_realize(
+ DynamicUser *d,
+ char **suggested_dirs,
+ uid_t *ret_uid, gid_t *ret_gid,
+ bool is_user) {
+
+ _cleanup_(unlockfp) int storage_socket0_lock = -1;
+ _cleanup_close_ int uid_lock_fd = -1;
+ _cleanup_close_ int etc_passwd_lock_fd = -1;
+ uid_t num = UID_INVALID; /* a uid if is_user, and a gid otherwise */
+ gid_t gid = GID_INVALID; /* a gid if is_user, ignored otherwise */
+ bool flush_cache = false;
+ int r;
+
+ assert(d);
+ assert(is_user == !!ret_uid);
+ assert(ret_gid);
+
+ /* Acquire a UID for the user name. This will allocate a UID for the user name if the user doesn't exist
+ * yet. If it already exists its existing UID/GID will be reused. */
+
+ r = lockfp(d->storage_socket[0], &storage_socket0_lock);
+ if (r < 0)
+ return r;
+
+ r = dynamic_user_pop(d, &num, &uid_lock_fd);
+ if (r < 0) {
+ int new_uid_lock_fd;
+ uid_t new_uid;
+
+ if (r != -EAGAIN)
+ return r;
+
+ /* OK, nothing stored yet, let's try to find something useful. While we are working on this release the
+ * lock however, so that nobody else blocks on our NSS lookups. */
+ unlockfp(&storage_socket0_lock);
+
+ /* Let's see if a proper, static user or group by this name exists. Try to take the lock on
+ * /etc/passwd, if that fails with EROFS then /etc is read-only. In that case it's fine if we don't
+ * take the lock, given that users can't be added there anyway in this case. */
+ etc_passwd_lock_fd = take_etc_passwd_lock(NULL);
+ if (etc_passwd_lock_fd < 0 && etc_passwd_lock_fd != -EROFS)
+ return etc_passwd_lock_fd;
+
+ /* First, let's parse this as numeric UID */
+ r = parse_uid(d->name, &num);
+ if (r < 0) {
+ struct passwd *p;
+ struct group *g;
+
+ if (is_user) {
+ /* OK, this is not a numeric UID. Let's see if there's a user by this name */
+ p = getpwnam(d->name);
+ if (p) {
+ num = p->pw_uid;
+ gid = p->pw_gid;
+ } else {
+ /* if the user does not exist but the group with the same name exists, refuse operation */
+ g = getgrnam(d->name);
+ if (g)
+ return -EILSEQ;
+ }
+ } else {
+ /* Let's see if there's a group by this name */
+ g = getgrnam(d->name);
+ if (g)
+ num = (uid_t) g->gr_gid;
+ else {
+ /* if the group does not exist but the user with the same name exists, refuse operation */
+ p = getpwnam(d->name);
+ if (p)
+ return -EILSEQ;
+ }
+ }
+ }
+
+ if (num == UID_INVALID) {
+ /* No static UID assigned yet, excellent. Let's pick a new dynamic one, and lock it. */
+
+ uid_lock_fd = pick_uid(suggested_dirs, d->name, &num);
+ if (uid_lock_fd < 0)
+ return uid_lock_fd;
+ }
+
+ /* So, we found a working UID/lock combination. Let's see if we actually still need it. */
+ r = lockfp(d->storage_socket[0], &storage_socket0_lock);
+ if (r < 0) {
+ unlink_uid_lock(uid_lock_fd, num, d->name);
+ return r;
+ }
+
+ r = dynamic_user_pop(d, &new_uid, &new_uid_lock_fd);
+ if (r < 0) {
+ if (r != -EAGAIN) {
+ /* OK, something bad happened, let's get rid of the bits we acquired. */
+ unlink_uid_lock(uid_lock_fd, num, d->name);
+ return r;
+ }
+
+ /* Great! Nothing is stored here, still. Store our newly acquired data. */
+ flush_cache = true;
+ } else {
+ /* Hmm, so as it appears there's now something stored in the storage socket. Throw away what we
+ * acquired, and use what's stored now. */
+
+ unlink_uid_lock(uid_lock_fd, num, d->name);
+ safe_close(uid_lock_fd);
+
+ num = new_uid;
+ uid_lock_fd = new_uid_lock_fd;
+ }
+ } else if (is_user && !uid_is_dynamic(num)) {
+ struct passwd *p;
+
+ /* Statically allocated user may have different uid and gid. So, let's obtain the gid. */
+ errno = 0;
+ p = getpwuid(num);
+ if (!p)
+ return errno > 0 ? -errno : -ESRCH;
+
+ gid = p->pw_gid;
+ }
+
+ /* If the UID/GID was already allocated dynamically, push the data we popped out back in. If it was already
+ * allocated statically, push the UID back too, but do not push the lock fd in. If we allocated the UID
+ * dynamically right here, push that in along with the lock fd for it. */
+ r = dynamic_user_push(d, num, uid_lock_fd);
+ if (r < 0)
+ return r;
+
+ if (flush_cache) {
+ /* If we allocated a new dynamic UID, refresh nscd, so that it forgets about potentially cached
+ * negative entries. But let's do so after we release the /etc/passwd lock, so that there's no
+ * potential for nscd wanting to lock that for completing the invalidation. */
+ etc_passwd_lock_fd = safe_close(etc_passwd_lock_fd);
+ (void) nscd_flush_cache(STRV_MAKE("passwd", "group"));
+ }
+
+ if (is_user) {
+ *ret_uid = num;
+ *ret_gid = gid != GID_INVALID ? gid : num;
+ } else
+ *ret_gid = num;
+
+ return 0;
+}
+
+int dynamic_user_current(DynamicUser *d, uid_t *ret) {
+ _cleanup_(unlockfp) int storage_socket0_lock = -1;
+ _cleanup_close_ int lock_fd = -1;
+ uid_t uid;
+ int r;
+
+ assert(d);
+ assert(ret);
+
+ /* Get the currently assigned UID for the user, if there's any. This simply pops the data from the storage socket, and pushes it back in right-away. */
+
+ r = lockfp(d->storage_socket[0], &storage_socket0_lock);
+ if (r < 0)
+ return r;
+
+ r = dynamic_user_pop(d, &uid, &lock_fd);
+ if (r < 0)
+ return r;
+
+ r = dynamic_user_push(d, uid, lock_fd);
+ if (r < 0)
+ return r;
+
+ *ret = uid;
+ return 0;
+}
+
+static DynamicUser* dynamic_user_unref(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ /* Note that this doesn't actually release any resources itself. If a dynamic user should be fully destroyed
+ * and its UID released, use dynamic_user_destroy() instead. NB: the dynamic user table may contain entries
+ * with no references, which is commonly the case right before a daemon reload. */
+
+ assert(d->n_ref > 0);
+ d->n_ref--;
+
+ return NULL;
+}
+
+static int dynamic_user_close(DynamicUser *d) {
+ _cleanup_(unlockfp) int storage_socket0_lock = -1;
+ _cleanup_close_ int lock_fd = -1;
+ uid_t uid;
+ int r;
+
+ /* Release the user ID, by releasing the lock on it, and emptying the storage socket. After this the user is
+ * unrealized again, much like it was after it the DynamicUser object was first allocated. */
+
+ r = lockfp(d->storage_socket[0], &storage_socket0_lock);
+ if (r < 0)
+ return r;
+
+ r = dynamic_user_pop(d, &uid, &lock_fd);
+ if (r == -EAGAIN)
+ /* User wasn't realized yet, nothing to do. */
+ return 0;
+ if (r < 0)
+ return r;
+
+ /* This dynamic user was realized and dynamically allocated. In this case, let's remove the lock file. */
+ unlink_uid_lock(lock_fd, uid, d->name);
+
+ (void) nscd_flush_cache(STRV_MAKE("passwd", "group"));
+ return 1;
+}
+
+static DynamicUser* dynamic_user_destroy(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ /* Drop a reference to a DynamicUser object, and destroy the user completely if this was the last
+ * reference. This is called whenever a service is shut down and wants its dynamic UID gone. Note that
+ * dynamic_user_unref() is what is called whenever a service is simply freed, for example during a reload
+ * cycle, where the dynamic users should not be destroyed, but our datastructures should. */
+
+ dynamic_user_unref(d);
+
+ if (d->n_ref > 0)
+ return NULL;
+
+ (void) dynamic_user_close(d);
+ return dynamic_user_free(d);
+}
+
+int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds) {
+ DynamicUser *d;
+ Iterator i;
+
+ assert(m);
+ assert(f);
+ assert(fds);
+
+ /* Dump the dynamic user database into the manager serialization, to deal with daemon reloads. */
+
+ HASHMAP_FOREACH(d, m->dynamic_users, i) {
+ int copy0, copy1;
+
+ copy0 = fdset_put_dup(fds, d->storage_socket[0]);
+ if (copy0 < 0)
+ return log_error_errno(copy0, "Failed to add dynamic user storage fd to serialization: %m");
+
+ copy1 = fdset_put_dup(fds, d->storage_socket[1]);
+ if (copy1 < 0)
+ return log_error_errno(copy1, "Failed to add dynamic user storage fd to serialization: %m");
+
+ (void) serialize_item_format(f, "dynamic-user", "%s %i %i", d->name, copy0, copy1);
+ }
+
+ return 0;
+}
+
+void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds) {
+ _cleanup_free_ char *name = NULL, *s0 = NULL, *s1 = NULL;
+ int r, fd0, fd1;
+
+ assert(m);
+ assert(value);
+ assert(fds);
+
+ /* Parse the serialization again, after a daemon reload */
+
+ r = extract_many_words(&value, NULL, 0, &name, &s0, &s1, NULL);
+ if (r != 3 || !isempty(value)) {
+ log_debug("Unable to parse dynamic user line.");
+ return;
+ }
+
+ if (safe_atoi(s0, &fd0) < 0 || !fdset_contains(fds, fd0)) {
+ log_debug("Unable to process dynamic user fd specification.");
+ return;
+ }
+
+ if (safe_atoi(s1, &fd1) < 0 || !fdset_contains(fds, fd1)) {
+ log_debug("Unable to process dynamic user fd specification.");
+ return;
+ }
+
+ r = dynamic_user_add(m, name, (int[]) { fd0, fd1 }, NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add dynamic user: %m");
+ return;
+ }
+
+ (void) fdset_remove(fds, fd0);
+ (void) fdset_remove(fds, fd1);
+}
+
+void dynamic_user_vacuum(Manager *m, bool close_user) {
+ DynamicUser *d;
+ Iterator i;
+
+ assert(m);
+
+ /* Empty the dynamic user database, optionally cleaning up orphaned dynamic users, i.e. destroy and free users
+ * to which no reference exist. This is called after a daemon reload finished, in order to destroy users which
+ * might not be referenced anymore. */
+
+ HASHMAP_FOREACH(d, m->dynamic_users, i) {
+ if (d->n_ref > 0)
+ continue;
+
+ if (close_user) {
+ log_debug("Removing orphaned dynamic user %s", d->name);
+ (void) dynamic_user_close(d);
+ }
+
+ dynamic_user_free(d);
+ }
+}
+
+int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret) {
+ char lock_path[STRLEN("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+ _cleanup_free_ char *user = NULL;
+ uid_t check_uid;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ /* A friendly way to translate a dynamic user's UID into a name. */
+ if (!uid_is_dynamic(uid))
+ return -ESRCH;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid);
+ r = read_one_line_file(lock_path, &user);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ /* The lock file might be stale, hence let's verify the data before we return it */
+ r = dynamic_user_lookup_name(m, user, &check_uid);
+ if (r < 0)
+ return r;
+ if (check_uid != uid) /* lock file doesn't match our own idea */
+ return -ESRCH;
+
+ *ret = TAKE_PTR(user);
+
+ return 0;
+}
+
+int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret) {
+ DynamicUser *d;
+ int r;
+
+ assert(m);
+ assert(name);
+ assert(ret);
+
+ /* A friendly call for translating a dynamic user's name into its UID */
+
+ d = hashmap_get(m->dynamic_users, name);
+ if (!d)
+ return -ESRCH;
+
+ r = dynamic_user_current(d, ret);
+ if (r == -EAGAIN) /* not realized yet? */
+ return -ESRCH;
+
+ return r;
+}
+
+int dynamic_creds_acquire(DynamicCreds *creds, Manager *m, const char *user, const char *group) {
+ bool acquired = false;
+ int r;
+
+ assert(creds);
+ assert(m);
+
+ /* A DynamicUser object encapsulates an allocation of both a UID and a GID for a specific name. However, some
+ * services use different user and groups. For cases like that there's DynamicCreds containing a pair of user
+ * and group. This call allocates a pair. */
+
+ if (!creds->user && user) {
+ r = dynamic_user_acquire(m, user, &creds->user);
+ if (r < 0)
+ return r;
+
+ acquired = true;
+ }
+
+ if (!creds->group) {
+
+ if (creds->user && (!group || streq_ptr(user, group)))
+ creds->group = dynamic_user_ref(creds->user);
+ else {
+ r = dynamic_user_acquire(m, group, &creds->group);
+ if (r < 0) {
+ if (acquired)
+ creds->user = dynamic_user_unref(creds->user);
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int dynamic_creds_realize(DynamicCreds *creds, char **suggested_paths, uid_t *uid, gid_t *gid) {
+ uid_t u = UID_INVALID;
+ gid_t g = GID_INVALID;
+ int r;
+
+ assert(creds);
+ assert(uid);
+ assert(gid);
+
+ /* Realize both the referenced user and group */
+
+ if (creds->user) {
+ r = dynamic_user_realize(creds->user, suggested_paths, &u, &g, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (creds->group && creds->group != creds->user) {
+ r = dynamic_user_realize(creds->group, suggested_paths, NULL, &g, false);
+ if (r < 0)
+ return r;
+ }
+
+ *uid = u;
+ *gid = g;
+ return 0;
+}
+
+void dynamic_creds_unref(DynamicCreds *creds) {
+ assert(creds);
+
+ creds->user = dynamic_user_unref(creds->user);
+ creds->group = dynamic_user_unref(creds->group);
+}
+
+void dynamic_creds_destroy(DynamicCreds *creds) {
+ assert(creds);
+
+ creds->user = dynamic_user_destroy(creds->user);
+ creds->group = dynamic_user_destroy(creds->group);
+}
diff --git a/src/core/dynamic-user.h b/src/core/dynamic-user.h
new file mode 100644
index 0000000..112f91e
--- /dev/null
+++ b/src/core/dynamic-user.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct DynamicUser DynamicUser;
+
+typedef struct DynamicCreds {
+ /* A combination of a dynamic user and group */
+ DynamicUser *user;
+ DynamicUser *group;
+} DynamicCreds;
+
+#include "manager.h"
+
+/* Note that this object always allocates a pair of user and group under the same name, even if one of them isn't
+ * used. This means, if you want to allocate a group and user pair, and they might have two different names, then you
+ * need to allocated two of these objects. DynamicCreds below makes that easy. */
+struct DynamicUser {
+ unsigned n_ref;
+ Manager *manager;
+
+ /* An AF_UNIX socket pair that contains a datagram containing both the numeric ID assigned, as well as a lock
+ * file fd locking the user ID we picked. */
+ int storage_socket[2];
+
+ char name[];
+};
+
+int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds);
+void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds);
+void dynamic_user_vacuum(Manager *m, bool close_user);
+
+int dynamic_user_current(DynamicUser *d, uid_t *ret);
+int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret);
+int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret);
+
+int dynamic_creds_acquire(DynamicCreds *creds, Manager *m, const char *user, const char *group);
+int dynamic_creds_realize(DynamicCreds *creds, char **suggested_paths, uid_t *uid, gid_t *gid);
+
+void dynamic_creds_unref(DynamicCreds *creds);
+void dynamic_creds_destroy(DynamicCreds *creds);
diff --git a/src/core/emergency-action.c b/src/core/emergency-action.c
new file mode 100644
index 0000000..f98b0de
--- /dev/null
+++ b/src/core/emergency-action.c
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/reboot.h>
+
+#include "bus-error.h"
+#include "bus-util.h"
+#include "emergency-action.h"
+#include "raw-reboot.h"
+#include "reboot-util.h"
+#include "special.h"
+#include "string-table.h"
+#include "terminal-util.h"
+#include "virt.h"
+
+static void log_and_status(Manager *m, bool warn, const char *message, const char *reason) {
+ log_full(warn ? LOG_WARNING : LOG_DEBUG, "%s: %s", message, reason);
+ if (warn)
+ manager_status_printf(m, STATUS_TYPE_EMERGENCY,
+ ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
+ "%s: %s", message, reason);
+}
+
+int emergency_action(
+ Manager *m,
+ EmergencyAction action,
+ EmergencyActionFlags options,
+ const char *reboot_arg,
+ int exit_status,
+ const char *reason) {
+
+ assert(m);
+ assert(action >= 0);
+ assert(action < _EMERGENCY_ACTION_MAX);
+
+ if (action == EMERGENCY_ACTION_NONE)
+ return -ECANCELED;
+
+ if (FLAGS_SET(options, EMERGENCY_ACTION_IS_WATCHDOG) && !m->service_watchdogs) {
+ log_warning("Watchdog disabled! Not acting on: %s", reason);
+ return -ECANCELED;
+ }
+
+ bool warn = FLAGS_SET(options, EMERGENCY_ACTION_WARN);
+
+ switch (action) {
+
+ case EMERGENCY_ACTION_REBOOT:
+ log_and_status(m, warn, "Rebooting", reason);
+
+ (void) update_reboot_parameter_and_warn(reboot_arg);
+ (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
+
+ break;
+
+ case EMERGENCY_ACTION_REBOOT_FORCE:
+ log_and_status(m, warn, "Forcibly rebooting", reason);
+
+ (void) update_reboot_parameter_and_warn(reboot_arg);
+ m->objective = MANAGER_REBOOT;
+
+ break;
+
+ case EMERGENCY_ACTION_REBOOT_IMMEDIATE:
+ log_and_status(m, warn, "Rebooting immediately", reason);
+
+ sync();
+
+ if (!isempty(reboot_arg)) {
+ log_info("Rebooting with argument '%s'.", reboot_arg);
+ (void) raw_reboot(LINUX_REBOOT_CMD_RESTART2, reboot_arg);
+ log_warning_errno(errno, "Failed to reboot with parameter, retrying without: %m");
+ }
+
+ log_info("Rebooting.");
+ (void) reboot(RB_AUTOBOOT);
+ break;
+
+ case EMERGENCY_ACTION_EXIT:
+
+ if (exit_status >= 0)
+ m->return_value = exit_status;
+
+ if (MANAGER_IS_USER(m) || detect_container() > 0) {
+ log_and_status(m, warn, "Exiting", reason);
+ (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_EXIT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
+ break;
+ }
+
+ log_notice("Doing \"poweroff\" action instead of an \"exit\" emergency action.");
+ _fallthrough_;
+
+ case EMERGENCY_ACTION_POWEROFF:
+ log_and_status(m, warn, "Powering off", reason);
+ (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
+ break;
+
+ case EMERGENCY_ACTION_EXIT_FORCE:
+
+ if (exit_status >= 0)
+ m->return_value = exit_status;
+
+ if (MANAGER_IS_USER(m) || detect_container() > 0) {
+ log_and_status(m, warn, "Exiting immediately", reason);
+ m->objective = MANAGER_EXIT;
+ break;
+ }
+
+ log_notice("Doing \"poweroff-force\" action instead of an \"exit-force\" emergency action.");
+ _fallthrough_;
+
+ case EMERGENCY_ACTION_POWEROFF_FORCE:
+ log_and_status(m, warn, "Forcibly powering off", reason);
+ m->objective = MANAGER_POWEROFF;
+ break;
+
+ case EMERGENCY_ACTION_POWEROFF_IMMEDIATE:
+ log_and_status(m, warn, "Powering off immediately", reason);
+
+ sync();
+
+ log_info("Powering off.");
+ (void) reboot(RB_POWER_OFF);
+ break;
+
+ default:
+ assert_not_reached("Unknown emergency action");
+ }
+
+ return -ECANCELED;
+}
+
+static const char* const emergency_action_table[_EMERGENCY_ACTION_MAX] = {
+ [EMERGENCY_ACTION_NONE] = "none",
+ [EMERGENCY_ACTION_REBOOT] = "reboot",
+ [EMERGENCY_ACTION_REBOOT_FORCE] = "reboot-force",
+ [EMERGENCY_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate",
+ [EMERGENCY_ACTION_POWEROFF] = "poweroff",
+ [EMERGENCY_ACTION_POWEROFF_FORCE] = "poweroff-force",
+ [EMERGENCY_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate",
+ [EMERGENCY_ACTION_EXIT] = "exit",
+ [EMERGENCY_ACTION_EXIT_FORCE] = "exit-force",
+};
+DEFINE_STRING_TABLE_LOOKUP(emergency_action, EmergencyAction);
+
+int parse_emergency_action(
+ const char *value,
+ bool system,
+ EmergencyAction *ret) {
+
+ EmergencyAction x;
+
+ x = emergency_action_from_string(value);
+ if (x < 0)
+ return -EINVAL;
+
+ if (!system && x != EMERGENCY_ACTION_NONE && x < _EMERGENCY_ACTION_FIRST_USER_ACTION)
+ return -EOPNOTSUPP;
+
+ *ret = x;
+ return 0;
+}
diff --git a/src/core/emergency-action.h b/src/core/emergency-action.h
new file mode 100644
index 0000000..6e6c69d
--- /dev/null
+++ b/src/core/emergency-action.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef enum EmergencyAction {
+ EMERGENCY_ACTION_NONE,
+ EMERGENCY_ACTION_REBOOT,
+ EMERGENCY_ACTION_REBOOT_FORCE,
+ EMERGENCY_ACTION_REBOOT_IMMEDIATE,
+ EMERGENCY_ACTION_POWEROFF,
+ EMERGENCY_ACTION_POWEROFF_FORCE,
+ EMERGENCY_ACTION_POWEROFF_IMMEDIATE,
+ EMERGENCY_ACTION_EXIT,
+ _EMERGENCY_ACTION_FIRST_USER_ACTION = EMERGENCY_ACTION_EXIT,
+ EMERGENCY_ACTION_EXIT_FORCE,
+ _EMERGENCY_ACTION_MAX,
+ _EMERGENCY_ACTION_INVALID = -1
+} EmergencyAction;
+
+typedef enum EmergencyActionFlags {
+ EMERGENCY_ACTION_IS_WATCHDOG = 1 << 0,
+ EMERGENCY_ACTION_WARN = 1 << 1,
+} EmergencyActionFlags;
+
+#include "macro.h"
+#include "manager.h"
+
+int emergency_action(Manager *m,
+ EmergencyAction action, EmergencyActionFlags options,
+ const char *reboot_arg, int exit_status, const char *reason);
+
+const char* emergency_action_to_string(EmergencyAction i) _const_;
+EmergencyAction emergency_action_from_string(const char *s) _pure_;
+
+int parse_emergency_action(const char *value, bool system, EmergencyAction *ret);
diff --git a/src/core/execute.c b/src/core/execute.c
new file mode 100644
index 0000000..a708231
--- /dev/null
+++ b/src/core/execute.c
@@ -0,0 +1,5290 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <glob.h>
+#include <grp.h>
+#include <poll.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/capability.h>
+#include <sys/eventfd.h>
+#include <sys/mman.h>
+#include <sys/personality.h>
+#include <sys/prctl.h>
+#include <sys/shm.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <unistd.h>
+#include <utmpx.h>
+
+#if HAVE_PAM
+#include <security/pam_appl.h>
+#endif
+
+#if HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+
+#if HAVE_SECCOMP
+#include <seccomp.h>
+#endif
+
+#if HAVE_APPARMOR
+#include <sys/apparmor.h>
+#endif
+
+#include "sd-messages.h"
+
+#include "af-list.h"
+#include "alloc-util.h"
+#if HAVE_APPARMOR
+#include "apparmor-util.h"
+#endif
+#include "async.h"
+#include "barrier.h"
+#include "cap-list.h"
+#include "capability-util.h"
+#include "chown-recursive.h"
+#include "cpu-set-util.h"
+#include "def.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "errno-list.h"
+#include "execute.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "io-util.h"
+#include "ioprio.h"
+#include "label.h"
+#include "log.h"
+#include "macro.h"
+#include "manager.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "namespace.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "rm-rf.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "securebits-util.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "smack-util.h"
+#include "socket-util.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "terminal-util.h"
+#include "umask-util.h"
+#include "unit.h"
+#include "user-util.h"
+#include "util.h"
+#include "utmp-wtmp.h"
+
+#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
+#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
+
+/* This assumes there is a 'tty' group */
+#define TTY_MODE 0620
+
+#define SNDBUF_SIZE (8*1024*1024)
+
+static int shift_fds(int fds[], size_t n_fds) {
+ int start, restart_from;
+
+ if (n_fds <= 0)
+ return 0;
+
+ /* Modifies the fds array! (sorts it) */
+
+ assert(fds);
+
+ start = 0;
+ for (;;) {
+ int i;
+
+ restart_from = -1;
+
+ for (i = start; i < (int) n_fds; i++) {
+ int nfd;
+
+ /* Already at right index? */
+ if (fds[i] == i+3)
+ continue;
+
+ nfd = fcntl(fds[i], F_DUPFD, i + 3);
+ if (nfd < 0)
+ return -errno;
+
+ safe_close(fds[i]);
+ fds[i] = nfd;
+
+ /* Hmm, the fd we wanted isn't free? Then
+ * let's remember that and try again from here */
+ if (nfd != i+3 && restart_from < 0)
+ restart_from = i;
+ }
+
+ if (restart_from < 0)
+ break;
+
+ start = restart_from;
+ }
+
+ return 0;
+}
+
+static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
+ size_t i, n_fds;
+ int r;
+
+ n_fds = n_socket_fds + n_storage_fds;
+ if (n_fds <= 0)
+ return 0;
+
+ assert(fds);
+
+ /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags.
+ * O_NONBLOCK only applies to socket activation though. */
+
+ for (i = 0; i < n_fds; i++) {
+
+ if (i < n_socket_fds) {
+ r = fd_nonblock(fds[i], nonblock);
+ if (r < 0)
+ return r;
+ }
+
+ /* We unconditionally drop FD_CLOEXEC from the fds,
+ * since after all we want to pass these fds to our
+ * children */
+
+ r = fd_cloexec(fds[i], false);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static const char *exec_context_tty_path(const ExecContext *context) {
+ assert(context);
+
+ if (context->stdio_as_fds)
+ return NULL;
+
+ if (context->tty_path)
+ return context->tty_path;
+
+ return "/dev/console";
+}
+
+static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
+ const char *path;
+
+ assert(context);
+
+ path = exec_context_tty_path(context);
+
+ if (context->tty_vhangup) {
+ if (p && p->stdin_fd >= 0)
+ (void) terminal_vhangup_fd(p->stdin_fd);
+ else if (path)
+ (void) terminal_vhangup(path);
+ }
+
+ if (context->tty_reset) {
+ if (p && p->stdin_fd >= 0)
+ (void) reset_terminal_fd(p->stdin_fd, true);
+ else if (path)
+ (void) reset_terminal(path);
+ }
+
+ if (context->tty_vt_disallocate && path)
+ (void) vt_disallocate(path);
+}
+
+static bool is_terminal_input(ExecInput i) {
+ return IN_SET(i,
+ EXEC_INPUT_TTY,
+ EXEC_INPUT_TTY_FORCE,
+ EXEC_INPUT_TTY_FAIL);
+}
+
+static bool is_terminal_output(ExecOutput o) {
+ return IN_SET(o,
+ EXEC_OUTPUT_TTY,
+ EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
+}
+
+static bool is_syslog_output(ExecOutput o) {
+ return IN_SET(o,
+ EXEC_OUTPUT_SYSLOG,
+ EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
+}
+
+static bool is_kmsg_output(ExecOutput o) {
+ return IN_SET(o,
+ EXEC_OUTPUT_KMSG,
+ EXEC_OUTPUT_KMSG_AND_CONSOLE);
+}
+
+static bool exec_context_needs_term(const ExecContext *c) {
+ assert(c);
+
+ /* Return true if the execution context suggests we should set $TERM to something useful. */
+
+ if (is_terminal_input(c->std_input))
+ return true;
+
+ if (is_terminal_output(c->std_output))
+ return true;
+
+ if (is_terminal_output(c->std_error))
+ return true;
+
+ return !!c->tty_path;
+}
+
+static int open_null_as(int flags, int nfd) {
+ int fd;
+
+ assert(nfd >= 0);
+
+ fd = open("/dev/null", flags|O_NOCTTY);
+ if (fd < 0)
+ return -errno;
+
+ return move_fd(fd, nfd, false);
+}
+
+static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/stdout",
+ };
+ uid_t olduid = UID_INVALID;
+ gid_t oldgid = GID_INVALID;
+ int r;
+
+ if (gid_is_valid(gid)) {
+ oldgid = getgid();
+
+ if (setegid(gid) < 0)
+ return -errno;
+ }
+
+ if (uid_is_valid(uid)) {
+ olduid = getuid();
+
+ if (seteuid(uid) < 0) {
+ r = -errno;
+ goto restore_gid;
+ }
+ }
+
+ r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
+
+ /* If we fail to restore the uid or gid, things will likely
+ fail later on. This should only happen if an LSM interferes. */
+
+ if (uid_is_valid(uid))
+ (void) seteuid(olduid);
+
+ restore_gid:
+ if (gid_is_valid(gid))
+ (void) setegid(oldgid);
+
+ return r;
+}
+
+static int connect_logger_as(
+ const Unit *unit,
+ const ExecContext *context,
+ const ExecParameters *params,
+ ExecOutput output,
+ const char *ident,
+ int nfd,
+ uid_t uid,
+ gid_t gid) {
+
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(context);
+ assert(params);
+ assert(output < _EXEC_OUTPUT_MAX);
+ assert(ident);
+ assert(nfd >= 0);
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0)
+ return -errno;
+
+ r = connect_journal_socket(fd, uid, gid);
+ if (r < 0)
+ return r;
+
+ if (shutdown(fd, SHUT_RD) < 0)
+ return -errno;
+
+ (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
+
+ if (dprintf(fd,
+ "%s\n"
+ "%s\n"
+ "%i\n"
+ "%i\n"
+ "%i\n"
+ "%i\n"
+ "%i\n",
+ context->syslog_identifier ?: ident,
+ params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
+ context->syslog_priority,
+ !!context->syslog_level_prefix,
+ is_syslog_output(output),
+ is_kmsg_output(output),
+ is_terminal_output(output)) < 0)
+ return -errno;
+
+ return move_fd(TAKE_FD(fd), nfd, false);
+}
+
+static int open_terminal_as(const char *path, int flags, int nfd) {
+ int fd;
+
+ assert(path);
+ assert(nfd >= 0);
+
+ fd = open_terminal(path, flags | O_NOCTTY);
+ if (fd < 0)
+ return fd;
+
+ return move_fd(fd, nfd, false);
+}
+
+static int acquire_path(const char *path, int flags, mode_t mode) {
+ union sockaddr_union sa = {};
+ _cleanup_close_ int fd = -1;
+ int r, salen;
+
+ assert(path);
+
+ if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
+ flags |= O_CREAT;
+
+ fd = open(path, flags|O_NOCTTY, mode);
+ if (fd >= 0)
+ return TAKE_FD(fd);
+
+ if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
+ return -errno;
+ if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
+ return -ENXIO;
+
+ /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0)
+ return -errno;
+
+ salen = sockaddr_un_set_path(&sa.un, path);
+ if (salen < 0)
+ return salen;
+
+ if (connect(fd, &sa.sa, salen) < 0)
+ return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
+ * indication that his wasn't an AF_UNIX socket after all */
+
+ if ((flags & O_ACCMODE) == O_RDONLY)
+ r = shutdown(fd, SHUT_WR);
+ else if ((flags & O_ACCMODE) == O_WRONLY)
+ r = shutdown(fd, SHUT_RD);
+ else
+ return TAKE_FD(fd);
+ if (r < 0)
+ return -errno;
+
+ return TAKE_FD(fd);
+}
+
+static int fixup_input(
+ const ExecContext *context,
+ int socket_fd,
+ bool apply_tty_stdin) {
+
+ ExecInput std_input;
+
+ assert(context);
+
+ std_input = context->std_input;
+
+ if (is_terminal_input(std_input) && !apply_tty_stdin)
+ return EXEC_INPUT_NULL;
+
+ if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
+ return EXEC_INPUT_NULL;
+
+ if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
+ return EXEC_INPUT_NULL;
+
+ return std_input;
+}
+
+static int fixup_output(ExecOutput std_output, int socket_fd) {
+
+ if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
+ return EXEC_OUTPUT_INHERIT;
+
+ return std_output;
+}
+
+static int setup_input(
+ const ExecContext *context,
+ const ExecParameters *params,
+ int socket_fd,
+ int named_iofds[3]) {
+
+ ExecInput i;
+
+ assert(context);
+ assert(params);
+
+ if (params->stdin_fd >= 0) {
+ if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
+ return -errno;
+
+ /* Try to make this the controlling tty, if it is a tty, and reset it */
+ if (isatty(STDIN_FILENO)) {
+ (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
+ (void) reset_terminal_fd(STDIN_FILENO, true);
+ }
+
+ return STDIN_FILENO;
+ }
+
+ i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
+
+ switch (i) {
+
+ case EXEC_INPUT_NULL:
+ return open_null_as(O_RDONLY, STDIN_FILENO);
+
+ case EXEC_INPUT_TTY:
+ case EXEC_INPUT_TTY_FORCE:
+ case EXEC_INPUT_TTY_FAIL: {
+ int fd;
+
+ fd = acquire_terminal(exec_context_tty_path(context),
+ i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
+ i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
+ ACQUIRE_TERMINAL_WAIT,
+ USEC_INFINITY);
+ if (fd < 0)
+ return fd;
+
+ return move_fd(fd, STDIN_FILENO, false);
+ }
+
+ case EXEC_INPUT_SOCKET:
+ assert(socket_fd >= 0);
+
+ return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
+
+ case EXEC_INPUT_NAMED_FD:
+ assert(named_iofds[STDIN_FILENO] >= 0);
+
+ (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
+ return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
+
+ case EXEC_INPUT_DATA: {
+ int fd;
+
+ fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
+ if (fd < 0)
+ return fd;
+
+ return move_fd(fd, STDIN_FILENO, false);
+ }
+
+ case EXEC_INPUT_FILE: {
+ bool rw;
+ int fd;
+
+ assert(context->stdio_file[STDIN_FILENO]);
+
+ rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
+ (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
+
+ fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
+ if (fd < 0)
+ return fd;
+
+ return move_fd(fd, STDIN_FILENO, false);
+ }
+
+ default:
+ assert_not_reached("Unknown input type");
+ }
+}
+
+static bool can_inherit_stderr_from_stdout(
+ const ExecContext *context,
+ ExecOutput o,
+ ExecOutput e) {
+
+ assert(context);
+
+ /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
+ * stderr fd */
+
+ if (e == EXEC_OUTPUT_INHERIT)
+ return true;
+ if (e != o)
+ return false;
+
+ if (e == EXEC_OUTPUT_NAMED_FD)
+ return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
+
+ if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
+ return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
+
+ return true;
+}
+
+static int setup_output(
+ const Unit *unit,
+ const ExecContext *context,
+ const ExecParameters *params,
+ int fileno,
+ int socket_fd,
+ int named_iofds[3],
+ const char *ident,
+ uid_t uid,
+ gid_t gid,
+ dev_t *journal_stream_dev,
+ ino_t *journal_stream_ino) {
+
+ ExecOutput o;
+ ExecInput i;
+ int r;
+
+ assert(unit);
+ assert(context);
+ assert(params);
+ assert(ident);
+ assert(journal_stream_dev);
+ assert(journal_stream_ino);
+
+ if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
+
+ if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
+ return -errno;
+
+ return STDOUT_FILENO;
+ }
+
+ if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
+ if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
+ return -errno;
+
+ return STDERR_FILENO;
+ }
+
+ i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
+ o = fixup_output(context->std_output, socket_fd);
+
+ if (fileno == STDERR_FILENO) {
+ ExecOutput e;
+ e = fixup_output(context->std_error, socket_fd);
+
+ /* This expects the input and output are already set up */
+
+ /* Don't change the stderr file descriptor if we inherit all
+ * the way and are not on a tty */
+ if (e == EXEC_OUTPUT_INHERIT &&
+ o == EXEC_OUTPUT_INHERIT &&
+ i == EXEC_INPUT_NULL &&
+ !is_terminal_input(context->std_input) &&
+ getppid () != 1)
+ return fileno;
+
+ /* Duplicate from stdout if possible */
+ if (can_inherit_stderr_from_stdout(context, o, e))
+ return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
+
+ o = e;
+
+ } else if (o == EXEC_OUTPUT_INHERIT) {
+ /* If input got downgraded, inherit the original value */
+ if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
+ return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
+
+ /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
+ if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
+ return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
+
+ /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
+ if (getppid() != 1)
+ return fileno;
+
+ /* We need to open /dev/null here anew, to get the right access mode. */
+ return open_null_as(O_WRONLY, fileno);
+ }
+
+ switch (o) {
+
+ case EXEC_OUTPUT_NULL:
+ return open_null_as(O_WRONLY, fileno);
+
+ case EXEC_OUTPUT_TTY:
+ if (is_terminal_input(i))
+ return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
+
+ /* We don't reset the terminal if this is just about output */
+ return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
+
+ case EXEC_OUTPUT_SYSLOG:
+ case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
+ case EXEC_OUTPUT_KMSG:
+ case EXEC_OUTPUT_KMSG_AND_CONSOLE:
+ case EXEC_OUTPUT_JOURNAL:
+ case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
+ r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
+ if (r < 0) {
+ log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
+ r = open_null_as(O_WRONLY, fileno);
+ } else {
+ struct stat st;
+
+ /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
+ * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
+ * services to detect whether they are connected to the journal or not.
+ *
+ * If both stdout and stderr are connected to a stream then let's make sure to store the data
+ * about STDERR as that's usually the best way to do logging. */
+
+ if (fstat(fileno, &st) >= 0 &&
+ (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
+ *journal_stream_dev = st.st_dev;
+ *journal_stream_ino = st.st_ino;
+ }
+ }
+ return r;
+
+ case EXEC_OUTPUT_SOCKET:
+ assert(socket_fd >= 0);
+
+ return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
+
+ case EXEC_OUTPUT_NAMED_FD:
+ assert(named_iofds[fileno] >= 0);
+
+ (void) fd_nonblock(named_iofds[fileno], false);
+ return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
+
+ case EXEC_OUTPUT_FILE:
+ case EXEC_OUTPUT_FILE_APPEND: {
+ bool rw;
+ int fd, flags;
+
+ assert(context->stdio_file[fileno]);
+
+ rw = context->std_input == EXEC_INPUT_FILE &&
+ streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
+
+ if (rw)
+ return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
+
+ flags = O_WRONLY;
+ if (o == EXEC_OUTPUT_FILE_APPEND)
+ flags |= O_APPEND;
+
+ fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
+ if (fd < 0)
+ return fd;
+
+ return move_fd(fd, fileno, 0);
+ }
+
+ default:
+ assert_not_reached("Unknown error type");
+ }
+}
+
+static int chown_terminal(int fd, uid_t uid) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
+ if (isatty(fd) < 1)
+ return 0;
+
+ /* This might fail. What matters are the results. */
+ (void) fchown(fd, uid, -1);
+ (void) fchmod(fd, TTY_MODE);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
+ return -EPERM;
+
+ return 0;
+}
+
+static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
+ _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
+ int r;
+
+ assert(_saved_stdin);
+ assert(_saved_stdout);
+
+ saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
+ if (saved_stdin < 0)
+ return -errno;
+
+ saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
+ if (saved_stdout < 0)
+ return -errno;
+
+ fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
+ if (fd < 0)
+ return fd;
+
+ r = chown_terminal(fd, getuid());
+ if (r < 0)
+ return r;
+
+ r = reset_terminal_fd(fd, true);
+ if (r < 0)
+ return r;
+
+ r = rearrange_stdio(fd, fd, STDERR_FILENO);
+ fd = -1;
+ if (r < 0)
+ return r;
+
+ *_saved_stdin = saved_stdin;
+ *_saved_stdout = saved_stdout;
+
+ saved_stdin = saved_stdout = -1;
+
+ return 0;
+}
+
+static void write_confirm_error_fd(int err, int fd, const Unit *u) {
+ assert(err < 0);
+
+ if (err == -ETIMEDOUT)
+ dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
+ else {
+ errno = -err;
+ dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
+ }
+}
+
+static void write_confirm_error(int err, const char *vc, const Unit *u) {
+ _cleanup_close_ int fd = -1;
+
+ assert(vc);
+
+ fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return;
+
+ write_confirm_error_fd(err, fd, u);
+}
+
+static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
+ int r = 0;
+
+ assert(saved_stdin);
+ assert(saved_stdout);
+
+ release_terminal();
+
+ if (*saved_stdin >= 0)
+ if (dup2(*saved_stdin, STDIN_FILENO) < 0)
+ r = -errno;
+
+ if (*saved_stdout >= 0)
+ if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
+ r = -errno;
+
+ *saved_stdin = safe_close(*saved_stdin);
+ *saved_stdout = safe_close(*saved_stdout);
+
+ return r;
+}
+
+enum {
+ CONFIRM_PRETEND_FAILURE = -1,
+ CONFIRM_PRETEND_SUCCESS = 0,
+ CONFIRM_EXECUTE = 1,
+};
+
+static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
+ int saved_stdout = -1, saved_stdin = -1, r;
+ _cleanup_free_ char *e = NULL;
+ char c;
+
+ /* For any internal errors, assume a positive response. */
+ r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
+ if (r < 0) {
+ write_confirm_error(r, vc, u);
+ return CONFIRM_EXECUTE;
+ }
+
+ /* confirm_spawn might have been disabled while we were sleeping. */
+ if (manager_is_confirm_spawn_disabled(u->manager)) {
+ r = 1;
+ goto restore_stdio;
+ }
+
+ e = ellipsize(cmdline, 60, 100);
+ if (!e) {
+ log_oom();
+ r = CONFIRM_EXECUTE;
+ goto restore_stdio;
+ }
+
+ for (;;) {
+ r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
+ if (r < 0) {
+ write_confirm_error_fd(r, STDOUT_FILENO, u);
+ r = CONFIRM_EXECUTE;
+ goto restore_stdio;
+ }
+
+ switch (c) {
+ case 'c':
+ printf("Resuming normal execution.\n");
+ manager_disable_confirm_spawn();
+ r = 1;
+ break;
+ case 'D':
+ unit_dump(u, stdout, " ");
+ continue; /* ask again */
+ case 'f':
+ printf("Failing execution.\n");
+ r = CONFIRM_PRETEND_FAILURE;
+ break;
+ case 'h':
+ printf(" c - continue, proceed without asking anymore\n"
+ " D - dump, show the state of the unit\n"
+ " f - fail, don't execute the command and pretend it failed\n"
+ " h - help\n"
+ " i - info, show a short summary of the unit\n"
+ " j - jobs, show jobs that are in progress\n"
+ " s - skip, don't execute the command and pretend it succeeded\n"
+ " y - yes, execute the command\n");
+ continue; /* ask again */
+ case 'i':
+ printf(" Description: %s\n"
+ " Unit: %s\n"
+ " Command: %s\n",
+ u->id, u->description, cmdline);
+ continue; /* ask again */
+ case 'j':
+ manager_dump_jobs(u->manager, stdout, " ");
+ continue; /* ask again */
+ case 'n':
+ /* 'n' was removed in favor of 'f'. */
+ printf("Didn't understand 'n', did you mean 'f'?\n");
+ continue; /* ask again */
+ case 's':
+ printf("Skipping execution.\n");
+ r = CONFIRM_PRETEND_SUCCESS;
+ break;
+ case 'y':
+ r = CONFIRM_EXECUTE;
+ break;
+ default:
+ assert_not_reached("Unhandled choice");
+ }
+ break;
+ }
+
+restore_stdio:
+ restore_confirm_stdio(&saved_stdin, &saved_stdout);
+ return r;
+}
+
+static int get_fixed_user(const ExecContext *c, const char **user,
+ uid_t *uid, gid_t *gid,
+ const char **home, const char **shell) {
+ int r;
+ const char *name;
+
+ assert(c);
+
+ if (!c->user)
+ return 0;
+
+ /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
+ * (i.e. are "/" or "/bin/nologin"). */
+
+ name = c->user;
+ r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
+ if (r < 0)
+ return r;
+
+ *user = name;
+ return 0;
+}
+
+static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
+ int r;
+ const char *name;
+
+ assert(c);
+
+ if (!c->group)
+ return 0;
+
+ name = c->group;
+ r = get_group_creds(&name, gid, 0);
+ if (r < 0)
+ return r;
+
+ *group = name;
+ return 0;
+}
+
+static int get_supplementary_groups(const ExecContext *c, const char *user,
+ const char *group, gid_t gid,
+ gid_t **supplementary_gids, int *ngids) {
+ char **i;
+ int r, k = 0;
+ int ngroups_max;
+ bool keep_groups = false;
+ gid_t *groups = NULL;
+ _cleanup_free_ gid_t *l_gids = NULL;
+
+ assert(c);
+
+ /*
+ * If user is given, then lookup GID and supplementary groups list.
+ * We avoid NSS lookups for gid=0. Also we have to initialize groups
+ * here and as early as possible so we keep the list of supplementary
+ * groups of the caller.
+ */
+ if (user && gid_is_valid(gid) && gid != 0) {
+ /* First step, initialize groups from /etc/groups */
+ if (initgroups(user, gid) < 0)
+ return -errno;
+
+ keep_groups = true;
+ }
+
+ if (strv_isempty(c->supplementary_groups))
+ return 0;
+
+ /*
+ * If SupplementaryGroups= was passed then NGROUPS_MAX has to
+ * be positive, otherwise fail.
+ */
+ errno = 0;
+ ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
+ if (ngroups_max <= 0) {
+ if (errno > 0)
+ return -errno;
+ else
+ return -EOPNOTSUPP; /* For all other values */
+ }
+
+ l_gids = new(gid_t, ngroups_max);
+ if (!l_gids)
+ return -ENOMEM;
+
+ if (keep_groups) {
+ /*
+ * Lookup the list of groups that the user belongs to, we
+ * avoid NSS lookups here too for gid=0.
+ */
+ k = ngroups_max;
+ if (getgrouplist(user, gid, l_gids, &k) < 0)
+ return -EINVAL;
+ } else
+ k = 0;
+
+ STRV_FOREACH(i, c->supplementary_groups) {
+ const char *g;
+
+ if (k >= ngroups_max)
+ return -E2BIG;
+
+ g = *i;
+ r = get_group_creds(&g, l_gids+k, 0);
+ if (r < 0)
+ return r;
+
+ k++;
+ }
+
+ /*
+ * Sets ngids to zero to drop all supplementary groups, happens
+ * when we are under root and SupplementaryGroups= is empty.
+ */
+ if (k == 0) {
+ *ngids = 0;
+ return 0;
+ }
+
+ /* Otherwise get the final list of supplementary groups */
+ groups = memdup(l_gids, sizeof(gid_t) * k);
+ if (!groups)
+ return -ENOMEM;
+
+ *supplementary_gids = groups;
+ *ngids = k;
+
+ groups = NULL;
+
+ return 0;
+}
+
+static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {
+ int r;
+
+ /* Handle SupplementaryGroups= if it is not empty */
+ if (ngids > 0) {
+ r = maybe_setgroups(ngids, supplementary_gids);
+ if (r < 0)
+ return r;
+ }
+
+ if (gid_is_valid(gid)) {
+ /* Then set our gids */
+ if (setresgid(gid, gid, gid) < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int enforce_user(const ExecContext *context, uid_t uid) {
+ assert(context);
+
+ if (!uid_is_valid(uid))
+ return 0;
+
+ /* Sets (but doesn't look up) the uid and make sure we keep the
+ * capabilities while doing so. */
+
+ if (context->capability_ambient_set != 0) {
+
+ /* First step: If we need to keep capabilities but
+ * drop privileges we need to make sure we keep our
+ * caps, while we drop privileges. */
+ if (uid != 0) {
+ int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
+
+ if (prctl(PR_GET_SECUREBITS) != sb)
+ if (prctl(PR_SET_SECUREBITS, sb) < 0)
+ return -errno;
+ }
+ }
+
+ /* Second step: actually set the uids */
+ if (setresuid(uid, uid, uid) < 0)
+ return -errno;
+
+ /* At this point we should have all necessary capabilities but
+ are otherwise a normal user. However, the caps might got
+ corrupted due to the setresuid() so we need clean them up
+ later. This is done outside of this call. */
+
+ return 0;
+}
+
+#if HAVE_PAM
+
+static int null_conv(
+ int num_msg,
+ const struct pam_message **msg,
+ struct pam_response **resp,
+ void *appdata_ptr) {
+
+ /* We don't support conversations */
+
+ return PAM_CONV_ERR;
+}
+
+#endif
+
+static int setup_pam(
+ const char *name,
+ const char *user,
+ uid_t uid,
+ gid_t gid,
+ const char *tty,
+ char ***env,
+ int fds[], size_t n_fds) {
+
+#if HAVE_PAM
+
+ static const struct pam_conv conv = {
+ .conv = null_conv,
+ .appdata_ptr = NULL
+ };
+
+ _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
+ pam_handle_t *handle = NULL;
+ sigset_t old_ss;
+ int pam_code = PAM_SUCCESS, r;
+ char **nv, **e = NULL;
+ bool close_session = false;
+ pid_t pam_pid = 0, parent_pid;
+ int flags = 0;
+
+ assert(name);
+ assert(user);
+ assert(env);
+
+ /* We set up PAM in the parent process, then fork. The child
+ * will then stay around until killed via PR_GET_PDEATHSIG or
+ * systemd via the cgroup logic. It will then remove the PAM
+ * session again. The parent process will exec() the actual
+ * daemon. We do things this way to ensure that the main PID
+ * of the daemon is the one we initially fork()ed. */
+
+ r = barrier_create(&barrier);
+ if (r < 0)
+ goto fail;
+
+ if (log_get_max_level() < LOG_DEBUG)
+ flags |= PAM_SILENT;
+
+ pam_code = pam_start(name, user, &conv, &handle);
+ if (pam_code != PAM_SUCCESS) {
+ handle = NULL;
+ goto fail;
+ }
+
+ if (!tty) {
+ _cleanup_free_ char *q = NULL;
+
+ /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
+ * out if that's the case, and read the TTY off it. */
+
+ if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
+ tty = strjoina("/dev/", q);
+ }
+
+ if (tty) {
+ pam_code = pam_set_item(handle, PAM_TTY, tty);
+ if (pam_code != PAM_SUCCESS)
+ goto fail;
+ }
+
+ STRV_FOREACH(nv, *env) {
+ pam_code = pam_putenv(handle, *nv);
+ if (pam_code != PAM_SUCCESS)
+ goto fail;
+ }
+
+ pam_code = pam_acct_mgmt(handle, flags);
+ if (pam_code != PAM_SUCCESS)
+ goto fail;
+
+ pam_code = pam_open_session(handle, flags);
+ if (pam_code != PAM_SUCCESS)
+ goto fail;
+
+ close_session = true;
+
+ e = pam_getenvlist(handle);
+ if (!e) {
+ pam_code = PAM_BUF_ERR;
+ goto fail;
+ }
+
+ /* Block SIGTERM, so that we know that it won't get lost in
+ * the child */
+
+ assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
+
+ parent_pid = getpid_cached();
+
+ r = safe_fork("(sd-pam)", 0, &pam_pid);
+ if (r < 0)
+ goto fail;
+ if (r == 0) {
+ int sig, ret = EXIT_PAM;
+
+ /* The child's job is to reset the PAM session on
+ * termination */
+ barrier_set_role(&barrier, BARRIER_CHILD);
+
+ /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
+ * are open here that have been opened by PAM. */
+ (void) close_many(fds, n_fds);
+
+ /* Drop privileges - we don't need any to pam_close_session
+ * and this will make PR_SET_PDEATHSIG work in most cases.
+ * If this fails, ignore the error - but expect sd-pam threads
+ * to fail to exit normally */
+
+ r = maybe_setgroups(0, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
+ if (setresgid(gid, gid, gid) < 0)
+ log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
+ if (setresuid(uid, uid, uid) < 0)
+ log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");
+
+ (void) ignore_signals(SIGPIPE, -1);
+
+ /* Wait until our parent died. This will only work if
+ * the above setresuid() succeeds, otherwise the kernel
+ * will not allow unprivileged parents kill their privileged
+ * children this way. We rely on the control groups kill logic
+ * to do the rest for us. */
+ if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
+ goto child_finish;
+
+ /* Tell the parent that our setup is done. This is especially
+ * important regarding dropping privileges. Otherwise, unit
+ * setup might race against our setresuid(2) call.
+ *
+ * If the parent aborted, we'll detect this below, hence ignore
+ * return failure here. */
+ (void) barrier_place(&barrier);
+
+ /* Check if our parent process might already have died? */
+ if (getppid() == parent_pid) {
+ sigset_t ss;
+
+ assert_se(sigemptyset(&ss) >= 0);
+ assert_se(sigaddset(&ss, SIGTERM) >= 0);
+
+ for (;;) {
+ if (sigwait(&ss, &sig) < 0) {
+ if (errno == EINTR)
+ continue;
+
+ goto child_finish;
+ }
+
+ assert(sig == SIGTERM);
+ break;
+ }
+ }
+
+ /* If our parent died we'll end the session */
+ if (getppid() != parent_pid) {
+ pam_code = pam_close_session(handle, flags);
+ if (pam_code != PAM_SUCCESS)
+ goto child_finish;
+ }
+
+ ret = 0;
+
+ child_finish:
+ pam_end(handle, pam_code | flags);
+ _exit(ret);
+ }
+
+ barrier_set_role(&barrier, BARRIER_PARENT);
+
+ /* If the child was forked off successfully it will do all the
+ * cleanups, so forget about the handle here. */
+ handle = NULL;
+
+ /* Unblock SIGTERM again in the parent */
+ assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
+
+ /* We close the log explicitly here, since the PAM modules
+ * might have opened it, but we don't want this fd around. */
+ closelog();
+
+ /* Synchronously wait for the child to initialize. We don't care for
+ * errors as we cannot recover. However, warn loudly if it happens. */
+ if (!barrier_place_and_sync(&barrier))
+ log_error("PAM initialization failed");
+
+ return strv_free_and_replace(*env, e);
+
+fail:
+ if (pam_code != PAM_SUCCESS) {
+ log_error("PAM failed: %s", pam_strerror(handle, pam_code));
+ r = -EPERM; /* PAM errors do not map to errno */
+ } else
+ log_error_errno(r, "PAM failed: %m");
+
+ if (handle) {
+ if (close_session)
+ pam_code = pam_close_session(handle, flags);
+
+ pam_end(handle, pam_code | flags);
+ }
+
+ strv_free(e);
+ closelog();
+
+ return r;
+#else
+ return 0;
+#endif
+}
+
+static void rename_process_from_path(const char *path) {
+ char process_name[11];
+ const char *p;
+ size_t l;
+
+ /* This resulting string must fit in 10 chars (i.e. the length
+ * of "/sbin/init") to look pretty in /bin/ps */
+
+ p = basename(path);
+ if (isempty(p)) {
+ rename_process("(...)");
+ return;
+ }
+
+ l = strlen(p);
+ if (l > 8) {
+ /* The end of the process name is usually more
+ * interesting, since the first bit might just be
+ * "systemd-" */
+ p = p + l - 8;
+ l = 8;
+ }
+
+ process_name[0] = '(';
+ memcpy(process_name+1, p, l);
+ process_name[1+l] = ')';
+ process_name[1+l+1] = 0;
+
+ rename_process(process_name);
+}
+
+static bool context_has_address_families(const ExecContext *c) {
+ assert(c);
+
+ return c->address_families_whitelist ||
+ !set_isempty(c->address_families);
+}
+
+static bool context_has_syscall_filters(const ExecContext *c) {
+ assert(c);
+
+ return c->syscall_whitelist ||
+ !hashmap_isempty(c->syscall_filter);
+}
+
+static bool context_has_no_new_privileges(const ExecContext *c) {
+ assert(c);
+
+ if (c->no_new_privileges)
+ return true;
+
+ if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
+ return false;
+
+ /* We need NNP if we have any form of seccomp and are unprivileged */
+ return context_has_address_families(c) ||
+ c->memory_deny_write_execute ||
+ c->restrict_realtime ||
+ exec_context_restrict_namespaces_set(c) ||
+ c->protect_kernel_tunables ||
+ c->protect_kernel_modules ||
+ c->private_devices ||
+ context_has_syscall_filters(c) ||
+ !set_isempty(c->syscall_archs) ||
+ c->lock_personality;
+}
+
+#if HAVE_SECCOMP
+
+static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
+
+ if (is_seccomp_available())
+ return false;
+
+ log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
+ return true;
+}
+
+static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
+ uint32_t negative_action, default_action, action;
+ int r;
+
+ assert(u);
+ assert(c);
+
+ if (!context_has_syscall_filters(c))
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "SystemCallFilter="))
+ return 0;
+
+ negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
+
+ if (c->syscall_whitelist) {
+ default_action = negative_action;
+ action = SCMP_ACT_ALLOW;
+ } else {
+ default_action = SCMP_ACT_ALLOW;
+ action = negative_action;
+ }
+
+ if (needs_ambient_hack) {
+ r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
+ if (r < 0)
+ return r;
+ }
+
+ return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
+}
+
+static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ if (set_isempty(c->syscall_archs))
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
+ return 0;
+
+ return seccomp_restrict_archs(c->syscall_archs);
+}
+
+static int apply_address_families(const Unit* u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ if (!context_has_address_families(c))
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
+ return 0;
+
+ return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
+}
+
+static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ if (!c->memory_deny_write_execute)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
+ return 0;
+
+ return seccomp_memory_deny_write_execute();
+}
+
+static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ if (!c->restrict_realtime)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "RestrictRealtime="))
+ return 0;
+
+ return seccomp_restrict_realtime();
+}
+
+static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
+ * let's protect even those systems where this is left on in the kernel. */
+
+ if (!c->protect_kernel_tunables)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
+ return 0;
+
+ return seccomp_protect_sysctl();
+}
+
+static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ /* Turn off module syscalls on ProtectKernelModules=yes */
+
+ if (!c->protect_kernel_modules)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
+ return 0;
+
+ return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
+}
+
+static int apply_private_devices(const Unit *u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
+
+ if (!c->private_devices)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "PrivateDevices="))
+ return 0;
+
+ return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
+}
+
+static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ if (!exec_context_restrict_namespaces_set(c))
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
+ return 0;
+
+ return seccomp_restrict_namespaces(c->restrict_namespaces);
+}
+
+static int apply_lock_personality(const Unit* u, const ExecContext *c) {
+ unsigned long personality;
+ int r;
+
+ assert(u);
+ assert(c);
+
+ if (!c->lock_personality)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "LockPersonality="))
+ return 0;
+
+ personality = c->personality;
+
+ /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
+ if (personality == PERSONALITY_INVALID) {
+
+ r = opinionated_personality(&personality);
+ if (r < 0)
+ return r;
+ }
+
+ return seccomp_lock_personality(personality);
+}
+
+#endif
+
+static void do_idle_pipe_dance(int idle_pipe[static 4]) {
+ assert(idle_pipe);
+
+ idle_pipe[1] = safe_close(idle_pipe[1]);
+ idle_pipe[2] = safe_close(idle_pipe[2]);
+
+ if (idle_pipe[0] >= 0) {
+ int r;
+
+ r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
+
+ if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
+ ssize_t n;
+
+ /* Signal systemd that we are bored and want to continue. */
+ n = write(idle_pipe[3], "x", 1);
+ if (n > 0)
+ /* Wait for systemd to react to the signal above. */
+ fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
+ }
+
+ idle_pipe[0] = safe_close(idle_pipe[0]);
+
+ }
+
+ idle_pipe[3] = safe_close(idle_pipe[3]);
+}
+
+static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
+
+static int build_environment(
+ const Unit *u,
+ const ExecContext *c,
+ const ExecParameters *p,
+ size_t n_fds,
+ const char *home,
+ const char *username,
+ const char *shell,
+ dev_t journal_stream_dev,
+ ino_t journal_stream_ino,
+ char ***ret) {
+
+ _cleanup_strv_free_ char **our_env = NULL;
+ ExecDirectoryType t;
+ size_t n_env = 0;
+ char *x;
+
+ assert(u);
+ assert(c);
+ assert(p);
+ assert(ret);
+
+ our_env = new0(char*, 14 + _EXEC_DIRECTORY_TYPE_MAX);
+ if (!our_env)
+ return -ENOMEM;
+
+ if (n_fds > 0) {
+ _cleanup_free_ char *joined = NULL;
+
+ if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+
+ if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+
+ joined = strv_join(p->fd_names, ":");
+ if (!joined)
+ return -ENOMEM;
+
+ x = strjoin("LISTEN_FDNAMES=", joined);
+ if (!x)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+ }
+
+ if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
+ if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+
+ if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+ }
+
+ /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
+ * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
+ * check the database directly. */
+ if (p->flags & EXEC_NSS_BYPASS_BUS) {
+ x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
+ if (!x)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+ }
+
+ if (home) {
+ x = strappend("HOME=", home);
+ if (!x)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+ }
+
+ if (username) {
+ x = strappend("LOGNAME=", username);
+ if (!x)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+
+ x = strappend("USER=", username);
+ if (!x)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+ }
+
+ if (shell) {
+ x = strappend("SHELL=", shell);
+ if (!x)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+ }
+
+ if (!sd_id128_is_null(u->invocation_id)) {
+ if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+
+ if (exec_context_needs_term(c)) {
+ const char *tty_path, *term = NULL;
+
+ tty_path = exec_context_tty_path(c);
+
+ /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
+ * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
+ * passes to PID 1 ends up all the way in the console login shown. */
+
+ if (path_equal(tty_path, "/dev/console") && getppid() == 1)
+ term = getenv("TERM");
+ if (!term)
+ term = default_term_for_tty(tty_path);
+
+ x = strappend("TERM=", term);
+ if (!x)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+ }
+
+ if (journal_stream_dev != 0 && journal_stream_ino != 0) {
+ if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+
+ for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+ _cleanup_free_ char *pre = NULL, *joined = NULL;
+ const char *n;
+
+ if (!p->prefix[t])
+ continue;
+
+ if (strv_isempty(c->directories[t].paths))
+ continue;
+
+ n = exec_directory_env_name_to_string(t);
+ if (!n)
+ continue;
+
+ pre = strjoin(p->prefix[t], "/");
+ if (!pre)
+ return -ENOMEM;
+
+ joined = strv_join_prefix(c->directories[t].paths, ":", pre);
+ if (!joined)
+ return -ENOMEM;
+
+ x = strjoin(n, "=", joined);
+ if (!x)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+
+ our_env[n_env++] = NULL;
+ assert(n_env <= 14 + _EXEC_DIRECTORY_TYPE_MAX);
+
+ *ret = TAKE_PTR(our_env);
+
+ return 0;
+}
+
+static int build_pass_environment(const ExecContext *c, char ***ret) {
+ _cleanup_strv_free_ char **pass_env = NULL;
+ size_t n_env = 0, n_bufsize = 0;
+ char **i;
+
+ STRV_FOREACH(i, c->pass_environment) {
+ _cleanup_free_ char *x = NULL;
+ char *v;
+
+ v = getenv(*i);
+ if (!v)
+ continue;
+ x = strjoin(*i, "=", v);
+ if (!x)
+ return -ENOMEM;
+
+ if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
+ return -ENOMEM;
+
+ pass_env[n_env++] = TAKE_PTR(x);
+ pass_env[n_env] = NULL;
+ }
+
+ *ret = TAKE_PTR(pass_env);
+
+ return 0;
+}
+
+static bool exec_needs_mount_namespace(
+ const ExecContext *context,
+ const ExecParameters *params,
+ const ExecRuntime *runtime) {
+
+ assert(context);
+ assert(params);
+
+ if (context->root_image)
+ return true;
+
+ if (!strv_isempty(context->read_write_paths) ||
+ !strv_isempty(context->read_only_paths) ||
+ !strv_isempty(context->inaccessible_paths))
+ return true;
+
+ if (context->n_bind_mounts > 0)
+ return true;
+
+ if (context->n_temporary_filesystems > 0)
+ return true;
+
+ if (context->mount_flags != 0)
+ return true;
+
+ if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
+ return true;
+
+ if (context->private_devices ||
+ context->private_mounts ||
+ context->protect_system != PROTECT_SYSTEM_NO ||
+ context->protect_home != PROTECT_HOME_NO ||
+ context->protect_kernel_tunables ||
+ context->protect_kernel_modules ||
+ context->protect_control_groups)
+ return true;
+
+ if (context->root_directory) {
+ ExecDirectoryType t;
+
+ if (context->mount_apivfs)
+ return true;
+
+ for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+ if (!params->prefix[t])
+ continue;
+
+ if (!strv_isempty(context->directories[t].paths))
+ return true;
+ }
+ }
+
+ if (context->dynamic_user &&
+ (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
+ !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
+ !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
+ return true;
+
+ return false;
+}
+
+static int setup_private_users(uid_t uid, gid_t gid) {
+ _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
+ _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
+ _cleanup_close_ int unshare_ready_fd = -1;
+ _cleanup_(sigkill_waitp) pid_t pid = 0;
+ uint64_t c = 1;
+ ssize_t n;
+ int r;
+
+ /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
+ * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
+ * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
+ * which waits for the parent to create the new user namespace while staying in the original namespace. The
+ * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
+ * continues execution normally. */
+
+ if (uid != 0 && uid_is_valid(uid)) {
+ r = asprintf(&uid_map,
+ "0 0 1\n" /* Map root → root */
+ UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
+ uid, uid);
+ if (r < 0)
+ return -ENOMEM;
+ } else {
+ uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
+ if (!uid_map)
+ return -ENOMEM;
+ }
+
+ if (gid != 0 && gid_is_valid(gid)) {
+ r = asprintf(&gid_map,
+ "0 0 1\n" /* Map root → root */
+ GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
+ gid, gid);
+ if (r < 0)
+ return -ENOMEM;
+ } else {
+ gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
+ if (!gid_map)
+ return -ENOMEM;
+ }
+
+ /* Create a communication channel so that the parent can tell the child when it finished creating the user
+ * namespace. */
+ unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
+ if (unshare_ready_fd < 0)
+ return -errno;
+
+ /* Create a communication channel so that the child can tell the parent a proper error code in case it
+ * failed. */
+ if (pipe2(errno_pipe, O_CLOEXEC) < 0)
+ return -errno;
+
+ r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ _cleanup_close_ int fd = -1;
+ const char *a;
+ pid_t ppid;
+
+ /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
+ * here, after the parent opened its own user namespace. */
+
+ ppid = getppid();
+ errno_pipe[0] = safe_close(errno_pipe[0]);
+
+ /* Wait until the parent unshared the user namespace */
+ if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ /* Disable the setgroups() system call in the child user namespace, for good. */
+ a = procfs_file_alloca(ppid, "setgroups");
+ fd = open(a, O_WRONLY|O_CLOEXEC);
+ if (fd < 0) {
+ if (errno != ENOENT) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ /* If the file is missing the kernel is too old, let's continue anyway. */
+ } else {
+ if (write(fd, "deny\n", 5) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ fd = safe_close(fd);
+ }
+
+ /* First write the GID map */
+ a = procfs_file_alloca(ppid, "gid_map");
+ fd = open(a, O_WRONLY|O_CLOEXEC);
+ if (fd < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+ if (write(fd, gid_map, strlen(gid_map)) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+ fd = safe_close(fd);
+
+ /* The write the UID map */
+ a = procfs_file_alloca(ppid, "uid_map");
+ fd = open(a, O_WRONLY|O_CLOEXEC);
+ if (fd < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+ if (write(fd, uid_map, strlen(uid_map)) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ _exit(EXIT_SUCCESS);
+
+ child_fail:
+ (void) write(errno_pipe[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ errno_pipe[1] = safe_close(errno_pipe[1]);
+
+ if (unshare(CLONE_NEWUSER) < 0)
+ return -errno;
+
+ /* Let the child know that the namespace is ready now */
+ if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
+ return -errno;
+
+ /* Try to read an error code from the child */
+ n = read(errno_pipe[0], &r, sizeof(r));
+ if (n < 0)
+ return -errno;
+ if (n == sizeof(r)) { /* an error code was sent to us */
+ if (r < 0)
+ return r;
+ return -EIO;
+ }
+ if (n != 0) /* on success we should have read 0 bytes */
+ return -EIO;
+
+ r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
+ pid = 0;
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
+ return -EIO;
+
+ return 0;
+}
+
+static int setup_exec_directory(
+ const ExecContext *context,
+ const ExecParameters *params,
+ uid_t uid,
+ gid_t gid,
+ ExecDirectoryType type,
+ int *exit_status) {
+
+ static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
+ [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
+ [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
+ [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
+ [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
+ [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
+ };
+ char **rt;
+ int r;
+
+ assert(context);
+ assert(params);
+ assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
+ assert(exit_status);
+
+ if (!params->prefix[type])
+ return 0;
+
+ if (params->flags & EXEC_CHOWN_DIRECTORIES) {
+ if (!uid_is_valid(uid))
+ uid = 0;
+ if (!gid_is_valid(gid))
+ gid = 0;
+ }
+
+ STRV_FOREACH(rt, context->directories[type].paths) {
+ _cleanup_free_ char *p = NULL, *pp = NULL;
+
+ p = strjoin(params->prefix[type], "/", *rt);
+ if (!p) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ r = mkdir_parents_label(p, 0755);
+ if (r < 0)
+ goto fail;
+
+ if (context->dynamic_user &&
+ !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
+ _cleanup_free_ char *private_root = NULL;
+
+ /* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
+ * want to avoid leaving a directory around fully accessible that is owned by a dynamic user
+ * whose UID is later on reused. To lock this down we use the same trick used by container
+ * managers to prohibit host users to get access to files of the same UID in containers: we
+ * place everything inside a directory that has an access mode of 0700 and is owned root:root,
+ * so that it acts as security boundary for unprivileged host code. We then use fs namespacing
+ * to make this directory permeable for the service itself.
+ *
+ * Specifically: for a service which wants a special directory "foo/" we first create a
+ * directory "private/" with access mode 0700 owned by root:root. Then we place "foo" inside of
+ * that directory (i.e. "private/foo/"), and make "foo" a symlink to "private/foo". This way,
+ * privileged host users can access "foo/" as usual, but unprivileged host users can't look
+ * into it. Inside of the namespaceof the container "private/" is replaced by a more liberally
+ * accessible tmpfs, into which the host's "private/foo/" is mounted under the same name, thus
+ * disabling the access boundary for the service and making sure it only gets access to the
+ * dirs it needs but no others. Tricky? Yes, absolutely, but it works!
+ *
+ * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
+ * owned by the service itself.
+ * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
+ * files or sockets with other services. */
+
+ private_root = strjoin(params->prefix[type], "/private");
+ if (!private_root) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
+ r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ goto fail;
+
+ pp = strjoin(private_root, "/", *rt);
+ if (!pp) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ /* Create all directories between the configured directory and this private root, and mark them 0755 */
+ r = mkdir_parents_label(pp, 0755);
+ if (r < 0)
+ goto fail;
+
+ if (is_dir(p, false) > 0 &&
+ (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
+
+ /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
+ * it over. Most likely the service has been upgraded from one that didn't use
+ * DynamicUser=1, to one that does. */
+
+ if (rename(p, pp) < 0) {
+ r = -errno;
+ goto fail;
+ }
+ } else {
+ /* Otherwise, create the actual directory for the service */
+
+ r = mkdir_label(pp, context->directories[type].mode);
+ if (r < 0 && r != -EEXIST)
+ goto fail;
+ }
+
+ /* And link it up from the original place */
+ r = symlink_idempotent(pp, p, true);
+ if (r < 0)
+ goto fail;
+
+ /* Lock down the access mode */
+ if (chmod(pp, context->directories[type].mode) < 0) {
+ r = -errno;
+ goto fail;
+ }
+ } else {
+ r = mkdir_label(p, context->directories[type].mode);
+ if (r < 0 && r != -EEXIST)
+ goto fail;
+ if (r == -EEXIST) {
+ struct stat st;
+
+ if (stat(p, &st) < 0) {
+ r = -errno;
+ goto fail;
+ }
+ if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
+ log_warning("%s \'%s\' already exists but the mode is different. "
+ "(filesystem: %o %sMode: %o)",
+ exec_directory_type_to_string(type), *rt,
+ st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
+ if (!context->dynamic_user)
+ continue;
+ }
+ }
+
+ /* Don't change the owner of the configuration directory, as in the common case it is not written to by
+ * a service, and shall not be writable. */
+ if (type == EXEC_DIRECTORY_CONFIGURATION)
+ continue;
+
+ /* Then, change the ownership of the whole tree, if necessary */
+ r = path_chown_recursive(pp ?: p, uid, gid);
+ if (r < 0)
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ *exit_status = exit_status_table[type];
+ return r;
+}
+
+#if ENABLE_SMACK
+static int setup_smack(
+ const ExecContext *context,
+ const ExecCommand *command) {
+
+ int r;
+
+ assert(context);
+ assert(command);
+
+ if (context->smack_process_label) {
+ r = mac_smack_apply_pid(0, context->smack_process_label);
+ if (r < 0)
+ return r;
+ }
+#ifdef SMACK_DEFAULT_PROCESS_LABEL
+ else {
+ _cleanup_free_ char *exec_label = NULL;
+
+ r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
+ if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
+ return r;
+
+ r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
+ if (r < 0)
+ return r;
+ }
+#endif
+
+ return 0;
+}
+#endif
+
+static int compile_bind_mounts(
+ const ExecContext *context,
+ const ExecParameters *params,
+ BindMount **ret_bind_mounts,
+ size_t *ret_n_bind_mounts,
+ char ***ret_empty_directories) {
+
+ _cleanup_strv_free_ char **empty_directories = NULL;
+ BindMount *bind_mounts;
+ size_t n, h = 0, i;
+ ExecDirectoryType t;
+ int r;
+
+ assert(context);
+ assert(params);
+ assert(ret_bind_mounts);
+ assert(ret_n_bind_mounts);
+ assert(ret_empty_directories);
+
+ n = context->n_bind_mounts;
+ for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+ if (!params->prefix[t])
+ continue;
+
+ n += strv_length(context->directories[t].paths);
+ }
+
+ if (n <= 0) {
+ *ret_bind_mounts = NULL;
+ *ret_n_bind_mounts = 0;
+ *ret_empty_directories = NULL;
+ return 0;
+ }
+
+ bind_mounts = new(BindMount, n);
+ if (!bind_mounts)
+ return -ENOMEM;
+
+ for (i = 0; i < context->n_bind_mounts; i++) {
+ BindMount *item = context->bind_mounts + i;
+ char *s, *d;
+
+ s = strdup(item->source);
+ if (!s) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ d = strdup(item->destination);
+ if (!d) {
+ free(s);
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ bind_mounts[h++] = (BindMount) {
+ .source = s,
+ .destination = d,
+ .read_only = item->read_only,
+ .recursive = item->recursive,
+ .ignore_enoent = item->ignore_enoent,
+ };
+ }
+
+ for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+ char **suffix;
+
+ if (!params->prefix[t])
+ continue;
+
+ if (strv_isempty(context->directories[t].paths))
+ continue;
+
+ if (context->dynamic_user &&
+ !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
+ !(context->root_directory || context->root_image)) {
+ char *private_root;
+
+ /* So this is for a dynamic user, and we need to make sure the process can access its own
+ * directory. For that we overmount the usually inaccessible "private" subdirectory with a
+ * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
+
+ private_root = strjoin(params->prefix[t], "/private");
+ if (!private_root) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = strv_consume(&empty_directories, private_root);
+ if (r < 0)
+ goto finish;
+ }
+
+ STRV_FOREACH(suffix, context->directories[t].paths) {
+ char *s, *d;
+
+ if (context->dynamic_user &&
+ !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
+ s = strjoin(params->prefix[t], "/private/", *suffix);
+ else
+ s = strjoin(params->prefix[t], "/", *suffix);
+ if (!s) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ if (context->dynamic_user &&
+ !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION) &&
+ (context->root_directory || context->root_image))
+ /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
+ * directory is not created on the root directory. So, let's bind-mount the directory
+ * on the 'non-private' place. */
+ d = strjoin(params->prefix[t], "/", *suffix);
+ else
+ d = strdup(s);
+ if (!d) {
+ free(s);
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ bind_mounts[h++] = (BindMount) {
+ .source = s,
+ .destination = d,
+ .read_only = false,
+ .recursive = true,
+ .ignore_enoent = false,
+ };
+ }
+ }
+
+ assert(h == n);
+
+ *ret_bind_mounts = bind_mounts;
+ *ret_n_bind_mounts = n;
+ *ret_empty_directories = TAKE_PTR(empty_directories);
+
+ return (int) n;
+
+finish:
+ bind_mount_free_many(bind_mounts, h);
+ return r;
+}
+
+static int apply_mount_namespace(
+ const Unit *u,
+ const ExecCommand *command,
+ const ExecContext *context,
+ const ExecParameters *params,
+ const ExecRuntime *runtime) {
+
+ _cleanup_strv_free_ char **empty_directories = NULL;
+ char *tmp = NULL, *var = NULL;
+ const char *root_dir = NULL, *root_image = NULL;
+ NamespaceInfo ns_info;
+ bool needs_sandboxing;
+ BindMount *bind_mounts = NULL;
+ size_t n_bind_mounts = 0;
+ int r;
+
+ assert(context);
+
+ /* The runtime struct only contains the parent of the private /tmp,
+ * which is non-accessible to world users. Inside of it there's a /tmp
+ * that is sticky, and that's the one we want to use here. */
+
+ if (context->private_tmp && runtime) {
+ if (runtime->tmp_dir)
+ tmp = strjoina(runtime->tmp_dir, "/tmp");
+ if (runtime->var_tmp_dir)
+ var = strjoina(runtime->var_tmp_dir, "/tmp");
+ }
+
+ if (params->flags & EXEC_APPLY_CHROOT) {
+ root_image = context->root_image;
+
+ if (!root_image)
+ root_dir = context->root_directory;
+ }
+
+ r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
+ if (r < 0)
+ return r;
+
+ needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
+ if (needs_sandboxing)
+ ns_info = (NamespaceInfo) {
+ .ignore_protect_paths = false,
+ .private_dev = context->private_devices,
+ .protect_control_groups = context->protect_control_groups,
+ .protect_kernel_tunables = context->protect_kernel_tunables,
+ .protect_kernel_modules = context->protect_kernel_modules,
+ .mount_apivfs = context->mount_apivfs,
+ .private_mounts = context->private_mounts,
+ };
+ else if (!context->dynamic_user && root_dir)
+ /*
+ * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
+ * sandbox info, otherwise enforce it, don't ignore protected paths and
+ * fail if we are enable to apply the sandbox inside the mount namespace.
+ */
+ ns_info = (NamespaceInfo) {
+ .ignore_protect_paths = true,
+ };
+ else
+ ns_info = (NamespaceInfo) {};
+
+ r = setup_namespace(root_dir, root_image,
+ &ns_info, context->read_write_paths,
+ needs_sandboxing ? context->read_only_paths : NULL,
+ needs_sandboxing ? context->inaccessible_paths : NULL,
+ empty_directories,
+ bind_mounts,
+ n_bind_mounts,
+ context->temporary_filesystems,
+ context->n_temporary_filesystems,
+ tmp,
+ var,
+ needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
+ needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
+ context->mount_flags,
+ DISSECT_IMAGE_DISCARD_ON_LOOP);
+
+ bind_mount_free_many(bind_mounts, n_bind_mounts);
+
+ /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
+ * that with a special, recognizable error ENOANO. In this case, silently proceeed, but only if exclusively
+ * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
+ * completely different execution environment. */
+ if (r == -ENOANO) {
+ if (n_bind_mounts == 0 &&
+ context->n_temporary_filesystems == 0 &&
+ !root_dir && !root_image &&
+ !context->dynamic_user) {
+ log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
+ return 0;
+ }
+
+ log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
+ "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
+ n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
+
+ return -EOPNOTSUPP;
+ }
+
+ return r;
+}
+
+static int apply_working_directory(
+ const ExecContext *context,
+ const ExecParameters *params,
+ const char *home,
+ const bool needs_mount_ns,
+ int *exit_status) {
+
+ const char *d, *wd;
+
+ assert(context);
+ assert(exit_status);
+
+ if (context->working_directory_home) {
+
+ if (!home) {
+ *exit_status = EXIT_CHDIR;
+ return -ENXIO;
+ }
+
+ wd = home;
+
+ } else if (context->working_directory)
+ wd = context->working_directory;
+ else
+ wd = "/";
+
+ if (params->flags & EXEC_APPLY_CHROOT) {
+ if (!needs_mount_ns && context->root_directory)
+ if (chroot(context->root_directory) < 0) {
+ *exit_status = EXIT_CHROOT;
+ return -errno;
+ }
+
+ d = wd;
+ } else
+ d = prefix_roota(context->root_directory, wd);
+
+ if (chdir(d) < 0 && !context->working_directory_missing_ok) {
+ *exit_status = EXIT_CHDIR;
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int setup_keyring(
+ const Unit *u,
+ const ExecContext *context,
+ const ExecParameters *p,
+ uid_t uid, gid_t gid) {
+
+ key_serial_t keyring;
+ int r = 0;
+ uid_t saved_uid;
+ gid_t saved_gid;
+
+ assert(u);
+ assert(context);
+ assert(p);
+
+ /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
+ * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
+ * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
+ * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
+ * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
+ * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
+
+ if (context->keyring_mode == EXEC_KEYRING_INHERIT)
+ return 0;
+
+ /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
+ * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
+ * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
+ * & group is just as nasty as acquiring a reference to the user keyring. */
+
+ saved_uid = getuid();
+ saved_gid = getgid();
+
+ if (gid_is_valid(gid) && gid != saved_gid) {
+ if (setregid(gid, -1) < 0)
+ return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
+ }
+
+ if (uid_is_valid(uid) && uid != saved_uid) {
+ if (setreuid(uid, -1) < 0) {
+ r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
+ goto out;
+ }
+ }
+
+ keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
+ if (keyring == -1) {
+ if (errno == ENOSYS)
+ log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
+ else if (IN_SET(errno, EACCES, EPERM))
+ log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
+ else if (errno == EDQUOT)
+ log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
+ else
+ r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
+
+ goto out;
+ }
+
+ /* When requested link the user keyring into the session keyring. */
+ if (context->keyring_mode == EXEC_KEYRING_SHARED) {
+
+ if (keyctl(KEYCTL_LINK,
+ KEY_SPEC_USER_KEYRING,
+ KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
+ r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
+ goto out;
+ }
+ }
+
+ /* Restore uid/gid back */
+ if (uid_is_valid(uid) && uid != saved_uid) {
+ if (setreuid(saved_uid, -1) < 0) {
+ r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
+ goto out;
+ }
+ }
+
+ if (gid_is_valid(gid) && gid != saved_gid) {
+ if (setregid(saved_gid, -1) < 0)
+ return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
+ }
+
+ /* Populate they keyring with the invocation ID by default, as original saved_uid. */
+ if (!sd_id128_is_null(u->invocation_id)) {
+ key_serial_t key;
+
+ key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
+ if (key == -1)
+ log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
+ else {
+ if (keyctl(KEYCTL_SETPERM, key,
+ KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
+ KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
+ r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
+ }
+ }
+
+out:
+ /* Revert back uid & gid for the the last time, and exit */
+ /* no extra logging, as only the first already reported error matters */
+ if (getuid() != saved_uid)
+ (void) setreuid(saved_uid, -1);
+
+ if (getgid() != saved_gid)
+ (void) setregid(saved_gid, -1);
+
+ return r;
+}
+
+static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
+ assert(array);
+ assert(n);
+
+ if (!pair)
+ return;
+
+ if (pair[0] >= 0)
+ array[(*n)++] = pair[0];
+ if (pair[1] >= 0)
+ array[(*n)++] = pair[1];
+}
+
+static int close_remaining_fds(
+ const ExecParameters *params,
+ const ExecRuntime *runtime,
+ const DynamicCreds *dcreds,
+ int user_lookup_fd,
+ int socket_fd,
+ int exec_fd,
+ int *fds, size_t n_fds) {
+
+ size_t n_dont_close = 0;
+ int dont_close[n_fds + 12];
+
+ assert(params);
+
+ if (params->stdin_fd >= 0)
+ dont_close[n_dont_close++] = params->stdin_fd;
+ if (params->stdout_fd >= 0)
+ dont_close[n_dont_close++] = params->stdout_fd;
+ if (params->stderr_fd >= 0)
+ dont_close[n_dont_close++] = params->stderr_fd;
+
+ if (socket_fd >= 0)
+ dont_close[n_dont_close++] = socket_fd;
+ if (exec_fd >= 0)
+ dont_close[n_dont_close++] = exec_fd;
+ if (n_fds > 0) {
+ memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
+ n_dont_close += n_fds;
+ }
+
+ if (runtime)
+ append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
+
+ if (dcreds) {
+ if (dcreds->user)
+ append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
+ if (dcreds->group)
+ append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
+ }
+
+ if (user_lookup_fd >= 0)
+ dont_close[n_dont_close++] = user_lookup_fd;
+
+ return close_all_fds(dont_close, n_dont_close);
+}
+
+static int send_user_lookup(
+ Unit *unit,
+ int user_lookup_fd,
+ uid_t uid,
+ gid_t gid) {
+
+ assert(unit);
+
+ /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
+ * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
+ * specified. */
+
+ if (user_lookup_fd < 0)
+ return 0;
+
+ if (!uid_is_valid(uid) && !gid_is_valid(gid))
+ return 0;
+
+ if (writev(user_lookup_fd,
+ (struct iovec[]) {
+ IOVEC_INIT(&uid, sizeof(uid)),
+ IOVEC_INIT(&gid, sizeof(gid)),
+ IOVEC_INIT_STRING(unit->id) }, 3) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
+ int r;
+
+ assert(c);
+ assert(home);
+ assert(buf);
+
+ /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
+
+ if (*home)
+ return 0;
+
+ if (!c->working_directory_home)
+ return 0;
+
+ if (uid == 0) {
+ /* Hardcode /root as home directory for UID 0 */
+ *home = "/root";
+ return 1;
+ }
+
+ r = get_home_dir(buf);
+ if (r < 0)
+ return r;
+
+ *home = *buf;
+ return 1;
+}
+
+static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
+ _cleanup_strv_free_ char ** list = NULL;
+ ExecDirectoryType t;
+ int r;
+
+ assert(c);
+ assert(p);
+ assert(ret);
+
+ assert(c->dynamic_user);
+
+ /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
+ * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
+ * directories. */
+
+ for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+ char **i;
+
+ if (t == EXEC_DIRECTORY_CONFIGURATION)
+ continue;
+
+ if (!p->prefix[t])
+ continue;
+
+ STRV_FOREACH(i, c->directories[t].paths) {
+ char *e;
+
+ if (t == EXEC_DIRECTORY_RUNTIME)
+ e = strjoin(p->prefix[t], "/", *i);
+ else
+ e = strjoin(p->prefix[t], "/private/", *i);
+ if (!e)
+ return -ENOMEM;
+
+ r = strv_consume(&list, e);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ *ret = TAKE_PTR(list);
+
+ return 0;
+}
+
+static char *exec_command_line(char **argv);
+
+static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
+ bool using_subcgroup;
+ char *p;
+
+ assert(params);
+ assert(ret);
+
+ if (!params->cgroup_path)
+ return -EINVAL;
+
+ /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
+ * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
+ * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
+ * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
+ * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
+ * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
+ * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
+ * flag, which is only passed for the former statements, not for the latter. */
+
+ using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
+ if (using_subcgroup)
+ p = strjoin(params->cgroup_path, "/.control");
+ else
+ p = strdup(params->cgroup_path);
+ if (!p)
+ return -ENOMEM;
+
+ *ret = p;
+ return using_subcgroup;
+}
+
+static int exec_child(
+ Unit *unit,
+ const ExecCommand *command,
+ const ExecContext *context,
+ const ExecParameters *params,
+ ExecRuntime *runtime,
+ DynamicCreds *dcreds,
+ int socket_fd,
+ int named_iofds[3],
+ int *fds,
+ size_t n_socket_fds,
+ size_t n_storage_fds,
+ char **files_env,
+ int user_lookup_fd,
+ int *exit_status) {
+
+ _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
+ int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
+ _cleanup_free_ gid_t *supplementary_gids = NULL;
+ const char *username = NULL, *groupname = NULL;
+ _cleanup_free_ char *home_buffer = NULL;
+ const char *home = NULL, *shell = NULL;
+ dev_t journal_stream_dev = 0;
+ ino_t journal_stream_ino = 0;
+ bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
+ needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
+ needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
+ needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
+#if HAVE_SELINUX
+ _cleanup_free_ char *mac_selinux_context_net = NULL;
+ bool use_selinux = false;
+#endif
+#if ENABLE_SMACK
+ bool use_smack = false;
+#endif
+#if HAVE_APPARMOR
+ bool use_apparmor = false;
+#endif
+ uid_t uid = UID_INVALID;
+ gid_t gid = GID_INVALID;
+ size_t n_fds;
+ ExecDirectoryType dt;
+ int secure_bits;
+
+ assert(unit);
+ assert(command);
+ assert(context);
+ assert(params);
+ assert(exit_status);
+
+ rename_process_from_path(command->path);
+
+ /* We reset exactly these signals, since they are the
+ * only ones we set to SIG_IGN in the main daemon. All
+ * others we leave untouched because we set them to
+ * SIG_DFL or a valid handler initially, both of which
+ * will be demoted to SIG_DFL. */
+ (void) default_signals(SIGNALS_CRASH_HANDLER,
+ SIGNALS_IGNORE, -1);
+
+ if (context->ignore_sigpipe)
+ (void) ignore_signals(SIGPIPE, -1);
+
+ r = reset_signal_mask();
+ if (r < 0) {
+ *exit_status = EXIT_SIGNAL_MASK;
+ return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
+ }
+
+ if (params->idle_pipe)
+ do_idle_pipe_dance(params->idle_pipe);
+
+ /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
+ * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
+ * any fds open we don't really want open during the transition. In order to make logging work, we switch the
+ * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
+
+ log_forget_fds();
+ log_set_open_when_needed(true);
+
+ /* In case anything used libc syslog(), close this here, too */
+ closelog();
+
+ n_fds = n_socket_fds + n_storage_fds;
+ r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
+ if (r < 0) {
+ *exit_status = EXIT_FDS;
+ return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
+ }
+
+ if (!context->same_pgrp)
+ if (setsid() < 0) {
+ *exit_status = EXIT_SETSID;
+ return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
+ }
+
+ exec_context_tty_reset(context, params);
+
+ if (unit_shall_confirm_spawn(unit)) {
+ const char *vc = params->confirm_spawn;
+ _cleanup_free_ char *cmdline = NULL;
+
+ cmdline = exec_command_line(command->argv);
+ if (!cmdline) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ r = ask_for_confirmation(vc, unit, cmdline);
+ if (r != CONFIRM_EXECUTE) {
+ if (r == CONFIRM_PRETEND_SUCCESS) {
+ *exit_status = EXIT_SUCCESS;
+ return 0;
+ }
+ *exit_status = EXIT_CONFIRM;
+ log_unit_error(unit, "Execution cancelled by the user");
+ return -ECANCELED;
+ }
+ }
+
+ /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
+ * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
+ * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
+ * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
+ * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
+ if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
+ setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
+ *exit_status = EXIT_MEMORY;
+ return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
+ }
+
+ if (context->dynamic_user && dcreds) {
+ _cleanup_strv_free_ char **suggested_paths = NULL;
+
+ /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
+ * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
+ if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
+ *exit_status = EXIT_USER;
+ return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
+ }
+
+ r = compile_suggested_paths(context, params, &suggested_paths);
+ if (r < 0) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ if (r == -EILSEQ) {
+ log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
+ return -EOPNOTSUPP;
+ }
+ return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
+ }
+
+ if (!uid_is_valid(uid)) {
+ *exit_status = EXIT_USER;
+ log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
+ return -ESRCH;
+ }
+
+ if (!gid_is_valid(gid)) {
+ *exit_status = EXIT_USER;
+ log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
+ return -ESRCH;
+ }
+
+ if (dcreds->user)
+ username = dcreds->user->name;
+
+ } else {
+ r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
+ }
+
+ r = get_fixed_group(context, &groupname, &gid);
+ if (r < 0) {
+ *exit_status = EXIT_GROUP;
+ return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
+ }
+ }
+
+ /* Initialize user supplementary groups and get SupplementaryGroups= ones */
+ r = get_supplementary_groups(context, username, groupname, gid,
+ &supplementary_gids, &ngids);
+ if (r < 0) {
+ *exit_status = EXIT_GROUP;
+ return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
+ }
+
+ r = send_user_lookup(unit, user_lookup_fd, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
+ }
+
+ user_lookup_fd = safe_close(user_lookup_fd);
+
+ r = acquire_home(context, uid, &home, &home_buffer);
+ if (r < 0) {
+ *exit_status = EXIT_CHDIR;
+ return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
+ }
+
+ /* If a socket is connected to STDIN/STDOUT/STDERR, we
+ * must sure to drop O_NONBLOCK */
+ if (socket_fd >= 0)
+ (void) fd_nonblock(socket_fd, false);
+
+ /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
+ * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
+ if (params->cgroup_path) {
+ _cleanup_free_ char *p = NULL;
+
+ r = exec_parameters_get_cgroup_path(params, &p);
+ if (r < 0) {
+ *exit_status = EXIT_CGROUP;
+ return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
+ }
+
+ r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
+ if (r < 0) {
+ *exit_status = EXIT_CGROUP;
+ return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
+ }
+ }
+
+ r = setup_input(context, params, socket_fd, named_iofds);
+ if (r < 0) {
+ *exit_status = EXIT_STDIN;
+ return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
+ }
+
+ r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+ if (r < 0) {
+ *exit_status = EXIT_STDOUT;
+ return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
+ }
+
+ r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+ if (r < 0) {
+ *exit_status = EXIT_STDERR;
+ return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
+ }
+
+ if (context->oom_score_adjust_set) {
+ /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
+ * prohibit write access to this file, and we shouldn't trip up over that. */
+ r = set_oom_score_adjust(context->oom_score_adjust);
+ if (IN_SET(r, -EPERM, -EACCES))
+ log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
+ else if (r < 0) {
+ *exit_status = EXIT_OOM_ADJUST;
+ return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
+ }
+ }
+
+ if (context->nice_set)
+ if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
+ *exit_status = EXIT_NICE;
+ return log_unit_error_errno(unit, errno, "Failed to set up process scheduling priority (nice level): %m");
+ }
+
+ if (context->cpu_sched_set) {
+ struct sched_param param = {
+ .sched_priority = context->cpu_sched_priority,
+ };
+
+ r = sched_setscheduler(0,
+ context->cpu_sched_policy |
+ (context->cpu_sched_reset_on_fork ?
+ SCHED_RESET_ON_FORK : 0),
+ &param);
+ if (r < 0) {
+ *exit_status = EXIT_SETSCHEDULER;
+ return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
+ }
+ }
+
+ if (context->cpuset)
+ if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
+ *exit_status = EXIT_CPUAFFINITY;
+ return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
+ }
+
+ if (context->ioprio_set)
+ if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
+ *exit_status = EXIT_IOPRIO;
+ return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
+ }
+
+ if (context->timer_slack_nsec != NSEC_INFINITY)
+ if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
+ *exit_status = EXIT_TIMERSLACK;
+ return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
+ }
+
+ if (context->personality != PERSONALITY_INVALID) {
+ r = safe_personality(context->personality);
+ if (r < 0) {
+ *exit_status = EXIT_PERSONALITY;
+ return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
+ }
+ }
+
+ if (context->utmp_id)
+ utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
+ context->tty_path,
+ context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
+ context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
+ USER_PROCESS,
+ username);
+
+ if (context->user) {
+ r = chown_terminal(STDIN_FILENO, uid);
+ if (r < 0) {
+ *exit_status = EXIT_STDIN;
+ return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
+ }
+ }
+
+ /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
+ * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
+ * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
+ * touch a single hierarchy too. */
+ if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
+ r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_CGROUP;
+ return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
+ }
+ }
+
+ for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
+ r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
+ }
+
+ r = build_environment(
+ unit,
+ context,
+ params,
+ n_fds,
+ home,
+ username,
+ shell,
+ journal_stream_dev,
+ journal_stream_ino,
+ &our_env);
+ if (r < 0) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ r = build_pass_environment(context, &pass_env);
+ if (r < 0) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ accum_env = strv_env_merge(5,
+ params->environment,
+ our_env,
+ pass_env,
+ context->environment,
+ files_env,
+ NULL);
+ if (!accum_env) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+ accum_env = strv_env_clean(accum_env);
+
+ (void) umask(context->umask);
+
+ r = setup_keyring(unit, context, params, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_KEYRING;
+ return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
+ }
+
+ /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
+ needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
+
+ /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
+ needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
+
+ /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
+ if (needs_ambient_hack)
+ needs_setuid = false;
+ else
+ needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
+
+ if (needs_sandboxing) {
+ /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
+ * present. The actual MAC context application will happen later, as late as possible, to avoid
+ * impacting our own code paths. */
+
+#if HAVE_SELINUX
+ use_selinux = mac_selinux_use();
+#endif
+#if ENABLE_SMACK
+ use_smack = mac_smack_use();
+#endif
+#if HAVE_APPARMOR
+ use_apparmor = mac_apparmor_use();
+#endif
+ }
+
+ if (needs_sandboxing) {
+ int which_failed;
+
+ /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
+ * is set here. (See below.) */
+
+ r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
+ if (r < 0) {
+ *exit_status = EXIT_LIMITS;
+ return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
+ }
+ }
+
+ if (needs_setuid) {
+
+ /* Let's call into PAM after we set up our own idea of resource limits to that pam_limits
+ * wins here. (See above.) */
+
+ if (context->pam_name && username) {
+ r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
+ if (r < 0) {
+ *exit_status = EXIT_PAM;
+ return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
+ }
+ }
+ }
+
+ if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
+ if (ns_type_supported(NAMESPACE_NET)) {
+ r = setup_netns(runtime->netns_storage_socket);
+ if (r < 0) {
+ *exit_status = EXIT_NETWORK;
+ return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
+ }
+ } else
+ log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
+ }
+
+ needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
+ if (needs_mount_namespace) {
+ r = apply_mount_namespace(unit, command, context, params, runtime);
+ if (r < 0) {
+ *exit_status = EXIT_NAMESPACE;
+ return log_unit_error_errno(unit, r, "Failed to set up mount namespacing: %m");
+ }
+ }
+
+ /* Drop groups as early as possbile */
+ if (needs_setuid) {
+ r = enforce_groups(gid, supplementary_gids, ngids);
+ if (r < 0) {
+ *exit_status = EXIT_GROUP;
+ return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
+ }
+ }
+
+ if (needs_sandboxing) {
+#if HAVE_SELINUX
+ if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
+ r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
+ if (r < 0) {
+ *exit_status = EXIT_SELINUX_CONTEXT;
+ return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
+ }
+ }
+#endif
+
+ if (context->private_users) {
+ r = setup_private_users(uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
+ }
+ }
+ }
+
+ /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
+ * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
+ * however if we have it as we want to keep it open until the final execve(). */
+
+ if (params->exec_fd >= 0) {
+ exec_fd = params->exec_fd;
+
+ if (exec_fd < 3 + (int) n_fds) {
+ int moved_fd;
+
+ /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
+ * process we are about to execute. */
+
+ moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
+ if (moved_fd < 0) {
+ *exit_status = EXIT_FDS;
+ return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
+ }
+
+ safe_close(exec_fd);
+ exec_fd = moved_fd;
+ } else {
+ /* This fd should be FD_CLOEXEC already, but let's make sure. */
+ r = fd_cloexec(exec_fd, true);
+ if (r < 0) {
+ *exit_status = EXIT_FDS;
+ return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
+ }
+ }
+
+ fds_with_exec_fd = newa(int, n_fds + 1);
+ memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
+ fds_with_exec_fd[n_fds] = exec_fd;
+ n_fds_with_exec_fd = n_fds + 1;
+ } else {
+ fds_with_exec_fd = fds;
+ n_fds_with_exec_fd = n_fds;
+ }
+
+ r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
+ if (r >= 0)
+ r = shift_fds(fds, n_fds);
+ if (r >= 0)
+ r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
+ if (r < 0) {
+ *exit_status = EXIT_FDS;
+ return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
+ }
+
+ /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
+ * and at the right fd numbers. The are no other fds open, with one exception: the exec_fd if it is defined,
+ * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
+ * came this far. */
+
+ secure_bits = context->secure_bits;
+
+ if (needs_sandboxing) {
+ uint64_t bset;
+
+ /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
+ * requested. (Note this is placed after the general resource limit initialization, see
+ * above, in order to take precedence.) */
+ if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
+ if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
+ *exit_status = EXIT_LIMITS;
+ return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
+ }
+ }
+
+#if ENABLE_SMACK
+ /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
+ * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
+ if (use_smack) {
+ r = setup_smack(context, command);
+ if (r < 0) {
+ *exit_status = EXIT_SMACK_PROCESS_LABEL;
+ return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
+ }
+ }
+#endif
+
+ bset = context->capability_bounding_set;
+ /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
+ * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
+ * instead of us doing that */
+ if (needs_ambient_hack)
+ bset |= (UINT64_C(1) << CAP_SETPCAP) |
+ (UINT64_C(1) << CAP_SETUID) |
+ (UINT64_C(1) << CAP_SETGID);
+
+ if (!cap_test_all(bset)) {
+ r = capability_bounding_set_drop(bset, false);
+ if (r < 0) {
+ *exit_status = EXIT_CAPABILITIES;
+ return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
+ }
+ }
+
+ /* This is done before enforce_user, but ambient set
+ * does not survive over setresuid() if keep_caps is not set. */
+ if (!needs_ambient_hack &&
+ context->capability_ambient_set != 0) {
+ r = capability_ambient_set_apply(context->capability_ambient_set, true);
+ if (r < 0) {
+ *exit_status = EXIT_CAPABILITIES;
+ return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
+ }
+ }
+ }
+
+ if (needs_setuid) {
+ if (context->user) {
+ r = enforce_user(context, uid);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
+ }
+
+ if (!needs_ambient_hack &&
+ context->capability_ambient_set != 0) {
+
+ /* Fix the ambient capabilities after user change. */
+ r = capability_ambient_set_apply(context->capability_ambient_set, false);
+ if (r < 0) {
+ *exit_status = EXIT_CAPABILITIES;
+ return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
+ }
+
+ /* If we were asked to change user and ambient capabilities
+ * were requested, we had to add keep-caps to the securebits
+ * so that we would maintain the inherited capability set
+ * through the setresuid(). Make sure that the bit is added
+ * also to the context secure_bits so that we don't try to
+ * drop the bit away next. */
+
+ secure_bits |= 1<<SECURE_KEEP_CAPS;
+ }
+ }
+ }
+
+ /* Apply working directory here, because the working directory might be on NFS and only the user running
+ * this service might have the correct privilege to change to the working directory */
+ r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
+
+ if (needs_sandboxing) {
+ /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
+ * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
+ * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
+ * are restricted. */
+
+#if HAVE_SELINUX
+ if (use_selinux) {
+ char *exec_context = mac_selinux_context_net ?: context->selinux_context;
+
+ if (exec_context) {
+ r = setexeccon(exec_context);
+ if (r < 0) {
+ *exit_status = EXIT_SELINUX_CONTEXT;
+ return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
+ }
+ }
+ }
+#endif
+
+#if HAVE_APPARMOR
+ if (use_apparmor && context->apparmor_profile) {
+ r = aa_change_onexec(context->apparmor_profile);
+ if (r < 0 && !context->apparmor_profile_ignore) {
+ *exit_status = EXIT_APPARMOR_PROFILE;
+ return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
+ }
+ }
+#endif
+
+ /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
+ * we'll try not to call PR_SET_SECUREBITS unless necessary. */
+ if (prctl(PR_GET_SECUREBITS) != secure_bits)
+ if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
+ *exit_status = EXIT_SECUREBITS;
+ return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
+ }
+
+ if (context_has_no_new_privileges(context))
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
+ *exit_status = EXIT_NO_NEW_PRIVILEGES;
+ return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
+ }
+
+#if HAVE_SECCOMP
+ r = apply_address_families(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_ADDRESS_FAMILIES;
+ return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
+ }
+
+ r = apply_memory_deny_write_execute(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
+ }
+
+ r = apply_restrict_realtime(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
+ }
+
+ r = apply_restrict_namespaces(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
+ }
+
+ r = apply_protect_sysctl(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
+ }
+
+ r = apply_protect_kernel_modules(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
+ }
+
+ r = apply_private_devices(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
+ }
+
+ r = apply_syscall_archs(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
+ }
+
+ r = apply_lock_personality(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
+ }
+
+ /* This really should remain the last step before the execve(), to make sure our own code is unaffected
+ * by the filter as little as possible. */
+ r = apply_syscall_filter(unit, context, needs_ambient_hack);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
+ }
+#endif
+ }
+
+ if (!strv_isempty(context->unset_environment)) {
+ char **ee = NULL;
+
+ ee = strv_env_delete(accum_env, 1, context->unset_environment);
+ if (!ee) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ strv_free_and_replace(accum_env, ee);
+ }
+
+ final_argv = replace_env_argv(command->argv, accum_env);
+ if (!final_argv) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *line;
+
+ line = exec_command_line(final_argv);
+ if (line)
+ log_struct(LOG_DEBUG,
+ "EXECUTABLE=%s", command->path,
+ LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
+ LOG_UNIT_ID(unit),
+ LOG_UNIT_INVOCATION_ID(unit));
+ }
+
+ if (exec_fd >= 0) {
+ uint8_t hot = 1;
+
+ /* We have finished with all our initializations. Let's now let the manager know that. From this point
+ * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
+
+ if (write(exec_fd, &hot, sizeof(hot)) < 0) {
+ *exit_status = EXIT_EXEC;
+ return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
+ }
+ }
+
+ execve(command->path, final_argv, accum_env);
+ r = -errno;
+
+ if (exec_fd >= 0) {
+ uint8_t hot = 0;
+
+ /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
+ * that POLLHUP on it no longer means execve() succeeded. */
+
+ if (write(exec_fd, &hot, sizeof(hot)) < 0) {
+ *exit_status = EXIT_EXEC;
+ return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
+ }
+ }
+
+ if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
+ log_struct_errno(LOG_INFO, r,
+ "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
+ LOG_UNIT_ID(unit),
+ LOG_UNIT_INVOCATION_ID(unit),
+ LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
+ command->path),
+ "EXECUTABLE=%s", command->path);
+ return 0;
+ }
+
+ *exit_status = EXIT_EXEC;
+ return log_unit_error_errno(unit, r, "Failed to execute command: %m");
+}
+
+static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
+static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
+
+int exec_spawn(Unit *unit,
+ ExecCommand *command,
+ const ExecContext *context,
+ const ExecParameters *params,
+ ExecRuntime *runtime,
+ DynamicCreds *dcreds,
+ pid_t *ret) {
+
+ int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
+ _cleanup_free_ char *subcgroup_path = NULL;
+ _cleanup_strv_free_ char **files_env = NULL;
+ size_t n_storage_fds = 0, n_socket_fds = 0;
+ _cleanup_free_ char *line = NULL;
+ pid_t pid;
+
+ assert(unit);
+ assert(command);
+ assert(context);
+ assert(ret);
+ assert(params);
+ assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
+
+ if (context->std_input == EXEC_INPUT_SOCKET ||
+ context->std_output == EXEC_OUTPUT_SOCKET ||
+ context->std_error == EXEC_OUTPUT_SOCKET) {
+
+ if (params->n_socket_fds > 1) {
+ log_unit_error(unit, "Got more than one socket.");
+ return -EINVAL;
+ }
+
+ if (params->n_socket_fds == 0) {
+ log_unit_error(unit, "Got no socket.");
+ return -EINVAL;
+ }
+
+ socket_fd = params->fds[0];
+ } else {
+ socket_fd = -1;
+ fds = params->fds;
+ n_socket_fds = params->n_socket_fds;
+ n_storage_fds = params->n_storage_fds;
+ }
+
+ r = exec_context_named_iofds(context, params, named_iofds);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
+
+ r = exec_context_load_environment(unit, context, &files_env);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
+
+ line = exec_command_line(command->argv);
+ if (!line)
+ return log_oom();
+
+ log_struct(LOG_DEBUG,
+ LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
+ "EXECUTABLE=%s", command->path,
+ LOG_UNIT_ID(unit),
+ LOG_UNIT_INVOCATION_ID(unit));
+
+ if (params->cgroup_path) {
+ r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
+ if (r > 0) { /* We are using a child cgroup */
+ r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
+ }
+ }
+
+ pid = fork();
+ if (pid < 0)
+ return log_unit_error_errno(unit, errno, "Failed to fork: %m");
+
+ if (pid == 0) {
+ int exit_status = EXIT_SUCCESS;
+
+ r = exec_child(unit,
+ command,
+ context,
+ params,
+ runtime,
+ dcreds,
+ socket_fd,
+ named_iofds,
+ fds,
+ n_socket_fds,
+ n_storage_fds,
+ files_env,
+ unit->manager->user_lookup_fds[1],
+ &exit_status);
+
+ if (r < 0)
+ log_struct_errno(LOG_ERR, r,
+ "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
+ LOG_UNIT_ID(unit),
+ LOG_UNIT_INVOCATION_ID(unit),
+ LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
+ exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
+ command->path),
+ "EXECUTABLE=%s", command->path);
+
+ _exit(exit_status);
+ }
+
+ log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
+
+ /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
+ * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
+ * process will be killed too). */
+ if (subcgroup_path)
+ (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
+
+ exec_status_start(&command->exec_status, pid);
+
+ *ret = pid;
+ return 0;
+}
+
+void exec_context_init(ExecContext *c) {
+ ExecDirectoryType i;
+
+ assert(c);
+
+ c->umask = 0022;
+ c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
+ c->cpu_sched_policy = SCHED_OTHER;
+ c->syslog_priority = LOG_DAEMON|LOG_INFO;
+ c->syslog_level_prefix = true;
+ c->ignore_sigpipe = true;
+ c->timer_slack_nsec = NSEC_INFINITY;
+ c->personality = PERSONALITY_INVALID;
+ for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
+ c->directories[i].mode = 0755;
+ c->capability_bounding_set = CAP_ALL;
+ assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
+ c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
+ c->log_level_max = -1;
+}
+
+void exec_context_done(ExecContext *c) {
+ ExecDirectoryType i;
+ size_t l;
+
+ assert(c);
+
+ c->environment = strv_free(c->environment);
+ c->environment_files = strv_free(c->environment_files);
+ c->pass_environment = strv_free(c->pass_environment);
+ c->unset_environment = strv_free(c->unset_environment);
+
+ rlimit_free_all(c->rlimit);
+
+ for (l = 0; l < 3; l++) {
+ c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
+ c->stdio_file[l] = mfree(c->stdio_file[l]);
+ }
+
+ c->working_directory = mfree(c->working_directory);
+ c->root_directory = mfree(c->root_directory);
+ c->root_image = mfree(c->root_image);
+ c->tty_path = mfree(c->tty_path);
+ c->syslog_identifier = mfree(c->syslog_identifier);
+ c->user = mfree(c->user);
+ c->group = mfree(c->group);
+
+ c->supplementary_groups = strv_free(c->supplementary_groups);
+
+ c->pam_name = mfree(c->pam_name);
+
+ c->read_only_paths = strv_free(c->read_only_paths);
+ c->read_write_paths = strv_free(c->read_write_paths);
+ c->inaccessible_paths = strv_free(c->inaccessible_paths);
+
+ bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
+ c->bind_mounts = NULL;
+ c->n_bind_mounts = 0;
+ temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
+ c->temporary_filesystems = NULL;
+ c->n_temporary_filesystems = 0;
+
+ c->cpuset = cpu_set_mfree(c->cpuset);
+
+ c->utmp_id = mfree(c->utmp_id);
+ c->selinux_context = mfree(c->selinux_context);
+ c->apparmor_profile = mfree(c->apparmor_profile);
+ c->smack_process_label = mfree(c->smack_process_label);
+
+ c->syscall_filter = hashmap_free(c->syscall_filter);
+ c->syscall_archs = set_free(c->syscall_archs);
+ c->address_families = set_free(c->address_families);
+
+ for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
+ c->directories[i].paths = strv_free(c->directories[i].paths);
+
+ c->log_level_max = -1;
+
+ exec_context_free_log_extra_fields(c);
+
+ c->log_rate_limit_interval_usec = 0;
+ c->log_rate_limit_burst = 0;
+
+ c->stdin_data = mfree(c->stdin_data);
+ c->stdin_data_size = 0;
+}
+
+int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
+ char **i;
+
+ assert(c);
+
+ if (!runtime_prefix)
+ return 0;
+
+ STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
+ _cleanup_free_ char *p;
+
+ p = strjoin(runtime_prefix, "/", *i);
+ if (!p)
+ return -ENOMEM;
+
+ /* We execute this synchronously, since we need to be sure this is gone when we start the service
+ * next. */
+ (void) rm_rf(p, REMOVE_ROOT);
+ }
+
+ return 0;
+}
+
+static void exec_command_done(ExecCommand *c) {
+ assert(c);
+
+ c->path = mfree(c->path);
+ c->argv = strv_free(c->argv);
+}
+
+void exec_command_done_array(ExecCommand *c, size_t n) {
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ exec_command_done(c+i);
+}
+
+ExecCommand* exec_command_free_list(ExecCommand *c) {
+ ExecCommand *i;
+
+ while ((i = c)) {
+ LIST_REMOVE(command, c, i);
+ exec_command_done(i);
+ free(i);
+ }
+
+ return NULL;
+}
+
+void exec_command_free_array(ExecCommand **c, size_t n) {
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ c[i] = exec_command_free_list(c[i]);
+}
+
+void exec_command_reset_status_array(ExecCommand *c, size_t n) {
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ exec_status_reset(&c[i].exec_status);
+}
+
+void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ ExecCommand *z;
+
+ LIST_FOREACH(command, z, c[i])
+ exec_status_reset(&z->exec_status);
+ }
+}
+
+typedef struct InvalidEnvInfo {
+ const Unit *unit;
+ const char *path;
+} InvalidEnvInfo;
+
+static void invalid_env(const char *p, void *userdata) {
+ InvalidEnvInfo *info = userdata;
+
+ log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
+}
+
+const char* exec_context_fdname(const ExecContext *c, int fd_index) {
+ assert(c);
+
+ switch (fd_index) {
+
+ case STDIN_FILENO:
+ if (c->std_input != EXEC_INPUT_NAMED_FD)
+ return NULL;
+
+ return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
+
+ case STDOUT_FILENO:
+ if (c->std_output != EXEC_OUTPUT_NAMED_FD)
+ return NULL;
+
+ return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
+
+ case STDERR_FILENO:
+ if (c->std_error != EXEC_OUTPUT_NAMED_FD)
+ return NULL;
+
+ return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
+
+ default:
+ return NULL;
+ }
+}
+
+static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]) {
+ size_t i, targets;
+ const char* stdio_fdname[3];
+ size_t n_fds;
+
+ assert(c);
+ assert(p);
+
+ targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
+ (c->std_output == EXEC_OUTPUT_NAMED_FD) +
+ (c->std_error == EXEC_OUTPUT_NAMED_FD);
+
+ for (i = 0; i < 3; i++)
+ stdio_fdname[i] = exec_context_fdname(c, i);
+
+ n_fds = p->n_storage_fds + p->n_socket_fds;
+
+ for (i = 0; i < n_fds && targets > 0; i++)
+ if (named_iofds[STDIN_FILENO] < 0 &&
+ c->std_input == EXEC_INPUT_NAMED_FD &&
+ stdio_fdname[STDIN_FILENO] &&
+ streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
+
+ named_iofds[STDIN_FILENO] = p->fds[i];
+ targets--;
+
+ } else if (named_iofds[STDOUT_FILENO] < 0 &&
+ c->std_output == EXEC_OUTPUT_NAMED_FD &&
+ stdio_fdname[STDOUT_FILENO] &&
+ streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
+
+ named_iofds[STDOUT_FILENO] = p->fds[i];
+ targets--;
+
+ } else if (named_iofds[STDERR_FILENO] < 0 &&
+ c->std_error == EXEC_OUTPUT_NAMED_FD &&
+ stdio_fdname[STDERR_FILENO] &&
+ streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
+
+ named_iofds[STDERR_FILENO] = p->fds[i];
+ targets--;
+ }
+
+ return targets == 0 ? 0 : -ENOENT;
+}
+
+static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
+ char **i, **r = NULL;
+
+ assert(c);
+ assert(l);
+
+ STRV_FOREACH(i, c->environment_files) {
+ char *fn;
+ int k;
+ unsigned n;
+ bool ignore = false;
+ char **p;
+ _cleanup_globfree_ glob_t pglob = {};
+
+ fn = *i;
+
+ if (fn[0] == '-') {
+ ignore = true;
+ fn++;
+ }
+
+ if (!path_is_absolute(fn)) {
+ if (ignore)
+ continue;
+
+ strv_free(r);
+ return -EINVAL;
+ }
+
+ /* Filename supports globbing, take all matching files */
+ k = safe_glob(fn, 0, &pglob);
+ if (k < 0) {
+ if (ignore)
+ continue;
+
+ strv_free(r);
+ return k;
+ }
+
+ /* When we don't match anything, -ENOENT should be returned */
+ assert(pglob.gl_pathc > 0);
+
+ for (n = 0; n < pglob.gl_pathc; n++) {
+ k = load_env_file(NULL, pglob.gl_pathv[n], &p);
+ if (k < 0) {
+ if (ignore)
+ continue;
+
+ strv_free(r);
+ return k;
+ }
+ /* Log invalid environment variables with filename */
+ if (p) {
+ InvalidEnvInfo info = {
+ .unit = unit,
+ .path = pglob.gl_pathv[n]
+ };
+
+ p = strv_env_clean_with_callback(p, invalid_env, &info);
+ }
+
+ if (!r)
+ r = p;
+ else {
+ char **m;
+
+ m = strv_env_merge(2, r, p);
+ strv_free(r);
+ strv_free(p);
+ if (!m)
+ return -ENOMEM;
+
+ r = m;
+ }
+ }
+ }
+
+ *l = r;
+
+ return 0;
+}
+
+static bool tty_may_match_dev_console(const char *tty) {
+ _cleanup_free_ char *resolved = NULL;
+
+ if (!tty)
+ return true;
+
+ tty = skip_dev_prefix(tty);
+
+ /* trivial identity? */
+ if (streq(tty, "console"))
+ return true;
+
+ if (resolve_dev_console(&resolved) < 0)
+ return true; /* if we could not resolve, assume it may */
+
+ /* "tty0" means the active VC, so it may be the same sometimes */
+ return streq(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
+}
+
+bool exec_context_may_touch_console(const ExecContext *ec) {
+
+ return (ec->tty_reset ||
+ ec->tty_vhangup ||
+ ec->tty_vt_disallocate ||
+ is_terminal_input(ec->std_input) ||
+ is_terminal_output(ec->std_output) ||
+ is_terminal_output(ec->std_error)) &&
+ tty_may_match_dev_console(exec_context_tty_path(ec));
+}
+
+static void strv_fprintf(FILE *f, char **l) {
+ char **g;
+
+ assert(f);
+
+ STRV_FOREACH(g, l)
+ fprintf(f, " %s", *g);
+}
+
+void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
+ ExecDirectoryType dt;
+ char **e, **d;
+ unsigned i;
+ int r;
+
+ assert(c);
+ assert(f);
+
+ prefix = strempty(prefix);
+
+ fprintf(f,
+ "%sUMask: %04o\n"
+ "%sWorkingDirectory: %s\n"
+ "%sRootDirectory: %s\n"
+ "%sNonBlocking: %s\n"
+ "%sPrivateTmp: %s\n"
+ "%sPrivateDevices: %s\n"
+ "%sProtectKernelTunables: %s\n"
+ "%sProtectKernelModules: %s\n"
+ "%sProtectControlGroups: %s\n"
+ "%sPrivateNetwork: %s\n"
+ "%sPrivateUsers: %s\n"
+ "%sProtectHome: %s\n"
+ "%sProtectSystem: %s\n"
+ "%sMountAPIVFS: %s\n"
+ "%sIgnoreSIGPIPE: %s\n"
+ "%sMemoryDenyWriteExecute: %s\n"
+ "%sRestrictRealtime: %s\n"
+ "%sKeyringMode: %s\n",
+ prefix, c->umask,
+ prefix, c->working_directory ? c->working_directory : "/",
+ prefix, c->root_directory ? c->root_directory : "/",
+ prefix, yes_no(c->non_blocking),
+ prefix, yes_no(c->private_tmp),
+ prefix, yes_no(c->private_devices),
+ prefix, yes_no(c->protect_kernel_tunables),
+ prefix, yes_no(c->protect_kernel_modules),
+ prefix, yes_no(c->protect_control_groups),
+ prefix, yes_no(c->private_network),
+ prefix, yes_no(c->private_users),
+ prefix, protect_home_to_string(c->protect_home),
+ prefix, protect_system_to_string(c->protect_system),
+ prefix, yes_no(c->mount_apivfs),
+ prefix, yes_no(c->ignore_sigpipe),
+ prefix, yes_no(c->memory_deny_write_execute),
+ prefix, yes_no(c->restrict_realtime),
+ prefix, exec_keyring_mode_to_string(c->keyring_mode));
+
+ if (c->root_image)
+ fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
+
+ STRV_FOREACH(e, c->environment)
+ fprintf(f, "%sEnvironment: %s\n", prefix, *e);
+
+ STRV_FOREACH(e, c->environment_files)
+ fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
+
+ STRV_FOREACH(e, c->pass_environment)
+ fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
+
+ STRV_FOREACH(e, c->unset_environment)
+ fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
+
+ fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
+
+ for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
+ fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
+
+ STRV_FOREACH(d, c->directories[dt].paths)
+ fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
+ }
+
+ if (c->nice_set)
+ fprintf(f,
+ "%sNice: %i\n",
+ prefix, c->nice);
+
+ if (c->oom_score_adjust_set)
+ fprintf(f,
+ "%sOOMScoreAdjust: %i\n",
+ prefix, c->oom_score_adjust);
+
+ for (i = 0; i < RLIM_NLIMITS; i++)
+ if (c->rlimit[i]) {
+ fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
+ prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
+ fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
+ prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
+ }
+
+ if (c->ioprio_set) {
+ _cleanup_free_ char *class_str = NULL;
+
+ r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
+ if (r >= 0)
+ fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
+
+ fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
+ }
+
+ if (c->cpu_sched_set) {
+ _cleanup_free_ char *policy_str = NULL;
+
+ r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
+ if (r >= 0)
+ fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
+
+ fprintf(f,
+ "%sCPUSchedulingPriority: %i\n"
+ "%sCPUSchedulingResetOnFork: %s\n",
+ prefix, c->cpu_sched_priority,
+ prefix, yes_no(c->cpu_sched_reset_on_fork));
+ }
+
+ if (c->cpuset) {
+ fprintf(f, "%sCPUAffinity:", prefix);
+ for (i = 0; i < c->cpuset_ncpus; i++)
+ if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
+ fprintf(f, " %u", i);
+ fputs("\n", f);
+ }
+
+ if (c->timer_slack_nsec != NSEC_INFINITY)
+ fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
+
+ fprintf(f,
+ "%sStandardInput: %s\n"
+ "%sStandardOutput: %s\n"
+ "%sStandardError: %s\n",
+ prefix, exec_input_to_string(c->std_input),
+ prefix, exec_output_to_string(c->std_output),
+ prefix, exec_output_to_string(c->std_error));
+
+ if (c->std_input == EXEC_INPUT_NAMED_FD)
+ fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
+ if (c->std_output == EXEC_OUTPUT_NAMED_FD)
+ fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
+ if (c->std_error == EXEC_OUTPUT_NAMED_FD)
+ fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
+
+ if (c->std_input == EXEC_INPUT_FILE)
+ fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
+ if (c->std_output == EXEC_OUTPUT_FILE)
+ fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
+ if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
+ fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
+ if (c->std_error == EXEC_OUTPUT_FILE)
+ fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
+ if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
+ fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
+
+ if (c->tty_path)
+ fprintf(f,
+ "%sTTYPath: %s\n"
+ "%sTTYReset: %s\n"
+ "%sTTYVHangup: %s\n"
+ "%sTTYVTDisallocate: %s\n",
+ prefix, c->tty_path,
+ prefix, yes_no(c->tty_reset),
+ prefix, yes_no(c->tty_vhangup),
+ prefix, yes_no(c->tty_vt_disallocate));
+
+ if (IN_SET(c->std_output,
+ EXEC_OUTPUT_SYSLOG,
+ EXEC_OUTPUT_KMSG,
+ EXEC_OUTPUT_JOURNAL,
+ EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
+ IN_SET(c->std_error,
+ EXEC_OUTPUT_SYSLOG,
+ EXEC_OUTPUT_KMSG,
+ EXEC_OUTPUT_JOURNAL,
+ EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
+
+ _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
+
+ r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
+ if (r >= 0)
+ fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
+
+ r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
+ if (r >= 0)
+ fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
+ }
+
+ if (c->log_level_max >= 0) {
+ _cleanup_free_ char *t = NULL;
+
+ (void) log_level_to_string_alloc(c->log_level_max, &t);
+
+ fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
+ }
+
+ if (c->log_rate_limit_interval_usec > 0) {
+ char buf_timespan[FORMAT_TIMESPAN_MAX];
+
+ fprintf(f,
+ "%sLogRateLimitIntervalSec: %s\n",
+ prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_rate_limit_interval_usec, USEC_PER_SEC));
+ }
+
+ if (c->log_rate_limit_burst > 0)
+ fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_rate_limit_burst);
+
+ if (c->n_log_extra_fields > 0) {
+ size_t j;
+
+ for (j = 0; j < c->n_log_extra_fields; j++) {
+ fprintf(f, "%sLogExtraFields: ", prefix);
+ fwrite(c->log_extra_fields[j].iov_base,
+ 1, c->log_extra_fields[j].iov_len,
+ f);
+ fputc('\n', f);
+ }
+ }
+
+ if (c->secure_bits) {
+ _cleanup_free_ char *str = NULL;
+
+ r = secure_bits_to_string_alloc(c->secure_bits, &str);
+ if (r >= 0)
+ fprintf(f, "%sSecure Bits: %s\n", prefix, str);
+ }
+
+ if (c->capability_bounding_set != CAP_ALL) {
+ _cleanup_free_ char *str = NULL;
+
+ r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
+ if (r >= 0)
+ fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
+ }
+
+ if (c->capability_ambient_set != 0) {
+ _cleanup_free_ char *str = NULL;
+
+ r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
+ if (r >= 0)
+ fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
+ }
+
+ if (c->user)
+ fprintf(f, "%sUser: %s\n", prefix, c->user);
+ if (c->group)
+ fprintf(f, "%sGroup: %s\n", prefix, c->group);
+
+ fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
+
+ if (!strv_isempty(c->supplementary_groups)) {
+ fprintf(f, "%sSupplementaryGroups:", prefix);
+ strv_fprintf(f, c->supplementary_groups);
+ fputs("\n", f);
+ }
+
+ if (c->pam_name)
+ fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
+
+ if (!strv_isempty(c->read_write_paths)) {
+ fprintf(f, "%sReadWritePaths:", prefix);
+ strv_fprintf(f, c->read_write_paths);
+ fputs("\n", f);
+ }
+
+ if (!strv_isempty(c->read_only_paths)) {
+ fprintf(f, "%sReadOnlyPaths:", prefix);
+ strv_fprintf(f, c->read_only_paths);
+ fputs("\n", f);
+ }
+
+ if (!strv_isempty(c->inaccessible_paths)) {
+ fprintf(f, "%sInaccessiblePaths:", prefix);
+ strv_fprintf(f, c->inaccessible_paths);
+ fputs("\n", f);
+ }
+
+ if (c->n_bind_mounts > 0)
+ for (i = 0; i < c->n_bind_mounts; i++)
+ fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
+ c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
+ c->bind_mounts[i].ignore_enoent ? "-": "",
+ c->bind_mounts[i].source,
+ c->bind_mounts[i].destination,
+ c->bind_mounts[i].recursive ? "rbind" : "norbind");
+
+ if (c->n_temporary_filesystems > 0)
+ for (i = 0; i < c->n_temporary_filesystems; i++) {
+ TemporaryFileSystem *t = c->temporary_filesystems + i;
+
+ fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
+ t->path,
+ isempty(t->options) ? "" : ":",
+ strempty(t->options));
+ }
+
+ if (c->utmp_id)
+ fprintf(f,
+ "%sUtmpIdentifier: %s\n",
+ prefix, c->utmp_id);
+
+ if (c->selinux_context)
+ fprintf(f,
+ "%sSELinuxContext: %s%s\n",
+ prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
+
+ if (c->apparmor_profile)
+ fprintf(f,
+ "%sAppArmorProfile: %s%s\n",
+ prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
+
+ if (c->smack_process_label)
+ fprintf(f,
+ "%sSmackProcessLabel: %s%s\n",
+ prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
+
+ if (c->personality != PERSONALITY_INVALID)
+ fprintf(f,
+ "%sPersonality: %s\n",
+ prefix, strna(personality_to_string(c->personality)));
+
+ fprintf(f,
+ "%sLockPersonality: %s\n",
+ prefix, yes_no(c->lock_personality));
+
+ if (c->syscall_filter) {
+#if HAVE_SECCOMP
+ Iterator j;
+ void *id, *val;
+ bool first = true;
+#endif
+
+ fprintf(f,
+ "%sSystemCallFilter: ",
+ prefix);
+
+ if (!c->syscall_whitelist)
+ fputc('~', f);
+
+#if HAVE_SECCOMP
+ HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
+ _cleanup_free_ char *name = NULL;
+ const char *errno_name = NULL;
+ int num = PTR_TO_INT(val);
+
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
+ fputs(strna(name), f);
+
+ if (num >= 0) {
+ errno_name = errno_to_name(num);
+ if (errno_name)
+ fprintf(f, ":%s", errno_name);
+ else
+ fprintf(f, ":%d", num);
+ }
+ }
+#endif
+
+ fputc('\n', f);
+ }
+
+ if (c->syscall_archs) {
+#if HAVE_SECCOMP
+ Iterator j;
+ void *id;
+#endif
+
+ fprintf(f,
+ "%sSystemCallArchitectures:",
+ prefix);
+
+#if HAVE_SECCOMP
+ SET_FOREACH(id, c->syscall_archs, j)
+ fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
+#endif
+ fputc('\n', f);
+ }
+
+ if (exec_context_restrict_namespaces_set(c)) {
+ _cleanup_free_ char *s = NULL;
+
+ r = namespace_flags_to_string(c->restrict_namespaces, &s);
+ if (r >= 0)
+ fprintf(f, "%sRestrictNamespaces: %s\n",
+ prefix, s);
+ }
+
+ if (c->syscall_errno > 0) {
+ const char *errno_name;
+
+ fprintf(f, "%sSystemCallErrorNumber: ", prefix);
+
+ errno_name = errno_to_name(c->syscall_errno);
+ if (errno_name)
+ fprintf(f, "%s\n", errno_name);
+ else
+ fprintf(f, "%d\n", c->syscall_errno);
+ }
+
+ if (c->apparmor_profile)
+ fprintf(f,
+ "%sAppArmorProfile: %s%s\n",
+ prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
+}
+
+bool exec_context_maintains_privileges(const ExecContext *c) {
+ assert(c);
+
+ /* Returns true if the process forked off would run under
+ * an unchanged UID or as root. */
+
+ if (!c->user)
+ return true;
+
+ if (streq(c->user, "root") || streq(c->user, "0"))
+ return true;
+
+ return false;
+}
+
+int exec_context_get_effective_ioprio(const ExecContext *c) {
+ int p;
+
+ assert(c);
+
+ if (c->ioprio_set)
+ return c->ioprio;
+
+ p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
+ if (p < 0)
+ return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
+
+ return p;
+}
+
+void exec_context_free_log_extra_fields(ExecContext *c) {
+ size_t l;
+
+ assert(c);
+
+ for (l = 0; l < c->n_log_extra_fields; l++)
+ free(c->log_extra_fields[l].iov_base);
+ c->log_extra_fields = mfree(c->log_extra_fields);
+ c->n_log_extra_fields = 0;
+}
+
+void exec_status_start(ExecStatus *s, pid_t pid) {
+ assert(s);
+
+ *s = (ExecStatus) {
+ .pid = pid,
+ };
+
+ dual_timestamp_get(&s->start_timestamp);
+}
+
+void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
+ assert(s);
+
+ if (s->pid != pid) {
+ *s = (ExecStatus) {
+ .pid = pid,
+ };
+ }
+
+ dual_timestamp_get(&s->exit_timestamp);
+
+ s->code = code;
+ s->status = status;
+
+ if (context) {
+ if (context->utmp_id)
+ (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
+
+ exec_context_tty_reset(context, NULL);
+ }
+}
+
+void exec_status_reset(ExecStatus *s) {
+ assert(s);
+
+ *s = (ExecStatus) {};
+}
+
+void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+
+ assert(s);
+ assert(f);
+
+ if (s->pid <= 0)
+ return;
+
+ prefix = strempty(prefix);
+
+ fprintf(f,
+ "%sPID: "PID_FMT"\n",
+ prefix, s->pid);
+
+ if (dual_timestamp_is_set(&s->start_timestamp))
+ fprintf(f,
+ "%sStart Timestamp: %s\n",
+ prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
+
+ if (dual_timestamp_is_set(&s->exit_timestamp))
+ fprintf(f,
+ "%sExit Timestamp: %s\n"
+ "%sExit Code: %s\n"
+ "%sExit Status: %i\n",
+ prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
+ prefix, sigchld_code_to_string(s->code),
+ prefix, s->status);
+}
+
+static char *exec_command_line(char **argv) {
+ size_t k;
+ char *n, *p, **a;
+ bool first = true;
+
+ assert(argv);
+
+ k = 1;
+ STRV_FOREACH(a, argv)
+ k += strlen(*a)+3;
+
+ n = new(char, k);
+ if (!n)
+ return NULL;
+
+ p = n;
+ STRV_FOREACH(a, argv) {
+
+ if (!first)
+ *(p++) = ' ';
+ else
+ first = false;
+
+ if (strpbrk(*a, WHITESPACE)) {
+ *(p++) = '\'';
+ p = stpcpy(p, *a);
+ *(p++) = '\'';
+ } else
+ p = stpcpy(p, *a);
+
+ }
+
+ *p = 0;
+
+ /* FIXME: this doesn't really handle arguments that have
+ * spaces and ticks in them */
+
+ return n;
+}
+
+static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
+ _cleanup_free_ char *cmd = NULL;
+ const char *prefix2;
+
+ assert(c);
+ assert(f);
+
+ prefix = strempty(prefix);
+ prefix2 = strjoina(prefix, "\t");
+
+ cmd = exec_command_line(c->argv);
+ fprintf(f,
+ "%sCommand Line: %s\n",
+ prefix, cmd ? cmd : strerror(ENOMEM));
+
+ exec_status_dump(&c->exec_status, f, prefix2);
+}
+
+void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
+ assert(f);
+
+ prefix = strempty(prefix);
+
+ LIST_FOREACH(command, c, c)
+ exec_command_dump(c, f, prefix);
+}
+
+void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
+ ExecCommand *end;
+
+ assert(l);
+ assert(e);
+
+ if (*l) {
+ /* It's kind of important, that we keep the order here */
+ LIST_FIND_TAIL(command, *l, end);
+ LIST_INSERT_AFTER(command, *l, end, e);
+ } else
+ *l = e;
+}
+
+int exec_command_set(ExecCommand *c, const char *path, ...) {
+ va_list ap;
+ char **l, *p;
+
+ assert(c);
+ assert(path);
+
+ va_start(ap, path);
+ l = strv_new_ap(path, ap);
+ va_end(ap);
+
+ if (!l)
+ return -ENOMEM;
+
+ p = strdup(path);
+ if (!p) {
+ strv_free(l);
+ return -ENOMEM;
+ }
+
+ free_and_replace(c->path, p);
+
+ return strv_free_and_replace(c->argv, l);
+}
+
+int exec_command_append(ExecCommand *c, const char *path, ...) {
+ _cleanup_strv_free_ char **l = NULL;
+ va_list ap;
+ int r;
+
+ assert(c);
+ assert(path);
+
+ va_start(ap, path);
+ l = strv_new_ap(path, ap);
+ va_end(ap);
+
+ if (!l)
+ return -ENOMEM;
+
+ r = strv_extend_strv(&c->argv, l, false);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void *remove_tmpdir_thread(void *p) {
+ _cleanup_free_ char *path = p;
+
+ (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
+ return NULL;
+}
+
+static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
+ int r;
+
+ if (!rt)
+ return NULL;
+
+ if (rt->manager)
+ (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
+
+ /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
+ if (destroy && rt->tmp_dir) {
+ log_debug("Spawning thread to nuke %s", rt->tmp_dir);
+
+ r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
+ free(rt->tmp_dir);
+ }
+
+ rt->tmp_dir = NULL;
+ }
+
+ if (destroy && rt->var_tmp_dir) {
+ log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
+
+ r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
+ free(rt->var_tmp_dir);
+ }
+
+ rt->var_tmp_dir = NULL;
+ }
+
+ rt->id = mfree(rt->id);
+ rt->tmp_dir = mfree(rt->tmp_dir);
+ rt->var_tmp_dir = mfree(rt->var_tmp_dir);
+ safe_close_pair(rt->netns_storage_socket);
+ return mfree(rt);
+}
+
+static void exec_runtime_freep(ExecRuntime **rt) {
+ if (*rt)
+ (void) exec_runtime_free(*rt, false);
+}
+
+static int exec_runtime_allocate(ExecRuntime **rt) {
+ assert(rt);
+
+ *rt = new0(ExecRuntime, 1);
+ if (!*rt)
+ return -ENOMEM;
+
+ (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
+ return 0;
+}
+
+static int exec_runtime_add(
+ Manager *m,
+ const char *id,
+ const char *tmp_dir,
+ const char *var_tmp_dir,
+ const int netns_storage_socket[2],
+ ExecRuntime **ret) {
+
+ _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
+ int r;
+
+ assert(m);
+ assert(id);
+
+ r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = exec_runtime_allocate(&rt);
+ if (r < 0)
+ return r;
+
+ rt->id = strdup(id);
+ if (!rt->id)
+ return -ENOMEM;
+
+ if (tmp_dir) {
+ rt->tmp_dir = strdup(tmp_dir);
+ if (!rt->tmp_dir)
+ return -ENOMEM;
+
+ /* When tmp_dir is set, then we require var_tmp_dir is also set. */
+ assert(var_tmp_dir);
+ rt->var_tmp_dir = strdup(var_tmp_dir);
+ if (!rt->var_tmp_dir)
+ return -ENOMEM;
+ }
+
+ if (netns_storage_socket) {
+ rt->netns_storage_socket[0] = netns_storage_socket[0];
+ rt->netns_storage_socket[1] = netns_storage_socket[1];
+ }
+
+ r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
+ if (r < 0)
+ return r;
+
+ rt->manager = m;
+
+ if (ret)
+ *ret = rt;
+
+ /* do not remove created ExecRuntime object when the operation succeeds. */
+ rt = NULL;
+ return 0;
+}
+
+static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
+ _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
+ _cleanup_close_pair_ int netns_storage_socket[2] = {-1, -1};
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(id);
+
+ /* It is not necessary to create ExecRuntime object. */
+ if (!c->private_network && !c->private_tmp)
+ return 0;
+
+ if (c->private_tmp) {
+ r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
+ if (r < 0)
+ return r;
+ }
+
+ if (c->private_network) {
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
+ return -errno;
+ }
+
+ r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
+ if (r < 0)
+ return r;
+
+ /* Avoid cleanup */
+ netns_storage_socket[0] = -1;
+ netns_storage_socket[1] = -1;
+ return 1;
+}
+
+int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
+ ExecRuntime *rt;
+ int r;
+
+ assert(m);
+ assert(id);
+ assert(ret);
+
+ rt = hashmap_get(m->exec_runtime_by_id, id);
+ if (rt)
+ /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
+ goto ref;
+
+ if (!create)
+ return 0;
+
+ /* If not found, then create a new object. */
+ r = exec_runtime_make(m, c, id, &rt);
+ if (r <= 0)
+ /* When r == 0, it is not necessary to create ExecRuntime object. */
+ return r;
+
+ref:
+ /* increment reference counter. */
+ rt->n_ref++;
+ *ret = rt;
+ return 1;
+}
+
+ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
+ if (!rt)
+ return NULL;
+
+ assert(rt->n_ref > 0);
+
+ rt->n_ref--;
+ if (rt->n_ref > 0)
+ return NULL;
+
+ return exec_runtime_free(rt, destroy);
+}
+
+int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
+ ExecRuntime *rt;
+ Iterator i;
+
+ assert(m);
+ assert(f);
+ assert(fds);
+
+ HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
+ fprintf(f, "exec-runtime=%s", rt->id);
+
+ if (rt->tmp_dir)
+ fprintf(f, " tmp-dir=%s", rt->tmp_dir);
+
+ if (rt->var_tmp_dir)
+ fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
+
+ if (rt->netns_storage_socket[0] >= 0) {
+ int copy;
+
+ copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
+ if (copy < 0)
+ return copy;
+
+ fprintf(f, " netns-socket-0=%i", copy);
+ }
+
+ if (rt->netns_storage_socket[1] >= 0) {
+ int copy;
+
+ copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
+ if (copy < 0)
+ return copy;
+
+ fprintf(f, " netns-socket-1=%i", copy);
+ }
+
+ fputc('\n', f);
+ }
+
+ return 0;
+}
+
+int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
+ _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
+ ExecRuntime *rt;
+ int r;
+
+ /* This is for the migration from old (v237 or earlier) deserialization text.
+ * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
+ * Even if the ExecRuntime object originally created by the other unit, we cannot judge
+ * so or not from the serialized text, then we always creates a new object owned by this. */
+
+ assert(u);
+ assert(key);
+ assert(value);
+
+ /* Manager manages ExecRuntime objects by the unit id.
+ * So, we omit the serialized text when the unit does not have id (yet?)... */
+ if (isempty(u->id)) {
+ log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
+ return 0;
+ }
+
+ r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
+ if (r < 0) {
+ log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
+ return 0;
+ }
+
+ rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
+ if (!rt) {
+ r = exec_runtime_allocate(&rt_create);
+ if (r < 0)
+ return log_oom();
+
+ rt_create->id = strdup(u->id);
+ if (!rt_create->id)
+ return log_oom();
+
+ rt = rt_create;
+ }
+
+ if (streq(key, "tmp-dir")) {
+ char *copy;
+
+ copy = strdup(value);
+ if (!copy)
+ return log_oom();
+
+ free_and_replace(rt->tmp_dir, copy);
+
+ } else if (streq(key, "var-tmp-dir")) {
+ char *copy;
+
+ copy = strdup(value);
+ if (!copy)
+ return log_oom();
+
+ free_and_replace(rt->var_tmp_dir, copy);
+
+ } else if (streq(key, "netns-socket-0")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
+ log_unit_debug(u, "Failed to parse netns socket value: %s", value);
+ return 0;
+ }
+
+ safe_close(rt->netns_storage_socket[0]);
+ rt->netns_storage_socket[0] = fdset_remove(fds, fd);
+
+ } else if (streq(key, "netns-socket-1")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
+ log_unit_debug(u, "Failed to parse netns socket value: %s", value);
+ return 0;
+ }
+
+ safe_close(rt->netns_storage_socket[1]);
+ rt->netns_storage_socket[1] = fdset_remove(fds, fd);
+ } else
+ return 0;
+
+ /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
+ if (rt_create) {
+ r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
+ if (r < 0) {
+ log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
+ return 0;
+ }
+
+ rt_create->manager = u->manager;
+
+ /* Avoid cleanup */
+ rt_create = NULL;
+ }
+
+ return 1;
+}
+
+void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
+ char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
+ int r, fd0 = -1, fd1 = -1;
+ const char *p, *v = value;
+ size_t n;
+
+ assert(m);
+ assert(value);
+ assert(fds);
+
+ n = strcspn(v, " ");
+ id = strndupa(v, n);
+ if (v[n] != ' ')
+ goto finalize;
+ p = v + n + 1;
+
+ v = startswith(p, "tmp-dir=");
+ if (v) {
+ n = strcspn(v, " ");
+ tmp_dir = strndupa(v, n);
+ if (v[n] != ' ')
+ goto finalize;
+ p = v + n + 1;
+ }
+
+ v = startswith(p, "var-tmp-dir=");
+ if (v) {
+ n = strcspn(v, " ");
+ var_tmp_dir = strndupa(v, n);
+ if (v[n] != ' ')
+ goto finalize;
+ p = v + n + 1;
+ }
+
+ v = startswith(p, "netns-socket-0=");
+ if (v) {
+ char *buf;
+
+ n = strcspn(v, " ");
+ buf = strndupa(v, n);
+ if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
+ log_debug("Unable to process exec-runtime netns fd specification.");
+ return;
+ }
+ fd0 = fdset_remove(fds, fd0);
+ if (v[n] != ' ')
+ goto finalize;
+ p = v + n + 1;
+ }
+
+ v = startswith(p, "netns-socket-1=");
+ if (v) {
+ char *buf;
+
+ n = strcspn(v, " ");
+ buf = strndupa(v, n);
+ if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
+ log_debug("Unable to process exec-runtime netns fd specification.");
+ return;
+ }
+ fd1 = fdset_remove(fds, fd1);
+ }
+
+finalize:
+
+ r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to add exec-runtime: %m");
+}
+
+void exec_runtime_vacuum(Manager *m) {
+ ExecRuntime *rt;
+ Iterator i;
+
+ assert(m);
+
+ /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
+
+ HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
+ if (rt->n_ref > 0)
+ continue;
+
+ (void) exec_runtime_free(rt, false);
+ }
+}
+
+void exec_params_clear(ExecParameters *p) {
+ if (!p)
+ return;
+
+ strv_free(p->environment);
+}
+
+static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
+ [EXEC_INPUT_NULL] = "null",
+ [EXEC_INPUT_TTY] = "tty",
+ [EXEC_INPUT_TTY_FORCE] = "tty-force",
+ [EXEC_INPUT_TTY_FAIL] = "tty-fail",
+ [EXEC_INPUT_SOCKET] = "socket",
+ [EXEC_INPUT_NAMED_FD] = "fd",
+ [EXEC_INPUT_DATA] = "data",
+ [EXEC_INPUT_FILE] = "file",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
+
+static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
+ [EXEC_OUTPUT_INHERIT] = "inherit",
+ [EXEC_OUTPUT_NULL] = "null",
+ [EXEC_OUTPUT_TTY] = "tty",
+ [EXEC_OUTPUT_SYSLOG] = "syslog",
+ [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
+ [EXEC_OUTPUT_KMSG] = "kmsg",
+ [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
+ [EXEC_OUTPUT_JOURNAL] = "journal",
+ [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
+ [EXEC_OUTPUT_SOCKET] = "socket",
+ [EXEC_OUTPUT_NAMED_FD] = "fd",
+ [EXEC_OUTPUT_FILE] = "file",
+ [EXEC_OUTPUT_FILE_APPEND] = "append",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
+
+static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
+ [EXEC_UTMP_INIT] = "init",
+ [EXEC_UTMP_LOGIN] = "login",
+ [EXEC_UTMP_USER] = "user",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
+
+static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
+ [EXEC_PRESERVE_NO] = "no",
+ [EXEC_PRESERVE_YES] = "yes",
+ [EXEC_PRESERVE_RESTART] = "restart",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
+
+static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
+ [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
+ [EXEC_DIRECTORY_STATE] = "StateDirectory",
+ [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
+ [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
+ [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
+
+static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
+ [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
+ [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
+ [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
+ [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
+ [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
+
+static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
+ [EXEC_KEYRING_INHERIT] = "inherit",
+ [EXEC_KEYRING_PRIVATE] = "private",
+ [EXEC_KEYRING_SHARED] = "shared",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);
diff --git a/src/core/execute.h b/src/core/execute.h
new file mode 100644
index 0000000..0f1bf56
--- /dev/null
+++ b/src/core/execute.h
@@ -0,0 +1,404 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct ExecStatus ExecStatus;
+typedef struct ExecCommand ExecCommand;
+typedef struct ExecContext ExecContext;
+typedef struct ExecRuntime ExecRuntime;
+typedef struct ExecParameters ExecParameters;
+typedef struct Manager Manager;
+
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/capability.h>
+
+#include "cgroup-util.h"
+#include "fdset.h"
+#include "list.h"
+#include "missing_resource.h"
+#include "namespace.h"
+#include "nsflags.h"
+
+#define EXEC_STDIN_DATA_MAX (64U*1024U*1024U)
+
+typedef enum ExecUtmpMode {
+ EXEC_UTMP_INIT,
+ EXEC_UTMP_LOGIN,
+ EXEC_UTMP_USER,
+ _EXEC_UTMP_MODE_MAX,
+ _EXEC_UTMP_MODE_INVALID = -1
+} ExecUtmpMode;
+
+typedef enum ExecInput {
+ EXEC_INPUT_NULL,
+ EXEC_INPUT_TTY,
+ EXEC_INPUT_TTY_FORCE,
+ EXEC_INPUT_TTY_FAIL,
+ EXEC_INPUT_SOCKET,
+ EXEC_INPUT_NAMED_FD,
+ EXEC_INPUT_DATA,
+ EXEC_INPUT_FILE,
+ _EXEC_INPUT_MAX,
+ _EXEC_INPUT_INVALID = -1
+} ExecInput;
+
+typedef enum ExecOutput {
+ EXEC_OUTPUT_INHERIT,
+ EXEC_OUTPUT_NULL,
+ EXEC_OUTPUT_TTY,
+ EXEC_OUTPUT_SYSLOG,
+ EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG,
+ EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_JOURNAL,
+ EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
+ EXEC_OUTPUT_SOCKET,
+ EXEC_OUTPUT_NAMED_FD,
+ EXEC_OUTPUT_FILE,
+ EXEC_OUTPUT_FILE_APPEND,
+ _EXEC_OUTPUT_MAX,
+ _EXEC_OUTPUT_INVALID = -1
+} ExecOutput;
+
+typedef enum ExecPreserveMode {
+ EXEC_PRESERVE_NO,
+ EXEC_PRESERVE_YES,
+ EXEC_PRESERVE_RESTART,
+ _EXEC_PRESERVE_MODE_MAX,
+ _EXEC_PRESERVE_MODE_INVALID = -1
+} ExecPreserveMode;
+
+typedef enum ExecKeyringMode {
+ EXEC_KEYRING_INHERIT,
+ EXEC_KEYRING_PRIVATE,
+ EXEC_KEYRING_SHARED,
+ _EXEC_KEYRING_MODE_MAX,
+ _EXEC_KEYRING_MODE_INVALID = -1,
+} ExecKeyringMode;
+
+/* Contains start and exit information about an executed command. */
+struct ExecStatus {
+ pid_t pid;
+ dual_timestamp start_timestamp;
+ dual_timestamp exit_timestamp;
+ int code; /* as in siginfo_t::si_code */
+ int status; /* as in sigingo_t::si_status */
+};
+
+typedef enum ExecCommandFlags {
+ EXEC_COMMAND_IGNORE_FAILURE = 1 << 0,
+ EXEC_COMMAND_FULLY_PRIVILEGED = 1 << 1,
+ EXEC_COMMAND_NO_SETUID = 1 << 2,
+ EXEC_COMMAND_AMBIENT_MAGIC = 1 << 3,
+} ExecCommandFlags;
+
+/* Stores information about commands we execute. Covers both configuration settings as well as runtime data. */
+struct ExecCommand {
+ char *path;
+ char **argv;
+ ExecStatus exec_status;
+ ExecCommandFlags flags;
+ LIST_FIELDS(ExecCommand, command); /* useful for chaining commands */
+};
+
+/* Encapsulates certain aspects of the runtime environment that is to be shared between multiple otherwise separate
+ * invocations of commands. Specifically, this allows sharing of /tmp and /var/tmp data as well as network namespaces
+ * between invocations of commands. This is a reference counted object, with one reference taken by each currently
+ * active command invocation that wants to share this runtime. */
+struct ExecRuntime {
+ unsigned n_ref;
+
+ Manager *manager;
+
+ char *id; /* Unit id of the owner */
+
+ char *tmp_dir;
+ char *var_tmp_dir;
+
+ /* An AF_UNIX socket pair, that contains a datagram containing a file descriptor referring to the network
+ * namespace. */
+ int netns_storage_socket[2];
+};
+
+typedef enum ExecDirectoryType {
+ EXEC_DIRECTORY_RUNTIME = 0,
+ EXEC_DIRECTORY_STATE,
+ EXEC_DIRECTORY_CACHE,
+ EXEC_DIRECTORY_LOGS,
+ EXEC_DIRECTORY_CONFIGURATION,
+ _EXEC_DIRECTORY_TYPE_MAX,
+ _EXEC_DIRECTORY_TYPE_INVALID = -1,
+} ExecDirectoryType;
+
+typedef struct ExecDirectory {
+ char **paths;
+ mode_t mode;
+} ExecDirectory;
+
+/* Encodes configuration parameters applied to invoked commands. Does not carry runtime data, but only configuration
+ * changes sourced from unit files and suchlike. ExecContext objects are usually embedded into Unit objects, and do not
+ * change after being loaded. */
+struct ExecContext {
+ char **environment;
+ char **environment_files;
+ char **pass_environment;
+ char **unset_environment;
+
+ struct rlimit *rlimit[_RLIMIT_MAX];
+ char *working_directory, *root_directory, *root_image;
+ bool working_directory_missing_ok;
+ bool working_directory_home;
+
+ mode_t umask;
+ int oom_score_adjust;
+ int nice;
+ int ioprio;
+ int cpu_sched_policy;
+ int cpu_sched_priority;
+
+ cpu_set_t *cpuset;
+ unsigned cpuset_ncpus;
+
+ ExecInput std_input;
+ ExecOutput std_output;
+ ExecOutput std_error;
+ char *stdio_fdname[3];
+ char *stdio_file[3];
+
+ void *stdin_data;
+ size_t stdin_data_size;
+
+ nsec_t timer_slack_nsec;
+
+ bool stdio_as_fds;
+
+ char *tty_path;
+
+ bool tty_reset;
+ bool tty_vhangup;
+ bool tty_vt_disallocate;
+
+ bool ignore_sigpipe;
+
+ /* Since resolving these names might involve socket
+ * connections and we don't want to deadlock ourselves these
+ * names are resolved on execution only and in the child
+ * process. */
+ char *user;
+ char *group;
+ char **supplementary_groups;
+
+ char *pam_name;
+
+ char *utmp_id;
+ ExecUtmpMode utmp_mode;
+
+ bool selinux_context_ignore;
+ char *selinux_context;
+
+ bool apparmor_profile_ignore;
+ char *apparmor_profile;
+
+ bool smack_process_label_ignore;
+ char *smack_process_label;
+
+ ExecKeyringMode keyring_mode;
+
+ char **read_write_paths, **read_only_paths, **inaccessible_paths;
+ unsigned long mount_flags;
+ BindMount *bind_mounts;
+ size_t n_bind_mounts;
+ TemporaryFileSystem *temporary_filesystems;
+ size_t n_temporary_filesystems;
+
+ uint64_t capability_bounding_set;
+ uint64_t capability_ambient_set;
+ int secure_bits;
+
+ int syslog_priority;
+ char *syslog_identifier;
+ bool syslog_level_prefix;
+
+ int log_level_max;
+
+ struct iovec* log_extra_fields;
+ size_t n_log_extra_fields;
+
+ usec_t log_rate_limit_interval_usec;
+ unsigned log_rate_limit_burst;
+
+ bool cpu_sched_reset_on_fork;
+ bool non_blocking;
+ bool private_tmp;
+ bool private_network;
+ bool private_devices;
+ bool private_users;
+ bool private_mounts;
+ ProtectSystem protect_system;
+ ProtectHome protect_home;
+ bool protect_kernel_tunables;
+ bool protect_kernel_modules;
+ bool protect_control_groups;
+ bool mount_apivfs;
+
+ bool no_new_privileges;
+
+ bool dynamic_user;
+ bool remove_ipc;
+
+ /* This is not exposed to the user but available
+ * internally. We need it to make sure that whenever we spawn
+ * /usr/bin/mount it is run in the same process group as us so
+ * that the autofs logic detects that it belongs to us and we
+ * don't enter a trigger loop. */
+ bool same_pgrp;
+
+ unsigned long personality;
+ bool lock_personality;
+
+ unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */
+
+ Hashmap *syscall_filter;
+ Set *syscall_archs;
+ int syscall_errno;
+ bool syscall_whitelist:1;
+
+ Set *address_families;
+ bool address_families_whitelist:1;
+
+ ExecPreserveMode runtime_directory_preserve_mode;
+ ExecDirectory directories[_EXEC_DIRECTORY_TYPE_MAX];
+
+ bool memory_deny_write_execute;
+ bool restrict_realtime;
+
+ bool oom_score_adjust_set:1;
+ bool nice_set:1;
+ bool ioprio_set:1;
+ bool cpu_sched_set:1;
+};
+
+static inline bool exec_context_restrict_namespaces_set(const ExecContext *c) {
+ assert(c);
+
+ return (c->restrict_namespaces & NAMESPACE_FLAGS_ALL) != NAMESPACE_FLAGS_ALL;
+}
+
+typedef enum ExecFlags {
+ EXEC_APPLY_SANDBOXING = 1 << 0,
+ EXEC_APPLY_CHROOT = 1 << 1,
+ EXEC_APPLY_TTY_STDIN = 1 << 2,
+ EXEC_PASS_LOG_UNIT = 1 << 3, /* Whether to pass the unit name to the service's journal stream connection */
+ EXEC_CHOWN_DIRECTORIES = 1 << 4, /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */
+ EXEC_NSS_BYPASS_BUS = 1 << 5, /* Set the SYSTEMD_NSS_BYPASS_BUS environment variable, to disable nss-systemd for dbus */
+ EXEC_CGROUP_DELEGATE = 1 << 6,
+ EXEC_IS_CONTROL = 1 << 7,
+ EXEC_CONTROL_CGROUP = 1 << 8, /* Place the process not in the indicated cgroup but in a subcgroup '/.control', but only EXEC_CGROUP_DELEGATE and EXEC_IS_CONTROL is set, too */
+
+ /* The following are not used by execute.c, but by consumers internally */
+ EXEC_PASS_FDS = 1 << 9,
+ EXEC_SETENV_RESULT = 1 << 10,
+ EXEC_SET_WATCHDOG = 1 << 11,
+} ExecFlags;
+
+/* Parameters for a specific invocation of a command. This structure is put together right before a command is
+ * executed. */
+struct ExecParameters {
+ char **environment;
+
+ int *fds;
+ char **fd_names;
+ size_t n_socket_fds;
+ size_t n_storage_fds;
+
+ ExecFlags flags;
+ bool selinux_context_net:1;
+
+ CGroupMask cgroup_supported;
+ const char *cgroup_path;
+
+ char **prefix;
+
+ const char *confirm_spawn;
+
+ usec_t watchdog_usec;
+
+ int *idle_pipe;
+
+ int stdin_fd;
+ int stdout_fd;
+ int stderr_fd;
+
+ /* An fd that is closed by the execve(), and thus will result in EOF when the execve() is done */
+ int exec_fd;
+};
+
+#include "unit.h"
+#include "dynamic-user.h"
+
+int exec_spawn(Unit *unit,
+ ExecCommand *command,
+ const ExecContext *context,
+ const ExecParameters *exec_params,
+ ExecRuntime *runtime,
+ DynamicCreds *dynamic_creds,
+ pid_t *ret);
+
+void exec_command_done_array(ExecCommand *c, size_t n);
+ExecCommand* exec_command_free_list(ExecCommand *c);
+void exec_command_free_array(ExecCommand **c, size_t n);
+void exec_command_reset_status_array(ExecCommand *c, size_t n);
+void exec_command_reset_status_list_array(ExecCommand **c, size_t n);
+void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix);
+void exec_command_append_list(ExecCommand **l, ExecCommand *e);
+int exec_command_set(ExecCommand *c, const char *path, ...) _sentinel_;
+int exec_command_append(ExecCommand *c, const char *path, ...) _sentinel_;
+
+void exec_context_init(ExecContext *c);
+void exec_context_done(ExecContext *c);
+void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix);
+
+int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_root);
+
+const char* exec_context_fdname(const ExecContext *c, int fd_index);
+
+bool exec_context_may_touch_console(const ExecContext *c);
+bool exec_context_maintains_privileges(const ExecContext *c);
+
+int exec_context_get_effective_ioprio(const ExecContext *c);
+
+void exec_context_free_log_extra_fields(ExecContext *c);
+
+void exec_status_start(ExecStatus *s, pid_t pid);
+void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status);
+void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix);
+void exec_status_reset(ExecStatus *s);
+
+int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *name, bool create, ExecRuntime **ret);
+ExecRuntime *exec_runtime_unref(ExecRuntime *r, bool destroy);
+
+int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds);
+int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds);
+void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds);
+void exec_runtime_vacuum(Manager *m);
+
+void exec_params_clear(ExecParameters *p);
+
+const char* exec_output_to_string(ExecOutput i) _const_;
+ExecOutput exec_output_from_string(const char *s) _pure_;
+
+const char* exec_input_to_string(ExecInput i) _const_;
+ExecInput exec_input_from_string(const char *s) _pure_;
+
+const char* exec_utmp_mode_to_string(ExecUtmpMode i) _const_;
+ExecUtmpMode exec_utmp_mode_from_string(const char *s) _pure_;
+
+const char* exec_preserve_mode_to_string(ExecPreserveMode i) _const_;
+ExecPreserveMode exec_preserve_mode_from_string(const char *s) _pure_;
+
+const char* exec_keyring_mode_to_string(ExecKeyringMode i) _const_;
+ExecKeyringMode exec_keyring_mode_from_string(const char *s) _pure_;
+
+const char* exec_directory_type_to_string(ExecDirectoryType i) _const_;
+ExecDirectoryType exec_directory_type_from_string(const char *s) _pure_;
diff --git a/src/core/hostname-setup.c b/src/core/hostname-setup.c
new file mode 100644
index 0000000..83cce88
--- /dev/null
+++ b/src/core/hostname-setup.c
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "hostname-setup.h"
+#include "hostname-util.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "util.h"
+
+int hostname_setup(void) {
+ _cleanup_free_ char *b = NULL;
+ bool enoent = false;
+ const char *hn;
+ int r;
+
+ r = read_etc_hostname(NULL, &b);
+ if (r < 0) {
+ if (r == -ENOENT)
+ enoent = true;
+ else
+ log_warning_errno(r, "Failed to read configured hostname: %m");
+
+ hn = NULL;
+ } else
+ hn = b;
+
+ if (isempty(hn)) {
+ /* Don't override the hostname if it is already set
+ * and not explicitly configured */
+ if (hostname_is_set())
+ return 0;
+
+ if (enoent)
+ log_info("No hostname configured.");
+
+ hn = FALLBACK_HOSTNAME;
+ }
+
+ r = sethostname_idempotent(hn);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to set hostname to <%s>: %m", hn);
+
+ log_info("Set hostname to <%s>.", hn);
+ return 0;
+}
diff --git a/src/core/hostname-setup.h b/src/core/hostname-setup.h
new file mode 100644
index 0000000..dc7b9a6
--- /dev/null
+++ b/src/core/hostname-setup.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int hostname_setup(void);
diff --git a/src/core/ima-setup.c b/src/core/ima-setup.c
new file mode 100644
index 0000000..fd7c5f6
--- /dev/null
+++ b/src/core/ima-setup.c
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2012 Roberto Sassu - Politecnico di Torino, Italy
+ TORSEC group — http://security.polito.it
+***/
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "ima-setup.h"
+#include "log.h"
+#include "util.h"
+
+#define IMA_SECFS_DIR "/sys/kernel/security/ima"
+#define IMA_SECFS_POLICY IMA_SECFS_DIR "/policy"
+#define IMA_POLICY_PATH "/etc/ima/ima-policy"
+
+int ima_setup(void) {
+#if ENABLE_IMA
+ _cleanup_fclose_ FILE *input = NULL;
+ _cleanup_close_ int imafd = -1;
+ unsigned lineno = 0;
+ int r;
+
+ if (access(IMA_SECFS_DIR, F_OK) < 0) {
+ log_debug_errno(errno, "IMA support is disabled in the kernel, ignoring: %m");
+ return 0;
+ }
+
+ if (access(IMA_SECFS_POLICY, W_OK) < 0) {
+ log_warning_errno(errno, "Another IMA custom policy has already been loaded, ignoring: %m");
+ return 0;
+ }
+
+ if (access(IMA_POLICY_PATH, F_OK) < 0) {
+ log_debug_errno(errno, "No IMA custom policy file "IMA_POLICY_PATH", ignoring: %m");
+ return 0;
+ }
+
+ imafd = open(IMA_SECFS_POLICY, O_WRONLY|O_CLOEXEC);
+ if (imafd < 0) {
+ log_error_errno(errno, "Failed to open the IMA kernel interface "IMA_SECFS_POLICY", ignoring: %m");
+ return 0;
+ }
+
+ /* attempt to write the name of the policy file into sysfs file */
+ if (write(imafd, IMA_POLICY_PATH, STRLEN(IMA_POLICY_PATH)) > 0)
+ goto done;
+
+ /* fall back to copying the policy line-by-line */
+ input = fopen(IMA_POLICY_PATH, "re");
+ if (!input) {
+ log_warning_errno(errno, "Failed to open the IMA custom policy file "IMA_POLICY_PATH", ignoring: %m");
+ return 0;
+ }
+
+ safe_close(imafd);
+
+ imafd = open(IMA_SECFS_POLICY, O_WRONLY|O_CLOEXEC);
+ if (imafd < 0) {
+ log_error_errno(errno, "Failed to open the IMA kernel interface "IMA_SECFS_POLICY", ignoring: %m");
+ return 0;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ size_t len;
+
+ r = read_line(input, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read the IMA custom policy file "IMA_POLICY_PATH": %m");
+ if (r == 0)
+ break;
+
+ len = strlen(line);
+ lineno++;
+
+ if (len > 0 && write(imafd, line, len) < 0)
+ return log_error_errno(errno, "Failed to load the IMA custom policy file "IMA_POLICY_PATH"%u: %m",
+ lineno);
+ }
+
+done:
+ log_info("Successfully loaded the IMA custom policy "IMA_POLICY_PATH".");
+#endif /* ENABLE_IMA */
+ return 0;
+}
diff --git a/src/core/ima-setup.h b/src/core/ima-setup.h
new file mode 100644
index 0000000..cf47879
--- /dev/null
+++ b/src/core/ima-setup.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2012 Roberto Sassu - Politecnico di Torino, Italy
+ TORSEC group — http://security.polito.it
+***/
+
+int ima_setup(void);
diff --git a/src/core/ip-address-access.c b/src/core/ip-address-access.c
new file mode 100644
index 0000000..1d431bb
--- /dev/null
+++ b/src/core/ip-address-access.c
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "extract-word.h"
+#include "hostname-util.h"
+#include "ip-address-access.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+int config_parse_ip_address_access(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ IPAddressAccessItem **list = data;
+ const char *p;
+ int r;
+
+ assert(list);
+
+ if (isempty(rvalue)) {
+ *list = ip_address_access_free_all(*list);
+ return 0;
+ }
+
+ p = rvalue;
+
+ for (;;) {
+ _cleanup_free_ IPAddressAccessItem *a = NULL;
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ break;
+ }
+
+ a = new0(IPAddressAccessItem, 1);
+ if (!a)
+ return log_oom();
+
+ if (streq(word, "any")) {
+ /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
+
+ a->family = AF_INET;
+ LIST_APPEND(items, *list, a);
+
+ a = new0(IPAddressAccessItem, 1);
+ if (!a)
+ return log_oom();
+
+ a->family = AF_INET6;
+
+ } else if (is_localhost(word)) {
+ /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
+
+ a->family = AF_INET;
+ a->address.in.s_addr = htobe32(0x7f000000);
+ a->prefixlen = 8;
+ LIST_APPEND(items, *list, a);
+
+ a = new0(IPAddressAccessItem, 1);
+ if (!a)
+ return log_oom();
+
+ a->family = AF_INET6;
+ a->address.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
+ a->prefixlen = 128;
+
+ } else if (streq(word, "link-local")) {
+
+ /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
+
+ a->family = AF_INET;
+ a->address.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
+ a->prefixlen = 16;
+ LIST_APPEND(items, *list, a);
+
+ a = new0(IPAddressAccessItem, 1);
+ if (!a)
+ return log_oom();
+
+ a->family = AF_INET6;
+ a->address.in6 = (struct in6_addr) {
+ .s6_addr32[0] = htobe32(0xfe800000)
+ };
+ a->prefixlen = 64;
+
+ } else if (streq(word, "multicast")) {
+
+ /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
+
+ a->family = AF_INET;
+ a->address.in.s_addr = htobe32((UINT32_C(224) << 24));
+ a->prefixlen = 4;
+ LIST_APPEND(items, *list, a);
+
+ a = new0(IPAddressAccessItem, 1);
+ if (!a)
+ return log_oom();
+
+ a->family = AF_INET6;
+ a->address.in6 = (struct in6_addr) {
+ .s6_addr32[0] = htobe32(0xff000000)
+ };
+ a->prefixlen = 8;
+
+ } else {
+ r = in_addr_prefix_from_string_auto(word, &a->family, &a->address, &a->prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Address prefix is invalid, ignoring assignment: %s", word);
+ return 0;
+ }
+ }
+
+ LIST_APPEND(items, *list, a);
+ a = NULL;
+ }
+
+ *list = ip_address_access_reduce(*list);
+
+ if (*list) {
+ r = bpf_firewall_supported();
+ if (r < 0)
+ return r;
+ if (r == BPF_FIREWALL_UNSUPPORTED) {
+ static bool warned = false;
+
+ log_full(warned ? LOG_DEBUG : LOG_WARNING,
+ "File %s:%u configures an IP firewall (%s=%s), but the local system does not support BPF/cgroup based firewalling.\n"
+ "Proceeding WITHOUT firewalling in effect! (This warning is only shown for the first loaded unit using IP firewalling.)", filename, line, lvalue, rvalue);
+
+ warned = true;
+ }
+ }
+
+ return 0;
+}
+
+IPAddressAccessItem* ip_address_access_free_all(IPAddressAccessItem *first) {
+ IPAddressAccessItem *next, *p = first;
+
+ while (p) {
+ next = p->items_next;
+ free(p);
+
+ p = next;
+ }
+
+ return NULL;
+}
+
+IPAddressAccessItem* ip_address_access_reduce(IPAddressAccessItem *first) {
+ IPAddressAccessItem *a, *b, *tmp;
+ int r;
+
+ /* Drops all entries from the list that are covered by another entry in full, thus removing all redundant
+ * entries. */
+
+ LIST_FOREACH_SAFE(items, a, tmp, first) {
+
+ /* Drop irrelevant bits */
+ (void) in_addr_mask(a->family, &a->address, a->prefixlen);
+
+ LIST_FOREACH(items, b, first) {
+
+ if (a == b)
+ continue;
+
+ if (a->family != b->family)
+ continue;
+
+ if (b->prefixlen > a->prefixlen)
+ continue;
+
+ r = in_addr_prefix_covers(b->family,
+ &b->address,
+ b->prefixlen,
+ &a->address);
+ if (r > 0) {
+ /* b covers a fully, then let's drop a */
+ LIST_REMOVE(items, first, a);
+ free(a);
+ break;
+ }
+ }
+ }
+
+ return first;
+}
diff --git a/src/core/ip-address-access.h b/src/core/ip-address-access.h
new file mode 100644
index 0000000..77078e1
--- /dev/null
+++ b/src/core/ip-address-access.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "list.h"
+
+typedef struct IPAddressAccessItem IPAddressAccessItem;
+
+struct IPAddressAccessItem {
+ int family;
+ unsigned char prefixlen;
+ union in_addr_union address;
+ LIST_FIELDS(IPAddressAccessItem, items);
+};
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ip_address_access);
+
+IPAddressAccessItem* ip_address_access_free_all(IPAddressAccessItem *first);
+
+IPAddressAccessItem* ip_address_access_reduce(IPAddressAccessItem *first);
diff --git a/src/core/job.c b/src/core/job.c
new file mode 100644
index 0000000..59bb9d2
--- /dev/null
+++ b/src/core/job.c
@@ -0,0 +1,1668 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "sd-id128.h"
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "async.h"
+#include "dbus-job.h"
+#include "dbus.h"
+#include "escape.h"
+#include "fileio.h"
+#include "job.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "serialize.h"
+#include "set.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "unit.h"
+#include "virt.h"
+
+Job* job_new_raw(Unit *unit) {
+ Job *j;
+
+ /* used for deserialization */
+
+ assert(unit);
+
+ j = new(Job, 1);
+ if (!j)
+ return NULL;
+
+ *j = (Job) {
+ .manager = unit->manager,
+ .unit = unit,
+ .type = _JOB_TYPE_INVALID,
+ };
+
+ return j;
+}
+
+Job* job_new(Unit *unit, JobType type) {
+ Job *j;
+
+ assert(type < _JOB_TYPE_MAX);
+
+ j = job_new_raw(unit);
+ if (!j)
+ return NULL;
+
+ j->id = j->manager->current_job_id++;
+ j->type = type;
+
+ /* We don't link it here, that's what job_dependency() is for */
+
+ return j;
+}
+
+void job_unlink(Job *j) {
+ assert(j);
+ assert(!j->installed);
+ assert(!j->transaction_prev);
+ assert(!j->transaction_next);
+ assert(!j->subject_list);
+ assert(!j->object_list);
+
+ if (j->in_run_queue) {
+ LIST_REMOVE(run_queue, j->manager->run_queue, j);
+ j->in_run_queue = false;
+ }
+
+ if (j->in_dbus_queue) {
+ LIST_REMOVE(dbus_queue, j->manager->dbus_job_queue, j);
+ j->in_dbus_queue = false;
+ }
+
+ if (j->in_gc_queue) {
+ LIST_REMOVE(gc_queue, j->manager->gc_job_queue, j);
+ j->in_gc_queue = false;
+ }
+
+ j->timer_event_source = sd_event_source_unref(j->timer_event_source);
+}
+
+Job* job_free(Job *j) {
+ assert(j);
+ assert(!j->installed);
+ assert(!j->transaction_prev);
+ assert(!j->transaction_next);
+ assert(!j->subject_list);
+ assert(!j->object_list);
+
+ job_unlink(j);
+
+ sd_bus_track_unref(j->bus_track);
+ strv_free(j->deserialized_clients);
+
+ return mfree(j);
+}
+
+static void job_set_state(Job *j, JobState state) {
+ assert(j);
+ assert(state >= 0);
+ assert(state < _JOB_STATE_MAX);
+
+ if (j->state == state)
+ return;
+
+ j->state = state;
+
+ if (!j->installed)
+ return;
+
+ if (j->state == JOB_RUNNING)
+ j->unit->manager->n_running_jobs++;
+ else {
+ assert(j->state == JOB_WAITING);
+ assert(j->unit->manager->n_running_jobs > 0);
+
+ j->unit->manager->n_running_jobs--;
+
+ if (j->unit->manager->n_running_jobs <= 0)
+ j->unit->manager->jobs_in_progress_event_source = sd_event_source_unref(j->unit->manager->jobs_in_progress_event_source);
+ }
+}
+
+void job_uninstall(Job *j) {
+ Job **pj;
+
+ assert(j->installed);
+
+ job_set_state(j, JOB_WAITING);
+
+ pj = (j->type == JOB_NOP) ? &j->unit->nop_job : &j->unit->job;
+ assert(*pj == j);
+
+ /* Detach from next 'bigger' objects */
+
+ /* daemon-reload should be transparent to job observers */
+ if (!MANAGER_IS_RELOADING(j->manager))
+ bus_job_send_removed_signal(j);
+
+ *pj = NULL;
+
+ unit_add_to_gc_queue(j->unit);
+
+ unit_add_to_dbus_queue(j->unit); /* The Job property of the unit has changed now */
+
+ hashmap_remove_value(j->manager->jobs, UINT32_TO_PTR(j->id), j);
+ j->installed = false;
+}
+
+static bool job_type_allows_late_merge(JobType t) {
+ /* Tells whether it is OK to merge a job of type 't' with an already
+ * running job.
+ * Reloads cannot be merged this way. Think of the sequence:
+ * 1. Reload of a daemon is in progress; the daemon has already loaded
+ * its config file, but hasn't completed the reload operation yet.
+ * 2. Edit foo's config file.
+ * 3. Trigger another reload to have the daemon use the new config.
+ * Should the second reload job be merged into the first one, the daemon
+ * would not know about the new config.
+ * JOB_RESTART jobs on the other hand can be merged, because they get
+ * patched into JOB_START after stopping the unit. So if we see a
+ * JOB_RESTART running, it means the unit hasn't stopped yet and at
+ * this time the merge is still allowed. */
+ return t != JOB_RELOAD;
+}
+
+static void job_merge_into_installed(Job *j, Job *other) {
+ assert(j->installed);
+ assert(j->unit == other->unit);
+
+ if (j->type != JOB_NOP)
+ assert_se(job_type_merge_and_collapse(&j->type, other->type, j->unit) == 0);
+ else
+ assert(other->type == JOB_NOP);
+
+ j->irreversible = j->irreversible || other->irreversible;
+ j->ignore_order = j->ignore_order || other->ignore_order;
+}
+
+Job* job_install(Job *j) {
+ Job **pj;
+ Job *uj;
+
+ assert(!j->installed);
+ assert(j->type < _JOB_TYPE_MAX_IN_TRANSACTION);
+ assert(j->state == JOB_WAITING);
+
+ pj = (j->type == JOB_NOP) ? &j->unit->nop_job : &j->unit->job;
+ uj = *pj;
+
+ if (uj) {
+ if (job_type_is_conflicting(uj->type, j->type))
+ job_finish_and_invalidate(uj, JOB_CANCELED, false, false);
+ else {
+ /* not conflicting, i.e. mergeable */
+
+ if (uj->state == JOB_WAITING ||
+ (job_type_allows_late_merge(j->type) && job_type_is_superset(uj->type, j->type))) {
+ job_merge_into_installed(uj, j);
+ log_unit_debug(uj->unit,
+ "Merged %s/%s into installed job %s/%s as %"PRIu32,
+ j->unit->id, job_type_to_string(j->type), uj->unit->id,
+ job_type_to_string(uj->type), uj->id);
+ return uj;
+ } else {
+ /* already running and not safe to merge into */
+ /* Patch uj to become a merged job and re-run it. */
+ /* XXX It should be safer to queue j to run after uj finishes, but it is
+ * not currently possible to have more than one installed job per unit. */
+ job_merge_into_installed(uj, j);
+ log_unit_debug(uj->unit,
+ "Merged into running job, re-running: %s/%s as %"PRIu32,
+ uj->unit->id, job_type_to_string(uj->type), uj->id);
+
+ job_set_state(uj, JOB_WAITING);
+ return uj;
+ }
+ }
+ }
+
+ /* Install the job */
+ *pj = j;
+ j->installed = true;
+
+ j->manager->n_installed_jobs++;
+ log_unit_debug(j->unit,
+ "Installed new job %s/%s as %u",
+ j->unit->id, job_type_to_string(j->type), (unsigned) j->id);
+
+ job_add_to_gc_queue(j);
+
+ job_add_to_dbus_queue(j); /* announce this job to clients */
+ unit_add_to_dbus_queue(j->unit); /* The Job property of the unit has changed now */
+
+ return j;
+}
+
+int job_install_deserialized(Job *j) {
+ Job **pj;
+ int r;
+
+ assert(!j->installed);
+
+ if (j->type < 0 || j->type >= _JOB_TYPE_MAX_IN_TRANSACTION)
+ return log_unit_debug_errno(j->unit, SYNTHETIC_ERRNO(EINVAL),
+ "Invalid job type %s in deserialization.",
+ strna(job_type_to_string(j->type)));
+
+ pj = (j->type == JOB_NOP) ? &j->unit->nop_job : &j->unit->job;
+ if (*pj)
+ return log_unit_debug_errno(j->unit, SYNTHETIC_ERRNO(EEXIST),
+ "Unit already has a job installed. Not installing deserialized job.");
+
+ r = hashmap_put(j->manager->jobs, UINT32_TO_PTR(j->id), j);
+ if (r == -EEXIST)
+ return log_unit_debug_errno(j->unit, r, "Job ID %" PRIu32 " already used, cannot deserialize job.", j->id);
+ if (r < 0)
+ return log_unit_debug_errno(j->unit, r, "Failed to insert job into jobs hash table: %m");
+
+ *pj = j;
+ j->installed = true;
+
+ if (j->state == JOB_RUNNING)
+ j->unit->manager->n_running_jobs++;
+
+ log_unit_debug(j->unit,
+ "Reinstalled deserialized job %s/%s as %u",
+ j->unit->id, job_type_to_string(j->type), (unsigned) j->id);
+ return 0;
+}
+
+JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts) {
+ JobDependency *l;
+
+ assert(object);
+
+ /* Adds a new job link, which encodes that the 'subject' job
+ * needs the 'object' job in some way. If 'subject' is NULL
+ * this means the 'anchor' job (i.e. the one the user
+ * explicitly asked for) is the requester. */
+
+ l = new0(JobDependency, 1);
+ if (!l)
+ return NULL;
+
+ l->subject = subject;
+ l->object = object;
+ l->matters = matters;
+ l->conflicts = conflicts;
+
+ if (subject)
+ LIST_PREPEND(subject, subject->subject_list, l);
+
+ LIST_PREPEND(object, object->object_list, l);
+
+ return l;
+}
+
+void job_dependency_free(JobDependency *l) {
+ assert(l);
+
+ if (l->subject)
+ LIST_REMOVE(subject, l->subject->subject_list, l);
+
+ LIST_REMOVE(object, l->object->object_list, l);
+
+ free(l);
+}
+
+void job_dump(Job *j, FILE *f, const char *prefix) {
+ assert(j);
+ assert(f);
+
+ prefix = strempty(prefix);
+
+ fprintf(f,
+ "%s-> Job %u:\n"
+ "%s\tAction: %s -> %s\n"
+ "%s\tState: %s\n"
+ "%s\tIrreversible: %s\n"
+ "%s\tMay GC: %s\n",
+ prefix, j->id,
+ prefix, j->unit->id, job_type_to_string(j->type),
+ prefix, job_state_to_string(j->state),
+ prefix, yes_no(j->irreversible),
+ prefix, yes_no(job_may_gc(j)));
+}
+
+/*
+ * Merging is commutative, so imagine the matrix as symmetric. We store only
+ * its lower triangle to avoid duplication. We don't store the main diagonal,
+ * because A merged with A is simply A.
+ *
+ * If the resulting type is collapsed immediately afterwards (to get rid of
+ * the JOB_RELOAD_OR_START, which lies outside the lookup function's domain),
+ * the following properties hold:
+ *
+ * Merging is associative! A merged with B, and then merged with C is the same
+ * as A merged with the result of B merged with C.
+ *
+ * Mergeability is transitive! If A can be merged with B and B with C then
+ * A also with C.
+ *
+ * Also, if A merged with B cannot be merged with C, then either A or B cannot
+ * be merged with C either.
+ */
+static const JobType job_merging_table[] = {
+/* What \ With * JOB_START JOB_VERIFY_ACTIVE JOB_STOP JOB_RELOAD */
+/*********************************************************************************/
+/*JOB_START */
+/*JOB_VERIFY_ACTIVE */ JOB_START,
+/*JOB_STOP */ -1, -1,
+/*JOB_RELOAD */ JOB_RELOAD_OR_START, JOB_RELOAD, -1,
+/*JOB_RESTART */ JOB_RESTART, JOB_RESTART, -1, JOB_RESTART,
+};
+
+JobType job_type_lookup_merge(JobType a, JobType b) {
+ assert_cc(ELEMENTSOF(job_merging_table) == _JOB_TYPE_MAX_MERGING * (_JOB_TYPE_MAX_MERGING - 1) / 2);
+ assert(a >= 0 && a < _JOB_TYPE_MAX_MERGING);
+ assert(b >= 0 && b < _JOB_TYPE_MAX_MERGING);
+
+ if (a == b)
+ return a;
+
+ if (a < b) {
+ JobType tmp = a;
+ a = b;
+ b = tmp;
+ }
+
+ return job_merging_table[(a - 1) * a / 2 + b];
+}
+
+bool job_type_is_redundant(JobType a, UnitActiveState b) {
+ switch (a) {
+
+ case JOB_START:
+ return IN_SET(b, UNIT_ACTIVE, UNIT_RELOADING);
+
+ case JOB_STOP:
+ return IN_SET(b, UNIT_INACTIVE, UNIT_FAILED);
+
+ case JOB_VERIFY_ACTIVE:
+ return IN_SET(b, UNIT_ACTIVE, UNIT_RELOADING);
+
+ case JOB_RELOAD:
+ return
+ b == UNIT_RELOADING;
+
+ case JOB_RESTART:
+ return
+ b == UNIT_ACTIVATING;
+
+ case JOB_NOP:
+ return true;
+
+ default:
+ assert_not_reached("Invalid job type");
+ }
+}
+
+JobType job_type_collapse(JobType t, Unit *u) {
+ UnitActiveState s;
+
+ switch (t) {
+
+ case JOB_TRY_RESTART:
+ s = unit_active_state(u);
+ if (UNIT_IS_INACTIVE_OR_DEACTIVATING(s))
+ return JOB_NOP;
+
+ return JOB_RESTART;
+
+ case JOB_TRY_RELOAD:
+ s = unit_active_state(u);
+ if (UNIT_IS_INACTIVE_OR_DEACTIVATING(s))
+ return JOB_NOP;
+
+ return JOB_RELOAD;
+
+ case JOB_RELOAD_OR_START:
+ s = unit_active_state(u);
+ if (UNIT_IS_INACTIVE_OR_DEACTIVATING(s))
+ return JOB_START;
+
+ return JOB_RELOAD;
+
+ default:
+ return t;
+ }
+}
+
+int job_type_merge_and_collapse(JobType *a, JobType b, Unit *u) {
+ JobType t;
+
+ t = job_type_lookup_merge(*a, b);
+ if (t < 0)
+ return -EEXIST;
+
+ *a = job_type_collapse(t, u);
+ return 0;
+}
+
+static bool job_is_runnable(Job *j) {
+ Iterator i;
+ Unit *other;
+ void *v;
+
+ assert(j);
+ assert(j->installed);
+
+ /* Checks whether there is any job running for the units this
+ * job needs to be running after (in the case of a 'positive'
+ * job type) or before (in the case of a 'negative' job
+ * type. */
+
+ /* Note that unit types have a say in what is runnable,
+ * too. For example, if they return -EAGAIN from
+ * unit_start() they can indicate they are not
+ * runnable yet. */
+
+ /* First check if there is an override */
+ if (j->ignore_order)
+ return true;
+
+ if (j->type == JOB_NOP)
+ return true;
+
+ if (IN_SET(j->type, JOB_START, JOB_VERIFY_ACTIVE, JOB_RELOAD)) {
+ /* Immediate result is that the job is or might be
+ * started. In this case let's wait for the
+ * dependencies, regardless whether they are
+ * starting or stopping something. */
+
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER], i)
+ if (other->job)
+ return false;
+ }
+
+ /* Also, if something else is being stopped and we should
+ * change state after it, then let's wait. */
+
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_BEFORE], i)
+ if (other->job &&
+ IN_SET(other->job->type, JOB_STOP, JOB_RESTART))
+ return false;
+
+ /* This means that for a service a and a service b where b
+ * shall be started after a:
+ *
+ * start a + start b → 1st step start a, 2nd step start b
+ * start a + stop b → 1st step stop b, 2nd step start a
+ * stop a + start b → 1st step stop a, 2nd step start b
+ * stop a + stop b → 1st step stop b, 2nd step stop a
+ *
+ * This has the side effect that restarts are properly
+ * synchronized too. */
+
+ return true;
+}
+
+static void job_change_type(Job *j, JobType newtype) {
+ assert(j);
+
+ log_unit_debug(j->unit,
+ "Converting job %s/%s -> %s/%s",
+ j->unit->id, job_type_to_string(j->type),
+ j->unit->id, job_type_to_string(newtype));
+
+ j->type = newtype;
+}
+
+_pure_ static const char* job_get_begin_status_message_format(Unit *u, JobType t) {
+ const char *format;
+
+ assert(u);
+
+ if (t == JOB_RELOAD)
+ return "Reloading %s.";
+
+ assert(IN_SET(t, JOB_START, JOB_STOP));
+
+ format = UNIT_VTABLE(u)->status_message_formats.starting_stopping[t == JOB_STOP];
+ if (format)
+ return format;
+
+ /* Return generic strings */
+ if (t == JOB_START)
+ return "Starting %s.";
+ else {
+ assert(t == JOB_STOP);
+ return "Stopping %s.";
+ }
+}
+
+static void job_print_begin_status_message(Unit *u, JobType t) {
+ const char *format;
+
+ assert(u);
+
+ /* Reload status messages have traditionally not been printed to console. */
+ if (!IN_SET(t, JOB_START, JOB_STOP))
+ return;
+
+ format = job_get_begin_status_message_format(u, t);
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ unit_status_printf(u, "", format);
+ REENABLE_WARNING;
+}
+
+static void job_log_begin_status_message(Unit *u, uint32_t job_id, JobType t) {
+ const char *format, *mid;
+ char buf[LINE_MAX];
+
+ assert(u);
+ assert(t >= 0);
+ assert(t < _JOB_TYPE_MAX);
+
+ if (!IN_SET(t, JOB_START, JOB_STOP, JOB_RELOAD))
+ return;
+
+ if (log_on_console()) /* Skip this if it would only go on the console anyway */
+ return;
+
+ /* We log status messages for all units and all operations. */
+
+ format = job_get_begin_status_message_format(u, t);
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ (void) snprintf(buf, sizeof buf, format, unit_description(u));
+ REENABLE_WARNING;
+
+ mid = t == JOB_START ? "MESSAGE_ID=" SD_MESSAGE_UNIT_STARTING_STR :
+ t == JOB_STOP ? "MESSAGE_ID=" SD_MESSAGE_UNIT_STOPPING_STR :
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_RELOADING_STR;
+
+ /* Note that we deliberately use LOG_MESSAGE() instead of
+ * LOG_UNIT_MESSAGE() here, since this is supposed to mimic
+ * closely what is written to screen using the status output,
+ * which is supposed the highest level, friendliest output
+ * possible, which means we should avoid the low-level unit
+ * name. */
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("%s", buf),
+ "JOB_ID=%" PRIu32, job_id,
+ "JOB_TYPE=%s", job_type_to_string(t),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ mid);
+}
+
+static void job_emit_begin_status_message(Unit *u, uint32_t job_id, JobType t) {
+ assert(u);
+ assert(t >= 0);
+ assert(t < _JOB_TYPE_MAX);
+
+ job_log_begin_status_message(u, job_id, t);
+ job_print_begin_status_message(u, t);
+}
+
+static int job_perform_on_unit(Job **j) {
+ uint32_t id;
+ Manager *m;
+ JobType t;
+ Unit *u;
+ int r;
+
+ /* While we execute this operation the job might go away (for
+ * example: because it finishes immediately or is replaced by
+ * a new, conflicting job.) To make sure we don't access a
+ * freed job later on we store the id here, so that we can
+ * verify the job is still valid. */
+
+ assert(j);
+ assert(*j);
+
+ m = (*j)->manager;
+ u = (*j)->unit;
+ t = (*j)->type;
+ id = (*j)->id;
+
+ switch (t) {
+ case JOB_START:
+ r = unit_start(u);
+ break;
+
+ case JOB_RESTART:
+ t = JOB_STOP;
+ _fallthrough_;
+ case JOB_STOP:
+ r = unit_stop(u);
+ break;
+
+ case JOB_RELOAD:
+ r = unit_reload(u);
+ break;
+
+ default:
+ assert_not_reached("Invalid job type");
+ }
+
+ /* Log if the job still exists and the start/stop/reload function actually did something. Note that this means
+ * for units for which there's no 'activating' phase (i.e. because we transition directly from 'inactive' to
+ * 'active') we'll possibly skip the "Starting..." message. */
+ *j = manager_get_job(m, id);
+ if (*j && r > 0)
+ job_emit_begin_status_message(u, id, t);
+
+ return r;
+}
+
+int job_run_and_invalidate(Job *j) {
+ int r;
+
+ assert(j);
+ assert(j->installed);
+ assert(j->type < _JOB_TYPE_MAX_IN_TRANSACTION);
+ assert(j->in_run_queue);
+
+ LIST_REMOVE(run_queue, j->manager->run_queue, j);
+ j->in_run_queue = false;
+
+ if (j->state != JOB_WAITING)
+ return 0;
+
+ if (!job_is_runnable(j))
+ return -EAGAIN;
+
+ job_start_timer(j, true);
+ job_set_state(j, JOB_RUNNING);
+ job_add_to_dbus_queue(j);
+
+ switch (j->type) {
+
+ case JOB_VERIFY_ACTIVE: {
+ UnitActiveState t;
+
+ t = unit_active_state(j->unit);
+ if (UNIT_IS_ACTIVE_OR_RELOADING(t))
+ r = -EALREADY;
+ else if (t == UNIT_ACTIVATING)
+ r = -EAGAIN;
+ else
+ r = -EBADR;
+ break;
+ }
+
+ case JOB_START:
+ case JOB_STOP:
+ case JOB_RESTART:
+ r = job_perform_on_unit(&j);
+
+ /* If the unit type does not support starting/stopping, then simply wait. */
+ if (r == -EBADR)
+ r = 0;
+ break;
+
+ case JOB_RELOAD:
+ r = job_perform_on_unit(&j);
+ break;
+
+ case JOB_NOP:
+ r = -EALREADY;
+ break;
+
+ default:
+ assert_not_reached("Unknown job type");
+ }
+
+ if (j) {
+ if (r == -EAGAIN)
+ job_set_state(j, JOB_WAITING); /* Hmm, not ready after all, let's return to JOB_WAITING state */
+ else if (r == -EALREADY) /* already being executed */
+ r = job_finish_and_invalidate(j, JOB_DONE, true, true);
+ else if (r == -ECOMM) /* condition failed, but all is good */
+ r = job_finish_and_invalidate(j, JOB_DONE, true, false);
+ else if (r == -EBADR)
+ r = job_finish_and_invalidate(j, JOB_SKIPPED, true, false);
+ else if (r == -ENOEXEC)
+ r = job_finish_and_invalidate(j, JOB_INVALID, true, false);
+ else if (r == -EPROTO)
+ r = job_finish_and_invalidate(j, JOB_ASSERT, true, false);
+ else if (r == -EOPNOTSUPP)
+ r = job_finish_and_invalidate(j, JOB_UNSUPPORTED, true, false);
+ else if (r == -ENOLINK)
+ r = job_finish_and_invalidate(j, JOB_DEPENDENCY, true, false);
+ else if (r == -ESTALE)
+ r = job_finish_and_invalidate(j, JOB_ONCE, true, false);
+ else if (r < 0)
+ r = job_finish_and_invalidate(j, JOB_FAILED, true, false);
+ }
+
+ return r;
+}
+
+_pure_ static const char *job_get_done_status_message_format(Unit *u, JobType t, JobResult result) {
+
+ static const char *const generic_finished_start_job[_JOB_RESULT_MAX] = {
+ [JOB_DONE] = "Started %s.",
+ [JOB_TIMEOUT] = "Timed out starting %s.",
+ [JOB_FAILED] = "Failed to start %s.",
+ [JOB_DEPENDENCY] = "Dependency failed for %s.",
+ [JOB_ASSERT] = "Assertion failed for %s.",
+ [JOB_UNSUPPORTED] = "Starting of %s not supported.",
+ [JOB_COLLECTED] = "Unnecessary job for %s was removed.",
+ [JOB_ONCE] = "Unit %s has been started before and cannot be started again."
+ };
+ static const char *const generic_finished_stop_job[_JOB_RESULT_MAX] = {
+ [JOB_DONE] = "Stopped %s.",
+ [JOB_FAILED] = "Stopped (with error) %s.",
+ [JOB_TIMEOUT] = "Timed out stopping %s.",
+ };
+ static const char *const generic_finished_reload_job[_JOB_RESULT_MAX] = {
+ [JOB_DONE] = "Reloaded %s.",
+ [JOB_FAILED] = "Reload failed for %s.",
+ [JOB_TIMEOUT] = "Timed out reloading %s.",
+ };
+ /* When verify-active detects the unit is inactive, report it.
+ * Most likely a DEPEND warning from a requisiting unit will
+ * occur next and it's nice to see what was requisited. */
+ static const char *const generic_finished_verify_active_job[_JOB_RESULT_MAX] = {
+ [JOB_SKIPPED] = "%s is not active.",
+ };
+
+ const char *format;
+
+ assert(u);
+ assert(t >= 0);
+ assert(t < _JOB_TYPE_MAX);
+
+ if (IN_SET(t, JOB_START, JOB_STOP, JOB_RESTART)) {
+ format = t == JOB_START ?
+ UNIT_VTABLE(u)->status_message_formats.finished_start_job[result] :
+ UNIT_VTABLE(u)->status_message_formats.finished_stop_job[result];
+ if (format)
+ return format;
+ }
+
+ /* Return generic strings */
+ if (t == JOB_START)
+ return generic_finished_start_job[result];
+ else if (IN_SET(t, JOB_STOP, JOB_RESTART))
+ return generic_finished_stop_job[result];
+ else if (t == JOB_RELOAD)
+ return generic_finished_reload_job[result];
+ else if (t == JOB_VERIFY_ACTIVE)
+ return generic_finished_verify_active_job[result];
+
+ return NULL;
+}
+
+static const struct {
+ const char *color, *word;
+} job_print_done_status_messages[_JOB_RESULT_MAX] = {
+ [JOB_DONE] = { ANSI_OK_COLOR, " OK " },
+ [JOB_TIMEOUT] = { ANSI_HIGHLIGHT_RED, " TIME " },
+ [JOB_FAILED] = { ANSI_HIGHLIGHT_RED, "FAILED" },
+ [JOB_DEPENDENCY] = { ANSI_HIGHLIGHT_YELLOW, "DEPEND" },
+ [JOB_SKIPPED] = { ANSI_HIGHLIGHT, " INFO " },
+ [JOB_ASSERT] = { ANSI_HIGHLIGHT_YELLOW, "ASSERT" },
+ [JOB_UNSUPPORTED] = { ANSI_HIGHLIGHT_YELLOW, "UNSUPP" },
+ /* JOB_COLLECTED */
+ [JOB_ONCE] = { ANSI_HIGHLIGHT_RED, " ONCE " },
+};
+
+static void job_print_done_status_message(Unit *u, JobType t, JobResult result) {
+ const char *format;
+ const char *status;
+
+ assert(u);
+ assert(t >= 0);
+ assert(t < _JOB_TYPE_MAX);
+
+ /* Reload status messages have traditionally not been printed to console. */
+ if (t == JOB_RELOAD)
+ return;
+
+ /* No message if the job did not actually do anything due to failed condition. */
+ if (t == JOB_START && result == JOB_DONE && !u->condition_result)
+ return;
+
+ if (!job_print_done_status_messages[result].word)
+ return;
+
+ format = job_get_done_status_message_format(u, t, result);
+ if (!format)
+ return;
+
+ if (log_get_show_color())
+ status = strjoina(job_print_done_status_messages[result].color,
+ job_print_done_status_messages[result].word,
+ ANSI_NORMAL);
+ else
+ status = job_print_done_status_messages[result].word;
+
+ if (result != JOB_DONE)
+ manager_flip_auto_status(u->manager, true);
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ unit_status_printf(u, status, format);
+ REENABLE_WARNING;
+
+ if (t == JOB_START && result == JOB_FAILED) {
+ _cleanup_free_ char *quoted;
+
+ quoted = shell_maybe_quote(u->id, ESCAPE_BACKSLASH);
+ manager_status_printf(u->manager, STATUS_TYPE_NORMAL, NULL, "See 'systemctl status %s' for details.", strna(quoted));
+ }
+}
+
+static void job_log_done_status_message(Unit *u, uint32_t job_id, JobType t, JobResult result) {
+ const char *format, *mid;
+ char buf[LINE_MAX];
+ static const int job_result_log_level[_JOB_RESULT_MAX] = {
+ [JOB_DONE] = LOG_INFO,
+ [JOB_CANCELED] = LOG_INFO,
+ [JOB_TIMEOUT] = LOG_ERR,
+ [JOB_FAILED] = LOG_ERR,
+ [JOB_DEPENDENCY] = LOG_WARNING,
+ [JOB_SKIPPED] = LOG_NOTICE,
+ [JOB_INVALID] = LOG_INFO,
+ [JOB_ASSERT] = LOG_WARNING,
+ [JOB_UNSUPPORTED] = LOG_WARNING,
+ [JOB_COLLECTED] = LOG_INFO,
+ [JOB_ONCE] = LOG_ERR,
+ };
+
+ assert(u);
+ assert(t >= 0);
+ assert(t < _JOB_TYPE_MAX);
+
+ /* Skip printing if output goes to the console, and job_print_status_message()
+ will actually print something to the console. */
+ if (log_on_console() && job_print_done_status_messages[result].word)
+ return;
+
+ /* Show condition check message if the job did not actually do anything due to failed condition. */
+ if (t == JOB_START && result == JOB_DONE && !u->condition_result) {
+ log_struct(LOG_INFO,
+ "MESSAGE=Condition check resulted in %s being skipped.", unit_description(u),
+ "JOB_ID=%" PRIu32, job_id,
+ "JOB_TYPE=%s", job_type_to_string(t),
+ "JOB_RESULT=%s", job_result_to_string(result),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_STARTED_STR);
+
+ return;
+ }
+
+ format = job_get_done_status_message_format(u, t, result);
+ if (!format)
+ return;
+
+ /* The description might be longer than the buffer, but that's OK,
+ * we'll just truncate it here. Note that we use snprintf() rather than
+ * xsprintf() on purpose here: we are fine with truncation and don't
+ * consider that an error. */
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ (void) snprintf(buf, sizeof(buf), format, unit_description(u));
+ REENABLE_WARNING;
+
+ switch (t) {
+
+ case JOB_START:
+ if (result == JOB_DONE)
+ mid = "MESSAGE_ID=" SD_MESSAGE_UNIT_STARTED_STR;
+ else
+ mid = "MESSAGE_ID=" SD_MESSAGE_UNIT_FAILED_STR;
+ break;
+
+ case JOB_RELOAD:
+ mid = "MESSAGE_ID=" SD_MESSAGE_UNIT_RELOADED_STR;
+ break;
+
+ case JOB_STOP:
+ case JOB_RESTART:
+ mid = "MESSAGE_ID=" SD_MESSAGE_UNIT_STOPPED_STR;
+ break;
+
+ default:
+ log_struct(job_result_log_level[result],
+ LOG_MESSAGE("%s", buf),
+ "JOB_ID=%" PRIu32, job_id,
+ "JOB_TYPE=%s", job_type_to_string(t),
+ "JOB_RESULT=%s", job_result_to_string(result),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u));
+ return;
+ }
+
+ log_struct(job_result_log_level[result],
+ LOG_MESSAGE("%s", buf),
+ "JOB_ID=%" PRIu32, job_id,
+ "JOB_TYPE=%s", job_type_to_string(t),
+ "JOB_RESULT=%s", job_result_to_string(result),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ mid);
+}
+
+static void job_emit_done_status_message(Unit *u, uint32_t job_id, JobType t, JobResult result) {
+ assert(u);
+
+ job_log_done_status_message(u, job_id, t, result);
+ job_print_done_status_message(u, t, result);
+}
+
+static void job_fail_dependencies(Unit *u, UnitDependency d) {
+ Unit *other;
+ Iterator i;
+ void *v;
+
+ assert(u);
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[d], i) {
+ Job *j = other->job;
+
+ if (!j)
+ continue;
+ if (!IN_SET(j->type, JOB_START, JOB_VERIFY_ACTIVE))
+ continue;
+
+ job_finish_and_invalidate(j, JOB_DEPENDENCY, true, false);
+ }
+}
+
+int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already) {
+ Unit *u;
+ Unit *other;
+ JobType t;
+ Iterator i;
+ void *v;
+
+ assert(j);
+ assert(j->installed);
+ assert(j->type < _JOB_TYPE_MAX_IN_TRANSACTION);
+
+ u = j->unit;
+ t = j->type;
+
+ j->result = result;
+
+ log_unit_debug(u, "Job %" PRIu32 " %s/%s finished, result=%s", j->id, u->id, job_type_to_string(t), job_result_to_string(result));
+
+ /* If this job did nothing to respective unit we don't log the status message */
+ if (!already)
+ job_emit_done_status_message(u, j->id, t, result);
+
+ /* Patch restart jobs so that they become normal start jobs */
+ if (result == JOB_DONE && t == JOB_RESTART) {
+
+ job_change_type(j, JOB_START);
+ job_set_state(j, JOB_WAITING);
+
+ job_add_to_dbus_queue(j);
+ job_add_to_run_queue(j);
+ job_add_to_gc_queue(j);
+
+ goto finish;
+ }
+
+ if (IN_SET(result, JOB_FAILED, JOB_INVALID))
+ j->manager->n_failed_jobs++;
+
+ job_uninstall(j);
+ job_free(j);
+
+ /* Fail depending jobs on failure */
+ if (result != JOB_DONE && recursive) {
+ if (IN_SET(t, JOB_START, JOB_VERIFY_ACTIVE)) {
+ job_fail_dependencies(u, UNIT_REQUIRED_BY);
+ job_fail_dependencies(u, UNIT_REQUISITE_OF);
+ job_fail_dependencies(u, UNIT_BOUND_BY);
+ } else if (t == JOB_STOP)
+ job_fail_dependencies(u, UNIT_CONFLICTED_BY);
+ }
+
+ /* Trigger OnFailure dependencies that are not generated by
+ * the unit itself. We don't treat JOB_CANCELED as failure in
+ * this context. And JOB_FAILURE is already handled by the
+ * unit itself. */
+ if (IN_SET(result, JOB_TIMEOUT, JOB_DEPENDENCY)) {
+ log_struct(LOG_NOTICE,
+ "JOB_TYPE=%s", job_type_to_string(t),
+ "JOB_RESULT=%s", job_result_to_string(result),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_MESSAGE(u, "Job %s/%s failed with result '%s'.",
+ u->id,
+ job_type_to_string(t),
+ job_result_to_string(result)));
+
+ unit_start_on_failure(u);
+ }
+
+ unit_trigger_notify(u);
+
+finish:
+ /* Try to start the next jobs that can be started */
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_AFTER], i)
+ if (other->job) {
+ job_add_to_run_queue(other->job);
+ job_add_to_gc_queue(other->job);
+ }
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_BEFORE], i)
+ if (other->job) {
+ job_add_to_run_queue(other->job);
+ job_add_to_gc_queue(other->job);
+ }
+
+ manager_check_finished(u->manager);
+
+ return 0;
+}
+
+static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *userdata) {
+ Job *j = userdata;
+ Unit *u;
+
+ assert(j);
+ assert(s == j->timer_event_source);
+
+ log_unit_warning(j->unit, "Job %s/%s timed out.", j->unit->id, job_type_to_string(j->type));
+
+ u = j->unit;
+ job_finish_and_invalidate(j, JOB_TIMEOUT, true, false);
+
+ emergency_action(u->manager, u->job_timeout_action,
+ EMERGENCY_ACTION_IS_WATCHDOG|EMERGENCY_ACTION_WARN,
+ u->job_timeout_reboot_arg, -1, "job timed out");
+
+ return 0;
+}
+
+int job_start_timer(Job *j, bool job_running) {
+ int r;
+ usec_t timeout_time, old_timeout_time;
+
+ if (job_running) {
+ j->begin_running_usec = now(CLOCK_MONOTONIC);
+
+ if (j->unit->job_running_timeout == USEC_INFINITY)
+ return 0;
+
+ timeout_time = usec_add(j->begin_running_usec, j->unit->job_running_timeout);
+
+ if (j->timer_event_source) {
+ /* Update only if JobRunningTimeoutSec= results in earlier timeout */
+ r = sd_event_source_get_time(j->timer_event_source, &old_timeout_time);
+ if (r < 0)
+ return r;
+
+ if (old_timeout_time <= timeout_time)
+ return 0;
+
+ return sd_event_source_set_time(j->timer_event_source, timeout_time);
+ }
+ } else {
+ if (j->timer_event_source)
+ return 0;
+
+ j->begin_usec = now(CLOCK_MONOTONIC);
+
+ if (j->unit->job_timeout == USEC_INFINITY)
+ return 0;
+
+ timeout_time = usec_add(j->begin_usec, j->unit->job_timeout);
+ }
+
+ r = sd_event_add_time(
+ j->manager->event,
+ &j->timer_event_source,
+ CLOCK_MONOTONIC,
+ timeout_time, 0,
+ job_dispatch_timer, j);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(j->timer_event_source, "job-start");
+
+ return 0;
+}
+
+void job_add_to_run_queue(Job *j) {
+ int r;
+
+ assert(j);
+ assert(j->installed);
+
+ if (j->in_run_queue)
+ return;
+
+ if (!j->manager->run_queue) {
+ r = sd_event_source_set_enabled(j->manager->run_queue_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enable job run queue event source, ignoring: %m");
+ }
+
+ LIST_PREPEND(run_queue, j->manager->run_queue, j);
+ j->in_run_queue = true;
+}
+
+void job_add_to_dbus_queue(Job *j) {
+ assert(j);
+ assert(j->installed);
+
+ if (j->in_dbus_queue)
+ return;
+
+ /* We don't check if anybody is subscribed here, since this
+ * job might just have been created and not yet assigned to a
+ * connection/client. */
+
+ LIST_PREPEND(dbus_queue, j->manager->dbus_job_queue, j);
+ j->in_dbus_queue = true;
+}
+
+char *job_dbus_path(Job *j) {
+ char *p;
+
+ assert(j);
+
+ if (asprintf(&p, "/org/freedesktop/systemd1/job/%"PRIu32, j->id) < 0)
+ return NULL;
+
+ return p;
+}
+
+int job_serialize(Job *j, FILE *f) {
+ assert(j);
+ assert(f);
+
+ (void) serialize_item_format(f, "job-id", "%u", j->id);
+ (void) serialize_item(f, "job-type", job_type_to_string(j->type));
+ (void) serialize_item(f, "job-state", job_state_to_string(j->state));
+ (void) serialize_bool(f, "job-irreversible", j->irreversible);
+ (void) serialize_bool(f, "job-sent-dbus-new-signal", j->sent_dbus_new_signal);
+ (void) serialize_bool(f, "job-ignore-order", j->ignore_order);
+
+ if (j->begin_usec > 0)
+ (void) serialize_usec(f, "job-begin", j->begin_usec);
+ if (j->begin_running_usec > 0)
+ (void) serialize_usec(f, "job-begin-running", j->begin_running_usec);
+
+ bus_track_serialize(j->bus_track, f, "subscribed");
+
+ /* End marker */
+ fputc('\n', f);
+ return 0;
+}
+
+int job_deserialize(Job *j, FILE *f) {
+ int r;
+
+ assert(j);
+ assert(f);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l, *v;
+ size_t k;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0)
+ return 0;
+
+ l = strstrip(line);
+
+ /* End marker */
+ if (isempty(l))
+ return 0;
+
+ k = strcspn(l, "=");
+
+ if (l[k] == '=') {
+ l[k] = 0;
+ v = l+k+1;
+ } else
+ v = l+k;
+
+ if (streq(l, "job-id")) {
+
+ if (safe_atou32(v, &j->id) < 0)
+ log_debug("Failed to parse job id value: %s", v);
+
+ } else if (streq(l, "job-type")) {
+ JobType t;
+
+ t = job_type_from_string(v);
+ if (t < 0)
+ log_debug("Failed to parse job type: %s", v);
+ else if (t >= _JOB_TYPE_MAX_IN_TRANSACTION)
+ log_debug("Cannot deserialize job of type: %s", v);
+ else
+ j->type = t;
+
+ } else if (streq(l, "job-state")) {
+ JobState s;
+
+ s = job_state_from_string(v);
+ if (s < 0)
+ log_debug("Failed to parse job state: %s", v);
+ else
+ job_set_state(j, s);
+
+ } else if (streq(l, "job-irreversible")) {
+ int b;
+
+ b = parse_boolean(v);
+ if (b < 0)
+ log_debug("Failed to parse job irreversible flag: %s", v);
+ else
+ j->irreversible = j->irreversible || b;
+
+ } else if (streq(l, "job-sent-dbus-new-signal")) {
+ int b;
+
+ b = parse_boolean(v);
+ if (b < 0)
+ log_debug("Failed to parse job sent_dbus_new_signal flag: %s", v);
+ else
+ j->sent_dbus_new_signal = j->sent_dbus_new_signal || b;
+
+ } else if (streq(l, "job-ignore-order")) {
+ int b;
+
+ b = parse_boolean(v);
+ if (b < 0)
+ log_debug("Failed to parse job ignore_order flag: %s", v);
+ else
+ j->ignore_order = j->ignore_order || b;
+
+ } else if (streq(l, "job-begin"))
+ (void) deserialize_usec(v, &j->begin_usec);
+
+ else if (streq(l, "job-begin-running"))
+ (void) deserialize_usec(v, &j->begin_running_usec);
+
+ else if (streq(l, "subscribed")) {
+ if (strv_extend(&j->deserialized_clients, v) < 0)
+ return log_oom();
+ } else
+ log_debug("Unknown job serialization key: %s", l);
+ }
+}
+
+int job_coldplug(Job *j) {
+ int r;
+ usec_t timeout_time = USEC_INFINITY;
+
+ assert(j);
+
+ /* After deserialization is complete and the bus connection
+ * set up again, let's start watching our subscribers again */
+ (void) bus_job_coldplug_bus_track(j);
+
+ if (j->state == JOB_WAITING)
+ job_add_to_run_queue(j);
+
+ /* Maybe due to new dependencies we don't actually need this job anymore? */
+ job_add_to_gc_queue(j);
+
+ /* Create timer only when job began or began running and the respective timeout is finite.
+ * Follow logic of job_start_timer() if both timeouts are finite */
+ if (j->begin_usec == 0)
+ return 0;
+
+ if (j->unit->job_timeout != USEC_INFINITY)
+ timeout_time = usec_add(j->begin_usec, j->unit->job_timeout);
+
+ if (j->begin_running_usec > 0 && j->unit->job_running_timeout != USEC_INFINITY)
+ timeout_time = MIN(timeout_time, usec_add(j->begin_running_usec, j->unit->job_running_timeout));
+
+ if (timeout_time == USEC_INFINITY)
+ return 0;
+
+ j->timer_event_source = sd_event_source_unref(j->timer_event_source);
+
+ r = sd_event_add_time(
+ j->manager->event,
+ &j->timer_event_source,
+ CLOCK_MONOTONIC,
+ timeout_time, 0,
+ job_dispatch_timer, j);
+ if (r < 0)
+ log_debug_errno(r, "Failed to restart timeout for job: %m");
+
+ (void) sd_event_source_set_description(j->timer_event_source, "job-timeout");
+
+ return r;
+}
+
+void job_shutdown_magic(Job *j) {
+ assert(j);
+
+ /* The shutdown target gets some special treatment here: we
+ * tell the kernel to begin with flushing its disk caches, to
+ * optimize shutdown time a bit. Ideally we wouldn't hardcode
+ * this magic into PID 1. However all other processes aren't
+ * options either since they'd exit much sooner than PID 1 and
+ * asynchronous sync() would cause their exit to be
+ * delayed. */
+
+ if (j->type != JOB_START)
+ return;
+
+ if (!MANAGER_IS_SYSTEM(j->unit->manager))
+ return;
+
+ if (!unit_has_name(j->unit, SPECIAL_SHUTDOWN_TARGET))
+ return;
+
+ /* In case messages on console has been disabled on boot */
+ j->unit->manager->no_console_output = false;
+
+ if (detect_container() > 0)
+ return;
+
+ (void) asynchronous_sync(NULL);
+}
+
+int job_get_timeout(Job *j, usec_t *timeout) {
+ usec_t x = USEC_INFINITY, y = USEC_INFINITY;
+ Unit *u = j->unit;
+ int r;
+
+ assert(u);
+
+ if (j->timer_event_source) {
+ r = sd_event_source_get_time(j->timer_event_source, &x);
+ if (r < 0)
+ return r;
+ }
+
+ if (UNIT_VTABLE(u)->get_timeout) {
+ r = UNIT_VTABLE(u)->get_timeout(u, &y);
+ if (r < 0)
+ return r;
+ }
+
+ if (x == USEC_INFINITY && y == USEC_INFINITY)
+ return 0;
+
+ *timeout = MIN(x, y);
+ return 1;
+}
+
+bool job_may_gc(Job *j) {
+ Unit *other;
+ Iterator i;
+ void *v;
+
+ assert(j);
+
+ /* Checks whether this job should be GC'ed away. We only do this for jobs of units that have no effect on their
+ * own and just track external state. For now the only unit type that qualifies for this are .device units.
+ * Returns true if the job can be collected. */
+
+ if (!UNIT_VTABLE(j->unit)->gc_jobs)
+ return false;
+
+ if (sd_bus_track_count(j->bus_track) > 0)
+ return false;
+
+ /* FIXME: So this is a bit ugly: for now we don't properly track references made via private bus connections
+ * (because it's nasty, as sd_bus_track doesn't apply to it). We simply remember that the job was once
+ * referenced by one, and reset this whenever we notice that no private bus connections are around. This means
+ * the GC is a bit too conservative when it comes to jobs created by private bus connections. */
+ if (j->ref_by_private_bus) {
+ if (set_isempty(j->unit->manager->private_buses))
+ j->ref_by_private_bus = false;
+ else
+ return false;
+ }
+
+ if (j->type == JOB_NOP)
+ return false;
+
+ /* If a job is ordered after ours, and is to be started, then it needs to wait for us, regardless if we stop or
+ * start, hence let's not GC in that case. */
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_BEFORE], i) {
+ if (!other->job)
+ continue;
+
+ if (other->job->ignore_order)
+ continue;
+
+ if (IN_SET(other->job->type, JOB_START, JOB_VERIFY_ACTIVE, JOB_RELOAD))
+ return false;
+ }
+
+ /* If we are going down, but something else is ordered After= us, then it needs to wait for us */
+ if (IN_SET(j->type, JOB_STOP, JOB_RESTART))
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER], i) {
+ if (!other->job)
+ continue;
+
+ if (other->job->ignore_order)
+ continue;
+
+ return false;
+ }
+
+ /* The logic above is kinda the inverse of the job_is_runnable() logic. Specifically, if the job "we" is
+ * ordered before the job "other":
+ *
+ * we start + other start → stay
+ * we start + other stop → gc
+ * we stop + other start → stay
+ * we stop + other stop → gc
+ *
+ * "we" are ordered after "other":
+ *
+ * we start + other start → gc
+ * we start + other stop → gc
+ * we stop + other start → stay
+ * we stop + other stop → stay
+ */
+
+ return true;
+}
+
+void job_add_to_gc_queue(Job *j) {
+ assert(j);
+
+ if (j->in_gc_queue)
+ return;
+
+ if (!job_may_gc(j))
+ return;
+
+ LIST_PREPEND(gc_queue, j->unit->manager->gc_job_queue, j);
+ j->in_gc_queue = true;
+}
+
+static int job_compare(Job * const *a, Job * const *b) {
+ return CMP((*a)->id, (*b)->id);
+}
+
+static size_t sort_job_list(Job **list, size_t n) {
+ Job *previous = NULL;
+ size_t a, b;
+
+ /* Order by numeric IDs */
+ typesafe_qsort(list, n, job_compare);
+
+ /* Filter out duplicates */
+ for (a = 0, b = 0; a < n; a++) {
+
+ if (previous == list[a])
+ continue;
+
+ previous = list[b++] = list[a];
+ }
+
+ return b;
+}
+
+int job_get_before(Job *j, Job*** ret) {
+ _cleanup_free_ Job** list = NULL;
+ size_t n = 0, n_allocated = 0;
+ Unit *other = NULL;
+ Iterator i;
+ void *v;
+
+ /* Returns a list of all pending jobs that need to finish before this job may be started. */
+
+ assert(j);
+ assert(ret);
+
+ if (j->ignore_order) {
+ *ret = NULL;
+ return 0;
+ }
+
+ if (IN_SET(j->type, JOB_START, JOB_VERIFY_ACTIVE, JOB_RELOAD)) {
+
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER], i) {
+ if (!other->job)
+ continue;
+
+ if (!GREEDY_REALLOC(list, n_allocated, n+1))
+ return -ENOMEM;
+ list[n++] = other->job;
+ }
+ }
+
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_BEFORE], i) {
+ if (!other->job)
+ continue;
+
+ if (!IN_SET(other->job->type, JOB_STOP, JOB_RESTART))
+ continue;
+
+ if (!GREEDY_REALLOC(list, n_allocated, n+1))
+ return -ENOMEM;
+ list[n++] = other->job;
+ }
+
+ n = sort_job_list(list, n);
+
+ *ret = TAKE_PTR(list);
+
+ return (int) n;
+}
+
+int job_get_after(Job *j, Job*** ret) {
+ _cleanup_free_ Job** list = NULL;
+ size_t n = 0, n_allocated = 0;
+ Unit *other = NULL;
+ void *v;
+ Iterator i;
+
+ assert(j);
+ assert(ret);
+
+ /* Returns a list of all pending jobs that are waiting for this job to finish. */
+
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_BEFORE], i) {
+ if (!other->job)
+ continue;
+
+ if (other->job->ignore_order)
+ continue;
+
+ if (!IN_SET(other->job->type, JOB_START, JOB_VERIFY_ACTIVE, JOB_RELOAD))
+ continue;
+
+ if (!GREEDY_REALLOC(list, n_allocated, n+1))
+ return -ENOMEM;
+ list[n++] = other->job;
+ }
+
+ if (IN_SET(j->type, JOB_STOP, JOB_RESTART)) {
+
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER], i) {
+ if (!other->job)
+ continue;
+
+ if (other->job->ignore_order)
+ continue;
+
+ if (!GREEDY_REALLOC(list, n_allocated, n+1))
+ return -ENOMEM;
+ list[n++] = other->job;
+ }
+ }
+
+ n = sort_job_list(list, n);
+
+ *ret = TAKE_PTR(list);
+
+ return (int) n;
+}
+
+static const char* const job_state_table[_JOB_STATE_MAX] = {
+ [JOB_WAITING] = "waiting",
+ [JOB_RUNNING] = "running",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(job_state, JobState);
+
+static const char* const job_type_table[_JOB_TYPE_MAX] = {
+ [JOB_START] = "start",
+ [JOB_VERIFY_ACTIVE] = "verify-active",
+ [JOB_STOP] = "stop",
+ [JOB_RELOAD] = "reload",
+ [JOB_RELOAD_OR_START] = "reload-or-start",
+ [JOB_RESTART] = "restart",
+ [JOB_TRY_RESTART] = "try-restart",
+ [JOB_TRY_RELOAD] = "try-reload",
+ [JOB_NOP] = "nop",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(job_type, JobType);
+
+static const char* const job_mode_table[_JOB_MODE_MAX] = {
+ [JOB_FAIL] = "fail",
+ [JOB_REPLACE] = "replace",
+ [JOB_REPLACE_IRREVERSIBLY] = "replace-irreversibly",
+ [JOB_ISOLATE] = "isolate",
+ [JOB_FLUSH] = "flush",
+ [JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies",
+ [JOB_IGNORE_REQUIREMENTS] = "ignore-requirements",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);
+
+static const char* const job_result_table[_JOB_RESULT_MAX] = {
+ [JOB_DONE] = "done",
+ [JOB_CANCELED] = "canceled",
+ [JOB_TIMEOUT] = "timeout",
+ [JOB_FAILED] = "failed",
+ [JOB_DEPENDENCY] = "dependency",
+ [JOB_SKIPPED] = "skipped",
+ [JOB_INVALID] = "invalid",
+ [JOB_ASSERT] = "assert",
+ [JOB_UNSUPPORTED] = "unsupported",
+ [JOB_COLLECTED] = "collected",
+ [JOB_ONCE] = "once",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(job_result, JobResult);
+
+const char* job_type_to_access_method(JobType t) {
+ assert(t >= 0);
+ assert(t < _JOB_TYPE_MAX);
+
+ if (IN_SET(t, JOB_START, JOB_RESTART, JOB_TRY_RESTART))
+ return "start";
+ else if (t == JOB_STOP)
+ return "stop";
+ else
+ return "reload";
+}
diff --git a/src/core/job.h b/src/core/job.h
new file mode 100644
index 0000000..1b9bcdd
--- /dev/null
+++ b/src/core/job.h
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-event.h"
+
+#include "list.h"
+#include "unit-name.h"
+
+typedef struct Job Job;
+typedef struct JobDependency JobDependency;
+typedef enum JobType JobType;
+typedef enum JobState JobState;
+typedef enum JobMode JobMode;
+typedef enum JobResult JobResult;
+
+/* Be careful when changing the job types! Adjust job_merging_table[] accordingly! */
+enum JobType {
+ JOB_START, /* if a unit does not support being started, we'll just wait until it becomes active */
+ JOB_VERIFY_ACTIVE,
+
+ JOB_STOP,
+
+ JOB_RELOAD, /* if running, reload */
+
+ /* Note that restarts are first treated like JOB_STOP, but
+ * then instead of finishing are patched to become
+ * JOB_START. */
+ JOB_RESTART, /* If running, stop. Then start unconditionally. */
+
+ _JOB_TYPE_MAX_MERGING,
+
+ /* JOB_NOP can enter into a transaction, but as it won't pull in
+ * any dependencies and it uses the special 'nop_job' slot in Unit,
+ * it won't have to merge with anything (except possibly into another
+ * JOB_NOP, previously installed). JOB_NOP is special-cased in
+ * job_type_is_*() functions so that the transaction can be
+ * activated. */
+ JOB_NOP = _JOB_TYPE_MAX_MERGING, /* do nothing */
+
+ _JOB_TYPE_MAX_IN_TRANSACTION,
+
+ /* JOB_TRY_RESTART can never appear in a transaction, because
+ * it always collapses into JOB_RESTART or JOB_NOP before entering.
+ * Thus we never need to merge it with anything. */
+ JOB_TRY_RESTART = _JOB_TYPE_MAX_IN_TRANSACTION, /* if running, stop and then start */
+
+ /* Similar to JOB_TRY_RESTART but collapses to JOB_RELOAD or JOB_NOP */
+ JOB_TRY_RELOAD,
+
+ /* JOB_RELOAD_OR_START won't enter into a transaction and cannot result
+ * from transaction merging (there's no way for JOB_RELOAD and
+ * JOB_START to meet in one transaction). It can result from a merge
+ * during job installation, but then it will immediately collapse into
+ * one of the two simpler types. */
+ JOB_RELOAD_OR_START, /* if running, reload, otherwise start */
+
+ _JOB_TYPE_MAX,
+ _JOB_TYPE_INVALID = -1
+};
+
+enum JobState {
+ JOB_WAITING,
+ JOB_RUNNING,
+ _JOB_STATE_MAX,
+ _JOB_STATE_INVALID = -1
+};
+
+enum JobMode {
+ JOB_FAIL, /* Fail if a conflicting job is already queued */
+ JOB_REPLACE, /* Replace an existing conflicting job */
+ JOB_REPLACE_IRREVERSIBLY,/* Like JOB_REPLACE + produce irreversible jobs */
+ JOB_ISOLATE, /* Start a unit, and stop all others */
+ JOB_FLUSH, /* Flush out all other queued jobs when queing this one */
+ JOB_IGNORE_DEPENDENCIES, /* Ignore both requirement and ordering dependencies */
+ JOB_IGNORE_REQUIREMENTS, /* Ignore requirement dependencies */
+ _JOB_MODE_MAX,
+ _JOB_MODE_INVALID = -1
+};
+
+enum JobResult {
+ JOB_DONE, /* Job completed successfully (or skipped due to a failed ConditionXYZ=) */
+ JOB_CANCELED, /* Job canceled by a conflicting job installation or by explicit cancel request */
+ JOB_TIMEOUT, /* Job timeout elapsed */
+ JOB_FAILED, /* Job failed */
+ JOB_DEPENDENCY, /* A required dependency job did not result in JOB_DONE */
+ JOB_SKIPPED, /* Negative result of JOB_VERIFY_ACTIVE */
+ JOB_INVALID, /* JOB_RELOAD of inactive unit */
+ JOB_ASSERT, /* Couldn't start a unit, because an assert didn't hold */
+ JOB_UNSUPPORTED, /* Couldn't start a unit, because the unit type is not supported on the system */
+ JOB_COLLECTED, /* Job was garbage collected, since nothing needed it anymore */
+ JOB_ONCE, /* Unit was started before, and hence can't be started again */
+ _JOB_RESULT_MAX,
+ _JOB_RESULT_INVALID = -1
+};
+
+#include "unit.h"
+
+struct JobDependency {
+ /* Encodes that the 'subject' job needs the 'object' job in
+ * some way. This structure is used only while building a transaction. */
+ Job *subject;
+ Job *object;
+
+ LIST_FIELDS(JobDependency, subject);
+ LIST_FIELDS(JobDependency, object);
+
+ bool matters:1;
+ bool conflicts:1;
+};
+
+struct Job {
+ Manager *manager;
+ Unit *unit;
+
+ LIST_FIELDS(Job, transaction);
+ LIST_FIELDS(Job, run_queue);
+ LIST_FIELDS(Job, dbus_queue);
+ LIST_FIELDS(Job, gc_queue);
+
+ LIST_HEAD(JobDependency, subject_list);
+ LIST_HEAD(JobDependency, object_list);
+
+ /* Used for graph algs as a "I have been here" marker */
+ Job* marker;
+ unsigned generation;
+
+ uint32_t id;
+
+ JobType type;
+ JobState state;
+
+ sd_event_source *timer_event_source;
+ usec_t begin_usec;
+ usec_t begin_running_usec;
+
+ /*
+ * This tracks where to send signals, and also which clients
+ * are allowed to call DBus methods on the job (other than
+ * root).
+ *
+ * There can be more than one client, because of job merging.
+ */
+ sd_bus_track *bus_track;
+ char **deserialized_clients;
+
+ JobResult result;
+
+ bool installed:1;
+ bool in_run_queue:1;
+ bool matters_to_anchor:1;
+ bool in_dbus_queue:1;
+ bool sent_dbus_new_signal:1;
+ bool ignore_order:1;
+ bool irreversible:1;
+ bool in_gc_queue:1;
+ bool ref_by_private_bus:1;
+};
+
+Job* job_new(Unit *unit, JobType type);
+Job* job_new_raw(Unit *unit);
+void job_unlink(Job *job);
+Job* job_free(Job *job);
+Job* job_install(Job *j);
+int job_install_deserialized(Job *j);
+void job_uninstall(Job *j);
+void job_dump(Job *j, FILE *f, const char *prefix);
+int job_serialize(Job *j, FILE *f);
+int job_deserialize(Job *j, FILE *f);
+int job_coldplug(Job *j);
+
+JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts);
+void job_dependency_free(JobDependency *l);
+
+int job_merge(Job *j, Job *other);
+
+JobType job_type_lookup_merge(JobType a, JobType b) _pure_;
+
+_pure_ static inline bool job_type_is_mergeable(JobType a, JobType b) {
+ return job_type_lookup_merge(a, b) >= 0;
+}
+
+_pure_ static inline bool job_type_is_conflicting(JobType a, JobType b) {
+ return a != JOB_NOP && b != JOB_NOP && !job_type_is_mergeable(a, b);
+}
+
+_pure_ static inline bool job_type_is_superset(JobType a, JobType b) {
+ /* Checks whether operation a is a "superset" of b in its actions */
+ if (b == JOB_NOP)
+ return true;
+ if (a == JOB_NOP)
+ return false;
+ return a == job_type_lookup_merge(a, b);
+}
+
+bool job_type_is_redundant(JobType a, UnitActiveState b) _pure_;
+
+/* Collapses a state-dependent job type into a simpler type by observing
+ * the state of the unit which it is going to be applied to. */
+JobType job_type_collapse(JobType t, Unit *u);
+
+int job_type_merge_and_collapse(JobType *a, JobType b, Unit *u);
+
+void job_add_to_run_queue(Job *j);
+void job_add_to_dbus_queue(Job *j);
+
+int job_start_timer(Job *j, bool job_running);
+
+int job_run_and_invalidate(Job *j);
+int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already);
+
+char *job_dbus_path(Job *j);
+
+void job_shutdown_magic(Job *j);
+
+int job_get_timeout(Job *j, usec_t *timeout) _pure_;
+
+bool job_may_gc(Job *j);
+void job_add_to_gc_queue(Job *j);
+
+int job_get_before(Job *j, Job*** ret);
+int job_get_after(Job *j, Job*** ret);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Job*, job_free);
+
+const char* job_type_to_string(JobType t) _const_;
+JobType job_type_from_string(const char *s) _pure_;
+
+const char* job_state_to_string(JobState t) _const_;
+JobState job_state_from_string(const char *s) _pure_;
+
+const char* job_mode_to_string(JobMode t) _const_;
+JobMode job_mode_from_string(const char *s) _pure_;
+
+const char* job_result_to_string(JobResult t) _const_;
+JobResult job_result_from_string(const char *s) _pure_;
+
+const char* job_type_to_access_method(JobType t);
diff --git a/src/core/kill.c b/src/core/kill.c
new file mode 100644
index 0000000..6fe96cf
--- /dev/null
+++ b/src/core/kill.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "kill.h"
+#include "signal-util.h"
+#include "string-table.h"
+#include "util.h"
+
+void kill_context_init(KillContext *c) {
+ assert(c);
+
+ c->kill_signal = SIGTERM;
+ c->final_kill_signal = SIGKILL;
+ c->send_sigkill = true;
+ c->send_sighup = false;
+ c->watchdog_signal = SIGABRT;
+}
+
+void kill_context_dump(KillContext *c, FILE *f, const char *prefix) {
+ assert(c);
+
+ prefix = strempty(prefix);
+
+ fprintf(f,
+ "%sKillMode: %s\n"
+ "%sKillSignal: SIG%s\n"
+ "%sFinalKillSignal: SIG%s\n"
+ "%sSendSIGKILL: %s\n"
+ "%sSendSIGHUP: %s\n",
+ prefix, kill_mode_to_string(c->kill_mode),
+ prefix, signal_to_string(c->kill_signal),
+ prefix, signal_to_string(c->final_kill_signal),
+ prefix, yes_no(c->send_sigkill),
+ prefix, yes_no(c->send_sighup));
+}
+
+static const char* const kill_mode_table[_KILL_MODE_MAX] = {
+ [KILL_CONTROL_GROUP] = "control-group",
+ [KILL_PROCESS] = "process",
+ [KILL_MIXED] = "mixed",
+ [KILL_NONE] = "none"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
+
+static const char* const kill_who_table[_KILL_WHO_MAX] = {
+ [KILL_MAIN] = "main",
+ [KILL_CONTROL] = "control",
+ [KILL_ALL] = "all",
+ [KILL_MAIN_FAIL] = "main-fail",
+ [KILL_CONTROL_FAIL] = "control-fail",
+ [KILL_ALL_FAIL] = "all-fail"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);
diff --git a/src/core/kill.h b/src/core/kill.h
new file mode 100644
index 0000000..f3915be
--- /dev/null
+++ b/src/core/kill.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct KillContext KillContext;
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "macro.h"
+
+typedef enum KillMode {
+ /* The kill mode is a property of a unit. */
+ KILL_CONTROL_GROUP = 0,
+ KILL_PROCESS,
+ KILL_MIXED,
+ KILL_NONE,
+ _KILL_MODE_MAX,
+ _KILL_MODE_INVALID = -1
+} KillMode;
+
+struct KillContext {
+ KillMode kill_mode;
+ int kill_signal;
+ int final_kill_signal;
+ bool send_sigkill;
+ bool send_sighup;
+ int watchdog_signal;
+};
+
+typedef enum KillWho {
+ /* Kill who is a property of an operation */
+ KILL_MAIN,
+ KILL_CONTROL,
+ KILL_ALL,
+ KILL_MAIN_FAIL,
+ KILL_CONTROL_FAIL,
+ KILL_ALL_FAIL,
+ _KILL_WHO_MAX,
+ _KILL_WHO_INVALID = -1
+} KillWho;
+
+void kill_context_init(KillContext *c);
+void kill_context_dump(KillContext *c, FILE *f, const char *prefix);
+
+const char *kill_mode_to_string(KillMode k) _const_;
+KillMode kill_mode_from_string(const char *s) _pure_;
+
+const char *kill_who_to_string(KillWho k) _const_;
+KillWho kill_who_from_string(const char *s) _pure_;
diff --git a/src/core/killall.c b/src/core/killall.c
new file mode 100644
index 0000000..f0ce996
--- /dev/null
+++ b/src/core/killall.c
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2010 ProFUSION embedded systems
+***/
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "killall.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static bool ignore_proc(pid_t pid, bool warn_rootfs) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *p;
+ char c = 0;
+ uid_t uid;
+ int r;
+
+ /* We are PID 1, let's not commit suicide */
+ if (pid <= 1)
+ return true;
+
+ /* Ignore kernel threads */
+ r = is_kernel_thread(pid);
+ if (r != 0)
+ return true; /* also ignore processes where we can't determine this */
+
+ r = get_process_uid(pid, &uid);
+ if (r < 0)
+ return true; /* not really, but better safe than sorry */
+
+ /* Non-root processes otherwise are always subject to be killed */
+ if (uid != 0)
+ return false;
+
+ p = procfs_file_alloca(pid, "cmdline");
+ f = fopen(p, "re");
+ if (!f)
+ return true; /* not really, but has the desired effect */
+
+ /* Try to read the first character of the command line. If the cmdline is empty (which might be the case for
+ * kernel threads but potentially also other stuff), this line won't do anything, but we don't care much, as
+ * actual kernel threads are already filtered out above. */
+ (void) fread(&c, 1, 1, f);
+
+ /* Processes with argv[0][0] = '@' we ignore from the killing spree.
+ *
+ * http://www.freedesktop.org/wiki/Software/systemd/RootStorageDaemons */
+ if (c != '@')
+ return false;
+
+ if (warn_rootfs &&
+ pid_from_same_root_fs(pid) == 0) {
+
+ _cleanup_free_ char *comm = NULL;
+
+ (void) get_process_comm(pid, &comm);
+
+ log_notice("Process " PID_FMT " (%s) has been marked to be excluded from killing. It is "
+ "running from the root file system, and thus likely to block re-mounting of the "
+ "root file system to read-only. Please consider moving it into an initrd file "
+ "system instead.", pid, strna(comm));
+ }
+
+ return true;
+}
+
+static void wait_for_children(Set *pids, sigset_t *mask, usec_t timeout) {
+ usec_t until;
+
+ assert(mask);
+
+ if (set_isempty(pids))
+ return;
+
+ until = now(CLOCK_MONOTONIC) + timeout;
+ for (;;) {
+ struct timespec ts;
+ int k;
+ usec_t n;
+ void *p;
+ Iterator i;
+
+ /* First, let the kernel inform us about killed
+ * children. Most processes will probably be our
+ * children, but some are not (might be our
+ * grandchildren instead...). */
+ for (;;) {
+ pid_t pid;
+
+ pid = waitpid(-1, NULL, WNOHANG);
+ if (pid == 0)
+ break;
+ if (pid < 0) {
+ if (errno == ECHILD)
+ break;
+
+ log_error_errno(errno, "waitpid() failed: %m");
+ return;
+ }
+
+ (void) set_remove(pids, PID_TO_PTR(pid));
+ }
+
+ /* Now explicitly check who might be remaining, who
+ * might not be our child. */
+ SET_FOREACH(p, pids, i) {
+
+ /* kill(pid, 0) sends no signal, but it tells
+ * us whether the process still exists. */
+ if (kill(PTR_TO_PID(p), 0) == 0)
+ continue;
+
+ if (errno != ESRCH)
+ continue;
+
+ set_remove(pids, p);
+ }
+
+ if (set_isempty(pids))
+ return;
+
+ n = now(CLOCK_MONOTONIC);
+ if (n >= until)
+ return;
+
+ timespec_store(&ts, until - n);
+ k = sigtimedwait(mask, NULL, &ts);
+ if (k != SIGCHLD) {
+
+ if (k < 0 && errno != EAGAIN) {
+ log_error_errno(errno, "sigtimedwait() failed: %m");
+ return;
+ }
+
+ if (k >= 0)
+ log_warning("sigtimedwait() returned unexpected signal.");
+ }
+ }
+}
+
+static int killall(int sig, Set *pids, bool send_sighup) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *d;
+
+ dir = opendir("/proc");
+ if (!dir)
+ return -errno;
+
+ FOREACH_DIRENT_ALL(d, dir, break) {
+ pid_t pid;
+ int r;
+
+ if (!IN_SET(d->d_type, DT_DIR, DT_UNKNOWN))
+ continue;
+
+ if (parse_pid(d->d_name, &pid) < 0)
+ continue;
+
+ if (ignore_proc(pid, sig == SIGKILL && !in_initrd()))
+ continue;
+
+ if (sig == SIGKILL) {
+ _cleanup_free_ char *s = NULL;
+
+ get_process_comm(pid, &s);
+ log_notice("Sending SIGKILL to PID "PID_FMT" (%s).", pid, strna(s));
+ }
+
+ if (kill(pid, sig) >= 0) {
+ if (pids) {
+ r = set_put(pids, PID_TO_PTR(pid));
+ if (r < 0)
+ log_oom();
+ }
+ } else if (errno != ENOENT)
+ log_warning_errno(errno, "Could not kill %d: %m", pid);
+
+ if (send_sighup) {
+ /* Optionally, also send a SIGHUP signal, but
+ only if the process has a controlling
+ tty. This is useful to allow handling of
+ shells which ignore SIGTERM but react to
+ SIGHUP. We do not send this to processes that
+ have no controlling TTY since we don't want to
+ trigger reloads of daemon processes. Also we
+ make sure to only send this after SIGTERM so
+ that SIGTERM is always first in the queue. */
+
+ if (get_ctty_devnr(pid, NULL) >= 0)
+ /* it's OK if the process is gone, just ignore the result */
+ (void) kill(pid, SIGHUP);
+ }
+ }
+
+ return set_size(pids);
+}
+
+void broadcast_signal(int sig, bool wait_for_exit, bool send_sighup, usec_t timeout) {
+ sigset_t mask, oldmask;
+ _cleanup_set_free_ Set *pids = NULL;
+
+ if (wait_for_exit)
+ pids = set_new(NULL);
+
+ assert_se(sigemptyset(&mask) == 0);
+ assert_se(sigaddset(&mask, SIGCHLD) == 0);
+ assert_se(sigprocmask(SIG_BLOCK, &mask, &oldmask) == 0);
+
+ if (kill(-1, SIGSTOP) < 0 && errno != ESRCH)
+ log_warning_errno(errno, "kill(-1, SIGSTOP) failed: %m");
+
+ killall(sig, pids, send_sighup);
+
+ if (kill(-1, SIGCONT) < 0 && errno != ESRCH)
+ log_warning_errno(errno, "kill(-1, SIGCONT) failed: %m");
+
+ if (wait_for_exit)
+ wait_for_children(pids, &mask, timeout);
+
+ assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) == 0);
+}
diff --git a/src/core/killall.h b/src/core/killall.h
new file mode 100644
index 0000000..cbd2026
--- /dev/null
+++ b/src/core/killall.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "time-util.h"
+
+void broadcast_signal(int sig, bool wait_for_exit, bool send_sighup, usec_t timeout);
diff --git a/src/core/kmod-setup.c b/src/core/kmod-setup.c
new file mode 100644
index 0000000..a91cfeb
--- /dev/null
+++ b/src/core/kmod-setup.c
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ftw.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "capability-util.h"
+#include "fileio.h"
+#include "kmod-setup.h"
+#include "macro.h"
+#include "string-util.h"
+
+#if HAVE_KMOD
+#include <libkmod.h>
+#include "module-util.h"
+
+static void systemd_kmod_log(
+ void *data,
+ int priority,
+ const char *file, int line,
+ const char *fn,
+ const char *format,
+ va_list args) {
+
+ /* library logging is enabled at debug only */
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ log_internalv(LOG_DEBUG, 0, file, line, fn, format, args);
+ REENABLE_WARNING;
+}
+
+static int has_virtio_rng_nftw_cb(
+ const char *fpath,
+ const struct stat *sb,
+ int tflag,
+ struct FTW *ftwbuf) {
+
+ _cleanup_free_ char *alias = NULL;
+ int r;
+
+ if ((FTW_D == tflag) && (ftwbuf->level > 2))
+ return FTW_SKIP_SUBTREE;
+
+ if (FTW_F != tflag)
+ return FTW_CONTINUE;
+
+ if (!endswith(fpath, "/modalias"))
+ return FTW_CONTINUE;
+
+ r = read_one_line_file(fpath, &alias);
+ if (r < 0)
+ return FTW_SKIP_SIBLINGS;
+
+ if (startswith(alias, "pci:v00001AF4d00001005"))
+ return FTW_STOP;
+
+ if (startswith(alias, "pci:v00001AF4d00001044"))
+ return FTW_STOP;
+
+ return FTW_SKIP_SIBLINGS;
+}
+
+static bool has_virtio_rng(void) {
+ return (nftw("/sys/devices/pci0000:00", has_virtio_rng_nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL) == FTW_STOP);
+}
+#endif
+
+int kmod_setup(void) {
+#if HAVE_KMOD
+
+ static const struct {
+ const char *module;
+ const char *path;
+ bool warn_if_unavailable:1;
+ bool warn_if_module:1;
+ bool (*condition_fn)(void);
+ } kmod_table[] = {
+ /* This one we need to load explicitly, since auto-loading on use doesn't work
+ * before udev created the ghost device nodes, and we need it earlier than that. */
+ { "autofs4", "/sys/class/misc/autofs", true, false, NULL },
+
+ /* This one we need to load explicitly, since auto-loading of IPv6 is not done when
+ * we try to configure ::1 on the loopback device. */
+ { "ipv6", "/sys/module/ipv6", false, true, NULL },
+
+ /* This should never be a module */
+ { "unix", "/proc/net/unix", true, true, NULL },
+
+#if HAVE_LIBIPTC
+ /* netfilter is needed by networkd, nspawn among others, and cannot be autoloaded */
+ { "ip_tables", "/proc/net/ip_tables_names", false, false, NULL },
+#endif
+ /* virtio_rng would be loaded by udev later, but real entropy might be needed very early */
+ { "virtio_rng", NULL, false, false, has_virtio_rng },
+ };
+ _cleanup_(kmod_unrefp) struct kmod_ctx *ctx = NULL;
+ unsigned i;
+
+ if (have_effective_cap(CAP_SYS_MODULE) == 0)
+ return 0;
+
+ for (i = 0; i < ELEMENTSOF(kmod_table); i++) {
+ if (kmod_table[i].path && access(kmod_table[i].path, F_OK) >= 0)
+ continue;
+
+ if (kmod_table[i].condition_fn && !kmod_table[i].condition_fn())
+ continue;
+
+ if (kmod_table[i].warn_if_module)
+ log_debug("Your kernel apparently lacks built-in %s support. Might be "
+ "a good idea to compile it in. We'll now try to work around "
+ "this by loading the module...", kmod_table[i].module);
+
+ if (!ctx) {
+ ctx = kmod_new(NULL, NULL);
+ if (!ctx)
+ return log_oom();
+
+ kmod_set_log_fn(ctx, systemd_kmod_log, NULL);
+ kmod_load_resources(ctx);
+ }
+
+ (void) module_load_and_warn(ctx, kmod_table[i].module, kmod_table[i].warn_if_unavailable);
+ }
+
+#endif
+ return 0;
+}
diff --git a/src/core/kmod-setup.h b/src/core/kmod-setup.h
new file mode 100644
index 0000000..801c7bf
--- /dev/null
+++ b/src/core/kmod-setup.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int kmod_setup(void);
diff --git a/src/core/load-dropin.c b/src/core/load-dropin.c
new file mode 100644
index 0000000..a50b200
--- /dev/null
+++ b/src/core/load-dropin.c
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "conf-parser.h"
+#include "fs-util.h"
+#include "load-dropin.h"
+#include "load-fragment.h"
+#include "log.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit.h"
+
+static int unit_name_compatible(const char *a, const char *b) {
+ _cleanup_free_ char *template = NULL;
+ int r;
+
+ /* The straightforward case: the symlink name matches the target */
+ if (streq(a, b))
+ return 1;
+
+ r = unit_name_template(a, &template);
+ if (r == -EINVAL)
+ return 0; /* Not a template */
+ if (r < 0)
+ return r; /* OOM, or some other failure. Just skip the warning. */
+
+ /* An instance name points to a target that is just the template name */
+ return streq(template, b);
+}
+
+static int process_deps(Unit *u, UnitDependency dependency, const char *dir_suffix) {
+ _cleanup_strv_free_ char **paths = NULL;
+ char **p;
+ int r;
+
+ r = unit_file_find_dropin_paths(NULL,
+ u->manager->lookup_paths.search_path,
+ u->manager->unit_path_cache,
+ dir_suffix,
+ NULL,
+ u->names,
+ &paths);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, paths) {
+ _cleanup_free_ char *target = NULL;
+ const char *entry;
+
+ entry = basename(*p);
+
+ if (null_or_empty_path(*p) > 0) {
+ /* an error usually means an invalid symlink, which is not a mask */
+ log_unit_debug(u, "%s dependency on %s is masked by %s, ignoring.",
+ unit_dependency_to_string(dependency), entry, *p);
+ continue;
+ }
+
+ r = is_symlink(*p);
+ if (r < 0) {
+ log_unit_warning_errno(u, r, "%s dropin %s unreadable, ignoring: %m",
+ unit_dependency_to_string(dependency), *p);
+ continue;
+ }
+ if (r == 0) {
+ log_unit_warning(u, "%s dependency dropin %s is not a symlink, ignoring.",
+ unit_dependency_to_string(dependency), *p);
+ continue;
+ }
+
+ if (!unit_name_is_valid(entry, UNIT_NAME_ANY)) {
+ log_unit_warning(u, "%s dependency dropin %s is not a valid unit name, ignoring.",
+ unit_dependency_to_string(dependency), *p);
+ continue;
+ }
+
+ r = readlink_malloc(*p, &target);
+ if (r < 0) {
+ log_unit_warning_errno(u, r, "readlink(\"%s\") failed, ignoring: %m", *p);
+ continue;
+ }
+
+ /* We don't treat this as an error, especially because we didn't check this for a
+ * long time. Nevertheless, we warn, because such mismatch can be mighty confusing. */
+ r = unit_name_compatible(entry, basename(target));
+ if (r < 0) {
+ log_unit_warning_errno(u, r, "Can't check if names %s and %s are compatible, ignoring: %m", entry, basename(target));
+ continue;
+ }
+ if (r == 0)
+ log_unit_warning(u, "%s dependency dropin %s target %s has different name",
+ unit_dependency_to_string(dependency), *p, target);
+
+ r = unit_add_dependency_by_name(u, dependency, entry, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Cannot add %s dependency on %s, ignoring: %m",
+ unit_dependency_to_string(dependency), entry);
+ }
+
+ return 0;
+}
+
+int unit_load_dropin(Unit *u) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **f;
+ int r;
+
+ assert(u);
+
+ /* Load dependencies from .wants and .requires directories */
+ r = process_deps(u, UNIT_WANTS, ".wants");
+ if (r < 0)
+ return r;
+
+ r = process_deps(u, UNIT_REQUIRES, ".requires");
+ if (r < 0)
+ return r;
+
+ /* Load .conf dropins */
+ r = unit_find_dropin_paths(u, &l);
+ if (r <= 0)
+ return 0;
+
+ if (!u->dropin_paths)
+ u->dropin_paths = TAKE_PTR(l);
+ else {
+ r = strv_extend_strv(&u->dropin_paths, l, true);
+ if (r < 0)
+ return log_oom();
+ }
+
+ STRV_FOREACH(f, u->dropin_paths)
+ (void) config_parse(u->id, *f, NULL,
+ UNIT_VTABLE(u)->sections,
+ config_item_perf_lookup, load_fragment_gperf_lookup,
+ 0, u);
+
+ u->dropin_mtime = now(CLOCK_REALTIME);
+
+ return 0;
+}
diff --git a/src/core/load-dropin.h b/src/core/load-dropin.h
new file mode 100644
index 0000000..bb10a76
--- /dev/null
+++ b/src/core/load-dropin.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "dropin.h"
+#include "unit.h"
+
+/* Read service data supplementary drop-in directories */
+
+static inline int unit_find_dropin_paths(Unit *u, char ***paths) {
+ assert(u);
+
+ return unit_file_find_dropin_conf_paths(NULL,
+ u->manager->lookup_paths.search_path,
+ u->manager->unit_path_cache,
+ u->names,
+ paths);
+}
+
+int unit_load_dropin(Unit *u);
diff --git a/src/core/load-fragment-gperf-nulstr.awk b/src/core/load-fragment-gperf-nulstr.awk
new file mode 100644
index 0000000..44bc1fb
--- /dev/null
+++ b/src/core/load-fragment-gperf-nulstr.awk
@@ -0,0 +1,14 @@
+BEGIN{
+ keywords=0 ; FS="," ;
+ print "extern const char load_fragment_gperf_nulstr[];" ;
+ print "const char load_fragment_gperf_nulstr[] ="
+}
+keyword==1 {
+ print "\"" $1 "\\0\""
+}
+/%%/ {
+ keyword=1
+}
+END {
+ print ";"
+}
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
new file mode 100644
index 0000000..cdbc67f
--- /dev/null
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -0,0 +1,459 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "load-fragment.h"
+#include "missing.h"
+
+#include "all-units.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name load_fragment_gperf_hash
+%define lookup-function-name load_fragment_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+m4_dnl Define the context options only once
+m4_define(`EXEC_CONTEXT_CONFIG_ITEMS',
+`$1.WorkingDirectory, config_parse_working_directory, 0, offsetof($1, exec_context)
+$1.RootDirectory, config_parse_unit_path_printf, true, offsetof($1, exec_context.root_directory)
+$1.RootImage, config_parse_unit_path_printf, true, offsetof($1, exec_context.root_image)
+$1.User, config_parse_user_group, 0, offsetof($1, exec_context.user)
+$1.Group, config_parse_user_group, 0, offsetof($1, exec_context.group)
+$1.SupplementaryGroups, config_parse_user_group_strv, 0, offsetof($1, exec_context.supplementary_groups)
+$1.Nice, config_parse_exec_nice, 0, offsetof($1, exec_context)
+$1.OOMScoreAdjust, config_parse_exec_oom_score_adjust, 0, offsetof($1, exec_context)
+$1.IOSchedulingClass, config_parse_exec_io_class, 0, offsetof($1, exec_context)
+$1.IOSchedulingPriority, config_parse_exec_io_priority, 0, offsetof($1, exec_context)
+$1.CPUSchedulingPolicy, config_parse_exec_cpu_sched_policy, 0, offsetof($1, exec_context)
+$1.CPUSchedulingPriority, config_parse_exec_cpu_sched_prio, 0, offsetof($1, exec_context)
+$1.CPUSchedulingResetOnFork, config_parse_bool, 0, offsetof($1, exec_context.cpu_sched_reset_on_fork)
+$1.CPUAffinity, config_parse_exec_cpu_affinity, 0, offsetof($1, exec_context)
+$1.UMask, config_parse_mode, 0, offsetof($1, exec_context.umask)
+$1.Environment, config_parse_environ, 0, offsetof($1, exec_context.environment)
+$1.EnvironmentFile, config_parse_unit_env_file, 0, offsetof($1, exec_context.environment_files)
+$1.PassEnvironment, config_parse_pass_environ, 0, offsetof($1, exec_context.pass_environment)
+$1.UnsetEnvironment, config_parse_unset_environ, 0, offsetof($1, exec_context.unset_environment)
+$1.DynamicUser, config_parse_bool, true, offsetof($1, exec_context.dynamic_user)
+$1.RemoveIPC, config_parse_bool, 0, offsetof($1, exec_context.remove_ipc)
+$1.StandardInput, config_parse_exec_input, 0, offsetof($1, exec_context)
+$1.StandardOutput, config_parse_exec_output, 0, offsetof($1, exec_context)
+$1.StandardError, config_parse_exec_output, 0, offsetof($1, exec_context)
+$1.StandardInputText, config_parse_exec_input_text, 0, offsetof($1, exec_context)
+$1.StandardInputData, config_parse_exec_input_data, 0, offsetof($1, exec_context)
+$1.TTYPath, config_parse_unit_path_printf, 0, offsetof($1, exec_context.tty_path)
+$1.TTYReset, config_parse_bool, 0, offsetof($1, exec_context.tty_reset)
+$1.TTYVHangup, config_parse_bool, 0, offsetof($1, exec_context.tty_vhangup)
+$1.TTYVTDisallocate, config_parse_bool, 0, offsetof($1, exec_context.tty_vt_disallocate)
+$1.SyslogIdentifier, config_parse_unit_string_printf, 0, offsetof($1, exec_context.syslog_identifier)
+$1.SyslogFacility, config_parse_log_facility, 0, offsetof($1, exec_context.syslog_priority)
+$1.SyslogLevel, config_parse_log_level, 0, offsetof($1, exec_context.syslog_priority)
+$1.SyslogLevelPrefix, config_parse_bool, 0, offsetof($1, exec_context.syslog_level_prefix)
+$1.LogLevelMax, config_parse_log_level, 0, offsetof($1, exec_context.log_level_max)
+$1.LogRateLimitIntervalSec, config_parse_sec, 0, offsetof($1, exec_context.log_rate_limit_interval_usec)
+$1.LogRateLimitBurst, config_parse_unsigned, 0, offsetof($1, exec_context.log_rate_limit_burst)
+$1.LogExtraFields, config_parse_log_extra_fields, 0, offsetof($1, exec_context)
+$1.Capabilities, config_parse_warn_compat, DISABLED_LEGACY, offsetof($1, exec_context)
+$1.SecureBits, config_parse_exec_secure_bits, 0, offsetof($1, exec_context.secure_bits)
+$1.CapabilityBoundingSet, config_parse_capability_set, 0, offsetof($1, exec_context.capability_bounding_set)
+$1.AmbientCapabilities, config_parse_capability_set, 0, offsetof($1, exec_context.capability_ambient_set)
+$1.TimerSlackNSec, config_parse_nsec, 0, offsetof($1, exec_context.timer_slack_nsec)
+$1.NoNewPrivileges, config_parse_bool, 0, offsetof($1, exec_context.no_new_privileges)
+$1.KeyringMode, config_parse_exec_keyring_mode, 0, offsetof($1, exec_context.keyring_mode)
+m4_ifdef(`HAVE_SECCOMP',
+`$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context)
+$1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs)
+$1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context)
+$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute)
+$1.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof($1, exec_context)
+$1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime)
+$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)
+$1.LockPersonality, config_parse_bool, 0, offsetof($1, exec_context.lock_personality)',
+`$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.RestrictNamespaces, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.LockPersonality, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
+$1.LimitCPU, config_parse_rlimit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
+$1.LimitFSIZE, config_parse_rlimit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit)
+$1.LimitDATA, config_parse_rlimit, RLIMIT_DATA, offsetof($1, exec_context.rlimit)
+$1.LimitSTACK, config_parse_rlimit, RLIMIT_STACK, offsetof($1, exec_context.rlimit)
+$1.LimitCORE, config_parse_rlimit, RLIMIT_CORE, offsetof($1, exec_context.rlimit)
+$1.LimitRSS, config_parse_rlimit, RLIMIT_RSS, offsetof($1, exec_context.rlimit)
+$1.LimitNOFILE, config_parse_rlimit, RLIMIT_NOFILE, offsetof($1, exec_context.rlimit)
+$1.LimitAS, config_parse_rlimit, RLIMIT_AS, offsetof($1, exec_context.rlimit)
+$1.LimitNPROC, config_parse_rlimit, RLIMIT_NPROC, offsetof($1, exec_context.rlimit)
+$1.LimitMEMLOCK, config_parse_rlimit, RLIMIT_MEMLOCK, offsetof($1, exec_context.rlimit)
+$1.LimitLOCKS, config_parse_rlimit, RLIMIT_LOCKS, offsetof($1, exec_context.rlimit)
+$1.LimitSIGPENDING, config_parse_rlimit, RLIMIT_SIGPENDING, offsetof($1, exec_context.rlimit)
+$1.LimitMSGQUEUE, config_parse_rlimit, RLIMIT_MSGQUEUE, offsetof($1, exec_context.rlimit)
+$1.LimitNICE, config_parse_rlimit, RLIMIT_NICE, offsetof($1, exec_context.rlimit)
+$1.LimitRTPRIO, config_parse_rlimit, RLIMIT_RTPRIO, offsetof($1, exec_context.rlimit)
+$1.LimitRTTIME, config_parse_rlimit, RLIMIT_RTTIME, offsetof($1, exec_context.rlimit)
+$1.ReadWriteDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths)
+$1.ReadOnlyDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths)
+$1.InaccessibleDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths)
+$1.ReadWritePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths)
+$1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths)
+$1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths)
+$1.BindPaths, config_parse_bind_paths, 0, offsetof($1, exec_context)
+$1.BindReadOnlyPaths, config_parse_bind_paths, 0, offsetof($1, exec_context)
+$1.TemporaryFileSystem, config_parse_temporary_filesystems, 0, offsetof($1, exec_context)
+$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
+$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
+$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables)
+$1.ProtectKernelModules, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_modules)
+$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups)
+$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
+$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users)
+$1.PrivateMounts, config_parse_bool, 0, offsetof($1, exec_context.private_mounts)
+$1.ProtectSystem, config_parse_protect_system, 0, offsetof($1, exec_context.protect_system)
+$1.ProtectHome, config_parse_protect_home, 0, offsetof($1, exec_context.protect_home)
+$1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context.mount_flags)
+$1.MountAPIVFS, config_parse_bool, 0, offsetof($1, exec_context.mount_apivfs)
+$1.Personality, config_parse_personality, 0, offsetof($1, exec_context.personality)
+$1.RuntimeDirectoryPreserve, config_parse_runtime_preserve_mode, 0, offsetof($1, exec_context.runtime_directory_preserve_mode)
+$1.RuntimeDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_RUNTIME].mode)
+$1.RuntimeDirectory, config_parse_exec_directories, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_RUNTIME].paths)
+$1.StateDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_STATE].mode)
+$1.StateDirectory, config_parse_exec_directories, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_STATE].paths)
+$1.CacheDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_CACHE].mode)
+$1.CacheDirectory, config_parse_exec_directories, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_CACHE].paths)
+$1.LogsDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_LOGS].mode)
+$1.LogsDirectory, config_parse_exec_directories, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_LOGS].paths)
+$1.ConfigurationDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_CONFIGURATION].mode)
+$1.ConfigurationDirectory, config_parse_exec_directories, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_CONFIGURATION].paths)
+m4_ifdef(`HAVE_PAM',
+`$1.PAMName, config_parse_unit_string_printf, 0, offsetof($1, exec_context.pam_name)',
+`$1.PAMName, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
+$1.IgnoreSIGPIPE, config_parse_bool, 0, offsetof($1, exec_context.ignore_sigpipe)
+$1.UtmpIdentifier, config_parse_unit_string_printf, 0, offsetof($1, exec_context.utmp_id)
+$1.UtmpMode, config_parse_exec_utmp_mode, 0, offsetof($1, exec_context.utmp_mode)
+m4_ifdef(`HAVE_SELINUX',
+`$1.SELinuxContext, config_parse_exec_selinux_context, 0, offsetof($1, exec_context)',
+`$1.SELinuxContext, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
+m4_ifdef(`HAVE_APPARMOR',
+`$1.AppArmorProfile, config_parse_exec_apparmor_profile, 0, offsetof($1, exec_context)',
+`$1.AppArmorProfile, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
+m4_ifdef(`ENABLE_SMACK',
+`$1.SmackProcessLabel, config_parse_exec_smack_process_label, 0, offsetof($1, exec_context)',
+`$1.SmackProcessLabel, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')'
+)m4_dnl
+m4_define(`KILL_CONTEXT_CONFIG_ITEMS',
+`$1.SendSIGKILL, config_parse_bool, 0, offsetof($1, kill_context.send_sigkill)
+$1.SendSIGHUP, config_parse_bool, 0, offsetof($1, kill_context.send_sighup)
+$1.KillMode, config_parse_kill_mode, 0, offsetof($1, kill_context.kill_mode)
+$1.KillSignal, config_parse_signal, 0, offsetof($1, kill_context.kill_signal)
+$1.FinalKillSignal, config_parse_signal, 0, offsetof($1, kill_context.final_kill_signal)
+$1.WatchdogSignal, config_parse_signal, 0, offsetof($1, kill_context.watchdog_signal)'
+)m4_dnl
+m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS',
+`$1.Slice, config_parse_unit_slice, 0, 0
+$1.CPUAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.cpu_accounting)
+$1.CPUWeight, config_parse_cg_weight, 0, offsetof($1, cgroup_context.cpu_weight)
+$1.StartupCPUWeight, config_parse_cg_weight, 0, offsetof($1, cgroup_context.startup_cpu_weight)
+$1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.cpu_shares)
+$1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares)
+$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context)
+$1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting)
+$1.MemoryMin, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemorySwapMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context)
+$1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy)
+$1.IOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.io_accounting)
+$1.IOWeight, config_parse_cg_weight, 0, offsetof($1, cgroup_context.io_weight)
+$1.StartupIOWeight, config_parse_cg_weight, 0, offsetof($1, cgroup_context.startup_io_weight)
+$1.IODeviceWeight, config_parse_io_device_weight, 0, offsetof($1, cgroup_context)
+$1.IOReadBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
+$1.IOWriteBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
+$1.IOReadIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
+$1.IOWriteIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
+$1.IODeviceLatencyTargetSec, config_parse_io_device_latency, 0, offsetof($1, cgroup_context)
+$1.BlockIOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.blockio_accounting)
+$1.BlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.blockio_weight)
+$1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight)
+$1.BlockIODeviceWeight, config_parse_blockio_device_weight, 0, offsetof($1, cgroup_context)
+$1.BlockIOReadBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)
+$1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)
+$1.TasksAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.tasks_accounting)
+$1.TasksMax, config_parse_tasks_max, 0, offsetof($1, cgroup_context.tasks_max)
+$1.Delegate, config_parse_delegate, 0, offsetof($1, cgroup_context)
+$1.DisableControllers, config_parse_disable_controllers, 0, offsetof($1, cgroup_context)
+$1.IPAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.ip_accounting)
+$1.IPAddressAllow, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_allow)
+$1.IPAddressDeny, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_deny)
+$1.NetClass, config_parse_warn_compat, DISABLED_LEGACY, 0'
+)m4_dnl
+Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description)
+Unit.Documentation, config_parse_documentation, 0, offsetof(Unit, documentation)
+Unit.SourcePath, config_parse_unit_path_printf, 0, offsetof(Unit, source_path)
+Unit.Requires, config_parse_unit_deps, UNIT_REQUIRES, 0
+Unit.Requisite, config_parse_unit_deps, UNIT_REQUISITE, 0
+Unit.Wants, config_parse_unit_deps, UNIT_WANTS, 0
+Unit.BindsTo, config_parse_unit_deps, UNIT_BINDS_TO, 0
+Unit.BindTo, config_parse_unit_deps, UNIT_BINDS_TO, 0
+Unit.Conflicts, config_parse_unit_deps, UNIT_CONFLICTS, 0
+Unit.Before, config_parse_unit_deps, UNIT_BEFORE, 0
+Unit.After, config_parse_unit_deps, UNIT_AFTER, 0
+Unit.OnFailure, config_parse_unit_deps, UNIT_ON_FAILURE, 0
+Unit.PropagatesReloadTo, config_parse_unit_deps, UNIT_PROPAGATES_RELOAD_TO, 0
+Unit.PropagateReloadTo, config_parse_unit_deps, UNIT_PROPAGATES_RELOAD_TO, 0
+Unit.ReloadPropagatedFrom, config_parse_unit_deps, UNIT_RELOAD_PROPAGATED_FROM, 0
+Unit.PropagateReloadFrom, config_parse_unit_deps, UNIT_RELOAD_PROPAGATED_FROM, 0
+Unit.PartOf, config_parse_unit_deps, UNIT_PART_OF, 0
+Unit.JoinsNamespaceOf, config_parse_unit_deps, UNIT_JOINS_NAMESPACE_OF, 0
+Unit.RequiresOverridable, config_parse_obsolete_unit_deps, UNIT_REQUIRES, 0
+Unit.RequisiteOverridable, config_parse_obsolete_unit_deps, UNIT_REQUISITE, 0
+Unit.RequiresMountsFor, config_parse_unit_requires_mounts_for, 0, 0
+Unit.StopWhenUnneeded, config_parse_bool, 0, offsetof(Unit, stop_when_unneeded)
+Unit.RefuseManualStart, config_parse_bool, 0, offsetof(Unit, refuse_manual_start)
+Unit.RefuseManualStop, config_parse_bool, 0, offsetof(Unit, refuse_manual_stop)
+Unit.AllowIsolate, config_parse_bool, 0, offsetof(Unit, allow_isolate)
+Unit.DefaultDependencies, config_parse_bool, 0, offsetof(Unit, default_dependencies)
+Unit.OnFailureJobMode, config_parse_job_mode, 0, offsetof(Unit, on_failure_job_mode)
+m4_dnl The following is a legacy alias name for compatibility
+Unit.OnFailureIsolate, config_parse_job_mode_isolate, 0, offsetof(Unit, on_failure_job_mode)
+Unit.IgnoreOnIsolate, config_parse_bool, 0, offsetof(Unit, ignore_on_isolate)
+Unit.IgnoreOnSnapshot, config_parse_warn_compat, DISABLED_LEGACY, 0
+Unit.JobTimeoutSec, config_parse_job_timeout_sec, 0, 0
+Unit.JobRunningTimeoutSec, config_parse_job_running_timeout_sec, 0, 0
+Unit.JobTimeoutAction, config_parse_emergency_action, 0, offsetof(Unit, job_timeout_action)
+Unit.JobTimeoutRebootArgument, config_parse_unit_string_printf, 0, offsetof(Unit, job_timeout_reboot_arg)
+Unit.StartLimitIntervalSec, config_parse_sec, 0, offsetof(Unit, start_limit.interval)
+m4_dnl The following is a legacy alias name for compatibility
+Unit.StartLimitInterval, config_parse_sec, 0, offsetof(Unit, start_limit.interval)
+Unit.StartLimitBurst, config_parse_unsigned, 0, offsetof(Unit, start_limit.burst)
+Unit.StartLimitAction, config_parse_emergency_action, 0, offsetof(Unit, start_limit_action)
+Unit.FailureAction, config_parse_emergency_action, 0, offsetof(Unit, failure_action)
+Unit.SuccessAction, config_parse_emergency_action, 0, offsetof(Unit, success_action)
+Unit.FailureActionExitStatus, config_parse_exit_status, 0, offsetof(Unit, failure_action_exit_status)
+Unit.SuccessActionExitStatus, config_parse_exit_status, 0, offsetof(Unit, success_action_exit_status)
+Unit.RebootArgument, config_parse_unit_string_printf, 0, offsetof(Unit, reboot_arg)
+Unit.ConditionPathExists, config_parse_unit_condition_path, CONDITION_PATH_EXISTS, offsetof(Unit, conditions)
+Unit.ConditionPathExistsGlob, config_parse_unit_condition_path, CONDITION_PATH_EXISTS_GLOB, offsetof(Unit, conditions)
+Unit.ConditionPathIsDirectory, config_parse_unit_condition_path, CONDITION_PATH_IS_DIRECTORY, offsetof(Unit, conditions)
+Unit.ConditionPathIsSymbolicLink,config_parse_unit_condition_path, CONDITION_PATH_IS_SYMBOLIC_LINK,offsetof(Unit, conditions)
+Unit.ConditionPathIsMountPoint, config_parse_unit_condition_path, CONDITION_PATH_IS_MOUNT_POINT, offsetof(Unit, conditions)
+Unit.ConditionPathIsReadWrite, config_parse_unit_condition_path, CONDITION_PATH_IS_READ_WRITE, offsetof(Unit, conditions)
+Unit.ConditionDirectoryNotEmpty, config_parse_unit_condition_path, CONDITION_DIRECTORY_NOT_EMPTY, offsetof(Unit, conditions)
+Unit.ConditionFileNotEmpty, config_parse_unit_condition_path, CONDITION_FILE_NOT_EMPTY, offsetof(Unit, conditions)
+Unit.ConditionFileIsExecutable, config_parse_unit_condition_path, CONDITION_FILE_IS_EXECUTABLE, offsetof(Unit, conditions)
+Unit.ConditionNeedsUpdate, config_parse_unit_condition_path, CONDITION_NEEDS_UPDATE, offsetof(Unit, conditions)
+Unit.ConditionFirstBoot, config_parse_unit_condition_string, CONDITION_FIRST_BOOT, offsetof(Unit, conditions)
+Unit.ConditionKernelCommandLine, config_parse_unit_condition_string, CONDITION_KERNEL_COMMAND_LINE, offsetof(Unit, conditions)
+Unit.ConditionKernelVersion, config_parse_unit_condition_string, CONDITION_KERNEL_VERSION, offsetof(Unit, conditions)
+Unit.ConditionArchitecture, config_parse_unit_condition_string, CONDITION_ARCHITECTURE, offsetof(Unit, conditions)
+Unit.ConditionVirtualization, config_parse_unit_condition_string, CONDITION_VIRTUALIZATION, offsetof(Unit, conditions)
+Unit.ConditionSecurity, config_parse_unit_condition_string, CONDITION_SECURITY, offsetof(Unit, conditions)
+Unit.ConditionCapability, config_parse_unit_condition_string, CONDITION_CAPABILITY, offsetof(Unit, conditions)
+Unit.ConditionHost, config_parse_unit_condition_string, CONDITION_HOST, offsetof(Unit, conditions)
+Unit.ConditionACPower, config_parse_unit_condition_string, CONDITION_AC_POWER, offsetof(Unit, conditions)
+Unit.ConditionUser, config_parse_unit_condition_string, CONDITION_USER, offsetof(Unit, conditions)
+Unit.ConditionGroup, config_parse_unit_condition_string, CONDITION_GROUP, offsetof(Unit, conditions)
+Unit.ConditionControlGroupController, config_parse_unit_condition_string, CONDITION_CONTROL_GROUP_CONTROLLER, offsetof(Unit, conditions)
+Unit.ConditionNull, config_parse_unit_condition_null, 0, offsetof(Unit, conditions)
+Unit.AssertPathExists, config_parse_unit_condition_path, CONDITION_PATH_EXISTS, offsetof(Unit, asserts)
+Unit.AssertPathExistsGlob, config_parse_unit_condition_path, CONDITION_PATH_EXISTS_GLOB, offsetof(Unit, asserts)
+Unit.AssertPathIsDirectory, config_parse_unit_condition_path, CONDITION_PATH_IS_DIRECTORY, offsetof(Unit, asserts)
+Unit.AssertPathIsSymbolicLink, config_parse_unit_condition_path, CONDITION_PATH_IS_SYMBOLIC_LINK,offsetof(Unit, asserts)
+Unit.AssertPathIsMountPoint, config_parse_unit_condition_path, CONDITION_PATH_IS_MOUNT_POINT, offsetof(Unit, asserts)
+Unit.AssertPathIsReadWrite, config_parse_unit_condition_path, CONDITION_PATH_IS_READ_WRITE, offsetof(Unit, asserts)
+Unit.AssertDirectoryNotEmpty, config_parse_unit_condition_path, CONDITION_DIRECTORY_NOT_EMPTY, offsetof(Unit, asserts)
+Unit.AssertFileNotEmpty, config_parse_unit_condition_path, CONDITION_FILE_NOT_EMPTY, offsetof(Unit, asserts)
+Unit.AssertFileIsExecutable, config_parse_unit_condition_path, CONDITION_FILE_IS_EXECUTABLE, offsetof(Unit, asserts)
+Unit.AssertNeedsUpdate, config_parse_unit_condition_path, CONDITION_NEEDS_UPDATE, offsetof(Unit, asserts)
+Unit.AssertFirstBoot, config_parse_unit_condition_string, CONDITION_FIRST_BOOT, offsetof(Unit, asserts)
+Unit.AssertKernelCommandLine, config_parse_unit_condition_string, CONDITION_KERNEL_COMMAND_LINE, offsetof(Unit, asserts)
+Unit.AssertKernelVersion, config_parse_unit_condition_string, CONDITION_KERNEL_VERSION, offsetof(Unit, asserts)
+Unit.AssertArchitecture, config_parse_unit_condition_string, CONDITION_ARCHITECTURE, offsetof(Unit, asserts)
+Unit.AssertVirtualization, config_parse_unit_condition_string, CONDITION_VIRTUALIZATION, offsetof(Unit, asserts)
+Unit.AssertSecurity, config_parse_unit_condition_string, CONDITION_SECURITY, offsetof(Unit, asserts)
+Unit.AssertCapability, config_parse_unit_condition_string, CONDITION_CAPABILITY, offsetof(Unit, asserts)
+Unit.AssertHost, config_parse_unit_condition_string, CONDITION_HOST, offsetof(Unit, asserts)
+Unit.AssertACPower, config_parse_unit_condition_string, CONDITION_AC_POWER, offsetof(Unit, asserts)
+Unit.AssertUser, config_parse_unit_condition_string, CONDITION_USER, offsetof(Unit, asserts)
+Unit.AssertGroup, config_parse_unit_condition_string, CONDITION_GROUP, offsetof(Unit, asserts)
+Unit.AssertControlGroupController, config_parse_unit_condition_string, CONDITION_CONTROL_GROUP_CONTROLLER, offsetof(Unit, asserts)
+Unit.AssertNull, config_parse_unit_condition_null, 0, offsetof(Unit, asserts)
+Unit.CollectMode, config_parse_collect_mode, 0, offsetof(Unit, collect_mode)
+m4_dnl
+Service.PIDFile, config_parse_pid_file, 0, offsetof(Service, pid_file)
+Service.ExecStartPre, config_parse_exec, SERVICE_EXEC_START_PRE, offsetof(Service, exec_command)
+Service.ExecStart, config_parse_exec, SERVICE_EXEC_START, offsetof(Service, exec_command)
+Service.ExecStartPost, config_parse_exec, SERVICE_EXEC_START_POST, offsetof(Service, exec_command)
+Service.ExecReload, config_parse_exec, SERVICE_EXEC_RELOAD, offsetof(Service, exec_command)
+Service.ExecStop, config_parse_exec, SERVICE_EXEC_STOP, offsetof(Service, exec_command)
+Service.ExecStopPost, config_parse_exec, SERVICE_EXEC_STOP_POST, offsetof(Service, exec_command)
+Service.RestartSec, config_parse_sec, 0, offsetof(Service, restart_usec)
+Service.TimeoutSec, config_parse_service_timeout, 0, 0
+Service.TimeoutStartSec, config_parse_service_timeout, 0, 0
+Service.TimeoutStopSec, config_parse_sec_fix_0, 0, offsetof(Service, timeout_stop_usec)
+Service.RuntimeMaxSec, config_parse_sec, 0, offsetof(Service, runtime_max_usec)
+Service.WatchdogSec, config_parse_sec, 0, offsetof(Service, watchdog_usec)
+m4_dnl The following five only exist for compatibility, they moved into Unit, see above
+Service.StartLimitInterval, config_parse_sec, 0, offsetof(Unit, start_limit.interval)
+Service.StartLimitBurst, config_parse_unsigned, 0, offsetof(Unit, start_limit.burst)
+Service.StartLimitAction, config_parse_emergency_action, 0, offsetof(Unit, start_limit_action)
+Service.FailureAction, config_parse_emergency_action, 0, offsetof(Unit, failure_action)
+Service.RebootArgument, config_parse_unit_string_printf, 0, offsetof(Unit, reboot_arg)
+Service.Type, config_parse_service_type, 0, offsetof(Service, type)
+Service.Restart, config_parse_service_restart, 0, offsetof(Service, restart)
+Service.PermissionsStartOnly, config_parse_bool, 0, offsetof(Service, permissions_start_only)
+Service.RootDirectoryStartOnly, config_parse_bool, 0, offsetof(Service, root_directory_start_only)
+Service.RemainAfterExit, config_parse_bool, 0, offsetof(Service, remain_after_exit)
+Service.GuessMainPID, config_parse_bool, 0, offsetof(Service, guess_main_pid)
+Service.RestartPreventExitStatus, config_parse_set_status, 0, offsetof(Service, restart_prevent_status)
+Service.RestartForceExitStatus, config_parse_set_status, 0, offsetof(Service, restart_force_status)
+Service.SuccessExitStatus, config_parse_set_status, 0, offsetof(Service, success_status)
+Service.SysVStartPriority, config_parse_warn_compat, DISABLED_LEGACY, 0
+Service.NonBlocking, config_parse_bool, 0, offsetof(Service, exec_context.non_blocking)
+Service.BusName, config_parse_bus_name, 0, offsetof(Service, bus_name)
+Service.FileDescriptorStoreMax, config_parse_unsigned, 0, offsetof(Service, n_fd_store_max)
+Service.NotifyAccess, config_parse_notify_access, 0, offsetof(Service, notify_access)
+Service.Sockets, config_parse_service_sockets, 0, 0
+Service.BusPolicy, config_parse_warn_compat, DISABLED_LEGACY, 0
+Service.USBFunctionDescriptors, config_parse_unit_path_printf, 0, offsetof(Service, usb_function_descriptors)
+Service.USBFunctionStrings, config_parse_unit_path_printf, 0, offsetof(Service, usb_function_strings)
+EXEC_CONTEXT_CONFIG_ITEMS(Service)m4_dnl
+CGROUP_CONTEXT_CONFIG_ITEMS(Service)m4_dnl
+KILL_CONTEXT_CONFIG_ITEMS(Service)m4_dnl
+m4_dnl
+Socket.ListenStream, config_parse_socket_listen, SOCKET_SOCKET, 0
+Socket.ListenDatagram, config_parse_socket_listen, SOCKET_SOCKET, 0
+Socket.ListenSequentialPacket, config_parse_socket_listen, SOCKET_SOCKET, 0
+Socket.ListenFIFO, config_parse_socket_listen, SOCKET_FIFO, 0
+Socket.ListenNetlink, config_parse_socket_listen, SOCKET_SOCKET, 0
+Socket.ListenSpecial, config_parse_socket_listen, SOCKET_SPECIAL, 0
+Socket.ListenMessageQueue, config_parse_socket_listen, SOCKET_MQUEUE, 0
+Socket.ListenUSBFunction, config_parse_socket_listen, SOCKET_USB_FUNCTION, 0
+Socket.SocketProtocol, config_parse_socket_protocol, 0, offsetof(Socket, socket_protocol)
+Socket.BindIPv6Only, config_parse_socket_bind, 0, offsetof(Socket, bind_ipv6_only)
+Socket.Backlog, config_parse_unsigned, 0, offsetof(Socket, backlog)
+Socket.BindToDevice, config_parse_socket_bindtodevice, 0, 0
+Socket.ExecStartPre, config_parse_exec, SOCKET_EXEC_START_PRE, offsetof(Socket, exec_command)
+Socket.ExecStartPost, config_parse_exec, SOCKET_EXEC_START_POST, offsetof(Socket, exec_command)
+Socket.ExecStopPre, config_parse_exec, SOCKET_EXEC_STOP_PRE, offsetof(Socket, exec_command)
+Socket.ExecStopPost, config_parse_exec, SOCKET_EXEC_STOP_POST, offsetof(Socket, exec_command)
+Socket.TimeoutSec, config_parse_sec_fix_0, 0, offsetof(Socket, timeout_usec)
+Socket.SocketUser, config_parse_user_group, 0, offsetof(Socket, user)
+Socket.SocketGroup, config_parse_user_group, 0, offsetof(Socket, group)
+Socket.SocketMode, config_parse_mode, 0, offsetof(Socket, socket_mode)
+Socket.DirectoryMode, config_parse_mode, 0, offsetof(Socket, directory_mode)
+Socket.Accept, config_parse_bool, 0, offsetof(Socket, accept)
+Socket.Writable, config_parse_bool, 0, offsetof(Socket, writable)
+Socket.MaxConnections, config_parse_unsigned, 0, offsetof(Socket, max_connections)
+Socket.MaxConnectionsPerSource, config_parse_unsigned, 0, offsetof(Socket, max_connections_per_source)
+Socket.KeepAlive, config_parse_bool, 0, offsetof(Socket, keep_alive)
+Socket.KeepAliveTimeSec, config_parse_sec, 0, offsetof(Socket, keep_alive_time)
+Socket.KeepAliveIntervalSec, config_parse_sec, 0, offsetof(Socket, keep_alive_interval)
+Socket.KeepAliveProbes, config_parse_unsigned, 0, offsetof(Socket, keep_alive_cnt)
+Socket.DeferAcceptSec, config_parse_sec, 0, offsetof(Socket, defer_accept)
+Socket.NoDelay, config_parse_bool, 0, offsetof(Socket, no_delay)
+Socket.Priority, config_parse_int, 0, offsetof(Socket, priority)
+Socket.ReceiveBuffer, config_parse_iec_size, 0, offsetof(Socket, receive_buffer)
+Socket.SendBuffer, config_parse_iec_size, 0, offsetof(Socket, send_buffer)
+Socket.IPTOS, config_parse_ip_tos, 0, offsetof(Socket, ip_tos)
+Socket.IPTTL, config_parse_int, 0, offsetof(Socket, ip_ttl)
+Socket.Mark, config_parse_int, 0, offsetof(Socket, mark)
+Socket.PipeSize, config_parse_iec_size, 0, offsetof(Socket, pipe_size)
+Socket.FreeBind, config_parse_bool, 0, offsetof(Socket, free_bind)
+Socket.Transparent, config_parse_bool, 0, offsetof(Socket, transparent)
+Socket.Broadcast, config_parse_bool, 0, offsetof(Socket, broadcast)
+Socket.PassCredentials, config_parse_bool, 0, offsetof(Socket, pass_cred)
+Socket.PassSecurity, config_parse_bool, 0, offsetof(Socket, pass_sec)
+Socket.TCPCongestion, config_parse_string, 0, offsetof(Socket, tcp_congestion)
+Socket.ReusePort, config_parse_bool, 0, offsetof(Socket, reuse_port)
+Socket.MessageQueueMaxMessages, config_parse_long, 0, offsetof(Socket, mq_maxmsg)
+Socket.MessageQueueMessageSize, config_parse_long, 0, offsetof(Socket, mq_msgsize)
+Socket.RemoveOnStop, config_parse_bool, 0, offsetof(Socket, remove_on_stop)
+Socket.Symlinks, config_parse_unit_path_strv_printf, 0, offsetof(Socket, symlinks)
+Socket.FileDescriptorName, config_parse_fdname, 0, 0
+Socket.Service, config_parse_socket_service, 0, 0
+Socket.TriggerLimitIntervalSec, config_parse_sec, 0, offsetof(Socket, trigger_limit.interval)
+Socket.TriggerLimitBurst, config_parse_unsigned, 0, offsetof(Socket, trigger_limit.burst)
+m4_ifdef(`ENABLE_SMACK',
+`Socket.SmackLabel, config_parse_unit_string_printf, 0, offsetof(Socket, smack)
+Socket.SmackLabelIPIn, config_parse_unit_string_printf, 0, offsetof(Socket, smack_ip_in)
+Socket.SmackLabelIPOut, config_parse_unit_string_printf, 0, offsetof(Socket, smack_ip_out)',
+`Socket.SmackLabel, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+Socket.SmackLabelIPIn, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+Socket.SmackLabelIPOut, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
+m4_ifdef(`HAVE_SELINUX',
+`Socket.SELinuxContextFromNet, config_parse_bool, 0, offsetof(Socket, selinux_context_from_net)',
+`Socket.SELinuxContextFromNet, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
+EXEC_CONTEXT_CONFIG_ITEMS(Socket)m4_dnl
+CGROUP_CONTEXT_CONFIG_ITEMS(Socket)m4_dnl
+KILL_CONTEXT_CONFIG_ITEMS(Socket)m4_dnl
+m4_dnl
+Mount.What, config_parse_unit_string_printf, 0, offsetof(Mount, parameters_fragment.what)
+Mount.Where, config_parse_unit_path_printf, 0, offsetof(Mount, where)
+Mount.Options, config_parse_unit_string_printf, 0, offsetof(Mount, parameters_fragment.options)
+Mount.Type, config_parse_unit_string_printf, 0, offsetof(Mount, parameters_fragment.fstype)
+Mount.TimeoutSec, config_parse_sec_fix_0, 0, offsetof(Mount, timeout_usec)
+Mount.DirectoryMode, config_parse_mode, 0, offsetof(Mount, directory_mode)
+Mount.SloppyOptions, config_parse_bool, 0, offsetof(Mount, sloppy_options)
+Mount.LazyUnmount, config_parse_bool, 0, offsetof(Mount, lazy_unmount)
+Mount.ForceUnmount, config_parse_bool, 0, offsetof(Mount, force_unmount)
+EXEC_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
+CGROUP_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
+KILL_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
+m4_dnl
+Automount.Where, config_parse_unit_path_printf, 0, offsetof(Automount, where)
+Automount.DirectoryMode, config_parse_mode, 0, offsetof(Automount, directory_mode)
+Automount.TimeoutIdleSec, config_parse_sec_fix_0, 0, offsetof(Automount, timeout_idle_usec)
+m4_dnl
+Swap.What, config_parse_unit_path_printf, 0, offsetof(Swap, parameters_fragment.what)
+Swap.Priority, config_parse_int, 0, offsetof(Swap, parameters_fragment.priority)
+Swap.Options, config_parse_unit_string_printf, 0, offsetof(Swap, parameters_fragment.options)
+Swap.TimeoutSec, config_parse_sec_fix_0, 0, offsetof(Swap, timeout_usec)
+EXEC_CONTEXT_CONFIG_ITEMS(Swap)m4_dnl
+CGROUP_CONTEXT_CONFIG_ITEMS(Swap)m4_dnl
+KILL_CONTEXT_CONFIG_ITEMS(Swap)m4_dnl
+m4_dnl
+Timer.OnCalendar, config_parse_timer, 0, 0
+Timer.OnActiveSec, config_parse_timer, 0, 0
+Timer.OnBootSec, config_parse_timer, 0, 0
+Timer.OnStartupSec, config_parse_timer, 0, 0
+Timer.OnUnitActiveSec, config_parse_timer, 0, 0
+Timer.OnUnitInactiveSec, config_parse_timer, 0, 0
+Timer.Persistent, config_parse_bool, 0, offsetof(Timer, persistent)
+Timer.WakeSystem, config_parse_bool, 0, offsetof(Timer, wake_system)
+Timer.RemainAfterElapse, config_parse_bool, 0, offsetof(Timer, remain_after_elapse)
+Timer.AccuracySec, config_parse_sec, 0, offsetof(Timer, accuracy_usec)
+Timer.RandomizedDelaySec, config_parse_sec, 0, offsetof(Timer, random_usec)
+Timer.Unit, config_parse_trigger_unit, 0, 0
+m4_dnl
+Path.PathExists, config_parse_path_spec, 0, 0
+Path.PathExistsGlob, config_parse_path_spec, 0, 0
+Path.PathChanged, config_parse_path_spec, 0, 0
+Path.PathModified, config_parse_path_spec, 0, 0
+Path.DirectoryNotEmpty, config_parse_path_spec, 0, 0
+Path.Unit, config_parse_trigger_unit, 0, 0
+Path.MakeDirectory, config_parse_bool, 0, offsetof(Path, make_directory)
+Path.DirectoryMode, config_parse_mode, 0, offsetof(Path, directory_mode)
+m4_dnl
+CGROUP_CONTEXT_CONFIG_ITEMS(Slice)m4_dnl
+m4_dnl
+CGROUP_CONTEXT_CONFIG_ITEMS(Scope)m4_dnl
+KILL_CONTEXT_CONFIG_ITEMS(Scope)m4_dnl
+Scope.TimeoutStopSec, config_parse_sec, 0, offsetof(Scope, timeout_stop_usec)
+m4_dnl The [Install] section is ignored here.
+Install.Alias, NULL, 0, 0
+Install.WantedBy, NULL, 0, 0
+Install.RequiredBy, NULL, 0, 0
+Install.Also, NULL, 0, 0
+Install.DefaultInstance, NULL, 0, 0
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
new file mode 100644
index 0000000..44f00a3
--- /dev/null
+++ b/src/core/load-fragment.c
@@ -0,0 +1,4828 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2012 Holger Hans Peter Freyther
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <linux/oom.h>
+#if HAVE_SECCOMP
+#include <seccomp.h>
+#endif
+#include <sched.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "all-units.h"
+#include "bus-error.h"
+#include "bus-internal.h"
+#include "bus-util.h"
+#include "cap-list.h"
+#include "capability-util.h"
+#include "cgroup.h"
+#include "conf-parser.h"
+#include "cpu-set-util.h"
+#include "env-util.h"
+#include "errno-list.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "io-util.h"
+#include "ioprio.h"
+#include "ip-protocol-list.h"
+#include "journal-util.h"
+#include "load-fragment.h"
+#include "log.h"
+#include "missing.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "securebits-util.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit-printf.h"
+#include "user-util.h"
+#include "web-util.h"
+
+static int parse_socket_protocol(const char *s) {
+ int r;
+
+ r = parse_ip_protocol(s);
+ if (r < 0)
+ return r;
+ if (!IN_SET(r, IPPROTO_UDPLITE, IPPROTO_SCTP))
+ return -EPROTONOSUPPORT;
+
+ return r;
+}
+
+DEFINE_CONFIG_PARSE(config_parse_socket_protocol, parse_socket_protocol, "Failed to parse socket protocol");
+DEFINE_CONFIG_PARSE(config_parse_exec_secure_bits, secure_bits_from_string, "Failed to parse secure bits");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_collect_mode, collect_mode, CollectMode, "Failed to parse garbage collection mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_keyring_mode, exec_keyring_mode, ExecKeyringMode, "Failed to parse keyring mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode, "Failed to parse utmp mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_job_mode, job_mode, JobMode, "Failed to parse job mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_kill_mode, kill_mode, KillMode, "Failed to parse kill mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_notify_access, notify_access, NotifyAccess, "Failed to parse notify access specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_home, protect_home, ProtectHome, "Failed to parse protect home value");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_system, protect_system, ProtectSystem, "Failed to parse protect system value");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_runtime_preserve_mode, exec_preserve_mode, ExecPreserveMode, "Failed to parse runtime directory preserve mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_service_type, service_type, ServiceType, "Failed to parse service type");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_service_restart, service_restart, ServiceRestart, "Failed to parse service restart specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_socket_bind, socket_address_bind_ipv6_only_or_bool, SocketAddressBindIPv6Only, "Failed to parse bind IPv6 only value");
+DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_ip_tos, ip_tos, int, -1, "Failed to parse IP TOS value");
+DEFINE_CONFIG_PARSE_PTR(config_parse_blockio_weight, cg_blkio_weight_parse, uint64_t, "Invalid block IO weight");
+DEFINE_CONFIG_PARSE_PTR(config_parse_cg_weight, cg_weight_parse, uint64_t, "Invalid weight");
+DEFINE_CONFIG_PARSE_PTR(config_parse_cpu_shares, cg_cpu_shares_parse, uint64_t, "Invalid CPU shares");
+DEFINE_CONFIG_PARSE_PTR(config_parse_exec_mount_flags, mount_propagation_flags_from_string, unsigned long, "Failed to parse mount flag");
+
+int config_parse_unit_deps(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ UnitDependency d = ltype;
+ Unit *u = userdata;
+ const char *p;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+ int r;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_RETAIN_ESCAPE);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ break;
+ }
+
+ r = unit_name_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", word);
+ continue;
+ }
+
+ r = unit_add_dependency_by_name(u, d, k, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to add dependency on %s, ignoring: %m", k);
+ }
+
+ return 0;
+}
+
+int config_parse_obsolete_unit_deps(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Unit dependency type %s= is obsolete, replacing by %s=, please update your unit file", lvalue, unit_dependency_to_string(ltype));
+
+ return config_parse_unit_deps(unit, filename, line, section, section_line, lvalue, ltype, rvalue, data, userdata);
+}
+
+int config_parse_unit_string_printf(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *k = NULL;
+ Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ return config_parse_string(unit, filename, line, section, section_line, lvalue, ltype, k, data, userdata);
+}
+
+int config_parse_unit_strv_printf(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Unit *u = userdata;
+ _cleanup_free_ char *k = NULL;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ return config_parse_strv(unit, filename, line, section, section_line, lvalue, ltype, k, data, userdata);
+}
+
+int config_parse_unit_path_printf(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *k = NULL;
+ Unit *u = userdata;
+ int r;
+ bool fatal = ltype;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ /* Let's not bother with anything that is too long */
+ if (strlen(rvalue) >= PATH_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "%s value too long%s.",
+ lvalue, fatal ? "" : ", ignoring");
+ return fatal ? -ENAMETOOLONG : 0;
+ }
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in '%s'%s: %m",
+ rvalue, fatal ? "" : ", ignoring");
+ return fatal ? -ENOEXEC : 0;
+ }
+
+ return config_parse_path(unit, filename, line, section, section_line, lvalue, ltype, k, data, userdata);
+}
+
+int config_parse_unit_path_strv_printf(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***x = data;
+ Unit *u = userdata;
+ int r;
+ const char *p;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ *x = strv_free(*x);
+ return 0;
+ }
+
+ for (p = rvalue;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", word);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(k, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ r = strv_push(x, k);
+ if (r < 0)
+ return log_oom();
+ k = NULL;
+ }
+}
+
+int config_parse_socket_listen(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ SocketPort *p = NULL;
+ SocketPort *tail;
+ Socket *s;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ s = SOCKET(data);
+
+ if (isempty(rvalue)) {
+ /* An empty assignment removes all ports */
+ socket_free_ports(s);
+ return 0;
+ }
+
+ p = new0(SocketPort, 1);
+ if (!p)
+ return log_oom();
+
+ if (ltype != SOCKET_SOCKET) {
+ _cleanup_free_ char *k = NULL;
+
+ r = unit_full_printf(UNIT(s), rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(k, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ free_and_replace(p->path, k);
+ p->type = ltype;
+
+ } else if (streq(lvalue, "ListenNetlink")) {
+ _cleanup_free_ char *k = NULL;
+
+ r = unit_full_printf(UNIT(s), rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = socket_address_parse_netlink(&p->address, k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse address value in '%s', ignoring: %m", k);
+ return 0;
+ }
+
+ p->type = SOCKET_SOCKET;
+
+ } else {
+ _cleanup_free_ char *k = NULL;
+
+ r = unit_full_printf(UNIT(s), rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = socket_address_parse_and_warn(&p->address, k);
+ if (r < 0) {
+ if (r != -EAFNOSUPPORT)
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse address value in '%s', ignoring: %m", k);
+ return 0;
+ }
+
+ if (streq(lvalue, "ListenStream"))
+ p->address.type = SOCK_STREAM;
+ else if (streq(lvalue, "ListenDatagram"))
+ p->address.type = SOCK_DGRAM;
+ else {
+ assert(streq(lvalue, "ListenSequentialPacket"));
+ p->address.type = SOCK_SEQPACKET;
+ }
+
+ if (socket_address_family(&p->address) != AF_LOCAL && p->address.type == SOCK_SEQPACKET) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Address family not supported, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ p->type = SOCKET_SOCKET;
+ }
+
+ p->fd = -1;
+ p->auxiliary_fds = NULL;
+ p->n_auxiliary_fds = 0;
+ p->socket = s;
+
+ LIST_FIND_TAIL(port, s->ports, tail);
+ LIST_INSERT_AFTER(port, s->ports, tail, p);
+
+ p = NULL;
+
+ return 0;
+}
+
+int config_parse_exec_nice(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int priority, r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->nice_set = false;
+ return 0;
+ }
+
+ r = parse_nice(rvalue, &priority);
+ if (r < 0) {
+ if (r == -ERANGE)
+ log_syntax(unit, LOG_ERR, filename, line, r, "Nice priority out of range, ignoring: %s", rvalue);
+ else
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse nice priority '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ c->nice = priority;
+ c->nice_set = true;
+
+ return 0;
+}
+
+int config_parse_exec_oom_score_adjust(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int oa, r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->oom_score_adjust_set = false;
+ return 0;
+ }
+
+ r = parse_oom_score_adjust(rvalue, &oa);
+ if (r < 0) {
+ if (r == -ERANGE)
+ log_syntax(unit, LOG_ERR, filename, line, r, "OOM score adjust value out of range, ignoring: %s", rvalue);
+ else
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse the OOM score adjust value '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ c->oom_score_adjust = oa;
+ c->oom_score_adjust_set = true;
+
+ return 0;
+}
+
+int config_parse_exec(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecCommand **e = data;
+ Unit *u = userdata;
+ const char *p;
+ bool semicolon;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(e);
+
+ e += ltype;
+ rvalue += strspn(rvalue, WHITESPACE);
+
+ if (isempty(rvalue)) {
+ /* An empty assignment resets the list */
+ *e = exec_command_free_list(*e);
+ return 0;
+ }
+
+ p = rvalue;
+ do {
+ _cleanup_free_ char *path = NULL, *firstword = NULL;
+ ExecCommandFlags flags = 0;
+ bool ignore = false, separate_argv0 = false;
+ _cleanup_free_ ExecCommand *nce = NULL;
+ _cleanup_strv_free_ char **n = NULL;
+ size_t nlen = 0, nbufsize = 0;
+ const char *f;
+
+ semicolon = false;
+
+ r = extract_first_word_and_warn(&p, &firstword, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE, unit, filename, line, rvalue);
+ if (r <= 0)
+ return 0;
+
+ f = firstword;
+ for (;;) {
+ /* We accept an absolute path as first argument. If it's prefixed with - and the path doesn't
+ * exist, we ignore it instead of erroring out; if it's prefixed with @, we allow overriding of
+ * argv[0]; if it's prefixed with +, it will be run with full privileges and no sandboxing; if
+ * it's prefixed with '!' we apply sandboxing, but do not change user/group credentials; if
+ * it's prefixed with '!!', then we apply user/group credentials if the kernel supports ambient
+ * capabilities -- if it doesn't we don't apply the credentials themselves, but do apply most
+ * other sandboxing, with some special exceptions for changing UID.
+ *
+ * The idea is that '!!' may be used to write services that can take benefit of systemd's
+ * UID/GID dropping if the kernel supports ambient creds, but provide an automatic fallback to
+ * privilege dropping within the daemon if the kernel does not offer that. */
+
+ if (*f == '-' && !(flags & EXEC_COMMAND_IGNORE_FAILURE)) {
+ flags |= EXEC_COMMAND_IGNORE_FAILURE;
+ ignore = true;
+ } else if (*f == '@' && !separate_argv0)
+ separate_argv0 = true;
+ else if (*f == '+' && !(flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID|EXEC_COMMAND_AMBIENT_MAGIC)))
+ flags |= EXEC_COMMAND_FULLY_PRIVILEGED;
+ else if (*f == '!' && !(flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID|EXEC_COMMAND_AMBIENT_MAGIC)))
+ flags |= EXEC_COMMAND_NO_SETUID;
+ else if (*f == '!' && !(flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_AMBIENT_MAGIC))) {
+ flags &= ~EXEC_COMMAND_NO_SETUID;
+ flags |= EXEC_COMMAND_AMBIENT_MAGIC;
+ } else
+ break;
+ f++;
+ }
+
+ r = unit_full_printf(u, f, &path);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in '%s'%s: %m",
+ f, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ if (isempty(path)) {
+ /* First word is either "-" or "@" with no command. */
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Empty path in command line%s: '%s'",
+ ignore ? ", ignoring" : "", rvalue);
+ return ignore ? 0 : -ENOEXEC;
+ }
+ if (!string_is_safe(path)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Executable name contains special characters%s: %s",
+ ignore ? ", ignoring" : "", path);
+ return ignore ? 0 : -ENOEXEC;
+ }
+ if (endswith(path, "/")) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Executable path specifies a directory%s: %s",
+ ignore ? ", ignoring" : "", path);
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ if (!path_is_absolute(path)) {
+ const char *prefix;
+ bool found = false;
+
+ if (!filename_is_valid(path)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Neither a valid executable name nor an absolute path%s: %s",
+ ignore ? ", ignoring" : "", path);
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ /* Resolve a single-component name to a full path */
+ NULSTR_FOREACH(prefix, DEFAULT_PATH_NULSTR) {
+ _cleanup_free_ char *fullpath = NULL;
+
+ fullpath = strjoin(prefix, "/", path);
+ if (!fullpath)
+ return log_oom();
+
+ if (access(fullpath, F_OK) >= 0) {
+ free_and_replace(path, fullpath);
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Executable \"%s\" not found in path \"%s\"%s",
+ path, DEFAULT_PATH, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -ENOEXEC;
+ }
+ }
+
+ if (!separate_argv0) {
+ char *w = NULL;
+
+ if (!GREEDY_REALLOC(n, nbufsize, nlen + 2))
+ return log_oom();
+
+ w = strdup(path);
+ if (!w)
+ return log_oom();
+ n[nlen++] = w;
+ n[nlen] = NULL;
+ }
+
+ path_simplify(path, false);
+
+ while (!isempty(p)) {
+ _cleanup_free_ char *word = NULL, *resolved = NULL;
+
+ /* Check explicitly for an unquoted semicolon as
+ * command separator token. */
+ if (p[0] == ';' && (!p[1] || strchr(WHITESPACE, p[1]))) {
+ p++;
+ p += strspn(p, WHITESPACE);
+ semicolon = true;
+ break;
+ }
+
+ /* Check for \; explicitly, to not confuse it with \\; or "\;" or "\\;" etc.
+ * extract_first_word() would return the same for all of those. */
+ if (p[0] == '\\' && p[1] == ';' && (!p[2] || strchr(WHITESPACE, p[2]))) {
+ char *w;
+
+ p += 2;
+ p += strspn(p, WHITESPACE);
+
+ if (!GREEDY_REALLOC(n, nbufsize, nlen + 2))
+ return log_oom();
+
+ w = strdup(";");
+ if (!w)
+ return log_oom();
+ n[nlen++] = w;
+ n[nlen] = NULL;
+ continue;
+ }
+
+ r = extract_first_word_and_warn(&p, &word, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE, unit, filename, line, rvalue);
+ if (r == 0)
+ break;
+ if (r < 0)
+ return ignore ? 0 : -ENOEXEC;
+
+ r = unit_full_printf(u, word, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in %s%s: %m",
+ word, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ if (!GREEDY_REALLOC(n, nbufsize, nlen + 2))
+ return log_oom();
+
+ n[nlen++] = TAKE_PTR(resolved);
+ n[nlen] = NULL;
+ }
+
+ if (!n || !n[0]) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Empty executable name or zeroeth argument%s: %s",
+ ignore ? ", ignoring" : "", rvalue);
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ nce = new0(ExecCommand, 1);
+ if (!nce)
+ return log_oom();
+
+ nce->argv = TAKE_PTR(n);
+ nce->path = TAKE_PTR(path);
+ nce->flags = flags;
+
+ exec_command_append_list(e, nce);
+
+ /* Do not _cleanup_free_ these. */
+ nce = NULL;
+
+ rvalue = p;
+ } while (semicolon);
+
+ return 0;
+}
+
+int config_parse_socket_bindtodevice(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Socket *s = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue) || streq(rvalue, "*")) {
+ s->bind_to_device = mfree(s->bind_to_device);
+ return 0;
+ }
+
+ if (!ifname_valid(rvalue)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid interface name, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (free_and_strdup(&s->bind_to_device, rvalue) < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_exec_input(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ Unit *u = userdata;
+ const char *n;
+ ExecInput ei;
+ int r;
+
+ assert(data);
+ assert(filename);
+ assert(line);
+ assert(rvalue);
+
+ n = startswith(rvalue, "fd:");
+ if (n) {
+ _cleanup_free_ char *resolved = NULL;
+
+ r = unit_full_printf(u, n, &resolved);
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s': %m", n);
+
+ if (isempty(resolved))
+ resolved = mfree(resolved);
+ else if (!fdname_is_valid(resolved)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name: %s", resolved);
+ return -ENOEXEC;
+ }
+
+ free_and_replace(c->stdio_fdname[STDIN_FILENO], resolved);
+
+ ei = EXEC_INPUT_NAMED_FD;
+
+ } else if ((n = startswith(rvalue, "file:"))) {
+ _cleanup_free_ char *resolved = NULL;
+
+ r = unit_full_printf(u, n, &resolved);
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s': %m", n);
+
+ r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE | PATH_CHECK_FATAL, unit, filename, line, lvalue);
+ if (r < 0)
+ return -ENOEXEC;
+
+ free_and_replace(c->stdio_file[STDIN_FILENO], resolved);
+
+ ei = EXEC_INPUT_FILE;
+
+ } else {
+ ei = exec_input_from_string(rvalue);
+ if (ei < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse input specifier, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ c->std_input = ei;
+ return 0;
+}
+
+int config_parse_exec_input_text(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *unescaped = NULL, *resolved = NULL;
+ ExecContext *c = data;
+ Unit *u = userdata;
+ size_t sz;
+ void *p;
+ int r;
+
+ assert(data);
+ assert(filename);
+ assert(line);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ /* Reset if the empty string is assigned */
+ c->stdin_data = mfree(c->stdin_data);
+ c->stdin_data_size = 0;
+ return 0;
+ }
+
+ r = cunescape(rvalue, 0, &unescaped);
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to decode C escaped text '%s': %m", rvalue);
+
+ r = unit_full_printf(u, unescaped, &resolved);
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s': %m", unescaped);
+
+ sz = strlen(resolved);
+ if (c->stdin_data_size + sz + 1 < c->stdin_data_size || /* check for overflow */
+ c->stdin_data_size + sz + 1 > EXEC_STDIN_DATA_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Standard input data too large (%zu), maximum of %zu permitted, ignoring.", c->stdin_data_size + sz, (size_t) EXEC_STDIN_DATA_MAX);
+ return -E2BIG;
+ }
+
+ p = realloc(c->stdin_data, c->stdin_data_size + sz + 1);
+ if (!p)
+ return log_oom();
+
+ *((char*) mempcpy((char*) p + c->stdin_data_size, resolved, sz)) = '\n';
+
+ c->stdin_data = p;
+ c->stdin_data_size += sz + 1;
+
+ return 0;
+}
+
+int config_parse_exec_input_data(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ void *p = NULL;
+ ExecContext *c = data;
+ size_t sz;
+ void *q;
+ int r;
+
+ assert(data);
+ assert(filename);
+ assert(line);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ /* Reset if the empty string is assigned */
+ c->stdin_data = mfree(c->stdin_data);
+ c->stdin_data_size = 0;
+ return 0;
+ }
+
+ r = unbase64mem(rvalue, (size_t) -1, &p, &sz);
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to decode base64 data, ignoring: %s", rvalue);
+
+ assert(sz > 0);
+
+ if (c->stdin_data_size + sz < c->stdin_data_size || /* check for overflow */
+ c->stdin_data_size + sz > EXEC_STDIN_DATA_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Standard input data too large (%zu), maximum of %zu permitted, ignoring.", c->stdin_data_size + sz, (size_t) EXEC_STDIN_DATA_MAX);
+ return -E2BIG;
+ }
+
+ q = realloc(c->stdin_data, c->stdin_data_size + sz);
+ if (!q)
+ return log_oom();
+
+ memcpy((uint8_t*) q + c->stdin_data_size, p, sz);
+
+ c->stdin_data = q;
+ c->stdin_data_size += sz;
+
+ return 0;
+}
+
+int config_parse_exec_output(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *resolved = NULL;
+ const char *n;
+ ExecContext *c = data;
+ Unit *u = userdata;
+ ExecOutput eo;
+ int r;
+
+ assert(data);
+ assert(filename);
+ assert(line);
+ assert(lvalue);
+ assert(rvalue);
+
+ n = startswith(rvalue, "fd:");
+ if (n) {
+ r = unit_full_printf(u, n, &resolved);
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s: %m", n);
+
+ if (isempty(resolved))
+ resolved = mfree(resolved);
+ else if (!fdname_is_valid(resolved)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name: %s", resolved);
+ return -ENOEXEC;
+ }
+
+ eo = EXEC_OUTPUT_NAMED_FD;
+
+ } else if ((n = startswith(rvalue, "file:"))) {
+
+ r = unit_full_printf(u, n, &resolved);
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s: %m", n);
+
+ r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE | PATH_CHECK_FATAL, unit, filename, line, lvalue);
+ if (r < 0)
+ return -ENOEXEC;
+
+ eo = EXEC_OUTPUT_FILE;
+
+ } else if ((n = startswith(rvalue, "append:"))) {
+
+ r = unit_full_printf(u, n, &resolved);
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s: %m", n);
+
+ r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE | PATH_CHECK_FATAL, unit, filename, line, lvalue);
+ if (r < 0)
+ return -ENOEXEC;
+
+ eo = EXEC_OUTPUT_FILE_APPEND;
+ } else {
+ eo = exec_output_from_string(rvalue);
+ if (eo < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output specifier, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ if (streq(lvalue, "StandardOutput")) {
+ if (eo == EXEC_OUTPUT_NAMED_FD)
+ free_and_replace(c->stdio_fdname[STDOUT_FILENO], resolved);
+ else
+ free_and_replace(c->stdio_file[STDOUT_FILENO], resolved);
+
+ c->std_output = eo;
+
+ } else {
+ assert(streq(lvalue, "StandardError"));
+
+ if (eo == EXEC_OUTPUT_NAMED_FD)
+ free_and_replace(c->stdio_fdname[STDERR_FILENO], resolved);
+ else
+ free_and_replace(c->stdio_file[STDERR_FILENO], resolved);
+
+ c->std_error = eo;
+ }
+
+ return 0;
+}
+
+int config_parse_exec_io_class(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int x;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->ioprio_set = false;
+ c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
+ return 0;
+ }
+
+ x = ioprio_class_from_string(rvalue);
+ if (x < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse IO scheduling class, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ c->ioprio = IOPRIO_PRIO_VALUE(x, IOPRIO_PRIO_DATA(c->ioprio));
+ c->ioprio_set = true;
+
+ return 0;
+}
+
+int config_parse_exec_io_priority(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int i, r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->ioprio_set = false;
+ c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
+ return 0;
+ }
+
+ r = ioprio_parse_priority(rvalue, &i);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse IO priority, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_PRIO_CLASS(c->ioprio), i);
+ c->ioprio_set = true;
+
+ return 0;
+}
+
+int config_parse_exec_cpu_sched_policy(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int x;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->cpu_sched_set = false;
+ c->cpu_sched_policy = SCHED_OTHER;
+ c->cpu_sched_priority = 0;
+ return 0;
+ }
+
+ x = sched_policy_from_string(rvalue);
+ if (x < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse CPU scheduling policy, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ c->cpu_sched_policy = x;
+ /* Moving to or from real-time policy? We need to adjust the priority */
+ c->cpu_sched_priority = CLAMP(c->cpu_sched_priority, sched_get_priority_min(x), sched_get_priority_max(x));
+ c->cpu_sched_set = true;
+
+ return 0;
+}
+
+int config_parse_exec_cpu_sched_prio(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int i, min, max, r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atoi(rvalue, &i);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse CPU scheduling priority, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ /* On Linux RR/FIFO range from 1 to 99 and OTHER/BATCH may only be 0 */
+ min = sched_get_priority_min(c->cpu_sched_policy);
+ max = sched_get_priority_max(c->cpu_sched_policy);
+
+ if (i < min || i > max) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "CPU scheduling priority is out of range, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ c->cpu_sched_priority = i;
+ c->cpu_sched_set = true;
+
+ return 0;
+}
+
+int config_parse_exec_cpu_affinity(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ _cleanup_cpu_free_ cpu_set_t *cpuset = NULL;
+ int ncpus;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ ncpus = parse_cpu_set_and_warn(rvalue, &cpuset, unit, filename, line, lvalue);
+ if (ncpus < 0)
+ return ncpus;
+
+ if (ncpus == 0) {
+ /* An empty assignment resets the CPU list */
+ c->cpuset = cpu_set_mfree(c->cpuset);
+ c->cpuset_ncpus = 0;
+ return 0;
+ }
+
+ if (!c->cpuset) {
+ c->cpuset = TAKE_PTR(cpuset);
+ c->cpuset_ncpus = (unsigned) ncpus;
+ return 0;
+ }
+
+ if (c->cpuset_ncpus < (unsigned) ncpus) {
+ CPU_OR_S(CPU_ALLOC_SIZE(c->cpuset_ncpus), cpuset, c->cpuset, cpuset);
+ CPU_FREE(c->cpuset);
+ c->cpuset = TAKE_PTR(cpuset);
+ c->cpuset_ncpus = (unsigned) ncpus;
+ return 0;
+ }
+
+ CPU_OR_S(CPU_ALLOC_SIZE((unsigned) ncpus), c->cpuset, c->cpuset, cpuset);
+
+ return 0;
+}
+
+int config_parse_capability_set(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *capability_set = data;
+ uint64_t sum = 0, initial = 0;
+ bool invert = false;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (rvalue[0] == '~') {
+ invert = true;
+ rvalue++;
+ }
+
+ if (streq(lvalue, "CapabilityBoundingSet"))
+ initial = CAP_ALL; /* initialized to all bits on */
+ /* else "AmbientCapabilities" initialized to all bits off */
+
+ r = capability_set_from_string(rvalue, &sum);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s= specifier '%s', ignoring: %m", lvalue, rvalue);
+ return 0;
+ }
+
+ if (sum == 0 || *capability_set == initial)
+ /* "", "~" or uninitialized data -> replace */
+ *capability_set = invert ? ~sum : sum;
+ else {
+ /* previous data -> merge */
+ if (invert)
+ *capability_set &= ~sum;
+ else
+ *capability_set |= sum;
+ }
+
+ return 0;
+}
+
+int config_parse_exec_selinux_context(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ Unit *u = userdata;
+ bool ignore;
+ char *k;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->selinux_context = mfree(c->selinux_context);
+ c->selinux_context_ignore = false;
+ return 0;
+ }
+
+ if (rvalue[0] == '-') {
+ ignore = true;
+ rvalue++;
+ } else
+ ignore = false;
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in '%s'%s: %m",
+ rvalue, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ free_and_replace(c->selinux_context, k);
+ c->selinux_context_ignore = ignore;
+
+ return 0;
+}
+
+int config_parse_exec_apparmor_profile(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ Unit *u = userdata;
+ bool ignore;
+ char *k;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->apparmor_profile = mfree(c->apparmor_profile);
+ c->apparmor_profile_ignore = false;
+ return 0;
+ }
+
+ if (rvalue[0] == '-') {
+ ignore = true;
+ rvalue++;
+ } else
+ ignore = false;
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in '%s'%s: %m",
+ rvalue, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ free_and_replace(c->apparmor_profile, k);
+ c->apparmor_profile_ignore = ignore;
+
+ return 0;
+}
+
+int config_parse_exec_smack_process_label(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ Unit *u = userdata;
+ bool ignore;
+ char *k;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->smack_process_label = mfree(c->smack_process_label);
+ c->smack_process_label_ignore = false;
+ return 0;
+ }
+
+ if (rvalue[0] == '-') {
+ ignore = true;
+ rvalue++;
+ } else
+ ignore = false;
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in '%s'%s: %m",
+ rvalue, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ free_and_replace(c->smack_process_label, k);
+ c->smack_process_label_ignore = ignore;
+
+ return 0;
+}
+
+int config_parse_timer(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Timer *t = data;
+ usec_t usec = 0;
+ TimerValue *v;
+ TimerBase b;
+ _cleanup_(calendar_spec_freep) CalendarSpec *c = NULL;
+ Unit *u = userdata;
+ _cleanup_free_ char *k = NULL;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets list */
+ timer_free_values(t);
+ return 0;
+ }
+
+ b = timer_base_from_string(lvalue);
+ if (b < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer base, ignoring: %s", lvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (b == TIMER_CALENDAR) {
+ if (calendar_spec_from_string(k, &c) < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse calendar specification, ignoring: %s", k);
+ return 0;
+ }
+ } else
+ if (parse_sec(k, &usec) < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", k);
+ return 0;
+ }
+
+ v = new0(TimerValue, 1);
+ if (!v)
+ return log_oom();
+
+ v->base = b;
+ v->value = usec;
+ v->calendar_spec = TAKE_PTR(c);
+
+ LIST_PREPEND(value, t->values, v);
+
+ return 0;
+}
+
+int config_parse_trigger_unit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *p = NULL;
+ Unit *u = data;
+ UnitType type;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (!hashmap_isempty(u->dependencies[UNIT_TRIGGERS])) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Multiple units to trigger specified, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = unit_name_printf(u, rvalue, &p);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ type = unit_name_to_type(p);
+ if (type < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Unit type not valid, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (unit_has_name(u, p)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Units cannot trigger themselves, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = unit_add_two_dependencies_by_name(u, UNIT_BEFORE, UNIT_TRIGGERS, p, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to add trigger on %s, ignoring: %m", p);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_path_spec(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Path *p = data;
+ PathSpec *s;
+ PathType b;
+ _cleanup_free_ char *k = NULL;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment clears list */
+ path_free_specs(p);
+ return 0;
+ }
+
+ b = path_type_from_string(lvalue);
+ if (b < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse path type, ignoring: %s", lvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(UNIT(p), rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(k, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ s = new0(PathSpec, 1);
+ if (!s)
+ return log_oom();
+
+ s->unit = UNIT(p);
+ s->path = TAKE_PTR(k);
+ s->type = b;
+ s->inotify_fd = -1;
+
+ LIST_PREPEND(spec, p->specs, s);
+
+ return 0;
+}
+
+int config_parse_socket_service(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *p = NULL;
+ Socket *s = data;
+ Unit *x;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = unit_name_printf(UNIT(s), rvalue, &p);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s: %m", rvalue);
+ return -ENOEXEC;
+ }
+
+ if (!endswith(p, ".service")) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Unit must be of type service: %s", rvalue);
+ return -ENOEXEC;
+ }
+
+ r = manager_load_unit(UNIT(s)->manager, p, NULL, &error, &x);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to load unit %s: %s", rvalue, bus_error_message(&error, r));
+ return -ENOEXEC;
+ }
+
+ unit_ref_set(&s->service, UNIT(s), x);
+
+ return 0;
+}
+
+int config_parse_fdname(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *p = NULL;
+ Socket *s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ s->fdname = mfree(s->fdname);
+ return 0;
+ }
+
+ r = unit_full_printf(UNIT(s), rvalue, &p);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (!fdname_is_valid(p)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name, ignoring: %s", p);
+ return 0;
+ }
+
+ return free_and_replace(s->fdname, p);
+}
+
+int config_parse_service_sockets(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Service *s = data;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Trailing garbage in sockets, ignoring: %s", rvalue);
+ break;
+ }
+
+ r = unit_name_printf(UNIT(s), word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", word);
+ continue;
+ }
+
+ if (!endswith(k, ".socket")) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Unit must be of type socket, ignoring: %s", k);
+ continue;
+ }
+
+ r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_WANTS, UNIT_AFTER, k, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to add dependency on %s, ignoring: %m", k);
+
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_TRIGGERED_BY, k, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to add dependency on %s, ignoring: %m", k);
+ }
+
+ return 0;
+}
+
+int config_parse_bus_name(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *k = NULL;
+ Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (!service_name_is_valid(k)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid bus name, ignoring: %s", k);
+ return 0;
+ }
+
+ return config_parse_string(unit, filename, line, section, section_line, lvalue, ltype, k, data, userdata);
+}
+
+int config_parse_service_timeout(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Service *s = userdata;
+ usec_t usec;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(s);
+
+ /* This is called for two cases: TimeoutSec= and TimeoutStartSec=. */
+
+ /* Traditionally, these options accepted 0 to disable the timeouts. However, a timeout of 0 suggests it happens
+ * immediately, hence fix this to become USEC_INFINITY instead. This is in-line with how we internally handle
+ * all other timeouts. */
+ r = parse_sec_fix_0(rvalue, &usec);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s= parameter, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ s->start_timeout_defined = true;
+ s->timeout_start_usec = usec;
+
+ if (streq(lvalue, "TimeoutSec"))
+ s->timeout_stop_usec = usec;
+
+ return 0;
+}
+
+int config_parse_sec_fix_0(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ usec_t *usec = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(usec);
+
+ /* This is pretty much like config_parse_sec(), except that this treats a time of 0 as infinity, for
+ * compatibility with older versions of systemd where 0 instead of infinity was used as indicator to turn off a
+ * timeout. */
+
+ r = parse_sec_fix_0(rvalue, usec);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s= parameter, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_user_group(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *k = NULL;
+ char **user = data;
+ Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ *user = mfree(*user);
+ return 0;
+ }
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s: %m", rvalue);
+ return -ENOEXEC;
+ }
+
+ if (!valid_user_group_name_or_id(k)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid user/group name or numeric ID: %s", k);
+ return -ENOEXEC;
+ }
+
+ return free_and_replace(*user, k);
+}
+
+int config_parse_user_group_strv(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***users = data;
+ Unit *u = userdata;
+ const char *p = rvalue;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ *users = strv_free(*users);
+ return 0;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid syntax: %s", rvalue);
+ return -ENOEXEC;
+ }
+
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s: %m", word);
+ return -ENOEXEC;
+ }
+
+ if (!valid_user_group_name_or_id(k)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid user/group name or numeric ID: %s", k);
+ return -ENOEXEC;
+ }
+
+ r = strv_push(users, k);
+ if (r < 0)
+ return log_oom();
+
+ k = NULL;
+ }
+
+ return 0;
+}
+
+int config_parse_working_directory(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ Unit *u = userdata;
+ bool missing_ok;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(c);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ c->working_directory_home = false;
+ c->working_directory = mfree(c->working_directory);
+ return 0;
+ }
+
+ if (rvalue[0] == '-') {
+ missing_ok = true;
+ rvalue++;
+ } else
+ missing_ok = false;
+
+ if (streq(rvalue, "~")) {
+ c->working_directory_home = true;
+ c->working_directory = mfree(c->working_directory);
+ } else {
+ _cleanup_free_ char *k = NULL;
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in working directory path '%s'%s: %m",
+ rvalue, missing_ok ? ", ignoring" : "");
+ return missing_ok ? 0 : -ENOEXEC;
+ }
+
+ r = path_simplify_and_warn(k, PATH_CHECK_ABSOLUTE | (missing_ok ? 0 : PATH_CHECK_FATAL), unit, filename, line, lvalue);
+ if (r < 0)
+ return missing_ok ? 0 : -ENOEXEC;
+
+ c->working_directory_home = false;
+ free_and_replace(c->working_directory, k);
+ }
+
+ c->working_directory_missing_ok = missing_ok;
+ return 0;
+}
+
+int config_parse_unit_env_file(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***env = data;
+ Unit *u = userdata;
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment frees the list */
+ *env = strv_free(*env);
+ return 0;
+ }
+
+ r = unit_full_printf(u, rvalue, &n);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(n[0] == '-' ? n + 1 : n, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ r = strv_push(env, n);
+ if (r < 0)
+ return log_oom();
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_environ(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Unit *u = userdata;
+ char ***env = data;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *env = strv_free(*env);
+ return 0;
+ }
+
+ for (p = rvalue;; ) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_CUNESCAPE|EXTRACT_QUOTES);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (u) {
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in %s, ignoring: %m", word);
+ continue;
+ }
+ } else
+ k = TAKE_PTR(word);
+
+ if (!env_assignment_is_valid(k)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Invalid environment assignment, ignoring: %s", k);
+ continue;
+ }
+
+ r = strv_env_replace(env, k);
+ if (r < 0)
+ return log_oom();
+
+ k = NULL;
+ }
+}
+
+int config_parse_pass_environ(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_strv_free_ char **n = NULL;
+ size_t nlen = 0, nbufsize = 0;
+ char*** passenv = data;
+ const char *p = rvalue;
+ Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *passenv = strv_free(*passenv);
+ return 0;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Trailing garbage in %s, ignoring: %s", lvalue, rvalue);
+ break;
+ }
+
+ if (u) {
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to resolve specifiers in %s, ignoring: %m", word);
+ continue;
+ }
+ } else
+ k = TAKE_PTR(word);
+
+ if (!env_name_is_valid(k)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Invalid environment name for %s, ignoring: %s", lvalue, k);
+ continue;
+ }
+
+ if (!GREEDY_REALLOC(n, nbufsize, nlen + 2))
+ return log_oom();
+
+ n[nlen++] = TAKE_PTR(k);
+ n[nlen] = NULL;
+ }
+
+ if (n) {
+ r = strv_extend_strv(passenv, n, true);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int config_parse_unset_environ(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_strv_free_ char **n = NULL;
+ size_t nlen = 0, nbufsize = 0;
+ char*** unsetenv = data;
+ const char *p = rvalue;
+ Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *unsetenv = strv_free(*unsetenv);
+ return 0;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_CUNESCAPE|EXTRACT_QUOTES);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Trailing garbage in %s, ignoring: %s", lvalue, rvalue);
+ break;
+ }
+
+ if (u) {
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in %s, ignoring: %m", word);
+ continue;
+ }
+ } else
+ k = TAKE_PTR(word);
+
+ if (!env_assignment_is_valid(k) && !env_name_is_valid(k)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Invalid environment name or assignment %s, ignoring: %s", lvalue, k);
+ continue;
+ }
+
+ if (!GREEDY_REALLOC(n, nbufsize, nlen + 2))
+ return log_oom();
+
+ n[nlen++] = TAKE_PTR(k);
+ n[nlen] = NULL;
+ }
+
+ if (n) {
+ r = strv_extend_strv(unsetenv, n, true);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int config_parse_log_extra_fields(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ Unit *u = userdata;
+ const char *p = rvalue;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(c);
+
+ if (isempty(rvalue)) {
+ exec_context_free_log_extra_fields(c);
+ return 0;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+ struct iovec *t;
+ const char *eq;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_CUNESCAPE|EXTRACT_QUOTES);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", word);
+ continue;
+ }
+
+ eq = strchr(k, '=');
+ if (!eq) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Log field lacks '=' character, ignoring: %s", k);
+ continue;
+ }
+
+ if (!journal_field_valid(k, eq-k, false)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Log field name is invalid, ignoring: %s", k);
+ continue;
+ }
+
+ t = reallocarray(c->log_extra_fields, c->n_log_extra_fields+1, sizeof(struct iovec));
+ if (!t)
+ return log_oom();
+
+ c->log_extra_fields = t;
+ c->log_extra_fields[c->n_log_extra_fields++] = IOVEC_MAKE_STRING(k);
+
+ k = NULL;
+ }
+}
+
+int config_parse_unit_condition_path(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *p = NULL;
+ Condition **list = data, *c;
+ ConditionType t = ltype;
+ bool trigger, negate;
+ Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *list = condition_free_list(*list);
+ return 0;
+ }
+
+ trigger = rvalue[0] == '|';
+ if (trigger)
+ rvalue++;
+
+ negate = rvalue[0] == '!';
+ if (negate)
+ rvalue++;
+
+ r = unit_full_printf(u, rvalue, &p);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(p, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ c = condition_new(t, p, trigger, negate);
+ if (!c)
+ return log_oom();
+
+ LIST_PREPEND(conditions, *list, c);
+ return 0;
+}
+
+int config_parse_unit_condition_string(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *s = NULL;
+ Condition **list = data, *c;
+ ConditionType t = ltype;
+ bool trigger, negate;
+ Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *list = condition_free_list(*list);
+ return 0;
+ }
+
+ trigger = rvalue[0] == '|';
+ if (trigger)
+ rvalue++;
+
+ negate = rvalue[0] == '!';
+ if (negate)
+ rvalue++;
+
+ r = unit_full_printf(u, rvalue, &s);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ c = condition_new(t, s, trigger, negate);
+ if (!c)
+ return log_oom();
+
+ LIST_PREPEND(conditions, *list, c);
+ return 0;
+}
+
+int config_parse_unit_condition_null(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Condition **list = data, *c;
+ bool trigger, negate;
+ int b;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *list = condition_free_list(*list);
+ return 0;
+ }
+
+ trigger = rvalue[0] == '|';
+ if (trigger)
+ rvalue++;
+
+ negate = rvalue[0] == '!';
+ if (negate)
+ rvalue++;
+
+ b = parse_boolean(rvalue);
+ if (b < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, b, "Failed to parse boolean value in condition, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (!b)
+ negate = !negate;
+
+ c = condition_new(CONDITION_NULL, NULL, trigger, negate);
+ if (!c)
+ return log_oom();
+
+ LIST_PREPEND(conditions, *list, c);
+ return 0;
+}
+
+int config_parse_unit_requires_mounts_for(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ const char *p = rvalue;
+ Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *resolved = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(u, word, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", word);
+ continue;
+ }
+
+ r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ continue;
+
+ r = unit_require_mounts_for(u, resolved, UNIT_DEPENDENCY_FILE);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to add required mount '%s', ignoring: %m", resolved);
+ continue;
+ }
+ }
+}
+
+int config_parse_documentation(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Unit *u = userdata;
+ int r;
+ char **a, **b;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ u->documentation = strv_free(u->documentation);
+ return 0;
+ }
+
+ r = config_parse_unit_strv_printf(unit, filename, line, section, section_line, lvalue, ltype,
+ rvalue, data, userdata);
+ if (r < 0)
+ return r;
+
+ for (a = b = u->documentation; a && *a; a++) {
+
+ if (documentation_url_is_valid(*a))
+ *(b++) = *a;
+ else {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid URL, ignoring: %s", *a);
+ free(*a);
+ }
+ }
+ if (b)
+ *b = NULL;
+
+ return r;
+}
+
+#if HAVE_SECCOMP
+int config_parse_syscall_filter(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ Unit *u = userdata;
+ bool invert = false;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ c->syscall_filter = hashmap_free(c->syscall_filter);
+ c->syscall_whitelist = false;
+ return 0;
+ }
+
+ if (rvalue[0] == '~') {
+ invert = true;
+ rvalue++;
+ }
+
+ if (!c->syscall_filter) {
+ c->syscall_filter = hashmap_new(NULL);
+ if (!c->syscall_filter)
+ return log_oom();
+
+ if (invert)
+ /* Allow everything but the ones listed */
+ c->syscall_whitelist = false;
+ else {
+ /* Allow nothing but the ones listed */
+ c->syscall_whitelist = true;
+
+ /* Accept default syscalls if we are on a whitelist */
+ r = seccomp_parse_syscall_filter("@default", -1, c->syscall_filter, SECCOMP_PARSE_WHITELIST);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *name = NULL;
+ int num;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = parse_syscall_and_errno(word, &name, &num);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse syscall:errno, ignoring: %s", word);
+ continue;
+ }
+
+ r = seccomp_parse_syscall_filter_full(name, num, c->syscall_filter,
+ SECCOMP_PARSE_LOG|SECCOMP_PARSE_PERMISSIVE|(invert ? SECCOMP_PARSE_INVERT : 0)|(c->syscall_whitelist ? SECCOMP_PARSE_WHITELIST : 0),
+ unit, filename, line);
+ if (r < 0)
+ return r;
+ }
+}
+
+int config_parse_syscall_archs(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ const char *p = rvalue;
+ Set **archs = data;
+ int r;
+
+ if (isempty(rvalue)) {
+ *archs = set_free(*archs);
+ return 0;
+ }
+
+ r = set_ensure_allocated(archs, NULL);
+ if (r < 0)
+ return log_oom();
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ uint32_t a;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = seccomp_arch_from_string(word, &a);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to parse system call architecture \"%s\", ignoring: %m", word);
+ continue;
+ }
+
+ r = set_put(*archs, UINT32_TO_PTR(a + 1));
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_syscall_errno(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int e;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets to KILL */
+ c->syscall_errno = 0;
+ return 0;
+ }
+
+ e = parse_errno(rvalue);
+ if (e <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse error number, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ c->syscall_errno = e;
+ return 0;
+}
+
+int config_parse_address_families(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ bool invert = false;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ c->address_families = set_free(c->address_families);
+ c->address_families_whitelist = false;
+ return 0;
+ }
+
+ if (rvalue[0] == '~') {
+ invert = true;
+ rvalue++;
+ }
+
+ if (!c->address_families) {
+ c->address_families = set_new(NULL);
+ if (!c->address_families)
+ return log_oom();
+
+ c->address_families_whitelist = !invert;
+ }
+
+ for (p = rvalue;;) {
+ _cleanup_free_ char *word = NULL;
+ int af;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ af = af_from_name(word);
+ if (af < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, af,
+ "Failed to parse address family, ignoring: %s", word);
+ continue;
+ }
+
+ /* If we previously wanted to forbid an address family and now
+ * we want to allow it, then just remove it from the list.
+ */
+ if (!invert == c->address_families_whitelist) {
+ r = set_put(c->address_families, INT_TO_PTR(af));
+ if (r < 0)
+ return log_oom();
+ } else
+ set_remove(c->address_families, INT_TO_PTR(af));
+ }
+}
+
+int config_parse_restrict_namespaces(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ unsigned long flags;
+ bool invert = false;
+ int r;
+
+ if (isempty(rvalue)) {
+ /* Reset to the default. */
+ c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
+ return 0;
+ }
+
+ /* Boolean parameter ignores the previous settings */
+ r = parse_boolean(rvalue);
+ if (r > 0) {
+ c->restrict_namespaces = 0;
+ return 0;
+ } else if (r == 0) {
+ c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
+ return 0;
+ }
+
+ if (rvalue[0] == '~') {
+ invert = true;
+ rvalue++;
+ }
+
+ /* Not a boolean argument, in this case it's a list of namespace types. */
+ r = namespace_flags_from_string(rvalue, &flags);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse namespace type string, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (c->restrict_namespaces == NAMESPACE_FLAGS_INITIAL)
+ /* Initial assignment. Just set the value. */
+ c->restrict_namespaces = invert ? (~flags) & NAMESPACE_FLAGS_ALL : flags;
+ else
+ /* Merge the value with the previous one. */
+ SET_FLAG(c->restrict_namespaces, flags, !invert);
+
+ return 0;
+}
+#endif
+
+int config_parse_unit_slice(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *k = NULL;
+ Unit *u = userdata, *slice = NULL;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ r = unit_name_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = manager_load_unit(u->manager, k, NULL, &error, &slice);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to load slice unit %s, ignoring: %s", k, bus_error_message(&error, r));
+ return 0;
+ }
+
+ r = unit_set_slice(u, slice);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to assign slice %s to unit %s, ignoring: %m", slice->id, u->id);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_cpu_quota(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ CGroupContext *c = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ c->cpu_quota_per_sec_usec = USEC_INFINITY;
+ return 0;
+ }
+
+ r = parse_permille_unbounded(rvalue);
+ if (r <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid CPU quota '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ c->cpu_quota_per_sec_usec = ((usec_t) r * USEC_PER_SEC) / 1000U;
+ return 0;
+}
+
+int config_parse_memory_limit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ CGroupContext *c = data;
+ uint64_t bytes = CGROUP_LIMIT_MAX;
+ int r;
+
+ if (!isempty(rvalue) && !streq(rvalue, "infinity")) {
+
+ r = parse_permille(rvalue);
+ if (r < 0) {
+ r = parse_size(rvalue, 1024, &bytes);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid memory limit '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+ } else
+ bytes = physical_memory_scale(r, 1000U);
+
+ if (bytes >= UINT64_MAX ||
+ (bytes <= 0 && !streq(lvalue, "MemorySwapMax"))) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Memory limit '%s' out of range, ignoring.", rvalue);
+ return 0;
+ }
+ }
+
+ if (streq(lvalue, "MemoryMin"))
+ c->memory_min = bytes;
+ else if (streq(lvalue, "MemoryLow"))
+ c->memory_low = bytes;
+ else if (streq(lvalue, "MemoryHigh"))
+ c->memory_high = bytes;
+ else if (streq(lvalue, "MemoryMax"))
+ c->memory_max = bytes;
+ else if (streq(lvalue, "MemorySwapMax"))
+ c->memory_swap_max = bytes;
+ else if (streq(lvalue, "MemoryLimit"))
+ c->memory_limit = bytes;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+int config_parse_tasks_max(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *tasks_max = data, v;
+ Unit *u = userdata;
+ int r;
+
+ if (isempty(rvalue)) {
+ *tasks_max = u ? u->manager->default_tasks_max : UINT64_MAX;
+ return 0;
+ }
+
+ if (streq(rvalue, "infinity")) {
+ *tasks_max = CGROUP_LIMIT_MAX;
+ return 0;
+ }
+
+ r = parse_permille(rvalue);
+ if (r < 0) {
+ r = safe_atou64(rvalue, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid maximum tasks value '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+ } else
+ v = system_tasks_max_scale(r, 1000U);
+
+ if (v <= 0 || v >= UINT64_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range, ignoring.", rvalue);
+ return 0;
+ }
+
+ *tasks_max = v;
+ return 0;
+}
+
+int config_parse_delegate(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ CGroupContext *c = data;
+ UnitType t;
+ int r;
+
+ t = unit_name_to_type(unit);
+ assert(t != _UNIT_TYPE_INVALID);
+
+ if (!unit_vtable[t]->can_delegate) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Delegate= setting not supported for this unit type, ignoring.");
+ return 0;
+ }
+
+ /* We either accept a boolean value, which may be used to turn on delegation for all controllers, or turn it
+ * off for all. Or it takes a list of controller names, in which case we add the specified controllers to the
+ * mask to delegate. */
+
+ if (isempty(rvalue)) {
+ /* An empty string resets controllers and set Delegate=yes. */
+ c->delegate = true;
+ c->delegate_controllers = 0;
+ return 0;
+ }
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ const char *p = rvalue;
+ CGroupMask mask = 0;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ CGroupController cc;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ cc = cgroup_controller_from_string(word);
+ if (cc < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid controller name '%s', ignoring", word);
+ continue;
+ }
+
+ mask |= CGROUP_CONTROLLER_TO_MASK(cc);
+ }
+
+ c->delegate = true;
+ c->delegate_controllers |= mask;
+
+ } else if (r > 0) {
+ c->delegate = true;
+ c->delegate_controllers = _CGROUP_MASK_ALL;
+ } else {
+ c->delegate = false;
+ c->delegate_controllers = 0;
+ }
+
+ return 0;
+}
+
+int config_parse_device_allow(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL, *resolved = NULL;
+ CGroupContext *c = data;
+ const char *p = rvalue;
+ int r;
+
+ if (isempty(rvalue)) {
+ while (c->device_allow)
+ cgroup_context_free_device_allow(c, c->device_allow);
+
+ return 0;
+ }
+
+ r = extract_first_word(&p, &path, NULL, EXTRACT_QUOTES);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract device path and rights from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(userdata, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", path);
+ return 0;
+ }
+
+ if (!STARTSWITH_SET(resolved, "block-", "char-")) {
+
+ r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ if (!valid_device_node_path(resolved)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid device node path '%s', ignoring.", resolved);
+ return 0;
+ }
+ }
+
+ if (!isempty(p) && !in_charset(p, "rwm")) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid device rights '%s', ignoring.", p);
+ return 0;
+ }
+
+ return cgroup_add_device_allow(c, resolved, p);
+}
+
+int config_parse_io_device_weight(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL, *resolved = NULL;
+ CGroupIODeviceWeight *w;
+ CGroupContext *c = data;
+ const char *p = rvalue;
+ uint64_t u;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ while (c->io_device_weights)
+ cgroup_context_free_io_device_weight(c, c->io_device_weights);
+
+ return 0;
+ }
+
+ r = extract_first_word(&p, &path, NULL, EXTRACT_QUOTES);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0 || isempty(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract device path and weight from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(userdata, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", path);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ r = cg_weight_parse(p, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "IO weight '%s' invalid, ignoring: %m", p);
+ return 0;
+ }
+
+ assert(u != CGROUP_WEIGHT_INVALID);
+
+ w = new0(CGroupIODeviceWeight, 1);
+ if (!w)
+ return log_oom();
+
+ w->path = TAKE_PTR(resolved);
+ w->weight = u;
+
+ LIST_PREPEND(device_weights, c->io_device_weights, w);
+ return 0;
+}
+
+int config_parse_io_device_latency(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL, *resolved = NULL;
+ CGroupIODeviceLatency *l;
+ CGroupContext *c = data;
+ const char *p = rvalue;
+ usec_t usec;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ while (c->io_device_latencies)
+ cgroup_context_free_io_device_latency(c, c->io_device_latencies);
+
+ return 0;
+ }
+
+ r = extract_first_word(&p, &path, NULL, EXTRACT_QUOTES);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0 || isempty(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract device path and latency from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(userdata, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", path);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ if (parse_sec(p, &usec) < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", p);
+ return 0;
+ }
+
+ l = new0(CGroupIODeviceLatency, 1);
+ if (!l)
+ return log_oom();
+
+ l->path = TAKE_PTR(resolved);
+ l->target_usec = usec;
+
+ LIST_PREPEND(device_latencies, c->io_device_latencies, l);
+ return 0;
+}
+
+int config_parse_io_limit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL, *resolved = NULL;
+ CGroupIODeviceLimit *l = NULL, *t;
+ CGroupContext *c = data;
+ CGroupIOLimitType type;
+ const char *p = rvalue;
+ uint64_t num;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ type = cgroup_io_limit_type_from_string(lvalue);
+ assert(type >= 0);
+
+ if (isempty(rvalue)) {
+ LIST_FOREACH(device_limits, l, c->io_device_limits)
+ l->limits[type] = cgroup_io_limit_defaults[type];
+ return 0;
+ }
+
+ r = extract_first_word(&p, &path, NULL, EXTRACT_QUOTES);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0 || isempty(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract device node and bandwidth from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(userdata, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", path);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ if (streq("infinity", p))
+ num = CGROUP_LIMIT_MAX;
+ else {
+ r = parse_size(p, 1000, &num);
+ if (r < 0 || num <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid IO limit '%s', ignoring.", p);
+ return 0;
+ }
+ }
+
+ LIST_FOREACH(device_limits, t, c->io_device_limits) {
+ if (path_equal(resolved, t->path)) {
+ l = t;
+ break;
+ }
+ }
+
+ if (!l) {
+ CGroupIOLimitType ttype;
+
+ l = new0(CGroupIODeviceLimit, 1);
+ if (!l)
+ return log_oom();
+
+ l->path = TAKE_PTR(resolved);
+ for (ttype = 0; ttype < _CGROUP_IO_LIMIT_TYPE_MAX; ttype++)
+ l->limits[ttype] = cgroup_io_limit_defaults[ttype];
+
+ LIST_PREPEND(device_limits, c->io_device_limits, l);
+ }
+
+ l->limits[type] = num;
+
+ return 0;
+}
+
+int config_parse_blockio_device_weight(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL, *resolved = NULL;
+ CGroupBlockIODeviceWeight *w;
+ CGroupContext *c = data;
+ const char *p = rvalue;
+ uint64_t u;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ while (c->blockio_device_weights)
+ cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
+
+ return 0;
+ }
+
+ r = extract_first_word(&p, &path, NULL, EXTRACT_QUOTES);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0 || isempty(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract device node and weight from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(userdata, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", path);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ r = cg_blkio_weight_parse(p, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid block IO weight '%s', ignoring: %m", p);
+ return 0;
+ }
+
+ assert(u != CGROUP_BLKIO_WEIGHT_INVALID);
+
+ w = new0(CGroupBlockIODeviceWeight, 1);
+ if (!w)
+ return log_oom();
+
+ w->path = TAKE_PTR(resolved);
+ w->weight = u;
+
+ LIST_PREPEND(device_weights, c->blockio_device_weights, w);
+ return 0;
+}
+
+int config_parse_blockio_bandwidth(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL, *resolved = NULL;
+ CGroupBlockIODeviceBandwidth *b = NULL, *t;
+ CGroupContext *c = data;
+ const char *p = rvalue;
+ uint64_t bytes;
+ bool read;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ read = streq("BlockIOReadBandwidth", lvalue);
+
+ if (isempty(rvalue)) {
+ LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
+ b->rbps = CGROUP_LIMIT_MAX;
+ b->wbps = CGROUP_LIMIT_MAX;
+ }
+ return 0;
+ }
+
+ r = extract_first_word(&p, &path, NULL, EXTRACT_QUOTES);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0 || isempty(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract device node and bandwidth from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(userdata, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", path);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ r = parse_size(p, 1000, &bytes);
+ if (r < 0 || bytes <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid Block IO Bandwidth '%s', ignoring.", p);
+ return 0;
+ }
+
+ LIST_FOREACH(device_bandwidths, t, c->blockio_device_bandwidths) {
+ if (path_equal(resolved, t->path)) {
+ b = t;
+ break;
+ }
+ }
+
+ if (!t) {
+ b = new0(CGroupBlockIODeviceBandwidth, 1);
+ if (!b)
+ return log_oom();
+
+ b->path = TAKE_PTR(resolved);
+ b->rbps = CGROUP_LIMIT_MAX;
+ b->wbps = CGROUP_LIMIT_MAX;
+
+ LIST_PREPEND(device_bandwidths, c->blockio_device_bandwidths, b);
+ }
+
+ if (read)
+ b->rbps = bytes;
+ else
+ b->wbps = bytes;
+
+ return 0;
+}
+
+int config_parse_job_mode_isolate(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ JobMode *m = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse boolean, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ log_notice("%s is deprecated. Please use OnFailureJobMode= instead", lvalue);
+
+ *m = r ? JOB_ISOLATE : JOB_REPLACE;
+ return 0;
+}
+
+int config_parse_exec_directories(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char***rt = data;
+ Unit *u = userdata;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *rt = strv_free(*rt);
+ return 0;
+ }
+
+ for (p = rvalue;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in \"%s\", ignoring: %m", word);
+ continue;
+ }
+
+ r = path_simplify_and_warn(k, PATH_CHECK_RELATIVE, unit, filename, line, lvalue);
+ if (r < 0)
+ continue;
+
+ if (path_startswith(k, "private")) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "%s= path can't be 'private', ignoring assignment: %s", lvalue, word);
+ continue;
+ }
+
+ r = strv_push(rt, k);
+ if (r < 0)
+ return log_oom();
+ k = NULL;
+ }
+}
+
+int config_parse_set_status(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ size_t l;
+ const char *word, *state;
+ int r;
+ ExitStatusSet *status_set = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* Empty assignment resets the list */
+ if (isempty(rvalue)) {
+ exit_status_set_free(status_set);
+ return 0;
+ }
+
+ FOREACH_WORD(word, l, rvalue, state) {
+ _cleanup_free_ char *temp;
+ int val;
+ Set **set;
+
+ temp = strndup(word, l);
+ if (!temp)
+ return log_oom();
+
+ r = safe_atoi(temp, &val);
+ if (r < 0) {
+ val = signal_from_string(temp);
+
+ if (val <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse value, ignoring: %s", word);
+ continue;
+ }
+ set = &status_set->signal;
+ } else {
+ if (val < 0 || val > 255) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Value %d is outside range 0-255, ignoring", val);
+ continue;
+ }
+ set = &status_set->status;
+ }
+
+ r = set_ensure_allocated(set, NULL);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(*set, INT_TO_PTR(val));
+ if (r < 0)
+ return log_oom();
+ }
+ if (!isempty(state))
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring.");
+
+ return 0;
+}
+
+int config_parse_namespace_path_strv(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Unit *u = userdata;
+ char*** sv = data;
+ const char *p = rvalue;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *sv = strv_free(*sv);
+ return 0;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *resolved = NULL, *joined = NULL;
+ const char *w;
+ bool ignore_enoent = false, shall_prefix = false;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract first word, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ w = word;
+ if (startswith(w, "-")) {
+ ignore_enoent = true;
+ w++;
+ }
+ if (startswith(w, "+")) {
+ shall_prefix = true;
+ w++;
+ }
+
+ r = unit_full_printf(u, w, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s: %m", w);
+ continue;
+ }
+
+ r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ continue;
+
+ joined = strjoin(ignore_enoent ? "-" : "",
+ shall_prefix ? "+" : "",
+ resolved);
+
+ r = strv_push(sv, joined);
+ if (r < 0)
+ return log_oom();
+
+ joined = NULL;
+ }
+
+ return 0;
+}
+
+int config_parse_temporary_filesystems(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Unit *u = userdata;
+ ExecContext *c = data;
+ const char *p = rvalue;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
+ c->temporary_filesystems = NULL;
+ c->n_temporary_filesystems = 0;
+ return 0;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *path = NULL, *resolved = NULL;
+ const char *w;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract first word, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ w = word;
+ r = extract_first_word(&w, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract first word, ignoring: %s", word);
+ continue;
+ }
+ if (r == 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid syntax, ignoring: %s", word);
+ continue;
+ }
+
+ r = unit_full_printf(u, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", path);
+ continue;
+ }
+
+ r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ continue;
+
+ r = temporary_filesystem_add(&c->temporary_filesystems, &c->n_temporary_filesystems, resolved, w);
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_bind_paths(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ Unit *u = userdata;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
+ c->bind_mounts = NULL;
+ c->n_bind_mounts = 0;
+ return 0;
+ }
+
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *source = NULL, *destination = NULL;
+ _cleanup_free_ char *sresolved = NULL, *dresolved = NULL;
+ char *s = NULL, *d = NULL;
+ bool rbind = true, ignore_enoent = false;
+
+ r = extract_first_word(&p, &source, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(u, source, &sresolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to resolved unit specifiers in \"%s\", ignoring: %m", source);
+ continue;
+ }
+
+ s = sresolved;
+ if (s[0] == '-') {
+ ignore_enoent = true;
+ s++;
+ }
+
+ r = path_simplify_and_warn(s, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ continue;
+
+ /* Optionally, the destination is specified. */
+ if (p && p[-1] == ':') {
+ r = extract_first_word(&p, &destination, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+ if (r == 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Missing argument after ':', ignoring: %s", s);
+ continue;
+ }
+
+ r = unit_full_printf(u, destination, &dresolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to resolved specifiers in \"%s\", ignoring: %m", destination);
+ continue;
+ }
+
+ r = path_simplify_and_warn(dresolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ continue;
+
+ d = dresolved;
+
+ /* Optionally, there's also a short option string specified */
+ if (p && p[-1] == ':') {
+ _cleanup_free_ char *options = NULL;
+
+ r = extract_first_word(&p, &options, NULL, EXTRACT_QUOTES);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (isempty(options) || streq(options, "rbind"))
+ rbind = true;
+ else if (streq(options, "norbind"))
+ rbind = false;
+ else {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid option string, ignoring setting: %s", options);
+ continue;
+ }
+ }
+ } else
+ d = s;
+
+ r = bind_mount_add(&c->bind_mounts, &c->n_bind_mounts,
+ &(BindMount) {
+ .source = s,
+ .destination = d,
+ .read_only = !!strstr(lvalue, "ReadOnly"),
+ .recursive = rbind,
+ .ignore_enoent = ignore_enoent,
+ });
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+int config_parse_job_timeout_sec(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Unit *u = data;
+ usec_t usec;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ r = parse_sec_fix_0(rvalue, &usec);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse JobTimeoutSec= parameter, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ /* If the user explicitly changed JobTimeoutSec= also change JobRunningTimeoutSec=, for compatibility with old
+ * versions. If JobRunningTimeoutSec= was explicitly set, avoid this however as whatever the user picked should
+ * count. */
+
+ if (!u->job_running_timeout_set)
+ u->job_running_timeout = usec;
+
+ u->job_timeout = usec;
+
+ return 0;
+}
+
+int config_parse_job_running_timeout_sec(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Unit *u = data;
+ usec_t usec;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ r = parse_sec_fix_0(rvalue, &usec);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse JobRunningTimeoutSec= parameter, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ u->job_running_timeout = usec;
+ u->job_running_timeout_set = true;
+
+ return 0;
+}
+
+int config_parse_emergency_action(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Manager *m = NULL;
+ EmergencyAction *x = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (unit)
+ m = ((Unit*) userdata)->manager;
+ else
+ m = data;
+
+ r = parse_emergency_action(rvalue, MANAGER_IS_SYSTEM(m), x);
+ if (r < 0) {
+ if (r == -EOPNOTSUPP && MANAGER_IS_USER(m)) {
+ /* Compat mode: remove for systemd 241. */
+
+ log_syntax(unit, LOG_INFO, filename, line, r,
+ "%s= in user mode specified as \"%s\", using \"exit-force\" instead.",
+ lvalue, rvalue);
+ *x = EMERGENCY_ACTION_EXIT_FORCE;
+ return 0;
+ }
+
+ if (r == -EOPNOTSUPP)
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "%s= specified as %s mode action, ignoring: %s",
+ lvalue, MANAGER_IS_SYSTEM(m) ? "user" : "system", rvalue);
+ else
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to parse %s=, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_pid_file(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *k = NULL, *n = NULL;
+ Unit *u = userdata;
+ char **s = data;
+ const char *e;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ /* An empty assignment removes already set value. */
+ *s = mfree(*s);
+ return 0;
+ }
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ /* If this is a relative path make it absolute by prefixing the /run */
+ n = path_make_absolute(k, u->manager->prefix[EXEC_DIRECTORY_RUNTIME]);
+ if (!n)
+ return log_oom();
+
+ /* Check that the result is a sensible path */
+ r = path_simplify_and_warn(n, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ return r;
+
+ e = path_startswith(n, "/var/run/");
+ if (e) {
+ char *z;
+
+ z = strjoin("/run/", e);
+ if (!z)
+ return log_oom();
+
+ log_syntax(unit, LOG_NOTICE, filename, line, 0, "PIDFile= references path below legacy directory /var/run/, updating %s → %s; please update the unit file accordingly.", n, z);
+
+ free_and_replace(*s, z);
+ } else
+ free_and_replace(*s, n);
+
+ return 0;
+}
+
+int config_parse_exit_status(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int *exit_status = data, r;
+ uint8_t u;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(exit_status);
+
+ if (isempty(rvalue)) {
+ *exit_status = -1;
+ return 0;
+ }
+
+ r = safe_atou8(rvalue, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse exit status '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ *exit_status = u;
+ return 0;
+}
+
+int config_parse_disable_controllers(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int r;
+ CGroupContext *c = data;
+ CGroupMask disabled_mask;
+
+ /* 1. If empty, make all controllers eligible for use again.
+ * 2. If non-empty, merge all listed controllers, space separated. */
+
+ if (isempty(rvalue)) {
+ c->disable_controllers = 0;
+ return 0;
+ }
+
+ r = cg_mask_from_string(rvalue, &disabled_mask);
+ if (r < 0 || disabled_mask <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid cgroup string: %s, ignoring", rvalue);
+ return 0;
+ }
+
+ c->disable_controllers |= disabled_mask;
+
+ return 0;
+}
+
+#define FOLLOW_MAX 8
+
+static int open_follow(char **filename, FILE **_f, Set *names, char **_final) {
+ char *id = NULL;
+ unsigned c = 0;
+ int fd, r;
+ FILE *f;
+
+ assert(filename);
+ assert(*filename);
+ assert(_f);
+ assert(names);
+
+ /* This will update the filename pointer if the loaded file is
+ * reached by a symlink. The old string will be freed. */
+
+ for (;;) {
+ char *target, *name;
+
+ if (c++ >= FOLLOW_MAX)
+ return -ELOOP;
+
+ path_simplify(*filename, false);
+
+ /* Add the file name we are currently looking at to
+ * the names of this unit, but only if it is a valid
+ * unit name. */
+ name = basename(*filename);
+ if (unit_name_is_valid(name, UNIT_NAME_ANY)) {
+
+ id = set_get(names, name);
+ if (!id) {
+ id = strdup(name);
+ if (!id)
+ return -ENOMEM;
+
+ r = set_consume(names, id);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* Try to open the file name, but don't if its a symlink */
+ fd = open(*filename, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd >= 0)
+ break;
+
+ if (errno != ELOOP)
+ return -errno;
+
+ /* Hmm, so this is a symlink. Let's read the name, and follow it manually */
+ r = readlink_and_make_absolute(*filename, &target);
+ if (r < 0)
+ return r;
+
+ free_and_replace(*filename, target);
+ }
+
+ f = fdopen(fd, "r");
+ if (!f) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ *_f = f;
+ *_final = id;
+
+ return 0;
+}
+
+static int merge_by_names(Unit **u, Set *names, const char *id) {
+ char *k;
+ int r;
+
+ assert(u);
+ assert(*u);
+ assert(names);
+
+ /* Let's try to add in all symlink names we found */
+ while ((k = set_steal_first(names))) {
+
+ /* First try to merge in the other name into our
+ * unit */
+ r = unit_merge_by_name(*u, k);
+ if (r < 0) {
+ Unit *other;
+
+ /* Hmm, we couldn't merge the other unit into
+ * ours? Then let's try it the other way
+ * round */
+
+ /* If the symlink name we are looking at is unit template, then
+ we must search for instance of this template */
+ if (unit_name_is_valid(k, UNIT_NAME_TEMPLATE) && (*u)->instance) {
+ _cleanup_free_ char *instance = NULL;
+
+ r = unit_name_replace_instance(k, (*u)->instance, &instance);
+ if (r < 0)
+ return r;
+
+ other = manager_get_unit((*u)->manager, instance);
+ } else
+ other = manager_get_unit((*u)->manager, k);
+
+ free(k);
+
+ if (other) {
+ r = unit_merge(other, *u);
+ if (r >= 0) {
+ *u = other;
+ return merge_by_names(u, names, NULL);
+ }
+ }
+
+ return r;
+ }
+
+ if (id == k)
+ unit_choose_id(*u, id);
+
+ free(k);
+ }
+
+ return 0;
+}
+
+static int load_from_path(Unit *u, const char *path) {
+ _cleanup_set_free_free_ Set *symlink_names = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *filename = NULL;
+ char *id = NULL;
+ Unit *merged;
+ struct stat st;
+ int r;
+
+ assert(u);
+ assert(path);
+
+ symlink_names = set_new(&string_hash_ops);
+ if (!symlink_names)
+ return -ENOMEM;
+
+ if (path_is_absolute(path)) {
+
+ filename = strdup(path);
+ if (!filename)
+ return -ENOMEM;
+
+ r = open_follow(&filename, &f, symlink_names, &id);
+ if (r < 0) {
+ filename = mfree(filename);
+ if (r != -ENOENT)
+ return r;
+ }
+
+ } else {
+ char **p;
+
+ STRV_FOREACH(p, u->manager->lookup_paths.search_path) {
+
+ /* Instead of opening the path right away, we manually
+ * follow all symlinks and add their name to our unit
+ * name set while doing so */
+ filename = path_make_absolute(path, *p);
+ if (!filename)
+ return -ENOMEM;
+
+ if (u->manager->unit_path_cache &&
+ !set_get(u->manager->unit_path_cache, filename))
+ r = -ENOENT;
+ else
+ r = open_follow(&filename, &f, symlink_names, &id);
+ if (r >= 0)
+ break;
+
+ /* ENOENT means that the file is missing or is a dangling symlink.
+ * ENOTDIR means that one of paths we expect to be is a directory
+ * is not a directory, we should just ignore that.
+ * EACCES means that the directory or file permissions are wrong.
+ */
+ if (r == -EACCES)
+ log_debug_errno(r, "Cannot access \"%s\": %m", filename);
+ else if (!IN_SET(r, -ENOENT, -ENOTDIR))
+ return r;
+
+ filename = mfree(filename);
+ /* Empty the symlink names for the next run */
+ set_clear_free(symlink_names);
+ }
+ }
+
+ if (!filename)
+ /* Hmm, no suitable file found? */
+ return 0;
+
+ if (!unit_type_may_alias(u->type) && set_size(symlink_names) > 1) {
+ log_unit_warning(u, "Unit type of %s does not support alias names, refusing loading via symlink.", u->id);
+ return -ELOOP;
+ }
+
+ merged = u;
+ r = merge_by_names(&merged, symlink_names, id);
+ if (r < 0)
+ return r;
+
+ if (merged != u) {
+ u->load_state = UNIT_MERGED;
+ return 0;
+ }
+
+ if (fstat(fileno(f), &st) < 0)
+ return -errno;
+
+ if (null_or_empty(&st)) {
+ u->load_state = UNIT_MASKED;
+ u->fragment_mtime = 0;
+ } else {
+ u->load_state = UNIT_LOADED;
+ u->fragment_mtime = timespec_load(&st.st_mtim);
+
+ /* Now, parse the file contents */
+ r = config_parse(u->id, filename, f,
+ UNIT_VTABLE(u)->sections,
+ config_item_perf_lookup, load_fragment_gperf_lookup,
+ CONFIG_PARSE_ALLOW_INCLUDE, u);
+ if (r < 0)
+ return r;
+ }
+
+ free_and_replace(u->fragment_path, filename);
+
+ if (u->source_path) {
+ if (stat(u->source_path, &st) >= 0)
+ u->source_mtime = timespec_load(&st.st_mtim);
+ else
+ u->source_mtime = 0;
+ }
+
+ return 0;
+}
+
+int unit_load_fragment(Unit *u) {
+ int r;
+ Iterator i;
+ const char *t;
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+ assert(u->id);
+
+ if (u->transient) {
+ u->load_state = UNIT_LOADED;
+ return 0;
+ }
+
+ /* First, try to find the unit under its id. We always look
+ * for unit files in the default directories, to make it easy
+ * to override things by placing things in /etc/systemd/system */
+ r = load_from_path(u, u->id);
+ if (r < 0)
+ return r;
+
+ /* Try to find an alias we can load this with */
+ if (u->load_state == UNIT_STUB) {
+ SET_FOREACH(t, u->names, i) {
+
+ if (t == u->id)
+ continue;
+
+ r = load_from_path(u, t);
+ if (r < 0)
+ return r;
+
+ if (u->load_state != UNIT_STUB)
+ break;
+ }
+ }
+
+ /* And now, try looking for it under the suggested (originally linked) path */
+ if (u->load_state == UNIT_STUB && u->fragment_path) {
+
+ r = load_from_path(u, u->fragment_path);
+ if (r < 0)
+ return r;
+
+ if (u->load_state == UNIT_STUB)
+ /* Hmm, this didn't work? Then let's get rid
+ * of the fragment path stored for us, so that
+ * we don't point to an invalid location. */
+ u->fragment_path = mfree(u->fragment_path);
+ }
+
+ /* Look for a template */
+ if (u->load_state == UNIT_STUB && u->instance) {
+ _cleanup_free_ char *k = NULL;
+
+ r = unit_name_template(u->id, &k);
+ if (r < 0)
+ return r;
+
+ r = load_from_path(u, k);
+ if (r < 0) {
+ if (r == -ENOEXEC)
+ log_unit_notice(u, "Unit configuration has fatal error, unit will not be started.");
+ return r;
+ }
+
+ if (u->load_state == UNIT_STUB) {
+ SET_FOREACH(t, u->names, i) {
+ _cleanup_free_ char *z = NULL;
+
+ if (t == u->id)
+ continue;
+
+ r = unit_name_template(t, &z);
+ if (r < 0)
+ return r;
+
+ r = load_from_path(u, z);
+ if (r < 0)
+ return r;
+
+ if (u->load_state != UNIT_STUB)
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void unit_dump_config_items(FILE *f) {
+ static const struct {
+ const ConfigParserCallback callback;
+ const char *rvalue;
+ } table[] = {
+ { config_parse_warn_compat, "NOTSUPPORTED" },
+ { config_parse_int, "INTEGER" },
+ { config_parse_unsigned, "UNSIGNED" },
+ { config_parse_iec_size, "SIZE" },
+ { config_parse_iec_uint64, "SIZE" },
+ { config_parse_si_size, "SIZE" },
+ { config_parse_bool, "BOOLEAN" },
+ { config_parse_string, "STRING" },
+ { config_parse_path, "PATH" },
+ { config_parse_unit_path_printf, "PATH" },
+ { config_parse_strv, "STRING [...]" },
+ { config_parse_exec_nice, "NICE" },
+ { config_parse_exec_oom_score_adjust, "OOMSCOREADJUST" },
+ { config_parse_exec_io_class, "IOCLASS" },
+ { config_parse_exec_io_priority, "IOPRIORITY" },
+ { config_parse_exec_cpu_sched_policy, "CPUSCHEDPOLICY" },
+ { config_parse_exec_cpu_sched_prio, "CPUSCHEDPRIO" },
+ { config_parse_exec_cpu_affinity, "CPUAFFINITY" },
+ { config_parse_mode, "MODE" },
+ { config_parse_unit_env_file, "FILE" },
+ { config_parse_exec_output, "OUTPUT" },
+ { config_parse_exec_input, "INPUT" },
+ { config_parse_log_facility, "FACILITY" },
+ { config_parse_log_level, "LEVEL" },
+ { config_parse_exec_secure_bits, "SECUREBITS" },
+ { config_parse_capability_set, "BOUNDINGSET" },
+ { config_parse_rlimit, "LIMIT" },
+ { config_parse_unit_deps, "UNIT [...]" },
+ { config_parse_exec, "PATH [ARGUMENT [...]]" },
+ { config_parse_service_type, "SERVICETYPE" },
+ { config_parse_service_restart, "SERVICERESTART" },
+ { config_parse_kill_mode, "KILLMODE" },
+ { config_parse_signal, "SIGNAL" },
+ { config_parse_socket_listen, "SOCKET [...]" },
+ { config_parse_socket_bind, "SOCKETBIND" },
+ { config_parse_socket_bindtodevice, "NETWORKINTERFACE" },
+ { config_parse_sec, "SECONDS" },
+ { config_parse_nsec, "NANOSECONDS" },
+ { config_parse_namespace_path_strv, "PATH [...]" },
+ { config_parse_bind_paths, "PATH[:PATH[:OPTIONS]] [...]" },
+ { config_parse_unit_requires_mounts_for, "PATH [...]" },
+ { config_parse_exec_mount_flags, "MOUNTFLAG [...]" },
+ { config_parse_unit_string_printf, "STRING" },
+ { config_parse_trigger_unit, "UNIT" },
+ { config_parse_timer, "TIMER" },
+ { config_parse_path_spec, "PATH" },
+ { config_parse_notify_access, "ACCESS" },
+ { config_parse_ip_tos, "TOS" },
+ { config_parse_unit_condition_path, "CONDITION" },
+ { config_parse_unit_condition_string, "CONDITION" },
+ { config_parse_unit_condition_null, "CONDITION" },
+ { config_parse_unit_slice, "SLICE" },
+ { config_parse_documentation, "URL" },
+ { config_parse_service_timeout, "SECONDS" },
+ { config_parse_emergency_action, "ACTION" },
+ { config_parse_set_status, "STATUS" },
+ { config_parse_service_sockets, "SOCKETS" },
+ { config_parse_environ, "ENVIRON" },
+#if HAVE_SECCOMP
+ { config_parse_syscall_filter, "SYSCALLS" },
+ { config_parse_syscall_archs, "ARCHS" },
+ { config_parse_syscall_errno, "ERRNO" },
+ { config_parse_address_families, "FAMILIES" },
+ { config_parse_restrict_namespaces, "NAMESPACES" },
+#endif
+ { config_parse_cpu_shares, "SHARES" },
+ { config_parse_cg_weight, "WEIGHT" },
+ { config_parse_memory_limit, "LIMIT" },
+ { config_parse_device_allow, "DEVICE" },
+ { config_parse_device_policy, "POLICY" },
+ { config_parse_io_limit, "LIMIT" },
+ { config_parse_io_device_weight, "DEVICEWEIGHT" },
+ { config_parse_io_device_latency, "DEVICELATENCY" },
+ { config_parse_blockio_bandwidth, "BANDWIDTH" },
+ { config_parse_blockio_weight, "WEIGHT" },
+ { config_parse_blockio_device_weight, "DEVICEWEIGHT" },
+ { config_parse_long, "LONG" },
+ { config_parse_socket_service, "SERVICE" },
+#if HAVE_SELINUX
+ { config_parse_exec_selinux_context, "LABEL" },
+#endif
+ { config_parse_job_mode, "MODE" },
+ { config_parse_job_mode_isolate, "BOOLEAN" },
+ { config_parse_personality, "PERSONALITY" },
+ };
+
+ const char *prev = NULL;
+ const char *i;
+
+ assert(f);
+
+ NULSTR_FOREACH(i, load_fragment_gperf_nulstr) {
+ const char *rvalue = "OTHER", *lvalue;
+ const ConfigPerfItem *p;
+ size_t prefix_len;
+ const char *dot;
+ unsigned j;
+
+ assert_se(p = load_fragment_gperf_lookup(i, strlen(i)));
+
+ /* Hide legacy settings */
+ if (p->parse == config_parse_warn_compat &&
+ p->ltype == DISABLED_LEGACY)
+ continue;
+
+ for (j = 0; j < ELEMENTSOF(table); j++)
+ if (p->parse == table[j].callback) {
+ rvalue = table[j].rvalue;
+ break;
+ }
+
+ dot = strchr(i, '.');
+ lvalue = dot ? dot + 1 : i;
+ prefix_len = dot-i;
+
+ if (dot)
+ if (!prev || !strneq(prev, i, prefix_len+1)) {
+ if (prev)
+ fputc('\n', f);
+
+ fprintf(f, "[%.*s]\n", (int) prefix_len, i);
+ }
+
+ fprintf(f, "%s=%s\n", lvalue, rvalue);
+ prev = i;
+ }
+}
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
new file mode 100644
index 0000000..e0d3b4e
--- /dev/null
+++ b/src/core/load-fragment.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "conf-parser.h"
+#include "unit.h"
+
+/* Read service data from .desktop file style configuration fragments */
+
+int unit_load_fragment(Unit *u);
+
+void unit_dump_config_items(FILE *f);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_deps);
+CONFIG_PARSER_PROTOTYPE(config_parse_obsolete_unit_deps);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_string_printf);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_strv_printf);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_path_printf);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_path_strv_printf);
+CONFIG_PARSER_PROTOTYPE(config_parse_documentation);
+CONFIG_PARSER_PROTOTYPE(config_parse_socket_listen);
+CONFIG_PARSER_PROTOTYPE(config_parse_socket_protocol);
+CONFIG_PARSER_PROTOTYPE(config_parse_socket_bind);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_nice);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_oom_score_adjust);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec);
+CONFIG_PARSER_PROTOTYPE(config_parse_service_timeout);
+CONFIG_PARSER_PROTOTYPE(config_parse_service_type);
+CONFIG_PARSER_PROTOTYPE(config_parse_service_restart);
+CONFIG_PARSER_PROTOTYPE(config_parse_socket_bindtodevice);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_output);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_input);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_input_text);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_input_data);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_io_class);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_io_priority);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_cpu_sched_policy);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_cpu_sched_prio);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_cpu_affinity);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_secure_bits);
+CONFIG_PARSER_PROTOTYPE(config_parse_capability_set);
+CONFIG_PARSER_PROTOTYPE(config_parse_kill_signal);
+CONFIG_PARSER_PROTOTYPE(config_parse_final_kill_signal);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_mount_flags);
+CONFIG_PARSER_PROTOTYPE(config_parse_timer);
+CONFIG_PARSER_PROTOTYPE(config_parse_trigger_unit);
+CONFIG_PARSER_PROTOTYPE(config_parse_path_spec);
+CONFIG_PARSER_PROTOTYPE(config_parse_socket_service);
+CONFIG_PARSER_PROTOTYPE(config_parse_service_sockets);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_env_file);
+CONFIG_PARSER_PROTOTYPE(config_parse_ip_tos);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_condition_path);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_condition_string);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_condition_null);
+CONFIG_PARSER_PROTOTYPE(config_parse_kill_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_notify_access);
+CONFIG_PARSER_PROTOTYPE(config_parse_emergency_action);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_requires_mounts_for);
+CONFIG_PARSER_PROTOTYPE(config_parse_syscall_filter);
+CONFIG_PARSER_PROTOTYPE(config_parse_syscall_archs);
+CONFIG_PARSER_PROTOTYPE(config_parse_syscall_errno);
+CONFIG_PARSER_PROTOTYPE(config_parse_environ);
+CONFIG_PARSER_PROTOTYPE(config_parse_pass_environ);
+CONFIG_PARSER_PROTOTYPE(config_parse_unset_environ);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_slice);
+CONFIG_PARSER_PROTOTYPE(config_parse_cg_weight);
+CONFIG_PARSER_PROTOTYPE(config_parse_cpu_shares);
+CONFIG_PARSER_PROTOTYPE(config_parse_memory_limit);
+CONFIG_PARSER_PROTOTYPE(config_parse_tasks_max);
+CONFIG_PARSER_PROTOTYPE(config_parse_delegate);
+CONFIG_PARSER_PROTOTYPE(config_parse_device_policy);
+CONFIG_PARSER_PROTOTYPE(config_parse_device_allow);
+CONFIG_PARSER_PROTOTYPE(config_parse_io_device_latency);
+CONFIG_PARSER_PROTOTYPE(config_parse_io_device_weight);
+CONFIG_PARSER_PROTOTYPE(config_parse_io_limit);
+CONFIG_PARSER_PROTOTYPE(config_parse_blockio_weight);
+CONFIG_PARSER_PROTOTYPE(config_parse_blockio_device_weight);
+CONFIG_PARSER_PROTOTYPE(config_parse_blockio_bandwidth);
+CONFIG_PARSER_PROTOTYPE(config_parse_job_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_job_mode_isolate);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_selinux_context);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_apparmor_profile);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_smack_process_label);
+CONFIG_PARSER_PROTOTYPE(config_parse_address_families);
+CONFIG_PARSER_PROTOTYPE(config_parse_runtime_preserve_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_directories);
+CONFIG_PARSER_PROTOTYPE(config_parse_set_status);
+CONFIG_PARSER_PROTOTYPE(config_parse_namespace_path_strv);
+CONFIG_PARSER_PROTOTYPE(config_parse_temporary_filesystems);
+CONFIG_PARSER_PROTOTYPE(config_parse_cpu_quota);
+CONFIG_PARSER_PROTOTYPE(config_parse_protect_home);
+CONFIG_PARSER_PROTOTYPE(config_parse_protect_system);
+CONFIG_PARSER_PROTOTYPE(config_parse_bus_name);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_utmp_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_working_directory);
+CONFIG_PARSER_PROTOTYPE(config_parse_fdname);
+CONFIG_PARSER_PROTOTYPE(config_parse_sec_fix_0);
+CONFIG_PARSER_PROTOTYPE(config_parse_user_group);
+CONFIG_PARSER_PROTOTYPE(config_parse_user_group_strv);
+CONFIG_PARSER_PROTOTYPE(config_parse_restrict_namespaces);
+CONFIG_PARSER_PROTOTYPE(config_parse_bind_paths);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_keyring_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_job_timeout_sec);
+CONFIG_PARSER_PROTOTYPE(config_parse_job_running_timeout_sec);
+CONFIG_PARSER_PROTOTYPE(config_parse_log_extra_fields);
+CONFIG_PARSER_PROTOTYPE(config_parse_collect_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_pid_file);
+CONFIG_PARSER_PROTOTYPE(config_parse_exit_status);
+CONFIG_PARSER_PROTOTYPE(config_parse_disable_controllers);
+
+/* gperf prototypes */
+const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+extern const char load_fragment_gperf_nulstr[];
diff --git a/src/core/locale-setup.c b/src/core/locale-setup.c
new file mode 100644
index 0000000..aa4a89c
--- /dev/null
+++ b/src/core/locale-setup.c
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "env-file.h"
+#include "env-util.h"
+#include "locale-setup.h"
+#include "locale-util.h"
+#include "proc-cmdline.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+#include "virt.h"
+
+int locale_setup(char ***environment) {
+ _cleanup_(locale_variables_freep) char *variables[_VARIABLE_LC_MAX] = {};
+ _cleanup_strv_free_ char **add = NULL;
+ LocaleVariable i;
+ int r;
+
+ r = proc_cmdline_get_key_many(PROC_CMDLINE_STRIP_RD_PREFIX,
+ "locale.LANG", &variables[VARIABLE_LANG],
+ "locale.LANGUAGE", &variables[VARIABLE_LANGUAGE],
+ "locale.LC_CTYPE", &variables[VARIABLE_LC_CTYPE],
+ "locale.LC_NUMERIC", &variables[VARIABLE_LC_NUMERIC],
+ "locale.LC_TIME", &variables[VARIABLE_LC_TIME],
+ "locale.LC_COLLATE", &variables[VARIABLE_LC_COLLATE],
+ "locale.LC_MONETARY", &variables[VARIABLE_LC_MONETARY],
+ "locale.LC_MESSAGES", &variables[VARIABLE_LC_MESSAGES],
+ "locale.LC_PAPER", &variables[VARIABLE_LC_PAPER],
+ "locale.LC_NAME", &variables[VARIABLE_LC_NAME],
+ "locale.LC_ADDRESS", &variables[VARIABLE_LC_ADDRESS],
+ "locale.LC_TELEPHONE", &variables[VARIABLE_LC_TELEPHONE],
+ "locale.LC_MEASUREMENT", &variables[VARIABLE_LC_MEASUREMENT],
+ "locale.LC_IDENTIFICATION", &variables[VARIABLE_LC_IDENTIFICATION]);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to read /proc/cmdline: %m");
+
+ /* Hmm, nothing set on the kernel cmd line? Then let's try /etc/locale.conf */
+ if (r <= 0) {
+ r = parse_env_file(NULL, "/etc/locale.conf",
+ "LANG", &variables[VARIABLE_LANG],
+ "LANGUAGE", &variables[VARIABLE_LANGUAGE],
+ "LC_CTYPE", &variables[VARIABLE_LC_CTYPE],
+ "LC_NUMERIC", &variables[VARIABLE_LC_NUMERIC],
+ "LC_TIME", &variables[VARIABLE_LC_TIME],
+ "LC_COLLATE", &variables[VARIABLE_LC_COLLATE],
+ "LC_MONETARY", &variables[VARIABLE_LC_MONETARY],
+ "LC_MESSAGES", &variables[VARIABLE_LC_MESSAGES],
+ "LC_PAPER", &variables[VARIABLE_LC_PAPER],
+ "LC_NAME", &variables[VARIABLE_LC_NAME],
+ "LC_ADDRESS", &variables[VARIABLE_LC_ADDRESS],
+ "LC_TELEPHONE", &variables[VARIABLE_LC_TELEPHONE],
+ "LC_MEASUREMENT", &variables[VARIABLE_LC_MEASUREMENT],
+ "LC_IDENTIFICATION", &variables[VARIABLE_LC_IDENTIFICATION]);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to read /etc/locale.conf: %m");
+ }
+
+ for (i = 0; i < _VARIABLE_LC_MAX; i++) {
+ char *s;
+
+ if (!variables[i])
+ continue;
+
+ s = strjoin(locale_variable_to_string(i), "=", variables[i]);
+ if (!s)
+ return -ENOMEM;
+
+ if (strv_consume(&add, s) < 0)
+ return -ENOMEM;
+ }
+
+ if (strv_isempty(add)) {
+ /* If no locale is configured then default to compile-time default. */
+
+ add = strv_new("LANG=" SYSTEMD_DEFAULT_LOCALE);
+ if (!add)
+ return -ENOMEM;
+ }
+
+ if (strv_isempty(*environment))
+ strv_free_and_replace(*environment, add);
+ else {
+ char **merged;
+
+ merged = strv_env_merge(2, *environment, add);
+ if (!merged)
+ return -ENOMEM;
+
+ strv_free_and_replace(*environment, merged);
+ }
+
+ return 0;
+}
diff --git a/src/core/locale-setup.h b/src/core/locale-setup.h
new file mode 100644
index 0000000..01fadd0
--- /dev/null
+++ b/src/core/locale-setup.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int locale_setup(char ***environment);
diff --git a/src/core/loopback-setup.c b/src/core/loopback-setup.c
new file mode 100644
index 0000000..f613db8
--- /dev/null
+++ b/src/core/loopback-setup.c
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+#include <stdlib.h>
+
+#include "sd-netlink.h"
+
+#include "loopback-setup.h"
+#include "missing.h"
+#include "netlink-util.h"
+
+#define LOOPBACK_SETUP_TIMEOUT_USEC (5 * USEC_PER_SEC)
+
+struct state {
+ unsigned n_messages;
+ int rcode;
+ const char *error_message;
+ const char *success_message;
+};
+
+static int generic_handler(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
+ struct state *s = userdata;
+ int r;
+
+ assert(s);
+ assert(s->n_messages > 0);
+ s->n_messages--;
+
+ errno = 0;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_debug_errno(r, "%s: %m", s->error_message);
+ else
+ log_debug("%s", s->success_message);
+
+ s->rcode = r;
+ return 0;
+}
+
+static int start_loopback(sd_netlink *rtnl, struct state *s) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(s);
+
+ r = sd_rtnl_message_new_link(rtnl, &req, RTM_SETLINK, LOOPBACK_IFINDEX);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_link_set_flags(req, IFF_UP, IFF_UP);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call_async(rtnl, NULL, req, generic_handler, NULL, s, LOOPBACK_SETUP_TIMEOUT_USEC, "systemd-start-loopback");
+ if (r < 0)
+ return r;
+
+ s->n_messages ++;
+ return 0;
+}
+
+static int add_ipv4_address(sd_netlink *rtnl, struct state *s) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(s);
+
+ r = sd_rtnl_message_new_addr(rtnl, &req, RTM_NEWADDR, LOOPBACK_IFINDEX, AF_INET);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_addr_set_prefixlen(req, 8);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_addr_set_flags(req, IFA_F_PERMANENT);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_addr_set_scope(req, RT_SCOPE_HOST);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_in_addr(req, IFA_LOCAL, &(struct in_addr) { .s_addr = htobe32(INADDR_LOOPBACK) } );
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call_async(rtnl, NULL, req, generic_handler, NULL, s, USEC_INFINITY, "systemd-loopback-ipv4");
+ if (r < 0)
+ return r;
+
+ s->n_messages ++;
+ return 0;
+}
+
+static int add_ipv6_address(sd_netlink *rtnl, struct state *s) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(s);
+
+ r = sd_rtnl_message_new_addr(rtnl, &req, RTM_NEWADDR, LOOPBACK_IFINDEX, AF_INET6);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_addr_set_prefixlen(req, 128);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_addr_set_flags(req, IFA_F_PERMANENT);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_addr_set_scope(req, RT_SCOPE_HOST);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_in6_addr(req, IFA_LOCAL, &in6addr_loopback);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call_async(rtnl, NULL, req, generic_handler, NULL, s, USEC_INFINITY, "systemd-loopback-ipv6");
+ if (r < 0)
+ return r;
+
+ s->n_messages ++;
+ return 0;
+}
+
+static bool check_loopback(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ unsigned flags;
+ int r;
+
+ r = sd_rtnl_message_new_link(rtnl, &req, RTM_GETLINK, LOOPBACK_IFINDEX);
+ if (r < 0)
+ return false;
+
+ r = sd_netlink_call(rtnl, req, USEC_INFINITY, &reply);
+ if (r < 0)
+ return false;
+
+ r = sd_rtnl_message_link_get_flags(reply, &flags);
+ if (r < 0)
+ return false;
+
+ return flags & IFF_UP;
+}
+
+int loopback_setup(void) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ struct state state_4 = {
+ .error_message = "Failed to add address 127.0.0.1 to loopback interface",
+ .success_message = "Successfully added address 127.0.0.1 to loopback interface",
+ }, state_6 = {
+ .error_message = "Failed to add address ::1 to loopback interface",
+ .success_message = "Successfully added address ::1 to loopback interface",
+ }, state_up = {
+ .error_message = "Failed to bring loopback interface up",
+ .success_message = "Successfully brought loopback interface up",
+ };
+ int r;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink: %m");
+
+ /* Note that we add the IP addresses here explicitly even though the kernel does that too implicitly when
+ * setting up the loopback device. The reason we do this here a second time (and possibly race against the
+ * kernel) is that we want to synchronously wait until the IP addresses are set up correctly, see
+ *
+ * https://github.com/systemd/systemd/issues/5641 */
+
+ r = add_ipv4_address(rtnl, &state_4);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enqueue IPv4 loopback address add request: %m");
+
+ r = add_ipv6_address(rtnl, &state_6);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enqueue IPv6 loopback address add request: %m");
+
+ r = start_loopback(rtnl, &state_up);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enqueue loopback interface start request: %m");
+
+ while (state_4.n_messages + state_6.n_messages + state_up.n_messages > 0) {
+ r = sd_netlink_wait(rtnl, LOOPBACK_SETUP_TIMEOUT_USEC);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for netlink event: %m");
+
+ r = sd_netlink_process(rtnl, NULL);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to process netlink event: %m");
+ }
+
+ /* Note that we don't really care whether the addresses could be added or not */
+ if (state_up.rcode != 0) {
+ /* If we lack the permissions to configure the loopback device,
+ * but we find it to be already configured, let's exit cleanly,
+ * in order to supported unprivileged containers. */
+ if (state_up.rcode == -EPERM && check_loopback(rtnl))
+ return 0;
+
+ return log_warning_errno(state_up.rcode, "Failed to configure loopback device: %m");
+ }
+
+ return 0;
+}
diff --git a/src/core/loopback-setup.h b/src/core/loopback-setup.h
new file mode 100644
index 0000000..c0eea10
--- /dev/null
+++ b/src/core/loopback-setup.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int loopback_setup(void);
diff --git a/src/core/machine-id-setup.c b/src/core/machine-id-setup.c
new file mode 100644
index 0000000..aae5480
--- /dev/null
+++ b/src/core/machine-id-setup.c
@@ -0,0 +1,243 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "id128-util.h"
+#include "log.h"
+#include "machine-id-setup.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "umask-util.h"
+#include "util.h"
+#include "virt.h"
+
+static int generate_machine_id(const char *root, sd_id128_t *ret) {
+ const char *dbus_machine_id;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(ret);
+
+ /* First, try reading the D-Bus machine id, unless it is a symlink */
+ dbus_machine_id = prefix_roota(root, "/var/lib/dbus/machine-id");
+ fd = open(dbus_machine_id, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd >= 0) {
+ if (id128_read_fd(fd, ID128_PLAIN, ret) >= 0) {
+ log_info("Initializing machine ID from D-Bus machine ID.");
+ return 0;
+ }
+
+ fd = safe_close(fd);
+ }
+
+ if (isempty(root)) {
+ /* If that didn't work, see if we are running in a container,
+ * and a machine ID was passed in via $container_uuid the way
+ * libvirt/LXC does it */
+
+ if (detect_container() > 0) {
+ _cleanup_free_ char *e = NULL;
+
+ if (getenv_for_pid(1, "container_uuid", &e) > 0 &&
+ sd_id128_from_string(e, ret) >= 0) {
+ log_info("Initializing machine ID from container UUID.");
+ return 0;
+ }
+
+ } else if (detect_vm() == VIRTUALIZATION_KVM) {
+
+ /* If we are not running in a container, see if we are
+ * running in qemu/kvm and a machine ID was passed in
+ * via -uuid on the qemu/kvm command line */
+
+ if (id128_read("/sys/class/dmi/id/product_uuid", ID128_UUID, ret) >= 0) {
+ log_info("Initializing machine ID from KVM UUID.");
+ return 0;
+ }
+ }
+ }
+
+ /* If that didn't work, generate a random machine id */
+ r = sd_id128_randomize(ret);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate randomized machine ID: %m");
+
+ log_info("Initializing machine ID from random generator.");
+ return 0;
+}
+
+int machine_id_setup(const char *root, sd_id128_t machine_id, sd_id128_t *ret) {
+ const char *etc_machine_id, *run_machine_id;
+ _cleanup_close_ int fd = -1;
+ bool writable;
+ int r;
+
+ etc_machine_id = prefix_roota(root, "/etc/machine-id");
+
+ RUN_WITH_UMASK(0000) {
+ /* We create this 0444, to indicate that this isn't really
+ * something you should ever modify. Of course, since the file
+ * will be owned by root it doesn't matter much, but maybe
+ * people look. */
+
+ (void) mkdir_parents(etc_machine_id, 0755);
+ fd = open(etc_machine_id, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY, 0444);
+ if (fd < 0) {
+ int old_errno = errno;
+
+ fd = open(etc_machine_id, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0) {
+ if (old_errno == EROFS && errno == ENOENT)
+ return log_error_errno(errno,
+ "System cannot boot: Missing /etc/machine-id and /etc is mounted read-only.\n"
+ "Booting up is supported only when:\n"
+ "1) /etc/machine-id exists and is populated.\n"
+ "2) /etc/machine-id exists and is empty.\n"
+ "3) /etc/machine-id is missing and /etc is writable.\n");
+ else
+ return log_error_errno(errno, "Cannot open %s: %m", etc_machine_id);
+ }
+
+ writable = false;
+ } else
+ writable = true;
+ }
+
+ /* A we got a valid machine ID argument, that's what counts */
+ if (sd_id128_is_null(machine_id)) {
+
+ /* Try to read any existing machine ID */
+ if (id128_read_fd(fd, ID128_PLAIN, ret) >= 0)
+ return 0;
+
+ /* Hmm, so, the id currently stored is not useful, then let's generate one */
+ r = generate_machine_id(root, &machine_id);
+ if (r < 0)
+ return r;
+ }
+
+ if (writable) {
+ if (lseek(fd, 0, SEEK_SET) == (off_t) -1)
+ return log_error_errno(errno, "Failed to seek %s: %m", etc_machine_id);
+
+ if (ftruncate(fd, 0) < 0)
+ return log_error_errno(errno, "Failed to truncate %s: %m", etc_machine_id);
+
+ if (id128_write_fd(fd, ID128_PLAIN, machine_id, true) >= 0)
+ goto finish;
+ }
+
+ fd = safe_close(fd);
+
+ /* Hmm, we couldn't write it? So let's write it to /run/machine-id as a replacement */
+
+ run_machine_id = prefix_roota(root, "/run/machine-id");
+
+ RUN_WITH_UMASK(0022)
+ r = id128_write(run_machine_id, ID128_PLAIN, machine_id, false);
+ if (r < 0) {
+ (void) unlink(run_machine_id);
+ return log_error_errno(r, "Cannot write %s: %m", run_machine_id);
+ }
+
+ /* And now, let's mount it over */
+ if (mount(run_machine_id, etc_machine_id, NULL, MS_BIND, NULL) < 0) {
+ (void) unlink_noerrno(run_machine_id);
+ return log_error_errno(errno, "Failed to mount %s: %m", etc_machine_id);
+ }
+
+ log_info("Installed transient %s file.", etc_machine_id);
+
+ /* Mark the mount read-only */
+ if (mount(NULL, etc_machine_id, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL) < 0)
+ log_warning_errno(errno, "Failed to make transient %s read-only, ignoring: %m", etc_machine_id);
+
+finish:
+ if (ret)
+ *ret = machine_id;
+
+ return 0;
+}
+
+int machine_id_commit(const char *root) {
+ _cleanup_close_ int fd = -1, initial_mntns_fd = -1;
+ const char *etc_machine_id;
+ sd_id128_t id;
+ int r;
+
+ /* Replaces a tmpfs bind mount of /etc/machine-id by a proper file, atomically. For this, the umount is removed
+ * in a mount namespace, a new file is created at the right place. Afterwards the mount is also removed in the
+ * original mount namespace, thus revealing the file that was just created. */
+
+ etc_machine_id = prefix_roota(root, "/etc/machine-id");
+
+ r = path_is_mount_point(etc_machine_id, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", etc_machine_id);
+ if (r == 0) {
+ log_debug("%s is not a mount point. Nothing to do.", etc_machine_id);
+ return 0;
+ }
+
+ /* Read existing machine-id */
+ fd = open(etc_machine_id, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_error_errno(errno, "Cannot open %s: %m", etc_machine_id);
+
+ r = fd_is_temporary_fs(fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether %s is on a temporary file system: %m", etc_machine_id);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EROFS),
+ "%s is not on a temporary file system.",
+ etc_machine_id);
+
+ r = id128_read_fd(fd, ID128_PLAIN, &id);
+ if (r < 0)
+ return log_error_errno(r, "We didn't find a valid machine ID in %s: %m", etc_machine_id);
+
+ fd = safe_close(fd);
+
+ /* Store current mount namespace */
+ r = namespace_open(0, NULL, &initial_mntns_fd, NULL, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Can't fetch current mount namespace: %m");
+
+ /* Switch to a new mount namespace, isolate ourself and unmount etc_machine_id in our new namespace */
+ if (unshare(CLONE_NEWNS) < 0)
+ return log_error_errno(errno, "Failed to enter new namespace: %m");
+
+ if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
+ return log_error_errno(errno, "Couldn't make-rslave / mountpoint in our private namespace: %m");
+
+ if (umount(etc_machine_id) < 0)
+ return log_error_errno(errno, "Failed to unmount transient %s file in our private namespace: %m", etc_machine_id);
+
+ /* Update a persistent version of etc_machine_id */
+ r = id128_write(etc_machine_id, ID128_PLAIN, id, true);
+ if (r < 0)
+ return log_error_errno(r, "Cannot write %s. This is mandatory to get a persistent machine ID: %m", etc_machine_id);
+
+ /* Return to initial namespace and proceed a lazy tmpfs unmount */
+ r = namespace_enter(-1, initial_mntns_fd, -1, -1, -1);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to switch back to initial mount namespace: %m.\nWe'll keep transient %s file until next reboot.", etc_machine_id);
+
+ if (umount2(etc_machine_id, MNT_DETACH) < 0)
+ return log_warning_errno(errno, "Failed to unmount transient %s file: %m.\nWe keep that mount until next reboot.", etc_machine_id);
+
+ return 0;
+}
diff --git a/src/core/machine-id-setup.h b/src/core/machine-id-setup.h
new file mode 100644
index 0000000..d6ac62a
--- /dev/null
+++ b/src/core/machine-id-setup.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int machine_id_commit(const char *root);
+int machine_id_setup(const char *root, sd_id128_t requested, sd_id128_t *ret);
diff --git a/src/core/macros.systemd.in b/src/core/macros.systemd.in
new file mode 100644
index 0000000..d9e7f57
--- /dev/null
+++ b/src/core/macros.systemd.in
@@ -0,0 +1,145 @@
+# -*- Mode: rpm-spec; indent-tabs-mode: nil -*- */
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+
+# RPM macros for packages installing systemd unit files
+
+%_unitdir @systemunitdir@
+%_userunitdir @userunitdir@
+%_presetdir @systempresetdir@
+%_userpresetdir @userpresetdir@
+%_udevhwdbdir @udevhwdbdir@
+%_udevrulesdir @udevrulesdir@
+%_journalcatalogdir @catalogdir@
+%_binfmtdir @binfmtdir@
+%_sysctldir @sysctldir@
+%_sysusersdir @sysusersdir@
+%_tmpfilesdir @tmpfilesdir@
+%_environmentdir @environmentdir@
+%_modulesloaddir @modulesloaddir@
+%_modprobedir @modprobedir@
+%_systemdgeneratordir @systemgeneratordir@
+%_systemdusergeneratordir @usergeneratordir@
+%_systemd_system_env_generator_dir @systemenvgeneratordir@
+%_systemd_user_env_generator_dir @userenvgeneratordir@
+
+# Because we had one release with a typo...
+# This is temporary (Remove after systemd 240 is released)
+%_environmnentdir %{warn:Use %%_environmentdir instead}%_environmentdir
+
+%systemd_requires \
+Requires(post): systemd \
+Requires(preun): systemd \
+Requires(postun): systemd \
+%{nil}
+
+%systemd_ordering \
+OrderWithRequires(post): systemd \
+OrderWithRequires(preun): systemd \
+OrderWithRequires(postun): systemd \
+%{nil}
+
+%systemd_post() \
+if [ $1 -eq 1 ] ; then \
+ # Initial installation \
+ systemctl --no-reload preset %{?*} >/dev/null 2>&1 || : \
+fi \
+%{nil}
+
+%systemd_user_post() %{expand:%systemd_post \\--global %%{?*}}
+
+%systemd_preun() \
+if [ $1 -eq 0 ] ; then \
+ # Package removal, not upgrade \
+ systemctl --no-reload disable --now %{?*} >/dev/null 2>&1 || : \
+fi \
+%{nil}
+
+%systemd_user_preun() \
+if [ $1 -eq 0 ] ; then \
+ # Package removal, not upgrade \
+ systemctl --global disable %{?*} >/dev/null 2>&1 || : \
+fi \
+%{nil}
+
+%systemd_postun() %{nil}
+
+%systemd_user_postun() %{nil}
+
+%systemd_postun_with_restart() \
+if [ $1 -ge 1 ] ; then \
+ # Package upgrade, not uninstall \
+ systemctl try-restart %{?*} >/dev/null 2>&1 || : \
+fi \
+%{nil}
+
+%systemd_user_postun_with_restart() %{nil}
+
+%udev_hwdb_update() %{nil}
+
+%udev_rules_update() %{nil}
+
+%journal_catalog_update() %{nil}
+
+# Deprecated. Use %tmpfiles_create_package instead
+%tmpfiles_create() \
+systemd-tmpfiles --create %{?*} >/dev/null 2>&1 || : \
+%{nil}
+
+# Deprecated. Use %sysusers_create_package instead
+%sysusers_create() \
+systemd-sysusers %{?*} >/dev/null 2>&1 || : \
+%{nil}
+
+%sysusers_create_inline() \
+systemd-sysusers - <<SYSTEMD_INLINE_EOF >/dev/null 2>&1 || : \
+%{?*} \
+SYSTEMD_INLINE_EOF \
+%{nil}
+
+# This should be used by package installation scripts which require users or
+# groups to be present before the files installed by the package are present on
+# disk (for example because some files are owned by those users or groups).
+#
+# Example:
+# Source1: %{name}-sysusers.conf
+# ...
+# %install
+# install -D %SOURCE1 %{buildroot}%{_sysusersdir}/%{name}.conf
+# %pre
+# %sysusers_create_package %{name} %SOURCE1
+# %files
+# %{_sysusersdir}/%{name}.conf
+%sysusers_create_package() \
+systemd-sysusers --replace=%_sysusersdir/%1.conf - <<SYSTEMD_INLINE_EOF >/dev/null 2>&1 || : \
+%(cat %2) \
+SYSTEMD_INLINE_EOF \
+%{nil}
+
+# This may be used by package installation scripts to create files according to
+# their tmpfiles configuration from a package installation script, even before
+# the files of that package are installed on disk.
+#
+# Example:
+# Source1: %{name}-tmpfiles.conf
+# ...
+# %install
+# install -D %SOURCE1 %{buildroot}%{_tmpfilesdir}/%{name}.conf
+# %pre
+# %tmpfiles_create_package %{name} %SOURCE1
+# %files
+# %{_tmpfilesdir}/%{name}.conf
+%tmpfiles_create_package() \
+systemd-tmpfiles --replace=%_tmpfilesdir/%1.conf --create - <<SYSTEMD_INLINE_EOF >/dev/null 2>&1 || : \
+%(cat %2) \
+SYSTEMD_INLINE_EOF \
+%{nil}
+
+%sysctl_apply() \
+@rootlibexecdir@/systemd-sysctl %{?*} >/dev/null 2>&1 || : \
+%{nil}
+
+%binfmt_apply() \
+@rootlibexecdir@/systemd-binfmt %{?*} >/dev/null 2>&1 || : \
+%{nil}
diff --git a/src/core/main.c b/src/core/main.c
new file mode 100644
index 0000000..561f956
--- /dev/null
+++ b/src/core/main.c
@@ -0,0 +1,2690 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/reboot.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#if HAVE_SECCOMP
+#include <seccomp.h>
+#endif
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "build.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "capability-util.h"
+#include "cgroup-util.h"
+#include "clock-util.h"
+#include "conf-parser.h"
+#include "cpu-set-util.h"
+#include "dbus.h"
+#include "dbus-manager.h"
+#include "def.h"
+#include "emergency-action.h"
+#include "env-util.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "fdset.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hostname-setup.h"
+#include "ima-setup.h"
+#include "killall.h"
+#include "kmod-setup.h"
+#include "load-fragment.h"
+#include "log.h"
+#include "loopback-setup.h"
+#include "machine-id-setup.h"
+#include "manager.h"
+#include "missing.h"
+#include "mount-setup.h"
+#include "os-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "raw-clone.h"
+#include "rlimit-util.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "selinux-setup.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "smack-setup.h"
+#include "special.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "switch-root.h"
+#include "sysctl-util.h"
+#include "terminal-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+#include "watchdog.h"
+
+#if HAS_FEATURE_ADDRESS_SANITIZER
+#include <sanitizer/lsan_interface.h>
+#endif
+
+static enum {
+ ACTION_RUN,
+ ACTION_HELP,
+ ACTION_VERSION,
+ ACTION_TEST,
+ ACTION_DUMP_CONFIGURATION_ITEMS,
+ ACTION_DUMP_BUS_PROPERTIES,
+} arg_action = ACTION_RUN;
+static char *arg_default_unit = NULL;
+static bool arg_system = false;
+static bool arg_dump_core = true;
+static int arg_crash_chvt = -1;
+static bool arg_crash_shell = false;
+static bool arg_crash_reboot = false;
+static char *arg_confirm_spawn = NULL;
+static ShowStatus arg_show_status = _SHOW_STATUS_INVALID;
+static bool arg_switched_root = false;
+static PagerFlags arg_pager_flags = 0;
+static bool arg_service_watchdogs = true;
+static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
+static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
+static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
+static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
+static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
+static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
+static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
+static usec_t arg_runtime_watchdog = 0;
+static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
+static char *arg_early_core_pattern = NULL;
+static char *arg_watchdog_device = NULL;
+static char **arg_default_environment = NULL;
+static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {};
+static uint64_t arg_capability_bounding_set = CAP_ALL;
+static bool arg_no_new_privs = false;
+static nsec_t arg_timer_slack_nsec = NSEC_INFINITY;
+static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
+static Set* arg_syscall_archs = NULL;
+static FILE* arg_serialization = NULL;
+static int arg_default_cpu_accounting = -1;
+static bool arg_default_io_accounting = false;
+static bool arg_default_ip_accounting = false;
+static bool arg_default_blockio_accounting = false;
+static bool arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
+static bool arg_default_tasks_accounting = true;
+static uint64_t arg_default_tasks_max = UINT64_MAX;
+static sd_id128_t arg_machine_id = {};
+static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
+
+_noreturn_ static void freeze_or_exit_or_reboot(void) {
+
+ /* If we are running in a contianer, let's prefer exiting, after all we can propagate an exit code to the
+ * container manager, and thus inform it that something went wrong. */
+ if (detect_container() > 0) {
+ log_emergency("Exiting PID 1...");
+ exit(EXIT_EXCEPTION);
+ }
+
+ if (arg_crash_reboot) {
+ log_notice("Rebooting in 10s...");
+ (void) sleep(10);
+
+ log_notice("Rebooting now...");
+ (void) reboot(RB_AUTOBOOT);
+ log_emergency_errno(errno, "Failed to reboot: %m");
+ }
+
+ log_emergency("Freezing execution.");
+ freeze();
+}
+
+_noreturn_ static void crash(int sig) {
+ struct sigaction sa;
+ pid_t pid;
+
+ if (getpid_cached() != 1)
+ /* Pass this on immediately, if this is not PID 1 */
+ (void) raise(sig);
+ else if (!arg_dump_core)
+ log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
+ else {
+ sa = (struct sigaction) {
+ .sa_handler = nop_signal_handler,
+ .sa_flags = SA_NOCLDSTOP|SA_RESTART,
+ };
+
+ /* We want to wait for the core process, hence let's enable SIGCHLD */
+ (void) sigaction(SIGCHLD, &sa, NULL);
+
+ pid = raw_clone(SIGCHLD);
+ if (pid < 0)
+ log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
+ else if (pid == 0) {
+ /* Enable default signal handler for core dump */
+
+ sa = (struct sigaction) {
+ .sa_handler = SIG_DFL,
+ };
+ (void) sigaction(sig, &sa, NULL);
+
+ /* Don't limit the coredump size */
+ (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
+
+ /* Just to be sure... */
+ (void) chdir("/");
+
+ /* Raise the signal again */
+ pid = raw_getpid();
+ (void) kill(pid, sig); /* raise() would kill the parent */
+
+ assert_not_reached("We shouldn't be here...");
+ _exit(EXIT_EXCEPTION);
+ } else {
+ siginfo_t status;
+ int r;
+
+ /* Order things nicely. */
+ r = wait_for_terminate(pid, &status);
+ if (r < 0)
+ log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
+ else if (status.si_code != CLD_DUMPED)
+ log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
+ signal_to_string(sig),
+ pid, sigchld_code_to_string(status.si_code),
+ status.si_status,
+ strna(status.si_code == CLD_EXITED
+ ? exit_status_to_string(status.si_status, EXIT_STATUS_MINIMAL)
+ : signal_to_string(status.si_status)));
+ else
+ log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
+ }
+ }
+
+ if (arg_crash_chvt >= 0)
+ (void) chvt(arg_crash_chvt);
+
+ sa = (struct sigaction) {
+ .sa_handler = SIG_IGN,
+ .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
+ };
+
+ /* Let the kernel reap children for us */
+ (void) sigaction(SIGCHLD, &sa, NULL);
+
+ if (arg_crash_shell) {
+ log_notice("Executing crash shell in 10s...");
+ (void) sleep(10);
+
+ pid = raw_clone(SIGCHLD);
+ if (pid < 0)
+ log_emergency_errno(errno, "Failed to fork off crash shell: %m");
+ else if (pid == 0) {
+ (void) setsid();
+ (void) make_console_stdio();
+ (void) rlimit_nofile_safe();
+ (void) execle("/bin/sh", "/bin/sh", NULL, environ);
+
+ log_emergency_errno(errno, "execle() failed: %m");
+ _exit(EXIT_EXCEPTION);
+ } else {
+ log_info("Spawned crash shell as PID "PID_FMT".", pid);
+ (void) wait_for_terminate(pid, NULL);
+ }
+ }
+
+ freeze_or_exit_or_reboot();
+}
+
+static void install_crash_handler(void) {
+ static const struct sigaction sa = {
+ .sa_handler = crash,
+ .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
+ };
+ int r;
+
+ /* We ignore the return value here, since, we don't mind if we
+ * cannot set up a crash handler */
+ r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
+ if (r < 0)
+ log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
+}
+
+static int console_setup(void) {
+ _cleanup_close_ int tty_fd = -1;
+ int r;
+
+ tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
+ if (tty_fd < 0)
+ return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
+
+ /* We don't want to force text mode. plymouth may be showing
+ * pictures already from initrd. */
+ r = reset_terminal_fd(tty_fd, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reset /dev/console: %m");
+
+ return 0;
+}
+
+static int parse_crash_chvt(const char *value) {
+ int b;
+
+ if (safe_atoi(value, &arg_crash_chvt) >= 0)
+ return 0;
+
+ b = parse_boolean(value);
+ if (b < 0)
+ return b;
+
+ if (b > 0)
+ arg_crash_chvt = 0; /* switch to where kmsg goes */
+ else
+ arg_crash_chvt = -1; /* turn off switching */
+
+ return 0;
+}
+
+static int parse_confirm_spawn(const char *value, char **console) {
+ char *s;
+ int r;
+
+ r = value ? parse_boolean(value) : 1;
+ if (r == 0) {
+ *console = NULL;
+ return 0;
+ }
+
+ if (r > 0) /* on with default tty */
+ s = strdup("/dev/console");
+ else if (is_path(value)) /* on with fully qualified path */
+ s = strdup(value);
+ else /* on with only a tty file name, not a fully qualified path */
+ s = strjoin("/dev/", value);
+ if (!s)
+ return -ENOMEM;
+
+ *console = s;
+ return 0;
+}
+
+static int set_machine_id(const char *m) {
+ sd_id128_t t;
+ assert(m);
+
+ if (sd_id128_from_string(m, &t) < 0)
+ return -EINVAL;
+
+ if (sd_id128_is_null(t))
+ return -EINVAL;
+
+ arg_machine_id = t;
+ return 0;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+
+ int r;
+
+ assert(key);
+
+ if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
+ log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
+ else if (in_initrd() == !!startswith(key, "rd.")) {
+ if (free_and_strdup(&arg_default_unit, value) < 0)
+ return log_oom();
+ }
+
+ } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value);
+ else
+ arg_dump_core = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (path_is_absolute(value))
+ (void) parse_path_argument_and_warn(value, false, &arg_early_core_pattern);
+ else
+ log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
+
+ if (!value)
+ arg_crash_chvt = 0; /* turn on */
+ else {
+ r = parse_crash_chvt(value);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value);
+ }
+
+ } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value);
+ else
+ arg_crash_shell = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value);
+ else
+ arg_crash_reboot = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
+ char *s;
+
+ r = parse_confirm_spawn(value, &s);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value);
+ else
+ free_and_replace(arg_confirm_spawn, s);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value);
+ else
+ arg_service_watchdogs = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
+
+ if (value) {
+ r = parse_show_status(value, &arg_show_status);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value);
+ } else
+ arg_show_status = SHOW_STATUS_YES;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = exec_output_from_string(value);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value);
+ else
+ arg_default_std_output = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = exec_output_from_string(value);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value);
+ else
+ arg_default_std_error = r;
+
+ } else if (streq(key, "systemd.setenv")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (env_assignment_is_valid(value)) {
+ char **env;
+
+ env = strv_env_set(arg_default_environment, value);
+ if (!env)
+ return log_oom();
+
+ arg_default_environment = env;
+ } else
+ log_warning("Environment variable name '%s' is not valid. Ignoring.", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = set_machine_id(value);
+ if (r < 0)
+ log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = parse_sec(value, &arg_default_timeout_start_usec);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value);
+
+ if (arg_default_timeout_start_usec <= 0)
+ arg_default_timeout_start_usec = USEC_INFINITY;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ (void) parse_path_argument_and_warn(value, false, &arg_watchdog_device);
+
+ } else if (streq(key, "quiet") && !value) {
+
+ if (arg_show_status == _SHOW_STATUS_INVALID)
+ arg_show_status = SHOW_STATUS_AUTO;
+
+ } else if (streq(key, "debug") && !value) {
+
+ /* Note that log_parse_environment() handles 'debug'
+ * too, and sets the log level to LOG_DEBUG. */
+
+ if (detect_container() > 0)
+ log_set_target(LOG_TARGET_CONSOLE);
+
+ } else if (!value) {
+ const char *target;
+
+ /* SysV compatibility */
+ target = runlevel_to_target(key);
+ if (target)
+ return free_and_strdup(&arg_default_unit, target);
+ }
+
+ return 0;
+}
+
+#define DEFINE_SETTER(name, func, descr) \
+ static int name(const char *unit, \
+ const char *filename, \
+ unsigned line, \
+ const char *section, \
+ unsigned section_line, \
+ const char *lvalue, \
+ int ltype, \
+ const char *rvalue, \
+ void *data, \
+ void *userdata) { \
+ \
+ int r; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ \
+ r = func(rvalue); \
+ if (r < 0) \
+ log_syntax(unit, LOG_ERR, filename, line, r, \
+ "Invalid " descr "'%s': %m", \
+ rvalue); \
+ \
+ return 0; \
+ }
+
+DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level");
+DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target");
+DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color" );
+DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location");
+
+static int config_parse_cpu_affinity2(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_cpu_free_ cpu_set_t *c = NULL;
+ int ncpus;
+
+ ncpus = parse_cpu_set_and_warn(rvalue, &c, unit, filename, line, lvalue);
+ if (ncpus < 0)
+ return ncpus;
+
+ if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
+ log_warning_errno(errno, "Failed to set CPU affinity: %m");
+
+ return 0;
+}
+
+static int config_parse_show_status(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int k;
+ ShowStatus *b = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ k = parse_show_status(rvalue, b);
+ if (k < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, k, "Failed to parse show status setting, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+static int config_parse_output_restricted(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecOutput t, *eo = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ t = exec_output_from_string(rvalue);
+ if (t < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output type, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (IN_SET(t, EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Standard output types socket, fd:, file:, append: are not supported as defaults, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *eo = t;
+ return 0;
+}
+
+static int config_parse_crash_chvt(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = parse_crash_chvt(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse CrashChangeVT= setting, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+static int parse_config_file(void) {
+
+ const ConfigTableItem items[] = {
+ { "Manager", "LogLevel", config_parse_level2, 0, NULL },
+ { "Manager", "LogTarget", config_parse_target, 0, NULL },
+ { "Manager", "LogColor", config_parse_color, 0, NULL },
+ { "Manager", "LogLocation", config_parse_location, 0, NULL },
+ { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core },
+ { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt, 0, NULL },
+ { "Manager", "CrashChangeVT", config_parse_crash_chvt, 0, NULL },
+ { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell },
+ { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
+ { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
+ { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL },
+ { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
+ { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
+ { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_shutdown_watchdog },
+ { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
+ { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
+ { "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
+#if HAVE_SECCOMP
+ { "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
+#endif
+ { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
+ { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec },
+ { "Manager", "DefaultStandardOutput", config_parse_output_restricted,0, &arg_default_std_output },
+ { "Manager", "DefaultStandardError", config_parse_output_restricted,0, &arg_default_std_error },
+ { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
+ { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec },
+ { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec },
+ { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */
+ { "Manager", "DefaultStartLimitIntervalSec",config_parse_sec, 0, &arg_default_start_limit_interval },
+ { "Manager", "DefaultStartLimitBurst", config_parse_unsigned, 0, &arg_default_start_limit_burst },
+ { "Manager", "DefaultEnvironment", config_parse_environ, 0, &arg_default_environment },
+ { "Manager", "DefaultLimitCPU", config_parse_rlimit, RLIMIT_CPU, arg_default_rlimit },
+ { "Manager", "DefaultLimitFSIZE", config_parse_rlimit, RLIMIT_FSIZE, arg_default_rlimit },
+ { "Manager", "DefaultLimitDATA", config_parse_rlimit, RLIMIT_DATA, arg_default_rlimit },
+ { "Manager", "DefaultLimitSTACK", config_parse_rlimit, RLIMIT_STACK, arg_default_rlimit },
+ { "Manager", "DefaultLimitCORE", config_parse_rlimit, RLIMIT_CORE, arg_default_rlimit },
+ { "Manager", "DefaultLimitRSS", config_parse_rlimit, RLIMIT_RSS, arg_default_rlimit },
+ { "Manager", "DefaultLimitNOFILE", config_parse_rlimit, RLIMIT_NOFILE, arg_default_rlimit },
+ { "Manager", "DefaultLimitAS", config_parse_rlimit, RLIMIT_AS, arg_default_rlimit },
+ { "Manager", "DefaultLimitNPROC", config_parse_rlimit, RLIMIT_NPROC, arg_default_rlimit },
+ { "Manager", "DefaultLimitMEMLOCK", config_parse_rlimit, RLIMIT_MEMLOCK, arg_default_rlimit },
+ { "Manager", "DefaultLimitLOCKS", config_parse_rlimit, RLIMIT_LOCKS, arg_default_rlimit },
+ { "Manager", "DefaultLimitSIGPENDING", config_parse_rlimit, RLIMIT_SIGPENDING, arg_default_rlimit },
+ { "Manager", "DefaultLimitMSGQUEUE", config_parse_rlimit, RLIMIT_MSGQUEUE, arg_default_rlimit },
+ { "Manager", "DefaultLimitNICE", config_parse_rlimit, RLIMIT_NICE, arg_default_rlimit },
+ { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit, RLIMIT_RTPRIO, arg_default_rlimit },
+ { "Manager", "DefaultLimitRTTIME", config_parse_rlimit, RLIMIT_RTTIME, arg_default_rlimit },
+ { "Manager", "DefaultCPUAccounting", config_parse_tristate, 0, &arg_default_cpu_accounting },
+ { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
+ { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
+ { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
+ { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
+ { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
+ { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
+ { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
+ {}
+ };
+
+ const char *fn, *conf_dirs_nulstr;
+
+ fn = arg_system ?
+ PKGSYSCONFDIR "/system.conf" :
+ PKGSYSCONFDIR "/user.conf";
+
+ conf_dirs_nulstr = arg_system ?
+ CONF_PATHS_NULSTR("systemd/system.conf.d") :
+ CONF_PATHS_NULSTR("systemd/user.conf.d");
+
+ (void) config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, CONFIG_PARSE_WARN, NULL);
+
+ /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
+ * like everywhere else. */
+ if (arg_default_timeout_start_usec <= 0)
+ arg_default_timeout_start_usec = USEC_INFINITY;
+ if (arg_default_timeout_stop_usec <= 0)
+ arg_default_timeout_stop_usec = USEC_INFINITY;
+
+ return 0;
+}
+
+static void set_manager_defaults(Manager *m) {
+
+ assert(m);
+
+ /* Propagates the various default unit property settings into the manager object, i.e. properties that do not
+ * affect the manager itself, but are just what newly allocated units will have set if they haven't set
+ * anything else. (Also see set_manager_settings() for the settings that affect the manager's own behaviour) */
+
+ m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
+ m->default_std_output = arg_default_std_output;
+ m->default_std_error = arg_default_std_error;
+ m->default_timeout_start_usec = arg_default_timeout_start_usec;
+ m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
+ m->default_restart_usec = arg_default_restart_usec;
+ m->default_start_limit_interval = arg_default_start_limit_interval;
+ m->default_start_limit_burst = arg_default_start_limit_burst;
+
+ /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU
+ * controller to be enabled, so the default is to enable it unless we got told otherwise. */
+ if (arg_default_cpu_accounting >= 0)
+ m->default_cpu_accounting = arg_default_cpu_accounting;
+ else
+ m->default_cpu_accounting = cpu_accounting_is_cheap();
+
+ m->default_io_accounting = arg_default_io_accounting;
+ m->default_ip_accounting = arg_default_ip_accounting;
+ m->default_blockio_accounting = arg_default_blockio_accounting;
+ m->default_memory_accounting = arg_default_memory_accounting;
+ m->default_tasks_accounting = arg_default_tasks_accounting;
+ m->default_tasks_max = arg_default_tasks_max;
+
+ (void) manager_set_default_rlimits(m, arg_default_rlimit);
+
+ (void) manager_default_environment(m);
+ (void) manager_transient_environment_add(m, arg_default_environment);
+}
+
+static void set_manager_settings(Manager *m) {
+
+ assert(m);
+
+ /* Propagates the various manager settings into the manager object, i.e. properties that effect the manager
+ * itself (as opposed to just being inherited into newly allocated units, see set_manager_defaults() above). */
+
+ m->confirm_spawn = arg_confirm_spawn;
+ m->service_watchdogs = arg_service_watchdogs;
+ m->runtime_watchdog = arg_runtime_watchdog;
+ m->shutdown_watchdog = arg_shutdown_watchdog;
+ m->cad_burst_action = arg_cad_burst_action;
+
+ manager_set_show_status(m, arg_show_status);
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_LOG_LEVEL = 0x100,
+ ARG_LOG_TARGET,
+ ARG_LOG_COLOR,
+ ARG_LOG_LOCATION,
+ ARG_UNIT,
+ ARG_SYSTEM,
+ ARG_USER,
+ ARG_TEST,
+ ARG_NO_PAGER,
+ ARG_VERSION,
+ ARG_DUMP_CONFIGURATION_ITEMS,
+ ARG_DUMP_BUS_PROPERTIES,
+ ARG_DUMP_CORE,
+ ARG_CRASH_CHVT,
+ ARG_CRASH_SHELL,
+ ARG_CRASH_REBOOT,
+ ARG_CONFIRM_SPAWN,
+ ARG_SHOW_STATUS,
+ ARG_DESERIALIZE,
+ ARG_SWITCHED_ROOT,
+ ARG_DEFAULT_STD_OUTPUT,
+ ARG_DEFAULT_STD_ERROR,
+ ARG_MACHINE_ID,
+ ARG_SERVICE_WATCHDOGS,
+ };
+
+ static const struct option options[] = {
+ { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
+ { "log-target", required_argument, NULL, ARG_LOG_TARGET },
+ { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
+ { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
+ { "unit", required_argument, NULL, ARG_UNIT },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "user", no_argument, NULL, ARG_USER },
+ { "test", no_argument, NULL, ARG_TEST },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "dump-configuration-items", no_argument, NULL, ARG_DUMP_CONFIGURATION_ITEMS },
+ { "dump-bus-properties", no_argument, NULL, ARG_DUMP_BUS_PROPERTIES },
+ { "dump-core", optional_argument, NULL, ARG_DUMP_CORE },
+ { "crash-chvt", required_argument, NULL, ARG_CRASH_CHVT },
+ { "crash-shell", optional_argument, NULL, ARG_CRASH_SHELL },
+ { "crash-reboot", optional_argument, NULL, ARG_CRASH_REBOOT },
+ { "confirm-spawn", optional_argument, NULL, ARG_CONFIRM_SPAWN },
+ { "show-status", optional_argument, NULL, ARG_SHOW_STATUS },
+ { "deserialize", required_argument, NULL, ARG_DESERIALIZE },
+ { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT },
+ { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, },
+ { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, },
+ { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
+ { "service-watchdogs", required_argument, NULL, ARG_SERVICE_WATCHDOGS },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 1);
+ assert(argv);
+
+ if (getpid_cached() == 1)
+ opterr = 0;
+
+ while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case ARG_LOG_LEVEL:
+ r = log_set_max_level_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
+
+ break;
+
+ case ARG_LOG_TARGET:
+ r = log_set_target_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
+
+ break;
+
+ case ARG_LOG_COLOR:
+
+ if (optarg) {
+ r = log_show_color_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log color setting \"%s\": %m",
+ optarg);
+ } else
+ log_show_color(true);
+
+ break;
+
+ case ARG_LOG_LOCATION:
+ if (optarg) {
+ r = log_show_location_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log location setting \"%s\": %m",
+ optarg);
+ } else
+ log_show_location(true);
+
+ break;
+
+ case ARG_DEFAULT_STD_OUTPUT:
+ r = exec_output_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse default standard output setting \"%s\": %m",
+ optarg);
+ arg_default_std_output = r;
+ break;
+
+ case ARG_DEFAULT_STD_ERROR:
+ r = exec_output_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse default standard error output setting \"%s\": %m",
+ optarg);
+ arg_default_std_error = r;
+ break;
+
+ case ARG_UNIT:
+ r = free_and_strdup(&arg_default_unit, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set default unit \"%s\": %m", optarg);
+
+ break;
+
+ case ARG_SYSTEM:
+ arg_system = true;
+ break;
+
+ case ARG_USER:
+ arg_system = false;
+ break;
+
+ case ARG_TEST:
+ arg_action = ACTION_TEST;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_VERSION:
+ arg_action = ACTION_VERSION;
+ break;
+
+ case ARG_DUMP_CONFIGURATION_ITEMS:
+ arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
+ break;
+
+ case ARG_DUMP_BUS_PROPERTIES:
+ arg_action = ACTION_DUMP_BUS_PROPERTIES;
+ break;
+
+ case ARG_DUMP_CORE:
+ if (!optarg)
+ arg_dump_core = true;
+ else {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse dump core boolean: \"%s\": %m",
+ optarg);
+ arg_dump_core = r;
+ }
+ break;
+
+ case ARG_CRASH_CHVT:
+ r = parse_crash_chvt(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse crash virtual terminal index: \"%s\": %m",
+ optarg);
+ break;
+
+ case ARG_CRASH_SHELL:
+ if (!optarg)
+ arg_crash_shell = true;
+ else {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m",
+ optarg);
+ arg_crash_shell = r;
+ }
+ break;
+
+ case ARG_CRASH_REBOOT:
+ if (!optarg)
+ arg_crash_reboot = true;
+ else {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m",
+ optarg);
+ arg_crash_reboot = r;
+ }
+ break;
+
+ case ARG_CONFIRM_SPAWN:
+ arg_confirm_spawn = mfree(arg_confirm_spawn);
+
+ r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse confirm spawn option: \"%s\": %m",
+ optarg);
+ break;
+
+ case ARG_SERVICE_WATCHDOGS:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse service watchdogs boolean: \"%s\": %m",
+ optarg);
+ arg_service_watchdogs = r;
+ break;
+
+ case ARG_SHOW_STATUS:
+ if (optarg) {
+ r = parse_show_status(optarg, &arg_show_status);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse show status boolean: \"%s\": %m",
+ optarg);
+ } else
+ arg_show_status = SHOW_STATUS_YES;
+ break;
+
+ case ARG_DESERIALIZE: {
+ int fd;
+ FILE *f;
+
+ r = safe_atoi(optarg, &fd);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse deserialize option \"%s\": %m", optarg);
+ if (fd < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid deserialize fd: %d",
+ fd);
+
+ (void) fd_cloexec(fd, true);
+
+ f = fdopen(fd, "r");
+ if (!f)
+ return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);
+
+ safe_fclose(arg_serialization);
+ arg_serialization = f;
+
+ break;
+ }
+
+ case ARG_SWITCHED_ROOT:
+ arg_switched_root = true;
+ break;
+
+ case ARG_MACHINE_ID:
+ r = set_machine_id(optarg);
+ if (r < 0)
+ return log_error_errno(r, "MachineID '%s' is not valid: %m", optarg);
+ break;
+
+ case 'h':
+ arg_action = ACTION_HELP;
+ break;
+
+ case 'D':
+ log_set_max_level(LOG_DEBUG);
+ break;
+
+ case 'b':
+ case 's':
+ case 'z':
+ /* Just to eat away the sysvinit kernel
+ * cmdline args without getopt() error
+ * messages that we'll parse in
+ * parse_proc_cmdline_word() or ignore. */
+
+ case '?':
+ if (getpid_cached() != 1)
+ return -EINVAL;
+ else
+ return 0;
+
+ default:
+ assert_not_reached("Unhandled option code.");
+ }
+
+ if (optind < argc && getpid_cached() != 1) {
+ /* Hmm, when we aren't run as init system
+ * let's complain about excess arguments */
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Excess arguments.");
+ }
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Starts up and maintains the system or user services.\n\n"
+ " -h --help Show this help\n"
+ " --version Show version\n"
+ " --test Determine startup sequence, dump it and exit\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --dump-configuration-items Dump understood unit configuration items\n"
+ " --dump-bus-properties Dump exposed bus properties\n"
+ " --unit=UNIT Set default unit\n"
+ " --system Run a system instance, even if PID != 1\n"
+ " --user Run a user instance\n"
+ " --dump-core[=BOOL] Dump core on crash\n"
+ " --crash-vt=NR Change to specified VT on crash\n"
+ " --crash-reboot[=BOOL] Reboot on crash\n"
+ " --crash-shell[=BOOL] Run shell on crash\n"
+ " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
+ " --show-status[=BOOL] Show status updates on the console during bootup\n"
+ " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
+ " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
+ " --log-color[=BOOL] Highlight important log messages\n"
+ " --log-location[=BOOL] Include code location in log messages\n"
+ " --default-standard-output= Set default standard output for services\n"
+ " --default-standard-error= Set default standard error output for services\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int prepare_reexecute(
+ Manager *m,
+ FILE **ret_f,
+ FDSet **ret_fds,
+ bool switching_root) {
+
+ _cleanup_fdset_free_ FDSet *fds = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(m);
+ assert(ret_f);
+ assert(ret_fds);
+
+ r = manager_open_serialization(m, &f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create serialization file: %m");
+
+ /* Make sure nothing is really destructed when we shut down */
+ m->n_reloading++;
+ bus_manager_send_reloading(m, true);
+
+ fds = fdset_new();
+ if (!fds)
+ return log_oom();
+
+ r = manager_serialize(m, f, fds, switching_root);
+ if (r < 0)
+ return r;
+
+ if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
+ return log_error_errno(errno, "Failed to rewind serialization fd: %m");
+
+ r = fd_cloexec(fileno(f), false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
+
+ r = fdset_cloexec(fds, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
+
+ *ret_f = TAKE_PTR(f);
+ *ret_fds = TAKE_PTR(fds);
+
+ return 0;
+}
+
+static void bump_file_max_and_nr_open(void) {
+
+ /* Let's bump fs.file-max and fs.nr_open to their respective maximums. On current kernels large numbers of file
+ * descriptors are no longer a performance problem and their memory is properly tracked by memcg, thus counting
+ * them and limiting them in another two layers of limits is unnecessary and just complicates things. This
+ * function hence turns off 2 of the 4 levels of limits on file descriptors, and makes RLIMIT_NOLIMIT (soft +
+ * hard) the only ones that really matter. */
+
+#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
+ _cleanup_free_ char *t = NULL;
+ int r;
+#endif
+
+#if BUMP_PROC_SYS_FS_FILE_MAX
+ /* I so wanted to use STRINGIFY(ULONG_MAX) here, but alas we can't as glibc/gcc define that as
+ * "(0x7fffffffffffffffL * 2UL + 1UL)". Seriously. 😢 */
+ if (asprintf(&t, "%lu\n", ULONG_MAX) < 0) {
+ log_oom();
+ return;
+ }
+
+ r = sysctl_write("fs/file-max", t);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.file-max, ignoring: %m");
+#endif
+
+#if BUMP_PROC_SYS_FS_FILE_MAX && BUMP_PROC_SYS_FS_NR_OPEN
+ t = mfree(t);
+#endif
+
+#if BUMP_PROC_SYS_FS_NR_OPEN
+ int v = INT_MAX;
+
+ /* Arg! The kernel enforces maximum and minimum values on the fs.nr_open, but we don't really know what they
+ * are. The expression by which the maximum is determined is dependent on the architecture, and is something we
+ * don't really want to copy to userspace, as it is dependent on implementation details of the kernel. Since
+ * the kernel doesn't expose the maximum value to us, we can only try and hope. Hence, let's start with
+ * INT_MAX, and then keep halving the value until we find one that works. Ugly? Yes, absolutely, but kernel
+ * APIs are kernel APIs, so what do can we do... 🤯 */
+
+ for (;;) {
+ int k;
+
+ v &= ~(__SIZEOF_POINTER__ - 1); /* Round down to next multiple of the pointer size */
+ if (v < 1024) {
+ log_warning("Can't bump fs.nr_open, value too small.");
+ break;
+ }
+
+ k = read_nr_open();
+ if (k < 0) {
+ log_error_errno(k, "Failed to read fs.nr_open: %m");
+ break;
+ }
+ if (k >= v) { /* Already larger */
+ log_debug("Skipping bump, value is already larger.");
+ break;
+ }
+
+ if (asprintf(&t, "%i\n", v) < 0) {
+ log_oom();
+ return;
+ }
+
+ r = sysctl_write("fs/nr_open", t);
+ t = mfree(t);
+ if (r == -EINVAL) {
+ log_debug("Couldn't write fs.nr_open as %i, halving it.", v);
+ v /= 2;
+ continue;
+ }
+ if (r < 0) {
+ log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.nr_open, ignoring: %m");
+ break;
+ }
+
+ log_debug("Successfully bumped fs.nr_open to %i", v);
+ break;
+ }
+#endif
+}
+
+static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
+ struct rlimit new_rlimit;
+ int r, nr;
+
+ assert(saved_rlimit);
+
+ /* Save the original RLIMIT_NOFILE so that we can reset it later when transitioning from the initrd to the main
+ * systemd or suchlike. */
+ if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
+ return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
+
+ /* Get the underlying absolute limit the kernel enforces */
+ nr = read_nr_open();
+
+ /* Make sure forked processes get limits based on the original kernel setting */
+ if (!arg_default_rlimit[RLIMIT_NOFILE]) {
+ struct rlimit *rl;
+
+ rl = newdup(struct rlimit, saved_rlimit, 1);
+ if (!rl)
+ return log_oom();
+
+ /* Bump the hard limit for system services to a substantially higher value. The default hard limit
+ * current kernels set is pretty low (4K), mostly for historical reasons. According to kernel
+ * developers, the fd handling in recent kernels has been optimized substantially enough, so that we
+ * can bump the limit now, without paying too high a price in memory or performance. Note however that
+ * we only bump the hard limit, not the soft limit. That's because select() works the way it works, and
+ * chokes on fds >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
+ * unexpecting programs that they get fds higher than what they can process using select(). By only
+ * bumping the hard limit but leaving the low limit as it is we avoid this pitfall: programs that are
+ * written by folks aware of the select() problem in mind (and thus use poll()/epoll instead of
+ * select(), the way everybody should) can explicitly opt into high fds by bumping their soft limit
+ * beyond 1024, to the hard limit we pass. */
+ if (arg_system)
+ rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
+
+ /* If for some reason we were invoked with a soft limit above 1024 (which should never
+ * happen!, but who knows what we get passed in from pam_limit when invoked as --user
+ * instance), then lower what we pass on to not confuse our children */
+ rl->rlim_cur = MIN(rl->rlim_cur, (rlim_t) FD_SETSIZE);
+
+ arg_default_rlimit[RLIMIT_NOFILE] = rl;
+ }
+
+ /* Calculate the new limits to use for us. Never lower from what we inherited. */
+ new_rlimit = (struct rlimit) {
+ .rlim_cur = MAX((rlim_t) nr, saved_rlimit->rlim_cur),
+ .rlim_max = MAX((rlim_t) nr, saved_rlimit->rlim_max),
+ };
+
+ /* Shortcut if nothing changes. */
+ if (saved_rlimit->rlim_max >= new_rlimit.rlim_max &&
+ saved_rlimit->rlim_cur >= new_rlimit.rlim_cur) {
+ log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
+ return 0;
+ }
+
+ /* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows, for
+ * both hard and soft. */
+ r = setrlimit_closest(RLIMIT_NOFILE, &new_rlimit);
+ if (r < 0)
+ return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");
+
+ return 0;
+}
+
+static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
+ struct rlimit new_rlimit;
+ int r;
+
+ assert(saved_rlimit);
+
+ /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK which should
+ * normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's bump
+ * the value high enough for our user. */
+
+ if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
+ return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
+
+ /* Pass the original value down to invoked processes */
+ if (!arg_default_rlimit[RLIMIT_MEMLOCK]) {
+ struct rlimit *rl;
+
+ rl = newdup(struct rlimit, saved_rlimit, 1);
+ if (!rl)
+ return log_oom();
+
+ arg_default_rlimit[RLIMIT_MEMLOCK] = rl;
+ }
+
+ /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
+ * must be unsigned, hence this is a given, but let's make this clear here. */
+ assert_cc(RLIM_INFINITY > 0);
+
+ new_rlimit = (struct rlimit) {
+ .rlim_cur = MAX(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_cur),
+ .rlim_max = MAX(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_max),
+ };
+
+ if (saved_rlimit->rlim_max >= new_rlimit.rlim_cur &&
+ saved_rlimit->rlim_cur >= new_rlimit.rlim_max) {
+ log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
+ return 0;
+ }
+
+ r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
+ if (r < 0)
+ return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
+
+ return 0;
+}
+
+static void test_usr(void) {
+
+ /* Check that /usr is not a separate fs */
+
+ if (dir_is_empty("/usr") <= 0)
+ return;
+
+ log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
+ "Some things will probably break (sometimes even silently) in mysterious ways. "
+ "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
+}
+
+static int enforce_syscall_archs(Set *archs) {
+#if HAVE_SECCOMP
+ int r;
+
+ if (!is_seccomp_available())
+ return 0;
+
+ r = seccomp_restrict_archs(arg_syscall_archs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
+#endif
+ return 0;
+}
+
+static int status_welcome(void) {
+ _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
+ int r;
+
+ if (IN_SET(arg_show_status, SHOW_STATUS_NO, SHOW_STATUS_AUTO))
+ return 0;
+
+ r = parse_os_release(NULL,
+ "PRETTY_NAME", &pretty_name,
+ "ANSI_COLOR", &ansi_color,
+ NULL);
+ if (r < 0)
+ log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to read os-release file, ignoring: %m");
+
+ if (log_get_show_color())
+ return status_printf(NULL, 0,
+ "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
+ isempty(ansi_color) ? "1" : ansi_color,
+ isempty(pretty_name) ? "Linux" : pretty_name);
+ else
+ return status_printf(NULL, 0,
+ "\nWelcome to %s!\n",
+ isempty(pretty_name) ? "Linux" : pretty_name);
+}
+
+static int write_container_id(void) {
+ const char *c;
+ int r;
+
+ c = getenv("container");
+ if (isempty(c))
+ return 0;
+
+ RUN_WITH_UMASK(0022)
+ r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
+
+ return 1;
+}
+
+static int bump_unix_max_dgram_qlen(void) {
+ _cleanup_free_ char *qlen = NULL;
+ unsigned long v;
+ int r;
+
+ /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set the value
+ * really really early during boot, so that it is actually applied to all our sockets, including the
+ * $NOTIFY_SOCKET one. */
+
+ r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
+ if (r < 0)
+ return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
+
+ r = safe_atolu(qlen, &v);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen);
+
+ if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
+ return 0;
+
+ r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN);
+ if (r < 0)
+ return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
+
+ return 1;
+}
+
+static int fixup_environment(void) {
+ _cleanup_free_ char *term = NULL;
+ const char *t;
+ int r;
+
+ /* Only fix up the environment when we are started as PID 1 */
+ if (getpid_cached() != 1)
+ return 0;
+
+ /* We expect the environment to be set correctly if run inside a container. */
+ if (detect_container() > 0)
+ return 0;
+
+ /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the backend
+ * device used by the console. We try to make a better guess here since some consoles might not have support
+ * for color mode for example.
+ *
+ * However if TERM was configured through the kernel command line then leave it alone. */
+ r = proc_cmdline_get_key("TERM", 0, &term);
+ if (r < 0)
+ return r;
+
+ t = term ?: default_term_for_tty("/dev/console");
+
+ if (setenv("TERM", t, 1) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static void redirect_telinit(int argc, char *argv[]) {
+
+ /* This is compatibility support for SysV, where calling init as a user is identical to telinit. */
+
+#if HAVE_SYSV_COMPAT
+ if (getpid_cached() == 1)
+ return;
+
+ if (!strstr(program_invocation_short_name, "init"))
+ return;
+
+ execv(SYSTEMCTL_BINARY_PATH, argv);
+ log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
+ exit(EXIT_FAILURE);
+#endif
+}
+
+static int become_shutdown(
+ const char *shutdown_verb,
+ int retval) {
+
+ char log_level[DECIMAL_STR_MAX(int) + 1],
+ exit_code[DECIMAL_STR_MAX(uint8_t) + 1],
+ timeout[DECIMAL_STR_MAX(usec_t) + 1];
+
+ const char* command_line[13] = {
+ SYSTEMD_SHUTDOWN_BINARY_PATH,
+ shutdown_verb,
+ "--timeout", timeout,
+ "--log-level", log_level,
+ "--log-target",
+ };
+
+ _cleanup_strv_free_ char **env_block = NULL;
+ size_t pos = 7;
+ int r;
+
+ assert(shutdown_verb);
+ assert(!command_line[pos]);
+ env_block = strv_copy(environ);
+
+ xsprintf(log_level, "%d", log_get_max_level());
+ xsprintf(timeout, "%" PRI_USEC "us", arg_default_timeout_stop_usec);
+
+ switch (log_get_target()) {
+
+ case LOG_TARGET_KMSG:
+ case LOG_TARGET_JOURNAL_OR_KMSG:
+ case LOG_TARGET_SYSLOG_OR_KMSG:
+ command_line[pos++] = "kmsg";
+ break;
+
+ case LOG_TARGET_NULL:
+ command_line[pos++] = "null";
+ break;
+
+ case LOG_TARGET_CONSOLE:
+ default:
+ command_line[pos++] = "console";
+ break;
+ };
+
+ if (log_get_show_color())
+ command_line[pos++] = "--log-color";
+
+ if (log_get_show_location())
+ command_line[pos++] = "--log-location";
+
+ if (streq(shutdown_verb, "exit")) {
+ command_line[pos++] = "--exit-code";
+ command_line[pos++] = exit_code;
+ xsprintf(exit_code, "%d", retval);
+ }
+
+ assert(pos < ELEMENTSOF(command_line));
+
+ if (streq(shutdown_verb, "reboot") &&
+ arg_shutdown_watchdog > 0 &&
+ arg_shutdown_watchdog != USEC_INFINITY) {
+
+ char *e;
+
+ /* If we reboot let's set the shutdown
+ * watchdog and tell the shutdown binary to
+ * repeatedly ping it */
+ r = watchdog_set_timeout(&arg_shutdown_watchdog);
+ watchdog_close(r < 0);
+
+ /* Tell the binary how often to ping, ignore failure */
+ if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
+ (void) strv_consume(&env_block, e);
+
+ if (arg_watchdog_device &&
+ asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
+ (void) strv_consume(&env_block, e);
+ } else
+ watchdog_close(true);
+
+ /* Avoid the creation of new processes forked by the
+ * kernel; at this point, we will not listen to the
+ * signals anyway */
+ if (detect_container() <= 0)
+ (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
+
+ execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
+ return -errno;
+}
+
+static void initialize_clock(void) {
+ int r;
+
+ if (clock_is_localtime(NULL) > 0) {
+ int min;
+
+ /*
+ * The very first call of settimeofday() also does a time warp in the kernel.
+ *
+ * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to take care
+ * of maintaining the RTC and do all adjustments. This matches the behavior of Windows, which leaves
+ * the RTC alone if the registry tells that the RTC runs in UTC.
+ */
+ r = clock_set_timezone(&min);
+ if (r < 0)
+ log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
+ else
+ log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
+
+ } else if (!in_initrd()) {
+ /*
+ * Do a dummy very first call to seal the kernel's time warp magic.
+ *
+ * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with LOCAL, but the
+ * real system could be set up that way. In such case, we need to delay the time-warp or the sealing
+ * until we reach the real system.
+ *
+ * Do no set the kernel's timezone. The concept of local time cannot be supported reliably, the time
+ * will jump or be incorrect at every daylight saving time change. All kernel local time concepts will
+ * be treated as UTC that way.
+ */
+ (void) clock_reset_timewarp();
+ }
+
+ r = clock_apply_epoch();
+ if (r < 0)
+ log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
+ else if (r > 0)
+ log_info("System time before build time, advancing clock.");
+}
+
+static void initialize_coredump(bool skip_setup) {
+#if ENABLE_COREDUMP
+ if (getpid_cached() != 1)
+ return;
+
+ /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
+ * will process core dumps for system services by default. */
+ if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
+ log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
+
+ /* But at the same time, turn off the core_pattern logic by default, so that no
+ * coredumps are stored until the systemd-coredump tool is enabled via
+ * sysctl. However it can be changed via the kernel command line later so core
+ * dumps can still be generated during early startup and in initramfs. */
+ if (!skip_setup)
+ disable_coredumps();
+#endif
+}
+
+static void initialize_core_pattern(bool skip_setup) {
+ int r;
+
+ if (skip_setup || !arg_early_core_pattern)
+ return;
+
+ if (getpid_cached() != 1)
+ return;
+
+ r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m", arg_early_core_pattern);
+}
+
+static void do_reexecute(
+ int argc,
+ char *argv[],
+ const struct rlimit *saved_rlimit_nofile,
+ const struct rlimit *saved_rlimit_memlock,
+ FDSet *fds,
+ const char *switch_root_dir,
+ const char *switch_root_init,
+ const char **ret_error_message) {
+
+ unsigned i, j, args_size;
+ const char **args;
+ int r;
+
+ assert(saved_rlimit_nofile);
+ assert(saved_rlimit_memlock);
+ assert(ret_error_message);
+
+ /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
+ * we do that */
+ watchdog_close(true);
+
+ /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
+ * the kernel default to its child processes */
+ if (saved_rlimit_nofile->rlim_cur != 0)
+ (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
+ if (saved_rlimit_memlock->rlim_cur != RLIM_INFINITY)
+ (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
+
+ if (switch_root_dir) {
+ /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
+ * SIGCHLD for them after deserializing. */
+ broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec);
+
+ /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
+ r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
+ if (r < 0)
+ log_error_errno(r, "Failed to switch root, trying to continue: %m");
+ }
+
+ args_size = MAX(6, argc+1);
+ args = newa(const char*, args_size);
+
+ if (!switch_root_init) {
+ char sfd[DECIMAL_STR_MAX(int) + 1];
+
+ /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
+ * the user didn't specify an explicit init to spawn. */
+
+ assert(arg_serialization);
+ assert(fds);
+
+ xsprintf(sfd, "%i", fileno(arg_serialization));
+
+ i = 0;
+ args[i++] = SYSTEMD_BINARY_PATH;
+ if (switch_root_dir)
+ args[i++] = "--switched-root";
+ args[i++] = arg_system ? "--system" : "--user";
+ args[i++] = "--deserialize";
+ args[i++] = sfd;
+ args[i++] = NULL;
+
+ assert(i <= args_size);
+
+ /*
+ * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this is on
+ * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
+ * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
+ * before proceeding into the exec().
+ */
+ valgrind_summary_hack();
+
+ (void) execv(args[0], (char* const*) args);
+ log_debug_errno(errno, "Failed to execute our own binary, trying fallback: %m");
+ }
+
+ /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
+ * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
+ * doesn't matter.) */
+
+ arg_serialization = safe_fclose(arg_serialization);
+ fds = fdset_free(fds);
+
+ /* Reopen the console */
+ (void) make_console_stdio();
+
+ for (j = 1, i = 1; j < (unsigned) argc; j++)
+ args[i++] = argv[j];
+ args[i++] = NULL;
+ assert(i <= args_size);
+
+ /* Reenable any blocked signals, especially important if we switch from initial ramdisk to init=... */
+ (void) reset_all_signal_handlers();
+ (void) reset_signal_mask();
+ (void) rlimit_nofile_safe();
+
+ if (switch_root_init) {
+ args[0] = switch_root_init;
+ (void) execv(args[0], (char* const*) args);
+ log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
+ }
+
+ args[0] = "/sbin/init";
+ (void) execv(args[0], (char* const*) args);
+ r = -errno;
+
+ manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
+ ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
+ "Failed to execute /sbin/init");
+
+ if (r == -ENOENT) {
+ log_warning("No /sbin/init, trying fallback");
+
+ args[0] = "/bin/sh";
+ args[1] = NULL;
+ (void) execv(args[0], (char* const*) args);
+ log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
+ } else
+ log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
+
+ *ret_error_message = "Failed to execute fallback shell";
+}
+
+static int invoke_main_loop(
+ Manager *m,
+ bool *ret_reexecute,
+ int *ret_retval, /* Return parameters relevant for shutting down */
+ const char **ret_shutdown_verb, /* … */
+ FDSet **ret_fds, /* Return parameters for reexecuting */
+ char **ret_switch_root_dir, /* … */
+ char **ret_switch_root_init, /* … */
+ const char **ret_error_message) {
+
+ int r;
+
+ assert(m);
+ assert(ret_reexecute);
+ assert(ret_retval);
+ assert(ret_shutdown_verb);
+ assert(ret_fds);
+ assert(ret_switch_root_dir);
+ assert(ret_switch_root_init);
+ assert(ret_error_message);
+
+ for (;;) {
+ r = manager_loop(m);
+ if (r < 0) {
+ *ret_error_message = "Failed to run main loop";
+ return log_emergency_errno(r, "Failed to run main loop: %m");
+ }
+
+ switch ((ManagerObjective) r) {
+
+ case MANAGER_RELOAD: {
+ LogTarget saved_log_target;
+ int saved_log_level;
+
+ log_info("Reloading.");
+
+ /* First, save any overridden log level/target, then parse the configuration file, which might
+ * change the log level to new settings. */
+
+ saved_log_level = m->log_level_overridden ? log_get_max_level() : -1;
+ saved_log_target = m->log_target_overridden ? log_get_target() : _LOG_TARGET_INVALID;
+
+ r = parse_config_file();
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse config file, ignoring: %m");
+
+ set_manager_defaults(m);
+
+ if (saved_log_level >= 0)
+ manager_override_log_level(m, saved_log_level);
+ if (saved_log_target >= 0)
+ manager_override_log_target(m, saved_log_target);
+
+ r = manager_reload(m);
+ if (r < 0)
+ /* Reloading failed before the point of no return. Let's continue running as if nothing happened. */
+ m->objective = MANAGER_OK;
+
+ break;
+ }
+
+ case MANAGER_REEXECUTE:
+
+ r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
+ if (r < 0) {
+ *ret_error_message = "Failed to prepare for reexecution";
+ return r;
+ }
+
+ log_notice("Reexecuting.");
+
+ *ret_reexecute = true;
+ *ret_retval = EXIT_SUCCESS;
+ *ret_shutdown_verb = NULL;
+ *ret_switch_root_dir = *ret_switch_root_init = NULL;
+
+ return 0;
+
+ case MANAGER_SWITCH_ROOT:
+ if (!m->switch_root_init) {
+ r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
+ if (r < 0) {
+ *ret_error_message = "Failed to prepare for reexecution";
+ return r;
+ }
+ } else
+ *ret_fds = NULL;
+
+ log_notice("Switching root.");
+
+ *ret_reexecute = true;
+ *ret_retval = EXIT_SUCCESS;
+ *ret_shutdown_verb = NULL;
+
+ /* Steal the switch root parameters */
+ *ret_switch_root_dir = m->switch_root;
+ *ret_switch_root_init = m->switch_root_init;
+ m->switch_root = m->switch_root_init = NULL;
+
+ return 0;
+
+ case MANAGER_EXIT:
+
+ if (MANAGER_IS_USER(m)) {
+ log_debug("Exit.");
+
+ *ret_reexecute = false;
+ *ret_retval = m->return_value;
+ *ret_shutdown_verb = NULL;
+ *ret_fds = NULL;
+ *ret_switch_root_dir = *ret_switch_root_init = NULL;
+
+ return 0;
+ }
+
+ _fallthrough_;
+ case MANAGER_REBOOT:
+ case MANAGER_POWEROFF:
+ case MANAGER_HALT:
+ case MANAGER_KEXEC: {
+ static const char * const table[_MANAGER_OBJECTIVE_MAX] = {
+ [MANAGER_EXIT] = "exit",
+ [MANAGER_REBOOT] = "reboot",
+ [MANAGER_POWEROFF] = "poweroff",
+ [MANAGER_HALT] = "halt",
+ [MANAGER_KEXEC] = "kexec",
+ };
+
+ log_notice("Shutting down.");
+
+ *ret_reexecute = false;
+ *ret_retval = m->return_value;
+ assert_se(*ret_shutdown_verb = table[m->objective]);
+ *ret_fds = NULL;
+ *ret_switch_root_dir = *ret_switch_root_init = NULL;
+
+ return 0;
+ }
+
+ default:
+ assert_not_reached("Unknown or unexpected manager objective.");
+ }
+ }
+}
+
+static void log_execution_mode(bool *ret_first_boot) {
+ assert(ret_first_boot);
+
+ if (arg_system) {
+ int v;
+
+ log_info("systemd " GIT_VERSION " running in %ssystem mode. (" SYSTEMD_FEATURES ")",
+ arg_action == ACTION_TEST ? "test " : "" );
+
+ v = detect_virtualization();
+ if (v > 0)
+ log_info("Detected virtualization %s.", virtualization_to_string(v));
+
+ log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
+
+ if (in_initrd()) {
+ *ret_first_boot = false;
+ log_info("Running in initial RAM disk.");
+ } else {
+ /* Let's check whether we are in first boot, i.e. whether /etc is still unpopulated. We use
+ * /etc/machine-id as flag file, for this: if it exists we assume /etc is populated, if it
+ * doesn't it's unpopulated. This allows container managers and installers to provision a
+ * couple of files already. If the container manager wants to provision the machine ID itself
+ * it should pass $container_uuid to PID 1. */
+
+ *ret_first_boot = access("/etc/machine-id", F_OK) < 0;
+ if (*ret_first_boot)
+ log_info("Running with unpopulated /etc.");
+ }
+ } else {
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *t;
+
+ t = uid_to_name(getuid());
+ log_debug("systemd " GIT_VERSION " running in %suser mode for user " UID_FMT "/%s. (" SYSTEMD_FEATURES ")",
+ arg_action == ACTION_TEST ? " test" : "", getuid(), strna(t));
+ }
+
+ *ret_first_boot = false;
+ }
+}
+
+static int initialize_runtime(
+ bool skip_setup,
+ struct rlimit *saved_rlimit_nofile,
+ struct rlimit *saved_rlimit_memlock,
+ const char **ret_error_message) {
+
+ int r;
+
+ assert(ret_error_message);
+
+ /* Sets up various runtime parameters. Many of these initializations are conditionalized:
+ *
+ * - Some only apply to --system instances
+ * - Some only apply to --user instances
+ * - Some only apply when we first start up, but not when we reexecute
+ */
+
+ if (arg_action != ACTION_RUN)
+ return 0;
+
+ if (arg_system) {
+ /* Make sure we leave a core dump without panicing the kernel. */
+ install_crash_handler();
+
+ if (!skip_setup) {
+ r = mount_cgroup_controllers();
+ if (r < 0) {
+ *ret_error_message = "Failed to mount cgroup hierarchies";
+ return r;
+ }
+
+ status_welcome();
+ hostname_setup();
+ machine_id_setup(NULL, arg_machine_id, NULL);
+ loopback_setup();
+ bump_unix_max_dgram_qlen();
+ bump_file_max_and_nr_open();
+ test_usr();
+ write_container_id();
+ }
+
+ if (arg_watchdog_device) {
+ r = watchdog_set_device(arg_watchdog_device);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
+ }
+
+ if (arg_runtime_watchdog > 0 && arg_runtime_watchdog != USEC_INFINITY)
+ watchdog_set_timeout(&arg_runtime_watchdog);
+ }
+
+ if (arg_timer_slack_nsec != NSEC_INFINITY)
+ if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
+ log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
+
+ if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
+ r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
+ if (r < 0) {
+ *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
+ return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
+ }
+
+ r = capability_bounding_set_drop(arg_capability_bounding_set, true);
+ if (r < 0) {
+ *ret_error_message = "Failed to drop capability bounding set";
+ return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
+ }
+ }
+
+ if (arg_system && arg_no_new_privs) {
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
+ *ret_error_message = "Failed to disable new privileges";
+ return log_emergency_errno(errno, "Failed to disable new privileges: %m");
+ }
+ }
+
+ if (arg_syscall_archs) {
+ r = enforce_syscall_archs(arg_syscall_archs);
+ if (r < 0) {
+ *ret_error_message = "Failed to set syscall architectures";
+ return r;
+ }
+ }
+
+ if (!arg_system)
+ /* Become reaper of our children */
+ if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
+ log_warning_errno(errno, "Failed to make us a subreaper: %m");
+
+ /* Bump up RLIMIT_NOFILE for systemd itself */
+ (void) bump_rlimit_nofile(saved_rlimit_nofile);
+ (void) bump_rlimit_memlock(saved_rlimit_memlock);
+
+ return 0;
+}
+
+static int do_queue_default_job(
+ Manager *m,
+ const char **ret_error_message) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Job *default_unit_job;
+ Unit *target = NULL;
+ int r;
+
+ log_debug("Activating default unit: %s", arg_default_unit);
+
+ r = manager_load_startable_unit_or_warn(m, arg_default_unit, NULL, &target);
+ if (r < 0) {
+ log_info("Falling back to rescue target: " SPECIAL_RESCUE_TARGET);
+
+ r = manager_load_startable_unit_or_warn(m, SPECIAL_RESCUE_TARGET, NULL, &target);
+ if (r < 0) {
+ *ret_error_message = r == -ERFKILL ? "Rescue target masked"
+ : "Failed to load rescue target";
+ return r;
+ }
+ }
+
+ assert(target->load_state == UNIT_LOADED);
+
+ r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, &error, &default_unit_job);
+ if (r == -EPERM) {
+ log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
+
+ sd_bus_error_free(&error);
+
+ r = manager_add_job(m, JOB_START, target, JOB_REPLACE, &error, &default_unit_job);
+ if (r < 0) {
+ *ret_error_message = "Failed to start default target";
+ return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
+ }
+
+ } else if (r < 0) {
+ *ret_error_message = "Failed to isolate default target";
+ return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
+ }
+
+ m->default_unit_job_id = default_unit_job->id;
+
+ return 0;
+}
+
+static void free_arguments(void) {
+
+ /* Frees all arg_* variables, with the exception of arg_serialization */
+ rlimit_free_all(arg_default_rlimit);
+
+ arg_default_unit = mfree(arg_default_unit);
+ arg_confirm_spawn = mfree(arg_confirm_spawn);
+ arg_default_environment = strv_free(arg_default_environment);
+ arg_syscall_archs = set_free(arg_syscall_archs);
+}
+
+static int load_configuration(int argc, char **argv, const char **ret_error_message) {
+ int r;
+
+ assert(ret_error_message);
+
+ arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
+
+ r = parse_config_file();
+ if (r < 0) {
+ *ret_error_message = "Failed to parse config file";
+ return r;
+ }
+
+ if (arg_system) {
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+ }
+
+ /* Note that this also parses bits from the kernel command line, including "debug". */
+ log_parse_environment();
+
+ r = parse_argv(argc, argv);
+ if (r < 0) {
+ *ret_error_message = "Failed to parse commandline arguments";
+ return r;
+ }
+
+ /* Initialize default unit */
+ if (!arg_default_unit) {
+ arg_default_unit = strdup(SPECIAL_DEFAULT_TARGET);
+ if (!arg_default_unit) {
+ *ret_error_message = "Failed to set default unit";
+ return log_oom();
+ }
+ }
+
+ /* Initialize the show status setting if it hasn't been set explicitly yet */
+ if (arg_show_status == _SHOW_STATUS_INVALID)
+ arg_show_status = SHOW_STATUS_YES;
+
+ return 0;
+}
+
+static int safety_checks(void) {
+
+ if (getpid_cached() == 1 &&
+ arg_action != ACTION_RUN)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Unsupported execution mode while PID 1.");
+
+ if (getpid_cached() == 1 &&
+ !arg_system)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Can't run --user mode as PID 1.");
+
+ if (arg_action == ACTION_RUN &&
+ arg_system &&
+ getpid_cached() != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Can't run system mode unless PID 1.");
+
+ if (arg_action == ACTION_TEST &&
+ geteuid() == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Don't run test mode as root.");
+
+ if (!arg_system &&
+ arg_action == ACTION_RUN &&
+ sd_booted() <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Trying to run as user instance, but the system has not been booted with systemd.");
+
+ if (!arg_system &&
+ arg_action == ACTION_RUN &&
+ !getenv("XDG_RUNTIME_DIR"))
+ return log_error_errno(SYNTHETIC_ERRNO(EUNATCH),
+ "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
+
+ if (arg_system &&
+ arg_action == ACTION_RUN &&
+ running_in_chroot() > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Cannot be run in a chroot() environment.");
+
+ return 0;
+}
+
+static int initialize_security(
+ bool *loaded_policy,
+ dual_timestamp *security_start_timestamp,
+ dual_timestamp *security_finish_timestamp,
+ const char **ret_error_message) {
+
+ int r;
+
+ assert(loaded_policy);
+ assert(security_start_timestamp);
+ assert(security_finish_timestamp);
+ assert(ret_error_message);
+
+ dual_timestamp_get(security_start_timestamp);
+
+ r = mac_selinux_setup(loaded_policy);
+ if (r < 0) {
+ *ret_error_message = "Failed to load SELinux policy";
+ return r;
+ }
+
+ r = mac_smack_setup(loaded_policy);
+ if (r < 0) {
+ *ret_error_message = "Failed to load SMACK policy";
+ return r;
+ }
+
+ r = ima_setup();
+ if (r < 0) {
+ *ret_error_message = "Failed to load IMA policy";
+ return r;
+ }
+
+ dual_timestamp_get(security_finish_timestamp);
+ return 0;
+}
+
+static void test_summary(Manager *m) {
+ assert(m);
+
+ printf("-> By units:\n");
+ manager_dump_units(m, stdout, "\t");
+
+ printf("-> By jobs:\n");
+ manager_dump_jobs(m, stdout, "\t");
+}
+
+static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
+ int r;
+
+ assert(ret_fds);
+ assert(ret_error_message);
+
+ r = fdset_new_fill(ret_fds);
+ if (r < 0) {
+ *ret_error_message = "Failed to allocate fd set";
+ return log_emergency_errno(r, "Failed to allocate fd set: %m");
+ }
+
+ fdset_cloexec(*ret_fds, true);
+
+ if (arg_serialization)
+ assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
+
+ return 0;
+}
+
+static void setup_console_terminal(bool skip_setup) {
+
+ if (!arg_system)
+ return;
+
+ /* Become a session leader if we aren't one yet. */
+ (void) setsid();
+
+ /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a controlling
+ * tty. */
+ (void) release_terminal();
+
+ /* Reset the console, but only if this is really init and we are freshly booted */
+ if (getpid_cached() == 1 && !skip_setup)
+ (void) console_setup();
+}
+
+static bool early_skip_setup_check(int argc, char *argv[]) {
+ bool found_deserialize = false;
+ int i;
+
+ /* Determine if this is a reexecution or normal bootup. We do the full command line parsing much later, so
+ * let's just have a quick peek here. Note that if we have switched root, do all the special setup things
+ * anyway, even if in that case we also do deserialization. */
+
+ for (i = 1; i < argc; i++) {
+ if (streq(argv[i], "--switched-root"))
+ return false; /* If we switched root, don't skip the setup. */
+ else if (streq(argv[i], "--deserialize"))
+ found_deserialize = true;
+ }
+
+ return found_deserialize; /* When we are deserializing, then we are reexecuting, hence avoid the extensive setup */
+}
+
+int main(int argc, char *argv[]) {
+
+ dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL, userspace_timestamp = DUAL_TIMESTAMP_NULL, kernel_timestamp = DUAL_TIMESTAMP_NULL,
+ security_start_timestamp = DUAL_TIMESTAMP_NULL, security_finish_timestamp = DUAL_TIMESTAMP_NULL;
+ struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0),
+ saved_rlimit_memlock = RLIMIT_MAKE_CONST(RLIM_INFINITY); /* The original rlimits we passed
+ * in. Note we use different values
+ * for the two that indicate whether
+ * these fields are initialized! */
+ bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false, reexecute = false;
+ char *switch_root_dir = NULL, *switch_root_init = NULL;
+ usec_t before_startup, after_startup;
+ static char systemd[] = "systemd";
+ char timespan[FORMAT_TIMESPAN_MAX];
+ const char *shutdown_verb = NULL, *error_message = NULL;
+ int r, retval = EXIT_FAILURE;
+ Manager *m = NULL;
+ FDSet *fds = NULL;
+
+ /* SysV compatibility: redirect init → telinit */
+ redirect_telinit(argc, argv);
+
+ /* Take timestamps early on */
+ dual_timestamp_from_monotonic(&kernel_timestamp, 0);
+ dual_timestamp_get(&userspace_timestamp);
+
+ /* Figure out whether we need to do initialize the system, or if we already did that because we are
+ * reexecuting */
+ skip_setup = early_skip_setup_check(argc, argv);
+
+ /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we
+ * are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */
+ program_invocation_short_name = systemd;
+ (void) prctl(PR_SET_NAME, systemd);
+
+ /* Save the original command line */
+ saved_argv = argv;
+ saved_argc = argc;
+
+ /* Make sure that if the user says "syslog" we actually log to the journal. */
+ log_set_upgrade_syslog_to_journal(true);
+
+ if (getpid_cached() == 1) {
+ /* When we run as PID 1 force system mode */
+ arg_system = true;
+
+ /* Disable the umask logic */
+ umask(0);
+
+ /* Make sure that at least initially we do not ever log to journald/syslogd, because it might not be
+ * activated yet (even though the log socket for it exists). */
+ log_set_prohibit_ipc(true);
+
+ /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
+ * important so that we never end up logging to any foreign stderr, for example if we have to log in a
+ * child process right before execve()'ing the actual binary, at a point in time where socket
+ * activation stderr/stdout area already set up. */
+ log_set_always_reopen_console(true);
+
+ if (detect_container() <= 0) {
+
+ /* Running outside of a container as PID 1 */
+ log_set_target(LOG_TARGET_KMSG);
+ log_open();
+
+ if (in_initrd())
+ initrd_timestamp = userspace_timestamp;
+
+ if (!skip_setup) {
+ r = mount_setup_early();
+ if (r < 0) {
+ error_message = "Failed to mount early API filesystems";
+ goto finish;
+ }
+
+ r = initialize_security(
+ &loaded_policy,
+ &security_start_timestamp,
+ &security_finish_timestamp,
+ &error_message);
+ if (r < 0)
+ goto finish;
+ }
+
+ if (mac_selinux_init() < 0) {
+ error_message = "Failed to initialize SELinux policy";
+ goto finish;
+ }
+
+ if (!skip_setup)
+ initialize_clock();
+
+ /* Set the default for later on, but don't actually open the logs like this for now. Note that
+ * if we are transitioning from the initrd there might still be journal fd open, and we
+ * shouldn't attempt opening that before we parsed /proc/cmdline which might redirect output
+ * elsewhere. */
+ log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
+
+ } else {
+ /* Running inside a container, as PID 1 */
+ log_set_target(LOG_TARGET_CONSOLE);
+ log_open();
+
+ /* For later on, see above... */
+ log_set_target(LOG_TARGET_JOURNAL);
+
+ /* clear the kernel timestamp,
+ * because we are in a container */
+ kernel_timestamp = DUAL_TIMESTAMP_NULL;
+ }
+
+ initialize_coredump(skip_setup);
+
+ r = fixup_environment();
+ if (r < 0) {
+ log_emergency_errno(r, "Failed to fix up PID 1 environment: %m");
+ error_message = "Failed to fix up PID1 environment";
+ goto finish;
+ }
+
+ } else {
+ /* Running as user instance */
+ arg_system = false;
+ log_set_target(LOG_TARGET_AUTO);
+ log_open();
+
+ /* clear the kernel timestamp,
+ * because we are not PID 1 */
+ kernel_timestamp = DUAL_TIMESTAMP_NULL;
+ }
+
+ if (arg_system) {
+ /* Try to figure out if we can use colors with the console. No need to do that for user instances since
+ * they never log into the console. */
+ log_show_color(colors_enabled());
+
+ r = make_null_stdio();
+ if (r < 0)
+ log_warning_errno(r, "Failed to redirect standard streams to /dev/null, ignoring: %m");
+ }
+
+ /* Mount /proc, /sys and friends, so that /proc/cmdline and
+ * /proc/$PID/fd is available. */
+ if (getpid_cached() == 1) {
+
+ /* Load the kernel modules early. */
+ if (!skip_setup)
+ kmod_setup();
+
+ r = mount_setup(loaded_policy);
+ if (r < 0) {
+ error_message = "Failed to mount API filesystems";
+ goto finish;
+ }
+ }
+
+ /* Reset all signal handlers. */
+ (void) reset_all_signal_handlers();
+ (void) ignore_signals(SIGNALS_IGNORE, -1);
+
+ r = load_configuration(argc, argv, &error_message);
+ if (r < 0)
+ goto finish;
+
+ r = safety_checks();
+ if (r < 0)
+ goto finish;
+
+ if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES))
+ (void) pager_open(arg_pager_flags);
+
+ if (arg_action != ACTION_RUN)
+ skip_setup = true;
+
+ if (arg_action == ACTION_HELP) {
+ retval = help() < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+ goto finish;
+ } else if (arg_action == ACTION_VERSION) {
+ retval = version();
+ goto finish;
+ } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
+ unit_dump_config_items(stdout);
+ retval = EXIT_SUCCESS;
+ goto finish;
+ } else if (arg_action == ACTION_DUMP_BUS_PROPERTIES) {
+ dump_bus_properties(stdout);
+ retval = EXIT_SUCCESS;
+ goto finish;
+ }
+
+ assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
+
+ /* Move out of the way, so that we won't block unmounts */
+ assert_se(chdir("/") == 0);
+
+ if (arg_action == ACTION_RUN) {
+
+ /* A core pattern might have been specified via the cmdline. */
+ initialize_core_pattern(skip_setup);
+
+ /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
+ log_close();
+
+ /* Remember open file descriptors for later deserialization */
+ r = collect_fds(&fds, &error_message);
+ if (r < 0)
+ goto finish;
+
+ /* Give up any control of the console, but make sure its initialized. */
+ setup_console_terminal(skip_setup);
+
+ /* Open the logging devices, if possible and necessary */
+ log_open();
+ }
+
+ log_execution_mode(&first_boot);
+
+ r = initialize_runtime(skip_setup,
+ &saved_rlimit_nofile,
+ &saved_rlimit_memlock,
+ &error_message);
+ if (r < 0)
+ goto finish;
+
+ r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
+ arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
+ &m);
+ if (r < 0) {
+ log_emergency_errno(r, "Failed to allocate manager object: %m");
+ error_message = "Failed to allocate manager object";
+ goto finish;
+ }
+
+ m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
+ m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
+ m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
+ m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp;
+ m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp;
+
+ set_manager_defaults(m);
+ set_manager_settings(m);
+ manager_set_first_boot(m, first_boot);
+
+ /* Remember whether we should queue the default job */
+ queue_default_job = !arg_serialization || arg_switched_root;
+
+ before_startup = now(CLOCK_MONOTONIC);
+
+ r = manager_startup(m, arg_serialization, fds);
+ if (r < 0) {
+ error_message = "Failed to start up manager";
+ goto finish;
+ }
+
+ /* This will close all file descriptors that were opened, but not claimed by any unit. */
+ fds = fdset_free(fds);
+ arg_serialization = safe_fclose(arg_serialization);
+
+ if (queue_default_job) {
+ r = do_queue_default_job(m, &error_message);
+ if (r < 0)
+ goto finish;
+ }
+
+ after_startup = now(CLOCK_MONOTONIC);
+
+ log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
+ "Loaded units and determined initial transaction in %s.",
+ format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));
+
+ if (arg_action == ACTION_TEST) {
+ test_summary(m);
+ retval = EXIT_SUCCESS;
+ goto finish;
+ }
+
+ (void) invoke_main_loop(m,
+ &reexecute,
+ &retval,
+ &shutdown_verb,
+ &fds,
+ &switch_root_dir,
+ &switch_root_init,
+ &error_message);
+
+finish:
+ pager_close();
+
+ if (m) {
+ arg_shutdown_watchdog = m->shutdown_watchdog;
+ m = manager_free(m);
+ }
+
+ free_arguments();
+ mac_selinux_finish();
+
+ if (reexecute)
+ do_reexecute(argc, argv,
+ &saved_rlimit_nofile,
+ &saved_rlimit_memlock,
+ fds,
+ switch_root_dir,
+ switch_root_init,
+ &error_message); /* This only returns if reexecution failed */
+
+ arg_serialization = safe_fclose(arg_serialization);
+ fds = fdset_free(fds);
+
+#if HAVE_VALGRIND_VALGRIND_H
+ /* If we are PID 1 and running under valgrind, then let's exit
+ * here explicitly. valgrind will only generate nice output on
+ * exit(), not on exec(), hence let's do the former not the
+ * latter here. */
+ if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
+ /* Cleanup watchdog_device strings for valgrind. We need them
+ * in become_shutdown() so normally we cannot free them yet. */
+ watchdog_free_device();
+ arg_watchdog_device = mfree(arg_watchdog_device);
+ return retval;
+ }
+#endif
+
+#if HAS_FEATURE_ADDRESS_SANITIZER
+ __lsan_do_leak_check();
+#endif
+
+ if (shutdown_verb) {
+ r = become_shutdown(shutdown_verb, retval);
+ log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
+ error_message = "Failed to execute shutdown binary";
+ }
+
+ watchdog_free_device();
+ arg_watchdog_device = mfree(arg_watchdog_device);
+
+ if (getpid_cached() == 1) {
+ if (error_message)
+ manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
+ ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
+ "%s.", error_message);
+ freeze_or_exit_or_reboot();
+ }
+
+ return retval;
+}
diff --git a/src/core/manager.c b/src/core/manager.c
new file mode 100644
index 0000000..6086531
--- /dev/null
+++ b/src/core/manager.c
@@ -0,0 +1,4678 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/kd.h>
+#include <signal.h>
+#include <stdio_ext.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/inotify.h>
+#include <sys/ioctl.h>
+#include <sys/reboot.h>
+#include <sys/timerfd.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#if HAVE_AUDIT
+#include <libaudit.h>
+#endif
+
+#include "sd-daemon.h"
+#include "sd-messages.h"
+#include "sd-path.h"
+
+#include "all-units.h"
+#include "alloc-util.h"
+#include "audit-fd.h"
+#include "boot-timestamps.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-kernel.h"
+#include "bus-util.h"
+#include "clean-ipc.h"
+#include "clock-util.h"
+#include "dbus-job.h"
+#include "dbus-manager.h"
+#include "dbus-unit.h"
+#include "dbus.h"
+#include "dirent-util.h"
+#include "env-util.h"
+#include "escape.h"
+#include "exec-util.h"
+#include "execute.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "io-util.h"
+#include "label.h"
+#include "locale-setup.h"
+#include "log.h"
+#include "macro.h"
+#include "manager.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "ratelimit.h"
+#include "rlimit-util.h"
+#include "rm-rf.h"
+#include "serialize.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "syslog-util.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "transaction.h"
+#include "umask-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+#include "watchdog.h"
+
+#define NOTIFY_RCVBUF_SIZE (8*1024*1024)
+#define CGROUPS_AGENT_RCVBUF_SIZE (8*1024*1024)
+
+/* Initial delay and the interval for printing status messages about running jobs */
+#define JOBS_IN_PROGRESS_WAIT_USEC (5*USEC_PER_SEC)
+#define JOBS_IN_PROGRESS_PERIOD_USEC (USEC_PER_SEC / 3)
+#define JOBS_IN_PROGRESS_PERIOD_DIVISOR 3
+
+/* If there are more than 1K bus messages queue across our API and direct busses, then let's not add more on top until
+ * the queue gets more empty. */
+#define MANAGER_BUS_BUSY_THRESHOLD 1024LU
+
+/* How many units and jobs to process of the bus queue before returning to the event loop. */
+#define MANAGER_BUS_MESSAGE_BUDGET 100U
+
+static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata);
+static int manager_dispatch_run_queue(sd_event_source *source, void *userdata);
+static int manager_dispatch_sigchld(sd_event_source *source, void *userdata);
+static int manager_dispatch_timezone_change(sd_event_source *source, const struct inotify_event *event, void *userdata);
+static int manager_run_environment_generators(Manager *m);
+static int manager_run_generators(Manager *m);
+
+static void manager_watch_jobs_in_progress(Manager *m) {
+ usec_t next;
+ int r;
+
+ assert(m);
+
+ /* We do not want to show the cylon animation if the user
+ * needs to confirm service executions otherwise confirmation
+ * messages will be screwed by the cylon animation. */
+ if (!manager_is_confirm_spawn_disabled(m))
+ return;
+
+ if (m->jobs_in_progress_event_source)
+ return;
+
+ next = now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_WAIT_USEC;
+ r = sd_event_add_time(
+ m->event,
+ &m->jobs_in_progress_event_source,
+ CLOCK_MONOTONIC,
+ next, 0,
+ manager_dispatch_jobs_in_progress, m);
+ if (r < 0)
+ return;
+
+ (void) sd_event_source_set_description(m->jobs_in_progress_event_source, "manager-jobs-in-progress");
+}
+
+#define CYLON_BUFFER_EXTRA (2*STRLEN(ANSI_RED) + STRLEN(ANSI_HIGHLIGHT_RED) + 2*STRLEN(ANSI_NORMAL))
+
+static void draw_cylon(char buffer[], size_t buflen, unsigned width, unsigned pos) {
+ char *p = buffer;
+
+ assert(buflen >= CYLON_BUFFER_EXTRA + width + 1);
+ assert(pos <= width+1); /* 0 or width+1 mean that the center light is behind the corner */
+
+ if (pos > 1) {
+ if (pos > 2)
+ p = mempset(p, ' ', pos-2);
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_RED);
+ *p++ = '*';
+ }
+
+ if (pos > 0 && pos <= width) {
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_HIGHLIGHT_RED);
+ *p++ = '*';
+ }
+
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_NORMAL);
+
+ if (pos < width) {
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_RED);
+ *p++ = '*';
+ if (pos < width-1)
+ p = mempset(p, ' ', width-1-pos);
+ if (log_get_show_color())
+ strcpy(p, ANSI_NORMAL);
+ }
+}
+
+void manager_flip_auto_status(Manager *m, bool enable) {
+ assert(m);
+
+ if (enable) {
+ if (m->show_status == SHOW_STATUS_AUTO)
+ manager_set_show_status(m, SHOW_STATUS_TEMPORARY);
+ } else {
+ if (m->show_status == SHOW_STATUS_TEMPORARY)
+ manager_set_show_status(m, SHOW_STATUS_AUTO);
+ }
+}
+
+static void manager_print_jobs_in_progress(Manager *m) {
+ _cleanup_free_ char *job_of_n = NULL;
+ Iterator i;
+ Job *j;
+ unsigned counter = 0, print_nr;
+ char cylon[6 + CYLON_BUFFER_EXTRA + 1];
+ unsigned cylon_pos;
+ char time[FORMAT_TIMESPAN_MAX], limit[FORMAT_TIMESPAN_MAX] = "no limit";
+ uint64_t x;
+
+ assert(m);
+ assert(m->n_running_jobs > 0);
+
+ manager_flip_auto_status(m, true);
+
+ print_nr = (m->jobs_in_progress_iteration / JOBS_IN_PROGRESS_PERIOD_DIVISOR) % m->n_running_jobs;
+
+ HASHMAP_FOREACH(j, m->jobs, i)
+ if (j->state == JOB_RUNNING && counter++ == print_nr)
+ break;
+
+ /* m->n_running_jobs must be consistent with the contents of m->jobs,
+ * so the above loop must have succeeded in finding j. */
+ assert(counter == print_nr + 1);
+ assert(j);
+
+ cylon_pos = m->jobs_in_progress_iteration % 14;
+ if (cylon_pos >= 8)
+ cylon_pos = 14 - cylon_pos;
+ draw_cylon(cylon, sizeof(cylon), 6, cylon_pos);
+
+ m->jobs_in_progress_iteration++;
+
+ if (m->n_running_jobs > 1) {
+ if (asprintf(&job_of_n, "(%u of %u) ", counter, m->n_running_jobs) < 0)
+ job_of_n = NULL;
+ }
+
+ format_timespan(time, sizeof(time), now(CLOCK_MONOTONIC) - j->begin_usec, 1*USEC_PER_SEC);
+ if (job_get_timeout(j, &x) > 0)
+ format_timespan(limit, sizeof(limit), x - j->begin_usec, 1*USEC_PER_SEC);
+
+ manager_status_printf(m, STATUS_TYPE_EPHEMERAL, cylon,
+ "%sA %s job is running for %s (%s / %s)",
+ strempty(job_of_n),
+ job_type_to_string(j->type),
+ unit_description(j->unit),
+ time, limit);
+}
+
+static int have_ask_password(void) {
+ _cleanup_closedir_ DIR *dir;
+ struct dirent *de;
+
+ dir = opendir("/run/systemd/ask-password");
+ if (!dir) {
+ if (errno == ENOENT)
+ return false;
+ else
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, dir, return -errno) {
+ if (startswith(de->d_name, "ask."))
+ return true;
+ }
+ return false;
+}
+
+static int manager_dispatch_ask_password_fd(sd_event_source *source,
+ int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+
+ (void) flush_fd(fd);
+
+ m->have_ask_password = have_ask_password();
+ if (m->have_ask_password < 0)
+ /* Log error but continue. Negative have_ask_password
+ * is treated as unknown status. */
+ log_error_errno(m->have_ask_password, "Failed to list /run/systemd/ask-password: %m");
+
+ return 0;
+}
+
+static void manager_close_ask_password(Manager *m) {
+ assert(m);
+
+ m->ask_password_event_source = sd_event_source_unref(m->ask_password_event_source);
+ m->ask_password_inotify_fd = safe_close(m->ask_password_inotify_fd);
+ m->have_ask_password = -EINVAL;
+}
+
+static int manager_check_ask_password(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (!m->ask_password_event_source) {
+ assert(m->ask_password_inotify_fd < 0);
+
+ mkdir_p_label("/run/systemd/ask-password", 0755);
+
+ m->ask_password_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (m->ask_password_inotify_fd < 0)
+ return log_error_errno(errno, "inotify_init1() failed: %m");
+
+ if (inotify_add_watch(m->ask_password_inotify_fd, "/run/systemd/ask-password", IN_CREATE|IN_DELETE|IN_MOVE) < 0) {
+ log_error_errno(errno, "Failed to add watch on /run/systemd/ask-password: %m");
+ manager_close_ask_password(m);
+ return -errno;
+ }
+
+ r = sd_event_add_io(m->event, &m->ask_password_event_source,
+ m->ask_password_inotify_fd, EPOLLIN,
+ manager_dispatch_ask_password_fd, m);
+ if (r < 0) {
+ log_error_errno(errno, "Failed to add event source for /run/systemd/ask-password: %m");
+ manager_close_ask_password(m);
+ return -errno;
+ }
+
+ (void) sd_event_source_set_description(m->ask_password_event_source, "manager-ask-password");
+
+ /* Queries might have been added meanwhile... */
+ manager_dispatch_ask_password_fd(m->ask_password_event_source,
+ m->ask_password_inotify_fd, EPOLLIN, m);
+ }
+
+ return m->have_ask_password;
+}
+
+static int manager_watch_idle_pipe(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->idle_pipe_event_source)
+ return 0;
+
+ if (m->idle_pipe[2] < 0)
+ return 0;
+
+ r = sd_event_add_io(m->event, &m->idle_pipe_event_source, m->idle_pipe[2], EPOLLIN, manager_dispatch_idle_pipe_fd, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch idle pipe: %m");
+
+ (void) sd_event_source_set_description(m->idle_pipe_event_source, "manager-idle-pipe");
+
+ return 0;
+}
+
+static void manager_close_idle_pipe(Manager *m) {
+ assert(m);
+
+ m->idle_pipe_event_source = sd_event_source_unref(m->idle_pipe_event_source);
+
+ safe_close_pair(m->idle_pipe);
+ safe_close_pair(m->idle_pipe + 2);
+}
+
+static int manager_setup_time_change(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return 0;
+
+ m->time_change_event_source = sd_event_source_unref(m->time_change_event_source);
+ m->time_change_fd = safe_close(m->time_change_fd);
+
+ m->time_change_fd = time_change_fd();
+ if (m->time_change_fd < 0)
+ return log_error_errno(m->time_change_fd, "Failed to create timer change timer fd: %m");
+
+ r = sd_event_add_io(m->event, &m->time_change_event_source, m->time_change_fd, EPOLLIN, manager_dispatch_time_change_fd, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create time change event source: %m");
+
+ /* Schedule this slightly earlier than the .timer event sources */
+ r = sd_event_source_set_priority(m->time_change_event_source, SD_EVENT_PRIORITY_NORMAL-1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of time change event sources: %m");
+
+ (void) sd_event_source_set_description(m->time_change_event_source, "manager-time-change");
+
+ log_debug("Set up TFD_TIMER_CANCEL_ON_SET timerfd.");
+
+ return 0;
+}
+
+static int manager_read_timezone_stat(Manager *m) {
+ struct stat st;
+ bool changed;
+
+ assert(m);
+
+ /* Read the current stat() data of /etc/localtime so that we detect changes */
+ if (lstat("/etc/localtime", &st) < 0) {
+ log_debug_errno(errno, "Failed to stat /etc/localtime, ignoring: %m");
+ changed = m->etc_localtime_accessible;
+ m->etc_localtime_accessible = false;
+ } else {
+ usec_t k;
+
+ k = timespec_load(&st.st_mtim);
+ changed = !m->etc_localtime_accessible || k != m->etc_localtime_mtime;
+
+ m->etc_localtime_mtime = k;
+ m->etc_localtime_accessible = true;
+ }
+
+ return changed;
+}
+
+static int manager_setup_timezone_change(Manager *m) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *new_event = NULL;
+ int r;
+
+ assert(m);
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return 0;
+
+ /* We watch /etc/localtime for three events: change of the link count (which might mean removal from /etc even
+ * though another link might be kept), renames, and file close operations after writing. Note we don't bother
+ * with IN_DELETE_SELF, as that would just report when the inode is removed entirely, i.e. after the link count
+ * went to zero and all fds to it are closed.
+ *
+ * Note that we never follow symlinks here. This is a simplification, but should cover almost all cases
+ * correctly.
+ *
+ * Note that we create the new event source first here, before releasing the old one. This should optimize
+ * behaviour as this way sd-event can reuse the old watch in case the inode didn't change. */
+
+ r = sd_event_add_inotify(m->event, &new_event, "/etc/localtime",
+ IN_ATTRIB|IN_MOVE_SELF|IN_CLOSE_WRITE|IN_DONT_FOLLOW, manager_dispatch_timezone_change, m);
+ if (r == -ENOENT) {
+ /* If the file doesn't exist yet, subscribe to /etc instead, and wait until it is created either by
+ * O_CREATE or by rename() */
+
+ log_debug_errno(r, "/etc/localtime doesn't exist yet, watching /etc instead.");
+ r = sd_event_add_inotify(m->event, &new_event, "/etc",
+ IN_CREATE|IN_MOVED_TO|IN_ONLYDIR, manager_dispatch_timezone_change, m);
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to create timezone change event source: %m");
+
+ /* Schedule this slightly earlier than the .timer event sources */
+ r = sd_event_source_set_priority(new_event, SD_EVENT_PRIORITY_NORMAL-1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of timezone change event sources: %m");
+
+ sd_event_source_unref(m->timezone_change_event_source);
+ m->timezone_change_event_source = TAKE_PTR(new_event);
+
+ return 0;
+}
+
+static int enable_special_signals(Manager *m) {
+ _cleanup_close_ int fd = -1;
+
+ assert(m);
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return 0;
+
+ /* Enable that we get SIGINT on control-alt-del. In containers
+ * this will fail with EPERM (older) or EINVAL (newer), so
+ * ignore that. */
+ if (reboot(RB_DISABLE_CAD) < 0 && !IN_SET(errno, EPERM, EINVAL))
+ log_warning_errno(errno, "Failed to enable ctrl-alt-del handling: %m");
+
+ fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0) {
+ /* Support systems without virtual console */
+ if (fd != -ENOENT)
+ log_warning_errno(errno, "Failed to open /dev/tty0: %m");
+ } else {
+ /* Enable that we get SIGWINCH on kbrequest */
+ if (ioctl(fd, KDSIGACCEPT, SIGWINCH) < 0)
+ log_warning_errno(errno, "Failed to enable kbrequest handling: %m");
+ }
+
+ return 0;
+}
+
+#define RTSIG_IF_AVAILABLE(signum) (signum <= SIGRTMAX ? signum : -1)
+
+static int manager_setup_signals(Manager *m) {
+ struct sigaction sa = {
+ .sa_handler = SIG_DFL,
+ .sa_flags = SA_NOCLDSTOP|SA_RESTART,
+ };
+ sigset_t mask;
+ int r;
+
+ assert(m);
+
+ assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
+
+ /* We make liberal use of realtime signals here. On
+ * Linux/glibc we have 30 of them (with the exception of Linux
+ * on hppa, see below), between SIGRTMIN+0 ... SIGRTMIN+30
+ * (aka SIGRTMAX). */
+
+ assert_se(sigemptyset(&mask) == 0);
+ sigset_add_many(&mask,
+ SIGCHLD, /* Child died */
+ SIGTERM, /* Reexecute daemon */
+ SIGHUP, /* Reload configuration */
+ SIGUSR1, /* systemd/upstart: reconnect to D-Bus */
+ SIGUSR2, /* systemd: dump status */
+ SIGINT, /* Kernel sends us this on control-alt-del */
+ SIGWINCH, /* Kernel sends us this on kbrequest (alt-arrowup) */
+ SIGPWR, /* Some kernel drivers and upsd send us this on power failure */
+
+ SIGRTMIN+0, /* systemd: start default.target */
+ SIGRTMIN+1, /* systemd: isolate rescue.target */
+ SIGRTMIN+2, /* systemd: isolate emergency.target */
+ SIGRTMIN+3, /* systemd: start halt.target */
+ SIGRTMIN+4, /* systemd: start poweroff.target */
+ SIGRTMIN+5, /* systemd: start reboot.target */
+ SIGRTMIN+6, /* systemd: start kexec.target */
+
+ /* ... space for more special targets ... */
+
+ SIGRTMIN+13, /* systemd: Immediate halt */
+ SIGRTMIN+14, /* systemd: Immediate poweroff */
+ SIGRTMIN+15, /* systemd: Immediate reboot */
+ SIGRTMIN+16, /* systemd: Immediate kexec */
+
+ /* ... space for more immediate system state changes ... */
+
+ SIGRTMIN+20, /* systemd: enable status messages */
+ SIGRTMIN+21, /* systemd: disable status messages */
+ SIGRTMIN+22, /* systemd: set log level to LOG_DEBUG */
+ SIGRTMIN+23, /* systemd: set log level to LOG_INFO */
+ SIGRTMIN+24, /* systemd: Immediate exit (--user only) */
+
+ /* .. one free signal here ... */
+
+ /* Apparently Linux on hppa had fewer RT signals until v3.18,
+ * SIGRTMAX was SIGRTMIN+25, and then SIGRTMIN was lowered,
+ * see commit v3.17-7614-g1f25df2eff.
+ *
+ * We cannot unconditionally make use of those signals here,
+ * so let's use a runtime check. Since these commands are
+ * accessible by different means and only really a safety
+ * net, the missing functionality on hppa shouldn't matter.
+ */
+
+ RTSIG_IF_AVAILABLE(SIGRTMIN+26), /* systemd: set log target to journal-or-kmsg */
+ RTSIG_IF_AVAILABLE(SIGRTMIN+27), /* systemd: set log target to console */
+ RTSIG_IF_AVAILABLE(SIGRTMIN+28), /* systemd: set log target to kmsg */
+ RTSIG_IF_AVAILABLE(SIGRTMIN+29), /* systemd: set log target to syslog-or-kmsg (obsolete) */
+
+ /* ... one free signal here SIGRTMIN+30 ... */
+ -1);
+ assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
+
+ m->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
+ if (m->signal_fd < 0)
+ return -errno;
+
+ r = sd_event_add_io(m->event, &m->signal_event_source, m->signal_fd, EPOLLIN, manager_dispatch_signal_fd, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->signal_event_source, "manager-signal");
+
+ /* Process signals a bit earlier than the rest of things, but later than notify_fd processing, so that the
+ * notify processing can still figure out to which process/service a message belongs, before we reap the
+ * process. Also, process this before handling cgroup notifications, so that we always collect child exit
+ * status information before detecting that there's no process in a cgroup. */
+ r = sd_event_source_set_priority(m->signal_event_source, SD_EVENT_PRIORITY_NORMAL-6);
+ if (r < 0)
+ return r;
+
+ if (MANAGER_IS_SYSTEM(m))
+ return enable_special_signals(m);
+
+ return 0;
+}
+
+static char** sanitize_environment(char **l) {
+
+ /* Let's remove some environment variables that we need ourselves to communicate with our clients */
+ strv_env_unset_many(
+ l,
+ "EXIT_CODE",
+ "EXIT_STATUS",
+ "INVOCATION_ID",
+ "JOURNAL_STREAM",
+ "LISTEN_FDNAMES",
+ "LISTEN_FDS",
+ "LISTEN_PID",
+ "MAINPID",
+ "MANAGERPID",
+ "NOTIFY_SOCKET",
+ "REMOTE_ADDR",
+ "REMOTE_PORT",
+ "SERVICE_RESULT",
+ "WATCHDOG_PID",
+ "WATCHDOG_USEC",
+ NULL);
+
+ /* Let's order the environment alphabetically, just to make it pretty */
+ strv_sort(l);
+
+ return l;
+}
+
+int manager_default_environment(Manager *m) {
+ assert(m);
+
+ m->transient_environment = strv_free(m->transient_environment);
+
+ if (MANAGER_IS_SYSTEM(m)) {
+ /* The system manager always starts with a clean
+ * environment for its children. It does not import
+ * the kernel's or the parents' exported variables.
+ *
+ * The initial passed environment is untouched to keep
+ * /proc/self/environ valid; it is used for tagging
+ * the init process inside containers. */
+ m->transient_environment = strv_new("PATH=" DEFAULT_PATH);
+
+ /* Import locale variables LC_*= from configuration */
+ (void) locale_setup(&m->transient_environment);
+ } else
+ /* The user manager passes its own environment
+ * along to its children. */
+ m->transient_environment = strv_copy(environ);
+
+ if (!m->transient_environment)
+ return log_oom();
+
+ sanitize_environment(m->transient_environment);
+
+ return 0;
+}
+
+static int manager_setup_prefix(Manager *m) {
+ struct table_entry {
+ uint64_t type;
+ const char *suffix;
+ };
+
+ static const struct table_entry paths_system[_EXEC_DIRECTORY_TYPE_MAX] = {
+ [EXEC_DIRECTORY_RUNTIME] = { SD_PATH_SYSTEM_RUNTIME, NULL },
+ [EXEC_DIRECTORY_STATE] = { SD_PATH_SYSTEM_STATE_PRIVATE, NULL },
+ [EXEC_DIRECTORY_CACHE] = { SD_PATH_SYSTEM_STATE_CACHE, NULL },
+ [EXEC_DIRECTORY_LOGS] = { SD_PATH_SYSTEM_STATE_LOGS, NULL },
+ [EXEC_DIRECTORY_CONFIGURATION] = { SD_PATH_SYSTEM_CONFIGURATION, NULL },
+ };
+
+ static const struct table_entry paths_user[_EXEC_DIRECTORY_TYPE_MAX] = {
+ [EXEC_DIRECTORY_RUNTIME] = { SD_PATH_USER_RUNTIME, NULL },
+ [EXEC_DIRECTORY_STATE] = { SD_PATH_USER_CONFIGURATION, NULL },
+ [EXEC_DIRECTORY_CACHE] = { SD_PATH_USER_STATE_CACHE, NULL },
+ [EXEC_DIRECTORY_LOGS] = { SD_PATH_USER_CONFIGURATION, "log" },
+ [EXEC_DIRECTORY_CONFIGURATION] = { SD_PATH_USER_CONFIGURATION, NULL },
+ };
+
+ const struct table_entry *p;
+ ExecDirectoryType i;
+ int r;
+
+ assert(m);
+
+ if (MANAGER_IS_SYSTEM(m))
+ p = paths_system;
+ else
+ p = paths_user;
+
+ for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++) {
+ r = sd_path_home(p[i].type, p[i].suffix, &m->prefix[i]);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int manager_setup_run_queue(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(!m->run_queue_event_source);
+
+ r = sd_event_add_defer(m->event, &m->run_queue_event_source, manager_dispatch_run_queue, m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(m->run_queue_event_source, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(m->run_queue_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->run_queue_event_source, "manager-run-queue");
+
+ return 0;
+}
+
+static int manager_setup_sigchld_event_source(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(!m->sigchld_event_source);
+
+ r = sd_event_add_defer(m->event, &m->sigchld_event_source, manager_dispatch_sigchld, m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(m->sigchld_event_source, SD_EVENT_PRIORITY_NORMAL-7);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->sigchld_event_source, "manager-sigchld");
+
+ return 0;
+}
+
+int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager **_m) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ assert(_m);
+ assert(IN_SET(scope, UNIT_FILE_SYSTEM, UNIT_FILE_USER));
+
+ m = new(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (Manager) {
+ .unit_file_scope = scope,
+ .objective = _MANAGER_OBJECTIVE_INVALID,
+
+ .default_timer_accuracy_usec = USEC_PER_MINUTE,
+ .default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT,
+ .default_tasks_accounting = true,
+ .default_tasks_max = UINT64_MAX,
+ .default_timeout_start_usec = DEFAULT_TIMEOUT_USEC,
+ .default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC,
+ .default_restart_usec = DEFAULT_RESTART_USEC,
+
+ .original_log_level = -1,
+ .original_log_target = _LOG_TARGET_INVALID,
+
+ .notify_fd = -1,
+ .cgroups_agent_fd = -1,
+ .signal_fd = -1,
+ .time_change_fd = -1,
+ .user_lookup_fds = { -1, -1 },
+ .private_listen_fd = -1,
+ .dev_autofs_fd = -1,
+ .cgroup_inotify_fd = -1,
+ .pin_cgroupfs_fd = -1,
+ .ask_password_inotify_fd = -1,
+ .idle_pipe = { -1, -1, -1, -1},
+
+ /* start as id #1, so that we can leave #0 around as "null-like" value */
+ .current_job_id = 1,
+
+ .have_ask_password = -EINVAL, /* we don't know */
+ .first_boot = -1,
+ .test_run_flags = test_run_flags,
+ };
+
+#if ENABLE_EFI
+ if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
+ boot_timestamps(m->timestamps + MANAGER_TIMESTAMP_USERSPACE,
+ m->timestamps + MANAGER_TIMESTAMP_FIRMWARE,
+ m->timestamps + MANAGER_TIMESTAMP_LOADER);
+#endif
+
+ /* Prepare log fields we can use for structured logging */
+ if (MANAGER_IS_SYSTEM(m)) {
+ m->unit_log_field = "UNIT=";
+ m->unit_log_format_string = "UNIT=%s";
+
+ m->invocation_log_field = "INVOCATION_ID=";
+ m->invocation_log_format_string = "INVOCATION_ID=%s";
+ } else {
+ m->unit_log_field = "USER_UNIT=";
+ m->unit_log_format_string = "USER_UNIT=%s";
+
+ m->invocation_log_field = "USER_INVOCATION_ID=";
+ m->invocation_log_format_string = "USER_INVOCATION_ID=%s";
+ }
+
+ /* Reboot immediately if the user hits C-A-D more often than 7x per 2s */
+ RATELIMIT_INIT(m->ctrl_alt_del_ratelimit, 2 * USEC_PER_SEC, 7);
+
+ r = manager_default_environment(m);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&m->units, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&m->jobs, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&m->cgroup_unit, &path_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&m->watch_bus, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = manager_setup_prefix(m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ r = manager_setup_run_queue(m);
+ if (r < 0)
+ return r;
+
+ if (test_run_flags == MANAGER_TEST_RUN_MINIMAL) {
+ m->cgroup_root = strdup("");
+ if (!m->cgroup_root)
+ return -ENOMEM;
+ } else {
+ r = manager_setup_signals(m);
+ if (r < 0)
+ return r;
+
+ r = manager_setup_cgroup(m);
+ if (r < 0)
+ return r;
+
+ r = manager_setup_time_change(m);
+ if (r < 0)
+ return r;
+
+ r = manager_read_timezone_stat(m);
+ if (r < 0)
+ return r;
+
+ (void) manager_setup_timezone_change(m);
+
+ r = manager_setup_sigchld_event_source(m);
+ if (r < 0)
+ return r;
+ }
+
+ if (MANAGER_IS_SYSTEM(m) && test_run_flags == 0) {
+ r = mkdir_label("/run/systemd/units", 0755);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ m->taint_usr =
+ !in_initrd() &&
+ dir_is_empty("/usr") > 0;
+
+ /* Note that we do not set up the notify fd here. We do that after deserialization,
+ * since they might have gotten serialized across the reexec. */
+
+ *_m = TAKE_PTR(m);
+
+ return 0;
+}
+
+static int manager_setup_notify(Manager *m) {
+ int r;
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return 0;
+
+ if (m->notify_fd < 0) {
+ _cleanup_close_ int fd = -1;
+ union sockaddr_union sa = {};
+ int salen;
+
+ /* First free all secondary fields */
+ m->notify_socket = mfree(m->notify_socket);
+ m->notify_event_source = sd_event_source_unref(m->notify_event_source);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to allocate notification socket: %m");
+
+ fd_inc_rcvbuf(fd, NOTIFY_RCVBUF_SIZE);
+
+ m->notify_socket = strappend(m->prefix[EXEC_DIRECTORY_RUNTIME], "/systemd/notify");
+ if (!m->notify_socket)
+ return log_oom();
+
+ salen = sockaddr_un_set_path(&sa.un, m->notify_socket);
+ if (salen < 0)
+ return log_error_errno(salen, "Notify socket '%s' not valid for AF_UNIX socket address, refusing.", m->notify_socket);
+
+ (void) mkdir_parents_label(m->notify_socket, 0755);
+ (void) sockaddr_un_unlink(&sa.un);
+
+ r = bind(fd, &sa.sa, salen);
+ if (r < 0)
+ return log_error_errno(errno, "bind(%s) failed: %m", m->notify_socket);
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_error_errno(r, "SO_PASSCRED failed: %m");
+
+ m->notify_fd = TAKE_FD(fd);
+
+ log_debug("Using notification socket %s", m->notify_socket);
+ }
+
+ if (!m->notify_event_source) {
+ r = sd_event_add_io(m->event, &m->notify_event_source, m->notify_fd, EPOLLIN, manager_dispatch_notify_fd, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate notify event source: %m");
+
+ /* Process notification messages a bit earlier than SIGCHLD, so that we can still identify to which
+ * service an exit message belongs. */
+ r = sd_event_source_set_priority(m->notify_event_source, SD_EVENT_PRIORITY_NORMAL-8);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of notify event source: %m");
+
+ (void) sd_event_source_set_description(m->notify_event_source, "manager-notify");
+ }
+
+ return 0;
+}
+
+static int manager_setup_cgroups_agent(Manager *m) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/cgroups-agent",
+ };
+ int r;
+
+ /* This creates a listening socket we receive cgroups agent messages on. We do not use D-Bus for delivering
+ * these messages from the cgroups agent binary to PID 1, as the cgroups agent binary is very short-living, and
+ * each instance of it needs a new D-Bus connection. Since D-Bus connections are SOCK_STREAM/AF_UNIX, on
+ * overloaded systems the backlog of the D-Bus socket becomes relevant, as not more than the configured number
+ * of D-Bus connections may be queued until the kernel will start dropping further incoming connections,
+ * possibly resulting in lost cgroups agent messages. To avoid this, we'll use a private SOCK_DGRAM/AF_UNIX
+ * socket, where no backlog is relevant as communication may take place without an actual connect() cycle, and
+ * we thus won't lose messages.
+ *
+ * Note that PID 1 will forward the agent message to system bus, so that the user systemd instance may listen
+ * to it. The system instance hence listens on this special socket, but the user instances listen on the system
+ * bus for these messages. */
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return 0;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return 0;
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether unified cgroups hierarchy is used: %m");
+ if (r > 0) /* We don't need this anymore on the unified hierarchy */
+ return 0;
+
+ if (m->cgroups_agent_fd < 0) {
+ _cleanup_close_ int fd = -1;
+
+ /* First free all secondary fields */
+ m->cgroups_agent_event_source = sd_event_source_unref(m->cgroups_agent_event_source);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to allocate cgroups agent socket: %m");
+
+ fd_inc_rcvbuf(fd, CGROUPS_AGENT_RCVBUF_SIZE);
+
+ (void) sockaddr_un_unlink(&sa.un);
+
+ /* Only allow root to connect to this socket */
+ RUN_WITH_UMASK(0077)
+ r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
+
+ m->cgroups_agent_fd = TAKE_FD(fd);
+ }
+
+ if (!m->cgroups_agent_event_source) {
+ r = sd_event_add_io(m->event, &m->cgroups_agent_event_source, m->cgroups_agent_fd, EPOLLIN, manager_dispatch_cgroups_agent_fd, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate cgroups agent event source: %m");
+
+ /* Process cgroups notifications early, but after having processed service notification messages or
+ * SIGCHLD signals, so that a cgroup running empty is always just the last safety net of notification,
+ * and we collected the metadata the notification and SIGCHLD stuff offers first. Also see handling of
+ * cgroup inotify for the unified cgroup stuff. */
+ r = sd_event_source_set_priority(m->cgroups_agent_event_source, SD_EVENT_PRIORITY_NORMAL-4);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of cgroups agent event source: %m");
+
+ (void) sd_event_source_set_description(m->cgroups_agent_event_source, "manager-cgroups-agent");
+ }
+
+ return 0;
+}
+
+static int manager_setup_user_lookup_fd(Manager *m) {
+ int r;
+
+ assert(m);
+
+ /* Set up the socket pair used for passing UID/GID resolution results from forked off processes to PID
+ * 1. Background: we can't do name lookups (NSS) from PID 1, since it might involve IPC and thus activation,
+ * and we might hence deadlock on ourselves. Hence we do all user/group lookups asynchronously from the forked
+ * off processes right before executing the binaries to start. In order to be able to clean up any IPC objects
+ * created by a unit (see RemoveIPC=) we need to know in PID 1 the used UID/GID of the executed processes,
+ * hence we establish this communication channel so that forked off processes can pass their UID/GID
+ * information back to PID 1. The forked off processes send their resolved UID/GID to PID 1 in a simple
+ * datagram, along with their unit name, so that we can share one communication socket pair among all units for
+ * this purpose.
+ *
+ * You might wonder why we need a communication channel for this that is independent of the usual notification
+ * socket scheme (i.e. $NOTIFY_SOCKET). The primary difference is about trust: data sent via the $NOTIFY_SOCKET
+ * channel is only accepted if it originates from the right unit and if reception was enabled for it. The user
+ * lookup socket OTOH is only accessible by PID 1 and its children until they exec(), and always available.
+ *
+ * Note that this function is called under two circumstances: when we first initialize (in which case we
+ * allocate both the socket pair and the event source to listen on it), and when we deserialize after a reload
+ * (in which case the socket pair already exists but we still need to allocate the event source for it). */
+
+ if (m->user_lookup_fds[0] < 0) {
+
+ /* Free all secondary fields */
+ safe_close_pair(m->user_lookup_fds);
+ m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->user_lookup_fds) < 0)
+ return log_error_errno(errno, "Failed to allocate user lookup socket: %m");
+
+ (void) fd_inc_rcvbuf(m->user_lookup_fds[0], NOTIFY_RCVBUF_SIZE);
+ }
+
+ if (!m->user_lookup_event_source) {
+ r = sd_event_add_io(m->event, &m->user_lookup_event_source, m->user_lookup_fds[0], EPOLLIN, manager_dispatch_user_lookup_fd, m);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to allocate user lookup event source: %m");
+
+ /* Process even earlier than the notify event source, so that we always know first about valid UID/GID
+ * resolutions */
+ r = sd_event_source_set_priority(m->user_lookup_event_source, SD_EVENT_PRIORITY_NORMAL-11);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to set priority ot user lookup event source: %m");
+
+ (void) sd_event_source_set_description(m->user_lookup_event_source, "user-lookup");
+ }
+
+ return 0;
+}
+
+static unsigned manager_dispatch_cleanup_queue(Manager *m) {
+ Unit *u;
+ unsigned n = 0;
+
+ assert(m);
+
+ while ((u = m->cleanup_queue)) {
+ assert(u->in_cleanup_queue);
+
+ unit_free(u);
+ n++;
+ }
+
+ return n;
+}
+
+enum {
+ GC_OFFSET_IN_PATH, /* This one is on the path we were traveling */
+ GC_OFFSET_UNSURE, /* No clue */
+ GC_OFFSET_GOOD, /* We still need this unit */
+ GC_OFFSET_BAD, /* We don't need this unit anymore */
+ _GC_OFFSET_MAX
+};
+
+static void unit_gc_mark_good(Unit *u, unsigned gc_marker) {
+ Unit *other;
+ Iterator i;
+ void *v;
+
+ u->gc_marker = gc_marker + GC_OFFSET_GOOD;
+
+ /* Recursively mark referenced units as GOOD as well */
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REFERENCES], i)
+ if (other->gc_marker == gc_marker + GC_OFFSET_UNSURE)
+ unit_gc_mark_good(other, gc_marker);
+}
+
+static void unit_gc_sweep(Unit *u, unsigned gc_marker) {
+ Unit *other;
+ bool is_bad;
+ Iterator i;
+ void *v;
+
+ assert(u);
+
+ if (IN_SET(u->gc_marker - gc_marker,
+ GC_OFFSET_GOOD, GC_OFFSET_BAD, GC_OFFSET_UNSURE, GC_OFFSET_IN_PATH))
+ return;
+
+ if (u->in_cleanup_queue)
+ goto bad;
+
+ if (!unit_may_gc(u))
+ goto good;
+
+ u->gc_marker = gc_marker + GC_OFFSET_IN_PATH;
+
+ is_bad = true;
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REFERENCED_BY], i) {
+ unit_gc_sweep(other, gc_marker);
+
+ if (other->gc_marker == gc_marker + GC_OFFSET_GOOD)
+ goto good;
+
+ if (other->gc_marker != gc_marker + GC_OFFSET_BAD)
+ is_bad = false;
+ }
+
+ if (u->refs_by_target) {
+ const UnitRef *ref;
+
+ LIST_FOREACH(refs_by_target, ref, u->refs_by_target) {
+ unit_gc_sweep(ref->source, gc_marker);
+
+ if (ref->source->gc_marker == gc_marker + GC_OFFSET_GOOD)
+ goto good;
+
+ if (ref->source->gc_marker != gc_marker + GC_OFFSET_BAD)
+ is_bad = false;
+ }
+ }
+
+ if (is_bad)
+ goto bad;
+
+ /* We were unable to find anything out about this entry, so
+ * let's investigate it later */
+ u->gc_marker = gc_marker + GC_OFFSET_UNSURE;
+ unit_add_to_gc_queue(u);
+ return;
+
+bad:
+ /* We definitely know that this one is not useful anymore, so
+ * let's mark it for deletion */
+ u->gc_marker = gc_marker + GC_OFFSET_BAD;
+ unit_add_to_cleanup_queue(u);
+ return;
+
+good:
+ unit_gc_mark_good(u, gc_marker);
+}
+
+static unsigned manager_dispatch_gc_unit_queue(Manager *m) {
+ unsigned n = 0, gc_marker;
+ Unit *u;
+
+ assert(m);
+
+ /* log_debug("Running GC..."); */
+
+ m->gc_marker += _GC_OFFSET_MAX;
+ if (m->gc_marker + _GC_OFFSET_MAX <= _GC_OFFSET_MAX)
+ m->gc_marker = 1;
+
+ gc_marker = m->gc_marker;
+
+ while ((u = m->gc_unit_queue)) {
+ assert(u->in_gc_queue);
+
+ unit_gc_sweep(u, gc_marker);
+
+ LIST_REMOVE(gc_queue, m->gc_unit_queue, u);
+ u->in_gc_queue = false;
+
+ n++;
+
+ if (IN_SET(u->gc_marker - gc_marker,
+ GC_OFFSET_BAD, GC_OFFSET_UNSURE)) {
+ if (u->id)
+ log_unit_debug(u, "Collecting.");
+ u->gc_marker = gc_marker + GC_OFFSET_BAD;
+ unit_add_to_cleanup_queue(u);
+ }
+ }
+
+ return n;
+}
+
+static unsigned manager_dispatch_gc_job_queue(Manager *m) {
+ unsigned n = 0;
+ Job *j;
+
+ assert(m);
+
+ while ((j = m->gc_job_queue)) {
+ assert(j->in_gc_queue);
+
+ LIST_REMOVE(gc_queue, m->gc_job_queue, j);
+ j->in_gc_queue = false;
+
+ n++;
+
+ if (!job_may_gc(j))
+ continue;
+
+ log_unit_debug(j->unit, "Collecting job.");
+ (void) job_finish_and_invalidate(j, JOB_COLLECTED, false, false);
+ }
+
+ return n;
+}
+
+static unsigned manager_dispatch_stop_when_unneeded_queue(Manager *m) {
+ unsigned n = 0;
+ Unit *u;
+ int r;
+
+ assert(m);
+
+ while ((u = m->stop_when_unneeded_queue)) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ assert(m->stop_when_unneeded_queue);
+
+ assert(u->in_stop_when_unneeded_queue);
+ LIST_REMOVE(stop_when_unneeded_queue, m->stop_when_unneeded_queue, u);
+ u->in_stop_when_unneeded_queue = false;
+
+ n++;
+
+ if (!unit_is_unneeded(u))
+ continue;
+
+ log_unit_debug(u, "Unit is not needed anymore.");
+
+ /* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the
+ * service being unnecessary after a while. */
+
+ if (!ratelimit_below(&u->auto_stop_ratelimit)) {
+ log_unit_warning(u, "Unit not needed anymore, but not stopping since we tried this too often recently.");
+ continue;
+ }
+
+ /* Ok, nobody needs us anymore. Sniff. Then let's commit suicide */
+ r = manager_add_job(u->manager, JOB_STOP, u, JOB_FAIL, &error, NULL);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to enqueue stop job, ignoring: %s", bus_error_message(&error, r));
+ }
+
+ return n;
+}
+
+static void manager_clear_jobs_and_units(Manager *m) {
+ Unit *u;
+
+ assert(m);
+
+ while ((u = hashmap_first(m->units)))
+ unit_free(u);
+
+ manager_dispatch_cleanup_queue(m);
+
+ assert(!m->load_queue);
+ assert(!m->run_queue);
+ assert(!m->dbus_unit_queue);
+ assert(!m->dbus_job_queue);
+ assert(!m->cleanup_queue);
+ assert(!m->gc_unit_queue);
+ assert(!m->gc_job_queue);
+ assert(!m->stop_when_unneeded_queue);
+
+ assert(hashmap_isempty(m->jobs));
+ assert(hashmap_isempty(m->units));
+
+ m->n_on_console = 0;
+ m->n_running_jobs = 0;
+ m->n_installed_jobs = 0;
+ m->n_failed_jobs = 0;
+}
+
+Manager* manager_free(Manager *m) {
+ ExecDirectoryType dt;
+ UnitType c;
+
+ if (!m)
+ return NULL;
+
+ manager_clear_jobs_and_units(m);
+
+ for (c = 0; c < _UNIT_TYPE_MAX; c++)
+ if (unit_vtable[c]->shutdown)
+ unit_vtable[c]->shutdown(m);
+
+ /* Keep the cgroup hierarchy in place except when we know we are going down for good */
+ manager_shutdown_cgroup(m, IN_SET(m->objective, MANAGER_EXIT, MANAGER_REBOOT, MANAGER_POWEROFF, MANAGER_HALT, MANAGER_KEXEC));
+
+ lookup_paths_flush_generator(&m->lookup_paths);
+
+ bus_done(m);
+
+ exec_runtime_vacuum(m);
+ hashmap_free(m->exec_runtime_by_id);
+
+ dynamic_user_vacuum(m, false);
+ hashmap_free(m->dynamic_users);
+
+ hashmap_free(m->units);
+ hashmap_free(m->units_by_invocation_id);
+ hashmap_free(m->jobs);
+ hashmap_free(m->watch_pids);
+ hashmap_free(m->watch_bus);
+
+ set_free(m->startup_units);
+ set_free(m->failed_units);
+
+ sd_event_source_unref(m->signal_event_source);
+ sd_event_source_unref(m->sigchld_event_source);
+ sd_event_source_unref(m->notify_event_source);
+ sd_event_source_unref(m->cgroups_agent_event_source);
+ sd_event_source_unref(m->time_change_event_source);
+ sd_event_source_unref(m->timezone_change_event_source);
+ sd_event_source_unref(m->jobs_in_progress_event_source);
+ sd_event_source_unref(m->run_queue_event_source);
+ sd_event_source_unref(m->user_lookup_event_source);
+ sd_event_source_unref(m->sync_bus_names_event_source);
+
+ safe_close(m->signal_fd);
+ safe_close(m->notify_fd);
+ safe_close(m->cgroups_agent_fd);
+ safe_close(m->time_change_fd);
+ safe_close_pair(m->user_lookup_fds);
+
+ manager_close_ask_password(m);
+
+ manager_close_idle_pipe(m);
+
+ sd_event_unref(m->event);
+
+ free(m->notify_socket);
+
+ lookup_paths_free(&m->lookup_paths);
+ strv_free(m->transient_environment);
+ strv_free(m->client_environment);
+
+ hashmap_free(m->cgroup_unit);
+ set_free_free(m->unit_path_cache);
+
+ free(m->switch_root);
+ free(m->switch_root_init);
+
+ rlimit_free_all(m->rlimit);
+
+ assert(hashmap_isempty(m->units_requiring_mounts_for));
+ hashmap_free(m->units_requiring_mounts_for);
+
+ hashmap_free(m->uid_refs);
+ hashmap_free(m->gid_refs);
+
+ for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++)
+ m->prefix[dt] = mfree(m->prefix[dt]);
+
+ return mfree(m);
+}
+
+static void manager_enumerate_perpetual(Manager *m) {
+ UnitType c;
+
+ assert(m);
+
+ /* Let's ask every type to load all units from disk/kernel that it might know */
+ for (c = 0; c < _UNIT_TYPE_MAX; c++) {
+ if (!unit_type_supported(c)) {
+ log_debug("Unit type .%s is not supported on this system.", unit_type_to_string(c));
+ continue;
+ }
+
+ if (unit_vtable[c]->enumerate_perpetual)
+ unit_vtable[c]->enumerate_perpetual(m);
+ }
+}
+
+static void manager_enumerate(Manager *m) {
+ UnitType c;
+
+ assert(m);
+
+ /* Let's ask every type to load all units from disk/kernel that it might know */
+ for (c = 0; c < _UNIT_TYPE_MAX; c++) {
+ if (!unit_type_supported(c)) {
+ log_debug("Unit type .%s is not supported on this system.", unit_type_to_string(c));
+ continue;
+ }
+
+ if (unit_vtable[c]->enumerate)
+ unit_vtable[c]->enumerate(m);
+ }
+
+ manager_dispatch_load_queue(m);
+}
+
+static void manager_coldplug(Manager *m) {
+ Iterator i;
+ Unit *u;
+ char *k;
+ int r;
+
+ assert(m);
+
+ log_debug("Invoking unit coldplug() handlers…");
+
+ /* Let's place the units back into their deserialized state */
+ HASHMAP_FOREACH_KEY(u, k, m->units, i) {
+
+ /* ignore aliases */
+ if (u->id != k)
+ continue;
+
+ r = unit_coldplug(u);
+ if (r < 0)
+ log_warning_errno(r, "We couldn't coldplug %s, proceeding anyway: %m", u->id);
+ }
+}
+
+static void manager_catchup(Manager *m) {
+ Iterator i;
+ Unit *u;
+ char *k;
+
+ assert(m);
+
+ log_debug("Invoking unit catchup() handlers…");
+
+ /* Let's catch up on any state changes that happened while we were reloading/reexecing */
+ HASHMAP_FOREACH_KEY(u, k, m->units, i) {
+
+ /* ignore aliases */
+ if (u->id != k)
+ continue;
+
+ unit_catchup(u);
+ }
+}
+
+static void manager_build_unit_path_cache(Manager *m) {
+ char **i;
+ int r;
+
+ assert(m);
+
+ set_free_free(m->unit_path_cache);
+
+ m->unit_path_cache = set_new(&path_hash_ops);
+ if (!m->unit_path_cache) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ /* This simply builds a list of files we know exist, so that
+ * we don't always have to go to disk */
+
+ STRV_FOREACH(i, m->lookup_paths.search_path) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(*i);
+ if (!d) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open directory %s, ignoring: %m", *i);
+ continue;
+ }
+
+ FOREACH_DIRENT(de, d, r = -errno; goto fail) {
+ char *p;
+
+ p = strjoin(streq(*i, "/") ? "" : *i, "/", de->d_name);
+ if (!p) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ r = set_consume(m->unit_path_cache, p);
+ if (r < 0)
+ goto fail;
+ }
+ }
+
+ return;
+
+fail:
+ log_warning_errno(r, "Failed to build unit path cache, proceeding without: %m");
+ m->unit_path_cache = set_free_free(m->unit_path_cache);
+}
+
+static void manager_distribute_fds(Manager *m, FDSet *fds) {
+ Iterator i;
+ Unit *u;
+
+ assert(m);
+
+ HASHMAP_FOREACH(u, m->units, i) {
+
+ if (fdset_size(fds) <= 0)
+ break;
+
+ if (!UNIT_VTABLE(u)->distribute_fds)
+ continue;
+
+ UNIT_VTABLE(u)->distribute_fds(u, fds);
+ }
+}
+
+static bool manager_dbus_is_running(Manager *m, bool deserialized) {
+ Unit *u;
+
+ assert(m);
+
+ /* This checks whether the dbus instance we are supposed to expose our APIs on is up. We check both the socket
+ * and the service unit. If the 'deserialized' parameter is true we'll check the deserialized state of the unit
+ * rather than the current one. */
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return false;
+
+ u = manager_get_unit(m, SPECIAL_DBUS_SOCKET);
+ if (!u)
+ return false;
+ if ((deserialized ? SOCKET(u)->deserialized_state : SOCKET(u)->state) != SOCKET_RUNNING)
+ return false;
+
+ u = manager_get_unit(m, SPECIAL_DBUS_SERVICE);
+ if (!u)
+ return false;
+ if (!IN_SET((deserialized ? SERVICE(u)->deserialized_state : SERVICE(u)->state), SERVICE_RUNNING, SERVICE_RELOAD))
+ return false;
+
+ return true;
+}
+
+static void manager_setup_bus(Manager *m) {
+ assert(m);
+
+ /* Let's set up our private bus connection now, unconditionally */
+ (void) bus_init_private(m);
+
+ /* If we are in --user mode also connect to the system bus now */
+ if (MANAGER_IS_USER(m))
+ (void) bus_init_system(m);
+
+ /* Let's connect to the bus now, but only if the unit is supposed to be up */
+ if (manager_dbus_is_running(m, MANAGER_IS_RELOADING(m))) {
+ (void) bus_init_api(m);
+
+ if (MANAGER_IS_SYSTEM(m))
+ (void) bus_init_system(m);
+ }
+}
+
+static void manager_preset_all(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->first_boot <= 0)
+ return;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return;
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return;
+
+ /* If this is the first boot, and we are in the host system, then preset everything */
+ r = unit_file_preset_all(UNIT_FILE_SYSTEM, 0, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, NULL, 0);
+ if (r < 0)
+ log_full_errno(r == -EEXIST ? LOG_NOTICE : LOG_WARNING, r,
+ "Failed to populate /etc with preset unit settings, ignoring: %m");
+ else
+ log_info("Populated /etc with preset unit settings.");
+}
+
+static void manager_vacuum(Manager *m) {
+ assert(m);
+
+ /* Release any dynamic users no longer referenced */
+ dynamic_user_vacuum(m, true);
+
+ /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
+ manager_vacuum_uid_refs(m);
+ manager_vacuum_gid_refs(m);
+
+ /* Release any runtimes no longer referenced */
+ exec_runtime_vacuum(m);
+}
+
+static void manager_ready(Manager *m) {
+ assert(m);
+
+ /* After having loaded everything, do the final round of catching up with what might have changed */
+
+ m->objective = MANAGER_OK; /* Tell everyone we are up now */
+
+ /* It might be safe to log to the journal now and connect to dbus */
+ manager_recheck_journal(m);
+ manager_recheck_dbus(m);
+
+ /* Sync current state of bus names with our set of listening units */
+ (void) manager_enqueue_sync_bus_names(m);
+
+ /* Let's finally catch up with any changes that took place while we were reloading/reexecing */
+ manager_catchup(m);
+}
+
+static Manager* manager_reloading_start(Manager *m) {
+ m->n_reloading++;
+ return m;
+}
+static void manager_reloading_stopp(Manager **m) {
+ if (*m) {
+ assert((*m)->n_reloading > 0);
+ (*m)->n_reloading--;
+ }
+}
+
+int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
+ int r;
+
+ assert(m);
+
+ /* If we are running in test mode, we still want to run the generators,
+ * but we should not touch the real generator directories. */
+ r = lookup_paths_init(&m->lookup_paths, m->unit_file_scope,
+ MANAGER_IS_TEST_RUN(m) ? LOOKUP_PATHS_TEMPORARY_GENERATED : 0,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize path lookup table: %m");
+
+ dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_GENERATORS_START));
+ r = manager_run_environment_generators(m);
+ if (r >= 0)
+ r = manager_run_generators(m);
+ dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_GENERATORS_FINISH));
+ if (r < 0)
+ return r;
+
+ manager_preset_all(m);
+
+ r = lookup_paths_reduce(&m->lookup_paths);
+ if (r < 0)
+ log_warning_errno(r, "Failed ot reduce unit file paths, ignoring: %m");
+
+ manager_build_unit_path_cache(m);
+
+ {
+ /* This block is (optionally) done with the reloading counter bumped */
+ _cleanup_(manager_reloading_stopp) Manager *reloading = NULL;
+
+ /* If we will deserialize make sure that during enumeration this is already known, so we increase the
+ * counter here already */
+ if (serialization)
+ reloading = manager_reloading_start(m);
+
+ /* First, enumerate what we can from all config files */
+ dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_UNITS_LOAD_START));
+ manager_enumerate_perpetual(m);
+ manager_enumerate(m);
+ dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_UNITS_LOAD_FINISH));
+
+ /* Second, deserialize if there is something to deserialize */
+ if (serialization) {
+ r = manager_deserialize(m, serialization, fds);
+ if (r < 0)
+ return log_error_errno(r, "Deserialization failed: %m");
+ }
+
+ /* Any fds left? Find some unit which wants them. This is useful to allow container managers to pass
+ * some file descriptors to us pre-initialized. This enables socket-based activation of entire
+ * containers. */
+ manager_distribute_fds(m, fds);
+
+ /* We might have deserialized the notify fd, but if we didn't then let's create the bus now */
+ r = manager_setup_notify(m);
+ if (r < 0)
+ /* No sense to continue without notifications, our children would fail anyway. */
+ return r;
+
+ r = manager_setup_cgroups_agent(m);
+ if (r < 0)
+ /* Likewise, no sense to continue without empty cgroup notifications. */
+ return r;
+
+ r = manager_setup_user_lookup_fd(m);
+ if (r < 0)
+ /* This shouldn't fail, except if things are really broken. */
+ return r;
+
+ /* Connect to the bus if we are good for it */
+ manager_setup_bus(m);
+
+ /* Now that we are connected to all possible busses, let's deserialize who is tracking us. */
+ r = bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed);
+ if (r < 0)
+ log_warning_errno(r, "Failed to deserialized tracked clients, ignoring: %m");
+ m->deserialized_subscribed = strv_free(m->deserialized_subscribed);
+
+ /* Third, fire things up! */
+ manager_coldplug(m);
+
+ /* Clean up runtime objects */
+ manager_vacuum(m);
+
+ if (serialization)
+ /* Let's wait for the UnitNew/JobNew messages being sent, before we notify that the
+ * reload is finished */
+ m->send_reloading_done = true;
+ }
+
+ manager_ready(m);
+
+ return 0;
+}
+
+int manager_add_job(Manager *m, JobType type, Unit *unit, JobMode mode, sd_bus_error *e, Job **_ret) {
+ int r;
+ Transaction *tr;
+
+ assert(m);
+ assert(type < _JOB_TYPE_MAX);
+ assert(unit);
+ assert(mode < _JOB_MODE_MAX);
+
+ if (mode == JOB_ISOLATE && type != JOB_START)
+ return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Isolate is only valid for start.");
+
+ if (mode == JOB_ISOLATE && !unit->allow_isolate)
+ return sd_bus_error_setf(e, BUS_ERROR_NO_ISOLATION, "Operation refused, unit may not be isolated.");
+
+ log_unit_debug(unit, "Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode));
+
+ type = job_type_collapse(type, unit);
+
+ tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY);
+ if (!tr)
+ return -ENOMEM;
+
+ r = transaction_add_job_and_dependencies(tr, type, unit, NULL, true, false,
+ IN_SET(mode, JOB_IGNORE_DEPENDENCIES, JOB_IGNORE_REQUIREMENTS),
+ mode == JOB_IGNORE_DEPENDENCIES, e);
+ if (r < 0)
+ goto tr_abort;
+
+ if (mode == JOB_ISOLATE) {
+ r = transaction_add_isolate_jobs(tr, m);
+ if (r < 0)
+ goto tr_abort;
+ }
+
+ r = transaction_activate(tr, m, mode, e);
+ if (r < 0)
+ goto tr_abort;
+
+ log_unit_debug(unit,
+ "Enqueued job %s/%s as %u", unit->id,
+ job_type_to_string(type), (unsigned) tr->anchor_job->id);
+
+ if (_ret)
+ *_ret = tr->anchor_job;
+
+ transaction_free(tr);
+ return 0;
+
+tr_abort:
+ transaction_abort(tr);
+ transaction_free(tr);
+ return r;
+}
+
+int manager_add_job_by_name(Manager *m, JobType type, const char *name, JobMode mode, sd_bus_error *e, Job **ret) {
+ Unit *unit = NULL; /* just to appease gcc, initialization is not really necessary */
+ int r;
+
+ assert(m);
+ assert(type < _JOB_TYPE_MAX);
+ assert(name);
+ assert(mode < _JOB_MODE_MAX);
+
+ r = manager_load_unit(m, name, NULL, NULL, &unit);
+ if (r < 0)
+ return r;
+ assert(unit);
+
+ return manager_add_job(m, type, unit, mode, e, ret);
+}
+
+int manager_add_job_by_name_and_warn(Manager *m, JobType type, const char *name, JobMode mode, Job **ret) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(m);
+ assert(type < _JOB_TYPE_MAX);
+ assert(name);
+ assert(mode < _JOB_MODE_MAX);
+
+ r = manager_add_job_by_name(m, type, name, mode, &error, ret);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to enqueue %s job for %s: %s", job_mode_to_string(mode), name, bus_error_message(&error, r));
+
+ return r;
+}
+
+int manager_propagate_reload(Manager *m, Unit *unit, JobMode mode, sd_bus_error *e) {
+ int r;
+ Transaction *tr;
+
+ assert(m);
+ assert(unit);
+ assert(mode < _JOB_MODE_MAX);
+ assert(mode != JOB_ISOLATE); /* Isolate is only valid for start */
+
+ tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY);
+ if (!tr)
+ return -ENOMEM;
+
+ /* We need an anchor job */
+ r = transaction_add_job_and_dependencies(tr, JOB_NOP, unit, NULL, false, false, true, true, e);
+ if (r < 0)
+ goto tr_abort;
+
+ /* Failure in adding individual dependencies is ignored, so this always succeeds. */
+ transaction_add_propagate_reload_jobs(tr, unit, tr->anchor_job, mode == JOB_IGNORE_DEPENDENCIES, e);
+
+ r = transaction_activate(tr, m, mode, e);
+ if (r < 0)
+ goto tr_abort;
+
+ transaction_free(tr);
+ return 0;
+
+tr_abort:
+ transaction_abort(tr);
+ transaction_free(tr);
+ return r;
+}
+
+Job *manager_get_job(Manager *m, uint32_t id) {
+ assert(m);
+
+ return hashmap_get(m->jobs, UINT32_TO_PTR(id));
+}
+
+Unit *manager_get_unit(Manager *m, const char *name) {
+ assert(m);
+ assert(name);
+
+ return hashmap_get(m->units, name);
+}
+
+static int manager_dispatch_target_deps_queue(Manager *m) {
+ Unit *u;
+ unsigned k;
+ int r = 0;
+
+ static const UnitDependency deps[] = {
+ UNIT_REQUIRED_BY,
+ UNIT_REQUISITE_OF,
+ UNIT_WANTED_BY,
+ UNIT_BOUND_BY
+ };
+
+ assert(m);
+
+ while ((u = m->target_deps_queue)) {
+ assert(u->in_target_deps_queue);
+
+ LIST_REMOVE(target_deps_queue, u->manager->target_deps_queue, u);
+ u->in_target_deps_queue = false;
+
+ for (k = 0; k < ELEMENTSOF(deps); k++) {
+ Unit *target;
+ Iterator i;
+ void *v;
+
+ HASHMAP_FOREACH_KEY(v, target, u->dependencies[deps[k]], i) {
+ r = unit_add_default_target_dependency(u, target);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ return r;
+}
+
+unsigned manager_dispatch_load_queue(Manager *m) {
+ Unit *u;
+ unsigned n = 0;
+
+ assert(m);
+
+ /* Make sure we are not run recursively */
+ if (m->dispatching_load_queue)
+ return 0;
+
+ m->dispatching_load_queue = true;
+
+ /* Dispatches the load queue. Takes a unit from the queue and
+ * tries to load its data until the queue is empty */
+
+ while ((u = m->load_queue)) {
+ assert(u->in_load_queue);
+
+ unit_load(u);
+ n++;
+ }
+
+ m->dispatching_load_queue = false;
+
+ /* Dispatch the units waiting for their target dependencies to be added now, as all targets that we know about
+ * should be loaded and have aliases resolved */
+ (void) manager_dispatch_target_deps_queue(m);
+
+ return n;
+}
+
+int manager_load_unit_prepare(
+ Manager *m,
+ const char *name,
+ const char *path,
+ sd_bus_error *e,
+ Unit **_ret) {
+
+ _cleanup_(unit_freep) Unit *cleanup_ret = NULL;
+ Unit *ret;
+ UnitType t;
+ int r;
+
+ assert(m);
+ assert(name || path);
+ assert(_ret);
+
+ /* This will prepare the unit for loading, but not actually
+ * load anything from disk. */
+
+ if (path && !is_path(path))
+ return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not absolute.", path);
+
+ if (!name)
+ name = basename(path);
+
+ t = unit_name_to_type(name);
+
+ if (t == _UNIT_TYPE_INVALID || !unit_name_is_valid(name, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) {
+ if (unit_name_is_valid(name, UNIT_NAME_TEMPLATE))
+ return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Unit name %s is missing the instance name.", name);
+
+ return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Unit name %s is not valid.", name);
+ }
+
+ ret = manager_get_unit(m, name);
+ if (ret) {
+ *_ret = ret;
+ return 1;
+ }
+
+ ret = cleanup_ret = unit_new(m, unit_vtable[t]->object_size);
+ if (!ret)
+ return -ENOMEM;
+
+ if (path) {
+ ret->fragment_path = strdup(path);
+ if (!ret->fragment_path)
+ return -ENOMEM;
+ }
+
+ r = unit_add_name(ret, name);
+ if (r < 0)
+ return r;
+
+ unit_add_to_load_queue(ret);
+ unit_add_to_dbus_queue(ret);
+ unit_add_to_gc_queue(ret);
+
+ *_ret = ret;
+ cleanup_ret = NULL;
+
+ return 0;
+}
+
+int manager_load_unit(
+ Manager *m,
+ const char *name,
+ const char *path,
+ sd_bus_error *e,
+ Unit **_ret) {
+
+ int r;
+
+ assert(m);
+ assert(_ret);
+
+ /* This will load the service information files, but not actually
+ * start any services or anything. */
+
+ r = manager_load_unit_prepare(m, name, path, e, _ret);
+ if (r != 0)
+ return r;
+
+ manager_dispatch_load_queue(m);
+
+ *_ret = unit_follow_merge(*_ret);
+ return 0;
+}
+
+int manager_load_startable_unit_or_warn(
+ Manager *m,
+ const char *name,
+ const char *path,
+ Unit **ret) {
+
+ /* Load a unit, make sure it loaded fully and is not masked. */
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Unit *unit;
+ int r;
+
+ r = manager_load_unit(m, name, path, &error, &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load %s %s: %s",
+ name ? "unit" : "unit file", name ?: path,
+ bus_error_message(&error, r));
+
+ r = bus_unit_validate_load_state(unit, &error);
+ if (r < 0)
+ return log_error_errno(r, "%s", bus_error_message(&error, r));
+
+ *ret = unit;
+ return 0;
+}
+
+void manager_dump_jobs(Manager *s, FILE *f, const char *prefix) {
+ Iterator i;
+ Job *j;
+
+ assert(s);
+ assert(f);
+
+ HASHMAP_FOREACH(j, s->jobs, i)
+ job_dump(j, f, prefix);
+}
+
+void manager_dump_units(Manager *s, FILE *f, const char *prefix) {
+ Iterator i;
+ Unit *u;
+ const char *t;
+
+ assert(s);
+ assert(f);
+
+ HASHMAP_FOREACH_KEY(u, t, s->units, i)
+ if (u->id == t)
+ unit_dump(u, f, prefix);
+}
+
+void manager_dump(Manager *m, FILE *f, const char *prefix) {
+ ManagerTimestamp q;
+
+ assert(m);
+ assert(f);
+
+ for (q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+
+ if (dual_timestamp_is_set(m->timestamps + q))
+ fprintf(f, "%sTimestamp %s: %s\n",
+ strempty(prefix),
+ manager_timestamp_to_string(q),
+ format_timestamp(buf, sizeof(buf), m->timestamps[q].realtime));
+ }
+
+ manager_dump_units(m, f, prefix);
+ manager_dump_jobs(m, f, prefix);
+}
+
+int manager_get_dump_string(Manager *m, char **ret) {
+ _cleanup_free_ char *dump = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t size;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ f = open_memstream(&dump, &size);
+ if (!f)
+ return -errno;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ manager_dump(m, f, NULL);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ f = safe_fclose(f);
+
+ *ret = TAKE_PTR(dump);
+
+ return 0;
+}
+
+void manager_clear_jobs(Manager *m) {
+ Job *j;
+
+ assert(m);
+
+ while ((j = hashmap_first(m->jobs)))
+ /* No need to recurse. We're cancelling all jobs. */
+ job_finish_and_invalidate(j, JOB_CANCELED, false, false);
+}
+
+static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) {
+ Manager *m = userdata;
+ Job *j;
+
+ assert(source);
+ assert(m);
+
+ while ((j = m->run_queue)) {
+ assert(j->installed);
+ assert(j->in_run_queue);
+
+ (void) job_run_and_invalidate(j);
+ }
+
+ if (m->n_running_jobs > 0)
+ manager_watch_jobs_in_progress(m);
+
+ if (m->n_on_console > 0)
+ manager_watch_idle_pipe(m);
+
+ return 1;
+}
+
+static unsigned manager_dispatch_dbus_queue(Manager *m) {
+ unsigned n = 0, budget;
+ Unit *u;
+ Job *j;
+
+ assert(m);
+
+ /* When we are reloading, let's not wait with generating signals, since we need to exit the manager as quickly
+ * as we can. There's no point in throttling generation of signals in that case. */
+ if (MANAGER_IS_RELOADING(m) || m->send_reloading_done || m->pending_reload_message)
+ budget = (unsigned) -1; /* infinite budget in this case */
+ else {
+ /* Anything to do at all? */
+ if (!m->dbus_unit_queue && !m->dbus_job_queue)
+ return 0;
+
+ /* Do we have overly many messages queued at the moment? If so, let's not enqueue more on top, let's
+ * sit this cycle out, and process things in a later cycle when the queues got a bit emptier. */
+ if (manager_bus_n_queued_write(m) > MANAGER_BUS_BUSY_THRESHOLD)
+ return 0;
+
+ /* Only process a certain number of units/jobs per event loop iteration. Even if the bus queue wasn't
+ * overly full before this call we shouldn't increase it in size too wildly in one step, and we
+ * shouldn't monopolize CPU time with generating these messages. Note the difference in counting of
+ * this "budget" and the "threshold" above: the "budget" is decreased only once per generated message,
+ * regardless how many busses/direct connections it is enqueued on, while the "threshold" is applied to
+ * each queued instance of bus message, i.e. if the same message is enqueued to five busses/direct
+ * connections it will be counted five times. This difference in counting ("references"
+ * vs. "instances") is primarily a result of the fact that it's easier to implement it this way,
+ * however it also reflects the thinking that the "threshold" should put a limit on used queue memory,
+ * i.e. space, while the "budget" should put a limit on time. Also note that the "threshold" is
+ * currently chosen much higher than the "budget". */
+ budget = MANAGER_BUS_MESSAGE_BUDGET;
+ }
+
+ while (budget != 0 && (u = m->dbus_unit_queue)) {
+
+ assert(u->in_dbus_queue);
+
+ bus_unit_send_change_signal(u);
+ n++;
+
+ if (budget != (unsigned) -1)
+ budget--;
+ }
+
+ while (budget != 0 && (j = m->dbus_job_queue)) {
+ assert(j->in_dbus_queue);
+
+ bus_job_send_change_signal(j);
+ n++;
+
+ if (budget != (unsigned) -1)
+ budget--;
+ }
+
+ if (m->send_reloading_done) {
+ m->send_reloading_done = false;
+ bus_manager_send_reloading(m, false);
+ n++;
+ }
+
+ if (m->pending_reload_message) {
+ bus_send_pending_reload_message(m);
+ n++;
+ }
+
+ return n;
+}
+
+static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ char buf[PATH_MAX];
+ ssize_t n;
+
+ n = recv(fd, buf, sizeof(buf), 0);
+ if (n < 0)
+ return log_error_errno(errno, "Failed to read cgroups agent message: %m");
+ if (n == 0) {
+ log_error("Got zero-length cgroups agent message, ignoring.");
+ return 0;
+ }
+ if ((size_t) n >= sizeof(buf)) {
+ log_error("Got overly long cgroups agent message, ignoring.");
+ return 0;
+ }
+
+ if (memchr(buf, 0, n)) {
+ log_error("Got cgroups agent message with embedded NUL byte, ignoring.");
+ return 0;
+ }
+ buf[n] = 0;
+
+ manager_notify_cgroup_empty(m, buf);
+ (void) bus_forward_agent_released(m, buf);
+
+ return 0;
+}
+
+static void manager_invoke_notify_message(
+ Manager *m,
+ Unit *u,
+ const struct ucred *ucred,
+ const char *buf,
+ FDSet *fds) {
+
+ assert(m);
+ assert(u);
+ assert(ucred);
+ assert(buf);
+
+ if (u->notifygen == m->notifygen) /* Already invoked on this same unit in this same iteration? */
+ return;
+ u->notifygen = m->notifygen;
+
+ if (UNIT_VTABLE(u)->notify_message) {
+ _cleanup_strv_free_ char **tags = NULL;
+
+ tags = strv_split(buf, NEWLINE);
+ if (!tags) {
+ log_oom();
+ return;
+ }
+
+ UNIT_VTABLE(u)->notify_message(u, ucred, tags, fds);
+
+ } else if (DEBUG_LOGGING) {
+ _cleanup_free_ char *x = NULL, *y = NULL;
+
+ x = ellipsize(buf, 20, 90);
+ if (x)
+ y = cescape(x);
+
+ log_unit_debug(u, "Got notification message \"%s\", ignoring.", strnull(y));
+ }
+}
+
+static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+
+ _cleanup_fdset_free_ FDSet *fds = NULL;
+ Manager *m = userdata;
+ char buf[NOTIFY_BUFFER_MAX+1];
+ struct iovec iovec = {
+ .iov_base = buf,
+ .iov_len = sizeof(buf)-1,
+ };
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int) * NOTIFY_FD_MAX)];
+ } control = {};
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+
+ struct cmsghdr *cmsg;
+ struct ucred *ucred = NULL;
+ _cleanup_free_ Unit **array_copy = NULL;
+ Unit *u1, *u2, **array;
+ int r, *fd_array = NULL;
+ size_t n_fds = 0;
+ bool found = false;
+ ssize_t n;
+
+ assert(m);
+ assert(m->notify_fd == fd);
+
+ if (revents != EPOLLIN) {
+ log_warning("Got unexpected poll event for notify fd.");
+ return 0;
+ }
+
+ n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC);
+ if (n < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0; /* Spurious wakeup, try again */
+
+ /* If this is any other, real error, then let's stop processing this socket. This of course means we
+ * won't take notification messages anymore, but that's still better than busy looping around this:
+ * being woken up over and over again but being unable to actually read the message off the socket. */
+ return log_error_errno(errno, "Failed to receive notification message: %m");
+ }
+
+ CMSG_FOREACH(cmsg, &msghdr) {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+
+ fd_array = (int*) CMSG_DATA(cmsg);
+ n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+ } else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
+
+ ucred = (struct ucred*) CMSG_DATA(cmsg);
+ }
+ }
+
+ if (n_fds > 0) {
+ assert(fd_array);
+
+ r = fdset_new_array(&fds, fd_array, n_fds);
+ if (r < 0) {
+ close_many(fd_array, n_fds);
+ log_oom();
+ return 0;
+ }
+ }
+
+ if (!ucred || !pid_is_valid(ucred->pid)) {
+ log_warning("Received notify message without valid credentials. Ignoring.");
+ return 0;
+ }
+
+ if ((size_t) n >= sizeof(buf) || (msghdr.msg_flags & MSG_TRUNC)) {
+ log_warning("Received notify message exceeded maximum size. Ignoring.");
+ return 0;
+ }
+
+ /* As extra safety check, let's make sure the string we get doesn't contain embedded NUL bytes. We permit one
+ * trailing NUL byte in the message, but don't expect it. */
+ if (n > 1 && memchr(buf, 0, n-1)) {
+ log_warning("Received notify message with embedded NUL bytes. Ignoring.");
+ return 0;
+ }
+
+ /* Make sure it's NUL-terminated. */
+ buf[n] = 0;
+
+ /* Increase the generation counter used for filtering out duplicate unit invocations. */
+ m->notifygen++;
+
+ /* Notify every unit that might be interested, which might be multiple. */
+ u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid);
+ u2 = hashmap_get(m->watch_pids, PID_TO_PTR(ucred->pid));
+ array = hashmap_get(m->watch_pids, PID_TO_PTR(-ucred->pid));
+ if (array) {
+ size_t k = 0;
+
+ while (array[k])
+ k++;
+
+ array_copy = newdup(Unit*, array, k+1);
+ if (!array_copy)
+ log_oom();
+ }
+ /* And now invoke the per-unit callbacks. Note that manager_invoke_notify_message() will handle duplicate units
+ * make sure we only invoke each unit's handler once. */
+ if (u1) {
+ manager_invoke_notify_message(m, u1, ucred, buf, fds);
+ found = true;
+ }
+ if (u2) {
+ manager_invoke_notify_message(m, u2, ucred, buf, fds);
+ found = true;
+ }
+ if (array_copy)
+ for (size_t i = 0; array_copy[i]; i++) {
+ manager_invoke_notify_message(m, array_copy[i], ucred, buf, fds);
+ found = true;
+ }
+
+ if (!found)
+ log_warning("Cannot find unit for notify message of PID "PID_FMT", ignoring.", ucred->pid);
+
+ if (fdset_size(fds) > 0)
+ log_warning("Got extra auxiliary fds with notification message, closing them.");
+
+ return 0;
+}
+
+static void manager_invoke_sigchld_event(
+ Manager *m,
+ Unit *u,
+ const siginfo_t *si) {
+
+ assert(m);
+ assert(u);
+ assert(si);
+
+ /* Already invoked the handler of this unit in this iteration? Then don't process this again */
+ if (u->sigchldgen == m->sigchldgen)
+ return;
+ u->sigchldgen = m->sigchldgen;
+
+ log_unit_debug(u, "Child "PID_FMT" belongs to %s.", si->si_pid, u->id);
+ unit_unwatch_pid(u, si->si_pid);
+
+ if (UNIT_VTABLE(u)->sigchld_event)
+ UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, si->si_status);
+}
+
+static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) {
+ Manager *m = userdata;
+ siginfo_t si = {};
+ int r;
+
+ assert(source);
+ assert(m);
+
+ /* First we call waitd() for a PID and do not reap the zombie. That way we can still access /proc/$PID for it
+ * while it is a zombie. */
+
+ if (waitid(P_ALL, 0, &si, WEXITED|WNOHANG|WNOWAIT) < 0) {
+
+ if (errno != ECHILD)
+ log_error_errno(errno, "Failed to peek for child with waitid(), ignoring: %m");
+
+ goto turn_off;
+ }
+
+ if (si.si_pid <= 0)
+ goto turn_off;
+
+ if (IN_SET(si.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED)) {
+ _cleanup_free_ Unit **array_copy = NULL;
+ _cleanup_free_ char *name = NULL;
+ Unit *u1, *u2, **array;
+
+ (void) get_process_comm(si.si_pid, &name);
+
+ log_debug("Child "PID_FMT" (%s) died (code=%s, status=%i/%s)",
+ si.si_pid, strna(name),
+ sigchld_code_to_string(si.si_code),
+ si.si_status,
+ strna(si.si_code == CLD_EXITED
+ ? exit_status_to_string(si.si_status, EXIT_STATUS_FULL)
+ : signal_to_string(si.si_status)));
+
+ /* Increase the generation counter used for filtering out duplicate unit invocations */
+ m->sigchldgen++;
+
+ /* And now figure out the unit this belongs to, it might be multiple... */
+ u1 = manager_get_unit_by_pid_cgroup(m, si.si_pid);
+ u2 = hashmap_get(m->watch_pids, PID_TO_PTR(si.si_pid));
+ array = hashmap_get(m->watch_pids, PID_TO_PTR(-si.si_pid));
+ if (array) {
+ size_t n = 0;
+
+ /* Cound how many entries the array has */
+ while (array[n])
+ n++;
+
+ /* Make a copy of the array so that we don't trip up on the array changing beneath us */
+ array_copy = newdup(Unit*, array, n+1);
+ if (!array_copy)
+ log_oom();
+ }
+
+ /* Finally, execute them all. Note that u1, u2 and the array might contain duplicates, but
+ * that's fine, manager_invoke_sigchld_event() will ensure we only invoke the handlers once for
+ * each iteration. */
+ if (u1)
+ manager_invoke_sigchld_event(m, u1, &si);
+ if (u2)
+ manager_invoke_sigchld_event(m, u2, &si);
+ if (array_copy)
+ for (size_t i = 0; array_copy[i]; i++)
+ manager_invoke_sigchld_event(m, array_copy[i], &si);
+ }
+
+ /* And now, we actually reap the zombie. */
+ if (waitid(P_PID, si.si_pid, &si, WEXITED) < 0) {
+ log_error_errno(errno, "Failed to dequeue child, ignoring: %m");
+ return 0;
+ }
+
+ return 0;
+
+turn_off:
+ /* All children processed for now, turn off event source */
+
+ r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ return log_error_errno(r, "Failed to disable SIGCHLD event source: %m");
+
+ return 0;
+}
+
+static void manager_start_target(Manager *m, const char *name, JobMode mode) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ log_debug("Activating special unit %s", name);
+
+ r = manager_add_job_by_name(m, JOB_START, name, mode, &error, NULL);
+ if (r < 0)
+ log_error("Failed to enqueue %s job: %s", name, bus_error_message(&error, r));
+}
+
+static void manager_handle_ctrl_alt_del(Manager *m) {
+ /* If the user presses C-A-D more than
+ * 7 times within 2s, we reboot/shutdown immediately,
+ * unless it was disabled in system.conf */
+
+ if (ratelimit_below(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == EMERGENCY_ACTION_NONE)
+ manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY);
+ else
+ emergency_action(m, m->cad_burst_action, EMERGENCY_ACTION_WARN, NULL, -1,
+ "Ctrl-Alt-Del was pressed more than 7 times within 2s");
+}
+
+static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ ssize_t n;
+ struct signalfd_siginfo sfsi;
+ int r;
+
+ assert(m);
+ assert(m->signal_fd == fd);
+
+ if (revents != EPOLLIN) {
+ log_warning("Got unexpected events from signal file descriptor.");
+ return 0;
+ }
+
+ n = read(m->signal_fd, &sfsi, sizeof(sfsi));
+ if (n != sizeof(sfsi)) {
+ if (n >= 0) {
+ log_warning("Truncated read from signal fd (%zu bytes), ignoring!", n);
+ return 0;
+ }
+
+ if (IN_SET(errno, EINTR, EAGAIN))
+ return 0;
+
+ /* We return an error here, which will kill this handler,
+ * to avoid a busy loop on read error. */
+ return log_error_errno(errno, "Reading from signal fd failed: %m");
+ }
+
+ log_received_signal(sfsi.ssi_signo == SIGCHLD ||
+ (sfsi.ssi_signo == SIGTERM && MANAGER_IS_USER(m))
+ ? LOG_DEBUG : LOG_INFO,
+ &sfsi);
+
+ switch (sfsi.ssi_signo) {
+
+ case SIGCHLD:
+ r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_ON);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enable SIGCHLD event source, ignoring: %m");
+
+ break;
+
+ case SIGTERM:
+ if (MANAGER_IS_SYSTEM(m)) {
+ /* This is for compatibility with the original sysvinit */
+ if (verify_run_space_and_log("Refusing to reexecute") < 0)
+ break;
+
+ m->objective = MANAGER_REEXECUTE;
+ break;
+ }
+
+ _fallthrough_;
+ case SIGINT:
+ if (MANAGER_IS_SYSTEM(m))
+ manager_handle_ctrl_alt_del(m);
+ else
+ manager_start_target(m, SPECIAL_EXIT_TARGET,
+ JOB_REPLACE_IRREVERSIBLY);
+ break;
+
+ case SIGWINCH:
+ /* This is a nop on non-init */
+ if (MANAGER_IS_SYSTEM(m))
+ manager_start_target(m, SPECIAL_KBREQUEST_TARGET, JOB_REPLACE);
+
+ break;
+
+ case SIGPWR:
+ /* This is a nop on non-init */
+ if (MANAGER_IS_SYSTEM(m))
+ manager_start_target(m, SPECIAL_SIGPWR_TARGET, JOB_REPLACE);
+
+ break;
+
+ case SIGUSR1:
+ if (manager_dbus_is_running(m, false)) {
+ log_info("Trying to reconnect to bus...");
+
+ (void) bus_init_api(m);
+
+ if (MANAGER_IS_SYSTEM(m))
+ (void) bus_init_system(m);
+ } else {
+ log_info("Starting D-Bus service...");
+ manager_start_target(m, SPECIAL_DBUS_SERVICE, JOB_REPLACE);
+ }
+
+ break;
+
+ case SIGUSR2: {
+ _cleanup_free_ char *dump = NULL;
+
+ r = manager_get_dump_string(m, &dump);
+ if (r < 0) {
+ log_warning_errno(errno, "Failed to acquire manager dump: %m");
+ break;
+ }
+
+ log_dump(LOG_INFO, dump);
+ break;
+ }
+
+ case SIGHUP:
+ if (verify_run_space_and_log("Refusing to reload") < 0)
+ break;
+
+ m->objective = MANAGER_RELOAD;
+ break;
+
+ default: {
+
+ /* Starting SIGRTMIN+0 */
+ static const struct {
+ const char *target;
+ JobMode mode;
+ } target_table[] = {
+ [0] = { SPECIAL_DEFAULT_TARGET, JOB_ISOLATE },
+ [1] = { SPECIAL_RESCUE_TARGET, JOB_ISOLATE },
+ [2] = { SPECIAL_EMERGENCY_TARGET, JOB_ISOLATE },
+ [3] = { SPECIAL_HALT_TARGET, JOB_REPLACE_IRREVERSIBLY },
+ [4] = { SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY },
+ [5] = { SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY },
+ [6] = { SPECIAL_KEXEC_TARGET, JOB_REPLACE_IRREVERSIBLY },
+ };
+
+ /* Starting SIGRTMIN+13, so that target halt and system halt are 10 apart */
+ static const ManagerObjective objective_table[] = {
+ [0] = MANAGER_HALT,
+ [1] = MANAGER_POWEROFF,
+ [2] = MANAGER_REBOOT,
+ [3] = MANAGER_KEXEC,
+ };
+
+ if ((int) sfsi.ssi_signo >= SIGRTMIN+0 &&
+ (int) sfsi.ssi_signo < SIGRTMIN+(int) ELEMENTSOF(target_table)) {
+ int idx = (int) sfsi.ssi_signo - SIGRTMIN;
+ manager_start_target(m, target_table[idx].target,
+ target_table[idx].mode);
+ break;
+ }
+
+ if ((int) sfsi.ssi_signo >= SIGRTMIN+13 &&
+ (int) sfsi.ssi_signo < SIGRTMIN+13+(int) ELEMENTSOF(objective_table)) {
+ m->objective = objective_table[sfsi.ssi_signo - SIGRTMIN - 13];
+ break;
+ }
+
+ switch (sfsi.ssi_signo - SIGRTMIN) {
+
+ case 20:
+ manager_set_show_status(m, SHOW_STATUS_YES);
+ break;
+
+ case 21:
+ manager_set_show_status(m, SHOW_STATUS_NO);
+ break;
+
+ case 22:
+ manager_override_log_level(m, LOG_DEBUG);
+ break;
+
+ case 23:
+ manager_restore_original_log_level(m);
+ break;
+
+ case 24:
+ if (MANAGER_IS_USER(m)) {
+ m->objective = MANAGER_EXIT;
+ return 0;
+ }
+
+ /* This is a nop on init */
+ break;
+
+ case 26:
+ case 29: /* compatibility: used to be mapped to LOG_TARGET_SYSLOG_OR_KMSG */
+ manager_restore_original_log_target(m);
+ break;
+
+ case 27:
+ manager_override_log_target(m, LOG_TARGET_CONSOLE);
+ break;
+
+ case 28:
+ manager_override_log_target(m, LOG_TARGET_KMSG);
+ break;
+
+ default:
+ log_warning("Got unhandled signal <%s>.", signal_to_string(sfsi.ssi_signo));
+ }
+ }}
+
+ return 0;
+}
+
+static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ Iterator i;
+ Unit *u;
+
+ assert(m);
+ assert(m->time_change_fd == fd);
+
+ log_struct(LOG_DEBUG,
+ "MESSAGE_ID=" SD_MESSAGE_TIME_CHANGE_STR,
+ LOG_MESSAGE("Time has been changed"));
+
+ /* Restart the watch */
+ (void) manager_setup_time_change(m);
+
+ HASHMAP_FOREACH(u, m->units, i)
+ if (UNIT_VTABLE(u)->time_change)
+ UNIT_VTABLE(u)->time_change(u);
+
+ return 0;
+}
+
+static int manager_dispatch_timezone_change(
+ sd_event_source *source,
+ const struct inotify_event *e,
+ void *userdata) {
+
+ Manager *m = userdata;
+ int changed;
+ Iterator i;
+ Unit *u;
+
+ assert(m);
+
+ log_debug("inotify event for /etc/localtime");
+
+ changed = manager_read_timezone_stat(m);
+ if (changed <= 0)
+ return changed;
+
+ /* Something changed, restart the watch, to ensure we watch the new /etc/localtime if it changed */
+ (void) manager_setup_timezone_change(m);
+
+ /* Read the new timezone */
+ tzset();
+
+ log_debug("Timezone has been changed (now: %s).", tzname[daylight]);
+
+ HASHMAP_FOREACH(u, m->units, i)
+ if (UNIT_VTABLE(u)->timezone_change)
+ UNIT_VTABLE(u)->timezone_change(u);
+
+ return 0;
+}
+
+static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ assert(m->idle_pipe[2] == fd);
+
+ /* There's at least one Type=idle child that just gave up on us waiting for the boot process to complete. Let's
+ * now turn off any further console output if there's at least one service that needs console access, so that
+ * from now on our own output should not spill into that service's output anymore. After all, we support
+ * Type=idle only to beautify console output and it generally is set on services that want to own the console
+ * exclusively without our interference. */
+ m->no_console_output = m->n_on_console > 0;
+
+ /* Acknowledge the child's request, and let all all other children know too that they shouldn't wait any longer
+ * by closing the pipes towards them, which is what they are waiting for. */
+ manager_close_idle_pipe(m);
+
+ return 0;
+}
+
+static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata) {
+ Manager *m = userdata;
+ int r;
+ uint64_t next;
+
+ assert(m);
+ assert(source);
+
+ manager_print_jobs_in_progress(m);
+
+ next = now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_PERIOD_USEC;
+ r = sd_event_source_set_time(source, next);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(source, SD_EVENT_ONESHOT);
+}
+
+int manager_loop(Manager *m) {
+ int r;
+
+ RATELIMIT_DEFINE(rl, 1*USEC_PER_SEC, 50000);
+
+ assert(m);
+ assert(m->objective == MANAGER_OK); /* Ensure manager_startup() has been called */
+
+ /* Release the path cache */
+ m->unit_path_cache = set_free_free(m->unit_path_cache);
+
+ manager_check_finished(m);
+
+ /* There might still be some zombies hanging around from before we were exec()'ed. Let's reap them. */
+ r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_ON);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable SIGCHLD event source: %m");
+
+ while (m->objective == MANAGER_OK) {
+ usec_t wait_usec;
+
+ if (m->runtime_watchdog > 0 && m->runtime_watchdog != USEC_INFINITY && MANAGER_IS_SYSTEM(m))
+ watchdog_ping();
+
+ if (!ratelimit_below(&rl)) {
+ /* Yay, something is going seriously wrong, pause a little */
+ log_warning("Looping too fast. Throttling execution a little.");
+ sleep(1);
+ }
+
+ if (manager_dispatch_load_queue(m) > 0)
+ continue;
+
+ if (manager_dispatch_gc_job_queue(m) > 0)
+ continue;
+
+ if (manager_dispatch_gc_unit_queue(m) > 0)
+ continue;
+
+ if (manager_dispatch_cleanup_queue(m) > 0)
+ continue;
+
+ if (manager_dispatch_cgroup_realize_queue(m) > 0)
+ continue;
+
+ if (manager_dispatch_stop_when_unneeded_queue(m) > 0)
+ continue;
+
+ if (manager_dispatch_dbus_queue(m) > 0)
+ continue;
+
+ /* Sleep for half the watchdog time */
+ if (m->runtime_watchdog > 0 && m->runtime_watchdog != USEC_INFINITY && MANAGER_IS_SYSTEM(m)) {
+ wait_usec = m->runtime_watchdog / 2;
+ if (wait_usec <= 0)
+ wait_usec = 1;
+ } else
+ wait_usec = USEC_INFINITY;
+
+ r = sd_event_run(m->event, wait_usec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+ }
+
+ return m->objective;
+}
+
+int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u) {
+ _cleanup_free_ char *n = NULL;
+ sd_id128_t invocation_id;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(s);
+ assert(_u);
+
+ r = unit_name_from_dbus_path(s, &n);
+ if (r < 0)
+ return r;
+
+ /* Permit addressing units by invocation ID: if the passed bus path is suffixed by a 128bit ID then we use it
+ * as invocation ID. */
+ r = sd_id128_from_string(n, &invocation_id);
+ if (r >= 0) {
+ u = hashmap_get(m->units_by_invocation_id, &invocation_id);
+ if (u) {
+ *_u = u;
+ return 0;
+ }
+
+ return sd_bus_error_setf(e, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID,
+ "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.",
+ SD_ID128_FORMAT_VAL(invocation_id));
+ }
+
+ /* If this didn't work, we check if this is a unit name */
+ if (!unit_name_is_valid(n, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) {
+ _cleanup_free_ char *nn = NULL;
+
+ nn = cescape(n);
+ return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS,
+ "Unit name %s is neither a valid invocation ID nor unit name.", strnull(nn));
+ }
+
+ r = manager_load_unit(m, n, NULL, e, &u);
+ if (r < 0)
+ return r;
+
+ *_u = u;
+ return 0;
+}
+
+int manager_get_job_from_dbus_path(Manager *m, const char *s, Job **_j) {
+ const char *p;
+ unsigned id;
+ Job *j;
+ int r;
+
+ assert(m);
+ assert(s);
+ assert(_j);
+
+ p = startswith(s, "/org/freedesktop/systemd1/job/");
+ if (!p)
+ return -EINVAL;
+
+ r = safe_atou(p, &id);
+ if (r < 0)
+ return r;
+
+ j = manager_get_job(m, id);
+ if (!j)
+ return -ENOENT;
+
+ *_j = j;
+
+ return 0;
+}
+
+void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success) {
+
+#if HAVE_AUDIT
+ _cleanup_free_ char *p = NULL;
+ const char *msg;
+ int audit_fd, r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return;
+
+ audit_fd = get_audit_fd();
+ if (audit_fd < 0)
+ return;
+
+ /* Don't generate audit events if the service was already
+ * started and we're just deserializing */
+ if (MANAGER_IS_RELOADING(m))
+ return;
+
+ if (u->type != UNIT_SERVICE)
+ return;
+
+ r = unit_name_to_prefix_and_instance(u->id, &p);
+ if (r < 0) {
+ log_error_errno(r, "Failed to extract prefix and instance of unit name: %m");
+ return;
+ }
+
+ msg = strjoina("unit=", p);
+ if (audit_log_user_comm_message(audit_fd, type, msg, "systemd", NULL, NULL, NULL, success) < 0) {
+ if (errno == EPERM)
+ /* We aren't allowed to send audit messages?
+ * Then let's not retry again. */
+ close_audit_fd();
+ else
+ log_warning_errno(errno, "Failed to send audit message: %m");
+ }
+#endif
+
+}
+
+void manager_send_unit_plymouth(Manager *m, Unit *u) {
+ static const union sockaddr_union sa = PLYMOUTH_SOCKET;
+ _cleanup_free_ char *message = NULL;
+ _cleanup_close_ int fd = -1;
+ int n = 0;
+
+ /* Don't generate plymouth events if the service was already
+ * started and we're just deserializing */
+ if (MANAGER_IS_RELOADING(m))
+ return;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return;
+
+ if (detect_container() > 0)
+ return;
+
+ if (!IN_SET(u->type, UNIT_SERVICE, UNIT_MOUNT, UNIT_SWAP))
+ return;
+
+ /* We set SOCK_NONBLOCK here so that we rather drop the
+ * message then wait for plymouth */
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0) {
+ log_error_errno(errno, "socket() failed: %m");
+ return;
+ }
+
+ if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
+ if (!IN_SET(errno, EPIPE, EAGAIN, ENOENT, ECONNREFUSED, ECONNRESET, ECONNABORTED))
+ log_error_errno(errno, "connect() failed: %m");
+ return;
+ }
+
+ if (asprintf(&message, "U\002%c%s%n", (int) (strlen(u->id) + 1), u->id, &n) < 0) {
+ log_oom();
+ return;
+ }
+
+ errno = 0;
+ if (write(fd, message, n + 1) != n + 1)
+ if (!IN_SET(errno, EPIPE, EAGAIN, ENOENT, ECONNREFUSED, ECONNRESET, ECONNABORTED))
+ log_error_errno(errno, "Failed to write Plymouth message: %m");
+}
+
+int manager_open_serialization(Manager *m, FILE **_f) {
+ int fd;
+ FILE *f;
+
+ assert(_f);
+
+ fd = open_serialization_fd("systemd-state");
+ if (fd < 0)
+ return fd;
+
+ f = fdopen(fd, "w+");
+ if (!f) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ *_f = f;
+ return 0;
+}
+
+static bool manager_timestamp_shall_serialize(ManagerTimestamp t) {
+
+ if (!in_initrd())
+ return true;
+
+ /* The following timestamps only apply to the host system, hence only serialize them there */
+ return !IN_SET(t,
+ MANAGER_TIMESTAMP_USERSPACE, MANAGER_TIMESTAMP_FINISH,
+ MANAGER_TIMESTAMP_SECURITY_START, MANAGER_TIMESTAMP_SECURITY_FINISH,
+ MANAGER_TIMESTAMP_GENERATORS_START, MANAGER_TIMESTAMP_GENERATORS_FINISH,
+ MANAGER_TIMESTAMP_UNITS_LOAD_START, MANAGER_TIMESTAMP_UNITS_LOAD_FINISH);
+}
+
+int manager_serialize(
+ Manager *m,
+ FILE *f,
+ FDSet *fds,
+ bool switching_root) {
+
+ ManagerTimestamp q;
+ const char *t;
+ Iterator i;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(f);
+ assert(fds);
+
+ _cleanup_(manager_reloading_stopp) _unused_ Manager *reloading = manager_reloading_start(m);
+
+ (void) serialize_item_format(f, "current-job-id", "%" PRIu32, m->current_job_id);
+ (void) serialize_item_format(f, "n-installed-jobs", "%u", m->n_installed_jobs);
+ (void) serialize_item_format(f, "n-failed-jobs", "%u", m->n_failed_jobs);
+ (void) serialize_bool(f, "taint-usr", m->taint_usr);
+ (void) serialize_bool(f, "ready-sent", m->ready_sent);
+ (void) serialize_bool(f, "taint-logged", m->taint_logged);
+ (void) serialize_bool(f, "service-watchdogs", m->service_watchdogs);
+
+ t = show_status_to_string(m->show_status);
+ if (t)
+ (void) serialize_item(f, "show-status", t);
+
+ if (m->log_level_overridden)
+ (void) serialize_item_format(f, "log-level-override", "%i", log_get_max_level());
+ if (m->log_target_overridden)
+ (void) serialize_item(f, "log-target-override", log_target_to_string(log_get_target()));
+
+ for (q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
+ _cleanup_free_ char *joined = NULL;
+
+ if (!manager_timestamp_shall_serialize(q))
+ continue;
+
+ joined = strjoin(manager_timestamp_to_string(q), "-timestamp");
+ if (!joined)
+ return log_oom();
+
+ (void) serialize_dual_timestamp(f, joined, m->timestamps + q);
+ }
+
+ if (!switching_root)
+ (void) serialize_strv(f, "env", m->client_environment);
+
+ if (m->notify_fd >= 0) {
+ r = serialize_fd(f, fds, "notify-fd", m->notify_fd);
+ if (r < 0)
+ return r;
+
+ (void) serialize_item(f, "notify-socket", m->notify_socket);
+ }
+
+ if (m->cgroups_agent_fd >= 0) {
+ r = serialize_fd(f, fds, "cgroups-agent-fd", m->cgroups_agent_fd);
+ if (r < 0)
+ return r;
+ }
+
+ if (m->user_lookup_fds[0] >= 0) {
+ int copy0, copy1;
+
+ copy0 = fdset_put_dup(fds, m->user_lookup_fds[0]);
+ if (copy0 < 0)
+ return log_error_errno(copy0, "Failed to add user lookup fd to serialization: %m");
+
+ copy1 = fdset_put_dup(fds, m->user_lookup_fds[1]);
+ if (copy1 < 0)
+ return log_error_errno(copy1, "Failed to add user lookup fd to serialization: %m");
+
+ (void) serialize_item_format(f, "user-lookup", "%i %i", copy0, copy1);
+ }
+
+ bus_track_serialize(m->subscribed, f, "subscribed");
+
+ r = dynamic_user_serialize(m, f, fds);
+ if (r < 0)
+ return r;
+
+ manager_serialize_uid_refs(m, f);
+ manager_serialize_gid_refs(m, f);
+
+ r = exec_runtime_serialize(m, f, fds);
+ if (r < 0)
+ return r;
+
+ (void) fputc('\n', f);
+
+ HASHMAP_FOREACH_KEY(u, t, m->units, i) {
+ if (u->id != t)
+ continue;
+
+ /* Start marker */
+ fputs(u->id, f);
+ fputc('\n', f);
+
+ r = unit_serialize(u, f, fds, !switching_root);
+ if (r < 0)
+ return r;
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to flush serialization: %m");
+
+ r = bus_fdset_add_all(m, fds);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add bus sockets to serialization: %m");
+
+ return 0;
+}
+
+static int manager_deserialize_one_unit(Manager *m, const char *name, FILE *f, FDSet *fds) {
+ Unit *u;
+ int r;
+
+ r = manager_load_unit(m, name, NULL, NULL, &u);
+ if (r < 0) {
+ if (r == -ENOMEM)
+ return r;
+ return log_notice_errno(r, "Failed to load unit \"%s\", skipping deserialization: %m", name);
+ }
+
+ r = unit_deserialize(u, f, fds);
+ if (r < 0) {
+ if (r == -ENOMEM)
+ return r;
+ return log_notice_errno(r, "Failed to deserialize unit \"%s\", skipping: %m", name);
+ }
+
+ return 0;
+}
+
+static int manager_deserialize_units(Manager *m, FILE *f, FDSet *fds) {
+ const char *unit_name;
+ int r;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ /* Start marker */
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0)
+ break;
+
+ unit_name = strstrip(line);
+
+ r = manager_deserialize_one_unit(m, unit_name, f, fds);
+ if (r == -ENOMEM)
+ return r;
+ if (r < 0) {
+ r = unit_deserialize_skip(f);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
+ int r = 0;
+
+ assert(m);
+ assert(f);
+
+ log_debug("Deserializing state...");
+
+ /* If we are not in reload mode yet, enter it now. Not that this is recursive, a caller might already have
+ * increased it to non-zero, which is why we just increase it by one here and down again at the end of this
+ * call. */
+ _cleanup_(manager_reloading_stopp) _unused_ Manager *reloading = manager_reloading_start(m);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *val, *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+ if (isempty(l)) /* end marker */
+ break;
+
+ if ((val = startswith(l, "current-job-id="))) {
+ uint32_t id;
+
+ if (safe_atou32(val, &id) < 0)
+ log_notice("Failed to parse current job id value '%s', ignoring.", val);
+ else
+ m->current_job_id = MAX(m->current_job_id, id);
+
+ } else if ((val = startswith(l, "n-installed-jobs="))) {
+ uint32_t n;
+
+ if (safe_atou32(val, &n) < 0)
+ log_notice("Failed to parse installed jobs counter '%s', ignoring.", val);
+ else
+ m->n_installed_jobs += n;
+
+ } else if ((val = startswith(l, "n-failed-jobs="))) {
+ uint32_t n;
+
+ if (safe_atou32(val, &n) < 0)
+ log_notice("Failed to parse failed jobs counter '%s', ignoring.", val);
+ else
+ m->n_failed_jobs += n;
+
+ } else if ((val = startswith(l, "taint-usr="))) {
+ int b;
+
+ b = parse_boolean(val);
+ if (b < 0)
+ log_notice("Failed to parse taint /usr flag '%s', ignoring.", val);
+ else
+ m->taint_usr = m->taint_usr || b;
+
+ } else if ((val = startswith(l, "ready-sent="))) {
+ int b;
+
+ b = parse_boolean(val);
+ if (b < 0)
+ log_notice("Failed to parse ready-sent flag '%s', ignoring.", val);
+ else
+ m->ready_sent = m->ready_sent || b;
+
+ } else if ((val = startswith(l, "taint-logged="))) {
+ int b;
+
+ b = parse_boolean(val);
+ if (b < 0)
+ log_notice("Failed to parse taint-logged flag '%s', ignoring.", val);
+ else
+ m->taint_logged = m->taint_logged || b;
+
+ } else if ((val = startswith(l, "service-watchdogs="))) {
+ int b;
+
+ b = parse_boolean(val);
+ if (b < 0)
+ log_notice("Failed to parse service-watchdogs flag '%s', ignoring.", val);
+ else
+ m->service_watchdogs = b;
+
+ } else if ((val = startswith(l, "show-status="))) {
+ ShowStatus s;
+
+ s = show_status_from_string(val);
+ if (s < 0)
+ log_notice("Failed to parse show-status flag '%s', ignoring.", val);
+ else
+ manager_set_show_status(m, s);
+
+ } else if ((val = startswith(l, "log-level-override="))) {
+ int level;
+
+ level = log_level_from_string(val);
+ if (level < 0)
+ log_notice("Failed to parse log-level-override value '%s', ignoring.", val);
+ else
+ manager_override_log_level(m, level);
+
+ } else if ((val = startswith(l, "log-target-override="))) {
+ LogTarget target;
+
+ target = log_target_from_string(val);
+ if (target < 0)
+ log_notice("Failed to parse log-target-override value '%s', ignoring.", val);
+ else
+ manager_override_log_target(m, target);
+
+ } else if (startswith(l, "env=")) {
+ r = deserialize_environment(l + 4, &m->client_environment);
+ if (r < 0)
+ log_notice_errno(r, "Failed to parse environment entry: \"%s\", ignoring: %m", l);
+
+ } else if ((val = startswith(l, "notify-fd="))) {
+ int fd;
+
+ if (safe_atoi(val, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_notice("Failed to parse notify fd, ignoring: \"%s\"", val);
+ else {
+ m->notify_event_source = sd_event_source_unref(m->notify_event_source);
+ safe_close(m->notify_fd);
+ m->notify_fd = fdset_remove(fds, fd);
+ }
+
+ } else if ((val = startswith(l, "notify-socket="))) {
+ r = free_and_strdup(&m->notify_socket, val);
+ if (r < 0)
+ return r;
+
+ } else if ((val = startswith(l, "cgroups-agent-fd="))) {
+ int fd;
+
+ if (safe_atoi(val, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_notice("Failed to parse cgroups agent fd, ignoring.: %s", val);
+ else {
+ m->cgroups_agent_event_source = sd_event_source_unref(m->cgroups_agent_event_source);
+ safe_close(m->cgroups_agent_fd);
+ m->cgroups_agent_fd = fdset_remove(fds, fd);
+ }
+
+ } else if ((val = startswith(l, "user-lookup="))) {
+ int fd0, fd1;
+
+ if (sscanf(val, "%i %i", &fd0, &fd1) != 2 || fd0 < 0 || fd1 < 0 || fd0 == fd1 || !fdset_contains(fds, fd0) || !fdset_contains(fds, fd1))
+ log_notice("Failed to parse user lookup fd, ignoring: %s", val);
+ else {
+ m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
+ safe_close_pair(m->user_lookup_fds);
+ m->user_lookup_fds[0] = fdset_remove(fds, fd0);
+ m->user_lookup_fds[1] = fdset_remove(fds, fd1);
+ }
+
+ } else if ((val = startswith(l, "dynamic-user=")))
+ dynamic_user_deserialize_one(m, val, fds);
+ else if ((val = startswith(l, "destroy-ipc-uid=")))
+ manager_deserialize_uid_refs_one(m, val);
+ else if ((val = startswith(l, "destroy-ipc-gid=")))
+ manager_deserialize_gid_refs_one(m, val);
+ else if ((val = startswith(l, "exec-runtime=")))
+ exec_runtime_deserialize_one(m, val, fds);
+ else if ((val = startswith(l, "subscribed="))) {
+
+ if (strv_extend(&m->deserialized_subscribed, val) < 0)
+ return -ENOMEM;
+
+ } else {
+ ManagerTimestamp q;
+
+ for (q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
+ val = startswith(l, manager_timestamp_to_string(q));
+ if (!val)
+ continue;
+
+ val = startswith(val, "-timestamp=");
+ if (val)
+ break;
+ }
+
+ if (q < _MANAGER_TIMESTAMP_MAX) /* found it */
+ (void) deserialize_dual_timestamp(val, m->timestamps + q);
+ else if (!startswith(l, "kdbus-fd=")) /* ignore kdbus */
+ log_notice("Unknown serialization item '%s', ignoring.", l);
+ }
+ }
+
+ return manager_deserialize_units(m, f, fds);
+}
+
+int manager_reload(Manager *m) {
+ _cleanup_(manager_reloading_stopp) Manager *reloading = NULL;
+ _cleanup_fdset_free_ FDSet *fds = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(m);
+
+ r = manager_open_serialization(m, &f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create serialization file: %m");
+
+ fds = fdset_new();
+ if (!fds)
+ return log_oom();
+
+ /* We are officially in reload mode from here on. */
+ reloading = manager_reloading_start(m);
+
+ r = manager_serialize(m, f, fds, false);
+ if (r < 0)
+ return r;
+
+ if (fseeko(f, 0, SEEK_SET) < 0)
+ return log_error_errno(errno, "Failed to seek to beginning of serialization: %m");
+
+ /* 💀 This is the point of no return, from here on there is no way back. 💀 */
+ reloading = NULL;
+
+ bus_manager_send_reloading(m, true);
+
+ /* Start by flushing out all jobs and units, all generated units, all runtime environments, all dynamic users
+ * and everything else that is worth flushing out. We'll get it all back from the serialization — if we need
+ * it.*/
+
+ manager_clear_jobs_and_units(m);
+ lookup_paths_flush_generator(&m->lookup_paths);
+ lookup_paths_free(&m->lookup_paths);
+ exec_runtime_vacuum(m);
+ dynamic_user_vacuum(m, false);
+ m->uid_refs = hashmap_free(m->uid_refs);
+ m->gid_refs = hashmap_free(m->gid_refs);
+
+ r = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to initialize path lookup table, ignoring: %m");
+
+ (void) manager_run_environment_generators(m);
+ (void) manager_run_generators(m);
+
+ r = lookup_paths_reduce(&m->lookup_paths);
+ if (r < 0)
+ log_warning_errno(r, "Failed ot reduce unit file paths, ignoring: %m");
+
+ manager_build_unit_path_cache(m);
+
+ /* First, enumerate what we can from kernel and suchlike */
+ manager_enumerate_perpetual(m);
+ manager_enumerate(m);
+
+ /* Second, deserialize our stored data */
+ r = manager_deserialize(m, f, fds);
+ if (r < 0)
+ log_warning_errno(r, "Deserialization failed, proceeding anyway: %m");
+
+ /* We don't need the serialization anymore */
+ f = safe_fclose(f);
+
+ /* Re-register notify_fd as event source, and set up other sockets/communication channels we might need */
+ (void) manager_setup_notify(m);
+ (void) manager_setup_cgroups_agent(m);
+ (void) manager_setup_user_lookup_fd(m);
+
+ /* Third, fire things up! */
+ manager_coldplug(m);
+
+ /* Clean up runtime objects no longer referenced */
+ manager_vacuum(m);
+
+ /* Consider the reload process complete now. */
+ assert(m->n_reloading > 0);
+ m->n_reloading--;
+
+ manager_ready(m);
+
+ m->send_reloading_done = true;
+ return 0;
+}
+
+void manager_reset_failed(Manager *m) {
+ Unit *u;
+ Iterator i;
+
+ assert(m);
+
+ HASHMAP_FOREACH(u, m->units, i)
+ unit_reset_failed(u);
+}
+
+bool manager_unit_inactive_or_pending(Manager *m, const char *name) {
+ Unit *u;
+
+ assert(m);
+ assert(name);
+
+ /* Returns true if the unit is inactive or going down */
+ u = manager_get_unit(m, name);
+ if (!u)
+ return true;
+
+ return unit_inactive_or_pending(u);
+}
+
+static void log_taint_string(Manager *m) {
+ _cleanup_free_ char *taint = NULL;
+
+ assert(m);
+
+ if (MANAGER_IS_USER(m) || m->taint_logged)
+ return;
+
+ m->taint_logged = true; /* only check for taint once */
+
+ taint = manager_taint_string(m);
+ if (isempty(taint))
+ return;
+
+ log_struct(LOG_NOTICE,
+ LOG_MESSAGE("System is tainted: %s", taint),
+ "TAINT=%s", taint,
+ "MESSAGE_ID=" SD_MESSAGE_TAINTED_STR);
+}
+
+static void manager_notify_finished(Manager *m) {
+ char userspace[FORMAT_TIMESPAN_MAX], initrd[FORMAT_TIMESPAN_MAX], kernel[FORMAT_TIMESPAN_MAX], sum[FORMAT_TIMESPAN_MAX];
+ usec_t firmware_usec, loader_usec, kernel_usec, initrd_usec, userspace_usec, total_usec;
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return;
+
+ if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ char buf[FORMAT_TIMESPAN_MAX + STRLEN(" (firmware) + ") + FORMAT_TIMESPAN_MAX + STRLEN(" (loader) + ")]
+ = {};
+ char *p = buf;
+ size_t size = sizeof buf;
+
+ /* Note that MANAGER_TIMESTAMP_KERNEL's monotonic value is always at 0, and
+ * MANAGER_TIMESTAMP_FIRMWARE's and MANAGER_TIMESTAMP_LOADER's monotonic value should be considered
+ * negative values. */
+
+ firmware_usec = m->timestamps[MANAGER_TIMESTAMP_FIRMWARE].monotonic - m->timestamps[MANAGER_TIMESTAMP_LOADER].monotonic;
+ loader_usec = m->timestamps[MANAGER_TIMESTAMP_LOADER].monotonic - m->timestamps[MANAGER_TIMESTAMP_KERNEL].monotonic;
+ userspace_usec = m->timestamps[MANAGER_TIMESTAMP_FINISH].monotonic - m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic;
+ total_usec = m->timestamps[MANAGER_TIMESTAMP_FIRMWARE].monotonic + m->timestamps[MANAGER_TIMESTAMP_FINISH].monotonic;
+
+ if (firmware_usec > 0)
+ size = strpcpyf(&p, size, "%s (firmware) + ", format_timespan(ts, sizeof(ts), firmware_usec, USEC_PER_MSEC));
+ if (loader_usec > 0)
+ size = strpcpyf(&p, size, "%s (loader) + ", format_timespan(ts, sizeof(ts), loader_usec, USEC_PER_MSEC));
+
+ if (dual_timestamp_is_set(&m->timestamps[MANAGER_TIMESTAMP_INITRD])) {
+
+ /* The initrd case on bare-metal*/
+ kernel_usec = m->timestamps[MANAGER_TIMESTAMP_INITRD].monotonic - m->timestamps[MANAGER_TIMESTAMP_KERNEL].monotonic;
+ initrd_usec = m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic - m->timestamps[MANAGER_TIMESTAMP_INITRD].monotonic;
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_STARTUP_FINISHED_STR,
+ "KERNEL_USEC="USEC_FMT, kernel_usec,
+ "INITRD_USEC="USEC_FMT, initrd_usec,
+ "USERSPACE_USEC="USEC_FMT, userspace_usec,
+ LOG_MESSAGE("Startup finished in %s%s (kernel) + %s (initrd) + %s (userspace) = %s.",
+ buf,
+ format_timespan(kernel, sizeof(kernel), kernel_usec, USEC_PER_MSEC),
+ format_timespan(initrd, sizeof(initrd), initrd_usec, USEC_PER_MSEC),
+ format_timespan(userspace, sizeof(userspace), userspace_usec, USEC_PER_MSEC),
+ format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC)));
+ } else {
+ /* The initrd-less case on bare-metal*/
+
+ kernel_usec = m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic - m->timestamps[MANAGER_TIMESTAMP_KERNEL].monotonic;
+ initrd_usec = 0;
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_STARTUP_FINISHED_STR,
+ "KERNEL_USEC="USEC_FMT, kernel_usec,
+ "USERSPACE_USEC="USEC_FMT, userspace_usec,
+ LOG_MESSAGE("Startup finished in %s%s (kernel) + %s (userspace) = %s.",
+ buf,
+ format_timespan(kernel, sizeof(kernel), kernel_usec, USEC_PER_MSEC),
+ format_timespan(userspace, sizeof(userspace), userspace_usec, USEC_PER_MSEC),
+ format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC)));
+ }
+ } else {
+ /* The container and --user case */
+ firmware_usec = loader_usec = initrd_usec = kernel_usec = 0;
+ total_usec = userspace_usec = m->timestamps[MANAGER_TIMESTAMP_FINISH].monotonic - m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic;
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_USER_STARTUP_FINISHED_STR,
+ "USERSPACE_USEC="USEC_FMT, userspace_usec,
+ LOG_MESSAGE("Startup finished in %s.",
+ format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC)));
+ }
+
+ bus_manager_send_finished(m, firmware_usec, loader_usec, kernel_usec, initrd_usec, userspace_usec, total_usec);
+
+ sd_notifyf(false,
+ m->ready_sent ? "STATUS=Startup finished in %s."
+ : "READY=1\n"
+ "STATUS=Startup finished in %s.",
+ format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC));
+ m->ready_sent = true;
+
+ log_taint_string(m);
+}
+
+static void manager_send_ready(Manager *m) {
+ assert(m);
+
+ /* We send READY=1 on reaching basic.target only when running in --user mode. */
+ if (!MANAGER_IS_USER(m) || m->ready_sent)
+ return;
+
+ m->ready_sent = true;
+
+ sd_notifyf(false,
+ "READY=1\n"
+ "STATUS=Reached " SPECIAL_BASIC_TARGET ".");
+}
+
+static void manager_check_basic_target(Manager *m) {
+ Unit *u;
+
+ assert(m);
+
+ /* Small shortcut */
+ if (m->ready_sent && m->taint_logged)
+ return;
+
+ u = manager_get_unit(m, SPECIAL_BASIC_TARGET);
+ if (!u || !UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
+ return;
+
+ /* For user managers, send out READY=1 as soon as we reach basic.target */
+ manager_send_ready(m);
+
+ /* Log the taint string as soon as we reach basic.target */
+ log_taint_string(m);
+}
+
+void manager_check_finished(Manager *m) {
+ assert(m);
+
+ if (MANAGER_IS_RELOADING(m))
+ return;
+
+ /* Verify that we have entered the event loop already, and not left it again. */
+ if (!MANAGER_IS_RUNNING(m))
+ return;
+
+ manager_check_basic_target(m);
+
+ if (hashmap_size(m->jobs) > 0) {
+ if (m->jobs_in_progress_event_source)
+ /* Ignore any failure, this is only for feedback */
+ (void) sd_event_source_set_time(m->jobs_in_progress_event_source, now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_WAIT_USEC);
+
+ return;
+ }
+
+ manager_flip_auto_status(m, false);
+
+ /* Notify Type=idle units that we are done now */
+ manager_close_idle_pipe(m);
+
+ /* Turn off confirm spawn now */
+ m->confirm_spawn = NULL;
+
+ /* No need to update ask password status when we're going non-interactive */
+ manager_close_ask_password(m);
+
+ /* This is no longer the first boot */
+ manager_set_first_boot(m, false);
+
+ if (MANAGER_IS_FINISHED(m))
+ return;
+
+ dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_FINISH);
+
+ manager_notify_finished(m);
+
+ manager_invalidate_startup_units(m);
+}
+
+static bool generator_path_any(const char* const* paths) {
+ char **path;
+ bool found = false;
+
+ /* Optimize by skipping the whole process by not creating output directories
+ * if no generators are found. */
+ STRV_FOREACH(path, (char**) paths)
+ if (access(*path, F_OK) == 0)
+ found = true;
+ else if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open generator directory %s: %m", *path);
+
+ return found;
+}
+
+static const char* system_env_generator_binary_paths[] = {
+ "/run/systemd/system-environment-generators",
+ "/etc/systemd/system-environment-generators",
+ "/usr/local/lib/systemd/system-environment-generators",
+ SYSTEM_ENV_GENERATOR_PATH,
+ NULL
+};
+
+static const char* user_env_generator_binary_paths[] = {
+ "/run/systemd/user-environment-generators",
+ "/etc/systemd/user-environment-generators",
+ "/usr/local/lib/systemd/user-environment-generators",
+ USER_ENV_GENERATOR_PATH,
+ NULL
+};
+
+static int manager_run_environment_generators(Manager *m) {
+ char **tmp = NULL; /* this is only used in the forked process, no cleanup here */
+ const char **paths;
+ void* args[] = {
+ [STDOUT_GENERATE] = &tmp,
+ [STDOUT_COLLECT] = &tmp,
+ [STDOUT_CONSUME] = &m->transient_environment,
+ };
+ int r;
+
+ if (MANAGER_IS_TEST_RUN(m) && !(m->test_run_flags & MANAGER_TEST_RUN_ENV_GENERATORS))
+ return 0;
+
+ paths = MANAGER_IS_SYSTEM(m) ? system_env_generator_binary_paths : user_env_generator_binary_paths;
+
+ if (!generator_path_any(paths))
+ return 0;
+
+ RUN_WITH_UMASK(0022)
+ r = execute_directories(paths, DEFAULT_TIMEOUT_USEC, gather_environment, args, NULL, m->transient_environment);
+
+ return r;
+}
+
+static int manager_run_generators(Manager *m) {
+ _cleanup_strv_free_ char **paths = NULL;
+ const char *argv[5];
+ int r;
+
+ assert(m);
+
+ if (MANAGER_IS_TEST_RUN(m) && !(m->test_run_flags & MANAGER_TEST_RUN_GENERATORS))
+ return 0;
+
+ paths = generator_binary_paths(m->unit_file_scope);
+ if (!paths)
+ return log_oom();
+
+ if (!generator_path_any((const char* const*) paths))
+ return 0;
+
+ r = lookup_paths_mkdir_generator(&m->lookup_paths);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create generator directories: %m");
+ goto finish;
+ }
+
+ argv[0] = NULL; /* Leave this empty, execute_directory() will fill something in */
+ argv[1] = m->lookup_paths.generator;
+ argv[2] = m->lookup_paths.generator_early;
+ argv[3] = m->lookup_paths.generator_late;
+ argv[4] = NULL;
+
+ RUN_WITH_UMASK(0022)
+ (void) execute_directories((const char* const*) paths, DEFAULT_TIMEOUT_USEC,
+ NULL, NULL, (char**) argv, m->transient_environment);
+
+ r = 0;
+
+finish:
+ lookup_paths_trim_generator(&m->lookup_paths);
+ return r;
+}
+
+int manager_transient_environment_add(Manager *m, char **plus) {
+ char **a;
+
+ assert(m);
+
+ if (strv_isempty(plus))
+ return 0;
+
+ a = strv_env_merge(2, m->transient_environment, plus);
+ if (!a)
+ return log_oom();
+
+ sanitize_environment(a);
+
+ return strv_free_and_replace(m->transient_environment, a);
+}
+
+int manager_client_environment_modify(
+ Manager *m,
+ char **minus,
+ char **plus) {
+
+ char **a = NULL, **b = NULL, **l;
+
+ assert(m);
+
+ if (strv_isempty(minus) && strv_isempty(plus))
+ return 0;
+
+ l = m->client_environment;
+
+ if (!strv_isempty(minus)) {
+ a = strv_env_delete(l, 1, minus);
+ if (!a)
+ return -ENOMEM;
+
+ l = a;
+ }
+
+ if (!strv_isempty(plus)) {
+ b = strv_env_merge(2, l, plus);
+ if (!b) {
+ strv_free(a);
+ return -ENOMEM;
+ }
+
+ l = b;
+ }
+
+ if (m->client_environment != l)
+ strv_free(m->client_environment);
+
+ if (a != l)
+ strv_free(a);
+ if (b != l)
+ strv_free(b);
+
+ m->client_environment = sanitize_environment(l);
+ return 0;
+}
+
+int manager_get_effective_environment(Manager *m, char ***ret) {
+ char **l;
+
+ assert(m);
+ assert(ret);
+
+ l = strv_env_merge(2, m->transient_environment, m->client_environment);
+ if (!l)
+ return -ENOMEM;
+
+ *ret = l;
+ return 0;
+}
+
+int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit) {
+ int i;
+
+ assert(m);
+
+ for (i = 0; i < _RLIMIT_MAX; i++) {
+ m->rlimit[i] = mfree(m->rlimit[i]);
+
+ if (!default_rlimit[i])
+ continue;
+
+ m->rlimit[i] = newdup(struct rlimit, default_rlimit[i], 1);
+ if (!m->rlimit[i])
+ return log_oom();
+ }
+
+ return 0;
+}
+
+void manager_recheck_dbus(Manager *m) {
+ assert(m);
+
+ /* Connects to the bus if the dbus service and socket are running. If we are running in user mode this is all
+ * it does. In system mode we'll also connect to the system bus (which will most likely just reuse the
+ * connection of the API bus). That's because the system bus after all runs as service of the system instance,
+ * while in the user instance we can assume it's already there. */
+
+ if (MANAGER_IS_RELOADING(m))
+ return; /* don't check while we are reloading… */
+
+ if (manager_dbus_is_running(m, false)) {
+ (void) bus_init_api(m);
+
+ if (MANAGER_IS_SYSTEM(m))
+ (void) bus_init_system(m);
+ } else {
+ (void) bus_done_api(m);
+
+ if (MANAGER_IS_SYSTEM(m))
+ (void) bus_done_system(m);
+ }
+}
+
+static bool manager_journal_is_running(Manager *m) {
+ Unit *u;
+
+ assert(m);
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return false;
+
+ /* If we are the user manager we can safely assume that the journal is up */
+ if (!MANAGER_IS_SYSTEM(m))
+ return true;
+
+ /* Check that the socket is not only up, but in RUNNING state */
+ u = manager_get_unit(m, SPECIAL_JOURNALD_SOCKET);
+ if (!u)
+ return false;
+ if (SOCKET(u)->state != SOCKET_RUNNING)
+ return false;
+
+ /* Similar, check if the daemon itself is fully up, too */
+ u = manager_get_unit(m, SPECIAL_JOURNALD_SERVICE);
+ if (!u)
+ return false;
+ if (!IN_SET(SERVICE(u)->state, SERVICE_RELOAD, SERVICE_RUNNING))
+ return false;
+
+ return true;
+}
+
+void manager_recheck_journal(Manager *m) {
+
+ assert(m);
+
+ /* Don't bother with this unless we are in the special situation of being PID 1 */
+ if (getpid_cached() != 1)
+ return;
+
+ /* Don't check this while we are reloading, things might still change */
+ if (MANAGER_IS_RELOADING(m))
+ return;
+
+ /* The journal is fully and entirely up? If so, let's permit logging to it, if that's configured. If the
+ * journal is down, don't ever log to it, otherwise we might end up deadlocking ourselves as we might trigger
+ * an activation ourselves we can't fulfill. */
+ log_set_prohibit_ipc(!manager_journal_is_running(m));
+ log_open();
+}
+
+void manager_set_show_status(Manager *m, ShowStatus mode) {
+ assert(m);
+ assert(IN_SET(mode, SHOW_STATUS_AUTO, SHOW_STATUS_NO, SHOW_STATUS_YES, SHOW_STATUS_TEMPORARY));
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return;
+
+ if (m->show_status != mode)
+ log_debug("%s showing of status.",
+ mode == SHOW_STATUS_NO ? "Disabling" : "Enabling");
+ m->show_status = mode;
+
+ if (IN_SET(mode, SHOW_STATUS_TEMPORARY, SHOW_STATUS_YES))
+ (void) touch("/run/systemd/show-status");
+ else
+ (void) unlink("/run/systemd/show-status");
+}
+
+static bool manager_get_show_status(Manager *m, StatusType type) {
+ assert(m);
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return false;
+
+ if (m->no_console_output)
+ return false;
+
+ if (!IN_SET(manager_state(m), MANAGER_INITIALIZING, MANAGER_STARTING, MANAGER_STOPPING))
+ return false;
+
+ /* If we cannot find out the status properly, just proceed. */
+ if (type != STATUS_TYPE_EMERGENCY && manager_check_ask_password(m) > 0)
+ return false;
+
+ return IN_SET(m->show_status, SHOW_STATUS_TEMPORARY, SHOW_STATUS_YES);
+}
+
+const char *manager_get_confirm_spawn(Manager *m) {
+ static int last_errno = 0;
+ const char *vc = m->confirm_spawn;
+ struct stat st;
+ int r;
+
+ /* Here's the deal: we want to test the validity of the console but don't want
+ * PID1 to go through the whole console process which might block. But we also
+ * want to warn the user only once if something is wrong with the console so we
+ * cannot do the sanity checks after spawning our children. So here we simply do
+ * really basic tests to hopefully trap common errors.
+ *
+ * If the console suddenly disappear at the time our children will really it
+ * then they will simply fail to acquire it and a positive answer will be
+ * assumed. New children will fallback to /dev/console though.
+ *
+ * Note: TTYs are devices that can come and go any time, and frequently aren't
+ * available yet during early boot (consider a USB rs232 dongle...). If for any
+ * reason the configured console is not ready, we fallback to the default
+ * console. */
+
+ if (!vc || path_equal(vc, "/dev/console"))
+ return vc;
+
+ r = stat(vc, &st);
+ if (r < 0)
+ goto fail;
+
+ if (!S_ISCHR(st.st_mode)) {
+ errno = ENOTTY;
+ goto fail;
+ }
+
+ last_errno = 0;
+ return vc;
+fail:
+ if (last_errno != errno) {
+ last_errno = errno;
+ log_warning_errno(errno, "Failed to open %s: %m, using default console", vc);
+ }
+ return "/dev/console";
+}
+
+void manager_set_first_boot(Manager *m, bool b) {
+ assert(m);
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return;
+
+ if (m->first_boot != (int) b) {
+ if (b)
+ (void) touch("/run/systemd/first-boot");
+ else
+ (void) unlink("/run/systemd/first-boot");
+ }
+
+ m->first_boot = b;
+}
+
+void manager_disable_confirm_spawn(void) {
+ (void) touch("/run/systemd/confirm_spawn_disabled");
+}
+
+bool manager_is_confirm_spawn_disabled(Manager *m) {
+ if (!m->confirm_spawn)
+ return true;
+
+ return access("/run/systemd/confirm_spawn_disabled", F_OK) >= 0;
+}
+
+void manager_status_printf(Manager *m, StatusType type, const char *status, const char *format, ...) {
+ va_list ap;
+
+ /* If m is NULL, assume we're after shutdown and let the messages through. */
+
+ if (m && !manager_get_show_status(m, type))
+ return;
+
+ /* XXX We should totally drop the check for ephemeral here
+ * and thus effectively make 'Type=idle' pointless. */
+ if (type == STATUS_TYPE_EPHEMERAL && m && m->n_on_console > 0)
+ return;
+
+ va_start(ap, format);
+ status_vprintf(status, SHOW_STATUS_ELLIPSIZE|(type == STATUS_TYPE_EPHEMERAL ? SHOW_STATUS_EPHEMERAL : 0), format, ap);
+ va_end(ap);
+}
+
+Set *manager_get_units_requiring_mounts_for(Manager *m, const char *path) {
+ char p[strlen(path)+1];
+
+ assert(m);
+ assert(path);
+
+ strcpy(p, path);
+ path_simplify(p, false);
+
+ return hashmap_get(m->units_requiring_mounts_for, streq(p, "/") ? "" : p);
+}
+
+int manager_update_failed_units(Manager *m, Unit *u, bool failed) {
+ unsigned size;
+ int r;
+
+ assert(m);
+ assert(u->manager == m);
+
+ size = set_size(m->failed_units);
+
+ if (failed) {
+ r = set_ensure_allocated(&m->failed_units, NULL);
+ if (r < 0)
+ return log_oom();
+
+ if (set_put(m->failed_units, u) < 0)
+ return log_oom();
+ } else
+ (void) set_remove(m->failed_units, u);
+
+ if (set_size(m->failed_units) != size)
+ bus_manager_send_change_signal(m);
+
+ return 0;
+}
+
+ManagerState manager_state(Manager *m) {
+ Unit *u;
+
+ assert(m);
+
+ /* Did we ever finish booting? If not then we are still starting up */
+ if (!MANAGER_IS_FINISHED(m)) {
+
+ u = manager_get_unit(m, SPECIAL_BASIC_TARGET);
+ if (!u || !UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
+ return MANAGER_INITIALIZING;
+
+ return MANAGER_STARTING;
+ }
+
+ /* Is the special shutdown target active or queued? If so, we are in shutdown state */
+ u = manager_get_unit(m, SPECIAL_SHUTDOWN_TARGET);
+ if (u && unit_active_or_pending(u))
+ return MANAGER_STOPPING;
+
+ if (MANAGER_IS_SYSTEM(m)) {
+ /* Are the rescue or emergency targets active or queued? If so we are in maintenance state */
+ u = manager_get_unit(m, SPECIAL_RESCUE_TARGET);
+ if (u && unit_active_or_pending(u))
+ return MANAGER_MAINTENANCE;
+
+ u = manager_get_unit(m, SPECIAL_EMERGENCY_TARGET);
+ if (u && unit_active_or_pending(u))
+ return MANAGER_MAINTENANCE;
+ }
+
+ /* Are there any failed units? If so, we are in degraded mode */
+ if (set_size(m->failed_units) > 0)
+ return MANAGER_DEGRADED;
+
+ return MANAGER_RUNNING;
+}
+
+#define DESTROY_IPC_FLAG (UINT32_C(1) << 31)
+
+static void manager_unref_uid_internal(
+ Manager *m,
+ Hashmap **uid_refs,
+ uid_t uid,
+ bool destroy_now,
+ int (*_clean_ipc)(uid_t uid)) {
+
+ uint32_t c, n;
+
+ assert(m);
+ assert(uid_refs);
+ assert(uid_is_valid(uid));
+ assert(_clean_ipc);
+
+ /* A generic implementation, covering both manager_unref_uid() and manager_unref_gid(), under the assumption
+ * that uid_t and gid_t are actually defined the same way, with the same validity rules.
+ *
+ * We store a hashmap where the UID/GID is they key and the value is a 32bit reference counter, whose highest
+ * bit is used as flag for marking UIDs/GIDs whose IPC objects to remove when the last reference to the UID/GID
+ * is dropped. The flag is set to on, once at least one reference from a unit where RemoveIPC= is set is added
+ * on a UID/GID. It is reset when the UID's/GID's reference counter drops to 0 again. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (uid == 0) /* We don't keep track of root, and will never destroy it */
+ return;
+
+ c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+
+ n = c & ~DESTROY_IPC_FLAG;
+ assert(n > 0);
+ n--;
+
+ if (destroy_now && n == 0) {
+ hashmap_remove(*uid_refs, UID_TO_PTR(uid));
+
+ if (c & DESTROY_IPC_FLAG) {
+ log_debug("%s " UID_FMT " is no longer referenced, cleaning up its IPC.",
+ _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
+ uid);
+ (void) _clean_ipc(uid);
+ }
+ } else {
+ c = n | (c & DESTROY_IPC_FLAG);
+ assert_se(hashmap_update(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)) >= 0);
+ }
+}
+
+void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now) {
+ manager_unref_uid_internal(m, &m->uid_refs, uid, destroy_now, clean_ipc_by_uid);
+}
+
+void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now) {
+ manager_unref_uid_internal(m, &m->gid_refs, (uid_t) gid, destroy_now, clean_ipc_by_gid);
+}
+
+static int manager_ref_uid_internal(
+ Manager *m,
+ Hashmap **uid_refs,
+ uid_t uid,
+ bool clean_ipc) {
+
+ uint32_t c, n;
+ int r;
+
+ assert(m);
+ assert(uid_refs);
+ assert(uid_is_valid(uid));
+
+ /* A generic implementation, covering both manager_ref_uid() and manager_ref_gid(), under the assumption
+ * that uid_t and gid_t are actually defined the same way, with the same validity rules. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (uid == 0) /* We don't keep track of root, and will never destroy it */
+ return 0;
+
+ r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
+ if (r < 0)
+ return r;
+
+ c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+
+ n = c & ~DESTROY_IPC_FLAG;
+ n++;
+
+ if (n & DESTROY_IPC_FLAG) /* check for overflow */
+ return -EOVERFLOW;
+
+ c = n | (c & DESTROY_IPC_FLAG) | (clean_ipc ? DESTROY_IPC_FLAG : 0);
+
+ return hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
+}
+
+int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc) {
+ return manager_ref_uid_internal(m, &m->uid_refs, uid, clean_ipc);
+}
+
+int manager_ref_gid(Manager *m, gid_t gid, bool clean_ipc) {
+ return manager_ref_uid_internal(m, &m->gid_refs, (uid_t) gid, clean_ipc);
+}
+
+static void manager_vacuum_uid_refs_internal(
+ Manager *m,
+ Hashmap **uid_refs,
+ int (*_clean_ipc)(uid_t uid)) {
+
+ Iterator i;
+ void *p, *k;
+
+ assert(m);
+ assert(uid_refs);
+ assert(_clean_ipc);
+
+ HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) {
+ uint32_t c, n;
+ uid_t uid;
+
+ uid = PTR_TO_UID(k);
+ c = PTR_TO_UINT32(p);
+
+ n = c & ~DESTROY_IPC_FLAG;
+ if (n > 0)
+ continue;
+
+ if (c & DESTROY_IPC_FLAG) {
+ log_debug("Found unreferenced %s " UID_FMT " after reload/reexec. Cleaning up.",
+ _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
+ uid);
+ (void) _clean_ipc(uid);
+ }
+
+ assert_se(hashmap_remove(*uid_refs, k) == p);
+ }
+}
+
+void manager_vacuum_uid_refs(Manager *m) {
+ manager_vacuum_uid_refs_internal(m, &m->uid_refs, clean_ipc_by_uid);
+}
+
+void manager_vacuum_gid_refs(Manager *m) {
+ manager_vacuum_uid_refs_internal(m, &m->gid_refs, clean_ipc_by_gid);
+}
+
+static void manager_serialize_uid_refs_internal(
+ Manager *m,
+ FILE *f,
+ Hashmap **uid_refs,
+ const char *field_name) {
+
+ Iterator i;
+ void *p, *k;
+
+ assert(m);
+ assert(f);
+ assert(uid_refs);
+ assert(field_name);
+
+ /* Serialize the UID reference table. Or actually, just the IPC destruction flag of it, as the actual counter
+ * of it is better rebuild after a reload/reexec. */
+
+ HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) {
+ uint32_t c;
+ uid_t uid;
+
+ uid = PTR_TO_UID(k);
+ c = PTR_TO_UINT32(p);
+
+ if (!(c & DESTROY_IPC_FLAG))
+ continue;
+
+ (void) serialize_item_format(f, field_name, UID_FMT, uid);
+ }
+}
+
+void manager_serialize_uid_refs(Manager *m, FILE *f) {
+ manager_serialize_uid_refs_internal(m, f, &m->uid_refs, "destroy-ipc-uid");
+}
+
+void manager_serialize_gid_refs(Manager *m, FILE *f) {
+ manager_serialize_uid_refs_internal(m, f, &m->gid_refs, "destroy-ipc-gid");
+}
+
+static void manager_deserialize_uid_refs_one_internal(
+ Manager *m,
+ Hashmap** uid_refs,
+ const char *value) {
+
+ uid_t uid;
+ uint32_t c;
+ int r;
+
+ assert(m);
+ assert(uid_refs);
+ assert(value);
+
+ r = parse_uid(value, &uid);
+ if (r < 0 || uid == 0) {
+ log_debug("Unable to parse UID reference serialization");
+ return;
+ }
+
+ r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
+ if (r < 0) {
+ log_oom();
+ return;
+ }
+
+ c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+ if (c & DESTROY_IPC_FLAG)
+ return;
+
+ c |= DESTROY_IPC_FLAG;
+
+ r = hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add UID reference entry: %m");
+ return;
+ }
+}
+
+void manager_deserialize_uid_refs_one(Manager *m, const char *value) {
+ manager_deserialize_uid_refs_one_internal(m, &m->uid_refs, value);
+}
+
+void manager_deserialize_gid_refs_one(Manager *m, const char *value) {
+ manager_deserialize_uid_refs_one_internal(m, &m->gid_refs, value);
+}
+
+int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ struct buffer {
+ uid_t uid;
+ gid_t gid;
+ char unit_name[UNIT_NAME_MAX+1];
+ } _packed_ buffer;
+
+ Manager *m = userdata;
+ ssize_t l;
+ size_t n;
+ Unit *u;
+
+ assert_se(source);
+ assert_se(m);
+
+ /* Invoked whenever a child process succeeded resolving its user/group to use and sent us the resulting UID/GID
+ * in a datagram. We parse the datagram here and pass it off to the unit, so that it can add a reference to the
+ * UID/GID so that it can destroy the UID/GID's IPC objects when the reference counter drops to 0. */
+
+ l = recv(fd, &buffer, sizeof(buffer), MSG_DONTWAIT);
+ if (l < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ return 0;
+
+ return log_error_errno(errno, "Failed to read from user lookup fd: %m");
+ }
+
+ if ((size_t) l <= offsetof(struct buffer, unit_name)) {
+ log_warning("Received too short user lookup message, ignoring.");
+ return 0;
+ }
+
+ if ((size_t) l > offsetof(struct buffer, unit_name) + UNIT_NAME_MAX) {
+ log_warning("Received too long user lookup message, ignoring.");
+ return 0;
+ }
+
+ if (!uid_is_valid(buffer.uid) && !gid_is_valid(buffer.gid)) {
+ log_warning("Got user lookup message with invalid UID/GID pair, ignoring.");
+ return 0;
+ }
+
+ n = (size_t) l - offsetof(struct buffer, unit_name);
+ if (memchr(buffer.unit_name, 0, n)) {
+ log_warning("Received lookup message with embedded NUL character, ignoring.");
+ return 0;
+ }
+
+ buffer.unit_name[n] = 0;
+ u = manager_get_unit(m, buffer.unit_name);
+ if (!u) {
+ log_debug("Got user lookup message but unit doesn't exist, ignoring.");
+ return 0;
+ }
+
+ log_unit_debug(u, "User lookup succeeded: uid=" UID_FMT " gid=" GID_FMT, buffer.uid, buffer.gid);
+
+ unit_notify_user_lookup(u, buffer.uid, buffer.gid);
+ return 0;
+}
+
+char *manager_taint_string(Manager *m) {
+ _cleanup_free_ char *destination = NULL, *overflowuid = NULL, *overflowgid = NULL;
+ char *buf, *e;
+ int r;
+
+ /* Returns a "taint string", e.g. "local-hwclock:var-run-bad".
+ * Only things that are detected at runtime should be tagged
+ * here. For stuff that is set during compilation, emit a warning
+ * in the configuration phase. */
+
+ assert(m);
+
+ buf = new(char, sizeof("split-usr:"
+ "cgroups-missing:"
+ "local-hwclock:"
+ "var-run-bad:"
+ "overflowuid-not-65534:"
+ "overflowgid-not-65534:"));
+ if (!buf)
+ return NULL;
+
+ e = buf;
+ buf[0] = 0;
+
+ if (m->taint_usr)
+ e = stpcpy(e, "split-usr:");
+
+ if (access("/proc/cgroups", F_OK) < 0)
+ e = stpcpy(e, "cgroups-missing:");
+
+ if (clock_is_localtime(NULL) > 0)
+ e = stpcpy(e, "local-hwclock:");
+
+ r = readlink_malloc("/var/run", &destination);
+ if (r < 0 || !PATH_IN_SET(destination, "../run", "/run"))
+ e = stpcpy(e, "var-run-bad:");
+
+ r = read_one_line_file("/proc/sys/kernel/overflowuid", &overflowuid);
+ if (r >= 0 && !streq(overflowuid, "65534"))
+ e = stpcpy(e, "overflowuid-not-65534:");
+
+ r = read_one_line_file("/proc/sys/kernel/overflowgid", &overflowgid);
+ if (r >= 0 && !streq(overflowgid, "65534"))
+ e = stpcpy(e, "overflowgid-not-65534:");
+
+ /* remove the last ':' */
+ if (e != buf)
+ e[-1] = 0;
+
+ return buf;
+}
+
+void manager_ref_console(Manager *m) {
+ assert(m);
+
+ m->n_on_console++;
+}
+
+void manager_unref_console(Manager *m) {
+
+ assert(m->n_on_console > 0);
+ m->n_on_console--;
+
+ if (m->n_on_console == 0)
+ m->no_console_output = false; /* unset no_console_output flag, since the console is definitely free now */
+}
+
+void manager_override_log_level(Manager *m, int level) {
+ _cleanup_free_ char *s = NULL;
+ assert(m);
+
+ if (!m->log_level_overridden) {
+ m->original_log_level = log_get_max_level();
+ m->log_level_overridden = true;
+ }
+
+ (void) log_level_to_string_alloc(level, &s);
+ log_info("Setting log level to %s.", strna(s));
+
+ log_set_max_level(level);
+}
+
+void manager_restore_original_log_level(Manager *m) {
+ _cleanup_free_ char *s = NULL;
+ assert(m);
+
+ if (!m->log_level_overridden)
+ return;
+
+ (void) log_level_to_string_alloc(m->original_log_level, &s);
+ log_info("Restoring log level to original (%s).", strna(s));
+
+ log_set_max_level(m->original_log_level);
+ m->log_level_overridden = false;
+}
+
+void manager_override_log_target(Manager *m, LogTarget target) {
+ assert(m);
+
+ if (!m->log_target_overridden) {
+ m->original_log_target = log_get_target();
+ m->log_target_overridden = true;
+ }
+
+ log_info("Setting log target to %s.", log_target_to_string(target));
+ log_set_target(target);
+}
+
+void manager_restore_original_log_target(Manager *m) {
+ assert(m);
+
+ if (!m->log_target_overridden)
+ return;
+
+ log_info("Restoring log target to original %s.", log_target_to_string(m->original_log_target));
+
+ log_set_target(m->original_log_target);
+ m->log_target_overridden = false;
+}
+
+ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s) {
+ if (in_initrd() &&
+ s >= MANAGER_TIMESTAMP_SECURITY_START &&
+ s <= MANAGER_TIMESTAMP_UNITS_LOAD_FINISH)
+ return s - MANAGER_TIMESTAMP_SECURITY_START + MANAGER_TIMESTAMP_INITRD_SECURITY_START;
+ return s;
+}
+
+static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
+ [MANAGER_INITIALIZING] = "initializing",
+ [MANAGER_STARTING] = "starting",
+ [MANAGER_RUNNING] = "running",
+ [MANAGER_DEGRADED] = "degraded",
+ [MANAGER_MAINTENANCE] = "maintenance",
+ [MANAGER_STOPPING] = "stopping",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(manager_state, ManagerState);
+
+static const char *const manager_timestamp_table[_MANAGER_TIMESTAMP_MAX] = {
+ [MANAGER_TIMESTAMP_FIRMWARE] = "firmware",
+ [MANAGER_TIMESTAMP_LOADER] = "loader",
+ [MANAGER_TIMESTAMP_KERNEL] = "kernel",
+ [MANAGER_TIMESTAMP_INITRD] = "initrd",
+ [MANAGER_TIMESTAMP_USERSPACE] = "userspace",
+ [MANAGER_TIMESTAMP_FINISH] = "finish",
+ [MANAGER_TIMESTAMP_SECURITY_START] = "security-start",
+ [MANAGER_TIMESTAMP_SECURITY_FINISH] = "security-finish",
+ [MANAGER_TIMESTAMP_GENERATORS_START] = "generators-start",
+ [MANAGER_TIMESTAMP_GENERATORS_FINISH] = "generators-finish",
+ [MANAGER_TIMESTAMP_UNITS_LOAD_START] = "units-load-start",
+ [MANAGER_TIMESTAMP_UNITS_LOAD_FINISH] = "units-load-finish",
+ [MANAGER_TIMESTAMP_INITRD_SECURITY_START] = "initrd-security-start",
+ [MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH] = "initrd-security-finish",
+ [MANAGER_TIMESTAMP_INITRD_GENERATORS_START] = "initrd-generators-start",
+ [MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH] = "initrd-generators-finish",
+ [MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START] = "initrd-units-load-start",
+ [MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH] = "initrd-units-load-finish",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(manager_timestamp, ManagerTimestamp);
diff --git a/src/core/manager.h b/src/core/manager.h
new file mode 100644
index 0000000..bce8020
--- /dev/null
+++ b/src/core/manager.h
@@ -0,0 +1,517 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "sd-bus.h"
+#include "sd-device.h"
+#include "sd-event.h"
+
+#include "cgroup-util.h"
+#include "fdset.h"
+#include "hashmap.h"
+#include "ip-address-access.h"
+#include "list.h"
+#include "ratelimit.h"
+
+struct libmnt_monitor;
+typedef struct Unit Unit;
+
+/* Enforce upper limit how many names we allow */
+#define MANAGER_MAX_NAMES 131072 /* 128K */
+
+typedef struct Manager Manager;
+
+/* An externally visible state. We don't actually maintain this as state variable, but derive it from various fields
+ * when requested */
+typedef enum ManagerState {
+ MANAGER_INITIALIZING,
+ MANAGER_STARTING,
+ MANAGER_RUNNING,
+ MANAGER_DEGRADED,
+ MANAGER_MAINTENANCE,
+ MANAGER_STOPPING,
+ _MANAGER_STATE_MAX,
+ _MANAGER_STATE_INVALID = -1
+} ManagerState;
+
+typedef enum ManagerObjective {
+ MANAGER_OK,
+ MANAGER_EXIT,
+ MANAGER_RELOAD,
+ MANAGER_REEXECUTE,
+ MANAGER_REBOOT,
+ MANAGER_POWEROFF,
+ MANAGER_HALT,
+ MANAGER_KEXEC,
+ MANAGER_SWITCH_ROOT,
+ _MANAGER_OBJECTIVE_MAX,
+ _MANAGER_OBJECTIVE_INVALID = -1
+} ManagerObjective;
+
+typedef enum StatusType {
+ STATUS_TYPE_EPHEMERAL,
+ STATUS_TYPE_NORMAL,
+ STATUS_TYPE_EMERGENCY,
+} StatusType;
+
+/* Notes:
+ * 1. TIMESTAMP_FIRMWARE, TIMESTAMP_LOADER, TIMESTAMP_KERNEL, TIMESTAMP_INITRD,
+ * TIMESTAMP_SECURITY_START, and TIMESTAMP_SECURITY_FINISH are set only when
+ * the manager is system and not running under container environment.
+ *
+ * 2. The monotonic timestamp of TIMESTAMP_KERNEL is always zero.
+ *
+ * 3. The realtime timestamp of TIMESTAMP_KERNEL will be unset if the system does not
+ * have RTC.
+ *
+ * 4. TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER will be unset if the system does not
+ * have RTC, or systemd is built without EFI support.
+ *
+ * 5. The monotonic timestamps of TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER are stored as
+ * negative of the actual value.
+ *
+ * 6. TIMESTAMP_USERSPACE is the timestamp of when the manager was started.
+ *
+ * 7. TIMESTAMP_INITRD_* are set only when the system is booted with an initrd.
+ */
+
+typedef enum ManagerTimestamp {
+ MANAGER_TIMESTAMP_FIRMWARE,
+ MANAGER_TIMESTAMP_LOADER,
+ MANAGER_TIMESTAMP_KERNEL,
+ MANAGER_TIMESTAMP_INITRD,
+ MANAGER_TIMESTAMP_USERSPACE,
+ MANAGER_TIMESTAMP_FINISH,
+
+ MANAGER_TIMESTAMP_SECURITY_START,
+ MANAGER_TIMESTAMP_SECURITY_FINISH,
+ MANAGER_TIMESTAMP_GENERATORS_START,
+ MANAGER_TIMESTAMP_GENERATORS_FINISH,
+ MANAGER_TIMESTAMP_UNITS_LOAD_START,
+ MANAGER_TIMESTAMP_UNITS_LOAD_FINISH,
+
+ MANAGER_TIMESTAMP_INITRD_SECURITY_START,
+ MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH,
+ MANAGER_TIMESTAMP_INITRD_GENERATORS_START,
+ MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH,
+ MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START,
+ MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH,
+ _MANAGER_TIMESTAMP_MAX,
+ _MANAGER_TIMESTAMP_INVALID = -1,
+} ManagerTimestamp;
+
+#include "execute.h"
+#include "job.h"
+#include "path-lookup.h"
+#include "show-status.h"
+#include "unit-name.h"
+
+typedef enum ManagerTestRunFlags {
+ MANAGER_TEST_NORMAL = 0, /* run normally */
+ MANAGER_TEST_RUN_MINIMAL = 1 << 0, /* create basic data structures */
+ MANAGER_TEST_RUN_BASIC = 1 << 1, /* interact with the environment */
+ MANAGER_TEST_RUN_ENV_GENERATORS = 1 << 2, /* also run env generators */
+ MANAGER_TEST_RUN_GENERATORS = 1 << 3, /* also run unit generators */
+ MANAGER_TEST_FULL = MANAGER_TEST_RUN_BASIC | MANAGER_TEST_RUN_ENV_GENERATORS | MANAGER_TEST_RUN_GENERATORS,
+} ManagerTestRunFlags;
+
+assert_cc((MANAGER_TEST_FULL & UINT8_MAX) == MANAGER_TEST_FULL);
+
+struct Manager {
+ /* Note that the set of units we know of is allowed to be
+ * inconsistent. However the subset of it that is loaded may
+ * not, and the list of jobs may neither. */
+
+ /* Active jobs and units */
+ Hashmap *units; /* name string => Unit object n:1 */
+ Hashmap *units_by_invocation_id;
+ Hashmap *jobs; /* job id => Job object 1:1 */
+
+ /* To make it easy to iterate through the units of a specific
+ * type we maintain a per type linked list */
+ LIST_HEAD(Unit, units_by_type[_UNIT_TYPE_MAX]);
+
+ /* Units that need to be loaded */
+ LIST_HEAD(Unit, load_queue); /* this is actually more a stack than a queue, but uh. */
+
+ /* Jobs that need to be run */
+ LIST_HEAD(Job, run_queue); /* more a stack than a queue, too */
+
+ /* Units and jobs that have not yet been announced via
+ * D-Bus. When something about a job changes it is added here
+ * if it is not in there yet. This allows easy coalescing of
+ * D-Bus change signals. */
+ LIST_HEAD(Unit, dbus_unit_queue);
+ LIST_HEAD(Job, dbus_job_queue);
+
+ /* Units to remove */
+ LIST_HEAD(Unit, cleanup_queue);
+
+ /* Units and jobs to check when doing GC */
+ LIST_HEAD(Unit, gc_unit_queue);
+ LIST_HEAD(Job, gc_job_queue);
+
+ /* Units that should be realized */
+ LIST_HEAD(Unit, cgroup_realize_queue);
+
+ /* Units whose cgroup ran empty */
+ LIST_HEAD(Unit, cgroup_empty_queue);
+
+ /* Target units whose default target dependencies haven't been set yet */
+ LIST_HEAD(Unit, target_deps_queue);
+
+ /* Units that might be subject to StopWhenUnneeded= clean-up */
+ LIST_HEAD(Unit, stop_when_unneeded_queue);
+
+ sd_event *event;
+
+ /* This maps PIDs we care about to units that are interested in. We allow multiple units to he interested in
+ * the same PID and multiple PIDs to be relevant to the same unit. Since in most cases only a single unit will
+ * be interested in the same PID we use a somewhat special encoding here: the first unit interested in a PID is
+ * stored directly in the hashmap, keyed by the PID unmodified. If there are other units interested too they'll
+ * be stored in a NULL-terminated array, and keyed by the negative PID. This is safe as pid_t is signed and
+ * negative PIDs are not used for regular processes but process groups, which we don't care about in this
+ * context, but this allows us to use the negative range for our own purposes. */
+ Hashmap *watch_pids; /* pid => unit as well as -pid => array of units */
+
+ /* A set contains all units which cgroup should be refreshed after startup */
+ Set *startup_units;
+
+ /* A set which contains all currently failed units */
+ Set *failed_units;
+
+ sd_event_source *run_queue_event_source;
+
+ char *notify_socket;
+ int notify_fd;
+ sd_event_source *notify_event_source;
+
+ int cgroups_agent_fd;
+ sd_event_source *cgroups_agent_event_source;
+
+ int signal_fd;
+ sd_event_source *signal_event_source;
+
+ sd_event_source *sigchld_event_source;
+
+ int time_change_fd;
+ sd_event_source *time_change_event_source;
+
+ sd_event_source *timezone_change_event_source;
+
+ sd_event_source *jobs_in_progress_event_source;
+
+ int user_lookup_fds[2];
+ sd_event_source *user_lookup_event_source;
+
+ sd_event_source *sync_bus_names_event_source;
+
+ UnitFileScope unit_file_scope;
+ LookupPaths lookup_paths;
+ Set *unit_path_cache;
+
+ char **transient_environment; /* The environment, as determined from config files, kernel cmdline and environment generators */
+ char **client_environment; /* Environment variables created by clients through the bus API */
+
+ usec_t runtime_watchdog;
+ usec_t shutdown_watchdog;
+
+ dual_timestamp timestamps[_MANAGER_TIMESTAMP_MAX];
+
+ /* Data specific to the device subsystem */
+ sd_device_monitor *device_monitor;
+ Hashmap *devices_by_sysfs;
+
+ /* Data specific to the mount subsystem */
+ struct libmnt_monitor *mount_monitor;
+ sd_event_source *mount_event_source;
+
+ /* Data specific to the swap filesystem */
+ FILE *proc_swaps;
+ sd_event_source *swap_event_source;
+ Hashmap *swaps_by_devnode;
+
+ /* Data specific to the D-Bus subsystem */
+ sd_bus *api_bus, *system_bus;
+ Set *private_buses;
+ int private_listen_fd;
+ sd_event_source *private_listen_event_source;
+
+ /* Contains all the clients that are subscribed to signals via
+ the API bus. Note that private bus connections are always
+ considered subscribes, since they last for very short only,
+ and it is much simpler that way. */
+ sd_bus_track *subscribed;
+ char **deserialized_subscribed;
+
+ /* This is used during reloading: before the reload we queue
+ * the reply message here, and afterwards we send it */
+ sd_bus_message *pending_reload_message;
+
+ Hashmap *watch_bus; /* D-Bus names => Unit object n:1 */
+
+ bool send_reloading_done;
+
+ uint32_t current_job_id;
+ uint32_t default_unit_job_id;
+
+ /* Data specific to the Automount subsystem */
+ int dev_autofs_fd;
+
+ /* Data specific to the cgroup subsystem */
+ Hashmap *cgroup_unit;
+ CGroupMask cgroup_supported;
+ char *cgroup_root;
+
+ /* Notifications from cgroups, when the unified hierarchy is used is done via inotify. */
+ int cgroup_inotify_fd;
+ sd_event_source *cgroup_inotify_event_source;
+ Hashmap *cgroup_inotify_wd_unit;
+
+ /* A defer event for handling cgroup empty events and processing them after SIGCHLD in all cases. */
+ sd_event_source *cgroup_empty_event_source;
+
+ /* Make sure the user cannot accidentally unmount our cgroup
+ * file system */
+ int pin_cgroupfs_fd;
+
+ unsigned gc_marker;
+
+ /* The stat() data the last time we saw /etc/localtime */
+ usec_t etc_localtime_mtime;
+ bool etc_localtime_accessible:1;
+
+ ManagerObjective objective:5;
+
+ /* Flags */
+ bool dispatching_load_queue:1;
+
+ bool taint_usr:1;
+
+ /* Have we already sent out the READY=1 notification? */
+ bool ready_sent:1;
+
+ /* Have we already printed the taint line if necessary? */
+ bool taint_logged:1;
+
+ /* Have we ever changed the "kernel.pid_max" sysctl? */
+ bool sysctl_pid_max_changed:1;
+
+ ManagerTestRunFlags test_run_flags:8;
+
+ /* If non-zero, exit with the following value when the systemd
+ * process terminate. Useful for containers: systemd-nspawn could get
+ * the return value. */
+ uint8_t return_value;
+
+ ShowStatus show_status;
+ char *confirm_spawn;
+ bool no_console_output;
+ bool service_watchdogs;
+
+ ExecOutput default_std_output, default_std_error;
+
+ usec_t default_restart_usec, default_timeout_start_usec, default_timeout_stop_usec;
+
+ usec_t default_start_limit_interval;
+ unsigned default_start_limit_burst;
+
+ bool default_cpu_accounting;
+ bool default_memory_accounting;
+ bool default_io_accounting;
+ bool default_blockio_accounting;
+ bool default_tasks_accounting;
+ bool default_ip_accounting;
+
+ uint64_t default_tasks_max;
+ usec_t default_timer_accuracy_usec;
+
+ int original_log_level;
+ LogTarget original_log_target;
+ bool log_level_overridden:1;
+ bool log_target_overridden:1;
+
+ struct rlimit *rlimit[_RLIMIT_MAX];
+
+ /* non-zero if we are reloading or reexecuting, */
+ int n_reloading;
+
+ unsigned n_installed_jobs;
+ unsigned n_failed_jobs;
+
+ /* Jobs in progress watching */
+ unsigned n_running_jobs;
+ unsigned n_on_console;
+ unsigned jobs_in_progress_iteration;
+
+ /* Do we have any outstanding password prompts? */
+ int have_ask_password;
+ int ask_password_inotify_fd;
+ sd_event_source *ask_password_event_source;
+
+ /* Type=idle pipes */
+ int idle_pipe[4];
+ sd_event_source *idle_pipe_event_source;
+
+ char *switch_root;
+ char *switch_root_init;
+
+ /* This maps all possible path prefixes to the units needing
+ * them. It's a hashmap with a path string as key and a Set as
+ * value where Unit objects are contained. */
+ Hashmap *units_requiring_mounts_for;
+
+ /* Used for processing polkit authorization responses */
+ Hashmap *polkit_registry;
+
+ /* Dynamic users/groups, indexed by their name */
+ Hashmap *dynamic_users;
+
+ /* Keep track of all UIDs and GIDs any of our services currently use. This is useful for the RemoveIPC= logic. */
+ Hashmap *uid_refs;
+ Hashmap *gid_refs;
+
+ /* ExecRuntime, indexed by their owner unit id */
+ Hashmap *exec_runtime_by_id;
+
+ /* When the user hits C-A-D more than 7 times per 2s, do something immediately... */
+ RateLimit ctrl_alt_del_ratelimit;
+ EmergencyAction cad_burst_action;
+
+ const char *unit_log_field;
+ const char *unit_log_format_string;
+
+ const char *invocation_log_field;
+ const char *invocation_log_format_string;
+
+ int first_boot; /* tri-state */
+
+ /* Prefixes of e.g. RuntimeDirectory= */
+ char *prefix[_EXEC_DIRECTORY_TYPE_MAX];
+
+ /* Used in the SIGCHLD and sd_notify() message invocation logic to avoid that we dispatch the same event
+ * multiple times on the same unit. */
+ unsigned sigchldgen;
+ unsigned notifygen;
+};
+
+#define MANAGER_IS_SYSTEM(m) ((m)->unit_file_scope == UNIT_FILE_SYSTEM)
+#define MANAGER_IS_USER(m) ((m)->unit_file_scope != UNIT_FILE_SYSTEM)
+
+#define MANAGER_IS_RELOADING(m) ((m)->n_reloading > 0)
+
+#define MANAGER_IS_FINISHED(m) (dual_timestamp_is_set((m)->timestamps + MANAGER_TIMESTAMP_FINISH))
+
+/* The objective is set to OK as soon as we enter the main loop, and set otherwise as soon as we are done with it */
+#define MANAGER_IS_RUNNING(m) ((m)->objective == MANAGER_OK)
+
+#define MANAGER_IS_TEST_RUN(m) ((m)->test_run_flags != 0)
+
+int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager **m);
+Manager* manager_free(Manager *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
+
+int manager_startup(Manager *m, FILE *serialization, FDSet *fds);
+
+Job *manager_get_job(Manager *m, uint32_t id);
+Unit *manager_get_unit(Manager *m, const char *name);
+
+int manager_get_job_from_dbus_path(Manager *m, const char *s, Job **_j);
+
+int manager_load_unit_prepare(Manager *m, const char *name, const char *path, sd_bus_error *e, Unit **_ret);
+int manager_load_unit(Manager *m, const char *name, const char *path, sd_bus_error *e, Unit **_ret);
+int manager_load_startable_unit_or_warn(Manager *m, const char *name, const char *path, Unit **ret);
+int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u);
+
+int manager_add_job(Manager *m, JobType type, Unit *unit, JobMode mode, sd_bus_error *e, Job **_ret);
+int manager_add_job_by_name(Manager *m, JobType type, const char *name, JobMode mode, sd_bus_error *e, Job **_ret);
+int manager_add_job_by_name_and_warn(Manager *m, JobType type, const char *name, JobMode mode, Job **ret);
+int manager_propagate_reload(Manager *m, Unit *unit, JobMode mode, sd_bus_error *e);
+
+void manager_dump_units(Manager *s, FILE *f, const char *prefix);
+void manager_dump_jobs(Manager *s, FILE *f, const char *prefix);
+void manager_dump(Manager *s, FILE *f, const char *prefix);
+int manager_get_dump_string(Manager *m, char **ret);
+
+void manager_clear_jobs(Manager *m);
+
+unsigned manager_dispatch_load_queue(Manager *m);
+
+int manager_default_environment(Manager *m);
+int manager_transient_environment_add(Manager *m, char **plus);
+int manager_client_environment_modify(Manager *m, char **minus, char **plus);
+int manager_get_effective_environment(Manager *m, char ***ret);
+
+int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit);
+
+int manager_loop(Manager *m);
+
+int manager_open_serialization(Manager *m, FILE **_f);
+
+int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root);
+int manager_deserialize(Manager *m, FILE *f, FDSet *fds);
+
+int manager_reload(Manager *m);
+
+void manager_reset_failed(Manager *m);
+
+void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success);
+void manager_send_unit_plymouth(Manager *m, Unit *u);
+
+bool manager_unit_inactive_or_pending(Manager *m, const char *name);
+
+void manager_check_finished(Manager *m);
+
+void manager_recheck_dbus(Manager *m);
+void manager_recheck_journal(Manager *m);
+
+void manager_set_show_status(Manager *m, ShowStatus mode);
+void manager_set_first_boot(Manager *m, bool b);
+
+void manager_status_printf(Manager *m, StatusType type, const char *status, const char *format, ...) _printf_(4,5);
+void manager_flip_auto_status(Manager *m, bool enable);
+
+Set *manager_get_units_requiring_mounts_for(Manager *m, const char *path);
+
+ManagerState manager_state(Manager *m);
+
+int manager_update_failed_units(Manager *m, Unit *u, bool failed);
+
+void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now);
+int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc);
+
+void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now);
+int manager_ref_gid(Manager *m, gid_t gid, bool destroy_now);
+
+void manager_vacuum_uid_refs(Manager *m);
+void manager_vacuum_gid_refs(Manager *m);
+
+void manager_serialize_uid_refs(Manager *m, FILE *f);
+void manager_deserialize_uid_refs_one(Manager *m, const char *value);
+
+void manager_serialize_gid_refs(Manager *m, FILE *f);
+void manager_deserialize_gid_refs_one(Manager *m, const char *value);
+
+char *manager_taint_string(Manager *m);
+
+void manager_ref_console(Manager *m);
+void manager_unref_console(Manager *m);
+
+void manager_override_log_level(Manager *m, int level);
+void manager_restore_original_log_level(Manager *m);
+
+void manager_override_log_target(Manager *m, LogTarget target);
+void manager_restore_original_log_target(Manager *m);
+
+const char *manager_state_to_string(ManagerState m) _const_;
+ManagerState manager_state_from_string(const char *s) _pure_;
+
+const char *manager_get_confirm_spawn(Manager *m);
+bool manager_is_confirm_spawn_disabled(Manager *m);
+void manager_disable_confirm_spawn(void);
+
+const char *manager_timestamp_to_string(ManagerTimestamp m) _const_;
+ManagerTimestamp manager_timestamp_from_string(const char *s) _pure_;
+ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s);
diff --git a/src/core/meson.build b/src/core/meson.build
new file mode 100644
index 0000000..85021bd
--- /dev/null
+++ b/src/core/meson.build
@@ -0,0 +1,218 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+libcore_la_sources = '''
+ audit-fd.c
+ audit-fd.h
+ automount.c
+ automount.h
+ bpf-devices.c
+ bpf-devices.h
+ bpf-firewall.c
+ bpf-firewall.h
+ cgroup.c
+ cgroup.h
+ chown-recursive.c
+ chown-recursive.h
+ dbus-automount.c
+ dbus-automount.h
+ dbus-cgroup.c
+ dbus-cgroup.h
+ dbus-device.c
+ dbus-device.h
+ dbus-execute.c
+ dbus-execute.h
+ dbus-job.c
+ dbus-job.h
+ dbus-kill.c
+ dbus-kill.h
+ dbus-manager.c
+ dbus-manager.h
+ dbus-mount.c
+ dbus-mount.h
+ dbus-path.c
+ dbus-path.h
+ dbus-scope.c
+ dbus-scope.h
+ dbus-service.c
+ dbus-service.h
+ dbus-slice.c
+ dbus-slice.h
+ dbus-socket.c
+ dbus-socket.h
+ dbus-swap.c
+ dbus-swap.h
+ dbus-target.c
+ dbus-target.h
+ dbus-timer.c
+ dbus-timer.h
+ dbus-unit.c
+ dbus-unit.h
+ dbus-util.c
+ dbus-util.h
+ dbus.c
+ dbus.h
+ device.c
+ device.h
+ dynamic-user.c
+ dynamic-user.h
+ emergency-action.c
+ emergency-action.h
+ execute.c
+ execute.h
+ hostname-setup.c
+ hostname-setup.h
+ ima-setup.c
+ ima-setup.h
+ ip-address-access.c
+ ip-address-access.h
+ job.c
+ job.h
+ kill.c
+ kill.h
+ killall.c
+ killall.h
+ kmod-setup.c
+ kmod-setup.h
+ load-dropin.c
+ load-dropin.h
+ load-fragment.c
+ load-fragment.h
+ locale-setup.c
+ locale-setup.h
+ loopback-setup.c
+ loopback-setup.h
+ machine-id-setup.c
+ machine-id-setup.h
+ manager.c
+ manager.h
+ mount-setup.c
+ mount-setup.h
+ mount.c
+ mount.h
+ namespace.c
+ namespace.h
+ path.c
+ path.h
+ scope.c
+ scope.h
+ selinux-access.c
+ selinux-access.h
+ selinux-setup.c
+ selinux-setup.h
+ service.c
+ service.h
+ show-status.c
+ show-status.h
+ slice.c
+ slice.h
+ smack-setup.c
+ smack-setup.h
+ socket.c
+ socket.h
+ swap.c
+ swap.h
+ target.c
+ target.h
+ timer.c
+ timer.h
+ transaction.c
+ transaction.h
+ unit-printf.c
+ unit-printf.h
+ unit.c
+ unit.h
+'''.split()
+
+load_fragment_gperf_gperf = custom_target(
+ 'load-fragment-gperf.gperf',
+ input : 'load-fragment-gperf.gperf.m4',
+ output: 'load-fragment-gperf.gperf',
+ command : [meson_apply_m4, config_h, '@INPUT@'],
+ capture : true)
+
+load_fragment_gperf_c = custom_target(
+ 'load-fragment-gperf.c',
+ input : load_fragment_gperf_gperf,
+ output : 'load-fragment-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+awkscript = 'load-fragment-gperf-nulstr.awk'
+load_fragment_gperf_nulstr_c = custom_target(
+ 'load-fragment-gperf-nulstr.c',
+ input : [awkscript, load_fragment_gperf_gperf],
+ output : 'load-fragment-gperf-nulstr.c',
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+libcore = static_library(
+ 'core',
+ libcore_la_sources,
+ load_fragment_gperf_c,
+ load_fragment_gperf_nulstr_c,
+ include_directories : includes,
+ dependencies : [threads,
+ librt,
+ libseccomp,
+ libpam,
+ libaudit,
+ libkmod,
+ libapparmor,
+ libselinux,
+ libmount])
+
+systemd_sources = files('main.c')
+
+systemd_shutdown_sources = files('''
+ shutdown.c
+ umount.c
+ umount.h
+ mount-setup.c
+ mount-setup.h
+ killall.c
+ killall.h
+'''.split())
+
+in_files = [['macros.systemd', rpmmacrosdir],
+ ['system.conf', pkgsysconfdir],
+ ['systemd.pc', pkgconfigdatadir],
+ ['triggers.systemd', '']]
+
+foreach item : in_files
+ file = item[0]
+ dir = item[1]
+
+ configure_file(
+ input : file + '.in',
+ output : file,
+ configuration : substs,
+ install_dir : dir == 'no' ? '' : dir)
+endforeach
+
+install_data('org.freedesktop.systemd1.conf',
+ install_dir : dbuspolicydir)
+install_data('org.freedesktop.systemd1.service',
+ install_dir : dbussystemservicedir)
+
+policy = configure_file(
+ input : 'org.freedesktop.systemd1.policy.in',
+ output : 'org.freedesktop.systemd1.policy',
+ configuration : substs)
+install_data(policy,
+ install_dir : polkitpolicydir)
+
+install_data('user.conf',
+ install_dir : pkgsysconfdir)
+
+meson.add_install_script('sh', '-c', mkdir_p.format(systemshutdowndir))
+meson.add_install_script('sh', '-c', mkdir_p.format(systemsleepdir))
+meson.add_install_script('sh', '-c', mkdir_p.format(systemgeneratordir))
+meson.add_install_script('sh', '-c', mkdir_p.format(usergeneratordir))
+
+meson.add_install_script('sh', '-c',
+ mkdir_p.format(join_paths(pkgsysconfdir, 'system/multi-user.target.wants')))
+meson.add_install_script('sh', '-c',
+ mkdir_p.format(join_paths(pkgsysconfdir, 'system/getty.target.wants')))
+meson.add_install_script('sh', '-c',
+ mkdir_p.format(join_paths(pkgsysconfdir, 'user')))
+meson.add_install_script('sh', '-c',
+ mkdir_p.format(join_paths(sysconfdir, 'xdg/systemd')))
diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c
new file mode 100644
index 0000000..3ce6164
--- /dev/null
+++ b/src/core/mount-setup.c
@@ -0,0 +1,566 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <ftw.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/statvfs.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "dev-setup.h"
+#include "dirent-util.h"
+#include "efivars.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "label.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "mount-setup.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "smack-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+typedef enum MountMode {
+ MNT_NONE = 0,
+ MNT_FATAL = 1 << 0,
+ MNT_IN_CONTAINER = 1 << 1,
+ MNT_CHECK_WRITABLE = 1 << 2,
+} MountMode;
+
+typedef struct MountPoint {
+ const char *what;
+ const char *where;
+ const char *type;
+ const char *options;
+ unsigned long flags;
+ bool (*condition_fn)(void);
+ MountMode mode;
+} MountPoint;
+
+/* The first three entries we might need before SELinux is up. The
+ * fourth (securityfs) is needed by IMA to load a custom policy. The
+ * other ones we can delay until SELinux and IMA are loaded. When
+ * SMACK is enabled we need smackfs, too, so it's a fifth one. */
+#if ENABLE_SMACK
+#define N_EARLY_MOUNT 5
+#else
+#define N_EARLY_MOUNT 4
+#endif
+
+static const MountPoint mount_table[] = {
+ { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_NONE },
+#if ENABLE_SMACK
+ { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ mac_smack_use, MNT_FATAL },
+ { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ mac_smack_use, MNT_FATAL },
+#endif
+ { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
+ NULL, MNT_IN_CONTAINER },
+#if ENABLE_SMACK
+ { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ mac_smack_use, MNT_FATAL },
+#endif
+ { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "cgroup2", "/sys/fs/cgroup", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
+ { "cgroup2", "/sys/fs/cgroup", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
+ { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
+ cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
+ { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
+ { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
+ { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_legacy_wanted, MNT_IN_CONTAINER },
+ { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
+ { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_NONE },
+#if ENABLE_EFI
+ { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ is_efi_boot, MNT_NONE },
+#endif
+ { "bpf", "/sys/fs/bpf", "bpf", "mode=700", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_NONE, },
+};
+
+/* These are API file systems that might be mounted by other software,
+ * we just list them here so that we know that we should ignore them */
+
+static const char ignore_paths[] =
+ /* SELinux file systems */
+ "/sys/fs/selinux\0"
+ /* Container bind mounts */
+ "/proc/sys\0"
+ "/dev/console\0"
+ "/proc/kmsg\0";
+
+bool mount_point_is_api(const char *path) {
+ unsigned i;
+
+ /* Checks if this mount point is considered "API", and hence
+ * should be ignored */
+
+ for (i = 0; i < ELEMENTSOF(mount_table); i ++)
+ if (path_equal(path, mount_table[i].where))
+ return true;
+
+ return path_startswith(path, "/sys/fs/cgroup/");
+}
+
+bool mount_point_ignore(const char *path) {
+ const char *i;
+
+ NULSTR_FOREACH(i, ignore_paths)
+ if (path_equal(path, i))
+ return true;
+
+ return false;
+}
+
+static int mount_one(const MountPoint *p, bool relabel) {
+ int r, priority;
+
+ assert(p);
+
+ priority = (p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG;
+
+ if (p->condition_fn && !p->condition_fn())
+ return 0;
+
+ /* Relabel first, just in case */
+ if (relabel)
+ (void) label_fix(p->where, LABEL_IGNORE_ENOENT|LABEL_IGNORE_EROFS);
+
+ r = path_is_mount_point(p->where, NULL, AT_SYMLINK_FOLLOW);
+ if (r < 0 && r != -ENOENT) {
+ log_full_errno(priority, r, "Failed to determine whether %s is a mount point: %m", p->where);
+ return (p->mode & MNT_FATAL) ? r : 0;
+ }
+ if (r > 0)
+ return 0;
+
+ /* Skip securityfs in a container */
+ if (!(p->mode & MNT_IN_CONTAINER) && detect_container() > 0)
+ return 0;
+
+ /* The access mode here doesn't really matter too much, since
+ * the mounted file system will take precedence anyway. */
+ if (relabel)
+ (void) mkdir_p_label(p->where, 0755);
+ else
+ (void) mkdir_p(p->where, 0755);
+
+ log_debug("Mounting %s to %s of type %s with options %s.",
+ p->what,
+ p->where,
+ p->type,
+ strna(p->options));
+
+ if (mount(p->what,
+ p->where,
+ p->type,
+ p->flags,
+ p->options) < 0) {
+ log_full_errno(priority, errno, "Failed to mount %s at %s: %m", p->type, p->where);
+ return (p->mode & MNT_FATAL) ? -errno : 0;
+ }
+
+ /* Relabel again, since we now mounted something fresh here */
+ if (relabel)
+ (void) label_fix(p->where, 0);
+
+ if (p->mode & MNT_CHECK_WRITABLE) {
+ if (access(p->where, W_OK) < 0) {
+ r = -errno;
+
+ (void) umount(p->where);
+ (void) rmdir(p->where);
+
+ log_full_errno(priority, r, "Mount point %s not writable after mounting: %m", p->where);
+ return (p->mode & MNT_FATAL) ? r : 0;
+ }
+ }
+
+ return 1;
+}
+
+static int mount_points_setup(unsigned n, bool loaded_policy) {
+ unsigned i;
+ int r = 0;
+
+ for (i = 0; i < n; i ++) {
+ int j;
+
+ j = mount_one(mount_table + i, loaded_policy);
+ if (j != 0 && r >= 0)
+ r = j;
+ }
+
+ return r;
+}
+
+int mount_setup_early(void) {
+ assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
+
+ /* Do a minimal mount of /proc and friends to enable the most
+ * basic stuff, such as SELinux */
+ return mount_points_setup(N_EARLY_MOUNT, false);
+}
+
+static const char *join_with(const char *controller) {
+
+ static const char* const pairs[] = {
+ "cpu", "cpuacct",
+ "net_cls", "net_prio",
+ NULL
+ };
+
+ const char *const *x, *const *y;
+
+ assert(controller);
+
+ /* This will lookup which controller to mount another controller with. Input is a controller name, and output
+ * is the other controller name. The function works both ways: you can input one and get the other, and input
+ * the other to get the one. */
+
+ STRV_FOREACH_PAIR(x, y, pairs) {
+ if (streq(controller, *x))
+ return *y;
+ if (streq(controller, *y))
+ return *x;
+ }
+
+ return NULL;
+}
+
+static int symlink_controller(const char *target, const char *alias) {
+ const char *a;
+ int r;
+
+ assert(target);
+ assert(alias);
+
+ a = strjoina("/sys/fs/cgroup/", alias);
+
+ r = symlink_idempotent(target, a, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create symlink %s: %m", a);
+
+#ifdef SMACK_RUN_LABEL
+ const char *p;
+
+ p = strjoina("/sys/fs/cgroup/", target);
+
+ r = mac_smack_copy(a, p);
+ if (r < 0 && r != -EOPNOTSUPP)
+ return log_error_errno(r, "Failed to copy smack label from %s to %s: %m", p, a);
+#endif
+
+ return 0;
+}
+
+int mount_cgroup_controllers(void) {
+ _cleanup_set_free_free_ Set *controllers = NULL;
+ int r;
+
+ if (!cg_is_legacy_wanted())
+ return 0;
+
+ /* Mount all available cgroup controllers that are built into the kernel. */
+ r = cg_kernel_controllers(&controllers);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate cgroup controllers: %m");
+
+ for (;;) {
+ _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
+ const char *other_controller;
+ MountPoint p = {
+ .what = "cgroup",
+ .type = "cgroup",
+ .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ .mode = MNT_IN_CONTAINER,
+ };
+
+ controller = set_steal_first(controllers);
+ if (!controller)
+ break;
+
+ /* Check if we shall mount this together with another controller */
+ other_controller = join_with(controller);
+ if (other_controller) {
+ _cleanup_free_ char *c = NULL;
+
+ /* Check if the other controller is actually available in the kernel too */
+ c = set_remove(controllers, other_controller);
+ if (c) {
+
+ /* Join the two controllers into one string, and maintain a stable ordering */
+ if (strcmp(controller, other_controller) < 0)
+ options = strjoin(controller, ",", other_controller);
+ else
+ options = strjoin(other_controller, ",", controller);
+ if (!options)
+ return log_oom();
+ }
+ }
+
+ /* The simple case, where there's only one controller to mount together */
+ if (!options)
+ options = TAKE_PTR(controller);
+
+ where = strappend("/sys/fs/cgroup/", options);
+ if (!where)
+ return log_oom();
+
+ p.where = where;
+ p.options = options;
+
+ r = mount_one(&p, true);
+ if (r < 0)
+ return r;
+
+ /* Create symlinks from the individual controller names, in case we have a joined mount */
+ if (controller)
+ (void) symlink_controller(options, controller);
+ if (other_controller)
+ (void) symlink_controller(options, other_controller);
+ }
+
+ /* Now that we mounted everything, let's make the tmpfs the cgroup file systems are mounted into read-only. */
+ (void) mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
+
+ return 0;
+}
+
+#if HAVE_SELINUX || ENABLE_SMACK
+static int nftw_cb(
+ const char *fpath,
+ const struct stat *sb,
+ int tflag,
+ struct FTW *ftwbuf) {
+
+ /* No need to label /dev twice in a row... */
+ if (_unlikely_(ftwbuf->level == 0))
+ return FTW_CONTINUE;
+
+ (void) label_fix(fpath, 0);
+
+ /* /run/initramfs is static data and big, no need to
+ * dynamically relabel its contents at boot... */
+ if (_unlikely_(ftwbuf->level == 1 &&
+ tflag == FTW_D &&
+ streq(fpath, "/run/initramfs")))
+ return FTW_SKIP_SUBTREE;
+
+ return FTW_CONTINUE;
+};
+
+static int relabel_cgroup_filesystems(void) {
+ int r;
+ struct statfs st;
+
+ r = cg_all_unified();
+ if (r == 0) {
+ /* Temporarily remount the root cgroup filesystem to give it a proper label. Do this
+ only when the filesystem has been already populated by a previous instance of systemd
+ running from initrd. Otherwise don't remount anything and leave the filesystem read-write
+ for the cgroup filesystems to be mounted inside. */
+ if (statfs("/sys/fs/cgroup", &st) < 0)
+ return log_error_errno(errno, "Failed to determine mount flags for /sys/fs/cgroup: %m");
+
+ if (st.f_flags & ST_RDONLY)
+ (void) mount(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT, NULL);
+
+ (void) label_fix("/sys/fs/cgroup", 0);
+ (void) nftw("/sys/fs/cgroup", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
+
+ if (st.f_flags & ST_RDONLY)
+ (void) mount(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT|MS_RDONLY, NULL);
+
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to determine whether we are in all unified mode: %m");
+
+ return 0;
+}
+
+static int relabel_extra(void) {
+ _cleanup_closedir_ DIR *d = NULL;
+ int r, c = 0;
+
+ /* Support for relabelling additional files or directories after loading the policy. For this, code in the
+ * initrd simply has to drop in *.relabel files into /run/systemd/relabel-extra.d/. We'll read all such files
+ * expecting one absolute path by line and will relabel each (and everyone below that in case the path refers
+ * to a directory). These drop-in files are supposed to be absolutely minimal, and do not understand comments
+ * and such. After the operation succeeded the files are removed, and the drop-in directory as well, if
+ * possible.
+ */
+
+ d = opendir("/run/systemd/relabel-extra.d/");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /run/systemd/relabel-extra.d/, ignoring: %m");
+ }
+
+ for (;;) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_close_ int fd = -1;
+ struct dirent *de;
+
+ errno = 0;
+ de = readdir_no_dot(d);
+ if (!de) {
+ if (errno != 0)
+ return log_error_errno(errno, "Failed read directory /run/systemd/relabel-extra.d/, ignoring: %m");
+ break;
+ }
+
+ if (hidden_or_backup_file(de->d_name))
+ continue;
+
+ if (!endswith(de->d_name, ".relabel"))
+ continue;
+
+ if (!IN_SET(de->d_type, DT_REG, DT_UNKNOWN))
+ continue;
+
+ fd = openat(dirfd(d), de->d_name, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0) {
+ log_warning_errno(errno, "Failed to open /run/systemd/relabel-extra.d/%s, ignoring: %m", de->d_name);
+ continue;
+ }
+
+ f = fdopen(fd, "r");
+ if (!f) {
+ log_warning_errno(errno, "Failed to convert file descriptor into file object, ignoring: %m");
+ continue;
+ }
+ TAKE_FD(fd);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read from /run/systemd/relabel-extra.d/%s, ignoring: %m", de->d_name);
+ break;
+ }
+ if (r == 0) /* EOF */
+ break;
+
+ path_simplify(line, true);
+
+ if (!path_is_normalized(line)) {
+ log_warning("Path to relabel is not normalized, ignoring: %s", line);
+ continue;
+ }
+
+ if (!path_is_absolute(line)) {
+ log_warning("Path to relabel is not absolute, ignoring: %s", line);
+ continue;
+ }
+
+ log_debug("Relabelling additional file/directory '%s'.", line);
+ (void) nftw(line, nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
+ c++;
+ }
+
+ if (unlinkat(dirfd(d), de->d_name, 0) < 0)
+ log_warning_errno(errno, "Failed to remove /run/systemd/relabel-extra.d/%s, ignoring: %m", de->d_name);
+ }
+
+ /* Remove when we completing things. */
+ if (rmdir("/run/systemd/relabel-extra.d") < 0)
+ log_warning_errno(errno, "Failed to remove /run/systemd/relabel-extra.d/ directory: %m");
+
+ return c;
+}
+#endif
+
+int mount_setup(bool loaded_policy) {
+ int r = 0;
+
+ r = mount_points_setup(ELEMENTSOF(mount_table), loaded_policy);
+ if (r < 0)
+ return r;
+
+#if HAVE_SELINUX || ENABLE_SMACK
+ /* Nodes in devtmpfs and /run need to be manually updated for
+ * the appropriate labels, after mounting. The other virtual
+ * API file systems like /sys and /proc do not need that, they
+ * use the same label for all their files. */
+ if (loaded_policy) {
+ usec_t before_relabel, after_relabel;
+ char timespan[FORMAT_TIMESPAN_MAX];
+ const char *i;
+ int n_extra;
+
+ before_relabel = now(CLOCK_MONOTONIC);
+
+ FOREACH_STRING(i, "/dev", "/dev/shm", "/run")
+ (void) nftw(i, nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
+
+ (void) relabel_cgroup_filesystems();
+
+ n_extra = relabel_extra();
+
+ after_relabel = now(CLOCK_MONOTONIC);
+
+ log_info("Relabelled /dev, /dev/shm, /run, /sys/fs/cgroup%s in %s.",
+ n_extra > 0 ? ", additional files" : "",
+ format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
+ }
+#endif
+
+ /* Create a few default symlinks, which are normally created
+ * by udevd, but some scripts might need them before we start
+ * udevd. */
+ dev_setup(NULL, UID_INVALID, GID_INVALID);
+
+ /* Mark the root directory as shared in regards to mount propagation. The kernel defaults to "private", but we
+ * think it makes more sense to have a default of "shared" so that nspawn and the container tools work out of
+ * the box. If specific setups need other settings they can reset the propagation mode to private if
+ * needed. Note that we set this only when we are invoked directly by the kernel. If we are invoked by a
+ * container manager we assume the container manager knows what it is doing (for example, because it set up
+ * some directories with different propagation modes). */
+ if (detect_container() <= 0)
+ if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
+ log_warning_errno(errno, "Failed to set up the root directory for shared mount propagation: %m");
+
+ /* Create a few directories we always want around, Note that sd_booted() checks for /run/systemd/system, so
+ * this mkdir really needs to stay for good, otherwise software that copied sd-daemon.c into their sources will
+ * misdetect systemd. */
+ (void) mkdir_label("/run/systemd", 0755);
+ (void) mkdir_label("/run/systemd/system", 0755);
+
+ /* Also create /run/systemd/inaccessible nodes, so that we always have something to mount inaccessible nodes
+ * from. */
+ (void) make_inaccessible_nodes(NULL, UID_INVALID, GID_INVALID);
+
+ return 0;
+}
diff --git a/src/core/mount-setup.h b/src/core/mount-setup.h
new file mode 100644
index 0000000..b4ca2cf
--- /dev/null
+++ b/src/core/mount-setup.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+int mount_setup_early(void);
+int mount_setup(bool loaded_policy);
+
+int mount_cgroup_controllers(void);
+
+bool mount_point_is_api(const char *path);
+bool mount_point_ignore(const char *path);
diff --git a/src/core/mount.c b/src/core/mount.c
new file mode 100644
index 0000000..c31cad6
--- /dev/null
+++ b/src/core/mount.c
@@ -0,0 +1,2043 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <sys/epoll.h>
+
+#include <libmount.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "dbus-mount.h"
+#include "dbus-unit.h"
+#include "device.h"
+#include "escape.h"
+#include "exit-status.h"
+#include "format-util.h"
+#include "fstab-util.h"
+#include "log.h"
+#include "manager.h"
+#include "mkdir.h"
+#include "mount-setup.h"
+#include "mount.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "special.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit.h"
+
+#define RETRY_UMOUNT_MAX 32
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct libmnt_table*, mnt_free_table);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct libmnt_iter*, mnt_free_iter);
+
+static const UnitActiveState state_translation_table[_MOUNT_STATE_MAX] = {
+ [MOUNT_DEAD] = UNIT_INACTIVE,
+ [MOUNT_MOUNTING] = UNIT_ACTIVATING,
+ [MOUNT_MOUNTING_DONE] = UNIT_ACTIVATING,
+ [MOUNT_MOUNTED] = UNIT_ACTIVE,
+ [MOUNT_REMOUNTING] = UNIT_RELOADING,
+ [MOUNT_UNMOUNTING] = UNIT_DEACTIVATING,
+ [MOUNT_REMOUNTING_SIGTERM] = UNIT_RELOADING,
+ [MOUNT_REMOUNTING_SIGKILL] = UNIT_RELOADING,
+ [MOUNT_UNMOUNTING_SIGTERM] = UNIT_DEACTIVATING,
+ [MOUNT_UNMOUNTING_SIGKILL] = UNIT_DEACTIVATING,
+ [MOUNT_FAILED] = UNIT_FAILED
+};
+
+static int mount_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
+static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+
+static bool MOUNT_STATE_WITH_PROCESS(MountState state) {
+ return IN_SET(state,
+ MOUNT_MOUNTING,
+ MOUNT_MOUNTING_DONE,
+ MOUNT_REMOUNTING,
+ MOUNT_REMOUNTING_SIGTERM,
+ MOUNT_REMOUNTING_SIGKILL,
+ MOUNT_UNMOUNTING,
+ MOUNT_UNMOUNTING_SIGTERM,
+ MOUNT_UNMOUNTING_SIGKILL);
+}
+
+static bool mount_is_network(const MountParameters *p) {
+ assert(p);
+
+ if (fstab_test_option(p->options, "_netdev\0"))
+ return true;
+
+ if (p->fstype && fstype_is_network(p->fstype))
+ return true;
+
+ return false;
+}
+
+static bool mount_is_loop(const MountParameters *p) {
+ assert(p);
+
+ if (fstab_test_option(p->options, "loop\0"))
+ return true;
+
+ return false;
+}
+
+static bool mount_is_bind(const MountParameters *p) {
+ assert(p);
+
+ if (fstab_test_option(p->options, "bind\0" "rbind\0"))
+ return true;
+
+ if (p->fstype && STR_IN_SET(p->fstype, "bind", "rbind"))
+ return true;
+
+ return false;
+}
+
+static bool mount_is_auto(const MountParameters *p) {
+ assert(p);
+
+ return !fstab_test_option(p->options, "noauto\0");
+}
+
+static bool mount_is_automount(const MountParameters *p) {
+ assert(p);
+
+ return fstab_test_option(p->options,
+ "comment=systemd.automount\0"
+ "x-systemd.automount\0");
+}
+
+static bool mount_is_bound_to_device(const Mount *m) {
+ const MountParameters *p;
+
+ if (m->from_fragment)
+ return true;
+
+ p = &m->parameters_proc_self_mountinfo;
+ return fstab_test_option(p->options, "x-systemd.device-bound\0");
+}
+
+static bool mount_needs_quota(const MountParameters *p) {
+ assert(p);
+
+ /* Quotas are not enabled on network filesystems, but we want them, for example, on storage connected via
+ * iscsi. We hence don't use mount_is_network() here, as that would also return true for _netdev devices. */
+ if (p->fstype && fstype_is_network(p->fstype))
+ return false;
+
+ if (mount_is_bind(p))
+ return false;
+
+ return fstab_test_option(p->options,
+ "usrquota\0" "grpquota\0" "quota\0" "usrjquota\0" "grpjquota\0");
+}
+
+static void mount_init(Unit *u) {
+ Mount *m = MOUNT(u);
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ m->timeout_usec = u->manager->default_timeout_start_usec;
+
+ m->exec_context.std_output = u->manager->default_std_output;
+ m->exec_context.std_error = u->manager->default_std_error;
+
+ m->directory_mode = 0755;
+
+ /* We need to make sure that /usr/bin/mount is always called
+ * in the same process group as us, so that the autofs kernel
+ * side doesn't send us another mount request while we are
+ * already trying to comply its last one. */
+ m->exec_context.same_pgrp = true;
+
+ m->control_command_id = _MOUNT_EXEC_COMMAND_INVALID;
+
+ u->ignore_on_isolate = true;
+}
+
+static int mount_arm_timer(Mount *m, usec_t usec) {
+ int r;
+
+ assert(m);
+
+ if (m->timer_event_source) {
+ r = sd_event_source_set_time(m->timer_event_source, usec);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(m->timer_event_source, SD_EVENT_ONESHOT);
+ }
+
+ if (usec == USEC_INFINITY)
+ return 0;
+
+ r = sd_event_add_time(
+ UNIT(m)->manager->event,
+ &m->timer_event_source,
+ CLOCK_MONOTONIC,
+ usec, 0,
+ mount_dispatch_timer, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->timer_event_source, "mount-timer");
+
+ return 0;
+}
+
+static void mount_unwatch_control_pid(Mount *m) {
+ assert(m);
+
+ if (m->control_pid <= 0)
+ return;
+
+ unit_unwatch_pid(UNIT(m), m->control_pid);
+ m->control_pid = 0;
+}
+
+static void mount_parameters_done(MountParameters *p) {
+ assert(p);
+
+ p->what = mfree(p->what);
+ p->options = mfree(p->options);
+ p->fstype = mfree(p->fstype);
+}
+
+static void mount_done(Unit *u) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+
+ m->where = mfree(m->where);
+
+ mount_parameters_done(&m->parameters_proc_self_mountinfo);
+ mount_parameters_done(&m->parameters_fragment);
+
+ m->exec_runtime = exec_runtime_unref(m->exec_runtime, false);
+ exec_command_done_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX);
+ m->control_command = NULL;
+
+ dynamic_creds_unref(&m->dynamic_creds);
+
+ mount_unwatch_control_pid(m);
+
+ m->timer_event_source = sd_event_source_unref(m->timer_event_source);
+}
+
+_pure_ static MountParameters* get_mount_parameters_fragment(Mount *m) {
+ assert(m);
+
+ if (m->from_fragment)
+ return &m->parameters_fragment;
+
+ return NULL;
+}
+
+_pure_ static MountParameters* get_mount_parameters(Mount *m) {
+ assert(m);
+
+ if (m->from_proc_self_mountinfo)
+ return &m->parameters_proc_self_mountinfo;
+
+ return get_mount_parameters_fragment(m);
+}
+
+static int update_parameters_proc_self_mount_info(
+ Mount *m,
+ const char *what,
+ const char *options,
+ const char *fstype) {
+
+ MountParameters *p;
+ int r, q, w;
+
+ p = &m->parameters_proc_self_mountinfo;
+
+ r = free_and_strdup(&p->what, what);
+ if (r < 0)
+ return r;
+
+ q = free_and_strdup(&p->options, options);
+ if (q < 0)
+ return q;
+
+ w = free_and_strdup(&p->fstype, fstype);
+ if (w < 0)
+ return w;
+
+ return r > 0 || q > 0 || w > 0;
+}
+
+static int mount_add_mount_dependencies(Mount *m) {
+ MountParameters *pm;
+ Unit *other;
+ Iterator i;
+ Set *s;
+ int r;
+
+ assert(m);
+
+ if (!path_equal(m->where, "/")) {
+ _cleanup_free_ char *parent = NULL;
+
+ /* Adds in links to other mount points that might lie further up in the hierarchy */
+
+ parent = dirname_malloc(m->where);
+ if (!parent)
+ return -ENOMEM;
+
+ r = unit_require_mounts_for(UNIT(m), parent, UNIT_DEPENDENCY_IMPLICIT);
+ if (r < 0)
+ return r;
+ }
+
+ /* Adds in dependencies to other mount points that might be needed for the source path (if this is a bind mount
+ * or a loop mount) to be available. */
+ pm = get_mount_parameters_fragment(m);
+ if (pm && pm->what &&
+ path_is_absolute(pm->what) &&
+ (mount_is_bind(pm) || mount_is_loop(pm) || !mount_is_network(pm))) {
+
+ r = unit_require_mounts_for(UNIT(m), pm->what, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ /* Adds in dependencies to other units that use this path or paths further down in the hierarchy */
+ s = manager_get_units_requiring_mounts_for(UNIT(m)->manager, m->where);
+ SET_FOREACH(other, s, i) {
+
+ if (other->load_state != UNIT_LOADED)
+ continue;
+
+ if (other == UNIT(m))
+ continue;
+
+ r = unit_add_dependency(other, UNIT_AFTER, UNIT(m), true, UNIT_DEPENDENCY_PATH);
+ if (r < 0)
+ return r;
+
+ if (UNIT(m)->fragment_path) {
+ /* If we have fragment configuration, then make this dependency required */
+ r = unit_add_dependency(other, UNIT_REQUIRES, UNIT(m), true, UNIT_DEPENDENCY_PATH);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int mount_add_device_dependencies(Mount *m) {
+ bool device_wants_mount;
+ UnitDependencyMask mask;
+ MountParameters *p;
+ UnitDependency dep;
+ int r;
+
+ assert(m);
+
+ p = get_mount_parameters(m);
+ if (!p)
+ return 0;
+
+ if (!p->what)
+ return 0;
+
+ if (mount_is_bind(p))
+ return 0;
+
+ if (!is_device_path(p->what))
+ return 0;
+
+ /* /dev/root is a really weird thing, it's not a real device,
+ * but just a path the kernel exports for the root file system
+ * specified on the kernel command line. Ignore it here. */
+ if (path_equal(p->what, "/dev/root"))
+ return 0;
+
+ if (path_equal(m->where, "/"))
+ return 0;
+
+ device_wants_mount =
+ mount_is_auto(p) && !mount_is_automount(p) && MANAGER_IS_SYSTEM(UNIT(m)->manager);
+
+ /* Mount units from /proc/self/mountinfo are not bound to devices
+ * by default since they're subject to races when devices are
+ * unplugged. But the user can still force this dep with an
+ * appropriate option (or udev property) so the mount units are
+ * automatically stopped when the device disappears suddenly. */
+ dep = mount_is_bound_to_device(m) ? UNIT_BINDS_TO : UNIT_REQUIRES;
+
+ /* We always use 'what' from /proc/self/mountinfo if mounted */
+ mask = m->from_proc_self_mountinfo ? UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT : UNIT_DEPENDENCY_FILE;
+
+ r = unit_add_node_dependency(UNIT(m), p->what, device_wants_mount, dep, mask);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int mount_add_quota_dependencies(Mount *m) {
+ UnitDependencyMask mask;
+ MountParameters *p;
+ int r;
+
+ assert(m);
+
+ if (!MANAGER_IS_SYSTEM(UNIT(m)->manager))
+ return 0;
+
+ p = get_mount_parameters_fragment(m);
+ if (!p)
+ return 0;
+
+ if (!mount_needs_quota(p))
+ return 0;
+
+ mask = m->from_fragment ? UNIT_DEPENDENCY_FILE : UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT;
+
+ r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_WANTS, SPECIAL_QUOTACHECK_SERVICE, true, mask);
+ if (r < 0)
+ return r;
+
+ r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_WANTS, SPECIAL_QUOTAON_SERVICE, true, mask);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static bool mount_is_extrinsic(Mount *m) {
+ MountParameters *p;
+ assert(m);
+
+ /* Returns true for all units that are "magic" and should be excluded from the usual start-up and shutdown
+ * dependencies. We call them "extrinsic" here, as they are generally mounted outside of the systemd dependency
+ * logic. We shouldn't attempt to manage them ourselves but it's fine if the user operates on them with us. */
+
+ if (!MANAGER_IS_SYSTEM(UNIT(m)->manager)) /* We only automatically manage mounts if we are in system mode */
+ return true;
+
+ if (PATH_IN_SET(m->where, /* Don't bother with the OS data itself */
+ "/",
+ "/usr"))
+ return true;
+
+ if (PATH_STARTSWITH_SET(m->where,
+ "/run/initramfs", /* This should stay around from before we boot until after we shutdown */
+ "/proc", /* All of this is API VFS */
+ "/sys", /* … dito … */
+ "/dev")) /* … dito … */
+ return true;
+
+ /* If this is an initrd mount, and we are not in the initrd, then leave this around forever, too. */
+ p = get_mount_parameters(m);
+ if (p && fstab_test_option(p->options, "x-initrd.mount\0") && !in_initrd())
+ return true;
+
+ return false;
+}
+
+static int mount_add_default_dependencies(Mount *m) {
+ const char *after, *before;
+ UnitDependencyMask mask;
+ MountParameters *p;
+ bool nofail;
+ int r;
+
+ assert(m);
+
+ if (!UNIT(m)->default_dependencies)
+ return 0;
+
+ /* We do not add any default dependencies to /, /usr or /run/initramfs/, since they are guaranteed to stay
+ * mounted the whole time, since our system is on it. Also, don't bother with anything mounted below virtual
+ * file systems, it's also going to be virtual, and hence not worth the effort. */
+ if (mount_is_extrinsic(m))
+ return 0;
+
+ p = get_mount_parameters(m);
+ if (!p)
+ return 0;
+
+ mask = m->from_fragment ? UNIT_DEPENDENCY_FILE : UNIT_DEPENDENCY_MOUNTINFO_DEFAULT;
+ nofail = m->from_fragment ? fstab_test_yes_no_option(m->parameters_fragment.options, "nofail\0" "fail\0") : false;
+
+ if (mount_is_network(p)) {
+ /* We order ourselves after network.target. This is
+ * primarily useful at shutdown: services that take
+ * down the network should order themselves before
+ * network.target, so that they are shut down only
+ * after this mount unit is stopped. */
+
+ r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, SPECIAL_NETWORK_TARGET, true, mask);
+ if (r < 0)
+ return r;
+
+ /* We pull in network-online.target, and order
+ * ourselves after it. This is useful at start-up to
+ * actively pull in tools that want to be started
+ * before we start mounting network file systems, and
+ * whose purpose it is to delay this until the network
+ * is "up". */
+
+ r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_WANTS, UNIT_AFTER, SPECIAL_NETWORK_ONLINE_TARGET, true, mask);
+ if (r < 0)
+ return r;
+
+ after = SPECIAL_REMOTE_FS_PRE_TARGET;
+ before = SPECIAL_REMOTE_FS_TARGET;
+ } else {
+ after = SPECIAL_LOCAL_FS_PRE_TARGET;
+ before = SPECIAL_LOCAL_FS_TARGET;
+ }
+
+ if (!nofail) {
+ r = unit_add_dependency_by_name(UNIT(m), UNIT_BEFORE, before, true, mask);
+ if (r < 0)
+ return r;
+ }
+
+ r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, after, true, mask);
+ if (r < 0)
+ return r;
+
+ r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, true, mask);
+ if (r < 0)
+ return r;
+
+ /* If this is a tmpfs mount then we have to unmount it before we try to deactivate swaps */
+ if (streq_ptr(p->fstype, "tmpfs")) {
+ r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, SPECIAL_SWAP_TARGET, true, mask);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int mount_verify(Mount *m) {
+ _cleanup_free_ char *e = NULL;
+ MountParameters *p;
+ int r;
+
+ assert(m);
+
+ if (UNIT(m)->load_state != UNIT_LOADED)
+ return 0;
+
+ if (!m->from_fragment && !m->from_proc_self_mountinfo && !UNIT(m)->perpetual)
+ return -ENOENT;
+
+ r = unit_name_from_path(m->where, ".mount", &e);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(m), r, "Failed to generate unit name from mount path: %m");
+
+ if (!unit_has_name(UNIT(m), e)) {
+ log_unit_error(UNIT(m), "Where= setting doesn't match unit name. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (mount_point_is_api(m->where) || mount_point_ignore(m->where)) {
+ log_unit_error(UNIT(m), "Cannot create mount unit for API file system %s. Refusing.", m->where);
+ return -ENOEXEC;
+ }
+
+ p = get_mount_parameters_fragment(m);
+ if (p && !p->what) {
+ log_unit_error(UNIT(m), "What= setting is missing. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (m->exec_context.pam_name && m->kill_context.kill_mode != KILL_CONTROL_GROUP) {
+ log_unit_error(UNIT(m), "Unit has PAM enabled. Kill mode must be set to control-group'. Refusing.");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static int mount_add_extras(Mount *m) {
+ Unit *u = UNIT(m);
+ int r;
+
+ assert(m);
+
+ /* Note: this call might be called after we already have been loaded once (and even when it has already been
+ * activated), in case data from /proc/self/mountinfo has changed. This means all code here needs to be ready
+ * to run with an already set up unit. */
+
+ if (u->fragment_path)
+ m->from_fragment = true;
+
+ if (!m->where) {
+ r = unit_name_to_path(u->id, &m->where);
+ if (r < 0)
+ return r;
+ }
+
+ path_simplify(m->where, false);
+
+ if (!u->description) {
+ r = unit_set_description(u, m->where);
+ if (r < 0)
+ return r;
+ }
+
+ r = mount_add_device_dependencies(m);
+ if (r < 0)
+ return r;
+
+ r = mount_add_mount_dependencies(m);
+ if (r < 0)
+ return r;
+
+ r = mount_add_quota_dependencies(m);
+ if (r < 0)
+ return r;
+
+ r = unit_patch_contexts(u);
+ if (r < 0)
+ return r;
+
+ r = unit_add_exec_dependencies(u, &m->exec_context);
+ if (r < 0)
+ return r;
+
+ r = unit_set_default_slice(u);
+ if (r < 0)
+ return r;
+
+ r = mount_add_default_dependencies(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int mount_load_root_mount(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_ROOT_MOUNT))
+ return 0;
+
+ u->perpetual = true;
+ u->default_dependencies = false;
+
+ /* The stdio/kmsg bridge socket is on /, in order to avoid a dep loop, don't use kmsg logging for -.mount */
+ MOUNT(u)->exec_context.std_output = EXEC_OUTPUT_NULL;
+ MOUNT(u)->exec_context.std_input = EXEC_INPUT_NULL;
+
+ if (!u->description)
+ u->description = strdup("Root Mount");
+
+ return 1;
+}
+
+static int mount_load(Unit *u) {
+ Mount *m = MOUNT(u);
+ int r, q, w;
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ r = mount_load_root_mount(u);
+
+ if (m->from_proc_self_mountinfo || u->perpetual)
+ q = unit_load_fragment_and_dropin_optional(u);
+ else
+ q = unit_load_fragment_and_dropin(u);
+
+ /* Add in some extras. Note we do this in all cases (even if we failed to load the unit) when announced by the
+ * kernel, because we need some things to be set up no matter what when the kernel establishes a mount and thus
+ * we need to update the state in our unit to track it. After all, consider that we don't allow changing the
+ * 'slice' field for a unit once it is active. */
+ if (u->load_state == UNIT_LOADED || m->from_proc_self_mountinfo || u->perpetual)
+ w = mount_add_extras(m);
+ else
+ w = 0;
+
+ if (r < 0)
+ return r;
+ if (q < 0)
+ return q;
+ if (w < 0)
+ return w;
+
+ return mount_verify(m);
+}
+
+static void mount_set_state(Mount *m, MountState state) {
+ MountState old_state;
+ assert(m);
+
+ if (m->state != state)
+ bus_unit_send_pending_change_signal(UNIT(m), false);
+
+ old_state = m->state;
+ m->state = state;
+
+ if (!MOUNT_STATE_WITH_PROCESS(state)) {
+ m->timer_event_source = sd_event_source_unref(m->timer_event_source);
+ mount_unwatch_control_pid(m);
+ m->control_command = NULL;
+ m->control_command_id = _MOUNT_EXEC_COMMAND_INVALID;
+ }
+
+ if (state != old_state)
+ log_unit_debug(UNIT(m), "Changed %s -> %s", mount_state_to_string(old_state), mount_state_to_string(state));
+
+ unit_notify(UNIT(m), state_translation_table[old_state], state_translation_table[state],
+ m->reload_result == MOUNT_SUCCESS ? 0 : UNIT_NOTIFY_RELOAD_FAILURE);
+}
+
+static int mount_coldplug(Unit *u) {
+ Mount *m = MOUNT(u);
+ MountState new_state = MOUNT_DEAD;
+ int r;
+
+ assert(m);
+ assert(m->state == MOUNT_DEAD);
+
+ if (m->deserialized_state != m->state)
+ new_state = m->deserialized_state;
+ else if (m->from_proc_self_mountinfo)
+ new_state = MOUNT_MOUNTED;
+
+ if (new_state == m->state)
+ return 0;
+
+ if (m->control_pid > 0 &&
+ pid_is_unwaited(m->control_pid) &&
+ MOUNT_STATE_WITH_PROCESS(new_state)) {
+
+ r = unit_watch_pid(UNIT(m), m->control_pid);
+ if (r < 0)
+ return r;
+
+ r = mount_arm_timer(m, usec_add(u->state_change_timestamp.monotonic, m->timeout_usec));
+ if (r < 0)
+ return r;
+ }
+
+ if (!IN_SET(new_state, MOUNT_DEAD, MOUNT_FAILED)) {
+ (void) unit_setup_dynamic_creds(u);
+ (void) unit_setup_exec_runtime(u);
+ }
+
+ mount_set_state(m, new_state);
+ return 0;
+}
+
+static void mount_dump(Unit *u, FILE *f, const char *prefix) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ Mount *m = MOUNT(u);
+ MountParameters *p;
+
+ assert(m);
+ assert(f);
+
+ p = get_mount_parameters(m);
+
+ fprintf(f,
+ "%sMount State: %s\n"
+ "%sResult: %s\n"
+ "%sWhere: %s\n"
+ "%sWhat: %s\n"
+ "%sFile System Type: %s\n"
+ "%sOptions: %s\n"
+ "%sFrom /proc/self/mountinfo: %s\n"
+ "%sFrom fragment: %s\n"
+ "%sExtrinsic: %s\n"
+ "%sDirectoryMode: %04o\n"
+ "%sSloppyOptions: %s\n"
+ "%sLazyUnmount: %s\n"
+ "%sForceUnmount: %s\n"
+ "%sTimeoutSec: %s\n",
+ prefix, mount_state_to_string(m->state),
+ prefix, mount_result_to_string(m->result),
+ prefix, m->where,
+ prefix, p ? strna(p->what) : "n/a",
+ prefix, p ? strna(p->fstype) : "n/a",
+ prefix, p ? strna(p->options) : "n/a",
+ prefix, yes_no(m->from_proc_self_mountinfo),
+ prefix, yes_no(m->from_fragment),
+ prefix, yes_no(mount_is_extrinsic(m)),
+ prefix, m->directory_mode,
+ prefix, yes_no(m->sloppy_options),
+ prefix, yes_no(m->lazy_unmount),
+ prefix, yes_no(m->force_unmount),
+ prefix, format_timespan(buf, sizeof(buf), m->timeout_usec, USEC_PER_SEC));
+
+ if (m->control_pid > 0)
+ fprintf(f,
+ "%sControl PID: "PID_FMT"\n",
+ prefix, m->control_pid);
+
+ exec_context_dump(&m->exec_context, f, prefix);
+ kill_context_dump(&m->kill_context, f, prefix);
+ cgroup_context_dump(&m->cgroup_context, f, prefix);
+}
+
+static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
+
+ _cleanup_(exec_params_clear) ExecParameters exec_params = {
+ .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
+ .exec_fd = -1,
+ };
+ pid_t pid;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(_pid);
+
+ r = unit_prepare_exec(UNIT(m));
+ if (r < 0)
+ return r;
+
+ r = mount_arm_timer(m, usec_add(now(CLOCK_MONOTONIC), m->timeout_usec));
+ if (r < 0)
+ return r;
+
+ r = unit_set_exec_params(UNIT(m), &exec_params);
+ if (r < 0)
+ return r;
+
+ r = exec_spawn(UNIT(m),
+ c,
+ &m->exec_context,
+ &exec_params,
+ m->exec_runtime,
+ &m->dynamic_creds,
+ &pid);
+ if (r < 0)
+ return r;
+
+ r = unit_watch_pid(UNIT(m), pid);
+ if (r < 0)
+ /* FIXME: we need to do something here */
+ return r;
+
+ *_pid = pid;
+
+ return 0;
+}
+
+static void mount_enter_dead(Mount *m, MountResult f) {
+ assert(m);
+
+ if (m->result == MOUNT_SUCCESS)
+ m->result = f;
+
+ unit_log_result(UNIT(m), m->result == MOUNT_SUCCESS, mount_result_to_string(m->result));
+ mount_set_state(m, m->result != MOUNT_SUCCESS ? MOUNT_FAILED : MOUNT_DEAD);
+
+ m->exec_runtime = exec_runtime_unref(m->exec_runtime, true);
+
+ exec_context_destroy_runtime_directory(&m->exec_context, UNIT(m)->manager->prefix[EXEC_DIRECTORY_RUNTIME]);
+
+ unit_unref_uid_gid(UNIT(m), true);
+
+ dynamic_creds_destroy(&m->dynamic_creds);
+
+ /* Any dependencies based on /proc/self/mountinfo are now stale */
+ unit_remove_dependencies(UNIT(m), UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT);
+}
+
+static void mount_enter_mounted(Mount *m, MountResult f) {
+ assert(m);
+
+ if (m->result == MOUNT_SUCCESS)
+ m->result = f;
+
+ mount_set_state(m, MOUNT_MOUNTED);
+}
+
+static void mount_enter_dead_or_mounted(Mount *m, MountResult f) {
+ assert(m);
+
+ /* Enter DEAD or MOUNTED state, depending on what the kernel currently says about the mount point. We use this
+ * whenever we executed an operation, so that our internal state reflects what the kernel says again, after all
+ * ultimately we just mirror the kernel's internal state on this. */
+
+ if (m->from_proc_self_mountinfo)
+ mount_enter_mounted(m, f);
+ else
+ mount_enter_dead(m, f);
+}
+
+static int state_to_kill_operation(MountState state) {
+ switch (state) {
+
+ case MOUNT_REMOUNTING_SIGTERM:
+ case MOUNT_UNMOUNTING_SIGTERM:
+ return KILL_TERMINATE;
+
+ case MOUNT_REMOUNTING_SIGKILL:
+ case MOUNT_UNMOUNTING_SIGKILL:
+ return KILL_KILL;
+
+ default:
+ return _KILL_OPERATION_INVALID;
+ }
+}
+
+static void mount_enter_signal(Mount *m, MountState state, MountResult f) {
+ int r;
+
+ assert(m);
+
+ if (m->result == MOUNT_SUCCESS)
+ m->result = f;
+
+ r = unit_kill_context(
+ UNIT(m),
+ &m->kill_context,
+ state_to_kill_operation(state),
+ -1,
+ m->control_pid,
+ false);
+ if (r < 0)
+ goto fail;
+
+ if (r > 0) {
+ r = mount_arm_timer(m, usec_add(now(CLOCK_MONOTONIC), m->timeout_usec));
+ if (r < 0)
+ goto fail;
+
+ mount_set_state(m, state);
+ } else if (state == MOUNT_REMOUNTING_SIGTERM && m->kill_context.send_sigkill)
+ mount_enter_signal(m, MOUNT_REMOUNTING_SIGKILL, MOUNT_SUCCESS);
+ else if (IN_SET(state, MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL))
+ mount_enter_mounted(m, MOUNT_SUCCESS);
+ else if (state == MOUNT_UNMOUNTING_SIGTERM && m->kill_context.send_sigkill)
+ mount_enter_signal(m, MOUNT_UNMOUNTING_SIGKILL, MOUNT_SUCCESS);
+ else
+ mount_enter_dead_or_mounted(m, MOUNT_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(m), r, "Failed to kill processes: %m");
+ mount_enter_dead_or_mounted(m, MOUNT_FAILURE_RESOURCES);
+}
+
+static void mount_enter_unmounting(Mount *m) {
+ int r;
+
+ assert(m);
+
+ /* Start counting our attempts */
+ if (!IN_SET(m->state,
+ MOUNT_UNMOUNTING,
+ MOUNT_UNMOUNTING_SIGTERM,
+ MOUNT_UNMOUNTING_SIGKILL))
+ m->n_retry_umount = 0;
+
+ m->control_command_id = MOUNT_EXEC_UNMOUNT;
+ m->control_command = m->exec_command + MOUNT_EXEC_UNMOUNT;
+
+ r = exec_command_set(m->control_command, UMOUNT_PATH, m->where, "-c", NULL);
+ if (r >= 0 && m->lazy_unmount)
+ r = exec_command_append(m->control_command, "-l", NULL);
+ if (r >= 0 && m->force_unmount)
+ r = exec_command_append(m->control_command, "-f", NULL);
+ if (r < 0)
+ goto fail;
+
+ mount_unwatch_control_pid(m);
+
+ r = mount_spawn(m, m->control_command, &m->control_pid);
+ if (r < 0)
+ goto fail;
+
+ mount_set_state(m, MOUNT_UNMOUNTING);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(m), r, "Failed to run 'umount' task: %m");
+ mount_enter_dead_or_mounted(m, MOUNT_FAILURE_RESOURCES);
+}
+
+static void mount_enter_mounting(Mount *m) {
+ int r;
+ MountParameters *p;
+
+ assert(m);
+
+ r = unit_fail_if_noncanonical(UNIT(m), m->where);
+ if (r < 0)
+ goto fail;
+
+ (void) mkdir_p_label(m->where, m->directory_mode);
+
+ unit_warn_if_dir_nonempty(UNIT(m), m->where);
+ unit_warn_leftover_processes(UNIT(m));
+
+ m->control_command_id = MOUNT_EXEC_MOUNT;
+ m->control_command = m->exec_command + MOUNT_EXEC_MOUNT;
+
+ /* Create the source directory for bind-mounts if needed */
+ p = get_mount_parameters_fragment(m);
+ if (p && mount_is_bind(p))
+ (void) mkdir_p_label(p->what, m->directory_mode);
+
+ if (p) {
+ _cleanup_free_ char *opts = NULL;
+
+ r = fstab_filter_options(p->options, "nofail\0" "noauto\0" "auto\0", NULL, NULL, &opts);
+ if (r < 0)
+ goto fail;
+
+ r = exec_command_set(m->control_command, MOUNT_PATH, p->what, m->where, NULL);
+ if (r >= 0 && m->sloppy_options)
+ r = exec_command_append(m->control_command, "-s", NULL);
+ if (r >= 0 && p->fstype)
+ r = exec_command_append(m->control_command, "-t", p->fstype, NULL);
+ if (r >= 0 && !isempty(opts))
+ r = exec_command_append(m->control_command, "-o", opts, NULL);
+ } else
+ r = -ENOENT;
+ if (r < 0)
+ goto fail;
+
+ mount_unwatch_control_pid(m);
+
+ r = mount_spawn(m, m->control_command, &m->control_pid);
+ if (r < 0)
+ goto fail;
+
+ mount_set_state(m, MOUNT_MOUNTING);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(m), r, "Failed to run 'mount' task: %m");
+ mount_enter_dead_or_mounted(m, MOUNT_FAILURE_RESOURCES);
+}
+
+static void mount_set_reload_result(Mount *m, MountResult result) {
+ assert(m);
+
+ /* Only store the first error we encounter */
+ if (m->reload_result != MOUNT_SUCCESS)
+ return;
+
+ m->reload_result = result;
+}
+
+static void mount_enter_remounting(Mount *m) {
+ int r;
+ MountParameters *p;
+
+ assert(m);
+
+ /* Reset reload result when we are about to start a new remount operation */
+ m->reload_result = MOUNT_SUCCESS;
+
+ m->control_command_id = MOUNT_EXEC_REMOUNT;
+ m->control_command = m->exec_command + MOUNT_EXEC_REMOUNT;
+
+ p = get_mount_parameters_fragment(m);
+ if (p) {
+ const char *o;
+
+ if (p->options)
+ o = strjoina("remount,", p->options);
+ else
+ o = "remount";
+
+ r = exec_command_set(m->control_command, MOUNT_PATH,
+ p->what, m->where,
+ "-o", o, NULL);
+ if (r >= 0 && m->sloppy_options)
+ r = exec_command_append(m->control_command, "-s", NULL);
+ if (r >= 0 && p->fstype)
+ r = exec_command_append(m->control_command, "-t", p->fstype, NULL);
+ } else
+ r = -ENOENT;
+ if (r < 0)
+ goto fail;
+
+ mount_unwatch_control_pid(m);
+
+ r = mount_spawn(m, m->control_command, &m->control_pid);
+ if (r < 0)
+ goto fail;
+
+ mount_set_state(m, MOUNT_REMOUNTING);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(m), r, "Failed to run 'remount' task: %m");
+ mount_set_reload_result(m, MOUNT_FAILURE_RESOURCES);
+ mount_enter_dead_or_mounted(m, MOUNT_SUCCESS);
+}
+
+static void mount_cycle_clear(Mount *m) {
+ assert(m);
+
+ /* Clear all state we shall forget for this new cycle */
+
+ m->result = MOUNT_SUCCESS;
+ m->reload_result = MOUNT_SUCCESS;
+ exec_command_reset_status_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX);
+ UNIT(m)->reset_accounting = true;
+}
+
+static int mount_start(Unit *u) {
+ Mount *m = MOUNT(u);
+ int r;
+
+ assert(m);
+
+ /* We cannot fulfill this request right now, try again later
+ * please! */
+ if (IN_SET(m->state,
+ MOUNT_UNMOUNTING,
+ MOUNT_UNMOUNTING_SIGTERM,
+ MOUNT_UNMOUNTING_SIGKILL))
+ return -EAGAIN;
+
+ /* Already on it! */
+ if (m->state == MOUNT_MOUNTING)
+ return 0;
+
+ assert(IN_SET(m->state, MOUNT_DEAD, MOUNT_FAILED));
+
+ r = unit_start_limit_test(u);
+ if (r < 0) {
+ mount_enter_dead(m, MOUNT_FAILURE_START_LIMIT_HIT);
+ return r;
+ }
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ mount_cycle_clear(m);
+ mount_enter_mounting(m);
+
+ return 1;
+}
+
+static int mount_stop(Unit *u) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+
+ switch (m->state) {
+
+ case MOUNT_UNMOUNTING:
+ case MOUNT_UNMOUNTING_SIGKILL:
+ case MOUNT_UNMOUNTING_SIGTERM:
+ /* Already on it */
+ return 0;
+
+ case MOUNT_MOUNTING:
+ case MOUNT_MOUNTING_DONE:
+ case MOUNT_REMOUNTING:
+ /* If we are still waiting for /bin/mount, we go directly into kill mode. */
+ mount_enter_signal(m, MOUNT_UNMOUNTING_SIGTERM, MOUNT_SUCCESS);
+ return 0;
+
+ case MOUNT_REMOUNTING_SIGTERM:
+ /* If we are already waiting for a hung remount, convert this to the matching unmounting state */
+ mount_set_state(m, MOUNT_UNMOUNTING_SIGTERM);
+ return 0;
+
+ case MOUNT_REMOUNTING_SIGKILL:
+ /* as above */
+ mount_set_state(m, MOUNT_UNMOUNTING_SIGKILL);
+ return 0;
+
+ case MOUNT_MOUNTED:
+ mount_enter_unmounting(m);
+ return 1;
+
+ default:
+ assert_not_reached("Unexpected state.");
+ }
+}
+
+static int mount_reload(Unit *u) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+ assert(m->state == MOUNT_MOUNTED);
+
+ mount_enter_remounting(m);
+
+ return 1;
+}
+
+static int mount_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", mount_state_to_string(m->state));
+ (void) serialize_item(f, "result", mount_result_to_string(m->result));
+ (void) serialize_item(f, "reload-result", mount_result_to_string(m->reload_result));
+ (void) serialize_item_format(f, "n-retry-umount", "%u", m->n_retry_umount);
+
+ if (m->control_pid > 0)
+ (void) serialize_item_format(f, "control-pid", PID_FMT, m->control_pid);
+
+ if (m->control_command_id >= 0)
+ (void) serialize_item(f, "control-command", mount_exec_command_to_string(m->control_command_id));
+
+ return 0;
+}
+
+static int mount_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Mount *m = MOUNT(u);
+ int r;
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ MountState state;
+
+ if ((state = mount_state_from_string(value)) < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ m->deserialized_state = state;
+
+ } else if (streq(key, "result")) {
+ MountResult f;
+
+ f = mount_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse result value: %s", value);
+ else if (f != MOUNT_SUCCESS)
+ m->result = f;
+
+ } else if (streq(key, "reload-result")) {
+ MountResult f;
+
+ f = mount_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse reload result value: %s", value);
+ else if (f != MOUNT_SUCCESS)
+ m->reload_result = f;
+
+ } else if (streq(key, "n-retry-umount")) {
+
+ r = safe_atou(value, &m->n_retry_umount);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse n-retry-umount value: %s", value);
+
+ } else if (streq(key, "control-pid")) {
+
+ if (parse_pid(value, &m->control_pid) < 0)
+ log_unit_debug(u, "Failed to parse control-pid value: %s", value);
+
+ } else if (streq(key, "control-command")) {
+ MountExecCommand id;
+
+ id = mount_exec_command_from_string(value);
+ if (id < 0)
+ log_unit_debug(u, "Failed to parse exec-command value: %s", value);
+ else {
+ m->control_command_id = id;
+ m->control_command = m->exec_command + id;
+ }
+ } else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+_pure_ static UnitActiveState mount_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[MOUNT(u)->state];
+}
+
+_pure_ static const char *mount_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return mount_state_to_string(MOUNT(u)->state);
+}
+
+_pure_ static bool mount_may_gc(Unit *u) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+
+ if (m->from_proc_self_mountinfo)
+ return false;
+
+ return true;
+}
+
+static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
+ Mount *m = MOUNT(u);
+ MountResult f;
+
+ assert(m);
+ assert(pid >= 0);
+
+ if (pid != m->control_pid)
+ return;
+
+ m->control_pid = 0;
+
+ if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
+ f = MOUNT_SUCCESS;
+ else if (code == CLD_EXITED)
+ f = MOUNT_FAILURE_EXIT_CODE;
+ else if (code == CLD_KILLED)
+ f = MOUNT_FAILURE_SIGNAL;
+ else if (code == CLD_DUMPED)
+ f = MOUNT_FAILURE_CORE_DUMP;
+ else
+ assert_not_reached("Unknown code");
+
+ if (IN_SET(m->state, MOUNT_REMOUNTING, MOUNT_REMOUNTING_SIGKILL, MOUNT_REMOUNTING_SIGTERM))
+ mount_set_reload_result(m, f);
+ else if (m->result == MOUNT_SUCCESS)
+ m->result = f;
+
+ if (m->control_command) {
+ exec_status_exit(&m->control_command->exec_status, &m->exec_context, pid, code, status);
+
+ m->control_command = NULL;
+ m->control_command_id = _MOUNT_EXEC_COMMAND_INVALID;
+ }
+
+ unit_log_process_exit(
+ u, f == MOUNT_SUCCESS ? LOG_DEBUG : LOG_NOTICE,
+ "Mount process",
+ mount_exec_command_to_string(m->control_command_id),
+ code, status);
+
+ /* Note that due to the io event priority logic, we can be sure the new mountinfo is loaded
+ * before we process the SIGCHLD for the mount command. */
+
+ switch (m->state) {
+
+ case MOUNT_MOUNTING:
+ /* Our mount point has not appeared in mountinfo. Something went wrong. */
+
+ if (f == MOUNT_SUCCESS) {
+ /* Either /bin/mount has an unexpected definition of success,
+ * or someone raced us and we lost. */
+ log_unit_warning(UNIT(m), "Mount process finished, but there is no mount.");
+ f = MOUNT_FAILURE_PROTOCOL;
+ }
+ mount_enter_dead(m, f);
+ break;
+
+ case MOUNT_MOUNTING_DONE:
+ mount_enter_mounted(m, f);
+ break;
+
+ case MOUNT_REMOUNTING:
+ case MOUNT_REMOUNTING_SIGTERM:
+ case MOUNT_REMOUNTING_SIGKILL:
+ mount_enter_dead_or_mounted(m, MOUNT_SUCCESS);
+ break;
+
+ case MOUNT_UNMOUNTING:
+
+ if (f == MOUNT_SUCCESS && m->from_proc_self_mountinfo) {
+
+ /* Still a mount point? If so, let's try again. Most likely there were multiple mount points
+ * stacked on top of each other. We might exceed the timeout specified by the user overall,
+ * but we will stop as soon as any one umount times out. */
+
+ if (m->n_retry_umount < RETRY_UMOUNT_MAX) {
+ log_unit_debug(u, "Mount still present, trying again.");
+ m->n_retry_umount++;
+ mount_enter_unmounting(m);
+ } else {
+ log_unit_warning(u, "Mount still present after %u attempts to unmount, giving up.", m->n_retry_umount);
+ mount_enter_mounted(m, f);
+ }
+ } else
+ mount_enter_dead_or_mounted(m, f);
+
+ break;
+
+ case MOUNT_UNMOUNTING_SIGKILL:
+ case MOUNT_UNMOUNTING_SIGTERM:
+ mount_enter_dead_or_mounted(m, f);
+ break;
+
+ default:
+ assert_not_reached("Uh, control process died at wrong time.");
+ }
+
+ /* Notify clients about changed exit status */
+ unit_add_to_dbus_queue(u);
+}
+
+static int mount_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
+ Mount *m = MOUNT(userdata);
+
+ assert(m);
+ assert(m->timer_event_source == source);
+
+ switch (m->state) {
+
+ case MOUNT_MOUNTING:
+ case MOUNT_MOUNTING_DONE:
+ log_unit_warning(UNIT(m), "Mounting timed out. Terminating.");
+ mount_enter_signal(m, MOUNT_UNMOUNTING_SIGTERM, MOUNT_FAILURE_TIMEOUT);
+ break;
+
+ case MOUNT_REMOUNTING:
+ log_unit_warning(UNIT(m), "Remounting timed out. Terminating remount process.");
+ mount_set_reload_result(m, MOUNT_FAILURE_TIMEOUT);
+ mount_enter_signal(m, MOUNT_REMOUNTING_SIGTERM, MOUNT_SUCCESS);
+ break;
+
+ case MOUNT_REMOUNTING_SIGTERM:
+ mount_set_reload_result(m, MOUNT_FAILURE_TIMEOUT);
+
+ if (m->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(m), "Remounting timed out. Killing.");
+ mount_enter_signal(m, MOUNT_REMOUNTING_SIGKILL, MOUNT_SUCCESS);
+ } else {
+ log_unit_warning(UNIT(m), "Remounting timed out. Skipping SIGKILL. Ignoring.");
+ mount_enter_dead_or_mounted(m, MOUNT_SUCCESS);
+ }
+ break;
+
+ case MOUNT_REMOUNTING_SIGKILL:
+ mount_set_reload_result(m, MOUNT_FAILURE_TIMEOUT);
+
+ log_unit_warning(UNIT(m), "Mount process still around after SIGKILL. Ignoring.");
+ mount_enter_dead_or_mounted(m, MOUNT_SUCCESS);
+ break;
+
+ case MOUNT_UNMOUNTING:
+ log_unit_warning(UNIT(m), "Unmounting timed out. Terminating.");
+ mount_enter_signal(m, MOUNT_UNMOUNTING_SIGTERM, MOUNT_FAILURE_TIMEOUT);
+ break;
+
+ case MOUNT_UNMOUNTING_SIGTERM:
+ if (m->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(m), "Mount process timed out. Killing.");
+ mount_enter_signal(m, MOUNT_UNMOUNTING_SIGKILL, MOUNT_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(m), "Mount process timed out. Skipping SIGKILL. Ignoring.");
+ mount_enter_dead_or_mounted(m, MOUNT_FAILURE_TIMEOUT);
+ }
+ break;
+
+ case MOUNT_UNMOUNTING_SIGKILL:
+ log_unit_warning(UNIT(m), "Mount process still around after SIGKILL. Ignoring.");
+ mount_enter_dead_or_mounted(m, MOUNT_FAILURE_TIMEOUT);
+ break;
+
+ default:
+ assert_not_reached("Timeout at wrong time.");
+ }
+
+ return 0;
+}
+
+static int mount_setup_new_unit(
+ Manager *m,
+ const char *name,
+ const char *what,
+ const char *where,
+ const char *options,
+ const char *fstype,
+ MountProcFlags *ret_flags,
+ Unit **ret) {
+
+ _cleanup_(unit_freep) Unit *u = NULL;
+ int r;
+
+ assert(m);
+ assert(name);
+ assert(ret_flags);
+ assert(ret);
+
+ r = unit_new_for_name(m, sizeof(Mount), name, &u);
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&u->source_path, "/proc/self/mountinfo");
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&MOUNT(u)->where, where);
+ if (r < 0)
+ return r;
+
+ r = update_parameters_proc_self_mount_info(MOUNT(u), what, options, fstype);
+ if (r < 0)
+ return r;
+
+ /* This unit was generated because /proc/self/mountinfo reported it. Remember this, so that by the time we load
+ * the unit file for it (and thus add in extra deps right after) we know what source to attributes the deps
+ * to.*/
+ MOUNT(u)->from_proc_self_mountinfo = true;
+
+ /* We have only allocated the stub now, let's enqueue this unit for loading now, so that everything else is
+ * loaded in now. */
+ unit_add_to_load_queue(u);
+
+ *ret_flags = MOUNT_PROC_IS_MOUNTED | MOUNT_PROC_JUST_MOUNTED | MOUNT_PROC_JUST_CHANGED;
+ *ret = TAKE_PTR(u);
+ return 0;
+}
+
+static int mount_setup_existing_unit(
+ Unit *u,
+ const char *what,
+ const char *where,
+ const char *options,
+ const char *fstype,
+ MountProcFlags *ret_flags) {
+
+ MountProcFlags flags = MOUNT_PROC_IS_MOUNTED;
+ int r;
+
+ assert(u);
+ assert(flags);
+
+ if (!MOUNT(u)->where) {
+ MOUNT(u)->where = strdup(where);
+ if (!MOUNT(u)->where)
+ return -ENOMEM;
+ }
+
+ r = update_parameters_proc_self_mount_info(MOUNT(u), what, options, fstype);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ flags |= MOUNT_PROC_JUST_CHANGED;
+
+ if (!MOUNT(u)->from_proc_self_mountinfo || FLAGS_SET(MOUNT(u)->proc_flags, MOUNT_PROC_JUST_MOUNTED))
+ flags |= MOUNT_PROC_JUST_MOUNTED;
+
+ MOUNT(u)->from_proc_self_mountinfo = true;
+
+ if (IN_SET(u->load_state, UNIT_NOT_FOUND, UNIT_BAD_SETTING, UNIT_ERROR)) {
+ /* The unit was previously not found or otherwise not loaded. Now that the unit shows up in
+ * /proc/self/mountinfo we should reconsider it this, hence set it to UNIT_LOADED. */
+ u->load_state = UNIT_LOADED;
+ u->load_error = 0;
+
+ flags |= MOUNT_PROC_JUST_CHANGED;
+ }
+
+ if (FLAGS_SET(flags, MOUNT_PROC_JUST_CHANGED)) {
+ /* If things changed, then make sure that all deps are regenerated. Let's
+ * first remove all automatic deps, and then add in the new ones. */
+
+ unit_remove_dependencies(u, UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT);
+
+ r = mount_add_extras(MOUNT(u));
+ if (r < 0)
+ return r;
+ }
+
+ *ret_flags = flags;
+ return 0;
+}
+
+static int mount_setup_unit(
+ Manager *m,
+ const char *what,
+ const char *where,
+ const char *options,
+ const char *fstype,
+ bool set_flags) {
+
+ _cleanup_free_ char *e = NULL;
+ MountProcFlags flags;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(what);
+ assert(where);
+ assert(options);
+ assert(fstype);
+
+ /* Ignore API mount points. They should never be referenced in
+ * dependencies ever. */
+ if (mount_point_is_api(where) || mount_point_ignore(where))
+ return 0;
+
+ if (streq(fstype, "autofs"))
+ return 0;
+
+ /* probably some kind of swap, ignore */
+ if (!is_path(where))
+ return 0;
+
+ r = unit_name_from_path(where, ".mount", &e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name from path '%s': %m", where);
+
+ u = manager_get_unit(m, e);
+ if (u)
+ r = mount_setup_existing_unit(u, what, where, options, fstype, &flags);
+ else
+ /* First time we see this mount point meaning that it's not been initiated by a mount unit but rather
+ * by the sysadmin having called mount(8) directly. */
+ r = mount_setup_new_unit(m, e, what, where, options, fstype, &flags, &u);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to set up mount unit: %m");
+
+ /* If the mount changed properties or state, let's notify our clients */
+ if (flags & (MOUNT_PROC_JUST_CHANGED|MOUNT_PROC_JUST_MOUNTED))
+ unit_add_to_dbus_queue(u);
+
+ if (set_flags)
+ MOUNT(u)->proc_flags = flags;
+
+ return 0;
+}
+
+static int mount_load_proc_self_mountinfo(Manager *m, bool set_flags) {
+ _cleanup_(mnt_free_tablep) struct libmnt_table *t = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *i = NULL;
+ int r;
+
+ assert(m);
+
+ t = mnt_new_table();
+ i = mnt_new_iter(MNT_ITER_FORWARD);
+ if (!t || !i)
+ return log_oom();
+
+ r = mnt_table_parse_mtab(t, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse /proc/self/mountinfo: %m");
+
+ for (;;) {
+ struct libmnt_fs *fs;
+ const char *device, *path, *options, *fstype;
+ _cleanup_free_ char *d = NULL, *p = NULL;
+ int k;
+
+ k = mnt_table_next_fs(t, i, &fs);
+ if (k == 1)
+ break;
+ if (k < 0)
+ return log_error_errno(k, "Failed to get next entry from /proc/self/mountinfo: %m");
+
+ device = mnt_fs_get_source(fs);
+ path = mnt_fs_get_target(fs);
+ options = mnt_fs_get_options(fs);
+ fstype = mnt_fs_get_fstype(fs);
+
+ if (!device || !path)
+ continue;
+
+ if (cunescape(device, UNESCAPE_RELAX, &d) < 0)
+ return log_oom();
+
+ if (cunescape(path, UNESCAPE_RELAX, &p) < 0)
+ return log_oom();
+
+ device_found_node(m, d, DEVICE_FOUND_MOUNT, DEVICE_FOUND_MOUNT);
+
+ (void) mount_setup_unit(m, d, p, options, fstype, set_flags);
+ }
+
+ return 0;
+}
+
+static void mount_shutdown(Manager *m) {
+ assert(m);
+
+ m->mount_event_source = sd_event_source_unref(m->mount_event_source);
+
+ mnt_unref_monitor(m->mount_monitor);
+ m->mount_monitor = NULL;
+}
+
+static int mount_get_timeout(Unit *u, usec_t *timeout) {
+ Mount *m = MOUNT(u);
+ usec_t t;
+ int r;
+
+ if (!m->timer_event_source)
+ return 0;
+
+ r = sd_event_source_get_time(m->timer_event_source, &t);
+ if (r < 0)
+ return r;
+ if (t == USEC_INFINITY)
+ return 0;
+
+ *timeout = t;
+ return 1;
+}
+
+static void mount_enumerate_perpetual(Manager *m) {
+ Unit *u;
+ int r;
+
+ assert(m);
+
+ /* Whatever happens, we know for sure that the root directory is around, and cannot go away. Let's
+ * unconditionally synthesize it here and mark it as perpetual. */
+
+ u = manager_get_unit(m, SPECIAL_ROOT_MOUNT);
+ if (!u) {
+ r = unit_new_for_name(m, sizeof(Mount), SPECIAL_ROOT_MOUNT, &u);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_MOUNT " unit: %m");
+ return;
+ }
+ }
+
+ u->perpetual = true;
+ MOUNT(u)->deserialized_state = MOUNT_MOUNTED;
+
+ unit_add_to_load_queue(u);
+ unit_add_to_dbus_queue(u);
+}
+
+static bool mount_is_mounted(Mount *m) {
+ assert(m);
+
+ return UNIT(m)->perpetual || FLAGS_SET(m->proc_flags, MOUNT_PROC_IS_MOUNTED);
+}
+
+static void mount_enumerate(Manager *m) {
+ int r;
+
+ assert(m);
+
+ mnt_init_debug(0);
+
+ if (!m->mount_monitor) {
+ int fd;
+
+ m->mount_monitor = mnt_new_monitor();
+ if (!m->mount_monitor) {
+ log_oom();
+ goto fail;
+ }
+
+ r = mnt_monitor_enable_kernel(m->mount_monitor, 1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to enable watching of kernel mount events: %m");
+ goto fail;
+ }
+
+ r = mnt_monitor_enable_userspace(m->mount_monitor, 1, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to enable watching of userspace mount events: %m");
+ goto fail;
+ }
+
+ /* mnt_unref_monitor() will close the fd */
+ fd = r = mnt_monitor_get_fd(m->mount_monitor);
+ if (r < 0) {
+ log_error_errno(r, "Failed to acquire watch file descriptor: %m");
+ goto fail;
+ }
+
+ r = sd_event_add_io(m->event, &m->mount_event_source, fd, EPOLLIN, mount_dispatch_io, m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to watch mount file descriptor: %m");
+ goto fail;
+ }
+
+ r = sd_event_source_set_priority(m->mount_event_source, SD_EVENT_PRIORITY_NORMAL-10);
+ if (r < 0) {
+ log_error_errno(r, "Failed to adjust mount watch priority: %m");
+ goto fail;
+ }
+
+ (void) sd_event_source_set_description(m->mount_event_source, "mount-monitor-dispatch");
+ }
+
+ r = mount_load_proc_self_mountinfo(m, false);
+ if (r < 0)
+ goto fail;
+
+ return;
+
+fail:
+ mount_shutdown(m);
+}
+
+static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ _cleanup_set_free_free_ Set *around = NULL, *gone = NULL;
+ Manager *m = userdata;
+ const char *what;
+ Iterator i;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(revents & EPOLLIN);
+
+ if (fd == mnt_monitor_get_fd(m->mount_monitor)) {
+ bool rescan = false;
+
+ /* Drain all events and verify that the event is valid.
+ *
+ * Note that libmount also monitors /run/mount mkdir if the
+ * directory does not exist yet. The mkdir may generate event
+ * which is irrelevant for us.
+ *
+ * error: r < 0; valid: r == 0, false positive: rc == 1 */
+ do {
+ r = mnt_monitor_next_change(m->mount_monitor, NULL, NULL);
+ if (r == 0)
+ rescan = true;
+ else if (r < 0)
+ return log_error_errno(r, "Failed to drain libmount events: %m");
+ } while (r == 0);
+
+ log_debug("libmount event [rescan: %s]", yes_no(rescan));
+ if (!rescan)
+ return 0;
+ }
+
+ r = mount_load_proc_self_mountinfo(m, true);
+ if (r < 0) {
+ /* Reset flags, just in case, for later calls */
+ LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT])
+ MOUNT(u)->proc_flags = 0;
+
+ return 0;
+ }
+
+ manager_dispatch_load_queue(m);
+
+ LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT]) {
+ Mount *mount = MOUNT(u);
+
+ if (!mount_is_mounted(mount)) {
+
+ /* A mount point is not around right now. It
+ * might be gone, or might never have
+ * existed. */
+
+ if (mount->from_proc_self_mountinfo &&
+ mount->parameters_proc_self_mountinfo.what) {
+
+ /* Remember that this device might just have disappeared */
+ if (set_ensure_allocated(&gone, &path_hash_ops) < 0 ||
+ set_put_strdup(gone, mount->parameters_proc_self_mountinfo.what) < 0)
+ log_oom(); /* we don't care too much about OOM here... */
+ }
+
+ mount->from_proc_self_mountinfo = false;
+ assert_se(update_parameters_proc_self_mount_info(mount, NULL, NULL, NULL) >= 0);
+
+ switch (mount->state) {
+
+ case MOUNT_MOUNTED:
+ /* This has just been unmounted by somebody else, follow the state change. */
+ mount_enter_dead(mount, MOUNT_SUCCESS);
+ break;
+
+ default:
+ break;
+ }
+
+ } else if (mount->proc_flags & (MOUNT_PROC_JUST_MOUNTED|MOUNT_PROC_JUST_CHANGED)) {
+
+ /* A mount point was added or changed */
+
+ switch (mount->state) {
+
+ case MOUNT_DEAD:
+ case MOUNT_FAILED:
+
+ /* This has just been mounted by somebody else, follow the state change, but let's
+ * generate a new invocation ID for this implicitly and automatically. */
+ (void) unit_acquire_invocation_id(u);
+ mount_cycle_clear(mount);
+ mount_enter_mounted(mount, MOUNT_SUCCESS);
+ break;
+
+ case MOUNT_MOUNTING:
+ mount_set_state(mount, MOUNT_MOUNTING_DONE);
+ break;
+
+ default:
+ /* Nothing really changed, but let's
+ * issue an notification call
+ * nonetheless, in case somebody is
+ * waiting for this. (e.g. file system
+ * ro/rw remounts.) */
+ mount_set_state(mount, mount->state);
+ break;
+ }
+ }
+
+ if (mount_is_mounted(mount) &&
+ mount->from_proc_self_mountinfo &&
+ mount->parameters_proc_self_mountinfo.what) {
+ /* Track devices currently used */
+
+ if (set_ensure_allocated(&around, &path_hash_ops) < 0 ||
+ set_put_strdup(around, mount->parameters_proc_self_mountinfo.what) < 0)
+ log_oom();
+ }
+
+ /* Reset the flags for later calls */
+ mount->proc_flags = 0;
+ }
+
+ SET_FOREACH(what, gone, i) {
+ if (set_contains(around, what))
+ continue;
+
+ /* Let the device units know that the device is no longer mounted */
+ device_found_node(m, what, 0, DEVICE_FOUND_MOUNT);
+ }
+
+ return 0;
+}
+
+static void mount_reset_failed(Unit *u) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+
+ if (m->state == MOUNT_FAILED)
+ mount_set_state(m, MOUNT_DEAD);
+
+ m->result = MOUNT_SUCCESS;
+ m->reload_result = MOUNT_SUCCESS;
+}
+
+static int mount_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+
+ return unit_kill_common(u, who, signo, -1, MOUNT(u)->control_pid, error);
+}
+
+static int mount_control_pid(Unit *u) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+
+ return m->control_pid;
+}
+
+static const char* const mount_exec_command_table[_MOUNT_EXEC_COMMAND_MAX] = {
+ [MOUNT_EXEC_MOUNT] = "ExecMount",
+ [MOUNT_EXEC_UNMOUNT] = "ExecUnmount",
+ [MOUNT_EXEC_REMOUNT] = "ExecRemount",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(mount_exec_command, MountExecCommand);
+
+static const char* const mount_result_table[_MOUNT_RESULT_MAX] = {
+ [MOUNT_SUCCESS] = "success",
+ [MOUNT_FAILURE_RESOURCES] = "resources",
+ [MOUNT_FAILURE_TIMEOUT] = "timeout",
+ [MOUNT_FAILURE_EXIT_CODE] = "exit-code",
+ [MOUNT_FAILURE_SIGNAL] = "signal",
+ [MOUNT_FAILURE_CORE_DUMP] = "core-dump",
+ [MOUNT_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+ [MOUNT_FAILURE_PROTOCOL] = "protocol",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(mount_result, MountResult);
+
+const UnitVTable mount_vtable = {
+ .object_size = sizeof(Mount),
+ .exec_context_offset = offsetof(Mount, exec_context),
+ .cgroup_context_offset = offsetof(Mount, cgroup_context),
+ .kill_context_offset = offsetof(Mount, kill_context),
+ .exec_runtime_offset = offsetof(Mount, exec_runtime),
+ .dynamic_creds_offset = offsetof(Mount, dynamic_creds),
+
+ .sections =
+ "Unit\0"
+ "Mount\0"
+ "Install\0",
+ .private_section = "Mount",
+
+ .init = mount_init,
+ .load = mount_load,
+ .done = mount_done,
+
+ .coldplug = mount_coldplug,
+
+ .dump = mount_dump,
+
+ .start = mount_start,
+ .stop = mount_stop,
+ .reload = mount_reload,
+
+ .kill = mount_kill,
+
+ .serialize = mount_serialize,
+ .deserialize_item = mount_deserialize_item,
+
+ .active_state = mount_active_state,
+ .sub_state_to_string = mount_sub_state_to_string,
+
+ .may_gc = mount_may_gc,
+
+ .sigchld_event = mount_sigchld_event,
+
+ .reset_failed = mount_reset_failed,
+
+ .control_pid = mount_control_pid,
+
+ .bus_vtable = bus_mount_vtable,
+ .bus_set_property = bus_mount_set_property,
+ .bus_commit_properties = bus_mount_commit_properties,
+
+ .get_timeout = mount_get_timeout,
+
+ .can_transient = true,
+
+ .enumerate_perpetual = mount_enumerate_perpetual,
+ .enumerate = mount_enumerate,
+ .shutdown = mount_shutdown,
+
+ .status_message_formats = {
+ .starting_stopping = {
+ [0] = "Mounting %s...",
+ [1] = "Unmounting %s...",
+ },
+ .finished_start_job = {
+ [JOB_DONE] = "Mounted %s.",
+ [JOB_FAILED] = "Failed to mount %s.",
+ [JOB_TIMEOUT] = "Timed out mounting %s.",
+ },
+ .finished_stop_job = {
+ [JOB_DONE] = "Unmounted %s.",
+ [JOB_FAILED] = "Failed unmounting %s.",
+ [JOB_TIMEOUT] = "Timed out unmounting %s.",
+ },
+ },
+};
diff --git a/src/core/mount.h b/src/core/mount.h
new file mode 100644
index 0000000..2e59f1f
--- /dev/null
+++ b/src/core/mount.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Mount Mount;
+
+#include "kill.h"
+#include "dynamic-user.h"
+#include "unit.h"
+
+typedef enum MountExecCommand {
+ MOUNT_EXEC_MOUNT,
+ MOUNT_EXEC_UNMOUNT,
+ MOUNT_EXEC_REMOUNT,
+ _MOUNT_EXEC_COMMAND_MAX,
+ _MOUNT_EXEC_COMMAND_INVALID = -1
+} MountExecCommand;
+
+typedef enum MountResult {
+ MOUNT_SUCCESS,
+ MOUNT_FAILURE_RESOURCES, /* a bit of a misnomer, just our catch-all error for errnos we didn't expect */
+ MOUNT_FAILURE_TIMEOUT,
+ MOUNT_FAILURE_EXIT_CODE,
+ MOUNT_FAILURE_SIGNAL,
+ MOUNT_FAILURE_CORE_DUMP,
+ MOUNT_FAILURE_START_LIMIT_HIT,
+ MOUNT_FAILURE_PROTOCOL,
+ _MOUNT_RESULT_MAX,
+ _MOUNT_RESULT_INVALID = -1
+} MountResult;
+
+typedef struct MountParameters {
+ char *what;
+ char *options;
+ char *fstype;
+} MountParameters;
+
+/* Used while looking for mount points that vanished or got added from/to /proc/self/mountinfo */
+typedef enum MountProcFlags {
+ MOUNT_PROC_IS_MOUNTED = 1 << 0,
+ MOUNT_PROC_JUST_MOUNTED = 1 << 1,
+ MOUNT_PROC_JUST_CHANGED = 1 << 2,
+} MountProcFlags;
+
+struct Mount {
+ Unit meta;
+
+ char *where;
+
+ MountParameters parameters_proc_self_mountinfo;
+ MountParameters parameters_fragment;
+
+ bool from_proc_self_mountinfo:1;
+ bool from_fragment:1;
+
+ MountProcFlags proc_flags;
+
+ bool sloppy_options;
+
+ bool lazy_unmount;
+ bool force_unmount;
+
+ MountResult result;
+ MountResult reload_result;
+
+ mode_t directory_mode;
+
+ usec_t timeout_usec;
+
+ ExecCommand exec_command[_MOUNT_EXEC_COMMAND_MAX];
+
+ ExecContext exec_context;
+ KillContext kill_context;
+ CGroupContext cgroup_context;
+
+ ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
+
+ MountState state, deserialized_state;
+
+ ExecCommand* control_command;
+ MountExecCommand control_command_id;
+ pid_t control_pid;
+
+ sd_event_source *timer_event_source;
+
+ unsigned n_retry_umount;
+};
+
+extern const UnitVTable mount_vtable;
+
+void mount_fd_event(Manager *m, int events);
+
+const char* mount_exec_command_to_string(MountExecCommand i) _const_;
+MountExecCommand mount_exec_command_from_string(const char *s) _pure_;
+
+const char* mount_result_to_string(MountResult i) _const_;
+MountResult mount_result_from_string(const char *s) _pure_;
+
+DEFINE_CAST(MOUNT, Mount);
diff --git a/src/core/namespace.c b/src/core/namespace.c
new file mode 100644
index 0000000..7f553a4
--- /dev/null
+++ b/src/core/namespace.c
@@ -0,0 +1,1733 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/fs.h>
+
+#include "alloc-util.h"
+#include "base-filesystem.h"
+#include "dev-setup.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "label.h"
+#include "loop-util.h"
+#include "loopback-setup.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "namespace.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "util.h"
+
+#define DEV_MOUNT_OPTIONS (MS_NOSUID|MS_STRICTATIME|MS_NOEXEC)
+
+typedef enum MountMode {
+ /* This is ordered by priority! */
+ INACCESSIBLE,
+ BIND_MOUNT,
+ BIND_MOUNT_RECURSIVE,
+ PRIVATE_TMP,
+ PRIVATE_DEV,
+ BIND_DEV,
+ EMPTY_DIR,
+ SYSFS,
+ PROCFS,
+ READONLY,
+ READWRITE,
+ TMPFS,
+} MountMode;
+
+typedef struct MountEntry {
+ const char *path_const; /* Memory allocated on stack or static */
+ MountMode mode:5;
+ bool ignore:1; /* Ignore if path does not exist? */
+ bool has_prefix:1; /* Already is prefixed by the root dir? */
+ bool read_only:1; /* Shall this mount point be read-only? */
+ bool applied:1; /* Already applied */
+ char *path_malloc; /* Use this instead of 'path_const' if we had to allocate memory */
+ const char *source_const; /* The source path, for bind mounts */
+ char *source_malloc;
+ const char *options_const;/* Mount options for tmpfs */
+ char *options_malloc;
+ unsigned long flags; /* Mount flags used by EMPTY_DIR and TMPFS. Do not include MS_RDONLY here, but please use read_only. */
+ unsigned n_followed;
+} MountEntry;
+
+/* If MountAPIVFS= is used, let's mount /sys and /proc into the it, but only as a fallback if the user hasn't mounted
+ * something there already. These mounts are hence overridden by any other explicitly configured mounts. */
+static const MountEntry apivfs_table[] = {
+ { "/proc", PROCFS, false },
+ { "/dev", BIND_DEV, false },
+ { "/sys", SYSFS, false },
+};
+
+/* ProtectKernelTunables= option and the related filesystem APIs */
+static const MountEntry protect_kernel_tunables_table[] = {
+ { "/proc/acpi", READONLY, true },
+ { "/proc/apm", READONLY, true }, /* Obsolete API, there's no point in permitting access to this, ever */
+ { "/proc/asound", READONLY, true },
+ { "/proc/bus", READONLY, true },
+ { "/proc/fs", READONLY, true },
+ { "/proc/irq", READONLY, true },
+ { "/proc/kallsyms", INACCESSIBLE, true },
+ { "/proc/kcore", INACCESSIBLE, true },
+ { "/proc/latency_stats", READONLY, true },
+ { "/proc/mtrr", READONLY, true },
+ { "/proc/scsi", READONLY, true },
+ { "/proc/sys", READONLY, false },
+ { "/proc/sysrq-trigger", READONLY, true },
+ { "/proc/timer_stats", READONLY, true },
+ { "/sys", READONLY, false },
+ { "/sys/fs/bpf", READONLY, true },
+ { "/sys/fs/cgroup", READWRITE, false }, /* READONLY is set by ProtectControlGroups= option */
+ { "/sys/fs/selinux", READWRITE, true },
+ { "/sys/kernel/debug", READONLY, true },
+ { "/sys/kernel/tracing", READONLY, true },
+};
+
+/* ProtectKernelModules= option */
+static const MountEntry protect_kernel_modules_table[] = {
+#if HAVE_SPLIT_USR
+ { "/lib/modules", INACCESSIBLE, true },
+#endif
+ { "/usr/lib/modules", INACCESSIBLE, true },
+};
+
+/*
+ * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of
+ * system should be protected by ProtectSystem=
+ */
+static const MountEntry protect_home_read_only_table[] = {
+ { "/home", READONLY, true },
+ { "/run/user", READONLY, true },
+ { "/root", READONLY, true },
+};
+
+/* ProtectHome=tmpfs table */
+static const MountEntry protect_home_tmpfs_table[] = {
+ { "/home", TMPFS, true, .read_only = true, .options_const = "mode=0755", .flags = MS_NODEV|MS_STRICTATIME },
+ { "/run/user", TMPFS, true, .read_only = true, .options_const = "mode=0755", .flags = MS_NODEV|MS_STRICTATIME },
+ { "/root", TMPFS, true, .read_only = true, .options_const = "mode=0700", .flags = MS_NODEV|MS_STRICTATIME },
+};
+
+/* ProtectHome=yes table */
+static const MountEntry protect_home_yes_table[] = {
+ { "/home", INACCESSIBLE, true },
+ { "/run/user", INACCESSIBLE, true },
+ { "/root", INACCESSIBLE, true },
+};
+
+/* ProtectSystem=yes table */
+static const MountEntry protect_system_yes_table[] = {
+ { "/usr", READONLY, false },
+ { "/boot", READONLY, true },
+ { "/efi", READONLY, true },
+#if HAVE_SPLIT_USR
+ { "/lib", READONLY, true },
+ { "/lib64", READONLY, true },
+ { "/bin", READONLY, true },
+# if HAVE_SPLIT_BIN
+ { "/sbin", READONLY, true },
+# endif
+#endif
+};
+
+/* ProtectSystem=full includes ProtectSystem=yes */
+static const MountEntry protect_system_full_table[] = {
+ { "/usr", READONLY, false },
+ { "/boot", READONLY, true },
+ { "/efi", READONLY, true },
+ { "/etc", READONLY, false },
+#if HAVE_SPLIT_USR
+ { "/lib", READONLY, true },
+ { "/lib64", READONLY, true },
+ { "/bin", READONLY, true },
+# if HAVE_SPLIT_BIN
+ { "/sbin", READONLY, true },
+# endif
+#endif
+};
+
+/*
+ * ProtectSystem=strict table. In this strict mode, we mount everything
+ * read-only, except for /proc, /dev, /sys which are the kernel API VFS,
+ * which are left writable, but PrivateDevices= + ProtectKernelTunables=
+ * protect those, and these options should be fully orthogonal.
+ * (And of course /home and friends are also left writable, as ProtectHome=
+ * shall manage those, orthogonally).
+ */
+static const MountEntry protect_system_strict_table[] = {
+ { "/", READONLY, false },
+ { "/proc", READWRITE, false }, /* ProtectKernelTunables= */
+ { "/sys", READWRITE, false }, /* ProtectKernelTunables= */
+ { "/dev", READWRITE, false }, /* PrivateDevices= */
+ { "/home", READWRITE, true }, /* ProtectHome= */
+ { "/run/user", READWRITE, true }, /* ProtectHome= */
+ { "/root", READWRITE, true }, /* ProtectHome= */
+};
+
+static const char *mount_entry_path(const MountEntry *p) {
+ assert(p);
+
+ /* Returns the path of this bind mount. If the malloc()-allocated ->path_buffer field is set we return that,
+ * otherwise the stack/static ->path field is returned. */
+
+ return p->path_malloc ?: p->path_const;
+}
+
+static bool mount_entry_read_only(const MountEntry *p) {
+ assert(p);
+
+ return p->read_only || IN_SET(p->mode, READONLY, INACCESSIBLE);
+}
+
+static const char *mount_entry_source(const MountEntry *p) {
+ assert(p);
+
+ return p->source_malloc ?: p->source_const;
+}
+
+static const char *mount_entry_options(const MountEntry *p) {
+ assert(p);
+
+ return p->options_malloc ?: p->options_const;
+}
+
+static void mount_entry_done(MountEntry *p) {
+ assert(p);
+
+ p->path_malloc = mfree(p->path_malloc);
+ p->source_malloc = mfree(p->source_malloc);
+ p->options_malloc = mfree(p->options_malloc);
+}
+
+static int append_access_mounts(MountEntry **p, char **strv, MountMode mode, bool forcibly_require_prefix) {
+ char **i;
+
+ assert(p);
+
+ /* Adds a list of user-supplied READWRITE/READONLY/INACCESSIBLE entries */
+
+ STRV_FOREACH(i, strv) {
+ bool ignore = false, needs_prefix = false;
+ const char *e = *i;
+
+ /* Look for any prefixes */
+ if (startswith(e, "-")) {
+ e++;
+ ignore = true;
+ }
+ if (startswith(e, "+")) {
+ e++;
+ needs_prefix = true;
+ }
+
+ if (!path_is_absolute(e))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path is not absolute: %s", e);
+
+ *((*p)++) = (MountEntry) {
+ .path_const = e,
+ .mode = mode,
+ .ignore = ignore,
+ .has_prefix = !needs_prefix && !forcibly_require_prefix,
+ };
+ }
+
+ return 0;
+}
+
+static int append_empty_dir_mounts(MountEntry **p, char **strv) {
+ char **i;
+
+ assert(p);
+
+ /* Adds tmpfs mounts to provide readable but empty directories. This is primarily used to implement the
+ * "/private/" boundary directories for DynamicUser=1. */
+
+ STRV_FOREACH(i, strv) {
+
+ *((*p)++) = (MountEntry) {
+ .path_const = *i,
+ .mode = EMPTY_DIR,
+ .ignore = false,
+ .read_only = true,
+ .options_const = "mode=755",
+ .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
+ };
+ }
+
+ return 0;
+}
+
+static int append_bind_mounts(MountEntry **p, const BindMount *binds, size_t n) {
+ size_t i;
+
+ assert(p);
+
+ for (i = 0; i < n; i++) {
+ const BindMount *b = binds + i;
+
+ *((*p)++) = (MountEntry) {
+ .path_const = b->destination,
+ .mode = b->recursive ? BIND_MOUNT_RECURSIVE : BIND_MOUNT,
+ .read_only = b->read_only,
+ .source_const = b->source,
+ .ignore = b->ignore_enoent,
+ };
+ }
+
+ return 0;
+}
+
+static int append_tmpfs_mounts(MountEntry **p, const TemporaryFileSystem *tmpfs, size_t n) {
+ size_t i;
+ int r;
+
+ assert(p);
+
+ for (i = 0; i < n; i++) {
+ const TemporaryFileSystem *t = tmpfs + i;
+ _cleanup_free_ char *o = NULL, *str = NULL;
+ unsigned long flags;
+ bool ro = false;
+
+ if (!path_is_absolute(t->path))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path is not absolute: %s",
+ t->path);
+
+ str = strjoin("mode=0755,", t->options);
+ if (!str)
+ return -ENOMEM;
+
+ r = mount_option_mangle(str, MS_NODEV|MS_STRICTATIME, &flags, &o);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse mount option '%s': %m", str);
+
+ ro = flags & MS_RDONLY;
+ if (ro)
+ flags ^= MS_RDONLY;
+
+ *((*p)++) = (MountEntry) {
+ .path_const = t->path,
+ .mode = TMPFS,
+ .read_only = ro,
+ .options_malloc = TAKE_PTR(o),
+ .flags = flags,
+ };
+ }
+
+ return 0;
+}
+
+static int append_static_mounts(MountEntry **p, const MountEntry *mounts, size_t n, bool ignore_protect) {
+ size_t i;
+
+ assert(p);
+ assert(mounts);
+
+ /* Adds a list of static pre-defined entries */
+
+ for (i = 0; i < n; i++)
+ *((*p)++) = (MountEntry) {
+ .path_const = mount_entry_path(mounts+i),
+ .mode = mounts[i].mode,
+ .ignore = mounts[i].ignore || ignore_protect,
+ };
+
+ return 0;
+}
+
+static int append_protect_home(MountEntry **p, ProtectHome protect_home, bool ignore_protect) {
+ assert(p);
+
+ switch (protect_home) {
+
+ case PROTECT_HOME_NO:
+ return 0;
+
+ case PROTECT_HOME_READ_ONLY:
+ return append_static_mounts(p, protect_home_read_only_table, ELEMENTSOF(protect_home_read_only_table), ignore_protect);
+
+ case PROTECT_HOME_TMPFS:
+ return append_static_mounts(p, protect_home_tmpfs_table, ELEMENTSOF(protect_home_tmpfs_table), ignore_protect);
+
+ case PROTECT_HOME_YES:
+ return append_static_mounts(p, protect_home_yes_table, ELEMENTSOF(protect_home_yes_table), ignore_protect);
+
+ default:
+ assert_not_reached("Unexpected ProtectHome= value");
+ }
+}
+
+static int append_protect_system(MountEntry **p, ProtectSystem protect_system, bool ignore_protect) {
+ assert(p);
+
+ switch (protect_system) {
+
+ case PROTECT_SYSTEM_NO:
+ return 0;
+
+ case PROTECT_SYSTEM_STRICT:
+ return append_static_mounts(p, protect_system_strict_table, ELEMENTSOF(protect_system_strict_table), ignore_protect);
+
+ case PROTECT_SYSTEM_YES:
+ return append_static_mounts(p, protect_system_yes_table, ELEMENTSOF(protect_system_yes_table), ignore_protect);
+
+ case PROTECT_SYSTEM_FULL:
+ return append_static_mounts(p, protect_system_full_table, ELEMENTSOF(protect_system_full_table), ignore_protect);
+
+ default:
+ assert_not_reached("Unexpected ProtectSystem= value");
+ }
+}
+
+static int mount_path_compare(const MountEntry *a, const MountEntry *b) {
+ int d;
+
+ /* If the paths are not equal, then order prefixes first */
+ d = path_compare(mount_entry_path(a), mount_entry_path(b));
+ if (d != 0)
+ return d;
+
+ /* If the paths are equal, check the mode */
+ return CMP((int) a->mode, (int) b->mode);
+}
+
+static int prefix_where_needed(MountEntry *m, size_t n, const char *root_directory) {
+ size_t i;
+
+ /* Prefixes all paths in the bind mount table with the root directory if the entry needs that. */
+
+ for (i = 0; i < n; i++) {
+ char *s;
+
+ if (m[i].has_prefix)
+ continue;
+
+ s = prefix_root(root_directory, mount_entry_path(m+i));
+ if (!s)
+ return -ENOMEM;
+
+ free_and_replace(m[i].path_malloc, s);
+ m[i].has_prefix = true;
+ }
+
+ return 0;
+}
+
+static void drop_duplicates(MountEntry *m, size_t *n) {
+ MountEntry *f, *t, *previous;
+
+ assert(m);
+ assert(n);
+
+ /* Drops duplicate entries. Expects that the array is properly ordered already. */
+
+ for (f = m, t = m, previous = NULL; f < m + *n; f++) {
+
+ /* The first one wins (which is the one with the more restrictive mode), see mount_path_compare()
+ * above. Note that we only drop duplicates that haven't been mounted yet. */
+ if (previous &&
+ path_equal(mount_entry_path(f), mount_entry_path(previous)) &&
+ !f->applied && !previous->applied) {
+ log_debug("%s is duplicate.", mount_entry_path(f));
+ previous->read_only = previous->read_only || mount_entry_read_only(f); /* Propagate the read-only flag to the remaining entry */
+ mount_entry_done(f);
+ continue;
+ }
+
+ *t = *f;
+ previous = t;
+ t++;
+ }
+
+ *n = t - m;
+}
+
+static void drop_inaccessible(MountEntry *m, size_t *n) {
+ MountEntry *f, *t;
+ const char *clear = NULL;
+
+ assert(m);
+ assert(n);
+
+ /* Drops all entries obstructed by another entry further up the tree. Expects that the array is properly
+ * ordered already. */
+
+ for (f = m, t = m; f < m + *n; f++) {
+
+ /* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop
+ * it, as inaccessible paths really should drop the entire subtree. */
+ if (clear && path_startswith(mount_entry_path(f), clear)) {
+ log_debug("%s is masked by %s.", mount_entry_path(f), clear);
+ mount_entry_done(f);
+ continue;
+ }
+
+ clear = f->mode == INACCESSIBLE ? mount_entry_path(f) : NULL;
+
+ *t = *f;
+ t++;
+ }
+
+ *n = t - m;
+}
+
+static void drop_nop(MountEntry *m, size_t *n) {
+ MountEntry *f, *t;
+
+ assert(m);
+ assert(n);
+
+ /* Drops all entries which have an immediate parent that has the same type, as they are redundant. Assumes the
+ * list is ordered by prefixes. */
+
+ for (f = m, t = m; f < m + *n; f++) {
+
+ /* Only suppress such subtrees for READONLY and READWRITE entries */
+ if (IN_SET(f->mode, READONLY, READWRITE)) {
+ MountEntry *p;
+ bool found = false;
+
+ /* Now let's find the first parent of the entry we are looking at. */
+ for (p = t-1; p >= m; p--) {
+ if (path_startswith(mount_entry_path(f), mount_entry_path(p))) {
+ found = true;
+ break;
+ }
+ }
+
+ /* We found it, let's see if it's the same mode, if so, we can drop this entry */
+ if (found && p->mode == f->mode) {
+ log_debug("%s is redundant by %s", mount_entry_path(f), mount_entry_path(p));
+ mount_entry_done(f);
+ continue;
+ }
+ }
+
+ *t = *f;
+ t++;
+ }
+
+ *n = t - m;
+}
+
+static void drop_outside_root(const char *root_directory, MountEntry *m, size_t *n) {
+ MountEntry *f, *t;
+
+ assert(m);
+ assert(n);
+
+ /* Nothing to do */
+ if (!root_directory)
+ return;
+
+ /* Drops all mounts that are outside of the root directory. */
+
+ for (f = m, t = m; f < m + *n; f++) {
+
+ if (!path_startswith(mount_entry_path(f), root_directory)) {
+ log_debug("%s is outside of root directory.", mount_entry_path(f));
+ mount_entry_done(f);
+ continue;
+ }
+
+ *t = *f;
+ t++;
+ }
+
+ *n = t - m;
+}
+
+static int clone_device_node(
+ const char *d,
+ const char *temporary_mount,
+ bool *make_devnode) {
+
+ _cleanup_free_ char *sl = NULL;
+ const char *dn, *bn, *t;
+ struct stat st;
+ int r;
+
+ if (stat(d, &st) < 0) {
+ if (errno == ENOENT) {
+ log_debug_errno(errno, "Device node '%s' to clone does not exist, ignoring.", d);
+ return -ENXIO;
+ }
+
+ return log_debug_errno(errno, "Failed to stat() device node '%s' to clone, ignoring: %m", d);
+ }
+
+ if (!S_ISBLK(st.st_mode) &&
+ !S_ISCHR(st.st_mode))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Device node '%s' to clone is not a device node, ignoring.",
+ d);
+
+ dn = strjoina(temporary_mount, d);
+
+ /* First, try to create device node properly */
+ if (*make_devnode) {
+ mac_selinux_create_file_prepare(d, st.st_mode);
+ r = mknod(dn, st.st_mode, st.st_rdev);
+ mac_selinux_create_file_clear();
+ if (r >= 0)
+ goto add_symlink;
+ if (errno != EPERM)
+ return log_debug_errno(errno, "mknod failed for %s: %m", d);
+
+ /* This didn't work, let's not try this again for the next iterations. */
+ *make_devnode = false;
+ }
+
+ /* We're about to fallback to bind-mounting the device
+ * node. So create a dummy bind-mount target. */
+ mac_selinux_create_file_prepare(d, 0);
+ r = mknod(dn, S_IFREG, 0);
+ mac_selinux_create_file_clear();
+ if (r < 0 && errno != EEXIST)
+ return log_debug_errno(errno, "mknod() fallback failed for '%s': %m", d);
+
+ /* Fallback to bind-mounting:
+ * The assumption here is that all used device nodes carry standard
+ * properties. Specifically, the devices nodes we bind-mount should
+ * either be owned by root:root or root:tty (e.g. /dev/tty, /dev/ptmx)
+ * and should not carry ACLs. */
+ if (mount(d, dn, NULL, MS_BIND, NULL) < 0)
+ return log_debug_errno(errno, "Bind mounting failed for '%s': %m", d);
+
+add_symlink:
+ bn = path_startswith(d, "/dev/");
+ if (!bn)
+ return 0;
+
+ /* Create symlinks like /dev/char/1:9 → ../urandom */
+ if (asprintf(&sl, "%s/dev/%s/%u:%u", temporary_mount, S_ISCHR(st.st_mode) ? "char" : "block", major(st.st_rdev), minor(st.st_rdev)) < 0)
+ return log_oom();
+
+ (void) mkdir_parents(sl, 0755);
+
+ t = strjoina("../", bn);
+
+ if (symlink(t, sl) < 0)
+ log_debug_errno(errno, "Failed to symlink '%s' to '%s', ignoring: %m", t, sl);
+
+ return 0;
+}
+
+static int mount_private_dev(MountEntry *m) {
+ static const char devnodes[] =
+ "/dev/null\0"
+ "/dev/zero\0"
+ "/dev/full\0"
+ "/dev/random\0"
+ "/dev/urandom\0"
+ "/dev/tty\0";
+
+ char temporary_mount[] = "/tmp/namespace-dev-XXXXXX";
+ const char *d, *dev = NULL, *devpts = NULL, *devshm = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL;
+ bool can_mknod = true;
+ _cleanup_umask_ mode_t u;
+ int r;
+
+ assert(m);
+
+ u = umask(0000);
+
+ if (!mkdtemp(temporary_mount))
+ return log_debug_errno(errno, "Failed to create temporary directory '%s': %m", temporary_mount);
+
+ dev = strjoina(temporary_mount, "/dev");
+ (void) mkdir(dev, 0755);
+ if (mount("tmpfs", dev, "tmpfs", DEV_MOUNT_OPTIONS, "mode=755") < 0) {
+ r = log_debug_errno(errno, "Failed to mount tmpfs on '%s': %m", dev);
+ goto fail;
+ }
+
+ devpts = strjoina(temporary_mount, "/dev/pts");
+ (void) mkdir(devpts, 0755);
+ if (mount("/dev/pts", devpts, NULL, MS_BIND, NULL) < 0) {
+ r = log_debug_errno(errno, "Failed to bind mount /dev/pts on '%s': %m", devpts);
+ goto fail;
+ }
+
+ /* /dev/ptmx can either be a device node or a symlink to /dev/pts/ptmx.
+ * When /dev/ptmx a device node, /dev/pts/ptmx has 000 permissions making it inaccessible.
+ * Thus, in that case make a clone.
+ * In nspawn and other containers it will be a symlink, in that case make it a symlink. */
+ r = is_symlink("/dev/ptmx");
+ if (r < 0) {
+ log_debug_errno(r, "Failed to detect whether /dev/ptmx is a symlink or not: %m");
+ goto fail;
+ } else if (r > 0) {
+ devptmx = strjoina(temporary_mount, "/dev/ptmx");
+ if (symlink("pts/ptmx", devptmx) < 0) {
+ r = log_debug_errno(errno, "Failed to create a symlink '%s' to pts/ptmx: %m", devptmx);
+ goto fail;
+ }
+ } else {
+ r = clone_device_node("/dev/ptmx", temporary_mount, &can_mknod);
+ if (r < 0)
+ goto fail;
+ }
+
+ devshm = strjoina(temporary_mount, "/dev/shm");
+ (void) mkdir(devshm, 0755);
+ r = mount("/dev/shm", devshm, NULL, MS_BIND, NULL);
+ if (r < 0) {
+ r = log_debug_errno(errno, "Failed to bind mount /dev/shm on '%s': %m", devshm);
+ goto fail;
+ }
+
+ devmqueue = strjoina(temporary_mount, "/dev/mqueue");
+ (void) mkdir(devmqueue, 0755);
+ if (mount("/dev/mqueue", devmqueue, NULL, MS_BIND, NULL) < 0)
+ log_debug_errno(errno, "Failed to bind mount /dev/mqueue on '%s', ignoring: %m", devmqueue);
+
+ devhugepages = strjoina(temporary_mount, "/dev/hugepages");
+ (void) mkdir(devhugepages, 0755);
+ if (mount("/dev/hugepages", devhugepages, NULL, MS_BIND, NULL) < 0)
+ log_debug_errno(errno, "Failed to bind mount /dev/hugepages on '%s', ignoring: %m", devhugepages);
+
+ devlog = strjoina(temporary_mount, "/dev/log");
+ if (symlink("/run/systemd/journal/dev-log", devlog) < 0)
+ log_debug_errno(errno, "Failed to create a symlink '%s' to /run/systemd/journal/dev-log, ignoring: %m", devlog);
+
+ NULSTR_FOREACH(d, devnodes) {
+ r = clone_device_node(d, temporary_mount, &can_mknod);
+ /* ENXIO means the the *source* is not a device file, skip creation in that case */
+ if (r < 0 && r != -ENXIO)
+ goto fail;
+ }
+
+ r = dev_setup(temporary_mount, UID_INVALID, GID_INVALID);
+ if (r < 0)
+ log_debug_errno(r, "Failed to setup basic device tree at '%s', ignoring: %m", temporary_mount);
+
+ /* Create the /dev directory if missing. It is more likely to be
+ * missing when the service is started with RootDirectory. This is
+ * consistent with mount units creating the mount points when missing.
+ */
+ (void) mkdir_p_label(mount_entry_path(m), 0755);
+
+ /* Unmount everything in old /dev */
+ r = umount_recursive(mount_entry_path(m), 0);
+ if (r < 0)
+ log_debug_errno(r, "Failed to unmount directories below '%s', ignoring: %m", mount_entry_path(m));
+
+ if (mount(dev, mount_entry_path(m), NULL, MS_MOVE, NULL) < 0) {
+ r = log_debug_errno(errno, "Failed to move mount point '%s' to '%s': %m", dev, mount_entry_path(m));
+ goto fail;
+ }
+
+ rmdir(dev);
+ rmdir(temporary_mount);
+
+ return 0;
+
+fail:
+ if (devpts)
+ umount(devpts);
+
+ if (devshm)
+ umount(devshm);
+
+ if (devhugepages)
+ umount(devhugepages);
+
+ if (devmqueue)
+ umount(devmqueue);
+
+ umount(dev);
+ rmdir(dev);
+ rmdir(temporary_mount);
+
+ return r;
+}
+
+static int mount_bind_dev(const MountEntry *m) {
+ int r;
+
+ assert(m);
+
+ /* Implements the little brother of mount_private_dev(): simply bind mounts the host's /dev into the service's
+ * /dev. This is only used when RootDirectory= is set. */
+
+ (void) mkdir_p_label(mount_entry_path(m), 0755);
+
+ r = path_is_mount_point(mount_entry_path(m), NULL, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to determine whether /dev is already mounted: %m");
+ if (r > 0) /* make this a NOP if /dev is already a mount point */
+ return 0;
+
+ if (mount("/dev", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL) < 0)
+ return log_debug_errno(errno, "Failed to bind mount %s: %m", mount_entry_path(m));
+
+ return 1;
+}
+
+static int mount_sysfs(const MountEntry *m) {
+ int r;
+
+ assert(m);
+
+ (void) mkdir_p_label(mount_entry_path(m), 0755);
+
+ r = path_is_mount_point(mount_entry_path(m), NULL, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to determine whether /sys is already mounted: %m");
+ if (r > 0) /* make this a NOP if /sys is already a mount point */
+ return 0;
+
+ /* Bind mount the host's version so that we get all child mounts of it, too. */
+ if (mount("/sys", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL) < 0)
+ return log_debug_errno(errno, "Failed to mount %s: %m", mount_entry_path(m));
+
+ return 1;
+}
+
+static int mount_procfs(const MountEntry *m) {
+ int r;
+
+ assert(m);
+
+ (void) mkdir_p_label(mount_entry_path(m), 0755);
+
+ r = path_is_mount_point(mount_entry_path(m), NULL, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to determine whether /proc is already mounted: %m");
+ if (r > 0) /* make this a NOP if /proc is already a mount point */
+ return 0;
+
+ /* Mount a new instance, so that we get the one that matches our user namespace, if we are running in one */
+ if (mount("proc", mount_entry_path(m), "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
+ return log_debug_errno(errno, "Failed to mount %s: %m", mount_entry_path(m));
+
+ return 1;
+}
+
+static int mount_tmpfs(const MountEntry *m) {
+ assert(m);
+
+ /* First, get rid of everything that is below if there is anything. Then, overmount with our new tmpfs */
+
+ (void) mkdir_p_label(mount_entry_path(m), 0755);
+ (void) umount_recursive(mount_entry_path(m), 0);
+
+ if (mount("tmpfs", mount_entry_path(m), "tmpfs", m->flags, mount_entry_options(m)) < 0)
+ return log_debug_errno(errno, "Failed to mount %s: %m", mount_entry_path(m));
+
+ return 1;
+}
+
+static int follow_symlink(
+ const char *root_directory,
+ MountEntry *m) {
+
+ _cleanup_free_ char *target = NULL;
+ int r;
+
+ /* Let's chase symlinks, but only one step at a time. That's because depending where the symlink points we
+ * might need to change the order in which we mount stuff. Hence: let's normalize piecemeal, and do one step at
+ * a time by specifying CHASE_STEP. This function returns 0 if we resolved one step, and > 0 if we reached the
+ * end and already have a fully normalized name. */
+
+ r = chase_symlinks(mount_entry_path(m), root_directory, CHASE_STEP|CHASE_NONEXISTENT, &target);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to chase symlinks '%s': %m", mount_entry_path(m));
+ if (r > 0) /* Reached the end, nothing more to resolve */
+ return 1;
+
+ if (m->n_followed >= CHASE_SYMLINKS_MAX) /* put a boundary on things */
+ return log_debug_errno(SYNTHETIC_ERRNO(ELOOP),
+ "Symlink loop on '%s'.",
+ mount_entry_path(m));
+
+ log_debug("Followed mount entry path symlink %s → %s.", mount_entry_path(m), target);
+
+ free_and_replace(m->path_malloc, target);
+ m->has_prefix = true;
+
+ m->n_followed ++;
+
+ return 0;
+}
+
+static int apply_mount(
+ const char *root_directory,
+ MountEntry *m) {
+
+ bool rbind = true, make = false;
+ const char *what;
+ int r;
+
+ assert(m);
+
+ log_debug("Applying namespace mount on %s", mount_entry_path(m));
+
+ switch (m->mode) {
+
+ case INACCESSIBLE: {
+ struct stat target;
+
+ /* First, get rid of everything that is below if there
+ * is anything... Then, overmount it with an
+ * inaccessible path. */
+ (void) umount_recursive(mount_entry_path(m), 0);
+
+ if (lstat(mount_entry_path(m), &target) < 0) {
+ if (errno == ENOENT && m->ignore)
+ return 0;
+
+ return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", mount_entry_path(m));
+ }
+
+ what = mode_to_inaccessible_node(target.st_mode);
+ if (!what)
+ return log_debug_errno(SYNTHETIC_ERRNO(ELOOP),
+ "File type not supported for inaccessible mounts. Note that symlinks are not allowed");
+ break;
+ }
+
+ case READONLY:
+ case READWRITE:
+ r = path_is_mount_point(mount_entry_path(m), root_directory, 0);
+ if (r == -ENOENT && m->ignore)
+ return 0;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", mount_entry_path(m));
+ if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */
+ return 0;
+ /* This isn't a mount point yet, let's make it one. */
+ what = mount_entry_path(m);
+ break;
+
+ case BIND_MOUNT:
+ rbind = false;
+
+ _fallthrough_;
+ case BIND_MOUNT_RECURSIVE: {
+ _cleanup_free_ char *chased = NULL;
+
+ /* Since mount() will always follow symlinks we chase the symlinks on our own first. Note that bind
+ * mount source paths are always relative to the host root, hence we pass NULL as root directory to
+ * chase_symlinks() here. */
+
+ r = chase_symlinks(mount_entry_source(m), NULL, CHASE_TRAIL_SLASH, &chased);
+ if (r == -ENOENT && m->ignore) {
+ log_debug_errno(r, "Path %s does not exist, ignoring.", mount_entry_source(m));
+ return 0;
+ }
+ if (r < 0)
+ return log_debug_errno(r, "Failed to follow symlinks on %s: %m", mount_entry_source(m));
+
+ log_debug("Followed source symlinks %s → %s.", mount_entry_source(m), chased);
+
+ free_and_replace(m->source_malloc, chased);
+
+ what = mount_entry_source(m);
+ make = true;
+ break;
+ }
+
+ case EMPTY_DIR:
+ case TMPFS:
+ return mount_tmpfs(m);
+
+ case PRIVATE_TMP:
+ what = mount_entry_source(m);
+ make = true;
+ break;
+
+ case PRIVATE_DEV:
+ return mount_private_dev(m);
+
+ case BIND_DEV:
+ return mount_bind_dev(m);
+
+ case SYSFS:
+ return mount_sysfs(m);
+
+ case PROCFS:
+ return mount_procfs(m);
+
+ default:
+ assert_not_reached("Unknown mode");
+ }
+
+ assert(what);
+
+ if (mount(what, mount_entry_path(m), NULL, MS_BIND|(rbind ? MS_REC : 0), NULL) < 0) {
+ bool try_again = false;
+ r = -errno;
+
+ if (r == -ENOENT && make) {
+ struct stat st;
+
+ /* Hmm, either the source or the destination are missing. Let's see if we can create the destination, then try again */
+
+ if (stat(what, &st) < 0)
+ log_debug_errno(errno, "Mount point source '%s' is not accessible: %m", what);
+ else {
+ int q;
+
+ (void) mkdir_parents(mount_entry_path(m), 0755);
+
+ if (S_ISDIR(st.st_mode))
+ q = mkdir(mount_entry_path(m), 0755) < 0 ? -errno : 0;
+ else
+ q = touch(mount_entry_path(m));
+
+ if (q < 0)
+ log_debug_errno(q, "Failed to create destination mount point node '%s': %m", mount_entry_path(m));
+ else
+ try_again = true;
+ }
+ }
+
+ if (try_again) {
+ if (mount(what, mount_entry_path(m), NULL, MS_BIND|(rbind ? MS_REC : 0), NULL) < 0)
+ r = -errno;
+ else
+ r = 0;
+ }
+
+ if (r < 0)
+ return log_debug_errno(r, "Failed to mount %s to %s: %m", what, mount_entry_path(m));
+ }
+
+ log_debug("Successfully mounted %s to %s", what, mount_entry_path(m));
+ return 0;
+}
+
+/* Change the per-mount readonly flag on an existing mount */
+static int remount_bind_readonly(const char *path, unsigned long orig_flags) {
+ int r;
+
+ r = mount(NULL, path, NULL, MS_REMOUNT | MS_BIND | MS_RDONLY | orig_flags, NULL);
+
+ return r < 0 ? -errno : 0;
+}
+
+static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self_mountinfo) {
+ bool submounts = false;
+ int r = 0;
+
+ assert(m);
+ assert(proc_self_mountinfo);
+
+ if (mount_entry_read_only(m)) {
+ if (IN_SET(m->mode, EMPTY_DIR, TMPFS)) {
+ r = remount_bind_readonly(mount_entry_path(m), m->flags);
+ } else {
+ submounts = true;
+ r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), true, blacklist, proc_self_mountinfo);
+ }
+ } else if (m->mode == PRIVATE_DEV) {
+ /* Set /dev readonly, but not submounts like /dev/shm. Also, we only set the per-mount read-only flag.
+ * We can't set it on the superblock, if we are inside a user namespace and running Linux <= 4.17. */
+ r = remount_bind_readonly(mount_entry_path(m), DEV_MOUNT_OPTIONS);
+ } else
+ return 0;
+
+ /* Not that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked read-only
+ * already stays this way. This improves compatibility with container managers, where we won't attempt to undo
+ * read-only mounts already applied. */
+
+ if (r == -ENOENT && m->ignore)
+ r = 0;
+
+ if (r < 0)
+ return log_debug_errno(r, "Failed to re-mount '%s'%s read-only: %m", mount_entry_path(m),
+ submounts ? " and its submounts" : "");
+
+ return 0;
+}
+
+static bool namespace_info_mount_apivfs(const NamespaceInfo *ns_info) {
+ assert(ns_info);
+
+ /*
+ * ProtectControlGroups= and ProtectKernelTunables= imply MountAPIVFS=,
+ * since to protect the API VFS mounts, they need to be around in the
+ * first place...
+ */
+
+ return ns_info->mount_apivfs ||
+ ns_info->protect_control_groups ||
+ ns_info->protect_kernel_tunables;
+}
+
+static size_t namespace_calculate_mounts(
+ const NamespaceInfo *ns_info,
+ char** read_write_paths,
+ char** read_only_paths,
+ char** inaccessible_paths,
+ char** empty_directories,
+ size_t n_bind_mounts,
+ size_t n_temporary_filesystems,
+ const char* tmp_dir,
+ const char* var_tmp_dir,
+ ProtectHome protect_home,
+ ProtectSystem protect_system) {
+
+ size_t protect_home_cnt;
+ size_t protect_system_cnt =
+ (protect_system == PROTECT_SYSTEM_STRICT ?
+ ELEMENTSOF(protect_system_strict_table) :
+ ((protect_system == PROTECT_SYSTEM_FULL) ?
+ ELEMENTSOF(protect_system_full_table) :
+ ((protect_system == PROTECT_SYSTEM_YES) ?
+ ELEMENTSOF(protect_system_yes_table) : 0)));
+
+ protect_home_cnt =
+ (protect_home == PROTECT_HOME_YES ?
+ ELEMENTSOF(protect_home_yes_table) :
+ ((protect_home == PROTECT_HOME_READ_ONLY) ?
+ ELEMENTSOF(protect_home_read_only_table) :
+ ((protect_home == PROTECT_HOME_TMPFS) ?
+ ELEMENTSOF(protect_home_tmpfs_table) : 0)));
+
+ return !!tmp_dir + !!var_tmp_dir +
+ strv_length(read_write_paths) +
+ strv_length(read_only_paths) +
+ strv_length(inaccessible_paths) +
+ strv_length(empty_directories) +
+ n_bind_mounts +
+ n_temporary_filesystems +
+ ns_info->private_dev +
+ (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
+ (ns_info->protect_control_groups ? 1 : 0) +
+ (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) +
+ protect_home_cnt + protect_system_cnt +
+ (namespace_info_mount_apivfs(ns_info) ? ELEMENTSOF(apivfs_table) : 0);
+}
+
+static void normalize_mounts(const char *root_directory, MountEntry *mounts, size_t *n_mounts) {
+ assert(root_directory);
+ assert(n_mounts);
+ assert(mounts || *n_mounts == 0);
+
+ typesafe_qsort(mounts, *n_mounts, mount_path_compare);
+
+ drop_duplicates(mounts, n_mounts);
+ drop_outside_root(root_directory, mounts, n_mounts);
+ drop_inaccessible(mounts, n_mounts);
+ drop_nop(mounts, n_mounts);
+}
+
+int setup_namespace(
+ const char* root_directory,
+ const char* root_image,
+ const NamespaceInfo *ns_info,
+ char** read_write_paths,
+ char** read_only_paths,
+ char** inaccessible_paths,
+ char** empty_directories,
+ const BindMount *bind_mounts,
+ size_t n_bind_mounts,
+ const TemporaryFileSystem *temporary_filesystems,
+ size_t n_temporary_filesystems,
+ const char* tmp_dir,
+ const char* var_tmp_dir,
+ ProtectHome protect_home,
+ ProtectSystem protect_system,
+ unsigned long mount_flags,
+ DissectImageFlags dissect_image_flags) {
+
+ _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
+ _cleanup_free_ void *root_hash = NULL;
+ MountEntry *m, *mounts = NULL;
+ size_t n_mounts, root_hash_size = 0;
+ bool require_prefix = false;
+ const char *root;
+ int r = 0;
+
+ assert(ns_info);
+
+ if (mount_flags == 0)
+ mount_flags = MS_SHARED;
+
+ if (root_image) {
+ dissect_image_flags |= DISSECT_IMAGE_REQUIRE_ROOT;
+
+ if (protect_system == PROTECT_SYSTEM_STRICT &&
+ protect_home != PROTECT_HOME_NO &&
+ strv_isempty(read_write_paths))
+ dissect_image_flags |= DISSECT_IMAGE_READ_ONLY;
+
+ r = loop_device_make_by_path(root_image,
+ dissect_image_flags & DISSECT_IMAGE_READ_ONLY ? O_RDONLY : O_RDWR,
+ &loop_device);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create loop device for root image: %m");
+
+ r = root_hash_load(root_image, &root_hash, &root_hash_size);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to load root hash: %m");
+
+ r = dissect_image(loop_device->fd, root_hash, root_hash_size, dissect_image_flags, &dissected_image);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to dissect image: %m");
+
+ r = dissected_image_decrypt(dissected_image, NULL, root_hash, root_hash_size, dissect_image_flags, &decrypted_image);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to decrypt dissected image: %m");
+ }
+
+ if (root_directory)
+ root = root_directory;
+ else {
+ /* Always create the mount namespace in a temporary directory, instead of operating
+ * directly in the root. The temporary directory prevents any mounts from being
+ * potentially obscured my other mounts we already applied.
+ * We use the same mount point for all images, which is safe, since they all live
+ * in their own namespaces after all, and hence won't see each other. */
+
+ root = "/run/systemd/unit-root";
+ (void) mkdir_label(root, 0700);
+ require_prefix = true;
+ }
+
+ n_mounts = namespace_calculate_mounts(
+ ns_info,
+ read_write_paths,
+ read_only_paths,
+ inaccessible_paths,
+ empty_directories,
+ n_bind_mounts,
+ n_temporary_filesystems,
+ tmp_dir, var_tmp_dir,
+ protect_home, protect_system);
+
+ if (n_mounts > 0) {
+ m = mounts = (MountEntry *) alloca0(n_mounts * sizeof(MountEntry));
+ r = append_access_mounts(&m, read_write_paths, READWRITE, require_prefix);
+ if (r < 0)
+ goto finish;
+
+ r = append_access_mounts(&m, read_only_paths, READONLY, require_prefix);
+ if (r < 0)
+ goto finish;
+
+ r = append_access_mounts(&m, inaccessible_paths, INACCESSIBLE, require_prefix);
+ if (r < 0)
+ goto finish;
+
+ r = append_empty_dir_mounts(&m, empty_directories);
+ if (r < 0)
+ goto finish;
+
+ r = append_bind_mounts(&m, bind_mounts, n_bind_mounts);
+ if (r < 0)
+ goto finish;
+
+ r = append_tmpfs_mounts(&m, temporary_filesystems, n_temporary_filesystems);
+ if (r < 0)
+ goto finish;
+
+ if (tmp_dir) {
+ *(m++) = (MountEntry) {
+ .path_const = "/tmp",
+ .mode = PRIVATE_TMP,
+ .source_const = tmp_dir,
+ };
+ }
+
+ if (var_tmp_dir) {
+ *(m++) = (MountEntry) {
+ .path_const = "/var/tmp",
+ .mode = PRIVATE_TMP,
+ .source_const = var_tmp_dir,
+ };
+ }
+
+ if (ns_info->private_dev) {
+ *(m++) = (MountEntry) {
+ .path_const = "/dev",
+ .mode = PRIVATE_DEV,
+ };
+ }
+
+ if (ns_info->protect_kernel_tunables) {
+ r = append_static_mounts(&m, protect_kernel_tunables_table, ELEMENTSOF(protect_kernel_tunables_table), ns_info->ignore_protect_paths);
+ if (r < 0)
+ goto finish;
+ }
+
+ if (ns_info->protect_kernel_modules) {
+ r = append_static_mounts(&m, protect_kernel_modules_table, ELEMENTSOF(protect_kernel_modules_table), ns_info->ignore_protect_paths);
+ if (r < 0)
+ goto finish;
+ }
+
+ if (ns_info->protect_control_groups) {
+ *(m++) = (MountEntry) {
+ .path_const = "/sys/fs/cgroup",
+ .mode = READONLY,
+ };
+ }
+
+ r = append_protect_home(&m, protect_home, ns_info->ignore_protect_paths);
+ if (r < 0)
+ goto finish;
+
+ r = append_protect_system(&m, protect_system, false);
+ if (r < 0)
+ goto finish;
+
+ if (namespace_info_mount_apivfs(ns_info)) {
+ r = append_static_mounts(&m, apivfs_table, ELEMENTSOF(apivfs_table), ns_info->ignore_protect_paths);
+ if (r < 0)
+ goto finish;
+ }
+
+ assert(mounts + n_mounts == m);
+
+ /* Prepend the root directory where that's necessary */
+ r = prefix_where_needed(mounts, n_mounts, root);
+ if (r < 0)
+ goto finish;
+
+ normalize_mounts(root, mounts, &n_mounts);
+ }
+
+ /* All above is just preparation, figuring out what to do. Let's now actually start doing something. */
+
+ if (unshare(CLONE_NEWNS) < 0) {
+ r = log_debug_errno(errno, "Failed to unshare the mount namespace: %m");
+ if (IN_SET(r, -EACCES, -EPERM, -EOPNOTSUPP, -ENOSYS))
+ /* If the kernel doesn't support namespaces, or when there's a MAC or seccomp filter in place
+ * that doesn't allow us to create namespaces (or a missing cap), then propagate a recognizable
+ * error back, which the caller can use to detect this case (and only this) and optionally
+ * continue without namespacing applied. */
+ r = -ENOANO;
+
+ goto finish;
+ }
+
+ /* Remount / as SLAVE so that nothing now mounted in the namespace
+ * shows up in the parent */
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+ r = log_debug_errno(errno, "Failed to remount '/' as SLAVE: %m");
+ goto finish;
+ }
+
+ if (root_image) {
+ /* A root image is specified, mount it to the right place */
+ r = dissected_image_mount(dissected_image, root, UID_INVALID, dissect_image_flags);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to mount root image: %m");
+ goto finish;
+ }
+
+ if (decrypted_image) {
+ r = decrypted_image_relinquish(decrypted_image);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to relinquish decrypted image: %m");
+ goto finish;
+ }
+ }
+
+ loop_device_relinquish(loop_device);
+
+ } else if (root_directory) {
+
+ /* A root directory is specified. Turn its directory into bind mount, if it isn't one yet. */
+ r = path_is_mount_point(root, NULL, AT_SYMLINK_FOLLOW);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to detect that %s is a mount point or not: %m", root);
+ goto finish;
+ }
+ if (r == 0) {
+ if (mount(root, root, NULL, MS_BIND|MS_REC, NULL) < 0) {
+ r = log_debug_errno(errno, "Failed to bind mount '%s': %m", root);
+ goto finish;
+ }
+ }
+
+ } else {
+
+ /* Let's mount the main root directory to the root directory to use */
+ if (mount("/", root, NULL, MS_BIND|MS_REC, NULL) < 0) {
+ r = log_debug_errno(errno, "Failed to bind mount '/' on '%s': %m", root);
+ goto finish;
+ }
+ }
+
+ /* Try to set up the new root directory before mounting anything else there. */
+ if (root_image || root_directory)
+ (void) base_filesystem_create(root, UID_INVALID, GID_INVALID);
+
+ if (n_mounts > 0) {
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+ char **blacklist;
+ size_t j;
+
+ /* Open /proc/self/mountinfo now as it may become unavailable if we mount anything on top of /proc.
+ * For example, this is the case with the option: 'InaccessiblePaths=/proc' */
+ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!proc_self_mountinfo) {
+ r = log_debug_errno(errno, "Failed to open /proc/self/mountinfo: %m");
+ goto finish;
+ }
+
+ /* First round, establish all mounts we need */
+ for (;;) {
+ bool again = false;
+
+ for (m = mounts; m < mounts + n_mounts; ++m) {
+
+ if (m->applied)
+ continue;
+
+ r = follow_symlink(root, m);
+ if (r < 0)
+ goto finish;
+ if (r == 0) {
+ /* We hit a symlinked mount point. The entry got rewritten and might point to a
+ * very different place now. Let's normalize the changed list, and start from
+ * the beginning. After all to mount the entry at the new location we might
+ * need some other mounts first */
+ again = true;
+ break;
+ }
+
+ r = apply_mount(root, m);
+ if (r < 0)
+ goto finish;
+
+ m->applied = true;
+ }
+
+ if (!again)
+ break;
+
+ normalize_mounts(root, mounts, &n_mounts);
+ }
+
+ /* Create a blacklist we can pass to bind_mount_recursive() */
+ blacklist = newa(char*, n_mounts+1);
+ for (j = 0; j < n_mounts; j++)
+ blacklist[j] = (char*) mount_entry_path(mounts+j);
+ blacklist[j] = NULL;
+
+ /* Second round, flip the ro bits if necessary. */
+ for (m = mounts; m < mounts + n_mounts; ++m) {
+ r = make_read_only(m, blacklist, proc_self_mountinfo);
+ if (r < 0)
+ goto finish;
+ }
+ }
+
+ /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */
+ r = mount_move_root(root);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to mount root with MS_MOVE: %m");
+ goto finish;
+ }
+
+ /* Remount / as the desired mode. Note that this will not
+ * reestablish propagation from our side to the host, since
+ * what's disconnected is disconnected. */
+ if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
+ r = log_debug_errno(errno, "Failed to remount '/' with desired mount flags: %m");
+ goto finish;
+ }
+
+ r = 0;
+
+finish:
+ for (m = mounts; m < mounts + n_mounts; m++)
+ mount_entry_done(m);
+
+ return r;
+}
+
+void bind_mount_free_many(BindMount *b, size_t n) {
+ size_t i;
+
+ assert(b || n == 0);
+
+ for (i = 0; i < n; i++) {
+ free(b[i].source);
+ free(b[i].destination);
+ }
+
+ free(b);
+}
+
+int bind_mount_add(BindMount **b, size_t *n, const BindMount *item) {
+ _cleanup_free_ char *s = NULL, *d = NULL;
+ BindMount *c;
+
+ assert(b);
+ assert(n);
+ assert(item);
+
+ s = strdup(item->source);
+ if (!s)
+ return -ENOMEM;
+
+ d = strdup(item->destination);
+ if (!d)
+ return -ENOMEM;
+
+ c = reallocarray(*b, *n + 1, sizeof(BindMount));
+ if (!c)
+ return -ENOMEM;
+
+ *b = c;
+
+ c[(*n) ++] = (BindMount) {
+ .source = TAKE_PTR(s),
+ .destination = TAKE_PTR(d),
+ .read_only = item->read_only,
+ .recursive = item->recursive,
+ .ignore_enoent = item->ignore_enoent,
+ };
+
+ return 0;
+}
+
+void temporary_filesystem_free_many(TemporaryFileSystem *t, size_t n) {
+ size_t i;
+
+ assert(t || n == 0);
+
+ for (i = 0; i < n; i++) {
+ free(t[i].path);
+ free(t[i].options);
+ }
+
+ free(t);
+}
+
+int temporary_filesystem_add(
+ TemporaryFileSystem **t,
+ size_t *n,
+ const char *path,
+ const char *options) {
+
+ _cleanup_free_ char *p = NULL, *o = NULL;
+ TemporaryFileSystem *c;
+
+ assert(t);
+ assert(n);
+ assert(path);
+
+ p = strdup(path);
+ if (!p)
+ return -ENOMEM;
+
+ if (!isempty(options)) {
+ o = strdup(options);
+ if (!o)
+ return -ENOMEM;
+ }
+
+ c = reallocarray(*t, *n + 1, sizeof(TemporaryFileSystem));
+ if (!c)
+ return -ENOMEM;
+
+ *t = c;
+
+ c[(*n) ++] = (TemporaryFileSystem) {
+ .path = TAKE_PTR(p),
+ .options = TAKE_PTR(o),
+ };
+
+ return 0;
+}
+
+static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {
+ _cleanup_free_ char *x = NULL;
+ char bid[SD_ID128_STRING_MAX];
+ sd_id128_t boot_id;
+ int r;
+
+ assert(id);
+ assert(prefix);
+ assert(path);
+
+ /* We include the boot id in the directory so that after a
+ * reboot we can easily identify obsolete directories. */
+
+ r = sd_id128_get_boot(&boot_id);
+ if (r < 0)
+ return r;
+
+ x = strjoin(prefix, "/systemd-private-", sd_id128_to_string(boot_id, bid), "-", id, "-XXXXXX");
+ if (!x)
+ return -ENOMEM;
+
+ RUN_WITH_UMASK(0077)
+ if (!mkdtemp(x))
+ return -errno;
+
+ RUN_WITH_UMASK(0000) {
+ char *y;
+
+ y = strjoina(x, "/tmp");
+
+ if (mkdir(y, 0777 | S_ISVTX) < 0)
+ return -errno;
+ }
+
+ *path = TAKE_PTR(x);
+
+ return 0;
+}
+
+int setup_tmp_dirs(const char *id, char **tmp_dir, char **var_tmp_dir) {
+ char *a, *b;
+ int r;
+
+ assert(id);
+ assert(tmp_dir);
+ assert(var_tmp_dir);
+
+ r = setup_one_tmp_dir(id, "/tmp", &a);
+ if (r < 0)
+ return r;
+
+ r = setup_one_tmp_dir(id, "/var/tmp", &b);
+ if (r < 0) {
+ char *t;
+
+ t = strjoina(a, "/tmp");
+ rmdir(t);
+ rmdir(a);
+
+ free(a);
+ return r;
+ }
+
+ *tmp_dir = a;
+ *var_tmp_dir = b;
+
+ return 0;
+}
+
+int setup_netns(int netns_storage_socket[static 2]) {
+ _cleanup_close_ int netns = -1;
+ int r, q;
+
+ assert(netns_storage_socket);
+ assert(netns_storage_socket[0] >= 0);
+ assert(netns_storage_socket[1] >= 0);
+
+ /* We use the passed socketpair as a storage buffer for our
+ * namespace reference fd. Whatever process runs this first
+ * shall create a new namespace, all others should just join
+ * it. To serialize that we use a file lock on the socket
+ * pair.
+ *
+ * It's a bit crazy, but hey, works great! */
+
+ if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0)
+ return -errno;
+
+ netns = receive_one_fd(netns_storage_socket[0], MSG_DONTWAIT);
+ if (netns == -EAGAIN) {
+ /* Nothing stored yet, so let's create a new namespace */
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ loopback_setup();
+
+ netns = open("/proc/self/ns/net", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (netns < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ r = 1;
+
+ } else if (netns < 0) {
+ r = netns;
+ goto fail;
+
+ } else {
+ /* Yay, found something, so let's join the namespace */
+ if (setns(netns, CLONE_NEWNET) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ r = 0;
+ }
+
+ q = send_one_fd(netns_storage_socket[1], netns, MSG_DONTWAIT);
+ if (q < 0) {
+ r = q;
+ goto fail;
+ }
+
+fail:
+ (void) lockf(netns_storage_socket[0], F_ULOCK, 0);
+ return r;
+}
+
+bool ns_type_supported(NamespaceType type) {
+ const char *t, *ns_proc;
+
+ t = namespace_type_to_string(type);
+ if (!t) /* Don't know how to translate this? Then it's not supported */
+ return false;
+
+ ns_proc = strjoina("/proc/self/ns/", t);
+ return access(ns_proc, F_OK) == 0;
+}
+
+static const char *const protect_home_table[_PROTECT_HOME_MAX] = {
+ [PROTECT_HOME_NO] = "no",
+ [PROTECT_HOME_YES] = "yes",
+ [PROTECT_HOME_READ_ONLY] = "read-only",
+ [PROTECT_HOME_TMPFS] = "tmpfs",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(protect_home, ProtectHome, PROTECT_HOME_YES);
+
+static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = {
+ [PROTECT_SYSTEM_NO] = "no",
+ [PROTECT_SYSTEM_YES] = "yes",
+ [PROTECT_SYSTEM_FULL] = "full",
+ [PROTECT_SYSTEM_STRICT] = "strict",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(protect_system, ProtectSystem, PROTECT_SYSTEM_YES);
+
+static const char* const namespace_type_table[] = {
+ [NAMESPACE_MOUNT] = "mnt",
+ [NAMESPACE_CGROUP] = "cgroup",
+ [NAMESPACE_UTS] = "uts",
+ [NAMESPACE_IPC] = "ipc",
+ [NAMESPACE_USER] = "user",
+ [NAMESPACE_PID] = "pid",
+ [NAMESPACE_NET] = "net",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(namespace_type, NamespaceType);
diff --git a/src/core/namespace.h b/src/core/namespace.h
new file mode 100644
index 0000000..5e0ec97
--- /dev/null
+++ b/src/core/namespace.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2016 Djalal Harouni
+***/
+
+typedef struct NamespaceInfo NamespaceInfo;
+typedef struct BindMount BindMount;
+typedef struct TemporaryFileSystem TemporaryFileSystem;
+
+#include <stdbool.h>
+
+#include "dissect-image.h"
+#include "macro.h"
+
+typedef enum ProtectHome {
+ PROTECT_HOME_NO,
+ PROTECT_HOME_YES,
+ PROTECT_HOME_READ_ONLY,
+ PROTECT_HOME_TMPFS,
+ _PROTECT_HOME_MAX,
+ _PROTECT_HOME_INVALID = -1
+} ProtectHome;
+
+typedef enum NamespaceType {
+ NAMESPACE_MOUNT,
+ NAMESPACE_CGROUP,
+ NAMESPACE_UTS,
+ NAMESPACE_IPC,
+ NAMESPACE_USER,
+ NAMESPACE_PID,
+ NAMESPACE_NET,
+ _NAMESPACE_TYPE_MAX,
+ _NAMESPACE_TYPE_INVALID = -1,
+} NamespaceType;
+
+typedef enum ProtectSystem {
+ PROTECT_SYSTEM_NO,
+ PROTECT_SYSTEM_YES,
+ PROTECT_SYSTEM_FULL,
+ PROTECT_SYSTEM_STRICT,
+ _PROTECT_SYSTEM_MAX,
+ _PROTECT_SYSTEM_INVALID = -1
+} ProtectSystem;
+
+struct NamespaceInfo {
+ bool ignore_protect_paths:1;
+ bool private_dev:1;
+ bool private_mounts:1;
+ bool protect_control_groups:1;
+ bool protect_kernel_tunables:1;
+ bool protect_kernel_modules:1;
+ bool mount_apivfs:1;
+};
+
+struct BindMount {
+ char *source;
+ char *destination;
+ bool read_only:1;
+ bool recursive:1;
+ bool ignore_enoent:1;
+};
+
+struct TemporaryFileSystem {
+ char *path;
+ char *options;
+};
+
+int setup_namespace(
+ const char *root_directory,
+ const char *root_image,
+ const NamespaceInfo *ns_info,
+ char **read_write_paths,
+ char **read_only_paths,
+ char **inaccessible_paths,
+ char **empty_directories,
+ const BindMount *bind_mounts,
+ size_t n_bind_mounts,
+ const TemporaryFileSystem *temporary_filesystems,
+ size_t n_temporary_filesystems,
+ const char *tmp_dir,
+ const char *var_tmp_dir,
+ ProtectHome protect_home,
+ ProtectSystem protect_system,
+ unsigned long mount_flags,
+ DissectImageFlags dissected_image_flags);
+
+int setup_tmp_dirs(
+ const char *id,
+ char **tmp_dir,
+ char **var_tmp_dir);
+
+int setup_netns(int netns_storage_socket[static 2]);
+
+const char* protect_home_to_string(ProtectHome p) _const_;
+ProtectHome protect_home_from_string(const char *s) _pure_;
+
+const char* protect_system_to_string(ProtectSystem p) _const_;
+ProtectSystem protect_system_from_string(const char *s) _pure_;
+
+void bind_mount_free_many(BindMount *b, size_t n);
+int bind_mount_add(BindMount **b, size_t *n, const BindMount *item);
+
+void temporary_filesystem_free_many(TemporaryFileSystem *t, size_t n);
+int temporary_filesystem_add(TemporaryFileSystem **t, size_t *n,
+ const char *path, const char *options);
+
+const char* namespace_type_to_string(NamespaceType t) _const_;
+NamespaceType namespace_type_from_string(const char *s) _pure_;
+
+bool ns_type_supported(NamespaceType type);
diff --git a/src/core/org.freedesktop.systemd1.conf b/src/core/org.freedesktop.systemd1.conf
new file mode 100644
index 0000000..415b3f5
--- /dev/null
+++ b/src/core/org.freedesktop.systemd1.conf
@@ -0,0 +1,400 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1+
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.systemd1"/>
+
+ <!-- Root clients can do everything -->
+ <allow send_destination="org.freedesktop.systemd1"/>
+ <allow receive_sender="org.freedesktop.systemd1"/>
+
+ <!-- systemd may receive activator requests -->
+ <allow receive_interface="org.freedesktop.systemd1.Activator"
+ receive_member="ActivationRequest"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.systemd1"/>
+
+ <!-- Completely open to anyone: org.freedesktop.DBus.* interfaces -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <!-- Completely open to anyone: org.freedesktop.systemd1.Manager interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitByPID"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitByInvocationID"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitByControlGroup"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="LoadUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitProcesses"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetJob"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetJobAfter"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetJobBefore"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ListUnits"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ListUnitsFiltered"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ListUnitsByPatterns"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ListUnitsByNames"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ListJobs"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="Subscribe"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="Unsubscribe"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="Dump"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="DumpByFileDescriptor"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ListUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ListUnitFilesByPatterns"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitFileState"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetDefaultTarget"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitFileLinks"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="LookupDynamicUserByName"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="LookupDynamicUserByUID"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetDynamicUsers"/>
+
+ <!-- Completely open to anyone: org.freedesktop.systemd1.Unit interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Service"
+ send_member="GetProcesses"/>
+
+ <!-- Completely open to anyone: org.freedesktop.systemd1.Slice interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Slice"
+ send_member="GetProcesses"/>
+
+ <!-- Completely open to anyone: org.freedesktop.systemd1.Scope interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Scope"
+ send_member="GetProcesses"/>
+
+ <!-- Completely open to anyone: org.freedesktop.systemd1.Socket interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Socket"
+ send_member="GetProcesses"/>
+
+ <!-- Completely open to anyone: org.freedesktop.systemd1.Mount interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Mount"
+ send_member="GetProcesses"/>
+
+ <!-- Completely open to anyone: org.freedesktop.systemd1.Swap interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Swap"
+ send_member="GetProcesses"/>
+
+ <!-- Managed via polkit or other criteria: org.freedesktop.systemd1.Manager interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="StartUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="StartUnitReplace"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="StopUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ReloadUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="RestartUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="TryRestartUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ReloadOrRestartUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ReloadOrTryRestartUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="KillUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ResetFailedUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="SetUnitProperties"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="RefUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="UnrefUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="StartTransientUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="AttachProcessesToUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="CancelJob"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ClearJobs"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ResetFailed"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="Reload"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="Reexecute"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="EnableUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="DisableUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ReenableUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="LinkUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="PresetUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="PresetUnitFilesWithMode"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="MaskUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="UnmaskUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="RevertUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="SetDefaultTarget"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="PresetAllUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="AddDependencyUnitFiles"/>
+
+ <!-- Managed via polkit or other criteria: org.freedesktop.systemd1.Job interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Job"
+ send_member="Cancel"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Job"
+ send_member="GetAfter"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Job"
+ send_member="GetBefore"/>
+
+ <!-- Managed via polkit or other criteria: org.freedesktop.systemd1.Unit interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="Start"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="Stop"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="Reload"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="Restart"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="TryRestart"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="ReloadOrRestart"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="ReloadOrTryRestart"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="Kill"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="ResetFailed"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="SetProperties"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="Ref"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="Unref"/>
+
+ <!-- Managed via polkit or other criteria: org.freedesktop.systemd1.Service interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Service"
+ send_member="AttachProcesses"/>
+
+ <!-- Managed via polkit or other criteria: org.freedesktop.systemd1.Scope interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Scope"
+ send_member="AttachProcesses"/>
+
+ <allow receive_sender="org.freedesktop.systemd1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/core/org.freedesktop.systemd1.policy.in b/src/core/org.freedesktop.systemd1.policy.in
new file mode 100644
index 0000000..001408d
--- /dev/null
+++ b/src/core/org.freedesktop.systemd1.policy.in
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1+
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.systemd1.reply-password">
+ <description gettext-domain="systemd">Send passphrase back to system</description>
+ <message gettext-domain="systemd">Authentication is required to send the entered passphrase back to the system.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>no</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.exec.path">@rootlibexecdir@/systemd-reply-password</annotate>
+ </action>
+
+ <action id="org.freedesktop.systemd1.manage-units">
+ <description gettext-domain="systemd">Manage system services or other units</description>
+ <message gettext-domain="systemd">Authentication is required to manage system services or other units.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.systemd1.manage-unit-files">
+ <description gettext-domain="systemd">Manage system service or unit files</description>
+ <message gettext-domain="systemd">Authentication is required to manage system service or unit files.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.systemd1.reload-daemon org.freedesktop.systemd1.manage-units</annotate>
+ </action>
+
+ <action id="org.freedesktop.systemd1.set-environment">
+ <description gettext-domain="systemd">Set or unset system and service manager environment variables</description>
+ <message gettext-domain="systemd">Authentication is required to set or unset system and service manager environment variables.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.systemd1.reload-daemon">
+ <description gettext-domain="systemd">Reload the systemd state</description>
+ <message gettext-domain="systemd">Authentication is required to reload the systemd state.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/core/org.freedesktop.systemd1.service b/src/core/org.freedesktop.systemd1.service
new file mode 100644
index 0000000..8bd7302
--- /dev/null
+++ b/src/core/org.freedesktop.systemd1.service
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.systemd1
+Exec=/bin/false
+User=root
diff --git a/src/core/path.c b/src/core/path.c
new file mode 100644
index 0000000..831e49d
--- /dev/null
+++ b/src/core/path.c
@@ -0,0 +1,787 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sys/epoll.h>
+#include <sys/inotify.h>
+#include <unistd.h>
+
+#include "bus-error.h"
+#include "bus-util.h"
+#include "dbus-path.h"
+#include "dbus-unit.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path.h"
+#include "serialize.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "unit-name.h"
+#include "unit.h"
+
+static const UnitActiveState state_translation_table[_PATH_STATE_MAX] = {
+ [PATH_DEAD] = UNIT_INACTIVE,
+ [PATH_WAITING] = UNIT_ACTIVE,
+ [PATH_RUNNING] = UNIT_ACTIVE,
+ [PATH_FAILED] = UNIT_FAILED
+};
+
+static int path_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+
+int path_spec_watch(PathSpec *s, sd_event_io_handler_t handler) {
+
+ static const int flags_table[_PATH_TYPE_MAX] = {
+ [PATH_EXISTS] = IN_DELETE_SELF|IN_MOVE_SELF|IN_ATTRIB,
+ [PATH_EXISTS_GLOB] = IN_DELETE_SELF|IN_MOVE_SELF|IN_ATTRIB,
+ [PATH_CHANGED] = IN_DELETE_SELF|IN_MOVE_SELF|IN_ATTRIB|IN_CLOSE_WRITE|IN_CREATE|IN_DELETE|IN_MOVED_FROM|IN_MOVED_TO,
+ [PATH_MODIFIED] = IN_DELETE_SELF|IN_MOVE_SELF|IN_ATTRIB|IN_CLOSE_WRITE|IN_CREATE|IN_DELETE|IN_MOVED_FROM|IN_MOVED_TO|IN_MODIFY,
+ [PATH_DIRECTORY_NOT_EMPTY] = IN_DELETE_SELF|IN_MOVE_SELF|IN_ATTRIB|IN_CREATE|IN_MOVED_TO
+ };
+
+ bool exists = false;
+ char *slash, *oldslash = NULL;
+ int r;
+
+ assert(s);
+ assert(s->unit);
+ assert(handler);
+
+ path_spec_unwatch(s);
+
+ s->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (s->inotify_fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ r = sd_event_add_io(s->unit->manager->event, &s->event_source, s->inotify_fd, EPOLLIN, handler, s);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(s->event_source, "path");
+
+ /* This function assumes the path was passed through path_simplify()! */
+ assert(!strstr(s->path, "//"));
+
+ for (slash = strchr(s->path, '/'); ; slash = strchr(slash+1, '/')) {
+ char *cut = NULL;
+ int flags;
+ char tmp;
+
+ if (slash) {
+ cut = slash + (slash == s->path);
+ tmp = *cut;
+ *cut = '\0';
+
+ flags = IN_MOVE_SELF | IN_DELETE_SELF | IN_ATTRIB | IN_CREATE | IN_MOVED_TO;
+ } else
+ flags = flags_table[s->type];
+
+ r = inotify_add_watch(s->inotify_fd, s->path, flags);
+ if (r < 0) {
+ if (IN_SET(errno, EACCES, ENOENT)) {
+ if (cut)
+ *cut = tmp;
+ break;
+ }
+
+ r = log_warning_errno(errno, "Failed to add watch on %s: %s", s->path, errno == ENOSPC ? "too many watches" : strerror(-r));
+ if (cut)
+ *cut = tmp;
+ goto fail;
+ } else {
+ exists = true;
+
+ /* Path exists, we don't need to watch parent too closely. */
+ if (oldslash) {
+ char *cut2 = oldslash + (oldslash == s->path);
+ char tmp2 = *cut2;
+ *cut2 = '\0';
+
+ (void) inotify_add_watch(s->inotify_fd, s->path, IN_MOVE_SELF);
+ /* Error is ignored, the worst can happen is we get spurious events. */
+
+ *cut2 = tmp2;
+ }
+ }
+
+ if (cut)
+ *cut = tmp;
+
+ if (slash)
+ oldslash = slash;
+ else {
+ /* whole path has been iterated over */
+ s->primary_wd = r;
+ break;
+ }
+ }
+
+ if (!exists) {
+ r = log_error_errno(errno, "Failed to add watch on any of the components of %s: %m", s->path);
+ /* either EACCESS or ENOENT */
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ path_spec_unwatch(s);
+ return r;
+}
+
+void path_spec_unwatch(PathSpec *s) {
+ assert(s);
+
+ s->event_source = sd_event_source_unref(s->event_source);
+ s->inotify_fd = safe_close(s->inotify_fd);
+}
+
+int path_spec_fd_event(PathSpec *s, uint32_t revents) {
+ union inotify_event_buffer buffer;
+ struct inotify_event *e;
+ ssize_t l;
+ int r = 0;
+
+ if (revents != EPOLLIN)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Got invalid poll event on inotify.");
+
+ l = read(s->inotify_fd, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return log_error_errno(errno, "Failed to read inotify event: %m");
+ }
+
+ FOREACH_INOTIFY_EVENT(e, buffer, l) {
+ if (IN_SET(s->type, PATH_CHANGED, PATH_MODIFIED) &&
+ s->primary_wd == e->wd)
+ r = 1;
+ }
+
+ return r;
+}
+
+static bool path_spec_check_good(PathSpec *s, bool initial) {
+ bool good = false;
+
+ switch (s->type) {
+
+ case PATH_EXISTS:
+ good = access(s->path, F_OK) >= 0;
+ break;
+
+ case PATH_EXISTS_GLOB:
+ good = glob_exists(s->path) > 0;
+ break;
+
+ case PATH_DIRECTORY_NOT_EMPTY: {
+ int k;
+
+ k = dir_is_empty(s->path);
+ good = !(k == -ENOENT || k > 0);
+ break;
+ }
+
+ case PATH_CHANGED:
+ case PATH_MODIFIED: {
+ bool b;
+
+ b = access(s->path, F_OK) >= 0;
+ good = !initial && b != s->previous_exists;
+ s->previous_exists = b;
+ break;
+ }
+
+ default:
+ ;
+ }
+
+ return good;
+}
+
+static void path_spec_mkdir(PathSpec *s, mode_t mode) {
+ int r;
+
+ if (IN_SET(s->type, PATH_EXISTS, PATH_EXISTS_GLOB))
+ return;
+
+ r = mkdir_p_label(s->path, mode);
+ if (r < 0)
+ log_warning_errno(r, "mkdir(%s) failed: %m", s->path);
+}
+
+static void path_spec_dump(PathSpec *s, FILE *f, const char *prefix) {
+ fprintf(f,
+ "%s%s: %s\n",
+ prefix,
+ path_type_to_string(s->type),
+ s->path);
+}
+
+void path_spec_done(PathSpec *s) {
+ assert(s);
+ assert(s->inotify_fd == -1);
+
+ free(s->path);
+}
+
+static void path_init(Unit *u) {
+ Path *p = PATH(u);
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ p->directory_mode = 0755;
+}
+
+void path_free_specs(Path *p) {
+ PathSpec *s;
+
+ assert(p);
+
+ while ((s = p->specs)) {
+ path_spec_unwatch(s);
+ LIST_REMOVE(spec, p->specs, s);
+ path_spec_done(s);
+ free(s);
+ }
+}
+
+static void path_done(Unit *u) {
+ Path *p = PATH(u);
+
+ assert(p);
+
+ path_free_specs(p);
+}
+
+static int path_add_mount_dependencies(Path *p) {
+ PathSpec *s;
+ int r;
+
+ assert(p);
+
+ LIST_FOREACH(spec, s, p->specs) {
+ r = unit_require_mounts_for(UNIT(p), s->path, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int path_verify(Path *p) {
+ assert(p);
+
+ if (UNIT(p)->load_state != UNIT_LOADED)
+ return 0;
+
+ if (!p->specs) {
+ log_unit_error(UNIT(p), "Path unit lacks path setting. Refusing.");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static int path_add_default_dependencies(Path *p) {
+ int r;
+
+ assert(p);
+
+ if (!UNIT(p)->default_dependencies)
+ return 0;
+
+ r = unit_add_dependency_by_name(UNIT(p), UNIT_BEFORE, SPECIAL_PATHS_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ if (MANAGER_IS_SYSTEM(UNIT(p)->manager)) {
+ r = unit_add_two_dependencies_by_name(UNIT(p), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+ }
+
+ return unit_add_two_dependencies_by_name(UNIT(p), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+}
+
+static int path_add_trigger_dependencies(Path *p) {
+ Unit *x;
+ int r;
+
+ assert(p);
+
+ if (!hashmap_isempty(UNIT(p)->dependencies[UNIT_TRIGGERS]))
+ return 0;
+
+ r = unit_load_related_unit(UNIT(p), ".service", &x);
+ if (r < 0)
+ return r;
+
+ return unit_add_two_dependencies(UNIT(p), UNIT_BEFORE, UNIT_TRIGGERS, x, true, UNIT_DEPENDENCY_IMPLICIT);
+}
+
+static int path_load(Unit *u) {
+ Path *p = PATH(u);
+ int r;
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ r = unit_load_fragment_and_dropin(u);
+ if (r < 0)
+ return r;
+
+ if (u->load_state == UNIT_LOADED) {
+
+ r = path_add_trigger_dependencies(p);
+ if (r < 0)
+ return r;
+
+ r = path_add_mount_dependencies(p);
+ if (r < 0)
+ return r;
+
+ r = path_add_default_dependencies(p);
+ if (r < 0)
+ return r;
+ }
+
+ return path_verify(p);
+}
+
+static void path_dump(Unit *u, FILE *f, const char *prefix) {
+ Path *p = PATH(u);
+ Unit *trigger;
+ PathSpec *s;
+
+ assert(p);
+ assert(f);
+
+ trigger = UNIT_TRIGGER(u);
+
+ fprintf(f,
+ "%sPath State: %s\n"
+ "%sResult: %s\n"
+ "%sUnit: %s\n"
+ "%sMakeDirectory: %s\n"
+ "%sDirectoryMode: %04o\n",
+ prefix, path_state_to_string(p->state),
+ prefix, path_result_to_string(p->result),
+ prefix, trigger ? trigger->id : "n/a",
+ prefix, yes_no(p->make_directory),
+ prefix, p->directory_mode);
+
+ LIST_FOREACH(spec, s, p->specs)
+ path_spec_dump(s, f, prefix);
+}
+
+static void path_unwatch(Path *p) {
+ PathSpec *s;
+
+ assert(p);
+
+ LIST_FOREACH(spec, s, p->specs)
+ path_spec_unwatch(s);
+}
+
+static int path_watch(Path *p) {
+ int r;
+ PathSpec *s;
+
+ assert(p);
+
+ LIST_FOREACH(spec, s, p->specs) {
+ r = path_spec_watch(s, path_dispatch_io);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static void path_set_state(Path *p, PathState state) {
+ PathState old_state;
+ assert(p);
+
+ if (p->state != state)
+ bus_unit_send_pending_change_signal(UNIT(p), false);
+
+ old_state = p->state;
+ p->state = state;
+
+ if (state != PATH_WAITING &&
+ (state != PATH_RUNNING || p->inotify_triggered))
+ path_unwatch(p);
+
+ if (state != old_state)
+ log_unit_debug(UNIT(p), "Changed %s -> %s", path_state_to_string(old_state), path_state_to_string(state));
+
+ unit_notify(UNIT(p), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static void path_enter_waiting(Path *p, bool initial, bool recheck);
+
+static int path_coldplug(Unit *u) {
+ Path *p = PATH(u);
+
+ assert(p);
+ assert(p->state == PATH_DEAD);
+
+ if (p->deserialized_state != p->state) {
+
+ if (IN_SET(p->deserialized_state, PATH_WAITING, PATH_RUNNING))
+ path_enter_waiting(p, true, true);
+ else
+ path_set_state(p, p->deserialized_state);
+ }
+
+ return 0;
+}
+
+static void path_enter_dead(Path *p, PathResult f) {
+ assert(p);
+
+ if (p->result == PATH_SUCCESS)
+ p->result = f;
+
+ unit_log_result(UNIT(p), p->result == PATH_SUCCESS, path_result_to_string(p->result));
+ path_set_state(p, p->result != PATH_SUCCESS ? PATH_FAILED : PATH_DEAD);
+}
+
+static void path_enter_running(Path *p) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Unit *trigger;
+ int r;
+
+ assert(p);
+
+ /* Don't start job if we are supposed to go down */
+ if (unit_stop_pending(UNIT(p)))
+ return;
+
+ trigger = UNIT_TRIGGER(UNIT(p));
+ if (!trigger) {
+ log_unit_error(UNIT(p), "Unit to trigger vanished.");
+ path_enter_dead(p, PATH_FAILURE_RESOURCES);
+ return;
+ }
+
+ r = manager_add_job(UNIT(p)->manager, JOB_START, trigger, JOB_REPLACE, &error, NULL);
+ if (r < 0)
+ goto fail;
+
+ p->inotify_triggered = false;
+
+ r = path_watch(p);
+ if (r < 0)
+ goto fail;
+
+ path_set_state(p, PATH_RUNNING);
+ return;
+
+fail:
+ log_unit_warning(UNIT(p), "Failed to queue unit startup job: %s", bus_error_message(&error, r));
+ path_enter_dead(p, PATH_FAILURE_RESOURCES);
+}
+
+static bool path_check_good(Path *p, bool initial) {
+ PathSpec *s;
+ bool good = false;
+
+ assert(p);
+
+ LIST_FOREACH(spec, s, p->specs) {
+ good = path_spec_check_good(s, initial);
+
+ if (good)
+ break;
+ }
+
+ return good;
+}
+
+static void path_enter_waiting(Path *p, bool initial, bool recheck) {
+ int r;
+
+ if (recheck)
+ if (path_check_good(p, initial)) {
+ log_unit_debug(UNIT(p), "Got triggered.");
+ path_enter_running(p);
+ return;
+ }
+
+ r = path_watch(p);
+ if (r < 0)
+ goto fail;
+
+ /* Hmm, so now we have created inotify watches, but the file
+ * might have appeared/been removed by now, so we must
+ * recheck */
+
+ if (recheck)
+ if (path_check_good(p, false)) {
+ log_unit_debug(UNIT(p), "Got triggered.");
+ path_enter_running(p);
+ return;
+ }
+
+ path_set_state(p, PATH_WAITING);
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(p), r, "Failed to enter waiting state: %m");
+ path_enter_dead(p, PATH_FAILURE_RESOURCES);
+}
+
+static void path_mkdir(Path *p) {
+ PathSpec *s;
+
+ assert(p);
+
+ if (!p->make_directory)
+ return;
+
+ LIST_FOREACH(spec, s, p->specs)
+ path_spec_mkdir(s, p->directory_mode);
+}
+
+static int path_start(Unit *u) {
+ Path *p = PATH(u);
+ Unit *trigger;
+ int r;
+
+ assert(p);
+ assert(IN_SET(p->state, PATH_DEAD, PATH_FAILED));
+
+ trigger = UNIT_TRIGGER(u);
+ if (!trigger || trigger->load_state != UNIT_LOADED) {
+ log_unit_error(u, "Refusing to start, unit to trigger not loaded.");
+ return -ENOENT;
+ }
+
+ r = unit_start_limit_test(u);
+ if (r < 0) {
+ path_enter_dead(p, PATH_FAILURE_START_LIMIT_HIT);
+ return r;
+ }
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ path_mkdir(p);
+
+ p->result = PATH_SUCCESS;
+ path_enter_waiting(p, true, true);
+
+ return 1;
+}
+
+static int path_stop(Unit *u) {
+ Path *p = PATH(u);
+
+ assert(p);
+ assert(IN_SET(p->state, PATH_WAITING, PATH_RUNNING));
+
+ path_enter_dead(p, PATH_SUCCESS);
+ return 1;
+}
+
+static int path_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Path *p = PATH(u);
+
+ assert(u);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", path_state_to_string(p->state));
+ (void) serialize_item(f, "result", path_result_to_string(p->result));
+
+ return 0;
+}
+
+static int path_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Path *p = PATH(u);
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ PathState state;
+
+ state = path_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ p->deserialized_state = state;
+
+ } else if (streq(key, "result")) {
+ PathResult f;
+
+ f = path_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse result value: %s", value);
+ else if (f != PATH_SUCCESS)
+ p->result = f;
+
+ } else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+_pure_ static UnitActiveState path_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[PATH(u)->state];
+}
+
+_pure_ static const char *path_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return path_state_to_string(PATH(u)->state);
+}
+
+static int path_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ PathSpec *s = userdata;
+ Path *p;
+ int changed;
+
+ assert(s);
+ assert(s->unit);
+ assert(fd >= 0);
+
+ p = PATH(s->unit);
+
+ if (!IN_SET(p->state, PATH_WAITING, PATH_RUNNING))
+ return 0;
+
+ /* log_debug("inotify wakeup on %s.", u->id); */
+
+ LIST_FOREACH(spec, s, p->specs)
+ if (path_spec_owns_inotify_fd(s, fd))
+ break;
+
+ if (!s) {
+ log_error("Got event on unknown fd.");
+ goto fail;
+ }
+
+ changed = path_spec_fd_event(s, revents);
+ if (changed < 0)
+ goto fail;
+
+ /* If we are already running, then remember that one event was
+ * dispatched so that we restart the service only if something
+ * actually changed on disk */
+ p->inotify_triggered = true;
+
+ if (changed)
+ path_enter_running(p);
+ else
+ path_enter_waiting(p, false, true);
+
+ return 0;
+
+fail:
+ path_enter_dead(p, PATH_FAILURE_RESOURCES);
+ return 0;
+}
+
+static void path_trigger_notify(Unit *u, Unit *other) {
+ Path *p = PATH(u);
+
+ assert(u);
+ assert(other);
+
+ /* Invoked whenever the unit we trigger changes state or gains
+ * or loses a job */
+
+ if (other->load_state != UNIT_LOADED)
+ return;
+
+ if (p->state == PATH_RUNNING &&
+ UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other))) {
+ log_unit_debug(UNIT(p), "Got notified about unit deactivation.");
+
+ /* Hmm, so inotify was triggered since the
+ * last activation, so I guess we need to
+ * recheck what is going on. */
+ path_enter_waiting(p, false, p->inotify_triggered);
+ }
+}
+
+static void path_reset_failed(Unit *u) {
+ Path *p = PATH(u);
+
+ assert(p);
+
+ if (p->state == PATH_FAILED)
+ path_set_state(p, PATH_DEAD);
+
+ p->result = PATH_SUCCESS;
+}
+
+static const char* const path_type_table[_PATH_TYPE_MAX] = {
+ [PATH_EXISTS] = "PathExists",
+ [PATH_EXISTS_GLOB] = "PathExistsGlob",
+ [PATH_DIRECTORY_NOT_EMPTY] = "DirectoryNotEmpty",
+ [PATH_CHANGED] = "PathChanged",
+ [PATH_MODIFIED] = "PathModified",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(path_type, PathType);
+
+static const char* const path_result_table[_PATH_RESULT_MAX] = {
+ [PATH_SUCCESS] = "success",
+ [PATH_FAILURE_RESOURCES] = "resources",
+ [PATH_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(path_result, PathResult);
+
+const UnitVTable path_vtable = {
+ .object_size = sizeof(Path),
+
+ .sections =
+ "Unit\0"
+ "Path\0"
+ "Install\0",
+ .private_section = "Path",
+
+ .can_transient = true,
+
+ .init = path_init,
+ .done = path_done,
+ .load = path_load,
+
+ .coldplug = path_coldplug,
+
+ .dump = path_dump,
+
+ .start = path_start,
+ .stop = path_stop,
+
+ .serialize = path_serialize,
+ .deserialize_item = path_deserialize_item,
+
+ .active_state = path_active_state,
+ .sub_state_to_string = path_sub_state_to_string,
+
+ .trigger_notify = path_trigger_notify,
+
+ .reset_failed = path_reset_failed,
+
+ .bus_vtable = bus_path_vtable,
+ .bus_set_property = bus_path_set_property,
+};
diff --git a/src/core/path.h b/src/core/path.h
new file mode 100644
index 0000000..4d4b623
--- /dev/null
+++ b/src/core/path.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Path Path;
+typedef struct PathSpec PathSpec;
+
+#include "unit.h"
+
+typedef enum PathType {
+ PATH_EXISTS,
+ PATH_EXISTS_GLOB,
+ PATH_DIRECTORY_NOT_EMPTY,
+ PATH_CHANGED,
+ PATH_MODIFIED,
+ _PATH_TYPE_MAX,
+ _PATH_TYPE_INVALID = -1
+} PathType;
+
+typedef struct PathSpec {
+ Unit *unit;
+
+ char *path;
+
+ sd_event_source *event_source;
+
+ LIST_FIELDS(struct PathSpec, spec);
+
+ PathType type;
+ int inotify_fd;
+ int primary_wd;
+
+ bool previous_exists;
+} PathSpec;
+
+int path_spec_watch(PathSpec *s, sd_event_io_handler_t handler);
+void path_spec_unwatch(PathSpec *s);
+int path_spec_fd_event(PathSpec *s, uint32_t events);
+void path_spec_done(PathSpec *s);
+
+static inline bool path_spec_owns_inotify_fd(PathSpec *s, int fd) {
+ return s->inotify_fd == fd;
+}
+
+typedef enum PathResult {
+ PATH_SUCCESS,
+ PATH_FAILURE_RESOURCES,
+ PATH_FAILURE_START_LIMIT_HIT,
+ _PATH_RESULT_MAX,
+ _PATH_RESULT_INVALID = -1
+} PathResult;
+
+struct Path {
+ Unit meta;
+
+ LIST_HEAD(PathSpec, specs);
+
+ PathState state, deserialized_state;
+
+ bool inotify_triggered;
+
+ bool make_directory;
+ mode_t directory_mode;
+
+ PathResult result;
+};
+
+void path_free_specs(Path *p);
+
+extern const UnitVTable path_vtable;
+
+const char* path_type_to_string(PathType i) _const_;
+PathType path_type_from_string(const char *s) _pure_;
+
+const char* path_result_to_string(PathResult i) _const_;
+PathResult path_result_from_string(const char *s) _pure_;
+
+DEFINE_CAST(PATH, Path);
diff --git a/src/core/scope.c b/src/core/scope.c
new file mode 100644
index 0000000..e478661
--- /dev/null
+++ b/src/core/scope.c
@@ -0,0 +1,624 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dbus-scope.h"
+#include "dbus-unit.h"
+#include "load-dropin.h"
+#include "log.h"
+#include "scope.h"
+#include "serialize.h"
+#include "special.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit.h"
+
+static const UnitActiveState state_translation_table[_SCOPE_STATE_MAX] = {
+ [SCOPE_DEAD] = UNIT_INACTIVE,
+ [SCOPE_RUNNING] = UNIT_ACTIVE,
+ [SCOPE_ABANDONED] = UNIT_ACTIVE,
+ [SCOPE_STOP_SIGTERM] = UNIT_DEACTIVATING,
+ [SCOPE_STOP_SIGKILL] = UNIT_DEACTIVATING,
+ [SCOPE_FAILED] = UNIT_FAILED
+};
+
+static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
+
+static void scope_init(Unit *u) {
+ Scope *s = SCOPE(u);
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ s->timeout_stop_usec = u->manager->default_timeout_stop_usec;
+ u->ignore_on_isolate = true;
+}
+
+static void scope_done(Unit *u) {
+ Scope *s = SCOPE(u);
+
+ assert(u);
+
+ s->controller = mfree(s->controller);
+ s->controller_track = sd_bus_track_unref(s->controller_track);
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+}
+
+static int scope_arm_timer(Scope *s, usec_t usec) {
+ int r;
+
+ assert(s);
+
+ if (s->timer_event_source) {
+ r = sd_event_source_set_time(s->timer_event_source, usec);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(s->timer_event_source, SD_EVENT_ONESHOT);
+ }
+
+ if (usec == USEC_INFINITY)
+ return 0;
+
+ r = sd_event_add_time(
+ UNIT(s)->manager->event,
+ &s->timer_event_source,
+ CLOCK_MONOTONIC,
+ usec, 0,
+ scope_dispatch_timer, s);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s->timer_event_source, "scope-timer");
+
+ return 0;
+}
+
+static void scope_set_state(Scope *s, ScopeState state) {
+ ScopeState old_state;
+ assert(s);
+
+ if (s->state != state)
+ bus_unit_send_pending_change_signal(UNIT(s), false);
+
+ old_state = s->state;
+ s->state = state;
+
+ if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+ if (IN_SET(state, SCOPE_DEAD, SCOPE_FAILED)) {
+ unit_unwatch_all_pids(UNIT(s));
+ unit_dequeue_rewatch_pids(UNIT(s));
+ }
+
+ if (state != old_state)
+ log_debug("%s changed %s -> %s", UNIT(s)->id, scope_state_to_string(old_state), scope_state_to_string(state));
+
+ unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static int scope_add_default_dependencies(Scope *s) {
+ int r;
+
+ assert(s);
+
+ if (!UNIT(s)->default_dependencies)
+ return 0;
+
+ /* Make sure scopes are unloaded on shutdown */
+ r = unit_add_two_dependencies_by_name(
+ UNIT(s),
+ UNIT_BEFORE, UNIT_CONFLICTS,
+ SPECIAL_SHUTDOWN_TARGET, true,
+ UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int scope_verify(Scope *s) {
+ assert(s);
+
+ if (UNIT(s)->load_state != UNIT_LOADED)
+ return 0;
+
+ if (set_isempty(UNIT(s)->pids) &&
+ !MANAGER_IS_RELOADING(UNIT(s)->manager) &&
+ !unit_has_name(UNIT(s), SPECIAL_INIT_SCOPE)) {
+ log_unit_error(UNIT(s), "Scope has no PIDs. Refusing.");
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int scope_load_init_scope(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_INIT_SCOPE))
+ return 0;
+
+ u->transient = true;
+ u->perpetual = true;
+
+ /* init.scope is a bit special, as it has to stick around forever. Because of its special semantics we
+ * synthesize it here, instead of relying on the unit file on disk. */
+
+ u->default_dependencies = false;
+
+ /* Prettify things, if we can. */
+ if (!u->description)
+ u->description = strdup("System and Service Manager");
+ if (!u->documentation)
+ (void) strv_extend(&u->documentation, "man:systemd(1)");
+
+ return 1;
+}
+
+static int scope_load(Unit *u) {
+ Scope *s = SCOPE(u);
+ int r;
+
+ assert(s);
+ assert(u->load_state == UNIT_STUB);
+
+ if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
+ /* Refuse to load non-transient scope units, but allow them while reloading. */
+ return -ENOENT;
+
+ r = scope_load_init_scope(u);
+ if (r < 0)
+ return r;
+ r = unit_load_fragment_and_dropin_optional(u);
+ if (r < 0)
+ return r;
+
+ if (u->load_state == UNIT_LOADED) {
+ r = unit_patch_contexts(u);
+ if (r < 0)
+ return r;
+
+ r = unit_set_default_slice(u);
+ if (r < 0)
+ return r;
+
+ r = scope_add_default_dependencies(s);
+ if (r < 0)
+ return r;
+ }
+
+ return scope_verify(s);
+}
+
+static int scope_coldplug(Unit *u) {
+ Scope *s = SCOPE(u);
+ int r;
+
+ assert(s);
+ assert(s->state == SCOPE_DEAD);
+
+ if (s->deserialized_state == s->state)
+ return 0;
+
+ if (IN_SET(s->deserialized_state, SCOPE_STOP_SIGKILL, SCOPE_STOP_SIGTERM)) {
+ r = scope_arm_timer(s, usec_add(u->state_change_timestamp.monotonic, s->timeout_stop_usec));
+ if (r < 0)
+ return r;
+ }
+
+ if (!IN_SET(s->deserialized_state, SCOPE_DEAD, SCOPE_FAILED))
+ (void) unit_enqueue_rewatch_pids(u);
+
+ bus_scope_track_controller(s);
+
+ scope_set_state(s, s->deserialized_state);
+ return 0;
+}
+
+static void scope_dump(Unit *u, FILE *f, const char *prefix) {
+ Scope *s = SCOPE(u);
+
+ assert(s);
+ assert(f);
+
+ fprintf(f,
+ "%sScope State: %s\n"
+ "%sResult: %s\n",
+ prefix, scope_state_to_string(s->state),
+ prefix, scope_result_to_string(s->result));
+
+ cgroup_context_dump(&s->cgroup_context, f, prefix);
+ kill_context_dump(&s->kill_context, f, prefix);
+}
+
+static void scope_enter_dead(Scope *s, ScopeResult f) {
+ assert(s);
+
+ if (s->result == SCOPE_SUCCESS)
+ s->result = f;
+
+ unit_log_result(UNIT(s), s->result == SCOPE_SUCCESS, scope_result_to_string(s->result));
+ scope_set_state(s, s->result != SCOPE_SUCCESS ? SCOPE_FAILED : SCOPE_DEAD);
+}
+
+static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
+ bool skip_signal = false;
+ int r;
+
+ assert(s);
+
+ if (s->result == SCOPE_SUCCESS)
+ s->result = f;
+
+ /* Before sending any signal, make sure we track all members of this cgroup */
+ (void) unit_watch_all_pids(UNIT(s));
+
+ /* Also, enqueue a job that we recheck all our PIDs a bit later, given that it's likely some processes have
+ * died now */
+ (void) unit_enqueue_rewatch_pids(UNIT(s));
+
+ /* If we have a controller set let's ask the controller nicely to terminate the scope, instead of us going
+ * directly into SIGTERM berserk mode */
+ if (state == SCOPE_STOP_SIGTERM)
+ skip_signal = bus_scope_send_request_stop(s) > 0;
+
+ if (skip_signal)
+ r = 1; /* wait */
+ else {
+ r = unit_kill_context(
+ UNIT(s),
+ &s->kill_context,
+ state != SCOPE_STOP_SIGTERM ? KILL_KILL :
+ s->was_abandoned ? KILL_TERMINATE_AND_LOG :
+ KILL_TERMINATE,
+ -1, -1, false);
+ if (r < 0)
+ goto fail;
+ }
+
+ if (r > 0) {
+ r = scope_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_stop_usec));
+ if (r < 0)
+ goto fail;
+
+ scope_set_state(s, state);
+ } else if (state == SCOPE_STOP_SIGTERM)
+ scope_enter_signal(s, SCOPE_STOP_SIGKILL, SCOPE_SUCCESS);
+ else
+ scope_enter_dead(s, SCOPE_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m");
+
+ scope_enter_dead(s, SCOPE_FAILURE_RESOURCES);
+}
+
+static int scope_start(Unit *u) {
+ Scope *s = SCOPE(u);
+ int r;
+
+ assert(s);
+
+ if (unit_has_name(u, SPECIAL_INIT_SCOPE))
+ return -EPERM;
+
+ if (s->state == SCOPE_FAILED)
+ return -EPERM;
+
+ /* We can't fulfill this right now, please try again later */
+ if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
+ return -EAGAIN;
+
+ assert(s->state == SCOPE_DEAD);
+
+ if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
+ return -ENOENT;
+
+ (void) bus_scope_track_controller(s);
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ (void) unit_realize_cgroup(u);
+ (void) unit_reset_cpu_accounting(u);
+ (void) unit_reset_ip_accounting(u);
+
+ unit_export_state_files(UNIT(s));
+
+ r = unit_attach_pids_to_cgroup(u, UNIT(s)->pids, NULL);
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to add PIDs to scope's control group: %m");
+ scope_enter_dead(s, SCOPE_FAILURE_RESOURCES);
+ return r;
+ }
+
+ s->result = SCOPE_SUCCESS;
+
+ scope_set_state(s, SCOPE_RUNNING);
+
+ /* Start watching the PIDs currently in the scope */
+ (void) unit_enqueue_rewatch_pids(UNIT(s));
+ return 1;
+}
+
+static int scope_stop(Unit *u) {
+ Scope *s = SCOPE(u);
+
+ assert(s);
+
+ if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
+ return 0;
+
+ assert(IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED));
+
+ scope_enter_signal(s, SCOPE_STOP_SIGTERM, SCOPE_SUCCESS);
+ return 1;
+}
+
+static void scope_reset_failed(Unit *u) {
+ Scope *s = SCOPE(u);
+
+ assert(s);
+
+ if (s->state == SCOPE_FAILED)
+ scope_set_state(s, SCOPE_DEAD);
+
+ s->result = SCOPE_SUCCESS;
+}
+
+static int scope_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {
+ return unit_kill_common(u, who, signo, -1, -1, error);
+}
+
+static int scope_get_timeout(Unit *u, usec_t *timeout) {
+ Scope *s = SCOPE(u);
+ usec_t t;
+ int r;
+
+ if (!s->timer_event_source)
+ return 0;
+
+ r = sd_event_source_get_time(s->timer_event_source, &t);
+ if (r < 0)
+ return r;
+ if (t == USEC_INFINITY)
+ return 0;
+
+ *timeout = t;
+ return 1;
+}
+
+static int scope_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Scope *s = SCOPE(u);
+
+ assert(s);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", scope_state_to_string(s->state));
+ (void) serialize_bool(f, "was-abandoned", s->was_abandoned);
+
+ if (s->controller)
+ (void) serialize_item(f, "controller", s->controller);
+
+ return 0;
+}
+
+static int scope_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Scope *s = SCOPE(u);
+ int r;
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ ScopeState state;
+
+ state = scope_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ s->deserialized_state = state;
+
+ } else if (streq(key, "was-abandoned")) {
+ int k;
+
+ k = parse_boolean(value);
+ if (k < 0)
+ log_unit_debug(u, "Failed to parse boolean value: %s", value);
+ else
+ s->was_abandoned = k;
+ } else if (streq(key, "controller")) {
+
+ r = free_and_strdup(&s->controller, value);
+ if (r < 0)
+ return log_oom();
+
+ } else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+static void scope_notify_cgroup_empty_event(Unit *u) {
+ Scope *s = SCOPE(u);
+ assert(u);
+
+ log_unit_debug(u, "cgroup is empty");
+
+ if (IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
+ scope_enter_dead(s, SCOPE_SUCCESS);
+}
+
+static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
+ assert(u);
+
+ /* If we get a SIGCHLD event for one of the processes we were interested in, then we look for others to
+ * watch, under the assumption that we'll sooner or later get a SIGCHLD for them, as the original
+ * process we watched was probably the parent of them, and they are hence now our children. */
+
+ (void) unit_enqueue_rewatch_pids(u);
+}
+
+static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
+ Scope *s = SCOPE(userdata);
+
+ assert(s);
+ assert(s->timer_event_source == source);
+
+ switch (s->state) {
+
+ case SCOPE_STOP_SIGTERM:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "Stopping timed out. Killing.");
+ scope_enter_signal(s, SCOPE_STOP_SIGKILL, SCOPE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "Stopping timed out. Skipping SIGKILL.");
+ scope_enter_dead(s, SCOPE_FAILURE_TIMEOUT);
+ }
+
+ break;
+
+ case SCOPE_STOP_SIGKILL:
+ log_unit_warning(UNIT(s), "Still around after SIGKILL. Ignoring.");
+ scope_enter_dead(s, SCOPE_FAILURE_TIMEOUT);
+ break;
+
+ default:
+ assert_not_reached("Timeout at wrong time.");
+ }
+
+ return 0;
+}
+
+int scope_abandon(Scope *s) {
+ assert(s);
+
+ if (unit_has_name(UNIT(s), SPECIAL_INIT_SCOPE))
+ return -EPERM;
+
+ if (!IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED))
+ return -ESTALE;
+
+ s->was_abandoned = true;
+
+ s->controller = mfree(s->controller);
+ s->controller_track = sd_bus_track_unref(s->controller_track);
+
+ scope_set_state(s, SCOPE_ABANDONED);
+
+ /* The client is no longer watching the remaining processes, so let's step in here, under the assumption that
+ * the remaining processes will be sooner or later reassigned to us as parent. */
+ (void) unit_enqueue_rewatch_pids(UNIT(s));
+
+ return 0;
+}
+
+_pure_ static UnitActiveState scope_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[SCOPE(u)->state];
+}
+
+_pure_ static const char *scope_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return scope_state_to_string(SCOPE(u)->state);
+}
+
+static void scope_enumerate_perpetual(Manager *m) {
+ Unit *u;
+ int r;
+
+ assert(m);
+
+ /* Let's unconditionally add the "init.scope" special unit
+ * that encapsulates PID 1. Note that PID 1 already is in the
+ * cgroup for this, we hence just need to allocate the object
+ * for it and that's it. */
+
+ u = manager_get_unit(m, SPECIAL_INIT_SCOPE);
+ if (!u) {
+ r = unit_new_for_name(m, sizeof(Scope), SPECIAL_INIT_SCOPE, &u);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate the special " SPECIAL_INIT_SCOPE " unit: %m");
+ return;
+ }
+ }
+
+ u->transient = true;
+ u->perpetual = true;
+ SCOPE(u)->deserialized_state = SCOPE_RUNNING;
+
+ unit_add_to_load_queue(u);
+ unit_add_to_dbus_queue(u);
+}
+
+static const char* const scope_result_table[_SCOPE_RESULT_MAX] = {
+ [SCOPE_SUCCESS] = "success",
+ [SCOPE_FAILURE_RESOURCES] = "resources",
+ [SCOPE_FAILURE_TIMEOUT] = "timeout",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(scope_result, ScopeResult);
+
+const UnitVTable scope_vtable = {
+ .object_size = sizeof(Scope),
+ .cgroup_context_offset = offsetof(Scope, cgroup_context),
+ .kill_context_offset = offsetof(Scope, kill_context),
+
+ .sections =
+ "Unit\0"
+ "Scope\0"
+ "Install\0",
+ .private_section = "Scope",
+
+ .can_transient = true,
+ .can_delegate = true,
+ .once_only = true,
+
+ .init = scope_init,
+ .load = scope_load,
+ .done = scope_done,
+
+ .coldplug = scope_coldplug,
+
+ .dump = scope_dump,
+
+ .start = scope_start,
+ .stop = scope_stop,
+
+ .kill = scope_kill,
+
+ .get_timeout = scope_get_timeout,
+
+ .serialize = scope_serialize,
+ .deserialize_item = scope_deserialize_item,
+
+ .active_state = scope_active_state,
+ .sub_state_to_string = scope_sub_state_to_string,
+
+ .sigchld_event = scope_sigchld_event,
+
+ .reset_failed = scope_reset_failed,
+
+ .notify_cgroup_empty = scope_notify_cgroup_empty_event,
+
+ .bus_vtable = bus_scope_vtable,
+ .bus_set_property = bus_scope_set_property,
+ .bus_commit_properties = bus_scope_commit_properties,
+
+ .enumerate_perpetual = scope_enumerate_perpetual,
+};
diff --git a/src/core/scope.h b/src/core/scope.h
new file mode 100644
index 0000000..c38afb5
--- /dev/null
+++ b/src/core/scope.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Scope Scope;
+
+#include "cgroup.h"
+#include "kill.h"
+#include "unit.h"
+
+typedef enum ScopeResult {
+ SCOPE_SUCCESS,
+ SCOPE_FAILURE_RESOURCES,
+ SCOPE_FAILURE_TIMEOUT,
+ _SCOPE_RESULT_MAX,
+ _SCOPE_RESULT_INVALID = -1
+} ScopeResult;
+
+struct Scope {
+ Unit meta;
+
+ CGroupContext cgroup_context;
+ KillContext kill_context;
+
+ ScopeState state, deserialized_state;
+ ScopeResult result;
+
+ usec_t timeout_stop_usec;
+
+ char *controller;
+ sd_bus_track *controller_track;
+
+ bool was_abandoned;
+
+ sd_event_source *timer_event_source;
+};
+
+extern const UnitVTable scope_vtable;
+
+int scope_abandon(Scope *s);
+
+const char* scope_result_to_string(ScopeResult i) _const_;
+ScopeResult scope_result_from_string(const char *s) _pure_;
+
+DEFINE_CAST(SCOPE, Scope);
diff --git a/src/core/selinux-access.c b/src/core/selinux-access.c
new file mode 100644
index 0000000..0c6d885
--- /dev/null
+++ b/src/core/selinux-access.c
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "selinux-access.h"
+
+#if HAVE_SELINUX
+
+#include <errno.h>
+#include <selinux/avc.h>
+#include <selinux/selinux.h>
+#include <stdio.h>
+#if HAVE_AUDIT
+#include <libaudit.h>
+#endif
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "audit-fd.h"
+#include "bus-util.h"
+#include "log.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "util.h"
+
+static bool initialized = false;
+
+struct audit_info {
+ sd_bus_creds *creds;
+ const char *path;
+ const char *cmdline;
+};
+
+/*
+ Any time an access gets denied this callback will be called
+ with the audit data. We then need to just copy the audit data into the msgbuf.
+*/
+static int audit_callback(
+ void *auditdata,
+ security_class_t cls,
+ char *msgbuf,
+ size_t msgbufsize) {
+
+ const struct audit_info *audit = auditdata;
+ uid_t uid = 0, login_uid = 0;
+ gid_t gid = 0;
+ char login_uid_buf[DECIMAL_STR_MAX(uid_t) + 1] = "n/a";
+ char uid_buf[DECIMAL_STR_MAX(uid_t) + 1] = "n/a";
+ char gid_buf[DECIMAL_STR_MAX(gid_t) + 1] = "n/a";
+
+ if (sd_bus_creds_get_audit_login_uid(audit->creds, &login_uid) >= 0)
+ xsprintf(login_uid_buf, UID_FMT, login_uid);
+ if (sd_bus_creds_get_euid(audit->creds, &uid) >= 0)
+ xsprintf(uid_buf, UID_FMT, uid);
+ if (sd_bus_creds_get_egid(audit->creds, &gid) >= 0)
+ xsprintf(gid_buf, GID_FMT, gid);
+
+ snprintf(msgbuf, msgbufsize,
+ "auid=%s uid=%s gid=%s%s%s%s%s%s%s",
+ login_uid_buf, uid_buf, gid_buf,
+ audit->path ? " path=\"" : "", strempty(audit->path), audit->path ? "\"" : "",
+ audit->cmdline ? " cmdline=\"" : "", strempty(audit->cmdline), audit->cmdline ? "\"" : "");
+
+ return 0;
+}
+
+static int callback_type_to_priority(int type) {
+ switch(type) {
+
+ case SELINUX_ERROR:
+ return LOG_ERR;
+
+ case SELINUX_WARNING:
+ return LOG_WARNING;
+
+ case SELINUX_INFO:
+ return LOG_INFO;
+
+ case SELINUX_AVC:
+ default:
+ return LOG_NOTICE;
+ }
+}
+
+/*
+ libselinux uses this callback when access gets denied or other
+ events happen. If audit is turned on, messages will be reported
+ using audit netlink, otherwise they will be logged using the usual
+ channels.
+
+ Code copied from dbus and modified.
+*/
+_printf_(2, 3) static int log_callback(int type, const char *fmt, ...) {
+ va_list ap;
+ const char *fmt2;
+
+#if HAVE_AUDIT
+ int fd;
+
+ fd = get_audit_fd();
+
+ if (fd >= 0) {
+ _cleanup_free_ char *buf = NULL;
+ int r;
+
+ va_start(ap, fmt);
+ r = vasprintf(&buf, fmt, ap);
+ va_end(ap);
+
+ if (r >= 0) {
+ audit_log_user_avc_message(fd, AUDIT_USER_AVC, buf, NULL, NULL, NULL, 0);
+ return 0;
+ }
+ }
+#endif
+
+ fmt2 = strjoina("selinux: ", fmt);
+
+ va_start(ap, fmt);
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+ log_internalv(LOG_AUTH | callback_type_to_priority(type),
+ 0, __FILE__, __LINE__, __FUNCTION__,
+ fmt2, ap);
+#pragma GCC diagnostic pop
+ va_end(ap);
+
+ return 0;
+}
+
+static int access_init(sd_bus_error *error) {
+
+ if (!mac_selinux_use())
+ return 0;
+
+ if (initialized)
+ return 1;
+
+ if (avc_open(NULL, 0) != 0) {
+ int enforce, saved_errno = errno;
+
+ enforce = security_getenforce();
+ log_full_errno(enforce != 0 ? LOG_ERR : LOG_WARNING, saved_errno, "Failed to open the SELinux AVC: %m");
+
+ /* If enforcement isn't on, then let's suppress this
+ * error, and just don't do any AVC checks. The
+ * warning we printed is hence all the admin will
+ * see. */
+ if (enforce == 0)
+ return 0;
+
+ /* Return an access denied error, if we couldn't load
+ * the AVC but enforcing mode was on, or we couldn't
+ * determine whether it is one. */
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Failed to open the SELinux AVC: %s", strerror(saved_errno));
+ }
+
+ selinux_set_callback(SELINUX_CB_AUDIT, (union selinux_callback) audit_callback);
+ selinux_set_callback(SELINUX_CB_LOG, (union selinux_callback) log_callback);
+
+ initialized = true;
+ return 1;
+}
+
+/*
+ This function communicates with the kernel to check whether or not it should
+ allow the access.
+ If the machine is in permissive mode it will return ok. Audit messages will
+ still be generated if the access would be denied in enforcing mode.
+*/
+int mac_selinux_generic_access_check(
+ sd_bus_message *message,
+ const char *path,
+ const char *permission,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ const char *tclass = NULL, *scon = NULL;
+ struct audit_info audit_info = {};
+ _cleanup_free_ char *cl = NULL;
+ char *fcon = NULL;
+ char **cmdline = NULL;
+ int r = 0;
+
+ assert(message);
+ assert(permission);
+ assert(error);
+
+ r = access_init(error);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_query_sender_creds(
+ message,
+ SD_BUS_CREDS_PID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_EGID|
+ SD_BUS_CREDS_CMDLINE|SD_BUS_CREDS_AUDIT_LOGIN_UID|
+ SD_BUS_CREDS_SELINUX_CONTEXT|
+ SD_BUS_CREDS_AUGMENT /* get more bits from /proc */,
+ &creds);
+ if (r < 0)
+ goto finish;
+
+ /* The SELinux context is something we really should have
+ * gotten directly from the message or sender, and not be an
+ * augmented field. If it was augmented we cannot use it for
+ * authorization, since this is racy and vulnerable. Let's add
+ * an extra check, just in case, even though this really
+ * shouldn't be possible. */
+ assert_return((sd_bus_creds_get_augmented_mask(creds) & SD_BUS_CREDS_SELINUX_CONTEXT) == 0, -EPERM);
+
+ r = sd_bus_creds_get_selinux_context(creds, &scon);
+ if (r < 0)
+ goto finish;
+
+ if (path) {
+ /* Get the file context of the unit file */
+
+ r = getfilecon_raw(path, &fcon);
+ if (r < 0) {
+ r = sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Failed to get file context on %s.", path);
+ goto finish;
+ }
+
+ tclass = "service";
+ } else {
+ r = getcon_raw(&fcon);
+ if (r < 0) {
+ r = sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Failed to get current context.");
+ goto finish;
+ }
+
+ tclass = "system";
+ }
+
+ sd_bus_creds_get_cmdline(creds, &cmdline);
+ cl = strv_join(cmdline, " ");
+
+ audit_info.creds = creds;
+ audit_info.path = path;
+ audit_info.cmdline = cl;
+
+ r = selinux_check_access(scon, fcon, tclass, permission, &audit_info);
+ if (r < 0)
+ r = sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "SELinux policy denies access.");
+
+ log_debug("SELinux access check scon=%s tcon=%s tclass=%s perm=%s path=%s cmdline=%s: %i", scon, fcon, tclass, permission, path, cl, r);
+
+finish:
+ freecon(fcon);
+
+ if (r < 0 && security_getenforce() != 1) {
+ sd_bus_error_free(error);
+ r = 0;
+ }
+
+ return r;
+}
+
+#else
+
+int mac_selinux_generic_access_check(
+ sd_bus_message *message,
+ const char *path,
+ const char *permission,
+ sd_bus_error *error) {
+
+ return 0;
+}
+
+#endif
diff --git a/src/core/selinux-access.h b/src/core/selinux-access.h
new file mode 100644
index 0000000..1e75930
--- /dev/null
+++ b/src/core/selinux-access.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "bus-util.h"
+#include "manager.h"
+
+int mac_selinux_generic_access_check(sd_bus_message *message, const char *path, const char *permission, sd_bus_error *error);
+
+#if HAVE_SELINUX
+
+#define mac_selinux_access_check(message, permission, error) \
+ mac_selinux_generic_access_check((message), NULL, (permission), (error))
+
+#define mac_selinux_unit_access_check(unit, message, permission, error) \
+ mac_selinux_generic_access_check((message), unit_label_path(unit), (permission), (error))
+
+#else
+
+#define mac_selinux_access_check(message, permission, error) 0
+#define mac_selinux_unit_access_check(unit, message, permission, error) 0
+
+#endif
diff --git a/src/core/selinux-setup.c b/src/core/selinux-setup.c
new file mode 100644
index 0000000..bac1aa3
--- /dev/null
+++ b/src/core/selinux-setup.c
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#if HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+
+#include "log.h"
+#include "macro.h"
+#include "selinux-setup.h"
+#include "selinux-util.h"
+#include "string-util.h"
+#include "util.h"
+
+#if HAVE_SELINUX
+_printf_(2,3)
+static int null_log(int type, const char *fmt, ...) {
+ return 0;
+}
+#endif
+
+int mac_selinux_setup(bool *loaded_policy) {
+
+#if HAVE_SELINUX
+ int enforce = 0;
+ usec_t before_load, after_load;
+ char *con;
+ int r;
+ static const union selinux_callback cb = {
+ .func_log = null_log,
+ };
+
+ bool initialized = false;
+
+ assert(loaded_policy);
+
+ /* Turn off all of SELinux' own logging, we want to do that */
+ selinux_set_callback(SELINUX_CB_LOG, cb);
+
+ /* Don't load policy in the initrd if we don't appear to have
+ * it. For the real root, we check below if we've already
+ * loaded policy, and return gracefully.
+ */
+ if (in_initrd() && access(selinux_path(), F_OK) < 0)
+ return 0;
+
+ /* Already initialized by somebody else? */
+ r = getcon_raw(&con);
+ if (r == 0) {
+ initialized = !streq(con, "kernel");
+ freecon(con);
+ }
+
+ /* Make sure we have no fds open while loading the policy and
+ * transitioning */
+ log_close();
+
+ /* Now load the policy */
+ before_load = now(CLOCK_MONOTONIC);
+ r = selinux_init_load_policy(&enforce);
+ if (r == 0) {
+ _cleanup_(mac_selinux_freep) char *label = NULL;
+ char timespan[FORMAT_TIMESPAN_MAX];
+
+ mac_selinux_retest();
+
+ /* Transition to the new context */
+ r = mac_selinux_get_create_label_from_exe(SYSTEMD_BINARY_PATH, &label);
+ if (r < 0 || !label) {
+ log_open();
+ log_error("Failed to compute init label, ignoring.");
+ } else {
+ r = setcon_raw(label);
+
+ log_open();
+ if (r < 0)
+ log_error("Failed to transition into init label '%s', ignoring.", label);
+ }
+
+ after_load = now(CLOCK_MONOTONIC);
+
+ log_info("Successfully loaded SELinux policy in %s.",
+ format_timespan(timespan, sizeof(timespan), after_load - before_load, 0));
+
+ *loaded_policy = true;
+
+ } else {
+ log_open();
+
+ if (enforce > 0) {
+ if (!initialized) {
+ log_emergency("Failed to load SELinux policy.");
+ return -EIO;
+ }
+
+ log_warning("Failed to load new SELinux policy. Continuing with old policy.");
+ } else
+ log_debug("Unable to load SELinux policy. Ignoring.");
+ }
+#endif
+
+ return 0;
+}
diff --git a/src/core/selinux-setup.h b/src/core/selinux-setup.h
new file mode 100644
index 0000000..ad0d4f6
--- /dev/null
+++ b/src/core/selinux-setup.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+int mac_selinux_setup(bool *loaded_policy);
diff --git a/src/core/service.c b/src/core/service.c
new file mode 100644
index 0000000..324dcf2
--- /dev/null
+++ b/src/core/service.c
@@ -0,0 +1,4161 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "async.h"
+#include "bus-error.h"
+#include "bus-kernel.h"
+#include "bus-util.h"
+#include "dbus-service.h"
+#include "dbus-unit.h"
+#include "def.h"
+#include "env-util.h"
+#include "escape.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "load-dropin.h"
+#include "load-fragment.h"
+#include "log.h"
+#include "manager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "service.h"
+#include "signal-util.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit.h"
+#include "utf8.h"
+#include "util.h"
+
+static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
+ [SERVICE_DEAD] = UNIT_INACTIVE,
+ [SERVICE_START_PRE] = UNIT_ACTIVATING,
+ [SERVICE_START] = UNIT_ACTIVATING,
+ [SERVICE_START_POST] = UNIT_ACTIVATING,
+ [SERVICE_RUNNING] = UNIT_ACTIVE,
+ [SERVICE_EXITED] = UNIT_ACTIVE,
+ [SERVICE_RELOAD] = UNIT_RELOADING,
+ [SERVICE_STOP] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_WATCHDOG] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_POST] = UNIT_DEACTIVATING,
+ [SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING,
+ [SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING,
+ [SERVICE_FAILED] = UNIT_FAILED,
+ [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING
+};
+
+/* For Type=idle we never want to delay any other jobs, hence we
+ * consider idle jobs active as soon as we start working on them */
+static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] = {
+ [SERVICE_DEAD] = UNIT_INACTIVE,
+ [SERVICE_START_PRE] = UNIT_ACTIVE,
+ [SERVICE_START] = UNIT_ACTIVE,
+ [SERVICE_START_POST] = UNIT_ACTIVE,
+ [SERVICE_RUNNING] = UNIT_ACTIVE,
+ [SERVICE_EXITED] = UNIT_ACTIVE,
+ [SERVICE_RELOAD] = UNIT_RELOADING,
+ [SERVICE_STOP] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_WATCHDOG] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_POST] = UNIT_DEACTIVATING,
+ [SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING,
+ [SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING,
+ [SERVICE_FAILED] = UNIT_FAILED,
+ [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING
+};
+
+static int service_dispatch_inotify_io(sd_event_source *source, int fd, uint32_t events, void *userdata);
+static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
+static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void *userdata);
+static int service_dispatch_exec_io(sd_event_source *source, int fd, uint32_t events, void *userdata);
+
+static void service_enter_signal(Service *s, ServiceState state, ServiceResult f);
+static void service_enter_reload_by_notify(Service *s);
+
+static void service_init(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ s->timeout_start_usec = u->manager->default_timeout_start_usec;
+ s->timeout_stop_usec = u->manager->default_timeout_stop_usec;
+ s->restart_usec = u->manager->default_restart_usec;
+ s->runtime_max_usec = USEC_INFINITY;
+ s->type = _SERVICE_TYPE_INVALID;
+ s->socket_fd = -1;
+ s->stdin_fd = s->stdout_fd = s->stderr_fd = -1;
+ s->guess_main_pid = true;
+
+ s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
+
+ s->exec_context.keyring_mode = MANAGER_IS_SYSTEM(u->manager) ?
+ EXEC_KEYRING_PRIVATE : EXEC_KEYRING_INHERIT;
+
+ s->watchdog_original_usec = USEC_INFINITY;
+}
+
+static void service_unwatch_control_pid(Service *s) {
+ assert(s);
+
+ if (s->control_pid <= 0)
+ return;
+
+ unit_unwatch_pid(UNIT(s), s->control_pid);
+ s->control_pid = 0;
+}
+
+static void service_unwatch_main_pid(Service *s) {
+ assert(s);
+
+ if (s->main_pid <= 0)
+ return;
+
+ unit_unwatch_pid(UNIT(s), s->main_pid);
+ s->main_pid = 0;
+}
+
+static void service_unwatch_pid_file(Service *s) {
+ if (!s->pid_file_pathspec)
+ return;
+
+ log_unit_debug(UNIT(s), "Stopping watch for PID file %s", s->pid_file_pathspec->path);
+ path_spec_unwatch(s->pid_file_pathspec);
+ path_spec_done(s->pid_file_pathspec);
+ s->pid_file_pathspec = mfree(s->pid_file_pathspec);
+}
+
+static int service_set_main_pid(Service *s, pid_t pid) {
+ pid_t ppid;
+
+ assert(s);
+
+ if (pid <= 1)
+ return -EINVAL;
+
+ if (pid == getpid_cached())
+ return -EINVAL;
+
+ if (s->main_pid == pid && s->main_pid_known)
+ return 0;
+
+ if (s->main_pid != pid) {
+ service_unwatch_main_pid(s);
+ exec_status_start(&s->main_exec_status, pid);
+ }
+
+ s->main_pid = pid;
+ s->main_pid_known = true;
+
+ if (get_process_ppid(pid, &ppid) >= 0 && ppid != getpid_cached()) {
+ log_unit_warning(UNIT(s), "Supervising process "PID_FMT" which is not our child. We'll most likely not notice when it exits.", pid);
+ s->main_pid_alien = true;
+ } else
+ s->main_pid_alien = false;
+
+ return 0;
+}
+
+void service_close_socket_fd(Service *s) {
+ assert(s);
+
+ /* Undo the effect of service_set_socket_fd(). */
+
+ s->socket_fd = asynchronous_close(s->socket_fd);
+
+ if (UNIT_ISSET(s->accept_socket)) {
+ socket_connection_unref(SOCKET(UNIT_DEREF(s->accept_socket)));
+ unit_ref_unset(&s->accept_socket);
+ }
+}
+
+static void service_stop_watchdog(Service *s) {
+ assert(s);
+
+ s->watchdog_event_source = sd_event_source_unref(s->watchdog_event_source);
+ s->watchdog_timestamp = DUAL_TIMESTAMP_NULL;
+}
+
+static usec_t service_get_watchdog_usec(Service *s) {
+ assert(s);
+
+ if (s->watchdog_override_enable)
+ return s->watchdog_override_usec;
+
+ return s->watchdog_original_usec;
+}
+
+static void service_start_watchdog(Service *s) {
+ usec_t watchdog_usec;
+ int r;
+
+ assert(s);
+
+ watchdog_usec = service_get_watchdog_usec(s);
+ if (IN_SET(watchdog_usec, 0, USEC_INFINITY)) {
+ service_stop_watchdog(s);
+ return;
+ }
+
+ if (s->watchdog_event_source) {
+ r = sd_event_source_set_time(s->watchdog_event_source, usec_add(s->watchdog_timestamp.monotonic, watchdog_usec));
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to reset watchdog timer: %m");
+ return;
+ }
+
+ r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ONESHOT);
+ } else {
+ r = sd_event_add_time(
+ UNIT(s)->manager->event,
+ &s->watchdog_event_source,
+ CLOCK_MONOTONIC,
+ usec_add(s->watchdog_timestamp.monotonic, watchdog_usec), 0,
+ service_dispatch_watchdog, s);
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to add watchdog timer: %m");
+ return;
+ }
+
+ (void) sd_event_source_set_description(s->watchdog_event_source, "service-watchdog");
+
+ /* Let's process everything else which might be a sign
+ * of living before we consider a service died. */
+ r = sd_event_source_set_priority(s->watchdog_event_source, SD_EVENT_PRIORITY_IDLE);
+ }
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "Failed to install watchdog timer: %m");
+}
+
+static void service_extend_event_source_timeout(Service *s, sd_event_source *source, usec_t extended) {
+ usec_t current;
+ int r;
+
+ assert(s);
+
+ /* Extends the specified event source timer to at least the specified time, unless it is already later
+ * anyway. */
+
+ if (!source)
+ return;
+
+ r = sd_event_source_get_time(source, &current);
+ if (r < 0) {
+ const char *desc;
+ (void) sd_event_source_get_description(s->timer_event_source, &desc);
+ log_unit_warning_errno(UNIT(s), r, "Failed to retrieve timeout time for event source '%s', ignoring: %m", strna(desc));
+ return;
+ }
+
+ if (current >= extended) /* Current timeout is already longer, ignore this. */
+ return;
+
+ r = sd_event_source_set_time(source, extended);
+ if (r < 0) {
+ const char *desc;
+ (void) sd_event_source_get_description(s->timer_event_source, &desc);
+ log_unit_warning_errno(UNIT(s), r, "Failed to set timeout time for even source '%s', ignoring %m", strna(desc));
+ }
+}
+
+static void service_extend_timeout(Service *s, usec_t extend_timeout_usec) {
+ usec_t extended;
+
+ assert(s);
+
+ if (IN_SET(extend_timeout_usec, 0, USEC_INFINITY))
+ return;
+
+ extended = usec_add(now(CLOCK_MONOTONIC), extend_timeout_usec);
+
+ service_extend_event_source_timeout(s, s->timer_event_source, extended);
+ service_extend_event_source_timeout(s, s->watchdog_event_source, extended);
+}
+
+static void service_reset_watchdog(Service *s) {
+ assert(s);
+
+ dual_timestamp_get(&s->watchdog_timestamp);
+ service_start_watchdog(s);
+}
+
+static void service_override_watchdog_timeout(Service *s, usec_t watchdog_override_usec) {
+ assert(s);
+
+ s->watchdog_override_enable = true;
+ s->watchdog_override_usec = watchdog_override_usec;
+ service_reset_watchdog(s);
+
+ log_unit_debug(UNIT(s), "watchdog_usec="USEC_FMT, s->watchdog_usec);
+ log_unit_debug(UNIT(s), "watchdog_override_usec="USEC_FMT, s->watchdog_override_usec);
+}
+
+static void service_fd_store_unlink(ServiceFDStore *fs) {
+
+ if (!fs)
+ return;
+
+ if (fs->service) {
+ assert(fs->service->n_fd_store > 0);
+ LIST_REMOVE(fd_store, fs->service->fd_store, fs);
+ fs->service->n_fd_store--;
+ }
+
+ if (fs->event_source) {
+ sd_event_source_set_enabled(fs->event_source, SD_EVENT_OFF);
+ sd_event_source_unref(fs->event_source);
+ }
+
+ free(fs->fdname);
+ safe_close(fs->fd);
+ free(fs);
+}
+
+static void service_release_fd_store(Service *s) {
+ assert(s);
+
+ if (s->n_keep_fd_store > 0)
+ return;
+
+ log_unit_debug(UNIT(s), "Releasing all stored fds");
+ while (s->fd_store)
+ service_fd_store_unlink(s->fd_store);
+
+ assert(s->n_fd_store == 0);
+}
+
+static void service_release_resources(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ if (!s->fd_store && s->stdin_fd < 0 && s->stdout_fd < 0 && s->stderr_fd < 0)
+ return;
+
+ log_unit_debug(u, "Releasing resources.");
+
+ s->stdin_fd = safe_close(s->stdin_fd);
+ s->stdout_fd = safe_close(s->stdout_fd);
+ s->stderr_fd = safe_close(s->stderr_fd);
+
+ service_release_fd_store(s);
+}
+
+static void service_done(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ s->pid_file = mfree(s->pid_file);
+ s->status_text = mfree(s->status_text);
+
+ s->exec_runtime = exec_runtime_unref(s->exec_runtime, false);
+ exec_command_free_array(s->exec_command, _SERVICE_EXEC_COMMAND_MAX);
+ s->control_command = NULL;
+ s->main_command = NULL;
+
+ dynamic_creds_unref(&s->dynamic_creds);
+
+ exit_status_set_free(&s->restart_prevent_status);
+ exit_status_set_free(&s->restart_force_status);
+ exit_status_set_free(&s->success_status);
+
+ /* This will leak a process, but at least no memory or any of
+ * our resources */
+ service_unwatch_main_pid(s);
+ service_unwatch_control_pid(s);
+ service_unwatch_pid_file(s);
+
+ if (s->bus_name) {
+ unit_unwatch_bus_name(u, s->bus_name);
+ s->bus_name = mfree(s->bus_name);
+ }
+
+ s->bus_name_owner = mfree(s->bus_name_owner);
+
+ s->usb_function_descriptors = mfree(s->usb_function_descriptors);
+ s->usb_function_strings = mfree(s->usb_function_strings);
+
+ service_close_socket_fd(s);
+ s->peer = socket_peer_unref(s->peer);
+
+ unit_ref_unset(&s->accept_socket);
+
+ service_stop_watchdog(s);
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
+
+ service_release_resources(u);
+}
+
+static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+ ServiceFDStore *fs = userdata;
+
+ assert(e);
+ assert(fs);
+
+ /* If we get either EPOLLHUP or EPOLLERR, it's time to remove this entry from the fd store */
+ log_unit_debug(UNIT(fs->service),
+ "Received %s on stored fd %d (%s), closing.",
+ revents & EPOLLERR ? "EPOLLERR" : "EPOLLHUP",
+ fs->fd, strna(fs->fdname));
+ service_fd_store_unlink(fs);
+ return 0;
+}
+
+static int service_add_fd_store(Service *s, int fd, const char *name) {
+ ServiceFDStore *fs;
+ int r;
+
+ /* fd is always consumed if we return >= 0 */
+
+ assert(s);
+ assert(fd >= 0);
+
+ if (s->n_fd_store >= s->n_fd_store_max)
+ return -EXFULL; /* Our store is full.
+ * Use this errno rather than E[NM]FILE to distinguish from
+ * the case where systemd itself hits the file limit. */
+
+ LIST_FOREACH(fd_store, fs, s->fd_store) {
+ r = same_fd(fs->fd, fd);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ safe_close(fd);
+ return 0; /* fd already included */
+ }
+ }
+
+ fs = new0(ServiceFDStore, 1);
+ if (!fs)
+ return -ENOMEM;
+
+ fs->fd = fd;
+ fs->service = s;
+ fs->fdname = strdup(name ?: "stored");
+ if (!fs->fdname) {
+ free(fs);
+ return -ENOMEM;
+ }
+
+ r = sd_event_add_io(UNIT(s)->manager->event, &fs->event_source, fd, 0, on_fd_store_io, fs);
+ if (r < 0 && r != -EPERM) { /* EPERM indicates fds that aren't pollable, which is OK */
+ free(fs->fdname);
+ free(fs);
+ return r;
+ } else if (r >= 0)
+ (void) sd_event_source_set_description(fs->event_source, "service-fd-store");
+
+ LIST_PREPEND(fd_store, s->fd_store, fs);
+ s->n_fd_store++;
+
+ return 1; /* fd newly stored */
+}
+
+static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
+ int r;
+
+ assert(s);
+
+ while (fdset_size(fds) > 0) {
+ _cleanup_close_ int fd = -1;
+
+ fd = fdset_steal_first(fds);
+ if (fd < 0)
+ break;
+
+ r = service_add_fd_store(s, fd, name);
+ if (r == -EXFULL)
+ return log_unit_warning_errno(UNIT(s), r,
+ "Cannot store more fds than FileDescriptorStoreMax=%u, closing remaining.",
+ s->n_fd_store_max);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to add fd to store: %m");
+ if (r > 0)
+ log_unit_debug(UNIT(s), "Added fd %u (%s) to fd store.", fd, strna(name));
+ fd = -1;
+ }
+
+ return 0;
+}
+
+static void service_remove_fd_store(Service *s, const char *name) {
+ ServiceFDStore *fs, *n;
+
+ assert(s);
+ assert(name);
+
+ LIST_FOREACH_SAFE(fd_store, fs, n, s->fd_store) {
+ if (!streq(fs->fdname, name))
+ continue;
+
+ log_unit_debug(UNIT(s), "Got explicit request to remove fd %i (%s), closing.", fs->fd, name);
+ service_fd_store_unlink(fs);
+ }
+}
+
+static int service_arm_timer(Service *s, usec_t usec) {
+ int r;
+
+ assert(s);
+
+ if (s->timer_event_source) {
+ r = sd_event_source_set_time(s->timer_event_source, usec);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(s->timer_event_source, SD_EVENT_ONESHOT);
+ }
+
+ if (usec == USEC_INFINITY)
+ return 0;
+
+ r = sd_event_add_time(
+ UNIT(s)->manager->event,
+ &s->timer_event_source,
+ CLOCK_MONOTONIC,
+ usec, 0,
+ service_dispatch_timer, s);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s->timer_event_source, "service-timer");
+
+ return 0;
+}
+
+static int service_verify(Service *s) {
+ assert(s);
+
+ if (UNIT(s)->load_state != UNIT_LOADED)
+ return 0;
+
+ if (!s->exec_command[SERVICE_EXEC_START] && !s->exec_command[SERVICE_EXEC_STOP]
+ && UNIT(s)->success_action == EMERGENCY_ACTION_NONE) {
+ /* FailureAction= only makes sense if one of the start or stop commands is specified.
+ * SuccessAction= will be executed unconditionally if no commands are specified. Hence,
+ * either a command or SuccessAction= are required. */
+
+ log_unit_error(UNIT(s), "Service has no ExecStart=, ExecStop=, or SuccessAction=. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->type != SERVICE_ONESHOT && !s->exec_command[SERVICE_EXEC_START]) {
+ log_unit_error(UNIT(s), "Service has no ExecStart= setting, which is only allowed for Type=oneshot services. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (!s->remain_after_exit && !s->exec_command[SERVICE_EXEC_START] && UNIT(s)->success_action == EMERGENCY_ACTION_NONE) {
+ log_unit_error(UNIT(s), "Service has no ExecStart= and no SuccessAction= settings and does not have RemainAfterExit=yes set. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->type != SERVICE_ONESHOT && s->exec_command[SERVICE_EXEC_START]->command_next) {
+ log_unit_error(UNIT(s), "Service has more than one ExecStart= setting, which is only allowed for Type=oneshot services. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->type == SERVICE_ONESHOT && s->restart != SERVICE_RESTART_NO) {
+ log_unit_error(UNIT(s), "Service has Restart= setting other than no, which isn't allowed for Type=oneshot services. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->type == SERVICE_ONESHOT && !exit_status_set_is_empty(&s->restart_force_status)) {
+ log_unit_error(UNIT(s), "Service has RestartForceStatus= set, which isn't allowed for Type=oneshot services. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->type == SERVICE_DBUS && !s->bus_name) {
+ log_unit_error(UNIT(s), "Service is of type D-Bus but no D-Bus service name has been specified. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->bus_name && s->type != SERVICE_DBUS)
+ log_unit_warning(UNIT(s), "Service has a D-Bus service name specified, but is not of type dbus. Ignoring.");
+
+ if (s->exec_context.pam_name && !IN_SET(s->kill_context.kill_mode, KILL_CONTROL_GROUP, KILL_MIXED)) {
+ log_unit_error(UNIT(s), "Service has PAM enabled. Kill mode must be set to 'control-group' or 'mixed'. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->usb_function_descriptors && !s->usb_function_strings)
+ log_unit_warning(UNIT(s), "Service has USBFunctionDescriptors= setting, but no USBFunctionStrings=. Ignoring.");
+
+ if (!s->usb_function_descriptors && s->usb_function_strings)
+ log_unit_warning(UNIT(s), "Service has USBFunctionStrings= setting, but no USBFunctionDescriptors=. Ignoring.");
+
+ if (s->runtime_max_usec != USEC_INFINITY && s->type == SERVICE_ONESHOT)
+ log_unit_warning(UNIT(s), "MaxRuntimeSec= has no effect in combination with Type=oneshot. Ignoring.");
+
+ return 0;
+}
+
+static int service_add_default_dependencies(Service *s) {
+ int r;
+
+ assert(s);
+
+ if (!UNIT(s)->default_dependencies)
+ return 0;
+
+ /* Add a number of automatic dependencies useful for the
+ * majority of services. */
+
+ if (MANAGER_IS_SYSTEM(UNIT(s)->manager)) {
+ /* First, pull in the really early boot stuff, and
+ * require it, so that we fail if we can't acquire
+ * it. */
+
+ r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+ } else {
+
+ /* In the --user instance there's no sysinit.target,
+ * in that case require basic.target instead. */
+
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_BASIC_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+ }
+
+ /* Second, if the rest of the base system is in the same
+ * transaction, order us after it, but do not pull it in or
+ * even require it. */
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_BASIC_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ /* Third, add us in for normal shutdown. */
+ return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+}
+
+static void service_fix_output(Service *s) {
+ assert(s);
+
+ /* If nothing has been explicitly configured, patch default output in. If input is socket/tty we avoid this
+ * however, since in that case we want output to default to the same place as we read input from. */
+
+ if (s->exec_context.std_error == EXEC_OUTPUT_INHERIT &&
+ s->exec_context.std_output == EXEC_OUTPUT_INHERIT &&
+ s->exec_context.std_input == EXEC_INPUT_NULL)
+ s->exec_context.std_error = UNIT(s)->manager->default_std_error;
+
+ if (s->exec_context.std_output == EXEC_OUTPUT_INHERIT &&
+ s->exec_context.std_input == EXEC_INPUT_NULL)
+ s->exec_context.std_output = UNIT(s)->manager->default_std_output;
+
+ if (s->exec_context.std_input == EXEC_INPUT_NULL &&
+ s->exec_context.stdin_data_size > 0)
+ s->exec_context.std_input = EXEC_INPUT_DATA;
+}
+
+static int service_setup_bus_name(Service *s) {
+ int r;
+
+ assert(s);
+
+ if (!s->bus_name)
+ return 0;
+
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_DBUS_SOCKET, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to add dependency on " SPECIAL_DBUS_SOCKET ": %m");
+
+ /* We always want to be ordered against dbus.socket if both are in the transaction. */
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_DBUS_SOCKET, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to add dependency on " SPECIAL_DBUS_SOCKET ": %m");
+
+ r = unit_watch_bus_name(UNIT(s), s->bus_name);
+ if (r == -EEXIST)
+ return log_unit_error_errno(UNIT(s), r, "Two services allocated for the same bus name %s, refusing operation.", s->bus_name);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Cannot watch bus name %s: %m", s->bus_name);
+
+ return 0;
+}
+
+static int service_add_extras(Service *s) {
+ int r;
+
+ assert(s);
+
+ if (s->type == _SERVICE_TYPE_INVALID) {
+ /* Figure out a type automatically */
+ if (s->bus_name)
+ s->type = SERVICE_DBUS;
+ else if (s->exec_command[SERVICE_EXEC_START])
+ s->type = SERVICE_SIMPLE;
+ else
+ s->type = SERVICE_ONESHOT;
+ }
+
+ /* Oneshot services have disabled start timeout by default */
+ if (s->type == SERVICE_ONESHOT && !s->start_timeout_defined)
+ s->timeout_start_usec = USEC_INFINITY;
+
+ service_fix_output(s);
+
+ r = unit_patch_contexts(UNIT(s));
+ if (r < 0)
+ return r;
+
+ r = unit_add_exec_dependencies(UNIT(s), &s->exec_context);
+ if (r < 0)
+ return r;
+
+ r = unit_set_default_slice(UNIT(s));
+ if (r < 0)
+ return r;
+
+ /* If the service needs the notify socket, let's enable it automatically. */
+ if (s->notify_access == NOTIFY_NONE &&
+ (s->type == SERVICE_NOTIFY || s->watchdog_usec > 0 || s->n_fd_store_max > 0))
+ s->notify_access = NOTIFY_MAIN;
+
+ r = service_add_default_dependencies(s);
+ if (r < 0)
+ return r;
+
+ r = service_setup_bus_name(s);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int service_load(Unit *u) {
+ Service *s = SERVICE(u);
+ int r;
+
+ assert(s);
+
+ /* Load a .service file */
+ r = unit_load_fragment(u);
+ if (r < 0)
+ return r;
+
+ /* Still nothing found? Then let's give up */
+ if (u->load_state == UNIT_STUB)
+ return -ENOENT;
+
+ /* This is a new unit? Then let's add in some extras */
+ if (u->load_state == UNIT_LOADED) {
+
+ /* We were able to load something, then let's add in
+ * the dropin directories. */
+ r = unit_load_dropin(u);
+ if (r < 0)
+ return r;
+
+ /* This is a new unit? Then let's add in some
+ * extras */
+ r = service_add_extras(s);
+ if (r < 0)
+ return r;
+ }
+
+ return service_verify(s);
+}
+
+static void service_dump(Unit *u, FILE *f, const char *prefix) {
+ char buf_restart[FORMAT_TIMESPAN_MAX], buf_start[FORMAT_TIMESPAN_MAX], buf_stop[FORMAT_TIMESPAN_MAX];
+ char buf_runtime[FORMAT_TIMESPAN_MAX], buf_watchdog[FORMAT_TIMESPAN_MAX];
+ ServiceExecCommand c;
+ Service *s = SERVICE(u);
+ const char *prefix2;
+
+ assert(s);
+
+ prefix = strempty(prefix);
+ prefix2 = strjoina(prefix, "\t");
+
+ fprintf(f,
+ "%sService State: %s\n"
+ "%sResult: %s\n"
+ "%sReload Result: %s\n"
+ "%sPermissionsStartOnly: %s\n"
+ "%sRootDirectoryStartOnly: %s\n"
+ "%sRemainAfterExit: %s\n"
+ "%sGuessMainPID: %s\n"
+ "%sType: %s\n"
+ "%sRestart: %s\n"
+ "%sNotifyAccess: %s\n"
+ "%sNotifyState: %s\n",
+ prefix, service_state_to_string(s->state),
+ prefix, service_result_to_string(s->result),
+ prefix, service_result_to_string(s->reload_result),
+ prefix, yes_no(s->permissions_start_only),
+ prefix, yes_no(s->root_directory_start_only),
+ prefix, yes_no(s->remain_after_exit),
+ prefix, yes_no(s->guess_main_pid),
+ prefix, service_type_to_string(s->type),
+ prefix, service_restart_to_string(s->restart),
+ prefix, notify_access_to_string(s->notify_access),
+ prefix, notify_state_to_string(s->notify_state));
+
+ if (s->control_pid > 0)
+ fprintf(f,
+ "%sControl PID: "PID_FMT"\n",
+ prefix, s->control_pid);
+
+ if (s->main_pid > 0)
+ fprintf(f,
+ "%sMain PID: "PID_FMT"\n"
+ "%sMain PID Known: %s\n"
+ "%sMain PID Alien: %s\n",
+ prefix, s->main_pid,
+ prefix, yes_no(s->main_pid_known),
+ prefix, yes_no(s->main_pid_alien));
+
+ if (s->pid_file)
+ fprintf(f,
+ "%sPIDFile: %s\n",
+ prefix, s->pid_file);
+
+ if (s->bus_name)
+ fprintf(f,
+ "%sBusName: %s\n"
+ "%sBus Name Good: %s\n",
+ prefix, s->bus_name,
+ prefix, yes_no(s->bus_name_good));
+
+ if (UNIT_ISSET(s->accept_socket))
+ fprintf(f,
+ "%sAccept Socket: %s\n",
+ prefix, UNIT_DEREF(s->accept_socket)->id);
+
+ fprintf(f,
+ "%sRestartSec: %s\n"
+ "%sTimeoutStartSec: %s\n"
+ "%sTimeoutStopSec: %s\n"
+ "%sRuntimeMaxSec: %s\n"
+ "%sWatchdogSec: %s\n",
+ prefix, format_timespan(buf_restart, sizeof(buf_restart), s->restart_usec, USEC_PER_SEC),
+ prefix, format_timespan(buf_start, sizeof(buf_start), s->timeout_start_usec, USEC_PER_SEC),
+ prefix, format_timespan(buf_stop, sizeof(buf_stop), s->timeout_stop_usec, USEC_PER_SEC),
+ prefix, format_timespan(buf_runtime, sizeof(buf_runtime), s->runtime_max_usec, USEC_PER_SEC),
+ prefix, format_timespan(buf_watchdog, sizeof(buf_watchdog), s->watchdog_usec, USEC_PER_SEC));
+
+ kill_context_dump(&s->kill_context, f, prefix);
+ exec_context_dump(&s->exec_context, f, prefix);
+
+ for (c = 0; c < _SERVICE_EXEC_COMMAND_MAX; c++) {
+
+ if (!s->exec_command[c])
+ continue;
+
+ fprintf(f, "%s-> %s:\n",
+ prefix, service_exec_command_to_string(c));
+
+ exec_command_dump_list(s->exec_command[c], f, prefix2);
+ }
+
+ if (s->status_text)
+ fprintf(f, "%sStatus Text: %s\n",
+ prefix, s->status_text);
+
+ if (s->n_fd_store_max > 0)
+ fprintf(f,
+ "%sFile Descriptor Store Max: %u\n"
+ "%sFile Descriptor Store Current: %zu\n",
+ prefix, s->n_fd_store_max,
+ prefix, s->n_fd_store);
+
+ cgroup_context_dump(&s->cgroup_context, f, prefix);
+}
+
+static int service_is_suitable_main_pid(Service *s, pid_t pid, int prio) {
+ Unit *owner;
+
+ assert(s);
+ assert(pid_is_valid(pid));
+
+ /* Checks whether the specified PID is suitable as main PID for this service. returns negative if not, 0 if the
+ * PID is questionnable but should be accepted if the source of configuration is trusted. > 0 if the PID is
+ * good */
+
+ if (pid == getpid_cached() || pid == 1) {
+ log_unit_full(UNIT(s), prio, 0, "New main PID "PID_FMT" is the manager, refusing.", pid);
+ return -EPERM;
+ }
+
+ if (pid == s->control_pid) {
+ log_unit_full(UNIT(s), prio, 0, "New main PID "PID_FMT" is the control process, refusing.", pid);
+ return -EPERM;
+ }
+
+ if (!pid_is_alive(pid)) {
+ log_unit_full(UNIT(s), prio, 0, "New main PID "PID_FMT" does not exist or is a zombie.", pid);
+ return -ESRCH;
+ }
+
+ owner = manager_get_unit_by_pid(UNIT(s)->manager, pid);
+ if (owner == UNIT(s)) {
+ log_unit_debug(UNIT(s), "New main PID "PID_FMT" belongs to service, we are happy.", pid);
+ return 1; /* Yay, it's definitely a good PID */
+ }
+
+ return 0; /* Hmm it's a suspicious PID, let's accept it if configuration source is trusted */
+}
+
+static int service_load_pid_file(Service *s, bool may_warn) {
+ char procfs[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ bool questionable_pid_file = false;
+ _cleanup_free_ char *k = NULL;
+ _cleanup_close_ int fd = -1;
+ int r, prio;
+ pid_t pid;
+
+ assert(s);
+
+ if (!s->pid_file)
+ return -ENOENT;
+
+ prio = may_warn ? LOG_INFO : LOG_DEBUG;
+
+ fd = chase_symlinks(s->pid_file, NULL, CHASE_OPEN|CHASE_SAFE, NULL);
+ if (fd == -ENOLINK) {
+ log_unit_full(UNIT(s), LOG_DEBUG, fd, "Potentially unsafe symlink chain, will now retry with relaxed checks: %s", s->pid_file);
+
+ questionable_pid_file = true;
+
+ fd = chase_symlinks(s->pid_file, NULL, CHASE_OPEN, NULL);
+ }
+ if (fd < 0)
+ return log_unit_full(UNIT(s), prio, fd, "Can't open PID file %s (yet?) after %s: %m", s->pid_file, service_state_to_string(s->state));
+
+ /* Let's read the PID file now that we chased it down. But we need to convert the O_PATH fd chase_symlinks() returned us into a proper fd first. */
+ xsprintf(procfs, "/proc/self/fd/%i", fd);
+ r = read_one_line_file(procfs, &k);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Can't convert PID files %s O_PATH file descriptor to proper file descriptor: %m", s->pid_file);
+
+ r = parse_pid(k, &pid);
+ if (r < 0)
+ return log_unit_full(UNIT(s), prio, r, "Failed to parse PID from file %s: %m", s->pid_file);
+
+ if (s->main_pid_known && pid == s->main_pid)
+ return 0;
+
+ r = service_is_suitable_main_pid(s, pid, prio);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ struct stat st;
+
+ if (questionable_pid_file) {
+ log_unit_error(UNIT(s), "Refusing to accept PID outside of service control group, acquired through unsafe symlink chain: %s", s->pid_file);
+ return -EPERM;
+ }
+
+ /* Hmm, it's not clear if the new main PID is safe. Let's allow this if the PID file is owned by root */
+
+ if (fstat(fd, &st) < 0)
+ return log_unit_error_errno(UNIT(s), errno, "Failed to fstat() PID file O_PATH fd: %m");
+
+ if (st.st_uid != 0) {
+ log_unit_error(UNIT(s), "New main PID "PID_FMT" does not belong to service, and PID file is not owned by root. Refusing.", pid);
+ return -EPERM;
+ }
+
+ log_unit_debug(UNIT(s), "New main PID "PID_FMT" does not belong to service, but we'll accept it since PID file is owned by root.", pid);
+ }
+
+ if (s->main_pid_known) {
+ log_unit_debug(UNIT(s), "Main PID changing: "PID_FMT" -> "PID_FMT, s->main_pid, pid);
+
+ service_unwatch_main_pid(s);
+ s->main_pid_known = false;
+ } else
+ log_unit_debug(UNIT(s), "Main PID loaded: "PID_FMT, pid);
+
+ r = service_set_main_pid(s, pid);
+ if (r < 0)
+ return r;
+
+ r = unit_watch_pid(UNIT(s), pid);
+ if (r < 0) /* FIXME: we need to do something here */
+ return log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" for service: %m", pid);
+
+ return 1;
+}
+
+static void service_search_main_pid(Service *s) {
+ pid_t pid = 0;
+ int r;
+
+ assert(s);
+
+ /* If we know it anyway, don't ever fallback to unreliable
+ * heuristics */
+ if (s->main_pid_known)
+ return;
+
+ if (!s->guess_main_pid)
+ return;
+
+ assert(s->main_pid <= 0);
+
+ if (unit_search_main_pid(UNIT(s), &pid) < 0)
+ return;
+
+ log_unit_debug(UNIT(s), "Main PID guessed: "PID_FMT, pid);
+ if (service_set_main_pid(s, pid) < 0)
+ return;
+
+ r = unit_watch_pid(UNIT(s), pid);
+ if (r < 0)
+ /* FIXME: we need to do something here */
+ log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" from: %m", pid);
+}
+
+static void service_set_state(Service *s, ServiceState state) {
+ ServiceState old_state;
+ const UnitActiveState *table;
+
+ assert(s);
+
+ if (s->state != state)
+ bus_unit_send_pending_change_signal(UNIT(s), false);
+
+ table = s->type == SERVICE_IDLE ? state_translation_table_idle : state_translation_table;
+
+ old_state = s->state;
+ s->state = state;
+
+ service_unwatch_pid_file(s);
+
+ if (!IN_SET(state,
+ SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
+ SERVICE_RUNNING,
+ SERVICE_RELOAD,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
+ SERVICE_AUTO_RESTART))
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+ if (!IN_SET(state,
+ SERVICE_START, SERVICE_START_POST,
+ SERVICE_RUNNING, SERVICE_RELOAD,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
+ service_unwatch_main_pid(s);
+ s->main_command = NULL;
+ }
+
+ if (!IN_SET(state,
+ SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
+ SERVICE_RELOAD,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
+ service_unwatch_control_pid(s);
+ s->control_command = NULL;
+ s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
+ }
+
+ if (IN_SET(state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART)) {
+ unit_unwatch_all_pids(UNIT(s));
+ unit_dequeue_rewatch_pids(UNIT(s));
+ }
+
+ if (!IN_SET(state,
+ SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
+ SERVICE_RUNNING, SERVICE_RELOAD,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) &&
+ !(state == SERVICE_DEAD && UNIT(s)->job))
+ service_close_socket_fd(s);
+
+ if (state != SERVICE_START)
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
+
+ if (!IN_SET(state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD))
+ service_stop_watchdog(s);
+
+ /* For the inactive states unit_notify() will trim the cgroup,
+ * but for exit we have to do that ourselves... */
+ if (state == SERVICE_EXITED && !MANAGER_IS_RELOADING(UNIT(s)->manager))
+ unit_prune_cgroup(UNIT(s));
+
+ if (old_state != state)
+ log_unit_debug(UNIT(s), "Changed %s -> %s", service_state_to_string(old_state), service_state_to_string(state));
+
+ unit_notify(UNIT(s), table[old_state], table[state],
+ (s->reload_result == SERVICE_SUCCESS ? 0 : UNIT_NOTIFY_RELOAD_FAILURE) |
+ (s->will_auto_restart ? UNIT_NOTIFY_WILL_AUTO_RESTART : 0));
+}
+
+static usec_t service_coldplug_timeout(Service *s) {
+ assert(s);
+
+ switch (s->deserialized_state) {
+
+ case SERVICE_START_PRE:
+ case SERVICE_START:
+ case SERVICE_START_POST:
+ case SERVICE_RELOAD:
+ return usec_add(UNIT(s)->state_change_timestamp.monotonic, s->timeout_start_usec);
+
+ case SERVICE_RUNNING:
+ return usec_add(UNIT(s)->active_enter_timestamp.monotonic, s->runtime_max_usec);
+
+ case SERVICE_STOP:
+ case SERVICE_STOP_WATCHDOG:
+ case SERVICE_STOP_SIGTERM:
+ case SERVICE_STOP_SIGKILL:
+ case SERVICE_STOP_POST:
+ case SERVICE_FINAL_SIGTERM:
+ case SERVICE_FINAL_SIGKILL:
+ return usec_add(UNIT(s)->state_change_timestamp.monotonic, s->timeout_stop_usec);
+
+ case SERVICE_AUTO_RESTART:
+ return usec_add(UNIT(s)->inactive_enter_timestamp.monotonic, s->restart_usec);
+
+ default:
+ return USEC_INFINITY;
+ }
+}
+
+static int service_coldplug(Unit *u) {
+ Service *s = SERVICE(u);
+ int r;
+
+ assert(s);
+ assert(s->state == SERVICE_DEAD);
+
+ if (s->deserialized_state == s->state)
+ return 0;
+
+ r = service_arm_timer(s, service_coldplug_timeout(s));
+ if (r < 0)
+ return r;
+
+ if (s->main_pid > 0 &&
+ pid_is_unwaited(s->main_pid) &&
+ (IN_SET(s->deserialized_state,
+ SERVICE_START, SERVICE_START_POST,
+ SERVICE_RUNNING, SERVICE_RELOAD,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
+ r = unit_watch_pid(UNIT(s), s->main_pid);
+ if (r < 0)
+ return r;
+ }
+
+ if (s->control_pid > 0 &&
+ pid_is_unwaited(s->control_pid) &&
+ IN_SET(s->deserialized_state,
+ SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
+ SERVICE_RELOAD,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
+ r = unit_watch_pid(UNIT(s), s->control_pid);
+ if (r < 0)
+ return r;
+ }
+
+ if (!IN_SET(s->deserialized_state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART)) {
+ (void) unit_enqueue_rewatch_pids(u);
+ (void) unit_setup_dynamic_creds(u);
+ (void) unit_setup_exec_runtime(u);
+ }
+
+ if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD))
+ service_start_watchdog(s);
+
+ if (UNIT_ISSET(s->accept_socket)) {
+ Socket* socket = SOCKET(UNIT_DEREF(s->accept_socket));
+
+ if (socket->max_connections_per_source > 0) {
+ SocketPeer *peer;
+
+ /* Make a best-effort attempt at bumping the connection count */
+ if (socket_acquire_peer(socket, s->socket_fd, &peer) > 0) {
+ socket_peer_unref(s->peer);
+ s->peer = peer;
+ }
+ }
+ }
+
+ service_set_state(s, s->deserialized_state);
+ return 0;
+}
+
+static int service_collect_fds(
+ Service *s,
+ int **fds,
+ char ***fd_names,
+ size_t *n_socket_fds,
+ size_t *n_storage_fds) {
+
+ _cleanup_strv_free_ char **rfd_names = NULL;
+ _cleanup_free_ int *rfds = NULL;
+ size_t rn_socket_fds = 0, rn_storage_fds = 0;
+ int r;
+
+ assert(s);
+ assert(fds);
+ assert(fd_names);
+ assert(n_socket_fds);
+ assert(n_storage_fds);
+
+ if (s->socket_fd >= 0) {
+
+ /* Pass the per-connection socket */
+
+ rfds = new(int, 1);
+ if (!rfds)
+ return -ENOMEM;
+ rfds[0] = s->socket_fd;
+
+ rfd_names = strv_new("connection");
+ if (!rfd_names)
+ return -ENOMEM;
+
+ rn_socket_fds = 1;
+ } else {
+ Iterator i;
+ void *v;
+ Unit *u;
+
+ /* Pass all our configured sockets for singleton services */
+
+ HASHMAP_FOREACH_KEY(v, u, UNIT(s)->dependencies[UNIT_TRIGGERED_BY], i) {
+ _cleanup_free_ int *cfds = NULL;
+ Socket *sock;
+ int cn_fds;
+
+ if (u->type != UNIT_SOCKET)
+ continue;
+
+ sock = SOCKET(u);
+
+ cn_fds = socket_collect_fds(sock, &cfds);
+ if (cn_fds < 0)
+ return cn_fds;
+
+ if (cn_fds <= 0)
+ continue;
+
+ if (!rfds) {
+ rfds = TAKE_PTR(cfds);
+ rn_socket_fds = cn_fds;
+ } else {
+ int *t;
+
+ t = reallocarray(rfds, rn_socket_fds + cn_fds, sizeof(int));
+ if (!t)
+ return -ENOMEM;
+
+ memcpy(t + rn_socket_fds, cfds, cn_fds * sizeof(int));
+
+ rfds = t;
+ rn_socket_fds += cn_fds;
+ }
+
+ r = strv_extend_n(&rfd_names, socket_fdname(sock), cn_fds);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (s->n_fd_store > 0) {
+ ServiceFDStore *fs;
+ size_t n_fds;
+ char **nl;
+ int *t;
+
+ t = reallocarray(rfds, rn_socket_fds + s->n_fd_store, sizeof(int));
+ if (!t)
+ return -ENOMEM;
+
+ rfds = t;
+
+ nl = reallocarray(rfd_names, rn_socket_fds + s->n_fd_store + 1, sizeof(char *));
+ if (!nl)
+ return -ENOMEM;
+
+ rfd_names = nl;
+ n_fds = rn_socket_fds;
+
+ LIST_FOREACH(fd_store, fs, s->fd_store) {
+ rfds[n_fds] = fs->fd;
+ rfd_names[n_fds] = strdup(strempty(fs->fdname));
+ if (!rfd_names[n_fds])
+ return -ENOMEM;
+
+ rn_storage_fds++;
+ n_fds++;
+ }
+
+ rfd_names[n_fds] = NULL;
+ }
+
+ *fds = TAKE_PTR(rfds);
+ *fd_names = TAKE_PTR(rfd_names);
+ *n_socket_fds = rn_socket_fds;
+ *n_storage_fds = rn_storage_fds;
+
+ return 0;
+}
+
+static int service_allocate_exec_fd_event_source(
+ Service *s,
+ int fd,
+ sd_event_source **ret_event_source) {
+
+ _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL;
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+ assert(ret_event_source);
+
+ r = sd_event_add_io(UNIT(s)->manager->event, &source, fd, 0, service_dispatch_exec_io, s);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to allocate exec_fd event source: %m");
+
+ /* This is a bit lower priority than SIGCHLD, as that carries a lot more interesting failure information */
+
+ r = sd_event_source_set_priority(source, SD_EVENT_PRIORITY_NORMAL-3);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to adjust priority of exec_fd event source: %m");
+
+ (void) sd_event_source_set_description(source, "service event_fd");
+
+ r = sd_event_source_set_io_fd_own(source, true);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to pass ownership of fd to event source: %m");
+
+ *ret_event_source = TAKE_PTR(source);
+ return 0;
+}
+
+static int service_allocate_exec_fd(
+ Service *s,
+ sd_event_source **ret_event_source,
+ int* ret_exec_fd) {
+
+ _cleanup_close_pair_ int p[2] = { -1, -1 };
+ int r;
+
+ assert(s);
+ assert(ret_event_source);
+ assert(ret_exec_fd);
+
+ if (pipe2(p, O_CLOEXEC|O_NONBLOCK) < 0)
+ return log_unit_error_errno(UNIT(s), errno, "Failed to allocate exec_fd pipe: %m");
+
+ r = service_allocate_exec_fd_event_source(s, p[0], ret_event_source);
+ if (r < 0)
+ return r;
+
+ p[0] = -1;
+ *ret_exec_fd = TAKE_FD(p[1]);
+
+ return 0;
+}
+
+static bool service_exec_needs_notify_socket(Service *s, ExecFlags flags) {
+ assert(s);
+
+ /* Notifications are accepted depending on the process and
+ * the access setting of the service:
+ * process: \ access: NONE MAIN EXEC ALL
+ * main no yes yes yes
+ * control no no yes yes
+ * other (forked) no no no yes */
+
+ if (flags & EXEC_IS_CONTROL)
+ /* A control process */
+ return IN_SET(s->notify_access, NOTIFY_EXEC, NOTIFY_ALL);
+
+ /* We only spawn main processes and control processes, so any
+ * process that is not a control process is a main process */
+ return s->notify_access != NOTIFY_NONE;
+}
+
+static int service_spawn(
+ Service *s,
+ ExecCommand *c,
+ usec_t timeout,
+ ExecFlags flags,
+ pid_t *_pid) {
+
+ _cleanup_(exec_params_clear) ExecParameters exec_params = {
+ .flags = flags,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
+ .exec_fd = -1,
+ };
+ _cleanup_strv_free_ char **final_env = NULL, **our_env = NULL, **fd_names = NULL;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *exec_fd_source = NULL;
+ size_t n_socket_fds = 0, n_storage_fds = 0, n_env = 0;
+ _cleanup_close_ int exec_fd = -1;
+ _cleanup_free_ int *fds = NULL;
+ pid_t pid;
+ int r;
+
+ assert(s);
+ assert(c);
+ assert(_pid);
+
+ r = unit_prepare_exec(UNIT(s)); /* This realizes the cgroup, among other things */
+ if (r < 0)
+ return r;
+
+ if (flags & EXEC_IS_CONTROL) {
+ /* If this is a control process, mask the permissions/chroot application if this is requested. */
+ if (s->permissions_start_only)
+ exec_params.flags &= ~EXEC_APPLY_SANDBOXING;
+ if (s->root_directory_start_only)
+ exec_params.flags &= ~EXEC_APPLY_CHROOT;
+ }
+
+ if ((flags & EXEC_PASS_FDS) ||
+ s->exec_context.std_input == EXEC_INPUT_SOCKET ||
+ s->exec_context.std_output == EXEC_OUTPUT_SOCKET ||
+ s->exec_context.std_error == EXEC_OUTPUT_SOCKET) {
+
+ r = service_collect_fds(s, &fds, &fd_names, &n_socket_fds, &n_storage_fds);
+ if (r < 0)
+ return r;
+
+ log_unit_debug(UNIT(s), "Passing %zu fds to service", n_socket_fds + n_storage_fds);
+ }
+
+ if (!FLAGS_SET(flags, EXEC_IS_CONTROL) && s->type == SERVICE_EXEC) {
+ assert(!s->exec_fd_event_source);
+
+ r = service_allocate_exec_fd(s, &exec_fd_source, &exec_fd);
+ if (r < 0)
+ return r;
+ }
+
+ r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), timeout));
+ if (r < 0)
+ return r;
+
+ our_env = new0(char*, 9);
+ if (!our_env)
+ return -ENOMEM;
+
+ if (service_exec_needs_notify_socket(s, flags))
+ if (asprintf(our_env + n_env++, "NOTIFY_SOCKET=%s", UNIT(s)->manager->notify_socket) < 0)
+ return -ENOMEM;
+
+ if (s->main_pid > 0)
+ if (asprintf(our_env + n_env++, "MAINPID="PID_FMT, s->main_pid) < 0)
+ return -ENOMEM;
+
+ if (MANAGER_IS_USER(UNIT(s)->manager))
+ if (asprintf(our_env + n_env++, "MANAGERPID="PID_FMT, getpid_cached()) < 0)
+ return -ENOMEM;
+
+ if (s->socket_fd >= 0) {
+ union sockaddr_union sa;
+ socklen_t salen = sizeof(sa);
+
+ /* If this is a per-connection service instance, let's set $REMOTE_ADDR and $REMOTE_PORT to something
+ * useful. Note that we do this only when we are still connected at this point in time, which we might
+ * very well not be. Hence we ignore all errors when retrieving peer information (as that might result
+ * in ENOTCONN), and just use whate we can use. */
+
+ if (getpeername(s->socket_fd, &sa.sa, &salen) >= 0 &&
+ IN_SET(sa.sa.sa_family, AF_INET, AF_INET6, AF_VSOCK)) {
+
+ _cleanup_free_ char *addr = NULL;
+ char *t;
+ unsigned port;
+
+ r = sockaddr_pretty(&sa.sa, salen, true, false, &addr);
+ if (r < 0)
+ return r;
+
+ t = strappend("REMOTE_ADDR=", addr);
+ if (!t)
+ return -ENOMEM;
+ our_env[n_env++] = t;
+
+ r = sockaddr_port(&sa.sa, &port);
+ if (r < 0)
+ return r;
+
+ if (asprintf(&t, "REMOTE_PORT=%u", port) < 0)
+ return -ENOMEM;
+ our_env[n_env++] = t;
+ }
+ }
+
+ if (flags & EXEC_SETENV_RESULT) {
+ if (asprintf(our_env + n_env++, "SERVICE_RESULT=%s", service_result_to_string(s->result)) < 0)
+ return -ENOMEM;
+
+ if (s->main_exec_status.pid > 0 &&
+ dual_timestamp_is_set(&s->main_exec_status.exit_timestamp)) {
+ if (asprintf(our_env + n_env++, "EXIT_CODE=%s", sigchld_code_to_string(s->main_exec_status.code)) < 0)
+ return -ENOMEM;
+
+ if (s->main_exec_status.code == CLD_EXITED)
+ r = asprintf(our_env + n_env++, "EXIT_STATUS=%i", s->main_exec_status.status);
+ else
+ r = asprintf(our_env + n_env++, "EXIT_STATUS=%s", signal_to_string(s->main_exec_status.status));
+ if (r < 0)
+ return -ENOMEM;
+ }
+ }
+
+ r = unit_set_exec_params(UNIT(s), &exec_params);
+ if (r < 0)
+ return r;
+
+ final_env = strv_env_merge(2, exec_params.environment, our_env, NULL);
+ if (!final_env)
+ return -ENOMEM;
+
+ /* System D-Bus needs nss-systemd disabled, so that we don't deadlock */
+ SET_FLAG(exec_params.flags, EXEC_NSS_BYPASS_BUS,
+ MANAGER_IS_SYSTEM(UNIT(s)->manager) && unit_has_name(UNIT(s), SPECIAL_DBUS_SERVICE));
+
+ strv_free_and_replace(exec_params.environment, final_env);
+ exec_params.fds = fds;
+ exec_params.fd_names = fd_names;
+ exec_params.n_socket_fds = n_socket_fds;
+ exec_params.n_storage_fds = n_storage_fds;
+ exec_params.watchdog_usec = service_get_watchdog_usec(s);
+ exec_params.selinux_context_net = s->socket_fd_selinux_context_net;
+ if (s->type == SERVICE_IDLE)
+ exec_params.idle_pipe = UNIT(s)->manager->idle_pipe;
+ exec_params.stdin_fd = s->stdin_fd;
+ exec_params.stdout_fd = s->stdout_fd;
+ exec_params.stderr_fd = s->stderr_fd;
+ exec_params.exec_fd = exec_fd;
+
+ r = exec_spawn(UNIT(s),
+ c,
+ &s->exec_context,
+ &exec_params,
+ s->exec_runtime,
+ &s->dynamic_creds,
+ &pid);
+ if (r < 0)
+ return r;
+
+ s->exec_fd_event_source = TAKE_PTR(exec_fd_source);
+ s->exec_fd_hot = false;
+
+ r = unit_watch_pid(UNIT(s), pid);
+ if (r < 0) /* FIXME: we need to do something here */
+ return r;
+
+ *_pid = pid;
+
+ return 0;
+}
+
+static int main_pid_good(Service *s) {
+ assert(s);
+
+ /* Returns 0 if the pid is dead, > 0 if it is good, < 0 if we don't know */
+
+ /* If we know the pid file, then let's just check if it is
+ * still valid */
+ if (s->main_pid_known) {
+
+ /* If it's an alien child let's check if it is still
+ * alive ... */
+ if (s->main_pid_alien && s->main_pid > 0)
+ return pid_is_alive(s->main_pid);
+
+ /* .. otherwise assume we'll get a SIGCHLD for it,
+ * which we really should wait for to collect exit
+ * status and code */
+ return s->main_pid > 0;
+ }
+
+ /* We don't know the pid */
+ return -EAGAIN;
+}
+
+static int control_pid_good(Service *s) {
+ assert(s);
+
+ /* Returns 0 if the control PID is dead, > 0 if it is good. We never actually return < 0 here, but in order to
+ * make this function as similar as possible to main_pid_good() and cgroup_good(), we pretend that < 0 also
+ * means: we can't figure it out. */
+
+ return s->control_pid > 0;
+}
+
+static int cgroup_good(Service *s) {
+ int r;
+
+ assert(s);
+
+ /* Returns 0 if the cgroup is empty or doesn't exist, > 0 if it is exists and is populated, < 0 if we can't
+ * figure it out */
+
+ if (!UNIT(s)->cgroup_path)
+ return 0;
+
+ r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, UNIT(s)->cgroup_path);
+ if (r < 0)
+ return r;
+
+ return r == 0;
+}
+
+static bool service_shall_restart(Service *s) {
+ assert(s);
+
+ /* Don't restart after manual stops */
+ if (s->forbid_restart)
+ return false;
+
+ /* Never restart if this is configured as special exception */
+ if (exit_status_set_test(&s->restart_prevent_status, s->main_exec_status.code, s->main_exec_status.status))
+ return false;
+
+ /* Restart if the exit code/status are configured as restart triggers */
+ if (exit_status_set_test(&s->restart_force_status, s->main_exec_status.code, s->main_exec_status.status))
+ return true;
+
+ switch (s->restart) {
+
+ case SERVICE_RESTART_NO:
+ return false;
+
+ case SERVICE_RESTART_ALWAYS:
+ return true;
+
+ case SERVICE_RESTART_ON_SUCCESS:
+ return s->result == SERVICE_SUCCESS;
+
+ case SERVICE_RESTART_ON_FAILURE:
+ return s->result != SERVICE_SUCCESS;
+
+ case SERVICE_RESTART_ON_ABNORMAL:
+ return !IN_SET(s->result, SERVICE_SUCCESS, SERVICE_FAILURE_EXIT_CODE);
+
+ case SERVICE_RESTART_ON_WATCHDOG:
+ return s->result == SERVICE_FAILURE_WATCHDOG;
+
+ case SERVICE_RESTART_ON_ABORT:
+ return IN_SET(s->result, SERVICE_FAILURE_SIGNAL, SERVICE_FAILURE_CORE_DUMP);
+
+ default:
+ assert_not_reached("unknown restart setting");
+ }
+}
+
+static bool service_will_restart(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ if (s->will_auto_restart)
+ return true;
+ if (s->state == SERVICE_AUTO_RESTART)
+ return true;
+ if (!UNIT(s)->job)
+ return false;
+ if (UNIT(s)->job->type == JOB_START)
+ return true;
+
+ return false;
+}
+
+static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) {
+ int r;
+
+ assert(s);
+
+ /* If there's a stop job queued before we enter the DEAD state, we shouldn't act on Restart=, in order to not
+ * undo what has already been enqueued. */
+ if (unit_stop_pending(UNIT(s)))
+ allow_restart = false;
+
+ if (s->result == SERVICE_SUCCESS)
+ s->result = f;
+
+ unit_log_result(UNIT(s), s->result == SERVICE_SUCCESS, service_result_to_string(s->result));
+
+ if (allow_restart && service_shall_restart(s))
+ s->will_auto_restart = true;
+
+ /* Make sure service_release_resources() doesn't destroy our FD store, while we are changing through
+ * SERVICE_FAILED/SERVICE_DEAD before entering into SERVICE_AUTO_RESTART. */
+ s->n_keep_fd_store ++;
+
+ service_set_state(s, s->result != SERVICE_SUCCESS ? SERVICE_FAILED : SERVICE_DEAD);
+
+ if (s->will_auto_restart) {
+ s->will_auto_restart = false;
+
+ r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->restart_usec));
+ if (r < 0) {
+ s->n_keep_fd_store--;
+ goto fail;
+ }
+
+ service_set_state(s, SERVICE_AUTO_RESTART);
+ } else
+ /* If we shan't restart, then flush out the restart counter. But don't do that immediately, so that the
+ * user can still introspect the counter. Do so on the next start. */
+ s->flush_n_restarts = true;
+
+ /* The new state is in effect, let's decrease the fd store ref counter again. Let's also readd us to the GC
+ * queue, so that the fd store is possibly gc'ed again */
+ s->n_keep_fd_store--;
+ unit_add_to_gc_queue(UNIT(s));
+
+ /* The next restart might not be a manual stop, hence reset the flag indicating manual stops */
+ s->forbid_restart = false;
+
+ /* We want fresh tmpdirs in case service is started again immediately */
+ s->exec_runtime = exec_runtime_unref(s->exec_runtime, true);
+
+ if (s->exec_context.runtime_directory_preserve_mode == EXEC_PRESERVE_NO ||
+ (s->exec_context.runtime_directory_preserve_mode == EXEC_PRESERVE_RESTART && !service_will_restart(UNIT(s))))
+ /* Also, remove the runtime directory */
+ exec_context_destroy_runtime_directory(&s->exec_context, UNIT(s)->manager->prefix[EXEC_DIRECTORY_RUNTIME]);
+
+ /* Get rid of the IPC bits of the user */
+ unit_unref_uid_gid(UNIT(s), true);
+
+ /* Release the user, and destroy it if we are the only remaining owner */
+ dynamic_creds_destroy(&s->dynamic_creds);
+
+ /* Try to delete the pid file. At this point it will be
+ * out-of-date, and some software might be confused by it, so
+ * let's remove it. */
+ if (s->pid_file)
+ (void) unlink(s->pid_file);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run install restart timer: %m");
+ service_enter_dead(s, SERVICE_FAILURE_RESOURCES, false);
+}
+
+static void service_enter_stop_post(Service *s, ServiceResult f) {
+ int r;
+ assert(s);
+
+ if (s->result == SERVICE_SUCCESS)
+ s->result = f;
+
+ service_unwatch_control_pid(s);
+ (void) unit_enqueue_rewatch_pids(UNIT(s));
+
+ s->control_command = s->exec_command[SERVICE_EXEC_STOP_POST];
+ if (s->control_command) {
+ s->control_command_id = SERVICE_EXEC_STOP_POST;
+
+ r = service_spawn(s,
+ s->control_command,
+ s->timeout_stop_usec,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP,
+ &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ service_set_state(s, SERVICE_STOP_POST);
+ } else
+ service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'stop-post' task: %m");
+ service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_RESOURCES);
+}
+
+static int state_to_kill_operation(ServiceState state) {
+ switch (state) {
+
+ case SERVICE_STOP_WATCHDOG:
+ return KILL_WATCHDOG;
+
+ case SERVICE_STOP_SIGTERM:
+ case SERVICE_FINAL_SIGTERM:
+ return KILL_TERMINATE;
+
+ case SERVICE_STOP_SIGKILL:
+ case SERVICE_FINAL_SIGKILL:
+ return KILL_KILL;
+
+ default:
+ return _KILL_OPERATION_INVALID;
+ }
+}
+
+static void service_enter_signal(Service *s, ServiceState state, ServiceResult f) {
+ int r;
+
+ assert(s);
+
+ if (s->result == SERVICE_SUCCESS)
+ s->result = f;
+
+ /* Before sending any signal, make sure we track all members of this cgroup */
+ (void) unit_watch_all_pids(UNIT(s));
+
+ /* Also, enqueue a job that we recheck all our PIDs a bit later, given that it's likely some processes have
+ * died now */
+ (void) unit_enqueue_rewatch_pids(UNIT(s));
+
+ r = unit_kill_context(
+ UNIT(s),
+ &s->kill_context,
+ state_to_kill_operation(state),
+ s->main_pid,
+ s->control_pid,
+ s->main_pid_alien);
+ if (r < 0)
+ goto fail;
+
+ if (r > 0) {
+ r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_stop_usec));
+ if (r < 0)
+ goto fail;
+
+ service_set_state(s, state);
+ } else if (IN_SET(state, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM) && s->kill_context.send_sigkill)
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_SUCCESS);
+ else if (IN_SET(state, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL))
+ service_enter_stop_post(s, SERVICE_SUCCESS);
+ else if (state == SERVICE_FINAL_SIGTERM && s->kill_context.send_sigkill)
+ service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_SUCCESS);
+ else
+ service_enter_dead(s, SERVICE_SUCCESS, true);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m");
+
+ if (IN_SET(state, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL))
+ service_enter_stop_post(s, SERVICE_FAILURE_RESOURCES);
+ else
+ service_enter_dead(s, SERVICE_FAILURE_RESOURCES, true);
+}
+
+static void service_enter_stop_by_notify(Service *s) {
+ assert(s);
+
+ (void) unit_enqueue_rewatch_pids(UNIT(s));
+
+ service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_stop_usec));
+
+ /* The service told us it's stopping, so it's as if we SIGTERM'd it. */
+ service_set_state(s, SERVICE_STOP_SIGTERM);
+}
+
+static void service_enter_stop(Service *s, ServiceResult f) {
+ int r;
+
+ assert(s);
+
+ if (s->result == SERVICE_SUCCESS)
+ s->result = f;
+
+ service_unwatch_control_pid(s);
+ (void) unit_enqueue_rewatch_pids(UNIT(s));
+
+ s->control_command = s->exec_command[SERVICE_EXEC_STOP];
+ if (s->control_command) {
+ s->control_command_id = SERVICE_EXEC_STOP;
+
+ r = service_spawn(s,
+ s->control_command,
+ s->timeout_stop_usec,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP,
+ &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ service_set_state(s, SERVICE_STOP);
+ } else
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'stop' task: %m");
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES);
+}
+
+static bool service_good(Service *s) {
+ int main_pid_ok;
+ assert(s);
+
+ if (s->type == SERVICE_DBUS && !s->bus_name_good)
+ return false;
+
+ main_pid_ok = main_pid_good(s);
+ if (main_pid_ok > 0) /* It's alive */
+ return true;
+ if (main_pid_ok == 0) /* It's dead */
+ return false;
+
+ /* OK, we don't know anything about the main PID, maybe
+ * because there is none. Let's check the control group
+ * instead. */
+
+ return cgroup_good(s) != 0;
+}
+
+static void service_enter_running(Service *s, ServiceResult f) {
+ assert(s);
+
+ if (s->result == SERVICE_SUCCESS)
+ s->result = f;
+
+ service_unwatch_control_pid(s);
+
+ if (s->result != SERVICE_SUCCESS)
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ else if (service_good(s)) {
+
+ /* If there are any queued up sd_notify() notifications, process them now */
+ if (s->notify_state == NOTIFY_RELOADING)
+ service_enter_reload_by_notify(s);
+ else if (s->notify_state == NOTIFY_STOPPING)
+ service_enter_stop_by_notify(s);
+ else {
+ service_set_state(s, SERVICE_RUNNING);
+ service_arm_timer(s, usec_add(UNIT(s)->active_enter_timestamp.monotonic, s->runtime_max_usec));
+ }
+
+ } else if (s->remain_after_exit)
+ service_set_state(s, SERVICE_EXITED);
+ else
+ service_enter_stop(s, SERVICE_SUCCESS);
+}
+
+static void service_enter_start_post(Service *s) {
+ int r;
+ assert(s);
+
+ service_unwatch_control_pid(s);
+ service_reset_watchdog(s);
+
+ s->control_command = s->exec_command[SERVICE_EXEC_START_POST];
+ if (s->control_command) {
+ s->control_command_id = SERVICE_EXEC_START_POST;
+
+ r = service_spawn(s,
+ s->control_command,
+ s->timeout_start_usec,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP,
+ &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ service_set_state(s, SERVICE_START_POST);
+ } else
+ service_enter_running(s, SERVICE_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'start-post' task: %m");
+ service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
+}
+
+static void service_kill_control_process(Service *s) {
+ int r;
+
+ assert(s);
+
+ if (s->control_pid <= 0)
+ return;
+
+ r = kill_and_sigcont(s->control_pid, SIGKILL);
+ if (r < 0) {
+ _cleanup_free_ char *comm = NULL;
+
+ (void) get_process_comm(s->control_pid, &comm);
+
+ log_unit_debug_errno(UNIT(s), r, "Failed to kill control process " PID_FMT " (%s), ignoring: %m",
+ s->control_pid, strna(comm));
+ }
+}
+
+static void service_enter_start(Service *s) {
+ ExecCommand *c;
+ usec_t timeout;
+ pid_t pid;
+ int r;
+
+ assert(s);
+
+ service_unwatch_control_pid(s);
+ service_unwatch_main_pid(s);
+
+ unit_warn_leftover_processes(UNIT(s));
+
+ if (s->type == SERVICE_FORKING) {
+ s->control_command_id = SERVICE_EXEC_START;
+ c = s->control_command = s->exec_command[SERVICE_EXEC_START];
+
+ s->main_command = NULL;
+ } else {
+ s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
+ s->control_command = NULL;
+
+ c = s->main_command = s->exec_command[SERVICE_EXEC_START];
+ }
+
+ if (!c) {
+ if (s->type != SERVICE_ONESHOT) {
+ /* There's no command line configured for the main command? Hmm, that is strange. This can only
+ * happen if the configuration changes at runtime. In this case, let's enter a failure
+ * state. */
+ log_unit_error(UNIT(s), "There's no 'start' task anymore we could start.");
+ r = -ENXIO;
+ goto fail;
+ }
+
+ /* We force a fake state transition here. Otherwise, the unit would go directly from
+ * SERVICE_DEAD to SERVICE_DEAD without SERVICE_ACTIVATING or SERVICE_ACTIVE
+ * inbetween. This way we can later trigger actions that depend on the state
+ * transition, including SuccessAction=. */
+ service_set_state(s, SERVICE_START);
+
+ service_enter_start_post(s);
+ return;
+ }
+
+ if (IN_SET(s->type, SERVICE_SIMPLE, SERVICE_IDLE))
+ /* For simple + idle this is the main process. We don't apply any timeout here, but
+ * service_enter_running() will later apply the .runtime_max_usec timeout. */
+ timeout = USEC_INFINITY;
+ else
+ timeout = s->timeout_start_usec;
+
+ r = service_spawn(s,
+ c,
+ timeout,
+ EXEC_PASS_FDS|EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_SET_WATCHDOG,
+ &pid);
+ if (r < 0)
+ goto fail;
+
+ if (IN_SET(s->type, SERVICE_SIMPLE, SERVICE_IDLE)) {
+ /* For simple services we immediately start
+ * the START_POST binaries. */
+
+ service_set_main_pid(s, pid);
+ service_enter_start_post(s);
+
+ } else if (s->type == SERVICE_FORKING) {
+
+ /* For forking services we wait until the start
+ * process exited. */
+
+ s->control_pid = pid;
+ service_set_state(s, SERVICE_START);
+
+ } else if (IN_SET(s->type, SERVICE_ONESHOT, SERVICE_DBUS, SERVICE_NOTIFY, SERVICE_EXEC)) {
+
+ /* For oneshot services we wait until the start process exited, too, but it is our main process. */
+
+ /* For D-Bus services we know the main pid right away, but wait for the bus name to appear on the
+ * bus. 'notify' and 'exec' services are similar. */
+
+ service_set_main_pid(s, pid);
+ service_set_state(s, SERVICE_START);
+ } else
+ assert_not_reached("Unknown service type");
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'start' task: %m");
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES);
+}
+
+static void service_enter_start_pre(Service *s) {
+ int r;
+
+ assert(s);
+
+ service_unwatch_control_pid(s);
+
+ s->control_command = s->exec_command[SERVICE_EXEC_START_PRE];
+ if (s->control_command) {
+
+ unit_warn_leftover_processes(UNIT(s));
+
+ s->control_command_id = SERVICE_EXEC_START_PRE;
+
+ r = service_spawn(s,
+ s->control_command,
+ s->timeout_start_usec,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_APPLY_TTY_STDIN,
+ &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ service_set_state(s, SERVICE_START_PRE);
+ } else
+ service_enter_start(s);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'start-pre' task: %m");
+ service_enter_dead(s, SERVICE_FAILURE_RESOURCES, true);
+}
+
+static void service_enter_restart(Service *s) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(s);
+
+ if (UNIT(s)->job && UNIT(s)->job->type == JOB_STOP) {
+ /* Don't restart things if we are going down anyway */
+ log_unit_info(UNIT(s), "Stop job pending for unit, delaying automatic restart.");
+
+ r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->restart_usec));
+ if (r < 0)
+ goto fail;
+
+ return;
+ }
+
+ /* Any units that are bound to this service must also be
+ * restarted. We use JOB_RESTART (instead of the more obvious
+ * JOB_START) here so that those dependency jobs will be added
+ * as well. */
+ r = manager_add_job(UNIT(s)->manager, JOB_RESTART, UNIT(s), JOB_REPLACE, &error, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* Count the jobs we enqueue for restarting. This counter is maintained as long as the unit isn't fully
+ * stopped, i.e. as long as it remains up or remains in auto-start states. The use can reset the counter
+ * explicitly however via the usual "systemctl reset-failure" logic. */
+ s->n_restarts ++;
+ s->flush_n_restarts = false;
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR,
+ LOG_UNIT_ID(UNIT(s)),
+ LOG_UNIT_INVOCATION_ID(UNIT(s)),
+ LOG_UNIT_MESSAGE(UNIT(s), "Scheduled restart job, restart counter is at %u.", s->n_restarts),
+ "N_RESTARTS=%u", s->n_restarts);
+
+ /* Notify clients about changed restart counter */
+ unit_add_to_dbus_queue(UNIT(s));
+
+ /* Note that we stay in the SERVICE_AUTO_RESTART state here,
+ * it will be canceled as part of the service_stop() call that
+ * is executed as part of JOB_RESTART. */
+
+ return;
+
+fail:
+ log_unit_warning(UNIT(s), "Failed to schedule restart job: %s", bus_error_message(&error, -r));
+ service_enter_dead(s, SERVICE_FAILURE_RESOURCES, false);
+}
+
+static void service_enter_reload_by_notify(Service *s) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(s);
+
+ service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_start_usec));
+ service_set_state(s, SERVICE_RELOAD);
+
+ /* service_enter_reload_by_notify is never called during a reload, thus no loops are possible. */
+ r = manager_propagate_reload(UNIT(s)->manager, UNIT(s), JOB_FAIL, &error);
+ if (r < 0)
+ log_unit_warning(UNIT(s), "Failed to schedule propagation of reload: %s", bus_error_message(&error, -r));
+}
+
+static void service_enter_reload(Service *s) {
+ int r;
+
+ assert(s);
+
+ service_unwatch_control_pid(s);
+ s->reload_result = SERVICE_SUCCESS;
+
+ s->control_command = s->exec_command[SERVICE_EXEC_RELOAD];
+ if (s->control_command) {
+ s->control_command_id = SERVICE_EXEC_RELOAD;
+
+ r = service_spawn(s,
+ s->control_command,
+ s->timeout_start_usec,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP,
+ &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ service_set_state(s, SERVICE_RELOAD);
+ } else
+ service_enter_running(s, SERVICE_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'reload' task: %m");
+ s->reload_result = SERVICE_FAILURE_RESOURCES;
+ service_enter_running(s, SERVICE_SUCCESS);
+}
+
+static void service_run_next_control(Service *s) {
+ usec_t timeout;
+ int r;
+
+ assert(s);
+ assert(s->control_command);
+ assert(s->control_command->command_next);
+
+ assert(s->control_command_id != SERVICE_EXEC_START);
+
+ s->control_command = s->control_command->command_next;
+ service_unwatch_control_pid(s);
+
+ if (IN_SET(s->state, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD))
+ timeout = s->timeout_start_usec;
+ else
+ timeout = s->timeout_stop_usec;
+
+ r = service_spawn(s,
+ s->control_command,
+ timeout,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|
+ (IN_SET(s->control_command_id, SERVICE_EXEC_START_PRE, SERVICE_EXEC_STOP_POST) ? EXEC_APPLY_TTY_STDIN : 0)|
+ (IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0)|
+ (IN_SET(s->control_command_id, SERVICE_EXEC_START_POST, SERVICE_EXEC_RELOAD, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_CONTROL_CGROUP : 0),
+ &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run next control task: %m");
+
+ if (IN_SET(s->state, SERVICE_START_PRE, SERVICE_START_POST, SERVICE_STOP))
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES);
+ else if (s->state == SERVICE_STOP_POST)
+ service_enter_dead(s, SERVICE_FAILURE_RESOURCES, true);
+ else if (s->state == SERVICE_RELOAD) {
+ s->reload_result = SERVICE_FAILURE_RESOURCES;
+ service_enter_running(s, SERVICE_SUCCESS);
+ } else
+ service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
+}
+
+static void service_run_next_main(Service *s) {
+ pid_t pid;
+ int r;
+
+ assert(s);
+ assert(s->main_command);
+ assert(s->main_command->command_next);
+ assert(s->type == SERVICE_ONESHOT);
+
+ s->main_command = s->main_command->command_next;
+ service_unwatch_main_pid(s);
+
+ r = service_spawn(s,
+ s->main_command,
+ s->timeout_start_usec,
+ EXEC_PASS_FDS|EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_SET_WATCHDOG,
+ &pid);
+ if (r < 0)
+ goto fail;
+
+ service_set_main_pid(s, pid);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run next main task: %m");
+ service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
+}
+
+static int service_start(Unit *u) {
+ Service *s = SERVICE(u);
+ int r;
+
+ assert(s);
+
+ /* We cannot fulfill this request right now, try again later
+ * please! */
+ if (IN_SET(s->state,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))
+ return -EAGAIN;
+
+ /* Already on it! */
+ if (IN_SET(s->state, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST))
+ return 0;
+
+ /* A service that will be restarted must be stopped first to
+ * trigger BindsTo and/or OnFailure dependencies. If a user
+ * does not want to wait for the holdoff time to elapse, the
+ * service should be manually restarted, not started. We
+ * simply return EAGAIN here, so that any start jobs stay
+ * queued, and assume that the auto restart timer will
+ * eventually trigger the restart. */
+ if (s->state == SERVICE_AUTO_RESTART)
+ return -EAGAIN;
+
+ assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED));
+
+ /* Make sure we don't enter a busy loop of some kind. */
+ r = unit_start_limit_test(u);
+ if (r < 0) {
+ service_enter_dead(s, SERVICE_FAILURE_START_LIMIT_HIT, false);
+ return r;
+ }
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ s->result = SERVICE_SUCCESS;
+ s->reload_result = SERVICE_SUCCESS;
+ s->main_pid_known = false;
+ s->main_pid_alien = false;
+ s->forbid_restart = false;
+
+ s->status_text = mfree(s->status_text);
+ s->status_errno = 0;
+
+ s->notify_state = NOTIFY_UNKNOWN;
+
+ s->watchdog_original_usec = s->watchdog_usec;
+ s->watchdog_override_enable = false;
+ s->watchdog_override_usec = USEC_INFINITY;
+
+ exec_command_reset_status_list_array(s->exec_command, _SERVICE_EXEC_COMMAND_MAX);
+ exec_status_reset(&s->main_exec_status);
+
+ /* This is not an automatic restart? Flush the restart counter then */
+ if (s->flush_n_restarts) {
+ s->n_restarts = 0;
+ s->flush_n_restarts = false;
+ }
+
+ u->reset_accounting = true;
+
+ service_enter_start_pre(s);
+ return 1;
+}
+
+static int service_stop(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ /* Don't create restart jobs from manual stops. */
+ s->forbid_restart = true;
+
+ /* Already on it */
+ if (IN_SET(s->state,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))
+ return 0;
+
+ /* A restart will be scheduled or is in progress. */
+ if (s->state == SERVICE_AUTO_RESTART) {
+ service_set_state(s, SERVICE_DEAD);
+ return 0;
+ }
+
+ /* If there's already something running we go directly into
+ * kill mode. */
+ if (IN_SET(s->state, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RELOAD)) {
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_SUCCESS);
+ return 0;
+ }
+
+ assert(IN_SET(s->state, SERVICE_RUNNING, SERVICE_EXITED));
+
+ service_enter_stop(s, SERVICE_SUCCESS);
+ return 1;
+}
+
+static int service_reload(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ assert(IN_SET(s->state, SERVICE_RUNNING, SERVICE_EXITED));
+
+ service_enter_reload(s);
+ return 1;
+}
+
+_pure_ static bool service_can_reload(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ return !!s->exec_command[SERVICE_EXEC_RELOAD];
+}
+
+static unsigned service_exec_command_index(Unit *u, ServiceExecCommand id, ExecCommand *current) {
+ Service *s = SERVICE(u);
+ unsigned idx = 0;
+ ExecCommand *first, *c;
+
+ assert(s);
+
+ first = s->exec_command[id];
+
+ /* Figure out where we are in the list by walking back to the beginning */
+ for (c = current; c != first; c = c->command_prev)
+ idx++;
+
+ return idx;
+}
+
+static int service_serialize_exec_command(Unit *u, FILE *f, ExecCommand *command) {
+ _cleanup_free_ char *args = NULL, *p = NULL;
+ size_t allocated = 0, length = 0;
+ Service *s = SERVICE(u);
+ const char *type, *key;
+ ServiceExecCommand id;
+ unsigned idx;
+ char **arg;
+
+ assert(s);
+ assert(f);
+
+ if (!command)
+ return 0;
+
+ if (command == s->control_command) {
+ type = "control";
+ id = s->control_command_id;
+ } else {
+ type = "main";
+ id = SERVICE_EXEC_START;
+ }
+
+ idx = service_exec_command_index(u, id, command);
+
+ STRV_FOREACH(arg, command->argv) {
+ _cleanup_free_ char *e = NULL;
+ size_t n;
+
+ e = cescape(*arg);
+ if (!e)
+ return log_oom();
+
+ n = strlen(e);
+ if (!GREEDY_REALLOC(args, allocated, length + 1 + n + 1))
+ return log_oom();
+
+ if (length > 0)
+ args[length++] = ' ';
+
+ memcpy(args + length, e, n);
+ length += n;
+ }
+
+ if (!GREEDY_REALLOC(args, allocated, length + 1))
+ return log_oom();
+
+ args[length++] = 0;
+
+ p = cescape(command->path);
+ if (!p)
+ return -ENOMEM;
+
+ key = strjoina(type, "-command");
+ return serialize_item_format(f, key, "%s %u %s %s", service_exec_command_to_string(id), idx, p, args);
+}
+
+static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Service *s = SERVICE(u);
+ ServiceFDStore *fs;
+ int r;
+
+ assert(u);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", service_state_to_string(s->state));
+ (void) serialize_item(f, "result", service_result_to_string(s->result));
+ (void) serialize_item(f, "reload-result", service_result_to_string(s->reload_result));
+
+ if (s->control_pid > 0)
+ (void) serialize_item_format(f, "control-pid", PID_FMT, s->control_pid);
+
+ if (s->main_pid_known && s->main_pid > 0)
+ (void) serialize_item_format(f, "main-pid", PID_FMT, s->main_pid);
+
+ (void) serialize_bool(f, "main-pid-known", s->main_pid_known);
+ (void) serialize_bool(f, "bus-name-good", s->bus_name_good);
+ (void) serialize_bool(f, "bus-name-owner", s->bus_name_owner);
+
+ (void) serialize_item_format(f, "n-restarts", "%u", s->n_restarts);
+ (void) serialize_bool(f, "flush-n-restarts", s->flush_n_restarts);
+
+ r = serialize_item_escaped(f, "status-text", s->status_text);
+ if (r < 0)
+ return r;
+
+ service_serialize_exec_command(u, f, s->control_command);
+ service_serialize_exec_command(u, f, s->main_command);
+
+ r = serialize_fd(f, fds, "stdin-fd", s->stdin_fd);
+ if (r < 0)
+ return r;
+ r = serialize_fd(f, fds, "stdout-fd", s->stdout_fd);
+ if (r < 0)
+ return r;
+ r = serialize_fd(f, fds, "stderr-fd", s->stderr_fd);
+ if (r < 0)
+ return r;
+
+ if (s->exec_fd_event_source) {
+ r = serialize_fd(f, fds, "exec-fd", sd_event_source_get_io_fd(s->exec_fd_event_source));
+ if (r < 0)
+ return r;
+
+ (void) serialize_bool(f, "exec-fd-hot", s->exec_fd_hot);
+ }
+
+ if (UNIT_ISSET(s->accept_socket)) {
+ r = serialize_item(f, "accept-socket", UNIT_DEREF(s->accept_socket)->id);
+ if (r < 0)
+ return r;
+ }
+
+ r = serialize_fd(f, fds, "socket-fd", s->socket_fd);
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(fd_store, fs, s->fd_store) {
+ _cleanup_free_ char *c = NULL;
+ int copy;
+
+ copy = fdset_put_dup(fds, fs->fd);
+ if (copy < 0)
+ return log_error_errno(copy, "Failed to copy file descriptor for serialization: %m");
+
+ c = cescape(fs->fdname);
+ if (!c)
+ return log_oom();
+
+ (void) serialize_item_format(f, "fd-store-fd", "%i %s", copy, c);
+ }
+
+ if (s->main_exec_status.pid > 0) {
+ (void) serialize_item_format(f, "main-exec-status-pid", PID_FMT, s->main_exec_status.pid);
+ (void) serialize_dual_timestamp(f, "main-exec-status-start", &s->main_exec_status.start_timestamp);
+ (void) serialize_dual_timestamp(f, "main-exec-status-exit", &s->main_exec_status.exit_timestamp);
+
+ if (dual_timestamp_is_set(&s->main_exec_status.exit_timestamp)) {
+ (void) serialize_item_format(f, "main-exec-status-code", "%i", s->main_exec_status.code);
+ (void) serialize_item_format(f, "main-exec-status-status", "%i", s->main_exec_status.status);
+ }
+ }
+
+ (void) serialize_dual_timestamp(f, "watchdog-timestamp", &s->watchdog_timestamp);
+ (void) serialize_bool(f, "forbid-restart", s->forbid_restart);
+
+ if (s->watchdog_override_enable)
+ (void) serialize_item_format(f, "watchdog-override-usec", USEC_FMT, s->watchdog_override_usec);
+
+ if (s->watchdog_original_usec != USEC_INFINITY)
+ (void) serialize_item_format(f, "watchdog-original-usec", USEC_FMT, s->watchdog_original_usec);
+
+ return 0;
+}
+
+static int service_deserialize_exec_command(Unit *u, const char *key, const char *value) {
+ Service *s = SERVICE(u);
+ int r;
+ unsigned idx = 0, i;
+ bool control, found = false;
+ ServiceExecCommand id = _SERVICE_EXEC_COMMAND_INVALID;
+ ExecCommand *command = NULL;
+ _cleanup_free_ char *path = NULL;
+ _cleanup_strv_free_ char **argv = NULL;
+
+ enum ExecCommandState {
+ STATE_EXEC_COMMAND_TYPE,
+ STATE_EXEC_COMMAND_INDEX,
+ STATE_EXEC_COMMAND_PATH,
+ STATE_EXEC_COMMAND_ARGS,
+ _STATE_EXEC_COMMAND_MAX,
+ _STATE_EXEC_COMMAND_INVALID = -1,
+ } state;
+
+ assert(s);
+ assert(key);
+ assert(value);
+
+ control = streq(key, "control-command");
+
+ state = STATE_EXEC_COMMAND_TYPE;
+
+ for (;;) {
+ _cleanup_free_ char *arg = NULL;
+
+ r = extract_first_word(&value, &arg, NULL, EXTRACT_CUNESCAPE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ switch (state) {
+ case STATE_EXEC_COMMAND_TYPE:
+ id = service_exec_command_from_string(arg);
+ if (id < 0)
+ return -EINVAL;
+
+ state = STATE_EXEC_COMMAND_INDEX;
+ break;
+ case STATE_EXEC_COMMAND_INDEX:
+ r = safe_atou(arg, &idx);
+ if (r < 0)
+ return -EINVAL;
+
+ state = STATE_EXEC_COMMAND_PATH;
+ break;
+ case STATE_EXEC_COMMAND_PATH:
+ path = TAKE_PTR(arg);
+ state = STATE_EXEC_COMMAND_ARGS;
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+ break;
+ case STATE_EXEC_COMMAND_ARGS:
+ r = strv_extend(&argv, arg);
+ if (r < 0)
+ return -ENOMEM;
+ break;
+ default:
+ assert_not_reached("Unknown error at deserialization of exec command");
+ break;
+ }
+ }
+
+ if (state != STATE_EXEC_COMMAND_ARGS)
+ return -EINVAL;
+
+ /* Let's check whether exec command on given offset matches data that we just deserialized */
+ for (command = s->exec_command[id], i = 0; command; command = command->command_next, i++) {
+ if (i != idx)
+ continue;
+
+ found = strv_equal(argv, command->argv) && streq(command->path, path);
+ break;
+ }
+
+ if (!found) {
+ /* Command at the index we serialized is different, let's look for command that exactly
+ * matches but is on different index. If there is no such command we will not resume execution. */
+ for (command = s->exec_command[id]; command; command = command->command_next)
+ if (strv_equal(command->argv, argv) && streq(command->path, path))
+ break;
+ }
+
+ if (command && control)
+ s->control_command = command;
+ else if (command)
+ s->main_command = command;
+ else
+ log_unit_warning(u, "Current command vanished from the unit file, execution of the command list won't be resumed.");
+
+ return 0;
+}
+
+static int service_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Service *s = SERVICE(u);
+ int r;
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ ServiceState state;
+
+ state = service_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ s->deserialized_state = state;
+ } else if (streq(key, "result")) {
+ ServiceResult f;
+
+ f = service_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse result value: %s", value);
+ else if (f != SERVICE_SUCCESS)
+ s->result = f;
+
+ } else if (streq(key, "reload-result")) {
+ ServiceResult f;
+
+ f = service_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse reload result value: %s", value);
+ else if (f != SERVICE_SUCCESS)
+ s->reload_result = f;
+
+ } else if (streq(key, "control-pid")) {
+ pid_t pid;
+
+ if (parse_pid(value, &pid) < 0)
+ log_unit_debug(u, "Failed to parse control-pid value: %s", value);
+ else
+ s->control_pid = pid;
+ } else if (streq(key, "main-pid")) {
+ pid_t pid;
+
+ if (parse_pid(value, &pid) < 0)
+ log_unit_debug(u, "Failed to parse main-pid value: %s", value);
+ else
+ (void) service_set_main_pid(s, pid);
+ } else if (streq(key, "main-pid-known")) {
+ int b;
+
+ b = parse_boolean(value);
+ if (b < 0)
+ log_unit_debug(u, "Failed to parse main-pid-known value: %s", value);
+ else
+ s->main_pid_known = b;
+ } else if (streq(key, "bus-name-good")) {
+ int b;
+
+ b = parse_boolean(value);
+ if (b < 0)
+ log_unit_debug(u, "Failed to parse bus-name-good value: %s", value);
+ else
+ s->bus_name_good = b;
+ } else if (streq(key, "bus-name-owner")) {
+ r = free_and_strdup(&s->bus_name_owner, value);
+ if (r < 0)
+ log_unit_error_errno(u, r, "Unable to deserialize current bus owner %s: %m", value);
+ } else if (streq(key, "status-text")) {
+ char *t;
+
+ r = cunescape(value, 0, &t);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to unescape status text '%s': %m", value);
+ else
+ free_and_replace(s->status_text, t);
+
+ } else if (streq(key, "accept-socket")) {
+ Unit *socket;
+
+ r = manager_load_unit(u->manager, value, NULL, NULL, &socket);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to load accept-socket unit '%s': %m", value);
+ else {
+ unit_ref_set(&s->accept_socket, u, socket);
+ SOCKET(socket)->n_connections++;
+ }
+
+ } else if (streq(key, "socket-fd")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse socket-fd value: %s", value);
+ else {
+ asynchronous_close(s->socket_fd);
+ s->socket_fd = fdset_remove(fds, fd);
+ }
+ } else if (streq(key, "fd-store-fd")) {
+ const char *fdv;
+ size_t pf;
+ int fd;
+
+ pf = strcspn(value, WHITESPACE);
+ fdv = strndupa(value, pf);
+
+ if (safe_atoi(fdv, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse fd-store-fd value: %s", value);
+ else {
+ _cleanup_free_ char *t = NULL;
+ const char *fdn;
+
+ fdn = value + pf;
+ fdn += strspn(fdn, WHITESPACE);
+ (void) cunescape(fdn, 0, &t);
+
+ r = service_add_fd_store(s, fd, t);
+ if (r < 0)
+ log_unit_error_errno(u, r, "Failed to add fd to store: %m");
+ else
+ fdset_remove(fds, fd);
+ }
+
+ } else if (streq(key, "main-exec-status-pid")) {
+ pid_t pid;
+
+ if (parse_pid(value, &pid) < 0)
+ log_unit_debug(u, "Failed to parse main-exec-status-pid value: %s", value);
+ else
+ s->main_exec_status.pid = pid;
+ } else if (streq(key, "main-exec-status-code")) {
+ int i;
+
+ if (safe_atoi(value, &i) < 0)
+ log_unit_debug(u, "Failed to parse main-exec-status-code value: %s", value);
+ else
+ s->main_exec_status.code = i;
+ } else if (streq(key, "main-exec-status-status")) {
+ int i;
+
+ if (safe_atoi(value, &i) < 0)
+ log_unit_debug(u, "Failed to parse main-exec-status-status value: %s", value);
+ else
+ s->main_exec_status.status = i;
+ } else if (streq(key, "main-exec-status-start"))
+ deserialize_dual_timestamp(value, &s->main_exec_status.start_timestamp);
+ else if (streq(key, "main-exec-status-exit"))
+ deserialize_dual_timestamp(value, &s->main_exec_status.exit_timestamp);
+ else if (streq(key, "watchdog-timestamp"))
+ deserialize_dual_timestamp(value, &s->watchdog_timestamp);
+ else if (streq(key, "forbid-restart")) {
+ int b;
+
+ b = parse_boolean(value);
+ if (b < 0)
+ log_unit_debug(u, "Failed to parse forbid-restart value: %s", value);
+ else
+ s->forbid_restart = b;
+ } else if (streq(key, "stdin-fd")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse stdin-fd value: %s", value);
+ else {
+ asynchronous_close(s->stdin_fd);
+ s->stdin_fd = fdset_remove(fds, fd);
+ s->exec_context.stdio_as_fds = true;
+ }
+ } else if (streq(key, "stdout-fd")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse stdout-fd value: %s", value);
+ else {
+ asynchronous_close(s->stdout_fd);
+ s->stdout_fd = fdset_remove(fds, fd);
+ s->exec_context.stdio_as_fds = true;
+ }
+ } else if (streq(key, "stderr-fd")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse stderr-fd value: %s", value);
+ else {
+ asynchronous_close(s->stderr_fd);
+ s->stderr_fd = fdset_remove(fds, fd);
+ s->exec_context.stdio_as_fds = true;
+ }
+ } else if (streq(key, "exec-fd")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse exec-fd value: %s", value);
+ else {
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
+
+ fd = fdset_remove(fds, fd);
+ if (service_allocate_exec_fd_event_source(s, fd, &s->exec_fd_event_source) < 0)
+ safe_close(fd);
+ }
+ } else if (streq(key, "watchdog-override-usec")) {
+ if (deserialize_usec(value, &s->watchdog_override_usec) < 0)
+ log_unit_debug(u, "Failed to parse watchdog_override_usec value: %s", value);
+ else
+ s->watchdog_override_enable = true;
+
+ } else if (streq(key, "watchdog-original-usec")) {
+ if (deserialize_usec(value, &s->watchdog_original_usec) < 0)
+ log_unit_debug(u, "Failed to parse watchdog_original_usec value: %s", value);
+
+ } else if (STR_IN_SET(key, "main-command", "control-command")) {
+ r = service_deserialize_exec_command(u, key, value);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to parse serialized command \"%s\": %m", value);
+
+ } else if (streq(key, "n-restarts")) {
+ r = safe_atou(value, &s->n_restarts);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to parse serialized restart counter '%s': %m", value);
+
+ } else if (streq(key, "flush-n-restarts")) {
+ r = parse_boolean(value);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to parse serialized flush restart counter setting '%s': %m", value);
+ else
+ s->flush_n_restarts = r;
+ } else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+_pure_ static UnitActiveState service_active_state(Unit *u) {
+ const UnitActiveState *table;
+
+ assert(u);
+
+ table = SERVICE(u)->type == SERVICE_IDLE ? state_translation_table_idle : state_translation_table;
+
+ return table[SERVICE(u)->state];
+}
+
+static const char *service_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return service_state_to_string(SERVICE(u)->state);
+}
+
+static bool service_may_gc(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ /* Never clean up services that still have a process around, even if the service is formally dead. Note that
+ * unit_may_gc() already checked our cgroup for us, we just check our two additional PIDs, too, in case they
+ * have moved outside of the cgroup. */
+
+ if (main_pid_good(s) > 0 ||
+ control_pid_good(s) > 0)
+ return false;
+
+ return true;
+}
+
+static int service_retry_pid_file(Service *s) {
+ int r;
+
+ assert(s->pid_file);
+ assert(IN_SET(s->state, SERVICE_START, SERVICE_START_POST));
+
+ r = service_load_pid_file(s, false);
+ if (r < 0)
+ return r;
+
+ service_unwatch_pid_file(s);
+
+ service_enter_running(s, SERVICE_SUCCESS);
+ return 0;
+}
+
+static int service_watch_pid_file(Service *s) {
+ int r;
+
+ log_unit_debug(UNIT(s), "Setting watch for PID file %s", s->pid_file_pathspec->path);
+
+ r = path_spec_watch(s->pid_file_pathspec, service_dispatch_inotify_io);
+ if (r < 0)
+ goto fail;
+
+ /* the pidfile might have appeared just before we set the watch */
+ log_unit_debug(UNIT(s), "Trying to read PID file %s in case it changed", s->pid_file_pathspec->path);
+ service_retry_pid_file(s);
+
+ return 0;
+fail:
+ log_unit_error_errno(UNIT(s), r, "Failed to set a watch for PID file %s: %m", s->pid_file_pathspec->path);
+ service_unwatch_pid_file(s);
+ return r;
+}
+
+static int service_demand_pid_file(Service *s) {
+ PathSpec *ps;
+
+ assert(s->pid_file);
+ assert(!s->pid_file_pathspec);
+
+ ps = new0(PathSpec, 1);
+ if (!ps)
+ return -ENOMEM;
+
+ ps->unit = UNIT(s);
+ ps->path = strdup(s->pid_file);
+ if (!ps->path) {
+ free(ps);
+ return -ENOMEM;
+ }
+
+ path_simplify(ps->path, false);
+
+ /* PATH_CHANGED would not be enough. There are daemons (sendmail) that
+ * keep their PID file open all the time. */
+ ps->type = PATH_MODIFIED;
+ ps->inotify_fd = -1;
+
+ s->pid_file_pathspec = ps;
+
+ return service_watch_pid_file(s);
+}
+
+static int service_dispatch_inotify_io(sd_event_source *source, int fd, uint32_t events, void *userdata) {
+ PathSpec *p = userdata;
+ Service *s;
+
+ assert(p);
+
+ s = SERVICE(p->unit);
+
+ assert(s);
+ assert(fd >= 0);
+ assert(IN_SET(s->state, SERVICE_START, SERVICE_START_POST));
+ assert(s->pid_file_pathspec);
+ assert(path_spec_owns_inotify_fd(s->pid_file_pathspec, fd));
+
+ log_unit_debug(UNIT(s), "inotify event");
+
+ if (path_spec_fd_event(p, events) < 0)
+ goto fail;
+
+ if (service_retry_pid_file(s) == 0)
+ return 0;
+
+ if (service_watch_pid_file(s) < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ service_unwatch_pid_file(s);
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES);
+ return 0;
+}
+
+static int service_dispatch_exec_io(sd_event_source *source, int fd, uint32_t events, void *userdata) {
+ Service *s = SERVICE(userdata);
+
+ assert(s);
+
+ log_unit_debug(UNIT(s), "got exec-fd event");
+
+ /* If Type=exec is set, we'll consider a service started successfully the instant we invoked execve()
+ * successfully for it. We implement this through a pipe() towards the child, which the kernel automatically
+ * closes for us due to O_CLOEXEC on execve() in the child, which then triggers EOF on the pipe in the
+ * parent. We need to be careful however, as there are other reasons that we might cause the child's side of
+ * the pipe to be closed (for example, a simple exit()). To deal with that we'll ignore EOFs on the pipe unless
+ * the child signalled us first that it is about to call the execve(). It does so by sending us a simple
+ * non-zero byte via the pipe. We also provide the child with a way to inform us in case execve() failed: if it
+ * sends a zero byte we'll ignore POLLHUP on the fd again. */
+
+ for (;;) {
+ uint8_t x;
+ ssize_t n;
+
+ n = read(fd, &x, sizeof(x));
+ if (n < 0) {
+ if (errno == EAGAIN) /* O_NONBLOCK in effect → everything queued has now been processed. */
+ return 0;
+
+ return log_unit_error_errno(UNIT(s), errno, "Failed to read from exec_fd: %m");
+ }
+ if (n == 0) { /* EOF → the event we are waiting for */
+
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
+
+ if (s->exec_fd_hot) { /* Did the child tell us to expect EOF now? */
+ log_unit_debug(UNIT(s), "Got EOF on exec-fd");
+
+ s->exec_fd_hot = false;
+
+ /* Nice! This is what we have been waiting for. Transition to next state. */
+ if (s->type == SERVICE_EXEC && s->state == SERVICE_START)
+ service_enter_start_post(s);
+ } else
+ log_unit_debug(UNIT(s), "Got EOF on exec-fd while it was disabled, ignoring.");
+
+ return 0;
+ }
+
+ /* A byte was read → this turns on/off the exec fd logic */
+ assert(n == sizeof(x));
+ s->exec_fd_hot = x;
+ }
+
+ return 0;
+}
+
+static void service_notify_cgroup_empty_event(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(u);
+
+ log_unit_debug(u, "cgroup is empty");
+
+ switch (s->state) {
+
+ /* Waiting for SIGCHLD is usually more interesting,
+ * because it includes return codes/signals. Which is
+ * why we ignore the cgroup events for most cases,
+ * except when we don't know pid which to expect the
+ * SIGCHLD for. */
+
+ case SERVICE_START:
+ if (s->type == SERVICE_NOTIFY &&
+ main_pid_good(s) == 0 &&
+ control_pid_good(s) == 0) {
+ /* No chance of getting a ready notification anymore */
+ service_enter_stop_post(s, SERVICE_FAILURE_PROTOCOL);
+ break;
+ }
+
+ _fallthrough_;
+ case SERVICE_START_POST:
+ if (s->pid_file_pathspec &&
+ main_pid_good(s) == 0 &&
+ control_pid_good(s) == 0) {
+
+ /* Give up hoping for the daemon to write its PID file */
+ log_unit_warning(u, "Daemon never wrote its PID file. Failing.");
+
+ service_unwatch_pid_file(s);
+ if (s->state == SERVICE_START)
+ service_enter_stop_post(s, SERVICE_FAILURE_PROTOCOL);
+ else
+ service_enter_stop(s, SERVICE_FAILURE_PROTOCOL);
+ }
+ break;
+
+ case SERVICE_RUNNING:
+ /* service_enter_running() will figure out what to do */
+ service_enter_running(s, SERVICE_SUCCESS);
+ break;
+
+ case SERVICE_STOP_WATCHDOG:
+ case SERVICE_STOP_SIGTERM:
+ case SERVICE_STOP_SIGKILL:
+
+ if (main_pid_good(s) <= 0 && control_pid_good(s) <= 0)
+ service_enter_stop_post(s, SERVICE_SUCCESS);
+
+ break;
+
+ case SERVICE_STOP_POST:
+ case SERVICE_FINAL_SIGTERM:
+ case SERVICE_FINAL_SIGKILL:
+ if (main_pid_good(s) <= 0 && control_pid_good(s) <= 0)
+ service_enter_dead(s, SERVICE_SUCCESS, true);
+
+ break;
+
+ default:
+ ;
+ }
+}
+
+static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
+ bool notify_dbus = true;
+ Service *s = SERVICE(u);
+ ServiceResult f;
+
+ assert(s);
+ assert(pid >= 0);
+
+ if (is_clean_exit(code, status, s->type == SERVICE_ONESHOT ? EXIT_CLEAN_COMMAND : EXIT_CLEAN_DAEMON, &s->success_status))
+ f = SERVICE_SUCCESS;
+ else if (code == CLD_EXITED)
+ f = SERVICE_FAILURE_EXIT_CODE;
+ else if (code == CLD_KILLED)
+ f = SERVICE_FAILURE_SIGNAL;
+ else if (code == CLD_DUMPED)
+ f = SERVICE_FAILURE_CORE_DUMP;
+ else
+ assert_not_reached("Unknown code");
+
+ if (s->main_pid == pid) {
+ /* Forking services may occasionally move to a new PID.
+ * As long as they update the PID file before exiting the old
+ * PID, they're fine. */
+ if (service_load_pid_file(s, false) > 0)
+ return;
+
+ s->main_pid = 0;
+ exec_status_exit(&s->main_exec_status, &s->exec_context, pid, code, status);
+
+ if (s->main_command) {
+ /* If this is not a forking service than the
+ * main process got started and hence we copy
+ * the exit status so that it is recorded both
+ * as main and as control process exit
+ * status */
+
+ s->main_command->exec_status = s->main_exec_status;
+
+ if (s->main_command->flags & EXEC_COMMAND_IGNORE_FAILURE)
+ f = SERVICE_SUCCESS;
+ } else if (s->exec_command[SERVICE_EXEC_START]) {
+
+ /* If this is a forked process, then we should
+ * ignore the return value if this was
+ * configured for the starter process */
+
+ if (s->exec_command[SERVICE_EXEC_START]->flags & EXEC_COMMAND_IGNORE_FAILURE)
+ f = SERVICE_SUCCESS;
+ }
+
+ /* When this is a successful exit, let's log about the exit code on DEBUG level. If this is a failure
+ * and the process exited on its own via exit(), then let's make this a NOTICE, under the assumption
+ * that the service already logged the reason at a higher log level on its own. (Internally,
+ * unit_log_process_exit() will possibly bump this to WARNING if the service died due to a signal.) */
+ unit_log_process_exit(
+ u, f == SERVICE_SUCCESS ? LOG_DEBUG : LOG_NOTICE,
+ "Main process",
+ service_exec_command_to_string(SERVICE_EXEC_START),
+ code, status);
+
+ if (s->result == SERVICE_SUCCESS)
+ s->result = f;
+
+ if (s->main_command &&
+ s->main_command->command_next &&
+ s->type == SERVICE_ONESHOT &&
+ f == SERVICE_SUCCESS) {
+
+ /* There is another command to *
+ * execute, so let's do that. */
+
+ log_unit_debug(u, "Running next main command for state %s.", service_state_to_string(s->state));
+ service_run_next_main(s);
+
+ } else {
+
+ /* The service exited, so the service is officially
+ * gone. */
+ s->main_command = NULL;
+
+ switch (s->state) {
+
+ case SERVICE_START_POST:
+ case SERVICE_RELOAD:
+ case SERVICE_STOP:
+ /* Need to wait until the operation is
+ * done */
+ break;
+
+ case SERVICE_START:
+ if (s->type == SERVICE_ONESHOT) {
+ /* This was our main goal, so let's go on */
+ if (f == SERVICE_SUCCESS)
+ service_enter_start_post(s);
+ else
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ break;
+ } else if (s->type == SERVICE_NOTIFY) {
+ /* Only enter running through a notification, so that the
+ * SERVICE_START state signifies that no ready notification
+ * has been received */
+ if (f != SERVICE_SUCCESS)
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ else if (!s->remain_after_exit || s->notify_access == NOTIFY_MAIN)
+ /* The service has never been and will never be active */
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_PROTOCOL);
+ break;
+ }
+
+ _fallthrough_;
+ case SERVICE_RUNNING:
+ service_enter_running(s, f);
+ break;
+
+ case SERVICE_STOP_WATCHDOG:
+ case SERVICE_STOP_SIGTERM:
+ case SERVICE_STOP_SIGKILL:
+
+ if (control_pid_good(s) <= 0)
+ service_enter_stop_post(s, f);
+
+ /* If there is still a control process, wait for that first */
+ break;
+
+ case SERVICE_STOP_POST:
+ case SERVICE_FINAL_SIGTERM:
+ case SERVICE_FINAL_SIGKILL:
+
+ if (control_pid_good(s) <= 0)
+ service_enter_dead(s, f, true);
+ break;
+
+ default:
+ assert_not_reached("Uh, main process died at wrong time.");
+ }
+ }
+
+ } else if (s->control_pid == pid) {
+ s->control_pid = 0;
+
+ if (s->control_command) {
+ exec_status_exit(&s->control_command->exec_status, &s->exec_context, pid, code, status);
+
+ if (s->control_command->flags & EXEC_COMMAND_IGNORE_FAILURE)
+ f = SERVICE_SUCCESS;
+ }
+
+ unit_log_process_exit(
+ u, f == SERVICE_SUCCESS ? LOG_DEBUG : LOG_NOTICE,
+ "Control process",
+ service_exec_command_to_string(s->control_command_id),
+ code, status);
+
+ if (s->result == SERVICE_SUCCESS)
+ s->result = f;
+
+ if (s->control_command &&
+ s->control_command->command_next &&
+ f == SERVICE_SUCCESS) {
+
+ /* There is another command to *
+ * execute, so let's do that. */
+
+ log_unit_debug(u, "Running next control command for state %s.", service_state_to_string(s->state));
+ service_run_next_control(s);
+
+ } else {
+ /* No further commands for this step, so let's
+ * figure out what to do next */
+
+ s->control_command = NULL;
+ s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
+
+ log_unit_debug(u, "Got final SIGCHLD for state %s.", service_state_to_string(s->state));
+
+ switch (s->state) {
+
+ case SERVICE_START_PRE:
+ if (f == SERVICE_SUCCESS)
+ service_enter_start(s);
+ else
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ break;
+
+ case SERVICE_START:
+ if (s->type != SERVICE_FORKING)
+ /* Maybe spurious event due to a reload that changed the type? */
+ break;
+
+ if (f != SERVICE_SUCCESS) {
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ break;
+ }
+
+ if (s->pid_file) {
+ bool has_start_post;
+ int r;
+
+ /* Let's try to load the pid file here if we can.
+ * The PID file might actually be created by a START_POST
+ * script. In that case don't worry if the loading fails. */
+
+ has_start_post = s->exec_command[SERVICE_EXEC_START_POST];
+ r = service_load_pid_file(s, !has_start_post);
+ if (!has_start_post && r < 0) {
+ r = service_demand_pid_file(s);
+ if (r < 0 || cgroup_good(s) == 0)
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_PROTOCOL);
+ break;
+ }
+ } else
+ service_search_main_pid(s);
+
+ service_enter_start_post(s);
+ break;
+
+ case SERVICE_START_POST:
+ if (f != SERVICE_SUCCESS) {
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ break;
+ }
+
+ if (s->pid_file) {
+ int r;
+
+ r = service_load_pid_file(s, true);
+ if (r < 0) {
+ r = service_demand_pid_file(s);
+ if (r < 0 || cgroup_good(s) == 0)
+ service_enter_stop(s, SERVICE_FAILURE_PROTOCOL);
+ break;
+ }
+ } else
+ service_search_main_pid(s);
+
+ service_enter_running(s, SERVICE_SUCCESS);
+ break;
+
+ case SERVICE_RELOAD:
+ if (f == SERVICE_SUCCESS)
+ if (service_load_pid_file(s, true) < 0)
+ service_search_main_pid(s);
+
+ s->reload_result = f;
+ service_enter_running(s, SERVICE_SUCCESS);
+ break;
+
+ case SERVICE_STOP:
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ break;
+
+ case SERVICE_STOP_WATCHDOG:
+ case SERVICE_STOP_SIGTERM:
+ case SERVICE_STOP_SIGKILL:
+ if (main_pid_good(s) <= 0)
+ service_enter_stop_post(s, f);
+
+ /* If there is still a service
+ * process around, wait until
+ * that one quit, too */
+ break;
+
+ case SERVICE_STOP_POST:
+ case SERVICE_FINAL_SIGTERM:
+ case SERVICE_FINAL_SIGKILL:
+ if (main_pid_good(s) <= 0)
+ service_enter_dead(s, f, true);
+ break;
+
+ default:
+ assert_not_reached("Uh, control process died at wrong time.");
+ }
+ }
+ } else /* Neither control nor main PID? If so, don't notify about anything */
+ notify_dbus = false;
+
+ /* Notify clients about changed exit status */
+ if (notify_dbus)
+ unit_add_to_dbus_queue(u);
+
+ /* If we get a SIGCHLD event for one of the processes we were interested in, then we look for others to watch,
+ * under the assumption that we'll sooner or later get a SIGCHLD for them, as the original process we watched
+ * was probably the parent of them, and they are hence now our children. */
+ (void) unit_enqueue_rewatch_pids(u);
+}
+
+static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
+ Service *s = SERVICE(userdata);
+
+ assert(s);
+ assert(source == s->timer_event_source);
+
+ switch (s->state) {
+
+ case SERVICE_START_PRE:
+ case SERVICE_START:
+ log_unit_warning(UNIT(s), "%s operation timed out. Terminating.", s->state == SERVICE_START ? "Start" : "Start-pre");
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_START_POST:
+ log_unit_warning(UNIT(s), "Start-post operation timed out. Stopping.");
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_RUNNING:
+ log_unit_warning(UNIT(s), "Service reached runtime time limit. Stopping.");
+ service_enter_stop(s, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_RELOAD:
+ log_unit_warning(UNIT(s), "Reload operation timed out. Killing reload process.");
+ service_kill_control_process(s);
+ s->reload_result = SERVICE_FAILURE_TIMEOUT;
+ service_enter_running(s, SERVICE_SUCCESS);
+ break;
+
+ case SERVICE_STOP:
+ log_unit_warning(UNIT(s), "Stopping timed out. Terminating.");
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_STOP_WATCHDOG:
+ log_unit_warning(UNIT(s), "State 'stop-watchdog' timed out. Terminating.");
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_STOP_SIGTERM:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "State 'stop-sigterm' timed out. Killing.");
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "State 'stop-sigterm' timed out. Skipping SIGKILL.");
+ service_enter_stop_post(s, SERVICE_FAILURE_TIMEOUT);
+ }
+
+ break;
+
+ case SERVICE_STOP_SIGKILL:
+ /* Uh, we sent a SIGKILL and it is still not gone?
+ * Must be something we cannot kill, so let's just be
+ * weirded out and continue */
+
+ log_unit_warning(UNIT(s), "Processes still around after SIGKILL. Ignoring.");
+ service_enter_stop_post(s, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_STOP_POST:
+ log_unit_warning(UNIT(s), "State 'stop-post' timed out. Terminating.");
+ service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_FINAL_SIGTERM:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "State 'stop-final-sigterm' timed out. Killing.");
+ service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "State 'stop-final-sigterm' timed out. Skipping SIGKILL. Entering failed mode.");
+ service_enter_dead(s, SERVICE_FAILURE_TIMEOUT, false);
+ }
+
+ break;
+
+ case SERVICE_FINAL_SIGKILL:
+ log_unit_warning(UNIT(s), "Processes still around after final SIGKILL. Entering failed mode.");
+ service_enter_dead(s, SERVICE_FAILURE_TIMEOUT, true);
+ break;
+
+ case SERVICE_AUTO_RESTART:
+ if (s->restart_usec > 0) {
+ char buf_restart[FORMAT_TIMESPAN_MAX];
+ log_unit_info(UNIT(s),
+ "Service RestartSec=%s expired, scheduling restart.",
+ format_timespan(buf_restart, sizeof buf_restart, s->restart_usec, USEC_PER_SEC));
+ } else
+ log_unit_info(UNIT(s),
+ "Service has no hold-off time (RestartSec=0), scheduling restart.");
+
+ service_enter_restart(s);
+ break;
+
+ default:
+ assert_not_reached("Timeout at wrong time.");
+ }
+
+ return 0;
+}
+
+static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void *userdata) {
+ Service *s = SERVICE(userdata);
+ char t[FORMAT_TIMESPAN_MAX];
+ usec_t watchdog_usec;
+
+ assert(s);
+ assert(source == s->watchdog_event_source);
+
+ watchdog_usec = service_get_watchdog_usec(s);
+
+ if (UNIT(s)->manager->service_watchdogs) {
+ log_unit_error(UNIT(s), "Watchdog timeout (limit %s)!",
+ format_timespan(t, sizeof(t), watchdog_usec, 1));
+
+ service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_WATCHDOG);
+ } else
+ log_unit_warning(UNIT(s), "Watchdog disabled! Ignoring watchdog timeout (limit %s)!",
+ format_timespan(t, sizeof(t), watchdog_usec, 1));
+
+ return 0;
+}
+
+static bool service_notify_message_authorized(Service *s, pid_t pid, char **tags, FDSet *fds) {
+ assert(s);
+
+ if (s->notify_access == NOTIFY_NONE) {
+ log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception is disabled.", pid);
+ return false;
+ }
+
+ if (s->notify_access == NOTIFY_MAIN && pid != s->main_pid) {
+ if (s->main_pid != 0)
+ log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID "PID_FMT, pid, s->main_pid);
+ else
+ log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID which is currently not known", pid);
+
+ return false;
+ }
+
+ if (s->notify_access == NOTIFY_EXEC && pid != s->main_pid && pid != s->control_pid) {
+ if (s->main_pid != 0 && s->control_pid != 0)
+ log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID "PID_FMT" and control PID "PID_FMT,
+ pid, s->main_pid, s->control_pid);
+ else if (s->main_pid != 0)
+ log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID "PID_FMT, pid, s->main_pid);
+ else if (s->control_pid != 0)
+ log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for control PID "PID_FMT, pid, s->control_pid);
+ else
+ log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID and control PID which are currently not known", pid);
+
+ return false;
+ }
+
+ return true;
+}
+
+static void service_notify_message(
+ Unit *u,
+ const struct ucred *ucred,
+ char **tags,
+ FDSet *fds) {
+
+ Service *s = SERVICE(u);
+ bool notify_dbus = false;
+ const char *e;
+ char **i;
+ int r;
+
+ assert(u);
+ assert(ucred);
+
+ if (!service_notify_message_authorized(SERVICE(u), ucred->pid, tags, fds))
+ return;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *cc = NULL;
+
+ cc = strv_join(tags, ", ");
+ log_unit_debug(u, "Got notification message from PID "PID_FMT" (%s)", ucred->pid, isempty(cc) ? "n/a" : cc);
+ }
+
+ /* Interpret MAINPID= */
+ e = strv_find_startswith(tags, "MAINPID=");
+ if (e && IN_SET(s->state, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD)) {
+ pid_t new_main_pid;
+
+ if (parse_pid(e, &new_main_pid) < 0)
+ log_unit_warning(u, "Failed to parse MAINPID= field in notification message, ignoring: %s", e);
+ else if (!s->main_pid_known || new_main_pid != s->main_pid) {
+
+ r = service_is_suitable_main_pid(s, new_main_pid, LOG_WARNING);
+ if (r == 0) {
+ /* The new main PID is a bit suspicous, which is OK if the sender is privileged. */
+
+ if (ucred->uid == 0) {
+ log_unit_debug(u, "New main PID "PID_FMT" does not belong to service, but we'll accept it as the request to change it came from a privileged process.", new_main_pid);
+ r = 1;
+ } else
+ log_unit_debug(u, "New main PID "PID_FMT" does not belong to service, refusing.", new_main_pid);
+ }
+ if (r > 0) {
+ service_set_main_pid(s, new_main_pid);
+
+ r = unit_watch_pid(UNIT(s), new_main_pid);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "Failed to watch new main PID "PID_FMT" for service: %m", new_main_pid);
+
+ notify_dbus = true;
+ }
+ }
+ }
+
+ /* Interpret READY=/STOPPING=/RELOADING=. Last one wins. */
+ STRV_FOREACH_BACKWARDS(i, tags) {
+
+ if (streq(*i, "READY=1")) {
+ s->notify_state = NOTIFY_READY;
+
+ /* Type=notify services inform us about completed
+ * initialization with READY=1 */
+ if (s->type == SERVICE_NOTIFY && s->state == SERVICE_START)
+ service_enter_start_post(s);
+
+ /* Sending READY=1 while we are reloading informs us
+ * that the reloading is complete */
+ if (s->state == SERVICE_RELOAD && s->control_pid == 0)
+ service_enter_running(s, SERVICE_SUCCESS);
+
+ notify_dbus = true;
+ break;
+
+ } else if (streq(*i, "RELOADING=1")) {
+ s->notify_state = NOTIFY_RELOADING;
+
+ if (s->state == SERVICE_RUNNING)
+ service_enter_reload_by_notify(s);
+
+ notify_dbus = true;
+ break;
+
+ } else if (streq(*i, "STOPPING=1")) {
+ s->notify_state = NOTIFY_STOPPING;
+
+ if (s->state == SERVICE_RUNNING)
+ service_enter_stop_by_notify(s);
+
+ notify_dbus = true;
+ break;
+ }
+ }
+
+ /* Interpret STATUS= */
+ e = strv_find_startswith(tags, "STATUS=");
+ if (e) {
+ _cleanup_free_ char *t = NULL;
+
+ if (!isempty(e)) {
+ /* Note that this size limit check is mostly paranoia: since the datagram size we are willing
+ * to process is already limited to NOTIFY_BUFFER_MAX, this limit here should never be hit. */
+ if (strlen(e) > STATUS_TEXT_MAX)
+ log_unit_warning(u, "Status message overly long (%zu > %u), ignoring.", strlen(e), STATUS_TEXT_MAX);
+ else if (!utf8_is_valid(e))
+ log_unit_warning(u, "Status message in notification message is not UTF-8 clean, ignoring.");
+ else {
+ t = strdup(e);
+ if (!t)
+ log_oom();
+ }
+ }
+
+ if (!streq_ptr(s->status_text, t)) {
+ free_and_replace(s->status_text, t);
+ notify_dbus = true;
+ }
+ }
+
+ /* Interpret ERRNO= */
+ e = strv_find_startswith(tags, "ERRNO=");
+ if (e) {
+ int status_errno;
+
+ status_errno = parse_errno(e);
+ if (status_errno < 0)
+ log_unit_warning_errno(u, status_errno,
+ "Failed to parse ERRNO= field value '%s' in notification message: %m", e);
+ else if (s->status_errno != status_errno) {
+ s->status_errno = status_errno;
+ notify_dbus = true;
+ }
+ }
+
+ /* Interpret EXTEND_TIMEOUT= */
+ e = strv_find_startswith(tags, "EXTEND_TIMEOUT_USEC=");
+ if (e) {
+ usec_t extend_timeout_usec;
+ if (safe_atou64(e, &extend_timeout_usec) < 0)
+ log_unit_warning(u, "Failed to parse EXTEND_TIMEOUT_USEC=%s", e);
+ else
+ service_extend_timeout(s, extend_timeout_usec);
+ }
+
+ /* Interpret WATCHDOG= */
+ if (strv_find(tags, "WATCHDOG=1"))
+ service_reset_watchdog(s);
+
+ e = strv_find_startswith(tags, "WATCHDOG_USEC=");
+ if (e) {
+ usec_t watchdog_override_usec;
+ if (safe_atou64(e, &watchdog_override_usec) < 0)
+ log_unit_warning(u, "Failed to parse WATCHDOG_USEC=%s", e);
+ else
+ service_override_watchdog_timeout(s, watchdog_override_usec);
+ }
+
+ /* Process FD store messages. Either FDSTOREREMOVE=1 for removal, or FDSTORE=1 for addition. In both cases,
+ * process FDNAME= for picking the file descriptor name to use. Note that FDNAME= is required when removing
+ * fds, but optional when pushing in new fds, for compatibility reasons. */
+ if (strv_find(tags, "FDSTOREREMOVE=1")) {
+ const char *name;
+
+ name = strv_find_startswith(tags, "FDNAME=");
+ if (!name || !fdname_is_valid(name))
+ log_unit_warning(u, "FDSTOREREMOVE=1 requested, but no valid file descriptor name passed, ignoring.");
+ else
+ service_remove_fd_store(s, name);
+
+ } else if (strv_find(tags, "FDSTORE=1")) {
+ const char *name;
+
+ name = strv_find_startswith(tags, "FDNAME=");
+ if (name && !fdname_is_valid(name)) {
+ log_unit_warning(u, "Passed FDNAME= name is invalid, ignoring.");
+ name = NULL;
+ }
+
+ (void) service_add_fd_store_set(s, fds, name);
+ }
+
+ /* Notify clients about changed status or main pid */
+ if (notify_dbus)
+ unit_add_to_dbus_queue(u);
+}
+
+static int service_get_timeout(Unit *u, usec_t *timeout) {
+ Service *s = SERVICE(u);
+ uint64_t t;
+ int r;
+
+ if (!s->timer_event_source)
+ return 0;
+
+ r = sd_event_source_get_time(s->timer_event_source, &t);
+ if (r < 0)
+ return r;
+ if (t == USEC_INFINITY)
+ return 0;
+
+ *timeout = t;
+ return 1;
+}
+
+static void service_bus_name_owner_change(
+ Unit *u,
+ const char *name,
+ const char *old_owner,
+ const char *new_owner) {
+
+ Service *s = SERVICE(u);
+ int r;
+
+ assert(s);
+ assert(name);
+
+ assert(streq(s->bus_name, name));
+ assert(old_owner || new_owner);
+
+ if (old_owner && new_owner)
+ log_unit_debug(u, "D-Bus name %s changed owner from %s to %s", name, old_owner, new_owner);
+ else if (old_owner)
+ log_unit_debug(u, "D-Bus name %s no longer registered by %s", name, old_owner);
+ else
+ log_unit_debug(u, "D-Bus name %s now registered by %s", name, new_owner);
+
+ s->bus_name_good = !!new_owner;
+
+ /* Track the current owner, so we can reconstruct changes after a daemon reload */
+ r = free_and_strdup(&s->bus_name_owner, new_owner);
+ if (r < 0) {
+ log_unit_error_errno(u, r, "Unable to set new bus name owner %s: %m", new_owner);
+ return;
+ }
+
+ if (s->type == SERVICE_DBUS) {
+
+ /* service_enter_running() will figure out what to
+ * do */
+ if (s->state == SERVICE_RUNNING)
+ service_enter_running(s, SERVICE_SUCCESS);
+ else if (s->state == SERVICE_START && new_owner)
+ service_enter_start_post(s);
+
+ } else if (new_owner &&
+ s->main_pid <= 0 &&
+ IN_SET(s->state,
+ SERVICE_START,
+ SERVICE_START_POST,
+ SERVICE_RUNNING,
+ SERVICE_RELOAD)) {
+
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ pid_t pid;
+
+ /* Try to acquire PID from bus service */
+
+ r = sd_bus_get_name_creds(u->manager->api_bus, name, SD_BUS_CREDS_PID, &creds);
+ if (r >= 0)
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r >= 0) {
+ log_unit_debug(u, "D-Bus name %s is now owned by process " PID_FMT, name, pid);
+
+ service_set_main_pid(s, pid);
+ unit_watch_pid(UNIT(s), pid);
+ }
+ }
+}
+
+int service_set_socket_fd(Service *s, int fd, Socket *sock, bool selinux_context_net) {
+ _cleanup_free_ char *peer = NULL;
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+
+ /* This is called by the socket code when instantiating a new service for a stream socket and the socket needs
+ * to be configured. We take ownership of the passed fd on success. */
+
+ if (UNIT(s)->load_state != UNIT_LOADED)
+ return -EINVAL;
+
+ if (s->socket_fd >= 0)
+ return -EBUSY;
+
+ if (s->state != SERVICE_DEAD)
+ return -EAGAIN;
+
+ if (getpeername_pretty(fd, true, &peer) >= 0) {
+
+ if (UNIT(s)->description) {
+ _cleanup_free_ char *a;
+
+ a = strjoin(UNIT(s)->description, " (", peer, ")");
+ if (!a)
+ return -ENOMEM;
+
+ r = unit_set_description(UNIT(s), a);
+ } else
+ r = unit_set_description(UNIT(s), peer);
+
+ if (r < 0)
+ return r;
+ }
+
+ r = unit_add_two_dependencies(UNIT(sock), UNIT_BEFORE, UNIT_TRIGGERS, UNIT(s), false, UNIT_DEPENDENCY_IMPLICIT);
+ if (r < 0)
+ return r;
+
+ s->socket_fd = fd;
+ s->socket_fd_selinux_context_net = selinux_context_net;
+
+ unit_ref_set(&s->accept_socket, UNIT(s), UNIT(sock));
+ return 0;
+}
+
+static void service_reset_failed(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ if (s->state == SERVICE_FAILED)
+ service_set_state(s, SERVICE_DEAD);
+
+ s->result = SERVICE_SUCCESS;
+ s->reload_result = SERVICE_SUCCESS;
+ s->n_restarts = 0;
+ s->flush_n_restarts = false;
+}
+
+static int service_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ return unit_kill_common(u, who, signo, s->main_pid, s->control_pid, error);
+}
+
+static int service_main_pid(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ return s->main_pid;
+}
+
+static int service_control_pid(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ return s->control_pid;
+}
+
+static bool service_needs_console(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ /* We provide our own implementation of this here, instead of relying of the generic implementation
+ * unit_needs_console() provides, since we want to return false if we are in SERVICE_EXITED state. */
+
+ if (!exec_context_may_touch_console(&s->exec_context))
+ return false;
+
+ return IN_SET(s->state,
+ SERVICE_START_PRE,
+ SERVICE_START,
+ SERVICE_START_POST,
+ SERVICE_RUNNING,
+ SERVICE_RELOAD,
+ SERVICE_STOP,
+ SERVICE_STOP_WATCHDOG,
+ SERVICE_STOP_SIGTERM,
+ SERVICE_STOP_SIGKILL,
+ SERVICE_STOP_POST,
+ SERVICE_FINAL_SIGTERM,
+ SERVICE_FINAL_SIGKILL);
+}
+
+static int service_exit_status(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(u);
+
+ if (s->main_exec_status.pid <= 0 ||
+ !dual_timestamp_is_set(&s->main_exec_status.exit_timestamp))
+ return -ENODATA;
+
+ if (s->main_exec_status.code != CLD_EXITED)
+ return -EBADE;
+
+ return s->main_exec_status.status;
+}
+
+static const char* const service_restart_table[_SERVICE_RESTART_MAX] = {
+ [SERVICE_RESTART_NO] = "no",
+ [SERVICE_RESTART_ON_SUCCESS] = "on-success",
+ [SERVICE_RESTART_ON_FAILURE] = "on-failure",
+ [SERVICE_RESTART_ON_ABNORMAL] = "on-abnormal",
+ [SERVICE_RESTART_ON_WATCHDOG] = "on-watchdog",
+ [SERVICE_RESTART_ON_ABORT] = "on-abort",
+ [SERVICE_RESTART_ALWAYS] = "always",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_restart, ServiceRestart);
+
+static const char* const service_type_table[_SERVICE_TYPE_MAX] = {
+ [SERVICE_SIMPLE] = "simple",
+ [SERVICE_FORKING] = "forking",
+ [SERVICE_ONESHOT] = "oneshot",
+ [SERVICE_DBUS] = "dbus",
+ [SERVICE_NOTIFY] = "notify",
+ [SERVICE_IDLE] = "idle",
+ [SERVICE_EXEC] = "exec",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_type, ServiceType);
+
+static const char* const service_exec_command_table[_SERVICE_EXEC_COMMAND_MAX] = {
+ [SERVICE_EXEC_START_PRE] = "ExecStartPre",
+ [SERVICE_EXEC_START] = "ExecStart",
+ [SERVICE_EXEC_START_POST] = "ExecStartPost",
+ [SERVICE_EXEC_RELOAD] = "ExecReload",
+ [SERVICE_EXEC_STOP] = "ExecStop",
+ [SERVICE_EXEC_STOP_POST] = "ExecStopPost",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_exec_command, ServiceExecCommand);
+
+static const char* const notify_state_table[_NOTIFY_STATE_MAX] = {
+ [NOTIFY_UNKNOWN] = "unknown",
+ [NOTIFY_READY] = "ready",
+ [NOTIFY_RELOADING] = "reloading",
+ [NOTIFY_STOPPING] = "stopping",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(notify_state, NotifyState);
+
+static const char* const service_result_table[_SERVICE_RESULT_MAX] = {
+ [SERVICE_SUCCESS] = "success",
+ [SERVICE_FAILURE_RESOURCES] = "resources",
+ [SERVICE_FAILURE_PROTOCOL] = "protocol",
+ [SERVICE_FAILURE_TIMEOUT] = "timeout",
+ [SERVICE_FAILURE_EXIT_CODE] = "exit-code",
+ [SERVICE_FAILURE_SIGNAL] = "signal",
+ [SERVICE_FAILURE_CORE_DUMP] = "core-dump",
+ [SERVICE_FAILURE_WATCHDOG] = "watchdog",
+ [SERVICE_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_result, ServiceResult);
+
+const UnitVTable service_vtable = {
+ .object_size = sizeof(Service),
+ .exec_context_offset = offsetof(Service, exec_context),
+ .cgroup_context_offset = offsetof(Service, cgroup_context),
+ .kill_context_offset = offsetof(Service, kill_context),
+ .exec_runtime_offset = offsetof(Service, exec_runtime),
+ .dynamic_creds_offset = offsetof(Service, dynamic_creds),
+
+ .sections =
+ "Unit\0"
+ "Service\0"
+ "Install\0",
+ .private_section = "Service",
+
+ .can_transient = true,
+ .can_delegate = true,
+
+ .init = service_init,
+ .done = service_done,
+ .load = service_load,
+ .release_resources = service_release_resources,
+
+ .coldplug = service_coldplug,
+
+ .dump = service_dump,
+
+ .start = service_start,
+ .stop = service_stop,
+ .reload = service_reload,
+
+ .can_reload = service_can_reload,
+
+ .kill = service_kill,
+
+ .serialize = service_serialize,
+ .deserialize_item = service_deserialize_item,
+
+ .active_state = service_active_state,
+ .sub_state_to_string = service_sub_state_to_string,
+
+ .will_restart = service_will_restart,
+
+ .may_gc = service_may_gc,
+
+ .sigchld_event = service_sigchld_event,
+
+ .reset_failed = service_reset_failed,
+
+ .notify_cgroup_empty = service_notify_cgroup_empty_event,
+ .notify_message = service_notify_message,
+
+ .main_pid = service_main_pid,
+ .control_pid = service_control_pid,
+
+ .bus_name_owner_change = service_bus_name_owner_change,
+
+ .bus_vtable = bus_service_vtable,
+ .bus_set_property = bus_service_set_property,
+ .bus_commit_properties = bus_service_commit_properties,
+
+ .get_timeout = service_get_timeout,
+ .needs_console = service_needs_console,
+ .exit_status = service_exit_status,
+
+ .status_message_formats = {
+ .starting_stopping = {
+ [0] = "Starting %s...",
+ [1] = "Stopping %s...",
+ },
+ .finished_start_job = {
+ [JOB_DONE] = "Started %s.",
+ [JOB_FAILED] = "Failed to start %s.",
+ },
+ .finished_stop_job = {
+ [JOB_DONE] = "Stopped %s.",
+ [JOB_FAILED] = "Stopped (with error) %s.",
+ },
+ },
+};
diff --git a/src/core/service.h b/src/core/service.h
new file mode 100644
index 0000000..9c4340c
--- /dev/null
+++ b/src/core/service.h
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Service Service;
+typedef struct ServiceFDStore ServiceFDStore;
+
+#include "exit-status.h"
+#include "kill.h"
+#include "path.h"
+#include "ratelimit.h"
+#include "socket.h"
+#include "unit.h"
+
+typedef enum ServiceRestart {
+ SERVICE_RESTART_NO,
+ SERVICE_RESTART_ON_SUCCESS,
+ SERVICE_RESTART_ON_FAILURE,
+ SERVICE_RESTART_ON_ABNORMAL,
+ SERVICE_RESTART_ON_WATCHDOG,
+ SERVICE_RESTART_ON_ABORT,
+ SERVICE_RESTART_ALWAYS,
+ _SERVICE_RESTART_MAX,
+ _SERVICE_RESTART_INVALID = -1
+} ServiceRestart;
+
+typedef enum ServiceType {
+ SERVICE_SIMPLE, /* we fork and go on right-away (i.e. modern socket activated daemons) */
+ SERVICE_FORKING, /* forks by itself (i.e. traditional daemons) */
+ SERVICE_ONESHOT, /* we fork and wait until the program finishes (i.e. programs like fsck which run and need to finish before we continue) */
+ SERVICE_DBUS, /* we fork and wait until a specific D-Bus name appears on the bus */
+ SERVICE_NOTIFY, /* we fork and wait until a daemon sends us a ready message with sd_notify() */
+ SERVICE_IDLE, /* much like simple, but delay exec() until all jobs are dispatched. */
+ SERVICE_EXEC, /* we fork and wait until we execute exec() (this means our own setup is waited for) */
+ _SERVICE_TYPE_MAX,
+ _SERVICE_TYPE_INVALID = -1
+} ServiceType;
+
+typedef enum ServiceExecCommand {
+ SERVICE_EXEC_START_PRE,
+ SERVICE_EXEC_START,
+ SERVICE_EXEC_START_POST,
+ SERVICE_EXEC_RELOAD,
+ SERVICE_EXEC_STOP,
+ SERVICE_EXEC_STOP_POST,
+ _SERVICE_EXEC_COMMAND_MAX,
+ _SERVICE_EXEC_COMMAND_INVALID = -1
+} ServiceExecCommand;
+
+typedef enum NotifyState {
+ NOTIFY_UNKNOWN,
+ NOTIFY_READY,
+ NOTIFY_RELOADING,
+ NOTIFY_STOPPING,
+ _NOTIFY_STATE_MAX,
+ _NOTIFY_STATE_INVALID = -1
+} NotifyState;
+
+/* The values of this enum are referenced in man/systemd.exec.xml and src/shared/bus-unit-util.c.
+ * Update those sources for each change to this enum. */
+typedef enum ServiceResult {
+ SERVICE_SUCCESS,
+ SERVICE_FAILURE_RESOURCES, /* a bit of a misnomer, just our catch-all error for errnos we didn't expect */
+ SERVICE_FAILURE_PROTOCOL,
+ SERVICE_FAILURE_TIMEOUT,
+ SERVICE_FAILURE_EXIT_CODE,
+ SERVICE_FAILURE_SIGNAL,
+ SERVICE_FAILURE_CORE_DUMP,
+ SERVICE_FAILURE_WATCHDOG,
+ SERVICE_FAILURE_START_LIMIT_HIT,
+ _SERVICE_RESULT_MAX,
+ _SERVICE_RESULT_INVALID = -1
+} ServiceResult;
+
+struct ServiceFDStore {
+ Service *service;
+
+ int fd;
+ char *fdname;
+ sd_event_source *event_source;
+
+ LIST_FIELDS(ServiceFDStore, fd_store);
+};
+
+struct Service {
+ Unit meta;
+
+ ServiceType type;
+ ServiceRestart restart;
+ ExitStatusSet restart_prevent_status;
+ ExitStatusSet restart_force_status;
+ ExitStatusSet success_status;
+
+ /* If set we'll read the main daemon PID from this file */
+ char *pid_file;
+
+ usec_t restart_usec;
+ usec_t timeout_start_usec;
+ usec_t timeout_stop_usec;
+ usec_t runtime_max_usec;
+
+ dual_timestamp watchdog_timestamp;
+ usec_t watchdog_usec; /* the requested watchdog timeout in the unit file */
+ usec_t watchdog_original_usec; /* the watchdog timeout that was in effect when the unit was started, i.e. the timeout the forked off processes currently see */
+ usec_t watchdog_override_usec; /* the watchdog timeout requested by the service itself through sd_notify() */
+ bool watchdog_override_enable;
+ sd_event_source *watchdog_event_source;
+
+ ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
+
+ ExecContext exec_context;
+ KillContext kill_context;
+ CGroupContext cgroup_context;
+
+ ServiceState state, deserialized_state;
+
+ /* The exit status of the real main process */
+ ExecStatus main_exec_status;
+
+ /* The currently executed control process */
+ ExecCommand *control_command;
+
+ /* The currently executed main process, which may be NULL if
+ * the main process got started via forking mode and not by
+ * us */
+ ExecCommand *main_command;
+
+ /* The ID of the control command currently being executed */
+ ServiceExecCommand control_command_id;
+
+ /* Runtime data of the execution context */
+ ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
+
+ pid_t main_pid, control_pid;
+ int socket_fd;
+ SocketPeer *peer;
+ bool socket_fd_selinux_context_net;
+
+ bool permissions_start_only;
+ bool root_directory_start_only;
+ bool remain_after_exit;
+ bool guess_main_pid;
+
+ /* If we shut down, remember why */
+ ServiceResult result;
+ ServiceResult reload_result;
+
+ bool main_pid_known:1;
+ bool main_pid_alien:1;
+ bool bus_name_good:1;
+ bool forbid_restart:1;
+ /* Keep restart intention between UNIT_FAILED and UNIT_ACTIVATING */
+ bool will_auto_restart:1;
+ bool start_timeout_defined:1;
+
+ char *bus_name;
+ char *bus_name_owner; /* unique name of the current owner */
+
+ char *status_text;
+ int status_errno;
+
+ UnitRef accept_socket;
+
+ sd_event_source *timer_event_source;
+ PathSpec *pid_file_pathspec;
+
+ NotifyAccess notify_access;
+ NotifyState notify_state;
+
+ sd_event_source *exec_fd_event_source;
+
+ ServiceFDStore *fd_store;
+ size_t n_fd_store;
+ unsigned n_fd_store_max;
+ unsigned n_keep_fd_store;
+
+ char *usb_function_descriptors;
+ char *usb_function_strings;
+
+ int stdin_fd;
+ int stdout_fd;
+ int stderr_fd;
+
+ unsigned n_restarts;
+ bool flush_n_restarts;
+ bool exec_fd_hot;
+};
+
+extern const UnitVTable service_vtable;
+
+int service_set_socket_fd(Service *s, int fd, struct Socket *socket, bool selinux_context_net);
+void service_close_socket_fd(Service *s);
+
+const char* service_restart_to_string(ServiceRestart i) _const_;
+ServiceRestart service_restart_from_string(const char *s) _pure_;
+
+const char* service_type_to_string(ServiceType i) _const_;
+ServiceType service_type_from_string(const char *s) _pure_;
+
+const char* service_exec_command_to_string(ServiceExecCommand i) _const_;
+ServiceExecCommand service_exec_command_from_string(const char *s) _pure_;
+
+const char* notify_state_to_string(NotifyState i) _const_;
+NotifyState notify_state_from_string(const char *s) _pure_;
+
+const char* service_result_to_string(ServiceResult i) _const_;
+ServiceResult service_result_from_string(const char *s) _pure_;
+
+DEFINE_CAST(SERVICE, Service);
+
+#define STATUS_TEXT_MAX (16U*1024U)
diff --git a/src/core/show-status.c b/src/core/show-status.c
new file mode 100644
index 0000000..f748a82
--- /dev/null
+++ b/src/core/show-status.c
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "parse-util.h"
+#include "show-status.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static const char* const show_status_table[_SHOW_STATUS_MAX] = {
+ [SHOW_STATUS_NO] = "no",
+ [SHOW_STATUS_AUTO] = "auto",
+ [SHOW_STATUS_TEMPORARY] = "temporary",
+ [SHOW_STATUS_YES] = "yes",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(show_status, ShowStatus, SHOW_STATUS_YES);
+
+int parse_show_status(const char *v, ShowStatus *ret) {
+ ShowStatus s;
+
+ assert(ret);
+
+ s = show_status_from_string(v);
+ if (s < 0 || s == SHOW_STATUS_TEMPORARY)
+ return -EINVAL;
+
+ *ret = s;
+ return 0;
+}
+
+int status_vprintf(const char *status, ShowStatusFlags flags, const char *format, va_list ap) {
+ static const char status_indent[] = " "; /* "[" STATUS "] " */
+ _cleanup_free_ char *s = NULL;
+ _cleanup_close_ int fd = -1;
+ struct iovec iovec[7] = {};
+ int n = 0;
+ static bool prev_ephemeral;
+
+ assert(format);
+
+ /* This is independent of logging, as status messages are
+ * optional and go exclusively to the console. */
+
+ if (vasprintf(&s, format, ap) < 0)
+ return log_oom();
+
+ /* Before you ask: yes, on purpose we open/close the console for each status line we write individually. This
+ * is a good strategy to avoid PID 1 getting killed by the kernel's SAK concept (it doesn't fix this entirely,
+ * but minimizes the time window the kernel might end up killing PID 1 due to SAK). It also makes things easier
+ * for us so that we don't have to recover from hangups and suchlike triggered on the console. */
+
+ fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ if (FLAGS_SET(flags, SHOW_STATUS_ELLIPSIZE)) {
+ char *e;
+ size_t emax, sl;
+ int c;
+
+ c = fd_columns(fd);
+ if (c <= 0)
+ c = 80;
+
+ sl = status ? sizeof(status_indent)-1 : 0;
+
+ emax = c - sl - 1;
+ if (emax < 3)
+ emax = 3;
+
+ e = ellipsize(s, emax, 50);
+ if (e)
+ free_and_replace(s, e);
+ }
+
+ if (prev_ephemeral)
+ iovec[n++] = IOVEC_MAKE_STRING(ANSI_REVERSE_LINEFEED "\r" ANSI_ERASE_TO_END_OF_LINE);
+
+ if (status) {
+ if (!isempty(status)) {
+ iovec[n++] = IOVEC_MAKE_STRING("[");
+ iovec[n++] = IOVEC_MAKE_STRING(status);
+ iovec[n++] = IOVEC_MAKE_STRING("] ");
+ } else
+ iovec[n++] = IOVEC_MAKE_STRING(status_indent);
+ }
+
+ iovec[n++] = IOVEC_MAKE_STRING(s);
+ iovec[n++] = IOVEC_MAKE_STRING("\n");
+
+ if (prev_ephemeral && !FLAGS_SET(flags, SHOW_STATUS_EPHEMERAL))
+ iovec[n++] = IOVEC_MAKE_STRING(ANSI_ERASE_TO_END_OF_LINE);
+ prev_ephemeral = FLAGS_SET(flags, SHOW_STATUS_EPHEMERAL) ;
+
+ if (writev(fd, iovec, n) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int status_printf(const char *status, ShowStatusFlags flags, const char *format, ...) {
+ va_list ap;
+ int r;
+
+ assert(format);
+
+ va_start(ap, format);
+ r = status_vprintf(status, flags, format, ap);
+ va_end(ap);
+
+ return r;
+}
diff --git a/src/core/show-status.h b/src/core/show-status.h
new file mode 100644
index 0000000..f574d92
--- /dev/null
+++ b/src/core/show-status.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+/* Manager status */
+
+typedef enum ShowStatus {
+ SHOW_STATUS_NO,
+ SHOW_STATUS_AUTO,
+ SHOW_STATUS_TEMPORARY,
+ SHOW_STATUS_YES,
+ _SHOW_STATUS_MAX,
+ _SHOW_STATUS_INVALID = -1,
+} ShowStatus;
+
+typedef enum ShowStatusFlags {
+ SHOW_STATUS_ELLIPSIZE = 1 << 0,
+ SHOW_STATUS_EPHEMERAL = 1 << 1,
+} ShowStatusFlags;
+
+ShowStatus show_status_from_string(const char *v) _const_;
+const char* show_status_to_string(ShowStatus s) _pure_;
+int parse_show_status(const char *v, ShowStatus *ret);
+
+int status_vprintf(const char *status, ShowStatusFlags flags, const char *format, va_list ap) _printf_(3,0);
+int status_printf(const char *status, ShowStatusFlags flags, const char *format, ...) _printf_(3,4);
diff --git a/src/core/shutdown.c b/src/core/shutdown.c
new file mode 100644
index 0000000..cb47ee8
--- /dev/null
+++ b/src/core/shutdown.c
@@ -0,0 +1,545 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2010 ProFUSION embedded systems
+***/
+
+#include <errno.h>
+#include <getopt.h>
+#include <linux/reboot.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/reboot.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "async.h"
+#include "cgroup-util.h"
+#include "def.h"
+#include "exec-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "killall.h"
+#include "log.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "reboot-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "switch-root.h"
+#include "terminal-util.h"
+#include "umount.h"
+#include "util.h"
+#include "virt.h"
+#include "watchdog.h"
+
+#define SYNC_PROGRESS_ATTEMPTS 3
+#define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC)
+
+static char* arg_verb;
+static uint8_t arg_exit_code;
+static usec_t arg_timeout = DEFAULT_TIMEOUT_USEC;
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_LOG_LEVEL = 0x100,
+ ARG_LOG_TARGET,
+ ARG_LOG_COLOR,
+ ARG_LOG_LOCATION,
+ ARG_EXIT_CODE,
+ ARG_TIMEOUT,
+ };
+
+ static const struct option options[] = {
+ { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
+ { "log-target", required_argument, NULL, ARG_LOG_TARGET },
+ { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
+ { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
+ { "exit-code", required_argument, NULL, ARG_EXIT_CODE },
+ { "timeout", required_argument, NULL, ARG_TIMEOUT },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 1);
+ assert(argv);
+
+ /* "-" prevents getopt from permuting argv[] and moving the verb away
+ * from argv[1]. Our interface to initrd promises it'll be there. */
+ while ((c = getopt_long(argc, argv, "-", options, NULL)) >= 0)
+ switch (c) {
+
+ case ARG_LOG_LEVEL:
+ r = log_set_max_level_from_string(optarg);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse log level %s, ignoring: %m", optarg);
+
+ break;
+
+ case ARG_LOG_TARGET:
+ r = log_set_target_from_string(optarg);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse log target %s, ignoring: %m", optarg);
+
+ break;
+
+ case ARG_LOG_COLOR:
+
+ if (optarg) {
+ r = log_show_color_from_string(optarg);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse log color setting %s, ignoring: %m", optarg);
+ } else
+ log_show_color(true);
+
+ break;
+
+ case ARG_LOG_LOCATION:
+ if (optarg) {
+ r = log_show_location_from_string(optarg);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse log location setting %s, ignoring: %m", optarg);
+ } else
+ log_show_location(true);
+
+ break;
+
+ case ARG_EXIT_CODE:
+ r = safe_atou8(optarg, &arg_exit_code);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse exit code %s, ignoring: %m", optarg);
+
+ break;
+
+ case ARG_TIMEOUT:
+ r = parse_sec(optarg, &arg_timeout);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse shutdown timeout %s, ignoring: %m", optarg);
+
+ break;
+
+ case '\001':
+ if (!arg_verb)
+ arg_verb = optarg;
+ else
+ log_error("Excess arguments, ignoring");
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option code.");
+ }
+
+ if (!arg_verb)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Verb argument missing.");
+
+ return 0;
+}
+
+static int switch_root_initramfs(void) {
+ if (mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND, NULL) < 0)
+ return log_error_errno(errno, "Failed to mount bind /run/initramfs on /run/initramfs: %m");
+
+ if (mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL) < 0)
+ return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m");
+
+ /* switch_root with MS_BIND, because there might still be processes lurking around, which have open file descriptors.
+ * /run/initramfs/shutdown will take care of these.
+ * Also do not detach the old root, because /run/initramfs/shutdown needs to access it.
+ */
+ return switch_root("/run/initramfs", "/oldroot", false, MS_BIND);
+}
+
+/* Read the following fields from /proc/meminfo:
+ *
+ * NFS_Unstable
+ * Writeback
+ * Dirty
+ *
+ * Return true if the sum of these fields is greater than the previous
+ * value input. For all other issues, report the failure and indicate that
+ * the sync is not making progress.
+ */
+static bool sync_making_progress(unsigned long long *prev_dirty) {
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned long long val = 0;
+ bool r = false;
+
+ f = fopen("/proc/meminfo", "re");
+ if (!f)
+ return log_warning_errno(errno, "Failed to open /proc/meminfo: %m");
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ unsigned long long ull = 0;
+ int q;
+
+ q = read_line(f, LONG_LINE_MAX, &line);
+ if (q < 0)
+ return log_warning_errno(q, "Failed to parse /proc/meminfo: %m");
+ if (q == 0)
+ break;
+
+ if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:"))
+ continue;
+
+ errno = 0;
+ if (sscanf(line, "%*s %llu %*s", &ull) != 1) {
+ if (errno != 0)
+ log_warning_errno(errno, "Failed to parse /proc/meminfo: %m");
+ else
+ log_warning("Failed to parse /proc/meminfo");
+
+ return false;
+ }
+
+ val += ull;
+ }
+
+ r = *prev_dirty > val;
+
+ *prev_dirty = val;
+
+ return r;
+}
+
+static void sync_with_progress(void) {
+ unsigned long long dirty = ULONG_LONG_MAX;
+ unsigned checks;
+ pid_t pid;
+ int r;
+
+ BLOCK_SIGNALS(SIGCHLD);
+
+ /* Due to the possiblity of the sync operation hanging, we fork a child process and monitor the progress. If
+ * the timeout lapses, the assumption is that that particular sync stalled. */
+
+ r = asynchronous_sync(&pid);
+ if (r < 0) {
+ log_error_errno(r, "Failed to fork sync(): %m");
+ return;
+ }
+
+ log_info("Syncing filesystems and block devices.");
+
+ /* Start monitoring the sync operation. If more than
+ * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
+ * we assume that the sync is stalled */
+ for (checks = 0; checks < SYNC_PROGRESS_ATTEMPTS; checks++) {
+ r = wait_for_terminate_with_timeout(pid, SYNC_TIMEOUT_USEC);
+ if (r == 0)
+ /* Sync finished without error.
+ * (The sync itself does not return an error code) */
+ return;
+ else if (r == -ETIMEDOUT) {
+ /* Reset the check counter if the "Dirty" value is
+ * decreasing */
+ if (sync_making_progress(&dirty))
+ checks = 0;
+ } else {
+ log_error_errno(r, "Failed to sync filesystems and block devices: %m");
+ return;
+ }
+ }
+
+ /* Only reached in the event of a timeout. We should issue a kill
+ * to the stray process. */
+ log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".", pid);
+ (void) kill(pid, SIGKILL);
+}
+
+int main(int argc, char *argv[]) {
+ bool need_umount, need_swapoff, need_loop_detach, need_dm_detach;
+ bool in_container, use_watchdog = false, can_initrd;
+ _cleanup_free_ char *cgroup = NULL;
+ char *arguments[3];
+ int cmd, r, umount_log_level = LOG_INFO;
+ static const char* const dirs[] = {SYSTEM_SHUTDOWN_PATH, NULL};
+ char *watchdog_device;
+
+ /* The log target defaults to console, but the original systemd process will pass its log target in through a
+ * command line argument, which will override this default. Also, ensure we'll never log to the journal or
+ * syslog, as these logging daemons are either already dead or will die very soon. */
+
+ log_set_target(LOG_TARGET_CONSOLE);
+ log_set_prohibit_ipc(true);
+ log_parse_environment();
+
+ r = parse_argv(argc, argv);
+ if (r < 0)
+ goto error;
+
+ log_open();
+
+ umask(0022);
+
+ if (getpid_cached() != 1) {
+ log_error("Not executed by init (PID 1).");
+ r = -EPERM;
+ goto error;
+ }
+
+ if (streq(arg_verb, "reboot"))
+ cmd = RB_AUTOBOOT;
+ else if (streq(arg_verb, "poweroff"))
+ cmd = RB_POWER_OFF;
+ else if (streq(arg_verb, "halt"))
+ cmd = RB_HALT_SYSTEM;
+ else if (streq(arg_verb, "kexec"))
+ cmd = LINUX_REBOOT_CMD_KEXEC;
+ else if (streq(arg_verb, "exit"))
+ cmd = 0; /* ignored, just checking that arg_verb is valid */
+ else {
+ log_error("Unknown action '%s'.", arg_verb);
+ r = -EINVAL;
+ goto error;
+ }
+
+ (void) cg_get_root_path(&cgroup);
+ in_container = detect_container() > 0;
+
+ use_watchdog = getenv("WATCHDOG_USEC");
+ watchdog_device = getenv("WATCHDOG_DEVICE");
+ if (watchdog_device) {
+ r = watchdog_set_device(watchdog_device);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m",
+ watchdog_device);
+ }
+
+ /* Lock us into memory */
+ (void) mlockall(MCL_CURRENT|MCL_FUTURE);
+
+ /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
+ * slow IO is processed here already and the final process killing spree is not impacted by processes
+ * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
+ * result. */
+ if (!in_container)
+ sync_with_progress();
+
+ disable_coredumps();
+
+ log_info("Sending SIGTERM to remaining processes...");
+ broadcast_signal(SIGTERM, true, true, arg_timeout);
+
+ log_info("Sending SIGKILL to remaining processes...");
+ broadcast_signal(SIGKILL, true, false, arg_timeout);
+
+ need_umount = !in_container;
+ need_swapoff = !in_container;
+ need_loop_detach = !in_container;
+ need_dm_detach = !in_container;
+ can_initrd = !in_container && !in_initrd() && access("/run/initramfs/shutdown", X_OK) == 0;
+
+ /* Unmount all mountpoints, swaps, and loopback devices */
+ for (;;) {
+ bool changed = false;
+
+ if (use_watchdog)
+ watchdog_ping();
+
+ /* Let's trim the cgroup tree on each iteration so
+ that we leave an empty cgroup tree around, so that
+ container managers get a nice notify event when we
+ are down */
+ if (cgroup)
+ cg_trim(SYSTEMD_CGROUP_CONTROLLER, cgroup, false);
+
+ if (need_umount) {
+ log_info("Unmounting file systems.");
+ r = umount_all(&changed, umount_log_level);
+ if (r == 0) {
+ need_umount = false;
+ log_info("All filesystems unmounted.");
+ } else if (r > 0)
+ log_info("Not all file systems unmounted, %d left.", r);
+ else
+ log_error_errno(r, "Failed to unmount file systems: %m");
+ }
+
+ if (need_swapoff) {
+ log_info("Deactivating swaps.");
+ r = swapoff_all(&changed);
+ if (r == 0) {
+ need_swapoff = false;
+ log_info("All swaps deactivated.");
+ } else if (r > 0)
+ log_info("Not all swaps deactivated, %d left.", r);
+ else
+ log_error_errno(r, "Failed to deactivate swaps: %m");
+ }
+
+ if (need_loop_detach) {
+ log_info("Detaching loop devices.");
+ r = loopback_detach_all(&changed, umount_log_level);
+ if (r == 0) {
+ need_loop_detach = false;
+ log_info("All loop devices detached.");
+ } else if (r > 0)
+ log_info("Not all loop devices detached, %d left.", r);
+ else
+ log_error_errno(r, "Failed to detach loop devices: %m");
+ }
+
+ if (need_dm_detach) {
+ log_info("Detaching DM devices.");
+ r = dm_detach_all(&changed, umount_log_level);
+ if (r == 0) {
+ need_dm_detach = false;
+ log_info("All DM devices detached.");
+ } else if (r > 0)
+ log_info("Not all DM devices detached, %d left.", r);
+ else
+ log_error_errno(r, "Failed to detach DM devices: %m");
+ }
+
+ if (!need_umount && !need_swapoff && !need_loop_detach && !need_dm_detach) {
+ log_info("All filesystems, swaps, loop devices and DM devices detached.");
+ /* Yay, done */
+ break;
+ }
+
+ if (!changed && umount_log_level == LOG_INFO && !can_initrd) {
+ /* There are things we cannot get rid of. Loop one more time
+ * with LOG_ERR to inform the user. Note that we don't need
+ * to do this if there is a initrd to switch to, because that
+ * one is likely to get rid of the remounting mounts. If not,
+ * it will log about them. */
+ umount_log_level = LOG_ERR;
+ continue;
+ }
+
+ /* If in this iteration we didn't manage to
+ * unmount/deactivate anything, we simply give up */
+ if (!changed) {
+ log_info("Cannot finalize remaining%s%s%s%s continuing.",
+ need_umount ? " file systems," : "",
+ need_swapoff ? " swap devices," : "",
+ need_loop_detach ? " loop devices," : "",
+ need_dm_detach ? " DM devices," : "");
+ break;
+ }
+
+ log_debug("Couldn't finalize remaining %s%s%s%s trying again.",
+ need_umount ? " file systems," : "",
+ need_swapoff ? " swap devices," : "",
+ need_loop_detach ? " loop devices," : "",
+ need_dm_detach ? " DM devices," : "");
+ }
+
+ /* We're done with the watchdog. */
+ watchdog_free_device();
+
+ arguments[0] = NULL;
+ arguments[1] = arg_verb;
+ arguments[2] = NULL;
+ execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments, NULL);
+
+ (void) rlimit_nofile_safe();
+
+ if (can_initrd) {
+ r = switch_root_initramfs();
+ if (r >= 0) {
+ argv[0] = (char*) "/shutdown";
+
+ (void) setsid();
+ (void) make_console_stdio();
+
+ log_info("Successfully changed into root pivot.\n"
+ "Returning to initrd...");
+
+ execv("/shutdown", argv);
+ log_error_errno(errno, "Failed to execute shutdown binary: %m");
+ } else
+ log_error_errno(r, "Failed to switch root to \"/run/initramfs\": %m");
+
+ }
+
+ if (need_umount || need_swapoff || need_loop_detach || need_dm_detach)
+ log_error("Failed to finalize %s%s%s%s ignoring",
+ need_umount ? " file systems," : "",
+ need_swapoff ? " swap devices," : "",
+ need_loop_detach ? " loop devices," : "",
+ need_dm_detach ? " DM devices," : "");
+
+ /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
+ * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
+ * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
+ * let's do it once more. Do not remove this sync, data corruption will result. */
+ if (!in_container)
+ sync_with_progress();
+
+ if (streq(arg_verb, "exit")) {
+ if (in_container)
+ return arg_exit_code;
+
+ cmd = RB_POWER_OFF; /* We cannot exit() on the host, fallback on another method. */
+ }
+
+ switch (cmd) {
+
+ case LINUX_REBOOT_CMD_KEXEC:
+
+ if (!in_container) {
+ /* We cheat and exec kexec to avoid doing all its work */
+ log_info("Rebooting with kexec.");
+
+ r = safe_fork("(sd-kexec)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_WAIT, NULL);
+ if (r == 0) {
+ const char * const args[] = {
+ KEXEC, "-e", NULL
+ };
+
+ /* Child */
+
+ execv(args[0], (char * const *) args);
+ _exit(EXIT_FAILURE);
+ }
+
+ /* If we are still running, then the kexec can't have worked, let's fall through */
+ }
+
+ cmd = RB_AUTOBOOT;
+ _fallthrough_;
+
+ case RB_AUTOBOOT:
+ (void) reboot_with_parameter(REBOOT_LOG);
+ log_info("Rebooting.");
+ break;
+
+ case RB_POWER_OFF:
+ log_info("Powering off.");
+ break;
+
+ case RB_HALT_SYSTEM:
+ log_info("Halting system.");
+ break;
+
+ default:
+ assert_not_reached("Unknown magic");
+ }
+
+ (void) reboot(cmd);
+ if (errno == EPERM && in_container) {
+ /* If we are in a container, and we lacked
+ * CAP_SYS_BOOT just exit, this will kill our
+ * container for good. */
+ log_info("Exiting container.");
+ return EXIT_SUCCESS;
+ }
+
+ r = log_error_errno(errno, "Failed to invoke reboot(): %m");
+
+ error:
+ log_emergency_errno(r, "Critical error while doing system shutdown: %m");
+ freeze();
+}
diff --git a/src/core/slice.c b/src/core/slice.c
new file mode 100644
index 0000000..15b18bc
--- /dev/null
+++ b/src/core/slice.c
@@ -0,0 +1,397 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "dbus-slice.h"
+#include "dbus-unit.h"
+#include "log.h"
+#include "serialize.h"
+#include "slice.h"
+#include "special.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit.h"
+
+static const UnitActiveState state_translation_table[_SLICE_STATE_MAX] = {
+ [SLICE_DEAD] = UNIT_INACTIVE,
+ [SLICE_ACTIVE] = UNIT_ACTIVE
+};
+
+static void slice_init(Unit *u) {
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ u->ignore_on_isolate = true;
+}
+
+static void slice_set_state(Slice *t, SliceState state) {
+ SliceState old_state;
+ assert(t);
+
+ if (t->state != state)
+ bus_unit_send_pending_change_signal(UNIT(t), false);
+
+ old_state = t->state;
+ t->state = state;
+
+ if (state != old_state)
+ log_debug("%s changed %s -> %s",
+ UNIT(t)->id,
+ slice_state_to_string(old_state),
+ slice_state_to_string(state));
+
+ unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static int slice_add_parent_slice(Slice *s) {
+ Unit *u = UNIT(s), *parent;
+ _cleanup_free_ char *a = NULL;
+ int r;
+
+ assert(s);
+
+ if (UNIT_ISSET(u->slice))
+ return 0;
+
+ r = slice_build_parent_slice(u->id, &a);
+ if (r <= 0) /* 0 means root slice */
+ return r;
+
+ r = manager_load_unit(u->manager, a, NULL, NULL, &parent);
+ if (r < 0)
+ return r;
+
+ unit_ref_set(&u->slice, u, parent);
+ return 0;
+}
+
+static int slice_add_default_dependencies(Slice *s) {
+ int r;
+
+ assert(s);
+
+ if (!UNIT(s)->default_dependencies)
+ return 0;
+
+ /* Make sure slices are unloaded on shutdown */
+ r = unit_add_two_dependencies_by_name(
+ UNIT(s),
+ UNIT_BEFORE, UNIT_CONFLICTS,
+ SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int slice_verify(Slice *s) {
+ _cleanup_free_ char *parent = NULL;
+ int r;
+
+ assert(s);
+
+ if (UNIT(s)->load_state != UNIT_LOADED)
+ return 0;
+
+ if (!slice_name_is_valid(UNIT(s)->id)) {
+ log_unit_error(UNIT(s), "Slice name %s is not valid. Refusing.", UNIT(s)->id);
+ return -ENOEXEC;
+ }
+
+ r = slice_build_parent_slice(UNIT(s)->id, &parent);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to determine parent slice: %m");
+
+ if (parent ? !unit_has_name(UNIT_DEREF(UNIT(s)->slice), parent) : UNIT_ISSET(UNIT(s)->slice)) {
+ log_unit_error(UNIT(s), "Located outside of parent slice. Refusing.");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static int slice_load_root_slice(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_ROOT_SLICE))
+ return 0;
+
+ u->perpetual = true;
+
+ /* The root slice is a bit special. For example it is always running and cannot be terminated. Because of its
+ * special semantics we synthesize it here, instead of relying on the unit file on disk. */
+
+ u->default_dependencies = false;
+
+ if (!u->description)
+ u->description = strdup("Root Slice");
+ if (!u->documentation)
+ u->documentation = strv_new("man:systemd.special(7)");
+
+ return 1;
+}
+
+static int slice_load_system_slice(Unit *u) {
+ assert(u);
+
+ if (!MANAGER_IS_SYSTEM(u->manager))
+ return 0;
+ if (!unit_has_name(u, SPECIAL_SYSTEM_SLICE))
+ return 0;
+
+ u->perpetual = true;
+
+ /* The system slice is a bit special. For example it is always running and cannot be terminated. Because of its
+ * special semantics we synthesize it here, instead of relying on the unit file on disk. */
+
+ u->default_dependencies = false;
+
+ if (!u->description)
+ u->description = strdup("System Slice");
+ if (!u->documentation)
+ u->documentation = strv_new("man:systemd.special(7)");
+
+ return 1;
+}
+
+static int slice_load(Unit *u) {
+ Slice *s = SLICE(u);
+ int r;
+
+ assert(s);
+ assert(u->load_state == UNIT_STUB);
+
+ r = slice_load_root_slice(u);
+ if (r < 0)
+ return r;
+ r = slice_load_system_slice(u);
+ if (r < 0)
+ return r;
+
+ r = unit_load_fragment_and_dropin_optional(u);
+ if (r < 0)
+ return r;
+
+ /* This is a new unit? Then let's add in some extras */
+ if (u->load_state == UNIT_LOADED) {
+
+ r = unit_patch_contexts(u);
+ if (r < 0)
+ return r;
+
+ r = slice_add_parent_slice(s);
+ if (r < 0)
+ return r;
+
+ r = slice_add_default_dependencies(s);
+ if (r < 0)
+ return r;
+ }
+
+ return slice_verify(s);
+}
+
+static int slice_coldplug(Unit *u) {
+ Slice *t = SLICE(u);
+
+ assert(t);
+ assert(t->state == SLICE_DEAD);
+
+ if (t->deserialized_state != t->state)
+ slice_set_state(t, t->deserialized_state);
+
+ return 0;
+}
+
+static void slice_dump(Unit *u, FILE *f, const char *prefix) {
+ Slice *t = SLICE(u);
+
+ assert(t);
+ assert(f);
+
+ fprintf(f,
+ "%sSlice State: %s\n",
+ prefix, slice_state_to_string(t->state));
+
+ cgroup_context_dump(&t->cgroup_context, f, prefix);
+}
+
+static int slice_start(Unit *u) {
+ Slice *t = SLICE(u);
+ int r;
+
+ assert(t);
+ assert(t->state == SLICE_DEAD);
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ (void) unit_realize_cgroup(u);
+ (void) unit_reset_cpu_accounting(u);
+ (void) unit_reset_ip_accounting(u);
+
+ slice_set_state(t, SLICE_ACTIVE);
+ return 1;
+}
+
+static int slice_stop(Unit *u) {
+ Slice *t = SLICE(u);
+
+ assert(t);
+ assert(t->state == SLICE_ACTIVE);
+
+ /* We do not need to destroy the cgroup explicitly,
+ * unit_notify() will do that for us anyway. */
+
+ slice_set_state(t, SLICE_DEAD);
+ return 1;
+}
+
+static int slice_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {
+ return unit_kill_common(u, who, signo, -1, -1, error);
+}
+
+static int slice_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Slice *s = SLICE(u);
+
+ assert(s);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", slice_state_to_string(s->state));
+
+ return 0;
+}
+
+static int slice_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Slice *s = SLICE(u);
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ SliceState state;
+
+ state = slice_state_from_string(value);
+ if (state < 0)
+ log_debug("Failed to parse state value %s", value);
+ else
+ s->deserialized_state = state;
+
+ } else
+ log_debug("Unknown serialization key '%s'", key);
+
+ return 0;
+}
+
+_pure_ static UnitActiveState slice_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[SLICE(u)->state];
+}
+
+_pure_ static const char *slice_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return slice_state_to_string(SLICE(u)->state);
+}
+
+static int slice_make_perpetual(Manager *m, const char *name, Unit **ret) {
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(name);
+
+ u = manager_get_unit(m, name);
+ if (!u) {
+ r = unit_new_for_name(m, sizeof(Slice), name, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate the special %s unit: %m", name);
+ }
+
+ u->perpetual = true;
+ SLICE(u)->deserialized_state = SLICE_ACTIVE;
+
+ unit_add_to_load_queue(u);
+ unit_add_to_dbus_queue(u);
+
+ if (ret)
+ *ret = u;
+
+ return 0;
+}
+
+static void slice_enumerate_perpetual(Manager *m) {
+ Unit *u;
+ int r;
+
+ assert(m);
+
+ r = slice_make_perpetual(m, SPECIAL_ROOT_SLICE, &u);
+ if (r >= 0 && manager_owns_host_root_cgroup(m)) {
+ Slice *s = SLICE(u);
+
+ /* If we are managing the root cgroup then this means our root slice covers the whole system, which
+ * means the kernel will track CPU/tasks/memory for us anyway, and it is all available in /proc. Let's
+ * hence turn accounting on here, so that our APIs to query this data are available. */
+
+ s->cgroup_context.cpu_accounting = true;
+ s->cgroup_context.tasks_accounting = true;
+ s->cgroup_context.memory_accounting = true;
+ }
+
+ if (MANAGER_IS_SYSTEM(m))
+ (void) slice_make_perpetual(m, SPECIAL_SYSTEM_SLICE, NULL);
+}
+
+const UnitVTable slice_vtable = {
+ .object_size = sizeof(Slice),
+ .cgroup_context_offset = offsetof(Slice, cgroup_context),
+
+ .sections =
+ "Unit\0"
+ "Slice\0"
+ "Install\0",
+ .private_section = "Slice",
+
+ .can_transient = true,
+
+ .init = slice_init,
+ .load = slice_load,
+
+ .coldplug = slice_coldplug,
+
+ .dump = slice_dump,
+
+ .start = slice_start,
+ .stop = slice_stop,
+
+ .kill = slice_kill,
+
+ .serialize = slice_serialize,
+ .deserialize_item = slice_deserialize_item,
+
+ .active_state = slice_active_state,
+ .sub_state_to_string = slice_sub_state_to_string,
+
+ .bus_vtable = bus_slice_vtable,
+ .bus_set_property = bus_slice_set_property,
+ .bus_commit_properties = bus_slice_commit_properties,
+
+ .enumerate_perpetual = slice_enumerate_perpetual,
+
+ .status_message_formats = {
+ .finished_start_job = {
+ [JOB_DONE] = "Created slice %s.",
+ },
+ .finished_stop_job = {
+ [JOB_DONE] = "Removed slice %s.",
+ },
+ },
+};
diff --git a/src/core/slice.h b/src/core/slice.h
new file mode 100644
index 0000000..4678c08
--- /dev/null
+++ b/src/core/slice.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "unit.h"
+
+typedef struct Slice Slice;
+
+struct Slice {
+ Unit meta;
+
+ SliceState state, deserialized_state;
+
+ CGroupContext cgroup_context;
+};
+
+extern const UnitVTable slice_vtable;
+
+DEFINE_CAST(SLICE, Slice);
diff --git a/src/core/smack-setup.c b/src/core/smack-setup.c
new file mode 100644
index 0000000..49b37ae
--- /dev/null
+++ b/src/core/smack-setup.c
@@ -0,0 +1,421 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2013 Intel Corporation
+ Authors:
+ Nathaniel Chen <nathaniel.chen@intel.com>
+***/
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "smack-setup.h"
+#include "string-util.h"
+#include "util.h"
+
+#if ENABLE_SMACK
+
+static int write_access2_rules(const char* srcdir) {
+ _cleanup_close_ int load2_fd = -1, change_fd = -1;
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *entry;
+ int dfd = -1;
+ int r = 0;
+
+ load2_fd = open("/sys/fs/smackfs/load2", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (load2_fd < 0) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open '/sys/fs/smackfs/load2': %m");
+ return -errno; /* negative error */
+ }
+
+ change_fd = open("/sys/fs/smackfs/change-rule", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (change_fd < 0) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open '/sys/fs/smackfs/change-rule': %m");
+ return -errno; /* negative error */
+ }
+
+ /* write rules to load2 or change-rule from every file in the directory */
+ dir = opendir(srcdir);
+ if (!dir) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to opendir '%s': %m", srcdir);
+ return errno; /* positive on purpose */
+ }
+
+ dfd = dirfd(dir);
+ assert(dfd >= 0);
+
+ FOREACH_DIRENT(entry, dir, return 0) {
+ int fd;
+ _cleanup_fclose_ FILE *policy = NULL;
+
+ if (!dirent_is_file(entry))
+ continue;
+
+ fd = openat(dfd, entry->d_name, O_RDONLY|O_CLOEXEC);
+ if (fd < 0) {
+ if (r == 0)
+ r = -errno;
+ log_warning_errno(errno, "Failed to open '%s': %m", entry->d_name);
+ continue;
+ }
+
+ policy = fdopen(fd, "r");
+ if (!policy) {
+ if (r == 0)
+ r = -errno;
+ safe_close(fd);
+ log_error_errno(errno, "Failed to open '%s': %m", entry->d_name);
+ continue;
+ }
+
+ /* load2 write rules in the kernel require a line buffered stream */
+ for (;;) {
+ _cleanup_free_ char *buf = NULL, *sbj = NULL, *obj = NULL, *acc1 = NULL, *acc2 = NULL;
+ int q;
+
+ q = read_line(policy, NAME_MAX, &buf);
+ if (q < 0)
+ return log_error_errno(q, "Failed to read line from '%s': %m", entry->d_name);
+ if (q == 0)
+ break;
+
+ if (isempty(buf) || strchr(COMMENTS, buf[0]))
+ continue;
+
+ /* if 3 args -> load rule : subject object access1 */
+ /* if 4 args -> change rule : subject object access1 access2 */
+ if (sscanf(buf, "%ms %ms %ms %ms", &sbj, &obj, &acc1, &acc2) < 3) {
+ log_error_errno(errno, "Failed to parse rule '%s' in '%s', ignoring.", buf, entry->d_name);
+ continue;
+ }
+
+ if (write(isempty(acc2) ? load2_fd : change_fd, buf, strlen(buf)) < 0) {
+ if (r == 0)
+ r = -errno;
+ log_error_errno(errno, "Failed to write '%s' to '%s' in '%s': %m",
+ buf, isempty(acc2) ? "/sys/fs/smackfs/load2" : "/sys/fs/smackfs/change-rule", entry->d_name);
+ }
+ }
+ }
+
+ return r;
+}
+
+static int write_cipso2_rules(const char* srcdir) {
+ _cleanup_close_ int cipso2_fd = -1;
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *entry;
+ int dfd = -1;
+ int r = 0;
+
+ cipso2_fd = open("/sys/fs/smackfs/cipso2", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (cipso2_fd < 0) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open '/sys/fs/smackfs/cipso2': %m");
+ return -errno; /* negative error */
+ }
+
+ /* write rules to cipso2 from every file in the directory */
+ dir = opendir(srcdir);
+ if (!dir) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to opendir '%s': %m", srcdir);
+ return errno; /* positive on purpose */
+ }
+
+ dfd = dirfd(dir);
+ assert(dfd >= 0);
+
+ FOREACH_DIRENT(entry, dir, return 0) {
+ int fd;
+ _cleanup_fclose_ FILE *policy = NULL;
+
+ if (!dirent_is_file(entry))
+ continue;
+
+ fd = openat(dfd, entry->d_name, O_RDONLY|O_CLOEXEC);
+ if (fd < 0) {
+ if (r == 0)
+ r = -errno;
+ log_error_errno(errno, "Failed to open '%s': %m", entry->d_name);
+ continue;
+ }
+
+ policy = fdopen(fd, "r");
+ if (!policy) {
+ if (r == 0)
+ r = -errno;
+ safe_close(fd);
+ log_error_errno(errno, "Failed to open '%s': %m", entry->d_name);
+ continue;
+ }
+
+ /* cipso2 write rules in the kernel require a line buffered stream */
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ int q;
+
+ q = read_line(policy, NAME_MAX, &buf);
+ if (q < 0)
+ return log_error_errno(q, "Failed to read line from '%s': %m", entry->d_name);
+ if (q == 0)
+ break;
+
+ if (isempty(buf) || strchr(COMMENTS, buf[0]))
+ continue;
+
+ if (write(cipso2_fd, buf, strlen(buf)) < 0) {
+ if (r == 0)
+ r = -errno;
+ log_error_errno(errno, "Failed to write '%s' to '/sys/fs/smackfs/cipso2' in '%s': %m",
+ buf, entry->d_name);
+ break;
+ }
+ }
+ }
+
+ return r;
+}
+
+static int write_netlabel_rules(const char* srcdir) {
+ _cleanup_fclose_ FILE *dst = NULL;
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *entry;
+ int dfd = -1;
+ int r = 0;
+
+ dst = fopen("/sys/fs/smackfs/netlabel", "we");
+ if (!dst) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open /sys/fs/smackfs/netlabel: %m");
+ return -errno; /* negative error */
+ }
+
+ /* write rules to dst from every file in the directory */
+ dir = opendir(srcdir);
+ if (!dir) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to opendir %s: %m", srcdir);
+ return errno; /* positive on purpose */
+ }
+
+ dfd = dirfd(dir);
+ assert(dfd >= 0);
+
+ FOREACH_DIRENT(entry, dir, return 0) {
+ int fd;
+ _cleanup_fclose_ FILE *policy = NULL;
+
+ fd = openat(dfd, entry->d_name, O_RDONLY|O_CLOEXEC);
+ if (fd < 0) {
+ if (r == 0)
+ r = -errno;
+ log_warning_errno(errno, "Failed to open %s: %m", entry->d_name);
+ continue;
+ }
+
+ policy = fdopen(fd, "r");
+ if (!policy) {
+ if (r == 0)
+ r = -errno;
+ safe_close(fd);
+ log_error_errno(errno, "Failed to open %s: %m", entry->d_name);
+ continue;
+ }
+
+ (void) __fsetlocking(policy, FSETLOCKING_BYCALLER);
+
+ /* load2 write rules in the kernel require a line buffered stream */
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ int q;
+
+ q = read_line(policy, NAME_MAX, &buf);
+ if (q < 0)
+ return log_error_errno(q, "Failed to read line from %s: %m", entry->d_name);
+ if (q == 0)
+ break;
+
+ if (!fputs(buf, dst)) {
+ if (r == 0)
+ r = -EINVAL;
+ log_error_errno(errno, "Failed to write line to /sys/fs/smackfs/netlabel: %m");
+ break;
+ }
+ q = fflush_and_check(dst);
+ if (q < 0) {
+ if (r == 0)
+ r = q;
+ log_error_errno(q, "Failed to flush writes to /sys/fs/smackfs/netlabel: %m");
+ break;
+ }
+ }
+ }
+
+ return r;
+}
+
+static int write_onlycap_list(void) {
+ _cleanup_close_ int onlycap_fd = -1;
+ _cleanup_free_ char *list = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t len = 0, allocated = 0;
+ int r;
+
+ f = fopen("/etc/smack/onlycap", "re");
+ if (!f) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to read '/etc/smack/onlycap': %m");
+
+ return errno == ENOENT ? ENOENT : -errno;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ size_t l;
+
+ r = read_line(f, LONG_LINE_MAX, &buf);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read line from /etc/smack/onlycap: %m");
+ if (r == 0)
+ break;
+
+ if (isempty(buf) || strchr(COMMENTS, *buf))
+ continue;
+
+ l = strlen(buf);
+ if (!GREEDY_REALLOC(list, allocated, len + l + 1))
+ return log_oom();
+
+ stpcpy(list + len, buf)[0] = ' ';
+ len += l + 1;
+ }
+
+ if (len == 0)
+ return 0;
+
+ list[len - 1] = 0;
+
+ onlycap_fd = open("/sys/fs/smackfs/onlycap", O_WRONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (onlycap_fd < 0) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open '/sys/fs/smackfs/onlycap': %m");
+ return -errno; /* negative error */
+ }
+
+ r = write(onlycap_fd, list, len);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to write onlycap list(%s) to '/sys/fs/smackfs/onlycap': %m", list);
+
+ return 0;
+}
+
+#endif
+
+int mac_smack_setup(bool *loaded_policy) {
+
+#if ENABLE_SMACK
+
+ int r;
+
+ assert(loaded_policy);
+
+ r = write_access2_rules("/etc/smack/accesses.d/");
+ switch(r) {
+ case -ENOENT:
+ log_debug("Smack is not enabled in the kernel.");
+ return 0;
+ case ENOENT:
+ log_debug("Smack access rules directory '/etc/smack/accesses.d/' not found");
+ return 0;
+ case 0:
+ log_info("Successfully loaded Smack policies.");
+ break;
+ default:
+ log_warning_errno(r, "Failed to load Smack access rules, ignoring: %m");
+ return 0;
+ }
+
+#ifdef SMACK_RUN_LABEL
+ r = write_string_file("/proc/self/attr/current", SMACK_RUN_LABEL, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set SMACK label \"" SMACK_RUN_LABEL "\" on self: %m");
+ r = write_string_file("/sys/fs/smackfs/ambient", SMACK_RUN_LABEL, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set SMACK ambient label \"" SMACK_RUN_LABEL "\": %m");
+ r = write_string_file("/sys/fs/smackfs/netlabel",
+ "0.0.0.0/0 " SMACK_RUN_LABEL, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set SMACK netlabel rule \"0.0.0.0/0 " SMACK_RUN_LABEL "\": %m");
+ r = write_string_file("/sys/fs/smackfs/netlabel", "127.0.0.1 -CIPSO", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set SMACK netlabel rule \"127.0.0.1 -CIPSO\": %m");
+#endif
+
+ r = write_cipso2_rules("/etc/smack/cipso.d/");
+ switch(r) {
+ case -ENOENT:
+ log_debug("Smack/CIPSO is not enabled in the kernel.");
+ return 0;
+ case ENOENT:
+ log_debug("Smack/CIPSO access rules directory '/etc/smack/cipso.d/' not found");
+ break;
+ case 0:
+ log_info("Successfully loaded Smack/CIPSO policies.");
+ break;
+ default:
+ log_warning_errno(r, "Failed to load Smack/CIPSO access rules, ignoring: %m");
+ break;
+ }
+
+ r = write_netlabel_rules("/etc/smack/netlabel.d/");
+ switch(r) {
+ case -ENOENT:
+ log_debug("Smack/CIPSO is not enabled in the kernel.");
+ return 0;
+ case ENOENT:
+ log_debug("Smack network host rules directory '/etc/smack/netlabel.d/' not found");
+ break;
+ case 0:
+ log_info("Successfully loaded Smack network host rules.");
+ break;
+ default:
+ log_warning_errno(r, "Failed to load Smack network host rules: %m, ignoring.");
+ break;
+ }
+
+ r = write_onlycap_list();
+ switch(r) {
+ case -ENOENT:
+ log_debug("Smack is not enabled in the kernel.");
+ break;
+ case ENOENT:
+ log_debug("Smack onlycap list file '/etc/smack/onlycap' not found");
+ break;
+ case 0:
+ log_info("Successfully wrote Smack onlycap list.");
+ break;
+ default:
+ log_emergency_errno(r, "Failed to write Smack onlycap list: %m");
+ return r;
+ }
+
+ *loaded_policy = true;
+
+#endif
+
+ return 0;
+}
diff --git a/src/core/smack-setup.h b/src/core/smack-setup.h
new file mode 100644
index 0000000..b65daaf
--- /dev/null
+++ b/src/core/smack-setup.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2013 Intel Corporation
+ Authors:
+ Nathaniel Chen <nathaniel.chen@intel.com>
+***/
+
+int mac_smack_setup(bool *loaded_policy);
diff --git a/src/core/socket.c b/src/core/socket.c
new file mode 100644
index 0000000..af95e90
--- /dev/null
+++ b/src/core/socket.c
@@ -0,0 +1,3339 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <mqueue.h>
+#include <netinet/tcp.h>
+#include <signal.h>
+#include <sys/epoll.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/sctp.h>
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "copy.h"
+#include "dbus-socket.h"
+#include "dbus-unit.h"
+#include "def.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "in-addr-util.h"
+#include "io-util.h"
+#include "ip-protocol-list.h"
+#include "label.h"
+#include "log.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "serialize.h"
+#include "signal-util.h"
+#include "smack-util.h"
+#include "socket.h"
+#include "special.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit.h"
+#include "user-util.h"
+
+struct SocketPeer {
+ unsigned n_ref;
+
+ Socket *socket;
+ union sockaddr_union peer;
+ socklen_t peer_salen;
+};
+
+static const UnitActiveState state_translation_table[_SOCKET_STATE_MAX] = {
+ [SOCKET_DEAD] = UNIT_INACTIVE,
+ [SOCKET_START_PRE] = UNIT_ACTIVATING,
+ [SOCKET_START_CHOWN] = UNIT_ACTIVATING,
+ [SOCKET_START_POST] = UNIT_ACTIVATING,
+ [SOCKET_LISTENING] = UNIT_ACTIVE,
+ [SOCKET_RUNNING] = UNIT_ACTIVE,
+ [SOCKET_STOP_PRE] = UNIT_DEACTIVATING,
+ [SOCKET_STOP_PRE_SIGTERM] = UNIT_DEACTIVATING,
+ [SOCKET_STOP_PRE_SIGKILL] = UNIT_DEACTIVATING,
+ [SOCKET_STOP_POST] = UNIT_DEACTIVATING,
+ [SOCKET_FINAL_SIGTERM] = UNIT_DEACTIVATING,
+ [SOCKET_FINAL_SIGKILL] = UNIT_DEACTIVATING,
+ [SOCKET_FAILED] = UNIT_FAILED
+};
+
+static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int socket_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
+
+static void socket_init(Unit *u) {
+ Socket *s = SOCKET(u);
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ s->backlog = SOMAXCONN;
+ s->timeout_usec = u->manager->default_timeout_start_usec;
+ s->directory_mode = 0755;
+ s->socket_mode = 0666;
+
+ s->max_connections = 64;
+
+ s->priority = -1;
+ s->ip_tos = -1;
+ s->ip_ttl = -1;
+ s->mark = -1;
+
+ s->exec_context.std_output = u->manager->default_std_output;
+ s->exec_context.std_error = u->manager->default_std_error;
+
+ s->control_command_id = _SOCKET_EXEC_COMMAND_INVALID;
+
+ s->trigger_limit.interval = USEC_INFINITY;
+ s->trigger_limit.burst = (unsigned) -1;
+}
+
+static void socket_unwatch_control_pid(Socket *s) {
+ assert(s);
+
+ if (s->control_pid <= 0)
+ return;
+
+ unit_unwatch_pid(UNIT(s), s->control_pid);
+ s->control_pid = 0;
+}
+
+static void socket_cleanup_fd_list(SocketPort *p) {
+ assert(p);
+
+ close_many(p->auxiliary_fds, p->n_auxiliary_fds);
+ p->auxiliary_fds = mfree(p->auxiliary_fds);
+ p->n_auxiliary_fds = 0;
+}
+
+void socket_free_ports(Socket *s) {
+ SocketPort *p;
+
+ assert(s);
+
+ while ((p = s->ports)) {
+ LIST_REMOVE(port, s->ports, p);
+
+ sd_event_source_unref(p->event_source);
+
+ socket_cleanup_fd_list(p);
+ safe_close(p->fd);
+ free(p->path);
+ free(p);
+ }
+}
+
+static void socket_done(Unit *u) {
+ Socket *s = SOCKET(u);
+ SocketPeer *p;
+
+ assert(s);
+
+ socket_free_ports(s);
+
+ while ((p = set_steal_first(s->peers_by_address)))
+ p->socket = NULL;
+
+ s->peers_by_address = set_free(s->peers_by_address);
+
+ s->exec_runtime = exec_runtime_unref(s->exec_runtime, false);
+ exec_command_free_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX);
+ s->control_command = NULL;
+
+ dynamic_creds_unref(&s->dynamic_creds);
+
+ socket_unwatch_control_pid(s);
+
+ unit_ref_unset(&s->service);
+
+ s->tcp_congestion = mfree(s->tcp_congestion);
+ s->bind_to_device = mfree(s->bind_to_device);
+
+ s->smack = mfree(s->smack);
+ s->smack_ip_in = mfree(s->smack_ip_in);
+ s->smack_ip_out = mfree(s->smack_ip_out);
+
+ strv_free(s->symlinks);
+
+ s->user = mfree(s->user);
+ s->group = mfree(s->group);
+
+ s->fdname = mfree(s->fdname);
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+}
+
+static int socket_arm_timer(Socket *s, usec_t usec) {
+ int r;
+
+ assert(s);
+
+ if (s->timer_event_source) {
+ r = sd_event_source_set_time(s->timer_event_source, usec);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(s->timer_event_source, SD_EVENT_ONESHOT);
+ }
+
+ if (usec == USEC_INFINITY)
+ return 0;
+
+ r = sd_event_add_time(
+ UNIT(s)->manager->event,
+ &s->timer_event_source,
+ CLOCK_MONOTONIC,
+ usec, 0,
+ socket_dispatch_timer, s);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s->timer_event_source, "socket-timer");
+
+ return 0;
+}
+
+int socket_instantiate_service(Socket *s) {
+ _cleanup_free_ char *prefix = NULL, *name = NULL;
+ int r;
+ Unit *u;
+
+ assert(s);
+
+ /* This fills in s->service if it isn't filled in yet. For
+ * Accept=yes sockets we create the next connection service
+ * here. For Accept=no this is mostly a NOP since the service
+ * is figured out at load time anyway. */
+
+ if (UNIT_DEREF(s->service))
+ return 0;
+
+ if (!s->accept)
+ return 0;
+
+ r = unit_name_to_prefix(UNIT(s)->id, &prefix);
+ if (r < 0)
+ return r;
+
+ if (asprintf(&name, "%s@%u.service", prefix, s->n_accepted) < 0)
+ return -ENOMEM;
+
+ r = manager_load_unit(UNIT(s)->manager, name, NULL, NULL, &u);
+ if (r < 0)
+ return r;
+
+ unit_ref_set(&s->service, UNIT(s), u);
+
+ return unit_add_two_dependencies(UNIT(s), UNIT_BEFORE, UNIT_TRIGGERS, u, false, UNIT_DEPENDENCY_IMPLICIT);
+}
+
+static bool have_non_accept_socket(Socket *s) {
+ SocketPort *p;
+
+ assert(s);
+
+ if (!s->accept)
+ return true;
+
+ LIST_FOREACH(port, p, s->ports) {
+
+ if (p->type != SOCKET_SOCKET)
+ return true;
+
+ if (!socket_address_can_accept(&p->address))
+ return true;
+ }
+
+ return false;
+}
+
+static int socket_add_mount_dependencies(Socket *s) {
+ SocketPort *p;
+ int r;
+
+ assert(s);
+
+ LIST_FOREACH(port, p, s->ports) {
+ const char *path = NULL;
+
+ if (p->type == SOCKET_SOCKET)
+ path = socket_address_get_path(&p->address);
+ else if (IN_SET(p->type, SOCKET_FIFO, SOCKET_SPECIAL, SOCKET_USB_FUNCTION))
+ path = p->path;
+
+ if (!path)
+ continue;
+
+ r = unit_require_mounts_for(UNIT(s), path, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int socket_add_device_dependencies(Socket *s) {
+ char *t;
+
+ assert(s);
+
+ if (!s->bind_to_device || streq(s->bind_to_device, "lo"))
+ return 0;
+
+ t = strjoina("/sys/subsystem/net/devices/", s->bind_to_device);
+ return unit_add_node_dependency(UNIT(s), t, false, UNIT_BINDS_TO, UNIT_DEPENDENCY_FILE);
+}
+
+static int socket_add_default_dependencies(Socket *s) {
+ int r;
+ assert(s);
+
+ if (!UNIT(s)->default_dependencies)
+ return 0;
+
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_BEFORE, SPECIAL_SOCKETS_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ if (MANAGER_IS_SYSTEM(UNIT(s)->manager)) {
+ r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+ }
+
+ return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+}
+
+_pure_ static bool socket_has_exec(Socket *s) {
+ unsigned i;
+ assert(s);
+
+ for (i = 0; i < _SOCKET_EXEC_COMMAND_MAX; i++)
+ if (s->exec_command[i])
+ return true;
+
+ return false;
+}
+
+static int socket_add_extras(Socket *s) {
+ Unit *u = UNIT(s);
+ int r;
+
+ assert(s);
+
+ /* Pick defaults for the trigger limit, if nothing was explicitly configured. We pick a relatively high limit
+ * in Accept=yes mode, and a lower limit for Accept=no. Reason: in Accept=yes mode we are invoking accept()
+ * ourselves before the trigger limit can hit, thus incoming connections are taken off the socket queue quickly
+ * and reliably. This is different for Accept=no, where the spawned service has to take the incoming traffic
+ * off the queues, which it might not necessarily do. Moreover, while Accept=no services are supposed to
+ * process whatever is queued in one go, and thus should normally never have to be started frequently. This is
+ * different for Accept=yes where each connection is processed by a new service instance, and thus frequent
+ * service starts are typical. */
+
+ if (s->trigger_limit.interval == USEC_INFINITY)
+ s->trigger_limit.interval = 2 * USEC_PER_SEC;
+
+ if (s->trigger_limit.burst == (unsigned) -1) {
+ if (s->accept)
+ s->trigger_limit.burst = 200;
+ else
+ s->trigger_limit.burst = 20;
+ }
+
+ if (have_non_accept_socket(s)) {
+
+ if (!UNIT_DEREF(s->service)) {
+ Unit *x;
+
+ r = unit_load_related_unit(u, ".service", &x);
+ if (r < 0)
+ return r;
+
+ unit_ref_set(&s->service, u, x);
+ }
+
+ r = unit_add_two_dependencies(u, UNIT_BEFORE, UNIT_TRIGGERS, UNIT_DEREF(s->service), true, UNIT_DEPENDENCY_IMPLICIT);
+ if (r < 0)
+ return r;
+ }
+
+ r = socket_add_mount_dependencies(s);
+ if (r < 0)
+ return r;
+
+ r = socket_add_device_dependencies(s);
+ if (r < 0)
+ return r;
+
+ r = unit_patch_contexts(u);
+ if (r < 0)
+ return r;
+
+ if (socket_has_exec(s)) {
+ r = unit_add_exec_dependencies(u, &s->exec_context);
+ if (r < 0)
+ return r;
+ }
+
+ r = unit_set_default_slice(u);
+ if (r < 0)
+ return r;
+
+ r = socket_add_default_dependencies(s);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static const char *socket_find_symlink_target(Socket *s) {
+ const char *found = NULL;
+ SocketPort *p;
+
+ LIST_FOREACH(port, p, s->ports) {
+ const char *f = NULL;
+
+ switch (p->type) {
+
+ case SOCKET_FIFO:
+ f = p->path;
+ break;
+
+ case SOCKET_SOCKET:
+ f = socket_address_get_path(&p->address);
+ break;
+
+ default:
+ break;
+ }
+
+ if (f) {
+ if (found)
+ return NULL;
+
+ found = f;
+ }
+ }
+
+ return found;
+}
+
+static int socket_verify(Socket *s) {
+ assert(s);
+
+ if (UNIT(s)->load_state != UNIT_LOADED)
+ return 0;
+
+ if (!s->ports) {
+ log_unit_error(UNIT(s), "Unit has no Listen setting (ListenStream=, ListenDatagram=, ListenFIFO=, ...). Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->accept && have_non_accept_socket(s)) {
+ log_unit_error(UNIT(s), "Unit configured for accepting sockets, but sockets are non-accepting. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->accept && s->max_connections <= 0) {
+ log_unit_error(UNIT(s), "MaxConnection= setting too small. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->accept && UNIT_DEREF(s->service)) {
+ log_unit_error(UNIT(s), "Explicit service configuration for accepting socket units not supported. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->exec_context.pam_name && s->kill_context.kill_mode != KILL_CONTROL_GROUP) {
+ log_unit_error(UNIT(s), "Unit has PAM enabled. Kill mode must be set to 'control-group'. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (!strv_isempty(s->symlinks) && !socket_find_symlink_target(s)) {
+ log_unit_error(UNIT(s), "Unit has symlinks set but none or more than one node in the file system. Refusing.");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static void peer_address_hash_func(const SocketPeer *s, struct siphash *state) {
+ assert(s);
+
+ if (s->peer.sa.sa_family == AF_INET)
+ siphash24_compress(&s->peer.in.sin_addr, sizeof(s->peer.in.sin_addr), state);
+ else if (s->peer.sa.sa_family == AF_INET6)
+ siphash24_compress(&s->peer.in6.sin6_addr, sizeof(s->peer.in6.sin6_addr), state);
+ else if (s->peer.sa.sa_family == AF_VSOCK)
+ siphash24_compress(&s->peer.vm.svm_cid, sizeof(s->peer.vm.svm_cid), state);
+ else
+ assert_not_reached("Unknown address family.");
+}
+
+static int peer_address_compare_func(const SocketPeer *x, const SocketPeer *y) {
+ int r;
+
+ r = CMP(x->peer.sa.sa_family, y->peer.sa.sa_family);
+ if (r != 0)
+ return r;
+
+ switch(x->peer.sa.sa_family) {
+ case AF_INET:
+ return memcmp(&x->peer.in.sin_addr, &y->peer.in.sin_addr, sizeof(x->peer.in.sin_addr));
+ case AF_INET6:
+ return memcmp(&x->peer.in6.sin6_addr, &y->peer.in6.sin6_addr, sizeof(x->peer.in6.sin6_addr));
+ case AF_VSOCK:
+ return CMP(x->peer.vm.svm_cid, y->peer.vm.svm_cid);
+ }
+ assert_not_reached("Black sheep in the family!");
+}
+
+DEFINE_PRIVATE_HASH_OPS(peer_address_hash_ops, SocketPeer, peer_address_hash_func, peer_address_compare_func);
+
+static int socket_load(Unit *u) {
+ Socket *s = SOCKET(u);
+ int r;
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ r = set_ensure_allocated(&s->peers_by_address, &peer_address_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = unit_load_fragment_and_dropin(u);
+ if (r < 0)
+ return r;
+
+ if (u->load_state == UNIT_LOADED) {
+ /* This is a new unit? Then let's add in some extras */
+ r = socket_add_extras(s);
+ if (r < 0)
+ return r;
+ }
+
+ return socket_verify(s);
+}
+
+static SocketPeer *socket_peer_new(void) {
+ SocketPeer *p;
+
+ p = new0(SocketPeer, 1);
+ if (!p)
+ return NULL;
+
+ p->n_ref = 1;
+
+ return p;
+}
+
+static SocketPeer *socket_peer_free(SocketPeer *p) {
+ assert(p);
+
+ if (p->socket)
+ set_remove(p->socket->peers_by_address, p);
+
+ return mfree(p);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(SocketPeer, socket_peer, socket_peer_free);
+
+int socket_acquire_peer(Socket *s, int fd, SocketPeer **p) {
+ _cleanup_(socket_peer_unrefp) SocketPeer *remote = NULL;
+ SocketPeer sa = {}, *i;
+ socklen_t salen = sizeof(sa.peer);
+ int r;
+
+ assert(fd >= 0);
+ assert(s);
+
+ r = getpeername(fd, &sa.peer.sa, &salen);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), errno, "getpeername failed: %m");
+
+ if (!IN_SET(sa.peer.sa.sa_family, AF_INET, AF_INET6, AF_VSOCK)) {
+ *p = NULL;
+ return 0;
+ }
+
+ i = set_get(s->peers_by_address, &sa);
+ if (i) {
+ *p = socket_peer_ref(i);
+ return 1;
+ }
+
+ remote = socket_peer_new();
+ if (!remote)
+ return log_oom();
+
+ remote->peer = sa.peer;
+ remote->peer_salen = salen;
+
+ r = set_put(s->peers_by_address, remote);
+ if (r < 0)
+ return r;
+
+ remote->socket = s;
+
+ *p = TAKE_PTR(remote);
+
+ return 1;
+}
+
+_const_ static const char* listen_lookup(int family, int type) {
+
+ if (family == AF_NETLINK)
+ return "ListenNetlink";
+
+ if (type == SOCK_STREAM)
+ return "ListenStream";
+ else if (type == SOCK_DGRAM)
+ return "ListenDatagram";
+ else if (type == SOCK_SEQPACKET)
+ return "ListenSequentialPacket";
+
+ assert_not_reached("Unknown socket type");
+ return NULL;
+}
+
+static void socket_dump(Unit *u, FILE *f, const char *prefix) {
+ char time_string[FORMAT_TIMESPAN_MAX];
+ SocketExecCommand c;
+ Socket *s = SOCKET(u);
+ SocketPort *p;
+ const char *prefix2, *str;
+
+ assert(s);
+ assert(f);
+
+ prefix = strempty(prefix);
+ prefix2 = strjoina(prefix, "\t");
+
+ fprintf(f,
+ "%sSocket State: %s\n"
+ "%sResult: %s\n"
+ "%sBindIPv6Only: %s\n"
+ "%sBacklog: %u\n"
+ "%sSocketMode: %04o\n"
+ "%sDirectoryMode: %04o\n"
+ "%sKeepAlive: %s\n"
+ "%sNoDelay: %s\n"
+ "%sFreeBind: %s\n"
+ "%sTransparent: %s\n"
+ "%sBroadcast: %s\n"
+ "%sPassCredentials: %s\n"
+ "%sPassSecurity: %s\n"
+ "%sTCPCongestion: %s\n"
+ "%sRemoveOnStop: %s\n"
+ "%sWritable: %s\n"
+ "%sFileDescriptorName: %s\n"
+ "%sSELinuxContextFromNet: %s\n",
+ prefix, socket_state_to_string(s->state),
+ prefix, socket_result_to_string(s->result),
+ prefix, socket_address_bind_ipv6_only_to_string(s->bind_ipv6_only),
+ prefix, s->backlog,
+ prefix, s->socket_mode,
+ prefix, s->directory_mode,
+ prefix, yes_no(s->keep_alive),
+ prefix, yes_no(s->no_delay),
+ prefix, yes_no(s->free_bind),
+ prefix, yes_no(s->transparent),
+ prefix, yes_no(s->broadcast),
+ prefix, yes_no(s->pass_cred),
+ prefix, yes_no(s->pass_sec),
+ prefix, strna(s->tcp_congestion),
+ prefix, yes_no(s->remove_on_stop),
+ prefix, yes_no(s->writable),
+ prefix, socket_fdname(s),
+ prefix, yes_no(s->selinux_context_from_net));
+
+ if (s->control_pid > 0)
+ fprintf(f,
+ "%sControl PID: "PID_FMT"\n",
+ prefix, s->control_pid);
+
+ if (s->bind_to_device)
+ fprintf(f,
+ "%sBindToDevice: %s\n",
+ prefix, s->bind_to_device);
+
+ if (s->accept)
+ fprintf(f,
+ "%sAccepted: %u\n"
+ "%sNConnections: %u\n"
+ "%sMaxConnections: %u\n"
+ "%sMaxConnectionsPerSource: %u\n",
+ prefix, s->n_accepted,
+ prefix, s->n_connections,
+ prefix, s->max_connections,
+ prefix, s->max_connections_per_source);
+
+ if (s->priority >= 0)
+ fprintf(f,
+ "%sPriority: %i\n",
+ prefix, s->priority);
+
+ if (s->receive_buffer > 0)
+ fprintf(f,
+ "%sReceiveBuffer: %zu\n",
+ prefix, s->receive_buffer);
+
+ if (s->send_buffer > 0)
+ fprintf(f,
+ "%sSendBuffer: %zu\n",
+ prefix, s->send_buffer);
+
+ if (s->ip_tos >= 0)
+ fprintf(f,
+ "%sIPTOS: %i\n",
+ prefix, s->ip_tos);
+
+ if (s->ip_ttl >= 0)
+ fprintf(f,
+ "%sIPTTL: %i\n",
+ prefix, s->ip_ttl);
+
+ if (s->pipe_size > 0)
+ fprintf(f,
+ "%sPipeSize: %zu\n",
+ prefix, s->pipe_size);
+
+ if (s->mark >= 0)
+ fprintf(f,
+ "%sMark: %i\n",
+ prefix, s->mark);
+
+ if (s->mq_maxmsg > 0)
+ fprintf(f,
+ "%sMessageQueueMaxMessages: %li\n",
+ prefix, s->mq_maxmsg);
+
+ if (s->mq_msgsize > 0)
+ fprintf(f,
+ "%sMessageQueueMessageSize: %li\n",
+ prefix, s->mq_msgsize);
+
+ if (s->reuse_port)
+ fprintf(f,
+ "%sReusePort: %s\n",
+ prefix, yes_no(s->reuse_port));
+
+ if (s->smack)
+ fprintf(f,
+ "%sSmackLabel: %s\n",
+ prefix, s->smack);
+
+ if (s->smack_ip_in)
+ fprintf(f,
+ "%sSmackLabelIPIn: %s\n",
+ prefix, s->smack_ip_in);
+
+ if (s->smack_ip_out)
+ fprintf(f,
+ "%sSmackLabelIPOut: %s\n",
+ prefix, s->smack_ip_out);
+
+ if (!isempty(s->user) || !isempty(s->group))
+ fprintf(f,
+ "%sSocketUser: %s\n"
+ "%sSocketGroup: %s\n",
+ prefix, strna(s->user),
+ prefix, strna(s->group));
+
+ if (s->keep_alive_time > 0)
+ fprintf(f,
+ "%sKeepAliveTimeSec: %s\n",
+ prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX, s->keep_alive_time, USEC_PER_SEC));
+
+ if (s->keep_alive_interval > 0)
+ fprintf(f,
+ "%sKeepAliveIntervalSec: %s\n",
+ prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX, s->keep_alive_interval, USEC_PER_SEC));
+
+ if (s->keep_alive_cnt > 0)
+ fprintf(f,
+ "%sKeepAliveProbes: %u\n",
+ prefix, s->keep_alive_cnt);
+
+ if (s->defer_accept > 0)
+ fprintf(f,
+ "%sDeferAcceptSec: %s\n",
+ prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX, s->defer_accept, USEC_PER_SEC));
+
+ LIST_FOREACH(port, p, s->ports) {
+
+ switch (p->type) {
+ case SOCKET_SOCKET: {
+ _cleanup_free_ char *k = NULL;
+ const char *t;
+ int r;
+
+ r = socket_address_print(&p->address, &k);
+ if (r < 0)
+ t = strerror(-r);
+ else
+ t = k;
+
+ fprintf(f, "%s%s: %s\n", prefix, listen_lookup(socket_address_family(&p->address), p->address.type), t);
+ break;
+ }
+ case SOCKET_SPECIAL:
+ fprintf(f, "%sListenSpecial: %s\n", prefix, p->path);
+ break;
+ case SOCKET_USB_FUNCTION:
+ fprintf(f, "%sListenUSBFunction: %s\n", prefix, p->path);
+ break;
+ case SOCKET_MQUEUE:
+ fprintf(f, "%sListenMessageQueue: %s\n", prefix, p->path);
+ break;
+ default:
+ fprintf(f, "%sListenFIFO: %s\n", prefix, p->path);
+ }
+ }
+
+ fprintf(f,
+ "%sTriggerLimitIntervalSec: %s\n"
+ "%sTriggerLimitBurst: %u\n",
+ prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX, s->trigger_limit.interval, USEC_PER_SEC),
+ prefix, s->trigger_limit.burst);
+
+ str = ip_protocol_to_name(s->socket_protocol);
+ if (str)
+ fprintf(f, "%sSocketProtocol: %s\n", prefix, str);
+
+ if (!strv_isempty(s->symlinks)) {
+ char **q;
+
+ fprintf(f, "%sSymlinks:", prefix);
+ STRV_FOREACH(q, s->symlinks)
+ fprintf(f, " %s", *q);
+
+ fprintf(f, "\n");
+ }
+
+ fprintf(f,
+ "%sTimeoutSec: %s\n",
+ prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX, s->timeout_usec, USEC_PER_SEC));
+
+ exec_context_dump(&s->exec_context, f, prefix);
+ kill_context_dump(&s->kill_context, f, prefix);
+
+ for (c = 0; c < _SOCKET_EXEC_COMMAND_MAX; c++) {
+ if (!s->exec_command[c])
+ continue;
+
+ fprintf(f, "%s-> %s:\n",
+ prefix, socket_exec_command_to_string(c));
+
+ exec_command_dump_list(s->exec_command[c], f, prefix2);
+ }
+
+ cgroup_context_dump(&s->cgroup_context, f, prefix);
+}
+
+static int instance_from_socket(int fd, unsigned nr, char **instance) {
+ socklen_t l;
+ char *r;
+ union sockaddr_union local, remote;
+
+ assert(fd >= 0);
+ assert(instance);
+
+ l = sizeof(local);
+ if (getsockname(fd, &local.sa, &l) < 0)
+ return -errno;
+
+ l = sizeof(remote);
+ if (getpeername(fd, &remote.sa, &l) < 0)
+ return -errno;
+
+ switch (local.sa.sa_family) {
+
+ case AF_INET: {
+ uint32_t
+ a = be32toh(local.in.sin_addr.s_addr),
+ b = be32toh(remote.in.sin_addr.s_addr);
+
+ if (asprintf(&r,
+ "%u-%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
+ nr,
+ a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF,
+ be16toh(local.in.sin_port),
+ b >> 24, (b >> 16) & 0xFF, (b >> 8) & 0xFF, b & 0xFF,
+ be16toh(remote.in.sin_port)) < 0)
+ return -ENOMEM;
+
+ break;
+ }
+
+ case AF_INET6: {
+ static const unsigned char ipv4_prefix[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF
+ };
+
+ if (memcmp(&local.in6.sin6_addr, ipv4_prefix, sizeof(ipv4_prefix)) == 0 &&
+ memcmp(&remote.in6.sin6_addr, ipv4_prefix, sizeof(ipv4_prefix)) == 0) {
+ const uint8_t
+ *a = local.in6.sin6_addr.s6_addr+12,
+ *b = remote.in6.sin6_addr.s6_addr+12;
+
+ if (asprintf(&r,
+ "%u-%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
+ nr,
+ a[0], a[1], a[2], a[3],
+ be16toh(local.in6.sin6_port),
+ b[0], b[1], b[2], b[3],
+ be16toh(remote.in6.sin6_port)) < 0)
+ return -ENOMEM;
+ } else {
+ char a[INET6_ADDRSTRLEN], b[INET6_ADDRSTRLEN];
+
+ if (asprintf(&r,
+ "%u-%s:%u-%s:%u",
+ nr,
+ inet_ntop(AF_INET6, &local.in6.sin6_addr, a, sizeof(a)),
+ be16toh(local.in6.sin6_port),
+ inet_ntop(AF_INET6, &remote.in6.sin6_addr, b, sizeof(b)),
+ be16toh(remote.in6.sin6_port)) < 0)
+ return -ENOMEM;
+ }
+
+ break;
+ }
+
+ case AF_UNIX: {
+ struct ucred ucred;
+ int k;
+
+ k = getpeercred(fd, &ucred);
+ if (k >= 0) {
+ if (asprintf(&r,
+ "%u-"PID_FMT"-"UID_FMT,
+ nr, ucred.pid, ucred.uid) < 0)
+ return -ENOMEM;
+ } else if (k == -ENODATA) {
+ /* This handles the case where somebody is
+ * connecting from another pid/uid namespace
+ * (e.g. from outside of our container). */
+ if (asprintf(&r,
+ "%u-unknown",
+ nr) < 0)
+ return -ENOMEM;
+ } else
+ return k;
+
+ break;
+ }
+
+ case AF_VSOCK:
+ if (asprintf(&r,
+ "%u-%u:%u-%u:%u",
+ nr,
+ local.vm.svm_cid, local.vm.svm_port,
+ remote.vm.svm_cid, remote.vm.svm_port) < 0)
+ return -ENOMEM;
+
+ break;
+
+ default:
+ assert_not_reached("Unhandled socket type.");
+ }
+
+ *instance = r;
+ return 0;
+}
+
+static void socket_close_fds(Socket *s) {
+ SocketPort *p;
+ char **i;
+
+ assert(s);
+
+ LIST_FOREACH(port, p, s->ports) {
+ bool was_open;
+
+ was_open = p->fd >= 0;
+
+ p->event_source = sd_event_source_unref(p->event_source);
+ p->fd = safe_close(p->fd);
+ socket_cleanup_fd_list(p);
+
+ /* One little note: we should normally not delete any sockets in the file system here! After all some
+ * other process we spawned might still have a reference of this fd and wants to continue to use
+ * it. Therefore we normally delete sockets in the file system before we create a new one, not after we
+ * stopped using one! That all said, if the user explicitly requested this, we'll delete them here
+ * anyway, but only then. */
+
+ if (!was_open || !s->remove_on_stop)
+ continue;
+
+ switch (p->type) {
+
+ case SOCKET_FIFO:
+ (void) unlink(p->path);
+ break;
+
+ case SOCKET_MQUEUE:
+ (void) mq_unlink(p->path);
+ break;
+
+ case SOCKET_SOCKET:
+ (void) socket_address_unlink(&p->address);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (s->remove_on_stop)
+ STRV_FOREACH(i, s->symlinks)
+ (void) unlink(*i);
+}
+
+static void socket_apply_socket_options(Socket *s, int fd) {
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+
+ if (s->keep_alive) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_KEEPALIVE, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_KEEPALIVE failed: %m");
+ }
+
+ if (s->keep_alive_time > 0) {
+ r = setsockopt_int(fd, SOL_TCP, TCP_KEEPIDLE, s->keep_alive_time / USEC_PER_SEC);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_KEEPIDLE failed: %m");
+ }
+
+ if (s->keep_alive_interval > 0) {
+ r = setsockopt_int(fd, SOL_TCP, TCP_KEEPINTVL, s->keep_alive_interval / USEC_PER_SEC);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_KEEPINTVL failed: %m");
+ }
+
+ if (s->keep_alive_cnt > 0) {
+ r = setsockopt_int(fd, SOL_TCP, TCP_KEEPCNT, s->keep_alive_cnt);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_KEEPCNT failed: %m");
+ }
+
+ if (s->defer_accept > 0) {
+ r = setsockopt_int(fd, SOL_TCP, TCP_DEFER_ACCEPT, s->defer_accept / USEC_PER_SEC);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_DEFER_ACCEPT failed: %m");
+ }
+
+ if (s->no_delay) {
+ if (s->socket_protocol == IPPROTO_SCTP) {
+ r = setsockopt_int(fd, SOL_SCTP, SCTP_NODELAY, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SCTP_NODELAY failed: %m");
+ } else {
+ r = setsockopt_int(fd, SOL_TCP, TCP_NODELAY, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_NODELAY failed: %m");
+ }
+ }
+
+ if (s->broadcast) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_BROADCAST, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_BROADCAST failed: %m");
+ }
+
+ if (s->pass_cred) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_PASSCRED failed: %m");
+ }
+
+ if (s->pass_sec) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSSEC, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_PASSSEC failed: %m");
+ }
+
+ if (s->priority >= 0) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PRIORITY, s->priority);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_PRIORITY failed: %m");
+ }
+
+ if (s->receive_buffer > 0) {
+ /* We first try with SO_RCVBUFFORCE, in case we have the perms for that */
+ if (setsockopt_int(fd, SOL_SOCKET, SO_RCVBUFFORCE, s->receive_buffer) < 0) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_RCVBUF, s->receive_buffer);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_RCVBUF failed: %m");
+ }
+ }
+
+ if (s->send_buffer > 0) {
+ if (setsockopt_int(fd, SOL_SOCKET, SO_SNDBUFFORCE, s->send_buffer) < 0) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_SNDBUF, s->send_buffer);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_SNDBUF failed: %m");
+ }
+ }
+
+ if (s->mark >= 0) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_MARK, s->mark);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_MARK failed: %m");
+ }
+
+ if (s->ip_tos >= 0) {
+ r = setsockopt_int(fd, IPPROTO_IP, IP_TOS, s->ip_tos);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "IP_TOS failed: %m");
+ }
+
+ if (s->ip_ttl >= 0) {
+ int x;
+
+ r = setsockopt_int(fd, IPPROTO_IP, IP_TTL, s->ip_ttl);
+
+ if (socket_ipv6_is_supported())
+ x = setsockopt_int(fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, s->ip_ttl);
+ else
+ x = -EAFNOSUPPORT;
+
+ if (r < 0 && x < 0)
+ log_unit_warning_errno(UNIT(s), r, "IP_TTL/IPV6_UNICAST_HOPS failed: %m");
+ }
+
+ if (s->tcp_congestion)
+ if (setsockopt(fd, SOL_TCP, TCP_CONGESTION, s->tcp_congestion, strlen(s->tcp_congestion)+1) < 0)
+ log_unit_warning_errno(UNIT(s), errno, "TCP_CONGESTION failed: %m");
+
+ if (s->smack_ip_in) {
+ r = mac_smack_apply_fd(fd, SMACK_ATTR_IPIN, s->smack_ip_in);
+ if (r < 0)
+ log_unit_error_errno(UNIT(s), r, "mac_smack_apply_ip_in_fd: %m");
+ }
+
+ if (s->smack_ip_out) {
+ r = mac_smack_apply_fd(fd, SMACK_ATTR_IPOUT, s->smack_ip_out);
+ if (r < 0)
+ log_unit_error_errno(UNIT(s), r, "mac_smack_apply_ip_out_fd: %m");
+ }
+}
+
+static void socket_apply_fifo_options(Socket *s, int fd) {
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+
+ if (s->pipe_size > 0)
+ if (fcntl(fd, F_SETPIPE_SZ, s->pipe_size) < 0)
+ log_unit_warning_errno(UNIT(s), errno, "Setting pipe size failed, ignoring: %m");
+
+ if (s->smack) {
+ r = mac_smack_apply_fd(fd, SMACK_ATTR_ACCESS, s->smack);
+ if (r < 0)
+ log_unit_error_errno(UNIT(s), r, "SMACK relabelling failed, ignoring: %m");
+ }
+}
+
+static int fifo_address_create(
+ const char *path,
+ mode_t directory_mode,
+ mode_t socket_mode) {
+
+ _cleanup_close_ int fd = -1;
+ mode_t old_mask;
+ struct stat st;
+ int r;
+
+ assert(path);
+
+ (void) mkdir_parents_label(path, directory_mode);
+
+ r = mac_selinux_create_file_prepare(path, S_IFIFO);
+ if (r < 0)
+ return r;
+
+ /* Enforce the right access mode for the fifo */
+ old_mask = umask(~socket_mode);
+
+ /* Include the original umask in our mask */
+ (void) umask(~socket_mode | old_mask);
+
+ r = mkfifo(path, socket_mode);
+ (void) umask(old_mask);
+
+ if (r < 0 && errno != EEXIST) {
+ r = -errno;
+ goto fail;
+ }
+
+ fd = open(path, O_RDWR | O_CLOEXEC | O_NOCTTY | O_NONBLOCK | O_NOFOLLOW);
+ if (fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ mac_selinux_create_file_clear();
+
+ if (fstat(fd, &st) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (!S_ISFIFO(st.st_mode) ||
+ (st.st_mode & 0777) != (socket_mode & ~old_mask) ||
+ st.st_uid != getuid() ||
+ st.st_gid != getgid()) {
+ r = -EEXIST;
+ goto fail;
+ }
+
+ return TAKE_FD(fd);
+
+fail:
+ mac_selinux_create_file_clear();
+ return r;
+}
+
+static int special_address_create(const char *path, bool writable) {
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+
+ assert(path);
+
+ fd = open(path, (writable ? O_RDWR : O_RDONLY)|O_CLOEXEC|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* Check whether this is a /proc, /sys or /dev file or char device */
+ if (!S_ISREG(st.st_mode) && !S_ISCHR(st.st_mode))
+ return -EEXIST;
+
+ return TAKE_FD(fd);
+}
+
+static int usbffs_address_create(const char *path) {
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+
+ assert(path);
+
+ fd = open(path, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* Check whether this is a regular file (ffs endpoint) */
+ if (!S_ISREG(st.st_mode))
+ return -EEXIST;
+
+ return TAKE_FD(fd);
+}
+
+static int mq_address_create(
+ const char *path,
+ mode_t mq_mode,
+ long maxmsg,
+ long msgsize) {
+
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ mode_t old_mask;
+ struct mq_attr _attr, *attr = NULL;
+
+ assert(path);
+
+ if (maxmsg > 0 && msgsize > 0) {
+ _attr = (struct mq_attr) {
+ .mq_flags = O_NONBLOCK,
+ .mq_maxmsg = maxmsg,
+ .mq_msgsize = msgsize,
+ };
+ attr = &_attr;
+ }
+
+ /* Enforce the right access mode for the mq */
+ old_mask = umask(~mq_mode);
+
+ /* Include the original umask in our mask */
+ (void) umask(~mq_mode | old_mask);
+ fd = mq_open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_CREAT, mq_mode, attr);
+ (void) umask(old_mask);
+
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if ((st.st_mode & 0777) != (mq_mode & ~old_mask) ||
+ st.st_uid != getuid() ||
+ st.st_gid != getgid())
+ return -EEXIST;
+
+ return TAKE_FD(fd);
+}
+
+static int socket_symlink(Socket *s) {
+ const char *p;
+ char **i;
+ int r;
+
+ assert(s);
+
+ p = socket_find_symlink_target(s);
+ if (!p)
+ return 0;
+
+ STRV_FOREACH(i, s->symlinks) {
+ (void) mkdir_parents_label(*i, s->directory_mode);
+
+ r = symlink_idempotent(p, *i, false);
+
+ if (r == -EEXIST && s->remove_on_stop) {
+ /* If there's already something where we want to create the symlink, and the destructive
+ * RemoveOnStop= mode is set, then we might as well try to remove what already exists and try
+ * again. */
+
+ if (unlink(*i) >= 0)
+ r = symlink_idempotent(p, *i, false);
+ }
+
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "Failed to create symlink %s → %s, ignoring: %m", p, *i);
+ }
+
+ return 0;
+}
+
+static int usbffs_write_descs(int fd, Service *s) {
+ int r;
+
+ if (!s->usb_function_descriptors || !s->usb_function_strings)
+ return -EINVAL;
+
+ r = copy_file_fd(s->usb_function_descriptors, fd, 0);
+ if (r < 0)
+ return r;
+
+ return copy_file_fd(s->usb_function_strings, fd, 0);
+}
+
+static int usbffs_select_ep(const struct dirent *d) {
+ return d->d_name[0] != '.' && !streq(d->d_name, "ep0");
+}
+
+static int usbffs_dispatch_eps(SocketPort *p) {
+ _cleanup_free_ struct dirent **ent = NULL;
+ size_t n, k, i;
+ int r;
+
+ r = scandir(p->path, &ent, usbffs_select_ep, alphasort);
+ if (r < 0)
+ return -errno;
+
+ n = (size_t) r;
+ p->auxiliary_fds = new(int, n);
+ if (!p->auxiliary_fds) {
+ r = -ENOMEM;
+ goto clear;
+ }
+
+ p->n_auxiliary_fds = n;
+
+ k = 0;
+ for (i = 0; i < n; ++i) {
+ _cleanup_free_ char *ep = NULL;
+
+ ep = path_make_absolute(ent[i]->d_name, p->path);
+ if (!ep) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ path_simplify(ep, false);
+
+ r = usbffs_address_create(ep);
+ if (r < 0)
+ goto fail;
+
+ p->auxiliary_fds[k++] = r;
+ }
+
+ r = 0;
+ goto clear;
+
+fail:
+ close_many(p->auxiliary_fds, k);
+ p->auxiliary_fds = mfree(p->auxiliary_fds);
+ p->n_auxiliary_fds = 0;
+
+clear:
+ for (i = 0; i < n; ++i)
+ free(ent[i]);
+
+ return r;
+}
+
+static int socket_determine_selinux_label(Socket *s, char **ret) {
+ Service *service;
+ ExecCommand *c;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ assert(s);
+ assert(ret);
+
+ if (s->selinux_context_from_net) {
+ /* If this is requested, get label from the network label */
+
+ r = mac_selinux_get_our_label(ret);
+ if (r == -EOPNOTSUPP)
+ goto no_label;
+
+ } else {
+ /* Otherwise, get it from the executable we are about to start */
+ r = socket_instantiate_service(s);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_ISSET(s->service))
+ goto no_label;
+
+ service = SERVICE(UNIT_DEREF(s->service));
+ c = service->exec_command[SERVICE_EXEC_START];
+ if (!c)
+ goto no_label;
+
+ r = chase_symlinks(c->path, service->exec_context.root_directory, CHASE_PREFIX_ROOT, &path);
+ if (r < 0)
+ goto no_label;
+
+ r = mac_selinux_get_create_label_from_exe(path, ret);
+ if (IN_SET(r, -EPERM, -EOPNOTSUPP))
+ goto no_label;
+ }
+
+ return r;
+
+no_label:
+ *ret = NULL;
+ return 0;
+}
+
+static int socket_address_listen_do(
+ Socket *s,
+ const SocketAddress *address,
+ const char *label) {
+
+ assert(s);
+ assert(address);
+
+ return socket_address_listen(
+ address,
+ SOCK_CLOEXEC|SOCK_NONBLOCK,
+ s->backlog,
+ s->bind_ipv6_only,
+ s->bind_to_device,
+ s->reuse_port,
+ s->free_bind,
+ s->transparent,
+ s->directory_mode,
+ s->socket_mode,
+ label);
+}
+
+#define log_address_error_errno(u, address, error, fmt) \
+ ({ \
+ _cleanup_free_ char *_t = NULL; \
+ \
+ (void) socket_address_print(address, &_t); \
+ log_unit_error_errno(u, error, fmt, strna(_t)); \
+ })
+
+static int socket_address_listen_in_cgroup(
+ Socket *s,
+ const SocketAddress *address,
+ const char *label) {
+
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ int fd, r;
+ pid_t pid;
+
+ assert(s);
+ assert(address);
+
+ /* This is a wrapper around socket_address_listen(), that forks off a helper process inside the socket's cgroup
+ * in which the socket is actually created. This way we ensure the socket is actually properly attached to the
+ * unit's cgroup for the purpose of BPF filtering and such. */
+
+ if (!IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6))
+ goto shortcut; /* BPF filtering only applies to IPv4 + IPv6, shortcut things for other protocols */
+
+ r = bpf_firewall_supported();
+ if (r < 0)
+ return r;
+ if (r == BPF_FIREWALL_UNSUPPORTED) /* If BPF firewalling isn't supported anyway — there's no point in this forking complexity */
+ goto shortcut;
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
+ return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
+
+ r = unit_fork_helper_process(UNIT(s), "(sd-listen)", &pid);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to fork off listener stub process: %m");
+ if (r == 0) {
+ /* Child */
+
+ pair[0] = safe_close(pair[0]);
+
+ fd = socket_address_listen_do(s, address, label);
+ if (fd < 0) {
+ log_address_error_errno(UNIT(s), address, fd, "Failed to create listening socket (%s): %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ r = send_one_fd(pair[1], fd, 0);
+ if (r < 0) {
+ log_address_error_errno(UNIT(s), address, r, "Failed to send listening socket (%s) to parent: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+ fd = receive_one_fd(pair[0], 0);
+
+ /* We synchronously wait for the helper, as it shouldn't be slow */
+ r = wait_for_terminate_and_check("(sd-listen)", pid, WAIT_LOG_ABNORMAL);
+ if (r < 0) {
+ safe_close(fd);
+ return r;
+ }
+
+ if (fd < 0)
+ return log_address_error_errno(UNIT(s), address, fd, "Failed to receive listening socket (%s): %m");
+
+ return fd;
+
+shortcut:
+ fd = socket_address_listen_do(s, address, label);
+ if (fd < 0)
+ return log_address_error_errno(UNIT(s), address, fd, "Failed to create listening socket (%s): %m");
+
+ return fd;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Socket *, socket_close_fds);
+
+static int socket_open_fds(Socket *_s) {
+ _cleanup_(socket_close_fdsp) Socket *s = _s;
+ _cleanup_(mac_selinux_freep) char *label = NULL;
+ bool know_label = false;
+ SocketPort *p;
+ int r;
+
+ assert(s);
+
+ LIST_FOREACH(port, p, s->ports) {
+
+ if (p->fd >= 0)
+ continue;
+
+ switch (p->type) {
+
+ case SOCKET_SOCKET:
+
+ if (!know_label) {
+ /* Figure out label, if we don't it know yet. We do it once, for the first socket where
+ * we need this and remember it for the rest. */
+
+ r = socket_determine_selinux_label(s, &label);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to determine SELinux label: %m");
+
+ know_label = true;
+ }
+
+ /* Apply the socket protocol */
+ switch (p->address.type) {
+
+ case SOCK_STREAM:
+ case SOCK_SEQPACKET:
+ if (s->socket_protocol == IPPROTO_SCTP)
+ p->address.protocol = s->socket_protocol;
+ break;
+
+ case SOCK_DGRAM:
+ if (s->socket_protocol == IPPROTO_UDPLITE)
+ p->address.protocol = s->socket_protocol;
+ break;
+ }
+
+ p->fd = socket_address_listen_in_cgroup(s, &p->address, label);
+ if (p->fd < 0)
+ return p->fd;
+
+ socket_apply_socket_options(s, p->fd);
+ socket_symlink(s);
+ break;
+
+ case SOCKET_SPECIAL:
+
+ p->fd = special_address_create(p->path, s->writable);
+ if (p->fd < 0)
+ return log_unit_error_errno(UNIT(s), p->fd, "Failed to open special file %s: %m", p->path);
+ break;
+
+ case SOCKET_FIFO:
+
+ p->fd = fifo_address_create(
+ p->path,
+ s->directory_mode,
+ s->socket_mode);
+ if (p->fd < 0)
+ return log_unit_error_errno(UNIT(s), p->fd, "Failed to open FIFO %s: %m", p->path);
+
+ socket_apply_fifo_options(s, p->fd);
+ socket_symlink(s);
+ break;
+
+ case SOCKET_MQUEUE:
+
+ p->fd = mq_address_create(
+ p->path,
+ s->socket_mode,
+ s->mq_maxmsg,
+ s->mq_msgsize);
+ if (p->fd < 0)
+ return log_unit_error_errno(UNIT(s), p->fd, "Failed to open message queue %s: %m", p->path);
+ break;
+
+ case SOCKET_USB_FUNCTION: {
+ _cleanup_free_ char *ep = NULL;
+
+ ep = path_make_absolute("ep0", p->path);
+
+ p->fd = usbffs_address_create(ep);
+ if (p->fd < 0)
+ return p->fd;
+
+ r = usbffs_write_descs(p->fd, SERVICE(UNIT_DEREF(s->service)));
+ if (r < 0)
+ return r;
+
+ r = usbffs_dispatch_eps(p);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+ default:
+ assert_not_reached("Unknown port type");
+ }
+ }
+
+ s = NULL;
+ return 0;
+}
+
+static void socket_unwatch_fds(Socket *s) {
+ SocketPort *p;
+ int r;
+
+ assert(s);
+
+ LIST_FOREACH(port, p, s->ports) {
+ if (p->fd < 0)
+ continue;
+
+ if (!p->event_source)
+ continue;
+
+ r = sd_event_source_set_enabled(p->event_source, SD_EVENT_OFF);
+ if (r < 0)
+ log_unit_debug_errno(UNIT(s), r, "Failed to disable event source: %m");
+ }
+}
+
+static int socket_watch_fds(Socket *s) {
+ SocketPort *p;
+ int r;
+
+ assert(s);
+
+ LIST_FOREACH(port, p, s->ports) {
+ if (p->fd < 0)
+ continue;
+
+ if (p->event_source) {
+ r = sd_event_source_set_enabled(p->event_source, SD_EVENT_ON);
+ if (r < 0)
+ goto fail;
+ } else {
+ r = sd_event_add_io(UNIT(s)->manager->event, &p->event_source, p->fd, EPOLLIN, socket_dispatch_io, p);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(p->event_source, "socket-port-io");
+ }
+ }
+
+ return 0;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to watch listening fds: %m");
+ socket_unwatch_fds(s);
+ return r;
+}
+
+enum {
+ SOCKET_OPEN_NONE,
+ SOCKET_OPEN_SOME,
+ SOCKET_OPEN_ALL,
+};
+
+static int socket_check_open(Socket *s) {
+ bool have_open = false, have_closed = false;
+ SocketPort *p;
+
+ assert(s);
+
+ LIST_FOREACH(port, p, s->ports) {
+ if (p->fd < 0)
+ have_closed = true;
+ else
+ have_open = true;
+
+ if (have_open && have_closed)
+ return SOCKET_OPEN_SOME;
+ }
+
+ if (have_open)
+ return SOCKET_OPEN_ALL;
+
+ return SOCKET_OPEN_NONE;
+}
+
+static void socket_set_state(Socket *s, SocketState state) {
+ SocketState old_state;
+ assert(s);
+
+ if (s->state != state)
+ bus_unit_send_pending_change_signal(UNIT(s), false);
+
+ old_state = s->state;
+ s->state = state;
+
+ if (!IN_SET(state,
+ SOCKET_START_PRE,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST,
+ SOCKET_STOP_PRE,
+ SOCKET_STOP_PRE_SIGTERM,
+ SOCKET_STOP_PRE_SIGKILL,
+ SOCKET_STOP_POST,
+ SOCKET_FINAL_SIGTERM,
+ SOCKET_FINAL_SIGKILL)) {
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+ socket_unwatch_control_pid(s);
+ s->control_command = NULL;
+ s->control_command_id = _SOCKET_EXEC_COMMAND_INVALID;
+ }
+
+ if (state != SOCKET_LISTENING)
+ socket_unwatch_fds(s);
+
+ if (!IN_SET(state,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST,
+ SOCKET_LISTENING,
+ SOCKET_RUNNING,
+ SOCKET_STOP_PRE,
+ SOCKET_STOP_PRE_SIGTERM,
+ SOCKET_STOP_PRE_SIGKILL))
+ socket_close_fds(s);
+
+ if (state != old_state)
+ log_unit_debug(UNIT(s), "Changed %s -> %s", socket_state_to_string(old_state), socket_state_to_string(state));
+
+ unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static int socket_coldplug(Unit *u) {
+ Socket *s = SOCKET(u);
+ int r;
+
+ assert(s);
+ assert(s->state == SOCKET_DEAD);
+
+ if (s->deserialized_state == s->state)
+ return 0;
+
+ if (s->control_pid > 0 &&
+ pid_is_unwaited(s->control_pid) &&
+ IN_SET(s->deserialized_state,
+ SOCKET_START_PRE,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST,
+ SOCKET_STOP_PRE,
+ SOCKET_STOP_PRE_SIGTERM,
+ SOCKET_STOP_PRE_SIGKILL,
+ SOCKET_STOP_POST,
+ SOCKET_FINAL_SIGTERM,
+ SOCKET_FINAL_SIGKILL)) {
+
+ r = unit_watch_pid(UNIT(s), s->control_pid);
+ if (r < 0)
+ return r;
+
+ r = socket_arm_timer(s, usec_add(u->state_change_timestamp.monotonic, s->timeout_usec));
+ if (r < 0)
+ return r;
+ }
+
+ if (IN_SET(s->deserialized_state,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST,
+ SOCKET_LISTENING,
+ SOCKET_RUNNING)) {
+
+ /* Originally, we used to simply reopen all sockets here that we didn't have file descriptors
+ * for. However, this is problematic, as we won't traverse throught the SOCKET_START_CHOWN state for
+ * them, and thus the UID/GID wouldn't be right. Hence, instead simply check if we have all fds open,
+ * and if there's a mismatch, warn loudly. */
+
+ r = socket_check_open(s);
+ if (r == SOCKET_OPEN_NONE)
+ log_unit_warning(UNIT(s),
+ "Socket unit configuration has changed while unit has been running, "
+ "no open socket file descriptor left. "
+ "The socket unit is not functional until restarted.");
+ else if (r == SOCKET_OPEN_SOME)
+ log_unit_warning(UNIT(s),
+ "Socket unit configuration has changed while unit has been running, "
+ "and some socket file descriptors have not been opened yet. "
+ "The socket unit is not fully functional until restarted.");
+ }
+
+ if (s->deserialized_state == SOCKET_LISTENING) {
+ r = socket_watch_fds(s);
+ if (r < 0)
+ return r;
+ }
+
+ if (!IN_SET(s->deserialized_state, SOCKET_DEAD, SOCKET_FAILED)) {
+ (void) unit_setup_dynamic_creds(u);
+ (void) unit_setup_exec_runtime(u);
+ }
+
+ socket_set_state(s, s->deserialized_state);
+ return 0;
+}
+
+static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
+
+ _cleanup_(exec_params_clear) ExecParameters exec_params = {
+ .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
+ .exec_fd = -1,
+ };
+ pid_t pid;
+ int r;
+
+ assert(s);
+ assert(c);
+ assert(_pid);
+
+ r = unit_prepare_exec(UNIT(s));
+ if (r < 0)
+ return r;
+
+ r = socket_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
+ if (r < 0)
+ return r;
+
+ r = unit_set_exec_params(UNIT(s), &exec_params);
+ if (r < 0)
+ return r;
+
+ r = exec_spawn(UNIT(s),
+ c,
+ &s->exec_context,
+ &exec_params,
+ s->exec_runtime,
+ &s->dynamic_creds,
+ &pid);
+ if (r < 0)
+ return r;
+
+ r = unit_watch_pid(UNIT(s), pid);
+ if (r < 0)
+ /* FIXME: we need to do something here */
+ return r;
+
+ *_pid = pid;
+
+ return 0;
+}
+
+static int socket_chown(Socket *s, pid_t *_pid) {
+ pid_t pid;
+ int r;
+
+ r = socket_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
+ if (r < 0)
+ goto fail;
+
+ /* We have to resolve the user names out-of-process, hence
+ * let's fork here. It's messy, but well, what can we do? */
+
+ r = unit_fork_helper_process(UNIT(s), "(sd-chown)", &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ uid_t uid = UID_INVALID;
+ gid_t gid = GID_INVALID;
+ SocketPort *p;
+
+ /* Child */
+
+ if (!isempty(s->user)) {
+ const char *user = s->user;
+
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
+ if (r < 0) {
+ log_unit_error_errno(UNIT(s), r, "Failed to resolve user %s: %m", user);
+ _exit(EXIT_USER);
+ }
+ }
+
+ if (!isempty(s->group)) {
+ const char *group = s->group;
+
+ r = get_group_creds(&group, &gid, 0);
+ if (r < 0) {
+ log_unit_error_errno(UNIT(s), r, "Failed to resolve group %s: %m", group);
+ _exit(EXIT_GROUP);
+ }
+ }
+
+ LIST_FOREACH(port, p, s->ports) {
+ const char *path = NULL;
+
+ if (p->type == SOCKET_SOCKET)
+ path = socket_address_get_path(&p->address);
+ else if (p->type == SOCKET_FIFO)
+ path = p->path;
+
+ if (!path)
+ continue;
+
+ if (chown(path, uid, gid) < 0) {
+ log_unit_error_errno(UNIT(s), errno, "Failed to chown(): %m");
+ _exit(EXIT_CHOWN);
+ }
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ r = unit_watch_pid(UNIT(s), pid);
+ if (r < 0)
+ goto fail;
+
+ *_pid = pid;
+ return 0;
+
+fail:
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+ return r;
+}
+
+static void socket_enter_dead(Socket *s, SocketResult f) {
+ assert(s);
+
+ if (s->result == SOCKET_SUCCESS)
+ s->result = f;
+
+ if (s->result == SOCKET_SUCCESS)
+ unit_log_success(UNIT(s));
+ else
+ unit_log_failure(UNIT(s), socket_result_to_string(s->result));
+
+ socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD);
+
+ s->exec_runtime = exec_runtime_unref(s->exec_runtime, true);
+
+ exec_context_destroy_runtime_directory(&s->exec_context, UNIT(s)->manager->prefix[EXEC_DIRECTORY_RUNTIME]);
+
+ unit_unref_uid_gid(UNIT(s), true);
+
+ dynamic_creds_destroy(&s->dynamic_creds);
+}
+
+static void socket_enter_signal(Socket *s, SocketState state, SocketResult f);
+
+static void socket_enter_stop_post(Socket *s, SocketResult f) {
+ int r;
+ assert(s);
+
+ if (s->result == SOCKET_SUCCESS)
+ s->result = f;
+
+ socket_unwatch_control_pid(s);
+ s->control_command_id = SOCKET_EXEC_STOP_POST;
+ s->control_command = s->exec_command[SOCKET_EXEC_STOP_POST];
+
+ if (s->control_command) {
+ r = socket_spawn(s, s->control_command, &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ socket_set_state(s, SOCKET_STOP_POST);
+ } else
+ socket_enter_signal(s, SOCKET_FINAL_SIGTERM, SOCKET_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'stop-post' task: %m");
+ socket_enter_signal(s, SOCKET_FINAL_SIGTERM, SOCKET_FAILURE_RESOURCES);
+}
+
+static void socket_enter_signal(Socket *s, SocketState state, SocketResult f) {
+ int r;
+
+ assert(s);
+
+ if (s->result == SOCKET_SUCCESS)
+ s->result = f;
+
+ r = unit_kill_context(
+ UNIT(s),
+ &s->kill_context,
+ !IN_SET(state, SOCKET_STOP_PRE_SIGTERM, SOCKET_FINAL_SIGTERM) ?
+ KILL_KILL : KILL_TERMINATE,
+ -1,
+ s->control_pid,
+ false);
+ if (r < 0)
+ goto fail;
+
+ if (r > 0) {
+ r = socket_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
+ if (r < 0)
+ goto fail;
+
+ socket_set_state(s, state);
+ } else if (state == SOCKET_STOP_PRE_SIGTERM)
+ socket_enter_signal(s, SOCKET_STOP_PRE_SIGKILL, SOCKET_SUCCESS);
+ else if (state == SOCKET_STOP_PRE_SIGKILL)
+ socket_enter_stop_post(s, SOCKET_SUCCESS);
+ else if (state == SOCKET_FINAL_SIGTERM)
+ socket_enter_signal(s, SOCKET_FINAL_SIGKILL, SOCKET_SUCCESS);
+ else
+ socket_enter_dead(s, SOCKET_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m");
+
+ if (IN_SET(state, SOCKET_STOP_PRE_SIGTERM, SOCKET_STOP_PRE_SIGKILL))
+ socket_enter_stop_post(s, SOCKET_FAILURE_RESOURCES);
+ else
+ socket_enter_dead(s, SOCKET_FAILURE_RESOURCES);
+}
+
+static void socket_enter_stop_pre(Socket *s, SocketResult f) {
+ int r;
+ assert(s);
+
+ if (s->result == SOCKET_SUCCESS)
+ s->result = f;
+
+ socket_unwatch_control_pid(s);
+ s->control_command_id = SOCKET_EXEC_STOP_PRE;
+ s->control_command = s->exec_command[SOCKET_EXEC_STOP_PRE];
+
+ if (s->control_command) {
+ r = socket_spawn(s, s->control_command, &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ socket_set_state(s, SOCKET_STOP_PRE);
+ } else
+ socket_enter_stop_post(s, SOCKET_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'stop-pre' task: %m");
+ socket_enter_stop_post(s, SOCKET_FAILURE_RESOURCES);
+}
+
+static void socket_enter_listening(Socket *s) {
+ int r;
+ assert(s);
+
+ r = socket_watch_fds(s);
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to watch sockets: %m");
+ goto fail;
+ }
+
+ socket_set_state(s, SOCKET_LISTENING);
+ return;
+
+fail:
+ socket_enter_stop_pre(s, SOCKET_FAILURE_RESOURCES);
+}
+
+static void socket_enter_start_post(Socket *s) {
+ int r;
+ assert(s);
+
+ socket_unwatch_control_pid(s);
+ s->control_command_id = SOCKET_EXEC_START_POST;
+ s->control_command = s->exec_command[SOCKET_EXEC_START_POST];
+
+ if (s->control_command) {
+ r = socket_spawn(s, s->control_command, &s->control_pid);
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'start-post' task: %m");
+ goto fail;
+ }
+
+ socket_set_state(s, SOCKET_START_POST);
+ } else
+ socket_enter_listening(s);
+
+ return;
+
+fail:
+ socket_enter_stop_pre(s, SOCKET_FAILURE_RESOURCES);
+}
+
+static void socket_enter_start_chown(Socket *s) {
+ int r;
+
+ assert(s);
+
+ r = socket_open_fds(s);
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to listen on sockets: %m");
+ goto fail;
+ }
+
+ if (!isempty(s->user) || !isempty(s->group)) {
+
+ socket_unwatch_control_pid(s);
+ s->control_command_id = SOCKET_EXEC_START_CHOWN;
+ s->control_command = NULL;
+
+ r = socket_chown(s, &s->control_pid);
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to fork 'start-chown' task: %m");
+ goto fail;
+ }
+
+ socket_set_state(s, SOCKET_START_CHOWN);
+ } else
+ socket_enter_start_post(s);
+
+ return;
+
+fail:
+ socket_enter_stop_pre(s, SOCKET_FAILURE_RESOURCES);
+}
+
+static void socket_enter_start_pre(Socket *s) {
+ int r;
+ assert(s);
+
+ socket_unwatch_control_pid(s);
+
+ unit_warn_leftover_processes(UNIT(s));
+
+ s->control_command_id = SOCKET_EXEC_START_PRE;
+ s->control_command = s->exec_command[SOCKET_EXEC_START_PRE];
+
+ if (s->control_command) {
+ r = socket_spawn(s, s->control_command, &s->control_pid);
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'start-pre' task: %m");
+ goto fail;
+ }
+
+ socket_set_state(s, SOCKET_START_PRE);
+ } else
+ socket_enter_start_chown(s);
+
+ return;
+
+fail:
+ socket_enter_dead(s, SOCKET_FAILURE_RESOURCES);
+}
+
+static void flush_ports(Socket *s) {
+ SocketPort *p;
+
+ /* Flush all incoming traffic, regardless if actual bytes or new connections, so that this socket isn't busy
+ * anymore */
+
+ LIST_FOREACH(port, p, s->ports) {
+ if (p->fd < 0)
+ continue;
+
+ (void) flush_accept(p->fd);
+ (void) flush_fd(p->fd);
+ }
+}
+
+static void socket_enter_running(Socket *s, int cfd) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ /* Note that this call takes possession of the connection fd passed. It either has to assign it somewhere or
+ * close it. */
+
+ assert(s);
+
+ /* We don't take connections anymore if we are supposed to shut down anyway */
+ if (unit_stop_pending(UNIT(s))) {
+
+ log_unit_debug(UNIT(s), "Suppressing connection request since unit stop is scheduled.");
+
+ if (cfd >= 0)
+ goto refuse;
+ else
+ flush_ports(s);
+
+ return;
+ }
+
+ if (!ratelimit_below(&s->trigger_limit)) {
+ log_unit_warning(UNIT(s), "Trigger limit hit, refusing further activation.");
+ socket_enter_stop_pre(s, SOCKET_FAILURE_TRIGGER_LIMIT_HIT);
+ goto refuse;
+ }
+
+ if (cfd < 0) {
+ bool pending = false;
+ Unit *other;
+ Iterator i;
+ void *v;
+
+ /* If there's already a start pending don't bother to
+ * do anything */
+ HASHMAP_FOREACH_KEY(v, other, UNIT(s)->dependencies[UNIT_TRIGGERS], i)
+ if (unit_active_or_pending(other)) {
+ pending = true;
+ break;
+ }
+
+ if (!pending) {
+ if (!UNIT_ISSET(s->service)) {
+ log_unit_error(UNIT(s), "Service to activate vanished, refusing activation.");
+ r = -ENOENT;
+ goto fail;
+ }
+
+ r = manager_add_job(UNIT(s)->manager, JOB_START, UNIT_DEREF(s->service), JOB_REPLACE, &error, NULL);
+ if (r < 0)
+ goto fail;
+ }
+
+ socket_set_state(s, SOCKET_RUNNING);
+ } else {
+ _cleanup_free_ char *prefix = NULL, *instance = NULL, *name = NULL;
+ _cleanup_(socket_peer_unrefp) SocketPeer *p = NULL;
+ Service *service;
+
+ if (s->n_connections >= s->max_connections) {
+ log_unit_warning(UNIT(s), "Too many incoming connections (%u), dropping connection.",
+ s->n_connections);
+ goto refuse;
+ }
+
+ if (s->max_connections_per_source > 0) {
+ r = socket_acquire_peer(s, cfd, &p);
+ if (r < 0) {
+ goto refuse;
+ } else if (r > 0 && p->n_ref > s->max_connections_per_source) {
+ _cleanup_free_ char *t = NULL;
+
+ (void) sockaddr_pretty(&p->peer.sa, p->peer_salen, true, false, &t);
+
+ log_unit_warning(UNIT(s),
+ "Too many incoming connections (%u) from source %s, dropping connection.",
+ p->n_ref, strnull(t));
+ goto refuse;
+ }
+ }
+
+ r = socket_instantiate_service(s);
+ if (r < 0)
+ goto fail;
+
+ r = instance_from_socket(cfd, s->n_accepted, &instance);
+ if (r < 0) {
+ if (r != -ENOTCONN)
+ goto fail;
+
+ /* ENOTCONN is legitimate if TCP RST was received.
+ * This connection is over, but the socket unit lives on. */
+ log_unit_debug(UNIT(s), "Got ENOTCONN on incoming socket, assuming aborted connection attempt, ignoring.");
+ goto refuse;
+ }
+
+ r = unit_name_to_prefix(UNIT(s)->id, &prefix);
+ if (r < 0)
+ goto fail;
+
+ r = unit_name_build(prefix, instance, ".service", &name);
+ if (r < 0)
+ goto fail;
+
+ r = unit_add_name(UNIT_DEREF(s->service), name);
+ if (r < 0)
+ goto fail;
+
+ service = SERVICE(UNIT_DEREF(s->service));
+ unit_ref_unset(&s->service);
+
+ s->n_accepted++;
+ unit_choose_id(UNIT(service), name);
+
+ r = service_set_socket_fd(service, cfd, s, s->selinux_context_from_net);
+ if (r < 0)
+ goto fail;
+
+ cfd = -1; /* We passed ownership of the fd to the service now. Forget it here. */
+ s->n_connections++;
+
+ service->peer = TAKE_PTR(p); /* Pass ownership of the peer reference */
+
+ r = manager_add_job(UNIT(s)->manager, JOB_START, UNIT(service), JOB_REPLACE, &error, NULL);
+ if (r < 0) {
+ /* We failed to activate the new service, but it still exists. Let's make sure the service
+ * closes and forgets the connection fd again, immediately. */
+ service_close_socket_fd(service);
+ goto fail;
+ }
+
+ /* Notify clients about changed counters */
+ unit_add_to_dbus_queue(UNIT(s));
+ }
+
+ return;
+
+refuse:
+ s->n_refused++;
+ safe_close(cfd);
+ return;
+
+fail:
+ log_unit_warning(UNIT(s), "Failed to queue service startup job (Maybe the service file is missing or not a %s unit?): %s",
+ cfd >= 0 ? "template" : "non-template",
+ bus_error_message(&error, r));
+
+ socket_enter_stop_pre(s, SOCKET_FAILURE_RESOURCES);
+ safe_close(cfd);
+}
+
+static void socket_run_next(Socket *s) {
+ int r;
+
+ assert(s);
+ assert(s->control_command);
+ assert(s->control_command->command_next);
+
+ socket_unwatch_control_pid(s);
+
+ s->control_command = s->control_command->command_next;
+
+ r = socket_spawn(s, s->control_command, &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run next task: %m");
+
+ if (s->state == SOCKET_START_POST)
+ socket_enter_stop_pre(s, SOCKET_FAILURE_RESOURCES);
+ else if (s->state == SOCKET_STOP_POST)
+ socket_enter_dead(s, SOCKET_FAILURE_RESOURCES);
+ else
+ socket_enter_signal(s, SOCKET_FINAL_SIGTERM, SOCKET_FAILURE_RESOURCES);
+}
+
+static int socket_start(Unit *u) {
+ Socket *s = SOCKET(u);
+ int r;
+
+ assert(s);
+
+ /* We cannot fulfill this request right now, try again later
+ * please! */
+ if (IN_SET(s->state,
+ SOCKET_STOP_PRE,
+ SOCKET_STOP_PRE_SIGKILL,
+ SOCKET_STOP_PRE_SIGTERM,
+ SOCKET_STOP_POST,
+ SOCKET_FINAL_SIGTERM,
+ SOCKET_FINAL_SIGKILL))
+ return -EAGAIN;
+
+ /* Already on it! */
+ if (IN_SET(s->state,
+ SOCKET_START_PRE,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST))
+ return 0;
+
+ /* Cannot run this without the service being around */
+ if (UNIT_ISSET(s->service)) {
+ Service *service;
+
+ service = SERVICE(UNIT_DEREF(s->service));
+
+ if (UNIT(service)->load_state != UNIT_LOADED) {
+ log_unit_error(u, "Socket service %s not loaded, refusing.", UNIT(service)->id);
+ return -ENOENT;
+ }
+
+ /* If the service is already active we cannot start the
+ * socket */
+ if (!IN_SET(service->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART)) {
+ log_unit_error(u, "Socket service %s already active, refusing.", UNIT(service)->id);
+ return -EBUSY;
+ }
+ }
+
+ assert(IN_SET(s->state, SOCKET_DEAD, SOCKET_FAILED));
+
+ r = unit_start_limit_test(u);
+ if (r < 0) {
+ socket_enter_dead(s, SOCKET_FAILURE_START_LIMIT_HIT);
+ return r;
+ }
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ s->result = SOCKET_SUCCESS;
+ exec_command_reset_status_list_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX);
+
+ u->reset_accounting = true;
+
+ socket_enter_start_pre(s);
+ return 1;
+}
+
+static int socket_stop(Unit *u) {
+ Socket *s = SOCKET(u);
+
+ assert(s);
+
+ /* Already on it */
+ if (IN_SET(s->state,
+ SOCKET_STOP_PRE,
+ SOCKET_STOP_PRE_SIGTERM,
+ SOCKET_STOP_PRE_SIGKILL,
+ SOCKET_STOP_POST,
+ SOCKET_FINAL_SIGTERM,
+ SOCKET_FINAL_SIGKILL))
+ return 0;
+
+ /* If there's already something running we go directly into
+ * kill mode. */
+ if (IN_SET(s->state,
+ SOCKET_START_PRE,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST)) {
+ socket_enter_signal(s, SOCKET_STOP_PRE_SIGTERM, SOCKET_SUCCESS);
+ return -EAGAIN;
+ }
+
+ assert(IN_SET(s->state, SOCKET_LISTENING, SOCKET_RUNNING));
+
+ socket_enter_stop_pre(s, SOCKET_SUCCESS);
+ return 1;
+}
+
+static int socket_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Socket *s = SOCKET(u);
+ SocketPort *p;
+ int r;
+
+ assert(u);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", socket_state_to_string(s->state));
+ (void) serialize_item(f, "result", socket_result_to_string(s->result));
+ (void) serialize_item_format(f, "n-accepted", "%u", s->n_accepted);
+ (void) serialize_item_format(f, "n-refused", "%u", s->n_refused);
+
+ if (s->control_pid > 0)
+ (void) serialize_item_format(f, "control-pid", PID_FMT, s->control_pid);
+
+ if (s->control_command_id >= 0)
+ (void) serialize_item(f, "control-command", socket_exec_command_to_string(s->control_command_id));
+
+ LIST_FOREACH(port, p, s->ports) {
+ int copy;
+
+ if (p->fd < 0)
+ continue;
+
+ copy = fdset_put_dup(fds, p->fd);
+ if (copy < 0)
+ return log_unit_warning_errno(u, copy, "Failed to serialize socket fd: %m");
+
+ if (p->type == SOCKET_SOCKET) {
+ _cleanup_free_ char *t = NULL;
+
+ r = socket_address_print(&p->address, &t);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to format socket address: %m");
+
+ if (socket_address_family(&p->address) == AF_NETLINK)
+ (void) serialize_item_format(f, "netlink", "%i %s", copy, t);
+ else
+ (void) serialize_item_format(f, "socket", "%i %i %s", copy, p->address.type, t);
+ } else if (p->type == SOCKET_SPECIAL)
+ (void) serialize_item_format(f, "special", "%i %s", copy, p->path);
+ else if (p->type == SOCKET_MQUEUE)
+ (void) serialize_item_format(f, "mqueue", "%i %s", copy, p->path);
+ else if (p->type == SOCKET_USB_FUNCTION)
+ (void) serialize_item_format(f, "ffs", "%i %s", copy, p->path);
+ else {
+ assert(p->type == SOCKET_FIFO);
+ (void) serialize_item_format(f, "fifo", "%i %s", copy, p->path);
+ }
+ }
+
+ return 0;
+}
+
+static void socket_port_take_fd(SocketPort *p, FDSet *fds, int fd) {
+ assert(p);
+
+ safe_close(p->fd);
+ p->fd = fdset_remove(fds, fd);
+}
+
+static int socket_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Socket *s = SOCKET(u);
+
+ assert(u);
+ assert(key);
+ assert(value);
+
+ if (streq(key, "state")) {
+ SocketState state;
+
+ state = socket_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ s->deserialized_state = state;
+ } else if (streq(key, "result")) {
+ SocketResult f;
+
+ f = socket_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse result value: %s", value);
+ else if (f != SOCKET_SUCCESS)
+ s->result = f;
+
+ } else if (streq(key, "n-accepted")) {
+ unsigned k;
+
+ if (safe_atou(value, &k) < 0)
+ log_unit_debug(u, "Failed to parse n-accepted value: %s", value);
+ else
+ s->n_accepted += k;
+ } else if (streq(key, "n-refused")) {
+ unsigned k;
+
+ if (safe_atou(value, &k) < 0)
+ log_unit_debug(u, "Failed to parse n-refused value: %s", value);
+ else
+ s->n_refused += k;
+ } else if (streq(key, "control-pid")) {
+ pid_t pid;
+
+ if (parse_pid(value, &pid) < 0)
+ log_unit_debug(u, "Failed to parse control-pid value: %s", value);
+ else
+ s->control_pid = pid;
+ } else if (streq(key, "control-command")) {
+ SocketExecCommand id;
+
+ id = socket_exec_command_from_string(value);
+ if (id < 0)
+ log_unit_debug(u, "Failed to parse exec-command value: %s", value);
+ else {
+ s->control_command_id = id;
+ s->control_command = s->exec_command[id];
+ }
+ } else if (streq(key, "fifo")) {
+ int fd, skip = 0;
+ SocketPort *p;
+
+ if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse fifo value: %s", value);
+ else
+ LIST_FOREACH(port, p, s->ports)
+ if (p->type == SOCKET_FIFO &&
+ path_equal_or_files_same(p->path, value+skip, 0)) {
+ socket_port_take_fd(p, fds, fd);
+ break;
+ }
+
+ } else if (streq(key, "special")) {
+ int fd, skip = 0;
+ SocketPort *p;
+
+ if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse special value: %s", value);
+ else
+ LIST_FOREACH(port, p, s->ports)
+ if (p->type == SOCKET_SPECIAL &&
+ path_equal_or_files_same(p->path, value+skip, 0)) {
+ socket_port_take_fd(p, fds, fd);
+ break;
+ }
+
+ } else if (streq(key, "mqueue")) {
+ int fd, skip = 0;
+ SocketPort *p;
+
+ if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse mqueue value: %s", value);
+ else
+ LIST_FOREACH(port, p, s->ports)
+ if (p->type == SOCKET_MQUEUE &&
+ streq(p->path, value+skip)) {
+ socket_port_take_fd(p, fds, fd);
+ break;
+ }
+
+ } else if (streq(key, "socket")) {
+ int fd, type, skip = 0;
+ SocketPort *p;
+
+ if (sscanf(value, "%i %i %n", &fd, &type, &skip) < 2 || fd < 0 || type < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse socket value: %s", value);
+ else
+ LIST_FOREACH(port, p, s->ports)
+ if (socket_address_is(&p->address, value+skip, type)) {
+ socket_port_take_fd(p, fds, fd);
+ break;
+ }
+
+ } else if (streq(key, "netlink")) {
+ int fd, skip = 0;
+ SocketPort *p;
+
+ if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse socket value: %s", value);
+ else
+ LIST_FOREACH(port, p, s->ports)
+ if (socket_address_is_netlink(&p->address, value+skip)) {
+ socket_port_take_fd(p, fds, fd);
+ break;
+ }
+
+ } else if (streq(key, "ffs")) {
+ int fd, skip = 0;
+ SocketPort *p;
+
+ if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse ffs value: %s", value);
+ else
+ LIST_FOREACH(port, p, s->ports)
+ if (p->type == SOCKET_USB_FUNCTION &&
+ path_equal_or_files_same(p->path, value+skip, 0)) {
+ socket_port_take_fd(p, fds, fd);
+ break;
+ }
+
+ } else
+ log_unit_debug(UNIT(s), "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+static void socket_distribute_fds(Unit *u, FDSet *fds) {
+ Socket *s = SOCKET(u);
+ SocketPort *p;
+
+ assert(u);
+
+ LIST_FOREACH(port, p, s->ports) {
+ Iterator i;
+ int fd;
+
+ if (p->type != SOCKET_SOCKET)
+ continue;
+
+ if (p->fd >= 0)
+ continue;
+
+ FDSET_FOREACH(fd, fds, i) {
+ if (socket_address_matches_fd(&p->address, fd)) {
+ p->fd = fdset_remove(fds, fd);
+ s->deserialized_state = SOCKET_LISTENING;
+ break;
+ }
+ }
+ }
+}
+
+_pure_ static UnitActiveState socket_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[SOCKET(u)->state];
+}
+
+_pure_ static const char *socket_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return socket_state_to_string(SOCKET(u)->state);
+}
+
+const char* socket_port_type_to_string(SocketPort *p) {
+
+ assert(p);
+
+ switch (p->type) {
+
+ case SOCKET_SOCKET:
+
+ switch (p->address.type) {
+
+ case SOCK_STREAM:
+ return "Stream";
+
+ case SOCK_DGRAM:
+ return "Datagram";
+
+ case SOCK_SEQPACKET:
+ return "SequentialPacket";
+
+ case SOCK_RAW:
+ if (socket_address_family(&p->address) == AF_NETLINK)
+ return "Netlink";
+
+ _fallthrough_;
+ default:
+ return NULL;
+ }
+
+ case SOCKET_SPECIAL:
+ return "Special";
+
+ case SOCKET_MQUEUE:
+ return "MessageQueue";
+
+ case SOCKET_FIFO:
+ return "FIFO";
+
+ case SOCKET_USB_FUNCTION:
+ return "USBFunction";
+
+ default:
+ return NULL;
+ }
+}
+
+SocketType socket_port_type_from_string(const char *s) {
+ assert(s);
+
+ if (STR_IN_SET(s, "Stream", "Datagram", "SequentialPacket", "Netlink"))
+ return SOCKET_SOCKET;
+ else if (streq(s, "Special"))
+ return SOCKET_SPECIAL;
+ else if (streq(s, "MessageQueue"))
+ return SOCKET_MQUEUE;
+ else if (streq(s, "FIFO"))
+ return SOCKET_FIFO;
+ else if (streq(s, "USBFunction"))
+ return SOCKET_USB_FUNCTION;
+ else
+ return _SOCKET_TYPE_INVALID;
+}
+
+_pure_ static bool socket_may_gc(Unit *u) {
+ Socket *s = SOCKET(u);
+
+ assert(u);
+
+ return s->n_connections == 0;
+}
+
+static int socket_accept_do(Socket *s, int fd) {
+ int cfd;
+
+ assert(s);
+ assert(fd >= 0);
+
+ for (;;) {
+ cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
+ if (cfd < 0) {
+ if (errno == EINTR)
+ continue;
+
+ return -errno;
+ }
+
+ break;
+ }
+
+ return cfd;
+}
+
+static int socket_accept_in_cgroup(Socket *s, SocketPort *p, int fd) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ int cfd, r;
+ pid_t pid;
+
+ assert(s);
+ assert(p);
+ assert(fd >= 0);
+
+ /* Similar to socket_address_listen_in_cgroup(), but for accept() rathern than socket(): make sure that any
+ * connection socket is also properly associated with the cgroup. */
+
+ if (!IN_SET(p->address.sockaddr.sa.sa_family, AF_INET, AF_INET6))
+ goto shortcut;
+
+ r = bpf_firewall_supported();
+ if (r < 0)
+ return r;
+ if (r == BPF_FIREWALL_UNSUPPORTED)
+ goto shortcut;
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
+ return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
+
+ r = unit_fork_helper_process(UNIT(s), "(sd-accept)", &pid);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to fork off accept stub process: %m");
+ if (r == 0) {
+ /* Child */
+
+ pair[0] = safe_close(pair[0]);
+
+ cfd = socket_accept_do(s, fd);
+ if (cfd < 0) {
+ log_unit_error_errno(UNIT(s), cfd, "Failed to accept connection socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ r = send_one_fd(pair[1], cfd, 0);
+ if (r < 0) {
+ log_unit_error_errno(UNIT(s), r, "Failed to send connection socket to parent: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+ cfd = receive_one_fd(pair[0], 0);
+
+ /* We synchronously wait for the helper, as it shouldn't be slow */
+ r = wait_for_terminate_and_check("(sd-accept)", pid, WAIT_LOG_ABNORMAL);
+ if (r < 0) {
+ safe_close(cfd);
+ return r;
+ }
+
+ if (cfd < 0)
+ return log_unit_error_errno(UNIT(s), cfd, "Failed to receive connection socket: %m");
+
+ return cfd;
+
+shortcut:
+ cfd = socket_accept_do(s, fd);
+ if (cfd < 0)
+ return log_unit_error_errno(UNIT(s), cfd, "Failed to accept connection socket: %m");
+
+ return cfd;
+}
+
+static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ SocketPort *p = userdata;
+ int cfd = -1;
+
+ assert(p);
+ assert(fd >= 0);
+
+ if (p->socket->state != SOCKET_LISTENING)
+ return 0;
+
+ log_unit_debug(UNIT(p->socket), "Incoming traffic");
+
+ if (revents != EPOLLIN) {
+
+ if (revents & EPOLLHUP)
+ log_unit_error(UNIT(p->socket), "Got POLLHUP on a listening socket. The service probably invoked shutdown() on it, and should better not do that.");
+ else
+ log_unit_error(UNIT(p->socket), "Got unexpected poll event (0x%x) on socket.", revents);
+ goto fail;
+ }
+
+ if (p->socket->accept &&
+ p->type == SOCKET_SOCKET &&
+ socket_address_can_accept(&p->address)) {
+
+ cfd = socket_accept_in_cgroup(p->socket, p, fd);
+ if (cfd < 0)
+ goto fail;
+
+ socket_apply_socket_options(p->socket, cfd);
+ }
+
+ socket_enter_running(p->socket, cfd);
+ return 0;
+
+fail:
+ socket_enter_stop_pre(p->socket, SOCKET_FAILURE_RESOURCES);
+ return 0;
+}
+
+static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) {
+ Socket *s = SOCKET(u);
+ SocketResult f;
+
+ assert(s);
+ assert(pid >= 0);
+
+ if (pid != s->control_pid)
+ return;
+
+ s->control_pid = 0;
+
+ if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
+ f = SOCKET_SUCCESS;
+ else if (code == CLD_EXITED)
+ f = SOCKET_FAILURE_EXIT_CODE;
+ else if (code == CLD_KILLED)
+ f = SOCKET_FAILURE_SIGNAL;
+ else if (code == CLD_DUMPED)
+ f = SOCKET_FAILURE_CORE_DUMP;
+ else
+ assert_not_reached("Unknown sigchld code");
+
+ if (s->control_command) {
+ exec_status_exit(&s->control_command->exec_status, &s->exec_context, pid, code, status);
+
+ if (s->control_command->flags & EXEC_COMMAND_IGNORE_FAILURE)
+ f = SOCKET_SUCCESS;
+ }
+
+ unit_log_process_exit(
+ u, f == SOCKET_SUCCESS ? LOG_DEBUG : LOG_NOTICE,
+ "Control process",
+ socket_exec_command_to_string(s->control_command_id),
+ code, status);
+
+ if (s->result == SOCKET_SUCCESS)
+ s->result = f;
+
+ if (s->control_command &&
+ s->control_command->command_next &&
+ f == SOCKET_SUCCESS) {
+
+ log_unit_debug(u, "Running next command for state %s", socket_state_to_string(s->state));
+ socket_run_next(s);
+ } else {
+ s->control_command = NULL;
+ s->control_command_id = _SOCKET_EXEC_COMMAND_INVALID;
+
+ /* No further commands for this step, so let's figure
+ * out what to do next */
+
+ log_unit_debug(u, "Got final SIGCHLD for state %s", socket_state_to_string(s->state));
+
+ switch (s->state) {
+
+ case SOCKET_START_PRE:
+ if (f == SOCKET_SUCCESS)
+ socket_enter_start_chown(s);
+ else
+ socket_enter_signal(s, SOCKET_FINAL_SIGTERM, f);
+ break;
+
+ case SOCKET_START_CHOWN:
+ if (f == SOCKET_SUCCESS)
+ socket_enter_start_post(s);
+ else
+ socket_enter_stop_pre(s, f);
+ break;
+
+ case SOCKET_START_POST:
+ if (f == SOCKET_SUCCESS)
+ socket_enter_listening(s);
+ else
+ socket_enter_stop_pre(s, f);
+ break;
+
+ case SOCKET_STOP_PRE:
+ case SOCKET_STOP_PRE_SIGTERM:
+ case SOCKET_STOP_PRE_SIGKILL:
+ socket_enter_stop_post(s, f);
+ break;
+
+ case SOCKET_STOP_POST:
+ case SOCKET_FINAL_SIGTERM:
+ case SOCKET_FINAL_SIGKILL:
+ socket_enter_dead(s, f);
+ break;
+
+ default:
+ assert_not_reached("Uh, control process died at wrong time.");
+ }
+ }
+
+ /* Notify clients about changed exit status */
+ unit_add_to_dbus_queue(u);
+}
+
+static int socket_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
+ Socket *s = SOCKET(userdata);
+
+ assert(s);
+ assert(s->timer_event_source == source);
+
+ switch (s->state) {
+
+ case SOCKET_START_PRE:
+ log_unit_warning(UNIT(s), "Starting timed out. Terminating.");
+ socket_enter_signal(s, SOCKET_FINAL_SIGTERM, SOCKET_FAILURE_TIMEOUT);
+ break;
+
+ case SOCKET_START_CHOWN:
+ case SOCKET_START_POST:
+ log_unit_warning(UNIT(s), "Starting timed out. Stopping.");
+ socket_enter_stop_pre(s, SOCKET_FAILURE_TIMEOUT);
+ break;
+
+ case SOCKET_STOP_PRE:
+ log_unit_warning(UNIT(s), "Stopping timed out. Terminating.");
+ socket_enter_signal(s, SOCKET_STOP_PRE_SIGTERM, SOCKET_FAILURE_TIMEOUT);
+ break;
+
+ case SOCKET_STOP_PRE_SIGTERM:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "Stopping timed out. Killing.");
+ socket_enter_signal(s, SOCKET_STOP_PRE_SIGKILL, SOCKET_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "Stopping timed out. Skipping SIGKILL. Ignoring.");
+ socket_enter_stop_post(s, SOCKET_FAILURE_TIMEOUT);
+ }
+ break;
+
+ case SOCKET_STOP_PRE_SIGKILL:
+ log_unit_warning(UNIT(s), "Processes still around after SIGKILL. Ignoring.");
+ socket_enter_stop_post(s, SOCKET_FAILURE_TIMEOUT);
+ break;
+
+ case SOCKET_STOP_POST:
+ log_unit_warning(UNIT(s), "Stopping timed out (2). Terminating.");
+ socket_enter_signal(s, SOCKET_FINAL_SIGTERM, SOCKET_FAILURE_TIMEOUT);
+ break;
+
+ case SOCKET_FINAL_SIGTERM:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "Stopping timed out (2). Killing.");
+ socket_enter_signal(s, SOCKET_FINAL_SIGKILL, SOCKET_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "Stopping timed out (2). Skipping SIGKILL. Ignoring.");
+ socket_enter_dead(s, SOCKET_FAILURE_TIMEOUT);
+ }
+ break;
+
+ case SOCKET_FINAL_SIGKILL:
+ log_unit_warning(UNIT(s), "Still around after SIGKILL (2). Entering failed mode.");
+ socket_enter_dead(s, SOCKET_FAILURE_TIMEOUT);
+ break;
+
+ default:
+ assert_not_reached("Timeout at wrong time.");
+ }
+
+ return 0;
+}
+
+int socket_collect_fds(Socket *s, int **fds) {
+ size_t k = 0, n = 0;
+ SocketPort *p;
+ int *rfds;
+
+ assert(s);
+ assert(fds);
+
+ /* Called from the service code for requesting our fds */
+
+ LIST_FOREACH(port, p, s->ports) {
+ if (p->fd >= 0)
+ n++;
+ n += p->n_auxiliary_fds;
+ }
+
+ if (n <= 0) {
+ *fds = NULL;
+ return 0;
+ }
+
+ rfds = new(int, n);
+ if (!rfds)
+ return -ENOMEM;
+
+ LIST_FOREACH(port, p, s->ports) {
+ size_t i;
+
+ if (p->fd >= 0)
+ rfds[k++] = p->fd;
+ for (i = 0; i < p->n_auxiliary_fds; ++i)
+ rfds[k++] = p->auxiliary_fds[i];
+ }
+
+ assert(k == n);
+
+ *fds = rfds;
+ return (int) n;
+}
+
+static void socket_reset_failed(Unit *u) {
+ Socket *s = SOCKET(u);
+
+ assert(s);
+
+ if (s->state == SOCKET_FAILED)
+ socket_set_state(s, SOCKET_DEAD);
+
+ s->result = SOCKET_SUCCESS;
+}
+
+void socket_connection_unref(Socket *s) {
+ assert(s);
+
+ /* The service is dead. Yay!
+ *
+ * This is strictly for one-instance-per-connection
+ * services. */
+
+ assert(s->n_connections > 0);
+ s->n_connections--;
+
+ log_unit_debug(UNIT(s), "One connection closed, %u left.", s->n_connections);
+}
+
+static void socket_trigger_notify(Unit *u, Unit *other) {
+ Socket *s = SOCKET(u);
+
+ assert(u);
+ assert(other);
+
+ /* Filter out invocations with bogus state */
+ if (other->load_state != UNIT_LOADED || other->type != UNIT_SERVICE)
+ return;
+
+ /* Don't propagate state changes from the service if we are already down */
+ if (!IN_SET(s->state, SOCKET_RUNNING, SOCKET_LISTENING))
+ return;
+
+ /* We don't care for the service state if we are in Accept=yes mode */
+ if (s->accept)
+ return;
+
+ /* Propagate start limit hit state */
+ if (other->start_limit_hit) {
+ socket_enter_stop_pre(s, SOCKET_FAILURE_SERVICE_START_LIMIT_HIT);
+ return;
+ }
+
+ /* Don't propagate anything if there's still a job queued */
+ if (other->job)
+ return;
+
+ if (IN_SET(SERVICE(other)->state,
+ SERVICE_DEAD, SERVICE_FAILED,
+ SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
+ SERVICE_AUTO_RESTART))
+ socket_enter_listening(s);
+
+ if (SERVICE(other)->state == SERVICE_RUNNING)
+ socket_set_state(s, SOCKET_RUNNING);
+}
+
+static int socket_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {
+ return unit_kill_common(u, who, signo, -1, SOCKET(u)->control_pid, error);
+}
+
+static int socket_get_timeout(Unit *u, usec_t *timeout) {
+ Socket *s = SOCKET(u);
+ usec_t t;
+ int r;
+
+ if (!s->timer_event_source)
+ return 0;
+
+ r = sd_event_source_get_time(s->timer_event_source, &t);
+ if (r < 0)
+ return r;
+ if (t == USEC_INFINITY)
+ return 0;
+
+ *timeout = t;
+ return 1;
+}
+
+char *socket_fdname(Socket *s) {
+ assert(s);
+
+ /* Returns the name to use for $LISTEN_NAMES. If the user
+ * didn't specify anything specifically, use the socket unit's
+ * name as fallback. */
+
+ return s->fdname ?: UNIT(s)->id;
+}
+
+static int socket_control_pid(Unit *u) {
+ Socket *s = SOCKET(u);
+
+ assert(s);
+
+ return s->control_pid;
+}
+
+static const char* const socket_exec_command_table[_SOCKET_EXEC_COMMAND_MAX] = {
+ [SOCKET_EXEC_START_PRE] = "ExecStartPre",
+ [SOCKET_EXEC_START_CHOWN] = "ExecStartChown",
+ [SOCKET_EXEC_START_POST] = "ExecStartPost",
+ [SOCKET_EXEC_STOP_PRE] = "ExecStopPre",
+ [SOCKET_EXEC_STOP_POST] = "ExecStopPost"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_exec_command, SocketExecCommand);
+
+static const char* const socket_result_table[_SOCKET_RESULT_MAX] = {
+ [SOCKET_SUCCESS] = "success",
+ [SOCKET_FAILURE_RESOURCES] = "resources",
+ [SOCKET_FAILURE_TIMEOUT] = "timeout",
+ [SOCKET_FAILURE_EXIT_CODE] = "exit-code",
+ [SOCKET_FAILURE_SIGNAL] = "signal",
+ [SOCKET_FAILURE_CORE_DUMP] = "core-dump",
+ [SOCKET_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+ [SOCKET_FAILURE_TRIGGER_LIMIT_HIT] = "trigger-limit-hit",
+ [SOCKET_FAILURE_SERVICE_START_LIMIT_HIT] = "service-start-limit-hit"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_result, SocketResult);
+
+const UnitVTable socket_vtable = {
+ .object_size = sizeof(Socket),
+ .exec_context_offset = offsetof(Socket, exec_context),
+ .cgroup_context_offset = offsetof(Socket, cgroup_context),
+ .kill_context_offset = offsetof(Socket, kill_context),
+ .exec_runtime_offset = offsetof(Socket, exec_runtime),
+ .dynamic_creds_offset = offsetof(Socket, dynamic_creds),
+
+ .sections =
+ "Unit\0"
+ "Socket\0"
+ "Install\0",
+ .private_section = "Socket",
+
+ .can_transient = true,
+
+ .init = socket_init,
+ .done = socket_done,
+ .load = socket_load,
+
+ .coldplug = socket_coldplug,
+
+ .dump = socket_dump,
+
+ .start = socket_start,
+ .stop = socket_stop,
+
+ .kill = socket_kill,
+
+ .get_timeout = socket_get_timeout,
+
+ .serialize = socket_serialize,
+ .deserialize_item = socket_deserialize_item,
+ .distribute_fds = socket_distribute_fds,
+
+ .active_state = socket_active_state,
+ .sub_state_to_string = socket_sub_state_to_string,
+
+ .may_gc = socket_may_gc,
+
+ .sigchld_event = socket_sigchld_event,
+
+ .trigger_notify = socket_trigger_notify,
+
+ .reset_failed = socket_reset_failed,
+
+ .control_pid = socket_control_pid,
+
+ .bus_vtable = bus_socket_vtable,
+ .bus_set_property = bus_socket_set_property,
+ .bus_commit_properties = bus_socket_commit_properties,
+
+ .status_message_formats = {
+ /*.starting_stopping = {
+ [0] = "Starting socket %s...",
+ [1] = "Stopping socket %s...",
+ },*/
+ .finished_start_job = {
+ [JOB_DONE] = "Listening on %s.",
+ [JOB_FAILED] = "Failed to listen on %s.",
+ [JOB_TIMEOUT] = "Timed out starting %s.",
+ },
+ .finished_stop_job = {
+ [JOB_DONE] = "Closed %s.",
+ [JOB_FAILED] = "Failed stopping %s.",
+ [JOB_TIMEOUT] = "Timed out stopping %s.",
+ },
+ },
+};
diff --git a/src/core/socket.h b/src/core/socket.h
new file mode 100644
index 0000000..c4e25db
--- /dev/null
+++ b/src/core/socket.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Socket Socket;
+typedef struct SocketPeer SocketPeer;
+
+#include "mount.h"
+#include "service.h"
+#include "socket-util.h"
+#include "unit.h"
+
+typedef enum SocketExecCommand {
+ SOCKET_EXEC_START_PRE,
+ SOCKET_EXEC_START_CHOWN,
+ SOCKET_EXEC_START_POST,
+ SOCKET_EXEC_STOP_PRE,
+ SOCKET_EXEC_STOP_POST,
+ _SOCKET_EXEC_COMMAND_MAX,
+ _SOCKET_EXEC_COMMAND_INVALID = -1
+} SocketExecCommand;
+
+typedef enum SocketType {
+ SOCKET_SOCKET,
+ SOCKET_FIFO,
+ SOCKET_SPECIAL,
+ SOCKET_MQUEUE,
+ SOCKET_USB_FUNCTION,
+ _SOCKET_TYPE_MAX,
+ _SOCKET_TYPE_INVALID = -1
+} SocketType;
+
+typedef enum SocketResult {
+ SOCKET_SUCCESS,
+ SOCKET_FAILURE_RESOURCES,
+ SOCKET_FAILURE_TIMEOUT,
+ SOCKET_FAILURE_EXIT_CODE,
+ SOCKET_FAILURE_SIGNAL,
+ SOCKET_FAILURE_CORE_DUMP,
+ SOCKET_FAILURE_START_LIMIT_HIT,
+ SOCKET_FAILURE_TRIGGER_LIMIT_HIT,
+ SOCKET_FAILURE_SERVICE_START_LIMIT_HIT,
+ _SOCKET_RESULT_MAX,
+ _SOCKET_RESULT_INVALID = -1
+} SocketResult;
+
+typedef struct SocketPort {
+ Socket *socket;
+
+ SocketType type;
+ int fd;
+ int *auxiliary_fds;
+ size_t n_auxiliary_fds;
+
+ SocketAddress address;
+ char *path;
+ sd_event_source *event_source;
+
+ LIST_FIELDS(struct SocketPort, port);
+} SocketPort;
+
+struct Socket {
+ Unit meta;
+
+ LIST_HEAD(SocketPort, ports);
+
+ Set *peers_by_address;
+
+ unsigned n_accepted;
+ unsigned n_connections;
+ unsigned n_refused;
+ unsigned max_connections;
+ unsigned max_connections_per_source;
+
+ unsigned backlog;
+ unsigned keep_alive_cnt;
+ usec_t timeout_usec;
+ usec_t keep_alive_time;
+ usec_t keep_alive_interval;
+ usec_t defer_accept;
+
+ ExecCommand* exec_command[_SOCKET_EXEC_COMMAND_MAX];
+ ExecContext exec_context;
+ KillContext kill_context;
+ CGroupContext cgroup_context;
+
+ ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
+
+ /* For Accept=no sockets refers to the one service we'll
+ * activate. For Accept=yes sockets is either NULL, or filled
+ * to refer to the next service we spawn. */
+ UnitRef service;
+
+ SocketState state, deserialized_state;
+
+ sd_event_source *timer_event_source;
+
+ ExecCommand* control_command;
+ SocketExecCommand control_command_id;
+ pid_t control_pid;
+
+ mode_t directory_mode;
+ mode_t socket_mode;
+
+ SocketResult result;
+
+ char **symlinks;
+
+ bool accept;
+ bool remove_on_stop;
+ bool writable;
+
+ int socket_protocol;
+
+ /* Socket options */
+ bool keep_alive;
+ bool no_delay;
+ bool free_bind;
+ bool transparent;
+ bool broadcast;
+ bool pass_cred;
+ bool pass_sec;
+
+ /* Only for INET6 sockets: issue IPV6_V6ONLY sockopt */
+ SocketAddressBindIPv6Only bind_ipv6_only;
+
+ int priority;
+ int mark;
+ size_t receive_buffer;
+ size_t send_buffer;
+ int ip_tos;
+ int ip_ttl;
+ size_t pipe_size;
+ char *bind_to_device;
+ char *tcp_congestion;
+ bool reuse_port;
+ long mq_maxmsg;
+ long mq_msgsize;
+
+ char *smack;
+ char *smack_ip_in;
+ char *smack_ip_out;
+
+ bool selinux_context_from_net;
+
+ char *user, *group;
+
+ char *fdname;
+
+ RateLimit trigger_limit;
+};
+
+SocketPeer *socket_peer_ref(SocketPeer *p);
+SocketPeer *socket_peer_unref(SocketPeer *p);
+int socket_acquire_peer(Socket *s, int fd, SocketPeer **p);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(SocketPeer*, socket_peer_unref);
+
+/* Called from the service code when collecting fds */
+int socket_collect_fds(Socket *s, int **fds);
+
+/* Called from the service code when a per-connection service ended */
+void socket_connection_unref(Socket *s);
+
+void socket_free_ports(Socket *s);
+
+int socket_instantiate_service(Socket *s);
+
+char *socket_fdname(Socket *s);
+
+extern const UnitVTable socket_vtable;
+
+const char* socket_exec_command_to_string(SocketExecCommand i) _const_;
+SocketExecCommand socket_exec_command_from_string(const char *s) _pure_;
+
+const char* socket_result_to_string(SocketResult i) _const_;
+SocketResult socket_result_from_string(const char *s) _pure_;
+
+const char* socket_port_type_to_string(SocketPort *p) _pure_;
+SocketType socket_port_type_from_string(const char *p) _pure_;
+
+DEFINE_CAST(SOCKET, Socket);
diff --git a/src/core/swap.c b/src/core/swap.c
new file mode 100644
index 0000000..2d8463b
--- /dev/null
+++ b/src/core/swap.c
@@ -0,0 +1,1555 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sys/epoll.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "dbus-swap.h"
+#include "dbus-unit.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "device.h"
+#include "escape.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fstab-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "special.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "swap.h"
+#include "unit-name.h"
+#include "unit.h"
+#include "virt.h"
+
+static const UnitActiveState state_translation_table[_SWAP_STATE_MAX] = {
+ [SWAP_DEAD] = UNIT_INACTIVE,
+ [SWAP_ACTIVATING] = UNIT_ACTIVATING,
+ [SWAP_ACTIVATING_DONE] = UNIT_ACTIVE,
+ [SWAP_ACTIVE] = UNIT_ACTIVE,
+ [SWAP_DEACTIVATING] = UNIT_DEACTIVATING,
+ [SWAP_DEACTIVATING_SIGTERM] = UNIT_DEACTIVATING,
+ [SWAP_DEACTIVATING_SIGKILL] = UNIT_DEACTIVATING,
+ [SWAP_FAILED] = UNIT_FAILED
+};
+
+static int swap_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
+static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+
+static bool SWAP_STATE_WITH_PROCESS(SwapState state) {
+ return IN_SET(state,
+ SWAP_ACTIVATING,
+ SWAP_ACTIVATING_DONE,
+ SWAP_DEACTIVATING,
+ SWAP_DEACTIVATING_SIGTERM,
+ SWAP_DEACTIVATING_SIGKILL);
+}
+
+static void swap_unset_proc_swaps(Swap *s) {
+ assert(s);
+
+ if (!s->from_proc_swaps)
+ return;
+
+ s->parameters_proc_swaps.what = mfree(s->parameters_proc_swaps.what);
+
+ s->from_proc_swaps = false;
+}
+
+static int swap_set_devnode(Swap *s, const char *devnode) {
+ Hashmap *swaps;
+ Swap *first;
+ int r;
+
+ assert(s);
+
+ r = hashmap_ensure_allocated(&UNIT(s)->manager->swaps_by_devnode, &path_hash_ops);
+ if (r < 0)
+ return r;
+
+ swaps = UNIT(s)->manager->swaps_by_devnode;
+
+ if (s->devnode) {
+ first = hashmap_get(swaps, s->devnode);
+
+ LIST_REMOVE(same_devnode, first, s);
+ if (first)
+ hashmap_replace(swaps, first->devnode, first);
+ else
+ hashmap_remove(swaps, s->devnode);
+
+ s->devnode = mfree(s->devnode);
+ }
+
+ if (devnode) {
+ s->devnode = strdup(devnode);
+ if (!s->devnode)
+ return -ENOMEM;
+
+ first = hashmap_get(swaps, s->devnode);
+ LIST_PREPEND(same_devnode, first, s);
+
+ return hashmap_replace(swaps, first->devnode, first);
+ }
+
+ return 0;
+}
+
+static void swap_init(Unit *u) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+ assert(UNIT(s)->load_state == UNIT_STUB);
+
+ s->timeout_usec = u->manager->default_timeout_start_usec;
+
+ s->exec_context.std_output = u->manager->default_std_output;
+ s->exec_context.std_error = u->manager->default_std_error;
+
+ s->parameters_proc_swaps.priority = s->parameters_fragment.priority = -1;
+
+ s->control_command_id = _SWAP_EXEC_COMMAND_INVALID;
+
+ u->ignore_on_isolate = true;
+}
+
+static void swap_unwatch_control_pid(Swap *s) {
+ assert(s);
+
+ if (s->control_pid <= 0)
+ return;
+
+ unit_unwatch_pid(UNIT(s), s->control_pid);
+ s->control_pid = 0;
+}
+
+static void swap_done(Unit *u) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+
+ swap_unset_proc_swaps(s);
+ swap_set_devnode(s, NULL);
+
+ s->what = mfree(s->what);
+ s->parameters_fragment.what = mfree(s->parameters_fragment.what);
+ s->parameters_fragment.options = mfree(s->parameters_fragment.options);
+
+ s->exec_runtime = exec_runtime_unref(s->exec_runtime, false);
+ exec_command_done_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX);
+ s->control_command = NULL;
+
+ dynamic_creds_unref(&s->dynamic_creds);
+
+ swap_unwatch_control_pid(s);
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+}
+
+static int swap_arm_timer(Swap *s, usec_t usec) {
+ int r;
+
+ assert(s);
+
+ if (s->timer_event_source) {
+ r = sd_event_source_set_time(s->timer_event_source, usec);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(s->timer_event_source, SD_EVENT_ONESHOT);
+ }
+
+ if (usec == USEC_INFINITY)
+ return 0;
+
+ r = sd_event_add_time(
+ UNIT(s)->manager->event,
+ &s->timer_event_source,
+ CLOCK_MONOTONIC,
+ usec, 0,
+ swap_dispatch_timer, s);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s->timer_event_source, "swap-timer");
+
+ return 0;
+}
+
+static int swap_add_device_dependencies(Swap *s) {
+ assert(s);
+
+ if (!s->what)
+ return 0;
+
+ if (!s->from_fragment)
+ return 0;
+
+ if (is_device_path(s->what))
+ return unit_add_node_dependency(UNIT(s), s->what, MANAGER_IS_SYSTEM(UNIT(s)->manager), UNIT_BINDS_TO, UNIT_DEPENDENCY_FILE);
+ else
+ /* File based swap devices need to be ordered after
+ * systemd-remount-fs.service, since they might need a
+ * writable file system. */
+ return unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_REMOUNT_FS_SERVICE, true, UNIT_DEPENDENCY_FILE);
+}
+
+static int swap_add_default_dependencies(Swap *s) {
+ int r;
+
+ assert(s);
+
+ if (!UNIT(s)->default_dependencies)
+ return 0;
+
+ if (!MANAGER_IS_SYSTEM(UNIT(s)->manager))
+ return 0;
+
+ if (detect_container() > 0)
+ return 0;
+
+ /* swap units generated for the swap dev links are missing the
+ * ordering dep against the swap target. */
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_BEFORE, SPECIAL_SWAP_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+}
+
+static int swap_verify(Swap *s) {
+ _cleanup_free_ char *e = NULL;
+ int r;
+
+ if (UNIT(s)->load_state != UNIT_LOADED)
+ return 0;
+
+ r = unit_name_from_path(s->what, ".swap", &e);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to generate unit name from path: %m");
+
+ if (!unit_has_name(UNIT(s), e)) {
+ log_unit_error(UNIT(s), "Value of What= and unit name do not match, not loading.");
+ return -ENOEXEC;
+ }
+
+ if (s->exec_context.pam_name && s->kill_context.kill_mode != KILL_CONTROL_GROUP) {
+ log_unit_error(UNIT(s), "Unit has PAM enabled. Kill mode must be set to 'control-group'. Refusing to load.");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static int swap_load_devnode(Swap *s) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ struct stat st;
+ const char *p;
+ int r;
+
+ assert(s);
+
+ if (stat(s->what, &st) < 0 || !S_ISBLK(st.st_mode))
+ return 0;
+
+ r = device_new_from_stat_rdev(&d, &st);
+ if (r < 0) {
+ log_unit_full(UNIT(s), r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to allocate device for swap %s: %m", s->what);
+ return 0;
+ }
+
+ if (sd_device_get_devname(d, &p) < 0)
+ return 0;
+
+ return swap_set_devnode(s, p);
+}
+
+static int swap_add_extras(Swap *s) {
+ int r;
+
+ assert(s);
+
+ if (UNIT(s)->fragment_path)
+ s->from_fragment = true;
+
+ if (!s->what) {
+ if (s->parameters_fragment.what)
+ s->what = strdup(s->parameters_fragment.what);
+ else if (s->parameters_proc_swaps.what)
+ s->what = strdup(s->parameters_proc_swaps.what);
+ else {
+ r = unit_name_to_path(UNIT(s)->id, &s->what);
+ if (r < 0)
+ return r;
+ }
+
+ if (!s->what)
+ return -ENOMEM;
+ }
+
+ path_simplify(s->what, false);
+
+ if (!UNIT(s)->description) {
+ r = unit_set_description(UNIT(s), s->what);
+ if (r < 0)
+ return r;
+ }
+
+ r = unit_require_mounts_for(UNIT(s), s->what, UNIT_DEPENDENCY_IMPLICIT);
+ if (r < 0)
+ return r;
+
+ r = swap_add_device_dependencies(s);
+ if (r < 0)
+ return r;
+
+ r = swap_load_devnode(s);
+ if (r < 0)
+ return r;
+
+ r = unit_patch_contexts(UNIT(s));
+ if (r < 0)
+ return r;
+
+ r = unit_add_exec_dependencies(UNIT(s), &s->exec_context);
+ if (r < 0)
+ return r;
+
+ r = unit_set_default_slice(UNIT(s));
+ if (r < 0)
+ return r;
+
+ r = swap_add_default_dependencies(s);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int swap_load(Unit *u) {
+ Swap *s = SWAP(u);
+ int r, q;
+
+ assert(s);
+ assert(u->load_state == UNIT_STUB);
+
+ /* Load a .swap file */
+ if (SWAP(u)->from_proc_swaps)
+ r = unit_load_fragment_and_dropin_optional(u);
+ else
+ r = unit_load_fragment_and_dropin(u);
+
+ /* Add in some extras, and do so either when we successfully loaded something or when /proc/swaps is already
+ * active. */
+ if (u->load_state == UNIT_LOADED || s->from_proc_swaps)
+ q = swap_add_extras(s);
+ else
+ q = 0;
+
+ if (r < 0)
+ return r;
+ if (q < 0)
+ return q;
+
+ return swap_verify(s);
+}
+
+static int swap_setup_unit(
+ Manager *m,
+ const char *what,
+ const char *what_proc_swaps,
+ int priority,
+ bool set_flags) {
+
+ _cleanup_free_ char *e = NULL;
+ bool delete = false;
+ Unit *u = NULL;
+ int r;
+ SwapParameters *p;
+
+ assert(m);
+ assert(what);
+ assert(what_proc_swaps);
+
+ r = unit_name_from_path(what, ".swap", &e);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to generate unit name from path: %m");
+
+ u = manager_get_unit(m, e);
+ if (u &&
+ SWAP(u)->from_proc_swaps &&
+ !path_equal(SWAP(u)->parameters_proc_swaps.what, what_proc_swaps))
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "Swap %s appeared twice with different device paths %s and %s",
+ e, SWAP(u)->parameters_proc_swaps.what, what_proc_swaps);
+
+ if (!u) {
+ delete = true;
+
+ r = unit_new_for_name(m, sizeof(Swap), e, &u);
+ if (r < 0)
+ goto fail;
+
+ SWAP(u)->what = strdup(what);
+ if (!SWAP(u)->what) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ unit_add_to_load_queue(u);
+ } else
+ delete = false;
+
+ p = &SWAP(u)->parameters_proc_swaps;
+
+ if (!p->what) {
+ p->what = strdup(what_proc_swaps);
+ if (!p->what) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ /* The unit is definitely around now, mark it as loaded if it was previously referenced but could not be
+ * loaded. After all we can load it now, from the data in /proc/swaps. */
+ if (IN_SET(u->load_state, UNIT_NOT_FOUND, UNIT_BAD_SETTING, UNIT_ERROR)) {
+ u->load_state = UNIT_LOADED;
+ u->load_error = 0;
+ }
+
+ if (set_flags) {
+ SWAP(u)->is_active = true;
+ SWAP(u)->just_activated = !SWAP(u)->from_proc_swaps;
+ }
+
+ SWAP(u)->from_proc_swaps = true;
+
+ p->priority = priority;
+
+ unit_add_to_dbus_queue(u);
+ return 0;
+
+fail:
+ log_unit_warning_errno(u, r, "Failed to load swap unit: %m");
+
+ if (delete)
+ unit_free(u);
+
+ return r;
+}
+
+static int swap_process_new(Manager *m, const char *device, int prio, bool set_flags) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ const char *dn, *devlink;
+ struct stat st, st_link;
+ int r;
+
+ assert(m);
+
+ r = swap_setup_unit(m, device, device, prio, set_flags);
+ if (r < 0)
+ return r;
+
+ /* If this is a block device, then let's add duplicates for
+ * all other names of this block device */
+ if (stat(device, &st) < 0 || !S_ISBLK(st.st_mode))
+ return 0;
+
+ r = device_new_from_stat_rdev(&d, &st);
+ if (r < 0) {
+ log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to allocate device for swap %s: %m", device);
+ return 0;
+ }
+
+ /* Add the main device node */
+ if (sd_device_get_devname(d, &dn) >= 0 && !streq(dn, device))
+ swap_setup_unit(m, dn, device, prio, set_flags);
+
+ /* Add additional units for all symlinks */
+ FOREACH_DEVICE_DEVLINK(d, devlink) {
+
+ /* Don't bother with the /dev/block links */
+ if (streq(devlink, device))
+ continue;
+
+ if (path_startswith(devlink, "/dev/block/"))
+ continue;
+
+ if (stat(devlink, &st_link) >= 0 &&
+ (!S_ISBLK(st_link.st_mode) ||
+ st_link.st_rdev != st.st_rdev))
+ continue;
+
+ swap_setup_unit(m, devlink, device, prio, set_flags);
+ }
+
+ return 0;
+}
+
+static void swap_set_state(Swap *s, SwapState state) {
+ SwapState old_state;
+ Swap *other;
+
+ assert(s);
+
+ if (s->state != state)
+ bus_unit_send_pending_change_signal(UNIT(s), false);
+
+ old_state = s->state;
+ s->state = state;
+
+ if (!SWAP_STATE_WITH_PROCESS(state)) {
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+ swap_unwatch_control_pid(s);
+ s->control_command = NULL;
+ s->control_command_id = _SWAP_EXEC_COMMAND_INVALID;
+ }
+
+ if (state != old_state)
+ log_unit_debug(UNIT(s), "Changed %s -> %s", swap_state_to_string(old_state), swap_state_to_string(state));
+
+ unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], 0);
+
+ /* If there other units for the same device node have a job
+ queued it might be worth checking again if it is runnable
+ now. This is necessary, since swap_start() refuses
+ operation with EAGAIN if there's already another job for
+ the same device node queued. */
+ LIST_FOREACH_OTHERS(same_devnode, other, s)
+ if (UNIT(other)->job)
+ job_add_to_run_queue(UNIT(other)->job);
+}
+
+static int swap_coldplug(Unit *u) {
+ Swap *s = SWAP(u);
+ SwapState new_state = SWAP_DEAD;
+ int r;
+
+ assert(s);
+ assert(s->state == SWAP_DEAD);
+
+ if (s->deserialized_state != s->state)
+ new_state = s->deserialized_state;
+ else if (s->from_proc_swaps)
+ new_state = SWAP_ACTIVE;
+
+ if (new_state == s->state)
+ return 0;
+
+ if (s->control_pid > 0 &&
+ pid_is_unwaited(s->control_pid) &&
+ SWAP_STATE_WITH_PROCESS(new_state)) {
+
+ r = unit_watch_pid(UNIT(s), s->control_pid);
+ if (r < 0)
+ return r;
+
+ r = swap_arm_timer(s, usec_add(u->state_change_timestamp.monotonic, s->timeout_usec));
+ if (r < 0)
+ return r;
+ }
+
+ if (!IN_SET(new_state, SWAP_DEAD, SWAP_FAILED)) {
+ (void) unit_setup_dynamic_creds(u);
+ (void) unit_setup_exec_runtime(u);
+ }
+
+ swap_set_state(s, new_state);
+ return 0;
+}
+
+static void swap_dump(Unit *u, FILE *f, const char *prefix) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ Swap *s = SWAP(u);
+ SwapParameters *p;
+
+ assert(s);
+ assert(f);
+
+ if (s->from_proc_swaps)
+ p = &s->parameters_proc_swaps;
+ else if (s->from_fragment)
+ p = &s->parameters_fragment;
+ else
+ p = NULL;
+
+ fprintf(f,
+ "%sSwap State: %s\n"
+ "%sResult: %s\n"
+ "%sWhat: %s\n"
+ "%sFrom /proc/swaps: %s\n"
+ "%sFrom fragment: %s\n",
+ prefix, swap_state_to_string(s->state),
+ prefix, swap_result_to_string(s->result),
+ prefix, s->what,
+ prefix, yes_no(s->from_proc_swaps),
+ prefix, yes_no(s->from_fragment));
+
+ if (s->devnode)
+ fprintf(f, "%sDevice Node: %s\n", prefix, s->devnode);
+
+ if (p)
+ fprintf(f,
+ "%sPriority: %i\n"
+ "%sOptions: %s\n",
+ prefix, p->priority,
+ prefix, strempty(p->options));
+
+ fprintf(f,
+ "%sTimeoutSec: %s\n",
+ prefix, format_timespan(buf, sizeof(buf), s->timeout_usec, USEC_PER_SEC));
+
+ if (s->control_pid > 0)
+ fprintf(f,
+ "%sControl PID: "PID_FMT"\n",
+ prefix, s->control_pid);
+
+ exec_context_dump(&s->exec_context, f, prefix);
+ kill_context_dump(&s->kill_context, f, prefix);
+ cgroup_context_dump(&s->cgroup_context, f, prefix);
+}
+
+static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
+
+ _cleanup_(exec_params_clear) ExecParameters exec_params = {
+ .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
+ .exec_fd = -1,
+ };
+ pid_t pid;
+ int r;
+
+ assert(s);
+ assert(c);
+ assert(_pid);
+
+ r = unit_prepare_exec(UNIT(s));
+ if (r < 0)
+ return r;
+
+ r = swap_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
+ if (r < 0)
+ goto fail;
+
+ r = unit_set_exec_params(UNIT(s), &exec_params);
+ if (r < 0)
+ goto fail;
+
+ r = exec_spawn(UNIT(s),
+ c,
+ &s->exec_context,
+ &exec_params,
+ s->exec_runtime,
+ &s->dynamic_creds,
+ &pid);
+ if (r < 0)
+ goto fail;
+
+ r = unit_watch_pid(UNIT(s), pid);
+ if (r < 0)
+ /* FIXME: we need to do something here */
+ goto fail;
+
+ *_pid = pid;
+
+ return 0;
+
+fail:
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+ return r;
+}
+
+static void swap_enter_dead(Swap *s, SwapResult f) {
+ assert(s);
+
+ if (s->result == SWAP_SUCCESS)
+ s->result = f;
+
+ unit_log_result(UNIT(s), s->result == SWAP_SUCCESS, swap_result_to_string(s->result));
+ swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD);
+
+ s->exec_runtime = exec_runtime_unref(s->exec_runtime, true);
+
+ exec_context_destroy_runtime_directory(&s->exec_context, UNIT(s)->manager->prefix[EXEC_DIRECTORY_RUNTIME]);
+
+ unit_unref_uid_gid(UNIT(s), true);
+
+ dynamic_creds_destroy(&s->dynamic_creds);
+}
+
+static void swap_enter_active(Swap *s, SwapResult f) {
+ assert(s);
+
+ if (s->result == SWAP_SUCCESS)
+ s->result = f;
+
+ swap_set_state(s, SWAP_ACTIVE);
+}
+
+static void swap_enter_dead_or_active(Swap *s, SwapResult f) {
+ assert(s);
+
+ if (s->from_proc_swaps)
+ swap_enter_active(s, f);
+ else
+ swap_enter_dead(s, f);
+}
+
+static void swap_enter_signal(Swap *s, SwapState state, SwapResult f) {
+ int r;
+ KillOperation kop;
+
+ assert(s);
+
+ if (s->result == SWAP_SUCCESS)
+ s->result = f;
+
+ if (state == SWAP_DEACTIVATING_SIGTERM)
+ kop = KILL_TERMINATE;
+ else
+ kop = KILL_KILL;
+
+ r = unit_kill_context(UNIT(s), &s->kill_context, kop, -1, s->control_pid, false);
+ if (r < 0)
+ goto fail;
+
+ if (r > 0) {
+ r = swap_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
+ if (r < 0)
+ goto fail;
+
+ swap_set_state(s, state);
+ } else if (state == SWAP_DEACTIVATING_SIGTERM && s->kill_context.send_sigkill)
+ swap_enter_signal(s, SWAP_DEACTIVATING_SIGKILL, SWAP_SUCCESS);
+ else
+ swap_enter_dead_or_active(s, SWAP_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m");
+ swap_enter_dead_or_active(s, SWAP_FAILURE_RESOURCES);
+}
+
+static void swap_enter_activating(Swap *s) {
+ _cleanup_free_ char *opts = NULL;
+ int r;
+
+ assert(s);
+
+ unit_warn_leftover_processes(UNIT(s));
+
+ s->control_command_id = SWAP_EXEC_ACTIVATE;
+ s->control_command = s->exec_command + SWAP_EXEC_ACTIVATE;
+
+ if (s->from_fragment) {
+ int priority = -1;
+
+ r = fstab_find_pri(s->parameters_fragment.options, &priority);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse swap priority \"%s\", ignoring: %m", s->parameters_fragment.options);
+ else if (r == 1 && s->parameters_fragment.priority >= 0)
+ log_warning("Duplicate swap priority configuration by Priority and Options fields.");
+
+ if (r <= 0 && s->parameters_fragment.priority >= 0) {
+ if (s->parameters_fragment.options)
+ r = asprintf(&opts, "%s,pri=%i", s->parameters_fragment.options, s->parameters_fragment.priority);
+ else
+ r = asprintf(&opts, "pri=%i", s->parameters_fragment.priority);
+ if (r < 0)
+ goto fail;
+ }
+ }
+
+ r = exec_command_set(s->control_command, "/sbin/swapon", NULL);
+ if (r < 0)
+ goto fail;
+
+ if (s->parameters_fragment.options || opts) {
+ r = exec_command_append(s->control_command, "-o",
+ opts ? : s->parameters_fragment.options, NULL);
+ if (r < 0)
+ goto fail;
+ }
+
+ r = exec_command_append(s->control_command, s->what, NULL);
+ if (r < 0)
+ goto fail;
+
+ swap_unwatch_control_pid(s);
+
+ r = swap_spawn(s, s->control_command, &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ swap_set_state(s, SWAP_ACTIVATING);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'swapon' task: %m");
+ swap_enter_dead_or_active(s, SWAP_FAILURE_RESOURCES);
+}
+
+static void swap_enter_deactivating(Swap *s) {
+ int r;
+
+ assert(s);
+
+ s->control_command_id = SWAP_EXEC_DEACTIVATE;
+ s->control_command = s->exec_command + SWAP_EXEC_DEACTIVATE;
+
+ r = exec_command_set(s->control_command,
+ "/sbin/swapoff",
+ s->what,
+ NULL);
+ if (r < 0)
+ goto fail;
+
+ swap_unwatch_control_pid(s);
+
+ r = swap_spawn(s, s->control_command, &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ swap_set_state(s, SWAP_DEACTIVATING);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'swapoff' task: %m");
+ swap_enter_dead_or_active(s, SWAP_FAILURE_RESOURCES);
+}
+
+static void swap_cycle_clear(Swap *s) {
+ assert(s);
+
+ s->result = SWAP_SUCCESS;
+ exec_command_reset_status_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX);
+ UNIT(s)->reset_accounting = true;
+}
+
+static int swap_start(Unit *u) {
+ Swap *s = SWAP(u), *other;
+ int r;
+
+ assert(s);
+
+ /* We cannot fulfill this request right now, try again later please! */
+ if (IN_SET(s->state,
+ SWAP_DEACTIVATING,
+ SWAP_DEACTIVATING_SIGTERM,
+ SWAP_DEACTIVATING_SIGKILL))
+ return -EAGAIN;
+
+ /* Already on it! */
+ if (s->state == SWAP_ACTIVATING)
+ return 0;
+
+ assert(IN_SET(s->state, SWAP_DEAD, SWAP_FAILED));
+
+ if (detect_container() > 0)
+ return -EPERM;
+
+ /* If there's a job for another swap unit for the same node
+ * running, then let's not dispatch this one for now, and wait
+ * until that other job has finished. */
+ LIST_FOREACH_OTHERS(same_devnode, other, s)
+ if (UNIT(other)->job && UNIT(other)->job->state == JOB_RUNNING)
+ return -EAGAIN;
+
+ r = unit_start_limit_test(u);
+ if (r < 0) {
+ swap_enter_dead(s, SWAP_FAILURE_START_LIMIT_HIT);
+ return r;
+ }
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ swap_cycle_clear(s);
+ swap_enter_activating(s);
+ return 1;
+}
+
+static int swap_stop(Unit *u) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+
+ switch (s->state) {
+
+ case SWAP_DEACTIVATING:
+ case SWAP_DEACTIVATING_SIGTERM:
+ case SWAP_DEACTIVATING_SIGKILL:
+ /* Already on it */
+ return 0;
+
+ case SWAP_ACTIVATING:
+ case SWAP_ACTIVATING_DONE:
+ /* There's a control process pending, directly enter kill mode */
+ swap_enter_signal(s, SWAP_DEACTIVATING_SIGTERM, SWAP_SUCCESS);
+ return 0;
+
+ case SWAP_ACTIVE:
+ if (detect_container() > 0)
+ return -EPERM;
+
+ swap_enter_deactivating(s);
+ return 1;
+
+ default:
+ assert_not_reached("Unexpected state.");
+ }
+}
+
+static int swap_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", swap_state_to_string(s->state));
+ (void) serialize_item(f, "result", swap_result_to_string(s->result));
+
+ if (s->control_pid > 0)
+ (void) serialize_item_format(f, "control-pid", PID_FMT, s->control_pid);
+
+ if (s->control_command_id >= 0)
+ (void) serialize_item(f, "control-command", swap_exec_command_to_string(s->control_command_id));
+
+ return 0;
+}
+
+static int swap_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ SwapState state;
+
+ state = swap_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ s->deserialized_state = state;
+ } else if (streq(key, "result")) {
+ SwapResult f;
+
+ f = swap_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse result value: %s", value);
+ else if (f != SWAP_SUCCESS)
+ s->result = f;
+ } else if (streq(key, "control-pid")) {
+ pid_t pid;
+
+ if (parse_pid(value, &pid) < 0)
+ log_unit_debug(u, "Failed to parse control-pid value: %s", value);
+ else
+ s->control_pid = pid;
+
+ } else if (streq(key, "control-command")) {
+ SwapExecCommand id;
+
+ id = swap_exec_command_from_string(value);
+ if (id < 0)
+ log_unit_debug(u, "Failed to parse exec-command value: %s", value);
+ else {
+ s->control_command_id = id;
+ s->control_command = s->exec_command + id;
+ }
+ } else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+_pure_ static UnitActiveState swap_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[SWAP(u)->state];
+}
+
+_pure_ static const char *swap_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return swap_state_to_string(SWAP(u)->state);
+}
+
+_pure_ static bool swap_may_gc(Unit *u) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+
+ if (s->from_proc_swaps)
+ return false;
+
+ return true;
+}
+
+static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) {
+ Swap *s = SWAP(u);
+ SwapResult f;
+
+ assert(s);
+ assert(pid >= 0);
+
+ if (pid != s->control_pid)
+ return;
+
+ s->control_pid = 0;
+
+ if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
+ f = SWAP_SUCCESS;
+ else if (code == CLD_EXITED)
+ f = SWAP_FAILURE_EXIT_CODE;
+ else if (code == CLD_KILLED)
+ f = SWAP_FAILURE_SIGNAL;
+ else if (code == CLD_DUMPED)
+ f = SWAP_FAILURE_CORE_DUMP;
+ else
+ assert_not_reached("Unknown code");
+
+ if (s->result == SWAP_SUCCESS)
+ s->result = f;
+
+ if (s->control_command) {
+ exec_status_exit(&s->control_command->exec_status, &s->exec_context, pid, code, status);
+
+ s->control_command = NULL;
+ s->control_command_id = _SWAP_EXEC_COMMAND_INVALID;
+ }
+
+ unit_log_process_exit(
+ u, f == SWAP_SUCCESS ? LOG_DEBUG : LOG_NOTICE,
+ "Swap process",
+ swap_exec_command_to_string(s->control_command_id),
+ code, status);
+
+ switch (s->state) {
+
+ case SWAP_ACTIVATING:
+ case SWAP_ACTIVATING_DONE:
+
+ if (f == SWAP_SUCCESS || s->from_proc_swaps)
+ swap_enter_active(s, f);
+ else
+ swap_enter_dead(s, f);
+ break;
+
+ case SWAP_DEACTIVATING:
+ case SWAP_DEACTIVATING_SIGKILL:
+ case SWAP_DEACTIVATING_SIGTERM:
+
+ swap_enter_dead_or_active(s, f);
+ break;
+
+ default:
+ assert_not_reached("Uh, control process died at wrong time.");
+ }
+
+ /* Notify clients about changed exit status */
+ unit_add_to_dbus_queue(u);
+}
+
+static int swap_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
+ Swap *s = SWAP(userdata);
+
+ assert(s);
+ assert(s->timer_event_source == source);
+
+ switch (s->state) {
+
+ case SWAP_ACTIVATING:
+ case SWAP_ACTIVATING_DONE:
+ log_unit_warning(UNIT(s), "Activation timed out. Stopping.");
+ swap_enter_signal(s, SWAP_DEACTIVATING_SIGTERM, SWAP_FAILURE_TIMEOUT);
+ break;
+
+ case SWAP_DEACTIVATING:
+ log_unit_warning(UNIT(s), "Deactivation timed out. Stopping.");
+ swap_enter_signal(s, SWAP_DEACTIVATING_SIGTERM, SWAP_FAILURE_TIMEOUT);
+ break;
+
+ case SWAP_DEACTIVATING_SIGTERM:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "Swap process timed out. Killing.");
+ swap_enter_signal(s, SWAP_DEACTIVATING_SIGKILL, SWAP_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "Swap process timed out. Skipping SIGKILL. Ignoring.");
+ swap_enter_dead_or_active(s, SWAP_FAILURE_TIMEOUT);
+ }
+ break;
+
+ case SWAP_DEACTIVATING_SIGKILL:
+ log_unit_warning(UNIT(s), "Swap process still around after SIGKILL. Ignoring.");
+ swap_enter_dead_or_active(s, SWAP_FAILURE_TIMEOUT);
+ break;
+
+ default:
+ assert_not_reached("Timeout at wrong time.");
+ }
+
+ return 0;
+}
+
+static int swap_load_proc_swaps(Manager *m, bool set_flags) {
+ unsigned i;
+
+ assert(m);
+
+ rewind(m->proc_swaps);
+
+ (void) fscanf(m->proc_swaps, "%*s %*s %*s %*s %*s\n");
+
+ for (i = 1;; i++) {
+ _cleanup_free_ char *dev = NULL, *d = NULL;
+ int prio = 0, k;
+
+ k = fscanf(m->proc_swaps,
+ "%ms " /* device/file */
+ "%*s " /* type of swap */
+ "%*s " /* swap size */
+ "%*s " /* used */
+ "%i\n", /* priority */
+ &dev, &prio);
+ if (k != 2) {
+ if (k == EOF)
+ break;
+
+ log_warning("Failed to parse /proc/swaps:%u.", i);
+ continue;
+ }
+
+ if (cunescape(dev, UNESCAPE_RELAX, &d) < 0)
+ return log_oom();
+
+ device_found_node(m, d, DEVICE_FOUND_SWAP, DEVICE_FOUND_SWAP);
+
+ (void) swap_process_new(m, d, prio, set_flags);
+ }
+
+ return 0;
+}
+
+static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(revents & EPOLLPRI);
+
+ r = swap_load_proc_swaps(m, true);
+ if (r < 0) {
+ log_error_errno(r, "Failed to reread /proc/swaps: %m");
+
+ /* Reset flags, just in case, for late calls */
+ LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_SWAP]) {
+ Swap *swap = SWAP(u);
+
+ swap->is_active = swap->just_activated = false;
+ }
+
+ return 0;
+ }
+
+ manager_dispatch_load_queue(m);
+
+ LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_SWAP]) {
+ Swap *swap = SWAP(u);
+
+ if (!swap->is_active) {
+
+ swap_unset_proc_swaps(swap);
+
+ switch (swap->state) {
+
+ case SWAP_ACTIVE:
+ /* This has just been deactivated */
+ swap_enter_dead(swap, SWAP_SUCCESS);
+ break;
+
+ default:
+ /* Fire again */
+ swap_set_state(swap, swap->state);
+ break;
+ }
+
+ if (swap->what)
+ device_found_node(m, swap->what, 0, DEVICE_FOUND_SWAP);
+
+ } else if (swap->just_activated) {
+
+ /* New swap entry */
+
+ switch (swap->state) {
+
+ case SWAP_DEAD:
+ case SWAP_FAILED:
+ (void) unit_acquire_invocation_id(u);
+ swap_cycle_clear(swap);
+ swap_enter_active(swap, SWAP_SUCCESS);
+ break;
+
+ case SWAP_ACTIVATING:
+ swap_set_state(swap, SWAP_ACTIVATING_DONE);
+ break;
+
+ default:
+ /* Nothing really changed, but let's
+ * issue an notification call
+ * nonetheless, in case somebody is
+ * waiting for this. */
+ swap_set_state(swap, swap->state);
+ break;
+ }
+ }
+
+ /* Reset the flags for later calls */
+ swap->is_active = swap->just_activated = false;
+ }
+
+ return 1;
+}
+
+static Unit *swap_following(Unit *u) {
+ Swap *s = SWAP(u);
+ Swap *other, *first = NULL;
+
+ assert(s);
+
+ /* If the user configured the swap through /etc/fstab or
+ * a device unit, follow that. */
+
+ if (s->from_fragment)
+ return NULL;
+
+ LIST_FOREACH_OTHERS(same_devnode, other, s)
+ if (other->from_fragment)
+ return UNIT(other);
+
+ /* Otherwise, make everybody follow the unit that's named after
+ * the swap device in the kernel */
+
+ if (streq_ptr(s->what, s->devnode))
+ return NULL;
+
+ LIST_FOREACH_AFTER(same_devnode, other, s)
+ if (streq_ptr(other->what, other->devnode))
+ return UNIT(other);
+
+ LIST_FOREACH_BEFORE(same_devnode, other, s) {
+ if (streq_ptr(other->what, other->devnode))
+ return UNIT(other);
+
+ first = other;
+ }
+
+ /* Fall back to the first on the list */
+ return UNIT(first);
+}
+
+static int swap_following_set(Unit *u, Set **_set) {
+ Swap *s = SWAP(u), *other;
+ _cleanup_set_free_ Set *set = NULL;
+ int r;
+
+ assert(s);
+ assert(_set);
+
+ if (LIST_JUST_US(same_devnode, s)) {
+ *_set = NULL;
+ return 0;
+ }
+
+ set = set_new(NULL);
+ if (!set)
+ return -ENOMEM;
+
+ LIST_FOREACH_OTHERS(same_devnode, other, s) {
+ r = set_put(set, other);
+ if (r < 0)
+ return r;
+ }
+
+ *_set = TAKE_PTR(set);
+ return 1;
+}
+
+static void swap_shutdown(Manager *m) {
+ assert(m);
+
+ m->swap_event_source = sd_event_source_unref(m->swap_event_source);
+ m->proc_swaps = safe_fclose(m->proc_swaps);
+ m->swaps_by_devnode = hashmap_free(m->swaps_by_devnode);
+}
+
+static void swap_enumerate(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (!m->proc_swaps) {
+ m->proc_swaps = fopen("/proc/swaps", "re");
+ if (!m->proc_swaps) {
+ if (errno == ENOENT)
+ log_debug_errno(errno, "Not swap enabled, skipping enumeration.");
+ else
+ log_warning_errno(errno, "Failed to open /proc/swaps, ignoring: %m");
+
+ return;
+ }
+
+ r = sd_event_add_io(m->event, &m->swap_event_source, fileno(m->proc_swaps), EPOLLPRI, swap_dispatch_io, m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to watch /proc/swaps: %m");
+ goto fail;
+ }
+
+ /* Dispatch this before we dispatch SIGCHLD, so that
+ * we always get the events from /proc/swaps before
+ * the SIGCHLD of /sbin/swapon. */
+ r = sd_event_source_set_priority(m->swap_event_source, SD_EVENT_PRIORITY_NORMAL-10);
+ if (r < 0) {
+ log_error_errno(r, "Failed to change /proc/swaps priority: %m");
+ goto fail;
+ }
+
+ (void) sd_event_source_set_description(m->swap_event_source, "swap-proc");
+ }
+
+ r = swap_load_proc_swaps(m, false);
+ if (r < 0)
+ goto fail;
+
+ return;
+
+fail:
+ swap_shutdown(m);
+}
+
+int swap_process_device_new(Manager *m, sd_device *dev) {
+ _cleanup_free_ char *e = NULL;
+ const char *dn, *devlink;
+ Unit *u;
+ int r = 0;
+
+ assert(m);
+ assert(dev);
+
+ r = sd_device_get_devname(dev, &dn);
+ if (r < 0)
+ return 0;
+
+ r = unit_name_from_path(dn, ".swap", &e);
+ if (r < 0)
+ return r;
+
+ u = manager_get_unit(m, e);
+ if (u)
+ r = swap_set_devnode(SWAP(u), dn);
+
+ FOREACH_DEVICE_DEVLINK(dev, devlink) {
+ _cleanup_free_ char *n = NULL;
+ int q;
+
+ q = unit_name_from_path(devlink, ".swap", &n);
+ if (q < 0)
+ return q;
+
+ u = manager_get_unit(m, n);
+ if (u) {
+ q = swap_set_devnode(SWAP(u), dn);
+ if (q < 0)
+ r = q;
+ }
+ }
+
+ return r;
+}
+
+int swap_process_device_remove(Manager *m, sd_device *dev) {
+ const char *dn;
+ int r = 0;
+ Swap *s;
+
+ r = sd_device_get_devname(dev, &dn);
+ if (r < 0)
+ return 0;
+
+ while ((s = hashmap_get(m->swaps_by_devnode, dn))) {
+ int q;
+
+ q = swap_set_devnode(s, NULL);
+ if (q < 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static void swap_reset_failed(Unit *u) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+
+ if (s->state == SWAP_FAILED)
+ swap_set_state(s, SWAP_DEAD);
+
+ s->result = SWAP_SUCCESS;
+}
+
+static int swap_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {
+ return unit_kill_common(u, who, signo, -1, SWAP(u)->control_pid, error);
+}
+
+static int swap_get_timeout(Unit *u, usec_t *timeout) {
+ Swap *s = SWAP(u);
+ usec_t t;
+ int r;
+
+ if (!s->timer_event_source)
+ return 0;
+
+ r = sd_event_source_get_time(s->timer_event_source, &t);
+ if (r < 0)
+ return r;
+ if (t == USEC_INFINITY)
+ return 0;
+
+ *timeout = t;
+ return 1;
+}
+
+static bool swap_supported(void) {
+ static int supported = -1;
+
+ /* If swap support is not available in the kernel, or we are
+ * running in a container we don't support swap units, and any
+ * attempts to starting one should fail immediately. */
+
+ if (supported < 0)
+ supported =
+ access("/proc/swaps", F_OK) >= 0 &&
+ detect_container() <= 0;
+
+ return supported;
+}
+
+static int swap_control_pid(Unit *u) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+
+ return s->control_pid;
+}
+
+static const char* const swap_exec_command_table[_SWAP_EXEC_COMMAND_MAX] = {
+ [SWAP_EXEC_ACTIVATE] = "ExecActivate",
+ [SWAP_EXEC_DEACTIVATE] = "ExecDeactivate",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(swap_exec_command, SwapExecCommand);
+
+static const char* const swap_result_table[_SWAP_RESULT_MAX] = {
+ [SWAP_SUCCESS] = "success",
+ [SWAP_FAILURE_RESOURCES] = "resources",
+ [SWAP_FAILURE_TIMEOUT] = "timeout",
+ [SWAP_FAILURE_EXIT_CODE] = "exit-code",
+ [SWAP_FAILURE_SIGNAL] = "signal",
+ [SWAP_FAILURE_CORE_DUMP] = "core-dump",
+ [SWAP_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(swap_result, SwapResult);
+
+const UnitVTable swap_vtable = {
+ .object_size = sizeof(Swap),
+ .exec_context_offset = offsetof(Swap, exec_context),
+ .cgroup_context_offset = offsetof(Swap, cgroup_context),
+ .kill_context_offset = offsetof(Swap, kill_context),
+ .exec_runtime_offset = offsetof(Swap, exec_runtime),
+ .dynamic_creds_offset = offsetof(Swap, dynamic_creds),
+
+ .sections =
+ "Unit\0"
+ "Swap\0"
+ "Install\0",
+ .private_section = "Swap",
+
+ .init = swap_init,
+ .load = swap_load,
+ .done = swap_done,
+
+ .coldplug = swap_coldplug,
+
+ .dump = swap_dump,
+
+ .start = swap_start,
+ .stop = swap_stop,
+
+ .kill = swap_kill,
+
+ .get_timeout = swap_get_timeout,
+
+ .serialize = swap_serialize,
+ .deserialize_item = swap_deserialize_item,
+
+ .active_state = swap_active_state,
+ .sub_state_to_string = swap_sub_state_to_string,
+
+ .may_gc = swap_may_gc,
+
+ .sigchld_event = swap_sigchld_event,
+
+ .reset_failed = swap_reset_failed,
+
+ .control_pid = swap_control_pid,
+
+ .bus_vtable = bus_swap_vtable,
+ .bus_set_property = bus_swap_set_property,
+ .bus_commit_properties = bus_swap_commit_properties,
+
+ .following = swap_following,
+ .following_set = swap_following_set,
+
+ .enumerate = swap_enumerate,
+ .shutdown = swap_shutdown,
+ .supported = swap_supported,
+
+ .status_message_formats = {
+ .starting_stopping = {
+ [0] = "Activating swap %s...",
+ [1] = "Deactivating swap %s...",
+ },
+ .finished_start_job = {
+ [JOB_DONE] = "Activated swap %s.",
+ [JOB_FAILED] = "Failed to activate swap %s.",
+ [JOB_TIMEOUT] = "Timed out activating swap %s.",
+ },
+ .finished_stop_job = {
+ [JOB_DONE] = "Deactivated swap %s.",
+ [JOB_FAILED] = "Failed deactivating swap %s.",
+ [JOB_TIMEOUT] = "Timed out deactivating swap %s.",
+ },
+ },
+};
diff --git a/src/core/swap.h b/src/core/swap.h
new file mode 100644
index 0000000..1a4b60b
--- /dev/null
+++ b/src/core/swap.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2010 Maarten Lankhorst
+***/
+
+#include "sd-device.h"
+#include "unit.h"
+
+typedef struct Swap Swap;
+
+typedef enum SwapExecCommand {
+ SWAP_EXEC_ACTIVATE,
+ SWAP_EXEC_DEACTIVATE,
+ _SWAP_EXEC_COMMAND_MAX,
+ _SWAP_EXEC_COMMAND_INVALID = -1
+} SwapExecCommand;
+
+typedef enum SwapResult {
+ SWAP_SUCCESS,
+ SWAP_FAILURE_RESOURCES,
+ SWAP_FAILURE_TIMEOUT,
+ SWAP_FAILURE_EXIT_CODE,
+ SWAP_FAILURE_SIGNAL,
+ SWAP_FAILURE_CORE_DUMP,
+ SWAP_FAILURE_START_LIMIT_HIT,
+ _SWAP_RESULT_MAX,
+ _SWAP_RESULT_INVALID = -1
+} SwapResult;
+
+typedef struct SwapParameters {
+ char *what;
+ char *options;
+ int priority;
+} SwapParameters;
+
+struct Swap {
+ Unit meta;
+
+ char *what;
+
+ /* If the device has already shown up, this is the device
+ * node, which might be different from what, due to
+ * symlinks */
+ char *devnode;
+
+ SwapParameters parameters_proc_swaps;
+ SwapParameters parameters_fragment;
+
+ bool from_proc_swaps:1;
+ bool from_fragment:1;
+
+ /* Used while looking for swaps that vanished or got added
+ * from/to /proc/swaps */
+ bool is_active:1;
+ bool just_activated:1;
+
+ SwapResult result;
+
+ usec_t timeout_usec;
+
+ ExecCommand exec_command[_SWAP_EXEC_COMMAND_MAX];
+ ExecContext exec_context;
+ KillContext kill_context;
+ CGroupContext cgroup_context;
+
+ ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
+
+ SwapState state, deserialized_state;
+
+ ExecCommand* control_command;
+ SwapExecCommand control_command_id;
+ pid_t control_pid;
+
+ sd_event_source *timer_event_source;
+
+ /* In order to be able to distinguish dependencies on
+ different device nodes we might end up creating multiple
+ devices for the same swap. We chain them up here. */
+
+ LIST_FIELDS(struct Swap, same_devnode);
+};
+
+extern const UnitVTable swap_vtable;
+
+int swap_process_device_new(Manager *m, sd_device *dev);
+int swap_process_device_remove(Manager *m, sd_device *dev);
+
+const char* swap_exec_command_to_string(SwapExecCommand i) _const_;
+SwapExecCommand swap_exec_command_from_string(const char *s) _pure_;
+
+const char* swap_result_to_string(SwapResult i) _const_;
+SwapResult swap_result_from_string(const char *s) _pure_;
+
+DEFINE_CAST(SWAP, Swap);
diff --git a/src/core/system.conf.in b/src/core/system.conf.in
new file mode 100644
index 0000000..0a58737
--- /dev/null
+++ b/src/core/system.conf.in
@@ -0,0 +1,64 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See systemd-system.conf(5) for details.
+
+[Manager]
+#LogLevel=info
+#LogTarget=journal-or-kmsg
+#LogColor=yes
+#LogLocation=no
+#DumpCore=yes
+#ShowStatus=yes
+#CrashChangeVT=no
+#CrashShell=no
+#CrashReboot=no
+#CtrlAltDelBurstAction=reboot-force
+#CPUAffinity=1 2
+#RuntimeWatchdogSec=0
+#ShutdownWatchdogSec=10min
+#WatchdogDevice=
+#CapabilityBoundingSet=
+#NoNewPrivileges=no
+#SystemCallArchitectures=
+#TimerSlackNSec=
+#DefaultTimerAccuracySec=1min
+#DefaultStandardOutput=journal
+#DefaultStandardError=inherit
+#DefaultTimeoutStartSec=90s
+#DefaultTimeoutStopSec=90s
+#DefaultRestartSec=100ms
+#DefaultStartLimitIntervalSec=10s
+#DefaultStartLimitBurst=5
+#DefaultEnvironment=
+#DefaultCPUAccounting=no
+#DefaultIOAccounting=no
+#DefaultIPAccounting=no
+#DefaultBlockIOAccounting=no
+#DefaultMemoryAccounting=@MEMORY_ACCOUNTING_DEFAULT@
+#DefaultTasksAccounting=yes
+#DefaultTasksMax=15%
+#DefaultLimitCPU=
+#DefaultLimitFSIZE=
+#DefaultLimitDATA=
+#DefaultLimitSTACK=
+#DefaultLimitCORE=
+#DefaultLimitRSS=
+#DefaultLimitNOFILE=1024:@HIGH_RLIMIT_NOFILE@
+#DefaultLimitAS=
+#DefaultLimitNPROC=
+#DefaultLimitMEMLOCK=
+#DefaultLimitLOCKS=
+#DefaultLimitSIGPENDING=
+#DefaultLimitMSGQUEUE=
+#DefaultLimitNICE=
+#DefaultLimitRTPRIO=
+#DefaultLimitRTTIME=
diff --git a/src/core/systemd.pc.in b/src/core/systemd.pc.in
new file mode 100644
index 0000000..0dae950
--- /dev/null
+++ b/src/core/systemd.pc.in
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+prefix=@prefix@
+systemdutildir=@rootlibexecdir@
+systemdsystemunitdir=@systemunitdir@
+systemdsystempresetdir=@systempresetdir@
+systemduserunitdir=@userunitdir@
+systemduserpresetdir=@userpresetdir@
+systemdsystemconfdir=@pkgsysconfdir@/system
+systemduserconfdir=@pkgsysconfdir@/user
+systemdsystemunitpath=${systemdsystemconfdir}:/etc/systemd/system:/run/systemd/system:/usr/local/lib/systemd/system:${systemdsystemunitdir}:/usr/lib/systemd/system:/lib/systemd/system
+systemduserunitpath=${systemduserconfdir}:/etc/systemd/user:/run/systemd/user:/usr/local/lib/systemd/user:/usr/local/share/systemd/user:${systemduserunitdir}:/usr/lib/systemd/user:/usr/share/systemd/user
+systemdsystemgeneratordir=@systemgeneratordir@
+systemdusergeneratordir=@usergeneratordir@
+systemdsleepdir=@systemsleepdir@
+systemdshutdowndir=@systemshutdowndir@
+tmpfilesdir=@tmpfilesdir@
+sysusersdir=@sysusersdir@
+sysctldir=@sysctldir@
+binfmtdir=@binfmtdir@
+modulesloaddir=@modulesloaddir@
+catalogdir=@catalogdir@
+systemuidmax=@systemuidmax@
+systemgidmax=@systemgidmax@
+dynamicuidmin=@dynamicuidmin@
+dynamicuidmax=@dynamicuidmax@
+containeruidbasemin=@containeruidbasemin@
+containeruidbasemax=@containeruidbasemax@
+
+Name: systemd
+Description: systemd System and Service Manager
+URL: @PROJECT_URL@
+Version: @PROJECT_VERSION@
diff --git a/src/core/target.c b/src/core/target.c
new file mode 100644
index 0000000..421a304
--- /dev/null
+++ b/src/core/target.c
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "dbus-target.h"
+#include "dbus-unit.h"
+#include "log.h"
+#include "serialize.h"
+#include "special.h"
+#include "string-util.h"
+#include "target.h"
+#include "unit-name.h"
+#include "unit.h"
+
+static const UnitActiveState state_translation_table[_TARGET_STATE_MAX] = {
+ [TARGET_DEAD] = UNIT_INACTIVE,
+ [TARGET_ACTIVE] = UNIT_ACTIVE
+};
+
+static void target_set_state(Target *t, TargetState state) {
+ TargetState old_state;
+ assert(t);
+
+ if (t->state != state)
+ bus_unit_send_pending_change_signal(UNIT(t), false);
+
+ old_state = t->state;
+ t->state = state;
+
+ if (state != old_state)
+ log_debug("%s changed %s -> %s",
+ UNIT(t)->id,
+ target_state_to_string(old_state),
+ target_state_to_string(state));
+
+ unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static int target_add_default_dependencies(Target *t) {
+
+ static const UnitDependency deps[] = {
+ UNIT_REQUIRES,
+ UNIT_REQUISITE,
+ UNIT_WANTS,
+ UNIT_BINDS_TO,
+ UNIT_PART_OF
+ };
+
+ int r;
+ unsigned k;
+
+ assert(t);
+
+ if (!UNIT(t)->default_dependencies)
+ return 0;
+
+ /* Imply ordering for requirement dependencies on target units. Note that when the user created a contradicting
+ * ordering manually we won't add anything in here to make sure we don't create a loop. */
+
+ for (k = 0; k < ELEMENTSOF(deps); k++) {
+ Unit *other;
+ Iterator i;
+ void *v;
+
+ HASHMAP_FOREACH_KEY(v, other, UNIT(t)->dependencies[deps[k]], i) {
+ r = unit_add_default_target_dependency(other, UNIT(t));
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (unit_has_name(UNIT(t), SPECIAL_SHUTDOWN_TARGET))
+ return 0;
+
+ /* Make sure targets are unloaded on shutdown */
+ return unit_add_two_dependencies_by_name(UNIT(t), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+}
+
+static int target_load(Unit *u) {
+ Target *t = TARGET(u);
+ int r;
+
+ assert(t);
+
+ r = unit_load_fragment_and_dropin(u);
+ if (r < 0)
+ return r;
+
+ /* This is a new unit? Then let's add in some extras */
+ if (u->load_state == UNIT_LOADED) {
+ r = target_add_default_dependencies(t);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int target_coldplug(Unit *u) {
+ Target *t = TARGET(u);
+
+ assert(t);
+ assert(t->state == TARGET_DEAD);
+
+ if (t->deserialized_state != t->state)
+ target_set_state(t, t->deserialized_state);
+
+ return 0;
+}
+
+static void target_dump(Unit *u, FILE *f, const char *prefix) {
+ Target *t = TARGET(u);
+
+ assert(t);
+ assert(f);
+
+ fprintf(f,
+ "%sTarget State: %s\n",
+ prefix, target_state_to_string(t->state));
+}
+
+static int target_start(Unit *u) {
+ Target *t = TARGET(u);
+ int r;
+
+ assert(t);
+ assert(t->state == TARGET_DEAD);
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ target_set_state(t, TARGET_ACTIVE);
+ return 1;
+}
+
+static int target_stop(Unit *u) {
+ Target *t = TARGET(u);
+
+ assert(t);
+ assert(t->state == TARGET_ACTIVE);
+
+ target_set_state(t, TARGET_DEAD);
+ return 1;
+}
+
+static int target_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Target *s = TARGET(u);
+
+ assert(s);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", target_state_to_string(s->state));
+ return 0;
+}
+
+static int target_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Target *s = TARGET(u);
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ TargetState state;
+
+ state = target_state_from_string(value);
+ if (state < 0)
+ log_debug("Failed to parse state value %s", value);
+ else
+ s->deserialized_state = state;
+
+ } else
+ log_debug("Unknown serialization key '%s'", key);
+
+ return 0;
+}
+
+_pure_ static UnitActiveState target_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[TARGET(u)->state];
+}
+
+_pure_ static const char *target_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return target_state_to_string(TARGET(u)->state);
+}
+
+const UnitVTable target_vtable = {
+ .object_size = sizeof(Target),
+
+ .sections =
+ "Unit\0"
+ "Target\0"
+ "Install\0",
+
+ .load = target_load,
+ .coldplug = target_coldplug,
+
+ .dump = target_dump,
+
+ .start = target_start,
+ .stop = target_stop,
+
+ .serialize = target_serialize,
+ .deserialize_item = target_deserialize_item,
+
+ .active_state = target_active_state,
+ .sub_state_to_string = target_sub_state_to_string,
+
+ .bus_vtable = bus_target_vtable,
+
+ .status_message_formats = {
+ .finished_start_job = {
+ [JOB_DONE] = "Reached target %s.",
+ },
+ .finished_stop_job = {
+ [JOB_DONE] = "Stopped target %s.",
+ },
+ },
+};
diff --git a/src/core/target.h b/src/core/target.h
new file mode 100644
index 0000000..28f7888
--- /dev/null
+++ b/src/core/target.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "unit.h"
+
+typedef struct Target Target;
+
+struct Target {
+ Unit meta;
+
+ TargetState state, deserialized_state;
+};
+
+extern const UnitVTable target_vtable;
+
+DEFINE_CAST(TARGET, Target);
diff --git a/src/core/timer.c b/src/core/timer.c
new file mode 100644
index 0000000..d9ba2f7
--- /dev/null
+++ b/src/core/timer.c
@@ -0,0 +1,881 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "dbus-timer.h"
+#include "dbus-unit.h"
+#include "fs-util.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "serialize.h"
+#include "special.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "timer.h"
+#include "unit-name.h"
+#include "unit.h"
+#include "user-util.h"
+#include "virt.h"
+
+static const UnitActiveState state_translation_table[_TIMER_STATE_MAX] = {
+ [TIMER_DEAD] = UNIT_INACTIVE,
+ [TIMER_WAITING] = UNIT_ACTIVE,
+ [TIMER_RUNNING] = UNIT_ACTIVE,
+ [TIMER_ELAPSED] = UNIT_ACTIVE,
+ [TIMER_FAILED] = UNIT_FAILED
+};
+
+static int timer_dispatch(sd_event_source *s, uint64_t usec, void *userdata);
+
+static void timer_init(Unit *u) {
+ Timer *t = TIMER(u);
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ t->next_elapse_monotonic_or_boottime = USEC_INFINITY;
+ t->next_elapse_realtime = USEC_INFINITY;
+ t->accuracy_usec = u->manager->default_timer_accuracy_usec;
+ t->remain_after_elapse = true;
+}
+
+void timer_free_values(Timer *t) {
+ TimerValue *v;
+
+ assert(t);
+
+ while ((v = t->values)) {
+ LIST_REMOVE(value, t->values, v);
+ calendar_spec_free(v->calendar_spec);
+ free(v);
+ }
+}
+
+static void timer_done(Unit *u) {
+ Timer *t = TIMER(u);
+
+ assert(t);
+
+ timer_free_values(t);
+
+ t->monotonic_event_source = sd_event_source_unref(t->monotonic_event_source);
+ t->realtime_event_source = sd_event_source_unref(t->realtime_event_source);
+
+ free(t->stamp_path);
+}
+
+static int timer_verify(Timer *t) {
+ assert(t);
+
+ if (UNIT(t)->load_state != UNIT_LOADED)
+ return 0;
+
+ if (!t->values) {
+ log_unit_error(UNIT(t), "Timer unit lacks value setting. Refusing.");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static int timer_add_default_dependencies(Timer *t) {
+ int r;
+ TimerValue *v;
+
+ assert(t);
+
+ if (!UNIT(t)->default_dependencies)
+ return 0;
+
+ r = unit_add_dependency_by_name(UNIT(t), UNIT_BEFORE, SPECIAL_TIMERS_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ if (MANAGER_IS_SYSTEM(UNIT(t)->manager)) {
+ r = unit_add_two_dependencies_by_name(UNIT(t), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(value, v, t->values) {
+ if (v->base == TIMER_CALENDAR) {
+ r = unit_add_dependency_by_name(UNIT(t), UNIT_AFTER, SPECIAL_TIME_SYNC_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+ break;
+ }
+ }
+ }
+
+ return unit_add_two_dependencies_by_name(UNIT(t), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+}
+
+static int timer_add_trigger_dependencies(Timer *t) {
+ Unit *x;
+ int r;
+
+ assert(t);
+
+ if (!hashmap_isempty(UNIT(t)->dependencies[UNIT_TRIGGERS]))
+ return 0;
+
+ r = unit_load_related_unit(UNIT(t), ".service", &x);
+ if (r < 0)
+ return r;
+
+ return unit_add_two_dependencies(UNIT(t), UNIT_BEFORE, UNIT_TRIGGERS, x, true, UNIT_DEPENDENCY_IMPLICIT);
+}
+
+static int timer_setup_persistent(Timer *t) {
+ int r;
+
+ assert(t);
+
+ if (!t->persistent)
+ return 0;
+
+ if (MANAGER_IS_SYSTEM(UNIT(t)->manager)) {
+
+ r = unit_require_mounts_for(UNIT(t), "/var/lib/systemd/timers", UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+
+ t->stamp_path = strappend("/var/lib/systemd/timers/stamp-", UNIT(t)->id);
+ } else {
+ const char *e;
+
+ e = getenv("XDG_DATA_HOME");
+ if (e)
+ t->stamp_path = strjoin(e, "/systemd/timers/stamp-", UNIT(t)->id);
+ else {
+
+ _cleanup_free_ char *h = NULL;
+
+ r = get_home_dir(&h);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(t), r, "Failed to determine home directory: %m");
+
+ t->stamp_path = strjoin(h, "/.local/share/systemd/timers/stamp-", UNIT(t)->id);
+ }
+ }
+
+ if (!t->stamp_path)
+ return log_oom();
+
+ return 0;
+}
+
+static int timer_load(Unit *u) {
+ Timer *t = TIMER(u);
+ int r;
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ r = unit_load_fragment_and_dropin(u);
+ if (r < 0)
+ return r;
+
+ if (u->load_state == UNIT_LOADED) {
+
+ r = timer_add_trigger_dependencies(t);
+ if (r < 0)
+ return r;
+
+ r = timer_setup_persistent(t);
+ if (r < 0)
+ return r;
+
+ r = timer_add_default_dependencies(t);
+ if (r < 0)
+ return r;
+ }
+
+ return timer_verify(t);
+}
+
+static void timer_dump(Unit *u, FILE *f, const char *prefix) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ Timer *t = TIMER(u);
+ Unit *trigger;
+ TimerValue *v;
+
+ trigger = UNIT_TRIGGER(u);
+
+ fprintf(f,
+ "%sTimer State: %s\n"
+ "%sResult: %s\n"
+ "%sUnit: %s\n"
+ "%sPersistent: %s\n"
+ "%sWakeSystem: %s\n"
+ "%sAccuracy: %s\n"
+ "%sRemainAfterElapse: %s\n",
+ prefix, timer_state_to_string(t->state),
+ prefix, timer_result_to_string(t->result),
+ prefix, trigger ? trigger->id : "n/a",
+ prefix, yes_no(t->persistent),
+ prefix, yes_no(t->wake_system),
+ prefix, format_timespan(buf, sizeof(buf), t->accuracy_usec, 1),
+ prefix, yes_no(t->remain_after_elapse));
+
+ LIST_FOREACH(value, v, t->values) {
+
+ if (v->base == TIMER_CALENDAR) {
+ _cleanup_free_ char *p = NULL;
+
+ (void) calendar_spec_to_string(v->calendar_spec, &p);
+
+ fprintf(f,
+ "%s%s: %s\n",
+ prefix,
+ timer_base_to_string(v->base),
+ strna(p));
+ } else {
+ char timespan1[FORMAT_TIMESPAN_MAX];
+
+ fprintf(f,
+ "%s%s: %s\n",
+ prefix,
+ timer_base_to_string(v->base),
+ format_timespan(timespan1, sizeof(timespan1), v->value, 0));
+ }
+ }
+}
+
+static void timer_set_state(Timer *t, TimerState state) {
+ TimerState old_state;
+ assert(t);
+
+ if (t->state != state)
+ bus_unit_send_pending_change_signal(UNIT(t), false);
+
+ old_state = t->state;
+ t->state = state;
+
+ if (state != TIMER_WAITING) {
+ t->monotonic_event_source = sd_event_source_unref(t->monotonic_event_source);
+ t->realtime_event_source = sd_event_source_unref(t->realtime_event_source);
+ t->next_elapse_monotonic_or_boottime = USEC_INFINITY;
+ t->next_elapse_realtime = USEC_INFINITY;
+ }
+
+ if (state != old_state)
+ log_unit_debug(UNIT(t), "Changed %s -> %s", timer_state_to_string(old_state), timer_state_to_string(state));
+
+ unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static void timer_enter_waiting(Timer *t, bool time_change);
+
+static int timer_coldplug(Unit *u) {
+ Timer *t = TIMER(u);
+
+ assert(t);
+ assert(t->state == TIMER_DEAD);
+
+ if (t->deserialized_state == t->state)
+ return 0;
+
+ if (t->deserialized_state == TIMER_WAITING)
+ timer_enter_waiting(t, false);
+ else
+ timer_set_state(t, t->deserialized_state);
+
+ return 0;
+}
+
+static void timer_enter_dead(Timer *t, TimerResult f) {
+ assert(t);
+
+ if (t->result == TIMER_SUCCESS)
+ t->result = f;
+
+ unit_log_result(UNIT(t), t->result == TIMER_SUCCESS, timer_result_to_string(t->result));
+ timer_set_state(t, t->result != TIMER_SUCCESS ? TIMER_FAILED : TIMER_DEAD);
+}
+
+static void timer_enter_elapsed(Timer *t, bool leave_around) {
+ assert(t);
+
+ /* If a unit is marked with RemainAfterElapse=yes we leave it
+ * around even after it elapsed once, so that starting it
+ * later again does not necessarily mean immediate
+ * retriggering. We unconditionally leave units with
+ * TIMER_UNIT_ACTIVE or TIMER_UNIT_INACTIVE triggers around,
+ * since they might be restarted automatically at any time
+ * later on. */
+
+ if (t->remain_after_elapse || leave_around)
+ timer_set_state(t, TIMER_ELAPSED);
+ else
+ timer_enter_dead(t, TIMER_SUCCESS);
+}
+
+static void add_random(Timer *t, usec_t *v) {
+ char s[FORMAT_TIMESPAN_MAX];
+ usec_t add;
+
+ assert(t);
+ assert(v);
+
+ if (t->random_usec == 0)
+ return;
+ if (*v == USEC_INFINITY)
+ return;
+
+ add = random_u64() % t->random_usec;
+
+ if (*v + add < *v) /* overflow */
+ *v = (usec_t) -2; /* Highest possible value, that is not USEC_INFINITY */
+ else
+ *v += add;
+
+ log_unit_debug(UNIT(t), "Adding %s random time.", format_timespan(s, sizeof(s), add, 0));
+}
+
+static void timer_enter_waiting(Timer *t, bool time_change) {
+ bool found_monotonic = false, found_realtime = false;
+ bool leave_around = false;
+ triple_timestamp ts;
+ TimerValue *v;
+ Unit *trigger;
+ int r;
+
+ assert(t);
+
+ trigger = UNIT_TRIGGER(UNIT(t));
+ if (!trigger) {
+ log_unit_error(UNIT(t), "Unit to trigger vanished.");
+ timer_enter_dead(t, TIMER_FAILURE_RESOURCES);
+ return;
+ }
+
+ triple_timestamp_get(&ts);
+ t->next_elapse_monotonic_or_boottime = t->next_elapse_realtime = 0;
+
+ LIST_FOREACH(value, v, t->values) {
+ if (v->disabled)
+ continue;
+
+ if (v->base == TIMER_CALENDAR) {
+ usec_t b;
+
+ /* If we know the last time this was
+ * triggered, schedule the job based relative
+ * to that. If we don't, just start from
+ * the activation time. */
+
+ if (t->last_trigger.realtime > 0)
+ b = t->last_trigger.realtime;
+ else {
+ if (state_translation_table[t->state] == UNIT_ACTIVE)
+ b = UNIT(t)->inactive_exit_timestamp.realtime;
+ else
+ b = ts.realtime;
+ }
+
+ r = calendar_spec_next_usec(v->calendar_spec, b, &v->next_elapse);
+ if (r < 0)
+ continue;
+
+ if (!found_realtime)
+ t->next_elapse_realtime = v->next_elapse;
+ else
+ t->next_elapse_realtime = MIN(t->next_elapse_realtime, v->next_elapse);
+
+ found_realtime = true;
+
+ } else {
+ usec_t base;
+
+ switch (v->base) {
+
+ case TIMER_ACTIVE:
+ if (state_translation_table[t->state] == UNIT_ACTIVE)
+ base = UNIT(t)->inactive_exit_timestamp.monotonic;
+ else
+ base = ts.monotonic;
+ break;
+
+ case TIMER_BOOT:
+ if (detect_container() <= 0) {
+ /* CLOCK_MONOTONIC equals the uptime on Linux */
+ base = 0;
+ break;
+ }
+ /* In a container we don't want to include the time the host
+ * was already up when the container started, so count from
+ * our own startup. */
+ _fallthrough_;
+ case TIMER_STARTUP:
+ base = UNIT(t)->manager->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic;
+ break;
+
+ case TIMER_UNIT_ACTIVE:
+ leave_around = true;
+ base = trigger->inactive_exit_timestamp.monotonic;
+
+ if (base <= 0)
+ base = t->last_trigger.monotonic;
+
+ if (base <= 0)
+ continue;
+ base = MAX(base, t->last_trigger.monotonic);
+
+ break;
+
+ case TIMER_UNIT_INACTIVE:
+ leave_around = true;
+ base = trigger->inactive_enter_timestamp.monotonic;
+
+ if (base <= 0)
+ base = t->last_trigger.monotonic;
+
+ if (base <= 0)
+ continue;
+ base = MAX(base, t->last_trigger.monotonic);
+
+ break;
+
+ default:
+ assert_not_reached("Unknown timer base");
+ }
+
+ v->next_elapse = usec_add(usec_shift_clock(base, CLOCK_MONOTONIC, TIMER_MONOTONIC_CLOCK(t)), v->value);
+
+ if (dual_timestamp_is_set(&t->last_trigger) &&
+ !time_change &&
+ v->next_elapse < triple_timestamp_by_clock(&ts, TIMER_MONOTONIC_CLOCK(t)) &&
+ IN_SET(v->base, TIMER_ACTIVE, TIMER_BOOT, TIMER_STARTUP)) {
+ /* This is a one time trigger, disable it now */
+ v->disabled = true;
+ continue;
+ }
+
+ if (!found_monotonic)
+ t->next_elapse_monotonic_or_boottime = v->next_elapse;
+ else
+ t->next_elapse_monotonic_or_boottime = MIN(t->next_elapse_monotonic_or_boottime, v->next_elapse);
+
+ found_monotonic = true;
+ }
+ }
+
+ if (!found_monotonic && !found_realtime) {
+ log_unit_debug(UNIT(t), "Timer is elapsed.");
+ timer_enter_elapsed(t, leave_around);
+ return;
+ }
+
+ if (found_monotonic) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ usec_t left;
+
+ add_random(t, &t->next_elapse_monotonic_or_boottime);
+
+ left = usec_sub_unsigned(t->next_elapse_monotonic_or_boottime, triple_timestamp_by_clock(&ts, TIMER_MONOTONIC_CLOCK(t)));
+ log_unit_debug(UNIT(t), "Monotonic timer elapses in %s.", format_timespan(buf, sizeof(buf), left, 0));
+
+ if (t->monotonic_event_source) {
+ r = sd_event_source_set_time(t->monotonic_event_source, t->next_elapse_monotonic_or_boottime);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_enabled(t->monotonic_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ goto fail;
+ } else {
+
+ r = sd_event_add_time(
+ UNIT(t)->manager->event,
+ &t->monotonic_event_source,
+ t->wake_system ? CLOCK_BOOTTIME_ALARM : CLOCK_MONOTONIC,
+ t->next_elapse_monotonic_or_boottime, t->accuracy_usec,
+ timer_dispatch, t);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(t->monotonic_event_source, "timer-monotonic");
+ }
+
+ } else if (t->monotonic_event_source) {
+
+ r = sd_event_source_set_enabled(t->monotonic_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ goto fail;
+ }
+
+ if (found_realtime) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+
+ add_random(t, &t->next_elapse_realtime);
+
+ log_unit_debug(UNIT(t), "Realtime timer elapses at %s.", format_timestamp(buf, sizeof(buf), t->next_elapse_realtime));
+
+ if (t->realtime_event_source) {
+ r = sd_event_source_set_time(t->realtime_event_source, t->next_elapse_realtime);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_enabled(t->realtime_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ goto fail;
+ } else {
+ r = sd_event_add_time(
+ UNIT(t)->manager->event,
+ &t->realtime_event_source,
+ t->wake_system ? CLOCK_REALTIME_ALARM : CLOCK_REALTIME,
+ t->next_elapse_realtime, t->accuracy_usec,
+ timer_dispatch, t);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(t->realtime_event_source, "timer-realtime");
+ }
+
+ } else if (t->realtime_event_source) {
+
+ r = sd_event_source_set_enabled(t->realtime_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ goto fail;
+ }
+
+ timer_set_state(t, TIMER_WAITING);
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(t), r, "Failed to enter waiting state: %m");
+ timer_enter_dead(t, TIMER_FAILURE_RESOURCES);
+}
+
+static void timer_enter_running(Timer *t) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Unit *trigger;
+ int r;
+
+ assert(t);
+
+ /* Don't start job if we are supposed to go down */
+ if (unit_stop_pending(UNIT(t)))
+ return;
+
+ trigger = UNIT_TRIGGER(UNIT(t));
+ if (!trigger) {
+ log_unit_error(UNIT(t), "Unit to trigger vanished.");
+ timer_enter_dead(t, TIMER_FAILURE_RESOURCES);
+ return;
+ }
+
+ r = manager_add_job(UNIT(t)->manager, JOB_START, trigger, JOB_REPLACE, &error, NULL);
+ if (r < 0)
+ goto fail;
+
+ dual_timestamp_get(&t->last_trigger);
+
+ if (t->stamp_path)
+ touch_file(t->stamp_path, true, t->last_trigger.realtime, UID_INVALID, GID_INVALID, MODE_INVALID);
+
+ timer_set_state(t, TIMER_RUNNING);
+ return;
+
+fail:
+ log_unit_warning(UNIT(t), "Failed to queue unit startup job: %s", bus_error_message(&error, r));
+ timer_enter_dead(t, TIMER_FAILURE_RESOURCES);
+}
+
+static int timer_start(Unit *u) {
+ Timer *t = TIMER(u);
+ TimerValue *v;
+ Unit *trigger;
+ int r;
+
+ assert(t);
+ assert(IN_SET(t->state, TIMER_DEAD, TIMER_FAILED));
+
+ trigger = UNIT_TRIGGER(u);
+ if (!trigger || trigger->load_state != UNIT_LOADED) {
+ log_unit_error(u, "Refusing to start, unit to trigger not loaded.");
+ return -ENOENT;
+ }
+
+ r = unit_start_limit_test(u);
+ if (r < 0) {
+ timer_enter_dead(t, TIMER_FAILURE_START_LIMIT_HIT);
+ return r;
+ }
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ t->last_trigger = DUAL_TIMESTAMP_NULL;
+
+ /* Reenable all timers that depend on unit activation time */
+ LIST_FOREACH(value, v, t->values)
+ if (v->base == TIMER_ACTIVE)
+ v->disabled = false;
+
+ if (t->stamp_path) {
+ struct stat st;
+
+ if (stat(t->stamp_path, &st) >= 0) {
+ usec_t ft;
+
+ /* Load the file timestamp, but only if it is actually in the past. If it is in the future,
+ * something is wrong with the system clock. */
+
+ ft = timespec_load(&st.st_mtim);
+ if (ft < now(CLOCK_REALTIME))
+ t->last_trigger.realtime = ft;
+ else {
+ char z[FORMAT_TIMESTAMP_MAX];
+
+ log_unit_warning(u, "Not using persistent file timestamp %s as it is in the future.",
+ format_timestamp(z, sizeof(z), ft));
+ }
+
+ } else if (errno == ENOENT)
+ /* The timer has never run before,
+ * make sure a stamp file exists.
+ */
+ (void) touch_file(t->stamp_path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
+ }
+
+ t->result = TIMER_SUCCESS;
+ timer_enter_waiting(t, false);
+ return 1;
+}
+
+static int timer_stop(Unit *u) {
+ Timer *t = TIMER(u);
+
+ assert(t);
+ assert(IN_SET(t->state, TIMER_WAITING, TIMER_RUNNING, TIMER_ELAPSED));
+
+ timer_enter_dead(t, TIMER_SUCCESS);
+ return 1;
+}
+
+static int timer_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Timer *t = TIMER(u);
+
+ assert(u);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", timer_state_to_string(t->state));
+ (void) serialize_item(f, "result", timer_result_to_string(t->result));
+
+ if (t->last_trigger.realtime > 0)
+ (void) serialize_usec(f, "last-trigger-realtime", t->last_trigger.realtime);
+
+ if (t->last_trigger.monotonic > 0)
+ (void) serialize_usec(f, "last-trigger-monotonic", t->last_trigger.monotonic);
+
+ return 0;
+}
+
+static int timer_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Timer *t = TIMER(u);
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ TimerState state;
+
+ state = timer_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ t->deserialized_state = state;
+
+ } else if (streq(key, "result")) {
+ TimerResult f;
+
+ f = timer_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse result value: %s", value);
+ else if (f != TIMER_SUCCESS)
+ t->result = f;
+
+ } else if (streq(key, "last-trigger-realtime"))
+ (void) deserialize_usec(value, &t->last_trigger.realtime);
+ else if (streq(key, "last-trigger-monotonic"))
+ (void) deserialize_usec(value, &t->last_trigger.monotonic);
+ else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+_pure_ static UnitActiveState timer_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[TIMER(u)->state];
+}
+
+_pure_ static const char *timer_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return timer_state_to_string(TIMER(u)->state);
+}
+
+static int timer_dispatch(sd_event_source *s, uint64_t usec, void *userdata) {
+ Timer *t = TIMER(userdata);
+
+ assert(t);
+
+ if (t->state != TIMER_WAITING)
+ return 0;
+
+ log_unit_debug(UNIT(t), "Timer elapsed.");
+ timer_enter_running(t);
+ return 0;
+}
+
+static void timer_trigger_notify(Unit *u, Unit *other) {
+ Timer *t = TIMER(u);
+ TimerValue *v;
+
+ assert(u);
+ assert(other);
+
+ if (other->load_state != UNIT_LOADED)
+ return;
+
+ /* Reenable all timers that depend on unit state */
+ LIST_FOREACH(value, v, t->values)
+ if (IN_SET(v->base, TIMER_UNIT_ACTIVE, TIMER_UNIT_INACTIVE))
+ v->disabled = false;
+
+ switch (t->state) {
+
+ case TIMER_WAITING:
+ case TIMER_ELAPSED:
+
+ /* Recalculate sleep time */
+ timer_enter_waiting(t, false);
+ break;
+
+ case TIMER_RUNNING:
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other))) {
+ log_unit_debug(UNIT(t), "Got notified about unit deactivation.");
+ timer_enter_waiting(t, false);
+ }
+ break;
+
+ case TIMER_DEAD:
+ case TIMER_FAILED:
+ break;
+
+ default:
+ assert_not_reached("Unknown timer state");
+ }
+}
+
+static void timer_reset_failed(Unit *u) {
+ Timer *t = TIMER(u);
+
+ assert(t);
+
+ if (t->state == TIMER_FAILED)
+ timer_set_state(t, TIMER_DEAD);
+
+ t->result = TIMER_SUCCESS;
+}
+
+static void timer_time_change(Unit *u) {
+ Timer *t = TIMER(u);
+ usec_t ts;
+
+ assert(u);
+
+ if (t->state != TIMER_WAITING)
+ return;
+
+ /* If we appear to have triggered in the future, the system clock must
+ * have been set backwards. So let's rewind our own clock and allow
+ * the future trigger(s) to happen again :). Exactly the same as when
+ * you start a timer unit with Persistent=yes. */
+ ts = now(CLOCK_REALTIME);
+ if (t->last_trigger.realtime > ts)
+ t->last_trigger.realtime = ts;
+
+ log_unit_debug(u, "Time change, recalculating next elapse.");
+ timer_enter_waiting(t, true);
+}
+
+static void timer_timezone_change(Unit *u) {
+ Timer *t = TIMER(u);
+
+ assert(u);
+
+ if (t->state != TIMER_WAITING)
+ return;
+
+ log_unit_debug(u, "Timezone change, recalculating next elapse.");
+ timer_enter_waiting(t, false);
+}
+
+static const char* const timer_base_table[_TIMER_BASE_MAX] = {
+ [TIMER_ACTIVE] = "OnActiveSec",
+ [TIMER_BOOT] = "OnBootSec",
+ [TIMER_STARTUP] = "OnStartupSec",
+ [TIMER_UNIT_ACTIVE] = "OnUnitActiveSec",
+ [TIMER_UNIT_INACTIVE] = "OnUnitInactiveSec",
+ [TIMER_CALENDAR] = "OnCalendar"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(timer_base, TimerBase);
+
+static const char* const timer_result_table[_TIMER_RESULT_MAX] = {
+ [TIMER_SUCCESS] = "success",
+ [TIMER_FAILURE_RESOURCES] = "resources",
+ [TIMER_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(timer_result, TimerResult);
+
+const UnitVTable timer_vtable = {
+ .object_size = sizeof(Timer),
+
+ .sections =
+ "Unit\0"
+ "Timer\0"
+ "Install\0",
+ .private_section = "Timer",
+
+ .init = timer_init,
+ .done = timer_done,
+ .load = timer_load,
+
+ .coldplug = timer_coldplug,
+
+ .dump = timer_dump,
+
+ .start = timer_start,
+ .stop = timer_stop,
+
+ .serialize = timer_serialize,
+ .deserialize_item = timer_deserialize_item,
+
+ .active_state = timer_active_state,
+ .sub_state_to_string = timer_sub_state_to_string,
+
+ .trigger_notify = timer_trigger_notify,
+
+ .reset_failed = timer_reset_failed,
+ .time_change = timer_time_change,
+ .timezone_change = timer_timezone_change,
+
+ .bus_vtable = bus_timer_vtable,
+ .bus_set_property = bus_timer_set_property,
+
+ .can_transient = true,
+};
diff --git a/src/core/timer.h b/src/core/timer.h
new file mode 100644
index 0000000..833aadb
--- /dev/null
+++ b/src/core/timer.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Timer Timer;
+
+#include "calendarspec.h"
+#include "unit.h"
+
+typedef enum TimerBase {
+ TIMER_ACTIVE,
+ TIMER_BOOT,
+ TIMER_STARTUP,
+ TIMER_UNIT_ACTIVE,
+ TIMER_UNIT_INACTIVE,
+ TIMER_CALENDAR,
+ _TIMER_BASE_MAX,
+ _TIMER_BASE_INVALID = -1
+} TimerBase;
+
+typedef struct TimerValue {
+ TimerBase base;
+ bool disabled;
+
+ usec_t value; /* only for monotonic events */
+ CalendarSpec *calendar_spec; /* only for calendar events */
+ usec_t next_elapse;
+
+ LIST_FIELDS(struct TimerValue, value);
+} TimerValue;
+
+typedef enum TimerResult {
+ TIMER_SUCCESS,
+ TIMER_FAILURE_RESOURCES,
+ TIMER_FAILURE_START_LIMIT_HIT,
+ _TIMER_RESULT_MAX,
+ _TIMER_RESULT_INVALID = -1
+} TimerResult;
+
+struct Timer {
+ Unit meta;
+
+ usec_t accuracy_usec;
+ usec_t random_usec;
+
+ LIST_HEAD(TimerValue, values);
+ usec_t next_elapse_realtime;
+ usec_t next_elapse_monotonic_or_boottime;
+ dual_timestamp last_trigger;
+
+ TimerState state, deserialized_state;
+
+ sd_event_source *monotonic_event_source;
+ sd_event_source *realtime_event_source;
+
+ TimerResult result;
+
+ bool persistent;
+ bool wake_system;
+ bool remain_after_elapse;
+
+ char *stamp_path;
+};
+
+#define TIMER_MONOTONIC_CLOCK(t) ((t)->wake_system && clock_boottime_supported() ? CLOCK_BOOTTIME_ALARM : CLOCK_MONOTONIC)
+
+void timer_free_values(Timer *t);
+
+extern const UnitVTable timer_vtable;
+
+const char *timer_base_to_string(TimerBase i) _const_;
+TimerBase timer_base_from_string(const char *s) _pure_;
+
+const char* timer_result_to_string(TimerResult i) _const_;
+TimerResult timer_result_from_string(const char *s) _pure_;
+
+DEFINE_CAST(TIMER, Timer);
diff --git a/src/core/transaction.c b/src/core/transaction.c
new file mode 100644
index 0000000..2418332
--- /dev/null
+++ b/src/core/transaction.c
@@ -0,0 +1,1119 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "terminal-util.h"
+#include "transaction.h"
+#include "dbus-unit.h"
+
+static void transaction_unlink_job(Transaction *tr, Job *j, bool delete_dependencies);
+
+static void transaction_delete_job(Transaction *tr, Job *j, bool delete_dependencies) {
+ assert(tr);
+ assert(j);
+
+ /* Deletes one job from the transaction */
+
+ transaction_unlink_job(tr, j, delete_dependencies);
+
+ job_free(j);
+}
+
+static void transaction_delete_unit(Transaction *tr, Unit *u) {
+ Job *j;
+
+ /* Deletes all jobs associated with a certain unit from the
+ * transaction */
+
+ while ((j = hashmap_get(tr->jobs, u)))
+ transaction_delete_job(tr, j, true);
+}
+
+void transaction_abort(Transaction *tr) {
+ Job *j;
+
+ assert(tr);
+
+ while ((j = hashmap_first(tr->jobs)))
+ transaction_delete_job(tr, j, false);
+
+ assert(hashmap_isempty(tr->jobs));
+}
+
+static void transaction_find_jobs_that_matter_to_anchor(Job *j, unsigned generation) {
+ JobDependency *l;
+
+ /* A recursive sweep through the graph that marks all units
+ * that matter to the anchor job, i.e. are directly or
+ * indirectly a dependency of the anchor job via paths that
+ * are fully marked as mattering. */
+
+ j->matters_to_anchor = true;
+ j->generation = generation;
+
+ LIST_FOREACH(subject, l, j->subject_list) {
+
+ /* This link does not matter */
+ if (!l->matters)
+ continue;
+
+ /* This unit has already been marked */
+ if (l->object->generation == generation)
+ continue;
+
+ transaction_find_jobs_that_matter_to_anchor(l->object, generation);
+ }
+}
+
+static void transaction_merge_and_delete_job(Transaction *tr, Job *j, Job *other, JobType t) {
+ JobDependency *l, *last;
+
+ assert(j);
+ assert(other);
+ assert(j->unit == other->unit);
+ assert(!j->installed);
+
+ /* Merges 'other' into 'j' and then deletes 'other'. */
+
+ j->type = t;
+ j->state = JOB_WAITING;
+ j->irreversible = j->irreversible || other->irreversible;
+ j->matters_to_anchor = j->matters_to_anchor || other->matters_to_anchor;
+
+ /* Patch us in as new owner of the JobDependency objects */
+ last = NULL;
+ LIST_FOREACH(subject, l, other->subject_list) {
+ assert(l->subject == other);
+ l->subject = j;
+ last = l;
+ }
+
+ /* Merge both lists */
+ if (last) {
+ last->subject_next = j->subject_list;
+ if (j->subject_list)
+ j->subject_list->subject_prev = last;
+ j->subject_list = other->subject_list;
+ }
+
+ /* Patch us in as new owner of the JobDependency objects */
+ last = NULL;
+ LIST_FOREACH(object, l, other->object_list) {
+ assert(l->object == other);
+ l->object = j;
+ last = l;
+ }
+
+ /* Merge both lists */
+ if (last) {
+ last->object_next = j->object_list;
+ if (j->object_list)
+ j->object_list->object_prev = last;
+ j->object_list = other->object_list;
+ }
+
+ /* Kill the other job */
+ other->subject_list = NULL;
+ other->object_list = NULL;
+ transaction_delete_job(tr, other, true);
+}
+
+_pure_ static bool job_is_conflicted_by(Job *j) {
+ JobDependency *l;
+
+ assert(j);
+
+ /* Returns true if this job is pulled in by a least one
+ * ConflictedBy dependency. */
+
+ LIST_FOREACH(object, l, j->object_list)
+ if (l->conflicts)
+ return true;
+
+ return false;
+}
+
+static int delete_one_unmergeable_job(Transaction *tr, Job *j) {
+ Job *k;
+
+ assert(j);
+
+ /* Tries to delete one item in the linked list
+ * j->transaction_next->transaction_next->... that conflicts
+ * with another one, in an attempt to make an inconsistent
+ * transaction work. */
+
+ /* We rely here on the fact that if a merged with b does not
+ * merge with c, either a or b merge with c neither */
+ LIST_FOREACH(transaction, j, j)
+ LIST_FOREACH(transaction, k, j->transaction_next) {
+ Job *d;
+
+ /* Is this one mergeable? Then skip it */
+ if (job_type_is_mergeable(j->type, k->type))
+ continue;
+
+ /* Ok, we found two that conflict, let's see if we can
+ * drop one of them */
+ if (!j->matters_to_anchor && !k->matters_to_anchor) {
+
+ /* Both jobs don't matter, so let's
+ * find the one that is smarter to
+ * remove. Let's think positive and
+ * rather remove stops then starts --
+ * except if something is being
+ * stopped because it is conflicted by
+ * another unit in which case we
+ * rather remove the start. */
+
+ log_unit_debug(j->unit,
+ "Looking at job %s/%s conflicted_by=%s",
+ j->unit->id, job_type_to_string(j->type),
+ yes_no(j->type == JOB_STOP && job_is_conflicted_by(j)));
+ log_unit_debug(k->unit,
+ "Looking at job %s/%s conflicted_by=%s",
+ k->unit->id, job_type_to_string(k->type),
+ yes_no(k->type == JOB_STOP && job_is_conflicted_by(k)));
+
+ if (j->type == JOB_STOP) {
+
+ if (job_is_conflicted_by(j))
+ d = k;
+ else
+ d = j;
+
+ } else if (k->type == JOB_STOP) {
+
+ if (job_is_conflicted_by(k))
+ d = j;
+ else
+ d = k;
+ } else
+ d = j;
+
+ } else if (!j->matters_to_anchor)
+ d = j;
+ else if (!k->matters_to_anchor)
+ d = k;
+ else
+ return -ENOEXEC;
+
+ /* Ok, we can drop one, so let's do so. */
+ log_unit_debug(d->unit,
+ "Fixing conflicting jobs %s/%s,%s/%s by deleting job %s/%s",
+ j->unit->id, job_type_to_string(j->type),
+ k->unit->id, job_type_to_string(k->type),
+ d->unit->id, job_type_to_string(d->type));
+ transaction_delete_job(tr, d, true);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int transaction_merge_jobs(Transaction *tr, sd_bus_error *e) {
+ Job *j;
+ Iterator i;
+ int r;
+
+ assert(tr);
+
+ /* First step, check whether any of the jobs for one specific
+ * task conflict. If so, try to drop one of them. */
+ HASHMAP_FOREACH(j, tr->jobs, i) {
+ JobType t;
+ Job *k;
+
+ t = j->type;
+ LIST_FOREACH(transaction, k, j->transaction_next) {
+ if (job_type_merge_and_collapse(&t, k->type, j->unit) >= 0)
+ continue;
+
+ /* OK, we could not merge all jobs for this
+ * action. Let's see if we can get rid of one
+ * of them */
+
+ r = delete_one_unmergeable_job(tr, j);
+ if (r >= 0)
+ /* Ok, we managed to drop one, now
+ * let's ask our callers to call us
+ * again after garbage collecting */
+ return -EAGAIN;
+
+ /* We couldn't merge anything. Failure */
+ return sd_bus_error_setf(e, BUS_ERROR_TRANSACTION_JOBS_CONFLICTING,
+ "Transaction contains conflicting jobs '%s' and '%s' for %s. "
+ "Probably contradicting requirement dependencies configured.",
+ job_type_to_string(t),
+ job_type_to_string(k->type),
+ k->unit->id);
+ }
+ }
+
+ /* Second step, merge the jobs. */
+ HASHMAP_FOREACH(j, tr->jobs, i) {
+ JobType t = j->type;
+ Job *k;
+
+ /* Merge all transaction jobs for j->unit */
+ LIST_FOREACH(transaction, k, j->transaction_next)
+ assert_se(job_type_merge_and_collapse(&t, k->type, j->unit) == 0);
+
+ while ((k = j->transaction_next)) {
+ if (tr->anchor_job == k) {
+ transaction_merge_and_delete_job(tr, k, j, t);
+ j = k;
+ } else
+ transaction_merge_and_delete_job(tr, j, k, t);
+ }
+
+ assert(!j->transaction_next);
+ assert(!j->transaction_prev);
+ }
+
+ return 0;
+}
+
+static void transaction_drop_redundant(Transaction *tr) {
+ Job *j;
+ Iterator i;
+
+ /* Goes through the transaction and removes all jobs of the units
+ * whose jobs are all noops. If not all of a unit's jobs are
+ * redundant, they are kept. */
+
+ assert(tr);
+
+rescan:
+ HASHMAP_FOREACH(j, tr->jobs, i) {
+ Job *k;
+
+ LIST_FOREACH(transaction, k, j) {
+
+ if (tr->anchor_job == k ||
+ !job_type_is_redundant(k->type, unit_active_state(k->unit)) ||
+ (k->unit->job && job_type_is_conflicting(k->type, k->unit->job->type)))
+ goto next_unit;
+ }
+
+ log_trace("Found redundant job %s/%s, dropping from transaction.", j->unit->id, job_type_to_string(j->type));
+ transaction_delete_job(tr, j, false);
+ goto rescan;
+ next_unit:;
+ }
+}
+
+_pure_ static bool unit_matters_to_anchor(Unit *u, Job *j) {
+ assert(u);
+ assert(!j->transaction_prev);
+
+ /* Checks whether at least one of the jobs for this unit
+ * matters to the anchor. */
+
+ LIST_FOREACH(transaction, j, j)
+ if (j->matters_to_anchor)
+ return true;
+
+ return false;
+}
+
+static char* merge_unit_ids(const char* unit_log_field, char **pairs) {
+ char **unit_id, **job_type, *ans = NULL;
+ size_t alloc = 0, size = 0, next;
+
+ STRV_FOREACH_PAIR(unit_id, job_type, pairs) {
+ next = strlen(unit_log_field) + strlen(*unit_id);
+ if (!GREEDY_REALLOC(ans, alloc, size + next + 1)) {
+ return mfree(ans);
+ }
+
+ sprintf(ans + size, "%s%s", unit_log_field, *unit_id);
+ if (*(unit_id+1))
+ ans[size + next] = '\n';
+ size += next + 1;
+ }
+
+ return ans;
+}
+
+static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsigned generation, sd_bus_error *e) {
+ Iterator i;
+ Unit *u;
+ void *v;
+ int r;
+
+ assert(tr);
+ assert(j);
+ assert(!j->transaction_prev);
+
+ /* Does a recursive sweep through the ordering graph, looking
+ * for a cycle. If we find a cycle we try to break it. */
+
+ /* Have we seen this before? */
+ if (j->generation == generation) {
+ Job *k, *delete = NULL;
+ _cleanup_free_ char **array = NULL, *unit_ids = NULL;
+ char **unit_id, **job_type;
+
+ /* If the marker is NULL we have been here already and
+ * decided the job was loop-free from here. Hence
+ * shortcut things and return right-away. */
+ if (!j->marker)
+ return 0;
+
+ /* So, the marker is not NULL and we already have been here. We have
+ * a cycle. Let's try to break it. We go backwards in our path and
+ * try to find a suitable job to remove. We use the marker to find
+ * our way back, since smart how we are we stored our way back in
+ * there. */
+
+ for (k = from; k; k = ((k->generation == generation && k->marker != k) ? k->marker : NULL)) {
+
+ /* For logging below */
+ if (strv_push_pair(&array, k->unit->id, (char*) job_type_to_string(k->type)) < 0)
+ log_oom();
+
+ if (!delete && hashmap_get(tr->jobs, k->unit) && !unit_matters_to_anchor(k->unit, k))
+ /* Ok, we can drop this one, so let's do so. */
+ delete = k;
+
+ /* Check if this in fact was the beginning of the cycle */
+ if (k == j)
+ break;
+ }
+
+ unit_ids = merge_unit_ids(j->manager->unit_log_field, array); /* ignore error */
+
+ STRV_FOREACH_PAIR(unit_id, job_type, array)
+ /* logging for j not k here to provide a consistent narrative */
+ log_struct(LOG_WARNING,
+ "MESSAGE=%s: Found %s on %s/%s",
+ j->unit->id,
+ unit_id == array ? "ordering cycle" : "dependency",
+ *unit_id, *job_type,
+ unit_ids);
+
+ if (delete) {
+ const char *status;
+ /* logging for j not k here to provide a consistent narrative */
+ log_struct(LOG_ERR,
+ "MESSAGE=%s: Job %s/%s deleted to break ordering cycle starting with %s/%s",
+ j->unit->id, delete->unit->id, job_type_to_string(delete->type),
+ j->unit->id, job_type_to_string(j->type),
+ unit_ids);
+
+ if (log_get_show_color())
+ status = ANSI_HIGHLIGHT_RED " SKIP " ANSI_NORMAL;
+ else
+ status = " SKIP ";
+
+ unit_status_printf(delete->unit, status,
+ "Ordering cycle found, skipping %s");
+ transaction_delete_unit(tr, delete->unit);
+ return -EAGAIN;
+ }
+
+ log_struct(LOG_ERR,
+ "MESSAGE=%s: Unable to break cycle starting with %s/%s",
+ j->unit->id, j->unit->id, job_type_to_string(j->type),
+ unit_ids);
+
+ return sd_bus_error_setf(e, BUS_ERROR_TRANSACTION_ORDER_IS_CYCLIC,
+ "Transaction order is cyclic. See system logs for details.");
+ }
+
+ /* Make the marker point to where we come from, so that we can
+ * find our way backwards if we want to break a cycle. We use
+ * a special marker for the beginning: we point to
+ * ourselves. */
+ j->marker = from ? from : j;
+ j->generation = generation;
+
+ /* We assume that the dependencies are bidirectional, and
+ * hence can ignore UNIT_AFTER */
+ HASHMAP_FOREACH_KEY(v, u, j->unit->dependencies[UNIT_BEFORE], i) {
+ Job *o;
+
+ /* Is there a job for this unit? */
+ o = hashmap_get(tr->jobs, u);
+ if (!o) {
+ /* Ok, there is no job for this in the
+ * transaction, but maybe there is already one
+ * running? */
+ o = u->job;
+ if (!o)
+ continue;
+ }
+
+ r = transaction_verify_order_one(tr, o, j, generation, e);
+ if (r < 0)
+ return r;
+ }
+
+ /* Ok, let's backtrack, and remember that this entry is not on
+ * our path anymore. */
+ j->marker = NULL;
+
+ return 0;
+}
+
+static int transaction_verify_order(Transaction *tr, unsigned *generation, sd_bus_error *e) {
+ Job *j;
+ int r;
+ Iterator i;
+ unsigned g;
+
+ assert(tr);
+ assert(generation);
+
+ /* Check if the ordering graph is cyclic. If it is, try to fix
+ * that up by dropping one of the jobs. */
+
+ g = (*generation)++;
+
+ HASHMAP_FOREACH(j, tr->jobs, i) {
+ r = transaction_verify_order_one(tr, j, NULL, g, e);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static void transaction_collect_garbage(Transaction *tr) {
+ Iterator i;
+ Job *j;
+
+ assert(tr);
+
+ /* Drop jobs that are not required by any other job */
+
+rescan:
+ HASHMAP_FOREACH(j, tr->jobs, i) {
+ if (tr->anchor_job == j)
+ continue;
+ if (j->object_list) {
+ log_trace("Keeping job %s/%s because of %s/%s",
+ j->unit->id, job_type_to_string(j->type),
+ j->object_list->subject ? j->object_list->subject->unit->id : "root",
+ j->object_list->subject ? job_type_to_string(j->object_list->subject->type) : "root");
+ continue;
+ }
+
+ log_trace("Garbage collecting job %s/%s", j->unit->id, job_type_to_string(j->type));
+ transaction_delete_job(tr, j, true);
+ goto rescan;
+ }
+}
+
+static int transaction_is_destructive(Transaction *tr, JobMode mode, sd_bus_error *e) {
+ Iterator i;
+ Job *j;
+
+ assert(tr);
+
+ /* Checks whether applying this transaction means that
+ * existing jobs would be replaced */
+
+ HASHMAP_FOREACH(j, tr->jobs, i) {
+
+ /* Assume merged */
+ assert(!j->transaction_prev);
+ assert(!j->transaction_next);
+
+ if (j->unit->job && (mode == JOB_FAIL || j->unit->job->irreversible) &&
+ job_type_is_conflicting(j->unit->job->type, j->type))
+ return sd_bus_error_setf(e, BUS_ERROR_TRANSACTION_IS_DESTRUCTIVE,
+ "Transaction for %s/%s is destructive (%s has '%s' job queued, but '%s' is included in transaction).",
+ tr->anchor_job->unit->id, job_type_to_string(tr->anchor_job->type),
+ j->unit->id, job_type_to_string(j->unit->job->type), job_type_to_string(j->type));
+ }
+
+ return 0;
+}
+
+static void transaction_minimize_impact(Transaction *tr) {
+ Job *j;
+ Iterator i;
+
+ assert(tr);
+
+ /* Drops all unnecessary jobs that reverse already active jobs
+ * or that stop a running service. */
+
+rescan:
+ HASHMAP_FOREACH(j, tr->jobs, i) {
+ LIST_FOREACH(transaction, j, j) {
+ bool stops_running_service, changes_existing_job;
+
+ /* If it matters, we shouldn't drop it */
+ if (j->matters_to_anchor)
+ continue;
+
+ /* Would this stop a running service?
+ * Would this change an existing job?
+ * If so, let's drop this entry */
+
+ stops_running_service =
+ j->type == JOB_STOP && UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(j->unit));
+
+ changes_existing_job =
+ j->unit->job &&
+ job_type_is_conflicting(j->type, j->unit->job->type);
+
+ if (!stops_running_service && !changes_existing_job)
+ continue;
+
+ if (stops_running_service)
+ log_unit_debug(j->unit,
+ "%s/%s would stop a running service.",
+ j->unit->id, job_type_to_string(j->type));
+
+ if (changes_existing_job)
+ log_unit_debug(j->unit,
+ "%s/%s would change existing job.",
+ j->unit->id, job_type_to_string(j->type));
+
+ /* Ok, let's get rid of this */
+ log_unit_debug(j->unit,
+ "Deleting %s/%s to minimize impact.",
+ j->unit->id, job_type_to_string(j->type));
+
+ transaction_delete_job(tr, j, true);
+ goto rescan;
+ }
+ }
+}
+
+static int transaction_apply(Transaction *tr, Manager *m, JobMode mode) {
+ Iterator i;
+ Job *j;
+ int r;
+
+ /* Moves the transaction jobs to the set of active jobs */
+
+ if (IN_SET(mode, JOB_ISOLATE, JOB_FLUSH)) {
+
+ /* When isolating first kill all installed jobs which
+ * aren't part of the new transaction */
+ HASHMAP_FOREACH(j, m->jobs, i) {
+ assert(j->installed);
+
+ if (j->unit->ignore_on_isolate)
+ continue;
+
+ if (hashmap_get(tr->jobs, j->unit))
+ continue;
+
+ /* Not invalidating recursively. Avoids triggering
+ * OnFailure= actions of dependent jobs. Also avoids
+ * invalidating our iterator. */
+ job_finish_and_invalidate(j, JOB_CANCELED, false, false);
+ }
+ }
+
+ HASHMAP_FOREACH(j, tr->jobs, i) {
+ /* Assume merged */
+ assert(!j->transaction_prev);
+ assert(!j->transaction_next);
+
+ r = hashmap_put(m->jobs, UINT32_TO_PTR(j->id), j);
+ if (r < 0)
+ goto rollback;
+ }
+
+ while ((j = hashmap_steal_first(tr->jobs))) {
+ Job *installed_job;
+
+ /* Clean the job dependencies */
+ transaction_unlink_job(tr, j, false);
+
+ installed_job = job_install(j);
+ if (installed_job != j) {
+ /* j has been merged into a previously installed job */
+ if (tr->anchor_job == j)
+ tr->anchor_job = installed_job;
+ hashmap_remove(m->jobs, UINT32_TO_PTR(j->id));
+ job_free(j);
+ j = installed_job;
+ }
+
+ job_add_to_run_queue(j);
+ job_add_to_dbus_queue(j);
+ job_start_timer(j, false);
+ job_shutdown_magic(j);
+ }
+
+ return 0;
+
+rollback:
+
+ HASHMAP_FOREACH(j, tr->jobs, i)
+ hashmap_remove(m->jobs, UINT32_TO_PTR(j->id));
+
+ return r;
+}
+
+int transaction_activate(Transaction *tr, Manager *m, JobMode mode, sd_bus_error *e) {
+ Iterator i;
+ Job *j;
+ int r;
+ unsigned generation = 1;
+
+ assert(tr);
+
+ /* This applies the changes recorded in tr->jobs to
+ * the actual list of jobs, if possible. */
+
+ /* Reset the generation counter of all installed jobs. The detection of cycles
+ * looks at installed jobs. If they had a non-zero generation from some previous
+ * walk of the graph, the algorithm would break. */
+ HASHMAP_FOREACH(j, m->jobs, i)
+ j->generation = 0;
+
+ /* First step: figure out which jobs matter */
+ transaction_find_jobs_that_matter_to_anchor(tr->anchor_job, generation++);
+
+ /* Second step: Try not to stop any running services if
+ * we don't have to. Don't try to reverse running
+ * jobs if we don't have to. */
+ if (mode == JOB_FAIL)
+ transaction_minimize_impact(tr);
+
+ /* Third step: Drop redundant jobs */
+ transaction_drop_redundant(tr);
+
+ for (;;) {
+ /* Fourth step: Let's remove unneeded jobs that might
+ * be lurking. */
+ if (mode != JOB_ISOLATE)
+ transaction_collect_garbage(tr);
+
+ /* Fifth step: verify order makes sense and correct
+ * cycles if necessary and possible */
+ r = transaction_verify_order(tr, &generation, e);
+ if (r >= 0)
+ break;
+
+ if (r != -EAGAIN)
+ return log_warning_errno(r, "Requested transaction contains an unfixable cyclic ordering dependency: %s", bus_error_message(e, r));
+
+ /* Let's see if the resulting transaction ordering
+ * graph is still cyclic... */
+ }
+
+ for (;;) {
+ /* Sixth step: let's drop unmergeable entries if
+ * necessary and possible, merge entries we can
+ * merge */
+ r = transaction_merge_jobs(tr, e);
+ if (r >= 0)
+ break;
+
+ if (r != -EAGAIN)
+ return log_warning_errno(r, "Requested transaction contains unmergeable jobs: %s", bus_error_message(e, r));
+
+ /* Seventh step: an entry got dropped, let's garbage
+ * collect its dependencies. */
+ if (mode != JOB_ISOLATE)
+ transaction_collect_garbage(tr);
+
+ /* Let's see if the resulting transaction still has
+ * unmergeable entries ... */
+ }
+
+ /* Eights step: Drop redundant jobs again, if the merging now allows us to drop more. */
+ transaction_drop_redundant(tr);
+
+ /* Ninth step: check whether we can actually apply this */
+ r = transaction_is_destructive(tr, mode, e);
+ if (r < 0)
+ return log_notice_errno(r, "Requested transaction contradicts existing jobs: %s", bus_error_message(e, r));
+
+ /* Tenth step: apply changes */
+ r = transaction_apply(tr, m, mode);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to apply transaction: %m");
+
+ assert(hashmap_isempty(tr->jobs));
+
+ if (!hashmap_isempty(m->jobs)) {
+ /* Are there any jobs now? Then make sure we have the
+ * idle pipe around. We don't really care too much
+ * whether this works or not, as the idle pipe is a
+ * feature for cosmetics, not actually useful for
+ * anything beyond that. */
+
+ if (m->idle_pipe[0] < 0 && m->idle_pipe[1] < 0 &&
+ m->idle_pipe[2] < 0 && m->idle_pipe[3] < 0) {
+ (void) pipe2(m->idle_pipe, O_NONBLOCK|O_CLOEXEC);
+ (void) pipe2(m->idle_pipe + 2, O_NONBLOCK|O_CLOEXEC);
+ }
+ }
+
+ return 0;
+}
+
+static Job* transaction_add_one_job(Transaction *tr, JobType type, Unit *unit, bool *is_new) {
+ Job *j, *f;
+
+ assert(tr);
+ assert(unit);
+
+ /* Looks for an existing prospective job and returns that. If
+ * it doesn't exist it is created and added to the prospective
+ * jobs list. */
+
+ f = hashmap_get(tr->jobs, unit);
+
+ LIST_FOREACH(transaction, j, f) {
+ assert(j->unit == unit);
+
+ if (j->type == type) {
+ if (is_new)
+ *is_new = false;
+ return j;
+ }
+ }
+
+ j = job_new(unit, type);
+ if (!j)
+ return NULL;
+
+ j->generation = 0;
+ j->marker = NULL;
+ j->matters_to_anchor = false;
+ j->irreversible = tr->irreversible;
+
+ LIST_PREPEND(transaction, f, j);
+
+ if (hashmap_replace(tr->jobs, unit, f) < 0) {
+ LIST_REMOVE(transaction, f, j);
+ job_free(j);
+ return NULL;
+ }
+
+ if (is_new)
+ *is_new = true;
+
+ log_trace("Added job %s/%s to transaction.", unit->id, job_type_to_string(type));
+
+ return j;
+}
+
+static void transaction_unlink_job(Transaction *tr, Job *j, bool delete_dependencies) {
+ assert(tr);
+ assert(j);
+
+ if (j->transaction_prev)
+ j->transaction_prev->transaction_next = j->transaction_next;
+ else if (j->transaction_next)
+ hashmap_replace(tr->jobs, j->unit, j->transaction_next);
+ else
+ hashmap_remove_value(tr->jobs, j->unit, j);
+
+ if (j->transaction_next)
+ j->transaction_next->transaction_prev = j->transaction_prev;
+
+ j->transaction_prev = j->transaction_next = NULL;
+
+ while (j->subject_list)
+ job_dependency_free(j->subject_list);
+
+ while (j->object_list) {
+ Job *other = j->object_list->matters ? j->object_list->subject : NULL;
+
+ job_dependency_free(j->object_list);
+
+ if (other && delete_dependencies) {
+ log_unit_debug(other->unit,
+ "Deleting job %s/%s as dependency of job %s/%s",
+ other->unit->id, job_type_to_string(other->type),
+ j->unit->id, job_type_to_string(j->type));
+ transaction_delete_job(tr, other, delete_dependencies);
+ }
+ }
+}
+
+void transaction_add_propagate_reload_jobs(Transaction *tr, Unit *unit, Job *by, bool ignore_order, sd_bus_error *e) {
+ Iterator i;
+ JobType nt;
+ Unit *dep;
+ void *v;
+ int r;
+
+ assert(tr);
+ assert(unit);
+
+ HASHMAP_FOREACH_KEY(v, dep, unit->dependencies[UNIT_PROPAGATES_RELOAD_TO], i) {
+ nt = job_type_collapse(JOB_TRY_RELOAD, dep);
+ if (nt == JOB_NOP)
+ continue;
+
+ r = transaction_add_job_and_dependencies(tr, nt, dep, by, false, false, false, ignore_order, e);
+ if (r < 0) {
+ log_unit_warning(dep,
+ "Cannot add dependency reload job, ignoring: %s",
+ bus_error_message(e, r));
+ sd_bus_error_free(e);
+ }
+ }
+}
+
+int transaction_add_job_and_dependencies(
+ Transaction *tr,
+ JobType type,
+ Unit *unit,
+ Job *by,
+ bool matters,
+ bool conflicts,
+ bool ignore_requirements,
+ bool ignore_order,
+ sd_bus_error *e) {
+
+ bool is_new;
+ Iterator i;
+ Unit *dep;
+ Job *ret;
+ void *v;
+ int r;
+
+ assert(tr);
+ assert(type < _JOB_TYPE_MAX);
+ assert(type < _JOB_TYPE_MAX_IN_TRANSACTION);
+ assert(unit);
+
+ /* Before adding jobs for this unit, let's ensure that its state has been loaded
+ * This matters when jobs are spawned as part of coldplugging itself (see e. g. path_coldplug()).
+ * This way, we "recursively" coldplug units, ensuring that we do not look at state of
+ * not-yet-coldplugged units. */
+ if (MANAGER_IS_RELOADING(unit->manager))
+ unit_coldplug(unit);
+
+ if (by)
+ log_trace("Pulling in %s/%s from %s/%s", unit->id, job_type_to_string(type), by->unit->id, job_type_to_string(by->type));
+
+ /* Safety check that the unit is a valid state, i.e. not in UNIT_STUB or UNIT_MERGED which should only be set
+ * temporarily. */
+ if (!IN_SET(unit->load_state, UNIT_LOADED, UNIT_ERROR, UNIT_NOT_FOUND, UNIT_BAD_SETTING, UNIT_MASKED))
+ return sd_bus_error_setf(e, BUS_ERROR_LOAD_FAILED, "Unit %s is not loaded properly.", unit->id);
+
+ if (type != JOB_STOP) {
+ r = bus_unit_validate_load_state(unit, e);
+ if (r < 0)
+ return r;
+ }
+
+ if (!unit_job_is_applicable(unit, type))
+ return sd_bus_error_setf(e, BUS_ERROR_JOB_TYPE_NOT_APPLICABLE,
+ "Job type %s is not applicable for unit %s.",
+ job_type_to_string(type), unit->id);
+
+ /* First add the job. */
+ ret = transaction_add_one_job(tr, type, unit, &is_new);
+ if (!ret)
+ return -ENOMEM;
+
+ ret->ignore_order = ret->ignore_order || ignore_order;
+
+ /* Then, add a link to the job. */
+ if (by) {
+ if (!job_dependency_new(by, ret, matters, conflicts))
+ return -ENOMEM;
+ } else {
+ /* If the job has no parent job, it is the anchor job. */
+ assert(!tr->anchor_job);
+ tr->anchor_job = ret;
+ }
+
+ if (is_new && !ignore_requirements && type != JOB_NOP) {
+ Set *following;
+
+ /* If we are following some other unit, make sure we
+ * add all dependencies of everybody following. */
+ if (unit_following_set(ret->unit, &following) > 0) {
+ SET_FOREACH(dep, following, i) {
+ r = transaction_add_job_and_dependencies(tr, type, dep, ret, false, false, false, ignore_order, e);
+ if (r < 0) {
+ log_unit_full(dep,
+ r == -ERFKILL ? LOG_INFO : LOG_WARNING,
+ r, "Cannot add dependency job, ignoring: %s",
+ bus_error_message(e, r));
+ sd_bus_error_free(e);
+ }
+ }
+
+ set_free(following);
+ }
+
+ /* Finally, recursively add in all dependencies. */
+ if (IN_SET(type, JOB_START, JOB_RESTART)) {
+ HASHMAP_FOREACH_KEY(v, dep, ret->unit->dependencies[UNIT_REQUIRES], i) {
+ r = transaction_add_job_and_dependencies(tr, JOB_START, dep, ret, true, false, false, ignore_order, e);
+ if (r < 0) {
+ if (r != -EBADR) /* job type not applicable */
+ goto fail;
+
+ sd_bus_error_free(e);
+ }
+ }
+
+ HASHMAP_FOREACH_KEY(v, dep, ret->unit->dependencies[UNIT_BINDS_TO], i) {
+ r = transaction_add_job_and_dependencies(tr, JOB_START, dep, ret, true, false, false, ignore_order, e);
+ if (r < 0) {
+ if (r != -EBADR) /* job type not applicable */
+ goto fail;
+
+ sd_bus_error_free(e);
+ }
+ }
+
+ HASHMAP_FOREACH_KEY(v, dep, ret->unit->dependencies[UNIT_WANTS], i) {
+ r = transaction_add_job_and_dependencies(tr, JOB_START, dep, ret, false, false, false, ignore_order, e);
+ if (r < 0) {
+ /* unit masked, job type not applicable and unit not found are not considered as errors. */
+ log_unit_full(dep,
+ IN_SET(r, -ERFKILL, -EBADR, -ENOENT) ? LOG_DEBUG : LOG_WARNING,
+ r, "Cannot add dependency job, ignoring: %s",
+ bus_error_message(e, r));
+ sd_bus_error_free(e);
+ }
+ }
+
+ HASHMAP_FOREACH_KEY(v, dep, ret->unit->dependencies[UNIT_REQUISITE], i) {
+ r = transaction_add_job_and_dependencies(tr, JOB_VERIFY_ACTIVE, dep, ret, true, false, false, ignore_order, e);
+ if (r < 0) {
+ if (r != -EBADR) /* job type not applicable */
+ goto fail;
+
+ sd_bus_error_free(e);
+ }
+ }
+
+ HASHMAP_FOREACH_KEY(v, dep, ret->unit->dependencies[UNIT_CONFLICTS], i) {
+ r = transaction_add_job_and_dependencies(tr, JOB_STOP, dep, ret, true, true, false, ignore_order, e);
+ if (r < 0) {
+ if (r != -EBADR) /* job type not applicable */
+ goto fail;
+
+ sd_bus_error_free(e);
+ }
+ }
+
+ HASHMAP_FOREACH_KEY(v, dep, ret->unit->dependencies[UNIT_CONFLICTED_BY], i) {
+ r = transaction_add_job_and_dependencies(tr, JOB_STOP, dep, ret, false, false, false, ignore_order, e);
+ if (r < 0) {
+ log_unit_warning(dep,
+ "Cannot add dependency job, ignoring: %s",
+ bus_error_message(e, r));
+ sd_bus_error_free(e);
+ }
+ }
+
+ }
+
+ if (IN_SET(type, JOB_STOP, JOB_RESTART)) {
+ static const UnitDependency propagate_deps[] = {
+ UNIT_REQUIRED_BY,
+ UNIT_REQUISITE_OF,
+ UNIT_BOUND_BY,
+ UNIT_CONSISTS_OF,
+ };
+
+ JobType ptype;
+ unsigned j;
+
+ /* We propagate STOP as STOP, but RESTART only
+ * as TRY_RESTART, in order not to start
+ * dependencies that are not around. */
+ ptype = type == JOB_RESTART ? JOB_TRY_RESTART : type;
+
+ for (j = 0; j < ELEMENTSOF(propagate_deps); j++)
+ HASHMAP_FOREACH_KEY(v, dep, ret->unit->dependencies[propagate_deps[j]], i) {
+ JobType nt;
+
+ nt = job_type_collapse(ptype, dep);
+ if (nt == JOB_NOP)
+ continue;
+
+ r = transaction_add_job_and_dependencies(tr, nt, dep, ret, true, false, false, ignore_order, e);
+ if (r < 0) {
+ if (r != -EBADR) /* job type not applicable */
+ goto fail;
+
+ sd_bus_error_free(e);
+ }
+ }
+ }
+
+ if (type == JOB_RELOAD)
+ transaction_add_propagate_reload_jobs(tr, ret->unit, ret, ignore_order, e);
+
+ /* JOB_VERIFY_ACTIVE requires no dependency handling */
+ }
+
+ return 0;
+
+fail:
+ return r;
+}
+
+int transaction_add_isolate_jobs(Transaction *tr, Manager *m) {
+ Iterator i;
+ Unit *u;
+ char *k;
+ int r;
+
+ assert(tr);
+ assert(m);
+
+ HASHMAP_FOREACH_KEY(u, k, m->units, i) {
+
+ /* ignore aliases */
+ if (u->id != k)
+ continue;
+
+ if (u->ignore_on_isolate)
+ continue;
+
+ /* No need to stop inactive jobs */
+ if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(u)) && !u->job)
+ continue;
+
+ /* Is there already something listed for this? */
+ if (hashmap_get(tr->jobs, u))
+ continue;
+
+ r = transaction_add_job_and_dependencies(tr, JOB_STOP, u, tr->anchor_job, true, false, false, false, NULL);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Cannot add isolate job, ignoring: %m");
+ }
+
+ return 0;
+}
+
+Transaction *transaction_new(bool irreversible) {
+ Transaction *tr;
+
+ tr = new0(Transaction, 1);
+ if (!tr)
+ return NULL;
+
+ tr->jobs = hashmap_new(NULL);
+ if (!tr->jobs)
+ return mfree(tr);
+
+ tr->irreversible = irreversible;
+
+ return tr;
+}
+
+void transaction_free(Transaction *tr) {
+ assert(hashmap_isempty(tr->jobs));
+ hashmap_free(tr->jobs);
+ free(tr);
+}
diff --git a/src/core/transaction.h b/src/core/transaction.h
new file mode 100644
index 0000000..70d74a4
--- /dev/null
+++ b/src/core/transaction.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Transaction Transaction;
+
+#include "hashmap.h"
+#include "job.h"
+#include "manager.h"
+#include "unit.h"
+
+struct Transaction {
+ /* Jobs to be added */
+ Hashmap *jobs; /* Unit object => Job object list 1:1 */
+ Job *anchor_job; /* the job the user asked for */
+ bool irreversible;
+};
+
+Transaction *transaction_new(bool irreversible);
+void transaction_free(Transaction *tr);
+
+void transaction_add_propagate_reload_jobs(Transaction *tr, Unit *unit, Job *by, bool ignore_order, sd_bus_error *e);
+int transaction_add_job_and_dependencies(
+ Transaction *tr,
+ JobType type,
+ Unit *unit,
+ Job *by,
+ bool matters,
+ bool conflicts,
+ bool ignore_requirements,
+ bool ignore_order,
+ sd_bus_error *e);
+int transaction_activate(Transaction *tr, Manager *m, JobMode mode, sd_bus_error *e);
+int transaction_add_isolate_jobs(Transaction *tr, Manager *m);
+void transaction_abort(Transaction *tr);
diff --git a/src/core/triggers.systemd.in b/src/core/triggers.systemd.in
new file mode 100644
index 0000000..10b1889
--- /dev/null
+++ b/src/core/triggers.systemd.in
@@ -0,0 +1,143 @@
+# -*- Mode: rpm-spec; indent-tabs-mode: nil -*- */
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+# Copyright © 2018 Neal Gompa
+
+# The contents of this are an example to be copied into systemd.spec.
+#
+# Minimum rpm version supported: 4.13.0
+
+%transfiletriggerin -P 900900 -p <lua> -- @systemunitdir@ /etc/systemd/system
+-- This script will run after any package is initially installed or
+-- upgraded. We care about the case where a package is initially
+-- installed, because other cases are covered by the *un scriptlets,
+-- so sometimes we will reload needlessly.
+
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("%{_bindir}/systemctl", "daemon-reload"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerun -p <lua> -- @systemunitdir@ /etc/systemd/system
+-- On removal, we need to run daemon-reload after any units have been
+-- removed. %transfiletriggerpostun would be ideal, but it does not get
+-- executed for some reason.
+-- On upgrade, we need to run daemon-reload after any new unit files
+-- have been installed, but before %postun scripts in packages get
+-- executed. %transfiletriggerun gets the right list of files
+-- but it is invoked too early (before changes happen).
+-- %filetriggerpostun happens at the right time, but it fires for
+-- every package.
+-- To execute the reload at the right time, we create a state
+-- file in %transfiletriggerun and execute the daemon-reload in
+-- the first %filetriggerpostun.
+
+if posix.access("/run/systemd/system") then
+ posix.mkdir("%{_localstatedir}/lib")
+ posix.mkdir("%{_localstatedir}/lib/rpm-state")
+ posix.mkdir("%{_localstatedir}/lib/rpm-state/systemd")
+ io.open("%{_localstatedir}/lib/rpm-state/systemd/needs-reload", "w")
+end
+
+%filetriggerpostun -P 1000100 -p <lua> -- @systemunitdir@ /etc/systemd/system
+if posix.access("%{_localstatedir}/lib/rpm-state/systemd/needs-reload") then
+ posix.unlink("%{_localstatedir}/lib/rpm-state/systemd/needs-reload")
+ posix.rmdir("%{_localstatedir}/lib/rpm-state/systemd")
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("%{_bindir}/systemctl", "daemon-reload"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerin -P 100700 -p <lua> -- @sysusersdir@
+-- This script will process files installed in @sysusersdir@ to create
+-- specified users automatically. The priority is set such that it
+-- will run before the tmpfiles file trigger.
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("%{_bindir}/systemd-sysusers"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerin -P 100500 -p <lua> -- @tmpfilesdir@
+-- This script will process files installed in @tmpfilesdir@ to create
+-- tmpfiles automatically. The priority is set such that it will run
+-- after the sysusers file trigger, but before any other triggers.
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("%{_bindir}/systemd-tmpfiles", "--create"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerin -p <lua> -- @udevhwdbdir@
+-- This script will automatically invoke hwdb update if files have been
+-- installed or updated in @udevhwdbdir@.
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("%{_bindir}/systemd-hwdb", "update"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerin -p <lua> -- @catalogdir@
+-- This script will automatically invoke journal catalog update if files
+-- have been installed or updated in @catalogdir@.
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("%{_bindir}/journalctl", "--update-catalog"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerin -p <lua> -- @udevrulesdir@
+-- This script will automatically update udev with new rules if files
+-- have been installed or updated in @udevrulesdir@.
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("%{_bindir}/udevadm", "control", "--reload"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerin -p <lua> -- @sysctldir@
+-- This script will automatically apply sysctl rules if files have been
+-- installed or updated in @sysctldir@.
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("@rootlibexecdir@/systemd-sysctl"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerin -p <lua> -- @binfmtdir@
+-- This script will automatically apply binfmt rules if files have been
+-- installed or updated in @binfmtdir@.
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("@rootlibexecdir@/systemd-binfmt"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
diff --git a/src/core/umount.c b/src/core/umount.c
new file mode 100644
index 0000000..7af0195
--- /dev/null
+++ b/src/core/umount.c
@@ -0,0 +1,699 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2010 ProFUSION embedded systems
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/loop.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/swap.h>
+
+/* This needs to be after sys/mount.h :( */
+#include <libmount.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "def.h"
+#include "device-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fstab-util.h"
+#include "linux-3.13/dm-ioctl.h"
+#include "mount-setup.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "umount.h"
+#include "util.h"
+#include "virt.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct libmnt_table*, mnt_free_table);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct libmnt_iter*, mnt_free_iter);
+
+static void mount_point_free(MountPoint **head, MountPoint *m) {
+ assert(head);
+ assert(m);
+
+ LIST_REMOVE(mount_point, *head, m);
+
+ free(m->path);
+ free(m->remount_options);
+ free(m);
+}
+
+void mount_points_list_free(MountPoint **head) {
+ assert(head);
+
+ while (*head)
+ mount_point_free(head, *head);
+}
+
+int mount_points_list_get(const char *mountinfo, MountPoint **head) {
+ _cleanup_(mnt_free_tablep) struct libmnt_table *t = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *i = NULL;
+ int r;
+
+ assert(head);
+
+ t = mnt_new_table();
+ i = mnt_new_iter(MNT_ITER_FORWARD);
+ if (!t || !i)
+ return log_oom();
+
+ r = mnt_table_parse_mtab(t, mountinfo);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s: %m", mountinfo);
+
+ for (;;) {
+ struct libmnt_fs *fs;
+ const char *path, *fstype;
+ _cleanup_free_ char *options = NULL;
+ _cleanup_free_ char *p = NULL;
+ unsigned long remount_flags = 0u;
+ _cleanup_free_ char *remount_options = NULL;
+ bool try_remount_ro;
+ MountPoint *m;
+
+ r = mnt_table_next_fs(t, i, &fs);
+ if (r == 1)
+ break;
+ if (r < 0)
+ return log_error_errno(r, "Failed to get next entry from %s: %m", mountinfo);
+
+ path = mnt_fs_get_target(fs);
+ if (!path)
+ continue;
+
+ if (cunescape(path, UNESCAPE_RELAX, &p) < 0)
+ return log_oom();
+
+ fstype = mnt_fs_get_fstype(fs);
+
+ /* Combine the generic VFS options with the FS-specific
+ * options. Duplicates are not a problem here, because the only
+ * options that should come up twice are typically ro/rw, which
+ * are turned into MS_RDONLY or the invertion of it.
+ *
+ * Even if there are duplicates later in mount_option_mangle()
+ * it shouldn't hurt anyways as they override each other.
+ */
+ if (!strextend_with_separator(&options, ",",
+ mnt_fs_get_vfs_options(fs),
+ NULL))
+ return log_oom();
+ if (!strextend_with_separator(&options, ",",
+ mnt_fs_get_fs_options(fs),
+ NULL))
+ return log_oom();
+
+ /* Ignore mount points we can't unmount because they
+ * are API or because we are keeping them open (like
+ * /dev/console). Also, ignore all mounts below API
+ * file systems, since they are likely virtual too,
+ * and hence not worth spending time on. Also, in
+ * unprivileged containers we might lack the rights to
+ * unmount these things, hence don't bother. */
+ if (mount_point_is_api(p) ||
+ mount_point_ignore(p) ||
+ PATH_STARTSWITH_SET(p, "/dev", "/sys", "/proc"))
+ continue;
+
+ /* If we are in a container, don't attempt to
+ * read-only mount anything as that brings no real
+ * benefits, but might confuse the host, as we remount
+ * the superblock here, not the bind mount.
+ *
+ * If the filesystem is a network fs, also skip the
+ * remount. It brings no value (we cannot leave
+ * a "dirty fs") and could hang if the network is down.
+ * Note that umount2() is more careful and will not
+ * hang because of the network being down. */
+ try_remount_ro = detect_container() <= 0 &&
+ !fstype_is_network(fstype) &&
+ !fstype_is_api_vfs(fstype) &&
+ !fstype_is_ro(fstype) &&
+ !fstab_test_yes_no_option(options, "ro\0rw\0");
+
+ if (try_remount_ro) {
+ /* mount(2) states that mount flags and options need to be exactly the same
+ * as they were when the filesystem was mounted, except for the desired
+ * changes. So we reconstruct both here and adjust them for the later
+ * remount call too. */
+
+ r = mnt_fs_get_propagation(fs, &remount_flags);
+ if (r < 0) {
+ log_warning_errno(r, "mnt_fs_get_propagation() failed for %s, ignoring: %m", path);
+ continue;
+ }
+
+ r = mount_option_mangle(options, remount_flags, &remount_flags, &remount_options);
+ if (r < 0) {
+ log_warning_errno(r, "mount_option_mangle failed for %s, ignoring: %m", path);
+ continue;
+ }
+
+ /* MS_BIND is special. If it is provided it will only make the mount-point
+ * read-only. If left out, the super block itself is remounted, which we want. */
+ remount_flags = (remount_flags|MS_REMOUNT|MS_RDONLY) & ~MS_BIND;
+ }
+
+ m = new0(MountPoint, 1);
+ if (!m)
+ return log_oom();
+
+ free_and_replace(m->path, p);
+ free_and_replace(m->remount_options, remount_options);
+ m->remount_flags = remount_flags;
+ m->try_remount_ro = try_remount_ro;
+
+ LIST_PREPEND(mount_point, *head, m);
+ }
+
+ return 0;
+}
+
+int swap_list_get(const char *swaps, MountPoint **head) {
+ _cleanup_(mnt_free_tablep) struct libmnt_table *t = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *i = NULL;
+ int r;
+
+ assert(head);
+
+ t = mnt_new_table();
+ i = mnt_new_iter(MNT_ITER_FORWARD);
+ if (!t || !i)
+ return log_oom();
+
+ r = mnt_table_parse_swaps(t, swaps);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s: %m", swaps);
+
+ for (;;) {
+ struct libmnt_fs *fs;
+
+ MountPoint *swap;
+ const char *source;
+ _cleanup_free_ char *d = NULL;
+
+ r = mnt_table_next_fs(t, i, &fs);
+ if (r == 1)
+ break;
+ if (r < 0)
+ return log_error_errno(r, "Failed to get next entry from %s: %m", swaps);
+
+ source = mnt_fs_get_source(fs);
+ if (!source)
+ continue;
+
+ r = cunescape(source, UNESCAPE_RELAX, &d);
+ if (r < 0)
+ return r;
+
+ swap = new0(MountPoint, 1);
+ if (!swap)
+ return -ENOMEM;
+
+ free_and_replace(swap->path, d);
+ LIST_PREPEND(mount_point, *head, swap);
+ }
+
+ return 0;
+}
+
+static int loopback_list_get(MountPoint **head) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+ int r;
+
+ assert(head);
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_subsystem(e, "block", true);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_sysname(e, "loop*");
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_sysattr(e, "loop/backing_file", NULL, true);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ _cleanup_free_ char *p = NULL;
+ const char *dn;
+ MountPoint *lb;
+
+ if (sd_device_get_devname(d, &dn) < 0)
+ continue;
+
+ p = strdup(dn);
+ if (!p)
+ return -ENOMEM;
+
+ lb = new(MountPoint, 1);
+ if (!lb)
+ return -ENOMEM;
+
+ *lb = (MountPoint) {
+ .path = TAKE_PTR(p),
+ };
+
+ LIST_PREPEND(mount_point, *head, lb);
+ }
+
+ return 0;
+}
+
+static int dm_list_get(MountPoint **head) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+ int r;
+
+ assert(head);
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_subsystem(e, "block", true);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_sysname(e, "dm-*");
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ _cleanup_free_ char *p = NULL;
+ const char *dn;
+ MountPoint *m;
+ dev_t devnum;
+
+ if (sd_device_get_devnum(d, &devnum) < 0 ||
+ sd_device_get_devname(d, &dn) < 0)
+ continue;
+
+ p = strdup(dn);
+ if (!p)
+ return -ENOMEM;
+
+ m = new(MountPoint, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (MountPoint) {
+ .path = TAKE_PTR(p),
+ .devnum = devnum,
+ };
+
+ LIST_PREPEND(mount_point, *head, m);
+ }
+
+ return 0;
+}
+
+static int delete_loopback(const char *device) {
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(device);
+
+ fd = open(device, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ r = ioctl(fd, LOOP_CLR_FD, 0);
+ if (r >= 0)
+ return 1;
+
+ /* ENXIO: not bound, so no error */
+ if (errno == ENXIO)
+ return 0;
+
+ return -errno;
+}
+
+static int delete_dm(dev_t devnum) {
+
+ struct dm_ioctl dm = {
+ .version = {
+ DM_VERSION_MAJOR,
+ DM_VERSION_MINOR,
+ DM_VERSION_PATCHLEVEL
+ },
+ .data_size = sizeof(dm),
+ .dev = devnum,
+ };
+
+ _cleanup_close_ int fd = -1;
+
+ assert(major(devnum) != 0);
+
+ fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (ioctl(fd, DM_DEV_REMOVE, &dm) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static bool nonunmountable_path(const char *path) {
+ return path_equal(path, "/")
+#if ! HAVE_SPLIT_USR
+ || path_equal(path, "/usr")
+#endif
+ || path_startswith(path, "/run/initramfs");
+}
+
+static int remount_with_timeout(MountPoint *m, int umount_log_level) {
+ pid_t pid;
+ int r;
+
+ BLOCK_SIGNALS(SIGCHLD);
+
+ assert(m);
+
+ /* Due to the possiblity of a remount operation hanging, we
+ * fork a child process and set a timeout. If the timeout
+ * lapses, the assumption is that that particular remount
+ * failed. */
+ r = safe_fork("(sd-remount)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_REOPEN_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ log_info("Remounting '%s' read-only in with options '%s'.", m->path, m->remount_options);
+
+ /* Start the mount operation here in the child */
+ r = mount(NULL, m->path, NULL, m->remount_flags, m->remount_options);
+ if (r < 0)
+ log_full_errno(umount_log_level, errno, "Failed to remount '%s' read-only: %m", m->path);
+
+ _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+ }
+
+ r = wait_for_terminate_with_timeout(pid, DEFAULT_TIMEOUT_USEC);
+ if (r == -ETIMEDOUT) {
+ log_error_errno(r, "Remounting '%s' timed out, issuing SIGKILL to PID " PID_FMT ".", m->path, pid);
+ (void) kill(pid, SIGKILL);
+ } else if (r == -EPROTO)
+ log_debug_errno(r, "Remounting '%s' failed abnormally, child process " PID_FMT " aborted or exited non-zero.", m->path, pid);
+ else if (r < 0)
+ log_error_errno(r, "Remounting '%s' failed unexpectedly, couldn't wait for child process " PID_FMT ": %m", m->path, pid);
+
+ return r;
+}
+
+static int umount_with_timeout(MountPoint *m, int umount_log_level) {
+ pid_t pid;
+ int r;
+
+ BLOCK_SIGNALS(SIGCHLD);
+
+ assert(m);
+
+ /* Due to the possiblity of a umount operation hanging, we
+ * fork a child process and set a timeout. If the timeout
+ * lapses, the assumption is that that particular umount
+ * failed. */
+ r = safe_fork("(sd-umount)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_REOPEN_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ log_info("Unmounting '%s'.", m->path);
+
+ /* Start the mount operation here in the child Using MNT_FORCE
+ * causes some filesystems (e.g. FUSE and NFS and other network
+ * filesystems) to abort any pending requests and return -EIO
+ * rather than blocking indefinitely. If the filesysten is
+ * "busy", this may allow processes to die, thus making the
+ * filesystem less busy so the unmount might succeed (rather
+ * then return EBUSY).*/
+ r = umount2(m->path, MNT_FORCE);
+ if (r < 0)
+ log_full_errno(umount_log_level, errno, "Failed to unmount %s: %m", m->path);
+
+ _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+ }
+
+ r = wait_for_terminate_with_timeout(pid, DEFAULT_TIMEOUT_USEC);
+ if (r == -ETIMEDOUT) {
+ log_error_errno(r, "Unmounting '%s' timed out, issuing SIGKILL to PID " PID_FMT ".", m->path, pid);
+ (void) kill(pid, SIGKILL);
+ } else if (r == -EPROTO)
+ log_debug_errno(r, "Unmounting '%s' failed abnormally, child process " PID_FMT " aborted or exited non-zero.", m->path, pid);
+ else if (r < 0)
+ log_error_errno(r, "Unmounting '%s' failed unexpectedly, couldn't wait for child process " PID_FMT ": %m", m->path, pid);
+
+ return r;
+}
+
+/* This includes remounting readonly, which changes the kernel mount options.
+ * Therefore the list passed to this function is invalidated, and should not be reused. */
+static int mount_points_list_umount(MountPoint **head, bool *changed, int umount_log_level) {
+ MountPoint *m;
+ int n_failed = 0;
+
+ assert(head);
+ assert(changed);
+
+ LIST_FOREACH(mount_point, m, *head) {
+ if (m->try_remount_ro) {
+ /* We always try to remount directories
+ * read-only first, before we go on and umount
+ * them.
+ *
+ * Mount points can be stacked. If a mount
+ * point is stacked below / or /usr, we
+ * cannot umount or remount it directly,
+ * since there is no way to refer to the
+ * underlying mount. There's nothing we can do
+ * about it for the general case, but we can
+ * do something about it if it is aliased
+ * somehwere else via a bind mount. If we
+ * explicitly remount the super block of that
+ * alias read-only we hence should be
+ * relatively safe regarding keeping a dirty fs
+ * we cannot otherwise see.
+ *
+ * Since the remount can hang in the instance of
+ * remote filesystems, we remount asynchronously
+ * and skip the subsequent umount if it fails. */
+ if (remount_with_timeout(m, umount_log_level) < 0) {
+ /* Remount failed, but try unmounting anyway,
+ * unless this is a mount point we want to skip. */
+ if (nonunmountable_path(m->path)) {
+ n_failed++;
+ continue;
+ }
+ }
+ }
+
+ /* Skip / and /usr since we cannot unmount that
+ * anyway, since we are running from it. They have
+ * already been remounted ro. */
+ if (nonunmountable_path(m->path))
+ continue;
+
+ /* Trying to umount */
+ if (umount_with_timeout(m, umount_log_level) < 0)
+ n_failed++;
+ else
+ *changed = true;
+ }
+
+ return n_failed;
+}
+
+static int swap_points_list_off(MountPoint **head, bool *changed) {
+ MountPoint *m, *n;
+ int n_failed = 0;
+
+ assert(head);
+ assert(changed);
+
+ LIST_FOREACH_SAFE(mount_point, m, n, *head) {
+ log_info("Deactivating swap %s.", m->path);
+ if (swapoff(m->path) == 0) {
+ *changed = true;
+ mount_point_free(head, m);
+ } else {
+ log_warning_errno(errno, "Could not deactivate swap %s: %m", m->path);
+ n_failed++;
+ }
+ }
+
+ return n_failed;
+}
+
+static int loopback_points_list_detach(MountPoint **head, bool *changed, int umount_log_level) {
+ MountPoint *m, *n;
+ int n_failed = 0, k;
+ struct stat root_st;
+
+ assert(head);
+ assert(changed);
+
+ k = lstat("/", &root_st);
+
+ LIST_FOREACH_SAFE(mount_point, m, n, *head) {
+ int r;
+ struct stat loopback_st;
+
+ if (k >= 0 &&
+ major(root_st.st_dev) != 0 &&
+ lstat(m->path, &loopback_st) >= 0 &&
+ root_st.st_dev == loopback_st.st_rdev) {
+ n_failed++;
+ continue;
+ }
+
+ log_info("Detaching loopback %s.", m->path);
+ r = delete_loopback(m->path);
+ if (r >= 0) {
+ if (r > 0)
+ *changed = true;
+
+ mount_point_free(head, m);
+ } else {
+ log_full_errno(umount_log_level, errno, "Could not detach loopback %s: %m", m->path);
+ n_failed++;
+ }
+ }
+
+ return n_failed;
+}
+
+static int dm_points_list_detach(MountPoint **head, bool *changed, int umount_log_level) {
+ MountPoint *m, *n;
+ int n_failed = 0, r;
+ dev_t rootdev;
+
+ assert(head);
+ assert(changed);
+
+ r = get_block_device("/", &rootdev);
+ if (r <= 0)
+ rootdev = 0;
+
+ LIST_FOREACH_SAFE(mount_point, m, n, *head) {
+
+ if (major(rootdev) != 0 && rootdev == m->devnum) {
+ n_failed ++;
+ continue;
+ }
+
+ log_info("Detaching DM %u:%u.", major(m->devnum), minor(m->devnum));
+ r = delete_dm(m->devnum);
+ if (r >= 0) {
+ *changed = true;
+ mount_point_free(head, m);
+ } else {
+ log_full_errno(umount_log_level, errno, "Could not detach DM %s: %m", m->path);
+ n_failed++;
+ }
+ }
+
+ return n_failed;
+}
+
+static int umount_all_once(bool *changed, int umount_log_level) {
+ int r;
+ _cleanup_(mount_points_list_free) LIST_HEAD(MountPoint, mp_list_head);
+
+ assert(changed);
+
+ LIST_HEAD_INIT(mp_list_head);
+ r = mount_points_list_get(NULL, &mp_list_head);
+ if (r < 0)
+ return r;
+
+ return mount_points_list_umount(&mp_list_head, changed, umount_log_level);
+}
+
+int umount_all(bool *changed, int umount_log_level) {
+ bool umount_changed;
+ int r;
+
+ assert(changed);
+
+ /* Retry umount, until nothing can be umounted anymore. Mounts are
+ * processed in order, newest first. The retries are needed when
+ * an old mount has been moved, to a path inside a newer mount. */
+ do {
+ umount_changed = false;
+
+ r = umount_all_once(&umount_changed, umount_log_level);
+ if (umount_changed)
+ *changed = true;
+ } while (umount_changed);
+
+ return r;
+}
+
+int swapoff_all(bool *changed) {
+ _cleanup_(mount_points_list_free) LIST_HEAD(MountPoint, swap_list_head);
+ int r;
+
+ assert(changed);
+
+ LIST_HEAD_INIT(swap_list_head);
+
+ r = swap_list_get(NULL, &swap_list_head);
+ if (r < 0)
+ return r;
+
+ return swap_points_list_off(&swap_list_head, changed);
+}
+
+int loopback_detach_all(bool *changed, int umount_log_level) {
+ _cleanup_(mount_points_list_free) LIST_HEAD(MountPoint, loopback_list_head);
+ int r;
+
+ assert(changed);
+
+ LIST_HEAD_INIT(loopback_list_head);
+
+ r = loopback_list_get(&loopback_list_head);
+ if (r < 0)
+ return r;
+
+ return loopback_points_list_detach(&loopback_list_head, changed, umount_log_level);
+}
+
+int dm_detach_all(bool *changed, int umount_log_level) {
+ _cleanup_(mount_points_list_free) LIST_HEAD(MountPoint, dm_list_head);
+ int r;
+
+ assert(changed);
+
+ LIST_HEAD_INIT(dm_list_head);
+
+ r = dm_list_get(&dm_list_head);
+ if (r < 0)
+ return r;
+
+ return dm_points_list_detach(&dm_list_head, changed, umount_log_level);
+}
diff --git a/src/core/umount.h b/src/core/umount.h
new file mode 100644
index 0000000..6f2b24d
--- /dev/null
+++ b/src/core/umount.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2010 ProFUSION embedded systems
+***/
+
+#include "list.h"
+
+int umount_all(bool *changed, int umount_log_level);
+
+int swapoff_all(bool *changed);
+
+int loopback_detach_all(bool *changed, int umount_log_level);
+
+int dm_detach_all(bool *changed, int umount_log_level);
+
+/* This is exported just for testing */
+typedef struct MountPoint {
+ char *path;
+ char *remount_options;
+ unsigned long remount_flags;
+ bool try_remount_ro;
+ dev_t devnum;
+ LIST_FIELDS(struct MountPoint, mount_point);
+} MountPoint;
+
+int mount_points_list_get(const char *mountinfo, MountPoint **head);
+void mount_points_list_free(MountPoint **head);
+int swap_list_get(const char *swaps, MountPoint **head);
diff --git a/src/core/unit-printf.c b/src/core/unit-printf.c
new file mode 100644
index 0000000..72391ac
--- /dev/null
+++ b/src/core/unit-printf.c
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "format-util.h"
+#include "macro.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit-printf.h"
+#include "unit.h"
+#include "user-util.h"
+
+static int specifier_prefix_and_instance(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+
+ assert(u);
+
+ return unit_name_to_prefix_and_instance(u->id, ret);
+}
+
+static int specifier_prefix(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+
+ assert(u);
+
+ return unit_name_to_prefix(u->id, ret);
+}
+
+static int specifier_prefix_unescaped(char specifier, const void *data, const void *userdata, char **ret) {
+ _cleanup_free_ char *p = NULL;
+ const Unit *u = userdata;
+ int r;
+
+ assert(u);
+
+ r = unit_name_to_prefix(u->id, &p);
+ if (r < 0)
+ return r;
+
+ return unit_name_unescape(p, ret);
+}
+
+static int specifier_instance_unescaped(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+
+ assert(u);
+
+ return unit_name_unescape(strempty(u->instance), ret);
+}
+
+static int specifier_last_component(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+ _cleanup_free_ char *prefix = NULL;
+ char *dash;
+ int r;
+
+ assert(u);
+
+ r = unit_name_to_prefix(u->id, &prefix);
+ if (r < 0)
+ return r;
+
+ dash = strrchr(prefix, '-');
+ if (dash)
+ return specifier_string(specifier, dash + 1, userdata, ret);
+
+ *ret = TAKE_PTR(prefix);
+ return 0;
+}
+
+static int specifier_last_component_unescaped(char specifier, const void *data, const void *userdata, char **ret) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ r = specifier_last_component(specifier, data, userdata, &p);
+ if (r < 0)
+ return r;
+
+ return unit_name_unescape(p, ret);
+}
+
+static int specifier_filename(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+
+ assert(u);
+
+ if (u->instance)
+ return unit_name_path_unescape(u->instance, ret);
+ else
+ return unit_name_to_path(u->id, ret);
+}
+
+static void bad_specifier(const Unit *u, char specifier) {
+ log_unit_warning(u, "Specifier '%%%c' used in unit configuration, which is deprecated. Please update your unit file, as it does not work as intended.", specifier);
+}
+
+static int specifier_cgroup(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+ char *n;
+
+ assert(u);
+
+ bad_specifier(u, specifier);
+
+ if (u->cgroup_path)
+ n = strdup(u->cgroup_path);
+ else
+ n = unit_default_cgroup_path(u);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+static int specifier_cgroup_root(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+ char *n;
+
+ assert(u);
+
+ bad_specifier(u, specifier);
+
+ n = strdup(u->manager->cgroup_root);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+static int specifier_cgroup_slice(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+ char *n;
+
+ assert(u);
+
+ bad_specifier(u, specifier);
+
+ if (UNIT_ISSET(u->slice)) {
+ const Unit *slice;
+
+ slice = UNIT_DEREF(u->slice);
+
+ if (slice->cgroup_path)
+ n = strdup(slice->cgroup_path);
+ else
+ n = unit_default_cgroup_path(slice);
+ } else
+ n = strdup(u->manager->cgroup_root);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+static int specifier_special_directory(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+ char *n = NULL;
+
+ assert(u);
+
+ n = strdup(u->manager->prefix[PTR_TO_UINT(data)]);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int unit_name_printf(Unit *u, const char* format, char **ret) {
+
+ /*
+ * This will use the passed string as format string and replace the following specifiers (which should all be
+ * safe for inclusion in unit names):
+ *
+ * %n: the full id of the unit (foo@bar.waldo)
+ * %N: the id of the unit without the suffix (foo@bar)
+ * %p: the prefix (foo)
+ * %i: the instance (bar)
+ *
+ * %U: the UID of the running user
+ * %u: the username of the running user
+ *
+ * %m: the machine ID of the running system
+ * %H: the host name of the running system
+ * %b: the boot ID of the running system
+ */
+
+ const Specifier table[] = {
+ { 'n', specifier_string, u->id },
+ { 'N', specifier_prefix_and_instance, NULL },
+ { 'p', specifier_prefix, NULL },
+ { 'i', specifier_string, u->instance },
+ { 'j', specifier_last_component, NULL },
+
+ { 'g', specifier_group_name, NULL },
+ { 'G', specifier_group_id, NULL },
+ { 'U', specifier_user_id, NULL },
+ { 'u', specifier_user_name, NULL },
+
+ { 'm', specifier_machine_id, NULL },
+ { 'H', specifier_host_name, NULL },
+ { 'b', specifier_boot_id, NULL },
+ {}
+ };
+
+ assert(u);
+ assert(format);
+ assert(ret);
+
+ return specifier_printf(format, table, u, ret);
+}
+
+int unit_full_printf(Unit *u, const char *format, char **ret) {
+
+ /* This is similar to unit_name_printf() but also supports unescaping. Also, adds a couple of additional codes
+ * (which are likely not suitable for unescaped inclusion in unit names):
+ *
+ * %f: the unescaped instance if set, otherwise the id unescaped as path
+ *
+ * %c: cgroup path of unit (deprecated)
+ * %r: where units in this slice are placed in the cgroup tree (deprecated)
+ * %R: the root of this systemd's instance tree (deprecated)
+ *
+ * %t: the runtime directory root (e.g. /run or $XDG_RUNTIME_DIR)
+ * %S: the state directory root (e.g. /var/lib or $XDG_CONFIG_HOME)
+ * %C: the cache directory root (e.g. /var/cache or $XDG_CACHE_HOME)
+ * %L: the log directory root (e.g. /var/log or $XDG_CONFIG_HOME/log)
+ * %E: the configuration directory root (e.g. /etc or $XDG_CONFIG_HOME)
+ * %T: the temporary directory (e.g. /tmp, or $TMPDIR, $TEMP, $TMP)
+ * %V: the temporary directory for large, persistent stuff (e.g. /var/tmp, or $TMPDIR, $TEMP, $TMP)
+ *
+ * %h: the homedir of the running user
+ * %s: the shell of the running user
+ *
+ * %v: `uname -r` of the running system
+ *
+ * NOTICE: When you add new entries here, please be careful: specifiers which depend on settings of the unit
+ * file itself are broken by design, as they would resolve differently depending on whether they are used
+ * before or after the relevant configuration setting. Hence: don't add them.
+ */
+
+ const Specifier table[] = {
+ { 'n', specifier_string, u->id },
+ { 'N', specifier_prefix_and_instance, NULL },
+ { 'p', specifier_prefix, NULL },
+ { 'P', specifier_prefix_unescaped, NULL },
+ { 'i', specifier_string, u->instance },
+ { 'I', specifier_instance_unescaped, NULL },
+ { 'j', specifier_last_component, NULL },
+ { 'J', specifier_last_component_unescaped, NULL },
+
+ { 'f', specifier_filename, NULL },
+ { 'c', specifier_cgroup, NULL },
+ { 'r', specifier_cgroup_slice, NULL },
+ { 'R', specifier_cgroup_root, NULL },
+
+ { 't', specifier_special_directory, UINT_TO_PTR(EXEC_DIRECTORY_RUNTIME) },
+ { 'S', specifier_special_directory, UINT_TO_PTR(EXEC_DIRECTORY_STATE) },
+ { 'C', specifier_special_directory, UINT_TO_PTR(EXEC_DIRECTORY_CACHE) },
+ { 'L', specifier_special_directory, UINT_TO_PTR(EXEC_DIRECTORY_LOGS) },
+ { 'E', specifier_special_directory, UINT_TO_PTR(EXEC_DIRECTORY_CONFIGURATION) },
+ { 'T', specifier_tmp_dir, NULL },
+ { 'V', specifier_var_tmp_dir, NULL },
+
+ { 'g', specifier_group_name, NULL },
+ { 'G', specifier_group_id, NULL },
+ { 'U', specifier_user_id, NULL },
+ { 'u', specifier_user_name, NULL },
+ { 'h', specifier_user_home, NULL },
+ { 's', specifier_user_shell, NULL },
+
+ { 'm', specifier_machine_id, NULL },
+ { 'H', specifier_host_name, NULL },
+ { 'b', specifier_boot_id, NULL },
+ { 'v', specifier_kernel_release, NULL },
+ {}
+ };
+
+ assert(u);
+ assert(format);
+ assert(ret);
+
+ return specifier_printf(format, table, u, ret);
+}
diff --git a/src/core/unit-printf.h b/src/core/unit-printf.h
new file mode 100644
index 0000000..f3dae15
--- /dev/null
+++ b/src/core/unit-printf.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "unit.h"
+
+int unit_name_printf(Unit *u, const char* text, char **ret);
+int unit_full_printf(Unit *u, const char *text, char **ret);
diff --git a/src/core/unit.c b/src/core/unit.c
new file mode 100644
index 0000000..24b14fb
--- /dev/null
+++ b/src/core/unit.c
@@ -0,0 +1,5593 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+#include "sd-messages.h"
+
+#include "all-units.h"
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "dbus-unit.h"
+#include "dbus.h"
+#include "dropin.h"
+#include "escape.h"
+#include "execute.h"
+#include "fd-util.h"
+#include "fileio-label.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "load-dropin.h"
+#include "load-fragment.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "set.h"
+#include "signal-util.h"
+#include "sparse-endian.h"
+#include "special.h"
+#include "specifier.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "unit-name.h"
+#include "unit.h"
+#include "user-util.h"
+#include "virt.h"
+
+const UnitVTable * const unit_vtable[_UNIT_TYPE_MAX] = {
+ [UNIT_SERVICE] = &service_vtable,
+ [UNIT_SOCKET] = &socket_vtable,
+ [UNIT_TARGET] = &target_vtable,
+ [UNIT_DEVICE] = &device_vtable,
+ [UNIT_MOUNT] = &mount_vtable,
+ [UNIT_AUTOMOUNT] = &automount_vtable,
+ [UNIT_SWAP] = &swap_vtable,
+ [UNIT_TIMER] = &timer_vtable,
+ [UNIT_PATH] = &path_vtable,
+ [UNIT_SLICE] = &slice_vtable,
+ [UNIT_SCOPE] = &scope_vtable,
+};
+
+static void maybe_warn_about_dependency(Unit *u, const char *other, UnitDependency dependency);
+
+Unit *unit_new(Manager *m, size_t size) {
+ Unit *u;
+
+ assert(m);
+ assert(size >= sizeof(Unit));
+
+ u = malloc0(size);
+ if (!u)
+ return NULL;
+
+ u->names = set_new(&string_hash_ops);
+ if (!u->names)
+ return mfree(u);
+
+ u->manager = m;
+ u->type = _UNIT_TYPE_INVALID;
+ u->default_dependencies = true;
+ u->unit_file_state = _UNIT_FILE_STATE_INVALID;
+ u->unit_file_preset = -1;
+ u->on_failure_job_mode = JOB_REPLACE;
+ u->cgroup_inotify_wd = -1;
+ u->job_timeout = USEC_INFINITY;
+ u->job_running_timeout = USEC_INFINITY;
+ u->ref_uid = UID_INVALID;
+ u->ref_gid = GID_INVALID;
+ u->cpu_usage_last = NSEC_INFINITY;
+ u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
+ u->failure_action_exit_status = u->success_action_exit_status = -1;
+
+ u->ip_accounting_ingress_map_fd = -1;
+ u->ip_accounting_egress_map_fd = -1;
+ u->ipv4_allow_map_fd = -1;
+ u->ipv6_allow_map_fd = -1;
+ u->ipv4_deny_map_fd = -1;
+ u->ipv6_deny_map_fd = -1;
+
+ u->last_section_private = -1;
+
+ RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
+ RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);
+
+ return u;
+}
+
+int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret) {
+ _cleanup_(unit_freep) Unit *u = NULL;
+ int r;
+
+ u = unit_new(m, size);
+ if (!u)
+ return -ENOMEM;
+
+ r = unit_add_name(u, name);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(u);
+
+ return r;
+}
+
+bool unit_has_name(const Unit *u, const char *name) {
+ assert(u);
+ assert(name);
+
+ return set_contains(u->names, (char*) name);
+}
+
+static void unit_init(Unit *u) {
+ CGroupContext *cc;
+ ExecContext *ec;
+ KillContext *kc;
+
+ assert(u);
+ assert(u->manager);
+ assert(u->type >= 0);
+
+ cc = unit_get_cgroup_context(u);
+ if (cc) {
+ cgroup_context_init(cc);
+
+ /* Copy in the manager defaults into the cgroup
+ * context, _before_ the rest of the settings have
+ * been initialized */
+
+ cc->cpu_accounting = u->manager->default_cpu_accounting;
+ cc->io_accounting = u->manager->default_io_accounting;
+ cc->ip_accounting = u->manager->default_ip_accounting;
+ cc->blockio_accounting = u->manager->default_blockio_accounting;
+ cc->memory_accounting = u->manager->default_memory_accounting;
+ cc->tasks_accounting = u->manager->default_tasks_accounting;
+ cc->ip_accounting = u->manager->default_ip_accounting;
+
+ if (u->type != UNIT_SLICE)
+ cc->tasks_max = u->manager->default_tasks_max;
+ }
+
+ ec = unit_get_exec_context(u);
+ if (ec) {
+ exec_context_init(ec);
+
+ ec->keyring_mode = MANAGER_IS_SYSTEM(u->manager) ?
+ EXEC_KEYRING_SHARED : EXEC_KEYRING_INHERIT;
+ }
+
+ kc = unit_get_kill_context(u);
+ if (kc)
+ kill_context_init(kc);
+
+ if (UNIT_VTABLE(u)->init)
+ UNIT_VTABLE(u)->init(u);
+}
+
+int unit_add_name(Unit *u, const char *text) {
+ _cleanup_free_ char *s = NULL, *i = NULL;
+ UnitType t;
+ int r;
+
+ assert(u);
+ assert(text);
+
+ if (unit_name_is_valid(text, UNIT_NAME_TEMPLATE)) {
+
+ if (!u->instance)
+ return -EINVAL;
+
+ r = unit_name_replace_instance(text, u->instance, &s);
+ if (r < 0)
+ return r;
+ } else {
+ s = strdup(text);
+ if (!s)
+ return -ENOMEM;
+ }
+
+ if (set_contains(u->names, s))
+ return 0;
+ if (hashmap_contains(u->manager->units, s))
+ return -EEXIST;
+
+ if (!unit_name_is_valid(s, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
+ return -EINVAL;
+
+ t = unit_name_to_type(s);
+ if (t < 0)
+ return -EINVAL;
+
+ if (u->type != _UNIT_TYPE_INVALID && t != u->type)
+ return -EINVAL;
+
+ r = unit_name_to_instance(s, &i);
+ if (r < 0)
+ return r;
+
+ if (i && !unit_type_may_template(t))
+ return -EINVAL;
+
+ /* Ensure that this unit is either instanced or not instanced,
+ * but not both. Note that we do allow names with different
+ * instance names however! */
+ if (u->type != _UNIT_TYPE_INVALID && !u->instance != !i)
+ return -EINVAL;
+
+ if (!unit_type_may_alias(t) && !set_isempty(u->names))
+ return -EEXIST;
+
+ if (hashmap_size(u->manager->units) >= MANAGER_MAX_NAMES)
+ return -E2BIG;
+
+ r = set_put(u->names, s);
+ if (r < 0)
+ return r;
+ assert(r > 0);
+
+ r = hashmap_put(u->manager->units, s, u);
+ if (r < 0) {
+ (void) set_remove(u->names, s);
+ return r;
+ }
+
+ if (u->type == _UNIT_TYPE_INVALID) {
+ u->type = t;
+ u->id = s;
+ u->instance = TAKE_PTR(i);
+
+ LIST_PREPEND(units_by_type, u->manager->units_by_type[t], u);
+
+ unit_init(u);
+ }
+
+ s = NULL;
+
+ unit_add_to_dbus_queue(u);
+ return 0;
+}
+
+int unit_choose_id(Unit *u, const char *name) {
+ _cleanup_free_ char *t = NULL;
+ char *s, *i;
+ int r;
+
+ assert(u);
+ assert(name);
+
+ if (unit_name_is_valid(name, UNIT_NAME_TEMPLATE)) {
+
+ if (!u->instance)
+ return -EINVAL;
+
+ r = unit_name_replace_instance(name, u->instance, &t);
+ if (r < 0)
+ return r;
+
+ name = t;
+ }
+
+ /* Selects one of the names of this unit as the id */
+ s = set_get(u->names, (char*) name);
+ if (!s)
+ return -ENOENT;
+
+ /* Determine the new instance from the new id */
+ r = unit_name_to_instance(s, &i);
+ if (r < 0)
+ return r;
+
+ u->id = s;
+
+ free(u->instance);
+ u->instance = i;
+
+ unit_add_to_dbus_queue(u);
+
+ return 0;
+}
+
+int unit_set_description(Unit *u, const char *description) {
+ int r;
+
+ assert(u);
+
+ r = free_and_strdup(&u->description, empty_to_null(description));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ unit_add_to_dbus_queue(u);
+
+ return 0;
+}
+
+bool unit_may_gc(Unit *u) {
+ UnitActiveState state;
+ int r;
+
+ assert(u);
+
+ /* Checks whether the unit is ready to be unloaded for garbage collection.
+ * Returns true when the unit may be collected, and false if there's some
+ * reason to keep it loaded.
+ *
+ * References from other units are *not* checked here. Instead, this is done
+ * in unit_gc_sweep(), but using markers to properly collect dependency loops.
+ */
+
+ if (u->job)
+ return false;
+
+ if (u->nop_job)
+ return false;
+
+ state = unit_active_state(u);
+
+ /* If the unit is inactive and failed and no job is queued for it, then release its runtime resources */
+ if (UNIT_IS_INACTIVE_OR_FAILED(state) &&
+ UNIT_VTABLE(u)->release_resources)
+ UNIT_VTABLE(u)->release_resources(u);
+
+ if (u->perpetual)
+ return false;
+
+ if (sd_bus_track_count(u->bus_track) > 0)
+ return false;
+
+ /* But we keep the unit object around for longer when it is referenced or configured to not be gc'ed */
+ switch (u->collect_mode) {
+
+ case COLLECT_INACTIVE:
+ if (state != UNIT_INACTIVE)
+ return false;
+
+ break;
+
+ case COLLECT_INACTIVE_OR_FAILED:
+ if (!IN_SET(state, UNIT_INACTIVE, UNIT_FAILED))
+ return false;
+
+ break;
+
+ default:
+ assert_not_reached("Unknown garbage collection mode");
+ }
+
+ if (u->cgroup_path) {
+ /* If the unit has a cgroup, then check whether there's anything in it. If so, we should stay
+ * around. Units with active processes should never be collected. */
+
+ r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to determine whether cgroup %s is empty: %m", u->cgroup_path);
+ if (r <= 0)
+ return false;
+ }
+
+ if (UNIT_VTABLE(u)->may_gc && !UNIT_VTABLE(u)->may_gc(u))
+ return false;
+
+ return true;
+}
+
+void unit_add_to_load_queue(Unit *u) {
+ assert(u);
+ assert(u->type != _UNIT_TYPE_INVALID);
+
+ if (u->load_state != UNIT_STUB || u->in_load_queue)
+ return;
+
+ LIST_PREPEND(load_queue, u->manager->load_queue, u);
+ u->in_load_queue = true;
+}
+
+void unit_add_to_cleanup_queue(Unit *u) {
+ assert(u);
+
+ if (u->in_cleanup_queue)
+ return;
+
+ LIST_PREPEND(cleanup_queue, u->manager->cleanup_queue, u);
+ u->in_cleanup_queue = true;
+}
+
+void unit_add_to_gc_queue(Unit *u) {
+ assert(u);
+
+ if (u->in_gc_queue || u->in_cleanup_queue)
+ return;
+
+ if (!unit_may_gc(u))
+ return;
+
+ LIST_PREPEND(gc_queue, u->manager->gc_unit_queue, u);
+ u->in_gc_queue = true;
+}
+
+void unit_add_to_dbus_queue(Unit *u) {
+ assert(u);
+ assert(u->type != _UNIT_TYPE_INVALID);
+
+ if (u->load_state == UNIT_STUB || u->in_dbus_queue)
+ return;
+
+ /* Shortcut things if nobody cares */
+ if (sd_bus_track_count(u->manager->subscribed) <= 0 &&
+ sd_bus_track_count(u->bus_track) <= 0 &&
+ set_isempty(u->manager->private_buses)) {
+ u->sent_dbus_new_signal = true;
+ return;
+ }
+
+ LIST_PREPEND(dbus_queue, u->manager->dbus_unit_queue, u);
+ u->in_dbus_queue = true;
+}
+
+void unit_submit_to_stop_when_unneeded_queue(Unit *u) {
+ assert(u);
+
+ if (u->in_stop_when_unneeded_queue)
+ return;
+
+ if (!u->stop_when_unneeded)
+ return;
+
+ if (!UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
+ return;
+
+ LIST_PREPEND(stop_when_unneeded_queue, u->manager->stop_when_unneeded_queue, u);
+ u->in_stop_when_unneeded_queue = true;
+}
+
+static void bidi_set_free(Unit *u, Hashmap *h) {
+ Unit *other;
+ Iterator i;
+ void *v;
+
+ assert(u);
+
+ /* Frees the hashmap and makes sure we are dropped from the inverse pointers */
+
+ HASHMAP_FOREACH_KEY(v, other, h, i) {
+ UnitDependency d;
+
+ for (d = 0; d < _UNIT_DEPENDENCY_MAX; d++)
+ hashmap_remove(other->dependencies[d], u);
+
+ unit_add_to_gc_queue(other);
+ }
+
+ hashmap_free(h);
+}
+
+static void unit_remove_transient(Unit *u) {
+ char **i;
+
+ assert(u);
+
+ if (!u->transient)
+ return;
+
+ if (u->fragment_path)
+ (void) unlink(u->fragment_path);
+
+ STRV_FOREACH(i, u->dropin_paths) {
+ _cleanup_free_ char *p = NULL, *pp = NULL;
+
+ p = dirname_malloc(*i); /* Get the drop-in directory from the drop-in file */
+ if (!p)
+ continue;
+
+ pp = dirname_malloc(p); /* Get the config directory from the drop-in directory */
+ if (!pp)
+ continue;
+
+ /* Only drop transient drop-ins */
+ if (!path_equal(u->manager->lookup_paths.transient, pp))
+ continue;
+
+ (void) unlink(*i);
+ (void) rmdir(p);
+ }
+}
+
+static void unit_free_requires_mounts_for(Unit *u) {
+ assert(u);
+
+ for (;;) {
+ _cleanup_free_ char *path;
+
+ path = hashmap_steal_first_key(u->requires_mounts_for);
+ if (!path)
+ break;
+ else {
+ char s[strlen(path) + 1];
+
+ PATH_FOREACH_PREFIX_MORE(s, path) {
+ char *y;
+ Set *x;
+
+ x = hashmap_get2(u->manager->units_requiring_mounts_for, s, (void**) &y);
+ if (!x)
+ continue;
+
+ (void) set_remove(x, u);
+
+ if (set_isempty(x)) {
+ (void) hashmap_remove(u->manager->units_requiring_mounts_for, y);
+ free(y);
+ set_free(x);
+ }
+ }
+ }
+ }
+
+ u->requires_mounts_for = hashmap_free(u->requires_mounts_for);
+}
+
+static void unit_done(Unit *u) {
+ ExecContext *ec;
+ CGroupContext *cc;
+
+ assert(u);
+
+ if (u->type < 0)
+ return;
+
+ if (UNIT_VTABLE(u)->done)
+ UNIT_VTABLE(u)->done(u);
+
+ ec = unit_get_exec_context(u);
+ if (ec)
+ exec_context_done(ec);
+
+ cc = unit_get_cgroup_context(u);
+ if (cc)
+ cgroup_context_done(cc);
+}
+
+void unit_free(Unit *u) {
+ UnitDependency d;
+ Iterator i;
+ char *t;
+
+ if (!u)
+ return;
+
+ if (UNIT_ISSET(u->slice)) {
+ /* A unit is being dropped from the tree, make sure our parent slice recalculates the member mask */
+ unit_invalidate_cgroup_members_masks(UNIT_DEREF(u->slice));
+
+ /* And make sure the parent is realized again, updating cgroup memberships */
+ unit_add_to_cgroup_realize_queue(UNIT_DEREF(u->slice));
+ }
+
+ u->transient_file = safe_fclose(u->transient_file);
+
+ if (!MANAGER_IS_RELOADING(u->manager))
+ unit_remove_transient(u);
+
+ bus_unit_send_removed_signal(u);
+
+ unit_done(u);
+
+ unit_dequeue_rewatch_pids(u);
+
+ sd_bus_slot_unref(u->match_bus_slot);
+ sd_bus_track_unref(u->bus_track);
+ u->deserialized_refs = strv_free(u->deserialized_refs);
+
+ unit_free_requires_mounts_for(u);
+
+ SET_FOREACH(t, u->names, i)
+ hashmap_remove_value(u->manager->units, t, u);
+
+ if (!sd_id128_is_null(u->invocation_id))
+ hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u);
+
+ if (u->job) {
+ Job *j = u->job;
+ job_uninstall(j);
+ job_free(j);
+ }
+
+ if (u->nop_job) {
+ Job *j = u->nop_job;
+ job_uninstall(j);
+ job_free(j);
+ }
+
+ for (d = 0; d < _UNIT_DEPENDENCY_MAX; d++)
+ bidi_set_free(u, u->dependencies[d]);
+
+ if (u->on_console)
+ manager_unref_console(u->manager);
+
+ unit_release_cgroup(u);
+
+ if (!MANAGER_IS_RELOADING(u->manager))
+ unit_unlink_state_files(u);
+
+ unit_unref_uid_gid(u, false);
+
+ (void) manager_update_failed_units(u->manager, u, false);
+ set_remove(u->manager->startup_units, u);
+
+ unit_unwatch_all_pids(u);
+
+ unit_ref_unset(&u->slice);
+ while (u->refs_by_target)
+ unit_ref_unset(u->refs_by_target);
+
+ if (u->type != _UNIT_TYPE_INVALID)
+ LIST_REMOVE(units_by_type, u->manager->units_by_type[u->type], u);
+
+ if (u->in_load_queue)
+ LIST_REMOVE(load_queue, u->manager->load_queue, u);
+
+ if (u->in_dbus_queue)
+ LIST_REMOVE(dbus_queue, u->manager->dbus_unit_queue, u);
+
+ if (u->in_gc_queue)
+ LIST_REMOVE(gc_queue, u->manager->gc_unit_queue, u);
+
+ if (u->in_cgroup_realize_queue)
+ LIST_REMOVE(cgroup_realize_queue, u->manager->cgroup_realize_queue, u);
+
+ if (u->in_cgroup_empty_queue)
+ LIST_REMOVE(cgroup_empty_queue, u->manager->cgroup_empty_queue, u);
+
+ if (u->in_cleanup_queue)
+ LIST_REMOVE(cleanup_queue, u->manager->cleanup_queue, u);
+
+ if (u->in_target_deps_queue)
+ LIST_REMOVE(target_deps_queue, u->manager->target_deps_queue, u);
+
+ if (u->in_stop_when_unneeded_queue)
+ LIST_REMOVE(stop_when_unneeded_queue, u->manager->stop_when_unneeded_queue, u);
+
+ safe_close(u->ip_accounting_ingress_map_fd);
+ safe_close(u->ip_accounting_egress_map_fd);
+
+ safe_close(u->ipv4_allow_map_fd);
+ safe_close(u->ipv6_allow_map_fd);
+ safe_close(u->ipv4_deny_map_fd);
+ safe_close(u->ipv6_deny_map_fd);
+
+ bpf_program_unref(u->ip_bpf_ingress);
+ bpf_program_unref(u->ip_bpf_ingress_installed);
+ bpf_program_unref(u->ip_bpf_egress);
+ bpf_program_unref(u->ip_bpf_egress_installed);
+
+ bpf_program_unref(u->bpf_device_control_installed);
+
+ condition_free_list(u->conditions);
+ condition_free_list(u->asserts);
+
+ free(u->description);
+ strv_free(u->documentation);
+ free(u->fragment_path);
+ free(u->source_path);
+ strv_free(u->dropin_paths);
+ free(u->instance);
+
+ free(u->job_timeout_reboot_arg);
+
+ set_free_free(u->names);
+
+ free(u->reboot_arg);
+
+ free(u);
+}
+
+UnitActiveState unit_active_state(Unit *u) {
+ assert(u);
+
+ if (u->load_state == UNIT_MERGED)
+ return unit_active_state(unit_follow_merge(u));
+
+ /* After a reload it might happen that a unit is not correctly
+ * loaded but still has a process around. That's why we won't
+ * shortcut failed loading to UNIT_INACTIVE_FAILED. */
+
+ return UNIT_VTABLE(u)->active_state(u);
+}
+
+const char* unit_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return UNIT_VTABLE(u)->sub_state_to_string(u);
+}
+
+static int set_complete_move(Set **s, Set **other) {
+ assert(s);
+ assert(other);
+
+ if (!other)
+ return 0;
+
+ if (*s)
+ return set_move(*s, *other);
+ else
+ *s = TAKE_PTR(*other);
+
+ return 0;
+}
+
+static int hashmap_complete_move(Hashmap **s, Hashmap **other) {
+ assert(s);
+ assert(other);
+
+ if (!*other)
+ return 0;
+
+ if (*s)
+ return hashmap_move(*s, *other);
+ else
+ *s = TAKE_PTR(*other);
+
+ return 0;
+}
+
+static int merge_names(Unit *u, Unit *other) {
+ char *t;
+ Iterator i;
+ int r;
+
+ assert(u);
+ assert(other);
+
+ r = set_complete_move(&u->names, &other->names);
+ if (r < 0)
+ return r;
+
+ set_free_free(other->names);
+ other->names = NULL;
+ other->id = NULL;
+
+ SET_FOREACH(t, u->names, i)
+ assert_se(hashmap_replace(u->manager->units, t, u) == 0);
+
+ return 0;
+}
+
+static int reserve_dependencies(Unit *u, Unit *other, UnitDependency d) {
+ unsigned n_reserve;
+
+ assert(u);
+ assert(other);
+ assert(d < _UNIT_DEPENDENCY_MAX);
+
+ /*
+ * If u does not have this dependency set allocated, there is no need
+ * to reserve anything. In that case other's set will be transferred
+ * as a whole to u by complete_move().
+ */
+ if (!u->dependencies[d])
+ return 0;
+
+ /* merge_dependencies() will skip a u-on-u dependency */
+ n_reserve = hashmap_size(other->dependencies[d]) - !!hashmap_get(other->dependencies[d], u);
+
+ return hashmap_reserve(u->dependencies[d], n_reserve);
+}
+
+static void merge_dependencies(Unit *u, Unit *other, const char *other_id, UnitDependency d) {
+ Iterator i;
+ Unit *back;
+ void *v;
+ int r;
+
+ /* Merges all dependencies of type 'd' of the unit 'other' into the deps of the unit 'u' */
+
+ assert(u);
+ assert(other);
+ assert(d < _UNIT_DEPENDENCY_MAX);
+
+ /* Fix backwards pointers. Let's iterate through all dependendent units of the other unit. */
+ HASHMAP_FOREACH_KEY(v, back, other->dependencies[d], i) {
+ UnitDependency k;
+
+ /* Let's now iterate through the dependencies of that dependencies of the other units, looking for
+ * pointers back, and let's fix them up, to instead point to 'u'. */
+
+ for (k = 0; k < _UNIT_DEPENDENCY_MAX; k++) {
+ if (back == u) {
+ /* Do not add dependencies between u and itself. */
+ if (hashmap_remove(back->dependencies[k], other))
+ maybe_warn_about_dependency(u, other_id, k);
+ } else {
+ UnitDependencyInfo di_u, di_other, di_merged;
+
+ /* Let's drop this dependency between "back" and "other", and let's create it between
+ * "back" and "u" instead. Let's merge the bit masks of the dependency we are moving,
+ * and any such dependency which might already exist */
+
+ di_other.data = hashmap_get(back->dependencies[k], other);
+ if (!di_other.data)
+ continue; /* dependency isn't set, let's try the next one */
+
+ di_u.data = hashmap_get(back->dependencies[k], u);
+
+ di_merged = (UnitDependencyInfo) {
+ .origin_mask = di_u.origin_mask | di_other.origin_mask,
+ .destination_mask = di_u.destination_mask | di_other.destination_mask,
+ };
+
+ r = hashmap_remove_and_replace(back->dependencies[k], other, u, di_merged.data);
+ if (r < 0)
+ log_warning_errno(r, "Failed to remove/replace: back=%s other=%s u=%s: %m", back->id, other_id, u->id);
+ assert(r >= 0);
+
+ /* assert_se(hashmap_remove_and_replace(back->dependencies[k], other, u, di_merged.data) >= 0); */
+ }
+ }
+
+ }
+
+ /* Also do not move dependencies on u to itself */
+ back = hashmap_remove(other->dependencies[d], u);
+ if (back)
+ maybe_warn_about_dependency(u, other_id, d);
+
+ /* The move cannot fail. The caller must have performed a reservation. */
+ assert_se(hashmap_complete_move(&u->dependencies[d], &other->dependencies[d]) == 0);
+
+ other->dependencies[d] = hashmap_free(other->dependencies[d]);
+}
+
+int unit_merge(Unit *u, Unit *other) {
+ UnitDependency d;
+ const char *other_id = NULL;
+ int r;
+
+ assert(u);
+ assert(other);
+ assert(u->manager == other->manager);
+ assert(u->type != _UNIT_TYPE_INVALID);
+
+ other = unit_follow_merge(other);
+
+ if (other == u)
+ return 0;
+
+ if (u->type != other->type)
+ return -EINVAL;
+
+ if (!u->instance != !other->instance)
+ return -EINVAL;
+
+ if (!unit_type_may_alias(u->type)) /* Merging only applies to unit names that support aliases */
+ return -EEXIST;
+
+ if (!IN_SET(other->load_state, UNIT_STUB, UNIT_NOT_FOUND))
+ return -EEXIST;
+
+ if (other->job)
+ return -EEXIST;
+
+ if (other->nop_job)
+ return -EEXIST;
+
+ if (!UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other)))
+ return -EEXIST;
+
+ if (other->id)
+ other_id = strdupa(other->id);
+
+ /* Make reservations to ensure merge_dependencies() won't fail */
+ for (d = 0; d < _UNIT_DEPENDENCY_MAX; d++) {
+ r = reserve_dependencies(u, other, d);
+ /*
+ * We don't rollback reservations if we fail. We don't have
+ * a way to undo reservations. A reservation is not a leak.
+ */
+ if (r < 0)
+ return r;
+ }
+
+ /* Merge names */
+ r = merge_names(u, other);
+ if (r < 0)
+ return r;
+
+ /* Redirect all references */
+ while (other->refs_by_target)
+ unit_ref_set(other->refs_by_target, other->refs_by_target->source, u);
+
+ /* Merge dependencies */
+ for (d = 0; d < _UNIT_DEPENDENCY_MAX; d++)
+ merge_dependencies(u, other, other_id, d);
+
+ other->load_state = UNIT_MERGED;
+ other->merged_into = u;
+
+ /* If there is still some data attached to the other node, we
+ * don't need it anymore, and can free it. */
+ if (other->load_state != UNIT_STUB)
+ if (UNIT_VTABLE(other)->done)
+ UNIT_VTABLE(other)->done(other);
+
+ unit_add_to_dbus_queue(u);
+ unit_add_to_cleanup_queue(other);
+
+ return 0;
+}
+
+int unit_merge_by_name(Unit *u, const char *name) {
+ _cleanup_free_ char *s = NULL;
+ Unit *other;
+ int r;
+
+ assert(u);
+ assert(name);
+
+ if (unit_name_is_valid(name, UNIT_NAME_TEMPLATE)) {
+ if (!u->instance)
+ return -EINVAL;
+
+ r = unit_name_replace_instance(name, u->instance, &s);
+ if (r < 0)
+ return r;
+
+ name = s;
+ }
+
+ other = manager_get_unit(u->manager, name);
+ if (other)
+ return unit_merge(u, other);
+
+ return unit_add_name(u, name);
+}
+
+Unit* unit_follow_merge(Unit *u) {
+ assert(u);
+
+ while (u->load_state == UNIT_MERGED)
+ assert_se(u = u->merged_into);
+
+ return u;
+}
+
+int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
+ ExecDirectoryType dt;
+ char **dp;
+ int r;
+
+ assert(u);
+ assert(c);
+
+ if (c->working_directory && !c->working_directory_missing_ok) {
+ r = unit_require_mounts_for(u, c->working_directory, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ if (c->root_directory) {
+ r = unit_require_mounts_for(u, c->root_directory, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ if (c->root_image) {
+ r = unit_require_mounts_for(u, c->root_image, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
+ if (!u->manager->prefix[dt])
+ continue;
+
+ STRV_FOREACH(dp, c->directories[dt].paths) {
+ _cleanup_free_ char *p;
+
+ p = strjoin(u->manager->prefix[dt], "/", *dp);
+ if (!p)
+ return -ENOMEM;
+
+ r = unit_require_mounts_for(u, p, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!MANAGER_IS_SYSTEM(u->manager))
+ return 0;
+
+ if (c->private_tmp) {
+ const char *p;
+
+ FOREACH_STRING(p, "/tmp", "/var/tmp") {
+ r = unit_require_mounts_for(u, p, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_TMPFILES_SETUP_SERVICE, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ if (!IN_SET(c->std_output,
+ EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_SYSLOG, EXEC_OUTPUT_SYSLOG_AND_CONSOLE) &&
+ !IN_SET(c->std_error,
+ EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_SYSLOG, EXEC_OUTPUT_SYSLOG_AND_CONSOLE))
+ return 0;
+
+ /* If syslog or kernel logging is requested, make sure our own
+ * logging daemon is run first. */
+
+ r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_JOURNALD_SOCKET, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+const char *unit_description(Unit *u) {
+ assert(u);
+
+ if (u->description)
+ return u->description;
+
+ return strna(u->id);
+}
+
+static void print_unit_dependency_mask(FILE *f, const char *kind, UnitDependencyMask mask, bool *space) {
+ const struct {
+ UnitDependencyMask mask;
+ const char *name;
+ } table[] = {
+ { UNIT_DEPENDENCY_FILE, "file" },
+ { UNIT_DEPENDENCY_IMPLICIT, "implicit" },
+ { UNIT_DEPENDENCY_DEFAULT, "default" },
+ { UNIT_DEPENDENCY_UDEV, "udev" },
+ { UNIT_DEPENDENCY_PATH, "path" },
+ { UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT, "mountinfo-implicit" },
+ { UNIT_DEPENDENCY_MOUNTINFO_DEFAULT, "mountinfo-default" },
+ { UNIT_DEPENDENCY_PROC_SWAP, "proc-swap" },
+ };
+ size_t i;
+
+ assert(f);
+ assert(kind);
+ assert(space);
+
+ for (i = 0; i < ELEMENTSOF(table); i++) {
+
+ if (mask == 0)
+ break;
+
+ if (FLAGS_SET(mask, table[i].mask)) {
+ if (*space)
+ fputc(' ', f);
+ else
+ *space = true;
+
+ fputs(kind, f);
+ fputs("-", f);
+ fputs(table[i].name, f);
+
+ mask &= ~table[i].mask;
+ }
+ }
+
+ assert(mask == 0);
+}
+
+void unit_dump(Unit *u, FILE *f, const char *prefix) {
+ char *t, **j;
+ UnitDependency d;
+ Iterator i;
+ const char *prefix2;
+ char
+ timestamp0[FORMAT_TIMESTAMP_MAX],
+ timestamp1[FORMAT_TIMESTAMP_MAX],
+ timestamp2[FORMAT_TIMESTAMP_MAX],
+ timestamp3[FORMAT_TIMESTAMP_MAX],
+ timestamp4[FORMAT_TIMESTAMP_MAX],
+ timespan[FORMAT_TIMESPAN_MAX];
+ Unit *following;
+ _cleanup_set_free_ Set *following_set = NULL;
+ const char *n;
+ CGroupMask m;
+ int r;
+
+ assert(u);
+ assert(u->type >= 0);
+
+ prefix = strempty(prefix);
+ prefix2 = strjoina(prefix, "\t");
+
+ fprintf(f,
+ "%s-> Unit %s:\n"
+ "%s\tDescription: %s\n"
+ "%s\tInstance: %s\n"
+ "%s\tUnit Load State: %s\n"
+ "%s\tUnit Active State: %s\n"
+ "%s\tState Change Timestamp: %s\n"
+ "%s\tInactive Exit Timestamp: %s\n"
+ "%s\tActive Enter Timestamp: %s\n"
+ "%s\tActive Exit Timestamp: %s\n"
+ "%s\tInactive Enter Timestamp: %s\n"
+ "%s\tMay GC: %s\n"
+ "%s\tNeed Daemon Reload: %s\n"
+ "%s\tTransient: %s\n"
+ "%s\tPerpetual: %s\n"
+ "%s\tGarbage Collection Mode: %s\n"
+ "%s\tSlice: %s\n"
+ "%s\tCGroup: %s\n"
+ "%s\tCGroup realized: %s\n",
+ prefix, u->id,
+ prefix, unit_description(u),
+ prefix, strna(u->instance),
+ prefix, unit_load_state_to_string(u->load_state),
+ prefix, unit_active_state_to_string(unit_active_state(u)),
+ prefix, strna(format_timestamp(timestamp0, sizeof(timestamp0), u->state_change_timestamp.realtime)),
+ prefix, strna(format_timestamp(timestamp1, sizeof(timestamp1), u->inactive_exit_timestamp.realtime)),
+ prefix, strna(format_timestamp(timestamp2, sizeof(timestamp2), u->active_enter_timestamp.realtime)),
+ prefix, strna(format_timestamp(timestamp3, sizeof(timestamp3), u->active_exit_timestamp.realtime)),
+ prefix, strna(format_timestamp(timestamp4, sizeof(timestamp4), u->inactive_enter_timestamp.realtime)),
+ prefix, yes_no(unit_may_gc(u)),
+ prefix, yes_no(unit_need_daemon_reload(u)),
+ prefix, yes_no(u->transient),
+ prefix, yes_no(u->perpetual),
+ prefix, collect_mode_to_string(u->collect_mode),
+ prefix, strna(unit_slice_name(u)),
+ prefix, strna(u->cgroup_path),
+ prefix, yes_no(u->cgroup_realized));
+
+ if (u->cgroup_realized_mask != 0) {
+ _cleanup_free_ char *s = NULL;
+ (void) cg_mask_to_string(u->cgroup_realized_mask, &s);
+ fprintf(f, "%s\tCGroup realized mask: %s\n", prefix, strnull(s));
+ }
+
+ if (u->cgroup_enabled_mask != 0) {
+ _cleanup_free_ char *s = NULL;
+ (void) cg_mask_to_string(u->cgroup_enabled_mask, &s);
+ fprintf(f, "%s\tCGroup enabled mask: %s\n", prefix, strnull(s));
+ }
+
+ m = unit_get_own_mask(u);
+ if (m != 0) {
+ _cleanup_free_ char *s = NULL;
+ (void) cg_mask_to_string(m, &s);
+ fprintf(f, "%s\tCGroup own mask: %s\n", prefix, strnull(s));
+ }
+
+ m = unit_get_members_mask(u);
+ if (m != 0) {
+ _cleanup_free_ char *s = NULL;
+ (void) cg_mask_to_string(m, &s);
+ fprintf(f, "%s\tCGroup members mask: %s\n", prefix, strnull(s));
+ }
+
+ m = unit_get_delegate_mask(u);
+ if (m != 0) {
+ _cleanup_free_ char *s = NULL;
+ (void) cg_mask_to_string(m, &s);
+ fprintf(f, "%s\tCGroup delegate mask: %s\n", prefix, strnull(s));
+ }
+
+ SET_FOREACH(t, u->names, i)
+ fprintf(f, "%s\tName: %s\n", prefix, t);
+
+ if (!sd_id128_is_null(u->invocation_id))
+ fprintf(f, "%s\tInvocation ID: " SD_ID128_FORMAT_STR "\n",
+ prefix, SD_ID128_FORMAT_VAL(u->invocation_id));
+
+ STRV_FOREACH(j, u->documentation)
+ fprintf(f, "%s\tDocumentation: %s\n", prefix, *j);
+
+ following = unit_following(u);
+ if (following)
+ fprintf(f, "%s\tFollowing: %s\n", prefix, following->id);
+
+ r = unit_following_set(u, &following_set);
+ if (r >= 0) {
+ Unit *other;
+
+ SET_FOREACH(other, following_set, i)
+ fprintf(f, "%s\tFollowing Set Member: %s\n", prefix, other->id);
+ }
+
+ if (u->fragment_path)
+ fprintf(f, "%s\tFragment Path: %s\n", prefix, u->fragment_path);
+
+ if (u->source_path)
+ fprintf(f, "%s\tSource Path: %s\n", prefix, u->source_path);
+
+ STRV_FOREACH(j, u->dropin_paths)
+ fprintf(f, "%s\tDropIn Path: %s\n", prefix, *j);
+
+ if (u->failure_action != EMERGENCY_ACTION_NONE)
+ fprintf(f, "%s\tFailure Action: %s\n", prefix, emergency_action_to_string(u->failure_action));
+ if (u->failure_action_exit_status >= 0)
+ fprintf(f, "%s\tFailure Action Exit Status: %i\n", prefix, u->failure_action_exit_status);
+ if (u->success_action != EMERGENCY_ACTION_NONE)
+ fprintf(f, "%s\tSuccess Action: %s\n", prefix, emergency_action_to_string(u->success_action));
+ if (u->success_action_exit_status >= 0)
+ fprintf(f, "%s\tSuccess Action Exit Status: %i\n", prefix, u->success_action_exit_status);
+
+ if (u->job_timeout != USEC_INFINITY)
+ fprintf(f, "%s\tJob Timeout: %s\n", prefix, format_timespan(timespan, sizeof(timespan), u->job_timeout, 0));
+
+ if (u->job_timeout_action != EMERGENCY_ACTION_NONE)
+ fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, emergency_action_to_string(u->job_timeout_action));
+
+ if (u->job_timeout_reboot_arg)
+ fprintf(f, "%s\tJob Timeout Reboot Argument: %s\n", prefix, u->job_timeout_reboot_arg);
+
+ condition_dump_list(u->conditions, f, prefix, condition_type_to_string);
+ condition_dump_list(u->asserts, f, prefix, assert_type_to_string);
+
+ if (dual_timestamp_is_set(&u->condition_timestamp))
+ fprintf(f,
+ "%s\tCondition Timestamp: %s\n"
+ "%s\tCondition Result: %s\n",
+ prefix, strna(format_timestamp(timestamp1, sizeof(timestamp1), u->condition_timestamp.realtime)),
+ prefix, yes_no(u->condition_result));
+
+ if (dual_timestamp_is_set(&u->assert_timestamp))
+ fprintf(f,
+ "%s\tAssert Timestamp: %s\n"
+ "%s\tAssert Result: %s\n",
+ prefix, strna(format_timestamp(timestamp1, sizeof(timestamp1), u->assert_timestamp.realtime)),
+ prefix, yes_no(u->assert_result));
+
+ for (d = 0; d < _UNIT_DEPENDENCY_MAX; d++) {
+ UnitDependencyInfo di;
+ Unit *other;
+
+ HASHMAP_FOREACH_KEY(di.data, other, u->dependencies[d], i) {
+ bool space = false;
+
+ fprintf(f, "%s\t%s: %s (", prefix, unit_dependency_to_string(d), other->id);
+
+ print_unit_dependency_mask(f, "origin", di.origin_mask, &space);
+ print_unit_dependency_mask(f, "destination", di.destination_mask, &space);
+
+ fputs(")\n", f);
+ }
+ }
+
+ if (!hashmap_isempty(u->requires_mounts_for)) {
+ UnitDependencyInfo di;
+ const char *path;
+
+ HASHMAP_FOREACH_KEY(di.data, path, u->requires_mounts_for, i) {
+ bool space = false;
+
+ fprintf(f, "%s\tRequiresMountsFor: %s (", prefix, path);
+
+ print_unit_dependency_mask(f, "origin", di.origin_mask, &space);
+ print_unit_dependency_mask(f, "destination", di.destination_mask, &space);
+
+ fputs(")\n", f);
+ }
+ }
+
+ if (u->load_state == UNIT_LOADED) {
+
+ fprintf(f,
+ "%s\tStopWhenUnneeded: %s\n"
+ "%s\tRefuseManualStart: %s\n"
+ "%s\tRefuseManualStop: %s\n"
+ "%s\tDefaultDependencies: %s\n"
+ "%s\tOnFailureJobMode: %s\n"
+ "%s\tIgnoreOnIsolate: %s\n",
+ prefix, yes_no(u->stop_when_unneeded),
+ prefix, yes_no(u->refuse_manual_start),
+ prefix, yes_no(u->refuse_manual_stop),
+ prefix, yes_no(u->default_dependencies),
+ prefix, job_mode_to_string(u->on_failure_job_mode),
+ prefix, yes_no(u->ignore_on_isolate));
+
+ if (UNIT_VTABLE(u)->dump)
+ UNIT_VTABLE(u)->dump(u, f, prefix2);
+
+ } else if (u->load_state == UNIT_MERGED)
+ fprintf(f,
+ "%s\tMerged into: %s\n",
+ prefix, u->merged_into->id);
+ else if (u->load_state == UNIT_ERROR)
+ fprintf(f, "%s\tLoad Error Code: %s\n", prefix, strerror(-u->load_error));
+
+ for (n = sd_bus_track_first(u->bus_track); n; n = sd_bus_track_next(u->bus_track))
+ fprintf(f, "%s\tBus Ref: %s\n", prefix, n);
+
+ if (u->job)
+ job_dump(u->job, f, prefix2);
+
+ if (u->nop_job)
+ job_dump(u->nop_job, f, prefix2);
+}
+
+/* Common implementation for multiple backends */
+int unit_load_fragment_and_dropin(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Load a .{service,socket,...} file */
+ r = unit_load_fragment(u);
+ if (r < 0)
+ return r;
+
+ if (u->load_state == UNIT_STUB)
+ return -ENOENT;
+
+ /* Load drop-in directory data. If u is an alias, we might be reloading the
+ * target unit needlessly. But we cannot be sure which drops-ins have already
+ * been loaded and which not, at least without doing complicated book-keeping,
+ * so let's always reread all drop-ins. */
+ return unit_load_dropin(unit_follow_merge(u));
+}
+
+/* Common implementation for multiple backends */
+int unit_load_fragment_and_dropin_optional(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Same as unit_load_fragment_and_dropin(), but whether
+ * something can be loaded or not doesn't matter. */
+
+ /* Load a .service/.socket/.slice/… file */
+ r = unit_load_fragment(u);
+ if (r < 0)
+ return r;
+
+ if (u->load_state == UNIT_STUB)
+ u->load_state = UNIT_LOADED;
+
+ /* Load drop-in directory data */
+ return unit_load_dropin(unit_follow_merge(u));
+}
+
+void unit_add_to_target_deps_queue(Unit *u) {
+ Manager *m = u->manager;
+
+ assert(u);
+
+ if (u->in_target_deps_queue)
+ return;
+
+ LIST_PREPEND(target_deps_queue, m->target_deps_queue, u);
+ u->in_target_deps_queue = true;
+}
+
+int unit_add_default_target_dependency(Unit *u, Unit *target) {
+ assert(u);
+ assert(target);
+
+ if (target->type != UNIT_TARGET)
+ return 0;
+
+ /* Only add the dependency if both units are loaded, so that
+ * that loop check below is reliable */
+ if (u->load_state != UNIT_LOADED ||
+ target->load_state != UNIT_LOADED)
+ return 0;
+
+ /* If either side wants no automatic dependencies, then let's
+ * skip this */
+ if (!u->default_dependencies ||
+ !target->default_dependencies)
+ return 0;
+
+ /* Don't create loops */
+ if (hashmap_get(target->dependencies[UNIT_BEFORE], u))
+ return 0;
+
+ return unit_add_dependency(target, UNIT_AFTER, u, true, UNIT_DEPENDENCY_DEFAULT);
+}
+
+static int unit_add_slice_dependencies(Unit *u) {
+ UnitDependencyMask mask;
+ assert(u);
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return 0;
+
+ /* Slice units are implicitly ordered against their parent slices (as this relationship is encoded in the
+ name), while all other units are ordered based on configuration (as in their case Slice= configures the
+ relationship). */
+ mask = u->type == UNIT_SLICE ? UNIT_DEPENDENCY_IMPLICIT : UNIT_DEPENDENCY_FILE;
+
+ if (UNIT_ISSET(u->slice))
+ return unit_add_two_dependencies(u, UNIT_AFTER, UNIT_REQUIRES, UNIT_DEREF(u->slice), true, mask);
+
+ if (unit_has_name(u, SPECIAL_ROOT_SLICE))
+ return 0;
+
+ return unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_REQUIRES, SPECIAL_ROOT_SLICE, true, mask);
+}
+
+static int unit_add_mount_dependencies(Unit *u) {
+ UnitDependencyInfo di;
+ const char *path;
+ Iterator i;
+ int r;
+
+ assert(u);
+
+ HASHMAP_FOREACH_KEY(di.data, path, u->requires_mounts_for, i) {
+ char prefix[strlen(path) + 1];
+
+ PATH_FOREACH_PREFIX_MORE(prefix, path) {
+ _cleanup_free_ char *p = NULL;
+ Unit *m;
+
+ r = unit_name_from_path(prefix, ".mount", &p);
+ if (r < 0)
+ return r;
+
+ m = manager_get_unit(u->manager, p);
+ if (!m) {
+ /* Make sure to load the mount unit if
+ * it exists. If so the dependencies
+ * on this unit will be added later
+ * during the loading of the mount
+ * unit. */
+ (void) manager_load_unit_prepare(u->manager, p, NULL, NULL, &m);
+ continue;
+ }
+ if (m == u)
+ continue;
+
+ if (m->load_state != UNIT_LOADED)
+ continue;
+
+ r = unit_add_dependency(u, UNIT_AFTER, m, true, di.origin_mask);
+ if (r < 0)
+ return r;
+
+ if (m->fragment_path) {
+ r = unit_add_dependency(u, UNIT_REQUIRES, m, true, di.origin_mask);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int unit_add_startup_units(Unit *u) {
+ CGroupContext *c;
+ int r;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return 0;
+
+ if (c->startup_cpu_shares == CGROUP_CPU_SHARES_INVALID &&
+ c->startup_io_weight == CGROUP_WEIGHT_INVALID &&
+ c->startup_blockio_weight == CGROUP_BLKIO_WEIGHT_INVALID)
+ return 0;
+
+ r = set_ensure_allocated(&u->manager->startup_units, NULL);
+ if (r < 0)
+ return r;
+
+ return set_put(u->manager->startup_units, u);
+}
+
+int unit_load(Unit *u) {
+ int r;
+
+ assert(u);
+
+ if (u->in_load_queue) {
+ LIST_REMOVE(load_queue, u->manager->load_queue, u);
+ u->in_load_queue = false;
+ }
+
+ if (u->type == _UNIT_TYPE_INVALID)
+ return -EINVAL;
+
+ if (u->load_state != UNIT_STUB)
+ return 0;
+
+ if (u->transient_file) {
+ /* Finalize transient file: if this is a transient unit file, as soon as we reach unit_load() the setup
+ * is complete, hence let's synchronize the unit file we just wrote to disk. */
+
+ r = fflush_and_check(u->transient_file);
+ if (r < 0)
+ goto fail;
+
+ u->transient_file = safe_fclose(u->transient_file);
+ u->fragment_mtime = now(CLOCK_REALTIME);
+ }
+
+ if (UNIT_VTABLE(u)->load) {
+ r = UNIT_VTABLE(u)->load(u);
+ if (r < 0)
+ goto fail;
+ }
+
+ if (u->load_state == UNIT_STUB) {
+ r = -ENOENT;
+ goto fail;
+ }
+
+ if (u->load_state == UNIT_LOADED) {
+ unit_add_to_target_deps_queue(u);
+
+ r = unit_add_slice_dependencies(u);
+ if (r < 0)
+ goto fail;
+
+ r = unit_add_mount_dependencies(u);
+ if (r < 0)
+ goto fail;
+
+ r = unit_add_startup_units(u);
+ if (r < 0)
+ goto fail;
+
+ if (u->on_failure_job_mode == JOB_ISOLATE && hashmap_size(u->dependencies[UNIT_ON_FAILURE]) > 1) {
+ log_unit_error(u, "More than one OnFailure= dependencies specified but OnFailureJobMode=isolate set. Refusing.");
+ r = -ENOEXEC;
+ goto fail;
+ }
+
+ if (u->job_running_timeout != USEC_INFINITY && u->job_running_timeout > u->job_timeout)
+ log_unit_warning(u, "JobRunningTimeoutSec= is greater than JobTimeoutSec=, it has no effect.");
+
+ /* We finished loading, let's ensure our parents recalculate the members mask */
+ unit_invalidate_cgroup_members_masks(u);
+ }
+
+ assert((u->load_state != UNIT_MERGED) == !u->merged_into);
+
+ unit_add_to_dbus_queue(unit_follow_merge(u));
+ unit_add_to_gc_queue(u);
+
+ return 0;
+
+fail:
+ /* We convert ENOEXEC errors to the UNIT_BAD_SETTING load state here. Configuration parsing code should hence
+ * return ENOEXEC to ensure units are placed in this state after loading */
+
+ u->load_state = u->load_state == UNIT_STUB ? UNIT_NOT_FOUND :
+ r == -ENOEXEC ? UNIT_BAD_SETTING :
+ UNIT_ERROR;
+ u->load_error = r;
+
+ unit_add_to_dbus_queue(u);
+ unit_add_to_gc_queue(u);
+
+ return log_unit_debug_errno(u, r, "Failed to load configuration: %m");
+}
+
+static bool unit_condition_test_list(Unit *u, Condition *first, const char *(*to_string)(ConditionType t)) {
+ Condition *c;
+ int triggered = -1;
+
+ assert(u);
+ assert(to_string);
+
+ /* If the condition list is empty, then it is true */
+ if (!first)
+ return true;
+
+ /* Otherwise, if all of the non-trigger conditions apply and
+ * if any of the trigger conditions apply (unless there are
+ * none) we return true */
+ LIST_FOREACH(conditions, c, first) {
+ int r;
+
+ r = condition_test(c);
+ if (r < 0)
+ log_unit_warning(u,
+ "Couldn't determine result for %s=%s%s%s, assuming failed: %m",
+ to_string(c->type),
+ c->trigger ? "|" : "",
+ c->negate ? "!" : "",
+ c->parameter);
+ else
+ log_unit_debug(u,
+ "%s=%s%s%s %s.",
+ to_string(c->type),
+ c->trigger ? "|" : "",
+ c->negate ? "!" : "",
+ c->parameter,
+ condition_result_to_string(c->result));
+
+ if (!c->trigger && r <= 0)
+ return false;
+
+ if (c->trigger && triggered <= 0)
+ triggered = r > 0;
+ }
+
+ return triggered != 0;
+}
+
+static bool unit_condition_test(Unit *u) {
+ assert(u);
+
+ dual_timestamp_get(&u->condition_timestamp);
+ u->condition_result = unit_condition_test_list(u, u->conditions, condition_type_to_string);
+
+ unit_add_to_dbus_queue(u);
+
+ return u->condition_result;
+}
+
+static bool unit_assert_test(Unit *u) {
+ assert(u);
+
+ dual_timestamp_get(&u->assert_timestamp);
+ u->assert_result = unit_condition_test_list(u, u->asserts, assert_type_to_string);
+
+ unit_add_to_dbus_queue(u);
+
+ return u->assert_result;
+}
+
+void unit_status_printf(Unit *u, const char *status, const char *unit_status_msg_format) {
+ const char *d;
+
+ d = unit_description(u);
+ if (log_get_show_color())
+ d = strjoina(ANSI_HIGHLIGHT, d, ANSI_NORMAL);
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ manager_status_printf(u->manager, STATUS_TYPE_NORMAL, status, unit_status_msg_format, d);
+ REENABLE_WARNING;
+}
+
+int unit_start_limit_test(Unit *u) {
+ const char *reason;
+
+ assert(u);
+
+ if (ratelimit_below(&u->start_limit)) {
+ u->start_limit_hit = false;
+ return 0;
+ }
+
+ log_unit_warning(u, "Start request repeated too quickly.");
+ u->start_limit_hit = true;
+
+ reason = strjoina("unit ", u->id, " failed");
+
+ return emergency_action(u->manager, u->start_limit_action,
+ EMERGENCY_ACTION_IS_WATCHDOG|EMERGENCY_ACTION_WARN,
+ u->reboot_arg, -1, reason);
+}
+
+bool unit_shall_confirm_spawn(Unit *u) {
+ assert(u);
+
+ if (manager_is_confirm_spawn_disabled(u->manager))
+ return false;
+
+ /* For some reasons units remaining in the same process group
+ * as PID 1 fail to acquire the console even if it's not used
+ * by any process. So skip the confirmation question for them. */
+ return !unit_get_exec_context(u)->same_pgrp;
+}
+
+static bool unit_verify_deps(Unit *u) {
+ Unit *other;
+ Iterator j;
+ void *v;
+
+ assert(u);
+
+ /* Checks whether all BindsTo= dependencies of this unit are fulfilled — if they are also combined with
+ * After=. We do not check Requires= or Requisite= here as they only should have an effect on the job
+ * processing, but do not have any effect afterwards. We don't check BindsTo= dependencies that are not used in
+ * conjunction with After= as for them any such check would make things entirely racy. */
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_BINDS_TO], j) {
+
+ if (!hashmap_contains(u->dependencies[UNIT_AFTER], other))
+ continue;
+
+ if (!UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(other))) {
+ log_unit_notice(u, "Bound to unit %s, but unit isn't active.", other->id);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/* Errors:
+ * -EBADR: This unit type does not support starting.
+ * -EALREADY: Unit is already started.
+ * -EAGAIN: An operation is already in progress. Retry later.
+ * -ECANCELED: Too many requests for now.
+ * -EPROTO: Assert failed
+ * -EINVAL: Unit not loaded
+ * -EOPNOTSUPP: Unit type not supported
+ * -ENOLINK: The necessary dependencies are not fulfilled.
+ * -ESTALE: This unit has been started before and can't be started a second time
+ */
+int unit_start(Unit *u) {
+ UnitActiveState state;
+ Unit *following;
+
+ assert(u);
+
+ /* If this is already started, then this will succeed. Note
+ * that this will even succeed if this unit is not startable
+ * by the user. This is relied on to detect when we need to
+ * wait for units and when waiting is finished. */
+ state = unit_active_state(u);
+ if (UNIT_IS_ACTIVE_OR_RELOADING(state))
+ return -EALREADY;
+
+ /* Units that aren't loaded cannot be started */
+ if (u->load_state != UNIT_LOADED)
+ return -EINVAL;
+
+ /* Refuse starting scope units more than once */
+ if (UNIT_VTABLE(u)->once_only && dual_timestamp_is_set(&u->inactive_enter_timestamp))
+ return -ESTALE;
+
+ /* If the conditions failed, don't do anything at all. If we
+ * already are activating this call might still be useful to
+ * speed up activation in case there is some hold-off time,
+ * but we don't want to recheck the condition in that case. */
+ if (state != UNIT_ACTIVATING &&
+ !unit_condition_test(u)) {
+ log_unit_debug(u, "Starting requested but condition failed. Not starting unit.");
+ return -ECOMM;
+ }
+
+ /* If the asserts failed, fail the entire job */
+ if (state != UNIT_ACTIVATING &&
+ !unit_assert_test(u)) {
+ log_unit_notice(u, "Starting requested but asserts failed.");
+ return -EPROTO;
+ }
+
+ /* Units of types that aren't supported cannot be
+ * started. Note that we do this test only after the condition
+ * checks, so that we rather return condition check errors
+ * (which are usually not considered a true failure) than "not
+ * supported" errors (which are considered a failure).
+ */
+ if (!unit_supported(u))
+ return -EOPNOTSUPP;
+
+ /* Let's make sure that the deps really are in order before we start this. Normally the job engine should have
+ * taken care of this already, but let's check this here again. After all, our dependencies might not be in
+ * effect anymore, due to a reload or due to a failed condition. */
+ if (!unit_verify_deps(u))
+ return -ENOLINK;
+
+ /* Forward to the main object, if we aren't it. */
+ following = unit_following(u);
+ if (following) {
+ log_unit_debug(u, "Redirecting start request from %s to %s.", u->id, following->id);
+ return unit_start(following);
+ }
+
+ /* If it is stopped, but we cannot start it, then fail */
+ if (!UNIT_VTABLE(u)->start)
+ return -EBADR;
+
+ /* We don't suppress calls to ->start() here when we are
+ * already starting, to allow this request to be used as a
+ * "hurry up" call, for example when the unit is in some "auto
+ * restart" state where it waits for a holdoff timer to elapse
+ * before it will start again. */
+
+ unit_add_to_dbus_queue(u);
+
+ return UNIT_VTABLE(u)->start(u);
+}
+
+bool unit_can_start(Unit *u) {
+ assert(u);
+
+ if (u->load_state != UNIT_LOADED)
+ return false;
+
+ if (!unit_supported(u))
+ return false;
+
+ /* Scope units may be started only once */
+ if (UNIT_VTABLE(u)->once_only && dual_timestamp_is_set(&u->inactive_exit_timestamp))
+ return false;
+
+ return !!UNIT_VTABLE(u)->start;
+}
+
+bool unit_can_isolate(Unit *u) {
+ assert(u);
+
+ return unit_can_start(u) &&
+ u->allow_isolate;
+}
+
+/* Errors:
+ * -EBADR: This unit type does not support stopping.
+ * -EALREADY: Unit is already stopped.
+ * -EAGAIN: An operation is already in progress. Retry later.
+ */
+int unit_stop(Unit *u) {
+ UnitActiveState state;
+ Unit *following;
+
+ assert(u);
+
+ state = unit_active_state(u);
+ if (UNIT_IS_INACTIVE_OR_FAILED(state))
+ return -EALREADY;
+
+ following = unit_following(u);
+ if (following) {
+ log_unit_debug(u, "Redirecting stop request from %s to %s.", u->id, following->id);
+ return unit_stop(following);
+ }
+
+ if (!UNIT_VTABLE(u)->stop)
+ return -EBADR;
+
+ unit_add_to_dbus_queue(u);
+
+ return UNIT_VTABLE(u)->stop(u);
+}
+
+bool unit_can_stop(Unit *u) {
+ assert(u);
+
+ if (!unit_supported(u))
+ return false;
+
+ if (u->perpetual)
+ return false;
+
+ return !!UNIT_VTABLE(u)->stop;
+}
+
+/* Errors:
+ * -EBADR: This unit type does not support reloading.
+ * -ENOEXEC: Unit is not started.
+ * -EAGAIN: An operation is already in progress. Retry later.
+ */
+int unit_reload(Unit *u) {
+ UnitActiveState state;
+ Unit *following;
+
+ assert(u);
+
+ if (u->load_state != UNIT_LOADED)
+ return -EINVAL;
+
+ if (!unit_can_reload(u))
+ return -EBADR;
+
+ state = unit_active_state(u);
+ if (state == UNIT_RELOADING)
+ return -EALREADY;
+
+ if (state != UNIT_ACTIVE) {
+ log_unit_warning(u, "Unit cannot be reloaded because it is inactive.");
+ return -ENOEXEC;
+ }
+
+ following = unit_following(u);
+ if (following) {
+ log_unit_debug(u, "Redirecting reload request from %s to %s.", u->id, following->id);
+ return unit_reload(following);
+ }
+
+ unit_add_to_dbus_queue(u);
+
+ if (!UNIT_VTABLE(u)->reload) {
+ /* Unit doesn't have a reload function, but we need to propagate the reload anyway */
+ unit_notify(u, unit_active_state(u), unit_active_state(u), 0);
+ return 0;
+ }
+
+ return UNIT_VTABLE(u)->reload(u);
+}
+
+bool unit_can_reload(Unit *u) {
+ assert(u);
+
+ if (UNIT_VTABLE(u)->can_reload)
+ return UNIT_VTABLE(u)->can_reload(u);
+
+ if (!hashmap_isempty(u->dependencies[UNIT_PROPAGATES_RELOAD_TO]))
+ return true;
+
+ return UNIT_VTABLE(u)->reload;
+}
+
+bool unit_is_unneeded(Unit *u) {
+ static const UnitDependency deps[] = {
+ UNIT_REQUIRED_BY,
+ UNIT_REQUISITE_OF,
+ UNIT_WANTED_BY,
+ UNIT_BOUND_BY,
+ };
+ size_t j;
+
+ assert(u);
+
+ if (!u->stop_when_unneeded)
+ return false;
+
+ /* Don't clean up while the unit is transitioning or is even inactive. */
+ if (!UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
+ return false;
+ if (u->job)
+ return false;
+
+ for (j = 0; j < ELEMENTSOF(deps); j++) {
+ Unit *other;
+ Iterator i;
+ void *v;
+
+ /* If a dependent unit has a job queued, is active or transitioning, or is marked for
+ * restart, then don't clean this one up. */
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[deps[j]], i) {
+ if (other->job)
+ return false;
+
+ if (!UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other)))
+ return false;
+
+ if (unit_will_restart(other))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static void check_unneeded_dependencies(Unit *u) {
+
+ static const UnitDependency deps[] = {
+ UNIT_REQUIRES,
+ UNIT_REQUISITE,
+ UNIT_WANTS,
+ UNIT_BINDS_TO,
+ };
+ size_t j;
+
+ assert(u);
+
+ /* Add all units this unit depends on to the queue that processes StopWhenUnneeded= behaviour. */
+
+ for (j = 0; j < ELEMENTSOF(deps); j++) {
+ Unit *other;
+ Iterator i;
+ void *v;
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[deps[j]], i)
+ unit_submit_to_stop_when_unneeded_queue(other);
+ }
+}
+
+static void unit_check_binds_to(Unit *u) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool stop = false;
+ Unit *other;
+ Iterator i;
+ void *v;
+ int r;
+
+ assert(u);
+
+ if (u->job)
+ return;
+
+ if (unit_active_state(u) != UNIT_ACTIVE)
+ return;
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_BINDS_TO], i) {
+ if (other->job)
+ continue;
+
+ if (!other->coldplugged)
+ /* We might yet create a job for the other unit… */
+ continue;
+
+ if (!UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other)))
+ continue;
+
+ stop = true;
+ break;
+ }
+
+ if (!stop)
+ return;
+
+ /* If stopping a unit fails continuously we might enter a stop
+ * loop here, hence stop acting on the service being
+ * unnecessary after a while. */
+ if (!ratelimit_below(&u->auto_stop_ratelimit)) {
+ log_unit_warning(u, "Unit is bound to inactive unit %s, but not stopping since we tried this too often recently.", other->id);
+ return;
+ }
+
+ assert(other);
+ log_unit_info(u, "Unit is bound to inactive unit %s. Stopping, too.", other->id);
+
+ /* A unit we need to run is gone. Sniff. Let's stop this. */
+ r = manager_add_job(u->manager, JOB_STOP, u, JOB_FAIL, &error, NULL);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to enqueue stop job, ignoring: %s", bus_error_message(&error, r));
+}
+
+static void retroactively_start_dependencies(Unit *u) {
+ Iterator i;
+ Unit *other;
+ void *v;
+
+ assert(u);
+ assert(UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u)));
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REQUIRES], i)
+ if (!hashmap_get(u->dependencies[UNIT_AFTER], other) &&
+ !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(other)))
+ manager_add_job(u->manager, JOB_START, other, JOB_REPLACE, NULL, NULL);
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_BINDS_TO], i)
+ if (!hashmap_get(u->dependencies[UNIT_AFTER], other) &&
+ !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(other)))
+ manager_add_job(u->manager, JOB_START, other, JOB_REPLACE, NULL, NULL);
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_WANTS], i)
+ if (!hashmap_get(u->dependencies[UNIT_AFTER], other) &&
+ !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(other)))
+ manager_add_job(u->manager, JOB_START, other, JOB_FAIL, NULL, NULL);
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_CONFLICTS], i)
+ if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
+ manager_add_job(u->manager, JOB_STOP, other, JOB_REPLACE, NULL, NULL);
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_CONFLICTED_BY], i)
+ if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
+ manager_add_job(u->manager, JOB_STOP, other, JOB_REPLACE, NULL, NULL);
+}
+
+static void retroactively_stop_dependencies(Unit *u) {
+ Unit *other;
+ Iterator i;
+ void *v;
+
+ assert(u);
+ assert(UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(u)));
+
+ /* Pull down units which are bound to us recursively if enabled */
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_BOUND_BY], i)
+ if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
+ manager_add_job(u->manager, JOB_STOP, other, JOB_REPLACE, NULL, NULL);
+}
+
+void unit_start_on_failure(Unit *u) {
+ Unit *other;
+ Iterator i;
+ void *v;
+ int r;
+
+ assert(u);
+
+ if (hashmap_size(u->dependencies[UNIT_ON_FAILURE]) <= 0)
+ return;
+
+ log_unit_info(u, "Triggering OnFailure= dependencies.");
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_ON_FAILURE], i) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = manager_add_job(u->manager, JOB_START, other, u->on_failure_job_mode, &error, NULL);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to enqueue OnFailure= job, ignoring: %s", bus_error_message(&error, r));
+ }
+}
+
+void unit_trigger_notify(Unit *u) {
+ Unit *other;
+ Iterator i;
+ void *v;
+
+ assert(u);
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_TRIGGERED_BY], i)
+ if (UNIT_VTABLE(other)->trigger_notify)
+ UNIT_VTABLE(other)->trigger_notify(other, u);
+}
+
+static int unit_log_resources(Unit *u) {
+ struct iovec iovec[1 + _CGROUP_IP_ACCOUNTING_METRIC_MAX + 4];
+ bool any_traffic = false, have_ip_accounting = false;
+ _cleanup_free_ char *igress = NULL, *egress = NULL;
+ size_t n_message_parts = 0, n_iovec = 0;
+ char* message_parts[3 + 1], *t;
+ nsec_t nsec = NSEC_INFINITY;
+ CGroupIPAccountingMetric m;
+ size_t i;
+ int r;
+ const char* const ip_fields[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+ [CGROUP_IP_INGRESS_BYTES] = "IP_METRIC_INGRESS_BYTES",
+ [CGROUP_IP_INGRESS_PACKETS] = "IP_METRIC_INGRESS_PACKETS",
+ [CGROUP_IP_EGRESS_BYTES] = "IP_METRIC_EGRESS_BYTES",
+ [CGROUP_IP_EGRESS_PACKETS] = "IP_METRIC_EGRESS_PACKETS",
+ };
+
+ assert(u);
+
+ /* Invoked whenever a unit enters failed or dead state. Logs information about consumed resources if resource
+ * accounting was enabled for a unit. It does this in two ways: a friendly human readable string with reduced
+ * information and the complete data in structured fields. */
+
+ (void) unit_get_cpu_usage(u, &nsec);
+ if (nsec != NSEC_INFINITY) {
+ char buf[FORMAT_TIMESPAN_MAX] = "";
+
+ /* Format the CPU time for inclusion in the structured log message */
+ if (asprintf(&t, "CPU_USAGE_NSEC=%" PRIu64, nsec) < 0) {
+ r = log_oom();
+ goto finish;
+ }
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+
+ /* Format the CPU time for inclusion in the human language message string */
+ format_timespan(buf, sizeof(buf), nsec / NSEC_PER_USEC, USEC_PER_MSEC);
+ t = strjoin("consumed ", buf, " CPU time");
+ if (!t) {
+ r = log_oom();
+ goto finish;
+ }
+
+ message_parts[n_message_parts++] = t;
+ }
+
+ for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
+ char buf[FORMAT_BYTES_MAX] = "";
+ uint64_t value = UINT64_MAX;
+
+ assert(ip_fields[m]);
+
+ (void) unit_get_ip_accounting(u, m, &value);
+ if (value == UINT64_MAX)
+ continue;
+
+ have_ip_accounting = true;
+ if (value > 0)
+ any_traffic = true;
+
+ /* Format IP accounting data for inclusion in the structured log message */
+ if (asprintf(&t, "%s=%" PRIu64, ip_fields[m], value) < 0) {
+ r = log_oom();
+ goto finish;
+ }
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+
+ /* Format the IP accounting data for inclusion in the human language message string, but only for the
+ * bytes counters (and not for the packets counters) */
+ if (m == CGROUP_IP_INGRESS_BYTES) {
+ assert(!igress);
+ igress = strjoin("received ", format_bytes(buf, sizeof(buf), value), " IP traffic");
+ if (!igress) {
+ r = log_oom();
+ goto finish;
+ }
+ } else if (m == CGROUP_IP_EGRESS_BYTES) {
+ assert(!egress);
+ egress = strjoin("sent ", format_bytes(buf, sizeof(buf), value), " IP traffic");
+ if (!egress) {
+ r = log_oom();
+ goto finish;
+ }
+ }
+ }
+
+ if (have_ip_accounting) {
+ if (any_traffic) {
+ if (igress)
+ message_parts[n_message_parts++] = TAKE_PTR(igress);
+ if (egress)
+ message_parts[n_message_parts++] = TAKE_PTR(egress);
+
+ } else {
+ char *k;
+
+ k = strdup("no IP traffic");
+ if (!k) {
+ r = log_oom();
+ goto finish;
+ }
+
+ message_parts[n_message_parts++] = k;
+ }
+ }
+
+ /* Is there any accounting data available at all? */
+ if (n_iovec == 0) {
+ r = 0;
+ goto finish;
+ }
+
+ if (n_message_parts == 0)
+ t = strjoina("MESSAGE=", u->id, ": Completed.");
+ else {
+ _cleanup_free_ char *joined;
+
+ message_parts[n_message_parts] = NULL;
+
+ joined = strv_join(message_parts, ", ");
+ if (!joined) {
+ r = log_oom();
+ goto finish;
+ }
+
+ joined[0] = ascii_toupper(joined[0]);
+ t = strjoina("MESSAGE=", u->id, ": ", joined, ".");
+ }
+
+ /* The following four fields we allocate on the stack or are static strings, we hence don't want to free them,
+ * and hence don't increase n_iovec for them */
+ iovec[n_iovec] = IOVEC_MAKE_STRING(t);
+ iovec[n_iovec + 1] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_UNIT_RESOURCES_STR);
+
+ t = strjoina(u->manager->unit_log_field, u->id);
+ iovec[n_iovec + 2] = IOVEC_MAKE_STRING(t);
+
+ t = strjoina(u->manager->invocation_log_field, u->invocation_id_string);
+ iovec[n_iovec + 3] = IOVEC_MAKE_STRING(t);
+
+ log_struct_iovec(LOG_INFO, iovec, n_iovec + 4);
+ r = 0;
+
+finish:
+ for (i = 0; i < n_message_parts; i++)
+ free(message_parts[i]);
+
+ for (i = 0; i < n_iovec; i++)
+ free(iovec[i].iov_base);
+
+ return r;
+
+}
+
+static void unit_update_on_console(Unit *u) {
+ bool b;
+
+ assert(u);
+
+ b = unit_needs_console(u);
+ if (u->on_console == b)
+ return;
+
+ u->on_console = b;
+ if (b)
+ manager_ref_console(u->manager);
+ else
+ manager_unref_console(u->manager);
+}
+
+static void unit_emit_audit_start(Unit *u) {
+ assert(u);
+
+ if (u->type != UNIT_SERVICE)
+ return;
+
+ /* Write audit record if we have just finished starting up */
+ manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_START, true);
+ u->in_audit = true;
+}
+
+static void unit_emit_audit_stop(Unit *u, UnitActiveState state) {
+ assert(u);
+
+ if (u->type != UNIT_SERVICE)
+ return;
+
+ if (u->in_audit) {
+ /* Write audit record if we have just finished shutting down */
+ manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_STOP, state == UNIT_INACTIVE);
+ u->in_audit = false;
+ } else {
+ /* Hmm, if there was no start record written write it now, so that we always have a nice pair */
+ manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_START, state == UNIT_INACTIVE);
+
+ if (state == UNIT_INACTIVE)
+ manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_STOP, true);
+ }
+}
+
+static bool unit_process_job(Job *j, UnitActiveState ns, UnitNotifyFlags flags) {
+ bool unexpected = false;
+
+ assert(j);
+
+ if (j->state == JOB_WAITING)
+
+ /* So we reached a different state for this job. Let's see if we can run it now if it failed previously
+ * due to EAGAIN. */
+ job_add_to_run_queue(j);
+
+ /* Let's check whether the unit's new state constitutes a finished job, or maybe contradicts a running job and
+ * hence needs to invalidate jobs. */
+
+ switch (j->type) {
+
+ case JOB_START:
+ case JOB_VERIFY_ACTIVE:
+
+ if (UNIT_IS_ACTIVE_OR_RELOADING(ns))
+ job_finish_and_invalidate(j, JOB_DONE, true, false);
+ else if (j->state == JOB_RUNNING && ns != UNIT_ACTIVATING) {
+ unexpected = true;
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns))
+ job_finish_and_invalidate(j, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true, false);
+ }
+
+ break;
+
+ case JOB_RELOAD:
+ case JOB_RELOAD_OR_START:
+ case JOB_TRY_RELOAD:
+
+ if (j->state == JOB_RUNNING) {
+ if (ns == UNIT_ACTIVE)
+ job_finish_and_invalidate(j, (flags & UNIT_NOTIFY_RELOAD_FAILURE) ? JOB_FAILED : JOB_DONE, true, false);
+ else if (!IN_SET(ns, UNIT_ACTIVATING, UNIT_RELOADING)) {
+ unexpected = true;
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns))
+ job_finish_and_invalidate(j, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true, false);
+ }
+ }
+
+ break;
+
+ case JOB_STOP:
+ case JOB_RESTART:
+ case JOB_TRY_RESTART:
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns))
+ job_finish_and_invalidate(j, JOB_DONE, true, false);
+ else if (j->state == JOB_RUNNING && ns != UNIT_DEACTIVATING) {
+ unexpected = true;
+ job_finish_and_invalidate(j, JOB_FAILED, true, false);
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Job type unknown");
+ }
+
+ return unexpected;
+}
+
+void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlags flags) {
+ const char *reason;
+ Manager *m;
+
+ assert(u);
+ assert(os < _UNIT_ACTIVE_STATE_MAX);
+ assert(ns < _UNIT_ACTIVE_STATE_MAX);
+
+ /* Note that this is called for all low-level state changes, even if they might map to the same high-level
+ * UnitActiveState! That means that ns == os is an expected behavior here. For example: if a mount point is
+ * remounted this function will be called too! */
+
+ m = u->manager;
+
+ /* Let's enqueue the change signal early. In case this unit has a job associated we want that this unit is in
+ * the bus queue, so that any job change signal queued will force out the unit change signal first. */
+ unit_add_to_dbus_queue(u);
+
+ /* Update timestamps for state changes */
+ if (!MANAGER_IS_RELOADING(m)) {
+ dual_timestamp_get(&u->state_change_timestamp);
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(os) && !UNIT_IS_INACTIVE_OR_FAILED(ns))
+ u->inactive_exit_timestamp = u->state_change_timestamp;
+ else if (!UNIT_IS_INACTIVE_OR_FAILED(os) && UNIT_IS_INACTIVE_OR_FAILED(ns))
+ u->inactive_enter_timestamp = u->state_change_timestamp;
+
+ if (!UNIT_IS_ACTIVE_OR_RELOADING(os) && UNIT_IS_ACTIVE_OR_RELOADING(ns))
+ u->active_enter_timestamp = u->state_change_timestamp;
+ else if (UNIT_IS_ACTIVE_OR_RELOADING(os) && !UNIT_IS_ACTIVE_OR_RELOADING(ns))
+ u->active_exit_timestamp = u->state_change_timestamp;
+ }
+
+ /* Keep track of failed units */
+ (void) manager_update_failed_units(m, u, ns == UNIT_FAILED);
+
+ /* Make sure the cgroup and state files are always removed when we become inactive */
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns)) {
+ unit_prune_cgroup(u);
+ unit_unlink_state_files(u);
+ }
+
+ unit_update_on_console(u);
+
+ if (!MANAGER_IS_RELOADING(m)) {
+ bool unexpected;
+
+ /* Let's propagate state changes to the job */
+ if (u->job)
+ unexpected = unit_process_job(u->job, ns, flags);
+ else
+ unexpected = true;
+
+ /* If this state change happened without being requested by a job, then let's retroactively start or
+ * stop dependencies. We skip that step when deserializing, since we don't want to create any
+ * additional jobs just because something is already activated. */
+
+ if (unexpected) {
+ if (UNIT_IS_INACTIVE_OR_FAILED(os) && UNIT_IS_ACTIVE_OR_ACTIVATING(ns))
+ retroactively_start_dependencies(u);
+ else if (UNIT_IS_ACTIVE_OR_ACTIVATING(os) && UNIT_IS_INACTIVE_OR_DEACTIVATING(ns))
+ retroactively_stop_dependencies(u);
+ }
+
+ /* stop unneeded units regardless if going down was expected or not */
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns))
+ check_unneeded_dependencies(u);
+
+ if (ns != os && ns == UNIT_FAILED) {
+ log_unit_debug(u, "Unit entered failed state.");
+
+ if (!(flags & UNIT_NOTIFY_WILL_AUTO_RESTART))
+ unit_start_on_failure(u);
+ }
+
+ if (UNIT_IS_ACTIVE_OR_RELOADING(ns) && !UNIT_IS_ACTIVE_OR_RELOADING(os)) {
+ /* This unit just finished starting up */
+
+ unit_emit_audit_start(u);
+ manager_send_unit_plymouth(m, u);
+ }
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns) && !UNIT_IS_INACTIVE_OR_FAILED(os)) {
+ /* This unit just stopped/failed. */
+
+ unit_emit_audit_stop(u, ns);
+ unit_log_resources(u);
+ }
+ }
+
+ manager_recheck_journal(m);
+ manager_recheck_dbus(m);
+
+ unit_trigger_notify(u);
+
+ if (!MANAGER_IS_RELOADING(m)) {
+ /* Maybe we finished startup and are now ready for being stopped because unneeded? */
+ unit_submit_to_stop_when_unneeded_queue(u);
+
+ /* Maybe we finished startup, but something we needed has vanished? Let's die then. (This happens when
+ * something BindsTo= to a Type=oneshot unit, as these units go directly from starting to inactive,
+ * without ever entering started.) */
+ unit_check_binds_to(u);
+
+ if (os != UNIT_FAILED && ns == UNIT_FAILED) {
+ reason = strjoina("unit ", u->id, " failed");
+ (void) emergency_action(m, u->failure_action, 0, u->reboot_arg, unit_failure_action_exit_status(u), reason);
+ } else if (!UNIT_IS_INACTIVE_OR_FAILED(os) && ns == UNIT_INACTIVE) {
+ reason = strjoina("unit ", u->id, " succeeded");
+ (void) emergency_action(m, u->success_action, 0, u->reboot_arg, unit_success_action_exit_status(u), reason);
+ }
+ }
+
+ unit_add_to_gc_queue(u);
+}
+
+int unit_watch_pid(Unit *u, pid_t pid) {
+ int r;
+
+ assert(u);
+ assert(pid_is_valid(pid));
+
+ /* Watch a specific PID */
+
+ r = set_ensure_allocated(&u->pids, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&u->manager->watch_pids, NULL);
+ if (r < 0)
+ return r;
+
+ /* First try, let's add the unit keyed by "pid". */
+ r = hashmap_put(u->manager->watch_pids, PID_TO_PTR(pid), u);
+ if (r == -EEXIST) {
+ Unit **array;
+ bool found = false;
+ size_t n = 0;
+
+ /* OK, the "pid" key is already assigned to a different unit. Let's see if the "-pid" key (which points
+ * to an array of Units rather than just a Unit), lists us already. */
+
+ array = hashmap_get(u->manager->watch_pids, PID_TO_PTR(-pid));
+ if (array)
+ for (; array[n]; n++)
+ if (array[n] == u)
+ found = true;
+
+ if (found) /* Found it already? if so, do nothing */
+ r = 0;
+ else {
+ Unit **new_array;
+
+ /* Allocate a new array */
+ new_array = new(Unit*, n + 2);
+ if (!new_array)
+ return -ENOMEM;
+
+ memcpy_safe(new_array, array, sizeof(Unit*) * n);
+ new_array[n] = u;
+ new_array[n+1] = NULL;
+
+ /* Add or replace the old array */
+ r = hashmap_replace(u->manager->watch_pids, PID_TO_PTR(-pid), new_array);
+ if (r < 0) {
+ free(new_array);
+ return r;
+ }
+
+ free(array);
+ }
+ } else if (r < 0)
+ return r;
+
+ r = set_put(u->pids, PID_TO_PTR(pid));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+void unit_unwatch_pid(Unit *u, pid_t pid) {
+ Unit **array;
+
+ assert(u);
+ assert(pid_is_valid(pid));
+
+ /* First let's drop the unit in case it's keyed as "pid". */
+ (void) hashmap_remove_value(u->manager->watch_pids, PID_TO_PTR(pid), u);
+
+ /* Then, let's also drop the unit, in case it's in the array keyed by -pid */
+ array = hashmap_get(u->manager->watch_pids, PID_TO_PTR(-pid));
+ if (array) {
+ size_t n, m = 0;
+
+ /* Let's iterate through the array, dropping our own entry */
+ for (n = 0; array[n]; n++)
+ if (array[n] != u)
+ array[m++] = array[n];
+ array[m] = NULL;
+
+ if (m == 0) {
+ /* The array is now empty, remove the entire entry */
+ assert(hashmap_remove(u->manager->watch_pids, PID_TO_PTR(-pid)) == array);
+ free(array);
+ }
+ }
+
+ (void) set_remove(u->pids, PID_TO_PTR(pid));
+}
+
+void unit_unwatch_all_pids(Unit *u) {
+ assert(u);
+
+ while (!set_isempty(u->pids))
+ unit_unwatch_pid(u, PTR_TO_PID(set_first(u->pids)));
+
+ u->pids = set_free(u->pids);
+}
+
+static void unit_tidy_watch_pids(Unit *u) {
+ pid_t except1, except2;
+ Iterator i;
+ void *e;
+
+ assert(u);
+
+ /* Cleans dead PIDs from our list */
+
+ except1 = unit_main_pid(u);
+ except2 = unit_control_pid(u);
+
+ SET_FOREACH(e, u->pids, i) {
+ pid_t pid = PTR_TO_PID(e);
+
+ if (pid == except1 || pid == except2)
+ continue;
+
+ if (!pid_is_unwaited(pid))
+ unit_unwatch_pid(u, pid);
+ }
+}
+
+static int on_rewatch_pids_event(sd_event_source *s, void *userdata) {
+ Unit *u = userdata;
+
+ assert(s);
+ assert(u);
+
+ unit_tidy_watch_pids(u);
+ unit_watch_all_pids(u);
+
+ /* If the PID set is empty now, then let's finish this off. */
+ unit_synthesize_cgroup_empty_event(u);
+
+ return 0;
+}
+
+int unit_enqueue_rewatch_pids(Unit *u) {
+ int r;
+
+ assert(u);
+
+ if (!u->cgroup_path)
+ return -ENOENT;
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return r;
+ if (r > 0) /* On unified we can use proper notifications */
+ return 0;
+
+ /* Enqueues a low-priority job that will clean up dead PIDs from our list of PIDs to watch and subscribe to new
+ * PIDs that might have appeared. We do this in a delayed job because the work might be quite slow, as it
+ * involves issuing kill(pid, 0) on all processes we watch. */
+
+ if (!u->rewatch_pids_event_source) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
+
+ r = sd_event_add_defer(u->manager->event, &s, on_rewatch_pids_event, u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event source for tidying watched PIDs: %m");
+
+ r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust priority of event source for tidying watched PIDs: m");
+
+ (void) sd_event_source_set_description(s, "tidy-watch-pids");
+
+ u->rewatch_pids_event_source = TAKE_PTR(s);
+ }
+
+ r = sd_event_source_set_enabled(u->rewatch_pids_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable event source for tidying watched PIDs: %m");
+
+ return 0;
+}
+
+void unit_dequeue_rewatch_pids(Unit *u) {
+ int r;
+ assert(u);
+
+ if (!u->rewatch_pids_event_source)
+ return;
+
+ r = sd_event_source_set_enabled(u->rewatch_pids_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ log_warning_errno(r, "Failed to disable event source for tidying watched PIDs, ignoring: %m");
+
+ u->rewatch_pids_event_source = sd_event_source_unref(u->rewatch_pids_event_source);
+}
+
+bool unit_job_is_applicable(Unit *u, JobType j) {
+ assert(u);
+ assert(j >= 0 && j < _JOB_TYPE_MAX);
+
+ switch (j) {
+
+ case JOB_VERIFY_ACTIVE:
+ case JOB_START:
+ case JOB_NOP:
+ /* Note that we don't check unit_can_start() here. That's because .device units and suchlike are not
+ * startable by us but may appear due to external events, and it thus makes sense to permit enqueing
+ * jobs for it. */
+ return true;
+
+ case JOB_STOP:
+ /* Similar as above. However, perpetual units can never be stopped (neither explicitly nor due to
+ * external events), hence it makes no sense to permit enqueing such a request either. */
+ return !u->perpetual;
+
+ case JOB_RESTART:
+ case JOB_TRY_RESTART:
+ return unit_can_stop(u) && unit_can_start(u);
+
+ case JOB_RELOAD:
+ case JOB_TRY_RELOAD:
+ return unit_can_reload(u);
+
+ case JOB_RELOAD_OR_START:
+ return unit_can_reload(u) && unit_can_start(u);
+
+ default:
+ assert_not_reached("Invalid job type");
+ }
+}
+
+static void maybe_warn_about_dependency(Unit *u, const char *other, UnitDependency dependency) {
+ assert(u);
+
+ /* Only warn about some unit types */
+ if (!IN_SET(dependency, UNIT_CONFLICTS, UNIT_CONFLICTED_BY, UNIT_BEFORE, UNIT_AFTER, UNIT_ON_FAILURE, UNIT_TRIGGERS, UNIT_TRIGGERED_BY))
+ return;
+
+ if (streq_ptr(u->id, other))
+ log_unit_warning(u, "Dependency %s=%s dropped", unit_dependency_to_string(dependency), u->id);
+ else
+ log_unit_warning(u, "Dependency %s=%s dropped, merged into %s", unit_dependency_to_string(dependency), strna(other), u->id);
+}
+
+static int unit_add_dependency_hashmap(
+ Hashmap **h,
+ Unit *other,
+ UnitDependencyMask origin_mask,
+ UnitDependencyMask destination_mask) {
+
+ UnitDependencyInfo info;
+ int r;
+
+ assert(h);
+ assert(other);
+ assert(origin_mask < _UNIT_DEPENDENCY_MASK_FULL);
+ assert(destination_mask < _UNIT_DEPENDENCY_MASK_FULL);
+ assert(origin_mask > 0 || destination_mask > 0);
+
+ r = hashmap_ensure_allocated(h, NULL);
+ if (r < 0)
+ return r;
+
+ assert_cc(sizeof(void*) == sizeof(info));
+
+ info.data = hashmap_get(*h, other);
+ if (info.data) {
+ /* Entry already exists. Add in our mask. */
+
+ if (FLAGS_SET(origin_mask, info.origin_mask) &&
+ FLAGS_SET(destination_mask, info.destination_mask))
+ return 0; /* NOP */
+
+ info.origin_mask |= origin_mask;
+ info.destination_mask |= destination_mask;
+
+ r = hashmap_update(*h, other, info.data);
+ } else {
+ info = (UnitDependencyInfo) {
+ .origin_mask = origin_mask,
+ .destination_mask = destination_mask,
+ };
+
+ r = hashmap_put(*h, other, info.data);
+ }
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int unit_add_dependency(
+ Unit *u,
+ UnitDependency d,
+ Unit *other,
+ bool add_reference,
+ UnitDependencyMask mask) {
+
+ static const UnitDependency inverse_table[_UNIT_DEPENDENCY_MAX] = {
+ [UNIT_REQUIRES] = UNIT_REQUIRED_BY,
+ [UNIT_WANTS] = UNIT_WANTED_BY,
+ [UNIT_REQUISITE] = UNIT_REQUISITE_OF,
+ [UNIT_BINDS_TO] = UNIT_BOUND_BY,
+ [UNIT_PART_OF] = UNIT_CONSISTS_OF,
+ [UNIT_REQUIRED_BY] = UNIT_REQUIRES,
+ [UNIT_REQUISITE_OF] = UNIT_REQUISITE,
+ [UNIT_WANTED_BY] = UNIT_WANTS,
+ [UNIT_BOUND_BY] = UNIT_BINDS_TO,
+ [UNIT_CONSISTS_OF] = UNIT_PART_OF,
+ [UNIT_CONFLICTS] = UNIT_CONFLICTED_BY,
+ [UNIT_CONFLICTED_BY] = UNIT_CONFLICTS,
+ [UNIT_BEFORE] = UNIT_AFTER,
+ [UNIT_AFTER] = UNIT_BEFORE,
+ [UNIT_ON_FAILURE] = _UNIT_DEPENDENCY_INVALID,
+ [UNIT_REFERENCES] = UNIT_REFERENCED_BY,
+ [UNIT_REFERENCED_BY] = UNIT_REFERENCES,
+ [UNIT_TRIGGERS] = UNIT_TRIGGERED_BY,
+ [UNIT_TRIGGERED_BY] = UNIT_TRIGGERS,
+ [UNIT_PROPAGATES_RELOAD_TO] = UNIT_RELOAD_PROPAGATED_FROM,
+ [UNIT_RELOAD_PROPAGATED_FROM] = UNIT_PROPAGATES_RELOAD_TO,
+ [UNIT_JOINS_NAMESPACE_OF] = UNIT_JOINS_NAMESPACE_OF,
+ };
+ Unit *original_u = u, *original_other = other;
+ int r;
+
+ assert(u);
+ assert(d >= 0 && d < _UNIT_DEPENDENCY_MAX);
+ assert(other);
+
+ u = unit_follow_merge(u);
+ other = unit_follow_merge(other);
+
+ /* We won't allow dependencies on ourselves. We will not
+ * consider them an error however. */
+ if (u == other) {
+ maybe_warn_about_dependency(original_u, original_other->id, d);
+ return 0;
+ }
+
+ if ((d == UNIT_BEFORE && other->type == UNIT_DEVICE) ||
+ (d == UNIT_AFTER && u->type == UNIT_DEVICE)) {
+ log_unit_warning(u, "Dependency Before=%s ignored (.device units cannot be delayed)", other->id);
+ return 0;
+ }
+
+ r = unit_add_dependency_hashmap(u->dependencies + d, other, mask, 0);
+ if (r < 0)
+ return r;
+
+ if (inverse_table[d] != _UNIT_DEPENDENCY_INVALID && inverse_table[d] != d) {
+ r = unit_add_dependency_hashmap(other->dependencies + inverse_table[d], u, 0, mask);
+ if (r < 0)
+ return r;
+ }
+
+ if (add_reference) {
+ r = unit_add_dependency_hashmap(u->dependencies + UNIT_REFERENCES, other, mask, 0);
+ if (r < 0)
+ return r;
+
+ r = unit_add_dependency_hashmap(other->dependencies + UNIT_REFERENCED_BY, u, 0, mask);
+ if (r < 0)
+ return r;
+ }
+
+ unit_add_to_dbus_queue(u);
+ return 0;
+}
+
+int unit_add_two_dependencies(Unit *u, UnitDependency d, UnitDependency e, Unit *other, bool add_reference, UnitDependencyMask mask) {
+ int r;
+
+ assert(u);
+
+ r = unit_add_dependency(u, d, other, add_reference, mask);
+ if (r < 0)
+ return r;
+
+ return unit_add_dependency(u, e, other, add_reference, mask);
+}
+
+static int resolve_template(Unit *u, const char *name, char **buf, const char **ret) {
+ int r;
+
+ assert(u);
+ assert(name);
+ assert(buf);
+ assert(ret);
+
+ if (!unit_name_is_valid(name, UNIT_NAME_TEMPLATE)) {
+ *buf = NULL;
+ *ret = name;
+ return 0;
+ }
+
+ if (u->instance)
+ r = unit_name_replace_instance(name, u->instance, buf);
+ else {
+ _cleanup_free_ char *i = NULL;
+
+ r = unit_name_to_prefix(u->id, &i);
+ if (r < 0)
+ return r;
+
+ r = unit_name_replace_instance(name, i, buf);
+ }
+ if (r < 0)
+ return r;
+
+ *ret = *buf;
+ return 0;
+}
+
+int unit_add_dependency_by_name(Unit *u, UnitDependency d, const char *name, bool add_reference, UnitDependencyMask mask) {
+ _cleanup_free_ char *buf = NULL;
+ Unit *other;
+ int r;
+
+ assert(u);
+ assert(name);
+
+ r = resolve_template(u, name, &buf, &name);
+ if (r < 0)
+ return r;
+
+ r = manager_load_unit(u->manager, name, NULL, NULL, &other);
+ if (r < 0)
+ return r;
+
+ return unit_add_dependency(u, d, other, add_reference, mask);
+}
+
+int unit_add_two_dependencies_by_name(Unit *u, UnitDependency d, UnitDependency e, const char *name, bool add_reference, UnitDependencyMask mask) {
+ _cleanup_free_ char *buf = NULL;
+ Unit *other;
+ int r;
+
+ assert(u);
+ assert(name);
+
+ r = resolve_template(u, name, &buf, &name);
+ if (r < 0)
+ return r;
+
+ r = manager_load_unit(u->manager, name, NULL, NULL, &other);
+ if (r < 0)
+ return r;
+
+ return unit_add_two_dependencies(u, d, e, other, add_reference, mask);
+}
+
+int set_unit_path(const char *p) {
+ /* This is mostly for debug purposes */
+ if (setenv("SYSTEMD_UNIT_PATH", p, 1) < 0)
+ return -errno;
+
+ return 0;
+}
+
+char *unit_dbus_path(Unit *u) {
+ assert(u);
+
+ if (!u->id)
+ return NULL;
+
+ return unit_dbus_path_from_name(u->id);
+}
+
+char *unit_dbus_path_invocation_id(Unit *u) {
+ assert(u);
+
+ if (sd_id128_is_null(u->invocation_id))
+ return NULL;
+
+ return unit_dbus_path_from_name(u->invocation_id_string);
+}
+
+int unit_set_slice(Unit *u, Unit *slice) {
+ assert(u);
+ assert(slice);
+
+ /* Sets the unit slice if it has not been set before. Is extra
+ * careful, to only allow this for units that actually have a
+ * cgroup context. Also, we don't allow to set this for slices
+ * (since the parent slice is derived from the name). Make
+ * sure the unit we set is actually a slice. */
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return -EOPNOTSUPP;
+
+ if (u->type == UNIT_SLICE)
+ return -EINVAL;
+
+ if (unit_active_state(u) != UNIT_INACTIVE)
+ return -EBUSY;
+
+ if (slice->type != UNIT_SLICE)
+ return -EINVAL;
+
+ if (unit_has_name(u, SPECIAL_INIT_SCOPE) &&
+ !unit_has_name(slice, SPECIAL_ROOT_SLICE))
+ return -EPERM;
+
+ if (UNIT_DEREF(u->slice) == slice)
+ return 0;
+
+ /* Disallow slice changes if @u is already bound to cgroups */
+ if (UNIT_ISSET(u->slice) && u->cgroup_realized)
+ return -EBUSY;
+
+ unit_ref_set(&u->slice, u, slice);
+ return 1;
+}
+
+int unit_set_default_slice(Unit *u) {
+ const char *slice_name;
+ Unit *slice;
+ int r;
+
+ assert(u);
+
+ if (UNIT_ISSET(u->slice))
+ return 0;
+
+ if (u->instance) {
+ _cleanup_free_ char *prefix = NULL, *escaped = NULL;
+
+ /* Implicitly place all instantiated units in their
+ * own per-template slice */
+
+ r = unit_name_to_prefix(u->id, &prefix);
+ if (r < 0)
+ return r;
+
+ /* The prefix is already escaped, but it might include
+ * "-" which has a special meaning for slice units,
+ * hence escape it here extra. */
+ escaped = unit_name_escape(prefix);
+ if (!escaped)
+ return -ENOMEM;
+
+ if (MANAGER_IS_SYSTEM(u->manager))
+ slice_name = strjoina("system-", escaped, ".slice");
+ else
+ slice_name = strjoina(escaped, ".slice");
+ } else
+ slice_name =
+ MANAGER_IS_SYSTEM(u->manager) && !unit_has_name(u, SPECIAL_INIT_SCOPE)
+ ? SPECIAL_SYSTEM_SLICE
+ : SPECIAL_ROOT_SLICE;
+
+ r = manager_load_unit(u->manager, slice_name, NULL, NULL, &slice);
+ if (r < 0)
+ return r;
+
+ return unit_set_slice(u, slice);
+}
+
+const char *unit_slice_name(Unit *u) {
+ assert(u);
+
+ if (!UNIT_ISSET(u->slice))
+ return NULL;
+
+ return UNIT_DEREF(u->slice)->id;
+}
+
+int unit_load_related_unit(Unit *u, const char *type, Unit **_found) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(u);
+ assert(type);
+ assert(_found);
+
+ r = unit_name_change_suffix(u->id, type, &t);
+ if (r < 0)
+ return r;
+ if (unit_has_name(u, t))
+ return -EINVAL;
+
+ r = manager_load_unit(u->manager, t, NULL, NULL, _found);
+ assert(r < 0 || *_found != u);
+ return r;
+}
+
+static int signal_name_owner_changed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *name, *old_owner, *new_owner;
+ Unit *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = sd_bus_message_read(message, "sss", &name, &old_owner, &new_owner);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ old_owner = empty_to_null(old_owner);
+ new_owner = empty_to_null(new_owner);
+
+ if (UNIT_VTABLE(u)->bus_name_owner_change)
+ UNIT_VTABLE(u)->bus_name_owner_change(u, name, old_owner, new_owner);
+
+ return 0;
+}
+
+int unit_install_bus_match(Unit *u, sd_bus *bus, const char *name) {
+ const char *match;
+
+ assert(u);
+ assert(bus);
+ assert(name);
+
+ if (u->match_bus_slot)
+ return -EBUSY;
+
+ match = strjoina("type='signal',"
+ "sender='org.freedesktop.DBus',"
+ "path='/org/freedesktop/DBus',"
+ "interface='org.freedesktop.DBus',"
+ "member='NameOwnerChanged',"
+ "arg0='", name, "'");
+
+ return sd_bus_add_match_async(bus, &u->match_bus_slot, match, signal_name_owner_changed, NULL, u);
+}
+
+int unit_watch_bus_name(Unit *u, const char *name) {
+ int r;
+
+ assert(u);
+ assert(name);
+
+ /* Watch a specific name on the bus. We only support one unit
+ * watching each name for now. */
+
+ if (u->manager->api_bus) {
+ /* If the bus is already available, install the match directly.
+ * Otherwise, just put the name in the list. bus_setup_api() will take care later. */
+ r = unit_install_bus_match(u, u->manager->api_bus, name);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to subscribe to NameOwnerChanged signal for '%s': %m", name);
+ }
+
+ r = hashmap_put(u->manager->watch_bus, name, u);
+ if (r < 0) {
+ u->match_bus_slot = sd_bus_slot_unref(u->match_bus_slot);
+ return log_warning_errno(r, "Failed to put bus name to hashmap: %m");
+ }
+
+ return 0;
+}
+
+void unit_unwatch_bus_name(Unit *u, const char *name) {
+ assert(u);
+ assert(name);
+
+ (void) hashmap_remove_value(u->manager->watch_bus, name, u);
+ u->match_bus_slot = sd_bus_slot_unref(u->match_bus_slot);
+}
+
+bool unit_can_serialize(Unit *u) {
+ assert(u);
+
+ return UNIT_VTABLE(u)->serialize && UNIT_VTABLE(u)->deserialize_item;
+}
+
+static int serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert(f);
+ assert(key);
+
+ if (mask == 0)
+ return 0;
+
+ r = cg_mask_to_string(mask, &s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format cgroup mask: %m");
+
+ return serialize_item(f, key, s);
+}
+
+static const char *ip_accounting_metric_field[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+ [CGROUP_IP_INGRESS_BYTES] = "ip-accounting-ingress-bytes",
+ [CGROUP_IP_INGRESS_PACKETS] = "ip-accounting-ingress-packets",
+ [CGROUP_IP_EGRESS_BYTES] = "ip-accounting-egress-bytes",
+ [CGROUP_IP_EGRESS_PACKETS] = "ip-accounting-egress-packets",
+};
+
+int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
+ CGroupIPAccountingMetric m;
+ int r;
+
+ assert(u);
+ assert(f);
+ assert(fds);
+
+ if (unit_can_serialize(u)) {
+ r = UNIT_VTABLE(u)->serialize(u, f, fds);
+ if (r < 0)
+ return r;
+ }
+
+ (void) serialize_dual_timestamp(f, "state-change-timestamp", &u->state_change_timestamp);
+
+ (void) serialize_dual_timestamp(f, "inactive-exit-timestamp", &u->inactive_exit_timestamp);
+ (void) serialize_dual_timestamp(f, "active-enter-timestamp", &u->active_enter_timestamp);
+ (void) serialize_dual_timestamp(f, "active-exit-timestamp", &u->active_exit_timestamp);
+ (void) serialize_dual_timestamp(f, "inactive-enter-timestamp", &u->inactive_enter_timestamp);
+
+ (void) serialize_dual_timestamp(f, "condition-timestamp", &u->condition_timestamp);
+ (void) serialize_dual_timestamp(f, "assert-timestamp", &u->assert_timestamp);
+
+ if (dual_timestamp_is_set(&u->condition_timestamp))
+ (void) serialize_bool(f, "condition-result", u->condition_result);
+
+ if (dual_timestamp_is_set(&u->assert_timestamp))
+ (void) serialize_bool(f, "assert-result", u->assert_result);
+
+ (void) serialize_bool(f, "transient", u->transient);
+ (void) serialize_bool(f, "in-audit", u->in_audit);
+
+ (void) serialize_bool(f, "exported-invocation-id", u->exported_invocation_id);
+ (void) serialize_bool(f, "exported-log-level-max", u->exported_log_level_max);
+ (void) serialize_bool(f, "exported-log-extra-fields", u->exported_log_extra_fields);
+ (void) serialize_bool(f, "exported-log-rate-limit-interval", u->exported_log_rate_limit_interval);
+ (void) serialize_bool(f, "exported-log-rate-limit-burst", u->exported_log_rate_limit_burst);
+
+ (void) serialize_item_format(f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base);
+ if (u->cpu_usage_last != NSEC_INFINITY)
+ (void) serialize_item_format(f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last);
+
+ if (u->cgroup_path)
+ (void) serialize_item(f, "cgroup", u->cgroup_path);
+
+ (void) serialize_bool(f, "cgroup-realized", u->cgroup_realized);
+ (void) serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
+ (void) serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
+ (void) serialize_cgroup_mask(f, "cgroup-invalidated-mask", u->cgroup_invalidated_mask);
+
+ if (uid_is_valid(u->ref_uid))
+ (void) serialize_item_format(f, "ref-uid", UID_FMT, u->ref_uid);
+ if (gid_is_valid(u->ref_gid))
+ (void) serialize_item_format(f, "ref-gid", GID_FMT, u->ref_gid);
+
+ if (!sd_id128_is_null(u->invocation_id))
+ (void) serialize_item_format(f, "invocation-id", SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id));
+
+ bus_track_serialize(u->bus_track, f, "ref");
+
+ for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
+ uint64_t v;
+
+ r = unit_get_ip_accounting(u, m, &v);
+ if (r >= 0)
+ (void) serialize_item_format(f, ip_accounting_metric_field[m], "%" PRIu64, v);
+ }
+
+ if (serialize_jobs) {
+ if (u->job) {
+ fputs("job\n", f);
+ job_serialize(u->job, f);
+ }
+
+ if (u->nop_job) {
+ fputs("job\n", f);
+ job_serialize(u->nop_job, f);
+ }
+ }
+
+ /* End marker */
+ fputc('\n', f);
+ return 0;
+}
+
+static int unit_deserialize_job(Unit *u, FILE *f) {
+ _cleanup_(job_freep) Job *j = NULL;
+ int r;
+
+ assert(u);
+ assert(f);
+
+ j = job_new_raw(u);
+ if (!j)
+ return log_oom();
+
+ r = job_deserialize(j, f);
+ if (r < 0)
+ return r;
+
+ r = job_install_deserialized(j);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(j);
+ return 0;
+}
+
+int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
+ int r;
+
+ assert(u);
+ assert(f);
+ assert(fds);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ CGroupIPAccountingMetric m;
+ char *l, *v;
+ size_t k;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0) /* eof */
+ break;
+
+ l = strstrip(line);
+ if (isempty(l)) /* End marker */
+ break;
+
+ k = strcspn(l, "=");
+
+ if (l[k] == '=') {
+ l[k] = 0;
+ v = l+k+1;
+ } else
+ v = l+k;
+
+ if (streq(l, "job")) {
+ if (v[0] == '\0') {
+ /* New-style serialized job */
+ r = unit_deserialize_job(u, f);
+ if (r < 0)
+ return r;
+ } else /* Legacy for pre-44 */
+ log_unit_warning(u, "Update from too old systemd versions are unsupported, cannot deserialize job: %s", v);
+ continue;
+ } else if (streq(l, "state-change-timestamp")) {
+ (void) deserialize_dual_timestamp(v, &u->state_change_timestamp);
+ continue;
+ } else if (streq(l, "inactive-exit-timestamp")) {
+ (void) deserialize_dual_timestamp(v, &u->inactive_exit_timestamp);
+ continue;
+ } else if (streq(l, "active-enter-timestamp")) {
+ (void) deserialize_dual_timestamp(v, &u->active_enter_timestamp);
+ continue;
+ } else if (streq(l, "active-exit-timestamp")) {
+ (void) deserialize_dual_timestamp(v, &u->active_exit_timestamp);
+ continue;
+ } else if (streq(l, "inactive-enter-timestamp")) {
+ (void) deserialize_dual_timestamp(v, &u->inactive_enter_timestamp);
+ continue;
+ } else if (streq(l, "condition-timestamp")) {
+ (void) deserialize_dual_timestamp(v, &u->condition_timestamp);
+ continue;
+ } else if (streq(l, "assert-timestamp")) {
+ (void) deserialize_dual_timestamp(v, &u->assert_timestamp);
+ continue;
+ } else if (streq(l, "condition-result")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse condition result value %s, ignoring.", v);
+ else
+ u->condition_result = r;
+
+ continue;
+
+ } else if (streq(l, "assert-result")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse assert result value %s, ignoring.", v);
+ else
+ u->assert_result = r;
+
+ continue;
+
+ } else if (streq(l, "transient")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse transient bool %s, ignoring.", v);
+ else
+ u->transient = r;
+
+ continue;
+
+ } else if (streq(l, "in-audit")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse in-audit bool %s, ignoring.", v);
+ else
+ u->in_audit = r;
+
+ continue;
+
+ } else if (streq(l, "exported-invocation-id")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse exported invocation ID bool %s, ignoring.", v);
+ else
+ u->exported_invocation_id = r;
+
+ continue;
+
+ } else if (streq(l, "exported-log-level-max")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse exported log level max bool %s, ignoring.", v);
+ else
+ u->exported_log_level_max = r;
+
+ continue;
+
+ } else if (streq(l, "exported-log-extra-fields")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse exported log extra fields bool %s, ignoring.", v);
+ else
+ u->exported_log_extra_fields = r;
+
+ continue;
+
+ } else if (streq(l, "exported-log-rate-limit-interval")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse exported log rate limit interval %s, ignoring.", v);
+ else
+ u->exported_log_rate_limit_interval = r;
+
+ continue;
+
+ } else if (streq(l, "exported-log-rate-limit-burst")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse exported log rate limit burst %s, ignoring.", v);
+ else
+ u->exported_log_rate_limit_burst = r;
+
+ continue;
+
+ } else if (STR_IN_SET(l, "cpu-usage-base", "cpuacct-usage-base")) {
+
+ r = safe_atou64(v, &u->cpu_usage_base);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse CPU usage base %s, ignoring.", v);
+
+ continue;
+
+ } else if (streq(l, "cpu-usage-last")) {
+
+ r = safe_atou64(v, &u->cpu_usage_last);
+ if (r < 0)
+ log_unit_debug(u, "Failed to read CPU usage last %s, ignoring.", v);
+
+ continue;
+
+ } else if (streq(l, "cgroup")) {
+
+ r = unit_set_cgroup_path(u, v);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to set cgroup path %s, ignoring: %m", v);
+
+ (void) unit_watch_cgroup(u);
+
+ continue;
+ } else if (streq(l, "cgroup-realized")) {
+ int b;
+
+ b = parse_boolean(v);
+ if (b < 0)
+ log_unit_debug(u, "Failed to parse cgroup-realized bool %s, ignoring.", v);
+ else
+ u->cgroup_realized = b;
+
+ continue;
+
+ } else if (streq(l, "cgroup-realized-mask")) {
+
+ r = cg_mask_from_string(v, &u->cgroup_realized_mask);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse cgroup-realized-mask %s, ignoring.", v);
+ continue;
+
+ } else if (streq(l, "cgroup-enabled-mask")) {
+
+ r = cg_mask_from_string(v, &u->cgroup_enabled_mask);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse cgroup-enabled-mask %s, ignoring.", v);
+ continue;
+
+ } else if (streq(l, "cgroup-invalidated-mask")) {
+
+ r = cg_mask_from_string(v, &u->cgroup_invalidated_mask);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse cgroup-invalidated-mask %s, ignoring.", v);
+ continue;
+
+ } else if (streq(l, "ref-uid")) {
+ uid_t uid;
+
+ r = parse_uid(v, &uid);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse referenced UID %s, ignoring.", v);
+ else
+ unit_ref_uid_gid(u, uid, GID_INVALID);
+
+ continue;
+
+ } else if (streq(l, "ref-gid")) {
+ gid_t gid;
+
+ r = parse_gid(v, &gid);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse referenced GID %s, ignoring.", v);
+ else
+ unit_ref_uid_gid(u, UID_INVALID, gid);
+
+ continue;
+
+ } else if (streq(l, "ref")) {
+
+ r = strv_extend(&u->deserialized_refs, v);
+ if (r < 0)
+ return log_oom();
+
+ continue;
+ } else if (streq(l, "invocation-id")) {
+ sd_id128_t id;
+
+ r = sd_id128_from_string(v, &id);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse invocation id %s, ignoring.", v);
+ else {
+ r = unit_set_invocation_id(u, id);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to set invocation ID for unit: %m");
+ }
+
+ continue;
+ }
+
+ /* Check if this is an IP accounting metric serialization field */
+ for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++)
+ if (streq(l, ip_accounting_metric_field[m]))
+ break;
+ if (m < _CGROUP_IP_ACCOUNTING_METRIC_MAX) {
+ uint64_t c;
+
+ r = safe_atou64(v, &c);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse IP accounting value %s, ignoring.", v);
+ else
+ u->ip_accounting_extra[m] = c;
+ continue;
+ }
+
+ if (unit_can_serialize(u)) {
+ r = exec_runtime_deserialize_compat(u, l, v, fds);
+ if (r < 0) {
+ log_unit_warning(u, "Failed to deserialize runtime parameter '%s', ignoring.", l);
+ continue;
+ }
+
+ /* Returns positive if key was handled by the call */
+ if (r > 0)
+ continue;
+
+ r = UNIT_VTABLE(u)->deserialize_item(u, l, v, fds);
+ if (r < 0)
+ log_unit_warning(u, "Failed to deserialize unit parameter '%s', ignoring.", l);
+ }
+ }
+
+ /* Versions before 228 did not carry a state change timestamp. In this case, take the current time. This is
+ * useful, so that timeouts based on this timestamp don't trigger too early, and is in-line with the logic from
+ * before 228 where the base for timeouts was not persistent across reboots. */
+
+ if (!dual_timestamp_is_set(&u->state_change_timestamp))
+ dual_timestamp_get(&u->state_change_timestamp);
+
+ /* Let's make sure that everything that is deserialized also gets any potential new cgroup settings applied
+ * after we are done. For that we invalidate anything already realized, so that we can realize it again. */
+ unit_invalidate_cgroup(u, _CGROUP_MASK_ALL);
+ unit_invalidate_cgroup_bpf(u);
+
+ return 0;
+}
+
+int unit_deserialize_skip(FILE *f) {
+ int r;
+ assert(f);
+
+ /* Skip serialized data for this unit. We don't know what it is. */
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0)
+ return 0;
+
+ l = strstrip(line);
+
+ /* End marker */
+ if (isempty(l))
+ return 1;
+ }
+}
+
+int unit_add_node_dependency(Unit *u, const char *what, bool wants, UnitDependency dep, UnitDependencyMask mask) {
+ Unit *device;
+ _cleanup_free_ char *e = NULL;
+ int r;
+
+ assert(u);
+
+ /* Adds in links to the device node that this unit is based on */
+ if (isempty(what))
+ return 0;
+
+ if (!is_device_path(what))
+ return 0;
+
+ /* When device units aren't supported (such as in a
+ * container), don't create dependencies on them. */
+ if (!unit_type_supported(UNIT_DEVICE))
+ return 0;
+
+ r = unit_name_from_path(what, ".device", &e);
+ if (r < 0)
+ return r;
+
+ r = manager_load_unit(u->manager, e, NULL, NULL, &device);
+ if (r < 0)
+ return r;
+
+ if (dep == UNIT_REQUIRES && device_shall_be_bound_by(device, u))
+ dep = UNIT_BINDS_TO;
+
+ r = unit_add_two_dependencies(u, UNIT_AFTER,
+ MANAGER_IS_SYSTEM(u->manager) ? dep : UNIT_WANTS,
+ device, true, mask);
+ if (r < 0)
+ return r;
+
+ if (wants) {
+ r = unit_add_dependency(device, UNIT_WANTS, u, false, mask);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int unit_coldplug(Unit *u) {
+ int r = 0, q;
+ char **i;
+
+ assert(u);
+
+ /* Make sure we don't enter a loop, when coldplugging recursively. */
+ if (u->coldplugged)
+ return 0;
+
+ u->coldplugged = true;
+
+ STRV_FOREACH(i, u->deserialized_refs) {
+ q = bus_unit_track_add_name(u, *i);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+ u->deserialized_refs = strv_free(u->deserialized_refs);
+
+ if (UNIT_VTABLE(u)->coldplug) {
+ q = UNIT_VTABLE(u)->coldplug(u);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ if (u->job) {
+ q = job_coldplug(u->job);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ return r;
+}
+
+void unit_catchup(Unit *u) {
+ assert(u);
+
+ if (UNIT_VTABLE(u)->catchup)
+ UNIT_VTABLE(u)->catchup(u);
+}
+
+static bool fragment_mtime_newer(const char *path, usec_t mtime, bool path_masked) {
+ struct stat st;
+
+ if (!path)
+ return false;
+
+ /* If the source is some virtual kernel file system, then we assume we watch it anyway, and hence pretend we
+ * are never out-of-date. */
+ if (PATH_STARTSWITH_SET(path, "/proc", "/sys"))
+ return false;
+
+ if (stat(path, &st) < 0)
+ /* What, cannot access this anymore? */
+ return true;
+
+ if (path_masked)
+ /* For masked files check if they are still so */
+ return !null_or_empty(&st);
+ else
+ /* For non-empty files check the mtime */
+ return timespec_load(&st.st_mtim) > mtime;
+
+ return false;
+}
+
+bool unit_need_daemon_reload(Unit *u) {
+ _cleanup_strv_free_ char **t = NULL;
+ char **path;
+
+ assert(u);
+
+ /* For unit files, we allow masking… */
+ if (fragment_mtime_newer(u->fragment_path, u->fragment_mtime,
+ u->load_state == UNIT_MASKED))
+ return true;
+
+ /* Source paths should not be masked… */
+ if (fragment_mtime_newer(u->source_path, u->source_mtime, false))
+ return true;
+
+ if (u->load_state == UNIT_LOADED)
+ (void) unit_find_dropin_paths(u, &t);
+ if (!strv_equal(u->dropin_paths, t))
+ return true;
+
+ /* … any drop-ins that are masked are simply omitted from the list. */
+ STRV_FOREACH(path, u->dropin_paths)
+ if (fragment_mtime_newer(*path, u->dropin_mtime, false))
+ return true;
+
+ return false;
+}
+
+void unit_reset_failed(Unit *u) {
+ assert(u);
+
+ if (UNIT_VTABLE(u)->reset_failed)
+ UNIT_VTABLE(u)->reset_failed(u);
+
+ RATELIMIT_RESET(u->start_limit);
+ u->start_limit_hit = false;
+}
+
+Unit *unit_following(Unit *u) {
+ assert(u);
+
+ if (UNIT_VTABLE(u)->following)
+ return UNIT_VTABLE(u)->following(u);
+
+ return NULL;
+}
+
+bool unit_stop_pending(Unit *u) {
+ assert(u);
+
+ /* This call does check the current state of the unit. It's
+ * hence useful to be called from state change calls of the
+ * unit itself, where the state isn't updated yet. This is
+ * different from unit_inactive_or_pending() which checks both
+ * the current state and for a queued job. */
+
+ return u->job && u->job->type == JOB_STOP;
+}
+
+bool unit_inactive_or_pending(Unit *u) {
+ assert(u);
+
+ /* Returns true if the unit is inactive or going down */
+
+ if (UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(u)))
+ return true;
+
+ if (unit_stop_pending(u))
+ return true;
+
+ return false;
+}
+
+bool unit_active_or_pending(Unit *u) {
+ assert(u);
+
+ /* Returns true if the unit is active or going up */
+
+ if (UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u)))
+ return true;
+
+ if (u->job &&
+ IN_SET(u->job->type, JOB_START, JOB_RELOAD_OR_START, JOB_RESTART))
+ return true;
+
+ return false;
+}
+
+bool unit_will_restart(Unit *u) {
+ assert(u);
+
+ if (!UNIT_VTABLE(u)->will_restart)
+ return false;
+
+ return UNIT_VTABLE(u)->will_restart(u);
+}
+
+int unit_kill(Unit *u, KillWho w, int signo, sd_bus_error *error) {
+ assert(u);
+ assert(w >= 0 && w < _KILL_WHO_MAX);
+ assert(SIGNAL_VALID(signo));
+
+ if (!UNIT_VTABLE(u)->kill)
+ return -EOPNOTSUPP;
+
+ return UNIT_VTABLE(u)->kill(u, w, signo, error);
+}
+
+static Set *unit_pid_set(pid_t main_pid, pid_t control_pid) {
+ _cleanup_set_free_ Set *pid_set = NULL;
+ int r;
+
+ pid_set = set_new(NULL);
+ if (!pid_set)
+ return NULL;
+
+ /* Exclude the main/control pids from being killed via the cgroup */
+ if (main_pid > 0) {
+ r = set_put(pid_set, PID_TO_PTR(main_pid));
+ if (r < 0)
+ return NULL;
+ }
+
+ if (control_pid > 0) {
+ r = set_put(pid_set, PID_TO_PTR(control_pid));
+ if (r < 0)
+ return NULL;
+ }
+
+ return TAKE_PTR(pid_set);
+}
+
+int unit_kill_common(
+ Unit *u,
+ KillWho who,
+ int signo,
+ pid_t main_pid,
+ pid_t control_pid,
+ sd_bus_error *error) {
+
+ int r = 0;
+ bool killed = false;
+
+ if (IN_SET(who, KILL_MAIN, KILL_MAIN_FAIL)) {
+ if (main_pid < 0)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_PROCESS, "%s units have no main processes", unit_type_to_string(u->type));
+ else if (main_pid == 0)
+ return sd_bus_error_set_const(error, BUS_ERROR_NO_SUCH_PROCESS, "No main process to kill");
+ }
+
+ if (IN_SET(who, KILL_CONTROL, KILL_CONTROL_FAIL)) {
+ if (control_pid < 0)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_PROCESS, "%s units have no control processes", unit_type_to_string(u->type));
+ else if (control_pid == 0)
+ return sd_bus_error_set_const(error, BUS_ERROR_NO_SUCH_PROCESS, "No control process to kill");
+ }
+
+ if (IN_SET(who, KILL_CONTROL, KILL_CONTROL_FAIL, KILL_ALL, KILL_ALL_FAIL))
+ if (control_pid > 0) {
+ if (kill(control_pid, signo) < 0)
+ r = -errno;
+ else
+ killed = true;
+ }
+
+ if (IN_SET(who, KILL_MAIN, KILL_MAIN_FAIL, KILL_ALL, KILL_ALL_FAIL))
+ if (main_pid > 0) {
+ if (kill(main_pid, signo) < 0)
+ r = -errno;
+ else
+ killed = true;
+ }
+
+ if (IN_SET(who, KILL_ALL, KILL_ALL_FAIL) && u->cgroup_path) {
+ _cleanup_set_free_ Set *pid_set = NULL;
+ int q;
+
+ /* Exclude the main/control pids from being killed via the cgroup */
+ pid_set = unit_pid_set(main_pid, control_pid);
+ if (!pid_set)
+ return -ENOMEM;
+
+ q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, 0, pid_set, NULL, NULL);
+ if (q < 0 && !IN_SET(q, -EAGAIN, -ESRCH, -ENOENT))
+ r = q;
+ else
+ killed = true;
+ }
+
+ if (r == 0 && !killed && IN_SET(who, KILL_ALL_FAIL, KILL_CONTROL_FAIL))
+ return -ESRCH;
+
+ return r;
+}
+
+int unit_following_set(Unit *u, Set **s) {
+ assert(u);
+ assert(s);
+
+ if (UNIT_VTABLE(u)->following_set)
+ return UNIT_VTABLE(u)->following_set(u, s);
+
+ *s = NULL;
+ return 0;
+}
+
+UnitFileState unit_get_unit_file_state(Unit *u) {
+ int r;
+
+ assert(u);
+
+ if (u->unit_file_state < 0 && u->fragment_path) {
+ r = unit_file_get_state(
+ u->manager->unit_file_scope,
+ NULL,
+ u->id,
+ &u->unit_file_state);
+ if (r < 0)
+ u->unit_file_state = UNIT_FILE_BAD;
+ }
+
+ return u->unit_file_state;
+}
+
+int unit_get_unit_file_preset(Unit *u) {
+ assert(u);
+
+ if (u->unit_file_preset < 0 && u->fragment_path)
+ u->unit_file_preset = unit_file_query_preset(
+ u->manager->unit_file_scope,
+ NULL,
+ basename(u->fragment_path));
+
+ return u->unit_file_preset;
+}
+
+Unit* unit_ref_set(UnitRef *ref, Unit *source, Unit *target) {
+ assert(ref);
+ assert(source);
+ assert(target);
+
+ if (ref->target)
+ unit_ref_unset(ref);
+
+ ref->source = source;
+ ref->target = target;
+ LIST_PREPEND(refs_by_target, target->refs_by_target, ref);
+ return target;
+}
+
+void unit_ref_unset(UnitRef *ref) {
+ assert(ref);
+
+ if (!ref->target)
+ return;
+
+ /* We are about to drop a reference to the unit, make sure the garbage collection has a look at it as it might
+ * be unreferenced now. */
+ unit_add_to_gc_queue(ref->target);
+
+ LIST_REMOVE(refs_by_target, ref->target->refs_by_target, ref);
+ ref->source = ref->target = NULL;
+}
+
+static int user_from_unit_name(Unit *u, char **ret) {
+
+ static const uint8_t hash_key[] = {
+ 0x58, 0x1a, 0xaf, 0xe6, 0x28, 0x58, 0x4e, 0x96,
+ 0xb4, 0x4e, 0xf5, 0x3b, 0x8c, 0x92, 0x07, 0xec
+ };
+
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ r = unit_name_to_prefix(u->id, &n);
+ if (r < 0)
+ return r;
+
+ if (valid_user_group_name(n)) {
+ *ret = TAKE_PTR(n);
+ return 0;
+ }
+
+ /* If we can't use the unit name as a user name, then let's hash it and use that */
+ if (asprintf(ret, "_du%016" PRIx64, siphash24(n, strlen(n), hash_key)) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int unit_patch_contexts(Unit *u) {
+ CGroupContext *cc;
+ ExecContext *ec;
+ unsigned i;
+ int r;
+
+ assert(u);
+
+ /* Patch in the manager defaults into the exec and cgroup
+ * contexts, _after_ the rest of the settings have been
+ * initialized */
+
+ ec = unit_get_exec_context(u);
+ if (ec) {
+ /* This only copies in the ones that need memory */
+ for (i = 0; i < _RLIMIT_MAX; i++)
+ if (u->manager->rlimit[i] && !ec->rlimit[i]) {
+ ec->rlimit[i] = newdup(struct rlimit, u->manager->rlimit[i], 1);
+ if (!ec->rlimit[i])
+ return -ENOMEM;
+ }
+
+ if (MANAGER_IS_USER(u->manager) &&
+ !ec->working_directory) {
+
+ r = get_home_dir(&ec->working_directory);
+ if (r < 0)
+ return r;
+
+ /* Allow user services to run, even if the
+ * home directory is missing */
+ ec->working_directory_missing_ok = true;
+ }
+
+ if (ec->private_devices)
+ ec->capability_bounding_set &= ~((UINT64_C(1) << CAP_MKNOD) | (UINT64_C(1) << CAP_SYS_RAWIO));
+
+ if (ec->protect_kernel_modules)
+ ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE);
+
+ if (ec->dynamic_user) {
+ if (!ec->user) {
+ r = user_from_unit_name(u, &ec->user);
+ if (r < 0)
+ return r;
+ }
+
+ if (!ec->group) {
+ ec->group = strdup(ec->user);
+ if (!ec->group)
+ return -ENOMEM;
+ }
+
+ /* If the dynamic user option is on, let's make sure that the unit can't leave its UID/GID
+ * around in the file system or on IPC objects. Hence enforce a strict sandbox. */
+
+ ec->private_tmp = true;
+ ec->remove_ipc = true;
+ ec->protect_system = PROTECT_SYSTEM_STRICT;
+ if (ec->protect_home == PROTECT_HOME_NO)
+ ec->protect_home = PROTECT_HOME_READ_ONLY;
+ }
+ }
+
+ cc = unit_get_cgroup_context(u);
+ if (cc && ec) {
+
+ if (ec->private_devices &&
+ cc->device_policy == CGROUP_AUTO)
+ cc->device_policy = CGROUP_CLOSED;
+
+ if (ec->root_image &&
+ (cc->device_policy != CGROUP_AUTO || cc->device_allow)) {
+
+ /* When RootImage= is specified, the following devices are touched. */
+ r = cgroup_add_device_allow(cc, "/dev/loop-control", "rw");
+ if (r < 0)
+ return r;
+
+ r = cgroup_add_device_allow(cc, "block-loop", "rwm");
+ if (r < 0)
+ return r;
+
+ r = cgroup_add_device_allow(cc, "block-blkext", "rwm");
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+ExecContext *unit_get_exec_context(Unit *u) {
+ size_t offset;
+ assert(u);
+
+ if (u->type < 0)
+ return NULL;
+
+ offset = UNIT_VTABLE(u)->exec_context_offset;
+ if (offset <= 0)
+ return NULL;
+
+ return (ExecContext*) ((uint8_t*) u + offset);
+}
+
+KillContext *unit_get_kill_context(Unit *u) {
+ size_t offset;
+ assert(u);
+
+ if (u->type < 0)
+ return NULL;
+
+ offset = UNIT_VTABLE(u)->kill_context_offset;
+ if (offset <= 0)
+ return NULL;
+
+ return (KillContext*) ((uint8_t*) u + offset);
+}
+
+CGroupContext *unit_get_cgroup_context(Unit *u) {
+ size_t offset;
+
+ if (u->type < 0)
+ return NULL;
+
+ offset = UNIT_VTABLE(u)->cgroup_context_offset;
+ if (offset <= 0)
+ return NULL;
+
+ return (CGroupContext*) ((uint8_t*) u + offset);
+}
+
+ExecRuntime *unit_get_exec_runtime(Unit *u) {
+ size_t offset;
+
+ if (u->type < 0)
+ return NULL;
+
+ offset = UNIT_VTABLE(u)->exec_runtime_offset;
+ if (offset <= 0)
+ return NULL;
+
+ return *(ExecRuntime**) ((uint8_t*) u + offset);
+}
+
+static const char* unit_drop_in_dir(Unit *u, UnitWriteFlags flags) {
+ assert(u);
+
+ if (UNIT_WRITE_FLAGS_NOOP(flags))
+ return NULL;
+
+ if (u->transient) /* Redirect drop-ins for transient units always into the transient directory. */
+ return u->manager->lookup_paths.transient;
+
+ if (flags & UNIT_PERSISTENT)
+ return u->manager->lookup_paths.persistent_control;
+
+ if (flags & UNIT_RUNTIME)
+ return u->manager->lookup_paths.runtime_control;
+
+ return NULL;
+}
+
+char* unit_escape_setting(const char *s, UnitWriteFlags flags, char **buf) {
+ char *ret = NULL;
+
+ if (!s)
+ return NULL;
+
+ /* Escapes the input string as requested. Returns the escaped string. If 'buf' is specified then the allocated
+ * return buffer pointer is also written to *buf, except if no escaping was necessary, in which case *buf is
+ * set to NULL, and the input pointer is returned as-is. This means the return value always contains a properly
+ * escaped version, but *buf when passed only contains a pointer if an allocation was necessary. If *buf is
+ * not specified, then the return value always needs to be freed. Callers can use this to optimize memory
+ * allocations. */
+
+ if (flags & UNIT_ESCAPE_SPECIFIERS) {
+ ret = specifier_escape(s);
+ if (!ret)
+ return NULL;
+
+ s = ret;
+ }
+
+ if (flags & UNIT_ESCAPE_C) {
+ char *a;
+
+ a = cescape(s);
+ free(ret);
+ if (!a)
+ return NULL;
+
+ ret = a;
+ }
+
+ if (buf) {
+ *buf = ret;
+ return ret ?: (char*) s;
+ }
+
+ return ret ?: strdup(s);
+}
+
+char* unit_concat_strv(char **l, UnitWriteFlags flags) {
+ _cleanup_free_ char *result = NULL;
+ size_t n = 0, allocated = 0;
+ char **i;
+
+ /* Takes a list of strings, escapes them, and concatenates them. This may be used to format command lines in a
+ * way suitable for ExecStart= stanzas */
+
+ STRV_FOREACH(i, l) {
+ _cleanup_free_ char *buf = NULL;
+ const char *p;
+ size_t a;
+ char *q;
+
+ p = unit_escape_setting(*i, flags, &buf);
+ if (!p)
+ return NULL;
+
+ a = (n > 0) + 1 + strlen(p) + 1; /* separating space + " + entry + " */
+ if (!GREEDY_REALLOC(result, allocated, n + a + 1))
+ return NULL;
+
+ q = result + n;
+ if (n > 0)
+ *(q++) = ' ';
+
+ *(q++) = '"';
+ q = stpcpy(q, p);
+ *(q++) = '"';
+
+ n += a;
+ }
+
+ if (!GREEDY_REALLOC(result, allocated, n + 1))
+ return NULL;
+
+ result[n] = 0;
+
+ return TAKE_PTR(result);
+}
+
+int unit_write_setting(Unit *u, UnitWriteFlags flags, const char *name, const char *data) {
+ _cleanup_free_ char *p = NULL, *q = NULL, *escaped = NULL;
+ const char *dir, *wrapped;
+ int r;
+
+ assert(u);
+ assert(name);
+ assert(data);
+
+ if (UNIT_WRITE_FLAGS_NOOP(flags))
+ return 0;
+
+ data = unit_escape_setting(data, flags, &escaped);
+ if (!data)
+ return -ENOMEM;
+
+ /* Prefix the section header. If we are writing this out as transient file, then let's suppress this if the
+ * previous section header is the same */
+
+ if (flags & UNIT_PRIVATE) {
+ if (!UNIT_VTABLE(u)->private_section)
+ return -EINVAL;
+
+ if (!u->transient_file || u->last_section_private < 0)
+ data = strjoina("[", UNIT_VTABLE(u)->private_section, "]\n", data);
+ else if (u->last_section_private == 0)
+ data = strjoina("\n[", UNIT_VTABLE(u)->private_section, "]\n", data);
+ } else {
+ if (!u->transient_file || u->last_section_private < 0)
+ data = strjoina("[Unit]\n", data);
+ else if (u->last_section_private > 0)
+ data = strjoina("\n[Unit]\n", data);
+ }
+
+ if (u->transient_file) {
+ /* When this is a transient unit file in creation, then let's not create a new drop-in but instead
+ * write to the transient unit file. */
+ fputs(data, u->transient_file);
+
+ if (!endswith(data, "\n"))
+ fputc('\n', u->transient_file);
+
+ /* Remember which section we wrote this entry to */
+ u->last_section_private = !!(flags & UNIT_PRIVATE);
+ return 0;
+ }
+
+ dir = unit_drop_in_dir(u, flags);
+ if (!dir)
+ return -EINVAL;
+
+ wrapped = strjoina("# This is a drop-in unit file extension, created via \"systemctl set-property\"\n"
+ "# or an equivalent operation. Do not edit.\n",
+ data,
+ "\n");
+
+ r = drop_in_file(dir, u->id, 50, name, &p, &q);
+ if (r < 0)
+ return r;
+
+ (void) mkdir_p_label(p, 0755);
+ r = write_string_file_atomic_label(q, wrapped);
+ if (r < 0)
+ return r;
+
+ r = strv_push(&u->dropin_paths, q);
+ if (r < 0)
+ return r;
+ q = NULL;
+
+ strv_uniq(u->dropin_paths);
+
+ u->dropin_mtime = now(CLOCK_REALTIME);
+
+ return 0;
+}
+
+int unit_write_settingf(Unit *u, UnitWriteFlags flags, const char *name, const char *format, ...) {
+ _cleanup_free_ char *p = NULL;
+ va_list ap;
+ int r;
+
+ assert(u);
+ assert(name);
+ assert(format);
+
+ if (UNIT_WRITE_FLAGS_NOOP(flags))
+ return 0;
+
+ va_start(ap, format);
+ r = vasprintf(&p, format, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ return unit_write_setting(u, flags, name, p);
+}
+
+int unit_make_transient(Unit *u) {
+ _cleanup_free_ char *path = NULL;
+ FILE *f;
+
+ assert(u);
+
+ if (!UNIT_VTABLE(u)->can_transient)
+ return -EOPNOTSUPP;
+
+ (void) mkdir_p_label(u->manager->lookup_paths.transient, 0755);
+
+ path = strjoin(u->manager->lookup_paths.transient, "/", u->id);
+ if (!path)
+ return -ENOMEM;
+
+ /* Let's open the file we'll write the transient settings into. This file is kept open as long as we are
+ * creating the transient, and is closed in unit_load(), as soon as we start loading the file. */
+
+ RUN_WITH_UMASK(0022) {
+ f = fopen(path, "we");
+ if (!f)
+ return -errno;
+ }
+
+ safe_fclose(u->transient_file);
+ u->transient_file = f;
+
+ free_and_replace(u->fragment_path, path);
+
+ u->source_path = mfree(u->source_path);
+ u->dropin_paths = strv_free(u->dropin_paths);
+ u->fragment_mtime = u->source_mtime = u->dropin_mtime = 0;
+
+ u->load_state = UNIT_STUB;
+ u->load_error = 0;
+ u->transient = true;
+
+ unit_add_to_dbus_queue(u);
+ unit_add_to_gc_queue(u);
+
+ fputs("# This is a transient unit file, created programmatically via the systemd API. Do not edit.\n",
+ u->transient_file);
+
+ return 0;
+}
+
+static void log_kill(pid_t pid, int sig, void *userdata) {
+ _cleanup_free_ char *comm = NULL;
+
+ (void) get_process_comm(pid, &comm);
+
+ /* Don't log about processes marked with brackets, under the assumption that these are temporary processes
+ only, like for example systemd's own PAM stub process. */
+ if (comm && comm[0] == '(')
+ return;
+
+ log_unit_notice(userdata,
+ "Killing process " PID_FMT " (%s) with signal SIG%s.",
+ pid,
+ strna(comm),
+ signal_to_string(sig));
+}
+
+static int operation_to_signal(KillContext *c, KillOperation k) {
+ assert(c);
+
+ switch (k) {
+
+ case KILL_TERMINATE:
+ case KILL_TERMINATE_AND_LOG:
+ return c->kill_signal;
+
+ case KILL_KILL:
+ return c->final_kill_signal;
+
+ case KILL_WATCHDOG:
+ return c->watchdog_signal;
+
+ default:
+ assert_not_reached("KillOperation unknown");
+ }
+}
+
+int unit_kill_context(
+ Unit *u,
+ KillContext *c,
+ KillOperation k,
+ pid_t main_pid,
+ pid_t control_pid,
+ bool main_pid_alien) {
+
+ bool wait_for_exit = false, send_sighup;
+ cg_kill_log_func_t log_func = NULL;
+ int sig, r;
+
+ assert(u);
+ assert(c);
+
+ /* Kill the processes belonging to this unit, in preparation for shutting the unit down.
+ * Returns > 0 if we killed something worth waiting for, 0 otherwise. */
+
+ if (c->kill_mode == KILL_NONE)
+ return 0;
+
+ sig = operation_to_signal(c, k);
+
+ send_sighup =
+ c->send_sighup &&
+ IN_SET(k, KILL_TERMINATE, KILL_TERMINATE_AND_LOG) &&
+ sig != SIGHUP;
+
+ if (k != KILL_TERMINATE || IN_SET(sig, SIGKILL, SIGABRT))
+ log_func = log_kill;
+
+ if (main_pid > 0) {
+ if (log_func)
+ log_func(main_pid, sig, u);
+
+ r = kill_and_sigcont(main_pid, sig);
+ if (r < 0 && r != -ESRCH) {
+ _cleanup_free_ char *comm = NULL;
+ (void) get_process_comm(main_pid, &comm);
+
+ log_unit_warning_errno(u, r, "Failed to kill main process " PID_FMT " (%s), ignoring: %m", main_pid, strna(comm));
+ } else {
+ if (!main_pid_alien)
+ wait_for_exit = true;
+
+ if (r != -ESRCH && send_sighup)
+ (void) kill(main_pid, SIGHUP);
+ }
+ }
+
+ if (control_pid > 0) {
+ if (log_func)
+ log_func(control_pid, sig, u);
+
+ r = kill_and_sigcont(control_pid, sig);
+ if (r < 0 && r != -ESRCH) {
+ _cleanup_free_ char *comm = NULL;
+ (void) get_process_comm(control_pid, &comm);
+
+ log_unit_warning_errno(u, r, "Failed to kill control process " PID_FMT " (%s), ignoring: %m", control_pid, strna(comm));
+ } else {
+ wait_for_exit = true;
+
+ if (r != -ESRCH && send_sighup)
+ (void) kill(control_pid, SIGHUP);
+ }
+ }
+
+ if (u->cgroup_path &&
+ (c->kill_mode == KILL_CONTROL_GROUP || (c->kill_mode == KILL_MIXED && k == KILL_KILL))) {
+ _cleanup_set_free_ Set *pid_set = NULL;
+
+ /* Exclude the main/control pids from being killed via the cgroup */
+ pid_set = unit_pid_set(main_pid, control_pid);
+ if (!pid_set)
+ return -ENOMEM;
+
+ r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ sig,
+ CGROUP_SIGCONT|CGROUP_IGNORE_SELF,
+ pid_set,
+ log_func, u);
+ if (r < 0) {
+ if (!IN_SET(r, -EAGAIN, -ESRCH, -ENOENT))
+ log_unit_warning_errno(u, r, "Failed to kill control group %s, ignoring: %m", u->cgroup_path);
+
+ } else if (r > 0) {
+
+ /* FIXME: For now, on the legacy hierarchy, we will not wait for the cgroup members to die if
+ * we are running in a container or if this is a delegation unit, simply because cgroup
+ * notification is unreliable in these cases. It doesn't work at all in containers, and outside
+ * of containers it can be confused easily by left-over directories in the cgroup — which
+ * however should not exist in non-delegated units. On the unified hierarchy that's different,
+ * there we get proper events. Hence rely on them. */
+
+ if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0 ||
+ (detect_container() == 0 && !unit_cgroup_delegate(u)))
+ wait_for_exit = true;
+
+ if (send_sighup) {
+ set_free(pid_set);
+
+ pid_set = unit_pid_set(main_pid, control_pid);
+ if (!pid_set)
+ return -ENOMEM;
+
+ cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ SIGHUP,
+ CGROUP_IGNORE_SELF,
+ pid_set,
+ NULL, NULL);
+ }
+ }
+ }
+
+ return wait_for_exit;
+}
+
+int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask) {
+ _cleanup_free_ char *p = NULL;
+ UnitDependencyInfo di;
+ int r;
+
+ assert(u);
+ assert(path);
+
+ /* Registers a unit for requiring a certain path and all its prefixes. We keep a hashtable of these paths in
+ * the unit (from the path to the UnitDependencyInfo structure indicating how to the dependency came to
+ * be). However, we build a prefix table for all possible prefixes so that new appearing mount units can easily
+ * determine which units to make themselves a dependency of. */
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ r = hashmap_ensure_allocated(&u->requires_mounts_for, &path_hash_ops);
+ if (r < 0)
+ return r;
+
+ p = strdup(path);
+ if (!p)
+ return -ENOMEM;
+
+ path = path_simplify(p, false);
+
+ if (!path_is_normalized(path))
+ return -EPERM;
+
+ if (hashmap_contains(u->requires_mounts_for, path))
+ return 0;
+
+ di = (UnitDependencyInfo) {
+ .origin_mask = mask
+ };
+
+ r = hashmap_put(u->requires_mounts_for, path, di.data);
+ if (r < 0)
+ return r;
+ p = NULL;
+
+ char prefix[strlen(path) + 1];
+ PATH_FOREACH_PREFIX_MORE(prefix, path) {
+ Set *x;
+
+ x = hashmap_get(u->manager->units_requiring_mounts_for, prefix);
+ if (!x) {
+ _cleanup_free_ char *q = NULL;
+
+ r = hashmap_ensure_allocated(&u->manager->units_requiring_mounts_for, &path_hash_ops);
+ if (r < 0)
+ return r;
+
+ q = strdup(prefix);
+ if (!q)
+ return -ENOMEM;
+
+ x = set_new(NULL);
+ if (!x)
+ return -ENOMEM;
+
+ r = hashmap_put(u->manager->units_requiring_mounts_for, q, x);
+ if (r < 0) {
+ set_free(x);
+ return r;
+ }
+ q = NULL;
+ }
+
+ r = set_put(x, u);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int unit_setup_exec_runtime(Unit *u) {
+ ExecRuntime **rt;
+ size_t offset;
+ Unit *other;
+ Iterator i;
+ void *v;
+ int r;
+
+ offset = UNIT_VTABLE(u)->exec_runtime_offset;
+ assert(offset > 0);
+
+ /* Check if there already is an ExecRuntime for this unit? */
+ rt = (ExecRuntime**) ((uint8_t*) u + offset);
+ if (*rt)
+ return 0;
+
+ /* Try to get it from somebody else */
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_JOINS_NAMESPACE_OF], i) {
+ r = exec_runtime_acquire(u->manager, NULL, other->id, false, rt);
+ if (r == 1)
+ return 1;
+ }
+
+ return exec_runtime_acquire(u->manager, unit_get_exec_context(u), u->id, true, rt);
+}
+
+int unit_setup_dynamic_creds(Unit *u) {
+ ExecContext *ec;
+ DynamicCreds *dcreds;
+ size_t offset;
+
+ assert(u);
+
+ offset = UNIT_VTABLE(u)->dynamic_creds_offset;
+ assert(offset > 0);
+ dcreds = (DynamicCreds*) ((uint8_t*) u + offset);
+
+ ec = unit_get_exec_context(u);
+ assert(ec);
+
+ if (!ec->dynamic_user)
+ return 0;
+
+ return dynamic_creds_acquire(dcreds, u->manager, ec->user, ec->group);
+}
+
+bool unit_type_supported(UnitType t) {
+ if (_unlikely_(t < 0))
+ return false;
+ if (_unlikely_(t >= _UNIT_TYPE_MAX))
+ return false;
+
+ if (!unit_vtable[t]->supported)
+ return true;
+
+ return unit_vtable[t]->supported();
+}
+
+void unit_warn_if_dir_nonempty(Unit *u, const char* where) {
+ int r;
+
+ assert(u);
+ assert(where);
+
+ r = dir_is_empty(where);
+ if (r > 0 || r == -ENOTDIR)
+ return;
+ if (r < 0) {
+ log_unit_warning_errno(u, r, "Failed to check directory %s: %m", where);
+ return;
+ }
+
+ log_struct(LOG_NOTICE,
+ "MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ LOG_UNIT_MESSAGE(u, "Directory %s to mount over is not empty, mounting anyway.", where),
+ "WHERE=%s", where);
+}
+
+int unit_fail_if_noncanonical(Unit *u, const char* where) {
+ _cleanup_free_ char *canonical_where = NULL;
+ int r;
+
+ assert(u);
+ assert(where);
+
+ r = chase_symlinks(where, NULL, CHASE_NONEXISTENT, &canonical_where);
+ if (r < 0) {
+ log_unit_debug_errno(u, r, "Failed to check %s for symlinks, ignoring: %m", where);
+ return 0;
+ }
+
+ /* We will happily ignore a trailing slash (or any redundant slashes) */
+ if (path_equal(where, canonical_where))
+ return 0;
+
+ /* No need to mention "." or "..", they would already have been rejected by unit_name_from_path() */
+ log_struct(LOG_ERR,
+ "MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ LOG_UNIT_MESSAGE(u, "Mount path %s is not canonical (contains a symlink).", where),
+ "WHERE=%s", where);
+
+ return -ELOOP;
+}
+
+bool unit_is_pristine(Unit *u) {
+ assert(u);
+
+ /* Check if the unit already exists or is already around,
+ * in a number of different ways. Note that to cater for unit
+ * types such as slice, we are generally fine with units that
+ * are marked UNIT_LOADED even though nothing was actually
+ * loaded, as those unit types don't require a file on disk. */
+
+ return !(!IN_SET(u->load_state, UNIT_NOT_FOUND, UNIT_LOADED) ||
+ u->fragment_path ||
+ u->source_path ||
+ !strv_isempty(u->dropin_paths) ||
+ u->job ||
+ u->merged_into);
+}
+
+pid_t unit_control_pid(Unit *u) {
+ assert(u);
+
+ if (UNIT_VTABLE(u)->control_pid)
+ return UNIT_VTABLE(u)->control_pid(u);
+
+ return 0;
+}
+
+pid_t unit_main_pid(Unit *u) {
+ assert(u);
+
+ if (UNIT_VTABLE(u)->main_pid)
+ return UNIT_VTABLE(u)->main_pid(u);
+
+ return 0;
+}
+
+static void unit_unref_uid_internal(
+ Unit *u,
+ uid_t *ref_uid,
+ bool destroy_now,
+ void (*_manager_unref_uid)(Manager *m, uid_t uid, bool destroy_now)) {
+
+ assert(u);
+ assert(ref_uid);
+ assert(_manager_unref_uid);
+
+ /* Generic implementation of both unit_unref_uid() and unit_unref_gid(), under the assumption that uid_t and
+ * gid_t are actually the same time, with the same validity rules.
+ *
+ * Drops a reference to UID/GID from a unit. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (!uid_is_valid(*ref_uid))
+ return;
+
+ _manager_unref_uid(u->manager, *ref_uid, destroy_now);
+ *ref_uid = UID_INVALID;
+}
+
+void unit_unref_uid(Unit *u, bool destroy_now) {
+ unit_unref_uid_internal(u, &u->ref_uid, destroy_now, manager_unref_uid);
+}
+
+void unit_unref_gid(Unit *u, bool destroy_now) {
+ unit_unref_uid_internal(u, (uid_t*) &u->ref_gid, destroy_now, manager_unref_gid);
+}
+
+static int unit_ref_uid_internal(
+ Unit *u,
+ uid_t *ref_uid,
+ uid_t uid,
+ bool clean_ipc,
+ int (*_manager_ref_uid)(Manager *m, uid_t uid, bool clean_ipc)) {
+
+ int r;
+
+ assert(u);
+ assert(ref_uid);
+ assert(uid_is_valid(uid));
+ assert(_manager_ref_uid);
+
+ /* Generic implementation of both unit_ref_uid() and unit_ref_guid(), under the assumption that uid_t and gid_t
+ * are actually the same type, and have the same validity rules.
+ *
+ * Adds a reference on a specific UID/GID to this unit. Each unit referencing the same UID/GID maintains a
+ * reference so that we can destroy the UID/GID's IPC resources as soon as this is requested and the counter
+ * drops to zero. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (*ref_uid == uid)
+ return 0;
+
+ if (uid_is_valid(*ref_uid)) /* Already set? */
+ return -EBUSY;
+
+ r = _manager_ref_uid(u->manager, uid, clean_ipc);
+ if (r < 0)
+ return r;
+
+ *ref_uid = uid;
+ return 1;
+}
+
+int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc) {
+ return unit_ref_uid_internal(u, &u->ref_uid, uid, clean_ipc, manager_ref_uid);
+}
+
+int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc) {
+ return unit_ref_uid_internal(u, (uid_t*) &u->ref_gid, (uid_t) gid, clean_ipc, manager_ref_gid);
+}
+
+static int unit_ref_uid_gid_internal(Unit *u, uid_t uid, gid_t gid, bool clean_ipc) {
+ int r = 0, q = 0;
+
+ assert(u);
+
+ /* Reference both a UID and a GID in one go. Either references both, or neither. */
+
+ if (uid_is_valid(uid)) {
+ r = unit_ref_uid(u, uid, clean_ipc);
+ if (r < 0)
+ return r;
+ }
+
+ if (gid_is_valid(gid)) {
+ q = unit_ref_gid(u, gid, clean_ipc);
+ if (q < 0) {
+ if (r > 0)
+ unit_unref_uid(u, false);
+
+ return q;
+ }
+ }
+
+ return r > 0 || q > 0;
+}
+
+int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid) {
+ ExecContext *c;
+ int r;
+
+ assert(u);
+
+ c = unit_get_exec_context(u);
+
+ r = unit_ref_uid_gid_internal(u, uid, gid, c ? c->remove_ipc : false);
+ if (r < 0)
+ return log_unit_warning_errno(u, r, "Couldn't add UID/GID reference to unit, proceeding without: %m");
+
+ return r;
+}
+
+void unit_unref_uid_gid(Unit *u, bool destroy_now) {
+ assert(u);
+
+ unit_unref_uid(u, destroy_now);
+ unit_unref_gid(u, destroy_now);
+}
+
+void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid) {
+ int r;
+
+ assert(u);
+
+ /* This is invoked whenever one of the forked off processes let's us know the UID/GID its user name/group names
+ * resolved to. We keep track of which UID/GID is currently assigned in order to be able to destroy its IPC
+ * objects when no service references the UID/GID anymore. */
+
+ r = unit_ref_uid_gid(u, uid, gid);
+ if (r > 0)
+ unit_add_to_dbus_queue(u);
+}
+
+int unit_set_invocation_id(Unit *u, sd_id128_t id) {
+ int r;
+
+ assert(u);
+
+ /* Set the invocation ID for this unit. If we cannot, this will not roll back, but reset the whole thing. */
+
+ if (sd_id128_equal(u->invocation_id, id))
+ return 0;
+
+ if (!sd_id128_is_null(u->invocation_id))
+ (void) hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u);
+
+ if (sd_id128_is_null(id)) {
+ r = 0;
+ goto reset;
+ }
+
+ r = hashmap_ensure_allocated(&u->manager->units_by_invocation_id, &id128_hash_ops);
+ if (r < 0)
+ goto reset;
+
+ u->invocation_id = id;
+ sd_id128_to_string(id, u->invocation_id_string);
+
+ r = hashmap_put(u->manager->units_by_invocation_id, &u->invocation_id, u);
+ if (r < 0)
+ goto reset;
+
+ return 0;
+
+reset:
+ u->invocation_id = SD_ID128_NULL;
+ u->invocation_id_string[0] = 0;
+ return r;
+}
+
+int unit_acquire_invocation_id(Unit *u) {
+ sd_id128_t id;
+ int r;
+
+ assert(u);
+
+ r = sd_id128_randomize(&id);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to generate invocation ID for unit: %m");
+
+ r = unit_set_invocation_id(u, id);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to set invocation ID for unit: %m");
+
+ unit_add_to_dbus_queue(u);
+ return 0;
+}
+
+int unit_set_exec_params(Unit *u, ExecParameters *p) {
+ int r;
+
+ assert(u);
+ assert(p);
+
+ /* Copy parameters from manager */
+ r = manager_get_effective_environment(u->manager, &p->environment);
+ if (r < 0)
+ return r;
+
+ p->confirm_spawn = manager_get_confirm_spawn(u->manager);
+ p->cgroup_supported = u->manager->cgroup_supported;
+ p->prefix = u->manager->prefix;
+ SET_FLAG(p->flags, EXEC_PASS_LOG_UNIT|EXEC_CHOWN_DIRECTORIES, MANAGER_IS_SYSTEM(u->manager));
+
+ /* Copy paramaters from unit */
+ p->cgroup_path = u->cgroup_path;
+ SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u));
+
+ return 0;
+}
+
+int unit_fork_helper_process(Unit *u, const char *name, pid_t *ret) {
+ int r;
+
+ assert(u);
+ assert(ret);
+
+ /* Forks off a helper process and makes sure it is a member of the unit's cgroup. Returns == 0 in the child,
+ * and > 0 in the parent. The pid parameter is always filled in with the child's PID. */
+
+ (void) unit_realize_cgroup(u);
+
+ r = safe_fork(name, FORK_REOPEN_LOG, ret);
+ if (r != 0)
+ return r;
+
+ (void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE, -1);
+ (void) ignore_signals(SIGPIPE, -1);
+
+ (void) prctl(PR_SET_PDEATHSIG, SIGTERM);
+
+ if (u->cgroup_path) {
+ r = cg_attach_everywhere(u->manager->cgroup_supported, u->cgroup_path, 0, NULL, NULL);
+ if (r < 0) {
+ log_unit_error_errno(u, r, "Failed to join unit cgroup %s: %m", u->cgroup_path);
+ _exit(EXIT_CGROUP);
+ }
+ }
+
+ return 0;
+}
+
+static void unit_update_dependency_mask(Unit *u, UnitDependency d, Unit *other, UnitDependencyInfo di) {
+ assert(u);
+ assert(d >= 0);
+ assert(d < _UNIT_DEPENDENCY_MAX);
+ assert(other);
+
+ if (di.origin_mask == 0 && di.destination_mask == 0) {
+ /* No bit set anymore, let's drop the whole entry */
+ assert_se(hashmap_remove(u->dependencies[d], other));
+ log_unit_debug(u, "%s lost dependency %s=%s", u->id, unit_dependency_to_string(d), other->id);
+ } else
+ /* Mask was reduced, let's update the entry */
+ assert_se(hashmap_update(u->dependencies[d], other, di.data) == 0);
+}
+
+void unit_remove_dependencies(Unit *u, UnitDependencyMask mask) {
+ UnitDependency d;
+
+ assert(u);
+
+ /* Removes all dependencies u has on other units marked for ownership by 'mask'. */
+
+ if (mask == 0)
+ return;
+
+ for (d = 0; d < _UNIT_DEPENDENCY_MAX; d++) {
+ bool done;
+
+ do {
+ UnitDependencyInfo di;
+ Unit *other;
+ Iterator i;
+
+ done = true;
+
+ HASHMAP_FOREACH_KEY(di.data, other, u->dependencies[d], i) {
+ UnitDependency q;
+
+ if ((di.origin_mask & ~mask) == di.origin_mask)
+ continue;
+ di.origin_mask &= ~mask;
+ unit_update_dependency_mask(u, d, other, di);
+
+ /* We updated the dependency from our unit to the other unit now. But most dependencies
+ * imply a reverse dependency. Hence, let's delete that one too. For that we go through
+ * all dependency types on the other unit and delete all those which point to us and
+ * have the right mask set. */
+
+ for (q = 0; q < _UNIT_DEPENDENCY_MAX; q++) {
+ UnitDependencyInfo dj;
+
+ dj.data = hashmap_get(other->dependencies[q], u);
+ if ((dj.destination_mask & ~mask) == dj.destination_mask)
+ continue;
+ dj.destination_mask &= ~mask;
+
+ unit_update_dependency_mask(other, q, u, dj);
+ }
+
+ unit_add_to_gc_queue(other);
+
+ done = false;
+ break;
+ }
+
+ } while (!done);
+ }
+}
+
+static int unit_export_invocation_id(Unit *u) {
+ const char *p;
+ int r;
+
+ assert(u);
+
+ if (u->exported_invocation_id)
+ return 0;
+
+ if (sd_id128_is_null(u->invocation_id))
+ return 0;
+
+ p = strjoina("/run/systemd/units/invocation:", u->id);
+ r = symlink_atomic(u->invocation_id_string, p);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to create invocation ID symlink %s: %m", p);
+
+ u->exported_invocation_id = true;
+ return 0;
+}
+
+static int unit_export_log_level_max(Unit *u, const ExecContext *c) {
+ const char *p;
+ char buf[2];
+ int r;
+
+ assert(u);
+ assert(c);
+
+ if (u->exported_log_level_max)
+ return 0;
+
+ if (c->log_level_max < 0)
+ return 0;
+
+ assert(c->log_level_max <= 7);
+
+ buf[0] = '0' + c->log_level_max;
+ buf[1] = 0;
+
+ p = strjoina("/run/systemd/units/log-level-max:", u->id);
+ r = symlink_atomic(buf, p);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to create maximum log level symlink %s: %m", p);
+
+ u->exported_log_level_max = true;
+ return 0;
+}
+
+static int unit_export_log_extra_fields(Unit *u, const ExecContext *c) {
+ _cleanup_close_ int fd = -1;
+ struct iovec *iovec;
+ const char *p;
+ char *pattern;
+ le64_t *sizes;
+ ssize_t n;
+ size_t i;
+ int r;
+
+ if (u->exported_log_extra_fields)
+ return 0;
+
+ if (c->n_log_extra_fields <= 0)
+ return 0;
+
+ sizes = newa(le64_t, c->n_log_extra_fields);
+ iovec = newa(struct iovec, c->n_log_extra_fields * 2);
+
+ for (i = 0; i < c->n_log_extra_fields; i++) {
+ sizes[i] = htole64(c->log_extra_fields[i].iov_len);
+
+ iovec[i*2] = IOVEC_MAKE(sizes + i, sizeof(le64_t));
+ iovec[i*2+1] = c->log_extra_fields[i];
+ }
+
+ p = strjoina("/run/systemd/units/log-extra-fields:", u->id);
+ pattern = strjoina(p, ".XXXXXX");
+
+ fd = mkostemp_safe(pattern);
+ if (fd < 0)
+ return log_unit_debug_errno(u, fd, "Failed to create extra fields file %s: %m", p);
+
+ n = writev(fd, iovec, c->n_log_extra_fields*2);
+ if (n < 0) {
+ r = log_unit_debug_errno(u, errno, "Failed to write extra fields: %m");
+ goto fail;
+ }
+
+ (void) fchmod(fd, 0644);
+
+ if (rename(pattern, p) < 0) {
+ r = log_unit_debug_errno(u, errno, "Failed to rename extra fields file: %m");
+ goto fail;
+ }
+
+ u->exported_log_extra_fields = true;
+ return 0;
+
+fail:
+ (void) unlink(pattern);
+ return r;
+}
+
+static int unit_export_log_rate_limit_interval(Unit *u, const ExecContext *c) {
+ _cleanup_free_ char *buf = NULL;
+ const char *p;
+ int r;
+
+ assert(u);
+ assert(c);
+
+ if (u->exported_log_rate_limit_interval)
+ return 0;
+
+ if (c->log_rate_limit_interval_usec == 0)
+ return 0;
+
+ p = strjoina("/run/systemd/units/log-rate-limit-interval:", u->id);
+
+ if (asprintf(&buf, "%" PRIu64, c->log_rate_limit_interval_usec) < 0)
+ return log_oom();
+
+ r = symlink_atomic(buf, p);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to create log rate limit interval symlink %s: %m", p);
+
+ u->exported_log_rate_limit_interval = true;
+ return 0;
+}
+
+static int unit_export_log_rate_limit_burst(Unit *u, const ExecContext *c) {
+ _cleanup_free_ char *buf = NULL;
+ const char *p;
+ int r;
+
+ assert(u);
+ assert(c);
+
+ if (u->exported_log_rate_limit_burst)
+ return 0;
+
+ if (c->log_rate_limit_burst == 0)
+ return 0;
+
+ p = strjoina("/run/systemd/units/log-rate-limit-burst:", u->id);
+
+ if (asprintf(&buf, "%u", c->log_rate_limit_burst) < 0)
+ return log_oom();
+
+ r = symlink_atomic(buf, p);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to create log rate limit burst symlink %s: %m", p);
+
+ u->exported_log_rate_limit_burst = true;
+ return 0;
+}
+
+void unit_export_state_files(Unit *u) {
+ const ExecContext *c;
+
+ assert(u);
+
+ if (!u->id)
+ return;
+
+ if (!MANAGER_IS_SYSTEM(u->manager))
+ return;
+
+ if (MANAGER_IS_TEST_RUN(u->manager))
+ return;
+
+ /* Exports a couple of unit properties to /run/systemd/units/, so that journald can quickly query this data
+ * from there. Ideally, journald would use IPC to query this, like everybody else, but that's hard, as long as
+ * the IPC system itself and PID 1 also log to the journal.
+ *
+ * Note that these files really shouldn't be considered API for anyone else, as use a runtime file system as
+ * IPC replacement is not compatible with today's world of file system namespaces. However, this doesn't really
+ * apply to communication between the journal and systemd, as we assume that these two daemons live in the same
+ * namespace at least.
+ *
+ * Note that some of the "files" exported here are actually symlinks and not regular files. Symlinks work
+ * better for storing small bits of data, in particular as we can write them with two system calls, and read
+ * them with one. */
+
+ (void) unit_export_invocation_id(u);
+
+ c = unit_get_exec_context(u);
+ if (c) {
+ (void) unit_export_log_level_max(u, c);
+ (void) unit_export_log_extra_fields(u, c);
+ (void) unit_export_log_rate_limit_interval(u, c);
+ (void) unit_export_log_rate_limit_burst(u, c);
+ }
+}
+
+void unit_unlink_state_files(Unit *u) {
+ const char *p;
+
+ assert(u);
+
+ if (!u->id)
+ return;
+
+ if (!MANAGER_IS_SYSTEM(u->manager))
+ return;
+
+ /* Undoes the effect of unit_export_state() */
+
+ if (u->exported_invocation_id) {
+ p = strjoina("/run/systemd/units/invocation:", u->id);
+ (void) unlink(p);
+
+ u->exported_invocation_id = false;
+ }
+
+ if (u->exported_log_level_max) {
+ p = strjoina("/run/systemd/units/log-level-max:", u->id);
+ (void) unlink(p);
+
+ u->exported_log_level_max = false;
+ }
+
+ if (u->exported_log_extra_fields) {
+ p = strjoina("/run/systemd/units/extra-fields:", u->id);
+ (void) unlink(p);
+
+ u->exported_log_extra_fields = false;
+ }
+
+ if (u->exported_log_rate_limit_interval) {
+ p = strjoina("/run/systemd/units/log-rate-limit-interval:", u->id);
+ (void) unlink(p);
+
+ u->exported_log_rate_limit_interval = false;
+ }
+
+ if (u->exported_log_rate_limit_burst) {
+ p = strjoina("/run/systemd/units/log-rate-limit-burst:", u->id);
+ (void) unlink(p);
+
+ u->exported_log_rate_limit_burst = false;
+ }
+}
+
+int unit_prepare_exec(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Prepares everything so that we can fork of a process for this unit */
+
+ (void) unit_realize_cgroup(u);
+
+ if (u->reset_accounting) {
+ (void) unit_reset_cpu_accounting(u);
+ (void) unit_reset_ip_accounting(u);
+ u->reset_accounting = false;
+ }
+
+ unit_export_state_files(u);
+
+ r = unit_setup_exec_runtime(u);
+ if (r < 0)
+ return r;
+
+ r = unit_setup_dynamic_creds(u);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void log_leftover(pid_t pid, int sig, void *userdata) {
+ _cleanup_free_ char *comm = NULL;
+
+ (void) get_process_comm(pid, &comm);
+
+ if (comm && comm[0] == '(') /* Most likely our own helper process (PAM?), ignore */
+ return;
+
+ log_unit_warning(userdata,
+ "Found left-over process " PID_FMT " (%s) in control group while starting unit. Ignoring.\n"
+ "This usually indicates unclean termination of a previous run, or service implementation deficiencies.",
+ pid, strna(comm));
+}
+
+void unit_warn_leftover_processes(Unit *u) {
+ assert(u);
+
+ (void) unit_pick_cgroup_path(u);
+
+ if (!u->cgroup_path)
+ return;
+
+ (void) cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, 0, 0, NULL, log_leftover, u);
+}
+
+bool unit_needs_console(Unit *u) {
+ ExecContext *ec;
+ UnitActiveState state;
+
+ assert(u);
+
+ state = unit_active_state(u);
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(state))
+ return false;
+
+ if (UNIT_VTABLE(u)->needs_console)
+ return UNIT_VTABLE(u)->needs_console(u);
+
+ /* If this unit type doesn't implement this call, let's use a generic fallback implementation: */
+ ec = unit_get_exec_context(u);
+ if (!ec)
+ return false;
+
+ return exec_context_may_touch_console(ec);
+}
+
+const char *unit_label_path(Unit *u) {
+ const char *p;
+
+ /* Returns the file system path to use for MAC access decisions, i.e. the file to read the SELinux label off
+ * when validating access checks. */
+
+ p = u->source_path ?: u->fragment_path;
+ if (!p)
+ return NULL;
+
+ /* If a unit is masked, then don't read the SELinux label of /dev/null, as that really makes no sense */
+ if (path_equal(p, "/dev/null"))
+ return NULL;
+
+ return p;
+}
+
+int unit_pid_attachable(Unit *u, pid_t pid, sd_bus_error *error) {
+ int r;
+
+ assert(u);
+
+ /* Checks whether the specified PID is generally good for attaching, i.e. a valid PID, not our manager itself,
+ * and not a kernel thread either */
+
+ /* First, a simple range check */
+ if (!pid_is_valid(pid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process identifier " PID_FMT " is not valid.", pid);
+
+ /* Some extra safety check */
+ if (pid == 1 || pid == getpid_cached())
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process " PID_FMT " is a manager process, refusing.", pid);
+
+ /* Don't even begin to bother with kernel threads */
+ r = is_kernel_thread(pid);
+ if (r == -ESRCH)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_UNIX_PROCESS_ID_UNKNOWN, "Process with ID " PID_FMT " does not exist.", pid);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to determine whether process " PID_FMT " is a kernel thread: %m", pid);
+ if (r > 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process " PID_FMT " is a kernel thread, refusing.", pid);
+
+ return 0;
+}
+
+void unit_log_success(Unit *u) {
+ assert(u);
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_SUCCESS_STR,
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ LOG_UNIT_MESSAGE(u, "Succeeded."));
+}
+
+void unit_log_failure(Unit *u, const char *result) {
+ assert(u);
+ assert(result);
+
+ log_struct(LOG_WARNING,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_FAILURE_RESULT_STR,
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ LOG_UNIT_MESSAGE(u, "Failed with result '%s'.", result),
+ "UNIT_RESULT=%s", result);
+}
+
+void unit_log_process_exit(
+ Unit *u,
+ int level,
+ const char *kind,
+ const char *command,
+ int code,
+ int status) {
+
+ assert(u);
+ assert(kind);
+
+ if (code != CLD_EXITED)
+ level = LOG_WARNING;
+
+ log_struct(level,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_PROCESS_EXIT_STR,
+ LOG_UNIT_MESSAGE(u, "%s exited, code=%s, status=%i/%s",
+ kind,
+ sigchld_code_to_string(code), status,
+ strna(code == CLD_EXITED
+ ? exit_status_to_string(status, EXIT_STATUS_FULL)
+ : signal_to_string(status))),
+ "EXIT_CODE=%s", sigchld_code_to_string(code),
+ "EXIT_STATUS=%i", status,
+ "COMMAND=%s", strna(command),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u));
+}
+
+int unit_exit_status(Unit *u) {
+ assert(u);
+
+ /* Returns the exit status to propagate for the most recent cycle of this unit. Returns a value in the range
+ * 0…255 if there's something to propagate. EOPNOTSUPP if the concept does not apply to this unit type, ENODATA
+ * if no data is currently known (for example because the unit hasn't deactivated yet) and EBADE if the main
+ * service process has exited abnormally (signal/coredump). */
+
+ if (!UNIT_VTABLE(u)->exit_status)
+ return -EOPNOTSUPP;
+
+ return UNIT_VTABLE(u)->exit_status(u);
+}
+
+int unit_failure_action_exit_status(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Returns the exit status to propagate on failure, or an error if there's nothing to propagate */
+
+ if (u->failure_action_exit_status >= 0)
+ return u->failure_action_exit_status;
+
+ r = unit_exit_status(u);
+ if (r == -EBADE) /* Exited, but not cleanly (i.e. by signal or such) */
+ return 255;
+
+ return r;
+}
+
+int unit_success_action_exit_status(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Returns the exit status to propagate on success, or an error if there's nothing to propagate */
+
+ if (u->success_action_exit_status >= 0)
+ return u->success_action_exit_status;
+
+ r = unit_exit_status(u);
+ if (r == -EBADE) /* Exited, but not cleanly (i.e. by signal or such) */
+ return 255;
+
+ return r;
+}
+
+static const char* const collect_mode_table[_COLLECT_MODE_MAX] = {
+ [COLLECT_INACTIVE] = "inactive",
+ [COLLECT_INACTIVE_OR_FAILED] = "inactive-or-failed",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(collect_mode, CollectMode);
diff --git a/src/core/unit.h b/src/core/unit.h
new file mode 100644
index 0000000..43cf157
--- /dev/null
+++ b/src/core/unit.h
@@ -0,0 +1,856 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "bpf-program.h"
+#include "condition.h"
+#include "emergency-action.h"
+#include "install.h"
+#include "list.h"
+#include "unit-name.h"
+#include "cgroup.h"
+
+typedef struct UnitRef UnitRef;
+
+typedef enum KillOperation {
+ KILL_TERMINATE,
+ KILL_TERMINATE_AND_LOG,
+ KILL_KILL,
+ KILL_WATCHDOG,
+ _KILL_OPERATION_MAX,
+ _KILL_OPERATION_INVALID = -1
+} KillOperation;
+
+typedef enum CollectMode {
+ COLLECT_INACTIVE,
+ COLLECT_INACTIVE_OR_FAILED,
+ _COLLECT_MODE_MAX,
+ _COLLECT_MODE_INVALID = -1,
+} CollectMode;
+
+static inline bool UNIT_IS_ACTIVE_OR_RELOADING(UnitActiveState t) {
+ return IN_SET(t, UNIT_ACTIVE, UNIT_RELOADING);
+}
+
+static inline bool UNIT_IS_ACTIVE_OR_ACTIVATING(UnitActiveState t) {
+ return IN_SET(t, UNIT_ACTIVE, UNIT_ACTIVATING, UNIT_RELOADING);
+}
+
+static inline bool UNIT_IS_INACTIVE_OR_DEACTIVATING(UnitActiveState t) {
+ return IN_SET(t, UNIT_INACTIVE, UNIT_FAILED, UNIT_DEACTIVATING);
+}
+
+static inline bool UNIT_IS_INACTIVE_OR_FAILED(UnitActiveState t) {
+ return IN_SET(t, UNIT_INACTIVE, UNIT_FAILED);
+}
+
+/* Stores the 'reason' a dependency was created as a bit mask, i.e. due to which configuration source it came to be. We
+ * use this so that we can selectively flush out parts of dependencies again. Note that the same dependency might be
+ * created as a result of multiple "reasons", hence the bitmask. */
+typedef enum UnitDependencyMask {
+ /* Configured directly by the unit file, .wants/.requries symlink or drop-in, or as an immediate result of a
+ * non-dependency option configured that way. */
+ UNIT_DEPENDENCY_FILE = 1 << 0,
+
+ /* As unconditional implicit dependency (not affected by unit configuration — except by the unit name and
+ * type) */
+ UNIT_DEPENDENCY_IMPLICIT = 1 << 1,
+
+ /* A dependency effected by DefaultDependencies=yes. Note that dependencies marked this way are conceptually
+ * just a subset of UNIT_DEPENDENCY_FILE, as DefaultDependencies= is itself a unit file setting that can only
+ * be set in unit files. We make this two separate bits only to help debugging how dependencies came to be. */
+ UNIT_DEPENDENCY_DEFAULT = 1 << 2,
+
+ /* A dependency created from udev rules */
+ UNIT_DEPENDENCY_UDEV = 1 << 3,
+
+ /* A dependency created because of some unit's RequiresMountsFor= setting */
+ UNIT_DEPENDENCY_PATH = 1 << 4,
+
+ /* A dependency created because of data read from /proc/self/mountinfo and no other configuration source */
+ UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT = 1 << 5,
+
+ /* A dependency created because of data read from /proc/self/mountinfo, but conditionalized by
+ * DefaultDependencies= and thus also involving configuration from UNIT_DEPENDENCY_FILE sources */
+ UNIT_DEPENDENCY_MOUNTINFO_DEFAULT = 1 << 6,
+
+ /* A dependency created because of data read from /proc/swaps and no other configuration source */
+ UNIT_DEPENDENCY_PROC_SWAP = 1 << 7,
+
+ _UNIT_DEPENDENCY_MASK_FULL = (1 << 8) - 1,
+} UnitDependencyMask;
+
+/* The Unit's dependencies[] hashmaps use this structure as value. It has the same size as a void pointer, and thus can
+ * be stored directly as hashmap value, without any indirection. Note that this stores two masks, as both the origin
+ * and the destination of a dependency might have created it. */
+typedef union UnitDependencyInfo {
+ void *data;
+ struct {
+ UnitDependencyMask origin_mask:16;
+ UnitDependencyMask destination_mask:16;
+ } _packed_;
+} UnitDependencyInfo;
+
+#include "job.h"
+
+struct UnitRef {
+ /* Keeps tracks of references to a unit. This is useful so
+ * that we can merge two units if necessary and correct all
+ * references to them */
+
+ Unit *source, *target;
+ LIST_FIELDS(UnitRef, refs_by_target);
+};
+
+typedef struct Unit {
+ Manager *manager;
+
+ UnitType type;
+ UnitLoadState load_state;
+ Unit *merged_into;
+
+ char *id; /* One name is special because we use it for identification. Points to an entry in the names set */
+ char *instance;
+
+ Set *names;
+
+ /* For each dependency type we maintain a Hashmap whose key is the Unit* object, and the value encodes why the
+ * dependency exists, using the UnitDependencyInfo type */
+ Hashmap *dependencies[_UNIT_DEPENDENCY_MAX];
+
+ /* Similar, for RequiresMountsFor= path dependencies. The key is the path, the value the UnitDependencyInfo type */
+ Hashmap *requires_mounts_for;
+
+ char *description;
+ char **documentation;
+
+ char *fragment_path; /* if loaded from a config file this is the primary path to it */
+ char *source_path; /* if converted, the source file */
+ char **dropin_paths;
+
+ usec_t fragment_mtime;
+ usec_t source_mtime;
+ usec_t dropin_mtime;
+
+ /* If this is a transient unit we are currently writing, this is where we are writing it to */
+ FILE *transient_file;
+
+ /* If there is something to do with this unit, then this is the installed job for it */
+ Job *job;
+
+ /* JOB_NOP jobs are special and can be installed without disturbing the real job. */
+ Job *nop_job;
+
+ /* The slot used for watching NameOwnerChanged signals */
+ sd_bus_slot *match_bus_slot;
+
+ /* References to this unit from clients */
+ sd_bus_track *bus_track;
+ char **deserialized_refs;
+
+ /* Job timeout and action to take */
+ usec_t job_timeout;
+ usec_t job_running_timeout;
+ bool job_running_timeout_set:1;
+ EmergencyAction job_timeout_action;
+ char *job_timeout_reboot_arg;
+
+ /* References to this */
+ LIST_HEAD(UnitRef, refs_by_target);
+
+ /* Conditions to check */
+ LIST_HEAD(Condition, conditions);
+ LIST_HEAD(Condition, asserts);
+
+ dual_timestamp condition_timestamp;
+ dual_timestamp assert_timestamp;
+
+ /* Updated whenever the low-level state changes */
+ dual_timestamp state_change_timestamp;
+
+ /* Updated whenever the (high-level) active state enters or leaves the active or inactive states */
+ dual_timestamp inactive_exit_timestamp;
+ dual_timestamp active_enter_timestamp;
+ dual_timestamp active_exit_timestamp;
+ dual_timestamp inactive_enter_timestamp;
+
+ UnitRef slice;
+
+ /* Per type list */
+ LIST_FIELDS(Unit, units_by_type);
+
+ /* Load queue */
+ LIST_FIELDS(Unit, load_queue);
+
+ /* D-Bus queue */
+ LIST_FIELDS(Unit, dbus_queue);
+
+ /* Cleanup queue */
+ LIST_FIELDS(Unit, cleanup_queue);
+
+ /* GC queue */
+ LIST_FIELDS(Unit, gc_queue);
+
+ /* CGroup realize members queue */
+ LIST_FIELDS(Unit, cgroup_realize_queue);
+
+ /* cgroup empty queue */
+ LIST_FIELDS(Unit, cgroup_empty_queue);
+
+ /* Target dependencies queue */
+ LIST_FIELDS(Unit, target_deps_queue);
+
+ /* Queue of units with StopWhenUnneeded set that shell be checked for clean-up. */
+ LIST_FIELDS(Unit, stop_when_unneeded_queue);
+
+ /* PIDs we keep an eye on. Note that a unit might have many
+ * more, but these are the ones we care enough about to
+ * process SIGCHLD for */
+ Set *pids;
+
+ /* Used in SIGCHLD and sd_notify() message event invocation logic to avoid that we dispatch the same event
+ * multiple times on the same unit. */
+ unsigned sigchldgen;
+ unsigned notifygen;
+
+ /* Used during GC sweeps */
+ unsigned gc_marker;
+
+ /* Error code when we didn't manage to load the unit (negative) */
+ int load_error;
+
+ /* Put a ratelimit on unit starting */
+ RateLimit start_limit;
+ EmergencyAction start_limit_action;
+
+ /* What to do on failure or success */
+ EmergencyAction success_action, failure_action;
+ int success_action_exit_status, failure_action_exit_status;
+ char *reboot_arg;
+
+ /* Make sure we never enter endless loops with the check unneeded logic, or the BindsTo= logic */
+ RateLimit auto_stop_ratelimit;
+
+ /* Reference to a specific UID/GID */
+ uid_t ref_uid;
+ gid_t ref_gid;
+
+ /* Cached unit file state and preset */
+ UnitFileState unit_file_state;
+ int unit_file_preset;
+
+ /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */
+ nsec_t cpu_usage_base;
+ nsec_t cpu_usage_last; /* the most recently read value */
+
+ /* Counterparts in the cgroup filesystem */
+ char *cgroup_path;
+ CGroupMask cgroup_realized_mask; /* In which hierarchies does this unit's cgroup exist? (only relevant on cgroup v1) */
+ CGroupMask cgroup_enabled_mask; /* Which controllers are enabled (or more correctly: enabled for the children) for this unit's cgroup? (only relevant on cgroup v2) */
+ CGroupMask cgroup_invalidated_mask; /* A mask specifiying controllers which shall be considered invalidated, and require re-realization */
+ CGroupMask cgroup_members_mask; /* A cache for the controllers required by all children of this cgroup (only relevant for slice units) */
+ int cgroup_inotify_wd;
+
+ /* Device Controller BPF program */
+ BPFProgram *bpf_device_control_installed;
+
+ /* IP BPF Firewalling/accounting */
+ int ip_accounting_ingress_map_fd;
+ int ip_accounting_egress_map_fd;
+
+ int ipv4_allow_map_fd;
+ int ipv6_allow_map_fd;
+ int ipv4_deny_map_fd;
+ int ipv6_deny_map_fd;
+
+ BPFProgram *ip_bpf_ingress, *ip_bpf_ingress_installed;
+ BPFProgram *ip_bpf_egress, *ip_bpf_egress_installed;
+
+ uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
+
+ /* Low-priority event source which is used to remove watched PIDs that have gone away, and subscribe to any new
+ * ones which might have appeared. */
+ sd_event_source *rewatch_pids_event_source;
+
+ /* How to start OnFailure units */
+ JobMode on_failure_job_mode;
+
+ /* Tweaking the GC logic */
+ CollectMode collect_mode;
+
+ /* The current invocation ID */
+ sd_id128_t invocation_id;
+ char invocation_id_string[SD_ID128_STRING_MAX]; /* useful when logging */
+
+ /* Garbage collect us we nobody wants or requires us anymore */
+ bool stop_when_unneeded;
+
+ /* Create default dependencies */
+ bool default_dependencies;
+
+ /* Refuse manual starting, allow starting only indirectly via dependency. */
+ bool refuse_manual_start;
+
+ /* Don't allow the user to stop this unit manually, allow stopping only indirectly via dependency. */
+ bool refuse_manual_stop;
+
+ /* Allow isolation requests */
+ bool allow_isolate;
+
+ /* Ignore this unit when isolating */
+ bool ignore_on_isolate;
+
+ /* Did the last condition check succeed? */
+ bool condition_result;
+ bool assert_result;
+
+ /* Is this a transient unit? */
+ bool transient;
+
+ /* Is this a unit that is always running and cannot be stopped? */
+ bool perpetual;
+
+ /* Booleans indicating membership of this unit in the various queues */
+ bool in_load_queue:1;
+ bool in_dbus_queue:1;
+ bool in_cleanup_queue:1;
+ bool in_gc_queue:1;
+ bool in_cgroup_realize_queue:1;
+ bool in_cgroup_empty_queue:1;
+ bool in_target_deps_queue:1;
+ bool in_stop_when_unneeded_queue:1;
+
+ bool sent_dbus_new_signal:1;
+
+ bool in_audit:1;
+ bool on_console:1;
+
+ bool cgroup_realized:1;
+ bool cgroup_members_mask_valid:1;
+
+ /* Reset cgroup accounting next time we fork something off */
+ bool reset_accounting:1;
+
+ bool start_limit_hit:1;
+
+ /* Did we already invoke unit_coldplug() for this unit? */
+ bool coldplugged:1;
+
+ /* For transient units: whether to add a bus track reference after creating the unit */
+ bool bus_track_add:1;
+
+ /* Remember which unit state files we created */
+ bool exported_invocation_id:1;
+ bool exported_log_level_max:1;
+ bool exported_log_extra_fields:1;
+ bool exported_log_rate_limit_interval:1;
+ bool exported_log_rate_limit_burst:1;
+
+ /* When writing transient unit files, stores which section we stored last. If < 0, we didn't write any yet. If
+ * == 0 we are in the [Unit] section, if > 0 we are in the unit type-specific section. */
+ signed int last_section_private:2;
+} Unit;
+
+typedef struct UnitStatusMessageFormats {
+ const char *starting_stopping[2];
+ const char *finished_start_job[_JOB_RESULT_MAX];
+ const char *finished_stop_job[_JOB_RESULT_MAX];
+} UnitStatusMessageFormats;
+
+/* Flags used when writing drop-in files or transient unit files */
+typedef enum UnitWriteFlags {
+ /* Write a runtime unit file or drop-in (i.e. one below /run) */
+ UNIT_RUNTIME = 1 << 0,
+
+ /* Write a persistent drop-in (i.e. one below /etc) */
+ UNIT_PERSISTENT = 1 << 1,
+
+ /* Place this item in the per-unit-type private section, instead of [Unit] */
+ UNIT_PRIVATE = 1 << 2,
+
+ /* Apply specifier escaping before writing */
+ UNIT_ESCAPE_SPECIFIERS = 1 << 3,
+
+ /* Apply C escaping before writing */
+ UNIT_ESCAPE_C = 1 << 4,
+} UnitWriteFlags;
+
+/* Returns true if neither persistent, nor runtime storage is requested, i.e. this is a check invocation only */
+static inline bool UNIT_WRITE_FLAGS_NOOP(UnitWriteFlags flags) {
+ return (flags & (UNIT_RUNTIME|UNIT_PERSISTENT)) == 0;
+}
+
+#include "kill.h"
+
+typedef struct UnitVTable {
+ /* How much memory does an object of this unit type need */
+ size_t object_size;
+
+ /* If greater than 0, the offset into the object where
+ * ExecContext is found, if the unit type has that */
+ size_t exec_context_offset;
+
+ /* If greater than 0, the offset into the object where
+ * CGroupContext is found, if the unit type has that */
+ size_t cgroup_context_offset;
+
+ /* If greater than 0, the offset into the object where
+ * KillContext is found, if the unit type has that */
+ size_t kill_context_offset;
+
+ /* If greater than 0, the offset into the object where the
+ * pointer to ExecRuntime is found, if the unit type has
+ * that */
+ size_t exec_runtime_offset;
+
+ /* If greater than 0, the offset into the object where the pointer to DynamicCreds is found, if the unit type
+ * has that. */
+ size_t dynamic_creds_offset;
+
+ /* The name of the configuration file section with the private settings of this unit */
+ const char *private_section;
+
+ /* Config file sections this unit type understands, separated
+ * by NUL chars */
+ const char *sections;
+
+ /* This should reset all type-specific variables. This should
+ * not allocate memory, and is called with zero-initialized
+ * data. It should hence only initialize variables that need
+ * to be set != 0. */
+ void (*init)(Unit *u);
+
+ /* This should free all type-specific variables. It should be
+ * idempotent. */
+ void (*done)(Unit *u);
+
+ /* Actually load data from disk. This may fail, and should set
+ * load_state to UNIT_LOADED, UNIT_MERGED or leave it at
+ * UNIT_STUB if no configuration could be found. */
+ int (*load)(Unit *u);
+
+ /* During deserialization we only record the intended state to return to. With coldplug() we actually put the
+ * deserialized state in effect. This is where unit_notify() should be called to start things up. Note that
+ * this callback is invoked *before* we leave the reloading state of the manager, i.e. *before* we consider the
+ * reloading to be complete. Thus, this callback should just restore the exact same state for any unit that was
+ * in effect before the reload, i.e. units should not catch up with changes happened during the reload. That's
+ * what catchup() below is for. */
+ int (*coldplug)(Unit *u);
+
+ /* This is called shortly after all units' coldplug() call was invoked, and *after* the manager left the
+ * reloading state. It's supposed to catch up with state changes due to external events we missed so far (for
+ * example because they took place while we were reloading/reexecing) */
+ void (*catchup)(Unit *u);
+
+ void (*dump)(Unit *u, FILE *f, const char *prefix);
+
+ int (*start)(Unit *u);
+ int (*stop)(Unit *u);
+ int (*reload)(Unit *u);
+
+ int (*kill)(Unit *u, KillWho w, int signo, sd_bus_error *error);
+
+ bool (*can_reload)(Unit *u);
+
+ /* Write all data that cannot be restored from other sources
+ * away using unit_serialize_item() */
+ int (*serialize)(Unit *u, FILE *f, FDSet *fds);
+
+ /* Restore one item from the serialization */
+ int (*deserialize_item)(Unit *u, const char *key, const char *data, FDSet *fds);
+
+ /* Try to match up fds with what we need for this unit */
+ void (*distribute_fds)(Unit *u, FDSet *fds);
+
+ /* Boils down the more complex internal state of this unit to
+ * a simpler one that the engine can understand */
+ UnitActiveState (*active_state)(Unit *u);
+
+ /* Returns the substate specific to this unit type as
+ * string. This is purely information so that we can give the
+ * user a more fine grained explanation in which actual state a
+ * unit is in. */
+ const char* (*sub_state_to_string)(Unit *u);
+
+ /* Additionally to UnitActiveState determine whether unit is to be restarted. */
+ bool (*will_restart)(Unit *u);
+
+ /* Return false when there is a reason to prevent this unit from being gc'ed
+ * even though nothing references it and it isn't active in any way. */
+ bool (*may_gc)(Unit *u);
+
+ /* When the unit is not running and no job for it queued we shall release its runtime resources */
+ void (*release_resources)(Unit *u);
+
+ /* Invoked on every child that died */
+ void (*sigchld_event)(Unit *u, pid_t pid, int code, int status);
+
+ /* Reset failed state if we are in failed state */
+ void (*reset_failed)(Unit *u);
+
+ /* Called whenever any of the cgroups this unit watches for
+ * ran empty */
+ void (*notify_cgroup_empty)(Unit *u);
+
+ /* Called whenever a process of this unit sends us a message */
+ void (*notify_message)(Unit *u, const struct ucred *ucred, char **tags, FDSet *fds);
+
+ /* Called whenever a name this Unit registered for comes or goes away. */
+ void (*bus_name_owner_change)(Unit *u, const char *name, const char *old_owner, const char *new_owner);
+
+ /* Called for each property that is being set */
+ int (*bus_set_property)(Unit *u, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+
+ /* Called after at least one property got changed to apply the necessary change */
+ int (*bus_commit_properties)(Unit *u);
+
+ /* Return the unit this unit is following */
+ Unit *(*following)(Unit *u);
+
+ /* Return the set of units that are following each other */
+ int (*following_set)(Unit *u, Set **s);
+
+ /* Invoked each time a unit this unit is triggering changes
+ * state or gains/loses a job */
+ void (*trigger_notify)(Unit *u, Unit *trigger);
+
+ /* Called whenever CLOCK_REALTIME made a jump */
+ void (*time_change)(Unit *u);
+
+ /* Called whenever /etc/localtime was modified */
+ void (*timezone_change)(Unit *u);
+
+ /* Returns the next timeout of a unit */
+ int (*get_timeout)(Unit *u, usec_t *timeout);
+
+ /* Returns the main PID if there is any defined, or 0. */
+ pid_t (*main_pid)(Unit *u);
+
+ /* Returns the main PID if there is any defined, or 0. */
+ pid_t (*control_pid)(Unit *u);
+
+ /* Returns true if the unit currently needs access to the console */
+ bool (*needs_console)(Unit *u);
+
+ /* Returns the exit status to propagate in case of FailureAction=exit/SuccessAction=exit; usually returns the
+ * exit code of the "main" process of the service or similar. */
+ int (*exit_status)(Unit *u);
+
+ /* Like the enumerate() callback further down, but only enumerates the perpetual units, i.e. all units that
+ * unconditionally exist and are always active. The main reason to keep both enumeration functions separate is
+ * philosophical: the state of perpetual units should be put in place by coldplug(), while the state of those
+ * discovered through regular enumeration should be put in place by catchup(), see below. */
+ void (*enumerate_perpetual)(Manager *m);
+
+ /* This is called for each unit type and should be used to enumerate units already existing in the system
+ * internally and load them. However, everything that is loaded here should still stay in inactive state. It is
+ * the job of the catchup() call above to put the units into the discovered state. */
+ void (*enumerate)(Manager *m);
+
+ /* Type specific cleanups. */
+ void (*shutdown)(Manager *m);
+
+ /* If this function is set and return false all jobs for units
+ * of this type will immediately fail. */
+ bool (*supported)(void);
+
+ /* The bus vtable */
+ const sd_bus_vtable *bus_vtable;
+
+ /* The strings to print in status messages */
+ UnitStatusMessageFormats status_message_formats;
+
+ /* True if transient units of this type are OK */
+ bool can_transient:1;
+
+ /* True if cgroup delegation is permissible */
+ bool can_delegate:1;
+
+ /* True if units of this type shall be startable only once and then never again */
+ bool once_only:1;
+
+ /* True if queued jobs of this type should be GC'ed if no other job needs them anymore */
+ bool gc_jobs:1;
+} UnitVTable;
+
+extern const UnitVTable * const unit_vtable[_UNIT_TYPE_MAX];
+
+static inline const UnitVTable* UNIT_VTABLE(Unit *u) {
+ return unit_vtable[u->type];
+}
+
+/* For casting a unit into the various unit types */
+#define DEFINE_CAST(UPPERCASE, MixedCase) \
+ static inline MixedCase* UPPERCASE(Unit *u) { \
+ if (_unlikely_(!u || u->type != UNIT_##UPPERCASE)) \
+ return NULL; \
+ \
+ return (MixedCase*) u; \
+ }
+
+/* For casting the various unit types into a unit */
+#define UNIT(u) \
+ ({ \
+ typeof(u) _u_ = (u); \
+ Unit *_w_ = _u_ ? &(_u_)->meta : NULL; \
+ _w_; \
+ })
+
+#define UNIT_HAS_EXEC_CONTEXT(u) (UNIT_VTABLE(u)->exec_context_offset > 0)
+#define UNIT_HAS_CGROUP_CONTEXT(u) (UNIT_VTABLE(u)->cgroup_context_offset > 0)
+#define UNIT_HAS_KILL_CONTEXT(u) (UNIT_VTABLE(u)->kill_context_offset > 0)
+
+static inline Unit* UNIT_TRIGGER(Unit *u) {
+ return hashmap_first_key(u->dependencies[UNIT_TRIGGERS]);
+}
+
+Unit *unit_new(Manager *m, size_t size);
+void unit_free(Unit *u);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Unit *, unit_free);
+
+int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret);
+int unit_add_name(Unit *u, const char *name);
+
+int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_reference, UnitDependencyMask mask);
+int unit_add_two_dependencies(Unit *u, UnitDependency d, UnitDependency e, Unit *other, bool add_reference, UnitDependencyMask mask);
+
+int unit_add_dependency_by_name(Unit *u, UnitDependency d, const char *name, bool add_reference, UnitDependencyMask mask);
+int unit_add_two_dependencies_by_name(Unit *u, UnitDependency d, UnitDependency e, const char *name, bool add_reference, UnitDependencyMask mask);
+
+int unit_add_exec_dependencies(Unit *u, ExecContext *c);
+
+int unit_choose_id(Unit *u, const char *name);
+int unit_set_description(Unit *u, const char *description);
+
+bool unit_may_gc(Unit *u);
+
+void unit_add_to_load_queue(Unit *u);
+void unit_add_to_dbus_queue(Unit *u);
+void unit_add_to_cleanup_queue(Unit *u);
+void unit_add_to_gc_queue(Unit *u);
+void unit_add_to_target_deps_queue(Unit *u);
+void unit_submit_to_stop_when_unneeded_queue(Unit *u);
+
+int unit_merge(Unit *u, Unit *other);
+int unit_merge_by_name(Unit *u, const char *other);
+
+Unit *unit_follow_merge(Unit *u) _pure_;
+
+int unit_load_fragment_and_dropin(Unit *u);
+int unit_load_fragment_and_dropin_optional(Unit *u);
+int unit_load(Unit *unit);
+
+int unit_set_slice(Unit *u, Unit *slice);
+int unit_set_default_slice(Unit *u);
+
+const char *unit_description(Unit *u) _pure_;
+
+bool unit_has_name(const Unit *u, const char *name);
+
+UnitActiveState unit_active_state(Unit *u);
+
+const char* unit_sub_state_to_string(Unit *u);
+
+void unit_dump(Unit *u, FILE *f, const char *prefix);
+
+bool unit_can_reload(Unit *u) _pure_;
+bool unit_can_start(Unit *u) _pure_;
+bool unit_can_stop(Unit *u) _pure_;
+bool unit_can_isolate(Unit *u) _pure_;
+
+int unit_start(Unit *u);
+int unit_stop(Unit *u);
+int unit_reload(Unit *u);
+
+int unit_kill(Unit *u, KillWho w, int signo, sd_bus_error *error);
+int unit_kill_common(Unit *u, KillWho who, int signo, pid_t main_pid, pid_t control_pid, sd_bus_error *error);
+
+typedef enum UnitNotifyFlags {
+ UNIT_NOTIFY_RELOAD_FAILURE = 1 << 0,
+ UNIT_NOTIFY_WILL_AUTO_RESTART = 1 << 1,
+} UnitNotifyFlags;
+
+void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlags flags);
+
+int unit_watch_pid(Unit *u, pid_t pid);
+void unit_unwatch_pid(Unit *u, pid_t pid);
+void unit_unwatch_all_pids(Unit *u);
+
+int unit_enqueue_rewatch_pids(Unit *u);
+void unit_dequeue_rewatch_pids(Unit *u);
+
+int unit_install_bus_match(Unit *u, sd_bus *bus, const char *name);
+int unit_watch_bus_name(Unit *u, const char *name);
+void unit_unwatch_bus_name(Unit *u, const char *name);
+
+bool unit_job_is_applicable(Unit *u, JobType j);
+
+int set_unit_path(const char *p);
+
+char *unit_dbus_path(Unit *u);
+char *unit_dbus_path_invocation_id(Unit *u);
+
+int unit_load_related_unit(Unit *u, const char *type, Unit **_found);
+
+bool unit_can_serialize(Unit *u) _pure_;
+
+int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs);
+int unit_deserialize(Unit *u, FILE *f, FDSet *fds);
+int unit_deserialize_skip(FILE *f);
+
+int unit_add_node_dependency(Unit *u, const char *what, bool wants, UnitDependency d, UnitDependencyMask mask);
+
+int unit_coldplug(Unit *u);
+void unit_catchup(Unit *u);
+
+void unit_status_printf(Unit *u, const char *status, const char *unit_status_msg_format) _printf_(3, 0);
+
+bool unit_need_daemon_reload(Unit *u);
+
+void unit_reset_failed(Unit *u);
+
+Unit *unit_following(Unit *u);
+int unit_following_set(Unit *u, Set **s);
+
+const char *unit_slice_name(Unit *u);
+
+bool unit_stop_pending(Unit *u) _pure_;
+bool unit_inactive_or_pending(Unit *u) _pure_;
+bool unit_active_or_pending(Unit *u);
+bool unit_will_restart(Unit *u);
+
+int unit_add_default_target_dependency(Unit *u, Unit *target);
+
+void unit_start_on_failure(Unit *u);
+void unit_trigger_notify(Unit *u);
+
+UnitFileState unit_get_unit_file_state(Unit *u);
+int unit_get_unit_file_preset(Unit *u);
+
+Unit* unit_ref_set(UnitRef *ref, Unit *source, Unit *target);
+void unit_ref_unset(UnitRef *ref);
+
+#define UNIT_DEREF(ref) ((ref).target)
+#define UNIT_ISSET(ref) (!!(ref).target)
+
+int unit_patch_contexts(Unit *u);
+
+ExecContext *unit_get_exec_context(Unit *u) _pure_;
+KillContext *unit_get_kill_context(Unit *u) _pure_;
+CGroupContext *unit_get_cgroup_context(Unit *u) _pure_;
+
+ExecRuntime *unit_get_exec_runtime(Unit *u) _pure_;
+
+int unit_setup_exec_runtime(Unit *u);
+int unit_setup_dynamic_creds(Unit *u);
+
+char* unit_escape_setting(const char *s, UnitWriteFlags flags, char **buf);
+char* unit_concat_strv(char **l, UnitWriteFlags flags);
+
+int unit_write_setting(Unit *u, UnitWriteFlags flags, const char *name, const char *data);
+int unit_write_settingf(Unit *u, UnitWriteFlags mode, const char *name, const char *format, ...) _printf_(4,5);
+
+int unit_kill_context(Unit *u, KillContext *c, KillOperation k, pid_t main_pid, pid_t control_pid, bool main_pid_alien);
+
+int unit_make_transient(Unit *u);
+
+int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask);
+
+bool unit_type_supported(UnitType t);
+
+bool unit_is_pristine(Unit *u);
+
+bool unit_is_unneeded(Unit *u);
+
+pid_t unit_control_pid(Unit *u);
+pid_t unit_main_pid(Unit *u);
+
+static inline bool unit_supported(Unit *u) {
+ return unit_type_supported(u->type);
+}
+
+void unit_warn_if_dir_nonempty(Unit *u, const char* where);
+int unit_fail_if_noncanonical(Unit *u, const char* where);
+
+int unit_start_limit_test(Unit *u);
+
+void unit_unref_uid(Unit *u, bool destroy_now);
+int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc);
+
+void unit_unref_gid(Unit *u, bool destroy_now);
+int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc);
+
+int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid);
+void unit_unref_uid_gid(Unit *u, bool destroy_now);
+
+void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid);
+
+int unit_set_invocation_id(Unit *u, sd_id128_t id);
+int unit_acquire_invocation_id(Unit *u);
+
+bool unit_shall_confirm_spawn(Unit *u);
+
+int unit_set_exec_params(Unit *s, ExecParameters *p);
+
+int unit_fork_helper_process(Unit *u, const char *name, pid_t *ret);
+
+void unit_remove_dependencies(Unit *u, UnitDependencyMask mask);
+
+void unit_export_state_files(Unit *u);
+void unit_unlink_state_files(Unit *u);
+
+int unit_prepare_exec(Unit *u);
+
+void unit_warn_leftover_processes(Unit *u);
+
+bool unit_needs_console(Unit *u);
+
+const char *unit_label_path(Unit *u);
+
+int unit_pid_attachable(Unit *unit, pid_t pid, sd_bus_error *error);
+
+void unit_log_success(Unit *u);
+void unit_log_failure(Unit *u, const char *result);
+static inline void unit_log_result(Unit *u, bool success, const char *result) {
+ if (success)
+ unit_log_success(u);
+ else
+ unit_log_failure(u, result);
+}
+
+void unit_log_process_exit(Unit *u, int level, const char *kind, const char *command, int code, int status);
+
+int unit_exit_status(Unit *u);
+int unit_success_action_exit_status(Unit *u);
+int unit_failure_action_exit_status(Unit *u);
+
+/* Macros which append UNIT= or USER_UNIT= to the message */
+
+#define log_unit_full(unit, level, error, ...) \
+ ({ \
+ const Unit *_u = (unit); \
+ _u ? log_object_internal(level, error, __FILE__, __LINE__, __func__, _u->manager->unit_log_field, _u->id, _u->manager->invocation_log_field, _u->invocation_id_string, ##__VA_ARGS__) : \
+ log_internal(level, error, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
+ })
+
+#define log_unit_debug(unit, ...) log_unit_full(unit, LOG_DEBUG, 0, ##__VA_ARGS__)
+#define log_unit_info(unit, ...) log_unit_full(unit, LOG_INFO, 0, ##__VA_ARGS__)
+#define log_unit_notice(unit, ...) log_unit_full(unit, LOG_NOTICE, 0, ##__VA_ARGS__)
+#define log_unit_warning(unit, ...) log_unit_full(unit, LOG_WARNING, 0, ##__VA_ARGS__)
+#define log_unit_error(unit, ...) log_unit_full(unit, LOG_ERR, 0, ##__VA_ARGS__)
+
+#define log_unit_debug_errno(unit, error, ...) log_unit_full(unit, LOG_DEBUG, error, ##__VA_ARGS__)
+#define log_unit_info_errno(unit, error, ...) log_unit_full(unit, LOG_INFO, error, ##__VA_ARGS__)
+#define log_unit_notice_errno(unit, error, ...) log_unit_full(unit, LOG_NOTICE, error, ##__VA_ARGS__)
+#define log_unit_warning_errno(unit, error, ...) log_unit_full(unit, LOG_WARNING, error, ##__VA_ARGS__)
+#define log_unit_error_errno(unit, error, ...) log_unit_full(unit, LOG_ERR, error, ##__VA_ARGS__)
+
+#define LOG_UNIT_MESSAGE(unit, fmt, ...) "MESSAGE=%s: " fmt, (unit)->id, ##__VA_ARGS__
+#define LOG_UNIT_ID(unit) (unit)->manager->unit_log_format_string, (unit)->id
+#define LOG_UNIT_INVOCATION_ID(unit) (unit)->manager->invocation_log_format_string, (unit)->invocation_id_string
+
+const char* collect_mode_to_string(CollectMode m) _const_;
+CollectMode collect_mode_from_string(const char *s) _pure_;
diff --git a/src/core/user.conf b/src/core/user.conf
new file mode 100644
index 0000000..b427f1e
--- /dev/null
+++ b/src/core/user.conf
@@ -0,0 +1,44 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# You can override the directives in this file by creating files in
+# /etc/systemd/user.conf.d/*.conf.
+#
+# See systemd-user.conf(5) for details
+
+[Manager]
+#LogLevel=info
+#LogTarget=console
+#LogColor=yes
+#LogLocation=no
+#SystemCallArchitectures=
+#TimerSlackNSec=
+#DefaultTimerAccuracySec=1min
+#DefaultStandardOutput=inherit
+#DefaultStandardError=inherit
+#DefaultTimeoutStartSec=90s
+#DefaultTimeoutStopSec=90s
+#DefaultRestartSec=100ms
+#DefaultStartLimitIntervalSec=10s
+#DefaultStartLimitBurst=5
+#DefaultEnvironment=
+#DefaultLimitCPU=
+#DefaultLimitFSIZE=
+#DefaultLimitDATA=
+#DefaultLimitSTACK=
+#DefaultLimitCORE=
+#DefaultLimitRSS=
+#DefaultLimitNOFILE=
+#DefaultLimitAS=
+#DefaultLimitNPROC=
+#DefaultLimitMEMLOCK=
+#DefaultLimitLOCKS=
+#DefaultLimitSIGPENDING=
+#DefaultLimitMSGQUEUE=
+#DefaultLimitNICE=
+#DefaultLimitRTPRIO=
+#DefaultLimitRTTIME=
diff --git a/src/coredump/coredump-vacuum.c b/src/coredump/coredump-vacuum.c
new file mode 100644
index 0000000..6ce5dfc
--- /dev/null
+++ b/src/coredump/coredump-vacuum.c
@@ -0,0 +1,245 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/statvfs.h>
+
+#include "alloc-util.h"
+#include "coredump-vacuum.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "user-util.h"
+#include "util.h"
+
+#define DEFAULT_MAX_USE_LOWER (uint64_t) (1ULL*1024ULL*1024ULL) /* 1 MiB */
+#define DEFAULT_MAX_USE_UPPER (uint64_t) (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
+#define DEFAULT_KEEP_FREE_UPPER (uint64_t) (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
+#define DEFAULT_KEEP_FREE (uint64_t) (1024ULL*1024ULL) /* 1 MB */
+
+struct vacuum_candidate {
+ unsigned n_files;
+ char *oldest_file;
+ usec_t oldest_mtime;
+};
+
+static void vacuum_candidate_free(struct vacuum_candidate *c) {
+ if (!c)
+ return;
+
+ free(c->oldest_file);
+ free(c);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct vacuum_candidate*, vacuum_candidate_free);
+
+static void vacuum_candidate_hashmap_free(Hashmap *h) {
+ hashmap_free_with_destructor(h, vacuum_candidate_free);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, vacuum_candidate_hashmap_free);
+
+static int uid_from_file_name(const char *filename, uid_t *uid) {
+ const char *p, *e, *u;
+
+ p = startswith(filename, "core.");
+ if (!p)
+ return -EINVAL;
+
+ /* Skip the comm field */
+ p = strchr(p, '.');
+ if (!p)
+ return -EINVAL;
+ p++;
+
+ /* Find end up UID */
+ e = strchr(p, '.');
+ if (!e)
+ return -EINVAL;
+
+ u = strndupa(p, e-p);
+ return parse_uid(u, uid);
+}
+
+static bool vacuum_necessary(int fd, uint64_t sum, uint64_t keep_free, uint64_t max_use) {
+ uint64_t fs_size = 0, fs_free = (uint64_t) -1;
+ struct statvfs sv;
+
+ assert(fd >= 0);
+
+ if (fstatvfs(fd, &sv) >= 0) {
+ fs_size = sv.f_frsize * sv.f_blocks;
+ fs_free = sv.f_frsize * sv.f_bfree;
+ }
+
+ if (max_use == (uint64_t) -1) {
+
+ if (fs_size > 0) {
+ max_use = PAGE_ALIGN(fs_size / 10); /* 10% */
+
+ if (max_use > DEFAULT_MAX_USE_UPPER)
+ max_use = DEFAULT_MAX_USE_UPPER;
+
+ if (max_use < DEFAULT_MAX_USE_LOWER)
+ max_use = DEFAULT_MAX_USE_LOWER;
+ } else
+ max_use = DEFAULT_MAX_USE_LOWER;
+ } else
+ max_use = PAGE_ALIGN(max_use);
+
+ if (max_use > 0 && sum > max_use)
+ return true;
+
+ if (keep_free == (uint64_t) -1) {
+
+ if (fs_size > 0) {
+ keep_free = PAGE_ALIGN((fs_size * 3) / 20); /* 15% */
+
+ if (keep_free > DEFAULT_KEEP_FREE_UPPER)
+ keep_free = DEFAULT_KEEP_FREE_UPPER;
+ } else
+ keep_free = DEFAULT_KEEP_FREE;
+ } else
+ keep_free = PAGE_ALIGN(keep_free);
+
+ if (keep_free > 0 && fs_free < keep_free)
+ return true;
+
+ return false;
+}
+
+int coredump_vacuum(int exclude_fd, uint64_t keep_free, uint64_t max_use) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct stat exclude_st;
+ int r;
+
+ if (keep_free == 0 && max_use == 0)
+ return 0;
+
+ if (exclude_fd >= 0) {
+ if (fstat(exclude_fd, &exclude_st) < 0)
+ return log_error_errno(errno, "Failed to fstat(): %m");
+ }
+
+ /* This algorithm will keep deleting the oldest file of the
+ * user with the most coredumps until we are back in the size
+ * limits. Note that vacuuming for journal files is different,
+ * because we rely on rate-limiting of the messages there,
+ * to avoid being flooded. */
+
+ d = opendir("/var/lib/systemd/coredump");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Can't open coredump directory: %m");
+ }
+
+ for (;;) {
+ _cleanup_(vacuum_candidate_hashmap_freep) Hashmap *h = NULL;
+ struct vacuum_candidate *worst = NULL;
+ struct dirent *de;
+ uint64_t sum = 0;
+
+ rewinddir(d);
+
+ FOREACH_DIRENT(de, d, goto fail) {
+ struct vacuum_candidate *c;
+ struct stat st;
+ uid_t uid;
+ usec_t t;
+
+ r = uid_from_file_name(de->d_name, &uid);
+ if (r < 0)
+ continue;
+
+ if (fstatat(dirfd(d), de->d_name, &st, AT_NO_AUTOMOUNT|AT_SYMLINK_NOFOLLOW) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ log_warning_errno(errno, "Failed to stat /var/lib/systemd/coredump/%s: %m", de->d_name);
+ continue;
+ }
+
+ if (!S_ISREG(st.st_mode))
+ continue;
+
+ if (exclude_fd >= 0 &&
+ exclude_st.st_dev == st.st_dev &&
+ exclude_st.st_ino == st.st_ino)
+ continue;
+
+ r = hashmap_ensure_allocated(&h, NULL);
+ if (r < 0)
+ return log_oom();
+
+ t = timespec_load(&st.st_mtim);
+
+ c = hashmap_get(h, UID_TO_PTR(uid));
+ if (c) {
+
+ if (t < c->oldest_mtime) {
+ char *n;
+
+ n = strdup(de->d_name);
+ if (!n)
+ return log_oom();
+
+ free(c->oldest_file);
+ c->oldest_file = n;
+ c->oldest_mtime = t;
+ }
+
+ } else {
+ _cleanup_(vacuum_candidate_freep) struct vacuum_candidate *n = NULL;
+
+ n = new0(struct vacuum_candidate, 1);
+ if (!n)
+ return log_oom();
+
+ n->oldest_file = strdup(de->d_name);
+ if (!n->oldest_file)
+ return log_oom();
+
+ n->oldest_mtime = t;
+
+ r = hashmap_put(h, UID_TO_PTR(uid), n);
+ if (r < 0)
+ return log_oom();
+
+ c = TAKE_PTR(n);
+ }
+
+ c->n_files++;
+
+ if (!worst ||
+ worst->n_files < c->n_files ||
+ (worst->n_files == c->n_files && c->oldest_mtime < worst->oldest_mtime))
+ worst = c;
+
+ sum += st.st_blocks * 512;
+ }
+
+ if (!worst)
+ break;
+
+ r = vacuum_necessary(dirfd(d), sum, keep_free, max_use);
+ if (r <= 0)
+ return r;
+
+ r = unlinkat_deallocate(dirfd(d), worst->oldest_file, 0);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return log_error_errno(r, "Failed to remove file %s: %m", worst->oldest_file);
+
+ log_info("Removed old coredump %s.", worst->oldest_file);
+ }
+
+ return 0;
+
+fail:
+ return log_error_errno(errno, "Failed to read directory: %m");
+}
diff --git a/src/coredump/coredump-vacuum.h b/src/coredump/coredump-vacuum.h
new file mode 100644
index 0000000..0db1167
--- /dev/null
+++ b/src/coredump/coredump-vacuum.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+int coredump_vacuum(int exclude_fd, uint64_t keep_free, uint64_t max_use);
diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c
new file mode 100644
index 0000000..ecbb4bf
--- /dev/null
+++ b/src/coredump/coredump.c
@@ -0,0 +1,1405 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <sys/prctl.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#if HAVE_ELFUTILS
+#include <dwarf.h>
+#include <elfutils/libdwfl.h>
+#endif
+
+#include "sd-daemon.h"
+#include "sd-journal.h"
+#include "sd-login.h"
+#include "sd-messages.h"
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "cgroup-util.h"
+#include "compress.h"
+#include "conf-parser.h"
+#include "copy.h"
+#include "coredump-vacuum.h"
+#include "dirent-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "journal-importer.h"
+#include "log.h"
+#include "macro.h"
+#include "main-func.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "special.h"
+#include "stacktrace.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+/* The maximum size up to which we process coredumps */
+#define PROCESS_SIZE_MAX ((uint64_t) (2LLU*1024LLU*1024LLU*1024LLU))
+
+/* The maximum size up to which we leave the coredump around on disk */
+#define EXTERNAL_SIZE_MAX PROCESS_SIZE_MAX
+
+/* The maximum size up to which we store the coredump in the journal */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+#define JOURNAL_SIZE_MAX ((size_t) (767LU*1024LU*1024LU))
+#else
+/* oss-fuzz limits memory usage. */
+#define JOURNAL_SIZE_MAX ((size_t) (10LU*1024LU*1024LU))
+#endif
+
+/* Make sure to not make this larger than the maximum journal entry
+ * size. See DATA_SIZE_MAX in journal-importer.h. */
+assert_cc(JOURNAL_SIZE_MAX <= DATA_SIZE_MAX);
+
+enum {
+ /* We use this as array indexes for a couple of special fields we use for
+ * naming coredump files, and attaching xattrs, and for indexing argv[].
+
+ * Our pattern for man:systectl(1) kernel.core_pattern is such that the
+ * kernel passes fields until CONTEXT_RLIMIT as arguments in argv[]. After
+ * that it gets complicated: the kernel passes "comm" as one or more fields
+ * starting at index CONTEXT_COMM (in other words, full "comm" is under index
+ * CONTEXT_COMM when it does not contain spaces, which is the common
+ * case). This mapping is not reversible, so we prefer to retrieve "comm"
+ * from /proc. We only fall back to argv[CONTEXT_COMM...] when that fails.
+ *
+ * In the internal context[] array, fields before CONTEXT_COMM are the
+ * strings from argv[], so they should not be freed. The strings at indices
+ * CONTEXT_COMM and higher are allocated by us and should be freed at the
+ * end.
+ */
+ CONTEXT_PID,
+ CONTEXT_UID,
+ CONTEXT_GID,
+ CONTEXT_SIGNAL,
+ CONTEXT_TIMESTAMP,
+ CONTEXT_RLIMIT,
+ CONTEXT_HOSTNAME,
+ CONTEXT_COMM,
+ CONTEXT_EXE,
+ CONTEXT_UNIT,
+ _CONTEXT_MAX
+};
+
+typedef enum CoredumpStorage {
+ COREDUMP_STORAGE_NONE,
+ COREDUMP_STORAGE_EXTERNAL,
+ COREDUMP_STORAGE_JOURNAL,
+ _COREDUMP_STORAGE_MAX,
+ _COREDUMP_STORAGE_INVALID = -1
+} CoredumpStorage;
+
+static const char* const coredump_storage_table[_COREDUMP_STORAGE_MAX] = {
+ [COREDUMP_STORAGE_NONE] = "none",
+ [COREDUMP_STORAGE_EXTERNAL] = "external",
+ [COREDUMP_STORAGE_JOURNAL] = "journal",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(coredump_storage, CoredumpStorage);
+static DEFINE_CONFIG_PARSE_ENUM(config_parse_coredump_storage, coredump_storage, CoredumpStorage, "Failed to parse storage setting");
+
+static CoredumpStorage arg_storage = COREDUMP_STORAGE_EXTERNAL;
+static bool arg_compress = true;
+static uint64_t arg_process_size_max = PROCESS_SIZE_MAX;
+static uint64_t arg_external_size_max = EXTERNAL_SIZE_MAX;
+static uint64_t arg_journal_size_max = JOURNAL_SIZE_MAX;
+static uint64_t arg_keep_free = (uint64_t) -1;
+static uint64_t arg_max_use = (uint64_t) -1;
+
+static int parse_config(void) {
+ static const ConfigTableItem items[] = {
+ { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage },
+ { "Coredump", "Compress", config_parse_bool, 0, &arg_compress },
+ { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max },
+ { "Coredump", "ExternalSizeMax", config_parse_iec_uint64, 0, &arg_external_size_max },
+ { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max },
+ { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free },
+ { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use },
+ {}
+ };
+
+ return config_parse_many_nulstr(PKGSYSCONFDIR "/coredump.conf",
+ CONF_PATHS_NULSTR("systemd/coredump.conf.d"),
+ "Coredump\0",
+ config_item_table_lookup, items,
+ CONFIG_PARSE_WARN, NULL);
+}
+
+static uint64_t storage_size_max(void) {
+ if (arg_storage == COREDUMP_STORAGE_EXTERNAL)
+ return arg_external_size_max;
+ if (arg_storage == COREDUMP_STORAGE_JOURNAL)
+ return arg_journal_size_max;
+ assert(arg_storage == COREDUMP_STORAGE_NONE);
+ return 0;
+}
+
+static int fix_acl(int fd, uid_t uid) {
+
+#if HAVE_ACL
+ _cleanup_(acl_freep) acl_t acl = NULL;
+ acl_entry_t entry;
+ acl_permset_t permset;
+ int r;
+
+ assert(fd >= 0);
+
+ if (uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY)
+ return 0;
+
+ /* Make sure normal users can read (but not write or delete)
+ * their own coredumps */
+
+ acl = acl_get_fd(fd);
+ if (!acl)
+ return log_error_errno(errno, "Failed to get ACL: %m");
+
+ if (acl_create_entry(&acl, &entry) < 0 ||
+ acl_set_tag_type(entry, ACL_USER) < 0 ||
+ acl_set_qualifier(entry, &uid) < 0)
+ return log_error_errno(errno, "Failed to patch ACL: %m");
+
+ if (acl_get_permset(entry, &permset) < 0 ||
+ acl_add_perm(permset, ACL_READ) < 0)
+ return log_warning_errno(errno, "Failed to patch ACL: %m");
+
+ r = calc_acl_mask_if_needed(&acl);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to patch ACL: %m");
+
+ if (acl_set_fd(fd, acl) < 0)
+ return log_error_errno(errno, "Failed to apply ACL: %m");
+#endif
+
+ return 0;
+}
+
+static int fix_xattr(int fd, const char *context[_CONTEXT_MAX]) {
+
+ static const char * const xattrs[_CONTEXT_MAX] = {
+ [CONTEXT_PID] = "user.coredump.pid",
+ [CONTEXT_UID] = "user.coredump.uid",
+ [CONTEXT_GID] = "user.coredump.gid",
+ [CONTEXT_SIGNAL] = "user.coredump.signal",
+ [CONTEXT_TIMESTAMP] = "user.coredump.timestamp",
+ [CONTEXT_RLIMIT] = "user.coredump.rlimit",
+ [CONTEXT_HOSTNAME] = "user.coredump.hostname",
+ [CONTEXT_COMM] = "user.coredump.comm",
+ [CONTEXT_EXE] = "user.coredump.exe",
+ };
+
+ int r = 0;
+ unsigned i;
+
+ assert(fd >= 0);
+
+ /* Attach some metadata to coredumps via extended
+ * attributes. Just because we can. */
+
+ for (i = 0; i < _CONTEXT_MAX; i++) {
+ int k;
+
+ if (isempty(context[i]) || !xattrs[i])
+ continue;
+
+ k = fsetxattr(fd, xattrs[i], context[i], strlen(context[i]), XATTR_CREATE);
+ if (k < 0 && r == 0)
+ r = -errno;
+ }
+
+ return r;
+}
+
+#define filename_escape(s) xescape((s), "./ ")
+
+static const char *coredump_tmpfile_name(const char *s) {
+ return s ? s : "(unnamed temporary file)";
+}
+
+static int fix_permissions(
+ int fd,
+ const char *filename,
+ const char *target,
+ const char *context[_CONTEXT_MAX],
+ uid_t uid) {
+
+ int r;
+
+ assert(fd >= 0);
+ assert(target);
+ assert(context);
+
+ /* Ignore errors on these */
+ (void) fchmod(fd, 0640);
+ (void) fix_acl(fd, uid);
+ (void) fix_xattr(fd, context);
+
+ if (fsync(fd) < 0)
+ return log_error_errno(errno, "Failed to sync coredump %s: %m", coredump_tmpfile_name(filename));
+
+ (void) fsync_directory_of_file(fd);
+
+ r = link_tmpfile(fd, filename, target);
+ if (r < 0)
+ return log_error_errno(r, "Failed to move coredump %s into place: %m", target);
+
+ return 0;
+}
+
+static int maybe_remove_external_coredump(const char *filename, uint64_t size) {
+
+ /* Returns 1 if might remove, 0 if will not remove, < 0 on error. */
+
+ if (arg_storage == COREDUMP_STORAGE_EXTERNAL &&
+ size <= arg_external_size_max)
+ return 0;
+
+ if (!filename)
+ return 1;
+
+ if (unlink(filename) < 0 && errno != ENOENT)
+ return log_error_errno(errno, "Failed to unlink %s: %m", filename);
+
+ return 1;
+}
+
+static int make_filename(const char *context[_CONTEXT_MAX], char **ret) {
+ _cleanup_free_ char *c = NULL, *u = NULL, *p = NULL, *t = NULL;
+ sd_id128_t boot = {};
+ int r;
+
+ assert(context);
+
+ c = filename_escape(context[CONTEXT_COMM]);
+ if (!c)
+ return -ENOMEM;
+
+ u = filename_escape(context[CONTEXT_UID]);
+ if (!u)
+ return -ENOMEM;
+
+ r = sd_id128_get_boot(&boot);
+ if (r < 0)
+ return r;
+
+ p = filename_escape(context[CONTEXT_PID]);
+ if (!p)
+ return -ENOMEM;
+
+ t = filename_escape(context[CONTEXT_TIMESTAMP]);
+ if (!t)
+ return -ENOMEM;
+
+ if (asprintf(ret,
+ "/var/lib/systemd/coredump/core.%s.%s." SD_ID128_FORMAT_STR ".%s.%s000000",
+ c,
+ u,
+ SD_ID128_FORMAT_VAL(boot),
+ p,
+ t) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int save_external_coredump(
+ const char *context[_CONTEXT_MAX],
+ int input_fd,
+ char **ret_filename,
+ int *ret_node_fd,
+ int *ret_data_fd,
+ uint64_t *ret_size,
+ bool *ret_truncated) {
+
+ _cleanup_free_ char *fn = NULL, *tmp = NULL;
+ _cleanup_close_ int fd = -1;
+ uint64_t rlimit, process_limit, max_size;
+ struct stat st;
+ uid_t uid;
+ int r;
+
+ assert(context);
+ assert(ret_filename);
+ assert(ret_node_fd);
+ assert(ret_data_fd);
+ assert(ret_size);
+
+ r = parse_uid(context[CONTEXT_UID], &uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse UID: %m");
+
+ r = safe_atou64(context[CONTEXT_RLIMIT], &rlimit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse resource limit '%s': %m", context[CONTEXT_RLIMIT]);
+ if (rlimit < page_size()) {
+ /* Is coredumping disabled? Then don't bother saving/processing the coredump.
+ * Anything below PAGE_SIZE cannot give a readable coredump (the kernel uses
+ * ELF_EXEC_PAGESIZE which is not easily accessible, but is usually the same as PAGE_SIZE. */
+ return log_info_errno(SYNTHETIC_ERRNO(EBADSLT),
+ "Resource limits disable core dumping for process %s (%s).",
+ context[CONTEXT_PID], context[CONTEXT_COMM]);
+ }
+
+ process_limit = MAX(arg_process_size_max, storage_size_max());
+ if (process_limit == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADSLT),
+ "Limits for coredump processing and storage are both 0, not dumping core.");
+
+ /* Never store more than the process configured, or than we actually shall keep or process */
+ max_size = MIN(rlimit, process_limit);
+
+ r = make_filename(context, &fn);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine coredump file name: %m");
+
+ mkdir_p_label("/var/lib/systemd/coredump", 0755);
+
+ fd = open_tmpfile_linkable(fn, O_RDWR|O_CLOEXEC, &tmp);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to create temporary file for coredump %s: %m", fn);
+
+ r = copy_bytes(input_fd, fd, max_size, 0);
+ if (r < 0) {
+ log_error_errno(r, "Cannot store coredump of %s (%s): %m", context[CONTEXT_PID], context[CONTEXT_COMM]);
+ goto fail;
+ }
+ *ret_truncated = r == 1;
+ if (*ret_truncated)
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("Core file was truncated to %zu bytes.", max_size),
+ "SIZE_LIMIT=%zu", max_size,
+ "MESSAGE_ID=" SD_MESSAGE_TRUNCATED_CORE_STR);
+
+ if (fstat(fd, &st) < 0) {
+ log_error_errno(errno, "Failed to fstat core file %s: %m", coredump_tmpfile_name(tmp));
+ goto fail;
+ }
+
+ if (lseek(fd, 0, SEEK_SET) == (off_t) -1) {
+ log_error_errno(errno, "Failed to seek on %s: %m", coredump_tmpfile_name(tmp));
+ goto fail;
+ }
+
+#if HAVE_XZ || HAVE_LZ4
+ /* If we will remove the coredump anyway, do not compress. */
+ if (arg_compress && !maybe_remove_external_coredump(NULL, st.st_size)) {
+
+ _cleanup_free_ char *fn_compressed = NULL, *tmp_compressed = NULL;
+ _cleanup_close_ int fd_compressed = -1;
+
+ fn_compressed = strappend(fn, COMPRESSED_EXT);
+ if (!fn_compressed) {
+ log_oom();
+ goto uncompressed;
+ }
+
+ fd_compressed = open_tmpfile_linkable(fn_compressed, O_RDWR|O_CLOEXEC, &tmp_compressed);
+ if (fd_compressed < 0) {
+ log_error_errno(fd_compressed, "Failed to create temporary file for coredump %s: %m", fn_compressed);
+ goto uncompressed;
+ }
+
+ r = compress_stream(fd, fd_compressed, -1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed));
+ goto fail_compressed;
+ }
+
+ r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, uid);
+ if (r < 0)
+ goto fail_compressed;
+
+ /* OK, this worked, we can get rid of the uncompressed version now */
+ if (tmp)
+ unlink_noerrno(tmp);
+
+ *ret_filename = TAKE_PTR(fn_compressed); /* compressed */
+ *ret_node_fd = TAKE_FD(fd_compressed); /* compressed */
+ *ret_data_fd = TAKE_FD(fd); /* uncompressed */
+ *ret_size = (uint64_t) st.st_size; /* uncompressed */
+
+ return 0;
+
+ fail_compressed:
+ if (tmp_compressed)
+ (void) unlink(tmp_compressed);
+ }
+
+uncompressed:
+#endif
+
+ r = fix_permissions(fd, tmp, fn, context, uid);
+ if (r < 0)
+ goto fail;
+
+ *ret_filename = TAKE_PTR(fn);
+ *ret_data_fd = TAKE_FD(fd);
+ *ret_node_fd = -1;
+ *ret_size = (uint64_t) st.st_size;
+
+ return 0;
+
+fail:
+ if (tmp)
+ (void) unlink(tmp);
+ return r;
+}
+
+static int allocate_journal_field(int fd, size_t size, char **ret, size_t *ret_size) {
+ _cleanup_free_ char *field = NULL;
+ ssize_t n;
+
+ assert(fd >= 0);
+ assert(ret);
+ assert(ret_size);
+
+ if (lseek(fd, 0, SEEK_SET) == (off_t) -1)
+ return log_warning_errno(errno, "Failed to seek: %m");
+
+ field = malloc(9 + size);
+ if (!field) {
+ log_warning("Failed to allocate memory for coredump, coredump will not be stored.");
+ return -ENOMEM;
+ }
+
+ memcpy(field, "COREDUMP=", 9);
+
+ n = read(fd, field + 9, size);
+ if (n < 0)
+ return log_error_errno((int) n, "Failed to read core data: %m");
+ if ((size_t) n < size)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Core data too short.");
+
+ *ret = TAKE_PTR(field);
+ *ret_size = size + 9;
+
+ return 0;
+}
+
+/* Joins /proc/[pid]/fd/ and /proc/[pid]/fdinfo/ into the following lines:
+ * 0:/dev/pts/23
+ * pos: 0
+ * flags: 0100002
+ *
+ * 1:/dev/pts/23
+ * pos: 0
+ * flags: 0100002
+ *
+ * 2:/dev/pts/23
+ * pos: 0
+ * flags: 0100002
+ * EOF
+ */
+static int compose_open_fds(pid_t pid, char **open_fds) {
+ _cleanup_closedir_ DIR *proc_fd_dir = NULL;
+ _cleanup_close_ int proc_fdinfo_fd = -1;
+ _cleanup_free_ char *buffer = NULL;
+ _cleanup_fclose_ FILE *stream = NULL;
+ const char *fddelim = "", *path;
+ struct dirent *dent = NULL;
+ size_t size = 0;
+ int r;
+
+ assert(pid >= 0);
+ assert(open_fds != NULL);
+
+ path = procfs_file_alloca(pid, "fd");
+ proc_fd_dir = opendir(path);
+ if (!proc_fd_dir)
+ return -errno;
+
+ proc_fdinfo_fd = openat(dirfd(proc_fd_dir), "../fdinfo", O_DIRECTORY|O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (proc_fdinfo_fd < 0)
+ return -errno;
+
+ stream = open_memstream(&buffer, &size);
+ if (!stream)
+ return -ENOMEM;
+
+ (void) __fsetlocking(stream, FSETLOCKING_BYCALLER);
+
+ FOREACH_DIRENT(dent, proc_fd_dir, return -errno) {
+ _cleanup_fclose_ FILE *fdinfo = NULL;
+ _cleanup_free_ char *fdname = NULL;
+ int fd;
+
+ r = readlinkat_malloc(dirfd(proc_fd_dir), dent->d_name, &fdname);
+ if (r < 0)
+ return r;
+
+ fprintf(stream, "%s%s:%s\n", fddelim, dent->d_name, fdname);
+ fddelim = "\n";
+
+ /* Use the directory entry from /proc/[pid]/fd with /proc/[pid]/fdinfo */
+ fd = openat(proc_fdinfo_fd, dent->d_name, O_NOFOLLOW|O_CLOEXEC|O_RDONLY);
+ if (fd < 0)
+ continue;
+
+ fdinfo = fdopen(fd, "r");
+ if (!fdinfo) {
+ safe_close(fd);
+ continue;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(fdinfo, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ fputs(line, stream);
+ fputc('\n', stream);
+ }
+ }
+
+ errno = 0;
+ stream = safe_fclose(stream);
+
+ if (errno > 0)
+ return -errno;
+
+ *open_fds = TAKE_PTR(buffer);
+
+ return 0;
+}
+
+static int get_process_ns(pid_t pid, const char *namespace, ino_t *ns) {
+ const char *p;
+ struct stat stbuf;
+ _cleanup_close_ int proc_ns_dir_fd;
+
+ p = procfs_file_alloca(pid, "ns");
+
+ proc_ns_dir_fd = open(p, O_DIRECTORY | O_CLOEXEC | O_RDONLY);
+ if (proc_ns_dir_fd < 0)
+ return -errno;
+
+ if (fstatat(proc_ns_dir_fd, namespace, &stbuf, /* flags */0) < 0)
+ return -errno;
+
+ *ns = stbuf.st_ino;
+ return 0;
+}
+
+static int get_mount_namespace_leader(pid_t pid, pid_t *container_pid) {
+ pid_t cpid = pid, ppid = 0;
+ ino_t proc_mntns;
+ int r = 0;
+
+ r = get_process_ns(pid, "mnt", &proc_mntns);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ ino_t parent_mntns;
+
+ r = get_process_ppid(cpid, &ppid);
+ if (r < 0)
+ return r;
+
+ r = get_process_ns(ppid, "mnt", &parent_mntns);
+ if (r < 0)
+ return r;
+
+ if (proc_mntns != parent_mntns)
+ break;
+
+ if (ppid == 1)
+ return -ENOENT;
+
+ cpid = ppid;
+ }
+
+ *container_pid = ppid;
+ return 0;
+}
+
+/* Returns 1 if the parent was found.
+ * Returns 0 if there is not a process we can call the pid's
+ * container parent (the pid's process isn't 'containerized').
+ * Returns a negative number on errors.
+ */
+static int get_process_container_parent_cmdline(pid_t pid, char** cmdline) {
+ int r = 0;
+ pid_t container_pid;
+ const char *proc_root_path;
+ struct stat root_stat, proc_root_stat;
+
+ /* To compare inodes of / and /proc/[pid]/root */
+ if (stat("/", &root_stat) < 0)
+ return -errno;
+
+ proc_root_path = procfs_file_alloca(pid, "root");
+ if (stat(proc_root_path, &proc_root_stat) < 0)
+ return -errno;
+
+ /* The process uses system root. */
+ if (proc_root_stat.st_ino == root_stat.st_ino) {
+ *cmdline = NULL;
+ return 0;
+ }
+
+ r = get_mount_namespace_leader(pid, &container_pid);
+ if (r < 0)
+ return r;
+
+ r = get_process_cmdline(container_pid, 0, false, cmdline);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int change_uid_gid(const char *context[]) {
+ uid_t uid;
+ gid_t gid;
+ int r;
+
+ r = parse_uid(context[CONTEXT_UID], &uid);
+ if (r < 0)
+ return r;
+
+ if (uid <= SYSTEM_UID_MAX) {
+ const char *user = "systemd-coredump";
+
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
+ if (r < 0) {
+ log_warning_errno(r, "Cannot resolve %s user. Proceeding to dump core as root: %m", user);
+ uid = gid = 0;
+ }
+ } else {
+ r = parse_gid(context[CONTEXT_GID], &gid);
+ if (r < 0)
+ return r;
+ }
+
+ return drop_privileges(uid, gid, 0);
+}
+
+static bool is_journald_crash(const char *context[_CONTEXT_MAX]) {
+ assert(context);
+
+ return streq_ptr(context[CONTEXT_UNIT], SPECIAL_JOURNALD_SERVICE);
+}
+
+static bool is_pid1_crash(const char *context[_CONTEXT_MAX]) {
+ assert(context);
+
+ return streq_ptr(context[CONTEXT_UNIT], SPECIAL_INIT_SCOPE) ||
+ streq_ptr(context[CONTEXT_PID], "1");
+}
+
+#define SUBMIT_COREDUMP_FIELDS 4
+
+static int submit_coredump(
+ const char *context[_CONTEXT_MAX],
+ struct iovec *iovec,
+ size_t n_iovec_allocated,
+ size_t n_iovec,
+ int input_fd) {
+
+ _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1;
+ _cleanup_free_ char *core_message = NULL, *filename = NULL, *coredump_data = NULL;
+ uint64_t coredump_size = UINT64_MAX;
+ bool truncated = false, journald_crash;
+ int r;
+
+ assert(context);
+ assert(iovec);
+ assert(n_iovec_allocated >= n_iovec + SUBMIT_COREDUMP_FIELDS);
+ assert(input_fd >= 0);
+
+ journald_crash = is_journald_crash(context);
+
+ /* Vacuum before we write anything again */
+ (void) coredump_vacuum(-1, arg_keep_free, arg_max_use);
+
+ /* Always stream the coredump to disk, if that's possible */
+ r = save_external_coredump(context, input_fd,
+ &filename, &coredump_node_fd, &coredump_fd, &coredump_size, &truncated);
+ if (r < 0)
+ /* Skip whole core dumping part */
+ goto log;
+
+ /* If we don't want to keep the coredump on disk, remove it now, as later on we will lack the privileges for
+ * it. However, we keep the fd to it, so that we can still process it and log it. */
+ r = maybe_remove_external_coredump(filename, coredump_size);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char *coredump_filename;
+
+ coredump_filename = strjoina("COREDUMP_FILENAME=", filename);
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(coredump_filename);
+ } else if (arg_storage == COREDUMP_STORAGE_EXTERNAL)
+ log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
+ coredump_size, arg_external_size_max);
+
+ /* Vacuum again, but exclude the coredump we just created */
+ (void) coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use);
+
+ /* Now, let's drop privileges to become the user who owns the segfaulted process and allocate the coredump
+ * memory under the user's uid. This also ensures that the credentials journald will see are the ones of the
+ * coredumping user, thus making sure the user gets access to the core dump. Let's also get rid of all
+ * capabilities, if we run as root, we won't need them anymore. */
+ r = change_uid_gid(context);
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop privileges: %m");
+
+#if HAVE_ELFUTILS
+ /* Try to get a strack trace if we can */
+ if (coredump_size <= arg_process_size_max) {
+ _cleanup_free_ char *stacktrace = NULL;
+
+ r = coredump_make_stack_trace(coredump_fd, context[CONTEXT_EXE], &stacktrace);
+ if (r >= 0)
+ core_message = strjoin("MESSAGE=Process ", context[CONTEXT_PID],
+ " (", context[CONTEXT_COMM], ") of user ",
+ context[CONTEXT_UID], " dumped core.",
+ journald_crash ? "\nCoredump diverted to " : "",
+ journald_crash ? filename : "",
+ "\n\n", stacktrace);
+ else if (r == -EINVAL)
+ log_warning("Failed to generate stack trace: %s", dwfl_errmsg(dwfl_errno()));
+ else
+ log_warning_errno(r, "Failed to generate stack trace: %m");
+ } else
+ log_debug("Not generating stack trace: core size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
+ coredump_size, arg_process_size_max);
+
+ if (!core_message)
+#endif
+log:
+ core_message = strjoin("MESSAGE=Process ", context[CONTEXT_PID],
+ " (", context[CONTEXT_COMM], ") of user ",
+ context[CONTEXT_UID], " dumped core.",
+ journald_crash && filename ? "\nCoredump diverted to " : NULL,
+ journald_crash && filename ? filename : NULL);
+ if (!core_message)
+ return log_oom();
+
+ if (journald_crash) {
+ /* We cannot log to the journal, so just print the message.
+ * The target was set previously to something safe. */
+ assert(startswith(core_message, "MESSAGE="));
+ log_dispatch(LOG_ERR, 0, core_message + strlen("MESSAGE="));
+ return 0;
+ }
+
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(core_message);
+
+ if (truncated)
+ iovec[n_iovec++] = IOVEC_MAKE_STRING("COREDUMP_TRUNCATED=1");
+
+ /* Optionally store the entire coredump in the journal */
+ if (arg_storage == COREDUMP_STORAGE_JOURNAL) {
+ if (coredump_size <= arg_journal_size_max) {
+ size_t sz = 0;
+
+ /* Store the coredump itself in the journal */
+
+ r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz);
+ if (r >= 0)
+ iovec[n_iovec++] = IOVEC_MAKE(coredump_data, sz);
+ else
+ log_warning_errno(r, "Failed to attach the core to the journal entry: %m");
+ } else
+ log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
+ coredump_size, arg_journal_size_max);
+ }
+
+ assert(n_iovec <= n_iovec_allocated);
+
+ r = sd_journal_sendv(iovec, n_iovec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to log coredump: %m");
+
+ return 0;
+}
+
+static void map_context_fields(const struct iovec *iovec, const char* context[]) {
+
+ static const char * const context_field_names[] = {
+ [CONTEXT_PID] = "COREDUMP_PID=",
+ [CONTEXT_UID] = "COREDUMP_UID=",
+ [CONTEXT_GID] = "COREDUMP_GID=",
+ [CONTEXT_SIGNAL] = "COREDUMP_SIGNAL=",
+ [CONTEXT_TIMESTAMP] = "COREDUMP_TIMESTAMP=",
+ [CONTEXT_RLIMIT] = "COREDUMP_RLIMIT=",
+ [CONTEXT_HOSTNAME] = "COREDUMP_HOSTNAME=",
+ [CONTEXT_COMM] = "COREDUMP_COMM=",
+ [CONTEXT_EXE] = "COREDUMP_EXE=",
+ };
+
+ unsigned i;
+
+ assert(iovec);
+ assert(context);
+
+ for (i = 0; i < ELEMENTSOF(context_field_names); i++) {
+ char *p;
+
+ if (!context_field_names[i])
+ continue;
+
+ p = memory_startswith(iovec->iov_base, iovec->iov_len, context_field_names[i]);
+ if (!p)
+ continue;
+
+ /* Note that these strings are NUL terminated, because we made sure that a trailing NUL byte is in the
+ * buffer, though not included in the iov_len count. (see below) */
+ context[i] = p;
+ break;
+ }
+}
+
+static int process_socket(int fd) {
+ _cleanup_close_ int coredump_fd = -1;
+ struct iovec *iovec = NULL;
+ size_t n_iovec = 0, n_allocated = 0, i, k;
+ const char *context[_CONTEXT_MAX] = {};
+ int r;
+
+ assert(fd >= 0);
+
+ log_setup_service();
+
+ log_debug("Processing coredump received on stdin...");
+
+ for (;;) {
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int))];
+ } control = {};
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_iovlen = 1,
+ };
+ ssize_t n;
+ ssize_t l;
+
+ if (!GREEDY_REALLOC(iovec, n_allocated, n_iovec + SUBMIT_COREDUMP_FIELDS)) {
+ r = log_oom();
+ goto finish;
+ }
+
+ l = next_datagram_size_fd(fd);
+ if (l < 0) {
+ r = log_error_errno(l, "Failed to determine datagram size to read: %m");
+ goto finish;
+ }
+
+ assert(l >= 0);
+
+ iovec[n_iovec].iov_len = l;
+ iovec[n_iovec].iov_base = malloc(l + 1);
+ if (!iovec[n_iovec].iov_base) {
+ r = log_oom();
+ goto finish;
+ }
+
+ mh.msg_iov = iovec + n_iovec;
+
+ n = recvmsg(fd, &mh, MSG_CMSG_CLOEXEC);
+ if (n < 0) {
+ free(iovec[n_iovec].iov_base);
+ r = log_error_errno(errno, "Failed to receive datagram: %m");
+ goto finish;
+ }
+
+ if (n == 0) {
+ struct cmsghdr *cmsg, *found = NULL;
+ /* The final zero-length datagram carries the file descriptor and tells us that we're done. */
+
+ free(iovec[n_iovec].iov_base);
+
+ CMSG_FOREACH(cmsg, &mh) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(int))) {
+ assert(!found);
+ found = cmsg;
+ }
+ }
+
+ if (!found) {
+ log_error("Coredump file descriptor missing.");
+ r = -EBADMSG;
+ goto finish;
+ }
+
+ assert(coredump_fd < 0);
+ coredump_fd = *(int*) CMSG_DATA(found);
+ break;
+ }
+
+ /* Add trailing NUL byte, in case these are strings */
+ ((char*) iovec[n_iovec].iov_base)[n] = 0;
+ iovec[n_iovec].iov_len = (size_t) n;
+
+ cmsg_close_all(&mh);
+ map_context_fields(iovec + n_iovec, context);
+ n_iovec++;
+ }
+
+ if (!GREEDY_REALLOC(iovec, n_allocated, n_iovec + SUBMIT_COREDUMP_FIELDS)) {
+ r = log_oom();
+ goto finish;
+ }
+
+ /* Make sure we got all data we really need */
+ assert(context[CONTEXT_PID]);
+ assert(context[CONTEXT_UID]);
+ assert(context[CONTEXT_GID]);
+ assert(context[CONTEXT_SIGNAL]);
+ assert(context[CONTEXT_TIMESTAMP]);
+ assert(context[CONTEXT_RLIMIT]);
+ assert(context[CONTEXT_HOSTNAME]);
+ assert(context[CONTEXT_COMM]);
+ assert(coredump_fd >= 0);
+
+ /* Small quirk: the journal fields contain the timestamp padded with six zeroes, so that the kernel-supplied 1s
+ * granularity timestamps becomes 1µs granularity, i.e. the granularity systemd usually operates in. Since we
+ * are reconstructing the original kernel context, we chop this off again, here. */
+ k = strlen(context[CONTEXT_TIMESTAMP]);
+ if (k > 6)
+ context[CONTEXT_TIMESTAMP] = strndupa(context[CONTEXT_TIMESTAMP], k - 6);
+
+ r = submit_coredump(context, iovec, n_allocated, n_iovec, coredump_fd);
+
+finish:
+ for (i = 0; i < n_iovec; i++)
+ free(iovec[i].iov_base);
+ free(iovec);
+
+ return r;
+}
+
+static int send_iovec(const struct iovec iovec[], size_t n_iovec, int input_fd) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/coredump",
+ };
+ _cleanup_close_ int fd = -1;
+ size_t i;
+ int r;
+
+ assert(iovec || n_iovec <= 0);
+ assert(input_fd >= 0);
+
+ fd = socket(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to create coredump socket: %m");
+
+ if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0)
+ return log_error_errno(errno, "Failed to connect to coredump service: %m");
+
+ for (i = 0; i < n_iovec; i++) {
+ struct msghdr mh = {
+ .msg_iov = (struct iovec*) iovec + i,
+ .msg_iovlen = 1,
+ };
+ struct iovec copy[2];
+
+ for (;;) {
+ if (sendmsg(fd, &mh, MSG_NOSIGNAL) >= 0)
+ break;
+
+ if (errno == EMSGSIZE && mh.msg_iov[0].iov_len > 0) {
+ /* This field didn't fit? That's a pity. Given that this is just metadata,
+ * let's truncate the field at half, and try again. We append three dots, in
+ * order to show that this is truncated. */
+
+ if (mh.msg_iov != copy) {
+ /* We don't want to modify the caller's iovec, hence let's create our
+ * own array, consisting of two new iovecs, where the first is a
+ * (truncated) copy of what we want to send, and the second one
+ * contains the trailing dots. */
+ copy[0] = iovec[i];
+ copy[1] = (struct iovec) {
+ .iov_base = (char[]) { '.', '.', '.' },
+ .iov_len = 3,
+ };
+
+ mh.msg_iov = copy;
+ mh.msg_iovlen = 2;
+ }
+
+ copy[0].iov_len /= 2; /* halve it, and try again */
+ continue;
+ }
+
+ return log_error_errno(errno, "Failed to send coredump datagram: %m");
+ }
+ }
+
+ r = send_one_fd(fd, input_fd, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send coredump fd: %m");
+
+ return 0;
+}
+
+static char* set_iovec_field_free(struct iovec *iovec, size_t *n_iovec, const char *field, char *value) {
+ char *x;
+
+ x = set_iovec_string_field(iovec, n_iovec, field, value);
+ free(value);
+ return x;
+}
+
+static int gather_pid_metadata(
+ char* context[_CONTEXT_MAX],
+ char **comm_fallback,
+ struct iovec *iovec, size_t *n_iovec) {
+
+ /* We need 27 empty slots in iovec!
+ *
+ * Note that if we fail on oom later on, we do not roll-back changes to the iovec structure. (It remains valid,
+ * with the first n_iovec fields initialized.) */
+
+ uid_t owner_uid;
+ pid_t pid;
+ char *t;
+ const char *p;
+ int r, signo;
+
+ r = parse_pid(context[CONTEXT_PID], &pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PID \"%s\": %m", context[CONTEXT_PID]);
+
+ r = get_process_comm(pid, &context[CONTEXT_COMM]);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to get COMM, falling back to the command line: %m");
+ context[CONTEXT_COMM] = strv_join(comm_fallback, " ");
+ if (!context[CONTEXT_COMM])
+ return log_oom();
+ }
+
+ r = get_process_exe(pid, &context[CONTEXT_EXE]);
+ if (r < 0)
+ log_warning_errno(r, "Failed to get EXE, ignoring: %m");
+
+ if (cg_pid_get_unit(pid, &context[CONTEXT_UNIT]) >= 0) {
+ if (!is_journald_crash((const char**) context)) {
+ /* OK, now we know it's not the journal, hence we can make use of it now. */
+ log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
+ log_open();
+ }
+
+ /* If this is PID 1 disable coredump collection, we'll unlikely be able to process it later on. */
+ if (is_pid1_crash((const char**) context)) {
+ log_notice("Due to PID 1 having crashed coredump collection will now be turned off.");
+ disable_coredumps();
+ }
+
+ set_iovec_string_field(iovec, n_iovec, "COREDUMP_UNIT=", context[CONTEXT_UNIT]);
+ }
+
+ if (cg_pid_get_user_unit(pid, &t) >= 0)
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_USER_UNIT=", t);
+
+ /* The next few are mandatory */
+ if (!set_iovec_string_field(iovec, n_iovec, "COREDUMP_PID=", context[CONTEXT_PID]))
+ return log_oom();
+
+ if (!set_iovec_string_field(iovec, n_iovec, "COREDUMP_UID=", context[CONTEXT_UID]))
+ return log_oom();
+
+ if (!set_iovec_string_field(iovec, n_iovec, "COREDUMP_GID=", context[CONTEXT_GID]))
+ return log_oom();
+
+ if (!set_iovec_string_field(iovec, n_iovec, "COREDUMP_SIGNAL=", context[CONTEXT_SIGNAL]))
+ return log_oom();
+
+ if (!set_iovec_string_field(iovec, n_iovec, "COREDUMP_RLIMIT=", context[CONTEXT_RLIMIT]))
+ return log_oom();
+
+ if (!set_iovec_string_field(iovec, n_iovec, "COREDUMP_HOSTNAME=", context[CONTEXT_HOSTNAME]))
+ return log_oom();
+
+ if (!set_iovec_string_field(iovec, n_iovec, "COREDUMP_COMM=", context[CONTEXT_COMM]))
+ return log_oom();
+
+ if (context[CONTEXT_EXE] &&
+ !set_iovec_string_field(iovec, n_iovec, "COREDUMP_EXE=", context[CONTEXT_EXE]))
+ return log_oom();
+
+ if (sd_pid_get_session(pid, &t) >= 0)
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_SESSION=", t);
+
+ if (sd_pid_get_owner_uid(pid, &owner_uid) >= 0) {
+ r = asprintf(&t, "COREDUMP_OWNER_UID=" UID_FMT, owner_uid);
+ if (r > 0)
+ iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(t);
+ }
+
+ if (sd_pid_get_slice(pid, &t) >= 0)
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_SLICE=", t);
+
+ if (get_process_cmdline(pid, 0, false, &t) >= 0)
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_CMDLINE=", t);
+
+ if (cg_pid_get_path_shifted(pid, NULL, &t) >= 0)
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_CGROUP=", t);
+
+ if (compose_open_fds(pid, &t) >= 0)
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_OPEN_FDS=", t);
+
+ p = procfs_file_alloca(pid, "status");
+ if (read_full_file(p, &t, NULL) >= 0)
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_PROC_STATUS=", t);
+
+ p = procfs_file_alloca(pid, "maps");
+ if (read_full_file(p, &t, NULL) >= 0)
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_PROC_MAPS=", t);
+
+ p = procfs_file_alloca(pid, "limits");
+ if (read_full_file(p, &t, NULL) >= 0)
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_PROC_LIMITS=", t);
+
+ p = procfs_file_alloca(pid, "cgroup");
+ if (read_full_file(p, &t, NULL) >=0)
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_PROC_CGROUP=", t);
+
+ p = procfs_file_alloca(pid, "mountinfo");
+ if (read_full_file(p, &t, NULL) >=0)
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_PROC_MOUNTINFO=", t);
+
+ if (get_process_cwd(pid, &t) >= 0)
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_CWD=", t);
+
+ if (get_process_root(pid, &t) >= 0) {
+ bool proc_self_root_is_slash;
+
+ proc_self_root_is_slash = strcmp(t, "/") == 0;
+
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_ROOT=", t);
+
+ /* If the process' root is "/", then there is a chance it has
+ * mounted own root and hence being containerized. */
+ if (proc_self_root_is_slash && get_process_container_parent_cmdline(pid, &t) > 0)
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_CONTAINER_CMDLINE=", t);
+ }
+
+ if (get_process_environ(pid, &t) >= 0)
+ set_iovec_field_free(iovec, n_iovec, "COREDUMP_ENVIRON=", t);
+
+ t = strjoin("COREDUMP_TIMESTAMP=", context[CONTEXT_TIMESTAMP], "000000");
+ if (t)
+ iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(t);
+
+ if (safe_atoi(context[CONTEXT_SIGNAL], &signo) >= 0 && SIGNAL_VALID(signo))
+ set_iovec_string_field(iovec, n_iovec, "COREDUMP_SIGNAL_NAME=SIG", signal_to_string(signo));
+
+ return 0; /* we successfully acquired all metadata */
+}
+
+static int process_kernel(int argc, char* argv[]) {
+
+ char* context[_CONTEXT_MAX] = {};
+ struct iovec iovec[29 + SUBMIT_COREDUMP_FIELDS];
+ size_t i, n_iovec, n_to_free = 0;
+ int r;
+
+ log_debug("Processing coredump received from the kernel...");
+
+ if (argc < CONTEXT_COMM + 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not enough arguments passed by the kernel (%i, expected %i).",
+ argc - 1, CONTEXT_COMM + 1 - 1);
+
+ context[CONTEXT_PID] = argv[1 + CONTEXT_PID];
+ context[CONTEXT_UID] = argv[1 + CONTEXT_UID];
+ context[CONTEXT_GID] = argv[1 + CONTEXT_GID];
+ context[CONTEXT_SIGNAL] = argv[1 + CONTEXT_SIGNAL];
+ context[CONTEXT_TIMESTAMP] = argv[1 + CONTEXT_TIMESTAMP];
+ context[CONTEXT_RLIMIT] = argv[1 + CONTEXT_RLIMIT];
+ context[CONTEXT_HOSTNAME] = argv[1 + CONTEXT_HOSTNAME];
+
+ r = gather_pid_metadata(context, argv + 1 + CONTEXT_COMM, iovec, &n_to_free);
+ if (r < 0)
+ goto finish;
+
+ n_iovec = n_to_free;
+
+ iovec[n_iovec++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR);
+
+ assert_cc(2 == LOG_CRIT);
+ iovec[n_iovec++] = IOVEC_MAKE_STRING("PRIORITY=2");
+
+ assert(n_iovec <= ELEMENTSOF(iovec));
+
+ if (is_journald_crash((const char**) context) || is_pid1_crash((const char**) context))
+ r = submit_coredump((const char**) context,
+ iovec, ELEMENTSOF(iovec), n_iovec,
+ STDIN_FILENO);
+ else
+ r = send_iovec(iovec, n_iovec, STDIN_FILENO);
+
+ finish:
+ for (i = 0; i < n_to_free; i++)
+ free(iovec[i].iov_base);
+
+ /* Those fields are allocated by gather_pid_metadata */
+ free(context[CONTEXT_COMM]);
+ free(context[CONTEXT_EXE]);
+ free(context[CONTEXT_UNIT]);
+
+ return r;
+}
+
+static int process_backtrace(int argc, char *argv[]) {
+ char *context[_CONTEXT_MAX] = {};
+ _cleanup_free_ char *message = NULL;
+ _cleanup_free_ struct iovec *iovec = NULL;
+ size_t n_iovec, n_allocated, n_to_free = 0, i;
+ int r;
+ JournalImporter importer = {
+ .fd = STDIN_FILENO,
+ };
+
+ log_debug("Processing backtrace on stdin...");
+
+ if (argc < CONTEXT_COMM + 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not enough arguments passed (%i, expected %i).",
+ argc - 1, CONTEXT_COMM + 1 - 1);
+
+ context[CONTEXT_PID] = argv[2 + CONTEXT_PID];
+ context[CONTEXT_UID] = argv[2 + CONTEXT_UID];
+ context[CONTEXT_GID] = argv[2 + CONTEXT_GID];
+ context[CONTEXT_SIGNAL] = argv[2 + CONTEXT_SIGNAL];
+ context[CONTEXT_TIMESTAMP] = argv[2 + CONTEXT_TIMESTAMP];
+ context[CONTEXT_RLIMIT] = argv[2 + CONTEXT_RLIMIT];
+ context[CONTEXT_HOSTNAME] = argv[2 + CONTEXT_HOSTNAME];
+
+ n_allocated = 34 + COREDUMP_STORAGE_EXTERNAL;
+ /* 26 metadata, 2 static, +unknown input, 4 storage, rounded up */
+ iovec = new(struct iovec, n_allocated);
+ if (!iovec)
+ return log_oom();
+
+ r = gather_pid_metadata(context, argv + 2 + CONTEXT_COMM, iovec, &n_to_free);
+ if (r < 0)
+ goto finish;
+ if (r > 0) {
+ /* This was a special crash, and has already been processed. */
+ r = 0;
+ goto finish;
+ }
+ n_iovec = n_to_free;
+
+ for (;;) {
+ r = journal_importer_process_data(&importer);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse journal entry on stdin: %m");
+ goto finish;
+ }
+ if (r == 1 || /* complete entry */
+ journal_importer_eof(&importer)) /* end of data */
+ break;
+ }
+
+ if (!GREEDY_REALLOC(iovec, n_allocated, n_iovec + importer.iovw.count + 2))
+ return log_oom();
+
+ if (journal_importer_eof(&importer)) {
+ log_warning("Did not receive a full journal entry on stdin, ignoring message sent by reporter");
+
+ message = strjoin("MESSAGE=Process ", context[CONTEXT_PID],
+ " (", context[CONTEXT_COMM], ")"
+ " of user ", context[CONTEXT_UID],
+ " failed with ", context[CONTEXT_SIGNAL]);
+ if (!message) {
+ r = log_oom();
+ goto finish;
+ }
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(message);
+ } else {
+ for (i = 0; i < importer.iovw.count; i++)
+ iovec[n_iovec++] = importer.iovw.iovec[i];
+ }
+
+ iovec[n_iovec++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR);
+ assert_cc(2 == LOG_CRIT);
+ iovec[n_iovec++] = IOVEC_MAKE_STRING("PRIORITY=2");
+
+ assert(n_iovec <= n_allocated);
+
+ r = sd_journal_sendv(iovec, n_iovec);
+ if (r < 0)
+ log_error_errno(r, "Failed to log backtrace: %m");
+
+ finish:
+ for (i = 0; i < n_to_free; i++)
+ free(iovec[i].iov_base);
+
+ /* Those fields are allocated by gather_pid_metadata */
+ free(context[CONTEXT_COMM]);
+ free(context[CONTEXT_EXE]);
+ free(context[CONTEXT_UNIT]);
+
+ return r;
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ /* First, log to a safe place, since we don't know what crashed and it might
+ * be journald which we'd rather not log to then. */
+
+ log_set_target(LOG_TARGET_KMSG);
+ log_open();
+
+ /* Make sure we never enter a loop */
+ (void) prctl(PR_SET_DUMPABLE, 0);
+
+ /* Ignore all parse errors */
+ (void) parse_config();
+
+ log_debug("Selected storage '%s'.", coredump_storage_to_string(arg_storage));
+ log_debug("Selected compression %s.", yes_no(arg_compress));
+
+ r = sd_listen_fds(false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine the number of file descriptors: %m");
+
+ /* If we got an fd passed, we are running in coredumpd mode. Otherwise we
+ * are invoked from the kernel as coredump handler. */
+ if (r == 0) {
+ if (streq_ptr(argv[1], "--backtrace"))
+ return process_backtrace(argc, argv);
+ else
+ return process_kernel(argc, argv);
+ } else if (r == 1)
+ return process_socket(SD_LISTEN_FDS_START);
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Received unexpected number of file descriptors.");
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/coredump/coredump.conf b/src/coredump/coredump.conf
new file mode 100644
index 0000000..c2f0643
--- /dev/null
+++ b/src/coredump/coredump.conf
@@ -0,0 +1,21 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See coredump.conf(5) for details.
+
+[Coredump]
+#Storage=external
+#Compress=yes
+#ProcessSizeMax=2G
+#ExternalSizeMax=2G
+#JournalSizeMax=767M
+#MaxUse=
+#KeepFree=
diff --git a/src/coredump/coredumpctl.c b/src/coredump/coredumpctl.c
new file mode 100644
index 0000000..fbee242
--- /dev/null
+++ b/src/coredump/coredumpctl.c
@@ -0,0 +1,1096 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-journal.h"
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "compress.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "journal-internal.h"
+#include "journal-util.h"
+#include "log.h"
+#include "macro.h"
+#include "main-func.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "sigbus.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "verbs.h"
+
+#define SHORT_BUS_CALL_TIMEOUT_USEC (3 * USEC_PER_SEC)
+
+static usec_t arg_since = USEC_INFINITY, arg_until = USEC_INFINITY;
+static const char* arg_field = NULL;
+static const char *arg_debugger = NULL;
+static const char *arg_directory = NULL;
+static PagerFlags arg_pager_flags = 0;
+static int arg_no_legend = false;
+static int arg_one = false;
+static const char* arg_output = NULL;
+static bool arg_reverse = false;
+static bool arg_quiet = false;
+
+static int add_match(sd_journal *j, const char *match) {
+ _cleanup_free_ char *p = NULL;
+ const char* prefix, *pattern;
+ pid_t pid;
+ int r;
+
+ if (strchr(match, '='))
+ prefix = "";
+ else if (strchr(match, '/')) {
+ r = path_make_absolute_cwd(match, &p);
+ if (r < 0)
+ return log_error_errno(r, "path_make_absolute_cwd(\"%s\"): %m", match);
+
+ match = p;
+ prefix = "COREDUMP_EXE=";
+ } else if (parse_pid(match, &pid) >= 0)
+ prefix = "COREDUMP_PID=";
+ else
+ prefix = "COREDUMP_COMM=";
+
+ pattern = strjoina(prefix, match);
+ log_debug("Adding match: %s", pattern);
+ r = sd_journal_add_match(j, pattern, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match \"%s\": %m", match);
+
+ return 0;
+}
+
+static int add_matches(sd_journal *j, char **matches) {
+ char **match;
+ int r;
+
+ r = sd_journal_add_match(j, "MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match \"%s\": %m", "MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR);
+
+ r = sd_journal_add_match(j, "MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match \"%s\": %m", "MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR);
+
+ STRV_FOREACH(match, matches) {
+ r = add_match(j, *match);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int acquire_journal(sd_journal **ret, char **matches) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ int r;
+
+ assert(ret);
+
+ if (arg_directory) {
+ r = sd_journal_open_directory(&j, arg_directory, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open journals in directory: %s: %m", arg_directory);
+ } else {
+ r = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open journal: %m");
+ }
+
+ r = journal_access_check_and_warn(j, arg_quiet, true);
+ if (r < 0)
+ return r;
+
+ r = add_matches(j, matches);
+ if (r < 0)
+ return r;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *filter;
+
+ filter = journal_make_match_string(j);
+ log_debug("Journal filter: %s", filter);
+ }
+
+ *ret = TAKE_PTR(j);
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("coredumpctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "List or retrieve coredumps from the journal.\n\n"
+ "Flags:\n"
+ " -h --help Show this help\n"
+ " --version Print version string\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not print the column headers\n"
+ " --debugger=DEBUGGER Use the given debugger\n"
+ " -1 Show information about most recent entry only\n"
+ " -S --since=DATE Only print coredumps since the date\n"
+ " -U --until=DATE Only print coredumps until the date\n"
+ " -r --reverse Show the newest entries first\n"
+ " -F --field=FIELD List all values a certain field takes\n"
+ " -o --output=FILE Write output to FILE\n"
+ " -D --directory=DIR Use journal files from directory\n\n"
+ " -q --quiet Do not show info messages and privilege warning\n"
+ "Commands:\n"
+ " list [MATCHES...] List available coredumps (default)\n"
+ " info [MATCHES...] Show detailed information about one or more coredumps\n"
+ " dump [MATCHES...] Print first matching coredump to stdout\n"
+ " debug [MATCHES...] Start a debugger for the first matching coredump\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ ARG_DEBUGGER,
+ };
+
+ int c, r;
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version" , no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "debugger", required_argument, NULL, ARG_DEBUGGER },
+ { "output", required_argument, NULL, 'o' },
+ { "field", required_argument, NULL, 'F' },
+ { "directory", required_argument, NULL, 'D' },
+ { "reverse", no_argument, NULL, 'r' },
+ { "since", required_argument, NULL, 'S' },
+ { "until", required_argument, NULL, 'U' },
+ { "quiet", no_argument, NULL, 'q' },
+ {}
+ };
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "ho:F:1D:rS:U:q", options, NULL)) >= 0)
+ switch(c) {
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_no_legend = true;
+ break;
+
+ case ARG_DEBUGGER:
+ arg_debugger = optarg;
+ break;
+
+ case 'o':
+ if (arg_output)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot set output more than once.");
+
+ arg_output = optarg;
+ break;
+
+ case 'S':
+ r = parse_timestamp(optarg, &arg_since);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse timestamp '%s': %m", optarg);
+ break;
+
+ case 'U':
+ r = parse_timestamp(optarg, &arg_until);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse timestamp '%s': %m", optarg);
+ break;
+
+ case 'F':
+ if (arg_field)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot use --field/-F more than once.");
+ arg_field = optarg;
+ break;
+
+ case '1':
+ arg_one = true;
+ break;
+
+ case 'D':
+ arg_directory = optarg;
+ break;
+
+ case 'r':
+ arg_reverse = true;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_since != USEC_INFINITY && arg_until != USEC_INFINITY &&
+ arg_since > arg_until)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--since= must be before --until=.");
+
+ return 1;
+}
+
+static int retrieve(const void *data,
+ size_t len,
+ const char *name,
+ char **var) {
+
+ size_t ident;
+ char *v;
+
+ ident = strlen(name) + 1; /* name + "=" */
+
+ if (len < ident)
+ return 0;
+
+ if (memcmp(data, name, ident - 1) != 0)
+ return 0;
+
+ if (((const char*) data)[ident - 1] != '=')
+ return 0;
+
+ v = strndup((const char*)data + ident, len - ident);
+ if (!v)
+ return log_oom();
+
+ free(*var);
+ *var = v;
+
+ return 1;
+}
+
+static int print_field(FILE* file, sd_journal *j) {
+ const void *d;
+ size_t l;
+
+ assert(file);
+ assert(j);
+
+ assert(arg_field);
+
+ /* A (user-specified) field may appear more than once for a given entry.
+ * We will print all of the occurences.
+ * This is different below for fields that systemd-coredump uses,
+ * because they cannot meaningfully appear more than once.
+ */
+ SD_JOURNAL_FOREACH_DATA(j, d, l) {
+ _cleanup_free_ char *value = NULL;
+ int r;
+
+ r = retrieve(d, l, arg_field, &value);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ fprintf(file, "%s\n", value);
+ }
+
+ return 0;
+}
+
+#define RETRIEVE(d, l, name, arg) \
+ { \
+ int _r = retrieve(d, l, name, &arg); \
+ if (_r < 0) \
+ return _r; \
+ if (_r > 0) \
+ continue; \
+ }
+
+static int print_list(FILE* file, sd_journal *j, int had_legend) {
+ _cleanup_free_ char
+ *mid = NULL, *pid = NULL, *uid = NULL, *gid = NULL,
+ *sgnl = NULL, *exe = NULL, *comm = NULL, *cmdline = NULL,
+ *filename = NULL, *truncated = NULL, *coredump = NULL;
+ const void *d;
+ size_t l;
+ usec_t t;
+ char buf[FORMAT_TIMESTAMP_MAX];
+ int r;
+ const char *present;
+ bool normal_coredump;
+
+ assert(file);
+ assert(j);
+
+ SD_JOURNAL_FOREACH_DATA(j, d, l) {
+ RETRIEVE(d, l, "MESSAGE_ID", mid);
+ RETRIEVE(d, l, "COREDUMP_PID", pid);
+ RETRIEVE(d, l, "COREDUMP_UID", uid);
+ RETRIEVE(d, l, "COREDUMP_GID", gid);
+ RETRIEVE(d, l, "COREDUMP_SIGNAL", sgnl);
+ RETRIEVE(d, l, "COREDUMP_EXE", exe);
+ RETRIEVE(d, l, "COREDUMP_COMM", comm);
+ RETRIEVE(d, l, "COREDUMP_CMDLINE", cmdline);
+ RETRIEVE(d, l, "COREDUMP_FILENAME", filename);
+ RETRIEVE(d, l, "COREDUMP_TRUNCATED", truncated);
+ RETRIEVE(d, l, "COREDUMP", coredump);
+ }
+
+ if (!pid && !uid && !gid && !sgnl && !exe && !comm && !cmdline && !filename) {
+ log_warning("Empty coredump log entry");
+ return -EINVAL;
+ }
+
+ r = sd_journal_get_realtime_usec(j, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ format_timestamp(buf, sizeof(buf), t);
+
+ if (!had_legend && !arg_no_legend)
+ fprintf(file, "%-*s %*s %*s %*s %*s %-*s %s\n",
+ FORMAT_TIMESTAMP_WIDTH, "TIME",
+ 6, "PID",
+ 5, "UID",
+ 5, "GID",
+ 3, "SIG",
+ 9, "COREFILE",
+ "EXE");
+
+ normal_coredump = streq_ptr(mid, SD_MESSAGE_COREDUMP_STR);
+
+ if (filename)
+ if (access(filename, R_OK) == 0)
+ present = "present";
+ else if (errno == ENOENT)
+ present = "missing";
+ else
+ present = "error";
+ else if (coredump)
+ present = "journal";
+ else if (normal_coredump)
+ present = "none";
+ else
+ present = "-";
+
+ if (STR_IN_SET(present, "present", "journal") && truncated && parse_boolean(truncated) > 0)
+ present = "truncated";
+
+ fprintf(file, "%-*s %*s %*s %*s %*s %-*s %s\n",
+ FORMAT_TIMESTAMP_WIDTH, buf,
+ 6, strna(pid),
+ 5, strna(uid),
+ 5, strna(gid),
+ 3, normal_coredump ? strna(sgnl) : "-",
+ 9, present,
+ strna(exe ?: (comm ?: cmdline)));
+
+ return 0;
+}
+
+static int print_info(FILE *file, sd_journal *j, bool need_space) {
+ _cleanup_free_ char
+ *mid = NULL, *pid = NULL, *uid = NULL, *gid = NULL,
+ *sgnl = NULL, *exe = NULL, *comm = NULL, *cmdline = NULL,
+ *unit = NULL, *user_unit = NULL, *session = NULL,
+ *boot_id = NULL, *machine_id = NULL, *hostname = NULL,
+ *slice = NULL, *cgroup = NULL, *owner_uid = NULL,
+ *message = NULL, *timestamp = NULL, *filename = NULL,
+ *truncated = NULL, *coredump = NULL;
+ const void *d;
+ size_t l;
+ bool normal_coredump;
+ int r;
+
+ assert(file);
+ assert(j);
+
+ SD_JOURNAL_FOREACH_DATA(j, d, l) {
+ RETRIEVE(d, l, "MESSAGE_ID", mid);
+ RETRIEVE(d, l, "COREDUMP_PID", pid);
+ RETRIEVE(d, l, "COREDUMP_UID", uid);
+ RETRIEVE(d, l, "COREDUMP_GID", gid);
+ RETRIEVE(d, l, "COREDUMP_SIGNAL", sgnl);
+ RETRIEVE(d, l, "COREDUMP_EXE", exe);
+ RETRIEVE(d, l, "COREDUMP_COMM", comm);
+ RETRIEVE(d, l, "COREDUMP_CMDLINE", cmdline);
+ RETRIEVE(d, l, "COREDUMP_UNIT", unit);
+ RETRIEVE(d, l, "COREDUMP_USER_UNIT", user_unit);
+ RETRIEVE(d, l, "COREDUMP_SESSION", session);
+ RETRIEVE(d, l, "COREDUMP_OWNER_UID", owner_uid);
+ RETRIEVE(d, l, "COREDUMP_SLICE", slice);
+ RETRIEVE(d, l, "COREDUMP_CGROUP", cgroup);
+ RETRIEVE(d, l, "COREDUMP_TIMESTAMP", timestamp);
+ RETRIEVE(d, l, "COREDUMP_FILENAME", filename);
+ RETRIEVE(d, l, "COREDUMP_TRUNCATED", truncated);
+ RETRIEVE(d, l, "COREDUMP", coredump);
+ RETRIEVE(d, l, "_BOOT_ID", boot_id);
+ RETRIEVE(d, l, "_MACHINE_ID", machine_id);
+ RETRIEVE(d, l, "_HOSTNAME", hostname);
+ RETRIEVE(d, l, "MESSAGE", message);
+ }
+
+ if (need_space)
+ fputs("\n", file);
+
+ normal_coredump = streq_ptr(mid, SD_MESSAGE_COREDUMP_STR);
+
+ if (comm)
+ fprintf(file,
+ " PID: %s%s%s (%s)\n",
+ ansi_highlight(), strna(pid), ansi_normal(), comm);
+ else
+ fprintf(file,
+ " PID: %s%s%s\n",
+ ansi_highlight(), strna(pid), ansi_normal());
+
+ if (uid) {
+ uid_t n;
+
+ if (parse_uid(uid, &n) >= 0) {
+ _cleanup_free_ char *u = NULL;
+
+ u = uid_to_name(n);
+ fprintf(file,
+ " UID: %s (%s)\n",
+ uid, u);
+ } else {
+ fprintf(file,
+ " UID: %s\n",
+ uid);
+ }
+ }
+
+ if (gid) {
+ gid_t n;
+
+ if (parse_gid(gid, &n) >= 0) {
+ _cleanup_free_ char *g = NULL;
+
+ g = gid_to_name(n);
+ fprintf(file,
+ " GID: %s (%s)\n",
+ gid, g);
+ } else {
+ fprintf(file,
+ " GID: %s\n",
+ gid);
+ }
+ }
+
+ if (sgnl) {
+ int sig;
+ const char *name = normal_coredump ? "Signal" : "Reason";
+
+ if (normal_coredump && safe_atoi(sgnl, &sig) >= 0)
+ fprintf(file, " %s: %s (%s)\n", name, sgnl, signal_to_string(sig));
+ else
+ fprintf(file, " %s: %s\n", name, sgnl);
+ }
+
+ if (timestamp) {
+ usec_t u;
+
+ r = safe_atou64(timestamp, &u);
+ if (r >= 0) {
+ char absolute[FORMAT_TIMESTAMP_MAX], relative[FORMAT_TIMESPAN_MAX];
+
+ fprintf(file,
+ " Timestamp: %s (%s)\n",
+ format_timestamp(absolute, sizeof(absolute), u),
+ format_timestamp_relative(relative, sizeof(relative), u));
+
+ } else
+ fprintf(file, " Timestamp: %s\n", timestamp);
+ }
+
+ if (cmdline)
+ fprintf(file, " Command Line: %s\n", cmdline);
+ if (exe)
+ fprintf(file, " Executable: %s%s%s\n", ansi_highlight(), exe, ansi_normal());
+ if (cgroup)
+ fprintf(file, " Control Group: %s\n", cgroup);
+ if (unit)
+ fprintf(file, " Unit: %s\n", unit);
+ if (user_unit)
+ fprintf(file, " User Unit: %s\n", user_unit);
+ if (slice)
+ fprintf(file, " Slice: %s\n", slice);
+ if (session)
+ fprintf(file, " Session: %s\n", session);
+ if (owner_uid) {
+ uid_t n;
+
+ if (parse_uid(owner_uid, &n) >= 0) {
+ _cleanup_free_ char *u = NULL;
+
+ u = uid_to_name(n);
+ fprintf(file,
+ " Owner UID: %s (%s)\n",
+ owner_uid, u);
+ } else {
+ fprintf(file,
+ " Owner UID: %s\n",
+ owner_uid);
+ }
+ }
+ if (boot_id)
+ fprintf(file, " Boot ID: %s\n", boot_id);
+ if (machine_id)
+ fprintf(file, " Machine ID: %s\n", machine_id);
+ if (hostname)
+ fprintf(file, " Hostname: %s\n", hostname);
+
+ if (filename) {
+ bool inacc, trunc;
+
+ inacc = access(filename, R_OK) < 0;
+ trunc = truncated && parse_boolean(truncated) > 0;
+
+ if (inacc || trunc)
+ fprintf(file, " Storage: %s%s (%s%s%s)%s\n",
+ ansi_highlight_red(),
+ filename,
+ inacc ? "inaccessible" : "",
+ inacc && trunc ? ", " : "",
+ trunc ? "truncated" : "",
+ ansi_normal());
+ else
+ fprintf(file, " Storage: %s\n", filename);
+ }
+
+ else if (coredump)
+ fprintf(file, " Storage: journal\n");
+ else
+ fprintf(file, " Storage: none\n");
+
+ if (message) {
+ _cleanup_free_ char *m = NULL;
+
+ m = strreplace(message, "\n", "\n ");
+
+ fprintf(file, " Message: %s\n", strstrip(m ?: message));
+ }
+
+ return 0;
+}
+
+static int focus(sd_journal *j) {
+ int r;
+
+ r = sd_journal_seek_tail(j);
+ if (r == 0)
+ r = sd_journal_previous(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to search journal: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
+ "No match found.");
+ return r;
+}
+
+static int print_entry(sd_journal *j, unsigned n_found, bool verb_is_info) {
+ assert(j);
+
+ if (verb_is_info)
+ return print_info(stdout, j, n_found);
+ else if (arg_field)
+ return print_field(stdout, j);
+ else
+ return print_list(stdout, j, n_found);
+}
+
+static int dump_list(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ unsigned n_found = 0;
+ bool verb_is_info;
+ int r;
+
+ verb_is_info = (argc >= 1 && streq(argv[0], "info"));
+
+ r = acquire_journal(&j, argv + 1);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ /* The coredumps are likely to compressed, and for just
+ * listing them we don't need to decompress them, so let's
+ * pick a fairly low data threshold here */
+ sd_journal_set_data_threshold(j, 4096);
+
+ /* "info" without pattern implies "-1" */
+ if (arg_one || (verb_is_info && argc == 1)) {
+ r = focus(j);
+ if (r < 0)
+ return r;
+
+ return print_entry(j, 0, verb_is_info);
+ } else {
+ if (arg_since != USEC_INFINITY && !arg_reverse)
+ r = sd_journal_seek_realtime_usec(j, arg_since);
+ else if (arg_until != USEC_INFINITY && arg_reverse)
+ r = sd_journal_seek_realtime_usec(j, arg_until);
+ else if (arg_reverse)
+ r = sd_journal_seek_tail(j);
+ else
+ r = sd_journal_seek_head(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to seek to date: %m");
+
+ for (;;) {
+ if (!arg_reverse)
+ r = sd_journal_next(j);
+ else
+ r = sd_journal_previous(j);
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to iterate through journal: %m");
+
+ if (r == 0)
+ break;
+
+ if (arg_until != USEC_INFINITY && !arg_reverse) {
+ usec_t usec;
+
+ r = sd_journal_get_realtime_usec(j, &usec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine timestamp: %m");
+ if (usec > arg_until)
+ continue;
+ }
+
+ if (arg_since != USEC_INFINITY && arg_reverse) {
+ usec_t usec;
+
+ r = sd_journal_get_realtime_usec(j, &usec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine timestamp: %m");
+ if (usec < arg_since)
+ continue;
+ }
+
+ r = print_entry(j, n_found++, verb_is_info);
+ if (r < 0)
+ return r;
+ }
+
+ if (!arg_field && n_found <= 0) {
+ if (!arg_quiet)
+ log_notice("No coredumps found.");
+ return -ESRCH;
+ }
+ }
+
+ return 0;
+}
+
+static int save_core(sd_journal *j, FILE *file, char **path, bool *unlink_temp) {
+ const char *data;
+ _cleanup_free_ char *filename = NULL;
+ size_t len;
+ int r, fd;
+ _cleanup_close_ int fdt = -1;
+ char *temp = NULL;
+
+ assert(!(file && path)); /* At most one can be specified */
+ assert(!!path == !!unlink_temp); /* Those must be specified together */
+
+ /* Look for a coredump on disk first. */
+ r = sd_journal_get_data(j, "COREDUMP_FILENAME", (const void**) &data, &len);
+ if (r == 0)
+ retrieve(data, len, "COREDUMP_FILENAME", &filename);
+ else {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to retrieve COREDUMP_FILENAME field: %m");
+ /* Check that we can have a COREDUMP field. We still haven't set a high
+ * data threshold, so we'll get a few kilobytes at most.
+ */
+
+ r = sd_journal_get_data(j, "COREDUMP", (const void**) &data, &len);
+ if (r == -ENOENT)
+ return log_error_errno(r, "Coredump entry has no core attached (neither internally in the journal nor externally on disk).");
+ if (r < 0)
+ return log_error_errno(r, "Failed to retrieve COREDUMP field: %m");
+ }
+
+ if (filename) {
+ if (access(filename, R_OK) < 0)
+ return log_error_errno(errno, "File \"%s\" is not readable: %m", filename);
+
+ if (path && !endswith(filename, ".xz") && !endswith(filename, ".lz4")) {
+ *path = TAKE_PTR(filename);
+
+ return 0;
+ }
+ }
+
+ if (path) {
+ const char *vt;
+
+ /* Create a temporary file to write the uncompressed core to. */
+
+ r = var_tmp_dir(&vt);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire temporary directory path: %m");
+
+ temp = strjoin(vt, "/coredump-XXXXXX");
+ if (!temp)
+ return log_oom();
+
+ fdt = mkostemp_safe(temp);
+ if (fdt < 0)
+ return log_error_errno(fdt, "Failed to create temporary file: %m");
+ log_debug("Created temporary file %s", temp);
+
+ fd = fdt;
+ } else {
+ /* If neither path or file are specified, we will write to stdout. Let's now check
+ * if stdout is connected to a tty. We checked that the file exists, or that the
+ * core might be stored in the journal. In this second case, if we found the entry,
+ * in all likelyhood we will be able to access the COREDUMP= field. In either case,
+ * we stop before doing any "real" work, i.e. before starting decompression or
+ * reading from the file or creating temporary files.
+ */
+ if (!file) {
+ if (on_tty())
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY),
+ "Refusing to dump core to tty"
+ " (use shell redirection or specify --output).");
+ file = stdout;
+ }
+
+ fd = fileno(file);
+ }
+
+ if (filename) {
+#if HAVE_XZ || HAVE_LZ4
+ _cleanup_close_ int fdf;
+
+ fdf = open(filename, O_RDONLY | O_CLOEXEC);
+ if (fdf < 0) {
+ r = log_error_errno(errno, "Failed to open %s: %m", filename);
+ goto error;
+ }
+
+ r = decompress_stream(filename, fdf, fd, -1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to decompress %s: %m", filename);
+ goto error;
+ }
+#else
+ log_error("Cannot decompress file. Compiled without compression support.");
+ r = -EOPNOTSUPP;
+ goto error;
+#endif
+ } else {
+ ssize_t sz;
+
+ /* We want full data, nothing truncated. */
+ sd_journal_set_data_threshold(j, 0);
+
+ r = sd_journal_get_data(j, "COREDUMP", (const void**) &data, &len);
+ if (r < 0)
+ return log_error_errno(r, "Failed to retrieve COREDUMP field: %m");
+
+ assert(len >= 9);
+ data += 9;
+ len -= 9;
+
+ sz = write(fd, data, len);
+ if (sz < 0) {
+ r = log_error_errno(errno, "Failed to write output: %m");
+ goto error;
+ }
+ if (sz != (ssize_t) len) {
+ log_error("Short write to output.");
+ r = -EIO;
+ goto error;
+ }
+ }
+
+ if (temp) {
+ *path = temp;
+ *unlink_temp = true;
+ }
+ return 0;
+
+error:
+ if (temp) {
+ unlink(temp);
+ log_debug("Removed temporary file %s", temp);
+ }
+ return r;
+}
+
+static int dump_core(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ if (arg_field) {
+ log_error("Option --field/-F only makes sense with list");
+ return -EINVAL;
+ }
+
+ r = acquire_journal(&j, argv + 1);
+ if (r < 0)
+ return r;
+
+ r = focus(j);
+ if (r < 0)
+ return r;
+
+ if (arg_output) {
+ f = fopen(arg_output, "we");
+ if (!f)
+ return log_error_errno(errno, "Failed to open \"%s\" for writing: %m", arg_output);
+ }
+
+ print_info(f ? stdout : stderr, j, false);
+
+ r = save_core(j, f, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_journal_previous(j);
+ if (r > 0 && !arg_quiet)
+ log_notice("More than one entry matches, ignoring rest.");
+
+ return 0;
+}
+
+static int run_debug(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ _cleanup_free_ char *exe = NULL, *path = NULL;
+ bool unlink_path = false;
+ const char *data, *fork_name;
+ size_t len;
+ pid_t pid;
+ int r;
+
+ if (!arg_debugger) {
+ char *env_debugger;
+
+ env_debugger = getenv("SYSTEMD_DEBUGGER");
+ if (env_debugger)
+ arg_debugger = env_debugger;
+ else
+ arg_debugger = "gdb";
+ }
+
+ if (arg_field) {
+ log_error("Option --field/-F only makes sense with list");
+ return -EINVAL;
+ }
+
+ r = acquire_journal(&j, argv + 1);
+ if (r < 0)
+ return r;
+
+ r = focus(j);
+ if (r < 0)
+ return r;
+
+ print_info(stdout, j, false);
+ fputs("\n", stdout);
+
+ r = sd_journal_get_data(j, "COREDUMP_EXE", (const void**) &data, &len);
+ if (r < 0)
+ return log_error_errno(r, "Failed to retrieve COREDUMP_EXE field: %m");
+
+ assert(len > STRLEN("COREDUMP_EXE="));
+ data += STRLEN("COREDUMP_EXE=");
+ len -= STRLEN("COREDUMP_EXE=");
+
+ exe = strndup(data, len);
+ if (!exe)
+ return log_oom();
+
+ if (endswith(exe, " (deleted)")) {
+ log_error("Binary already deleted.");
+ return -ENOENT;
+ }
+
+ if (!path_is_absolute(exe)) {
+ log_error("Binary is not an absolute path.");
+ return -ENOENT;
+ }
+
+ r = save_core(j, NULL, &path, &unlink_path);
+ if (r < 0)
+ return r;
+
+ /* Don't interfere with gdb and its handling of SIGINT. */
+ (void) ignore_signals(SIGINT, -1);
+
+ fork_name = strjoina("(", arg_debugger, ")");
+
+ r = safe_fork(fork_name, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ goto finish;
+ if (r == 0) {
+ execlp(arg_debugger, arg_debugger, exe, "-c", path, NULL);
+ log_open();
+ log_error_errno(errno, "Failed to invoke %s: %m", arg_debugger);
+ _exit(EXIT_FAILURE);
+ }
+
+ r = wait_for_terminate_and_check(arg_debugger, pid, WAIT_LOG_ABNORMAL);
+
+finish:
+ (void) default_signals(SIGINT, -1);
+
+ if (unlink_path) {
+ log_debug("Removed temporary file %s", path);
+ unlink(path);
+ }
+
+ return r;
+}
+
+static int check_units_active(void) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int c = 0, r;
+ const char *id, *state, *substate;
+
+ if (arg_quiet)
+ return false;
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire bus: %m");
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ListUnitsByPatterns");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, NULL);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, STRV_MAKE("systemd-coredump@*.service"));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, SHORT_BUS_CALL_TIMEOUT_USEC, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to check if any systemd-coredump@.service units are running: %s",
+ bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssssouso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(
+ reply, "(ssssssouso)",
+ &id, NULL, NULL, &state, &substate,
+ NULL, NULL, NULL, NULL, NULL)) > 0) {
+ bool found = !STR_IN_SET(state, "inactive", "dead", "failed");
+ log_debug("Unit %s is %s/%s, %scounting it.", id, state, substate, found ? "" : "not ");
+ c += found;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return c;
+}
+
+static int coredumpctl_main(int argc, char *argv[]) {
+
+ static const Verb verbs[] = {
+ { "list", VERB_ANY, VERB_ANY, VERB_DEFAULT, dump_list },
+ { "info", VERB_ANY, VERB_ANY, 0, dump_list },
+ { "dump", VERB_ANY, VERB_ANY, 0, dump_core },
+ { "debug", VERB_ANY, VERB_ANY, 0, run_debug },
+ { "gdb", VERB_ANY, VERB_ANY, 0, run_debug },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r, units_active;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ /* The journal merging logic potentially needs a lot of fds. */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ sigbus_install();
+
+ units_active = check_units_active(); /* error is treated the same as 0 */
+
+ r = coredumpctl_main(argc, argv);
+
+ if (units_active > 0)
+ printf("%s-- Notice: %d systemd-coredump@.service %s, output may be incomplete.%s\n",
+ ansi_highlight_red(),
+ units_active, units_active == 1 ? "unit is running" : "units are running",
+ ansi_normal());
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/coredump/meson.build b/src/coredump/meson.build
new file mode 100644
index 0000000..7fa5942
--- /dev/null
+++ b/src/coredump/meson.build
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+systemd_coredump_sources = files('''
+ coredump.c
+ coredump-vacuum.c
+ coredump-vacuum.h
+'''.split())
+
+if conf.get('HAVE_ELFUTILS') == 1
+ systemd_coredump_sources += files(['stacktrace.c',
+ 'stacktrace.h'])
+endif
+
+coredumpctl_sources = files('coredumpctl.c')
+
+if conf.get('ENABLE_COREDUMP') == 1
+ install_data('coredump.conf',
+ install_dir : pkgsysconfdir)
+endif
+
+tests += [
+ [['src/coredump/test-coredump-vacuum.c',
+ 'src/coredump/coredump-vacuum.c',
+ 'src/coredump/coredump-vacuum.h'],
+ [],
+ [],
+ 'ENABLE_COREDUMP', 'manual'],
+]
diff --git a/src/coredump/stacktrace.c b/src/coredump/stacktrace.c
new file mode 100644
index 0000000..dab4c1a
--- /dev/null
+++ b/src/coredump/stacktrace.c
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dwarf.h>
+#include <elfutils/libdwfl.h>
+#include <stdio_ext.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "macro.h"
+#include "stacktrace.h"
+#include "string-util.h"
+#include "util.h"
+
+#define FRAMES_MAX 64
+#define THREADS_MAX 64
+
+struct stack_context {
+ FILE *f;
+ Dwfl *dwfl;
+ Elf *elf;
+ unsigned n_thread;
+ unsigned n_frame;
+};
+
+static int frame_callback(Dwfl_Frame *frame, void *userdata) {
+ struct stack_context *c = userdata;
+ Dwarf_Addr pc, pc_adjusted, bias = 0;
+ _cleanup_free_ Dwarf_Die *scopes = NULL;
+ const char *fname = NULL, *symbol = NULL;
+ Dwfl_Module *module;
+ bool is_activation;
+
+ assert(frame);
+ assert(c);
+
+ if (c->n_frame >= FRAMES_MAX)
+ return DWARF_CB_ABORT;
+
+ if (!dwfl_frame_pc(frame, &pc, &is_activation))
+ return DWARF_CB_ABORT;
+
+ pc_adjusted = pc - (is_activation ? 0 : 1);
+
+ module = dwfl_addrmodule(c->dwfl, pc_adjusted);
+ if (module) {
+ Dwarf_Die *s, *cudie;
+ int n;
+
+ cudie = dwfl_module_addrdie(module, pc_adjusted, &bias);
+ if (cudie) {
+ n = dwarf_getscopes(cudie, pc_adjusted - bias, &scopes);
+ for (s = scopes; s < scopes + n; s++) {
+ if (IN_SET(dwarf_tag(s), DW_TAG_subprogram, DW_TAG_inlined_subroutine, DW_TAG_entry_point)) {
+ Dwarf_Attribute *a, space;
+
+ a = dwarf_attr_integrate(s, DW_AT_MIPS_linkage_name, &space);
+ if (!a)
+ a = dwarf_attr_integrate(s, DW_AT_linkage_name, &space);
+ if (a)
+ symbol = dwarf_formstring(a);
+ if (!symbol)
+ symbol = dwarf_diename(s);
+
+ if (symbol)
+ break;
+ }
+ }
+ }
+
+ if (!symbol)
+ symbol = dwfl_module_addrname(module, pc_adjusted);
+
+ fname = dwfl_module_info(module, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+ }
+
+ fprintf(c->f, "#%-2u 0x%016" PRIx64 " %s (%s)\n", c->n_frame, (uint64_t) pc, strna(symbol), strna(fname));
+ c->n_frame++;
+
+ return DWARF_CB_OK;
+}
+
+static int thread_callback(Dwfl_Thread *thread, void *userdata) {
+ struct stack_context *c = userdata;
+ pid_t tid;
+
+ assert(thread);
+ assert(c);
+
+ if (c->n_thread >= THREADS_MAX)
+ return DWARF_CB_ABORT;
+
+ if (c->n_thread != 0)
+ fputc('\n', c->f);
+
+ c->n_frame = 0;
+
+ tid = dwfl_thread_tid(thread);
+ fprintf(c->f, "Stack trace of thread " PID_FMT ":\n", tid);
+
+ if (dwfl_thread_getframes(thread, frame_callback, c) < 0)
+ return DWARF_CB_ABORT;
+
+ c->n_thread++;
+
+ return DWARF_CB_OK;
+}
+
+int coredump_make_stack_trace(int fd, const char *executable, char **ret) {
+
+ static const Dwfl_Callbacks callbacks = {
+ .find_elf = dwfl_build_id_find_elf,
+ .find_debuginfo = dwfl_standard_find_debuginfo,
+ };
+
+ struct stack_context c = {};
+ char *buf = NULL;
+ size_t sz = 0;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (lseek(fd, 0, SEEK_SET) == (off_t) -1)
+ return -errno;
+
+ c.f = open_memstream(&buf, &sz);
+ if (!c.f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(c.f, FSETLOCKING_BYCALLER);
+
+ elf_version(EV_CURRENT);
+
+ c.elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+ if (!c.elf) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ c.dwfl = dwfl_begin(&callbacks);
+ if (!c.dwfl) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (dwfl_core_file_report(c.dwfl, c.elf, executable) < 0) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (dwfl_report_end(c.dwfl, NULL, NULL) != 0) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (dwfl_core_file_attach(c.dwfl, c.elf) < 0) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (dwfl_getthreads(c.dwfl, thread_callback, &c) < 0) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ c.f = safe_fclose(c.f);
+
+ *ret = TAKE_PTR(buf);
+
+ r = 0;
+
+finish:
+ if (c.dwfl)
+ dwfl_end(c.dwfl);
+
+ if (c.elf)
+ elf_end(c.elf);
+
+ safe_fclose(c.f);
+
+ free(buf);
+
+ return r;
+}
diff --git a/src/coredump/stacktrace.h b/src/coredump/stacktrace.h
new file mode 100644
index 0000000..5290042
--- /dev/null
+++ b/src/coredump/stacktrace.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int coredump_make_stack_trace(int fd, const char *executable, char **ret);
diff --git a/src/coredump/test-coredump-vacuum.c b/src/coredump/test-coredump-vacuum.c
new file mode 100644
index 0000000..75fb442
--- /dev/null
+++ b/src/coredump/test-coredump-vacuum.c
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdlib.h>
+
+#include "coredump-vacuum.h"
+
+int main(int argc, char *argv[]) {
+
+ if (coredump_vacuum(-1, (uint64_t) -1, 70 * 1024) < 0)
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/cryptsetup/cryptsetup-generator.c b/src/cryptsetup/cryptsetup-generator.c
new file mode 100644
index 0000000..8759a26
--- /dev/null
+++ b/src/cryptsetup/cryptsetup-generator.c
@@ -0,0 +1,609 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio_ext.h>
+
+#include "alloc-util.h"
+#include "dropin.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fstab-util.h"
+#include "generator.h"
+#include "hashmap.h"
+#include "id128-util.h"
+#include "log.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "util.h"
+
+typedef struct crypto_device {
+ char *uuid;
+ char *keyfile;
+ char *keydev;
+ char *name;
+ char *options;
+ bool create;
+} crypto_device;
+
+static const char *arg_dest = NULL;
+static bool arg_enabled = true;
+static bool arg_read_crypttab = true;
+static bool arg_whitelist = false;
+static Hashmap *arg_disks = NULL;
+static char *arg_default_options = NULL;
+static char *arg_default_keyfile = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_disks, hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_default_options, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_default_keyfile, freep);
+
+static int generate_keydev_mount(const char *name, const char *keydev, char **unit, char **mount) {
+ _cleanup_free_ char *u = NULL, *what = NULL, *where = NULL, *name_escaped = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(name);
+ assert(keydev);
+ assert(unit);
+ assert(mount);
+
+ r = mkdir_parents("/run/systemd/cryptsetup", 0755);
+ if (r < 0)
+ return r;
+
+ r = mkdir("/run/systemd/cryptsetup", 0700);
+ if (r < 0 && errno != EEXIST)
+ return -errno;
+
+ name_escaped = cescape(name);
+ if (!name_escaped)
+ return -ENOMEM;
+
+ where = strjoin("/run/systemd/cryptsetup/keydev-", name_escaped);
+ if (!where)
+ return -ENOMEM;
+
+ r = mkdir(where, 0700);
+ if (r < 0 && errno != EEXIST)
+ return -errno;
+
+ r = unit_name_from_path(where, ".mount", &u);
+ if (r < 0)
+ return r;
+
+ r = generator_open_unit_file(arg_dest, NULL, u, &f);
+ if (r < 0)
+ return r;
+
+ what = fstab_node_to_udev_node(keydev);
+ if (!what)
+ return -ENOMEM;
+
+ fprintf(f,
+ "[Unit]\n"
+ "DefaultDependencies=no\n\n"
+ "[Mount]\n"
+ "What=%s\n"
+ "Where=%s\n"
+ "Options=ro\n", what, where);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ *unit = TAKE_PTR(u);
+ *mount = TAKE_PTR(where);
+
+ return 0;
+}
+
+static int create_disk(
+ const char *name,
+ const char *device,
+ const char *keydev,
+ const char *password,
+ const char *options) {
+
+ _cleanup_free_ char *n = NULL, *d = NULL, *u = NULL, *e = NULL,
+ *filtered = NULL, *u_escaped = NULL, *password_escaped = NULL, *filtered_escaped = NULL, *name_escaped = NULL, *keydev_mount = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *dmname;
+ bool noauto, nofail, tmp, swap, netdev;
+ int r;
+
+ assert(name);
+ assert(device);
+
+ noauto = fstab_test_yes_no_option(options, "noauto\0" "auto\0");
+ nofail = fstab_test_yes_no_option(options, "nofail\0" "fail\0");
+ tmp = fstab_test_option(options, "tmp\0");
+ swap = fstab_test_option(options, "swap\0");
+ netdev = fstab_test_option(options, "_netdev\0");
+
+ if (tmp && swap)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Device '%s' cannot be both 'tmp' and 'swap'. Ignoring.",
+ name);
+
+ name_escaped = specifier_escape(name);
+ if (!name_escaped)
+ return log_oom();
+
+ e = unit_name_escape(name);
+ if (!e)
+ return log_oom();
+
+ u = fstab_node_to_udev_node(device);
+ if (!u)
+ return log_oom();
+
+ r = unit_name_build("systemd-cryptsetup", e, ".service", &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ u_escaped = specifier_escape(u);
+ if (!u_escaped)
+ return log_oom();
+
+ r = unit_name_from_path(u, ".device", &d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ if (password) {
+ password_escaped = specifier_escape(password);
+ if (!password_escaped)
+ return log_oom();
+ }
+
+ if (keydev && !password)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Key device is specified, but path to the password file is missing.");
+
+ r = generator_open_unit_file(arg_dest, NULL, n, &f);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "[Unit]\n"
+ "Description=Cryptography Setup for %%I\n"
+ "Documentation=man:crypttab(5) man:systemd-cryptsetup-generator(8) man:systemd-cryptsetup@.service(8)\n"
+ "SourcePath=/etc/crypttab\n"
+ "DefaultDependencies=no\n"
+ "Conflicts=umount.target\n"
+ "IgnoreOnIsolate=true\n"
+ "After=%s\n",
+ netdev ? "remote-fs-pre.target" : "cryptsetup-pre.target");
+
+ if (keydev) {
+ _cleanup_free_ char *unit = NULL, *p = NULL;
+
+ r = generate_keydev_mount(name, keydev, &unit, &keydev_mount);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate keydev mount unit: %m");
+
+ p = prefix_root(keydev_mount, password_escaped);
+ if (!p)
+ return log_oom();
+
+ free_and_replace(password_escaped, p);
+ }
+
+ if (!nofail)
+ fprintf(f,
+ "Before=%s\n",
+ netdev ? "remote-cryptsetup.target" : "cryptsetup.target");
+
+ if (password) {
+ if (PATH_IN_SET(password, "/dev/urandom", "/dev/random", "/dev/hw_random"))
+ fputs("After=systemd-random-seed.service\n", f);
+ else if (!STR_IN_SET(password, "-", "none")) {
+ _cleanup_free_ char *uu;
+
+ uu = fstab_node_to_udev_node(password);
+ if (!uu)
+ return log_oom();
+
+ if (!path_equal(uu, "/dev/null")) {
+
+ if (path_startswith(uu, "/dev/")) {
+ _cleanup_free_ char *dd = NULL;
+
+ r = unit_name_from_path(uu, ".device", &dd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ fprintf(f, "After=%1$s\nRequires=%1$s\n", dd);
+ } else
+ fprintf(f, "RequiresMountsFor=%s\n", password_escaped);
+ }
+ }
+ }
+
+ if (path_startswith(u, "/dev/")) {
+ fprintf(f,
+ "BindsTo=%s\n"
+ "After=%s\n"
+ "Before=umount.target\n",
+ d, d);
+
+ if (swap)
+ fputs("Before=dev-mapper-%i.swap\n",
+ f);
+ } else
+ /* For loopback devices, add systemd-tmpfiles-setup-dev.service
+ dependency to ensure that loopback support is available in
+ the kernel (/dev/loop-control needs to exist) */
+ fprintf(f,
+ "RequiresMountsFor=%s\n"
+ "Requires=systemd-tmpfiles-setup-dev.service\n"
+ "After=systemd-tmpfiles-setup-dev.service\n",
+ u_escaped);
+
+ r = generator_write_timeouts(arg_dest, device, name, options, &filtered);
+ if (r < 0)
+ return r;
+
+ if (filtered) {
+ filtered_escaped = specifier_escape(filtered);
+ if (!filtered_escaped)
+ return log_oom();
+ }
+
+ fprintf(f,
+ "\n[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "TimeoutSec=0\n" /* the binary handles timeouts anyway */
+ "KeyringMode=shared\n" /* make sure we can share cached keys among instances */
+ "ExecStart=" SYSTEMD_CRYPTSETUP_PATH " attach '%s' '%s' '%s' '%s'\n"
+ "ExecStop=" SYSTEMD_CRYPTSETUP_PATH " detach '%s'\n",
+ name_escaped, u_escaped, strempty(password_escaped), strempty(filtered_escaped),
+ name_escaped);
+
+ if (tmp)
+ fprintf(f,
+ "ExecStartPost=/sbin/mke2fs '/dev/mapper/%s'\n",
+ name_escaped);
+
+ if (swap)
+ fprintf(f,
+ "ExecStartPost=/sbin/mkswap '/dev/mapper/%s'\n",
+ name_escaped);
+
+ if (keydev)
+ fprintf(f,
+ "ExecStartPost=" UMOUNT_PATH " %s\n\n",
+ keydev_mount);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", n);
+
+ if (!noauto) {
+ r = generator_add_symlink(arg_dest, d, "wants", n);
+ if (r < 0)
+ return r;
+
+ r = generator_add_symlink(arg_dest,
+ netdev ? "remote-cryptsetup.target" : "cryptsetup.target",
+ nofail ? "wants" : "requires", n);
+ if (r < 0)
+ return r;
+ }
+
+ dmname = strjoina("dev-mapper-", e, ".device");
+ r = generator_add_symlink(arg_dest, dmname, "requires", n);
+ if (r < 0)
+ return r;
+
+ if (!noauto && !nofail) {
+ r = write_drop_in(arg_dest, dmname, 90, "device-timeout",
+ "# Automatically generated by systemd-cryptsetup-generator \n\n"
+ "[Unit]\nJobTimeoutSec=0");
+ if (r < 0)
+ return log_error_errno(r, "Failed to write device drop-in: %m");
+ }
+
+ return 0;
+}
+
+static crypto_device* crypt_device_free(crypto_device *d) {
+ if (!d)
+ return NULL;
+
+ free(d->uuid);
+ free(d->keyfile);
+ free(d->keydev);
+ free(d->name);
+ free(d->options);
+ return mfree(d);
+}
+
+static crypto_device *get_crypto_device(const char *uuid) {
+ int r;
+ crypto_device *d;
+
+ assert(uuid);
+
+ d = hashmap_get(arg_disks, uuid);
+ if (!d) {
+ d = new0(struct crypto_device, 1);
+ if (!d)
+ return NULL;
+
+ d->create = false;
+ d->keyfile = d->options = d->name = NULL;
+
+ d->uuid = strdup(uuid);
+ if (!d->uuid)
+ return mfree(d);
+
+ r = hashmap_put(arg_disks, d->uuid, d);
+ if (r < 0) {
+ free(d->uuid);
+ return mfree(d);
+ }
+ }
+
+ return d;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ _cleanup_free_ char *uuid = NULL, *uuid_value = NULL;
+ crypto_device *d;
+ int r;
+
+ if (streq(key, "luks")) {
+
+ r = value ? parse_boolean(value) : 1;
+ if (r < 0)
+ log_warning("Failed to parse luks= kernel command line switch %s. Ignoring.", value);
+ else
+ arg_enabled = r;
+
+ } else if (streq(key, "luks.crypttab")) {
+
+ r = value ? parse_boolean(value) : 1;
+ if (r < 0)
+ log_warning("Failed to parse luks.crypttab= kernel command line switch %s. Ignoring.", value);
+ else
+ arg_read_crypttab = r;
+
+ } else if (streq(key, "luks.uuid")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ d = get_crypto_device(startswith(value, "luks-") ? value+5 : value);
+ if (!d)
+ return log_oom();
+
+ d->create = arg_whitelist = true;
+
+ } else if (streq(key, "luks.options")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = sscanf(value, "%m[0-9a-fA-F-]=%ms", &uuid, &uuid_value);
+ if (r == 2) {
+ d = get_crypto_device(uuid);
+ if (!d)
+ return log_oom();
+
+ free_and_replace(d->options, uuid_value);
+ } else if (free_and_strdup(&arg_default_options, value) < 0)
+ return log_oom();
+
+ } else if (streq(key, "luks.key")) {
+ size_t n;
+ _cleanup_free_ char *keyfile = NULL, *keydev = NULL;
+ char *c;
+ const char *keyspec;
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ n = strspn(value, LETTERS DIGITS "-");
+ if (value[n] != '=') {
+ if (free_and_strdup(&arg_default_keyfile, value) < 0)
+ return log_oom();
+ return 0;
+ }
+
+ uuid = strndup(value, n);
+ if (!uuid)
+ return log_oom();
+
+ if (!id128_is_valid(uuid)) {
+ log_warning("Failed to parse luks.key= kernel command line switch. UUID is invalid, ignoring.");
+ return 0;
+ }
+
+ d = get_crypto_device(uuid);
+ if (!d)
+ return log_oom();
+
+ keyspec = value + n + 1;
+ c = strrchr(keyspec, ':');
+ if (c) {
+ *c = '\0';
+ keyfile = strdup(keyspec);
+ keydev = strdup(c + 1);
+
+ if (!keyfile || !keydev)
+ return log_oom();
+ } else {
+ /* No keydev specified */
+ keyfile = strdup(keyspec);
+ if (!keyfile)
+ return log_oom();
+ }
+
+ free_and_replace(d->keyfile, keyfile);
+ free_and_replace(d->keydev, keydev);
+ } else if (streq(key, "luks.name")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = sscanf(value, "%m[0-9a-fA-F-]=%ms", &uuid, &uuid_value);
+ if (r == 2) {
+ d = get_crypto_device(uuid);
+ if (!d)
+ return log_oom();
+
+ d->create = arg_whitelist = true;
+
+ free_and_replace(d->name, uuid_value);
+ } else
+ log_warning("Failed to parse luks name switch %s. Ignoring.", value);
+ }
+
+ return 0;
+}
+
+static int add_crypttab_devices(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned crypttab_line = 0;
+ struct stat st;
+ int r;
+
+ if (!arg_read_crypttab)
+ return 0;
+
+ f = fopen("/etc/crypttab", "re");
+ if (!f) {
+ if (errno != ENOENT)
+ log_error_errno(errno, "Failed to open /etc/crypttab: %m");
+ return 0;
+ }
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ if (fstat(fileno(f), &st) < 0) {
+ log_error_errno(errno, "Failed to stat /etc/crypttab: %m");
+ return 0;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL, *name = NULL, *device = NULL, *keyfile = NULL, *options = NULL;
+ crypto_device *d = NULL;
+ char *l, *uuid;
+ int k;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read /etc/crypttab: %m");
+ if (r == 0)
+ break;
+
+ crypttab_line++;
+
+ l = strstrip(line);
+ if (IN_SET(l[0], 0, '#'))
+ continue;
+
+ k = sscanf(l, "%ms %ms %ms %ms", &name, &device, &keyfile, &options);
+ if (k < 2 || k > 4) {
+ log_error("Failed to parse /etc/crypttab:%u, ignoring.", crypttab_line);
+ continue;
+ }
+
+ uuid = STARTSWITH_SET(device, "UUID=", "luks-");
+ if (!uuid)
+ uuid = path_startswith(device, "/dev/disk/by-uuid/");
+ if (uuid)
+ d = hashmap_get(arg_disks, uuid);
+
+ if (arg_whitelist && !d) {
+ log_info("Not creating device '%s' because it was not specified on the kernel command line.", name);
+ continue;
+ }
+
+ r = create_disk(name, device, NULL, keyfile, (d && d->options) ? d->options : options);
+ if (r < 0)
+ return r;
+
+ if (d)
+ d->create = false;
+ }
+
+ return 0;
+}
+
+static int add_proc_cmdline_devices(void) {
+ int r;
+ Iterator i;
+ crypto_device *d;
+
+ HASHMAP_FOREACH(d, arg_disks, i) {
+ const char *options;
+ _cleanup_free_ char *device = NULL;
+
+ if (!d->create)
+ continue;
+
+ if (!d->name) {
+ d->name = strappend("luks-", d->uuid);
+ if (!d->name)
+ return log_oom();
+ }
+
+ device = strappend("UUID=", d->uuid);
+ if (!device)
+ return log_oom();
+
+ if (d->options)
+ options = d->options;
+ else if (arg_default_options)
+ options = arg_default_options;
+ else
+ options = "timeout=0";
+
+ r = create_disk(d->name, device, d->keydev, d->keyfile ?: arg_default_keyfile, options);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(crypt_device_hash_ops, char, string_hash_func, string_compare_func,
+ crypto_device, crypt_device_free);
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r;
+
+ assert_se(arg_dest = dest);
+
+ arg_disks = hashmap_new(&crypt_device_hash_ops);
+ if (!arg_disks)
+ return log_oom();
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse kernel command line: %m");
+
+ if (!arg_enabled)
+ return 0;
+
+ r = add_crypttab_devices();
+ if (r < 0)
+ return r;
+
+ r = add_proc_cmdline_devices();
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/cryptsetup/cryptsetup.c b/src/cryptsetup/cryptsetup.c
new file mode 100644
index 0000000..9cb52dd
--- /dev/null
+++ b/src/cryptsetup/cryptsetup.c
@@ -0,0 +1,745 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <mntent.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "ask-password-api.h"
+#include "crypt-util.h"
+#include "device-util.h"
+#include "escape.h"
+#include "fileio.h"
+#include "log.h"
+#include "main-func.h"
+#include "mount-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "pretty-print.h"
+#include "util.h"
+
+/* internal helper */
+#define ANY_LUKS "LUKS"
+/* as in src/cryptsetup.h */
+#define CRYPT_SECTOR_SIZE 512
+#define CRYPT_MAX_SECTOR_SIZE 4096
+
+static const char *arg_type = NULL; /* ANY_LUKS, CRYPT_LUKS1, CRYPT_LUKS2, CRYPT_TCRYPT or CRYPT_PLAIN */
+static char *arg_cipher = NULL;
+static unsigned arg_key_size = 0;
+#if HAVE_LIBCRYPTSETUP_SECTOR_SIZE
+static unsigned arg_sector_size = CRYPT_SECTOR_SIZE;
+#endif
+static int arg_key_slot = CRYPT_ANY_SLOT;
+static unsigned arg_keyfile_size = 0;
+static uint64_t arg_keyfile_offset = 0;
+static char *arg_hash = NULL;
+static char *arg_header = NULL;
+static unsigned arg_tries = 3;
+static bool arg_readonly = false;
+static bool arg_verify = false;
+static bool arg_discards = false;
+static bool arg_tcrypt_hidden = false;
+static bool arg_tcrypt_system = false;
+#ifdef CRYPT_TCRYPT_VERA_MODES
+static bool arg_tcrypt_veracrypt = false;
+#endif
+static char **arg_tcrypt_keyfiles = NULL;
+static uint64_t arg_offset = 0;
+static uint64_t arg_skip = 0;
+static usec_t arg_timeout = USEC_INFINITY;
+
+STATIC_DESTRUCTOR_REGISTER(arg_cipher, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_hash, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_header, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_tcrypt_keyfiles, strv_freep);
+
+/* Options Debian's crypttab knows we don't:
+
+ precheck=
+ check=
+ checkargs=
+ noearly=
+ loud=
+ keyscript=
+*/
+
+static int parse_one_option(const char *option) {
+ const char *val;
+ int r;
+
+ assert(option);
+
+ /* Handled outside of this tool */
+ if (STR_IN_SET(option, "noauto", "auto", "nofail", "fail", "_netdev"))
+ return 0;
+
+ if ((val = startswith(option, "cipher="))) {
+ r = free_and_strdup(&arg_cipher, val);
+ if (r < 0)
+ return log_oom();
+
+ } else if ((val = startswith(option, "size="))) {
+
+ r = safe_atou(val, &arg_key_size);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ if (arg_key_size % 8) {
+ log_error("size= not a multiple of 8, ignoring.");
+ return 0;
+ }
+
+ arg_key_size /= 8;
+
+ } else if ((val = startswith(option, "sector-size="))) {
+
+#if HAVE_LIBCRYPTSETUP_SECTOR_SIZE
+ r = safe_atou(val, &arg_sector_size);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ if (arg_sector_size % 2) {
+ log_error("sector-size= not a multiple of 2, ignoring.");
+ return 0;
+ }
+
+ if (arg_sector_size < CRYPT_SECTOR_SIZE || arg_sector_size > CRYPT_MAX_SECTOR_SIZE) {
+ log_error("sector-size= is outside of %u and %u, ignoring.", CRYPT_SECTOR_SIZE, CRYPT_MAX_SECTOR_SIZE);
+ return 0;
+ }
+#else
+ log_error("sector-size= is not supported, compiled with old libcryptsetup.");
+ return 0;
+#endif
+
+ } else if ((val = startswith(option, "key-slot="))) {
+
+ arg_type = ANY_LUKS;
+ r = safe_atoi(val, &arg_key_slot);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ } else if ((val = startswith(option, "tcrypt-keyfile="))) {
+
+ arg_type = CRYPT_TCRYPT;
+ if (path_is_absolute(val)) {
+ if (strv_extend(&arg_tcrypt_keyfiles, val) < 0)
+ return log_oom();
+ } else
+ log_error("Key file path \"%s\" is not absolute. Ignoring.", val);
+
+ } else if ((val = startswith(option, "keyfile-size="))) {
+
+ r = safe_atou(val, &arg_keyfile_size);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ } else if ((val = startswith(option, "keyfile-offset="))) {
+ uint64_t off;
+
+ r = safe_atou64(val, &off);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ if ((size_t) off != off) {
+ /* https://gitlab.com/cryptsetup/cryptsetup/issues/359 */
+ log_error("keyfile-offset= value would truncated to %zu, ignoring.", (size_t) off);
+ return 0;
+ }
+
+ arg_keyfile_offset = off;
+
+ } else if ((val = startswith(option, "hash="))) {
+ r = free_and_strdup(&arg_hash, val);
+ if (r < 0)
+ return log_oom();
+
+ } else if ((val = startswith(option, "header="))) {
+ arg_type = ANY_LUKS;
+
+ if (!path_is_absolute(val))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Header path \"%s\" is not absolute, refusing.", val);
+
+ if (arg_header)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Duplicate header= option, refusing.");
+
+ arg_header = strdup(val);
+ if (!arg_header)
+ return log_oom();
+
+ } else if ((val = startswith(option, "tries="))) {
+
+ r = safe_atou(val, &arg_tries);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ } else if (STR_IN_SET(option, "readonly", "read-only"))
+ arg_readonly = true;
+ else if (streq(option, "verify"))
+ arg_verify = true;
+ else if (STR_IN_SET(option, "allow-discards", "discard"))
+ arg_discards = true;
+ else if (streq(option, "luks"))
+ arg_type = ANY_LUKS;
+ else if (streq(option, "tcrypt"))
+ arg_type = CRYPT_TCRYPT;
+ else if (streq(option, "tcrypt-hidden")) {
+ arg_type = CRYPT_TCRYPT;
+ arg_tcrypt_hidden = true;
+ } else if (streq(option, "tcrypt-system")) {
+ arg_type = CRYPT_TCRYPT;
+ arg_tcrypt_system = true;
+ } else if (streq(option, "tcrypt-veracrypt")) {
+#ifdef CRYPT_TCRYPT_VERA_MODES
+ arg_type = CRYPT_TCRYPT;
+ arg_tcrypt_veracrypt = true;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This version of cryptsetup does not support tcrypt-veracrypt; refusing.");
+#endif
+ } else if (STR_IN_SET(option, "plain", "swap", "tmp"))
+ arg_type = CRYPT_PLAIN;
+ else if ((val = startswith(option, "timeout="))) {
+
+ r = parse_sec_fix_0(val, &arg_timeout);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ } else if ((val = startswith(option, "offset="))) {
+
+ r = safe_atou64(val, &arg_offset);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s: %m", option);
+
+ } else if ((val = startswith(option, "skip="))) {
+
+ r = safe_atou64(val, &arg_skip);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s: %m", option);
+
+ } else if (!streq(option, "none"))
+ log_warning("Encountered unknown /etc/crypttab option '%s', ignoring.", option);
+
+ return 0;
+}
+
+static int parse_options(const char *options) {
+ const char *word, *state;
+ size_t l;
+ int r;
+
+ assert(options);
+
+ FOREACH_WORD_SEPARATOR(word, l, options, ",", state) {
+ _cleanup_free_ char *o;
+
+ o = strndup(word, l);
+ if (!o)
+ return -ENOMEM;
+ r = parse_one_option(o);
+ if (r < 0)
+ return r;
+ }
+
+ /* sanity-check options */
+ if (arg_type != NULL && !streq(arg_type, CRYPT_PLAIN)) {
+ if (arg_offset)
+ log_warning("offset= ignored with type %s", arg_type);
+ if (arg_skip)
+ log_warning("skip= ignored with type %s", arg_type);
+ }
+
+ return 0;
+}
+
+static char* disk_description(const char *path) {
+ static const char name_fields[] =
+ "ID_PART_ENTRY_NAME\0"
+ "DM_NAME\0"
+ "ID_MODEL_FROM_DATABASE\0"
+ "ID_MODEL\0";
+
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ const char *i, *name;
+ struct stat st;
+
+ assert(path);
+
+ if (stat(path, &st) < 0)
+ return NULL;
+
+ if (!S_ISBLK(st.st_mode))
+ return NULL;
+
+ if (sd_device_new_from_devnum(&device, 'b', st.st_rdev) < 0)
+ return NULL;
+
+ NULSTR_FOREACH(i, name_fields)
+ if (sd_device_get_property_value(device, i, &name) >= 0 &&
+ !isempty(name))
+ return strdup(name);
+
+ return NULL;
+}
+
+static char *disk_mount_point(const char *label) {
+ _cleanup_free_ char *device = NULL;
+ _cleanup_endmntent_ FILE *f = NULL;
+ struct mntent *m;
+
+ /* Yeah, we don't support native systemd unit files here for now */
+
+ if (asprintf(&device, "/dev/mapper/%s", label) < 0)
+ return NULL;
+
+ f = setmntent("/etc/fstab", "re");
+ if (!f)
+ return NULL;
+
+ while ((m = getmntent(f)))
+ if (path_equal(m->mnt_fsname, device))
+ return strdup(m->mnt_dir);
+
+ return NULL;
+}
+
+static int get_password(const char *vol, const char *src, usec_t until, bool accept_cached, char ***ret) {
+ _cleanup_free_ char *description = NULL, *name_buffer = NULL, *mount_point = NULL, *text = NULL, *disk_path = NULL;
+ _cleanup_strv_free_erase_ char **passwords = NULL;
+ const char *name = NULL;
+ char **p, *id;
+ int r = 0;
+
+ assert(vol);
+ assert(src);
+ assert(ret);
+
+ description = disk_description(src);
+ mount_point = disk_mount_point(vol);
+
+ disk_path = cescape(src);
+ if (!disk_path)
+ return log_oom();
+
+ if (description && streq(vol, description))
+ /* If the description string is simply the
+ * volume name, then let's not show this
+ * twice */
+ description = mfree(description);
+
+ if (mount_point && description)
+ r = asprintf(&name_buffer, "%s (%s) on %s", description, vol, mount_point);
+ else if (mount_point)
+ r = asprintf(&name_buffer, "%s on %s", vol, mount_point);
+ else if (description)
+ r = asprintf(&name_buffer, "%s (%s)", description, vol);
+
+ if (r < 0)
+ return log_oom();
+
+ name = name_buffer ? name_buffer : vol;
+
+ if (asprintf(&text, "Please enter passphrase for disk %s:", name) < 0)
+ return log_oom();
+
+ id = strjoina("cryptsetup:", disk_path);
+
+ r = ask_password_auto(text, "drive-harddisk", id, "cryptsetup", until,
+ ASK_PASSWORD_PUSH_CACHE | (accept_cached*ASK_PASSWORD_ACCEPT_CACHED),
+ &passwords);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query password: %m");
+
+ if (arg_verify) {
+ _cleanup_strv_free_erase_ char **passwords2 = NULL;
+
+ assert(strv_length(passwords) == 1);
+
+ if (asprintf(&text, "Please enter passphrase for disk %s (verification):", name) < 0)
+ return log_oom();
+
+ id = strjoina("cryptsetup-verification:", disk_path);
+
+ r = ask_password_auto(text, "drive-harddisk", id, "cryptsetup", until, ASK_PASSWORD_PUSH_CACHE, &passwords2);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query verification password: %m");
+
+ assert(strv_length(passwords2) == 1);
+
+ if (!streq(passwords[0], passwords2[0])) {
+ log_warning("Passwords did not match, retrying.");
+ return -EAGAIN;
+ }
+ }
+
+ strv_uniq(passwords);
+
+ STRV_FOREACH(p, passwords) {
+ char *c;
+
+ if (strlen(*p)+1 >= arg_key_size)
+ continue;
+
+ /* Pad password if necessary */
+ c = new(char, arg_key_size);
+ if (!c)
+ return log_oom();
+
+ strncpy(c, *p, arg_key_size);
+ free_and_replace(*p, c);
+ }
+
+ *ret = TAKE_PTR(passwords);
+
+ return 0;
+}
+
+static int attach_tcrypt(
+ struct crypt_device *cd,
+ const char *name,
+ const char *key_file,
+ char **passwords,
+ uint32_t flags) {
+
+ int r = 0;
+ _cleanup_free_ char *passphrase = NULL;
+ struct crypt_params_tcrypt params = {
+ .flags = CRYPT_TCRYPT_LEGACY_MODES,
+ .keyfiles = (const char **)arg_tcrypt_keyfiles,
+ .keyfiles_count = strv_length(arg_tcrypt_keyfiles)
+ };
+
+ assert(cd);
+ assert(name);
+ assert(key_file || (passwords && passwords[0]));
+
+ if (arg_tcrypt_hidden)
+ params.flags |= CRYPT_TCRYPT_HIDDEN_HEADER;
+
+ if (arg_tcrypt_system)
+ params.flags |= CRYPT_TCRYPT_SYSTEM_HEADER;
+
+#ifdef CRYPT_TCRYPT_VERA_MODES
+ if (arg_tcrypt_veracrypt)
+ params.flags |= CRYPT_TCRYPT_VERA_MODES;
+#endif
+
+ if (key_file) {
+ r = read_one_line_file(key_file, &passphrase);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read password file '%s': %m", key_file);
+ return -EAGAIN; /* log with the actual error, but return EAGAIN */
+ }
+
+ params.passphrase = passphrase;
+ } else
+ params.passphrase = passwords[0];
+ params.passphrase_size = strlen(params.passphrase);
+
+ r = crypt_load(cd, CRYPT_TCRYPT, &params);
+ if (r < 0) {
+ if (key_file && r == -EPERM) {
+ log_error_errno(r, "Failed to activate using password file '%s'. (Key data not correct?)", key_file);
+ return -EAGAIN; /* log the actual error, but return EAGAIN */
+ }
+
+ return log_error_errno(r, "Failed to load tcrypt superblock on device %s: %m", crypt_get_device_name(cd));
+ }
+
+ r = crypt_activate_by_volume_key(cd, name, NULL, 0, flags);
+ if (r < 0)
+ return log_error_errno(r, "Failed to activate tcrypt device %s: %m", crypt_get_device_name(cd));
+
+ return 0;
+}
+
+static int attach_luks_or_plain(struct crypt_device *cd,
+ const char *name,
+ const char *key_file,
+ const char *data_device,
+ char **passwords,
+ uint32_t flags) {
+ int r = 0;
+ bool pass_volume_key = false;
+
+ assert(cd);
+ assert(name);
+ assert(key_file || passwords);
+
+ if (!arg_type || STR_IN_SET(arg_type, ANY_LUKS, CRYPT_LUKS1)) {
+ r = crypt_load(cd, CRYPT_LUKS, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load LUKS superblock on device %s: %m", crypt_get_device_name(cd));
+
+ if (data_device)
+ r = crypt_set_data_device(cd, data_device);
+ }
+
+ if ((!arg_type && r < 0) || streq_ptr(arg_type, CRYPT_PLAIN)) {
+ struct crypt_params_plain params = {
+ .offset = arg_offset,
+ .skip = arg_skip,
+#if HAVE_LIBCRYPTSETUP_SECTOR_SIZE
+ .sector_size = arg_sector_size,
+#endif
+ };
+ const char *cipher, *cipher_mode;
+ _cleanup_free_ char *truncated_cipher = NULL;
+
+ if (arg_hash) {
+ /* plain isn't a real hash type. it just means "use no hash" */
+ if (!streq(arg_hash, "plain"))
+ params.hash = arg_hash;
+ } else if (!key_file)
+ /* for CRYPT_PLAIN, the behaviour of cryptsetup
+ * package is to not hash when a key file is provided */
+ params.hash = "ripemd160";
+
+ if (arg_cipher) {
+ size_t l;
+
+ l = strcspn(arg_cipher, "-");
+ truncated_cipher = strndup(arg_cipher, l);
+ if (!truncated_cipher)
+ return log_oom();
+
+ cipher = truncated_cipher;
+ cipher_mode = arg_cipher[l] ? arg_cipher+l+1 : "plain";
+ } else {
+ cipher = "aes";
+ cipher_mode = "cbc-essiv:sha256";
+ }
+
+ /* for CRYPT_PLAIN limit reads from keyfile to key length, and ignore keyfile-size */
+ arg_keyfile_size = arg_key_size;
+
+ /* In contrast to what the name crypt_setup() might suggest this doesn't actually format
+ * anything, it just configures encryption parameters when used for plain mode. */
+ r = crypt_format(cd, CRYPT_PLAIN, cipher, cipher_mode, NULL, NULL, arg_keyfile_size, &params);
+
+ /* hash == NULL implies the user passed "plain" */
+ pass_volume_key = (params.hash == NULL);
+ }
+ if (r < 0)
+ return log_error_errno(r, "Loading of cryptographic parameters failed: %m");
+
+ log_info("Set cipher %s, mode %s, key size %i bits for device %s.",
+ crypt_get_cipher(cd),
+ crypt_get_cipher_mode(cd),
+ crypt_get_volume_key_size(cd)*8,
+ crypt_get_device_name(cd));
+
+ if (key_file) {
+ r = crypt_activate_by_keyfile_offset(cd, name, arg_key_slot, key_file, arg_keyfile_size, arg_keyfile_offset, flags);
+ if (r == -EPERM) {
+ log_error_errno(r, "Failed to activate with key file '%s'. (Key data incorrect?)", key_file);
+ return -EAGAIN; /* Log actual error, but return EAGAIN */
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to activate with key file '%s': %m", key_file);
+ } else {
+ char **p;
+
+ r = -EINVAL;
+ STRV_FOREACH(p, passwords) {
+ if (pass_volume_key)
+ r = crypt_activate_by_volume_key(cd, name, *p, arg_key_size, flags);
+ else
+ r = crypt_activate_by_passphrase(cd, name, arg_key_slot, *p, strlen(*p), flags);
+ if (r >= 0)
+ break;
+ }
+ if (r == -EPERM) {
+ log_error_errno(r, "Failed to activate with specified passphrase. (Passphrase incorrect?)");
+ return -EAGAIN; /* log actual error, but return EAGAIN */
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to activate with specified passphrase: %m");
+ }
+
+ return r;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-cryptsetup@.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s attach VOLUME SOURCEDEVICE [PASSWORD] [OPTIONS]\n"
+ "%s detach VOLUME\n\n"
+ "Attaches or detaches an encrypted block device.\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ int r;
+
+ if (argc <= 1)
+ return help();
+
+ if (argc < 3) {
+ log_error("This program requires at least two arguments.");
+ return -EINVAL;
+ }
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (streq(argv[1], "attach")) {
+ uint32_t flags = 0;
+ unsigned tries;
+ usec_t until;
+ crypt_status_info status;
+ const char *key_file = NULL;
+
+ /* Arguments: systemd-cryptsetup attach VOLUME SOURCE-DEVICE [PASSWORD] [OPTIONS] */
+
+ if (argc < 4)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "attach requires at least two arguments.");
+
+ if (argc >= 5 &&
+ argv[4][0] &&
+ !streq(argv[4], "-") &&
+ !streq(argv[4], "none")) {
+
+ if (!path_is_absolute(argv[4]))
+ log_warning("Password file path '%s' is not absolute. Ignoring.", argv[4]);
+ else
+ key_file = argv[4];
+ }
+
+ if (argc >= 6 && argv[5][0] && !streq(argv[5], "-")) {
+ r = parse_options(argv[5]);
+ if (r < 0)
+ return r;
+ }
+
+ /* A delicious drop of snake oil */
+ mlockall(MCL_FUTURE);
+
+ if (arg_header) {
+ log_debug("LUKS header: %s", arg_header);
+ r = crypt_init(&cd, arg_header);
+ } else
+ r = crypt_init(&cd, argv[3]);
+ if (r < 0)
+ return log_error_errno(r, "crypt_init() failed: %m");
+
+ crypt_set_log_callback(cd, cryptsetup_log_glue, NULL);
+
+ status = crypt_status(cd, argv[2]);
+ if (IN_SET(status, CRYPT_ACTIVE, CRYPT_BUSY)) {
+ log_info("Volume %s already active.", argv[2]);
+ return 0;
+ }
+
+ if (arg_readonly)
+ flags |= CRYPT_ACTIVATE_READONLY;
+
+ if (arg_discards)
+ flags |= CRYPT_ACTIVATE_ALLOW_DISCARDS;
+
+ if (arg_timeout == USEC_INFINITY)
+ until = 0;
+ else
+ until = now(CLOCK_MONOTONIC) + arg_timeout;
+
+ arg_key_size = (arg_key_size > 0 ? arg_key_size : (256 / 8));
+
+ if (key_file) {
+ struct stat st;
+
+ /* Ideally we'd do this on the open fd, but since this is just a
+ * warning it's OK to do this in two steps. */
+ if (stat(key_file, &st) >= 0 && S_ISREG(st.st_mode) && (st.st_mode & 0005))
+ log_warning("Key file %s is world-readable. This is not a good idea!", key_file);
+ }
+
+ for (tries = 0; arg_tries == 0 || tries < arg_tries; tries++) {
+ _cleanup_strv_free_erase_ char **passwords = NULL;
+
+ if (!key_file) {
+ r = get_password(argv[2], argv[3], until, tries == 0 && !arg_verify, &passwords);
+ if (r == -EAGAIN)
+ continue;
+ if (r < 0)
+ return r;
+ }
+
+ if (streq_ptr(arg_type, CRYPT_TCRYPT))
+ r = attach_tcrypt(cd, argv[2], key_file, passwords, flags);
+ else
+ r = attach_luks_or_plain(cd,
+ argv[2],
+ key_file,
+ arg_header ? argv[3] : NULL,
+ passwords,
+ flags);
+ if (r >= 0)
+ break;
+ if (r != -EAGAIN)
+ return r;
+
+ /* Passphrase not correct? Let's try again! */
+ key_file = NULL;
+ }
+
+ if (arg_tries != 0 && tries >= arg_tries)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Too many attempts to activate; giving up.");
+
+ } else if (streq(argv[1], "detach")) {
+
+ r = crypt_init_by_name(&cd, argv[2]);
+ if (r == -ENODEV) {
+ log_info("Volume %s already inactive.", argv[2]);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "crypt_init_by_name() failed: %m");
+
+ crypt_set_log_callback(cd, cryptsetup_log_glue, NULL);
+
+ r = crypt_deactivate(cd, argv[2]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to deactivate: %m");
+
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown verb %s.", argv[1]);
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/debug-generator/debug-generator.c b/src/debug-generator/debug-generator.c
new file mode 100644
index 0000000..fa4ca57
--- /dev/null
+++ b/src/debug-generator/debug-generator.c
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "generator.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "special.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "util.h"
+
+static const char *arg_dest = NULL;
+static char *arg_default_unit = NULL;
+static char **arg_mask = NULL;
+static char **arg_wants = NULL;
+static bool arg_debug_shell = false;
+
+STATIC_DESTRUCTOR_REGISTER(arg_default_unit, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_mask, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_wants, strv_freep);
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r;
+
+ assert(key);
+
+ if (streq(key, "systemd.mask")) {
+ char *n;
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = unit_name_mangle(value, UNIT_NAME_MANGLE_WARN, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to glob unit name: %m");
+
+ r = strv_consume(&arg_mask, n);
+ if (r < 0)
+ return log_oom();
+
+ } else if (streq(key, "systemd.wants")) {
+ char *n;
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = unit_name_mangle(value, UNIT_NAME_MANGLE_WARN, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to glob unit name: %m");
+
+ r = strv_consume(&arg_wants, n);
+ if (r < 0)
+ return log_oom();
+
+ } else if (proc_cmdline_key_streq(key, "systemd.debug_shell")) {
+
+ if (value) {
+ r = parse_boolean(value);
+ if (r < 0)
+ log_error("Failed to parse systemd.debug_shell= argument '%s', ignoring.", value);
+ else
+ arg_debug_shell = r;
+ } else
+ arg_debug_shell = true;
+
+ } else if (streq(key, "systemd.unit")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = free_and_strdup(&arg_default_unit, value);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set default unit %s: %m", value);
+
+ } else if (!value) {
+ const char *target;
+
+ target = runlevel_to_target(key);
+ if (target) {
+ r = free_and_strdup(&arg_default_unit, target);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set default unit %s: %m", target);
+ }
+ }
+
+ return 0;
+}
+
+static int generate_mask_symlinks(void) {
+ char **u;
+ int r = 0;
+
+ if (strv_isempty(arg_mask))
+ return 0;
+
+ STRV_FOREACH(u, arg_mask) {
+ _cleanup_free_ char *p = NULL;
+
+ p = strjoin(arg_dest, "/", *u);
+ if (!p)
+ return log_oom();
+
+ if (symlink("/dev/null", p) < 0)
+ r = log_error_errno(errno,
+ "Failed to create mask symlink %s: %m",
+ p);
+ }
+
+ return r;
+}
+
+static int generate_wants_symlinks(void) {
+ char **u;
+ int r = 0;
+
+ if (strv_isempty(arg_wants))
+ return 0;
+
+ STRV_FOREACH(u, arg_wants) {
+ _cleanup_free_ char *p = NULL, *f = NULL;
+ const char *target = arg_default_unit ?: SPECIAL_DEFAULT_TARGET;
+
+ p = strjoin(arg_dest, "/", target, ".wants/", *u);
+ if (!p)
+ return log_oom();
+
+ f = strappend(SYSTEM_DATA_UNIT_PATH "/", *u);
+ if (!f)
+ return log_oom();
+
+ mkdir_parents_label(p, 0755);
+
+ if (symlink(f, p) < 0)
+ r = log_error_errno(errno,
+ "Failed to create wants symlink %s: %m",
+ p);
+ }
+
+ return r;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r, q;
+
+ assert_se(arg_dest = dest_early);
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_RD_STRICT | PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ if (arg_debug_shell) {
+ r = strv_extend(&arg_wants, "debug-shell.service");
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = generate_mask_symlinks();
+ q = generate_wants_symlinks();
+
+ return r < 0 ? r : q;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/delta/delta.c b/src/delta/delta.c
new file mode 100644
index 0000000..1ffbc6c
--- /dev/null
+++ b/src/delta/delta.c
@@ -0,0 +1,685 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "locale-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static const char prefixes[] =
+ "/etc\0"
+ "/run\0"
+ "/usr/local/lib\0"
+ "/usr/local/share\0"
+ "/usr/lib\0"
+ "/usr/share\0"
+#if HAVE_SPLIT_USR
+ "/lib\0"
+#endif
+ ;
+
+static const char suffixes[] =
+ "sysctl.d\0"
+ "tmpfiles.d\0"
+ "modules-load.d\0"
+ "binfmt.d\0"
+ "systemd/system\0"
+ "systemd/user\0"
+ "systemd/system-preset\0"
+ "systemd/user-preset\0"
+ "udev/rules.d\0"
+ "modprobe.d\0";
+
+static const char have_dropins[] =
+ "systemd/system\0"
+ "systemd/user\0";
+
+static PagerFlags arg_pager_flags = 0;
+static int arg_diff = -1;
+
+static enum {
+ SHOW_MASKED = 1 << 0,
+ SHOW_EQUIVALENT = 1 << 1,
+ SHOW_REDIRECTED = 1 << 2,
+ SHOW_OVERRIDDEN = 1 << 3,
+ SHOW_UNCHANGED = 1 << 4,
+ SHOW_EXTENDED = 1 << 5,
+
+ SHOW_DEFAULTS =
+ (SHOW_MASKED | SHOW_EQUIVALENT | SHOW_REDIRECTED | SHOW_OVERRIDDEN | SHOW_EXTENDED)
+} arg_flags = 0;
+
+static int equivalent(const char *a, const char *b) {
+ _cleanup_free_ char *x = NULL, *y = NULL;
+ int r;
+
+ r = chase_symlinks(a, NULL, CHASE_TRAIL_SLASH, &x);
+ if (r < 0)
+ return r;
+
+ r = chase_symlinks(b, NULL, CHASE_TRAIL_SLASH, &y);
+ if (r < 0)
+ return r;
+
+ return path_equal(x, y);
+}
+
+static int notify_override_masked(const char *top, const char *bottom) {
+ if (!(arg_flags & SHOW_MASKED))
+ return 0;
+
+ printf("%s%s%s %s %s %s\n",
+ ansi_highlight_red(), "[MASKED]", ansi_normal(),
+ top, special_glyph(SPECIAL_GLYPH_ARROW), bottom);
+ return 1;
+}
+
+static int notify_override_equivalent(const char *top, const char *bottom) {
+ if (!(arg_flags & SHOW_EQUIVALENT))
+ return 0;
+
+ printf("%s%s%s %s %s %s\n",
+ ansi_highlight_green(), "[EQUIVALENT]", ansi_normal(),
+ top, special_glyph(SPECIAL_GLYPH_ARROW), bottom);
+ return 1;
+}
+
+static int notify_override_redirected(const char *top, const char *bottom) {
+ if (!(arg_flags & SHOW_REDIRECTED))
+ return 0;
+
+ printf("%s%s%s %s %s %s\n",
+ ansi_highlight(), "[REDIRECTED]", ansi_normal(),
+ top, special_glyph(SPECIAL_GLYPH_ARROW), bottom);
+ return 1;
+}
+
+static int notify_override_overridden(const char *top, const char *bottom) {
+ if (!(arg_flags & SHOW_OVERRIDDEN))
+ return 0;
+
+ printf("%s%s%s %s %s %s\n",
+ ansi_highlight(), "[OVERRIDDEN]", ansi_normal(),
+ top, special_glyph(SPECIAL_GLYPH_ARROW), bottom);
+ return 1;
+}
+
+static int notify_override_extended(const char *top, const char *bottom) {
+ if (!(arg_flags & SHOW_EXTENDED))
+ return 0;
+
+ printf("%s%s%s %s %s %s\n",
+ ansi_highlight(), "[EXTENDED]", ansi_normal(),
+ top, special_glyph(SPECIAL_GLYPH_ARROW), bottom);
+ return 1;
+}
+
+static int notify_override_unchanged(const char *f) {
+ if (!(arg_flags & SHOW_UNCHANGED))
+ return 0;
+
+ printf("[UNCHANGED] %s\n", f);
+ return 1;
+}
+
+static int found_override(const char *top, const char *bottom) {
+ _cleanup_free_ char *dest = NULL;
+ pid_t pid;
+ int r;
+
+ assert(top);
+ assert(bottom);
+
+ if (null_or_empty_path(top) > 0)
+ return notify_override_masked(top, bottom);
+
+ r = readlink_malloc(top, &dest);
+ if (r >= 0) {
+ if (equivalent(dest, bottom) > 0)
+ return notify_override_equivalent(top, bottom);
+ else
+ return notify_override_redirected(top, bottom);
+ }
+
+ r = notify_override_overridden(top, bottom);
+ if (!arg_diff)
+ return r;
+
+ putchar('\n');
+
+ fflush(stdout);
+
+ r = safe_fork("(diff)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ execlp("diff", "diff", "-us", "--", bottom, top, NULL);
+ log_open();
+ log_error_errno(errno, "Failed to execute diff: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ (void) wait_for_terminate_and_check("diff", pid, WAIT_LOG_ABNORMAL);
+ putchar('\n');
+
+ return r;
+}
+
+static int enumerate_dir_d(
+ OrderedHashmap *top,
+ OrderedHashmap *bottom,
+ OrderedHashmap *drops,
+ const char *toppath, const char *drop) {
+
+ _cleanup_free_ char *unit = NULL;
+ _cleanup_free_ char *path = NULL;
+ _cleanup_strv_free_ char **list = NULL;
+ char **file;
+ char *c;
+ int r;
+
+ assert(!endswith(drop, "/"));
+
+ path = strjoin(toppath, "/", drop);
+ if (!path)
+ return -ENOMEM;
+
+ log_debug("Looking at %s", path);
+
+ unit = strdup(drop);
+ if (!unit)
+ return -ENOMEM;
+
+ c = strrchr(unit, '.');
+ if (!c)
+ return -EINVAL;
+ *c = 0;
+
+ r = get_files_in_directory(path, &list);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate %s: %m", path);
+
+ strv_sort(list);
+
+ STRV_FOREACH(file, list) {
+ OrderedHashmap *h;
+ int k;
+ char *p;
+ char *d;
+
+ if (!endswith(*file, ".conf"))
+ continue;
+
+ p = strjoin(path, "/", *file);
+ if (!p)
+ return -ENOMEM;
+ d = p + strlen(toppath) + 1;
+
+ log_debug("Adding at top: %s %s %s", d, special_glyph(SPECIAL_GLYPH_ARROW), p);
+ k = ordered_hashmap_put(top, d, p);
+ if (k >= 0) {
+ p = strdup(p);
+ if (!p)
+ return -ENOMEM;
+ d = p + strlen(toppath) + 1;
+ } else if (k != -EEXIST) {
+ free(p);
+ return k;
+ }
+
+ log_debug("Adding at bottom: %s %s %s", d, special_glyph(SPECIAL_GLYPH_ARROW), p);
+ free(ordered_hashmap_remove(bottom, d));
+ k = ordered_hashmap_put(bottom, d, p);
+ if (k < 0) {
+ free(p);
+ return k;
+ }
+
+ h = ordered_hashmap_get(drops, unit);
+ if (!h) {
+ h = ordered_hashmap_new(&string_hash_ops);
+ if (!h)
+ return -ENOMEM;
+ ordered_hashmap_put(drops, unit, h);
+ unit = strdup(unit);
+ if (!unit)
+ return -ENOMEM;
+ }
+
+ p = strdup(p);
+ if (!p)
+ return -ENOMEM;
+
+ log_debug("Adding to drops: %s %s %s %s %s",
+ unit, special_glyph(SPECIAL_GLYPH_ARROW), basename(p), special_glyph(SPECIAL_GLYPH_ARROW), p);
+ k = ordered_hashmap_put(h, basename(p), p);
+ if (k < 0) {
+ free(p);
+ if (k != -EEXIST)
+ return k;
+ }
+ }
+ return 0;
+}
+
+static int enumerate_dir(
+ OrderedHashmap *top,
+ OrderedHashmap *bottom,
+ OrderedHashmap *drops,
+ const char *path, bool dropins) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
+ size_t n_files = 0, allocated_files = 0, n_dirs = 0, allocated_dirs = 0;
+ char **t;
+ int r;
+
+ assert(top);
+ assert(bottom);
+ assert(drops);
+ assert(path);
+
+ log_debug("Looking at %s", path);
+
+ d = opendir(path);
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open %s: %m", path);
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ dirent_ensure_type(d, de);
+
+ if (dropins && de->d_type == DT_DIR && endswith(de->d_name, ".d")) {
+ if (!GREEDY_REALLOC0(dirs, allocated_dirs, n_dirs + 2))
+ return -ENOMEM;
+
+ dirs[n_dirs] = strdup(de->d_name);
+ if (!dirs[n_dirs])
+ return -ENOMEM;
+ n_dirs ++;
+ }
+
+ if (!dirent_is_file(de))
+ continue;
+
+ if (!GREEDY_REALLOC0(files, allocated_files, n_files + 2))
+ return -ENOMEM;
+
+ files[n_files] = strdup(de->d_name);
+ if (!files[n_files])
+ return -ENOMEM;
+ n_files ++;
+ }
+
+ strv_sort(dirs);
+ strv_sort(files);
+
+ STRV_FOREACH(t, dirs) {
+ r = enumerate_dir_d(top, bottom, drops, path, *t);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(t, files) {
+ _cleanup_free_ char *p = NULL;
+
+ p = strjoin(path, "/", *t);
+ if (!p)
+ return -ENOMEM;
+
+ log_debug("Adding at top: %s %s %s", basename(p), special_glyph(SPECIAL_GLYPH_ARROW), p);
+ r = ordered_hashmap_put(top, basename(p), p);
+ if (r >= 0) {
+ p = strdup(p);
+ if (!p)
+ return -ENOMEM;
+ } else if (r != -EEXIST)
+ return r;
+
+ log_debug("Adding at bottom: %s %s %s", basename(p), special_glyph(SPECIAL_GLYPH_ARROW), p);
+ free(ordered_hashmap_remove(bottom, basename(p)));
+ r = ordered_hashmap_put(bottom, basename(p), p);
+ if (r < 0)
+ return r;
+ p = NULL;
+ }
+
+ return 0;
+}
+
+static bool should_skip_path(const char *prefix, const char *suffix) {
+#if HAVE_SPLIT_USR
+ _cleanup_free_ char *target = NULL;
+ const char *p;
+ char *dirname;
+
+ dirname = strjoina(prefix, "/", suffix);
+
+ if (chase_symlinks(dirname, NULL, 0, &target) < 0)
+ return false;
+
+ NULSTR_FOREACH(p, prefixes) {
+ if (path_startswith(dirname, p))
+ continue;
+
+ if (path_equal(target, strjoina(p, "/", suffix))) {
+ log_debug("%s redirects to %s, skipping.", dirname, target);
+ return true;
+ }
+ }
+#endif
+ return false;
+}
+
+static int process_suffix(const char *suffix, const char *onlyprefix) {
+ const char *p;
+ char *f;
+ OrderedHashmap *top, *bottom, *drops;
+ OrderedHashmap *h;
+ char *key;
+ int r = 0, k;
+ Iterator i, j;
+ int n_found = 0;
+ bool dropins;
+
+ assert(suffix);
+ assert(!startswith(suffix, "/"));
+ assert(!strstr(suffix, "//"));
+
+ dropins = nulstr_contains(have_dropins, suffix);
+
+ top = ordered_hashmap_new(&string_hash_ops);
+ bottom = ordered_hashmap_new(&string_hash_ops);
+ drops = ordered_hashmap_new(&string_hash_ops);
+ if (!top || !bottom || !drops) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ NULSTR_FOREACH(p, prefixes) {
+ _cleanup_free_ char *t = NULL;
+
+ if (should_skip_path(p, suffix))
+ continue;
+
+ t = strjoin(p, "/", suffix);
+ if (!t) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ k = enumerate_dir(top, bottom, drops, t, dropins);
+ if (r == 0)
+ r = k;
+ }
+
+ ORDERED_HASHMAP_FOREACH_KEY(f, key, top, i) {
+ char *o;
+
+ o = ordered_hashmap_get(bottom, key);
+ assert(o);
+
+ if (!onlyprefix || startswith(o, onlyprefix)) {
+ if (path_equal(o, f)) {
+ notify_override_unchanged(f);
+ } else {
+ k = found_override(f, o);
+ if (k < 0)
+ r = k;
+ else
+ n_found += k;
+ }
+ }
+
+ h = ordered_hashmap_get(drops, key);
+ if (h)
+ ORDERED_HASHMAP_FOREACH(o, h, j)
+ if (!onlyprefix || startswith(o, onlyprefix))
+ n_found += notify_override_extended(f, o);
+ }
+
+finish:
+ ordered_hashmap_free_free(top);
+ ordered_hashmap_free_free(bottom);
+
+ ORDERED_HASHMAP_FOREACH_KEY(h, key, drops, i) {
+ ordered_hashmap_free_free(ordered_hashmap_remove(drops, key));
+ ordered_hashmap_remove(drops, key);
+ free(key);
+ }
+ ordered_hashmap_free(drops);
+
+ return r < 0 ? r : n_found;
+}
+
+static int process_suffixes(const char *onlyprefix) {
+ const char *n;
+ int n_found = 0, r;
+
+ NULSTR_FOREACH(n, suffixes) {
+ r = process_suffix(n, onlyprefix);
+ if (r < 0)
+ return r;
+
+ n_found += r;
+ }
+
+ return n_found;
+}
+
+static int process_suffix_chop(const char *arg) {
+ const char *p;
+
+ assert(arg);
+
+ if (!path_is_absolute(arg))
+ return process_suffix(arg, NULL);
+
+ /* Strip prefix from the suffix */
+ NULSTR_FOREACH(p, prefixes) {
+ const char *suffix;
+
+ suffix = startswith(arg, p);
+ if (suffix) {
+ suffix += strspn(suffix, "/");
+ if (*suffix)
+ return process_suffix(suffix, p);
+ else
+ return process_suffixes(arg);
+ }
+ }
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid suffix specification %s.", arg);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-delta", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [SUFFIX...]\n\n"
+ "Find overridden configuration files.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --diff[=1|0] Show a diff when overridden files differ\n"
+ " -t --type=LIST... Only display a selected set of override types\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_flags(const char *flag_str, int flags) {
+ const char *word, *state;
+ size_t l;
+
+ FOREACH_WORD_SEPARATOR(word, l, flag_str, ",", state) {
+ if (strneq("masked", word, l))
+ flags |= SHOW_MASKED;
+ else if (strneq ("equivalent", word, l))
+ flags |= SHOW_EQUIVALENT;
+ else if (strneq("redirected", word, l))
+ flags |= SHOW_REDIRECTED;
+ else if (strneq("overridden", word, l))
+ flags |= SHOW_OVERRIDDEN;
+ else if (strneq("unchanged", word, l))
+ flags |= SHOW_UNCHANGED;
+ else if (strneq("extended", word, l))
+ flags |= SHOW_EXTENDED;
+ else if (strneq("default", word, l))
+ flags |= SHOW_DEFAULTS;
+ else
+ return -EINVAL;
+ }
+ return flags;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_NO_PAGER = 0x100,
+ ARG_DIFF,
+ ARG_VERSION
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "diff", optional_argument, NULL, ARG_DIFF },
+ { "type", required_argument, NULL, 't' },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 1);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "ht:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case 't': {
+ int f;
+ f = parse_flags(optarg, arg_flags);
+ if (f < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse flags field.");
+ arg_flags = f;
+ break;
+ }
+
+ case ARG_DIFF:
+ if (!optarg)
+ arg_diff = 1;
+ else {
+ int b;
+
+ b = parse_boolean(optarg);
+ if (b < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse diff boolean.");
+
+ arg_diff = b;
+ }
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ int r, k, n_found = 0;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_flags == 0)
+ arg_flags = SHOW_DEFAULTS;
+
+ if (arg_diff < 0)
+ arg_diff = !!(arg_flags & SHOW_OVERRIDDEN);
+ else if (arg_diff)
+ arg_flags |= SHOW_OVERRIDDEN;
+
+ (void) pager_open(arg_pager_flags);
+
+ if (optind < argc) {
+ int i;
+
+ for (i = optind; i < argc; i++) {
+ path_simplify(argv[i], false);
+
+ k = process_suffix_chop(argv[i]);
+ if (k < 0)
+ r = k;
+ else
+ n_found += k;
+ }
+
+ } else {
+ k = process_suffixes(NULL);
+ if (k < 0)
+ r = k;
+ else
+ n_found += k;
+ }
+
+ if (r >= 0)
+ printf("%s%i overridden configuration files found.\n", n_found ? "\n" : "", n_found);
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/detect-virt/detect-virt.c b/src/detect-virt/detect-virt.c
new file mode 100644
index 0000000..f05f227
--- /dev/null
+++ b/src/detect-virt/detect-virt.c
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "string-table.h"
+#include "util.h"
+#include "virt.h"
+
+static bool arg_quiet = false;
+static enum {
+ ANY_VIRTUALIZATION,
+ ONLY_VM,
+ ONLY_CONTAINER,
+ ONLY_CHROOT,
+ ONLY_PRIVATE_USERS,
+} arg_mode = ANY_VIRTUALIZATION;
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-detect-virt", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Detect execution in a virtualized environment.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -c --container Only detect whether we are run in a container\n"
+ " -v --vm Only detect whether we are run in a VM\n"
+ " -r --chroot Detect whether we are run in a chroot() environment\n"
+ " --private-users Only detect whether we are running in a user namespace\n"
+ " -q --quiet Don't output anything, just set return value\n"
+ " --list List all known and detectable types of virtualization\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_PRIVATE_USERS,
+ ARG_LIST,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "container", no_argument, NULL, 'c' },
+ { "vm", no_argument, NULL, 'v' },
+ { "chroot", no_argument, NULL, 'r' },
+ { "private-users", no_argument, NULL, ARG_PRIVATE_USERS },
+ { "quiet", no_argument, NULL, 'q' },
+ { "list", no_argument, NULL, ARG_LIST },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hqcvr", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case 'c':
+ arg_mode = ONLY_CONTAINER;
+ break;
+
+ case ARG_PRIVATE_USERS:
+ arg_mode = ONLY_PRIVATE_USERS;
+ break;
+
+ case 'v':
+ arg_mode = ONLY_VM;
+ break;
+
+ case 'r':
+ arg_mode = ONLY_CHROOT;
+ break;
+
+ case ARG_LIST:
+ DUMP_STRING_TABLE(virtualization, int, _VIRTUALIZATION_MAX);
+ return 0;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s takes no arguments.",
+ program_invocation_short_name);
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ /* This is mostly intended to be used for scripts which want
+ * to detect whether we are being run in a virtualized
+ * environment or not */
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ switch (arg_mode) {
+ case ONLY_VM:
+ r = detect_vm();
+ if (r < 0)
+ return log_error_errno(r, "Failed to check for VM: %m");
+ break;
+
+ case ONLY_CONTAINER:
+ r = detect_container();
+ if (r < 0)
+ return log_error_errno(r, "Failed to check for container: %m");
+ break;
+
+ case ONLY_CHROOT:
+ r = running_in_chroot();
+ if (r < 0)
+ return log_error_errno(r, "Failed to check for chroot() environment: %m");
+ return !r;
+
+ case ONLY_PRIVATE_USERS:
+ r = running_in_userns();
+ if (r < 0)
+ return log_error_errno(r, "Failed to check for user namespace: %m");
+ return !r;
+
+ case ANY_VIRTUALIZATION:
+ default:
+ r = detect_virtualization();
+ if (r < 0)
+ return log_error_errno(r, "Failed to check for virtualization: %m");
+ break;
+ }
+
+ if (!arg_quiet)
+ puts(virtualization_to_string(r));
+
+ return r == VIRTUALIZATION_NONE;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c
new file mode 100644
index 0000000..50de0af
--- /dev/null
+++ b/src/dissect/dissect.c
@@ -0,0 +1,282 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <getopt.h>
+
+#include "architecture.h"
+#include "dissect-image.h"
+#include "hexdecoct.h"
+#include "log.h"
+#include "loop-util.h"
+#include "main-func.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+static enum {
+ ACTION_DISSECT,
+ ACTION_MOUNT,
+} arg_action = ACTION_DISSECT;
+static const char *arg_image = NULL;
+static const char *arg_path = NULL;
+static DissectImageFlags arg_flags = DISSECT_IMAGE_REQUIRE_ROOT|DISSECT_IMAGE_DISCARD_ON_LOOP;
+static void *arg_root_hash = NULL;
+static size_t arg_root_hash_size = 0;
+
+STATIC_DESTRUCTOR_REGISTER(arg_root_hash, freep);
+
+static void help(void) {
+ printf("%s [OPTIONS...] IMAGE\n"
+ "%s [OPTIONS...] --mount IMAGE PATH\n"
+ "Dissect a file system OS image.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -m --mount Mount the image to the specified directory\n"
+ " -r --read-only Mount read-only\n"
+ " --discard=MODE Choose 'discard' mode (disabled, loop, all, crypto)\n"
+ " --root-hash=HASH Specify root hash for verity\n",
+ program_invocation_short_name,
+ program_invocation_short_name);
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_DISCARD,
+ ARG_ROOT_HASH,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "mount", no_argument, NULL, 'm' },
+ { "read-only", no_argument, NULL, 'r' },
+ { "discard", required_argument, NULL, ARG_DISCARD },
+ { "root-hash", required_argument, NULL, ARG_ROOT_HASH },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hmr", options, NULL)) >= 0) {
+
+ switch (c) {
+
+ case 'h':
+ help();
+ return 0;
+
+ case ARG_VERSION:
+ return version();
+
+ case 'm':
+ arg_action = ACTION_MOUNT;
+ break;
+
+ case 'r':
+ arg_flags |= DISSECT_IMAGE_READ_ONLY;
+ break;
+
+ case ARG_DISCARD: {
+ DissectImageFlags flags;
+
+ if (streq(optarg, "disabled"))
+ flags = 0;
+ else if (streq(optarg, "loop"))
+ flags = DISSECT_IMAGE_DISCARD_ON_LOOP;
+ else if (streq(optarg, "all"))
+ flags = DISSECT_IMAGE_DISCARD_ON_LOOP | DISSECT_IMAGE_DISCARD;
+ else if (streq(optarg, "crypt"))
+ flags = DISSECT_IMAGE_DISCARD_ANY;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown --discard= parameter: %s",
+ optarg);
+ arg_flags = (arg_flags & ~DISSECT_IMAGE_DISCARD_ANY) | flags;
+
+ break;
+ }
+
+ case ARG_ROOT_HASH: {
+ void *p;
+ size_t l;
+
+ r = unhexmem(optarg, strlen(optarg), &p, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse root hash '%s': %m", optarg);
+ if (l < sizeof(sd_id128_t)) {
+ log_error("Root hash must be at least 128bit long: %s", optarg);
+ free(p);
+ return -EINVAL;
+ }
+
+ free(arg_root_hash);
+ arg_root_hash = p;
+ arg_root_hash_size = l;
+ break;
+ }
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ }
+
+ switch (arg_action) {
+
+ case ACTION_DISSECT:
+ if (optind + 1 != argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected a file path as only argument.");
+
+ arg_image = argv[optind];
+ arg_flags |= DISSECT_IMAGE_READ_ONLY;
+ break;
+
+ case ACTION_MOUNT:
+ if (optind + 2 != argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected a file path and mount point path as only arguments.");
+
+ arg_image = argv[optind];
+ arg_path = argv[optind + 1];
+ break;
+
+ default:
+ assert_not_reached("Unknown action.");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *di = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = loop_device_make_by_path(arg_image, (arg_flags & DISSECT_IMAGE_READ_ONLY) ? O_RDONLY : O_RDWR, &d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up loopback device: %m");
+
+ if (!arg_root_hash) {
+ r = root_hash_load(arg_image, &arg_root_hash, &arg_root_hash_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read root hash file for %s: %m", arg_image);
+ }
+
+ r = dissect_image_and_warn(d->fd, arg_image, arg_root_hash, arg_root_hash_size, arg_flags, &m);
+ if (r < 0)
+ return r;
+
+ switch (arg_action) {
+
+ case ACTION_DISSECT: {
+ unsigned i;
+
+ for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+ DissectedPartition *p = m->partitions + i;
+ int k;
+
+ if (!p->found)
+ continue;
+
+ printf("Found %s '%s' partition",
+ p->rw ? "writable" : "read-only",
+ partition_designator_to_string(i));
+
+ if (!sd_id128_is_null(p->uuid))
+ printf(" (UUID " SD_ID128_FORMAT_STR ")", SD_ID128_FORMAT_VAL(p->uuid));
+
+ if (p->fstype)
+ printf(" of type %s", p->fstype);
+
+ if (p->architecture != _ARCHITECTURE_INVALID)
+ printf(" for %s", architecture_to_string(p->architecture));
+
+ k = PARTITION_VERITY_OF(i);
+ if (k >= 0)
+ printf(" %s verity", m->partitions[k].found ? "with" : "without");
+
+ if (p->partno >= 0)
+ printf(" on partition #%i", p->partno);
+
+ if (p->node)
+ printf(" (%s)", p->node);
+
+ putchar('\n');
+ }
+
+ r = dissected_image_acquire_metadata(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire image metadata: %m");
+
+ if (m->hostname)
+ printf(" Hostname: %s\n", m->hostname);
+
+ if (!sd_id128_is_null(m->machine_id))
+ printf("Machine ID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(m->machine_id));
+
+ if (!strv_isempty(m->machine_info)) {
+ char **p, **q;
+
+ STRV_FOREACH_PAIR(p, q, m->machine_info)
+ printf("%s %s=%s\n",
+ p == m->machine_info ? "Mach. Info:" : " ",
+ *p, *q);
+ }
+
+ if (!strv_isempty(m->os_release)) {
+ char **p, **q;
+
+ STRV_FOREACH_PAIR(p, q, m->os_release)
+ printf("%s %s=%s\n",
+ p == m->os_release ? "OS Release:" : " ",
+ *p, *q);
+ }
+
+ break;
+ }
+
+ case ACTION_MOUNT:
+ r = dissected_image_decrypt_interactively(m, NULL, arg_root_hash, arg_root_hash_size, arg_flags, &di);
+ if (r < 0)
+ return r;
+
+ r = dissected_image_mount(m, arg_path, UID_INVALID, arg_flags);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mount image: %m");
+
+ if (di) {
+ r = decrypted_image_relinquish(di);
+ if (r < 0)
+ return log_error_errno(r, "Failed to relinquish DM devices: %m");
+ }
+
+ loop_device_relinquish(d);
+ break;
+
+ default:
+ assert_not_reached("Unknown action.");
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/environment-d-generator/environment-d-generator.c b/src/environment-d-generator/environment-d-generator.c
new file mode 100644
index 0000000..9d64d95
--- /dev/null
+++ b/src/environment-d-generator/environment-d-generator.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-path.h"
+
+#include "conf-files.h"
+#include "def.h"
+#include "env-file.h"
+#include "escape.h"
+#include "log.h"
+#include "path-lookup.h"
+
+static int environment_dirs(char ***ret) {
+ _cleanup_strv_free_ char **dirs = NULL;
+ _cleanup_free_ char *c = NULL;
+ int r;
+
+ dirs = strv_split_nulstr(CONF_PATHS_NULSTR("environment.d"));
+ if (!dirs)
+ return -ENOMEM;
+
+ /* ~/.config/systemd/environment.d */
+ r = sd_path_home(SD_PATH_USER_CONFIGURATION, "environment.d", &c);
+ if (r < 0)
+ return r;
+
+ r = strv_extend_front(&dirs, c);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(dirs);
+ return 0;
+}
+
+static int load_and_print(void) {
+ _cleanup_strv_free_ char **dirs = NULL, **files = NULL, **env = NULL;
+ char **i;
+ int r;
+
+ r = environment_dirs(&dirs);
+ if (r < 0)
+ return r;
+
+ r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char **) dirs);
+ if (r < 0)
+ return r;
+
+ /* This will mutate the existing environment, based on the presumption
+ * that in case of failure, a partial update is better than none. */
+
+ STRV_FOREACH(i, files) {
+ r = merge_env_file(&env, NULL, *i);
+ if (r == -ENOMEM)
+ return r;
+ }
+
+ STRV_FOREACH(i, env) {
+ char *t;
+ _cleanup_free_ char *q = NULL;
+
+ t = strchr(*i, '=');
+ assert(t);
+
+ q = shell_maybe_quote(t + 1, ESCAPE_BACKSLASH);
+ if (!q)
+ return log_oom();
+
+ printf("%.*s=%s\n", (int) (t - *i), *i, q);
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ if (argc > 1) {
+ log_error("This program takes no arguments.");
+ return EXIT_FAILURE;
+ }
+
+ r = load_and_print();
+ if (r < 0)
+ log_error_errno(r, "Failed to load environment.d: %m");
+
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/escape/escape.c b/src/escape/escape.c
new file mode 100644
index 0000000..8b38d9f
--- /dev/null
+++ b/src/escape/escape.c
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+
+static enum {
+ ACTION_ESCAPE,
+ ACTION_UNESCAPE,
+ ACTION_MANGLE
+} arg_action = ACTION_ESCAPE;
+static const char *arg_suffix = NULL;
+static const char *arg_template = NULL;
+static bool arg_path = false;
+static bool arg_instance = false;
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-escape", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [NAME...]\n\n"
+ "Escape strings for usage in systemd unit names.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --suffix=SUFFIX Unit suffix to append to escaped strings\n"
+ " --template=TEMPLATE Insert strings as instance into template\n"
+ " --instance With --unescape, show just the instance part\n"
+ " -u --unescape Unescape strings\n"
+ " -m --mangle Mangle strings\n"
+ " -p --path When escaping/unescaping assume the string is a path\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_SUFFIX,
+ ARG_TEMPLATE
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "suffix", required_argument, NULL, ARG_SUFFIX },
+ { "template", required_argument, NULL, ARG_TEMPLATE },
+ { "unescape", no_argument, NULL, 'u' },
+ { "mangle", no_argument, NULL, 'm' },
+ { "path", no_argument, NULL, 'p' },
+ { "instance", no_argument, NULL, 'i' },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hump", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_SUFFIX:
+
+ if (unit_type_from_string(optarg) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid unit suffix type %s.", optarg);
+
+ arg_suffix = optarg;
+ break;
+
+ case ARG_TEMPLATE:
+
+ if (!unit_name_is_valid(optarg, UNIT_NAME_TEMPLATE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Template name %s is not valid.", optarg);
+
+ arg_template = optarg;
+ break;
+
+ case 'u':
+ arg_action = ACTION_UNESCAPE;
+ break;
+
+ case 'm':
+ arg_action = ACTION_MANGLE;
+ break;
+
+ case 'p':
+ arg_path = true;
+ break;
+
+ case 'i':
+ arg_instance = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not enough arguments.");
+
+ if (arg_template && arg_suffix)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--suffix= and --template= may not be combined.");
+
+ if ((arg_template || arg_suffix) && arg_action == ACTION_MANGLE)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--suffix= and --template= are not compatible with --mangle.");
+
+ if (arg_suffix && arg_action == ACTION_UNESCAPE)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--suffix is not compatible with --unescape.");
+
+ if (arg_path && !IN_SET(arg_action, ACTION_ESCAPE, ACTION_UNESCAPE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--path may not be combined with --mangle.");
+
+ if (arg_instance && arg_action != ACTION_UNESCAPE)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--instance must be used in conjunction with --unescape.");
+
+ if (arg_instance && arg_template)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--instance may not be combined with --template.");
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ char **i;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ STRV_FOREACH(i, argv + optind) {
+ _cleanup_free_ char *e = NULL;
+
+ switch (arg_action) {
+
+ case ACTION_ESCAPE:
+ if (arg_path) {
+ r = unit_name_path_escape(*i, &e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to escape string: %m");
+ } else {
+ e = unit_name_escape(*i);
+ if (!e)
+ return log_oom();
+ }
+
+ if (arg_template) {
+ char *x;
+
+ r = unit_name_replace_instance(arg_template, e, &x);
+ if (r < 0)
+ return log_error_errno(r, "Failed to replace instance: %m");
+
+ free_and_replace(e, x);
+ } else if (arg_suffix) {
+ char *x;
+
+ x = strjoin(e, ".", arg_suffix);
+ if (!x)
+ return log_oom();
+
+ free_and_replace(e, x);
+ }
+
+ break;
+
+ case ACTION_UNESCAPE: {
+ _cleanup_free_ char *name = NULL;
+
+ if (arg_template || arg_instance) {
+ _cleanup_free_ char *template = NULL;
+
+ r = unit_name_to_instance(*i, &name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract instance: %m");
+ if (isempty(name))
+ return log_error("Unit %s is missing the instance name.", *i);
+
+ r = unit_name_template(*i, &template);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract template: %m");
+ if (arg_template && !streq(arg_template, template))
+ return log_error("Unit %s template %s does not match specified template %s.",
+ *i, template, arg_template);
+ } else {
+ name = strdup(*i);
+ if (!name)
+ return log_oom();
+ }
+
+ if (arg_path)
+ r = unit_name_path_unescape(name, &e);
+ else
+ r = unit_name_unescape(name, &e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unescape string: %m");
+
+ break;
+ }
+
+ case ACTION_MANGLE:
+ r = unit_name_mangle(*i, 0, &e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle name: %m");
+
+ break;
+ }
+
+ if (i != argv + optind)
+ fputc(' ', stdout);
+
+ fputs(e, stdout);
+ }
+
+ fputc('\n', stdout);
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/firstboot/firstboot.c b/src/firstboot/firstboot.c
new file mode 100644
index 0000000..dde1157
--- /dev/null
+++ b/src/firstboot/firstboot.c
@@ -0,0 +1,994 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <unistd.h>
+
+#if HAVE_CRYPT_H
+/* libxcrypt is a replacement for glibc's libcrypt, and libcrypt might be
+ * removed from glibc at some point. As part of the removal, defines for
+ * crypt(3) are dropped from unistd.h, and we must include crypt.h instead.
+ *
+ * Newer versions of glibc (v2.0+) already ship crypt.h with a definition
+ * of crypt(3) as well, so we simply include it if it is present. MariaDB,
+ * MySQL, PostgreSQL, Perl and some other wide-spread packages do it the
+ * same way since ages without any problems.
+ */
+# include <crypt.h>
+#endif
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "ask-password-api.h"
+#include "copy.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "locale-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "os-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "proc-cmdline.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+
+static char *arg_root = NULL;
+static char *arg_locale = NULL; /* $LANG */
+static char *arg_keymap = NULL;
+static char *arg_locale_messages = NULL; /* $LC_MESSAGES */
+static char *arg_timezone = NULL;
+static char *arg_hostname = NULL;
+static sd_id128_t arg_machine_id = {};
+static char *arg_root_password = NULL;
+static bool arg_prompt_locale = false;
+static bool arg_prompt_keymap = false;
+static bool arg_prompt_timezone = false;
+static bool arg_prompt_hostname = false;
+static bool arg_prompt_root_password = false;
+static bool arg_copy_locale = false;
+static bool arg_copy_keymap = false;
+static bool arg_copy_timezone = false;
+static bool arg_copy_root_password = false;
+
+STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_locale, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_locale_messages, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_keymap, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_timezone, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_hostname, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root_password, string_free_erasep);
+
+static bool press_any_key(void) {
+ char k = 0;
+ bool need_nl = true;
+
+ printf("-- Press any key to proceed --");
+ fflush(stdout);
+
+ (void) read_one_char(stdin, &k, USEC_INFINITY, &need_nl);
+
+ if (need_nl)
+ putchar('\n');
+
+ return k != 'q';
+}
+
+static void print_welcome(void) {
+ _cleanup_free_ char *pretty_name = NULL;
+ static bool done = false;
+ int r;
+
+ if (done)
+ return;
+
+ r = parse_os_release(arg_root, "PRETTY_NAME", &pretty_name, NULL);
+ if (r < 0)
+ log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to read os-release file, ignoring: %m");
+
+ printf("\nWelcome to your new installation of %s!\nPlease configure a few basic system settings:\n\n",
+ isempty(pretty_name) ? "Linux" : pretty_name);
+
+ press_any_key();
+
+ done = true;
+}
+
+static int show_menu(char **x, unsigned n_columns, unsigned width, unsigned percentage) {
+ unsigned break_lines, break_modulo;
+ size_t n, per_column, i, j;
+
+ assert(n_columns > 0);
+
+ n = strv_length(x);
+ per_column = DIV_ROUND_UP(n, n_columns);
+
+ break_lines = lines();
+ if (break_lines > 2)
+ break_lines--;
+
+ /* The first page gets two extra lines, since we want to show
+ * a title */
+ break_modulo = break_lines;
+ if (break_modulo > 3)
+ break_modulo -= 3;
+
+ for (i = 0; i < per_column; i++) {
+
+ for (j = 0; j < n_columns; j ++) {
+ _cleanup_free_ char *e = NULL;
+
+ if (j * per_column + i >= n)
+ break;
+
+ e = ellipsize(x[j * per_column + i], width, percentage);
+ if (!e)
+ return log_oom();
+
+ printf("%4zu) %-*s", j * per_column + i + 1, width, e);
+ }
+
+ putchar('\n');
+
+ /* on the first screen we reserve 2 extra lines for the title */
+ if (i % break_lines == break_modulo) {
+ if (!press_any_key())
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+static int prompt_loop(const char *text, char **l, bool (*is_valid)(const char *name), char **ret) {
+ int r;
+
+ assert(text);
+ assert(is_valid);
+ assert(ret);
+
+ for (;;) {
+ _cleanup_free_ char *p = NULL;
+ unsigned u;
+
+ r = ask_string(&p, "%s %s (empty to skip): ", special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET), text);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query user: %m");
+
+ if (isempty(p)) {
+ log_warning("No data entered, skipping.");
+ return 0;
+ }
+
+ r = safe_atou(p, &u);
+ if (r >= 0) {
+ char *c;
+
+ if (u <= 0 || u > strv_length(l)) {
+ log_error("Specified entry number out of range.");
+ continue;
+ }
+
+ log_info("Selected '%s'.", l[u-1]);
+
+ c = strdup(l[u-1]);
+ if (!c)
+ return log_oom();
+
+ free(*ret);
+ *ret = c;
+ return 0;
+ }
+
+ if (!is_valid(p)) {
+ log_error("Entered data invalid.");
+ continue;
+ }
+
+ free(*ret);
+ *ret = p;
+ p = 0;
+ return 0;
+ }
+}
+
+static int prompt_locale(void) {
+ _cleanup_strv_free_ char **locales = NULL;
+ int r;
+
+ if (arg_locale || arg_locale_messages)
+ return 0;
+
+ if (!arg_prompt_locale)
+ return 0;
+
+ r = get_locales(&locales);
+ if (r < 0)
+ return log_error_errno(r, "Cannot query locales list: %m");
+
+ print_welcome();
+
+ printf("\nAvailable Locales:\n\n");
+ r = show_menu(locales, 3, 22, 60);
+ if (r < 0)
+ return r;
+
+ putchar('\n');
+
+ r = prompt_loop("Please enter system locale name or number", locales, locale_is_valid, &arg_locale);
+ if (r < 0)
+ return r;
+
+ if (isempty(arg_locale))
+ return 0;
+
+ r = prompt_loop("Please enter system message locale name or number", locales, locale_is_valid, &arg_locale_messages);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int process_locale(void) {
+ const char *etc_localeconf;
+ char* locales[3];
+ unsigned i = 0;
+ int r;
+
+ etc_localeconf = prefix_roota(arg_root, "/etc/locale.conf");
+ if (laccess(etc_localeconf, F_OK) >= 0)
+ return 0;
+
+ if (arg_copy_locale && arg_root) {
+
+ mkdir_parents(etc_localeconf, 0755);
+ r = copy_file("/etc/locale.conf", etc_localeconf, 0, 0644, 0, COPY_REFLINK);
+ if (r != -ENOENT) {
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy %s: %m", etc_localeconf);
+
+ log_info("%s copied.", etc_localeconf);
+ return 0;
+ }
+ }
+
+ r = prompt_locale();
+ if (r < 0)
+ return r;
+
+ if (!isempty(arg_locale))
+ locales[i++] = strjoina("LANG=", arg_locale);
+ if (!isempty(arg_locale_messages) && !streq(arg_locale_messages, arg_locale))
+ locales[i++] = strjoina("LC_MESSAGES=", arg_locale_messages);
+
+ if (i == 0)
+ return 0;
+
+ locales[i] = NULL;
+
+ mkdir_parents(etc_localeconf, 0755);
+ r = write_env_file(etc_localeconf, locales);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write %s: %m", etc_localeconf);
+
+ log_info("%s written.", etc_localeconf);
+ return 0;
+}
+
+static int prompt_keymap(void) {
+ _cleanup_strv_free_ char **kmaps = NULL;
+ int r;
+
+ if (arg_keymap)
+ return 0;
+
+ if (!arg_prompt_keymap)
+ return 0;
+
+ r = get_keymaps(&kmaps);
+ if (r == -ENOENT) /* no keymaps installed */
+ return r;
+ if (r < 0)
+ return log_error_errno(r, "Failed to read keymaps: %m");
+
+ print_welcome();
+
+ printf("\nAvailable keymaps:\n\n");
+ r = show_menu(kmaps, 3, 22, 60);
+ if (r < 0)
+ return r;
+
+ putchar('\n');
+
+ return prompt_loop("Please enter system keymap name or number",
+ kmaps, keymap_is_valid, &arg_keymap);
+}
+
+static int process_keymap(void) {
+ const char *etc_vconsoleconf;
+ char **keymap;
+ int r;
+
+ etc_vconsoleconf = prefix_roota(arg_root, "/etc/vconsole.conf");
+ if (laccess(etc_vconsoleconf, F_OK) >= 0)
+ return 0;
+
+ if (arg_copy_keymap && arg_root) {
+
+ mkdir_parents(etc_vconsoleconf, 0755);
+ r = copy_file("/etc/vconsole.conf", etc_vconsoleconf, 0, 0644, 0, COPY_REFLINK);
+ if (r != -ENOENT) {
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy %s: %m", etc_vconsoleconf);
+
+ log_info("%s copied.", etc_vconsoleconf);
+ return 0;
+ }
+ }
+
+ r = prompt_keymap();
+ if (r == -ENOENT)
+ return 0; /* don't fail if no keymaps are installed */
+ if (r < 0)
+ return r;
+
+ if (isempty(arg_keymap))
+ return 0;
+
+ keymap = STRV_MAKE(strjoina("KEYMAP=", arg_keymap));
+
+ r = mkdir_parents(etc_vconsoleconf, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create the parent directory of %s: %m", etc_vconsoleconf);
+
+ r = write_env_file(etc_vconsoleconf, keymap);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write %s: %m", etc_vconsoleconf);
+
+ log_info("%s written.", etc_vconsoleconf);
+ return 0;
+}
+
+static bool timezone_is_valid_log_error(const char *name) {
+ return timezone_is_valid(name, LOG_ERR);
+}
+
+static int prompt_timezone(void) {
+ _cleanup_strv_free_ char **zones = NULL;
+ int r;
+
+ if (arg_timezone)
+ return 0;
+
+ if (!arg_prompt_timezone)
+ return 0;
+
+ r = get_timezones(&zones);
+ if (r < 0)
+ return log_error_errno(r, "Cannot query timezone list: %m");
+
+ print_welcome();
+
+ printf("\nAvailable Time Zones:\n\n");
+ r = show_menu(zones, 3, 22, 30);
+ if (r < 0)
+ return r;
+
+ putchar('\n');
+
+ r = prompt_loop("Please enter timezone name or number", zones, timezone_is_valid_log_error, &arg_timezone);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int process_timezone(void) {
+ const char *etc_localtime, *e;
+ int r;
+
+ etc_localtime = prefix_roota(arg_root, "/etc/localtime");
+ if (laccess(etc_localtime, F_OK) >= 0)
+ return 0;
+
+ if (arg_copy_timezone && arg_root) {
+ _cleanup_free_ char *p = NULL;
+
+ r = readlink_malloc("/etc/localtime", &p);
+ if (r != -ENOENT) {
+ if (r < 0)
+ return log_error_errno(r, "Failed to read host timezone: %m");
+
+ mkdir_parents(etc_localtime, 0755);
+ if (symlink(p, etc_localtime) < 0)
+ return log_error_errno(errno, "Failed to create %s symlink: %m", etc_localtime);
+
+ log_info("%s copied.", etc_localtime);
+ return 0;
+ }
+ }
+
+ r = prompt_timezone();
+ if (r < 0)
+ return r;
+
+ if (isempty(arg_timezone))
+ return 0;
+
+ e = strjoina("../usr/share/zoneinfo/", arg_timezone);
+
+ mkdir_parents(etc_localtime, 0755);
+ if (symlink(e, etc_localtime) < 0)
+ return log_error_errno(errno, "Failed to create %s symlink: %m", etc_localtime);
+
+ log_info("%s written", etc_localtime);
+ return 0;
+}
+
+static int prompt_hostname(void) {
+ int r;
+
+ if (arg_hostname)
+ return 0;
+
+ if (!arg_prompt_hostname)
+ return 0;
+
+ print_welcome();
+ putchar('\n');
+
+ for (;;) {
+ _cleanup_free_ char *h = NULL;
+
+ r = ask_string(&h, "%s Please enter hostname for new system (empty to skip): ", special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET));
+ if (r < 0)
+ return log_error_errno(r, "Failed to query hostname: %m");
+
+ if (isempty(h)) {
+ log_warning("No hostname entered, skipping.");
+ break;
+ }
+
+ if (!hostname_is_valid(h, true)) {
+ log_error("Specified hostname invalid.");
+ continue;
+ }
+
+ /* Get rid of the trailing dot that we allow, but don't want to see */
+ arg_hostname = hostname_cleanup(h);
+ h = NULL;
+ break;
+ }
+
+ return 0;
+}
+
+static int process_hostname(void) {
+ const char *etc_hostname;
+ int r;
+
+ etc_hostname = prefix_roota(arg_root, "/etc/hostname");
+ if (laccess(etc_hostname, F_OK) >= 0)
+ return 0;
+
+ r = prompt_hostname();
+ if (r < 0)
+ return r;
+
+ if (isempty(arg_hostname))
+ return 0;
+
+ mkdir_parents(etc_hostname, 0755);
+ r = write_string_file(etc_hostname, arg_hostname,
+ WRITE_STRING_FILE_CREATE | WRITE_STRING_FILE_SYNC);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write %s: %m", etc_hostname);
+
+ log_info("%s written.", etc_hostname);
+ return 0;
+}
+
+static int process_machine_id(void) {
+ const char *etc_machine_id;
+ char id[SD_ID128_STRING_MAX];
+ int r;
+
+ etc_machine_id = prefix_roota(arg_root, "/etc/machine-id");
+ if (laccess(etc_machine_id, F_OK) >= 0)
+ return 0;
+
+ if (sd_id128_is_null(arg_machine_id))
+ return 0;
+
+ mkdir_parents(etc_machine_id, 0755);
+ r = write_string_file(etc_machine_id, sd_id128_to_string(arg_machine_id, id),
+ WRITE_STRING_FILE_CREATE | WRITE_STRING_FILE_SYNC);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write machine id: %m");
+
+ log_info("%s written.", etc_machine_id);
+ return 0;
+}
+
+static int prompt_root_password(void) {
+ const char *msg1, *msg2, *etc_shadow;
+ int r;
+
+ if (arg_root_password)
+ return 0;
+
+ if (!arg_prompt_root_password)
+ return 0;
+
+ etc_shadow = prefix_roota(arg_root, "/etc/shadow");
+ if (laccess(etc_shadow, F_OK) >= 0)
+ return 0;
+
+ print_welcome();
+ putchar('\n');
+
+ msg1 = strjoina(special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET), " Please enter a new root password (empty to skip): ");
+ msg2 = strjoina(special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET), " Please enter new root password again: ");
+
+ for (;;) {
+ _cleanup_strv_free_erase_ char **a = NULL, **b = NULL;
+
+ r = ask_password_tty(-1, msg1, NULL, 0, 0, NULL, &a);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query root password: %m");
+ if (strv_length(a) != 1) {
+ log_warning("Received multiple passwords, where we expected one.");
+ return -EINVAL;
+ }
+
+ if (isempty(*a)) {
+ log_warning("No password entered, skipping.");
+ break;
+ }
+
+ r = ask_password_tty(-1, msg2, NULL, 0, 0, NULL, &b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query root password: %m");
+
+ if (!streq(*a, *b)) {
+ log_error("Entered passwords did not match, please try again.");
+ continue;
+ }
+
+ arg_root_password = TAKE_PTR(*a);
+ break;
+ }
+
+ return 0;
+}
+
+static int write_root_shadow(const char *path, const struct spwd *p) {
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(path);
+ assert(p);
+
+ RUN_WITH_UMASK(0777)
+ f = fopen(path, "wex");
+ if (!f)
+ return -errno;
+
+ r = putspent_sane(p, f);
+ if (r < 0)
+ return r;
+
+ return fflush_sync_and_check(f);
+}
+
+static int process_root_password(void) {
+
+ static const char table[] =
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "0123456789"
+ "./";
+
+ struct spwd item = {
+ .sp_namp = (char*) "root",
+ .sp_min = -1,
+ .sp_max = -1,
+ .sp_warn = -1,
+ .sp_inact = -1,
+ .sp_expire = -1,
+ .sp_flag = (unsigned long) -1, /* this appears to be what everybody does ... */
+ };
+
+ _cleanup_close_ int lock = -1;
+ char salt[3+16+1+1];
+ uint8_t raw[16];
+ unsigned i;
+ char *j;
+
+ const char *etc_shadow;
+ int r;
+
+ etc_shadow = prefix_roota(arg_root, "/etc/shadow");
+ if (laccess(etc_shadow, F_OK) >= 0)
+ return 0;
+
+ mkdir_parents(etc_shadow, 0755);
+
+ lock = take_etc_passwd_lock(arg_root);
+ if (lock < 0)
+ return log_error_errno(lock, "Failed to take a lock: %m");
+
+ if (arg_copy_root_password && arg_root) {
+ struct spwd *p;
+
+ errno = 0;
+ p = getspnam("root");
+ if (p || errno != ENOENT) {
+ if (!p) {
+ if (!errno)
+ errno = EIO;
+
+ return log_error_errno(errno, "Failed to find shadow entry for root: %m");
+ }
+
+ r = write_root_shadow(etc_shadow, p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write %s: %m", etc_shadow);
+
+ log_info("%s copied.", etc_shadow);
+ return 0;
+ }
+ }
+
+ r = prompt_root_password();
+ if (r < 0)
+ return r;
+
+ if (!arg_root_password)
+ return 0;
+
+ /* Insist on the best randomness by setting RANDOM_BLOCK, this is about keeping passwords secret after all. */
+ r = genuine_random_bytes(raw, 16, RANDOM_BLOCK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get salt: %m");
+
+ /* We only bother with SHA512 hashed passwords, the rest is legacy, and we don't do legacy. */
+ assert_cc(sizeof(table) == 64 + 1);
+ j = stpcpy(salt, "$6$");
+ for (i = 0; i < 16; i++)
+ j[i] = table[raw[i] & 63];
+ j[i++] = '$';
+ j[i] = 0;
+
+ errno = 0;
+ item.sp_pwdp = crypt(arg_root_password, salt);
+ if (!item.sp_pwdp) {
+ if (!errno)
+ errno = EINVAL;
+
+ return log_error_errno(errno, "Failed to encrypt password: %m");
+ }
+
+ item.sp_lstchg = (long) (now(CLOCK_REALTIME) / USEC_PER_DAY);
+
+ r = write_root_shadow(etc_shadow, &item);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write %s: %m", etc_shadow);
+
+ log_info("%s written.", etc_shadow);
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-firstboot", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Configures basic settings of the system.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --root=PATH Operate on an alternate filesystem root\n"
+ " --locale=LOCALE Set primary locale (LANG=)\n"
+ " --locale-messages=LOCALE Set message locale (LC_MESSAGES=)\n"
+ " --keymap=KEYMAP Set keymap\n"
+ " --timezone=TIMEZONE Set timezone\n"
+ " --hostname=NAME Set host name\n"
+ " --machine-ID=ID Set machine ID\n"
+ " --root-password=PASSWORD Set root password\n"
+ " --root-password-file=FILE Set root password from file\n"
+ " --prompt-locale Prompt the user for locale settings\n"
+ " --prompt-keymap Prompt the user for keymap settings\n"
+ " --prompt-timezone Prompt the user for timezone\n"
+ " --prompt-hostname Prompt the user for hostname\n"
+ " --prompt-root-password Prompt the user for root password\n"
+ " --prompt Prompt for all of the above\n"
+ " --copy-locale Copy locale from host\n"
+ " --copy-keymap Copy keymap from host\n"
+ " --copy-timezone Copy timezone from host\n"
+ " --copy-root-password Copy root password from host\n"
+ " --copy Copy locale, keymap, timezone, root password\n"
+ " --setup-machine-id Generate a new random machine ID\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_ROOT,
+ ARG_LOCALE,
+ ARG_LOCALE_MESSAGES,
+ ARG_KEYMAP,
+ ARG_TIMEZONE,
+ ARG_HOSTNAME,
+ ARG_MACHINE_ID,
+ ARG_ROOT_PASSWORD,
+ ARG_ROOT_PASSWORD_FILE,
+ ARG_PROMPT,
+ ARG_PROMPT_LOCALE,
+ ARG_PROMPT_KEYMAP,
+ ARG_PROMPT_TIMEZONE,
+ ARG_PROMPT_HOSTNAME,
+ ARG_PROMPT_ROOT_PASSWORD,
+ ARG_COPY,
+ ARG_COPY_LOCALE,
+ ARG_COPY_KEYMAP,
+ ARG_COPY_TIMEZONE,
+ ARG_COPY_ROOT_PASSWORD,
+ ARG_SETUP_MACHINE_ID,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "locale", required_argument, NULL, ARG_LOCALE },
+ { "locale-messages", required_argument, NULL, ARG_LOCALE_MESSAGES },
+ { "keymap", required_argument, NULL, ARG_KEYMAP },
+ { "timezone", required_argument, NULL, ARG_TIMEZONE },
+ { "hostname", required_argument, NULL, ARG_HOSTNAME },
+ { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
+ { "root-password", required_argument, NULL, ARG_ROOT_PASSWORD },
+ { "root-password-file", required_argument, NULL, ARG_ROOT_PASSWORD_FILE },
+ { "prompt", no_argument, NULL, ARG_PROMPT },
+ { "prompt-locale", no_argument, NULL, ARG_PROMPT_LOCALE },
+ { "prompt-keymap", no_argument, NULL, ARG_PROMPT_KEYMAP },
+ { "prompt-timezone", no_argument, NULL, ARG_PROMPT_TIMEZONE },
+ { "prompt-hostname", no_argument, NULL, ARG_PROMPT_HOSTNAME },
+ { "prompt-root-password", no_argument, NULL, ARG_PROMPT_ROOT_PASSWORD },
+ { "copy", no_argument, NULL, ARG_COPY },
+ { "copy-locale", no_argument, NULL, ARG_COPY_LOCALE },
+ { "copy-keymap", no_argument, NULL, ARG_COPY_KEYMAP },
+ { "copy-timezone", no_argument, NULL, ARG_COPY_TIMEZONE },
+ { "copy-root-password", no_argument, NULL, ARG_COPY_ROOT_PASSWORD },
+ { "setup-machine-id", no_argument, NULL, ARG_SETUP_MACHINE_ID },
+ {}
+ };
+
+ int r, c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_ROOT:
+ r = parse_path_argument_and_warn(optarg, true, &arg_root);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_LOCALE:
+ if (!locale_is_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Locale %s is not valid.", optarg);
+
+ r = free_and_strdup(&arg_locale, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_LOCALE_MESSAGES:
+ if (!locale_is_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Locale %s is not valid.", optarg);
+
+ r = free_and_strdup(&arg_locale_messages, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_KEYMAP:
+ if (!keymap_is_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Keymap %s is not valid.", optarg);
+
+ r = free_and_strdup(&arg_keymap, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_TIMEZONE:
+ if (!timezone_is_valid(optarg, LOG_ERR))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Timezone %s is not valid.", optarg);
+
+ r = free_and_strdup(&arg_timezone, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_ROOT_PASSWORD:
+ r = free_and_strdup(&arg_root_password, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case ARG_ROOT_PASSWORD_FILE:
+ arg_root_password = mfree(arg_root_password);
+
+ r = read_one_line_file(optarg, &arg_root_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read %s: %m", optarg);
+
+ break;
+
+ case ARG_HOSTNAME:
+ if (!hostname_is_valid(optarg, true))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Host name %s is not valid.", optarg);
+
+ hostname_cleanup(optarg);
+ r = free_and_strdup(&arg_hostname, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_MACHINE_ID:
+ if (sd_id128_from_string(optarg, &arg_machine_id) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse machine id %s.", optarg);
+
+ break;
+
+ case ARG_PROMPT:
+ arg_prompt_locale = arg_prompt_keymap = arg_prompt_timezone = arg_prompt_hostname = arg_prompt_root_password = true;
+ break;
+
+ case ARG_PROMPT_LOCALE:
+ arg_prompt_locale = true;
+ break;
+
+ case ARG_PROMPT_KEYMAP:
+ arg_prompt_keymap = true;
+ break;
+
+ case ARG_PROMPT_TIMEZONE:
+ arg_prompt_timezone = true;
+ break;
+
+ case ARG_PROMPT_HOSTNAME:
+ arg_prompt_hostname = true;
+ break;
+
+ case ARG_PROMPT_ROOT_PASSWORD:
+ arg_prompt_root_password = true;
+ break;
+
+ case ARG_COPY:
+ arg_copy_locale = arg_copy_keymap = arg_copy_timezone = arg_copy_root_password = true;
+ break;
+
+ case ARG_COPY_LOCALE:
+ arg_copy_locale = true;
+ break;
+
+ case ARG_COPY_KEYMAP:
+ arg_copy_keymap = true;
+ break;
+
+ case ARG_COPY_TIMEZONE:
+ arg_copy_timezone = true;
+ break;
+
+ case ARG_COPY_ROOT_PASSWORD:
+ arg_copy_root_password = true;
+ break;
+
+ case ARG_SETUP_MACHINE_ID:
+
+ r = sd_id128_randomize(&arg_machine_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate randomized machine ID: %m");
+
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ bool enabled;
+ int r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ r = proc_cmdline_get_bool("systemd.firstboot", &enabled);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse systemd.firstboot= kernel command line argument, ignoring: %m");
+ if (r > 0 && !enabled)
+ return 0; /* disabled */
+
+ r = process_locale();
+ if (r < 0)
+ return r;
+
+ r = process_keymap();
+ if (r < 0)
+ return r;
+
+ r = process_timezone();
+ if (r < 0)
+ return r;
+
+ r = process_hostname();
+ if (r < 0)
+ return r;
+
+ r = process_machine_id();
+ if (r < 0)
+ return r;
+
+ r = process_root_password();
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/fsck/fsck.c b/src/fsck/fsck.c
new file mode 100644
index 0000000..524327c
--- /dev/null
+++ b/src/fsck/fsck.c
@@ -0,0 +1,437 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Holger Hans Peter Freyther
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/file.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "device-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "util.h"
+
+/* exit codes as defined in fsck(8) */
+enum {
+ FSCK_SUCCESS = 0,
+ FSCK_ERROR_CORRECTED = 1 << 0,
+ FSCK_SYSTEM_SHOULD_REBOOT = 1 << 1,
+ FSCK_ERRORS_LEFT_UNCORRECTED = 1 << 2,
+ FSCK_OPERATIONAL_ERROR = 1 << 3,
+ FSCK_USAGE_OR_SYNTAX_ERROR = 1 << 4,
+ FSCK_USER_CANCELLED = 1 << 5,
+ FSCK_SHARED_LIB_ERROR = 1 << 7,
+};
+
+static bool arg_skip = false;
+static bool arg_force = false;
+static bool arg_show_progress = false;
+static const char *arg_repair = "-a";
+
+static void start_target(const char *target, const char *mode) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(target);
+
+ r = bus_connect_system_systemd(&bus);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get D-Bus connection: %m");
+ return;
+ }
+
+ log_info("Running request %s/start/replace", target);
+
+ /* Start these units only if we can replace base.target with it */
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartUnitReplace",
+ &error,
+ NULL,
+ "sss", "basic.target", target, mode);
+
+ /* Don't print a warning if we aren't called during startup */
+ if (r < 0 && !sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_JOB))
+ log_error("Failed to start unit: %s", bus_error_message(&error, r));
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r;
+
+ assert(key);
+
+ if (streq(key, "fsck.mode")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (streq(value, "auto"))
+ arg_force = arg_skip = false;
+ else if (streq(value, "force"))
+ arg_force = true;
+ else if (streq(value, "skip"))
+ arg_skip = true;
+ else
+ log_warning("Invalid fsck.mode= parameter '%s'. Ignoring.", value);
+
+ } else if (streq(key, "fsck.repair")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (streq(value, "preen"))
+ arg_repair = "-a";
+ else {
+ r = parse_boolean(value);
+ if (r > 0)
+ arg_repair = "-y";
+ else if (r == 0)
+ arg_repair = "-n";
+ else
+ log_warning("Invalid fsck.repair= parameter '%s'. Ignoring.", value);
+ }
+ }
+
+#if HAVE_SYSV_COMPAT
+ else if (streq(key, "fastboot") && !value) {
+ log_warning("Please pass 'fsck.mode=skip' rather than 'fastboot' on the kernel command line.");
+ arg_skip = true;
+
+ } else if (streq(key, "forcefsck") && !value) {
+ log_warning("Please pass 'fsck.mode=force' rather than 'forcefsck' on the kernel command line.");
+ arg_force = true;
+ }
+#endif
+
+ return 0;
+}
+
+static void test_files(void) {
+
+#if HAVE_SYSV_COMPAT
+ if (access("/fastboot", F_OK) >= 0) {
+ log_error("Please pass 'fsck.mode=skip' on the kernel command line rather than creating /fastboot on the root file system.");
+ arg_skip = true;
+ }
+
+ if (access("/forcefsck", F_OK) >= 0) {
+ log_error("Please pass 'fsck.mode=force' on the kernel command line rather than creating /forcefsck on the root file system.");
+ arg_force = true;
+ }
+#endif
+
+ arg_show_progress = access("/run/systemd/show-status", F_OK) >= 0;
+}
+
+static double percent(int pass, unsigned long cur, unsigned long max) {
+ /* Values stolen from e2fsck */
+
+ static const int pass_table[] = {
+ 0, 70, 90, 92, 95, 100
+ };
+
+ if (pass <= 0)
+ return 0.0;
+
+ if ((unsigned) pass >= ELEMENTSOF(pass_table) || max == 0)
+ return 100.0;
+
+ return (double) pass_table[pass-1] +
+ ((double) pass_table[pass] - (double) pass_table[pass-1]) *
+ (double) cur / (double) max;
+}
+
+static int process_progress(int fd) {
+ _cleanup_fclose_ FILE *console = NULL, *f = NULL;
+ usec_t last = 0;
+ bool locked = false;
+ int clear = 0, r;
+
+ /* No progress pipe to process? Then we are a NOP. */
+ if (fd < 0)
+ return 0;
+
+ f = fdopen(fd, "r");
+ if (!f) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ console = fopen("/dev/console", "we");
+ if (!console)
+ return -ENOMEM;
+
+ for (;;) {
+ int pass, m;
+ unsigned long cur, max;
+ _cleanup_free_ char *device = NULL;
+ double p;
+ usec_t t;
+
+ if (fscanf(f, "%i %lu %lu %ms", &pass, &cur, &max, &device) != 4) {
+
+ if (ferror(f))
+ r = log_warning_errno(errno, "Failed to read from progress pipe: %m");
+ else if (feof(f))
+ r = 0;
+ else {
+ log_warning("Failed to parse progress pipe data");
+ r = -EBADMSG;
+ }
+ break;
+ }
+
+ /* Only show one progress counter at max */
+ if (!locked) {
+ if (flock(fileno(console), LOCK_EX|LOCK_NB) < 0)
+ continue;
+
+ locked = true;
+ }
+
+ /* Only update once every 50ms */
+ t = now(CLOCK_MONOTONIC);
+ if (last + 50 * USEC_PER_MSEC > t)
+ continue;
+
+ last = t;
+
+ p = percent(pass, cur, max);
+ fprintf(console, "\r%s: fsck %3.1f%% complete...\r%n", device, p, &m);
+ fflush(console);
+
+ if (m > clear)
+ clear = m;
+ }
+
+ if (clear > 0) {
+ unsigned j;
+
+ fputc('\r', console);
+ for (j = 0; j < (unsigned) clear; j++)
+ fputc(' ', console);
+ fputc('\r', console);
+ fflush(console);
+ }
+
+ return r;
+}
+
+static int fsck_progress_socket(void) {
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/fsck.progress",
+ };
+
+ _cleanup_close_ int fd = -1;
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0)
+ return log_warning_errno(errno, "socket(): %m");
+
+ if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0)
+ return log_full_errno(IN_SET(errno, ECONNREFUSED, ENOENT) ? LOG_DEBUG : LOG_WARNING,
+ errno, "Failed to connect to progress socket %s, ignoring: %m", sa.un.sun_path);
+
+ return TAKE_FD(fd);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_close_pair_ int progress_pipe[2] = { -1, -1 };
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ const char *device, *type;
+ bool root_directory;
+ struct stat st;
+ int r, exit_status;
+ pid_t pid;
+
+ log_setup_service();
+
+ if (argc > 2) {
+ log_error("This program expects one or no arguments.");
+ return -EINVAL;
+ }
+
+ umask(0022);
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ test_files();
+
+ if (!arg_force && arg_skip)
+ return 0;
+
+ if (argc > 1) {
+ device = argv[1];
+
+ if (stat(device, &st) < 0)
+ return log_error_errno(errno, "Failed to stat %s: %m", device);
+
+ if (!S_ISBLK(st.st_mode)) {
+ log_error("%s is not a block device.", device);
+ return -EINVAL;
+ }
+
+ r = sd_device_new_from_devnum(&dev, 'b', st.st_rdev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to detect device %s: %m", device);
+
+ root_directory = false;
+ } else {
+ struct timespec times[2];
+
+ /* Find root device */
+
+ if (stat("/", &st) < 0)
+ return log_error_errno(errno, "Failed to stat() the root directory: %m");
+
+ /* Virtual root devices don't need an fsck */
+ if (major(st.st_dev) == 0) {
+ log_debug("Root directory is virtual or btrfs, skipping check.");
+ return 0;
+ }
+
+ /* check if we are already writable */
+ times[0] = st.st_atim;
+ times[1] = st.st_mtim;
+
+ if (utimensat(AT_FDCWD, "/", times, 0) == 0) {
+ log_info("Root directory is writable, skipping check.");
+ return 0;
+ }
+
+ r = sd_device_new_from_devnum(&dev, 'b', st.st_dev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to detect root device: %m");
+
+ r = sd_device_get_devname(dev, &device);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to detect device node of root directory: %m");
+
+ root_directory = true;
+ }
+
+ if (sd_device_get_property_value(dev, "ID_FS_TYPE", &type) >= 0) {
+ r = fsck_exists(type);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Couldn't detect if fsck.%s may be used, proceeding: %m", type);
+ else if (r == 0) {
+ log_device_info(dev, "fsck.%s doesn't exist, not checking file system.", type);
+ return 0;
+ }
+ }
+
+ if (arg_show_progress &&
+ pipe(progress_pipe) < 0)
+ return log_error_errno(errno, "pipe(): %m");
+
+ r = safe_fork("(fsck)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ char dash_c[STRLEN("-C") + DECIMAL_STR_MAX(int) + 1];
+ int progress_socket = -1;
+ const char *cmdline[9];
+ int i = 0;
+
+ /* Child */
+
+ /* Close the reading side of the progress pipe */
+ progress_pipe[0] = safe_close(progress_pipe[0]);
+
+ /* Try to connect to a progress management daemon, if there is one */
+ progress_socket = fsck_progress_socket();
+ if (progress_socket >= 0) {
+ /* If this worked we close the progress pipe early, and just use the socket */
+ progress_pipe[1] = safe_close(progress_pipe[1]);
+ xsprintf(dash_c, "-C%i", progress_socket);
+ } else if (progress_pipe[1] >= 0) {
+ /* Otherwise if we have the progress pipe to our own local handle, we use it */
+ xsprintf(dash_c, "-C%i", progress_pipe[1]);
+ } else
+ dash_c[0] = 0;
+
+ cmdline[i++] = "/sbin/fsck";
+ cmdline[i++] = arg_repair;
+ cmdline[i++] = "-T";
+
+ /*
+ * Since util-linux v2.25 fsck uses /run/fsck/<diskname>.lock files.
+ * The previous versions use flock for the device and conflict with
+ * udevd, see https://bugs.freedesktop.org/show_bug.cgi?id=79576#c5
+ */
+ cmdline[i++] = "-l";
+
+ if (!root_directory)
+ cmdline[i++] = "-M";
+
+ if (arg_force)
+ cmdline[i++] = "-f";
+
+ if (!isempty(dash_c))
+ cmdline[i++] = dash_c;
+
+ cmdline[i++] = device;
+ cmdline[i++] = NULL;
+
+ (void) rlimit_nofile_safe();
+
+ execv(cmdline[0], (char**) cmdline);
+ _exit(FSCK_OPERATIONAL_ERROR);
+ }
+
+ progress_pipe[1] = safe_close(progress_pipe[1]);
+ (void) process_progress(TAKE_FD(progress_pipe[0]));
+
+ exit_status = wait_for_terminate_and_check("fsck", pid, WAIT_LOG_ABNORMAL);
+ if (exit_status < 0)
+ return exit_status;
+ if ((exit_status & ~FSCK_ERROR_CORRECTED) != FSCK_SUCCESS) {
+ log_error("fsck failed with exit status %i.", exit_status);
+
+ if ((exit_status & FSCK_SYSTEM_SHOULD_REBOOT) && root_directory) {
+ /* System should be rebooted. */
+ start_target(SPECIAL_REBOOT_TARGET, "replace-irreversibly");
+ return -EINVAL;
+ } else if (exit_status & (FSCK_SYSTEM_SHOULD_REBOOT | FSCK_ERRORS_LEFT_UNCORRECTED))
+ /* Some other problem */
+ start_target(SPECIAL_EMERGENCY_TARGET, "replace");
+ else
+ log_warning("Ignoring error.");
+ }
+
+ if (exit_status & FSCK_ERROR_CORRECTED)
+ (void) touch("/run/systemd/quotacheck");
+
+ return !!(exit_status & (FSCK_SYSTEM_SHOULD_REBOOT | FSCK_ERRORS_LEFT_UNCORRECTED));
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/fstab-generator/fstab-generator.c b/src/fstab-generator/fstab-generator.c
new file mode 100644
index 0000000..55a8242
--- /dev/null
+++ b/src/fstab-generator/fstab-generator.c
@@ -0,0 +1,922 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <mntent.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio_ext.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "fstab-util.h"
+#include "generator.h"
+#include "log.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "mount-setup.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "special.h"
+#include "specifier.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "util.h"
+#include "virt.h"
+#include "volatile-util.h"
+
+typedef enum MountpointFlags {
+ NOAUTO = 1 << 0,
+ NOFAIL = 1 << 1,
+ AUTOMOUNT = 1 << 2,
+ MAKEFS = 1 << 3,
+ GROWFS = 1 << 4,
+} MountpointFlags;
+
+static const char *arg_dest = NULL;
+static const char *arg_dest_late = NULL;
+static bool arg_fstab_enabled = true;
+static char *arg_root_what = NULL;
+static char *arg_root_fstype = NULL;
+static char *arg_root_options = NULL;
+static char *arg_root_hash = NULL;
+static int arg_root_rw = -1;
+static char *arg_usr_what = NULL;
+static char *arg_usr_fstype = NULL;
+static char *arg_usr_options = NULL;
+static VolatileMode arg_volatile_mode = _VOLATILE_MODE_INVALID;
+
+STATIC_DESTRUCTOR_REGISTER(arg_root_what, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root_fstype, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root_options, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root_hash, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_usr_what, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_usr_fstype, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_usr_options, freep);
+
+static int write_options(FILE *f, const char *options) {
+ _cleanup_free_ char *o = NULL;
+
+ if (isempty(options))
+ return 0;
+
+ if (streq(options, "defaults"))
+ return 0;
+
+ o = specifier_escape(options);
+ if (!o)
+ return log_oom();
+
+ fprintf(f, "Options=%s\n", o);
+ return 1;
+}
+
+static int write_what(FILE *f, const char *what) {
+ _cleanup_free_ char *w = NULL;
+
+ w = specifier_escape(what);
+ if (!w)
+ return log_oom();
+
+ fprintf(f, "What=%s\n", w);
+ return 1;
+}
+
+static int add_swap(
+ const char *what,
+ struct mntent *me,
+ MountpointFlags flags) {
+
+ _cleanup_free_ char *name = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(what);
+ assert(me);
+
+ if (access("/proc/swaps", F_OK) < 0) {
+ log_info("Swap not supported, ignoring fstab swap entry for %s.", what);
+ return 0;
+ }
+
+ if (detect_container() > 0) {
+ log_info("Running in a container, ignoring fstab swap entry for %s.", what);
+ return 0;
+ }
+
+ r = unit_name_from_path(what, ".swap", &name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ r = generator_open_unit_file(arg_dest, "/etc/fstab", name, &f);
+ if (r < 0)
+ return r;
+
+ fputs("# Automatically generated by systemd-fstab-generator\n\n"
+ "[Unit]\n"
+ "SourcePath=/etc/fstab\n"
+ "Documentation=man:fstab(5) man:systemd-fstab-generator(8)\n\n"
+ "[Swap]\n", f);
+
+ r = write_what(f, what);
+ if (r < 0)
+ return r;
+
+ r = write_options(f, me->mnt_opts);
+ if (r < 0)
+ return r;
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", name);
+
+ /* use what as where, to have a nicer error message */
+ r = generator_write_timeouts(arg_dest, what, what, me->mnt_opts, NULL);
+ if (r < 0)
+ return r;
+
+ if (flags & MAKEFS) {
+ r = generator_hook_up_mkswap(arg_dest, what);
+ if (r < 0)
+ return r;
+ }
+
+ if (flags & GROWFS)
+ /* TODO: swap devices must be wiped and recreated */
+ log_warning("%s: growing swap devices is currently unsupported.", what);
+
+ if (!(flags & NOAUTO)) {
+ r = generator_add_symlink(arg_dest, SPECIAL_SWAP_TARGET,
+ (flags & NOFAIL) ? "wants" : "requires", name);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static bool mount_is_network(struct mntent *me) {
+ assert(me);
+
+ return fstab_test_option(me->mnt_opts, "_netdev\0") ||
+ fstype_is_network(me->mnt_type);
+}
+
+static bool mount_in_initrd(struct mntent *me) {
+ assert(me);
+
+ return fstab_test_option(me->mnt_opts, "x-initrd.mount\0") ||
+ streq(me->mnt_dir, "/usr");
+}
+
+static int write_timeout(FILE *f, const char *where, const char *opts,
+ const char *filter, const char *variable) {
+ _cleanup_free_ char *timeout = NULL;
+ char timespan[FORMAT_TIMESPAN_MAX];
+ usec_t u;
+ int r;
+
+ r = fstab_filter_options(opts, filter, NULL, &timeout, NULL);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse options: %m");
+ if (r == 0)
+ return 0;
+
+ r = parse_sec_fix_0(timeout, &u);
+ if (r < 0) {
+ log_warning("Failed to parse timeout for %s, ignoring: %s", where, timeout);
+ return 0;
+ }
+
+ fprintf(f, "%s=%s\n", variable, format_timespan(timespan, sizeof(timespan), u, 0));
+
+ return 0;
+}
+
+static int write_idle_timeout(FILE *f, const char *where, const char *opts) {
+ return write_timeout(f, where, opts,
+ "x-systemd.idle-timeout\0", "TimeoutIdleSec");
+}
+
+static int write_mount_timeout(FILE *f, const char *where, const char *opts) {
+ return write_timeout(f, where, opts,
+ "x-systemd.mount-timeout\0", "TimeoutSec");
+}
+
+static int write_dependency(FILE *f, const char *opts,
+ const char *filter, const char *format) {
+ _cleanup_strv_free_ char **names = NULL, **units = NULL;
+ _cleanup_free_ char *res = NULL;
+ char **s;
+ int r;
+
+ assert(f);
+ assert(opts);
+
+ r = fstab_extract_values(opts, filter, &names);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse options: %m");
+ if (r == 0)
+ return 0;
+
+ STRV_FOREACH(s, names) {
+ char *x;
+
+ r = unit_name_mangle_with_suffix(*s, 0, ".mount", &x);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+ r = strv_consume(&units, x);
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (units) {
+ res = strv_join(units, " ");
+ if (!res)
+ return log_oom();
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+ fprintf(f, format, res);
+#pragma GCC diagnostic pop
+ }
+
+ return 0;
+}
+
+static int write_after(FILE *f, const char *opts) {
+ return write_dependency(f, opts, "x-systemd.after", "After=%1$s\n");
+}
+
+static int write_requires_after(FILE *f, const char *opts) {
+ return write_dependency(f, opts,
+ "x-systemd.requires", "After=%1$s\nRequires=%1$s\n");
+}
+
+static int write_before(FILE *f, const char *opts) {
+ return write_dependency(f, opts,
+ "x-systemd.before", "Before=%1$s\n");
+}
+
+static int write_requires_mounts_for(FILE *f, const char *opts) {
+ _cleanup_strv_free_ char **paths = NULL, **paths_escaped = NULL;
+ _cleanup_free_ char *res = NULL;
+ int r;
+
+ assert(f);
+ assert(opts);
+
+ r = fstab_extract_values(opts, "x-systemd.requires-mounts-for", &paths);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse options: %m");
+ if (r == 0)
+ return 0;
+
+ r = specifier_escape_strv(paths, &paths_escaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to escape paths: %m");
+
+ res = strv_join(paths_escaped, " ");
+ if (!res)
+ return log_oom();
+
+ fprintf(f, "RequiresMountsFor=%s\n", res);
+
+ return 0;
+}
+
+static int add_mount(
+ const char *dest,
+ const char *what,
+ const char *where,
+ const char *original_where,
+ const char *fstype,
+ const char *opts,
+ int passno,
+ MountpointFlags flags,
+ const char *post,
+ const char *source) {
+
+ _cleanup_free_ char
+ *name = NULL,
+ *automount_name = NULL,
+ *filtered = NULL,
+ *where_escaped = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(what);
+ assert(where);
+ assert(opts);
+ assert(post);
+ assert(source);
+
+ if (streq_ptr(fstype, "autofs"))
+ return 0;
+
+ if (!is_path(where)) {
+ log_warning("Mount point %s is not a valid path, ignoring.", where);
+ return 0;
+ }
+
+ if (mount_point_is_api(where) ||
+ mount_point_ignore(where))
+ return 0;
+
+ if (path_equal(where, "/")) {
+ if (flags & NOAUTO)
+ log_warning("Ignoring \"noauto\" for root device");
+ if (flags & NOFAIL)
+ log_warning("Ignoring \"nofail\" for root device");
+ if (flags & AUTOMOUNT)
+ log_warning("Ignoring automount option for root device");
+
+ SET_FLAG(flags, NOAUTO | NOFAIL | AUTOMOUNT, false);
+ }
+
+ r = unit_name_from_path(where, ".mount", &name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ r = generator_open_unit_file(dest, "/etc/fstab", name, &f);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "[Unit]\n"
+ "SourcePath=%s\n"
+ "Documentation=man:fstab(5) man:systemd-fstab-generator(8)\n",
+ source);
+
+ if (STRPTR_IN_SET(fstype, "nfs", "nfs4") && !(flags & AUTOMOUNT) &&
+ fstab_test_yes_no_option(opts, "bg\0" "fg\0")) {
+ /* The default retry timeout that mount.nfs uses for 'bg' mounts
+ * is 10000 minutes, where as it uses 2 minutes for 'fg' mounts.
+ * As we are making 'bg' mounts look like an 'fg' mount to
+ * mount.nfs (so systemd can manage the job-control aspects of 'bg'),
+ * we need to explicitly preserve that default, and also ensure
+ * the systemd mount-timeout doesn't interfere.
+ * By placing these options first, they can be over-ridden by
+ * settings in /etc/fstab. */
+ opts = strjoina("x-systemd.mount-timeout=infinity,retry=10000,", opts, ",fg");
+ SET_FLAG(flags, NOFAIL, true);
+ }
+
+ if (!(flags & NOFAIL) && !(flags & AUTOMOUNT))
+ fprintf(f, "Before=%s\n", post);
+
+ if (!(flags & AUTOMOUNT) && opts) {
+ r = write_after(f, opts);
+ if (r < 0)
+ return r;
+ r = write_requires_after(f, opts);
+ if (r < 0)
+ return r;
+ r = write_before(f, opts);
+ if (r < 0)
+ return r;
+ r = write_requires_mounts_for(f, opts);
+ if (r < 0)
+ return r;
+ }
+
+ if (passno != 0) {
+ r = generator_write_fsck_deps(f, dest, what, where, fstype);
+ if (r < 0)
+ return r;
+ }
+
+ fprintf(f, "\n[Mount]\n");
+ if (original_where)
+ fprintf(f, "# Canonicalized from %s\n", original_where);
+
+ where_escaped = specifier_escape(where);
+ if (!where_escaped)
+ return log_oom();
+ fprintf(f, "Where=%s\n", where_escaped);
+
+ r = write_what(f, what);
+ if (r < 0)
+ return r;
+
+ if (!isempty(fstype) && !streq(fstype, "auto")) {
+ _cleanup_free_ char *t;
+
+ t = specifier_escape(fstype);
+ if (!t)
+ return -ENOMEM;
+
+ fprintf(f, "Type=%s\n", t);
+ }
+
+ r = generator_write_timeouts(dest, what, where, opts, &filtered);
+ if (r < 0)
+ return r;
+
+ r = generator_write_device_deps(dest, what, where, opts);
+ if (r < 0)
+ return r;
+
+ r = write_mount_timeout(f, where, opts);
+ if (r < 0)
+ return r;
+
+ r = write_options(f, filtered);
+ if (r < 0)
+ return r;
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", name);
+
+ if (flags & MAKEFS) {
+ r = generator_hook_up_mkfs(dest, what, where, fstype);
+ if (r < 0)
+ return r;
+ }
+
+ if (flags & GROWFS) {
+ r = generator_hook_up_growfs(dest, where, post);
+ if (r < 0)
+ return r;
+ }
+
+ if (!(flags & NOAUTO) && !(flags & AUTOMOUNT)) {
+ r = generator_add_symlink(dest, post,
+ (flags & NOFAIL) ? "wants" : "requires", name);
+ if (r < 0)
+ return r;
+ }
+
+ if (flags & AUTOMOUNT) {
+ r = unit_name_from_path(where, ".automount", &automount_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ fclose(f);
+
+ r = generator_open_unit_file(dest, "/etc/fstab", automount_name, &f);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "[Unit]\n"
+ "SourcePath=%s\n"
+ "Documentation=man:fstab(5) man:systemd-fstab-generator(8)\n",
+ source);
+
+ fprintf(f, "Before=%s\n", post);
+
+ if (opts) {
+ r = write_after(f, opts);
+ if (r < 0)
+ return r;
+ r = write_requires_after(f, opts);
+ if (r < 0)
+ return r;
+ r = write_before(f, opts);
+ if (r < 0)
+ return r;
+ r = write_requires_mounts_for(f, opts);
+ if (r < 0)
+ return r;
+ }
+
+ fprintf(f,
+ "\n"
+ "[Automount]\n"
+ "Where=%s\n",
+ where_escaped);
+
+ r = write_idle_timeout(f, where, opts);
+ if (r < 0)
+ return r;
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", automount_name);
+
+ r = generator_add_symlink(dest, post,
+ (flags & NOFAIL) ? "wants" : "requires", automount_name);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int parse_fstab(bool initrd) {
+ _cleanup_endmntent_ FILE *f = NULL;
+ const char *fstab_path;
+ struct mntent *me;
+ int r = 0;
+
+ fstab_path = initrd ? "/sysroot/etc/fstab" : "/etc/fstab";
+ f = setmntent(fstab_path, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open %s: %m", fstab_path);
+ }
+
+ while ((me = getmntent(f))) {
+ _cleanup_free_ char *where = NULL, *what = NULL, *canonical_where = NULL;
+ bool makefs, growfs, noauto, nofail;
+ int k;
+
+ if (initrd && !mount_in_initrd(me))
+ continue;
+
+ what = fstab_node_to_udev_node(me->mnt_fsname);
+ if (!what)
+ return log_oom();
+
+ if (is_device_path(what) && path_is_read_only_fs("/sys") > 0) {
+ log_info("Running in a container, ignoring fstab device entry for %s.", what);
+ continue;
+ }
+
+ where = strdup(me->mnt_dir);
+ if (!where)
+ return log_oom();
+
+ if (is_path(where)) {
+ path_simplify(where, false);
+
+ /* Follow symlinks here; see 5261ba901845c084de5a8fd06500ed09bfb0bd80 which makes sense for
+ * mount units, but causes problems since it historically worked to have symlinks in e.g.
+ * /etc/fstab. So we canonicalize here. Note that we use CHASE_NONEXISTENT to handle the case
+ * where a symlink refers to another mount target; this works assuming the sub-mountpoint
+ * target is the final directory. */
+ r = chase_symlinks(where, initrd ? "/sysroot" : NULL,
+ CHASE_PREFIX_ROOT | CHASE_NONEXISTENT,
+ &canonical_where);
+ if (r < 0) /* If we can't canonicalize we continue on as if it wasn't a symlink */
+ log_debug_errno(r, "Failed to read symlink target for %s, ignoring: %m", where);
+ else if (streq(canonical_where, where)) /* If it was fully canonicalized, suppress the change */
+ canonical_where = mfree(canonical_where);
+ else
+ log_debug("Canonicalized what=%s where=%s to %s", what, where, canonical_where);
+ }
+
+ makefs = fstab_test_option(me->mnt_opts, "x-systemd.makefs\0");
+ growfs = fstab_test_option(me->mnt_opts, "x-systemd.growfs\0");
+ noauto = fstab_test_yes_no_option(me->mnt_opts, "noauto\0" "auto\0");
+ nofail = fstab_test_yes_no_option(me->mnt_opts, "nofail\0" "fail\0");
+
+ log_debug("Found entry what=%s where=%s type=%s makefs=%s nofail=%s noauto=%s",
+ what, where, me->mnt_type,
+ yes_no(makefs),
+ yes_no(noauto), yes_no(nofail));
+
+ if (streq(me->mnt_type, "swap"))
+ k = add_swap(what, me,
+ makefs*MAKEFS | growfs*GROWFS | noauto*NOAUTO | nofail*NOFAIL);
+ else {
+ bool automount;
+ const char *post;
+
+ automount = fstab_test_option(me->mnt_opts,
+ "comment=systemd.automount\0"
+ "x-systemd.automount\0");
+ if (initrd)
+ post = SPECIAL_INITRD_FS_TARGET;
+ else if (mount_is_network(me))
+ post = SPECIAL_REMOTE_FS_TARGET;
+ else
+ post = SPECIAL_LOCAL_FS_TARGET;
+
+ k = add_mount(arg_dest,
+ what,
+ canonical_where ?: where,
+ canonical_where ? where: NULL,
+ me->mnt_type,
+ me->mnt_opts,
+ me->mnt_passno,
+ makefs*MAKEFS | growfs*GROWFS | noauto*NOAUTO | nofail*NOFAIL | automount*AUTOMOUNT,
+ post,
+ fstab_path);
+ }
+
+ if (r >= 0 && k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int add_sysroot_mount(void) {
+ _cleanup_free_ char *what = NULL;
+ const char *opts;
+ int r;
+
+ if (isempty(arg_root_what)) {
+ log_debug("Could not find a root= entry on the kernel command line.");
+ return 0;
+ }
+
+ if (streq(arg_root_what, "gpt-auto")) {
+ /* This is handled by the gpt-auto generator */
+ log_debug("Skipping root directory handling, as gpt-auto was requested.");
+ return 0;
+ }
+
+ if (path_equal(arg_root_what, "/dev/nfs")) {
+ /* This is handled by the kernel or the initrd */
+ log_debug("Skipping root directory handling, as /dev/nfs was requested.");
+ return 0;
+ }
+
+ what = fstab_node_to_udev_node(arg_root_what);
+ if (!what)
+ return log_oom();
+
+ if (!arg_root_options)
+ opts = arg_root_rw > 0 ? "rw" : "ro";
+ else if (arg_root_rw >= 0 ||
+ !fstab_test_option(arg_root_options, "ro\0" "rw\0"))
+ opts = strjoina(arg_root_options, ",", arg_root_rw > 0 ? "rw" : "ro");
+ else
+ opts = arg_root_options;
+
+ log_debug("Found entry what=%s where=/sysroot type=%s", what, strna(arg_root_fstype));
+
+ if (is_device_path(what)) {
+ r = generator_write_initrd_root_device_deps(arg_dest, what);
+ if (r < 0)
+ return r;
+ }
+
+ return add_mount(arg_dest,
+ what,
+ "/sysroot",
+ NULL,
+ arg_root_fstype,
+ opts,
+ is_device_path(what) ? 1 : 0, /* passno */
+ 0, /* makefs off, growfs off, noauto off, nofail off, automount off */
+ SPECIAL_INITRD_ROOT_FS_TARGET,
+ "/proc/cmdline");
+}
+
+static int add_sysroot_usr_mount(void) {
+ _cleanup_free_ char *what = NULL;
+ const char *opts;
+
+ if (!arg_usr_what && !arg_usr_fstype && !arg_usr_options)
+ return 0;
+
+ if (arg_root_what && !arg_usr_what) {
+ /* Copy over the root device, in case the /usr mount just differs in a mount option (consider btrfs subvolumes) */
+ arg_usr_what = strdup(arg_root_what);
+ if (!arg_usr_what)
+ return log_oom();
+ }
+
+ if (arg_root_fstype && !arg_usr_fstype) {
+ arg_usr_fstype = strdup(arg_root_fstype);
+ if (!arg_usr_fstype)
+ return log_oom();
+ }
+
+ if (arg_root_options && !arg_usr_options) {
+ arg_usr_options = strdup(arg_root_options);
+ if (!arg_usr_options)
+ return log_oom();
+ }
+
+ if (!arg_usr_what)
+ return 0;
+
+ what = fstab_node_to_udev_node(arg_usr_what);
+ if (!what)
+ return log_oom();
+
+ if (!arg_usr_options)
+ opts = arg_root_rw > 0 ? "rw" : "ro";
+ else if (!fstab_test_option(arg_usr_options, "ro\0" "rw\0"))
+ opts = strjoina(arg_usr_options, ",", arg_root_rw > 0 ? "rw" : "ro");
+ else
+ opts = arg_usr_options;
+
+ log_debug("Found entry what=%s where=/sysroot/usr type=%s", what, strna(arg_usr_fstype));
+ return add_mount(arg_dest,
+ what,
+ "/sysroot/usr",
+ NULL,
+ arg_usr_fstype,
+ opts,
+ is_device_path(what) ? 1 : 0, /* passno */
+ 0,
+ SPECIAL_INITRD_FS_TARGET,
+ "/proc/cmdline");
+}
+
+static int add_volatile_root(void) {
+ const char *from, *to;
+
+ if (arg_volatile_mode != VOLATILE_YES)
+ return 0;
+
+ /* Let's add in systemd-remount-volatile.service which will remount the root device to tmpfs if this is
+ * requested, leaving only /usr from the root mount inside. */
+
+ from = strjoina(SYSTEM_DATA_UNIT_PATH "/systemd-volatile-root.service");
+ to = strjoina(arg_dest, "/" SPECIAL_INITRD_ROOT_FS_TARGET, ".requires/systemd-volatile-root.service");
+
+ (void) mkdir_parents(to, 0755);
+
+ if (symlink(from, to) < 0)
+ return log_error_errno(errno, "Failed to hook in volatile remount service: %m");
+
+ return 0;
+}
+
+static int add_volatile_var(void) {
+
+ if (arg_volatile_mode != VOLATILE_STATE)
+ return 0;
+
+ /* If requested, mount /var as tmpfs, but do so only if there's nothing else defined for this. */
+
+ return add_mount(arg_dest_late,
+ "tmpfs",
+ "/var",
+ NULL,
+ "tmpfs",
+ "mode=0755",
+ 0,
+ 0,
+ SPECIAL_LOCAL_FS_TARGET,
+ "/proc/cmdline");
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r;
+
+ /* root=, usr=, usrfstype= and roofstype= may occur more than once, the last
+ * instance should take precedence. In the case of multiple rootflags=
+ * or usrflags= the arguments should be concatenated */
+
+ if (STR_IN_SET(key, "fstab", "rd.fstab")) {
+
+ r = value ? parse_boolean(value) : 1;
+ if (r < 0)
+ log_warning("Failed to parse fstab switch %s. Ignoring.", value);
+ else
+ arg_fstab_enabled = r;
+
+ } else if (streq(key, "root")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (free_and_strdup(&arg_root_what, value) < 0)
+ return log_oom();
+
+ } else if (streq(key, "rootfstype")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (free_and_strdup(&arg_root_fstype, value) < 0)
+ return log_oom();
+
+ } else if (streq(key, "rootflags")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (!strextend_with_separator(&arg_root_options, ",", value, NULL))
+ return log_oom();
+
+ } else if (streq(key, "roothash")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (free_and_strdup(&arg_root_hash, value) < 0)
+ return log_oom();
+
+ } else if (streq(key, "mount.usr")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (free_and_strdup(&arg_usr_what, value) < 0)
+ return log_oom();
+
+ } else if (streq(key, "mount.usrfstype")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (free_and_strdup(&arg_usr_fstype, value) < 0)
+ return log_oom();
+
+ } else if (streq(key, "mount.usrflags")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (!strextend_with_separator(&arg_usr_options, ",", value, NULL))
+ return log_oom();
+
+ } else if (streq(key, "rw") && !value)
+ arg_root_rw = true;
+ else if (streq(key, "ro") && !value)
+ arg_root_rw = false;
+ else if (streq(key, "systemd.volatile")) {
+ VolatileMode m;
+
+ if (value) {
+ m = volatile_mode_from_string(value);
+ if (m < 0)
+ log_warning("Failed to parse systemd.volatile= argument: %s", value);
+ else
+ arg_volatile_mode = m;
+ } else
+ arg_volatile_mode = VOLATILE_YES;
+ }
+
+ return 0;
+}
+
+static int determine_root(void) {
+ /* If we have a root hash but no root device then Verity is used, and we use the "root" DM device as root. */
+
+ if (arg_root_what)
+ return 0;
+
+ if (!arg_root_hash)
+ return 0;
+
+ arg_root_what = strdup("/dev/mapper/root");
+ if (!arg_root_what)
+ return log_oom();
+
+ log_info("Using verity root device %s.", arg_root_what);
+
+ return 1;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r;
+
+ assert_se(arg_dest = dest);
+ assert_se(arg_dest_late = dest_late);
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ (void) determine_root();
+
+ /* Always honour root= and usr= in the kernel command line if we are in an initrd */
+ if (in_initrd()) {
+ int k;
+
+ r = add_sysroot_mount();
+
+ k = add_sysroot_usr_mount();
+ if (k < 0)
+ r = k;
+
+ k = add_volatile_root();
+ if (k < 0)
+ r = k;
+ } else
+ r = add_volatile_var();
+
+ /* Honour /etc/fstab only when that's enabled */
+ if (arg_fstab_enabled) {
+ int k;
+
+ log_debug("Parsing /etc/fstab");
+
+ /* Parse the local /etc/fstab, possibly from the initrd */
+ k = parse_fstab(false);
+ if (k < 0)
+ r = k;
+
+ /* If running in the initrd also parse the /etc/fstab from the host */
+ if (in_initrd()) {
+ log_debug("Parsing /sysroot/etc/fstab");
+
+ k = parse_fstab(true);
+ if (k < 0)
+ r = k;
+ }
+ }
+
+ return r;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/fuzz/fuzz-bus-message.c b/src/fuzz/fuzz-bus-message.c
new file mode 100644
index 0000000..9842c62
--- /dev/null
+++ b/src/fuzz/fuzz-bus-message.c
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-message.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fuzz.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_free_ char *out = NULL; /* out should be freed after g */
+ size_t out_size;
+ _cleanup_fclose_ FILE *g = NULL;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ void *buffer = NULL;
+ int r;
+
+ /* We don't want to fill the logs with messages about parse errors.
+ * Disable most logging if not running standalone */
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ r = sd_bus_new(&bus);
+ assert_se(r >= 0);
+
+ assert_se(buffer = memdup(data, size));
+
+ r = bus_message_from_malloc(bus, buffer, size, NULL, 0, NULL, &m);
+ if (r == -EBADMSG)
+ return 0;
+ assert_se(r >= 0);
+ TAKE_PTR(buffer);
+
+ if (getenv_bool("SYSTEMD_FUZZ_OUTPUT") <= 0)
+ assert_se(g = open_memstream(&out, &out_size));
+
+ bus_message_dump(m, g ?: stdout, BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ r = sd_bus_message_rewind(m, true);
+ assert_se(r >= 0);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-catalog.c b/src/fuzz/fuzz-catalog.c
new file mode 100644
index 0000000..7ee9750
--- /dev/null
+++ b/src/fuzz/fuzz-catalog.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "catalog.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "fuzz.h"
+#include "tmpfile-util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/fuzz-catalog.XXXXXX";
+ _cleanup_close_ int fd = -1;
+ _cleanup_hashmap_free_free_free_ Hashmap *h = NULL;
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se(h = hashmap_new(&catalog_hash_ops));
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, data, size) == (ssize_t) size);
+
+ (void) catalog_import_file(h, name);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-compress.c b/src/fuzz/fuzz-compress.c
new file mode 100644
index 0000000..9c5dfc9
--- /dev/null
+++ b/src/fuzz/fuzz-compress.c
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "compress.h"
+#include "fuzz.h"
+
+static int compress(int alg,
+ const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size) {
+
+ if (alg == OBJECT_COMPRESSED_LZ4)
+ return compress_blob_lz4(src, src_size, dst, dst_alloc_size, dst_size);
+ if (alg == OBJECT_COMPRESSED_XZ)
+ return compress_blob_xz(src, src_size, dst, dst_alloc_size, dst_size);
+ return -EOPNOTSUPP;
+}
+
+typedef struct header {
+ uint32_t alg:2; /* We have only two compression algorithms so far, but we might add
+ * more in the future. Let's make this a bit wider so our fuzzer
+ * cases remain stable in the future. */
+ uint32_t sw_len;
+ uint32_t sw_alloc;
+ uint32_t reserved[3]; /* Extra space to keep fuzz cases stable in case we need to
+ * add stuff in the future. */
+ uint8_t data[];
+} header;
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_free_ void *buf = NULL, *buf2 = NULL;
+ int r;
+
+ if (size < offsetof(header, data) + 1)
+ return 0;
+
+ const header *h = (struct header*) data;
+ const size_t data_len = size - offsetof(header, data);
+
+ int alg = h->alg;
+
+ /* We don't want to fill the logs with messages about parse errors.
+ * Disable most logging if not running standalone */
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ log_info("Using compression %s, data size=%zu",
+ object_compressed_to_string(alg) ?: "(none)",
+ data_len);
+
+ buf = malloc(MAX(size, 128u)); /* Make the buffer a bit larger for very small data */
+ if (!buf) {
+ log_oom();
+ return 0;
+ }
+
+ size_t csize;
+ r = compress(alg, h->data, data_len, buf, size, &csize);
+ if (r < 0) {
+ log_error_errno(r, "Compression failed: %m");
+ return 0;
+ }
+
+ log_debug("Compressed %zu bytes to → %zu bytes", data_len, csize);
+
+ size_t sw_alloc = MAX(h->sw_alloc, 1u);
+ buf2 = malloc(sw_alloc);
+ if (!buf) {
+ log_oom();
+ return 0;
+ }
+
+ size_t sw_len = MIN(data_len - 1, h->sw_len);
+
+ r = decompress_startswith(alg, buf, csize, &buf2, &sw_alloc, h->data, sw_len, h->data[sw_len]);
+ assert_se(r > 0);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-dhcp-server.c b/src/fuzz/fuzz-dhcp-server.c
new file mode 100644
index 0000000..01fe350
--- /dev/null
+++ b/src/fuzz/fuzz-dhcp-server.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "fuzz.h"
+
+#include "sd-dhcp-server.c"
+
+/* stub out network so that the server doesn't send */
+ssize_t sendto(int sockfd, const void *buf, size_t len, int flags, const struct sockaddr *dest_addr, socklen_t addrlen) {
+ return len;
+}
+
+ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags) {
+ return 0;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(sd_dhcp_server_unrefp) sd_dhcp_server *server = NULL;
+ struct in_addr address = {.s_addr = htobe32(UINT32_C(10) << 24 | UINT32_C(1))};
+ static const uint8_t chaddr[] = {3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3};
+ uint8_t *client_id;
+ DHCPLease *lease;
+ int pool_offset;
+
+ if (size < sizeof(DHCPMessage))
+ return 0;
+
+ assert_se(sd_dhcp_server_new(&server, 1) >= 0);
+ server->fd = open("/dev/null", O_RDWR|O_CLOEXEC|O_NOCTTY);
+ assert_se(server->fd >= 0);
+ assert_se(sd_dhcp_server_configure_pool(server, &address, 24, 0, 0) >= 0);
+
+ /* add a lease to the pool to expose additional code paths */
+ client_id = malloc(2);
+ assert_se(client_id);
+ client_id[0] = 2;
+ client_id[1] = 2;
+ lease = new0(DHCPLease, 1);
+ assert_se(lease);
+ lease->client_id.length = 2;
+ lease->client_id.data = client_id;
+ lease->address = htobe32(UINT32_C(10) << 24 | UINT32_C(2));
+ lease->gateway = htobe32(UINT32_C(10) << 24 | UINT32_C(1));
+ lease->expiration = UINT64_MAX;
+ memcpy(lease->chaddr, chaddr, 16);
+ pool_offset = get_pool_offset(server, lease->address);
+ server->bound_leases[pool_offset] = lease;
+ assert_se(hashmap_put(server->leases_by_client_id, &lease->client_id, lease) >= 0);
+
+ (void) dhcp_server_handle_message(server, (DHCPMessage*)data, size);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-dhcp-server.options b/src/fuzz/fuzz-dhcp-server.options
new file mode 100644
index 0000000..5c330e5
--- /dev/null
+++ b/src/fuzz/fuzz-dhcp-server.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 600
diff --git a/src/fuzz/fuzz-dhcp6-client.c b/src/fuzz/fuzz-dhcp6-client.c
new file mode 100644
index 0000000..c9bc2b3
--- /dev/null
+++ b/src/fuzz/fuzz-dhcp6-client.c
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <unistd.h>
+
+#include "sd-dhcp6-client.h"
+#include "sd-event.h"
+
+#include "dhcp6-internal.h"
+#include "dhcp6-protocol.h"
+#include "fd-util.h"
+#include "fuzz.h"
+
+static int test_dhcp_fd[2] = { -1, -1 };
+
+int dhcp6_network_send_udp_socket(int s, struct in6_addr *server_address,
+ const void *packet, size_t len) {
+ return len;
+}
+
+int dhcp6_network_bind_udp_socket(int index, struct in6_addr *local_address) {
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_dhcp_fd) >= 0);
+ return test_dhcp_fd[0];
+}
+
+static void fuzz_client(const uint8_t *data, size_t size, bool is_information_request_enabled) {
+ _cleanup_(sd_event_unrefp) sd_event *e;
+ _cleanup_(sd_dhcp6_client_unrefp) sd_dhcp6_client *client = NULL;
+ struct in6_addr address = { { { 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01 } } };
+
+ assert_se(sd_event_new(&e) >= 0);
+ assert_se(sd_dhcp6_client_new(&client) >= 0);
+ assert_se(sd_dhcp6_client_attach_event(client, e, 0) >= 0);
+ assert_se(sd_dhcp6_client_set_ifindex(client, 42) == 0);
+ assert_se(sd_dhcp6_client_set_local_address(client, &address) >= 0);
+ assert_se(sd_dhcp6_client_set_information_request(client, is_information_request_enabled) == 0);
+
+ assert_se(sd_dhcp6_client_start(client) >= 0);
+
+ if (size >= sizeof(DHCP6Message))
+ assert_se(sd_dhcp6_client_set_transaction_id(client, htobe32(0x00ffffff) & ((const DHCP6Message *) data)->transaction_id) == 0);
+
+ assert_se(write(test_dhcp_fd[1], data, size) == (ssize_t) size);
+
+ sd_event_run(e, (uint64_t) -1);
+
+ assert_se(sd_dhcp6_client_stop(client) >= 0);
+
+ test_dhcp_fd[1] = safe_close(test_dhcp_fd[1]);
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ /* This triggers client_receive_advertise */
+ fuzz_client(data, size, false);
+
+ /* This triggers client_receive_reply */
+ fuzz_client(data, size, true);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-dns-packet.c b/src/fuzz/fuzz-dns-packet.c
new file mode 100644
index 0000000..c150c81
--- /dev/null
+++ b/src/fuzz/fuzz-dns-packet.c
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "fuzz.h"
+#include "resolved-dns-packet.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+
+ if (size > DNS_PACKET_SIZE_MAX)
+ return 0;
+
+ assert_se(dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, DNS_PACKET_SIZE_MAX) >= 0);
+ p->size = 0; /* by default append starts after the header, undo that */
+ assert_se(dns_packet_append_blob(p, data, size, NULL) >= 0);
+ if (size < DNS_PACKET_HEADER_SIZE) {
+ /* make sure we pad the packet back up to the minimum header size */
+ assert_se(p->allocated >= DNS_PACKET_HEADER_SIZE);
+ memzero(DNS_PACKET_DATA(p) + size, DNS_PACKET_HEADER_SIZE - size);
+ p->size = DNS_PACKET_HEADER_SIZE;
+ }
+ (void) dns_packet_extract(p);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-dns-packet.options b/src/fuzz/fuzz-dns-packet.options
new file mode 100644
index 0000000..0824b19
--- /dev/null
+++ b/src/fuzz/fuzz-dns-packet.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 65535
diff --git a/src/fuzz/fuzz-journal-remote.c b/src/fuzz/fuzz-journal-remote.c
new file mode 100644
index 0000000..3ab4eb0
--- /dev/null
+++ b/src/fuzz/fuzz-journal-remote.c
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "fuzz.h"
+
+#include <sys/mman.h>
+
+#include "sd-journal.h"
+
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "journal-remote.h"
+#include "logs-show.h"
+#include "memfd-util.h"
+#include "strv.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_fclose_ FILE *dev_null = NULL;
+ RemoteServer s = {};
+ char name[] = "/tmp/fuzz-journal-remote.XXXXXX.journal";
+ void *mem;
+ int fdin; /* will be closed by journal_remote handler after EOF */
+ _cleanup_close_ int fdout = -1;
+ sd_journal *j;
+ OutputMode mode;
+ int r;
+
+ if (size <= 2)
+ return 0;
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se((fdin = memfd_new_and_map("fuzz-journal-remote", size, &mem)) >= 0);
+ memcpy(mem, data, size);
+ assert_se(munmap(mem, size) == 0);
+
+ fdout = mkostemps(name, STRLEN(".journal"), O_CLOEXEC);
+ assert_se(fdout >= 0);
+
+ /* In */
+
+ assert_se(journal_remote_server_init(&s, name, JOURNAL_WRITE_SPLIT_NONE, false, false) >= 0);
+
+ assert_se(journal_remote_add_source(&s, fdin, (char*) "fuzz-data", false) > 0);
+
+ while (s.active) {
+ r = journal_remote_handle_raw_source(NULL, fdin, 0, &s);
+ assert_se(r >= 0);
+ }
+
+ journal_remote_server_destroy(&s);
+ assert_se(close(fdin) < 0 && errno == EBADF); /* Check that the fd is closed already */
+
+ /* Out */
+
+ r = sd_journal_open_files(&j, (const char**) STRV_MAKE(name), 0);
+ assert_se(r >= 0);
+
+ if (getenv_bool("SYSTEMD_FUZZ_OUTPUT") <= 0)
+ assert_se(dev_null = fopen("/dev/null", "we"));
+
+ for (mode = 0; mode < _OUTPUT_MODE_MAX; mode++) {
+ if (!dev_null)
+ log_info("/* %s */", output_mode_to_string(mode));
+ r = show_journal(dev_null ?: stdout, j, mode, 0, 0, -1, 0, NULL);
+ assert_se(r >= 0);
+
+ r = sd_journal_seek_head(j);
+ assert_se(r >= 0);
+ }
+
+ sd_journal_close(j);
+ unlink(name);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journal-remote.options b/src/fuzz/fuzz-journal-remote.options
new file mode 100644
index 0000000..678d526
--- /dev/null
+++ b/src/fuzz/fuzz-journal-remote.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 65536
diff --git a/src/fuzz/fuzz-journald-audit.c b/src/fuzz/fuzz-journald-audit.c
new file mode 100644
index 0000000..3f3ce7e
--- /dev/null
+++ b/src/fuzz/fuzz-journald-audit.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "fuzz.h"
+#include "fuzz-journald.h"
+#include "journald-audit.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ Server s;
+
+ dummy_server_init(&s, data, size);
+ process_audit_string(&s, 0, s.buffer, size);
+ server_done(&s);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journald-kmsg.c b/src/fuzz/fuzz-journald-kmsg.c
new file mode 100644
index 0000000..f7426c8
--- /dev/null
+++ b/src/fuzz/fuzz-journald-kmsg.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "fuzz.h"
+#include "fuzz-journald.h"
+#include "journald-kmsg.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ Server s;
+
+ if (size == 0)
+ return 0;
+
+ dummy_server_init(&s, data, size);
+ dev_kmsg_record(&s, s.buffer, size);
+ server_done(&s);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journald-native-fd.c b/src/fuzz/fuzz-journald-native-fd.c
new file mode 100644
index 0000000..8e3e850
--- /dev/null
+++ b/src/fuzz/fuzz-journald-native-fd.c
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "fuzz-journald.h"
+#include "fuzz.h"
+#include "journald-native.h"
+#include "memfd-util.h"
+#include "process-util.h"
+#include "tmpfile-util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ Server s;
+ _cleanup_close_ int sealed_fd = -1, unsealed_fd = -1;
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/fuzz-journald-native-fd.XXXXXX";
+ char *label = NULL;
+ size_t label_len = 0;
+ struct ucred ucred;
+ struct timeval *tv = NULL;
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ dummy_server_init(&s, NULL, 0);
+
+ sealed_fd = memfd_new(NULL);
+ assert_se(sealed_fd >= 0);
+ assert_se(write(sealed_fd, data, size) == (ssize_t) size);
+ assert_se(memfd_set_sealed(sealed_fd) >= 0);
+ assert_se(lseek(sealed_fd, 0, SEEK_SET) == 0);
+ ucred = (struct ucred) {
+ .pid = getpid_cached(),
+ .uid = geteuid(),
+ .gid = getegid(),
+ };
+ server_process_native_file(&s, sealed_fd, &ucred, tv, label, label_len);
+
+ unsealed_fd = mkostemp_safe(name);
+ assert_se(unsealed_fd >= 0);
+ assert_se(write(unsealed_fd, data, size) == (ssize_t) size);
+ assert_se(lseek(unsealed_fd, 0, SEEK_SET) == 0);
+ server_process_native_file(&s, unsealed_fd, &ucred, tv, label, label_len);
+
+ server_done(&s);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journald-native.c b/src/fuzz/fuzz-journald-native.c
new file mode 100644
index 0000000..f4de5fd
--- /dev/null
+++ b/src/fuzz/fuzz-journald-native.c
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "fuzz.h"
+#include "fuzz-journald.h"
+#include "journald-native.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ fuzz_journald_processing_function(data, size, server_process_native_message);
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journald-stream.c b/src/fuzz/fuzz-journald-stream.c
new file mode 100644
index 0000000..5d6c8eb
--- /dev/null
+++ b/src/fuzz/fuzz-journald-stream.c
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+
+#include "fd-util.h"
+#include "fuzz.h"
+#include "fuzz-journald.h"
+#include "journald-stream.h"
+
+static int stream_fds[2] = { -1, -1 };
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ Server s;
+ StdoutStream *stream;
+ int v;
+
+ if (size == 0)
+ return 0;
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0, stream_fds) >= 0);
+ dummy_server_init(&s, NULL, 0);
+ assert_se(stdout_stream_install(&s, stream_fds[0], &stream) >= 0);
+ assert_se(write(stream_fds[1], data, size) == (ssize_t) size);
+ while (ioctl(stream_fds[0], SIOCINQ, &v) == 0 && v)
+ sd_event_run(s.event, (uint64_t) -1);
+ if (s.n_stdout_streams)
+ stdout_stream_destroy(stream);
+ server_done(&s);
+ stream_fds[1] = safe_close(stream_fds[1]);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journald-syslog.c b/src/fuzz/fuzz-journald-syslog.c
new file mode 100644
index 0000000..100f0ce
--- /dev/null
+++ b/src/fuzz/fuzz-journald-syslog.c
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "fuzz.h"
+#include "fuzz-journald.h"
+#include "journald-syslog.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ fuzz_journald_processing_function(data, size, server_process_syslog_message);
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journald.c b/src/fuzz/fuzz-journald.c
new file mode 100644
index 0000000..950e885
--- /dev/null
+++ b/src/fuzz/fuzz-journald.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "fuzz-journald.h"
+#include "journald-server.h"
+#include "sd-event.h"
+
+void dummy_server_init(Server *s, const uint8_t *buffer, size_t size) {
+ *s = (Server) {
+ .syslog_fd = -1,
+ .native_fd = -1,
+ .stdout_fd = -1,
+ .dev_kmsg_fd = -1,
+ .audit_fd = -1,
+ .hostname_fd = -1,
+ .notify_fd = -1,
+ .storage = STORAGE_NONE,
+ .line_max = 64,
+ };
+ assert_se(sd_event_default(&s->event) >= 0);
+
+ if (buffer) {
+ s->buffer = memdup_suffix0(buffer, size);
+ assert_se(s->buffer);
+ s->buffer_size = size + 1;
+ }
+}
+
+void fuzz_journald_processing_function(
+ const uint8_t *data,
+ size_t size,
+ void (*f)(Server *s, const char *buf, size_t raw_len, const struct ucred *ucred, const struct timeval *tv, const char *label, size_t label_len)
+ ) {
+ Server s;
+ char *label = NULL;
+ size_t label_len = 0;
+ struct ucred *ucred = NULL;
+ struct timeval *tv = NULL;
+
+ if (size == 0)
+ return;
+
+ dummy_server_init(&s, data, size);
+ (*f)(&s, s.buffer, size, ucred, tv, label, label_len);
+ server_done(&s);
+}
diff --git a/src/fuzz/fuzz-journald.h b/src/fuzz/fuzz-journald.h
new file mode 100644
index 0000000..77e3b0c
--- /dev/null
+++ b/src/fuzz/fuzz-journald.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "journald-server.h"
+
+void dummy_server_init(Server *s, const uint8_t *buffer, size_t size);
+
+void fuzz_journald_processing_function(
+ const uint8_t *data,
+ size_t size,
+ void (*f)(Server *s, const char *buf, size_t raw_len, const struct ucred *ucred, const struct timeval *tv, const char *label, size_t label_len)
+);
diff --git a/src/fuzz/fuzz-json.c b/src/fuzz/fuzz-json.c
new file mode 100644
index 0000000..3aa9d08
--- /dev/null
+++ b/src/fuzz/fuzz-json.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fuzz.h"
+#include "json.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_free_ char *out = NULL; /* out should be freed after g */
+ size_t out_size;
+ _cleanup_fclose_ FILE *f = NULL, *g = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ if (size == 0)
+ return 0;
+
+ f = fmemopen((char*) data, size, "re");
+ assert_se(f);
+
+ if (json_parse_file(f, NULL, &v, NULL, NULL) < 0)
+ return 0;
+
+ g = open_memstream(&out, &out_size);
+ assert_se(g);
+
+ json_variant_dump(v, 0, g, NULL);
+ json_variant_dump(v, JSON_FORMAT_PRETTY|JSON_FORMAT_COLOR|JSON_FORMAT_SOURCE, g, NULL);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-lldp.c b/src/fuzz/fuzz-lldp.c
new file mode 100644
index 0000000..b9291d4
--- /dev/null
+++ b/src/fuzz/fuzz-lldp.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+#include "sd-lldp.h"
+
+#include "fd-util.h"
+#include "fuzz.h"
+#include "lldp-network.h"
+
+static int test_fd[2] = { -1, -1 };
+
+int lldp_network_bind_raw_socket(int ifindex) {
+ if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_fd) < 0)
+ return -errno;
+
+ return test_fd[0];
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(sd_lldp_unrefp) sd_lldp *lldp = NULL;
+
+ assert_se(sd_event_new(&e) == 0);
+ assert_se(sd_lldp_new(&lldp) >= 0);
+ assert_se(sd_lldp_set_ifindex(lldp, 42) >= 0);
+ assert_se(sd_lldp_attach_event(lldp, e, 0) >= 0);
+ assert_se(sd_lldp_start(lldp) >= 0);
+
+ assert_se(write(test_fd[1], data, size) == (ssize_t) size);
+ assert_se(sd_event_run(e, 0) >= 0);
+
+ assert_se(sd_lldp_stop(lldp) >= 0);
+ assert_se(sd_lldp_detach_event(lldp) >= 0);
+ test_fd[1] = safe_close(test_fd[1]);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-main.c b/src/fuzz/fuzz-main.c
new file mode 100644
index 0000000..d5c9984
--- /dev/null
+++ b/src/fuzz/fuzz-main.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "log.h"
+#include "fileio.h"
+#include "fuzz.h"
+#include "tests.h"
+
+/* This is a test driver for the systemd fuzzers that provides main function
+ * for regression testing outside of oss-fuzz (https://github.com/google/oss-fuzz)
+ *
+ * It reads files named on the command line and passes them one by one into the
+ * fuzzer that it is compiled into. */
+
+/* This one was borrowed from
+ * https://github.com/google/oss-fuzz/blob/646fca1b506b056db3a60d32c4a1a7398f171c94/infra/base-images/base-runner/bad_build_check#L19
+ */
+#define MIN_NUMBER_OF_RUNS 4
+
+int main(int argc, char **argv) {
+ int i, r;
+ size_t size;
+ char *name;
+
+ test_setup_logging(LOG_DEBUG);
+
+ for (i = 1; i < argc; i++) {
+ _cleanup_free_ char *buf = NULL;
+
+ name = argv[i];
+ r = read_full_file(name, &buf, &size);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open '%s': %m", name);
+ return EXIT_FAILURE;
+ }
+ printf("%s... ", name);
+ fflush(stdout);
+ for (int j = 0; j < MIN_NUMBER_OF_RUNS; j++)
+ if (LLVMFuzzerTestOneInput((uint8_t*)buf, size) == EXIT_TEST_SKIP)
+ return EXIT_TEST_SKIP;
+ printf("ok\n");
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/fuzz/fuzz-ndisc-rs.c b/src/fuzz/fuzz-ndisc-rs.c
new file mode 100644
index 0000000..3a1e60f
--- /dev/null
+++ b/src/fuzz/fuzz-ndisc-rs.c
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <netinet/icmp6.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "icmp6-util.h"
+#include "fuzz.h"
+#include "sd-ndisc.h"
+#include "socket-util.h"
+#include "ndisc-internal.h"
+
+static int test_fd[2] = { -1, -1 };
+
+int icmp6_bind_router_solicitation(int index) {
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_fd) >= 0);
+ return test_fd[0];
+}
+
+int icmp6_bind_router_advertisement(int index) {
+ return -ENOSYS;
+}
+
+int icmp6_receive(int fd, void *iov_base, size_t iov_len,
+ struct in6_addr *dst, triple_timestamp *timestamp) {
+ assert_se(read(fd, iov_base, iov_len) == (ssize_t) iov_len);
+
+ if (timestamp)
+ triple_timestamp_get(timestamp);
+
+ return 0;
+}
+
+int icmp6_send_router_solicitation(int s, const struct ether_addr *ether_addr) {
+ return 0;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ struct ether_addr mac_addr = {
+ .ether_addr_octet = {'A', 'B', 'C', '1', '2', '3'}
+ };
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(sd_ndisc_unrefp) sd_ndisc *nd = NULL;
+
+ assert_se(sd_event_new(&e) >= 0);
+ assert_se(sd_ndisc_new(&nd) >= 0);
+ assert_se(sd_ndisc_attach_event(nd, e, 0) >= 0);
+ assert_se(sd_ndisc_set_ifindex(nd, 42) >= 0);
+ assert_se(sd_ndisc_set_mac(nd, &mac_addr) >= 0);
+ assert_se(sd_ndisc_start(nd) >= 0);
+ assert_se(write(test_fd[1], data, size) == (ssize_t) size);
+ (void) sd_event_run(e, (uint64_t) -1);
+ assert_se(sd_ndisc_stop(nd) >= 0);
+ close(test_fd[1]);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-udev-rules.c b/src/fuzz/fuzz-udev-rules.c
new file mode 100644
index 0000000..e894fa8
--- /dev/null
+++ b/src/fuzz/fuzz-udev-rules.c
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "fuzz.h"
+#include "log.h"
+#include "mkdir.h"
+#include "missing.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tests.h"
+#include "udev.h"
+
+static struct fakefs {
+ const char *target;
+ bool ignore_mount_error;
+ bool is_mounted;
+} fakefss[] = {
+ { "/sys", false, false },
+ { "/dev", false, false },
+ { "/run", false, false },
+ { "/etc", false, false },
+ { UDEVLIBEXECDIR "/rules.d", true, false },
+};
+
+static int setup_mount_namespace(void) {
+ static thread_local bool is_namespaced = false;
+
+ if (is_namespaced)
+ return 1;
+
+ if (unshare(CLONE_NEWNS) < 0)
+ return log_error_errno(errno, "Failed to call unshare(): %m");
+
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
+ return log_error_errno(errno, "Failed to mount / as private: %m");
+
+ is_namespaced = true;
+
+ return 1;
+}
+
+static int setup_fake_filesystems(const char *runtime_dir) {
+ for (unsigned i = 0; i < ELEMENTSOF(fakefss); i++) {
+ if (mount(runtime_dir, fakefss[i].target, NULL, MS_BIND, NULL) < 0) {
+ log_full_errno(fakefss[i].ignore_mount_error ? LOG_DEBUG : LOG_ERR, errno, "Failed to mount %s: %m", fakefss[i].target);
+ if (!fakefss[i].ignore_mount_error)
+ return -errno;
+ } else
+ fakefss[i].is_mounted = true;
+ }
+
+ return 0;
+}
+
+static int cleanup_fake_filesystems(const char *runtime_dir) {
+ for (unsigned i = 0; i < ELEMENTSOF(fakefss); i++) {
+ if (!fakefss[i].is_mounted)
+ continue;
+
+ if (umount(fakefss[i].target) < 0) {
+ log_full_errno(fakefss[i].ignore_mount_error ? LOG_DEBUG : LOG_ERR, errno, "Failed to umount %s: %m", fakefss[i].target);
+ if (!fakefss[i].ignore_mount_error)
+ return -errno;
+ } else
+ fakefss[i].is_mounted = false;
+ }
+ return 0;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(udev_rules_freep) UdevRules *rules = NULL;
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ FILE *f = NULL;
+
+ (void) setup_mount_namespace();
+
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+
+ if (setup_fake_filesystems(runtime_dir) < 0) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ return EXIT_TEST_SKIP;
+#endif
+ }
+
+ if (!getenv("SYSTEMD_LOG_LEVEL")) {
+ log_set_max_level_realm(LOG_REALM_UDEV, LOG_CRIT);
+ log_set_max_level_realm(LOG_REALM_SYSTEMD, LOG_CRIT);
+ }
+
+ assert_se(mkdir_p("/etc/udev/rules.d", 0755) >= 0);
+ f = fopen("/etc/udev/rules.d/fuzz.rules", "we");
+ assert_se(f);
+ if (size != 0)
+ assert_se(fwrite(data, size, 1, f) == 1);
+ assert_se(fclose(f) == 0);
+
+ assert_se(udev_rules_new(&rules, RESOLVE_NAME_EARLY) == 0);
+
+ assert_se(cleanup_fake_filesystems(runtime_dir) >= 0);
+ return 0;
+}
diff --git a/src/fuzz/fuzz-unit-file.c b/src/fuzz/fuzz-unit-file.c
new file mode 100644
index 0000000..93de501
--- /dev/null
+++ b/src/fuzz/fuzz-unit-file.c
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "conf-parser.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fuzz.h"
+#include "install.h"
+#include "load-fragment.h"
+#include "string-util.h"
+#include "unit.h"
+#include "utf8.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_free_ char *out = NULL; /* out should be freed after g */
+ size_t out_size;
+ _cleanup_fclose_ FILE *f = NULL, *g = NULL;
+ _cleanup_free_ char *p = NULL;
+ UnitType t;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *u;
+ const char *name;
+ long offset;
+
+ if (size == 0)
+ return 0;
+
+ f = fmemopen((char*) data, size, "re");
+ assert_se(f);
+
+ if (read_line(f, LINE_MAX, &p) < 0)
+ return 0;
+
+ t = unit_type_from_string(p);
+ if (t < 0)
+ return 0;
+
+ if (!unit_vtable[t]->load)
+ return 0;
+
+ offset = ftell(f);
+ assert_se(offset >= 0);
+
+ for (;;) {
+ _cleanup_free_ char *l = NULL;
+ const char *ll;
+
+ if (read_line(f, LONG_LINE_MAX, &l) <= 0)
+ break;
+
+ ll = startswith(l, UTF8_BYTE_ORDER_MARK) ?: l;
+ ll = ll + strspn(ll, WHITESPACE);
+
+ if (HAS_FEATURE_MEMORY_SANITIZER && startswith(ll, "ListenNetlink")) {
+ /* ListenNetlink causes a false positive in msan,
+ * let's skip this for now. */
+ log_notice("Skipping test because ListenNetlink= is present");
+ return 0;
+ }
+ }
+
+ assert_se(fseek(f, offset, SEEK_SET) == 0);
+
+ /* We don't want to fill the logs with messages about parse errors.
+ * Disable most logging if not running standalone */
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se(manager_new(UNIT_FILE_SYSTEM, MANAGER_TEST_RUN_MINIMAL, &m) >= 0);
+
+ name = strjoina("a.", unit_type_to_string(t));
+ assert_se(unit_new_for_name(m, unit_vtable[t]->object_size, name, &u) >= 0);
+
+ (void) config_parse(name, name, f,
+ UNIT_VTABLE(u)->sections,
+ config_item_perf_lookup, load_fragment_gperf_lookup,
+ CONFIG_PARSE_ALLOW_INCLUDE, u);
+
+ g = open_memstream(&out, &out_size);
+ assert_se(g);
+
+ unit_dump(u, g, "");
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz.h b/src/fuzz/fuzz.h
new file mode 100644
index 0000000..1e56526
--- /dev/null
+++ b/src/fuzz/fuzz.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* The entry point into the fuzzer */
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
diff --git a/src/fuzz/meson.build b/src/fuzz/meson.build
new file mode 100644
index 0000000..f628001
--- /dev/null
+++ b/src/fuzz/meson.build
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+fuzzers += [
+ [['src/fuzz/fuzz-bus-message.c'],
+ [libshared],
+ []],
+
+ [['src/fuzz/fuzz-catalog.c'],
+ [libjournal_core,
+ libshared],
+ []],
+
+ [['src/fuzz/fuzz-dns-packet.c',
+ dns_type_headers],
+ [libsystemd_resolve_core,
+ libshared],
+ [libgcrypt,
+ libgpg_error,
+ libm]],
+
+ [['src/fuzz/fuzz-dhcp6-client.c',
+ 'src/libsystemd-network/dhcp-identifier.h',
+ 'src/libsystemd-network/dhcp-identifier.c',
+ 'src/libsystemd-network/dhcp6-internal.h',
+ 'src/systemd/sd-dhcp6-client.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/fuzz/fuzz-dhcp-server.c'],
+ [libsystemd_network,
+ libshared],
+ []],
+
+ [['src/fuzz/fuzz-lldp.c'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/fuzz/fuzz-ndisc-rs.c',
+ 'src/libsystemd-network/dhcp-identifier.h',
+ 'src/libsystemd-network/dhcp-identifier.c',
+ 'src/libsystemd-network/icmp6-util.h',
+ 'src/systemd/sd-dhcp6-client.h',
+ 'src/systemd/sd-ndisc.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/fuzz/fuzz-json.c'],
+ [libshared],
+ []],
+
+ [['src/fuzz/fuzz-unit-file.c'],
+ [libcore,
+ libshared],
+ [libmount]],
+
+ [['src/fuzz/fuzz-journald-audit.c',
+ 'src/fuzz/fuzz-journald.c'],
+ [libjournal_core,
+ libshared],
+ [libselinux]],
+
+ [['src/fuzz/fuzz-journald-kmsg.c',
+ 'src/fuzz/fuzz-journald.c'],
+ [libjournal_core,
+ libshared],
+ [libselinux]],
+
+ [['src/fuzz/fuzz-journald-native.c',
+ 'src/fuzz/fuzz-journald.c'],
+ [libjournal_core,
+ libshared],
+ [libselinux]],
+
+ [['src/fuzz/fuzz-journald-native-fd.c',
+ 'src/fuzz/fuzz-journald.c'],
+ [libjournal_core,
+ libshared],
+ [libselinux]],
+
+ [['src/fuzz/fuzz-journald-stream.c',
+ 'src/fuzz/fuzz-journald.c'],
+ [libjournal_core,
+ libshared],
+ [libselinux]],
+
+ [['src/fuzz/fuzz-journald-syslog.c',
+ 'src/fuzz/fuzz-journald.c'],
+ [libjournal_core,
+ libshared],
+ [libselinux]],
+
+ [['src/fuzz/fuzz-journal-remote.c'],
+ [libsystemd_journal_remote,
+ libshared],
+ []],
+
+ [['src/fuzz/fuzz-udev-rules.c'],
+ [libudev_core,
+ libudev_static,
+ libsystemd_network,
+ libshared],
+ [threads,
+ libacl]],
+
+ [['src/fuzz/fuzz-compress.c'],
+ [libshared],
+ []],
+]
diff --git a/src/getty-generator/getty-generator.c b/src/getty-generator/getty-generator.c
new file mode 100644
index 0000000..35501b6
--- /dev/null
+++ b/src/getty-generator/getty-generator.c
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "generator.h"
+#include "log.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "unit-name.h"
+#include "util.h"
+#include "virt.h"
+
+static const char *arg_dest = NULL;
+
+static int add_symlink(const char *fservice, const char *tservice) {
+ char *from, *to;
+ int r;
+
+ assert(fservice);
+ assert(tservice);
+
+ from = strjoina(SYSTEM_DATA_UNIT_PATH "/", fservice);
+ to = strjoina(arg_dest, "/getty.target.wants/", tservice);
+
+ mkdir_parents_label(to, 0755);
+
+ r = symlink(from, to);
+ if (r < 0) {
+ /* In case console=hvc0 is passed this will very likely result in EEXIST */
+ if (errno == EEXIST)
+ return 0;
+
+ return log_error_errno(errno, "Failed to create symlink %s: %m", to);
+ }
+
+ return 0;
+}
+
+static int add_serial_getty(const char *tty) {
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ assert(tty);
+
+ log_debug("Automatically adding serial getty for /dev/%s.", tty);
+
+ r = unit_name_from_path_instance("serial-getty", tty, ".service", &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate service name: %m");
+
+ return add_symlink("serial-getty@.service", n);
+}
+
+static int add_container_getty(const char *tty) {
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ assert(tty);
+
+ log_debug("Automatically adding container getty for /dev/pts/%s.", tty);
+
+ r = unit_name_from_path_instance("container-getty", tty, ".service", &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate service name: %m");
+
+ return add_symlink("container-getty@.service", n);
+}
+
+static int verify_tty(const char *name) {
+ _cleanup_close_ int fd = -1;
+ const char *p;
+
+ /* Some TTYs are weird and have been enumerated but don't work
+ * when you try to use them, such as classic ttyS0 and
+ * friends. Let's check that and open the device and run
+ * isatty() on it. */
+
+ p = strjoina("/dev/", name);
+
+ /* O_NONBLOCK is essential here, to make sure we don't wait
+ * for DCD */
+ fd = open(p, O_RDWR|O_NONBLOCK|O_NOCTTY|O_CLOEXEC|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ errno = 0;
+ if (isatty(fd) <= 0)
+ return errno > 0 ? -errno : -EIO;
+
+ return 0;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ _cleanup_free_ char *active = NULL;
+ const char *j;
+ int r;
+
+ assert_se(arg_dest = dest);
+
+ if (detect_container() > 0) {
+ _cleanup_free_ char *container_ttys = NULL;
+
+ log_debug("Automatically adding console shell.");
+
+ r = add_symlink("console-getty.service", "console-getty.service");
+ if (r < 0)
+ return r;
+
+ /* When $container_ttys is set for PID 1, spawn
+ * gettys on all ptys named therein. Note that despite
+ * the variable name we only support ptys here. */
+
+ r = getenv_for_pid(1, "container_ttys", &container_ttys);
+ if (r > 0) {
+ const char *word, *state;
+ size_t l;
+
+ FOREACH_WORD(word, l, container_ttys, state) {
+ const char *t;
+ char tty[l + 1];
+
+ memcpy(tty, word, l);
+ tty[l] = 0;
+
+ /* First strip off /dev/ if it is specified */
+ t = path_startswith(tty, "/dev/");
+ if (!t)
+ t = tty;
+
+ /* Then, make sure it's actually a pty */
+ t = path_startswith(t, "pts/");
+ if (!t)
+ continue;
+
+ r = add_container_getty(t);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* Don't add any further magic if we are in a container */
+ return 0;
+ }
+
+ if (read_one_line_file("/sys/class/tty/console/active", &active) >= 0) {
+ const char *word, *state;
+ size_t l;
+
+ /* Automatically add in a serial getty on all active
+ * kernel consoles */
+ FOREACH_WORD(word, l, active, state) {
+ _cleanup_free_ char *tty = NULL;
+
+ tty = strndup(word, l);
+ if (!tty)
+ return log_oom();
+
+ /* We assume that gettys on virtual terminals are
+ * started via manual configuration and do this magic
+ * only for non-VC terminals. */
+
+ if (isempty(tty) || tty_is_vc(tty))
+ continue;
+
+ if (verify_tty(tty) < 0)
+ continue;
+
+ r = add_serial_getty(tty);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* Automatically add in a serial getty on the first
+ * virtualizer console */
+ FOREACH_STRING(j,
+ "hvc0",
+ "xvc0",
+ "hvsi0",
+ "sclp_line0",
+ "ttysclp0",
+ "3270!tty1") {
+ const char *p;
+
+ p = strjoina("/sys/class/tty/", j);
+ if (access(p, F_OK) < 0)
+ continue;
+
+ r = add_serial_getty(j);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/gpt-auto-generator/gpt-auto-generator.c b/src/gpt-auto-generator/gpt-auto-generator.c
new file mode 100644
index 0000000..d9e29c4
--- /dev/null
+++ b/src/gpt-auto-generator/gpt-auto-generator.c
@@ -0,0 +1,746 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <blkid.h>
+#include <stdlib.h>
+#include <sys/statfs.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "blkid-util.h"
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "dissect-image.h"
+#include "efivars.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fstab-util.h"
+#include "generator.h"
+#include "gpt.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "special.h"
+#include "specifier.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "util.h"
+#include "virt.h"
+
+static const char *arg_dest = NULL;
+static bool arg_enabled = true;
+static bool arg_root_enabled = true;
+static int arg_root_rw = -1;
+
+static int add_cryptsetup(const char *id, const char *what, bool rw, bool require, char **device) {
+ _cleanup_free_ char *e = NULL, *n = NULL, *d = NULL, *id_escaped = NULL, *what_escaped = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *p;
+ int r;
+
+ assert(id);
+ assert(what);
+
+ r = unit_name_from_path(what, ".device", &d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ e = unit_name_escape(id);
+ if (!e)
+ return log_oom();
+
+ r = unit_name_build("systemd-cryptsetup", e, ".service", &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ id_escaped = specifier_escape(id);
+ if (!id_escaped)
+ return log_oom();
+
+ what_escaped = specifier_escape(what);
+ if (!what_escaped)
+ return log_oom();
+
+ p = strjoina(arg_dest, "/", n);
+ f = fopen(p, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", p);
+
+ fprintf(f,
+ "# Automatically generated by systemd-gpt-auto-generator\n\n"
+ "[Unit]\n"
+ "Description=Cryptography Setup for %%I\n"
+ "Documentation=man:systemd-gpt-auto-generator(8) man:systemd-cryptsetup@.service(8)\n"
+ "DefaultDependencies=no\n"
+ "Conflicts=umount.target\n"
+ "BindsTo=dev-mapper-%%i.device %s\n"
+ "Before=umount.target cryptsetup.target\n"
+ "After=%s\n"
+ "IgnoreOnIsolate=true\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "TimeoutSec=0\n" /* the binary handles timeouts anyway */
+ "KeyringMode=shared\n" /* make sure we can share cached keys among instances */
+ "ExecStart=" SYSTEMD_CRYPTSETUP_PATH " attach '%s' '%s' '' '%s'\n"
+ "ExecStop=" SYSTEMD_CRYPTSETUP_PATH " detach '%s'\n",
+ d, d,
+ id_escaped, what_escaped, rw ? "" : "read-only",
+ id_escaped);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write file %s: %m", p);
+
+ r = generator_add_symlink(arg_dest, d, "wants", n);
+ if (r < 0)
+ return r;
+
+ if (require) {
+ const char *dmname;
+
+ r = generator_add_symlink(arg_dest, "cryptsetup.target", "requires", n);
+ if (r < 0)
+ return r;
+
+ dmname = strjoina("dev-mapper-", e, ".device");
+ r = generator_add_symlink(arg_dest, dmname, "requires", n);
+ if (r < 0)
+ return r;
+ }
+
+ p = strjoina(arg_dest, "/dev-mapper-", e, ".device.d/50-job-timeout-sec-0.conf");
+ mkdir_parents_label(p, 0755);
+ r = write_string_file(p,
+ "# Automatically generated by systemd-gpt-auto-generator\n\n"
+ "[Unit]\n"
+ "JobTimeoutSec=0\n",
+ WRITE_STRING_FILE_CREATE); /* the binary handles timeouts anyway */
+ if (r < 0)
+ return log_error_errno(r, "Failed to write device drop-in: %m");
+
+ if (device) {
+ char *ret;
+
+ ret = strappend("/dev/mapper/", id);
+ if (!ret)
+ return log_oom();
+
+ *device = ret;
+ }
+
+ return 0;
+}
+
+static int add_mount(
+ const char *id,
+ const char *what,
+ const char *where,
+ const char *fstype,
+ bool rw,
+ const char *options,
+ const char *description,
+ const char *post) {
+
+ _cleanup_free_ char *unit = NULL, *crypto_what = NULL, *p = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ /* Note that we don't apply specifier escaping on the input strings here, since we know they are not configured
+ * externally, but all originate from our own sources here, and hence we know they contain no % characters that
+ * could potentially be understood as specifiers. */
+
+ assert(id);
+ assert(what);
+ assert(where);
+ assert(description);
+
+ log_debug("Adding %s: %s %s", where, what, strna(fstype));
+
+ if (streq_ptr(fstype, "crypto_LUKS")) {
+
+ r = add_cryptsetup(id, what, rw, true, &crypto_what);
+ if (r < 0)
+ return r;
+
+ what = crypto_what;
+ fstype = NULL;
+ }
+
+ r = unit_name_from_path(where, ".mount", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ p = strjoin(arg_dest, "/", unit);
+ if (!p)
+ return log_oom();
+
+ f = fopen(p, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", unit);
+
+ fprintf(f,
+ "# Automatically generated by systemd-gpt-auto-generator\n\n"
+ "[Unit]\n"
+ "Description=%s\n"
+ "Documentation=man:systemd-gpt-auto-generator(8)\n",
+ description);
+
+ if (post)
+ fprintf(f, "Before=%s\n", post);
+
+ r = generator_write_fsck_deps(f, arg_dest, what, where, fstype);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "\n"
+ "[Mount]\n"
+ "What=%s\n"
+ "Where=%s\n",
+ what, where);
+
+ if (fstype)
+ fprintf(f, "Type=%s\n", fstype);
+
+ if (options)
+ fprintf(f, "Options=%s,%s\n", options, rw ? "rw" : "ro");
+ else
+ fprintf(f, "Options=%s\n", rw ? "rw" : "ro");
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", p);
+
+ if (post)
+ return generator_add_symlink(arg_dest, post, "requires", unit);
+ return 0;
+}
+
+static int path_is_busy(const char *where) {
+ int r;
+
+ /* already a mountpoint; generators run during reload */
+ r = path_is_mount_point(where, NULL, AT_SYMLINK_FOLLOW);
+ if (r > 0)
+ return false;
+
+ /* the directory might not exist on a stateless system */
+ if (r == -ENOENT)
+ return false;
+
+ if (r < 0)
+ return log_warning_errno(r, "Cannot check if \"%s\" is a mount point: %m", where);
+
+ /* not a mountpoint but it contains files */
+ r = dir_is_empty(where);
+ if (r < 0)
+ return log_warning_errno(r, "Cannot check if \"%s\" is empty: %m", where);
+ if (r > 0)
+ return false;
+
+ log_debug("\"%s\" already populated, ignoring.", where);
+ return true;
+}
+
+static int add_partition_mount(
+ DissectedPartition *p,
+ const char *id,
+ const char *where,
+ const char *description) {
+
+ int r;
+ assert(p);
+
+ r = path_is_busy(where);
+ if (r != 0)
+ return r < 0 ? r : 0;
+
+ return add_mount(
+ id,
+ p->node,
+ where,
+ p->fstype,
+ p->rw,
+ NULL,
+ description,
+ SPECIAL_LOCAL_FS_TARGET);
+}
+
+static int add_swap(const char *path) {
+ _cleanup_free_ char *name = NULL, *unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(path);
+
+ /* Disable the swap auto logic if at least one swap is defined in /etc/fstab, see #6192. */
+ r = fstab_has_fstype("swap");
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse fstab: %m");
+ if (r > 0) {
+ log_debug("swap specified in fstab, ignoring.");
+ return 0;
+ }
+
+ log_debug("Adding swap: %s", path);
+
+ r = unit_name_from_path(path, ".swap", &name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ unit = strjoin(arg_dest, "/", name);
+ if (!unit)
+ return log_oom();
+
+ f = fopen(unit, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", unit);
+
+ fprintf(f,
+ "# Automatically generated by systemd-gpt-auto-generator\n\n"
+ "[Unit]\n"
+ "Description=Swap Partition\n"
+ "Documentation=man:systemd-gpt-auto-generator(8)\n\n"
+ "[Swap]\n"
+ "What=%s\n",
+ path);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", unit);
+
+ return generator_add_symlink(arg_dest, SPECIAL_SWAP_TARGET, "wants", name);
+}
+
+#if ENABLE_EFI
+static int add_automount(
+ const char *id,
+ const char *what,
+ const char *where,
+ const char *fstype,
+ bool rw,
+ const char *options,
+ const char *description,
+ usec_t timeout) {
+
+ _cleanup_free_ char *unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *opt = "noauto", *p;
+ int r;
+
+ assert(id);
+ assert(where);
+ assert(description);
+
+ if (options)
+ opt = strjoina(options, ",", opt);
+
+ r = add_mount(id,
+ what,
+ where,
+ fstype,
+ rw,
+ opt,
+ description,
+ NULL);
+ if (r < 0)
+ return r;
+
+ r = unit_name_from_path(where, ".automount", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ p = strjoina(arg_dest, "/", unit);
+ f = fopen(p, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", unit);
+
+ fprintf(f,
+ "# Automatically generated by systemd-gpt-auto-generator\n\n"
+ "[Unit]\n"
+ "Description=%s\n"
+ "Documentation=man:systemd-gpt-auto-generator(8)\n"
+ "[Automount]\n"
+ "Where=%s\n"
+ "TimeoutIdleSec="USEC_FMT"\n",
+ description,
+ where,
+ timeout / USEC_PER_SEC);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", p);
+
+ return generator_add_symlink(arg_dest, SPECIAL_LOCAL_FS_TARGET, "wants", unit);
+}
+
+static int add_esp(DissectedPartition *p) {
+ const char *esp;
+ int r;
+
+ assert(p);
+
+ if (in_initrd()) {
+ log_debug("In initrd, ignoring the ESP.");
+ return 0;
+ }
+
+ /* If /efi exists we'll use that. Otherwise we'll use /boot, as that's usually the better choice */
+ esp = access("/efi/", F_OK) >= 0 ? "/efi" : "/boot";
+
+ /* We create an .automount which is not overridden by the .mount from the fstab generator. */
+ r = fstab_is_mount_point(esp);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse fstab: %m");
+ if (r > 0) {
+ log_debug("%s specified in fstab, ignoring.", esp);
+ return 0;
+ }
+
+ r = path_is_busy(esp);
+ if (r != 0)
+ return r < 0 ? r : 0;
+
+ if (is_efi_boot()) {
+ sd_id128_t loader_uuid;
+
+ /* If this is an EFI boot, be extra careful, and only mount the ESP if it was the ESP used for booting. */
+
+ r = efi_loader_get_device_part_uuid(&loader_uuid);
+ if (r == -ENOENT) {
+ log_debug("EFI loader partition unknown.");
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to read ESP partition UUID: %m");
+
+ if (!sd_id128_equal(p->uuid, loader_uuid)) {
+ log_debug("Partition for %s does not appear to be the partition we are booted from.", esp);
+ return 0;
+ }
+ } else
+ log_debug("Not an EFI boot, skipping ESP check.");
+
+ return add_automount("boot",
+ p->node,
+ esp,
+ p->fstype,
+ true,
+ "umask=0077",
+ "EFI System Partition Automount",
+ 120 * USEC_PER_SEC);
+}
+#else
+static int add_esp(DissectedPartition *p) {
+ return 0;
+}
+#endif
+
+static int add_root_rw(DissectedPartition *p) {
+ const char *path;
+ int r;
+
+ assert(p);
+
+ if (in_initrd()) {
+ log_debug("In initrd, not generating drop-in for systemd-remount-fs.service.");
+ return 0;
+ }
+
+ if (arg_root_rw >= 0) {
+ log_debug("Parameter ro/rw specified on kernel command line, not generating drop-in for systemd-remount-fs.service.");
+ return 0;
+ }
+
+ if (!p->rw) {
+ log_debug("Root partition marked read-only in GPT partition table, not generating drop-in for systemd-remount-fs.service.");
+ return 0;
+ }
+
+ path = strjoina(arg_dest, "/systemd-remount-fs.service.d/50-remount-rw.conf");
+ (void) mkdir_parents(path, 0755);
+
+ r = write_string_file(path,
+ "# Automatically generated by systemd-gpt-generator\n\n"
+ "[Unit]\n"
+ "ConditionPathExists=\n\n" /* We need to turn off the ConditionPathExist= in the main unit file */
+ "[Service]\n"
+ "Environment=SYSTEMD_REMOUNT_ROOT_RW=1\n",
+ WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_NOFOLLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write drop-in file %s: %m", path);
+
+ return 0;
+}
+
+static int open_parent(dev_t devnum, int *ret) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ const char *name, *devtype, *node;
+ sd_device *parent;
+ dev_t pn;
+ int fd, r;
+
+ assert(ret);
+
+ r = sd_device_new_from_devnum(&d, 'b', devnum);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to open device: %m");
+
+ if (sd_device_get_devname(d, &name) < 0) {
+ r = sd_device_get_syspath(d, &name);
+ if (r < 0) {
+ log_device_debug_errno(d, r, "Device %u:%u does not have a name, ignoring: %m", major(devnum), minor(devnum));
+ return 0;
+ }
+ }
+
+ r = sd_device_get_parent(d, &parent);
+ if (r < 0) {
+ log_device_debug_errno(d, r, "Not a partitioned device, ignoring: %m");
+ return 0;
+ }
+
+ /* Does it have a devtype? */
+ r = sd_device_get_devtype(parent, &devtype);
+ if (r < 0) {
+ log_device_debug_errno(parent, r, "Parent doesn't have a device type, ignoring: %m");
+ return 0;
+ }
+
+ /* Is this a disk or a partition? We only care for disks... */
+ if (!streq(devtype, "disk")) {
+ log_device_debug(parent, "Parent isn't a raw disk, ignoring.");
+ return 0;
+ }
+
+ /* Does it have a device node? */
+ r = sd_device_get_devname(parent, &node);
+ if (r < 0) {
+ log_device_debug_errno(parent, r, "Parent device does not have device node, ignoring: %m");
+ return 0;
+ }
+
+ log_device_debug(d, "Root device %s.", node);
+
+ r = sd_device_get_devnum(parent, &pn);
+ if (r < 0) {
+ log_device_debug_errno(parent, r, "Parent device is not a proper block device, ignoring: %m");
+ return 0;
+ }
+
+ fd = open(node, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", node);
+
+ *ret = fd;
+ return 1;
+}
+
+static int enumerate_partitions(dev_t devnum) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+ int r, k;
+
+ r = open_parent(devnum, &fd);
+ if (r <= 0)
+ return r;
+
+ r = dissect_image(fd, NULL, 0, DISSECT_IMAGE_GPT_ONLY|DISSECT_IMAGE_NO_UDEV, &m);
+ if (r == -ENOPKG) {
+ log_debug_errno(r, "No suitable partition table found, ignoring.");
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to dissect: %m");
+
+ if (m->partitions[PARTITION_SWAP].found) {
+ k = add_swap(m->partitions[PARTITION_SWAP].node);
+ if (k < 0)
+ r = k;
+ }
+
+ if (m->partitions[PARTITION_ESP].found) {
+ k = add_esp(m->partitions + PARTITION_ESP);
+ if (k < 0)
+ r = k;
+ }
+
+ if (m->partitions[PARTITION_HOME].found) {
+ k = add_partition_mount(m->partitions + PARTITION_HOME, "home", "/home", "Home Partition");
+ if (k < 0)
+ r = k;
+ }
+
+ if (m->partitions[PARTITION_SRV].found) {
+ k = add_partition_mount(m->partitions + PARTITION_SRV, "srv", "/srv", "Server Data Partition");
+ if (k < 0)
+ r = k;
+ }
+
+ if (m->partitions[PARTITION_ROOT].found) {
+ k = add_root_rw(m->partitions + PARTITION_ROOT);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r;
+
+ assert(key);
+
+ if (proc_cmdline_key_streq(key, "systemd.gpt_auto") ||
+ proc_cmdline_key_streq(key, "rd.systemd.gpt_auto")) {
+
+ r = value ? parse_boolean(value) : 1;
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse gpt-auto switch \"%s\", ignoring: %m", value);
+ else
+ arg_enabled = r;
+
+ } else if (proc_cmdline_key_streq(key, "root")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ /* Disable root disk logic if there's a root= value
+ * specified (unless it happens to be "gpt-auto") */
+
+ arg_root_enabled = streq(value, "gpt-auto");
+
+ } else if (proc_cmdline_key_streq(key, "roothash")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ /* Disable root disk logic if there's roothash= defined (i.e. verity enabled) */
+
+ arg_root_enabled = false;
+
+ } else if (proc_cmdline_key_streq(key, "rw") && !value)
+ arg_root_rw = true;
+ else if (proc_cmdline_key_streq(key, "ro") && !value)
+ arg_root_rw = false;
+
+ return 0;
+}
+
+#if ENABLE_EFI
+static int add_root_cryptsetup(void) {
+
+ /* If a device /dev/gpt-auto-root-luks appears, then make it pull in systemd-cryptsetup-root.service, which
+ * sets it up, and causes /dev/gpt-auto-root to appear which is all we are looking for. */
+
+ return add_cryptsetup("root", "/dev/gpt-auto-root-luks", true, false, NULL);
+}
+#endif
+
+static int add_root_mount(void) {
+
+#if ENABLE_EFI
+ int r;
+
+ if (!is_efi_boot()) {
+ log_debug("Not a EFI boot, not creating root mount.");
+ return 0;
+ }
+
+ r = efi_loader_get_device_part_uuid(NULL);
+ if (r == -ENOENT) {
+ log_debug("EFI loader partition unknown, exiting.");
+ return 0;
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to read ESP partition UUID: %m");
+
+ /* OK, we have an ESP partition, this is fantastic, so let's
+ * wait for a root device to show up. A udev rule will create
+ * the link for us under the right name. */
+
+ if (in_initrd()) {
+ r = generator_write_initrd_root_device_deps(arg_dest, "/dev/gpt-auto-root");
+ if (r < 0)
+ return 0;
+
+ r = add_root_cryptsetup();
+ if (r < 0)
+ return r;
+ }
+
+ return add_mount(
+ "root",
+ "/dev/gpt-auto-root",
+ in_initrd() ? "/sysroot" : "/",
+ NULL,
+ arg_root_rw > 0,
+ NULL,
+ "Root Partition",
+ in_initrd() ? SPECIAL_INITRD_ROOT_FS_TARGET : SPECIAL_LOCAL_FS_TARGET);
+#else
+ return 0;
+#endif
+}
+
+static int add_mounts(void) {
+ dev_t devno;
+ int r;
+
+ r = get_block_device_harder("/", &devno);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine block device of root file system: %m");
+ if (r == 0) {
+ r = get_block_device_harder("/usr", &devno);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine block device of /usr file system: %m");
+ if (r == 0) {
+ log_debug("Neither root nor /usr file system are on a (single) block device.");
+ return 0;
+ }
+ }
+
+ return enumerate_partitions(devno);
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r, k;
+
+ assert_se(arg_dest = dest_late);
+
+ if (detect_container() > 0) {
+ log_debug("In a container, exiting.");
+ return 0;
+ }
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ if (!arg_enabled) {
+ log_debug("Disabled, exiting.");
+ return 0;
+ }
+
+ if (arg_root_enabled)
+ r = add_root_mount();
+
+ if (!in_initrd()) {
+ k = add_mounts();
+ if (r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/hibernate-resume/hibernate-resume-generator.c b/src/hibernate-resume/hibernate-resume-generator.c
new file mode 100644
index 0000000..8b127ca
--- /dev/null
+++ b/src/hibernate-resume/hibernate-resume-generator.c
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "fstab-util.h"
+#include "generator.h"
+#include "log.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "proc-cmdline.h"
+#include "special.h"
+#include "string-util.h"
+#include "unit-name.h"
+#include "util.h"
+
+static const char *arg_dest = "/tmp";
+static char *arg_resume_device = NULL;
+static bool arg_noresume = false;
+
+STATIC_DESTRUCTOR_REGISTER(arg_resume_device, freep);
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+
+ if (streq(key, "resume")) {
+ char *s;
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ s = fstab_node_to_udev_node(value);
+ if (!s)
+ return log_oom();
+
+ free_and_replace(arg_resume_device, s);
+
+ } else if (streq(key, "noresume")) {
+ if (value) {
+ log_warning("\"noresume\" kernel command line switch specified with an argument, ignoring.");
+ return 0;
+ }
+
+ arg_noresume = true;
+ }
+
+ return 0;
+}
+
+static int process_resume(void) {
+ _cleanup_free_ char *name = NULL, *lnk = NULL;
+ int r;
+
+ if (!arg_resume_device)
+ return 0;
+
+ r = unit_name_from_path_instance("systemd-hibernate-resume", arg_resume_device, ".service", &name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ lnk = strjoin(arg_dest, "/" SPECIAL_SYSINIT_TARGET ".wants/", name);
+ if (!lnk)
+ return log_oom();
+
+ mkdir_parents_label(lnk, 0755);
+ if (symlink(SYSTEM_DATA_UNIT_PATH "/systemd-hibernate-resume@.service", lnk) < 0)
+ return log_error_errno(errno, "Failed to create symlink %s: %m", lnk);
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ int r = 0;
+
+ log_setup_generator();
+
+ if (argc > 1 && argc != 4)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program takes three or no arguments.");
+
+ if (argc > 1)
+ arg_dest = argv[1];
+
+ /* Don't even consider resuming outside of initramfs. */
+ if (!in_initrd()) {
+ log_debug("Not running in an initrd, quitting.");
+ return 0;
+ }
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ if (arg_noresume) {
+ log_notice("Found \"noresume\" on the kernel command line, quitting.");
+ return 0;
+ }
+
+ return process_resume();
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/hibernate-resume/hibernate-resume.c b/src/hibernate-resume/hibernate-resume.c
new file mode 100644
index 0000000..17e7cd1
--- /dev/null
+++ b/src/hibernate-resume/hibernate-resume.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ struct stat st;
+ const char *device;
+ _cleanup_free_ char *major_minor = NULL;
+ int r;
+
+ if (argc != 2) {
+ log_error("This program expects one argument.");
+ return EXIT_FAILURE;
+ }
+
+ log_setup_service();
+
+ umask(0022);
+
+ /* Refuse to run unless we are in an initrd() */
+ if (!in_initrd())
+ return EXIT_SUCCESS;
+
+ device = argv[1];
+
+ if (stat(device, &st) < 0) {
+ log_error_errno(errno, "Failed to stat '%s': %m", device);
+ return EXIT_FAILURE;
+ }
+
+ if (!S_ISBLK(st.st_mode)) {
+ log_error("Resume device '%s' is not a block device.", device);
+ return EXIT_FAILURE;
+ }
+
+ if (asprintf(&major_minor, "%d:%d", major(st.st_rdev), minor(st.st_rdev)) < 0) {
+ log_oom();
+ return EXIT_FAILURE;
+ }
+
+ r = write_string_file("/sys/power/resume", major_minor, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write '%s' to /sys/power/resume: %m", major_minor);
+ return EXIT_FAILURE;
+ }
+
+ /*
+ * The write above shall not return.
+ *
+ * However, failed resume is a normal condition (may mean that there is
+ * no hibernation image).
+ */
+
+ log_info("Could not resume from '%s' (%s).", device, major_minor);
+ return EXIT_SUCCESS;
+}
diff --git a/src/hostname/hostnamectl.c b/src/hostname/hostnamectl.c
new file mode 100644
index 0000000..7f9bb49
--- /dev/null
+++ b/src/hostname/hostnamectl.c
@@ -0,0 +1,449 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sd-bus.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "hostname-util.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "spawn-polkit-agent.h"
+#include "util.h"
+#include "verbs.h"
+
+static bool arg_ask_password = true;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static char *arg_host = NULL;
+static bool arg_transient = false;
+static bool arg_pretty = false;
+static bool arg_static = false;
+
+typedef struct StatusInfo {
+ const char *hostname;
+ const char *static_hostname;
+ const char *pretty_hostname;
+ const char *icon_name;
+ const char *chassis;
+ const char *deployment;
+ const char *location;
+ const char *kernel_name;
+ const char *kernel_release;
+ const char *os_pretty_name;
+ const char *os_cpe_name;
+ const char *virtualization;
+ const char *architecture;
+ const char *home_url;
+} StatusInfo;
+
+static void print_status_info(StatusInfo *i) {
+ sd_id128_t mid = {}, bid = {};
+ int r;
+
+ assert(i);
+
+ printf(" Static hostname: %s\n", strna(i->static_hostname));
+
+ if (!isempty(i->pretty_hostname) &&
+ !streq_ptr(i->pretty_hostname, i->static_hostname))
+ printf(" Pretty hostname: %s\n", i->pretty_hostname);
+
+ if (!isempty(i->hostname) &&
+ !streq_ptr(i->hostname, i->static_hostname))
+ printf("Transient hostname: %s\n", i->hostname);
+
+ if (!isempty(i->icon_name))
+ printf(" Icon name: %s\n",
+ strna(i->icon_name));
+
+ if (!isempty(i->chassis))
+ printf(" Chassis: %s\n",
+ strna(i->chassis));
+
+ if (!isempty(i->deployment))
+ printf(" Deployment: %s\n", i->deployment);
+
+ if (!isempty(i->location))
+ printf(" Location: %s\n", i->location);
+
+ r = sd_id128_get_machine(&mid);
+ if (r >= 0)
+ printf(" Machine ID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(mid));
+
+ r = sd_id128_get_boot(&bid);
+ if (r >= 0)
+ printf(" Boot ID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(bid));
+
+ if (!isempty(i->virtualization))
+ printf(" Virtualization: %s\n", i->virtualization);
+
+ if (!isempty(i->os_pretty_name)) {
+ _cleanup_free_ char *formatted = NULL;
+ const char *t = i->os_pretty_name;
+
+ if (i->home_url) {
+ if (terminal_urlify(i->home_url, i->os_pretty_name, &formatted) >= 0)
+ t = formatted;
+ }
+
+ printf(" Operating System: %s\n", t);
+ }
+
+ if (!isempty(i->os_cpe_name))
+ printf(" CPE OS Name: %s\n", i->os_cpe_name);
+
+ if (!isempty(i->kernel_name) && !isempty(i->kernel_release))
+ printf(" Kernel: %s %s\n", i->kernel_name, i->kernel_release);
+
+ if (!isempty(i->architecture))
+ printf(" Architecture: %s\n", i->architecture);
+
+}
+
+static int show_one_name(sd_bus *bus, const char* attr) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *s;
+ int r;
+
+ r = sd_bus_get_property(
+ bus,
+ "org.freedesktop.hostname1",
+ "/org/freedesktop/hostname1",
+ "org.freedesktop.hostname1",
+ attr,
+ &error, &reply, "s");
+ if (r < 0)
+ return log_error_errno(r, "Could not get property: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "s", &s);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ printf("%s\n", s);
+
+ return 0;
+}
+
+static int show_all_names(sd_bus *bus, sd_bus_error *error) {
+ StatusInfo info = {};
+
+ static const struct bus_properties_map hostname_map[] = {
+ { "Hostname", "s", NULL, offsetof(StatusInfo, hostname) },
+ { "StaticHostname", "s", NULL, offsetof(StatusInfo, static_hostname) },
+ { "PrettyHostname", "s", NULL, offsetof(StatusInfo, pretty_hostname) },
+ { "IconName", "s", NULL, offsetof(StatusInfo, icon_name) },
+ { "Chassis", "s", NULL, offsetof(StatusInfo, chassis) },
+ { "Deployment", "s", NULL, offsetof(StatusInfo, deployment) },
+ { "Location", "s", NULL, offsetof(StatusInfo, location) },
+ { "KernelName", "s", NULL, offsetof(StatusInfo, kernel_name) },
+ { "KernelRelease", "s", NULL, offsetof(StatusInfo, kernel_release) },
+ { "OperatingSystemPrettyName", "s", NULL, offsetof(StatusInfo, os_pretty_name) },
+ { "OperatingSystemCPEName", "s", NULL, offsetof(StatusInfo, os_cpe_name) },
+ { "HomeURL", "s", NULL, offsetof(StatusInfo, home_url) },
+ {}
+ };
+
+ static const struct bus_properties_map manager_map[] = {
+ { "Virtualization", "s", NULL, offsetof(StatusInfo, virtualization) },
+ { "Architecture", "s", NULL, offsetof(StatusInfo, architecture) },
+ {}
+ };
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *host_message = NULL, *manager_message = NULL;
+ int r;
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.hostname1",
+ "/org/freedesktop/hostname1",
+ hostname_map,
+ 0,
+ error,
+ &host_message,
+ &info);
+ if (r < 0)
+ return r;
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ manager_map,
+ 0,
+ error,
+ &manager_message,
+ &info);
+
+ print_status_info(&info);
+
+ return r;
+}
+
+static int show_status(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+ int r;
+
+ if (arg_pretty || arg_static || arg_transient) {
+ const char *attr;
+
+ if (!!arg_static + !!arg_pretty + !!arg_transient > 1) {
+ log_error("Cannot query more than one name type at a time");
+ return -EINVAL;
+ }
+
+ attr = arg_pretty ? "PrettyHostname" :
+ arg_static ? "StaticHostname" : "Hostname";
+
+ return show_one_name(bus, attr);
+ } else {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = show_all_names(bus, &error);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query system properties: %s", bus_error_message(&error, r));
+
+ return 0;
+ }
+}
+
+static int set_simple_string(sd_bus *bus, const char *method, const char *value) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r = 0;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.hostname1",
+ "/org/freedesktop/hostname1",
+ "org.freedesktop.hostname1",
+ method,
+ &error, NULL,
+ "sb", value, arg_ask_password);
+ if (r < 0)
+ return log_error_errno(r, "Could not set property: %s", bus_error_message(&error, -r));
+
+ return 0;
+}
+
+static int set_hostname(int argc, char **argv, void *userdata) {
+ _cleanup_free_ char *h = NULL;
+ const char *hostname = argv[1];
+ sd_bus *bus = userdata;
+ int r;
+
+ if (!arg_pretty && !arg_static && !arg_transient)
+ arg_pretty = arg_static = arg_transient = true;
+
+ if (arg_pretty) {
+ const char *p;
+
+ /* If the passed hostname is already valid, then assume the user doesn't know anything about pretty
+ * hostnames, so let's unset the pretty hostname, and just set the passed hostname as static/dynamic
+ * hostname. */
+ if (arg_static && hostname_is_valid(hostname, true))
+ p = ""; /* No pretty hostname (as it is redundant), just a static one */
+ else
+ p = hostname; /* Use the passed name as pretty hostname */
+
+ r = set_simple_string(bus, "SetPrettyHostname", p);
+ if (r < 0)
+ return r;
+
+ /* Now that we set the pretty hostname, let's clean up the parameter and use that as static
+ * hostname. If the hostname was already valid as static hostname, this will only chop off the trailing
+ * dot if there is one. If it was not valid, then it will be made fully valid by truncating, dropping
+ * multiple dots, and dropping weird chars. Note that we clean the name up only if we also are
+ * supposed to set the pretty name. If the pretty name is not being set we assume the user knows what
+ * he does and pass the name as-is. */
+ h = strdup(hostname);
+ if (!h)
+ return log_oom();
+
+ hostname = hostname_cleanup(h); /* Use the cleaned up name as static hostname */
+ }
+
+ if (arg_static) {
+ r = set_simple_string(bus, "SetStaticHostname", hostname);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_transient) {
+ r = set_simple_string(bus, "SetHostname", hostname);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int set_icon_name(int argc, char **argv, void *userdata) {
+ return set_simple_string(userdata, "SetIconName", argv[1]);
+}
+
+static int set_chassis(int argc, char **argv, void *userdata) {
+ return set_simple_string(userdata, "SetChassis", argv[1]);
+}
+
+static int set_deployment(int argc, char **argv, void *userdata) {
+ return set_simple_string(userdata, "SetDeployment", argv[1]);
+}
+
+static int set_location(int argc, char **argv, void *userdata) {
+ return set_simple_string(userdata, "SetLocation", argv[1]);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("hostnamectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n\n"
+ "Query or change system hostname.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-ask-password Do not prompt for password\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --transient Only set transient hostname\n"
+ " --static Only set static hostname\n"
+ " --pretty Only set pretty hostname\n\n"
+ "Commands:\n"
+ " status Show current hostname settings\n"
+ " set-hostname NAME Set system hostname\n"
+ " set-icon-name NAME Set icon name for host\n"
+ " set-chassis NAME Set chassis type for host\n"
+ " set-deployment NAME Set deployment environment for host\n"
+ " set-location NAME Set location for host\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int verb_help(int argc, char **argv, void *userdata) {
+ return help();
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_ASK_PASSWORD,
+ ARG_TRANSIENT,
+ ARG_STATIC,
+ ARG_PRETTY
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "transient", no_argument, NULL, ARG_TRANSIENT },
+ { "static", no_argument, NULL, ARG_STATIC },
+ { "pretty", no_argument, NULL, ARG_PRETTY },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hH:M:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case ARG_TRANSIENT:
+ arg_transient = true;
+ break;
+
+ case ARG_PRETTY:
+ arg_pretty = true;
+ break;
+
+ case ARG_STATIC:
+ arg_static = true;
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int hostnamectl_main(sd_bus *bus, int argc, char *argv[]) {
+
+ static const Verb verbs[] = {
+ { "status", VERB_ANY, 1, VERB_DEFAULT, show_status },
+ { "set-hostname", 2, 2, 0, set_hostname },
+ { "set-icon-name", 2, 2, 0, set_icon_name },
+ { "set-chassis", 2, 2, 0, set_chassis },
+ { "set-deployment", 2, 2, 0, set_deployment },
+ { "set-location", 2, 2, 0, set_location },
+ { "help", VERB_ANY, VERB_ANY, 0, verb_help }, /* Not documented, but supported since it is created. */
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, bus);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = bus_connect_transport(arg_transport, arg_host, false, &bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ return hostnamectl_main(bus, argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/hostname/hostnamed.c b/src/hostname/hostnamed.c
new file mode 100644
index 0000000..7777450
--- /dev/null
+++ b/src/hostname/hostnamed.c
@@ -0,0 +1,763 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-util.h"
+#include "def.h"
+#include "env-file-label.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fileio-label.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "main-func.h"
+#include "missing_capability.h"
+#include "nscd-flush.h"
+#include "os-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+#define VALID_DEPLOYMENT_CHARS (DIGITS LETTERS "-.:")
+
+enum {
+ PROP_HOSTNAME,
+ PROP_STATIC_HOSTNAME,
+ PROP_PRETTY_HOSTNAME,
+ PROP_ICON_NAME,
+ PROP_CHASSIS,
+ PROP_DEPLOYMENT,
+ PROP_LOCATION,
+ PROP_KERNEL_NAME,
+ PROP_KERNEL_RELEASE,
+ PROP_KERNEL_VERSION,
+ PROP_OS_PRETTY_NAME,
+ PROP_OS_CPE_NAME,
+ PROP_HOME_URL,
+ _PROP_MAX
+};
+
+typedef struct Context {
+ char *data[_PROP_MAX];
+ Hashmap *polkit_registry;
+ sd_id128_t uuid;
+ bool has_uuid;
+} Context;
+
+static void context_reset(Context *c) {
+ int p;
+
+ assert(c);
+
+ for (p = 0; p < _PROP_MAX; p++)
+ c->data[p] = mfree(c->data[p]);
+}
+
+static void context_clear(Context *c) {
+ assert(c);
+
+ context_reset(c);
+ bus_verify_polkit_async_registry_free(c->polkit_registry);
+}
+
+static int context_read_data(Context *c) {
+ int r;
+ struct utsname u;
+
+ assert(c);
+
+ context_reset(c);
+
+ assert_se(uname(&u) >= 0);
+ c->data[PROP_KERNEL_NAME] = strdup(u.sysname);
+ c->data[PROP_KERNEL_RELEASE] = strdup(u.release);
+ c->data[PROP_KERNEL_VERSION] = strdup(u.version);
+ if (!c->data[PROP_KERNEL_NAME] || !c->data[PROP_KERNEL_RELEASE] ||
+ !c->data[PROP_KERNEL_VERSION])
+ return -ENOMEM;
+
+ c->data[PROP_HOSTNAME] = gethostname_malloc();
+ if (!c->data[PROP_HOSTNAME])
+ return -ENOMEM;
+
+ r = read_etc_hostname(NULL, &c->data[PROP_STATIC_HOSTNAME]);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ r = parse_env_file(NULL, "/etc/machine-info",
+ "PRETTY_HOSTNAME", &c->data[PROP_PRETTY_HOSTNAME],
+ "ICON_NAME", &c->data[PROP_ICON_NAME],
+ "CHASSIS", &c->data[PROP_CHASSIS],
+ "DEPLOYMENT", &c->data[PROP_DEPLOYMENT],
+ "LOCATION", &c->data[PROP_LOCATION]);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ r = parse_os_release(NULL,
+ "PRETTY_NAME", &c->data[PROP_OS_PRETTY_NAME],
+ "CPE_NAME", &c->data[PROP_OS_CPE_NAME],
+ "HOME_URL", &c->data[PROP_HOME_URL],
+ NULL);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ r = id128_read("/sys/class/dmi/id/product_uuid", ID128_UUID, &c->uuid);
+ if (r < 0)
+ log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to read product UUID, ignoring: %m");
+ else if (sd_id128_is_null(c->uuid) || sd_id128_is_allf(c->uuid))
+ log_debug("DMI product UUID " SD_ID128_FORMAT_STR " is all 0x00 or all 0xFF, ignoring.", SD_ID128_FORMAT_VAL(c->uuid));
+ else
+ c->has_uuid = true;
+
+ return 0;
+}
+
+static bool valid_chassis(const char *chassis) {
+ assert(chassis);
+
+ return nulstr_contains(
+ "vm\0"
+ "container\0"
+ "desktop\0"
+ "laptop\0"
+ "convertible\0"
+ "server\0"
+ "tablet\0"
+ "handset\0"
+ "watch\0"
+ "embedded\0",
+ chassis);
+}
+
+static bool valid_deployment(const char *deployment) {
+ assert(deployment);
+
+ return in_charset(deployment, VALID_DEPLOYMENT_CHARS);
+}
+
+static const char* fallback_chassis(void) {
+ char *type;
+ unsigned t;
+ int v, r;
+
+ v = detect_virtualization();
+ if (VIRTUALIZATION_IS_VM(v))
+ return "vm";
+ if (VIRTUALIZATION_IS_CONTAINER(v))
+ return "container";
+
+ r = read_one_line_file("/sys/class/dmi/id/chassis_type", &type);
+ if (r < 0)
+ goto try_acpi;
+
+ r = safe_atou(type, &t);
+ free(type);
+ if (r < 0)
+ goto try_acpi;
+
+ /* We only list the really obvious cases here. The DMI data is unreliable enough, so let's not do any
+ additional guesswork on top of that.
+
+ See the SMBIOS Specification 3.0 section 7.4.1 for details about the values listed here:
+
+ https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_3.0.0.pdf
+ */
+
+ switch (t) {
+
+ case 0x3: /* Desktop */
+ case 0x4: /* Low Profile Desktop */
+ case 0x6: /* Mini Tower */
+ case 0x7: /* Tower */
+ return "desktop";
+
+ case 0x8: /* Portable */
+ case 0x9: /* Laptop */
+ case 0xA: /* Notebook */
+ case 0xE: /* Sub Notebook */
+ return "laptop";
+
+ case 0xB: /* Hand Held */
+ return "handset";
+
+ case 0x11: /* Main Server Chassis */
+ case 0x1C: /* Blade */
+ case 0x1D: /* Blade Enclosure */
+ return "server";
+
+ case 0x1E: /* Tablet */
+ return "tablet";
+
+ case 0x1F: /* Convertible */
+ case 0x20: /* Detachable */
+ return "convertible";
+ }
+
+try_acpi:
+ r = read_one_line_file("/sys/firmware/acpi/pm_profile", &type);
+ if (r < 0)
+ return NULL;
+
+ r = safe_atou(type, &t);
+ free(type);
+ if (r < 0)
+ return NULL;
+
+ /* We only list the really obvious cases here as the ACPI data is not really super reliable.
+ *
+ * See the ACPI 5.0 Spec Section 5.2.9.1 for details:
+ *
+ * http://www.acpi.info/DOWNLOADS/ACPIspec50.pdf
+ */
+
+ switch(t) {
+
+ case 1: /* Desktop */
+ case 3: /* Workstation */
+ case 6: /* Appliance PC */
+ return "desktop";
+
+ case 2: /* Mobile */
+ return "laptop";
+
+ case 4: /* Enterprise Server */
+ case 5: /* SOHO Server */
+ case 7: /* Performance Server */
+ return "server";
+
+ case 8: /* Tablet */
+ return "tablet";
+ }
+
+ return NULL;
+}
+
+static char* context_fallback_icon_name(Context *c) {
+ const char *chassis;
+
+ assert(c);
+
+ if (!isempty(c->data[PROP_CHASSIS]))
+ return strappend("computer-", c->data[PROP_CHASSIS]);
+
+ chassis = fallback_chassis();
+ if (chassis)
+ return strappend("computer-", chassis);
+
+ return strdup("computer");
+}
+
+static bool hostname_is_useful(const char *hn) {
+ return !isempty(hn) && !is_localhost(hn);
+}
+
+static int context_update_kernel_hostname(Context *c) {
+ const char *static_hn;
+ const char *hn;
+
+ assert(c);
+
+ static_hn = c->data[PROP_STATIC_HOSTNAME];
+
+ /* /etc/hostname with something other than "localhost"
+ * has the highest preference ... */
+ if (hostname_is_useful(static_hn))
+ hn = static_hn;
+
+ /* ... the transient host name, (ie: DHCP) comes next ... */
+ else if (!isempty(c->data[PROP_HOSTNAME]))
+ hn = c->data[PROP_HOSTNAME];
+
+ /* ... fallback to static "localhost.*" ignored above ... */
+ else if (!isempty(static_hn))
+ hn = static_hn;
+
+ /* ... and the ultimate fallback */
+ else
+ hn = FALLBACK_HOSTNAME;
+
+ if (sethostname_idempotent(hn) < 0)
+ return -errno;
+
+ (void) nscd_flush_cache(STRV_MAKE("hosts"));
+
+ return 0;
+}
+
+static int context_write_data_static_hostname(Context *c) {
+
+ assert(c);
+
+ if (isempty(c->data[PROP_STATIC_HOSTNAME])) {
+
+ if (unlink("/etc/hostname") < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ return 0;
+ }
+ return write_string_file_atomic_label("/etc/hostname", c->data[PROP_STATIC_HOSTNAME]);
+}
+
+static int context_write_data_machine_info(Context *c) {
+
+ static const char * const name[_PROP_MAX] = {
+ [PROP_PRETTY_HOSTNAME] = "PRETTY_HOSTNAME",
+ [PROP_ICON_NAME] = "ICON_NAME",
+ [PROP_CHASSIS] = "CHASSIS",
+ [PROP_DEPLOYMENT] = "DEPLOYMENT",
+ [PROP_LOCATION] = "LOCATION",
+ };
+
+ _cleanup_strv_free_ char **l = NULL;
+ int r, p;
+
+ assert(c);
+
+ r = load_env_file(NULL, "/etc/machine-info", &l);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ for (p = PROP_PRETTY_HOSTNAME; p <= PROP_LOCATION; p++) {
+ _cleanup_free_ char *t = NULL;
+ char **u;
+
+ assert(name[p]);
+
+ if (isempty(c->data[p])) {
+ strv_env_unset(l, name[p]);
+ continue;
+ }
+
+ t = strjoin(name[p], "=", c->data[p]);
+ if (!t)
+ return -ENOMEM;
+
+ u = strv_env_set(l, t);
+ if (!u)
+ return -ENOMEM;
+
+ strv_free_and_replace(l, u);
+ }
+
+ if (strv_isempty(l)) {
+ if (unlink("/etc/machine-info") < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ return 0;
+ }
+
+ return write_env_file_label("/etc/machine-info", l);
+}
+
+static int property_get_icon_name(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *n = NULL;
+ Context *c = userdata;
+ const char *name;
+
+ if (isempty(c->data[PROP_ICON_NAME]))
+ name = n = context_fallback_icon_name(c);
+ else
+ name = c->data[PROP_ICON_NAME];
+
+ if (!name)
+ return -ENOMEM;
+
+ return sd_bus_message_append(reply, "s", name);
+}
+
+static int property_get_chassis(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Context *c = userdata;
+ const char *name;
+
+ if (isempty(c->data[PROP_CHASSIS]))
+ name = fallback_chassis();
+ else
+ name = c->data[PROP_CHASSIS];
+
+ return sd_bus_message_append(reply, "s", name);
+}
+
+static int method_set_hostname(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ Context *c = userdata;
+ const char *name;
+ int interactive;
+ int r;
+
+ assert(m);
+ assert(c);
+
+ r = sd_bus_message_read(m, "sb", &name, &interactive);
+ if (r < 0)
+ return r;
+
+ if (isempty(name))
+ name = c->data[PROP_STATIC_HOSTNAME];
+
+ if (isempty(name))
+ name = FALLBACK_HOSTNAME;
+
+ if (!hostname_is_valid(name, false))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid hostname '%s'", name);
+
+ if (streq_ptr(name, c->data[PROP_HOSTNAME]))
+ return sd_bus_reply_method_return(m, NULL);
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.hostname1.set-hostname",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = free_and_strdup(&c->data[PROP_HOSTNAME], name);
+ if (r < 0)
+ return r;
+
+ r = context_update_kernel_hostname(c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set host name: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set hostname: %m");
+ }
+
+ log_info("Changed host name to '%s'", strna(c->data[PROP_HOSTNAME]));
+
+ (void) sd_bus_emit_properties_changed(sd_bus_message_get_bus(m), "/org/freedesktop/hostname1", "org.freedesktop.hostname1", "Hostname", NULL);
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_set_static_hostname(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ Context *c = userdata;
+ const char *name;
+ int interactive;
+ int r;
+
+ assert(m);
+ assert(c);
+
+ r = sd_bus_message_read(m, "sb", &name, &interactive);
+ if (r < 0)
+ return r;
+
+ name = empty_to_null(name);
+
+ if (streq_ptr(name, c->data[PROP_STATIC_HOSTNAME]))
+ return sd_bus_reply_method_return(m, NULL);
+
+ if (!isempty(name) && !hostname_is_valid(name, false))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid static hostname '%s'", name);
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.hostname1.set-static-hostname",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = free_and_strdup(&c->data[PROP_STATIC_HOSTNAME], name);
+ if (r < 0)
+ return r;
+
+ r = context_update_kernel_hostname(c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set host name: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set hostname: %m");
+ }
+
+ r = context_write_data_static_hostname(c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write static host name: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set static hostname: %m");
+ }
+
+ log_info("Changed static host name to '%s'", strna(c->data[PROP_STATIC_HOSTNAME]));
+
+ (void) sd_bus_emit_properties_changed(sd_bus_message_get_bus(m), "/org/freedesktop/hostname1", "org.freedesktop.hostname1", "StaticHostname", NULL);
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int set_machine_info(Context *c, sd_bus_message *m, int prop, sd_bus_message_handler_t cb, sd_bus_error *error) {
+ int interactive;
+ const char *name;
+ int r;
+
+ assert(c);
+ assert(m);
+
+ r = sd_bus_message_read(m, "sb", &name, &interactive);
+ if (r < 0)
+ return r;
+
+ name = empty_to_null(name);
+
+ if (streq_ptr(name, c->data[prop]))
+ return sd_bus_reply_method_return(m, NULL);
+
+ if (!isempty(name)) {
+ /* The icon name might ultimately be used as file
+ * name, so better be safe than sorry */
+
+ if (prop == PROP_ICON_NAME && !filename_is_valid(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid icon name '%s'", name);
+ if (prop == PROP_PRETTY_HOSTNAME && string_has_cc(name, NULL))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid pretty host name '%s'", name);
+ if (prop == PROP_CHASSIS && !valid_chassis(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid chassis '%s'", name);
+ if (prop == PROP_DEPLOYMENT && !valid_deployment(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid deployment '%s'", name);
+ if (prop == PROP_LOCATION && string_has_cc(name, NULL))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid location '%s'", name);
+ }
+
+ /* Since the pretty hostname should always be changed at the
+ * same time as the static one, use the same policy action for
+ * both... */
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_ADMIN,
+ prop == PROP_PRETTY_HOSTNAME ? "org.freedesktop.hostname1.set-static-hostname" : "org.freedesktop.hostname1.set-machine-info",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = free_and_strdup(&c->data[prop], name);
+ if (r < 0)
+ return r;
+
+ r = context_write_data_machine_info(c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write machine info: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to write machine info: %m");
+ }
+
+ log_info("Changed %s to '%s'",
+ prop == PROP_PRETTY_HOSTNAME ? "pretty host name" :
+ prop == PROP_DEPLOYMENT ? "deployment" :
+ prop == PROP_LOCATION ? "location" :
+ prop == PROP_CHASSIS ? "chassis" : "icon name", strna(c->data[prop]));
+
+ (void) sd_bus_emit_properties_changed(
+ sd_bus_message_get_bus(m),
+ "/org/freedesktop/hostname1",
+ "org.freedesktop.hostname1",
+ prop == PROP_PRETTY_HOSTNAME ? "PrettyHostname" :
+ prop == PROP_DEPLOYMENT ? "Deployment" :
+ prop == PROP_LOCATION ? "Location" :
+ prop == PROP_CHASSIS ? "Chassis" : "IconName" , NULL);
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_set_pretty_hostname(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ return set_machine_info(userdata, m, PROP_PRETTY_HOSTNAME, method_set_pretty_hostname, error);
+}
+
+static int method_set_icon_name(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ return set_machine_info(userdata, m, PROP_ICON_NAME, method_set_icon_name, error);
+}
+
+static int method_set_chassis(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ return set_machine_info(userdata, m, PROP_CHASSIS, method_set_chassis, error);
+}
+
+static int method_set_deployment(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ return set_machine_info(userdata, m, PROP_DEPLOYMENT, method_set_deployment, error);
+}
+
+static int method_set_location(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ return set_machine_info(userdata, m, PROP_LOCATION, method_set_location, error);
+}
+
+static int method_get_product_uuid(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Context *c = userdata;
+ int interactive, r;
+
+ assert(m);
+ assert(c);
+
+ if (!c->has_uuid)
+ return sd_bus_error_set(error, BUS_ERROR_NO_PRODUCT_UUID, "Failed to read product UUID from /sys/class/dmi/id/product_uuid");
+
+ r = sd_bus_message_read(m, "b", &interactive);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.hostname1.get-product-uuid",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &c->uuid, sizeof(c->uuid));
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static const sd_bus_vtable hostname_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Hostname", "s", NULL, offsetof(Context, data) + sizeof(char*) * PROP_HOSTNAME, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("StaticHostname", "s", NULL, offsetof(Context, data) + sizeof(char*) * PROP_STATIC_HOSTNAME, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("PrettyHostname", "s", NULL, offsetof(Context, data) + sizeof(char*) * PROP_PRETTY_HOSTNAME, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IconName", "s", property_get_icon_name, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Chassis", "s", property_get_chassis, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Deployment", "s", NULL, offsetof(Context, data) + sizeof(char*) * PROP_DEPLOYMENT, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Location", "s", NULL, offsetof(Context, data) + sizeof(char*) * PROP_LOCATION, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("KernelName", "s", NULL, offsetof(Context, data) + sizeof(char*) * PROP_KERNEL_NAME, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KernelRelease", "s", NULL, offsetof(Context, data) + sizeof(char*) * PROP_KERNEL_RELEASE, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KernelVersion", "s", NULL, offsetof(Context, data) + sizeof(char*) * PROP_KERNEL_VERSION, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("OperatingSystemPrettyName", "s", NULL, offsetof(Context, data) + sizeof(char*) * PROP_OS_PRETTY_NAME, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("OperatingSystemCPEName", "s", NULL, offsetof(Context, data) + sizeof(char*) * PROP_OS_CPE_NAME, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HomeURL", "s", NULL, offsetof(Context, data) + sizeof(char*) * PROP_HOME_URL, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_METHOD("SetHostname", "sb", NULL, method_set_hostname, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetStaticHostname", "sb", NULL, method_set_static_hostname, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetPrettyHostname", "sb", NULL, method_set_pretty_hostname, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetIconName", "sb", NULL, method_set_icon_name, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetChassis", "sb", NULL, method_set_chassis, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetDeployment", "sb", NULL, method_set_deployment, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLocation", "sb", NULL, method_set_location, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetProductUUID", "b", "ay", method_get_product_uuid, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END,
+};
+
+static int connect_bus(Context *c, sd_event *event, sd_bus **_bus) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(c);
+ assert(event);
+ assert(_bus);
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get system bus connection: %m");
+
+ r = sd_bus_add_object_vtable(bus, NULL, "/org/freedesktop/hostname1", "org.freedesktop.hostname1", hostname_vtable, c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register object: %m");
+
+ r = sd_bus_request_name_async(bus, NULL, "org.freedesktop.hostname1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ *_bus = TAKE_PTR(bus);
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(context_clear) Context context = {};
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+ mac_selinux_init();
+
+ if (argc != 1) {
+ log_error("This program takes no arguments.");
+ return -EINVAL;
+ }
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ (void) sd_event_set_watchdog(event, true);
+
+ r = sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to install SIGINT handler: %m");
+
+ r = sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to install SIGTERM handler: %m");
+
+ r = connect_bus(&context, event, &bus);
+ if (r < 0)
+ return r;
+
+ r = context_read_data(&context);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read hostname and machine information: %m");
+
+ r = bus_event_loop_with_idle(event, bus, "org.freedesktop.hostname1", DEFAULT_EXIT_USEC, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/hostname/meson.build b/src/hostname/meson.build
new file mode 100644
index 0000000..db66ba5
--- /dev/null
+++ b/src/hostname/meson.build
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+if conf.get('ENABLE_HOSTNAMED') == 1
+ install_data('org.freedesktop.hostname1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.hostname1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.hostname1.policy',
+ install_dir : polkitpolicydir)
+endif
diff --git a/src/hostname/org.freedesktop.hostname1.conf b/src/hostname/org.freedesktop.hostname1.conf
new file mode 100644
index 0000000..e2658c6
--- /dev/null
+++ b/src/hostname/org.freedesktop.hostname1.conf
@@ -0,0 +1,29 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1+
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.hostname1"/>
+ <allow send_destination="org.freedesktop.hostname1"/>
+ <allow receive_sender="org.freedesktop.hostname1"/>
+ </policy>
+
+ <policy context="default">
+ <allow send_destination="org.freedesktop.hostname1"/>
+ <allow receive_sender="org.freedesktop.hostname1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/hostname/org.freedesktop.hostname1.policy b/src/hostname/org.freedesktop.hostname1.policy
new file mode 100644
index 0000000..5bedc0b
--- /dev/null
+++ b/src/hostname/org.freedesktop.hostname1.policy
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1+
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.hostname1.set-hostname">
+ <description gettext-domain="systemd">Set host name</description>
+ <message gettext-domain="systemd">Authentication is required to set the local host name.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.hostname1.set-static-hostname">
+ <description gettext-domain="systemd">Set static host name</description>
+ <message gettext-domain="systemd">Authentication is required to set the statically configured local host name, as well as the pretty host name.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.hostname1.set-hostname org.freedesktop.hostname1.set-machine-info</annotate>
+ </action>
+
+ <action id="org.freedesktop.hostname1.set-machine-info">
+ <description gettext-domain="systemd">Set machine information</description>
+ <message gettext-domain="systemd">Authentication is required to set local machine information.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.hostname1.get-product-uuid">
+ <description gettext-domain="systemd">Get product UUID</description>
+ <message gettext-domain="systemd">Authentication is required to get product UUID.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/hostname/org.freedesktop.hostname1.service b/src/hostname/org.freedesktop.hostname1.service
new file mode 100644
index 0000000..98c7bcb
--- /dev/null
+++ b/src/hostname/org.freedesktop.hostname1.service
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.hostname1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.hostname1.service
diff --git a/src/hwdb/hwdb.c b/src/hwdb/hwdb.c
new file mode 100644
index 0000000..b069a3e
--- /dev/null
+++ b/src/hwdb/hwdb.c
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+
+#include "sd-hwdb.h"
+
+#include "alloc-util.h"
+#include "hwdb-util.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "selinux-util.h"
+#include "util.h"
+#include "verbs.h"
+
+static const char *arg_hwdb_bin_dir = NULL;
+static const char *arg_root = NULL;
+static bool arg_strict = false;
+
+static int verb_query(int argc, char *argv[], void *userdata) {
+ return hwdb_query(argv[1]);
+}
+
+static int verb_update(int argc, char *argv[], void *userdata) {
+ return hwdb_update(arg_root, arg_hwdb_bin_dir, arg_strict, false);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-hwdb", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s OPTIONS COMMAND\n\n"
+ "Update or query the hardware database.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -s --strict When updating, return non-zero exit value on any parsing error\n"
+ " --usr Generate in " UDEVLIBEXECDIR " instead of /etc/udev\n"
+ " -r --root=PATH Alternative root path in the filesystem\n\n"
+ "Commands:\n"
+ " update Update the hwdb database\n"
+ " query MODALIAS Query database and print result\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_USR,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "usr", no_argument, NULL, ARG_USR },
+ { "strict", no_argument, NULL, 's' },
+ { "root", required_argument, NULL, 'r' },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "ust:r:h", options, NULL)) >= 0)
+ switch(c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_USR:
+ arg_hwdb_bin_dir = UDEVLIBEXECDIR;
+ break;
+
+ case 's':
+ arg_strict = true;
+ break;
+
+ case 'r':
+ arg_root = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ return 1;
+}
+
+static int hwdb_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "update", 1, 1, 0, verb_update },
+ { "query", 2, 2, 0, verb_query },
+ {},
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ mac_selinux_init();
+
+ return hwdb_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/id128/id128.c b/src/id128/id128.c
new file mode 100644
index 0000000..06049e7
--- /dev/null
+++ b/src/id128/id128.c
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "id128-print.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "util.h"
+#include "verbs.h"
+
+static bool arg_pretty = false;
+static sd_id128_t arg_app = {};
+
+static int verb_new(int argc, char **argv, void *userdata) {
+ return id128_print_new(arg_pretty);
+}
+
+static int verb_machine_id(int argc, char **argv, void *userdata) {
+ sd_id128_t id;
+ int r;
+
+ if (sd_id128_is_null(arg_app))
+ r = sd_id128_get_machine(&id);
+ else
+ r = sd_id128_get_machine_app_specific(arg_app, &id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get %smachine-ID: %m",
+ sd_id128_is_null(arg_app) ? "" : "app-specific ");
+
+ return id128_pretty_print(id, arg_pretty);
+}
+
+static int verb_boot_id(int argc, char **argv, void *userdata) {
+ sd_id128_t id;
+ int r;
+
+ if (sd_id128_is_null(arg_app))
+ r = sd_id128_get_boot(&id);
+ else
+ r = sd_id128_get_boot_app_specific(arg_app, &id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get %sboot-ID: %m",
+ sd_id128_is_null(arg_app) ? "" : "app-specific ");
+
+ return id128_pretty_print(id, arg_pretty);
+}
+
+static int verb_invocation_id(int argc, char **argv, void *userdata) {
+ sd_id128_t id;
+ int r;
+
+ if (!sd_id128_is_null(arg_app))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Verb \"invocation-id\" cannot be combined with --app-specific=.");
+
+ r = sd_id128_get_invocation(&id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get invocation-ID: %m");
+
+ return id128_pretty_print(id, arg_pretty);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-id128", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Generate and print id128 strings.\n\n"
+ " -h --help Show this help\n"
+ " -p --pretty Generate samples of program code\n"
+ " -a --app-specific=ID Generate app-specific IDs\n"
+ "\nCommands:\n"
+ " new Generate a new id128 string\n"
+ " machine-id Print the ID of current machine\n"
+ " boot-id Print the ID of current boot\n"
+ " invocation-id Print the ID of current invocation\n"
+ " help Show this help\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int verb_help(int argc, char **argv, void *userdata) {
+ return help();
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "app-specific", required_argument, NULL, 'a' },
+ {},
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hpa:", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'p':
+ arg_pretty = true;
+ break;
+
+ case 'a':
+ r = sd_id128_from_string(optarg, &arg_app);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse \"%s\" as application-ID: %m", optarg);
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int id128_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "new", VERB_ANY, 1, 0, verb_new },
+ { "machine-id", VERB_ANY, 1, 0, verb_machine_id },
+ { "boot-id", VERB_ANY, 1, 0, verb_boot_id },
+ { "invocation-id", VERB_ANY, 1, 0, verb_invocation_id },
+ { "help", VERB_ANY, VERB_ANY, 0, verb_help },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ return id128_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/import/curl-util.c b/src/import/curl-util.c
new file mode 100644
index 0000000..7db03b2
--- /dev/null
+++ b/src/import/curl-util.c
@@ -0,0 +1,416 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "build.h"
+#include "curl-util.h"
+#include "fd-util.h"
+#include "locale-util.h"
+#include "string-util.h"
+
+static void curl_glue_check_finished(CurlGlue *g) {
+ CURLMsg *msg;
+ int k = 0;
+
+ assert(g);
+
+ msg = curl_multi_info_read(g->curl, &k);
+ if (!msg)
+ return;
+
+ if (msg->msg != CURLMSG_DONE)
+ return;
+
+ if (g->on_finished)
+ g->on_finished(g, msg->easy_handle, msg->data.result);
+}
+
+static int curl_glue_on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ CurlGlue *g = userdata;
+ int action, k = 0, translated_fd;
+
+ assert(s);
+ assert(g);
+
+ translated_fd = PTR_TO_FD(hashmap_get(g->translate_fds, FD_TO_PTR(fd)));
+
+ if (FLAGS_SET(revents, EPOLLIN | EPOLLOUT))
+ action = CURL_POLL_INOUT;
+ else if (revents & EPOLLIN)
+ action = CURL_POLL_IN;
+ else if (revents & EPOLLOUT)
+ action = CURL_POLL_OUT;
+ else
+ action = 0;
+
+ if (curl_multi_socket_action(g->curl, translated_fd, action, &k) != CURLM_OK)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to propagate IO event.");
+
+ curl_glue_check_finished(g);
+ return 0;
+}
+
+static int curl_glue_socket_callback(CURLM *curl, curl_socket_t s, int action, void *userdata, void *socketp) {
+ sd_event_source *io;
+ CurlGlue *g = userdata;
+ uint32_t events = 0;
+ int r;
+
+ assert(curl);
+ assert(g);
+
+ io = hashmap_get(g->ios, FD_TO_PTR(s));
+
+ if (action == CURL_POLL_REMOVE) {
+ if (io) {
+ int fd;
+
+ fd = sd_event_source_get_io_fd(io);
+ assert(fd >= 0);
+
+ sd_event_source_set_enabled(io, SD_EVENT_OFF);
+ sd_event_source_unref(io);
+
+ hashmap_remove(g->ios, FD_TO_PTR(s));
+ hashmap_remove(g->translate_fds, FD_TO_PTR(fd));
+
+ safe_close(fd);
+ }
+
+ return 0;
+ }
+
+ r = hashmap_ensure_allocated(&g->ios, &trivial_hash_ops);
+ if (r < 0) {
+ log_oom();
+ return -1;
+ }
+
+ r = hashmap_ensure_allocated(&g->translate_fds, &trivial_hash_ops);
+ if (r < 0) {
+ log_oom();
+ return -1;
+ }
+
+ if (action == CURL_POLL_IN)
+ events = EPOLLIN;
+ else if (action == CURL_POLL_OUT)
+ events = EPOLLOUT;
+ else if (action == CURL_POLL_INOUT)
+ events = EPOLLIN|EPOLLOUT;
+
+ if (io) {
+ if (sd_event_source_set_io_events(io, events) < 0)
+ return -1;
+
+ if (sd_event_source_set_enabled(io, SD_EVENT_ON) < 0)
+ return -1;
+ } else {
+ _cleanup_close_ int fd = -1;
+
+ /* When curl needs to remove an fd from us it closes
+ * the fd first, and only then calls into us. This is
+ * nasty, since we cannot pass the fd on to epoll()
+ * anymore. Hence, duplicate the fds here, and keep a
+ * copy for epoll which we control after use. */
+
+ fd = fcntl(s, F_DUPFD_CLOEXEC, 3);
+ if (fd < 0)
+ return -1;
+
+ if (sd_event_add_io(g->event, &io, fd, events, curl_glue_on_io, g) < 0)
+ return -1;
+
+ (void) sd_event_source_set_description(io, "curl-io");
+
+ r = hashmap_put(g->ios, FD_TO_PTR(s), io);
+ if (r < 0) {
+ log_oom();
+ sd_event_source_unref(io);
+ return -1;
+ }
+
+ r = hashmap_put(g->translate_fds, FD_TO_PTR(fd), FD_TO_PTR(s));
+ if (r < 0) {
+ log_oom();
+ hashmap_remove(g->ios, FD_TO_PTR(s));
+ sd_event_source_unref(io);
+ return -1;
+ }
+
+ fd = -1;
+ }
+
+ return 0;
+}
+
+static int curl_glue_on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
+ CurlGlue *g = userdata;
+ int k = 0;
+
+ assert(s);
+ assert(g);
+
+ if (curl_multi_socket_action(g->curl, CURL_SOCKET_TIMEOUT, 0, &k) != CURLM_OK)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to propagate timeout.");
+
+ curl_glue_check_finished(g);
+ return 0;
+}
+
+static int curl_glue_timer_callback(CURLM *curl, long timeout_ms, void *userdata) {
+ CurlGlue *g = userdata;
+ usec_t usec;
+
+ assert(curl);
+ assert(g);
+
+ if (timeout_ms < 0) {
+ if (g->timer) {
+ if (sd_event_source_set_enabled(g->timer, SD_EVENT_OFF) < 0)
+ return -1;
+ }
+
+ return 0;
+ }
+
+ usec = now(clock_boottime_or_monotonic()) + (usec_t) timeout_ms * USEC_PER_MSEC + USEC_PER_MSEC - 1;
+
+ if (g->timer) {
+ if (sd_event_source_set_time(g->timer, usec) < 0)
+ return -1;
+
+ if (sd_event_source_set_enabled(g->timer, SD_EVENT_ONESHOT) < 0)
+ return -1;
+ } else {
+ if (sd_event_add_time(g->event, &g->timer, clock_boottime_or_monotonic(), usec, 0, curl_glue_on_timer, g) < 0)
+ return -1;
+
+ (void) sd_event_source_set_description(g->timer, "curl-timer");
+ }
+
+ return 0;
+}
+
+CurlGlue *curl_glue_unref(CurlGlue *g) {
+ sd_event_source *io;
+
+ if (!g)
+ return NULL;
+
+ if (g->curl)
+ curl_multi_cleanup(g->curl);
+
+ while ((io = hashmap_steal_first(g->ios))) {
+ int fd;
+
+ fd = sd_event_source_get_io_fd(io);
+ assert(fd >= 0);
+
+ hashmap_remove(g->translate_fds, FD_TO_PTR(fd));
+
+ safe_close(fd);
+ sd_event_source_unref(io);
+ }
+
+ hashmap_free(g->ios);
+
+ sd_event_source_unref(g->timer);
+ sd_event_unref(g->event);
+ return mfree(g);
+}
+
+int curl_glue_new(CurlGlue **glue, sd_event *event) {
+ _cleanup_(curl_glue_unrefp) CurlGlue *g = NULL;
+ _cleanup_(curl_multi_cleanupp) CURL *c = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ int r;
+
+ if (event)
+ e = sd_event_ref(event);
+ else {
+ r = sd_event_default(&e);
+ if (r < 0)
+ return r;
+ }
+
+ c = curl_multi_init();
+ if (!c)
+ return -ENOMEM;
+
+ g = new(CurlGlue, 1);
+ if (!g)
+ return -ENOMEM;
+
+ *g = (CurlGlue) {
+ .event = TAKE_PTR(e),
+ .curl = TAKE_PTR(c),
+ };
+
+ if (curl_multi_setopt(g->curl, CURLMOPT_SOCKETDATA, g) != CURLM_OK)
+ return -EINVAL;
+
+ if (curl_multi_setopt(g->curl, CURLMOPT_SOCKETFUNCTION, curl_glue_socket_callback) != CURLM_OK)
+ return -EINVAL;
+
+ if (curl_multi_setopt(g->curl, CURLMOPT_TIMERDATA, g) != CURLM_OK)
+ return -EINVAL;
+
+ if (curl_multi_setopt(g->curl, CURLMOPT_TIMERFUNCTION, curl_glue_timer_callback) != CURLM_OK)
+ return -EINVAL;
+
+ *glue = TAKE_PTR(g);
+
+ return 0;
+}
+
+int curl_glue_make(CURL **ret, const char *url, void *userdata) {
+ _cleanup_(curl_easy_cleanupp) CURL *c = NULL;
+ const char *useragent;
+
+ assert(ret);
+ assert(url);
+
+ c = curl_easy_init();
+ if (!c)
+ return -ENOMEM;
+
+ /* curl_easy_setopt(c, CURLOPT_VERBOSE, 1L); */
+
+ if (curl_easy_setopt(c, CURLOPT_URL, url) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(c, CURLOPT_PRIVATE, userdata) != CURLE_OK)
+ return -EIO;
+
+ useragent = strjoina(program_invocation_short_name, "/" GIT_VERSION);
+ if (curl_easy_setopt(c, CURLOPT_USERAGENT, useragent) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(c, CURLOPT_FOLLOWLOCATION, 1L) != CURLE_OK)
+ return -EIO;
+
+ *ret = TAKE_PTR(c);
+ return 0;
+}
+
+int curl_glue_add(CurlGlue *g, CURL *c) {
+ assert(g);
+ assert(c);
+
+ if (curl_multi_add_handle(g->curl, c) != CURLM_OK)
+ return -EIO;
+
+ return 0;
+}
+
+void curl_glue_remove_and_free(CurlGlue *g, CURL *c) {
+ assert(g);
+
+ if (!c)
+ return;
+
+ if (g->curl)
+ curl_multi_remove_handle(g->curl, c);
+
+ curl_easy_cleanup(c);
+}
+
+struct curl_slist *curl_slist_new(const char *first, ...) {
+ struct curl_slist *l;
+ va_list ap;
+
+ if (!first)
+ return NULL;
+
+ l = curl_slist_append(NULL, first);
+ if (!l)
+ return NULL;
+
+ va_start(ap, first);
+
+ for (;;) {
+ struct curl_slist *n;
+ const char *i;
+
+ i = va_arg(ap, const char*);
+ if (!i)
+ break;
+
+ n = curl_slist_append(l, i);
+ if (!n) {
+ va_end(ap);
+ curl_slist_free_all(l);
+ return NULL;
+ }
+
+ l = n;
+ }
+
+ va_end(ap);
+ return l;
+}
+
+int curl_header_strdup(const void *contents, size_t sz, const char *field, char **value) {
+ const char *p;
+ char *s;
+
+ p = memory_startswith_no_case(contents, sz, field);
+ if (!p)
+ return 0;
+
+ sz -= p - (const char*) contents;
+
+ if (memchr(p, 0, sz))
+ return 0;
+
+ /* Skip over preceeding whitespace */
+ while (sz > 0 && strchr(WHITESPACE, p[0])) {
+ p++;
+ sz--;
+ }
+
+ /* Truncate trailing whitespace */
+ while (sz > 0 && strchr(WHITESPACE, p[sz-1]))
+ sz--;
+
+ s = strndup(p, sz);
+ if (!s)
+ return -ENOMEM;
+
+ *value = s;
+ return 1;
+}
+
+int curl_parse_http_time(const char *t, usec_t *ret) {
+ _cleanup_(freelocalep) locale_t loc = (locale_t) 0;
+ const char *e;
+ struct tm tm;
+ time_t v;
+
+ assert(t);
+ assert(ret);
+
+ loc = newlocale(LC_TIME_MASK, "C", (locale_t) 0);
+ if (loc == (locale_t) 0)
+ return -errno;
+
+ /* RFC822 */
+ e = strptime_l(t, "%a, %d %b %Y %H:%M:%S %Z", &tm, loc);
+ if (!e || *e != 0)
+ /* RFC 850 */
+ e = strptime_l(t, "%A, %d-%b-%y %H:%M:%S %Z", &tm, loc);
+ if (!e || *e != 0)
+ /* ANSI C */
+ e = strptime_l(t, "%a %b %d %H:%M:%S %Y", &tm, loc);
+ if (!e || *e != 0)
+ return -EINVAL;
+
+ v = timegm(&tm);
+ if (v == (time_t) -1)
+ return -EINVAL;
+
+ *ret = (usec_t) v * USEC_PER_SEC;
+ return 0;
+}
diff --git a/src/import/curl-util.h b/src/import/curl-util.h
new file mode 100644
index 0000000..a03e844
--- /dev/null
+++ b/src/import/curl-util.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <curl/curl.h>
+#include <sys/types.h>
+
+#include "sd-event.h"
+
+#include "hashmap.h"
+
+typedef struct CurlGlue CurlGlue;
+
+struct CurlGlue {
+ sd_event *event;
+ CURLM *curl;
+ sd_event_source *timer;
+ Hashmap *ios;
+ Hashmap *translate_fds;
+
+ void (*on_finished)(CurlGlue *g, CURL *curl, CURLcode code);
+ void *userdata;
+};
+
+int curl_glue_new(CurlGlue **glue, sd_event *event);
+CurlGlue* curl_glue_unref(CurlGlue *glue);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(CurlGlue*, curl_glue_unref);
+
+int curl_glue_make(CURL **ret, const char *url, void *userdata);
+int curl_glue_add(CurlGlue *g, CURL *c);
+void curl_glue_remove_and_free(CurlGlue *g, CURL *c);
+
+struct curl_slist *curl_slist_new(const char *first, ...) _sentinel_;
+int curl_header_strdup(const void *contents, size_t sz, const char *field, char **value);
+int curl_parse_http_time(const char *t, usec_t *ret);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(CURL*, curl_easy_cleanup);
+DEFINE_TRIVIAL_CLEANUP_FUNC(CURL*, curl_multi_cleanup);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct curl_slist*, curl_slist_free_all);
diff --git a/src/import/export-raw.c b/src/import/export-raw.c
new file mode 100644
index 0000000..6a02b47
--- /dev/null
+++ b/src/import/export-raw.c
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/sendfile.h>
+
+/* When we include libgen.h because we need dirname() we immediately
+ * undefine basename() since libgen.h defines it as a macro to the POSIX
+ * version which is really broken. We prefer GNU basename(). */
+#include <libgen.h>
+#undef basename
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "copy.h"
+#include "export-raw.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "import-common.h"
+#include "missing.h"
+#include "ratelimit.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+#define COPY_BUFFER_SIZE (16*1024)
+
+struct RawExport {
+ sd_event *event;
+
+ RawExportFinished on_finished;
+ void *userdata;
+
+ char *path;
+
+ int input_fd;
+ int output_fd;
+
+ ImportCompress compress;
+
+ sd_event_source *output_event_source;
+
+ void *buffer;
+ size_t buffer_size;
+ size_t buffer_allocated;
+
+ uint64_t written_compressed;
+ uint64_t written_uncompressed;
+
+ unsigned last_percent;
+ RateLimit progress_rate_limit;
+
+ struct stat st;
+
+ bool eof;
+ bool tried_reflink;
+ bool tried_sendfile;
+};
+
+RawExport *raw_export_unref(RawExport *e) {
+ if (!e)
+ return NULL;
+
+ sd_event_source_unref(e->output_event_source);
+
+ import_compress_free(&e->compress);
+
+ sd_event_unref(e->event);
+
+ safe_close(e->input_fd);
+
+ free(e->buffer);
+ free(e->path);
+ return mfree(e);
+}
+
+int raw_export_new(
+ RawExport **ret,
+ sd_event *event,
+ RawExportFinished on_finished,
+ void *userdata) {
+
+ _cleanup_(raw_export_unrefp) RawExport *e = NULL;
+ int r;
+
+ assert(ret);
+
+ e = new(RawExport, 1);
+ if (!e)
+ return -ENOMEM;
+
+ *e = (RawExport) {
+ .output_fd = -1,
+ .input_fd = -1,
+ .on_finished = on_finished,
+ .userdata = userdata,
+ .last_percent = (unsigned) -1,
+ };
+
+ RATELIMIT_INIT(e->progress_rate_limit, 100 * USEC_PER_MSEC, 1);
+
+ if (event)
+ e->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&e->event);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(e);
+
+ return 0;
+}
+
+static void raw_export_report_progress(RawExport *e) {
+ unsigned percent;
+ assert(e);
+
+ if (e->written_uncompressed >= (uint64_t) e->st.st_size)
+ percent = 100;
+ else
+ percent = (unsigned) ((e->written_uncompressed * UINT64_C(100)) / (uint64_t) e->st.st_size);
+
+ if (percent == e->last_percent)
+ return;
+
+ if (!ratelimit_below(&e->progress_rate_limit))
+ return;
+
+ sd_notifyf(false, "X_IMPORT_PROGRESS=%u", percent);
+ log_info("Exported %u%%.", percent);
+
+ e->last_percent = percent;
+}
+
+static int raw_export_process(RawExport *e) {
+ ssize_t l;
+ int r;
+
+ assert(e);
+
+ if (!e->tried_reflink && e->compress.type == IMPORT_COMPRESS_UNCOMPRESSED) {
+
+ /* If we shall take an uncompressed snapshot we can
+ * reflink source to destination directly. Let's see
+ * if this works. */
+
+ r = btrfs_reflink(e->input_fd, e->output_fd);
+ if (r >= 0) {
+ r = 0;
+ goto finish;
+ }
+
+ e->tried_reflink = true;
+ }
+
+ if (!e->tried_sendfile && e->compress.type == IMPORT_COMPRESS_UNCOMPRESSED) {
+
+ l = sendfile(e->output_fd, e->input_fd, NULL, COPY_BUFFER_SIZE);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ e->tried_sendfile = true;
+ } else if (l == 0) {
+ r = 0;
+ goto finish;
+ } else {
+ e->written_uncompressed += l;
+ e->written_compressed += l;
+
+ raw_export_report_progress(e);
+
+ return 0;
+ }
+ }
+
+ while (e->buffer_size <= 0) {
+ uint8_t input[COPY_BUFFER_SIZE];
+
+ if (e->eof) {
+ r = 0;
+ goto finish;
+ }
+
+ l = read(e->input_fd, input, sizeof(input));
+ if (l < 0) {
+ r = log_error_errno(errno, "Failed to read raw file: %m");
+ goto finish;
+ }
+
+ if (l == 0) {
+ e->eof = true;
+ r = import_compress_finish(&e->compress, &e->buffer, &e->buffer_size, &e->buffer_allocated);
+ } else {
+ e->written_uncompressed += l;
+ r = import_compress(&e->compress, input, l, &e->buffer, &e->buffer_size, &e->buffer_allocated);
+ }
+ if (r < 0) {
+ r = log_error_errno(r, "Failed to encode: %m");
+ goto finish;
+ }
+ }
+
+ l = write(e->output_fd, e->buffer, e->buffer_size);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ r = log_error_errno(errno, "Failed to write output file: %m");
+ goto finish;
+ }
+
+ assert((size_t) l <= e->buffer_size);
+ memmove(e->buffer, (uint8_t*) e->buffer + l, e->buffer_size - l);
+ e->buffer_size -= l;
+ e->written_compressed += l;
+
+ raw_export_report_progress(e);
+
+ return 0;
+
+finish:
+ if (r >= 0) {
+ (void) copy_times(e->input_fd, e->output_fd);
+ (void) copy_xattr(e->input_fd, e->output_fd);
+ }
+
+ if (e->on_finished)
+ e->on_finished(e, r, e->userdata);
+ else
+ sd_event_exit(e->event, r);
+
+ return 0;
+}
+
+static int raw_export_on_output(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ RawExport *i = userdata;
+
+ return raw_export_process(i);
+}
+
+static int raw_export_on_defer(sd_event_source *s, void *userdata) {
+ RawExport *i = userdata;
+
+ return raw_export_process(i);
+}
+
+static int reflink_snapshot(int fd, const char *path) {
+ int new_fd, r;
+
+ new_fd = open_parent(path, O_TMPFILE|O_CLOEXEC|O_RDWR, 0600);
+ if (new_fd < 0) {
+ _cleanup_free_ char *t = NULL;
+
+ r = tempfn_random(path, NULL, &t);
+ if (r < 0)
+ return r;
+
+ new_fd = open(t, O_CLOEXEC|O_CREAT|O_NOCTTY|O_RDWR, 0600);
+ if (new_fd < 0)
+ return -errno;
+
+ (void) unlink(t);
+ }
+
+ r = btrfs_reflink(fd, new_fd);
+ if (r < 0) {
+ safe_close(new_fd);
+ return r;
+ }
+
+ return new_fd;
+}
+
+int raw_export_start(RawExport *e, const char *path, int fd, ImportCompressType compress) {
+ _cleanup_close_ int sfd = -1, tfd = -1;
+ int r;
+
+ assert(e);
+ assert(path);
+ assert(fd >= 0);
+ assert(compress < _IMPORT_COMPRESS_TYPE_MAX);
+ assert(compress != IMPORT_COMPRESS_UNKNOWN);
+
+ if (e->output_fd >= 0)
+ return -EBUSY;
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&e->path, path);
+ if (r < 0)
+ return r;
+
+ sfd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (sfd < 0)
+ return -errno;
+
+ if (fstat(sfd, &e->st) < 0)
+ return -errno;
+ r = stat_verify_regular(&e->st);
+ if (r < 0)
+ return r;
+
+ /* Try to take a reflink snapshot of the file, if we can t make the export atomic */
+ tfd = reflink_snapshot(sfd, path);
+ if (tfd >= 0)
+ e->input_fd = TAKE_FD(tfd);
+ else
+ e->input_fd = TAKE_FD(sfd);
+
+ r = import_compress_init(&e->compress, compress);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_io(e->event, &e->output_event_source, fd, EPOLLOUT, raw_export_on_output, e);
+ if (r == -EPERM) {
+ r = sd_event_add_defer(e->event, &e->output_event_source, raw_export_on_defer, e);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(e->output_event_source, SD_EVENT_ON);
+ }
+ if (r < 0)
+ return r;
+
+ e->output_fd = fd;
+ return r;
+}
diff --git a/src/import/export-raw.h b/src/import/export-raw.h
new file mode 100644
index 0000000..196b8ef
--- /dev/null
+++ b/src/import/export-raw.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-event.h"
+
+#include "import-compress.h"
+#include "macro.h"
+
+typedef struct RawExport RawExport;
+
+typedef void (*RawExportFinished)(RawExport *export, int error, void *userdata);
+
+int raw_export_new(RawExport **export, sd_event *event, RawExportFinished on_finished, void *userdata);
+RawExport* raw_export_unref(RawExport *export);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(RawExport*, raw_export_unref);
+
+int raw_export_start(RawExport *export, const char *path, int fd, ImportCompressType compress);
diff --git a/src/import/export-tar.c b/src/import/export-tar.c
new file mode 100644
index 0000000..ed54676
--- /dev/null
+++ b/src/import/export-tar.c
@@ -0,0 +1,331 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "export-tar.h"
+#include "fd-util.h"
+#include "import-common.h"
+#include "process-util.h"
+#include "ratelimit.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+#define COPY_BUFFER_SIZE (16*1024)
+
+struct TarExport {
+ sd_event *event;
+
+ TarExportFinished on_finished;
+ void *userdata;
+
+ char *path;
+ char *temp_path;
+
+ int output_fd;
+ int tar_fd;
+
+ ImportCompress compress;
+
+ sd_event_source *output_event_source;
+
+ void *buffer;
+ size_t buffer_size;
+ size_t buffer_allocated;
+
+ uint64_t written_compressed;
+ uint64_t written_uncompressed;
+
+ pid_t tar_pid;
+
+ struct stat st;
+ uint64_t quota_referenced;
+
+ unsigned last_percent;
+ RateLimit progress_rate_limit;
+
+ bool eof;
+ bool tried_splice;
+};
+
+TarExport *tar_export_unref(TarExport *e) {
+ if (!e)
+ return NULL;
+
+ sd_event_source_unref(e->output_event_source);
+
+ if (e->tar_pid > 1) {
+ (void) kill_and_sigcont(e->tar_pid, SIGKILL);
+ (void) wait_for_terminate(e->tar_pid, NULL);
+ }
+
+ if (e->temp_path) {
+ (void) btrfs_subvol_remove(e->temp_path, BTRFS_REMOVE_QUOTA);
+ free(e->temp_path);
+ }
+
+ import_compress_free(&e->compress);
+
+ sd_event_unref(e->event);
+
+ safe_close(e->tar_fd);
+
+ free(e->buffer);
+ free(e->path);
+ return mfree(e);
+}
+
+int tar_export_new(
+ TarExport **ret,
+ sd_event *event,
+ TarExportFinished on_finished,
+ void *userdata) {
+
+ _cleanup_(tar_export_unrefp) TarExport *e = NULL;
+ int r;
+
+ assert(ret);
+
+ e = new(TarExport, 1);
+ if (!e)
+ return -ENOMEM;
+
+ *e = (TarExport) {
+ .output_fd = -1,
+ .tar_fd = -1,
+ .on_finished = on_finished,
+ .userdata = userdata,
+ .quota_referenced = (uint64_t) -1,
+ .last_percent = (unsigned) -1,
+ };
+
+ RATELIMIT_INIT(e->progress_rate_limit, 100 * USEC_PER_MSEC, 1);
+
+ if (event)
+ e->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&e->event);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(e);
+
+ return 0;
+}
+
+static void tar_export_report_progress(TarExport *e) {
+ unsigned percent;
+ assert(e);
+
+ /* Do we have any quota info? If not, we don't know anything about the progress */
+ if (e->quota_referenced == (uint64_t) -1)
+ return;
+
+ if (e->written_uncompressed >= e->quota_referenced)
+ percent = 100;
+ else
+ percent = (unsigned) ((e->written_uncompressed * UINT64_C(100)) / e->quota_referenced);
+
+ if (percent == e->last_percent)
+ return;
+
+ if (!ratelimit_below(&e->progress_rate_limit))
+ return;
+
+ sd_notifyf(false, "X_IMPORT_PROGRESS=%u", percent);
+ log_info("Exported %u%%.", percent);
+
+ e->last_percent = percent;
+}
+
+static int tar_export_finish(TarExport *e) {
+ int r;
+
+ assert(e);
+ assert(e->tar_fd >= 0);
+
+ if (e->tar_pid > 0) {
+ r = wait_for_terminate_and_check("tar", e->tar_pid, WAIT_LOG);
+ e->tar_pid = 0;
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EPROTO;
+ }
+
+ e->tar_fd = safe_close(e->tar_fd);
+
+ return 0;
+}
+
+static int tar_export_process(TarExport *e) {
+ ssize_t l;
+ int r;
+
+ assert(e);
+
+ if (!e->tried_splice && e->compress.type == IMPORT_COMPRESS_UNCOMPRESSED) {
+
+ l = splice(e->tar_fd, NULL, e->output_fd, NULL, COPY_BUFFER_SIZE, 0);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ e->tried_splice = true;
+ } else if (l == 0) {
+ r = tar_export_finish(e);
+ goto finish;
+ } else {
+ e->written_uncompressed += l;
+ e->written_compressed += l;
+
+ tar_export_report_progress(e);
+
+ return 0;
+ }
+ }
+
+ while (e->buffer_size <= 0) {
+ uint8_t input[COPY_BUFFER_SIZE];
+
+ if (e->eof) {
+ r = tar_export_finish(e);
+ goto finish;
+ }
+
+ l = read(e->tar_fd, input, sizeof(input));
+ if (l < 0) {
+ r = log_error_errno(errno, "Failed to read tar file: %m");
+ goto finish;
+ }
+
+ if (l == 0) {
+ e->eof = true;
+ r = import_compress_finish(&e->compress, &e->buffer, &e->buffer_size, &e->buffer_allocated);
+ } else {
+ e->written_uncompressed += l;
+ r = import_compress(&e->compress, input, l, &e->buffer, &e->buffer_size, &e->buffer_allocated);
+ }
+ if (r < 0) {
+ r = log_error_errno(r, "Failed to encode: %m");
+ goto finish;
+ }
+ }
+
+ l = write(e->output_fd, e->buffer, e->buffer_size);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ r = log_error_errno(errno, "Failed to write output file: %m");
+ goto finish;
+ }
+
+ assert((size_t) l <= e->buffer_size);
+ memmove(e->buffer, (uint8_t*) e->buffer + l, e->buffer_size - l);
+ e->buffer_size -= l;
+ e->written_compressed += l;
+
+ tar_export_report_progress(e);
+
+ return 0;
+
+finish:
+ if (e->on_finished)
+ e->on_finished(e, r, e->userdata);
+ else
+ sd_event_exit(e->event, r);
+
+ return 0;
+}
+
+static int tar_export_on_output(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ TarExport *i = userdata;
+
+ return tar_export_process(i);
+}
+
+static int tar_export_on_defer(sd_event_source *s, void *userdata) {
+ TarExport *i = userdata;
+
+ return tar_export_process(i);
+}
+
+int tar_export_start(TarExport *e, const char *path, int fd, ImportCompressType compress) {
+ _cleanup_close_ int sfd = -1;
+ int r;
+
+ assert(e);
+ assert(path);
+ assert(fd >= 0);
+ assert(compress < _IMPORT_COMPRESS_TYPE_MAX);
+ assert(compress != IMPORT_COMPRESS_UNKNOWN);
+
+ if (e->output_fd >= 0)
+ return -EBUSY;
+
+ sfd = open(path, O_DIRECTORY|O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (sfd < 0)
+ return -errno;
+
+ if (fstat(sfd, &e->st) < 0)
+ return -errno;
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&e->path, path);
+ if (r < 0)
+ return r;
+
+ e->quota_referenced = (uint64_t) -1;
+
+ if (e->st.st_ino == 256) { /* might be a btrfs subvolume? */
+ BtrfsQuotaInfo q;
+
+ r = btrfs_subvol_get_subtree_quota_fd(sfd, 0, &q);
+ if (r >= 0)
+ e->quota_referenced = q.referenced;
+
+ e->temp_path = mfree(e->temp_path);
+
+ r = tempfn_random(path, NULL, &e->temp_path);
+ if (r < 0)
+ return r;
+
+ /* Let's try to make a snapshot, if we can, so that the export is atomic */
+ r = btrfs_subvol_snapshot_fd(sfd, e->temp_path, BTRFS_SNAPSHOT_READ_ONLY|BTRFS_SNAPSHOT_RECURSIVE);
+ if (r < 0) {
+ log_debug_errno(r, "Couldn't create snapshot %s of %s, not exporting atomically: %m", e->temp_path, path);
+ e->temp_path = mfree(e->temp_path);
+ }
+ }
+
+ r = import_compress_init(&e->compress, compress);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_io(e->event, &e->output_event_source, fd, EPOLLOUT, tar_export_on_output, e);
+ if (r == -EPERM) {
+ r = sd_event_add_defer(e->event, &e->output_event_source, tar_export_on_defer, e);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(e->output_event_source, SD_EVENT_ON);
+ }
+ if (r < 0)
+ return r;
+
+ e->tar_fd = import_fork_tar_c(e->temp_path ?: e->path, &e->tar_pid);
+ if (e->tar_fd < 0) {
+ e->output_event_source = sd_event_source_unref(e->output_event_source);
+ return e->tar_fd;
+ }
+
+ e->output_fd = fd;
+ return r;
+}
diff --git a/src/import/export-tar.h b/src/import/export-tar.h
new file mode 100644
index 0000000..6abb7d3
--- /dev/null
+++ b/src/import/export-tar.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-event.h"
+
+#include "import-compress.h"
+#include "macro.h"
+
+typedef struct TarExport TarExport;
+
+typedef void (*TarExportFinished)(TarExport *export, int error, void *userdata);
+
+int tar_export_new(TarExport **export, sd_event *event, TarExportFinished on_finished, void *userdata);
+TarExport* tar_export_unref(TarExport *export);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(TarExport*, tar_export_unref);
+
+int tar_export_start(TarExport *export, const char *path, int fd, ImportCompressType compress);
diff --git a/src/import/export.c b/src/import/export.c
new file mode 100644
index 0000000..4907106
--- /dev/null
+++ b/src/import/export.c
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "export-raw.h"
+#include "export-tar.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "import-util.h"
+#include "machine-image.h"
+#include "main-func.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "verbs.h"
+
+static ImportCompressType arg_compress = IMPORT_COMPRESS_UNKNOWN;
+
+static void determine_compression_from_filename(const char *p) {
+
+ if (arg_compress != IMPORT_COMPRESS_UNKNOWN)
+ return;
+
+ if (!p) {
+ arg_compress = IMPORT_COMPRESS_UNCOMPRESSED;
+ return;
+ }
+
+ if (endswith(p, ".xz"))
+ arg_compress = IMPORT_COMPRESS_XZ;
+ else if (endswith(p, ".gz"))
+ arg_compress = IMPORT_COMPRESS_GZIP;
+ else if (endswith(p, ".bz2"))
+ arg_compress = IMPORT_COMPRESS_BZIP2;
+ else
+ arg_compress = IMPORT_COMPRESS_UNCOMPRESSED;
+}
+
+static int interrupt_signal_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ log_notice("Transfer aborted.");
+ sd_event_exit(sd_event_source_get_event(s), EINTR);
+ return 0;
+}
+
+static void on_tar_finished(TarExport *export, int error, void *userdata) {
+ sd_event *event = userdata;
+ assert(export);
+
+ if (error == 0)
+ log_info("Operation completed successfully.");
+
+ sd_event_exit(event, abs(error));
+}
+
+static int export_tar(int argc, char *argv[], void *userdata) {
+ _cleanup_(tar_export_unrefp) TarExport *export = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(image_unrefp) Image *image = NULL;
+ const char *path = NULL, *local = NULL;
+ _cleanup_close_ int open_fd = -1;
+ int r, fd;
+
+ if (machine_name_is_valid(argv[1])) {
+ r = image_find(IMAGE_MACHINE, argv[1], &image);
+ if (r == -ENOENT)
+ return log_error_errno(r, "Machine image %s not found.", argv[1]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to look for machine %s: %m", argv[1]);
+
+ local = image->path;
+ } else
+ local = argv[1];
+
+ if (argc >= 3)
+ path = argv[2];
+ if (isempty(path) || streq(path, "-"))
+ path = NULL;
+
+ determine_compression_from_filename(path);
+
+ if (path) {
+ open_fd = open(path, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC|O_NOCTTY, 0666);
+ if (open_fd < 0)
+ return log_error_errno(errno, "Failed to open tar image for export: %m");
+
+ fd = open_fd;
+
+ log_info("Exporting '%s', saving to '%s' with compression '%s'.", local, path, import_compress_type_to_string(arg_compress));
+ } else {
+ _cleanup_free_ char *pretty = NULL;
+
+ fd = STDOUT_FILENO;
+
+ (void) fd_get_path(fd, &pretty);
+ log_info("Exporting '%s', saving to '%s' with compression '%s'.", local, strna(pretty), import_compress_type_to_string(arg_compress));
+ }
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, interrupt_signal_handler, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, interrupt_signal_handler, NULL);
+
+ r = tar_export_new(&export, event, on_tar_finished, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate exporter: %m");
+
+ r = tar_export_start(export, local, fd, arg_compress);
+ if (r < 0)
+ return log_error_errno(r, "Failed to export image: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ log_info("Exiting.");
+ return -r;
+}
+
+static void on_raw_finished(RawExport *export, int error, void *userdata) {
+ sd_event *event = userdata;
+ assert(export);
+
+ if (error == 0)
+ log_info("Operation completed successfully.");
+
+ sd_event_exit(event, abs(error));
+}
+
+static int export_raw(int argc, char *argv[], void *userdata) {
+ _cleanup_(raw_export_unrefp) RawExport *export = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(image_unrefp) Image *image = NULL;
+ const char *path = NULL, *local = NULL;
+ _cleanup_close_ int open_fd = -1;
+ int r, fd;
+
+ if (machine_name_is_valid(argv[1])) {
+ r = image_find(IMAGE_MACHINE, argv[1], &image);
+ if (r == -ENOENT)
+ return log_error_errno(r, "Machine image %s not found.", argv[1]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to look for machine %s: %m", argv[1]);
+
+ local = image->path;
+ } else
+ local = argv[1];
+
+ if (argc >= 3)
+ path = argv[2];
+ if (isempty(path) || streq(path, "-"))
+ path = NULL;
+
+ determine_compression_from_filename(path);
+
+ if (path) {
+ open_fd = open(path, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC|O_NOCTTY, 0666);
+ if (open_fd < 0)
+ return log_error_errno(errno, "Failed to open raw image for export: %m");
+
+ fd = open_fd;
+
+ log_info("Exporting '%s', saving to '%s' with compression '%s'.", local, path, import_compress_type_to_string(arg_compress));
+ } else {
+ _cleanup_free_ char *pretty = NULL;
+
+ fd = STDOUT_FILENO;
+
+ (void) fd_get_path(fd, &pretty);
+ log_info("Exporting '%s', saving to '%s' with compression '%s'.", local, strna(pretty), import_compress_type_to_string(arg_compress));
+ }
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, interrupt_signal_handler, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, interrupt_signal_handler, NULL);
+
+ r = raw_export_new(&export, event, on_raw_finished, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate exporter: %m");
+
+ r = raw_export_start(export, local, fd, arg_compress);
+ if (r < 0)
+ return log_error_errno(r, "Failed to export image: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ log_info("Exiting.");
+ return -r;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Export container or virtual machine images.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --format=FORMAT Select format\n\n"
+ "Commands:\n"
+ " tar NAME [FILE] Export a TAR image\n"
+ " raw NAME [FILE] Export a RAW image\n",
+ program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_FORMAT,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "format", required_argument, NULL, ARG_FORMAT },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_FORMAT:
+ if (streq(optarg, "uncompressed"))
+ arg_compress = IMPORT_COMPRESS_UNCOMPRESSED;
+ else if (streq(optarg, "xz"))
+ arg_compress = IMPORT_COMPRESS_XZ;
+ else if (streq(optarg, "gzip"))
+ arg_compress = IMPORT_COMPRESS_GZIP;
+ else if (streq(optarg, "bzip2"))
+ arg_compress = IMPORT_COMPRESS_BZIP2;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown format: %s", optarg);
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int export_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "tar", 2, 3, 0, export_tar },
+ { "raw", 2, 3, 0, export_raw },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ (void) ignore_signals(SIGPIPE, -1);
+
+ return export_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/import/import-common.c b/src/import/import-common.c
new file mode 100644
index 0000000..89f0301
--- /dev/null
+++ b/src/import/import-common.c
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sched.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "capability-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "import-common.h"
+#include "os-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+int import_make_read_only_fd(int fd) {
+ int r;
+
+ assert(fd >= 0);
+
+ /* First, let's make this a read-only subvolume if it refers
+ * to a subvolume */
+ r = btrfs_subvol_set_read_only_fd(fd, true);
+ if (IN_SET(r, -ENOTTY, -ENOTDIR, -EINVAL)) {
+ struct stat st;
+
+ /* This doesn't refer to a subvolume, or the file
+ * system isn't even btrfs. In that, case fall back to
+ * chmod()ing */
+
+ r = fstat(fd, &st);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to stat temporary image: %m");
+
+ /* Drop "w" flag */
+ if (fchmod(fd, st.st_mode & 07555) < 0)
+ return log_error_errno(errno, "Failed to chmod() final image: %m");
+
+ return 0;
+
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to make subvolume read-only: %m");
+
+ return 0;
+}
+
+int import_make_read_only(const char *path) {
+ _cleanup_close_ int fd = 1;
+
+ fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", path);
+
+ return import_make_read_only_fd(fd);
+}
+
+int import_fork_tar_x(const char *path, pid_t *ret) {
+ _cleanup_close_pair_ int pipefd[2] = { -1, -1 };
+ pid_t pid;
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ if (pipe2(pipefd, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to create pipe for tar: %m");
+
+ r = safe_fork("(tar)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ uint64_t retain =
+ (1ULL << CAP_CHOWN) |
+ (1ULL << CAP_FOWNER) |
+ (1ULL << CAP_FSETID) |
+ (1ULL << CAP_MKNOD) |
+ (1ULL << CAP_SETFCAP) |
+ (1ULL << CAP_DAC_OVERRIDE);
+
+ /* Child */
+
+ pipefd[1] = safe_close(pipefd[1]);
+
+ r = rearrange_stdio(pipefd[0], -1, STDERR_FILENO);
+ if (r < 0) {
+ log_error_errno(r, "Failed to rearrange stdin/stdout: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (unshare(CLONE_NEWNET) < 0)
+ log_error_errno(errno, "Failed to lock tar into network namespace, ignoring: %m");
+
+ r = capability_bounding_set_drop(retain, true);
+ if (r < 0)
+ log_error_errno(r, "Failed to drop capabilities, ignoring: %m");
+
+ execlp("tar", "tar", "--numeric-owner", "-C", path, "-px", "--xattrs", "--xattrs-include=*", NULL);
+ log_error_errno(errno, "Failed to execute tar: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ *ret = pid;
+
+ return TAKE_FD(pipefd[1]);
+}
+
+int import_fork_tar_c(const char *path, pid_t *ret) {
+ _cleanup_close_pair_ int pipefd[2] = { -1, -1 };
+ pid_t pid;
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ if (pipe2(pipefd, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to create pipe for tar: %m");
+
+ r = safe_fork("(tar)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ uint64_t retain = (1ULL << CAP_DAC_OVERRIDE);
+
+ /* Child */
+
+ pipefd[0] = safe_close(pipefd[0]);
+
+ r = rearrange_stdio(-1, pipefd[1], STDERR_FILENO);
+ if (r < 0) {
+ log_error_errno(r, "Failed to rearrange stdin/stdout: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (unshare(CLONE_NEWNET) < 0)
+ log_error_errno(errno, "Failed to lock tar into network namespace, ignoring: %m");
+
+ r = capability_bounding_set_drop(retain, true);
+ if (r < 0)
+ log_error_errno(r, "Failed to drop capabilities, ignoring: %m");
+
+ execlp("tar", "tar", "-C", path, "-c", "--xattrs", "--xattrs-include=*", ".", NULL);
+ log_error_errno(errno, "Failed to execute tar: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ *ret = pid;
+
+ return TAKE_FD(pipefd[0]);
+}
+
+int import_mangle_os_tree(const char *path) {
+ _cleanup_closedir_ DIR *d = NULL, *cd = NULL;
+ _cleanup_free_ char *child = NULL, *t = NULL;
+ const char *joined;
+ struct dirent *de;
+ int r;
+
+ assert(path);
+
+ /* Some tarballs contain a single top-level directory that contains the actual OS directory tree. Try to
+ * recognize this, and move the tree one level up. */
+
+ r = path_is_os_tree(path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether '%s' is an OS tree: %m", path);
+ if (r > 0) {
+ log_debug("Directory tree '%s' is a valid OS tree.", path);
+ return 0;
+ }
+
+ log_debug("Directory tree '%s' is not recognizable as OS tree, checking whether to rearrange it.", path);
+
+ d = opendir(path);
+ if (!d)
+ return log_error_errno(r, "Failed to open directory '%s': %m", path);
+
+ errno = 0;
+ de = readdir_no_dot(d);
+ if (!de) {
+ if (errno != 0)
+ return log_error_errno(errno, "Failed to iterate through directory '%s': %m", path);
+
+ log_debug("Directory '%s' is empty, leaving it as it is.", path);
+ return 0;
+ }
+
+ child = strdup(de->d_name);
+ if (!child)
+ return log_oom();
+
+ errno = 0;
+ de = readdir_no_dot(d);
+ if (de) {
+ if (errno != 0)
+ return log_error_errno(errno, "Failed to iterate through directory '%s': %m", path);
+
+ log_debug("Directory '%s' does not look like a directory tree, and has multiple children, leaving as it is.", path);
+ return 0;
+ }
+
+ joined = strjoina(path, "/", child);
+ r = path_is_os_tree(joined);
+ if (r == -ENOTDIR) {
+ log_debug("Directory '%s' does not look like a directory tree, and contains a single regular file only, leaving as it is.", path);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether '%s' is an OS tree: %m", joined);
+ if (r == 0) {
+ log_debug("Neither '%s' nor '%s' is a valid OS tree, leaving them as they are.", path, joined);
+ return 0;
+ }
+
+ /* Nice, we have checked now:
+ *
+ * 1. The top-level directory does not qualify as OS tree
+ * 1. The top-level directory only contains one item
+ * 2. That item is a directory
+ * 3. And that directory qualifies as OS tree
+ *
+ * Let's now rearrange things, moving everything in the inner directory one level up */
+
+ cd = xopendirat(dirfd(d), child, O_NOFOLLOW);
+ if (!cd)
+ return log_error_errno(errno, "Can't open directory '%s': %m", joined);
+
+ log_info("Rearranging '%s', moving OS tree one directory up.", joined);
+
+ /* Let's rename the child to an unguessable name so that we can be sure all files contained in it can be
+ * safely moved up and won't collide with the name. */
+ r = tempfn_random(child, NULL, &t);
+ if (r < 0)
+ return log_oom();
+ r = rename_noreplace(dirfd(d), child, dirfd(d), t);
+ if (r < 0)
+ return log_error_errno(r, "Unable to rename '%s' to '%s/%s': %m", joined, path, t);
+
+ FOREACH_DIRENT_ALL(de, cd, return log_error_errno(errno, "Failed to iterate through directory '%s': %m", joined)) {
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ r = rename_noreplace(dirfd(cd), de->d_name, dirfd(d), de->d_name);
+ if (r < 0)
+ return log_error_errno(r, "Unable to move '%s/%s/%s' to '%s/%s': %m", path, t, de->d_name, path, de->d_name);
+ }
+
+ if (unlinkat(dirfd(d), t, AT_REMOVEDIR) < 0)
+ return log_error_errno(errno, "Failed to remove temporary directory '%s/%s': %m", path, t);
+
+ log_info("Successfully rearranged OS tree.");
+
+ return 0;
+}
diff --git a/src/import/import-common.h b/src/import/import-common.h
new file mode 100644
index 0000000..94d224f
--- /dev/null
+++ b/src/import/import-common.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+int import_make_read_only_fd(int fd);
+int import_make_read_only(const char *path);
+
+int import_fork_tar_c(const char *path, pid_t *ret);
+int import_fork_tar_x(const char *path, pid_t *ret);
+
+int import_mangle_os_tree(const char *path);
diff --git a/src/import/import-compress.c b/src/import/import-compress.c
new file mode 100644
index 0000000..3fbd067
--- /dev/null
+++ b/src/import/import-compress.c
@@ -0,0 +1,466 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "import-compress.h"
+#include "string-table.h"
+#include "util.h"
+
+void import_compress_free(ImportCompress *c) {
+ assert(c);
+
+ if (c->type == IMPORT_COMPRESS_XZ)
+ lzma_end(&c->xz);
+ else if (c->type == IMPORT_COMPRESS_GZIP) {
+ if (c->encoding)
+ deflateEnd(&c->gzip);
+ else
+ inflateEnd(&c->gzip);
+#if HAVE_BZIP2
+ } else if (c->type == IMPORT_COMPRESS_BZIP2) {
+ if (c->encoding)
+ BZ2_bzCompressEnd(&c->bzip2);
+ else
+ BZ2_bzDecompressEnd(&c->bzip2);
+#endif
+ }
+
+ c->type = IMPORT_COMPRESS_UNKNOWN;
+}
+
+int import_uncompress_detect(ImportCompress *c, const void *data, size_t size) {
+ static const uint8_t xz_signature[] = {
+ 0xfd, '7', 'z', 'X', 'Z', 0x00
+ };
+ static const uint8_t gzip_signature[] = {
+ 0x1f, 0x8b
+ };
+ static const uint8_t bzip2_signature[] = {
+ 'B', 'Z', 'h'
+ };
+
+ int r;
+
+ assert(c);
+
+ if (c->type != IMPORT_COMPRESS_UNKNOWN)
+ return 1;
+
+ if (size < MAX3(sizeof(xz_signature),
+ sizeof(gzip_signature),
+ sizeof(bzip2_signature)))
+ return 0;
+
+ assert(data);
+
+ if (memcmp(data, xz_signature, sizeof(xz_signature)) == 0) {
+ lzma_ret xzr;
+
+ xzr = lzma_stream_decoder(&c->xz, UINT64_MAX, LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED);
+ if (xzr != LZMA_OK)
+ return -EIO;
+
+ c->type = IMPORT_COMPRESS_XZ;
+
+ } else if (memcmp(data, gzip_signature, sizeof(gzip_signature)) == 0) {
+ r = inflateInit2(&c->gzip, 15+16);
+ if (r != Z_OK)
+ return -EIO;
+
+ c->type = IMPORT_COMPRESS_GZIP;
+
+#if HAVE_BZIP2
+ } else if (memcmp(data, bzip2_signature, sizeof(bzip2_signature)) == 0) {
+ r = BZ2_bzDecompressInit(&c->bzip2, 0, 0);
+ if (r != BZ_OK)
+ return -EIO;
+
+ c->type = IMPORT_COMPRESS_BZIP2;
+#endif
+ } else
+ c->type = IMPORT_COMPRESS_UNCOMPRESSED;
+
+ c->encoding = false;
+
+ return 1;
+}
+
+int import_uncompress(ImportCompress *c, const void *data, size_t size, ImportCompressCallback callback, void *userdata) {
+ int r;
+
+ assert(c);
+ assert(callback);
+
+ r = import_uncompress_detect(c, data, size);
+ if (r <= 0)
+ return r;
+
+ if (c->encoding)
+ return -EINVAL;
+
+ if (size <= 0)
+ return 1;
+
+ assert(data);
+
+ switch (c->type) {
+
+ case IMPORT_COMPRESS_UNCOMPRESSED:
+ r = callback(data, size, userdata);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case IMPORT_COMPRESS_XZ:
+ c->xz.next_in = data;
+ c->xz.avail_in = size;
+
+ while (c->xz.avail_in > 0) {
+ uint8_t buffer[16 * 1024];
+ lzma_ret lzr;
+
+ c->xz.next_out = buffer;
+ c->xz.avail_out = sizeof(buffer);
+
+ lzr = lzma_code(&c->xz, LZMA_RUN);
+ if (!IN_SET(lzr, LZMA_OK, LZMA_STREAM_END))
+ return -EIO;
+
+ r = callback(buffer, sizeof(buffer) - c->xz.avail_out, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+
+ case IMPORT_COMPRESS_GZIP:
+ c->gzip.next_in = (void*) data;
+ c->gzip.avail_in = size;
+
+ while (c->gzip.avail_in > 0) {
+ uint8_t buffer[16 * 1024];
+
+ c->gzip.next_out = buffer;
+ c->gzip.avail_out = sizeof(buffer);
+
+ r = inflate(&c->gzip, Z_NO_FLUSH);
+ if (!IN_SET(r, Z_OK, Z_STREAM_END))
+ return -EIO;
+
+ r = callback(buffer, sizeof(buffer) - c->gzip.avail_out, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+
+#if HAVE_BZIP2
+ case IMPORT_COMPRESS_BZIP2:
+ c->bzip2.next_in = (void*) data;
+ c->bzip2.avail_in = size;
+
+ while (c->bzip2.avail_in > 0) {
+ uint8_t buffer[16 * 1024];
+
+ c->bzip2.next_out = (char*) buffer;
+ c->bzip2.avail_out = sizeof(buffer);
+
+ r = BZ2_bzDecompress(&c->bzip2);
+ if (!IN_SET(r, BZ_OK, BZ_STREAM_END))
+ return -EIO;
+
+ r = callback(buffer, sizeof(buffer) - c->bzip2.avail_out, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+#endif
+
+ default:
+ assert_not_reached("Unknown compression");
+ }
+
+ return 1;
+}
+
+int import_compress_init(ImportCompress *c, ImportCompressType t) {
+ int r;
+
+ assert(c);
+
+ switch (t) {
+
+ case IMPORT_COMPRESS_XZ: {
+ lzma_ret xzr;
+
+ xzr = lzma_easy_encoder(&c->xz, LZMA_PRESET_DEFAULT, LZMA_CHECK_CRC64);
+ if (xzr != LZMA_OK)
+ return -EIO;
+
+ c->type = IMPORT_COMPRESS_XZ;
+ break;
+ }
+
+ case IMPORT_COMPRESS_GZIP:
+ r = deflateInit2(&c->gzip, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY);
+ if (r != Z_OK)
+ return -EIO;
+
+ c->type = IMPORT_COMPRESS_GZIP;
+ break;
+
+#if HAVE_BZIP2
+ case IMPORT_COMPRESS_BZIP2:
+ r = BZ2_bzCompressInit(&c->bzip2, 9, 0, 0);
+ if (r != BZ_OK)
+ return -EIO;
+
+ c->type = IMPORT_COMPRESS_BZIP2;
+ break;
+#endif
+
+ case IMPORT_COMPRESS_UNCOMPRESSED:
+ c->type = IMPORT_COMPRESS_UNCOMPRESSED;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ c->encoding = true;
+ return 0;
+}
+
+static int enlarge_buffer(void **buffer, size_t *buffer_size, size_t *buffer_allocated) {
+ size_t l;
+ void *p;
+
+ if (*buffer_allocated > *buffer_size)
+ return 0;
+
+ l = MAX(16*1024U, (*buffer_size * 2));
+ p = realloc(*buffer, l);
+ if (!p)
+ return -ENOMEM;
+
+ *buffer = p;
+ *buffer_allocated = l;
+
+ return 1;
+}
+
+int import_compress(ImportCompress *c, const void *data, size_t size, void **buffer, size_t *buffer_size, size_t *buffer_allocated) {
+ int r;
+
+ assert(c);
+ assert(buffer);
+ assert(buffer_size);
+ assert(buffer_allocated);
+
+ if (!c->encoding)
+ return -EINVAL;
+
+ if (size <= 0)
+ return 0;
+
+ assert(data);
+
+ *buffer_size = 0;
+
+ switch (c->type) {
+
+ case IMPORT_COMPRESS_XZ:
+
+ c->xz.next_in = data;
+ c->xz.avail_in = size;
+
+ while (c->xz.avail_in > 0) {
+ lzma_ret lzr;
+
+ r = enlarge_buffer(buffer, buffer_size, buffer_allocated);
+ if (r < 0)
+ return r;
+
+ c->xz.next_out = (uint8_t*) *buffer + *buffer_size;
+ c->xz.avail_out = *buffer_allocated - *buffer_size;
+
+ lzr = lzma_code(&c->xz, LZMA_RUN);
+ if (lzr != LZMA_OK)
+ return -EIO;
+
+ *buffer_size += (*buffer_allocated - *buffer_size) - c->xz.avail_out;
+ }
+
+ break;
+
+ case IMPORT_COMPRESS_GZIP:
+
+ c->gzip.next_in = (void*) data;
+ c->gzip.avail_in = size;
+
+ while (c->gzip.avail_in > 0) {
+ r = enlarge_buffer(buffer, buffer_size, buffer_allocated);
+ if (r < 0)
+ return r;
+
+ c->gzip.next_out = (uint8_t*) *buffer + *buffer_size;
+ c->gzip.avail_out = *buffer_allocated - *buffer_size;
+
+ r = deflate(&c->gzip, Z_NO_FLUSH);
+ if (r != Z_OK)
+ return -EIO;
+
+ *buffer_size += (*buffer_allocated - *buffer_size) - c->gzip.avail_out;
+ }
+
+ break;
+
+#if HAVE_BZIP2
+ case IMPORT_COMPRESS_BZIP2:
+
+ c->bzip2.next_in = (void*) data;
+ c->bzip2.avail_in = size;
+
+ while (c->bzip2.avail_in > 0) {
+ r = enlarge_buffer(buffer, buffer_size, buffer_allocated);
+ if (r < 0)
+ return r;
+
+ c->bzip2.next_out = (void*) ((uint8_t*) *buffer + *buffer_size);
+ c->bzip2.avail_out = *buffer_allocated - *buffer_size;
+
+ r = BZ2_bzCompress(&c->bzip2, BZ_RUN);
+ if (r != BZ_RUN_OK)
+ return -EIO;
+
+ *buffer_size += (*buffer_allocated - *buffer_size) - c->bzip2.avail_out;
+ }
+
+ break;
+#endif
+
+ case IMPORT_COMPRESS_UNCOMPRESSED:
+
+ if (*buffer_allocated < size) {
+ void *p;
+
+ p = realloc(*buffer, size);
+ if (!p)
+ return -ENOMEM;
+
+ *buffer = p;
+ *buffer_allocated = size;
+ }
+
+ memcpy(*buffer, data, size);
+ *buffer_size = size;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int import_compress_finish(ImportCompress *c, void **buffer, size_t *buffer_size, size_t *buffer_allocated) {
+ int r;
+
+ assert(c);
+ assert(buffer);
+ assert(buffer_size);
+ assert(buffer_allocated);
+
+ if (!c->encoding)
+ return -EINVAL;
+
+ *buffer_size = 0;
+
+ switch (c->type) {
+
+ case IMPORT_COMPRESS_XZ: {
+ lzma_ret lzr;
+
+ c->xz.avail_in = 0;
+
+ do {
+ r = enlarge_buffer(buffer, buffer_size, buffer_allocated);
+ if (r < 0)
+ return r;
+
+ c->xz.next_out = (uint8_t*) *buffer + *buffer_size;
+ c->xz.avail_out = *buffer_allocated - *buffer_size;
+
+ lzr = lzma_code(&c->xz, LZMA_FINISH);
+ if (!IN_SET(lzr, LZMA_OK, LZMA_STREAM_END))
+ return -EIO;
+
+ *buffer_size += (*buffer_allocated - *buffer_size) - c->xz.avail_out;
+ } while (lzr != LZMA_STREAM_END);
+
+ break;
+ }
+
+ case IMPORT_COMPRESS_GZIP:
+ c->gzip.avail_in = 0;
+
+ do {
+ r = enlarge_buffer(buffer, buffer_size, buffer_allocated);
+ if (r < 0)
+ return r;
+
+ c->gzip.next_out = (uint8_t*) *buffer + *buffer_size;
+ c->gzip.avail_out = *buffer_allocated - *buffer_size;
+
+ r = deflate(&c->gzip, Z_FINISH);
+ if (!IN_SET(r, Z_OK, Z_STREAM_END))
+ return -EIO;
+
+ *buffer_size += (*buffer_allocated - *buffer_size) - c->gzip.avail_out;
+ } while (r != Z_STREAM_END);
+
+ break;
+
+#if HAVE_BZIP2
+ case IMPORT_COMPRESS_BZIP2:
+ c->bzip2.avail_in = 0;
+
+ do {
+ r = enlarge_buffer(buffer, buffer_size, buffer_allocated);
+ if (r < 0)
+ return r;
+
+ c->bzip2.next_out = (void*) ((uint8_t*) *buffer + *buffer_size);
+ c->bzip2.avail_out = *buffer_allocated - *buffer_size;
+
+ r = BZ2_bzCompress(&c->bzip2, BZ_FINISH);
+ if (!IN_SET(r, BZ_FINISH_OK, BZ_STREAM_END))
+ return -EIO;
+
+ *buffer_size += (*buffer_allocated - *buffer_size) - c->bzip2.avail_out;
+ } while (r != BZ_STREAM_END);
+
+ break;
+#endif
+
+ case IMPORT_COMPRESS_UNCOMPRESSED:
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static const char* const import_compress_type_table[_IMPORT_COMPRESS_TYPE_MAX] = {
+ [IMPORT_COMPRESS_UNKNOWN] = "unknown",
+ [IMPORT_COMPRESS_UNCOMPRESSED] = "uncompressed",
+ [IMPORT_COMPRESS_XZ] = "xz",
+ [IMPORT_COMPRESS_GZIP] = "gzip",
+#if HAVE_BZIP2
+ [IMPORT_COMPRESS_BZIP2] = "bzip2",
+#endif
+};
+
+DEFINE_STRING_TABLE_LOOKUP(import_compress_type, ImportCompressType);
diff --git a/src/import/import-compress.h b/src/import/import-compress.h
new file mode 100644
index 0000000..859bd0e
--- /dev/null
+++ b/src/import/import-compress.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if HAVE_BZIP2
+#include <bzlib.h>
+#endif
+#include <lzma.h>
+#include <sys/types.h>
+#include <zlib.h>
+
+#include "macro.h"
+
+typedef enum ImportCompressType {
+ IMPORT_COMPRESS_UNKNOWN,
+ IMPORT_COMPRESS_UNCOMPRESSED,
+ IMPORT_COMPRESS_XZ,
+ IMPORT_COMPRESS_GZIP,
+ IMPORT_COMPRESS_BZIP2,
+ _IMPORT_COMPRESS_TYPE_MAX,
+ _IMPORT_COMPRESS_TYPE_INVALID = -1,
+} ImportCompressType;
+
+typedef struct ImportCompress {
+ ImportCompressType type;
+ bool encoding;
+ union {
+ lzma_stream xz;
+ z_stream gzip;
+#if HAVE_BZIP2
+ bz_stream bzip2;
+#endif
+ };
+} ImportCompress;
+
+typedef int (*ImportCompressCallback)(const void *data, size_t size, void *userdata);
+
+void import_compress_free(ImportCompress *c);
+
+int import_uncompress_detect(ImportCompress *c, const void *data, size_t size);
+int import_uncompress(ImportCompress *c, const void *data, size_t size, ImportCompressCallback callback, void *userdata);
+
+int import_compress_init(ImportCompress *c, ImportCompressType t);
+int import_compress(ImportCompress *c, const void *data, size_t size, void **buffer, size_t *buffer_size, size_t *buffer_allocated);
+int import_compress_finish(ImportCompress *c, void **buffer, size_t *buffer_size, size_t *buffer_allocated);
+
+const char* import_compress_type_to_string(ImportCompressType t) _const_;
+ImportCompressType import_compress_type_from_string(const char *s) _pure_;
diff --git a/src/import/import-fs.c b/src/import/import-fs.c
new file mode 100644
index 0000000..35ba6ba
--- /dev/null
+++ b/src/import/import-fs.c
@@ -0,0 +1,327 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "import-common.h"
+#include "import-util.h"
+#include "machine-image.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "ratelimit.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "verbs.h"
+
+static bool arg_force = false;
+static bool arg_read_only = false;
+static const char *arg_image_root = "/var/lib/machines";
+
+typedef struct ProgressInfo {
+ RateLimit limit;
+ char *path;
+ uint64_t size;
+ bool started;
+ bool logged_incomplete;
+} ProgressInfo;
+
+static volatile sig_atomic_t cancelled = false;
+
+static void sigterm_sigint(int sig) {
+ cancelled = true;
+}
+
+static void progress_info_free(ProgressInfo *p) {
+ free(p->path);
+}
+
+static void progress_show(ProgressInfo *p) {
+ assert(p);
+
+ /* Show progress only every now and then. */
+ if (!ratelimit_below(&p->limit))
+ return;
+
+ /* Suppress the first message, start with the second one */
+ if (!p->started) {
+ p->started = true;
+ return;
+ }
+
+ /* Mention the list is incomplete before showing first output. */
+ if (!p->logged_incomplete) {
+ log_notice("(Note, file list shown below is incomplete, and is intended as sporadic progress report only.)");
+ p->logged_incomplete = true;
+ }
+
+ if (p->size == 0)
+ log_info("Copying tree, currently at '%s'...", p->path);
+ else {
+ char buffer[FORMAT_BYTES_MAX];
+
+ log_info("Copying tree, currently at '%s' (@%s)...", p->path, format_bytes(buffer, sizeof(buffer), p->size));
+ }
+}
+
+static int progress_path(const char *path, const struct stat *st, void *userdata) {
+ ProgressInfo *p = userdata;
+ int r;
+
+ assert(p);
+
+ if (cancelled)
+ return -EOWNERDEAD;
+
+ r = free_and_strdup(&p->path, path);
+ if (r < 0)
+ return r;
+
+ p->size = 0;
+
+ progress_show(p);
+ return 0;
+}
+
+static int progress_bytes(uint64_t nbytes, void *userdata) {
+ ProgressInfo *p = userdata;
+
+ assert(p);
+ assert(p->size != UINT64_MAX);
+
+ if (cancelled)
+ return -EOWNERDEAD;
+
+ p->size += nbytes;
+
+ progress_show(p);
+ return 0;
+}
+
+static int import_fs(int argc, char *argv[], void *userdata) {
+ _cleanup_(rm_rf_subvolume_and_freep) char *temp_path = NULL;
+ _cleanup_(progress_info_free) ProgressInfo progress = {};
+ const char *path = NULL, *local = NULL, *final_path;
+ _cleanup_close_ int open_fd = -1;
+ struct sigaction old_sigint_sa, old_sigterm_sa;
+ static const struct sigaction sa = {
+ .sa_handler = sigterm_sigint,
+ .sa_flags = SA_RESTART,
+ };
+ int r, fd;
+
+ if (argc >= 2)
+ path = argv[1];
+ if (isempty(path) || streq(path, "-"))
+ path = NULL;
+
+ if (argc >= 3)
+ local = argv[2];
+ else if (path)
+ local = basename(path);
+ if (isempty(local) || streq(local, "-"))
+ local = NULL;
+
+ if (local) {
+ if (!machine_name_is_valid(local)) {
+ log_error("Local image name '%s' is not valid.", local);
+ return -EINVAL;
+ }
+
+ if (!arg_force) {
+ r = image_find(IMAGE_MACHINE, local, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to check whether image '%s' exists: %m", local);
+ } else {
+ log_error("Image '%s' already exists.", local);
+ return -EEXIST;
+ }
+ }
+ } else
+ local = "imported";
+
+ if (path) {
+ open_fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (open_fd < 0)
+ return log_error_errno(errno, "Failed to open directory to import: %m");
+
+ fd = open_fd;
+
+ log_info("Importing '%s', saving as '%s'.", path, local);
+ } else {
+ _cleanup_free_ char *pretty = NULL;
+
+ fd = STDIN_FILENO;
+
+ (void) fd_get_path(fd, &pretty);
+ log_info("Importing '%s', saving as '%s'.", strempty(pretty), local);
+ }
+
+ final_path = strjoina(arg_image_root, "/", local);
+
+ r = tempfn_random(final_path, NULL, &temp_path);
+ if (r < 0)
+ return log_oom();
+
+ (void) mkdir_parents_label(temp_path, 0700);
+
+ RATELIMIT_INIT(progress.limit, 200*USEC_PER_MSEC, 1);
+
+ /* Hook into SIGINT/SIGTERM, so that we can cancel things then */
+ assert(sigaction(SIGINT, &sa, &old_sigint_sa) >= 0);
+ assert(sigaction(SIGTERM, &sa, &old_sigterm_sa) >= 0);
+
+ r = btrfs_subvol_snapshot_fd_full(
+ fd,
+ temp_path,
+ BTRFS_SNAPSHOT_FALLBACK_COPY|BTRFS_SNAPSHOT_RECURSIVE|BTRFS_SNAPSHOT_FALLBACK_DIRECTORY|BTRFS_SNAPSHOT_QUOTA,
+ progress_path,
+ progress_bytes,
+ &progress);
+ if (r == -EOWNERDEAD) { /* SIGINT + SIGTERM cause this, see signal handler above */
+ log_error("Copy cancelled.");
+ goto finish;
+ }
+ if (r < 0) {
+ log_error_errno(r, "Failed to copy directory: %m");
+ goto finish;
+ }
+
+ r = import_mangle_os_tree(temp_path);
+ if (r < 0)
+ goto finish;
+
+ (void) import_assign_pool_quota_and_warn(temp_path);
+
+ if (arg_read_only) {
+ r = import_make_read_only(temp_path);
+ if (r < 0) {
+ log_error_errno(r, "Failed to make directory read-only: %m");
+ goto finish;
+ }
+ }
+
+ if (arg_force)
+ (void) rm_rf(final_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+
+ r = rename_noreplace(AT_FDCWD, temp_path, AT_FDCWD, final_path);
+ if (r < 0) {
+ log_error_errno(r, "Failed to move image into place: %m");
+ goto finish;
+ }
+
+ temp_path = mfree(temp_path);
+
+ log_info("Exiting.");
+
+finish:
+ /* Put old signal handlers into place */
+ assert(sigaction(SIGINT, &old_sigint_sa, NULL) >= 0);
+ assert(sigaction(SIGTERM, &old_sigterm_sa, NULL) >= 0);
+
+ return 0;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Import container images from a file system.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --force Force creation of image\n"
+ " --image-root=PATH Image root directory\n"
+ " --read-only Create a read-only image\n\n"
+ "Commands:\n"
+ " run DIRECTORY [NAME] Import a directory\n",
+ program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_FORCE,
+ ARG_IMAGE_ROOT,
+ ARG_READ_ONLY,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "force", no_argument, NULL, ARG_FORCE },
+ { "image-root", required_argument, NULL, ARG_IMAGE_ROOT },
+ { "read-only", no_argument, NULL, ARG_READ_ONLY },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_FORCE:
+ arg_force = true;
+ break;
+
+ case ARG_IMAGE_ROOT:
+ arg_image_root = optarg;
+ break;
+
+ case ARG_READ_ONLY:
+ arg_read_only = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int import_fs_main(int argc, char *argv[]) {
+
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "run", 2, 3, 0, import_fs },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+int main(int argc, char *argv[]) {
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ goto finish;
+
+ r = import_fs_main(argc, argv);
+
+finish:
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/import/import-pubring.gpg b/src/import/import-pubring.gpg
new file mode 100644
index 0000000..be27776
--- /dev/null
+++ b/src/import/import-pubring.gpg
Binary files differ
diff --git a/src/import/import-raw.c b/src/import/import-raw.c
new file mode 100644
index 0000000..4b11615
--- /dev/null
+++ b/src/import/import-raw.c
@@ -0,0 +1,438 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <linux/fs.h>
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "copy.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "import-common.h"
+#include "import-compress.h"
+#include "import-raw.h"
+#include "io-util.h"
+#include "machine-pool.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "qcow2-util.h"
+#include "ratelimit.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+struct RawImport {
+ sd_event *event;
+
+ char *image_root;
+
+ RawImportFinished on_finished;
+ void *userdata;
+
+ char *local;
+ bool force_local;
+ bool read_only;
+
+ char *temp_path;
+ char *final_path;
+
+ int input_fd;
+ int output_fd;
+
+ ImportCompress compress;
+
+ sd_event_source *input_event_source;
+
+ uint8_t buffer[16*1024];
+ size_t buffer_size;
+
+ uint64_t written_compressed;
+ uint64_t written_uncompressed;
+
+ struct stat st;
+
+ unsigned last_percent;
+ RateLimit progress_rate_limit;
+};
+
+RawImport* raw_import_unref(RawImport *i) {
+ if (!i)
+ return NULL;
+
+ sd_event_unref(i->event);
+
+ if (i->temp_path) {
+ (void) unlink(i->temp_path);
+ free(i->temp_path);
+ }
+
+ import_compress_free(&i->compress);
+
+ sd_event_source_unref(i->input_event_source);
+
+ safe_close(i->output_fd);
+
+ free(i->final_path);
+ free(i->image_root);
+ free(i->local);
+ return mfree(i);
+}
+
+int raw_import_new(
+ RawImport **ret,
+ sd_event *event,
+ const char *image_root,
+ RawImportFinished on_finished,
+ void *userdata) {
+
+ _cleanup_(raw_import_unrefp) RawImport *i = NULL;
+ _cleanup_free_ char *root = NULL;
+ int r;
+
+ assert(ret);
+
+ root = strdup(image_root ?: "/var/lib/machines");
+ if (!root)
+ return -ENOMEM;
+
+ i = new(RawImport, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (RawImport) {
+ .input_fd = -1,
+ .output_fd = -1,
+ .on_finished = on_finished,
+ .userdata = userdata,
+ .last_percent = (unsigned) -1,
+ .image_root = TAKE_PTR(root),
+ };
+
+ RATELIMIT_INIT(i->progress_rate_limit, 100 * USEC_PER_MSEC, 1);
+
+ if (event)
+ i->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&i->event);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(i);
+
+ return 0;
+}
+
+static void raw_import_report_progress(RawImport *i) {
+ unsigned percent;
+ assert(i);
+
+ /* We have no size information, unless the source is a regular file */
+ if (!S_ISREG(i->st.st_mode))
+ return;
+
+ if (i->written_compressed >= (uint64_t) i->st.st_size)
+ percent = 100;
+ else
+ percent = (unsigned) ((i->written_compressed * UINT64_C(100)) / (uint64_t) i->st.st_size);
+
+ if (percent == i->last_percent)
+ return;
+
+ if (!ratelimit_below(&i->progress_rate_limit))
+ return;
+
+ sd_notifyf(false, "X_IMPORT_PROGRESS=%u", percent);
+ log_info("Imported %u%%.", percent);
+
+ i->last_percent = percent;
+}
+
+static int raw_import_maybe_convert_qcow2(RawImport *i) {
+ _cleanup_close_ int converted_fd = -1;
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(i);
+
+ r = qcow2_detect(i->output_fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to detect whether this is a QCOW2 image: %m");
+ if (r == 0)
+ return 0;
+
+ /* This is a QCOW2 image, let's convert it */
+ r = tempfn_random(i->final_path, NULL, &t);
+ if (r < 0)
+ return log_oom();
+
+ converted_fd = open(t, O_RDWR|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0664);
+ if (converted_fd < 0)
+ return log_error_errno(errno, "Failed to create %s: %m", t);
+
+ r = chattr_fd(converted_fd, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set file attributes on %s: %m", t);
+
+ log_info("Unpacking QCOW2 file.");
+
+ r = qcow2_convert(i->output_fd, converted_fd);
+ if (r < 0) {
+ unlink(t);
+ return log_error_errno(r, "Failed to convert qcow2 image: %m");
+ }
+
+ (void) unlink(i->temp_path);
+ free_and_replace(i->temp_path, t);
+
+ safe_close(i->output_fd);
+ i->output_fd = TAKE_FD(converted_fd);
+
+ return 1;
+}
+
+static int raw_import_finish(RawImport *i) {
+ int r;
+
+ assert(i);
+ assert(i->output_fd >= 0);
+ assert(i->temp_path);
+ assert(i->final_path);
+
+ /* In case this was a sparse file, make sure the file system is right */
+ if (i->written_uncompressed > 0) {
+ if (ftruncate(i->output_fd, i->written_uncompressed) < 0)
+ return log_error_errno(errno, "Failed to truncate file: %m");
+ }
+
+ r = raw_import_maybe_convert_qcow2(i);
+ if (r < 0)
+ return r;
+
+ if (S_ISREG(i->st.st_mode)) {
+ (void) copy_times(i->input_fd, i->output_fd);
+ (void) copy_xattr(i->input_fd, i->output_fd);
+ }
+
+ if (i->read_only) {
+ r = import_make_read_only_fd(i->output_fd);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->force_local)
+ (void) rm_rf(i->final_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+
+ r = rename_noreplace(AT_FDCWD, i->temp_path, AT_FDCWD, i->final_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to move image into place: %m");
+
+ i->temp_path = mfree(i->temp_path);
+
+ return 0;
+}
+
+static int raw_import_open_disk(RawImport *i) {
+ int r;
+
+ assert(i);
+
+ assert(!i->final_path);
+ assert(!i->temp_path);
+ assert(i->output_fd < 0);
+
+ i->final_path = strjoin(i->image_root, "/", i->local, ".raw");
+ if (!i->final_path)
+ return log_oom();
+
+ r = tempfn_random(i->final_path, NULL, &i->temp_path);
+ if (r < 0)
+ return log_oom();
+
+ (void) mkdir_parents_label(i->temp_path, 0700);
+
+ i->output_fd = open(i->temp_path, O_RDWR|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0664);
+ if (i->output_fd < 0)
+ return log_error_errno(errno, "Failed to open destination %s: %m", i->temp_path);
+
+ r = chattr_fd(i->output_fd, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set file attributes on %s: %m", i->temp_path);
+
+ return 0;
+}
+
+static int raw_import_try_reflink(RawImport *i) {
+ off_t p;
+ int r;
+
+ assert(i);
+ assert(i->input_fd >= 0);
+ assert(i->output_fd >= 0);
+
+ if (i->compress.type != IMPORT_COMPRESS_UNCOMPRESSED)
+ return 0;
+
+ if (!S_ISREG(i->st.st_mode))
+ return 0;
+
+ p = lseek(i->input_fd, 0, SEEK_CUR);
+ if (p == (off_t) -1)
+ return log_error_errno(errno, "Failed to read file offset of input file: %m");
+
+ /* Let's only try a btrfs reflink, if we are reading from the beginning of the file */
+ if ((uint64_t) p != (uint64_t) i->buffer_size)
+ return 0;
+
+ r = btrfs_reflink(i->input_fd, i->output_fd);
+ if (r >= 0)
+ return 1;
+
+ return 0;
+}
+
+static int raw_import_write(const void *p, size_t sz, void *userdata) {
+ RawImport *i = userdata;
+ ssize_t n;
+
+ n = sparse_write(i->output_fd, p, sz, 64);
+ if (n < 0)
+ return (int) n;
+ if ((size_t) n < sz)
+ return -EIO;
+
+ i->written_uncompressed += sz;
+
+ return 0;
+}
+
+static int raw_import_process(RawImport *i) {
+ ssize_t l;
+ int r;
+
+ assert(i);
+ assert(i->buffer_size < sizeof(i->buffer));
+
+ l = read(i->input_fd, i->buffer + i->buffer_size, sizeof(i->buffer) - i->buffer_size);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ r = log_error_errno(errno, "Failed to read input file: %m");
+ goto finish;
+ }
+ if (l == 0) {
+ if (i->compress.type == IMPORT_COMPRESS_UNKNOWN) {
+ log_error("Premature end of file.");
+ r = -EIO;
+ goto finish;
+ }
+
+ r = raw_import_finish(i);
+ goto finish;
+ }
+
+ i->buffer_size += l;
+
+ if (i->compress.type == IMPORT_COMPRESS_UNKNOWN) {
+ r = import_uncompress_detect(&i->compress, i->buffer, i->buffer_size);
+ if (r < 0) {
+ log_error_errno(r, "Failed to detect file compression: %m");
+ goto finish;
+ }
+ if (r == 0) /* Need more data */
+ return 0;
+
+ r = raw_import_open_disk(i);
+ if (r < 0)
+ goto finish;
+
+ r = raw_import_try_reflink(i);
+ if (r < 0)
+ goto finish;
+ if (r > 0) {
+ r = raw_import_finish(i);
+ goto finish;
+ }
+ }
+
+ r = import_uncompress(&i->compress, i->buffer, i->buffer_size, raw_import_write, i);
+ if (r < 0) {
+ log_error_errno(r, "Failed to decode and write: %m");
+ goto finish;
+ }
+
+ i->written_compressed += i->buffer_size;
+ i->buffer_size = 0;
+
+ raw_import_report_progress(i);
+
+ return 0;
+
+finish:
+ if (i->on_finished)
+ i->on_finished(i, r, i->userdata);
+ else
+ sd_event_exit(i->event, r);
+
+ return 0;
+}
+
+static int raw_import_on_input(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ RawImport *i = userdata;
+
+ return raw_import_process(i);
+}
+
+static int raw_import_on_defer(sd_event_source *s, void *userdata) {
+ RawImport *i = userdata;
+
+ return raw_import_process(i);
+}
+
+int raw_import_start(RawImport *i, int fd, const char *local, bool force_local, bool read_only) {
+ int r;
+
+ assert(i);
+ assert(fd >= 0);
+ assert(local);
+
+ if (!machine_name_is_valid(local))
+ return -EINVAL;
+
+ if (i->input_fd >= 0)
+ return -EBUSY;
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&i->local, local);
+ if (r < 0)
+ return r;
+ i->force_local = force_local;
+ i->read_only = read_only;
+
+ if (fstat(fd, &i->st) < 0)
+ return -errno;
+
+ r = sd_event_add_io(i->event, &i->input_event_source, fd, EPOLLIN, raw_import_on_input, i);
+ if (r == -EPERM) {
+ /* This fd does not support epoll, for example because it is a regular file. Busy read in that case */
+ r = sd_event_add_defer(i->event, &i->input_event_source, raw_import_on_defer, i);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(i->input_event_source, SD_EVENT_ON);
+ }
+ if (r < 0)
+ return r;
+
+ i->input_fd = fd;
+ return r;
+}
diff --git a/src/import/import-raw.h b/src/import/import-raw.h
new file mode 100644
index 0000000..de4c3ea
--- /dev/null
+++ b/src/import/import-raw.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-event.h"
+
+#include "import-util.h"
+#include "macro.h"
+
+typedef struct RawImport RawImport;
+
+typedef void (*RawImportFinished)(RawImport *import, int error, void *userdata);
+
+int raw_import_new(RawImport **import, sd_event *event, const char *image_root, RawImportFinished on_finished, void *userdata);
+RawImport* raw_import_unref(RawImport *import);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(RawImport*, raw_import_unref);
+
+int raw_import_start(RawImport *i, int fd, const char *local, bool force_local, bool read_only);
diff --git a/src/import/import-tar.c b/src/import/import-tar.c
new file mode 100644
index 0000000..4d0d925
--- /dev/null
+++ b/src/import/import-tar.c
@@ -0,0 +1,369 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <linux/fs.h>
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "copy.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "import-common.h"
+#include "import-compress.h"
+#include "import-tar.h"
+#include "io-util.h"
+#include "machine-pool.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "qcow2-util.h"
+#include "ratelimit.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+struct TarImport {
+ sd_event *event;
+
+ char *image_root;
+
+ TarImportFinished on_finished;
+ void *userdata;
+
+ char *local;
+ bool force_local;
+ bool read_only;
+
+ char *temp_path;
+ char *final_path;
+
+ int input_fd;
+ int tar_fd;
+
+ ImportCompress compress;
+
+ sd_event_source *input_event_source;
+
+ uint8_t buffer[16*1024];
+ size_t buffer_size;
+
+ uint64_t written_compressed;
+ uint64_t written_uncompressed;
+
+ struct stat st;
+
+ pid_t tar_pid;
+
+ unsigned last_percent;
+ RateLimit progress_rate_limit;
+};
+
+TarImport* tar_import_unref(TarImport *i) {
+ if (!i)
+ return NULL;
+
+ sd_event_source_unref(i->input_event_source);
+
+ if (i->tar_pid > 1) {
+ (void) kill_and_sigcont(i->tar_pid, SIGKILL);
+ (void) wait_for_terminate(i->tar_pid, NULL);
+ }
+
+ if (i->temp_path) {
+ (void) rm_rf(i->temp_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ free(i->temp_path);
+ }
+
+ import_compress_free(&i->compress);
+
+ sd_event_unref(i->event);
+
+ safe_close(i->tar_fd);
+
+ free(i->final_path);
+ free(i->image_root);
+ free(i->local);
+ return mfree(i);
+}
+
+int tar_import_new(
+ TarImport **ret,
+ sd_event *event,
+ const char *image_root,
+ TarImportFinished on_finished,
+ void *userdata) {
+
+ _cleanup_(tar_import_unrefp) TarImport *i = NULL;
+ _cleanup_free_ char *root = NULL;
+ int r;
+
+ assert(ret);
+
+ root = strdup(image_root ?: "/var/lib/machines");
+ if (!root)
+ return -ENOMEM;
+
+ i = new(TarImport, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (TarImport) {
+ .input_fd = -1,
+ .tar_fd = -1,
+ .on_finished = on_finished,
+ .userdata = userdata,
+ .last_percent = (unsigned) -1,
+ .image_root = TAKE_PTR(root),
+ };
+
+ RATELIMIT_INIT(i->progress_rate_limit, 100 * USEC_PER_MSEC, 1);
+
+ if (event)
+ i->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&i->event);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(i);
+
+ return 0;
+}
+
+static void tar_import_report_progress(TarImport *i) {
+ unsigned percent;
+ assert(i);
+
+ /* We have no size information, unless the source is a regular file */
+ if (!S_ISREG(i->st.st_mode))
+ return;
+
+ if (i->written_compressed >= (uint64_t) i->st.st_size)
+ percent = 100;
+ else
+ percent = (unsigned) ((i->written_compressed * UINT64_C(100)) / (uint64_t) i->st.st_size);
+
+ if (percent == i->last_percent)
+ return;
+
+ if (!ratelimit_below(&i->progress_rate_limit))
+ return;
+
+ sd_notifyf(false, "X_IMPORT_PROGRESS=%u", percent);
+ log_info("Imported %u%%.", percent);
+
+ i->last_percent = percent;
+}
+
+static int tar_import_finish(TarImport *i) {
+ int r;
+
+ assert(i);
+ assert(i->tar_fd >= 0);
+ assert(i->temp_path);
+ assert(i->final_path);
+
+ i->tar_fd = safe_close(i->tar_fd);
+
+ if (i->tar_pid > 0) {
+ r = wait_for_terminate_and_check("tar", i->tar_pid, WAIT_LOG);
+ i->tar_pid = 0;
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EPROTO;
+ }
+
+ r = import_mangle_os_tree(i->temp_path);
+ if (r < 0)
+ return r;
+
+ if (i->read_only) {
+ r = import_make_read_only(i->temp_path);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->force_local)
+ (void) rm_rf(i->final_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+
+ r = rename_noreplace(AT_FDCWD, i->temp_path, AT_FDCWD, i->final_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to move image into place: %m");
+
+ i->temp_path = mfree(i->temp_path);
+
+ return 0;
+}
+
+static int tar_import_fork_tar(TarImport *i) {
+ int r;
+
+ assert(i);
+
+ assert(!i->final_path);
+ assert(!i->temp_path);
+ assert(i->tar_fd < 0);
+
+ i->final_path = strjoin(i->image_root, "/", i->local);
+ if (!i->final_path)
+ return log_oom();
+
+ r = tempfn_random(i->final_path, NULL, &i->temp_path);
+ if (r < 0)
+ return log_oom();
+
+ (void) mkdir_parents_label(i->temp_path, 0700);
+
+ r = btrfs_subvol_make(i->temp_path);
+ if (r == -ENOTTY) {
+ if (mkdir(i->temp_path, 0755) < 0)
+ return log_error_errno(errno, "Failed to create directory %s: %m", i->temp_path);
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to create subvolume %s: %m", i->temp_path);
+ else
+ (void) import_assign_pool_quota_and_warn(i->temp_path);
+
+ i->tar_fd = import_fork_tar_x(i->temp_path, &i->tar_pid);
+ if (i->tar_fd < 0)
+ return i->tar_fd;
+
+ return 0;
+}
+
+static int tar_import_write(const void *p, size_t sz, void *userdata) {
+ TarImport *i = userdata;
+ int r;
+
+ r = loop_write(i->tar_fd, p, sz, false);
+ if (r < 0)
+ return r;
+
+ i->written_uncompressed += sz;
+
+ return 0;
+}
+
+static int tar_import_process(TarImport *i) {
+ ssize_t l;
+ int r;
+
+ assert(i);
+ assert(i->buffer_size < sizeof(i->buffer));
+
+ l = read(i->input_fd, i->buffer + i->buffer_size, sizeof(i->buffer) - i->buffer_size);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ r = log_error_errno(errno, "Failed to read input file: %m");
+ goto finish;
+ }
+ if (l == 0) {
+ if (i->compress.type == IMPORT_COMPRESS_UNKNOWN) {
+ log_error("Premature end of file.");
+ r = -EIO;
+ goto finish;
+ }
+
+ r = tar_import_finish(i);
+ goto finish;
+ }
+
+ i->buffer_size += l;
+
+ if (i->compress.type == IMPORT_COMPRESS_UNKNOWN) {
+ r = import_uncompress_detect(&i->compress, i->buffer, i->buffer_size);
+ if (r < 0) {
+ log_error_errno(r, "Failed to detect file compression: %m");
+ goto finish;
+ }
+ if (r == 0) /* Need more data */
+ return 0;
+
+ r = tar_import_fork_tar(i);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = import_uncompress(&i->compress, i->buffer, i->buffer_size, tar_import_write, i);
+ if (r < 0) {
+ log_error_errno(r, "Failed to decode and write: %m");
+ goto finish;
+ }
+
+ i->written_compressed += i->buffer_size;
+ i->buffer_size = 0;
+
+ tar_import_report_progress(i);
+
+ return 0;
+
+finish:
+ if (i->on_finished)
+ i->on_finished(i, r, i->userdata);
+ else
+ sd_event_exit(i->event, r);
+
+ return 0;
+}
+
+static int tar_import_on_input(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ TarImport *i = userdata;
+
+ return tar_import_process(i);
+}
+
+static int tar_import_on_defer(sd_event_source *s, void *userdata) {
+ TarImport *i = userdata;
+
+ return tar_import_process(i);
+}
+
+int tar_import_start(TarImport *i, int fd, const char *local, bool force_local, bool read_only) {
+ int r;
+
+ assert(i);
+ assert(fd >= 0);
+ assert(local);
+
+ if (!machine_name_is_valid(local))
+ return -EINVAL;
+
+ if (i->input_fd >= 0)
+ return -EBUSY;
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&i->local, local);
+ if (r < 0)
+ return r;
+ i->force_local = force_local;
+ i->read_only = read_only;
+
+ if (fstat(fd, &i->st) < 0)
+ return -errno;
+
+ r = sd_event_add_io(i->event, &i->input_event_source, fd, EPOLLIN, tar_import_on_input, i);
+ if (r == -EPERM) {
+ /* This fd does not support epoll, for example because it is a regular file. Busy read in that case */
+ r = sd_event_add_defer(i->event, &i->input_event_source, tar_import_on_defer, i);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(i->input_event_source, SD_EVENT_ON);
+ }
+ if (r < 0)
+ return r;
+
+ i->input_fd = fd;
+ return r;
+}
diff --git a/src/import/import-tar.h b/src/import/import-tar.h
new file mode 100644
index 0000000..347f522
--- /dev/null
+++ b/src/import/import-tar.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-event.h"
+
+#include "import-util.h"
+#include "macro.h"
+
+typedef struct TarImport TarImport;
+
+typedef void (*TarImportFinished)(TarImport *import, int error, void *userdata);
+
+int tar_import_new(TarImport **import, sd_event *event, const char *image_root, TarImportFinished on_finished, void *userdata);
+TarImport* tar_import_unref(TarImport *import);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(TarImport*, tar_import_unref);
+
+int tar_import_start(TarImport *import, int fd, const char *local, bool force_local, bool read_only);
diff --git a/src/import/import.c b/src/import/import.c
new file mode 100644
index 0000000..f34244a
--- /dev/null
+++ b/src/import/import.c
@@ -0,0 +1,322 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "import-raw.h"
+#include "import-tar.h"
+#include "import-util.h"
+#include "machine-image.h"
+#include "main-func.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "verbs.h"
+
+static bool arg_force = false;
+static bool arg_read_only = false;
+static const char *arg_image_root = "/var/lib/machines";
+
+static int interrupt_signal_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ log_notice("Transfer aborted.");
+ sd_event_exit(sd_event_source_get_event(s), EINTR);
+ return 0;
+}
+
+static void on_tar_finished(TarImport *import, int error, void *userdata) {
+ sd_event *event = userdata;
+ assert(import);
+
+ if (error == 0)
+ log_info("Operation completed successfully.");
+
+ sd_event_exit(event, abs(error));
+}
+
+static int import_tar(int argc, char *argv[], void *userdata) {
+ _cleanup_(tar_import_unrefp) TarImport *import = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ const char *path = NULL, *local = NULL;
+ _cleanup_free_ char *ll = NULL;
+ _cleanup_close_ int open_fd = -1;
+ int r, fd;
+
+ if (argc >= 2)
+ path = argv[1];
+ if (isempty(path) || streq(path, "-"))
+ path = NULL;
+
+ if (argc >= 3)
+ local = argv[2];
+ else if (path)
+ local = basename(path);
+ if (isempty(local) || streq(local, "-"))
+ local = NULL;
+
+ if (local) {
+ r = tar_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local)) {
+ log_error("Local image name '%s' is not valid.", local);
+ return -EINVAL;
+ }
+
+ if (!arg_force) {
+ r = image_find(IMAGE_MACHINE, local, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to check whether image '%s' exists: %m", local);
+ } else {
+ log_error("Image '%s' already exists.", local);
+ return -EEXIST;
+ }
+ }
+ } else
+ local = "imported";
+
+ if (path) {
+ open_fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (open_fd < 0)
+ return log_error_errno(errno, "Failed to open tar image to import: %m");
+
+ fd = open_fd;
+
+ log_info("Importing '%s', saving as '%s'.", path, local);
+ } else {
+ _cleanup_free_ char *pretty = NULL;
+
+ fd = STDIN_FILENO;
+
+ (void) fd_get_path(fd, &pretty);
+ log_info("Importing '%s', saving as '%s'.", strna(pretty), local);
+ }
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, interrupt_signal_handler, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, interrupt_signal_handler, NULL);
+
+ r = tar_import_new(&import, event, arg_image_root, on_tar_finished, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate importer: %m");
+
+ r = tar_import_start(import, fd, local, arg_force, arg_read_only);
+ if (r < 0)
+ return log_error_errno(r, "Failed to import image: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ log_info("Exiting.");
+ return -r;
+}
+
+static void on_raw_finished(RawImport *import, int error, void *userdata) {
+ sd_event *event = userdata;
+ assert(import);
+
+ if (error == 0)
+ log_info("Operation completed successfully.");
+
+ sd_event_exit(event, abs(error));
+}
+
+static int import_raw(int argc, char *argv[], void *userdata) {
+ _cleanup_(raw_import_unrefp) RawImport *import = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ const char *path = NULL, *local = NULL;
+ _cleanup_free_ char *ll = NULL;
+ _cleanup_close_ int open_fd = -1;
+ int r, fd;
+
+ if (argc >= 2)
+ path = argv[1];
+ if (isempty(path) || streq(path, "-"))
+ path = NULL;
+
+ if (argc >= 3)
+ local = argv[2];
+ else if (path)
+ local = basename(path);
+ if (isempty(local) || streq(local, "-"))
+ local = NULL;
+
+ if (local) {
+ r = raw_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local)) {
+ log_error("Local image name '%s' is not valid.", local);
+ return -EINVAL;
+ }
+
+ if (!arg_force) {
+ r = image_find(IMAGE_MACHINE, local, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to check whether image '%s' exists: %m", local);
+ } else {
+ log_error("Image '%s' already exists.", local);
+ return -EEXIST;
+ }
+ }
+ } else
+ local = "imported";
+
+ if (path) {
+ open_fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (open_fd < 0)
+ return log_error_errno(errno, "Failed to open raw image to import: %m");
+
+ fd = open_fd;
+
+ log_info("Importing '%s', saving as '%s'.", path, local);
+ } else {
+ _cleanup_free_ char *pretty = NULL;
+
+ fd = STDIN_FILENO;
+
+ (void) fd_get_path(fd, &pretty);
+ log_info("Importing '%s', saving as '%s'.", strempty(pretty), local);
+ }
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, interrupt_signal_handler, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, interrupt_signal_handler, NULL);
+
+ r = raw_import_new(&import, event, arg_image_root, on_raw_finished, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate importer: %m");
+
+ r = raw_import_start(import, fd, local, arg_force, arg_read_only);
+ if (r < 0)
+ return log_error_errno(r, "Failed to import image: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ log_info("Exiting.");
+ return -r;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Import container or virtual machine images.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --force Force creation of image\n"
+ " --image-root=PATH Image root directory\n"
+ " --read-only Create a read-only image\n\n"
+ "Commands:\n"
+ " tar FILE [NAME] Import a TAR image\n"
+ " raw FILE [NAME] Import a RAW image\n",
+ program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_FORCE,
+ ARG_IMAGE_ROOT,
+ ARG_READ_ONLY,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "force", no_argument, NULL, ARG_FORCE },
+ { "image-root", required_argument, NULL, ARG_IMAGE_ROOT },
+ { "read-only", no_argument, NULL, ARG_READ_ONLY },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_FORCE:
+ arg_force = true;
+ break;
+
+ case ARG_IMAGE_ROOT:
+ arg_image_root = optarg;
+ break;
+
+ case ARG_READ_ONLY:
+ arg_read_only = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int import_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "tar", 2, 3, 0, import_tar },
+ { "raw", 2, 3, 0, import_raw },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return 0;
+
+ (void) ignore_signals(SIGPIPE, -1);
+
+ return import_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/import/importd.c b/src/import/importd.c
new file mode 100644
index 0000000..2426933
--- /dev/null
+++ b/src/import/importd.c
@@ -0,0 +1,1266 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "float.h"
+#include "hostname-util.h"
+#include "import-util.h"
+#include "machine-pool.h"
+#include "main-func.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "web-util.h"
+
+typedef struct Transfer Transfer;
+typedef struct Manager Manager;
+
+typedef enum TransferType {
+ TRANSFER_IMPORT_TAR,
+ TRANSFER_IMPORT_RAW,
+ TRANSFER_IMPORT_FS,
+ TRANSFER_EXPORT_TAR,
+ TRANSFER_EXPORT_RAW,
+ TRANSFER_PULL_TAR,
+ TRANSFER_PULL_RAW,
+ _TRANSFER_TYPE_MAX,
+ _TRANSFER_TYPE_INVALID = -1,
+} TransferType;
+
+struct Transfer {
+ Manager *manager;
+
+ uint32_t id;
+ char *object_path;
+
+ TransferType type;
+ ImportVerify verify;
+
+ char *remote;
+ char *local;
+ bool force_local;
+ bool read_only;
+
+ char *format;
+
+ pid_t pid;
+
+ int log_fd;
+
+ char log_message[LINE_MAX];
+ size_t log_message_size;
+
+ sd_event_source *pid_event_source;
+ sd_event_source *log_event_source;
+
+ unsigned n_canceled;
+ unsigned progress_percent;
+
+ int stdin_fd;
+ int stdout_fd;
+};
+
+struct Manager {
+ sd_event *event;
+ sd_bus *bus;
+
+ uint32_t current_transfer_id;
+ Hashmap *transfers;
+
+ Hashmap *polkit_registry;
+
+ int notify_fd;
+
+ sd_event_source *notify_event_source;
+};
+
+#define TRANSFERS_MAX 64
+
+static const char* const transfer_type_table[_TRANSFER_TYPE_MAX] = {
+ [TRANSFER_IMPORT_TAR] = "import-tar",
+ [TRANSFER_IMPORT_RAW] = "import-raw",
+ [TRANSFER_IMPORT_FS] = "import-fs",
+ [TRANSFER_EXPORT_TAR] = "export-tar",
+ [TRANSFER_EXPORT_RAW] = "export-raw",
+ [TRANSFER_PULL_TAR] = "pull-tar",
+ [TRANSFER_PULL_RAW] = "pull-raw",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(transfer_type, TransferType);
+
+static Transfer *transfer_unref(Transfer *t) {
+ if (!t)
+ return NULL;
+
+ if (t->manager)
+ hashmap_remove(t->manager->transfers, UINT32_TO_PTR(t->id));
+
+ sd_event_source_unref(t->pid_event_source);
+ sd_event_source_unref(t->log_event_source);
+
+ free(t->remote);
+ free(t->local);
+ free(t->format);
+ free(t->object_path);
+
+ if (t->pid > 0) {
+ (void) kill_and_sigcont(t->pid, SIGKILL);
+ (void) wait_for_terminate(t->pid, NULL);
+ }
+
+ safe_close(t->log_fd);
+ safe_close(t->stdin_fd);
+ safe_close(t->stdout_fd);
+
+ return mfree(t);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Transfer*, transfer_unref);
+
+static int transfer_new(Manager *m, Transfer **ret) {
+ _cleanup_(transfer_unrefp) Transfer *t = NULL;
+ uint32_t id;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ if (hashmap_size(m->transfers) >= TRANSFERS_MAX)
+ return -E2BIG;
+
+ r = hashmap_ensure_allocated(&m->transfers, &trivial_hash_ops);
+ if (r < 0)
+ return r;
+
+ t = new(Transfer, 1);
+ if (!t)
+ return -ENOMEM;
+
+ *t = (Transfer) {
+ .type = _TRANSFER_TYPE_INVALID,
+ .log_fd = -1,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .verify = _IMPORT_VERIFY_INVALID,
+ .progress_percent= (unsigned) -1,
+ };
+
+ id = m->current_transfer_id + 1;
+
+ if (asprintf(&t->object_path, "/org/freedesktop/import1/transfer/_%" PRIu32, id) < 0)
+ return -ENOMEM;
+
+ r = hashmap_put(m->transfers, UINT32_TO_PTR(id), t);
+ if (r < 0)
+ return r;
+
+ m->current_transfer_id = id;
+
+ t->manager = m;
+ t->id = id;
+
+ *ret = TAKE_PTR(t);
+
+ return 0;
+}
+
+static double transfer_percent_as_double(Transfer *t) {
+ assert(t);
+
+ if (t->progress_percent == (unsigned) -1)
+ return -DBL_MAX;
+
+ return (double) t->progress_percent / 100.0;
+}
+
+static void transfer_send_log_line(Transfer *t, const char *line) {
+ int r, priority = LOG_INFO;
+
+ assert(t);
+ assert(line);
+
+ syslog_parse_priority(&line, &priority, true);
+
+ log_full(priority, "(transfer%" PRIu32 ") %s", t->id, line);
+
+ r = sd_bus_emit_signal(
+ t->manager->bus,
+ t->object_path,
+ "org.freedesktop.import1.Transfer",
+ "LogMessage",
+ "us",
+ priority,
+ line);
+ if (r < 0)
+ log_warning_errno(r, "Cannot emit log message signal, ignoring: %m");
+ }
+
+static void transfer_send_logs(Transfer *t, bool flush) {
+ assert(t);
+
+ /* Try to send out all log messages, if we can. But if we
+ * can't we remove the messages from the buffer, but don't
+ * fail */
+
+ while (t->log_message_size > 0) {
+ _cleanup_free_ char *n = NULL;
+ char *e;
+
+ if (t->log_message_size >= sizeof(t->log_message))
+ e = t->log_message + sizeof(t->log_message);
+ else {
+ char *a, *b;
+
+ a = memchr(t->log_message, 0, t->log_message_size);
+ b = memchr(t->log_message, '\n', t->log_message_size);
+
+ if (a && b)
+ e = a < b ? a : b;
+ else if (a)
+ e = a;
+ else
+ e = b;
+ }
+
+ if (!e) {
+ if (!flush)
+ return;
+
+ e = t->log_message + t->log_message_size;
+ }
+
+ n = strndup(t->log_message, e - t->log_message);
+
+ /* Skip over NUL and newlines */
+ while (e < t->log_message + t->log_message_size && (*e == 0 || *e == '\n'))
+ e++;
+
+ memmove(t->log_message, e, t->log_message + sizeof(t->log_message) - e);
+ t->log_message_size -= e - t->log_message;
+
+ if (!n) {
+ log_oom();
+ continue;
+ }
+
+ if (isempty(n))
+ continue;
+
+ transfer_send_log_line(t, n);
+ }
+}
+
+static int transfer_finalize(Transfer *t, bool success) {
+ int r;
+
+ assert(t);
+
+ transfer_send_logs(t, true);
+
+ r = sd_bus_emit_signal(
+ t->manager->bus,
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ "TransferRemoved",
+ "uos",
+ t->id,
+ t->object_path,
+ success ? "done" :
+ t->n_canceled > 0 ? "canceled" : "failed");
+
+ if (r < 0)
+ log_error_errno(r, "Cannot emit message: %m");
+
+ transfer_unref(t);
+ return 0;
+}
+
+static int transfer_cancel(Transfer *t) {
+ int r;
+
+ assert(t);
+
+ r = kill_and_sigcont(t->pid, t->n_canceled < 3 ? SIGTERM : SIGKILL);
+ if (r < 0)
+ return r;
+
+ t->n_canceled++;
+ return 0;
+}
+
+static int transfer_on_pid(sd_event_source *s, const siginfo_t *si, void *userdata) {
+ Transfer *t = userdata;
+ bool success = false;
+
+ assert(s);
+ assert(t);
+
+ if (si->si_code == CLD_EXITED) {
+ if (si->si_status != 0)
+ log_error("Transfer process failed with exit code %i.", si->si_status);
+ else {
+ log_debug("Transfer process succeeded.");
+ success = true;
+ }
+
+ } else if (IN_SET(si->si_code, CLD_KILLED, CLD_DUMPED))
+ log_error("Transfer process terminated by signal %s.", signal_to_string(si->si_status));
+ else
+ log_error("Transfer process failed due to unknown reason.");
+
+ t->pid = 0;
+
+ return transfer_finalize(t, success);
+}
+
+static int transfer_on_log(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Transfer *t = userdata;
+ ssize_t l;
+
+ assert(s);
+ assert(t);
+
+ l = read(fd, t->log_message + t->log_message_size, sizeof(t->log_message) - t->log_message_size);
+ if (l < 0)
+ log_error_errno(errno, "Failed to read log message: %m");
+ if (l <= 0) {
+ /* EOF/read error. We just close the pipe here, and
+ * close the watch, waiting for the SIGCHLD to arrive,
+ * before we do anything else. */
+ t->log_event_source = sd_event_source_unref(t->log_event_source);
+ return 0;
+ }
+
+ t->log_message_size += l;
+
+ transfer_send_logs(t, false);
+
+ return 0;
+}
+
+static int transfer_start(Transfer *t) {
+ _cleanup_close_pair_ int pipefd[2] = { -1, -1 };
+ int r;
+
+ assert(t);
+ assert(t->pid <= 0);
+
+ if (pipe2(pipefd, O_CLOEXEC) < 0)
+ return -errno;
+
+ r = safe_fork("(sd-transfer)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &t->pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char *cmd[] = {
+ NULL, /* systemd-import, systemd-import-fs, systemd-export or systemd-pull */
+ NULL, /* tar, raw */
+ NULL, /* --verify= */
+ NULL, /* verify argument */
+ NULL, /* maybe --force */
+ NULL, /* maybe --read-only */
+ NULL, /* if so: the actual URL */
+ NULL, /* maybe --format= */
+ NULL, /* if so: the actual format */
+ NULL, /* remote */
+ NULL, /* local */
+ NULL
+ };
+ unsigned k = 0;
+
+ /* Child */
+
+ pipefd[0] = safe_close(pipefd[0]);
+
+ r = rearrange_stdio(t->stdin_fd,
+ t->stdout_fd < 0 ? pipefd[1] : t->stdout_fd,
+ pipefd[1]);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set stdin/stdout/stderr: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (setenv("SYSTEMD_LOG_TARGET", "console-prefixed", 1) < 0 ||
+ setenv("NOTIFY_SOCKET", "/run/systemd/import/notify", 1) < 0) {
+ log_error_errno(errno, "setenv() failed: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ switch (t->type) {
+
+ case TRANSFER_IMPORT_TAR:
+ case TRANSFER_IMPORT_RAW:
+ cmd[k++] = SYSTEMD_IMPORT_PATH;
+ break;
+
+ case TRANSFER_IMPORT_FS:
+ cmd[k++] = SYSTEMD_IMPORT_FS_PATH;
+ break;
+
+ case TRANSFER_EXPORT_TAR:
+ case TRANSFER_EXPORT_RAW:
+ cmd[k++] = SYSTEMD_EXPORT_PATH;
+ break;
+
+ case TRANSFER_PULL_TAR:
+ case TRANSFER_PULL_RAW:
+ cmd[k++] = SYSTEMD_PULL_PATH;
+ break;
+
+ default:
+ assert_not_reached("Unexpected transfer type");
+ }
+
+ switch (t->type) {
+
+ case TRANSFER_IMPORT_TAR:
+ case TRANSFER_EXPORT_TAR:
+ case TRANSFER_PULL_TAR:
+ cmd[k++] = "tar";
+ break;
+
+ case TRANSFER_IMPORT_RAW:
+ case TRANSFER_EXPORT_RAW:
+ case TRANSFER_PULL_RAW:
+ cmd[k++] = "raw";
+ break;
+
+ case TRANSFER_IMPORT_FS:
+ cmd[k++] = "run";
+ break;
+
+ default:
+ break;
+ }
+
+ if (t->verify != _IMPORT_VERIFY_INVALID) {
+ cmd[k++] = "--verify";
+ cmd[k++] = import_verify_to_string(t->verify);
+ }
+
+ if (t->force_local)
+ cmd[k++] = "--force";
+ if (t->read_only)
+ cmd[k++] = "--read-only";
+
+ if (t->format) {
+ cmd[k++] = "--format";
+ cmd[k++] = t->format;
+ }
+
+ if (!IN_SET(t->type, TRANSFER_EXPORT_TAR, TRANSFER_EXPORT_RAW)) {
+ if (t->remote)
+ cmd[k++] = t->remote;
+ else
+ cmd[k++] = "-";
+ }
+
+ if (t->local)
+ cmd[k++] = t->local;
+ cmd[k] = NULL;
+
+ execv(cmd[0], (char * const *) cmd);
+ log_error_errno(errno, "Failed to execute %s tool: %m", cmd[0]);
+ _exit(EXIT_FAILURE);
+ }
+
+ pipefd[1] = safe_close(pipefd[1]);
+ t->log_fd = TAKE_FD(pipefd[0]);
+
+ t->stdin_fd = safe_close(t->stdin_fd);
+
+ r = sd_event_add_child(t->manager->event, &t->pid_event_source, t->pid, WEXITED, transfer_on_pid, t);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_io(t->manager->event, &t->log_event_source, t->log_fd, EPOLLIN, transfer_on_log, t);
+ if (r < 0)
+ return r;
+
+ /* Make sure always process logging before SIGCHLD */
+ r = sd_event_source_set_priority(t->log_event_source, SD_EVENT_PRIORITY_NORMAL -5);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_emit_signal(
+ t->manager->bus,
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ "TransferNew",
+ "uo",
+ t->id,
+ t->object_path);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static Manager *manager_unref(Manager *m) {
+ Transfer *t;
+
+ if (!m)
+ return NULL;
+
+ sd_event_source_unref(m->notify_event_source);
+ safe_close(m->notify_fd);
+
+ while ((t = hashmap_first(m->transfers)))
+ transfer_unref(t);
+
+ hashmap_free(m->transfers);
+
+ bus_verify_polkit_async_registry_free(m->polkit_registry);
+
+ m->bus = sd_bus_flush_close_unref(m->bus);
+ sd_event_unref(m->event);
+
+ return mfree(m);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_unref);
+
+static int manager_on_notify(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+
+ char buf[NOTIFY_BUFFER_MAX+1];
+ struct iovec iovec = {
+ .iov_base = buf,
+ .iov_len = sizeof(buf)-1,
+ };
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int) * NOTIFY_FD_MAX)];
+ } control = {};
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct ucred *ucred = NULL;
+ Manager *m = userdata;
+ struct cmsghdr *cmsg;
+ char *p, *e;
+ Transfer *t;
+ Iterator i;
+ ssize_t n;
+ int r;
+
+ n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (n < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return -errno;
+ }
+
+ cmsg_close_all(&msghdr);
+
+ CMSG_FOREACH(cmsg, &msghdr)
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
+ ucred = (struct ucred*) CMSG_DATA(cmsg);
+
+ if (msghdr.msg_flags & MSG_TRUNC) {
+ log_warning("Got overly long notification datagram, ignoring.");
+ return 0;
+ }
+
+ if (!ucred || ucred->pid <= 0) {
+ log_warning("Got notification datagram lacking credential information, ignoring.");
+ return 0;
+ }
+
+ HASHMAP_FOREACH(t, m->transfers, i)
+ if (ucred->pid == t->pid)
+ break;
+
+ if (!t) {
+ log_warning("Got notification datagram from unexpected peer, ignoring.");
+ return 0;
+ }
+
+ buf[n] = 0;
+
+ p = startswith(buf, "X_IMPORT_PROGRESS=");
+ if (!p) {
+ p = strstr(buf, "\nX_IMPORT_PROGRESS=");
+ if (!p)
+ return 0;
+
+ p += 19;
+ }
+
+ e = strchrnul(p, '\n');
+ *e = 0;
+
+ r = parse_percent(p);
+ if (r < 0) {
+ log_warning("Got invalid percent value, ignoring.");
+ return 0;
+ }
+
+ t->progress_percent = (unsigned) r;
+
+ log_debug("Got percentage from client: %u%%", t->progress_percent);
+ return 0;
+}
+
+static int manager_new(Manager **ret) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/import/notify",
+ };
+ int r;
+
+ assert(ret);
+
+ m = new0(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ sd_event_set_watchdog(m->event, true);
+
+ r = sd_bus_default_system(&m->bus);
+ if (r < 0)
+ return r;
+
+ m->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (m->notify_fd < 0)
+ return -errno;
+
+ (void) mkdir_parents_label(sa.un.sun_path, 0755);
+ (void) sockaddr_un_unlink(&sa.un);
+
+ if (bind(m->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0)
+ return -errno;
+
+ r = setsockopt_int(m->notify_fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_io(m->event, &m->notify_event_source, m->notify_fd, EPOLLIN, manager_on_notify, m);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+static Transfer *manager_find(Manager *m, TransferType type, const char *remote) {
+ Transfer *t;
+ Iterator i;
+
+ assert(m);
+ assert(type >= 0);
+ assert(type < _TRANSFER_TYPE_MAX);
+
+ HASHMAP_FOREACH(t, m->transfers, i)
+ if (t->type == type && streq_ptr(t->remote, remote))
+ return t;
+
+ return NULL;
+}
+
+static int method_import_tar_or_raw(sd_bus_message *msg, void *userdata, sd_bus_error *error) {
+ _cleanup_(transfer_unrefp) Transfer *t = NULL;
+ int fd, force, read_only, r;
+ const char *local, *object;
+ Manager *m = userdata;
+ TransferType type;
+ uint32_t id;
+
+ assert(msg);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ msg,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.import1.import",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = sd_bus_message_read(msg, "hsbb", &fd, &local, &force, &read_only);
+ if (r < 0)
+ return r;
+
+ r = fd_verify_regular(fd);
+ if (r < 0)
+ return r;
+
+ if (!machine_name_is_valid(local))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Local name %s is invalid", local);
+
+ r = setup_machine_directory(error);
+ if (r < 0)
+ return r;
+
+ type = streq_ptr(sd_bus_message_get_member(msg), "ImportTar") ? TRANSFER_IMPORT_TAR : TRANSFER_IMPORT_RAW;
+
+ r = transfer_new(m, &t);
+ if (r < 0)
+ return r;
+
+ t->type = type;
+ t->force_local = force;
+ t->read_only = read_only;
+
+ t->local = strdup(local);
+ if (!t->local)
+ return -ENOMEM;
+
+ t->stdin_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (t->stdin_fd < 0)
+ return -errno;
+
+ r = transfer_start(t);
+ if (r < 0)
+ return r;
+
+ object = t->object_path;
+ id = t->id;
+ t = NULL;
+
+ return sd_bus_reply_method_return(msg, "uo", id, object);
+}
+
+static int method_import_fs(sd_bus_message *msg, void *userdata, sd_bus_error *error) {
+ _cleanup_(transfer_unrefp) Transfer *t = NULL;
+ int fd, force, read_only, r;
+ const char *local, *object;
+ Manager *m = userdata;
+ uint32_t id;
+
+ assert(msg);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ msg,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.import1.import",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = sd_bus_message_read(msg, "hsbb", &fd, &local, &force, &read_only);
+ if (r < 0)
+ return r;
+
+ r = fd_verify_directory(fd);
+ if (r < 0)
+ return r;
+
+ if (!machine_name_is_valid(local))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Local name %s is invalid", local);
+
+ r = setup_machine_directory(error);
+ if (r < 0)
+ return r;
+
+ r = transfer_new(m, &t);
+ if (r < 0)
+ return r;
+
+ t->type = TRANSFER_IMPORT_FS;
+ t->force_local = force;
+ t->read_only = read_only;
+
+ t->local = strdup(local);
+ if (!t->local)
+ return -ENOMEM;
+
+ t->stdin_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (t->stdin_fd < 0)
+ return -errno;
+
+ r = transfer_start(t);
+ if (r < 0)
+ return r;
+
+ object = t->object_path;
+ id = t->id;
+ t = NULL;
+
+ return sd_bus_reply_method_return(msg, "uo", id, object);
+}
+
+static int method_export_tar_or_raw(sd_bus_message *msg, void *userdata, sd_bus_error *error) {
+ _cleanup_(transfer_unrefp) Transfer *t = NULL;
+ int fd, r;
+ const char *local, *object, *format;
+ Manager *m = userdata;
+ TransferType type;
+ uint32_t id;
+
+ assert(msg);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ msg,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.import1.export",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = sd_bus_message_read(msg, "shs", &local, &fd, &format);
+ if (r < 0)
+ return r;
+
+ if (!machine_name_is_valid(local))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Local name %s is invalid", local);
+
+ r = fd_verify_regular(fd);
+ if (r < 0)
+ return r;
+
+ type = streq_ptr(sd_bus_message_get_member(msg), "ExportTar") ? TRANSFER_EXPORT_TAR : TRANSFER_EXPORT_RAW;
+
+ r = transfer_new(m, &t);
+ if (r < 0)
+ return r;
+
+ t->type = type;
+
+ if (!isempty(format)) {
+ t->format = strdup(format);
+ if (!t->format)
+ return -ENOMEM;
+ }
+
+ t->local = strdup(local);
+ if (!t->local)
+ return -ENOMEM;
+
+ t->stdout_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (t->stdout_fd < 0)
+ return -errno;
+
+ r = transfer_start(t);
+ if (r < 0)
+ return r;
+
+ object = t->object_path;
+ id = t->id;
+ t = NULL;
+
+ return sd_bus_reply_method_return(msg, "uo", id, object);
+}
+
+static int method_pull_tar_or_raw(sd_bus_message *msg, void *userdata, sd_bus_error *error) {
+ _cleanup_(transfer_unrefp) Transfer *t = NULL;
+ const char *remote, *local, *verify, *object;
+ Manager *m = userdata;
+ ImportVerify v;
+ TransferType type;
+ int force, r;
+ uint32_t id;
+
+ assert(msg);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ msg,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.import1.pull",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = sd_bus_message_read(msg, "sssb", &remote, &local, &verify, &force);
+ if (r < 0)
+ return r;
+
+ if (!http_url_is_valid(remote))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "URL %s is invalid", remote);
+
+ if (isempty(local))
+ local = NULL;
+ else if (!machine_name_is_valid(local))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Local name %s is invalid", local);
+
+ if (isempty(verify))
+ v = IMPORT_VERIFY_SIGNATURE;
+ else
+ v = import_verify_from_string(verify);
+ if (v < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown verification mode %s", verify);
+
+ r = setup_machine_directory(error);
+ if (r < 0)
+ return r;
+
+ type = streq_ptr(sd_bus_message_get_member(msg), "PullTar") ? TRANSFER_PULL_TAR : TRANSFER_PULL_RAW;
+
+ if (manager_find(m, type, remote))
+ return sd_bus_error_setf(error, BUS_ERROR_TRANSFER_IN_PROGRESS, "Transfer for %s already in progress.", remote);
+
+ r = transfer_new(m, &t);
+ if (r < 0)
+ return r;
+
+ t->type = type;
+ t->verify = v;
+ t->force_local = force;
+
+ t->remote = strdup(remote);
+ if (!t->remote)
+ return -ENOMEM;
+
+ if (local) {
+ t->local = strdup(local);
+ if (!t->local)
+ return -ENOMEM;
+ }
+
+ r = transfer_start(t);
+ if (r < 0)
+ return r;
+
+ object = t->object_path;
+ id = t->id;
+ t = NULL;
+
+ return sd_bus_reply_method_return(msg, "uo", id, object);
+}
+
+static int method_list_transfers(sd_bus_message *msg, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ Transfer *t;
+ Iterator i;
+ int r;
+
+ assert(msg);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(msg, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(usssdo)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(t, m->transfers, i) {
+
+ r = sd_bus_message_append(
+ reply,
+ "(usssdo)",
+ t->id,
+ transfer_type_to_string(t->type),
+ t->remote,
+ t->local,
+ transfer_percent_as_double(t),
+ t->object_path);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_cancel(sd_bus_message *msg, void *userdata, sd_bus_error *error) {
+ Transfer *t = userdata;
+ int r;
+
+ assert(msg);
+ assert(t);
+
+ r = bus_verify_polkit_async(
+ msg,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.import1.pull",
+ NULL,
+ false,
+ UID_INVALID,
+ &t->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = transfer_cancel(t);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(msg, NULL);
+}
+
+static int method_cancel_transfer(sd_bus_message *msg, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ Transfer *t;
+ uint32_t id;
+ int r;
+
+ assert(msg);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ msg,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.import1.pull",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = sd_bus_message_read(msg, "u", &id);
+ if (r < 0)
+ return r;
+ if (id <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid transfer id");
+
+ t = hashmap_get(m->transfers, UINT32_TO_PTR(id));
+ if (!t)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_TRANSFER, "No transfer by id %" PRIu32, id);
+
+ r = transfer_cancel(t);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(msg, NULL);
+}
+
+static int property_get_progress(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Transfer *t = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(t);
+
+ return sd_bus_message_append(reply, "d", transfer_percent_as_double(t));
+}
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, transfer_type, TransferType);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_verify, import_verify, ImportVerify);
+
+static const sd_bus_vtable transfer_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Id", "u", NULL, offsetof(Transfer, id), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Local", "s", NULL, offsetof(Transfer, local), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Remote", "s", NULL, offsetof(Transfer, remote), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Transfer, type), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Verify", "s", property_get_verify, offsetof(Transfer, verify), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Progress", "d", property_get_progress, 0, 0),
+ SD_BUS_METHOD("Cancel", NULL, NULL, method_cancel, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_SIGNAL("LogMessage", "us", 0),
+ SD_BUS_VTABLE_END,
+};
+
+static const sd_bus_vtable manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("ImportTar", "hsbb", "uo", method_import_tar_or_raw, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ImportRaw", "hsbb", "uo", method_import_tar_or_raw, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ImportFileSystem", "hsbb", "uo", method_import_fs, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ExportTar", "shs", "uo", method_export_tar_or_raw, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ExportRaw", "shs", "uo", method_export_tar_or_raw, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("PullTar", "sssb", "uo", method_pull_tar_or_raw, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("PullRaw", "sssb", "uo", method_pull_tar_or_raw, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListTransfers", NULL, "a(usssdo)", method_list_transfers, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CancelTransfer", "u", NULL, method_cancel_transfer, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_SIGNAL("TransferNew", "uo", 0),
+ SD_BUS_SIGNAL("TransferRemoved", "uos", 0),
+ SD_BUS_VTABLE_END,
+};
+
+static int transfer_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ Transfer *t;
+ const char *p;
+ uint32_t id;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ p = startswith(path, "/org/freedesktop/import1/transfer/_");
+ if (!p)
+ return 0;
+
+ r = safe_atou32(p, &id);
+ if (r < 0 || id == 0)
+ return 0;
+
+ t = hashmap_get(m->transfers, UINT32_TO_PTR(id));
+ if (!t)
+ return 0;
+
+ *found = t;
+ return 1;
+}
+
+static int transfer_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ Transfer *t;
+ unsigned k = 0;
+ Iterator i;
+
+ l = new0(char*, hashmap_size(m->transfers) + 1);
+ if (!l)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(t, m->transfers, i) {
+
+ l[k] = strdup(t->object_path);
+ if (!l[k])
+ return -ENOMEM;
+
+ k++;
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+static int manager_add_bus_objects(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = sd_bus_add_object_vtable(m->bus, NULL, "/org/freedesktop/import1", "org.freedesktop.import1.Manager", manager_vtable, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register object: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/import1/transfer", "org.freedesktop.import1.Transfer", transfer_vtable, transfer_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register object: %m");
+
+ r = sd_bus_add_node_enumerator(m->bus, NULL, "/org/freedesktop/import1/transfer", transfer_node_enumerator, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add transfer enumerator: %m");
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.import1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ return 0;
+}
+
+static bool manager_check_idle(void *userdata) {
+ Manager *m = userdata;
+
+ return hashmap_isempty(m->transfers);
+}
+
+static int manager_run(Manager *m) {
+ assert(m);
+
+ return bus_event_loop_with_idle(
+ m->event,
+ m->bus,
+ "org.freedesktop.import1",
+ DEFAULT_EXIT_USEC,
+ manager_check_idle,
+ m);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (argc != 1) {
+ log_error("This program takes no arguments.");
+ return -EINVAL;
+ }
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate manager object: %m");
+
+ r = manager_add_bus_objects(m);
+ if (r < 0)
+ return r;
+
+ r = manager_run(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/import/meson.build b/src/import/meson.build
new file mode 100644
index 0000000..1c15fd8
--- /dev/null
+++ b/src/import/meson.build
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+systemd_importd_sources = files('''
+ importd.c
+'''.split())
+
+systemd_pull_sources = files('''
+ pull.c
+ pull-raw.c
+ pull-raw.h
+ pull-tar.c
+ pull-tar.h
+ pull-job.c
+ pull-job.h
+ pull-common.c
+ pull-common.h
+ import-common.c
+ import-common.h
+ import-compress.c
+ import-compress.h
+ curl-util.c
+ curl-util.h
+ qcow2-util.c
+ qcow2-util.h
+'''.split())
+
+systemd_import_sources = files('''
+ import.c
+ import-raw.c
+ import-raw.h
+ import-tar.c
+ import-tar.h
+ import-common.c
+ import-common.h
+ import-compress.c
+ import-compress.h
+ qcow2-util.c
+ qcow2-util.h
+'''.split())
+
+systemd_import_fs_sources = files('''
+ import-fs.c
+ import-common.c
+ import-common.h
+'''.split())
+
+systemd_export_sources = files('''
+ export.c
+ export-tar.c
+ export-tar.h
+ export-raw.c
+ export-raw.h
+ import-common.c
+ import-common.h
+ import-compress.c
+ import-compress.h
+'''.split())
+
+if conf.get('ENABLE_IMPORTD') == 1
+ install_data('org.freedesktop.import1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.import1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.import1.policy',
+ install_dir : polkitpolicydir)
+
+ install_data('import-pubring.gpg',
+ install_dir : rootlibexecdir)
+ # TODO: shouldn't this be in pkgdatadir?
+endif
+
+tests += [
+ [['src/import/test-qcow2.c',
+ 'src/import/qcow2-util.c',
+ 'src/import/qcow2-util.h'],
+ [libshared],
+ [libz],
+ 'HAVE_ZLIB', 'manual'],
+]
diff --git a/src/import/org.freedesktop.import1.conf b/src/import/org.freedesktop.import1.conf
new file mode 100644
index 0000000..2fdb2ba
--- /dev/null
+++ b/src/import/org.freedesktop.import1.conf
@@ -0,0 +1,84 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1+
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.import1"/>
+ <allow send_destination="org.freedesktop.import1"/>
+ <allow receive_sender="org.freedesktop.import1"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.import1"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="ListTransfers"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="CancelTransfer"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="ImportTar"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="ImportRaw"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="ImportFileSystem"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="ExportTar"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="ExportRaw"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="PullTar"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="PullRaw"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Transfer"
+ send_member="Cancel"/>
+
+ <allow receive_sender="org.freedesktop.import1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/import/org.freedesktop.import1.policy b/src/import/org.freedesktop.import1.policy
new file mode 100644
index 0000000..beea5fe
--- /dev/null
+++ b/src/import/org.freedesktop.import1.policy
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1+
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.import1.import">
+ <description gettext-domain="systemd">Import a VM or container image</description>
+ <message gettext-domain="systemd">Authentication is required to import a VM or container image</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.import1.export">
+ <description gettext-domain="systemd">Export a VM or container image</description>
+ <message gettext-domain="systemd">Authentication is required to export a VM or container image</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.import1.pull">
+ <description gettext-domain="systemd">Download a VM or container image</description>
+ <message gettext-domain="systemd">Authentication is required to download a VM or container image</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/import/org.freedesktop.import1.service b/src/import/org.freedesktop.import1.service
new file mode 100644
index 0000000..34d26d0
--- /dev/null
+++ b/src/import/org.freedesktop.import1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.import1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.import1.service
diff --git a/src/import/pull-common.c b/src/import/pull-common.c
new file mode 100644
index 0000000..acfe380
--- /dev/null
+++ b/src/import/pull-common.c
@@ -0,0 +1,530 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/prctl.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "capability-util.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "pull-common.h"
+#include "pull-job.h"
+#include "rlimit-util.h"
+#include "rm-rf.h"
+#include "signal-util.h"
+#include "siphash24.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+#include "web-util.h"
+
+#define FILENAME_ESCAPE "/.#\"\'"
+#define HASH_URL_THRESHOLD_LENGTH (_POSIX_PATH_MAX - 16)
+
+int pull_find_old_etags(
+ const char *url,
+ const char *image_root,
+ int dt,
+ const char *prefix,
+ const char *suffix,
+ char ***etags) {
+
+ _cleanup_free_ char *escaped_url = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ struct dirent *de;
+ int r;
+
+ assert(url);
+ assert(etags);
+
+ if (!image_root)
+ image_root = "/var/lib/machines";
+
+ escaped_url = xescape(url, FILENAME_ESCAPE);
+ if (!escaped_url)
+ return -ENOMEM;
+
+ d = opendir(image_root);
+ if (!d) {
+ if (errno == ENOENT) {
+ *etags = NULL;
+ return 0;
+ }
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ const char *a, *b;
+ char *u;
+
+ if (de->d_type != DT_UNKNOWN &&
+ de->d_type != dt)
+ continue;
+
+ if (prefix) {
+ a = startswith(de->d_name, prefix);
+ if (!a)
+ continue;
+ } else
+ a = de->d_name;
+
+ a = startswith(a, escaped_url);
+ if (!a)
+ continue;
+
+ a = startswith(a, ".");
+ if (!a)
+ continue;
+
+ if (suffix) {
+ b = endswith(de->d_name, suffix);
+ if (!b)
+ continue;
+ } else
+ b = strchr(de->d_name, 0);
+
+ if (a >= b)
+ continue;
+
+ r = cunescape_length(a, b - a, 0, &u);
+ if (r < 0)
+ return r;
+
+ if (!http_etag_is_valid(u)) {
+ free(u);
+ continue;
+ }
+
+ r = strv_consume(&l, u);
+ if (r < 0)
+ return r;
+ }
+
+ *etags = TAKE_PTR(l);
+
+ return 0;
+}
+
+int pull_make_local_copy(const char *final, const char *image_root, const char *local, bool force_local) {
+ const char *p;
+ int r;
+
+ assert(final);
+ assert(local);
+
+ if (!image_root)
+ image_root = "/var/lib/machines";
+
+ p = strjoina(image_root, "/", local);
+
+ if (force_local)
+ (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+
+ r = btrfs_subvol_snapshot(final, p,
+ BTRFS_SNAPSHOT_QUOTA|
+ BTRFS_SNAPSHOT_FALLBACK_COPY|
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY|
+ BTRFS_SNAPSHOT_RECURSIVE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create local image: %m");
+
+ log_info("Created new local image '%s'.", local);
+
+ return 0;
+}
+
+static int hash_url(const char *url, char **ret) {
+ uint64_t h;
+ static const sd_id128_t k = SD_ID128_ARRAY(df,89,16,87,01,cc,42,30,98,ab,4a,19,a6,a5,63,4f);
+
+ assert(url);
+
+ h = siphash24(url, strlen(url), k.bytes);
+ if (asprintf(ret, "%"PRIx64, h) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int pull_make_path(const char *url, const char *etag, const char *image_root, const char *prefix, const char *suffix, char **ret) {
+ _cleanup_free_ char *escaped_url = NULL, *escaped_etag = NULL;
+ char *path;
+
+ assert(url);
+ assert(ret);
+
+ if (!image_root)
+ image_root = "/var/lib/machines";
+
+ escaped_url = xescape(url, FILENAME_ESCAPE);
+ if (!escaped_url)
+ return -ENOMEM;
+
+ if (etag) {
+ escaped_etag = xescape(etag, FILENAME_ESCAPE);
+ if (!escaped_etag)
+ return -ENOMEM;
+ }
+
+ path = strjoin(image_root, "/", strempty(prefix), escaped_url, escaped_etag ? "." : "",
+ strempty(escaped_etag), strempty(suffix));
+ if (!path)
+ return -ENOMEM;
+
+ /* URLs might make the path longer than the maximum allowed length for a file name.
+ * When that happens, a URL hash is used instead. Paths returned by this function
+ * can be later used with tempfn_random() which adds 16 bytes to the resulting name. */
+ if (strlen(path) >= HASH_URL_THRESHOLD_LENGTH) {
+ _cleanup_free_ char *hash = NULL;
+ int r;
+
+ free(path);
+
+ r = hash_url(url, &hash);
+ if (r < 0)
+ return r;
+
+ path = strjoin(image_root, "/", strempty(prefix), hash, escaped_etag ? "." : "",
+ strempty(escaped_etag), strempty(suffix));
+ if (!path)
+ return -ENOMEM;
+ }
+
+ *ret = path;
+ return 0;
+}
+
+int pull_make_auxiliary_job(
+ PullJob **ret,
+ const char *url,
+ int (*strip_suffixes)(const char *name, char **ret),
+ const char *suffix,
+ CurlGlue *glue,
+ PullJobFinished on_finished,
+ void *userdata) {
+
+ _cleanup_free_ char *last_component = NULL, *ll = NULL, *auxiliary_url = NULL;
+ _cleanup_(pull_job_unrefp) PullJob *job = NULL;
+ const char *q;
+ int r;
+
+ assert(ret);
+ assert(url);
+ assert(strip_suffixes);
+ assert(glue);
+
+ r = import_url_last_component(url, &last_component);
+ if (r < 0)
+ return r;
+
+ r = strip_suffixes(last_component, &ll);
+ if (r < 0)
+ return r;
+
+ q = strjoina(ll, suffix);
+
+ r = import_url_change_last_component(url, q, &auxiliary_url);
+ if (r < 0)
+ return r;
+
+ r = pull_job_new(&job, auxiliary_url, glue, userdata);
+ if (r < 0)
+ return r;
+
+ job->on_finished = on_finished;
+ job->compressed_max = job->uncompressed_max = 1ULL * 1024ULL * 1024ULL;
+
+ *ret = TAKE_PTR(job);
+
+ return 0;
+}
+
+int pull_make_verification_jobs(
+ PullJob **ret_checksum_job,
+ PullJob **ret_signature_job,
+ ImportVerify verify,
+ const char *url,
+ CurlGlue *glue,
+ PullJobFinished on_finished,
+ void *userdata) {
+
+ _cleanup_(pull_job_unrefp) PullJob *checksum_job = NULL, *signature_job = NULL;
+ int r;
+ const char *chksums = NULL;
+
+ assert(ret_checksum_job);
+ assert(ret_signature_job);
+ assert(verify >= 0);
+ assert(verify < _IMPORT_VERIFY_MAX);
+ assert(url);
+ assert(glue);
+
+ if (verify != IMPORT_VERIFY_NO) {
+ _cleanup_free_ char *checksum_url = NULL, *fn = NULL;
+
+ /* Queue jobs for the checksum file for the image. */
+ r = import_url_last_component(url, &fn);
+ if (r < 0)
+ return r;
+
+ chksums = strjoina(fn, ".sha256");
+
+ r = import_url_change_last_component(url, chksums, &checksum_url);
+ if (r < 0)
+ return r;
+
+ r = pull_job_new(&checksum_job, checksum_url, glue, userdata);
+ if (r < 0)
+ return r;
+
+ checksum_job->on_finished = on_finished;
+ checksum_job->uncompressed_max = checksum_job->compressed_max = 1ULL * 1024ULL * 1024ULL;
+ }
+
+ if (verify == IMPORT_VERIFY_SIGNATURE) {
+ _cleanup_free_ char *signature_url = NULL;
+
+ /* Queue job for the SHA256SUMS.gpg file for the image. */
+ r = import_url_change_last_component(url, "SHA256SUMS.gpg", &signature_url);
+ if (r < 0)
+ return r;
+
+ r = pull_job_new(&signature_job, signature_url, glue, userdata);
+ if (r < 0)
+ return r;
+
+ signature_job->on_finished = on_finished;
+ signature_job->uncompressed_max = signature_job->compressed_max = 1ULL * 1024ULL * 1024ULL;
+ }
+
+ *ret_checksum_job = checksum_job;
+ *ret_signature_job = signature_job;
+
+ checksum_job = signature_job = NULL;
+
+ return 0;
+}
+
+static int verify_one(PullJob *checksum_job, PullJob *job) {
+ _cleanup_free_ char *fn = NULL;
+ const char *line, *p;
+ int r;
+
+ assert(checksum_job);
+
+ if (!job)
+ return 0;
+
+ assert(IN_SET(job->state, PULL_JOB_DONE, PULL_JOB_FAILED));
+
+ /* Don't verify the checksum if we didn't actually successfully download something new */
+ if (job->state != PULL_JOB_DONE)
+ return 0;
+ if (job->error != 0)
+ return 0;
+ if (job->etag_exists)
+ return 0;
+
+ assert(job->calc_checksum);
+ assert(job->checksum);
+
+ r = import_url_last_component(job->url, &fn);
+ if (r < 0)
+ return log_oom();
+
+ if (!filename_is_valid(fn))
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Cannot verify checksum, could not determine server-side file name.");
+
+ line = strjoina(job->checksum, " *", fn, "\n");
+
+ p = memmem(checksum_job->payload,
+ checksum_job->payload_size,
+ line,
+ strlen(line));
+
+ if (!p) {
+ line = strjoina(job->checksum, " ", fn, "\n");
+
+ p = memmem(checksum_job->payload,
+ checksum_job->payload_size,
+ line,
+ strlen(line));
+ }
+
+ if (!p || (p != (char*) checksum_job->payload && p[-1] != '\n'))
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "DOWNLOAD INVALID: Checksum of %s file did not checkout, file has been tampered with.", fn);
+
+ log_info("SHA256 checksum of %s is valid.", job->url);
+ return 1;
+}
+
+int pull_verify(PullJob *main_job,
+ PullJob *roothash_job,
+ PullJob *settings_job,
+ PullJob *checksum_job,
+ PullJob *signature_job) {
+
+ _cleanup_close_pair_ int gpg_pipe[2] = { -1, -1 };
+ _cleanup_close_ int sig_file = -1;
+ char sig_file_path[] = "/tmp/sigXXXXXX", gpg_home[] = "/tmp/gpghomeXXXXXX";
+ _cleanup_(sigkill_waitp) pid_t pid = 0;
+ bool gpg_home_created = false;
+ int r;
+
+ assert(main_job);
+ assert(main_job->state == PULL_JOB_DONE);
+
+ if (!checksum_job)
+ return 0;
+
+ assert(main_job->calc_checksum);
+ assert(main_job->checksum);
+
+ assert(checksum_job->state == PULL_JOB_DONE);
+
+ if (!checksum_job->payload || checksum_job->payload_size <= 0) {
+ log_error("Checksum is empty, cannot verify.");
+ return -EBADMSG;
+ }
+
+ r = verify_one(checksum_job, main_job);
+ if (r < 0)
+ return r;
+
+ r = verify_one(checksum_job, roothash_job);
+ if (r < 0)
+ return r;
+
+ r = verify_one(checksum_job, settings_job);
+ if (r < 0)
+ return r;
+
+ if (!signature_job)
+ return 0;
+
+ if (checksum_job->style == VERIFICATION_PER_FILE)
+ signature_job = checksum_job;
+
+ assert(signature_job->state == PULL_JOB_DONE);
+
+ if (!signature_job->payload || signature_job->payload_size <= 0) {
+ log_error("Signature is empty, cannot verify.");
+ return -EBADMSG;
+ }
+
+ r = pipe2(gpg_pipe, O_CLOEXEC);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to create pipe for gpg: %m");
+
+ sig_file = mkostemp(sig_file_path, O_RDWR);
+ if (sig_file < 0)
+ return log_error_errno(errno, "Failed to create temporary file: %m");
+
+ r = loop_write(sig_file, signature_job->payload, signature_job->payload_size, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write to temporary file: %m");
+ goto finish;
+ }
+
+ if (!mkdtemp(gpg_home)) {
+ r = log_error_errno(errno, "Failed to create tempory home for gpg: %m");
+ goto finish;
+ }
+
+ gpg_home_created = true;
+
+ r = safe_fork("(gpg)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char *cmd[] = {
+ "gpg",
+ "--no-options",
+ "--no-default-keyring",
+ "--no-auto-key-locate",
+ "--no-auto-check-trustdb",
+ "--batch",
+ "--trust-model=always",
+ NULL, /* --homedir= */
+ NULL, /* --keyring= */
+ NULL, /* --verify */
+ NULL, /* signature file */
+ NULL, /* dash */
+ NULL /* trailing NULL */
+ };
+ unsigned k = ELEMENTSOF(cmd) - 6;
+
+ /* Child */
+
+ gpg_pipe[1] = safe_close(gpg_pipe[1]);
+
+ r = rearrange_stdio(gpg_pipe[0], -1, STDERR_FILENO);
+ if (r < 0) {
+ log_error_errno(r, "Failed to rearrange stdin/stdout: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ (void) rlimit_nofile_safe();
+
+ cmd[k++] = strjoina("--homedir=", gpg_home);
+
+ /* We add the user keyring only to the command line
+ * arguments, if it's around since gpg fails
+ * otherwise. */
+ if (access(USER_KEYRING_PATH, F_OK) >= 0)
+ cmd[k++] = "--keyring=" USER_KEYRING_PATH;
+ else
+ cmd[k++] = "--keyring=" VENDOR_KEYRING_PATH;
+
+ cmd[k++] = "--verify";
+ if (checksum_job->style == VERIFICATION_PER_DIRECTORY) {
+ cmd[k++] = sig_file_path;
+ cmd[k++] = "-";
+ cmd[k++] = NULL;
+ }
+
+ execvp("gpg2", (char * const *) cmd);
+ execvp("gpg", (char * const *) cmd);
+ log_error_errno(errno, "Failed to execute gpg: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ gpg_pipe[0] = safe_close(gpg_pipe[0]);
+
+ r = loop_write(gpg_pipe[1], checksum_job->payload, checksum_job->payload_size, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write to pipe: %m");
+ goto finish;
+ }
+
+ gpg_pipe[1] = safe_close(gpg_pipe[1]);
+
+ r = wait_for_terminate_and_check("gpg", pid, WAIT_LOG_ABNORMAL);
+ pid = 0;
+ if (r < 0)
+ goto finish;
+ if (r != EXIT_SUCCESS) {
+ log_error("DOWNLOAD INVALID: Signature verification failed.");
+ r = -EBADMSG;
+ } else {
+ log_info("Signature verification succeeded.");
+ r = 0;
+ }
+
+finish:
+ (void) unlink(sig_file_path);
+
+ if (gpg_home_created)
+ (void) rm_rf(gpg_home, REMOVE_ROOT|REMOVE_PHYSICAL);
+
+ return r;
+}
diff --git a/src/import/pull-common.h b/src/import/pull-common.h
new file mode 100644
index 0000000..65f239d
--- /dev/null
+++ b/src/import/pull-common.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "import-util.h"
+#include "pull-job.h"
+
+int pull_make_local_copy(const char *final, const char *root, const char *local, bool force_local);
+
+int pull_find_old_etags(const char *url, const char *root, int dt, const char *prefix, const char *suffix, char ***etags);
+
+int pull_make_path(const char *url, const char *etag, const char *image_root, const char *prefix, const char *suffix, char **ret);
+
+int pull_make_auxiliary_job(PullJob **ret, const char *url, int (*strip_suffixes)(const char *name, char **ret), const char *suffix, CurlGlue *glue, PullJobFinished on_finished, void *userdata);
+int pull_make_verification_jobs(PullJob **ret_checksum_job, PullJob **ret_signature_job, ImportVerify verify, const char *url, CurlGlue *glue, PullJobFinished on_finished, void *userdata);
+
+int pull_verify(PullJob *main_job, PullJob *roothash_job, PullJob *settings_job, PullJob *checksum_job, PullJob *signature_job);
diff --git a/src/import/pull-job.c b/src/import/pull-job.c
new file mode 100644
index 0000000..6881bd6
--- /dev/null
+++ b/src/import/pull-job.c
@@ -0,0 +1,636 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/xattr.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "gcrypt-util.h"
+#include "hexdecoct.h"
+#include "import-util.h"
+#include "io-util.h"
+#include "machine-pool.h"
+#include "parse-util.h"
+#include "pull-common.h"
+#include "pull-job.h"
+#include "string-util.h"
+#include "strv.h"
+#include "xattr-util.h"
+
+PullJob* pull_job_unref(PullJob *j) {
+ if (!j)
+ return NULL;
+
+ curl_glue_remove_and_free(j->glue, j->curl);
+ curl_slist_free_all(j->request_header);
+
+ safe_close(j->disk_fd);
+
+ import_compress_free(&j->compress);
+
+ if (j->checksum_context)
+ gcry_md_close(j->checksum_context);
+
+ free(j->url);
+ free(j->etag);
+ strv_free(j->old_etags);
+ free(j->payload);
+ free(j->checksum);
+
+ return mfree(j);
+}
+
+static void pull_job_finish(PullJob *j, int ret) {
+ assert(j);
+
+ if (IN_SET(j->state, PULL_JOB_DONE, PULL_JOB_FAILED))
+ return;
+
+ if (ret == 0) {
+ j->state = PULL_JOB_DONE;
+ j->progress_percent = 100;
+ log_info("Download of %s complete.", j->url);
+ } else {
+ j->state = PULL_JOB_FAILED;
+ j->error = ret;
+ }
+
+ if (j->on_finished)
+ j->on_finished(j);
+}
+
+static int pull_job_restart(PullJob *j) {
+ int r;
+ char *chksum_url = NULL;
+
+ r = import_url_change_last_component(j->url, "SHA256SUMS", &chksum_url);
+ if (r < 0)
+ return r;
+
+ free(j->url);
+ j->url = chksum_url;
+ j->state = PULL_JOB_INIT;
+ j->payload = mfree(j->payload);
+ j->payload_size = 0;
+ j->payload_allocated = 0;
+ j->written_compressed = 0;
+ j->written_uncompressed = 0;
+
+ r = pull_job_begin(j);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+void pull_job_curl_on_finished(CurlGlue *g, CURL *curl, CURLcode result) {
+ PullJob *j = NULL;
+ CURLcode code;
+ long status;
+ int r;
+
+ if (curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&j) != CURLE_OK)
+ return;
+
+ if (!j || IN_SET(j->state, PULL_JOB_DONE, PULL_JOB_FAILED))
+ return;
+
+ if (result != CURLE_OK) {
+ log_error("Transfer failed: %s", curl_easy_strerror(result));
+ r = -EIO;
+ goto finish;
+ }
+
+ code = curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
+ if (code != CURLE_OK) {
+ log_error("Failed to retrieve response code: %s", curl_easy_strerror(code));
+ r = -EIO;
+ goto finish;
+ } else if (status == 304) {
+ log_info("Image already downloaded. Skipping download.");
+ j->etag_exists = true;
+ r = 0;
+ goto finish;
+ } else if (status >= 300) {
+ if (status == 404 && j->style == VERIFICATION_PER_FILE) {
+
+ /* retry pull job with SHA256SUMS file */
+ r = pull_job_restart(j);
+ if (r < 0)
+ goto finish;
+
+ code = curl_easy_getinfo(j->curl, CURLINFO_RESPONSE_CODE, &status);
+ if (code != CURLE_OK) {
+ log_error("Failed to retrieve response code: %s", curl_easy_strerror(code));
+ r = -EIO;
+ goto finish;
+ }
+
+ if (status == 0) {
+ j->style = VERIFICATION_PER_DIRECTORY;
+ return;
+ }
+ }
+
+ log_error("HTTP request to %s failed with code %li.", j->url, status);
+ r = -EIO;
+ goto finish;
+ } else if (status < 200) {
+ log_error("HTTP request to %s finished with unexpected code %li.", j->url, status);
+ r = -EIO;
+ goto finish;
+ }
+
+ if (j->state != PULL_JOB_RUNNING) {
+ log_error("Premature connection termination.");
+ r = -EIO;
+ goto finish;
+ }
+
+ if (j->content_length != (uint64_t) -1 &&
+ j->content_length != j->written_compressed) {
+ log_error("Download truncated.");
+ r = -EIO;
+ goto finish;
+ }
+
+ if (j->checksum_context) {
+ uint8_t *k;
+
+ k = gcry_md_read(j->checksum_context, GCRY_MD_SHA256);
+ if (!k) {
+ log_error("Failed to get checksum.");
+ r = -EIO;
+ goto finish;
+ }
+
+ j->checksum = hexmem(k, gcry_md_get_algo_dlen(GCRY_MD_SHA256));
+ if (!j->checksum) {
+ r = log_oom();
+ goto finish;
+ }
+
+ log_debug("SHA256 of %s is %s.", j->url, j->checksum);
+ }
+
+ if (j->disk_fd >= 0 && j->allow_sparse) {
+ /* Make sure the file size is right, in case the file was
+ * sparse and we just seeked for the last part */
+
+ if (ftruncate(j->disk_fd, j->written_uncompressed) < 0) {
+ r = log_error_errno(errno, "Failed to truncate file: %m");
+ goto finish;
+ }
+
+ if (j->etag)
+ (void) fsetxattr(j->disk_fd, "user.source_etag", j->etag, strlen(j->etag), 0);
+ if (j->url)
+ (void) fsetxattr(j->disk_fd, "user.source_url", j->url, strlen(j->url), 0);
+
+ if (j->mtime != 0) {
+ struct timespec ut[2];
+
+ timespec_store(&ut[0], j->mtime);
+ ut[1] = ut[0];
+ (void) futimens(j->disk_fd, ut);
+
+ (void) fd_setcrtime(j->disk_fd, j->mtime);
+ }
+ }
+
+ r = 0;
+
+finish:
+ pull_job_finish(j, r);
+}
+
+static int pull_job_write_uncompressed(const void *p, size_t sz, void *userdata) {
+ PullJob *j = userdata;
+ ssize_t n;
+
+ assert(j);
+ assert(p);
+
+ if (sz <= 0)
+ return 0;
+
+ if (j->written_uncompressed + sz < j->written_uncompressed)
+ return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW),
+ "File too large, overflow");
+
+ if (j->written_uncompressed + sz > j->uncompressed_max)
+ return log_error_errno(SYNTHETIC_ERRNO(EFBIG),
+ "File overly large, refusing");
+
+ if (j->disk_fd >= 0) {
+
+ if (j->allow_sparse)
+ n = sparse_write(j->disk_fd, p, sz, 64);
+ else {
+ n = write(j->disk_fd, p, sz);
+ if (n < 0)
+ n = -errno;
+ }
+ if (n < 0)
+ return log_error_errno((int) n, "Failed to write file: %m");
+ if ((size_t) n < sz)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short write");
+ } else {
+
+ if (!GREEDY_REALLOC(j->payload, j->payload_allocated, j->payload_size + sz))
+ return log_oom();
+
+ memcpy(j->payload + j->payload_size, p, sz);
+ j->payload_size += sz;
+ }
+
+ j->written_uncompressed += sz;
+
+ return 0;
+}
+
+static int pull_job_write_compressed(PullJob *j, void *p, size_t sz) {
+ int r;
+
+ assert(j);
+ assert(p);
+
+ if (sz <= 0)
+ return 0;
+
+ if (j->written_compressed + sz < j->written_compressed)
+ return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "File too large, overflow");
+
+ if (j->written_compressed + sz > j->compressed_max)
+ return log_error_errno(SYNTHETIC_ERRNO(EFBIG), "File overly large, refusing.");
+
+ if (j->content_length != (uint64_t) -1 &&
+ j->written_compressed + sz > j->content_length)
+ return log_error_errno(SYNTHETIC_ERRNO(EFBIG),
+ "Content length incorrect.");
+
+ if (j->checksum_context)
+ gcry_md_write(j->checksum_context, p, sz);
+
+ r = import_uncompress(&j->compress, p, sz, pull_job_write_uncompressed, j);
+ if (r < 0)
+ return r;
+
+ j->written_compressed += sz;
+
+ return 0;
+}
+
+static int pull_job_open_disk(PullJob *j) {
+ int r;
+
+ assert(j);
+
+ if (j->on_open_disk) {
+ r = j->on_open_disk(j);
+ if (r < 0)
+ return r;
+ }
+
+ if (j->disk_fd >= 0) {
+ /* Check if we can do sparse files */
+
+ if (lseek(j->disk_fd, SEEK_SET, 0) == 0)
+ j->allow_sparse = true;
+ else {
+ if (errno != ESPIPE)
+ return log_error_errno(errno, "Failed to seek on file descriptor: %m");
+
+ j->allow_sparse = false;
+ }
+ }
+
+ if (j->calc_checksum) {
+ initialize_libgcrypt(false);
+
+ if (gcry_md_open(&j->checksum_context, GCRY_MD_SHA256, 0) != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to initialize hash context.");
+ }
+
+ return 0;
+}
+
+static int pull_job_detect_compression(PullJob *j) {
+ _cleanup_free_ uint8_t *stub = NULL;
+ size_t stub_size;
+
+ int r;
+
+ assert(j);
+
+ r = import_uncompress_detect(&j->compress, j->payload, j->payload_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize compressor: %m");
+ if (r == 0)
+ return 0;
+
+ log_debug("Stream is compressed: %s", import_compress_type_to_string(j->compress.type));
+
+ r = pull_job_open_disk(j);
+ if (r < 0)
+ return r;
+
+ /* Now, take the payload we read so far, and decompress it */
+ stub = j->payload;
+ stub_size = j->payload_size;
+
+ j->payload = NULL;
+ j->payload_size = 0;
+ j->payload_allocated = 0;
+
+ j->state = PULL_JOB_RUNNING;
+
+ r = pull_job_write_compressed(j, stub, stub_size);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static size_t pull_job_write_callback(void *contents, size_t size, size_t nmemb, void *userdata) {
+ PullJob *j = userdata;
+ size_t sz = size * nmemb;
+ int r;
+
+ assert(contents);
+ assert(j);
+
+ switch (j->state) {
+
+ case PULL_JOB_ANALYZING:
+ /* Let's first check what it actually is */
+
+ if (!GREEDY_REALLOC(j->payload, j->payload_allocated, j->payload_size + sz)) {
+ r = log_oom();
+ goto fail;
+ }
+
+ memcpy(j->payload + j->payload_size, contents, sz);
+ j->payload_size += sz;
+
+ r = pull_job_detect_compression(j);
+ if (r < 0)
+ goto fail;
+
+ break;
+
+ case PULL_JOB_RUNNING:
+
+ r = pull_job_write_compressed(j, contents, sz);
+ if (r < 0)
+ goto fail;
+
+ break;
+
+ case PULL_JOB_DONE:
+ case PULL_JOB_FAILED:
+ r = -ESTALE;
+ goto fail;
+
+ default:
+ assert_not_reached("Impossible state.");
+ }
+
+ return sz;
+
+fail:
+ pull_job_finish(j, r);
+ return 0;
+}
+
+static size_t pull_job_header_callback(void *contents, size_t size, size_t nmemb, void *userdata) {
+ PullJob *j = userdata;
+ size_t sz = size * nmemb;
+ _cleanup_free_ char *length = NULL, *last_modified = NULL;
+ char *etag;
+ int r;
+
+ assert(contents);
+ assert(j);
+
+ if (IN_SET(j->state, PULL_JOB_DONE, PULL_JOB_FAILED)) {
+ r = -ESTALE;
+ goto fail;
+ }
+
+ assert(j->state == PULL_JOB_ANALYZING);
+
+ r = curl_header_strdup(contents, sz, "ETag:", &etag);
+ if (r < 0) {
+ log_oom();
+ goto fail;
+ }
+ if (r > 0) {
+ free(j->etag);
+ j->etag = etag;
+
+ if (strv_contains(j->old_etags, j->etag)) {
+ log_info("Image already downloaded. Skipping download.");
+ j->etag_exists = true;
+ pull_job_finish(j, 0);
+ return sz;
+ }
+
+ return sz;
+ }
+
+ r = curl_header_strdup(contents, sz, "Content-Length:", &length);
+ if (r < 0) {
+ log_oom();
+ goto fail;
+ }
+ if (r > 0) {
+ (void) safe_atou64(length, &j->content_length);
+
+ if (j->content_length != (uint64_t) -1) {
+ char bytes[FORMAT_BYTES_MAX];
+
+ if (j->content_length > j->compressed_max) {
+ log_error("Content too large.");
+ r = -EFBIG;
+ goto fail;
+ }
+
+ log_info("Downloading %s for %s.", format_bytes(bytes, sizeof(bytes), j->content_length), j->url);
+ }
+
+ return sz;
+ }
+
+ r = curl_header_strdup(contents, sz, "Last-Modified:", &last_modified);
+ if (r < 0) {
+ log_oom();
+ goto fail;
+ }
+ if (r > 0) {
+ (void) curl_parse_http_time(last_modified, &j->mtime);
+ return sz;
+ }
+
+ if (j->on_header) {
+ r = j->on_header(j, contents, sz);
+ if (r < 0)
+ goto fail;
+ }
+
+ return sz;
+
+fail:
+ pull_job_finish(j, r);
+ return 0;
+}
+
+static int pull_job_progress_callback(void *userdata, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow) {
+ PullJob *j = userdata;
+ unsigned percent;
+ usec_t n;
+
+ assert(j);
+
+ if (dltotal <= 0)
+ return 0;
+
+ percent = ((100 * dlnow) / dltotal);
+ n = now(CLOCK_MONOTONIC);
+
+ if (n > j->last_status_usec + USEC_PER_SEC &&
+ percent != j->progress_percent &&
+ dlnow < dltotal) {
+ char buf[FORMAT_TIMESPAN_MAX];
+
+ if (n - j->start_usec > USEC_PER_SEC && dlnow > 0) {
+ char y[FORMAT_BYTES_MAX];
+ usec_t left, done;
+
+ done = n - j->start_usec;
+ left = (usec_t) (((double) done * (double) dltotal) / dlnow) - done;
+
+ log_info("Got %u%% of %s. %s left at %s/s.",
+ percent,
+ j->url,
+ format_timespan(buf, sizeof(buf), left, USEC_PER_SEC),
+ format_bytes(y, sizeof(y), (uint64_t) ((double) dlnow / ((double) done / (double) USEC_PER_SEC))));
+ } else
+ log_info("Got %u%% of %s.", percent, j->url);
+
+ j->progress_percent = percent;
+ j->last_status_usec = n;
+
+ if (j->on_progress)
+ j->on_progress(j);
+ }
+
+ return 0;
+}
+
+int pull_job_new(PullJob **ret, const char *url, CurlGlue *glue, void *userdata) {
+ _cleanup_(pull_job_unrefp) PullJob *j = NULL;
+ _cleanup_free_ char *u = NULL;
+
+ assert(url);
+ assert(glue);
+ assert(ret);
+
+ u = strdup(url);
+ if (!u)
+ return -ENOMEM;
+
+ j = new(PullJob, 1);
+ if (!j)
+ return -ENOMEM;
+
+ *j = (PullJob) {
+ .state = PULL_JOB_INIT,
+ .disk_fd = -1,
+ .userdata = userdata,
+ .glue = glue,
+ .content_length = (uint64_t) -1,
+ .start_usec = now(CLOCK_MONOTONIC),
+ .compressed_max = 64LLU * 1024LLU * 1024LLU * 1024LLU, /* 64GB safety limit */
+ .uncompressed_max = 64LLU * 1024LLU * 1024LLU * 1024LLU, /* 64GB safety limit */
+ .style = VERIFICATION_STYLE_UNSET,
+ .url = TAKE_PTR(u),
+ };
+
+ *ret = TAKE_PTR(j);
+
+ return 0;
+}
+
+int pull_job_begin(PullJob *j) {
+ int r;
+
+ assert(j);
+
+ if (j->state != PULL_JOB_INIT)
+ return -EBUSY;
+
+ r = curl_glue_make(&j->curl, j->url, j);
+ if (r < 0)
+ return r;
+
+ if (!strv_isempty(j->old_etags)) {
+ _cleanup_free_ char *cc = NULL, *hdr = NULL;
+
+ cc = strv_join(j->old_etags, ", ");
+ if (!cc)
+ return -ENOMEM;
+
+ hdr = strappend("If-None-Match: ", cc);
+ if (!hdr)
+ return -ENOMEM;
+
+ if (!j->request_header) {
+ j->request_header = curl_slist_new(hdr, NULL);
+ if (!j->request_header)
+ return -ENOMEM;
+ } else {
+ struct curl_slist *l;
+
+ l = curl_slist_append(j->request_header, hdr);
+ if (!l)
+ return -ENOMEM;
+
+ j->request_header = l;
+ }
+ }
+
+ if (j->request_header) {
+ if (curl_easy_setopt(j->curl, CURLOPT_HTTPHEADER, j->request_header) != CURLE_OK)
+ return -EIO;
+ }
+
+ if (curl_easy_setopt(j->curl, CURLOPT_WRITEFUNCTION, pull_job_write_callback) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(j->curl, CURLOPT_WRITEDATA, j) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(j->curl, CURLOPT_HEADERFUNCTION, pull_job_header_callback) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(j->curl, CURLOPT_HEADERDATA, j) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(j->curl, CURLOPT_XFERINFOFUNCTION, pull_job_progress_callback) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(j->curl, CURLOPT_XFERINFODATA, j) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(j->curl, CURLOPT_NOPROGRESS, 0) != CURLE_OK)
+ return -EIO;
+
+ r = curl_glue_add(j->glue, j->curl);
+ if (r < 0)
+ return r;
+
+ j->state = PULL_JOB_ANALYZING;
+
+ return 0;
+}
diff --git a/src/import/pull-job.h b/src/import/pull-job.h
new file mode 100644
index 0000000..c907e74
--- /dev/null
+++ b/src/import/pull-job.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <gcrypt.h>
+
+#include "curl-util.h"
+#include "import-compress.h"
+#include "macro.h"
+
+typedef struct PullJob PullJob;
+
+typedef void (*PullJobFinished)(PullJob *job);
+typedef int (*PullJobOpenDisk)(PullJob *job);
+typedef int (*PullJobHeader)(PullJob *job, const char *header, size_t sz);
+typedef void (*PullJobProgress)(PullJob *job);
+
+typedef enum PullJobState {
+ PULL_JOB_INIT,
+ PULL_JOB_ANALYZING, /* Still reading into ->payload, to figure out what we have */
+ PULL_JOB_RUNNING, /* Writing to destination */
+ PULL_JOB_DONE,
+ PULL_JOB_FAILED,
+ _PULL_JOB_STATE_MAX,
+ _PULL_JOB_STATE_INVALID = -1,
+} PullJobState;
+
+typedef enum VerificationStyle {
+ VERIFICATION_STYLE_UNSET,
+ VERIFICATION_PER_FILE, /* SuSE-style ".sha256" files with inline signature */
+ VERIFICATION_PER_DIRECTORY, /* Ubuntu-style SHA256SUM files with detach SHA256SUM.gpg signatures */
+} VerificationStyle;
+
+#define PULL_JOB_IS_COMPLETE(j) (IN_SET((j)->state, PULL_JOB_DONE, PULL_JOB_FAILED))
+
+struct PullJob {
+ PullJobState state;
+ int error;
+
+ char *url;
+
+ void *userdata;
+ PullJobFinished on_finished;
+ PullJobOpenDisk on_open_disk;
+ PullJobHeader on_header;
+ PullJobProgress on_progress;
+
+ CurlGlue *glue;
+ CURL *curl;
+ struct curl_slist *request_header;
+
+ char *etag;
+ char **old_etags;
+ bool etag_exists;
+
+ uint64_t content_length;
+ uint64_t written_compressed;
+ uint64_t written_uncompressed;
+
+ uint64_t uncompressed_max;
+ uint64_t compressed_max;
+
+ uint8_t *payload;
+ size_t payload_size;
+ size_t payload_allocated;
+
+ int disk_fd;
+
+ usec_t mtime;
+
+ ImportCompress compress;
+
+ unsigned progress_percent;
+ usec_t start_usec;
+ usec_t last_status_usec;
+
+ bool allow_sparse;
+
+ bool calc_checksum;
+ gcry_md_hd_t checksum_context;
+
+ char *checksum;
+
+ VerificationStyle style;
+};
+
+int pull_job_new(PullJob **job, const char *url, CurlGlue *glue, void *userdata);
+PullJob* pull_job_unref(PullJob *job);
+
+int pull_job_begin(PullJob *j);
+
+void pull_job_curl_on_finished(CurlGlue *g, CURL *curl, CURLcode result);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(PullJob*, pull_job_unref);
diff --git a/src/import/pull-raw.c b/src/import/pull-raw.c
new file mode 100644
index 0000000..3a3e015
--- /dev/null
+++ b/src/import/pull-raw.c
@@ -0,0 +1,750 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <curl/curl.h>
+#include <linux/fs.h>
+#include <sys/xattr.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "copy.h"
+#include "curl-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "import-common.h"
+#include "import-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "pull-common.h"
+#include "pull-job.h"
+#include "pull-raw.h"
+#include "qcow2-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "utf8.h"
+#include "util.h"
+#include "web-util.h"
+
+typedef enum RawProgress {
+ RAW_DOWNLOADING,
+ RAW_VERIFYING,
+ RAW_UNPACKING,
+ RAW_FINALIZING,
+ RAW_COPYING,
+} RawProgress;
+
+struct RawPull {
+ sd_event *event;
+ CurlGlue *glue;
+
+ char *image_root;
+
+ PullJob *raw_job;
+ PullJob *roothash_job;
+ PullJob *settings_job;
+ PullJob *checksum_job;
+ PullJob *signature_job;
+
+ RawPullFinished on_finished;
+ void *userdata;
+
+ char *local;
+ bool force_local;
+ bool settings;
+ bool roothash;
+
+ char *final_path;
+ char *temp_path;
+
+ char *settings_path;
+ char *settings_temp_path;
+
+ char *roothash_path;
+ char *roothash_temp_path;
+
+ ImportVerify verify;
+};
+
+RawPull* raw_pull_unref(RawPull *i) {
+ if (!i)
+ return NULL;
+
+ pull_job_unref(i->raw_job);
+ pull_job_unref(i->settings_job);
+ pull_job_unref(i->roothash_job);
+ pull_job_unref(i->checksum_job);
+ pull_job_unref(i->signature_job);
+
+ curl_glue_unref(i->glue);
+ sd_event_unref(i->event);
+
+ if (i->temp_path) {
+ (void) unlink(i->temp_path);
+ free(i->temp_path);
+ }
+
+ if (i->roothash_temp_path) {
+ (void) unlink(i->roothash_temp_path);
+ free(i->roothash_temp_path);
+ }
+
+ if (i->settings_temp_path) {
+ (void) unlink(i->settings_temp_path);
+ free(i->settings_temp_path);
+ }
+
+ free(i->final_path);
+ free(i->roothash_path);
+ free(i->settings_path);
+ free(i->image_root);
+ free(i->local);
+ return mfree(i);
+}
+
+int raw_pull_new(
+ RawPull **ret,
+ sd_event *event,
+ const char *image_root,
+ RawPullFinished on_finished,
+ void *userdata) {
+
+ _cleanup_(curl_glue_unrefp) CurlGlue *g = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(raw_pull_unrefp) RawPull *i = NULL;
+ _cleanup_free_ char *root = NULL;
+ int r;
+
+ assert(ret);
+
+ root = strdup(image_root ?: "/var/lib/machines");
+ if (!root)
+ return -ENOMEM;
+
+ if (event)
+ e = sd_event_ref(event);
+ else {
+ r = sd_event_default(&e);
+ if (r < 0)
+ return r;
+ }
+
+ r = curl_glue_new(&g, e);
+ if (r < 0)
+ return r;
+
+ i = new(RawPull, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (RawPull) {
+ .on_finished = on_finished,
+ .userdata = userdata,
+ .image_root = TAKE_PTR(root),
+ .event = TAKE_PTR(e),
+ .glue = TAKE_PTR(g),
+ };
+
+ i->glue->on_finished = pull_job_curl_on_finished;
+ i->glue->userdata = i;
+
+ *ret = TAKE_PTR(i);
+
+ return 0;
+}
+
+static void raw_pull_report_progress(RawPull *i, RawProgress p) {
+ unsigned percent;
+
+ assert(i);
+
+ switch (p) {
+
+ case RAW_DOWNLOADING: {
+ unsigned remain = 80;
+
+ percent = 0;
+
+ if (i->settings_job) {
+ percent += i->settings_job->progress_percent * 5 / 100;
+ remain -= 5;
+ }
+
+ if (i->roothash_job) {
+ percent += i->roothash_job->progress_percent * 5 / 100;
+ remain -= 5;
+ }
+
+ if (i->checksum_job) {
+ percent += i->checksum_job->progress_percent * 5 / 100;
+ remain -= 5;
+ }
+
+ if (i->signature_job) {
+ percent += i->signature_job->progress_percent * 5 / 100;
+ remain -= 5;
+ }
+
+ if (i->raw_job)
+ percent += i->raw_job->progress_percent * remain / 100;
+ break;
+ }
+
+ case RAW_VERIFYING:
+ percent = 80;
+ break;
+
+ case RAW_UNPACKING:
+ percent = 85;
+ break;
+
+ case RAW_FINALIZING:
+ percent = 90;
+ break;
+
+ case RAW_COPYING:
+ percent = 95;
+ break;
+
+ default:
+ assert_not_reached("Unknown progress state");
+ }
+
+ sd_notifyf(false, "X_IMPORT_PROGRESS=%u", percent);
+ log_debug("Combined progress %u%%", percent);
+}
+
+static int raw_pull_maybe_convert_qcow2(RawPull *i) {
+ _cleanup_close_ int converted_fd = -1;
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(i);
+ assert(i->raw_job);
+
+ r = qcow2_detect(i->raw_job->disk_fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to detect whether this is a QCOW2 image: %m");
+ if (r == 0)
+ return 0;
+
+ /* This is a QCOW2 image, let's convert it */
+ r = tempfn_random(i->final_path, NULL, &t);
+ if (r < 0)
+ return log_oom();
+
+ converted_fd = open(t, O_RDWR|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0664);
+ if (converted_fd < 0)
+ return log_error_errno(errno, "Failed to create %s: %m", t);
+
+ r = chattr_fd(converted_fd, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set file attributes on %s: %m", t);
+
+ log_info("Unpacking QCOW2 file.");
+
+ r = qcow2_convert(i->raw_job->disk_fd, converted_fd);
+ if (r < 0) {
+ unlink(t);
+ return log_error_errno(r, "Failed to convert qcow2 image: %m");
+ }
+
+ (void) unlink(i->temp_path);
+ free_and_replace(i->temp_path, t);
+
+ safe_close(i->raw_job->disk_fd);
+ i->raw_job->disk_fd = TAKE_FD(converted_fd);
+
+ return 1;
+}
+
+static int raw_pull_determine_path(RawPull *i, const char *suffix, char **field) {
+ int r;
+
+ assert(i);
+ assert(field);
+
+ if (*field)
+ return 0;
+
+ assert(i->raw_job);
+
+ r = pull_make_path(i->raw_job->url, i->raw_job->etag, i->image_root, ".raw-", suffix, field);
+ if (r < 0)
+ return log_oom();
+
+ return 1;
+}
+
+static int raw_pull_copy_auxiliary_file(
+ RawPull *i,
+ const char *suffix,
+ char **path) {
+
+ const char *local;
+ int r;
+
+ assert(i);
+ assert(suffix);
+ assert(path);
+
+ r = raw_pull_determine_path(i, suffix, path);
+ if (r < 0)
+ return r;
+
+ local = strjoina(i->image_root, "/", i->local, suffix);
+
+ r = copy_file_atomic(*path, local, 0644, 0, COPY_REFLINK | (i->force_local ? COPY_REPLACE : 0));
+ if (r == -EEXIST)
+ log_warning_errno(r, "File %s already exists, not replacing.", local);
+ else if (r == -ENOENT)
+ log_debug_errno(r, "Skipping creation of auxiliary file, since none was found.");
+ else if (r < 0)
+ log_warning_errno(r, "Failed to copy file %s, ignoring: %m", local);
+ else
+ log_info("Created new file %s.", local);
+
+ return 0;
+}
+
+static int raw_pull_make_local_copy(RawPull *i) {
+ _cleanup_free_ char *tp = NULL;
+ _cleanup_close_ int dfd = -1;
+ const char *p;
+ int r;
+
+ assert(i);
+ assert(i->raw_job);
+
+ if (!i->local)
+ return 0;
+
+ if (i->raw_job->etag_exists) {
+ /* We have downloaded this one previously, reopen it */
+
+ assert(i->raw_job->disk_fd < 0);
+
+ i->raw_job->disk_fd = open(i->final_path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (i->raw_job->disk_fd < 0)
+ return log_error_errno(errno, "Failed to open vendor image: %m");
+ } else {
+ /* We freshly downloaded the image, use it */
+
+ assert(i->raw_job->disk_fd >= 0);
+
+ if (lseek(i->raw_job->disk_fd, SEEK_SET, 0) == (off_t) -1)
+ return log_error_errno(errno, "Failed to seek to beginning of vendor image: %m");
+ }
+
+ p = strjoina(i->image_root, "/", i->local, ".raw");
+
+ if (i->force_local)
+ (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+
+ r = tempfn_random(p, NULL, &tp);
+ if (r < 0)
+ return log_oom();
+
+ dfd = open(tp, O_WRONLY|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0664);
+ if (dfd < 0)
+ return log_error_errno(errno, "Failed to create writable copy of image: %m");
+
+ /* Turn off COW writing. This should greatly improve
+ * performance on COW file systems like btrfs, since it
+ * reduces fragmentation caused by not allowing in-place
+ * writes. */
+ r = chattr_fd(dfd, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set file attributes on %s: %m", tp);
+
+ r = copy_bytes(i->raw_job->disk_fd, dfd, (uint64_t) -1, COPY_REFLINK);
+ if (r < 0) {
+ unlink(tp);
+ return log_error_errno(r, "Failed to make writable copy of image: %m");
+ }
+
+ (void) copy_times(i->raw_job->disk_fd, dfd);
+ (void) copy_xattr(i->raw_job->disk_fd, dfd);
+
+ dfd = safe_close(dfd);
+
+ r = rename(tp, p);
+ if (r < 0) {
+ r = log_error_errno(errno, "Failed to move writable image into place: %m");
+ unlink(tp);
+ return r;
+ }
+
+ log_info("Created new local image '%s'.", i->local);
+
+ if (i->roothash) {
+ r = raw_pull_copy_auxiliary_file(i, ".roothash", &i->roothash_path);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->settings) {
+ r = raw_pull_copy_auxiliary_file(i, ".nspawn", &i->settings_path);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static bool raw_pull_is_done(RawPull *i) {
+ assert(i);
+ assert(i->raw_job);
+
+ if (!PULL_JOB_IS_COMPLETE(i->raw_job))
+ return false;
+ if (i->roothash_job && !PULL_JOB_IS_COMPLETE(i->roothash_job))
+ return false;
+ if (i->settings_job && !PULL_JOB_IS_COMPLETE(i->settings_job))
+ return false;
+ if (i->checksum_job && !PULL_JOB_IS_COMPLETE(i->checksum_job))
+ return false;
+ if (i->signature_job && !PULL_JOB_IS_COMPLETE(i->signature_job))
+ return false;
+
+ return true;
+}
+
+static int raw_pull_rename_auxiliary_file(
+ RawPull *i,
+ const char *suffix,
+ char **temp_path,
+ char **path) {
+
+ int r;
+
+ assert(i);
+ assert(temp_path);
+ assert(suffix);
+ assert(path);
+
+ /* Regenerate final name for this auxiliary file, we might know the etag of the file now, and we should
+ * incorporate it in the file name if we can */
+ *path = mfree(*path);
+ r = raw_pull_determine_path(i, suffix, path);
+ if (r < 0)
+ return r;
+
+ r = import_make_read_only(*temp_path);
+ if (r < 0)
+ return r;
+
+ r = rename_noreplace(AT_FDCWD, *temp_path, AT_FDCWD, *path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to rename file %s to %s: %m", *temp_path, *path);
+
+ *temp_path = mfree(*temp_path);
+
+ return 1;
+}
+
+static void raw_pull_job_on_finished(PullJob *j) {
+ RawPull *i;
+ int r;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+ if (j == i->roothash_job) {
+ if (j->error != 0)
+ log_info_errno(j->error, "Root hash file could not be retrieved, proceeding without.");
+ } else if (j == i->settings_job) {
+ if (j->error != 0)
+ log_info_errno(j->error, "Settings file could not be retrieved, proceeding without.");
+ } else if (j->error != 0 && j != i->signature_job) {
+ if (j == i->checksum_job)
+ log_error_errno(j->error, "Failed to retrieve SHA256 checksum, cannot verify. (Try --verify=no?)");
+ else
+ log_error_errno(j->error, "Failed to retrieve image file. (Wrong URL?)");
+
+ r = j->error;
+ goto finish;
+ }
+
+ /* This is invoked if either the download completed
+ * successfully, or the download was skipped because we
+ * already have the etag. In this case ->etag_exists is
+ * true.
+ *
+ * We only do something when we got all three files */
+
+ if (!raw_pull_is_done(i))
+ return;
+
+ if (i->signature_job && i->checksum_job->style == VERIFICATION_PER_DIRECTORY && i->signature_job->error != 0) {
+ log_error_errno(j->error, "Failed to retrieve signature file, cannot verify. (Try --verify=no?)");
+
+ r = i->signature_job->error;
+ goto finish;
+ }
+
+ if (i->roothash_job)
+ i->roothash_job->disk_fd = safe_close(i->roothash_job->disk_fd);
+ if (i->settings_job)
+ i->settings_job->disk_fd = safe_close(i->settings_job->disk_fd);
+
+ r = raw_pull_determine_path(i, ".raw", &i->final_path);
+ if (r < 0)
+ goto finish;
+
+ if (!i->raw_job->etag_exists) {
+ /* This is a new download, verify it, and move it into place */
+ assert(i->raw_job->disk_fd >= 0);
+
+ raw_pull_report_progress(i, RAW_VERIFYING);
+
+ r = pull_verify(i->raw_job, i->roothash_job, i->settings_job, i->checksum_job, i->signature_job);
+ if (r < 0)
+ goto finish;
+
+ raw_pull_report_progress(i, RAW_UNPACKING);
+
+ r = raw_pull_maybe_convert_qcow2(i);
+ if (r < 0)
+ goto finish;
+
+ raw_pull_report_progress(i, RAW_FINALIZING);
+
+ r = import_make_read_only_fd(i->raw_job->disk_fd);
+ if (r < 0)
+ goto finish;
+
+ r = rename_noreplace(AT_FDCWD, i->temp_path, AT_FDCWD, i->final_path);
+ if (r < 0) {
+ log_error_errno(r, "Failed to rename raw file to %s: %m", i->final_path);
+ goto finish;
+ }
+
+ i->temp_path = mfree(i->temp_path);
+
+ if (i->roothash_job &&
+ i->roothash_job->error == 0) {
+ r = raw_pull_rename_auxiliary_file(i, ".roothash", &i->roothash_temp_path, &i->roothash_path);
+ if (r < 0)
+ goto finish;
+ }
+
+ if (i->settings_job &&
+ i->settings_job->error == 0) {
+ r = raw_pull_rename_auxiliary_file(i, ".nspawn", &i->settings_temp_path, &i->settings_path);
+ if (r < 0)
+ goto finish;
+ }
+ }
+
+ raw_pull_report_progress(i, RAW_COPYING);
+
+ r = raw_pull_make_local_copy(i);
+ if (r < 0)
+ goto finish;
+
+ r = 0;
+
+finish:
+ if (i->on_finished)
+ i->on_finished(i, r, i->userdata);
+ else
+ sd_event_exit(i->event, r);
+}
+
+static int raw_pull_job_on_open_disk_generic(
+ RawPull *i,
+ PullJob *j,
+ const char *extra,
+ char **temp_path) {
+
+ int r;
+
+ assert(i);
+ assert(j);
+ assert(extra);
+ assert(temp_path);
+
+ if (!*temp_path) {
+ r = tempfn_random_child(i->image_root, extra, temp_path);
+ if (r < 0)
+ return log_oom();
+ }
+
+ (void) mkdir_parents_label(*temp_path, 0700);
+
+ j->disk_fd = open(*temp_path, O_RDWR|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0664);
+ if (j->disk_fd < 0)
+ return log_error_errno(errno, "Failed to create %s: %m", *temp_path);
+
+ return 0;
+}
+
+static int raw_pull_job_on_open_disk_raw(PullJob *j) {
+ RawPull *i;
+ int r;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+ assert(i->raw_job == j);
+
+ r = raw_pull_job_on_open_disk_generic(i, j, "raw", &i->temp_path);
+ if (r < 0)
+ return r;
+
+ r = chattr_fd(j->disk_fd, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set file attributes on %s, ignoring: %m", i->temp_path);
+
+ return 0;
+}
+
+static int raw_pull_job_on_open_disk_roothash(PullJob *j) {
+ RawPull *i;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+ assert(i->roothash_job == j);
+
+ return raw_pull_job_on_open_disk_generic(i, j, "roothash", &i->roothash_temp_path);
+}
+
+static int raw_pull_job_on_open_disk_settings(PullJob *j) {
+ RawPull *i;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+ assert(i->settings_job == j);
+
+ return raw_pull_job_on_open_disk_generic(i, j, "settings", &i->settings_temp_path);
+}
+
+static void raw_pull_job_on_progress(PullJob *j) {
+ RawPull *i;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+
+ raw_pull_report_progress(i, RAW_DOWNLOADING);
+}
+
+int raw_pull_start(
+ RawPull *i,
+ const char *url,
+ const char *local,
+ bool force_local,
+ ImportVerify verify,
+ bool settings,
+ bool roothash) {
+
+ int r;
+
+ assert(i);
+ assert(verify < _IMPORT_VERIFY_MAX);
+ assert(verify >= 0);
+
+ if (!http_url_is_valid(url))
+ return -EINVAL;
+
+ if (local && !machine_name_is_valid(local))
+ return -EINVAL;
+
+ if (i->raw_job)
+ return -EBUSY;
+
+ r = free_and_strdup(&i->local, local);
+ if (r < 0)
+ return r;
+
+ i->force_local = force_local;
+ i->verify = verify;
+ i->settings = settings;
+ i->roothash = roothash;
+
+ /* Queue job for the image itself */
+ r = pull_job_new(&i->raw_job, url, i->glue, i);
+ if (r < 0)
+ return r;
+
+ i->raw_job->on_finished = raw_pull_job_on_finished;
+ i->raw_job->on_open_disk = raw_pull_job_on_open_disk_raw;
+ i->raw_job->on_progress = raw_pull_job_on_progress;
+ i->raw_job->calc_checksum = verify != IMPORT_VERIFY_NO;
+
+ r = pull_find_old_etags(url, i->image_root, DT_REG, ".raw-", ".raw", &i->raw_job->old_etags);
+ if (r < 0)
+ return r;
+
+ if (roothash) {
+ r = pull_make_auxiliary_job(&i->roothash_job, url, raw_strip_suffixes, ".roothash", i->glue, raw_pull_job_on_finished, i);
+ if (r < 0)
+ return r;
+
+ i->roothash_job->on_open_disk = raw_pull_job_on_open_disk_roothash;
+ i->roothash_job->on_progress = raw_pull_job_on_progress;
+ i->roothash_job->calc_checksum = verify != IMPORT_VERIFY_NO;
+ }
+
+ if (settings) {
+ r = pull_make_auxiliary_job(&i->settings_job, url, raw_strip_suffixes, ".nspawn", i->glue, raw_pull_job_on_finished, i);
+ if (r < 0)
+ return r;
+
+ i->settings_job->on_open_disk = raw_pull_job_on_open_disk_settings;
+ i->settings_job->on_progress = raw_pull_job_on_progress;
+ i->settings_job->calc_checksum = verify != IMPORT_VERIFY_NO;
+ }
+
+ r = pull_make_verification_jobs(&i->checksum_job, &i->signature_job, verify, url, i->glue, raw_pull_job_on_finished, i);
+ if (r < 0)
+ return r;
+
+ r = pull_job_begin(i->raw_job);
+ if (r < 0)
+ return r;
+
+ if (i->roothash_job) {
+ r = pull_job_begin(i->roothash_job);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->settings_job) {
+ r = pull_job_begin(i->settings_job);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->checksum_job) {
+ i->checksum_job->on_progress = raw_pull_job_on_progress;
+ i->checksum_job->style = VERIFICATION_PER_FILE;
+
+ r = pull_job_begin(i->checksum_job);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->signature_job) {
+ i->signature_job->on_progress = raw_pull_job_on_progress;
+
+ r = pull_job_begin(i->signature_job);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/import/pull-raw.h b/src/import/pull-raw.h
new file mode 100644
index 0000000..4ccd65b
--- /dev/null
+++ b/src/import/pull-raw.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-event.h"
+
+#include "import-util.h"
+#include "macro.h"
+
+typedef struct RawPull RawPull;
+
+typedef void (*RawPullFinished)(RawPull *pull, int error, void *userdata);
+
+int raw_pull_new(RawPull **pull, sd_event *event, const char *image_root, RawPullFinished on_finished, void *userdata);
+RawPull* raw_pull_unref(RawPull *pull);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(RawPull*, raw_pull_unref);
+
+int raw_pull_start(RawPull *pull, const char *url, const char *local, bool force_local, ImportVerify verify, bool settings, bool roothash);
diff --git a/src/import/pull-tar.c b/src/import/pull-tar.c
new file mode 100644
index 0000000..e7a208e
--- /dev/null
+++ b/src/import/pull-tar.c
@@ -0,0 +1,559 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <curl/curl.h>
+#include <sys/prctl.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "copy.h"
+#include "curl-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "import-common.h"
+#include "import-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "pull-common.h"
+#include "pull-job.h"
+#include "pull-tar.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "utf8.h"
+#include "util.h"
+#include "web-util.h"
+
+typedef enum TarProgress {
+ TAR_DOWNLOADING,
+ TAR_VERIFYING,
+ TAR_FINALIZING,
+ TAR_COPYING,
+} TarProgress;
+
+struct TarPull {
+ sd_event *event;
+ CurlGlue *glue;
+
+ char *image_root;
+
+ PullJob *tar_job;
+ PullJob *settings_job;
+ PullJob *checksum_job;
+ PullJob *signature_job;
+
+ TarPullFinished on_finished;
+ void *userdata;
+
+ char *local;
+ bool force_local;
+ bool settings;
+
+ pid_t tar_pid;
+
+ char *final_path;
+ char *temp_path;
+
+ char *settings_path;
+ char *settings_temp_path;
+
+ ImportVerify verify;
+};
+
+TarPull* tar_pull_unref(TarPull *i) {
+ if (!i)
+ return NULL;
+
+ if (i->tar_pid > 1) {
+ (void) kill_and_sigcont(i->tar_pid, SIGKILL);
+ (void) wait_for_terminate(i->tar_pid, NULL);
+ }
+
+ pull_job_unref(i->tar_job);
+ pull_job_unref(i->settings_job);
+ pull_job_unref(i->checksum_job);
+ pull_job_unref(i->signature_job);
+
+ curl_glue_unref(i->glue);
+ sd_event_unref(i->event);
+
+ if (i->temp_path) {
+ (void) rm_rf(i->temp_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ free(i->temp_path);
+ }
+
+ if (i->settings_temp_path) {
+ (void) unlink(i->settings_temp_path);
+ free(i->settings_temp_path);
+ }
+
+ free(i->final_path);
+ free(i->settings_path);
+ free(i->image_root);
+ free(i->local);
+
+ return mfree(i);
+}
+
+int tar_pull_new(
+ TarPull **ret,
+ sd_event *event,
+ const char *image_root,
+ TarPullFinished on_finished,
+ void *userdata) {
+
+ _cleanup_(curl_glue_unrefp) CurlGlue *g = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(tar_pull_unrefp) TarPull *i = NULL;
+ _cleanup_free_ char *root = NULL;
+ int r;
+
+ assert(ret);
+
+ root = strdup(image_root ?: "/var/lib/machines");
+ if (!root)
+ return -ENOMEM;
+
+ if (event)
+ e = sd_event_ref(event);
+ else {
+ r = sd_event_default(&e);
+ if (r < 0)
+ return r;
+ }
+
+ r = curl_glue_new(&g, e);
+ if (r < 0)
+ return r;
+
+ i = new(TarPull, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (TarPull) {
+ .on_finished = on_finished,
+ .userdata = userdata,
+ .image_root = TAKE_PTR(root),
+ .event = TAKE_PTR(e),
+ .glue = TAKE_PTR(g),
+ };
+
+ i->glue->on_finished = pull_job_curl_on_finished;
+ i->glue->userdata = i;
+
+ *ret = TAKE_PTR(i);
+
+ return 0;
+}
+
+static void tar_pull_report_progress(TarPull *i, TarProgress p) {
+ unsigned percent;
+
+ assert(i);
+
+ switch (p) {
+
+ case TAR_DOWNLOADING: {
+ unsigned remain = 85;
+
+ percent = 0;
+
+ if (i->settings_job) {
+ percent += i->settings_job->progress_percent * 5 / 100;
+ remain -= 5;
+ }
+
+ if (i->checksum_job) {
+ percent += i->checksum_job->progress_percent * 5 / 100;
+ remain -= 5;
+ }
+
+ if (i->signature_job) {
+ percent += i->signature_job->progress_percent * 5 / 100;
+ remain -= 5;
+ }
+
+ if (i->tar_job)
+ percent += i->tar_job->progress_percent * remain / 100;
+ break;
+ }
+
+ case TAR_VERIFYING:
+ percent = 85;
+ break;
+
+ case TAR_FINALIZING:
+ percent = 90;
+ break;
+
+ case TAR_COPYING:
+ percent = 95;
+ break;
+
+ default:
+ assert_not_reached("Unknown progress state");
+ }
+
+ sd_notifyf(false, "X_IMPORT_PROGRESS=%u", percent);
+ log_debug("Combined progress %u%%", percent);
+}
+
+static int tar_pull_determine_path(TarPull *i, const char *suffix, char **field) {
+ int r;
+
+ assert(i);
+ assert(field);
+
+ if (*field)
+ return 0;
+
+ assert(i->tar_job);
+
+ r = pull_make_path(i->tar_job->url, i->tar_job->etag, i->image_root, ".tar-", suffix, field);
+ if (r < 0)
+ return log_oom();
+
+ return 1;
+}
+
+static int tar_pull_make_local_copy(TarPull *i) {
+ int r;
+
+ assert(i);
+ assert(i->tar_job);
+
+ if (!i->local)
+ return 0;
+
+ r = pull_make_local_copy(i->final_path, i->image_root, i->local, i->force_local);
+ if (r < 0)
+ return r;
+
+ if (i->settings) {
+ const char *local_settings;
+ assert(i->settings_job);
+
+ r = tar_pull_determine_path(i, ".nspawn", &i->settings_path);
+ if (r < 0)
+ return r;
+
+ local_settings = strjoina(i->image_root, "/", i->local, ".nspawn");
+
+ r = copy_file_atomic(i->settings_path, local_settings, 0664, 0, COPY_REFLINK | (i->force_local ? COPY_REPLACE : 0));
+ if (r == -EEXIST)
+ log_warning_errno(r, "Settings file %s already exists, not replacing.", local_settings);
+ else if (r == -ENOENT)
+ log_debug_errno(r, "Skipping creation of settings file, since none was found.");
+ else if (r < 0)
+ log_warning_errno(r, "Failed to copy settings files %s, ignoring: %m", local_settings);
+ else
+ log_info("Created new settings file %s.", local_settings);
+ }
+
+ return 0;
+}
+
+static bool tar_pull_is_done(TarPull *i) {
+ assert(i);
+ assert(i->tar_job);
+
+ if (!PULL_JOB_IS_COMPLETE(i->tar_job))
+ return false;
+ if (i->settings_job && !PULL_JOB_IS_COMPLETE(i->settings_job))
+ return false;
+ if (i->checksum_job && !PULL_JOB_IS_COMPLETE(i->checksum_job))
+ return false;
+ if (i->signature_job && !PULL_JOB_IS_COMPLETE(i->signature_job))
+ return false;
+
+ return true;
+}
+
+static void tar_pull_job_on_finished(PullJob *j) {
+ TarPull *i;
+ int r;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+
+ if (j == i->settings_job) {
+ if (j->error != 0)
+ log_info_errno(j->error, "Settings file could not be retrieved, proceeding without.");
+ } else if (j->error != 0 && j != i->signature_job) {
+ if (j == i->checksum_job)
+ log_error_errno(j->error, "Failed to retrieve SHA256 checksum, cannot verify. (Try --verify=no?)");
+ else
+ log_error_errno(j->error, "Failed to retrieve image file. (Wrong URL?)");
+
+ r = j->error;
+ goto finish;
+ }
+
+ /* This is invoked if either the download completed
+ * successfully, or the download was skipped because we
+ * already have the etag. */
+
+ if (!tar_pull_is_done(i))
+ return;
+
+ if (i->signature_job && i->checksum_job->style == VERIFICATION_PER_DIRECTORY && i->signature_job->error != 0) {
+ log_error_errno(j->error, "Failed to retrieve signature file, cannot verify. (Try --verify=no?)");
+
+ r = i->signature_job->error;
+ goto finish;
+ }
+
+ i->tar_job->disk_fd = safe_close(i->tar_job->disk_fd);
+ if (i->settings_job)
+ i->settings_job->disk_fd = safe_close(i->settings_job->disk_fd);
+
+ r = tar_pull_determine_path(i, NULL, &i->final_path);
+ if (r < 0)
+ goto finish;
+
+ if (i->tar_pid > 0) {
+ r = wait_for_terminate_and_check("tar", i->tar_pid, WAIT_LOG);
+ i->tar_pid = 0;
+ if (r < 0)
+ goto finish;
+ if (r != EXIT_SUCCESS) {
+ r = -EIO;
+ goto finish;
+ }
+ }
+
+ if (!i->tar_job->etag_exists) {
+ /* This is a new download, verify it, and move it into place */
+
+ tar_pull_report_progress(i, TAR_VERIFYING);
+
+ r = pull_verify(i->tar_job, NULL, i->settings_job, i->checksum_job, i->signature_job);
+ if (r < 0)
+ goto finish;
+
+ tar_pull_report_progress(i, TAR_FINALIZING);
+
+ r = import_make_read_only(i->temp_path);
+ if (r < 0)
+ goto finish;
+
+ r = rename_noreplace(AT_FDCWD, i->temp_path, AT_FDCWD, i->final_path);
+ if (r < 0) {
+ log_error_errno(r, "Failed to rename to final image name to %s: %m", i->final_path);
+ goto finish;
+ }
+
+ i->temp_path = mfree(i->temp_path);
+
+ if (i->settings_job &&
+ i->settings_job->error == 0) {
+
+ /* Also move the settings file into place, if it exists. Note that we do so only if we also
+ * moved the tar file in place, to keep things strictly in sync. */
+ assert(i->settings_temp_path);
+
+ /* Regenerate final name for this auxiliary file, we might know the etag of the file now, and
+ * we should incorporate it in the file name if we can */
+ i->settings_path = mfree(i->settings_path);
+
+ r = tar_pull_determine_path(i, ".nspawn", &i->settings_path);
+ if (r < 0)
+ goto finish;
+
+ r = import_make_read_only(i->settings_temp_path);
+ if (r < 0)
+ goto finish;
+
+ r = rename_noreplace(AT_FDCWD, i->settings_temp_path, AT_FDCWD, i->settings_path);
+ if (r < 0) {
+ log_error_errno(r, "Failed to rename settings file to %s: %m", i->settings_path);
+ goto finish;
+ }
+
+ i->settings_temp_path = mfree(i->settings_temp_path);
+ }
+ }
+
+ tar_pull_report_progress(i, TAR_COPYING);
+
+ r = tar_pull_make_local_copy(i);
+ if (r < 0)
+ goto finish;
+
+ r = 0;
+
+finish:
+ if (i->on_finished)
+ i->on_finished(i, r, i->userdata);
+ else
+ sd_event_exit(i->event, r);
+}
+
+static int tar_pull_job_on_open_disk_tar(PullJob *j) {
+ TarPull *i;
+ int r;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+ assert(i->tar_job == j);
+ assert(i->tar_pid <= 0);
+
+ if (!i->temp_path) {
+ r = tempfn_random_child(i->image_root, "tar", &i->temp_path);
+ if (r < 0)
+ return log_oom();
+ }
+
+ mkdir_parents_label(i->temp_path, 0700);
+
+ r = btrfs_subvol_make(i->temp_path);
+ if (r == -ENOTTY) {
+ if (mkdir(i->temp_path, 0755) < 0)
+ return log_error_errno(errno, "Failed to create directory %s: %m", i->temp_path);
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to create subvolume %s: %m", i->temp_path);
+ else
+ (void) import_assign_pool_quota_and_warn(i->temp_path);
+
+ j->disk_fd = import_fork_tar_x(i->temp_path, &i->tar_pid);
+ if (j->disk_fd < 0)
+ return j->disk_fd;
+
+ return 0;
+}
+
+static int tar_pull_job_on_open_disk_settings(PullJob *j) {
+ TarPull *i;
+ int r;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+ assert(i->settings_job == j);
+
+ if (!i->settings_temp_path) {
+ r = tempfn_random_child(i->image_root, "settings", &i->settings_temp_path);
+ if (r < 0)
+ return log_oom();
+ }
+
+ mkdir_parents_label(i->settings_temp_path, 0700);
+
+ j->disk_fd = open(i->settings_temp_path, O_RDWR|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0664);
+ if (j->disk_fd < 0)
+ return log_error_errno(errno, "Failed to create %s: %m", i->settings_temp_path);
+
+ return 0;
+}
+
+static void tar_pull_job_on_progress(PullJob *j) {
+ TarPull *i;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+
+ tar_pull_report_progress(i, TAR_DOWNLOADING);
+}
+
+int tar_pull_start(
+ TarPull *i,
+ const char *url,
+ const char *local,
+ bool force_local,
+ ImportVerify verify,
+ bool settings) {
+
+ int r;
+
+ assert(i);
+ assert(verify < _IMPORT_VERIFY_MAX);
+ assert(verify >= 0);
+
+ if (!http_url_is_valid(url))
+ return -EINVAL;
+
+ if (local && !machine_name_is_valid(local))
+ return -EINVAL;
+
+ if (i->tar_job)
+ return -EBUSY;
+
+ r = free_and_strdup(&i->local, local);
+ if (r < 0)
+ return r;
+
+ i->force_local = force_local;
+ i->verify = verify;
+ i->settings = settings;
+
+ /* Set up download job for TAR file */
+ r = pull_job_new(&i->tar_job, url, i->glue, i);
+ if (r < 0)
+ return r;
+
+ i->tar_job->on_finished = tar_pull_job_on_finished;
+ i->tar_job->on_open_disk = tar_pull_job_on_open_disk_tar;
+ i->tar_job->on_progress = tar_pull_job_on_progress;
+ i->tar_job->calc_checksum = verify != IMPORT_VERIFY_NO;
+
+ r = pull_find_old_etags(url, i->image_root, DT_DIR, ".tar-", NULL, &i->tar_job->old_etags);
+ if (r < 0)
+ return r;
+
+ /* Set up download job for the settings file (.nspawn) */
+ if (settings) {
+ r = pull_make_auxiliary_job(&i->settings_job, url, tar_strip_suffixes, ".nspawn", i->glue, tar_pull_job_on_finished, i);
+ if (r < 0)
+ return r;
+
+ i->settings_job->on_open_disk = tar_pull_job_on_open_disk_settings;
+ i->settings_job->on_progress = tar_pull_job_on_progress;
+ i->settings_job->calc_checksum = verify != IMPORT_VERIFY_NO;
+ }
+
+ /* Set up download of checksum/signature files */
+ r = pull_make_verification_jobs(&i->checksum_job, &i->signature_job, verify, url, i->glue, tar_pull_job_on_finished, i);
+ if (r < 0)
+ return r;
+
+ r = pull_job_begin(i->tar_job);
+ if (r < 0)
+ return r;
+
+ if (i->settings_job) {
+ r = pull_job_begin(i->settings_job);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->checksum_job) {
+ i->checksum_job->on_progress = tar_pull_job_on_progress;
+ i->checksum_job->style = VERIFICATION_PER_FILE;
+
+ r = pull_job_begin(i->checksum_job);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->signature_job) {
+ i->signature_job->on_progress = tar_pull_job_on_progress;
+
+ r = pull_job_begin(i->signature_job);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/import/pull-tar.h b/src/import/pull-tar.h
new file mode 100644
index 0000000..76f920e
--- /dev/null
+++ b/src/import/pull-tar.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-event.h"
+
+#include "import-util.h"
+#include "macro.h"
+
+typedef struct TarPull TarPull;
+
+typedef void (*TarPullFinished)(TarPull *pull, int error, void *userdata);
+
+int tar_pull_new(TarPull **pull, sd_event *event, const char *image_root, TarPullFinished on_finished, void *userdata);
+TarPull* tar_pull_unref(TarPull *pull);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(TarPull*, tar_pull_unref);
+
+int tar_pull_start(TarPull *pull, const char *url, const char *local, bool force_local, ImportVerify verify, bool settings);
diff --git a/src/import/pull.c b/src/import/pull.c
new file mode 100644
index 0000000..3376992
--- /dev/null
+++ b/src/import/pull.c
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "hostname-util.h"
+#include "import-util.h"
+#include "machine-image.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "pull-raw.h"
+#include "pull-tar.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "verbs.h"
+#include "web-util.h"
+
+static bool arg_force = false;
+static const char *arg_image_root = "/var/lib/machines";
+static ImportVerify arg_verify = IMPORT_VERIFY_SIGNATURE;
+static bool arg_settings = true;
+static bool arg_roothash = true;
+
+static int interrupt_signal_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ log_notice("Transfer aborted.");
+ sd_event_exit(sd_event_source_get_event(s), EINTR);
+ return 0;
+}
+
+static void on_tar_finished(TarPull *pull, int error, void *userdata) {
+ sd_event *event = userdata;
+ assert(pull);
+
+ if (error == 0)
+ log_info("Operation completed successfully.");
+
+ sd_event_exit(event, abs(error));
+}
+
+static int pull_tar(int argc, char *argv[], void *userdata) {
+ _cleanup_(tar_pull_unrefp) TarPull *pull = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ const char *url, *local;
+ _cleanup_free_ char *l = NULL, *ll = NULL;
+ int r;
+
+ url = argv[1];
+ if (!http_url_is_valid(url)) {
+ log_error("URL '%s' is not valid.", url);
+ return -EINVAL;
+ }
+
+ if (argc >= 3)
+ local = argv[2];
+ else {
+ r = import_url_last_component(url, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed get final component of URL: %m");
+
+ local = l;
+ }
+
+ if (isempty(local) || streq(local, "-"))
+ local = NULL;
+
+ if (local) {
+ r = tar_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local)) {
+ log_error("Local image name '%s' is not valid.", local);
+ return -EINVAL;
+ }
+
+ if (!arg_force) {
+ r = image_find(IMAGE_MACHINE, local, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to check whether image '%s' exists: %m", local);
+ } else {
+ log_error("Image '%s' already exists.", local);
+ return -EEXIST;
+ }
+ }
+
+ log_info("Pulling '%s', saving as '%s'.", url, local);
+ } else
+ log_info("Pulling '%s'.", url);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, interrupt_signal_handler, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, interrupt_signal_handler, NULL);
+
+ r = tar_pull_new(&pull, event, arg_image_root, on_tar_finished, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate puller: %m");
+
+ r = tar_pull_start(pull, url, local, arg_force, arg_verify, arg_settings);
+ if (r < 0)
+ return log_error_errno(r, "Failed to pull image: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ log_info("Exiting.");
+ return -r;
+}
+
+static void on_raw_finished(RawPull *pull, int error, void *userdata) {
+ sd_event *event = userdata;
+ assert(pull);
+
+ if (error == 0)
+ log_info("Operation completed successfully.");
+
+ sd_event_exit(event, abs(error));
+}
+
+static int pull_raw(int argc, char *argv[], void *userdata) {
+ _cleanup_(raw_pull_unrefp) RawPull *pull = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ const char *url, *local;
+ _cleanup_free_ char *l = NULL, *ll = NULL;
+ int r;
+
+ url = argv[1];
+ if (!http_url_is_valid(url)) {
+ log_error("URL '%s' is not valid.", url);
+ return -EINVAL;
+ }
+
+ if (argc >= 3)
+ local = argv[2];
+ else {
+ r = import_url_last_component(url, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed get final component of URL: %m");
+
+ local = l;
+ }
+
+ if (isempty(local) || streq(local, "-"))
+ local = NULL;
+
+ if (local) {
+ r = raw_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local)) {
+ log_error("Local image name '%s' is not valid.", local);
+ return -EINVAL;
+ }
+
+ if (!arg_force) {
+ r = image_find(IMAGE_MACHINE, local, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to check whether image '%s' exists: %m", local);
+ } else {
+ log_error("Image '%s' already exists.", local);
+ return -EEXIST;
+ }
+ }
+
+ log_info("Pulling '%s', saving as '%s'.", url, local);
+ } else
+ log_info("Pulling '%s'.", url);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, interrupt_signal_handler, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, interrupt_signal_handler, NULL);
+
+ r = raw_pull_new(&pull, event, arg_image_root, on_raw_finished, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate puller: %m");
+
+ r = raw_pull_start(pull, url, local, arg_force, arg_verify, arg_settings, arg_roothash);
+ if (r < 0)
+ return log_error_errno(r, "Failed to pull image: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ log_info("Exiting.");
+ return -r;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Download container or virtual machine images.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --force Force creation of image\n"
+ " --verify=MODE Verify downloaded image, one of: 'no',\n"
+ " 'checksum', 'signature'\n"
+ " --settings=BOOL Download settings file with image\n"
+ " --roothash=BOOL Download root hash file with image\n"
+ " --image-root=PATH Image root directory\n\n"
+ "Commands:\n"
+ " tar URL [NAME] Download a TAR image\n"
+ " raw URL [NAME] Download a RAW image\n",
+ program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_FORCE,
+ ARG_IMAGE_ROOT,
+ ARG_VERIFY,
+ ARG_SETTINGS,
+ ARG_ROOTHASH,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "force", no_argument, NULL, ARG_FORCE },
+ { "image-root", required_argument, NULL, ARG_IMAGE_ROOT },
+ { "verify", required_argument, NULL, ARG_VERIFY },
+ { "settings", required_argument, NULL, ARG_SETTINGS },
+ { "roothash", required_argument, NULL, ARG_ROOTHASH },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_FORCE:
+ arg_force = true;
+ break;
+
+ case ARG_IMAGE_ROOT:
+ arg_image_root = optarg;
+ break;
+
+ case ARG_VERIFY:
+ arg_verify = import_verify_from_string(optarg);
+ if (arg_verify < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid verification setting '%s'", optarg);
+
+ break;
+
+ case ARG_SETTINGS:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --settings= parameter '%s': %m", optarg);
+
+ arg_settings = r;
+ break;
+
+ case ARG_ROOTHASH:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --roothash= parameter '%s': %m", optarg);
+
+ arg_roothash = r;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int pull_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "tar", 2, 3, 0, pull_tar },
+ { "raw", 2, 3, 0, pull_raw },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ (void) ignore_signals(SIGPIPE, -1);
+
+ return pull_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/import/qcow2-util.c b/src/import/qcow2-util.c
new file mode 100644
index 0000000..e927b60
--- /dev/null
+++ b/src/import/qcow2-util.c
@@ -0,0 +1,334 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <zlib.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "qcow2-util.h"
+#include "sparse-endian.h"
+#include "util.h"
+
+#define QCOW2_MAGIC 0x514649fb
+
+#define QCOW2_COPIED (1ULL << 63)
+#define QCOW2_COMPRESSED (1ULL << 62)
+#define QCOW2_ZERO (1ULL << 0)
+
+typedef struct _packed_ Header {
+ be32_t magic;
+ be32_t version;
+
+ be64_t backing_file_offset;
+ be32_t backing_file_size;
+
+ be32_t cluster_bits;
+ be64_t size;
+ be32_t crypt_method;
+
+ be32_t l1_size;
+ be64_t l1_table_offset;
+
+ be64_t refcount_table_offset;
+ be32_t refcount_table_clusters;
+
+ be32_t nb_snapshots;
+ be64_t snapshots_offset;
+
+ /* The remainder is only present on QCOW3 */
+ be64_t incompatible_features;
+ be64_t compatible_features;
+ be64_t autoclear_features;
+
+ be32_t refcount_order;
+ be32_t header_length;
+} Header;
+
+#define HEADER_MAGIC(header) be32toh((header)->magic)
+#define HEADER_VERSION(header) be32toh((header)->version)
+#define HEADER_CLUSTER_BITS(header) be32toh((header)->cluster_bits)
+#define HEADER_CLUSTER_SIZE(header) (1ULL << HEADER_CLUSTER_BITS(header))
+#define HEADER_L2_BITS(header) (HEADER_CLUSTER_BITS(header) - 3)
+#define HEADER_SIZE(header) be64toh((header)->size)
+#define HEADER_CRYPT_METHOD(header) be32toh((header)->crypt_method)
+#define HEADER_L1_SIZE(header) be32toh((header)->l1_size)
+#define HEADER_L2_SIZE(header) (HEADER_CLUSTER_SIZE(header)/sizeof(uint64_t))
+#define HEADER_L1_TABLE_OFFSET(header) be64toh((header)->l1_table_offset)
+
+static uint32_t HEADER_HEADER_LENGTH(const Header *h) {
+ if (HEADER_VERSION(h) < 3)
+ return offsetof(Header, incompatible_features);
+
+ return be32toh(h->header_length);
+}
+
+static int copy_cluster(
+ int sfd, uint64_t soffset,
+ int dfd, uint64_t doffset,
+ uint64_t cluster_size,
+ void *buffer) {
+
+ ssize_t l;
+ int r;
+
+ r = btrfs_clone_range(sfd, soffset, dfd, doffset, cluster_size);
+ if (r >= 0)
+ return r;
+
+ l = pread(sfd, buffer, cluster_size, soffset);
+ if (l < 0)
+ return -errno;
+ if ((uint64_t) l != cluster_size)
+ return -EIO;
+
+ l = pwrite(dfd, buffer, cluster_size, doffset);
+ if (l < 0)
+ return -errno;
+ if ((uint64_t) l != cluster_size)
+ return -EIO;
+
+ return 0;
+}
+
+static int decompress_cluster(
+ int sfd, uint64_t soffset,
+ int dfd, uint64_t doffset,
+ uint64_t compressed_size,
+ uint64_t cluster_size,
+ void *buffer1,
+ void *buffer2) {
+
+ _cleanup_free_ void *large_buffer = NULL;
+ z_stream s = {};
+ uint64_t sz;
+ ssize_t l;
+ int r;
+
+ if (compressed_size > cluster_size) {
+ /* The usual cluster buffer doesn't suffice, let's
+ * allocate a larger one, temporarily */
+
+ large_buffer = malloc(compressed_size);
+ if (!large_buffer)
+ return -ENOMEM;
+
+ buffer1 = large_buffer;
+ }
+
+ l = pread(sfd, buffer1, compressed_size, soffset);
+ if (l < 0)
+ return -errno;
+ if ((uint64_t) l != compressed_size)
+ return -EIO;
+
+ s.next_in = buffer1;
+ s.avail_in = compressed_size;
+ s.next_out = buffer2;
+ s.avail_out = cluster_size;
+
+ r = inflateInit2(&s, -12);
+ if (r != Z_OK)
+ return -EIO;
+
+ r = inflate(&s, Z_FINISH);
+ sz = (uint8_t*) s.next_out - (uint8_t*) buffer2;
+ inflateEnd(&s);
+ if (r != Z_STREAM_END || sz != cluster_size)
+ return -EIO;
+
+ l = pwrite(dfd, buffer2, cluster_size, doffset);
+ if (l < 0)
+ return -errno;
+ if ((uint64_t) l != cluster_size)
+ return -EIO;
+
+ return 0;
+}
+
+static int normalize_offset(
+ const Header *header,
+ uint64_t p,
+ uint64_t *ret,
+ bool *compressed,
+ uint64_t *compressed_size) {
+
+ uint64_t q;
+
+ q = be64toh(p);
+
+ if (q & QCOW2_COMPRESSED) {
+ uint64_t sz, csize_shift, csize_mask;
+
+ if (!compressed)
+ return -EOPNOTSUPP;
+
+ csize_shift = 64 - 2 - (HEADER_CLUSTER_BITS(header) - 8);
+ csize_mask = (1ULL << (HEADER_CLUSTER_BITS(header) - 8)) - 1;
+ sz = (((q >> csize_shift) & csize_mask) + 1) * 512 - (q & 511);
+ q &= ((1ULL << csize_shift) - 1);
+
+ if (compressed_size)
+ *compressed_size = sz;
+
+ *compressed = true;
+
+ } else {
+ if (compressed) {
+ *compressed = false;
+ *compressed_size = 0;
+ }
+
+ if (q & QCOW2_ZERO) {
+ /* We make no distinction between zero blocks and holes */
+ *ret = 0;
+ return 0;
+ }
+
+ q &= ~QCOW2_COPIED;
+ }
+
+ *ret = q;
+ return q > 0; /* returns positive if not a hole */
+}
+
+static int verify_header(const Header *header) {
+ assert(header);
+
+ if (HEADER_MAGIC(header) != QCOW2_MAGIC)
+ return -EBADMSG;
+
+ if (!IN_SET(HEADER_VERSION(header), 2, 3))
+ return -EOPNOTSUPP;
+
+ if (HEADER_CRYPT_METHOD(header) != 0)
+ return -EOPNOTSUPP;
+
+ if (HEADER_CLUSTER_BITS(header) < 9) /* 512K */
+ return -EBADMSG;
+
+ if (HEADER_CLUSTER_BITS(header) > 21) /* 2MB */
+ return -EBADMSG;
+
+ if (HEADER_SIZE(header) % HEADER_CLUSTER_SIZE(header) != 0)
+ return -EBADMSG;
+
+ if (HEADER_L1_SIZE(header) > 32*1024*1024) /* 32MB */
+ return -EBADMSG;
+
+ if (HEADER_VERSION(header) == 3) {
+
+ if (header->incompatible_features != 0)
+ return -EOPNOTSUPP;
+
+ if (HEADER_HEADER_LENGTH(header) < sizeof(Header))
+ return -EBADMSG;
+ }
+
+ return 0;
+}
+
+int qcow2_convert(int qcow2_fd, int raw_fd) {
+ _cleanup_free_ void *buffer1 = NULL, *buffer2 = NULL;
+ _cleanup_free_ be64_t *l1_table = NULL, *l2_table = NULL;
+ uint64_t sz, i;
+ Header header;
+ ssize_t l;
+ int r;
+
+ l = pread(qcow2_fd, &header, sizeof(header), 0);
+ if (l < 0)
+ return -errno;
+ if (l != sizeof(header))
+ return -EIO;
+
+ r = verify_header(&header);
+ if (r < 0)
+ return r;
+
+ l1_table = new(be64_t, HEADER_L1_SIZE(&header));
+ if (!l1_table)
+ return -ENOMEM;
+
+ l2_table = malloc(HEADER_CLUSTER_SIZE(&header));
+ if (!l2_table)
+ return -ENOMEM;
+
+ buffer1 = malloc(HEADER_CLUSTER_SIZE(&header));
+ if (!buffer1)
+ return -ENOMEM;
+
+ buffer2 = malloc(HEADER_CLUSTER_SIZE(&header));
+ if (!buffer2)
+ return -ENOMEM;
+
+ /* Empty the file if it exists, we rely on zero bits */
+ if (ftruncate(raw_fd, 0) < 0)
+ return -errno;
+
+ if (ftruncate(raw_fd, HEADER_SIZE(&header)) < 0)
+ return -errno;
+
+ sz = sizeof(uint64_t) * HEADER_L1_SIZE(&header);
+ l = pread(qcow2_fd, l1_table, sz, HEADER_L1_TABLE_OFFSET(&header));
+ if (l < 0)
+ return -errno;
+ if ((uint64_t) l != sz)
+ return -EIO;
+
+ for (i = 0; i < HEADER_L1_SIZE(&header); i ++) {
+ uint64_t l2_begin, j;
+
+ r = normalize_offset(&header, l1_table[i], &l2_begin, NULL, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ l = pread(qcow2_fd, l2_table, HEADER_CLUSTER_SIZE(&header), l2_begin);
+ if (l < 0)
+ return -errno;
+ if ((uint64_t) l != HEADER_CLUSTER_SIZE(&header))
+ return -EIO;
+
+ for (j = 0; j < HEADER_L2_SIZE(&header); j++) {
+ uint64_t data_begin, p, compressed_size;
+ bool compressed;
+
+ p = ((i << HEADER_L2_BITS(&header)) + j) << HEADER_CLUSTER_BITS(&header);
+
+ r = normalize_offset(&header, l2_table[j], &data_begin, &compressed, &compressed_size);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (compressed)
+ r = decompress_cluster(
+ qcow2_fd, data_begin,
+ raw_fd, p,
+ compressed_size, HEADER_CLUSTER_SIZE(&header),
+ buffer1, buffer2);
+ else
+ r = copy_cluster(
+ qcow2_fd, data_begin,
+ raw_fd, p,
+ HEADER_CLUSTER_SIZE(&header), buffer1);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+int qcow2_detect(int fd) {
+ be32_t id;
+ ssize_t l;
+
+ l = pread(fd, &id, sizeof(id), 0);
+ if (l < 0)
+ return -errno;
+ if (l != sizeof(id))
+ return -EIO;
+
+ return htobe32(QCOW2_MAGIC) == id;
+}
diff --git a/src/import/qcow2-util.h b/src/import/qcow2-util.h
new file mode 100644
index 0000000..7393d98
--- /dev/null
+++ b/src/import/qcow2-util.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int qcow2_detect(int fd);
+int qcow2_convert(int qcow2_fd, int raw_fd);
diff --git a/src/import/test-qcow2.c b/src/import/test-qcow2.c
new file mode 100644
index 0000000..bd2b458
--- /dev/null
+++ b/src/import/test-qcow2.c
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "fd-util.h"
+#include "log.h"
+#include "qcow2-util.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_close_ int sfd = -1, dfd = -1;
+ int r;
+
+ if (argc != 3) {
+ log_error("Needs two arguments.");
+ return EXIT_FAILURE;
+ }
+
+ sfd = open(argv[1], O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (sfd < 0) {
+ log_error_errno(errno, "Can't open source file: %m");
+ return EXIT_FAILURE;
+ }
+
+ dfd = open(argv[2], O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY, 0666);
+ if (dfd < 0) {
+ log_error_errno(errno, "Can't open destination file: %m");
+ return EXIT_FAILURE;
+ }
+
+ r = qcow2_convert(sfd, dfd);
+ if (r < 0) {
+ log_error_errno(r, "Failed to unpack: %m");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/initctl/initctl.c b/src/initctl/initctl.c
new file mode 100644
index 0000000..260dc2e
--- /dev/null
+++ b/src/initctl/initctl.c
@@ -0,0 +1,405 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "initreq.h"
+#include "list.h"
+#include "log.h"
+#include "special.h"
+#include "util.h"
+#include "process-util.h"
+
+#define SERVER_FD_MAX 16
+#define TIMEOUT_MSEC ((int) (DEFAULT_EXIT_USEC/USEC_PER_MSEC))
+
+typedef struct Fifo Fifo;
+
+typedef struct Server {
+ int epoll_fd;
+
+ LIST_HEAD(Fifo, fifos);
+ unsigned n_fifos;
+
+ sd_bus *bus;
+
+ bool quit;
+} Server;
+
+struct Fifo {
+ Server *server;
+
+ int fd;
+
+ struct init_request buffer;
+ size_t bytes_read;
+
+ LIST_FIELDS(Fifo, fifo);
+};
+
+static const char *translate_runlevel(int runlevel, bool *isolate) {
+ static const struct {
+ const int runlevel;
+ const char *special;
+ bool isolate;
+ } table[] = {
+ { '0', SPECIAL_POWEROFF_TARGET, false },
+ { '1', SPECIAL_RESCUE_TARGET, true },
+ { 's', SPECIAL_RESCUE_TARGET, true },
+ { 'S', SPECIAL_RESCUE_TARGET, true },
+ { '2', SPECIAL_MULTI_USER_TARGET, true },
+ { '3', SPECIAL_MULTI_USER_TARGET, true },
+ { '4', SPECIAL_MULTI_USER_TARGET, true },
+ { '5', SPECIAL_GRAPHICAL_TARGET, true },
+ { '6', SPECIAL_REBOOT_TARGET, false },
+ };
+
+ unsigned i;
+
+ assert(isolate);
+
+ for (i = 0; i < ELEMENTSOF(table); i++)
+ if (table[i].runlevel == runlevel) {
+ *isolate = table[i].isolate;
+ if (runlevel == '6' && kexec_loaded())
+ return SPECIAL_KEXEC_TARGET;
+ return table[i].special;
+ }
+
+ return NULL;
+}
+
+static int change_runlevel(Server *s, int runlevel) {
+ const char *target;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *mode;
+ bool isolate = false;
+ int r;
+
+ assert(s);
+
+ target = translate_runlevel(runlevel, &isolate);
+ if (!target) {
+ log_warning("Got request for unknown runlevel %c, ignoring.", runlevel);
+ return 0;
+ }
+
+ if (isolate)
+ mode = "isolate";
+ else
+ mode = "replace-irreversibly";
+
+ log_debug("Running request %s/start/%s", target, mode);
+
+ r = sd_bus_call_method(
+ s->bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartUnit",
+ &error,
+ NULL,
+ "ss", target, mode);
+ if (r < 0)
+ return log_error_errno(r, "Failed to change runlevel: %s", bus_error_message(&error, -r));
+
+ return 0;
+}
+
+static void request_process(Server *s, const struct init_request *req) {
+ assert(s);
+ assert(req);
+
+ if (req->magic != INIT_MAGIC) {
+ log_error("Got initctl request with invalid magic. Ignoring.");
+ return;
+ }
+
+ switch (req->cmd) {
+
+ case INIT_CMD_RUNLVL:
+ if (!isprint(req->runlevel))
+ log_error("Got invalid runlevel. Ignoring.");
+ else
+ switch (req->runlevel) {
+
+ /* we are async anyway, so just use kill for reexec/reload */
+ case 'u':
+ case 'U':
+ if (kill(1, SIGTERM) < 0)
+ log_error_errno(errno, "kill() failed: %m");
+
+ /* The bus connection will be
+ * terminated if PID 1 is reexecuted,
+ * hence let's just exit here, and
+ * rely on that we'll be restarted on
+ * the next request */
+ s->quit = true;
+ break;
+
+ case 'q':
+ case 'Q':
+ if (kill(1, SIGHUP) < 0)
+ log_error_errno(errno, "kill() failed: %m");
+ break;
+
+ default:
+ (void) change_runlevel(s, req->runlevel);
+ }
+ return;
+
+ case INIT_CMD_POWERFAIL:
+ case INIT_CMD_POWERFAILNOW:
+ case INIT_CMD_POWEROK:
+ log_warning("Received UPS/power initctl request. This is not implemented in systemd. Upgrade your UPS daemon!");
+ return;
+
+ case INIT_CMD_CHANGECONS:
+ log_warning("Received console change initctl request. This is not implemented in systemd.");
+ return;
+
+ case INIT_CMD_SETENV:
+ case INIT_CMD_UNSETENV:
+ log_warning("Received environment initctl request. This is not implemented in systemd.");
+ return;
+
+ default:
+ log_warning("Received unknown initctl request. Ignoring.");
+ return;
+ }
+}
+
+static int fifo_process(Fifo *f) {
+ ssize_t l;
+
+ assert(f);
+
+ errno = EIO;
+ l = read(f->fd,
+ ((uint8_t*) &f->buffer) + f->bytes_read,
+ sizeof(f->buffer) - f->bytes_read);
+ if (l <= 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to read from fifo: %m");
+ }
+
+ f->bytes_read += l;
+ assert(f->bytes_read <= sizeof(f->buffer));
+
+ if (f->bytes_read == sizeof(f->buffer)) {
+ request_process(f->server, &f->buffer);
+ f->bytes_read = 0;
+ }
+
+ return 0;
+}
+
+static void fifo_free(Fifo *f) {
+ assert(f);
+
+ if (f->server) {
+ assert(f->server->n_fifos > 0);
+ f->server->n_fifos--;
+ LIST_REMOVE(fifo, f->server->fifos, f);
+ }
+
+ if (f->fd >= 0) {
+ if (f->server)
+ epoll_ctl(f->server->epoll_fd, EPOLL_CTL_DEL, f->fd, NULL);
+
+ safe_close(f->fd);
+ }
+
+ free(f);
+}
+
+static void server_done(Server *s) {
+ assert(s);
+
+ while (s->fifos)
+ fifo_free(s->fifos);
+
+ s->epoll_fd = safe_close(s->epoll_fd);
+ s->bus = sd_bus_flush_close_unref(s->bus);
+}
+
+static int server_init(Server *s, unsigned n_sockets) {
+ int r;
+ unsigned i;
+
+ assert(s);
+ assert(n_sockets > 0);
+
+ zero(*s);
+
+ s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (s->epoll_fd < 0) {
+ r = log_error_errno(errno,
+ "Failed to create epoll object: %m");
+ goto fail;
+ }
+
+ for (i = 0; i < n_sockets; i++) {
+ struct epoll_event ev;
+ Fifo *f;
+ int fd;
+
+ fd = SD_LISTEN_FDS_START+i;
+
+ r = sd_is_fifo(fd, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to determine file descriptor type: %m");
+ goto fail;
+ }
+
+ if (!r) {
+ log_error("Wrong file descriptor type.");
+ r = -EINVAL;
+ goto fail;
+ }
+
+ f = new0(Fifo, 1);
+ if (!f) {
+ r = -ENOMEM;
+ log_error_errno(errno, "Failed to create fifo object: %m");
+ goto fail;
+ }
+
+ f->fd = -1;
+
+ zero(ev);
+ ev.events = EPOLLIN;
+ ev.data.ptr = f;
+ if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
+ r = -errno;
+ fifo_free(f);
+ log_error_errno(errno, "Failed to add fifo fd to epoll object: %m");
+ goto fail;
+ }
+
+ f->fd = fd;
+ LIST_PREPEND(fifo, s->fifos, f);
+ f->server = s;
+ s->n_fifos++;
+ }
+
+ r = bus_connect_system_systemd(&s->bus);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get D-Bus connection: %m");
+ r = -EIO;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ server_done(s);
+
+ return r;
+}
+
+static int process_event(Server *s, struct epoll_event *ev) {
+ int r;
+ Fifo *f;
+
+ assert(s);
+
+ if (!(ev->events & EPOLLIN))
+ return log_info_errno(SYNTHETIC_ERRNO(EIO),
+ "Got invalid event from epoll. (3)");
+
+ f = (Fifo*) ev->data.ptr;
+ r = fifo_process(f);
+ if (r < 0) {
+ log_info_errno(r, "Got error on fifo: %m");
+ fifo_free(f);
+ return r;
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ Server server;
+ int r = EXIT_FAILURE, n;
+
+ if (getppid() != 1) {
+ log_error("This program should be invoked by init only.");
+ return EXIT_FAILURE;
+ }
+
+ if (argc > 1) {
+ log_error("This program does not take arguments.");
+ return EXIT_FAILURE;
+ }
+
+ log_setup_service();
+
+ umask(0022);
+
+ n = sd_listen_fds(true);
+ if (n < 0) {
+ log_error_errno(r, "Failed to read listening file descriptors from environment: %m");
+ return EXIT_FAILURE;
+ }
+
+ if (n <= 0 || n > SERVER_FD_MAX) {
+ log_error("No or too many file descriptors passed.");
+ return EXIT_FAILURE;
+ }
+
+ if (server_init(&server, (unsigned) n) < 0)
+ return EXIT_FAILURE;
+
+ log_debug("systemd-initctl running as pid "PID_FMT, getpid_cached());
+
+ sd_notify(false,
+ "READY=1\n"
+ "STATUS=Processing requests...");
+
+ while (!server.quit) {
+ struct epoll_event event;
+ int k;
+
+ k = epoll_wait(server.epoll_fd, &event, 1, TIMEOUT_MSEC);
+ if (k < 0) {
+ if (errno == EINTR)
+ continue;
+ log_error_errno(errno, "epoll_wait() failed: %m");
+ goto fail;
+ }
+
+ if (k <= 0)
+ break;
+
+ if (process_event(&server, &event) < 0)
+ goto fail;
+ }
+
+ r = EXIT_SUCCESS;
+
+ log_debug("systemd-initctl stopped as pid "PID_FMT, getpid_cached());
+
+fail:
+ sd_notify(false,
+ "STOPPING=1\n"
+ "STATUS=Shutting down...");
+
+ server_done(&server);
+
+ return r;
+}
diff --git a/src/journal-remote/browse.html b/src/journal-remote/browse.html
new file mode 100644
index 0000000..e5162d1
--- /dev/null
+++ b/src/journal-remote/browse.html
@@ -0,0 +1,547 @@
+<!DOCTYPE html>
+<html>
+<head>
+ <title>Journal</title>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+ <style type="text/css">
+ div#divlogs, div#diventry {
+ font-family: monospace;
+ font-size: 7pt;
+ background-color: #ffffff;
+ padding: 1em;
+ margin: 2em 0em;
+ border-radius: 10px 10px 10px 10px;
+ border: 1px solid threedshadow;
+ white-space: nowrap;
+ overflow-x: scroll;
+ }
+ div#diventry {
+ display: none;
+ }
+ div#divlogs {
+ display: block;
+ }
+ body {
+ background-color: #ededed;
+ color: #313739;
+ font: message-box;
+ margin: 3em;
+ }
+ td.timestamp {
+ text-align: right;
+ border-right: 1px dotted lightgrey;
+ padding-right: 5px;
+ }
+ td.process {
+ border-right: 1px dotted lightgrey;
+ padding-left: 5px;
+ padding-right: 5px;
+ }
+ td.message {
+ padding-left: 5px;
+ }
+ td.message > a:link, td.message > a:visited {
+ text-decoration: none;
+ color: #313739;
+ }
+ td.message-error {
+ padding-left: 5px;
+ color: red;
+ font-weight: bold;
+ }
+ td.message-error > a:link, td.message-error > a:visited {
+ text-decoration: none;
+ color: red;
+ }
+ td.message-highlight {
+ padding-left: 5px;
+ font-weight: bold;
+ }
+ td.message-highlight > a:link, td.message-highlight > a:visited {
+ text-decoration: none;
+ color: #313739;
+ }
+ td > a:hover, td > a:active {
+ text-decoration: underline;
+ color: #c13739;
+ }
+ table#tablelogs, table#tableentry {
+ border-collapse: collapse;
+ }
+ td.field {
+ text-align: right;
+ border-right: 1px dotted lightgrey;
+ padding-right: 5px;
+ }
+ td.data {
+ padding-left: 5px;
+ }
+ div#keynav {
+ text-align: center;
+ font-size: 7pt;
+ color: #818789;
+ padding-top: 2em;
+ }
+ span.key {
+ font-weight: bold;
+ color: #313739;
+ }
+ div#buttonnav {
+ text-align: center;
+ }
+ button {
+ font-size: 18pt;
+ font-weight: bold;
+ width: 2em;
+ height: 2em;
+ }
+ div#filternav {
+ text-align: center;
+ }
+ select {
+ width: 50em;
+ }
+ </style>
+</head>
+
+<body>
+ <!-- TODO:
+ - live display
+ - show red lines for reboots -->
+
+ <h1 id="title"></h1>
+
+ <div id="os"></div>
+ <div id="virtualization"></div>
+ <div id="cutoff"></div>
+ <div id="machine"></div>
+ <div id="usage"></div>
+ <div id="showing"></div>
+
+ <div id="filternav">
+ <select id="filter" onchange="onFilterChange(this);" onfocus="onFilterFocus(this);">
+ <option>No filter</option>
+ </select>
+ &nbsp;&nbsp;&nbsp;&nbsp;
+ <input id="boot" type="checkbox" onchange="onBootChange(this);">Only current boot</input>
+ </div>
+
+ <div id="divlogs"><table id="tablelogs"></table></div>
+ <a name="entry"></a>
+ <div id="diventry"><table id="tableentry"></table></div>
+
+ <div id="buttonnav">
+ <button id="head" onclick="entriesLoadHead();" title="First Page">&#8676;</button>
+ <button id="previous" type="button" onclick="entriesLoadPrevious();" title="Previous Page"/>&#8592;</button>
+ <button id="next" type="button" onclick="entriesLoadNext();" title="Next Page"/>&#8594;</button>
+ <button id="tail" type="button" onclick="entriesLoadTail();" title="Last Page"/>&#8677;</button>
+ &nbsp;&nbsp;&nbsp;&nbsp;
+ <button id="more" type="button" onclick="entriesMore();" title="More Entries"/>+</button>
+ <button id="less" type="button" onclick="entriesLess();" title="Fewer Entries"/>-</button>
+ </div>
+
+ <div id="keynav">
+ <span class="key">g</span>: First Page &nbsp;&nbsp;&nbsp;&nbsp;
+ <span class="key">&#8592;, k, BACKSPACE</span>: Previous Page &nbsp;&nbsp;&nbsp;&nbsp;
+ <span class="key">&#8594;, j, SPACE</span>: Next Page &nbsp;&nbsp;&nbsp;&nbsp;
+ <span class="key">G</span>: Last Page &nbsp;&nbsp;&nbsp;&nbsp;
+ <span class="key">+</span>: More entries &nbsp;&nbsp;&nbsp;&nbsp;
+ <span class="key">-</span>: Fewer entries
+ </div>
+
+ <script type="text/javascript">
+ var first_cursor = null;
+ var last_cursor = null;
+
+ function getNEntries() {
+ var n;
+ n = localStorage["n_entries"];
+ if (n == null)
+ return 50;
+ n = parseInt(n);
+ if (n < 10)
+ return 10;
+ if (n > 1000)
+ return 1000;
+ return n;
+ }
+
+ function showNEntries(n) {
+ var showing = document.getElementById("showing");
+ showing.innerHTML = "Showing <b>" + n.toString() + "</b> entries.";
+ }
+
+ function setNEntries(n) {
+ if (n < 10)
+ return 10;
+ if (n > 1000)
+ return 1000;
+ localStorage["n_entries"] = n.toString();
+ showNEntries(n);
+ }
+
+ function machineLoad() {
+ var request = new XMLHttpRequest();
+ request.open("GET", "/machine");
+ request.onreadystatechange = machineOnResult;
+ request.setRequestHeader("Accept", "application/json");
+ request.send(null);
+ }
+
+ function formatBytes(u) {
+ if (u >= 1024*1024*1024*1024)
+ return (u/1024/1024/1024/1024).toFixed(1) + " TiB";
+ else if (u >= 1024*1024*1024)
+ return (u/1024/1024/1024).toFixed(1) + " GiB";
+ else if (u >= 1024*1024)
+ return (u/1024/1024).toFixed(1) + " MiB";
+ else if (u >= 1024)
+ return (u/1024).toFixed(1) + " KiB";
+ else
+ return u.toString() + " B";
+ }
+
+ function escapeHTML(s) {
+ return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
+ }
+
+ function machineOnResult(event) {
+ if ((event.currentTarget.readyState != 4) ||
+ (event.currentTarget.status != 200 && event.currentTarget.status != 0))
+ return;
+
+ var d = JSON.parse(event.currentTarget.responseText);
+
+ var title = document.getElementById("title");
+ title.innerHTML = 'Journal of ' + escapeHTML(d.hostname);
+ document.title = 'Journal of ' + escapeHTML(d.hostname);
+
+ var machine = document.getElementById("machine");
+ machine.innerHTML = 'Machine ID is <b>' + d.machine_id + '</b>, current boot ID is <b>' + d.boot_id + '</b>.';
+
+ var cutoff = document.getElementById("cutoff");
+ var from = new Date(parseInt(d.cutoff_from_realtime) / 1000);
+ var to = new Date(parseInt(d.cutoff_to_realtime) / 1000);
+ cutoff.innerHTML = 'Journal begins at <b>' + from.toLocaleString() + '</b> and ends at <b>' + to.toLocaleString() + '</b>.';
+
+ var usage = document.getElementById("usage");
+ usage.innerHTML = 'Disk usage is <b>' + formatBytes(parseInt(d.usage)) + '</b>.';
+
+ var os = document.getElementById("os");
+ os.innerHTML = 'Operating system is <b>' + escapeHTML(d.os_pretty_name) + '</b>.';
+
+ var virtualization = document.getElementById("virtualization");
+ virtualization.innerHTML = d.virtualization == "bare" ? "Running on <b>bare metal</b>." : "Running on virtualization <b>" + escapeHTML(d.virtualization) + "</b>.";
+ }
+
+ function entriesLoad(range) {
+
+ if (range == null) {
+ if (localStorage["cursor"] != null && localStorage["cursor"] != "")
+ range = localStorage["cursor"] + ":0";
+ else
+ range = "";
+ }
+
+ var url = "/entries";
+
+ if (localStorage["filter"] != "" && localStorage["filter"] != null) {
+ url += "?_SYSTEMD_UNIT=" + escape(localStorage["filter"]);
+
+ if (localStorage["boot"] == "1")
+ url += "&boot";
+ } else {
+ if (localStorage["boot"] == "1")
+ url += "?boot";
+ }
+
+ var request = new XMLHttpRequest();
+ request.open("GET", url);
+ request.onreadystatechange = entriesOnResult;
+ request.setRequestHeader("Accept", "application/json");
+ request.setRequestHeader("Range", "entries=" + range + ":" + getNEntries().toString());
+ request.send(null);
+ }
+
+ function entriesLoadNext() {
+ if (last_cursor == null)
+ entriesLoad("");
+ else
+ entriesLoad(last_cursor + ":1");
+ }
+
+ function entriesLoadPrevious() {
+ if (first_cursor == null)
+ entriesLoad("");
+ else
+ entriesLoad(first_cursor + ":-" + getNEntries().toString());
+ }
+
+ function entriesLoadHead() {
+ entriesLoad("");
+ }
+
+ function entriesLoadTail() {
+ entriesLoad(":-" + getNEntries().toString());
+ }
+
+ function entriesOnResult(event) {
+
+ if ((event.currentTarget.readyState != 4) ||
+ (event.currentTarget.status != 200 && event.currentTarget.status != 0))
+ return;
+
+ var logs = document.getElementById("tablelogs");
+
+ var lc = null;
+ var fc = null;
+
+ var i, l = event.currentTarget.responseText.split('\n');
+
+ if (l.length <= 1) {
+ logs.innerHTML = '<tbody><tr><td colspan="3"><i>No further entries...</i></td></tr></tbody>';
+ return;
+ }
+
+ var buf = '';
+
+ for (i in l) {
+ if (l[i] == '')
+ continue;
+
+ var d = JSON.parse(l[i]);
+ if (d.MESSAGE == undefined || d.__CURSOR == undefined)
+ continue;
+
+ if (fc == null)
+ fc = d.__CURSOR;
+ lc = d.__CURSOR;
+
+ var priority;
+ if (d.PRIORITY != undefined)
+ priority = parseInt(d.PRIORITY);
+ else
+ priority = 6;
+
+ var clazz;
+ if (priority <= 3)
+ clazz = "message-error";
+ else if (priority <= 5)
+ clazz = "message-highlight";
+ else
+ clazz = "message";
+
+ buf += '<tr><td class="timestamp">';
+
+ if (d.__REALTIME_TIMESTAMP != undefined) {
+ var timestamp = new Date(parseInt(d.__REALTIME_TIMESTAMP) / 1000);
+ buf += timestamp.toLocaleString();
+ }
+
+ buf += '</td><td class="process">';
+
+ if (d.SYSLOG_IDENTIFIER != undefined)
+ buf += escapeHTML(d.SYSLOG_IDENTIFIER);
+ else if (d._COMM != undefined)
+ buf += escapeHTML(d._COMM);
+
+ if (d._PID != undefined)
+ buf += "[" + escapeHTML(d._PID) + "]";
+ else if (d.SYSLOG_PID != undefined)
+ buf += "[" + escapeHTML(d.SYSLOG_PID) + "]";
+
+ buf += '</td><td class="' + clazz + '"><a href="#entry" onclick="onMessageClick(\'' + d.__CURSOR + '\');">';
+
+ if (d.MESSAGE == null)
+ buf += "[blob data]";
+ else if (d.MESSAGE instanceof Array)
+ buf += "[" + formatBytes(d.MESSAGE.length) + " blob data]";
+ else
+ buf += escapeHTML(d.MESSAGE);
+
+ buf += '</a></td></tr>';
+ }
+
+ logs.innerHTML = '<tbody>' + buf + '</tbody>';
+
+ if (fc != null) {
+ first_cursor = fc;
+ localStorage["cursor"] = fc;
+ }
+ if (lc != null)
+ last_cursor = lc;
+ }
+
+ function entriesMore() {
+ setNEntries(getNEntries() + 10);
+ entriesLoad(first_cursor);
+ }
+
+ function entriesLess() {
+ setNEntries(getNEntries() - 10);
+ entriesLoad(first_cursor);
+ }
+
+ function onResultMessageClick(event) {
+ if ((event.currentTarget.readyState != 4) ||
+ (event.currentTarget.status != 200 && event.currentTarget.status != 0))
+ return;
+
+ var d = JSON.parse(event.currentTarget.responseText);
+
+ document.getElementById("diventry").style.display = "block";
+ var entry = document.getElementById("tableentry");
+
+ var buf = "";
+ for (var key in d) {
+ var data = d[key];
+
+ if (data == null)
+ data = "[blob data]";
+ else if (data instanceof Array)
+ data = "[" + formatBytes(data.length) + " blob data]";
+ else
+ data = escapeHTML(data);
+
+ buf += '<tr><td class="field">' + key + '</td><td class="data">' + data + '</td></tr>';
+ }
+ entry.innerHTML = '<tbody>' + buf + '</tbody>';
+ }
+
+ function onMessageClick(t) {
+ var request = new XMLHttpRequest();
+ request.open("GET", "/entries?discrete");
+ request.onreadystatechange = onResultMessageClick;
+ request.setRequestHeader("Accept", "application/json");
+ request.setRequestHeader("Range", "entries=" + t + ":0:1");
+ request.send(null);
+ }
+
+ function onKeyUp(event) {
+ switch (event.keyCode) {
+ case 8:
+ case 37:
+ case 75:
+ entriesLoadPrevious();
+ break;
+ case 32:
+ case 39:
+ case 74:
+ entriesLoadNext();
+ break;
+
+ case 71:
+ if (event.shiftKey)
+ entriesLoadTail();
+ else
+ entriesLoadHead();
+ break;
+ case 171:
+ entriesMore();
+ break;
+ case 173:
+ entriesLess();
+ break;
+ }
+ }
+
+ function onMouseWheel(event) {
+ if (event.detail < 0 || event.wheelDelta > 0)
+ entriesLoadPrevious();
+ else
+ entriesLoadNext();
+ }
+
+ function onResultFilterFocus(event) {
+ if ((event.currentTarget.readyState != 4) ||
+ (event.currentTarget.status != 200 && event.currentTarget.status != 0))
+ return;
+
+ var f = document.getElementById("filter");
+
+ var l = event.currentTarget.responseText.split('\n');
+ var buf = '<option>No filter</option>';
+ var j = -1;
+
+ for (i in l) {
+
+ if (l[i] == '')
+ continue;
+
+ var d = JSON.parse(l[i]);
+ if (d._SYSTEMD_UNIT == undefined)
+ continue;
+
+ buf += '<option value="' + escape(d._SYSTEMD_UNIT) + '">' + escapeHTML(d._SYSTEMD_UNIT) + '</option>';
+
+ if (d._SYSTEMD_UNIT == localStorage["filter"])
+ j = i;
+ }
+
+ if (j < 0) {
+ if (localStorage["filter"] != null && localStorage["filter"] != "") {
+ buf += '<option value="' + escape(localStorage["filter"]) + '">' + escapeHTML(localStorage["filter"]) + '</option>';
+ j = i + 1;
+ } else
+ j = 0;
+ }
+
+ f.innerHTML = buf;
+ f.selectedIndex = j;
+ }
+
+ function onFilterFocus(w) {
+ var request = new XMLHttpRequest();
+ request.open("GET", "/fields/_SYSTEMD_UNIT");
+ request.onreadystatechange = onResultFilterFocus;
+ request.setRequestHeader("Accept", "application/json");
+ request.send(null);
+ }
+
+ function onFilterChange(w) {
+ if (w.selectedIndex <= 0)
+ localStorage["filter"] = "";
+ else
+ localStorage["filter"] = unescape(w.options[w.selectedIndex].value);
+
+ entriesLoadHead();
+ }
+
+ function onBootChange(w) {
+ localStorage["boot"] = w.checked ? "1" : "0";
+ entriesLoadHead();
+ }
+
+ function initFilter() {
+ var f = document.getElementById("filter");
+
+ var buf = '<option>No filter</option>';
+
+ var filter = localStorage["filter"];
+ var j;
+ if (filter != null && filter != "") {
+ buf += '<option value="' + escape(filter) + '">' + escapeHTML(filter) + '</option>';
+ j = 1;
+ } else
+ j = 0;
+
+ f.innerHTML = buf;
+ f.selectedIndex = j;
+ }
+
+ function installHandlers() {
+ document.onkeyup = onKeyUp;
+
+ var logs = document.getElementById("divlogs");
+ logs.addEventListener("mousewheel", onMouseWheel, false);
+ logs.addEventListener("DOMMouseScroll", onMouseWheel, false);
+ }
+
+ machineLoad();
+ entriesLoad(null);
+ showNEntries(getNEntries());
+ initFilter();
+ installHandlers();
+ </script>
+</body>
+</html>
diff --git a/src/journal-remote/journal-gatewayd.c b/src/journal-remote/journal-gatewayd.c
new file mode 100644
index 0000000..af45fa5
--- /dev/null
+++ b/src/journal-remote/journal-gatewayd.c
@@ -0,0 +1,1045 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <microhttpd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "log.h"
+#include "logs-show.h"
+#include "main-func.h"
+#include "microhttpd-util.h"
+#include "os-util.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "sigbus.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+#define JOURNAL_WAIT_TIMEOUT (10*USEC_PER_SEC)
+
+static char *arg_key_pem = NULL;
+static char *arg_cert_pem = NULL;
+static char *arg_trust_pem = NULL;
+static const char *arg_directory = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_key_pem, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_cert_pem, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_trust_pem, freep);
+
+typedef struct RequestMeta {
+ sd_journal *journal;
+
+ OutputMode mode;
+
+ char *cursor;
+ int64_t n_skip;
+ uint64_t n_entries;
+ bool n_entries_set;
+
+ FILE *tmp;
+ uint64_t delta, size;
+
+ int argument_parse_error;
+
+ bool follow;
+ bool discrete;
+
+ uint64_t n_fields;
+ bool n_fields_set;
+} RequestMeta;
+
+static const char* const mime_types[_OUTPUT_MODE_MAX] = {
+ [OUTPUT_SHORT] = "text/plain",
+ [OUTPUT_JSON] = "application/json",
+ [OUTPUT_JSON_SSE] = "text/event-stream",
+ [OUTPUT_JSON_SEQ] = "application/json-seq",
+ [OUTPUT_EXPORT] = "application/vnd.fdo.journal",
+};
+
+static RequestMeta *request_meta(void **connection_cls) {
+ RequestMeta *m;
+
+ assert(connection_cls);
+ if (*connection_cls)
+ return *connection_cls;
+
+ m = new0(RequestMeta, 1);
+ if (!m)
+ return NULL;
+
+ *connection_cls = m;
+ return m;
+}
+
+static void request_meta_free(
+ void *cls,
+ struct MHD_Connection *connection,
+ void **connection_cls,
+ enum MHD_RequestTerminationCode toe) {
+
+ RequestMeta *m = *connection_cls;
+
+ if (!m)
+ return;
+
+ sd_journal_close(m->journal);
+
+ safe_fclose(m->tmp);
+
+ free(m->cursor);
+ free(m);
+}
+
+static int open_journal(RequestMeta *m) {
+ assert(m);
+
+ if (m->journal)
+ return 0;
+
+ if (arg_directory)
+ return sd_journal_open_directory(&m->journal, arg_directory, 0);
+ else
+ return sd_journal_open(&m->journal, SD_JOURNAL_LOCAL_ONLY|SD_JOURNAL_SYSTEM);
+}
+
+static int request_meta_ensure_tmp(RequestMeta *m) {
+ assert(m);
+
+ if (m->tmp)
+ rewind(m->tmp);
+ else {
+ int fd;
+
+ fd = open_tmpfile_unlinkable("/tmp", O_RDWR|O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ m->tmp = fdopen(fd, "w+");
+ if (!m->tmp) {
+ safe_close(fd);
+ return -errno;
+ }
+ }
+
+ return 0;
+}
+
+static ssize_t request_reader_entries(
+ void *cls,
+ uint64_t pos,
+ char *buf,
+ size_t max) {
+
+ RequestMeta *m = cls;
+ int r;
+ size_t n, k;
+
+ assert(m);
+ assert(buf);
+ assert(max > 0);
+ assert(pos >= m->delta);
+
+ pos -= m->delta;
+
+ while (pos >= m->size) {
+ off_t sz;
+
+ /* End of this entry, so let's serialize the next
+ * one */
+
+ if (m->n_entries_set &&
+ m->n_entries <= 0)
+ return MHD_CONTENT_READER_END_OF_STREAM;
+
+ if (m->n_skip < 0)
+ r = sd_journal_previous_skip(m->journal, (uint64_t) -m->n_skip + 1);
+ else if (m->n_skip > 0)
+ r = sd_journal_next_skip(m->journal, (uint64_t) m->n_skip + 1);
+ else
+ r = sd_journal_next(m->journal);
+
+ if (r < 0) {
+ log_error_errno(r, "Failed to advance journal pointer: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ } else if (r == 0) {
+
+ if (m->follow) {
+ r = sd_journal_wait(m->journal, (uint64_t) JOURNAL_WAIT_TIMEOUT);
+ if (r < 0) {
+ log_error_errno(r, "Couldn't wait for journal event: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+ if (r == SD_JOURNAL_NOP)
+ break;
+
+ continue;
+ }
+
+ return MHD_CONTENT_READER_END_OF_STREAM;
+ }
+
+ if (m->discrete) {
+ assert(m->cursor);
+
+ r = sd_journal_test_cursor(m->journal, m->cursor);
+ if (r < 0) {
+ log_error_errno(r, "Failed to test cursor: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ if (r == 0)
+ return MHD_CONTENT_READER_END_OF_STREAM;
+ }
+
+ pos -= m->size;
+ m->delta += m->size;
+
+ if (m->n_entries_set)
+ m->n_entries -= 1;
+
+ m->n_skip = 0;
+
+ r = request_meta_ensure_tmp(m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create temporary file: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ r = show_journal_entry(m->tmp, m->journal, m->mode, 0, OUTPUT_FULL_WIDTH,
+ NULL, NULL, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to serialize item: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ sz = ftello(m->tmp);
+ if (sz == (off_t) -1) {
+ log_error_errno(errno, "Failed to retrieve file position: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ m->size = (uint64_t) sz;
+ }
+
+ if (m->tmp == NULL && m->follow)
+ return 0;
+
+ if (fseeko(m->tmp, pos, SEEK_SET) < 0) {
+ log_error_errno(errno, "Failed to seek to position: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ n = m->size - pos;
+ if (n < 1)
+ return 0;
+ if (n > max)
+ n = max;
+
+ errno = 0;
+ k = fread(buf, 1, n, m->tmp);
+ if (k != n) {
+ log_error("Failed to read from file: %s", errno ? strerror(errno) : "Premature EOF");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ return (ssize_t) k;
+}
+
+static int request_parse_accept(
+ RequestMeta *m,
+ struct MHD_Connection *connection) {
+
+ const char *header;
+
+ assert(m);
+ assert(connection);
+
+ header = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "Accept");
+ if (!header)
+ return 0;
+
+ if (streq(header, mime_types[OUTPUT_JSON]))
+ m->mode = OUTPUT_JSON;
+ else if (streq(header, mime_types[OUTPUT_JSON_SSE]))
+ m->mode = OUTPUT_JSON_SSE;
+ else if (streq(header, mime_types[OUTPUT_JSON_SEQ]))
+ m->mode = OUTPUT_JSON_SEQ;
+ else if (streq(header, mime_types[OUTPUT_EXPORT]))
+ m->mode = OUTPUT_EXPORT;
+ else
+ m->mode = OUTPUT_SHORT;
+
+ return 0;
+}
+
+static int request_parse_range(
+ RequestMeta *m,
+ struct MHD_Connection *connection) {
+
+ const char *range, *colon, *colon2;
+ int r;
+
+ assert(m);
+ assert(connection);
+
+ range = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "Range");
+ if (!range)
+ return 0;
+
+ if (!startswith(range, "entries="))
+ return 0;
+
+ range += 8;
+ range += strspn(range, WHITESPACE);
+
+ colon = strchr(range, ':');
+ if (!colon)
+ m->cursor = strdup(range);
+ else {
+ const char *p;
+
+ colon2 = strchr(colon + 1, ':');
+ if (colon2) {
+ _cleanup_free_ char *t;
+
+ t = strndup(colon + 1, colon2 - colon - 1);
+ if (!t)
+ return -ENOMEM;
+
+ r = safe_atoi64(t, &m->n_skip);
+ if (r < 0)
+ return r;
+ }
+
+ p = (colon2 ? colon2 : colon) + 1;
+ if (*p) {
+ r = safe_atou64(p, &m->n_entries);
+ if (r < 0)
+ return r;
+
+ if (m->n_entries <= 0)
+ return -EINVAL;
+
+ m->n_entries_set = true;
+ }
+
+ m->cursor = strndup(range, colon - range);
+ }
+
+ if (!m->cursor)
+ return -ENOMEM;
+
+ m->cursor[strcspn(m->cursor, WHITESPACE)] = 0;
+ if (isempty(m->cursor))
+ m->cursor = mfree(m->cursor);
+
+ return 0;
+}
+
+static int request_parse_arguments_iterator(
+ void *cls,
+ enum MHD_ValueKind kind,
+ const char *key,
+ const char *value) {
+
+ RequestMeta *m = cls;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(m);
+
+ if (isempty(key)) {
+ m->argument_parse_error = -EINVAL;
+ return MHD_NO;
+ }
+
+ if (streq(key, "follow")) {
+ if (isempty(value)) {
+ m->follow = true;
+ return MHD_YES;
+ }
+
+ r = parse_boolean(value);
+ if (r < 0) {
+ m->argument_parse_error = r;
+ return MHD_NO;
+ }
+
+ m->follow = r;
+ return MHD_YES;
+ }
+
+ if (streq(key, "discrete")) {
+ if (isempty(value)) {
+ m->discrete = true;
+ return MHD_YES;
+ }
+
+ r = parse_boolean(value);
+ if (r < 0) {
+ m->argument_parse_error = r;
+ return MHD_NO;
+ }
+
+ m->discrete = r;
+ return MHD_YES;
+ }
+
+ if (streq(key, "boot")) {
+ if (isempty(value))
+ r = true;
+ else {
+ r = parse_boolean(value);
+ if (r < 0) {
+ m->argument_parse_error = r;
+ return MHD_NO;
+ }
+ }
+
+ if (r) {
+ char match[9 + 32 + 1] = "_BOOT_ID=";
+ sd_id128_t bid;
+
+ r = sd_id128_get_boot(&bid);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get boot ID: %m");
+ return MHD_NO;
+ }
+
+ sd_id128_to_string(bid, match + 9);
+ r = sd_journal_add_match(m->journal, match, sizeof(match)-1);
+ if (r < 0) {
+ m->argument_parse_error = r;
+ return MHD_NO;
+ }
+ }
+
+ return MHD_YES;
+ }
+
+ p = strjoin(key, "=", strempty(value));
+ if (!p) {
+ m->argument_parse_error = log_oom();
+ return MHD_NO;
+ }
+
+ r = sd_journal_add_match(m->journal, p, 0);
+ if (r < 0) {
+ m->argument_parse_error = r;
+ return MHD_NO;
+ }
+
+ return MHD_YES;
+}
+
+static int request_parse_arguments(
+ RequestMeta *m,
+ struct MHD_Connection *connection) {
+
+ assert(m);
+ assert(connection);
+
+ m->argument_parse_error = 0;
+ MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, request_parse_arguments_iterator, m);
+
+ return m->argument_parse_error;
+}
+
+static int request_handler_entries(
+ struct MHD_Connection *connection,
+ void *connection_cls) {
+
+ _cleanup_(MHD_destroy_responsep) struct MHD_Response *response = NULL;
+ RequestMeta *m = connection_cls;
+ int r;
+
+ assert(connection);
+ assert(m);
+
+ r = open_journal(m);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to open journal: %m");
+
+ if (request_parse_accept(m, connection) < 0)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST, "Failed to parse Accept header.");
+
+ if (request_parse_range(m, connection) < 0)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST, "Failed to parse Range header.");
+
+ if (request_parse_arguments(m, connection) < 0)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST, "Failed to parse URL arguments.");
+
+ if (m->discrete) {
+ if (!m->cursor)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST, "Discrete seeks require a cursor specification.");
+
+ m->n_entries = 1;
+ m->n_entries_set = true;
+ }
+
+ if (m->cursor)
+ r = sd_journal_seek_cursor(m->journal, m->cursor);
+ else if (m->n_skip >= 0)
+ r = sd_journal_seek_head(m->journal);
+ else if (m->n_skip < 0)
+ r = sd_journal_seek_tail(m->journal);
+ if (r < 0)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST, "Failed to seek in journal.");
+
+ response = MHD_create_response_from_callback(MHD_SIZE_UNKNOWN, 4*1024, request_reader_entries, m, NULL);
+ if (!response)
+ return respond_oom(connection);
+
+ MHD_add_response_header(response, "Content-Type", mime_types[m->mode]);
+ return MHD_queue_response(connection, MHD_HTTP_OK, response);
+}
+
+static int output_field(FILE *f, OutputMode m, const char *d, size_t l) {
+ const char *eq;
+ size_t j;
+
+ eq = memchr(d, '=', l);
+ if (!eq)
+ return -EINVAL;
+
+ j = l - (eq - d + 1);
+
+ if (m == OUTPUT_JSON) {
+ fprintf(f, "{ \"%.*s\" : ", (int) (eq - d), d);
+ json_escape(f, eq+1, j, OUTPUT_FULL_WIDTH);
+ fputs(" }\n", f);
+ } else {
+ fwrite(eq+1, 1, j, f);
+ fputc('\n', f);
+ }
+
+ return 0;
+}
+
+static ssize_t request_reader_fields(
+ void *cls,
+ uint64_t pos,
+ char *buf,
+ size_t max) {
+
+ RequestMeta *m = cls;
+ int r;
+ size_t n, k;
+
+ assert(m);
+ assert(buf);
+ assert(max > 0);
+ assert(pos >= m->delta);
+
+ pos -= m->delta;
+
+ while (pos >= m->size) {
+ off_t sz;
+ const void *d;
+ size_t l;
+
+ /* End of this field, so let's serialize the next
+ * one */
+
+ if (m->n_fields_set &&
+ m->n_fields <= 0)
+ return MHD_CONTENT_READER_END_OF_STREAM;
+
+ r = sd_journal_enumerate_unique(m->journal, &d, &l);
+ if (r < 0) {
+ log_error_errno(r, "Failed to advance field index: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ } else if (r == 0)
+ return MHD_CONTENT_READER_END_OF_STREAM;
+
+ pos -= m->size;
+ m->delta += m->size;
+
+ if (m->n_fields_set)
+ m->n_fields -= 1;
+
+ r = request_meta_ensure_tmp(m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create temporary file: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ r = output_field(m->tmp, m->mode, d, l);
+ if (r < 0) {
+ log_error_errno(r, "Failed to serialize item: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ sz = ftello(m->tmp);
+ if (sz == (off_t) -1) {
+ log_error_errno(errno, "Failed to retrieve file position: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ m->size = (uint64_t) sz;
+ }
+
+ if (fseeko(m->tmp, pos, SEEK_SET) < 0) {
+ log_error_errno(errno, "Failed to seek to position: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ n = m->size - pos;
+ if (n > max)
+ n = max;
+
+ errno = 0;
+ k = fread(buf, 1, n, m->tmp);
+ if (k != n) {
+ log_error("Failed to read from file: %s", errno ? strerror(errno) : "Premature EOF");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ return (ssize_t) k;
+}
+
+static int request_handler_fields(
+ struct MHD_Connection *connection,
+ const char *field,
+ void *connection_cls) {
+
+ _cleanup_(MHD_destroy_responsep) struct MHD_Response *response = NULL;
+ RequestMeta *m = connection_cls;
+ int r;
+
+ assert(connection);
+ assert(m);
+
+ r = open_journal(m);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to open journal: %m");
+
+ if (request_parse_accept(m, connection) < 0)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST, "Failed to parse Accept header.");
+
+ r = sd_journal_query_unique(m->journal, field);
+ if (r < 0)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST, "Failed to query unique fields.");
+
+ response = MHD_create_response_from_callback(MHD_SIZE_UNKNOWN, 4*1024, request_reader_fields, m, NULL);
+ if (!response)
+ return respond_oom(connection);
+
+ MHD_add_response_header(response, "Content-Type", mime_types[m->mode == OUTPUT_JSON ? OUTPUT_JSON : OUTPUT_SHORT]);
+ return MHD_queue_response(connection, MHD_HTTP_OK, response);
+}
+
+static int request_handler_redirect(
+ struct MHD_Connection *connection,
+ const char *target) {
+
+ char *page;
+ _cleanup_(MHD_destroy_responsep) struct MHD_Response *response = NULL;
+
+ assert(connection);
+ assert(target);
+
+ if (asprintf(&page, "<html><body>Please continue to the <a href=\"%s\">journal browser</a>.</body></html>", target) < 0)
+ return respond_oom(connection);
+
+ response = MHD_create_response_from_buffer(strlen(page), page, MHD_RESPMEM_MUST_FREE);
+ if (!response) {
+ free(page);
+ return respond_oom(connection);
+ }
+
+ MHD_add_response_header(response, "Content-Type", "text/html");
+ MHD_add_response_header(response, "Location", target);
+ return MHD_queue_response(connection, MHD_HTTP_MOVED_PERMANENTLY, response);
+}
+
+static int request_handler_file(
+ struct MHD_Connection *connection,
+ const char *path,
+ const char *mime_type) {
+
+ _cleanup_(MHD_destroy_responsep) struct MHD_Response *response = NULL;
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+
+ assert(connection);
+ assert(path);
+ assert(mime_type);
+
+ fd = open(path, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return mhd_respondf(connection, errno, MHD_HTTP_NOT_FOUND, "Failed to open file %s: %m", path);
+
+ if (fstat(fd, &st) < 0)
+ return mhd_respondf(connection, errno, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to stat file: %m");
+
+ response = MHD_create_response_from_fd_at_offset64(st.st_size, fd, 0);
+ if (!response)
+ return respond_oom(connection);
+ TAKE_FD(fd);
+
+ MHD_add_response_header(response, "Content-Type", mime_type);
+ return MHD_queue_response(connection, MHD_HTTP_OK, response);
+}
+
+static int get_virtualization(char **v) {
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ char *b = NULL;
+ int r;
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Virtualization",
+ NULL,
+ &b);
+ if (r < 0)
+ return r;
+
+ if (isempty(b)) {
+ free(b);
+ *v = NULL;
+ return 0;
+ }
+
+ *v = b;
+ return 1;
+}
+
+static int request_handler_machine(
+ struct MHD_Connection *connection,
+ void *connection_cls) {
+
+ _cleanup_(MHD_destroy_responsep) struct MHD_Response *response = NULL;
+ RequestMeta *m = connection_cls;
+ int r;
+ _cleanup_free_ char* hostname = NULL, *os_name = NULL;
+ uint64_t cutoff_from = 0, cutoff_to = 0, usage = 0;
+ sd_id128_t mid, bid;
+ _cleanup_free_ char *v = NULL, *json = NULL;
+
+ assert(connection);
+ assert(m);
+
+ r = open_journal(m);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to open journal: %m");
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to determine machine ID: %m");
+
+ r = sd_id128_get_boot(&bid);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to determine boot ID: %m");
+
+ hostname = gethostname_malloc();
+ if (!hostname)
+ return respond_oom(connection);
+
+ r = sd_journal_get_usage(m->journal, &usage);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to determine disk usage: %m");
+
+ r = sd_journal_get_cutoff_realtime_usec(m->journal, &cutoff_from, &cutoff_to);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to determine disk usage: %m");
+
+ (void) parse_os_release(NULL, "PRETTY_NAME", &os_name, NULL);
+ (void) get_virtualization(&v);
+
+ r = asprintf(&json,
+ "{ \"machine_id\" : \"" SD_ID128_FORMAT_STR "\","
+ "\"boot_id\" : \"" SD_ID128_FORMAT_STR "\","
+ "\"hostname\" : \"%s\","
+ "\"os_pretty_name\" : \"%s\","
+ "\"virtualization\" : \"%s\","
+ "\"usage\" : \"%"PRIu64"\","
+ "\"cutoff_from_realtime\" : \"%"PRIu64"\","
+ "\"cutoff_to_realtime\" : \"%"PRIu64"\" }\n",
+ SD_ID128_FORMAT_VAL(mid),
+ SD_ID128_FORMAT_VAL(bid),
+ hostname_cleanup(hostname),
+ os_name ? os_name : "Linux",
+ v ? v : "bare",
+ usage,
+ cutoff_from,
+ cutoff_to);
+ if (r < 0)
+ return respond_oom(connection);
+
+ response = MHD_create_response_from_buffer(strlen(json), json, MHD_RESPMEM_MUST_FREE);
+ if (!response)
+ return respond_oom(connection);
+ TAKE_PTR(json);
+
+ MHD_add_response_header(response, "Content-Type", "application/json");
+ return MHD_queue_response(connection, MHD_HTTP_OK, response);
+}
+
+static int request_handler(
+ void *cls,
+ struct MHD_Connection *connection,
+ const char *url,
+ const char *method,
+ const char *version,
+ const char *upload_data,
+ size_t *upload_data_size,
+ void **connection_cls) {
+ int r, code;
+
+ assert(connection);
+ assert(connection_cls);
+ assert(url);
+ assert(method);
+
+ if (!streq(method, "GET"))
+ return mhd_respond(connection, MHD_HTTP_NOT_ACCEPTABLE, "Unsupported method.");
+
+ if (!*connection_cls) {
+ if (!request_meta(connection_cls))
+ return respond_oom(connection);
+ return MHD_YES;
+ }
+
+ if (arg_trust_pem) {
+ r = check_permissions(connection, &code, NULL);
+ if (r < 0)
+ return code;
+ }
+
+ if (streq(url, "/"))
+ return request_handler_redirect(connection, "/browse");
+
+ if (streq(url, "/entries"))
+ return request_handler_entries(connection, *connection_cls);
+
+ if (startswith(url, "/fields/"))
+ return request_handler_fields(connection, url + 8, *connection_cls);
+
+ if (streq(url, "/browse"))
+ return request_handler_file(connection, DOCUMENT_ROOT "/browse.html", "text/html");
+
+ if (streq(url, "/machine"))
+ return request_handler_machine(connection, *connection_cls);
+
+ return mhd_respond(connection, MHD_HTTP_NOT_FOUND, "Not found.");
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-journal-gatewayd.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] ...\n\n"
+ "HTTP server for journal events.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --cert=CERT.PEM Server certificate in PEM format\n"
+ " --key=KEY.PEM Server key in PEM format\n"
+ " --trust=CERT.PEM Certificate authority certificate in PEM format\n"
+ " -D --directory=PATH Serve journal files in directory\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_KEY,
+ ARG_CERT,
+ ARG_TRUST,
+ };
+
+ int r, c;
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "key", required_argument, NULL, ARG_KEY },
+ { "cert", required_argument, NULL, ARG_CERT },
+ { "trust", required_argument, NULL, ARG_TRUST },
+ { "directory", required_argument, NULL, 'D' },
+ {}
+ };
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hD:", options, NULL)) >= 0)
+
+ switch(c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_KEY:
+ if (arg_key_pem)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Key file specified twice");
+ r = read_full_file(optarg, &arg_key_pem, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read key file: %m");
+ assert(arg_key_pem);
+ break;
+
+ case ARG_CERT:
+ if (arg_cert_pem)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Certificate file specified twice");
+ r = read_full_file(optarg, &arg_cert_pem, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read certificate file: %m");
+ assert(arg_cert_pem);
+ break;
+
+ case ARG_TRUST:
+#if HAVE_GNUTLS
+ if (arg_trust_pem)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "CA certificate file specified twice");
+ r = read_full_file(optarg, &arg_trust_pem, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read CA certificate file: %m");
+ assert(arg_trust_pem);
+ break;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --trust is not available.");
+#endif
+ case 'D':
+ arg_directory = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program does not take arguments.");
+
+ if (!!arg_key_pem != !!arg_cert_pem)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Certificate and key files must be specified together");
+
+ if (arg_trust_pem && !arg_key_pem)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "CA certificate can only be used with certificate file");
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(MHD_stop_daemonp) struct MHD_Daemon *d = NULL;
+ struct MHD_OptionItem opts[] = {
+ { MHD_OPTION_NOTIFY_COMPLETED,
+ (intptr_t) request_meta_free, NULL },
+ { MHD_OPTION_EXTERNAL_LOGGER,
+ (intptr_t) microhttpd_logger, NULL },
+ { MHD_OPTION_END, 0, NULL },
+ { MHD_OPTION_END, 0, NULL },
+ { MHD_OPTION_END, 0, NULL },
+ { MHD_OPTION_END, 0, NULL },
+ { MHD_OPTION_END, 0, NULL },
+ };
+ int opts_pos = 2;
+
+ /* We force MHD_USE_ITC here, in order to make sure
+ * libmicrohttpd doesn't use shutdown() on our listening
+ * socket, which would break socket re-activation. See
+ *
+ * https://lists.gnu.org/archive/html/libmicrohttpd/2015-09/msg00014.html
+ * https://github.com/systemd/systemd/pull/1286
+ */
+
+ int flags =
+ MHD_USE_DEBUG |
+ MHD_USE_DUAL_STACK |
+ MHD_USE_ITC |
+ MHD_USE_POLL_INTERNAL_THREAD |
+ MHD_USE_THREAD_PER_CONNECTION;
+ int r, n;
+
+ log_setup_service();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ sigbus_install();
+
+ r = setup_gnutls_logger(NULL);
+ if (r < 0)
+ return r;
+
+ n = sd_listen_fds(1);
+ if (n < 0)
+ return log_error_errno(n, "Failed to determine passed sockets: %m");
+ if (n > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Can't listen on more than one socket.");
+
+ if (n == 1)
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ { MHD_OPTION_LISTEN_SOCKET, SD_LISTEN_FDS_START };
+
+ if (arg_key_pem) {
+ assert(arg_cert_pem);
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ { MHD_OPTION_HTTPS_MEM_KEY, 0, arg_key_pem };
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ { MHD_OPTION_HTTPS_MEM_CERT, 0, arg_cert_pem };
+ flags |= MHD_USE_TLS;
+ }
+
+ if (arg_trust_pem) {
+ assert(flags & MHD_USE_TLS);
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ { MHD_OPTION_HTTPS_MEM_TRUST, 0, arg_trust_pem };
+ }
+
+ d = MHD_start_daemon(flags, 19531,
+ NULL, NULL,
+ request_handler, NULL,
+ MHD_OPTION_ARRAY, opts,
+ MHD_OPTION_END);
+ if (!d)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to start daemon!");
+
+ pause();
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/journal-remote/journal-remote-main.c b/src/journal-remote/journal-remote-main.c
new file mode 100644
index 0000000..802c3ea
--- /dev/null
+++ b/src/journal-remote/journal-remote-main.c
@@ -0,0 +1,1158 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "conf-parser.h"
+#include "daemon-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "journal-remote-write.h"
+#include "journal-remote.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "strv.h"
+
+#define PRIV_KEY_FILE CERTIFICATE_ROOT "/private/journal-remote.pem"
+#define CERT_FILE CERTIFICATE_ROOT "/certs/journal-remote.pem"
+#define TRUST_FILE CERTIFICATE_ROOT "/ca/trusted.pem"
+
+static const char* arg_url = NULL;
+static const char* arg_getter = NULL;
+static const char* arg_listen_raw = NULL;
+static const char* arg_listen_http = NULL;
+static const char* arg_listen_https = NULL;
+static char** arg_files = NULL; /* Do not free this. */
+static int arg_compress = true;
+static int arg_seal = false;
+static int http_socket = -1, https_socket = -1;
+static char** arg_gnutls_log = NULL;
+
+static JournalWriteSplitMode arg_split_mode = _JOURNAL_WRITE_SPLIT_INVALID;
+static const char* arg_output = NULL;
+
+static char *arg_key = NULL;
+static char *arg_cert = NULL;
+static char *arg_trust = NULL;
+static bool arg_trust_all = false;
+
+STATIC_DESTRUCTOR_REGISTER(arg_gnutls_log, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_key, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_cert, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_trust, freep);
+
+static const char* const journal_write_split_mode_table[_JOURNAL_WRITE_SPLIT_MAX] = {
+ [JOURNAL_WRITE_SPLIT_NONE] = "none",
+ [JOURNAL_WRITE_SPLIT_HOST] = "host",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(journal_write_split_mode, JournalWriteSplitMode);
+static DEFINE_CONFIG_PARSE_ENUM(config_parse_write_split_mode,
+ journal_write_split_mode,
+ JournalWriteSplitMode,
+ "Failed to parse split mode setting");
+
+/**********************************************************************
+ **********************************************************************
+ **********************************************************************/
+
+static int spawn_child(const char* child, char** argv) {
+ pid_t child_pid;
+ int fd[2], r;
+
+ if (pipe(fd) < 0)
+ return log_error_errno(errno, "Failed to create pager pipe: %m");
+
+ r = safe_fork("(remote)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &child_pid);
+ if (r < 0) {
+ safe_close_pair(fd);
+ return r;
+ }
+
+ /* In the child */
+ if (r == 0) {
+ safe_close(fd[0]);
+
+ r = rearrange_stdio(STDIN_FILENO, fd[1], STDERR_FILENO);
+ if (r < 0) {
+ log_error_errno(r, "Failed to dup pipe to stdout: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ (void) rlimit_nofile_safe();
+
+ execvp(child, argv);
+ log_error_errno(errno, "Failed to exec child %s: %m", child);
+ _exit(EXIT_FAILURE);
+ }
+
+ safe_close(fd[1]);
+
+ r = fd_nonblock(fd[0], true);
+ if (r < 0)
+ log_warning_errno(errno, "Failed to set child pipe to non-blocking: %m");
+
+ return fd[0];
+}
+
+static int spawn_curl(const char* url) {
+ char **argv = STRV_MAKE("curl",
+ "-HAccept: application/vnd.fdo.journal",
+ "--silent",
+ "--show-error",
+ url);
+ int r;
+
+ r = spawn_child("curl", argv);
+ if (r < 0)
+ log_error_errno(r, "Failed to spawn curl: %m");
+ return r;
+}
+
+static int spawn_getter(const char *getter) {
+ int r;
+ _cleanup_strv_free_ char **words = NULL;
+
+ assert(getter);
+ r = strv_split_extract(&words, getter, WHITESPACE, EXTRACT_QUOTES);
+ if (r < 0)
+ return log_error_errno(r, "Failed to split getter option: %m");
+
+ r = spawn_child(words[0], words);
+ if (r < 0)
+ log_error_errno(r, "Failed to spawn getter %s: %m", getter);
+
+ return r;
+}
+
+/**********************************************************************
+ **********************************************************************
+ **********************************************************************/
+
+static int null_timer_event_handler(sd_event_source *s,
+ uint64_t usec,
+ void *userdata);
+static int dispatch_http_event(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userdata);
+
+static int request_meta(void **connection_cls, int fd, char *hostname) {
+ RemoteSource *source;
+ Writer *writer;
+ int r;
+
+ assert(connection_cls);
+ if (*connection_cls)
+ return 0;
+
+ r = journal_remote_get_writer(journal_remote_server_global, hostname, &writer);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to get writer for source %s: %m",
+ hostname);
+
+ source = source_new(fd, true, hostname, writer);
+ if (!source) {
+ writer_unref(writer);
+ return log_oom();
+ }
+
+ log_debug("Added RemoteSource as connection metadata %p", source);
+
+ *connection_cls = source;
+ return 0;
+}
+
+static void request_meta_free(void *cls,
+ struct MHD_Connection *connection,
+ void **connection_cls,
+ enum MHD_RequestTerminationCode toe) {
+ RemoteSource *s;
+
+ assert(connection_cls);
+ s = *connection_cls;
+
+ if (s) {
+ log_debug("Cleaning up connection metadata %p", s);
+ source_free(s);
+ *connection_cls = NULL;
+ }
+}
+
+static int process_http_upload(
+ struct MHD_Connection *connection,
+ const char *upload_data,
+ size_t *upload_data_size,
+ RemoteSource *source) {
+
+ bool finished = false;
+ size_t remaining;
+ int r;
+
+ assert(source);
+
+ log_trace("%s: connection %p, %zu bytes",
+ __func__, connection, *upload_data_size);
+
+ if (*upload_data_size) {
+ log_trace("Received %zu bytes", *upload_data_size);
+
+ r = journal_importer_push_data(&source->importer,
+ upload_data, *upload_data_size);
+ if (r < 0)
+ return mhd_respond_oom(connection);
+
+ *upload_data_size = 0;
+ } else
+ finished = true;
+
+ for (;;) {
+ r = process_source(source,
+ journal_remote_server_global->compress,
+ journal_remote_server_global->seal);
+ if (r == -EAGAIN)
+ break;
+ if (r < 0) {
+ if (r == -ENOBUFS)
+ log_warning_errno(r, "Entry is above the maximum of %u, aborting connection %p.",
+ DATA_SIZE_MAX, connection);
+ else if (r == -E2BIG)
+ log_warning_errno(r, "Entry with more fields than the maximum of %u, aborting connection %p.",
+ ENTRY_FIELD_COUNT_MAX, connection);
+ else
+ log_warning_errno(r, "Failed to process data, aborting connection %p: %m",
+ connection);
+ return MHD_NO;
+ }
+ }
+
+ if (!finished)
+ return MHD_YES;
+
+ /* The upload is finished */
+
+ remaining = journal_importer_bytes_remaining(&source->importer);
+ if (remaining > 0) {
+ log_warning("Premature EOF byte. %zu bytes lost.", remaining);
+ return mhd_respondf(connection,
+ 0, MHD_HTTP_EXPECTATION_FAILED,
+ "Premature EOF. %zu bytes of trailing data not processed.",
+ remaining);
+ }
+
+ return mhd_respond(connection, MHD_HTTP_ACCEPTED, "OK.");
+};
+
+static int request_handler(
+ void *cls,
+ struct MHD_Connection *connection,
+ const char *url,
+ const char *method,
+ const char *version,
+ const char *upload_data,
+ size_t *upload_data_size,
+ void **connection_cls) {
+
+ const char *header;
+ int r, code, fd;
+ _cleanup_free_ char *hostname = NULL;
+ size_t len;
+
+ assert(connection);
+ assert(connection_cls);
+ assert(url);
+ assert(method);
+
+ log_trace("Handling a connection %s %s %s", method, url, version);
+
+ if (*connection_cls)
+ return process_http_upload(connection,
+ upload_data, upload_data_size,
+ *connection_cls);
+
+ if (!streq(method, "POST"))
+ return mhd_respond(connection, MHD_HTTP_NOT_ACCEPTABLE, "Unsupported method.");
+
+ if (!streq(url, "/upload"))
+ return mhd_respond(connection, MHD_HTTP_NOT_FOUND, "Not found.");
+
+ header = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "Content-Type");
+ if (!header || !streq(header, "application/vnd.fdo.journal"))
+ return mhd_respond(connection, MHD_HTTP_UNSUPPORTED_MEDIA_TYPE,
+ "Content-Type: application/vnd.fdo.journal is required.");
+
+ header = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "Content-Length");
+ if (!header)
+ return mhd_respond(connection, MHD_HTTP_LENGTH_REQUIRED,
+ "Content-Length header is required.");
+ r = safe_atozu(header, &len);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_LENGTH_REQUIRED,
+ "Content-Length: %s cannot be parsed: %m", header);
+
+ if (len > ENTRY_SIZE_MAX)
+ /* When serialized, an entry of maximum size might be slightly larger,
+ * so this does not correspond exactly to the limit in journald. Oh well.
+ */
+ return mhd_respondf(connection, 0, MHD_HTTP_PAYLOAD_TOO_LARGE,
+ "Payload larger than maximum size of %u bytes", ENTRY_SIZE_MAX);
+
+ {
+ const union MHD_ConnectionInfo *ci;
+
+ ci = MHD_get_connection_info(connection,
+ MHD_CONNECTION_INFO_CONNECTION_FD);
+ if (!ci) {
+ log_error("MHD_get_connection_info failed: cannot get remote fd");
+ return mhd_respond(connection, MHD_HTTP_INTERNAL_SERVER_ERROR,
+ "Cannot check remote address.");
+ }
+
+ fd = ci->connect_fd;
+ assert(fd >= 0);
+ }
+
+ if (journal_remote_server_global->check_trust) {
+ r = check_permissions(connection, &code, &hostname);
+ if (r < 0)
+ return code;
+ } else {
+ r = getpeername_pretty(fd, false, &hostname);
+ if (r < 0)
+ return mhd_respond(connection, MHD_HTTP_INTERNAL_SERVER_ERROR,
+ "Cannot check remote hostname.");
+ }
+
+ assert(hostname);
+
+ r = request_meta(connection_cls, fd, hostname);
+ if (r == -ENOMEM)
+ return respond_oom(connection);
+ else if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "%m");
+
+ hostname = NULL;
+ return MHD_YES;
+}
+
+static int setup_microhttpd_server(RemoteServer *s,
+ int fd,
+ const char *key,
+ const char *cert,
+ const char *trust) {
+ struct MHD_OptionItem opts[] = {
+ { MHD_OPTION_NOTIFY_COMPLETED, (intptr_t) request_meta_free},
+ { MHD_OPTION_EXTERNAL_LOGGER, (intptr_t) microhttpd_logger},
+ { MHD_OPTION_LISTEN_SOCKET, fd},
+ { MHD_OPTION_CONNECTION_MEMORY_LIMIT, 128*1024},
+ { MHD_OPTION_END},
+ { MHD_OPTION_END},
+ { MHD_OPTION_END},
+ { MHD_OPTION_END},
+ { MHD_OPTION_END}};
+ int opts_pos = 4;
+ int flags =
+ MHD_USE_DEBUG |
+ MHD_USE_DUAL_STACK |
+ MHD_USE_EPOLL |
+ MHD_USE_ITC;
+
+ const union MHD_DaemonInfo *info;
+ int r, epoll_fd;
+ MHDDaemonWrapper *d;
+
+ assert(fd >= 0);
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make fd:%d nonblocking: %m", fd);
+
+/* MHD_OPTION_STRICT_FOR_CLIENT is introduced in microhttpd 0.9.54,
+ * and MHD_USE_PEDANTIC_CHECKS will be deprecated in future.
+ * If MHD_USE_PEDANTIC_CHECKS is '#define'd, then it is deprecated
+ * and we should use MHD_OPTION_STRICT_FOR_CLIENT. On the other hand,
+ * if MHD_USE_PEDANTIC_CHECKS is not '#define'd, then it is not
+ * deprecated yet and there exists an enum element with the same name.
+ * So we can safely use it. */
+#ifdef MHD_USE_PEDANTIC_CHECKS
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ {MHD_OPTION_STRICT_FOR_CLIENT, 1};
+#else
+ flags |= MHD_USE_PEDANTIC_CHECKS;
+#endif
+
+ if (key) {
+ assert(cert);
+
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ {MHD_OPTION_HTTPS_MEM_KEY, 0, (char*) key};
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ {MHD_OPTION_HTTPS_MEM_CERT, 0, (char*) cert};
+
+ flags |= MHD_USE_TLS;
+
+ if (trust)
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ {MHD_OPTION_HTTPS_MEM_TRUST, 0, (char*) trust};
+ }
+
+ d = new(MHDDaemonWrapper, 1);
+ if (!d)
+ return log_oom();
+
+ d->fd = (uint64_t) fd;
+
+ d->daemon = MHD_start_daemon(flags, 0,
+ NULL, NULL,
+ request_handler, NULL,
+ MHD_OPTION_ARRAY, opts,
+ MHD_OPTION_END);
+ if (!d->daemon) {
+ log_error("Failed to start µhttp daemon");
+ r = -EINVAL;
+ goto error;
+ }
+
+ log_debug("Started MHD %s daemon on fd:%d (wrapper @ %p)",
+ key ? "HTTPS" : "HTTP", fd, d);
+
+ info = MHD_get_daemon_info(d->daemon, MHD_DAEMON_INFO_EPOLL_FD_LINUX_ONLY);
+ if (!info) {
+ log_error("µhttp returned NULL daemon info");
+ r = -EOPNOTSUPP;
+ goto error;
+ }
+
+ epoll_fd = info->listen_fd;
+ if (epoll_fd < 0) {
+ log_error("µhttp epoll fd is invalid");
+ r = -EUCLEAN;
+ goto error;
+ }
+
+ r = sd_event_add_io(s->events, &d->io_event,
+ epoll_fd, EPOLLIN,
+ dispatch_http_event, d);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add event callback: %m");
+ goto error;
+ }
+
+ r = sd_event_source_set_description(d->io_event, "io_event");
+ if (r < 0) {
+ log_error_errno(r, "Failed to set source name: %m");
+ goto error;
+ }
+
+ r = sd_event_add_time(s->events, &d->timer_event,
+ CLOCK_MONOTONIC, (uint64_t) -1, 0,
+ null_timer_event_handler, d);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add timer_event: %m");
+ goto error;
+ }
+
+ r = sd_event_source_set_description(d->timer_event, "timer_event");
+ if (r < 0) {
+ log_error_errno(r, "Failed to set source name: %m");
+ goto error;
+ }
+
+ r = hashmap_ensure_allocated(&s->daemons, &uint64_hash_ops);
+ if (r < 0) {
+ log_oom();
+ goto error;
+ }
+
+ r = hashmap_put(s->daemons, &d->fd, d);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add daemon to hashmap: %m");
+ goto error;
+ }
+
+ s->active++;
+ return 0;
+
+error:
+ MHD_stop_daemon(d->daemon);
+ free(d->daemon);
+ free(d);
+ return r;
+}
+
+static int setup_microhttpd_socket(RemoteServer *s,
+ const char *address,
+ const char *key,
+ const char *cert,
+ const char *trust) {
+ int fd;
+
+ fd = make_socket_fd(LOG_DEBUG, address, SOCK_STREAM, SOCK_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ return setup_microhttpd_server(s, fd, key, cert, trust);
+}
+
+static int null_timer_event_handler(sd_event_source *timer_event,
+ uint64_t usec,
+ void *userdata) {
+ return dispatch_http_event(timer_event, 0, 0, userdata);
+}
+
+static int dispatch_http_event(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+ MHDDaemonWrapper *d = userdata;
+ int r;
+ MHD_UNSIGNED_LONG_LONG timeout = ULONG_LONG_MAX;
+
+ assert(d);
+
+ r = MHD_run(d->daemon);
+ if (r == MHD_NO)
+ // FIXME: unregister daemon
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "MHD_run failed!");
+ if (MHD_get_timeout(d->daemon, &timeout) == MHD_NO)
+ timeout = ULONG_LONG_MAX;
+
+ r = sd_event_source_set_time(d->timer_event, timeout);
+ if (r < 0) {
+ log_warning_errno(r, "Unable to set event loop timeout: %m, this may result in indefinite blocking!");
+ return 1;
+ }
+
+ r = sd_event_source_set_enabled(d->timer_event, SD_EVENT_ON);
+ if (r < 0)
+ log_warning_errno(r, "Unable to enable timer_event: %m, this may result in indefinite blocking!");
+
+ return 1; /* work to do */
+}
+
+/**********************************************************************
+ **********************************************************************
+ **********************************************************************/
+
+static int setup_signals(RemoteServer *s) {
+ int r;
+
+ assert(s);
+
+ assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, -1) >= 0);
+
+ r = sd_event_add_signal(s->events, &s->sigterm_event, SIGTERM, NULL, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(s->events, &s->sigint_event, SIGINT, NULL, s);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int setup_raw_socket(RemoteServer *s, const char *address) {
+ int fd;
+
+ fd = make_socket_fd(LOG_INFO, address, SOCK_STREAM, SOCK_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ return journal_remote_add_raw_socket(s, fd);
+}
+
+static int create_remoteserver(
+ RemoteServer *s,
+ const char* key,
+ const char* cert,
+ const char* trust) {
+
+ int r, n, fd;
+ char **file;
+
+ r = journal_remote_server_init(s, arg_output, arg_split_mode, arg_compress, arg_seal);
+ if (r < 0)
+ return r;
+
+ r = setup_signals(s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up signals: %m");
+
+ n = sd_listen_fds(true);
+ if (n < 0)
+ return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
+ else
+ log_debug("Received %d descriptors", n);
+
+ if (MAX(http_socket, https_socket) >= SD_LISTEN_FDS_START + n)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADFD),
+ "Received fewer sockets than expected");
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
+ if (sd_is_socket(fd, AF_UNSPEC, 0, true)) {
+ log_debug("Received a listening socket (fd:%d)", fd);
+
+ if (fd == http_socket)
+ r = setup_microhttpd_server(s, fd, NULL, NULL, NULL);
+ else if (fd == https_socket)
+ r = setup_microhttpd_server(s, fd, key, cert, trust);
+ else
+ r = journal_remote_add_raw_socket(s, fd);
+ } else if (sd_is_socket(fd, AF_UNSPEC, 0, false)) {
+ char *hostname;
+
+ r = getpeername_pretty(fd, false, &hostname);
+ if (r < 0)
+ return log_error_errno(r, "Failed to retrieve remote name: %m");
+
+ log_debug("Received a connection socket (fd:%d) from %s", fd, hostname);
+
+ r = journal_remote_add_source(s, fd, hostname, true);
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown socket passed on fd:%d", fd);
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to register socket (fd:%d): %m", fd);
+ }
+
+ if (arg_getter) {
+ log_info("Spawning getter %s...", arg_getter);
+ fd = spawn_getter(arg_getter);
+ if (fd < 0)
+ return fd;
+
+ r = journal_remote_add_source(s, fd, (char*) arg_output, false);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_url) {
+ const char *url, *hostname;
+
+ if (!strstr(arg_url, "/entries")) {
+ if (endswith(arg_url, "/"))
+ url = strjoina(arg_url, "entries");
+ else
+ url = strjoina(arg_url, "/entries");
+ } else
+ url = strdupa(arg_url);
+
+ log_info("Spawning curl %s...", url);
+ fd = spawn_curl(url);
+ if (fd < 0)
+ return fd;
+
+ hostname = STARTSWITH_SET(arg_url, "https://", "http://");
+ if (!hostname)
+ hostname = arg_url;
+
+ hostname = strndupa(hostname, strcspn(hostname, "/:"));
+
+ r = journal_remote_add_source(s, fd, (char *) hostname, false);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_listen_raw) {
+ log_debug("Listening on a socket...");
+ r = setup_raw_socket(s, arg_listen_raw);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_listen_http) {
+ r = setup_microhttpd_socket(s, arg_listen_http, NULL, NULL, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_listen_https) {
+ r = setup_microhttpd_socket(s, arg_listen_https, key, cert, trust);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(file, arg_files) {
+ const char *output_name;
+
+ if (streq(*file, "-")) {
+ log_debug("Using standard input as source.");
+
+ fd = STDIN_FILENO;
+ output_name = "stdin";
+ } else {
+ log_debug("Reading file %s...", *file);
+
+ fd = open(*file, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", *file);
+ output_name = *file;
+ }
+
+ r = journal_remote_add_source(s, fd, (char*) output_name, false);
+ if (r < 0)
+ return r;
+ }
+
+ if (s->active == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Zero sources specified");
+
+ if (arg_split_mode == JOURNAL_WRITE_SPLIT_NONE) {
+ /* In this case we know what the writer will be
+ called, so we can create it and verify that we can
+ create output as expected. */
+ r = journal_remote_get_writer(s, NULL, &s->_single_writer);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int negative_fd(const char *spec) {
+ /* Return a non-positive number as its inverse, -EINVAL otherwise. */
+
+ int fd, r;
+
+ r = safe_atoi(spec, &fd);
+ if (r < 0)
+ return r;
+
+ if (fd > 0)
+ return -EINVAL;
+ else
+ return -fd;
+}
+
+static int parse_config(void) {
+ const ConfigTableItem items[] = {
+ { "Remote", "Seal", config_parse_bool, 0, &arg_seal },
+ { "Remote", "SplitMode", config_parse_write_split_mode, 0, &arg_split_mode },
+ { "Remote", "ServerKeyFile", config_parse_path, 0, &arg_key },
+ { "Remote", "ServerCertificateFile", config_parse_path, 0, &arg_cert },
+ { "Remote", "TrustedCertificateFile", config_parse_path, 0, &arg_trust },
+ {}
+ };
+
+ return config_parse_many_nulstr(PKGSYSCONFDIR "/journal-remote.conf",
+ CONF_PATHS_NULSTR("systemd/journal-remote.conf.d"),
+ "Remote\0", config_item_table_lookup, items,
+ CONFIG_PARSE_WARN, NULL);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-journal-remote.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] {FILE|-}...\n\n"
+ "Write external journal events to journal file(s).\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --url=URL Read events from systemd-journal-gatewayd at URL\n"
+ " --getter=COMMAND Read events from the output of COMMAND\n"
+ " --listen-raw=ADDR Listen for connections at ADDR\n"
+ " --listen-http=ADDR Listen for HTTP connections at ADDR\n"
+ " --listen-https=ADDR Listen for HTTPS connections at ADDR\n"
+ " -o --output=FILE|DIR Write output to FILE or DIR/external-*.journal\n"
+ " --compress[=BOOL] XZ-compress the output journal (default: yes)\n"
+ " --seal[=BOOL] Use event sealing (default: no)\n"
+ " --key=FILENAME SSL key in PEM format (default:\n"
+ " \"" PRIV_KEY_FILE "\")\n"
+ " --cert=FILENAME SSL certificate in PEM format (default:\n"
+ " \"" CERT_FILE "\")\n"
+ " --trust=FILENAME|all SSL CA certificate or disable checking (default:\n"
+ " \"" TRUST_FILE "\")\n"
+ " --gnutls-log=CATEGORY...\n"
+ " Specify a list of gnutls logging categories\n"
+ " --split-mode=none|host How many output files to create\n"
+ "\nNote: file descriptors from sd_listen_fds() will be consumed, too.\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_URL,
+ ARG_LISTEN_RAW,
+ ARG_LISTEN_HTTP,
+ ARG_LISTEN_HTTPS,
+ ARG_GETTER,
+ ARG_SPLIT_MODE,
+ ARG_COMPRESS,
+ ARG_SEAL,
+ ARG_KEY,
+ ARG_CERT,
+ ARG_TRUST,
+ ARG_GNUTLS_LOG,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "url", required_argument, NULL, ARG_URL },
+ { "getter", required_argument, NULL, ARG_GETTER },
+ { "listen-raw", required_argument, NULL, ARG_LISTEN_RAW },
+ { "listen-http", required_argument, NULL, ARG_LISTEN_HTTP },
+ { "listen-https", required_argument, NULL, ARG_LISTEN_HTTPS },
+ { "output", required_argument, NULL, 'o' },
+ { "split-mode", required_argument, NULL, ARG_SPLIT_MODE },
+ { "compress", optional_argument, NULL, ARG_COMPRESS },
+ { "seal", optional_argument, NULL, ARG_SEAL },
+ { "key", required_argument, NULL, ARG_KEY },
+ { "cert", required_argument, NULL, ARG_CERT },
+ { "trust", required_argument, NULL, ARG_TRUST },
+ { "gnutls-log", required_argument, NULL, ARG_GNUTLS_LOG },
+ {}
+ };
+
+ int c, r;
+ bool type_a, type_b;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "ho:", options, NULL)) >= 0)
+ switch(c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_URL:
+ if (arg_url)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot currently set more than one --url");
+
+ arg_url = optarg;
+ break;
+
+ case ARG_GETTER:
+ if (arg_getter)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot currently use --getter more than once");
+
+ arg_getter = optarg;
+ break;
+
+ case ARG_LISTEN_RAW:
+ if (arg_listen_raw)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot currently use --listen-raw more than once");
+
+ arg_listen_raw = optarg;
+ break;
+
+ case ARG_LISTEN_HTTP:
+ if (arg_listen_http || http_socket >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot currently use --listen-http more than once");
+
+ r = negative_fd(optarg);
+ if (r >= 0)
+ http_socket = r;
+ else
+ arg_listen_http = optarg;
+ break;
+
+ case ARG_LISTEN_HTTPS:
+ if (arg_listen_https || https_socket >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot currently use --listen-https more than once");
+
+ r = negative_fd(optarg);
+ if (r >= 0)
+ https_socket = r;
+ else
+ arg_listen_https = optarg;
+
+ break;
+
+ case ARG_KEY:
+ if (arg_key)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Key file specified twice");
+
+ arg_key = strdup(optarg);
+ if (!arg_key)
+ return log_oom();
+
+ break;
+
+ case ARG_CERT:
+ if (arg_cert)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Certificate file specified twice");
+
+ arg_cert = strdup(optarg);
+ if (!arg_cert)
+ return log_oom();
+
+ break;
+
+ case ARG_TRUST:
+ if (arg_trust || arg_trust_all)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Confusing trusted CA configuration");
+
+ if (streq(optarg, "all"))
+ arg_trust_all = true;
+ else {
+#if HAVE_GNUTLS
+ arg_trust = strdup(optarg);
+ if (!arg_trust)
+ return log_oom();
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --trust is not available.");
+#endif
+ }
+
+ break;
+
+ case 'o':
+ if (arg_output)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use --output/-o more than once");
+
+ arg_output = optarg;
+ break;
+
+ case ARG_SPLIT_MODE:
+ arg_split_mode = journal_write_split_mode_from_string(optarg);
+ if (arg_split_mode == _JOURNAL_WRITE_SPLIT_INVALID)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid split mode: %s", optarg);
+ break;
+
+ case ARG_COMPRESS:
+ if (optarg) {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --compress= parameter.");
+
+ arg_compress = !!r;
+ } else
+ arg_compress = true;
+
+ break;
+
+ case ARG_SEAL:
+ if (optarg) {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --seal= parameter.");
+
+ arg_seal = !!r;
+ } else
+ arg_seal = true;
+
+ break;
+
+ case ARG_GNUTLS_LOG: {
+#if HAVE_GNUTLS
+ const char* p = optarg;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --gnutls-log= argument: %m");
+ if (r == 0)
+ break;
+
+ if (strv_push(&arg_gnutls_log, word) < 0)
+ return log_oom();
+
+ word = NULL;
+ }
+ break;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --gnutls-log is not available.");
+#endif
+ }
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown option code.");
+ }
+
+ if (optind < argc)
+ arg_files = argv + optind;
+
+ type_a = arg_getter || !strv_isempty(arg_files);
+ type_b = arg_url
+ || arg_listen_raw
+ || arg_listen_http || arg_listen_https
+ || sd_listen_fds(false) > 0;
+ if (type_a && type_b)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot use file input or --getter with "
+ "--arg-listen-... or socket activation.");
+ if (type_a) {
+ if (!arg_output)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --output must be specified with file input or --getter.");
+
+ if (!IN_SET(arg_split_mode, JOURNAL_WRITE_SPLIT_NONE, _JOURNAL_WRITE_SPLIT_INVALID))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "For active sources, only --split-mode=none is allowed.");
+
+ arg_split_mode = JOURNAL_WRITE_SPLIT_NONE;
+ }
+
+ if (arg_split_mode == _JOURNAL_WRITE_SPLIT_INVALID)
+ arg_split_mode = JOURNAL_WRITE_SPLIT_HOST;
+
+ if (arg_split_mode == JOURNAL_WRITE_SPLIT_NONE && arg_output) {
+ if (is_dir(arg_output, true) > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "For SplitMode=none, output must be a file.");
+ if (!endswith(arg_output, ".journal"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "For SplitMode=none, output file name must end with .journal.");
+ }
+
+ if (arg_split_mode == JOURNAL_WRITE_SPLIT_HOST
+ && arg_output && is_dir(arg_output, true) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "For SplitMode=host, output must be a directory.");
+
+ log_debug("Full config: SplitMode=%s Key=%s Cert=%s Trust=%s",
+ journal_write_split_mode_to_string(arg_split_mode),
+ strna(arg_key),
+ strna(arg_cert),
+ strna(arg_trust));
+
+ return 1 /* work to do */;
+}
+
+static int load_certificates(char **key, char **cert, char **trust) {
+ int r;
+
+ r = read_full_file(arg_key ?: PRIV_KEY_FILE, key, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read key from file '%s': %m",
+ arg_key ?: PRIV_KEY_FILE);
+
+ r = read_full_file(arg_cert ?: CERT_FILE, cert, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read certificate from file '%s': %m",
+ arg_cert ?: CERT_FILE);
+
+ if (arg_trust_all)
+ log_info("Certificate checking disabled.");
+ else {
+ r = read_full_file(arg_trust ?: TRUST_FILE, trust, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read CA certificate file '%s': %m",
+ arg_trust ?: TRUST_FILE);
+ }
+
+ if ((arg_listen_raw || arg_listen_http) && *trust)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --trust makes all non-HTTPS connections untrusted.");
+
+ return 0;
+}
+
+static int run(int argc, char **argv) {
+ _cleanup_(notify_on_cleanup) const char *notify_message = NULL;
+ _cleanup_(journal_remote_server_destroy) RemoteServer s = {};
+ _cleanup_free_ char *key = NULL, *cert = NULL, *trust = NULL;
+ int r;
+
+ log_show_color(true);
+ log_parse_environment();
+
+ /* The journal merging logic potentially needs a lot of fds. */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ r = parse_config();
+ if (r < 0)
+ return r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_listen_http || arg_listen_https) {
+ r = setup_gnutls_logger(arg_gnutls_log);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_listen_https || https_socket >= 0) {
+ r = load_certificates(&key, &cert, &trust);
+ if (r < 0)
+ return r;
+
+ s.check_trust = !arg_trust_all;
+ }
+
+ r = create_remoteserver(&s, key, cert, trust);
+ if (r < 0)
+ return r;
+
+ r = sd_event_set_watchdog(s.events, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable watchdog: %m");
+
+ log_debug("Watchdog is %sd.", enable_disable(r > 0));
+
+ log_debug("%s running as pid "PID_FMT,
+ program_invocation_short_name, getpid_cached());
+
+ notify_message = notify_start(NOTIFY_READY, NOTIFY_STOPPING);
+
+ while (s.active) {
+ r = sd_event_get_state(s.events);
+ if (r < 0)
+ return r;
+ if (r == SD_EVENT_FINISHED)
+ break;
+
+ r = sd_event_run(s.events, -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+ }
+
+ notify_message = NULL;
+ (void) sd_notifyf(false,
+ "STOPPING=1\n"
+ "STATUS=Shutting down after writing %" PRIu64 " entries...", s.event_count);
+
+ log_info("Finishing after writing %" PRIu64 " entries", s.event_count);
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/journal-remote/journal-remote-parse.c b/src/journal-remote/journal-remote-parse.c
new file mode 100644
index 0000000..535d06a
--- /dev/null
+++ b/src/journal-remote/journal-remote-parse.c
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "journal-remote-parse.h"
+#include "journald-native.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+void source_free(RemoteSource *source) {
+ if (!source)
+ return;
+
+ journal_importer_cleanup(&source->importer);
+
+ log_debug("Writer ref count %i", source->writer->n_ref);
+ writer_unref(source->writer);
+
+ sd_event_source_unref(source->event);
+ sd_event_source_unref(source->buffer_event);
+
+ free(source);
+}
+
+/**
+ * Initialize zero-filled source with given values. On success, takes
+ * ownership of fd, name, and writer, otherwise does not touch them.
+ */
+RemoteSource* source_new(int fd, bool passive_fd, char *name, Writer *writer) {
+ RemoteSource *source;
+
+ log_debug("Creating source for %sfd:%d (%s)",
+ passive_fd ? "passive " : "", fd, name);
+
+ assert(fd >= 0);
+
+ source = new0(RemoteSource, 1);
+ if (!source)
+ return NULL;
+
+ source->importer.fd = fd;
+ source->importer.passive_fd = passive_fd;
+ source->importer.name = name;
+
+ source->writer = writer;
+
+ return source;
+}
+
+int process_source(RemoteSource *source, bool compress, bool seal) {
+ int r;
+
+ assert(source);
+ assert(source->writer);
+
+ r = journal_importer_process_data(&source->importer);
+ if (r <= 0)
+ return r;
+
+ /* We have a full event */
+ log_trace("Received full event from source@%p fd:%d (%s)",
+ source, source->importer.fd, source->importer.name);
+
+ if (source->importer.iovw.count == 0) {
+ log_warning("Entry with no payload, skipping");
+ goto freeing;
+ }
+
+ assert(source->importer.iovw.iovec);
+
+ r = writer_write(source->writer, &source->importer.iovw, &source->importer.ts, compress, seal);
+ if (r == -EBADMSG) {
+ log_error_errno(r, "Entry is invalid, ignoring.");
+ r = 0;
+ } else if (r < 0)
+ log_error_errno(r, "Failed to write entry of %zu bytes: %m",
+ iovw_size(&source->importer.iovw));
+ else
+ r = 1;
+
+ freeing:
+ journal_importer_drop_iovw(&source->importer);
+ return r;
+}
diff --git a/src/journal-remote/journal-remote-parse.h b/src/journal-remote/journal-remote-parse.h
new file mode 100644
index 0000000..d6c7736
--- /dev/null
+++ b/src/journal-remote/journal-remote-parse.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-event.h"
+
+#include "journal-importer.h"
+#include "journal-remote-write.h"
+
+typedef struct RemoteSource {
+ JournalImporter importer;
+
+ Writer *writer;
+
+ sd_event_source *event;
+ sd_event_source *buffer_event;
+} RemoteSource;
+
+RemoteSource* source_new(int fd, bool passive_fd, char *name, Writer *writer);
+void source_free(RemoteSource *source);
+int process_source(RemoteSource *source, bool compress, bool seal);
diff --git a/src/journal-remote/journal-remote-write.c b/src/journal-remote/journal-remote-write.c
new file mode 100644
index 0000000..188ff35
--- /dev/null
+++ b/src/journal-remote/journal-remote-write.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "journal-remote.h"
+
+static int do_rotate(JournalFile **f, bool compress, bool seal) {
+ int r = journal_file_rotate(f, compress, (uint64_t) -1, seal, NULL);
+ if (r < 0) {
+ if (*f)
+ log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
+ else
+ log_error_errno(r, "Failed to create rotated journal: %m");
+ }
+
+ return r;
+}
+
+Writer* writer_new(RemoteServer *server) {
+ Writer *w;
+
+ w = new0(Writer, 1);
+ if (!w)
+ return NULL;
+
+ memset(&w->metrics, 0xFF, sizeof(w->metrics));
+
+ w->mmap = mmap_cache_new();
+ if (!w->mmap)
+ return mfree(w);
+
+ w->n_ref = 1;
+ w->server = server;
+
+ return w;
+}
+
+static Writer* writer_free(Writer *w) {
+ if (!w)
+ return NULL;
+
+ if (w->journal) {
+ log_debug("Closing journal file %s.", w->journal->path);
+ journal_file_close(w->journal);
+ }
+
+ if (w->server && w->hashmap_key)
+ hashmap_remove(w->server->writers, w->hashmap_key);
+
+ free(w->hashmap_key);
+
+ if (w->mmap)
+ mmap_cache_unref(w->mmap);
+
+ return mfree(w);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(Writer, writer, writer_free);
+
+int writer_write(Writer *w,
+ struct iovec_wrapper *iovw,
+ dual_timestamp *ts,
+ bool compress,
+ bool seal) {
+ int r;
+
+ assert(w);
+ assert(iovw);
+ assert(iovw->count > 0);
+
+ if (journal_file_rotate_suggested(w->journal, 0)) {
+ log_info("%s: Journal header limits reached or header out-of-date, rotating",
+ w->journal->path);
+ r = do_rotate(&w->journal, compress, seal);
+ if (r < 0)
+ return r;
+ }
+
+ r = journal_file_append_entry(w->journal, ts, NULL,
+ iovw->iovec, iovw->count,
+ &w->seqnum, NULL, NULL);
+ if (r >= 0) {
+ if (w->server)
+ w->server->event_count += 1;
+ return 0;
+ } else if (r == -EBADMSG)
+ return r;
+
+ log_debug_errno(r, "%s: Write failed, rotating: %m", w->journal->path);
+ r = do_rotate(&w->journal, compress, seal);
+ if (r < 0)
+ return r;
+ else
+ log_debug("%s: Successfully rotated journal", w->journal->path);
+
+ log_debug("Retrying write.");
+ r = journal_file_append_entry(w->journal, ts, NULL,
+ iovw->iovec, iovw->count,
+ &w->seqnum, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ if (w->server)
+ w->server->event_count += 1;
+ return 0;
+}
diff --git a/src/journal-remote/journal-remote-write.h b/src/journal-remote/journal-remote-write.h
new file mode 100644
index 0000000..e445859
--- /dev/null
+++ b/src/journal-remote/journal-remote-write.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "journal-file.h"
+#include "journal-importer.h"
+
+typedef struct RemoteServer RemoteServer;
+
+typedef struct Writer {
+ JournalFile *journal;
+ JournalMetrics metrics;
+
+ MMapCache *mmap;
+ RemoteServer *server;
+ char *hashmap_key;
+
+ uint64_t seqnum;
+
+ unsigned n_ref;
+} Writer;
+
+Writer* writer_new(RemoteServer* server);
+Writer* writer_ref(Writer *w);
+Writer* writer_unref(Writer *w);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Writer*, writer_unref);
+
+int writer_write(Writer *s,
+ struct iovec_wrapper *iovw,
+ dual_timestamp *ts,
+ bool compress,
+ bool seal);
+
+typedef enum JournalWriteSplitMode {
+ JOURNAL_WRITE_SPLIT_NONE,
+ JOURNAL_WRITE_SPLIT_HOST,
+ _JOURNAL_WRITE_SPLIT_MAX,
+ _JOURNAL_WRITE_SPLIT_INVALID = -1
+} JournalWriteSplitMode;
diff --git a/src/journal-remote/journal-remote.c b/src/journal-remote/journal-remote.c
new file mode 100644
index 0000000..1da32c5
--- /dev/null
+++ b/src/journal-remote/journal-remote.c
@@ -0,0 +1,533 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <stdint.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "def.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "journal-file.h"
+#include "journal-remote-write.h"
+#include "journal-remote.h"
+#include "journald-native.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+#define REMOTE_JOURNAL_PATH "/var/log/journal/remote"
+
+#define filename_escape(s) xescape((s), "/ ")
+
+static int open_output(RemoteServer *s, Writer *w, const char* host) {
+ _cleanup_free_ char *_filename = NULL;
+ const char *filename;
+ int r;
+
+ switch (s->split_mode) {
+ case JOURNAL_WRITE_SPLIT_NONE:
+ filename = s->output;
+ break;
+
+ case JOURNAL_WRITE_SPLIT_HOST: {
+ _cleanup_free_ char *name;
+
+ assert(host);
+
+ name = filename_escape(host);
+ if (!name)
+ return log_oom();
+
+ r = asprintf(&_filename, "%s/remote-%s.journal", s->output, name);
+ if (r < 0)
+ return log_oom();
+
+ filename = _filename;
+ break;
+ }
+
+ default:
+ assert_not_reached("what?");
+ }
+
+ r = journal_file_open_reliably(filename,
+ O_RDWR|O_CREAT, 0640,
+ s->compress, (uint64_t) -1, s->seal,
+ &w->metrics,
+ w->mmap, NULL,
+ NULL, &w->journal);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open output journal %s: %m", filename);
+
+ log_debug("Opened output file %s", w->journal->path);
+ return 0;
+}
+
+/**********************************************************************
+ **********************************************************************
+ **********************************************************************/
+
+static int init_writer_hashmap(RemoteServer *s) {
+ static const struct hash_ops* const hash_ops[] = {
+ [JOURNAL_WRITE_SPLIT_NONE] = NULL,
+ [JOURNAL_WRITE_SPLIT_HOST] = &string_hash_ops,
+ };
+
+ assert(s);
+ assert(s->split_mode >= 0 && s->split_mode < (int) ELEMENTSOF(hash_ops));
+
+ s->writers = hashmap_new(hash_ops[s->split_mode]);
+ if (!s->writers)
+ return log_oom();
+
+ return 0;
+}
+
+int journal_remote_get_writer(RemoteServer *s, const char *host, Writer **writer) {
+ _cleanup_(writer_unrefp) Writer *w = NULL;
+ const void *key;
+ int r;
+
+ switch(s->split_mode) {
+ case JOURNAL_WRITE_SPLIT_NONE:
+ key = "one and only";
+ break;
+
+ case JOURNAL_WRITE_SPLIT_HOST:
+ assert(host);
+ key = host;
+ break;
+
+ default:
+ assert_not_reached("what split mode?");
+ }
+
+ w = hashmap_get(s->writers, key);
+ if (w)
+ writer_ref(w);
+ else {
+ w = writer_new(s);
+ if (!w)
+ return log_oom();
+
+ if (s->split_mode == JOURNAL_WRITE_SPLIT_HOST) {
+ w->hashmap_key = strdup(key);
+ if (!w->hashmap_key)
+ return log_oom();
+ }
+
+ r = open_output(s, w, host);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(s->writers, w->hashmap_key ?: key, w);
+ if (r < 0)
+ return r;
+ }
+
+ *writer = TAKE_PTR(w);
+
+ return 0;
+}
+
+/**********************************************************************
+ **********************************************************************
+ **********************************************************************/
+
+/* This should go away as soon as µhttpd allows state to be passed around. */
+RemoteServer *journal_remote_server_global;
+
+static int dispatch_raw_source_event(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userdata);
+static int dispatch_raw_source_until_block(sd_event_source *event,
+ void *userdata);
+static int dispatch_blocking_source_event(sd_event_source *event,
+ void *userdata);
+static int dispatch_raw_connection_event(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userdata);
+
+static int get_source_for_fd(RemoteServer *s,
+ int fd, char *name, RemoteSource **source) {
+ Writer *writer;
+ int r;
+
+ /* This takes ownership of name, but only on success. */
+
+ assert(fd >= 0);
+ assert(source);
+
+ if (!GREEDY_REALLOC0(s->sources, s->sources_size, fd + 1))
+ return log_oom();
+
+ r = journal_remote_get_writer(s, name, &writer);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to get writer for source %s: %m",
+ name);
+
+ if (s->sources[fd] == NULL) {
+ s->sources[fd] = source_new(fd, false, name, writer);
+ if (!s->sources[fd]) {
+ writer_unref(writer);
+ return log_oom();
+ }
+
+ s->active++;
+ }
+
+ *source = s->sources[fd];
+ return 0;
+}
+
+static int remove_source(RemoteServer *s, int fd) {
+ RemoteSource *source;
+
+ assert(s);
+ assert(fd >= 0 && fd < (ssize_t) s->sources_size);
+
+ source = s->sources[fd];
+ if (source) {
+ /* this closes fd too */
+ source_free(source);
+ s->sources[fd] = NULL;
+ s->active--;
+ }
+
+ return 0;
+}
+
+int journal_remote_add_source(RemoteServer *s, int fd, char* name, bool own_name) {
+ RemoteSource *source = NULL;
+ int r;
+
+ /* This takes ownership of name, even on failure, if own_name is true. */
+
+ assert(s);
+ assert(fd >= 0);
+ assert(name);
+
+ if (!own_name) {
+ name = strdup(name);
+ if (!name)
+ return log_oom();
+ }
+
+ r = get_source_for_fd(s, fd, name, &source);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create source for fd:%d (%s): %m",
+ fd, name);
+ free(name);
+ return r;
+ }
+
+ r = sd_event_add_io(s->events, &source->event,
+ fd, EPOLLIN|EPOLLRDHUP|EPOLLPRI,
+ dispatch_raw_source_event, source);
+ if (r == 0) {
+ /* Add additional source for buffer processing. It will be
+ * enabled later. */
+ r = sd_event_add_defer(s->events, &source->buffer_event,
+ dispatch_raw_source_until_block, source);
+ if (r == 0)
+ sd_event_source_set_enabled(source->buffer_event, SD_EVENT_OFF);
+ } else if (r == -EPERM) {
+ log_debug("Falling back to sd_event_add_defer for fd:%d (%s)", fd, name);
+ r = sd_event_add_defer(s->events, &source->event,
+ dispatch_blocking_source_event, source);
+ if (r == 0)
+ sd_event_source_set_enabled(source->event, SD_EVENT_ON);
+ }
+ if (r < 0) {
+ log_error_errno(r, "Failed to register event source for fd:%d: %m",
+ fd);
+ goto error;
+ }
+
+ r = sd_event_source_set_description(source->event, name);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set source name for fd:%d: %m", fd);
+ goto error;
+ }
+
+ return 1; /* work to do */
+
+ error:
+ remove_source(s, fd);
+ return r;
+}
+
+int journal_remote_add_raw_socket(RemoteServer *s, int fd) {
+ int r;
+ _cleanup_close_ int fd_ = fd;
+ char name[STRLEN("raw-socket-") + DECIMAL_STR_MAX(int) + 1];
+
+ assert(fd >= 0);
+
+ r = sd_event_add_io(s->events, &s->listen_event,
+ fd, EPOLLIN,
+ dispatch_raw_connection_event, s);
+ if (r < 0)
+ return r;
+
+ xsprintf(name, "raw-socket-%d", fd);
+
+ r = sd_event_source_set_description(s->listen_event, name);
+ if (r < 0)
+ return r;
+
+ fd_ = -1;
+ s->active++;
+ return 0;
+}
+
+/**********************************************************************
+ **********************************************************************
+ **********************************************************************/
+
+int journal_remote_server_init(
+ RemoteServer *s,
+ const char *output,
+ JournalWriteSplitMode split_mode,
+ bool compress,
+ bool seal) {
+
+ int r;
+
+ assert(s);
+
+ assert(journal_remote_server_global == NULL);
+ journal_remote_server_global = s;
+
+ s->split_mode = split_mode;
+ s->compress = compress;
+ s->seal = seal;
+
+ if (output)
+ s->output = output;
+ else if (split_mode == JOURNAL_WRITE_SPLIT_NONE)
+ s->output = REMOTE_JOURNAL_PATH "/remote.journal";
+ else if (split_mode == JOURNAL_WRITE_SPLIT_HOST)
+ s->output = REMOTE_JOURNAL_PATH;
+ else
+ assert_not_reached("bad split mode");
+
+ r = sd_event_default(&s->events);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ r = init_writer_hashmap(s);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+#if HAVE_MICROHTTPD
+static void MHDDaemonWrapper_free(MHDDaemonWrapper *d) {
+ MHD_stop_daemon(d->daemon);
+ sd_event_source_unref(d->io_event);
+ sd_event_source_unref(d->timer_event);
+ free(d);
+}
+#endif
+
+void journal_remote_server_destroy(RemoteServer *s) {
+ size_t i;
+
+#if HAVE_MICROHTTPD
+ hashmap_free_with_destructor(s->daemons, MHDDaemonWrapper_free);
+#endif
+
+ assert(s->sources_size == 0 || s->sources);
+ for (i = 0; i < s->sources_size; i++)
+ remove_source(s, i);
+ free(s->sources);
+
+ writer_unref(s->_single_writer);
+ hashmap_free(s->writers);
+
+ sd_event_source_unref(s->sigterm_event);
+ sd_event_source_unref(s->sigint_event);
+ sd_event_source_unref(s->listen_event);
+ sd_event_unref(s->events);
+
+ if (s == journal_remote_server_global)
+ journal_remote_server_global = NULL;
+
+ /* fds that we're listening on remain open... */
+}
+
+/**********************************************************************
+ **********************************************************************
+ **********************************************************************/
+
+int journal_remote_handle_raw_source(
+ sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ RemoteServer *s) {
+
+ RemoteSource *source;
+ int r;
+
+ /* Returns 1 if there might be more data pending,
+ * 0 if data is currently exhausted, negative on error.
+ */
+
+ assert(fd >= 0 && fd < (ssize_t) s->sources_size);
+ source = s->sources[fd];
+ assert(source->importer.fd == fd);
+
+ r = process_source(source, s->compress, s->seal);
+ if (journal_importer_eof(&source->importer)) {
+ size_t remaining;
+
+ log_debug("EOF reached with source %s (fd=%d)",
+ source->importer.name, source->importer.fd);
+
+ remaining = journal_importer_bytes_remaining(&source->importer);
+ if (remaining > 0)
+ log_notice("Premature EOF. %zu bytes lost.", remaining);
+ remove_source(s, source->importer.fd);
+ log_debug("%zu active sources remaining", s->active);
+ return 0;
+ } else if (r == -E2BIG) {
+ log_notice("Entry with too many fields, skipped");
+ return 1;
+ } else if (r == -ENOBUFS) {
+ log_notice("Entry too big, skipped");
+ return 1;
+ } else if (r == -EAGAIN) {
+ return 0;
+ } else if (r < 0) {
+ log_debug_errno(r, "Closing connection: %m");
+ remove_source(s, fd);
+ return 0;
+ } else
+ return 1;
+}
+
+static int dispatch_raw_source_until_block(sd_event_source *event,
+ void *userdata) {
+ RemoteSource *source = userdata;
+ int r;
+
+ /* Make sure event stays around even if source is destroyed */
+ sd_event_source_ref(event);
+
+ r = journal_remote_handle_raw_source(event, source->importer.fd, EPOLLIN, journal_remote_server_global);
+ if (r != 1)
+ /* No more data for now */
+ sd_event_source_set_enabled(event, SD_EVENT_OFF);
+
+ sd_event_source_unref(event);
+
+ return r;
+}
+
+static int dispatch_raw_source_event(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+ RemoteSource *source = userdata;
+ int r;
+
+ assert(source->event);
+ assert(source->buffer_event);
+
+ r = journal_remote_handle_raw_source(event, fd, EPOLLIN, journal_remote_server_global);
+ if (r == 1)
+ /* Might have more data. We need to rerun the handler
+ * until we are sure the buffer is exhausted. */
+ sd_event_source_set_enabled(source->buffer_event, SD_EVENT_ON);
+
+ return r;
+}
+
+static int dispatch_blocking_source_event(sd_event_source *event,
+ void *userdata) {
+ RemoteSource *source = userdata;
+
+ return journal_remote_handle_raw_source(event, source->importer.fd, EPOLLIN, journal_remote_server_global);
+}
+
+static int accept_connection(const char* type, int fd,
+ SocketAddress *addr, char **hostname) {
+ int fd2, r;
+
+ log_debug("Accepting new %s connection on fd:%d", type, fd);
+ fd2 = accept4(fd, &addr->sockaddr.sa, &addr->size, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (fd2 < 0)
+ return log_error_errno(errno, "accept() on fd:%d failed: %m", fd);
+
+ switch(socket_address_family(addr)) {
+ case AF_INET:
+ case AF_INET6: {
+ _cleanup_free_ char *a = NULL;
+ char *b;
+
+ r = socket_address_print(addr, &a);
+ if (r < 0) {
+ log_error_errno(r, "socket_address_print(): %m");
+ close(fd2);
+ return r;
+ }
+
+ r = socknameinfo_pretty(&addr->sockaddr, addr->size, &b);
+ if (r < 0) {
+ log_error_errno(r, "Resolving hostname failed: %m");
+ close(fd2);
+ return r;
+ }
+
+ log_debug("Accepted %s %s connection from %s",
+ type,
+ socket_address_family(addr) == AF_INET ? "IP" : "IPv6",
+ a);
+
+ *hostname = b;
+
+ return fd2;
+ };
+ default:
+ log_error("Rejected %s connection with unsupported family %d",
+ type, socket_address_family(addr));
+ close(fd2);
+
+ return -EINVAL;
+ }
+}
+
+static int dispatch_raw_connection_event(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+ RemoteServer *s = userdata;
+ int fd2;
+ SocketAddress addr = {
+ .size = sizeof(union sockaddr_union),
+ .type = SOCK_STREAM,
+ };
+ char *hostname = NULL;
+
+ fd2 = accept_connection("raw", fd, &addr, &hostname);
+ if (fd2 < 0)
+ return fd2;
+
+ return journal_remote_add_source(s, fd2, hostname, true);
+}
diff --git a/src/journal-remote/journal-remote.conf.in b/src/journal-remote/journal-remote.conf.in
new file mode 100644
index 0000000..edc3aba
--- /dev/null
+++ b/src/journal-remote/journal-remote.conf.in
@@ -0,0 +1,19 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See journal-remote.conf(5) for details
+
+[Remote]
+# Seal=false
+# SplitMode=host
+# ServerKeyFile=@CERTIFICATEROOT@/private/journal-remote.pem
+# ServerCertificateFile=@CERTIFICATEROOT@/certs/journal-remote.pem
+# TrustedCertificateFile=@CERTIFICATEROOT@/ca/trusted.pem
diff --git a/src/journal-remote/journal-remote.h b/src/journal-remote/journal-remote.h
new file mode 100644
index 0000000..4c25d43
--- /dev/null
+++ b/src/journal-remote/journal-remote.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-event.h"
+
+#include "hashmap.h"
+#include "journal-remote-parse.h"
+#include "journal-remote-write.h"
+
+#if HAVE_MICROHTTPD
+#include "microhttpd-util.h"
+
+typedef struct MHDDaemonWrapper MHDDaemonWrapper;
+
+struct MHDDaemonWrapper {
+ uint64_t fd;
+ struct MHD_Daemon *daemon;
+
+ sd_event_source *io_event;
+ sd_event_source *timer_event;
+};
+#endif
+
+struct RemoteServer {
+ RemoteSource **sources;
+ size_t sources_size;
+ size_t active;
+
+ sd_event *events;
+ sd_event_source *sigterm_event, *sigint_event, *listen_event;
+
+ Hashmap *writers;
+ Writer *_single_writer;
+ uint64_t event_count;
+
+#if HAVE_MICROHTTPD
+ Hashmap *daemons;
+#endif
+ const char *output; /* either the output file or directory */
+
+ JournalWriteSplitMode split_mode;
+ bool compress;
+ bool seal;
+ bool check_trust;
+};
+extern RemoteServer *journal_remote_server_global;
+
+int journal_remote_server_init(
+ RemoteServer *s,
+ const char *output,
+ JournalWriteSplitMode split_mode,
+ bool compress,
+ bool seal);
+
+int journal_remote_get_writer(RemoteServer *s, const char *host, Writer **writer);
+
+int journal_remote_add_source(RemoteServer *s, int fd, char* name, bool own_name);
+int journal_remote_add_raw_socket(RemoteServer *s, int fd);
+int journal_remote_handle_raw_source(
+ sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ RemoteServer *s);
+
+void journal_remote_server_destroy(RemoteServer *s);
diff --git a/src/journal-remote/journal-upload-journal.c b/src/journal-remote/journal-upload-journal.c
new file mode 100644
index 0000000..7d7e738
--- /dev/null
+++ b/src/journal-remote/journal-upload-journal.c
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <curl/curl.h>
+#include <stdbool.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "journal-upload.h"
+#include "log.h"
+#include "string-util.h"
+#include "utf8.h"
+#include "util.h"
+
+/**
+ * Write up to size bytes to buf. Return negative on error, and number of
+ * bytes written otherwise. The last case is a kind of an error too.
+ */
+static ssize_t write_entry(char *buf, size_t size, Uploader *u) {
+ int r;
+ size_t pos = 0;
+
+ assert(size <= SSIZE_MAX);
+
+ for (;;) {
+
+ switch(u->entry_state) {
+ case ENTRY_CURSOR: {
+ u->current_cursor = mfree(u->current_cursor);
+
+ r = sd_journal_get_cursor(u->journal, &u->current_cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cursor: %m");
+
+ r = snprintf(buf + pos, size - pos,
+ "__CURSOR=%s\n", u->current_cursor);
+ assert(r >= 0);
+ if ((size_t) r > size - pos)
+ /* not enough space */
+ return pos;
+
+ u->entry_state++;
+
+ if (pos + r == size) {
+ /* exactly one character short, but we don't need it */
+ buf[size - 1] = '\n';
+ return size;
+ }
+
+ pos += r;
+ }
+ _fallthrough_;
+ case ENTRY_REALTIME: {
+ usec_t realtime;
+
+ r = sd_journal_get_realtime_usec(u->journal, &realtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ r = snprintf(buf + pos, size - pos,
+ "__REALTIME_TIMESTAMP="USEC_FMT"\n", realtime);
+ assert(r >= 0);
+ if ((size_t) r > size - pos)
+ /* not enough space */
+ return pos;
+
+ u->entry_state++;
+
+ if (r + pos == size) {
+ /* exactly one character short, but we don't need it */
+ buf[size - 1] = '\n';
+ return size;
+ }
+
+ pos += r;
+ }
+ _fallthrough_;
+ case ENTRY_MONOTONIC: {
+ usec_t monotonic;
+ sd_id128_t boot_id;
+
+ r = sd_journal_get_monotonic_usec(u->journal, &monotonic, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ r = snprintf(buf + pos, size - pos,
+ "__MONOTONIC_TIMESTAMP="USEC_FMT"\n", monotonic);
+ assert(r >= 0);
+ if ((size_t) r > size - pos)
+ /* not enough space */
+ return pos;
+
+ u->entry_state++;
+
+ if (r + pos == size) {
+ /* exactly one character short, but we don't need it */
+ buf[size - 1] = '\n';
+ return size;
+ }
+
+ pos += r;
+ }
+ _fallthrough_;
+ case ENTRY_BOOT_ID: {
+ sd_id128_t boot_id;
+ char sid[33];
+
+ r = sd_journal_get_monotonic_usec(u->journal, NULL, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ r = snprintf(buf + pos, size - pos,
+ "_BOOT_ID=%s\n", sd_id128_to_string(boot_id, sid));
+ assert(r >= 0);
+ if ((size_t) r > size - pos)
+ /* not enough space */
+ return pos;
+
+ u->entry_state++;
+
+ if (r + pos == size) {
+ /* exactly one character short, but we don't need it */
+ buf[size - 1] = '\n';
+ return size;
+ }
+
+ pos += r;
+ }
+ _fallthrough_;
+ case ENTRY_NEW_FIELD: {
+ u->field_pos = 0;
+
+ r = sd_journal_enumerate_data(u->journal,
+ &u->field_data,
+ &u->field_length);
+ if (r < 0)
+ return log_error_errno(r, "Failed to move to next field in entry: %m");
+ else if (r == 0) {
+ u->entry_state = ENTRY_OUTRO;
+ continue;
+ }
+
+ /* We already printed the boot id from the data in
+ * the header, hence let's suppress it here */
+ if (memory_startswith(u->field_data, u->field_length, "_BOOT_ID="))
+ continue;
+
+ if (!utf8_is_printable_newline(u->field_data, u->field_length, false)) {
+ u->entry_state = ENTRY_BINARY_FIELD_START;
+ continue;
+ }
+
+ u->entry_state++;
+ }
+ _fallthrough_;
+ case ENTRY_TEXT_FIELD:
+ case ENTRY_BINARY_FIELD: {
+ bool done;
+ size_t tocopy;
+
+ done = size - pos > u->field_length - u->field_pos;
+ if (done)
+ tocopy = u->field_length - u->field_pos;
+ else
+ tocopy = size - pos;
+
+ memcpy(buf + pos,
+ (char*) u->field_data + u->field_pos,
+ tocopy);
+
+ if (done) {
+ buf[pos + tocopy] = '\n';
+ pos += tocopy + 1;
+ u->entry_state = ENTRY_NEW_FIELD;
+ continue;
+ } else {
+ u->field_pos += tocopy;
+ return size;
+ }
+ }
+
+ case ENTRY_BINARY_FIELD_START: {
+ const char *c;
+ size_t len;
+
+ c = memchr(u->field_data, '=', u->field_length);
+ if (!c || c == u->field_data)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid field.");
+
+ len = c - (const char*)u->field_data;
+
+ /* need space for label + '\n' */
+ if (size - pos < len + 1)
+ return pos;
+
+ memcpy(buf + pos, u->field_data, len);
+ buf[pos + len] = '\n';
+ pos += len + 1;
+
+ u->field_pos = len + 1;
+ u->entry_state++;
+ }
+ _fallthrough_;
+ case ENTRY_BINARY_FIELD_SIZE: {
+ uint64_t le64;
+
+ /* need space for uint64_t */
+ if (size - pos < 8)
+ return pos;
+
+ le64 = htole64(u->field_length - u->field_pos);
+ memcpy(buf + pos, &le64, 8);
+ pos += 8;
+
+ u->entry_state++;
+ continue;
+ }
+
+ case ENTRY_OUTRO:
+ /* need space for '\n' */
+ if (size - pos < 1)
+ return pos;
+
+ buf[pos++] = '\n';
+ u->entry_state++;
+ u->entries_sent++;
+
+ return pos;
+
+ default:
+ assert_not_reached("WTF?");
+ }
+ }
+ assert_not_reached("WTF?");
+}
+
+static void check_update_watchdog(Uploader *u) {
+ usec_t after;
+ usec_t elapsed_time;
+
+ if (u->watchdog_usec <= 0)
+ return;
+
+ after = now(CLOCK_MONOTONIC);
+ elapsed_time = usec_sub_unsigned(after, u->watchdog_timestamp);
+ if (elapsed_time > u->watchdog_usec / 2) {
+ log_debug("Update watchdog timer");
+ sd_notify(false, "WATCHDOG=1");
+ u->watchdog_timestamp = after;
+ }
+}
+
+static size_t journal_input_callback(void *buf, size_t size, size_t nmemb, void *userp) {
+ Uploader *u = userp;
+ int r;
+ sd_journal *j;
+ size_t filled = 0;
+ ssize_t w;
+
+ assert(u);
+ assert(nmemb <= SSIZE_MAX / size);
+
+ check_update_watchdog(u);
+
+ j = u->journal;
+
+ while (j && filled < size * nmemb) {
+ if (u->entry_state == ENTRY_DONE) {
+ r = sd_journal_next(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to move to next entry in journal: %m");
+ return CURL_READFUNC_ABORT;
+ } else if (r == 0) {
+ if (u->input_event)
+ log_debug("No more entries, waiting for journal.");
+ else {
+ log_info("No more entries, closing journal.");
+ close_journal_input(u);
+ }
+
+ u->uploading = false;
+
+ break;
+ }
+
+ u->entry_state = ENTRY_CURSOR;
+ }
+
+ w = write_entry((char*)buf + filled, size * nmemb - filled, u);
+ if (w < 0)
+ return CURL_READFUNC_ABORT;
+ filled += w;
+
+ if (filled == 0) {
+ log_error("Buffer space is too small to write entry.");
+ return CURL_READFUNC_ABORT;
+ } else if (u->entry_state != ENTRY_DONE)
+ /* This means that all available space was used up */
+ break;
+
+ log_debug("Entry %zu (%s) has been uploaded.",
+ u->entries_sent, u->current_cursor);
+ }
+
+ return filled;
+}
+
+void close_journal_input(Uploader *u) {
+ assert(u);
+
+ if (u->journal) {
+ log_debug("Closing journal input.");
+
+ sd_journal_close(u->journal);
+ u->journal = NULL;
+ }
+ u->timeout = 0;
+}
+
+static int process_journal_input(Uploader *u, int skip) {
+ int r;
+
+ if (u->uploading)
+ return 0;
+
+ r = sd_journal_next_skip(u->journal, skip);
+ if (r < 0)
+ return log_error_errno(r, "Failed to skip to next entry: %m");
+ else if (r < skip)
+ return 0;
+
+ /* have data */
+ u->entry_state = ENTRY_CURSOR;
+ return start_upload(u, journal_input_callback, u);
+}
+
+int check_journal_input(Uploader *u) {
+ if (u->input_event) {
+ int r;
+
+ r = sd_journal_process(u->journal);
+ if (r < 0) {
+ log_error_errno(r, "Failed to process journal: %m");
+ close_journal_input(u);
+ return r;
+ }
+
+ if (r == SD_JOURNAL_NOP)
+ return 0;
+ }
+
+ return process_journal_input(u, 1);
+}
+
+static int dispatch_journal_input(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userp) {
+ Uploader *u = userp;
+
+ assert(u);
+
+ if (u->uploading)
+ return 0;
+
+ log_debug("Detected journal input, checking for new data.");
+ return check_journal_input(u);
+}
+
+int open_journal_for_upload(Uploader *u,
+ sd_journal *j,
+ const char *cursor,
+ bool after_cursor,
+ bool follow) {
+ int fd, r, events;
+
+ u->journal = j;
+
+ sd_journal_set_data_threshold(j, 0);
+
+ if (follow) {
+ fd = sd_journal_get_fd(j);
+ if (fd < 0)
+ return log_error_errno(fd, "sd_journal_get_fd failed: %m");
+
+ events = sd_journal_get_events(j);
+
+ r = sd_journal_reliable_fd(j);
+ assert(r >= 0);
+ if (r > 0)
+ u->timeout = -1;
+ else
+ u->timeout = JOURNAL_UPLOAD_POLL_TIMEOUT;
+
+ r = sd_event_add_io(u->events, &u->input_event,
+ fd, events, dispatch_journal_input, u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register input event: %m");
+
+ log_debug("Listening for journal events on fd:%d, timeout %d",
+ fd, u->timeout == (uint64_t) -1 ? -1 : (int) u->timeout);
+ } else
+ log_debug("Not listening for journal events.");
+
+ if (cursor) {
+ r = sd_journal_seek_cursor(j, cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to seek to cursor %s: %m",
+ cursor);
+ }
+
+ return process_journal_input(u, 1 + !!after_cursor);
+}
diff --git a/src/journal-remote/journal-upload.c b/src/journal-remote/journal-upload.c
new file mode 100644
index 0000000..ef3556f
--- /dev/null
+++ b/src/journal-remote/journal-upload.c
@@ -0,0 +1,854 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <curl/curl.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "build.h"
+#include "conf-parser.h"
+#include "daemon-util.h"
+#include "def.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "glob-util.h"
+#include "journal-upload.h"
+#include "log.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "sigbus.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+#define PRIV_KEY_FILE CERTIFICATE_ROOT "/private/journal-upload.pem"
+#define CERT_FILE CERTIFICATE_ROOT "/certs/journal-upload.pem"
+#define TRUST_FILE CERTIFICATE_ROOT "/ca/trusted.pem"
+#define DEFAULT_PORT 19532
+
+static const char* arg_url = NULL;
+static const char *arg_key = NULL;
+static const char *arg_cert = NULL;
+static const char *arg_trust = NULL;
+static const char *arg_directory = NULL;
+static char **arg_file = NULL;
+static const char *arg_cursor = NULL;
+static bool arg_after_cursor = false;
+static int arg_journal_type = 0;
+static const char *arg_machine = NULL;
+static bool arg_merge = false;
+static int arg_follow = -1;
+static const char *arg_save_state = NULL;
+
+static void close_fd_input(Uploader *u);
+
+#define SERVER_ANSWER_KEEP 2048
+
+#define STATE_FILE "/var/lib/systemd/journal-upload/state"
+
+#define easy_setopt(curl, opt, value, level, cmd) \
+ do { \
+ code = curl_easy_setopt(curl, opt, value); \
+ if (code) { \
+ log_full(level, \
+ "curl_easy_setopt " #opt " failed: %s", \
+ curl_easy_strerror(code)); \
+ cmd; \
+ } \
+ } while (0)
+
+static size_t output_callback(char *buf,
+ size_t size,
+ size_t nmemb,
+ void *userp) {
+ Uploader *u = userp;
+
+ assert(u);
+
+ log_debug("The server answers (%zu bytes): %.*s",
+ size*nmemb, (int)(size*nmemb), buf);
+
+ if (nmemb && !u->answer) {
+ u->answer = strndup(buf, size*nmemb);
+ if (!u->answer)
+ log_warning("Failed to store server answer (%zu bytes): out of memory", size*nmemb);
+ }
+
+ return size * nmemb;
+}
+
+static int check_cursor_updating(Uploader *u) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ if (!u->state_file)
+ return 0;
+
+ r = mkdir_parents(u->state_file, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Cannot create parent directory of state file %s: %m",
+ u->state_file);
+
+ r = fopen_temporary(u->state_file, &f, &temp_path);
+ if (r < 0)
+ return log_error_errno(r, "Cannot save state to %s: %m",
+ u->state_file);
+ unlink(temp_path);
+
+ return 0;
+}
+
+static int update_cursor_state(Uploader *u) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ if (!u->state_file || !u->last_cursor)
+ return 0;
+
+ r = fopen_temporary(u->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "LAST_CURSOR=%s\n",
+ u->last_cursor);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, u->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ (void) unlink(u->state_file);
+
+ return log_error_errno(r, "Failed to save state %s: %m", u->state_file);
+}
+
+static int load_cursor_state(Uploader *u) {
+ int r;
+
+ if (!u->state_file)
+ return 0;
+
+ r = parse_env_file(NULL, u->state_file, "LAST_CURSOR", &u->last_cursor);
+ if (r == -ENOENT)
+ log_debug("State file %s is not present.", u->state_file);
+ else if (r < 0)
+ return log_error_errno(r, "Failed to read state file %s: %m",
+ u->state_file);
+ else
+ log_debug("Last cursor was %s", u->last_cursor);
+
+ return 0;
+}
+
+int start_upload(Uploader *u,
+ size_t (*input_callback)(void *ptr,
+ size_t size,
+ size_t nmemb,
+ void *userdata),
+ void *data) {
+ CURLcode code;
+
+ assert(u);
+ assert(input_callback);
+
+ if (!u->header) {
+ struct curl_slist *h;
+
+ h = curl_slist_append(NULL, "Content-Type: application/vnd.fdo.journal");
+ if (!h)
+ return log_oom();
+
+ h = curl_slist_append(h, "Transfer-Encoding: chunked");
+ if (!h) {
+ curl_slist_free_all(h);
+ return log_oom();
+ }
+
+ h = curl_slist_append(h, "Accept: text/plain");
+ if (!h) {
+ curl_slist_free_all(h);
+ return log_oom();
+ }
+
+ u->header = h;
+ }
+
+ if (!u->easy) {
+ CURL *curl;
+
+ curl = curl_easy_init();
+ if (!curl)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOSR),
+ "Call to curl_easy_init failed.");
+
+ /* tell it to POST to the URL */
+ easy_setopt(curl, CURLOPT_POST, 1L,
+ LOG_ERR, return -EXFULL);
+
+ easy_setopt(curl, CURLOPT_ERRORBUFFER, u->error,
+ LOG_ERR, return -EXFULL);
+
+ /* set where to write to */
+ easy_setopt(curl, CURLOPT_WRITEFUNCTION, output_callback,
+ LOG_ERR, return -EXFULL);
+
+ easy_setopt(curl, CURLOPT_WRITEDATA, data,
+ LOG_ERR, return -EXFULL);
+
+ /* set where to read from */
+ easy_setopt(curl, CURLOPT_READFUNCTION, input_callback,
+ LOG_ERR, return -EXFULL);
+
+ easy_setopt(curl, CURLOPT_READDATA, data,
+ LOG_ERR, return -EXFULL);
+
+ /* use our special own mime type and chunked transfer */
+ easy_setopt(curl, CURLOPT_HTTPHEADER, u->header,
+ LOG_ERR, return -EXFULL);
+
+ if (DEBUG_LOGGING)
+ /* enable verbose for easier tracing */
+ easy_setopt(curl, CURLOPT_VERBOSE, 1L, LOG_WARNING, );
+
+ easy_setopt(curl, CURLOPT_USERAGENT,
+ "systemd-journal-upload " GIT_VERSION,
+ LOG_WARNING, );
+
+ if (arg_key || startswith(u->url, "https://")) {
+ easy_setopt(curl, CURLOPT_SSLKEY, arg_key ?: PRIV_KEY_FILE,
+ LOG_ERR, return -EXFULL);
+ easy_setopt(curl, CURLOPT_SSLCERT, arg_cert ?: CERT_FILE,
+ LOG_ERR, return -EXFULL);
+ }
+
+ if (streq_ptr(arg_trust, "all"))
+ easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0,
+ LOG_ERR, return -EUCLEAN);
+ else if (arg_trust || startswith(u->url, "https://"))
+ easy_setopt(curl, CURLOPT_CAINFO, arg_trust ?: TRUST_FILE,
+ LOG_ERR, return -EXFULL);
+
+ if (arg_key || arg_trust)
+ easy_setopt(curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1,
+ LOG_WARNING, );
+
+ u->easy = curl;
+ } else {
+ /* truncate the potential old error message */
+ u->error[0] = '\0';
+
+ free(u->answer);
+ u->answer = 0;
+ }
+
+ /* upload to this place */
+ code = curl_easy_setopt(u->easy, CURLOPT_URL, u->url);
+ if (code)
+ return log_error_errno(SYNTHETIC_ERRNO(EXFULL),
+ "curl_easy_setopt CURLOPT_URL failed: %s",
+ curl_easy_strerror(code));
+
+ u->uploading = true;
+
+ return 0;
+}
+
+static size_t fd_input_callback(void *buf, size_t size, size_t nmemb, void *userp) {
+ Uploader *u = userp;
+ ssize_t n;
+
+ assert(u);
+ assert(nmemb < SSIZE_MAX / size);
+
+ if (u->input < 0)
+ return 0;
+
+ assert(!size_multiply_overflow(size, nmemb));
+
+ n = read(u->input, buf, size * nmemb);
+ log_debug("%s: allowed %zu, read %zd", __func__, size*nmemb, n);
+ if (n > 0)
+ return n;
+
+ u->uploading = false;
+ if (n < 0) {
+ log_error_errno(errno, "Aborting transfer after read error on input: %m.");
+ return CURL_READFUNC_ABORT;
+ }
+
+ log_debug("Reached EOF");
+ close_fd_input(u);
+ return 0;
+}
+
+static void close_fd_input(Uploader *u) {
+ assert(u);
+
+ u->input = safe_close(u->input);
+ u->timeout = 0;
+}
+
+static int dispatch_fd_input(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userp) {
+ Uploader *u = userp;
+
+ assert(u);
+ assert(fd >= 0);
+
+ if (revents & EPOLLHUP) {
+ log_debug("Received HUP");
+ close_fd_input(u);
+ return 0;
+ }
+
+ if (!(revents & EPOLLIN)) {
+ log_warning("Unexpected poll event %"PRIu32".", revents);
+ return -EINVAL;
+ }
+
+ if (u->uploading) {
+ log_warning("dispatch_fd_input called when uploading, ignoring.");
+ return 0;
+ }
+
+ return start_upload(u, fd_input_callback, u);
+}
+
+static int open_file_for_upload(Uploader *u, const char *filename) {
+ int fd, r = 0;
+
+ if (streq(filename, "-"))
+ fd = STDIN_FILENO;
+ else {
+ fd = open(filename, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", filename);
+ }
+
+ u->input = fd;
+
+ if (arg_follow) {
+ r = sd_event_add_io(u->events, &u->input_event,
+ fd, EPOLLIN, dispatch_fd_input, u);
+ if (r < 0) {
+ if (r != -EPERM || arg_follow > 0)
+ return log_error_errno(r, "Failed to register input event: %m");
+
+ /* Normal files should just be consumed without polling. */
+ r = start_upload(u, fd_input_callback, u);
+ }
+ }
+
+ return r;
+}
+
+static int dispatch_sigterm(sd_event_source *event,
+ const struct signalfd_siginfo *si,
+ void *userdata) {
+ Uploader *u = userdata;
+
+ assert(u);
+
+ log_received_signal(LOG_INFO, si);
+
+ close_fd_input(u);
+ close_journal_input(u);
+
+ sd_event_exit(u->events, 0);
+ return 0;
+}
+
+static int setup_signals(Uploader *u) {
+ int r;
+
+ assert(u);
+
+ assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, -1) >= 0);
+
+ r = sd_event_add_signal(u->events, &u->sigterm_event, SIGTERM, dispatch_sigterm, u);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(u->events, &u->sigint_event, SIGINT, dispatch_sigterm, u);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int setup_uploader(Uploader *u, const char *url, const char *state_file) {
+ int r;
+ const char *host, *proto = "";
+
+ assert(u);
+ assert(url);
+
+ *u = (Uploader) {
+ .input = -1
+ };
+
+ host = STARTSWITH_SET(url, "http://", "https://");
+ if (!host) {
+ host = url;
+ proto = "https://";
+ }
+
+ if (strchr(host, ':'))
+ u->url = strjoin(proto, url, "/upload");
+ else {
+ char *t;
+ size_t x;
+
+ t = strdupa(url);
+ x = strlen(t);
+ while (x > 0 && t[x - 1] == '/')
+ t[x - 1] = '\0';
+
+ u->url = strjoin(proto, t, ":" STRINGIFY(DEFAULT_PORT), "/upload");
+ }
+ if (!u->url)
+ return log_oom();
+
+ u->state_file = state_file;
+
+ r = sd_event_default(&u->events);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_default failed: %m");
+
+ r = setup_signals(u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up signals: %m");
+
+ (void) sd_watchdog_enabled(false, &u->watchdog_usec);
+
+ return load_cursor_state(u);
+}
+
+static void destroy_uploader(Uploader *u) {
+ assert(u);
+
+ curl_easy_cleanup(u->easy);
+ curl_slist_free_all(u->header);
+ free(u->answer);
+
+ free(u->last_cursor);
+ free(u->current_cursor);
+
+ free(u->url);
+
+ u->input_event = sd_event_source_unref(u->input_event);
+
+ close_fd_input(u);
+ close_journal_input(u);
+
+ sd_event_source_unref(u->sigterm_event);
+ sd_event_source_unref(u->sigint_event);
+ sd_event_unref(u->events);
+}
+
+static int perform_upload(Uploader *u) {
+ CURLcode code;
+ long status;
+
+ assert(u);
+
+ u->watchdog_timestamp = now(CLOCK_MONOTONIC);
+ code = curl_easy_perform(u->easy);
+ if (code) {
+ if (u->error[0])
+ log_error("Upload to %s failed: %.*s",
+ u->url, (int) sizeof(u->error), u->error);
+ else
+ log_error("Upload to %s failed: %s",
+ u->url, curl_easy_strerror(code));
+ return -EIO;
+ }
+
+ code = curl_easy_getinfo(u->easy, CURLINFO_RESPONSE_CODE, &status);
+ if (code)
+ return log_error_errno(SYNTHETIC_ERRNO(EUCLEAN),
+ "Failed to retrieve response code: %s",
+ curl_easy_strerror(code));
+
+ if (status >= 300)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Upload to %s failed with code %ld: %s",
+ u->url, status, strna(u->answer));
+ else if (status < 200)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Upload to %s finished with unexpected code %ld: %s",
+ u->url, status, strna(u->answer));
+ else
+ log_debug("Upload finished successfully with code %ld: %s",
+ status, strna(u->answer));
+
+ free_and_replace(u->last_cursor, u->current_cursor);
+
+ return update_cursor_state(u);
+}
+
+static int parse_config(void) {
+ const ConfigTableItem items[] = {
+ { "Upload", "URL", config_parse_string, 0, &arg_url },
+ { "Upload", "ServerKeyFile", config_parse_path, 0, &arg_key },
+ { "Upload", "ServerCertificateFile", config_parse_path, 0, &arg_cert },
+ { "Upload", "TrustedCertificateFile", config_parse_path, 0, &arg_trust },
+ {}};
+
+ return config_parse_many_nulstr(PKGSYSCONFDIR "/journal-upload.conf",
+ CONF_PATHS_NULSTR("systemd/journal-upload.conf.d"),
+ "Upload\0", config_item_table_lookup, items,
+ CONFIG_PARSE_WARN, NULL);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-journal-upload.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s -u URL {FILE|-}...\n\n"
+ "Upload journal events to a remote server.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -u --url=URL Upload to this address (default port "
+ STRINGIFY(DEFAULT_PORT) ")\n"
+ " --key=FILENAME Specify key in PEM format (default:\n"
+ " \"" PRIV_KEY_FILE "\")\n"
+ " --cert=FILENAME Specify certificate in PEM format (default:\n"
+ " \"" CERT_FILE "\")\n"
+ " --trust=FILENAME|all Specify CA certificate or disable checking (default:\n"
+ " \"" TRUST_FILE "\")\n"
+ " --system Use the system journal\n"
+ " --user Use the user journal for the current user\n"
+ " -m --merge Use all available journals\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " -D --directory=PATH Use journal files from directory\n"
+ " --file=PATH Use this journal file\n"
+ " --cursor=CURSOR Start at the specified cursor\n"
+ " --after-cursor=CURSOR Start after the specified cursor\n"
+ " --follow[=BOOL] Do [not] wait for input\n"
+ " --save-state[=FILE] Save uploaded cursors (default \n"
+ " " STATE_FILE ")\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_KEY,
+ ARG_CERT,
+ ARG_TRUST,
+ ARG_USER,
+ ARG_SYSTEM,
+ ARG_FILE,
+ ARG_CURSOR,
+ ARG_AFTER_CURSOR,
+ ARG_FOLLOW,
+ ARG_SAVE_STATE,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "url", required_argument, NULL, 'u' },
+ { "key", required_argument, NULL, ARG_KEY },
+ { "cert", required_argument, NULL, ARG_CERT },
+ { "trust", required_argument, NULL, ARG_TRUST },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "user", no_argument, NULL, ARG_USER },
+ { "merge", no_argument, NULL, 'm' },
+ { "machine", required_argument, NULL, 'M' },
+ { "directory", required_argument, NULL, 'D' },
+ { "file", required_argument, NULL, ARG_FILE },
+ { "cursor", required_argument, NULL, ARG_CURSOR },
+ { "after-cursor", required_argument, NULL, ARG_AFTER_CURSOR },
+ { "follow", optional_argument, NULL, ARG_FOLLOW },
+ { "save-state", optional_argument, NULL, ARG_SAVE_STATE },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ opterr = 0;
+
+ while ((c = getopt_long(argc, argv, "hu:mM:D:", options, NULL)) >= 0)
+ switch(c) {
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'u':
+ if (arg_url)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --url");
+
+ arg_url = optarg;
+ break;
+
+ case ARG_KEY:
+ if (arg_key)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --key");
+
+ arg_key = optarg;
+ break;
+
+ case ARG_CERT:
+ if (arg_cert)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --cert");
+
+ arg_cert = optarg;
+ break;
+
+ case ARG_TRUST:
+ if (arg_trust)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --trust");
+
+ arg_trust = optarg;
+ break;
+
+ case ARG_SYSTEM:
+ arg_journal_type |= SD_JOURNAL_SYSTEM;
+ break;
+
+ case ARG_USER:
+ arg_journal_type |= SD_JOURNAL_CURRENT_USER;
+ break;
+
+ case 'm':
+ arg_merge = true;
+ break;
+
+ case 'M':
+ if (arg_machine)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --machine/-M");
+
+ arg_machine = optarg;
+ break;
+
+ case 'D':
+ if (arg_directory)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --directory/-D");
+
+ arg_directory = optarg;
+ break;
+
+ case ARG_FILE:
+ r = glob_extend(&arg_file, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add paths: %m");
+ break;
+
+ case ARG_CURSOR:
+ if (arg_cursor)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --cursor/--after-cursor");
+
+ arg_cursor = optarg;
+ break;
+
+ case ARG_AFTER_CURSOR:
+ if (arg_cursor)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --cursor/--after-cursor");
+
+ arg_cursor = optarg;
+ arg_after_cursor = true;
+ break;
+
+ case ARG_FOLLOW:
+ if (optarg) {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --follow= parameter.");
+
+ arg_follow = !!r;
+ } else
+ arg_follow = true;
+
+ break;
+
+ case ARG_SAVE_STATE:
+ arg_save_state = optarg ?: STATE_FILE;
+ break;
+
+ case '?':
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown option %s.",
+ argv[optind - 1]);
+
+ case ':':
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Missing argument to %s.",
+ argv[optind - 1]);
+
+ default:
+ assert_not_reached("Unhandled option code.");
+ }
+
+ if (!arg_url)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Required --url/-u option missing.");
+
+ if (!!arg_key != !!arg_cert)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Options --key and --cert must be used together.");
+
+ if (optind < argc && (arg_directory || arg_file || arg_machine || arg_journal_type))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Input arguments make no sense with journal input.");
+
+ return 1;
+}
+
+static int open_journal(sd_journal **j) {
+ int r;
+
+ if (arg_directory)
+ r = sd_journal_open_directory(j, arg_directory, arg_journal_type);
+ else if (arg_file)
+ r = sd_journal_open_files(j, (const char**) arg_file, 0);
+ else if (arg_machine)
+ r = sd_journal_open_container(j, arg_machine, 0);
+ else
+ r = sd_journal_open(j, !arg_merge*SD_JOURNAL_LOCAL_ONLY + arg_journal_type);
+ if (r < 0)
+ log_error_errno(r, "Failed to open %s: %m",
+ arg_directory ? arg_directory : arg_file ? "files" : "journal");
+ return r;
+}
+
+static int run(int argc, char **argv) {
+ _cleanup_(notify_on_cleanup) const char *notify_message = NULL;
+ _cleanup_(destroy_uploader) Uploader u = {};
+ bool use_journal;
+ int r;
+
+ log_show_color(true);
+ log_parse_environment();
+
+ /* The journal merging logic potentially needs a lot of fds. */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ r = parse_config();
+ if (r < 0)
+ return r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ sigbus_install();
+
+ r = setup_uploader(&u, arg_url, arg_save_state);
+ if (r < 0)
+ return r;
+
+ sd_event_set_watchdog(u.events, true);
+
+ r = check_cursor_updating(&u);
+ if (r < 0)
+ return r;
+
+ log_debug("%s running as pid "PID_FMT,
+ program_invocation_short_name, getpid_cached());
+
+ use_journal = optind >= argc;
+ if (use_journal) {
+ sd_journal *j;
+ r = open_journal(&j);
+ if (r < 0)
+ return r;
+ r = open_journal_for_upload(&u, j,
+ arg_cursor ?: u.last_cursor,
+ arg_cursor ? arg_after_cursor : true,
+ !!arg_follow);
+ if (r < 0)
+ return r;
+ }
+
+ notify_message = notify_start("READY=1\n"
+ "STATUS=Processing input...",
+ NOTIFY_STOPPING);
+
+ for (;;) {
+ r = sd_event_get_state(u.events);
+ if (r < 0)
+ return r;
+ if (r == SD_EVENT_FINISHED)
+ return 0;
+
+ if (use_journal) {
+ if (!u.journal)
+ return 0;
+
+ r = check_journal_input(&u);
+ } else if (u.input < 0 && !use_journal) {
+ if (optind >= argc)
+ return 0;
+
+ log_debug("Using %s as input.", argv[optind]);
+ r = open_file_for_upload(&u, argv[optind++]);
+ }
+ if (r < 0)
+ return r;
+
+ if (u.uploading) {
+ r = perform_upload(&u);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_run(u.events, u.timeout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+ }
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/journal-remote/journal-upload.conf.in b/src/journal-remote/journal-upload.conf.in
new file mode 100644
index 0000000..5f59a6f
--- /dev/null
+++ b/src/journal-remote/journal-upload.conf.in
@@ -0,0 +1,18 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See journal-upload.conf(5) for details
+
+[Upload]
+# URL=
+# ServerKeyFile=@CERTIFICATEROOT@/private/journal-upload.pem
+# ServerCertificateFile=@CERTIFICATEROOT@/certs/journal-upload.pem
+# TrustedCertificateFile=@CERTIFICATEROOT@/ca/trusted.pem
diff --git a/src/journal-remote/journal-upload.h b/src/journal-remote/journal-upload.h
new file mode 100644
index 0000000..5711905
--- /dev/null
+++ b/src/journal-remote/journal-upload.h
@@ -0,0 +1,71 @@
+#pragma once
+
+#include <inttypes.h>
+
+#include "sd-event.h"
+#include "sd-journal.h"
+#include "time-util.h"
+
+typedef enum {
+ ENTRY_CURSOR = 0, /* Nothing actually written yet. */
+ ENTRY_REALTIME,
+ ENTRY_MONOTONIC,
+ ENTRY_BOOT_ID,
+ ENTRY_NEW_FIELD, /* In between fields. */
+ ENTRY_TEXT_FIELD, /* In the middle of a text field. */
+ ENTRY_BINARY_FIELD_START, /* Writing the name of a binary field. */
+ ENTRY_BINARY_FIELD_SIZE, /* Writing the size of a binary field. */
+ ENTRY_BINARY_FIELD, /* In the middle of a binary field. */
+ ENTRY_OUTRO, /* Writing '\n' */
+ ENTRY_DONE, /* Need to move to a new field. */
+} entry_state;
+
+typedef struct Uploader {
+ sd_event *events;
+ sd_event_source *sigint_event, *sigterm_event;
+
+ char *url;
+ CURL *easy;
+ bool uploading;
+ char error[CURL_ERROR_SIZE];
+ struct curl_slist *header;
+ char *answer;
+
+ sd_event_source *input_event;
+ uint64_t timeout;
+
+ /* fd stuff */
+ int input;
+
+ /* journal stuff */
+ sd_journal* journal;
+
+ entry_state entry_state;
+ const void *field_data;
+ size_t field_pos, field_length;
+
+ /* general metrics */
+ const char *state_file;
+
+ size_t entries_sent;
+ char *last_cursor, *current_cursor;
+ usec_t watchdog_timestamp;
+ usec_t watchdog_usec;
+} Uploader;
+
+#define JOURNAL_UPLOAD_POLL_TIMEOUT (10 * USEC_PER_SEC)
+
+int start_upload(Uploader *u,
+ size_t (*input_callback)(void *ptr,
+ size_t size,
+ size_t nmemb,
+ void *userdata),
+ void *data);
+
+int open_journal_for_upload(Uploader *u,
+ sd_journal *j,
+ const char *cursor,
+ bool after_cursor,
+ bool follow);
+void close_journal_input(Uploader *u);
+int check_journal_input(Uploader *u);
diff --git a/src/journal-remote/log-generator.py b/src/journal-remote/log-generator.py
new file mode 100755
index 0000000..e1725b1
--- /dev/null
+++ b/src/journal-remote/log-generator.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+import sys
+import argparse
+
+PARSER = argparse.ArgumentParser()
+PARSER.add_argument('n', type=int)
+PARSER.add_argument('--dots', action='store_true')
+PARSER.add_argument('-m', '--message-size', type=int, default=200)
+PARSER.add_argument('-d', '--data-size', type=int, default=4000)
+PARSER.add_argument('--data-type', choices={'random', 'simple'})
+OPTIONS = PARSER.parse_args()
+
+template = """\
+__CURSOR=s=6863c726210b4560b7048889d8ada5c5;i=3e931;b=f446871715504074bf7049ef0718fa93;m={m:x};t=4fd05c
+__REALTIME_TIMESTAMP={realtime_ts}
+__MONOTONIC_TIMESTAMP={monotonic_ts}
+_BOOT_ID=f446871715504074bf7049ef0718fa93
+_TRANSPORT=syslog
+PRIORITY={priority}
+SYSLOG_FACILITY={facility}
+SYSLOG_IDENTIFIER=/USR/SBIN/CRON
+MESSAGE={message}
+_UID=0
+_GID=0
+_MACHINE_ID=69121ca41d12c1b69a7960174c27b618
+_HOSTNAME=hostname
+SYSLOG_PID=25721
+_PID=25721
+_SOURCE_REALTIME_TIMESTAMP={source_realtime_ts}
+DATA={data}
+"""
+
+m = 0x198603b12d7
+realtime_ts = 1404101101501873
+monotonic_ts = 1753961140951
+source_realtime_ts = 1404101101483516
+priority = 3
+facility = 6
+
+src = open('/dev/urandom', 'rb')
+
+bytes = 0
+counter = 0
+
+for i in range(OPTIONS.n):
+ message = src.read(OPTIONS.message_size)
+ message = repr(message)[2:-1]
+
+ if OPTIONS.data_type == 'random':
+ data = repr(src.read(OPTIONS.data_size))
+ else:
+ # keep the pattern non-repeating so we get a different blob every time
+ data = '{:0{}}'.format(counter, OPTIONS.data_size)
+ counter += 1
+
+ entry = template.format(m=m,
+ realtime_ts=realtime_ts,
+ monotonic_ts=monotonic_ts,
+ source_realtime_ts=source_realtime_ts,
+ priority=priority,
+ facility=facility,
+ message=message,
+ data=data)
+ m += 1
+ realtime_ts += 1
+ monotonic_ts += 1
+ source_realtime_ts += 1
+
+ bytes += len(entry)
+
+ print(entry)
+
+ if OPTIONS.dots:
+ print('.', file=sys.stderr, end='', flush=True)
+
+if OPTIONS.dots:
+ print(file=sys.stderr)
+print('Wrote {} bytes'.format(bytes), file=sys.stderr)
diff --git a/src/journal-remote/meson.build b/src/journal-remote/meson.build
new file mode 100644
index 0000000..2887daa
--- /dev/null
+++ b/src/journal-remote/meson.build
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+systemd_journal_upload_sources = files('''
+ journal-upload.h
+ journal-upload.c
+ journal-upload-journal.c
+'''.split())
+
+libsystemd_journal_remote_sources = files('''
+ journal-remote-parse.h
+ journal-remote-parse.c
+ journal-remote-write.h
+ journal-remote-write.c
+ journal-remote.h
+ journal-remote.c
+'''.split())
+
+if conf.get('HAVE_MICROHTTPD') == 1
+ libsystemd_journal_remote_sources += files('''
+ microhttpd-util.h
+ microhttpd-util.c
+'''.split())
+endif
+
+libsystemd_journal_remote = static_library(
+ 'systemd-journal-remote',
+ libsystemd_journal_remote_sources,
+ include_directories : includes,
+ dependencies : [threads,
+ libmicrohttpd,
+ libgnutls,
+ libxz,
+ liblz4],
+ install : false)
+
+systemd_journal_remote_sources = files('''
+ journal-remote-main.c
+'''.split())
+
+systemd_journal_gatewayd_sources = files('''
+ journal-gatewayd.c
+ microhttpd-util.h
+ microhttpd-util.c
+'''.split())
+
+if conf.get('ENABLE_REMOTE') ==1 and conf.get('HAVE_LIBCURL') == 1
+ journal_upload_conf = configure_file(
+ input : 'journal-upload.conf.in',
+ output : 'journal-upload.conf',
+ configuration : substs)
+ install_data(journal_upload_conf,
+ install_dir : pkgsysconfdir)
+endif
+
+if conf.get('ENABLE_REMOTE') == 1 and conf.get('HAVE_MICROHTTPD') == 1
+ journal_remote_conf = configure_file(
+ input : 'journal-remote.conf.in',
+ output : 'journal-remote.conf',
+ configuration : substs)
+ install_data(journal_remote_conf,
+ install_dir : pkgsysconfdir)
+
+ install_data('browse.html',
+ install_dir : join_paths(pkgdatadir, 'gatewayd'))
+
+ meson.add_install_script('sh', '-c',
+ mkdir_p.format('/var/log/journal/remote'))
+ meson.add_install_script('sh', '-c',
+ '''chown 0:0 $DESTDIR/var/log/journal/remote &&
+ chmod 755 $DESTDIR/var/log/journal/remote || :''')
+endif
diff --git a/src/journal-remote/microhttpd-util.c b/src/journal-remote/microhttpd-util.c
new file mode 100644
index 0000000..5f56919
--- /dev/null
+++ b/src/journal-remote/microhttpd-util.c
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+#if HAVE_GNUTLS
+#include <gnutls/gnutls.h>
+#include <gnutls/x509.h>
+#endif
+
+#include "alloc-util.h"
+#include "log.h"
+#include "macro.h"
+#include "microhttpd-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+void microhttpd_logger(void *arg, const char *fmt, va_list ap) {
+ char *f;
+
+ f = strjoina("microhttpd: ", fmt);
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ log_internalv(LOG_INFO, 0, NULL, 0, NULL, f, ap);
+ REENABLE_WARNING;
+}
+
+static int mhd_respond_internal(struct MHD_Connection *connection,
+ enum MHD_RequestTerminationCode code,
+ const char *buffer,
+ size_t size,
+ enum MHD_ResponseMemoryMode mode) {
+ assert(connection);
+
+ _cleanup_(MHD_destroy_responsep) struct MHD_Response *response
+ = MHD_create_response_from_buffer(size, (char*) buffer, mode);
+ if (!response)
+ return MHD_NO;
+
+ log_debug("Queueing response %u: %s", code, buffer);
+ MHD_add_response_header(response, "Content-Type", "text/plain");
+ return MHD_queue_response(connection, code, response);
+}
+
+int mhd_respond(struct MHD_Connection *connection,
+ enum MHD_RequestTerminationCode code,
+ const char *message) {
+
+ const char *fmt;
+
+ fmt = strjoina(message, "\n");
+
+ return mhd_respond_internal(connection, code,
+ fmt, strlen(message) + 1,
+ MHD_RESPMEM_PERSISTENT);
+}
+
+int mhd_respond_oom(struct MHD_Connection *connection) {
+ return mhd_respond(connection, MHD_HTTP_SERVICE_UNAVAILABLE, "Out of memory.");
+}
+
+int mhd_respondf(struct MHD_Connection *connection,
+ int error,
+ enum MHD_RequestTerminationCode code,
+ const char *format, ...) {
+
+ const char *fmt;
+ char *m;
+ int r;
+ va_list ap;
+
+ assert(connection);
+ assert(format);
+
+ if (error < 0)
+ error = -error;
+ errno = -error;
+ fmt = strjoina(format, "\n");
+ va_start(ap, format);
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+ r = vasprintf(&m, fmt, ap);
+#pragma GCC diagnostic pop
+ va_end(ap);
+
+ if (r < 0)
+ return respond_oom(connection);
+
+ return mhd_respond_internal(connection, code, m, r, MHD_RESPMEM_MUST_FREE);
+}
+
+#if HAVE_GNUTLS
+
+static struct {
+ const char *const names[4];
+ int level;
+ bool enabled;
+} gnutls_log_map[] = {
+ { {"0"}, LOG_DEBUG },
+ { {"1", "audit"}, LOG_WARNING, true}, /* gnutls session audit */
+ { {"2", "assert"}, LOG_DEBUG }, /* gnutls assert log */
+ { {"3", "hsk", "ext"}, LOG_DEBUG }, /* gnutls handshake log */
+ { {"4", "rec"}, LOG_DEBUG }, /* gnutls record log */
+ { {"5", "dtls"}, LOG_DEBUG }, /* gnutls DTLS log */
+ { {"6", "buf"}, LOG_DEBUG },
+ { {"7", "write", "read"}, LOG_DEBUG },
+ { {"8"}, LOG_DEBUG },
+ { {"9", "enc", "int"}, LOG_DEBUG },
+};
+
+static void log_func_gnutls(int level, const char *message) {
+ assert_se(message);
+
+ if (0 <= level && level < (int) ELEMENTSOF(gnutls_log_map)) {
+ if (gnutls_log_map[level].enabled)
+ log_internal(gnutls_log_map[level].level, 0, NULL, 0, NULL, "gnutls %d/%s: %s", level, gnutls_log_map[level].names[1], message);
+ } else {
+ log_debug("Received GNUTLS message with unknown level %d.", level);
+ log_internal(LOG_DEBUG, 0, NULL, 0, NULL, "gnutls: %s", message);
+ }
+}
+
+static void log_reset_gnutls_level(void) {
+ int i;
+
+ for (i = ELEMENTSOF(gnutls_log_map) - 1; i >= 0; i--)
+ if (gnutls_log_map[i].enabled) {
+ log_debug("Setting gnutls log level to %d", i);
+ gnutls_global_set_log_level(i);
+ break;
+ }
+}
+
+static int log_enable_gnutls_category(const char *cat) {
+ unsigned i;
+
+ if (streq(cat, "all")) {
+ for (i = 0; i < ELEMENTSOF(gnutls_log_map); i++)
+ gnutls_log_map[i].enabled = true;
+ log_reset_gnutls_level();
+ return 0;
+ } else
+ for (i = 0; i < ELEMENTSOF(gnutls_log_map); i++)
+ if (strv_contains((char**)gnutls_log_map[i].names, cat)) {
+ gnutls_log_map[i].enabled = true;
+ log_reset_gnutls_level();
+ return 0;
+ }
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No such log category: %s", cat);
+}
+
+int setup_gnutls_logger(char **categories) {
+ char **cat;
+ int r;
+
+ gnutls_global_set_log_function(log_func_gnutls);
+
+ if (categories) {
+ STRV_FOREACH(cat, categories) {
+ r = log_enable_gnutls_category(*cat);
+ if (r < 0)
+ return r;
+ }
+ } else
+ log_reset_gnutls_level();
+
+ return 0;
+}
+
+static int verify_cert_authorized(gnutls_session_t session) {
+ unsigned status;
+ gnutls_certificate_type_t type;
+ gnutls_datum_t out;
+ int r;
+
+ r = gnutls_certificate_verify_peers2(session, &status);
+ if (r < 0)
+ return log_error_errno(r, "gnutls_certificate_verify_peers2 failed: %m");
+
+ type = gnutls_certificate_type_get(session);
+ r = gnutls_certificate_verification_status_print(status, type, &out, 0);
+ if (r < 0)
+ return log_error_errno(r, "gnutls_certificate_verification_status_print failed: %m");
+
+ log_debug("Certificate status: %s", out.data);
+ gnutls_free(out.data);
+
+ return status == 0 ? 0 : -EPERM;
+}
+
+static int get_client_cert(gnutls_session_t session, gnutls_x509_crt_t *client_cert) {
+ const gnutls_datum_t *pcert;
+ unsigned listsize;
+ gnutls_x509_crt_t cert;
+ int r;
+
+ assert(session);
+ assert(client_cert);
+
+ pcert = gnutls_certificate_get_peers(session, &listsize);
+ if (!pcert || !listsize)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to retrieve certificate chain");
+
+ r = gnutls_x509_crt_init(&cert);
+ if (r < 0) {
+ log_error("Failed to initialize client certificate");
+ return r;
+ }
+
+ /* Note that by passing values between 0 and listsize here, you
+ can get access to the CA's certs */
+ r = gnutls_x509_crt_import(cert, &pcert[0], GNUTLS_X509_FMT_DER);
+ if (r < 0) {
+ log_error("Failed to import client certificate");
+ gnutls_x509_crt_deinit(cert);
+ return r;
+ }
+
+ *client_cert = cert;
+ return 0;
+}
+
+static int get_auth_dn(gnutls_x509_crt_t client_cert, char **buf) {
+ size_t len = 0;
+ int r;
+
+ assert(buf);
+ assert(*buf == NULL);
+
+ r = gnutls_x509_crt_get_dn(client_cert, NULL, &len);
+ if (r != GNUTLS_E_SHORT_MEMORY_BUFFER) {
+ log_error("gnutls_x509_crt_get_dn failed");
+ return r;
+ }
+
+ *buf = malloc(len);
+ if (!*buf)
+ return log_oom();
+
+ gnutls_x509_crt_get_dn(client_cert, *buf, &len);
+ return 0;
+}
+
+static void gnutls_x509_crt_deinitp(gnutls_x509_crt_t *p) {
+ gnutls_x509_crt_deinit(*p);
+}
+
+int check_permissions(struct MHD_Connection *connection, int *code, char **hostname) {
+ const union MHD_ConnectionInfo *ci;
+ gnutls_session_t session;
+ _cleanup_(gnutls_x509_crt_deinitp) gnutls_x509_crt_t client_cert = NULL;
+ _cleanup_free_ char *buf = NULL;
+ int r;
+
+ assert(connection);
+ assert(code);
+
+ *code = 0;
+
+ ci = MHD_get_connection_info(connection,
+ MHD_CONNECTION_INFO_GNUTLS_SESSION);
+ if (!ci) {
+ log_error("MHD_get_connection_info failed: session is unencrypted");
+ *code = mhd_respond(connection, MHD_HTTP_FORBIDDEN,
+ "Encrypted connection is required");
+ return -EPERM;
+ }
+ session = ci->tls_session;
+ assert(session);
+
+ r = get_client_cert(session, &client_cert);
+ if (r < 0) {
+ *code = mhd_respond(connection, MHD_HTTP_UNAUTHORIZED,
+ "Authorization through certificate is required");
+ return -EPERM;
+ }
+
+ r = get_auth_dn(client_cert, &buf);
+ if (r < 0) {
+ *code = mhd_respond(connection, MHD_HTTP_UNAUTHORIZED,
+ "Failed to determine distinguished name from certificate");
+ return -EPERM;
+ }
+
+ log_debug("Connection from %s", buf);
+
+ if (hostname)
+ *hostname = TAKE_PTR(buf);
+
+ r = verify_cert_authorized(session);
+ if (r < 0) {
+ log_warning("Client is not authorized");
+ *code = mhd_respond(connection, MHD_HTTP_UNAUTHORIZED,
+ "Client certificate not signed by recognized authority");
+ }
+ return r;
+}
+
+#else
+int check_permissions(struct MHD_Connection *connection, int *code, char **hostname) {
+ return -EPERM;
+}
+
+int setup_gnutls_logger(char **categories) {
+ if (categories)
+ log_notice("Ignoring specified gnutls logging categories — gnutls not available.");
+ return 0;
+}
+#endif
diff --git a/src/journal-remote/microhttpd-util.h b/src/journal-remote/microhttpd-util.h
new file mode 100644
index 0000000..ba51d84
--- /dev/null
+++ b/src/journal-remote/microhttpd-util.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <microhttpd.h>
+#include <stdarg.h>
+
+#include "macro.h"
+
+/* Those defines are added when options are renamed. If the old names
+ * are not '#define'd, then they are not deprecated yet and there are
+ * enum elements with the same name. Hence let's check for the *old* name,
+ * and define the new name by the value of the old name. */
+
+/* Renamed in µhttpd 0.9.51 */
+#ifndef MHD_USE_PIPE_FOR_SHUTDOWN
+# define MHD_USE_ITC MHD_USE_PIPE_FOR_SHUTDOWN
+#endif
+
+/* Renamed in µhttpd 0.9.52 */
+#ifndef MHD_USE_EPOLL_LINUX_ONLY
+# define MHD_USE_EPOLL MHD_USE_EPOLL_LINUX_ONLY
+#endif
+
+/* Renamed in µhttpd 0.9.52 */
+#ifndef MHD_USE_SSL
+# define MHD_USE_TLS MHD_USE_SSL
+#endif
+
+/* Renamed in µhttpd 0.9.53 */
+#ifndef MHD_USE_POLL_INTERNALLY
+# define MHD_USE_POLL_INTERNAL_THREAD MHD_USE_POLL_INTERNALLY
+#endif
+
+/* Both the old and new names are defines, check for the new one. */
+
+/* Compatiblity with libmicrohttpd < 0.9.38 */
+#ifndef MHD_HTTP_NOT_ACCEPTABLE
+# define MHD_HTTP_NOT_ACCEPTABLE MHD_HTTP_METHOD_NOT_ACCEPTABLE
+#endif
+
+/* Renamed in µhttpd 0.9.53 */
+#ifndef MHD_HTTP_PAYLOAD_TOO_LARGE
+# define MHD_HTTP_PAYLOAD_TOO_LARGE MHD_HTTP_REQUEST_ENTITY_TOO_LARGE
+#endif
+
+#if MHD_VERSION < 0x00094203
+# define MHD_create_response_from_fd_at_offset64 MHD_create_response_from_fd_at_offset
+#endif
+
+void microhttpd_logger(void *arg, const char *fmt, va_list ap) _printf_(2, 0);
+
+/* respond_oom() must be usable with return, hence this form. */
+#define respond_oom(connection) log_oom(), mhd_respond_oom(connection)
+
+int mhd_respondf(struct MHD_Connection *connection,
+ int error,
+ unsigned code,
+ const char *format, ...) _printf_(4,5);
+
+int mhd_respond(struct MHD_Connection *connection,
+ unsigned code,
+ const char *message);
+
+int mhd_respond_oom(struct MHD_Connection *connection);
+
+int check_permissions(struct MHD_Connection *connection, int *code, char **hostname);
+
+/* Set gnutls internal logging function to a callback which uses our
+ * own logging framework.
+ *
+ * gnutls categories are additionally filtered by our internal log
+ * level, so it should be set fairly high to capture all potentially
+ * interesting events without overwhelming detail.
+ */
+int setup_gnutls_logger(char **categories);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct MHD_Daemon*, MHD_stop_daemon);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct MHD_Response*, MHD_destroy_response);
diff --git a/src/journal/audit-type.c b/src/journal/audit-type.c
new file mode 100644
index 0000000..7b3dc1e
--- /dev/null
+++ b/src/journal/audit-type.c
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "audit-type.h"
+#include "missing_audit.h"
+
+#include "audit_type-to-name.h"
diff --git a/src/journal/audit-type.h b/src/journal/audit-type.h
new file mode 100644
index 0000000..98e5c39
--- /dev/null
+++ b/src/journal/audit-type.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <alloca.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+
+const char *audit_type_to_string(int type);
+int audit_type_from_string(const char *s);
+
+/* This is inspired by DNS TYPEnnn formatting */
+#define audit_type_name_alloca(type) \
+ ({ \
+ const char *_s_; \
+ _s_ = audit_type_to_string(type); \
+ if (!_s_) { \
+ _s_ = newa(char, STRLEN("AUDIT") + DECIMAL_STR_MAX(int)); \
+ sprintf((char*) _s_, "AUDIT%04i", type); \
+ } \
+ _s_; \
+ })
diff --git a/src/journal/audit_type-to-name.awk b/src/journal/audit_type-to-name.awk
new file mode 100644
index 0000000..44fc702
--- /dev/null
+++ b/src/journal/audit_type-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{
+ print "const char *audit_type_to_string(int type) {\n\tswitch(type) {"
+}
+{
+ printf " case AUDIT_%s: return \"%s\";\n", $1, $1
+}
+END{
+ print " default: return NULL;\n\t}\n}\n"
+}
diff --git a/src/journal/cat.c b/src/journal/cat.c
new file mode 100644
index 0000000..9900bd2
--- /dev/null
+++ b/src/journal/cat.c
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "util.h"
+
+static const char *arg_identifier = NULL;
+static int arg_priority = LOG_INFO;
+static int arg_stderr_priority = -1;
+static bool arg_level_prefix = true;
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-cat", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Execute process with stdout/stderr connected to the journal.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -t --identifier=STRING Set syslog identifier\n"
+ " -p --priority=PRIORITY Set priority value (0..7)\n"
+ " --stderr-priority=PRIORITY Set priority value (0..7) used for stderr\n"
+ " --level-prefix=BOOL Control whether level prefix shall be parsed\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_STDERR_PRIORITY,
+ ARG_LEVEL_PREFIX
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "identifier", required_argument, NULL, 't' },
+ { "priority", required_argument, NULL, 'p' },
+ { "stderr-priority", required_argument, NULL, ARG_STDERR_PRIORITY },
+ { "level-prefix", required_argument, NULL, ARG_LEVEL_PREFIX },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "+ht:p:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ help();
+ return 0;
+
+ case ARG_VERSION:
+ return version();
+
+ case 't':
+ if (isempty(optarg))
+ arg_identifier = NULL;
+ else
+ arg_identifier = optarg;
+ break;
+
+ case 'p':
+ arg_priority = log_level_from_string(optarg);
+ if (arg_priority < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse priority value.");
+ break;
+
+ case ARG_STDERR_PRIORITY:
+ arg_stderr_priority = log_level_from_string(optarg);
+ if (arg_stderr_priority < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse stderr priority value.");
+ break;
+
+ case ARG_LEVEL_PREFIX: {
+ int k;
+
+ k = parse_boolean(optarg);
+ if (k < 0)
+ return log_error_errno(k, "Failed to parse level prefix value.");
+
+ arg_level_prefix = k;
+ break;
+ }
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_close_ int outfd = -1, errfd = -1, saved_stderr = -1;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ outfd = sd_journal_stream_fd(arg_identifier, arg_priority, arg_level_prefix);
+ if (outfd < 0)
+ return log_error_errno(outfd, "Failed to create stream fd: %m");
+
+ if (arg_stderr_priority >= 0 && arg_stderr_priority != arg_priority) {
+ errfd = sd_journal_stream_fd(arg_identifier, arg_stderr_priority, arg_level_prefix);
+ if (errfd < 0)
+ return log_error_errno(errfd, "Failed to create stream fd: %m");
+ }
+
+ saved_stderr = fcntl(STDERR_FILENO, F_DUPFD_CLOEXEC, 3);
+
+ r = rearrange_stdio(STDIN_FILENO, outfd, errfd < 0 ? outfd : errfd); /* Invalidates fd on succcess + error! */
+ TAKE_FD(outfd);
+ TAKE_FD(errfd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to rearrange stdout/stderr: %m");
+
+ if (argc <= optind)
+ (void) execl("/bin/cat", "/bin/cat", NULL);
+ else
+ (void) execvp(argv[optind], argv + optind);
+ r = -errno;
+
+ /* Let's try to restore a working stderr, so we can print the error message */
+ if (saved_stderr >= 0)
+ (void) dup3(saved_stderr, STDERR_FILENO, 0);
+
+ return log_error_errno(r, "Failed to execute process: %m");
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/journal/catalog.c b/src/journal/catalog.c
new file mode 100644
index 0000000..4062f12
--- /dev/null
+++ b/src/journal/catalog.c
@@ -0,0 +1,743 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "catalog.h"
+#include "conf-files.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "log.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "siphash24.h"
+#include "sparse-endian.h"
+#include "strbuf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+const char * const catalog_file_dirs[] = {
+ "/usr/local/lib/systemd/catalog/",
+ "/usr/lib/systemd/catalog/",
+ NULL
+};
+
+#define CATALOG_SIGNATURE (uint8_t[]) { 'R', 'H', 'H', 'H', 'K', 'S', 'L', 'P' }
+
+typedef struct CatalogHeader {
+ uint8_t signature[8]; /* "RHHHKSLP" */
+ le32_t compatible_flags;
+ le32_t incompatible_flags;
+ le64_t header_size;
+ le64_t n_items;
+ le64_t catalog_item_size;
+} CatalogHeader;
+
+typedef struct CatalogItem {
+ sd_id128_t id;
+ char language[32]; /* One byte is used for termination, so the maximum allowed
+ * length of the string is actually 31 bytes. */
+ le64_t offset;
+} CatalogItem;
+
+static void catalog_hash_func(const CatalogItem *i, struct siphash *state) {
+ siphash24_compress(&i->id, sizeof(i->id), state);
+ siphash24_compress(i->language, strlen(i->language), state);
+}
+
+static int catalog_compare_func(const CatalogItem *a, const CatalogItem *b) {
+ unsigned k;
+ int r;
+
+ for (k = 0; k < ELEMENTSOF(b->id.bytes); k++) {
+ r = CMP(a->id.bytes[k], b->id.bytes[k]);
+ if (r != 0)
+ return r;
+ }
+
+ return strcmp(a->language, b->language);
+}
+
+DEFINE_HASH_OPS(catalog_hash_ops, CatalogItem, catalog_hash_func, catalog_compare_func);
+
+static bool next_header(const char **s) {
+ const char *e;
+
+ e = strchr(*s, '\n');
+
+ /* Unexpected end */
+ if (!e)
+ return false;
+
+ /* End of headers */
+ if (e == *s)
+ return false;
+
+ *s = e + 1;
+ return true;
+}
+
+static const char *skip_header(const char *s) {
+ while (next_header(&s))
+ ;
+ return s;
+}
+
+static char *combine_entries(const char *one, const char *two) {
+ const char *b1, *b2;
+ size_t l1, l2, n;
+ char *dest, *p;
+
+ /* Find split point of headers to body */
+ b1 = skip_header(one);
+ b2 = skip_header(two);
+
+ l1 = strlen(one);
+ l2 = strlen(two);
+ dest = new(char, l1 + l2 + 1);
+ if (!dest) {
+ log_oom();
+ return NULL;
+ }
+
+ p = dest;
+
+ /* Headers from @one */
+ n = b1 - one;
+ p = mempcpy(p, one, n);
+
+ /* Headers from @two, these will only be found if not present above */
+ n = b2 - two;
+ p = mempcpy(p, two, n);
+
+ /* Body from @one */
+ n = l1 - (b1 - one);
+ if (n > 0) {
+ memcpy(p, b1, n);
+ p += n;
+
+ /* Body from @two */
+ } else {
+ n = l2 - (b2 - two);
+ memcpy(p, b2, n);
+ p += n;
+ }
+
+ assert(p - dest <= (ptrdiff_t)(l1 + l2));
+ p[0] = '\0';
+ return dest;
+}
+
+static int finish_item(
+ Hashmap *h,
+ sd_id128_t id,
+ const char *language,
+ char *payload, size_t payload_size) {
+
+ _cleanup_free_ CatalogItem *i = NULL;
+ _cleanup_free_ char *prev = NULL, *combined = NULL;
+
+ assert(h);
+ assert(payload);
+ assert(payload_size > 0);
+
+ i = new0(CatalogItem, 1);
+ if (!i)
+ return log_oom();
+
+ i->id = id;
+ if (language) {
+ assert(strlen(language) > 1 && strlen(language) < 32);
+ strcpy(i->language, language);
+ }
+
+ prev = hashmap_get(h, i);
+ if (prev) {
+ /* Already have such an item, combine them */
+ combined = combine_entries(payload, prev);
+ if (!combined)
+ return log_oom();
+
+ if (hashmap_update(h, i, combined) < 0)
+ return log_oom();
+ combined = NULL;
+ } else {
+ /* A new item */
+ combined = memdup(payload, payload_size + 1);
+ if (!combined)
+ return log_oom();
+
+ if (hashmap_put(h, i, combined) < 0)
+ return log_oom();
+ i = NULL;
+ combined = NULL;
+ }
+
+ return 0;
+}
+
+int catalog_file_lang(const char* filename, char **lang) {
+ char *beg, *end, *_lang;
+
+ end = endswith(filename, ".catalog");
+ if (!end)
+ return 0;
+
+ beg = end - 1;
+ while (beg > filename && !IN_SET(*beg, '.', '/') && end - beg < 32)
+ beg--;
+
+ if (*beg != '.' || end <= beg + 1)
+ return 0;
+
+ _lang = strndup(beg + 1, end - beg - 1);
+ if (!_lang)
+ return -ENOMEM;
+
+ *lang = _lang;
+ return 1;
+}
+
+static int catalog_entry_lang(const char* filename, int line,
+ const char* t, const char* deflang, char **lang) {
+ size_t c;
+
+ c = strlen(t);
+ if (c < 2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Language too short.",
+ filename, line);
+ if (c > 31)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] language too long.", filename,
+ line);
+
+ if (deflang) {
+ if (streq(t, deflang)) {
+ log_warning("[%s:%u] language specified unnecessarily",
+ filename, line);
+ return 0;
+ } else
+ log_warning("[%s:%u] language differs from default for file",
+ filename, line);
+ }
+
+ *lang = strdup(t);
+ if (!*lang)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int catalog_import_file(Hashmap *h, const char *path) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *payload = NULL;
+ size_t payload_size = 0, payload_allocated = 0;
+ unsigned n = 0;
+ sd_id128_t id;
+ _cleanup_free_ char *deflang = NULL, *lang = NULL;
+ bool got_id = false, empty_line = true;
+ int r;
+
+ assert(h);
+ assert(path);
+
+ f = fopen(path, "re");
+ if (!f)
+ return log_error_errno(errno, "Failed to open file %s: %m", path);
+
+ r = catalog_file_lang(path, &deflang);
+ if (r < 0)
+ log_error_errno(r, "Failed to determine language for file %s: %m", path);
+ if (r == 1)
+ log_debug("File %s has language %s.", path, deflang);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ size_t line_len;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read file %s: %m", path);
+ if (r == 0)
+ break;
+
+ n++;
+
+ if (isempty(line)) {
+ empty_line = true;
+ continue;
+ }
+
+ if (strchr(COMMENTS, line[0]))
+ continue;
+
+ if (empty_line &&
+ strlen(line) >= 2+1+32 &&
+ line[0] == '-' &&
+ line[1] == '-' &&
+ line[2] == ' ' &&
+ IN_SET(line[2+1+32], ' ', '\0')) {
+
+ bool with_language;
+ sd_id128_t jd;
+
+ /* New entry */
+
+ with_language = line[2+1+32] != '\0';
+ line[2+1+32] = '\0';
+
+ if (sd_id128_from_string(line + 2 + 1, &jd) >= 0) {
+
+ if (got_id) {
+ if (payload_size == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] No payload text.",
+ path,
+ n);
+
+ r = finish_item(h, id, lang ?: deflang, payload, payload_size);
+ if (r < 0)
+ return r;
+
+ lang = mfree(lang);
+ payload_size = 0;
+ }
+
+ if (with_language) {
+ char *t;
+
+ t = strstrip(line + 2 + 1 + 32 + 1);
+ r = catalog_entry_lang(path, n, t, deflang, &lang);
+ if (r < 0)
+ return r;
+ }
+
+ got_id = true;
+ empty_line = false;
+ id = jd;
+
+ continue;
+ }
+ }
+
+ /* Payload */
+ if (!got_id)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Got payload before ID.",
+ path, n);
+
+ line_len = strlen(line);
+ if (!GREEDY_REALLOC(payload, payload_allocated,
+ payload_size + (empty_line ? 1 : 0) + line_len + 1 + 1))
+ return log_oom();
+
+ if (empty_line)
+ payload[payload_size++] = '\n';
+ memcpy(payload + payload_size, line, line_len);
+ payload_size += line_len;
+ payload[payload_size++] = '\n';
+ payload[payload_size] = '\0';
+
+ empty_line = false;
+ }
+
+ if (got_id) {
+ if (payload_size == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] No payload text.",
+ path, n);
+
+ r = finish_item(h, id, lang ?: deflang, payload, payload_size);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int64_t write_catalog(const char *database, struct strbuf *sb,
+ CatalogItem *items, size_t n) {
+ CatalogHeader header;
+ _cleanup_fclose_ FILE *w = NULL;
+ int r;
+ _cleanup_free_ char *d, *p = NULL;
+ size_t k;
+
+ d = dirname_malloc(database);
+ if (!d)
+ return log_oom();
+
+ r = mkdir_p(d, 0775);
+ if (r < 0)
+ return log_error_errno(r, "Recursive mkdir %s: %m", d);
+
+ r = fopen_temporary(database, &w, &p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open database for writing: %s: %m",
+ database);
+
+ zero(header);
+ memcpy(header.signature, CATALOG_SIGNATURE, sizeof(header.signature));
+ header.header_size = htole64(ALIGN_TO(sizeof(CatalogHeader), 8));
+ header.catalog_item_size = htole64(sizeof(CatalogItem));
+ header.n_items = htole64(n);
+
+ r = -EIO;
+
+ k = fwrite(&header, 1, sizeof(header), w);
+ if (k != sizeof(header)) {
+ log_error("%s: failed to write header.", p);
+ goto error;
+ }
+
+ k = fwrite(items, 1, n * sizeof(CatalogItem), w);
+ if (k != n * sizeof(CatalogItem)) {
+ log_error("%s: failed to write database.", p);
+ goto error;
+ }
+
+ k = fwrite(sb->buf, 1, sb->len, w);
+ if (k != sb->len) {
+ log_error("%s: failed to write strings.", p);
+ goto error;
+ }
+
+ r = fflush_and_check(w);
+ if (r < 0) {
+ log_error_errno(r, "%s: failed to write database: %m", p);
+ goto error;
+ }
+
+ fchmod(fileno(w), 0644);
+
+ if (rename(p, database) < 0) {
+ r = log_error_errno(errno, "rename (%s -> %s) failed: %m", p, database);
+ goto error;
+ }
+
+ return ftello(w);
+
+error:
+ (void) unlink(p);
+ return r;
+}
+
+int catalog_update(const char* database, const char* root, const char* const* dirs) {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ _cleanup_(strbuf_cleanupp) struct strbuf *sb = NULL;
+ _cleanup_hashmap_free_free_free_ Hashmap *h = NULL;
+ _cleanup_free_ CatalogItem *items = NULL;
+ ssize_t offset;
+ char *payload;
+ CatalogItem *i;
+ Iterator j;
+ unsigned n;
+ int r;
+ int64_t sz;
+
+ h = hashmap_new(&catalog_hash_ops);
+ sb = strbuf_new();
+ if (!h || !sb)
+ return log_oom();
+
+ r = conf_files_list_strv(&files, ".catalog", root, 0, dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get catalog files: %m");
+
+ STRV_FOREACH(f, files) {
+ log_debug("Reading file '%s'", *f);
+ r = catalog_import_file(h, *f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to import file '%s': %m", *f);
+ }
+
+ if (hashmap_size(h) <= 0) {
+ log_info("No items in catalog.");
+ return 0;
+ } else
+ log_debug("Found %u items in catalog.", hashmap_size(h));
+
+ items = new(CatalogItem, hashmap_size(h));
+ if (!items)
+ return log_oom();
+
+ n = 0;
+ HASHMAP_FOREACH_KEY(payload, i, h, j) {
+ log_debug("Found " SD_ID128_FORMAT_STR ", language %s",
+ SD_ID128_FORMAT_VAL(i->id),
+ isempty(i->language) ? "C" : i->language);
+
+ offset = strbuf_add_string(sb, payload, strlen(payload));
+ if (offset < 0)
+ return log_oom();
+
+ i->offset = htole64((uint64_t) offset);
+ items[n++] = *i;
+ }
+
+ assert(n == hashmap_size(h));
+ typesafe_qsort(items, n, catalog_compare_func);
+
+ strbuf_complete(sb);
+
+ sz = write_catalog(database, sb, items, n);
+ if (sz < 0)
+ return log_error_errno(sz, "Failed to write %s: %m", database);
+
+ log_debug("%s: wrote %u items, with %zu bytes of strings, %"PRIi64" total size.",
+ database, n, sb->len, sz);
+ return 0;
+}
+
+static int open_mmap(const char *database, int *_fd, struct stat *_st, void **_p) {
+ const CatalogHeader *h;
+ int fd;
+ void *p;
+ struct stat st;
+
+ assert(_fd);
+ assert(_st);
+ assert(_p);
+
+ fd = open(database, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ if (st.st_size < (off_t) sizeof(CatalogHeader)) {
+ safe_close(fd);
+ return -EINVAL;
+ }
+
+ p = mmap(NULL, PAGE_ALIGN(st.st_size), PROT_READ, MAP_SHARED, fd, 0);
+ if (p == MAP_FAILED) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ h = p;
+ if (memcmp(h->signature, CATALOG_SIGNATURE, sizeof(h->signature)) != 0 ||
+ le64toh(h->header_size) < sizeof(CatalogHeader) ||
+ le64toh(h->catalog_item_size) < sizeof(CatalogItem) ||
+ h->incompatible_flags != 0 ||
+ le64toh(h->n_items) <= 0 ||
+ st.st_size < (off_t) (le64toh(h->header_size) + le64toh(h->catalog_item_size) * le64toh(h->n_items))) {
+ safe_close(fd);
+ munmap(p, st.st_size);
+ return -EBADMSG;
+ }
+
+ *_fd = fd;
+ *_st = st;
+ *_p = p;
+
+ return 0;
+}
+
+static const char *find_id(void *p, sd_id128_t id) {
+ CatalogItem *f = NULL, key = { .id = id };
+ const CatalogHeader *h = p;
+ const char *loc;
+
+ loc = setlocale(LC_MESSAGES, NULL);
+ if (!isempty(loc) && !STR_IN_SET(loc, "C", "POSIX")) {
+ size_t len;
+
+ len = strcspn(loc, ".@");
+ if (len > sizeof(key.language) - 1)
+ log_debug("LC_MESSAGES value too long, ignoring: \"%.*s\"", (int) len, loc);
+ else {
+ strncpy(key.language, loc, len);
+ key.language[len] = '\0';
+
+ f = bsearch(&key,
+ (const uint8_t*) p + le64toh(h->header_size),
+ le64toh(h->n_items),
+ le64toh(h->catalog_item_size),
+ (comparison_fn_t) catalog_compare_func);
+ if (!f) {
+ char *e;
+
+ e = strchr(key.language, '_');
+ if (e) {
+ *e = 0;
+ f = bsearch(&key,
+ (const uint8_t*) p + le64toh(h->header_size),
+ le64toh(h->n_items),
+ le64toh(h->catalog_item_size),
+ (comparison_fn_t) catalog_compare_func);
+ }
+ }
+ }
+ }
+
+ if (!f) {
+ zero(key.language);
+ f = bsearch(&key,
+ (const uint8_t*) p + le64toh(h->header_size),
+ le64toh(h->n_items),
+ le64toh(h->catalog_item_size),
+ (comparison_fn_t) catalog_compare_func);
+ }
+
+ if (!f)
+ return NULL;
+
+ return (const char*) p +
+ le64toh(h->header_size) +
+ le64toh(h->n_items) * le64toh(h->catalog_item_size) +
+ le64toh(f->offset);
+}
+
+int catalog_get(const char* database, sd_id128_t id, char **_text) {
+ _cleanup_close_ int fd = -1;
+ void *p = NULL;
+ struct stat st = {};
+ char *text = NULL;
+ int r;
+ const char *s;
+
+ assert(_text);
+
+ r = open_mmap(database, &fd, &st, &p);
+ if (r < 0)
+ return r;
+
+ s = find_id(p, id);
+ if (!s) {
+ r = -ENOENT;
+ goto finish;
+ }
+
+ text = strdup(s);
+ if (!text) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ *_text = text;
+ r = 0;
+
+finish:
+ if (p)
+ munmap(p, st.st_size);
+
+ return r;
+}
+
+static char *find_header(const char *s, const char *header) {
+
+ for (;;) {
+ const char *v;
+
+ v = startswith(s, header);
+ if (v) {
+ v += strspn(v, WHITESPACE);
+ return strndup(v, strcspn(v, NEWLINE));
+ }
+
+ if (!next_header(&s))
+ return NULL;
+ }
+}
+
+static void dump_catalog_entry(FILE *f, sd_id128_t id, const char *s, bool oneline) {
+ if (oneline) {
+ _cleanup_free_ char *subject = NULL, *defined_by = NULL;
+
+ subject = find_header(s, "Subject:");
+ defined_by = find_header(s, "Defined-By:");
+
+ fprintf(f, SD_ID128_FORMAT_STR " %s: %s\n",
+ SD_ID128_FORMAT_VAL(id),
+ strna(defined_by), strna(subject));
+ } else
+ fprintf(f, "-- " SD_ID128_FORMAT_STR "\n%s\n",
+ SD_ID128_FORMAT_VAL(id), s);
+}
+
+int catalog_list(FILE *f, const char *database, bool oneline) {
+ _cleanup_close_ int fd = -1;
+ void *p = NULL;
+ struct stat st;
+ const CatalogHeader *h;
+ const CatalogItem *items;
+ int r;
+ unsigned n;
+ sd_id128_t last_id;
+ bool last_id_set = false;
+
+ r = open_mmap(database, &fd, &st, &p);
+ if (r < 0)
+ return r;
+
+ h = p;
+ items = (const CatalogItem*) ((const uint8_t*) p + le64toh(h->header_size));
+
+ for (n = 0; n < le64toh(h->n_items); n++) {
+ const char *s;
+
+ if (last_id_set && sd_id128_equal(last_id, items[n].id))
+ continue;
+
+ assert_se(s = find_id(p, items[n].id));
+
+ dump_catalog_entry(f, items[n].id, s, oneline);
+
+ last_id_set = true;
+ last_id = items[n].id;
+ }
+
+ munmap(p, st.st_size);
+
+ return 0;
+}
+
+int catalog_list_items(FILE *f, const char *database, bool oneline, char **items) {
+ char **item;
+ int r = 0;
+
+ STRV_FOREACH(item, items) {
+ sd_id128_t id;
+ int k;
+ _cleanup_free_ char *msg = NULL;
+
+ k = sd_id128_from_string(*item, &id);
+ if (k < 0) {
+ log_error_errno(k, "Failed to parse id128 '%s': %m", *item);
+ if (r == 0)
+ r = k;
+ continue;
+ }
+
+ k = catalog_get(database, id, &msg);
+ if (k < 0) {
+ log_full_errno(k == -ENOENT ? LOG_NOTICE : LOG_ERR, k,
+ "Failed to retrieve catalog entry for '%s': %m", *item);
+ if (r == 0)
+ r = k;
+ continue;
+ }
+
+ dump_catalog_entry(f, id, msg, oneline);
+ }
+
+ return r;
+}
diff --git a/src/journal/catalog.h b/src/journal/catalog.h
new file mode 100644
index 0000000..4e6f161
--- /dev/null
+++ b/src/journal/catalog.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-id128.h"
+
+#include "hashmap.h"
+#include "strbuf.h"
+
+int catalog_import_file(Hashmap *h, const char *path);
+int catalog_update(const char* database, const char* root, const char* const* dirs);
+int catalog_get(const char* database, sd_id128_t id, char **data);
+int catalog_list(FILE *f, const char* database, bool oneline);
+int catalog_list_items(FILE *f, const char* database, bool oneline, char **items);
+int catalog_file_lang(const char *filename, char **lang);
+extern const char * const catalog_file_dirs[];
+extern const struct hash_ops catalog_hash_ops;
diff --git a/src/journal/compress.c b/src/journal/compress.c
new file mode 100644
index 0000000..e95ce2b
--- /dev/null
+++ b/src/journal/compress.c
@@ -0,0 +1,677 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#if HAVE_XZ
+#include <lzma.h>
+#endif
+
+#if HAVE_LZ4
+#include <lz4.h>
+#include <lz4frame.h>
+#endif
+
+#include "alloc-util.h"
+#include "compress.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "journal-def.h"
+#include "macro.h"
+#include "sparse-endian.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "util.h"
+
+#if HAVE_LZ4
+DEFINE_TRIVIAL_CLEANUP_FUNC(LZ4F_compressionContext_t, LZ4F_freeCompressionContext);
+DEFINE_TRIVIAL_CLEANUP_FUNC(LZ4F_decompressionContext_t, LZ4F_freeDecompressionContext);
+#endif
+
+#define ALIGN_8(l) ALIGN_TO(l, sizeof(size_t))
+
+static const char* const object_compressed_table[_OBJECT_COMPRESSED_MAX] = {
+ [OBJECT_COMPRESSED_XZ] = "XZ",
+ [OBJECT_COMPRESSED_LZ4] = "LZ4",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(object_compressed, int);
+
+int compress_blob_xz(const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size) {
+#if HAVE_XZ
+ static const lzma_options_lzma opt = {
+ 1u << 20u, NULL, 0, LZMA_LC_DEFAULT, LZMA_LP_DEFAULT,
+ LZMA_PB_DEFAULT, LZMA_MODE_FAST, 128, LZMA_MF_HC3, 4
+ };
+ static const lzma_filter filters[] = {
+ { LZMA_FILTER_LZMA2, (lzma_options_lzma*) &opt },
+ { LZMA_VLI_UNKNOWN, NULL }
+ };
+ lzma_ret ret;
+ size_t out_pos = 0;
+
+ assert(src);
+ assert(src_size > 0);
+ assert(dst);
+ assert(dst_alloc_size > 0);
+ assert(dst_size);
+
+ /* Returns < 0 if we couldn't compress the data or the
+ * compressed result is longer than the original */
+
+ if (src_size < 80)
+ return -ENOBUFS;
+
+ ret = lzma_stream_buffer_encode((lzma_filter*) filters, LZMA_CHECK_NONE, NULL,
+ src, src_size, dst, &out_pos, dst_alloc_size);
+ if (ret != LZMA_OK)
+ return -ENOBUFS;
+
+ *dst_size = out_pos;
+ return 0;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int compress_blob_lz4(const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size) {
+#if HAVE_LZ4
+ int r;
+
+ assert(src);
+ assert(src_size > 0);
+ assert(dst);
+ assert(dst_alloc_size > 0);
+ assert(dst_size);
+
+ /* Returns < 0 if we couldn't compress the data or the
+ * compressed result is longer than the original */
+
+ if (src_size < 9)
+ return -ENOBUFS;
+
+ r = LZ4_compress_default(src, (char*)dst + 8, src_size, (int) dst_alloc_size - 8);
+ if (r <= 0)
+ return -ENOBUFS;
+
+ *(le64_t*) dst = htole64(src_size);
+ *dst_size = r + 8;
+
+ return 0;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_blob_xz(const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max) {
+
+#if HAVE_XZ
+ _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT;
+ lzma_ret ret;
+ size_t space;
+
+ assert(src);
+ assert(src_size > 0);
+ assert(dst);
+ assert(dst_alloc_size);
+ assert(dst_size);
+ assert(*dst_alloc_size == 0 || *dst);
+
+ ret = lzma_stream_decoder(&s, UINT64_MAX, 0);
+ if (ret != LZMA_OK)
+ return -ENOMEM;
+
+ space = MIN(src_size * 2, dst_max ?: (size_t) -1);
+ if (!greedy_realloc(dst, dst_alloc_size, space, 1))
+ return -ENOMEM;
+
+ s.next_in = src;
+ s.avail_in = src_size;
+
+ s.next_out = *dst;
+ s.avail_out = space;
+
+ for (;;) {
+ size_t used;
+
+ ret = lzma_code(&s, LZMA_FINISH);
+
+ if (ret == LZMA_STREAM_END)
+ break;
+ else if (ret != LZMA_OK)
+ return -ENOMEM;
+
+ if (dst_max > 0 && (space - s.avail_out) >= dst_max)
+ break;
+ else if (dst_max > 0 && space == dst_max)
+ return -ENOBUFS;
+
+ used = space - s.avail_out;
+ space = MIN(2 * space, dst_max ?: (size_t) -1);
+ if (!greedy_realloc(dst, dst_alloc_size, space, 1))
+ return -ENOMEM;
+
+ s.avail_out = space - used;
+ s.next_out = *(uint8_t**)dst + used;
+ }
+
+ *dst_size = space - s.avail_out;
+ return 0;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_blob_lz4(const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max) {
+
+#if HAVE_LZ4
+ char* out;
+ int r, size; /* LZ4 uses int for size */
+
+ assert(src);
+ assert(src_size > 0);
+ assert(dst);
+ assert(dst_alloc_size);
+ assert(dst_size);
+ assert(*dst_alloc_size == 0 || *dst);
+
+ if (src_size <= 8)
+ return -EBADMSG;
+
+ size = le64toh( *(le64_t*)src );
+ if (size < 0 || (unsigned) size != le64toh(*(le64_t*)src))
+ return -EFBIG;
+ if ((size_t) size > *dst_alloc_size) {
+ out = realloc(*dst, size);
+ if (!out)
+ return -ENOMEM;
+ *dst = out;
+ *dst_alloc_size = size;
+ } else
+ out = *dst;
+
+ r = LZ4_decompress_safe((char*)src + 8, out, src_size - 8, size);
+ if (r < 0 || r != size)
+ return -EBADMSG;
+
+ *dst_size = size;
+ return 0;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_blob(int compression,
+ const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max) {
+ if (compression == OBJECT_COMPRESSED_XZ)
+ return decompress_blob_xz(src, src_size,
+ dst, dst_alloc_size, dst_size, dst_max);
+ else if (compression == OBJECT_COMPRESSED_LZ4)
+ return decompress_blob_lz4(src, src_size,
+ dst, dst_alloc_size, dst_size, dst_max);
+ else
+ return -EBADMSG;
+}
+
+int decompress_startswith_xz(const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra) {
+
+#if HAVE_XZ
+ _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT;
+ lzma_ret ret;
+
+ /* Checks whether the decompressed blob starts with the
+ * mentioned prefix. The byte extra needs to follow the
+ * prefix */
+
+ assert(src);
+ assert(src_size > 0);
+ assert(buffer);
+ assert(buffer_size);
+ assert(prefix);
+ assert(*buffer_size == 0 || *buffer);
+
+ ret = lzma_stream_decoder(&s, UINT64_MAX, 0);
+ if (ret != LZMA_OK)
+ return -EBADMSG;
+
+ if (!(greedy_realloc(buffer, buffer_size, ALIGN_8(prefix_len + 1), 1)))
+ return -ENOMEM;
+
+ s.next_in = src;
+ s.avail_in = src_size;
+
+ s.next_out = *buffer;
+ s.avail_out = *buffer_size;
+
+ for (;;) {
+ ret = lzma_code(&s, LZMA_FINISH);
+
+ if (!IN_SET(ret, LZMA_OK, LZMA_STREAM_END))
+ return -EBADMSG;
+
+ if (*buffer_size - s.avail_out >= prefix_len + 1)
+ return memcmp(*buffer, prefix, prefix_len) == 0 &&
+ ((const uint8_t*) *buffer)[prefix_len] == extra;
+
+ if (ret == LZMA_STREAM_END)
+ return 0;
+
+ s.avail_out += *buffer_size;
+
+ if (!(greedy_realloc(buffer, buffer_size, *buffer_size * 2, 1)))
+ return -ENOMEM;
+
+ s.next_out = *(uint8_t**)buffer + *buffer_size - s.avail_out;
+ }
+
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_startswith_lz4(const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra) {
+#if HAVE_LZ4
+ /* Checks whether the decompressed blob starts with the
+ * mentioned prefix. The byte extra needs to follow the
+ * prefix */
+
+ int r;
+
+ assert(src);
+ assert(src_size > 0);
+ assert(buffer);
+ assert(buffer_size);
+ assert(prefix);
+ assert(*buffer_size == 0 || *buffer);
+
+ if (src_size <= 8)
+ return -EBADMSG;
+
+ if (!(greedy_realloc(buffer, buffer_size, ALIGN_8(prefix_len + 1), 1)))
+ return -ENOMEM;
+
+ r = LZ4_decompress_safe_partial((char*)src + 8, *buffer, src_size - 8,
+ prefix_len + 1, *buffer_size);
+ /* One lz4 < 1.8.3, we might get "failure" (r < 0), or "success" where
+ * just a part of the buffer is decompressed. But if we get a smaller
+ * amount of bytes than requested, we don't know whether there isn't enough
+ * data to fill the requested size or whether we just got a partial answer.
+ */
+ if (r < 0 || (size_t) r < prefix_len + 1) {
+ size_t size;
+
+ if (LZ4_versionNumber() >= 10803)
+ /* We trust that the newer lz4 decompresses the number of bytes we
+ * requested if available in the compressed string. */
+ return 0;
+
+ if (r > 0)
+ /* Compare what we have first, in case of mismatch we can
+ * shortcut the full comparison. */
+ if (memcmp(*buffer, prefix, r) != 0)
+ return 0;
+
+ /* Before version 1.8.3, lz4 always tries to decode full a "sequence",
+ * so in pathological cases might need to decompress the full field. */
+ r = decompress_blob_lz4(src, src_size, buffer, buffer_size, &size, 0);
+ if (r < 0)
+ return r;
+
+ if (size < prefix_len + 1)
+ return 0;
+ }
+
+ return memcmp(*buffer, prefix, prefix_len) == 0 &&
+ ((const uint8_t*) *buffer)[prefix_len] == extra;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_startswith(int compression,
+ const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra) {
+ if (compression == OBJECT_COMPRESSED_XZ)
+ return decompress_startswith_xz(src, src_size,
+ buffer, buffer_size,
+ prefix, prefix_len,
+ extra);
+ else if (compression == OBJECT_COMPRESSED_LZ4)
+ return decompress_startswith_lz4(src, src_size,
+ buffer, buffer_size,
+ prefix, prefix_len,
+ extra);
+ else
+ return -EBADMSG;
+}
+
+int compress_stream_xz(int fdf, int fdt, uint64_t max_bytes) {
+#if HAVE_XZ
+ _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT;
+ lzma_ret ret;
+ uint8_t buf[BUFSIZ], out[BUFSIZ];
+ lzma_action action = LZMA_RUN;
+
+ assert(fdf >= 0);
+ assert(fdt >= 0);
+
+ ret = lzma_easy_encoder(&s, LZMA_PRESET_DEFAULT, LZMA_CHECK_CRC64);
+ if (ret != LZMA_OK) {
+ log_error("Failed to initialize XZ encoder: code %u", ret);
+ return -EINVAL;
+ }
+
+ for (;;) {
+ if (s.avail_in == 0 && action == LZMA_RUN) {
+ size_t m = sizeof(buf);
+ ssize_t n;
+
+ if (max_bytes != (uint64_t) -1 && (uint64_t) m > max_bytes)
+ m = (size_t) max_bytes;
+
+ n = read(fdf, buf, m);
+ if (n < 0)
+ return -errno;
+ if (n == 0)
+ action = LZMA_FINISH;
+ else {
+ s.next_in = buf;
+ s.avail_in = n;
+
+ if (max_bytes != (uint64_t) -1) {
+ assert(max_bytes >= (uint64_t) n);
+ max_bytes -= n;
+ }
+ }
+ }
+
+ if (s.avail_out == 0) {
+ s.next_out = out;
+ s.avail_out = sizeof(out);
+ }
+
+ ret = lzma_code(&s, action);
+ if (!IN_SET(ret, LZMA_OK, LZMA_STREAM_END)) {
+ log_error("Compression failed: code %u", ret);
+ return -EBADMSG;
+ }
+
+ if (s.avail_out == 0 || ret == LZMA_STREAM_END) {
+ ssize_t n, k;
+
+ n = sizeof(out) - s.avail_out;
+
+ k = loop_write(fdt, out, n, false);
+ if (k < 0)
+ return k;
+
+ if (ret == LZMA_STREAM_END) {
+ log_debug("XZ compression finished (%"PRIu64" -> %"PRIu64" bytes, %.1f%%)",
+ s.total_in, s.total_out,
+ (double) s.total_out / s.total_in * 100);
+
+ return 0;
+ }
+ }
+ }
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+#define LZ4_BUFSIZE (512*1024u)
+
+int compress_stream_lz4(int fdf, int fdt, uint64_t max_bytes) {
+
+#if HAVE_LZ4
+ LZ4F_errorCode_t c;
+ _cleanup_(LZ4F_freeCompressionContextp) LZ4F_compressionContext_t ctx = NULL;
+ _cleanup_free_ char *buf = NULL;
+ char *src = NULL;
+ size_t size, n, total_in = 0, total_out, offset = 0, frame_size;
+ struct stat st;
+ int r;
+ static const LZ4F_compressOptions_t options = {
+ .stableSrc = 1,
+ };
+ static const LZ4F_preferences_t preferences = {
+ .frameInfo.blockSizeID = 5,
+ };
+
+ c = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
+ if (LZ4F_isError(c))
+ return -ENOMEM;
+
+ if (fstat(fdf, &st) < 0)
+ return log_debug_errno(errno, "fstat() failed: %m");
+
+ frame_size = LZ4F_compressBound(LZ4_BUFSIZE, &preferences);
+ size = frame_size + 64*1024; /* add some space for header and trailer */
+ buf = malloc(size);
+ if (!buf)
+ return -ENOMEM;
+
+ n = offset = total_out = LZ4F_compressBegin(ctx, buf, size, &preferences);
+ if (LZ4F_isError(n))
+ return -EINVAL;
+
+ src = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fdf, 0);
+ if (src == MAP_FAILED)
+ return -errno;
+
+ log_debug("Buffer size is %zu bytes, header size %zu bytes.", size, n);
+
+ while (total_in < (size_t) st.st_size) {
+ ssize_t k;
+
+ k = MIN(LZ4_BUFSIZE, st.st_size - total_in);
+ n = LZ4F_compressUpdate(ctx, buf + offset, size - offset,
+ src + total_in, k, &options);
+ if (LZ4F_isError(n)) {
+ r = -ENOTRECOVERABLE;
+ goto cleanup;
+ }
+
+ total_in += k;
+ offset += n;
+ total_out += n;
+
+ if (max_bytes != (uint64_t) -1 && total_out > (size_t) max_bytes) {
+ log_debug("Compressed stream longer than %"PRIu64" bytes", max_bytes);
+ return -EFBIG;
+ }
+
+ if (size - offset < frame_size + 4) {
+ k = loop_write(fdt, buf, offset, false);
+ if (k < 0) {
+ r = k;
+ goto cleanup;
+ }
+ offset = 0;
+ }
+ }
+
+ n = LZ4F_compressEnd(ctx, buf + offset, size - offset, &options);
+ if (LZ4F_isError(n)) {
+ r = -ENOTRECOVERABLE;
+ goto cleanup;
+ }
+
+ offset += n;
+ total_out += n;
+ r = loop_write(fdt, buf, offset, false);
+ if (r < 0)
+ goto cleanup;
+
+ log_debug("LZ4 compression finished (%zu -> %zu bytes, %.1f%%)",
+ total_in, total_out,
+ (double) total_out / total_in * 100);
+ cleanup:
+ munmap(src, st.st_size);
+ return r;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_stream_xz(int fdf, int fdt, uint64_t max_bytes) {
+
+#if HAVE_XZ
+ _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT;
+ lzma_ret ret;
+
+ uint8_t buf[BUFSIZ], out[BUFSIZ];
+ lzma_action action = LZMA_RUN;
+
+ assert(fdf >= 0);
+ assert(fdt >= 0);
+
+ ret = lzma_stream_decoder(&s, UINT64_MAX, 0);
+ if (ret != LZMA_OK) {
+ log_debug("Failed to initialize XZ decoder: code %u", ret);
+ return -ENOMEM;
+ }
+
+ for (;;) {
+ if (s.avail_in == 0 && action == LZMA_RUN) {
+ ssize_t n;
+
+ n = read(fdf, buf, sizeof(buf));
+ if (n < 0)
+ return -errno;
+ if (n == 0)
+ action = LZMA_FINISH;
+ else {
+ s.next_in = buf;
+ s.avail_in = n;
+ }
+ }
+
+ if (s.avail_out == 0) {
+ s.next_out = out;
+ s.avail_out = sizeof(out);
+ }
+
+ ret = lzma_code(&s, action);
+ if (!IN_SET(ret, LZMA_OK, LZMA_STREAM_END)) {
+ log_debug("Decompression failed: code %u", ret);
+ return -EBADMSG;
+ }
+
+ if (s.avail_out == 0 || ret == LZMA_STREAM_END) {
+ ssize_t n, k;
+
+ n = sizeof(out) - s.avail_out;
+
+ if (max_bytes != (uint64_t) -1) {
+ if (max_bytes < (uint64_t) n)
+ return -EFBIG;
+
+ max_bytes -= n;
+ }
+
+ k = loop_write(fdt, out, n, false);
+ if (k < 0)
+ return k;
+
+ if (ret == LZMA_STREAM_END) {
+ log_debug("XZ decompression finished (%"PRIu64" -> %"PRIu64" bytes, %.1f%%)",
+ s.total_in, s.total_out,
+ (double) s.total_out / s.total_in * 100);
+
+ return 0;
+ }
+ }
+ }
+#else
+ log_debug("Cannot decompress file. Compiled without XZ support.");
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_stream_lz4(int in, int out, uint64_t max_bytes) {
+#if HAVE_LZ4
+ size_t c;
+ _cleanup_(LZ4F_freeDecompressionContextp) LZ4F_decompressionContext_t ctx = NULL;
+ _cleanup_free_ char *buf = NULL;
+ char *src;
+ struct stat st;
+ int r = 0;
+ size_t total_in = 0, total_out = 0;
+
+ c = LZ4F_createDecompressionContext(&ctx, LZ4F_VERSION);
+ if (LZ4F_isError(c))
+ return -ENOMEM;
+
+ if (fstat(in, &st) < 0)
+ return log_debug_errno(errno, "fstat() failed: %m");
+
+ buf = malloc(LZ4_BUFSIZE);
+ if (!buf)
+ return -ENOMEM;
+
+ src = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, in, 0);
+ if (src == MAP_FAILED)
+ return -errno;
+
+ while (total_in < (size_t) st.st_size) {
+ size_t produced = LZ4_BUFSIZE;
+ size_t used = st.st_size - total_in;
+
+ c = LZ4F_decompress(ctx, buf, &produced, src + total_in, &used, NULL);
+ if (LZ4F_isError(c)) {
+ r = -EBADMSG;
+ goto cleanup;
+ }
+
+ total_in += used;
+ total_out += produced;
+
+ if (max_bytes != (uint64_t) -1 && total_out > (size_t) max_bytes) {
+ log_debug("Decompressed stream longer than %"PRIu64" bytes", max_bytes);
+ r = -EFBIG;
+ goto cleanup;
+ }
+
+ r = loop_write(out, buf, produced, false);
+ if (r < 0)
+ goto cleanup;
+ }
+
+ log_debug("LZ4 decompression finished (%zu -> %zu bytes, %.1f%%)",
+ total_in, total_out,
+ total_in > 0 ? (double) total_out / total_in * 100 : 0.0);
+ cleanup:
+ munmap(src, st.st_size);
+ return r;
+#else
+ log_debug("Cannot decompress file. Compiled without LZ4 support.");
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_stream(const char *filename, int fdf, int fdt, uint64_t max_bytes) {
+
+ if (endswith(filename, ".lz4"))
+ return decompress_stream_lz4(fdf, fdt, max_bytes);
+ else if (endswith(filename, ".xz"))
+ return decompress_stream_xz(fdf, fdt, max_bytes);
+ else
+ return -EPROTONOSUPPORT;
+}
diff --git a/src/journal/compress.h b/src/journal/compress.h
new file mode 100644
index 0000000..5641148
--- /dev/null
+++ b/src/journal/compress.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <unistd.h>
+
+#include "journal-def.h"
+
+const char* object_compressed_to_string(int compression);
+int object_compressed_from_string(const char *compression);
+
+int compress_blob_xz(const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size);
+int compress_blob_lz4(const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size);
+
+static inline int compress_blob(const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size) {
+ int r;
+#if HAVE_LZ4
+ r = compress_blob_lz4(src, src_size, dst, dst_alloc_size, dst_size);
+ if (r == 0)
+ return OBJECT_COMPRESSED_LZ4;
+#else
+ r = compress_blob_xz(src, src_size, dst, dst_alloc_size, dst_size);
+ if (r == 0)
+ return OBJECT_COMPRESSED_XZ;
+#endif
+ return r;
+}
+
+int decompress_blob_xz(const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max);
+int decompress_blob_lz4(const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max);
+int decompress_blob(int compression,
+ const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max);
+
+int decompress_startswith_xz(const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra);
+int decompress_startswith_lz4(const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra);
+int decompress_startswith(int compression,
+ const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra);
+
+int compress_stream_xz(int fdf, int fdt, uint64_t max_bytes);
+int compress_stream_lz4(int fdf, int fdt, uint64_t max_bytes);
+
+int decompress_stream_xz(int fdf, int fdt, uint64_t max_size);
+int decompress_stream_lz4(int fdf, int fdt, uint64_t max_size);
+
+#if HAVE_LZ4
+# define compress_stream compress_stream_lz4
+# define COMPRESSED_EXT ".lz4"
+#else
+# define compress_stream compress_stream_xz
+# define COMPRESSED_EXT ".xz"
+#endif
+
+int decompress_stream(const char *filename, int fdf, int fdt, uint64_t max_bytes);
diff --git a/src/journal/fsprg.c b/src/journal/fsprg.c
new file mode 100644
index 0000000..6d062de
--- /dev/null
+++ b/src/journal/fsprg.c
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: LGPL-2.1+
+ *
+ * fsprg v0.1 - (seekable) forward-secure pseudorandom generator
+ * Copyright © 2012 B. Poettering
+ * Contact: fsprg@point-at-infinity.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+/*
+ * See "Practical Secure Logging: Seekable Sequential Key Generators"
+ * by G. A. Marson, B. Poettering for details:
+ *
+ * http://eprint.iacr.org/2013/397
+ */
+
+#include <gcrypt.h>
+#include <string.h>
+
+#include "fsprg.h"
+#include "gcrypt-util.h"
+
+#define ISVALID_SECPAR(secpar) (((secpar) % 16 == 0) && ((secpar) >= 16) && ((secpar) <= 16384))
+#define VALIDATE_SECPAR(secpar) assert(ISVALID_SECPAR(secpar));
+
+#define RND_HASH GCRY_MD_SHA256
+#define RND_GEN_P 0x01
+#define RND_GEN_Q 0x02
+#define RND_GEN_X 0x03
+
+#pragma GCC diagnostic ignored "-Wpointer-arith"
+/* TODO: remove void* arithmetic and this work-around */
+
+/******************************************************************************/
+
+static void mpi_export(void *buf, size_t buflen, const gcry_mpi_t x) {
+ unsigned len;
+ size_t nwritten;
+
+ assert(gcry_mpi_cmp_ui(x, 0) >= 0);
+ len = (gcry_mpi_get_nbits(x) + 7) / 8;
+ assert(len <= buflen);
+ memzero(buf, buflen);
+ gcry_mpi_print(GCRYMPI_FMT_USG, buf + (buflen - len), len, &nwritten, x);
+ assert(nwritten == len);
+}
+
+static gcry_mpi_t mpi_import(const void *buf, size_t buflen) {
+ gcry_mpi_t h;
+ unsigned len;
+
+ assert_se(gcry_mpi_scan(&h, GCRYMPI_FMT_USG, buf, buflen, NULL) == 0);
+ len = (gcry_mpi_get_nbits(h) + 7) / 8;
+ assert(len <= buflen);
+ assert(gcry_mpi_cmp_ui(h, 0) >= 0);
+
+ return h;
+}
+
+static void uint64_export(void *buf, size_t buflen, uint64_t x) {
+ assert(buflen == 8);
+ ((uint8_t*) buf)[0] = (x >> 56) & 0xff;
+ ((uint8_t*) buf)[1] = (x >> 48) & 0xff;
+ ((uint8_t*) buf)[2] = (x >> 40) & 0xff;
+ ((uint8_t*) buf)[3] = (x >> 32) & 0xff;
+ ((uint8_t*) buf)[4] = (x >> 24) & 0xff;
+ ((uint8_t*) buf)[5] = (x >> 16) & 0xff;
+ ((uint8_t*) buf)[6] = (x >> 8) & 0xff;
+ ((uint8_t*) buf)[7] = (x >> 0) & 0xff;
+}
+
+_pure_ static uint64_t uint64_import(const void *buf, size_t buflen) {
+ assert(buflen == 8);
+ return
+ (uint64_t)(((uint8_t*) buf)[0]) << 56 |
+ (uint64_t)(((uint8_t*) buf)[1]) << 48 |
+ (uint64_t)(((uint8_t*) buf)[2]) << 40 |
+ (uint64_t)(((uint8_t*) buf)[3]) << 32 |
+ (uint64_t)(((uint8_t*) buf)[4]) << 24 |
+ (uint64_t)(((uint8_t*) buf)[5]) << 16 |
+ (uint64_t)(((uint8_t*) buf)[6]) << 8 |
+ (uint64_t)(((uint8_t*) buf)[7]) << 0;
+}
+
+/* deterministically generate from seed/idx a string of buflen pseudorandom bytes */
+static void det_randomize(void *buf, size_t buflen, const void *seed, size_t seedlen, uint32_t idx) {
+ gcry_md_hd_t hd, hd2;
+ size_t olen, cpylen;
+ uint32_t ctr;
+
+ olen = gcry_md_get_algo_dlen(RND_HASH);
+ gcry_md_open(&hd, RND_HASH, 0);
+ gcry_md_write(hd, seed, seedlen);
+ gcry_md_putc(hd, (idx >> 24) & 0xff);
+ gcry_md_putc(hd, (idx >> 16) & 0xff);
+ gcry_md_putc(hd, (idx >> 8) & 0xff);
+ gcry_md_putc(hd, (idx >> 0) & 0xff);
+
+ for (ctr = 0; buflen; ctr++) {
+ gcry_md_copy(&hd2, hd);
+ gcry_md_putc(hd2, (ctr >> 24) & 0xff);
+ gcry_md_putc(hd2, (ctr >> 16) & 0xff);
+ gcry_md_putc(hd2, (ctr >> 8) & 0xff);
+ gcry_md_putc(hd2, (ctr >> 0) & 0xff);
+ gcry_md_final(hd2);
+ cpylen = (buflen < olen) ? buflen : olen;
+ memcpy(buf, gcry_md_read(hd2, RND_HASH), cpylen);
+ gcry_md_close(hd2);
+ buf += cpylen;
+ buflen -= cpylen;
+ }
+ gcry_md_close(hd);
+}
+
+/* deterministically generate from seed/idx a prime of length `bits' that is 3 (mod 4) */
+static gcry_mpi_t genprime3mod4(int bits, const void *seed, size_t seedlen, uint32_t idx) {
+ size_t buflen = bits / 8;
+ uint8_t buf[buflen];
+ gcry_mpi_t p;
+
+ assert(bits % 8 == 0);
+ assert(buflen > 0);
+
+ det_randomize(buf, buflen, seed, seedlen, idx);
+ buf[0] |= 0xc0; /* set upper two bits, so that n=pq has maximum size */
+ buf[buflen - 1] |= 0x03; /* set lower two bits, to have result 3 (mod 4) */
+
+ p = mpi_import(buf, buflen);
+ while (gcry_prime_check(p, 0))
+ gcry_mpi_add_ui(p, p, 4);
+
+ return p;
+}
+
+/* deterministically generate from seed/idx a quadratic residue (mod n) */
+static gcry_mpi_t gensquare(const gcry_mpi_t n, const void *seed, size_t seedlen, uint32_t idx, unsigned secpar) {
+ size_t buflen = secpar / 8;
+ uint8_t buf[buflen];
+ gcry_mpi_t x;
+
+ det_randomize(buf, buflen, seed, seedlen, idx);
+ buf[0] &= 0x7f; /* clear upper bit, so that we have x < n */
+ x = mpi_import(buf, buflen);
+ assert(gcry_mpi_cmp(x, n) < 0);
+ gcry_mpi_mulm(x, x, x, n);
+ return x;
+}
+
+/* compute 2^m (mod phi(p)), for a prime p */
+static gcry_mpi_t twopowmodphi(uint64_t m, const gcry_mpi_t p) {
+ gcry_mpi_t phi, r;
+ int n;
+
+ phi = gcry_mpi_new(0);
+ gcry_mpi_sub_ui(phi, p, 1);
+
+ /* count number of used bits in m */
+ for (n = 0; (1ULL << n) <= m; n++)
+ ;
+
+ r = gcry_mpi_new(0);
+ gcry_mpi_set_ui(r, 1);
+ while (n) { /* square and multiply algorithm for fast exponentiation */
+ n--;
+ gcry_mpi_mulm(r, r, r, phi);
+ if (m & ((uint64_t)1 << n)) {
+ gcry_mpi_add(r, r, r);
+ if (gcry_mpi_cmp(r, phi) >= 0)
+ gcry_mpi_sub(r, r, phi);
+ }
+ }
+
+ gcry_mpi_release(phi);
+ return r;
+}
+
+/* Decompose $x \in Z_n$ into $(xp,xq) \in Z_p \times Z_q$ using Chinese Remainder Theorem */
+static void CRT_decompose(gcry_mpi_t *xp, gcry_mpi_t *xq, const gcry_mpi_t x, const gcry_mpi_t p, const gcry_mpi_t q) {
+ *xp = gcry_mpi_new(0);
+ *xq = gcry_mpi_new(0);
+ gcry_mpi_mod(*xp, x, p);
+ gcry_mpi_mod(*xq, x, q);
+}
+
+/* Compose $(xp,xq) \in Z_p \times Z_q$ into $x \in Z_n$ using Chinese Remainder Theorem */
+static void CRT_compose(gcry_mpi_t *x, const gcry_mpi_t xp, const gcry_mpi_t xq, const gcry_mpi_t p, const gcry_mpi_t q) {
+ gcry_mpi_t a, u;
+
+ a = gcry_mpi_new(0);
+ u = gcry_mpi_new(0);
+ *x = gcry_mpi_new(0);
+ gcry_mpi_subm(a, xq, xp, q);
+ gcry_mpi_invm(u, p, q);
+ gcry_mpi_mulm(a, a, u, q); /* a = (xq - xp) / p (mod q) */
+ gcry_mpi_mul(*x, p, a);
+ gcry_mpi_add(*x, *x, xp); /* x = p * ((xq - xp) / p mod q) + xp */
+ gcry_mpi_release(a);
+ gcry_mpi_release(u);
+}
+
+/******************************************************************************/
+
+size_t FSPRG_mskinbytes(unsigned _secpar) {
+ VALIDATE_SECPAR(_secpar);
+ return 2 + 2 * (_secpar / 2) / 8; /* to store header,p,q */
+}
+
+size_t FSPRG_mpkinbytes(unsigned _secpar) {
+ VALIDATE_SECPAR(_secpar);
+ return 2 + _secpar / 8; /* to store header,n */
+}
+
+size_t FSPRG_stateinbytes(unsigned _secpar) {
+ VALIDATE_SECPAR(_secpar);
+ return 2 + 2 * _secpar / 8 + 8; /* to store header,n,x,epoch */
+}
+
+static void store_secpar(void *buf, uint16_t secpar) {
+ secpar = secpar / 16 - 1;
+ ((uint8_t*) buf)[0] = (secpar >> 8) & 0xff;
+ ((uint8_t*) buf)[1] = (secpar >> 0) & 0xff;
+}
+
+static uint16_t read_secpar(const void *buf) {
+ uint16_t secpar;
+ secpar =
+ (uint16_t)(((uint8_t*) buf)[0]) << 8 |
+ (uint16_t)(((uint8_t*) buf)[1]) << 0;
+ return 16 * (secpar + 1);
+}
+
+void FSPRG_GenMK(void *msk, void *mpk, const void *seed, size_t seedlen, unsigned _secpar) {
+ uint8_t iseed[FSPRG_RECOMMENDED_SEEDLEN];
+ gcry_mpi_t n, p, q;
+ uint16_t secpar;
+
+ VALIDATE_SECPAR(_secpar);
+ secpar = _secpar;
+
+ initialize_libgcrypt(false);
+
+ if (!seed) {
+ gcry_randomize(iseed, FSPRG_RECOMMENDED_SEEDLEN, GCRY_STRONG_RANDOM);
+ seed = iseed;
+ seedlen = FSPRG_RECOMMENDED_SEEDLEN;
+ }
+
+ p = genprime3mod4(secpar / 2, seed, seedlen, RND_GEN_P);
+ q = genprime3mod4(secpar / 2, seed, seedlen, RND_GEN_Q);
+
+ if (msk) {
+ store_secpar(msk + 0, secpar);
+ mpi_export(msk + 2 + 0 * (secpar / 2) / 8, (secpar / 2) / 8, p);
+ mpi_export(msk + 2 + 1 * (secpar / 2) / 8, (secpar / 2) / 8, q);
+ }
+
+ if (mpk) {
+ n = gcry_mpi_new(0);
+ gcry_mpi_mul(n, p, q);
+ assert(gcry_mpi_get_nbits(n) == secpar);
+
+ store_secpar(mpk + 0, secpar);
+ mpi_export(mpk + 2, secpar / 8, n);
+
+ gcry_mpi_release(n);
+ }
+
+ gcry_mpi_release(p);
+ gcry_mpi_release(q);
+}
+
+void FSPRG_GenState0(void *state, const void *mpk, const void *seed, size_t seedlen) {
+ gcry_mpi_t n, x;
+ uint16_t secpar;
+
+ initialize_libgcrypt(false);
+
+ secpar = read_secpar(mpk + 0);
+ n = mpi_import(mpk + 2, secpar / 8);
+ x = gensquare(n, seed, seedlen, RND_GEN_X, secpar);
+
+ memcpy(state, mpk, 2 + secpar / 8);
+ mpi_export(state + 2 + 1 * secpar / 8, secpar / 8, x);
+ memzero(state + 2 + 2 * secpar / 8, 8);
+
+ gcry_mpi_release(n);
+ gcry_mpi_release(x);
+}
+
+void FSPRG_Evolve(void *state) {
+ gcry_mpi_t n, x;
+ uint16_t secpar;
+ uint64_t epoch;
+
+ initialize_libgcrypt(false);
+
+ secpar = read_secpar(state + 0);
+ n = mpi_import(state + 2 + 0 * secpar / 8, secpar / 8);
+ x = mpi_import(state + 2 + 1 * secpar / 8, secpar / 8);
+ epoch = uint64_import(state + 2 + 2 * secpar / 8, 8);
+
+ gcry_mpi_mulm(x, x, x, n);
+ epoch++;
+
+ mpi_export(state + 2 + 1 * secpar / 8, secpar / 8, x);
+ uint64_export(state + 2 + 2 * secpar / 8, 8, epoch);
+
+ gcry_mpi_release(n);
+ gcry_mpi_release(x);
+}
+
+uint64_t FSPRG_GetEpoch(const void *state) {
+ uint16_t secpar;
+ secpar = read_secpar(state + 0);
+ return uint64_import(state + 2 + 2 * secpar / 8, 8);
+}
+
+void FSPRG_Seek(void *state, uint64_t epoch, const void *msk, const void *seed, size_t seedlen) {
+ gcry_mpi_t p, q, n, x, xp, xq, kp, kq, xm;
+ uint16_t secpar;
+
+ initialize_libgcrypt(false);
+
+ secpar = read_secpar(msk + 0);
+ p = mpi_import(msk + 2 + 0 * (secpar / 2) / 8, (secpar / 2) / 8);
+ q = mpi_import(msk + 2 + 1 * (secpar / 2) / 8, (secpar / 2) / 8);
+
+ n = gcry_mpi_new(0);
+ gcry_mpi_mul(n, p, q);
+
+ x = gensquare(n, seed, seedlen, RND_GEN_X, secpar);
+ CRT_decompose(&xp, &xq, x, p, q); /* split (mod n) into (mod p) and (mod q) using CRT */
+
+ kp = twopowmodphi(epoch, p); /* compute 2^epoch (mod phi(p)) */
+ kq = twopowmodphi(epoch, q); /* compute 2^epoch (mod phi(q)) */
+
+ gcry_mpi_powm(xp, xp, kp, p); /* compute x^(2^epoch) (mod p) */
+ gcry_mpi_powm(xq, xq, kq, q); /* compute x^(2^epoch) (mod q) */
+
+ CRT_compose(&xm, xp, xq, p, q); /* combine (mod p) and (mod q) to (mod n) using CRT */
+
+ store_secpar(state + 0, secpar);
+ mpi_export(state + 2 + 0 * secpar / 8, secpar / 8, n);
+ mpi_export(state + 2 + 1 * secpar / 8, secpar / 8, xm);
+ uint64_export(state + 2 + 2 * secpar / 8, 8, epoch);
+
+ gcry_mpi_release(p);
+ gcry_mpi_release(q);
+ gcry_mpi_release(n);
+ gcry_mpi_release(x);
+ gcry_mpi_release(xp);
+ gcry_mpi_release(xq);
+ gcry_mpi_release(kp);
+ gcry_mpi_release(kq);
+ gcry_mpi_release(xm);
+}
+
+void FSPRG_GetKey(const void *state, void *key, size_t keylen, uint32_t idx) {
+ uint16_t secpar;
+
+ initialize_libgcrypt(false);
+
+ secpar = read_secpar(state + 0);
+ det_randomize(key, keylen, state + 2, 2 * secpar / 8 + 8, idx);
+}
diff --git a/src/journal/fsprg.h b/src/journal/fsprg.h
new file mode 100644
index 0000000..3341267
--- /dev/null
+++ b/src/journal/fsprg.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/*
+ * fsprg v0.1 - (seekable) forward-secure pseudorandom generator
+ * Copyright © 2012 B. Poettering
+ * Contact: fsprg@point-at-infinity.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FSPRG_RECOMMENDED_SECPAR 1536
+#define FSPRG_RECOMMENDED_SEEDLEN (96/8)
+
+size_t FSPRG_mskinbytes(unsigned secpar) _const_;
+size_t FSPRG_mpkinbytes(unsigned secpar) _const_;
+size_t FSPRG_stateinbytes(unsigned secpar) _const_;
+
+/* Setup msk and mpk. Providing seed != NULL makes this algorithm deterministic. */
+void FSPRG_GenMK(void *msk, void *mpk, const void *seed, size_t seedlen, unsigned secpar);
+
+/* Initialize state deterministically in dependence on seed. */
+/* Note: in case one wants to run only one GenState0 per GenMK it is safe to use
+ the same seed for both GenMK and GenState0.
+*/
+void FSPRG_GenState0(void *state, const void *mpk, const void *seed, size_t seedlen);
+
+void FSPRG_Evolve(void *state);
+
+uint64_t FSPRG_GetEpoch(const void *state) _pure_;
+
+/* Seek to any arbitrary state (by providing msk together with seed from GenState0). */
+void FSPRG_Seek(void *state, uint64_t epoch, const void *msk, const void *seed, size_t seedlen);
+
+void FSPRG_GetKey(const void *state, void *key, size_t keylen, uint32_t idx);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/journal/generate-audit_type-list.sh b/src/journal/generate-audit_type-list.sh
new file mode 100755
index 0000000..2445a02
--- /dev/null
+++ b/src/journal/generate-audit_type-list.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+set -eu
+
+cpp="$1"
+shift
+
+includes=""
+for i in "$@"; do
+ includes="$includes -include $i"
+done
+
+$cpp -dM $includes - </dev/null | \
+ grep -vE 'AUDIT_.*(FIRST|LAST)_' | \
+ sed -r -n 's/^#define\s+AUDIT_(\w+)\s+([0-9]{4})\s*$$/\1\t\2/p' | \
+ sort -k2
diff --git a/src/journal/journal-authenticate.c b/src/journal/journal-authenticate.c
new file mode 100644
index 0000000..8a9ce8a
--- /dev/null
+++ b/src/journal/journal-authenticate.c
@@ -0,0 +1,534 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include "fd-util.h"
+#include "fsprg.h"
+#include "gcrypt-util.h"
+#include "hexdecoct.h"
+#include "journal-authenticate.h"
+#include "journal-def.h"
+#include "journal-file.h"
+
+static uint64_t journal_file_tag_seqnum(JournalFile *f) {
+ uint64_t r;
+
+ assert(f);
+
+ r = le64toh(f->header->n_tags) + 1;
+ f->header->n_tags = htole64(r);
+
+ return r;
+}
+
+int journal_file_append_tag(JournalFile *f) {
+ Object *o;
+ uint64_t p;
+ int r;
+
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ if (!f->hmac_running)
+ return 0;
+
+ assert(f->hmac);
+
+ r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
+ if (r < 0)
+ return r;
+
+ o->tag.seqnum = htole64(journal_file_tag_seqnum(f));
+ o->tag.epoch = htole64(FSPRG_GetEpoch(f->fsprg_state));
+
+ log_debug("Writing tag %"PRIu64" for epoch %"PRIu64"",
+ le64toh(o->tag.seqnum),
+ FSPRG_GetEpoch(f->fsprg_state));
+
+ /* Add the tag object itself, so that we can protect its
+ * header. This will exclude the actual hash value in it */
+ r = journal_file_hmac_put_object(f, OBJECT_TAG, o, p);
+ if (r < 0)
+ return r;
+
+ /* Get the HMAC tag and store it in the object */
+ memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
+ f->hmac_running = false;
+
+ return 0;
+}
+
+int journal_file_hmac_start(JournalFile *f) {
+ uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ if (f->hmac_running)
+ return 0;
+
+ /* Prepare HMAC for next cycle */
+ gcry_md_reset(f->hmac);
+ FSPRG_GetKey(f->fsprg_state, key, sizeof(key), 0);
+ gcry_md_setkey(f->hmac, key, sizeof(key));
+
+ f->hmac_running = true;
+
+ return 0;
+}
+
+static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
+ uint64_t t;
+
+ assert(f);
+ assert(epoch);
+ assert(f->seal);
+
+ if (f->fss_start_usec == 0 ||
+ f->fss_interval_usec == 0)
+ return -EOPNOTSUPP;
+
+ if (realtime < f->fss_start_usec)
+ return -ESTALE;
+
+ t = realtime - f->fss_start_usec;
+ t = t / f->fss_interval_usec;
+
+ *epoch = t;
+ return 0;
+}
+
+static int journal_file_fsprg_need_evolve(JournalFile *f, uint64_t realtime) {
+ uint64_t goal, epoch;
+ int r;
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ r = journal_file_get_epoch(f, realtime, &goal);
+ if (r < 0)
+ return r;
+
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+ if (epoch > goal)
+ return -ESTALE;
+
+ return epoch != goal;
+}
+
+int journal_file_fsprg_evolve(JournalFile *f, uint64_t realtime) {
+ uint64_t goal, epoch;
+ int r;
+
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ r = journal_file_get_epoch(f, realtime, &goal);
+ if (r < 0)
+ return r;
+
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+ if (epoch < goal)
+ log_debug("Evolving FSPRG key from epoch %"PRIu64" to %"PRIu64".", epoch, goal);
+
+ for (;;) {
+ if (epoch > goal)
+ return -ESTALE;
+ if (epoch == goal)
+ return 0;
+
+ FSPRG_Evolve(f->fsprg_state);
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+ }
+}
+
+int journal_file_fsprg_seek(JournalFile *f, uint64_t goal) {
+ void *msk;
+ uint64_t epoch;
+
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ assert(f->fsprg_seed);
+
+ if (f->fsprg_state) {
+ /* Cheaper... */
+
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+ if (goal == epoch)
+ return 0;
+
+ if (goal == epoch+1) {
+ FSPRG_Evolve(f->fsprg_state);
+ return 0;
+ }
+ } else {
+ f->fsprg_state_size = FSPRG_stateinbytes(FSPRG_RECOMMENDED_SECPAR);
+ f->fsprg_state = malloc(f->fsprg_state_size);
+
+ if (!f->fsprg_state)
+ return -ENOMEM;
+ }
+
+ log_debug("Seeking FSPRG key to %"PRIu64".", goal);
+
+ msk = alloca(FSPRG_mskinbytes(FSPRG_RECOMMENDED_SECPAR));
+ FSPRG_GenMK(msk, NULL, f->fsprg_seed, f->fsprg_seed_size, FSPRG_RECOMMENDED_SECPAR);
+ FSPRG_Seek(f->fsprg_state, goal, msk, f->fsprg_seed, f->fsprg_seed_size);
+ return 0;
+}
+
+int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
+ int r;
+
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ if (realtime <= 0)
+ realtime = now(CLOCK_REALTIME);
+
+ r = journal_file_fsprg_need_evolve(f, realtime);
+ if (r <= 0)
+ return 0;
+
+ r = journal_file_append_tag(f);
+ if (r < 0)
+ return r;
+
+ r = journal_file_fsprg_evolve(f, realtime);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int journal_file_hmac_put_object(JournalFile *f, ObjectType type, Object *o, uint64_t p) {
+ int r;
+
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ r = journal_file_hmac_start(f);
+ if (r < 0)
+ return r;
+
+ if (!o) {
+ r = journal_file_move_to_object(f, type, p, &o);
+ if (r < 0)
+ return r;
+ } else {
+ if (type > OBJECT_UNUSED && o->object.type != type)
+ return -EBADMSG;
+ }
+
+ gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
+
+ switch (o->object.type) {
+
+ case OBJECT_DATA:
+ /* All but hash and payload are mutable */
+ gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
+ gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
+ break;
+
+ case OBJECT_FIELD:
+ /* Same here */
+ gcry_md_write(f->hmac, &o->field.hash, sizeof(o->field.hash));
+ gcry_md_write(f->hmac, o->field.payload, le64toh(o->object.size) - offsetof(FieldObject, payload));
+ break;
+
+ case OBJECT_ENTRY:
+ /* All */
+ gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
+ break;
+
+ case OBJECT_FIELD_HASH_TABLE:
+ case OBJECT_DATA_HASH_TABLE:
+ case OBJECT_ENTRY_ARRAY:
+ /* Nothing: everything is mutable */
+ break;
+
+ case OBJECT_TAG:
+ /* All but the tag itself */
+ gcry_md_write(f->hmac, &o->tag.seqnum, sizeof(o->tag.seqnum));
+ gcry_md_write(f->hmac, &o->tag.epoch, sizeof(o->tag.epoch));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int journal_file_hmac_put_header(JournalFile *f) {
+ int r;
+
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ r = journal_file_hmac_start(f);
+ if (r < 0)
+ return r;
+
+ /* All but state+reserved, boot_id, arena_size,
+ * tail_object_offset, n_objects, n_entries,
+ * tail_entry_seqnum, head_entry_seqnum, entry_array_offset,
+ * head_entry_realtime, tail_entry_realtime,
+ * tail_entry_monotonic, n_data, n_fields, n_tags,
+ * n_entry_arrays. */
+
+ gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
+ gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
+ gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
+ gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
+
+ return 0;
+}
+
+int journal_file_fss_load(JournalFile *f) {
+ int r, fd = -1;
+ char *p = NULL;
+ struct stat st;
+ FSSHeader *m = NULL;
+ sd_id128_t machine;
+
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ r = sd_id128_get_machine(&machine);
+ if (r < 0)
+ return r;
+
+ if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fss",
+ SD_ID128_FORMAT_VAL(machine)) < 0)
+ return -ENOMEM;
+
+ fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
+ if (fd < 0) {
+ if (errno != ENOENT)
+ log_error_errno(errno, "Failed to open %s: %m", p);
+
+ r = -errno;
+ goto finish;
+ }
+
+ if (fstat(fd, &st) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ if (st.st_size < (off_t) sizeof(FSSHeader)) {
+ r = -ENODATA;
+ goto finish;
+ }
+
+ m = mmap(NULL, PAGE_ALIGN(sizeof(FSSHeader)), PROT_READ, MAP_SHARED, fd, 0);
+ if (m == MAP_FAILED) {
+ m = NULL;
+ r = -errno;
+ goto finish;
+ }
+
+ if (memcmp(m->signature, FSS_HEADER_SIGNATURE, 8) != 0) {
+ r = -EBADMSG;
+ goto finish;
+ }
+
+ if (m->incompatible_flags != 0) {
+ r = -EPROTONOSUPPORT;
+ goto finish;
+ }
+
+ if (le64toh(m->header_size) < sizeof(FSSHeader)) {
+ r = -EBADMSG;
+ goto finish;
+ }
+
+ if (le64toh(m->fsprg_state_size) != FSPRG_stateinbytes(le16toh(m->fsprg_secpar))) {
+ r = -EBADMSG;
+ goto finish;
+ }
+
+ f->fss_file_size = le64toh(m->header_size) + le64toh(m->fsprg_state_size);
+ if ((uint64_t) st.st_size < f->fss_file_size) {
+ r = -ENODATA;
+ goto finish;
+ }
+
+ if (!sd_id128_equal(machine, m->machine_id)) {
+ r = -EHOSTDOWN;
+ goto finish;
+ }
+
+ if (le64toh(m->start_usec) <= 0 ||
+ le64toh(m->interval_usec) <= 0) {
+ r = -EBADMSG;
+ goto finish;
+ }
+
+ f->fss_file = mmap(NULL, PAGE_ALIGN(f->fss_file_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ if (f->fss_file == MAP_FAILED) {
+ f->fss_file = NULL;
+ r = -errno;
+ goto finish;
+ }
+
+ f->fss_start_usec = le64toh(f->fss_file->start_usec);
+ f->fss_interval_usec = le64toh(f->fss_file->interval_usec);
+
+ f->fsprg_state = (uint8_t*) f->fss_file + le64toh(f->fss_file->header_size);
+ f->fsprg_state_size = le64toh(f->fss_file->fsprg_state_size);
+
+ r = 0;
+
+finish:
+ if (m)
+ munmap(m, PAGE_ALIGN(sizeof(FSSHeader)));
+
+ safe_close(fd);
+ free(p);
+
+ return r;
+}
+
+int journal_file_hmac_setup(JournalFile *f) {
+ gcry_error_t e;
+
+ if (!f->seal)
+ return 0;
+
+ initialize_libgcrypt(true);
+
+ e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
+ if (e != 0)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+int journal_file_append_first_tag(JournalFile *f) {
+ int r;
+ uint64_t p;
+
+ if (!f->seal)
+ return 0;
+
+ log_debug("Calculating first tag...");
+
+ r = journal_file_hmac_put_header(f);
+ if (r < 0)
+ return r;
+
+ p = le64toh(f->header->field_hash_table_offset);
+ if (p < offsetof(Object, hash_table.items))
+ return -EINVAL;
+ p -= offsetof(Object, hash_table.items);
+
+ r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, NULL, p);
+ if (r < 0)
+ return r;
+
+ p = le64toh(f->header->data_hash_table_offset);
+ if (p < offsetof(Object, hash_table.items))
+ return -EINVAL;
+ p -= offsetof(Object, hash_table.items);
+
+ r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, NULL, p);
+ if (r < 0)
+ return r;
+
+ r = journal_file_append_tag(f);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int journal_file_parse_verification_key(JournalFile *f, const char *key) {
+ uint8_t *seed;
+ size_t seed_size, c;
+ const char *k;
+ int r;
+ unsigned long long start, interval;
+
+ seed_size = FSPRG_RECOMMENDED_SEEDLEN;
+ seed = malloc(seed_size);
+ if (!seed)
+ return -ENOMEM;
+
+ k = key;
+ for (c = 0; c < seed_size; c++) {
+ int x, y;
+
+ while (*k == '-')
+ k++;
+
+ x = unhexchar(*k);
+ if (x < 0) {
+ free(seed);
+ return -EINVAL;
+ }
+ k++;
+ y = unhexchar(*k);
+ if (y < 0) {
+ free(seed);
+ return -EINVAL;
+ }
+ k++;
+
+ seed[c] = (uint8_t) (x * 16 + y);
+ }
+
+ if (*k != '/') {
+ free(seed);
+ return -EINVAL;
+ }
+ k++;
+
+ r = sscanf(k, "%llx-%llx", &start, &interval);
+ if (r != 2) {
+ free(seed);
+ return -EINVAL;
+ }
+
+ f->fsprg_seed = seed;
+ f->fsprg_seed_size = seed_size;
+
+ f->fss_start_usec = start * interval;
+ f->fss_interval_usec = interval;
+
+ return 0;
+}
+
+bool journal_file_next_evolve_usec(JournalFile *f, usec_t *u) {
+ uint64_t epoch;
+
+ assert(f);
+ assert(u);
+
+ if (!f->seal)
+ return false;
+
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+
+ *u = (usec_t) (f->fss_start_usec + f->fss_interval_usec * epoch + f->fss_interval_usec);
+
+ return true;
+}
diff --git a/src/journal/journal-authenticate.h b/src/journal/journal-authenticate.h
new file mode 100644
index 0000000..2ef0133
--- /dev/null
+++ b/src/journal/journal-authenticate.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "journal-file.h"
+
+int journal_file_append_tag(JournalFile *f);
+int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
+int journal_file_append_first_tag(JournalFile *f);
+
+int journal_file_hmac_setup(JournalFile *f);
+int journal_file_hmac_start(JournalFile *f);
+int journal_file_hmac_put_header(JournalFile *f);
+int journal_file_hmac_put_object(JournalFile *f, ObjectType type, Object *o, uint64_t p);
+
+int journal_file_fss_load(JournalFile *f);
+int journal_file_parse_verification_key(JournalFile *f, const char *key);
+
+int journal_file_fsprg_evolve(JournalFile *f, uint64_t realtime);
+int journal_file_fsprg_seek(JournalFile *f, uint64_t epoch);
+
+bool journal_file_next_evolve_usec(JournalFile *f, usec_t *u);
diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h
new file mode 100644
index 0000000..d68ce38
--- /dev/null
+++ b/src/journal/journal-def.h
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-id128.h"
+
+#include "macro.h"
+#include "sparse-endian.h"
+
+/*
+ * If you change this file you probably should also change its documentation:
+ *
+ * http://www.freedesktop.org/wiki/Software/systemd/journal-files
+ */
+
+typedef struct Header Header;
+
+typedef struct ObjectHeader ObjectHeader;
+typedef union Object Object;
+
+typedef struct DataObject DataObject;
+typedef struct FieldObject FieldObject;
+typedef struct EntryObject EntryObject;
+typedef struct HashTableObject HashTableObject;
+typedef struct EntryArrayObject EntryArrayObject;
+typedef struct TagObject TagObject;
+
+typedef struct EntryItem EntryItem;
+typedef struct HashItem HashItem;
+
+typedef struct FSSHeader FSSHeader;
+
+/* Object types */
+typedef enum ObjectType {
+ OBJECT_UNUSED, /* also serves as "any type" or "additional context" */
+ OBJECT_DATA,
+ OBJECT_FIELD,
+ OBJECT_ENTRY,
+ OBJECT_DATA_HASH_TABLE,
+ OBJECT_FIELD_HASH_TABLE,
+ OBJECT_ENTRY_ARRAY,
+ OBJECT_TAG,
+ _OBJECT_TYPE_MAX
+} ObjectType;
+
+/* Object flags */
+enum {
+ OBJECT_COMPRESSED_XZ = 1 << 0,
+ OBJECT_COMPRESSED_LZ4 = 1 << 1,
+ _OBJECT_COMPRESSED_MAX
+};
+
+#define OBJECT_COMPRESSION_MASK (OBJECT_COMPRESSED_XZ | OBJECT_COMPRESSED_LZ4)
+
+struct ObjectHeader {
+ uint8_t type;
+ uint8_t flags;
+ uint8_t reserved[6];
+ le64_t size;
+ uint8_t payload[];
+} _packed_;
+
+#define DataObject__contents { \
+ ObjectHeader object; \
+ le64_t hash; \
+ le64_t next_hash_offset; \
+ le64_t next_field_offset; \
+ le64_t entry_offset; /* the first array entry we store inline */ \
+ le64_t entry_array_offset; \
+ le64_t n_entries; \
+ uint8_t payload[]; \
+ }
+
+struct DataObject DataObject__contents;
+struct DataObject__packed DataObject__contents _packed_;
+assert_cc(sizeof(struct DataObject) == sizeof(struct DataObject__packed));
+
+struct FieldObject {
+ ObjectHeader object;
+ le64_t hash;
+ le64_t next_hash_offset;
+ le64_t head_data_offset;
+ uint8_t payload[];
+} _packed_;
+
+struct EntryItem {
+ le64_t object_offset;
+ le64_t hash;
+} _packed_;
+
+#define EntryObject__contents { \
+ ObjectHeader object; \
+ le64_t seqnum; \
+ le64_t realtime; \
+ le64_t monotonic; \
+ sd_id128_t boot_id; \
+ le64_t xor_hash; \
+ EntryItem items[]; \
+ }
+
+struct EntryObject EntryObject__contents;
+struct EntryObject__packed EntryObject__contents _packed_;
+assert_cc(sizeof(struct EntryObject) == sizeof(struct EntryObject__packed));
+
+
+struct HashItem {
+ le64_t head_hash_offset;
+ le64_t tail_hash_offset;
+} _packed_;
+
+struct HashTableObject {
+ ObjectHeader object;
+ HashItem items[];
+} _packed_;
+
+struct EntryArrayObject {
+ ObjectHeader object;
+ le64_t next_entry_array_offset;
+ le64_t items[];
+} _packed_;
+
+#define TAG_LENGTH (256/8)
+
+struct TagObject {
+ ObjectHeader object;
+ le64_t seqnum;
+ le64_t epoch;
+ uint8_t tag[TAG_LENGTH]; /* SHA-256 HMAC */
+} _packed_;
+
+union Object {
+ ObjectHeader object;
+ DataObject data;
+ FieldObject field;
+ EntryObject entry;
+ HashTableObject hash_table;
+ EntryArrayObject entry_array;
+ TagObject tag;
+};
+
+enum {
+ STATE_OFFLINE = 0,
+ STATE_ONLINE = 1,
+ STATE_ARCHIVED = 2,
+ _STATE_MAX
+};
+
+/* Header flags */
+enum {
+ HEADER_INCOMPATIBLE_COMPRESSED_XZ = 1 << 0,
+ HEADER_INCOMPATIBLE_COMPRESSED_LZ4 = 1 << 1,
+};
+
+#define HEADER_INCOMPATIBLE_ANY (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+
+#if HAVE_XZ && HAVE_LZ4
+# define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_ANY
+#elif HAVE_XZ
+# define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_COMPRESSED_XZ
+#elif HAVE_LZ4
+# define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_COMPRESSED_LZ4
+#else
+# define HEADER_INCOMPATIBLE_SUPPORTED 0
+#endif
+
+enum {
+ HEADER_COMPATIBLE_SEALED = 1
+};
+
+#define HEADER_COMPATIBLE_ANY HEADER_COMPATIBLE_SEALED
+#if HAVE_GCRYPT
+# define HEADER_COMPATIBLE_SUPPORTED HEADER_COMPATIBLE_SEALED
+#else
+# define HEADER_COMPATIBLE_SUPPORTED 0
+#endif
+
+#define HEADER_SIGNATURE ((char[]) { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' })
+
+#define struct_Header__contents { \
+ uint8_t signature[8]; /* "LPKSHHRH" */ \
+ le32_t compatible_flags; \
+ le32_t incompatible_flags; \
+ uint8_t state; \
+ uint8_t reserved[7]; \
+ sd_id128_t file_id; \
+ sd_id128_t machine_id; \
+ sd_id128_t boot_id; /* last writer */ \
+ sd_id128_t seqnum_id; \
+ le64_t header_size; \
+ le64_t arena_size; \
+ le64_t data_hash_table_offset; \
+ le64_t data_hash_table_size; \
+ le64_t field_hash_table_offset; \
+ le64_t field_hash_table_size; \
+ le64_t tail_object_offset; \
+ le64_t n_objects; \
+ le64_t n_entries; \
+ le64_t tail_entry_seqnum; \
+ le64_t head_entry_seqnum; \
+ le64_t entry_array_offset; \
+ le64_t head_entry_realtime; \
+ le64_t tail_entry_realtime; \
+ le64_t tail_entry_monotonic; \
+ /* Added in 187 */ \
+ le64_t n_data; \
+ le64_t n_fields; \
+ /* Added in 189 */ \
+ le64_t n_tags; \
+ le64_t n_entry_arrays; \
+ }
+
+struct Header struct_Header__contents;
+struct Header__packed struct_Header__contents _packed_;
+assert_cc(sizeof(struct Header) == sizeof(struct Header__packed));
+assert_cc(sizeof(struct Header) == 240);
+
+#define FSS_HEADER_SIGNATURE ((char[]) { 'K', 'S', 'H', 'H', 'R', 'H', 'L', 'P' })
+
+struct FSSHeader {
+ uint8_t signature[8]; /* "KSHHRHLP" */
+ le32_t compatible_flags;
+ le32_t incompatible_flags;
+ sd_id128_t machine_id;
+ sd_id128_t boot_id; /* last writer */
+ le64_t header_size;
+ le64_t start_usec;
+ le64_t interval_usec;
+ le16_t fsprg_secpar;
+ le16_t reserved[3];
+ le64_t fsprg_state_size;
+} _packed_;
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
new file mode 100644
index 0000000..56827f9
--- /dev/null
+++ b/src/journal/journal-file.c
@@ -0,0 +1,3908 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/statvfs.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "compress.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "journal-authenticate.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "lookup3.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "random-util.h"
+#include "set.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "xattr-util.h"
+
+#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
+#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
+
+#define DEFAULT_COMPRESS_THRESHOLD (512ULL)
+#define MIN_COMPRESS_THRESHOLD (8ULL)
+
+/* This is the minimum journal file size */
+#define JOURNAL_FILE_SIZE_MIN (512ULL*1024ULL) /* 512 KiB */
+
+/* These are the lower and upper bounds if we deduce the max_use value
+ * from the file system size */
+#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
+#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
+
+/* This is the default minimal use limit, how much we'll use even if keep_free suggests otherwise. */
+#define DEFAULT_MIN_USE (1ULL*1024ULL*1024ULL) /* 1 MiB */
+
+/* This is the upper bound if we deduce max_size from max_use */
+#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
+
+/* This is the upper bound if we deduce the keep_free value from the
+ * file system size */
+#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
+
+/* This is the keep_free value when we can't determine the system
+ * size */
+#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
+
+/* This is the default maximum number of journal files to keep around. */
+#define DEFAULT_N_MAX_FILES (100)
+
+/* n_data was the first entry we added after the initial file format design */
+#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
+
+/* How many entries to keep in the entry array chain cache at max */
+#define CHAIN_CACHE_MAX 20
+
+/* How much to increase the journal file size at once each time we allocate something new. */
+#define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL) /* 8MB */
+
+/* Reread fstat() of the file for detecting deletions at least this often */
+#define LAST_STAT_REFRESH_USEC (5*USEC_PER_SEC)
+
+/* The mmap context to use for the header we pick as one above the last defined typed */
+#define CONTEXT_HEADER _OBJECT_TYPE_MAX
+
+#ifdef __clang__
+# pragma GCC diagnostic ignored "-Waddress-of-packed-member"
+#endif
+
+/* This may be called from a separate thread to prevent blocking the caller for the duration of fsync().
+ * As a result we use atomic operations on f->offline_state for inter-thread communications with
+ * journal_file_set_offline() and journal_file_set_online(). */
+static void journal_file_set_offline_internal(JournalFile *f) {
+ assert(f);
+ assert(f->fd >= 0);
+ assert(f->header);
+
+ for (;;) {
+ switch (f->offline_state) {
+ case OFFLINE_CANCEL:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_CANCEL, OFFLINE_DONE))
+ continue;
+ return;
+
+ case OFFLINE_AGAIN_FROM_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_SYNCING))
+ continue;
+ break;
+
+ case OFFLINE_AGAIN_FROM_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_SYNCING))
+ continue;
+ break;
+
+ case OFFLINE_SYNCING:
+ (void) fsync(f->fd);
+
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_OFFLINING))
+ continue;
+
+ f->header->state = f->archive ? STATE_ARCHIVED : STATE_OFFLINE;
+ (void) fsync(f->fd);
+ break;
+
+ case OFFLINE_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_OFFLINING, OFFLINE_DONE))
+ continue;
+ _fallthrough_;
+ case OFFLINE_DONE:
+ return;
+
+ case OFFLINE_JOINED:
+ log_debug("OFFLINE_JOINED unexpected offline state for journal_file_set_offline_internal()");
+ return;
+ }
+ }
+}
+
+static void * journal_file_set_offline_thread(void *arg) {
+ JournalFile *f = arg;
+
+ (void) pthread_setname_np(pthread_self(), "journal-offline");
+
+ journal_file_set_offline_internal(f);
+
+ return NULL;
+}
+
+static int journal_file_set_offline_thread_join(JournalFile *f) {
+ int r;
+
+ assert(f);
+
+ if (f->offline_state == OFFLINE_JOINED)
+ return 0;
+
+ r = pthread_join(f->offline_thread, NULL);
+ if (r)
+ return -r;
+
+ f->offline_state = OFFLINE_JOINED;
+
+ if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
+ return -EIO;
+
+ return 0;
+}
+
+/* Trigger a restart if the offline thread is mid-flight in a restartable state. */
+static bool journal_file_set_offline_try_restart(JournalFile *f) {
+ for (;;) {
+ switch (f->offline_state) {
+ case OFFLINE_AGAIN_FROM_SYNCING:
+ case OFFLINE_AGAIN_FROM_OFFLINING:
+ return true;
+
+ case OFFLINE_CANCEL:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_CANCEL, OFFLINE_AGAIN_FROM_SYNCING))
+ continue;
+ return true;
+
+ case OFFLINE_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_AGAIN_FROM_SYNCING))
+ continue;
+ return true;
+
+ case OFFLINE_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_OFFLINING, OFFLINE_AGAIN_FROM_OFFLINING))
+ continue;
+ return true;
+
+ default:
+ return false;
+ }
+ }
+}
+
+/* Sets a journal offline.
+ *
+ * If wait is false then an offline is dispatched in a separate thread for a
+ * subsequent journal_file_set_offline() or journal_file_set_online() of the
+ * same journal to synchronize with.
+ *
+ * If wait is true, then either an existing offline thread will be restarted
+ * and joined, or if none exists the offline is simply performed in this
+ * context without involving another thread.
+ */
+int journal_file_set_offline(JournalFile *f, bool wait) {
+ bool restarted;
+ int r;
+
+ assert(f);
+
+ if (!f->writable)
+ return -EPERM;
+
+ if (f->fd < 0 || !f->header)
+ return -EINVAL;
+
+ /* An offlining journal is implicitly online and may modify f->header->state,
+ * we must also join any potentially lingering offline thread when not online. */
+ if (!journal_file_is_offlining(f) && f->header->state != STATE_ONLINE)
+ return journal_file_set_offline_thread_join(f);
+
+ /* Restart an in-flight offline thread and wait if needed, or join a lingering done one. */
+ restarted = journal_file_set_offline_try_restart(f);
+ if ((restarted && wait) || !restarted) {
+ r = journal_file_set_offline_thread_join(f);
+ if (r < 0)
+ return r;
+ }
+
+ if (restarted)
+ return 0;
+
+ /* Initiate a new offline. */
+ f->offline_state = OFFLINE_SYNCING;
+
+ if (wait) /* Without using a thread if waiting. */
+ journal_file_set_offline_internal(f);
+ else {
+ sigset_t ss, saved_ss;
+ int k;
+
+ assert_se(sigfillset(&ss) >= 0);
+
+ r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss);
+ if (r > 0)
+ return -r;
+
+ r = pthread_create(&f->offline_thread, NULL, journal_file_set_offline_thread, f);
+
+ k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL);
+ if (r > 0) {
+ f->offline_state = OFFLINE_JOINED;
+ return -r;
+ }
+ if (k > 0)
+ return -k;
+ }
+
+ return 0;
+}
+
+static int journal_file_set_online(JournalFile *f) {
+ bool wait = true;
+
+ assert(f);
+
+ if (!f->writable)
+ return -EPERM;
+
+ if (f->fd < 0 || !f->header)
+ return -EINVAL;
+
+ while (wait) {
+ switch (f->offline_state) {
+ case OFFLINE_JOINED:
+ /* No offline thread, no need to wait. */
+ wait = false;
+ break;
+
+ case OFFLINE_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_CANCEL))
+ continue;
+ /* Canceled syncing prior to offlining, no need to wait. */
+ wait = false;
+ break;
+
+ case OFFLINE_AGAIN_FROM_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_CANCEL))
+ continue;
+ /* Canceled restart from syncing, no need to wait. */
+ wait = false;
+ break;
+
+ case OFFLINE_AGAIN_FROM_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_CANCEL))
+ continue;
+ /* Canceled restart from offlining, must wait for offlining to complete however. */
+ _fallthrough_;
+ default: {
+ int r;
+
+ r = journal_file_set_offline_thread_join(f);
+ if (r < 0)
+ return r;
+
+ wait = false;
+ break;
+ }
+ }
+ }
+
+ if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
+ return -EIO;
+
+ switch (f->header->state) {
+ case STATE_ONLINE:
+ return 0;
+
+ case STATE_OFFLINE:
+ f->header->state = STATE_ONLINE;
+ (void) fsync(f->fd);
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+bool journal_file_is_offlining(JournalFile *f) {
+ assert(f);
+
+ __sync_synchronize();
+
+ if (IN_SET(f->offline_state, OFFLINE_DONE, OFFLINE_JOINED))
+ return false;
+
+ return true;
+}
+
+JournalFile* journal_file_close(JournalFile *f) {
+ assert(f);
+
+#if HAVE_GCRYPT
+ /* Write the final tag */
+ if (f->seal && f->writable) {
+ int r;
+
+ r = journal_file_append_tag(f);
+ if (r < 0)
+ log_error_errno(r, "Failed to append tag when closing journal: %m");
+ }
+#endif
+
+ if (f->post_change_timer) {
+ if (sd_event_source_get_enabled(f->post_change_timer, NULL) > 0)
+ journal_file_post_change(f);
+
+ (void) sd_event_source_set_enabled(f->post_change_timer, SD_EVENT_OFF);
+ sd_event_source_unref(f->post_change_timer);
+ }
+
+ journal_file_set_offline(f, true);
+
+ if (f->mmap && f->cache_fd)
+ mmap_cache_free_fd(f->mmap, f->cache_fd);
+
+ if (f->fd >= 0 && f->defrag_on_close) {
+
+ /* Be friendly to btrfs: turn COW back on again now,
+ * and defragment the file. We won't write to the file
+ * ever again, hence remove all fragmentation, and
+ * reenable all the good bits COW usually provides
+ * (such as data checksumming). */
+
+ (void) chattr_fd(f->fd, 0, FS_NOCOW_FL, NULL);
+ (void) btrfs_defrag_fd(f->fd);
+ }
+
+ if (f->close_fd)
+ safe_close(f->fd);
+ free(f->path);
+
+ mmap_cache_unref(f->mmap);
+
+ ordered_hashmap_free_free(f->chain_cache);
+
+#if HAVE_XZ || HAVE_LZ4
+ free(f->compress_buffer);
+#endif
+
+#if HAVE_GCRYPT
+ if (f->fss_file)
+ munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
+ else
+ free(f->fsprg_state);
+
+ free(f->fsprg_seed);
+
+ if (f->hmac)
+ gcry_md_close(f->hmac);
+#endif
+
+ return mfree(f);
+}
+
+static int journal_file_init_header(JournalFile *f, JournalFile *template) {
+ Header h = {};
+ ssize_t k;
+ int r;
+
+ assert(f);
+
+ memcpy(h.signature, HEADER_SIGNATURE, 8);
+ h.header_size = htole64(ALIGN64(sizeof(h)));
+
+ h.incompatible_flags |= htole32(
+ f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
+ f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
+
+ h.compatible_flags = htole32(
+ f->seal * HEADER_COMPATIBLE_SEALED);
+
+ r = sd_id128_randomize(&h.file_id);
+ if (r < 0)
+ return r;
+
+ if (template) {
+ h.seqnum_id = template->header->seqnum_id;
+ h.tail_entry_seqnum = template->header->tail_entry_seqnum;
+ } else
+ h.seqnum_id = h.file_id;
+
+ k = pwrite(f->fd, &h, sizeof(h), 0);
+ if (k < 0)
+ return -errno;
+
+ if (k != sizeof(h))
+ return -EIO;
+
+ return 0;
+}
+
+static int journal_file_refresh_header(JournalFile *f) {
+ sd_id128_t boot_id;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ r = sd_id128_get_machine(&f->header->machine_id);
+ if (IN_SET(r, -ENOENT, -ENOMEDIUM))
+ /* We don't have a machine-id, let's continue without */
+ zero(f->header->machine_id);
+ else if (r < 0)
+ return r;
+
+ r = sd_id128_get_boot(&boot_id);
+ if (r < 0)
+ return r;
+
+ f->header->boot_id = boot_id;
+
+ r = journal_file_set_online(f);
+
+ /* Sync the online state to disk */
+ (void) fsync(f->fd);
+
+ /* We likely just created a new file, also sync the directory this file is located in. */
+ (void) fsync_directory_of_file(f->fd);
+
+ return r;
+}
+
+static bool warn_wrong_flags(const JournalFile *f, bool compatible) {
+ const uint32_t any = compatible ? HEADER_COMPATIBLE_ANY : HEADER_INCOMPATIBLE_ANY,
+ supported = compatible ? HEADER_COMPATIBLE_SUPPORTED : HEADER_INCOMPATIBLE_SUPPORTED;
+ const char *type = compatible ? "compatible" : "incompatible";
+ uint32_t flags;
+
+ flags = le32toh(compatible ? f->header->compatible_flags : f->header->incompatible_flags);
+
+ if (flags & ~supported) {
+ if (flags & ~any)
+ log_debug("Journal file %s has unknown %s flags 0x%"PRIx32,
+ f->path, type, flags & ~any);
+ flags = (flags & any) & ~supported;
+ if (flags) {
+ const char* strv[3];
+ unsigned n = 0;
+ _cleanup_free_ char *t = NULL;
+
+ if (compatible && (flags & HEADER_COMPATIBLE_SEALED))
+ strv[n++] = "sealed";
+ if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ))
+ strv[n++] = "xz-compressed";
+ if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4))
+ strv[n++] = "lz4-compressed";
+ strv[n] = NULL;
+ assert(n < ELEMENTSOF(strv));
+
+ t = strv_join((char**) strv, ", ");
+ log_debug("Journal file %s uses %s %s %s disabled at compilation time.",
+ f->path, type, n > 1 ? "flags" : "flag", strnull(t));
+ }
+ return true;
+ }
+
+ return false;
+}
+
+static int journal_file_verify_header(JournalFile *f) {
+ uint64_t arena_size, header_size;
+
+ assert(f);
+ assert(f->header);
+
+ if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
+ return -EBADMSG;
+
+ /* In both read and write mode we refuse to open files with incompatible
+ * flags we don't know. */
+ if (warn_wrong_flags(f, false))
+ return -EPROTONOSUPPORT;
+
+ /* When open for writing we refuse to open files with compatible flags, too. */
+ if (f->writable && warn_wrong_flags(f, true))
+ return -EPROTONOSUPPORT;
+
+ if (f->header->state >= _STATE_MAX)
+ return -EBADMSG;
+
+ header_size = le64toh(f->header->header_size);
+
+ /* The first addition was n_data, so check that we are at least this large */
+ if (header_size < HEADER_SIZE_MIN)
+ return -EBADMSG;
+
+ if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
+ return -EBADMSG;
+
+ arena_size = le64toh(f->header->arena_size);
+
+ if (UINT64_MAX - header_size < arena_size || header_size + arena_size > (uint64_t) f->last_stat.st_size)
+ return -ENODATA;
+
+ if (le64toh(f->header->tail_object_offset) > header_size + arena_size)
+ return -ENODATA;
+
+ if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
+ !VALID64(le64toh(f->header->field_hash_table_offset)) ||
+ !VALID64(le64toh(f->header->tail_object_offset)) ||
+ !VALID64(le64toh(f->header->entry_array_offset)))
+ return -ENODATA;
+
+ if (f->writable) {
+ sd_id128_t machine_id;
+ uint8_t state;
+ int r;
+
+ r = sd_id128_get_machine(&machine_id);
+ if (r < 0)
+ return r;
+
+ if (!sd_id128_equal(machine_id, f->header->machine_id))
+ return -EHOSTDOWN;
+
+ state = f->header->state;
+
+ if (state == STATE_ARCHIVED)
+ return -ESHUTDOWN; /* Already archived */
+ else if (state == STATE_ONLINE)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBUSY),
+ "Journal file %s is already online. Assuming unclean closing.",
+ f->path);
+ else if (state != STATE_OFFLINE)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBUSY),
+ "Journal file %s has unknown state %i.",
+ f->path, state);
+
+ if (f->header->field_hash_table_size == 0 || f->header->data_hash_table_size == 0)
+ return -EBADMSG;
+
+ /* Don't permit appending to files from the future. Because otherwise the realtime timestamps wouldn't
+ * be strictly ordered in the entries in the file anymore, and we can't have that since it breaks
+ * bisection. */
+ if (le64toh(f->header->tail_entry_realtime) > now(CLOCK_REALTIME))
+ return log_debug_errno(SYNTHETIC_ERRNO(ETXTBSY),
+ "Journal file %s is from the future, refusing to append new data to it that'd be older.",
+ f->path);
+ }
+
+ f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
+ f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
+
+ f->seal = JOURNAL_HEADER_SEALED(f->header);
+
+ return 0;
+}
+
+static int journal_file_fstat(JournalFile *f) {
+ int r;
+
+ assert(f);
+ assert(f->fd >= 0);
+
+ if (fstat(f->fd, &f->last_stat) < 0)
+ return -errno;
+
+ f->last_stat_usec = now(CLOCK_MONOTONIC);
+
+ /* Refuse dealing with with files that aren't regular */
+ r = stat_verify_regular(&f->last_stat);
+ if (r < 0)
+ return r;
+
+ /* Refuse appending to files that are already deleted */
+ if (f->last_stat.st_nlink <= 0)
+ return -EIDRM;
+
+ return 0;
+}
+
+static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
+ uint64_t old_size, new_size;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ /* We assume that this file is not sparse, and we know that
+ * for sure, since we always call posix_fallocate()
+ * ourselves */
+
+ if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
+ return -EIO;
+
+ old_size =
+ le64toh(f->header->header_size) +
+ le64toh(f->header->arena_size);
+
+ new_size = PAGE_ALIGN(offset + size);
+ if (new_size < le64toh(f->header->header_size))
+ new_size = le64toh(f->header->header_size);
+
+ if (new_size <= old_size) {
+
+ /* We already pre-allocated enough space, but before
+ * we write to it, let's check with fstat() if the
+ * file got deleted, in order make sure we don't throw
+ * away the data immediately. Don't check fstat() for
+ * all writes though, but only once ever 10s. */
+
+ if (f->last_stat_usec + LAST_STAT_REFRESH_USEC > now(CLOCK_MONOTONIC))
+ return 0;
+
+ return journal_file_fstat(f);
+ }
+
+ /* Allocate more space. */
+
+ if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
+ return -E2BIG;
+
+ if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
+ struct statvfs svfs;
+
+ if (fstatvfs(f->fd, &svfs) >= 0) {
+ uint64_t available;
+
+ available = LESS_BY((uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize, f->metrics.keep_free);
+
+ if (new_size - old_size > available)
+ return -E2BIG;
+ }
+ }
+
+ /* Increase by larger blocks at once */
+ new_size = DIV_ROUND_UP(new_size, FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
+ if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
+ new_size = f->metrics.max_size;
+
+ /* Note that the glibc fallocate() fallback is very
+ inefficient, hence we try to minimize the allocation area
+ as we can. */
+ r = posix_fallocate(f->fd, old_size, new_size - old_size);
+ if (r != 0)
+ return -r;
+
+ f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
+
+ return journal_file_fstat(f);
+}
+
+static unsigned type_to_context(ObjectType type) {
+ /* One context for each type, plus one catch-all for the rest */
+ assert_cc(_OBJECT_TYPE_MAX <= MMAP_CACHE_MAX_CONTEXTS);
+ assert_cc(CONTEXT_HEADER < MMAP_CACHE_MAX_CONTEXTS);
+ return type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX ? type : 0;
+}
+
+static int journal_file_move_to(JournalFile *f, ObjectType type, bool keep_always, uint64_t offset, uint64_t size, void **ret, size_t *ret_size) {
+ int r;
+
+ assert(f);
+ assert(ret);
+
+ if (size <= 0)
+ return -EINVAL;
+
+ /* Avoid SIGBUS on invalid accesses */
+ if (offset + size > (uint64_t) f->last_stat.st_size) {
+ /* Hmm, out of range? Let's refresh the fstat() data
+ * first, before we trust that check. */
+
+ r = journal_file_fstat(f);
+ if (r < 0)
+ return r;
+
+ if (offset + size > (uint64_t) f->last_stat.st_size)
+ return -EADDRNOTAVAIL;
+ }
+
+ return mmap_cache_get(f->mmap, f->cache_fd, f->prot, type_to_context(type), keep_always, offset, size, &f->last_stat, ret, ret_size);
+}
+
+static uint64_t minimum_header_size(Object *o) {
+
+ static const uint64_t table[] = {
+ [OBJECT_DATA] = sizeof(DataObject),
+ [OBJECT_FIELD] = sizeof(FieldObject),
+ [OBJECT_ENTRY] = sizeof(EntryObject),
+ [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
+ [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
+ [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
+ [OBJECT_TAG] = sizeof(TagObject),
+ };
+
+ if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
+ return sizeof(ObjectHeader);
+
+ return table[o->object.type];
+}
+
+/* Lightweight object checks. We want this to be fast, so that we won't
+ * slowdown every journal_file_move_to_object() call too much. */
+static int journal_file_check_object(JournalFile *f, uint64_t offset, Object *o) {
+ assert(f);
+ assert(o);
+
+ switch (o->object.type) {
+
+ case OBJECT_DATA: {
+ if ((le64toh(o->data.entry_offset) == 0) ^ (le64toh(o->data.n_entries) == 0))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Bad n_entries: %" PRIu64 ": %" PRIu64,
+ le64toh(o->data.n_entries),
+ offset);
+
+ if (le64toh(o->object.size) - offsetof(DataObject, payload) <= 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Bad object size (<= %zu): %" PRIu64 ": %" PRIu64,
+ offsetof(DataObject, payload),
+ le64toh(o->object.size),
+ offset);
+
+ if (!VALID64(le64toh(o->data.next_hash_offset)) ||
+ !VALID64(le64toh(o->data.next_field_offset)) ||
+ !VALID64(le64toh(o->data.entry_offset)) ||
+ !VALID64(le64toh(o->data.entry_array_offset)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid offset, next_hash_offset=" OFSfmt ", next_field_offset=" OFSfmt ", entry_offset=" OFSfmt ", entry_array_offset=" OFSfmt ": %" PRIu64,
+ le64toh(o->data.next_hash_offset),
+ le64toh(o->data.next_field_offset),
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset),
+ offset);
+
+ break;
+ }
+
+ case OBJECT_FIELD:
+ if (le64toh(o->object.size) - offsetof(FieldObject, payload) <= 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Bad field size (<= %zu): %" PRIu64 ": %" PRIu64,
+ offsetof(FieldObject, payload),
+ le64toh(o->object.size),
+ offset);
+
+ if (!VALID64(le64toh(o->field.next_hash_offset)) ||
+ !VALID64(le64toh(o->field.head_data_offset)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid offset, next_hash_offset=" OFSfmt ", head_data_offset=" OFSfmt ": %" PRIu64,
+ le64toh(o->field.next_hash_offset),
+ le64toh(o->field.head_data_offset),
+ offset);
+ break;
+
+ case OBJECT_ENTRY:
+ if ((le64toh(o->object.size) - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Bad entry size (<= %zu): %" PRIu64 ": %" PRIu64,
+ offsetof(EntryObject, items),
+ le64toh(o->object.size),
+ offset);
+
+ if ((le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid number items in entry: %" PRIu64 ": %" PRIu64,
+ (le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem),
+ offset);
+
+ if (le64toh(o->entry.seqnum) <= 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid entry seqnum: %" PRIx64 ": %" PRIu64,
+ le64toh(o->entry.seqnum),
+ offset);
+
+ if (!VALID_REALTIME(le64toh(o->entry.realtime)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid entry realtime timestamp: %" PRIu64 ": %" PRIu64,
+ le64toh(o->entry.realtime),
+ offset);
+
+ if (!VALID_MONOTONIC(le64toh(o->entry.monotonic)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid entry monotonic timestamp: %" PRIu64 ": %" PRIu64,
+ le64toh(o->entry.monotonic),
+ offset);
+
+ break;
+
+ case OBJECT_DATA_HASH_TABLE:
+ case OBJECT_FIELD_HASH_TABLE:
+ if ((le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||
+ (le64toh(o->object.size) - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid %s hash table size: %" PRIu64 ": %" PRIu64,
+ o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
+ le64toh(o->object.size),
+ offset);
+
+ break;
+
+ case OBJECT_ENTRY_ARRAY:
+ if ((le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||
+ (le64toh(o->object.size) - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid object entry array size: %" PRIu64 ": %" PRIu64,
+ le64toh(o->object.size),
+ offset);
+
+ if (!VALID64(le64toh(o->entry_array.next_entry_array_offset)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid object entry array next_entry_array_offset: " OFSfmt ": %" PRIu64,
+ le64toh(o->entry_array.next_entry_array_offset),
+ offset);
+
+ break;
+
+ case OBJECT_TAG:
+ if (le64toh(o->object.size) != sizeof(TagObject))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid object tag size: %" PRIu64 ": %" PRIu64,
+ le64toh(o->object.size),
+ offset);
+
+ if (!VALID_EPOCH(le64toh(o->tag.epoch)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid object tag epoch: %" PRIu64 ": %" PRIu64,
+ le64toh(o->tag.epoch), offset);
+
+ break;
+ }
+
+ return 0;
+}
+
+int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret) {
+ int r;
+ void *t;
+ size_t tsize;
+ Object *o;
+ uint64_t s;
+
+ assert(f);
+ assert(ret);
+
+ /* Objects may only be located at multiple of 64 bit */
+ if (!VALID64(offset))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to object at non-64bit boundary: %" PRIu64,
+ offset);
+
+ /* Object may not be located in the file header */
+ if (offset < le64toh(f->header->header_size))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to object located in file header: %" PRIu64,
+ offset);
+
+ r = journal_file_move_to(f, type, false, offset, sizeof(ObjectHeader), &t, &tsize);
+ if (r < 0)
+ return r;
+
+ o = (Object*) t;
+ s = le64toh(o->object.size);
+
+ if (s == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to uninitialized object: %" PRIu64,
+ offset);
+ if (s < sizeof(ObjectHeader))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to overly short object: %" PRIu64,
+ offset);
+
+ if (o->object.type <= OBJECT_UNUSED)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to object with invalid type: %" PRIu64,
+ offset);
+
+ if (s < minimum_header_size(o))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to truncated object: %" PRIu64,
+ offset);
+
+ if (type > OBJECT_UNUSED && o->object.type != type)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to object of unexpected type: %" PRIu64,
+ offset);
+
+ if (s > tsize) {
+ r = journal_file_move_to(f, type, false, offset, s, &t, NULL);
+ if (r < 0)
+ return r;
+
+ o = (Object*) t;
+ }
+
+ r = journal_file_check_object(f, offset, o);
+ if (r < 0)
+ return r;
+
+ *ret = o;
+ return 0;
+}
+
+static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
+ uint64_t r;
+
+ assert(f);
+ assert(f->header);
+
+ r = le64toh(f->header->tail_entry_seqnum) + 1;
+
+ if (seqnum) {
+ /* If an external seqnum counter was passed, we update
+ * both the local and the external one, and set it to
+ * the maximum of both */
+
+ if (*seqnum + 1 > r)
+ r = *seqnum + 1;
+
+ *seqnum = r;
+ }
+
+ f->header->tail_entry_seqnum = htole64(r);
+
+ if (f->header->head_entry_seqnum == 0)
+ f->header->head_entry_seqnum = htole64(r);
+
+ return r;
+}
+
+int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret, uint64_t *offset) {
+ int r;
+ uint64_t p;
+ Object *tail, *o;
+ void *t;
+
+ assert(f);
+ assert(f->header);
+ assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX);
+ assert(size >= sizeof(ObjectHeader));
+ assert(offset);
+ assert(ret);
+
+ r = journal_file_set_online(f);
+ if (r < 0)
+ return r;
+
+ p = le64toh(f->header->tail_object_offset);
+ if (p == 0)
+ p = le64toh(f->header->header_size);
+ else {
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &tail);
+ if (r < 0)
+ return r;
+
+ p += ALIGN64(le64toh(tail->object.size));
+ }
+
+ r = journal_file_allocate(f, p, size);
+ if (r < 0)
+ return r;
+
+ r = journal_file_move_to(f, type, false, p, size, &t, NULL);
+ if (r < 0)
+ return r;
+
+ o = (Object*) t;
+
+ zero(o->object);
+ o->object.type = type;
+ o->object.size = htole64(size);
+
+ f->header->tail_object_offset = htole64(p);
+ f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
+
+ *ret = o;
+ *offset = p;
+
+ return 0;
+}
+
+static int journal_file_setup_data_hash_table(JournalFile *f) {
+ uint64_t s, p;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ /* We estimate that we need 1 hash table entry per 768 bytes
+ of journal file and we want to make sure we never get
+ beyond 75% fill level. Calculate the hash table size for
+ the maximum file size based on these metrics. */
+
+ s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
+ if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
+ s = DEFAULT_DATA_HASH_TABLE_SIZE;
+
+ log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
+
+ r = journal_file_append_object(f,
+ OBJECT_DATA_HASH_TABLE,
+ offsetof(Object, hash_table.items) + s,
+ &o, &p);
+ if (r < 0)
+ return r;
+
+ memzero(o->hash_table.items, s);
+
+ f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
+ f->header->data_hash_table_size = htole64(s);
+
+ return 0;
+}
+
+static int journal_file_setup_field_hash_table(JournalFile *f) {
+ uint64_t s, p;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ /* We use a fixed size hash table for the fields as this
+ * number should grow very slowly only */
+
+ s = DEFAULT_FIELD_HASH_TABLE_SIZE;
+ r = journal_file_append_object(f,
+ OBJECT_FIELD_HASH_TABLE,
+ offsetof(Object, hash_table.items) + s,
+ &o, &p);
+ if (r < 0)
+ return r;
+
+ memzero(o->hash_table.items, s);
+
+ f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
+ f->header->field_hash_table_size = htole64(s);
+
+ return 0;
+}
+
+int journal_file_map_data_hash_table(JournalFile *f) {
+ uint64_t s, p;
+ void *t;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ if (f->data_hash_table)
+ return 0;
+
+ p = le64toh(f->header->data_hash_table_offset);
+ s = le64toh(f->header->data_hash_table_size);
+
+ r = journal_file_move_to(f,
+ OBJECT_DATA_HASH_TABLE,
+ true,
+ p, s,
+ &t, NULL);
+ if (r < 0)
+ return r;
+
+ f->data_hash_table = t;
+ return 0;
+}
+
+int journal_file_map_field_hash_table(JournalFile *f) {
+ uint64_t s, p;
+ void *t;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ if (f->field_hash_table)
+ return 0;
+
+ p = le64toh(f->header->field_hash_table_offset);
+ s = le64toh(f->header->field_hash_table_size);
+
+ r = journal_file_move_to(f,
+ OBJECT_FIELD_HASH_TABLE,
+ true,
+ p, s,
+ &t, NULL);
+ if (r < 0)
+ return r;
+
+ f->field_hash_table = t;
+ return 0;
+}
+
+static int journal_file_link_field(
+ JournalFile *f,
+ Object *o,
+ uint64_t offset,
+ uint64_t hash) {
+
+ uint64_t p, h, m;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(f->field_hash_table);
+ assert(o);
+ assert(offset > 0);
+
+ if (o->object.type != OBJECT_FIELD)
+ return -EINVAL;
+
+ m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
+ if (m <= 0)
+ return -EBADMSG;
+
+ /* This might alter the window we are looking at */
+ o->field.next_hash_offset = o->field.head_data_offset = 0;
+
+ h = hash % m;
+ p = le64toh(f->field_hash_table[h].tail_hash_offset);
+ if (p == 0)
+ f->field_hash_table[h].head_hash_offset = htole64(offset);
+ else {
+ r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
+ if (r < 0)
+ return r;
+
+ o->field.next_hash_offset = htole64(offset);
+ }
+
+ f->field_hash_table[h].tail_hash_offset = htole64(offset);
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
+ f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
+
+ return 0;
+}
+
+static int journal_file_link_data(
+ JournalFile *f,
+ Object *o,
+ uint64_t offset,
+ uint64_t hash) {
+
+ uint64_t p, h, m;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(f->data_hash_table);
+ assert(o);
+ assert(offset > 0);
+
+ if (o->object.type != OBJECT_DATA)
+ return -EINVAL;
+
+ m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+ if (m <= 0)
+ return -EBADMSG;
+
+ /* This might alter the window we are looking at */
+ o->data.next_hash_offset = o->data.next_field_offset = 0;
+ o->data.entry_offset = o->data.entry_array_offset = 0;
+ o->data.n_entries = 0;
+
+ h = hash % m;
+ p = le64toh(f->data_hash_table[h].tail_hash_offset);
+ if (p == 0)
+ /* Only entry in the hash table is easy */
+ f->data_hash_table[h].head_hash_offset = htole64(offset);
+ else {
+ /* Move back to the previous data object, to patch in
+ * pointer */
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ o->data.next_hash_offset = htole64(offset);
+ }
+
+ f->data_hash_table[h].tail_hash_offset = htole64(offset);
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+ f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
+
+ return 0;
+}
+
+int journal_file_find_field_object_with_hash(
+ JournalFile *f,
+ const void *field, uint64_t size, uint64_t hash,
+ Object **ret, uint64_t *offset) {
+
+ uint64_t p, osize, h, m;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(field && size > 0);
+
+ /* If the field hash table is empty, we can't find anything */
+ if (le64toh(f->header->field_hash_table_size) <= 0)
+ return 0;
+
+ /* Map the field hash table, if it isn't mapped yet. */
+ r = journal_file_map_field_hash_table(f);
+ if (r < 0)
+ return r;
+
+ osize = offsetof(Object, field.payload) + size;
+
+ m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
+ if (m <= 0)
+ return -EBADMSG;
+
+ h = hash % m;
+ p = le64toh(f->field_hash_table[h].head_hash_offset);
+
+ while (p > 0) {
+ Object *o;
+
+ r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
+ if (r < 0)
+ return r;
+
+ if (le64toh(o->field.hash) == hash &&
+ le64toh(o->object.size) == osize &&
+ memcmp(o->field.payload, field, size) == 0) {
+
+ if (ret)
+ *ret = o;
+ if (offset)
+ *offset = p;
+
+ return 1;
+ }
+
+ p = le64toh(o->field.next_hash_offset);
+ }
+
+ return 0;
+}
+
+int journal_file_find_field_object(
+ JournalFile *f,
+ const void *field, uint64_t size,
+ Object **ret, uint64_t *offset) {
+
+ uint64_t hash;
+
+ assert(f);
+ assert(field && size > 0);
+
+ hash = hash64(field, size);
+
+ return journal_file_find_field_object_with_hash(f,
+ field, size, hash,
+ ret, offset);
+}
+
+int journal_file_find_data_object_with_hash(
+ JournalFile *f,
+ const void *data, uint64_t size, uint64_t hash,
+ Object **ret, uint64_t *offset) {
+
+ uint64_t p, osize, h, m;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(data || size == 0);
+
+ /* If there's no data hash table, then there's no entry. */
+ if (le64toh(f->header->data_hash_table_size) <= 0)
+ return 0;
+
+ /* Map the data hash table, if it isn't mapped yet. */
+ r = journal_file_map_data_hash_table(f);
+ if (r < 0)
+ return r;
+
+ osize = offsetof(Object, data.payload) + size;
+
+ m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+ if (m <= 0)
+ return -EBADMSG;
+
+ h = hash % m;
+ p = le64toh(f->data_hash_table[h].head_hash_offset);
+
+ while (p > 0) {
+ Object *o;
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ if (le64toh(o->data.hash) != hash)
+ goto next;
+
+ if (o->object.flags & OBJECT_COMPRESSION_MASK) {
+#if HAVE_XZ || HAVE_LZ4
+ uint64_t l;
+ size_t rsize = 0;
+
+ l = le64toh(o->object.size);
+ if (l <= offsetof(Object, data.payload))
+ return -EBADMSG;
+
+ l -= offsetof(Object, data.payload);
+
+ r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
+ o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0);
+ if (r < 0)
+ return r;
+
+ if (rsize == size &&
+ memcmp(f->compress_buffer, data, size) == 0) {
+
+ if (ret)
+ *ret = o;
+
+ if (offset)
+ *offset = p;
+
+ return 1;
+ }
+#else
+ return -EPROTONOSUPPORT;
+#endif
+ } else if (le64toh(o->object.size) == osize &&
+ memcmp(o->data.payload, data, size) == 0) {
+
+ if (ret)
+ *ret = o;
+
+ if (offset)
+ *offset = p;
+
+ return 1;
+ }
+
+ next:
+ p = le64toh(o->data.next_hash_offset);
+ }
+
+ return 0;
+}
+
+int journal_file_find_data_object(
+ JournalFile *f,
+ const void *data, uint64_t size,
+ Object **ret, uint64_t *offset) {
+
+ uint64_t hash;
+
+ assert(f);
+ assert(data || size == 0);
+
+ hash = hash64(data, size);
+
+ return journal_file_find_data_object_with_hash(f,
+ data, size, hash,
+ ret, offset);
+}
+
+static int journal_file_append_field(
+ JournalFile *f,
+ const void *field, uint64_t size,
+ Object **ret, uint64_t *offset) {
+
+ uint64_t hash, p;
+ uint64_t osize;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(field && size > 0);
+
+ hash = hash64(field, size);
+
+ r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
+ if (r < 0)
+ return r;
+ else if (r > 0) {
+
+ if (ret)
+ *ret = o;
+
+ if (offset)
+ *offset = p;
+
+ return 0;
+ }
+
+ osize = offsetof(Object, field.payload) + size;
+ r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
+ if (r < 0)
+ return r;
+
+ o->field.hash = htole64(hash);
+ memcpy(o->field.payload, field, size);
+
+ r = journal_file_link_field(f, o, p, hash);
+ if (r < 0)
+ return r;
+
+ /* The linking might have altered the window, so let's
+ * refresh our pointer */
+ r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
+ if (r < 0)
+ return r;
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
+ if (r < 0)
+ return r;
+#endif
+
+ if (ret)
+ *ret = o;
+
+ if (offset)
+ *offset = p;
+
+ return 0;
+}
+
+static int journal_file_append_data(
+ JournalFile *f,
+ const void *data, uint64_t size,
+ Object **ret, uint64_t *offset) {
+
+ uint64_t hash, p;
+ uint64_t osize;
+ Object *o;
+ int r, compression = 0;
+ const void *eq;
+
+ assert(f);
+ assert(data || size == 0);
+
+ hash = hash64(data, size);
+
+ r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+
+ if (ret)
+ *ret = o;
+
+ if (offset)
+ *offset = p;
+
+ return 0;
+ }
+
+ osize = offsetof(Object, data.payload) + size;
+ r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
+ if (r < 0)
+ return r;
+
+ o->data.hash = htole64(hash);
+
+#if HAVE_XZ || HAVE_LZ4
+ if (JOURNAL_FILE_COMPRESS(f) && size >= f->compress_threshold_bytes) {
+ size_t rsize = 0;
+
+ compression = compress_blob(data, size, o->data.payload, size - 1, &rsize);
+
+ if (compression >= 0) {
+ o->object.size = htole64(offsetof(Object, data.payload) + rsize);
+ o->object.flags |= compression;
+
+ log_debug("Compressed data object %"PRIu64" -> %zu using %s",
+ size, rsize, object_compressed_to_string(compression));
+ } else
+ /* Compression didn't work, we don't really care why, let's continue without compression */
+ compression = 0;
+ }
+#endif
+
+ if (compression == 0)
+ memcpy_safe(o->data.payload, data, size);
+
+ r = journal_file_link_data(f, o, p, hash);
+ if (r < 0)
+ return r;
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
+ if (r < 0)
+ return r;
+#endif
+
+ /* The linking might have altered the window, so let's
+ * refresh our pointer */
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ if (!data)
+ eq = NULL;
+ else
+ eq = memchr(data, '=', size);
+ if (eq && eq > data) {
+ Object *fo = NULL;
+ uint64_t fp;
+
+ /* Create field object ... */
+ r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
+ if (r < 0)
+ return r;
+
+ /* ... and link it in. */
+ o->data.next_field_offset = fo->field.head_data_offset;
+ fo->field.head_data_offset = le64toh(p);
+ }
+
+ if (ret)
+ *ret = o;
+
+ if (offset)
+ *offset = p;
+
+ return 0;
+}
+
+uint64_t journal_file_entry_n_items(Object *o) {
+ assert(o);
+
+ if (o->object.type != OBJECT_ENTRY)
+ return 0;
+
+ return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
+}
+
+uint64_t journal_file_entry_array_n_items(Object *o) {
+ assert(o);
+
+ if (o->object.type != OBJECT_ENTRY_ARRAY)
+ return 0;
+
+ return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
+}
+
+uint64_t journal_file_hash_table_n_items(Object *o) {
+ assert(o);
+
+ if (!IN_SET(o->object.type, OBJECT_DATA_HASH_TABLE, OBJECT_FIELD_HASH_TABLE))
+ return 0;
+
+ return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
+}
+
+static int link_entry_into_array(JournalFile *f,
+ le64_t *first,
+ le64_t *idx,
+ uint64_t p) {
+ int r;
+ uint64_t n = 0, ap = 0, q, i, a, hidx;
+ Object *o;
+
+ assert(f);
+ assert(f->header);
+ assert(first);
+ assert(idx);
+ assert(p > 0);
+
+ a = le64toh(*first);
+ i = hidx = le64toh(*idx);
+ while (a > 0) {
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+
+ n = journal_file_entry_array_n_items(o);
+ if (i < n) {
+ o->entry_array.items[i] = htole64(p);
+ *idx = htole64(hidx + 1);
+ return 0;
+ }
+
+ i -= n;
+ ap = a;
+ a = le64toh(o->entry_array.next_entry_array_offset);
+ }
+
+ if (hidx > n)
+ n = (hidx+1) * 2;
+ else
+ n = n * 2;
+
+ if (n < 4)
+ n = 4;
+
+ r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
+ offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
+ &o, &q);
+ if (r < 0)
+ return r;
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
+ if (r < 0)
+ return r;
+#endif
+
+ o->entry_array.items[i] = htole64(p);
+
+ if (ap == 0)
+ *first = htole64(q);
+ else {
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
+ if (r < 0)
+ return r;
+
+ o->entry_array.next_entry_array_offset = htole64(q);
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
+ f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
+
+ *idx = htole64(hidx + 1);
+
+ return 0;
+}
+
+static int link_entry_into_array_plus_one(JournalFile *f,
+ le64_t *extra,
+ le64_t *first,
+ le64_t *idx,
+ uint64_t p) {
+
+ int r;
+
+ assert(f);
+ assert(extra);
+ assert(first);
+ assert(idx);
+ assert(p > 0);
+
+ if (*idx == 0)
+ *extra = htole64(p);
+ else {
+ le64_t i;
+
+ i = htole64(le64toh(*idx) - 1);
+ r = link_entry_into_array(f, first, &i, p);
+ if (r < 0)
+ return r;
+ }
+
+ *idx = htole64(le64toh(*idx) + 1);
+ return 0;
+}
+
+static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
+ uint64_t p;
+ int r;
+ assert(f);
+ assert(o);
+ assert(offset > 0);
+
+ p = le64toh(o->entry.items[i].object_offset);
+ if (p == 0)
+ return -EINVAL;
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ return link_entry_into_array_plus_one(f,
+ &o->data.entry_offset,
+ &o->data.entry_array_offset,
+ &o->data.n_entries,
+ offset);
+}
+
+static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
+ uint64_t n, i;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(o);
+ assert(offset > 0);
+
+ if (o->object.type != OBJECT_ENTRY)
+ return -EINVAL;
+
+ __sync_synchronize();
+
+ /* Link up the entry itself */
+ r = link_entry_into_array(f,
+ &f->header->entry_array_offset,
+ &f->header->n_entries,
+ offset);
+ if (r < 0)
+ return r;
+
+ /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
+
+ if (f->header->head_entry_realtime == 0)
+ f->header->head_entry_realtime = o->entry.realtime;
+
+ f->header->tail_entry_realtime = o->entry.realtime;
+ f->header->tail_entry_monotonic = o->entry.monotonic;
+
+ /* Link up the items */
+ n = journal_file_entry_n_items(o);
+ for (i = 0; i < n; i++) {
+ r = journal_file_link_entry_item(f, o, offset, i);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int journal_file_append_entry_internal(
+ JournalFile *f,
+ const dual_timestamp *ts,
+ const sd_id128_t *boot_id,
+ uint64_t xor_hash,
+ const EntryItem items[], unsigned n_items,
+ uint64_t *seqnum,
+ Object **ret, uint64_t *offset) {
+ uint64_t np;
+ uint64_t osize;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(items || n_items == 0);
+ assert(ts);
+
+ osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
+
+ r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
+ if (r < 0)
+ return r;
+
+ o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
+ memcpy_safe(o->entry.items, items, n_items * sizeof(EntryItem));
+ o->entry.realtime = htole64(ts->realtime);
+ o->entry.monotonic = htole64(ts->monotonic);
+ o->entry.xor_hash = htole64(xor_hash);
+ o->entry.boot_id = boot_id ? *boot_id : f->header->boot_id;
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
+ if (r < 0)
+ return r;
+#endif
+
+ r = journal_file_link_entry(f, o, np);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = o;
+
+ if (offset)
+ *offset = np;
+
+ return 0;
+}
+
+void journal_file_post_change(JournalFile *f) {
+ assert(f);
+
+ if (f->fd < 0)
+ return;
+
+ /* inotify() does not receive IN_MODIFY events from file
+ * accesses done via mmap(). After each access we hence
+ * trigger IN_MODIFY by truncating the journal file to its
+ * current size which triggers IN_MODIFY. */
+
+ __sync_synchronize();
+
+ if (ftruncate(f->fd, f->last_stat.st_size) < 0)
+ log_debug_errno(errno, "Failed to truncate file to its own size: %m");
+}
+
+static int post_change_thunk(sd_event_source *timer, uint64_t usec, void *userdata) {
+ assert(userdata);
+
+ journal_file_post_change(userdata);
+
+ return 1;
+}
+
+static void schedule_post_change(JournalFile *f) {
+ uint64_t now;
+ int r;
+
+ assert(f);
+ assert(f->post_change_timer);
+
+ r = sd_event_source_get_enabled(f->post_change_timer, NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to get ftruncate timer state: %m");
+ goto fail;
+ }
+ if (r > 0)
+ return;
+
+ r = sd_event_now(sd_event_source_get_event(f->post_change_timer), CLOCK_MONOTONIC, &now);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to get clock's now for scheduling ftruncate: %m");
+ goto fail;
+ }
+
+ r = sd_event_source_set_time(f->post_change_timer, now + f->post_change_timer_period);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to set time for scheduling ftruncate: %m");
+ goto fail;
+ }
+
+ r = sd_event_source_set_enabled(f->post_change_timer, SD_EVENT_ONESHOT);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to enable scheduled ftruncate: %m");
+ goto fail;
+ }
+
+ return;
+
+fail:
+ /* On failure, let's simply post the change immediately. */
+ journal_file_post_change(f);
+}
+
+/* Enable coalesced change posting in a timer on the provided sd_event instance */
+int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *timer = NULL;
+ int r;
+
+ assert(f);
+ assert_return(!f->post_change_timer, -EINVAL);
+ assert(e);
+ assert(t);
+
+ r = sd_event_add_time(e, &timer, CLOCK_MONOTONIC, 0, 0, post_change_thunk, f);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(timer, SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+
+ f->post_change_timer = TAKE_PTR(timer);
+ f->post_change_timer_period = t;
+
+ return r;
+}
+
+static int entry_item_cmp(const EntryItem *a, const EntryItem *b) {
+ return CMP(le64toh(a->object_offset), le64toh(b->object_offset));
+}
+
+int journal_file_append_entry(
+ JournalFile *f,
+ const dual_timestamp *ts,
+ const sd_id128_t *boot_id,
+ const struct iovec iovec[], unsigned n_iovec,
+ uint64_t *seqnum,
+ Object **ret, uint64_t *offset) {
+
+ unsigned i;
+ EntryItem *items;
+ int r;
+ uint64_t xor_hash = 0;
+ struct dual_timestamp _ts;
+
+ assert(f);
+ assert(f->header);
+ assert(iovec || n_iovec == 0);
+
+ if (ts) {
+ if (!VALID_REALTIME(ts->realtime))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid realtime timestamp %" PRIu64 ", refusing entry.",
+ ts->realtime);
+ if (!VALID_MONOTONIC(ts->monotonic))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid monotomic timestamp %" PRIu64 ", refusing entry.",
+ ts->monotonic);
+ } else {
+ dual_timestamp_get(&_ts);
+ ts = &_ts;
+ }
+
+#if HAVE_GCRYPT
+ r = journal_file_maybe_append_tag(f, ts->realtime);
+ if (r < 0)
+ return r;
+#endif
+
+ /* alloca() can't take 0, hence let's allocate at least one */
+ items = newa(EntryItem, MAX(1u, n_iovec));
+
+ for (i = 0; i < n_iovec; i++) {
+ uint64_t p;
+ Object *o;
+
+ r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
+ if (r < 0)
+ return r;
+
+ xor_hash ^= le64toh(o->data.hash);
+ items[i].object_offset = htole64(p);
+ items[i].hash = o->data.hash;
+ }
+
+ /* Order by the position on disk, in order to improve seek
+ * times for rotating media. */
+ typesafe_qsort(items, n_iovec, entry_item_cmp);
+
+ r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, ret, offset);
+
+ /* If the memory mapping triggered a SIGBUS then we return an
+ * IO error and ignore the error code passed down to us, since
+ * it is very likely just an effect of a nullified replacement
+ * mapping page */
+
+ if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
+ r = -EIO;
+
+ if (f->post_change_timer)
+ schedule_post_change(f);
+ else
+ journal_file_post_change(f);
+
+ return r;
+}
+
+typedef struct ChainCacheItem {
+ uint64_t first; /* the array at the beginning of the chain */
+ uint64_t array; /* the cached array */
+ uint64_t begin; /* the first item in the cached array */
+ uint64_t total; /* the total number of items in all arrays before this one in the chain */
+ uint64_t last_index; /* the last index we looked at, to optimize locality when bisecting */
+} ChainCacheItem;
+
+static void chain_cache_put(
+ OrderedHashmap *h,
+ ChainCacheItem *ci,
+ uint64_t first,
+ uint64_t array,
+ uint64_t begin,
+ uint64_t total,
+ uint64_t last_index) {
+
+ if (!ci) {
+ /* If the chain item to cache for this chain is the
+ * first one it's not worth caching anything */
+ if (array == first)
+ return;
+
+ if (ordered_hashmap_size(h) >= CHAIN_CACHE_MAX) {
+ ci = ordered_hashmap_steal_first(h);
+ assert(ci);
+ } else {
+ ci = new(ChainCacheItem, 1);
+ if (!ci)
+ return;
+ }
+
+ ci->first = first;
+
+ if (ordered_hashmap_put(h, &ci->first, ci) < 0) {
+ free(ci);
+ return;
+ }
+ } else
+ assert(ci->first == first);
+
+ ci->array = array;
+ ci->begin = begin;
+ ci->total = total;
+ ci->last_index = last_index;
+}
+
+static int generic_array_get(
+ JournalFile *f,
+ uint64_t first,
+ uint64_t i,
+ Object **ret, uint64_t *offset) {
+
+ Object *o;
+ uint64_t p = 0, a, t = 0;
+ int r;
+ ChainCacheItem *ci;
+
+ assert(f);
+
+ a = first;
+
+ /* Try the chain cache first */
+ ci = ordered_hashmap_get(f->chain_cache, &first);
+ if (ci && i > ci->total) {
+ a = ci->array;
+ i -= ci->total;
+ t = ci->total;
+ }
+
+ while (a > 0) {
+ uint64_t k;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+
+ k = journal_file_entry_array_n_items(o);
+ if (i < k) {
+ p = le64toh(o->entry_array.items[i]);
+ goto found;
+ }
+
+ i -= k;
+ t += k;
+ a = le64toh(o->entry_array.next_entry_array_offset);
+ }
+
+ return 0;
+
+found:
+ /* Let's cache this item for the next invocation */
+ chain_cache_put(f->chain_cache, ci, first, a, le64toh(o->entry_array.items[0]), t, i);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = o;
+
+ if (offset)
+ *offset = p;
+
+ return 1;
+}
+
+static int generic_array_get_plus_one(
+ JournalFile *f,
+ uint64_t extra,
+ uint64_t first,
+ uint64_t i,
+ Object **ret, uint64_t *offset) {
+
+ Object *o;
+
+ assert(f);
+
+ if (i == 0) {
+ int r;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = o;
+
+ if (offset)
+ *offset = extra;
+
+ return 1;
+ }
+
+ return generic_array_get(f, first, i-1, ret, offset);
+}
+
+enum {
+ TEST_FOUND,
+ TEST_LEFT,
+ TEST_RIGHT
+};
+
+static int generic_array_bisect(
+ JournalFile *f,
+ uint64_t first,
+ uint64_t n,
+ uint64_t needle,
+ int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset,
+ uint64_t *idx) {
+
+ uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
+ bool subtract_one = false;
+ Object *o, *array = NULL;
+ int r;
+ ChainCacheItem *ci;
+
+ assert(f);
+ assert(test_object);
+
+ /* Start with the first array in the chain */
+ a = first;
+
+ ci = ordered_hashmap_get(f->chain_cache, &first);
+ if (ci && n > ci->total && ci->begin != 0) {
+ /* Ah, we have iterated this bisection array chain
+ * previously! Let's see if we can skip ahead in the
+ * chain, as far as the last time. But we can't jump
+ * backwards in the chain, so let's check that
+ * first. */
+
+ r = test_object(f, ci->begin, needle);
+ if (r < 0)
+ return r;
+
+ if (r == TEST_LEFT) {
+ /* OK, what we are looking for is right of the
+ * begin of this EntryArray, so let's jump
+ * straight to previously cached array in the
+ * chain */
+
+ a = ci->array;
+ n -= ci->total;
+ t = ci->total;
+ last_index = ci->last_index;
+ }
+ }
+
+ while (a > 0) {
+ uint64_t left, right, k, lp;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
+ if (r < 0)
+ return r;
+
+ k = journal_file_entry_array_n_items(array);
+ right = MIN(k, n);
+ if (right <= 0)
+ return 0;
+
+ i = right - 1;
+ lp = p = le64toh(array->entry_array.items[i]);
+ if (p <= 0)
+ r = -EBADMSG;
+ else
+ r = test_object(f, p, needle);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Encountered invalid entry while bisecting, cutting algorithm short. (1)");
+ n = i;
+ continue;
+ }
+ if (r < 0)
+ return r;
+
+ if (r == TEST_FOUND)
+ r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+ if (r == TEST_RIGHT) {
+ left = 0;
+ right -= 1;
+
+ if (last_index != (uint64_t) -1) {
+ assert(last_index <= right);
+
+ /* If we cached the last index we
+ * looked at, let's try to not to jump
+ * too wildly around and see if we can
+ * limit the range to look at early to
+ * the immediate neighbors of the last
+ * index we looked at. */
+
+ if (last_index > 0) {
+ uint64_t x = last_index - 1;
+
+ p = le64toh(array->entry_array.items[x]);
+ if (p <= 0)
+ return -EBADMSG;
+
+ r = test_object(f, p, needle);
+ if (r < 0)
+ return r;
+
+ if (r == TEST_FOUND)
+ r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+ if (r == TEST_RIGHT)
+ right = x;
+ else
+ left = x + 1;
+ }
+
+ if (last_index < right) {
+ uint64_t y = last_index + 1;
+
+ p = le64toh(array->entry_array.items[y]);
+ if (p <= 0)
+ return -EBADMSG;
+
+ r = test_object(f, p, needle);
+ if (r < 0)
+ return r;
+
+ if (r == TEST_FOUND)
+ r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+ if (r == TEST_RIGHT)
+ right = y;
+ else
+ left = y + 1;
+ }
+ }
+
+ for (;;) {
+ if (left == right) {
+ if (direction == DIRECTION_UP)
+ subtract_one = true;
+
+ i = left;
+ goto found;
+ }
+
+ assert(left < right);
+ i = (left + right) / 2;
+
+ p = le64toh(array->entry_array.items[i]);
+ if (p <= 0)
+ r = -EBADMSG;
+ else
+ r = test_object(f, p, needle);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Encountered invalid entry while bisecting, cutting algorithm short. (2)");
+ right = n = i;
+ continue;
+ }
+ if (r < 0)
+ return r;
+
+ if (r == TEST_FOUND)
+ r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+ if (r == TEST_RIGHT)
+ right = i;
+ else
+ left = i + 1;
+ }
+ }
+
+ if (k >= n) {
+ if (direction == DIRECTION_UP) {
+ i = n;
+ subtract_one = true;
+ goto found;
+ }
+
+ return 0;
+ }
+
+ last_p = lp;
+
+ n -= k;
+ t += k;
+ last_index = (uint64_t) -1;
+ a = le64toh(array->entry_array.next_entry_array_offset);
+ }
+
+ return 0;
+
+found:
+ if (subtract_one && t == 0 && i == 0)
+ return 0;
+
+ /* Let's cache this item for the next invocation */
+ chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
+
+ if (subtract_one && i == 0)
+ p = last_p;
+ else if (subtract_one)
+ p = le64toh(array->entry_array.items[i-1]);
+ else
+ p = le64toh(array->entry_array.items[i]);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = o;
+
+ if (offset)
+ *offset = p;
+
+ if (idx)
+ *idx = t + i + (subtract_one ? -1 : 0);
+
+ return 1;
+}
+
+static int generic_array_bisect_plus_one(
+ JournalFile *f,
+ uint64_t extra,
+ uint64_t first,
+ uint64_t n,
+ uint64_t needle,
+ int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset,
+ uint64_t *idx) {
+
+ int r;
+ bool step_back = false;
+ Object *o;
+
+ assert(f);
+ assert(test_object);
+
+ if (n <= 0)
+ return 0;
+
+ /* This bisects the array in object 'first', but first checks
+ * an extra */
+ r = test_object(f, extra, needle);
+ if (r < 0)
+ return r;
+
+ if (r == TEST_FOUND)
+ r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+ /* if we are looking with DIRECTION_UP then we need to first
+ see if in the actual array there is a matching entry, and
+ return the last one of that. But if there isn't any we need
+ to return this one. Hence remember this, and return it
+ below. */
+ if (r == TEST_LEFT)
+ step_back = direction == DIRECTION_UP;
+
+ if (r == TEST_RIGHT) {
+ if (direction == DIRECTION_DOWN)
+ goto found;
+ else
+ return 0;
+ }
+
+ r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
+
+ if (r == 0 && step_back)
+ goto found;
+
+ if (r > 0 && idx)
+ (*idx)++;
+
+ return r;
+
+found:
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = o;
+
+ if (offset)
+ *offset = extra;
+
+ if (idx)
+ *idx = 0;
+
+ return 1;
+}
+
+_pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
+ assert(f);
+ assert(p > 0);
+
+ if (p == needle)
+ return TEST_FOUND;
+ else if (p < needle)
+ return TEST_LEFT;
+ else
+ return TEST_RIGHT;
+}
+
+static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(p > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ if (le64toh(o->entry.seqnum) == needle)
+ return TEST_FOUND;
+ else if (le64toh(o->entry.seqnum) < needle)
+ return TEST_LEFT;
+ else
+ return TEST_RIGHT;
+}
+
+int journal_file_move_to_entry_by_seqnum(
+ JournalFile *f,
+ uint64_t seqnum,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+ assert(f);
+ assert(f->header);
+
+ return generic_array_bisect(f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ seqnum,
+ test_object_seqnum,
+ direction,
+ ret, offset, NULL);
+}
+
+static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(p > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ if (le64toh(o->entry.realtime) == needle)
+ return TEST_FOUND;
+ else if (le64toh(o->entry.realtime) < needle)
+ return TEST_LEFT;
+ else
+ return TEST_RIGHT;
+}
+
+int journal_file_move_to_entry_by_realtime(
+ JournalFile *f,
+ uint64_t realtime,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+ assert(f);
+ assert(f->header);
+
+ return generic_array_bisect(f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ realtime,
+ test_object_realtime,
+ direction,
+ ret, offset, NULL);
+}
+
+static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(p > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ if (le64toh(o->entry.monotonic) == needle)
+ return TEST_FOUND;
+ else if (le64toh(o->entry.monotonic) < needle)
+ return TEST_LEFT;
+ else
+ return TEST_RIGHT;
+}
+
+static int find_data_object_by_boot_id(
+ JournalFile *f,
+ sd_id128_t boot_id,
+ Object **o,
+ uint64_t *b) {
+
+ char t[STRLEN("_BOOT_ID=") + 32 + 1] = "_BOOT_ID=";
+
+ sd_id128_to_string(boot_id, t + 9);
+ return journal_file_find_data_object(f, t, sizeof(t) - 1, o, b);
+}
+
+int journal_file_move_to_entry_by_monotonic(
+ JournalFile *f,
+ sd_id128_t boot_id,
+ uint64_t monotonic,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+
+ Object *o;
+ int r;
+
+ assert(f);
+
+ r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOENT;
+
+ return generic_array_bisect_plus_one(f,
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset),
+ le64toh(o->data.n_entries),
+ monotonic,
+ test_object_monotonic,
+ direction,
+ ret, offset, NULL);
+}
+
+void journal_file_reset_location(JournalFile *f) {
+ f->location_type = LOCATION_HEAD;
+ f->current_offset = 0;
+ f->current_seqnum = 0;
+ f->current_realtime = 0;
+ f->current_monotonic = 0;
+ zero(f->current_boot_id);
+ f->current_xor_hash = 0;
+}
+
+void journal_file_save_location(JournalFile *f, Object *o, uint64_t offset) {
+ f->location_type = LOCATION_SEEK;
+ f->current_offset = offset;
+ f->current_seqnum = le64toh(o->entry.seqnum);
+ f->current_realtime = le64toh(o->entry.realtime);
+ f->current_monotonic = le64toh(o->entry.monotonic);
+ f->current_boot_id = o->entry.boot_id;
+ f->current_xor_hash = le64toh(o->entry.xor_hash);
+}
+
+int journal_file_compare_locations(JournalFile *af, JournalFile *bf) {
+ int r;
+
+ assert(af);
+ assert(af->header);
+ assert(bf);
+ assert(bf->header);
+ assert(af->location_type == LOCATION_SEEK);
+ assert(bf->location_type == LOCATION_SEEK);
+
+ /* If contents and timestamps match, these entries are
+ * identical, even if the seqnum does not match */
+ if (sd_id128_equal(af->current_boot_id, bf->current_boot_id) &&
+ af->current_monotonic == bf->current_monotonic &&
+ af->current_realtime == bf->current_realtime &&
+ af->current_xor_hash == bf->current_xor_hash)
+ return 0;
+
+ if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {
+
+ /* If this is from the same seqnum source, compare
+ * seqnums */
+ r = CMP(af->current_seqnum, bf->current_seqnum);
+ if (r != 0)
+ return r;
+
+ /* Wow! This is weird, different data but the same
+ * seqnums? Something is borked, but let's make the
+ * best of it and compare by time. */
+ }
+
+ if (sd_id128_equal(af->current_boot_id, bf->current_boot_id)) {
+
+ /* If the boot id matches, compare monotonic time */
+ r = CMP(af->current_monotonic, bf->current_monotonic);
+ if (r != 0)
+ return r;
+ }
+
+ /* Otherwise, compare UTC time */
+ r = CMP(af->current_realtime, bf->current_realtime);
+ if (r != 0)
+ return r;
+
+ /* Finally, compare by contents */
+ return CMP(af->current_xor_hash, bf->current_xor_hash);
+}
+
+static int bump_array_index(uint64_t *i, direction_t direction, uint64_t n) {
+
+ /* Increase or decrease the specified index, in the right direction. */
+
+ if (direction == DIRECTION_DOWN) {
+ if (*i >= n - 1)
+ return 0;
+
+ (*i) ++;
+ } else {
+ if (*i <= 0)
+ return 0;
+
+ (*i) --;
+ }
+
+ return 1;
+}
+
+static bool check_properly_ordered(uint64_t new_offset, uint64_t old_offset, direction_t direction) {
+
+ /* Consider it an error if any of the two offsets is uninitialized */
+ if (old_offset == 0 || new_offset == 0)
+ return false;
+
+ /* If we go down, the new offset must be larger than the old one. */
+ return direction == DIRECTION_DOWN ?
+ new_offset > old_offset :
+ new_offset < old_offset;
+}
+
+int journal_file_next_entry(
+ JournalFile *f,
+ uint64_t p,
+ direction_t direction,
+ Object **ret, uint64_t *offset) {
+
+ uint64_t i, n, ofs;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ n = le64toh(f->header->n_entries);
+ if (n <= 0)
+ return 0;
+
+ if (p == 0)
+ i = direction == DIRECTION_DOWN ? 0 : n - 1;
+ else {
+ r = generic_array_bisect(f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ p,
+ test_object_offset,
+ DIRECTION_DOWN,
+ NULL, NULL,
+ &i);
+ if (r <= 0)
+ return r;
+
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
+ }
+
+ /* And jump to it */
+ for (;;) {
+ r = generic_array_get(f,
+ le64toh(f->header->entry_array_offset),
+ i,
+ ret, &ofs);
+ if (r > 0)
+ break;
+ if (r != -EBADMSG)
+ return r;
+
+ /* OK, so this entry is borked. Most likely some entry didn't get synced to disk properly, let's see if
+ * the next one might work for us instead. */
+ log_debug_errno(r, "Entry item %" PRIu64 " is bad, skipping over it.", i);
+
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
+ }
+
+ /* Ensure our array is properly ordered. */
+ if (p > 0 && !check_properly_ordered(ofs, p, direction))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s: entry array not properly ordered at entry %" PRIu64,
+ f->path, i);
+
+ if (offset)
+ *offset = ofs;
+
+ return 1;
+}
+
+int journal_file_next_entry_for_data(
+ JournalFile *f,
+ Object *o, uint64_t p,
+ uint64_t data_offset,
+ direction_t direction,
+ Object **ret, uint64_t *offset) {
+
+ uint64_t i, n, ofs;
+ Object *d;
+ int r;
+
+ assert(f);
+ assert(p > 0 || !o);
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
+ if (r < 0)
+ return r;
+
+ n = le64toh(d->data.n_entries);
+ if (n <= 0)
+ return n;
+
+ if (!o)
+ i = direction == DIRECTION_DOWN ? 0 : n - 1;
+ else {
+ if (o->object.type != OBJECT_ENTRY)
+ return -EINVAL;
+
+ r = generic_array_bisect_plus_one(f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ p,
+ test_object_offset,
+ DIRECTION_DOWN,
+ NULL, NULL,
+ &i);
+
+ if (r <= 0)
+ return r;
+
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
+ }
+
+ for (;;) {
+ r = generic_array_get_plus_one(f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ i,
+ ret, &ofs);
+ if (r > 0)
+ break;
+ if (r != -EBADMSG)
+ return r;
+
+ log_debug_errno(r, "Data entry item %" PRIu64 " is bad, skipping over it.", i);
+
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
+ }
+
+ /* Ensure our array is properly ordered. */
+ if (p > 0 && check_properly_ordered(ofs, p, direction))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s data entry array not properly ordered at entry %" PRIu64,
+ f->path, i);
+
+ if (offset)
+ *offset = ofs;
+
+ return 1;
+}
+
+int journal_file_move_to_entry_by_offset_for_data(
+ JournalFile *f,
+ uint64_t data_offset,
+ uint64_t p,
+ direction_t direction,
+ Object **ret, uint64_t *offset) {
+
+ int r;
+ Object *d;
+
+ assert(f);
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
+ if (r < 0)
+ return r;
+
+ return generic_array_bisect_plus_one(f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ p,
+ test_object_offset,
+ direction,
+ ret, offset, NULL);
+}
+
+int journal_file_move_to_entry_by_monotonic_for_data(
+ JournalFile *f,
+ uint64_t data_offset,
+ sd_id128_t boot_id,
+ uint64_t monotonic,
+ direction_t direction,
+ Object **ret, uint64_t *offset) {
+
+ Object *o, *d;
+ int r;
+ uint64_t b, z;
+
+ assert(f);
+
+ /* First, seek by time */
+ r = find_data_object_by_boot_id(f, boot_id, &o, &b);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOENT;
+
+ r = generic_array_bisect_plus_one(f,
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset),
+ le64toh(o->data.n_entries),
+ monotonic,
+ test_object_monotonic,
+ direction,
+ NULL, &z, NULL);
+ if (r <= 0)
+ return r;
+
+ /* And now, continue seeking until we find an entry that
+ * exists in both bisection arrays */
+
+ for (;;) {
+ Object *qo;
+ uint64_t p, q;
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
+ if (r < 0)
+ return r;
+
+ r = generic_array_bisect_plus_one(f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ z,
+ test_object_offset,
+ direction,
+ NULL, &p, NULL);
+ if (r <= 0)
+ return r;
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
+ if (r < 0)
+ return r;
+
+ r = generic_array_bisect_plus_one(f,
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset),
+ le64toh(o->data.n_entries),
+ p,
+ test_object_offset,
+ direction,
+ &qo, &q, NULL);
+
+ if (r <= 0)
+ return r;
+
+ if (p == q) {
+ if (ret)
+ *ret = qo;
+ if (offset)
+ *offset = q;
+
+ return 1;
+ }
+
+ z = q;
+ }
+}
+
+int journal_file_move_to_entry_by_seqnum_for_data(
+ JournalFile *f,
+ uint64_t data_offset,
+ uint64_t seqnum,
+ direction_t direction,
+ Object **ret, uint64_t *offset) {
+
+ Object *d;
+ int r;
+
+ assert(f);
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
+ if (r < 0)
+ return r;
+
+ return generic_array_bisect_plus_one(f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ seqnum,
+ test_object_seqnum,
+ direction,
+ ret, offset, NULL);
+}
+
+int journal_file_move_to_entry_by_realtime_for_data(
+ JournalFile *f,
+ uint64_t data_offset,
+ uint64_t realtime,
+ direction_t direction,
+ Object **ret, uint64_t *offset) {
+
+ Object *d;
+ int r;
+
+ assert(f);
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
+ if (r < 0)
+ return r;
+
+ return generic_array_bisect_plus_one(f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ realtime,
+ test_object_realtime,
+ direction,
+ ret, offset, NULL);
+}
+
+void journal_file_dump(JournalFile *f) {
+ Object *o;
+ int r;
+ uint64_t p;
+
+ assert(f);
+ assert(f->header);
+
+ journal_file_print_header(f);
+
+ p = le64toh(f->header->header_size);
+ while (p != 0) {
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
+ if (r < 0)
+ goto fail;
+
+ switch (o->object.type) {
+
+ case OBJECT_UNUSED:
+ printf("Type: OBJECT_UNUSED\n");
+ break;
+
+ case OBJECT_DATA:
+ printf("Type: OBJECT_DATA\n");
+ break;
+
+ case OBJECT_FIELD:
+ printf("Type: OBJECT_FIELD\n");
+ break;
+
+ case OBJECT_ENTRY:
+ printf("Type: OBJECT_ENTRY seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n",
+ le64toh(o->entry.seqnum),
+ le64toh(o->entry.monotonic),
+ le64toh(o->entry.realtime));
+ break;
+
+ case OBJECT_FIELD_HASH_TABLE:
+ printf("Type: OBJECT_FIELD_HASH_TABLE\n");
+ break;
+
+ case OBJECT_DATA_HASH_TABLE:
+ printf("Type: OBJECT_DATA_HASH_TABLE\n");
+ break;
+
+ case OBJECT_ENTRY_ARRAY:
+ printf("Type: OBJECT_ENTRY_ARRAY\n");
+ break;
+
+ case OBJECT_TAG:
+ printf("Type: OBJECT_TAG seqnum=%"PRIu64" epoch=%"PRIu64"\n",
+ le64toh(o->tag.seqnum),
+ le64toh(o->tag.epoch));
+ break;
+
+ default:
+ printf("Type: unknown (%i)\n", o->object.type);
+ break;
+ }
+
+ if (o->object.flags & OBJECT_COMPRESSION_MASK)
+ printf("Flags: %s\n",
+ object_compressed_to_string(o->object.flags & OBJECT_COMPRESSION_MASK));
+
+ if (p == le64toh(f->header->tail_object_offset))
+ p = 0;
+ else
+ p = p + ALIGN64(le64toh(o->object.size));
+ }
+
+ return;
+fail:
+ log_error("File corrupt");
+}
+
+static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) {
+ const char *x;
+
+ x = format_timestamp(buf, l, t);
+ if (x)
+ return x;
+ return " --- ";
+}
+
+void journal_file_print_header(JournalFile *f) {
+ char a[33], b[33], c[33], d[33];
+ char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
+ struct stat st;
+ char bytes[FORMAT_BYTES_MAX];
+
+ assert(f);
+ assert(f->header);
+
+ printf("File Path: %s\n"
+ "File ID: %s\n"
+ "Machine ID: %s\n"
+ "Boot ID: %s\n"
+ "Sequential Number ID: %s\n"
+ "State: %s\n"
+ "Compatible Flags:%s%s\n"
+ "Incompatible Flags:%s%s%s\n"
+ "Header size: %"PRIu64"\n"
+ "Arena size: %"PRIu64"\n"
+ "Data Hash Table Size: %"PRIu64"\n"
+ "Field Hash Table Size: %"PRIu64"\n"
+ "Rotate Suggested: %s\n"
+ "Head Sequential Number: %"PRIu64" (%"PRIx64")\n"
+ "Tail Sequential Number: %"PRIu64" (%"PRIx64")\n"
+ "Head Realtime Timestamp: %s (%"PRIx64")\n"
+ "Tail Realtime Timestamp: %s (%"PRIx64")\n"
+ "Tail Monotonic Timestamp: %s (%"PRIx64")\n"
+ "Objects: %"PRIu64"\n"
+ "Entry Objects: %"PRIu64"\n",
+ f->path,
+ sd_id128_to_string(f->header->file_id, a),
+ sd_id128_to_string(f->header->machine_id, b),
+ sd_id128_to_string(f->header->boot_id, c),
+ sd_id128_to_string(f->header->seqnum_id, d),
+ f->header->state == STATE_OFFLINE ? "OFFLINE" :
+ f->header->state == STATE_ONLINE ? "ONLINE" :
+ f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
+ JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
+ (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
+ JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
+ JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
+ (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
+ le64toh(f->header->header_size),
+ le64toh(f->header->arena_size),
+ le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
+ le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
+ yes_no(journal_file_rotate_suggested(f, 0)),
+ le64toh(f->header->head_entry_seqnum), le64toh(f->header->head_entry_seqnum),
+ le64toh(f->header->tail_entry_seqnum), le64toh(f->header->tail_entry_seqnum),
+ format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)), le64toh(f->header->head_entry_realtime),
+ format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)), le64toh(f->header->tail_entry_realtime),
+ format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC), le64toh(f->header->tail_entry_monotonic),
+ le64toh(f->header->n_objects),
+ le64toh(f->header->n_entries));
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+ printf("Data Objects: %"PRIu64"\n"
+ "Data Hash Table Fill: %.1f%%\n",
+ le64toh(f->header->n_data),
+ 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
+ printf("Field Objects: %"PRIu64"\n"
+ "Field Hash Table Fill: %.1f%%\n",
+ le64toh(f->header->n_fields),
+ 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
+ printf("Tag Objects: %"PRIu64"\n",
+ le64toh(f->header->n_tags));
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
+ printf("Entry Array Objects: %"PRIu64"\n",
+ le64toh(f->header->n_entry_arrays));
+
+ if (fstat(f->fd, &st) >= 0)
+ printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (uint64_t) st.st_blocks * 512ULL));
+}
+
+static int journal_file_warn_btrfs(JournalFile *f) {
+ unsigned attrs;
+ int r;
+
+ assert(f);
+
+ /* Before we write anything, check if the COW logic is turned
+ * off on btrfs. Given our write pattern that is quite
+ * unfriendly to COW file systems this should greatly improve
+ * performance on COW file systems, such as btrfs, at the
+ * expense of data integrity features (which shouldn't be too
+ * bad, given that we do our own checksumming). */
+
+ r = btrfs_is_filesystem(f->fd);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to determine if journal is on btrfs: %m");
+ if (!r)
+ return 0;
+
+ r = read_attr_fd(f->fd, &attrs);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read file attributes: %m");
+
+ if (attrs & FS_NOCOW_FL) {
+ log_debug("Detected btrfs file system with copy-on-write disabled, all is good.");
+ return 0;
+ }
+
+ log_notice("Creating journal file %s on a btrfs file system, and copy-on-write is enabled. "
+ "This is likely to slow down journal access substantially, please consider turning "
+ "off the copy-on-write file attribute on the journal directory, using chattr +C.", f->path);
+
+ return 1;
+}
+
+int journal_file_open(
+ int fd,
+ const char *fname,
+ int flags,
+ mode_t mode,
+ bool compress,
+ uint64_t compress_threshold_bytes,
+ bool seal,
+ JournalMetrics *metrics,
+ MMapCache *mmap_cache,
+ Set *deferred_closes,
+ JournalFile *template,
+ JournalFile **ret) {
+
+ bool newly_created = false;
+ JournalFile *f;
+ void *h;
+ int r;
+ char bytes[FORMAT_BYTES_MAX];
+
+ assert(ret);
+ assert(fd >= 0 || fname);
+
+ if (!IN_SET((flags & O_ACCMODE), O_RDONLY, O_RDWR))
+ return -EINVAL;
+
+ if (fname && (flags & O_CREAT) && !endswith(fname, ".journal"))
+ return -EINVAL;
+
+ f = new(JournalFile, 1);
+ if (!f)
+ return -ENOMEM;
+
+ *f = (JournalFile) {
+ .fd = fd,
+ .mode = mode,
+
+ .flags = flags,
+ .prot = prot_from_flags(flags),
+ .writable = (flags & O_ACCMODE) != O_RDONLY,
+
+#if HAVE_LZ4
+ .compress_lz4 = compress,
+#elif HAVE_XZ
+ .compress_xz = compress,
+#endif
+ .compress_threshold_bytes = compress_threshold_bytes == (uint64_t) -1 ?
+ DEFAULT_COMPRESS_THRESHOLD :
+ MAX(MIN_COMPRESS_THRESHOLD, compress_threshold_bytes),
+#if HAVE_GCRYPT
+ .seal = seal,
+#endif
+ };
+
+ log_debug("Journal effective settings seal=%s compress=%s compress_threshold_bytes=%s",
+ yes_no(f->seal), yes_no(JOURNAL_FILE_COMPRESS(f)),
+ format_bytes(bytes, sizeof(bytes), f->compress_threshold_bytes));
+
+ if (mmap_cache)
+ f->mmap = mmap_cache_ref(mmap_cache);
+ else {
+ f->mmap = mmap_cache_new();
+ if (!f->mmap) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (fname) {
+ f->path = strdup(fname);
+ if (!f->path) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ } else {
+ assert(fd >= 0);
+
+ /* If we don't know the path, fill in something explanatory and vaguely useful */
+ if (asprintf(&f->path, "/proc/self/%i", fd) < 0) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ f->chain_cache = ordered_hashmap_new(&uint64_hash_ops);
+ if (!f->chain_cache) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (f->fd < 0) {
+ /* We pass O_NONBLOCK here, so that in case somebody pointed us to some character device node or FIFO
+ * or so, we likely fail quickly than block for long. For regular files O_NONBLOCK has no effect, hence
+ * it doesn't hurt in that case. */
+
+ f->fd = open(f->path, f->flags|O_CLOEXEC|O_NONBLOCK, f->mode);
+ if (f->fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ /* fds we opened here by us should also be closed by us. */
+ f->close_fd = true;
+
+ r = fd_nonblock(f->fd, false);
+ if (r < 0)
+ goto fail;
+ }
+
+ f->cache_fd = mmap_cache_add_fd(f->mmap, f->fd);
+ if (!f->cache_fd) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ r = journal_file_fstat(f);
+ if (r < 0)
+ goto fail;
+
+ if (f->last_stat.st_size == 0 && f->writable) {
+
+ (void) journal_file_warn_btrfs(f);
+
+ /* Let's attach the creation time to the journal file, so that the vacuuming code knows the age of this
+ * file even if the file might end up corrupted one day... Ideally we'd just use the creation time many
+ * file systems maintain for each file, but the API to query this is very new, hence let's emulate this
+ * via extended attributes. If extended attributes are not supported we'll just skip this, and rely
+ * solely on mtime/atime/ctime of the file. */
+ (void) fd_setcrtime(f->fd, 0);
+
+#if HAVE_GCRYPT
+ /* Try to load the FSPRG state, and if we can't, then
+ * just don't do sealing */
+ if (f->seal) {
+ r = journal_file_fss_load(f);
+ if (r < 0)
+ f->seal = false;
+ }
+#endif
+
+ r = journal_file_init_header(f, template);
+ if (r < 0)
+ goto fail;
+
+ r = journal_file_fstat(f);
+ if (r < 0)
+ goto fail;
+
+ newly_created = true;
+ }
+
+ if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
+ r = -ENODATA;
+ goto fail;
+ }
+
+ r = mmap_cache_get(f->mmap, f->cache_fd, f->prot, CONTEXT_HEADER, true, 0, PAGE_ALIGN(sizeof(Header)), &f->last_stat, &h, NULL);
+ if (r < 0)
+ goto fail;
+
+ f->header = h;
+
+ if (!newly_created) {
+ set_clear_with_destructor(deferred_closes, journal_file_close);
+
+ r = journal_file_verify_header(f);
+ if (r < 0)
+ goto fail;
+ }
+
+#if HAVE_GCRYPT
+ if (!newly_created && f->writable) {
+ r = journal_file_fss_load(f);
+ if (r < 0)
+ goto fail;
+ }
+#endif
+
+ if (f->writable) {
+ if (metrics) {
+ journal_default_metrics(metrics, f->fd);
+ f->metrics = *metrics;
+ } else if (template)
+ f->metrics = template->metrics;
+
+ r = journal_file_refresh_header(f);
+ if (r < 0)
+ goto fail;
+ }
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_setup(f);
+ if (r < 0)
+ goto fail;
+#endif
+
+ if (newly_created) {
+ r = journal_file_setup_field_hash_table(f);
+ if (r < 0)
+ goto fail;
+
+ r = journal_file_setup_data_hash_table(f);
+ if (r < 0)
+ goto fail;
+
+#if HAVE_GCRYPT
+ r = journal_file_append_first_tag(f);
+ if (r < 0)
+ goto fail;
+#endif
+ }
+
+ if (mmap_cache_got_sigbus(f->mmap, f->cache_fd)) {
+ r = -EIO;
+ goto fail;
+ }
+
+ if (template && template->post_change_timer) {
+ r = journal_file_enable_post_change_timer(
+ f,
+ sd_event_source_get_event(template->post_change_timer),
+ template->post_change_timer_period);
+
+ if (r < 0)
+ goto fail;
+ }
+
+ /* The file is opened now successfully, thus we take possession of any passed in fd. */
+ f->close_fd = true;
+
+ *ret = f;
+ return 0;
+
+fail:
+ if (f->cache_fd && mmap_cache_got_sigbus(f->mmap, f->cache_fd))
+ r = -EIO;
+
+ (void) journal_file_close(f);
+
+ return r;
+}
+
+int journal_file_archive(JournalFile *f) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(f);
+
+ if (!f->writable)
+ return -EINVAL;
+
+ /* Is this a journal file that was passed to us as fd? If so, we synthesized a path name for it, and we refuse
+ * rotation, since we don't know the actual path, and couldn't rename the file hence. */
+ if (path_startswith(f->path, "/proc/self/fd"))
+ return -EINVAL;
+
+ if (!endswith(f->path, ".journal"))
+ return -EINVAL;
+
+ if (asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
+ (int) strlen(f->path) - 8, f->path,
+ SD_ID128_FORMAT_VAL(f->header->seqnum_id),
+ le64toh(f->header->head_entry_seqnum),
+ le64toh(f->header->head_entry_realtime)) < 0)
+ return -ENOMEM;
+
+ /* Try to rename the file to the archived version. If the file already was deleted, we'll get ENOENT, let's
+ * ignore that case. */
+ if (rename(f->path, p) < 0 && errno != ENOENT)
+ return -errno;
+
+ /* Sync the rename to disk */
+ (void) fsync_directory_of_file(f->fd);
+
+ /* Set as archive so offlining commits w/state=STATE_ARCHIVED. Previously we would set old_file->header->state
+ * to STATE_ARCHIVED directly here, but journal_file_set_offline() short-circuits when state != STATE_ONLINE,
+ * which would result in the rotated journal never getting fsync() called before closing. Now we simply queue
+ * the archive state by setting an archive bit, leaving the state as STATE_ONLINE so proper offlining
+ * occurs. */
+ f->archive = true;
+
+ /* Currently, btrfs is not very good with out write patterns and fragments heavily. Let's defrag our journal
+ * files when we archive them */
+ f->defrag_on_close = true;
+
+ return 0;
+}
+
+JournalFile* journal_initiate_close(
+ JournalFile *f,
+ Set *deferred_closes) {
+
+ int r;
+
+ assert(f);
+
+ if (deferred_closes) {
+
+ r = set_put(deferred_closes, f);
+ if (r < 0)
+ log_debug_errno(r, "Failed to add file to deferred close set, closing immediately.");
+ else {
+ (void) journal_file_set_offline(f, false);
+ return NULL;
+ }
+ }
+
+ return journal_file_close(f);
+}
+
+int journal_file_rotate(
+ JournalFile **f,
+ bool compress,
+ uint64_t compress_threshold_bytes,
+ bool seal,
+ Set *deferred_closes) {
+
+ JournalFile *new_file = NULL;
+ int r;
+
+ assert(f);
+ assert(*f);
+
+ r = journal_file_archive(*f);
+ if (r < 0)
+ return r;
+
+ r = journal_file_open(
+ -1,
+ (*f)->path,
+ (*f)->flags,
+ (*f)->mode,
+ compress,
+ compress_threshold_bytes,
+ seal,
+ NULL, /* metrics */
+ (*f)->mmap,
+ deferred_closes,
+ *f, /* template */
+ &new_file);
+
+ journal_initiate_close(*f, deferred_closes);
+ *f = new_file;
+
+ return r;
+}
+
+int journal_file_dispose(int dir_fd, const char *fname) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+
+ assert(fname);
+
+ /* Renames a journal file to *.journal~, i.e. to mark it as corruped or otherwise uncleanly shutdown. Note that
+ * this is done without looking into the file or changing any of its contents. The idea is that this is called
+ * whenever something is suspicious and we want to move the file away and make clear that it is not accessed
+ * for writing anymore. */
+
+ if (!endswith(fname, ".journal"))
+ return -EINVAL;
+
+ if (asprintf(&p, "%.*s@%016" PRIx64 "-%016" PRIx64 ".journal~",
+ (int) strlen(fname) - 8, fname,
+ now(CLOCK_REALTIME),
+ random_u64()) < 0)
+ return -ENOMEM;
+
+ if (renameat(dir_fd, fname, dir_fd, p) < 0)
+ return -errno;
+
+ /* btrfs doesn't cope well with our write pattern and fragments heavily. Let's defrag all files we rotate */
+ fd = openat(dir_fd, p, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ log_debug_errno(errno, "Failed to open file for defragmentation/FS_NOCOW_FL, ignoring: %m");
+ else {
+ (void) chattr_fd(fd, 0, FS_NOCOW_FL, NULL);
+ (void) btrfs_defrag_fd(fd);
+ }
+
+ return 0;
+}
+
+int journal_file_open_reliably(
+ const char *fname,
+ int flags,
+ mode_t mode,
+ bool compress,
+ uint64_t compress_threshold_bytes,
+ bool seal,
+ JournalMetrics *metrics,
+ MMapCache *mmap_cache,
+ Set *deferred_closes,
+ JournalFile *template,
+ JournalFile **ret) {
+
+ int r;
+
+ r = journal_file_open(-1, fname, flags, mode, compress, compress_threshold_bytes, seal, metrics, mmap_cache,
+ deferred_closes, template, ret);
+ if (!IN_SET(r,
+ -EBADMSG, /* Corrupted */
+ -ENODATA, /* Truncated */
+ -EHOSTDOWN, /* Other machine */
+ -EPROTONOSUPPORT, /* Incompatible feature */
+ -EBUSY, /* Unclean shutdown */
+ -ESHUTDOWN, /* Already archived */
+ -EIO, /* IO error, including SIGBUS on mmap */
+ -EIDRM, /* File has been deleted */
+ -ETXTBSY)) /* File is from the future */
+ return r;
+
+ if ((flags & O_ACCMODE) == O_RDONLY)
+ return r;
+
+ if (!(flags & O_CREAT))
+ return r;
+
+ if (!endswith(fname, ".journal"))
+ return r;
+
+ /* The file is corrupted. Rotate it away and try it again (but only once) */
+ log_warning_errno(r, "File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
+
+ r = journal_file_dispose(AT_FDCWD, fname);
+ if (r < 0)
+ return r;
+
+ return journal_file_open(-1, fname, flags, mode, compress, compress_threshold_bytes, seal, metrics, mmap_cache,
+ deferred_closes, template, ret);
+}
+
+int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p) {
+ uint64_t i, n;
+ uint64_t q, xor_hash = 0;
+ int r;
+ EntryItem *items;
+ dual_timestamp ts;
+ const sd_id128_t *boot_id;
+
+ assert(from);
+ assert(to);
+ assert(o);
+ assert(p);
+
+ if (!to->writable)
+ return -EPERM;
+
+ ts.monotonic = le64toh(o->entry.monotonic);
+ ts.realtime = le64toh(o->entry.realtime);
+ boot_id = &o->entry.boot_id;
+
+ n = journal_file_entry_n_items(o);
+ /* alloca() can't take 0, hence let's allocate at least one */
+ items = newa(EntryItem, MAX(1u, n));
+
+ for (i = 0; i < n; i++) {
+ uint64_t l, h;
+ le64_t le_hash;
+ size_t t;
+ void *data;
+ Object *u;
+
+ q = le64toh(o->entry.items[i].object_offset);
+ le_hash = o->entry.items[i].hash;
+
+ r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
+ if (r < 0)
+ return r;
+
+ if (le_hash != o->data.hash)
+ return -EBADMSG;
+
+ l = le64toh(o->object.size) - offsetof(Object, data.payload);
+ t = (size_t) l;
+
+ /* We hit the limit on 32bit machines */
+ if ((uint64_t) t != l)
+ return -E2BIG;
+
+ if (o->object.flags & OBJECT_COMPRESSION_MASK) {
+#if HAVE_XZ || HAVE_LZ4
+ size_t rsize = 0;
+
+ r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
+ o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0);
+ if (r < 0)
+ return r;
+
+ data = from->compress_buffer;
+ l = rsize;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+ } else
+ data = o->data.payload;
+
+ r = journal_file_append_data(to, data, l, &u, &h);
+ if (r < 0)
+ return r;
+
+ xor_hash ^= le64toh(u->data.hash);
+ items[i].object_offset = htole64(h);
+ items[i].hash = u->data.hash;
+
+ r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+ }
+
+ r = journal_file_append_entry_internal(to, &ts, boot_id, xor_hash, items, n,
+ NULL, NULL, NULL);
+
+ if (mmap_cache_got_sigbus(to->mmap, to->cache_fd))
+ return -EIO;
+
+ return r;
+}
+
+void journal_reset_metrics(JournalMetrics *m) {
+ assert(m);
+
+ /* Set everything to "pick automatic values". */
+
+ *m = (JournalMetrics) {
+ .min_use = (uint64_t) -1,
+ .max_use = (uint64_t) -1,
+ .min_size = (uint64_t) -1,
+ .max_size = (uint64_t) -1,
+ .keep_free = (uint64_t) -1,
+ .n_max_files = (uint64_t) -1,
+ };
+}
+
+void journal_default_metrics(JournalMetrics *m, int fd) {
+ char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX], e[FORMAT_BYTES_MAX];
+ struct statvfs ss;
+ uint64_t fs_size;
+
+ assert(m);
+ assert(fd >= 0);
+
+ if (fstatvfs(fd, &ss) >= 0)
+ fs_size = ss.f_frsize * ss.f_blocks;
+ else {
+ log_debug_errno(errno, "Failed to determine disk size: %m");
+ fs_size = 0;
+ }
+
+ if (m->max_use == (uint64_t) -1) {
+
+ if (fs_size > 0) {
+ m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
+
+ if (m->max_use > DEFAULT_MAX_USE_UPPER)
+ m->max_use = DEFAULT_MAX_USE_UPPER;
+
+ if (m->max_use < DEFAULT_MAX_USE_LOWER)
+ m->max_use = DEFAULT_MAX_USE_LOWER;
+ } else
+ m->max_use = DEFAULT_MAX_USE_LOWER;
+ } else {
+ m->max_use = PAGE_ALIGN(m->max_use);
+
+ if (m->max_use != 0 && m->max_use < JOURNAL_FILE_SIZE_MIN*2)
+ m->max_use = JOURNAL_FILE_SIZE_MIN*2;
+ }
+
+ if (m->min_use == (uint64_t) -1)
+ m->min_use = DEFAULT_MIN_USE;
+
+ if (m->min_use > m->max_use)
+ m->min_use = m->max_use;
+
+ if (m->max_size == (uint64_t) -1) {
+ m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
+
+ if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
+ m->max_size = DEFAULT_MAX_SIZE_UPPER;
+ } else
+ m->max_size = PAGE_ALIGN(m->max_size);
+
+ if (m->max_size != 0) {
+ if (m->max_size < JOURNAL_FILE_SIZE_MIN)
+ m->max_size = JOURNAL_FILE_SIZE_MIN;
+
+ if (m->max_use != 0 && m->max_size*2 > m->max_use)
+ m->max_use = m->max_size*2;
+ }
+
+ if (m->min_size == (uint64_t) -1)
+ m->min_size = JOURNAL_FILE_SIZE_MIN;
+ else {
+ m->min_size = PAGE_ALIGN(m->min_size);
+
+ if (m->min_size < JOURNAL_FILE_SIZE_MIN)
+ m->min_size = JOURNAL_FILE_SIZE_MIN;
+
+ if (m->max_size != 0 && m->min_size > m->max_size)
+ m->max_size = m->min_size;
+ }
+
+ if (m->keep_free == (uint64_t) -1) {
+
+ if (fs_size > 0) {
+ m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
+
+ if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
+ m->keep_free = DEFAULT_KEEP_FREE_UPPER;
+
+ } else
+ m->keep_free = DEFAULT_KEEP_FREE;
+ }
+
+ if (m->n_max_files == (uint64_t) -1)
+ m->n_max_files = DEFAULT_N_MAX_FILES;
+
+ log_debug("Fixed min_use=%s max_use=%s max_size=%s min_size=%s keep_free=%s n_max_files=%" PRIu64,
+ format_bytes(a, sizeof(a), m->min_use),
+ format_bytes(b, sizeof(b), m->max_use),
+ format_bytes(c, sizeof(c), m->max_size),
+ format_bytes(d, sizeof(d), m->min_size),
+ format_bytes(e, sizeof(e), m->keep_free),
+ m->n_max_files);
+}
+
+int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
+ assert(f);
+ assert(f->header);
+ assert(from || to);
+
+ if (from) {
+ if (f->header->head_entry_realtime == 0)
+ return -ENOENT;
+
+ *from = le64toh(f->header->head_entry_realtime);
+ }
+
+ if (to) {
+ if (f->header->tail_entry_realtime == 0)
+ return -ENOENT;
+
+ *to = le64toh(f->header->tail_entry_realtime);
+ }
+
+ return 1;
+}
+
+int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
+ Object *o;
+ uint64_t p;
+ int r;
+
+ assert(f);
+ assert(from || to);
+
+ r = find_data_object_by_boot_id(f, boot_id, &o, &p);
+ if (r <= 0)
+ return r;
+
+ if (le64toh(o->data.n_entries) <= 0)
+ return 0;
+
+ if (from) {
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
+ if (r < 0)
+ return r;
+
+ *from = le64toh(o->entry.monotonic);
+ }
+
+ if (to) {
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ r = generic_array_get_plus_one(f,
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset),
+ le64toh(o->data.n_entries)-1,
+ &o, NULL);
+ if (r <= 0)
+ return r;
+
+ *to = le64toh(o->entry.monotonic);
+ }
+
+ return 1;
+}
+
+bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
+ assert(f);
+ assert(f->header);
+
+ /* If we gained new header fields we gained new features,
+ * hence suggest a rotation */
+ if (le64toh(f->header->header_size) < sizeof(Header)) {
+ log_debug("%s uses an outdated header, suggesting rotation.", f->path);
+ return true;
+ }
+
+ /* Let's check if the hash tables grew over a certain fill
+ * level (75%, borrowing this value from Java's hash table
+ * implementation), and if so suggest a rotation. To calculate
+ * the fill level we need the n_data field, which only exists
+ * in newer versions. */
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+ if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
+ log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
+ f->path,
+ 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
+ le64toh(f->header->n_data),
+ le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
+ (unsigned long long) f->last_stat.st_size,
+ f->last_stat.st_size / le64toh(f->header->n_data));
+ return true;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
+ if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
+ log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
+ f->path,
+ 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
+ le64toh(f->header->n_fields),
+ le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
+ return true;
+ }
+
+ /* Are the data objects properly indexed by field objects? */
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
+ JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
+ le64toh(f->header->n_data) > 0 &&
+ le64toh(f->header->n_fields) == 0)
+ return true;
+
+ if (max_file_usec > 0) {
+ usec_t t, h;
+
+ h = le64toh(f->header->head_entry_realtime);
+ t = now(CLOCK_REALTIME);
+
+ if (h > 0 && t > h + max_file_usec)
+ return true;
+ }
+
+ return false;
+}
diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
new file mode 100644
index 0000000..29e324d
--- /dev/null
+++ b/src/journal/journal-file.h
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+
+#if HAVE_GCRYPT
+# include <gcrypt.h>
+#endif
+
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "hashmap.h"
+#include "journal-def.h"
+#include "macro.h"
+#include "mmap-cache.h"
+#include "sparse-endian.h"
+
+typedef struct JournalMetrics {
+ /* For all these: -1 means "pick automatically", and 0 means "no limit enforced" */
+ uint64_t max_size; /* how large journal files grow at max */
+ uint64_t min_size; /* how large journal files grow at least */
+ uint64_t max_use; /* how much disk space to use in total at max, keep_free permitting */
+ uint64_t min_use; /* how much disk space to use in total at least, even if keep_free says not to */
+ uint64_t keep_free; /* how much to keep free on disk */
+ uint64_t n_max_files; /* how many files to keep around at max */
+} JournalMetrics;
+
+typedef enum direction {
+ DIRECTION_UP,
+ DIRECTION_DOWN
+} direction_t;
+
+typedef enum LocationType {
+ /* The first and last entries, resp. */
+ LOCATION_HEAD,
+ LOCATION_TAIL,
+
+ /* We already read the entry we currently point to, and the
+ * next one to read should probably not be this one again. */
+ LOCATION_DISCRETE,
+
+ /* We should seek to the precise location specified, and
+ * return it, as we haven't read it yet. */
+ LOCATION_SEEK
+} LocationType;
+
+typedef enum OfflineState {
+ OFFLINE_JOINED,
+ OFFLINE_SYNCING,
+ OFFLINE_OFFLINING,
+ OFFLINE_CANCEL,
+ OFFLINE_AGAIN_FROM_SYNCING,
+ OFFLINE_AGAIN_FROM_OFFLINING,
+ OFFLINE_DONE
+} OfflineState;
+
+typedef struct JournalFile {
+ int fd;
+ MMapFileDescriptor *cache_fd;
+
+ mode_t mode;
+
+ int flags;
+ int prot;
+ bool writable:1;
+ bool compress_xz:1;
+ bool compress_lz4:1;
+ bool seal:1;
+ bool defrag_on_close:1;
+ bool close_fd:1;
+ bool archive:1;
+
+ direction_t last_direction;
+ LocationType location_type;
+ uint64_t last_n_entries;
+
+ char *path;
+ struct stat last_stat;
+ usec_t last_stat_usec;
+
+ Header *header;
+ HashItem *data_hash_table;
+ HashItem *field_hash_table;
+
+ uint64_t current_offset;
+ uint64_t current_seqnum;
+ uint64_t current_realtime;
+ uint64_t current_monotonic;
+ sd_id128_t current_boot_id;
+ uint64_t current_xor_hash;
+
+ JournalMetrics metrics;
+ MMapCache *mmap;
+
+ sd_event_source *post_change_timer;
+ usec_t post_change_timer_period;
+
+ OrderedHashmap *chain_cache;
+
+ pthread_t offline_thread;
+ volatile OfflineState offline_state;
+
+ unsigned last_seen_generation;
+
+ uint64_t compress_threshold_bytes;
+#if HAVE_XZ || HAVE_LZ4
+ void *compress_buffer;
+ size_t compress_buffer_size;
+#endif
+
+#if HAVE_GCRYPT
+ gcry_md_hd_t hmac;
+ bool hmac_running;
+
+ FSSHeader *fss_file;
+ size_t fss_file_size;
+
+ uint64_t fss_start_usec;
+ uint64_t fss_interval_usec;
+
+ void *fsprg_state;
+ size_t fsprg_state_size;
+
+ void *fsprg_seed;
+ size_t fsprg_seed_size;
+#endif
+} JournalFile;
+
+int journal_file_open(
+ int fd,
+ const char *fname,
+ int flags,
+ mode_t mode,
+ bool compress,
+ uint64_t compress_threshold_bytes,
+ bool seal,
+ JournalMetrics *metrics,
+ MMapCache *mmap_cache,
+ Set *deferred_closes,
+ JournalFile *template,
+ JournalFile **ret);
+
+int journal_file_set_offline(JournalFile *f, bool wait);
+bool journal_file_is_offlining(JournalFile *f);
+JournalFile* journal_file_close(JournalFile *j);
+
+int journal_file_open_reliably(
+ const char *fname,
+ int flags,
+ mode_t mode,
+ bool compress,
+ uint64_t compress_threshold_bytes,
+ bool seal,
+ JournalMetrics *metrics,
+ MMapCache *mmap_cache,
+ Set *deferred_closes,
+ JournalFile *template,
+ JournalFile **ret);
+
+#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
+#define VALID64(x) (((x) & 7ULL) == 0ULL)
+
+/* Use six characters to cover the offsets common in smallish journal
+ * files without adding too many zeros. */
+#define OFSfmt "%06"PRIx64
+
+static inline bool VALID_REALTIME(uint64_t u) {
+ /* This considers timestamps until the year 3112 valid. That should be plenty room... */
+ return u > 0 && u < (1ULL << 55);
+}
+
+static inline bool VALID_MONOTONIC(uint64_t u) {
+ /* This considers timestamps until 1142 years of runtime valid. */
+ return u < (1ULL << 55);
+}
+
+static inline bool VALID_EPOCH(uint64_t u) {
+ /* This allows changing the key for 1142 years, every usec. */
+ return u < (1ULL << 55);
+}
+
+#define JOURNAL_HEADER_CONTAINS(h, field) \
+ (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
+
+#define JOURNAL_HEADER_SEALED(h) \
+ (!!(le32toh((h)->compatible_flags) & HEADER_COMPATIBLE_SEALED))
+
+#define JOURNAL_HEADER_COMPRESSED_XZ(h) \
+ (!!(le32toh((h)->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED_XZ))
+
+#define JOURNAL_HEADER_COMPRESSED_LZ4(h) \
+ (!!(le32toh((h)->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED_LZ4))
+
+int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret);
+
+uint64_t journal_file_entry_n_items(Object *o) _pure_;
+uint64_t journal_file_entry_array_n_items(Object *o) _pure_;
+uint64_t journal_file_hash_table_n_items(Object *o) _pure_;
+
+int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret, uint64_t *offset);
+int journal_file_append_entry(
+ JournalFile *f,
+ const dual_timestamp *ts,
+ const sd_id128_t *boot_id,
+ const struct iovec iovec[], unsigned n_iovec,
+ uint64_t *seqno,
+ Object **ret,
+ uint64_t *offset);
+
+int journal_file_find_data_object(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset);
+int journal_file_find_data_object_with_hash(JournalFile *f, const void *data, uint64_t size, uint64_t hash, Object **ret, uint64_t *offset);
+
+int journal_file_find_field_object(JournalFile *f, const void *field, uint64_t size, Object **ret, uint64_t *offset);
+int journal_file_find_field_object_with_hash(JournalFile *f, const void *field, uint64_t size, uint64_t hash, Object **ret, uint64_t *offset);
+
+void journal_file_reset_location(JournalFile *f);
+void journal_file_save_location(JournalFile *f, Object *o, uint64_t offset);
+int journal_file_compare_locations(JournalFile *af, JournalFile *bf);
+int journal_file_next_entry(JournalFile *f, uint64_t p, direction_t direction, Object **ret, uint64_t *offset);
+
+int journal_file_next_entry_for_data(JournalFile *f, Object *o, uint64_t p, uint64_t data_offset, direction_t direction, Object **ret, uint64_t *offset);
+
+int journal_file_move_to_entry_by_seqnum(JournalFile *f, uint64_t seqnum, direction_t direction, Object **ret, uint64_t *offset);
+int journal_file_move_to_entry_by_realtime(JournalFile *f, uint64_t realtime, direction_t direction, Object **ret, uint64_t *offset);
+int journal_file_move_to_entry_by_monotonic(JournalFile *f, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret, uint64_t *offset);
+
+int journal_file_move_to_entry_by_offset_for_data(JournalFile *f, uint64_t data_offset, uint64_t p, direction_t direction, Object **ret, uint64_t *offset);
+int journal_file_move_to_entry_by_seqnum_for_data(JournalFile *f, uint64_t data_offset, uint64_t seqnum, direction_t direction, Object **ret, uint64_t *offset);
+int journal_file_move_to_entry_by_realtime_for_data(JournalFile *f, uint64_t data_offset, uint64_t realtime, direction_t direction, Object **ret, uint64_t *offset);
+int journal_file_move_to_entry_by_monotonic_for_data(JournalFile *f, uint64_t data_offset, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret, uint64_t *offset);
+
+int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p);
+
+void journal_file_dump(JournalFile *f);
+void journal_file_print_header(JournalFile *f);
+
+int journal_file_archive(JournalFile *f);
+JournalFile* journal_initiate_close(JournalFile *f, Set *deferred_closes);
+int journal_file_rotate(JournalFile **f, bool compress, uint64_t compress_threshold_bytes, bool seal, Set *deferred_closes);
+
+int journal_file_dispose(int dir_fd, const char *fname);
+
+void journal_file_post_change(JournalFile *f);
+int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t);
+
+void journal_reset_metrics(JournalMetrics *m);
+void journal_default_metrics(JournalMetrics *m, int fd);
+
+int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to);
+int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot, usec_t *from, usec_t *to);
+
+bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec);
+
+int journal_file_map_data_hash_table(JournalFile *f);
+int journal_file_map_field_hash_table(JournalFile *f);
+
+static inline bool JOURNAL_FILE_COMPRESS(JournalFile *f) {
+ assert(f);
+ return f->compress_xz || f->compress_lz4;
+}
diff --git a/src/journal/journal-internal.h b/src/journal/journal-internal.h
new file mode 100644
index 0000000..80a69da
--- /dev/null
+++ b/src/journal/journal-internal.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-id128.h"
+#include "sd-journal.h"
+
+#include "hashmap.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "list.h"
+#include "set.h"
+
+typedef struct Match Match;
+typedef struct Location Location;
+typedef struct Directory Directory;
+
+typedef enum MatchType {
+ MATCH_DISCRETE,
+ MATCH_OR_TERM,
+ MATCH_AND_TERM
+} MatchType;
+
+struct Match {
+ MatchType type;
+ Match *parent;
+ LIST_FIELDS(Match, matches);
+
+ /* For concrete matches */
+ char *data;
+ size_t size;
+ le64_t le_hash;
+
+ /* For terms */
+ LIST_HEAD(Match, matches);
+};
+
+struct Location {
+ LocationType type;
+
+ bool seqnum_set;
+ bool realtime_set;
+ bool monotonic_set;
+ bool xor_hash_set;
+
+ uint64_t seqnum;
+ sd_id128_t seqnum_id;
+
+ uint64_t realtime;
+
+ uint64_t monotonic;
+ sd_id128_t boot_id;
+
+ uint64_t xor_hash;
+};
+
+struct Directory {
+ char *path;
+ int wd;
+ bool is_root;
+ unsigned last_seen_generation;
+};
+
+struct sd_journal {
+ int toplevel_fd;
+
+ char *path;
+ char *prefix;
+
+ OrderedHashmap *files;
+ IteratedCache *files_cache;
+ MMapCache *mmap;
+
+ Location current_location;
+
+ JournalFile *current_file;
+ uint64_t current_field;
+
+ Match *level0, *level1, *level2;
+
+ pid_t original_pid;
+
+ int inotify_fd;
+ unsigned current_invalidate_counter, last_invalidate_counter;
+ usec_t last_process_usec;
+ unsigned generation;
+
+ /* Iterating through unique fields and their data values */
+ char *unique_field;
+ JournalFile *unique_file;
+ uint64_t unique_offset;
+
+ /* Iterating through known fields */
+ JournalFile *fields_file;
+ uint64_t fields_offset;
+ uint64_t fields_hash_table_index;
+ char *fields_buffer;
+ size_t fields_buffer_allocated;
+
+ int flags;
+
+ bool on_network:1;
+ bool no_new_files:1;
+ bool no_inotify:1;
+ bool unique_file_lost:1; /* File we were iterating over got
+ removed, and there were no more
+ files, so sd_j_enumerate_unique
+ will return a value equal to 0. */
+ bool fields_file_lost:1;
+ bool has_runtime_files:1;
+ bool has_persistent_files:1;
+
+ size_t data_threshold;
+
+ Hashmap *directories_by_path;
+ Hashmap *directories_by_wd;
+
+ Hashmap *errors;
+};
+
+char *journal_make_match_string(sd_journal *j);
+void journal_print_header(sd_journal *j);
+
+#define JOURNAL_FOREACH_DATA_RETVAL(j, data, l, retval) \
+ for (sd_journal_restart_data(j); ((retval) = sd_journal_enumerate_data((j), &(data), &(l))) > 0; )
diff --git a/src/journal/journal-qrcode.c b/src/journal/journal-qrcode.c
new file mode 100644
index 0000000..35d6ec4
--- /dev/null
+++ b/src/journal/journal-qrcode.c
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <qrencode.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+
+#include "journal-qrcode.h"
+#include "macro.h"
+
+#define WHITE_ON_BLACK "\033[40;37;1m"
+#define NORMAL "\033[0m"
+
+static void print_border(FILE *output, unsigned width) {
+ unsigned x, y;
+
+ /* Four rows of border */
+ for (y = 0; y < 4; y += 2) {
+ fputs(WHITE_ON_BLACK, output);
+
+ for (x = 0; x < 4 + width + 4; x++)
+ fputs("\342\226\210", output);
+
+ fputs(NORMAL "\n", output);
+ }
+}
+
+int print_qr_code(
+ FILE *output,
+ const void *seed,
+ size_t seed_size,
+ uint64_t start,
+ uint64_t interval,
+ const char *hn,
+ sd_id128_t machine) {
+
+ FILE *f;
+ char *url = NULL;
+ size_t url_size = 0, i;
+ QRcode* qr;
+ unsigned x, y;
+
+ assert(seed);
+ assert(seed_size > 0);
+
+ f = open_memstream(&url, &url_size);
+ if (!f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ fputs("fss://", f);
+
+ for (i = 0; i < seed_size; i++) {
+ if (i > 0 && i % 3 == 0)
+ fputc('-', f);
+ fprintf(f, "%02x", ((uint8_t*) seed)[i]);
+ }
+
+ fprintf(f, "/%"PRIx64"-%"PRIx64"?machine=" SD_ID128_FORMAT_STR,
+ start,
+ interval,
+ SD_ID128_FORMAT_VAL(machine));
+
+ if (hn)
+ fprintf(f, ";hostname=%s", hn);
+
+ if (ferror(f)) {
+ fclose(f);
+ free(url);
+ return -ENOMEM;
+ }
+
+ fclose(f);
+
+ qr = QRcode_encodeString(url, 0, QR_ECLEVEL_L, QR_MODE_8, 1);
+ free(url);
+
+ if (!qr)
+ return -ENOMEM;
+
+ print_border(output, qr->width);
+
+ for (y = 0; y < (unsigned) qr->width; y += 2) {
+ const uint8_t *row1, *row2;
+
+ row1 = qr->data + qr->width * y;
+ row2 = row1 + qr->width;
+
+ fputs(WHITE_ON_BLACK, output);
+ for (x = 0; x < 4; x++)
+ fputs("\342\226\210", output);
+
+ for (x = 0; x < (unsigned) qr->width; x ++) {
+ bool a, b;
+
+ a = row1[x] & 1;
+ b = (y+1) < (unsigned) qr->width ? (row2[x] & 1) : false;
+
+ if (a && b)
+ fputc(' ', output);
+ else if (a)
+ fputs("\342\226\204", output);
+ else if (b)
+ fputs("\342\226\200", output);
+ else
+ fputs("\342\226\210", output);
+ }
+
+ for (x = 0; x < 4; x++)
+ fputs("\342\226\210", output);
+ fputs(NORMAL "\n", output);
+ }
+
+ print_border(output, qr->width);
+
+ QRcode_free(qr);
+ return 0;
+}
diff --git a/src/journal/journal-qrcode.h b/src/journal/journal-qrcode.h
new file mode 100644
index 0000000..0774608
--- /dev/null
+++ b/src/journal/journal-qrcode.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "sd-id128.h"
+
+int print_qr_code(FILE *f, const void *seed, size_t seed_size, uint64_t start, uint64_t interval, const char *hn, sd_id128_t machine);
diff --git a/src/journal/journal-send.c b/src/journal/journal-send.c
new file mode 100644
index 0000000..8618454
--- /dev/null
+++ b/src/journal/journal-send.c
@@ -0,0 +1,543 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <printf.h>
+#include <stddef.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#define SD_JOURNAL_SUPPRESS_LOCATION
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "memfd-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+#define SNDBUF_SIZE (8*1024*1024)
+
+#define ALLOCA_CODE_FUNC(f, func) \
+ do { \
+ size_t _fl; \
+ const char *_func = (func); \
+ char **_f = &(f); \
+ _fl = strlen(_func) + 1; \
+ *_f = newa(char, _fl + 10); \
+ memcpy(*_f, "CODE_FUNC=", 10); \
+ memcpy(*_f + 10, _func, _fl); \
+ } while (false)
+
+/* We open a single fd, and we'll share it with the current process,
+ * all its threads, and all its subprocesses. This means we need to
+ * initialize it atomically, and need to operate on it atomically
+ * never assuming we are the only user */
+
+static int journal_fd(void) {
+ int fd;
+ static int fd_plus_one = 0;
+
+retry:
+ if (fd_plus_one > 0)
+ return fd_plus_one - 1;
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return -errno;
+
+ fd_inc_sndbuf(fd, SNDBUF_SIZE);
+
+ if (!__sync_bool_compare_and_swap(&fd_plus_one, 0, fd+1)) {
+ safe_close(fd);
+ goto retry;
+ }
+
+ return fd;
+}
+
+_public_ int sd_journal_print(int priority, const char *format, ...) {
+ int r;
+ va_list ap;
+
+ va_start(ap, format);
+ r = sd_journal_printv(priority, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_journal_printv(int priority, const char *format, va_list ap) {
+
+ /* FIXME: Instead of limiting things to LINE_MAX we could do a
+ C99 variable-length array on the stack here in a loop. */
+
+ char buffer[8 + LINE_MAX], p[STRLEN("PRIORITY=") + DECIMAL_STR_MAX(int) + 1];
+ struct iovec iov[2];
+
+ assert_return(priority >= 0, -EINVAL);
+ assert_return(priority <= 7, -EINVAL);
+ assert_return(format, -EINVAL);
+
+ xsprintf(p, "PRIORITY=%i", priority & LOG_PRIMASK);
+
+ memcpy(buffer, "MESSAGE=", 8);
+ vsnprintf(buffer+8, sizeof(buffer) - 8, format, ap);
+
+ /* Strip trailing whitespace, keep prefix whitespace. */
+ (void) strstrip(buffer);
+
+ /* Suppress empty lines */
+ if (isempty(buffer+8))
+ return 0;
+
+ iov[0] = IOVEC_MAKE_STRING(buffer);
+ iov[1] = IOVEC_MAKE_STRING(p);
+
+ return sd_journal_sendv(iov, 2);
+}
+
+_printf_(1, 0) static int fill_iovec_sprintf(const char *format, va_list ap, int extra, struct iovec **_iov) {
+ PROTECT_ERRNO;
+ int r, n = 0, i = 0, j;
+ struct iovec *iov = NULL;
+
+ assert(_iov);
+
+ if (extra > 0) {
+ n = MAX(extra * 2, extra + 4);
+ iov = malloc0(n * sizeof(struct iovec));
+ if (!iov) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ i = extra;
+ }
+
+ while (format) {
+ struct iovec *c;
+ char *buffer;
+ va_list aq;
+
+ if (i >= n) {
+ n = MAX(i*2, 4);
+ c = realloc(iov, n * sizeof(struct iovec));
+ if (!c) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ iov = c;
+ }
+
+ va_copy(aq, ap);
+ if (vasprintf(&buffer, format, aq) < 0) {
+ va_end(aq);
+ r = -ENOMEM;
+ goto fail;
+ }
+ va_end(aq);
+
+ VA_FORMAT_ADVANCE(format, ap);
+
+ (void) strstrip(buffer); /* strip trailing whitespace, keep prefixing whitespace */
+
+ iov[i++] = IOVEC_MAKE_STRING(buffer);
+
+ format = va_arg(ap, char *);
+ }
+
+ *_iov = iov;
+
+ return i;
+
+fail:
+ for (j = 0; j < i; j++)
+ free(iov[j].iov_base);
+
+ free(iov);
+
+ return r;
+}
+
+_public_ int sd_journal_send(const char *format, ...) {
+ int r, i, j;
+ va_list ap;
+ struct iovec *iov = NULL;
+
+ va_start(ap, format);
+ i = fill_iovec_sprintf(format, ap, 0, &iov);
+ va_end(ap);
+
+ if (_unlikely_(i < 0)) {
+ r = i;
+ goto finish;
+ }
+
+ r = sd_journal_sendv(iov, i);
+
+finish:
+ for (j = 0; j < i; j++)
+ free(iov[j].iov_base);
+
+ free(iov);
+
+ return r;
+}
+
+_public_ int sd_journal_sendv(const struct iovec *iov, int n) {
+ PROTECT_ERRNO;
+ int fd, r;
+ _cleanup_close_ int buffer_fd = -1;
+ struct iovec *w;
+ uint64_t *l;
+ int i, j = 0;
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/socket",
+ };
+ struct msghdr mh = {
+ .msg_name = (struct sockaddr*) &sa.sa,
+ .msg_namelen = SOCKADDR_UN_LEN(sa.un),
+ };
+ ssize_t k;
+ bool have_syslog_identifier = false;
+ bool seal = true;
+
+ assert_return(iov, -EINVAL);
+ assert_return(n > 0, -EINVAL);
+
+ w = newa(struct iovec, n * 5 + 3);
+ l = newa(uint64_t, n);
+
+ for (i = 0; i < n; i++) {
+ char *c, *nl;
+
+ if (_unlikely_(!iov[i].iov_base || iov[i].iov_len <= 1))
+ return -EINVAL;
+
+ c = memchr(iov[i].iov_base, '=', iov[i].iov_len);
+ if (_unlikely_(!c || c == iov[i].iov_base))
+ return -EINVAL;
+
+ have_syslog_identifier = have_syslog_identifier ||
+ (c == (char *) iov[i].iov_base + 17 &&
+ startswith(iov[i].iov_base, "SYSLOG_IDENTIFIER"));
+
+ nl = memchr(iov[i].iov_base, '\n', iov[i].iov_len);
+ if (nl) {
+ if (_unlikely_(nl < c))
+ return -EINVAL;
+
+ /* Already includes a newline? Bummer, then
+ * let's write the variable name, then a
+ * newline, then the size (64bit LE), followed
+ * by the data and a final newline */
+
+ w[j++] = IOVEC_MAKE(iov[i].iov_base, c - (char*) iov[i].iov_base);
+ w[j++] = IOVEC_MAKE_STRING("\n");
+
+ l[i] = htole64(iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
+ w[j++] = IOVEC_MAKE(&l[i], sizeof(uint64_t));
+
+ w[j++] = IOVEC_MAKE(c + 1, iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
+ } else
+ /* Nothing special? Then just add the line and
+ * append a newline */
+ w[j++] = iov[i];
+
+ w[j++] = IOVEC_MAKE_STRING("\n");
+ }
+
+ if (!have_syslog_identifier &&
+ string_is_safe(program_invocation_short_name)) {
+
+ /* Implicitly add program_invocation_short_name, if it
+ * is not set explicitly. We only do this for
+ * program_invocation_short_name, and nothing else
+ * since everything else is much nicer to retrieve
+ * from the outside. */
+
+ w[j++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=");
+ w[j++] = IOVEC_MAKE_STRING(program_invocation_short_name);
+ w[j++] = IOVEC_MAKE_STRING("\n");
+ }
+
+ fd = journal_fd();
+ if (_unlikely_(fd < 0))
+ return fd;
+
+ mh.msg_iov = w;
+ mh.msg_iovlen = j;
+
+ k = sendmsg(fd, &mh, MSG_NOSIGNAL);
+ if (k >= 0)
+ return 0;
+
+ /* Fail silently if the journal is not available */
+ if (errno == ENOENT)
+ return 0;
+
+ if (!IN_SET(errno, EMSGSIZE, ENOBUFS))
+ return -errno;
+
+ /* Message doesn't fit... Let's dump the data in a memfd or
+ * temporary file and just pass a file descriptor of it to the
+ * other side.
+ *
+ * For the temporary files we use /dev/shm instead of /tmp
+ * here, since we want this to be a tmpfs, and one that is
+ * available from early boot on and where unprivileged users
+ * can create files. */
+ buffer_fd = memfd_new(NULL);
+ if (buffer_fd < 0) {
+ if (buffer_fd == -ENOSYS) {
+ buffer_fd = open_tmpfile_unlinkable("/dev/shm", O_RDWR | O_CLOEXEC);
+ if (buffer_fd < 0)
+ return buffer_fd;
+
+ seal = false;
+ } else
+ return buffer_fd;
+ }
+
+ n = writev(buffer_fd, w, j);
+ if (n < 0)
+ return -errno;
+
+ if (seal) {
+ r = memfd_set_sealed(buffer_fd);
+ if (r < 0)
+ return r;
+ }
+
+ r = send_one_fd_sa(fd, buffer_fd, mh.msg_name, mh.msg_namelen, 0);
+ if (r == -ENOENT)
+ /* Fail silently if the journal is not available */
+ return 0;
+ return r;
+}
+
+static int fill_iovec_perror_and_send(const char *message, int skip, struct iovec iov[]) {
+ PROTECT_ERRNO;
+ size_t n, k;
+
+ k = isempty(message) ? 0 : strlen(message) + 2;
+ n = 8 + k + 256 + 1;
+
+ for (;;) {
+ char buffer[n];
+ char* j;
+
+ errno = 0;
+ j = strerror_r(_saved_errno_, buffer + 8 + k, n - 8 - k);
+ if (errno == 0) {
+ char error[STRLEN("ERRNO=") + DECIMAL_STR_MAX(int) + 1];
+
+ if (j != buffer + 8 + k)
+ memmove(buffer + 8 + k, j, strlen(j)+1);
+
+ memcpy(buffer, "MESSAGE=", 8);
+
+ if (k > 0) {
+ memcpy(buffer + 8, message, k - 2);
+ memcpy(buffer + 8 + k - 2, ": ", 2);
+ }
+
+ xsprintf(error, "ERRNO=%i", _saved_errno_);
+
+ assert_cc(3 == LOG_ERR);
+ iov[skip+0] = IOVEC_MAKE_STRING("PRIORITY=3");
+ iov[skip+1] = IOVEC_MAKE_STRING(buffer);
+ iov[skip+2] = IOVEC_MAKE_STRING(error);
+
+ return sd_journal_sendv(iov, skip + 3);
+ }
+
+ if (errno != ERANGE)
+ return -errno;
+
+ n *= 2;
+ }
+}
+
+_public_ int sd_journal_perror(const char *message) {
+ struct iovec iovec[3];
+
+ return fill_iovec_perror_and_send(message, 0, iovec);
+}
+
+_public_ int sd_journal_stream_fd(const char *identifier, int priority, int level_prefix) {
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/stdout",
+ };
+ _cleanup_close_ int fd = -1;
+ char *header;
+ size_t l;
+ int r;
+
+ assert_return(priority >= 0, -EINVAL);
+ assert_return(priority <= 7, -EINVAL);
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return -errno;
+
+ r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return -errno;
+
+ if (shutdown(fd, SHUT_RD) < 0)
+ return -errno;
+
+ (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
+
+ identifier = strempty(identifier);
+
+ l = strlen(identifier);
+ header = newa(char, l + 1 + 1 + 2 + 2 + 2 + 2 + 2);
+
+ memcpy(header, identifier, l);
+ header[l++] = '\n';
+ header[l++] = '\n'; /* unit id */
+ header[l++] = '0' + priority;
+ header[l++] = '\n';
+ header[l++] = '0' + !!level_prefix;
+ header[l++] = '\n';
+ header[l++] = '0';
+ header[l++] = '\n';
+ header[l++] = '0';
+ header[l++] = '\n';
+ header[l++] = '0';
+ header[l++] = '\n';
+
+ r = loop_write(fd, header, l, false);
+ if (r < 0)
+ return r;
+
+ return TAKE_FD(fd);
+}
+
+_public_ int sd_journal_print_with_location(int priority, const char *file, const char *line, const char *func, const char *format, ...) {
+ int r;
+ va_list ap;
+
+ va_start(ap, format);
+ r = sd_journal_printv_with_location(priority, file, line, func, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_journal_printv_with_location(int priority, const char *file, const char *line, const char *func, const char *format, va_list ap) {
+ char buffer[8 + LINE_MAX], p[STRLEN("PRIORITY=") + DECIMAL_STR_MAX(int) + 1];
+ struct iovec iov[5];
+ char *f;
+
+ assert_return(priority >= 0, -EINVAL);
+ assert_return(priority <= 7, -EINVAL);
+ assert_return(format, -EINVAL);
+
+ xsprintf(p, "PRIORITY=%i", priority & LOG_PRIMASK);
+
+ memcpy(buffer, "MESSAGE=", 8);
+ vsnprintf(buffer+8, sizeof(buffer) - 8, format, ap);
+
+ /* Strip trailing whitespace, keep prefixing whitespace */
+ (void) strstrip(buffer);
+
+ /* Suppress empty lines */
+ if (isempty(buffer+8))
+ return 0;
+
+ /* func is initialized from __func__ which is not a macro, but
+ * a static const char[], hence cannot easily be prefixed with
+ * CODE_FUNC=, hence let's do it manually here. */
+ ALLOCA_CODE_FUNC(f, func);
+
+ iov[0] = IOVEC_MAKE_STRING(buffer);
+ iov[1] = IOVEC_MAKE_STRING(p);
+ iov[2] = IOVEC_MAKE_STRING(file);
+ iov[3] = IOVEC_MAKE_STRING(line);
+ iov[4] = IOVEC_MAKE_STRING(f);
+
+ return sd_journal_sendv(iov, ELEMENTSOF(iov));
+}
+
+_public_ int sd_journal_send_with_location(const char *file, const char *line, const char *func, const char *format, ...) {
+ _cleanup_free_ struct iovec *iov = NULL;
+ int r, i, j;
+ va_list ap;
+ char *f;
+
+ va_start(ap, format);
+ i = fill_iovec_sprintf(format, ap, 3, &iov);
+ va_end(ap);
+
+ if (_unlikely_(i < 0)) {
+ r = i;
+ goto finish;
+ }
+
+ ALLOCA_CODE_FUNC(f, func);
+
+ iov[0] = IOVEC_MAKE_STRING(file);
+ iov[1] = IOVEC_MAKE_STRING(line);
+ iov[2] = IOVEC_MAKE_STRING(f);
+
+ r = sd_journal_sendv(iov, i);
+
+finish:
+ for (j = 3; j < i; j++)
+ free(iov[j].iov_base);
+
+ return r;
+}
+
+_public_ int sd_journal_sendv_with_location(
+ const char *file, const char *line,
+ const char *func,
+ const struct iovec *iov, int n) {
+
+ struct iovec *niov;
+ char *f;
+
+ assert_return(iov, -EINVAL);
+ assert_return(n > 0, -EINVAL);
+
+ niov = newa(struct iovec, n + 3);
+ memcpy(niov, iov, sizeof(struct iovec) * n);
+
+ ALLOCA_CODE_FUNC(f, func);
+
+ niov[n++] = IOVEC_MAKE_STRING(file);
+ niov[n++] = IOVEC_MAKE_STRING(line);
+ niov[n++] = IOVEC_MAKE_STRING(f);
+
+ return sd_journal_sendv(niov, n);
+}
+
+_public_ int sd_journal_perror_with_location(
+ const char *file, const char *line,
+ const char *func,
+ const char *message) {
+
+ struct iovec iov[6];
+ char *f;
+
+ ALLOCA_CODE_FUNC(f, func);
+
+ iov[0] = IOVEC_MAKE_STRING(file);
+ iov[1] = IOVEC_MAKE_STRING(line);
+ iov[2] = IOVEC_MAKE_STRING(f);
+
+ return fill_iovec_perror_and_send(message, 3, iov);
+}
diff --git a/src/journal/journal-vacuum.c b/src/journal/journal-vacuum.c
new file mode 100644
index 0000000..2778ce4
--- /dev/null
+++ b/src/journal/journal-vacuum.c
@@ -0,0 +1,321 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-vacuum.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "util.h"
+#include "xattr-util.h"
+
+struct vacuum_info {
+ uint64_t usage;
+ char *filename;
+
+ uint64_t realtime;
+
+ sd_id128_t seqnum_id;
+ uint64_t seqnum;
+ bool have_seqnum;
+};
+
+static int vacuum_compare(const struct vacuum_info *a, const struct vacuum_info *b) {
+ int r;
+
+ if (a->have_seqnum && b->have_seqnum &&
+ sd_id128_equal(a->seqnum_id, b->seqnum_id))
+ return CMP(a->seqnum, b->seqnum);
+
+ r = CMP(a->realtime, b->realtime);
+ if (r != 0)
+ return r;
+
+ if (a->have_seqnum && b->have_seqnum)
+ return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
+
+ return strcmp(a->filename, b->filename);
+}
+
+static void patch_realtime(
+ int fd,
+ const char *fn,
+ const struct stat *st,
+ unsigned long long *realtime) {
+
+ usec_t x, crtime = 0;
+
+ /* The timestamp was determined by the file name, but let's
+ * see if the file might actually be older than the file name
+ * suggested... */
+
+ assert(fd >= 0);
+ assert(fn);
+ assert(st);
+ assert(realtime);
+
+ x = timespec_load(&st->st_ctim);
+ if (x > 0 && x != USEC_INFINITY && x < *realtime)
+ *realtime = x;
+
+ x = timespec_load(&st->st_atim);
+ if (x > 0 && x != USEC_INFINITY && x < *realtime)
+ *realtime = x;
+
+ x = timespec_load(&st->st_mtim);
+ if (x > 0 && x != USEC_INFINITY && x < *realtime)
+ *realtime = x;
+
+ /* Let's read the original creation time, if possible. Ideally
+ * we'd just query the creation time the FS might provide, but
+ * unfortunately there's currently no sane API to query
+ * it. Hence let's implement this manually... */
+
+ if (fd_getcrtime_at(fd, fn, &crtime, 0) >= 0) {
+ if (crtime < *realtime)
+ *realtime = crtime;
+ }
+}
+
+static int journal_file_empty(int dir_fd, const char *name) {
+ _cleanup_close_ int fd;
+ struct stat st;
+ le64_t n_entries;
+ ssize_t n;
+
+ fd = openat(dir_fd, name, O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK|O_NOATIME);
+ if (fd < 0) {
+ /* Maybe failed due to O_NOATIME and lack of privileges? */
+ fd = openat(dir_fd, name, O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK);
+ if (fd < 0)
+ return -errno;
+ }
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* If an offline file doesn't even have a header we consider it empty */
+ if (st.st_size < (off_t) sizeof(Header))
+ return 1;
+
+ /* If the number of entries is empty, we consider it empty, too */
+ n = pread(fd, &n_entries, sizeof(n_entries), offsetof(Header, n_entries));
+ if (n < 0)
+ return -errno;
+ if (n != sizeof(n_entries))
+ return -EIO;
+
+ return le64toh(n_entries) <= 0;
+}
+
+int journal_directory_vacuum(
+ const char *directory,
+ uint64_t max_use,
+ uint64_t n_max_files,
+ usec_t max_retention_usec,
+ usec_t *oldest_usec,
+ bool verbose) {
+
+ uint64_t sum = 0, freed = 0, n_active_files = 0;
+ size_t n_list = 0, n_allocated = 0, i;
+ _cleanup_closedir_ DIR *d = NULL;
+ struct vacuum_info *list = NULL;
+ usec_t retention_limit = 0;
+ char sbytes[FORMAT_BYTES_MAX];
+ struct dirent *de;
+ int r;
+
+ assert(directory);
+
+ if (max_use <= 0 && max_retention_usec <= 0 && n_max_files <= 0)
+ return 0;
+
+ if (max_retention_usec > 0)
+ retention_limit = usec_sub_unsigned(now(CLOCK_REALTIME), max_retention_usec);
+
+ d = opendir(directory);
+ if (!d)
+ return -errno;
+
+ FOREACH_DIRENT_ALL(de, d, r = -errno; goto finish) {
+
+ unsigned long long seqnum = 0, realtime;
+ _cleanup_free_ char *p = NULL;
+ sd_id128_t seqnum_id;
+ bool have_seqnum;
+ uint64_t size;
+ struct stat st;
+ size_t q;
+
+ if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ log_debug_errno(errno, "Failed to stat file %s while vacuuming, ignoring: %m", de->d_name);
+ continue;
+ }
+
+ if (!S_ISREG(st.st_mode))
+ continue;
+
+ q = strlen(de->d_name);
+
+ if (endswith(de->d_name, ".journal")) {
+
+ /* Vacuum archived files. Active files are
+ * left around */
+
+ if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8) {
+ n_active_files++;
+ continue;
+ }
+
+ if (de->d_name[q-8-16-1] != '-' ||
+ de->d_name[q-8-16-1-16-1] != '-' ||
+ de->d_name[q-8-16-1-16-1-32-1] != '@') {
+ n_active_files++;
+ continue;
+ }
+
+ p = strdup(de->d_name);
+ if (!p) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ de->d_name[q-8-16-1-16-1] = 0;
+ if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
+ n_active_files++;
+ continue;
+ }
+
+ if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
+ n_active_files++;
+ continue;
+ }
+
+ have_seqnum = true;
+
+ } else if (endswith(de->d_name, ".journal~")) {
+ unsigned long long tmp;
+
+ /* Vacuum corrupted files */
+
+ if (q < 1 + 16 + 1 + 16 + 8 + 1) {
+ n_active_files++;
+ continue;
+ }
+
+ if (de->d_name[q-1-8-16-1] != '-' ||
+ de->d_name[q-1-8-16-1-16-1] != '@') {
+ n_active_files++;
+ continue;
+ }
+
+ p = strdup(de->d_name);
+ if (!p) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
+ n_active_files++;
+ continue;
+ }
+
+ have_seqnum = false;
+ } else {
+ /* We do not vacuum unknown files! */
+ log_debug("Not vacuuming unknown file %s.", de->d_name);
+ continue;
+ }
+
+ size = 512UL * (uint64_t) st.st_blocks;
+
+ r = journal_file_empty(dirfd(d), p);
+ if (r < 0) {
+ log_debug_errno(r, "Failed check if %s is empty, ignoring: %m", p);
+ continue;
+ }
+ if (r > 0) {
+ /* Always vacuum empty non-online files. */
+
+ r = unlinkat_deallocate(dirfd(d), p, 0);
+ if (r >= 0) {
+
+ log_full(verbose ? LOG_INFO : LOG_DEBUG,
+ "Deleted empty archived journal %s/%s (%s).", directory, p, format_bytes(sbytes, sizeof(sbytes), size));
+
+ freed += size;
+ } else if (r != -ENOENT)
+ log_warning_errno(r, "Failed to delete empty archived journal %s/%s: %m", directory, p);
+
+ continue;
+ }
+
+ patch_realtime(dirfd(d), p, &st, &realtime);
+
+ if (!GREEDY_REALLOC(list, n_allocated, n_list + 1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ list[n_list++] = (struct vacuum_info) {
+ .filename = TAKE_PTR(p),
+ .usage = size,
+ .seqnum = seqnum,
+ .realtime = realtime,
+ .seqnum_id = seqnum_id,
+ .have_seqnum = have_seqnum,
+ };
+
+ sum += size;
+ }
+
+ typesafe_qsort(list, n_list, vacuum_compare);
+
+ for (i = 0; i < n_list; i++) {
+ uint64_t left;
+
+ left = n_active_files + n_list - i;
+
+ if ((max_retention_usec <= 0 || list[i].realtime >= retention_limit) &&
+ (max_use <= 0 || sum <= max_use) &&
+ (n_max_files <= 0 || left <= n_max_files))
+ break;
+
+ r = unlinkat_deallocate(dirfd(d), list[i].filename, 0);
+ if (r >= 0) {
+ log_full(verbose ? LOG_INFO : LOG_DEBUG, "Deleted archived journal %s/%s (%s).", directory, list[i].filename, format_bytes(sbytes, sizeof(sbytes), list[i].usage));
+ freed += list[i].usage;
+
+ if (list[i].usage < sum)
+ sum -= list[i].usage;
+ else
+ sum = 0;
+
+ } else if (r != -ENOENT)
+ log_warning_errno(r, "Failed to delete archived journal %s/%s: %m", directory, list[i].filename);
+ }
+
+ if (oldest_usec && i < n_list && (*oldest_usec == 0 || list[i].realtime < *oldest_usec))
+ *oldest_usec = list[i].realtime;
+
+ r = 0;
+
+finish:
+ for (i = 0; i < n_list; i++)
+ free(list[i].filename);
+ free(list);
+
+ log_full(verbose ? LOG_INFO : LOG_DEBUG, "Vacuuming done, freed %s of archived journals from %s.", format_bytes(sbytes, sizeof(sbytes), freed), directory);
+
+ return r;
+}
diff --git a/src/journal/journal-vacuum.h b/src/journal/journal-vacuum.h
new file mode 100644
index 0000000..0b336ac
--- /dev/null
+++ b/src/journal/journal-vacuum.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+#include "time-util.h"
+
+int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t n_max_files, usec_t max_retention_usec, usec_t *oldest_usec, bool verbose);
diff --git a/src/journal/journal-verify.c b/src/journal/journal-verify.c
new file mode 100644
index 0000000..5eff80a
--- /dev/null
+++ b/src/journal/journal-verify.c
@@ -0,0 +1,1320 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "compress.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "journal-authenticate.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-verify.h"
+#include "lookup3.h"
+#include "macro.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static void draw_progress(uint64_t p, usec_t *last_usec) {
+ unsigned n, i, j, k;
+ usec_t z, x;
+
+ if (!on_tty())
+ return;
+
+ z = now(CLOCK_MONOTONIC);
+ x = *last_usec;
+
+ if (x != 0 && x + 40 * USEC_PER_MSEC > z)
+ return;
+
+ *last_usec = z;
+
+ n = (3 * columns()) / 4;
+ j = (n * (unsigned) p) / 65535ULL;
+ k = n - j;
+
+ fputs("\r", stdout);
+ if (colors_enabled())
+ fputs("\x1B[?25l" ANSI_HIGHLIGHT_GREEN, stdout);
+
+ for (i = 0; i < j; i++)
+ fputs("\xe2\x96\x88", stdout);
+
+ fputs(ANSI_NORMAL, stdout);
+
+ for (i = 0; i < k; i++)
+ fputs("\xe2\x96\x91", stdout);
+
+ printf(" %3"PRIu64"%%", 100U * p / 65535U);
+
+ fputs("\r", stdout);
+ if (colors_enabled())
+ fputs("\x1B[?25h", stdout);
+
+ fflush(stdout);
+}
+
+static uint64_t scale_progress(uint64_t scale, uint64_t p, uint64_t m) {
+ /* Calculates scale * p / m, but handles m == 0 safely, and saturates.
+ * Currently all callers use m >= 1, but we keep the check to be defensive.
+ */
+
+ if (p >= m || m == 0) /* lgtm [cpp/constant-comparison] */
+ return scale;
+
+ return scale * p / m;
+}
+
+static void flush_progress(void) {
+ unsigned n, i;
+
+ if (!on_tty())
+ return;
+
+ n = (3 * columns()) / 4;
+
+ putchar('\r');
+
+ for (i = 0; i < n + 5; i++)
+ putchar(' ');
+
+ putchar('\r');
+ fflush(stdout);
+}
+
+#define debug(_offset, _fmt, ...) do { \
+ flush_progress(); \
+ log_debug(OFSfmt": " _fmt, _offset, ##__VA_ARGS__); \
+ } while (0)
+
+#define warning(_offset, _fmt, ...) do { \
+ flush_progress(); \
+ log_warning(OFSfmt": " _fmt, _offset, ##__VA_ARGS__); \
+ } while (0)
+
+#define error(_offset, _fmt, ...) do { \
+ flush_progress(); \
+ log_error(OFSfmt": " _fmt, (uint64_t)_offset, ##__VA_ARGS__); \
+ } while (0)
+
+#define error_errno(_offset, error, _fmt, ...) do { \
+ flush_progress(); \
+ log_error_errno(error, OFSfmt": " _fmt, (uint64_t)_offset, ##__VA_ARGS__); \
+ } while (0)
+
+static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o) {
+ uint64_t i;
+
+ assert(f);
+ assert(offset);
+ assert(o);
+
+ /* This does various superficial tests about the length an
+ * possible field values. It does not follow any references to
+ * other objects. */
+
+ if ((o->object.flags & OBJECT_COMPRESSED_XZ) &&
+ o->object.type != OBJECT_DATA) {
+ error(offset, "Found compressed object that isn't of type DATA, which is not allowed.");
+ return -EBADMSG;
+ }
+
+ switch (o->object.type) {
+
+ case OBJECT_DATA: {
+ uint64_t h1, h2;
+ int compression, r;
+
+ if (le64toh(o->data.entry_offset) == 0)
+ warning(offset, "Unused data (entry_offset==0)");
+
+ if ((le64toh(o->data.entry_offset) == 0) ^ (le64toh(o->data.n_entries) == 0)) {
+ error(offset, "Bad n_entries: %"PRIu64, le64toh(o->data.n_entries));
+ return -EBADMSG;
+ }
+
+ if (le64toh(o->object.size) - offsetof(DataObject, payload) <= 0) {
+ error(offset, "Bad object size (<= %zu): %"PRIu64,
+ offsetof(DataObject, payload),
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ h1 = le64toh(o->data.hash);
+
+ compression = o->object.flags & OBJECT_COMPRESSION_MASK;
+ if (compression) {
+ _cleanup_free_ void *b = NULL;
+ size_t alloc = 0, b_size;
+
+ r = decompress_blob(compression,
+ o->data.payload,
+ le64toh(o->object.size) - offsetof(Object, data.payload),
+ &b, &alloc, &b_size, 0);
+ if (r < 0) {
+ error_errno(offset, r, "%s decompression failed: %m",
+ object_compressed_to_string(compression));
+ return r;
+ }
+
+ h2 = hash64(b, b_size);
+ } else
+ h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
+
+ if (h1 != h2) {
+ error(offset, "Invalid hash (%08"PRIx64" vs. %08"PRIx64, h1, h2);
+ return -EBADMSG;
+ }
+
+ if (!VALID64(le64toh(o->data.next_hash_offset)) ||
+ !VALID64(le64toh(o->data.next_field_offset)) ||
+ !VALID64(le64toh(o->data.entry_offset)) ||
+ !VALID64(le64toh(o->data.entry_array_offset))) {
+ error(offset, "Invalid offset (next_hash_offset="OFSfmt", next_field_offset="OFSfmt", entry_offset="OFSfmt", entry_array_offset="OFSfmt,
+ le64toh(o->data.next_hash_offset),
+ le64toh(o->data.next_field_offset),
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset));
+ return -EBADMSG;
+ }
+
+ break;
+ }
+
+ case OBJECT_FIELD:
+ if (le64toh(o->object.size) - offsetof(FieldObject, payload) <= 0) {
+ error(offset,
+ "Bad field size (<= %zu): %"PRIu64,
+ offsetof(FieldObject, payload),
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ if (!VALID64(le64toh(o->field.next_hash_offset)) ||
+ !VALID64(le64toh(o->field.head_data_offset))) {
+ error(offset,
+ "Invalid offset (next_hash_offset="OFSfmt", head_data_offset="OFSfmt,
+ le64toh(o->field.next_hash_offset),
+ le64toh(o->field.head_data_offset));
+ return -EBADMSG;
+ }
+ break;
+
+ case OBJECT_ENTRY:
+ if ((le64toh(o->object.size) - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0) {
+ error(offset,
+ "Bad entry size (<= %zu): %"PRIu64,
+ offsetof(EntryObject, items),
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ if ((le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0) {
+ error(offset,
+ "Invalid number items in entry: %"PRIu64,
+ (le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem));
+ return -EBADMSG;
+ }
+
+ if (le64toh(o->entry.seqnum) <= 0) {
+ error(offset,
+ "Invalid entry seqnum: %"PRIx64,
+ le64toh(o->entry.seqnum));
+ return -EBADMSG;
+ }
+
+ if (!VALID_REALTIME(le64toh(o->entry.realtime))) {
+ error(offset,
+ "Invalid entry realtime timestamp: %"PRIu64,
+ le64toh(o->entry.realtime));
+ return -EBADMSG;
+ }
+
+ if (!VALID_MONOTONIC(le64toh(o->entry.monotonic))) {
+ error(offset,
+ "Invalid entry monotonic timestamp: %"PRIu64,
+ le64toh(o->entry.monotonic));
+ return -EBADMSG;
+ }
+
+ for (i = 0; i < journal_file_entry_n_items(o); i++) {
+ if (le64toh(o->entry.items[i].object_offset) == 0 ||
+ !VALID64(le64toh(o->entry.items[i].object_offset))) {
+ error(offset,
+ "Invalid entry item (%"PRIu64"/%"PRIu64" offset: "OFSfmt,
+ i, journal_file_entry_n_items(o),
+ le64toh(o->entry.items[i].object_offset));
+ return -EBADMSG;
+ }
+ }
+
+ break;
+
+ case OBJECT_DATA_HASH_TABLE:
+ case OBJECT_FIELD_HASH_TABLE:
+ if ((le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||
+ (le64toh(o->object.size) - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0) {
+ error(offset,
+ "Invalid %s hash table size: %"PRIu64,
+ o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ for (i = 0; i < journal_file_hash_table_n_items(o); i++) {
+ if (o->hash_table.items[i].head_hash_offset != 0 &&
+ !VALID64(le64toh(o->hash_table.items[i].head_hash_offset))) {
+ error(offset,
+ "Invalid %s hash table item (%"PRIu64"/%"PRIu64") head_hash_offset: "OFSfmt,
+ o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
+ i, journal_file_hash_table_n_items(o),
+ le64toh(o->hash_table.items[i].head_hash_offset));
+ return -EBADMSG;
+ }
+ if (o->hash_table.items[i].tail_hash_offset != 0 &&
+ !VALID64(le64toh(o->hash_table.items[i].tail_hash_offset))) {
+ error(offset,
+ "Invalid %s hash table item (%"PRIu64"/%"PRIu64") tail_hash_offset: "OFSfmt,
+ o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
+ i, journal_file_hash_table_n_items(o),
+ le64toh(o->hash_table.items[i].tail_hash_offset));
+ return -EBADMSG;
+ }
+
+ if ((o->hash_table.items[i].head_hash_offset != 0) !=
+ (o->hash_table.items[i].tail_hash_offset != 0)) {
+ error(offset,
+ "Invalid %s hash table item (%"PRIu64"/%"PRIu64"): head_hash_offset="OFSfmt" tail_hash_offset="OFSfmt,
+ o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
+ i, journal_file_hash_table_n_items(o),
+ le64toh(o->hash_table.items[i].head_hash_offset),
+ le64toh(o->hash_table.items[i].tail_hash_offset));
+ return -EBADMSG;
+ }
+ }
+
+ break;
+
+ case OBJECT_ENTRY_ARRAY:
+ if ((le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||
+ (le64toh(o->object.size) - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0) {
+ error(offset,
+ "Invalid object entry array size: %"PRIu64,
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ if (!VALID64(le64toh(o->entry_array.next_entry_array_offset))) {
+ error(offset,
+ "Invalid object entry array next_entry_array_offset: "OFSfmt,
+ le64toh(o->entry_array.next_entry_array_offset));
+ return -EBADMSG;
+ }
+
+ for (i = 0; i < journal_file_entry_array_n_items(o); i++)
+ if (le64toh(o->entry_array.items[i]) != 0 &&
+ !VALID64(le64toh(o->entry_array.items[i]))) {
+ error(offset,
+ "Invalid object entry array item (%"PRIu64"/%"PRIu64"): "OFSfmt,
+ i, journal_file_entry_array_n_items(o),
+ le64toh(o->entry_array.items[i]));
+ return -EBADMSG;
+ }
+
+ break;
+
+ case OBJECT_TAG:
+ if (le64toh(o->object.size) != sizeof(TagObject)) {
+ error(offset,
+ "Invalid object tag size: %"PRIu64,
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ if (!VALID_EPOCH(le64toh(o->tag.epoch))) {
+ error(offset,
+ "Invalid object tag epoch: %"PRIu64,
+ le64toh(o->tag.epoch));
+ return -EBADMSG;
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+static int write_uint64(int fd, uint64_t p) {
+ ssize_t k;
+
+ k = write(fd, &p, sizeof(p));
+ if (k < 0)
+ return -errno;
+ if (k != sizeof(p))
+ return -EIO;
+
+ return 0;
+}
+
+static int contains_uint64(MMapCache *m, MMapFileDescriptor *f, uint64_t n, uint64_t p) {
+ uint64_t a, b;
+ int r;
+
+ assert(m);
+ assert(f);
+
+ /* Bisection ... */
+
+ a = 0; b = n;
+ while (a < b) {
+ uint64_t c, *z;
+
+ c = (a + b) / 2;
+
+ r = mmap_cache_get(m, f, PROT_READ|PROT_WRITE, 0, false, c * sizeof(uint64_t), sizeof(uint64_t), NULL, (void **) &z, NULL);
+ if (r < 0)
+ return r;
+
+ if (*z == p)
+ return 1;
+
+ if (a + 1 >= b)
+ return 0;
+
+ if (p < *z)
+ b = c;
+ else
+ a = c;
+ }
+
+ return 0;
+}
+
+static int entry_points_to_data(
+ JournalFile *f,
+ MMapFileDescriptor *cache_entry_fd,
+ uint64_t n_entries,
+ uint64_t entry_p,
+ uint64_t data_p) {
+
+ int r;
+ uint64_t i, n, a;
+ Object *o;
+ bool found = false;
+
+ assert(f);
+ assert(cache_entry_fd);
+
+ if (!contains_uint64(f->mmap, cache_entry_fd, n_entries, entry_p)) {
+ error(data_p, "Data object references invalid entry at "OFSfmt, entry_p);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, entry_p, &o);
+ if (r < 0)
+ return r;
+
+ n = journal_file_entry_n_items(o);
+ for (i = 0; i < n; i++)
+ if (le64toh(o->entry.items[i].object_offset) == data_p) {
+ found = true;
+ break;
+ }
+
+ if (!found) {
+ error(entry_p, "Data object at "OFSfmt" not referenced by linked entry", data_p);
+ return -EBADMSG;
+ }
+
+ /* Check if this entry is also in main entry array. Since the
+ * main entry array has already been verified we can rely on
+ * its consistency. */
+
+ i = 0;
+ n = le64toh(f->header->n_entries);
+ a = le64toh(f->header->entry_array_offset);
+
+ while (i < n) {
+ uint64_t m, u;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+
+ m = journal_file_entry_array_n_items(o);
+ u = MIN(n - i, m);
+
+ if (entry_p <= le64toh(o->entry_array.items[u-1])) {
+ uint64_t x, y, z;
+
+ x = 0;
+ y = u;
+
+ while (x < y) {
+ z = (x + y) / 2;
+
+ if (le64toh(o->entry_array.items[z]) == entry_p)
+ return 0;
+
+ if (x + 1 >= y)
+ break;
+
+ if (entry_p < le64toh(o->entry_array.items[z]))
+ y = z;
+ else
+ x = z;
+ }
+
+ error(entry_p, "Entry object doesn't exist in main entry array");
+ return -EBADMSG;
+ }
+
+ i += u;
+ a = le64toh(o->entry_array.next_entry_array_offset);
+ }
+
+ return 0;
+}
+
+static int verify_data(
+ JournalFile *f,
+ Object *o, uint64_t p,
+ MMapFileDescriptor *cache_entry_fd, uint64_t n_entries,
+ MMapFileDescriptor *cache_entry_array_fd, uint64_t n_entry_arrays) {
+
+ uint64_t i, n, a, last, q;
+ int r;
+
+ assert(f);
+ assert(o);
+ assert(cache_entry_fd);
+ assert(cache_entry_array_fd);
+
+ n = le64toh(o->data.n_entries);
+ a = le64toh(o->data.entry_array_offset);
+
+ /* Entry array means at least two objects */
+ if (a && n < 2) {
+ error(p, "Entry array present (entry_array_offset="OFSfmt", but n_entries=%"PRIu64")", a, n);
+ return -EBADMSG;
+ }
+
+ if (n == 0)
+ return 0;
+
+ /* We already checked that earlier */
+ assert(o->data.entry_offset);
+
+ last = q = le64toh(o->data.entry_offset);
+ r = entry_points_to_data(f, cache_entry_fd, n_entries, q, p);
+ if (r < 0)
+ return r;
+
+ i = 1;
+ while (i < n) {
+ uint64_t next, m, j;
+
+ if (a == 0) {
+ error(p, "Array chain too short");
+ return -EBADMSG;
+ }
+
+ if (!contains_uint64(f->mmap, cache_entry_array_fd, n_entry_arrays, a)) {
+ error(p, "Invalid array offset "OFSfmt, a);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+
+ next = le64toh(o->entry_array.next_entry_array_offset);
+ if (next != 0 && next <= a) {
+ error(p, "Array chain has cycle (jumps back from "OFSfmt" to "OFSfmt")", a, next);
+ return -EBADMSG;
+ }
+
+ m = journal_file_entry_array_n_items(o);
+ for (j = 0; i < n && j < m; i++, j++) {
+
+ q = le64toh(o->entry_array.items[j]);
+ if (q <= last) {
+ error(p, "Data object's entry array not sorted");
+ return -EBADMSG;
+ }
+ last = q;
+
+ r = entry_points_to_data(f, cache_entry_fd, n_entries, q, p);
+ if (r < 0)
+ return r;
+
+ /* Pointer might have moved, reposition */
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+ }
+
+ a = next;
+ }
+
+ return 0;
+}
+
+static int verify_hash_table(
+ JournalFile *f,
+ MMapFileDescriptor *cache_data_fd, uint64_t n_data,
+ MMapFileDescriptor *cache_entry_fd, uint64_t n_entries,
+ MMapFileDescriptor *cache_entry_array_fd, uint64_t n_entry_arrays,
+ usec_t *last_usec,
+ bool show_progress) {
+
+ uint64_t i, n;
+ int r;
+
+ assert(f);
+ assert(cache_data_fd);
+ assert(cache_entry_fd);
+ assert(cache_entry_array_fd);
+ assert(last_usec);
+
+ n = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+ if (n <= 0)
+ return 0;
+
+ r = journal_file_map_data_hash_table(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to map data hash table: %m");
+
+ for (i = 0; i < n; i++) {
+ uint64_t last = 0, p;
+
+ if (show_progress)
+ draw_progress(0xC000 + scale_progress(0x3FFF, i, n), last_usec);
+
+ p = le64toh(f->data_hash_table[i].head_hash_offset);
+ while (p != 0) {
+ Object *o;
+ uint64_t next;
+
+ if (!contains_uint64(f->mmap, cache_data_fd, n_data, p)) {
+ error(p, "Invalid data object at hash entry %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ next = le64toh(o->data.next_hash_offset);
+ if (next != 0 && next <= p) {
+ error(p, "Hash chain has a cycle in hash entry %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ if (le64toh(o->data.hash) % n != i) {
+ error(p, "Hash value mismatch in hash entry %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ r = verify_data(f, o, p, cache_entry_fd, n_entries, cache_entry_array_fd, n_entry_arrays);
+ if (r < 0)
+ return r;
+
+ last = p;
+ p = next;
+ }
+
+ if (last != le64toh(f->data_hash_table[i].tail_hash_offset)) {
+ error(p, "Tail hash pointer mismatch in hash table");
+ return -EBADMSG;
+ }
+ }
+
+ return 0;
+}
+
+static int data_object_in_hash_table(JournalFile *f, uint64_t hash, uint64_t p) {
+ uint64_t n, h, q;
+ int r;
+ assert(f);
+
+ n = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+ if (n <= 0)
+ return 0;
+
+ r = journal_file_map_data_hash_table(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to map data hash table: %m");
+
+ h = hash % n;
+
+ q = le64toh(f->data_hash_table[h].head_hash_offset);
+ while (q != 0) {
+ Object *o;
+
+ if (p == q)
+ return 1;
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, q, &o);
+ if (r < 0)
+ return r;
+
+ q = le64toh(o->data.next_hash_offset);
+ }
+
+ return 0;
+}
+
+static int verify_entry(
+ JournalFile *f,
+ Object *o, uint64_t p,
+ MMapFileDescriptor *cache_data_fd, uint64_t n_data) {
+
+ uint64_t i, n;
+ int r;
+
+ assert(f);
+ assert(o);
+ assert(cache_data_fd);
+
+ n = journal_file_entry_n_items(o);
+ for (i = 0; i < n; i++) {
+ uint64_t q, h;
+ Object *u;
+
+ q = le64toh(o->entry.items[i].object_offset);
+ h = le64toh(o->entry.items[i].hash);
+
+ if (!contains_uint64(f->mmap, cache_data_fd, n_data, q)) {
+ error(p, "Invalid data object of entry");
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, q, &u);
+ if (r < 0)
+ return r;
+
+ if (le64toh(u->data.hash) != h) {
+ error(p, "Hash mismatch for data object of entry");
+ return -EBADMSG;
+ }
+
+ r = data_object_in_hash_table(f, h, q);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ error(p, "Data object missing from hash table");
+ return -EBADMSG;
+ }
+ }
+
+ return 0;
+}
+
+static int verify_entry_array(
+ JournalFile *f,
+ MMapFileDescriptor *cache_data_fd, uint64_t n_data,
+ MMapFileDescriptor *cache_entry_fd, uint64_t n_entries,
+ MMapFileDescriptor *cache_entry_array_fd, uint64_t n_entry_arrays,
+ usec_t *last_usec,
+ bool show_progress) {
+
+ uint64_t i = 0, a, n, last = 0;
+ int r;
+
+ assert(f);
+ assert(cache_data_fd);
+ assert(cache_entry_fd);
+ assert(cache_entry_array_fd);
+ assert(last_usec);
+
+ n = le64toh(f->header->n_entries);
+ a = le64toh(f->header->entry_array_offset);
+ while (i < n) {
+ uint64_t next, m, j;
+ Object *o;
+
+ if (show_progress)
+ draw_progress(0x8000 + scale_progress(0x3FFF, i, n), last_usec);
+
+ if (a == 0) {
+ error(a, "Array chain too short at %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ if (!contains_uint64(f->mmap, cache_entry_array_fd, n_entry_arrays, a)) {
+ error(a, "Invalid array %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+
+ next = le64toh(o->entry_array.next_entry_array_offset);
+ if (next != 0 && next <= a) {
+ error(a, "Array chain has cycle at %"PRIu64" of %"PRIu64" (jumps back from to "OFSfmt")", i, n, next);
+ return -EBADMSG;
+ }
+
+ m = journal_file_entry_array_n_items(o);
+ for (j = 0; i < n && j < m; i++, j++) {
+ uint64_t p;
+
+ p = le64toh(o->entry_array.items[j]);
+ if (p <= last) {
+ error(a, "Entry array not sorted at %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+ last = p;
+
+ if (!contains_uint64(f->mmap, cache_entry_fd, n_entries, p)) {
+ error(a, "Invalid array entry at %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ r = verify_entry(f, o, p, cache_data_fd, n_data);
+ if (r < 0)
+ return r;
+
+ /* Pointer might have moved, reposition */
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+ }
+
+ a = next;
+ }
+
+ return 0;
+}
+
+int journal_file_verify(
+ JournalFile *f,
+ const char *key,
+ usec_t *first_contained, usec_t *last_validated, usec_t *last_contained,
+ bool show_progress) {
+ int r;
+ Object *o;
+ uint64_t p = 0, last_epoch = 0, last_tag_realtime = 0, last_sealed_realtime = 0;
+
+ uint64_t entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0;
+ sd_id128_t entry_boot_id;
+ bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false;
+ uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0, n_entry_arrays = 0, n_tags = 0;
+ usec_t last_usec = 0;
+ int data_fd = -1, entry_fd = -1, entry_array_fd = -1;
+ MMapFileDescriptor *cache_data_fd = NULL, *cache_entry_fd = NULL, *cache_entry_array_fd = NULL;
+ unsigned i;
+ bool found_last = false;
+ const char *tmp_dir = NULL;
+
+#if HAVE_GCRYPT
+ uint64_t last_tag = 0;
+#endif
+ assert(f);
+
+ if (key) {
+#if HAVE_GCRYPT
+ r = journal_file_parse_verification_key(f, key);
+ if (r < 0) {
+ log_error("Failed to parse seed.");
+ return r;
+ }
+#else
+ return -EOPNOTSUPP;
+#endif
+ } else if (f->seal)
+ return -ENOKEY;
+
+ r = var_tmp_dir(&tmp_dir);
+ if (r < 0) {
+ log_error_errno(r, "Failed to determine temporary directory: %m");
+ goto fail;
+ }
+
+ data_fd = open_tmpfile_unlinkable(tmp_dir, O_RDWR | O_CLOEXEC);
+ if (data_fd < 0) {
+ r = log_error_errno(data_fd, "Failed to create data file: %m");
+ goto fail;
+ }
+
+ entry_fd = open_tmpfile_unlinkable(tmp_dir, O_RDWR | O_CLOEXEC);
+ if (entry_fd < 0) {
+ r = log_error_errno(entry_fd, "Failed to create entry file: %m");
+ goto fail;
+ }
+
+ entry_array_fd = open_tmpfile_unlinkable(tmp_dir, O_RDWR | O_CLOEXEC);
+ if (entry_array_fd < 0) {
+ r = log_error_errno(entry_array_fd,
+ "Failed to create entry array file: %m");
+ goto fail;
+ }
+
+ cache_data_fd = mmap_cache_add_fd(f->mmap, data_fd);
+ if (!cache_data_fd) {
+ r = log_oom();
+ goto fail;
+ }
+
+ cache_entry_fd = mmap_cache_add_fd(f->mmap, entry_fd);
+ if (!cache_entry_fd) {
+ r = log_oom();
+ goto fail;
+ }
+
+ cache_entry_array_fd = mmap_cache_add_fd(f->mmap, entry_array_fd);
+ if (!cache_entry_array_fd) {
+ r = log_oom();
+ goto fail;
+ }
+
+ if (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SUPPORTED) {
+ log_error("Cannot verify file with unknown extensions.");
+ r = -EOPNOTSUPP;
+ goto fail;
+ }
+
+ for (i = 0; i < sizeof(f->header->reserved); i++)
+ if (f->header->reserved[i] != 0) {
+ error(offsetof(Header, reserved[i]), "Reserved field is non-zero");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ /* First iteration: we go through all objects, verify the
+ * superficial structure, headers, hashes. */
+
+ p = le64toh(f->header->header_size);
+ for (;;) {
+ /* Early exit if there are no objects in the file, at all */
+ if (le64toh(f->header->tail_object_offset) == 0)
+ break;
+
+ if (show_progress)
+ draw_progress(scale_progress(0x7FFF, p, le64toh(f->header->tail_object_offset)), &last_usec);
+
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
+ if (r < 0) {
+ error(p, "Invalid object");
+ goto fail;
+ }
+
+ if (p > le64toh(f->header->tail_object_offset)) {
+ error(offsetof(Header, tail_object_offset), "Invalid tail object pointer");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ n_objects++;
+
+ r = journal_file_object_verify(f, p, o);
+ if (r < 0) {
+ error_errno(p, r, "Invalid object contents: %m");
+ goto fail;
+ }
+
+ if ((o->object.flags & OBJECT_COMPRESSED_XZ) &&
+ (o->object.flags & OBJECT_COMPRESSED_LZ4)) {
+ error(p, "Objected with double compression");
+ r = -EINVAL;
+ goto fail;
+ }
+
+ if ((o->object.flags & OBJECT_COMPRESSED_XZ) && !JOURNAL_HEADER_COMPRESSED_XZ(f->header)) {
+ error(p, "XZ compressed object in file without XZ compression");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if ((o->object.flags & OBJECT_COMPRESSED_LZ4) && !JOURNAL_HEADER_COMPRESSED_LZ4(f->header)) {
+ error(p, "LZ4 compressed object in file without LZ4 compression");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ switch (o->object.type) {
+
+ case OBJECT_DATA:
+ r = write_uint64(data_fd, p);
+ if (r < 0)
+ goto fail;
+
+ n_data++;
+ break;
+
+ case OBJECT_FIELD:
+ n_fields++;
+ break;
+
+ case OBJECT_ENTRY:
+ if (JOURNAL_HEADER_SEALED(f->header) && n_tags <= 0) {
+ error(p, "First entry before first tag");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ r = write_uint64(entry_fd, p);
+ if (r < 0)
+ goto fail;
+
+ if (le64toh(o->entry.realtime) < last_tag_realtime) {
+ error(p, "Older entry after newer tag");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (!entry_seqnum_set &&
+ le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) {
+ error(p, "Head entry sequence number incorrect");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (entry_seqnum_set &&
+ entry_seqnum >= le64toh(o->entry.seqnum)) {
+ error(p, "Entry sequence number out of synchronization");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ entry_seqnum = le64toh(o->entry.seqnum);
+ entry_seqnum_set = true;
+
+ if (entry_monotonic_set &&
+ sd_id128_equal(entry_boot_id, o->entry.boot_id) &&
+ entry_monotonic > le64toh(o->entry.monotonic)) {
+ error(p, "Entry timestamp out of synchronization");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ entry_monotonic = le64toh(o->entry.monotonic);
+ entry_boot_id = o->entry.boot_id;
+ entry_monotonic_set = true;
+
+ if (!entry_realtime_set &&
+ le64toh(o->entry.realtime) != le64toh(f->header->head_entry_realtime)) {
+ error(p, "Head entry realtime timestamp incorrect");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ entry_realtime = le64toh(o->entry.realtime);
+ entry_realtime_set = true;
+
+ n_entries++;
+ break;
+
+ case OBJECT_DATA_HASH_TABLE:
+ if (n_data_hash_tables > 1) {
+ error(p, "More than one data hash table");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (le64toh(f->header->data_hash_table_offset) != p + offsetof(HashTableObject, items) ||
+ le64toh(f->header->data_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
+ error(p, "header fields for data hash table invalid");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ n_data_hash_tables++;
+ break;
+
+ case OBJECT_FIELD_HASH_TABLE:
+ if (n_field_hash_tables > 1) {
+ error(p, "More than one field hash table");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (le64toh(f->header->field_hash_table_offset) != p + offsetof(HashTableObject, items) ||
+ le64toh(f->header->field_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
+ error(p, "Header fields for field hash table invalid");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ n_field_hash_tables++;
+ break;
+
+ case OBJECT_ENTRY_ARRAY:
+ r = write_uint64(entry_array_fd, p);
+ if (r < 0)
+ goto fail;
+
+ if (p == le64toh(f->header->entry_array_offset)) {
+ if (found_main_entry_array) {
+ error(p, "More than one main entry array");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ found_main_entry_array = true;
+ }
+
+ n_entry_arrays++;
+ break;
+
+ case OBJECT_TAG:
+ if (!JOURNAL_HEADER_SEALED(f->header)) {
+ error(p, "Tag object in file without sealing");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (le64toh(o->tag.seqnum) != n_tags + 1) {
+ error(p, "Tag sequence number out of synchronization");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (le64toh(o->tag.epoch) < last_epoch) {
+ error(p, "Epoch sequence out of synchronization");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+#if HAVE_GCRYPT
+ if (f->seal) {
+ uint64_t q, rt;
+
+ debug(p, "Checking tag %"PRIu64"...", le64toh(o->tag.seqnum));
+
+ rt = f->fss_start_usec + le64toh(o->tag.epoch) * f->fss_interval_usec;
+ if (entry_realtime_set && entry_realtime >= rt + f->fss_interval_usec) {
+ error(p, "tag/entry realtime timestamp out of synchronization");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ /* OK, now we know the epoch. So let's now set
+ * it, and calculate the HMAC for everything
+ * since the last tag. */
+ r = journal_file_fsprg_seek(f, le64toh(o->tag.epoch));
+ if (r < 0)
+ goto fail;
+
+ r = journal_file_hmac_start(f);
+ if (r < 0)
+ goto fail;
+
+ if (last_tag == 0) {
+ r = journal_file_hmac_put_header(f);
+ if (r < 0)
+ goto fail;
+
+ q = le64toh(f->header->header_size);
+ } else
+ q = last_tag;
+
+ while (q <= p) {
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, q, &o);
+ if (r < 0)
+ goto fail;
+
+ r = journal_file_hmac_put_object(f, OBJECT_UNUSED, o, q);
+ if (r < 0)
+ goto fail;
+
+ q = q + ALIGN64(le64toh(o->object.size));
+ }
+
+ /* Position might have changed, let's reposition things */
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
+ if (r < 0)
+ goto fail;
+
+ if (memcmp(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH) != 0) {
+ error(p, "Tag failed verification");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ f->hmac_running = false;
+ last_tag_realtime = rt;
+ last_sealed_realtime = entry_realtime;
+ }
+
+ last_tag = p + ALIGN64(le64toh(o->object.size));
+#endif
+
+ last_epoch = le64toh(o->tag.epoch);
+
+ n_tags++;
+ break;
+
+ default:
+ n_weird++;
+ }
+
+ if (p == le64toh(f->header->tail_object_offset)) {
+ found_last = true;
+ break;
+ }
+
+ p = p + ALIGN64(le64toh(o->object.size));
+ };
+
+ if (!found_last && le64toh(f->header->tail_object_offset) != 0) {
+ error(le64toh(f->header->tail_object_offset), "Tail object pointer dead");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (n_objects != le64toh(f->header->n_objects)) {
+ error(offsetof(Header, n_objects), "Object number mismatch");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (n_entries != le64toh(f->header->n_entries)) {
+ error(offsetof(Header, n_entries), "Entry number mismatch");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
+ n_data != le64toh(f->header->n_data)) {
+ error(offsetof(Header, n_data), "Data number mismatch");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
+ n_fields != le64toh(f->header->n_fields)) {
+ error(offsetof(Header, n_fields), "Field number mismatch");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_tags) &&
+ n_tags != le64toh(f->header->n_tags)) {
+ error(offsetof(Header, n_tags), "Tag number mismatch");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays) &&
+ n_entry_arrays != le64toh(f->header->n_entry_arrays)) {
+ error(offsetof(Header, n_entry_arrays), "Entry array number mismatch");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (!found_main_entry_array && le64toh(f->header->entry_array_offset) != 0) {
+ error(0, "Missing entry array");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (entry_seqnum_set &&
+ entry_seqnum != le64toh(f->header->tail_entry_seqnum)) {
+ error(offsetof(Header, tail_entry_seqnum), "Invalid tail seqnum");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (entry_monotonic_set &&
+ (sd_id128_equal(entry_boot_id, f->header->boot_id) &&
+ entry_monotonic != le64toh(f->header->tail_entry_monotonic))) {
+ error(0, "Invalid tail monotonic timestamp");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (entry_realtime_set && entry_realtime != le64toh(f->header->tail_entry_realtime)) {
+ error(0, "Invalid tail realtime timestamp");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ /* Second iteration: we follow all objects referenced from the
+ * two entry points: the object hash table and the entry
+ * array. We also check that everything referenced (directly
+ * or indirectly) in the data hash table also exists in the
+ * entry array, and vice versa. Note that we do not care for
+ * unreferenced objects. We only care that everything that is
+ * referenced is consistent. */
+
+ r = verify_entry_array(f,
+ cache_data_fd, n_data,
+ cache_entry_fd, n_entries,
+ cache_entry_array_fd, n_entry_arrays,
+ &last_usec,
+ show_progress);
+ if (r < 0)
+ goto fail;
+
+ r = verify_hash_table(f,
+ cache_data_fd, n_data,
+ cache_entry_fd, n_entries,
+ cache_entry_array_fd, n_entry_arrays,
+ &last_usec,
+ show_progress);
+ if (r < 0)
+ goto fail;
+
+ if (show_progress)
+ flush_progress();
+
+ mmap_cache_free_fd(f->mmap, cache_data_fd);
+ mmap_cache_free_fd(f->mmap, cache_entry_fd);
+ mmap_cache_free_fd(f->mmap, cache_entry_array_fd);
+
+ safe_close(data_fd);
+ safe_close(entry_fd);
+ safe_close(entry_array_fd);
+
+ if (first_contained)
+ *first_contained = le64toh(f->header->head_entry_realtime);
+ if (last_validated)
+ *last_validated = last_sealed_realtime;
+ if (last_contained)
+ *last_contained = le64toh(f->header->tail_entry_realtime);
+
+ return 0;
+
+fail:
+ if (show_progress)
+ flush_progress();
+
+ log_error("File corruption detected at %s:"OFSfmt" (of %llu bytes, %"PRIu64"%%).",
+ f->path,
+ p,
+ (unsigned long long) f->last_stat.st_size,
+ 100 * p / f->last_stat.st_size);
+
+ if (data_fd >= 0)
+ safe_close(data_fd);
+
+ if (entry_fd >= 0)
+ safe_close(entry_fd);
+
+ if (entry_array_fd >= 0)
+ safe_close(entry_array_fd);
+
+ if (cache_data_fd)
+ mmap_cache_free_fd(f->mmap, cache_data_fd);
+
+ if (cache_entry_fd)
+ mmap_cache_free_fd(f->mmap, cache_entry_fd);
+
+ if (cache_entry_array_fd)
+ mmap_cache_free_fd(f->mmap, cache_entry_array_fd);
+
+ return r;
+}
diff --git a/src/journal/journal-verify.h b/src/journal/journal-verify.h
new file mode 100644
index 0000000..f0ea31a
--- /dev/null
+++ b/src/journal/journal-verify.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "journal-file.h"
+
+int journal_file_verify(JournalFile *f, const char *key, usec_t *first_contained, usec_t *last_validated, usec_t *last_contained, bool show_progress);
diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c
new file mode 100644
index 0000000..14a02ed
--- /dev/null
+++ b/src/journal/journalctl.c
@@ -0,0 +1,2706 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <getopt.h>
+#include <linux/fs.h>
+#include <locale.h>
+#include <poll.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/inotify.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#if HAVE_PCRE2
+# define PCRE2_CODE_UNIT_WIDTH 8
+# include <pcre2.h>
+#endif
+
+#include "sd-bus.h"
+#include "sd-device.h"
+#include "sd-journal.h"
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "catalog.h"
+#include "chattr-util.h"
+#include "def.h"
+#include "device-private.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "fsprg.h"
+#include "glob-util.h"
+#include "hostname-util.h"
+#include "id128-print.h"
+#include "io-util.h"
+#include "journal-def.h"
+#include "journal-internal.h"
+#include "journal-qrcode.h"
+#include "journal-util.h"
+#include "journal-vacuum.h"
+#include "journal-verify.h"
+#include "locale-util.h"
+#include "log.h"
+#include "logs-show.h"
+#include "mkdir.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "rlimit-util.h"
+#include "set.h"
+#include "sigbus.h"
+#include "string-table.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+
+#define DEFAULT_FSS_INTERVAL_USEC (15*USEC_PER_MINUTE)
+
+#define PROCESS_INOTIFY_INTERVAL 1024 /* Every 1,024 messages processed */
+
+#if HAVE_PCRE2
+DEFINE_TRIVIAL_CLEANUP_FUNC(pcre2_match_data*, pcre2_match_data_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(pcre2_code*, pcre2_code_free);
+
+static int pattern_compile(const char *pattern, unsigned flags, pcre2_code **out) {
+ int errorcode, r;
+ PCRE2_SIZE erroroffset;
+ pcre2_code *p;
+
+ p = pcre2_compile((PCRE2_SPTR8) pattern,
+ PCRE2_ZERO_TERMINATED, flags, &errorcode, &erroroffset, NULL);
+ if (!p) {
+ unsigned char buf[LINE_MAX];
+
+ r = pcre2_get_error_message(errorcode, buf, sizeof buf);
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Bad pattern \"%s\": %s", pattern,
+ r < 0 ? "unknown error" : (char *)buf);
+ }
+
+ *out = p;
+ return 0;
+}
+
+#endif
+
+enum {
+ /* Special values for arg_lines */
+ ARG_LINES_DEFAULT = -2,
+ ARG_LINES_ALL = -1,
+};
+
+static OutputMode arg_output = OUTPUT_SHORT;
+static bool arg_utc = false;
+static bool arg_follow = false;
+static bool arg_full = true;
+static bool arg_all = false;
+static PagerFlags arg_pager_flags = 0;
+static int arg_lines = ARG_LINES_DEFAULT;
+static bool arg_no_tail = false;
+static bool arg_quiet = false;
+static bool arg_merge = false;
+static bool arg_boot = false;
+static sd_id128_t arg_boot_id = {};
+static int arg_boot_offset = 0;
+static bool arg_dmesg = false;
+static bool arg_no_hostname = false;
+static const char *arg_cursor = NULL;
+static const char *arg_after_cursor = NULL;
+static bool arg_show_cursor = false;
+static const char *arg_directory = NULL;
+static char **arg_file = NULL;
+static bool arg_file_stdin = false;
+static int arg_priorities = 0xFF;
+static char *arg_verify_key = NULL;
+#if HAVE_GCRYPT
+static usec_t arg_interval = DEFAULT_FSS_INTERVAL_USEC;
+static bool arg_force = false;
+#endif
+static usec_t arg_since, arg_until;
+static bool arg_since_set = false, arg_until_set = false;
+static char **arg_syslog_identifier = NULL;
+static char **arg_system_units = NULL;
+static char **arg_user_units = NULL;
+static const char *arg_field = NULL;
+static bool arg_catalog = false;
+static bool arg_reverse = false;
+static int arg_journal_type = 0;
+static char *arg_root = NULL;
+static const char *arg_machine = NULL;
+static uint64_t arg_vacuum_size = 0;
+static uint64_t arg_vacuum_n_files = 0;
+static usec_t arg_vacuum_time = 0;
+static char **arg_output_fields = NULL;
+
+#if HAVE_PCRE2
+static const char *arg_pattern = NULL;
+static pcre2_code *arg_compiled_pattern = NULL;
+static int arg_case_sensitive = -1; /* -1 means be smart */
+#endif
+
+static enum {
+ ACTION_SHOW,
+ ACTION_NEW_ID128,
+ ACTION_PRINT_HEADER,
+ ACTION_SETUP_KEYS,
+ ACTION_VERIFY,
+ ACTION_DISK_USAGE,
+ ACTION_LIST_CATALOG,
+ ACTION_DUMP_CATALOG,
+ ACTION_UPDATE_CATALOG,
+ ACTION_LIST_BOOTS,
+ ACTION_FLUSH,
+ ACTION_SYNC,
+ ACTION_ROTATE,
+ ACTION_VACUUM,
+ ACTION_ROTATE_AND_VACUUM,
+ ACTION_LIST_FIELDS,
+ ACTION_LIST_FIELD_NAMES,
+} arg_action = ACTION_SHOW;
+
+typedef struct BootId {
+ sd_id128_t id;
+ uint64_t first;
+ uint64_t last;
+ LIST_FIELDS(struct BootId, boot_list);
+} BootId;
+
+static int add_matches_for_device(sd_journal *j, const char *devpath) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ sd_device *d = NULL;
+ struct stat st;
+ int r;
+
+ assert(j);
+ assert(devpath);
+
+ if (!path_startswith(devpath, "/dev/")) {
+ log_error("Devpath does not start with /dev/");
+ return -EINVAL;
+ }
+
+ if (stat(devpath, &st) < 0)
+ return log_error_errno(errno, "Couldn't stat file: %m");
+
+ r = device_new_from_stat_rdev(&device, &st);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device from devnum %u:%u: %m", major(st.st_rdev), minor(st.st_rdev));
+
+ for (d = device; d; ) {
+ _cleanup_free_ char *match = NULL;
+ const char *subsys, *sysname, *devnode;
+ sd_device *parent;
+
+ r = sd_device_get_subsystem(d, &subsys);
+ if (r < 0)
+ goto get_parent;
+
+ r = sd_device_get_sysname(d, &sysname);
+ if (r < 0)
+ goto get_parent;
+
+ match = strjoin("_KERNEL_DEVICE=+", subsys, ":", sysname);
+ if (!match)
+ return log_oom();
+
+ r = sd_journal_add_match(j, match, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+
+ if (sd_device_get_devname(d, &devnode) >= 0) {
+ _cleanup_free_ char *match1 = NULL;
+
+ r = stat(devnode, &st);
+ if (r < 0)
+ return log_error_errno(r, "Failed to stat() device node \"%s\": %m", devnode);
+
+ r = asprintf(&match1, "_KERNEL_DEVICE=%c%u:%u", S_ISBLK(st.st_mode) ? 'b' : 'c', major(st.st_rdev), minor(st.st_rdev));
+ if (r < 0)
+ return log_oom();
+
+ r = sd_journal_add_match(j, match1, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+ }
+
+get_parent:
+ if (sd_device_get_parent(d, &parent) < 0)
+ break;
+
+ d = parent;
+ }
+
+ r = add_match_this_boot(j, arg_machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match for the current boot: %m");
+
+ return 0;
+}
+
+static char *format_timestamp_maybe_utc(char *buf, size_t l, usec_t t) {
+
+ if (arg_utc)
+ return format_timestamp_utc(buf, l, t);
+
+ return format_timestamp(buf, l, t);
+}
+
+static int parse_boot_descriptor(const char *x, sd_id128_t *boot_id, int *offset) {
+ sd_id128_t id = SD_ID128_NULL;
+ int off = 0, r;
+
+ if (strlen(x) >= 32) {
+ char *t;
+
+ t = strndupa(x, 32);
+ r = sd_id128_from_string(t, &id);
+ if (r >= 0)
+ x += 32;
+
+ if (!IN_SET(*x, 0, '-', '+'))
+ return -EINVAL;
+
+ if (*x != 0) {
+ r = safe_atoi(x, &off);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ r = safe_atoi(x, &off);
+ if (r < 0)
+ return r;
+ }
+
+ if (boot_id)
+ *boot_id = id;
+
+ if (offset)
+ *offset = off;
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("journalctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [MATCHES...]\n\n"
+ "Query the journal.\n\n"
+ "Options:\n"
+ " --system Show the system journal\n"
+ " --user Show the user journal for the current user\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " -S --since=DATE Show entries not older than the specified date\n"
+ " -U --until=DATE Show entries not newer than the specified date\n"
+ " -c --cursor=CURSOR Show entries starting at the specified cursor\n"
+ " --after-cursor=CURSOR Show entries after the specified cursor\n"
+ " --show-cursor Print the cursor after all the entries\n"
+ " -b --boot[=ID] Show current boot or the specified boot\n"
+ " --list-boots Show terse information about recorded boots\n"
+ " -k --dmesg Show kernel message log from the current boot\n"
+ " -u --unit=UNIT Show logs from the specified unit\n"
+ " --user-unit=UNIT Show logs from the specified user unit\n"
+ " -t --identifier=STRING Show entries with the specified syslog identifier\n"
+ " -p --priority=RANGE Show entries with the specified priority\n"
+ " -g --grep=PATTERN Show entries with MESSAGE matching PATTERN\n"
+ " --case-sensitive[=BOOL] Force case sensitive or insenstive matching\n"
+ " -e --pager-end Immediately jump to the end in the pager\n"
+ " -f --follow Follow the journal\n"
+ " -n --lines[=INTEGER] Number of journal entries to show\n"
+ " --no-tail Show all lines, even in follow mode\n"
+ " -r --reverse Show the newest entries first\n"
+ " -o --output=STRING Change journal output mode (short, short-precise,\n"
+ " short-iso, short-iso-precise, short-full,\n"
+ " short-monotonic, short-unix, verbose, export,\n"
+ " json, json-pretty, json-sse, json-seq, cat,\n"
+ " with-unit)\n"
+ " --output-fields=LIST Select fields to print in verbose/export/json modes\n"
+ " --utc Express time in Coordinated Universal Time (UTC)\n"
+ " -x --catalog Add message explanations where available\n"
+ " --no-full Ellipsize fields\n"
+ " -a --all Show all fields, including long and unprintable\n"
+ " -q --quiet Do not show info messages and privilege warning\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-hostname Suppress output of hostname field\n"
+ " -m --merge Show entries from all available journals\n"
+ " -D --directory=PATH Show journal files from directory\n"
+ " --file=PATH Show journal file\n"
+ " --root=ROOT Operate on files below a root directory\n"
+ " --interval=TIME Time interval for changing the FSS sealing key\n"
+ " --verify-key=KEY Specify FSS verification key\n"
+ " --force Override of the FSS key pair with --setup-keys\n"
+ "\nCommands:\n"
+ " -h --help Show this help text\n"
+ " --version Show package version\n"
+ " -N --fields List all field names currently used\n"
+ " -F --field=FIELD List all values that a specified field takes\n"
+ " --disk-usage Show total disk usage of all journal files\n"
+ " --vacuum-size=BYTES Reduce disk usage below specified size\n"
+ " --vacuum-files=INT Leave only the specified number of journal files\n"
+ " --vacuum-time=TIME Remove journal files older than specified time\n"
+ " --verify Verify journal file consistency\n"
+ " --sync Synchronize unwritten journal messages to disk\n"
+ " --flush Flush all journal data from /run into /var\n"
+ " --rotate Request immediate rotation of the journal files\n"
+ " --header Show journal header information\n"
+ " --list-catalog Show all message IDs in the catalog\n"
+ " --dump-catalog Show entries in the message catalog\n"
+ " --update-catalog Update the message catalog database\n"
+ " --setup-keys Generate a new FSS key pair\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_FULL,
+ ARG_NO_TAIL,
+ ARG_NEW_ID128,
+ ARG_THIS_BOOT,
+ ARG_LIST_BOOTS,
+ ARG_USER,
+ ARG_SYSTEM,
+ ARG_ROOT,
+ ARG_HEADER,
+ ARG_SETUP_KEYS,
+ ARG_FILE,
+ ARG_INTERVAL,
+ ARG_VERIFY,
+ ARG_VERIFY_KEY,
+ ARG_DISK_USAGE,
+ ARG_AFTER_CURSOR,
+ ARG_SHOW_CURSOR,
+ ARG_USER_UNIT,
+ ARG_LIST_CATALOG,
+ ARG_DUMP_CATALOG,
+ ARG_UPDATE_CATALOG,
+ ARG_FORCE,
+ ARG_CASE_SENSITIVE,
+ ARG_UTC,
+ ARG_SYNC,
+ ARG_FLUSH,
+ ARG_ROTATE,
+ ARG_VACUUM_SIZE,
+ ARG_VACUUM_FILES,
+ ARG_VACUUM_TIME,
+ ARG_NO_HOSTNAME,
+ ARG_OUTPUT_FIELDS,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version" , no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "pager-end", no_argument, NULL, 'e' },
+ { "follow", no_argument, NULL, 'f' },
+ { "force", no_argument, NULL, ARG_FORCE },
+ { "output", required_argument, NULL, 'o' },
+ { "all", no_argument, NULL, 'a' },
+ { "full", no_argument, NULL, 'l' },
+ { "no-full", no_argument, NULL, ARG_NO_FULL },
+ { "lines", optional_argument, NULL, 'n' },
+ { "no-tail", no_argument, NULL, ARG_NO_TAIL },
+ { "new-id128", no_argument, NULL, ARG_NEW_ID128 }, /* deprecated */
+ { "quiet", no_argument, NULL, 'q' },
+ { "merge", no_argument, NULL, 'm' },
+ { "this-boot", no_argument, NULL, ARG_THIS_BOOT }, /* deprecated */
+ { "boot", optional_argument, NULL, 'b' },
+ { "list-boots", no_argument, NULL, ARG_LIST_BOOTS },
+ { "dmesg", no_argument, NULL, 'k' },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "user", no_argument, NULL, ARG_USER },
+ { "directory", required_argument, NULL, 'D' },
+ { "file", required_argument, NULL, ARG_FILE },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "header", no_argument, NULL, ARG_HEADER },
+ { "identifier", required_argument, NULL, 't' },
+ { "priority", required_argument, NULL, 'p' },
+ { "grep", required_argument, NULL, 'g' },
+ { "case-sensitive", optional_argument, NULL, ARG_CASE_SENSITIVE },
+ { "setup-keys", no_argument, NULL, ARG_SETUP_KEYS },
+ { "interval", required_argument, NULL, ARG_INTERVAL },
+ { "verify", no_argument, NULL, ARG_VERIFY },
+ { "verify-key", required_argument, NULL, ARG_VERIFY_KEY },
+ { "disk-usage", no_argument, NULL, ARG_DISK_USAGE },
+ { "cursor", required_argument, NULL, 'c' },
+ { "after-cursor", required_argument, NULL, ARG_AFTER_CURSOR },
+ { "show-cursor", no_argument, NULL, ARG_SHOW_CURSOR },
+ { "since", required_argument, NULL, 'S' },
+ { "until", required_argument, NULL, 'U' },
+ { "unit", required_argument, NULL, 'u' },
+ { "user-unit", required_argument, NULL, ARG_USER_UNIT },
+ { "field", required_argument, NULL, 'F' },
+ { "fields", no_argument, NULL, 'N' },
+ { "catalog", no_argument, NULL, 'x' },
+ { "list-catalog", no_argument, NULL, ARG_LIST_CATALOG },
+ { "dump-catalog", no_argument, NULL, ARG_DUMP_CATALOG },
+ { "update-catalog", no_argument, NULL, ARG_UPDATE_CATALOG },
+ { "reverse", no_argument, NULL, 'r' },
+ { "machine", required_argument, NULL, 'M' },
+ { "utc", no_argument, NULL, ARG_UTC },
+ { "flush", no_argument, NULL, ARG_FLUSH },
+ { "sync", no_argument, NULL, ARG_SYNC },
+ { "rotate", no_argument, NULL, ARG_ROTATE },
+ { "vacuum-size", required_argument, NULL, ARG_VACUUM_SIZE },
+ { "vacuum-files", required_argument, NULL, ARG_VACUUM_FILES },
+ { "vacuum-time", required_argument, NULL, ARG_VACUUM_TIME },
+ { "no-hostname", no_argument, NULL, ARG_NO_HOSTNAME },
+ { "output-fields", required_argument, NULL, ARG_OUTPUT_FIELDS },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hefo:aln::qmb::kD:p:g:c:S:U:t:u:NF:xrM:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case 'e':
+ arg_pager_flags |= PAGER_JUMP_TO_END;
+
+ if (arg_lines == ARG_LINES_DEFAULT)
+ arg_lines = 1000;
+
+ break;
+
+ case 'f':
+ arg_follow = true;
+ break;
+
+ case 'o':
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(output_mode, OutputMode, _OUTPUT_MODE_MAX);
+ return 0;
+ }
+
+ arg_output = output_mode_from_string(optarg);
+ if (arg_output < 0) {
+ log_error("Unknown output format '%s'.", optarg);
+ return -EINVAL;
+ }
+
+ if (IN_SET(arg_output, OUTPUT_EXPORT, OUTPUT_JSON, OUTPUT_JSON_PRETTY, OUTPUT_JSON_SSE, OUTPUT_JSON_SEQ, OUTPUT_CAT))
+ arg_quiet = true;
+
+ break;
+
+ case 'l':
+ arg_full = true;
+ break;
+
+ case ARG_NO_FULL:
+ arg_full = false;
+ break;
+
+ case 'a':
+ arg_all = true;
+ break;
+
+ case 'n':
+ if (optarg) {
+ if (streq(optarg, "all"))
+ arg_lines = ARG_LINES_ALL;
+ else {
+ r = safe_atoi(optarg, &arg_lines);
+ if (r < 0 || arg_lines < 0) {
+ log_error("Failed to parse lines '%s'", optarg);
+ return -EINVAL;
+ }
+ }
+ } else {
+ arg_lines = 10;
+
+ /* Hmm, no argument? Maybe the next
+ * word on the command line is
+ * supposed to be the argument? Let's
+ * see if there is one, and is
+ * parsable. */
+ if (optind < argc) {
+ int n;
+ if (streq(argv[optind], "all")) {
+ arg_lines = ARG_LINES_ALL;
+ optind++;
+ } else if (safe_atoi(argv[optind], &n) >= 0 && n >= 0) {
+ arg_lines = n;
+ optind++;
+ }
+ }
+ }
+
+ break;
+
+ case ARG_NO_TAIL:
+ arg_no_tail = true;
+ break;
+
+ case ARG_NEW_ID128:
+ arg_action = ACTION_NEW_ID128;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case 'm':
+ arg_merge = true;
+ break;
+
+ case ARG_THIS_BOOT:
+ arg_boot = true;
+ break;
+
+ case 'b':
+ arg_boot = true;
+
+ if (optarg) {
+ r = parse_boot_descriptor(optarg, &arg_boot_id, &arg_boot_offset);
+ if (r < 0) {
+ log_error("Failed to parse boot descriptor '%s'", optarg);
+ return -EINVAL;
+ }
+ } else {
+
+ /* Hmm, no argument? Maybe the next
+ * word on the command line is
+ * supposed to be the argument? Let's
+ * see if there is one and is parsable
+ * as a boot descriptor... */
+
+ if (optind < argc &&
+ parse_boot_descriptor(argv[optind], &arg_boot_id, &arg_boot_offset) >= 0)
+ optind++;
+ }
+
+ break;
+
+ case ARG_LIST_BOOTS:
+ arg_action = ACTION_LIST_BOOTS;
+ break;
+
+ case 'k':
+ arg_boot = arg_dmesg = true;
+ break;
+
+ case ARG_SYSTEM:
+ arg_journal_type |= SD_JOURNAL_SYSTEM;
+ break;
+
+ case ARG_USER:
+ arg_journal_type |= SD_JOURNAL_CURRENT_USER;
+ break;
+
+ case 'M':
+ arg_machine = optarg;
+ break;
+
+ case 'D':
+ arg_directory = optarg;
+ break;
+
+ case ARG_FILE:
+ if (streq(optarg, "-"))
+ /* An undocumented feature: we can read journal files from STDIN. We don't document
+ * this though, since after all we only support this for mmap-able, seekable files, and
+ * not for example pipes which are probably the primary usecase for reading things from
+ * STDIN. To avoid confusion we hence don't document this feature. */
+ arg_file_stdin = true;
+ else {
+ r = glob_extend(&arg_file, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add paths: %m");
+ }
+ break;
+
+ case ARG_ROOT:
+ r = parse_path_argument_and_warn(optarg, true, &arg_root);
+ if (r < 0)
+ return r;
+ break;
+
+ case 'c':
+ arg_cursor = optarg;
+ break;
+
+ case ARG_AFTER_CURSOR:
+ arg_after_cursor = optarg;
+ break;
+
+ case ARG_SHOW_CURSOR:
+ arg_show_cursor = true;
+ break;
+
+ case ARG_HEADER:
+ arg_action = ACTION_PRINT_HEADER;
+ break;
+
+ case ARG_VERIFY:
+ arg_action = ACTION_VERIFY;
+ break;
+
+ case ARG_DISK_USAGE:
+ arg_action = ACTION_DISK_USAGE;
+ break;
+
+ case ARG_VACUUM_SIZE:
+ r = parse_size(optarg, 1024, &arg_vacuum_size);
+ if (r < 0) {
+ log_error("Failed to parse vacuum size: %s", optarg);
+ return r;
+ }
+
+ arg_action = arg_action == ACTION_ROTATE ? ACTION_ROTATE_AND_VACUUM : ACTION_VACUUM;
+ break;
+
+ case ARG_VACUUM_FILES:
+ r = safe_atou64(optarg, &arg_vacuum_n_files);
+ if (r < 0) {
+ log_error("Failed to parse vacuum files: %s", optarg);
+ return r;
+ }
+
+ arg_action = arg_action == ACTION_ROTATE ? ACTION_ROTATE_AND_VACUUM : ACTION_VACUUM;
+ break;
+
+ case ARG_VACUUM_TIME:
+ r = parse_sec(optarg, &arg_vacuum_time);
+ if (r < 0) {
+ log_error("Failed to parse vacuum time: %s", optarg);
+ return r;
+ }
+
+ arg_action = arg_action == ACTION_ROTATE ? ACTION_ROTATE_AND_VACUUM : ACTION_VACUUM;
+ break;
+
+#if HAVE_GCRYPT
+ case ARG_FORCE:
+ arg_force = true;
+ break;
+
+ case ARG_SETUP_KEYS:
+ arg_action = ACTION_SETUP_KEYS;
+ break;
+
+ case ARG_VERIFY_KEY:
+ arg_action = ACTION_VERIFY;
+ r = free_and_strdup(&arg_verify_key, optarg);
+ if (r < 0)
+ return r;
+ /* Use memset not string_erase so this doesn't look confusing
+ * in ps or htop output. */
+ memset(optarg, 'x', strlen(optarg));
+
+ arg_merge = false;
+ break;
+
+ case ARG_INTERVAL:
+ r = parse_sec(optarg, &arg_interval);
+ if (r < 0 || arg_interval <= 0) {
+ log_error("Failed to parse sealing key change interval: %s", optarg);
+ return -EINVAL;
+ }
+ break;
+#else
+ case ARG_SETUP_KEYS:
+ case ARG_VERIFY_KEY:
+ case ARG_INTERVAL:
+ case ARG_FORCE:
+ log_error("Compiled without forward-secure sealing support.");
+ return -EOPNOTSUPP;
+#endif
+
+ case 'p': {
+ const char *dots;
+
+ dots = strstr(optarg, "..");
+ if (dots) {
+ char *a;
+ int from, to, i;
+
+ /* a range */
+ a = strndup(optarg, dots - optarg);
+ if (!a)
+ return log_oom();
+
+ from = log_level_from_string(a);
+ to = log_level_from_string(dots + 2);
+ free(a);
+
+ if (from < 0 || to < 0) {
+ log_error("Failed to parse log level range %s", optarg);
+ return -EINVAL;
+ }
+
+ arg_priorities = 0;
+
+ if (from < to) {
+ for (i = from; i <= to; i++)
+ arg_priorities |= 1 << i;
+ } else {
+ for (i = to; i <= from; i++)
+ arg_priorities |= 1 << i;
+ }
+
+ } else {
+ int p, i;
+
+ p = log_level_from_string(optarg);
+ if (p < 0) {
+ log_error("Unknown log level %s", optarg);
+ return -EINVAL;
+ }
+
+ arg_priorities = 0;
+
+ for (i = 0; i <= p; i++)
+ arg_priorities |= 1 << i;
+ }
+
+ break;
+ }
+
+#if HAVE_PCRE2
+ case 'g':
+ arg_pattern = optarg;
+ break;
+
+ case ARG_CASE_SENSITIVE:
+ if (optarg) {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Bad --case-sensitive= argument \"%s\": %m", optarg);
+ arg_case_sensitive = r;
+ } else
+ arg_case_sensitive = true;
+
+ break;
+#else
+ case 'g':
+ case ARG_CASE_SENSITIVE:
+ return log_error("Compiled without pattern matching support");
+#endif
+
+ case 'S':
+ r = parse_timestamp(optarg, &arg_since);
+ if (r < 0) {
+ log_error("Failed to parse timestamp: %s", optarg);
+ return -EINVAL;
+ }
+ arg_since_set = true;
+ break;
+
+ case 'U':
+ r = parse_timestamp(optarg, &arg_until);
+ if (r < 0) {
+ log_error("Failed to parse timestamp: %s", optarg);
+ return -EINVAL;
+ }
+ arg_until_set = true;
+ break;
+
+ case 't':
+ r = strv_extend(&arg_syslog_identifier, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case 'u':
+ r = strv_extend(&arg_system_units, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case ARG_USER_UNIT:
+ r = strv_extend(&arg_user_units, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case 'F':
+ arg_action = ACTION_LIST_FIELDS;
+ arg_field = optarg;
+ break;
+
+ case 'N':
+ arg_action = ACTION_LIST_FIELD_NAMES;
+ break;
+
+ case ARG_NO_HOSTNAME:
+ arg_no_hostname = true;
+ break;
+
+ case 'x':
+ arg_catalog = true;
+ break;
+
+ case ARG_LIST_CATALOG:
+ arg_action = ACTION_LIST_CATALOG;
+ break;
+
+ case ARG_DUMP_CATALOG:
+ arg_action = ACTION_DUMP_CATALOG;
+ break;
+
+ case ARG_UPDATE_CATALOG:
+ arg_action = ACTION_UPDATE_CATALOG;
+ break;
+
+ case 'r':
+ arg_reverse = true;
+ break;
+
+ case ARG_UTC:
+ arg_utc = true;
+ break;
+
+ case ARG_FLUSH:
+ arg_action = ACTION_FLUSH;
+ break;
+
+ case ARG_ROTATE:
+ arg_action = arg_action == ACTION_VACUUM ? ACTION_ROTATE_AND_VACUUM : ACTION_ROTATE;
+ break;
+
+ case ARG_SYNC:
+ arg_action = ACTION_SYNC;
+ break;
+
+ case ARG_OUTPUT_FIELDS: {
+ _cleanup_strv_free_ char **v = NULL;
+
+ v = strv_split(optarg, ",");
+ if (!v)
+ return log_oom();
+
+ if (!arg_output_fields)
+ arg_output_fields = TAKE_PTR(v);
+ else {
+ r = strv_extend_strv(&arg_output_fields, v, true);
+ if (r < 0)
+ return log_oom();
+ }
+ break;
+ }
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_follow && !arg_no_tail && !arg_since && arg_lines == ARG_LINES_DEFAULT)
+ arg_lines = 10;
+
+ if (!!arg_directory + !!arg_file + !!arg_machine + !!arg_root > 1) {
+ log_error("Please specify at most one of -D/--directory=, --file=, -M/--machine=, --root.");
+ return -EINVAL;
+ }
+
+ if (arg_since_set && arg_until_set && arg_since > arg_until) {
+ log_error("--since= must be before --until=.");
+ return -EINVAL;
+ }
+
+ if (!!arg_cursor + !!arg_after_cursor + !!arg_since_set > 1) {
+ log_error("Please specify only one of --since=, --cursor=, and --after-cursor.");
+ return -EINVAL;
+ }
+
+ if (arg_follow && arg_reverse) {
+ log_error("Please specify either --reverse= or --follow=, not both.");
+ return -EINVAL;
+ }
+
+ if (!IN_SET(arg_action, ACTION_SHOW, ACTION_DUMP_CATALOG, ACTION_LIST_CATALOG) && optind < argc) {
+ log_error("Extraneous arguments starting with '%s'", argv[optind]);
+ return -EINVAL;
+ }
+
+ if ((arg_boot || arg_action == ACTION_LIST_BOOTS) && arg_merge) {
+ log_error("Using --boot or --list-boots with --merge is not supported.");
+ return -EINVAL;
+ }
+
+ if (!strv_isempty(arg_system_units) && arg_journal_type == SD_JOURNAL_CURRENT_USER) {
+ /* Specifying --user and --unit= at the same time makes no sense (as the former excludes the user
+ * journal, but the latter excludes the system journal, thus resulting in empty output). Let's be nice
+ * to users, and automatically turn --unit= into --user-unit= if combined with --user. */
+ r = strv_extend_strv(&arg_user_units, arg_system_units, true);
+ if (r < 0)
+ return r;
+
+ arg_system_units = strv_free(arg_system_units);
+ }
+
+#if HAVE_PCRE2
+ if (arg_pattern) {
+ unsigned flags;
+
+ if (arg_case_sensitive >= 0)
+ flags = !arg_case_sensitive * PCRE2_CASELESS;
+ else {
+ _cleanup_(pcre2_match_data_freep) pcre2_match_data *md = NULL;
+ bool has_case;
+ _cleanup_(pcre2_code_freep) pcre2_code *cs = NULL;
+
+ md = pcre2_match_data_create(1, NULL);
+ if (!md)
+ return log_oom();
+
+ r = pattern_compile("[[:upper:]]", 0, &cs);
+ if (r < 0)
+ return r;
+
+ r = pcre2_match(cs, (PCRE2_SPTR8) arg_pattern, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL);
+ has_case = r >= 0;
+
+ flags = !has_case * PCRE2_CASELESS;
+ }
+
+ log_debug("Doing case %s matching based on %s",
+ flags & PCRE2_CASELESS ? "insensitive" : "sensitive",
+ arg_case_sensitive >= 0 ? "request" : "pattern casing");
+
+ r = pattern_compile(arg_pattern, flags, &arg_compiled_pattern);
+ if (r < 0)
+ return r;
+ }
+#endif
+
+ return 1;
+}
+
+static int add_matches(sd_journal *j, char **args) {
+ char **i;
+ bool have_term = false;
+
+ assert(j);
+
+ STRV_FOREACH(i, args) {
+ int r;
+
+ if (streq(*i, "+")) {
+ if (!have_term)
+ break;
+ r = sd_journal_add_disjunction(j);
+ have_term = false;
+
+ } else if (path_is_absolute(*i)) {
+ _cleanup_free_ char *p = NULL, *t = NULL, *t2 = NULL, *interpreter = NULL;
+ struct stat st;
+
+ r = chase_symlinks(*i, NULL, CHASE_TRAIL_SLASH, &p);
+ if (r < 0)
+ return log_error_errno(r, "Couldn't canonicalize path: %m");
+
+ if (lstat(p, &st) < 0)
+ return log_error_errno(errno, "Couldn't stat file: %m");
+
+ if (S_ISREG(st.st_mode) && (0111 & st.st_mode)) {
+ if (executable_is_script(p, &interpreter) > 0) {
+ _cleanup_free_ char *comm;
+
+ comm = strndup(basename(p), 15);
+ if (!comm)
+ return log_oom();
+
+ t = strappend("_COMM=", comm);
+ if (!t)
+ return log_oom();
+
+ /* Append _EXE only if the interpreter is not a link.
+ Otherwise, it might be outdated often. */
+ if (lstat(interpreter, &st) == 0 && !S_ISLNK(st.st_mode)) {
+ t2 = strappend("_EXE=", interpreter);
+ if (!t2)
+ return log_oom();
+ }
+ } else {
+ t = strappend("_EXE=", p);
+ if (!t)
+ return log_oom();
+ }
+
+ r = sd_journal_add_match(j, t, 0);
+
+ if (r >=0 && t2)
+ r = sd_journal_add_match(j, t2, 0);
+
+ } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
+ r = add_matches_for_device(j, p);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "File is neither a device node, nor regular file, nor executable: %s",
+ *i);
+
+ have_term = true;
+ } else {
+ r = sd_journal_add_match(j, *i, 0);
+ have_term = true;
+ }
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match '%s': %m", *i);
+ }
+
+ if (!strv_isempty(args) && !have_term)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "\"+\" can only be used between terms");
+
+ return 0;
+}
+
+static void boot_id_free_all(BootId *l) {
+
+ while (l) {
+ BootId *i = l;
+ LIST_REMOVE(boot_list, l, i);
+ free(i);
+ }
+}
+
+static int discover_next_boot(sd_journal *j,
+ sd_id128_t previous_boot_id,
+ bool advance_older,
+ BootId **ret) {
+
+ _cleanup_free_ BootId *next_boot = NULL;
+ char match[9+32+1] = "_BOOT_ID=";
+ sd_id128_t boot_id;
+ int r;
+
+ assert(j);
+ assert(ret);
+
+ /* We expect the journal to be on the last position of a boot
+ * (in relation to the direction we are going), so that the next
+ * invocation of sd_journal_next/previous will be from a different
+ * boot. We then collect any information we desire and then jump
+ * to the last location of the new boot by using a _BOOT_ID match
+ * coming from the other journal direction. */
+
+ /* Make sure we aren't restricted by any _BOOT_ID matches, so that
+ * we can actually advance to a *different* boot. */
+ sd_journal_flush_matches(j);
+
+ do {
+ if (advance_older)
+ r = sd_journal_previous(j);
+ else
+ r = sd_journal_next(j);
+ if (r < 0)
+ return r;
+ else if (r == 0)
+ return 0; /* End of journal, yay. */
+
+ r = sd_journal_get_monotonic_usec(j, NULL, &boot_id);
+ if (r < 0)
+ return r;
+
+ /* We iterate through this in a loop, until the boot ID differs from the previous one. Note that
+ * normally, this will only require a single iteration, as we seeked to the last entry of the previous
+ * boot entry already. However, it might happen that the per-journal-field entry arrays are less
+ * complete than the main entry array, and hence might reference an entry that's not actually the last
+ * one of the boot ID as last one. Let's hence use the per-field array is initial seek position to
+ * speed things up, but let's not trust that it is complete, and hence, manually advance as
+ * necessary. */
+
+ } while (sd_id128_equal(boot_id, previous_boot_id));
+
+ next_boot = new0(BootId, 1);
+ if (!next_boot)
+ return -ENOMEM;
+
+ next_boot->id = boot_id;
+
+ r = sd_journal_get_realtime_usec(j, &next_boot->first);
+ if (r < 0)
+ return r;
+
+ /* Now seek to the last occurrence of this boot ID. */
+ sd_id128_to_string(next_boot->id, match + 9);
+ r = sd_journal_add_match(j, match, sizeof(match) - 1);
+ if (r < 0)
+ return r;
+
+ if (advance_older)
+ r = sd_journal_seek_head(j);
+ else
+ r = sd_journal_seek_tail(j);
+ if (r < 0)
+ return r;
+
+ if (advance_older)
+ r = sd_journal_next(j);
+ else
+ r = sd_journal_previous(j);
+ if (r < 0)
+ return r;
+ else if (r == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODATA),
+ "Whoopsie! We found a boot ID but can't read its last entry."); /* This shouldn't happen. We just came from this very boot ID. */
+
+ r = sd_journal_get_realtime_usec(j, &next_boot->last);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(next_boot);
+
+ return 0;
+}
+
+static int get_boots(
+ sd_journal *j,
+ BootId **boots,
+ sd_id128_t *boot_id,
+ int offset) {
+
+ bool skip_once;
+ int r, count = 0;
+ BootId *head = NULL, *tail = NULL, *id;
+ const bool advance_older = boot_id && offset <= 0;
+ sd_id128_t previous_boot_id;
+
+ assert(j);
+
+ /* Adjust for the asymmetry that offset 0 is
+ * the last (and current) boot, while 1 is considered the
+ * (chronological) first boot in the journal. */
+ skip_once = boot_id && sd_id128_is_null(*boot_id) && offset <= 0;
+
+ /* Advance to the earliest/latest occurrence of our reference
+ * boot ID (taking our lookup direction into account), so that
+ * discover_next_boot() can do its job.
+ * If no reference is given, the journal head/tail will do,
+ * they're "virtual" boots after all. */
+ if (boot_id && !sd_id128_is_null(*boot_id)) {
+ char match[9+32+1] = "_BOOT_ID=";
+
+ sd_journal_flush_matches(j);
+
+ sd_id128_to_string(*boot_id, match + 9);
+ r = sd_journal_add_match(j, match, sizeof(match) - 1);
+ if (r < 0)
+ return r;
+
+ if (advance_older)
+ r = sd_journal_seek_head(j); /* seek to oldest */
+ else
+ r = sd_journal_seek_tail(j); /* seek to newest */
+ if (r < 0)
+ return r;
+
+ if (advance_older)
+ r = sd_journal_next(j); /* read the oldest entry */
+ else
+ r = sd_journal_previous(j); /* read the most recently added entry */
+ if (r < 0)
+ return r;
+ else if (r == 0)
+ goto finish;
+ else if (offset == 0) {
+ count = 1;
+ goto finish;
+ }
+
+ /* At this point the read pointer is positioned at the oldest/newest occurence of the reference boot
+ * ID. After flushing the matches, one more invocation of _previous()/_next() will hence place us at
+ * the following entry, which must then have an older/newer boot ID */
+ } else {
+
+ if (advance_older)
+ r = sd_journal_seek_tail(j); /* seek to newest */
+ else
+ r = sd_journal_seek_head(j); /* seek to oldest */
+ if (r < 0)
+ return r;
+
+ /* No sd_journal_next()/_previous() here.
+ *
+ * At this point the read pointer is positioned after the newest/before the oldest entry in the whole
+ * journal. The next invocation of _previous()/_next() will hence position us at the newest/oldest
+ * entry we have. */
+ }
+
+ previous_boot_id = SD_ID128_NULL;
+ for (;;) {
+ _cleanup_free_ BootId *current = NULL;
+
+ r = discover_next_boot(j, previous_boot_id, advance_older, &current);
+ if (r < 0) {
+ boot_id_free_all(head);
+ return r;
+ }
+
+ if (!current)
+ break;
+
+ previous_boot_id = current->id;
+
+ if (boot_id) {
+ if (!skip_once)
+ offset += advance_older ? 1 : -1;
+ skip_once = false;
+
+ if (offset == 0) {
+ count = 1;
+ *boot_id = current->id;
+ break;
+ }
+ } else {
+ LIST_FOREACH(boot_list, id, head) {
+ if (sd_id128_equal(id->id, current->id)) {
+ /* boot id already stored, something wrong with the journal files */
+ /* exiting as otherwise this problem would cause forever loop */
+ goto finish;
+ }
+ }
+ LIST_INSERT_AFTER(boot_list, head, tail, current);
+ tail = TAKE_PTR(current);
+ count++;
+ }
+ }
+
+finish:
+ if (boots)
+ *boots = head;
+
+ sd_journal_flush_matches(j);
+
+ return count;
+}
+
+static int list_boots(sd_journal *j) {
+ int w, i, count;
+ BootId *id, *all_ids;
+
+ assert(j);
+
+ count = get_boots(j, &all_ids, NULL, 0);
+ if (count < 0)
+ return log_error_errno(count, "Failed to determine boots: %m");
+ if (count == 0)
+ return count;
+
+ (void) pager_open(arg_pager_flags);
+
+ /* numbers are one less, but we need an extra char for the sign */
+ w = DECIMAL_STR_WIDTH(count - 1) + 1;
+
+ i = 0;
+ LIST_FOREACH(boot_list, id, all_ids) {
+ char a[FORMAT_TIMESTAMP_MAX], b[FORMAT_TIMESTAMP_MAX];
+
+ printf("% *i " SD_ID128_FORMAT_STR " %s—%s\n",
+ w, i - count + 1,
+ SD_ID128_FORMAT_VAL(id->id),
+ format_timestamp_maybe_utc(a, sizeof(a), id->first),
+ format_timestamp_maybe_utc(b, sizeof(b), id->last));
+ i++;
+ }
+
+ boot_id_free_all(all_ids);
+
+ return 0;
+}
+
+static int add_boot(sd_journal *j) {
+ char match[9+32+1] = "_BOOT_ID=";
+ sd_id128_t boot_id;
+ int r;
+
+ assert(j);
+
+ if (!arg_boot)
+ return 0;
+
+ /* Take a shortcut and use the current boot_id, which we can do very quickly.
+ * We can do this only when we logs are coming from the current machine,
+ * so take the slow path if log location is specified. */
+ if (arg_boot_offset == 0 && sd_id128_is_null(arg_boot_id) &&
+ !arg_directory && !arg_file && !arg_root)
+
+ return add_match_this_boot(j, arg_machine);
+
+ boot_id = arg_boot_id;
+ r = get_boots(j, NULL, &boot_id, arg_boot_offset);
+ assert(r <= 1);
+ if (r <= 0) {
+ const char *reason = (r == 0) ? "No such boot ID in journal" : strerror(-r);
+
+ if (sd_id128_is_null(arg_boot_id))
+ log_error("Data from the specified boot (%+i) is not available: %s",
+ arg_boot_offset, reason);
+ else
+ log_error("Data from the specified boot ("SD_ID128_FORMAT_STR") is not available: %s",
+ SD_ID128_FORMAT_VAL(arg_boot_id), reason);
+
+ return r == 0 ? -ENODATA : r;
+ }
+
+ sd_id128_to_string(boot_id, match + 9);
+
+ r = sd_journal_add_match(j, match, sizeof(match) - 1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+
+ r = sd_journal_add_conjunction(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add conjunction: %m");
+
+ return 0;
+}
+
+static int add_dmesg(sd_journal *j) {
+ int r;
+ assert(j);
+
+ if (!arg_dmesg)
+ return 0;
+
+ r = sd_journal_add_match(j, "_TRANSPORT=kernel",
+ STRLEN("_TRANSPORT=kernel"));
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+
+ r = sd_journal_add_conjunction(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add conjunction: %m");
+
+ return 0;
+}
+
+static int get_possible_units(
+ sd_journal *j,
+ const char *fields,
+ char **patterns,
+ Set **units) {
+
+ _cleanup_set_free_free_ Set *found;
+ const char *field;
+ int r;
+
+ found = set_new(&string_hash_ops);
+ if (!found)
+ return -ENOMEM;
+
+ NULSTR_FOREACH(field, fields) {
+ const void *data;
+ size_t size;
+
+ r = sd_journal_query_unique(j, field);
+ if (r < 0)
+ return r;
+
+ SD_JOURNAL_FOREACH_UNIQUE(j, data, size) {
+ char **pattern, *eq;
+ size_t prefix;
+ _cleanup_free_ char *u = NULL;
+
+ eq = memchr(data, '=', size);
+ if (eq)
+ prefix = eq - (char*) data + 1;
+ else
+ prefix = 0;
+
+ u = strndup((char*) data + prefix, size - prefix);
+ if (!u)
+ return -ENOMEM;
+
+ STRV_FOREACH(pattern, patterns)
+ if (fnmatch(*pattern, u, FNM_NOESCAPE) == 0) {
+ log_debug("Matched %s with pattern %s=%s", u, field, *pattern);
+
+ r = set_consume(found, u);
+ u = NULL;
+ if (r < 0 && r != -EEXIST)
+ return r;
+
+ break;
+ }
+ }
+ }
+
+ *units = TAKE_PTR(found);
+
+ return 0;
+}
+
+/* This list is supposed to return the superset of unit names
+ * possibly matched by rules added with add_matches_for_unit... */
+#define SYSTEM_UNITS \
+ "_SYSTEMD_UNIT\0" \
+ "COREDUMP_UNIT\0" \
+ "UNIT\0" \
+ "OBJECT_SYSTEMD_UNIT\0" \
+ "_SYSTEMD_SLICE\0"
+
+/* ... and add_matches_for_user_unit */
+#define USER_UNITS \
+ "_SYSTEMD_USER_UNIT\0" \
+ "USER_UNIT\0" \
+ "COREDUMP_USER_UNIT\0" \
+ "OBJECT_SYSTEMD_USER_UNIT\0"
+
+static int add_units(sd_journal *j) {
+ _cleanup_strv_free_ char **patterns = NULL;
+ int r, count = 0;
+ char **i;
+
+ assert(j);
+
+ STRV_FOREACH(i, arg_system_units) {
+ _cleanup_free_ char *u = NULL;
+
+ r = unit_name_mangle(*i, UNIT_NAME_MANGLE_GLOB | (arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN), &u);
+ if (r < 0)
+ return r;
+
+ if (string_is_glob(u)) {
+ r = strv_push(&patterns, u);
+ if (r < 0)
+ return r;
+ u = NULL;
+ } else {
+ r = add_matches_for_unit(j, u);
+ if (r < 0)
+ return r;
+ r = sd_journal_add_disjunction(j);
+ if (r < 0)
+ return r;
+ count++;
+ }
+ }
+
+ if (!strv_isempty(patterns)) {
+ _cleanup_set_free_free_ Set *units = NULL;
+ Iterator it;
+ char *u;
+
+ r = get_possible_units(j, SYSTEM_UNITS, patterns, &units);
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(u, units, it) {
+ r = add_matches_for_unit(j, u);
+ if (r < 0)
+ return r;
+ r = sd_journal_add_disjunction(j);
+ if (r < 0)
+ return r;
+ count++;
+ }
+ }
+
+ patterns = strv_free(patterns);
+
+ STRV_FOREACH(i, arg_user_units) {
+ _cleanup_free_ char *u = NULL;
+
+ r = unit_name_mangle(*i, UNIT_NAME_MANGLE_GLOB | (arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN), &u);
+ if (r < 0)
+ return r;
+
+ if (string_is_glob(u)) {
+ r = strv_push(&patterns, u);
+ if (r < 0)
+ return r;
+ u = NULL;
+ } else {
+ r = add_matches_for_user_unit(j, u, getuid());
+ if (r < 0)
+ return r;
+ r = sd_journal_add_disjunction(j);
+ if (r < 0)
+ return r;
+ count++;
+ }
+ }
+
+ if (!strv_isempty(patterns)) {
+ _cleanup_set_free_free_ Set *units = NULL;
+ Iterator it;
+ char *u;
+
+ r = get_possible_units(j, USER_UNITS, patterns, &units);
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(u, units, it) {
+ r = add_matches_for_user_unit(j, u, getuid());
+ if (r < 0)
+ return r;
+ r = sd_journal_add_disjunction(j);
+ if (r < 0)
+ return r;
+ count++;
+ }
+ }
+
+ /* Complain if the user request matches but nothing whatsoever was
+ * found, since otherwise everything would be matched. */
+ if (!(strv_isempty(arg_system_units) && strv_isempty(arg_user_units)) && count == 0)
+ return -ENODATA;
+
+ r = sd_journal_add_conjunction(j);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int add_priorities(sd_journal *j) {
+ char match[] = "PRIORITY=0";
+ int i, r;
+ assert(j);
+
+ if (arg_priorities == 0xFF)
+ return 0;
+
+ for (i = LOG_EMERG; i <= LOG_DEBUG; i++)
+ if (arg_priorities & (1 << i)) {
+ match[sizeof(match)-2] = '0' + i;
+
+ r = sd_journal_add_match(j, match, strlen(match));
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+ }
+
+ r = sd_journal_add_conjunction(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add conjunction: %m");
+
+ return 0;
+}
+
+static int add_syslog_identifier(sd_journal *j) {
+ int r;
+ char **i;
+
+ assert(j);
+
+ STRV_FOREACH(i, arg_syslog_identifier) {
+ char *u;
+
+ u = strjoina("SYSLOG_IDENTIFIER=", *i);
+ r = sd_journal_add_match(j, u, 0);
+ if (r < 0)
+ return r;
+ r = sd_journal_add_disjunction(j);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_journal_add_conjunction(j);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int setup_keys(void) {
+#if HAVE_GCRYPT
+ size_t mpk_size, seed_size, state_size, i;
+ uint8_t *mpk, *seed, *state;
+ int fd = -1, r;
+ sd_id128_t machine, boot;
+ char *p = NULL, *k = NULL;
+ struct FSSHeader h;
+ uint64_t n;
+ struct stat st;
+
+ r = stat("/var/log/journal", &st);
+ if (r < 0 && !IN_SET(errno, ENOENT, ENOTDIR))
+ return log_error_errno(errno, "stat(\"%s\") failed: %m", "/var/log/journal");
+
+ if (r < 0 || !S_ISDIR(st.st_mode)) {
+ log_error("%s is not a directory, must be using persistent logging for FSS.",
+ "/var/log/journal");
+ return r < 0 ? -errno : -ENOTDIR;
+ }
+
+ r = sd_id128_get_machine(&machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get machine ID: %m");
+
+ r = sd_id128_get_boot(&boot);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get boot ID: %m");
+
+ if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fss",
+ SD_ID128_FORMAT_VAL(machine)) < 0)
+ return log_oom();
+
+ if (arg_force) {
+ r = unlink(p);
+ if (r < 0 && errno != ENOENT) {
+ r = log_error_errno(errno, "unlink(\"%s\") failed: %m", p);
+ goto finish;
+ }
+ } else if (access(p, F_OK) >= 0) {
+ log_error("Sealing key file %s exists already. Use --force to recreate.", p);
+ r = -EEXIST;
+ goto finish;
+ }
+
+ if (asprintf(&k, "/var/log/journal/" SD_ID128_FORMAT_STR "/fss.tmp.XXXXXX",
+ SD_ID128_FORMAT_VAL(machine)) < 0) {
+ r = log_oom();
+ goto finish;
+ }
+
+ mpk_size = FSPRG_mskinbytes(FSPRG_RECOMMENDED_SECPAR);
+ mpk = alloca(mpk_size);
+
+ seed_size = FSPRG_RECOMMENDED_SEEDLEN;
+ seed = alloca(seed_size);
+
+ state_size = FSPRG_stateinbytes(FSPRG_RECOMMENDED_SECPAR);
+ state = alloca(state_size);
+
+ fd = open("/dev/random", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0) {
+ r = log_error_errno(errno, "Failed to open /dev/random: %m");
+ goto finish;
+ }
+
+ log_info("Generating seed...");
+ r = loop_read_exact(fd, seed, seed_size, true);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read random seed: %m");
+ goto finish;
+ }
+
+ log_info("Generating key pair...");
+ FSPRG_GenMK(NULL, mpk, seed, seed_size, FSPRG_RECOMMENDED_SECPAR);
+
+ log_info("Generating sealing key...");
+ FSPRG_GenState0(state, mpk, seed, seed_size);
+
+ assert(arg_interval > 0);
+
+ n = now(CLOCK_REALTIME);
+ n /= arg_interval;
+
+ safe_close(fd);
+ fd = mkostemp_safe(k);
+ if (fd < 0) {
+ r = log_error_errno(fd, "Failed to open %s: %m", k);
+ goto finish;
+ }
+
+ /* Enable secure remove, exclusion from dump, synchronous
+ * writing and in-place updating */
+ r = chattr_fd(fd, FS_SECRM_FL|FS_NODUMP_FL|FS_SYNC_FL|FS_NOCOW_FL, FS_SECRM_FL|FS_NODUMP_FL|FS_SYNC_FL|FS_NOCOW_FL, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set file attributes: %m");
+
+ zero(h);
+ memcpy(h.signature, "KSHHRHLP", 8);
+ h.machine_id = machine;
+ h.boot_id = boot;
+ h.header_size = htole64(sizeof(h));
+ h.start_usec = htole64(n * arg_interval);
+ h.interval_usec = htole64(arg_interval);
+ h.fsprg_secpar = htole16(FSPRG_RECOMMENDED_SECPAR);
+ h.fsprg_state_size = htole64(state_size);
+
+ r = loop_write(fd, &h, sizeof(h), false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write header: %m");
+ goto finish;
+ }
+
+ r = loop_write(fd, state, state_size, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write state: %m");
+ goto finish;
+ }
+
+ if (link(k, p) < 0) {
+ r = log_error_errno(errno, "Failed to link file: %m");
+ goto finish;
+ }
+
+ if (on_tty()) {
+ fprintf(stderr,
+ "\n"
+ "The new key pair has been generated. The %ssecret sealing key%s has been written to\n"
+ "the following local file. This key file is automatically updated when the\n"
+ "sealing key is advanced. It should not be used on multiple hosts.\n"
+ "\n"
+ "\t%s\n"
+ "\n"
+ "Please write down the following %ssecret verification key%s. It should be stored\n"
+ "at a safe location and should not be saved locally on disk.\n"
+ "\n\t%s",
+ ansi_highlight(), ansi_normal(),
+ p,
+ ansi_highlight(), ansi_normal(),
+ ansi_highlight_red());
+ fflush(stderr);
+ }
+ for (i = 0; i < seed_size; i++) {
+ if (i > 0 && i % 3 == 0)
+ putchar('-');
+ printf("%02x", ((uint8_t*) seed)[i]);
+ }
+
+ printf("/%llx-%llx\n", (unsigned long long) n, (unsigned long long) arg_interval);
+
+ if (on_tty()) {
+ char tsb[FORMAT_TIMESPAN_MAX], *hn;
+
+ fprintf(stderr,
+ "%s\n"
+ "The sealing key is automatically changed every %s.\n",
+ ansi_normal(),
+ format_timespan(tsb, sizeof(tsb), arg_interval, 0));
+
+ hn = gethostname_malloc();
+
+ if (hn) {
+ hostname_cleanup(hn);
+ fprintf(stderr, "\nThe keys have been generated for host %s/" SD_ID128_FORMAT_STR ".\n", hn, SD_ID128_FORMAT_VAL(machine));
+ } else
+ fprintf(stderr, "\nThe keys have been generated for host " SD_ID128_FORMAT_STR ".\n", SD_ID128_FORMAT_VAL(machine));
+
+#if HAVE_QRENCODE
+ /* If this is not an UTF-8 system don't print any QR codes */
+ if (is_locale_utf8()) {
+ fputs("\nTo transfer the verification key to your phone please scan the QR code below:\n\n", stderr);
+ print_qr_code(stderr, seed, seed_size, n, arg_interval, hn, machine);
+ }
+#endif
+ free(hn);
+ }
+
+ r = 0;
+
+finish:
+ safe_close(fd);
+
+ if (k) {
+ unlink(k);
+ free(k);
+ }
+
+ free(p);
+
+ return r;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Forward-secure sealing not available.");
+#endif
+}
+
+static int verify(sd_journal *j) {
+ int r = 0;
+ Iterator i;
+ JournalFile *f;
+
+ assert(j);
+
+ log_show_color(true);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files, i) {
+ int k;
+ usec_t first = 0, validated = 0, last = 0;
+
+#if HAVE_GCRYPT
+ if (!arg_verify_key && JOURNAL_HEADER_SEALED(f->header))
+ log_notice("Journal file %s has sealing enabled but verification key has not been passed using --verify-key=.", f->path);
+#endif
+
+ k = journal_file_verify(f, arg_verify_key, &first, &validated, &last, true);
+ if (k == -EINVAL) {
+ /* If the key was invalid give up right-away. */
+ return k;
+ } else if (k < 0) {
+ log_warning_errno(k, "FAIL: %s (%m)", f->path);
+ r = k;
+ } else {
+ char a[FORMAT_TIMESTAMP_MAX], b[FORMAT_TIMESTAMP_MAX], c[FORMAT_TIMESPAN_MAX];
+ log_info("PASS: %s", f->path);
+
+ if (arg_verify_key && JOURNAL_HEADER_SEALED(f->header)) {
+ if (validated > 0) {
+ log_info("=> Validated from %s to %s, final %s entries not sealed.",
+ format_timestamp_maybe_utc(a, sizeof(a), first),
+ format_timestamp_maybe_utc(b, sizeof(b), validated),
+ format_timespan(c, sizeof(c), last > validated ? last - validated : 0, 0));
+ } else if (last > 0)
+ log_info("=> No sealing yet, %s of entries not sealed.",
+ format_timespan(c, sizeof(c), last - first, 0));
+ else
+ log_info("=> No sealing yet, no entries in file.");
+ }
+ }
+ }
+
+ return r;
+}
+
+static int flush_to_var(void) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_close_ int watch_fd = -1;
+ int r;
+
+ if (arg_machine) {
+ log_error("--flush is not supported in conjunction with --machine=.");
+ return -EOPNOTSUPP;
+ }
+
+ /* Quick exit */
+ if (access("/run/systemd/journal/flushed", F_OK) >= 0)
+ return 0;
+
+ /* OK, let's actually do the full logic, send SIGUSR1 to the
+ * daemon and set up inotify to wait for the flushed file to appear */
+ r = bus_connect_system_systemd(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get D-Bus connection: %m");
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "KillUnit",
+ &error,
+ NULL,
+ "ssi", "systemd-journald.service", "main", SIGUSR1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to kill journal service: %s", bus_error_message(&error, r));
+
+ mkdir_p("/run/systemd/journal", 0755);
+
+ watch_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (watch_fd < 0)
+ return log_error_errno(errno, "Failed to create inotify watch: %m");
+
+ r = inotify_add_watch(watch_fd, "/run/systemd/journal", IN_CREATE|IN_DONT_FOLLOW|IN_ONLYDIR);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to watch journal directory: %m");
+
+ for (;;) {
+ if (access("/run/systemd/journal/flushed", F_OK) >= 0)
+ break;
+
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to check for existence of /run/systemd/journal/flushed: %m");
+
+ r = fd_wait_for_event(watch_fd, POLLIN, USEC_INFINITY);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for event: %m");
+
+ r = flush_fd(watch_fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to flush inotify events: %m");
+ }
+
+ return 0;
+}
+
+static int send_signal_and_wait(int sig, const char *watch_path) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_close_ int watch_fd = -1;
+ usec_t start;
+ int r;
+
+ if (arg_machine) {
+ log_error("--sync and --rotate are not supported in conjunction with --machine=.");
+ return -EOPNOTSUPP;
+ }
+
+ start = now(CLOCK_MONOTONIC);
+
+ /* This call sends the specified signal to journald, and waits
+ * for acknowledgment by watching the mtime of the specified
+ * flag file. This is used to trigger syncing or rotation and
+ * then wait for the operation to complete. */
+
+ for (;;) {
+ usec_t tstamp;
+
+ /* See if a sync happened by now. */
+ r = read_timestamp_file(watch_path, &tstamp);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to read %s: %m", watch_path);
+ if (r >= 0 && tstamp >= start)
+ return 0;
+
+ /* Let's ask for a sync, but only once. */
+ if (!bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = bus_connect_system_systemd(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get D-Bus connection: %m");
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "KillUnit",
+ &error,
+ NULL,
+ "ssi", "systemd-journald.service", "main", sig);
+ if (r < 0)
+ return log_error_errno(r, "Failed to kill journal service: %s", bus_error_message(&error, r));
+
+ continue;
+ }
+
+ /* Let's install the inotify watch, if we didn't do that yet. */
+ if (watch_fd < 0) {
+
+ mkdir_p("/run/systemd/journal", 0755);
+
+ watch_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (watch_fd < 0)
+ return log_error_errno(errno, "Failed to create inotify watch: %m");
+
+ r = inotify_add_watch(watch_fd, "/run/systemd/journal", IN_MOVED_TO|IN_DONT_FOLLOW|IN_ONLYDIR);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to watch journal directory: %m");
+
+ /* Recheck the flag file immediately, so that we don't miss any event since the last check. */
+ continue;
+ }
+
+ /* OK, all preparatory steps done, let's wait until
+ * inotify reports an event. */
+
+ r = fd_wait_for_event(watch_fd, POLLIN, USEC_INFINITY);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for event: %m");
+
+ r = flush_fd(watch_fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to flush inotify events: %m");
+ }
+
+ return 0;
+}
+
+static int rotate(void) {
+ return send_signal_and_wait(SIGUSR2, "/run/systemd/journal/rotated");
+}
+
+static int sync_journal(void) {
+ return send_signal_and_wait(SIGRTMIN+1, "/run/systemd/journal/synced");
+}
+
+static int wait_for_change(sd_journal *j, int poll_fd) {
+ struct pollfd pollfds[] = {
+ { .fd = poll_fd, .events = POLLIN },
+ { .fd = STDOUT_FILENO },
+ };
+
+ struct timespec ts;
+ usec_t timeout;
+ int r;
+
+ assert(j);
+ assert(poll_fd >= 0);
+
+ /* Much like sd_journal_wait() but also keeps an eye on STDOUT, and exits as soon as we see a POLLHUP on that,
+ * i.e. when it is closed. */
+
+ r = sd_journal_get_timeout(j, &timeout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine journal waiting time: %m");
+
+ if (ppoll(pollfds, ELEMENTSOF(pollfds),
+ timeout == USEC_INFINITY ? NULL : timespec_store(&ts, timeout), NULL) < 0) {
+ if (errno == EINTR)
+ return 0;
+
+ return log_error_errno(errno, "Couldn't wait for journal event: %m");
+ }
+
+ if (pollfds[1].revents & (POLLHUP|POLLERR)) /* STDOUT has been closed? */
+ return log_debug_errno(SYNTHETIC_ERRNO(ECANCELED),
+ "Standard output has been closed.");
+
+ r = sd_journal_process(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to process journal events: %m");
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ bool previous_boot_id_valid = false, first_line = true, ellipsized = false, need_seek = false;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ sd_id128_t previous_boot_id;
+ int n_shown = 0, r, poll_fd = -1;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ /* Increase max number of open files if we can, we might needs this when browsing journal files, which might be
+ * split up into many files. */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ goto finish;
+
+ signal(SIGWINCH, columns_lines_cache_reset);
+ sigbus_install();
+
+ switch (arg_action) {
+
+ case ACTION_NEW_ID128:
+ r = id128_print_new(true);
+ goto finish;
+
+ case ACTION_SETUP_KEYS:
+ r = setup_keys();
+ goto finish;
+
+ case ACTION_LIST_CATALOG:
+ case ACTION_DUMP_CATALOG:
+ case ACTION_UPDATE_CATALOG: {
+ _cleanup_free_ char *database;
+
+ database = path_join(arg_root, CATALOG_DATABASE);
+ if (!database) {
+ r = log_oom();
+ goto finish;
+ }
+
+ if (arg_action == ACTION_UPDATE_CATALOG) {
+ r = catalog_update(database, arg_root, catalog_file_dirs);
+ if (r < 0)
+ log_error_errno(r, "Failed to list catalog: %m");
+ } else {
+ bool oneline = arg_action == ACTION_LIST_CATALOG;
+
+ (void) pager_open(arg_pager_flags);
+
+ if (optind < argc)
+ r = catalog_list_items(stdout, database, oneline, argv + optind);
+ else
+ r = catalog_list(stdout, database, oneline);
+ if (r < 0)
+ log_error_errno(r, "Failed to list catalog: %m");
+ }
+
+ goto finish;
+ }
+
+ case ACTION_FLUSH:
+ r = flush_to_var();
+ goto finish;
+
+ case ACTION_SYNC:
+ r = sync_journal();
+ goto finish;
+
+ case ACTION_ROTATE:
+ r = rotate();
+ goto finish;
+
+ case ACTION_SHOW:
+ case ACTION_PRINT_HEADER:
+ case ACTION_VERIFY:
+ case ACTION_DISK_USAGE:
+ case ACTION_LIST_BOOTS:
+ case ACTION_VACUUM:
+ case ACTION_ROTATE_AND_VACUUM:
+ case ACTION_LIST_FIELDS:
+ case ACTION_LIST_FIELD_NAMES:
+ /* These ones require access to the journal files, continue below. */
+ break;
+
+ default:
+ assert_not_reached("Unknown action");
+ }
+
+ if (arg_directory)
+ r = sd_journal_open_directory(&j, arg_directory, arg_journal_type);
+ else if (arg_root)
+ r = sd_journal_open_directory(&j, arg_root, arg_journal_type | SD_JOURNAL_OS_ROOT);
+ else if (arg_file_stdin) {
+ int ifd = STDIN_FILENO;
+ r = sd_journal_open_files_fd(&j, &ifd, 1, 0);
+ } else if (arg_file)
+ r = sd_journal_open_files(&j, (const char**) arg_file, 0);
+ else if (arg_machine) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int fd;
+
+ if (geteuid() != 0) {
+ /* The file descriptor returned by OpenMachineRootDirectory() will be owned by users/groups of
+ * the container, thus we need root privileges to override them. */
+ log_error("Using the --machine= switch requires root privileges.");
+ r = -EPERM;
+ goto finish;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open system bus: %m");
+ goto finish;
+ }
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "OpenMachineRootDirectory",
+ &error,
+ &reply,
+ "s", arg_machine);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open root directory: %s", bus_error_message(&error, r));
+ goto finish;
+ }
+
+ r = sd_bus_message_read(reply, "h", &fd);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ goto finish;
+ }
+
+ fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (fd < 0) {
+ r = log_error_errno(errno, "Failed to duplicate file descriptor: %m");
+ goto finish;
+ }
+
+ r = sd_journal_open_directory_fd(&j, fd, SD_JOURNAL_OS_ROOT);
+ if (r < 0)
+ safe_close(fd);
+ } else
+ r = sd_journal_open(&j, !arg_merge*SD_JOURNAL_LOCAL_ONLY + arg_journal_type);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open %s: %m", arg_directory ?: arg_file ? "files" : "journal");
+ goto finish;
+ }
+
+ r = journal_access_check_and_warn(j, arg_quiet,
+ !(arg_journal_type == SD_JOURNAL_CURRENT_USER || arg_user_units));
+ if (r < 0)
+ goto finish;
+
+ switch (arg_action) {
+
+ case ACTION_NEW_ID128:
+ case ACTION_SETUP_KEYS:
+ case ACTION_LIST_CATALOG:
+ case ACTION_DUMP_CATALOG:
+ case ACTION_UPDATE_CATALOG:
+ case ACTION_FLUSH:
+ case ACTION_SYNC:
+ case ACTION_ROTATE:
+ assert_not_reached("Unexpected action.");
+
+ case ACTION_PRINT_HEADER:
+ journal_print_header(j);
+ r = 0;
+ goto finish;
+
+ case ACTION_VERIFY:
+ r = verify(j);
+ goto finish;
+
+ case ACTION_DISK_USAGE: {
+ uint64_t bytes = 0;
+ char sbytes[FORMAT_BYTES_MAX];
+
+ r = sd_journal_get_usage(j, &bytes);
+ if (r < 0)
+ goto finish;
+
+ printf("Archived and active journals take up %s in the file system.\n",
+ format_bytes(sbytes, sizeof(sbytes), bytes));
+ goto finish;
+ }
+
+ case ACTION_LIST_BOOTS:
+ r = list_boots(j);
+ goto finish;
+
+ case ACTION_ROTATE_AND_VACUUM:
+
+ r = rotate();
+ if (r < 0)
+ goto finish;
+
+ _fallthrough_;
+
+ case ACTION_VACUUM: {
+ Directory *d;
+ Iterator i;
+
+ HASHMAP_FOREACH(d, j->directories_by_path, i) {
+ int q;
+
+ if (d->is_root)
+ continue;
+
+ q = journal_directory_vacuum(d->path, arg_vacuum_size, arg_vacuum_n_files, arg_vacuum_time, NULL, !arg_quiet);
+ if (q < 0) {
+ log_error_errno(q, "Failed to vacuum %s: %m", d->path);
+ r = q;
+ }
+ }
+
+ goto finish;
+ }
+
+ case ACTION_LIST_FIELD_NAMES: {
+ const char *field;
+
+ SD_JOURNAL_FOREACH_FIELD(j, field) {
+ printf("%s\n", field);
+ n_shown++;
+ }
+
+ r = 0;
+ goto finish;
+ }
+
+ case ACTION_SHOW:
+ case ACTION_LIST_FIELDS:
+ break;
+
+ default:
+ assert_not_reached("Unknown action");
+ }
+
+ if (arg_boot_offset != 0 &&
+ sd_journal_has_runtime_files(j) > 0 &&
+ sd_journal_has_persistent_files(j) == 0) {
+ log_info("Specifying boot ID or boot offset has no effect, no persistent journal was found.");
+ r = 0;
+ goto finish;
+ }
+ /* add_boot() must be called first!
+ * It may need to seek the journal to find parent boot IDs. */
+ r = add_boot(j);
+ if (r < 0)
+ goto finish;
+
+ r = add_dmesg(j);
+ if (r < 0)
+ goto finish;
+
+ r = add_units(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add filter for units: %m");
+ goto finish;
+ }
+
+ r = add_syslog_identifier(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add filter for syslog identifiers: %m");
+ goto finish;
+ }
+
+ r = add_priorities(j);
+ if (r < 0)
+ goto finish;
+
+ r = add_matches(j, argv + optind);
+ if (r < 0)
+ goto finish;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *filter;
+
+ filter = journal_make_match_string(j);
+ if (!filter)
+ return log_oom();
+
+ log_debug("Journal filter: %s", filter);
+ }
+
+ if (arg_action == ACTION_LIST_FIELDS) {
+ const void *data;
+ size_t size;
+
+ assert(arg_field);
+
+ r = sd_journal_set_data_threshold(j, 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to unset data size threshold: %m");
+ goto finish;
+ }
+
+ r = sd_journal_query_unique(j, arg_field);
+ if (r < 0) {
+ log_error_errno(r, "Failed to query unique data objects: %m");
+ goto finish;
+ }
+
+ SD_JOURNAL_FOREACH_UNIQUE(j, data, size) {
+ const void *eq;
+
+ if (arg_lines >= 0 && n_shown >= arg_lines)
+ break;
+
+ eq = memchr(data, '=', size);
+ if (eq)
+ printf("%.*s\n", (int) (size - ((const uint8_t*) eq - (const uint8_t*) data + 1)), (const char*) eq + 1);
+ else
+ printf("%.*s\n", (int) size, (const char*) data);
+
+ n_shown++;
+ }
+
+ r = 0;
+ goto finish;
+ }
+
+ /* Opening the fd now means the first sd_journal_wait() will actually wait */
+ if (arg_follow) {
+ poll_fd = sd_journal_get_fd(j);
+ if (poll_fd == -EMFILE) {
+ log_warning_errno(poll_fd, "Insufficent watch descriptors available. Reverting to -n.");
+ arg_follow = false;
+ } else if (poll_fd == -EMEDIUMTYPE) {
+ log_error_errno(poll_fd, "The --follow switch is not supported in conjunction with reading from STDIN.");
+ goto finish;
+ } else if (poll_fd < 0) {
+ log_error_errno(poll_fd, "Failed to get journal fd: %m");
+ goto finish;
+ }
+ }
+
+ if (arg_cursor || arg_after_cursor) {
+ r = sd_journal_seek_cursor(j, arg_cursor ?: arg_after_cursor);
+ if (r < 0) {
+ log_error_errno(r, "Failed to seek to cursor: %m");
+ goto finish;
+ }
+
+ if (!arg_reverse)
+ r = sd_journal_next_skip(j, 1 + !!arg_after_cursor);
+ else
+ r = sd_journal_previous_skip(j, 1 + !!arg_after_cursor);
+
+ if (arg_after_cursor && r < 2) {
+ /* We couldn't find the next entry after the cursor. */
+ if (arg_follow)
+ need_seek = true;
+ else
+ arg_lines = 0;
+ }
+
+ } else if (arg_since_set && !arg_reverse) {
+ r = sd_journal_seek_realtime_usec(j, arg_since);
+ if (r < 0) {
+ log_error_errno(r, "Failed to seek to date: %m");
+ goto finish;
+ }
+ r = sd_journal_next(j);
+
+ } else if (arg_until_set && arg_reverse) {
+ r = sd_journal_seek_realtime_usec(j, arg_until);
+ if (r < 0) {
+ log_error_errno(r, "Failed to seek to date: %m");
+ goto finish;
+ }
+ r = sd_journal_previous(j);
+
+ } else if (arg_lines >= 0) {
+ r = sd_journal_seek_tail(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to seek to tail: %m");
+ goto finish;
+ }
+
+ r = sd_journal_previous_skip(j, arg_lines);
+
+ } else if (arg_reverse) {
+ r = sd_journal_seek_tail(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to seek to tail: %m");
+ goto finish;
+ }
+
+ r = sd_journal_previous(j);
+
+ } else {
+ r = sd_journal_seek_head(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to seek to head: %m");
+ goto finish;
+ }
+
+ r = sd_journal_next(j);
+ }
+
+ if (r < 0) {
+ log_error_errno(r, "Failed to iterate through journal: %m");
+ goto finish;
+ }
+ if (r == 0)
+ need_seek = true;
+
+ if (!arg_follow)
+ (void) pager_open(arg_pager_flags);
+
+ if (!arg_quiet && (arg_lines != 0 || arg_follow)) {
+ usec_t start, end;
+ char start_buf[FORMAT_TIMESTAMP_MAX], end_buf[FORMAT_TIMESTAMP_MAX];
+
+ r = sd_journal_get_cutoff_realtime_usec(j, &start, &end);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get cutoff: %m");
+ goto finish;
+ }
+
+ if (r > 0) {
+ if (arg_follow)
+ printf("-- Logs begin at %s. --\n",
+ format_timestamp_maybe_utc(start_buf, sizeof(start_buf), start));
+ else
+ printf("-- Logs begin at %s, end at %s. --\n",
+ format_timestamp_maybe_utc(start_buf, sizeof(start_buf), start),
+ format_timestamp_maybe_utc(end_buf, sizeof(end_buf), end));
+ }
+ }
+
+ for (;;) {
+ while (arg_lines < 0 || n_shown < arg_lines || (arg_follow && !first_line)) {
+ int flags;
+ size_t highlight[2] = {};
+
+ if (need_seek) {
+ if (!arg_reverse)
+ r = sd_journal_next(j);
+ else
+ r = sd_journal_previous(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to iterate through journal: %m");
+ goto finish;
+ }
+ if (r == 0)
+ break;
+ }
+
+ if (arg_until_set && !arg_reverse) {
+ usec_t usec;
+
+ r = sd_journal_get_realtime_usec(j, &usec);
+ if (r < 0) {
+ log_error_errno(r, "Failed to determine timestamp: %m");
+ goto finish;
+ }
+ if (usec > arg_until)
+ goto finish;
+ }
+
+ if (arg_since_set && arg_reverse) {
+ usec_t usec;
+
+ r = sd_journal_get_realtime_usec(j, &usec);
+ if (r < 0) {
+ log_error_errno(r, "Failed to determine timestamp: %m");
+ goto finish;
+ }
+ if (usec < arg_since)
+ goto finish;
+ }
+
+ if (!arg_merge && !arg_quiet) {
+ sd_id128_t boot_id;
+
+ r = sd_journal_get_monotonic_usec(j, NULL, &boot_id);
+ if (r >= 0) {
+ if (previous_boot_id_valid &&
+ !sd_id128_equal(boot_id, previous_boot_id))
+ printf("%s-- Reboot --%s\n",
+ ansi_highlight(), ansi_normal());
+
+ previous_boot_id = boot_id;
+ previous_boot_id_valid = true;
+ }
+ }
+
+#if HAVE_PCRE2
+ if (arg_compiled_pattern) {
+ _cleanup_(pcre2_match_data_freep) pcre2_match_data *md = NULL;
+ const void *message;
+ size_t len;
+ PCRE2_SIZE *ovec;
+
+ md = pcre2_match_data_create(1, NULL);
+ if (!md)
+ return log_oom();
+
+ r = sd_journal_get_data(j, "MESSAGE", &message, &len);
+ if (r < 0) {
+ if (r == -ENOENT) {
+ need_seek = true;
+ continue;
+ }
+
+ log_error_errno(r, "Failed to get MESSAGE field: %m");
+ goto finish;
+ }
+
+ assert_se(message = startswith(message, "MESSAGE="));
+
+ r = pcre2_match(arg_compiled_pattern,
+ message,
+ len - strlen("MESSAGE="),
+ 0, /* start at offset 0 in the subject */
+ 0, /* default options */
+ md,
+ NULL);
+ if (r == PCRE2_ERROR_NOMATCH) {
+ need_seek = true;
+ continue;
+ }
+ if (r < 0) {
+ unsigned char buf[LINE_MAX];
+ int r2;
+
+ r2 = pcre2_get_error_message(r, buf, sizeof buf);
+ log_error("Pattern matching failed: %s",
+ r2 < 0 ? "unknown error" : (char*) buf);
+ r = -EINVAL;
+ goto finish;
+ }
+
+ ovec = pcre2_get_ovector_pointer(md);
+ highlight[0] = ovec[0];
+ highlight[1] = ovec[1];
+ }
+#endif
+
+ flags =
+ arg_all * OUTPUT_SHOW_ALL |
+ arg_full * OUTPUT_FULL_WIDTH |
+ colors_enabled() * OUTPUT_COLOR |
+ arg_catalog * OUTPUT_CATALOG |
+ arg_utc * OUTPUT_UTC |
+ arg_no_hostname * OUTPUT_NO_HOSTNAME;
+
+ r = show_journal_entry(stdout, j, arg_output, 0, flags,
+ arg_output_fields, highlight, &ellipsized);
+ need_seek = true;
+ if (r == -EADDRNOTAVAIL)
+ break;
+ else if (r < 0)
+ goto finish;
+
+ n_shown++;
+
+ /* If journalctl take a long time to process messages, and during that time journal file
+ * rotation occurs, a journalctl client will keep those rotated files open until it calls
+ * sd_journal_process(), which typically happens as a result of calling sd_journal_wait() below
+ * in the "following" case. By periodically calling sd_journal_process() during the processing
+ * loop we shrink the window of time a client instance has open file descriptors for rotated
+ * (deleted) journal files. */
+ if ((n_shown % PROCESS_INOTIFY_INTERVAL) == 0) {
+ r = sd_journal_process(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to process inotify events: %m");
+ goto finish;
+ }
+ }
+ }
+
+ if (!arg_follow) {
+ if (n_shown == 0 && !arg_quiet)
+ printf("-- No entries --\n");
+
+ if (arg_show_cursor) {
+ _cleanup_free_ char *cursor = NULL;
+
+ r = sd_journal_get_cursor(j, &cursor);
+ if (r < 0 && r != -EADDRNOTAVAIL)
+ log_error_errno(r, "Failed to get cursor: %m");
+ else if (r >= 0)
+ printf("-- cursor: %s\n", cursor);
+ }
+
+ break;
+ }
+
+ fflush(stdout);
+
+ r = wait_for_change(j, poll_fd);
+ if (r < 0)
+ goto finish;
+
+ first_line = false;
+ }
+
+finish:
+ fflush(stdout);
+ pager_close();
+
+ strv_free(arg_file);
+
+ strv_free(arg_syslog_identifier);
+ strv_free(arg_system_units);
+ strv_free(arg_user_units);
+ strv_free(arg_output_fields);
+
+ free(arg_root);
+ free(arg_verify_key);
+
+#if HAVE_PCRE2
+ if (arg_compiled_pattern)
+ pcre2_code_free(arg_compiled_pattern);
+#endif
+
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/journal/journald-audit.c b/src/journal/journald-audit.c
new file mode 100644
index 0000000..accbad4
--- /dev/null
+++ b/src/journal/journald-audit.c
@@ -0,0 +1,545 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "audit-type.h"
+#include "fd-util.h"
+#include "hexdecoct.h"
+#include "io-util.h"
+#include "journald-audit.h"
+#include "missing.h"
+#include "string-util.h"
+
+typedef struct MapField {
+ const char *audit_field;
+ const char *journal_field;
+ int (*map)(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, size_t *n_iov);
+} MapField;
+
+static int map_simple_field(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, size_t *n_iov) {
+ _cleanup_free_ char *c = NULL;
+ size_t l = 0, allocated = 0;
+ const char *e;
+
+ assert(field);
+ assert(p);
+ assert(iov);
+ assert(n_iov);
+
+ l = strlen(field);
+ allocated = l + 1;
+ c = malloc(allocated);
+ if (!c)
+ return -ENOMEM;
+
+ memcpy(c, field, l);
+ for (e = *p; !IN_SET(*e, 0, ' '); e++) {
+ if (!GREEDY_REALLOC(c, allocated, l+2))
+ return -ENOMEM;
+
+ c[l++] = *e;
+ }
+
+ c[l] = 0;
+
+ if (!GREEDY_REALLOC(*iov, *n_iov_allocated, *n_iov + 1))
+ return -ENOMEM;
+
+ (*iov)[(*n_iov)++] = IOVEC_MAKE(c, l);
+
+ *p = e;
+ c = NULL;
+
+ return 1;
+}
+
+static int map_string_field_internal(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, size_t *n_iov, bool filter_printable) {
+ _cleanup_free_ char *c = NULL;
+ const char *s, *e;
+ size_t l;
+
+ assert(field);
+ assert(p);
+ assert(iov);
+ assert(n_iov);
+
+ /* The kernel formats string fields in one of two formats. */
+
+ if (**p == '"') {
+ /* Normal quoted syntax */
+ s = *p + 1;
+ e = strchr(s, '"');
+ if (!e)
+ return 0;
+
+ l = strlen(field) + (e - s);
+ c = malloc(l+1);
+ if (!c)
+ return -ENOMEM;
+
+ *((char*) mempcpy(stpcpy(c, field), s, e - s)) = 0;
+
+ e += 1;
+
+ } else if (unhexchar(**p) >= 0) {
+ /* Hexadecimal escaping */
+ size_t allocated = 0;
+
+ l = strlen(field);
+ allocated = l + 2;
+ c = malloc(allocated);
+ if (!c)
+ return -ENOMEM;
+
+ memcpy(c, field, l);
+ for (e = *p; !IN_SET(*e, 0, ' '); e += 2) {
+ int a, b;
+ uint8_t x;
+
+ a = unhexchar(e[0]);
+ if (a < 0)
+ return 0;
+
+ b = unhexchar(e[1]);
+ if (b < 0)
+ return 0;
+
+ x = ((uint8_t) a << 4 | (uint8_t) b);
+
+ if (filter_printable && x < (uint8_t) ' ')
+ x = (uint8_t) ' ';
+
+ if (!GREEDY_REALLOC(c, allocated, l+2))
+ return -ENOMEM;
+
+ c[l++] = (char) x;
+ }
+
+ c[l] = 0;
+ } else
+ return 0;
+
+ if (!GREEDY_REALLOC(*iov, *n_iov_allocated, *n_iov + 1))
+ return -ENOMEM;
+
+ (*iov)[(*n_iov)++] = IOVEC_MAKE(c, l);
+
+ *p = e;
+ c = NULL;
+
+ return 1;
+}
+
+static int map_string_field(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, size_t *n_iov) {
+ return map_string_field_internal(field, p, iov, n_iov_allocated, n_iov, false);
+}
+
+static int map_string_field_printable(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, size_t *n_iov) {
+ return map_string_field_internal(field, p, iov, n_iov_allocated, n_iov, true);
+}
+
+static int map_generic_field(const char *prefix, const char **p, struct iovec **iov, size_t *n_iov_allocated, size_t *n_iov) {
+ const char *e, *f;
+ char *c, *t;
+ int r;
+
+ /* Implements fallback mappings for all fields we don't know */
+
+ for (e = *p; e < *p + 16; e++) {
+
+ if (IN_SET(*e, 0, ' '))
+ return 0;
+
+ if (*e == '=')
+ break;
+
+ if (!((*e >= 'a' && *e <= 'z') ||
+ (*e >= 'A' && *e <= 'Z') ||
+ (*e >= '0' && *e <= '9') ||
+ IN_SET(*e, '_', '-')))
+ return 0;
+ }
+
+ if (e <= *p || e >= *p + 16)
+ return 0;
+
+ c = newa(char, strlen(prefix) + (e - *p) + 2);
+
+ t = stpcpy(c, prefix);
+ for (f = *p; f < e; f++) {
+ char x;
+
+ if (*f >= 'a' && *f <= 'z')
+ x = (*f - 'a') + 'A'; /* uppercase */
+ else if (*f == '-')
+ x = '_'; /* dashes → underscores */
+ else
+ x = *f;
+
+ *(t++) = x;
+ }
+ strcpy(t, "=");
+
+ e++;
+
+ r = map_simple_field(c, &e, iov, n_iov_allocated, n_iov);
+ if (r < 0)
+ return r;
+
+ *p = e;
+ return r;
+}
+
+/* Kernel fields are those occurring in the audit string before
+ * msg='. All of these fields are trusted, hence carry the "_" prefix.
+ * We try to translate the fields we know into our native names. The
+ * other's are generically mapped to _AUDIT_FIELD_XYZ= */
+static const MapField map_fields_kernel[] = {
+
+ /* First, we map certain well-known audit fields into native
+ * well-known fields */
+ { "pid=", "_PID=", map_simple_field },
+ { "ppid=", "_PPID=", map_simple_field },
+ { "uid=", "_UID=", map_simple_field },
+ { "euid=", "_EUID=", map_simple_field },
+ { "fsuid=", "_FSUID=", map_simple_field },
+ { "gid=", "_GID=", map_simple_field },
+ { "egid=", "_EGID=", map_simple_field },
+ { "fsgid=", "_FSGID=", map_simple_field },
+ { "tty=", "_TTY=", map_simple_field },
+ { "ses=", "_AUDIT_SESSION=", map_simple_field },
+ { "auid=", "_AUDIT_LOGINUID=", map_simple_field },
+ { "subj=", "_SELINUX_CONTEXT=", map_simple_field },
+ { "comm=", "_COMM=", map_string_field },
+ { "exe=", "_EXE=", map_string_field },
+ { "proctitle=", "_CMDLINE=", map_string_field_printable },
+
+ /* Some fields don't map to native well-known fields. However,
+ * we know that they are string fields, hence let's undo
+ * string field escaping for them, though we stick to the
+ * generic field names. */
+ { "path=", "_AUDIT_FIELD_PATH=", map_string_field },
+ { "dev=", "_AUDIT_FIELD_DEV=", map_string_field },
+ { "name=", "_AUDIT_FIELD_NAME=", map_string_field },
+ {}
+};
+
+/* Userspace fields are those occurring in the audit string after
+ * msg='. All of these fields are untrusted, hence carry no "_"
+ * prefix. We map the fields we don't know to AUDIT_FIELD_XYZ= */
+static const MapField map_fields_userspace[] = {
+ { "cwd=", "AUDIT_FIELD_CWD=", map_string_field },
+ { "cmd=", "AUDIT_FIELD_CMD=", map_string_field },
+ { "acct=", "AUDIT_FIELD_ACCT=", map_string_field },
+ { "exe=", "AUDIT_FIELD_EXE=", map_string_field },
+ { "comm=", "AUDIT_FIELD_COMM=", map_string_field },
+ {}
+};
+
+static int map_all_fields(
+ const char *p,
+ const MapField map_fields[],
+ const char *prefix,
+ bool handle_msg,
+ struct iovec **iov,
+ size_t *n_iov_allocated,
+ size_t *n_iov) {
+
+ int r;
+
+ assert(p);
+ assert(iov);
+ assert(n_iov_allocated);
+ assert(n_iov);
+
+ for (;;) {
+ bool mapped = false;
+ const MapField *m;
+ const char *v;
+
+ p += strspn(p, WHITESPACE);
+
+ if (*p == 0)
+ return 0;
+
+ if (handle_msg) {
+ v = startswith(p, "msg='");
+ if (v) {
+ const char *e;
+ char *c;
+
+ /* Userspace message. It's enclosed in
+ simple quotation marks, is not
+ escaped, but the last field in the
+ line, hence let's remove the
+ quotation mark, and apply the
+ userspace mapping instead of the
+ kernel mapping. */
+
+ e = endswith(v, "'");
+ if (!e)
+ return 0; /* don't continue splitting up if the final quotation mark is missing */
+
+ c = strndupa(v, e - v);
+ return map_all_fields(c, map_fields_userspace, "AUDIT_FIELD_", false, iov, n_iov_allocated, n_iov);
+ }
+ }
+
+ /* Try to map the kernel fields to our own names */
+ for (m = map_fields; m->audit_field; m++) {
+ v = startswith(p, m->audit_field);
+ if (!v)
+ continue;
+
+ r = m->map(m->journal_field, &v, iov, n_iov_allocated, n_iov);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse audit array: %m");
+
+ if (r > 0) {
+ mapped = true;
+ p = v;
+ break;
+ }
+ }
+
+ if (!mapped) {
+ r = map_generic_field(prefix, &p, iov, n_iov_allocated, n_iov);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse audit array: %m");
+
+ if (r == 0)
+ /* Couldn't process as generic field, let's just skip over it */
+ p += strcspn(p, WHITESPACE);
+ }
+ }
+}
+
+void process_audit_string(Server *s, int type, const char *data, size_t size) {
+ size_t n_iov_allocated = 0, n_iov = 0, z;
+ _cleanup_free_ struct iovec *iov = NULL;
+ uint64_t seconds, msec, id;
+ const char *p, *type_name;
+ char id_field[sizeof("_AUDIT_ID=") + DECIMAL_STR_MAX(uint64_t)],
+ type_field[sizeof("_AUDIT_TYPE=") + DECIMAL_STR_MAX(int)],
+ source_time_field[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
+ char *m, *type_field_name;
+ int k;
+
+ assert(s);
+
+ if (size <= 0)
+ return;
+
+ if (!data)
+ return;
+
+ /* Note that the input buffer is NUL terminated, but let's
+ * check whether there is a spurious NUL byte */
+ if (memchr(data, 0, size))
+ return;
+
+ p = startswith(data, "audit");
+ if (!p)
+ return;
+
+ k = 0;
+ if (sscanf(p, "(%" PRIu64 ".%" PRIu64 ":%" PRIu64 "):%n",
+ &seconds,
+ &msec,
+ &id,
+ &k) != 3 || k == 0)
+ return;
+
+ p += k;
+ p += strspn(p, WHITESPACE);
+
+ if (isempty(p))
+ return;
+
+ n_iov_allocated = N_IOVEC_META_FIELDS + 8;
+ iov = new(struct iovec, n_iov_allocated);
+ if (!iov) {
+ log_oom();
+ return;
+ }
+
+ iov[n_iov++] = IOVEC_MAKE_STRING("_TRANSPORT=audit");
+
+ sprintf(source_time_field, "_SOURCE_REALTIME_TIMESTAMP=%" PRIu64,
+ (usec_t) seconds * USEC_PER_SEC + (usec_t) msec * USEC_PER_MSEC);
+ iov[n_iov++] = IOVEC_MAKE_STRING(source_time_field);
+
+ sprintf(type_field, "_AUDIT_TYPE=%i", type);
+ iov[n_iov++] = IOVEC_MAKE_STRING(type_field);
+
+ sprintf(id_field, "_AUDIT_ID=%" PRIu64, id);
+ iov[n_iov++] = IOVEC_MAKE_STRING(id_field);
+
+ assert_cc(4 == LOG_FAC(LOG_AUTH));
+ iov[n_iov++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=4");
+ iov[n_iov++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=audit");
+
+ type_name = audit_type_name_alloca(type);
+
+ type_field_name = strjoina("_AUDIT_TYPE_NAME=", type_name);
+ iov[n_iov++] = IOVEC_MAKE_STRING(type_field_name);
+
+ m = strjoina("MESSAGE=", type_name, " ", p);
+ iov[n_iov++] = IOVEC_MAKE_STRING(m);
+
+ z = n_iov;
+
+ map_all_fields(p, map_fields_kernel, "_AUDIT_FIELD_", true, &iov, &n_iov_allocated, &n_iov);
+
+ if (!GREEDY_REALLOC(iov, n_iov_allocated, n_iov + N_IOVEC_META_FIELDS)) {
+ log_oom();
+ goto finish;
+ }
+
+ server_dispatch_message(s, iov, n_iov, n_iov_allocated, NULL, NULL, LOG_NOTICE, 0);
+
+finish:
+ /* free() all entries that map_all_fields() added. All others
+ * are allocated on the stack or are constant. */
+
+ for (; z < n_iov; z++)
+ free(iov[z].iov_base);
+}
+
+void server_process_audit_message(
+ Server *s,
+ const void *buffer,
+ size_t buffer_size,
+ const struct ucred *ucred,
+ const union sockaddr_union *sa,
+ socklen_t salen) {
+
+ const struct nlmsghdr *nl = buffer;
+
+ assert(s);
+
+ if (buffer_size < ALIGN(sizeof(struct nlmsghdr)))
+ return;
+
+ assert(buffer);
+
+ /* Filter out fake data */
+ if (!sa ||
+ salen != sizeof(struct sockaddr_nl) ||
+ sa->nl.nl_family != AF_NETLINK ||
+ sa->nl.nl_pid != 0) {
+ log_debug("Audit netlink message from invalid sender.");
+ return;
+ }
+
+ if (!ucred || ucred->pid != 0) {
+ log_debug("Audit netlink message with invalid credentials.");
+ return;
+ }
+
+ if (!NLMSG_OK(nl, buffer_size)) {
+ log_error("Audit netlink message truncated.");
+ return;
+ }
+
+ /* Ignore special Netlink messages */
+ if (IN_SET(nl->nlmsg_type, NLMSG_NOOP, NLMSG_ERROR))
+ return;
+
+ /* Except AUDIT_USER, all messsages below AUDIT_FIRST_USER_MSG are control messages, let's ignore those */
+ if (nl->nlmsg_type < AUDIT_FIRST_USER_MSG && nl->nlmsg_type != AUDIT_USER)
+ return;
+
+ process_audit_string(s, nl->nlmsg_type, NLMSG_DATA(nl), nl->nlmsg_len - ALIGN(sizeof(struct nlmsghdr)));
+}
+
+static int enable_audit(int fd, bool b) {
+ struct {
+ union {
+ struct nlmsghdr header;
+ uint8_t header_space[NLMSG_HDRLEN];
+ };
+ struct audit_status body;
+ } _packed_ request = {
+ .header.nlmsg_len = NLMSG_LENGTH(sizeof(struct audit_status)),
+ .header.nlmsg_type = AUDIT_SET,
+ .header.nlmsg_flags = NLM_F_REQUEST,
+ .header.nlmsg_seq = 1,
+ .header.nlmsg_pid = 0,
+ .body.mask = AUDIT_STATUS_ENABLED,
+ .body.enabled = b,
+ };
+ union sockaddr_union sa = {
+ .nl.nl_family = AF_NETLINK,
+ .nl.nl_pid = 0,
+ };
+ struct iovec iovec = {
+ .iov_base = &request,
+ .iov_len = NLMSG_LENGTH(sizeof(struct audit_status)),
+ };
+ struct msghdr mh = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_name = &sa.sa,
+ .msg_namelen = sizeof(sa.nl),
+ };
+
+ ssize_t n;
+
+ n = sendmsg(fd, &mh, MSG_NOSIGNAL);
+ if (n < 0)
+ return -errno;
+ if (n != NLMSG_LENGTH(sizeof(struct audit_status)))
+ return -EIO;
+
+ /* We don't wait for the result here, we can't do anything
+ * about it anyway */
+
+ return 0;
+}
+
+int server_open_audit(Server *s) {
+ int r;
+
+ if (s->audit_fd < 0) {
+ static const union sockaddr_union sa = {
+ .nl.nl_family = AF_NETLINK,
+ .nl.nl_pid = 0,
+ .nl.nl_groups = AUDIT_NLGRP_READLOG,
+ };
+
+ s->audit_fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_AUDIT);
+ if (s->audit_fd < 0) {
+ if (IN_SET(errno, EAFNOSUPPORT, EPROTONOSUPPORT))
+ log_debug("Audit not supported in the kernel.");
+ else
+ log_warning_errno(errno, "Failed to create audit socket, ignoring: %m");
+
+ return 0;
+ }
+
+ if (bind(s->audit_fd, &sa.sa, sizeof(sa.nl)) < 0) {
+ log_warning_errno(errno,
+ "Failed to join audit multicast group. "
+ "The kernel is probably too old or multicast reading is not supported. "
+ "Ignoring: %m");
+ s->audit_fd = safe_close(s->audit_fd);
+ return 0;
+ }
+ } else
+ (void) fd_nonblock(s->audit_fd, true);
+
+ r = setsockopt_int(s->audit_fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set SO_PASSCRED on audit socket: %m");
+
+ r = sd_event_add_io(s->event, &s->audit_event_source, s->audit_fd, EPOLLIN, server_process_datagram, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add audit fd to event loop: %m");
+
+ /* We are listening now, try to enable audit */
+ r = enable_audit(s->audit_fd, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to issue audit enable call: %m");
+
+ return 0;
+}
diff --git a/src/journal/journald-audit.h b/src/journal/journald-audit.h
new file mode 100644
index 0000000..df41f81
--- /dev/null
+++ b/src/journal/journald-audit.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "journald-server.h"
+#include "socket-util.h"
+
+void server_process_audit_message(Server *s, const void *buffer, size_t buffer_size, const struct ucred *ucred, const union sockaddr_union *sa, socklen_t salen);
+
+void process_audit_string(Server *s, int type, const char *data, size_t size);
+
+int server_open_audit(Server *s);
diff --git a/src/journal/journald-console.c b/src/journal/journald-console.c
new file mode 100644
index 0000000..80e2958
--- /dev/null
+++ b/src/journal/journald-console.c
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <time.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "journald-console.h"
+#include "journald-server.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "stdio-util.h"
+#include "terminal-util.h"
+
+static bool prefix_timestamp(void) {
+
+ static int cached_printk_time = -1;
+
+ if (_unlikely_(cached_printk_time < 0)) {
+ _cleanup_free_ char *p = NULL;
+
+ cached_printk_time =
+ read_one_line_file("/sys/module/printk/parameters/time", &p) >= 0
+ && parse_boolean(p) > 0;
+ }
+
+ return cached_printk_time;
+}
+
+void server_forward_console(
+ Server *s,
+ int priority,
+ const char *identifier,
+ const char *message,
+ const struct ucred *ucred) {
+
+ struct iovec iovec[5];
+ struct timespec ts;
+ char tbuf[STRLEN("[] ") + DECIMAL_STR_MAX(ts.tv_sec) + DECIMAL_STR_MAX(ts.tv_nsec)-3 + 1];
+ char header_pid[STRLEN("[]: ") + DECIMAL_STR_MAX(pid_t)];
+ _cleanup_free_ char *ident_buf = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *tty;
+ int n = 0;
+
+ assert(s);
+ assert(message);
+
+ if (LOG_PRI(priority) > s->max_level_console)
+ return;
+
+ /* First: timestamp */
+ if (prefix_timestamp()) {
+ assert_se(clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
+ xsprintf(tbuf, "[%5"PRI_TIME".%06"PRI_NSEC"] ",
+ ts.tv_sec,
+ (nsec_t)ts.tv_nsec / 1000);
+
+ iovec[n++] = IOVEC_MAKE_STRING(tbuf);
+ }
+
+ /* Second: identifier and PID */
+ if (ucred) {
+ if (!identifier) {
+ get_process_comm(ucred->pid, &ident_buf);
+ identifier = ident_buf;
+ }
+
+ xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
+
+ if (identifier)
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+
+ iovec[n++] = IOVEC_MAKE_STRING(header_pid);
+ } else if (identifier) {
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(": ");
+ }
+
+ /* Fourth: message */
+ iovec[n++] = IOVEC_MAKE_STRING(message);
+ iovec[n++] = IOVEC_MAKE_STRING("\n");
+
+ tty = s->tty_path ?: "/dev/console";
+
+ /* Before you ask: yes, on purpose we open/close the console for each log line we write individually. This is a
+ * good strategy to avoid journald getting killed by the kernel's SAK concept (it doesn't fix this entirely,
+ * but minimizes the time window the kernel might end up killing journald due to SAK). It also makes things
+ * easier for us so that we don't have to recover from hangups and suchlike triggered on the console. */
+
+ fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0) {
+ log_debug_errno(fd, "Failed to open %s for logging: %m", tty);
+ return;
+ }
+
+ if (writev(fd, iovec, n) < 0)
+ log_debug_errno(errno, "Failed to write to %s for logging: %m", tty);
+}
diff --git a/src/journal/journald-console.h b/src/journal/journald-console.h
new file mode 100644
index 0000000..3def00a
--- /dev/null
+++ b/src/journal/journald-console.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "journald-server.h"
+
+void server_forward_console(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred);
diff --git a/src/journal/journald-context.c b/src/journal/journald-context.c
new file mode 100644
index 0000000..7c51f2f
--- /dev/null
+++ b/src/journal/journald-context.c
@@ -0,0 +1,782 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "cgroup-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "journal-util.h"
+#include "journald-context.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "procfs-util.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "unaligned.h"
+#include "user-util.h"
+
+/* This implements a metadata cache for clients, which are identified by their PID. Requesting metadata through /proc
+ * is expensive, hence let's cache the data if we can. Note that this means the metadata might be out-of-date when we
+ * store it, but it might already be anyway, as we request the data asynchronously from /proc at a different time the
+ * log entry was originally created. We hence just increase the "window of inaccuracy" a bit.
+ *
+ * The cache is indexed by the PID. Entries may be "pinned" in the cache, in which case the entries are not removed
+ * until they are unpinned. Unpinned entries are kept around until cache pressure is seen. Cache entries older than 5s
+ * are never used (a sad attempt to deal with the UNIX weakness of PIDs reuse), cache entries older than 1s are
+ * refreshed in an incremental way (meaning: data is reread from /proc, but any old data we can't refresh is not
+ * flushed out). Data newer than 1s is used immediately without refresh.
+ *
+ * Log stream clients (i.e. all clients using the AF_UNIX/SOCK_STREAM stdout/stderr transport) will pin a cache entry
+ * as long as their socket is connected. Note that cache entries are shared between different transports. That means a
+ * cache entry pinned for the stream connection logic may be reused for the syslog or native protocols.
+ *
+ * Caching metadata like this has two major benefits:
+ *
+ * 1. Reading metadata is expensive, and we can thus substantially speed up log processing under flood.
+ *
+ * 2. Because metadata caching is shared between stream and datagram transports and stream connections pin a cache
+ * entry there's a good chance we can properly map a substantial set of datagram log messages to their originating
+ * service, as all services (unless explicitly configured otherwise) will have their stdout/stderr connected to a
+ * stream connection. This should improve cases where a service process logs immediately before exiting and we
+ * previously had trouble associating the log message with the service.
+ *
+ * NB: With and without the metadata cache: the implicitly added entry metadata in the journal (with the exception of
+ * UID/PID/GID and SELinux label) must be understood as possibly slightly out of sync (i.e. sometimes slighly older
+ * and sometimes slightly newer than what was current at the log event).
+ */
+
+/* We refresh every 1s */
+#define REFRESH_USEC (1*USEC_PER_SEC)
+
+/* Data older than 5s we flush out */
+#define MAX_USEC (5*USEC_PER_SEC)
+
+/* Keep at most 16K entries in the cache. (Note though that this limit may be violated if enough streams pin entries in
+ * the cache, in which case we *do* permit this limit to be breached. That's safe however, as the number of stream
+ * clients itself is limited.) */
+#define CACHE_MAX_FALLBACK 128U
+#define CACHE_MAX_MAX (16*1024U)
+#define CACHE_MAX_MIN 64U
+
+static size_t cache_max(void) {
+ static size_t cached = -1;
+
+ if (cached == (size_t) -1) {
+ uint64_t mem_total;
+ int r;
+
+ r = procfs_memory_get(&mem_total, NULL);
+ if (r < 0) {
+ log_warning_errno(r, "Cannot query /proc/meminfo for MemTotal: %m");
+ cached = CACHE_MAX_FALLBACK;
+ } else {
+ /* Cache entries are usually a few kB, but the process cmdline is controlled by the
+ * user and can be up to _SC_ARG_MAX, usually 2MB. Let's say that approximately up to
+ * 1/8th of memory may be used by the cache.
+ *
+ * In the common case, this formula gives 64 cache entries for each GB of RAM.
+ */
+ long l = sysconf(_SC_ARG_MAX);
+ assert(l > 0);
+
+ cached = CLAMP(mem_total / 8 / (uint64_t) l, CACHE_MAX_MIN, CACHE_MAX_MAX);
+ }
+ }
+
+ return cached;
+}
+
+static int client_context_compare(const void *a, const void *b) {
+ const ClientContext *x = a, *y = b;
+ int r;
+
+ r = CMP(x->timestamp, y->timestamp);
+ if (r != 0)
+ return r;
+
+ return CMP(x->pid, y->pid);
+}
+
+static int client_context_new(Server *s, pid_t pid, ClientContext **ret) {
+ ClientContext *c;
+ int r;
+
+ assert(s);
+ assert(pid_is_valid(pid));
+ assert(ret);
+
+ r = hashmap_ensure_allocated(&s->client_contexts, NULL);
+ if (r < 0)
+ return r;
+
+ r = prioq_ensure_allocated(&s->client_contexts_lru, client_context_compare);
+ if (r < 0)
+ return r;
+
+ c = new0(ClientContext, 1);
+ if (!c)
+ return -ENOMEM;
+
+ c->pid = pid;
+
+ c->uid = UID_INVALID;
+ c->gid = GID_INVALID;
+ c->auditid = AUDIT_SESSION_INVALID;
+ c->loginuid = UID_INVALID;
+ c->owner_uid = UID_INVALID;
+ c->lru_index = PRIOQ_IDX_NULL;
+ c->timestamp = USEC_INFINITY;
+ c->extra_fields_mtime = NSEC_INFINITY;
+ c->log_level_max = -1;
+ c->log_rate_limit_interval = s->rate_limit_interval;
+ c->log_rate_limit_burst = s->rate_limit_burst;
+
+ r = hashmap_put(s->client_contexts, PID_TO_PTR(pid), c);
+ if (r < 0) {
+ free(c);
+ return r;
+ }
+
+ *ret = c;
+ return 0;
+}
+
+static void client_context_reset(Server *s, ClientContext *c) {
+ assert(s);
+ assert(c);
+
+ c->timestamp = USEC_INFINITY;
+
+ c->uid = UID_INVALID;
+ c->gid = GID_INVALID;
+
+ c->comm = mfree(c->comm);
+ c->exe = mfree(c->exe);
+ c->cmdline = mfree(c->cmdline);
+ c->capeff = mfree(c->capeff);
+
+ c->auditid = AUDIT_SESSION_INVALID;
+ c->loginuid = UID_INVALID;
+
+ c->cgroup = mfree(c->cgroup);
+ c->session = mfree(c->session);
+ c->owner_uid = UID_INVALID;
+ c->unit = mfree(c->unit);
+ c->user_unit = mfree(c->user_unit);
+ c->slice = mfree(c->slice);
+ c->user_slice = mfree(c->user_slice);
+
+ c->invocation_id = SD_ID128_NULL;
+
+ c->label = mfree(c->label);
+ c->label_size = 0;
+
+ c->extra_fields_iovec = mfree(c->extra_fields_iovec);
+ c->extra_fields_n_iovec = 0;
+ c->extra_fields_data = mfree(c->extra_fields_data);
+ c->extra_fields_mtime = NSEC_INFINITY;
+
+ c->log_level_max = -1;
+
+ c->log_rate_limit_interval = s->rate_limit_interval;
+ c->log_rate_limit_burst = s->rate_limit_burst;
+}
+
+static ClientContext* client_context_free(Server *s, ClientContext *c) {
+ assert(s);
+
+ if (!c)
+ return NULL;
+
+ assert_se(hashmap_remove(s->client_contexts, PID_TO_PTR(c->pid)) == c);
+
+ if (c->in_lru)
+ assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);
+
+ client_context_reset(s, c);
+
+ return mfree(c);
+}
+
+static void client_context_read_uid_gid(ClientContext *c, const struct ucred *ucred) {
+ assert(c);
+ assert(pid_is_valid(c->pid));
+
+ /* The ucred data passed in is always the most current and accurate, if we have any. Use it. */
+ if (ucred && uid_is_valid(ucred->uid))
+ c->uid = ucred->uid;
+ else
+ (void) get_process_uid(c->pid, &c->uid);
+
+ if (ucred && gid_is_valid(ucred->gid))
+ c->gid = ucred->gid;
+ else
+ (void) get_process_gid(c->pid, &c->gid);
+}
+
+static void client_context_read_basic(ClientContext *c) {
+ char *t;
+
+ assert(c);
+ assert(pid_is_valid(c->pid));
+
+ if (get_process_comm(c->pid, &t) >= 0)
+ free_and_replace(c->comm, t);
+
+ if (get_process_exe(c->pid, &t) >= 0)
+ free_and_replace(c->exe, t);
+
+ if (get_process_cmdline(c->pid, 0, false, &t) >= 0)
+ free_and_replace(c->cmdline, t);
+
+ if (get_process_capeff(c->pid, &t) >= 0)
+ free_and_replace(c->capeff, t);
+}
+
+static int client_context_read_label(
+ ClientContext *c,
+ const char *label, size_t label_size) {
+
+ assert(c);
+ assert(pid_is_valid(c->pid));
+ assert(label_size == 0 || label);
+
+ if (label_size > 0) {
+ char *l;
+
+ /* If we got an SELinux label passed in it counts. */
+
+ l = newdup_suffix0(char, label, label_size);
+ if (!l)
+ return -ENOMEM;
+
+ free_and_replace(c->label, l);
+ c->label_size = label_size;
+ }
+#if HAVE_SELINUX
+ else {
+ char *con;
+
+ /* If we got no SELinux label passed in, let's try to acquire one */
+
+ if (getpidcon(c->pid, &con) >= 0) {
+ free_and_replace(c->label, con);
+ c->label_size = strlen(c->label);
+ }
+ }
+#endif
+
+ return 0;
+}
+
+static int client_context_read_cgroup(Server *s, ClientContext *c, const char *unit_id) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(c);
+
+ /* Try to acquire the current cgroup path */
+ r = cg_pid_get_path_shifted(c->pid, s->cgroup_root, &t);
+ if (r < 0 || empty_or_root(t)) {
+ /* We use the unit ID passed in as fallback if we have nothing cached yet and cg_pid_get_path_shifted()
+ * failed or process is running in a root cgroup. Zombie processes are automatically migrated to root cgroup
+ * on cgroup v1 and we want to be able to map log messages from them too. */
+ if (unit_id && !c->unit) {
+ c->unit = strdup(unit_id);
+ if (c->unit)
+ return 0;
+ }
+
+ return r;
+ }
+
+ /* Let's shortcut this if the cgroup path didn't change */
+ if (streq_ptr(c->cgroup, t))
+ return 0;
+
+ free_and_replace(c->cgroup, t);
+
+ (void) cg_path_get_session(c->cgroup, &t);
+ free_and_replace(c->session, t);
+
+ if (cg_path_get_owner_uid(c->cgroup, &c->owner_uid) < 0)
+ c->owner_uid = UID_INVALID;
+
+ (void) cg_path_get_unit(c->cgroup, &t);
+ free_and_replace(c->unit, t);
+
+ (void) cg_path_get_user_unit(c->cgroup, &t);
+ free_and_replace(c->user_unit, t);
+
+ (void) cg_path_get_slice(c->cgroup, &t);
+ free_and_replace(c->slice, t);
+
+ (void) cg_path_get_user_slice(c->cgroup, &t);
+ free_and_replace(c->user_slice, t);
+
+ return 0;
+}
+
+static int client_context_read_invocation_id(
+ Server *s,
+ ClientContext *c) {
+
+ _cleanup_free_ char *value = NULL;
+ const char *p;
+ int r;
+
+ assert(s);
+ assert(c);
+
+ /* Read the invocation ID of a unit off a unit. PID 1 stores it in a per-unit symlink in /run/systemd/units/ */
+
+ if (!c->unit)
+ return 0;
+
+ p = strjoina("/run/systemd/units/invocation:", c->unit);
+ r = readlink_malloc(p, &value);
+ if (r < 0)
+ return r;
+
+ return sd_id128_from_string(value, &c->invocation_id);
+}
+
+static int client_context_read_log_level_max(
+ Server *s,
+ ClientContext *c) {
+
+ _cleanup_free_ char *value = NULL;
+ const char *p;
+ int r, ll;
+
+ if (!c->unit)
+ return 0;
+
+ p = strjoina("/run/systemd/units/log-level-max:", c->unit);
+ r = readlink_malloc(p, &value);
+ if (r < 0)
+ return r;
+
+ ll = log_level_from_string(value);
+ if (ll < 0)
+ return -EINVAL;
+
+ c->log_level_max = ll;
+ return 0;
+}
+
+static int client_context_read_extra_fields(
+ Server *s,
+ ClientContext *c) {
+
+ size_t size = 0, n_iovec = 0, n_allocated = 0, left;
+ _cleanup_free_ struct iovec *iovec = NULL;
+ _cleanup_free_ void *data = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ struct stat st;
+ const char *p;
+ uint8_t *q;
+ int r;
+
+ if (!c->unit)
+ return 0;
+
+ p = strjoina("/run/systemd/units/log-extra-fields:", c->unit);
+
+ if (c->extra_fields_mtime != NSEC_INFINITY) {
+ if (stat(p, &st) < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ if (timespec_load_nsec(&st.st_mtim) == c->extra_fields_mtime)
+ return 0;
+ }
+
+ f = fopen(p, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ if (fstat(fileno(f), &st) < 0) /* The file might have been replaced since the stat() above, let's get a new
+ * one, that matches the stuff we are reading */
+ return -errno;
+
+ r = read_full_stream(f, (char**) &data, &size);
+ if (r < 0)
+ return r;
+
+ q = data, left = size;
+ while (left > 0) {
+ uint8_t *field, *eq;
+ uint64_t v, n;
+
+ if (left < sizeof(uint64_t))
+ return -EBADMSG;
+
+ v = unaligned_read_le64(q);
+ if (v < 2)
+ return -EBADMSG;
+
+ n = sizeof(uint64_t) + v;
+ if (left < n)
+ return -EBADMSG;
+
+ field = q + sizeof(uint64_t);
+
+ eq = memchr(field, '=', v);
+ if (!eq)
+ return -EBADMSG;
+
+ if (!journal_field_valid((const char *) field, eq - field, false))
+ return -EBADMSG;
+
+ if (!GREEDY_REALLOC(iovec, n_allocated, n_iovec+1))
+ return -ENOMEM;
+
+ iovec[n_iovec++] = IOVEC_MAKE(field, v);
+
+ left -= n, q += n;
+ }
+
+ free(c->extra_fields_iovec);
+ free(c->extra_fields_data);
+
+ c->extra_fields_iovec = TAKE_PTR(iovec);
+ c->extra_fields_n_iovec = n_iovec;
+ c->extra_fields_data = TAKE_PTR(data);
+ c->extra_fields_mtime = timespec_load_nsec(&st.st_mtim);
+
+ return 0;
+}
+
+static int client_context_read_log_rate_limit_interval(ClientContext *c) {
+ _cleanup_free_ char *value = NULL;
+ const char *p;
+ int r;
+
+ assert(c);
+
+ if (!c->unit)
+ return 0;
+
+ p = strjoina("/run/systemd/units/log-rate-limit-interval:", c->unit);
+ r = readlink_malloc(p, &value);
+ if (r < 0)
+ return r;
+
+ return safe_atou64(value, &c->log_rate_limit_interval);
+}
+
+static int client_context_read_log_rate_limit_burst(ClientContext *c) {
+ _cleanup_free_ char *value = NULL;
+ const char *p;
+ int r;
+
+ assert(c);
+
+ if (!c->unit)
+ return 0;
+
+ p = strjoina("/run/systemd/units/log-rate-limit-burst:", c->unit);
+ r = readlink_malloc(p, &value);
+ if (r < 0)
+ return r;
+
+ return safe_atou(value, &c->log_rate_limit_burst);
+}
+
+static void client_context_really_refresh(
+ Server *s,
+ ClientContext *c,
+ const struct ucred *ucred,
+ const char *label, size_t label_size,
+ const char *unit_id,
+ usec_t timestamp) {
+
+ assert(s);
+ assert(c);
+ assert(pid_is_valid(c->pid));
+
+ if (timestamp == USEC_INFINITY)
+ timestamp = now(CLOCK_MONOTONIC);
+
+ client_context_read_uid_gid(c, ucred);
+ client_context_read_basic(c);
+ (void) client_context_read_label(c, label, label_size);
+
+ (void) audit_session_from_pid(c->pid, &c->auditid);
+ (void) audit_loginuid_from_pid(c->pid, &c->loginuid);
+
+ (void) client_context_read_cgroup(s, c, unit_id);
+ (void) client_context_read_invocation_id(s, c);
+ (void) client_context_read_log_level_max(s, c);
+ (void) client_context_read_extra_fields(s, c);
+ (void) client_context_read_log_rate_limit_interval(c);
+ (void) client_context_read_log_rate_limit_burst(c);
+
+ c->timestamp = timestamp;
+
+ if (c->in_lru) {
+ assert(c->n_ref == 0);
+ assert_se(prioq_reshuffle(s->client_contexts_lru, c, &c->lru_index) >= 0);
+ }
+}
+
+void client_context_maybe_refresh(
+ Server *s,
+ ClientContext *c,
+ const struct ucred *ucred,
+ const char *label, size_t label_size,
+ const char *unit_id,
+ usec_t timestamp) {
+
+ assert(s);
+ assert(c);
+
+ if (timestamp == USEC_INFINITY)
+ timestamp = now(CLOCK_MONOTONIC);
+
+ /* No cached data so far? Let's fill it up */
+ if (c->timestamp == USEC_INFINITY)
+ goto refresh;
+
+ /* If the data isn't pinned and if the cashed data is older than the upper limit, we flush it out
+ * entirely. This follows the logic that as long as an entry is pinned the PID reuse is unlikely. */
+ if (c->n_ref == 0 && c->timestamp + MAX_USEC < timestamp) {
+ client_context_reset(s, c);
+ goto refresh;
+ }
+
+ /* If the data is older than the lower limit, we refresh, but keep the old data for all we can't update */
+ if (c->timestamp + REFRESH_USEC < timestamp)
+ goto refresh;
+
+ /* If the data passed along doesn't match the cached data we also do a refresh */
+ if (ucred && uid_is_valid(ucred->uid) && c->uid != ucred->uid)
+ goto refresh;
+
+ if (ucred && gid_is_valid(ucred->gid) && c->gid != ucred->gid)
+ goto refresh;
+
+ if (label_size > 0 && (label_size != c->label_size || memcmp(label, c->label, label_size) != 0))
+ goto refresh;
+
+ return;
+
+refresh:
+ client_context_really_refresh(s, c, ucred, label, label_size, unit_id, timestamp);
+}
+
+static void client_context_try_shrink_to(Server *s, size_t limit) {
+ ClientContext *c;
+ usec_t t;
+
+ assert(s);
+
+ /* Flush any cache entries for PIDs that have already moved on. Don't do this
+ * too often, since it's a slow process. */
+ t = now(CLOCK_MONOTONIC);
+ if (s->last_cache_pid_flush + MAX_USEC < t) {
+ unsigned n = prioq_size(s->client_contexts_lru), idx = 0;
+
+ /* We do a number of iterations based on the initial size of the prioq. When we remove an
+ * item, a new item is moved into its places, and items to the right might be reshuffled.
+ */
+ for (unsigned i = 0; i < n; i++) {
+ c = prioq_peek_by_index(s->client_contexts_lru, idx);
+
+ assert(c->n_ref == 0);
+
+ if (!pid_is_unwaited(c->pid))
+ client_context_free(s, c);
+ else
+ idx ++;
+ }
+
+ s->last_cache_pid_flush = t;
+ }
+
+ /* Bring the number of cache entries below the indicated limit, so that we can create a new entry without
+ * breaching the limit. Note that we only flush out entries that aren't pinned here. This means the number of
+ * cache entries may very well grow beyond the limit, if all entries stored remain pinned. */
+
+ while (hashmap_size(s->client_contexts) > limit) {
+ c = prioq_pop(s->client_contexts_lru);
+ if (!c)
+ break; /* All remaining entries are pinned, give up */
+
+ assert(c->in_lru);
+ assert(c->n_ref == 0);
+
+ c->in_lru = false;
+
+ client_context_free(s, c);
+ }
+}
+
+void client_context_flush_all(Server *s) {
+ assert(s);
+
+ /* Flush out all remaining entries. This assumes all references are already dropped. */
+
+ s->my_context = client_context_release(s, s->my_context);
+ s->pid1_context = client_context_release(s, s->pid1_context);
+
+ client_context_try_shrink_to(s, 0);
+
+ assert(prioq_size(s->client_contexts_lru) == 0);
+ assert(hashmap_size(s->client_contexts) == 0);
+
+ s->client_contexts_lru = prioq_free(s->client_contexts_lru);
+ s->client_contexts = hashmap_free(s->client_contexts);
+}
+
+static int client_context_get_internal(
+ Server *s,
+ pid_t pid,
+ const struct ucred *ucred,
+ const char *label, size_t label_len,
+ const char *unit_id,
+ bool add_ref,
+ ClientContext **ret) {
+
+ ClientContext *c;
+ int r;
+
+ assert(s);
+ assert(ret);
+
+ if (!pid_is_valid(pid))
+ return -EINVAL;
+
+ c = hashmap_get(s->client_contexts, PID_TO_PTR(pid));
+ if (c) {
+
+ if (add_ref) {
+ if (c->in_lru) {
+ /* The entry wasn't pinned so far, let's remove it from the LRU list then */
+ assert(c->n_ref == 0);
+ assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);
+ c->in_lru = false;
+ }
+
+ c->n_ref++;
+ }
+
+ client_context_maybe_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);
+
+ *ret = c;
+ return 0;
+ }
+
+ client_context_try_shrink_to(s, cache_max()-1);
+
+ r = client_context_new(s, pid, &c);
+ if (r < 0)
+ return r;
+
+ if (add_ref)
+ c->n_ref++;
+ else {
+ r = prioq_put(s->client_contexts_lru, c, &c->lru_index);
+ if (r < 0) {
+ client_context_free(s, c);
+ return r;
+ }
+
+ c->in_lru = true;
+ }
+
+ client_context_really_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);
+
+ *ret = c;
+ return 0;
+}
+
+int client_context_get(
+ Server *s,
+ pid_t pid,
+ const struct ucred *ucred,
+ const char *label, size_t label_len,
+ const char *unit_id,
+ ClientContext **ret) {
+
+ return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, false, ret);
+}
+
+int client_context_acquire(
+ Server *s,
+ pid_t pid,
+ const struct ucred *ucred,
+ const char *label, size_t label_len,
+ const char *unit_id,
+ ClientContext **ret) {
+
+ return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, true, ret);
+};
+
+ClientContext *client_context_release(Server *s, ClientContext *c) {
+ assert(s);
+
+ if (!c)
+ return NULL;
+
+ assert(c->n_ref > 0);
+ assert(!c->in_lru);
+
+ c->n_ref--;
+ if (c->n_ref > 0)
+ return NULL;
+
+ /* The entry is not pinned anymore, let's add it to the LRU prioq if we can. If we can't we'll drop it
+ * right-away */
+
+ if (prioq_put(s->client_contexts_lru, c, &c->lru_index) < 0)
+ client_context_free(s, c);
+ else
+ c->in_lru = true;
+
+ return NULL;
+}
+
+void client_context_acquire_default(Server *s) {
+ int r;
+
+ assert(s);
+
+ /* Ensure that our own and PID1's contexts are always pinned. Our own context is particularly useful to
+ * generate driver messages. */
+
+ if (!s->my_context) {
+ struct ucred ucred = {
+ .pid = getpid_cached(),
+ .uid = getuid(),
+ .gid = getgid(),
+ };
+
+ r = client_context_acquire(s, ucred.pid, &ucred, NULL, 0, NULL, &s->my_context);
+ if (r < 0)
+ log_warning_errno(r, "Failed to acquire our own context, ignoring: %m");
+ }
+
+ if (!s->pid1_context) {
+
+ r = client_context_acquire(s, 1, NULL, NULL, 0, NULL, &s->pid1_context);
+ if (r < 0)
+ log_warning_errno(r, "Failed to acquire PID1's context, ignoring: %m");
+
+ }
+}
diff --git a/src/journal/journald-context.h b/src/journal/journald-context.h
new file mode 100644
index 0000000..5e19c71
--- /dev/null
+++ b/src/journal/journald-context.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "sd-id128.h"
+
+typedef struct ClientContext ClientContext;
+
+#include "journald-server.h"
+
+struct ClientContext {
+ unsigned n_ref;
+ unsigned lru_index;
+ usec_t timestamp;
+ bool in_lru;
+
+ pid_t pid;
+ uid_t uid;
+ gid_t gid;
+
+ char *comm;
+ char *exe;
+ char *cmdline;
+ char *capeff;
+
+ uint32_t auditid;
+ uid_t loginuid;
+
+ char *cgroup;
+ char *session;
+ uid_t owner_uid;
+
+ char *unit;
+ char *user_unit;
+
+ char *slice;
+ char *user_slice;
+
+ sd_id128_t invocation_id;
+
+ char *label;
+ size_t label_size;
+
+ int log_level_max;
+
+ struct iovec *extra_fields_iovec;
+ size_t extra_fields_n_iovec;
+ void *extra_fields_data;
+ nsec_t extra_fields_mtime;
+
+ usec_t log_rate_limit_interval;
+ unsigned log_rate_limit_burst;
+};
+
+int client_context_get(
+ Server *s,
+ pid_t pid,
+ const struct ucred *ucred,
+ const char *label, size_t label_len,
+ const char *unit_id,
+ ClientContext **ret);
+
+int client_context_acquire(
+ Server *s,
+ pid_t pid,
+ const struct ucred *ucred,
+ const char *label, size_t label_len,
+ const char *unit_id,
+ ClientContext **ret);
+
+ClientContext* client_context_release(Server *s, ClientContext *c);
+
+void client_context_maybe_refresh(
+ Server *s,
+ ClientContext *c,
+ const struct ucred *ucred,
+ const char *label, size_t label_size,
+ const char *unit_id,
+ usec_t tstamp);
+
+void client_context_acquire_default(Server *s);
+void client_context_flush_all(Server *s);
+
+static inline size_t client_context_extra_fields_n_iovec(const ClientContext *c) {
+ return c ? c->extra_fields_n_iovec : 0;
+}
+
+static inline bool client_context_test_priority(const ClientContext *c, int priority) {
+ if (!c)
+ return true;
+
+ if (c->log_level_max < 0)
+ return true;
+
+ return LOG_PRI(priority) <= c->log_level_max;
+}
diff --git a/src/journal/journald-gperf.gperf b/src/journal/journald-gperf.gperf
new file mode 100644
index 0000000..1adcb50
--- /dev/null
+++ b/src/journal/journald-gperf.gperf
@@ -0,0 +1,51 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include <sys/socket.h>
+#include "conf-parser.h"
+#include "journald-server.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name journald_gperf_hash
+%define lookup-function-name journald_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Journal.Storage, config_parse_storage, 0, offsetof(Server, storage)
+Journal.Compress, config_parse_compress, 0, offsetof(Server, compress)
+Journal.Seal, config_parse_bool, 0, offsetof(Server, seal)
+Journal.ReadKMsg, config_parse_bool, 0, offsetof(Server, read_kmsg)
+Journal.SyncIntervalSec, config_parse_sec, 0, offsetof(Server, sync_interval_usec)
+# The following is a legacy name for compatibility
+Journal.RateLimitInterval, config_parse_sec, 0, offsetof(Server, rate_limit_interval)
+Journal.RateLimitIntervalSec,config_parse_sec, 0, offsetof(Server, rate_limit_interval)
+Journal.RateLimitBurst, config_parse_unsigned, 0, offsetof(Server, rate_limit_burst)
+Journal.SystemMaxUse, config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.max_use)
+Journal.SystemMaxFileSize, config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.max_size)
+Journal.SystemKeepFree, config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.keep_free)
+Journal.SystemMaxFiles, config_parse_uint64, 0, offsetof(Server, system_storage.metrics.n_max_files)
+Journal.RuntimeMaxUse, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.max_use)
+Journal.RuntimeMaxFileSize, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.max_size)
+Journal.RuntimeKeepFree, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.keep_free)
+Journal.RuntimeMaxFiles, config_parse_uint64, 0, offsetof(Server, runtime_storage.metrics.n_max_files)
+Journal.MaxRetentionSec, config_parse_sec, 0, offsetof(Server, max_retention_usec)
+Journal.MaxFileSec, config_parse_sec, 0, offsetof(Server, max_file_usec)
+Journal.ForwardToSyslog, config_parse_bool, 0, offsetof(Server, forward_to_syslog)
+Journal.ForwardToKMsg, config_parse_bool, 0, offsetof(Server, forward_to_kmsg)
+Journal.ForwardToConsole, config_parse_bool, 0, offsetof(Server, forward_to_console)
+Journal.ForwardToWall, config_parse_bool, 0, offsetof(Server, forward_to_wall)
+Journal.TTYPath, config_parse_path, 0, offsetof(Server, tty_path)
+Journal.MaxLevelStore, config_parse_log_level, 0, offsetof(Server, max_level_store)
+Journal.MaxLevelSyslog, config_parse_log_level, 0, offsetof(Server, max_level_syslog)
+Journal.MaxLevelKMsg, config_parse_log_level, 0, offsetof(Server, max_level_kmsg)
+Journal.MaxLevelConsole, config_parse_log_level, 0, offsetof(Server, max_level_console)
+Journal.MaxLevelWall, config_parse_log_level, 0, offsetof(Server, max_level_wall)
+Journal.SplitMode, config_parse_split_mode, 0, offsetof(Server, split_mode)
+Journal.LineMax, config_parse_line_max, 0, offsetof(Server, line_max)
diff --git a/src/journal/journald-kmsg.c b/src/journal/journald-kmsg.c
new file mode 100644
index 0000000..ce82102
--- /dev/null
+++ b/src/journal/journald-kmsg.c
@@ -0,0 +1,450 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "device-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "journald-kmsg.h"
+#include "journald-server.h"
+#include "journald-syslog.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+void server_forward_kmsg(
+ Server *s,
+ int priority,
+ const char *identifier,
+ const char *message,
+ const struct ucred *ucred) {
+
+ _cleanup_free_ char *ident_buf = NULL;
+ struct iovec iovec[5];
+ char header_priority[DECIMAL_STR_MAX(priority) + 3],
+ header_pid[STRLEN("[]: ") + DECIMAL_STR_MAX(pid_t) + 1];
+ int n = 0;
+
+ assert(s);
+ assert(priority >= 0);
+ assert(priority <= 999);
+ assert(message);
+
+ if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
+ return;
+
+ if (_unlikely_(s->dev_kmsg_fd < 0))
+ return;
+
+ /* Never allow messages with kernel facility to be written to
+ * kmsg, regardless where the data comes from. */
+ priority = syslog_fixup_facility(priority);
+
+ /* First: priority field */
+ xsprintf(header_priority, "<%i>", priority);
+ iovec[n++] = IOVEC_MAKE_STRING(header_priority);
+
+ /* Second: identifier and PID */
+ if (ucred) {
+ if (!identifier) {
+ get_process_comm(ucred->pid, &ident_buf);
+ identifier = ident_buf;
+ }
+
+ xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
+
+ if (identifier)
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+
+ iovec[n++] = IOVEC_MAKE_STRING(header_pid);
+ } else if (identifier) {
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(": ");
+ }
+
+ /* Fourth: message */
+ iovec[n++] = IOVEC_MAKE_STRING(message);
+ iovec[n++] = IOVEC_MAKE_STRING("\n");
+
+ if (writev(s->dev_kmsg_fd, iovec, n) < 0)
+ log_debug_errno(errno, "Failed to write to /dev/kmsg for logging: %m");
+}
+
+static bool is_us(const char *identifier, const char *pid) {
+ pid_t pid_num;
+
+ if (!identifier || !pid)
+ return false;
+
+ if (parse_pid(pid, &pid_num) < 0)
+ return false;
+
+ return pid_num == getpid_cached() &&
+ streq(identifier, program_invocation_short_name);
+}
+
+void dev_kmsg_record(Server *s, char *p, size_t l) {
+
+ _cleanup_free_ char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL, *identifier = NULL, *pid = NULL;
+ struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS + 2 + N_IOVEC_UDEV_FIELDS];
+ char *kernel_device = NULL;
+ unsigned long long usec;
+ size_t n = 0, z = 0, j;
+ int priority, r;
+ char *e, *f, *k;
+ uint64_t serial;
+ size_t pl;
+
+ assert(s);
+ assert(p);
+
+ if (l <= 0)
+ return;
+
+ e = memchr(p, ',', l);
+ if (!e)
+ return;
+ *e = 0;
+
+ r = safe_atoi(p, &priority);
+ if (r < 0 || priority < 0 || priority > 999)
+ return;
+
+ if (s->forward_to_kmsg && LOG_FAC(priority) != LOG_KERN)
+ return;
+
+ l -= (e - p) + 1;
+ p = e + 1;
+ e = memchr(p, ',', l);
+ if (!e)
+ return;
+ *e = 0;
+
+ r = safe_atou64(p, &serial);
+ if (r < 0)
+ return;
+
+ if (s->kernel_seqnum) {
+ /* We already read this one? */
+ if (serial < *s->kernel_seqnum)
+ return;
+
+ /* Did we lose any? */
+ if (serial > *s->kernel_seqnum)
+ server_driver_message(s, 0,
+ "MESSAGE_ID=" SD_MESSAGE_JOURNAL_MISSED_STR,
+ LOG_MESSAGE("Missed %"PRIu64" kernel messages",
+ serial - *s->kernel_seqnum),
+ NULL);
+
+ /* Make sure we never read this one again. Note that
+ * we always store the next message serial we expect
+ * here, simply because this makes handling the first
+ * message with serial 0 easy. */
+ *s->kernel_seqnum = serial + 1;
+ }
+
+ l -= (e - p) + 1;
+ p = e + 1;
+ f = memchr(p, ';', l);
+ if (!f)
+ return;
+ /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
+ e = memchr(p, ',', l);
+ if (!e || f < e)
+ e = f;
+ *e = 0;
+
+ r = safe_atollu(p, &usec);
+ if (r < 0)
+ return;
+
+ l -= (f - p) + 1;
+ p = f + 1;
+ e = memchr(p, '\n', l);
+ if (!e)
+ return;
+ *e = 0;
+
+ pl = e - p;
+ l -= (e - p) + 1;
+ k = e + 1;
+
+ for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
+ char *m;
+ /* Metadata fields attached */
+
+ if (*k != ' ')
+ break;
+
+ k++, l--;
+
+ e = memchr(k, '\n', l);
+ if (!e)
+ goto finish;
+
+ *e = 0;
+
+ if (cunescape_length_with_prefix(k, e - k, "_KERNEL_", UNESCAPE_RELAX, &m) < 0)
+ break;
+
+ if (startswith(m, "_KERNEL_DEVICE="))
+ kernel_device = m + 15;
+
+ iovec[n++] = IOVEC_MAKE_STRING(m);
+ z++;
+
+ l -= (e - k) + 1;
+ k = e + 1;
+ }
+
+ if (kernel_device) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+
+ if (sd_device_new_from_device_id(&d, kernel_device) >= 0) {
+ const char *g;
+ char *b;
+
+ if (sd_device_get_devname(d, &g) >= 0) {
+ b = strappend("_UDEV_DEVNODE=", g);
+ if (b) {
+ iovec[n++] = IOVEC_MAKE_STRING(b);
+ z++;
+ }
+ }
+
+ if (sd_device_get_sysname(d, &g) >= 0) {
+ b = strappend("_UDEV_SYSNAME=", g);
+ if (b) {
+ iovec[n++] = IOVEC_MAKE_STRING(b);
+ z++;
+ }
+ }
+
+ j = 0;
+ FOREACH_DEVICE_DEVLINK(d, g) {
+
+ if (j >= N_IOVEC_UDEV_FIELDS)
+ break;
+
+ b = strappend("_UDEV_DEVLINK=", g);
+ if (b) {
+ iovec[n++] = IOVEC_MAKE_STRING(b);
+ z++;
+ }
+
+ j++;
+ }
+ }
+ }
+
+ if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu", usec) >= 0)
+ iovec[n++] = IOVEC_MAKE_STRING(source_time);
+
+ iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=kernel");
+
+ if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
+
+ if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
+
+ if (LOG_FAC(priority) == LOG_KERN)
+ iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=kernel");
+ else {
+ pl -= syslog_parse_identifier((const char**) &p, &identifier, &pid);
+
+ /* Avoid any messages we generated ourselves via
+ * log_info() and friends. */
+ if (is_us(identifier, pid))
+ goto finish;
+
+ if (identifier) {
+ syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
+ if (syslog_identifier)
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
+ }
+
+ if (pid) {
+ syslog_pid = strappend("SYSLOG_PID=", pid);
+ if (syslog_pid)
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_pid);
+ }
+ }
+
+ if (cunescape_length_with_prefix(p, pl, "MESSAGE=", UNESCAPE_RELAX, &message) >= 0)
+ iovec[n++] = IOVEC_MAKE_STRING(message);
+
+ server_dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, priority, 0);
+
+finish:
+ for (j = 0; j < z; j++)
+ free(iovec[j].iov_base);
+}
+
+static int server_read_dev_kmsg(Server *s) {
+ char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
+ ssize_t l;
+
+ assert(s);
+ assert(s->dev_kmsg_fd >= 0);
+
+ l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
+ if (l == 0)
+ return 0;
+ if (l < 0) {
+ /* Old kernels who don't allow reading from /dev/kmsg
+ * return EINVAL when we try. So handle this cleanly,
+ * but don' try to ever read from it again. */
+ if (errno == EINVAL) {
+ s->dev_kmsg_event_source = sd_event_source_unref(s->dev_kmsg_event_source);
+ return 0;
+ }
+
+ if (IN_SET(errno, EAGAIN, EINTR, EPIPE))
+ return 0;
+
+ return log_error_errno(errno, "Failed to read from kernel: %m");
+ }
+
+ dev_kmsg_record(s, buffer, l);
+ return 1;
+}
+
+int server_flush_dev_kmsg(Server *s) {
+ int r;
+
+ assert(s);
+
+ if (s->dev_kmsg_fd < 0)
+ return 0;
+
+ if (!s->dev_kmsg_readable)
+ return 0;
+
+ log_debug("Flushing /dev/kmsg...");
+
+ for (;;) {
+ r = server_read_dev_kmsg(s);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ break;
+ }
+
+ return 0;
+}
+
+static int dispatch_dev_kmsg(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ Server *s = userdata;
+
+ assert(es);
+ assert(fd == s->dev_kmsg_fd);
+ assert(s);
+
+ if (revents & EPOLLERR)
+ log_warning("/dev/kmsg buffer overrun, some messages lost.");
+
+ if (!(revents & EPOLLIN))
+ log_error("Got invalid event from epoll for /dev/kmsg: %"PRIx32, revents);
+
+ return server_read_dev_kmsg(s);
+}
+
+int server_open_dev_kmsg(Server *s) {
+ mode_t mode;
+ int r;
+
+ assert(s);
+
+ if (s->read_kmsg)
+ mode = O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY;
+ else
+ mode = O_WRONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY;
+
+ s->dev_kmsg_fd = open("/dev/kmsg", mode);
+ if (s->dev_kmsg_fd < 0) {
+ log_full(errno == ENOENT ? LOG_DEBUG : LOG_WARNING,
+ "Failed to open /dev/kmsg, ignoring: %m");
+ return 0;
+ }
+
+ if (!s->read_kmsg)
+ return 0;
+
+ r = sd_event_add_io(s->event, &s->dev_kmsg_event_source, s->dev_kmsg_fd, EPOLLIN, dispatch_dev_kmsg, s);
+ if (r < 0) {
+
+ /* This will fail with EPERM on older kernels where
+ * /dev/kmsg is not readable. */
+ if (r == -EPERM) {
+ r = 0;
+ goto fail;
+ }
+
+ log_error_errno(r, "Failed to add /dev/kmsg fd to event loop: %m");
+ goto fail;
+ }
+
+ r = sd_event_source_set_priority(s->dev_kmsg_event_source, SD_EVENT_PRIORITY_IMPORTANT+10);
+ if (r < 0) {
+ log_error_errno(r, "Failed to adjust priority of kmsg event source: %m");
+ goto fail;
+ }
+
+ s->dev_kmsg_readable = true;
+
+ return 0;
+
+fail:
+ s->dev_kmsg_event_source = sd_event_source_unref(s->dev_kmsg_event_source);
+ s->dev_kmsg_fd = safe_close(s->dev_kmsg_fd);
+
+ return r;
+}
+
+int server_open_kernel_seqnum(Server *s) {
+ _cleanup_close_ int fd;
+ uint64_t *p;
+ int r;
+
+ assert(s);
+
+ /* We store the seqnum we last read in an mmaped file. That
+ * way we can just use it like a variable, but it is
+ * persistent and automatically flushed at reboot. */
+
+ fd = open("/run/systemd/journal/kernel-seqnum", O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
+ if (fd < 0) {
+ log_error_errno(errno, "Failed to open /run/systemd/journal/kernel-seqnum, ignoring: %m");
+ return 0;
+ }
+
+ r = posix_fallocate(fd, 0, sizeof(uint64_t));
+ if (r != 0) {
+ log_error_errno(r, "Failed to allocate sequential number file, ignoring: %m");
+ return 0;
+ }
+
+ p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ if (p == MAP_FAILED) {
+ log_error_errno(errno, "Failed to map sequential number file, ignoring: %m");
+ return 0;
+ }
+
+ s->kernel_seqnum = p;
+
+ return 0;
+}
diff --git a/src/journal/journald-kmsg.h b/src/journal/journald-kmsg.h
new file mode 100644
index 0000000..2326bc8
--- /dev/null
+++ b/src/journal/journald-kmsg.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "journald-server.h"
+
+int server_open_dev_kmsg(Server *s);
+int server_flush_dev_kmsg(Server *s);
+
+void server_forward_kmsg(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred);
+
+int server_open_kernel_seqnum(Server *s);
+
+void dev_kmsg_record(Server *s, char *p, size_t l);
diff --git a/src/journal/journald-native.c b/src/journal/journald-native.c
new file mode 100644
index 0000000..221188d
--- /dev/null
+++ b/src/journal/journald-native.c
@@ -0,0 +1,502 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stddef.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <sys/statvfs.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "journal-importer.h"
+#include "journal-util.h"
+#include "journald-console.h"
+#include "journald-kmsg.h"
+#include "journald-native.h"
+#include "journald-server.h"
+#include "journald-syslog.h"
+#include "journald-wall.h"
+#include "memfd-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unaligned.h"
+
+static bool allow_object_pid(const struct ucred *ucred) {
+ return ucred && ucred->uid == 0;
+}
+
+static void server_process_entry_meta(
+ const char *p, size_t l,
+ const struct ucred *ucred,
+ int *priority,
+ char **identifier,
+ char **message,
+ pid_t *object_pid) {
+
+ /* We need to determine the priority of this entry for the rate limiting logic */
+
+ if (l == 10 &&
+ startswith(p, "PRIORITY=") &&
+ p[9] >= '0' && p[9] <= '9')
+ *priority = (*priority & LOG_FACMASK) | (p[9] - '0');
+
+ else if (l == 17 &&
+ startswith(p, "SYSLOG_FACILITY=") &&
+ p[16] >= '0' && p[16] <= '9')
+ *priority = (*priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
+
+ else if (l == 18 &&
+ startswith(p, "SYSLOG_FACILITY=") &&
+ p[16] >= '0' && p[16] <= '9' &&
+ p[17] >= '0' && p[17] <= '9')
+ *priority = (*priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
+
+ else if (l >= 19 &&
+ startswith(p, "SYSLOG_IDENTIFIER=")) {
+ char *t;
+
+ t = strndup(p + 18, l - 18);
+ if (t) {
+ free(*identifier);
+ *identifier = t;
+ }
+
+ } else if (l >= 8 &&
+ startswith(p, "MESSAGE=")) {
+ char *t;
+
+ t = strndup(p + 8, l - 8);
+ if (t) {
+ free(*message);
+ *message = t;
+ }
+
+ } else if (l > STRLEN("OBJECT_PID=") &&
+ l < STRLEN("OBJECT_PID=") + DECIMAL_STR_MAX(pid_t) &&
+ startswith(p, "OBJECT_PID=") &&
+ allow_object_pid(ucred)) {
+ char buf[DECIMAL_STR_MAX(pid_t)];
+ memcpy(buf, p + STRLEN("OBJECT_PID="),
+ l - STRLEN("OBJECT_PID="));
+ buf[l-STRLEN("OBJECT_PID=")] = '\0';
+
+ (void) parse_pid(buf, object_pid);
+ }
+}
+
+static int server_process_entry(
+ Server *s,
+ const void *buffer, size_t *remaining,
+ ClientContext *context,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label, size_t label_len) {
+
+ /* Process a single entry from a native message. Returns 0 if nothing special happened and the message
+ * processing should continue, and a negative or positive value otherwise.
+ *
+ * Note that *remaining is altered on both success and failure. */
+
+ size_t n = 0, j, tn = (size_t) -1, m = 0, entry_size = 0;
+ char *identifier = NULL, *message = NULL;
+ struct iovec *iovec = NULL;
+ int priority = LOG_INFO;
+ pid_t object_pid = 0;
+ const char *p;
+ int r = 1;
+
+ p = buffer;
+
+ while (*remaining > 0) {
+ const char *e, *q;
+
+ e = memchr(p, '\n', *remaining);
+
+ if (!e) {
+ /* Trailing noise, let's ignore it, and flush what we collected */
+ log_debug("Received message with trailing noise, ignoring.");
+ break; /* finish processing of the message */
+ }
+
+ if (e == p) {
+ /* Entry separator */
+ *remaining -= 1;
+ break;
+ }
+
+ if (IN_SET(*p, '.', '#')) {
+ /* Ignore control commands for now, and comments too. */
+ *remaining -= (e - p) + 1;
+ p = e + 1;
+ continue;
+ }
+
+ /* A property follows */
+ if (n > ENTRY_FIELD_COUNT_MAX) {
+ log_debug("Received an entry that has more than " STRINGIFY(ENTRY_FIELD_COUNT_MAX) " fields, ignoring entry.");
+ goto finish;
+ }
+
+ /* n existing properties, 1 new, +1 for _TRANSPORT */
+ if (!GREEDY_REALLOC(iovec, m,
+ n + 2 +
+ N_IOVEC_META_FIELDS + N_IOVEC_OBJECT_FIELDS +
+ client_context_extra_fields_n_iovec(context))) {
+ r = log_oom();
+ goto finish;
+ }
+
+ q = memchr(p, '=', e - p);
+ if (q) {
+ if (journal_field_valid(p, q - p, false)) {
+ size_t l;
+
+ l = e - p;
+ if (l > DATA_SIZE_MAX) {
+ log_debug("Received text block of %zu bytes is too large, ignoring entry.", l);
+ goto finish;
+ }
+
+ if (entry_size + l + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
+ log_debug("Entry is too big (%zu bytes after processing %zu entries), ignoring entry.",
+ entry_size + l, n + 1);
+ goto finish;
+ }
+
+ /* If the field name starts with an underscore, skip the variable, since that indicates
+ * a trusted field */
+ iovec[n++] = IOVEC_MAKE((char*) p, l);
+ entry_size += l;
+
+ server_process_entry_meta(p, l, ucred,
+ &priority,
+ &identifier,
+ &message,
+ &object_pid);
+ }
+
+ *remaining -= (e - p) + 1;
+ p = e + 1;
+ continue;
+ } else {
+ uint64_t l, total;
+ char *k;
+
+ if (*remaining < e - p + 1 + sizeof(uint64_t) + 1) {
+ log_debug("Failed to parse message, ignoring.");
+ break;
+ }
+
+ l = unaligned_read_le64(e + 1);
+ if (l > DATA_SIZE_MAX) {
+ log_debug("Received binary data block of %"PRIu64" bytes is too large, ignoring entry.", l);
+ goto finish;
+ }
+
+ total = (e - p) + 1 + l;
+ if (entry_size + total + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
+ log_debug("Entry is too big (%"PRIu64"bytes after processing %zu fields), ignoring.",
+ entry_size + total, n + 1);
+ goto finish;
+ }
+
+ if ((uint64_t) *remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
+ e[1+sizeof(uint64_t)+l] != '\n') {
+ log_debug("Failed to parse message, ignoring.");
+ break;
+ }
+
+ k = malloc(total);
+ if (!k) {
+ log_oom();
+ break;
+ }
+
+ memcpy(k, p, e - p);
+ k[e - p] = '=';
+ memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
+
+ if (journal_field_valid(p, e - p, false)) {
+ iovec[n] = IOVEC_MAKE(k, (e - p) + 1 + l);
+ entry_size += iovec[n].iov_len;
+ n++;
+
+ server_process_entry_meta(k, (e - p) + 1 + l, ucred,
+ &priority,
+ &identifier,
+ &message,
+ &object_pid);
+ } else
+ free(k);
+
+ *remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
+ p = e + 1 + sizeof(uint64_t) + l + 1;
+ }
+ }
+
+ if (n <= 0)
+ goto finish;
+
+ tn = n++;
+ iovec[tn] = IOVEC_MAKE_STRING("_TRANSPORT=journal");
+ entry_size += STRLEN("_TRANSPORT=journal");
+
+ if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
+ log_debug("Entry is too big with %zu properties and %zu bytes, ignoring.", n, entry_size);
+ goto finish;
+ }
+
+ r = 0; /* Success, we read the message. */
+
+ if (!client_context_test_priority(context, priority))
+ goto finish;
+
+ if (message) {
+ if (s->forward_to_syslog)
+ server_forward_syslog(s, syslog_fixup_facility(priority), identifier, message, ucred, tv);
+
+ if (s->forward_to_kmsg)
+ server_forward_kmsg(s, priority, identifier, message, ucred);
+
+ if (s->forward_to_console)
+ server_forward_console(s, priority, identifier, message, ucred);
+
+ if (s->forward_to_wall)
+ server_forward_wall(s, priority, identifier, message, ucred);
+ }
+
+ server_dispatch_message(s, iovec, n, m, context, tv, priority, object_pid);
+
+finish:
+ for (j = 0; j < n; j++) {
+ if (j == tn)
+ continue;
+
+ if (iovec[j].iov_base < buffer ||
+ (const char*) iovec[j].iov_base >= p + *remaining)
+ free(iovec[j].iov_base);
+ }
+
+ free(iovec);
+ free(identifier);
+ free(message);
+
+ return r;
+}
+
+void server_process_native_message(
+ Server *s,
+ const char *buffer, size_t buffer_size,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label, size_t label_len) {
+
+ size_t remaining = buffer_size;
+ ClientContext *context = NULL;
+ int r;
+
+ assert(s);
+ assert(buffer || buffer_size == 0);
+
+ if (ucred && pid_is_valid(ucred->pid)) {
+ r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context);
+ if (r < 0)
+ log_warning_errno(r, "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m", ucred->pid);
+ }
+
+ do {
+ r = server_process_entry(s,
+ (const uint8_t*) buffer + (buffer_size - remaining), &remaining,
+ context, ucred, tv, label, label_len);
+ } while (r == 0);
+}
+
+void server_process_native_file(
+ Server *s,
+ int fd,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label, size_t label_len) {
+
+ struct stat st;
+ bool sealed;
+ int r;
+
+ /* Data is in the passed fd, probably it didn't fit in a datagram. */
+
+ assert(s);
+ assert(fd >= 0);
+
+ /* If it's a memfd, check if it is sealed. If so, we can just
+ * mmap it and use it, and do not need to copy the data out. */
+ sealed = memfd_get_sealed(fd) > 0;
+
+ if (!sealed && (!ucred || ucred->uid != 0)) {
+ _cleanup_free_ char *k = NULL;
+ const char *e;
+
+ /* If this is not a sealed memfd, and the peer is unknown or
+ * unprivileged, then verify the path. */
+
+ r = fd_get_path(fd, &k);
+ if (r < 0) {
+ log_error_errno(r, "readlink(/proc/self/fd/%i) failed: %m", fd);
+ return;
+ }
+
+ e = PATH_STARTSWITH_SET(k, "/dev/shm/", "/tmp/", "/var/tmp/");
+ if (!e) {
+ log_error("Received file outside of allowed directories. Refusing.");
+ return;
+ }
+
+ if (!filename_is_valid(e)) {
+ log_error("Received file in subdirectory of allowed directories. Refusing.");
+ return;
+ }
+ }
+
+ if (fstat(fd, &st) < 0) {
+ log_error_errno(errno, "Failed to stat passed file, ignoring: %m");
+ return;
+ }
+
+ if (!S_ISREG(st.st_mode)) {
+ log_error("File passed is not regular. Ignoring.");
+ return;
+ }
+
+ if (st.st_size <= 0)
+ return;
+
+ /* When !sealed, set a lower memory limit. We have to read the file,
+ * effectively doubling memory use. */
+ if (st.st_size > ENTRY_SIZE_MAX / (sealed ? 1 : 2)) {
+ log_error("File passed too large (%"PRIu64" bytes). Ignoring.", (uint64_t) st.st_size);
+ return;
+ }
+
+ if (sealed) {
+ void *p;
+ size_t ps;
+
+ /* The file is sealed, we can just map it and use it. */
+
+ ps = PAGE_ALIGN(st.st_size);
+ p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (p == MAP_FAILED) {
+ log_error_errno(errno, "Failed to map memfd, ignoring: %m");
+ return;
+ }
+
+ server_process_native_message(s, p, st.st_size, ucred, tv, label, label_len);
+ assert_se(munmap(p, ps) >= 0);
+ } else {
+ _cleanup_free_ void *p = NULL;
+ struct statvfs vfs;
+ ssize_t n;
+
+ if (fstatvfs(fd, &vfs) < 0) {
+ log_error_errno(errno, "Failed to stat file system of passed file, not processing it: %m");
+ return;
+ }
+
+ /* Refuse operating on file systems that have
+ * mandatory locking enabled, see:
+ *
+ * https://github.com/systemd/systemd/issues/1822
+ */
+ if (vfs.f_flag & ST_MANDLOCK) {
+ log_error("Received file descriptor from file system with mandatory locking enabled, not processing it.");
+ return;
+ }
+
+ /* Make the fd non-blocking. On regular files this has
+ * the effect of bypassing mandatory locking. Of
+ * course, this should normally not be necessary given
+ * the check above, but let's better be safe than
+ * sorry, after all NFS is pretty confusing regarding
+ * file system flags, and we better don't trust it,
+ * and so is SMB. */
+ r = fd_nonblock(fd, true);
+ if (r < 0) {
+ log_error_errno(r, "Failed to make fd non-blocking, not processing it: %m");
+ return;
+ }
+
+ /* The file is not sealed, we can't map the file here, since
+ * clients might then truncate it and trigger a SIGBUS for
+ * us. So let's stupidly read it. */
+
+ p = malloc(st.st_size);
+ if (!p) {
+ log_oom();
+ return;
+ }
+
+ n = pread(fd, p, st.st_size, 0);
+ if (n < 0)
+ log_error_errno(errno, "Failed to read file, ignoring: %m");
+ else if (n > 0)
+ server_process_native_message(s, p, n, ucred, tv, label, label_len);
+ }
+}
+
+int server_open_native_socket(Server *s) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/socket",
+ };
+ int r;
+
+ assert(s);
+
+ if (s->native_fd < 0) {
+ s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s->native_fd < 0)
+ return log_error_errno(errno, "socket() failed: %m");
+
+ (void) sockaddr_un_unlink(&sa.un);
+
+ r = bind(s->native_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
+
+ (void) chmod(sa.un.sun_path, 0666);
+ } else
+ (void) fd_nonblock(s->native_fd, true);
+
+ r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_error_errno(r, "SO_PASSCRED failed: %m");
+
+#if HAVE_SELINUX
+ if (mac_selinux_use()) {
+ r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_PASSSEC, true);
+ if (r < 0)
+ log_warning_errno(r, "SO_PASSSEC failed: %m");
+ }
+#endif
+
+ r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, true);
+ if (r < 0)
+ return log_error_errno(r, "SO_TIMESTAMP failed: %m");
+
+ r = sd_event_add_io(s->event, &s->native_event_source, s->native_fd, EPOLLIN, server_process_datagram, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add native server fd to event loop: %m");
+
+ r = sd_event_source_set_priority(s->native_event_source, SD_EVENT_PRIORITY_NORMAL+5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust native event source priority: %m");
+
+ return 0;
+}
diff --git a/src/journal/journald-native.h b/src/journal/journald-native.h
new file mode 100644
index 0000000..2a33ef7
--- /dev/null
+++ b/src/journal/journald-native.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "journald-server.h"
+
+void server_process_native_message(
+ Server *s,
+ const char *buffer,
+ size_t buffer_size,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label,
+ size_t label_len);
+
+void server_process_native_file(
+ Server *s,
+ int fd,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label,
+ size_t label_len);
+
+int server_open_native_socket(Server *s);
diff --git a/src/journal/journald-rate-limit.c b/src/journal/journald-rate-limit.c
new file mode 100644
index 0000000..0b42d53
--- /dev/null
+++ b/src/journal/journald-rate-limit.c
@@ -0,0 +1,255 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "hashmap.h"
+#include "journald-rate-limit.h"
+#include "list.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "util.h"
+
+#define POOLS_MAX 5
+#define BUCKETS_MAX 127
+#define GROUPS_MAX 2047
+
+static const int priority_map[] = {
+ [LOG_EMERG] = 0,
+ [LOG_ALERT] = 0,
+ [LOG_CRIT] = 0,
+ [LOG_ERR] = 1,
+ [LOG_WARNING] = 2,
+ [LOG_NOTICE] = 3,
+ [LOG_INFO] = 3,
+ [LOG_DEBUG] = 4
+};
+
+typedef struct JournalRateLimitPool JournalRateLimitPool;
+typedef struct JournalRateLimitGroup JournalRateLimitGroup;
+
+struct JournalRateLimitPool {
+ usec_t begin;
+ unsigned num;
+ unsigned suppressed;
+};
+
+struct JournalRateLimitGroup {
+ JournalRateLimit *parent;
+
+ char *id;
+
+ /* Interval is stored to keep track of when the group expires */
+ usec_t interval;
+
+ JournalRateLimitPool pools[POOLS_MAX];
+ uint64_t hash;
+
+ LIST_FIELDS(JournalRateLimitGroup, bucket);
+ LIST_FIELDS(JournalRateLimitGroup, lru);
+};
+
+struct JournalRateLimit {
+
+ JournalRateLimitGroup* buckets[BUCKETS_MAX];
+ JournalRateLimitGroup *lru, *lru_tail;
+
+ unsigned n_groups;
+
+ uint8_t hash_key[16];
+};
+
+JournalRateLimit *journal_rate_limit_new(void) {
+ JournalRateLimit *r;
+
+ r = new0(JournalRateLimit, 1);
+ if (!r)
+ return NULL;
+
+ random_bytes(r->hash_key, sizeof(r->hash_key));
+
+ return r;
+}
+
+static void journal_rate_limit_group_free(JournalRateLimitGroup *g) {
+ assert(g);
+
+ if (g->parent) {
+ assert(g->parent->n_groups > 0);
+
+ if (g->parent->lru_tail == g)
+ g->parent->lru_tail = g->lru_prev;
+
+ LIST_REMOVE(lru, g->parent->lru, g);
+ LIST_REMOVE(bucket, g->parent->buckets[g->hash % BUCKETS_MAX], g);
+
+ g->parent->n_groups--;
+ }
+
+ free(g->id);
+ free(g);
+}
+
+void journal_rate_limit_free(JournalRateLimit *r) {
+ assert(r);
+
+ while (r->lru)
+ journal_rate_limit_group_free(r->lru);
+
+ free(r);
+}
+
+_pure_ static bool journal_rate_limit_group_expired(JournalRateLimitGroup *g, usec_t ts) {
+ unsigned i;
+
+ assert(g);
+
+ for (i = 0; i < POOLS_MAX; i++)
+ if (g->pools[i].begin + g->interval >= ts)
+ return false;
+
+ return true;
+}
+
+static void journal_rate_limit_vacuum(JournalRateLimit *r, usec_t ts) {
+ assert(r);
+
+ /* Makes room for at least one new item, but drop all
+ * expored items too. */
+
+ while (r->n_groups >= GROUPS_MAX ||
+ (r->lru_tail && journal_rate_limit_group_expired(r->lru_tail, ts)))
+ journal_rate_limit_group_free(r->lru_tail);
+}
+
+static JournalRateLimitGroup* journal_rate_limit_group_new(JournalRateLimit *r, const char *id, usec_t interval, usec_t ts) {
+ JournalRateLimitGroup *g;
+
+ assert(r);
+ assert(id);
+
+ g = new0(JournalRateLimitGroup, 1);
+ if (!g)
+ return NULL;
+
+ g->id = strdup(id);
+ if (!g->id)
+ goto fail;
+
+ g->hash = siphash24_string(g->id, r->hash_key);
+
+ g->interval = interval;
+
+ journal_rate_limit_vacuum(r, ts);
+
+ LIST_PREPEND(bucket, r->buckets[g->hash % BUCKETS_MAX], g);
+ LIST_PREPEND(lru, r->lru, g);
+ if (!g->lru_next)
+ r->lru_tail = g;
+ r->n_groups++;
+
+ g->parent = r;
+ return g;
+
+fail:
+ journal_rate_limit_group_free(g);
+ return NULL;
+}
+
+static unsigned burst_modulate(unsigned burst, uint64_t available) {
+ unsigned k;
+
+ /* Modulates the burst rate a bit with the amount of available
+ * disk space */
+
+ k = u64log2(available);
+
+ /* 1MB */
+ if (k <= 20)
+ return burst;
+
+ burst = (burst * (k-16)) / 4;
+
+ /*
+ * Example:
+ *
+ * <= 1MB = rate * 1
+ * 16MB = rate * 2
+ * 256MB = rate * 3
+ * 4GB = rate * 4
+ * 64GB = rate * 5
+ * 1TB = rate * 6
+ */
+
+ return burst;
+}
+
+int journal_rate_limit_test(JournalRateLimit *r, const char *id, usec_t rl_interval, unsigned rl_burst, int priority, uint64_t available) {
+ uint64_t h;
+ JournalRateLimitGroup *g;
+ JournalRateLimitPool *p;
+ unsigned burst;
+ usec_t ts;
+
+ assert(id);
+
+ /* Returns:
+ *
+ * 0 → the log message shall be suppressed,
+ * 1 + n → the log message shall be permitted, and n messages were dropped from the peer before
+ * < 0 → error
+ */
+
+ if (!r)
+ return 1;
+
+ ts = now(CLOCK_MONOTONIC);
+
+ h = siphash24_string(id, r->hash_key);
+ g = r->buckets[h % BUCKETS_MAX];
+
+ LIST_FOREACH(bucket, g, g)
+ if (streq(g->id, id))
+ break;
+
+ if (!g) {
+ g = journal_rate_limit_group_new(r, id, rl_interval, ts);
+ if (!g)
+ return -ENOMEM;
+ } else
+ g->interval = rl_interval;
+
+ if (rl_interval == 0 || rl_burst == 0)
+ return 1;
+
+ burst = burst_modulate(rl_burst, available);
+
+ p = &g->pools[priority_map[priority]];
+
+ if (p->begin <= 0) {
+ p->suppressed = 0;
+ p->num = 1;
+ p->begin = ts;
+ return 1;
+ }
+
+ if (p->begin + rl_interval < ts) {
+ unsigned s;
+
+ s = p->suppressed;
+ p->suppressed = 0;
+ p->num = 1;
+ p->begin = ts;
+
+ return 1 + s;
+ }
+
+ if (p->num < burst) {
+ p->num++;
+ return 1;
+ }
+
+ p->suppressed++;
+ return 0;
+}
diff --git a/src/journal/journald-rate-limit.h b/src/journal/journald-rate-limit.h
new file mode 100644
index 0000000..a299280
--- /dev/null
+++ b/src/journal/journald-rate-limit.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "util.h"
+
+typedef struct JournalRateLimit JournalRateLimit;
+
+JournalRateLimit *journal_rate_limit_new(void);
+void journal_rate_limit_free(JournalRateLimit *r);
+int journal_rate_limit_test(JournalRateLimit *r, const char *id, usec_t rl_interval, unsigned rl_burst, int priority, uint64_t available);
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
new file mode 100644
index 0000000..2a960eb
--- /dev/null
+++ b/src/journal/journald-server.c
@@ -0,0 +1,2192 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/signalfd.h>
+#include <sys/statvfs.h>
+#include <linux/sockios.h>
+
+#include "sd-daemon.h"
+#include "sd-journal.h"
+#include "sd-messages.h"
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "cgroup-util.h"
+#include "conf-parser.h"
+#include "dirent-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "journal-authenticate.h"
+#include "journal-file.h"
+#include "journal-internal.h"
+#include "journal-vacuum.h"
+#include "journald-audit.h"
+#include "journald-context.h"
+#include "journald-kmsg.h"
+#include "journald-native.h"
+#include "journald-rate-limit.h"
+#include "journald-server.h"
+#include "journald-stream.h"
+#include "journald-syslog.h"
+#include "log.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "rm-rf.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "user-util.h"
+
+#define USER_JOURNALS_MAX 1024
+
+#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
+#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
+#define DEFAULT_RATE_LIMIT_BURST 10000
+#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
+
+#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
+
+#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
+
+/* The period to insert between posting changes for coalescing */
+#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
+
+/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
+ * for a bit of additional metadata. */
+#define DEFAULT_LINE_MAX (48*1024)
+
+#define DEFERRED_CLOSES_MAX (4096)
+
+static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ struct statvfs ss;
+
+ assert(ret_used);
+ assert(ret_free);
+
+ d = opendir(path);
+ if (!d)
+ return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
+ errno, "Failed to open %s: %m", path);
+
+ if (fstatvfs(dirfd(d), &ss) < 0)
+ return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
+
+ *ret_free = ss.f_bsize * ss.f_bavail;
+ *ret_used = 0;
+ FOREACH_DIRENT_ALL(de, d, break) {
+ struct stat st;
+
+ if (!endswith(de->d_name, ".journal") &&
+ !endswith(de->d_name, ".journal~"))
+ continue;
+
+ if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
+ continue;
+ }
+
+ if (!S_ISREG(st.st_mode))
+ continue;
+
+ *ret_used += (uint64_t) st.st_blocks * 512UL;
+ }
+
+ return 0;
+}
+
+static void cache_space_invalidate(JournalStorageSpace *space) {
+ zero(*space);
+}
+
+static int cache_space_refresh(Server *s, JournalStorage *storage) {
+ JournalStorageSpace *space;
+ JournalMetrics *metrics;
+ uint64_t vfs_used, vfs_avail, avail;
+ usec_t ts;
+ int r;
+
+ assert(s);
+
+ metrics = &storage->metrics;
+ space = &storage->space;
+
+ ts = now(CLOCK_MONOTONIC);
+
+ if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
+ return 0;
+
+ r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
+ if (r < 0)
+ return r;
+
+ space->vfs_used = vfs_used;
+ space->vfs_available = vfs_avail;
+
+ avail = LESS_BY(vfs_avail, metrics->keep_free);
+
+ space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
+ space->available = LESS_BY(space->limit, vfs_used);
+ space->timestamp = ts;
+ return 1;
+}
+
+static void patch_min_use(JournalStorage *storage) {
+ assert(storage);
+
+ /* Let's bump the min_use limit to the current usage on disk. We do
+ * this when starting up and first opening the journal files. This way
+ * sudden spikes in disk usage will not cause journald to vacuum files
+ * without bounds. Note that this means that only a restart of journald
+ * will make it reset this value. */
+
+ storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
+}
+
+static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
+ JournalStorage *js;
+ int r;
+
+ assert(s);
+
+ js = s->system_journal ? &s->system_storage : &s->runtime_storage;
+
+ r = cache_space_refresh(s, js);
+ if (r >= 0) {
+ if (available)
+ *available = js->space.available;
+ if (limit)
+ *limit = js->space.limit;
+ }
+ return r;
+}
+
+void server_space_usage_message(Server *s, JournalStorage *storage) {
+ char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
+ fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
+ JournalMetrics *metrics;
+
+ assert(s);
+
+ if (!storage)
+ storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
+
+ if (cache_space_refresh(s, storage) < 0)
+ return;
+
+ metrics = &storage->metrics;
+ format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
+ format_bytes(fb2, sizeof(fb2), metrics->max_use);
+ format_bytes(fb3, sizeof(fb3), metrics->keep_free);
+ format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
+ format_bytes(fb5, sizeof(fb5), storage->space.limit);
+ format_bytes(fb6, sizeof(fb6), storage->space.available);
+
+ server_driver_message(s, 0,
+ "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
+ LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
+ storage->name, storage->path, fb1, fb5, fb6),
+ "JOURNAL_NAME=%s", storage->name,
+ "JOURNAL_PATH=%s", storage->path,
+ "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
+ "CURRENT_USE_PRETTY=%s", fb1,
+ "MAX_USE=%"PRIu64, metrics->max_use,
+ "MAX_USE_PRETTY=%s", fb2,
+ "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
+ "DISK_KEEP_FREE_PRETTY=%s", fb3,
+ "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
+ "DISK_AVAILABLE_PRETTY=%s", fb4,
+ "LIMIT=%"PRIu64, storage->space.limit,
+ "LIMIT_PRETTY=%s", fb5,
+ "AVAILABLE=%"PRIu64, storage->space.available,
+ "AVAILABLE_PRETTY=%s", fb6,
+ NULL);
+}
+
+static bool uid_for_system_journal(uid_t uid) {
+
+ /* Returns true if the specified UID shall get its data stored in the system journal*/
+
+ return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
+}
+
+static void server_add_acls(JournalFile *f, uid_t uid) {
+#if HAVE_ACL
+ int r;
+#endif
+ assert(f);
+
+#if HAVE_ACL
+ if (uid_for_system_journal(uid))
+ return;
+
+ r = add_acls_for_user(f->fd, uid);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
+#endif
+}
+
+static int open_journal(
+ Server *s,
+ bool reliably,
+ const char *fname,
+ int flags,
+ bool seal,
+ JournalMetrics *metrics,
+ JournalFile **ret) {
+
+ JournalFile *f;
+ int r;
+
+ assert(s);
+ assert(fname);
+ assert(ret);
+
+ if (reliably)
+ r = journal_file_open_reliably(fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes,
+ seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
+ else
+ r = journal_file_open(-1, fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes, seal,
+ metrics, s->mmap, s->deferred_closes, NULL, &f);
+
+ if (r < 0)
+ return r;
+
+ r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
+ if (r < 0) {
+ (void) journal_file_close(f);
+ return r;
+ }
+
+ *ret = f;
+ return r;
+}
+
+static bool flushed_flag_is_set(void) {
+ return access("/run/systemd/journal/flushed", F_OK) >= 0;
+}
+
+static int system_journal_open(Server *s, bool flush_requested) {
+ const char *fn;
+ int r = 0;
+
+ if (!s->system_journal &&
+ IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
+ (flush_requested || flushed_flag_is_set())) {
+
+ /* If in auto mode: first try to create the machine
+ * path, but not the prefix.
+ *
+ * If in persistent mode: create /var/log/journal and
+ * the machine path */
+
+ if (s->storage == STORAGE_PERSISTENT)
+ (void) mkdir_p("/var/log/journal/", 0755);
+
+ (void) mkdir(s->system_storage.path, 0755);
+
+ fn = strjoina(s->system_storage.path, "/system.journal");
+ r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
+ if (r >= 0) {
+ server_add_acls(s->system_journal, 0);
+ (void) cache_space_refresh(s, &s->system_storage);
+ patch_min_use(&s->system_storage);
+ } else {
+ if (!IN_SET(r, -ENOENT, -EROFS))
+ log_warning_errno(r, "Failed to open system journal: %m");
+
+ r = 0;
+ }
+
+ /* If the runtime journal is open, and we're post-flush, we're
+ * recovering from a failed system journal rotate (ENOSPC)
+ * for which the runtime journal was reopened.
+ *
+ * Perform an implicit flush to var, leaving the runtime
+ * journal closed, now that the system journal is back.
+ */
+ if (!flush_requested)
+ (void) server_flush_to_var(s, true);
+ }
+
+ if (!s->runtime_journal &&
+ (s->storage != STORAGE_NONE)) {
+
+ fn = strjoina(s->runtime_storage.path, "/system.journal");
+
+ if (s->system_journal) {
+
+ /* Try to open the runtime journal, but only
+ * if it already exists, so that we can flush
+ * it into the system journal */
+
+ r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
+ if (r < 0) {
+ if (r != -ENOENT)
+ log_warning_errno(r, "Failed to open runtime journal: %m");
+
+ r = 0;
+ }
+
+ } else {
+
+ /* OK, we really need the runtime journal, so create
+ * it if necessary. */
+
+ (void) mkdir("/run/log", 0755);
+ (void) mkdir("/run/log/journal", 0755);
+ (void) mkdir_parents(fn, 0750);
+
+ r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open runtime journal: %m");
+ }
+
+ if (s->runtime_journal) {
+ server_add_acls(s->runtime_journal, 0);
+ (void) cache_space_refresh(s, &s->runtime_storage);
+ patch_min_use(&s->runtime_storage);
+ }
+ }
+
+ return r;
+}
+
+static JournalFile* find_journal(Server *s, uid_t uid) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+ JournalFile *f;
+ sd_id128_t machine;
+
+ assert(s);
+
+ /* A rotate that fails to create the new journal (ENOSPC) leaves the
+ * rotated journal as NULL. Unless we revisit opening, even after
+ * space is made available we'll continue to return NULL indefinitely.
+ *
+ * system_journal_open() is a noop if the journals are already open, so
+ * we can just call it here to recover from failed rotates (or anything
+ * else that's left the journals as NULL).
+ *
+ * Fixes https://github.com/systemd/systemd/issues/3968 */
+ (void) system_journal_open(s, false);
+
+ /* We split up user logs only on /var, not on /run. If the
+ * runtime file is open, we write to it exclusively, in order
+ * to guarantee proper order as soon as we flush /run to
+ * /var and close the runtime file. */
+
+ if (s->runtime_journal)
+ return s->runtime_journal;
+
+ if (uid_for_system_journal(uid))
+ return s->system_journal;
+
+ f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
+ if (f)
+ return f;
+
+ r = sd_id128_get_machine(&machine);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to determine machine ID, using system log: %m");
+ return s->system_journal;
+ }
+
+ if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
+ SD_ID128_FORMAT_VAL(machine), uid) < 0) {
+ log_oom();
+ return s->system_journal;
+ }
+
+ while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
+ /* Too many open? Then let's close one */
+ f = ordered_hashmap_steal_first(s->user_journals);
+ assert(f);
+ (void) journal_file_close(f);
+ }
+
+ r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
+ if (r < 0)
+ return s->system_journal;
+
+ server_add_acls(f, uid);
+
+ r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
+ if (r < 0) {
+ (void) journal_file_close(f);
+ return s->system_journal;
+ }
+
+ return f;
+}
+
+static int do_rotate(
+ Server *s,
+ JournalFile **f,
+ const char* name,
+ bool seal,
+ uint32_t uid) {
+
+ int r;
+ assert(s);
+
+ if (!*f)
+ return -EINVAL;
+
+ r = journal_file_rotate(f, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);
+ if (r < 0) {
+ if (*f)
+ return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
+ else
+ return log_error_errno(r, "Failed to create new %s journal: %m", name);
+ }
+
+ server_add_acls(*f, uid);
+
+ return r;
+}
+
+static void server_process_deferred_closes(Server *s) {
+ JournalFile *f;
+ Iterator i;
+
+ /* Perform any deferred closes which aren't still offlining. */
+ SET_FOREACH(f, s->deferred_closes, i) {
+ if (journal_file_is_offlining(f))
+ continue;
+
+ (void) set_remove(s->deferred_closes, f);
+ (void) journal_file_close(f);
+ }
+}
+
+static void server_vacuum_deferred_closes(Server *s) {
+ assert(s);
+
+ /* Make some room in the deferred closes list, so that it doesn't grow without bounds */
+ if (set_size(s->deferred_closes) < DEFERRED_CLOSES_MAX)
+ return;
+
+ /* Let's first remove all journal files that might already have completed closing */
+ server_process_deferred_closes(s);
+
+ /* And now, let's close some more until we reach the limit again. */
+ while (set_size(s->deferred_closes) >= DEFERRED_CLOSES_MAX) {
+ JournalFile *f;
+
+ assert_se(f = set_steal_first(s->deferred_closes));
+ journal_file_close(f);
+ }
+}
+
+static int open_user_journal_directory(Server *s, DIR **ret_dir, char **ret_path) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ _cleanup_free_ char *path = NULL;
+ sd_id128_t machine;
+ int r;
+
+ assert(s);
+
+ r = sd_id128_get_machine(&machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine machine ID, ignoring: %m");
+
+ if (asprintf(&path, "/var/log/journal/" SD_ID128_FORMAT_STR "/", SD_ID128_FORMAT_VAL(machine)) < 0)
+ return log_oom();
+
+ dir = opendir(path);
+ if (!dir)
+ return log_error_errno(errno, "Failed to open user journal directory '%s': %m", path);
+
+ if (ret_dir)
+ *ret_dir = TAKE_PTR(dir);
+ if (ret_path)
+ *ret_path = TAKE_PTR(path);
+
+ return 0;
+}
+
+void server_rotate(Server *s) {
+ _cleanup_free_ char *path = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ JournalFile *f;
+ Iterator i;
+ void *k;
+ int r;
+
+ log_debug("Rotating...");
+
+ /* First, rotate the system journal (either in its runtime flavour or in its runtime flavour) */
+ (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
+ (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
+
+ /* Then, rotate all user journals we have open (keeping them open) */
+ ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
+ r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
+ if (r >= 0)
+ ordered_hashmap_replace(s->user_journals, k, f);
+ else if (!f)
+ /* Old file has been closed and deallocated */
+ ordered_hashmap_remove(s->user_journals, k);
+ }
+
+ /* Finally, also rotate all user journals we currently do not have open. (But do so only if we actually have
+ * access to /var, i.e. are not in the log-to-runtime-journal mode). */
+ if (!s->runtime_journal &&
+ open_user_journal_directory(s, &d, &path) >= 0) {
+
+ struct dirent *de;
+
+ FOREACH_DIRENT(de, d, log_warning_errno(errno, "Failed to enumerate %s, ignoring: %m", path)) {
+ _cleanup_free_ char *u = NULL, *full = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *a, *b;
+ uid_t uid;
+
+ a = startswith(de->d_name, "user-");
+ if (!a)
+ continue;
+ b = endswith(de->d_name, ".journal");
+ if (!b)
+ continue;
+
+ u = strndup(a, b-a);
+ if (!u) {
+ log_oom();
+ break;
+ }
+
+ r = parse_uid(u, &uid);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse UID from file name '%s', ignoring: %m", de->d_name);
+ continue;
+ }
+
+ /* Already rotated in the above loop? i.e. is it an open user journal? */
+ if (ordered_hashmap_contains(s->user_journals, UID_TO_PTR(uid)))
+ continue;
+
+ full = strjoin(path, de->d_name);
+ if (!full) {
+ log_oom();
+ break;
+ }
+
+ fd = openat(dirfd(d), de->d_name, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
+ if (fd < 0) {
+ log_full_errno(IN_SET(errno, ELOOP, ENOENT) ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to open journal file '%s' for rotation: %m", full);
+ continue;
+ }
+
+ /* Make some room in the set of deferred close()s */
+ server_vacuum_deferred_closes(s);
+
+ /* Open the file briefly, so that we can archive it */
+ r = journal_file_open(fd,
+ full,
+ O_RDWR,
+ 0640,
+ s->compress.enabled,
+ s->compress.threshold_bytes,
+ s->seal,
+ &s->system_storage.metrics,
+ s->mmap,
+ s->deferred_closes,
+ NULL,
+ &f);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read journal file %s for rotation, trying to move it out of the way: %m", full);
+
+ r = journal_file_dispose(dirfd(d), de->d_name);
+ if (r < 0)
+ log_warning_errno(r, "Failed to move %s out of the way, ignoring: %m", full);
+ else
+ log_debug("Successfully moved %s out of the way.", full);
+
+ continue;
+ }
+
+ TAKE_FD(fd); /* Donated to journal_file_open() */
+
+ r = journal_file_archive(f);
+ if (r < 0)
+ log_debug_errno(r, "Failed to archive journal file '%s', ignoring: %m", full);
+
+ f = journal_initiate_close(f, s->deferred_closes);
+ }
+ }
+
+ server_process_deferred_closes(s);
+}
+
+void server_sync(Server *s) {
+ JournalFile *f;
+ Iterator i;
+ int r;
+
+ if (s->system_journal) {
+ r = journal_file_set_offline(s->system_journal, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
+ }
+
+ ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
+ r = journal_file_set_offline(f, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
+ }
+
+ if (s->sync_event_source) {
+ r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ log_error_errno(r, "Failed to disable sync timer source: %m");
+ }
+
+ s->sync_scheduled = false;
+}
+
+static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
+
+ int r;
+
+ assert(s);
+ assert(storage);
+
+ (void) cache_space_refresh(s, storage);
+
+ if (verbose)
+ server_space_usage_message(s, storage);
+
+ r = journal_directory_vacuum(storage->path, storage->space.limit,
+ storage->metrics.n_max_files, s->max_retention_usec,
+ &s->oldest_file_usec, verbose);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
+
+ cache_space_invalidate(&storage->space);
+}
+
+int server_vacuum(Server *s, bool verbose) {
+ assert(s);
+
+ log_debug("Vacuuming...");
+
+ s->oldest_file_usec = 0;
+
+ if (s->system_journal)
+ do_vacuum(s, &s->system_storage, verbose);
+ if (s->runtime_journal)
+ do_vacuum(s, &s->runtime_storage, verbose);
+
+ return 0;
+}
+
+static void server_cache_machine_id(Server *s) {
+ sd_id128_t id;
+ int r;
+
+ assert(s);
+
+ r = sd_id128_get_machine(&id);
+ if (r < 0)
+ return;
+
+ sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
+}
+
+static void server_cache_boot_id(Server *s) {
+ sd_id128_t id;
+ int r;
+
+ assert(s);
+
+ r = sd_id128_get_boot(&id);
+ if (r < 0)
+ return;
+
+ sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
+}
+
+static void server_cache_hostname(Server *s) {
+ _cleanup_free_ char *t = NULL;
+ char *x;
+
+ assert(s);
+
+ t = gethostname_malloc();
+ if (!t)
+ return;
+
+ x = strappend("_HOSTNAME=", t);
+ if (!x)
+ return;
+
+ free(s->hostname_field);
+ s->hostname_field = x;
+}
+
+static bool shall_try_append_again(JournalFile *f, int r) {
+ switch(r) {
+
+ case -E2BIG: /* Hit configured limit */
+ case -EFBIG: /* Hit fs limit */
+ case -EDQUOT: /* Quota limit hit */
+ case -ENOSPC: /* Disk full */
+ log_debug("%s: Allocation limit reached, rotating.", f->path);
+ return true;
+
+ case -EIO: /* I/O error of some kind (mmap) */
+ log_warning("%s: IO error, rotating.", f->path);
+ return true;
+
+ case -EHOSTDOWN: /* Other machine */
+ log_info("%s: Journal file from other machine, rotating.", f->path);
+ return true;
+
+ case -EBUSY: /* Unclean shutdown */
+ log_info("%s: Unclean shutdown, rotating.", f->path);
+ return true;
+
+ case -EPROTONOSUPPORT: /* Unsupported feature */
+ log_info("%s: Unsupported feature, rotating.", f->path);
+ return true;
+
+ case -EBADMSG: /* Corrupted */
+ case -ENODATA: /* Truncated */
+ case -ESHUTDOWN: /* Already archived */
+ log_warning("%s: Journal file corrupted, rotating.", f->path);
+ return true;
+
+ case -EIDRM: /* Journal file has been deleted */
+ log_warning("%s: Journal file has been deleted, rotating.", f->path);
+ return true;
+
+ case -ETXTBSY: /* Journal file is from the future */
+ log_warning("%s: Journal file is from the future, rotating.", f->path);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
+ bool vacuumed = false, rotate = false;
+ struct dual_timestamp ts;
+ JournalFile *f;
+ int r;
+
+ assert(s);
+ assert(iovec);
+ assert(n > 0);
+
+ /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
+ * the source time, and not even the time the event was originally seen, but instead simply the time we started
+ * processing it, as we want strictly linear ordering in what we write out.) */
+ assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
+ assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
+
+ if (ts.realtime < s->last_realtime_clock) {
+ /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
+ * regular operation. However, when it does happen, then we should make sure that we start fresh files
+ * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
+ * bisection works correctly. */
+
+ log_debug("Time jumped backwards, rotating.");
+ rotate = true;
+ } else {
+
+ f = find_journal(s, uid);
+ if (!f)
+ return;
+
+ if (journal_file_rotate_suggested(f, s->max_file_usec)) {
+ log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
+ rotate = true;
+ }
+ }
+
+ if (rotate) {
+ server_rotate(s);
+ server_vacuum(s, false);
+ vacuumed = true;
+
+ f = find_journal(s, uid);
+ if (!f)
+ return;
+ }
+
+ s->last_realtime_clock = ts.realtime;
+
+ r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
+ if (r >= 0) {
+ server_schedule_sync(s, priority);
+ return;
+ }
+
+ if (vacuumed || !shall_try_append_again(f, r)) {
+ log_error_errno(r, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
+ return;
+ }
+
+ server_rotate(s);
+ server_vacuum(s, false);
+
+ f = find_journal(s, uid);
+ if (!f)
+ return;
+
+ log_debug("Retrying write.");
+ r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
+ else
+ server_schedule_sync(s, priority);
+}
+
+#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field) \
+ if (isset(value)) { \
+ char *k; \
+ k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
+ sprintf(k, field "=" format, value); \
+ iovec[n++] = IOVEC_MAKE_STRING(k); \
+ }
+
+#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
+ if (!isempty(value)) { \
+ char *k; \
+ k = strjoina(field "=", value); \
+ iovec[n++] = IOVEC_MAKE_STRING(k); \
+ }
+
+#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
+ if (!sd_id128_is_null(value)) { \
+ char *k; \
+ k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
+ sd_id128_to_string(value, stpcpy(k, field "=")); \
+ iovec[n++] = IOVEC_MAKE_STRING(k); \
+ }
+
+#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
+ if (value_size > 0) { \
+ char *k; \
+ k = newa(char, STRLEN(field "=") + value_size + 1); \
+ *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
+ iovec[n++] = IOVEC_MAKE_STRING(k); \
+ } \
+
+static void dispatch_message_real(
+ Server *s,
+ struct iovec *iovec, size_t n, size_t m,
+ const ClientContext *c,
+ const struct timeval *tv,
+ int priority,
+ pid_t object_pid) {
+
+ char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
+ _cleanup_free_ char *cmdline1 = NULL, *cmdline2 = NULL;
+ uid_t journal_uid;
+ ClientContext *o;
+
+ assert(s);
+ assert(iovec);
+ assert(n > 0);
+ assert(n +
+ N_IOVEC_META_FIELDS +
+ (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
+ client_context_extra_fields_n_iovec(c) <= m);
+
+ if (c) {
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
+
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM"); /* At most TASK_COMM_LENGTH (16 bytes) */
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE"); /* A path, so at most PATH_MAX (4096 bytes) */
+
+ if (c->cmdline)
+ /* At most _SC_ARG_MAX (2MB usually), which is too much to put on stack.
+ * Let's use a heap allocation for this one. */
+ cmdline1 = set_iovec_string_field(iovec, &n, "_CMDLINE=", c->cmdline);
+
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE"); /* Read from /proc/.../status */
+ IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
+
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP"); /* A path */
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT"); /* Unit names are bounded by UNIT_NAME_MAX */
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
+
+ IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
+
+ if (c->extra_fields_n_iovec > 0) {
+ memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
+ n += c->extra_fields_n_iovec;
+ }
+ }
+
+ assert(n <= m);
+
+ if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
+
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
+
+ /* See above for size limits, only ->cmdline may be large, so use a heap allocation for it. */
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
+ if (o->cmdline)
+ cmdline2 = set_iovec_string_field(iovec, &n, "OBJECT_CMDLINE=", o->cmdline);
+
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
+ IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
+
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
+
+ IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
+ }
+
+ assert(n <= m);
+
+ if (tv) {
+ sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
+ iovec[n++] = IOVEC_MAKE_STRING(source_time);
+ }
+
+ /* Note that strictly speaking storing the boot id here is
+ * redundant since the entry includes this in-line
+ * anyway. However, we need this indexed, too. */
+ if (!isempty(s->boot_id_field))
+ iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
+
+ if (!isempty(s->machine_id_field))
+ iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
+
+ if (!isempty(s->hostname_field))
+ iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
+
+ assert(n <= m);
+
+ if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
+ /* Split up strictly by (non-root) UID */
+ journal_uid = c->uid;
+ else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
+ /* Split up by login UIDs. We do this only if the
+ * realuid is not root, in order not to accidentally
+ * leak privileged information to the user that is
+ * logged by a privileged process that is part of an
+ * unprivileged session. */
+ journal_uid = c->owner_uid;
+ else
+ journal_uid = 0;
+
+ write_to_journal(s, journal_uid, iovec, n, priority);
+}
+
+void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
+
+ struct iovec *iovec;
+ size_t n = 0, k, m;
+ va_list ap;
+ int r;
+
+ assert(s);
+ assert(format);
+
+ m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
+ iovec = newa(struct iovec, m);
+
+ assert_cc(3 == LOG_FAC(LOG_DAEMON));
+ iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
+ iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
+
+ iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
+ assert_cc(6 == LOG_INFO);
+ iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
+
+ if (message_id)
+ iovec[n++] = IOVEC_MAKE_STRING(message_id);
+ k = n;
+
+ va_start(ap, format);
+ r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
+ /* Error handling below */
+ va_end(ap);
+
+ if (r >= 0)
+ dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
+
+ while (k < n)
+ free(iovec[k++].iov_base);
+
+ if (r < 0) {
+ /* We failed to format the message. Emit a warning instead. */
+ char buf[LINE_MAX];
+
+ xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
+
+ n = 3;
+ iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
+ iovec[n++] = IOVEC_MAKE_STRING(buf);
+ dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
+ }
+}
+
+void server_dispatch_message(
+ Server *s,
+ struct iovec *iovec, size_t n, size_t m,
+ ClientContext *c,
+ const struct timeval *tv,
+ int priority,
+ pid_t object_pid) {
+
+ uint64_t available = 0;
+ int rl;
+
+ assert(s);
+ assert(iovec || n == 0);
+
+ if (n == 0)
+ return;
+
+ if (LOG_PRI(priority) > s->max_level_store)
+ return;
+
+ /* Stop early in case the information will not be stored
+ * in a journal. */
+ if (s->storage == STORAGE_NONE)
+ return;
+
+ if (c && c->unit) {
+ (void) determine_space(s, &available, NULL);
+
+ rl = journal_rate_limit_test(s->rate_limit, c->unit, c->log_rate_limit_interval, c->log_rate_limit_burst, priority & LOG_PRIMASK, available);
+ if (rl == 0)
+ return;
+
+ /* Write a suppression message if we suppressed something */
+ if (rl > 1)
+ server_driver_message(s, c->pid,
+ "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
+ LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
+ "N_DROPPED=%i", rl - 1,
+ NULL);
+ }
+
+ dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
+}
+
+int server_flush_to_var(Server *s, bool require_flag_file) {
+ sd_id128_t machine;
+ sd_journal *j = NULL;
+ char ts[FORMAT_TIMESPAN_MAX];
+ usec_t start;
+ unsigned n = 0;
+ int r;
+
+ assert(s);
+
+ if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
+ return 0;
+
+ if (!s->runtime_journal)
+ return 0;
+
+ if (require_flag_file && !flushed_flag_is_set())
+ return 0;
+
+ (void) system_journal_open(s, true);
+
+ if (!s->system_journal)
+ return 0;
+
+ log_debug("Flushing to /var...");
+
+ start = now(CLOCK_MONOTONIC);
+
+ r = sd_id128_get_machine(&machine);
+ if (r < 0)
+ return r;
+
+ r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read runtime journal: %m");
+
+ sd_journal_set_data_threshold(j, 0);
+
+ SD_JOURNAL_FOREACH(j) {
+ Object *o = NULL;
+ JournalFile *f;
+
+ f = j->current_file;
+ assert(f && f->current_offset > 0);
+
+ n++;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0) {
+ log_error_errno(r, "Can't read entry: %m");
+ goto finish;
+ }
+
+ r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
+ if (r >= 0)
+ continue;
+
+ if (!shall_try_append_again(s->system_journal, r)) {
+ log_error_errno(r, "Can't write entry: %m");
+ goto finish;
+ }
+
+ server_rotate(s);
+ server_vacuum(s, false);
+
+ if (!s->system_journal) {
+ log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
+ r = -EIO;
+ goto finish;
+ }
+
+ log_debug("Retrying write.");
+ r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
+ if (r < 0) {
+ log_error_errno(r, "Can't write entry: %m");
+ goto finish;
+ }
+ }
+
+ r = 0;
+
+finish:
+ if (s->system_journal)
+ journal_file_post_change(s->system_journal);
+
+ s->runtime_journal = journal_file_close(s->runtime_journal);
+
+ if (r >= 0)
+ (void) rm_rf("/run/log/journal", REMOVE_ROOT);
+
+ sd_journal_close(j);
+
+ server_driver_message(s, 0, NULL,
+ LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
+ format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
+ n),
+ NULL);
+
+ return r;
+}
+
+int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ Server *s = userdata;
+ struct ucred *ucred = NULL;
+ struct timeval *tv = NULL;
+ struct cmsghdr *cmsg;
+ char *label = NULL;
+ size_t label_len = 0, m;
+ struct iovec iovec;
+ ssize_t n;
+ int *fds = NULL, v = 0;
+ size_t n_fds = 0;
+
+ union {
+ struct cmsghdr cmsghdr;
+
+ /* We use NAME_MAX space for the SELinux label
+ * here. The kernel currently enforces no
+ * limit, but according to suggestions from
+ * the SELinux people this will change and it
+ * will probably be identical to NAME_MAX. For
+ * now we use that, but this should be updated
+ * one day when the final limit is known. */
+ uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(struct timeval)) +
+ CMSG_SPACE(sizeof(int)) + /* fd */
+ CMSG_SPACE(NAME_MAX)]; /* selinux label */
+ } control = {};
+
+ union sockaddr_union sa = {};
+
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_name = &sa,
+ .msg_namelen = sizeof(sa),
+ };
+
+ assert(s);
+ assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
+
+ if (revents != EPOLLIN)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Got invalid event from epoll for datagram fd: %" PRIx32,
+ revents);
+
+ /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
+ * it.) */
+ (void) ioctl(fd, SIOCINQ, &v);
+
+ /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
+ m = PAGE_ALIGN(MAX3((size_t) v + 1,
+ (size_t) LINE_MAX,
+ ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
+
+ if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
+ return log_oom();
+
+ iovec = IOVEC_MAKE(s->buffer, s->buffer_size - 1); /* Leave room for trailing NUL we add later */
+
+ n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (n < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ return 0;
+
+ return log_error_errno(errno, "recvmsg() failed: %m");
+ }
+
+ CMSG_FOREACH(cmsg, &msghdr)
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
+ ucred = (struct ucred*) CMSG_DATA(cmsg);
+ else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_SECURITY) {
+ label = (char*) CMSG_DATA(cmsg);
+ label_len = cmsg->cmsg_len - CMSG_LEN(0);
+ } else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SO_TIMESTAMP &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
+ tv = (struct timeval*) CMSG_DATA(cmsg);
+ else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+ fds = (int*) CMSG_DATA(cmsg);
+ n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+ }
+
+ /* And a trailing NUL, just in case */
+ s->buffer[n] = 0;
+
+ if (fd == s->syslog_fd) {
+ if (n > 0 && n_fds == 0)
+ server_process_syslog_message(s, s->buffer, n, ucred, tv, label, label_len);
+ else if (n_fds > 0)
+ log_warning("Got file descriptors via syslog socket. Ignoring.");
+
+ } else if (fd == s->native_fd) {
+ if (n > 0 && n_fds == 0)
+ server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
+ else if (n == 0 && n_fds == 1)
+ server_process_native_file(s, fds[0], ucred, tv, label, label_len);
+ else if (n_fds > 0)
+ log_warning("Got too many file descriptors via native socket. Ignoring.");
+
+ } else {
+ assert(fd == s->audit_fd);
+
+ if (n > 0 && n_fds == 0)
+ server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
+ else if (n_fds > 0)
+ log_warning("Got file descriptors via audit socket. Ignoring.");
+ }
+
+ close_many(fds, n_fds);
+ return 0;
+}
+
+static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+
+ log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
+
+ (void) server_flush_to_var(s, false);
+ server_sync(s);
+ server_vacuum(s, false);
+
+ r = touch("/run/systemd/journal/flushed");
+ if (r < 0)
+ log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
+
+ server_space_usage_message(s, NULL);
+ return 0;
+}
+
+static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+
+ log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
+ server_rotate(s);
+ server_vacuum(s, true);
+
+ if (s->system_journal)
+ patch_min_use(&s->system_storage);
+ if (s->runtime_journal)
+ patch_min_use(&s->runtime_storage);
+
+ /* Let clients know when the most recent rotation happened. */
+ r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
+ if (r < 0)
+ log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
+
+ return 0;
+}
+
+static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
+ Server *s = userdata;
+
+ assert(s);
+
+ log_received_signal(LOG_INFO, si);
+
+ sd_event_exit(s->event, 0);
+ return 0;
+}
+
+static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+
+ log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
+
+ server_sync(s);
+
+ /* Let clients know when the most recent sync happened. */
+ r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
+ if (r < 0)
+ log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
+
+ return 0;
+}
+
+static int setup_signals(Server *s) {
+ int r;
+
+ assert(s);
+
+ assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
+
+ r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
+ if (r < 0)
+ return r;
+
+ /* Let's process SIGTERM late, so that we flush all queued
+ * messages to disk before we exit */
+ r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
+ if (r < 0)
+ return r;
+
+ /* When journald is invoked on the terminal (when debugging),
+ * it's useful if C-c is handled equivalent to SIGTERM. */
+ r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
+ if (r < 0)
+ return r;
+
+ /* SIGRTMIN+1 causes an immediate sync. We process this very
+ * late, so that everything else queued at this point is
+ * really written to disk. Clients can watch
+ * /run/systemd/journal/synced with inotify until its mtime
+ * changes to see when a sync happened. */
+ r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ Server *s = data;
+ int r;
+
+ assert(s);
+
+ if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
+ else
+ s->forward_to_syslog = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
+ else
+ s->forward_to_kmsg = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
+ else
+ s->forward_to_console = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
+ else
+ s->forward_to_wall = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
+ else
+ s->max_level_console = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
+ else
+ s->max_level_store = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
+ else
+ s->max_level_syslog = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
+ else
+ s->max_level_kmsg = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
+ else
+ s->max_level_wall = r;
+
+ } else if (startswith(key, "systemd.journald"))
+ log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
+
+ /* do not warn about state here, since probably systemd already did */
+ return 0;
+}
+
+static int server_parse_config_file(Server *s) {
+ assert(s);
+
+ return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
+ CONF_PATHS_NULSTR("systemd/journald.conf.d"),
+ "Journal\0",
+ config_item_perf_lookup, journald_gperf_lookup,
+ CONFIG_PARSE_WARN, s);
+}
+
+static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
+ Server *s = userdata;
+
+ assert(s);
+
+ server_sync(s);
+ return 0;
+}
+
+int server_schedule_sync(Server *s, int priority) {
+ int r;
+
+ assert(s);
+
+ if (priority <= LOG_CRIT) {
+ /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
+ server_sync(s);
+ return 0;
+ }
+
+ if (s->sync_scheduled)
+ return 0;
+
+ if (s->sync_interval_usec > 0) {
+ usec_t when;
+
+ r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
+ if (r < 0)
+ return r;
+
+ when += s->sync_interval_usec;
+
+ if (!s->sync_event_source) {
+ r = sd_event_add_time(
+ s->event,
+ &s->sync_event_source,
+ CLOCK_MONOTONIC,
+ when, 0,
+ server_dispatch_sync, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
+ } else {
+ r = sd_event_source_set_time(s->sync_event_source, when);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
+ }
+ if (r < 0)
+ return r;
+
+ s->sync_scheduled = true;
+ }
+
+ return 0;
+}
+
+static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ Server *s = userdata;
+
+ assert(s);
+
+ server_cache_hostname(s);
+ return 0;
+}
+
+static int server_open_hostname(Server *s) {
+ int r;
+
+ assert(s);
+
+ s->hostname_fd = open("/proc/sys/kernel/hostname",
+ O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (s->hostname_fd < 0)
+ return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
+
+ r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
+ if (r < 0) {
+ /* kernels prior to 3.2 don't support polling this file. Ignore
+ * the failure. */
+ if (r == -EPERM) {
+ log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
+ s->hostname_fd = safe_close(s->hostname_fd);
+ return 0;
+ }
+
+ return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
+ }
+
+ r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
+
+ return 0;
+}
+
+static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+ assert(s->notify_event_source == es);
+ assert(s->notify_fd == fd);
+
+ /* The $NOTIFY_SOCKET is writable again, now send exactly one
+ * message on it. Either it's the watchdog event, the initial
+ * READY=1 event or an stdout stream event. If there's nothing
+ * to write anymore, turn our event source off. The next time
+ * there's something to send it will be turned on again. */
+
+ if (!s->sent_notify_ready) {
+ static const char p[] =
+ "READY=1\n"
+ "STATUS=Processing requests...";
+ ssize_t l;
+
+ l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
+ }
+
+ s->sent_notify_ready = true;
+ log_debug("Sent READY=1 notification.");
+
+ } else if (s->send_watchdog) {
+
+ static const char p[] =
+ "WATCHDOG=1";
+
+ ssize_t l;
+
+ l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
+ }
+
+ s->send_watchdog = false;
+ log_debug("Sent WATCHDOG=1 notification.");
+
+ } else if (s->stdout_streams_notify_queue)
+ /* Dispatch one stream notification event */
+ stdout_stream_send_notify(s->stdout_streams_notify_queue);
+
+ /* Leave us enabled if there's still more to do. */
+ if (s->send_watchdog || s->stdout_streams_notify_queue)
+ return 0;
+
+ /* There was nothing to do anymore, let's turn ourselves off. */
+ r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
+ if (r < 0)
+ return log_error_errno(r, "Failed to turn off notify event source: %m");
+
+ return 0;
+}
+
+static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+
+ s->send_watchdog = true;
+
+ r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
+ if (r < 0)
+ log_warning_errno(r, "Failed to turn on notify event source: %m");
+
+ r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
+ if (r < 0)
+ return log_error_errno(r, "Failed to restart watchdog event source: %m");
+
+ r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable watchdog event source: %m");
+
+ return 0;
+}
+
+static int server_connect_notify(Server *s) {
+ union sockaddr_union sa = {};
+ const char *e;
+ int r, salen;
+
+ assert(s);
+ assert(s->notify_fd < 0);
+ assert(!s->notify_event_source);
+
+ /*
+ So here's the problem: we'd like to send notification
+ messages to PID 1, but we cannot do that via sd_notify(),
+ since that's synchronous, and we might end up blocking on
+ it. Specifically: given that PID 1 might block on
+ dbus-daemon during IPC, and dbus-daemon is logging to us,
+ and might hence block on us, we might end up in a deadlock
+ if we block on sending PID 1 notification messages — by
+ generating a full blocking circle. To avoid this, let's
+ create a non-blocking socket, and connect it to the
+ notification socket, and then wait for POLLOUT before we
+ send anything. This should efficiently avoid any deadlocks,
+ as we'll never block on PID 1, hence PID 1 can safely block
+ on dbus-daemon which can safely block on us again.
+
+ Don't think that this issue is real? It is, see:
+ https://github.com/systemd/systemd/issues/1505
+ */
+
+ e = getenv("NOTIFY_SOCKET");
+ if (!e)
+ return 0;
+
+ salen = sockaddr_un_set_path(&sa.un, e);
+ if (salen < 0)
+ return log_error_errno(salen, "NOTIFY_SOCKET set to invalid value '%s': %m", e);
+
+ s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s->notify_fd < 0)
+ return log_error_errno(errno, "Failed to create notify socket: %m");
+
+ (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
+
+ r = connect(s->notify_fd, &sa.sa, salen);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to connect to notify socket: %m");
+
+ r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch notification socket: %m");
+
+ if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
+ s->send_watchdog = true;
+
+ r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add watchdog time event: %m");
+ }
+
+ /* This should fire pretty soon, which we'll use to send the
+ * READY=1 event. */
+
+ return 0;
+}
+
+int server_init(Server *s) {
+ _cleanup_fdset_free_ FDSet *fds = NULL;
+ int n, r, fd;
+ bool no_sockets;
+
+ assert(s);
+
+ zero(*s);
+ s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
+ s->compress.enabled = true;
+ s->compress.threshold_bytes = (uint64_t) -1;
+ s->seal = true;
+ s->read_kmsg = true;
+
+ s->watchdog_usec = USEC_INFINITY;
+
+ s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
+ s->sync_scheduled = false;
+
+ s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
+ s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
+
+ s->forward_to_wall = true;
+
+ s->max_file_usec = DEFAULT_MAX_FILE_USEC;
+
+ s->max_level_store = LOG_DEBUG;
+ s->max_level_syslog = LOG_DEBUG;
+ s->max_level_kmsg = LOG_NOTICE;
+ s->max_level_console = LOG_INFO;
+ s->max_level_wall = LOG_EMERG;
+
+ s->line_max = DEFAULT_LINE_MAX;
+
+ journal_reset_metrics(&s->system_storage.metrics);
+ journal_reset_metrics(&s->runtime_storage.metrics);
+
+ server_parse_config_file(s);
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
+ log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
+ s->rate_limit_interval, s->rate_limit_burst);
+ s->rate_limit_interval = s->rate_limit_burst = 0;
+ }
+
+ (void) mkdir_p("/run/systemd/journal", 0755);
+
+ s->user_journals = ordered_hashmap_new(NULL);
+ if (!s->user_journals)
+ return log_oom();
+
+ s->mmap = mmap_cache_new();
+ if (!s->mmap)
+ return log_oom();
+
+ s->deferred_closes = set_new(NULL);
+ if (!s->deferred_closes)
+ return log_oom();
+
+ r = sd_event_default(&s->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create event loop: %m");
+
+ n = sd_listen_fds(true);
+ if (n < 0)
+ return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
+
+ if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
+
+ if (s->native_fd >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many native sockets passed.");
+
+ s->native_fd = fd;
+
+ } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
+
+ if (s->stdout_fd >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many stdout sockets passed.");
+
+ s->stdout_fd = fd;
+
+ } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
+ sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
+
+ if (s->syslog_fd >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many /dev/log sockets passed.");
+
+ s->syslog_fd = fd;
+
+ } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
+
+ if (s->audit_fd >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many audit sockets passed.");
+
+ s->audit_fd = fd;
+
+ } else {
+
+ if (!fds) {
+ fds = fdset_new();
+ if (!fds)
+ return log_oom();
+ }
+
+ r = fdset_put(fds, fd);
+ if (r < 0)
+ return log_oom();
+ }
+ }
+
+ /* Try to restore streams, but don't bother if this fails */
+ (void) server_restore_streams(s, fds);
+
+ if (fdset_size(fds) > 0) {
+ log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
+ fds = fdset_free(fds);
+ }
+
+ no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
+
+ /* always open stdout, syslog, native, and kmsg sockets */
+
+ /* systemd-journald.socket: /run/systemd/journal/stdout */
+ r = server_open_stdout_socket(s);
+ if (r < 0)
+ return r;
+
+ /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
+ r = server_open_syslog_socket(s);
+ if (r < 0)
+ return r;
+
+ /* systemd-journald.socket: /run/systemd/journal/socket */
+ r = server_open_native_socket(s);
+ if (r < 0)
+ return r;
+
+ /* /dev/kmsg */
+ r = server_open_dev_kmsg(s);
+ if (r < 0)
+ return r;
+
+ /* Unless we got *some* sockets and not audit, open audit socket */
+ if (s->audit_fd >= 0 || no_sockets) {
+ r = server_open_audit(s);
+ if (r < 0)
+ return r;
+ }
+
+ r = server_open_kernel_seqnum(s);
+ if (r < 0)
+ return r;
+
+ r = server_open_hostname(s);
+ if (r < 0)
+ return r;
+
+ r = setup_signals(s);
+ if (r < 0)
+ return r;
+
+ s->rate_limit = journal_rate_limit_new();
+ if (!s->rate_limit)
+ return -ENOMEM;
+
+ r = cg_get_root_path(&s->cgroup_root);
+ if (r < 0)
+ return r;
+
+ server_cache_hostname(s);
+ server_cache_boot_id(s);
+ server_cache_machine_id(s);
+
+ s->runtime_storage.name = "Runtime journal";
+ s->system_storage.name = "System journal";
+
+ s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
+ s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
+ if (!s->runtime_storage.path || !s->system_storage.path)
+ return -ENOMEM;
+
+ (void) server_connect_notify(s);
+
+ (void) client_context_acquire_default(s);
+
+ return system_journal_open(s, false);
+}
+
+void server_maybe_append_tags(Server *s) {
+#if HAVE_GCRYPT
+ JournalFile *f;
+ Iterator i;
+ usec_t n;
+
+ n = now(CLOCK_REALTIME);
+
+ if (s->system_journal)
+ journal_file_maybe_append_tag(s->system_journal, n);
+
+ ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
+ journal_file_maybe_append_tag(f, n);
+#endif
+}
+
+void server_done(Server *s) {
+ assert(s);
+
+ set_free_with_destructor(s->deferred_closes, journal_file_close);
+
+ while (s->stdout_streams)
+ stdout_stream_free(s->stdout_streams);
+
+ client_context_flush_all(s);
+
+ if (s->system_journal)
+ (void) journal_file_close(s->system_journal);
+
+ if (s->runtime_journal)
+ (void) journal_file_close(s->runtime_journal);
+
+ ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
+
+ sd_event_source_unref(s->syslog_event_source);
+ sd_event_source_unref(s->native_event_source);
+ sd_event_source_unref(s->stdout_event_source);
+ sd_event_source_unref(s->dev_kmsg_event_source);
+ sd_event_source_unref(s->audit_event_source);
+ sd_event_source_unref(s->sync_event_source);
+ sd_event_source_unref(s->sigusr1_event_source);
+ sd_event_source_unref(s->sigusr2_event_source);
+ sd_event_source_unref(s->sigterm_event_source);
+ sd_event_source_unref(s->sigint_event_source);
+ sd_event_source_unref(s->sigrtmin1_event_source);
+ sd_event_source_unref(s->hostname_event_source);
+ sd_event_source_unref(s->notify_event_source);
+ sd_event_source_unref(s->watchdog_event_source);
+ sd_event_unref(s->event);
+
+ safe_close(s->syslog_fd);
+ safe_close(s->native_fd);
+ safe_close(s->stdout_fd);
+ safe_close(s->dev_kmsg_fd);
+ safe_close(s->audit_fd);
+ safe_close(s->hostname_fd);
+ safe_close(s->notify_fd);
+
+ if (s->rate_limit)
+ journal_rate_limit_free(s->rate_limit);
+
+ if (s->kernel_seqnum)
+ munmap(s->kernel_seqnum, sizeof(uint64_t));
+
+ free(s->buffer);
+ free(s->tty_path);
+ free(s->cgroup_root);
+ free(s->hostname_field);
+ free(s->runtime_storage.path);
+ free(s->system_storage.path);
+
+ if (s->mmap)
+ mmap_cache_unref(s->mmap);
+}
+
+static const char* const storage_table[_STORAGE_MAX] = {
+ [STORAGE_AUTO] = "auto",
+ [STORAGE_VOLATILE] = "volatile",
+ [STORAGE_PERSISTENT] = "persistent",
+ [STORAGE_NONE] = "none"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
+
+static const char* const split_mode_table[_SPLIT_MAX] = {
+ [SPLIT_LOGIN] = "login",
+ [SPLIT_UID] = "uid",
+ [SPLIT_NONE] = "none",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
+
+int config_parse_line_max(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ size_t *sz = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue))
+ /* Empty assignment means default */
+ *sz = DEFAULT_LINE_MAX;
+ else {
+ uint64_t v;
+
+ r = parse_size(rvalue, 1024, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (v < 79) {
+ /* Why specify 79 here as minimum line length? Simply, because the most common traditional
+ * terminal size is 80ch, and it might make sense to break one character before the natural
+ * line break would occur on that. */
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
+ *sz = 79;
+ } else if (v > (uint64_t) (SSIZE_MAX-1)) {
+ /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
+ * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
+ * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
+ * fail much earlier anyway. */
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
+ *sz = SSIZE_MAX-1;
+ } else
+ *sz = (size_t) v;
+ }
+
+ return 0;
+}
+
+int config_parse_compress(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ JournalCompressOptions* compress = data;
+ int r;
+
+ if (streq(rvalue, "1")) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Compress= ambiguously specified as 1, enabling compression with default threshold");
+ compress->enabled = true;
+ } else if (streq(rvalue, "0")) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Compress= ambiguously specified as 0, disabling compression");
+ compress->enabled = false;
+ } else if ((r = parse_boolean(rvalue)) >= 0)
+ compress->enabled = r;
+ else if (parse_size(rvalue, 1024, &compress->threshold_bytes) == 0)
+ compress->enabled = true;
+ else if (isempty(rvalue)) {
+ compress->enabled = true;
+ compress->threshold_bytes = (uint64_t) -1;
+ } else
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Compress= value, ignoring: %s", rvalue);
+
+ return 0;
+}
diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h
new file mode 100644
index 0000000..3f6b42d
--- /dev/null
+++ b/src/journal/journald-server.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-event.h"
+
+typedef struct Server Server;
+
+#include "conf-parser.h"
+#include "hashmap.h"
+#include "journal-file.h"
+#include "journald-context.h"
+#include "journald-rate-limit.h"
+#include "journald-stream.h"
+#include "list.h"
+#include "prioq.h"
+
+typedef enum Storage {
+ STORAGE_AUTO,
+ STORAGE_VOLATILE,
+ STORAGE_PERSISTENT,
+ STORAGE_NONE,
+ _STORAGE_MAX,
+ _STORAGE_INVALID = -1
+} Storage;
+
+typedef enum SplitMode {
+ SPLIT_UID,
+ SPLIT_LOGIN, /* deprecated */
+ SPLIT_NONE,
+ _SPLIT_MAX,
+ _SPLIT_INVALID = -1
+} SplitMode;
+
+typedef struct JournalCompressOptions {
+ bool enabled;
+ uint64_t threshold_bytes;
+} JournalCompressOptions;
+
+typedef struct JournalStorageSpace {
+ usec_t timestamp;
+
+ uint64_t available;
+ uint64_t limit;
+
+ uint64_t vfs_used; /* space used by journal files */
+ uint64_t vfs_available;
+} JournalStorageSpace;
+
+typedef struct JournalStorage {
+ const char *name;
+ char *path;
+
+ JournalMetrics metrics;
+ JournalStorageSpace space;
+} JournalStorage;
+
+struct Server {
+ int syslog_fd;
+ int native_fd;
+ int stdout_fd;
+ int dev_kmsg_fd;
+ int audit_fd;
+ int hostname_fd;
+ int notify_fd;
+
+ sd_event *event;
+
+ sd_event_source *syslog_event_source;
+ sd_event_source *native_event_source;
+ sd_event_source *stdout_event_source;
+ sd_event_source *dev_kmsg_event_source;
+ sd_event_source *audit_event_source;
+ sd_event_source *sync_event_source;
+ sd_event_source *sigusr1_event_source;
+ sd_event_source *sigusr2_event_source;
+ sd_event_source *sigterm_event_source;
+ sd_event_source *sigint_event_source;
+ sd_event_source *sigrtmin1_event_source;
+ sd_event_source *hostname_event_source;
+ sd_event_source *notify_event_source;
+ sd_event_source *watchdog_event_source;
+
+ JournalFile *runtime_journal;
+ JournalFile *system_journal;
+ OrderedHashmap *user_journals;
+
+ uint64_t seqnum;
+
+ char *buffer;
+ size_t buffer_size;
+
+ JournalRateLimit *rate_limit;
+ usec_t sync_interval_usec;
+ usec_t rate_limit_interval;
+ unsigned rate_limit_burst;
+
+ JournalStorage runtime_storage;
+ JournalStorage system_storage;
+
+ JournalCompressOptions compress;
+ bool seal;
+ bool read_kmsg;
+
+ bool forward_to_kmsg;
+ bool forward_to_syslog;
+ bool forward_to_console;
+ bool forward_to_wall;
+
+ unsigned n_forward_syslog_missed;
+ usec_t last_warn_forward_syslog_missed;
+
+ uint64_t var_available_timestamp;
+
+ usec_t max_retention_usec;
+ usec_t max_file_usec;
+ usec_t oldest_file_usec;
+
+ LIST_HEAD(StdoutStream, stdout_streams);
+ LIST_HEAD(StdoutStream, stdout_streams_notify_queue);
+ unsigned n_stdout_streams;
+
+ char *tty_path;
+
+ int max_level_store;
+ int max_level_syslog;
+ int max_level_kmsg;
+ int max_level_console;
+ int max_level_wall;
+
+ Storage storage;
+ SplitMode split_mode;
+
+ MMapCache *mmap;
+
+ Set *deferred_closes;
+
+ uint64_t *kernel_seqnum;
+ bool dev_kmsg_readable:1;
+
+ bool send_watchdog:1;
+ bool sent_notify_ready:1;
+ bool sync_scheduled:1;
+
+ char machine_id_field[sizeof("_MACHINE_ID=") + 32];
+ char boot_id_field[sizeof("_BOOT_ID=") + 32];
+ char *hostname_field;
+
+ /* Cached cgroup root, so that we don't have to query that all the time */
+ char *cgroup_root;
+
+ usec_t watchdog_usec;
+
+ usec_t last_realtime_clock;
+
+ size_t line_max;
+
+ /* Caching of client metadata */
+ Hashmap *client_contexts;
+ Prioq *client_contexts_lru;
+
+ usec_t last_cache_pid_flush;
+
+ ClientContext *my_context; /* the context of journald itself */
+ ClientContext *pid1_context; /* the context of PID 1 */
+};
+
+#define SERVER_MACHINE_ID(s) ((s)->machine_id_field + STRLEN("_MACHINE_ID="))
+
+/* Extra fields for any log messages */
+#define N_IOVEC_META_FIELDS 22
+
+/* Extra fields for log messages that contain OBJECT_PID= (i.e. log about another process) */
+#define N_IOVEC_OBJECT_FIELDS 18
+
+/* Maximum number of fields we'll add in for driver (i.e. internal) messages */
+#define N_IOVEC_PAYLOAD_FIELDS 16
+
+/* kmsg: Maximum number of extra fields we'll import from the kernel's /dev/kmsg */
+#define N_IOVEC_KERNEL_FIELDS 64
+
+/* kmsg: Maximum number of extra fields we'll import from udev's devices */
+#define N_IOVEC_UDEV_FIELDS 32
+
+void server_dispatch_message(Server *s, struct iovec *iovec, size_t n, size_t m, ClientContext *c, const struct timeval *tv, int priority, pid_t object_pid);
+void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) _sentinel_ _printf_(4,0);
+
+/* gperf lookup function */
+const struct ConfigPerfItem* journald_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_storage);
+CONFIG_PARSER_PROTOTYPE(config_parse_line_max);
+CONFIG_PARSER_PROTOTYPE(config_parse_compress);
+
+const char *storage_to_string(Storage s) _const_;
+Storage storage_from_string(const char *s) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_split_mode);
+
+const char *split_mode_to_string(SplitMode s) _const_;
+SplitMode split_mode_from_string(const char *s) _pure_;
+
+int server_init(Server *s);
+void server_done(Server *s);
+void server_sync(Server *s);
+int server_vacuum(Server *s, bool verbose);
+void server_rotate(Server *s);
+int server_schedule_sync(Server *s, int priority);
+int server_flush_to_var(Server *s, bool require_flag_file);
+void server_maybe_append_tags(Server *s);
+int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata);
+void server_space_usage_message(Server *s, JournalStorage *storage);
diff --git a/src/journal/journald-stream.c b/src/journal/journald-stream.c
new file mode 100644
index 0000000..137c8f0
--- /dev/null
+++ b/src/journal/journald-stream.c
@@ -0,0 +1,876 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stddef.h>
+#include <unistd.h>
+
+#if HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "env-file.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "io-util.h"
+#include "journald-console.h"
+#include "journald-context.h"
+#include "journald-kmsg.h"
+#include "journald-server.h"
+#include "journald-stream.h"
+#include "journald-syslog.h"
+#include "journald-wall.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "tmpfile-util.h"
+#include "unit-name.h"
+
+#define STDOUT_STREAMS_MAX 4096
+
+typedef enum StdoutStreamState {
+ STDOUT_STREAM_IDENTIFIER,
+ STDOUT_STREAM_UNIT_ID,
+ STDOUT_STREAM_PRIORITY,
+ STDOUT_STREAM_LEVEL_PREFIX,
+ STDOUT_STREAM_FORWARD_TO_SYSLOG,
+ STDOUT_STREAM_FORWARD_TO_KMSG,
+ STDOUT_STREAM_FORWARD_TO_CONSOLE,
+ STDOUT_STREAM_RUNNING
+} StdoutStreamState;
+
+/* The different types of log record terminators: a real \n was read, a NUL character was read, the maximum line length
+ * was reached, or the end of the stream was reached */
+
+typedef enum LineBreak {
+ LINE_BREAK_NEWLINE,
+ LINE_BREAK_NUL,
+ LINE_BREAK_LINE_MAX,
+ LINE_BREAK_EOF,
+} LineBreak;
+
+struct StdoutStream {
+ Server *server;
+ StdoutStreamState state;
+
+ int fd;
+
+ struct ucred ucred;
+ char *label;
+ char *identifier;
+ char *unit_id;
+ int priority;
+ bool level_prefix:1;
+ bool forward_to_syslog:1;
+ bool forward_to_kmsg:1;
+ bool forward_to_console:1;
+
+ bool fdstore:1;
+ bool in_notify_queue:1;
+
+ char *buffer;
+ size_t length;
+ size_t allocated;
+
+ sd_event_source *event_source;
+
+ char *state_file;
+
+ ClientContext *context;
+
+ LIST_FIELDS(StdoutStream, stdout_stream);
+ LIST_FIELDS(StdoutStream, stdout_stream_notify_queue);
+
+ char id_field[STRLEN("_STREAM_ID=") + SD_ID128_STRING_MAX];
+};
+
+void stdout_stream_free(StdoutStream *s) {
+ if (!s)
+ return;
+
+ if (s->server) {
+
+ if (s->context)
+ client_context_release(s->server, s->context);
+
+ assert(s->server->n_stdout_streams > 0);
+ s->server->n_stdout_streams--;
+ LIST_REMOVE(stdout_stream, s->server->stdout_streams, s);
+
+ if (s->in_notify_queue)
+ LIST_REMOVE(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s);
+ }
+
+ if (s->event_source) {
+ sd_event_source_set_enabled(s->event_source, SD_EVENT_OFF);
+ s->event_source = sd_event_source_unref(s->event_source);
+ }
+
+ safe_close(s->fd);
+ free(s->label);
+ free(s->identifier);
+ free(s->unit_id);
+ free(s->state_file);
+ free(s->buffer);
+
+ free(s);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(StdoutStream*, stdout_stream_free);
+
+void stdout_stream_destroy(StdoutStream *s) {
+ if (!s)
+ return;
+
+ if (s->state_file)
+ (void) unlink(s->state_file);
+
+ stdout_stream_free(s);
+}
+
+static int stdout_stream_save(StdoutStream *s) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(s);
+
+ if (s->state != STDOUT_STREAM_RUNNING)
+ return 0;
+
+ if (!s->state_file) {
+ struct stat st;
+
+ r = fstat(s->fd, &st);
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to stat connected stream: %m");
+
+ /* We use device and inode numbers as identifier for the stream */
+ if (asprintf(&s->state_file, "/run/systemd/journal/streams/%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
+ return log_oom();
+ }
+
+ mkdir_p("/run/systemd/journal/streams", 0755);
+
+ r = fopen_temporary(s->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ fprintf(f,
+ "# This is private data. Do not parse\n"
+ "PRIORITY=%i\n"
+ "LEVEL_PREFIX=%i\n"
+ "FORWARD_TO_SYSLOG=%i\n"
+ "FORWARD_TO_KMSG=%i\n"
+ "FORWARD_TO_CONSOLE=%i\n"
+ "STREAM_ID=%s\n",
+ s->priority,
+ s->level_prefix,
+ s->forward_to_syslog,
+ s->forward_to_kmsg,
+ s->forward_to_console,
+ s->id_field + STRLEN("_STREAM_ID="));
+
+ if (!isempty(s->identifier)) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(s->identifier);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "IDENTIFIER=%s\n", escaped);
+ }
+
+ if (!isempty(s->unit_id)) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(s->unit_id);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "UNIT=%s\n", escaped);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, s->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (!s->fdstore && !s->in_notify_queue) {
+ LIST_PREPEND(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s);
+ s->in_notify_queue = true;
+
+ if (s->server->notify_event_source) {
+ r = sd_event_source_set_enabled(s->server->notify_event_source, SD_EVENT_ON);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enable notify event source: %m");
+ }
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(s->state_file);
+
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save stream data %s: %m", s->state_file);
+}
+
+static int stdout_stream_log(StdoutStream *s, const char *p, LineBreak line_break) {
+ struct iovec *iovec;
+ int priority;
+ char syslog_priority[] = "PRIORITY=\0";
+ char syslog_facility[STRLEN("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int) + 1];
+ _cleanup_free_ char *message = NULL, *syslog_identifier = NULL;
+ size_t n = 0, m;
+ int r;
+
+ assert(s);
+ assert(p);
+
+ if (s->context)
+ (void) client_context_maybe_refresh(s->server, s->context, NULL, NULL, 0, NULL, USEC_INFINITY);
+ else if (pid_is_valid(s->ucred.pid)) {
+ r = client_context_acquire(s->server, s->ucred.pid, &s->ucred, s->label, strlen_ptr(s->label), s->unit_id, &s->context);
+ if (r < 0)
+ log_warning_errno(r, "Failed to acquire client context, ignoring: %m");
+ }
+
+ priority = s->priority;
+
+ if (s->level_prefix)
+ syslog_parse_priority(&p, &priority, false);
+
+ if (!client_context_test_priority(s->context, priority))
+ return 0;
+
+ if (isempty(p))
+ return 0;
+
+ if (s->forward_to_syslog || s->server->forward_to_syslog)
+ server_forward_syslog(s->server, syslog_fixup_facility(priority), s->identifier, p, &s->ucred, NULL);
+
+ if (s->forward_to_kmsg || s->server->forward_to_kmsg)
+ server_forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
+
+ if (s->forward_to_console || s->server->forward_to_console)
+ server_forward_console(s->server, priority, s->identifier, p, &s->ucred);
+
+ if (s->server->forward_to_wall)
+ server_forward_wall(s->server, priority, s->identifier, p, &s->ucred);
+
+ m = N_IOVEC_META_FIELDS + 7 + client_context_extra_fields_n_iovec(s->context);
+ iovec = newa(struct iovec, m);
+
+ iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=stdout");
+ iovec[n++] = IOVEC_MAKE_STRING(s->id_field);
+
+ syslog_priority[STRLEN("PRIORITY=")] = '0' + LOG_PRI(priority);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
+
+ if (priority & LOG_FACMASK) {
+ xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
+ }
+
+ if (s->identifier) {
+ syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
+ if (syslog_identifier)
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
+ }
+
+ if (line_break != LINE_BREAK_NEWLINE) {
+ const char *c;
+
+ /* If this log message was generated due to an uncommon line break then mention this in the log
+ * entry */
+
+ c = line_break == LINE_BREAK_NUL ? "_LINE_BREAK=nul" :
+ line_break == LINE_BREAK_LINE_MAX ? "_LINE_BREAK=line-max" :
+ "_LINE_BREAK=eof";
+ iovec[n++] = IOVEC_MAKE_STRING(c);
+ }
+
+ message = strappend("MESSAGE=", p);
+ if (message)
+ iovec[n++] = IOVEC_MAKE_STRING(message);
+
+ server_dispatch_message(s->server, iovec, n, m, s->context, NULL, priority, 0);
+ return 0;
+}
+
+static int stdout_stream_line(StdoutStream *s, char *p, LineBreak line_break) {
+ int r;
+ char *orig;
+
+ assert(s);
+ assert(p);
+
+ orig = p;
+ p = strstrip(p);
+
+ /* line breaks by NUL, line max length or EOF are not permissible during the negotiation part of the protocol */
+ if (line_break != LINE_BREAK_NEWLINE && s->state != STDOUT_STREAM_RUNNING) {
+ log_warning("Control protocol line not properly terminated.");
+ return -EINVAL;
+ }
+
+ switch (s->state) {
+
+ case STDOUT_STREAM_IDENTIFIER:
+ if (!isempty(p)) {
+ s->identifier = strdup(p);
+ if (!s->identifier)
+ return log_oom();
+ }
+
+ s->state = STDOUT_STREAM_UNIT_ID;
+ return 0;
+
+ case STDOUT_STREAM_UNIT_ID:
+ if (s->ucred.uid == 0 &&
+ unit_name_is_valid(p, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) {
+
+ s->unit_id = strdup(p);
+ if (!s->unit_id)
+ return log_oom();
+ }
+
+ s->state = STDOUT_STREAM_PRIORITY;
+ return 0;
+
+ case STDOUT_STREAM_PRIORITY:
+ r = safe_atoi(p, &s->priority);
+ if (r < 0 || s->priority < 0 || s->priority > 999) {
+ log_warning("Failed to parse log priority line.");
+ return -EINVAL;
+ }
+
+ s->state = STDOUT_STREAM_LEVEL_PREFIX;
+ return 0;
+
+ case STDOUT_STREAM_LEVEL_PREFIX:
+ r = parse_boolean(p);
+ if (r < 0) {
+ log_warning("Failed to parse level prefix line.");
+ return -EINVAL;
+ }
+
+ s->level_prefix = r;
+ s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
+ return 0;
+
+ case STDOUT_STREAM_FORWARD_TO_SYSLOG:
+ r = parse_boolean(p);
+ if (r < 0) {
+ log_warning("Failed to parse forward to syslog line.");
+ return -EINVAL;
+ }
+
+ s->forward_to_syslog = r;
+ s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
+ return 0;
+
+ case STDOUT_STREAM_FORWARD_TO_KMSG:
+ r = parse_boolean(p);
+ if (r < 0) {
+ log_warning("Failed to parse copy to kmsg line.");
+ return -EINVAL;
+ }
+
+ s->forward_to_kmsg = r;
+ s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
+ return 0;
+
+ case STDOUT_STREAM_FORWARD_TO_CONSOLE:
+ r = parse_boolean(p);
+ if (r < 0) {
+ log_warning("Failed to parse copy to console line.");
+ return -EINVAL;
+ }
+
+ s->forward_to_console = r;
+ s->state = STDOUT_STREAM_RUNNING;
+
+ /* Try to save the stream, so that journald can be restarted and we can recover */
+ (void) stdout_stream_save(s);
+ return 0;
+
+ case STDOUT_STREAM_RUNNING:
+ return stdout_stream_log(s, orig, line_break);
+ }
+
+ assert_not_reached("Unknown stream state");
+}
+
+static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
+ char *p;
+ size_t remaining;
+ int r;
+
+ assert(s);
+
+ p = s->buffer;
+ remaining = s->length;
+
+ /* XXX: This function does nothing if (s->length == 0) */
+
+ for (;;) {
+ LineBreak line_break;
+ size_t skip;
+ char *end1, *end2;
+
+ end1 = memchr(p, '\n', remaining);
+ end2 = memchr(p, 0, end1 ? (size_t) (end1 - p) : remaining);
+
+ if (end2) {
+ /* We found a NUL terminator */
+ skip = end2 - p + 1;
+ line_break = LINE_BREAK_NUL;
+ } else if (end1) {
+ /* We found a \n terminator */
+ *end1 = 0;
+ skip = end1 - p + 1;
+ line_break = LINE_BREAK_NEWLINE;
+ } else if (remaining >= s->server->line_max) {
+ /* Force a line break after the maximum line length */
+ *(p + s->server->line_max) = 0;
+ skip = remaining;
+ line_break = LINE_BREAK_LINE_MAX;
+ } else
+ break;
+
+ r = stdout_stream_line(s, p, line_break);
+ if (r < 0)
+ return r;
+
+ remaining -= skip;
+ p += skip;
+ }
+
+ if (force_flush && remaining > 0) {
+ p[remaining] = 0;
+ r = stdout_stream_line(s, p, LINE_BREAK_EOF);
+ if (r < 0)
+ return r;
+
+ p += remaining;
+ remaining = 0;
+ }
+
+ if (p > s->buffer) {
+ memmove(s->buffer, p, remaining);
+ s->length = remaining;
+ }
+
+ return 0;
+}
+
+static int stdout_stream_process(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ StdoutStream *s = userdata;
+ size_t limit;
+ ssize_t l;
+ int r;
+
+ assert(s);
+
+ if ((revents|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
+ log_error("Got invalid event from epoll for stdout stream: %"PRIx32, revents);
+ goto terminate;
+ }
+
+ /* If the buffer is full already (discounting the extra NUL we need), add room for another 1K */
+ if (s->length + 1 >= s->allocated) {
+ if (!GREEDY_REALLOC(s->buffer, s->allocated, s->length + 1 + 1024)) {
+ log_oom();
+ goto terminate;
+ }
+ }
+
+ /* Try to make use of the allocated buffer in full, but never read more than the configured line size. Also,
+ * always leave room for a terminating NUL we might need to add. */
+ limit = MIN(s->allocated - 1, s->server->line_max);
+
+ l = read(s->fd, s->buffer + s->length, limit - s->length);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ log_warning_errno(errno, "Failed to read from stream: %m");
+ goto terminate;
+ }
+
+ if (l == 0) {
+ stdout_stream_scan(s, true);
+ goto terminate;
+ }
+
+ s->length += l;
+ r = stdout_stream_scan(s, false);
+ if (r < 0)
+ goto terminate;
+
+ return 1;
+
+terminate:
+ stdout_stream_destroy(s);
+ return 0;
+}
+
+int stdout_stream_install(Server *s, int fd, StdoutStream **ret) {
+ _cleanup_(stdout_stream_freep) StdoutStream *stream = NULL;
+ sd_id128_t id;
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+
+ r = sd_id128_randomize(&id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate stream ID: %m");
+
+ stream = new0(StdoutStream, 1);
+ if (!stream)
+ return log_oom();
+
+ stream->fd = -1;
+ stream->priority = LOG_INFO;
+
+ xsprintf(stream->id_field, "_STREAM_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(id));
+
+ r = getpeercred(fd, &stream->ucred);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine peer credentials: %m");
+
+ if (mac_selinux_use()) {
+ r = getpeersec(fd, &stream->label);
+ if (r < 0 && r != -EOPNOTSUPP)
+ (void) log_warning_errno(r, "Failed to determine peer security context: %m");
+ }
+
+ (void) shutdown(fd, SHUT_WR);
+
+ r = sd_event_add_io(s->event, &stream->event_source, fd, EPOLLIN, stdout_stream_process, stream);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add stream to event loop: %m");
+
+ r = sd_event_source_set_priority(stream->event_source, SD_EVENT_PRIORITY_NORMAL+5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust stdout event source priority: %m");
+
+ stream->fd = fd;
+
+ stream->server = s;
+ LIST_PREPEND(stdout_stream, s->stdout_streams, stream);
+ s->n_stdout_streams++;
+
+ if (ret)
+ *ret = stream;
+
+ stream = NULL;
+
+ return 0;
+}
+
+static int stdout_stream_new(sd_event_source *es, int listen_fd, uint32_t revents, void *userdata) {
+ _cleanup_close_ int fd = -1;
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+
+ if (revents != EPOLLIN)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Got invalid event from epoll for stdout server fd: %" PRIx32,
+ revents);
+
+ fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (fd < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ return log_error_errno(errno, "Failed to accept stdout connection: %m");
+ }
+
+ if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
+ struct ucred u;
+
+ r = getpeercred(fd, &u);
+
+ /* By closing fd here we make sure that the client won't wait too long for journald to
+ * gather all the data it adds to the error message to find out that the connection has
+ * just been refused.
+ */
+ fd = safe_close(fd);
+
+ server_driver_message(s, r < 0 ? 0 : u.pid, NULL, LOG_MESSAGE("Too many stdout streams, refusing connection."), NULL);
+ return 0;
+ }
+
+ r = stdout_stream_install(s, fd, NULL);
+ if (r < 0)
+ return r;
+
+ fd = -1;
+ return 0;
+}
+
+static int stdout_stream_load(StdoutStream *stream, const char *fname) {
+ _cleanup_free_ char
+ *priority = NULL,
+ *level_prefix = NULL,
+ *forward_to_syslog = NULL,
+ *forward_to_kmsg = NULL,
+ *forward_to_console = NULL,
+ *stream_id = NULL;
+ int r;
+
+ assert(stream);
+ assert(fname);
+
+ if (!stream->state_file) {
+ stream->state_file = strappend("/run/systemd/journal/streams/", fname);
+ if (!stream->state_file)
+ return log_oom();
+ }
+
+ r = parse_env_file(NULL, stream->state_file,
+ "PRIORITY", &priority,
+ "LEVEL_PREFIX", &level_prefix,
+ "FORWARD_TO_SYSLOG", &forward_to_syslog,
+ "FORWARD_TO_KMSG", &forward_to_kmsg,
+ "FORWARD_TO_CONSOLE", &forward_to_console,
+ "IDENTIFIER", &stream->identifier,
+ "UNIT", &stream->unit_id,
+ "STREAM_ID", &stream_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read: %s", stream->state_file);
+
+ if (priority) {
+ int p;
+
+ p = log_level_from_string(priority);
+ if (p >= 0)
+ stream->priority = p;
+ }
+
+ if (level_prefix) {
+ r = parse_boolean(level_prefix);
+ if (r >= 0)
+ stream->level_prefix = r;
+ }
+
+ if (forward_to_syslog) {
+ r = parse_boolean(forward_to_syslog);
+ if (r >= 0)
+ stream->forward_to_syslog = r;
+ }
+
+ if (forward_to_kmsg) {
+ r = parse_boolean(forward_to_kmsg);
+ if (r >= 0)
+ stream->forward_to_kmsg = r;
+ }
+
+ if (forward_to_console) {
+ r = parse_boolean(forward_to_console);
+ if (r >= 0)
+ stream->forward_to_console = r;
+ }
+
+ if (stream_id) {
+ sd_id128_t id;
+
+ r = sd_id128_from_string(stream_id, &id);
+ if (r >= 0)
+ xsprintf(stream->id_field, "_STREAM_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(id));
+ }
+
+ return 0;
+}
+
+static int stdout_stream_restore(Server *s, const char *fname, int fd) {
+ StdoutStream *stream;
+ int r;
+
+ assert(s);
+ assert(fname);
+ assert(fd >= 0);
+
+ if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
+ log_warning("Too many stdout streams, refusing restoring of stream.");
+ return -ENOBUFS;
+ }
+
+ r = stdout_stream_install(s, fd, &stream);
+ if (r < 0)
+ return r;
+
+ stream->state = STDOUT_STREAM_RUNNING;
+ stream->fdstore = true;
+
+ /* Ignore all parsing errors */
+ (void) stdout_stream_load(stream, fname);
+
+ return 0;
+}
+
+int server_restore_streams(Server *s, FDSet *fds) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r;
+
+ d = opendir("/run/systemd/journal/streams");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to enumerate /run/systemd/journal/streams: %m");
+ }
+
+ FOREACH_DIRENT(de, d, goto fail) {
+ unsigned long st_dev, st_ino;
+ bool found = false;
+ Iterator i;
+ int fd;
+
+ if (sscanf(de->d_name, "%lu:%lu", &st_dev, &st_ino) != 2)
+ continue;
+
+ FDSET_FOREACH(fd, fds, i) {
+ struct stat st;
+
+ if (fstat(fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat %s: %m", de->d_name);
+
+ if (S_ISSOCK(st.st_mode) && st.st_dev == st_dev && st.st_ino == st_ino) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ /* No file descriptor? Then let's delete the state file */
+ log_debug("Cannot restore stream file %s", de->d_name);
+ if (unlinkat(dirfd(d), de->d_name, 0) < 0)
+ log_warning_errno(errno, "Failed to remove /run/systemd/journal/streams/%s: %m",
+ de->d_name);
+ continue;
+ }
+
+ fdset_remove(fds, fd);
+
+ r = stdout_stream_restore(s, de->d_name, fd);
+ if (r < 0)
+ safe_close(fd);
+ }
+
+ return 0;
+
+fail:
+ return log_error_errno(errno, "Failed to read streams directory: %m");
+}
+
+int server_open_stdout_socket(Server *s) {
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/stdout",
+ };
+ int r;
+
+ assert(s);
+
+ if (s->stdout_fd < 0) {
+ s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s->stdout_fd < 0)
+ return log_error_errno(errno, "socket() failed: %m");
+
+ (void) sockaddr_un_unlink(&sa.un);
+
+ r = bind(s->stdout_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
+
+ (void) chmod(sa.un.sun_path, 0666);
+
+ if (listen(s->stdout_fd, SOMAXCONN) < 0)
+ return log_error_errno(errno, "listen(%s) failed: %m", sa.un.sun_path);
+ } else
+ (void) fd_nonblock(s->stdout_fd, true);
+
+ r = sd_event_add_io(s->event, &s->stdout_event_source, s->stdout_fd, EPOLLIN, stdout_stream_new, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add stdout server fd to event source: %m");
+
+ r = sd_event_source_set_priority(s->stdout_event_source, SD_EVENT_PRIORITY_NORMAL+5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust priority of stdout server event source: %m");
+
+ return 0;
+}
+
+void stdout_stream_send_notify(StdoutStream *s) {
+ struct iovec iovec = {
+ .iov_base = (char*) "FDSTORE=1",
+ .iov_len = STRLEN("FDSTORE=1"),
+ };
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+ ssize_t l;
+
+ assert(s);
+ assert(!s->fdstore);
+ assert(s->in_notify_queue);
+ assert(s->server);
+ assert(s->server->notify_fd >= 0);
+
+ /* Store the connection fd in PID 1, so that we get it passed
+ * in again on next start */
+
+ msghdr.msg_controllen = CMSG_SPACE(sizeof(int));
+ msghdr.msg_control = alloca0(msghdr.msg_controllen);
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+
+ memcpy(CMSG_DATA(cmsg), &s->fd, sizeof(int));
+
+ l = sendmsg(s->server->notify_fd, &msghdr, MSG_DONTWAIT|MSG_NOSIGNAL);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return;
+
+ log_error_errno(errno, "Failed to send stream file descriptor to service manager: %m");
+ } else {
+ log_debug("Successfully sent stream file descriptor to service manager.");
+ s->fdstore = 1;
+ }
+
+ LIST_REMOVE(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s);
+ s->in_notify_queue = false;
+
+}
diff --git a/src/journal/journald-stream.h b/src/journal/journald-stream.h
new file mode 100644
index 0000000..487376e
--- /dev/null
+++ b/src/journal/journald-stream.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct StdoutStream StdoutStream;
+
+#include "fdset.h"
+#include "journald-server.h"
+
+int server_open_stdout_socket(Server *s);
+int server_restore_streams(Server *s, FDSet *fds);
+
+void stdout_stream_free(StdoutStream *s);
+int stdout_stream_install(Server *s, int fd, StdoutStream **ret);
+void stdout_stream_destroy(StdoutStream *s);
+void stdout_stream_send_notify(StdoutStream *s);
diff --git a/src/journal/journald-syslog.c b/src/journal/journald-syslog.c
new file mode 100644
index 0000000..a60a259
--- /dev/null
+++ b/src/journal/journald-syslog.c
@@ -0,0 +1,516 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stddef.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "journald-console.h"
+#include "journald-kmsg.h"
+#include "journald-server.h"
+#include "journald-syslog.h"
+#include "journald-wall.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "syslog-util.h"
+
+/* Warn once every 30s if we missed syslog message */
+#define WARN_FORWARD_SYSLOG_MISSED_USEC (30 * USEC_PER_SEC)
+
+static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, const struct ucred *ucred, const struct timeval *tv) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/syslog",
+ };
+ struct msghdr msghdr = {
+ .msg_iov = (struct iovec *) iovec,
+ .msg_iovlen = n_iovec,
+ .msg_name = (struct sockaddr*) &sa.sa,
+ .msg_namelen = SOCKADDR_UN_LEN(sa.un),
+ };
+ struct cmsghdr *cmsg;
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
+ } control;
+
+ assert(s);
+ assert(iovec);
+ assert(n_iovec > 0);
+
+ if (ucred) {
+ zero(control);
+ msghdr.msg_control = &control;
+ msghdr.msg_controllen = sizeof(control);
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_CREDENTIALS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
+ memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
+ msghdr.msg_controllen = cmsg->cmsg_len;
+ }
+
+ /* Forward the syslog message we received via /dev/log to
+ * /run/systemd/syslog. Unfortunately we currently can't set
+ * the SO_TIMESTAMP auxiliary data, and hence we don't. */
+
+ if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
+ return;
+
+ /* The socket is full? I guess the syslog implementation is
+ * too slow, and we shouldn't wait for that... */
+ if (errno == EAGAIN) {
+ s->n_forward_syslog_missed++;
+ return;
+ }
+
+ if (ucred && IN_SET(errno, ESRCH, EPERM)) {
+ struct ucred u;
+
+ /* Hmm, presumably the sender process vanished
+ * by now, or we don't have CAP_SYS_AMDIN, so
+ * let's fix it as good as we can, and retry */
+
+ u = *ucred;
+ u.pid = getpid_cached();
+ memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
+
+ if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
+ return;
+
+ if (errno == EAGAIN) {
+ s->n_forward_syslog_missed++;
+ return;
+ }
+ }
+
+ if (errno != ENOENT)
+ log_debug_errno(errno, "Failed to forward syslog message: %m");
+}
+
+static void forward_syslog_raw(Server *s, int priority, const char *buffer, size_t buffer_len, const struct ucred *ucred, const struct timeval *tv) {
+ struct iovec iovec;
+
+ assert(s);
+ assert(buffer);
+
+ if (LOG_PRI(priority) > s->max_level_syslog)
+ return;
+
+ iovec = IOVEC_MAKE((char *) buffer, buffer_len);
+ forward_syslog_iovec(s, &iovec, 1, ucred, tv);
+}
+
+void server_forward_syslog(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred, const struct timeval *tv) {
+ struct iovec iovec[5];
+ char header_priority[DECIMAL_STR_MAX(priority) + 3], header_time[64],
+ header_pid[STRLEN("[]: ") + DECIMAL_STR_MAX(pid_t) + 1];
+ int n = 0;
+ time_t t;
+ struct tm tm;
+ _cleanup_free_ char *ident_buf = NULL;
+
+ assert(s);
+ assert(priority >= 0);
+ assert(priority <= 999);
+ assert(message);
+
+ if (LOG_PRI(priority) > s->max_level_syslog)
+ return;
+
+ /* First: priority field */
+ xsprintf(header_priority, "<%i>", priority);
+ iovec[n++] = IOVEC_MAKE_STRING(header_priority);
+
+ /* Second: timestamp */
+ t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
+ if (!localtime_r(&t, &tm))
+ return;
+ if (strftime(header_time, sizeof(header_time), "%h %e %T ", &tm) <= 0)
+ return;
+ iovec[n++] = IOVEC_MAKE_STRING(header_time);
+
+ /* Third: identifier and PID */
+ if (ucred) {
+ if (!identifier) {
+ get_process_comm(ucred->pid, &ident_buf);
+ identifier = ident_buf;
+ }
+
+ xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
+
+ if (identifier)
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+
+ iovec[n++] = IOVEC_MAKE_STRING(header_pid);
+ } else if (identifier) {
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(": ");
+ }
+
+ /* Fourth: message */
+ iovec[n++] = IOVEC_MAKE_STRING(message);
+
+ forward_syslog_iovec(s, iovec, n, ucred, tv);
+}
+
+int syslog_fixup_facility(int priority) {
+
+ if ((priority & LOG_FACMASK) == 0)
+ return (priority & LOG_PRIMASK) | LOG_USER;
+
+ return priority;
+}
+
+size_t syslog_parse_identifier(const char **buf, char **identifier, char **pid) {
+ const char *p;
+ char *t;
+ size_t l, e;
+
+ assert(buf);
+ assert(identifier);
+ assert(pid);
+
+ p = *buf;
+
+ p += strspn(p, WHITESPACE);
+ l = strcspn(p, WHITESPACE);
+
+ if (l <= 0 ||
+ p[l-1] != ':')
+ return 0;
+
+ e = l;
+ l--;
+
+ if (l > 0 && p[l-1] == ']') {
+ size_t k = l-1;
+
+ for (;;) {
+
+ if (p[k] == '[') {
+ t = strndup(p+k+1, l-k-2);
+ if (t)
+ *pid = t;
+
+ l = k;
+ break;
+ }
+
+ if (k == 0)
+ break;
+
+ k--;
+ }
+ }
+
+ t = strndup(p, l);
+ if (t)
+ *identifier = t;
+
+ /* Single space is used as separator */
+ if (p[e] != '\0' && strchr(WHITESPACE, p[e]))
+ e++;
+
+ l = (p - *buf) + e;
+ *buf = p + e;
+ return l;
+}
+
+static int syslog_skip_timestamp(const char **buf) {
+ enum {
+ LETTER,
+ SPACE,
+ NUMBER,
+ SPACE_OR_NUMBER,
+ COLON
+ } sequence[] = {
+ LETTER, LETTER, LETTER,
+ SPACE,
+ SPACE_OR_NUMBER, NUMBER,
+ SPACE,
+ SPACE_OR_NUMBER, NUMBER,
+ COLON,
+ SPACE_OR_NUMBER, NUMBER,
+ COLON,
+ SPACE_OR_NUMBER, NUMBER,
+ SPACE
+ };
+
+ const char *p, *t;
+ unsigned i;
+
+ assert(buf);
+ assert(*buf);
+
+ for (i = 0, p = *buf; i < ELEMENTSOF(sequence); i++, p++) {
+ if (!*p)
+ return 0;
+
+ switch (sequence[i]) {
+
+ case SPACE:
+ if (*p != ' ')
+ return 0;
+ break;
+
+ case SPACE_OR_NUMBER:
+ if (*p == ' ')
+ break;
+
+ _fallthrough_;
+ case NUMBER:
+ if (*p < '0' || *p > '9')
+ return 0;
+
+ break;
+
+ case LETTER:
+ if (!(*p >= 'A' && *p <= 'Z') &&
+ !(*p >= 'a' && *p <= 'z'))
+ return 0;
+
+ break;
+
+ case COLON:
+ if (*p != ':')
+ return 0;
+ break;
+
+ }
+ }
+
+ t = *buf;
+ *buf = p;
+ return p - t;
+}
+
+void server_process_syslog_message(
+ Server *s,
+ const char *buf,
+ size_t raw_len,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label,
+ size_t label_len) {
+
+ char *t, syslog_priority[sizeof("PRIORITY=") + DECIMAL_STR_MAX(int)],
+ syslog_facility[sizeof("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int)];
+ const char *msg, *syslog_ts, *a;
+ _cleanup_free_ char *identifier = NULL, *pid = NULL,
+ *dummy = NULL, *msg_msg = NULL, *msg_raw = NULL;
+ int priority = LOG_USER | LOG_INFO, r;
+ ClientContext *context = NULL;
+ struct iovec *iovec;
+ size_t n = 0, m, i, leading_ws, syslog_ts_len;
+ bool store_raw;
+
+ assert(s);
+ assert(buf);
+ /* The message cannot be empty. */
+ assert(raw_len > 0);
+ /* The buffer NUL-terminated and can be used a string. raw_len is the length
+ * without the terminating NUL byte, the buffer is actually one bigger. */
+ assert(buf[raw_len] == '\0');
+
+ if (ucred && pid_is_valid(ucred->pid)) {
+ r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context);
+ if (r < 0)
+ log_warning_errno(r, "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m", ucred->pid);
+ }
+
+ /* We are creating a copy of the message because we want to forward the original message
+ verbatim to the legacy syslog implementation */
+ for (i = raw_len; i > 0; i--)
+ if (!strchr(WHITESPACE, buf[i-1]))
+ break;
+
+ leading_ws = strspn(buf, WHITESPACE);
+
+ if (i == 0)
+ /* The message contains only whitespaces */
+ msg = buf + raw_len;
+ else if (i == raw_len)
+ /* Nice! No need to strip anything on the end, let's optimize this a bit */
+ msg = buf + leading_ws;
+ else {
+ msg = dummy = new(char, i - leading_ws + 1);
+ if (!dummy) {
+ log_oom();
+ return;
+ }
+
+ memcpy(dummy, buf + leading_ws, i - leading_ws);
+ dummy[i - leading_ws] = 0;
+ }
+
+ /* We will add the SYSLOG_RAW= field when we stripped anything
+ * _or_ if the input message contained NUL bytes. */
+ store_raw = msg != buf || strlen(msg) != raw_len;
+
+ syslog_parse_priority(&msg, &priority, true);
+
+ if (!client_context_test_priority(context, priority))
+ return;
+
+ syslog_ts = msg;
+ syslog_ts_len = syslog_skip_timestamp(&msg);
+ if (syslog_ts_len == 0)
+ /* We failed to parse the full timestamp, store the raw message too */
+ store_raw = true;
+
+ syslog_parse_identifier(&msg, &identifier, &pid);
+
+ if (s->forward_to_syslog)
+ forward_syslog_raw(s, priority, buf, raw_len, ucred, tv);
+
+ if (s->forward_to_kmsg)
+ server_forward_kmsg(s, priority, identifier, msg, ucred);
+
+ if (s->forward_to_console)
+ server_forward_console(s, priority, identifier, msg, ucred);
+
+ if (s->forward_to_wall)
+ server_forward_wall(s, priority, identifier, msg, ucred);
+
+ m = N_IOVEC_META_FIELDS + 8 + client_context_extra_fields_n_iovec(context);
+ iovec = newa(struct iovec, m);
+
+ iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=syslog");
+
+ xsprintf(syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
+
+ if (priority & LOG_FACMASK) {
+ xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
+ }
+
+ if (identifier) {
+ a = strjoina("SYSLOG_IDENTIFIER=", identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(a);
+ }
+
+ if (pid) {
+ a = strjoina("SYSLOG_PID=", pid);
+ iovec[n++] = IOVEC_MAKE_STRING(a);
+ }
+
+ if (syslog_ts_len > 0) {
+ const size_t hlen = STRLEN("SYSLOG_TIMESTAMP=");
+
+ t = newa(char, hlen + syslog_ts_len);
+ memcpy(t, "SYSLOG_TIMESTAMP=", hlen);
+ memcpy(t + hlen, syslog_ts, syslog_ts_len);
+
+ iovec[n++] = IOVEC_MAKE(t, hlen + syslog_ts_len);
+ }
+
+ msg_msg = strjoin("MESSAGE=", msg);
+ if (!msg_msg) {
+ log_oom();
+ return;
+ }
+ iovec[n++] = IOVEC_MAKE_STRING(msg_msg);
+
+ if (store_raw) {
+ const size_t hlen = STRLEN("SYSLOG_RAW=");
+
+ msg_raw = new(char, hlen + raw_len);
+ if (!msg_raw) {
+ log_oom();
+ return;
+ }
+
+ memcpy(msg_raw, "SYSLOG_RAW=", hlen);
+ memcpy(msg_raw + hlen, buf, raw_len);
+
+ iovec[n++] = IOVEC_MAKE(msg_raw, hlen + raw_len);
+ }
+
+ server_dispatch_message(s, iovec, n, m, context, tv, priority, 0);
+}
+
+int server_open_syslog_socket(Server *s) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/dev-log",
+ };
+ int r;
+
+ assert(s);
+
+ if (s->syslog_fd < 0) {
+ s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s->syslog_fd < 0)
+ return log_error_errno(errno, "socket() failed: %m");
+
+ (void) sockaddr_un_unlink(&sa.un);
+
+ r = bind(s->syslog_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
+
+ (void) chmod(sa.un.sun_path, 0666);
+ } else
+ (void) fd_nonblock(s->syslog_fd, true);
+
+ r = setsockopt_int(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_error_errno(r, "SO_PASSCRED failed: %m");
+
+#if HAVE_SELINUX
+ if (mac_selinux_use()) {
+ r = setsockopt_int(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, true);
+ if (r < 0)
+ log_warning_errno(r, "SO_PASSSEC failed: %m");
+ }
+#endif
+
+ r = setsockopt_int(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, true);
+ if (r < 0)
+ return log_error_errno(r, "SO_TIMESTAMP failed: %m");
+
+ r = sd_event_add_io(s->event, &s->syslog_event_source, s->syslog_fd, EPOLLIN, server_process_datagram, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add syslog server fd to event loop: %m");
+
+ r = sd_event_source_set_priority(s->syslog_event_source, SD_EVENT_PRIORITY_NORMAL+5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust syslog event source priority: %m");
+
+ return 0;
+}
+
+void server_maybe_warn_forward_syslog_missed(Server *s) {
+ usec_t n;
+
+ assert(s);
+
+ if (s->n_forward_syslog_missed <= 0)
+ return;
+
+ n = now(CLOCK_MONOTONIC);
+ if (s->last_warn_forward_syslog_missed + WARN_FORWARD_SYSLOG_MISSED_USEC > n)
+ return;
+
+ server_driver_message(s, 0,
+ "MESSAGE_ID=" SD_MESSAGE_FORWARD_SYSLOG_MISSED_STR,
+ LOG_MESSAGE("Forwarding to syslog missed %u messages.",
+ s->n_forward_syslog_missed),
+ NULL);
+
+ s->n_forward_syslog_missed = 0;
+ s->last_warn_forward_syslog_missed = n;
+}
diff --git a/src/journal/journald-syslog.h b/src/journal/journald-syslog.h
new file mode 100644
index 0000000..7ca5897
--- /dev/null
+++ b/src/journal/journald-syslog.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "journald-server.h"
+
+int syslog_fixup_facility(int priority) _const_;
+
+size_t syslog_parse_identifier(const char **buf, char **identifier, char **pid);
+
+void server_forward_syslog(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred, const struct timeval *tv);
+
+void server_process_syslog_message(Server *s, const char *buf, size_t buf_len, const struct ucred *ucred, const struct timeval *tv, const char *label, size_t label_len);
+int server_open_syslog_socket(Server *s);
+
+void server_maybe_warn_forward_syslog_missed(Server *s);
diff --git a/src/journal/journald-wall.c b/src/journal/journald-wall.c
new file mode 100644
index 0000000..370c9b3
--- /dev/null
+++ b/src/journal/journald-wall.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "format-util.h"
+#include "journald-server.h"
+#include "journald-wall.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "utmp-wtmp.h"
+
+void server_forward_wall(
+ Server *s,
+ int priority,
+ const char *identifier,
+ const char *message,
+ const struct ucred *ucred) {
+
+ _cleanup_free_ char *ident_buf = NULL, *l_buf = NULL;
+ const char *l;
+ int r;
+
+ assert(s);
+ assert(message);
+
+ if (LOG_PRI(priority) > s->max_level_wall)
+ return;
+
+ if (ucred) {
+ if (!identifier) {
+ get_process_comm(ucred->pid, &ident_buf);
+ identifier = ident_buf;
+ }
+
+ if (asprintf(&l_buf, "%s["PID_FMT"]: %s", strempty(identifier), ucred->pid, message) < 0) {
+ log_oom();
+ return;
+ }
+
+ l = l_buf;
+
+ } else if (identifier) {
+
+ l = l_buf = strjoin(identifier, ": ", message);
+ if (!l_buf) {
+ log_oom();
+ return;
+ }
+ } else
+ l = message;
+
+ r = utmp_wall(l, "systemd-journald", NULL, NULL, NULL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to send wall message: %m");
+}
diff --git a/src/journal/journald-wall.h b/src/journal/journald-wall.h
new file mode 100644
index 0000000..b73059a
--- /dev/null
+++ b/src/journal/journald-wall.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "journald-server.h"
+
+void server_forward_wall(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred);
diff --git a/src/journal/journald.c b/src/journal/journald.c
new file mode 100644
index 0000000..5e7b1dc
--- /dev/null
+++ b/src/journal/journald.c
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <unistd.h>
+
+#include "sd-daemon.h"
+#include "sd-messages.h"
+
+#include "format-util.h"
+#include "journal-authenticate.h"
+#include "journald-kmsg.h"
+#include "journald-server.h"
+#include "journald-syslog.h"
+#include "process-util.h"
+#include "sigbus.h"
+
+int main(int argc, char *argv[]) {
+ Server server;
+ int r;
+
+ if (argc > 1) {
+ log_error("This program does not take arguments.");
+ return EXIT_FAILURE;
+ }
+
+ log_set_prohibit_ipc(true);
+ log_set_target(LOG_TARGET_AUTO);
+ log_set_facility(LOG_SYSLOG);
+ log_parse_environment();
+ log_open();
+
+ umask(0022);
+
+ sigbus_install();
+
+ r = server_init(&server);
+ if (r < 0)
+ goto finish;
+
+ server_vacuum(&server, false);
+ server_flush_to_var(&server, true);
+ server_flush_dev_kmsg(&server);
+
+ log_debug("systemd-journald running as pid "PID_FMT, getpid_cached());
+ server_driver_message(&server, 0,
+ "MESSAGE_ID=" SD_MESSAGE_JOURNAL_START_STR,
+ LOG_MESSAGE("Journal started"),
+ NULL);
+
+ /* Make sure to send the usage message *after* flushing the
+ * journal so entries from the runtime journals are ordered
+ * before this message. See #4190 for some details. */
+ server_space_usage_message(&server, NULL);
+
+ for (;;) {
+ usec_t t = USEC_INFINITY, n;
+
+ r = sd_event_get_state(server.event);
+ if (r < 0)
+ goto finish;
+ if (r == SD_EVENT_FINISHED)
+ break;
+
+ n = now(CLOCK_REALTIME);
+
+ if (server.max_retention_usec > 0 && server.oldest_file_usec > 0) {
+
+ /* The retention time is reached, so let's vacuum! */
+ if (server.oldest_file_usec + server.max_retention_usec < n) {
+ log_info("Retention time reached.");
+ server_rotate(&server);
+ server_vacuum(&server, false);
+ continue;
+ }
+
+ /* Calculate when to rotate the next time */
+ t = server.oldest_file_usec + server.max_retention_usec - n;
+ }
+
+#if HAVE_GCRYPT
+ if (server.system_journal) {
+ usec_t u;
+
+ if (journal_file_next_evolve_usec(server.system_journal, &u)) {
+ if (n >= u)
+ t = 0;
+ else
+ t = MIN(t, u - n);
+ }
+ }
+#endif
+
+ r = sd_event_run(server.event, t);
+ if (r < 0) {
+ log_error_errno(r, "Failed to run event loop: %m");
+ goto finish;
+ }
+
+ server_maybe_append_tags(&server);
+ server_maybe_warn_forward_syslog_missed(&server);
+ }
+
+ log_debug("systemd-journald stopped as pid "PID_FMT, getpid_cached());
+ server_driver_message(&server, 0,
+ "MESSAGE_ID=" SD_MESSAGE_JOURNAL_STOP_STR,
+ LOG_MESSAGE("Journal stopped"),
+ NULL);
+
+finish:
+ server_done(&server);
+
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/journal/journald.conf b/src/journal/journald.conf
new file mode 100644
index 0000000..2f1c661
--- /dev/null
+++ b/src/journal/journald.conf
@@ -0,0 +1,43 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See journald.conf(5) for details.
+
+[Journal]
+#Storage=auto
+#Compress=yes
+#Seal=yes
+#SplitMode=uid
+#SyncIntervalSec=5m
+#RateLimitIntervalSec=30s
+#RateLimitBurst=10000
+#SystemMaxUse=
+#SystemKeepFree=
+#SystemMaxFileSize=
+#SystemMaxFiles=100
+#RuntimeMaxUse=
+#RuntimeKeepFree=
+#RuntimeMaxFileSize=
+#RuntimeMaxFiles=100
+#MaxRetentionSec=
+#MaxFileSec=1month
+#ForwardToSyslog=no
+#ForwardToKMsg=no
+#ForwardToConsole=no
+#ForwardToWall=yes
+#TTYPath=/dev/console
+#MaxLevelStore=debug
+#MaxLevelSyslog=debug
+#MaxLevelKMsg=notice
+#MaxLevelConsole=info
+#MaxLevelWall=emerg
+#LineMax=48K
+#ReadKMsg=yes
diff --git a/src/journal/lookup3.c b/src/journal/lookup3.c
new file mode 100644
index 0000000..6c61f17
--- /dev/null
+++ b/src/journal/lookup3.c
@@ -0,0 +1,1005 @@
+/* Slightly modified by Lennart Poettering, to avoid name clashes, and
+ * unexport a few functions. */
+
+#include "lookup3.h"
+
+/*
+-------------------------------------------------------------------------------
+lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+
+These are functions for producing 32-bit hashes for hash table lookup.
+hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+are externally useful functions. Routines to test the hash are included
+if SELF_TEST is defined. You can use this free for any purpose. It's in
+the public domain. It has no warranty.
+
+You probably want to use hashlittle(). hashlittle() and hashbig()
+hash byte arrays. hashlittle() is faster than hashbig() on
+little-endian machines. Intel and AMD are little-endian machines.
+On second thought, you probably want hashlittle2(), which is identical to
+hashlittle() except it returns two 32-bit hashes for the price of one.
+You could implement hashbig2() if you wanted but I haven't bothered here.
+
+If you want to find a hash of, say, exactly 7 integers, do
+ a = i1; b = i2; c = i3;
+ mix(a,b,c);
+ a += i4; b += i5; c += i6;
+ mix(a,b,c);
+ a += i7;
+ final(a,b,c);
+then use c as the hash value. If you have a variable length array of
+4-byte integers to hash, use hashword(). If you have a byte array (like
+a character string), use hashlittle(). If you have several byte arrays, or
+a mix of things, see the comments above hashlittle().
+
+Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
+then mix those integers. This is fast (you can do a lot more thorough
+mixing with 12*3 instructions on 3 integers than you can with 3 instructions
+on 1 byte), but shoehorning those bytes into integers efficiently is messy.
+-------------------------------------------------------------------------------
+*/
+/* #define SELF_TEST 1 */
+
+#include <stdint.h> /* defines uint32_t etc */
+#include <stdio.h> /* defines printf for tests */
+#include <sys/param.h> /* attempt to define endianness */
+#include <time.h> /* defines time_t for timings in the test */
+#ifdef linux
+# include <endian.h> /* attempt to define endianness */
+#endif
+
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+
+/*
+ * My best guess at if you are big-endian or little-endian. This may
+ * need adjustment.
+ */
+#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
+ __BYTE_ORDER == __LITTLE_ENDIAN) || \
+ (defined(i386) || defined(__i386__) || defined(__i486__) || \
+ defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
+# define HASH_LITTLE_ENDIAN 1
+# define HASH_BIG_ENDIAN 0
+#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
+ __BYTE_ORDER == __BIG_ENDIAN) || \
+ (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
+# define HASH_LITTLE_ENDIAN 0
+# define HASH_BIG_ENDIAN 1
+#else
+# define HASH_LITTLE_ENDIAN 0
+# define HASH_BIG_ENDIAN 0
+#endif
+
+#define hashsize(n) ((uint32_t)1<<(n))
+#define hashmask(n) (hashsize(n)-1)
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
+/*
+-------------------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+
+This is reversible, so any information in (a,b,c) before mix() is
+still in (a,b,c) after mix().
+
+If four pairs of (a,b,c) inputs are run through mix(), or through
+mix() in reverse, there are at least 32 bits of the output that
+are sometimes the same for one pair and different for another pair.
+This was tested for:
+* pairs that differed by one bit, by two bits, in any combination
+ of top bits of (a,b,c), or in any combination of bottom bits of
+ (a,b,c).
+* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ is commonly produced by subtraction) look like a single 1-bit
+ difference.
+* the base values were pseudorandom, all zero but one bit set, or
+ all zero plus a counter that starts at zero.
+
+Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
+satisfy this are
+ 4 6 8 16 19 4
+ 9 15 3 18 27 15
+ 14 9 3 7 17 3
+Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
+for "differ" defined as + with a one-bit base and a two-bit delta. I
+used http://burtleburtle.net/bob/hash/avalanche.html to choose
+the operations, constants, and arrangements of the variables.
+
+This does not achieve avalanche. There are input bits of (a,b,c)
+that fail to affect some output bits of (a,b,c), especially of a. The
+most thoroughly mixed value is c, but it doesn't really even achieve
+avalanche in c.
+
+This allows some parallelism. Read-after-writes are good at doubling
+the number of bits affected, so the goal of mixing pulls in the opposite
+direction as the goal of parallelism. I did what I could. Rotates
+seem to cost as much as shifts on every machine I could lay my hands
+on, and rotates are much kinder to the top and bottom bits, so I used
+rotates.
+-------------------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+ a -= c; a ^= rot(c, 4); c += b; \
+ b -= a; b ^= rot(a, 6); a += c; \
+ c -= b; c ^= rot(b, 8); b += a; \
+ a -= c; a ^= rot(c,16); c += b; \
+ b -= a; b ^= rot(a,19); a += c; \
+ c -= b; c ^= rot(b, 4); b += a; \
+}
+
+/*
+-------------------------------------------------------------------------------
+final -- final mixing of 3 32-bit values (a,b,c) into c
+
+Pairs of (a,b,c) values differing in only a few bits will usually
+produce values of c that look totally different. This was tested for
+* pairs that differed by one bit, by two bits, in any combination
+ of top bits of (a,b,c), or in any combination of bottom bits of
+ (a,b,c).
+* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ is commonly produced by subtraction) look like a single 1-bit
+ difference.
+* the base values were pseudorandom, all zero but one bit set, or
+ all zero plus a counter that starts at zero.
+
+These constants passed:
+ 14 11 25 16 4 14 24
+ 12 14 25 16 4 14 24
+and these came close:
+ 4 8 15 26 3 22 24
+ 10 8 15 26 3 22 24
+ 11 8 15 26 3 22 24
+-------------------------------------------------------------------------------
+*/
+#define final(a,b,c) \
+{ \
+ c ^= b; c -= rot(b,14); \
+ a ^= c; a -= rot(c,11); \
+ b ^= a; b -= rot(a,25); \
+ c ^= b; c -= rot(b,16); \
+ a ^= c; a -= rot(c,4); \
+ b ^= a; b -= rot(a,14); \
+ c ^= b; c -= rot(b,24); \
+}
+
+/*
+--------------------------------------------------------------------
+ This works on all machines. To be useful, it requires
+ -- that the key be an array of uint32_t's, and
+ -- that the length be the number of uint32_t's in the key
+
+ The function hashword() is identical to hashlittle() on little-endian
+ machines, and identical to hashbig() on big-endian machines,
+ except that the length has to be measured in uint32_ts rather than in
+ bytes. hashlittle() is more complicated than hashword() only because
+ hashlittle() has to dance around fitting the key bytes into registers.
+--------------------------------------------------------------------
+*/
+uint32_t jenkins_hashword(
+const uint32_t *k, /* the key, an array of uint32_t values */
+size_t length, /* the length of the key, in uint32_ts */
+uint32_t initval) /* the previous hash, or an arbitrary value */
+{
+ uint32_t a,b,c;
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
+
+ /*------------------------------------------------- handle most of the key */
+ while (length > 3)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 3;
+ k += 3;
+ }
+
+ /*------------------------------------------- handle the last 3 uint32_t's */
+ switch(length) /* all the case statements fall through */
+ {
+ case 3 : c+=k[2];
+ case 2 : b+=k[1];
+ case 1 : a+=k[0];
+ final(a,b,c);
+ case 0: /* case 0: nothing left to add */
+ break;
+ }
+ /*------------------------------------------------------ report the result */
+ return c;
+}
+
+/*
+--------------------------------------------------------------------
+hashword2() -- same as hashword(), but take two seeds and return two
+32-bit values. pc and pb must both be nonnull, and *pc and *pb must
+both be initialized with seeds. If you pass in (*pb)==0, the output
+(*pc) will be the same as the return value from hashword().
+--------------------------------------------------------------------
+*/
+void jenkins_hashword2 (
+const uint32_t *k, /* the key, an array of uint32_t values */
+size_t length, /* the length of the key, in uint32_ts */
+uint32_t *pc, /* IN: seed OUT: primary hash value */
+uint32_t *pb) /* IN: more seed OUT: secondary hash value */
+{
+ uint32_t a,b,c;
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
+ c += *pb;
+
+ /*------------------------------------------------- handle most of the key */
+ while (length > 3)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 3;
+ k += 3;
+ }
+
+ /*------------------------------------------- handle the last 3 uint32_t's */
+ switch(length) /* all the case statements fall through */
+ {
+ case 3 : c+=k[2];
+ case 2 : b+=k[1];
+ case 1 : a+=k[0];
+ final(a,b,c);
+ case 0: /* case 0: nothing left to add */
+ break;
+ }
+ /*------------------------------------------------------ report the result */
+ *pc=c; *pb=b;
+}
+
+/*
+-------------------------------------------------------------------------------
+hashlittle() -- hash a variable-length key into a 32-bit value
+ k : the key (the unaligned variable-length array of bytes)
+ length : the length of the key, counting by bytes
+ initval : can be any 4-byte value
+Returns a 32-bit value. Every bit of the key affects every bit of
+the return value. Two keys differing by one or two bits will have
+totally different hash values.
+
+The best hash table sizes are powers of 2. There is no need to do
+mod a prime (mod is sooo slow!). If you need less than 32 bits,
+use a bitmask. For example, if you need only 10 bits, do
+ h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (uint8_t **)k, do it like this:
+ for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
+
+By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
+code any way you wish, private, educational, or commercial. It's free.
+
+Use for hash table lookup, or anything where one collision in 2^^32 is
+acceptable. Do NOT use for cryptographic purposes.
+-------------------------------------------------------------------------------
+*/
+
+uint32_t jenkins_hashlittle( const void *key, size_t length, uint32_t initval)
+{
+ uint32_t a,b,c; /* internal state */
+ union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+ u.ptr = key;
+ if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
+
+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 12;
+ k += 3;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ /*
+ * "k[2]&0xffffff" actually reads beyond the end of the string, but
+ * then masks off the part it's not allowed to read. Because the
+ * string is aligned, the masked-off tail is in the same word as the
+ * rest of the string. Every machine with memory protection I've seen
+ * does it on word boundaries, so is OK with this. But valgrind will
+ * still catch it and complain. The masking trick does make the hash
+ * noticeably faster for short strings (like English words).
+ */
+#if !VALGRIND && !HAS_FEATURE_ADDRESS_SANITIZER
+
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+ case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+ case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+ case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+ case 5 : b+=k[1]&0xff; a+=k[0]; break;
+ case 4 : a+=k[0]; break;
+ case 3 : a+=k[0]&0xffffff; break;
+ case 2 : a+=k[0]&0xffff; break;
+ case 1 : a+=k[0]&0xff; break;
+ case 0 : return c; /* zero length strings require no mixing */
+ }
+
+#else /* make valgrind happy */
+ {
+ const uint8_t *k8 = (const uint8_t *) k;
+
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
+ case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
+ case 9 : c+=k8[8]; /* fall through */
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
+ case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
+ case 5 : b+=k8[4]; /* fall through */
+ case 4 : a+=k[0]; break;
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
+ case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
+ case 1 : a+=k8[0]; break;
+ case 0 : return c;
+ }
+ }
+
+#endif /* !valgrind */
+
+ } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
+ const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
+ const uint8_t *k8;
+
+ /*--------------- all but last block: aligned reads and different mixing */
+ while (length > 12)
+ {
+ a += k[0] + (((uint32_t)k[1])<<16);
+ b += k[2] + (((uint32_t)k[3])<<16);
+ c += k[4] + (((uint32_t)k[5])<<16);
+ mix(a,b,c);
+ length -= 12;
+ k += 6;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ k8 = (const uint8_t *)k;
+ switch(length)
+ {
+ case 12: c+=k[4]+(((uint32_t)k[5])<<16);
+ b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
+ case 10: c+=k[4];
+ b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 9 : c+=k8[8]; /* fall through */
+ case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
+ case 6 : b+=k[2];
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 5 : b+=k8[4]; /* fall through */
+ case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
+ case 2 : a+=k[0];
+ break;
+ case 1 : a+=k8[0];
+ break;
+ case 0 : return c; /* zero length requires no mixing */
+ }
+
+ } else { /* need to read the key one byte at a time */
+ const uint8_t *k = (const uint8_t *)key;
+
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ a += ((uint32_t)k[1])<<8;
+ a += ((uint32_t)k[2])<<16;
+ a += ((uint32_t)k[3])<<24;
+ b += k[4];
+ b += ((uint32_t)k[5])<<8;
+ b += ((uint32_t)k[6])<<16;
+ b += ((uint32_t)k[7])<<24;
+ c += k[8];
+ c += ((uint32_t)k[9])<<8;
+ c += ((uint32_t)k[10])<<16;
+ c += ((uint32_t)k[11])<<24;
+ mix(a,b,c);
+ length -= 12;
+ k += 12;
+ }
+
+ /*-------------------------------- last block: affect all 32 bits of (c) */
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=((uint32_t)k[11])<<24;
+ case 11: c+=((uint32_t)k[10])<<16;
+ case 10: c+=((uint32_t)k[9])<<8;
+ case 9 : c+=k[8];
+ case 8 : b+=((uint32_t)k[7])<<24;
+ case 7 : b+=((uint32_t)k[6])<<16;
+ case 6 : b+=((uint32_t)k[5])<<8;
+ case 5 : b+=k[4];
+ case 4 : a+=((uint32_t)k[3])<<24;
+ case 3 : a+=((uint32_t)k[2])<<16;
+ case 2 : a+=((uint32_t)k[1])<<8;
+ case 1 : a+=k[0];
+ break;
+ case 0 : return c;
+ }
+ }
+
+ final(a,b,c);
+ return c;
+}
+
+/*
+ * hashlittle2: return 2 32-bit hash values
+ *
+ * This is identical to hashlittle(), except it returns two 32-bit hash
+ * values instead of just one. This is good enough for hash table
+ * lookup with 2^^64 buckets, or if you want a second hash if you're not
+ * happy with the first, or if you want a probably-unique 64-bit ID for
+ * the key. *pc is better mixed than *pb, so use *pc first. If you want
+ * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
+ */
+void jenkins_hashlittle2(
+ const void *key, /* the key to hash */
+ size_t length, /* length of the key */
+ uint32_t *pc, /* IN: primary initval, OUT: primary hash */
+ uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
+{
+ uint32_t a,b,c; /* internal state */
+ union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
+ c += *pb;
+
+ u.ptr = key;
+ if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
+
+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 12;
+ k += 3;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ /*
+ * "k[2]&0xffffff" actually reads beyond the end of the string, but
+ * then masks off the part it's not allowed to read. Because the
+ * string is aligned, the masked-off tail is in the same word as the
+ * rest of the string. Every machine with memory protection I've seen
+ * does it on word boundaries, so is OK with this. But valgrind will
+ * still catch it and complain. The masking trick does make the hash
+ * noticeably faster for short strings (like English words).
+ */
+#if !VALGRIND && !HAS_FEATURE_ADDRESS_SANITIZER
+
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+ case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+ case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+ case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+ case 5 : b+=k[1]&0xff; a+=k[0]; break;
+ case 4 : a+=k[0]; break;
+ case 3 : a+=k[0]&0xffffff; break;
+ case 2 : a+=k[0]&0xffff; break;
+ case 1 : a+=k[0]&0xff; break;
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
+ }
+
+#else /* make valgrind happy */
+
+ {
+ const uint8_t *k8 = (const uint8_t *)k;
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
+ case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
+ case 9 : c+=k8[8]; /* fall through */
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
+ case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
+ case 5 : b+=k8[4]; /* fall through */
+ case 4 : a+=k[0]; break;
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
+ case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
+ case 1 : a+=k8[0]; break;
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
+ }
+ }
+
+#endif /* !valgrind */
+
+ } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
+ const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
+ const uint8_t *k8;
+
+ /*--------------- all but last block: aligned reads and different mixing */
+ while (length > 12)
+ {
+ a += k[0] + (((uint32_t)k[1])<<16);
+ b += k[2] + (((uint32_t)k[3])<<16);
+ c += k[4] + (((uint32_t)k[5])<<16);
+ mix(a,b,c);
+ length -= 12;
+ k += 6;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ k8 = (const uint8_t *)k;
+ switch(length)
+ {
+ case 12: c+=k[4]+(((uint32_t)k[5])<<16);
+ b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
+ case 10: c+=k[4];
+ b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 9 : c+=k8[8]; /* fall through */
+ case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
+ case 6 : b+=k[2];
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 5 : b+=k8[4]; /* fall through */
+ case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
+ case 2 : a+=k[0];
+ break;
+ case 1 : a+=k8[0];
+ break;
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
+ }
+
+ } else { /* need to read the key one byte at a time */
+ const uint8_t *k = (const uint8_t *)key;
+
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ a += ((uint32_t)k[1])<<8;
+ a += ((uint32_t)k[2])<<16;
+ a += ((uint32_t)k[3])<<24;
+ b += k[4];
+ b += ((uint32_t)k[5])<<8;
+ b += ((uint32_t)k[6])<<16;
+ b += ((uint32_t)k[7])<<24;
+ c += k[8];
+ c += ((uint32_t)k[9])<<8;
+ c += ((uint32_t)k[10])<<16;
+ c += ((uint32_t)k[11])<<24;
+ mix(a,b,c);
+ length -= 12;
+ k += 12;
+ }
+
+ /*-------------------------------- last block: affect all 32 bits of (c) */
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=((uint32_t)k[11])<<24;
+ case 11: c+=((uint32_t)k[10])<<16;
+ case 10: c+=((uint32_t)k[9])<<8;
+ case 9 : c+=k[8];
+ case 8 : b+=((uint32_t)k[7])<<24;
+ case 7 : b+=((uint32_t)k[6])<<16;
+ case 6 : b+=((uint32_t)k[5])<<8;
+ case 5 : b+=k[4];
+ case 4 : a+=((uint32_t)k[3])<<24;
+ case 3 : a+=((uint32_t)k[2])<<16;
+ case 2 : a+=((uint32_t)k[1])<<8;
+ case 1 : a+=k[0];
+ break;
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
+ }
+ }
+
+ final(a,b,c);
+ *pc=c; *pb=b;
+}
+
+/*
+ * hashbig():
+ * This is the same as hashword() on big-endian machines. It is different
+ * from hashlittle() on all machines. hashbig() takes advantage of
+ * big-endian byte ordering.
+ */
+uint32_t jenkins_hashbig( const void *key, size_t length, uint32_t initval)
+{
+ uint32_t a,b,c;
+ union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+ u.ptr = key;
+ if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
+
+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 12;
+ k += 3;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ /*
+ * "k[2]<<8" actually reads beyond the end of the string, but
+ * then shifts out the part it's not allowed to read. Because the
+ * string is aligned, the illegal read is in the same word as the
+ * rest of the string. Every machine with memory protection I've seen
+ * does it on word boundaries, so is OK with this. But valgrind will
+ * still catch it and complain. The masking trick does make the hash
+ * noticeably faster for short strings (like English words).
+ */
+#if !VALGRIND && !HAS_FEATURE_ADDRESS_SANITIZER
+
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
+ case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
+ case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
+ case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
+ case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
+ case 4 : a+=k[0]; break;
+ case 3 : a+=k[0]&0xffffff00; break;
+ case 2 : a+=k[0]&0xffff0000; break;
+ case 1 : a+=k[0]&0xff000000; break;
+ case 0 : return c; /* zero length strings require no mixing */
+ }
+
+#else /* make valgrind happy */
+
+ {
+ const uint8_t *k8 = (const uint8_t *)k;
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
+ case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
+ case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
+ case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
+ case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
+ case 4 : a+=k[0]; break;
+ case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
+ case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
+ case 1 : a+=((uint32_t)k8[0])<<24; break;
+ case 0 : return c;
+ }
+ }
+
+#endif /* !VALGRIND */
+
+ } else { /* need to read the key one byte at a time */
+ const uint8_t *k = (const uint8_t *)key;
+
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += ((uint32_t)k[0])<<24;
+ a += ((uint32_t)k[1])<<16;
+ a += ((uint32_t)k[2])<<8;
+ a += ((uint32_t)k[3]);
+ b += ((uint32_t)k[4])<<24;
+ b += ((uint32_t)k[5])<<16;
+ b += ((uint32_t)k[6])<<8;
+ b += ((uint32_t)k[7]);
+ c += ((uint32_t)k[8])<<24;
+ c += ((uint32_t)k[9])<<16;
+ c += ((uint32_t)k[10])<<8;
+ c += ((uint32_t)k[11]);
+ mix(a,b,c);
+ length -= 12;
+ k += 12;
+ }
+
+ /*-------------------------------- last block: affect all 32 bits of (c) */
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=k[11];
+ case 11: c+=((uint32_t)k[10])<<8;
+ case 10: c+=((uint32_t)k[9])<<16;
+ case 9 : c+=((uint32_t)k[8])<<24;
+ case 8 : b+=k[7];
+ case 7 : b+=((uint32_t)k[6])<<8;
+ case 6 : b+=((uint32_t)k[5])<<16;
+ case 5 : b+=((uint32_t)k[4])<<24;
+ case 4 : a+=k[3];
+ case 3 : a+=((uint32_t)k[2])<<8;
+ case 2 : a+=((uint32_t)k[1])<<16;
+ case 1 : a+=((uint32_t)k[0])<<24;
+ break;
+ case 0 : return c;
+ }
+ }
+
+ final(a,b,c);
+ return c;
+}
+
+#ifdef SELF_TEST
+
+/* used for timings */
+void driver1()
+{
+ uint8_t buf[256];
+ uint32_t i;
+ uint32_t h=0;
+ time_t a,z;
+
+ time(&a);
+ for (i=0; i<256; ++i) buf[i] = 'x';
+ for (i=0; i<1; ++i)
+ {
+ h = hashlittle(&buf[0],1,h);
+ }
+ time(&z);
+ if (z-a > 0) printf("time %d %.8x\n", z-a, h);
+}
+
+/* check that every input bit changes every output bit half the time */
+#define HASHSTATE 1
+#define HASHLEN 1
+#define MAXPAIR 60
+#define MAXLEN 70
+void driver2()
+{
+ uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
+ uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
+ uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
+ uint32_t x[HASHSTATE],y[HASHSTATE];
+ uint32_t hlen;
+
+ printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
+ for (hlen=0; hlen < MAXLEN; ++hlen)
+ {
+ z=0;
+ for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */
+ {
+ for (j=0; j<8; ++j) /*------------------------ for each input bit, */
+ {
+ for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
+ {
+ for (l=0; l<HASHSTATE; ++l)
+ e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
+
+ /*---- check that every output bit is affected by that input bit */
+ for (k=0; k<MAXPAIR; k+=2)
+ {
+ uint32_t finished=1;
+ /* keys have one bit different */
+ for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
+ /* have a and b be two keys differing in only one bit */
+ a[i] ^= (k<<j);
+ a[i] ^= (k>>(8-j));
+ c[0] = hashlittle(a, hlen, m);
+ b[i] ^= ((k+1)<<j);
+ b[i] ^= ((k+1)>>(8-j));
+ d[0] = hashlittle(b, hlen, m);
+ /* check every bit is 1, 0, set, and not set at least once */
+ for (l=0; l<HASHSTATE; ++l)
+ {
+ e[l] &= (c[l]^d[l]);
+ f[l] &= ~(c[l]^d[l]);
+ g[l] &= c[l];
+ h[l] &= ~c[l];
+ x[l] &= d[l];
+ y[l] &= ~d[l];
+ if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
+ }
+ if (finished) break;
+ }
+ if (k>z) z=k;
+ if (k==MAXPAIR)
+ {
+ printf("Some bit didn't change: ");
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x ",
+ e[0],f[0],g[0],h[0],x[0],y[0]);
+ printf("i %d j %d m %d len %d\n", i, j, m, hlen);
+ }
+ if (z==MAXPAIR) goto done;
+ }
+ }
+ }
+ done:
+ if (z < MAXPAIR)
+ {
+ printf("Mix success %2d bytes %2d initvals ",i,m);
+ printf("required %d trials\n", z/2);
+ }
+ }
+ printf("\n");
+}
+
+/* Check for reading beyond the end of the buffer and alignment problems */
+void driver3()
+{
+ uint8_t buf[MAXLEN+20], *b;
+ uint32_t len;
+ uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
+ uint32_t h;
+ uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
+ uint32_t i;
+ uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
+ uint32_t j;
+ uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
+ uint32_t ref,x,y;
+ uint8_t *p;
+
+ printf("Endianness. These lines should all be the same (for values filled in):\n");
+ printf("%.8x %.8x %.8x\n",
+ hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
+ hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
+ hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
+ p = q;
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+ p = &qq[1];
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+ p = &qqq[2];
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+ p = &qqqq[3];
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+ printf("\n");
+
+ /* check that hashlittle2 and hashlittle produce the same results */
+ i=47; j=0;
+ hashlittle2(q, sizeof(q), &i, &j);
+ if (hashlittle(q, sizeof(q), 47) != i)
+ printf("hashlittle2 and hashlittle mismatch\n");
+
+ /* check that hashword2 and hashword produce the same results */
+ len = 0xdeadbeef;
+ i=47, j=0;
+ hashword2(&len, 1, &i, &j);
+ if (hashword(&len, 1, 47) != i)
+ printf("hashword2 and hashword mismatch %x %x\n",
+ i, hashword(&len, 1, 47));
+
+ /* check hashlittle doesn't read before or after the ends of the string */
+ for (h=0, b=buf+1; h<8; ++h, ++b)
+ {
+ for (i=0; i<MAXLEN; ++i)
+ {
+ len = i;
+ for (j=0; j<i; ++j) *(b+j)=0;
+
+ /* these should all be equal */
+ ref = hashlittle(b, len, (uint32_t)1);
+ *(b+i)=(uint8_t)~0;
+ *(b-1)=(uint8_t)~0;
+ x = hashlittle(b, len, (uint32_t)1);
+ y = hashlittle(b, len, (uint32_t)1);
+ if ((ref != x) || (ref != y))
+ {
+ printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
+ h, i);
+ }
+ }
+ }
+}
+
+/* check for problems with nulls */
+ void driver4()
+{
+ uint8_t buf[1];
+ uint32_t h,i,state[HASHSTATE];
+
+ buf[0] = ~0;
+ for (i=0; i<HASHSTATE; ++i) state[i] = 1;
+ printf("These should all be different\n");
+ for (i=0, h=0; i<8; ++i)
+ {
+ h = hashlittle(buf, 0, h);
+ printf("%2ld 0-byte strings, hash is %.8x\n", i, h);
+ }
+}
+
+void driver5()
+{
+ uint32_t b,c;
+ b=0, c=0, hashlittle2("", 0, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* deadbeef deadbeef */
+ b=0xdeadbeef, c=0, hashlittle2("", 0, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* bd5b7dde deadbeef */
+ b=0xdeadbeef, c=0xdeadbeef, hashlittle2("", 0, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* 9c093ccd bd5b7dde */
+ b=0, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* 17770551 ce7226e6 */
+ b=1, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* e3607cae bd371de4 */
+ b=0, c=1, hashlittle2("Four score and seven years ago", 30, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* cd628161 6cbea4b3 */
+ c = hashlittle("Four score and seven years ago", 30, 0);
+ printf("hash is %.8lx\n", c); /* 17770551 */
+ c = hashlittle("Four score and seven years ago", 30, 1);
+ printf("hash is %.8lx\n", c); /* cd628161 */
+}
+
+int main()
+{
+ driver1(); /* test that the key is hashed: used for timings */
+ driver2(); /* test that whole key is hashed thoroughly */
+ driver3(); /* test that nothing but the key is hashed */
+ driver4(); /* test hashing multiple buffers (all buffers are null) */
+ driver5(); /* test the hash against known vectors */
+ return 1;
+}
+
+#endif /* SELF_TEST */
diff --git a/src/journal/lookup3.h b/src/journal/lookup3.h
new file mode 100644
index 0000000..787921f
--- /dev/null
+++ b/src/journal/lookup3.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+uint32_t jenkins_hashword(const uint32_t *k, size_t length, uint32_t initval) _pure_;
+void jenkins_hashword2(const uint32_t *k, size_t length, uint32_t *pc, uint32_t *pb);
+
+uint32_t jenkins_hashlittle(const void *key, size_t length, uint32_t initval) _pure_;
+void jenkins_hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb);
+
+uint32_t jenkins_hashbig(const void *key, size_t length, uint32_t initval) _pure_;
+
+static inline uint64_t hash64(const void *data, size_t length) {
+ uint32_t a = 0, b = 0;
+
+ jenkins_hashlittle2(data, length, &a, &b);
+
+ return ((uint64_t) a << 32ULL) | (uint64_t) b;
+}
diff --git a/src/journal/meson.build b/src/journal/meson.build
new file mode 100644
index 0000000..e03d6dc
--- /dev/null
+++ b/src/journal/meson.build
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+journal_client_sources = files('''
+ audit-type.c
+ audit-type.h
+ catalog.c
+ catalog.h
+ compress.c
+ compress.h
+ journal-def.h
+ journal-file.c
+ journal-file.h
+ journal-send.c
+ journal-vacuum.c
+ journal-vacuum.h
+ journal-verify.c
+ journal-verify.h
+ lookup3.c
+ lookup3.h
+ mmap-cache.c
+ mmap-cache.h
+ sd-journal.c
+'''.split())
+
+if conf.get('HAVE_GCRYPT') == 1
+ journal_client_sources += files('''
+ journal-authenticate.c
+ journal-authenticate.h
+ fsprg.c
+ fsprg.h
+ '''.split())
+endif
+
+############################################################
+
+audit_type_includes = [config_h,
+ missing_audit_h,
+ 'linux/audit.h']
+if conf.get('HAVE_AUDIT') == 1
+ audit_type_includes += 'libaudit.h'
+endif
+
+generate_audit_type_list = find_program('generate-audit_type-list.sh')
+audit_type_list_txt = custom_target(
+ 'audit_type-list.txt',
+ output : 'audit_type-list.txt',
+ command : [generate_audit_type_list, cpp] + audit_type_includes,
+ capture : true)
+
+audit_type_to_name = custom_target(
+ 'audit_type-to-name.h',
+ input : ['audit_type-to-name.awk', audit_type_list_txt],
+ output : 'audit_type-to-name.h',
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+journal_client_sources += [audit_type_to_name]
+
+libjournal_client = static_library(
+ 'journal-client',
+ journal_client_sources,
+ include_directories : includes,
+ c_args : ['-fvisibility=default'])
+
+############################################################
+
+libjournal_core_sources = files('''
+ journald-audit.c
+ journald-audit.h
+ journald-console.c
+ journald-console.h
+ journald-context.c
+ journald-context.h
+ journald-kmsg.c
+ journald-kmsg.h
+ journald-native.c
+ journald-native.h
+ journald-rate-limit.c
+ journald-rate-limit.h
+ journald-server.c
+ journald-server.h
+ journald-stream.c
+ journald-stream.h
+ journald-syslog.c
+ journald-syslog.h
+ journald-wall.c
+ journald-wall.h
+ journal-internal.h
+'''.split())
+
+systemd_journald_sources = files('''
+ journald.c
+ journald-server.h
+'''.split())
+
+journald_gperf_c = custom_target(
+ 'journald-gperf.c',
+ input : 'journald-gperf.gperf',
+ output : 'journald-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+systemd_cat_sources = files('cat.c')
+
+journalctl_sources = files('journalctl.c')
+
+if conf.get('HAVE_QRENCODE') == 1
+ journalctl_sources += files('journal-qrcode.c',
+ 'journal-qrcode.h')
+endif
+
+install_data('journald.conf',
+ install_dir : pkgsysconfdir)
+
+meson.add_install_script(
+ 'sh', '-c',
+ mkdir_p.format('/var/log/journal'))
+meson.add_install_script(
+ 'sh', '-c',
+ '''chown 0:0 $DESTDIR/var/log/journal &&
+ chmod 755 $DESTDIR/var/log/journal || :''')
+if get_option('adm-group')
+ meson.add_install_script(
+ 'sh', '-c',
+ 'setfacl -nm g:adm:rx,d:g:adm:rx $DESTDIR/var/log/journal || :')
+endif
+if get_option('wheel-group')
+ meson.add_install_script(
+ 'sh', '-c',
+ 'setfacl -nm g:wheel:rx,d:g:wheel:rx $DESTDIR/var/log/journal || :')
+endif
diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c
new file mode 100644
index 0000000..0dc453e
--- /dev/null
+++ b/src/journal/mmap-cache.c
@@ -0,0 +1,668 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "list.h"
+#include "log.h"
+#include "macro.h"
+#include "mmap-cache.h"
+#include "sigbus.h"
+#include "util.h"
+
+typedef struct Window Window;
+typedef struct Context Context;
+
+struct Window {
+ MMapCache *cache;
+
+ bool invalidated:1;
+ bool keep_always:1;
+ bool in_unused:1;
+
+ int prot;
+ void *ptr;
+ uint64_t offset;
+ size_t size;
+
+ MMapFileDescriptor *fd;
+
+ LIST_FIELDS(Window, by_fd);
+ LIST_FIELDS(Window, unused);
+
+ LIST_HEAD(Context, contexts);
+};
+
+struct Context {
+ MMapCache *cache;
+ unsigned id;
+ Window *window;
+
+ LIST_FIELDS(Context, by_window);
+};
+
+struct MMapFileDescriptor {
+ MMapCache *cache;
+ int fd;
+ bool sigbus;
+ LIST_HEAD(Window, windows);
+};
+
+struct MMapCache {
+ unsigned n_ref;
+ unsigned n_windows;
+
+ unsigned n_hit, n_missed;
+
+ Hashmap *fds;
+ Context *contexts[MMAP_CACHE_MAX_CONTEXTS];
+
+ LIST_HEAD(Window, unused);
+ Window *last_unused;
+};
+
+#define WINDOWS_MIN 64
+
+#if ENABLE_DEBUG_MMAP_CACHE
+/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
+# define WINDOW_SIZE (page_size())
+#else
+# define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
+#endif
+
+MMapCache* mmap_cache_new(void) {
+ MMapCache *m;
+
+ m = new0(MMapCache, 1);
+ if (!m)
+ return NULL;
+
+ m->n_ref = 1;
+ return m;
+}
+
+static void window_unlink(Window *w) {
+ Context *c;
+
+ assert(w);
+
+ if (w->ptr)
+ munmap(w->ptr, w->size);
+
+ if (w->fd)
+ LIST_REMOVE(by_fd, w->fd->windows, w);
+
+ if (w->in_unused) {
+ if (w->cache->last_unused == w)
+ w->cache->last_unused = w->unused_prev;
+
+ LIST_REMOVE(unused, w->cache->unused, w);
+ }
+
+ LIST_FOREACH(by_window, c, w->contexts) {
+ assert(c->window == w);
+ c->window = NULL;
+ }
+}
+
+static void window_invalidate(Window *w) {
+ assert(w);
+
+ if (w->invalidated)
+ return;
+
+ /* Replace the window with anonymous pages. This is useful
+ * when we hit a SIGBUS and want to make sure the file cannot
+ * trigger any further SIGBUS, possibly overrunning the sigbus
+ * queue. */
+
+ assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
+ w->invalidated = true;
+}
+
+static void window_free(Window *w) {
+ assert(w);
+
+ window_unlink(w);
+ w->cache->n_windows--;
+ free(w);
+}
+
+_pure_ static bool window_matches(Window *w, int prot, uint64_t offset, size_t size) {
+ assert(w);
+ assert(size > 0);
+
+ return
+ prot == w->prot &&
+ offset >= w->offset &&
+ offset + size <= w->offset + w->size;
+}
+
+_pure_ static bool window_matches_fd(Window *w, MMapFileDescriptor *f, int prot, uint64_t offset, size_t size) {
+ assert(w);
+ assert(f);
+
+ return
+ w->fd &&
+ f->fd == w->fd->fd &&
+ window_matches(w, prot, offset, size);
+}
+
+static Window *window_add(MMapCache *m, MMapFileDescriptor *f, int prot, bool keep_always, uint64_t offset, size_t size, void *ptr) {
+ Window *w;
+
+ assert(m);
+ assert(f);
+
+ if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
+
+ /* Allocate a new window */
+ w = new0(Window, 1);
+ if (!w)
+ return NULL;
+ m->n_windows++;
+ } else {
+
+ /* Reuse an existing one */
+ w = m->last_unused;
+ window_unlink(w);
+ zero(*w);
+ }
+
+ w->cache = m;
+ w->fd = f;
+ w->prot = prot;
+ w->keep_always = keep_always;
+ w->offset = offset;
+ w->size = size;
+ w->ptr = ptr;
+
+ LIST_PREPEND(by_fd, f->windows, w);
+
+ return w;
+}
+
+static void context_detach_window(Context *c) {
+ Window *w;
+
+ assert(c);
+
+ if (!c->window)
+ return;
+
+ w = TAKE_PTR(c->window);
+ LIST_REMOVE(by_window, w->contexts, c);
+
+ if (!w->contexts && !w->keep_always) {
+ /* Not used anymore? */
+#if ENABLE_DEBUG_MMAP_CACHE
+ /* Unmap unused windows immediately to expose use-after-unmap
+ * by SIGSEGV. */
+ window_free(w);
+#else
+ LIST_PREPEND(unused, c->cache->unused, w);
+ if (!c->cache->last_unused)
+ c->cache->last_unused = w;
+
+ w->in_unused = true;
+#endif
+ }
+}
+
+static void context_attach_window(Context *c, Window *w) {
+ assert(c);
+ assert(w);
+
+ if (c->window == w)
+ return;
+
+ context_detach_window(c);
+
+ if (w->in_unused) {
+ /* Used again? */
+ LIST_REMOVE(unused, c->cache->unused, w);
+ if (c->cache->last_unused == w)
+ c->cache->last_unused = w->unused_prev;
+
+ w->in_unused = false;
+ }
+
+ c->window = w;
+ LIST_PREPEND(by_window, w->contexts, c);
+}
+
+static Context *context_add(MMapCache *m, unsigned id) {
+ Context *c;
+
+ assert(m);
+
+ c = m->contexts[id];
+ if (c)
+ return c;
+
+ c = new0(Context, 1);
+ if (!c)
+ return NULL;
+
+ c->cache = m;
+ c->id = id;
+
+ assert(!m->contexts[id]);
+ m->contexts[id] = c;
+
+ return c;
+}
+
+static void context_free(Context *c) {
+ assert(c);
+
+ context_detach_window(c);
+
+ if (c->cache) {
+ assert(c->cache->contexts[c->id] == c);
+ c->cache->contexts[c->id] = NULL;
+ }
+
+ free(c);
+}
+
+static MMapCache *mmap_cache_free(MMapCache *m) {
+ int i;
+
+ assert(m);
+
+ for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
+ if (m->contexts[i])
+ context_free(m->contexts[i]);
+
+ hashmap_free(m->fds);
+
+ while (m->unused)
+ window_free(m->unused);
+
+ return mfree(m);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache, mmap_cache, mmap_cache_free);
+
+static int make_room(MMapCache *m) {
+ assert(m);
+
+ if (!m->last_unused)
+ return 0;
+
+ window_free(m->last_unused);
+ return 1;
+}
+
+static int try_context(
+ MMapCache *m,
+ MMapFileDescriptor *f,
+ int prot,
+ unsigned context,
+ bool keep_always,
+ uint64_t offset,
+ size_t size,
+ void **ret,
+ size_t *ret_size) {
+
+ Context *c;
+
+ assert(m);
+ assert(m->n_ref > 0);
+ assert(f);
+ assert(size > 0);
+ assert(ret);
+
+ c = m->contexts[context];
+ if (!c)
+ return 0;
+
+ assert(c->id == context);
+
+ if (!c->window)
+ return 0;
+
+ if (!window_matches_fd(c->window, f, prot, offset, size)) {
+
+ /* Drop the reference to the window, since it's unnecessary now */
+ context_detach_window(c);
+ return 0;
+ }
+
+ if (c->window->fd->sigbus)
+ return -EIO;
+
+ c->window->keep_always = c->window->keep_always || keep_always;
+
+ *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
+ if (ret_size)
+ *ret_size = c->window->size - (offset - c->window->offset);
+
+ return 1;
+}
+
+static int find_mmap(
+ MMapCache *m,
+ MMapFileDescriptor *f,
+ int prot,
+ unsigned context,
+ bool keep_always,
+ uint64_t offset,
+ size_t size,
+ void **ret,
+ size_t *ret_size) {
+
+ Window *w;
+ Context *c;
+
+ assert(m);
+ assert(m->n_ref > 0);
+ assert(f);
+ assert(size > 0);
+
+ if (f->sigbus)
+ return -EIO;
+
+ LIST_FOREACH(by_fd, w, f->windows)
+ if (window_matches(w, prot, offset, size))
+ break;
+
+ if (!w)
+ return 0;
+
+ c = context_add(m, context);
+ if (!c)
+ return -ENOMEM;
+
+ context_attach_window(c, w);
+ w->keep_always = w->keep_always || keep_always;
+
+ *ret = (uint8_t*) w->ptr + (offset - w->offset);
+ if (ret_size)
+ *ret_size = w->size - (offset - w->offset);
+
+ return 1;
+}
+
+static int mmap_try_harder(MMapCache *m, void *addr, MMapFileDescriptor *f, int prot, int flags, uint64_t offset, size_t size, void **res) {
+ void *ptr;
+
+ assert(m);
+ assert(f);
+ assert(res);
+
+ for (;;) {
+ int r;
+
+ ptr = mmap(addr, size, prot, flags, f->fd, offset);
+ if (ptr != MAP_FAILED)
+ break;
+ if (errno != ENOMEM)
+ return negative_errno();
+
+ r = make_room(m);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOMEM;
+ }
+
+ *res = ptr;
+ return 0;
+}
+
+static int add_mmap(
+ MMapCache *m,
+ MMapFileDescriptor *f,
+ int prot,
+ unsigned context,
+ bool keep_always,
+ uint64_t offset,
+ size_t size,
+ struct stat *st,
+ void **ret,
+ size_t *ret_size) {
+
+ uint64_t woffset, wsize;
+ Context *c;
+ Window *w;
+ void *d;
+ int r;
+
+ assert(m);
+ assert(m->n_ref > 0);
+ assert(f);
+ assert(size > 0);
+ assert(ret);
+
+ woffset = offset & ~((uint64_t) page_size() - 1ULL);
+ wsize = size + (offset - woffset);
+ wsize = PAGE_ALIGN(wsize);
+
+ if (wsize < WINDOW_SIZE) {
+ uint64_t delta;
+
+ delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
+
+ if (delta > offset)
+ woffset = 0;
+ else
+ woffset -= delta;
+
+ wsize = WINDOW_SIZE;
+ }
+
+ if (st) {
+ /* Memory maps that are larger then the files
+ underneath have undefined behavior. Hence, clamp
+ things to the file size if we know it */
+
+ if (woffset >= (uint64_t) st->st_size)
+ return -EADDRNOTAVAIL;
+
+ if (woffset + wsize > (uint64_t) st->st_size)
+ wsize = PAGE_ALIGN(st->st_size - woffset);
+ }
+
+ r = mmap_try_harder(m, NULL, f, prot, MAP_SHARED, woffset, wsize, &d);
+ if (r < 0)
+ return r;
+
+ c = context_add(m, context);
+ if (!c)
+ goto outofmem;
+
+ w = window_add(m, f, prot, keep_always, woffset, wsize, d);
+ if (!w)
+ goto outofmem;
+
+ context_attach_window(c, w);
+
+ *ret = (uint8_t*) w->ptr + (offset - w->offset);
+ if (ret_size)
+ *ret_size = w->size - (offset - w->offset);
+
+ return 1;
+
+outofmem:
+ (void) munmap(d, wsize);
+ return -ENOMEM;
+}
+
+int mmap_cache_get(
+ MMapCache *m,
+ MMapFileDescriptor *f,
+ int prot,
+ unsigned context,
+ bool keep_always,
+ uint64_t offset,
+ size_t size,
+ struct stat *st,
+ void **ret,
+ size_t *ret_size) {
+
+ int r;
+
+ assert(m);
+ assert(m->n_ref > 0);
+ assert(f);
+ assert(size > 0);
+ assert(ret);
+ assert(context < MMAP_CACHE_MAX_CONTEXTS);
+
+ /* Check whether the current context is the right one already */
+ r = try_context(m, f, prot, context, keep_always, offset, size, ret, ret_size);
+ if (r != 0) {
+ m->n_hit++;
+ return r;
+ }
+
+ /* Search for a matching mmap */
+ r = find_mmap(m, f, prot, context, keep_always, offset, size, ret, ret_size);
+ if (r != 0) {
+ m->n_hit++;
+ return r;
+ }
+
+ m->n_missed++;
+
+ /* Create a new mmap */
+ return add_mmap(m, f, prot, context, keep_always, offset, size, st, ret, ret_size);
+}
+
+unsigned mmap_cache_get_hit(MMapCache *m) {
+ assert(m);
+
+ return m->n_hit;
+}
+
+unsigned mmap_cache_get_missed(MMapCache *m) {
+ assert(m);
+
+ return m->n_missed;
+}
+
+static void mmap_cache_process_sigbus(MMapCache *m) {
+ bool found = false;
+ MMapFileDescriptor *f;
+ Iterator i;
+ int r;
+
+ assert(m);
+
+ /* Iterate through all triggered pages and mark their files as
+ * invalidated */
+ for (;;) {
+ bool ours;
+ void *addr;
+
+ r = sigbus_pop(&addr);
+ if (_likely_(r == 0))
+ break;
+ if (r < 0) {
+ log_error_errno(r, "SIGBUS handling failed: %m");
+ abort();
+ }
+
+ ours = false;
+ HASHMAP_FOREACH(f, m->fds, i) {
+ Window *w;
+
+ LIST_FOREACH(by_fd, w, f->windows) {
+ if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
+ (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
+ found = ours = f->sigbus = true;
+ break;
+ }
+ }
+
+ if (ours)
+ break;
+ }
+
+ /* Didn't find a matching window, give up */
+ if (!ours) {
+ log_error("Unknown SIGBUS page, aborting.");
+ abort();
+ }
+ }
+
+ /* The list of triggered pages is now empty. Now, let's remap
+ * all windows of the triggered file to anonymous maps, so
+ * that no page of the file in question is triggered again, so
+ * that we can be sure not to hit the queue size limit. */
+ if (_likely_(!found))
+ return;
+
+ HASHMAP_FOREACH(f, m->fds, i) {
+ Window *w;
+
+ if (!f->sigbus)
+ continue;
+
+ LIST_FOREACH(by_fd, w, f->windows)
+ window_invalidate(w);
+ }
+}
+
+bool mmap_cache_got_sigbus(MMapCache *m, MMapFileDescriptor *f) {
+ assert(m);
+ assert(f);
+
+ mmap_cache_process_sigbus(m);
+
+ return f->sigbus;
+}
+
+MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd) {
+ MMapFileDescriptor *f;
+ int r;
+
+ assert(m);
+ assert(fd >= 0);
+
+ f = hashmap_get(m->fds, FD_TO_PTR(fd));
+ if (f)
+ return f;
+
+ r = hashmap_ensure_allocated(&m->fds, NULL);
+ if (r < 0)
+ return NULL;
+
+ f = new0(MMapFileDescriptor, 1);
+ if (!f)
+ return NULL;
+
+ f->cache = m;
+ f->fd = fd;
+
+ r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
+ if (r < 0)
+ return mfree(f);
+
+ return f;
+}
+
+void mmap_cache_free_fd(MMapCache *m, MMapFileDescriptor *f) {
+ assert(m);
+ assert(f);
+
+ /* Make sure that any queued SIGBUS are first dispatched, so
+ * that we don't end up with a SIGBUS entry we cannot relate
+ * to any existing memory map */
+
+ mmap_cache_process_sigbus(m);
+
+ while (f->windows)
+ window_free(f->windows);
+
+ if (f->cache)
+ assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));
+
+ free(f);
+}
diff --git a/src/journal/mmap-cache.h b/src/journal/mmap-cache.h
new file mode 100644
index 0000000..bf70d32
--- /dev/null
+++ b/src/journal/mmap-cache.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/stat.h>
+
+/* One context per object type, plus one of the header, plus one "additional" one */
+#define MMAP_CACHE_MAX_CONTEXTS 9
+
+typedef struct MMapCache MMapCache;
+typedef struct MMapFileDescriptor MMapFileDescriptor;
+
+MMapCache* mmap_cache_new(void);
+MMapCache* mmap_cache_ref(MMapCache *m);
+MMapCache* mmap_cache_unref(MMapCache *m);
+
+int mmap_cache_get(
+ MMapCache *m,
+ MMapFileDescriptor *f,
+ int prot,
+ unsigned context,
+ bool keep_always,
+ uint64_t offset,
+ size_t size,
+ struct stat *st,
+ void **ret,
+ size_t *ret_size);
+MMapFileDescriptor * mmap_cache_add_fd(MMapCache *m, int fd);
+void mmap_cache_free_fd(MMapCache *m, MMapFileDescriptor *f);
+
+unsigned mmap_cache_get_hit(MMapCache *m);
+unsigned mmap_cache_get_missed(MMapCache *m);
+
+bool mmap_cache_got_sigbus(MMapCache *m, MMapFileDescriptor *f);
diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c
new file mode 100644
index 0000000..0f99628
--- /dev/null
+++ b/src/journal/sd-journal.c
@@ -0,0 +1,3137 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <linux/magic.h>
+#include <poll.h>
+#include <stddef.h>
+#include <sys/inotify.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "catalog.h"
+#include "compress.h"
+#include "dirent-util.h"
+#include "env-file.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-internal.h"
+#include "list.h"
+#include "lookup3.h"
+#include "missing.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "replace-var.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+#define JOURNAL_FILES_MAX 7168
+
+#define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
+
+#define REPLACE_VAR_MAX 256
+
+#define DEFAULT_DATA_THRESHOLD (64*1024)
+
+static void remove_file_real(sd_journal *j, JournalFile *f);
+
+static bool journal_pid_changed(sd_journal *j) {
+ assert(j);
+
+ /* We don't support people creating a journal object and
+ * keeping it around over a fork(). Let's complain. */
+
+ return j->original_pid != getpid_cached();
+}
+
+static int journal_put_error(sd_journal *j, int r, const char *path) {
+ char *copy;
+ int k;
+
+ /* Memorize an error we encountered, and store which
+ * file/directory it was generated from. Note that we store
+ * only *one* path per error code, as the error code is the
+ * key into the hashmap, and the path is the value. This means
+ * we keep track only of all error kinds, but not of all error
+ * locations. This has the benefit that the hashmap cannot
+ * grow beyond bounds.
+ *
+ * We return an error here only if we didn't manage to
+ * memorize the real error. */
+
+ if (r >= 0)
+ return r;
+
+ k = hashmap_ensure_allocated(&j->errors, NULL);
+ if (k < 0)
+ return k;
+
+ if (path) {
+ copy = strdup(path);
+ if (!copy)
+ return -ENOMEM;
+ } else
+ copy = NULL;
+
+ k = hashmap_put(j->errors, INT_TO_PTR(r), copy);
+ if (k < 0) {
+ free(copy);
+
+ if (k == -EEXIST)
+ return 0;
+
+ return k;
+ }
+
+ return 0;
+}
+
+static void detach_location(sd_journal *j) {
+ Iterator i;
+ JournalFile *f;
+
+ assert(j);
+
+ j->current_file = NULL;
+ j->current_field = 0;
+
+ ORDERED_HASHMAP_FOREACH(f, j->files, i)
+ journal_file_reset_location(f);
+}
+
+static void reset_location(sd_journal *j) {
+ assert(j);
+
+ detach_location(j);
+ zero(j->current_location);
+}
+
+static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
+ assert(l);
+ assert(IN_SET(type, LOCATION_DISCRETE, LOCATION_SEEK));
+ assert(f);
+ assert(o->object.type == OBJECT_ENTRY);
+
+ l->type = type;
+ l->seqnum = le64toh(o->entry.seqnum);
+ l->seqnum_id = f->header->seqnum_id;
+ l->realtime = le64toh(o->entry.realtime);
+ l->monotonic = le64toh(o->entry.monotonic);
+ l->boot_id = o->entry.boot_id;
+ l->xor_hash = le64toh(o->entry.xor_hash);
+
+ l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
+}
+
+static void set_location(sd_journal *j, JournalFile *f, Object *o) {
+ assert(j);
+ assert(f);
+ assert(o);
+
+ init_location(&j->current_location, LOCATION_DISCRETE, f, o);
+
+ j->current_file = f;
+ j->current_field = 0;
+
+ /* Let f know its candidate entry was picked. */
+ assert(f->location_type == LOCATION_SEEK);
+ f->location_type = LOCATION_DISCRETE;
+}
+
+static int match_is_valid(const void *data, size_t size) {
+ const char *b, *p;
+
+ assert(data);
+
+ if (size < 2)
+ return false;
+
+ if (startswith(data, "__"))
+ return false;
+
+ b = data;
+ for (p = b; p < b + size; p++) {
+
+ if (*p == '=')
+ return p > b;
+
+ if (*p == '_')
+ continue;
+
+ if (*p >= 'A' && *p <= 'Z')
+ continue;
+
+ if (*p >= '0' && *p <= '9')
+ continue;
+
+ return false;
+ }
+
+ return false;
+}
+
+static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
+ const uint8_t *a = _a, *b = _b;
+ size_t j;
+
+ for (j = 0; j < s && j < t; j++) {
+
+ if (a[j] != b[j])
+ return false;
+
+ if (a[j] == '=')
+ return true;
+ }
+
+ assert_not_reached("\"=\" not found");
+}
+
+static Match *match_new(Match *p, MatchType t) {
+ Match *m;
+
+ m = new0(Match, 1);
+ if (!m)
+ return NULL;
+
+ m->type = t;
+
+ if (p) {
+ m->parent = p;
+ LIST_PREPEND(matches, p->matches, m);
+ }
+
+ return m;
+}
+
+static void match_free(Match *m) {
+ assert(m);
+
+ while (m->matches)
+ match_free(m->matches);
+
+ if (m->parent)
+ LIST_REMOVE(matches, m->parent->matches, m);
+
+ free(m->data);
+ free(m);
+}
+
+static void match_free_if_empty(Match *m) {
+ if (!m || m->matches)
+ return;
+
+ match_free(m);
+}
+
+_public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
+ Match *l3, *l4, *add_here = NULL, *m;
+ le64_t le_hash;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(data, -EINVAL);
+
+ if (size == 0)
+ size = strlen(data);
+
+ assert_return(match_is_valid(data, size), -EINVAL);
+
+ /* level 0: AND term
+ * level 1: OR terms
+ * level 2: AND terms
+ * level 3: OR terms
+ * level 4: concrete matches */
+
+ if (!j->level0) {
+ j->level0 = match_new(NULL, MATCH_AND_TERM);
+ if (!j->level0)
+ return -ENOMEM;
+ }
+
+ if (!j->level1) {
+ j->level1 = match_new(j->level0, MATCH_OR_TERM);
+ if (!j->level1)
+ return -ENOMEM;
+ }
+
+ if (!j->level2) {
+ j->level2 = match_new(j->level1, MATCH_AND_TERM);
+ if (!j->level2)
+ return -ENOMEM;
+ }
+
+ assert(j->level0->type == MATCH_AND_TERM);
+ assert(j->level1->type == MATCH_OR_TERM);
+ assert(j->level2->type == MATCH_AND_TERM);
+
+ le_hash = htole64(hash64(data, size));
+
+ LIST_FOREACH(matches, l3, j->level2->matches) {
+ assert(l3->type == MATCH_OR_TERM);
+
+ LIST_FOREACH(matches, l4, l3->matches) {
+ assert(l4->type == MATCH_DISCRETE);
+
+ /* Exactly the same match already? Then ignore
+ * this addition */
+ if (l4->le_hash == le_hash &&
+ l4->size == size &&
+ memcmp(l4->data, data, size) == 0)
+ return 0;
+
+ /* Same field? Then let's add this to this OR term */
+ if (same_field(data, size, l4->data, l4->size)) {
+ add_here = l3;
+ break;
+ }
+ }
+
+ if (add_here)
+ break;
+ }
+
+ if (!add_here) {
+ add_here = match_new(j->level2, MATCH_OR_TERM);
+ if (!add_here)
+ goto fail;
+ }
+
+ m = match_new(add_here, MATCH_DISCRETE);
+ if (!m)
+ goto fail;
+
+ m->le_hash = le_hash;
+ m->size = size;
+ m->data = memdup(data, size);
+ if (!m->data)
+ goto fail;
+
+ detach_location(j);
+
+ return 0;
+
+fail:
+ match_free_if_empty(add_here);
+ match_free_if_empty(j->level2);
+ match_free_if_empty(j->level1);
+ match_free_if_empty(j->level0);
+
+ return -ENOMEM;
+}
+
+_public_ int sd_journal_add_conjunction(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ if (!j->level0)
+ return 0;
+
+ if (!j->level1)
+ return 0;
+
+ if (!j->level1->matches)
+ return 0;
+
+ j->level1 = NULL;
+ j->level2 = NULL;
+
+ return 0;
+}
+
+_public_ int sd_journal_add_disjunction(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ if (!j->level0)
+ return 0;
+
+ if (!j->level1)
+ return 0;
+
+ if (!j->level2)
+ return 0;
+
+ if (!j->level2->matches)
+ return 0;
+
+ j->level2 = NULL;
+ return 0;
+}
+
+static char *match_make_string(Match *m) {
+ char *p = NULL, *r;
+ Match *i;
+ bool enclose = false;
+
+ if (!m)
+ return strdup("none");
+
+ if (m->type == MATCH_DISCRETE)
+ return cescape_length(m->data, m->size);
+
+ LIST_FOREACH(matches, i, m->matches) {
+ char *t, *k;
+
+ t = match_make_string(i);
+ if (!t)
+ return mfree(p);
+
+ if (p) {
+ k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t);
+ free(p);
+ free(t);
+
+ if (!k)
+ return NULL;
+
+ p = k;
+
+ enclose = true;
+ } else
+ p = t;
+ }
+
+ if (enclose) {
+ r = strjoin("(", p, ")");
+ free(p);
+ return r;
+ }
+
+ return p;
+}
+
+char *journal_make_match_string(sd_journal *j) {
+ assert(j);
+
+ return match_make_string(j->level0);
+}
+
+_public_ void sd_journal_flush_matches(sd_journal *j) {
+ if (!j)
+ return;
+
+ if (j->level0)
+ match_free(j->level0);
+
+ j->level0 = j->level1 = j->level2 = NULL;
+
+ detach_location(j);
+}
+
+_pure_ static int compare_with_location(JournalFile *f, Location *l) {
+ int r;
+
+ assert(f);
+ assert(l);
+ assert(f->location_type == LOCATION_SEEK);
+ assert(IN_SET(l->type, LOCATION_DISCRETE, LOCATION_SEEK));
+
+ if (l->monotonic_set &&
+ sd_id128_equal(f->current_boot_id, l->boot_id) &&
+ l->realtime_set &&
+ f->current_realtime == l->realtime &&
+ l->xor_hash_set &&
+ f->current_xor_hash == l->xor_hash)
+ return 0;
+
+ if (l->seqnum_set &&
+ sd_id128_equal(f->header->seqnum_id, l->seqnum_id)) {
+
+ r = CMP(f->current_seqnum, l->seqnum);
+ if (r != 0)
+ return r;
+ }
+
+ if (l->monotonic_set &&
+ sd_id128_equal(f->current_boot_id, l->boot_id)) {
+
+ r = CMP(f->current_monotonic, l->monotonic);
+ if (r != 0)
+ return r;
+ }
+
+ if (l->realtime_set) {
+
+ r = CMP(f->current_realtime, l->realtime);
+ if (r != 0)
+ return r;
+ }
+
+ if (l->xor_hash_set) {
+
+ r = CMP(f->current_xor_hash, l->xor_hash);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int next_for_match(
+ sd_journal *j,
+ Match *m,
+ JournalFile *f,
+ uint64_t after_offset,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+
+ int r;
+ uint64_t np = 0;
+ Object *n;
+
+ assert(j);
+ assert(m);
+ assert(f);
+
+ if (m->type == MATCH_DISCRETE) {
+ uint64_t dp;
+
+ r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
+ if (r <= 0)
+ return r;
+
+ return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);
+
+ } else if (m->type == MATCH_OR_TERM) {
+ Match *i;
+
+ /* Find the earliest match beyond after_offset */
+
+ LIST_FOREACH(matches, i, m->matches) {
+ uint64_t cp;
+
+ r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
+ if (r < 0)
+ return r;
+ else if (r > 0) {
+ if (np == 0 || (direction == DIRECTION_DOWN ? cp < np : cp > np))
+ np = cp;
+ }
+ }
+
+ if (np == 0)
+ return 0;
+
+ } else if (m->type == MATCH_AND_TERM) {
+ Match *i, *last_moved;
+
+ /* Always jump to the next matching entry and repeat
+ * this until we find an offset that matches for all
+ * matches. */
+
+ if (!m->matches)
+ return 0;
+
+ r = next_for_match(j, m->matches, f, after_offset, direction, NULL, &np);
+ if (r <= 0)
+ return r;
+
+ assert(direction == DIRECTION_DOWN ? np >= after_offset : np <= after_offset);
+ last_moved = m->matches;
+
+ LIST_LOOP_BUT_ONE(matches, i, m->matches, last_moved) {
+ uint64_t cp;
+
+ r = next_for_match(j, i, f, np, direction, NULL, &cp);
+ if (r <= 0)
+ return r;
+
+ assert(direction == DIRECTION_DOWN ? cp >= np : cp <= np);
+ if (direction == DIRECTION_DOWN ? cp > np : cp < np) {
+ np = cp;
+ last_moved = i;
+ }
+ }
+ }
+
+ assert(np > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = n;
+ if (offset)
+ *offset = np;
+
+ return 1;
+}
+
+static int find_location_for_match(
+ sd_journal *j,
+ Match *m,
+ JournalFile *f,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+
+ int r;
+
+ assert(j);
+ assert(m);
+ assert(f);
+
+ if (m->type == MATCH_DISCRETE) {
+ uint64_t dp;
+
+ r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
+ if (r <= 0)
+ return r;
+
+ /* FIXME: missing: find by monotonic */
+
+ if (j->current_location.type == LOCATION_HEAD)
+ return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset);
+ if (j->current_location.type == LOCATION_TAIL)
+ return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset);
+ if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
+ return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);
+ if (j->current_location.monotonic_set) {
+ r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
+ if (r != -ENOENT)
+ return r;
+ }
+ if (j->current_location.realtime_set)
+ return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);
+
+ return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset);
+
+ } else if (m->type == MATCH_OR_TERM) {
+ uint64_t np = 0;
+ Object *n;
+ Match *i;
+
+ /* Find the earliest match */
+
+ LIST_FOREACH(matches, i, m->matches) {
+ uint64_t cp;
+
+ r = find_location_for_match(j, i, f, direction, NULL, &cp);
+ if (r < 0)
+ return r;
+ else if (r > 0) {
+ if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
+ np = cp;
+ }
+ }
+
+ if (np == 0)
+ return 0;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = n;
+ if (offset)
+ *offset = np;
+
+ return 1;
+
+ } else {
+ Match *i;
+ uint64_t np = 0;
+
+ assert(m->type == MATCH_AND_TERM);
+
+ /* First jump to the last match, and then find the
+ * next one where all matches match */
+
+ if (!m->matches)
+ return 0;
+
+ LIST_FOREACH(matches, i, m->matches) {
+ uint64_t cp;
+
+ r = find_location_for_match(j, i, f, direction, NULL, &cp);
+ if (r <= 0)
+ return r;
+
+ if (np == 0 || (direction == DIRECTION_DOWN ? cp > np : cp < np))
+ np = cp;
+ }
+
+ return next_for_match(j, m, f, np, direction, ret, offset);
+ }
+}
+
+static int find_location_with_matches(
+ sd_journal *j,
+ JournalFile *f,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+
+ int r;
+
+ assert(j);
+ assert(f);
+ assert(ret);
+ assert(offset);
+
+ if (!j->level0) {
+ /* No matches is simple */
+
+ if (j->current_location.type == LOCATION_HEAD)
+ return journal_file_next_entry(f, 0, DIRECTION_DOWN, ret, offset);
+ if (j->current_location.type == LOCATION_TAIL)
+ return journal_file_next_entry(f, 0, DIRECTION_UP, ret, offset);
+ if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
+ return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
+ if (j->current_location.monotonic_set) {
+ r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
+ if (r != -ENOENT)
+ return r;
+ }
+ if (j->current_location.realtime_set)
+ return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
+
+ return journal_file_next_entry(f, 0, direction, ret, offset);
+ } else
+ return find_location_for_match(j, j->level0, f, direction, ret, offset);
+}
+
+static int next_with_matches(
+ sd_journal *j,
+ JournalFile *f,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+
+ assert(j);
+ assert(f);
+ assert(ret);
+ assert(offset);
+
+ /* No matches is easy. We simple advance the file
+ * pointer by one. */
+ if (!j->level0)
+ return journal_file_next_entry(f, f->current_offset, direction, ret, offset);
+
+ /* If we have a match then we look for the next matching entry
+ * with an offset at least one step larger */
+ return next_for_match(j, j->level0, f,
+ direction == DIRECTION_DOWN ? f->current_offset + 1
+ : f->current_offset - 1,
+ direction, ret, offset);
+}
+
+static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction) {
+ Object *c;
+ uint64_t cp, n_entries;
+ int r;
+
+ assert(j);
+ assert(f);
+
+ n_entries = le64toh(f->header->n_entries);
+
+ /* If we hit EOF before, we don't need to look into this file again
+ * unless direction changed or new entries appeared. */
+ if (f->last_direction == direction && f->location_type == LOCATION_TAIL &&
+ n_entries == f->last_n_entries)
+ return 0;
+
+ f->last_n_entries = n_entries;
+
+ if (f->last_direction == direction && f->current_offset > 0) {
+ /* LOCATION_SEEK here means we did the work in a previous
+ * iteration and the current location already points to a
+ * candidate entry. */
+ if (f->location_type != LOCATION_SEEK) {
+ r = next_with_matches(j, f, direction, &c, &cp);
+ if (r <= 0)
+ return r;
+
+ journal_file_save_location(f, c, cp);
+ }
+ } else {
+ f->last_direction = direction;
+
+ r = find_location_with_matches(j, f, direction, &c, &cp);
+ if (r <= 0)
+ return r;
+
+ journal_file_save_location(f, c, cp);
+ }
+
+ /* OK, we found the spot, now let's advance until an entry
+ * that is actually different from what we were previously
+ * looking at. This is necessary to handle entries which exist
+ * in two (or more) journal files, and which shall all be
+ * suppressed but one. */
+
+ for (;;) {
+ bool found;
+
+ if (j->current_location.type == LOCATION_DISCRETE) {
+ int k;
+
+ k = compare_with_location(f, &j->current_location);
+
+ found = direction == DIRECTION_DOWN ? k > 0 : k < 0;
+ } else
+ found = true;
+
+ if (found)
+ return 1;
+
+ r = next_with_matches(j, f, direction, &c, &cp);
+ if (r <= 0)
+ return r;
+
+ journal_file_save_location(f, c, cp);
+ }
+}
+
+static int real_journal_next(sd_journal *j, direction_t direction) {
+ JournalFile *new_file = NULL;
+ unsigned i, n_files;
+ const void **files;
+ Object *o;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ r = iterated_cache_get(j->files_cache, NULL, &files, &n_files);
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < n_files; i++) {
+ JournalFile *f = (JournalFile *)files[i];
+ bool found;
+
+ r = next_beyond_location(j, f, direction);
+ if (r < 0) {
+ log_debug_errno(r, "Can't iterate through %s, ignoring: %m", f->path);
+ remove_file_real(j, f);
+ continue;
+ } else if (r == 0) {
+ f->location_type = LOCATION_TAIL;
+ continue;
+ }
+
+ if (!new_file)
+ found = true;
+ else {
+ int k;
+
+ k = journal_file_compare_locations(f, new_file);
+
+ found = direction == DIRECTION_DOWN ? k < 0 : k > 0;
+ }
+
+ if (found)
+ new_file = f;
+ }
+
+ if (!new_file)
+ return 0;
+
+ r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_file->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ set_location(j, new_file, o);
+
+ return 1;
+}
+
+_public_ int sd_journal_next(sd_journal *j) {
+ return real_journal_next(j, DIRECTION_DOWN);
+}
+
+_public_ int sd_journal_previous(sd_journal *j) {
+ return real_journal_next(j, DIRECTION_UP);
+}
+
+static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
+ int c = 0, r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ if (skip == 0) {
+ /* If this is not a discrete skip, then at least
+ * resolve the current location */
+ if (j->current_location.type != LOCATION_DISCRETE) {
+ r = real_journal_next(j, direction);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+ }
+
+ do {
+ r = real_journal_next(j, direction);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ return c;
+
+ skip--;
+ c++;
+ } while (skip > 0);
+
+ return c;
+}
+
+_public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
+ return real_journal_next_skip(j, DIRECTION_DOWN, skip);
+}
+
+_public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
+ return real_journal_next_skip(j, DIRECTION_UP, skip);
+}
+
+_public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
+ Object *o;
+ int r;
+ char bid[33], sid[33];
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(cursor, -EINVAL);
+
+ if (!j->current_file || j->current_file->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ sd_id128_to_string(j->current_file->header->seqnum_id, sid);
+ sd_id128_to_string(o->entry.boot_id, bid);
+
+ if (asprintf(cursor,
+ "s=%s;i=%"PRIx64";b=%s;m=%"PRIx64";t=%"PRIx64";x=%"PRIx64,
+ sid, le64toh(o->entry.seqnum),
+ bid, le64toh(o->entry.monotonic),
+ le64toh(o->entry.realtime),
+ le64toh(o->entry.xor_hash)) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+_public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
+ const char *word, *state;
+ size_t l;
+ unsigned long long seqnum, monotonic, realtime, xor_hash;
+ bool
+ seqnum_id_set = false,
+ seqnum_set = false,
+ boot_id_set = false,
+ monotonic_set = false,
+ realtime_set = false,
+ xor_hash_set = false;
+ sd_id128_t seqnum_id, boot_id;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(!isempty(cursor), -EINVAL);
+
+ FOREACH_WORD_SEPARATOR(word, l, cursor, ";", state) {
+ char *item;
+ int k = 0;
+
+ if (l < 2 || word[1] != '=')
+ return -EINVAL;
+
+ item = strndup(word, l);
+ if (!item)
+ return -ENOMEM;
+
+ switch (word[0]) {
+
+ case 's':
+ seqnum_id_set = true;
+ k = sd_id128_from_string(item+2, &seqnum_id);
+ break;
+
+ case 'i':
+ seqnum_set = true;
+ if (sscanf(item+2, "%llx", &seqnum) != 1)
+ k = -EINVAL;
+ break;
+
+ case 'b':
+ boot_id_set = true;
+ k = sd_id128_from_string(item+2, &boot_id);
+ break;
+
+ case 'm':
+ monotonic_set = true;
+ if (sscanf(item+2, "%llx", &monotonic) != 1)
+ k = -EINVAL;
+ break;
+
+ case 't':
+ realtime_set = true;
+ if (sscanf(item+2, "%llx", &realtime) != 1)
+ k = -EINVAL;
+ break;
+
+ case 'x':
+ xor_hash_set = true;
+ if (sscanf(item+2, "%llx", &xor_hash) != 1)
+ k = -EINVAL;
+ break;
+ }
+
+ free(item);
+
+ if (k < 0)
+ return k;
+ }
+
+ if ((!seqnum_set || !seqnum_id_set) &&
+ (!monotonic_set || !boot_id_set) &&
+ !realtime_set)
+ return -EINVAL;
+
+ reset_location(j);
+
+ j->current_location.type = LOCATION_SEEK;
+
+ if (realtime_set) {
+ j->current_location.realtime = (uint64_t) realtime;
+ j->current_location.realtime_set = true;
+ }
+
+ if (seqnum_set && seqnum_id_set) {
+ j->current_location.seqnum = (uint64_t) seqnum;
+ j->current_location.seqnum_id = seqnum_id;
+ j->current_location.seqnum_set = true;
+ }
+
+ if (monotonic_set && boot_id_set) {
+ j->current_location.monotonic = (uint64_t) monotonic;
+ j->current_location.boot_id = boot_id;
+ j->current_location.monotonic_set = true;
+ }
+
+ if (xor_hash_set) {
+ j->current_location.xor_hash = (uint64_t) xor_hash;
+ j->current_location.xor_hash_set = true;
+ }
+
+ return 0;
+}
+
+_public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
+ int r;
+ Object *o;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(!isempty(cursor), -EINVAL);
+
+ if (!j->current_file || j->current_file->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *item = NULL;
+ unsigned long long ll;
+ sd_id128_t id;
+ int k = 0;
+
+ r = extract_first_word(&cursor, &item, ";", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ break;
+
+ if (strlen(item) < 2 || item[1] != '=')
+ return -EINVAL;
+
+ switch (item[0]) {
+
+ case 's':
+ k = sd_id128_from_string(item+2, &id);
+ if (k < 0)
+ return k;
+ if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
+ return 0;
+ break;
+
+ case 'i':
+ if (sscanf(item+2, "%llx", &ll) != 1)
+ return -EINVAL;
+ if (ll != le64toh(o->entry.seqnum))
+ return 0;
+ break;
+
+ case 'b':
+ k = sd_id128_from_string(item+2, &id);
+ if (k < 0)
+ return k;
+ if (!sd_id128_equal(id, o->entry.boot_id))
+ return 0;
+ break;
+
+ case 'm':
+ if (sscanf(item+2, "%llx", &ll) != 1)
+ return -EINVAL;
+ if (ll != le64toh(o->entry.monotonic))
+ return 0;
+ break;
+
+ case 't':
+ if (sscanf(item+2, "%llx", &ll) != 1)
+ return -EINVAL;
+ if (ll != le64toh(o->entry.realtime))
+ return 0;
+ break;
+
+ case 'x':
+ if (sscanf(item+2, "%llx", &ll) != 1)
+ return -EINVAL;
+ if (ll != le64toh(o->entry.xor_hash))
+ return 0;
+ break;
+ }
+ }
+
+ return 1;
+}
+
+_public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ reset_location(j);
+ j->current_location.type = LOCATION_SEEK;
+ j->current_location.boot_id = boot_id;
+ j->current_location.monotonic = usec;
+ j->current_location.monotonic_set = true;
+
+ return 0;
+}
+
+_public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ reset_location(j);
+ j->current_location.type = LOCATION_SEEK;
+ j->current_location.realtime = usec;
+ j->current_location.realtime_set = true;
+
+ return 0;
+}
+
+_public_ int sd_journal_seek_head(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ reset_location(j);
+ j->current_location.type = LOCATION_HEAD;
+
+ return 0;
+}
+
+_public_ int sd_journal_seek_tail(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ reset_location(j);
+ j->current_location.type = LOCATION_TAIL;
+
+ return 0;
+}
+
+static void check_network(sd_journal *j, int fd) {
+ assert(j);
+
+ if (j->on_network)
+ return;
+
+ j->on_network = fd_is_network_fs(fd);
+}
+
+static bool file_has_type_prefix(const char *prefix, const char *filename) {
+ const char *full, *tilded, *atted;
+
+ full = strjoina(prefix, ".journal");
+ tilded = strjoina(full, "~");
+ atted = strjoina(prefix, "@");
+
+ return STR_IN_SET(filename, full, tilded) ||
+ startswith(filename, atted);
+}
+
+static bool file_type_wanted(int flags, const char *filename) {
+ assert(filename);
+
+ if (!endswith(filename, ".journal") && !endswith(filename, ".journal~"))
+ return false;
+
+ /* no flags set → every type is OK */
+ if (!(flags & (SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)))
+ return true;
+
+ if (flags & SD_JOURNAL_SYSTEM && file_has_type_prefix("system", filename))
+ return true;
+
+ if (flags & SD_JOURNAL_CURRENT_USER) {
+ char prefix[5 + DECIMAL_STR_MAX(uid_t) + 1];
+
+ xsprintf(prefix, "user-"UID_FMT, getuid());
+
+ if (file_has_type_prefix(prefix, filename))
+ return true;
+ }
+
+ return false;
+}
+
+static bool path_has_prefix(sd_journal *j, const char *path, const char *prefix) {
+ assert(j);
+ assert(path);
+ assert(prefix);
+
+ if (j->toplevel_fd >= 0)
+ return false;
+
+ return path_startswith(path, prefix);
+}
+
+static void track_file_disposition(sd_journal *j, JournalFile *f) {
+ assert(j);
+ assert(f);
+
+ if (!j->has_runtime_files && path_has_prefix(j, f->path, "/run"))
+ j->has_runtime_files = true;
+ else if (!j->has_persistent_files && path_has_prefix(j, f->path, "/var"))
+ j->has_persistent_files = true;
+}
+
+static const char *skip_slash(const char *p) {
+
+ if (!p)
+ return NULL;
+
+ while (*p == '/')
+ p++;
+
+ return p;
+}
+
+static int add_any_file(
+ sd_journal *j,
+ int fd,
+ const char *path) {
+
+ bool close_fd = false;
+ JournalFile *f;
+ struct stat st;
+ int r, k;
+
+ assert(j);
+ assert(fd >= 0 || path);
+
+ if (fd < 0) {
+ if (j->toplevel_fd >= 0)
+ /* If there's a top-level fd defined make the path relative, explicitly, since otherwise
+ * openat() ignores the first argument. */
+
+ fd = openat(j->toplevel_fd, skip_slash(path), O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ else
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0) {
+ r = log_debug_errno(errno, "Failed to open journal file %s: %m", path);
+ goto finish;
+ }
+
+ close_fd = true;
+
+ r = fd_nonblock(fd, false);
+ if (r < 0) {
+ r = log_debug_errno(errno, "Failed to turn off O_NONBLOCK for %s: %m", path);
+ goto finish;
+ }
+ }
+
+ if (fstat(fd, &st) < 0) {
+ r = log_debug_errno(errno, "Failed to fstat file '%s': %m", path);
+ goto finish;
+ }
+
+ r = stat_verify_regular(&st);
+ if (r < 0) {
+ log_debug_errno(r, "Refusing to open '%s', as it is not a regular file.", path);
+ goto finish;
+ }
+
+ f = ordered_hashmap_get(j->files, path);
+ if (f) {
+ if (f->last_stat.st_dev == st.st_dev &&
+ f->last_stat.st_ino == st.st_ino) {
+
+ /* We already track this file, under the same path and with the same device/inode numbers, it's
+ * hence really the same. Mark this file as seen in this generation. This is used to GC old
+ * files in process_q_overflow() to detect journal files that are still there and discern them
+ * from those which are gone. */
+
+ f->last_seen_generation = j->generation;
+ r = 0;
+ goto finish;
+ }
+
+ /* So we tracked a file under this name, but it has a different inode/device. In that case, it got
+ * replaced (probably due to rotation?), let's drop it hence from our list. */
+ remove_file_real(j, f);
+ f = NULL;
+ }
+
+ if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
+ log_debug("Too many open journal files, not adding %s.", path);
+ r = -ETOOMANYREFS;
+ goto finish;
+ }
+
+ r = journal_file_open(fd, path, O_RDONLY, 0, false, 0, false, NULL, j->mmap, NULL, NULL, &f);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to open journal file %s: %m", path);
+ goto finish;
+ }
+
+ /* journal_file_dump(f); */
+
+ r = ordered_hashmap_put(j->files, f->path, f);
+ if (r < 0) {
+ f->close_fd = false; /* make sure journal_file_close() doesn't close the caller's fd (or our own). We'll let the caller do that, or ourselves */
+ (void) journal_file_close(f);
+ goto finish;
+ }
+
+ close_fd = false; /* the fd is now owned by the JournalFile object */
+
+ f->last_seen_generation = j->generation;
+
+ track_file_disposition(j, f);
+ check_network(j, f->fd);
+
+ j->current_invalidate_counter++;
+
+ log_debug("File %s added.", f->path);
+
+ r = 0;
+
+finish:
+ if (close_fd)
+ safe_close(fd);
+
+ if (r < 0) {
+ k = journal_put_error(j, r, path);
+ if (k < 0)
+ return k;
+ }
+
+ return r;
+}
+
+static int add_file_by_name(
+ sd_journal *j,
+ const char *prefix,
+ const char *filename) {
+
+ const char *path;
+
+ assert(j);
+ assert(prefix);
+ assert(filename);
+
+ if (j->no_new_files)
+ return 0;
+
+ if (!file_type_wanted(j->flags, filename))
+ return 0;
+
+ path = strjoina(prefix, "/", filename);
+ return add_any_file(j, -1, path);
+}
+
+static void remove_file_by_name(
+ sd_journal *j,
+ const char *prefix,
+ const char *filename) {
+
+ const char *path;
+ JournalFile *f;
+
+ assert(j);
+ assert(prefix);
+ assert(filename);
+
+ path = strjoina(prefix, "/", filename);
+ f = ordered_hashmap_get(j->files, path);
+ if (!f)
+ return;
+
+ remove_file_real(j, f);
+}
+
+static void remove_file_real(sd_journal *j, JournalFile *f) {
+ assert(j);
+ assert(f);
+
+ (void) ordered_hashmap_remove(j->files, f->path);
+
+ log_debug("File %s removed.", f->path);
+
+ if (j->current_file == f) {
+ j->current_file = NULL;
+ j->current_field = 0;
+ }
+
+ if (j->unique_file == f) {
+ /* Jump to the next unique_file or NULL if that one was last */
+ j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
+ j->unique_offset = 0;
+ if (!j->unique_file)
+ j->unique_file_lost = true;
+ }
+
+ if (j->fields_file == f) {
+ j->fields_file = ordered_hashmap_next(j->files, j->fields_file->path);
+ j->fields_offset = 0;
+ if (!j->fields_file)
+ j->fields_file_lost = true;
+ }
+
+ (void) journal_file_close(f);
+
+ j->current_invalidate_counter++;
+}
+
+static int dirname_is_machine_id(const char *fn) {
+ sd_id128_t id, machine;
+ int r;
+
+ r = sd_id128_get_machine(&machine);
+ if (r < 0)
+ return r;
+
+ r = sd_id128_from_string(fn, &id);
+ if (r < 0)
+ return r;
+
+ return sd_id128_equal(id, machine);
+}
+
+static bool dirent_is_journal_file(const struct dirent *de) {
+ assert(de);
+
+ if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
+ return false;
+
+ return endswith(de->d_name, ".journal") ||
+ endswith(de->d_name, ".journal~");
+}
+
+static bool dirent_is_id128_subdir(const struct dirent *de) {
+ assert(de);
+
+ if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN))
+ return false;
+
+ return id128_is_valid(de->d_name);
+}
+
+static int directory_open(sd_journal *j, const char *path, DIR **ret) {
+ DIR *d;
+
+ assert(j);
+ assert(path);
+ assert(ret);
+
+ if (j->toplevel_fd < 0)
+ d = opendir(path);
+ else
+ /* Open the specified directory relative to the toplevel fd. Enforce that the path specified is
+ * relative, by dropping the initial slash */
+ d = xopendirat(j->toplevel_fd, skip_slash(path), 0);
+ if (!d)
+ return -errno;
+
+ *ret = d;
+ return 0;
+}
+
+static int add_directory(sd_journal *j, const char *prefix, const char *dirname);
+
+static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) {
+ struct dirent *de;
+
+ assert(j);
+ assert(m);
+ assert(d);
+
+ FOREACH_DIRENT_ALL(de, d, goto fail) {
+
+ if (dirent_is_journal_file(de))
+ (void) add_file_by_name(j, m->path, de->d_name);
+
+ if (m->is_root && dirent_is_id128_subdir(de))
+ (void) add_directory(j, m->path, de->d_name);
+ }
+
+ return;
+
+fail:
+ log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path);
+}
+
+static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) {
+ int r;
+
+ assert(j);
+ assert(m);
+ assert(fd >= 0);
+
+ /* Watch this directory if that's enabled and if it not being watched yet. */
+
+ if (m->wd > 0) /* Already have a watch? */
+ return;
+ if (j->inotify_fd < 0) /* Not watching at all? */
+ return;
+
+ m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask);
+ if (m->wd < 0) {
+ log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path);
+ return;
+ }
+
+ r = hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m);
+ if (r == -EEXIST)
+ log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path);
+ (void) inotify_rm_watch(j->inotify_fd, m->wd);
+ m->wd = -1;
+ }
+}
+
+static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
+ _cleanup_free_ char *path = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ Directory *m;
+ int r, k;
+
+ assert(j);
+ assert(prefix);
+
+ /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch
+ * and reenumerates directory contents */
+
+ if (dirname)
+ path = strjoin(prefix, "/", dirname);
+ else
+ path = strdup(prefix);
+ if (!path) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ log_debug("Considering directory '%s'.", path);
+
+ /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */
+ if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
+ !((dirname && dirname_is_machine_id(dirname) > 0) || path_has_prefix(j, path, "/run")))
+ return 0;
+
+ r = directory_open(j, path, &d);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to open directory '%s': %m", path);
+ goto fail;
+ }
+
+ m = hashmap_get(j->directories_by_path, path);
+ if (!m) {
+ m = new0(Directory, 1);
+ if (!m) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ m->is_root = false;
+ m->path = path;
+
+ if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
+ free(m);
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ path = NULL; /* avoid freeing in cleanup */
+ j->current_invalidate_counter++;
+
+ log_debug("Directory %s added.", m->path);
+
+ } else if (m->is_root)
+ return 0; /* Don't 'downgrade' from root directory */
+
+ m->last_seen_generation = j->generation;
+
+ directory_watch(j, m, dirfd(d),
+ IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
+ IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
+ IN_ONLYDIR);
+
+ if (!j->no_new_files)
+ directory_enumerate(j, m, d);
+
+ check_network(j, dirfd(d));
+
+ return 0;
+
+fail:
+ k = journal_put_error(j, r, path ?: prefix);
+ if (k < 0)
+ return k;
+
+ return r;
+}
+
+static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ Directory *m;
+ int r, k;
+
+ assert(j);
+
+ /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we
+ * update the inotify logic, and renumerate the directory entries. This call may hence be called to initially
+ * populate the set, as well as to update it later. */
+
+ if (p) {
+ /* If there's a path specified, use it. */
+
+ log_debug("Considering root directory '%s'.", p);
+
+ if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
+ !path_has_prefix(j, p, "/run"))
+ return -EINVAL;
+
+ if (j->prefix)
+ p = strjoina(j->prefix, p);
+
+ r = directory_open(j, p, &d);
+ if (r == -ENOENT && missing_ok)
+ return 0;
+ if (r < 0) {
+ log_debug_errno(r, "Failed to open root directory %s: %m", p);
+ goto fail;
+ }
+ } else {
+ int dfd;
+
+ /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since
+ * opendir() will take possession of the fd, and close it, which we don't want. */
+
+ p = "."; /* store this as "." in the directories hashmap */
+
+ dfd = fcntl(j->toplevel_fd, F_DUPFD_CLOEXEC, 3);
+ if (dfd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ d = fdopendir(dfd);
+ if (!d) {
+ r = -errno;
+ safe_close(dfd);
+ goto fail;
+ }
+
+ rewinddir(d);
+ }
+
+ m = hashmap_get(j->directories_by_path, p);
+ if (!m) {
+ m = new0(Directory, 1);
+ if (!m) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ m->is_root = true;
+
+ m->path = strdup(p);
+ if (!m->path) {
+ free(m);
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
+ free(m->path);
+ free(m);
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ j->current_invalidate_counter++;
+
+ log_debug("Root directory %s added.", m->path);
+
+ } else if (!m->is_root)
+ return 0;
+
+ directory_watch(j, m, dirfd(d),
+ IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
+ IN_ONLYDIR);
+
+ if (!j->no_new_files)
+ directory_enumerate(j, m, d);
+
+ check_network(j, dirfd(d));
+
+ return 0;
+
+fail:
+ k = journal_put_error(j, r, p);
+ if (k < 0)
+ return k;
+
+ return r;
+}
+
+static void remove_directory(sd_journal *j, Directory *d) {
+ assert(j);
+
+ if (d->wd > 0) {
+ hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
+
+ if (j->inotify_fd >= 0)
+ inotify_rm_watch(j->inotify_fd, d->wd);
+ }
+
+ hashmap_remove(j->directories_by_path, d->path);
+
+ if (d->is_root)
+ log_debug("Root directory %s removed.", d->path);
+ else
+ log_debug("Directory %s removed.", d->path);
+
+ free(d->path);
+ free(d);
+}
+
+static int add_search_paths(sd_journal *j) {
+
+ static const char search_paths[] =
+ "/run/log/journal\0"
+ "/var/log/journal\0";
+ const char *p;
+
+ assert(j);
+
+ /* We ignore most errors here, since the idea is to only open
+ * what's actually accessible, and ignore the rest. */
+
+ NULSTR_FOREACH(p, search_paths)
+ (void) add_root_directory(j, p, true);
+
+ if (!(j->flags & SD_JOURNAL_LOCAL_ONLY))
+ (void) add_root_directory(j, "/var/log/journal/remote", true);
+
+ return 0;
+}
+
+static int add_current_paths(sd_journal *j) {
+ Iterator i;
+ JournalFile *f;
+
+ assert(j);
+ assert(j->no_new_files);
+
+ /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we
+ * treat them as fatal. */
+
+ ORDERED_HASHMAP_FOREACH(f, j->files, i) {
+ _cleanup_free_ char *dir;
+ int r;
+
+ dir = dirname_malloc(f->path);
+ if (!dir)
+ return -ENOMEM;
+
+ r = add_directory(j, dir, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int allocate_inotify(sd_journal *j) {
+ assert(j);
+
+ if (j->inotify_fd < 0) {
+ j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (j->inotify_fd < 0)
+ return -errno;
+ }
+
+ return hashmap_ensure_allocated(&j->directories_by_wd, NULL);
+}
+
+static sd_journal *journal_new(int flags, const char *path) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+
+ j = new0(sd_journal, 1);
+ if (!j)
+ return NULL;
+
+ j->original_pid = getpid_cached();
+ j->toplevel_fd = -1;
+ j->inotify_fd = -1;
+ j->flags = flags;
+ j->data_threshold = DEFAULT_DATA_THRESHOLD;
+
+ if (path) {
+ char *t;
+
+ t = strdup(path);
+ if (!t)
+ return NULL;
+
+ if (flags & SD_JOURNAL_OS_ROOT)
+ j->prefix = t;
+ else
+ j->path = t;
+ }
+
+ j->files = ordered_hashmap_new(&path_hash_ops);
+ if (!j->files)
+ return NULL;
+
+ j->files_cache = ordered_hashmap_iterated_cache_new(j->files);
+ j->directories_by_path = hashmap_new(&path_hash_ops);
+ j->mmap = mmap_cache_new();
+ if (!j->files_cache || !j->directories_by_path || !j->mmap)
+ return NULL;
+
+ return TAKE_PTR(j);
+}
+
+#define OPEN_ALLOWED_FLAGS \
+ (SD_JOURNAL_LOCAL_ONLY | \
+ SD_JOURNAL_RUNTIME_ONLY | \
+ SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)
+
+_public_ int sd_journal_open(sd_journal **ret, int flags) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return((flags & ~OPEN_ALLOWED_FLAGS) == 0, -EINVAL);
+
+ j = journal_new(flags, NULL);
+ if (!j)
+ return -ENOMEM;
+
+ r = add_search_paths(j);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+#define OPEN_CONTAINER_ALLOWED_FLAGS \
+ (SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_SYSTEM)
+
+_public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) {
+ _cleanup_free_ char *root = NULL, *class = NULL;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ char *p;
+ int r;
+
+ /* This is pretty much deprecated, people should use machined's OpenMachineRootDirectory() call instead in
+ * combination with sd_journal_open_directory_fd(). */
+
+ assert_return(machine, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return((flags & ~OPEN_CONTAINER_ALLOWED_FLAGS) == 0, -EINVAL);
+ assert_return(machine_name_is_valid(machine), -EINVAL);
+
+ p = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, p,
+ "ROOT", &root,
+ "CLASS", &class);
+ if (r == -ENOENT)
+ return -EHOSTDOWN;
+ if (r < 0)
+ return r;
+ if (!root)
+ return -ENODATA;
+
+ if (!streq_ptr(class, "container"))
+ return -EIO;
+
+ j = journal_new(flags, root);
+ if (!j)
+ return -ENOMEM;
+
+ r = add_search_paths(j);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+#define OPEN_DIRECTORY_ALLOWED_FLAGS \
+ (SD_JOURNAL_OS_ROOT | \
+ SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
+
+_public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(path, -EINVAL);
+ assert_return((flags & ~OPEN_DIRECTORY_ALLOWED_FLAGS) == 0, -EINVAL);
+
+ j = journal_new(flags, path);
+ if (!j)
+ return -ENOMEM;
+
+ if (flags & SD_JOURNAL_OS_ROOT)
+ r = add_search_paths(j);
+ else
+ r = add_root_directory(j, path, false);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+_public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ const char **path;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(flags == 0, -EINVAL);
+
+ j = journal_new(flags, NULL);
+ if (!j)
+ return -ENOMEM;
+
+ STRV_FOREACH(path, paths) {
+ r = add_any_file(j, -1, *path);
+ if (r < 0)
+ return r;
+ }
+
+ j->no_new_files = true;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+#define OPEN_DIRECTORY_FD_ALLOWED_FLAGS \
+ (SD_JOURNAL_OS_ROOT | \
+ SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
+
+_public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ struct stat st;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+ assert_return((flags & ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS) == 0, -EINVAL);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode))
+ return -EBADFD;
+
+ j = journal_new(flags, NULL);
+ if (!j)
+ return -ENOMEM;
+
+ j->toplevel_fd = fd;
+
+ if (flags & SD_JOURNAL_OS_ROOT)
+ r = add_search_paths(j);
+ else
+ r = add_root_directory(j, NULL, false);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+_public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fds, int flags) {
+ Iterator iterator;
+ JournalFile *f;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ unsigned i;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(n_fds > 0, -EBADF);
+ assert_return(flags == 0, -EINVAL);
+
+ j = journal_new(flags, NULL);
+ if (!j)
+ return -ENOMEM;
+
+ for (i = 0; i < n_fds; i++) {
+ struct stat st;
+
+ if (fds[i] < 0) {
+ r = -EBADF;
+ goto fail;
+ }
+
+ if (fstat(fds[i], &st) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ goto fail;
+
+ r = add_any_file(j, fds[i], NULL);
+ if (r < 0)
+ goto fail;
+ }
+
+ j->no_new_files = true;
+ j->no_inotify = true;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+
+fail:
+ /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they
+ * remain open */
+ ORDERED_HASHMAP_FOREACH(f, j->files, iterator)
+ f->close_fd = false;
+
+ return r;
+}
+
+_public_ void sd_journal_close(sd_journal *j) {
+ Directory *d;
+
+ if (!j)
+ return;
+
+ sd_journal_flush_matches(j);
+
+ ordered_hashmap_free_with_destructor(j->files, journal_file_close);
+ iterated_cache_free(j->files_cache);
+
+ while ((d = hashmap_first(j->directories_by_path)))
+ remove_directory(j, d);
+
+ while ((d = hashmap_first(j->directories_by_wd)))
+ remove_directory(j, d);
+
+ hashmap_free(j->directories_by_path);
+ hashmap_free(j->directories_by_wd);
+
+ safe_close(j->inotify_fd);
+
+ if (j->mmap) {
+ log_debug("mmap cache statistics: %u hit, %u miss", mmap_cache_get_hit(j->mmap), mmap_cache_get_missed(j->mmap));
+ mmap_cache_unref(j->mmap);
+ }
+
+ hashmap_free_free(j->errors);
+
+ free(j->path);
+ free(j->prefix);
+ free(j->unique_field);
+ free(j->fields_buffer);
+ free(j);
+}
+
+_public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
+ Object *o;
+ JournalFile *f;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(ret, -EINVAL);
+
+ f = j->current_file;
+ if (!f)
+ return -EADDRNOTAVAIL;
+
+ if (f->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ *ret = le64toh(o->entry.realtime);
+ return 0;
+}
+
+_public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
+ Object *o;
+ JournalFile *f;
+ int r;
+ sd_id128_t id;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ f = j->current_file;
+ if (!f)
+ return -EADDRNOTAVAIL;
+
+ if (f->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ if (ret_boot_id)
+ *ret_boot_id = o->entry.boot_id;
+ else {
+ r = sd_id128_get_boot(&id);
+ if (r < 0)
+ return r;
+
+ if (!sd_id128_equal(id, o->entry.boot_id))
+ return -ESTALE;
+ }
+
+ if (ret)
+ *ret = le64toh(o->entry.monotonic);
+
+ return 0;
+}
+
+static bool field_is_valid(const char *field) {
+ const char *p;
+
+ assert(field);
+
+ if (isempty(field))
+ return false;
+
+ if (startswith(field, "__"))
+ return false;
+
+ for (p = field; *p; p++) {
+
+ if (*p == '_')
+ continue;
+
+ if (*p >= 'A' && *p <= 'Z')
+ continue;
+
+ if (*p >= '0' && *p <= '9')
+ continue;
+
+ return false;
+ }
+
+ return true;
+}
+
+_public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
+ JournalFile *f;
+ uint64_t i, n;
+ size_t field_length;
+ int r;
+ Object *o;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(field, -EINVAL);
+ assert_return(data, -EINVAL);
+ assert_return(size, -EINVAL);
+ assert_return(field_is_valid(field), -EINVAL);
+
+ f = j->current_file;
+ if (!f)
+ return -EADDRNOTAVAIL;
+
+ if (f->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ field_length = strlen(field);
+
+ n = journal_file_entry_n_items(o);
+ for (i = 0; i < n; i++) {
+ uint64_t p, l;
+ le64_t le_hash;
+ size_t t;
+ int compression;
+
+ p = le64toh(o->entry.items[i].object_offset);
+ le_hash = o->entry.items[i].hash;
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ if (le_hash != o->data.hash)
+ return -EBADMSG;
+
+ l = le64toh(o->object.size) - offsetof(Object, data.payload);
+
+ compression = o->object.flags & OBJECT_COMPRESSION_MASK;
+ if (compression) {
+#if HAVE_XZ || HAVE_LZ4
+ r = decompress_startswith(compression,
+ o->data.payload, l,
+ &f->compress_buffer, &f->compress_buffer_size,
+ field, field_length, '=');
+ if (r < 0)
+ log_debug_errno(r, "Cannot decompress %s object of length %"PRIu64" at offset "OFSfmt": %m",
+ object_compressed_to_string(compression), l, p);
+ else if (r > 0) {
+
+ size_t rsize;
+
+ r = decompress_blob(compression,
+ o->data.payload, l,
+ &f->compress_buffer, &f->compress_buffer_size, &rsize,
+ j->data_threshold);
+ if (r < 0)
+ return r;
+
+ *data = f->compress_buffer;
+ *size = (size_t) rsize;
+
+ return 0;
+ }
+#else
+ return -EPROTONOSUPPORT;
+#endif
+ } else if (l >= field_length+1 &&
+ memcmp(o->data.payload, field, field_length) == 0 &&
+ o->data.payload[field_length] == '=') {
+
+ t = (size_t) l;
+
+ if ((uint64_t) t != l)
+ return -E2BIG;
+
+ *data = o->data.payload;
+ *size = t;
+
+ return 0;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+ }
+
+ return -ENOENT;
+}
+
+static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **data, size_t *size) {
+ size_t t;
+ uint64_t l;
+ int compression;
+
+ l = le64toh(o->object.size) - offsetof(Object, data.payload);
+ t = (size_t) l;
+
+ /* We can't read objects larger than 4G on a 32bit machine */
+ if ((uint64_t) t != l)
+ return -E2BIG;
+
+ compression = o->object.flags & OBJECT_COMPRESSION_MASK;
+ if (compression) {
+#if HAVE_XZ || HAVE_LZ4
+ size_t rsize;
+ int r;
+
+ r = decompress_blob(compression,
+ o->data.payload, l, &f->compress_buffer,
+ &f->compress_buffer_size, &rsize, j->data_threshold);
+ if (r < 0)
+ return r;
+
+ *data = f->compress_buffer;
+ *size = (size_t) rsize;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+ } else {
+ *data = o->data.payload;
+ *size = t;
+ }
+
+ return 0;
+}
+
+_public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
+ JournalFile *f;
+ uint64_t p, n;
+ le64_t le_hash;
+ int r;
+ Object *o;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(data, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ f = j->current_file;
+ if (!f)
+ return -EADDRNOTAVAIL;
+
+ if (f->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ n = journal_file_entry_n_items(o);
+ if (j->current_field >= n)
+ return 0;
+
+ p = le64toh(o->entry.items[j->current_field].object_offset);
+ le_hash = o->entry.items[j->current_field].hash;
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ if (le_hash != o->data.hash)
+ return -EBADMSG;
+
+ r = return_data(j, f, o, data, size);
+ if (r < 0)
+ return r;
+
+ j->current_field++;
+
+ return 1;
+}
+
+_public_ void sd_journal_restart_data(sd_journal *j) {
+ if (!j)
+ return;
+
+ j->current_field = 0;
+}
+
+static int reiterate_all_paths(sd_journal *j) {
+ assert(j);
+
+ if (j->no_new_files)
+ return add_current_paths(j);
+
+ if (j->flags & SD_JOURNAL_OS_ROOT)
+ return add_search_paths(j);
+
+ if (j->toplevel_fd >= 0)
+ return add_root_directory(j, NULL, false);
+
+ if (j->path)
+ return add_root_directory(j, j->path, true);
+
+ return add_search_paths(j);
+}
+
+_public_ int sd_journal_get_fd(sd_journal *j) {
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ if (j->no_inotify)
+ return -EMEDIUMTYPE;
+
+ if (j->inotify_fd >= 0)
+ return j->inotify_fd;
+
+ r = allocate_inotify(j);
+ if (r < 0)
+ return r;
+
+ log_debug("Reiterating files to get inotify watches established.");
+
+ /* Iterate through all dirs again, to add them to the inotify */
+ r = reiterate_all_paths(j);
+ if (r < 0)
+ return r;
+
+ return j->inotify_fd;
+}
+
+_public_ int sd_journal_get_events(sd_journal *j) {
+ int fd;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ fd = sd_journal_get_fd(j);
+ if (fd < 0)
+ return fd;
+
+ return POLLIN;
+}
+
+_public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
+ int fd;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(timeout_usec, -EINVAL);
+
+ fd = sd_journal_get_fd(j);
+ if (fd < 0)
+ return fd;
+
+ if (!j->on_network) {
+ *timeout_usec = (uint64_t) -1;
+ return 0;
+ }
+
+ /* If we are on the network we need to regularly check for
+ * changes manually */
+
+ *timeout_usec = j->last_process_usec + JOURNAL_FILES_RECHECK_USEC;
+ return 1;
+}
+
+static void process_q_overflow(sd_journal *j) {
+ JournalFile *f;
+ Directory *m;
+ Iterator i;
+
+ assert(j);
+
+ /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
+ * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
+ * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
+ * are subject for unloading. */
+
+ log_debug("Inotify queue overrun, reiterating everything.");
+
+ j->generation++;
+ (void) reiterate_all_paths(j);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files, i) {
+
+ if (f->last_seen_generation == j->generation)
+ continue;
+
+ log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path);
+ remove_file_real(j, f);
+ }
+
+ HASHMAP_FOREACH(m, j->directories_by_path, i) {
+
+ if (m->last_seen_generation == j->generation)
+ continue;
+
+ if (m->is_root) /* Never GC root directories */
+ continue;
+
+ log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", f->path);
+ remove_directory(j, m);
+ }
+
+ log_debug("Reiteration complete.");
+}
+
+static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
+ Directory *d;
+
+ assert(j);
+ assert(e);
+
+ if (e->mask & IN_Q_OVERFLOW) {
+ process_q_overflow(j);
+ return;
+ }
+
+ /* Is this a subdirectory we watch? */
+ d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
+ if (d) {
+ if (!(e->mask & IN_ISDIR) && e->len > 0 &&
+ (endswith(e->name, ".journal") ||
+ endswith(e->name, ".journal~"))) {
+
+ /* Event for a journal file */
+
+ if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
+ (void) add_file_by_name(j, d->path, e->name);
+ else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT))
+ remove_file_by_name(j, d->path, e->name);
+
+ } else if (!d->is_root && e->len == 0) {
+
+ /* Event for a subdirectory */
+
+ if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT))
+ remove_directory(j, d);
+
+ } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && id128_is_valid(e->name)) {
+
+ /* Event for root directory */
+
+ if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
+ (void) add_directory(j, d->path, e->name);
+ }
+
+ return;
+ }
+
+ if (e->mask & IN_IGNORED)
+ return;
+
+ log_debug("Unexpected inotify event.");
+}
+
+static int determine_change(sd_journal *j) {
+ bool b;
+
+ assert(j);
+
+ b = j->current_invalidate_counter != j->last_invalidate_counter;
+ j->last_invalidate_counter = j->current_invalidate_counter;
+
+ return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
+}
+
+_public_ int sd_journal_process(sd_journal *j) {
+ bool got_something = false;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ if (j->inotify_fd < 0) /* We have no inotify fd yet? Then there's noting to process. */
+ return 0;
+
+ j->last_process_usec = now(CLOCK_MONOTONIC);
+ j->last_invalidate_counter = j->current_invalidate_counter;
+
+ for (;;) {
+ union inotify_event_buffer buffer;
+ struct inotify_event *e;
+ ssize_t l;
+
+ l = read(j->inotify_fd, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return got_something ? determine_change(j) : SD_JOURNAL_NOP;
+
+ return -errno;
+ }
+
+ got_something = true;
+
+ FOREACH_INOTIFY_EVENT(e, buffer, l)
+ process_inotify_event(j, e);
+ }
+}
+
+_public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
+ int r;
+ uint64_t t;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ if (j->inotify_fd < 0) {
+
+ /* This is the first invocation, hence create the
+ * inotify watch */
+ r = sd_journal_get_fd(j);
+ if (r < 0)
+ return r;
+
+ /* The journal might have changed since the context
+ * object was created and we weren't watching before,
+ * hence don't wait for anything, and return
+ * immediately. */
+ return determine_change(j);
+ }
+
+ r = sd_journal_get_timeout(j, &t);
+ if (r < 0)
+ return r;
+
+ if (t != (uint64_t) -1) {
+ usec_t n;
+
+ n = now(CLOCK_MONOTONIC);
+ t = t > n ? t - n : 0;
+
+ if (timeout_usec == (uint64_t) -1 || timeout_usec > t)
+ timeout_usec = t;
+ }
+
+ do {
+ r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
+ } while (r == -EINTR);
+
+ if (r < 0)
+ return r;
+
+ return sd_journal_process(j);
+}
+
+_public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
+ Iterator i;
+ JournalFile *f;
+ bool first = true;
+ uint64_t fmin = 0, tmax = 0;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(from || to, -EINVAL);
+ assert_return(from != to, -EINVAL);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files, i) {
+ usec_t fr, t;
+
+ r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (first) {
+ fmin = fr;
+ tmax = t;
+ first = false;
+ } else {
+ fmin = MIN(fr, fmin);
+ tmax = MAX(t, tmax);
+ }
+ }
+
+ if (from)
+ *from = fmin;
+ if (to)
+ *to = tmax;
+
+ return first ? 0 : 1;
+}
+
+_public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) {
+ Iterator i;
+ JournalFile *f;
+ bool found = false;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(from || to, -EINVAL);
+ assert_return(from != to, -EINVAL);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files, i) {
+ usec_t fr, t;
+
+ r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (found) {
+ if (from)
+ *from = MIN(fr, *from);
+ if (to)
+ *to = MAX(t, *to);
+ } else {
+ if (from)
+ *from = fr;
+ if (to)
+ *to = t;
+ found = true;
+ }
+ }
+
+ return found;
+}
+
+void journal_print_header(sd_journal *j) {
+ Iterator i;
+ JournalFile *f;
+ bool newline = false;
+
+ assert(j);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files, i) {
+ if (newline)
+ putchar('\n');
+ else
+ newline = true;
+
+ journal_file_print_header(f);
+ }
+}
+
+_public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) {
+ Iterator i;
+ JournalFile *f;
+ uint64_t sum = 0;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(bytes, -EINVAL);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files, i) {
+ struct stat st;
+
+ if (fstat(f->fd, &st) < 0)
+ return -errno;
+
+ sum += (uint64_t) st.st_blocks * 512ULL;
+ }
+
+ *bytes = sum;
+ return 0;
+}
+
+_public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
+ char *f;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(!isempty(field), -EINVAL);
+ assert_return(field_is_valid(field), -EINVAL);
+
+ f = strdup(field);
+ if (!f)
+ return -ENOMEM;
+
+ free(j->unique_field);
+ j->unique_field = f;
+ j->unique_file = NULL;
+ j->unique_offset = 0;
+ j->unique_file_lost = false;
+
+ return 0;
+}
+
+_public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
+ size_t k;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(data, -EINVAL);
+ assert_return(l, -EINVAL);
+ assert_return(j->unique_field, -EINVAL);
+
+ k = strlen(j->unique_field);
+
+ if (!j->unique_file) {
+ if (j->unique_file_lost)
+ return 0;
+
+ j->unique_file = ordered_hashmap_first(j->files);
+ if (!j->unique_file)
+ return 0;
+
+ j->unique_offset = 0;
+ }
+
+ for (;;) {
+ JournalFile *of;
+ Iterator i;
+ Object *o;
+ const void *odata;
+ size_t ol;
+ bool found;
+ int r;
+
+ /* Proceed to next data object in the field's linked list */
+ if (j->unique_offset == 0) {
+ r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
+ if (r < 0)
+ return r;
+
+ j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
+ } else {
+ r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
+ if (r < 0)
+ return r;
+
+ j->unique_offset = le64toh(o->data.next_field_offset);
+ }
+
+ /* We reached the end of the list? Then start again, with the next file */
+ if (j->unique_offset == 0) {
+ j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
+ if (!j->unique_file)
+ return 0;
+
+ continue;
+ }
+
+ /* We do not use OBJECT_DATA context here, but OBJECT_UNUSED
+ * instead, so that we can look at this data object at the same
+ * time as one on another file */
+ r = journal_file_move_to_object(j->unique_file, OBJECT_UNUSED, j->unique_offset, &o);
+ if (r < 0)
+ return r;
+
+ /* Let's do the type check by hand, since we used 0 context above. */
+ if (o->object.type != OBJECT_DATA)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s:offset " OFSfmt ": object has type %d, expected %d",
+ j->unique_file->path,
+ j->unique_offset,
+ o->object.type, OBJECT_DATA);
+
+ r = return_data(j, j->unique_file, o, &odata, &ol);
+ if (r < 0)
+ return r;
+
+ /* Check if we have at least the field name and "=". */
+ if (ol <= k)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s:offset " OFSfmt ": object has size %zu, expected at least %zu",
+ j->unique_file->path,
+ j->unique_offset, ol, k + 1);
+
+ if (memcmp(odata, j->unique_field, k) || ((const char*) odata)[k] != '=')
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s:offset " OFSfmt ": object does not start with \"%s=\"",
+ j->unique_file->path,
+ j->unique_offset,
+ j->unique_field);
+
+ /* OK, now let's see if we already returned this data
+ * object by checking if it exists in the earlier
+ * traversed files. */
+ found = false;
+ ORDERED_HASHMAP_FOREACH(of, j->files, i) {
+ if (of == j->unique_file)
+ break;
+
+ /* Skip this file it didn't have any fields indexed */
+ if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
+ continue;
+
+ r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), NULL, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found)
+ continue;
+
+ r = return_data(j, j->unique_file, o, data, l);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+}
+
+_public_ void sd_journal_restart_unique(sd_journal *j) {
+ if (!j)
+ return;
+
+ j->unique_file = NULL;
+ j->unique_offset = 0;
+ j->unique_file_lost = false;
+}
+
+_public_ int sd_journal_enumerate_fields(sd_journal *j, const char **field) {
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(field, -EINVAL);
+
+ if (!j->fields_file) {
+ if (j->fields_file_lost)
+ return 0;
+
+ j->fields_file = ordered_hashmap_first(j->files);
+ if (!j->fields_file)
+ return 0;
+
+ j->fields_hash_table_index = 0;
+ j->fields_offset = 0;
+ }
+
+ for (;;) {
+ JournalFile *f, *of;
+ Iterator i;
+ uint64_t m;
+ Object *o;
+ size_t sz;
+ bool found;
+
+ f = j->fields_file;
+
+ if (j->fields_offset == 0) {
+ bool eof = false;
+
+ /* We are not yet positioned at any field. Let's pick the first one */
+ r = journal_file_map_field_hash_table(f);
+ if (r < 0)
+ return r;
+
+ m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
+ for (;;) {
+ if (j->fields_hash_table_index >= m) {
+ /* Reached the end of the hash table, go to the next file. */
+ eof = true;
+ break;
+ }
+
+ j->fields_offset = le64toh(f->field_hash_table[j->fields_hash_table_index].head_hash_offset);
+
+ if (j->fields_offset != 0)
+ break;
+
+ /* Empty hash table bucket, go to next one */
+ j->fields_hash_table_index++;
+ }
+
+ if (eof) {
+ /* Proceed with next file */
+ j->fields_file = ordered_hashmap_next(j->files, f->path);
+ if (!j->fields_file) {
+ *field = NULL;
+ return 0;
+ }
+
+ j->fields_offset = 0;
+ j->fields_hash_table_index = 0;
+ continue;
+ }
+
+ } else {
+ /* We are already positioned at a field. If so, let's figure out the next field from it */
+
+ r = journal_file_move_to_object(f, OBJECT_FIELD, j->fields_offset, &o);
+ if (r < 0)
+ return r;
+
+ j->fields_offset = le64toh(o->field.next_hash_offset);
+ if (j->fields_offset == 0) {
+ /* Reached the end of the hash table chain */
+ j->fields_hash_table_index++;
+ continue;
+ }
+ }
+
+ /* We use OBJECT_UNUSED here, so that the iterator below doesn't remove our mmap window */
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, j->fields_offset, &o);
+ if (r < 0)
+ return r;
+
+ /* Because we used OBJECT_UNUSED above, we need to do our type check manually */
+ if (o->object.type != OBJECT_FIELD)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s:offset " OFSfmt ": object has type %i, expected %i",
+ f->path, j->fields_offset,
+ o->object.type, OBJECT_FIELD);
+
+ sz = le64toh(o->object.size) - offsetof(Object, field.payload);
+
+ /* Let's see if we already returned this field name before. */
+ found = false;
+ ORDERED_HASHMAP_FOREACH(of, j->files, i) {
+ if (of == f)
+ break;
+
+ /* Skip this file it didn't have any fields indexed */
+ if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
+ continue;
+
+ r = journal_file_find_field_object_with_hash(of, o->field.payload, sz, le64toh(o->field.hash), NULL, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found)
+ continue;
+
+ /* Check if this is really a valid string containing no NUL byte */
+ if (memchr(o->field.payload, 0, sz))
+ return -EBADMSG;
+
+ if (sz > j->data_threshold)
+ sz = j->data_threshold;
+
+ if (!GREEDY_REALLOC(j->fields_buffer, j->fields_buffer_allocated, sz + 1))
+ return -ENOMEM;
+
+ memcpy(j->fields_buffer, o->field.payload, sz);
+ j->fields_buffer[sz] = 0;
+
+ if (!field_is_valid(j->fields_buffer))
+ return -EBADMSG;
+
+ *field = j->fields_buffer;
+ return 1;
+ }
+}
+
+_public_ void sd_journal_restart_fields(sd_journal *j) {
+ if (!j)
+ return;
+
+ j->fields_file = NULL;
+ j->fields_hash_table_index = 0;
+ j->fields_offset = 0;
+ j->fields_file_lost = false;
+}
+
+_public_ int sd_journal_reliable_fd(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ return !j->on_network;
+}
+
+static char *lookup_field(const char *field, void *userdata) {
+ sd_journal *j = userdata;
+ const void *data;
+ size_t size, d;
+ int r;
+
+ assert(field);
+ assert(j);
+
+ r = sd_journal_get_data(j, field, &data, &size);
+ if (r < 0 ||
+ size > REPLACE_VAR_MAX)
+ return strdup(field);
+
+ d = strlen(field) + 1;
+
+ return strndup((const char*) data + d, size - d);
+}
+
+_public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
+ const void *data;
+ size_t size;
+ sd_id128_t id;
+ _cleanup_free_ char *text = NULL, *cid = NULL;
+ char *t;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(ret, -EINVAL);
+
+ r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
+ if (r < 0)
+ return r;
+
+ cid = strndup((const char*) data + 11, size - 11);
+ if (!cid)
+ return -ENOMEM;
+
+ r = sd_id128_from_string(cid, &id);
+ if (r < 0)
+ return r;
+
+ r = catalog_get(CATALOG_DATABASE, id, &text);
+ if (r < 0)
+ return r;
+
+ t = replace_var(text, lookup_field, j);
+ if (!t)
+ return -ENOMEM;
+
+ *ret = t;
+ return 0;
+}
+
+_public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
+ assert_return(ret, -EINVAL);
+
+ return catalog_get(CATALOG_DATABASE, id, ret);
+}
+
+_public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ j->data_threshold = sz;
+ return 0;
+}
+
+_public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(sz, -EINVAL);
+
+ *sz = j->data_threshold;
+ return 0;
+}
+
+_public_ int sd_journal_has_runtime_files(sd_journal *j) {
+ assert_return(j, -EINVAL);
+
+ return j->has_runtime_files;
+}
+
+_public_ int sd_journal_has_persistent_files(sd_journal *j) {
+ assert_return(j, -EINVAL);
+
+ return j->has_persistent_files;
+}
diff --git a/src/journal/test-audit-type.c b/src/journal/test-audit-type.c
new file mode 100644
index 0000000..20e654b
--- /dev/null
+++ b/src/journal/test-audit-type.c
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+#include <linux/audit.h>
+
+#include "audit-type.h"
+
+static void print_audit_label(int i) {
+ const char *name;
+
+ name = audit_type_name_alloca(i);
+ /* This is a separate function only because of alloca */
+ printf("%i → %s → %s\n", i, audit_type_to_string(i), name);
+}
+
+static void test_audit_type(void) {
+ int i;
+
+ for (i = 0; i <= AUDIT_KERNEL; i++)
+ print_audit_label(i);
+}
+
+int main(int argc, char **argv) {
+ test_audit_type();
+}
diff --git a/src/journal/test-catalog.c b/src/journal/test-catalog.c
new file mode 100644
index 0000000..192bb0c
--- /dev/null
+++ b/src/journal/test-catalog.c
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <locale.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "catalog.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static char** catalog_dirs = NULL;
+static const char *no_catalog_dirs[] = {
+ "/bin/hopefully/with/no/catalog",
+ NULL
+};
+
+static Hashmap* test_import(const char* contents, ssize_t size, int code) {
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-catalog.XXXXXX";
+ _cleanup_close_ int fd;
+ Hashmap *h;
+
+ if (size < 0)
+ size = strlen(contents);
+
+ assert_se(h = hashmap_new(&catalog_hash_ops));
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, contents, size) == size);
+
+ assert_se(catalog_import_file(h, name) == code);
+
+ return h;
+}
+
+static void test_catalog_import_invalid(void) {
+ _cleanup_hashmap_free_free_free_ Hashmap *h = NULL;
+
+ h = test_import("xxx", -1, -EINVAL);
+ assert_se(hashmap_isempty(h));
+}
+
+static void test_catalog_import_badid(void) {
+ _cleanup_hashmap_free_free_free_ Hashmap *h = NULL;
+ const char *input =
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededede\n" \
+"Subject: message\n" \
+"\n" \
+"payload\n";
+ h = test_import(input, -1, -EINVAL);
+}
+
+static void test_catalog_import_one(void) {
+ _cleanup_hashmap_free_free_free_ Hashmap *h = NULL;
+ char *payload;
+ Iterator j;
+
+ const char *input =
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: message\n" \
+"\n" \
+"payload\n";
+ const char *expect =
+"Subject: message\n" \
+"\n" \
+"payload\n";
+
+ h = test_import(input, -1, 0);
+ assert_se(hashmap_size(h) == 1);
+
+ HASHMAP_FOREACH(payload, h, j) {
+ printf("expect: %s\n", expect);
+ printf("actual: %s\n", payload);
+ assert_se(streq(expect, payload));
+ }
+}
+
+static void test_catalog_import_merge(void) {
+ _cleanup_hashmap_free_free_free_ Hashmap *h = NULL;
+ char *payload;
+ Iterator j;
+
+ const char *input =
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: message\n" \
+"Defined-By: me\n" \
+"\n" \
+"payload\n" \
+"\n" \
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: override subject\n" \
+"X-Header: hello\n" \
+"\n" \
+"override payload\n";
+
+ const char *combined =
+"Subject: override subject\n" \
+"X-Header: hello\n" \
+"Subject: message\n" \
+"Defined-By: me\n" \
+"\n" \
+"override payload\n";
+
+ h = test_import(input, -1, 0);
+ assert_se(hashmap_size(h) == 1);
+
+ HASHMAP_FOREACH(payload, h, j) {
+ assert_se(streq(combined, payload));
+ }
+}
+
+static void test_catalog_import_merge_no_body(void) {
+ _cleanup_hashmap_free_free_free_ Hashmap *h = NULL;
+ char *payload;
+ Iterator j;
+
+ const char *input =
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: message\n" \
+"Defined-By: me\n" \
+"\n" \
+"payload\n" \
+"\n" \
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: override subject\n" \
+"X-Header: hello\n" \
+"\n";
+
+ const char *combined =
+"Subject: override subject\n" \
+"X-Header: hello\n" \
+"Subject: message\n" \
+"Defined-By: me\n" \
+"\n" \
+"payload\n";
+
+ h = test_import(input, -1, 0);
+ assert_se(hashmap_size(h) == 1);
+
+ HASHMAP_FOREACH(payload, h, j) {
+ assert_se(streq(combined, payload));
+ }
+}
+
+static void test_catalog_update(const char *database) {
+ int r;
+
+ /* Test what happens if there are no files. */
+ r = catalog_update(database, NULL, NULL);
+ assert_se(r == 0);
+
+ /* Test what happens if there are no files in the directory. */
+ r = catalog_update(database, NULL, no_catalog_dirs);
+ assert_se(r == 0);
+
+ /* Make sure that we at least have some files loaded or the
+ * catalog_list below will fail. */
+ r = catalog_update(database, NULL, (const char * const *) catalog_dirs);
+ assert_se(r == 0);
+}
+
+static void test_catalog_file_lang(void) {
+ _cleanup_free_ char *lang = NULL, *lang2 = NULL, *lang3 = NULL, *lang4 = NULL;
+
+ assert_se(catalog_file_lang("systemd.de_DE.catalog", &lang) == 1);
+ assert_se(streq(lang, "de_DE"));
+
+ assert_se(catalog_file_lang("systemd..catalog", &lang2) == 0);
+ assert_se(lang2 == NULL);
+
+ assert_se(catalog_file_lang("systemd.fr.catalog", &lang2) == 1);
+ assert_se(streq(lang2, "fr"));
+
+ assert_se(catalog_file_lang("systemd.fr.catalog.gz", &lang3) == 0);
+ assert_se(lang3 == NULL);
+
+ assert_se(catalog_file_lang("systemd.01234567890123456789012345678901.catalog", &lang3) == 0);
+ assert_se(lang3 == NULL);
+
+ assert_se(catalog_file_lang("systemd.0123456789012345678901234567890.catalog", &lang3) == 1);
+ assert_se(streq(lang3, "0123456789012345678901234567890"));
+
+ assert_se(catalog_file_lang("/x/y/systemd.catalog", &lang4) == 0);
+ assert_se(lang4 == NULL);
+
+ assert_se(catalog_file_lang("/x/y/systemd.ru_RU.catalog", &lang4) == 1);
+ assert_se(streq(lang4, "ru_RU"));
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(unlink_tempfilep) char database[] = "/tmp/test-catalog.XXXXXX";
+ _cleanup_free_ char *text = NULL;
+ int r;
+
+ setlocale(LC_ALL, "de_DE.UTF-8");
+
+ test_setup_logging(LOG_DEBUG);
+
+ /* If test-catalog is located at the build directory, then use catalogs in that.
+ * If it is not, e.g. installed by systemd-tests package, then use installed catalogs. */
+ catalog_dirs = STRV_MAKE(get_catalog_dir());
+
+ assert_se(access(catalog_dirs[0], F_OK) >= 0);
+ log_notice("Using catalog directory '%s'", catalog_dirs[0]);
+
+ test_catalog_file_lang();
+
+ test_catalog_import_invalid();
+ test_catalog_import_badid();
+ test_catalog_import_one();
+ test_catalog_import_merge();
+ test_catalog_import_merge_no_body();
+
+ assert_se(mkostemp_safe(database) >= 0);
+
+ test_catalog_update(database);
+
+ r = catalog_list(stdout, database, true);
+ assert_se(r >= 0);
+
+ r = catalog_list(stdout, database, false);
+ assert_se(r >= 0);
+
+ assert_se(catalog_get(database, SD_MESSAGE_COREDUMP, &text) >= 0);
+ printf(">>>%s<<<\n", text);
+
+ return 0;
+}
diff --git a/src/journal/test-compress-benchmark.c b/src/journal/test-compress-benchmark.c
new file mode 100644
index 0000000..7f13b61
--- /dev/null
+++ b/src/journal/test-compress-benchmark.c
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "compress.h"
+#include "env-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "util.h"
+
+typedef int (compress_t)(const void *src, uint64_t src_size, void *dst,
+ size_t dst_alloc_size, size_t *dst_size);
+typedef int (decompress_t)(const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max);
+
+#if HAVE_XZ || HAVE_LZ4
+
+static usec_t arg_duration;
+static size_t arg_start;
+
+#define MAX_SIZE (1024*1024LU)
+#define PRIME 1048571 /* A prime close enough to one megabyte that mod 4 == 3 */
+
+static size_t _permute(size_t x) {
+ size_t residue;
+
+ if (x >= PRIME)
+ return x;
+
+ residue = x*x % PRIME;
+ if (x <= PRIME / 2)
+ return residue;
+ else
+ return PRIME - residue;
+}
+
+static size_t permute(size_t x) {
+ return _permute((_permute(x) + arg_start) % MAX_SIZE ^ 0xFF345);
+}
+
+static char* make_buf(size_t count, const char *type) {
+ char *buf;
+ size_t i;
+
+ buf = malloc(count);
+ assert_se(buf);
+
+ if (streq(type, "zeros"))
+ memzero(buf, count);
+ else if (streq(type, "simple"))
+ for (i = 0; i < count; i++)
+ buf[i] = 'a' + i % ('z' - 'a' + 1);
+ else if (streq(type, "random")) {
+ size_t step = count / 10;
+
+ random_bytes(buf, step);
+ memzero(buf + 1*step, step);
+ random_bytes(buf + 2*step, step);
+ memzero(buf + 3*step, step);
+ random_bytes(buf + 4*step, step);
+ memzero(buf + 5*step, step);
+ random_bytes(buf + 6*step, step);
+ memzero(buf + 7*step, step);
+ random_bytes(buf + 8*step, step);
+ memzero(buf + 9*step, step);
+ } else
+ assert_not_reached("here");
+
+ return buf;
+}
+
+static void test_compress_decompress(const char* label, const char* type,
+ compress_t compress, decompress_t decompress) {
+ usec_t n, n2 = 0;
+ float dt;
+
+ _cleanup_free_ char *text, *buf;
+ _cleanup_free_ void *buf2 = NULL;
+ size_t buf2_allocated = 0;
+ size_t skipped = 0, compressed = 0, total = 0;
+
+ text = make_buf(MAX_SIZE, type);
+ buf = calloc(MAX_SIZE + 1, 1);
+ assert_se(text && buf);
+
+ n = now(CLOCK_MONOTONIC);
+
+ for (size_t i = 0; i <= MAX_SIZE; i++) {
+ size_t j = 0, k = 0, size;
+ int r;
+
+ size = permute(i);
+ if (size == 0)
+ continue;
+
+ log_debug("%s %zu %zu", type, i, size);
+
+ memzero(buf, MIN(size + 1000, MAX_SIZE));
+
+ r = compress(text, size, buf, size, &j);
+ /* assume compression must be successful except for small or random inputs */
+ assert_se(r == 0 || (size < 2048 && r == -ENOBUFS) || streq(type, "random"));
+
+ /* check for overwrites */
+ assert_se(buf[size] == 0);
+ if (r != 0) {
+ skipped += size;
+ continue;
+ }
+
+ assert_se(j > 0);
+ if (j >= size)
+ log_error("%s \"compressed\" %zu -> %zu", label, size, j);
+
+ r = decompress(buf, j, &buf2, &buf2_allocated, &k, 0);
+ assert_se(r == 0);
+ assert_se(buf2_allocated >= k);
+ assert_se(k == size);
+
+ assert_se(memcmp(text, buf2, size) == 0);
+
+ total += size;
+ compressed += j;
+
+ n2 = now(CLOCK_MONOTONIC);
+ if (n2 - n > arg_duration)
+ break;
+ }
+
+ dt = (n2-n) / 1e6;
+
+ log_info("%s/%s: compressed & decompressed %zu bytes in %.2fs (%.2fMiB/s), "
+ "mean compression %.2f%%, skipped %zu bytes",
+ label, type, total, dt,
+ total / 1024. / 1024 / dt,
+ 100 - compressed * 100. / total,
+ skipped);
+}
+#endif
+
+int main(int argc, char *argv[]) {
+#if HAVE_XZ || HAVE_LZ4
+ test_setup_logging(LOG_INFO);
+
+ if (argc >= 2) {
+ unsigned x;
+
+ assert_se(safe_atou(argv[1], &x) >= 0);
+ arg_duration = x * USEC_PER_SEC;
+ } else
+ arg_duration = slow_tests_enabled() ?
+ 2 * USEC_PER_SEC : USEC_PER_SEC / 50;
+
+ if (argc == 3)
+ (void) safe_atozu(argv[2], &arg_start);
+ else
+ arg_start = getpid_cached();
+
+ const char *i;
+ NULSTR_FOREACH(i, "zeros\0simple\0random\0") {
+#if HAVE_XZ
+ test_compress_decompress("XZ", i, compress_blob_xz, decompress_blob_xz);
+#endif
+#if HAVE_LZ4
+ test_compress_decompress("LZ4", i, compress_blob_lz4, decompress_blob_lz4);
+#endif
+ }
+ return 0;
+#else
+ return log_tests_skipped("No compression feature is enabled");
+#endif
+}
diff --git a/src/journal/test-compress.c b/src/journal/test-compress.c
new file mode 100644
index 0000000..1b050b7
--- /dev/null
+++ b/src/journal/test-compress.c
@@ -0,0 +1,341 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_LZ4
+#include <lz4.h>
+#endif
+
+#include "alloc-util.h"
+#include "compress.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "random-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+#if HAVE_XZ
+# define XZ_OK 0
+#else
+# define XZ_OK -EPROTONOSUPPORT
+#endif
+
+#if HAVE_LZ4
+# define LZ4_OK 0
+#else
+# define LZ4_OK -EPROTONOSUPPORT
+#endif
+
+typedef int (compress_blob_t)(const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size);
+typedef int (decompress_blob_t)(const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size,
+ size_t* dst_size, size_t dst_max);
+typedef int (decompress_sw_t)(const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra);
+
+typedef int (compress_stream_t)(int fdf, int fdt, uint64_t max_bytes);
+typedef int (decompress_stream_t)(int fdf, int fdt, uint64_t max_size);
+
+#if HAVE_XZ || HAVE_LZ4
+static void test_compress_decompress(int compression,
+ compress_blob_t compress,
+ decompress_blob_t decompress,
+ const char *data,
+ size_t data_len,
+ bool may_fail) {
+ char compressed[512];
+ size_t csize, usize = 0;
+ _cleanup_free_ char *decompressed = NULL;
+ int r;
+
+ log_info("/* testing %s %s blob compression/decompression */",
+ object_compressed_to_string(compression), data);
+
+ r = compress(data, data_len, compressed, sizeof(compressed), &csize);
+ if (r == -ENOBUFS) {
+ log_info_errno(r, "compression failed: %m");
+ assert_se(may_fail);
+ } else {
+ assert_se(r == 0);
+ r = decompress(compressed, csize,
+ (void **) &decompressed, &usize, &csize, 0);
+ assert_se(r == 0);
+ assert_se(decompressed);
+ assert_se(memcmp(decompressed, data, data_len) == 0);
+ }
+
+ r = decompress("garbage", 7,
+ (void **) &decompressed, &usize, &csize, 0);
+ assert_se(r < 0);
+
+ /* make sure to have the minimal lz4 compressed size */
+ r = decompress("00000000\1g", 9,
+ (void **) &decompressed, &usize, &csize, 0);
+ assert_se(r < 0);
+
+ r = decompress("\100000000g", 9,
+ (void **) &decompressed, &usize, &csize, 0);
+ assert_se(r < 0);
+
+ memzero(decompressed, usize);
+}
+
+static void test_decompress_startswith(int compression,
+ compress_blob_t compress,
+ decompress_sw_t decompress_sw,
+ const char *data,
+ size_t data_len,
+ bool may_fail) {
+
+ char *compressed;
+ _cleanup_free_ char *compressed1 = NULL, *compressed2 = NULL, *decompressed = NULL;
+ size_t csize, usize = 0, len;
+ int r;
+
+ log_info("/* testing decompress_startswith with %s on %.20s text */",
+ object_compressed_to_string(compression), data);
+
+#define BUFSIZE_1 512
+#define BUFSIZE_2 20000
+
+ compressed = compressed1 = malloc(BUFSIZE_1);
+ assert_se(compressed1);
+ r = compress(data, data_len, compressed, BUFSIZE_1, &csize);
+ if (r == -ENOBUFS) {
+ log_info_errno(r, "compression failed: %m");
+ assert_se(may_fail);
+
+ compressed = compressed2 = malloc(BUFSIZE_2);
+ assert_se(compressed2);
+ r = compress(data, data_len, compressed, BUFSIZE_2, &csize);
+ assert(r == 0);
+ }
+ assert_se(r == 0);
+
+ len = strlen(data);
+
+ r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len, '\0');
+ assert_se(r > 0);
+ r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len, 'w');
+ assert_se(r == 0);
+ r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, "barbarbar", 9, ' ');
+ assert_se(r == 0);
+ r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len - 1, data[len-1]);
+ assert_se(r > 0);
+ r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len - 1, 'w');
+ assert_se(r == 0);
+ r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len, '\0');
+ assert_se(r > 0);
+}
+
+static void test_decompress_startswith_short(int compression,
+ compress_blob_t compress,
+ decompress_sw_t decompress_sw) {
+
+#define TEXT "HUGE=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+
+ char buf[1024];
+ size_t i, csize;
+ int r;
+
+ log_info("/* %s with %s */", __func__, object_compressed_to_string(compression));
+
+ r = compress(TEXT, sizeof TEXT, buf, sizeof buf, &csize);
+ assert_se(r == 0);
+
+ for (i = 1; i < strlen(TEXT); i++) {
+ size_t alloc_size = i;
+ _cleanup_free_ void *buf2 = NULL;
+
+ assert_se(buf2 = malloc(i));
+
+ assert_se(decompress_sw(buf, csize, &buf2, &alloc_size, TEXT, i, TEXT[i]) == 1);
+ assert_se(decompress_sw(buf, csize, &buf2, &alloc_size, TEXT, i, 'y') == 0);
+ }
+}
+
+static void test_compress_stream(int compression,
+ const char* cat,
+ compress_stream_t compress,
+ decompress_stream_t decompress,
+ const char *srcfile) {
+
+ _cleanup_close_ int src = -1, dst = -1, dst2 = -1;
+ _cleanup_(unlink_tempfilep) char
+ pattern[] = "/tmp/systemd-test.compressed.XXXXXX",
+ pattern2[] = "/tmp/systemd-test.compressed.XXXXXX";
+ int r;
+ _cleanup_free_ char *cmd = NULL, *cmd2 = NULL;
+ struct stat st = {};
+
+ r = find_binary(cat, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Skipping %s, could not find %s binary: %m", __func__, cat);
+ return;
+ }
+
+ log_debug("/* testing %s compression */",
+ object_compressed_to_string(compression));
+
+ log_debug("/* create source from %s */", srcfile);
+
+ assert_se((src = open(srcfile, O_RDONLY|O_CLOEXEC)) >= 0);
+
+ log_debug("/* test compression */");
+
+ assert_se((dst = mkostemp_safe(pattern)) >= 0);
+
+ assert_se(compress(src, dst, -1) == 0);
+
+ if (cat) {
+ assert_se(asprintf(&cmd, "%s %s | diff %s -", cat, pattern, srcfile) > 0);
+ assert_se(system(cmd) == 0);
+ }
+
+ log_debug("/* test decompression */");
+
+ assert_se((dst2 = mkostemp_safe(pattern2)) >= 0);
+
+ assert_se(stat(srcfile, &st) == 0);
+
+ assert_se(lseek(dst, 0, SEEK_SET) == 0);
+ r = decompress(dst, dst2, st.st_size);
+ assert_se(r == 0);
+
+ assert_se(asprintf(&cmd2, "diff %s %s", srcfile, pattern2) > 0);
+ assert_se(system(cmd2) == 0);
+
+ log_debug("/* test faulty decompression */");
+
+ assert_se(lseek(dst, 1, SEEK_SET) == 1);
+ r = decompress(dst, dst2, st.st_size);
+ assert_se(IN_SET(r, 0, -EBADMSG));
+
+ assert_se(lseek(dst, 0, SEEK_SET) == 0);
+ assert_se(lseek(dst2, 0, SEEK_SET) == 0);
+ r = decompress(dst, dst2, st.st_size - 1);
+ assert_se(r == -EFBIG);
+}
+#endif
+
+#if HAVE_LZ4
+static void test_lz4_decompress_partial(void) {
+ char buf[20000], buf2[100];
+ size_t buf_size = sizeof(buf), compressed;
+ int r;
+ _cleanup_free_ char *huge = NULL;
+
+#define HUGE_SIZE (4096*1024)
+ assert_se(huge = malloc(HUGE_SIZE));
+ memset(huge, 'x', HUGE_SIZE);
+ memcpy(huge, "HUGE=", 5);
+
+ r = LZ4_compress_default(huge, buf, HUGE_SIZE, buf_size);
+ assert_se(r >= 0);
+ compressed = r;
+ log_info("Compressed %i → %zu", HUGE_SIZE, compressed);
+
+ r = LZ4_decompress_safe(buf, huge, r, HUGE_SIZE);
+ assert_se(r >= 0);
+ log_info("Decompressed → %i", r);
+
+ r = LZ4_decompress_safe_partial(buf, huge,
+ compressed,
+ 12, HUGE_SIZE);
+ assert_se(r >= 0);
+ log_info("Decompressed partial %i/%i → %i", 12, HUGE_SIZE, r);
+
+ for (size_t size = 1; size < sizeof(buf2); size++) {
+ /* This failed in older lz4s but works in newer ones. */
+ r = LZ4_decompress_safe_partial(buf, buf2, compressed, size, size);
+ log_info("Decompressed partial %zu/%zu → %i (%s)", size, size, r,
+ r < 0 ? "bad" : "good");
+ if (r >= 0 && LZ4_versionNumber() >= 10803)
+ /* lz4 <= 1.8.2 should fail that test, let's only check for newer ones */
+ assert_se(memcmp(buf2, huge, r) == 0);
+ }
+}
+#endif
+
+int main(int argc, char *argv[]) {
+#if HAVE_XZ || HAVE_LZ4
+ const char text[] =
+ "text\0foofoofoofoo AAAA aaaaaaaaa ghost busters barbarbar FFF"
+ "foofoofoofoo AAAA aaaaaaaaa ghost busters barbarbar FFF";
+
+ /* The file to test compression on can be specified as the first argument */
+ const char *srcfile = argc > 1 ? argv[1] : argv[0];
+
+ char data[512] = "random\0";
+
+ char huge[4096*1024];
+ memset(huge, 'x', sizeof(huge));
+ memcpy(huge, "HUGE=", 5);
+ char_array_0(huge);
+
+ test_setup_logging(LOG_DEBUG);
+
+ random_bytes(data + 7, sizeof(data) - 7);
+
+#if HAVE_XZ
+ test_compress_decompress(OBJECT_COMPRESSED_XZ, compress_blob_xz, decompress_blob_xz,
+ text, sizeof(text), false);
+ test_compress_decompress(OBJECT_COMPRESSED_XZ, compress_blob_xz, decompress_blob_xz,
+ data, sizeof(data), true);
+
+ test_decompress_startswith(OBJECT_COMPRESSED_XZ,
+ compress_blob_xz, decompress_startswith_xz,
+ text, sizeof(text), false);
+ test_decompress_startswith(OBJECT_COMPRESSED_XZ,
+ compress_blob_xz, decompress_startswith_xz,
+ data, sizeof(data), true);
+ test_decompress_startswith(OBJECT_COMPRESSED_XZ,
+ compress_blob_xz, decompress_startswith_xz,
+ huge, sizeof(huge), true);
+
+ test_compress_stream(OBJECT_COMPRESSED_XZ, "xzcat",
+ compress_stream_xz, decompress_stream_xz, srcfile);
+
+ test_decompress_startswith_short(OBJECT_COMPRESSED_XZ, compress_blob_xz, decompress_startswith_xz);
+
+#else
+ log_info("/* XZ test skipped */");
+#endif
+
+#if HAVE_LZ4
+ test_compress_decompress(OBJECT_COMPRESSED_LZ4, compress_blob_lz4, decompress_blob_lz4,
+ text, sizeof(text), false);
+ test_compress_decompress(OBJECT_COMPRESSED_LZ4, compress_blob_lz4, decompress_blob_lz4,
+ data, sizeof(data), true);
+
+ test_decompress_startswith(OBJECT_COMPRESSED_LZ4,
+ compress_blob_lz4, decompress_startswith_lz4,
+ text, sizeof(text), false);
+ test_decompress_startswith(OBJECT_COMPRESSED_LZ4,
+ compress_blob_lz4, decompress_startswith_lz4,
+ data, sizeof(data), true);
+ test_decompress_startswith(OBJECT_COMPRESSED_LZ4,
+ compress_blob_lz4, decompress_startswith_lz4,
+ huge, sizeof(huge), true);
+
+ test_compress_stream(OBJECT_COMPRESSED_LZ4, "lz4cat",
+ compress_stream_lz4, decompress_stream_lz4, srcfile);
+
+ test_lz4_decompress_partial();
+
+ test_decompress_startswith_short(OBJECT_COMPRESSED_LZ4, compress_blob_lz4, decompress_startswith_lz4);
+
+#else
+ log_info("/* LZ4 test skipped */");
+#endif
+
+ return 0;
+#else
+ log_info("/* XZ and LZ4 tests skipped */");
+ return EXIT_TEST_SKIP;
+#endif
+}
diff --git a/src/journal/test-journal-config.c b/src/journal/test-journal-config.c
new file mode 100644
index 0000000..1482490
--- /dev/null
+++ b/src/journal/test-journal-config.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdbool.h>
+
+#include "journald-server.h"
+
+#define _COMPRESS_PARSE_CHECK(str, enab, thresh, varname) \
+ do { \
+ JournalCompressOptions varname = {true, 111}; \
+ config_parse_compress("", "", 0, "", 0, "", 0, str, \
+ &varname, NULL); \
+ assert_se((enab) == varname.enabled); \
+ if (varname.enabled) \
+ assert_se((thresh) == varname.threshold_bytes); \
+ } while (0)
+
+#define COMPRESS_PARSE_CHECK(str, enabled, threshold) \
+ _COMPRESS_PARSE_CHECK(str, enabled, threshold, conf##__COUNTER__)
+
+static void test_config_compress(void) {
+ COMPRESS_PARSE_CHECK("yes", true, 111);
+ COMPRESS_PARSE_CHECK("no", false, 111);
+ COMPRESS_PARSE_CHECK("y", true, 111);
+ COMPRESS_PARSE_CHECK("n", false, 111);
+ COMPRESS_PARSE_CHECK("true", true, 111);
+ COMPRESS_PARSE_CHECK("false", false, 111);
+ COMPRESS_PARSE_CHECK("t", true, 111);
+ COMPRESS_PARSE_CHECK("f", false, 111);
+ COMPRESS_PARSE_CHECK("on", true, 111);
+ COMPRESS_PARSE_CHECK("off", false, 111);
+
+ /* Weird size/bool overlapping case. We preserve backward compatibility instead of assuming these are byte
+ * counts. */
+ COMPRESS_PARSE_CHECK("1", true, 111);
+ COMPRESS_PARSE_CHECK("0", false, 111);
+
+ /* IEC sizing */
+ COMPRESS_PARSE_CHECK("1B", true, 1);
+ COMPRESS_PARSE_CHECK("1K", true, 1024);
+ COMPRESS_PARSE_CHECK("1M", true, 1024 * 1024);
+ COMPRESS_PARSE_CHECK("1G", true, 1024 * 1024 * 1024);
+
+ /* Invalid Case */
+ COMPRESS_PARSE_CHECK("-1", true, 111);
+ COMPRESS_PARSE_CHECK("blah blah", true, 111);
+ COMPRESS_PARSE_CHECK("", true, (uint64_t)-1);
+}
+
+int main(int argc, char *argv[]) {
+ test_config_compress();
+
+ return 0;
+}
diff --git a/src/journal/test-journal-enum.c b/src/journal/test-journal-enum.c
new file mode 100644
index 0000000..8e83992
--- /dev/null
+++ b/src/journal/test-journal-enum.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+
+#include "sd-journal.h"
+
+#include "journal-internal.h"
+#include "log.h"
+#include "macro.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ unsigned n = 0;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY) >= 0);
+
+ assert_se(sd_journal_add_match(j, "_TRANSPORT=syslog", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "_UID=0", 0) >= 0);
+
+ SD_JOURNAL_FOREACH_BACKWARDS(j) {
+ const void *d;
+ size_t l;
+
+ assert_se(sd_journal_get_data(j, "MESSAGE", &d, &l) >= 0);
+
+ printf("%.*s\n", (int) l, (char*) d);
+
+ n++;
+ if (n >= 10)
+ break;
+ }
+
+ return 0;
+}
diff --git a/src/journal/test-journal-flush.c b/src/journal/test-journal-flush.c
new file mode 100644
index 0000000..81dbc22
--- /dev/null
+++ b/src/journal/test-journal-flush.c
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "journal-file.h"
+#include "journal-internal.h"
+#include "macro.h"
+#include "string-util.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_free_ char *fn = NULL;
+ char dn[] = "/var/tmp/test-journal-flush.XXXXXX";
+ JournalFile *new_journal = NULL;
+ sd_journal *j = NULL;
+ unsigned n = 0;
+ int r;
+
+ assert_se(mkdtemp(dn));
+ fn = strappend(dn, "/test.journal");
+
+ r = journal_file_open(-1, fn, O_CREAT|O_RDWR, 0644, false, 0, false, NULL, NULL, NULL, NULL, &new_journal);
+ assert_se(r >= 0);
+
+ r = sd_journal_open(&j, 0);
+ assert_se(r >= 0);
+
+ sd_journal_set_data_threshold(j, 0);
+
+ SD_JOURNAL_FOREACH(j) {
+ Object *o;
+ JournalFile *f;
+
+ f = j->current_file;
+ assert_se(f && f->current_offset > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ assert_se(r >= 0);
+
+ r = journal_file_copy_entry(f, new_journal, o, f->current_offset);
+ assert_se(r >= 0);
+
+ n++;
+ if (n > 10000)
+ break;
+ }
+
+ sd_journal_close(j);
+
+ (void) journal_file_close(new_journal);
+
+ unlink(fn);
+ assert_se(rmdir(dn) == 0);
+
+ return 0;
+}
diff --git a/src/journal/test-journal-init.c b/src/journal/test-journal-init.c
new file mode 100644
index 0000000..860baca
--- /dev/null
+++ b/src/journal/test-journal-init.c
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-journal.h"
+
+#include "log.h"
+#include "parse-util.h"
+#include "rm-rf.h"
+#include "tests.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ sd_journal *j;
+ int r, i, I = 100;
+ char t[] = "/tmp/journal-stream-XXXXXX";
+
+ test_setup_logging(LOG_DEBUG);
+
+ if (argc >= 2) {
+ r = safe_atoi(argv[1], &I);
+ if (r < 0)
+ log_info("Could not parse loop count argument. Using default.");
+ }
+
+ log_info("Running %d loops", I);
+
+ assert_se(mkdtemp(t));
+
+ for (i = 0; i < I; i++) {
+ r = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
+ assert_se(r == 0);
+
+ sd_journal_close(j);
+
+ r = sd_journal_open_directory(&j, t, 0);
+ assert_se(r == 0);
+
+ sd_journal_close(j);
+
+ j = NULL;
+ r = sd_journal_open_directory(&j, t, SD_JOURNAL_LOCAL_ONLY);
+ assert_se(r == -EINVAL);
+ assert_se(j == NULL);
+ }
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+
+ return 0;
+}
diff --git a/src/journal/test-journal-interleaving.c b/src/journal/test-journal-interleaving.c
new file mode 100644
index 0000000..cf0561d
--- /dev/null
+++ b/src/journal/test-journal-interleaving.c
@@ -0,0 +1,288 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "io-util.h"
+#include "journal-file.h"
+#include "journal-vacuum.h"
+#include "log.h"
+#include "parse-util.h"
+#include "rm-rf.h"
+#include "tests.h"
+#include "util.h"
+
+/* This program tests skipping around in a multi-file journal. */
+
+static bool arg_keep = false;
+
+_noreturn_ static void log_assert_errno(const char *text, int error, const char *file, int line, const char *func) {
+ log_internal(LOG_CRIT, error, file, line, func,
+ "'%s' failed at %s:%u (%s): %m", text, file, line, func);
+ abort();
+}
+
+#define assert_ret(expr) \
+ do { \
+ int _r_ = (expr); \
+ if (_unlikely_(_r_ < 0)) \
+ log_assert_errno(#expr, -_r_, __FILE__, __LINE__, __PRETTY_FUNCTION__); \
+ } while (false)
+
+static JournalFile *test_open(const char *name) {
+ JournalFile *f;
+ assert_ret(journal_file_open(-1, name, O_RDWR|O_CREAT, 0644, true, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &f));
+ return f;
+}
+
+static void test_close(JournalFile *f) {
+ (void) journal_file_close (f);
+}
+
+static void append_number(JournalFile *f, int n, uint64_t *seqnum) {
+ char *p;
+ dual_timestamp ts;
+ static dual_timestamp previous_ts = {};
+ struct iovec iovec[1];
+
+ dual_timestamp_get(&ts);
+
+ if (ts.monotonic <= previous_ts.monotonic)
+ ts.monotonic = previous_ts.monotonic + 1;
+
+ if (ts.realtime <= previous_ts.realtime)
+ ts.realtime = previous_ts.realtime + 1;
+
+ previous_ts = ts;
+
+ assert_se(asprintf(&p, "NUMBER=%d", n) >= 0);
+ iovec[0] = IOVEC_MAKE_STRING(p);
+ assert_ret(journal_file_append_entry(f, &ts, NULL, iovec, 1, seqnum, NULL, NULL));
+ free(p);
+}
+
+static void test_check_number (sd_journal *j, int n) {
+ const void *d;
+ _cleanup_free_ char *k;
+ size_t l;
+ int x;
+
+ assert_ret(sd_journal_get_data(j, "NUMBER", &d, &l));
+ assert_se(k = strndup(d, l));
+ printf("%s\n", k);
+
+ assert_se(safe_atoi(k + 7, &x) >= 0);
+ assert_se(n == x);
+}
+
+static void test_check_numbers_down (sd_journal *j, int count) {
+ int i;
+
+ for (i = 1; i <= count; i++) {
+ int r;
+ test_check_number(j, i);
+ assert_ret(r = sd_journal_next(j));
+ if (i == count)
+ assert_se(r == 0);
+ else
+ assert_se(r == 1);
+ }
+
+}
+
+static void test_check_numbers_up (sd_journal *j, int count) {
+ for (int i = count; i >= 1; i--) {
+ int r;
+ test_check_number(j, i);
+ assert_ret(r = sd_journal_previous(j));
+ if (i == 1)
+ assert_se(r == 0);
+ else
+ assert_se(r == 1);
+ }
+
+}
+
+static void setup_sequential(void) {
+ JournalFile *one, *two;
+ one = test_open("one.journal");
+ two = test_open("two.journal");
+ append_number(one, 1, NULL);
+ append_number(one, 2, NULL);
+ append_number(two, 3, NULL);
+ append_number(two, 4, NULL);
+ test_close(one);
+ test_close(two);
+}
+
+static void setup_interleaved(void) {
+ JournalFile *one, *two;
+ one = test_open("one.journal");
+ two = test_open("two.journal");
+ append_number(one, 1, NULL);
+ append_number(two, 2, NULL);
+ append_number(one, 3, NULL);
+ append_number(two, 4, NULL);
+ test_close(one);
+ test_close(two);
+}
+
+static void test_skip(void (*setup)(void)) {
+ char t[] = "/tmp/journal-skip-XXXXXX";
+ sd_journal *j;
+ int r;
+
+ assert_se(mkdtemp(t));
+ assert_se(chdir(t) >= 0);
+
+ setup();
+
+ /* Seek to head, iterate down.
+ */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_head(j));
+ assert_ret(sd_journal_next(j));
+ test_check_numbers_down(j, 4);
+ sd_journal_close(j);
+
+ /* Seek to tail, iterate up.
+ */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_tail(j));
+ assert_ret(sd_journal_previous(j));
+ test_check_numbers_up(j, 4);
+ sd_journal_close(j);
+
+ /* Seek to tail, skip to head, iterate down.
+ */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_tail(j));
+ assert_ret(r = sd_journal_previous_skip(j, 4));
+ assert_se(r == 4);
+ test_check_numbers_down(j, 4);
+ sd_journal_close(j);
+
+ /* Seek to head, skip to tail, iterate up.
+ */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_head(j));
+ assert_ret(r = sd_journal_next_skip(j, 4));
+ assert_se(r == 4);
+ test_check_numbers_up(j, 4);
+ sd_journal_close(j);
+
+ log_info("Done...");
+
+ if (arg_keep)
+ log_info("Not removing %s", t);
+ else {
+ journal_directory_vacuum(".", 3000000, 0, 0, NULL, true);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+ }
+
+ puts("------------------------------------------------------------");
+}
+
+static void test_sequence_numbers(void) {
+
+ char t[] = "/tmp/journal-seq-XXXXXX";
+ JournalFile *one, *two;
+ uint64_t seqnum = 0;
+ sd_id128_t seqnum_id;
+
+ assert_se(mkdtemp(t));
+ assert_se(chdir(t) >= 0);
+
+ assert_se(journal_file_open(-1, "one.journal", O_RDWR|O_CREAT, 0644,
+ true, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &one) == 0);
+
+ append_number(one, 1, &seqnum);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 1);
+ append_number(one, 2, &seqnum);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 2);
+
+ assert_se(one->header->state == STATE_ONLINE);
+ assert_se(!sd_id128_equal(one->header->file_id, one->header->machine_id));
+ assert_se(!sd_id128_equal(one->header->file_id, one->header->boot_id));
+ assert_se(sd_id128_equal(one->header->file_id, one->header->seqnum_id));
+
+ memcpy(&seqnum_id, &one->header->seqnum_id, sizeof(sd_id128_t));
+
+ assert_se(journal_file_open(-1, "two.journal", O_RDWR|O_CREAT, 0644,
+ true, (uint64_t) -1, false, NULL, NULL, NULL, one, &two) == 0);
+
+ assert_se(two->header->state == STATE_ONLINE);
+ assert_se(!sd_id128_equal(two->header->file_id, one->header->file_id));
+ assert_se(sd_id128_equal(one->header->machine_id, one->header->machine_id));
+ assert_se(sd_id128_equal(one->header->boot_id, one->header->boot_id));
+ assert_se(sd_id128_equal(one->header->seqnum_id, one->header->seqnum_id));
+
+ append_number(two, 3, &seqnum);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 3);
+ append_number(two, 4, &seqnum);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 4);
+
+ test_close(two);
+
+ append_number(one, 5, &seqnum);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 5);
+
+ append_number(one, 6, &seqnum);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 6);
+
+ test_close(one);
+
+ /* restart server */
+ seqnum = 0;
+
+ assert_se(journal_file_open(-1, "two.journal", O_RDWR, 0,
+ true, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &two) == 0);
+
+ assert_se(sd_id128_equal(two->header->seqnum_id, seqnum_id));
+
+ append_number(two, 7, &seqnum);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 5);
+
+ /* So..., here we have the same seqnum in two files with the
+ * same seqnum_id. */
+
+ test_close(two);
+
+ log_info("Done...");
+
+ if (arg_keep)
+ log_info("Not removing %s", t);
+ else {
+ journal_directory_vacuum(".", 3000000, 0, 0, NULL, true);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ /* journal_file_open requires a valid machine id */
+ if (access("/etc/machine-id", F_OK) != 0)
+ return log_tests_skipped("/etc/machine-id not found");
+
+ arg_keep = argc > 1;
+
+ test_skip(setup_sequential);
+ test_skip(setup_interleaved);
+
+ test_sequence_numbers();
+
+ return 0;
+}
diff --git a/src/journal/test-journal-match.c b/src/journal/test-journal-match.c
new file mode 100644
index 0000000..b175279
--- /dev/null
+++ b/src/journal/test-journal-match.c
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "journal-internal.h"
+#include "log.h"
+#include "string-util.h"
+#include "tests.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ _cleanup_free_ char *t;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(sd_journal_open(&j, 0) >= 0);
+
+ assert_se(sd_journal_add_match(j, "foobar", 0) < 0);
+ assert_se(sd_journal_add_match(j, "foobar=waldo", 0) < 0);
+ assert_se(sd_journal_add_match(j, "", 0) < 0);
+ assert_se(sd_journal_add_match(j, "=", 0) < 0);
+ assert_se(sd_journal_add_match(j, "=xxxxx", 0) < 0);
+ assert_se(sd_journal_add_match(j, (uint8_t[4]){'A', '=', '\1', '\2'}, 4) >= 0);
+ assert_se(sd_journal_add_match(j, (uint8_t[5]){'B', '=', 'C', '\0', 'D'}, 5) >= 0);
+ assert_se(sd_journal_add_match(j, "HALLO=WALDO", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "QUUX=mmmm", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "QUUX=xxxxx", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "HALLO=", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "QUUX=xxxxx", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "QUUX=yyyyy", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "PIFF=paff", 0) >= 0);
+
+ assert_se(sd_journal_add_disjunction(j) >= 0);
+
+ assert_se(sd_journal_add_match(j, "ONE=one", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "ONE=two", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "TWO=two", 0) >= 0);
+
+ assert_se(sd_journal_add_conjunction(j) >= 0);
+
+ assert_se(sd_journal_add_match(j, "L4_1=yes", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "L4_1=ok", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "L4_2=yes", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "L4_2=ok", 0) >= 0);
+
+ assert_se(sd_journal_add_disjunction(j) >= 0);
+
+ assert_se(sd_journal_add_match(j, "L3=yes", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "L3=ok", 0) >= 0);
+
+ assert_se(t = journal_make_match_string(j));
+
+ printf("resulting match expression is: %s\n", t);
+
+ assert_se(streq(t, "(((L3=ok OR L3=yes) OR ((L4_2=ok OR L4_2=yes) AND (L4_1=ok OR L4_1=yes))) AND ((TWO=two AND (ONE=two OR ONE=one)) OR (PIFF=paff AND (QUUX=yyyyy OR QUUX=xxxxx OR QUUX=mmmm) AND (HALLO= OR HALLO=WALDO) AND B=C\\000D AND A=\\001\\002)))"));
+
+ return 0;
+}
diff --git a/src/journal/test-journal-send.c b/src/journal/test-journal-send.c
new file mode 100644
index 0000000..484308e
--- /dev/null
+++ b/src/journal/test-journal-send.c
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "macro.h"
+
+int main(int argc, char *argv[]) {
+ char huge[4096*1024];
+
+ /* utf-8 and non-utf-8, message-less and message-ful iovecs */
+ struct iovec graph1[] = {
+ {(char*) "GRAPH=graph", STRLEN("GRAPH=graph")}
+ };
+ struct iovec graph2[] = {
+ {(char*) "GRAPH=graph\n", STRLEN("GRAPH=graph\n")}
+ };
+ struct iovec message1[] = {
+ {(char*) "MESSAGE=graph", STRLEN("MESSAGE=graph")}
+ };
+ struct iovec message2[] = {
+ {(char*) "MESSAGE=graph\n", STRLEN("MESSAGE=graph\n")}
+ };
+
+ assert_se(sd_journal_print(LOG_INFO, "piepapo") == 0);
+
+ assert_se(sd_journal_send("MESSAGE=foobar",
+ "VALUE=%i", 7,
+ NULL) == 0);
+
+ errno = ENOENT;
+ assert_se(sd_journal_perror("Foobar") == 0);
+
+ assert_se(sd_journal_perror("") == 0);
+
+ memset(huge, 'x', sizeof(huge));
+ memcpy(huge, "HUGE=", 5);
+ char_array_0(huge);
+
+ assert_se(sd_journal_send("MESSAGE=Huge field attached",
+ huge,
+ NULL) == 0);
+
+ assert_se(sd_journal_send("MESSAGE=uiui",
+ "VALUE=A",
+ "VALUE=B",
+ "VALUE=C",
+ "SINGLETON=1",
+ "OTHERVALUE=X",
+ "OTHERVALUE=Y",
+ "WITH_BINARY=this is a binary value \a",
+ NULL) == 0);
+
+ syslog(LOG_NOTICE, "Hello World!");
+
+ assert_se(sd_journal_print(LOG_NOTICE, "Hello World") == 0);
+
+ assert_se(sd_journal_send("MESSAGE=Hello World!",
+ "MESSAGE_ID=52fb62f99e2c49d89cfbf9d6de5e3555",
+ "PRIORITY=5",
+ "HOME=%s", getenv("HOME"),
+ "TERM=%s", getenv("TERM"),
+ "PAGE_SIZE=%li", sysconf(_SC_PAGESIZE),
+ "N_CPUS=%li", sysconf(_SC_NPROCESSORS_ONLN),
+ NULL) == 0);
+
+ assert_se(sd_journal_sendv(graph1, 1) == 0);
+ assert_se(sd_journal_sendv(graph2, 1) == 0);
+ assert_se(sd_journal_sendv(message1, 1) == 0);
+ assert_se(sd_journal_sendv(message2, 1) == 0);
+
+ /* test without location fields */
+#undef sd_journal_sendv
+ assert_se(sd_journal_sendv(graph1, 1) == 0);
+ assert_se(sd_journal_sendv(graph2, 1) == 0);
+ assert_se(sd_journal_sendv(message1, 1) == 0);
+ assert_se(sd_journal_sendv(message2, 1) == 0);
+
+ sleep(1);
+
+ return 0;
+}
diff --git a/src/journal/test-journal-stream.c b/src/journal/test-journal-stream.c
new file mode 100644
index 0000000..226c30f
--- /dev/null
+++ b/src/journal/test-journal-stream.c
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "journal-file.h"
+#include "journal-internal.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "rm-rf.h"
+#include "tests.h"
+#include "util.h"
+
+#define N_ENTRIES 200
+
+static void verify_contents(sd_journal *j, unsigned skip) {
+ unsigned i;
+
+ assert_se(j);
+
+ i = 0;
+ SD_JOURNAL_FOREACH(j) {
+ const void *d;
+ char *k, *c;
+ size_t l;
+ unsigned u = 0;
+
+ assert_se(sd_journal_get_cursor(j, &k) >= 0);
+ printf("cursor: %s\n", k);
+ free(k);
+
+ assert_se(sd_journal_get_data(j, "MAGIC", &d, &l) >= 0);
+ printf("\t%.*s\n", (int) l, (const char*) d);
+
+ assert_se(sd_journal_get_data(j, "NUMBER", &d, &l) >= 0);
+ assert_se(k = strndup(d, l));
+ printf("\t%s\n", k);
+
+ if (skip > 0) {
+ assert_se(safe_atou(k + 7, &u) >= 0);
+ assert_se(i == u);
+ i += skip;
+ }
+
+ free(k);
+
+ assert_se(sd_journal_get_cursor(j, &c) >= 0);
+ assert_se(sd_journal_test_cursor(j, c) > 0);
+ free(c);
+ }
+
+ if (skip > 0)
+ assert_se(i == N_ENTRIES);
+}
+
+int main(int argc, char *argv[]) {
+ JournalFile *one, *two, *three;
+ char t[] = "/tmp/journal-stream-XXXXXX";
+ unsigned i;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ char *z;
+ const void *data;
+ size_t l;
+ dual_timestamp previous_ts = DUAL_TIMESTAMP_NULL;
+
+ /* journal_file_open requires a valid machine id */
+ if (access("/etc/machine-id", F_OK) != 0)
+ return log_tests_skipped("/etc/machine-id not found");
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(mkdtemp(t));
+ assert_se(chdir(t) >= 0);
+
+ assert_se(journal_file_open(-1, "one.journal", O_RDWR|O_CREAT, 0666, true, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &one) == 0);
+ assert_se(journal_file_open(-1, "two.journal", O_RDWR|O_CREAT, 0666, true, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &two) == 0);
+ assert_se(journal_file_open(-1, "three.journal", O_RDWR|O_CREAT, 0666, true, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &three) == 0);
+
+ for (i = 0; i < N_ENTRIES; i++) {
+ char *p, *q;
+ dual_timestamp ts;
+ struct iovec iovec[2];
+
+ dual_timestamp_get(&ts);
+
+ if (ts.monotonic <= previous_ts.monotonic)
+ ts.monotonic = previous_ts.monotonic + 1;
+
+ if (ts.realtime <= previous_ts.realtime)
+ ts.realtime = previous_ts.realtime + 1;
+
+ previous_ts = ts;
+
+ assert_se(asprintf(&p, "NUMBER=%u", i) >= 0);
+ iovec[0].iov_base = p;
+ iovec[0].iov_len = strlen(p);
+
+ assert_se(asprintf(&q, "MAGIC=%s", i % 5 == 0 ? "quux" : "waldo") >= 0);
+
+ iovec[1].iov_base = q;
+ iovec[1].iov_len = strlen(q);
+
+ if (i % 10 == 0)
+ assert_se(journal_file_append_entry(three, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
+ else {
+ if (i % 3 == 0)
+ assert_se(journal_file_append_entry(two, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
+
+ assert_se(journal_file_append_entry(one, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
+ }
+
+ free(p);
+ free(q);
+ }
+
+ (void) journal_file_close(one);
+ (void) journal_file_close(two);
+ (void) journal_file_close(three);
+
+ assert_se(sd_journal_open_directory(&j, t, 0) >= 0);
+
+ assert_se(sd_journal_add_match(j, "MAGIC=quux", 0) >= 0);
+ SD_JOURNAL_FOREACH_BACKWARDS(j) {
+ _cleanup_free_ char *c;
+
+ assert_se(sd_journal_get_data(j, "NUMBER", &data, &l) >= 0);
+ printf("\t%.*s\n", (int) l, (const char*) data);
+
+ assert_se(sd_journal_get_cursor(j, &c) >= 0);
+ assert_se(sd_journal_test_cursor(j, c) > 0);
+ }
+
+ SD_JOURNAL_FOREACH(j) {
+ _cleanup_free_ char *c;
+
+ assert_se(sd_journal_get_data(j, "NUMBER", &data, &l) >= 0);
+ printf("\t%.*s\n", (int) l, (const char*) data);
+
+ assert_se(sd_journal_get_cursor(j, &c) >= 0);
+ assert_se(sd_journal_test_cursor(j, c) > 0);
+ }
+
+ sd_journal_flush_matches(j);
+
+ verify_contents(j, 1);
+
+ printf("NEXT TEST\n");
+ assert_se(sd_journal_add_match(j, "MAGIC=quux", 0) >= 0);
+
+ assert_se(z = journal_make_match_string(j));
+ printf("resulting match expression is: %s\n", z);
+ free(z);
+
+ verify_contents(j, 5);
+
+ printf("NEXT TEST\n");
+ sd_journal_flush_matches(j);
+ assert_se(sd_journal_add_match(j, "MAGIC=waldo", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "NUMBER=10", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "NUMBER=11", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "NUMBER=12", 0) >= 0);
+
+ assert_se(z = journal_make_match_string(j));
+ printf("resulting match expression is: %s\n", z);
+ free(z);
+
+ verify_contents(j, 0);
+
+ assert_se(sd_journal_query_unique(j, "NUMBER") >= 0);
+ SD_JOURNAL_FOREACH_UNIQUE(j, data, l)
+ printf("%.*s\n", (int) l, (const char*) data);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+
+ return 0;
+}
diff --git a/src/journal/test-journal-syslog.c b/src/journal/test-journal-syslog.c
new file mode 100644
index 0000000..45be7e5
--- /dev/null
+++ b/src/journal/test-journal-syslog.c
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "journald-syslog.h"
+#include "macro.h"
+#include "string-util.h"
+#include "syslog-util.h"
+
+static void test_syslog_parse_identifier(const char *str,
+ const char *ident, const char *pid, const char *rest, int ret) {
+ const char *buf = str;
+ _cleanup_free_ char *ident2 = NULL, *pid2 = NULL;
+ int ret2;
+
+ ret2 = syslog_parse_identifier(&buf, &ident2, &pid2);
+
+ assert_se(ret == ret2);
+ assert_se(ident == ident2 || streq_ptr(ident, ident2));
+ assert_se(pid == pid2 || streq_ptr(pid, pid2));
+ assert_se(streq(buf, rest));
+}
+
+static void test_syslog_parse_priority(const char *str, int priority, int ret) {
+ const char *buf = str;
+ int priority2 = 0, ret2;
+
+ ret2 = syslog_parse_priority(&buf, &priority2, false);
+
+ assert_se(ret == ret2);
+ if (ret2 == 1)
+ assert_se(priority == priority2);
+}
+
+int main(void) {
+ test_syslog_parse_identifier("pidu[111]: xxx", "pidu", "111", "xxx", 11);
+ test_syslog_parse_identifier("pidu: xxx", "pidu", NULL, "xxx", 6);
+ test_syslog_parse_identifier("pidu: xxx", "pidu", NULL, " xxx", 6);
+ test_syslog_parse_identifier("pidu xxx", NULL, NULL, "pidu xxx", 0);
+ test_syslog_parse_identifier(" pidu xxx", NULL, NULL, " pidu xxx", 0);
+ test_syslog_parse_identifier("", NULL, NULL, "", 0);
+ test_syslog_parse_identifier(" ", NULL, NULL, " ", 0);
+ test_syslog_parse_identifier(":", "", NULL, "", 1);
+ test_syslog_parse_identifier(": ", "", NULL, " ", 2);
+ test_syslog_parse_identifier(" :", "", NULL, "", 2);
+ test_syslog_parse_identifier(" pidu:", "pidu", NULL, "", 8);
+ test_syslog_parse_identifier("pidu:", "pidu", NULL, "", 5);
+ test_syslog_parse_identifier("pidu: ", "pidu", NULL, "", 6);
+ test_syslog_parse_identifier("pidu : ", NULL, NULL, "pidu : ", 0);
+
+ test_syslog_parse_priority("<>", 0, 0);
+ test_syslog_parse_priority("<>aaa", 0, 0);
+ test_syslog_parse_priority("<aaaa>", 0, 0);
+ test_syslog_parse_priority("<aaaa>aaa", 0, 0);
+ test_syslog_parse_priority(" <aaaa>", 0, 0);
+ test_syslog_parse_priority(" <aaaa>aaa", 0, 0);
+ /* TODO: add test cases of valid priorities */
+
+ return 0;
+}
diff --git a/src/journal/test-journal-verify.c b/src/journal/test-journal-verify.c
new file mode 100644
index 0000000..c4fa41e
--- /dev/null
+++ b/src/journal/test-journal-verify.c
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "io-util.h"
+#include "journal-file.h"
+#include "journal-verify.h"
+#include "log.h"
+#include "rm-rf.h"
+#include "terminal-util.h"
+#include "tests.h"
+#include "util.h"
+
+#define N_ENTRIES 6000
+#define RANDOM_RANGE 77
+
+static void bit_toggle(const char *fn, uint64_t p) {
+ uint8_t b;
+ ssize_t r;
+ int fd;
+
+ fd = open(fn, O_RDWR|O_CLOEXEC);
+ assert_se(fd >= 0);
+
+ r = pread(fd, &b, 1, p/8);
+ assert_se(r == 1);
+
+ b ^= 1 << (p % 8);
+
+ r = pwrite(fd, &b, 1, p/8);
+ assert_se(r == 1);
+
+ safe_close(fd);
+}
+
+static int raw_verify(const char *fn, const char *verification_key) {
+ JournalFile *f;
+ int r;
+
+ r = journal_file_open(-1, fn, O_RDONLY, 0666, true, (uint64_t) -1, !!verification_key, NULL, NULL, NULL, NULL, &f);
+ if (r < 0)
+ return r;
+
+ r = journal_file_verify(f, verification_key, NULL, NULL, NULL, false);
+ (void) journal_file_close(f);
+
+ return r;
+}
+
+int main(int argc, char *argv[]) {
+ char t[] = "/tmp/journal-XXXXXX";
+ unsigned n;
+ JournalFile *f;
+ const char *verification_key = argv[1];
+ usec_t from = 0, to = 0, total = 0;
+ char a[FORMAT_TIMESTAMP_MAX];
+ char b[FORMAT_TIMESTAMP_MAX];
+ char c[FORMAT_TIMESPAN_MAX];
+ struct stat st;
+ uint64_t p;
+
+ /* journal_file_open requires a valid machine id */
+ if (access("/etc/machine-id", F_OK) != 0)
+ return log_tests_skipped("/etc/machine-id not found");
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(mkdtemp(t));
+ assert_se(chdir(t) >= 0);
+
+ log_info("Generating...");
+
+ assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, true, (uint64_t) -1, !!verification_key, NULL, NULL, NULL, NULL, &f) == 0);
+
+ for (n = 0; n < N_ENTRIES; n++) {
+ struct iovec iovec;
+ struct dual_timestamp ts;
+ char *test;
+
+ dual_timestamp_get(&ts);
+
+ assert_se(asprintf(&test, "RANDOM=%lu", random() % RANDOM_RANGE));
+
+ iovec = IOVEC_MAKE_STRING(test);
+
+ assert_se(journal_file_append_entry(f, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+
+ free(test);
+ }
+
+ (void) journal_file_close(f);
+
+ log_info("Verifying...");
+
+ assert_se(journal_file_open(-1, "test.journal", O_RDONLY, 0666, true, (uint64_t) -1, !!verification_key, NULL, NULL, NULL, NULL, &f) == 0);
+ /* journal_file_print_header(f); */
+ journal_file_dump(f);
+
+ assert_se(journal_file_verify(f, verification_key, &from, &to, &total, true) >= 0);
+
+ if (verification_key && JOURNAL_HEADER_SEALED(f->header))
+ log_info("=> Validated from %s to %s, %s missing",
+ format_timestamp(a, sizeof(a), from),
+ format_timestamp(b, sizeof(b), to),
+ format_timespan(c, sizeof(c), total > to ? total - to : 0, 0));
+
+ (void) journal_file_close(f);
+
+ if (verification_key) {
+ log_info("Toggling bits...");
+
+ assert_se(stat("test.journal", &st) >= 0);
+
+ for (p = 38448*8+0; p < ((uint64_t) st.st_size * 8); p ++) {
+ bit_toggle("test.journal", p);
+
+ log_info("[ %"PRIu64"+%"PRIu64"]", p / 8, p % 8);
+
+ if (raw_verify("test.journal", verification_key) >= 0)
+ log_notice(ANSI_HIGHLIGHT_RED ">>>> %"PRIu64" (bit %"PRIu64") can be toggled without detection." ANSI_NORMAL, p / 8, p % 8);
+
+ bit_toggle("test.journal", p);
+ }
+ }
+
+ log_info("Exiting...");
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+
+ return 0;
+}
diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c
new file mode 100644
index 0000000..0795e0d
--- /dev/null
+++ b/src/journal/test-journal.c
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "io-util.h"
+#include "journal-authenticate.h"
+#include "journal-file.h"
+#include "journal-vacuum.h"
+#include "log.h"
+#include "rm-rf.h"
+#include "tests.h"
+
+static bool arg_keep = false;
+
+static void test_non_empty(void) {
+ dual_timestamp ts;
+ JournalFile *f;
+ struct iovec iovec;
+ static const char test[] = "TEST1=1", test2[] = "TEST2=2";
+ Object *o;
+ uint64_t p;
+ sd_id128_t fake_boot_id;
+ char t[] = "/tmp/journal-XXXXXX";
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(mkdtemp(t));
+ assert_se(chdir(t) >= 0);
+
+ assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, true, (uint64_t) -1, true, NULL, NULL, NULL, NULL, &f) == 0);
+
+ assert_se(dual_timestamp_get(&ts));
+ assert_se(sd_id128_randomize(&fake_boot_id) == 0);
+
+ iovec = IOVEC_MAKE_STRING(test);
+ assert_se(journal_file_append_entry(f, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+
+ iovec = IOVEC_MAKE_STRING(test2);
+ assert_se(journal_file_append_entry(f, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+
+ iovec = IOVEC_MAKE_STRING(test);
+ assert_se(journal_file_append_entry(f, &ts, &fake_boot_id, &iovec, 1, NULL, NULL, NULL) == 0);
+
+#if HAVE_GCRYPT
+ journal_file_append_tag(f);
+#endif
+ journal_file_dump(f);
+
+ assert_se(journal_file_next_entry(f, 0, DIRECTION_DOWN, &o, &p) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 1);
+
+ assert_se(journal_file_next_entry(f, p, DIRECTION_DOWN, &o, &p) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 2);
+
+ assert_se(journal_file_next_entry(f, p, DIRECTION_DOWN, &o, &p) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 3);
+ assert_se(sd_id128_equal(o->entry.boot_id, fake_boot_id));
+
+ assert_se(journal_file_next_entry(f, p, DIRECTION_DOWN, &o, &p) == 0);
+
+ assert_se(journal_file_next_entry(f, 0, DIRECTION_DOWN, &o, &p) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 1);
+
+ assert_se(journal_file_find_data_object(f, test, strlen(test), NULL, &p) == 1);
+ assert_se(journal_file_next_entry_for_data(f, NULL, 0, p, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 1);
+
+ assert_se(journal_file_next_entry_for_data(f, NULL, 0, p, DIRECTION_UP, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 3);
+
+ assert_se(journal_file_find_data_object(f, test2, strlen(test2), NULL, &p) == 1);
+ assert_se(journal_file_next_entry_for_data(f, NULL, 0, p, DIRECTION_UP, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 2);
+
+ assert_se(journal_file_next_entry_for_data(f, NULL, 0, p, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 2);
+
+ assert_se(journal_file_find_data_object(f, "quux", 4, NULL, &p) == 0);
+
+ assert_se(journal_file_move_to_entry_by_seqnum(f, 1, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 1);
+
+ assert_se(journal_file_move_to_entry_by_seqnum(f, 3, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 3);
+
+ assert_se(journal_file_move_to_entry_by_seqnum(f, 2, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 2);
+
+ assert_se(journal_file_move_to_entry_by_seqnum(f, 10, DIRECTION_DOWN, &o, NULL) == 0);
+
+ journal_file_rotate(&f, true, (uint64_t) -1, true, NULL);
+ journal_file_rotate(&f, true, (uint64_t) -1, true, NULL);
+
+ (void) journal_file_close(f);
+
+ log_info("Done...");
+
+ if (arg_keep)
+ log_info("Not removing %s", t);
+ else {
+ journal_directory_vacuum(".", 3000000, 0, 0, NULL, true);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+ }
+
+ puts("------------------------------------------------------------");
+}
+
+static void test_empty(void) {
+ JournalFile *f1, *f2, *f3, *f4;
+ char t[] = "/tmp/journal-XXXXXX";
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(mkdtemp(t));
+ assert_se(chdir(t) >= 0);
+
+ assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, false, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &f1) == 0);
+
+ assert_se(journal_file_open(-1, "test-compress.journal", O_RDWR|O_CREAT, 0666, true, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &f2) == 0);
+
+ assert_se(journal_file_open(-1, "test-seal.journal", O_RDWR|O_CREAT, 0666, false, (uint64_t) -1, true, NULL, NULL, NULL, NULL, &f3) == 0);
+
+ assert_se(journal_file_open(-1, "test-seal-compress.journal", O_RDWR|O_CREAT, 0666, true, (uint64_t) -1, true, NULL, NULL, NULL, NULL, &f4) == 0);
+
+ journal_file_print_header(f1);
+ puts("");
+ journal_file_print_header(f2);
+ puts("");
+ journal_file_print_header(f3);
+ puts("");
+ journal_file_print_header(f4);
+ puts("");
+
+ log_info("Done...");
+
+ if (arg_keep)
+ log_info("Not removing %s", t);
+ else {
+ journal_directory_vacuum(".", 3000000, 0, 0, NULL, true);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+ }
+
+ (void) journal_file_close(f1);
+ (void) journal_file_close(f2);
+ (void) journal_file_close(f3);
+ (void) journal_file_close(f4);
+}
+
+#if HAVE_XZ || HAVE_LZ4
+static bool check_compressed(uint64_t compress_threshold, uint64_t data_size) {
+ dual_timestamp ts;
+ JournalFile *f;
+ struct iovec iovec;
+ Object *o;
+ uint64_t p;
+ char t[] = "/tmp/journal-XXXXXX";
+ char data[2048] = {0};
+ bool is_compressed;
+ int r;
+
+ assert_se(data_size <= sizeof(data));
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(mkdtemp(t));
+ assert_se(chdir(t) >= 0);
+
+ assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, true, compress_threshold, true, NULL, NULL, NULL, NULL, &f) == 0);
+
+ dual_timestamp_get(&ts);
+
+ iovec = IOVEC_MAKE(data, data_size);
+ assert_se(journal_file_append_entry(f, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+
+#if HAVE_GCRYPT
+ journal_file_append_tag(f);
+#endif
+ journal_file_dump(f);
+
+ /* We have to partially reimplement some of the dump logic, because the normal next_entry does the
+ * decompression for us. */
+ p = le64toh(f->header->header_size);
+ for (;;) {
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
+ assert_se(r == 0);
+ if (o->object.type == OBJECT_DATA)
+ break;
+
+ assert_se(p < le64toh(f->header->tail_object_offset));
+ p = p + ALIGN64(le64toh(o->object.size));
+ }
+
+ is_compressed = (o->object.flags & OBJECT_COMPRESSION_MASK) != 0;
+
+ (void) journal_file_close(f);
+
+ log_info("Done...");
+
+ if (arg_keep)
+ log_info("Not removing %s", t);
+ else {
+ journal_directory_vacuum(".", 3000000, 0, 0, NULL, true);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+ }
+
+ puts("------------------------------------------------------------");
+
+ return is_compressed;
+}
+
+static void test_min_compress_size(void) {
+ /* Note that XZ will actually fail to compress anything under 80 bytes, so you have to choose the limits
+ * carefully */
+
+ /* DEFAULT_MIN_COMPRESS_SIZE is 512 */
+ assert_se(!check_compressed((uint64_t) -1, 255));
+ assert_se(check_compressed((uint64_t) -1, 513));
+
+ /* compress everything */
+ assert_se(check_compressed(0, 96));
+ assert_se(check_compressed(8, 96));
+
+ /* Ensure we don't try to compress less than 8 bytes */
+ assert_se(!check_compressed(0, 7));
+
+ /* check boundary conditions */
+ assert_se(check_compressed(256, 256));
+ assert_se(!check_compressed(256, 255));
+}
+#endif
+
+int main(int argc, char *argv[]) {
+ arg_keep = argc > 1;
+
+ test_setup_logging(LOG_INFO);
+
+ /* journal_file_open requires a valid machine id */
+ if (access("/etc/machine-id", F_OK) != 0)
+ return log_tests_skipped("/etc/machine-id not found");
+
+ test_non_empty();
+ test_empty();
+#if HAVE_XZ || HAVE_LZ4
+ test_min_compress_size();
+#endif
+
+ return 0;
+}
diff --git a/src/journal/test-mmap-cache.c b/src/journal/test-mmap-cache.c
new file mode 100644
index 0000000..8f755ef
--- /dev/null
+++ b/src/journal/test-mmap-cache.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "macro.h"
+#include "mmap-cache.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ MMapFileDescriptor *fx;
+ int x, y, z, r;
+ char px[] = "/tmp/testmmapXXXXXXX", py[] = "/tmp/testmmapYXXXXXX", pz[] = "/tmp/testmmapZXXXXXX";
+ MMapCache *m;
+ void *p, *q;
+
+ assert_se(m = mmap_cache_new());
+
+ x = mkostemp_safe(px);
+ assert_se(x >= 0);
+ unlink(px);
+
+ assert_se(fx = mmap_cache_add_fd(m, x));
+
+ y = mkostemp_safe(py);
+ assert_se(y >= 0);
+ unlink(py);
+
+ z = mkostemp_safe(pz);
+ assert_se(z >= 0);
+ unlink(pz);
+
+ r = mmap_cache_get(m, fx, PROT_READ, 0, false, 1, 2, NULL, &p, NULL);
+ assert_se(r >= 0);
+
+ r = mmap_cache_get(m, fx, PROT_READ, 0, false, 2, 2, NULL, &q, NULL);
+ assert_se(r >= 0);
+
+ assert_se((uint8_t*) p + 1 == (uint8_t*) q);
+
+ r = mmap_cache_get(m, fx, PROT_READ, 1, false, 3, 2, NULL, &q, NULL);
+ assert_se(r >= 0);
+
+ assert_se((uint8_t*) p + 2 == (uint8_t*) q);
+
+ r = mmap_cache_get(m, fx, PROT_READ, 0, false, 16ULL*1024ULL*1024ULL, 2, NULL, &p, NULL);
+ assert_se(r >= 0);
+
+ r = mmap_cache_get(m, fx, PROT_READ, 1, false, 16ULL*1024ULL*1024ULL+1, 2, NULL, &q, NULL);
+ assert_se(r >= 0);
+
+ assert_se((uint8_t*) p + 1 == (uint8_t*) q);
+
+ mmap_cache_free_fd(m, fx);
+ mmap_cache_unref(m);
+
+ safe_close(x);
+ safe_close(y);
+ safe_close(z);
+
+ return 0;
+}
diff --git a/src/kernel-install/50-depmod.install b/src/kernel-install/50-depmod.install
new file mode 100644
index 0000000..88f550a
--- /dev/null
+++ b/src/kernel-install/50-depmod.install
@@ -0,0 +1,17 @@
+#!/bin/bash
+# -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*-
+# ex: ts=8 sw=4 sts=4 et filetype=sh
+
+[[ $2 ]] || exit 1
+
+case "$1" in
+ add)
+ [[ -d /lib/modules/"$2"/kernel ]] || exit 0
+ exec depmod -a "$2"
+ ;;
+ remove)
+ exec rm -f /lib/modules/"$2"/modules.{alias{,.bin},builtin.bin,dep{,.bin},devname,softdep,symbols{,.bin}}
+ ;;
+ *)
+ exit 0
+esac
diff --git a/src/kernel-install/90-loaderentry.install b/src/kernel-install/90-loaderentry.install
new file mode 100644
index 0000000..75dd5a1
--- /dev/null
+++ b/src/kernel-install/90-loaderentry.install
@@ -0,0 +1,124 @@
+#!/bin/bash
+# -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*-
+# ex: ts=8 sw=4 sts=4 et filetype=sh
+
+COMMAND="$1"
+KERNEL_VERSION="$2"
+BOOT_DIR_ABS="$3"
+KERNEL_IMAGE="$4"
+INITRD_OPTIONS_START="5"
+
+if ! [[ $KERNEL_INSTALL_MACHINE_ID ]]; then
+ exit 0
+fi
+
+if ! [[ -d "$BOOT_DIR_ABS" ]]; then
+ exit 0
+fi
+
+MACHINE_ID=$KERNEL_INSTALL_MACHINE_ID
+
+BOOT_DIR="/$MACHINE_ID/$KERNEL_VERSION"
+BOOT_ROOT=${BOOT_DIR_ABS%$BOOT_DIR}
+
+if [[ $COMMAND == remove ]]; then
+ rm -f "$BOOT_ROOT/loader/entries/$MACHINE_ID-$KERNEL_VERSION.conf"
+ rm -f "$BOOT_ROOT/loader/entries/$MACHINE_ID-$KERNEL_VERSION+"*".conf"
+ exit 0
+fi
+
+if ! [[ $COMMAND == add ]]; then
+ exit 1
+fi
+
+if ! [[ $KERNEL_IMAGE ]]; then
+ exit 1
+fi
+
+if [[ -f /etc/os-release ]]; then
+ . /etc/os-release
+elif [[ -f /usr/lib/os-release ]]; then
+ . /usr/lib/os-release
+fi
+
+if ! [[ $PRETTY_NAME ]]; then
+ PRETTY_NAME="Linux $KERNEL_VERSION"
+fi
+
+declare -a BOOT_OPTIONS
+
+if [[ -f /etc/kernel/cmdline ]]; then
+ read -r -d '' -a BOOT_OPTIONS < /etc/kernel/cmdline
+fi
+
+if ! [[ ${BOOT_OPTIONS[*]} ]]; then
+ read -r -d '' -a line < /proc/cmdline
+ for i in "${line[@]}"; do
+ [[ "${i#initrd=*}" != "$i" ]] && continue
+ BOOT_OPTIONS+=("$i")
+ done
+fi
+
+if ! [[ ${BOOT_OPTIONS[*]} ]]; then
+ echo "Could not determine the kernel command line parameters." >&2
+ echo "Please specify the kernel command line in /etc/kernel/cmdline!" >&2
+ exit 1
+fi
+
+if [[ -f /etc/kernel/tries ]]; then
+ read -r TRIES </etc/kernel/tries
+ if ! [[ "$TRIES" =~ ^[0-9]+$ ]] ; then
+ echo "/etc/kernel/tries does not contain an integer." >&2
+ exit 1
+ fi
+ LOADER_ENTRY="$BOOT_ROOT/loader/entries/$MACHINE_ID-$KERNEL_VERSION+$TRIES.conf"
+else
+ LOADER_ENTRY="$BOOT_ROOT/loader/entries/$MACHINE_ID-$KERNEL_VERSION.conf"
+fi
+
+cp "$KERNEL_IMAGE" "$BOOT_DIR_ABS/linux" &&
+ chown root:root "$BOOT_DIR_ABS/linux" &&
+ chmod 0644 "$BOOT_DIR_ABS/linux" || {
+ echo "Could not copy '$KERNEL_IMAGE to '$BOOT_DIR_ABS/linux'." >&2
+ exit 1
+}
+
+INITRD_OPTIONS=( "${@:${INITRD_OPTIONS_START}}" )
+
+for initrd in "${INITRD_OPTIONS[@]}"; do
+ if [[ -f "${initrd}" ]]; then
+ initrd_basename="$(basename ${initrd})"
+ cp "${initrd}" "$BOOT_DIR_ABS/${initrd_basename}" &&
+ chown root:root "$BOOT_DIR_ABS/${initrd_basename}" &&
+ chmod 0644 "$BOOT_DIR_ABS/${initrd_basename}" || {
+ echo "Could not copy '${initrd}' to '$BOOT_DIR_ABS/${initrd_basename}'." >&2
+ exit 1
+ }
+ fi
+done
+
+# If no initrd option is supplied, fallback to "initrd" which is
+# the name used by dracut when generating it in its kernel-install hook
+[[ ${#INITRD_OPTIONS[@]} == 0 ]] && INITRD_OPTIONS=( initrd )
+
+mkdir -p "${LOADER_ENTRY%/*}" || {
+ echo "Could not create loader entry directory '${LOADER_ENTRY%/*}'." >&2
+ exit 1
+}
+
+{
+ echo "title $PRETTY_NAME"
+ echo "version $KERNEL_VERSION"
+ echo "machine-id $MACHINE_ID"
+ echo "options ${BOOT_OPTIONS[*]}"
+ echo "linux $BOOT_DIR/linux"
+ for initrd in "${INITRD_OPTIONS[@]}"; do
+ [[ -f $BOOT_DIR_ABS/$(basename ${initrd}) ]] && \
+ echo "initrd $BOOT_DIR/$(basename ${initrd})"
+ done
+ :
+} > "$LOADER_ENTRY" || {
+ echo "Could not create loader entry '$LOADER_ENTRY'." >&2
+ exit 1
+}
+exit 0
diff --git a/src/kernel-install/kernel-install b/src/kernel-install/kernel-install
new file mode 100644
index 0000000..b85c7c5
--- /dev/null
+++ b/src/kernel-install/kernel-install
@@ -0,0 +1,171 @@
+#!/bin/bash
+# -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*-
+# ex: ts=8 sw=4 sts=4 et filetype=sh
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# systemd is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with systemd; If not, see <http://www.gnu.org/licenses/>.
+
+SKIP_REMAINING=77
+
+usage()
+{
+ echo "Usage:"
+ echo " $0 add KERNEL-VERSION KERNEL-IMAGE [INITRD-FILE ...]"
+ echo " $0 remove KERNEL-VERSION"
+}
+
+dropindirs_sort()
+{
+ local suffix=$1; shift
+ local -a files
+ local f d i
+
+ readarray -t files <<<"$(
+ for d in "$@"; do
+ for i in "$d/"*"$suffix"; do
+ if [[ -e "$i" ]]; then
+ echo "${i##*/}"
+ fi
+ done
+ done | sort -Vu
+ )"
+
+ for f in "${files[@]}"; do
+ for d in "$@"; do
+ if [[ -e "$d/$f" ]]; then
+ echo "$d/$f"
+ continue 2
+ fi
+ done
+ done
+}
+
+export LC_COLLATE=C
+
+for i in "$@"; do
+ if [ "$i" == "--help" -o "$i" == "-h" ]; then
+ usage
+ exit 0
+ fi
+done
+
+if [[ "${0##*/}" == 'installkernel' ]]; then
+ COMMAND='add'
+ # make install doesn't pass any parameter wrt initrd handling
+ INITRD_OPTIONS=()
+else
+ COMMAND="$1"
+ shift
+ INITRD_OPTIONS=( "${@:3}" )
+fi
+
+KERNEL_VERSION="$1"
+KERNEL_IMAGE="$2"
+
+if [[ -f /etc/machine-id ]]; then
+ read MACHINE_ID < /etc/machine-id
+fi
+
+if [[ ! $COMMAND ]] || [[ ! $KERNEL_VERSION ]]; then
+ echo "Not enough arguments" >&2
+ exit 1
+fi
+
+if ! [[ $MACHINE_ID ]]; then
+ BOOT_DIR_ABS=$(mktemp -d /tmp/kernel-install.XXXXX) || exit 1
+ trap "rm -rf '$BOOT_DIR_ABS'" EXIT INT QUIT PIPE
+elif [[ -d /efi/loader/entries ]] || [[ -d /efi/$MACHINE_ID ]]; then
+ BOOT_DIR_ABS="/efi/$MACHINE_ID/$KERNEL_VERSION"
+elif [[ -d /boot/loader/entries ]] || [[ -d /boot/$MACHINE_ID ]]; then
+ BOOT_DIR_ABS="/boot/$MACHINE_ID/$KERNEL_VERSION"
+elif [[ -d /boot/efi/loader/entries ]] || [[ -d /boot/efi/$MACHINE_ID ]]; then
+ BOOT_DIR_ABS="/boot/efi/$MACHINE_ID/$KERNEL_VERSION"
+elif mountpoint -q /efi; then
+ BOOT_DIR_ABS="/efi/$MACHINE_ID/$KERNEL_VERSION"
+elif mountpoint -q /boot/efi; then
+ BOOT_DIR_ABS="/boot/efi/$MACHINE_ID/$KERNEL_VERSION"
+else
+ BOOT_DIR_ABS="/boot/$MACHINE_ID/$KERNEL_VERSION"
+fi
+
+export KERNEL_INSTALL_MACHINE_ID=$MACHINE_ID
+
+ret=0
+
+readarray -t PLUGINS <<<"$(
+ dropindirs_sort ".install" \
+ "/etc/kernel/install.d" \
+ "/usr/lib/kernel/install.d"
+)"
+
+case $COMMAND in
+ add)
+ if [[ ! "$KERNEL_IMAGE" ]]; then
+ echo "Command 'add' requires an argument" >&2
+ exit 1
+ fi
+
+ mkdir -p "$BOOT_DIR_ABS" || {
+ echo "Could not create boot directory '$BOOT_DIR_ABS'." >&2
+ exit 1
+ }
+
+ for f in "${PLUGINS[@]}"; do
+ if [[ -x $f ]]; then
+ "$f" add "$KERNEL_VERSION" "$BOOT_DIR_ABS" "$KERNEL_IMAGE" "${INITRD_OPTIONS[@]}"
+ x=$?
+ if [[ $x == $SKIP_REMAINING ]]; then
+ ret=0
+ break
+ fi
+ ((ret+=$x))
+ fi
+ done
+
+ if ! [[ $MACHINE_ID ]] && ! rmdir "$BOOT_DIR_ABS"; then
+ echo "Warning: In kernel-install plugins, requiring BOOT_DIR_ABS to be preset is deprecated." >&2
+ echo " All plugins should not put anything in BOOT_DIR_ABS if the environment" >&2
+ echo " variable KERNEL_INSTALL_MACHINE_ID is empty." >&2
+ rm -rf "$BOOT_DIR_ABS"
+ ((ret+=$?))
+ fi
+ ;;
+
+ remove)
+ for f in "${PLUGINS[@]}"; do
+ if [[ -x $f ]]; then
+ "$f" remove "$KERNEL_VERSION" "$BOOT_DIR_ABS"
+ x=$?
+ if [[ $x == $SKIP_REMAINING ]]; then
+ ret=0
+ break
+ fi
+ ((ret+=$x))
+ fi
+ done
+
+ rm -rf "$BOOT_DIR_ABS"
+ ((ret+=$?))
+ ;;
+
+ *)
+ echo "Unknown command '$COMMAND'" >&2
+ exit 1
+ ;;
+esac
+
+exit $ret
diff --git a/src/kernel-install/meson.build b/src/kernel-install/meson.build
new file mode 100644
index 0000000..c6e6f81
--- /dev/null
+++ b/src/kernel-install/meson.build
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+install_data('kernel-install',
+ install_mode : 'rwxr-xr-x',
+ install_dir : bindir)
+
+install_data('50-depmod.install',
+ '90-loaderentry.install',
+ install_mode : 'rwxr-xr-x',
+ install_dir : kernelinstalldir)
+
+meson.add_install_script('sh', '-c',
+ mkdir_p.format(join_paths(sysconfdir, 'kernel/install.d')))
diff --git a/src/libsystemd-network/arp-util.c b/src/libsystemd-network/arp-util.c
new file mode 100644
index 0000000..3a86f3f
--- /dev/null
+++ b/src/libsystemd-network/arp-util.c
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Axis Communications AB. All rights reserved.
+***/
+
+#include <linux/filter.h>
+#include <arpa/inet.h>
+
+#include "arp-util.h"
+#include "fd-util.h"
+#include "unaligned.h"
+#include "util.h"
+
+int arp_network_bind_raw_socket(int ifindex, be32_t address, const struct ether_addr *eth_mac) {
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_W + BPF_LEN, 0), /* A <- packet length */
+ BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, sizeof(struct ether_arp), 1, 0), /* packet >= arp packet ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_hrd)), /* A <- header */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPHRD_ETHER, 1, 0), /* header == ethernet ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_pro)), /* A <- protocol */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 1, 0), /* protocol == IP ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_hln)), /* A <- hardware address length */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, sizeof(struct ether_addr), 1, 0), /* length == sizeof(ether_addr)? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_pln)), /* A <- protocol address length */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, sizeof(struct in_addr), 1, 0), /* length == sizeof(in_addr) ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_op)), /* A <- operation */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPOP_REQUEST, 2, 0), /* protocol == request ? */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPOP_REPLY, 1, 0), /* protocol == reply ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ /* Sender Hardware Address must be different from our own */
+ BPF_STMT(BPF_LD + BPF_IMM, unaligned_read_be32(&eth_mac->ether_addr_octet[0])),/* A <- 4 bytes of client's MAC */
+ BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_sha)), /* A <- 4 bytes of SHA */
+ BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* A xor X */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 6), /* A == 0 ? */
+ BPF_STMT(BPF_LD + BPF_IMM, unaligned_read_be16(&eth_mac->ether_addr_octet[4])),/* A <- remainder of client's MAC */
+ BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, arp_sha) + 4), /* A <- remainder of SHA */
+ BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* A xor X */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ /* Sender Protocol Address or Target Protocol Address must be equal to the one we care about */
+ BPF_STMT(BPF_LD + BPF_IMM, htobe32(address)), /* A <- clients IP */
+ BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_spa)), /* A <- SPA */
+ BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* X xor A */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? */
+ BPF_STMT(BPF_RET + BPF_K, 65535), /* return all */
+ BPF_STMT(BPF_LD + BPF_IMM, htobe32(address)), /* A <- clients IP */
+ BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_tpa)), /* A <- TPA */
+ BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* X xor A */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? */
+ BPF_STMT(BPF_RET + BPF_K, 65535), /* return all */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ };
+ struct sock_fprog fprog = {
+ .len = ELEMENTSOF(filter),
+ .filter = (struct sock_filter*) filter
+ };
+ union sockaddr_union link = {
+ .ll.sll_family = AF_PACKET,
+ .ll.sll_protocol = htobe16(ETH_P_ARP),
+ .ll.sll_ifindex = ifindex,
+ .ll.sll_halen = ETH_ALEN,
+ .ll.sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(ifindex > 0);
+
+ s = socket(PF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return -errno;
+
+ r = setsockopt(s, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
+ if (r < 0)
+ return -errno;
+
+ r = bind(s, &link.sa, sizeof(link.ll));
+ if (r < 0)
+ return -errno;
+
+ return TAKE_FD(s);
+}
+
+static int arp_send_packet(int fd, int ifindex,
+ be32_t pa, const struct ether_addr *ha,
+ bool announce) {
+ union sockaddr_union link = {
+ .ll.sll_family = AF_PACKET,
+ .ll.sll_protocol = htobe16(ETH_P_ARP),
+ .ll.sll_ifindex = ifindex,
+ .ll.sll_halen = ETH_ALEN,
+ .ll.sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ };
+ struct ether_arp arp = {
+ .ea_hdr.ar_hrd = htobe16(ARPHRD_ETHER), /* HTYPE */
+ .ea_hdr.ar_pro = htobe16(ETHERTYPE_IP), /* PTYPE */
+ .ea_hdr.ar_hln = ETH_ALEN, /* HLEN */
+ .ea_hdr.ar_pln = sizeof(be32_t), /* PLEN */
+ .ea_hdr.ar_op = htobe16(ARPOP_REQUEST), /* REQUEST */
+ };
+ int r;
+
+ assert(fd >= 0);
+ assert(pa != 0);
+ assert(ha);
+
+ memcpy(&arp.arp_sha, ha, ETH_ALEN);
+ memcpy(&arp.arp_tpa, &pa, sizeof(pa));
+
+ if (announce)
+ memcpy(&arp.arp_spa, &pa, sizeof(pa));
+
+ r = sendto(fd, &arp, sizeof(struct ether_arp), 0, &link.sa, sizeof(link.ll));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int arp_send_probe(int fd, int ifindex,
+ be32_t pa, const struct ether_addr *ha) {
+ return arp_send_packet(fd, ifindex, pa, ha, false);
+}
+
+int arp_send_announcement(int fd, int ifindex,
+ be32_t pa, const struct ether_addr *ha) {
+ return arp_send_packet(fd, ifindex, pa, ha, true);
+}
diff --git a/src/libsystemd-network/arp-util.h b/src/libsystemd-network/arp-util.h
new file mode 100644
index 0000000..10c6848
--- /dev/null
+++ b/src/libsystemd-network/arp-util.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2014 Axis Communications AB. All rights reserved.
+***/
+
+#include <netinet/if_ether.h>
+
+#include "socket-util.h"
+#include "sparse-endian.h"
+
+int arp_network_bind_raw_socket(int index, be32_t address, const struct ether_addr *eth_mac);
+
+int arp_send_probe(int fd, int ifindex,
+ be32_t pa, const struct ether_addr *ha);
+int arp_send_announcement(int fd, int ifindex,
+ be32_t pa, const struct ether_addr *ha);
diff --git a/src/libsystemd-network/dhcp-identifier.c b/src/libsystemd-network/dhcp-identifier.c
new file mode 100644
index 0000000..04bf64c
--- /dev/null
+++ b/src/libsystemd-network/dhcp-identifier.c
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <linux/if_infiniband.h>
+#include <net/if_arp.h>
+
+#include "sd-device.h"
+#include "sd-id128.h"
+
+#include "dhcp-identifier.h"
+#include "dhcp6-protocol.h"
+#include "network-internal.h"
+#include "siphash24.h"
+#include "sparse-endian.h"
+#include "stdio-util.h"
+#include "virt.h"
+
+#define SYSTEMD_PEN 43793
+#define HASH_KEY SD_ID128_MAKE(80,11,8c,c2,fe,4a,03,ee,3e,d6,0c,6f,36,39,14,09)
+#define APPLICATION_ID SD_ID128_MAKE(a5,0a,d1,12,bf,60,45,77,a2,fb,74,1a,b1,95,5b,03)
+#define USEC_2000 ((usec_t) 946684800000000) /* 2000-01-01 00:00:00 UTC */
+
+int dhcp_validate_duid_len(uint16_t duid_type, size_t duid_len, bool strict) {
+ struct duid d;
+
+ assert_cc(sizeof(d.raw) >= MAX_DUID_LEN);
+ if (duid_len > MAX_DUID_LEN)
+ return -EINVAL;
+
+ if (!strict) {
+ /* Strict validation is not requested. We only ensure that the
+ * DUID is not too long. */
+ return 0;
+ }
+
+ switch (duid_type) {
+ case DUID_TYPE_LLT:
+ if (duid_len <= sizeof(d.llt))
+ return -EINVAL;
+ break;
+ case DUID_TYPE_EN:
+ if (duid_len != sizeof(d.en))
+ return -EINVAL;
+ break;
+ case DUID_TYPE_LL:
+ if (duid_len <= sizeof(d.ll))
+ return -EINVAL;
+ break;
+ case DUID_TYPE_UUID:
+ if (duid_len != sizeof(d.uuid))
+ return -EINVAL;
+ break;
+ default:
+ /* accept unknown type in order to be forward compatible */
+ break;
+ }
+ return 0;
+}
+
+int dhcp_identifier_set_duid_llt(struct duid *duid, usec_t t, const uint8_t *addr, size_t addr_len, uint16_t arp_type, size_t *len) {
+ uint16_t time_from_2000y;
+
+ assert(duid);
+ assert(len);
+ assert(addr);
+
+ if (arp_type == ARPHRD_ETHER)
+ assert_return(addr_len == ETH_ALEN, -EINVAL);
+ else if (arp_type == ARPHRD_INFINIBAND)
+ assert_return(addr_len == INFINIBAND_ALEN, -EINVAL);
+ else
+ return -EINVAL;
+
+ if (t < USEC_2000)
+ time_from_2000y = 0;
+ else
+ time_from_2000y = (uint16_t) (((t - USEC_2000) / USEC_PER_SEC) & 0xffffffff);
+
+ unaligned_write_be16(&duid->type, DUID_TYPE_LLT);
+ unaligned_write_be16(&duid->llt.htype, arp_type);
+ unaligned_write_be32(&duid->llt.time, time_from_2000y);
+ memcpy(duid->llt.haddr, addr, addr_len);
+
+ *len = sizeof(duid->type) + sizeof(duid->llt.htype) + sizeof(duid->llt.time) + addr_len;
+
+ return 0;
+}
+
+int dhcp_identifier_set_duid_ll(struct duid *duid, const uint8_t *addr, size_t addr_len, uint16_t arp_type, size_t *len) {
+ assert(duid);
+ assert(len);
+ assert(addr);
+
+ if (arp_type == ARPHRD_ETHER)
+ assert_return(addr_len == ETH_ALEN, -EINVAL);
+ else if (arp_type == ARPHRD_INFINIBAND)
+ assert_return(addr_len == INFINIBAND_ALEN, -EINVAL);
+ else
+ return -EINVAL;
+
+ unaligned_write_be16(&duid->type, DUID_TYPE_LL);
+ unaligned_write_be16(&duid->ll.htype, arp_type);
+ memcpy(duid->ll.haddr, addr, addr_len);
+
+ *len = sizeof(duid->type) + sizeof(duid->ll.htype) + addr_len;
+
+ return 0;
+}
+
+int dhcp_identifier_set_duid_en(struct duid *duid, size_t *len) {
+ sd_id128_t machine_id;
+ uint64_t hash;
+ int r;
+
+ assert(duid);
+ assert(len);
+
+ r = sd_id128_get_machine(&machine_id);
+ if (r < 0) {
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ machine_id = SD_ID128_MAKE(01, 02, 03, 04, 05, 06, 07, 08, 09, 0a, 0b, 0c, 0d, 0e, 0f, 10);
+#else
+ return r;
+#endif
+ }
+
+ unaligned_write_be16(&duid->type, DUID_TYPE_EN);
+ unaligned_write_be32(&duid->en.pen, SYSTEMD_PEN);
+
+ *len = sizeof(duid->type) + sizeof(duid->en);
+
+ /* a bit of snake-oil perhaps, but no need to expose the machine-id
+ * directly; duid->en.id might not be aligned, so we need to copy */
+ hash = htole64(siphash24(&machine_id, sizeof(machine_id), HASH_KEY.bytes));
+ memcpy(duid->en.id, &hash, sizeof(duid->en.id));
+
+ return 0;
+}
+
+int dhcp_identifier_set_duid_uuid(struct duid *duid, size_t *len) {
+ sd_id128_t machine_id;
+ int r;
+
+ assert(duid);
+ assert(len);
+
+ r = sd_id128_get_machine_app_specific(APPLICATION_ID, &machine_id);
+ if (r < 0)
+ return r;
+
+ unaligned_write_be16(&duid->type, DUID_TYPE_UUID);
+ memcpy(&duid->raw.data, &machine_id, sizeof(machine_id));
+
+ *len = sizeof(duid->type) + sizeof(machine_id);
+
+ return 0;
+}
+
+int dhcp_identifier_set_iaid(
+ int ifindex,
+ const uint8_t *mac,
+ size_t mac_len,
+ bool legacy_unstable_byteorder,
+ void *_id) {
+ /* name is a pointer to memory in the sd_device struct, so must
+ * have the same scope */
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ const char *name = NULL;
+ uint64_t id;
+ uint32_t id32;
+
+ if (detect_container() <= 0) {
+ /* not in a container, udev will be around */
+ char ifindex_str[1 + DECIMAL_STR_MAX(int)];
+ int r;
+
+ xsprintf(ifindex_str, "n%d", ifindex);
+ if (sd_device_new_from_device_id(&device, ifindex_str) >= 0) {
+ r = sd_device_get_is_initialized(device);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ /* not yet ready */
+ return -EBUSY;
+
+ name = net_get_name(device);
+ }
+ }
+
+ if (name)
+ id = siphash24(name, strlen(name), HASH_KEY.bytes);
+ else
+ /* fall back to MAC address if no predictable name available */
+ id = siphash24(mac, mac_len, HASH_KEY.bytes);
+
+ id32 = (id & 0xffffffff) ^ (id >> 32);
+
+ if (legacy_unstable_byteorder)
+ /* for historical reasons (a bug), the bits were swapped and thus
+ * the result was endianness dependent. Preserve that behavior. */
+ id32 = __bswap_32(id32);
+ else
+ /* the fixed behavior returns a stable byte order. Since LE is expected
+ * to be more common, swap the bytes on LE to give the same as legacy
+ * behavior. */
+ id32 = be32toh(id32);
+
+ unaligned_write_ne32(_id, id32);
+ return 0;
+}
diff --git a/src/libsystemd-network/dhcp-identifier.h b/src/libsystemd-network/dhcp-identifier.h
new file mode 100644
index 0000000..b311512
--- /dev/null
+++ b/src/libsystemd-network/dhcp-identifier.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-id128.h"
+
+#include "macro.h"
+#include "sparse-endian.h"
+#include "time-util.h"
+#include "unaligned.h"
+
+typedef enum DUIDType {
+ DUID_TYPE_LLT = 1,
+ DUID_TYPE_EN = 2,
+ DUID_TYPE_LL = 3,
+ DUID_TYPE_UUID = 4,
+ _DUID_TYPE_MAX,
+ _DUID_TYPE_INVALID = -1,
+} DUIDType;
+
+/* RFC 3315 section 9.1:
+ * A DUID can be no more than 128 octets long (not including the type code).
+ */
+#define MAX_DUID_LEN 128
+
+/* https://tools.ietf.org/html/rfc3315#section-9.1 */
+struct duid {
+ be16_t type;
+ union {
+ struct {
+ /* DUID_TYPE_LLT */
+ be16_t htype;
+ be32_t time;
+ uint8_t haddr[0];
+ } _packed_ llt;
+ struct {
+ /* DUID_TYPE_EN */
+ be32_t pen;
+ uint8_t id[8];
+ } _packed_ en;
+ struct {
+ /* DUID_TYPE_LL */
+ be16_t htype;
+ uint8_t haddr[0];
+ } _packed_ ll;
+ struct {
+ /* DUID_TYPE_UUID */
+ sd_id128_t uuid;
+ } _packed_ uuid;
+ struct {
+ uint8_t data[MAX_DUID_LEN];
+ } _packed_ raw;
+ };
+} _packed_;
+
+int dhcp_validate_duid_len(uint16_t duid_type, size_t duid_len, bool strict);
+int dhcp_identifier_set_duid_llt(struct duid *duid, usec_t t, const uint8_t *addr, size_t addr_len, uint16_t arp_type, size_t *len);
+int dhcp_identifier_set_duid_ll(struct duid *duid, const uint8_t *addr, size_t addr_len, uint16_t arp_type, size_t *len);
+int dhcp_identifier_set_duid_en(struct duid *duid, size_t *len);
+int dhcp_identifier_set_duid_uuid(struct duid *duid, size_t *len);
+int dhcp_identifier_set_iaid(int ifindex, const uint8_t *mac, size_t mac_len, bool legacy_unstable_byteorder, void *_id);
diff --git a/src/libsystemd-network/dhcp-internal.h b/src/libsystemd-network/dhcp-internal.h
new file mode 100644
index 0000000..a0f9c22
--- /dev/null
+++ b/src/libsystemd-network/dhcp-internal.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <linux/if_packet.h>
+#include <net/ethernet.h>
+#include <net/if_arp.h>
+#include <stdint.h>
+
+#include "sd-dhcp-client.h"
+
+#include "dhcp-protocol.h"
+#include "socket-util.h"
+
+int dhcp_network_bind_raw_socket(int ifindex, union sockaddr_union *link,
+ uint32_t xid, const uint8_t *mac_addr,
+ size_t mac_addr_len, uint16_t arp_type,
+ uint16_t port);
+int dhcp_network_bind_udp_socket(int ifindex, be32_t address, uint16_t port);
+int dhcp_network_send_raw_socket(int s, const union sockaddr_union *link,
+ const void *packet, size_t len);
+int dhcp_network_send_udp_socket(int s, be32_t address, uint16_t port,
+ const void *packet, size_t len);
+
+int dhcp_option_append(DHCPMessage *message, size_t size, size_t *offset, uint8_t overload,
+ uint8_t code, size_t optlen, const void *optval);
+
+typedef int (*dhcp_option_callback_t)(uint8_t code, uint8_t len,
+ const void *option, void *userdata);
+
+int dhcp_option_parse(DHCPMessage *message, size_t len, dhcp_option_callback_t cb, void *userdata, char **error_message);
+
+int dhcp_message_init(DHCPMessage *message, uint8_t op, uint32_t xid,
+ uint8_t type, uint16_t arp_type, size_t optlen,
+ size_t *optoffset);
+
+uint16_t dhcp_packet_checksum(uint8_t *buf, size_t len);
+
+void dhcp_packet_append_ip_headers(DHCPPacket *packet, be32_t source_addr,
+ uint16_t source, be32_t destination_addr,
+ uint16_t destination, uint16_t len);
+
+int dhcp_packet_verify_headers(DHCPPacket *packet, size_t len, bool checksum, uint16_t port);
+
+/* If we are invoking callbacks of a dhcp-client, ensure unreffing the
+ * client from the callback doesn't destroy the object we are working
+ * on */
+#define DHCP_CLIENT_DONT_DESTROY(client) \
+ _cleanup_(sd_dhcp_client_unrefp) _unused_ sd_dhcp_client *_dont_destroy_##client = sd_dhcp_client_ref(client)
+
+#define log_dhcp_client_errno(client, error, fmt, ...) log_internal(LOG_DEBUG, error, __FILE__, __LINE__, __func__, "DHCP CLIENT (0x%x): " fmt, client->xid, ##__VA_ARGS__)
+#define log_dhcp_client(client, fmt, ...) log_dhcp_client_errno(client, 0, fmt, ##__VA_ARGS__)
diff --git a/src/libsystemd-network/dhcp-lease-internal.h b/src/libsystemd-network/dhcp-lease-internal.h
new file mode 100644
index 0000000..9d245a9
--- /dev/null
+++ b/src/libsystemd-network/dhcp-lease-internal.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <stdint.h>
+#include <linux/if_packet.h>
+
+#include "sd-dhcp-client.h"
+
+#include "dhcp-protocol.h"
+#include "list.h"
+#include "util.h"
+
+struct sd_dhcp_route {
+ struct in_addr dst_addr;
+ struct in_addr gw_addr;
+ unsigned char dst_prefixlen;
+
+ uint8_t option;
+};
+
+struct sd_dhcp_raw_option {
+ LIST_FIELDS(struct sd_dhcp_raw_option, options);
+
+ uint8_t tag;
+ uint8_t length;
+ void *data;
+};
+
+struct sd_dhcp_lease {
+ unsigned n_ref;
+
+ /* each 0 if unset */
+ uint32_t t1;
+ uint32_t t2;
+ uint32_t lifetime;
+
+ /* each 0 if unset */
+ be32_t address;
+ be32_t server_address;
+ be32_t router;
+ be32_t next_server;
+
+ bool have_subnet_mask;
+ be32_t subnet_mask;
+
+ bool have_broadcast;
+ be32_t broadcast;
+
+ struct in_addr *dns;
+ size_t dns_size;
+
+ struct in_addr *ntp;
+ size_t ntp_size;
+
+ struct sd_dhcp_route *static_route;
+ size_t static_route_size, static_route_allocated;
+
+ uint16_t mtu; /* 0 if unset */
+
+ char *domainname;
+ char **search_domains;
+ char *hostname;
+ char *root_path;
+
+ void *client_id;
+ size_t client_id_len;
+
+ void *vendor_specific;
+ size_t vendor_specific_len;
+
+ char *timezone;
+
+ LIST_HEAD(struct sd_dhcp_raw_option, private_options);
+};
+
+int dhcp_lease_new(sd_dhcp_lease **ret);
+
+int dhcp_lease_parse_options(uint8_t code, uint8_t len, const void *option, void *userdata);
+int dhcp_lease_parse_search_domains(const uint8_t *option, size_t len, char ***domains);
+int dhcp_lease_insert_private_option(sd_dhcp_lease *lease, uint8_t tag, const void *data, uint8_t len);
+
+int dhcp_lease_set_default_subnet_mask(sd_dhcp_lease *lease);
+
+int dhcp_lease_set_client_id(sd_dhcp_lease *lease, const void *client_id, size_t client_id_len);
+
+int dhcp_lease_save(sd_dhcp_lease *lease, const char *lease_file);
+int dhcp_lease_load(sd_dhcp_lease **ret, const char *lease_file);
diff --git a/src/libsystemd-network/dhcp-network.c b/src/libsystemd-network/dhcp-network.c
new file mode 100644
index 0000000..0e5b414
--- /dev/null
+++ b/src/libsystemd-network/dhcp-network.c
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <linux/filter.h>
+#include <linux/if_infiniband.h>
+#include <linux/if_packet.h>
+
+#include "dhcp-internal.h"
+#include "fd-util.h"
+#include "socket-util.h"
+#include "unaligned.h"
+
+static int _bind_raw_socket(int ifindex, union sockaddr_union *link,
+ uint32_t xid, const uint8_t *mac_addr,
+ size_t mac_addr_len,
+ const uint8_t *bcast_addr,
+ const struct ether_addr *eth_mac,
+ uint16_t arp_type, uint8_t dhcp_hlen,
+ uint16_t port) {
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_W + BPF_LEN, 0), /* A <- packet length */
+ BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, sizeof(DHCPPacket), 1, 0), /* packet >= DHCPPacket ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(DHCPPacket, ip.protocol)), /* A <- IP protocol */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 1, 0), /* IP protocol == UDP ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(DHCPPacket, ip.frag_off)), /* A <- Flags */
+ BPF_STMT(BPF_ALU + BPF_AND + BPF_K, 0x20), /* A <- A & 0x20 (More Fragments bit) */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 1, 0), /* A == 0 ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(DHCPPacket, ip.frag_off)), /* A <- Flags + Fragment offset */
+ BPF_STMT(BPF_ALU + BPF_AND + BPF_K, 0x1fff), /* A <- A & 0x1fff (Fragment offset) */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 1, 0), /* A == 0 ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(DHCPPacket, udp.dest)), /* A <- UDP destination port */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, port, 1, 0), /* UDP destination port == DHCP client port ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(DHCPPacket, dhcp.op)), /* A <- DHCP op */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, BOOTREPLY, 1, 0), /* op == BOOTREPLY ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(DHCPPacket, dhcp.htype)), /* A <- DHCP header type */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, arp_type, 1, 0), /* header type == arp_type ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(DHCPPacket, dhcp.hlen)), /* A <- MAC address length */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, dhcp_hlen, 1, 0), /* address length == dhcp_hlen ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(DHCPPacket, dhcp.xid)), /* A <- client identifier */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, xid, 1, 0), /* client identifier == xid ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_IMM, unaligned_read_be32(&eth_mac->ether_addr_octet[0])), /* A <- 4 bytes of client's MAC */
+ BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(DHCPPacket, dhcp.chaddr)), /* A <- 4 bytes of MAC from dhcp.chaddr */
+ BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* A xor X */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 1, 0), /* A == 0 ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_IMM, unaligned_read_be16(&eth_mac->ether_addr_octet[4])), /* A <- remainder of client's MAC */
+ BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(DHCPPacket, dhcp.chaddr) + 4), /* A <- remainder of MAC from dhcp.chaddr */
+ BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* A xor X */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 1, 0), /* A == 0 ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(DHCPPacket, dhcp.magic)), /* A <- DHCP magic cookie */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DHCP_MAGIC_COOKIE, 1, 0), /* cookie == DHCP magic cookie ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_RET + BPF_K, 65535), /* return all */
+ };
+ struct sock_fprog fprog = {
+ .len = ELEMENTSOF(filter),
+ .filter = filter
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(ifindex > 0);
+ assert(link);
+
+ s = socket(AF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return -errno;
+
+ r = setsockopt_int(s, SOL_PACKET, PACKET_AUXDATA, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt(s, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
+ if (r < 0)
+ return -errno;
+
+ link->ll = (struct sockaddr_ll) {
+ .sll_family = AF_PACKET,
+ .sll_protocol = htobe16(ETH_P_IP),
+ .sll_ifindex = ifindex,
+ .sll_hatype = htobe16(arp_type),
+ .sll_halen = mac_addr_len,
+ };
+ memcpy(link->ll.sll_addr, bcast_addr, mac_addr_len);
+
+ r = bind(s, &link->sa, SOCKADDR_LL_LEN(link->ll));
+ if (r < 0)
+ return -errno;
+
+ return TAKE_FD(s);
+}
+
+int dhcp_network_bind_raw_socket(int ifindex, union sockaddr_union *link,
+ uint32_t xid, const uint8_t *mac_addr,
+ size_t mac_addr_len, uint16_t arp_type,
+ uint16_t port) {
+ static const uint8_t eth_bcast[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+ /* Default broadcast address for IPoIB */
+ static const uint8_t ib_bcast[] = {
+ 0x00, 0xff, 0xff, 0xff, 0xff, 0x12, 0x40, 0x1b,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff
+ };
+ struct ether_addr eth_mac = { { 0, 0, 0, 0, 0, 0 } };
+ const uint8_t *bcast_addr = NULL;
+ uint8_t dhcp_hlen = 0;
+
+ if (arp_type == ARPHRD_ETHER) {
+ assert_return(mac_addr_len == ETH_ALEN, -EINVAL);
+ memcpy(&eth_mac, mac_addr, ETH_ALEN);
+ bcast_addr = eth_bcast;
+ dhcp_hlen = ETH_ALEN;
+ } else if (arp_type == ARPHRD_INFINIBAND) {
+ assert_return(mac_addr_len == INFINIBAND_ALEN, -EINVAL);
+ bcast_addr = ib_bcast;
+ } else
+ return -EINVAL;
+
+ return _bind_raw_socket(ifindex, link, xid, mac_addr, mac_addr_len,
+ bcast_addr, &eth_mac, arp_type, dhcp_hlen, port);
+}
+
+int dhcp_network_bind_udp_socket(int ifindex, be32_t address, uint16_t port) {
+ union sockaddr_union src = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = htobe16(port),
+ .in.sin_addr.s_addr = address,
+ };
+ _cleanup_close_ int s = -1;
+ char ifname[IF_NAMESIZE] = "";
+ int r;
+
+ s = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return -errno;
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_TOS, IPTOS_CLASS_CS6);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return r;
+
+ if (ifindex > 0) {
+ if (if_indextoname(ifindex, ifname) == 0)
+ return -errno;
+
+ r = setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE, ifname, strlen(ifname));
+ if (r < 0)
+ return -errno;
+ }
+
+ if (address == INADDR_ANY) {
+ r = setsockopt_int(s, IPPROTO_IP, IP_PKTINFO, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, SOL_SOCKET, SO_BROADCAST, true);
+ if (r < 0)
+ return r;
+
+ } else {
+ r = setsockopt_int(s, IPPROTO_IP, IP_FREEBIND, true);
+ if (r < 0)
+ return r;
+ }
+
+ r = bind(s, &src.sa, sizeof(src.in));
+ if (r < 0)
+ return -errno;
+
+ return TAKE_FD(s);
+}
+
+int dhcp_network_send_raw_socket(int s, const union sockaddr_union *link,
+ const void *packet, size_t len) {
+ int r;
+
+ assert(link);
+ assert(packet);
+ assert(len);
+
+ r = sendto(s, packet, len, 0, &link->sa, SOCKADDR_LL_LEN(link->ll));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int dhcp_network_send_udp_socket(int s, be32_t address, uint16_t port,
+ const void *packet, size_t len) {
+ union sockaddr_union dest = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = htobe16(port),
+ .in.sin_addr.s_addr = address,
+ };
+ int r;
+
+ assert(s >= 0);
+ assert(packet);
+ assert(len);
+
+ r = sendto(s, packet, len, 0, &dest.sa, sizeof(dest.in));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/libsystemd-network/dhcp-option.c b/src/libsystemd-network/dhcp-option.c
new file mode 100644
index 0000000..ad3f925
--- /dev/null
+++ b/src/libsystemd-network/dhcp-option.c
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "utf8.h"
+#include "strv.h"
+
+#include "dhcp-internal.h"
+
+static int option_append(uint8_t options[], size_t size, size_t *offset,
+ uint8_t code, size_t optlen, const void *optval) {
+ assert(options);
+ assert(offset);
+
+ if (code != SD_DHCP_OPTION_END)
+ /* always make sure there is space for an END option */
+ size--;
+
+ switch (code) {
+
+ case SD_DHCP_OPTION_PAD:
+ case SD_DHCP_OPTION_END:
+ if (size < *offset + 1)
+ return -ENOBUFS;
+
+ options[*offset] = code;
+ *offset += 1;
+ break;
+
+ case SD_DHCP_OPTION_USER_CLASS: {
+ size_t len = 0;
+ char **s;
+
+ STRV_FOREACH(s, (char **) optval)
+ len += strlen(*s) + 1;
+
+ if (size < *offset + len + 2)
+ return -ENOBUFS;
+
+ options[*offset] = code;
+ options[*offset + 1] = len;
+ *offset += 2;
+
+ STRV_FOREACH(s, (char **) optval) {
+ len = strlen(*s);
+
+ if (len > 255)
+ return -ENAMETOOLONG;
+
+ options[*offset] = len;
+
+ memcpy_safe(&options[*offset + 1], *s, len);
+ *offset += len + 1;
+ }
+
+ break;
+ }
+ default:
+ if (size < *offset + optlen + 2)
+ return -ENOBUFS;
+
+ options[*offset] = code;
+ options[*offset + 1] = optlen;
+
+ memcpy_safe(&options[*offset + 2], optval, optlen);
+ *offset += optlen + 2;
+
+ break;
+ }
+
+ return 0;
+}
+
+int dhcp_option_append(DHCPMessage *message, size_t size, size_t *offset,
+ uint8_t overload,
+ uint8_t code, size_t optlen, const void *optval) {
+ size_t file_offset = 0, sname_offset =0;
+ bool file, sname;
+ int r;
+
+ assert(message);
+ assert(offset);
+
+ file = overload & DHCP_OVERLOAD_FILE;
+ sname = overload & DHCP_OVERLOAD_SNAME;
+
+ if (*offset < size) {
+ /* still space in the options array */
+ r = option_append(message->options, size, offset, code, optlen, optval);
+ if (r >= 0)
+ return 0;
+ else if (r == -ENOBUFS && (file || sname)) {
+ /* did not fit, but we have more buffers to try
+ close the options array and move the offset to its end */
+ r = option_append(message->options, size, offset, SD_DHCP_OPTION_END, 0, NULL);
+ if (r < 0)
+ return r;
+
+ *offset = size;
+ } else
+ return r;
+ }
+
+ if (overload & DHCP_OVERLOAD_FILE) {
+ file_offset = *offset - size;
+
+ if (file_offset < sizeof(message->file)) {
+ /* still space in the 'file' array */
+ r = option_append(message->file, sizeof(message->file), &file_offset, code, optlen, optval);
+ if (r >= 0) {
+ *offset = size + file_offset;
+ return 0;
+ } else if (r == -ENOBUFS && sname) {
+ /* did not fit, but we have more buffers to try
+ close the file array and move the offset to its end */
+ r = option_append(message->options, size, offset, SD_DHCP_OPTION_END, 0, NULL);
+ if (r < 0)
+ return r;
+
+ *offset = size + sizeof(message->file);
+ } else
+ return r;
+ }
+ }
+
+ if (overload & DHCP_OVERLOAD_SNAME) {
+ sname_offset = *offset - size - (file ? sizeof(message->file) : 0);
+
+ if (sname_offset < sizeof(message->sname)) {
+ /* still space in the 'sname' array */
+ r = option_append(message->sname, sizeof(message->sname), &sname_offset, code, optlen, optval);
+ if (r >= 0) {
+ *offset = size + (file ? sizeof(message->file) : 0) + sname_offset;
+ return 0;
+ } else {
+ /* no space, or other error, give up */
+ return r;
+ }
+ }
+ }
+
+ return -ENOBUFS;
+}
+
+static int parse_options(const uint8_t options[], size_t buflen, uint8_t *overload,
+ uint8_t *message_type, char **error_message, dhcp_option_callback_t cb,
+ void *userdata) {
+ uint8_t code, len;
+ const uint8_t *option;
+ size_t offset = 0;
+
+ while (offset < buflen) {
+ code = options[offset ++];
+
+ switch (code) {
+ case SD_DHCP_OPTION_PAD:
+ continue;
+
+ case SD_DHCP_OPTION_END:
+ return 0;
+ }
+
+ if (buflen < offset + 1)
+ return -ENOBUFS;
+
+ len = options[offset ++];
+
+ if (buflen < offset + len)
+ return -EINVAL;
+
+ option = &options[offset];
+
+ switch (code) {
+ case SD_DHCP_OPTION_MESSAGE_TYPE:
+ if (len != 1)
+ return -EINVAL;
+
+ if (message_type)
+ *message_type = *option;
+
+ break;
+
+ case SD_DHCP_OPTION_ERROR_MESSAGE:
+ if (len == 0)
+ return -EINVAL;
+
+ if (error_message) {
+ _cleanup_free_ char *string = NULL;
+
+ /* Accept a trailing NUL byte */
+ if (memchr(option, 0, len - 1))
+ return -EINVAL;
+
+ string = strndup((const char *) option, len);
+ if (!string)
+ return -ENOMEM;
+
+ if (!ascii_is_valid(string))
+ return -EINVAL;
+
+ free_and_replace(*error_message, string);
+ }
+
+ break;
+ case SD_DHCP_OPTION_OVERLOAD:
+ if (len != 1)
+ return -EINVAL;
+
+ if (overload)
+ *overload = *option;
+
+ break;
+
+ default:
+ if (cb)
+ cb(code, len, option, userdata);
+
+ break;
+ }
+
+ offset += len;
+ }
+
+ if (offset < buflen)
+ return -EINVAL;
+
+ return 0;
+}
+
+int dhcp_option_parse(DHCPMessage *message, size_t len, dhcp_option_callback_t cb, void *userdata, char **_error_message) {
+ _cleanup_free_ char *error_message = NULL;
+ uint8_t overload = 0;
+ uint8_t message_type = 0;
+ int r;
+
+ if (!message)
+ return -EINVAL;
+
+ if (len < sizeof(DHCPMessage))
+ return -EINVAL;
+
+ len -= sizeof(DHCPMessage);
+
+ r = parse_options(message->options, len, &overload, &message_type, &error_message, cb, userdata);
+ if (r < 0)
+ return r;
+
+ if (overload & DHCP_OVERLOAD_FILE) {
+ r = parse_options(message->file, sizeof(message->file), NULL, &message_type, &error_message, cb, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ if (overload & DHCP_OVERLOAD_SNAME) {
+ r = parse_options(message->sname, sizeof(message->sname), NULL, &message_type, &error_message, cb, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ if (message_type == 0)
+ return -ENOMSG;
+
+ if (_error_message && IN_SET(message_type, DHCP_NAK, DHCP_DECLINE))
+ *_error_message = TAKE_PTR(error_message);
+
+ return message_type;
+}
diff --git a/src/libsystemd-network/dhcp-packet.c b/src/libsystemd-network/dhcp-packet.c
new file mode 100644
index 0000000..ad5f8e2
--- /dev/null
+++ b/src/libsystemd-network/dhcp-packet.c
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <net/ethernet.h>
+#include <net/if_arp.h>
+#include <string.h>
+
+#include "dhcp-internal.h"
+#include "dhcp-protocol.h"
+
+#define DHCP_CLIENT_MIN_OPTIONS_SIZE 312
+
+int dhcp_message_init(DHCPMessage *message, uint8_t op, uint32_t xid,
+ uint8_t type, uint16_t arp_type, size_t optlen,
+ size_t *optoffset) {
+ size_t offset = 0;
+ int r;
+
+ assert(IN_SET(op, BOOTREQUEST, BOOTREPLY));
+ assert(IN_SET(arp_type, ARPHRD_ETHER, ARPHRD_INFINIBAND));
+
+ message->op = op;
+ message->htype = arp_type;
+ message->hlen = (arp_type == ARPHRD_ETHER) ? ETHER_ADDR_LEN : 0;
+ message->xid = htobe32(xid);
+ message->magic = htobe32(DHCP_MAGIC_COOKIE);
+
+ r = dhcp_option_append(message, optlen, &offset, 0,
+ SD_DHCP_OPTION_MESSAGE_TYPE, 1, &type);
+ if (r < 0)
+ return r;
+
+ *optoffset = offset;
+
+ return 0;
+}
+
+uint16_t dhcp_packet_checksum(uint8_t *buf, size_t len) {
+ uint64_t *buf_64 = (uint64_t*)buf;
+ uint64_t *end_64 = buf_64 + (len / sizeof(uint64_t));
+ uint64_t sum = 0;
+
+ /* See RFC1071 */
+
+ while (buf_64 < end_64) {
+ sum += *buf_64;
+ if (sum < *buf_64)
+ /* wrap around in one's complement */
+ sum++;
+
+ buf_64++;
+ }
+
+ if (len % sizeof(uint64_t)) {
+ /* If the buffer is not aligned to 64-bit, we need
+ to zero-pad the last few bytes and add them in */
+ uint64_t buf_tail = 0;
+
+ memcpy(&buf_tail, buf_64, len % sizeof(uint64_t));
+
+ sum += buf_tail;
+ if (sum < buf_tail)
+ /* wrap around */
+ sum++;
+ }
+
+ while (sum >> 16)
+ sum = (sum & 0xffff) + (sum >> 16);
+
+ return ~sum;
+}
+
+void dhcp_packet_append_ip_headers(DHCPPacket *packet, be32_t source_addr,
+ uint16_t source_port, be32_t destination_addr,
+ uint16_t destination_port, uint16_t len) {
+ packet->ip.version = IPVERSION;
+ packet->ip.ihl = DHCP_IP_SIZE / 4;
+ packet->ip.tot_len = htobe16(len);
+
+ packet->ip.tos = IPTOS_CLASS_CS6;
+
+ packet->ip.protocol = IPPROTO_UDP;
+ packet->ip.saddr = source_addr;
+ packet->ip.daddr = destination_addr;
+
+ packet->udp.source = htobe16(source_port);
+ packet->udp.dest = htobe16(destination_port);
+
+ packet->udp.len = htobe16(len - DHCP_IP_SIZE);
+
+ packet->ip.check = packet->udp.len;
+ packet->udp.check = dhcp_packet_checksum((uint8_t*)&packet->ip.ttl, len - 8);
+
+ packet->ip.ttl = IPDEFTTL;
+ packet->ip.check = 0;
+ packet->ip.check = dhcp_packet_checksum((uint8_t*)&packet->ip, DHCP_IP_SIZE);
+}
+
+int dhcp_packet_verify_headers(DHCPPacket *packet, size_t len, bool checksum, uint16_t port) {
+ size_t hdrlen;
+
+ assert(packet);
+
+ /* IP */
+
+ if (packet->ip.version != IPVERSION)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: not IPv4");
+
+ if (packet->ip.ihl < 5)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: IPv4 IHL (%u words) invalid",
+ packet->ip.ihl);
+
+ hdrlen = packet->ip.ihl * 4;
+ if (hdrlen < 20)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: IPv4 IHL (%zu bytes) "
+ "smaller than minimum (20 bytes)",
+ hdrlen);
+
+ if (len < hdrlen)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: packet (%zu bytes) "
+ "smaller than expected (%zu) by IP header",
+ len, hdrlen);
+
+ /* UDP */
+
+ if (packet->ip.protocol != IPPROTO_UDP)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: not UDP");
+
+ if (len < hdrlen + be16toh(packet->udp.len))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: packet (%zu bytes) "
+ "smaller than expected (%zu) by UDP header",
+ len, hdrlen + be16toh(packet->udp.len));
+
+ if (be16toh(packet->udp.dest) != port)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: to port %u, which "
+ "is not the DHCP client port (%u)",
+ be16toh(packet->udp.dest), port);
+
+ /* checksums - computing these is relatively expensive, so only do it
+ if all the other checks have passed
+ */
+
+ if (dhcp_packet_checksum((uint8_t*)&packet->ip, hdrlen))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: invalid IP checksum");
+
+ if (checksum && packet->udp.check) {
+ packet->ip.check = packet->udp.len;
+ packet->ip.ttl = 0;
+
+ if (dhcp_packet_checksum((uint8_t*)&packet->ip.ttl,
+ be16toh(packet->udp.len) + 12))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: invalid UDP checksum");
+ }
+
+ return 0;
+}
diff --git a/src/libsystemd-network/dhcp-protocol.h b/src/libsystemd-network/dhcp-protocol.h
new file mode 100644
index 0000000..f036632
--- /dev/null
+++ b/src/libsystemd-network/dhcp-protocol.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <stdint.h>
+
+#include "macro.h"
+#include "sparse-endian.h"
+
+struct DHCPMessage {
+ uint8_t op;
+ uint8_t htype;
+ uint8_t hlen;
+ uint8_t hops;
+ be32_t xid;
+ be16_t secs;
+ be16_t flags;
+ be32_t ciaddr;
+ be32_t yiaddr;
+ be32_t siaddr;
+ be32_t giaddr;
+ uint8_t chaddr[16];
+ uint8_t sname[64];
+ uint8_t file[128];
+ be32_t magic;
+ uint8_t options[0];
+} _packed_;
+
+typedef struct DHCPMessage DHCPMessage;
+
+struct DHCPPacket {
+ struct iphdr ip;
+ struct udphdr udp;
+ DHCPMessage dhcp;
+} _packed_;
+
+typedef struct DHCPPacket DHCPPacket;
+
+#define DHCP_IP_SIZE (int32_t)(sizeof(struct iphdr))
+#define DHCP_IP_UDP_SIZE (int32_t)(sizeof(struct udphdr) + DHCP_IP_SIZE)
+#define DHCP_MESSAGE_SIZE (int32_t)(sizeof(DHCPMessage))
+#define DHCP_DEFAULT_MIN_SIZE 576 /* the minimum internet hosts must be able to receive */
+#define DHCP_MIN_OPTIONS_SIZE (DHCP_DEFAULT_MIN_SIZE - DHCP_IP_UDP_SIZE - DHCP_MESSAGE_SIZE)
+#define DHCP_MAGIC_COOKIE (uint32_t)(0x63825363)
+
+enum {
+ DHCP_PORT_SERVER = 67,
+ DHCP_PORT_CLIENT = 68,
+};
+
+enum DHCPState {
+ DHCP_STATE_INIT = 0,
+ DHCP_STATE_SELECTING = 1,
+ DHCP_STATE_INIT_REBOOT = 2,
+ DHCP_STATE_REBOOTING = 3,
+ DHCP_STATE_REQUESTING = 4,
+ DHCP_STATE_BOUND = 5,
+ DHCP_STATE_RENEWING = 6,
+ DHCP_STATE_REBINDING = 7,
+ DHCP_STATE_STOPPED = 8,
+};
+
+typedef enum DHCPState DHCPState;
+
+enum {
+ BOOTREQUEST = 1,
+ BOOTREPLY = 2,
+};
+
+enum {
+ DHCP_DISCOVER = 1,
+ DHCP_OFFER = 2,
+ DHCP_REQUEST = 3,
+ DHCP_DECLINE = 4,
+ DHCP_ACK = 5,
+ DHCP_NAK = 6,
+ DHCP_RELEASE = 7,
+ DHCP_INFORM = 8,
+ DHCP_FORCERENEW = 9,
+};
+
+enum {
+ DHCP_OVERLOAD_FILE = 1,
+ DHCP_OVERLOAD_SNAME = 2,
+};
+
+#define DHCP_MAX_FQDN_LENGTH 255
+
+enum {
+ DHCP_FQDN_FLAG_S = (1 << 0),
+ DHCP_FQDN_FLAG_O = (1 << 1),
+ DHCP_FQDN_FLAG_E = (1 << 2),
+ DHCP_FQDN_FLAG_N = (1 << 3),
+};
diff --git a/src/libsystemd-network/dhcp-server-internal.h b/src/libsystemd-network/dhcp-server-internal.h
new file mode 100644
index 0000000..8a7c5bc
--- /dev/null
+++ b/src/libsystemd-network/dhcp-server-internal.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include "sd-dhcp-server.h"
+#include "sd-event.h"
+
+#include "dhcp-internal.h"
+#include "hashmap.h"
+#include "log.h"
+#include "util.h"
+
+typedef struct DHCPClientId {
+ size_t length;
+ void *data;
+} DHCPClientId;
+
+typedef struct DHCPLease {
+ DHCPClientId client_id;
+
+ be32_t address;
+ be32_t gateway;
+ uint8_t chaddr[16];
+ usec_t expiration;
+} DHCPLease;
+
+struct sd_dhcp_server {
+ unsigned n_ref;
+
+ sd_event *event;
+ int event_priority;
+ sd_event_source *receive_message;
+ int fd;
+ int fd_raw;
+
+ int ifindex;
+ be32_t address;
+ be32_t netmask;
+ be32_t subnet;
+ uint32_t pool_offset;
+ uint32_t pool_size;
+
+ char *timezone;
+
+ struct in_addr *ntp, *dns;
+ unsigned n_ntp, n_dns;
+
+ bool emit_router;
+
+ Hashmap *leases_by_client_id;
+ DHCPLease **bound_leases;
+ DHCPLease invalid_lease;
+
+ uint32_t max_lease_time, default_lease_time;
+};
+
+typedef struct DHCPRequest {
+ /* received message */
+ DHCPMessage *message;
+
+ /* options */
+ DHCPClientId client_id;
+ size_t max_optlen;
+ be32_t server_id;
+ be32_t requested_ip;
+ uint32_t lifetime;
+} DHCPRequest;
+
+#define log_dhcp_server(client, fmt, ...) log_internal(LOG_DEBUG, 0, __FILE__, __LINE__, __func__, "DHCP SERVER: " fmt, ##__VA_ARGS__)
+#define log_dhcp_server_errno(client, error, fmt, ...) log_internal(LOG_DEBUG, error, __FILE__, __LINE__, __func__, "DHCP SERVER: " fmt, ##__VA_ARGS__)
+
+int dhcp_server_handle_message(sd_dhcp_server *server, DHCPMessage *message,
+ size_t length);
+int dhcp_server_send_packet(sd_dhcp_server *server,
+ DHCPRequest *req, DHCPPacket *packet,
+ int type, size_t optoffset);
+
+void client_id_hash_func(const DHCPClientId *p, struct siphash *state);
+int client_id_compare_func(const DHCPClientId *a, const DHCPClientId *b);
diff --git a/src/libsystemd-network/dhcp6-internal.h b/src/libsystemd-network/dhcp6-internal.h
new file mode 100644
index 0000000..157fc0a
--- /dev/null
+++ b/src/libsystemd-network/dhcp6-internal.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2014-2015 Intel Corporation. All rights reserved.
+***/
+
+#include <net/ethernet.h>
+#include <netinet/in.h>
+
+#include "sd-event.h"
+
+#include "list.h"
+#include "macro.h"
+#include "sparse-endian.h"
+
+/* Common option header */
+typedef struct DHCP6Option {
+ be16_t code;
+ be16_t len;
+ uint8_t data[];
+} _packed_ DHCP6Option;
+
+/* Address option */
+struct iaaddr {
+ struct in6_addr address;
+ be32_t lifetime_preferred;
+ be32_t lifetime_valid;
+} _packed_;
+
+/* Prefix Delegation Prefix option */
+struct iapdprefix {
+ be32_t lifetime_preferred;
+ be32_t lifetime_valid;
+ uint8_t prefixlen;
+ struct in6_addr address;
+} _packed_;
+
+typedef struct DHCP6Address DHCP6Address;
+
+struct DHCP6Address {
+ LIST_FIELDS(DHCP6Address, addresses);
+
+ union {
+ struct iaaddr iaaddr;
+ struct iapdprefix iapdprefix;
+ };
+};
+
+/* Non-temporary Address option */
+struct ia_na {
+ be32_t id;
+ be32_t lifetime_t1;
+ be32_t lifetime_t2;
+} _packed_;
+
+/* Prefix Delegation option */
+struct ia_pd {
+ be32_t id;
+ be32_t lifetime_t1;
+ be32_t lifetime_t2;
+} _packed_;
+
+/* Temporary Address option */
+struct ia_ta {
+ be32_t id;
+} _packed_;
+
+struct DHCP6IA {
+ uint16_t type;
+ union {
+ struct ia_na ia_na;
+ struct ia_pd ia_pd;
+ struct ia_ta ia_ta;
+ };
+
+ LIST_HEAD(DHCP6Address, addresses);
+};
+
+typedef struct DHCP6IA DHCP6IA;
+
+#define log_dhcp6_client_errno(p, error, fmt, ...) log_internal(LOG_DEBUG, error, __FILE__, __LINE__, __func__, "DHCPv6 CLIENT: " fmt, ##__VA_ARGS__)
+#define log_dhcp6_client(p, fmt, ...) log_dhcp6_client_errno(p, 0, fmt, ##__VA_ARGS__)
+
+int dhcp6_option_append(uint8_t **buf, size_t *buflen, uint16_t code,
+ size_t optlen, const void *optval);
+int dhcp6_option_append_ia(uint8_t **buf, size_t *buflen, const DHCP6IA *ia);
+int dhcp6_option_append_pd(uint8_t *buf, size_t len, const DHCP6IA *pd);
+int dhcp6_option_append_fqdn(uint8_t **buf, size_t *buflen, const char *fqdn);
+int dhcp6_option_parse(uint8_t **buf, size_t *buflen, uint16_t *optcode,
+ size_t *optlen, uint8_t **optvalue);
+int dhcp6_option_parse_status(DHCP6Option *option, size_t len);
+int dhcp6_option_parse_ia(DHCP6Option *iaoption, DHCP6IA *ia);
+int dhcp6_option_parse_ip6addrs(uint8_t *optval, uint16_t optlen,
+ struct in6_addr **addrs, size_t count,
+ size_t *allocated);
+int dhcp6_option_parse_domainname(const uint8_t *optval, uint16_t optlen,
+ char ***str_arr);
+
+int dhcp6_network_bind_udp_socket(int index, struct in6_addr *address);
+int dhcp6_network_send_udp_socket(int s, struct in6_addr *address,
+ const void *packet, size_t len);
+
+const char *dhcp6_message_type_to_string(int s) _const_;
+int dhcp6_message_type_from_string(const char *s) _pure_;
+const char *dhcp6_message_status_to_string(int s) _const_;
+int dhcp6_message_status_from_string(const char *s) _pure_;
diff --git a/src/libsystemd-network/dhcp6-lease-internal.h b/src/libsystemd-network/dhcp6-lease-internal.h
new file mode 100644
index 0000000..e004f48
--- /dev/null
+++ b/src/libsystemd-network/dhcp6-lease-internal.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2014-2015 Intel Corporation. All rights reserved.
+***/
+
+#include <stdint.h>
+
+#include "sd-dhcp6-lease.h"
+
+#include "dhcp6-internal.h"
+
+struct sd_dhcp6_lease {
+ unsigned n_ref;
+
+ uint8_t *serverid;
+ size_t serverid_len;
+ uint8_t preference;
+ bool rapid_commit;
+
+ DHCP6IA ia;
+ DHCP6IA pd;
+
+ DHCP6Address *addr_iter;
+ DHCP6Address *prefix_iter;
+
+ struct in6_addr *dns;
+ size_t dns_count;
+ size_t dns_allocated;
+ char **domains;
+ size_t domains_count;
+ struct in6_addr *ntp;
+ size_t ntp_count;
+ size_t ntp_allocated;
+ char **ntp_fqdn;
+ size_t ntp_fqdn_count;
+};
+
+int dhcp6_lease_ia_rebind_expire(const DHCP6IA *ia, uint32_t *expire);
+DHCP6IA *dhcp6_lease_free_ia(DHCP6IA *ia);
+
+int dhcp6_lease_set_serverid(sd_dhcp6_lease *lease, const uint8_t *id,
+ size_t len);
+int dhcp6_lease_get_serverid(sd_dhcp6_lease *lease, uint8_t **id, size_t *len);
+int dhcp6_lease_set_preference(sd_dhcp6_lease *lease, uint8_t preference);
+int dhcp6_lease_get_preference(sd_dhcp6_lease *lease, uint8_t *preference);
+int dhcp6_lease_set_rapid_commit(sd_dhcp6_lease *lease);
+int dhcp6_lease_get_rapid_commit(sd_dhcp6_lease *lease, bool *rapid_commit);
+
+int dhcp6_lease_get_iaid(sd_dhcp6_lease *lease, be32_t *iaid);
+int dhcp6_lease_get_pd_iaid(sd_dhcp6_lease *lease, be32_t *iaid);
+
+int dhcp6_lease_set_dns(sd_dhcp6_lease *lease, uint8_t *optval, size_t optlen);
+int dhcp6_lease_set_domains(sd_dhcp6_lease *lease, uint8_t *optval,
+ size_t optlen);
+int dhcp6_lease_set_ntp(sd_dhcp6_lease *lease, uint8_t *optval, size_t optlen);
+int dhcp6_lease_set_sntp(sd_dhcp6_lease *lease, uint8_t *optval,
+ size_t optlen) ;
+
+int dhcp6_lease_new(sd_dhcp6_lease **ret);
diff --git a/src/libsystemd-network/dhcp6-network.c b/src/libsystemd-network/dhcp6-network.c
new file mode 100644
index 0000000..580f43b
--- /dev/null
+++ b/src/libsystemd-network/dhcp6-network.c
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <netinet/ip6.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <linux/if_packet.h>
+
+#include "dhcp6-internal.h"
+#include "dhcp6-protocol.h"
+#include "fd-util.h"
+#include "socket-util.h"
+
+int dhcp6_network_bind_udp_socket(int index, struct in6_addr *local_address) {
+ union sockaddr_union src = {
+ .in6.sin6_family = AF_INET6,
+ .in6.sin6_port = htobe16(DHCP6_PORT_CLIENT),
+ .in6.sin6_scope_id = index,
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(index > 0);
+ assert(local_address);
+
+ src.in6.sin6_addr = *local_address;
+
+ s = socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_UDP);
+ if (s < 0)
+ return -errno;
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_V6ONLY, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, false);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return r;
+
+ r = bind(s, &src.sa, sizeof(src.in6));
+ if (r < 0)
+ return -errno;
+
+ return TAKE_FD(s);
+}
+
+int dhcp6_network_send_udp_socket(int s, struct in6_addr *server_address,
+ const void *packet, size_t len) {
+ union sockaddr_union dest = {
+ .in6.sin6_family = AF_INET6,
+ .in6.sin6_port = htobe16(DHCP6_PORT_SERVER),
+ };
+ int r;
+
+ assert(server_address);
+
+ memcpy(&dest.in6.sin6_addr, server_address, sizeof(dest.in6.sin6_addr));
+
+ r = sendto(s, packet, len, 0, &dest.sa, sizeof(dest.in6));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/libsystemd-network/dhcp6-option.c b/src/libsystemd-network/dhcp6-option.c
new file mode 100644
index 0000000..a2aac9a
--- /dev/null
+++ b/src/libsystemd-network/dhcp6-option.c
@@ -0,0 +1,592 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014-2015 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <string.h>
+
+#include "sd-dhcp6-client.h"
+
+#include "alloc-util.h"
+#include "dhcp6-internal.h"
+#include "dhcp6-lease-internal.h"
+#include "dhcp6-protocol.h"
+#include "dns-domain.h"
+#include "sparse-endian.h"
+#include "strv.h"
+#include "unaligned.h"
+#include "util.h"
+
+typedef struct DHCP6StatusOption {
+ struct DHCP6Option option;
+ be16_t status;
+ char msg[];
+} _packed_ DHCP6StatusOption;
+
+typedef struct DHCP6AddressOption {
+ struct DHCP6Option option;
+ struct iaaddr iaaddr;
+ uint8_t options[];
+} _packed_ DHCP6AddressOption;
+
+typedef struct DHCP6PDPrefixOption {
+ struct DHCP6Option option;
+ struct iapdprefix iapdprefix;
+ uint8_t options[];
+} _packed_ DHCP6PDPrefixOption;
+
+#define DHCP6_OPTION_IA_NA_LEN (sizeof(struct ia_na))
+#define DHCP6_OPTION_IA_PD_LEN (sizeof(struct ia_pd))
+#define DHCP6_OPTION_IA_TA_LEN (sizeof(struct ia_ta))
+
+static int option_append_hdr(uint8_t **buf, size_t *buflen, uint16_t optcode,
+ size_t optlen) {
+ DHCP6Option *option = (DHCP6Option*) *buf;
+
+ assert_return(buf, -EINVAL);
+ assert_return(*buf, -EINVAL);
+ assert_return(buflen, -EINVAL);
+
+ if (optlen > 0xffff || *buflen < optlen + offsetof(DHCP6Option, data))
+ return -ENOBUFS;
+
+ option->code = htobe16(optcode);
+ option->len = htobe16(optlen);
+
+ *buf += offsetof(DHCP6Option, data);
+ *buflen -= offsetof(DHCP6Option, data);
+
+ return 0;
+}
+
+int dhcp6_option_append(uint8_t **buf, size_t *buflen, uint16_t code,
+ size_t optlen, const void *optval) {
+ int r;
+
+ assert_return(optval || optlen == 0, -EINVAL);
+
+ r = option_append_hdr(buf, buflen, code, optlen);
+ if (r < 0)
+ return r;
+
+ memcpy_safe(*buf, optval, optlen);
+
+ *buf += optlen;
+ *buflen -= optlen;
+
+ return 0;
+}
+
+int dhcp6_option_append_ia(uint8_t **buf, size_t *buflen, const DHCP6IA *ia) {
+ uint16_t len;
+ uint8_t *ia_hdr;
+ size_t iaid_offset, ia_buflen, ia_addrlen = 0;
+ DHCP6Address *addr;
+ int r;
+
+ assert_return(buf, -EINVAL);
+ assert_return(*buf, -EINVAL);
+ assert_return(buflen, -EINVAL);
+ assert_return(ia, -EINVAL);
+
+ switch (ia->type) {
+ case SD_DHCP6_OPTION_IA_NA:
+ len = DHCP6_OPTION_IA_NA_LEN;
+ iaid_offset = offsetof(DHCP6IA, ia_na);
+ break;
+
+ case SD_DHCP6_OPTION_IA_TA:
+ len = DHCP6_OPTION_IA_TA_LEN;
+ iaid_offset = offsetof(DHCP6IA, ia_ta);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if (*buflen < offsetof(DHCP6Option, data) + len)
+ return -ENOBUFS;
+
+ ia_hdr = *buf;
+ ia_buflen = *buflen;
+
+ *buf += offsetof(DHCP6Option, data);
+ *buflen -= offsetof(DHCP6Option, data);
+
+ memcpy(*buf, (char*) ia + iaid_offset, len);
+
+ *buf += len;
+ *buflen -= len;
+
+ LIST_FOREACH(addresses, addr, ia->addresses) {
+ r = option_append_hdr(buf, buflen, SD_DHCP6_OPTION_IAADDR,
+ sizeof(addr->iaaddr));
+ if (r < 0)
+ return r;
+
+ memcpy(*buf, &addr->iaaddr, sizeof(addr->iaaddr));
+
+ *buf += sizeof(addr->iaaddr);
+ *buflen -= sizeof(addr->iaaddr);
+
+ ia_addrlen += offsetof(DHCP6Option, data) + sizeof(addr->iaaddr);
+ }
+
+ r = option_append_hdr(&ia_hdr, &ia_buflen, ia->type, len + ia_addrlen);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int dhcp6_option_append_fqdn(uint8_t **buf, size_t *buflen, const char *fqdn) {
+ uint8_t buffer[1 + DNS_WIRE_FORMAT_HOSTNAME_MAX];
+ int r;
+
+ assert_return(buf && *buf && buflen && fqdn, -EINVAL);
+
+ buffer[0] = DHCP6_FQDN_FLAG_S; /* Request server to perform AAAA RR DNS updates */
+
+ /* Store domain name after flags field */
+ r = dns_name_to_wire_format(fqdn, buffer + 1, sizeof(buffer) - 1, false);
+ if (r <= 0)
+ return r;
+
+ /*
+ * According to RFC 4704, chapter 4.2 only add terminating zero-length
+ * label in case a FQDN is provided. Since dns_name_to_wire_format
+ * always adds terminating zero-length label remove if only a hostname
+ * is provided.
+ */
+ if (dns_name_is_single_label(fqdn))
+ r--;
+
+ r = dhcp6_option_append(buf, buflen, SD_DHCP6_OPTION_FQDN, 1 + r, buffer);
+
+ return r;
+}
+
+int dhcp6_option_append_pd(uint8_t *buf, size_t len, const DHCP6IA *pd) {
+ DHCP6Option *option = (DHCP6Option *)buf;
+ size_t i = sizeof(*option) + sizeof(pd->ia_pd);
+ DHCP6Address *prefix;
+
+ assert_return(buf, -EINVAL);
+ assert_return(pd, -EINVAL);
+ assert_return(pd->type == SD_DHCP6_OPTION_IA_PD, -EINVAL);
+
+ if (len < i)
+ return -ENOBUFS;
+
+ option->code = htobe16(SD_DHCP6_OPTION_IA_PD);
+
+ memcpy(&option->data, &pd->ia_pd, sizeof(pd->ia_pd));
+
+ LIST_FOREACH(addresses, prefix, pd->addresses) {
+ DHCP6PDPrefixOption *prefix_opt;
+
+ if (len < i + sizeof(*prefix_opt))
+ return -ENOBUFS;
+
+ prefix_opt = (DHCP6PDPrefixOption *)&buf[i];
+ prefix_opt->option.code = htobe16(SD_DHCP6_OPTION_IA_PD_PREFIX);
+ prefix_opt->option.len = htobe16(sizeof(prefix_opt->iapdprefix));
+
+ memcpy(&prefix_opt->iapdprefix, &prefix->iapdprefix,
+ sizeof(struct iapdprefix));
+
+ i += sizeof(*prefix_opt);
+ }
+
+ option->len = htobe16(i - sizeof(*option));
+
+ return i;
+}
+
+static int option_parse_hdr(uint8_t **buf, size_t *buflen, uint16_t *optcode, size_t *optlen) {
+ DHCP6Option *option = (DHCP6Option*) *buf;
+ uint16_t len;
+
+ assert_return(buf, -EINVAL);
+ assert_return(optcode, -EINVAL);
+ assert_return(optlen, -EINVAL);
+
+ if (*buflen < offsetof(DHCP6Option, data))
+ return -ENOMSG;
+
+ len = be16toh(option->len);
+
+ if (len > *buflen)
+ return -ENOMSG;
+
+ *optcode = be16toh(option->code);
+ *optlen = len;
+
+ *buf += 4;
+ *buflen -= 4;
+
+ return 0;
+}
+
+int dhcp6_option_parse(uint8_t **buf, size_t *buflen, uint16_t *optcode,
+ size_t *optlen, uint8_t **optvalue) {
+ int r;
+
+ assert_return(buf && buflen && optcode && optlen && optvalue, -EINVAL);
+
+ r = option_parse_hdr(buf, buflen, optcode, optlen);
+ if (r < 0)
+ return r;
+
+ if (*optlen > *buflen)
+ return -ENOBUFS;
+
+ *optvalue = *buf;
+ *buflen -= *optlen;
+ *buf += *optlen;
+
+ return 0;
+}
+
+int dhcp6_option_parse_status(DHCP6Option *option, size_t len) {
+ DHCP6StatusOption *statusopt = (DHCP6StatusOption *)option;
+
+ if (len < sizeof(DHCP6StatusOption) ||
+ be16toh(option->len) + offsetof(DHCP6Option, data) < sizeof(DHCP6StatusOption))
+ return -ENOBUFS;
+
+ return be16toh(statusopt->status);
+}
+
+static int dhcp6_option_parse_address(DHCP6Option *option, DHCP6IA *ia,
+ uint32_t *lifetime_valid) {
+ DHCP6AddressOption *addr_option = (DHCP6AddressOption *)option;
+ DHCP6Address *addr;
+ uint32_t lt_valid, lt_pref;
+ int r;
+
+ if (be16toh(option->len) + offsetof(DHCP6Option, data) < sizeof(*addr_option))
+ return -ENOBUFS;
+
+ lt_valid = be32toh(addr_option->iaaddr.lifetime_valid);
+ lt_pref = be32toh(addr_option->iaaddr.lifetime_preferred);
+
+ if (lt_valid == 0 || lt_pref > lt_valid) {
+ log_dhcp6_client(client, "Valid lifetime of an IA address is zero or preferred lifetime %d > valid lifetime %d",
+ lt_pref, lt_valid);
+
+ return 0;
+ }
+
+ if (be16toh(option->len) + offsetof(DHCP6Option, data) > sizeof(*addr_option)) {
+ r = dhcp6_option_parse_status((DHCP6Option *)addr_option->options, be16toh(option->len) + offsetof(DHCP6Option, data) - sizeof(*addr_option));
+ if (r != 0)
+ return r < 0 ? r: 0;
+ }
+
+ addr = new0(DHCP6Address, 1);
+ if (!addr)
+ return -ENOMEM;
+
+ LIST_INIT(addresses, addr);
+ memcpy(&addr->iaaddr, option->data, sizeof(addr->iaaddr));
+
+ LIST_PREPEND(addresses, ia->addresses, addr);
+
+ *lifetime_valid = be32toh(addr->iaaddr.lifetime_valid);
+
+ return 0;
+}
+
+static int dhcp6_option_parse_pdprefix(DHCP6Option *option, DHCP6IA *ia,
+ uint32_t *lifetime_valid) {
+ DHCP6PDPrefixOption *pdprefix_option = (DHCP6PDPrefixOption *)option;
+ DHCP6Address *prefix;
+ uint32_t lt_valid, lt_pref;
+ int r;
+
+ if (be16toh(option->len) + offsetof(DHCP6Option, data) < sizeof(*pdprefix_option))
+ return -ENOBUFS;
+
+ lt_valid = be32toh(pdprefix_option->iapdprefix.lifetime_valid);
+ lt_pref = be32toh(pdprefix_option->iapdprefix.lifetime_preferred);
+
+ if (lt_valid == 0 || lt_pref > lt_valid) {
+ log_dhcp6_client(client, "Valid lifetieme of a PD prefix is zero or preferred lifetime %d > valid lifetime %d",
+ lt_pref, lt_valid);
+
+ return 0;
+ }
+
+ if (be16toh(option->len) + offsetof(DHCP6Option, data) > sizeof(*pdprefix_option)) {
+ r = dhcp6_option_parse_status((DHCP6Option *)pdprefix_option->options, be16toh(option->len) + offsetof(DHCP6Option, data) - sizeof(*pdprefix_option));
+ if (r != 0)
+ return r < 0 ? r: 0;
+ }
+
+ prefix = new0(DHCP6Address, 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ LIST_INIT(addresses, prefix);
+ memcpy(&prefix->iapdprefix, option->data, sizeof(prefix->iapdprefix));
+
+ LIST_PREPEND(addresses, ia->addresses, prefix);
+
+ *lifetime_valid = be32toh(prefix->iapdprefix.lifetime_valid);
+
+ return 0;
+}
+
+int dhcp6_option_parse_ia(DHCP6Option *iaoption, DHCP6IA *ia) {
+ uint16_t iatype, optlen;
+ size_t i, len;
+ int r = 0, status;
+ uint16_t opt;
+ size_t iaaddr_offset;
+ uint32_t lt_t1, lt_t2, lt_valid = 0, lt_min = UINT32_MAX;
+
+ assert_return(ia, -EINVAL);
+ assert_return(!ia->addresses, -EINVAL);
+
+ iatype = be16toh(iaoption->code);
+ len = be16toh(iaoption->len);
+
+ switch (iatype) {
+ case SD_DHCP6_OPTION_IA_NA:
+
+ if (len < DHCP6_OPTION_IA_NA_LEN)
+ return -ENOBUFS;
+
+ iaaddr_offset = DHCP6_OPTION_IA_NA_LEN;
+ memcpy(&ia->ia_na, iaoption->data, sizeof(ia->ia_na));
+
+ lt_t1 = be32toh(ia->ia_na.lifetime_t1);
+ lt_t2 = be32toh(ia->ia_na.lifetime_t2);
+
+ if (lt_t1 && lt_t2 && lt_t1 > lt_t2) {
+ log_dhcp6_client(client, "IA NA T1 %ds > T2 %ds",
+ lt_t1, lt_t2);
+ return -EINVAL;
+ }
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_PD:
+
+ if (len < sizeof(ia->ia_pd))
+ return -ENOBUFS;
+
+ iaaddr_offset = sizeof(ia->ia_pd);
+ memcpy(&ia->ia_pd, iaoption->data, sizeof(ia->ia_pd));
+
+ lt_t1 = be32toh(ia->ia_pd.lifetime_t1);
+ lt_t2 = be32toh(ia->ia_pd.lifetime_t2);
+
+ if (lt_t1 && lt_t2 && lt_t1 > lt_t2) {
+ log_dhcp6_client(client, "IA PD T1 %ds > T2 %ds",
+ lt_t1, lt_t2);
+ return -EINVAL;
+ }
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_TA:
+ if (len < DHCP6_OPTION_IA_TA_LEN)
+ return -ENOBUFS;
+
+ iaaddr_offset = DHCP6_OPTION_IA_TA_LEN;
+ memcpy(&ia->ia_ta.id, iaoption->data, sizeof(ia->ia_ta));
+
+ break;
+
+ default:
+ return -ENOMSG;
+ }
+
+ ia->type = iatype;
+ i = iaaddr_offset;
+
+ while (i < len) {
+ DHCP6Option *option = (DHCP6Option *)&iaoption->data[i];
+
+ if (len < i + sizeof(*option) || len < i + sizeof(*option) + be16toh(option->len))
+ return -ENOBUFS;
+
+ opt = be16toh(option->code);
+ optlen = be16toh(option->len);
+
+ switch (opt) {
+ case SD_DHCP6_OPTION_IAADDR:
+
+ if (!IN_SET(ia->type, SD_DHCP6_OPTION_IA_NA, SD_DHCP6_OPTION_IA_TA)) {
+ log_dhcp6_client(client, "IA Address option not in IA NA or TA option");
+ return -EINVAL;
+ }
+
+ r = dhcp6_option_parse_address(option, ia, &lt_valid);
+ if (r < 0)
+ return r;
+
+ if (lt_valid < lt_min)
+ lt_min = lt_valid;
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_PD_PREFIX:
+
+ if (!IN_SET(ia->type, SD_DHCP6_OPTION_IA_PD)) {
+ log_dhcp6_client(client, "IA PD Prefix option not in IA PD option");
+ return -EINVAL;
+ }
+
+ r = dhcp6_option_parse_pdprefix(option, ia, &lt_valid);
+ if (r < 0)
+ return r;
+
+ if (lt_valid < lt_min)
+ lt_min = lt_valid;
+
+ break;
+
+ case SD_DHCP6_OPTION_STATUS_CODE:
+
+ status = dhcp6_option_parse_status(option, optlen + offsetof(DHCP6Option, data));
+ if (status < 0)
+ return status;
+ if (status > 0) {
+ log_dhcp6_client(client, "IA status %d",
+ status);
+
+ return -EINVAL;
+ }
+
+ break;
+
+ default:
+ log_dhcp6_client(client, "Unknown IA option %d", opt);
+ break;
+ }
+
+ i += sizeof(*option) + optlen;
+ }
+
+ switch(iatype) {
+ case SD_DHCP6_OPTION_IA_NA:
+ if (!ia->ia_na.lifetime_t1 && !ia->ia_na.lifetime_t2) {
+ lt_t1 = lt_min / 2;
+ lt_t2 = lt_min / 10 * 8;
+ ia->ia_na.lifetime_t1 = htobe32(lt_t1);
+ ia->ia_na.lifetime_t2 = htobe32(lt_t2);
+
+ log_dhcp6_client(client, "Computed IA NA T1 %ds and T2 %ds as both were zero",
+ lt_t1, lt_t2);
+ }
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_PD:
+ if (!ia->ia_pd.lifetime_t1 && !ia->ia_pd.lifetime_t2) {
+ lt_t1 = lt_min / 2;
+ lt_t2 = lt_min / 10 * 8;
+ ia->ia_pd.lifetime_t1 = htobe32(lt_t1);
+ ia->ia_pd.lifetime_t2 = htobe32(lt_t2);
+
+ log_dhcp6_client(client, "Computed IA PD T1 %ds and T2 %ds as both were zero",
+ lt_t1, lt_t2);
+ }
+
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int dhcp6_option_parse_ip6addrs(uint8_t *optval, uint16_t optlen,
+ struct in6_addr **addrs, size_t count,
+ size_t *allocated) {
+
+ if (optlen == 0 || optlen % sizeof(struct in6_addr) != 0)
+ return -EINVAL;
+
+ if (!GREEDY_REALLOC(*addrs, *allocated,
+ count * sizeof(struct in6_addr) + optlen))
+ return -ENOMEM;
+
+ memcpy(*addrs + count, optval, optlen);
+
+ count += optlen / sizeof(struct in6_addr);
+
+ return count;
+}
+
+int dhcp6_option_parse_domainname(const uint8_t *optval, uint16_t optlen, char ***str_arr) {
+ size_t pos = 0, idx = 0;
+ _cleanup_strv_free_ char **names = NULL;
+ int r;
+
+ assert_return(optlen > 1, -ENODATA);
+ assert_return(optval[optlen - 1] == '\0', -EINVAL);
+
+ while (pos < optlen) {
+ _cleanup_free_ char *ret = NULL;
+ size_t n = 0, allocated = 0;
+ bool first = true;
+
+ for (;;) {
+ const char *label;
+ uint8_t c;
+
+ c = optval[pos++];
+
+ if (c == 0)
+ /* End of name */
+ break;
+ if (c > 63)
+ return -EBADMSG;
+
+ /* Literal label */
+ label = (const char *)&optval[pos];
+ pos += c;
+ if (pos >= optlen)
+ return -EMSGSIZE;
+
+ if (!GREEDY_REALLOC(ret, allocated, n + !first + DNS_LABEL_ESCAPED_MAX))
+ return -ENOMEM;
+
+ if (first)
+ first = false;
+ else
+ ret[n++] = '.';
+
+ r = dns_label_escape(label, c, ret + n, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ n += r;
+ }
+
+ if (n == 0)
+ continue;
+
+ if (!GREEDY_REALLOC(ret, allocated, n + 1))
+ return -ENOMEM;
+
+ ret[n] = 0;
+
+ r = strv_extend(&names, ret);
+ if (r < 0)
+ return r;
+
+ idx++;
+ }
+
+ *str_arr = TAKE_PTR(names);
+
+ return idx;
+}
diff --git a/src/libsystemd-network/dhcp6-protocol.h b/src/libsystemd-network/dhcp6-protocol.h
new file mode 100644
index 0000000..ffae445
--- /dev/null
+++ b/src/libsystemd-network/dhcp6-protocol.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+
+#include "macro.h"
+#include "sparse-endian.h"
+
+struct DHCP6Message {
+ union {
+ struct {
+ uint8_t type;
+ uint8_t _pad[3];
+ } _packed_;
+ be32_t transaction_id;
+ };
+ uint8_t options[];
+} _packed_;
+
+typedef struct DHCP6Message DHCP6Message;
+
+#define DHCP6_MIN_OPTIONS_SIZE \
+ 1280 - sizeof(struct ip6_hdr) - sizeof(struct udphdr)
+
+#define IN6ADDR_ALL_DHCP6_RELAY_AGENTS_AND_SERVERS_INIT \
+ { { { 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02 } } }
+
+enum {
+ DHCP6_PORT_SERVER = 547,
+ DHCP6_PORT_CLIENT = 546,
+};
+
+#define DHCP6_INF_TIMEOUT 1 * USEC_PER_SEC
+#define DHCP6_INF_MAX_RT 120 * USEC_PER_SEC
+#define DHCP6_SOL_MAX_DELAY 1 * USEC_PER_SEC
+#define DHCP6_SOL_TIMEOUT 1 * USEC_PER_SEC
+#define DHCP6_SOL_MAX_RT 120 * USEC_PER_SEC
+#define DHCP6_REQ_TIMEOUT 1 * USEC_PER_SEC
+#define DHCP6_REQ_MAX_RT 120 * USEC_PER_SEC
+#define DHCP6_REQ_MAX_RC 10
+#define DHCP6_REN_TIMEOUT 10 * USEC_PER_SEC
+#define DHCP6_REN_MAX_RT 600 * USEC_PER_SEC
+#define DHCP6_REB_TIMEOUT 10 * USEC_PER_SEC
+#define DHCP6_REB_MAX_RT 600 * USEC_PER_SEC
+
+enum DHCP6State {
+ DHCP6_STATE_STOPPED = 0,
+ DHCP6_STATE_INFORMATION_REQUEST = 1,
+ DHCP6_STATE_SOLICITATION = 2,
+ DHCP6_STATE_REQUEST = 3,
+ DHCP6_STATE_BOUND = 4,
+ DHCP6_STATE_RENEW = 5,
+ DHCP6_STATE_REBIND = 6,
+};
+
+enum {
+ DHCP6_SOLICIT = 1,
+ DHCP6_ADVERTISE = 2,
+ DHCP6_REQUEST = 3,
+ DHCP6_CONFIRM = 4,
+ DHCP6_RENEW = 5,
+ DHCP6_REBIND = 6,
+ DHCP6_REPLY = 7,
+ DHCP6_RELEASE = 8,
+ DHCP6_DECLINE = 9,
+ DHCP6_RECONFIGURE = 10,
+ DHCP6_INFORMATION_REQUEST = 11,
+ DHCP6_RELAY_FORW = 12,
+ DHCP6_RELAY_REPL = 13,
+ _DHCP6_MESSAGE_MAX = 14,
+};
+
+enum {
+ DHCP6_NTP_SUBOPTION_SRV_ADDR = 1,
+ DHCP6_NTP_SUBOPTION_MC_ADDR = 2,
+ DHCP6_NTP_SUBOPTION_SRV_FQDN = 3,
+};
+
+enum {
+ DHCP6_STATUS_SUCCESS = 0,
+ DHCP6_STATUS_UNSPEC_FAIL = 1,
+ DHCP6_STATUS_NO_ADDRS_AVAIL = 2,
+ DHCP6_STATUS_NO_BINDING = 3,
+ DHCP6_STATUS_NOT_ON_LINK = 4,
+ DHCP6_STATUS_USE_MULTICAST = 5,
+ _DHCP6_STATUS_MAX = 6,
+};
+
+enum {
+ DHCP6_FQDN_FLAG_S = (1 << 0),
+ DHCP6_FQDN_FLAG_O = (1 << 1),
+ DHCP6_FQDN_FLAG_N = (1 << 2),
+};
diff --git a/src/libsystemd-network/icmp6-util.c b/src/libsystemd-network/icmp6-util.c
new file mode 100644
index 0000000..e535b12
--- /dev/null
+++ b/src/libsystemd-network/icmp6-util.c
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <netinet/icmp6.h>
+#include <netinet/in.h>
+#include <netinet/ip6.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <net/if.h>
+#include <linux/if_packet.h>
+
+#include "fd-util.h"
+#include "icmp6-util.h"
+#include "in-addr-util.h"
+#include "io-util.h"
+#include "socket-util.h"
+
+#define IN6ADDR_ALL_ROUTERS_MULTICAST_INIT \
+ { { { 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 } } }
+
+#define IN6ADDR_ALL_NODES_MULTICAST_INIT \
+ { { { 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } } }
+
+static int icmp6_bind_router_message(const struct icmp6_filter *filter,
+ const struct ipv6_mreq *mreq) {
+ int index = mreq->ipv6mr_interface;
+ _cleanup_close_ int s = -1;
+ char ifname[IF_NAMESIZE] = "";
+ int r;
+
+ s = socket(AF_INET6, SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_ICMPV6);
+ if (s < 0)
+ return -errno;
+
+ r = setsockopt(s, IPPROTO_ICMPV6, ICMP6_FILTER, filter, sizeof(*filter));
+ if (r < 0)
+ return -errno;
+
+ r = setsockopt(s, IPPROTO_IPV6, IPV6_ADD_MEMBERSHIP, mreq, sizeof(*mreq));
+ if (r < 0)
+ return -errno;
+
+ /* RFC 3315, section 6.7, bullet point 2 may indicate that an
+ IPV6_PKTINFO socket option also applies for ICMPv6 multicast.
+ Empirical experiments indicates otherwise and therefore an
+ IPV6_MULTICAST_IF socket option is used here instead */
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_IF, index);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, false);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, 255);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 255);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, SOL_IPV6, IPV6_RECVHOPLIMIT, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, SOL_SOCKET, SO_TIMESTAMP, true);
+ if (r < 0)
+ return r;
+
+ if (if_indextoname(index, ifname) == 0)
+ return -errno;
+
+ r = setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE, ifname, strlen(ifname));
+ if (r < 0)
+ return -errno;
+
+ return TAKE_FD(s);
+}
+
+int icmp6_bind_router_solicitation(int index) {
+ struct icmp6_filter filter = {};
+ struct ipv6_mreq mreq = {
+ .ipv6mr_multiaddr = IN6ADDR_ALL_NODES_MULTICAST_INIT,
+ .ipv6mr_interface = index,
+ };
+
+ ICMP6_FILTER_SETBLOCKALL(&filter);
+ ICMP6_FILTER_SETPASS(ND_ROUTER_ADVERT, &filter);
+
+ return icmp6_bind_router_message(&filter, &mreq);
+}
+
+int icmp6_bind_router_advertisement(int index) {
+ struct icmp6_filter filter = {};
+ struct ipv6_mreq mreq = {
+ .ipv6mr_multiaddr = IN6ADDR_ALL_ROUTERS_MULTICAST_INIT,
+ .ipv6mr_interface = index,
+ };
+
+ ICMP6_FILTER_SETBLOCKALL(&filter);
+ ICMP6_FILTER_SETPASS(ND_ROUTER_SOLICIT, &filter);
+
+ return icmp6_bind_router_message(&filter, &mreq);
+}
+
+int icmp6_send_router_solicitation(int s, const struct ether_addr *ether_addr) {
+ struct sockaddr_in6 dst = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_ALL_ROUTERS_MULTICAST_INIT,
+ };
+ struct {
+ struct nd_router_solicit rs;
+ struct nd_opt_hdr rs_opt;
+ struct ether_addr rs_opt_mac;
+ } _packed_ rs = {
+ .rs.nd_rs_type = ND_ROUTER_SOLICIT,
+ .rs_opt.nd_opt_type = ND_OPT_SOURCE_LINKADDR,
+ .rs_opt.nd_opt_len = 1,
+ };
+ struct iovec iov = {
+ .iov_base = &rs,
+ .iov_len = sizeof(rs),
+ };
+ struct msghdr msg = {
+ .msg_name = &dst,
+ .msg_namelen = sizeof(dst),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ int r;
+
+ assert(s >= 0);
+ assert(ether_addr);
+
+ rs.rs_opt_mac = *ether_addr;
+
+ r = sendmsg(s, &msg, 0);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int icmp6_receive(int fd, void *buffer, size_t size, struct in6_addr *dst,
+ triple_timestamp *timestamp) {
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int)) + /* ttl */
+ CMSG_SPACE(sizeof(struct timeval))];
+ } control = {};
+ struct iovec iov = {};
+ union sockaddr_union sa = {};
+ struct msghdr msg = {
+ .msg_name = &sa.sa,
+ .msg_namelen = sizeof(sa),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct cmsghdr *cmsg;
+ ssize_t len;
+
+ iov = IOVEC_MAKE(buffer, size);
+
+ len = recvmsg(fd, &msg, MSG_DONTWAIT);
+ if (len < 0)
+ return -errno;
+
+ if ((size_t) len != size)
+ return -EINVAL;
+
+ if (msg.msg_namelen == sizeof(struct sockaddr_in6) &&
+ sa.in6.sin6_family == AF_INET6) {
+
+ *dst = sa.in6.sin6_addr;
+ if (in_addr_is_link_local(AF_INET6, (union in_addr_union*) dst) <= 0)
+ return -EADDRNOTAVAIL;
+
+ } else if (msg.msg_namelen > 0)
+ return -EPFNOSUPPORT;
+
+ /* namelen == 0 only happens when running the test-suite over a socketpair */
+
+ assert(!(msg.msg_flags & MSG_CTRUNC));
+ assert(!(msg.msg_flags & MSG_TRUNC));
+
+ CMSG_FOREACH(cmsg, &msg) {
+ if (cmsg->cmsg_level == SOL_IPV6 &&
+ cmsg->cmsg_type == IPV6_HOPLIMIT &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(int))) {
+ int hops = *(int*) CMSG_DATA(cmsg);
+
+ if (hops != 255)
+ return -EMULTIHOP;
+ }
+
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SO_TIMESTAMP &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
+ triple_timestamp_from_realtime(timestamp, timeval_load((struct timeval*) CMSG_DATA(cmsg)));
+ }
+
+ if (!triple_timestamp_is_set(timestamp))
+ triple_timestamp_get(timestamp);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/icmp6-util.h b/src/libsystemd-network/icmp6-util.h
new file mode 100644
index 0000000..725a680
--- /dev/null
+++ b/src/libsystemd-network/icmp6-util.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2014-2015 Intel Corporation. All rights reserved.
+***/
+
+#include <net/ethernet.h>
+
+#include "time-util.h"
+
+#define IN6ADDR_ALL_ROUTERS_MULTICAST_INIT \
+ { { { 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 } } }
+
+#define IN6ADDR_ALL_NODES_MULTICAST_INIT \
+ { { { 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } } }
+
+int icmp6_bind_router_solicitation(int index);
+int icmp6_bind_router_advertisement(int index);
+int icmp6_send_router_solicitation(int s, const struct ether_addr *ether_addr);
+int icmp6_receive(int fd, void *buffer, size_t size, struct in6_addr *dst,
+ triple_timestamp *timestamp);
diff --git a/src/libsystemd-network/lldp-internal.h b/src/libsystemd-network/lldp-internal.h
new file mode 100644
index 0000000..88b5493
--- /dev/null
+++ b/src/libsystemd-network/lldp-internal.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-event.h"
+#include "sd-lldp.h"
+
+#include "hashmap.h"
+#include "log.h"
+#include "prioq.h"
+
+struct sd_lldp {
+ unsigned n_ref;
+
+ int ifindex;
+ int fd;
+
+ sd_event *event;
+ int64_t event_priority;
+ sd_event_source *io_event_source;
+ sd_event_source *timer_event_source;
+
+ Prioq *neighbor_by_expiry;
+ Hashmap *neighbor_by_id;
+
+ uint64_t neighbors_max;
+
+ sd_lldp_callback_t callback;
+ void *userdata;
+
+ uint16_t capability_mask;
+
+ struct ether_addr filter_address;
+};
+
+#define log_lldp_errno(error, fmt, ...) log_internal(LOG_DEBUG, error, __FILE__, __LINE__, __func__, "LLDP: " fmt, ##__VA_ARGS__)
+#define log_lldp(fmt, ...) log_lldp_errno(0, fmt, ##__VA_ARGS__)
+
+const char* lldp_event_to_string(sd_lldp_event e) _const_;
+sd_lldp_event lldp_event_from_string(const char *s) _pure_;
diff --git a/src/libsystemd-network/lldp-neighbor.c b/src/libsystemd-network/lldp-neighbor.c
new file mode 100644
index 0000000..f6db625
--- /dev/null
+++ b/src/libsystemd-network/lldp-neighbor.c
@@ -0,0 +1,769 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "ether-addr-util.h"
+#include "hexdecoct.h"
+#include "in-addr-util.h"
+#include "lldp-internal.h"
+#include "lldp-neighbor.h"
+#include "missing.h"
+#include "unaligned.h"
+#include "util.h"
+
+static void lldp_neighbor_id_hash_func(const LLDPNeighborID *id, struct siphash *state) {
+ siphash24_compress(id->chassis_id, id->chassis_id_size, state);
+ siphash24_compress(&id->chassis_id_size, sizeof(id->chassis_id_size), state);
+ siphash24_compress(id->port_id, id->port_id_size, state);
+ siphash24_compress(&id->port_id_size, sizeof(id->port_id_size), state);
+}
+
+int lldp_neighbor_id_compare_func(const LLDPNeighborID *x, const LLDPNeighborID *y) {
+ return memcmp_nn(x->chassis_id, x->chassis_id_size, y->chassis_id, y->chassis_id_size)
+ ?: memcmp_nn(x->port_id, x->port_id_size, y->port_id, y->port_id_size);
+}
+
+DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(lldp_neighbor_hash_ops, LLDPNeighborID, lldp_neighbor_id_hash_func, lldp_neighbor_id_compare_func,
+ sd_lldp_neighbor, lldp_neighbor_unlink);
+
+int lldp_neighbor_prioq_compare_func(const void *a, const void *b) {
+ const sd_lldp_neighbor *x = a, *y = b;
+
+ return CMP(x->until, y->until);
+}
+
+_public_ sd_lldp_neighbor *sd_lldp_neighbor_ref(sd_lldp_neighbor *n) {
+ if (!n)
+ return NULL;
+
+ assert(n->n_ref > 0 || n->lldp);
+ n->n_ref++;
+
+ return n;
+}
+
+static void lldp_neighbor_free(sd_lldp_neighbor *n) {
+ assert(n);
+
+ free(n->id.port_id);
+ free(n->id.chassis_id);
+ free(n->port_description);
+ free(n->system_name);
+ free(n->system_description);
+ free(n->chassis_id_as_string);
+ free(n->port_id_as_string);
+ free(n);
+}
+
+_public_ sd_lldp_neighbor *sd_lldp_neighbor_unref(sd_lldp_neighbor *n) {
+
+ /* Drops one reference from the neighbor. Note that the object is not freed unless it is already unlinked from
+ * the sd_lldp object. */
+
+ if (!n)
+ return NULL;
+
+ assert(n->n_ref > 0);
+ n->n_ref--;
+
+ if (n->n_ref <= 0 && !n->lldp)
+ lldp_neighbor_free(n);
+
+ return NULL;
+}
+
+sd_lldp_neighbor *lldp_neighbor_unlink(sd_lldp_neighbor *n) {
+
+ /* Removes the neighbor object from the LLDP object, and frees it if it also has no other reference. */
+
+ if (!n)
+ return NULL;
+
+ if (!n->lldp)
+ return NULL;
+
+ /* Only remove the neighbor object from the hash table if it's in there, don't complain if it isn't. This is
+ * because we are used as destructor call for hashmap_clear() and thus sometimes are called to de-register
+ * ourselves from the hashtable and sometimes are called after we already are de-registered. */
+
+ (void) hashmap_remove_value(n->lldp->neighbor_by_id, &n->id, n);
+
+ assert_se(prioq_remove(n->lldp->neighbor_by_expiry, n, &n->prioq_idx) >= 0);
+
+ n->lldp = NULL;
+
+ if (n->n_ref <= 0)
+ lldp_neighbor_free(n);
+
+ return NULL;
+}
+
+sd_lldp_neighbor *lldp_neighbor_new(size_t raw_size) {
+ sd_lldp_neighbor *n;
+
+ n = malloc0(ALIGN(sizeof(sd_lldp_neighbor)) + raw_size);
+ if (!n)
+ return NULL;
+
+ n->raw_size = raw_size;
+ n->n_ref = 1;
+
+ return n;
+}
+
+static int parse_string(char **s, const void *q, size_t n) {
+ const char *p = q;
+ char *k;
+
+ assert(s);
+ assert(p || n == 0);
+
+ if (*s) {
+ log_lldp("Found duplicate string, ignoring field.");
+ return 0;
+ }
+
+ /* Strip trailing NULs, just to be nice */
+ while (n > 0 && p[n-1] == 0)
+ n--;
+
+ if (n <= 0) /* Ignore empty strings */
+ return 0;
+
+ /* Look for inner NULs */
+ if (memchr(p, 0, n)) {
+ log_lldp("Found inner NUL in string, ignoring field.");
+ return 0;
+ }
+
+ /* Let's escape weird chars, for security reasons */
+ k = cescape_length(p, n);
+ if (!k)
+ return -ENOMEM;
+
+ free(*s);
+ *s = k;
+
+ return 1;
+}
+
+int lldp_neighbor_parse(sd_lldp_neighbor *n) {
+ struct ether_header h;
+ const uint8_t *p;
+ size_t left;
+ int r;
+
+ assert(n);
+
+ if (n->raw_size < sizeof(struct ether_header)) {
+ log_lldp("Received truncated packet, ignoring.");
+ return -EBADMSG;
+ }
+
+ memcpy(&h, LLDP_NEIGHBOR_RAW(n), sizeof(h));
+
+ if (h.ether_type != htobe16(ETHERTYPE_LLDP)) {
+ log_lldp("Received packet with wrong type, ignoring.");
+ return -EBADMSG;
+ }
+
+ if (h.ether_dhost[0] != 0x01 ||
+ h.ether_dhost[1] != 0x80 ||
+ h.ether_dhost[2] != 0xc2 ||
+ h.ether_dhost[3] != 0x00 ||
+ h.ether_dhost[4] != 0x00 ||
+ !IN_SET(h.ether_dhost[5], 0x00, 0x03, 0x0e)) {
+ log_lldp("Received packet with wrong destination address, ignoring.");
+ return -EBADMSG;
+ }
+
+ memcpy(&n->source_address, h.ether_shost, sizeof(struct ether_addr));
+ memcpy(&n->destination_address, h.ether_dhost, sizeof(struct ether_addr));
+
+ p = (const uint8_t*) LLDP_NEIGHBOR_RAW(n) + sizeof(struct ether_header);
+ left = n->raw_size - sizeof(struct ether_header);
+
+ for (;;) {
+ uint8_t type;
+ uint16_t length;
+
+ if (left < 2) {
+ log_lldp("TLV lacks header, ignoring.");
+ return -EBADMSG;
+ }
+
+ type = p[0] >> 1;
+ length = p[1] + (((uint16_t) (p[0] & 1)) << 8);
+ p += 2, left -= 2;
+
+ if (left < length) {
+ log_lldp("TLV truncated, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ switch (type) {
+
+ case SD_LLDP_TYPE_END:
+ if (length != 0) {
+ log_lldp("End marker TLV not zero-sized, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ /* Note that after processing the SD_LLDP_TYPE_END left could still be > 0
+ * as the message may contain padding (see IEEE 802.1AB-2016, sec. 8.5.12) */
+
+ goto end_marker;
+
+ case SD_LLDP_TYPE_CHASSIS_ID:
+ if (length < 2 || length > 256) { /* includes the chassis subtype, hence one extra byte */
+ log_lldp("Chassis ID field size out of range, ignoring datagram.");
+ return -EBADMSG;
+ }
+ if (n->id.chassis_id) {
+ log_lldp("Duplicate chassis ID field, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ n->id.chassis_id = memdup(p, length);
+ if (!n->id.chassis_id)
+ return -ENOMEM;
+
+ n->id.chassis_id_size = length;
+ break;
+
+ case SD_LLDP_TYPE_PORT_ID:
+ if (length < 2 || length > 256) { /* includes the port subtype, hence one extra byte */
+ log_lldp("Port ID field size out of range, ignoring datagram.");
+ return -EBADMSG;
+ }
+ if (n->id.port_id) {
+ log_lldp("Duplicate port ID field, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ n->id.port_id = memdup(p, length);
+ if (!n->id.port_id)
+ return -ENOMEM;
+
+ n->id.port_id_size = length;
+ break;
+
+ case SD_LLDP_TYPE_TTL:
+ if (length != 2) {
+ log_lldp("TTL field has wrong size, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ if (n->has_ttl) {
+ log_lldp("Duplicate TTL field, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ n->ttl = unaligned_read_be16(p);
+ n->has_ttl = true;
+ break;
+
+ case SD_LLDP_TYPE_PORT_DESCRIPTION:
+ r = parse_string(&n->port_description, p, length);
+ if (r < 0)
+ return r;
+ break;
+
+ case SD_LLDP_TYPE_SYSTEM_NAME:
+ r = parse_string(&n->system_name, p, length);
+ if (r < 0)
+ return r;
+ break;
+
+ case SD_LLDP_TYPE_SYSTEM_DESCRIPTION:
+ r = parse_string(&n->system_description, p, length);
+ if (r < 0)
+ return r;
+ break;
+
+ case SD_LLDP_TYPE_SYSTEM_CAPABILITIES:
+ if (length != 4)
+ log_lldp("System capabilities field has wrong size, ignoring.");
+ else {
+ n->system_capabilities = unaligned_read_be16(p);
+ n->enabled_capabilities = unaligned_read_be16(p + 2);
+ n->has_capabilities = true;
+ }
+
+ break;
+
+ case SD_LLDP_TYPE_PRIVATE:
+ if (length < 4)
+ log_lldp("Found private TLV that is too short, ignoring.");
+
+ break;
+ }
+
+ p += length, left -= length;
+ }
+
+end_marker:
+ if (!n->id.chassis_id || !n->id.port_id || !n->has_ttl) {
+ log_lldp("One or more mandatory TLV missing in datagram. Ignoring.");
+ return -EBADMSG;
+
+ }
+
+ n->rindex = sizeof(struct ether_header);
+
+ return 0;
+}
+
+void lldp_neighbor_start_ttl(sd_lldp_neighbor *n) {
+ assert(n);
+
+ if (n->ttl > 0) {
+ usec_t base;
+
+ /* Use the packet's timestamp if there is one known */
+ base = triple_timestamp_by_clock(&n->timestamp, clock_boottime_or_monotonic());
+ if (base <= 0 || base == USEC_INFINITY)
+ base = now(clock_boottime_or_monotonic()); /* Otherwise, take the current time */
+
+ n->until = usec_add(base, n->ttl * USEC_PER_SEC);
+ } else
+ n->until = 0;
+
+ if (n->lldp)
+ prioq_reshuffle(n->lldp->neighbor_by_expiry, n, &n->prioq_idx);
+}
+
+bool lldp_neighbor_equal(const sd_lldp_neighbor *a, const sd_lldp_neighbor *b) {
+ if (a == b)
+ return true;
+
+ if (!a || !b)
+ return false;
+
+ if (a->raw_size != b->raw_size)
+ return false;
+
+ return memcmp(LLDP_NEIGHBOR_RAW(a), LLDP_NEIGHBOR_RAW(b), a->raw_size) == 0;
+}
+
+_public_ int sd_lldp_neighbor_get_source_address(sd_lldp_neighbor *n, struct ether_addr* address) {
+ assert_return(n, -EINVAL);
+ assert_return(address, -EINVAL);
+
+ *address = n->source_address;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_destination_address(sd_lldp_neighbor *n, struct ether_addr* address) {
+ assert_return(n, -EINVAL);
+ assert_return(address, -EINVAL);
+
+ *address = n->destination_address;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_raw(sd_lldp_neighbor *n, const void **ret, size_t *size) {
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ *ret = LLDP_NEIGHBOR_RAW(n);
+ *size = n->raw_size;
+
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_chassis_id(sd_lldp_neighbor *n, uint8_t *type, const void **ret, size_t *size) {
+ assert_return(n, -EINVAL);
+ assert_return(type, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ assert(n->id.chassis_id_size > 0);
+
+ *type = *(uint8_t*) n->id.chassis_id;
+ *ret = (uint8_t*) n->id.chassis_id + 1;
+ *size = n->id.chassis_id_size - 1;
+
+ return 0;
+}
+
+static int format_mac_address(const void *data, size_t sz, char **ret) {
+ struct ether_addr a;
+ char *k;
+
+ assert(data || sz <= 0);
+
+ if (sz != 7)
+ return 0;
+
+ memcpy(&a, (uint8_t*) data + 1, sizeof(a));
+
+ k = new(char, ETHER_ADDR_TO_STRING_MAX);
+ if (!k)
+ return -ENOMEM;
+
+ *ret = ether_addr_to_string(&a, k);
+ return 1;
+}
+
+static int format_network_address(const void *data, size_t sz, char **ret) {
+ union in_addr_union a;
+ int family, r;
+
+ if (sz == 6 && ((uint8_t*) data)[1] == 1) {
+ memcpy(&a.in, (uint8_t*) data + 2, sizeof(a.in));
+ family = AF_INET;
+ } else if (sz == 18 && ((uint8_t*) data)[1] == 2) {
+ memcpy(&a.in6, (uint8_t*) data + 2, sizeof(a.in6));
+ family = AF_INET6;
+ } else
+ return 0;
+
+ r = in_addr_to_string(family, &a, ret);
+ if (r < 0)
+ return r;
+ return 1;
+}
+
+_public_ int sd_lldp_neighbor_get_chassis_id_as_string(sd_lldp_neighbor *n, const char **ret) {
+ char *k;
+ int r;
+
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (n->chassis_id_as_string) {
+ *ret = n->chassis_id_as_string;
+ return 0;
+ }
+
+ assert(n->id.chassis_id_size > 0);
+
+ switch (*(uint8_t*) n->id.chassis_id) {
+
+ case SD_LLDP_CHASSIS_SUBTYPE_CHASSIS_COMPONENT:
+ case SD_LLDP_CHASSIS_SUBTYPE_INTERFACE_ALIAS:
+ case SD_LLDP_CHASSIS_SUBTYPE_PORT_COMPONENT:
+ case SD_LLDP_CHASSIS_SUBTYPE_INTERFACE_NAME:
+ case SD_LLDP_CHASSIS_SUBTYPE_LOCALLY_ASSIGNED:
+ k = cescape_length((char*) n->id.chassis_id + 1, n->id.chassis_id_size - 1);
+ if (!k)
+ return -ENOMEM;
+
+ goto done;
+
+ case SD_LLDP_CHASSIS_SUBTYPE_MAC_ADDRESS:
+ r = format_mac_address(n->id.chassis_id, n->id.chassis_id_size, &k);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto done;
+
+ break;
+
+ case SD_LLDP_CHASSIS_SUBTYPE_NETWORK_ADDRESS:
+ r = format_network_address(n->id.chassis_id, n->id.chassis_id_size, &k);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto done;
+
+ break;
+ }
+
+ /* Generic fallback */
+ k = hexmem(n->id.chassis_id, n->id.chassis_id_size);
+ if (!k)
+ return -ENOMEM;
+
+done:
+ *ret = n->chassis_id_as_string = k;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_port_id(sd_lldp_neighbor *n, uint8_t *type, const void **ret, size_t *size) {
+ assert_return(n, -EINVAL);
+ assert_return(type, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ assert(n->id.port_id_size > 0);
+
+ *type = *(uint8_t*) n->id.port_id;
+ *ret = (uint8_t*) n->id.port_id + 1;
+ *size = n->id.port_id_size - 1;
+
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_port_id_as_string(sd_lldp_neighbor *n, const char **ret) {
+ char *k;
+ int r;
+
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (n->port_id_as_string) {
+ *ret = n->port_id_as_string;
+ return 0;
+ }
+
+ assert(n->id.port_id_size > 0);
+
+ switch (*(uint8_t*) n->id.port_id) {
+
+ case SD_LLDP_PORT_SUBTYPE_INTERFACE_ALIAS:
+ case SD_LLDP_PORT_SUBTYPE_PORT_COMPONENT:
+ case SD_LLDP_PORT_SUBTYPE_INTERFACE_NAME:
+ case SD_LLDP_PORT_SUBTYPE_LOCALLY_ASSIGNED:
+ k = cescape_length((char*) n->id.port_id + 1, n->id.port_id_size - 1);
+ if (!k)
+ return -ENOMEM;
+
+ goto done;
+
+ case SD_LLDP_PORT_SUBTYPE_MAC_ADDRESS:
+ r = format_mac_address(n->id.port_id, n->id.port_id_size, &k);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto done;
+
+ break;
+
+ case SD_LLDP_PORT_SUBTYPE_NETWORK_ADDRESS:
+ r = format_network_address(n->id.port_id, n->id.port_id_size, &k);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto done;
+
+ break;
+ }
+
+ /* Generic fallback */
+ k = hexmem(n->id.port_id, n->id.port_id_size);
+ if (!k)
+ return -ENOMEM;
+
+done:
+ *ret = n->port_id_as_string = k;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_ttl(sd_lldp_neighbor *n, uint16_t *ret_sec) {
+ assert_return(n, -EINVAL);
+ assert_return(ret_sec, -EINVAL);
+
+ *ret_sec = n->ttl;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_system_name(sd_lldp_neighbor *n, const char **ret) {
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!n->system_name)
+ return -ENODATA;
+
+ *ret = n->system_name;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_system_description(sd_lldp_neighbor *n, const char **ret) {
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!n->system_description)
+ return -ENODATA;
+
+ *ret = n->system_description;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_port_description(sd_lldp_neighbor *n, const char **ret) {
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!n->port_description)
+ return -ENODATA;
+
+ *ret = n->port_description;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_system_capabilities(sd_lldp_neighbor *n, uint16_t *ret) {
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!n->has_capabilities)
+ return -ENODATA;
+
+ *ret = n->system_capabilities;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_enabled_capabilities(sd_lldp_neighbor *n, uint16_t *ret) {
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!n->has_capabilities)
+ return -ENODATA;
+
+ *ret = n->enabled_capabilities;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_from_raw(sd_lldp_neighbor **ret, const void *raw, size_t raw_size) {
+ _cleanup_(sd_lldp_neighbor_unrefp) sd_lldp_neighbor *n = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(raw || raw_size <= 0, -EINVAL);
+
+ n = lldp_neighbor_new(raw_size);
+ if (!n)
+ return -ENOMEM;
+
+ memcpy(LLDP_NEIGHBOR_RAW(n), raw, raw_size);
+ r = lldp_neighbor_parse(n);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(n);
+
+ return r;
+}
+
+_public_ int sd_lldp_neighbor_tlv_rewind(sd_lldp_neighbor *n) {
+ assert_return(n, -EINVAL);
+
+ assert(n->raw_size >= sizeof(struct ether_header));
+ n->rindex = sizeof(struct ether_header);
+
+ return n->rindex < n->raw_size;
+}
+
+_public_ int sd_lldp_neighbor_tlv_next(sd_lldp_neighbor *n) {
+ size_t length;
+
+ assert_return(n, -EINVAL);
+
+ if (n->rindex == n->raw_size) /* EOF */
+ return -ESPIPE;
+
+ if (n->rindex + 2 > n->raw_size) /* Truncated message */
+ return -EBADMSG;
+
+ length = LLDP_NEIGHBOR_TLV_LENGTH(n);
+ if (n->rindex + 2 + length > n->raw_size)
+ return -EBADMSG;
+
+ n->rindex += 2 + length;
+ return n->rindex < n->raw_size;
+}
+
+_public_ int sd_lldp_neighbor_tlv_get_type(sd_lldp_neighbor *n, uint8_t *type) {
+ assert_return(n, -EINVAL);
+ assert_return(type, -EINVAL);
+
+ if (n->rindex == n->raw_size) /* EOF */
+ return -ESPIPE;
+
+ if (n->rindex + 2 > n->raw_size)
+ return -EBADMSG;
+
+ *type = LLDP_NEIGHBOR_TLV_TYPE(n);
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_tlv_is_type(sd_lldp_neighbor *n, uint8_t type) {
+ uint8_t k;
+ int r;
+
+ assert_return(n, -EINVAL);
+
+ r = sd_lldp_neighbor_tlv_get_type(n, &k);
+ if (r < 0)
+ return r;
+
+ return type == k;
+}
+
+_public_ int sd_lldp_neighbor_tlv_get_oui(sd_lldp_neighbor *n, uint8_t oui[_SD_ARRAY_STATIC 3], uint8_t *subtype) {
+ const uint8_t *d;
+ size_t length;
+ int r;
+
+ assert_return(n, -EINVAL);
+ assert_return(oui, -EINVAL);
+ assert_return(subtype, -EINVAL);
+
+ r = sd_lldp_neighbor_tlv_is_type(n, SD_LLDP_TYPE_PRIVATE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENXIO;
+
+ length = LLDP_NEIGHBOR_TLV_LENGTH(n);
+ if (length < 4)
+ return -EBADMSG;
+
+ if (n->rindex + 2 + length > n->raw_size)
+ return -EBADMSG;
+
+ d = LLDP_NEIGHBOR_TLV_DATA(n);
+ memcpy(oui, d, 3);
+ *subtype = d[3];
+
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_tlv_is_oui(sd_lldp_neighbor *n, const uint8_t oui[_SD_ARRAY_STATIC 3], uint8_t subtype) {
+ uint8_t k[3], st;
+ int r;
+
+ r = sd_lldp_neighbor_tlv_get_oui(n, k, &st);
+ if (r == -ENXIO)
+ return 0;
+ if (r < 0)
+ return r;
+
+ return memcmp(k, oui, 3) == 0 && st == subtype;
+}
+
+_public_ int sd_lldp_neighbor_tlv_get_raw(sd_lldp_neighbor *n, const void **ret, size_t *size) {
+ size_t length;
+
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ /* Note that this returns the full TLV, including the TLV header */
+
+ if (n->rindex + 2 > n->raw_size)
+ return -EBADMSG;
+
+ length = LLDP_NEIGHBOR_TLV_LENGTH(n);
+ if (n->rindex + 2 + length > n->raw_size)
+ return -EBADMSG;
+
+ *ret = (uint8_t*) LLDP_NEIGHBOR_RAW(n) + n->rindex;
+ *size = length + 2;
+
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_timestamp(sd_lldp_neighbor *n, clockid_t clock, uint64_t *ret) {
+ assert_return(n, -EINVAL);
+ assert_return(TRIPLE_TIMESTAMP_HAS_CLOCK(clock), -EOPNOTSUPP);
+ assert_return(clock_supported(clock), -EOPNOTSUPP);
+ assert_return(ret, -EINVAL);
+
+ if (!triple_timestamp_is_set(&n->timestamp))
+ return -ENODATA;
+
+ *ret = triple_timestamp_by_clock(&n->timestamp, clock);
+ return 0;
+}
diff --git a/src/libsystemd-network/lldp-neighbor.h b/src/libsystemd-network/lldp-neighbor.h
new file mode 100644
index 0000000..62dbff4
--- /dev/null
+++ b/src/libsystemd-network/lldp-neighbor.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-lldp.h"
+
+#include "hash-funcs.h"
+#include "lldp-internal.h"
+#include "time-util.h"
+
+typedef struct LLDPNeighborID {
+ /* The spec calls this an "MSAP identifier" */
+ void *chassis_id;
+ size_t chassis_id_size;
+
+ void *port_id;
+ size_t port_id_size;
+} LLDPNeighborID;
+
+struct sd_lldp_neighbor {
+ /* Neighbor objects stay around as long as they are linked into an "sd_lldp" object or n_ref > 0. */
+ sd_lldp *lldp;
+ unsigned n_ref;
+
+ triple_timestamp timestamp;
+
+ usec_t until;
+ unsigned prioq_idx;
+
+ struct ether_addr source_address;
+ struct ether_addr destination_address;
+
+ LLDPNeighborID id;
+
+ /* The raw packet size. The data is appended to the object, accessible via LLDP_NEIGHBOR_RAW() */
+ size_t raw_size;
+
+ /* The current read index for the iterative TLV interface */
+ size_t rindex;
+
+ /* And a couple of fields parsed out. */
+ bool has_ttl:1;
+ bool has_capabilities:1;
+ bool has_port_vlan_id:1;
+
+ uint16_t ttl;
+
+ uint16_t system_capabilities;
+ uint16_t enabled_capabilities;
+
+ char *port_description;
+ char *system_name;
+ char *system_description;
+
+ uint16_t port_vlan_id;
+
+ char *chassis_id_as_string;
+ char *port_id_as_string;
+};
+
+static inline void *LLDP_NEIGHBOR_RAW(const sd_lldp_neighbor *n) {
+ return (uint8_t*) n + ALIGN(sizeof(sd_lldp_neighbor));
+}
+
+static inline uint8_t LLDP_NEIGHBOR_TLV_TYPE(const sd_lldp_neighbor *n) {
+ return ((uint8_t*) LLDP_NEIGHBOR_RAW(n))[n->rindex] >> 1;
+}
+
+static inline size_t LLDP_NEIGHBOR_TLV_LENGTH(const sd_lldp_neighbor *n) {
+ uint8_t *p;
+
+ p = (uint8_t*) LLDP_NEIGHBOR_RAW(n) + n->rindex;
+ return p[1] + (((size_t) (p[0] & 1)) << 8);
+}
+
+static inline void* LLDP_NEIGHBOR_TLV_DATA(const sd_lldp_neighbor *n) {
+ return ((uint8_t*) LLDP_NEIGHBOR_RAW(n)) + n->rindex + 2;
+}
+
+extern const struct hash_ops lldp_neighbor_hash_ops;
+int lldp_neighbor_id_compare_func(const LLDPNeighborID *x, const LLDPNeighborID *y);
+int lldp_neighbor_prioq_compare_func(const void *a, const void *b);
+
+sd_lldp_neighbor *lldp_neighbor_unlink(sd_lldp_neighbor *n);
+sd_lldp_neighbor *lldp_neighbor_new(size_t raw_size);
+int lldp_neighbor_parse(sd_lldp_neighbor *n);
+void lldp_neighbor_start_ttl(sd_lldp_neighbor *n);
+bool lldp_neighbor_equal(const sd_lldp_neighbor *a, const sd_lldp_neighbor *b);
diff --git a/src/libsystemd-network/lldp-network.c b/src/libsystemd-network/lldp-network.c
new file mode 100644
index 0000000..870584c
--- /dev/null
+++ b/src/libsystemd-network/lldp-network.c
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <linux/filter.h>
+#include <netinet/if_ether.h>
+
+#include "fd-util.h"
+#include "lldp-network.h"
+#include "missing.h"
+#include "socket-util.h"
+
+int lldp_network_bind_raw_socket(int ifindex) {
+
+ static const struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ethhdr, h_dest)), /* A <- 4 bytes of destination MAC */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x0180c200, 1, 0), /* A != 01:80:c2:00 */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* drop packet */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ethhdr, h_dest) + 4), /* A <- remaining 2 bytes of destination MAC */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x0000, 3, 0), /* A != 00:00 */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x0003, 2, 0), /* A != 00:03 */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x000e, 1, 0), /* A != 00:0e */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* drop packet */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ethhdr, h_proto)), /* A <- protocol */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_LLDP, 1, 0), /* A != ETHERTYPE_LLDP */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* drop packet */
+ BPF_STMT(BPF_RET + BPF_K, (uint32_t) -1), /* accept packet */
+ };
+
+ static const struct sock_fprog fprog = {
+ .len = ELEMENTSOF(filter),
+ .filter = (struct sock_filter*) filter,
+ };
+
+ struct packet_mreq mreq = {
+ .mr_ifindex = ifindex,
+ .mr_type = PACKET_MR_MULTICAST,
+ .mr_alen = ETH_ALEN,
+ .mr_address = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x00 }
+ };
+
+ union sockaddr_union saddrll = {
+ .ll.sll_family = AF_PACKET,
+ .ll.sll_ifindex = ifindex,
+ };
+
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(ifindex > 0);
+
+ fd = socket(PF_PACKET, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK,
+ htobe16(ETHERTYPE_LLDP));
+ if (fd < 0)
+ return -errno;
+
+ r = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
+ if (r < 0)
+ return -errno;
+
+ r = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
+ if (r < 0)
+ return -errno;
+
+ mreq.mr_address[ETH_ALEN - 1] = 0x03;
+ r = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
+ if (r < 0)
+ return -errno;
+
+ mreq.mr_address[ETH_ALEN - 1] = 0x0E;
+ r = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
+ if (r < 0)
+ return -errno;
+
+ r = bind(fd, &saddrll.sa, sizeof(saddrll.ll));
+ if (r < 0)
+ return -errno;
+
+ return TAKE_FD(fd);
+}
diff --git a/src/libsystemd-network/lldp-network.h b/src/libsystemd-network/lldp-network.h
new file mode 100644
index 0000000..e4ed289
--- /dev/null
+++ b/src/libsystemd-network/lldp-network.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-event.h"
+
+int lldp_network_bind_raw_socket(int ifindex);
diff --git a/src/libsystemd-network/meson.build b/src/libsystemd-network/meson.build
new file mode 100644
index 0000000..56d470f
--- /dev/null
+++ b/src/libsystemd-network/meson.build
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+sources = files('''
+ sd-dhcp-client.c
+ sd-dhcp-server.c
+ dhcp-network.c
+ dhcp-option.c
+ dhcp-packet.c
+ dhcp-internal.h
+ dhcp-server-internal.h
+ dhcp-protocol.h
+ dhcp-lease-internal.h
+ sd-dhcp-lease.c
+ sd-ipv4ll.c
+ sd-ipv4acd.c
+ arp-util.h
+ arp-util.c
+ network-internal.c
+ sd-ndisc.c
+ ndisc-internal.h
+ ndisc-router.h
+ ndisc-router.c
+ sd-radv.c
+ radv-internal.h
+ icmp6-util.h
+ icmp6-util.c
+ sd-dhcp6-client.c
+ dhcp6-internal.h
+ dhcp6-protocol.h
+ dhcp6-network.c
+ dhcp6-option.c
+ dhcp6-lease-internal.h
+ sd-dhcp6-lease.c
+ dhcp-identifier.h
+ dhcp-identifier.c
+ lldp-internal.h
+ lldp-network.h
+ lldp-network.c
+ lldp-neighbor.h
+ lldp-neighbor.c
+ sd-lldp.c
+'''.split())
+
+network_internal_h = files('network-internal.h')
+
+libsystemd_network = static_library(
+ 'systemd-network',
+ sources,
+ network_internal_h,
+ include_directories : includes)
diff --git a/src/libsystemd-network/ndisc-internal.h b/src/libsystemd-network/ndisc-internal.h
new file mode 100644
index 0000000..0c04fea
--- /dev/null
+++ b/src/libsystemd-network/ndisc-internal.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include "log.h"
+
+#include "sd-ndisc.h"
+
+#define NDISC_ROUTER_SOLICITATION_INTERVAL (4U * USEC_PER_SEC)
+#define NDISC_MAX_ROUTER_SOLICITATION_INTERVAL (3600U * USEC_PER_SEC)
+#define NDISC_MAX_ROUTER_SOLICITATIONS 3U
+
+struct sd_ndisc {
+ unsigned n_ref;
+
+ int ifindex;
+ int fd;
+
+ sd_event *event;
+ int event_priority;
+
+ struct ether_addr mac_addr;
+ uint8_t hop_limit;
+ uint32_t mtu;
+
+ sd_event_source *recv_event_source;
+ sd_event_source *timeout_event_source;
+ sd_event_source *timeout_no_ra;
+
+ usec_t retransmit_time;
+
+ sd_ndisc_callback_t callback;
+ void *userdata;
+};
+
+#define log_ndisc_errno(error, fmt, ...) log_internal(LOG_DEBUG, error, __FILE__, __LINE__, __func__, "NDISC: " fmt, ##__VA_ARGS__)
+#define log_ndisc(fmt, ...) log_ndisc_errno(0, fmt, ##__VA_ARGS__)
+
+const char* ndisc_event_to_string(sd_ndisc_event e) _const_;
+sd_ndisc_event ndisc_event_from_string(const char *s) _pure_;
diff --git a/src/libsystemd-network/ndisc-router.c b/src/libsystemd-network/ndisc-router.c
new file mode 100644
index 0000000..6935311
--- /dev/null
+++ b/src/libsystemd-network/ndisc-router.c
@@ -0,0 +1,749 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/icmp6.h>
+
+#include "sd-ndisc.h"
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "hostname-util.h"
+#include "missing.h"
+#include "ndisc-internal.h"
+#include "ndisc-router.h"
+#include "strv.h"
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_ndisc_router, sd_ndisc_router, mfree);
+
+sd_ndisc_router *ndisc_router_new(size_t raw_size) {
+ sd_ndisc_router *rt;
+
+ rt = malloc0(ALIGN(sizeof(sd_ndisc_router)) + raw_size);
+ if (!rt)
+ return NULL;
+
+ rt->raw_size = raw_size;
+ rt->n_ref = 1;
+
+ return rt;
+}
+
+_public_ int sd_ndisc_router_from_raw(sd_ndisc_router **ret, const void *raw, size_t raw_size) {
+ _cleanup_(sd_ndisc_router_unrefp) sd_ndisc_router *rt = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(raw || raw_size <= 0, -EINVAL);
+
+ rt = ndisc_router_new(raw_size);
+ if (!rt)
+ return -ENOMEM;
+
+ memcpy(NDISC_ROUTER_RAW(rt), raw, raw_size);
+ r = ndisc_router_parse(rt);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(rt);
+
+ return r;
+}
+
+_public_ int sd_ndisc_router_get_address(sd_ndisc_router *rt, struct in6_addr *ret_addr) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret_addr, -EINVAL);
+
+ if (IN6_IS_ADDR_UNSPECIFIED(&rt->address))
+ return -ENODATA;
+
+ *ret_addr = rt->address;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_get_timestamp(sd_ndisc_router *rt, clockid_t clock, uint64_t *ret) {
+ assert_return(rt, -EINVAL);
+ assert_return(TRIPLE_TIMESTAMP_HAS_CLOCK(clock), -EOPNOTSUPP);
+ assert_return(clock_supported(clock), -EOPNOTSUPP);
+ assert_return(ret, -EINVAL);
+
+ if (!triple_timestamp_is_set(&rt->timestamp))
+ return -ENODATA;
+
+ *ret = triple_timestamp_by_clock(&rt->timestamp, clock);
+ return 0;
+}
+
+_public_ int sd_ndisc_router_get_raw(sd_ndisc_router *rt, const void **ret, size_t *size) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ *ret = NDISC_ROUTER_RAW(rt);
+ *size = rt->raw_size;
+
+ return 0;
+}
+
+int ndisc_router_parse(sd_ndisc_router *rt) {
+ struct nd_router_advert *a;
+ const uint8_t *p;
+ bool has_mtu = false, has_flag_extension = false;
+ size_t left;
+
+ assert(rt);
+
+ if (rt->raw_size < sizeof(struct nd_router_advert)) {
+ log_ndisc("Too small to be a router advertisement, ignoring.");
+ return -EBADMSG;
+ }
+
+ /* Router advertisement packets are neatly aligned to 64bit boundaries, hence we can access them directly */
+ a = NDISC_ROUTER_RAW(rt);
+
+ if (a->nd_ra_type != ND_ROUTER_ADVERT) {
+ log_ndisc("Received ND packet that is not a router advertisement, ignoring.");
+ return -EBADMSG;
+ }
+
+ if (a->nd_ra_code != 0) {
+ log_ndisc("Received ND packet with wrong RA code, ignoring.");
+ return -EBADMSG;
+ }
+
+ rt->hop_limit = a->nd_ra_curhoplimit;
+ rt->flags = a->nd_ra_flags_reserved; /* the first 8bit */
+ rt->lifetime = be16toh(a->nd_ra_router_lifetime);
+
+ rt->preference = (rt->flags >> 3) & 3;
+ if (!IN_SET(rt->preference, SD_NDISC_PREFERENCE_LOW, SD_NDISC_PREFERENCE_HIGH))
+ rt->preference = SD_NDISC_PREFERENCE_MEDIUM;
+
+ p = (const uint8_t*) NDISC_ROUTER_RAW(rt) + sizeof(struct nd_router_advert);
+ left = rt->raw_size - sizeof(struct nd_router_advert);
+
+ for (;;) {
+ uint8_t type;
+ size_t length;
+
+ if (left == 0)
+ break;
+
+ if (left < 2) {
+ log_ndisc("Option lacks header, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ type = p[0];
+ length = p[1] * 8;
+
+ if (length == 0) {
+ log_ndisc("Zero-length option, ignoring datagram.");
+ return -EBADMSG;
+ }
+ if (left < length) {
+ log_ndisc("Option truncated, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ switch (type) {
+
+ case SD_NDISC_OPTION_PREFIX_INFORMATION:
+
+ if (length != 4*8) {
+ log_ndisc("Prefix option of invalid size, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ if (p[2] > 128) {
+ log_ndisc("Bad prefix length, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ break;
+
+ case SD_NDISC_OPTION_MTU: {
+ uint32_t m;
+
+ if (has_mtu) {
+ log_ndisc("MTU option specified twice, ignoring.");
+ break;
+ }
+
+ if (length != 8) {
+ log_ndisc("MTU option of invalid size, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ m = be32toh(*(uint32_t*) (p + 4));
+ if (m >= IPV6_MIN_MTU) /* ignore invalidly small MTUs */
+ rt->mtu = m;
+
+ has_mtu = true;
+ break;
+ }
+
+ case SD_NDISC_OPTION_ROUTE_INFORMATION:
+ if (length < 1*8 || length > 3*8) {
+ log_ndisc("Route information option of invalid size, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ if (p[2] > 128) {
+ log_ndisc("Bad route prefix length, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ break;
+
+ case SD_NDISC_OPTION_RDNSS:
+ if (length < 3*8 || (length % (2*8)) != 1*8) {
+ log_ndisc("RDNSS option has invalid size.");
+ return -EBADMSG;
+ }
+
+ break;
+
+ case SD_NDISC_OPTION_FLAGS_EXTENSION:
+
+ if (has_flag_extension) {
+ log_ndisc("Flags extension option specified twice, ignoring.");
+ break;
+ }
+
+ if (length < 1*8) {
+ log_ndisc("Flags extension option has invalid size.");
+ return -EBADMSG;
+ }
+
+ /* Add in the additional flags bits */
+ rt->flags |=
+ ((uint64_t) p[2] << 8) |
+ ((uint64_t) p[3] << 16) |
+ ((uint64_t) p[4] << 24) |
+ ((uint64_t) p[5] << 32) |
+ ((uint64_t) p[6] << 40) |
+ ((uint64_t) p[7] << 48);
+
+ has_flag_extension = true;
+ break;
+
+ case SD_NDISC_OPTION_DNSSL:
+ if (length < 2*8) {
+ log_ndisc("DNSSL option has invalid size.");
+ return -EBADMSG;
+ }
+
+ break;
+ }
+
+ p += length, left -= length;
+ }
+
+ rt->rindex = sizeof(struct nd_router_advert);
+ return 0;
+}
+
+_public_ int sd_ndisc_router_get_hop_limit(sd_ndisc_router *rt, uint8_t *ret) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ *ret = rt->hop_limit;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_get_flags(sd_ndisc_router *rt, uint64_t *ret_flags) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret_flags, -EINVAL);
+
+ *ret_flags = rt->flags;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_get_lifetime(sd_ndisc_router *rt, uint16_t *ret_lifetime) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret_lifetime, -EINVAL);
+
+ *ret_lifetime = rt->lifetime;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_get_preference(sd_ndisc_router *rt, unsigned *ret) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ *ret = rt->preference;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_get_mtu(sd_ndisc_router *rt, uint32_t *ret) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (rt->mtu <= 0)
+ return -ENODATA;
+
+ *ret = rt->mtu;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_option_rewind(sd_ndisc_router *rt) {
+ assert_return(rt, -EINVAL);
+
+ assert(rt->raw_size >= sizeof(struct nd_router_advert));
+ rt->rindex = sizeof(struct nd_router_advert);
+
+ return rt->rindex < rt->raw_size;
+}
+
+_public_ int sd_ndisc_router_option_next(sd_ndisc_router *rt) {
+ size_t length;
+
+ assert_return(rt, -EINVAL);
+
+ if (rt->rindex == rt->raw_size) /* EOF */
+ return -ESPIPE;
+
+ if (rt->rindex + 2 > rt->raw_size) /* Truncated message */
+ return -EBADMSG;
+
+ length = NDISC_ROUTER_OPTION_LENGTH(rt);
+ if (rt->rindex + length > rt->raw_size)
+ return -EBADMSG;
+
+ rt->rindex += length;
+ return rt->rindex < rt->raw_size;
+}
+
+_public_ int sd_ndisc_router_option_get_type(sd_ndisc_router *rt, uint8_t *ret) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (rt->rindex == rt->raw_size) /* EOF */
+ return -ESPIPE;
+
+ if (rt->rindex + 2 > rt->raw_size) /* Truncated message */
+ return -EBADMSG;
+
+ *ret = NDISC_ROUTER_OPTION_TYPE(rt);
+ return 0;
+}
+
+_public_ int sd_ndisc_router_option_is_type(sd_ndisc_router *rt, uint8_t type) {
+ uint8_t k;
+ int r;
+
+ assert_return(rt, -EINVAL);
+
+ r = sd_ndisc_router_option_get_type(rt, &k);
+ if (r < 0)
+ return r;
+
+ return type == k;
+}
+
+_public_ int sd_ndisc_router_option_get_raw(sd_ndisc_router *rt, const void **ret, size_t *size) {
+ size_t length;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ /* Note that this returns the full option, including the option header */
+
+ if (rt->rindex + 2 > rt->raw_size)
+ return -EBADMSG;
+
+ length = NDISC_ROUTER_OPTION_LENGTH(rt);
+ if (rt->rindex + length > rt->raw_size)
+ return -EBADMSG;
+
+ *ret = (uint8_t*) NDISC_ROUTER_RAW(rt) + rt->rindex;
+ *size = length;
+
+ return 0;
+}
+
+static int get_prefix_info(sd_ndisc_router *rt, struct nd_opt_prefix_info **ret) {
+ struct nd_opt_prefix_info *ri;
+ size_t length;
+ int r;
+
+ assert(rt);
+ assert(ret);
+
+ r = sd_ndisc_router_option_is_type(rt, SD_NDISC_OPTION_PREFIX_INFORMATION);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EMEDIUMTYPE;
+
+ length = NDISC_ROUTER_OPTION_LENGTH(rt);
+ if (length != sizeof(struct nd_opt_prefix_info))
+ return -EBADMSG;
+
+ ri = (struct nd_opt_prefix_info*) ((uint8_t*) NDISC_ROUTER_RAW(rt) + rt->rindex);
+ if (ri->nd_opt_pi_prefix_len > 128)
+ return -EBADMSG;
+
+ *ret = ri;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_prefix_get_valid_lifetime(sd_ndisc_router *rt, uint32_t *ret) {
+ struct nd_opt_prefix_info *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_prefix_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ *ret = be32toh(ri->nd_opt_pi_valid_time);
+ return 0;
+}
+
+_public_ int sd_ndisc_router_prefix_get_preferred_lifetime(sd_ndisc_router *rt, uint32_t *ret) {
+ struct nd_opt_prefix_info *pi;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_prefix_info(rt, &pi);
+ if (r < 0)
+ return r;
+
+ *ret = be32toh(pi->nd_opt_pi_preferred_time);
+ return 0;
+}
+
+_public_ int sd_ndisc_router_prefix_get_flags(sd_ndisc_router *rt, uint8_t *ret) {
+ struct nd_opt_prefix_info *pi;
+ uint8_t flags;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_prefix_info(rt, &pi);
+ if (r < 0)
+ return r;
+
+ flags = pi->nd_opt_pi_flags_reserved;
+
+ if ((flags & ND_OPT_PI_FLAG_AUTO) && (pi->nd_opt_pi_prefix_len != 64)) {
+ log_ndisc("Invalid prefix length, ignoring prefix for stateless autoconfiguration.");
+ flags &= ~ND_OPT_PI_FLAG_AUTO;
+ }
+
+ *ret = flags;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_prefix_get_address(sd_ndisc_router *rt, struct in6_addr *ret_addr) {
+ struct nd_opt_prefix_info *pi;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret_addr, -EINVAL);
+
+ r = get_prefix_info(rt, &pi);
+ if (r < 0)
+ return r;
+
+ *ret_addr = pi->nd_opt_pi_prefix;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_prefix_get_prefixlen(sd_ndisc_router *rt, unsigned *ret) {
+ struct nd_opt_prefix_info *pi;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_prefix_info(rt, &pi);
+ if (r < 0)
+ return r;
+
+ if (pi->nd_opt_pi_prefix_len > 128)
+ return -EBADMSG;
+
+ *ret = pi->nd_opt_pi_prefix_len;
+ return 0;
+}
+
+static int get_route_info(sd_ndisc_router *rt, uint8_t **ret) {
+ uint8_t *ri;
+ size_t length;
+ int r;
+
+ assert(rt);
+ assert(ret);
+
+ r = sd_ndisc_router_option_is_type(rt, SD_NDISC_OPTION_ROUTE_INFORMATION);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EMEDIUMTYPE;
+
+ length = NDISC_ROUTER_OPTION_LENGTH(rt);
+ if (length < 1*8 || length > 3*8)
+ return -EBADMSG;
+
+ ri = (uint8_t*) NDISC_ROUTER_RAW(rt) + rt->rindex;
+
+ if (ri[2] > 128)
+ return -EBADMSG;
+
+ *ret = ri;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_route_get_lifetime(sd_ndisc_router *rt, uint32_t *ret) {
+ uint8_t *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_route_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ *ret = be32toh(*(uint32_t*) (ri + 4));
+ return 0;
+}
+
+_public_ int sd_ndisc_router_route_get_address(sd_ndisc_router *rt, struct in6_addr *ret_addr) {
+ uint8_t *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret_addr, -EINVAL);
+
+ r = get_route_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ zero(*ret_addr);
+ memcpy(ret_addr, ri + 8, NDISC_ROUTER_OPTION_LENGTH(rt) - 8);
+
+ return 0;
+}
+
+_public_ int sd_ndisc_router_route_get_prefixlen(sd_ndisc_router *rt, unsigned *ret) {
+ uint8_t *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_route_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ *ret = ri[2];
+ return 0;
+}
+
+_public_ int sd_ndisc_router_route_get_preference(sd_ndisc_router *rt, unsigned *ret) {
+ uint8_t *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_route_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ *ret = (ri[3] >> 3) & 3;
+ if (!IN_SET(*ret, SD_NDISC_PREFERENCE_LOW, SD_NDISC_PREFERENCE_HIGH))
+ *ret = SD_NDISC_PREFERENCE_MEDIUM;
+
+ return 0;
+}
+
+static int get_rdnss_info(sd_ndisc_router *rt, uint8_t **ret) {
+ size_t length;
+ int r;
+
+ assert(rt);
+ assert(ret);
+
+ r = sd_ndisc_router_option_is_type(rt, SD_NDISC_OPTION_RDNSS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EMEDIUMTYPE;
+
+ length = NDISC_ROUTER_OPTION_LENGTH(rt);
+ if (length < 3*8 || (length % (2*8)) != 1*8)
+ return -EBADMSG;
+
+ *ret = (uint8_t*) NDISC_ROUTER_RAW(rt) + rt->rindex;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_rdnss_get_addresses(sd_ndisc_router *rt, const struct in6_addr **ret) {
+ uint8_t *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_rdnss_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ *ret = (const struct in6_addr*) (ri + 8);
+ return (NDISC_ROUTER_OPTION_LENGTH(rt) - 8) / 16;
+}
+
+_public_ int sd_ndisc_router_rdnss_get_lifetime(sd_ndisc_router *rt, uint32_t *ret) {
+ uint8_t *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_rdnss_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ *ret = be32toh(*(uint32_t*) (ri + 4));
+ return 0;
+}
+
+static int get_dnssl_info(sd_ndisc_router *rt, uint8_t **ret) {
+ size_t length;
+ int r;
+
+ assert(rt);
+ assert(ret);
+
+ r = sd_ndisc_router_option_is_type(rt, SD_NDISC_OPTION_DNSSL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EMEDIUMTYPE;
+
+ length = NDISC_ROUTER_OPTION_LENGTH(rt);
+ if (length < 2*8)
+ return -EBADMSG;
+
+ *ret = (uint8_t*) NDISC_ROUTER_RAW(rt) + rt->rindex;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_dnssl_get_domains(sd_ndisc_router *rt, char ***ret) {
+ _cleanup_strv_free_ char **l = NULL;
+ _cleanup_free_ char *e = NULL;
+ size_t allocated = 0, n = 0, left;
+ uint8_t *ri, *p;
+ bool first = true;
+ int r;
+ unsigned k = 0;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_dnssl_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ p = ri + 8;
+ left = NDISC_ROUTER_OPTION_LENGTH(rt) - 8;
+
+ for (;;) {
+ if (left == 0) {
+
+ if (n > 0) /* Not properly NUL terminated */
+ return -EBADMSG;
+
+ break;
+ }
+
+ if (*p == 0) {
+ /* Found NUL termination */
+
+ if (n > 0) {
+ _cleanup_free_ char *normalized = NULL;
+
+ e[n] = 0;
+ r = dns_name_normalize(e, 0, &normalized);
+ if (r < 0)
+ return r;
+
+ /* Ignore the root domain name or "localhost" and friends */
+ if (!is_localhost(normalized) &&
+ !dns_name_is_root(normalized)) {
+
+ if (strv_push(&l, normalized) < 0)
+ return -ENOMEM;
+
+ normalized = NULL;
+ k++;
+ }
+ }
+
+ n = 0;
+ first = true;
+ p++, left--;
+ continue;
+ }
+
+ /* Check for compression (which is not allowed) */
+ if (*p > 63)
+ return -EBADMSG;
+
+ if (1U + *p + 1U > left)
+ return -EBADMSG;
+
+ if (!GREEDY_REALLOC(e, allocated, n + !first + DNS_LABEL_ESCAPED_MAX + 1U))
+ return -ENOMEM;
+
+ if (first)
+ first = false;
+ else
+ e[n++] = '.';
+
+ r = dns_label_escape((char*) p+1, *p, e + n, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ n += r;
+
+ left -= 1 + *p;
+ p += 1 + *p;
+ }
+
+ if (strv_isempty(l)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ *ret = TAKE_PTR(l);
+
+ return k;
+}
+
+_public_ int sd_ndisc_router_dnssl_get_lifetime(sd_ndisc_router *rt, uint32_t *ret_sec) {
+ uint8_t *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret_sec, -EINVAL);
+
+ r = get_dnssl_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ *ret_sec = be32toh(*(uint32_t*) (ri + 4));
+ return 0;
+}
diff --git a/src/libsystemd-network/ndisc-router.h b/src/libsystemd-network/ndisc-router.h
new file mode 100644
index 0000000..2e2c1af
--- /dev/null
+++ b/src/libsystemd-network/ndisc-router.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include "sd-ndisc.h"
+
+#include "time-util.h"
+
+struct sd_ndisc_router {
+ unsigned n_ref;
+
+ triple_timestamp timestamp;
+ struct in6_addr address;
+
+ /* The raw packet size. The data is appended to the object, accessible via NDIS_ROUTER_RAW() */
+ size_t raw_size;
+
+ /* The current read index for the iterative option interface */
+ size_t rindex;
+
+ uint64_t flags;
+ unsigned preference;
+ uint16_t lifetime;
+
+ uint8_t hop_limit;
+ uint32_t mtu;
+};
+
+static inline void* NDISC_ROUTER_RAW(const sd_ndisc_router *rt) {
+ return (uint8_t*) rt + ALIGN(sizeof(sd_ndisc_router));
+}
+
+static inline void *NDISC_ROUTER_OPTION_DATA(const sd_ndisc_router *rt) {
+ return ((uint8_t*) NDISC_ROUTER_RAW(rt)) + rt->rindex;
+}
+
+static inline uint8_t NDISC_ROUTER_OPTION_TYPE(const sd_ndisc_router *rt) {
+ return ((uint8_t*) NDISC_ROUTER_OPTION_DATA(rt))[0];
+}
+static inline size_t NDISC_ROUTER_OPTION_LENGTH(const sd_ndisc_router *rt) {
+ return ((uint8_t*) NDISC_ROUTER_OPTION_DATA(rt))[1] * 8;
+}
+
+sd_ndisc_router *ndisc_router_new(size_t raw_size);
+int ndisc_router_parse(sd_ndisc_router *rt);
diff --git a/src/libsystemd-network/network-internal.c b/src/libsystemd-network/network-internal.c
new file mode 100644
index 0000000..0348e7f
--- /dev/null
+++ b/src/libsystemd-network/network-internal.c
@@ -0,0 +1,620 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <netinet/ether.h>
+
+#include "sd-id128.h"
+#include "sd-ndisc.h"
+
+#include "alloc-util.h"
+#include "condition.h"
+#include "conf-parser.h"
+#include "dhcp-lease-internal.h"
+#include "ether-addr-util.h"
+#include "hexdecoct.h"
+#include "log.h"
+#include "network-internal.h"
+#include "parse-util.h"
+#include "siphash24.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+#include "util.h"
+
+const char *net_get_name(sd_device *device) {
+ const char *name, *field;
+
+ assert(device);
+
+ /* fetch some persistent data unique (on this machine) to this device */
+ FOREACH_STRING(field, "ID_NET_NAME_ONBOARD", "ID_NET_NAME_SLOT", "ID_NET_NAME_PATH", "ID_NET_NAME_MAC")
+ if (sd_device_get_property_value(device, field, &name) >= 0)
+ return name;
+
+ return NULL;
+}
+
+#define HASH_KEY SD_ID128_MAKE(d3,1e,48,fa,90,fe,4b,4c,9d,af,d5,d7,a1,b1,2e,8a)
+
+int net_get_unique_predictable_data(sd_device *device, uint64_t *result) {
+ size_t l, sz = 0;
+ const char *name = NULL;
+ int r;
+ uint8_t *v;
+
+ assert(device);
+
+ name = net_get_name(device);
+ if (!name)
+ return -ENOENT;
+
+ l = strlen(name);
+ sz = sizeof(sd_id128_t) + l;
+ v = newa(uint8_t, sz);
+
+ /* fetch some persistent data unique to this machine */
+ r = sd_id128_get_machine((sd_id128_t*) v);
+ if (r < 0)
+ return r;
+ memcpy(v + sizeof(sd_id128_t), name, l);
+
+ /* Let's hash the machine ID plus the device name. We
+ * use a fixed, but originally randomly created hash
+ * key here. */
+ *result = htole64(siphash24(v, sz, HASH_KEY.bytes));
+
+ return 0;
+}
+
+static bool net_condition_test_strv(char * const *raw_patterns,
+ const char *string) {
+ if (strv_isempty(raw_patterns))
+ return true;
+
+ /* If the patterns begin with "!", edit it out and negate the test. */
+ if (raw_patterns[0][0] == '!') {
+ char **patterns;
+ size_t i, length;
+
+ length = strv_length(raw_patterns) + 1; /* Include the NULL. */
+ patterns = newa(char*, length);
+ patterns[0] = raw_patterns[0] + 1; /* Skip the "!". */
+ for (i = 1; i < length; i++)
+ patterns[i] = raw_patterns[i];
+
+ return !string || !strv_fnmatch(patterns, string, 0);
+ }
+
+ return string && strv_fnmatch(raw_patterns, string, 0);
+}
+
+bool net_match_config(Set *match_mac,
+ char * const *match_paths,
+ char * const *match_drivers,
+ char * const *match_types,
+ char * const *match_names,
+ Condition *match_host,
+ Condition *match_virt,
+ Condition *match_kernel_cmdline,
+ Condition *match_kernel_version,
+ Condition *match_arch,
+ const struct ether_addr *dev_mac,
+ const char *dev_path,
+ const char *dev_parent_driver,
+ const char *dev_driver,
+ const char *dev_type,
+ const char *dev_name) {
+
+ if (match_host && condition_test(match_host) <= 0)
+ return false;
+
+ if (match_virt && condition_test(match_virt) <= 0)
+ return false;
+
+ if (match_kernel_cmdline && condition_test(match_kernel_cmdline) <= 0)
+ return false;
+
+ if (match_kernel_version && condition_test(match_kernel_version) <= 0)
+ return false;
+
+ if (match_arch && condition_test(match_arch) <= 0)
+ return false;
+
+ if (match_mac && (!dev_mac || !set_contains(match_mac, dev_mac)))
+ return false;
+
+ if (!net_condition_test_strv(match_paths, dev_path))
+ return false;
+
+ if (!net_condition_test_strv(match_drivers, dev_driver))
+ return false;
+
+ if (!net_condition_test_strv(match_types, dev_type))
+ return false;
+
+ if (!net_condition_test_strv(match_names, dev_name))
+ return false;
+
+ return true;
+}
+
+int config_parse_net_condition(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ConditionType cond = ltype;
+ Condition **ret = data;
+ bool negate;
+ Condition *c;
+ _cleanup_free_ char *s = NULL;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ negate = rvalue[0] == '!';
+ if (negate)
+ rvalue++;
+
+ s = strdup(rvalue);
+ if (!s)
+ return log_oom();
+
+ c = condition_new(cond, s, false, negate);
+ if (!c)
+ return log_oom();
+
+ if (*ret)
+ condition_free(*ret);
+
+ *ret = c;
+ return 0;
+}
+
+int config_parse_ifnames(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***sv = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&rvalue, &word, NULL, 0);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse interface name list: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ break;
+
+ if (!ifname_valid(word)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Interface name is not valid or too long, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = strv_push(sv, word);
+ if (r < 0)
+ return log_oom();
+
+ word = NULL;
+ }
+
+ return 0;
+}
+
+int config_parse_ifalias(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **s = data;
+ _cleanup_free_ char *n = NULL;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ n = strdup(rvalue);
+ if (!n)
+ return log_oom();
+
+ if (!ascii_is_valid(n) || strlen(n) >= IFALIASZ) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Interface alias is not ASCII clean or is too long, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ if (isempty(n))
+ *s = mfree(*s);
+ else
+ free_and_replace(*s, n);
+
+ return 0;
+}
+
+int config_parse_hwaddr(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ struct ether_addr *n = NULL;
+ struct ether_addr **hwaddr = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ n = new0(struct ether_addr, 1);
+ if (!n)
+ return log_oom();
+
+ r = ether_addr_from_string(rvalue, n);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Not a valid MAC address, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ free_and_replace(*hwaddr, n);
+
+ return 0;
+}
+
+int config_parse_hwaddrs(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_set_free_free_ Set *s = NULL;
+ const char *p = rvalue;
+ Set **hwaddrs = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *hwaddrs = set_free_free(*hwaddrs);
+ return 0;
+ }
+
+ s = set_new(&ether_addr_hash_ops);
+ if (!s)
+ return log_oom();
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ _cleanup_free_ struct ether_addr *n = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n = new(struct ether_addr, 1);
+ if (!n)
+ return log_oom();
+
+ r = ether_addr_from_string(word, n);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Not a valid MAC address, ignoring: %s", word);
+ continue;
+ }
+
+ r = set_put(s, n);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ n = NULL; /* avoid cleanup */
+ }
+
+ r = set_ensure_allocated(hwaddrs, &ether_addr_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = set_move(*hwaddrs, s);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_bridge_port_priority(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint16_t i;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou16(rvalue, &i);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to parse bridge port priority, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (i > LINK_BRIDGE_PORT_PRIORITY_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Bridge port priority is larger than maximum %u, ignoring: %s", LINK_BRIDGE_PORT_PRIORITY_MAX, rvalue);
+ return 0;
+ }
+
+ *((uint16_t *)data) = i;
+
+ return 0;
+}
+
+void serialize_in_addrs(FILE *f, const struct in_addr *addresses, size_t size) {
+ unsigned i;
+
+ assert(f);
+ assert(addresses);
+ assert(size);
+
+ for (i = 0; i < size; i++)
+ fprintf(f, "%s%s", inet_ntoa(addresses[i]),
+ (i < (size - 1)) ? " ": "");
+}
+
+int deserialize_in_addrs(struct in_addr **ret, const char *string) {
+ _cleanup_free_ struct in_addr *addresses = NULL;
+ int size = 0;
+
+ assert(ret);
+ assert(string);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ struct in_addr *new_addresses;
+ int r;
+
+ r = extract_first_word(&string, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ new_addresses = reallocarray(addresses, size + 1, sizeof(struct in_addr));
+ if (!new_addresses)
+ return -ENOMEM;
+ else
+ addresses = new_addresses;
+
+ r = inet_pton(AF_INET, word, &(addresses[size]));
+ if (r <= 0)
+ continue;
+
+ size++;
+ }
+
+ *ret = TAKE_PTR(addresses);
+
+ return size;
+}
+
+void serialize_in6_addrs(FILE *f, const struct in6_addr *addresses, size_t size) {
+ unsigned i;
+
+ assert(f);
+ assert(addresses);
+ assert(size);
+
+ for (i = 0; i < size; i++) {
+ char buffer[INET6_ADDRSTRLEN];
+
+ fputs(inet_ntop(AF_INET6, addresses+i, buffer, sizeof(buffer)), f);
+
+ if (i < size - 1)
+ fputc(' ', f);
+ }
+}
+
+int deserialize_in6_addrs(struct in6_addr **ret, const char *string) {
+ _cleanup_free_ struct in6_addr *addresses = NULL;
+ int size = 0;
+
+ assert(ret);
+ assert(string);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ struct in6_addr *new_addresses;
+ int r;
+
+ r = extract_first_word(&string, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ new_addresses = reallocarray(addresses, size + 1, sizeof(struct in6_addr));
+ if (!new_addresses)
+ return -ENOMEM;
+ else
+ addresses = new_addresses;
+
+ r = inet_pton(AF_INET6, word, &(addresses[size]));
+ if (r <= 0)
+ continue;
+
+ size++;
+ }
+
+ *ret = TAKE_PTR(addresses);
+
+ return size;
+}
+
+void serialize_dhcp_routes(FILE *f, const char *key, sd_dhcp_route **routes, size_t size) {
+ unsigned i;
+
+ assert(f);
+ assert(key);
+ assert(routes);
+ assert(size);
+
+ fprintf(f, "%s=", key);
+
+ for (i = 0; i < size; i++) {
+ struct in_addr dest, gw;
+ uint8_t length;
+
+ assert_se(sd_dhcp_route_get_destination(routes[i], &dest) >= 0);
+ assert_se(sd_dhcp_route_get_gateway(routes[i], &gw) >= 0);
+ assert_se(sd_dhcp_route_get_destination_prefix_length(routes[i], &length) >= 0);
+
+ fprintf(f, "%s/%" PRIu8, inet_ntoa(dest), length);
+ fprintf(f, ",%s%s", inet_ntoa(gw), (i < (size - 1)) ? " ": "");
+ }
+
+ fputs("\n", f);
+}
+
+int deserialize_dhcp_routes(struct sd_dhcp_route **ret, size_t *ret_size, size_t *ret_allocated, const char *string) {
+ _cleanup_free_ struct sd_dhcp_route *routes = NULL;
+ size_t size = 0, allocated = 0;
+
+ assert(ret);
+ assert(ret_size);
+ assert(ret_allocated);
+ assert(string);
+
+ /* WORD FORMAT: dst_ip/dst_prefixlen,gw_ip */
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ char *tok, *tok_end;
+ unsigned n;
+ int r;
+
+ r = extract_first_word(&string, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (!GREEDY_REALLOC(routes, allocated, size + 1))
+ return -ENOMEM;
+
+ tok = word;
+
+ /* get the subnet */
+ tok_end = strchr(tok, '/');
+ if (!tok_end)
+ continue;
+ *tok_end = '\0';
+
+ r = inet_aton(tok, &routes[size].dst_addr);
+ if (r == 0)
+ continue;
+
+ tok = tok_end + 1;
+
+ /* get the prefixlen */
+ tok_end = strchr(tok, ',');
+ if (!tok_end)
+ continue;
+
+ *tok_end = '\0';
+
+ r = safe_atou(tok, &n);
+ if (r < 0 || n > 32)
+ continue;
+
+ routes[size].dst_prefixlen = (uint8_t) n;
+ tok = tok_end + 1;
+
+ /* get the gateway */
+ r = inet_aton(tok, &routes[size].gw_addr);
+ if (r == 0)
+ continue;
+
+ size++;
+ }
+
+ *ret_size = size;
+ *ret_allocated = allocated;
+ *ret = TAKE_PTR(routes);
+
+ return 0;
+}
+
+int serialize_dhcp_option(FILE *f, const char *key, const void *data, size_t size) {
+ _cleanup_free_ char *hex_buf = NULL;
+
+ assert(f);
+ assert(key);
+ assert(data);
+
+ hex_buf = hexmem(data, size);
+ if (hex_buf == NULL)
+ return -ENOMEM;
+
+ fprintf(f, "%s=%s\n", key, hex_buf);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/network-internal.h b/src/libsystemd-network/network-internal.h
new file mode 100644
index 0000000..0c8da84
--- /dev/null
+++ b/src/libsystemd-network/network-internal.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-device.h"
+#include "sd-dhcp-lease.h"
+
+#include "condition.h"
+#include "conf-parser.h"
+#include "set.h"
+
+#define LINK_BRIDGE_PORT_PRIORITY_INVALID 128
+#define LINK_BRIDGE_PORT_PRIORITY_MAX 63
+
+bool net_match_config(Set *match_mac,
+ char * const *match_path,
+ char * const *match_driver,
+ char * const *match_type,
+ char * const *match_name,
+ Condition *match_host,
+ Condition *match_virt,
+ Condition *match_kernel_cmdline,
+ Condition *match_kernel_version,
+ Condition *match_arch,
+ const struct ether_addr *dev_mac,
+ const char *dev_path,
+ const char *dev_parent_driver,
+ const char *dev_driver,
+ const char *dev_type,
+ const char *dev_name);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_net_condition);
+CONFIG_PARSER_PROTOTYPE(config_parse_hwaddr);
+CONFIG_PARSER_PROTOTYPE(config_parse_hwaddrs);
+CONFIG_PARSER_PROTOTYPE(config_parse_ifnames);
+CONFIG_PARSER_PROTOTYPE(config_parse_ifalias);
+CONFIG_PARSER_PROTOTYPE(config_parse_bridge_port_priority);
+
+int net_get_unique_predictable_data(sd_device *device, uint64_t *result);
+const char *net_get_name(sd_device *device);
+
+void serialize_in_addrs(FILE *f, const struct in_addr *addresses, size_t size);
+int deserialize_in_addrs(struct in_addr **addresses, const char *string);
+void serialize_in6_addrs(FILE *f, const struct in6_addr *addresses,
+ size_t size);
+int deserialize_in6_addrs(struct in6_addr **addresses, const char *string);
+
+/* don't include "dhcp-lease-internal.h" as it causes conflicts between netinet/ip.h and linux/ip.h */
+struct sd_dhcp_route;
+
+void serialize_dhcp_routes(FILE *f, const char *key, sd_dhcp_route **routes, size_t size);
+int deserialize_dhcp_routes(struct sd_dhcp_route **ret, size_t *ret_size, size_t *ret_allocated, const char *string);
+
+/* It is not necessary to add deserialize_dhcp_option(). Use unhexmem() instead. */
+int serialize_dhcp_option(FILE *f, const char *key, const void *data, size_t size);
diff --git a/src/libsystemd-network/radv-internal.h b/src/libsystemd-network/radv-internal.h
new file mode 100644
index 0000000..66f49ed
--- /dev/null
+++ b/src/libsystemd-network/radv-internal.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2017 Intel Corporation. All rights reserved.
+***/
+
+#include "sd-radv.h"
+
+#include "log.h"
+#include "list.h"
+#include "sparse-endian.h"
+
+assert_cc(SD_RADV_DEFAULT_MIN_TIMEOUT_USEC <= SD_RADV_DEFAULT_MAX_TIMEOUT_USEC)
+
+#define SD_RADV_MAX_INITIAL_RTR_ADVERT_INTERVAL_USEC (16*USEC_PER_SEC)
+#define SD_RADV_MAX_INITIAL_RTR_ADVERTISEMENTS 3
+#define SD_RADV_MAX_FINAL_RTR_ADVERTISEMENTS 3
+#define SD_RADV_MIN_DELAY_BETWEEN_RAS 3
+#define SD_RADV_MAX_RA_DELAY_TIME_USEC (500*USEC_PER_MSEC)
+
+#define SD_RADV_OPT_RDNSS 25
+#define SD_RADV_OPT_DNSSL 31
+
+enum RAdvState {
+ SD_RADV_STATE_IDLE = 0,
+ SD_RADV_STATE_ADVERTISING = 1,
+};
+typedef enum RAdvState RAdvState;
+
+struct sd_radv_opt_dns {
+ uint8_t type;
+ uint8_t length;
+ uint16_t reserved;
+ be32_t lifetime;
+} _packed_;
+
+struct sd_radv {
+ unsigned n_ref;
+ RAdvState state;
+
+ int ifindex;
+
+ sd_event *event;
+ int event_priority;
+
+ struct ether_addr mac_addr;
+ uint8_t hop_limit;
+ uint8_t flags;
+ uint32_t mtu;
+ uint16_t lifetime;
+
+ int fd;
+ unsigned ra_sent;
+ sd_event_source *recv_event_source;
+ sd_event_source *timeout_event_source;
+
+ unsigned n_prefixes;
+ LIST_HEAD(sd_radv_prefix, prefixes);
+
+ size_t n_rdnss;
+ struct sd_radv_opt_dns *rdnss;
+ struct sd_radv_opt_dns *dnssl;
+};
+
+#define radv_prefix_opt__contents { \
+ uint8_t type; \
+ uint8_t length; \
+ uint8_t prefixlen; \
+ uint8_t flags; \
+ be32_t valid_lifetime; \
+ be32_t preferred_lifetime; \
+ uint32_t reserved; \
+ struct in6_addr in6_addr; \
+}
+
+struct radv_prefix_opt radv_prefix_opt__contents;
+
+/* We need the opt substructure to be packed, because we use it in send(). But
+ * if we use _packed_, this means that the structure cannot be used directly in
+ * normal code in general, because the fields might not be properly aligned.
+ * But in this particular case, the structure is defined in a way that gives
+ * proper alignment, even without the explicit _packed_ attribute. To appease
+ * the compiler we use the "unpacked" structure, but we also verify that
+ * structure contains no holes, so offsets are the same when _packed_ is used.
+ */
+struct radv_prefix_opt__packed radv_prefix_opt__contents _packed_;
+assert_cc(sizeof(struct radv_prefix_opt) == sizeof(struct radv_prefix_opt__packed));
+
+struct sd_radv_prefix {
+ unsigned n_ref;
+
+ struct radv_prefix_opt opt;
+
+ LIST_FIELDS(struct sd_radv_prefix, prefix);
+
+ usec_t valid_until;
+ usec_t preferred_until;
+};
+
+#define log_radv_full(level, error, fmt, ...) log_internal(level, error, __FILE__, __LINE__, __func__, "RADV: " fmt, ##__VA_ARGS__)
+#define log_radv_errno(error, fmt, ...) log_radv_full(LOG_DEBUG, error, fmt, ##__VA_ARGS__)
+#define log_radv(fmt, ...) log_radv_errno(0, fmt, ##__VA_ARGS__)
diff --git a/src/libsystemd-network/sd-dhcp-client.c b/src/libsystemd-network/sd-dhcp-client.c
new file mode 100644
index 0000000..35fc88e
--- /dev/null
+++ b/src/libsystemd-network/sd-dhcp-client.c
@@ -0,0 +1,1945 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <net/ethernet.h>
+#include <net/if_arp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/if_infiniband.h>
+
+#include "sd-dhcp-client.h"
+
+#include "alloc-util.h"
+#include "async.h"
+#include "dhcp-identifier.h"
+#include "dhcp-internal.h"
+#include "dhcp-lease-internal.h"
+#include "dhcp-protocol.h"
+#include "dns-domain.h"
+#include "event-util.h"
+#include "hostname-util.h"
+#include "io-util.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+#define MAX_CLIENT_ID_LEN (sizeof(uint32_t) + MAX_DUID_LEN) /* Arbitrary limit */
+#define MAX_MAC_ADDR_LEN CONST_MAX(INFINIBAND_ALEN, ETH_ALEN)
+
+#define RESTART_AFTER_NAK_MIN_USEC (1 * USEC_PER_SEC)
+#define RESTART_AFTER_NAK_MAX_USEC (30 * USEC_PER_MINUTE)
+
+struct sd_dhcp_client {
+ unsigned n_ref;
+
+ DHCPState state;
+ sd_event *event;
+ int event_priority;
+ sd_event_source *timeout_resend;
+ int ifindex;
+ int fd;
+ uint16_t port;
+ union sockaddr_union link;
+ sd_event_source *receive_message;
+ bool request_broadcast;
+ uint8_t *req_opts;
+ size_t req_opts_allocated;
+ size_t req_opts_size;
+ bool anonymize;
+ be32_t last_addr;
+ uint8_t mac_addr[MAX_MAC_ADDR_LEN];
+ size_t mac_addr_len;
+ uint16_t arp_type;
+ struct {
+ uint8_t type;
+ union {
+ struct {
+ /* 0: Generic (non-LL) (RFC 2132) */
+ uint8_t data[MAX_CLIENT_ID_LEN];
+ } _packed_ gen;
+ struct {
+ /* 1: Ethernet Link-Layer (RFC 2132) */
+ uint8_t haddr[ETH_ALEN];
+ } _packed_ eth;
+ struct {
+ /* 2 - 254: ARP/Link-Layer (RFC 2132) */
+ uint8_t haddr[0];
+ } _packed_ ll;
+ struct {
+ /* 255: Node-specific (RFC 4361) */
+ be32_t iaid;
+ struct duid duid;
+ } _packed_ ns;
+ struct {
+ uint8_t data[MAX_CLIENT_ID_LEN];
+ } _packed_ raw;
+ };
+ } _packed_ client_id;
+ size_t client_id_len;
+ char *hostname;
+ char *vendor_class_identifier;
+ char **user_class;
+ uint32_t mtu;
+ uint32_t xid;
+ usec_t start_time;
+ unsigned attempt;
+ usec_t request_sent;
+ sd_event_source *timeout_t1;
+ sd_event_source *timeout_t2;
+ sd_event_source *timeout_expire;
+ sd_dhcp_client_callback_t callback;
+ void *userdata;
+ sd_dhcp_lease *lease;
+ usec_t start_delay;
+};
+
+static const uint8_t default_req_opts[] = {
+ SD_DHCP_OPTION_SUBNET_MASK,
+ SD_DHCP_OPTION_ROUTER,
+ SD_DHCP_OPTION_HOST_NAME,
+ SD_DHCP_OPTION_DOMAIN_NAME,
+ SD_DHCP_OPTION_DOMAIN_NAME_SERVER,
+};
+
+/* RFC7844 section 3:
+ MAY contain the Parameter Request List option.
+ RFC7844 section 3.6:
+ The client intending to protect its privacy SHOULD only request a
+ minimal number of options in the PRL and SHOULD also randomly shuffle
+ the ordering of option codes in the PRL. If this random ordering
+ cannot be implemented, the client MAY order the option codes in the
+ PRL by option code number (lowest to highest).
+*/
+/* NOTE: using PRL options that Windows 10 RFC7844 implementation uses */
+static const uint8_t default_req_opts_anonymize[] = {
+ SD_DHCP_OPTION_SUBNET_MASK, /* 1 */
+ SD_DHCP_OPTION_ROUTER, /* 3 */
+ SD_DHCP_OPTION_DOMAIN_NAME_SERVER, /* 6 */
+ SD_DHCP_OPTION_DOMAIN_NAME, /* 15 */
+ SD_DHCP_OPTION_ROUTER_DISCOVER, /* 31 */
+ SD_DHCP_OPTION_STATIC_ROUTE, /* 33 */
+ SD_DHCP_OPTION_VENDOR_SPECIFIC, /* 43 */
+ SD_DHCP_OPTION_NETBIOS_NAMESERVER, /* 44 */
+ SD_DHCP_OPTION_NETBIOS_NODETYPE, /* 46 */
+ SD_DHCP_OPTION_NETBIOS_SCOPE, /* 47 */
+ SD_DHCP_OPTION_CLASSLESS_STATIC_ROUTE, /* 121 */
+ SD_DHCP_OPTION_PRIVATE_CLASSLESS_STATIC_ROUTE, /* 249 */
+ SD_DHCP_OPTION_PRIVATE_PROXY_AUTODISCOVERY, /* 252 */
+};
+
+static int client_receive_message_raw(
+ sd_event_source *s,
+ int fd,
+ uint32_t revents,
+ void *userdata);
+static int client_receive_message_udp(
+ sd_event_source *s,
+ int fd,
+ uint32_t revents,
+ void *userdata);
+static void client_stop(sd_dhcp_client *client, int error);
+
+int sd_dhcp_client_set_callback(
+ sd_dhcp_client *client,
+ sd_dhcp_client_callback_t cb,
+ void *userdata) {
+
+ assert_return(client, -EINVAL);
+
+ client->callback = cb;
+ client->userdata = userdata;
+
+ return 0;
+}
+
+int sd_dhcp_client_set_request_broadcast(sd_dhcp_client *client, int broadcast) {
+ assert_return(client, -EINVAL);
+
+ client->request_broadcast = !!broadcast;
+
+ return 0;
+}
+
+int sd_dhcp_client_set_request_option(sd_dhcp_client *client, uint8_t option) {
+ size_t i;
+
+ assert_return(client, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED), -EBUSY);
+
+ switch(option) {
+
+ case SD_DHCP_OPTION_PAD:
+ case SD_DHCP_OPTION_OVERLOAD:
+ case SD_DHCP_OPTION_MESSAGE_TYPE:
+ case SD_DHCP_OPTION_PARAMETER_REQUEST_LIST:
+ case SD_DHCP_OPTION_END:
+ return -EINVAL;
+
+ default:
+ break;
+ }
+
+ for (i = 0; i < client->req_opts_size; i++)
+ if (client->req_opts[i] == option)
+ return -EEXIST;
+
+ if (!GREEDY_REALLOC(client->req_opts, client->req_opts_allocated,
+ client->req_opts_size + 1))
+ return -ENOMEM;
+
+ client->req_opts[client->req_opts_size++] = option;
+
+ return 0;
+}
+
+int sd_dhcp_client_set_request_address(
+ sd_dhcp_client *client,
+ const struct in_addr *last_addr) {
+
+ assert_return(client, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED), -EBUSY);
+
+ if (last_addr)
+ client->last_addr = last_addr->s_addr;
+ else
+ client->last_addr = INADDR_ANY;
+
+ return 0;
+}
+
+int sd_dhcp_client_set_ifindex(sd_dhcp_client *client, int ifindex) {
+
+ assert_return(client, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED), -EBUSY);
+ assert_return(ifindex > 0, -EINVAL);
+
+ client->ifindex = ifindex;
+ return 0;
+}
+
+int sd_dhcp_client_set_mac(
+ sd_dhcp_client *client,
+ const uint8_t *addr,
+ size_t addr_len,
+ uint16_t arp_type) {
+
+ DHCP_CLIENT_DONT_DESTROY(client);
+ bool need_restart = false;
+
+ assert_return(client, -EINVAL);
+ assert_return(addr, -EINVAL);
+ assert_return(addr_len > 0 && addr_len <= MAX_MAC_ADDR_LEN, -EINVAL);
+ assert_return(arp_type > 0, -EINVAL);
+
+ if (arp_type == ARPHRD_ETHER)
+ assert_return(addr_len == ETH_ALEN, -EINVAL);
+ else if (arp_type == ARPHRD_INFINIBAND)
+ assert_return(addr_len == INFINIBAND_ALEN, -EINVAL);
+ else
+ return -EINVAL;
+
+ if (client->mac_addr_len == addr_len &&
+ memcmp(&client->mac_addr, addr, addr_len) == 0)
+ return 0;
+
+ if (!IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED)) {
+ log_dhcp_client(client, "Changing MAC address on running DHCP client, restarting");
+ need_restart = true;
+ client_stop(client, SD_DHCP_CLIENT_EVENT_STOP);
+ }
+
+ memcpy(&client->mac_addr, addr, addr_len);
+ client->mac_addr_len = addr_len;
+ client->arp_type = arp_type;
+
+ if (need_restart && client->state != DHCP_STATE_STOPPED)
+ sd_dhcp_client_start(client);
+
+ return 0;
+}
+
+int sd_dhcp_client_get_client_id(
+ sd_dhcp_client *client,
+ uint8_t *type,
+ const uint8_t **data,
+ size_t *data_len) {
+
+ assert_return(client, -EINVAL);
+ assert_return(type, -EINVAL);
+ assert_return(data, -EINVAL);
+ assert_return(data_len, -EINVAL);
+
+ *type = 0;
+ *data = NULL;
+ *data_len = 0;
+ if (client->client_id_len) {
+ *type = client->client_id.type;
+ *data = client->client_id.raw.data;
+ *data_len = client->client_id_len - sizeof(client->client_id.type);
+ }
+
+ return 0;
+}
+
+int sd_dhcp_client_set_client_id(
+ sd_dhcp_client *client,
+ uint8_t type,
+ const uint8_t *data,
+ size_t data_len) {
+
+ DHCP_CLIENT_DONT_DESTROY(client);
+ bool need_restart = false;
+
+ assert_return(client, -EINVAL);
+ assert_return(data, -EINVAL);
+ assert_return(data_len > 0 && data_len <= MAX_CLIENT_ID_LEN, -EINVAL);
+
+ if (client->client_id_len == data_len + sizeof(client->client_id.type) &&
+ client->client_id.type == type &&
+ memcmp(&client->client_id.raw.data, data, data_len) == 0)
+ return 0;
+
+ /* For hardware types, log debug message about unexpected data length.
+ *
+ * Note that infiniband's INFINIBAND_ALEN is 20 bytes long, but only
+ * last last 8 bytes of the address are stable and suitable to put into
+ * the client-id. The caller is advised to account for that. */
+ if ((type == ARPHRD_ETHER && data_len != ETH_ALEN) ||
+ (type == ARPHRD_INFINIBAND && data_len != 8))
+ log_dhcp_client(client, "Changing client ID to hardware type %u with "
+ "unexpected address length %zu",
+ type, data_len);
+
+ if (!IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED)) {
+ log_dhcp_client(client, "Changing client ID on running DHCP "
+ "client, restarting");
+ need_restart = true;
+ client_stop(client, SD_DHCP_CLIENT_EVENT_STOP);
+ }
+
+ client->client_id.type = type;
+ memcpy(&client->client_id.raw.data, data, data_len);
+ client->client_id_len = data_len + sizeof (client->client_id.type);
+
+ if (need_restart && client->state != DHCP_STATE_STOPPED)
+ sd_dhcp_client_start(client);
+
+ return 0;
+}
+
+/**
+ * Sets IAID and DUID. If duid is non-null, the DUID is set to duid_type + duid
+ * without further modification. Otherwise, if duid_type is supported, DUID
+ * is set based on that type. Otherwise, an error is returned.
+ */
+static int dhcp_client_set_iaid_duid_internal(
+ sd_dhcp_client *client,
+ bool iaid_append,
+ bool iaid_set,
+ uint32_t iaid,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len,
+ usec_t llt_time) {
+
+ DHCP_CLIENT_DONT_DESTROY(client);
+ int r;
+ size_t len;
+
+ assert_return(client, -EINVAL);
+ assert_return(duid_len == 0 || duid, -EINVAL);
+
+ if (duid) {
+ r = dhcp_validate_duid_len(duid_type, duid_len, true);
+ if (r < 0)
+ return r;
+ }
+
+ zero(client->client_id);
+ client->client_id.type = 255;
+
+ if (iaid_append) {
+ if (iaid_set)
+ client->client_id.ns.iaid = htobe32(iaid);
+ else {
+ r = dhcp_identifier_set_iaid(client->ifindex, client->mac_addr,
+ client->mac_addr_len,
+ true,
+ &client->client_id.ns.iaid);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (duid) {
+ client->client_id.ns.duid.type = htobe16(duid_type);
+ memcpy(&client->client_id.ns.duid.raw.data, duid, duid_len);
+ len = sizeof(client->client_id.ns.duid.type) + duid_len;
+ } else
+ switch (duid_type) {
+ case DUID_TYPE_LLT:
+ if (client->mac_addr_len == 0)
+ return -EOPNOTSUPP;
+
+ r = dhcp_identifier_set_duid_llt(&client->client_id.ns.duid, llt_time, client->mac_addr, client->mac_addr_len, client->arp_type, &len);
+ if (r < 0)
+ return r;
+ break;
+ case DUID_TYPE_EN:
+ r = dhcp_identifier_set_duid_en(&client->client_id.ns.duid, &len);
+ if (r < 0)
+ return r;
+ break;
+ case DUID_TYPE_LL:
+ if (client->mac_addr_len == 0)
+ return -EOPNOTSUPP;
+
+ r = dhcp_identifier_set_duid_ll(&client->client_id.ns.duid, client->mac_addr, client->mac_addr_len, client->arp_type, &len);
+ if (r < 0)
+ return r;
+ break;
+ case DUID_TYPE_UUID:
+ r = dhcp_identifier_set_duid_uuid(&client->client_id.ns.duid, &len);
+ if (r < 0)
+ return r;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ client->client_id_len = sizeof(client->client_id.type) + len +
+ (iaid_append ? sizeof(client->client_id.ns.iaid) : 0);
+
+ if (!IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED)) {
+ log_dhcp_client(client, "Configured %sDUID, restarting.", iaid_append ? "IAID+" : "");
+ client_stop(client, SD_DHCP_CLIENT_EVENT_STOP);
+ sd_dhcp_client_start(client);
+ }
+
+ return 0;
+}
+
+int sd_dhcp_client_set_iaid_duid(
+ sd_dhcp_client *client,
+ bool iaid_set,
+ uint32_t iaid,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len) {
+ return dhcp_client_set_iaid_duid_internal(client, true, iaid_set, iaid, duid_type, duid, duid_len, 0);
+}
+
+int sd_dhcp_client_set_iaid_duid_llt(
+ sd_dhcp_client *client,
+ bool iaid_set,
+ uint32_t iaid,
+ usec_t llt_time) {
+ return dhcp_client_set_iaid_duid_internal(client, true, iaid_set, iaid, DUID_TYPE_LLT, NULL, 0, llt_time);
+}
+
+int sd_dhcp_client_set_duid(
+ sd_dhcp_client *client,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len) {
+ return dhcp_client_set_iaid_duid_internal(client, false, false, 0, duid_type, duid, duid_len, 0);
+}
+
+int sd_dhcp_client_set_duid_llt(
+ sd_dhcp_client *client,
+ usec_t llt_time) {
+ return dhcp_client_set_iaid_duid_internal(client, false, false, 0, DUID_TYPE_LLT, NULL, 0, llt_time);
+}
+
+int sd_dhcp_client_set_hostname(
+ sd_dhcp_client *client,
+ const char *hostname) {
+
+ assert_return(client, -EINVAL);
+
+ /* Make sure hostnames qualify as DNS and as Linux hostnames */
+ if (hostname &&
+ !(hostname_is_valid(hostname, false) && dns_name_is_valid(hostname) > 0))
+ return -EINVAL;
+
+ return free_and_strdup(&client->hostname, hostname);
+}
+
+int sd_dhcp_client_set_vendor_class_identifier(
+ sd_dhcp_client *client,
+ const char *vci) {
+
+ assert_return(client, -EINVAL);
+
+ return free_and_strdup(&client->vendor_class_identifier, vci);
+}
+
+int sd_dhcp_client_set_user_class(
+ sd_dhcp_client *client,
+ const char* const* user_class) {
+
+ _cleanup_strv_free_ char **s = NULL;
+ char **p;
+
+ STRV_FOREACH(p, (char **) user_class)
+ if (strlen(*p) > 255)
+ return -ENAMETOOLONG;
+
+ s = strv_copy((char **) user_class);
+ if (!s)
+ return -ENOMEM;
+
+ client->user_class = TAKE_PTR(s);
+
+ return 0;
+}
+
+int sd_dhcp_client_set_client_port(
+ sd_dhcp_client *client,
+ uint16_t port) {
+
+ assert_return(client, -EINVAL);
+
+ client->port = port;
+
+ return 0;
+}
+
+int sd_dhcp_client_set_mtu(sd_dhcp_client *client, uint32_t mtu) {
+ assert_return(client, -EINVAL);
+ assert_return(mtu >= DHCP_DEFAULT_MIN_SIZE, -ERANGE);
+
+ client->mtu = mtu;
+
+ return 0;
+}
+
+int sd_dhcp_client_get_lease(sd_dhcp_client *client, sd_dhcp_lease **ret) {
+ assert_return(client, -EINVAL);
+
+ if (!IN_SET(client->state, DHCP_STATE_BOUND, DHCP_STATE_RENEWING, DHCP_STATE_REBINDING))
+ return -EADDRNOTAVAIL;
+
+ if (ret)
+ *ret = client->lease;
+
+ return 0;
+}
+
+static void client_notify(sd_dhcp_client *client, int event) {
+ assert(client);
+
+ if (client->callback)
+ client->callback(client, event, client->userdata);
+}
+
+static int client_initialize(sd_dhcp_client *client) {
+ assert_return(client, -EINVAL);
+
+ client->receive_message = sd_event_source_unref(client->receive_message);
+
+ client->fd = asynchronous_close(client->fd);
+
+ (void) event_source_disable(client->timeout_resend);
+ (void) event_source_disable(client->timeout_t1);
+ (void) event_source_disable(client->timeout_t2);
+ (void) event_source_disable(client->timeout_expire);
+
+ client->attempt = 1;
+
+ client->state = DHCP_STATE_INIT;
+ client->xid = 0;
+
+ client->lease = sd_dhcp_lease_unref(client->lease);
+
+ return 0;
+}
+
+static void client_stop(sd_dhcp_client *client, int error) {
+ assert(client);
+
+ if (error < 0)
+ log_dhcp_client(client, "STOPPED: %s", strerror(-error));
+ else if (error == SD_DHCP_CLIENT_EVENT_STOP)
+ log_dhcp_client(client, "STOPPED");
+ else
+ log_dhcp_client(client, "STOPPED: Unknown event");
+
+ client_notify(client, error);
+
+ client_initialize(client);
+}
+
+static int client_message_init(
+ sd_dhcp_client *client,
+ DHCPPacket **ret,
+ uint8_t type,
+ size_t *_optlen,
+ size_t *_optoffset) {
+
+ _cleanup_free_ DHCPPacket *packet = NULL;
+ size_t optlen, optoffset, size;
+ be16_t max_size;
+ usec_t time_now;
+ uint16_t secs;
+ int r;
+
+ assert(client);
+ assert(client->start_time);
+ assert(ret);
+ assert(_optlen);
+ assert(_optoffset);
+ assert(IN_SET(type, DHCP_DISCOVER, DHCP_REQUEST));
+
+ optlen = DHCP_MIN_OPTIONS_SIZE;
+ size = sizeof(DHCPPacket) + optlen;
+
+ packet = malloc0(size);
+ if (!packet)
+ return -ENOMEM;
+
+ r = dhcp_message_init(&packet->dhcp, BOOTREQUEST, client->xid, type,
+ client->arp_type, optlen, &optoffset);
+ if (r < 0)
+ return r;
+
+ /* Although 'secs' field is a SHOULD in RFC 2131, certain DHCP servers
+ refuse to issue an DHCP lease if 'secs' is set to zero */
+ r = sd_event_now(client->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return r;
+ assert(time_now >= client->start_time);
+
+ /* seconds between sending first and last DISCOVER
+ * must always be strictly positive to deal with broken servers */
+ secs = ((time_now - client->start_time) / USEC_PER_SEC) ? : 1;
+ packet->dhcp.secs = htobe16(secs);
+
+ /* RFC2132 section 4.1
+ A client that cannot receive unicast IP datagrams until its protocol
+ software has been configured with an IP address SHOULD set the
+ BROADCAST bit in the 'flags' field to 1 in any DHCPDISCOVER or
+ DHCPREQUEST messages that client sends. The BROADCAST bit will
+ provide a hint to the DHCP server and BOOTP relay agent to broadcast
+ any messages to the client on the client's subnet.
+
+ Note: some interfaces needs this to be enabled, but some networks
+ needs this to be disabled as broadcasts are filteretd, so this
+ needs to be configurable */
+ if (client->request_broadcast || client->arp_type != ARPHRD_ETHER)
+ packet->dhcp.flags = htobe16(0x8000);
+
+ /* RFC2132 section 4.1.1:
+ The client MUST include its hardware address in the ’chaddr’ field, if
+ necessary for delivery of DHCP reply messages. Non-Ethernet
+ interfaces will leave 'chaddr' empty and use the client identifier
+ instead (eg, RFC 4390 section 2.1).
+ */
+ if (client->arp_type == ARPHRD_ETHER)
+ memcpy(&packet->dhcp.chaddr, &client->mac_addr, ETH_ALEN);
+
+ /* If no client identifier exists, construct an RFC 4361-compliant one */
+ if (client->client_id_len == 0) {
+ size_t duid_len;
+
+ client->client_id.type = 255;
+
+ r = dhcp_identifier_set_iaid(client->ifindex, client->mac_addr, client->mac_addr_len,
+ true, &client->client_id.ns.iaid);
+ if (r < 0)
+ return r;
+
+ r = dhcp_identifier_set_duid_en(&client->client_id.ns.duid, &duid_len);
+ if (r < 0)
+ return r;
+
+ client->client_id_len = sizeof(client->client_id.type) + sizeof(client->client_id.ns.iaid) + duid_len;
+ }
+
+ /* Some DHCP servers will refuse to issue an DHCP lease if the Client
+ Identifier option is not set */
+ if (client->client_id_len) {
+ r = dhcp_option_append(&packet->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_CLIENT_IDENTIFIER,
+ client->client_id_len,
+ &client->client_id);
+ if (r < 0)
+ return r;
+ }
+
+ /* RFC2131 section 3.5:
+ in its initial DHCPDISCOVER or DHCPREQUEST message, a
+ client may provide the server with a list of specific
+ parameters the client is interested in. If the client
+ includes a list of parameters in a DHCPDISCOVER message,
+ it MUST include that list in any subsequent DHCPREQUEST
+ messages.
+ */
+
+ /* RFC7844 section 3:
+ MAY contain the Parameter Request List option. */
+ /* NOTE: in case that there would be an option to do not send
+ * any PRL at all, the size should be checked before sending */
+ if (client->req_opts_size > 0) {
+ r = dhcp_option_append(&packet->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_PARAMETER_REQUEST_LIST,
+ client->req_opts_size, client->req_opts);
+ if (r < 0)
+ return r;
+ }
+
+ /* RFC2131 section 3.5:
+ The client SHOULD include the ’maximum DHCP message size’ option to
+ let the server know how large the server may make its DHCP messages.
+
+ Note (from ConnMan): Some DHCP servers will send bigger DHCP packets
+ than the defined default size unless the Maximum Message Size option
+ is explicitly set
+
+ RFC3442 "Requirements to Avoid Sizing Constraints":
+ Because a full routing table can be quite large, the standard 576
+ octet maximum size for a DHCP message may be too short to contain
+ some legitimate Classless Static Route options. Because of this,
+ clients implementing the Classless Static Route option SHOULD send a
+ Maximum DHCP Message Size [4] option if the DHCP client's TCP/IP
+ stack is capable of receiving larger IP datagrams. In this case, the
+ client SHOULD set the value of this option to at least the MTU of the
+ interface that the client is configuring. The client MAY set the
+ value of this option higher, up to the size of the largest UDP packet
+ it is prepared to accept. (Note that the value specified in the
+ Maximum DHCP Message Size option is the total maximum packet size,
+ including IP and UDP headers.)
+ */
+ /* RFC7844 section 3:
+ SHOULD NOT contain any other option. */
+ if (!client->anonymize) {
+ max_size = htobe16(size);
+ r = dhcp_option_append(&packet->dhcp, client->mtu, &optoffset, 0,
+ SD_DHCP_OPTION_MAXIMUM_MESSAGE_SIZE,
+ 2, &max_size);
+ if (r < 0)
+ return r;
+ }
+
+ *_optlen = optlen;
+ *_optoffset = optoffset;
+ *ret = TAKE_PTR(packet);
+
+ return 0;
+}
+
+static int client_append_fqdn_option(
+ DHCPMessage *message,
+ size_t optlen,
+ size_t *optoffset,
+ const char *fqdn) {
+
+ uint8_t buffer[3 + DHCP_MAX_FQDN_LENGTH];
+ int r;
+
+ buffer[0] = DHCP_FQDN_FLAG_S | /* Request server to perform A RR DNS updates */
+ DHCP_FQDN_FLAG_E; /* Canonical wire format */
+ buffer[1] = 0; /* RCODE1 (deprecated) */
+ buffer[2] = 0; /* RCODE2 (deprecated) */
+
+ r = dns_name_to_wire_format(fqdn, buffer + 3, sizeof(buffer) - 3, false);
+ if (r > 0)
+ r = dhcp_option_append(message, optlen, optoffset, 0,
+ SD_DHCP_OPTION_FQDN, 3 + r, buffer);
+
+ return r;
+}
+
+static int dhcp_client_send_raw(
+ sd_dhcp_client *client,
+ DHCPPacket *packet,
+ size_t len) {
+
+ dhcp_packet_append_ip_headers(packet, INADDR_ANY, client->port,
+ INADDR_BROADCAST, DHCP_PORT_SERVER, len);
+
+ return dhcp_network_send_raw_socket(client->fd, &client->link,
+ packet, len);
+}
+
+static int client_send_discover(sd_dhcp_client *client) {
+ _cleanup_free_ DHCPPacket *discover = NULL;
+ size_t optoffset, optlen;
+ int r;
+
+ assert(client);
+ assert(IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_SELECTING));
+
+ r = client_message_init(client, &discover, DHCP_DISCOVER,
+ &optlen, &optoffset);
+ if (r < 0)
+ return r;
+
+ /* the client may suggest values for the network address
+ and lease time in the DHCPDISCOVER message. The client may include
+ the ’requested IP address’ option to suggest that a particular IP
+ address be assigned, and may include the ’IP address lease time’
+ option to suggest the lease time it would like.
+ */
+ if (client->last_addr != INADDR_ANY) {
+ r = dhcp_option_append(&discover->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_REQUESTED_IP_ADDRESS,
+ 4, &client->last_addr);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->hostname) {
+ /* According to RFC 4702 "clients that send the Client FQDN option in
+ their messages MUST NOT also send the Host Name option". Just send
+ one of the two depending on the hostname type.
+ */
+ if (dns_name_is_single_label(client->hostname)) {
+ /* it is unclear from RFC 2131 if client should send hostname in
+ DHCPDISCOVER but dhclient does and so we do as well
+ */
+ r = dhcp_option_append(&discover->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_HOST_NAME,
+ strlen(client->hostname), client->hostname);
+ } else
+ r = client_append_fqdn_option(&discover->dhcp, optlen, &optoffset,
+ client->hostname);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->vendor_class_identifier) {
+ r = dhcp_option_append(&discover->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_VENDOR_CLASS_IDENTIFIER,
+ strlen(client->vendor_class_identifier),
+ client->vendor_class_identifier);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->user_class) {
+ r = dhcp_option_append(&discover->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_USER_CLASS,
+ strv_length(client->user_class),
+ client->user_class);
+ if (r < 0)
+ return r;
+ }
+
+ r = dhcp_option_append(&discover->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_END, 0, NULL);
+ if (r < 0)
+ return r;
+
+ /* We currently ignore:
+ The client SHOULD wait a random time between one and ten seconds to
+ desynchronize the use of DHCP at startup.
+ */
+ r = dhcp_client_send_raw(client, discover, sizeof(DHCPPacket) + optoffset);
+ if (r < 0)
+ return r;
+
+ log_dhcp_client(client, "DISCOVER");
+
+ return 0;
+}
+
+static int client_send_request(sd_dhcp_client *client) {
+ _cleanup_free_ DHCPPacket *request = NULL;
+ size_t optoffset, optlen;
+ int r;
+
+ assert(client);
+
+ r = client_message_init(client, &request, DHCP_REQUEST, &optlen, &optoffset);
+ if (r < 0)
+ return r;
+
+ switch (client->state) {
+ /* See RFC2131 section 4.3.2 (note that there is a typo in the RFC,
+ SELECTING should be REQUESTING)
+ */
+
+ case DHCP_STATE_REQUESTING:
+ /* Client inserts the address of the selected server in ’server
+ identifier’, ’ciaddr’ MUST be zero, ’requested IP address’ MUST be
+ filled in with the yiaddr value from the chosen DHCPOFFER.
+ */
+
+ r = dhcp_option_append(&request->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_SERVER_IDENTIFIER,
+ 4, &client->lease->server_address);
+ if (r < 0)
+ return r;
+
+ r = dhcp_option_append(&request->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_REQUESTED_IP_ADDRESS,
+ 4, &client->lease->address);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case DHCP_STATE_INIT_REBOOT:
+ /* ’server identifier’ MUST NOT be filled in, ’requested IP address’
+ option MUST be filled in with client’s notion of its previously
+ assigned address. ’ciaddr’ MUST be zero.
+ */
+ r = dhcp_option_append(&request->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_REQUESTED_IP_ADDRESS,
+ 4, &client->last_addr);
+ if (r < 0)
+ return r;
+ break;
+
+ case DHCP_STATE_RENEWING:
+ /* ’server identifier’ MUST NOT be filled in, ’requested IP address’
+ option MUST NOT be filled in, ’ciaddr’ MUST be filled in with
+ client’s IP address.
+ */
+
+ case DHCP_STATE_REBINDING:
+ /* ’server identifier’ MUST NOT be filled in, ’requested IP address’
+ option MUST NOT be filled in, ’ciaddr’ MUST be filled in with
+ client’s IP address.
+
+ This message MUST be broadcast to the 0xffffffff IP broadcast address.
+ */
+ request->dhcp.ciaddr = client->lease->address;
+
+ break;
+
+ case DHCP_STATE_INIT:
+ case DHCP_STATE_SELECTING:
+ case DHCP_STATE_REBOOTING:
+ case DHCP_STATE_BOUND:
+ case DHCP_STATE_STOPPED:
+ return -EINVAL;
+ }
+
+ if (client->hostname) {
+ if (dns_name_is_single_label(client->hostname))
+ r = dhcp_option_append(&request->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_HOST_NAME,
+ strlen(client->hostname), client->hostname);
+ else
+ r = client_append_fqdn_option(&request->dhcp, optlen, &optoffset,
+ client->hostname);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->vendor_class_identifier) {
+ r = dhcp_option_append(&request->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_VENDOR_CLASS_IDENTIFIER,
+ strlen(client->vendor_class_identifier),
+ client->vendor_class_identifier);
+ if (r < 0)
+ return r;
+ }
+
+ r = dhcp_option_append(&request->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_END, 0, NULL);
+ if (r < 0)
+ return r;
+
+ if (client->state == DHCP_STATE_RENEWING) {
+ r = dhcp_network_send_udp_socket(client->fd,
+ client->lease->server_address,
+ DHCP_PORT_SERVER,
+ &request->dhcp,
+ sizeof(DHCPMessage) + optoffset);
+ } else {
+ r = dhcp_client_send_raw(client, request, sizeof(DHCPPacket) + optoffset);
+ }
+ if (r < 0)
+ return r;
+
+ switch (client->state) {
+
+ case DHCP_STATE_REQUESTING:
+ log_dhcp_client(client, "REQUEST (requesting)");
+ break;
+
+ case DHCP_STATE_INIT_REBOOT:
+ log_dhcp_client(client, "REQUEST (init-reboot)");
+ break;
+
+ case DHCP_STATE_RENEWING:
+ log_dhcp_client(client, "REQUEST (renewing)");
+ break;
+
+ case DHCP_STATE_REBINDING:
+ log_dhcp_client(client, "REQUEST (rebinding)");
+ break;
+
+ default:
+ log_dhcp_client(client, "REQUEST (invalid)");
+ break;
+ }
+
+ return 0;
+}
+
+static int client_start(sd_dhcp_client *client);
+
+static int client_timeout_resend(
+ sd_event_source *s,
+ uint64_t usec,
+ void *userdata) {
+
+ sd_dhcp_client *client = userdata;
+ DHCP_CLIENT_DONT_DESTROY(client);
+ usec_t next_timeout = 0;
+ uint64_t time_now;
+ uint32_t time_left;
+ int r;
+
+ assert(s);
+ assert(client);
+ assert(client->event);
+
+ r = sd_event_now(client->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ goto error;
+
+ switch (client->state) {
+
+ case DHCP_STATE_RENEWING:
+
+ time_left = (client->lease->t2 - client->lease->t1) / 2;
+ if (time_left < 60)
+ time_left = 60;
+
+ next_timeout = time_now + time_left * USEC_PER_SEC;
+
+ break;
+
+ case DHCP_STATE_REBINDING:
+
+ time_left = (client->lease->lifetime - client->lease->t2) / 2;
+ if (time_left < 60)
+ time_left = 60;
+
+ next_timeout = time_now + time_left * USEC_PER_SEC;
+ break;
+
+ case DHCP_STATE_REBOOTING:
+ /* start over as we did not receive a timely ack or nak */
+ r = client_initialize(client);
+ if (r < 0)
+ goto error;
+
+ r = client_start(client);
+ if (r < 0)
+ goto error;
+ else {
+ log_dhcp_client(client, "REBOOTED");
+ return 0;
+ }
+
+ case DHCP_STATE_INIT:
+ case DHCP_STATE_INIT_REBOOT:
+ case DHCP_STATE_SELECTING:
+ case DHCP_STATE_REQUESTING:
+ case DHCP_STATE_BOUND:
+
+ if (client->attempt < 64)
+ client->attempt *= 2;
+
+ next_timeout = time_now + (client->attempt - 1) * USEC_PER_SEC;
+
+ break;
+
+ case DHCP_STATE_STOPPED:
+ r = -EINVAL;
+ goto error;
+ }
+
+ next_timeout += (random_u32() & 0x1fffff);
+
+ r = event_reset_time(client->event, &client->timeout_resend,
+ clock_boottime_or_monotonic(),
+ next_timeout, 10 * USEC_PER_MSEC,
+ client_timeout_resend, client,
+ client->event_priority, "dhcp4-resend-timer", true);
+ if (r < 0)
+ goto error;
+
+ switch (client->state) {
+ case DHCP_STATE_INIT:
+ r = client_send_discover(client);
+ if (r >= 0) {
+ client->state = DHCP_STATE_SELECTING;
+ client->attempt = 1;
+ } else {
+ if (client->attempt >= 64)
+ goto error;
+ }
+
+ break;
+
+ case DHCP_STATE_SELECTING:
+ r = client_send_discover(client);
+ if (r < 0 && client->attempt >= 64)
+ goto error;
+
+ break;
+
+ case DHCP_STATE_INIT_REBOOT:
+ case DHCP_STATE_REQUESTING:
+ case DHCP_STATE_RENEWING:
+ case DHCP_STATE_REBINDING:
+ r = client_send_request(client);
+ if (r < 0 && client->attempt >= 64)
+ goto error;
+
+ if (client->state == DHCP_STATE_INIT_REBOOT)
+ client->state = DHCP_STATE_REBOOTING;
+
+ client->request_sent = time_now;
+
+ break;
+
+ case DHCP_STATE_REBOOTING:
+ case DHCP_STATE_BOUND:
+
+ break;
+
+ case DHCP_STATE_STOPPED:
+ r = -EINVAL;
+ goto error;
+ }
+
+ return 0;
+
+error:
+ client_stop(client, r);
+
+ /* Errors were dealt with when stopping the client, don't spill
+ errors into the event loop handler */
+ return 0;
+}
+
+static int client_initialize_io_events(
+ sd_dhcp_client *client,
+ sd_event_io_handler_t io_callback) {
+
+ int r;
+
+ assert(client);
+ assert(client->event);
+
+ r = sd_event_add_io(client->event, &client->receive_message,
+ client->fd, EPOLLIN, io_callback,
+ client);
+ if (r < 0)
+ goto error;
+
+ r = sd_event_source_set_priority(client->receive_message,
+ client->event_priority);
+ if (r < 0)
+ goto error;
+
+ r = sd_event_source_set_description(client->receive_message, "dhcp4-receive-message");
+ if (r < 0)
+ goto error;
+
+error:
+ if (r < 0)
+ client_stop(client, r);
+
+ return 0;
+}
+
+static int client_initialize_time_events(sd_dhcp_client *client) {
+ uint64_t usec = 0;
+ int r;
+
+ assert(client);
+ assert(client->event);
+
+ if (client->start_delay) {
+ assert_se(sd_event_now(client->event, clock_boottime_or_monotonic(), &usec) >= 0);
+ usec += client->start_delay;
+ }
+
+ r = event_reset_time(client->event, &client->timeout_resend,
+ clock_boottime_or_monotonic(),
+ usec, 0,
+ client_timeout_resend, client,
+ client->event_priority, "dhcp4-resend-timer", true);
+ if (r < 0)
+ client_stop(client, r);
+
+ return 0;
+
+}
+
+static int client_initialize_events(sd_dhcp_client *client, sd_event_io_handler_t io_callback) {
+ client_initialize_io_events(client, io_callback);
+ client_initialize_time_events(client);
+
+ return 0;
+}
+
+static int client_start_delayed(sd_dhcp_client *client) {
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(client->event, -EINVAL);
+ assert_return(client->ifindex > 0, -EINVAL);
+ assert_return(client->fd < 0, -EBUSY);
+ assert_return(client->xid == 0, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_INIT_REBOOT), -EBUSY);
+
+ client->xid = random_u32();
+
+ r = dhcp_network_bind_raw_socket(client->ifindex, &client->link,
+ client->xid, client->mac_addr,
+ client->mac_addr_len, client->arp_type, client->port);
+ if (r < 0) {
+ client_stop(client, r);
+ return r;
+ }
+ client->fd = r;
+
+ if (IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_INIT_REBOOT))
+ client->start_time = now(clock_boottime_or_monotonic());
+
+ return client_initialize_events(client, client_receive_message_raw);
+}
+
+static int client_start(sd_dhcp_client *client) {
+ client->start_delay = 0;
+ return client_start_delayed(client);
+}
+
+static int client_timeout_expire(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_dhcp_client *client = userdata;
+ DHCP_CLIENT_DONT_DESTROY(client);
+
+ log_dhcp_client(client, "EXPIRED");
+
+ client_notify(client, SD_DHCP_CLIENT_EVENT_EXPIRED);
+
+ /* lease was lost, start over if not freed or stopped in callback */
+ if (client->state != DHCP_STATE_STOPPED) {
+ client_initialize(client);
+ client_start(client);
+ }
+
+ return 0;
+}
+
+static int client_timeout_t2(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_dhcp_client *client = userdata;
+ DHCP_CLIENT_DONT_DESTROY(client);
+ int r;
+
+ assert(client);
+
+ client->receive_message = sd_event_source_unref(client->receive_message);
+ client->fd = asynchronous_close(client->fd);
+
+ client->state = DHCP_STATE_REBINDING;
+ client->attempt = 1;
+
+ r = dhcp_network_bind_raw_socket(client->ifindex, &client->link,
+ client->xid, client->mac_addr,
+ client->mac_addr_len, client->arp_type,
+ client->port);
+ if (r < 0) {
+ client_stop(client, r);
+ return 0;
+ }
+ client->fd = r;
+
+ return client_initialize_events(client, client_receive_message_raw);
+}
+
+static int client_timeout_t1(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_dhcp_client *client = userdata;
+ DHCP_CLIENT_DONT_DESTROY(client);
+
+ client->state = DHCP_STATE_RENEWING;
+ client->attempt = 1;
+
+ return client_initialize_time_events(client);
+}
+
+static int client_handle_offer(sd_dhcp_client *client, DHCPMessage *offer, size_t len) {
+ _cleanup_(sd_dhcp_lease_unrefp) sd_dhcp_lease *lease = NULL;
+ int r;
+
+ r = dhcp_lease_new(&lease);
+ if (r < 0)
+ return r;
+
+ if (client->client_id_len) {
+ r = dhcp_lease_set_client_id(lease,
+ (uint8_t *) &client->client_id,
+ client->client_id_len);
+ if (r < 0)
+ return r;
+ }
+
+ r = dhcp_option_parse(offer, len, dhcp_lease_parse_options, lease, NULL);
+ if (r != DHCP_OFFER) {
+ log_dhcp_client(client, "received message was not an OFFER, ignoring");
+ return -ENOMSG;
+ }
+
+ lease->next_server = offer->siaddr;
+ lease->address = offer->yiaddr;
+
+ if (lease->address == 0 ||
+ lease->server_address == 0 ||
+ lease->lifetime == 0) {
+ log_dhcp_client(client, "received lease lacks address, server address or lease lifetime, ignoring");
+ return -ENOMSG;
+ }
+
+ if (!lease->have_subnet_mask) {
+ r = dhcp_lease_set_default_subnet_mask(lease);
+ if (r < 0) {
+ log_dhcp_client(client,
+ "received lease lacks subnet mask, "
+ "and a fallback one cannot be generated, ignoring");
+ return -ENOMSG;
+ }
+ }
+
+ sd_dhcp_lease_unref(client->lease);
+ client->lease = TAKE_PTR(lease);
+
+ log_dhcp_client(client, "OFFER");
+
+ return 0;
+}
+
+static int client_handle_forcerenew(sd_dhcp_client *client, DHCPMessage *force, size_t len) {
+ int r;
+
+ r = dhcp_option_parse(force, len, NULL, NULL, NULL);
+ if (r != DHCP_FORCERENEW)
+ return -ENOMSG;
+
+ log_dhcp_client(client, "FORCERENEW");
+
+ return 0;
+}
+
+static int client_handle_ack(sd_dhcp_client *client, DHCPMessage *ack, size_t len) {
+ _cleanup_(sd_dhcp_lease_unrefp) sd_dhcp_lease *lease = NULL;
+ _cleanup_free_ char *error_message = NULL;
+ int r;
+
+ r = dhcp_lease_new(&lease);
+ if (r < 0)
+ return r;
+
+ if (client->client_id_len) {
+ r = dhcp_lease_set_client_id(lease,
+ (uint8_t *) &client->client_id,
+ client->client_id_len);
+ if (r < 0)
+ return r;
+ }
+
+ r = dhcp_option_parse(ack, len, dhcp_lease_parse_options, lease, &error_message);
+ if (r == DHCP_NAK) {
+ log_dhcp_client(client, "NAK: %s", strna(error_message));
+ return -EADDRNOTAVAIL;
+ }
+
+ if (r != DHCP_ACK) {
+ log_dhcp_client(client, "received message was not an ACK, ignoring");
+ return -ENOMSG;
+ }
+
+ lease->next_server = ack->siaddr;
+
+ lease->address = ack->yiaddr;
+
+ if (lease->address == INADDR_ANY ||
+ lease->server_address == INADDR_ANY ||
+ lease->lifetime == 0) {
+ log_dhcp_client(client, "received lease lacks address, server "
+ "address or lease lifetime, ignoring");
+ return -ENOMSG;
+ }
+
+ if (lease->subnet_mask == INADDR_ANY) {
+ r = dhcp_lease_set_default_subnet_mask(lease);
+ if (r < 0) {
+ log_dhcp_client(client,
+ "received lease lacks subnet mask, "
+ "and a fallback one cannot be generated, ignoring");
+ return -ENOMSG;
+ }
+ }
+
+ r = SD_DHCP_CLIENT_EVENT_IP_ACQUIRE;
+ if (client->lease) {
+ if (client->lease->address != lease->address ||
+ client->lease->subnet_mask != lease->subnet_mask ||
+ client->lease->router != lease->router) {
+ r = SD_DHCP_CLIENT_EVENT_IP_CHANGE;
+ } else
+ r = SD_DHCP_CLIENT_EVENT_RENEW;
+
+ client->lease = sd_dhcp_lease_unref(client->lease);
+ }
+
+ client->lease = TAKE_PTR(lease);
+
+ log_dhcp_client(client, "ACK");
+
+ return r;
+}
+
+static uint64_t client_compute_timeout(sd_dhcp_client *client, uint32_t lifetime, double factor) {
+ assert(client);
+ assert(client->request_sent);
+ assert(lifetime > 0);
+
+ if (lifetime > 3)
+ lifetime -= 3;
+ else
+ lifetime = 0;
+
+ return client->request_sent + (lifetime * USEC_PER_SEC * factor) +
+ + (random_u32() & 0x1fffff);
+}
+
+static int client_set_lease_timeouts(sd_dhcp_client *client) {
+ usec_t time_now;
+ uint64_t lifetime_timeout;
+ uint64_t t2_timeout;
+ uint64_t t1_timeout;
+ char time_string[FORMAT_TIMESPAN_MAX];
+ int r;
+
+ assert(client);
+ assert(client->event);
+ assert(client->lease);
+ assert(client->lease->lifetime);
+
+ /* don't set timers for infinite leases */
+ if (client->lease->lifetime == 0xffffffff) {
+ (void) event_source_disable(client->timeout_t1);
+ (void) event_source_disable(client->timeout_t2);
+ (void) event_source_disable(client->timeout_expire);
+
+ return 0;
+ }
+
+ r = sd_event_now(client->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return r;
+ assert(client->request_sent <= time_now);
+
+ /* convert the various timeouts from relative (secs) to absolute (usecs) */
+ lifetime_timeout = client_compute_timeout(client, client->lease->lifetime, 1);
+ if (client->lease->t1 > 0 && client->lease->t2 > 0) {
+ /* both T1 and T2 are given */
+ if (client->lease->t1 < client->lease->t2 &&
+ client->lease->t2 < client->lease->lifetime) {
+ /* they are both valid */
+ t2_timeout = client_compute_timeout(client, client->lease->t2, 1);
+ t1_timeout = client_compute_timeout(client, client->lease->t1, 1);
+ } else {
+ /* discard both */
+ t2_timeout = client_compute_timeout(client, client->lease->lifetime, 7.0 / 8.0);
+ client->lease->t2 = (client->lease->lifetime * 7) / 8;
+ t1_timeout = client_compute_timeout(client, client->lease->lifetime, 0.5);
+ client->lease->t1 = client->lease->lifetime / 2;
+ }
+ } else if (client->lease->t2 > 0 && client->lease->t2 < client->lease->lifetime) {
+ /* only T2 is given, and it is valid */
+ t2_timeout = client_compute_timeout(client, client->lease->t2, 1);
+ t1_timeout = client_compute_timeout(client, client->lease->lifetime, 0.5);
+ client->lease->t1 = client->lease->lifetime / 2;
+ if (t2_timeout <= t1_timeout) {
+ /* the computed T1 would be invalid, so discard T2 */
+ t2_timeout = client_compute_timeout(client, client->lease->lifetime, 7.0 / 8.0);
+ client->lease->t2 = (client->lease->lifetime * 7) / 8;
+ }
+ } else if (client->lease->t1 > 0 && client->lease->t1 < client->lease->lifetime) {
+ /* only T1 is given, and it is valid */
+ t1_timeout = client_compute_timeout(client, client->lease->t1, 1);
+ t2_timeout = client_compute_timeout(client, client->lease->lifetime, 7.0 / 8.0);
+ client->lease->t2 = (client->lease->lifetime * 7) / 8;
+ if (t2_timeout <= t1_timeout) {
+ /* the computed T2 would be invalid, so discard T1 */
+ t2_timeout = client_compute_timeout(client, client->lease->lifetime, 0.5);
+ client->lease->t2 = client->lease->lifetime / 2;
+ }
+ } else {
+ /* fall back to the default timeouts */
+ t1_timeout = client_compute_timeout(client, client->lease->lifetime, 0.5);
+ client->lease->t1 = client->lease->lifetime / 2;
+ t2_timeout = client_compute_timeout(client, client->lease->lifetime, 7.0 / 8.0);
+ client->lease->t2 = (client->lease->lifetime * 7) / 8;
+ }
+
+ /* arm lifetime timeout */
+ r = event_reset_time(client->event, &client->timeout_expire,
+ clock_boottime_or_monotonic(),
+ lifetime_timeout, 10 * USEC_PER_MSEC,
+ client_timeout_expire, client,
+ client->event_priority, "dhcp4-lifetime", true);
+ if (r < 0)
+ return r;
+
+ log_dhcp_client(client, "lease expires in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX, lifetime_timeout - time_now, USEC_PER_SEC));
+
+ /* don't arm earlier timeouts if this has already expired */
+ if (lifetime_timeout <= time_now)
+ return 0;
+
+ /* arm T2 timeout */
+ r = event_reset_time(client->event, &client->timeout_t2,
+ clock_boottime_or_monotonic(),
+ t2_timeout, 10 * USEC_PER_MSEC,
+ client_timeout_t2, client,
+ client->event_priority, "dhcp4-t2-timeout", true);
+ if (r < 0)
+ return r;
+
+ log_dhcp_client(client, "T2 expires in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX, t2_timeout - time_now, USEC_PER_SEC));
+
+ /* don't arm earlier timeout if this has already expired */
+ if (t2_timeout <= time_now)
+ return 0;
+
+ /* arm T1 timeout */
+ r = event_reset_time(client->event, &client->timeout_t1,
+ clock_boottime_or_monotonic(),
+ t1_timeout, 10 * USEC_PER_MSEC,
+ client_timeout_t1, client,
+ client->event_priority, "dhcp4-t1-timer", true);
+ if (r < 0)
+ return r;
+
+ log_dhcp_client(client, "T1 expires in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX, t1_timeout - time_now, USEC_PER_SEC));
+
+ return 0;
+}
+
+static int client_handle_message(sd_dhcp_client *client, DHCPMessage *message, int len) {
+ DHCP_CLIENT_DONT_DESTROY(client);
+ char time_string[FORMAT_TIMESPAN_MAX];
+ int r = 0, notify_event = 0;
+
+ assert(client);
+ assert(client->event);
+ assert(message);
+
+ switch (client->state) {
+ case DHCP_STATE_SELECTING:
+
+ r = client_handle_offer(client, message, len);
+ if (r >= 0) {
+
+ client->state = DHCP_STATE_REQUESTING;
+ client->attempt = 1;
+
+ r = event_reset_time(client->event, &client->timeout_resend,
+ clock_boottime_or_monotonic(),
+ 0, 0,
+ client_timeout_resend, client,
+ client->event_priority, "dhcp4-resend-timer", true);
+ if (r < 0)
+ goto error;
+ } else if (r == -ENOMSG)
+ /* invalid message, let's ignore it */
+ return 0;
+
+ break;
+
+ case DHCP_STATE_REBOOTING:
+ case DHCP_STATE_REQUESTING:
+ case DHCP_STATE_RENEWING:
+ case DHCP_STATE_REBINDING:
+
+ r = client_handle_ack(client, message, len);
+ if (r >= 0) {
+ client->start_delay = 0;
+ (void) event_source_disable(client->timeout_resend);
+ client->receive_message =
+ sd_event_source_unref(client->receive_message);
+ client->fd = asynchronous_close(client->fd);
+
+ if (IN_SET(client->state, DHCP_STATE_REQUESTING,
+ DHCP_STATE_REBOOTING))
+ notify_event = SD_DHCP_CLIENT_EVENT_IP_ACQUIRE;
+ else if (r != SD_DHCP_CLIENT_EVENT_IP_ACQUIRE)
+ notify_event = r;
+
+ client->state = DHCP_STATE_BOUND;
+ client->attempt = 1;
+
+ client->last_addr = client->lease->address;
+
+ r = client_set_lease_timeouts(client);
+ if (r < 0) {
+ log_dhcp_client(client, "could not set lease timeouts");
+ goto error;
+ }
+
+ r = dhcp_network_bind_udp_socket(client->ifindex, client->lease->address, client->port);
+ if (r < 0) {
+ log_dhcp_client(client, "could not bind UDP socket");
+ goto error;
+ }
+
+ client->fd = r;
+
+ client_initialize_io_events(client, client_receive_message_udp);
+
+ if (notify_event) {
+ client_notify(client, notify_event);
+ if (client->state == DHCP_STATE_STOPPED)
+ return 0;
+ }
+
+ } else if (r == -EADDRNOTAVAIL) {
+ /* got a NAK, let's restart the client */
+ client_notify(client, SD_DHCP_CLIENT_EVENT_EXPIRED);
+
+ r = client_initialize(client);
+ if (r < 0)
+ goto error;
+
+ r = client_start_delayed(client);
+ if (r < 0)
+ goto error;
+
+ log_dhcp_client(client, "REBOOT in %s", format_timespan(time_string, FORMAT_TIMESPAN_MAX,
+ client->start_delay, USEC_PER_SEC));
+
+ client->start_delay = CLAMP(client->start_delay * 2,
+ RESTART_AFTER_NAK_MIN_USEC, RESTART_AFTER_NAK_MAX_USEC);
+
+ return 0;
+ } else if (r == -ENOMSG)
+ /* invalid message, let's ignore it */
+ return 0;
+
+ break;
+
+ case DHCP_STATE_BOUND:
+ r = client_handle_forcerenew(client, message, len);
+ if (r >= 0) {
+ r = client_timeout_t1(NULL, 0, client);
+ if (r < 0)
+ goto error;
+ } else if (r == -ENOMSG)
+ /* invalid message, let's ignore it */
+ return 0;
+
+ break;
+
+ case DHCP_STATE_INIT:
+ case DHCP_STATE_INIT_REBOOT:
+
+ break;
+
+ case DHCP_STATE_STOPPED:
+ r = -EINVAL;
+ goto error;
+ }
+
+error:
+ if (r < 0)
+ client_stop(client, r);
+
+ return r;
+}
+
+static int client_receive_message_udp(
+ sd_event_source *s,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+
+ sd_dhcp_client *client = userdata;
+ _cleanup_free_ DHCPMessage *message = NULL;
+ const struct ether_addr zero_mac = {};
+ const struct ether_addr *expected_chaddr = NULL;
+ uint8_t expected_hlen = 0;
+ ssize_t len, buflen;
+
+ assert(s);
+ assert(client);
+
+ buflen = next_datagram_size_fd(fd);
+ if (buflen < 0)
+ return buflen;
+
+ message = malloc0(buflen);
+ if (!message)
+ return -ENOMEM;
+
+ len = recv(fd, message, buflen, 0);
+ if (len < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return log_dhcp_client_errno(client, errno,
+ "Could not receive message from UDP socket: %m");
+ }
+ if ((size_t) len < sizeof(DHCPMessage)) {
+ log_dhcp_client(client, "Too small to be a DHCP message: ignoring");
+ return 0;
+ }
+
+ if (be32toh(message->magic) != DHCP_MAGIC_COOKIE) {
+ log_dhcp_client(client, "Not a DHCP message: ignoring");
+ return 0;
+ }
+
+ if (message->op != BOOTREPLY) {
+ log_dhcp_client(client, "Not a BOOTREPLY message: ignoring");
+ return 0;
+ }
+
+ if (message->htype != client->arp_type) {
+ log_dhcp_client(client, "Packet type does not match client type");
+ return 0;
+ }
+
+ if (client->arp_type == ARPHRD_ETHER) {
+ expected_hlen = ETH_ALEN;
+ expected_chaddr = (const struct ether_addr *) &client->mac_addr;
+ } else {
+ /* Non-Ethernet links expect zero chaddr */
+ expected_hlen = 0;
+ expected_chaddr = &zero_mac;
+ }
+
+ if (message->hlen != expected_hlen) {
+ log_dhcp_client(client, "Unexpected packet hlen %d", message->hlen);
+ return 0;
+ }
+
+ if (memcmp(&message->chaddr[0], expected_chaddr, ETH_ALEN)) {
+ log_dhcp_client(client, "Received chaddr does not match expected: ignoring");
+ return 0;
+ }
+
+ if (client->state != DHCP_STATE_BOUND &&
+ be32toh(message->xid) != client->xid) {
+ /* in BOUND state, we may receive FORCERENEW with xid set by server,
+ so ignore the xid in this case */
+ log_dhcp_client(client, "Received xid (%u) does not match expected (%u): ignoring",
+ be32toh(message->xid), client->xid);
+ return 0;
+ }
+
+ return client_handle_message(client, message, len);
+}
+
+static int client_receive_message_raw(
+ sd_event_source *s,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+
+ sd_dhcp_client *client = userdata;
+ _cleanup_free_ DHCPPacket *packet = NULL;
+ uint8_t cmsgbuf[CMSG_LEN(sizeof(struct tpacket_auxdata))];
+ struct iovec iov = {};
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = cmsgbuf,
+ .msg_controllen = sizeof(cmsgbuf),
+ };
+ struct cmsghdr *cmsg;
+ bool checksum = true;
+ ssize_t buflen, len;
+ int r;
+
+ assert(s);
+ assert(client);
+
+ buflen = next_datagram_size_fd(fd);
+ if (buflen < 0)
+ return buflen;
+
+ packet = malloc0(buflen);
+ if (!packet)
+ return -ENOMEM;
+
+ iov = IOVEC_MAKE(packet, buflen);
+
+ len = recvmsg(fd, &msg, 0);
+ if (len < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return log_dhcp_client_errno(client, errno,
+ "Could not receive message from raw socket: %m");
+ } else if ((size_t)len < sizeof(DHCPPacket))
+ return 0;
+
+ CMSG_FOREACH(cmsg, &msg)
+ if (cmsg->cmsg_level == SOL_PACKET &&
+ cmsg->cmsg_type == PACKET_AUXDATA &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct tpacket_auxdata))) {
+ struct tpacket_auxdata *aux = (struct tpacket_auxdata*)CMSG_DATA(cmsg);
+
+ checksum = !(aux->tp_status & TP_STATUS_CSUMNOTREADY);
+ break;
+ }
+
+ r = dhcp_packet_verify_headers(packet, len, checksum, client->port);
+ if (r < 0)
+ return 0;
+
+ len -= DHCP_IP_UDP_SIZE;
+
+ return client_handle_message(client, &packet->dhcp, len);
+}
+
+int sd_dhcp_client_start(sd_dhcp_client *client) {
+ int r;
+
+ assert_return(client, -EINVAL);
+
+ r = client_initialize(client);
+ if (r < 0)
+ return r;
+
+ /* RFC7844 section 3.3:
+ SHOULD perform a complete four-way handshake, starting with a
+ DHCPDISCOVER, to obtain a new address lease. If the client can
+ ascertain that this is exactly the same network to which it was
+ previously connected, and if the link-layer address did not change,
+ the client MAY issue a DHCPREQUEST to try to reclaim the current
+ address. */
+ if (client->last_addr && !client->anonymize)
+ client->state = DHCP_STATE_INIT_REBOOT;
+
+ r = client_start(client);
+ if (r >= 0)
+ log_dhcp_client(client, "STARTED on ifindex %i", client->ifindex);
+
+ return r;
+}
+
+int sd_dhcp_client_stop(sd_dhcp_client *client) {
+ DHCP_CLIENT_DONT_DESTROY(client);
+
+ assert_return(client, -EINVAL);
+
+ client_stop(client, SD_DHCP_CLIENT_EVENT_STOP);
+ client->state = DHCP_STATE_STOPPED;
+
+ return 0;
+}
+
+int sd_dhcp_client_attach_event(sd_dhcp_client *client, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(!client->event, -EBUSY);
+
+ if (event)
+ client->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&client->event);
+ if (r < 0)
+ return 0;
+ }
+
+ client->event_priority = priority;
+
+ return 0;
+}
+
+int sd_dhcp_client_detach_event(sd_dhcp_client *client) {
+ assert_return(client, -EINVAL);
+
+ client->event = sd_event_unref(client->event);
+
+ return 0;
+}
+
+sd_event *sd_dhcp_client_get_event(sd_dhcp_client *client) {
+ assert_return(client, NULL);
+
+ return client->event;
+}
+
+static sd_dhcp_client *dhcp_client_free(sd_dhcp_client *client) {
+ assert(client);
+
+ log_dhcp_client(client, "FREE");
+
+ client->timeout_resend = sd_event_source_unref(client->timeout_resend);
+ client->timeout_t1 = sd_event_source_unref(client->timeout_t1);
+ client->timeout_t2 = sd_event_source_unref(client->timeout_t2);
+ client->timeout_expire = sd_event_source_unref(client->timeout_expire);
+
+ client_initialize(client);
+
+ sd_dhcp_client_detach_event(client);
+
+ sd_dhcp_lease_unref(client->lease);
+
+ free(client->req_opts);
+ free(client->hostname);
+ free(client->vendor_class_identifier);
+ client->user_class = strv_free(client->user_class);
+ return mfree(client);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_dhcp_client, sd_dhcp_client, dhcp_client_free);
+
+int sd_dhcp_client_new(sd_dhcp_client **ret, int anonymize) {
+ _cleanup_(sd_dhcp_client_unrefp) sd_dhcp_client *client = NULL;
+
+ assert_return(ret, -EINVAL);
+
+ client = new(sd_dhcp_client, 1);
+ if (!client)
+ return -ENOMEM;
+
+ *client = (sd_dhcp_client) {
+ .n_ref = 1,
+ .state = DHCP_STATE_INIT,
+ .ifindex = -1,
+ .fd = -1,
+ .attempt = 1,
+ .mtu = DHCP_DEFAULT_MIN_SIZE,
+ .port = DHCP_PORT_CLIENT,
+ .anonymize = !!anonymize,
+ };
+ /* NOTE: this could be moved to a function. */
+ if (anonymize) {
+ client->req_opts_size = ELEMENTSOF(default_req_opts_anonymize);
+ client->req_opts = memdup(default_req_opts_anonymize, client->req_opts_size);
+ } else {
+ client->req_opts_size = ELEMENTSOF(default_req_opts);
+ client->req_opts = memdup(default_req_opts, client->req_opts_size);
+ }
+ if (!client->req_opts)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(client);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/sd-dhcp-lease.c b/src/libsystemd-network/sd-dhcp-lease.c
new file mode 100644
index 0000000..13badbf
--- /dev/null
+++ b/src/libsystemd-network/sd-dhcp-lease.c
@@ -0,0 +1,1309 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sd-dhcp-lease.h"
+
+#include "alloc-util.h"
+#include "dhcp-lease-internal.h"
+#include "dhcp-protocol.h"
+#include "dns-domain.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "network-internal.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "unaligned.h"
+
+int sd_dhcp_lease_get_address(sd_dhcp_lease *lease, struct in_addr *addr) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (lease->address == 0)
+ return -ENODATA;
+
+ addr->s_addr = lease->address;
+ return 0;
+}
+
+int sd_dhcp_lease_get_broadcast(sd_dhcp_lease *lease, struct in_addr *addr) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (!lease->have_broadcast)
+ return -ENODATA;
+
+ addr->s_addr = lease->broadcast;
+ return 0;
+}
+
+int sd_dhcp_lease_get_lifetime(sd_dhcp_lease *lease, uint32_t *lifetime) {
+ assert_return(lease, -EINVAL);
+ assert_return(lifetime, -EINVAL);
+
+ if (lease->lifetime <= 0)
+ return -ENODATA;
+
+ *lifetime = lease->lifetime;
+ return 0;
+}
+
+int sd_dhcp_lease_get_t1(sd_dhcp_lease *lease, uint32_t *t1) {
+ assert_return(lease, -EINVAL);
+ assert_return(t1, -EINVAL);
+
+ if (lease->t1 <= 0)
+ return -ENODATA;
+
+ *t1 = lease->t1;
+ return 0;
+}
+
+int sd_dhcp_lease_get_t2(sd_dhcp_lease *lease, uint32_t *t2) {
+ assert_return(lease, -EINVAL);
+ assert_return(t2, -EINVAL);
+
+ if (lease->t2 <= 0)
+ return -ENODATA;
+
+ *t2 = lease->t2;
+ return 0;
+}
+
+int sd_dhcp_lease_get_mtu(sd_dhcp_lease *lease, uint16_t *mtu) {
+ assert_return(lease, -EINVAL);
+ assert_return(mtu, -EINVAL);
+
+ if (lease->mtu <= 0)
+ return -ENODATA;
+
+ *mtu = lease->mtu;
+ return 0;
+}
+
+int sd_dhcp_lease_get_dns(sd_dhcp_lease *lease, const struct in_addr **addr) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (lease->dns_size <= 0)
+ return -ENODATA;
+
+ *addr = lease->dns;
+ return (int) lease->dns_size;
+}
+
+int sd_dhcp_lease_get_ntp(sd_dhcp_lease *lease, const struct in_addr **addr) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (lease->ntp_size <= 0)
+ return -ENODATA;
+
+ *addr = lease->ntp;
+ return (int) lease->ntp_size;
+}
+
+int sd_dhcp_lease_get_domainname(sd_dhcp_lease *lease, const char **domainname) {
+ assert_return(lease, -EINVAL);
+ assert_return(domainname, -EINVAL);
+
+ if (!lease->domainname)
+ return -ENODATA;
+
+ *domainname = lease->domainname;
+ return 0;
+}
+
+int sd_dhcp_lease_get_hostname(sd_dhcp_lease *lease, const char **hostname) {
+ assert_return(lease, -EINVAL);
+ assert_return(hostname, -EINVAL);
+
+ if (!lease->hostname)
+ return -ENODATA;
+
+ *hostname = lease->hostname;
+ return 0;
+}
+
+int sd_dhcp_lease_get_root_path(sd_dhcp_lease *lease, const char **root_path) {
+ assert_return(lease, -EINVAL);
+ assert_return(root_path, -EINVAL);
+
+ if (!lease->root_path)
+ return -ENODATA;
+
+ *root_path = lease->root_path;
+ return 0;
+}
+
+int sd_dhcp_lease_get_router(sd_dhcp_lease *lease, struct in_addr *addr) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (lease->router == 0)
+ return -ENODATA;
+
+ addr->s_addr = lease->router;
+ return 0;
+}
+
+int sd_dhcp_lease_get_netmask(sd_dhcp_lease *lease, struct in_addr *addr) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (!lease->have_subnet_mask)
+ return -ENODATA;
+
+ addr->s_addr = lease->subnet_mask;
+ return 0;
+}
+
+int sd_dhcp_lease_get_server_identifier(sd_dhcp_lease *lease, struct in_addr *addr) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (lease->server_address == 0)
+ return -ENODATA;
+
+ addr->s_addr = lease->server_address;
+ return 0;
+}
+
+int sd_dhcp_lease_get_next_server(sd_dhcp_lease *lease, struct in_addr *addr) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (lease->next_server == 0)
+ return -ENODATA;
+
+ addr->s_addr = lease->next_server;
+ return 0;
+}
+
+/*
+ * The returned routes array must be freed by the caller.
+ * Route objects have the same lifetime of the lease and must not be freed.
+ */
+int sd_dhcp_lease_get_routes(sd_dhcp_lease *lease, sd_dhcp_route ***routes) {
+ sd_dhcp_route **ret;
+ unsigned i;
+
+ assert_return(lease, -EINVAL);
+ assert_return(routes, -EINVAL);
+
+ if (lease->static_route_size <= 0)
+ return -ENODATA;
+
+ ret = new(sd_dhcp_route *, lease->static_route_size);
+ if (!ret)
+ return -ENOMEM;
+
+ for (i = 0; i < lease->static_route_size; i++)
+ ret[i] = &lease->static_route[i];
+
+ *routes = ret;
+ return (int) lease->static_route_size;
+}
+
+int sd_dhcp_lease_get_search_domains(sd_dhcp_lease *lease, char ***domains) {
+ size_t r;
+
+ assert_return(lease, -EINVAL);
+ assert_return(domains, -EINVAL);
+
+ r = strv_length(lease->search_domains);
+ if (r > 0) {
+ *domains = lease->search_domains;
+ return (int) r;
+ }
+
+ return -ENODATA;
+}
+
+int sd_dhcp_lease_get_vendor_specific(sd_dhcp_lease *lease, const void **data, size_t *data_len) {
+ assert_return(lease, -EINVAL);
+ assert_return(data, -EINVAL);
+ assert_return(data_len, -EINVAL);
+
+ if (lease->vendor_specific_len <= 0)
+ return -ENODATA;
+
+ *data = lease->vendor_specific;
+ *data_len = lease->vendor_specific_len;
+ return 0;
+}
+
+static sd_dhcp_lease *dhcp_lease_free(sd_dhcp_lease *lease) {
+ assert(lease);
+
+ while (lease->private_options) {
+ struct sd_dhcp_raw_option *option = lease->private_options;
+
+ LIST_REMOVE(options, lease->private_options, option);
+
+ free(option->data);
+ free(option);
+ }
+
+ free(lease->root_path);
+ free(lease->timezone);
+ free(lease->hostname);
+ free(lease->domainname);
+ free(lease->dns);
+ free(lease->ntp);
+ free(lease->static_route);
+ free(lease->client_id);
+ free(lease->vendor_specific);
+ strv_free(lease->search_domains);
+ return mfree(lease);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_dhcp_lease, sd_dhcp_lease, dhcp_lease_free);
+
+static int lease_parse_u32(const uint8_t *option, size_t len, uint32_t *ret, uint32_t min) {
+ assert(option);
+ assert(ret);
+
+ if (len != 4)
+ return -EINVAL;
+
+ *ret = unaligned_read_be32((be32_t*) option);
+ if (*ret < min)
+ *ret = min;
+
+ return 0;
+}
+
+static int lease_parse_u16(const uint8_t *option, size_t len, uint16_t *ret, uint16_t min) {
+ assert(option);
+ assert(ret);
+
+ if (len != 2)
+ return -EINVAL;
+
+ *ret = unaligned_read_be16((be16_t*) option);
+ if (*ret < min)
+ *ret = min;
+
+ return 0;
+}
+
+static int lease_parse_be32(const uint8_t *option, size_t len, be32_t *ret) {
+ assert(option);
+ assert(ret);
+
+ if (len != 4)
+ return -EINVAL;
+
+ memcpy(ret, option, 4);
+ return 0;
+}
+
+static int lease_parse_string(const uint8_t *option, size_t len, char **ret) {
+ assert(option);
+ assert(ret);
+
+ if (len <= 0)
+ *ret = mfree(*ret);
+ else {
+ char *string;
+
+ /*
+ * One trailing NUL byte is OK, we don't mind. See:
+ * https://github.com/systemd/systemd/issues/1337
+ */
+ if (memchr(option, 0, len - 1))
+ return -EINVAL;
+
+ string = strndup((const char *) option, len);
+ if (!string)
+ return -ENOMEM;
+
+ free_and_replace(*ret, string);
+ }
+
+ return 0;
+}
+
+static int lease_parse_domain(const uint8_t *option, size_t len, char **ret) {
+ _cleanup_free_ char *name = NULL, *normalized = NULL;
+ int r;
+
+ assert(option);
+ assert(ret);
+
+ r = lease_parse_string(option, len, &name);
+ if (r < 0)
+ return r;
+ if (!name) {
+ *ret = mfree(*ret);
+ return 0;
+ }
+
+ r = dns_name_normalize(name, 0, &normalized);
+ if (r < 0)
+ return r;
+
+ if (is_localhost(normalized))
+ return -EINVAL;
+
+ if (dns_name_is_root(normalized))
+ return -EINVAL;
+
+ free_and_replace(*ret, normalized);
+
+ return 0;
+}
+
+static void filter_bogus_addresses(struct in_addr *addresses, size_t *n) {
+ size_t i, j;
+
+ /* Silently filter DNS/NTP servers supplied to us that do not make outside of the local scope. */
+
+ for (i = 0, j = 0; i < *n; i ++) {
+
+ if (in4_addr_is_null(addresses+i) ||
+ in4_addr_is_localhost(addresses+i))
+ continue;
+
+ addresses[j++] = addresses[i];
+ }
+
+ *n = j;
+}
+
+static int lease_parse_in_addrs(const uint8_t *option, size_t len, struct in_addr **ret, size_t *n_ret) {
+ assert(option);
+ assert(ret);
+ assert(n_ret);
+
+ if (len <= 0) {
+ *ret = mfree(*ret);
+ *n_ret = 0;
+ } else {
+ size_t n_addresses;
+ struct in_addr *addresses;
+
+ if (len % 4 != 0)
+ return -EINVAL;
+
+ n_addresses = len / 4;
+
+ addresses = newdup(struct in_addr, option, n_addresses);
+ if (!addresses)
+ return -ENOMEM;
+
+ filter_bogus_addresses(addresses, &n_addresses);
+
+ free(*ret);
+ *ret = addresses;
+ *n_ret = n_addresses;
+ }
+
+ return 0;
+}
+
+static int lease_parse_routes(
+ const uint8_t *option, size_t len,
+ struct sd_dhcp_route **routes, size_t *routes_size, size_t *routes_allocated) {
+
+ struct in_addr addr;
+
+ assert(option || len <= 0);
+ assert(routes);
+ assert(routes_size);
+ assert(routes_allocated);
+
+ if (len <= 0)
+ return 0;
+
+ if (len % 8 != 0)
+ return -EINVAL;
+
+ if (!GREEDY_REALLOC(*routes, *routes_allocated, *routes_size + (len / 8)))
+ return -ENOMEM;
+
+ while (len >= 8) {
+ struct sd_dhcp_route *route = *routes + *routes_size;
+ int r;
+
+ route->option = SD_DHCP_OPTION_STATIC_ROUTE;
+ r = in4_addr_default_prefixlen((struct in_addr*) option, &route->dst_prefixlen);
+ if (r < 0) {
+ log_debug("Failed to determine destination prefix length from class based IP, ignoring");
+ continue;
+ }
+
+ assert_se(lease_parse_be32(option, 4, &addr.s_addr) >= 0);
+ route->dst_addr = inet_makeaddr(inet_netof(addr), 0);
+ option += 4;
+
+ assert_se(lease_parse_be32(option, 4, &route->gw_addr.s_addr) >= 0);
+ option += 4;
+
+ len -= 8;
+ (*routes_size)++;
+ }
+
+ return 0;
+}
+
+/* parses RFC3442 Classless Static Route Option */
+static int lease_parse_classless_routes(
+ const uint8_t *option, size_t len,
+ struct sd_dhcp_route **routes, size_t *routes_size, size_t *routes_allocated) {
+
+ assert(option || len <= 0);
+ assert(routes);
+ assert(routes_size);
+ assert(routes_allocated);
+
+ if (len <= 0)
+ return 0;
+
+ /* option format: (subnet-mask-width significant-subnet-octets gateway-ip)* */
+
+ while (len > 0) {
+ uint8_t dst_octets;
+ struct sd_dhcp_route *route;
+
+ if (!GREEDY_REALLOC(*routes, *routes_allocated, *routes_size + 1))
+ return -ENOMEM;
+
+ route = *routes + *routes_size;
+ route->option = SD_DHCP_OPTION_CLASSLESS_STATIC_ROUTE;
+
+ dst_octets = (*option == 0 ? 0 : ((*option - 1) / 8) + 1);
+ route->dst_prefixlen = *option;
+ option++;
+ len--;
+
+ /* can't have more than 4 octets in IPv4 */
+ if (dst_octets > 4 || len < dst_octets)
+ return -EINVAL;
+
+ route->dst_addr.s_addr = 0;
+ memcpy(&route->dst_addr.s_addr, option, dst_octets);
+ option += dst_octets;
+ len -= dst_octets;
+
+ if (len < 4)
+ return -EINVAL;
+
+ assert_se(lease_parse_be32(option, 4, &route->gw_addr.s_addr) >= 0);
+ option += 4;
+ len -= 4;
+
+ (*routes_size)++;
+ }
+
+ return 0;
+}
+
+int dhcp_lease_parse_options(uint8_t code, uint8_t len, const void *option, void *userdata) {
+ sd_dhcp_lease *lease = userdata;
+ int r;
+
+ assert(lease);
+
+ switch(code) {
+
+ case SD_DHCP_OPTION_IP_ADDRESS_LEASE_TIME:
+ r = lease_parse_u32(option, len, &lease->lifetime, 1);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse lease time, ignoring: %m");
+
+ break;
+
+ case SD_DHCP_OPTION_SERVER_IDENTIFIER:
+ r = lease_parse_be32(option, len, &lease->server_address);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse server identifier, ignoring: %m");
+
+ break;
+
+ case SD_DHCP_OPTION_SUBNET_MASK:
+ r = lease_parse_be32(option, len, &lease->subnet_mask);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse subnet mask, ignoring: %m");
+ else
+ lease->have_subnet_mask = true;
+ break;
+
+ case SD_DHCP_OPTION_BROADCAST:
+ r = lease_parse_be32(option, len, &lease->broadcast);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse broadcast address, ignoring: %m");
+ else
+ lease->have_broadcast = true;
+ break;
+
+ case SD_DHCP_OPTION_ROUTER:
+ if (len >= 4) {
+ r = lease_parse_be32(option, 4, &lease->router);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse router address, ignoring: %m");
+ }
+ break;
+
+ case SD_DHCP_OPTION_DOMAIN_NAME_SERVER:
+ r = lease_parse_in_addrs(option, len, &lease->dns, &lease->dns_size);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse DNS server, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_NTP_SERVER:
+ r = lease_parse_in_addrs(option, len, &lease->ntp, &lease->ntp_size);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse NTP server, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_STATIC_ROUTE:
+ r = lease_parse_routes(option, len, &lease->static_route, &lease->static_route_size, &lease->static_route_allocated);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse static routes, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_INTERFACE_MTU:
+ r = lease_parse_u16(option, len, &lease->mtu, 68);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse MTU, ignoring: %m");
+ if (lease->mtu < DHCP_DEFAULT_MIN_SIZE) {
+ log_debug("MTU value of %" PRIu16 " too small. Using default MTU value of %d instead.", lease->mtu, DHCP_DEFAULT_MIN_SIZE);
+ lease->mtu = DHCP_DEFAULT_MIN_SIZE;
+ }
+
+ break;
+
+ case SD_DHCP_OPTION_DOMAIN_NAME:
+ r = lease_parse_domain(option, len, &lease->domainname);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse domain name, ignoring: %m");
+ return 0;
+ }
+
+ break;
+
+ case SD_DHCP_OPTION_DOMAIN_SEARCH_LIST:
+ r = dhcp_lease_parse_search_domains(option, len, &lease->search_domains);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse Domain Search List, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_HOST_NAME:
+ r = lease_parse_domain(option, len, &lease->hostname);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse host name, ignoring: %m");
+ return 0;
+ }
+
+ break;
+
+ case SD_DHCP_OPTION_ROOT_PATH:
+ r = lease_parse_string(option, len, &lease->root_path);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse root path, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_RENEWAL_T1_TIME:
+ r = lease_parse_u32(option, len, &lease->t1, 1);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse T1 time, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_REBINDING_T2_TIME:
+ r = lease_parse_u32(option, len, &lease->t2, 1);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse T2 time, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_CLASSLESS_STATIC_ROUTE:
+ r = lease_parse_classless_routes(
+ option, len,
+ &lease->static_route,
+ &lease->static_route_size,
+ &lease->static_route_allocated);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse classless routes, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_NEW_TZDB_TIMEZONE: {
+ _cleanup_free_ char *tz = NULL;
+
+ r = lease_parse_string(option, len, &tz);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse timezone option, ignoring: %m");
+ return 0;
+ }
+
+ if (!timezone_is_valid(tz, LOG_DEBUG)) {
+ log_debug_errno(r, "Timezone is not valid, ignoring: %m");
+ return 0;
+ }
+
+ free_and_replace(lease->timezone, tz);
+
+ break;
+ }
+
+ case SD_DHCP_OPTION_VENDOR_SPECIFIC:
+
+ if (len <= 0)
+ lease->vendor_specific = mfree(lease->vendor_specific);
+ else {
+ void *p;
+
+ p = memdup(option, len);
+ if (!p)
+ return -ENOMEM;
+
+ free(lease->vendor_specific);
+ lease->vendor_specific = p;
+ }
+
+ lease->vendor_specific_len = len;
+ break;
+
+ case SD_DHCP_OPTION_PRIVATE_BASE ... SD_DHCP_OPTION_PRIVATE_LAST:
+ r = dhcp_lease_insert_private_option(lease, code, option, len);
+ if (r < 0)
+ return r;
+
+ break;
+
+ default:
+ log_debug("Ignoring option DHCP option %"PRIu8" while parsing.", code);
+ break;
+ }
+
+ return 0;
+}
+
+/* Parses compressed domain names. */
+int dhcp_lease_parse_search_domains(const uint8_t *option, size_t len, char ***domains) {
+ _cleanup_strv_free_ char **names = NULL;
+ size_t pos = 0, cnt = 0;
+ int r;
+
+ assert(domains);
+ assert_return(option && len > 0, -ENODATA);
+
+ while (pos < len) {
+ _cleanup_free_ char *name = NULL;
+ size_t n = 0, allocated = 0;
+ size_t jump_barrier = pos, next_chunk = 0;
+ bool first = true;
+
+ for (;;) {
+ uint8_t c;
+ c = option[pos++];
+
+ if (c == 0) {
+ /* End of name */
+ break;
+ } else if (c <= 63) {
+ const char *label;
+
+ /* Literal label */
+ label = (const char*) (option + pos);
+ pos += c;
+ if (pos >= len)
+ return -EBADMSG;
+
+ if (!GREEDY_REALLOC(name, allocated, n + !first + DNS_LABEL_ESCAPED_MAX))
+ return -ENOMEM;
+
+ if (first)
+ first = false;
+ else
+ name[n++] = '.';
+
+ r = dns_label_escape(label, c, name + n, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ n += r;
+ } else if ((c & 0xc0) == 0xc0) {
+ /* Pointer */
+
+ uint8_t d;
+ uint16_t ptr;
+
+ if (pos >= len)
+ return -EBADMSG;
+
+ d = option[pos++];
+ ptr = (uint16_t) (c & ~0xc0) << 8 | (uint16_t) d;
+
+ /* Jumps are limited to a "prior occurrence" (RFC-1035 4.1.4) */
+ if (ptr >= jump_barrier)
+ return -EBADMSG;
+ jump_barrier = ptr;
+
+ /* Save current location so we don't end up re-parsing what's parsed so far. */
+ if (next_chunk == 0)
+ next_chunk = pos;
+
+ pos = ptr;
+ } else
+ return -EBADMSG;
+ }
+
+ if (!GREEDY_REALLOC(name, allocated, n + 1))
+ return -ENOMEM;
+ name[n] = 0;
+
+ r = strv_extend(&names, name);
+ if (r < 0)
+ return r;
+
+ cnt++;
+
+ if (next_chunk != 0)
+ pos = next_chunk;
+ }
+
+ *domains = TAKE_PTR(names);
+
+ return cnt;
+}
+
+int dhcp_lease_insert_private_option(sd_dhcp_lease *lease, uint8_t tag, const void *data, uint8_t len) {
+ struct sd_dhcp_raw_option *cur, *option;
+
+ assert(lease);
+
+ LIST_FOREACH(options, cur, lease->private_options) {
+ if (tag < cur->tag)
+ break;
+ if (tag == cur->tag) {
+ log_debug("Ignoring duplicate option, tagged %i.", tag);
+ return 0;
+ }
+ }
+
+ option = new(struct sd_dhcp_raw_option, 1);
+ if (!option)
+ return -ENOMEM;
+
+ option->tag = tag;
+ option->length = len;
+ option->data = memdup(data, len);
+ if (!option->data) {
+ free(option);
+ return -ENOMEM;
+ }
+
+ LIST_INSERT_BEFORE(options, lease->private_options, cur, option);
+ return 0;
+}
+
+int dhcp_lease_new(sd_dhcp_lease **ret) {
+ sd_dhcp_lease *lease;
+
+ lease = new0(sd_dhcp_lease, 1);
+ if (!lease)
+ return -ENOMEM;
+
+ lease->router = INADDR_ANY;
+ lease->n_ref = 1;
+
+ *ret = lease;
+ return 0;
+}
+
+int dhcp_lease_save(sd_dhcp_lease *lease, const char *lease_file) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ struct sd_dhcp_raw_option *option;
+ struct in_addr address;
+ const struct in_addr *addresses;
+ const void *client_id, *data;
+ size_t client_id_len, data_len;
+ const char *string;
+ uint16_t mtu;
+ _cleanup_free_ sd_dhcp_route **routes = NULL;
+ char **search_domains = NULL;
+ uint32_t t1, t2, lifetime;
+ int r;
+
+ assert(lease);
+ assert(lease_file);
+
+ r = fopen_temporary(lease_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n");
+
+ r = sd_dhcp_lease_get_address(lease, &address);
+ if (r >= 0)
+ fprintf(f, "ADDRESS=%s\n", inet_ntoa(address));
+
+ r = sd_dhcp_lease_get_netmask(lease, &address);
+ if (r >= 0)
+ fprintf(f, "NETMASK=%s\n", inet_ntoa(address));
+
+ r = sd_dhcp_lease_get_router(lease, &address);
+ if (r >= 0)
+ fprintf(f, "ROUTER=%s\n", inet_ntoa(address));
+
+ r = sd_dhcp_lease_get_server_identifier(lease, &address);
+ if (r >= 0)
+ fprintf(f, "SERVER_ADDRESS=%s\n", inet_ntoa(address));
+
+ r = sd_dhcp_lease_get_next_server(lease, &address);
+ if (r >= 0)
+ fprintf(f, "NEXT_SERVER=%s\n", inet_ntoa(address));
+
+ r = sd_dhcp_lease_get_broadcast(lease, &address);
+ if (r >= 0)
+ fprintf(f, "BROADCAST=%s\n", inet_ntoa(address));
+
+ r = sd_dhcp_lease_get_mtu(lease, &mtu);
+ if (r >= 0)
+ fprintf(f, "MTU=%" PRIu16 "\n", mtu);
+
+ r = sd_dhcp_lease_get_t1(lease, &t1);
+ if (r >= 0)
+ fprintf(f, "T1=%" PRIu32 "\n", t1);
+
+ r = sd_dhcp_lease_get_t2(lease, &t2);
+ if (r >= 0)
+ fprintf(f, "T2=%" PRIu32 "\n", t2);
+
+ r = sd_dhcp_lease_get_lifetime(lease, &lifetime);
+ if (r >= 0)
+ fprintf(f, "LIFETIME=%" PRIu32 "\n", lifetime);
+
+ r = sd_dhcp_lease_get_dns(lease, &addresses);
+ if (r > 0) {
+ fputs("DNS=", f);
+ serialize_in_addrs(f, addresses, r);
+ fputs("\n", f);
+ }
+
+ r = sd_dhcp_lease_get_ntp(lease, &addresses);
+ if (r > 0) {
+ fputs("NTP=", f);
+ serialize_in_addrs(f, addresses, r);
+ fputs("\n", f);
+ }
+
+ r = sd_dhcp_lease_get_domainname(lease, &string);
+ if (r >= 0)
+ fprintf(f, "DOMAINNAME=%s\n", string);
+
+ r = sd_dhcp_lease_get_search_domains(lease, &search_domains);
+ if (r > 0) {
+ fputs("DOMAIN_SEARCH_LIST=", f);
+ fputstrv(f, search_domains, NULL, NULL);
+ fputs("\n", f);
+ }
+
+ r = sd_dhcp_lease_get_hostname(lease, &string);
+ if (r >= 0)
+ fprintf(f, "HOSTNAME=%s\n", string);
+
+ r = sd_dhcp_lease_get_root_path(lease, &string);
+ if (r >= 0)
+ fprintf(f, "ROOT_PATH=%s\n", string);
+
+ r = sd_dhcp_lease_get_routes(lease, &routes);
+ if (r > 0)
+ serialize_dhcp_routes(f, "ROUTES", routes, r);
+
+ r = sd_dhcp_lease_get_timezone(lease, &string);
+ if (r >= 0)
+ fprintf(f, "TIMEZONE=%s\n", string);
+
+ r = sd_dhcp_lease_get_client_id(lease, &client_id, &client_id_len);
+ if (r >= 0) {
+ _cleanup_free_ char *client_id_hex = NULL;
+
+ client_id_hex = hexmem(client_id, client_id_len);
+ if (!client_id_hex) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ fprintf(f, "CLIENTID=%s\n", client_id_hex);
+ }
+
+ r = sd_dhcp_lease_get_vendor_specific(lease, &data, &data_len);
+ if (r >= 0) {
+ _cleanup_free_ char *option_hex = NULL;
+
+ option_hex = hexmem(data, data_len);
+ if (!option_hex) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ fprintf(f, "VENDOR_SPECIFIC=%s\n", option_hex);
+ }
+
+ LIST_FOREACH(options, option, lease->private_options) {
+ char key[STRLEN("OPTION_000")+1];
+
+ xsprintf(key, "OPTION_%" PRIu8, option->tag);
+ r = serialize_dhcp_option(f, key, option->data, option->length);
+ if (r < 0)
+ goto fail;
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, lease_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save lease data %s: %m", lease_file);
+}
+
+int dhcp_lease_load(sd_dhcp_lease **ret, const char *lease_file) {
+
+ _cleanup_(sd_dhcp_lease_unrefp) sd_dhcp_lease *lease = NULL;
+ _cleanup_free_ char
+ *address = NULL,
+ *router = NULL,
+ *netmask = NULL,
+ *server_address = NULL,
+ *next_server = NULL,
+ *broadcast = NULL,
+ *dns = NULL,
+ *ntp = NULL,
+ *mtu = NULL,
+ *routes = NULL,
+ *domains = NULL,
+ *client_id_hex = NULL,
+ *vendor_specific_hex = NULL,
+ *lifetime = NULL,
+ *t1 = NULL,
+ *t2 = NULL,
+ *options[SD_DHCP_OPTION_PRIVATE_LAST - SD_DHCP_OPTION_PRIVATE_BASE + 1] = {};
+
+ int r, i;
+
+ assert(lease_file);
+ assert(ret);
+
+ r = dhcp_lease_new(&lease);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, lease_file,
+ "ADDRESS", &address,
+ "ROUTER", &router,
+ "NETMASK", &netmask,
+ "SERVER_IDENTIFIER", &server_address,
+ "NEXT_SERVER", &next_server,
+ "BROADCAST", &broadcast,
+ "DNS", &dns,
+ "NTP", &ntp,
+ "MTU", &mtu,
+ "DOMAINNAME", &lease->domainname,
+ "HOSTNAME", &lease->hostname,
+ "DOMAIN_SEARCH_LIST", &domains,
+ "ROOT_PATH", &lease->root_path,
+ "ROUTES", &routes,
+ "CLIENTID", &client_id_hex,
+ "TIMEZONE", &lease->timezone,
+ "VENDOR_SPECIFIC", &vendor_specific_hex,
+ "LIFETIME", &lifetime,
+ "T1", &t1,
+ "T2", &t2,
+ "OPTION_224", &options[0],
+ "OPTION_225", &options[1],
+ "OPTION_226", &options[2],
+ "OPTION_227", &options[3],
+ "OPTION_228", &options[4],
+ "OPTION_229", &options[5],
+ "OPTION_230", &options[6],
+ "OPTION_231", &options[7],
+ "OPTION_232", &options[8],
+ "OPTION_233", &options[9],
+ "OPTION_234", &options[10],
+ "OPTION_235", &options[11],
+ "OPTION_236", &options[12],
+ "OPTION_237", &options[13],
+ "OPTION_238", &options[14],
+ "OPTION_239", &options[15],
+ "OPTION_240", &options[16],
+ "OPTION_241", &options[17],
+ "OPTION_242", &options[18],
+ "OPTION_243", &options[19],
+ "OPTION_244", &options[20],
+ "OPTION_245", &options[21],
+ "OPTION_246", &options[22],
+ "OPTION_247", &options[23],
+ "OPTION_248", &options[24],
+ "OPTION_249", &options[25],
+ "OPTION_250", &options[26],
+ "OPTION_251", &options[27],
+ "OPTION_252", &options[28],
+ "OPTION_253", &options[29],
+ "OPTION_254", &options[30]);
+ if (r < 0)
+ return r;
+
+ if (address) {
+ r = inet_pton(AF_INET, address, &lease->address);
+ if (r <= 0)
+ log_debug("Failed to parse address %s, ignoring.", address);
+ }
+
+ if (router) {
+ r = inet_pton(AF_INET, router, &lease->router);
+ if (r <= 0)
+ log_debug("Failed to parse router %s, ignoring.", router);
+ }
+
+ if (netmask) {
+ r = inet_pton(AF_INET, netmask, &lease->subnet_mask);
+ if (r <= 0)
+ log_debug("Failed to parse netmask %s, ignoring.", netmask);
+ else
+ lease->have_subnet_mask = true;
+ }
+
+ if (server_address) {
+ r = inet_pton(AF_INET, server_address, &lease->server_address);
+ if (r <= 0)
+ log_debug("Failed to parse server address %s, ignoring.", server_address);
+ }
+
+ if (next_server) {
+ r = inet_pton(AF_INET, next_server, &lease->next_server);
+ if (r <= 0)
+ log_debug("Failed to parse next server %s, ignoring.", next_server);
+ }
+
+ if (broadcast) {
+ r = inet_pton(AF_INET, broadcast, &lease->broadcast);
+ if (r <= 0)
+ log_debug("Failed to parse broadcast address %s, ignoring.", broadcast);
+ else
+ lease->have_broadcast = true;
+ }
+
+ if (dns) {
+ r = deserialize_in_addrs(&lease->dns, dns);
+ if (r < 0)
+ log_debug_errno(r, "Failed to deserialize DNS servers %s, ignoring: %m", dns);
+ else
+ lease->dns_size = r;
+ }
+
+ if (ntp) {
+ r = deserialize_in_addrs(&lease->ntp, ntp);
+ if (r < 0)
+ log_debug_errno(r, "Failed to deserialize NTP servers %s, ignoring: %m", ntp);
+ else
+ lease->ntp_size = r;
+ }
+
+ if (mtu) {
+ r = safe_atou16(mtu, &lease->mtu);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse MTU %s, ignoring: %m", mtu);
+ }
+
+ if (domains) {
+ _cleanup_strv_free_ char **a = NULL;
+ a = strv_split(domains, " ");
+ if (!a)
+ return -ENOMEM;
+
+ if (!strv_isempty(a)) {
+ lease->search_domains = a;
+ a = NULL;
+ }
+ }
+
+ if (routes) {
+ r = deserialize_dhcp_routes(
+ &lease->static_route,
+ &lease->static_route_size,
+ &lease->static_route_allocated,
+ routes);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse DHCP routes %s, ignoring: %m", routes);
+ }
+
+ if (lifetime) {
+ r = safe_atou32(lifetime, &lease->lifetime);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse lifetime %s, ignoring: %m", lifetime);
+ }
+
+ if (t1) {
+ r = safe_atou32(t1, &lease->t1);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse T1 %s, ignoring: %m", t1);
+ }
+
+ if (t2) {
+ r = safe_atou32(t2, &lease->t2);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse T2 %s, ignoring: %m", t2);
+ }
+
+ if (client_id_hex) {
+ r = unhexmem(client_id_hex, (size_t) -1, &lease->client_id, &lease->client_id_len);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse client ID %s, ignoring: %m", client_id_hex);
+ }
+
+ if (vendor_specific_hex) {
+ r = unhexmem(vendor_specific_hex, (size_t) -1, &lease->vendor_specific, &lease->vendor_specific_len);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse vendor specific data %s, ignoring: %m", vendor_specific_hex);
+ }
+
+ for (i = 0; i <= SD_DHCP_OPTION_PRIVATE_LAST - SD_DHCP_OPTION_PRIVATE_BASE; i++) {
+ _cleanup_free_ void *data = NULL;
+ size_t len;
+
+ if (!options[i])
+ continue;
+
+ r = unhexmem(options[i], (size_t) -1, &data, &len);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse private DHCP option %s, ignoring: %m", options[i]);
+ continue;
+ }
+
+ r = dhcp_lease_insert_private_option(lease, SD_DHCP_OPTION_PRIVATE_BASE + i, data, len);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(lease);
+
+ return 0;
+}
+
+int dhcp_lease_set_default_subnet_mask(sd_dhcp_lease *lease) {
+ struct in_addr address, mask;
+ int r;
+
+ assert(lease);
+
+ if (lease->address == 0)
+ return -ENODATA;
+
+ address.s_addr = lease->address;
+
+ /* fall back to the default subnet masks based on address class */
+ r = in4_addr_default_subnet_mask(&address, &mask);
+ if (r < 0)
+ return r;
+
+ lease->subnet_mask = mask.s_addr;
+ lease->have_subnet_mask = true;
+
+ return 0;
+}
+
+int sd_dhcp_lease_get_client_id(sd_dhcp_lease *lease, const void **client_id, size_t *client_id_len) {
+ assert_return(lease, -EINVAL);
+ assert_return(client_id, -EINVAL);
+ assert_return(client_id_len, -EINVAL);
+
+ if (!lease->client_id)
+ return -ENODATA;
+
+ *client_id = lease->client_id;
+ *client_id_len = lease->client_id_len;
+
+ return 0;
+}
+
+int dhcp_lease_set_client_id(sd_dhcp_lease *lease, const void *client_id, size_t client_id_len) {
+ assert_return(lease, -EINVAL);
+ assert_return(client_id || client_id_len <= 0, -EINVAL);
+
+ if (client_id_len <= 0)
+ lease->client_id = mfree(lease->client_id);
+ else {
+ void *p;
+
+ p = memdup(client_id, client_id_len);
+ if (!p)
+ return -ENOMEM;
+
+ free(lease->client_id);
+ lease->client_id = p;
+ lease->client_id_len = client_id_len;
+ }
+
+ return 0;
+}
+
+int sd_dhcp_lease_get_timezone(sd_dhcp_lease *lease, const char **tz) {
+ assert_return(lease, -EINVAL);
+ assert_return(tz, -EINVAL);
+
+ if (!lease->timezone)
+ return -ENODATA;
+
+ *tz = lease->timezone;
+ return 0;
+}
+
+int sd_dhcp_route_get_destination(sd_dhcp_route *route, struct in_addr *destination) {
+ assert_return(route, -EINVAL);
+ assert_return(destination, -EINVAL);
+
+ *destination = route->dst_addr;
+ return 0;
+}
+
+int sd_dhcp_route_get_destination_prefix_length(sd_dhcp_route *route, uint8_t *length) {
+ assert_return(route, -EINVAL);
+ assert_return(length, -EINVAL);
+
+ *length = route->dst_prefixlen;
+ return 0;
+}
+
+int sd_dhcp_route_get_gateway(sd_dhcp_route *route, struct in_addr *gateway) {
+ assert_return(route, -EINVAL);
+ assert_return(gateway, -EINVAL);
+
+ *gateway = route->gw_addr;
+ return 0;
+}
+
+int sd_dhcp_route_get_option(sd_dhcp_route *route) {
+ assert_return(route, -EINVAL);
+
+ return route->option;
+}
diff --git a/src/libsystemd-network/sd-dhcp-server.c b/src/libsystemd-network/sd-dhcp-server.c
new file mode 100644
index 0000000..7cb44d1
--- /dev/null
+++ b/src/libsystemd-network/sd-dhcp-server.c
@@ -0,0 +1,1136 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <sys/ioctl.h>
+
+#include "sd-dhcp-server.h"
+
+#include "alloc-util.h"
+#include "dhcp-internal.h"
+#include "dhcp-server-internal.h"
+#include "fd-util.h"
+#include "in-addr-util.h"
+#include "io-util.h"
+#include "sd-id128.h"
+#include "siphash24.h"
+#include "string-util.h"
+#include "unaligned.h"
+
+#define DHCP_DEFAULT_LEASE_TIME_USEC USEC_PER_HOUR
+#define DHCP_MAX_LEASE_TIME_USEC (USEC_PER_HOUR*12)
+
+static DHCPLease *dhcp_lease_free(DHCPLease *lease) {
+ if (!lease)
+ return NULL;
+
+ free(lease->client_id.data);
+ return mfree(lease);
+}
+
+/* configures the server's address and subnet, and optionally the pool's size and offset into the subnet
+ * the whole pool must fit into the subnet, and may not contain the first (any) nor last (broadcast) address
+ * moreover, the server's own address may be in the pool, and is in that case reserved in order not to
+ * accidentally hand it out */
+int sd_dhcp_server_configure_pool(sd_dhcp_server *server, struct in_addr *address, unsigned char prefixlen, uint32_t offset, uint32_t size) {
+ struct in_addr netmask_addr;
+ be32_t netmask;
+ uint32_t server_off, broadcast_off, size_max;
+
+ assert_return(server, -EINVAL);
+ assert_return(address, -EINVAL);
+ assert_return(address->s_addr != INADDR_ANY, -EINVAL);
+ assert_return(prefixlen <= 32, -ERANGE);
+
+ assert_se(in4_addr_prefixlen_to_netmask(&netmask_addr, prefixlen));
+ netmask = netmask_addr.s_addr;
+
+ server_off = be32toh(address->s_addr & ~netmask);
+ broadcast_off = be32toh(~netmask);
+
+ /* the server address cannot be the subnet address */
+ assert_return(server_off != 0, -ERANGE);
+
+ /* nor the broadcast address */
+ assert_return(server_off != broadcast_off, -ERANGE);
+
+ /* 0 offset means we should set a default, we skip the first (subnet) address
+ and take the next one */
+ if (offset == 0)
+ offset = 1;
+
+ size_max = (broadcast_off + 1) /* the number of addresses in the subnet */
+ - offset /* exclude the addresses before the offset */
+ - 1; /* exclude the last (broadcast) address */
+
+ /* The pool must contain at least one address */
+ assert_return(size_max >= 1, -ERANGE);
+
+ if (size != 0)
+ assert_return(size <= size_max, -ERANGE);
+ else
+ size = size_max;
+
+ if (server->address != address->s_addr || server->netmask != netmask || server->pool_size != size || server->pool_offset != offset) {
+
+ free(server->bound_leases);
+ server->bound_leases = new0(DHCPLease*, size);
+ if (!server->bound_leases)
+ return -ENOMEM;
+
+ server->pool_offset = offset;
+ server->pool_size = size;
+
+ server->address = address->s_addr;
+ server->netmask = netmask;
+ server->subnet = address->s_addr & netmask;
+
+ if (server_off >= offset && server_off - offset < size)
+ server->bound_leases[server_off - offset] = &server->invalid_lease;
+
+ /* Drop any leases associated with the old address range */
+ hashmap_clear(server->leases_by_client_id);
+ }
+
+ return 0;
+}
+
+int sd_dhcp_server_is_running(sd_dhcp_server *server) {
+ assert_return(server, false);
+
+ return !!server->receive_message;
+}
+
+void client_id_hash_func(const DHCPClientId *id, struct siphash *state) {
+ assert(id);
+ assert(id->length);
+ assert(id->data);
+
+ siphash24_compress(&id->length, sizeof(id->length), state);
+ siphash24_compress(id->data, id->length, state);
+}
+
+int client_id_compare_func(const DHCPClientId *a, const DHCPClientId *b) {
+ int r;
+
+ assert(!a->length || a->data);
+ assert(!b->length || b->data);
+
+ r = CMP(a->length, b->length);
+ if (r != 0)
+ return r;
+
+ return memcmp(a->data, b->data, a->length);
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(dhcp_lease_hash_ops, DHCPClientId, client_id_hash_func, client_id_compare_func,
+ DHCPLease, dhcp_lease_free);
+
+static sd_dhcp_server *dhcp_server_free(sd_dhcp_server *server) {
+ assert(server);
+
+ log_dhcp_server(server, "UNREF");
+
+ sd_dhcp_server_stop(server);
+
+ sd_event_unref(server->event);
+
+ free(server->timezone);
+ free(server->dns);
+ free(server->ntp);
+
+ hashmap_free(server->leases_by_client_id);
+
+ free(server->bound_leases);
+ return mfree(server);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_dhcp_server, sd_dhcp_server, dhcp_server_free);
+
+int sd_dhcp_server_new(sd_dhcp_server **ret, int ifindex) {
+ _cleanup_(sd_dhcp_server_unrefp) sd_dhcp_server *server = NULL;
+
+ assert_return(ret, -EINVAL);
+ assert_return(ifindex > 0, -EINVAL);
+
+ server = new0(sd_dhcp_server, 1);
+ if (!server)
+ return -ENOMEM;
+
+ server->n_ref = 1;
+ server->fd_raw = -1;
+ server->fd = -1;
+ server->address = htobe32(INADDR_ANY);
+ server->netmask = htobe32(INADDR_ANY);
+ server->ifindex = ifindex;
+
+ server->leases_by_client_id = hashmap_new(&dhcp_lease_hash_ops);
+ if (!server->leases_by_client_id)
+ return -ENOMEM;
+
+ server->default_lease_time = DIV_ROUND_UP(DHCP_DEFAULT_LEASE_TIME_USEC, USEC_PER_SEC);
+ server->max_lease_time = DIV_ROUND_UP(DHCP_MAX_LEASE_TIME_USEC, USEC_PER_SEC);
+
+ *ret = TAKE_PTR(server);
+
+ return 0;
+}
+
+int sd_dhcp_server_attach_event(sd_dhcp_server *server, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(server, -EINVAL);
+ assert_return(!server->event, -EBUSY);
+
+ if (event)
+ server->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&server->event);
+ if (r < 0)
+ return r;
+ }
+
+ server->event_priority = priority;
+
+ return 0;
+}
+
+int sd_dhcp_server_detach_event(sd_dhcp_server *server) {
+ assert_return(server, -EINVAL);
+
+ server->event = sd_event_unref(server->event);
+
+ return 0;
+}
+
+sd_event *sd_dhcp_server_get_event(sd_dhcp_server *server) {
+ assert_return(server, NULL);
+
+ return server->event;
+}
+
+int sd_dhcp_server_stop(sd_dhcp_server *server) {
+ assert_return(server, -EINVAL);
+
+ server->receive_message =
+ sd_event_source_unref(server->receive_message);
+
+ server->fd_raw = safe_close(server->fd_raw);
+ server->fd = safe_close(server->fd);
+
+ log_dhcp_server(server, "STOPPED");
+
+ return 0;
+}
+
+static int dhcp_server_send_unicast_raw(sd_dhcp_server *server,
+ DHCPPacket *packet, size_t len) {
+ union sockaddr_union link = {
+ .ll.sll_family = AF_PACKET,
+ .ll.sll_protocol = htobe16(ETH_P_IP),
+ .ll.sll_ifindex = server->ifindex,
+ .ll.sll_halen = ETH_ALEN,
+ };
+
+ assert(server);
+ assert(server->ifindex > 0);
+ assert(server->address);
+ assert(packet);
+ assert(len > sizeof(DHCPPacket));
+
+ memcpy(&link.ll.sll_addr, &packet->dhcp.chaddr, ETH_ALEN);
+
+ dhcp_packet_append_ip_headers(packet, server->address, DHCP_PORT_SERVER,
+ packet->dhcp.yiaddr,
+ DHCP_PORT_CLIENT, len);
+
+ return dhcp_network_send_raw_socket(server->fd_raw, &link, packet, len);
+}
+
+static int dhcp_server_send_udp(sd_dhcp_server *server, be32_t destination,
+ uint16_t destination_port,
+ DHCPMessage *message, size_t len) {
+ union sockaddr_union dest = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = htobe16(destination_port),
+ .in.sin_addr.s_addr = destination,
+ };
+ struct iovec iov = {
+ .iov_base = message,
+ .iov_len = len,
+ };
+ uint8_t cmsgbuf[CMSG_LEN(sizeof(struct in_pktinfo))] = {};
+ struct msghdr msg = {
+ .msg_name = &dest,
+ .msg_namelen = sizeof(dest.in),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = cmsgbuf,
+ .msg_controllen = sizeof(cmsgbuf),
+ };
+ struct cmsghdr *cmsg;
+ struct in_pktinfo *pktinfo;
+
+ assert(server);
+ assert(server->fd >= 0);
+ assert(message);
+ assert(len > sizeof(DHCPMessage));
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ assert(cmsg);
+
+ cmsg->cmsg_level = IPPROTO_IP;
+ cmsg->cmsg_type = IP_PKTINFO;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
+
+ /* we attach source interface and address info to the message
+ rather than binding the socket. This will be mostly useful
+ when we gain support for arbitrary number of server addresses
+ */
+ pktinfo = (struct in_pktinfo*) CMSG_DATA(cmsg);
+ assert(pktinfo);
+
+ pktinfo->ipi_ifindex = server->ifindex;
+ pktinfo->ipi_spec_dst.s_addr = server->address;
+
+ if (sendmsg(server->fd, &msg, 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static bool requested_broadcast(DHCPRequest *req) {
+ assert(req);
+
+ return req->message->flags & htobe16(0x8000);
+}
+
+int dhcp_server_send_packet(sd_dhcp_server *server,
+ DHCPRequest *req, DHCPPacket *packet,
+ int type, size_t optoffset) {
+ be32_t destination = INADDR_ANY;
+ uint16_t destination_port = DHCP_PORT_CLIENT;
+ int r;
+
+ assert(server);
+ assert(req);
+ assert(req->max_optlen);
+ assert(optoffset <= req->max_optlen);
+ assert(packet);
+
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &optoffset, 0,
+ SD_DHCP_OPTION_SERVER_IDENTIFIER,
+ 4, &server->address);
+ if (r < 0)
+ return r;
+
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &optoffset, 0,
+ SD_DHCP_OPTION_END, 0, NULL);
+ if (r < 0)
+ return r;
+
+ /* RFC 2131 Section 4.1
+
+ If the ’giaddr’ field in a DHCP message from a client is non-zero,
+ the server sends any return messages to the ’DHCP server’ port on the
+ BOOTP relay agent whose address appears in ’giaddr’. If the ’giaddr’
+ field is zero and the ’ciaddr’ field is nonzero, then the server
+ unicasts DHCPOFFER and DHCPACK messages to the address in ’ciaddr’.
+ If ’giaddr’ is zero and ’ciaddr’ is zero, and the broadcast bit is
+ set, then the server broadcasts DHCPOFFER and DHCPACK messages to
+ 0xffffffff. If the broadcast bit is not set and ’giaddr’ is zero and
+ ’ciaddr’ is zero, then the server unicasts DHCPOFFER and DHCPACK
+ messages to the client’s hardware address and ’yiaddr’ address. In
+ all cases, when ’giaddr’ is zero, the server broadcasts any DHCPNAK
+ messages to 0xffffffff.
+
+ Section 4.3.2
+
+ If ’giaddr’ is set in the DHCPREQUEST message, the client is on a
+ different subnet. The server MUST set the broadcast bit in the
+ DHCPNAK, so that the relay agent will broadcast the DHCPNAK to the
+ client, because the client may not have a correct network address
+ or subnet mask, and the client may not be answering ARP requests.
+ */
+ if (req->message->giaddr) {
+ destination = req->message->giaddr;
+ destination_port = DHCP_PORT_SERVER;
+ if (type == DHCP_NAK)
+ packet->dhcp.flags = htobe16(0x8000);
+ } else if (req->message->ciaddr && type != DHCP_NAK)
+ destination = req->message->ciaddr;
+
+ if (destination != INADDR_ANY)
+ return dhcp_server_send_udp(server, destination,
+ destination_port, &packet->dhcp,
+ sizeof(DHCPMessage) + optoffset);
+ else if (requested_broadcast(req) || type == DHCP_NAK)
+ return dhcp_server_send_udp(server, INADDR_BROADCAST,
+ destination_port, &packet->dhcp,
+ sizeof(DHCPMessage) + optoffset);
+ else
+ /* we cannot send UDP packet to specific MAC address when the
+ address is not yet configured, so must fall back to raw
+ packets */
+ return dhcp_server_send_unicast_raw(server, packet,
+ sizeof(DHCPPacket) + optoffset);
+}
+
+static int server_message_init(sd_dhcp_server *server, DHCPPacket **ret,
+ uint8_t type, size_t *_optoffset,
+ DHCPRequest *req) {
+ _cleanup_free_ DHCPPacket *packet = NULL;
+ size_t optoffset = 0;
+ int r;
+
+ assert(server);
+ assert(ret);
+ assert(_optoffset);
+ assert(IN_SET(type, DHCP_OFFER, DHCP_ACK, DHCP_NAK));
+
+ packet = malloc0(sizeof(DHCPPacket) + req->max_optlen);
+ if (!packet)
+ return -ENOMEM;
+
+ r = dhcp_message_init(&packet->dhcp, BOOTREPLY,
+ be32toh(req->message->xid), type, ARPHRD_ETHER,
+ req->max_optlen, &optoffset);
+ if (r < 0)
+ return r;
+
+ packet->dhcp.flags = req->message->flags;
+ packet->dhcp.giaddr = req->message->giaddr;
+ memcpy(&packet->dhcp.chaddr, &req->message->chaddr, ETH_ALEN);
+
+ *_optoffset = optoffset;
+ *ret = TAKE_PTR(packet);
+
+ return 0;
+}
+
+static int server_send_offer(sd_dhcp_server *server, DHCPRequest *req,
+ be32_t address) {
+ _cleanup_free_ DHCPPacket *packet = NULL;
+ size_t offset;
+ be32_t lease_time;
+ int r;
+
+ r = server_message_init(server, &packet, DHCP_OFFER, &offset, req);
+ if (r < 0)
+ return r;
+
+ packet->dhcp.yiaddr = address;
+
+ lease_time = htobe32(req->lifetime);
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_IP_ADDRESS_LEASE_TIME, 4,
+ &lease_time);
+ if (r < 0)
+ return r;
+
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_SUBNET_MASK, 4, &server->netmask);
+ if (r < 0)
+ return r;
+
+ if (server->emit_router) {
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_ROUTER, 4, &server->address);
+ if (r < 0)
+ return r;
+ }
+
+ r = dhcp_server_send_packet(server, req, packet, DHCP_OFFER, offset);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int server_send_ack(sd_dhcp_server *server, DHCPRequest *req,
+ be32_t address) {
+ _cleanup_free_ DHCPPacket *packet = NULL;
+ size_t offset;
+ be32_t lease_time;
+ int r;
+
+ r = server_message_init(server, &packet, DHCP_ACK, &offset, req);
+ if (r < 0)
+ return r;
+
+ packet->dhcp.yiaddr = address;
+
+ lease_time = htobe32(req->lifetime);
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_IP_ADDRESS_LEASE_TIME, 4,
+ &lease_time);
+ if (r < 0)
+ return r;
+
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_SUBNET_MASK, 4, &server->netmask);
+ if (r < 0)
+ return r;
+
+ if (server->emit_router) {
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_ROUTER, 4, &server->address);
+ if (r < 0)
+ return r;
+ }
+
+ if (server->n_dns > 0) {
+ r = dhcp_option_append(
+ &packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_DOMAIN_NAME_SERVER,
+ sizeof(struct in_addr) * server->n_dns, server->dns);
+ if (r < 0)
+ return r;
+ }
+
+ if (server->n_ntp > 0) {
+ r = dhcp_option_append(
+ &packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_NTP_SERVER,
+ sizeof(struct in_addr) * server->n_ntp, server->ntp);
+ if (r < 0)
+ return r;
+ }
+
+ if (server->timezone) {
+ r = dhcp_option_append(
+ &packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_NEW_TZDB_TIMEZONE,
+ strlen(server->timezone), server->timezone);
+ if (r < 0)
+ return r;
+ }
+
+ r = dhcp_server_send_packet(server, req, packet, DHCP_ACK, offset);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int server_send_nak(sd_dhcp_server *server, DHCPRequest *req) {
+ _cleanup_free_ DHCPPacket *packet = NULL;
+ size_t offset;
+ int r;
+
+ r = server_message_init(server, &packet, DHCP_NAK, &offset, req);
+ if (r < 0)
+ return r;
+
+ return dhcp_server_send_packet(server, req, packet, DHCP_NAK, offset);
+}
+
+static int server_send_forcerenew(sd_dhcp_server *server, be32_t address,
+ be32_t gateway, uint8_t chaddr[]) {
+ _cleanup_free_ DHCPPacket *packet = NULL;
+ size_t optoffset = 0;
+ int r;
+
+ assert(server);
+ assert(address != INADDR_ANY);
+ assert(chaddr);
+
+ packet = malloc0(sizeof(DHCPPacket) + DHCP_MIN_OPTIONS_SIZE);
+ if (!packet)
+ return -ENOMEM;
+
+ r = dhcp_message_init(&packet->dhcp, BOOTREPLY, 0,
+ DHCP_FORCERENEW, ARPHRD_ETHER,
+ DHCP_MIN_OPTIONS_SIZE, &optoffset);
+ if (r < 0)
+ return r;
+
+ r = dhcp_option_append(&packet->dhcp, DHCP_MIN_OPTIONS_SIZE,
+ &optoffset, 0, SD_DHCP_OPTION_END, 0, NULL);
+ if (r < 0)
+ return r;
+
+ memcpy(&packet->dhcp.chaddr, chaddr, ETH_ALEN);
+
+ r = dhcp_server_send_udp(server, address, DHCP_PORT_CLIENT,
+ &packet->dhcp,
+ sizeof(DHCPMessage) + optoffset);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int parse_request(uint8_t code, uint8_t len, const void *option, void *userdata) {
+ DHCPRequest *req = userdata;
+
+ assert(req);
+
+ switch(code) {
+ case SD_DHCP_OPTION_IP_ADDRESS_LEASE_TIME:
+ if (len == 4)
+ req->lifetime = unaligned_read_be32(option);
+
+ break;
+ case SD_DHCP_OPTION_REQUESTED_IP_ADDRESS:
+ if (len == 4)
+ memcpy(&req->requested_ip, option, sizeof(be32_t));
+
+ break;
+ case SD_DHCP_OPTION_SERVER_IDENTIFIER:
+ if (len == 4)
+ memcpy(&req->server_id, option, sizeof(be32_t));
+
+ break;
+ case SD_DHCP_OPTION_CLIENT_IDENTIFIER:
+ if (len >= 2) {
+ uint8_t *data;
+
+ data = memdup(option, len);
+ if (!data)
+ return -ENOMEM;
+
+ free(req->client_id.data);
+ req->client_id.data = data;
+ req->client_id.length = len;
+ }
+
+ break;
+ case SD_DHCP_OPTION_MAXIMUM_MESSAGE_SIZE:
+
+ if (len == 2 && unaligned_read_be16(option) >= sizeof(DHCPPacket))
+ req->max_optlen = unaligned_read_be16(option) - sizeof(DHCPPacket);
+
+ break;
+ }
+
+ return 0;
+}
+
+static void dhcp_request_free(DHCPRequest *req) {
+ if (!req)
+ return;
+
+ free(req->client_id.data);
+ free(req);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DHCPRequest*, dhcp_request_free);
+
+static int ensure_sane_request(sd_dhcp_server *server, DHCPRequest *req, DHCPMessage *message) {
+ assert(req);
+ assert(message);
+
+ req->message = message;
+
+ /* set client id based on MAC address if client did not send an explicit
+ one */
+ if (!req->client_id.data) {
+ void *data;
+
+ data = malloc0(ETH_ALEN + 1);
+ if (!data)
+ return -ENOMEM;
+
+ ((uint8_t*) data)[0] = 0x01;
+ memcpy((uint8_t*) data + 1, &message->chaddr, ETH_ALEN);
+
+ req->client_id.length = ETH_ALEN + 1;
+ req->client_id.data = data;
+ }
+
+ if (req->max_optlen < DHCP_MIN_OPTIONS_SIZE)
+ req->max_optlen = DHCP_MIN_OPTIONS_SIZE;
+
+ if (req->lifetime <= 0)
+ req->lifetime = MAX(1ULL, server->default_lease_time);
+
+ if (server->max_lease_time > 0 && req->lifetime > server->max_lease_time)
+ req->lifetime = server->max_lease_time;
+
+ return 0;
+}
+
+static int get_pool_offset(sd_dhcp_server *server, be32_t requested_ip) {
+ assert(server);
+
+ if (!server->pool_size)
+ return -EINVAL;
+
+ if (be32toh(requested_ip) < (be32toh(server->subnet) | server->pool_offset) ||
+ be32toh(requested_ip) >= (be32toh(server->subnet) | (server->pool_offset + server->pool_size)))
+ return -ERANGE;
+
+ return be32toh(requested_ip & ~server->netmask) - server->pool_offset;
+}
+
+#define HASH_KEY SD_ID128_MAKE(0d,1d,fe,bd,f1,24,bd,b3,47,f1,dd,6e,73,21,93,30)
+
+int dhcp_server_handle_message(sd_dhcp_server *server, DHCPMessage *message,
+ size_t length) {
+ _cleanup_(dhcp_request_freep) DHCPRequest *req = NULL;
+ _cleanup_free_ char *error_message = NULL;
+ DHCPLease *existing_lease;
+ int type, r;
+
+ assert(server);
+ assert(message);
+
+ if (message->op != BOOTREQUEST ||
+ message->htype != ARPHRD_ETHER ||
+ message->hlen != ETHER_ADDR_LEN)
+ return 0;
+
+ req = new0(DHCPRequest, 1);
+ if (!req)
+ return -ENOMEM;
+
+ type = dhcp_option_parse(message, length, parse_request, req, &error_message);
+ if (type < 0)
+ return 0;
+
+ r = ensure_sane_request(server, req, message);
+ if (r < 0)
+ /* this only fails on critical errors */
+ return r;
+
+ existing_lease = hashmap_get(server->leases_by_client_id,
+ &req->client_id);
+
+ switch(type) {
+
+ case DHCP_DISCOVER: {
+ be32_t address = INADDR_ANY;
+ unsigned i;
+
+ log_dhcp_server(server, "DISCOVER (0x%x)",
+ be32toh(req->message->xid));
+
+ if (!server->pool_size)
+ /* no pool allocated */
+ return 0;
+
+ /* for now pick a random free address from the pool */
+ if (existing_lease)
+ address = existing_lease->address;
+ else {
+ struct siphash state;
+ uint64_t hash;
+ uint32_t next_offer;
+
+ /* even with no persistence of leases, we try to offer the same client
+ the same IP address. we do this by using the hash of the client id
+ as the offset into the pool of leases when finding the next free one */
+
+ siphash24_init(&state, HASH_KEY.bytes);
+ client_id_hash_func(&req->client_id, &state);
+ hash = htole64(siphash24_finalize(&state));
+ next_offer = hash % server->pool_size;
+
+ for (i = 0; i < server->pool_size; i++) {
+ if (!server->bound_leases[next_offer]) {
+ address = server->subnet | htobe32(server->pool_offset + next_offer);
+ break;
+ }
+
+ next_offer = (next_offer + 1) % server->pool_size;
+ }
+ }
+
+ if (address == INADDR_ANY)
+ /* no free addresses left */
+ return 0;
+
+ r = server_send_offer(server, req, address);
+ if (r < 0)
+ /* this only fails on critical errors */
+ return log_dhcp_server_errno(server, r, "Could not send offer: %m");
+
+ log_dhcp_server(server, "OFFER (0x%x)", be32toh(req->message->xid));
+ return DHCP_OFFER;
+ }
+ case DHCP_DECLINE:
+ log_dhcp_server(server, "DECLINE (0x%x): %s", be32toh(req->message->xid), strna(error_message));
+
+ /* TODO: make sure we don't offer this address again */
+
+ return 1;
+
+ case DHCP_REQUEST: {
+ be32_t address;
+ bool init_reboot = false;
+ int pool_offset;
+
+ /* see RFC 2131, section 4.3.2 */
+
+ if (req->server_id) {
+ log_dhcp_server(server, "REQUEST (selecting) (0x%x)",
+ be32toh(req->message->xid));
+
+ /* SELECTING */
+ if (req->server_id != server->address)
+ /* client did not pick us */
+ return 0;
+
+ if (req->message->ciaddr)
+ /* this MUST be zero */
+ return 0;
+
+ if (!req->requested_ip)
+ /* this must be filled in with the yiaddr
+ from the chosen OFFER */
+ return 0;
+
+ address = req->requested_ip;
+ } else if (req->requested_ip) {
+ log_dhcp_server(server, "REQUEST (init-reboot) (0x%x)",
+ be32toh(req->message->xid));
+
+ /* INIT-REBOOT */
+ if (req->message->ciaddr)
+ /* this MUST be zero */
+ return 0;
+
+ /* TODO: check more carefully if IP is correct */
+ address = req->requested_ip;
+ init_reboot = true;
+ } else {
+ log_dhcp_server(server, "REQUEST (rebinding/renewing) (0x%x)",
+ be32toh(req->message->xid));
+
+ /* REBINDING / RENEWING */
+ if (!req->message->ciaddr)
+ /* this MUST be filled in with clients IP address */
+ return 0;
+
+ address = req->message->ciaddr;
+ }
+
+ pool_offset = get_pool_offset(server, address);
+
+ /* verify that the requested address is from the pool, and either
+ owned by the current client or free */
+ if (pool_offset >= 0 &&
+ server->bound_leases[pool_offset] == existing_lease) {
+ DHCPLease *lease;
+ usec_t time_now = 0;
+
+ if (!existing_lease) {
+ lease = new0(DHCPLease, 1);
+ if (!lease)
+ return -ENOMEM;
+ lease->address = address;
+ lease->client_id.data = memdup(req->client_id.data,
+ req->client_id.length);
+ if (!lease->client_id.data) {
+ free(lease);
+ return -ENOMEM;
+ }
+ lease->client_id.length = req->client_id.length;
+ memcpy(&lease->chaddr, &req->message->chaddr,
+ ETH_ALEN);
+ lease->gateway = req->message->giaddr;
+ } else
+ lease = existing_lease;
+
+ r = sd_event_now(server->event,
+ clock_boottime_or_monotonic(),
+ &time_now);
+ if (r < 0) {
+ if (!existing_lease)
+ dhcp_lease_free(lease);
+ return r;
+ }
+
+ lease->expiration = req->lifetime * USEC_PER_SEC + time_now;
+
+ r = server_send_ack(server, req, address);
+ if (r < 0) {
+ /* this only fails on critical errors */
+ log_dhcp_server_errno(server, r, "Could not send ack: %m");
+
+ if (!existing_lease)
+ dhcp_lease_free(lease);
+
+ return r;
+ } else {
+ log_dhcp_server(server, "ACK (0x%x)",
+ be32toh(req->message->xid));
+
+ server->bound_leases[pool_offset] = lease;
+ hashmap_put(server->leases_by_client_id,
+ &lease->client_id, lease);
+
+ return DHCP_ACK;
+ }
+
+ } else if (init_reboot) {
+ r = server_send_nak(server, req);
+ if (r < 0)
+ /* this only fails on critical errors */
+ return log_dhcp_server_errno(server, r, "Could not send nak: %m");
+
+ log_dhcp_server(server, "NAK (0x%x)", be32toh(req->message->xid));
+ return DHCP_NAK;
+ }
+
+ break;
+ }
+
+ case DHCP_RELEASE: {
+ int pool_offset;
+
+ log_dhcp_server(server, "RELEASE (0x%x)",
+ be32toh(req->message->xid));
+
+ if (!existing_lease)
+ return 0;
+
+ if (existing_lease->address != req->message->ciaddr)
+ return 0;
+
+ pool_offset = get_pool_offset(server, req->message->ciaddr);
+ if (pool_offset < 0)
+ return 0;
+
+ if (server->bound_leases[pool_offset] == existing_lease) {
+ server->bound_leases[pool_offset] = NULL;
+ hashmap_remove(server->leases_by_client_id, existing_lease);
+ dhcp_lease_free(existing_lease);
+ }
+
+ return 0;
+ }}
+
+ return 0;
+}
+
+static int server_receive_message(sd_event_source *s, int fd,
+ uint32_t revents, void *userdata) {
+ _cleanup_free_ DHCPMessage *message = NULL;
+ uint8_t cmsgbuf[CMSG_LEN(sizeof(struct in_pktinfo))];
+ sd_dhcp_server *server = userdata;
+ struct iovec iov = {};
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = cmsgbuf,
+ .msg_controllen = sizeof(cmsgbuf),
+ };
+ struct cmsghdr *cmsg;
+ ssize_t buflen, len;
+ int r;
+
+ assert(server);
+
+ buflen = next_datagram_size_fd(fd);
+ if (buflen < 0)
+ return buflen;
+
+ message = malloc(buflen);
+ if (!message)
+ return -ENOMEM;
+
+ iov = IOVEC_MAKE(message, buflen);
+
+ len = recvmsg(fd, &msg, 0);
+ if (len < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return -errno;
+ }
+ if ((size_t)len < sizeof(DHCPMessage))
+ return 0;
+
+ CMSG_FOREACH(cmsg, &msg) {
+ if (cmsg->cmsg_level == IPPROTO_IP &&
+ cmsg->cmsg_type == IP_PKTINFO &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct in_pktinfo))) {
+ struct in_pktinfo *info = (struct in_pktinfo*)CMSG_DATA(cmsg);
+
+ /* TODO figure out if this can be done as a filter on
+ * the socket, like for IPv6 */
+ if (server->ifindex != info->ipi_ifindex)
+ return 0;
+
+ break;
+ }
+ }
+
+ r = dhcp_server_handle_message(server, message, (size_t) len);
+ if (r < 0)
+ log_dhcp_server_errno(server, r, "Couldn't process incoming message: %m");
+
+ return 0;
+}
+
+int sd_dhcp_server_start(sd_dhcp_server *server) {
+ int r;
+
+ assert_return(server, -EINVAL);
+ assert_return(server->event, -EINVAL);
+ assert_return(!server->receive_message, -EBUSY);
+ assert_return(server->fd_raw < 0, -EBUSY);
+ assert_return(server->fd < 0, -EBUSY);
+ assert_return(server->address != htobe32(INADDR_ANY), -EUNATCH);
+
+ r = socket(AF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
+ if (r < 0) {
+ r = -errno;
+ sd_dhcp_server_stop(server);
+ return r;
+ }
+ server->fd_raw = r;
+
+ r = dhcp_network_bind_udp_socket(server->ifindex, INADDR_ANY, DHCP_PORT_SERVER);
+ if (r < 0) {
+ sd_dhcp_server_stop(server);
+ return r;
+ }
+ server->fd = r;
+
+ r = sd_event_add_io(server->event, &server->receive_message,
+ server->fd, EPOLLIN,
+ server_receive_message, server);
+ if (r < 0) {
+ sd_dhcp_server_stop(server);
+ return r;
+ }
+
+ r = sd_event_source_set_priority(server->receive_message,
+ server->event_priority);
+ if (r < 0) {
+ sd_dhcp_server_stop(server);
+ return r;
+ }
+
+ log_dhcp_server(server, "STARTED");
+
+ return 0;
+}
+
+int sd_dhcp_server_forcerenew(sd_dhcp_server *server) {
+ unsigned i;
+ int r = 0;
+
+ assert_return(server, -EINVAL);
+ assert(server->bound_leases);
+
+ for (i = 0; i < server->pool_size; i++) {
+ DHCPLease *lease = server->bound_leases[i];
+
+ if (!lease || lease == &server->invalid_lease)
+ continue;
+
+ r = server_send_forcerenew(server, lease->address,
+ lease->gateway,
+ lease->chaddr);
+ if (r < 0)
+ return r;
+
+ log_dhcp_server(server, "FORCERENEW");
+ }
+
+ return r;
+}
+
+int sd_dhcp_server_set_timezone(sd_dhcp_server *server, const char *tz) {
+ int r;
+
+ assert_return(server, -EINVAL);
+ assert_return(timezone_is_valid(tz, LOG_DEBUG), -EINVAL);
+
+ if (streq_ptr(tz, server->timezone))
+ return 0;
+
+ r = free_and_strdup(&server->timezone, tz);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int sd_dhcp_server_set_max_lease_time(sd_dhcp_server *server, uint32_t t) {
+ assert_return(server, -EINVAL);
+
+ if (t == server->max_lease_time)
+ return 0;
+
+ server->max_lease_time = t;
+ return 1;
+}
+
+int sd_dhcp_server_set_default_lease_time(sd_dhcp_server *server, uint32_t t) {
+ assert_return(server, -EINVAL);
+
+ if (t == server->default_lease_time)
+ return 0;
+
+ server->default_lease_time = t;
+ return 1;
+}
+
+int sd_dhcp_server_set_dns(sd_dhcp_server *server, const struct in_addr dns[], unsigned n) {
+ assert_return(server, -EINVAL);
+ assert_return(dns || n <= 0, -EINVAL);
+
+ if (server->n_dns == n &&
+ memcmp(server->dns, dns, sizeof(struct in_addr) * n) == 0)
+ return 0;
+
+ if (n <= 0) {
+ server->dns = mfree(server->dns);
+ server->n_dns = 0;
+ } else {
+ struct in_addr *c;
+
+ c = newdup(struct in_addr, dns, n);
+ if (!c)
+ return -ENOMEM;
+
+ free(server->dns);
+ server->dns = c;
+ server->n_dns = n;
+ }
+
+ return 1;
+}
+
+int sd_dhcp_server_set_ntp(sd_dhcp_server *server, const struct in_addr ntp[], unsigned n) {
+ assert_return(server, -EINVAL);
+ assert_return(ntp || n <= 0, -EINVAL);
+
+ if (server->n_ntp == n &&
+ memcmp(server->ntp, ntp, sizeof(struct in_addr) * n) == 0)
+ return 0;
+
+ if (n <= 0) {
+ server->ntp = mfree(server->ntp);
+ server->n_ntp = 0;
+ } else {
+ struct in_addr *c;
+
+ c = newdup(struct in_addr, ntp, n);
+ if (!c)
+ return -ENOMEM;
+
+ free(server->ntp);
+ server->ntp = c;
+ server->n_ntp = n;
+ }
+
+ return 1;
+}
+
+int sd_dhcp_server_set_emit_router(sd_dhcp_server *server, int enabled) {
+ assert_return(server, -EINVAL);
+
+ if (enabled == server->emit_router)
+ return 0;
+
+ server->emit_router = enabled;
+
+ return 1;
+}
diff --git a/src/libsystemd-network/sd-dhcp6-client.c b/src/libsystemd-network/sd-dhcp6-client.c
new file mode 100644
index 0000000..593ffd1
--- /dev/null
+++ b/src/libsystemd-network/sd-dhcp6-client.c
@@ -0,0 +1,1516 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014-2015 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/if_infiniband.h>
+
+#include "sd-dhcp6-client.h"
+
+#include "alloc-util.h"
+#include "dhcp-identifier.h"
+#include "dhcp6-internal.h"
+#include "dhcp6-lease-internal.h"
+#include "dhcp6-protocol.h"
+#include "dns-domain.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "network-internal.h"
+#include "random-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "util.h"
+
+#define MAX_MAC_ADDR_LEN INFINIBAND_ALEN
+
+/* what to request from the server, addresses (IA_NA) and/or prefixes (IA_PD) */
+enum {
+ DHCP6_REQUEST_IA_NA = 1,
+ DHCP6_REQUEST_IA_TA = 2, /* currently not used */
+ DHCP6_REQUEST_IA_PD = 4,
+};
+
+struct sd_dhcp6_client {
+ unsigned n_ref;
+
+ enum DHCP6State state;
+ sd_event *event;
+ int event_priority;
+ int ifindex;
+ struct in6_addr local_address;
+ uint8_t mac_addr[MAX_MAC_ADDR_LEN];
+ size_t mac_addr_len;
+ uint16_t arp_type;
+ DHCP6IA ia_na;
+ DHCP6IA ia_pd;
+ sd_event_source *timeout_t1;
+ sd_event_source *timeout_t2;
+ unsigned request;
+ be32_t transaction_id;
+ usec_t transaction_start;
+ struct sd_dhcp6_lease *lease;
+ int fd;
+ bool information_request;
+ bool iaid_set;
+ be16_t *req_opts;
+ size_t req_opts_allocated;
+ size_t req_opts_len;
+ char *fqdn;
+ sd_event_source *receive_message;
+ usec_t retransmit_time;
+ uint8_t retransmit_count;
+ sd_event_source *timeout_resend;
+ sd_event_source *timeout_resend_expire;
+ sd_dhcp6_client_callback_t callback;
+ void *userdata;
+ struct duid duid;
+ size_t duid_len;
+};
+
+static const uint16_t default_req_opts[] = {
+ SD_DHCP6_OPTION_DNS_SERVERS,
+ SD_DHCP6_OPTION_DOMAIN_LIST,
+ SD_DHCP6_OPTION_NTP_SERVER,
+ SD_DHCP6_OPTION_SNTP_SERVERS,
+};
+
+const char * dhcp6_message_type_table[_DHCP6_MESSAGE_MAX] = {
+ [DHCP6_SOLICIT] = "SOLICIT",
+ [DHCP6_ADVERTISE] = "ADVERTISE",
+ [DHCP6_REQUEST] = "REQUEST",
+ [DHCP6_CONFIRM] = "CONFIRM",
+ [DHCP6_RENEW] = "RENEW",
+ [DHCP6_REBIND] = "REBIND",
+ [DHCP6_REPLY] = "REPLY",
+ [DHCP6_RELEASE] = "RELEASE",
+ [DHCP6_DECLINE] = "DECLINE",
+ [DHCP6_RECONFIGURE] = "RECONFIGURE",
+ [DHCP6_INFORMATION_REQUEST] = "INFORMATION-REQUEST",
+ [DHCP6_RELAY_FORW] = "RELAY-FORW",
+ [DHCP6_RELAY_REPL] = "RELAY-REPL",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(dhcp6_message_type, int);
+
+const char * dhcp6_message_status_table[_DHCP6_STATUS_MAX] = {
+ [DHCP6_STATUS_SUCCESS] = "Success",
+ [DHCP6_STATUS_UNSPEC_FAIL] = "Unspecified failure",
+ [DHCP6_STATUS_NO_ADDRS_AVAIL] = "No addresses available",
+ [DHCP6_STATUS_NO_BINDING] = "Binding unavailable",
+ [DHCP6_STATUS_NOT_ON_LINK] = "Not on link",
+ [DHCP6_STATUS_USE_MULTICAST] = "Use multicast",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(dhcp6_message_status, int);
+
+#define DHCP6_CLIENT_DONT_DESTROY(client) \
+ _cleanup_(sd_dhcp6_client_unrefp) _unused_ sd_dhcp6_client *_dont_destroy_##client = sd_dhcp6_client_ref(client)
+
+static int client_start(sd_dhcp6_client *client, enum DHCP6State state);
+
+int sd_dhcp6_client_set_callback(
+ sd_dhcp6_client *client,
+ sd_dhcp6_client_callback_t cb,
+ void *userdata) {
+
+ assert_return(client, -EINVAL);
+
+ client->callback = cb;
+ client->userdata = userdata;
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_ifindex(sd_dhcp6_client *client, int ifindex) {
+
+ assert_return(client, -EINVAL);
+ assert_return(ifindex >= -1, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP6_STATE_STOPPED), -EBUSY);
+
+ client->ifindex = ifindex;
+ return 0;
+}
+
+int sd_dhcp6_client_set_local_address(
+ sd_dhcp6_client *client,
+ const struct in6_addr *local_address) {
+
+ assert_return(client, -EINVAL);
+ assert_return(local_address, -EINVAL);
+ assert_return(in_addr_is_link_local(AF_INET6, (const union in_addr_union *) local_address) > 0, -EINVAL);
+
+ assert_return(IN_SET(client->state, DHCP6_STATE_STOPPED), -EBUSY);
+
+ client->local_address = *local_address;
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_mac(
+ sd_dhcp6_client *client,
+ const uint8_t *addr, size_t addr_len,
+ uint16_t arp_type) {
+
+ assert_return(client, -EINVAL);
+ assert_return(addr, -EINVAL);
+ assert_return(addr_len > 0 && addr_len <= MAX_MAC_ADDR_LEN, -EINVAL);
+ assert_return(arp_type > 0, -EINVAL);
+
+ assert_return(IN_SET(client->state, DHCP6_STATE_STOPPED), -EBUSY);
+
+ if (arp_type == ARPHRD_ETHER)
+ assert_return(addr_len == ETH_ALEN, -EINVAL);
+ else if (arp_type == ARPHRD_INFINIBAND)
+ assert_return(addr_len == INFINIBAND_ALEN, -EINVAL);
+ else
+ return -EINVAL;
+
+ if (client->mac_addr_len == addr_len &&
+ memcmp(&client->mac_addr, addr, addr_len) == 0)
+ return 0;
+
+ memcpy(&client->mac_addr, addr, addr_len);
+ client->mac_addr_len = addr_len;
+ client->arp_type = arp_type;
+
+ return 0;
+}
+
+static int client_ensure_duid(sd_dhcp6_client *client) {
+ if (client->duid_len != 0)
+ return 0;
+
+ return dhcp_identifier_set_duid_en(&client->duid, &client->duid_len);
+}
+
+/**
+ * Sets DUID. If duid is non-null, the DUID is set to duid_type + duid
+ * without further modification. Otherwise, if duid_type is supported, DUID
+ * is set based on that type. Otherwise, an error is returned.
+ */
+static int dhcp6_client_set_duid_internal(
+ sd_dhcp6_client *client,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len,
+ usec_t llt_time) {
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(duid_len == 0 || duid != NULL, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP6_STATE_STOPPED), -EBUSY);
+
+ if (duid != NULL) {
+ r = dhcp_validate_duid_len(duid_type, duid_len, true);
+ if (r < 0) {
+ r = dhcp_validate_duid_len(duid_type, duid_len, false);
+ if (r < 0)
+ return r;
+ log_dhcp6_client(client, "Setting DUID of type %u with unexpected content", duid_type);
+ }
+
+ client->duid.type = htobe16(duid_type);
+ memcpy(&client->duid.raw.data, duid, duid_len);
+ client->duid_len = sizeof(client->duid.type) + duid_len;
+ } else
+ switch (duid_type) {
+ case DUID_TYPE_LLT:
+ if (client->mac_addr_len == 0)
+ return -EOPNOTSUPP;
+
+ r = dhcp_identifier_set_duid_llt(&client->duid, llt_time, client->mac_addr, client->mac_addr_len, client->arp_type, &client->duid_len);
+ if (r < 0)
+ return r;
+ break;
+ case DUID_TYPE_EN:
+ r = dhcp_identifier_set_duid_en(&client->duid, &client->duid_len);
+ if (r < 0)
+ return r;
+ break;
+ case DUID_TYPE_LL:
+ if (client->mac_addr_len == 0)
+ return -EOPNOTSUPP;
+
+ r = dhcp_identifier_set_duid_ll(&client->duid, client->mac_addr, client->mac_addr_len, client->arp_type, &client->duid_len);
+ if (r < 0)
+ return r;
+ break;
+ case DUID_TYPE_UUID:
+ r = dhcp_identifier_set_duid_uuid(&client->duid, &client->duid_len);
+ if (r < 0)
+ return r;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_duid(
+ sd_dhcp6_client *client,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len) {
+ return dhcp6_client_set_duid_internal(client, duid_type, duid, duid_len, 0);
+}
+
+int sd_dhcp6_client_set_duid_llt(
+ sd_dhcp6_client *client,
+ usec_t llt_time) {
+ return dhcp6_client_set_duid_internal(client, DUID_TYPE_LLT, NULL, 0, llt_time);
+}
+
+int sd_dhcp6_client_set_iaid(sd_dhcp6_client *client, uint32_t iaid) {
+ assert_return(client, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP6_STATE_STOPPED), -EBUSY);
+
+ client->ia_na.ia_na.id = htobe32(iaid);
+ client->ia_pd.ia_pd.id = htobe32(iaid);
+ client->iaid_set = true;
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_fqdn(
+ sd_dhcp6_client *client,
+ const char *fqdn) {
+
+ assert_return(client, -EINVAL);
+
+ /* Make sure FQDN qualifies as DNS and as Linux hostname */
+ if (fqdn &&
+ !(hostname_is_valid(fqdn, false) && dns_name_is_valid(fqdn) > 0))
+ return -EINVAL;
+
+ return free_and_strdup(&client->fqdn, fqdn);
+}
+
+int sd_dhcp6_client_set_information_request(sd_dhcp6_client *client, int enabled) {
+ assert_return(client, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP6_STATE_STOPPED), -EBUSY);
+
+ client->information_request = enabled;
+
+ return 0;
+}
+
+int sd_dhcp6_client_get_information_request(sd_dhcp6_client *client, int *enabled) {
+ assert_return(client, -EINVAL);
+ assert_return(enabled, -EINVAL);
+
+ *enabled = client->information_request;
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_request_option(sd_dhcp6_client *client, uint16_t option) {
+ size_t t;
+
+ assert_return(client, -EINVAL);
+ assert_return(client->state == DHCP6_STATE_STOPPED, -EBUSY);
+
+ switch(option) {
+
+ case SD_DHCP6_OPTION_DNS_SERVERS:
+ case SD_DHCP6_OPTION_DOMAIN_LIST:
+ case SD_DHCP6_OPTION_SNTP_SERVERS:
+ case SD_DHCP6_OPTION_NTP_SERVER:
+ case SD_DHCP6_OPTION_RAPID_COMMIT:
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ for (t = 0; t < client->req_opts_len; t++)
+ if (client->req_opts[t] == htobe16(option))
+ return -EEXIST;
+
+ if (!GREEDY_REALLOC(client->req_opts, client->req_opts_allocated,
+ client->req_opts_len + 1))
+ return -ENOMEM;
+
+ client->req_opts[client->req_opts_len++] = htobe16(option);
+
+ return 0;
+}
+
+int sd_dhcp6_client_get_prefix_delegation(sd_dhcp6_client *client, int *delegation) {
+ assert_return(client, -EINVAL);
+ assert_return(delegation, -EINVAL);
+
+ *delegation = FLAGS_SET(client->request, DHCP6_REQUEST_IA_PD);
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_prefix_delegation(sd_dhcp6_client *client, int delegation) {
+ assert_return(client, -EINVAL);
+
+ SET_FLAG(client->request, DHCP6_REQUEST_IA_PD, delegation);
+
+ return 0;
+}
+
+int sd_dhcp6_client_get_address_request(sd_dhcp6_client *client, int *request) {
+ assert_return(client, -EINVAL);
+ assert_return(request, -EINVAL);
+
+ *request = FLAGS_SET(client->request, DHCP6_REQUEST_IA_NA);
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_address_request(sd_dhcp6_client *client, int request) {
+ assert_return(client, -EINVAL);
+
+ SET_FLAG(client->request, DHCP6_REQUEST_IA_NA, request);
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_transaction_id(sd_dhcp6_client *client, uint32_t transaction_id) {
+ assert_return(client, -EINVAL);
+
+ client->transaction_id = transaction_id;
+
+ return 0;
+}
+
+int sd_dhcp6_client_get_lease(sd_dhcp6_client *client, sd_dhcp6_lease **ret) {
+ assert_return(client, -EINVAL);
+
+ if (!client->lease)
+ return -ENOMSG;
+
+ if (ret)
+ *ret = client->lease;
+
+ return 0;
+}
+
+static void client_notify(sd_dhcp6_client *client, int event) {
+ assert(client);
+
+ if (client->callback)
+ client->callback(client, event, client->userdata);
+}
+
+static int client_reset(sd_dhcp6_client *client) {
+ assert(client);
+
+ client->lease = sd_dhcp6_lease_unref(client->lease);
+
+ client->receive_message =
+ sd_event_source_unref(client->receive_message);
+
+ client->transaction_id = 0;
+ client->transaction_start = 0;
+
+ client->retransmit_time = 0;
+ client->retransmit_count = 0;
+
+ (void) event_source_disable(client->timeout_resend);
+ (void) event_source_disable(client->timeout_resend_expire);
+ (void) event_source_disable(client->timeout_t1);
+ (void) event_source_disable(client->timeout_t2);
+
+ client->state = DHCP6_STATE_STOPPED;
+
+ return 0;
+}
+
+static void client_stop(sd_dhcp6_client *client, int error) {
+ DHCP6_CLIENT_DONT_DESTROY(client);
+
+ assert(client);
+
+ client_notify(client, error);
+
+ client_reset(client);
+}
+
+static int client_send_message(sd_dhcp6_client *client, usec_t time_now) {
+ _cleanup_free_ DHCP6Message *message = NULL;
+ struct in6_addr all_servers =
+ IN6ADDR_ALL_DHCP6_RELAY_AGENTS_AND_SERVERS_INIT;
+ size_t len, optlen = 512;
+ uint8_t *opt;
+ int r;
+ usec_t elapsed_usec;
+ be16_t elapsed_time;
+
+ assert(client);
+
+ len = sizeof(DHCP6Message) + optlen;
+
+ message = malloc0(len);
+ if (!message)
+ return -ENOMEM;
+
+ opt = (uint8_t *)(message + 1);
+
+ message->transaction_id = client->transaction_id;
+
+ switch(client->state) {
+ case DHCP6_STATE_INFORMATION_REQUEST:
+ message->type = DHCP6_INFORMATION_REQUEST;
+
+ break;
+
+ case DHCP6_STATE_SOLICITATION:
+ message->type = DHCP6_SOLICIT;
+
+ r = dhcp6_option_append(&opt, &optlen,
+ SD_DHCP6_OPTION_RAPID_COMMIT, 0, NULL);
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_NA)) {
+ r = dhcp6_option_append_ia(&opt, &optlen,
+ &client->ia_na);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->fqdn) {
+ r = dhcp6_option_append_fqdn(&opt, &optlen, client->fqdn);
+ if (r < 0)
+ return r;
+ }
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_PD)) {
+ r = dhcp6_option_append_pd(opt, optlen, &client->ia_pd);
+ if (r < 0)
+ return r;
+
+ opt += r;
+ optlen -= r;
+ }
+
+ break;
+
+ case DHCP6_STATE_REQUEST:
+ case DHCP6_STATE_RENEW:
+
+ if (client->state == DHCP6_STATE_REQUEST)
+ message->type = DHCP6_REQUEST;
+ else
+ message->type = DHCP6_RENEW;
+
+ r = dhcp6_option_append(&opt, &optlen, SD_DHCP6_OPTION_SERVERID,
+ client->lease->serverid_len,
+ client->lease->serverid);
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_NA)) {
+ r = dhcp6_option_append_ia(&opt, &optlen,
+ &client->lease->ia);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->fqdn) {
+ r = dhcp6_option_append_fqdn(&opt, &optlen, client->fqdn);
+ if (r < 0)
+ return r;
+ }
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_PD)) {
+ r = dhcp6_option_append_pd(opt, optlen, &client->lease->pd);
+ if (r < 0)
+ return r;
+
+ opt += r;
+ optlen -= r;
+ }
+
+ break;
+
+ case DHCP6_STATE_REBIND:
+ message->type = DHCP6_REBIND;
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_NA)) {
+ r = dhcp6_option_append_ia(&opt, &optlen, &client->lease->ia);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->fqdn) {
+ r = dhcp6_option_append_fqdn(&opt, &optlen, client->fqdn);
+ if (r < 0)
+ return r;
+ }
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_PD)) {
+ r = dhcp6_option_append_pd(opt, optlen, &client->lease->pd);
+ if (r < 0)
+ return r;
+
+ opt += r;
+ optlen -= r;
+ }
+
+ break;
+
+ case DHCP6_STATE_STOPPED:
+ case DHCP6_STATE_BOUND:
+ return -EINVAL;
+ }
+
+ r = dhcp6_option_append(&opt, &optlen, SD_DHCP6_OPTION_ORO,
+ client->req_opts_len * sizeof(be16_t),
+ client->req_opts);
+ if (r < 0)
+ return r;
+
+ assert(client->duid_len);
+ r = dhcp6_option_append(&opt, &optlen, SD_DHCP6_OPTION_CLIENTID,
+ client->duid_len, &client->duid);
+ if (r < 0)
+ return r;
+
+ elapsed_usec = time_now - client->transaction_start;
+ if (elapsed_usec < 0xffff * USEC_PER_MSEC * 10)
+ elapsed_time = htobe16(elapsed_usec / USEC_PER_MSEC / 10);
+ else
+ elapsed_time = 0xffff;
+
+ r = dhcp6_option_append(&opt, &optlen, SD_DHCP6_OPTION_ELAPSED_TIME,
+ sizeof(elapsed_time), &elapsed_time);
+ if (r < 0)
+ return r;
+
+ r = dhcp6_network_send_udp_socket(client->fd, &all_servers, message,
+ len - optlen);
+ if (r < 0)
+ return r;
+
+ log_dhcp6_client(client, "Sent %s",
+ dhcp6_message_type_to_string(message->type));
+
+ return 0;
+}
+
+static int client_timeout_t2(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_dhcp6_client *client = userdata;
+
+ assert(s);
+ assert(client);
+ assert(client->lease);
+
+ (void) event_source_disable(client->timeout_t2);
+
+ log_dhcp6_client(client, "Timeout T2");
+
+ client_start(client, DHCP6_STATE_REBIND);
+
+ return 0;
+}
+
+static int client_timeout_t1(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_dhcp6_client *client = userdata;
+
+ assert(s);
+ assert(client);
+ assert(client->lease);
+
+ (void) event_source_disable(client->timeout_t1);
+
+ log_dhcp6_client(client, "Timeout T1");
+
+ client_start(client, DHCP6_STATE_RENEW);
+
+ return 0;
+}
+
+static int client_timeout_resend_expire(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_dhcp6_client *client = userdata;
+ DHCP6_CLIENT_DONT_DESTROY(client);
+ enum DHCP6State state;
+
+ assert(s);
+ assert(client);
+ assert(client->event);
+
+ state = client->state;
+
+ client_stop(client, SD_DHCP6_CLIENT_EVENT_RESEND_EXPIRE);
+
+ /* RFC 3315, section 18.1.4., says that "...the client may choose to
+ use a Solicit message to locate a new DHCP server..." */
+ if (state == DHCP6_STATE_REBIND)
+ client_start(client, DHCP6_STATE_SOLICITATION);
+
+ return 0;
+}
+
+static usec_t client_timeout_compute_random(usec_t val) {
+ return val - val / 10 +
+ (random_u32() % (2 * USEC_PER_SEC)) * val / 10 / USEC_PER_SEC;
+}
+
+static int client_timeout_resend(sd_event_source *s, uint64_t usec, void *userdata) {
+ int r = 0;
+ sd_dhcp6_client *client = userdata;
+ usec_t time_now, init_retransmit_time = 0, max_retransmit_time = 0;
+ usec_t max_retransmit_duration = 0;
+ uint8_t max_retransmit_count = 0;
+ char time_string[FORMAT_TIMESPAN_MAX];
+ uint32_t expire = 0;
+
+ assert(s);
+ assert(client);
+ assert(client->event);
+
+ (void) event_source_disable(client->timeout_resend);
+
+ switch (client->state) {
+ case DHCP6_STATE_INFORMATION_REQUEST:
+ init_retransmit_time = DHCP6_INF_TIMEOUT;
+ max_retransmit_time = DHCP6_INF_MAX_RT;
+
+ break;
+
+ case DHCP6_STATE_SOLICITATION:
+
+ if (client->retransmit_count && client->lease) {
+ client_start(client, DHCP6_STATE_REQUEST);
+ return 0;
+ }
+
+ init_retransmit_time = DHCP6_SOL_TIMEOUT;
+ max_retransmit_time = DHCP6_SOL_MAX_RT;
+
+ break;
+
+ case DHCP6_STATE_REQUEST:
+ init_retransmit_time = DHCP6_REQ_TIMEOUT;
+ max_retransmit_time = DHCP6_REQ_MAX_RT;
+ max_retransmit_count = DHCP6_REQ_MAX_RC;
+
+ break;
+
+ case DHCP6_STATE_RENEW:
+ init_retransmit_time = DHCP6_REN_TIMEOUT;
+ max_retransmit_time = DHCP6_REN_MAX_RT;
+
+ /* RFC 3315, section 18.1.3. says max retransmit duration will
+ be the remaining time until T2. Instead of setting MRD,
+ wait for T2 to trigger with the same end result */
+
+ break;
+
+ case DHCP6_STATE_REBIND:
+ init_retransmit_time = DHCP6_REB_TIMEOUT;
+ max_retransmit_time = DHCP6_REB_MAX_RT;
+
+ if (event_source_is_enabled(client->timeout_resend_expire) <= 0) {
+ r = dhcp6_lease_ia_rebind_expire(&client->lease->ia,
+ &expire);
+ if (r < 0) {
+ client_stop(client, r);
+ return 0;
+ }
+ max_retransmit_duration = expire * USEC_PER_SEC;
+ }
+
+ break;
+
+ case DHCP6_STATE_STOPPED:
+ case DHCP6_STATE_BOUND:
+ return 0;
+ }
+
+ if (max_retransmit_count &&
+ client->retransmit_count >= max_retransmit_count) {
+ client_stop(client, SD_DHCP6_CLIENT_EVENT_RETRANS_MAX);
+ return 0;
+ }
+
+ r = sd_event_now(client->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ goto error;
+
+ r = client_send_message(client, time_now);
+ if (r >= 0)
+ client->retransmit_count++;
+
+ if (!client->retransmit_time) {
+ client->retransmit_time =
+ client_timeout_compute_random(init_retransmit_time);
+
+ if (client->state == DHCP6_STATE_SOLICITATION)
+ client->retransmit_time += init_retransmit_time / 10;
+
+ } else {
+ if (max_retransmit_time &&
+ client->retransmit_time > max_retransmit_time / 2)
+ client->retransmit_time = client_timeout_compute_random(max_retransmit_time);
+ else
+ client->retransmit_time += client_timeout_compute_random(client->retransmit_time);
+ }
+
+ log_dhcp6_client(client, "Next retransmission in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX, client->retransmit_time, USEC_PER_SEC));
+
+ r = event_reset_time(client->event, &client->timeout_resend,
+ clock_boottime_or_monotonic(),
+ time_now + client->retransmit_time, 10 * USEC_PER_MSEC,
+ client_timeout_resend, client,
+ client->event_priority, "dhcp6-resend-timer", true);
+ if (r < 0)
+ goto error;
+
+ if (max_retransmit_duration && event_source_is_enabled(client->timeout_resend_expire) <= 0) {
+
+ log_dhcp6_client(client, "Max retransmission duration %"PRIu64" secs",
+ max_retransmit_duration / USEC_PER_SEC);
+
+ r = event_reset_time(client->event, &client->timeout_resend_expire,
+ clock_boottime_or_monotonic(),
+ time_now + max_retransmit_duration, USEC_PER_SEC,
+ client_timeout_resend_expire, client,
+ client->event_priority, "dhcp6-resend-expire-timer", true);
+ if (r < 0)
+ goto error;
+ }
+
+error:
+ if (r < 0)
+ client_stop(client, r);
+
+ return 0;
+}
+
+static int client_ensure_iaid(sd_dhcp6_client *client) {
+ int r;
+ uint32_t iaid;
+
+ assert(client);
+
+ if (client->iaid_set)
+ return 0;
+
+ r = dhcp_identifier_set_iaid(client->ifindex, client->mac_addr, client->mac_addr_len, true, &iaid);
+ if (r < 0)
+ return r;
+
+ client->ia_na.ia_na.id = iaid;
+ client->ia_pd.ia_pd.id = iaid;
+ client->iaid_set = true;
+
+ return 0;
+}
+
+static int client_parse_message(
+ sd_dhcp6_client *client,
+ DHCP6Message *message,
+ size_t len,
+ sd_dhcp6_lease *lease) {
+
+ uint32_t lt_t1 = ~0, lt_t2 = ~0;
+ bool clientid = false;
+ size_t pos = 0;
+ int r;
+
+ assert(client);
+ assert(message);
+ assert(len >= sizeof(DHCP6Message));
+ assert(lease);
+
+ len -= sizeof(DHCP6Message);
+
+ while (pos < len) {
+ DHCP6Option *option = (DHCP6Option *) &message->options[pos];
+ uint16_t optcode, optlen;
+ be32_t iaid_lease;
+ uint8_t *optval;
+ int status;
+
+ if (len < pos + offsetof(DHCP6Option, data))
+ return -ENOBUFS;
+
+ optcode = be16toh(option->code);
+ optlen = be16toh(option->len);
+ optval = option->data;
+
+ if (len < pos + offsetof(DHCP6Option, data) + optlen)
+ return -ENOBUFS;
+
+ switch (optcode) {
+ case SD_DHCP6_OPTION_CLIENTID:
+ if (clientid) {
+ log_dhcp6_client(client, "%s contains multiple clientids",
+ dhcp6_message_type_to_string(message->type));
+ return -EINVAL;
+ }
+
+ if (optlen != client->duid_len ||
+ memcmp(&client->duid, optval, optlen) != 0) {
+ log_dhcp6_client(client, "%s DUID does not match",
+ dhcp6_message_type_to_string(message->type));
+
+ return -EINVAL;
+ }
+ clientid = true;
+
+ break;
+
+ case SD_DHCP6_OPTION_SERVERID:
+ r = dhcp6_lease_get_serverid(lease, NULL, NULL);
+ if (r >= 0) {
+ log_dhcp6_client(client, "%s contains multiple serverids",
+ dhcp6_message_type_to_string(message->type));
+ return -EINVAL;
+ }
+
+ r = dhcp6_lease_set_serverid(lease, optval, optlen);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_DHCP6_OPTION_PREFERENCE:
+ if (optlen != 1)
+ return -EINVAL;
+
+ r = dhcp6_lease_set_preference(lease, optval[0]);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_DHCP6_OPTION_STATUS_CODE:
+ status = dhcp6_option_parse_status(option, optlen + sizeof(DHCP6Option));
+ if (status < 0)
+ return status;
+
+ if (status > 0) {
+ log_dhcp6_client(client, "%s Status %s",
+ dhcp6_message_type_to_string(message->type),
+ dhcp6_message_status_to_string(status));
+
+ return -EINVAL;
+ }
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_NA:
+ if (client->state == DHCP6_STATE_INFORMATION_REQUEST) {
+ log_dhcp6_client(client, "Information request ignoring IA NA option");
+
+ break;
+ }
+
+ r = dhcp6_option_parse_ia(option, &lease->ia);
+ if (r < 0 && r != -ENOMSG)
+ return r;
+
+ r = dhcp6_lease_get_iaid(lease, &iaid_lease);
+ if (r < 0)
+ return r;
+
+ if (client->ia_na.ia_na.id != iaid_lease) {
+ log_dhcp6_client(client, "%s has wrong IAID for IA NA",
+ dhcp6_message_type_to_string(message->type));
+ return -EINVAL;
+ }
+
+ if (lease->ia.addresses) {
+ lt_t1 = MIN(lt_t1, be32toh(lease->ia.ia_na.lifetime_t1));
+ lt_t2 = MIN(lt_t2, be32toh(lease->ia.ia_na.lifetime_t1));
+ }
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_PD:
+ if (client->state == DHCP6_STATE_INFORMATION_REQUEST) {
+ log_dhcp6_client(client, "Information request ignoring IA PD option");
+
+ break;
+ }
+
+ r = dhcp6_option_parse_ia(option, &lease->pd);
+ if (r < 0 && r != -ENOMSG)
+ return r;
+
+ r = dhcp6_lease_get_pd_iaid(lease, &iaid_lease);
+ if (r < 0)
+ return r;
+
+ if (client->ia_pd.ia_pd.id != iaid_lease) {
+ log_dhcp6_client(client, "%s has wrong IAID for IA PD",
+ dhcp6_message_type_to_string(message->type));
+ return -EINVAL;
+ }
+
+ if (lease->pd.addresses) {
+ lt_t1 = MIN(lt_t1, be32toh(lease->pd.ia_pd.lifetime_t1));
+ lt_t2 = MIN(lt_t2, be32toh(lease->pd.ia_pd.lifetime_t2));
+ }
+
+ break;
+
+ case SD_DHCP6_OPTION_RAPID_COMMIT:
+ r = dhcp6_lease_set_rapid_commit(lease);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_DHCP6_OPTION_DNS_SERVERS:
+ r = dhcp6_lease_set_dns(lease, optval, optlen);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_DHCP6_OPTION_DOMAIN_LIST:
+ r = dhcp6_lease_set_domains(lease, optval, optlen);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_DHCP6_OPTION_NTP_SERVER:
+ r = dhcp6_lease_set_ntp(lease, optval, optlen);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_DHCP6_OPTION_SNTP_SERVERS:
+ r = dhcp6_lease_set_sntp(lease, optval, optlen);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ pos += offsetof(DHCP6Option, data) + optlen;
+ }
+
+ if (!clientid) {
+ log_dhcp6_client(client, "%s has incomplete options",
+ dhcp6_message_type_to_string(message->type));
+ return -EINVAL;
+ }
+
+ if (client->state != DHCP6_STATE_INFORMATION_REQUEST) {
+ r = dhcp6_lease_get_serverid(lease, NULL, NULL);
+ if (r < 0) {
+ log_dhcp6_client(client, "%s has no server id",
+ dhcp6_message_type_to_string(message->type));
+ return -EINVAL;
+ }
+
+ } else {
+ if (lease->ia.addresses) {
+ lease->ia.ia_na.lifetime_t1 = htobe32(lt_t1);
+ lease->ia.ia_na.lifetime_t2 = htobe32(lt_t2);
+ }
+
+ if (lease->pd.addresses) {
+ lease->pd.ia_pd.lifetime_t1 = htobe32(lt_t1);
+ lease->pd.ia_pd.lifetime_t2 = htobe32(lt_t2);
+ }
+ }
+
+ return 0;
+}
+
+static int client_receive_reply(sd_dhcp6_client *client, DHCP6Message *reply, size_t len) {
+ _cleanup_(sd_dhcp6_lease_unrefp) sd_dhcp6_lease *lease = NULL;
+ bool rapid_commit;
+ int r;
+
+ assert(client);
+ assert(reply);
+
+ if (reply->type != DHCP6_REPLY)
+ return 0;
+
+ r = dhcp6_lease_new(&lease);
+ if (r < 0)
+ return -ENOMEM;
+
+ r = client_parse_message(client, reply, len, lease);
+ if (r < 0)
+ return r;
+
+ if (client->state == DHCP6_STATE_SOLICITATION) {
+ r = dhcp6_lease_get_rapid_commit(lease, &rapid_commit);
+ if (r < 0)
+ return r;
+
+ if (!rapid_commit)
+ return 0;
+ }
+
+ sd_dhcp6_lease_unref(client->lease);
+ client->lease = TAKE_PTR(lease);
+
+ return DHCP6_STATE_BOUND;
+}
+
+static int client_receive_advertise(sd_dhcp6_client *client, DHCP6Message *advertise, size_t len) {
+ _cleanup_(sd_dhcp6_lease_unrefp) sd_dhcp6_lease *lease = NULL;
+ uint8_t pref_advertise = 0, pref_lease = 0;
+ int r;
+
+ if (advertise->type != DHCP6_ADVERTISE)
+ return 0;
+
+ r = dhcp6_lease_new(&lease);
+ if (r < 0)
+ return r;
+
+ r = client_parse_message(client, advertise, len, lease);
+ if (r < 0)
+ return r;
+
+ r = dhcp6_lease_get_preference(lease, &pref_advertise);
+ if (r < 0)
+ return r;
+
+ r = dhcp6_lease_get_preference(client->lease, &pref_lease);
+
+ if (r < 0 || pref_advertise > pref_lease) {
+ sd_dhcp6_lease_unref(client->lease);
+ client->lease = TAKE_PTR(lease);
+ r = 0;
+ }
+
+ if (pref_advertise == 255 || client->retransmit_count > 1)
+ r = DHCP6_STATE_REQUEST;
+
+ return r;
+}
+
+static int client_receive_message(
+ sd_event_source *s,
+ int fd, uint32_t
+ revents,
+ void *userdata) {
+
+ sd_dhcp6_client *client = userdata;
+ DHCP6_CLIENT_DONT_DESTROY(client);
+ _cleanup_free_ DHCP6Message *message = NULL;
+ ssize_t buflen, len;
+ int r = 0;
+
+ assert(s);
+ assert(client);
+ assert(client->event);
+
+ buflen = next_datagram_size_fd(fd);
+ if (buflen < 0)
+ return buflen;
+
+ message = malloc(buflen);
+ if (!message)
+ return -ENOMEM;
+
+ len = recv(fd, message, buflen, 0);
+ if (len < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return log_dhcp6_client_errno(client, errno, "Could not receive message from UDP socket: %m");
+
+ }
+ if ((size_t) len < sizeof(DHCP6Message)) {
+ log_dhcp6_client(client, "Too small to be DHCP6 message: ignoring");
+ return 0;
+ }
+
+ switch(message->type) {
+ case DHCP6_SOLICIT:
+ case DHCP6_REQUEST:
+ case DHCP6_CONFIRM:
+ case DHCP6_RENEW:
+ case DHCP6_REBIND:
+ case DHCP6_RELEASE:
+ case DHCP6_DECLINE:
+ case DHCP6_INFORMATION_REQUEST:
+ case DHCP6_RELAY_FORW:
+ case DHCP6_RELAY_REPL:
+ return 0;
+
+ case DHCP6_ADVERTISE:
+ case DHCP6_REPLY:
+ case DHCP6_RECONFIGURE:
+ break;
+
+ default:
+ log_dhcp6_client(client, "Unknown message type %d", message->type);
+ return 0;
+ }
+
+ if (client->transaction_id != (message->transaction_id &
+ htobe32(0x00ffffff)))
+ return 0;
+
+ switch (client->state) {
+ case DHCP6_STATE_INFORMATION_REQUEST:
+ r = client_receive_reply(client, message, len);
+ if (r < 0)
+ return 0;
+
+ client_notify(client, SD_DHCP6_CLIENT_EVENT_INFORMATION_REQUEST);
+
+ client_start(client, DHCP6_STATE_STOPPED);
+
+ break;
+
+ case DHCP6_STATE_SOLICITATION:
+ r = client_receive_advertise(client, message, len);
+
+ if (r == DHCP6_STATE_REQUEST) {
+ client_start(client, r);
+
+ break;
+ }
+
+ _fallthrough_; /* for Soliciation Rapid Commit option check */
+ case DHCP6_STATE_REQUEST:
+ case DHCP6_STATE_RENEW:
+ case DHCP6_STATE_REBIND:
+
+ r = client_receive_reply(client, message, len);
+ if (r < 0)
+ return 0;
+
+ if (r == DHCP6_STATE_BOUND) {
+
+ r = client_start(client, DHCP6_STATE_BOUND);
+ if (r < 0) {
+ client_stop(client, r);
+ return 0;
+ }
+
+ client_notify(client, SD_DHCP6_CLIENT_EVENT_IP_ACQUIRE);
+ }
+
+ break;
+
+ case DHCP6_STATE_BOUND:
+
+ break;
+
+ case DHCP6_STATE_STOPPED:
+ return 0;
+ }
+
+ log_dhcp6_client(client, "Recv %s",
+ dhcp6_message_type_to_string(message->type));
+
+ return 0;
+}
+
+static int client_get_lifetime(sd_dhcp6_client *client, uint32_t *lifetime_t1,
+ uint32_t *lifetime_t2) {
+ assert_return(client, -EINVAL);
+ assert_return(client->lease, -EINVAL);
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_NA) && client->lease->ia.addresses) {
+ *lifetime_t1 = be32toh(client->lease->ia.ia_na.lifetime_t1);
+ *lifetime_t2 = be32toh(client->lease->ia.ia_na.lifetime_t2);
+
+ return 0;
+ }
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_PD) && client->lease->pd.addresses) {
+ *lifetime_t1 = be32toh(client->lease->pd.ia_pd.lifetime_t1);
+ *lifetime_t2 = be32toh(client->lease->pd.ia_pd.lifetime_t2);
+
+ return 0;
+ }
+
+ return -ENOMSG;
+}
+
+static int client_start(sd_dhcp6_client *client, enum DHCP6State state) {
+ int r;
+ usec_t timeout, time_now;
+ char time_string[FORMAT_TIMESPAN_MAX];
+ uint32_t lifetime_t1, lifetime_t2;
+
+ assert_return(client, -EINVAL);
+ assert_return(client->event, -EINVAL);
+ assert_return(client->ifindex > 0, -EINVAL);
+ assert_return(client->state != state, -EINVAL);
+
+ (void) event_source_disable(client->timeout_resend_expire);
+ (void) event_source_disable(client->timeout_resend);
+ client->retransmit_time = 0;
+ client->retransmit_count = 0;
+
+ r = sd_event_now(client->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return r;
+
+ if (!client->receive_message) {
+ r = sd_event_add_io(client->event, &client->receive_message,
+ client->fd, EPOLLIN, client_receive_message,
+ client);
+ if (r < 0)
+ goto error;
+
+ r = sd_event_source_set_priority(client->receive_message,
+ client->event_priority);
+ if (r < 0)
+ goto error;
+
+ r = sd_event_source_set_description(client->receive_message,
+ "dhcp6-receive-message");
+ if (r < 0)
+ goto error;
+ }
+
+ switch (state) {
+ case DHCP6_STATE_STOPPED:
+ if (client->state == DHCP6_STATE_INFORMATION_REQUEST) {
+ client->state = DHCP6_STATE_STOPPED;
+
+ return 0;
+ }
+
+ _fallthrough_;
+ case DHCP6_STATE_SOLICITATION:
+ client->state = DHCP6_STATE_SOLICITATION;
+
+ break;
+
+ case DHCP6_STATE_INFORMATION_REQUEST:
+ case DHCP6_STATE_REQUEST:
+ case DHCP6_STATE_RENEW:
+ case DHCP6_STATE_REBIND:
+
+ client->state = state;
+
+ break;
+
+ case DHCP6_STATE_BOUND:
+
+ r = client_get_lifetime(client, &lifetime_t1, &lifetime_t2);
+ if (r < 0)
+ goto error;
+
+ if (lifetime_t1 == 0xffffffff || lifetime_t2 == 0xffffffff) {
+ log_dhcp6_client(client, "Infinite T1 0x%08x or T2 0x%08x",
+ lifetime_t1, lifetime_t2);
+
+ return 0;
+ }
+
+ timeout = client_timeout_compute_random(lifetime_t1 * USEC_PER_SEC);
+
+ log_dhcp6_client(client, "T1 expires in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX, timeout, USEC_PER_SEC));
+
+ r = event_reset_time(client->event, &client->timeout_t1,
+ clock_boottime_or_monotonic(),
+ time_now + timeout, 10 * USEC_PER_SEC,
+ client_timeout_t1, client,
+ client->event_priority, "dhcp6-t1-timeout", true);
+ if (r < 0)
+ goto error;
+
+ timeout = client_timeout_compute_random(lifetime_t2 * USEC_PER_SEC);
+
+ log_dhcp6_client(client, "T2 expires in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX, timeout, USEC_PER_SEC));
+
+ r = event_reset_time(client->event, &client->timeout_t2,
+ clock_boottime_or_monotonic(),
+ time_now + timeout, 10 * USEC_PER_SEC,
+ client_timeout_t2, client,
+ client->event_priority, "dhcp6-t2-timeout", true);
+ if (r < 0)
+ goto error;
+
+ client->state = state;
+
+ return 0;
+ }
+
+ client->transaction_id = random_u32() & htobe32(0x00ffffff);
+ client->transaction_start = time_now;
+
+ r = event_reset_time(client->event, &client->timeout_resend,
+ clock_boottime_or_monotonic(),
+ 0, 0,
+ client_timeout_resend, client,
+ client->event_priority, "dhcp6-resend-timeout", true);
+ if (r < 0)
+ goto error;
+
+ return 0;
+
+ error:
+ client_reset(client);
+ return r;
+}
+
+int sd_dhcp6_client_stop(sd_dhcp6_client *client) {
+ assert_return(client, -EINVAL);
+
+ client_stop(client, SD_DHCP6_CLIENT_EVENT_STOP);
+
+ client->fd = safe_close(client->fd);
+
+ return 0;
+}
+
+int sd_dhcp6_client_is_running(sd_dhcp6_client *client) {
+ assert_return(client, -EINVAL);
+
+ return client->state != DHCP6_STATE_STOPPED;
+}
+
+int sd_dhcp6_client_start(sd_dhcp6_client *client) {
+ enum DHCP6State state = DHCP6_STATE_SOLICITATION;
+ int r = 0;
+
+ assert_return(client, -EINVAL);
+ assert_return(client->event, -EINVAL);
+ assert_return(client->ifindex > 0, -EINVAL);
+ assert_return(in_addr_is_link_local(AF_INET6, (const union in_addr_union *) &client->local_address) > 0, -EINVAL);
+
+ if (!IN_SET(client->state, DHCP6_STATE_STOPPED))
+ return -EBUSY;
+
+ if (!client->information_request && !client->request)
+ return -EINVAL;
+
+ r = client_reset(client);
+ if (r < 0)
+ return r;
+
+ r = client_ensure_iaid(client);
+ if (r < 0)
+ return r;
+
+ r = client_ensure_duid(client);
+ if (r < 0)
+ return r;
+
+ if (client->fd < 0) {
+ r = dhcp6_network_bind_udp_socket(client->ifindex, &client->local_address);
+ if (r < 0) {
+ _cleanup_free_ char *p = NULL;
+
+ (void) in_addr_to_string(AF_INET6, (const union in_addr_union*) &client->local_address, &p);
+ return log_dhcp6_client_errno(client, r,
+ "Failed to bind to UDP socket at address %s: %m", strna(p));
+ }
+
+ client->fd = r;
+ }
+
+ if (client->information_request)
+ state = DHCP6_STATE_INFORMATION_REQUEST;
+
+ log_dhcp6_client(client, "Started in %s mode",
+ client->information_request? "Information request":
+ "Managed");
+
+ return client_start(client, state);
+}
+
+int sd_dhcp6_client_attach_event(sd_dhcp6_client *client, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(!client->event, -EBUSY);
+
+ if (event)
+ client->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&client->event);
+ if (r < 0)
+ return 0;
+ }
+
+ client->event_priority = priority;
+
+ return 0;
+}
+
+int sd_dhcp6_client_detach_event(sd_dhcp6_client *client) {
+ assert_return(client, -EINVAL);
+
+ client->event = sd_event_unref(client->event);
+
+ return 0;
+}
+
+sd_event *sd_dhcp6_client_get_event(sd_dhcp6_client *client) {
+ assert_return(client, NULL);
+
+ return client->event;
+}
+
+static sd_dhcp6_client *dhcp6_client_free(sd_dhcp6_client *client) {
+ assert(client);
+
+ client->timeout_resend = sd_event_source_unref(client->timeout_resend);
+ client->timeout_resend_expire = sd_event_source_unref(client->timeout_resend_expire);
+ client->timeout_t1 = sd_event_source_unref(client->timeout_t1);
+ client->timeout_t2 = sd_event_source_unref(client->timeout_t2);
+
+ client_reset(client);
+
+ client->fd = safe_close(client->fd);
+
+ sd_dhcp6_client_detach_event(client);
+
+ free(client->req_opts);
+ free(client->fqdn);
+ return mfree(client);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_dhcp6_client, sd_dhcp6_client, dhcp6_client_free);
+
+int sd_dhcp6_client_new(sd_dhcp6_client **ret) {
+ _cleanup_(sd_dhcp6_client_unrefp) sd_dhcp6_client *client = NULL;
+ _cleanup_free_ be16_t *req_opts = NULL;
+ size_t t;
+
+ assert_return(ret, -EINVAL);
+
+ req_opts = new(be16_t, ELEMENTSOF(default_req_opts));
+ if (!req_opts)
+ return -ENOMEM;
+
+ for (t = 0; t < ELEMENTSOF(default_req_opts); t++)
+ req_opts[t] = htobe16(default_req_opts[t]);
+
+ client = new(sd_dhcp6_client, 1);
+ if (!client)
+ return -ENOMEM;
+
+ *client = (sd_dhcp6_client) {
+ .n_ref = 1,
+ .ia_na.type = SD_DHCP6_OPTION_IA_NA,
+ .ia_pd.type = SD_DHCP6_OPTION_IA_PD,
+ .ifindex = -1,
+ .request = DHCP6_REQUEST_IA_NA,
+ .fd = -1,
+ .req_opts_len = ELEMENTSOF(default_req_opts),
+ .req_opts = TAKE_PTR(req_opts),
+ };
+
+ *ret = TAKE_PTR(client);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/sd-dhcp6-lease.c b/src/libsystemd-network/sd-dhcp6-lease.c
new file mode 100644
index 0000000..8b42481
--- /dev/null
+++ b/src/libsystemd-network/sd-dhcp6-lease.c
@@ -0,0 +1,408 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014-2015 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "dhcp6-lease-internal.h"
+#include "dhcp6-protocol.h"
+#include "strv.h"
+#include "util.h"
+
+int dhcp6_lease_ia_rebind_expire(const DHCP6IA *ia, uint32_t *expire) {
+ DHCP6Address *addr;
+ uint32_t valid = 0, t;
+
+ assert_return(ia, -EINVAL);
+ assert_return(expire, -EINVAL);
+
+ LIST_FOREACH(addresses, addr, ia->addresses) {
+ t = be32toh(addr->iaaddr.lifetime_valid);
+ if (valid < t)
+ valid = t;
+ }
+
+ t = be32toh(ia->ia_na.lifetime_t2);
+ if (t > valid)
+ return -EINVAL;
+
+ *expire = valid - t;
+
+ return 0;
+}
+
+DHCP6IA *dhcp6_lease_free_ia(DHCP6IA *ia) {
+ DHCP6Address *address;
+
+ if (!ia)
+ return NULL;
+
+ while (ia->addresses) {
+ address = ia->addresses;
+
+ LIST_REMOVE(addresses, ia->addresses, address);
+
+ free(address);
+ }
+
+ return NULL;
+}
+
+int dhcp6_lease_set_serverid(sd_dhcp6_lease *lease, const uint8_t *id,
+ size_t len) {
+ uint8_t *serverid;
+
+ assert_return(lease, -EINVAL);
+ assert_return(id, -EINVAL);
+
+ serverid = memdup(id, len);
+ if (!serverid)
+ return -ENOMEM;
+
+ free_and_replace(lease->serverid, serverid);
+ lease->serverid_len = len;
+
+ return 0;
+}
+
+int dhcp6_lease_get_serverid(sd_dhcp6_lease *lease, uint8_t **id, size_t *len) {
+ assert_return(lease, -EINVAL);
+
+ if (!lease->serverid)
+ return -ENOMSG;
+
+ if (id)
+ *id = lease->serverid;
+ if (len)
+ *len = lease->serverid_len;
+
+ return 0;
+}
+
+int dhcp6_lease_set_preference(sd_dhcp6_lease *lease, uint8_t preference) {
+ assert_return(lease, -EINVAL);
+
+ lease->preference = preference;
+
+ return 0;
+}
+
+int dhcp6_lease_get_preference(sd_dhcp6_lease *lease, uint8_t *preference) {
+ assert_return(preference, -EINVAL);
+
+ if (!lease)
+ return -EINVAL;
+
+ *preference = lease->preference;
+
+ return 0;
+}
+
+int dhcp6_lease_set_rapid_commit(sd_dhcp6_lease *lease) {
+ assert_return(lease, -EINVAL);
+
+ lease->rapid_commit = true;
+
+ return 0;
+}
+
+int dhcp6_lease_get_rapid_commit(sd_dhcp6_lease *lease, bool *rapid_commit) {
+ assert_return(lease, -EINVAL);
+ assert_return(rapid_commit, -EINVAL);
+
+ *rapid_commit = lease->rapid_commit;
+
+ return 0;
+}
+
+int dhcp6_lease_get_iaid(sd_dhcp6_lease *lease, be32_t *iaid) {
+ assert_return(lease, -EINVAL);
+ assert_return(iaid, -EINVAL);
+
+ *iaid = lease->ia.ia_na.id;
+
+ return 0;
+}
+
+int dhcp6_lease_get_pd_iaid(sd_dhcp6_lease *lease, be32_t *iaid) {
+ assert_return(lease, -EINVAL);
+ assert_return(iaid, -EINVAL);
+
+ *iaid = lease->pd.ia_pd.id;
+
+ return 0;
+}
+
+int sd_dhcp6_lease_get_address(sd_dhcp6_lease *lease, struct in6_addr *addr,
+ uint32_t *lifetime_preferred,
+ uint32_t *lifetime_valid) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+ assert_return(lifetime_preferred, -EINVAL);
+ assert_return(lifetime_valid, -EINVAL);
+
+ if (!lease->addr_iter)
+ return -ENOMSG;
+
+ memcpy(addr, &lease->addr_iter->iaaddr.address,
+ sizeof(struct in6_addr));
+ *lifetime_preferred =
+ be32toh(lease->addr_iter->iaaddr.lifetime_preferred);
+ *lifetime_valid = be32toh(lease->addr_iter->iaaddr.lifetime_valid);
+
+ lease->addr_iter = lease->addr_iter->addresses_next;
+
+ return 0;
+}
+
+void sd_dhcp6_lease_reset_address_iter(sd_dhcp6_lease *lease) {
+ if (lease)
+ lease->addr_iter = lease->ia.addresses;
+}
+
+int sd_dhcp6_lease_get_pd(sd_dhcp6_lease *lease, struct in6_addr *prefix,
+ uint8_t *prefix_len,
+ uint32_t *lifetime_preferred,
+ uint32_t *lifetime_valid) {
+ assert_return(lease, -EINVAL);
+ assert_return(prefix, -EINVAL);
+ assert_return(prefix_len, -EINVAL);
+ assert_return(lifetime_preferred, -EINVAL);
+ assert_return(lifetime_valid, -EINVAL);
+
+ if (!lease->prefix_iter)
+ return -ENOMSG;
+
+ memcpy(prefix, &lease->prefix_iter->iapdprefix.address,
+ sizeof(struct in6_addr));
+ *prefix_len = lease->prefix_iter->iapdprefix.prefixlen;
+ *lifetime_preferred =
+ be32toh(lease->prefix_iter->iapdprefix.lifetime_preferred);
+ *lifetime_valid =
+ be32toh(lease->prefix_iter->iapdprefix.lifetime_valid);
+
+ lease->prefix_iter = lease->prefix_iter->addresses_next;
+
+ return 0;
+}
+
+void sd_dhcp6_lease_reset_pd_prefix_iter(sd_dhcp6_lease *lease) {
+ if (lease)
+ lease->prefix_iter = lease->pd.addresses;
+}
+
+int dhcp6_lease_set_dns(sd_dhcp6_lease *lease, uint8_t *optval, size_t optlen) {
+ int r;
+
+ assert_return(lease, -EINVAL);
+ assert_return(optval, -EINVAL);
+
+ if (!optlen)
+ return 0;
+
+ r = dhcp6_option_parse_ip6addrs(optval, optlen, &lease->dns,
+ lease->dns_count,
+ &lease->dns_allocated);
+ if (r < 0) {
+ log_dhcp6_client(client, "Invalid DNS server option: %s",
+ strerror(-r));
+
+ return r;
+ }
+
+ lease->dns_count = r;
+
+ return 0;
+}
+
+int sd_dhcp6_lease_get_dns(sd_dhcp6_lease *lease, struct in6_addr **addrs) {
+ assert_return(lease, -EINVAL);
+ assert_return(addrs, -EINVAL);
+
+ if (lease->dns_count) {
+ *addrs = lease->dns;
+ return lease->dns_count;
+ }
+
+ return -ENOENT;
+}
+
+int dhcp6_lease_set_domains(sd_dhcp6_lease *lease, uint8_t *optval,
+ size_t optlen) {
+ int r;
+ char **domains;
+
+ assert_return(lease, -EINVAL);
+ assert_return(optval, -EINVAL);
+
+ if (!optlen)
+ return 0;
+
+ r = dhcp6_option_parse_domainname(optval, optlen, &domains);
+ if (r < 0)
+ return 0;
+
+ strv_free_and_replace(lease->domains, domains);
+ lease->domains_count = r;
+
+ return r;
+}
+
+int sd_dhcp6_lease_get_domains(sd_dhcp6_lease *lease, char ***domains) {
+ assert_return(lease, -EINVAL);
+ assert_return(domains, -EINVAL);
+
+ if (lease->domains_count) {
+ *domains = lease->domains;
+ return lease->domains_count;
+ }
+
+ return -ENOENT;
+}
+
+int dhcp6_lease_set_ntp(sd_dhcp6_lease *lease, uint8_t *optval, size_t optlen) {
+ int r;
+ uint16_t subopt;
+ size_t sublen;
+ uint8_t *subval;
+
+ assert_return(lease, -EINVAL);
+ assert_return(optval, -EINVAL);
+
+ lease->ntp = mfree(lease->ntp);
+ lease->ntp_count = 0;
+ lease->ntp_allocated = 0;
+
+ while ((r = dhcp6_option_parse(&optval, &optlen, &subopt, &sublen,
+ &subval)) >= 0) {
+ int s;
+ char **servers;
+
+ switch(subopt) {
+ case DHCP6_NTP_SUBOPTION_SRV_ADDR:
+ case DHCP6_NTP_SUBOPTION_MC_ADDR:
+ if (sublen != 16)
+ return 0;
+
+ s = dhcp6_option_parse_ip6addrs(subval, sublen,
+ &lease->ntp,
+ lease->ntp_count,
+ &lease->ntp_allocated);
+ if (s < 0)
+ return s;
+
+ lease->ntp_count = s;
+
+ break;
+
+ case DHCP6_NTP_SUBOPTION_SRV_FQDN:
+ r = dhcp6_option_parse_domainname(subval, sublen,
+ &servers);
+ if (r < 0)
+ return 0;
+
+ strv_free_and_replace(lease->ntp_fqdn, servers);
+ lease->ntp_fqdn_count = r;
+
+ break;
+ }
+ }
+
+ if (r != -ENOMSG)
+ return r;
+
+ return 0;
+}
+
+int dhcp6_lease_set_sntp(sd_dhcp6_lease *lease, uint8_t *optval, size_t optlen) {
+ int r;
+
+ assert_return(lease, -EINVAL);
+ assert_return(optval, -EINVAL);
+
+ if (!optlen)
+ return 0;
+
+ if (lease->ntp || lease->ntp_fqdn) {
+ log_dhcp6_client(client, "NTP information already provided");
+
+ return 0;
+ }
+
+ log_dhcp6_client(client, "Using deprecated SNTP information");
+
+ r = dhcp6_option_parse_ip6addrs(optval, optlen, &lease->ntp,
+ lease->ntp_count,
+ &lease->ntp_allocated);
+ if (r < 0) {
+ log_dhcp6_client(client, "Invalid SNTP server option: %s",
+ strerror(-r));
+
+ return r;
+ }
+
+ lease->ntp_count = r;
+
+ return 0;
+}
+
+int sd_dhcp6_lease_get_ntp_addrs(sd_dhcp6_lease *lease,
+ struct in6_addr **addrs) {
+ assert_return(lease, -EINVAL);
+ assert_return(addrs, -EINVAL);
+
+ if (lease->ntp_count) {
+ *addrs = lease->ntp;
+ return lease->ntp_count;
+ }
+
+ return -ENOENT;
+}
+
+int sd_dhcp6_lease_get_ntp_fqdn(sd_dhcp6_lease *lease, char ***ntp_fqdn) {
+ assert_return(lease, -EINVAL);
+ assert_return(ntp_fqdn, -EINVAL);
+
+ if (lease->ntp_fqdn_count) {
+ *ntp_fqdn = lease->ntp_fqdn;
+ return lease->ntp_fqdn_count;
+ }
+
+ return -ENOENT;
+}
+
+static sd_dhcp6_lease *dhcp6_lease_free(sd_dhcp6_lease *lease) {
+ assert(lease);
+
+ free(lease->serverid);
+ dhcp6_lease_free_ia(&lease->ia);
+ dhcp6_lease_free_ia(&lease->pd);
+
+ free(lease->dns);
+
+ lease->domains = strv_free(lease->domains);
+
+ free(lease->ntp);
+
+ lease->ntp_fqdn = strv_free(lease->ntp_fqdn);
+ return mfree(lease);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_dhcp6_lease, sd_dhcp6_lease, dhcp6_lease_free);
+
+int dhcp6_lease_new(sd_dhcp6_lease **ret) {
+ sd_dhcp6_lease *lease;
+
+ lease = new0(sd_dhcp6_lease, 1);
+ if (!lease)
+ return -ENOMEM;
+
+ lease->n_ref = 1;
+
+ LIST_HEAD_INIT(lease->ia.addresses);
+
+ *ret = lease;
+ return 0;
+}
diff --git a/src/libsystemd-network/sd-ipv4acd.c b/src/libsystemd-network/sd-ipv4acd.c
new file mode 100644
index 0000000..59359ae
--- /dev/null
+++ b/src/libsystemd-network/sd-ipv4acd.c
@@ -0,0 +1,483 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Axis Communications AB. All rights reserved.
+***/
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sd-ipv4acd.h"
+
+#include "alloc-util.h"
+#include "arp-util.h"
+#include "ether-addr-util.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "in-addr-util.h"
+#include "list.h"
+#include "random-util.h"
+#include "siphash24.h"
+#include "string-util.h"
+#include "util.h"
+
+/* Constants from the RFC */
+#define PROBE_WAIT_USEC (1U * USEC_PER_SEC)
+#define PROBE_NUM 3U
+#define PROBE_MIN_USEC (1U * USEC_PER_SEC)
+#define PROBE_MAX_USEC (2U * USEC_PER_SEC)
+#define ANNOUNCE_WAIT_USEC (2U * USEC_PER_SEC)
+#define ANNOUNCE_NUM 2U
+#define ANNOUNCE_INTERVAL_USEC (2U * USEC_PER_SEC)
+#define MAX_CONFLICTS 10U
+#define RATE_LIMIT_INTERVAL_USEC (60U * USEC_PER_SEC)
+#define DEFEND_INTERVAL_USEC (10U * USEC_PER_SEC)
+
+typedef enum IPv4ACDState {
+ IPV4ACD_STATE_INIT,
+ IPV4ACD_STATE_STARTED,
+ IPV4ACD_STATE_WAITING_PROBE,
+ IPV4ACD_STATE_PROBING,
+ IPV4ACD_STATE_WAITING_ANNOUNCE,
+ IPV4ACD_STATE_ANNOUNCING,
+ IPV4ACD_STATE_RUNNING,
+ _IPV4ACD_STATE_MAX,
+ _IPV4ACD_STATE_INVALID = -1
+} IPv4ACDState;
+
+struct sd_ipv4acd {
+ unsigned n_ref;
+
+ IPv4ACDState state;
+ int ifindex;
+ int fd;
+
+ unsigned n_iteration;
+ unsigned n_conflict;
+
+ sd_event_source *receive_message_event_source;
+ sd_event_source *timer_event_source;
+
+ usec_t defend_window;
+ be32_t address;
+
+ /* External */
+ struct ether_addr mac_addr;
+
+ sd_event *event;
+ int event_priority;
+ sd_ipv4acd_callback_t callback;
+ void* userdata;
+};
+
+#define log_ipv4acd_errno(acd, error, fmt, ...) log_internal(LOG_DEBUG, error, __FILE__, __LINE__, __func__, "IPV4ACD: " fmt, ##__VA_ARGS__)
+#define log_ipv4acd(acd, fmt, ...) log_ipv4acd_errno(acd, 0, fmt, ##__VA_ARGS__)
+
+static void ipv4acd_set_state(sd_ipv4acd *acd, IPv4ACDState st, bool reset_counter) {
+ assert(acd);
+ assert(st < _IPV4ACD_STATE_MAX);
+
+ if (st == acd->state && !reset_counter)
+ acd->n_iteration++;
+ else {
+ acd->state = st;
+ acd->n_iteration = 0;
+ }
+}
+
+static void ipv4acd_reset(sd_ipv4acd *acd) {
+ assert(acd);
+
+ (void) event_source_disable(acd->timer_event_source);
+ acd->receive_message_event_source = sd_event_source_unref(acd->receive_message_event_source);
+
+ acd->fd = safe_close(acd->fd);
+
+ ipv4acd_set_state(acd, IPV4ACD_STATE_INIT, true);
+}
+
+static sd_ipv4acd *ipv4acd_free(sd_ipv4acd *acd) {
+ assert(acd);
+
+ acd->timer_event_source = sd_event_source_unref(acd->timer_event_source);
+
+ ipv4acd_reset(acd);
+ sd_ipv4acd_detach_event(acd);
+
+ return mfree(acd);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_ipv4acd, sd_ipv4acd, ipv4acd_free);
+
+int sd_ipv4acd_new(sd_ipv4acd **ret) {
+ _cleanup_(sd_ipv4acd_unrefp) sd_ipv4acd *acd = NULL;
+
+ assert_return(ret, -EINVAL);
+
+ acd = new(sd_ipv4acd, 1);
+ if (!acd)
+ return -ENOMEM;
+
+ *acd = (sd_ipv4acd) {
+ .n_ref = 1,
+ .state = IPV4ACD_STATE_INIT,
+ .ifindex = -1,
+ .fd = -1,
+ };
+
+ *ret = TAKE_PTR(acd);
+
+ return 0;
+}
+
+static void ipv4acd_client_notify(sd_ipv4acd *acd, int event) {
+ assert(acd);
+
+ if (!acd->callback)
+ return;
+
+ acd->callback(acd, event, acd->userdata);
+}
+
+int sd_ipv4acd_stop(sd_ipv4acd *acd) {
+ assert_return(acd, -EINVAL);
+
+ ipv4acd_reset(acd);
+
+ log_ipv4acd(acd, "STOPPED");
+
+ ipv4acd_client_notify(acd, SD_IPV4ACD_EVENT_STOP);
+
+ return 0;
+}
+
+static int ipv4acd_on_timeout(sd_event_source *s, uint64_t usec, void *userdata);
+
+static int ipv4acd_set_next_wakeup(sd_ipv4acd *acd, usec_t usec, usec_t random_usec) {
+ usec_t next_timeout, time_now;
+
+ assert(acd);
+
+ next_timeout = usec;
+
+ if (random_usec > 0)
+ next_timeout += (usec_t) random_u64() % random_usec;
+
+ assert_se(sd_event_now(acd->event, clock_boottime_or_monotonic(), &time_now) >= 0);
+
+ return event_reset_time(acd->event, &acd->timer_event_source,
+ clock_boottime_or_monotonic(),
+ time_now + next_timeout, 0,
+ ipv4acd_on_timeout, acd,
+ acd->event_priority, "ipv4acd-timer", true);
+}
+
+static bool ipv4acd_arp_conflict(sd_ipv4acd *acd, struct ether_arp *arp) {
+ assert(acd);
+ assert(arp);
+
+ /* see the BPF */
+ if (memcmp(arp->arp_spa, &acd->address, sizeof(acd->address)) == 0)
+ return true;
+
+ /* the TPA matched instead of the SPA, this is not a conflict */
+ return false;
+}
+
+static int ipv4acd_on_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_ipv4acd *acd = userdata;
+ int r = 0;
+
+ assert(acd);
+
+ switch (acd->state) {
+
+ case IPV4ACD_STATE_STARTED:
+ ipv4acd_set_state(acd, IPV4ACD_STATE_WAITING_PROBE, true);
+
+ if (acd->n_conflict >= MAX_CONFLICTS) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ log_ipv4acd(acd, "Max conflicts reached, delaying by %s", format_timespan(ts, sizeof(ts), RATE_LIMIT_INTERVAL_USEC, 0));
+
+ r = ipv4acd_set_next_wakeup(acd, RATE_LIMIT_INTERVAL_USEC, PROBE_WAIT_USEC);
+ if (r < 0)
+ goto fail;
+ } else {
+ r = ipv4acd_set_next_wakeup(acd, 0, PROBE_WAIT_USEC);
+ if (r < 0)
+ goto fail;
+ }
+
+ break;
+
+ case IPV4ACD_STATE_WAITING_PROBE:
+ case IPV4ACD_STATE_PROBING:
+ /* Send a probe */
+ r = arp_send_probe(acd->fd, acd->ifindex, acd->address, &acd->mac_addr);
+ if (r < 0) {
+ log_ipv4acd_errno(acd, r, "Failed to send ARP probe: %m");
+ goto fail;
+ } else {
+ _cleanup_free_ char *address = NULL;
+ union in_addr_union addr = { .in.s_addr = acd->address };
+
+ (void) in_addr_to_string(AF_INET, &addr, &address);
+ log_ipv4acd(acd, "Probing %s", strna(address));
+ }
+
+ if (acd->n_iteration < PROBE_NUM - 2) {
+ ipv4acd_set_state(acd, IPV4ACD_STATE_PROBING, false);
+
+ r = ipv4acd_set_next_wakeup(acd, PROBE_MIN_USEC, (PROBE_MAX_USEC-PROBE_MIN_USEC));
+ if (r < 0)
+ goto fail;
+ } else {
+ ipv4acd_set_state(acd, IPV4ACD_STATE_WAITING_ANNOUNCE, true);
+
+ r = ipv4acd_set_next_wakeup(acd, ANNOUNCE_WAIT_USEC, 0);
+ if (r < 0)
+ goto fail;
+ }
+
+ break;
+
+ case IPV4ACD_STATE_ANNOUNCING:
+ if (acd->n_iteration >= ANNOUNCE_NUM - 1) {
+ ipv4acd_set_state(acd, IPV4ACD_STATE_RUNNING, false);
+ break;
+ }
+
+ _fallthrough_;
+ case IPV4ACD_STATE_WAITING_ANNOUNCE:
+ /* Send announcement packet */
+ r = arp_send_announcement(acd->fd, acd->ifindex, acd->address, &acd->mac_addr);
+ if (r < 0) {
+ log_ipv4acd_errno(acd, r, "Failed to send ARP announcement: %m");
+ goto fail;
+ } else
+ log_ipv4acd(acd, "ANNOUNCE");
+
+ ipv4acd_set_state(acd, IPV4ACD_STATE_ANNOUNCING, false);
+
+ r = ipv4acd_set_next_wakeup(acd, ANNOUNCE_INTERVAL_USEC, 0);
+ if (r < 0)
+ goto fail;
+
+ if (acd->n_iteration == 0) {
+ acd->n_conflict = 0;
+ ipv4acd_client_notify(acd, SD_IPV4ACD_EVENT_BIND);
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Invalid state.");
+ }
+
+ return 0;
+
+fail:
+ sd_ipv4acd_stop(acd);
+ return 0;
+}
+
+static void ipv4acd_on_conflict(sd_ipv4acd *acd) {
+ _cleanup_free_ char *address = NULL;
+ union in_addr_union addr = { .in.s_addr = acd->address };
+
+ assert(acd);
+
+ acd->n_conflict++;
+
+ (void) in_addr_to_string(AF_INET, &addr, &address);
+ log_ipv4acd(acd, "Conflict on %s (%u)", strna(address), acd->n_conflict);
+
+ ipv4acd_reset(acd);
+ ipv4acd_client_notify(acd, SD_IPV4ACD_EVENT_CONFLICT);
+}
+
+static int ipv4acd_on_packet(
+ sd_event_source *s,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+
+ sd_ipv4acd *acd = userdata;
+ struct ether_arp packet;
+ ssize_t n;
+ int r;
+
+ assert(s);
+ assert(acd);
+ assert(fd >= 0);
+
+ n = recv(fd, &packet, sizeof(struct ether_arp), 0);
+ if (n < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ log_ipv4acd_errno(acd, errno, "Failed to read ARP packet: %m");
+ goto fail;
+ }
+ if ((size_t) n != sizeof(struct ether_arp)) {
+ log_ipv4acd(acd, "Ignoring too short ARP packet.");
+ return 0;
+ }
+
+ switch (acd->state) {
+
+ case IPV4ACD_STATE_ANNOUNCING:
+ case IPV4ACD_STATE_RUNNING:
+
+ if (ipv4acd_arp_conflict(acd, &packet)) {
+ usec_t ts;
+
+ assert_se(sd_event_now(acd->event, clock_boottime_or_monotonic(), &ts) >= 0);
+
+ /* Defend address */
+ if (ts > acd->defend_window) {
+ acd->defend_window = ts + DEFEND_INTERVAL_USEC;
+ r = arp_send_announcement(acd->fd, acd->ifindex, acd->address, &acd->mac_addr);
+ if (r < 0) {
+ log_ipv4acd_errno(acd, r, "Failed to send ARP announcement: %m");
+ goto fail;
+ } else
+ log_ipv4acd(acd, "DEFEND");
+
+ } else
+ ipv4acd_on_conflict(acd);
+ }
+ break;
+
+ case IPV4ACD_STATE_WAITING_PROBE:
+ case IPV4ACD_STATE_PROBING:
+ case IPV4ACD_STATE_WAITING_ANNOUNCE:
+ /* BPF ensures this packet indicates a conflict */
+ ipv4acd_on_conflict(acd);
+ break;
+
+ default:
+ assert_not_reached("Invalid state.");
+ }
+
+ return 0;
+
+fail:
+ sd_ipv4acd_stop(acd);
+ return 0;
+}
+
+int sd_ipv4acd_set_ifindex(sd_ipv4acd *acd, int ifindex) {
+ assert_return(acd, -EINVAL);
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(acd->state == IPV4ACD_STATE_INIT, -EBUSY);
+
+ acd->ifindex = ifindex;
+
+ return 0;
+}
+
+int sd_ipv4acd_set_mac(sd_ipv4acd *acd, const struct ether_addr *addr) {
+ assert_return(acd, -EINVAL);
+ assert_return(addr, -EINVAL);
+ assert_return(acd->state == IPV4ACD_STATE_INIT, -EBUSY);
+
+ acd->mac_addr = *addr;
+
+ return 0;
+}
+
+int sd_ipv4acd_detach_event(sd_ipv4acd *acd) {
+ assert_return(acd, -EINVAL);
+
+ acd->event = sd_event_unref(acd->event);
+
+ return 0;
+}
+
+int sd_ipv4acd_attach_event(sd_ipv4acd *acd, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(acd, -EINVAL);
+ assert_return(!acd->event, -EBUSY);
+
+ if (event)
+ acd->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&acd->event);
+ if (r < 0)
+ return r;
+ }
+
+ acd->event_priority = priority;
+
+ return 0;
+}
+
+int sd_ipv4acd_set_callback(sd_ipv4acd *acd, sd_ipv4acd_callback_t cb, void *userdata) {
+ assert_return(acd, -EINVAL);
+
+ acd->callback = cb;
+ acd->userdata = userdata;
+
+ return 0;
+}
+
+int sd_ipv4acd_set_address(sd_ipv4acd *acd, const struct in_addr *address) {
+ assert_return(acd, -EINVAL);
+ assert_return(address, -EINVAL);
+ assert_return(acd->state == IPV4ACD_STATE_INIT, -EBUSY);
+
+ acd->address = address->s_addr;
+
+ return 0;
+}
+
+int sd_ipv4acd_is_running(sd_ipv4acd *acd) {
+ assert_return(acd, false);
+
+ return acd->state != IPV4ACD_STATE_INIT;
+}
+
+int sd_ipv4acd_start(sd_ipv4acd *acd) {
+ int r;
+
+ assert_return(acd, -EINVAL);
+ assert_return(acd->event, -EINVAL);
+ assert_return(acd->ifindex > 0, -EINVAL);
+ assert_return(acd->address != 0, -EINVAL);
+ assert_return(!ether_addr_is_null(&acd->mac_addr), -EINVAL);
+ assert_return(acd->state == IPV4ACD_STATE_INIT, -EBUSY);
+
+ r = arp_network_bind_raw_socket(acd->ifindex, acd->address, &acd->mac_addr);
+ if (r < 0)
+ return r;
+
+ safe_close(acd->fd);
+ acd->fd = r;
+ acd->defend_window = 0;
+ acd->n_conflict = 0;
+
+ r = sd_event_add_io(acd->event, &acd->receive_message_event_source, acd->fd, EPOLLIN, ipv4acd_on_packet, acd);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(acd->receive_message_event_source, acd->event_priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(acd->receive_message_event_source, "ipv4acd-receive-message");
+
+ r = ipv4acd_set_next_wakeup(acd, 0, 0);
+ if (r < 0)
+ goto fail;
+
+ ipv4acd_set_state(acd, IPV4ACD_STATE_STARTED, true);
+ return 0;
+
+fail:
+ ipv4acd_reset(acd);
+ return r;
+}
diff --git a/src/libsystemd-network/sd-ipv4ll.c b/src/libsystemd-network/sd-ipv4ll.c
new file mode 100644
index 0000000..e451dff
--- /dev/null
+++ b/src/libsystemd-network/sd-ipv4ll.c
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Axis Communications AB. All rights reserved.
+***/
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sd-id128.h"
+#include "sd-ipv4acd.h"
+#include "sd-ipv4ll.h"
+
+#include "alloc-util.h"
+#include "ether-addr-util.h"
+#include "in-addr-util.h"
+#include "list.h"
+#include "random-util.h"
+#include "siphash24.h"
+#include "sparse-endian.h"
+#include "string-util.h"
+#include "util.h"
+
+#define IPV4LL_NETWORK UINT32_C(0xA9FE0000)
+#define IPV4LL_NETMASK UINT32_C(0xFFFF0000)
+
+#define IPV4LL_DONT_DESTROY(ll) \
+ _cleanup_(sd_ipv4ll_unrefp) _unused_ sd_ipv4ll *_dont_destroy_##ll = sd_ipv4ll_ref(ll)
+
+struct sd_ipv4ll {
+ unsigned n_ref;
+
+ sd_ipv4acd *acd;
+
+ be32_t address; /* the address pushed to ACD */
+ struct ether_addr mac;
+
+ struct {
+ le64_t value;
+ le64_t generation;
+ } seed;
+ bool seed_set;
+
+ /* External */
+ be32_t claimed_address;
+
+ sd_ipv4ll_callback_t callback;
+ void* userdata;
+};
+
+#define log_ipv4ll_errno(ll, error, fmt, ...) log_internal(LOG_DEBUG, error, __FILE__, __LINE__, __func__, "IPV4LL: " fmt, ##__VA_ARGS__)
+#define log_ipv4ll(ll, fmt, ...) log_ipv4ll_errno(ll, 0, fmt, ##__VA_ARGS__)
+
+static void ipv4ll_on_acd(sd_ipv4acd *ll, int event, void *userdata);
+
+static sd_ipv4ll *ipv4ll_free(sd_ipv4ll *ll) {
+ assert(ll);
+
+ sd_ipv4acd_unref(ll->acd);
+ return mfree(ll);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_ipv4ll, sd_ipv4ll, ipv4ll_free);
+
+int sd_ipv4ll_new(sd_ipv4ll **ret) {
+ _cleanup_(sd_ipv4ll_unrefp) sd_ipv4ll *ll = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ ll = new0(sd_ipv4ll, 1);
+ if (!ll)
+ return -ENOMEM;
+
+ ll->n_ref = 1;
+
+ r = sd_ipv4acd_new(&ll->acd);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4acd_set_callback(ll->acd, ipv4ll_on_acd, ll);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(ll);
+
+ return 0;
+}
+
+int sd_ipv4ll_stop(sd_ipv4ll *ll) {
+ assert_return(ll, -EINVAL);
+
+ return sd_ipv4acd_stop(ll->acd);
+}
+
+int sd_ipv4ll_set_ifindex(sd_ipv4ll *ll, int ifindex) {
+ assert_return(ll, -EINVAL);
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(sd_ipv4ll_is_running(ll) == 0, -EBUSY);
+
+ return sd_ipv4acd_set_ifindex(ll->acd, ifindex);
+}
+
+int sd_ipv4ll_set_mac(sd_ipv4ll *ll, const struct ether_addr *addr) {
+ int r;
+
+ assert_return(ll, -EINVAL);
+ assert_return(addr, -EINVAL);
+ assert_return(sd_ipv4ll_is_running(ll) == 0, -EBUSY);
+
+ r = sd_ipv4acd_set_mac(ll->acd, addr);
+ if (r < 0)
+ return r;
+
+ ll->mac = *addr;
+ return 0;
+}
+
+int sd_ipv4ll_detach_event(sd_ipv4ll *ll) {
+ assert_return(ll, -EINVAL);
+
+ return sd_ipv4acd_detach_event(ll->acd);
+}
+
+int sd_ipv4ll_attach_event(sd_ipv4ll *ll, sd_event *event, int64_t priority) {
+ assert_return(ll, -EINVAL);
+
+ return sd_ipv4acd_attach_event(ll->acd, event, priority);
+}
+
+int sd_ipv4ll_set_callback(sd_ipv4ll *ll, sd_ipv4ll_callback_t cb, void *userdata) {
+ assert_return(ll, -EINVAL);
+
+ ll->callback = cb;
+ ll->userdata = userdata;
+
+ return 0;
+}
+
+int sd_ipv4ll_get_address(sd_ipv4ll *ll, struct in_addr *address) {
+ assert_return(ll, -EINVAL);
+ assert_return(address, -EINVAL);
+
+ if (ll->claimed_address == 0)
+ return -ENOENT;
+
+ address->s_addr = ll->claimed_address;
+
+ return 0;
+}
+
+int sd_ipv4ll_set_address_seed(sd_ipv4ll *ll, uint64_t seed) {
+ assert_return(ll, -EINVAL);
+ assert_return(sd_ipv4ll_is_running(ll) == 0, -EBUSY);
+
+ ll->seed.value = htole64(seed);
+ ll->seed_set = true;
+
+ return 0;
+}
+
+int sd_ipv4ll_is_running(sd_ipv4ll *ll) {
+ assert_return(ll, false);
+
+ return sd_ipv4acd_is_running(ll->acd);
+}
+
+static bool ipv4ll_address_is_valid(const struct in_addr *address) {
+ assert(address);
+
+ if (!in_addr_is_link_local(AF_INET, (const union in_addr_union *) address))
+ return false;
+
+ return !IN_SET(be32toh(address->s_addr) & 0x0000FF00U, 0x0000U, 0xFF00U);
+}
+
+int sd_ipv4ll_set_address(sd_ipv4ll *ll, const struct in_addr *address) {
+ int r;
+
+ assert_return(ll, -EINVAL);
+ assert_return(address, -EINVAL);
+ assert_return(ipv4ll_address_is_valid(address), -EINVAL);
+
+ r = sd_ipv4acd_set_address(ll->acd, address);
+ if (r < 0)
+ return r;
+
+ ll->address = address->s_addr;
+
+ return 0;
+}
+
+#define PICK_HASH_KEY SD_ID128_MAKE(15,ac,82,a6,d6,3f,49,78,98,77,5d,0c,69,02,94,0b)
+
+static int ipv4ll_pick_address(sd_ipv4ll *ll) {
+ _cleanup_free_ char *address = NULL;
+ be32_t addr;
+
+ assert(ll);
+
+ do {
+ uint64_t h;
+
+ h = siphash24(&ll->seed, sizeof(ll->seed), PICK_HASH_KEY.bytes);
+
+ /* Increase the generation counter by one */
+ ll->seed.generation = htole64(le64toh(ll->seed.generation) + 1);
+
+ addr = htobe32((h & UINT32_C(0x0000FFFF)) | IPV4LL_NETWORK);
+ } while (addr == ll->address ||
+ IN_SET(be32toh(addr) & 0x0000FF00U, 0x0000U, 0xFF00U));
+
+ (void) in_addr_to_string(AF_INET, &(union in_addr_union) { .in.s_addr = addr }, &address);
+ log_ipv4ll(ll, "Picked new IP address %s.", strna(address));
+
+ return sd_ipv4ll_set_address(ll, &(struct in_addr) { addr });
+}
+
+int sd_ipv4ll_restart(sd_ipv4ll *ll) {
+ ll->address = 0;
+
+ return sd_ipv4ll_start(ll);
+}
+
+#define MAC_HASH_KEY SD_ID128_MAKE(df,04,22,98,3f,ad,14,52,f9,87,2e,d1,9c,70,e2,f2)
+
+int sd_ipv4ll_start(sd_ipv4ll *ll) {
+ int r;
+ bool picked_address = false;
+
+ assert_return(ll, -EINVAL);
+ assert_return(!ether_addr_is_null(&ll->mac), -EINVAL);
+ assert_return(sd_ipv4ll_is_running(ll) == 0, -EBUSY);
+
+ /* If no random seed is set, generate some from the MAC address */
+ if (!ll->seed_set)
+ ll->seed.value = htole64(siphash24(ll->mac.ether_addr_octet, ETH_ALEN, MAC_HASH_KEY.bytes));
+
+ /* Restart the generation counter. */
+ ll->seed.generation = 0;
+
+ if (ll->address == 0) {
+ r = ipv4ll_pick_address(ll);
+ if (r < 0)
+ return r;
+
+ picked_address = true;
+ }
+
+ r = sd_ipv4acd_start(ll->acd);
+ if (r < 0) {
+
+ /* We couldn't start? If so, let's forget the picked address again, the user might make a change and
+ * retry, and we want the new data to take effect when picking an address. */
+ if (picked_address)
+ ll->address = 0;
+
+ return r;
+ }
+
+ return 0;
+}
+
+static void ipv4ll_client_notify(sd_ipv4ll *ll, int event) {
+ assert(ll);
+
+ if (ll->callback)
+ ll->callback(ll, event, ll->userdata);
+}
+
+void ipv4ll_on_acd(sd_ipv4acd *acd, int event, void *userdata) {
+ sd_ipv4ll *ll = userdata;
+ IPV4LL_DONT_DESTROY(ll);
+ int r;
+
+ assert(acd);
+ assert(ll);
+
+ switch (event) {
+
+ case SD_IPV4ACD_EVENT_STOP:
+ ipv4ll_client_notify(ll, SD_IPV4LL_EVENT_STOP);
+ ll->claimed_address = 0;
+ break;
+
+ case SD_IPV4ACD_EVENT_BIND:
+ ll->claimed_address = ll->address;
+ ipv4ll_client_notify(ll, SD_IPV4LL_EVENT_BIND);
+ break;
+
+ case SD_IPV4ACD_EVENT_CONFLICT:
+ /* if an address was already bound we must call up to the
+ user to handle this, otherwise we just try again */
+ if (ll->claimed_address != 0) {
+ ipv4ll_client_notify(ll, SD_IPV4LL_EVENT_CONFLICT);
+
+ ll->claimed_address = 0;
+ } else {
+ r = ipv4ll_pick_address(ll);
+ if (r < 0)
+ goto error;
+
+ r = sd_ipv4acd_start(ll->acd);
+ if (r < 0)
+ goto error;
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Invalid IPv4ACD event.");
+ }
+
+ return;
+
+error:
+ ipv4ll_client_notify(ll, SD_IPV4LL_EVENT_STOP);
+}
diff --git a/src/libsystemd-network/sd-lldp.c b/src/libsystemd-network/sd-lldp.c
new file mode 100644
index 0000000..969fc71
--- /dev/null
+++ b/src/libsystemd-network/sd-lldp.c
@@ -0,0 +1,496 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+
+#include "sd-lldp.h"
+
+#include "alloc-util.h"
+#include "ether-addr-util.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "lldp-internal.h"
+#include "lldp-neighbor.h"
+#include "lldp-network.h"
+#include "socket-util.h"
+#include "string-table.h"
+
+#define LLDP_DEFAULT_NEIGHBORS_MAX 128U
+
+static const char * const lldp_event_table[_SD_LLDP_EVENT_MAX] = {
+ [SD_LLDP_EVENT_ADDED] = "added",
+ [SD_LLDP_EVENT_REMOVED] = "removed",
+ [SD_LLDP_EVENT_UPDATED] = "updated",
+ [SD_LLDP_EVENT_REFRESHED] = "refreshed",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(lldp_event, sd_lldp_event);
+
+static void lldp_flush_neighbors(sd_lldp *lldp) {
+ assert(lldp);
+
+ hashmap_clear(lldp->neighbor_by_id);
+}
+
+static void lldp_callback(sd_lldp *lldp, sd_lldp_event event, sd_lldp_neighbor *n) {
+ assert(lldp);
+ assert(event >= 0 && event < _SD_LLDP_EVENT_MAX);
+
+ if (!lldp->callback) {
+ log_lldp("Received '%s' event.", lldp_event_to_string(event));
+ return;
+ }
+
+ log_lldp("Invoking callback for '%s' event.", lldp_event_to_string(event));
+ lldp->callback(lldp, event, n, lldp->userdata);
+}
+
+static int lldp_make_space(sd_lldp *lldp, size_t extra) {
+ usec_t t = USEC_INFINITY;
+ bool changed = false;
+
+ assert(lldp);
+
+ /* Remove all entries that are past their TTL, and more until at least the specified number of extra entries
+ * are free. */
+
+ for (;;) {
+ _cleanup_(sd_lldp_neighbor_unrefp) sd_lldp_neighbor *n = NULL;
+
+ n = prioq_peek(lldp->neighbor_by_expiry);
+ if (!n)
+ break;
+
+ sd_lldp_neighbor_ref(n);
+
+ if (hashmap_size(lldp->neighbor_by_id) > LESS_BY(lldp->neighbors_max, extra))
+ goto remove_one;
+
+ if (t == USEC_INFINITY)
+ t = now(clock_boottime_or_monotonic());
+
+ if (n->until > t)
+ break;
+
+ remove_one:
+ lldp_neighbor_unlink(n);
+ lldp_callback(lldp, SD_LLDP_EVENT_REMOVED, n);
+ changed = true;
+ }
+
+ return changed;
+}
+
+static bool lldp_keep_neighbor(sd_lldp *lldp, sd_lldp_neighbor *n) {
+ assert(lldp);
+ assert(n);
+
+ /* Don't keep data with a zero TTL */
+ if (n->ttl <= 0)
+ return false;
+
+ /* Filter out data from the filter address */
+ if (!ether_addr_is_null(&lldp->filter_address) &&
+ ether_addr_equal(&lldp->filter_address, &n->source_address))
+ return false;
+
+ /* Only add if the neighbor has a capability we are interested in. Note that we also store all neighbors with
+ * no caps field set. */
+ if (n->has_capabilities &&
+ (n->enabled_capabilities & lldp->capability_mask) == 0)
+ return false;
+
+ /* Keep everything else */
+ return true;
+}
+
+static int lldp_start_timer(sd_lldp *lldp, sd_lldp_neighbor *neighbor);
+
+static int lldp_add_neighbor(sd_lldp *lldp, sd_lldp_neighbor *n) {
+ _cleanup_(sd_lldp_neighbor_unrefp) sd_lldp_neighbor *old = NULL;
+ bool keep;
+ int r;
+
+ assert(lldp);
+ assert(n);
+ assert(!n->lldp);
+
+ keep = lldp_keep_neighbor(lldp, n);
+
+ /* First retrieve the old entry for this MSAP */
+ old = hashmap_get(lldp->neighbor_by_id, &n->id);
+ if (old) {
+ sd_lldp_neighbor_ref(old);
+
+ if (!keep) {
+ lldp_neighbor_unlink(old);
+ lldp_callback(lldp, SD_LLDP_EVENT_REMOVED, old);
+ return 0;
+ }
+
+ if (lldp_neighbor_equal(n, old)) {
+ /* Is this equal, then restart the TTL counter, but don't do anything else. */
+ old->timestamp = n->timestamp;
+ lldp_start_timer(lldp, old);
+ lldp_callback(lldp, SD_LLDP_EVENT_REFRESHED, old);
+ return 0;
+ }
+
+ /* Data changed, remove the old entry, and add a new one */
+ lldp_neighbor_unlink(old);
+
+ } else if (!keep)
+ return 0;
+
+ /* Then, make room for at least one new neighbor */
+ lldp_make_space(lldp, 1);
+
+ r = hashmap_put(lldp->neighbor_by_id, &n->id, n);
+ if (r < 0)
+ goto finish;
+
+ r = prioq_put(lldp->neighbor_by_expiry, n, &n->prioq_idx);
+ if (r < 0) {
+ assert_se(hashmap_remove(lldp->neighbor_by_id, &n->id) == n);
+ goto finish;
+ }
+
+ n->lldp = lldp;
+
+ lldp_start_timer(lldp, n);
+ lldp_callback(lldp, old ? SD_LLDP_EVENT_UPDATED : SD_LLDP_EVENT_ADDED, n);
+
+ return 1;
+
+finish:
+ if (old)
+ lldp_callback(lldp, SD_LLDP_EVENT_REMOVED, old);
+
+ return r;
+}
+
+static int lldp_handle_datagram(sd_lldp *lldp, sd_lldp_neighbor *n) {
+ int r;
+
+ assert(lldp);
+ assert(n);
+
+ r = lldp_neighbor_parse(n);
+ if (r == -EBADMSG) /* Ignore bad messages */
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = lldp_add_neighbor(lldp, n);
+ if (r < 0) {
+ log_lldp_errno(r, "Failed to add datagram. Ignoring.");
+ return 0;
+ }
+
+ log_lldp("Successfully processed LLDP datagram.");
+ return 0;
+}
+
+static int lldp_receive_datagram(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(sd_lldp_neighbor_unrefp) sd_lldp_neighbor *n = NULL;
+ ssize_t space, length;
+ sd_lldp *lldp = userdata;
+ struct timespec ts;
+
+ assert(fd >= 0);
+ assert(lldp);
+
+ space = next_datagram_size_fd(fd);
+ if (space < 0)
+ return log_lldp_errno(space, "Failed to determine datagram size to read: %m");
+
+ n = lldp_neighbor_new(space);
+ if (!n)
+ return -ENOMEM;
+
+ length = recv(fd, LLDP_NEIGHBOR_RAW(n), n->raw_size, MSG_DONTWAIT);
+ if (length < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return log_lldp_errno(errno, "Failed to read LLDP datagram: %m");
+ }
+
+ if ((size_t) length != n->raw_size) {
+ log_lldp("Packet size mismatch.");
+ return -EINVAL;
+ }
+
+ /* Try to get the timestamp of this packet if it is known */
+ if (ioctl(fd, SIOCGSTAMPNS, &ts) >= 0)
+ triple_timestamp_from_realtime(&n->timestamp, timespec_load(&ts));
+ else
+ triple_timestamp_get(&n->timestamp);
+
+ return lldp_handle_datagram(lldp, n);
+}
+
+static void lldp_reset(sd_lldp *lldp) {
+ assert(lldp);
+
+ (void) event_source_disable(lldp->timer_event_source);
+ lldp->io_event_source = sd_event_source_unref(lldp->io_event_source);
+ lldp->fd = safe_close(lldp->fd);
+}
+
+_public_ int sd_lldp_start(sd_lldp *lldp) {
+ int r;
+
+ assert_return(lldp, -EINVAL);
+ assert_return(lldp->event, -EINVAL);
+ assert_return(lldp->ifindex > 0, -EINVAL);
+
+ if (lldp->fd >= 0)
+ return 0;
+
+ assert(!lldp->io_event_source);
+
+ lldp->fd = lldp_network_bind_raw_socket(lldp->ifindex);
+ if (lldp->fd < 0)
+ return lldp->fd;
+
+ r = sd_event_add_io(lldp->event, &lldp->io_event_source, lldp->fd, EPOLLIN, lldp_receive_datagram, lldp);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(lldp->io_event_source, lldp->event_priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(lldp->io_event_source, "lldp-io");
+
+ log_lldp("Started LLDP client");
+ return 1;
+
+fail:
+ lldp_reset(lldp);
+ return r;
+}
+
+_public_ int sd_lldp_stop(sd_lldp *lldp) {
+ assert_return(lldp, -EINVAL);
+
+ if (lldp->fd < 0)
+ return 0;
+
+ log_lldp("Stopping LLDP client");
+
+ lldp_reset(lldp);
+ lldp_flush_neighbors(lldp);
+
+ return 1;
+}
+
+_public_ int sd_lldp_attach_event(sd_lldp *lldp, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(lldp, -EINVAL);
+ assert_return(lldp->fd < 0, -EBUSY);
+ assert_return(!lldp->event, -EBUSY);
+
+ if (event)
+ lldp->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&lldp->event);
+ if (r < 0)
+ return r;
+ }
+
+ lldp->event_priority = priority;
+
+ return 0;
+}
+
+_public_ int sd_lldp_detach_event(sd_lldp *lldp) {
+
+ assert_return(lldp, -EINVAL);
+ assert_return(lldp->fd < 0, -EBUSY);
+
+ lldp->event = sd_event_unref(lldp->event);
+ return 0;
+}
+
+_public_ sd_event* sd_lldp_get_event(sd_lldp *lldp) {
+ assert_return(lldp, NULL);
+
+ return lldp->event;
+}
+
+_public_ int sd_lldp_set_callback(sd_lldp *lldp, sd_lldp_callback_t cb, void *userdata) {
+ assert_return(lldp, -EINVAL);
+
+ lldp->callback = cb;
+ lldp->userdata = userdata;
+
+ return 0;
+}
+
+_public_ int sd_lldp_set_ifindex(sd_lldp *lldp, int ifindex) {
+ assert_return(lldp, -EINVAL);
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(lldp->fd < 0, -EBUSY);
+
+ lldp->ifindex = ifindex;
+ return 0;
+}
+
+static sd_lldp* lldp_free(sd_lldp *lldp) {
+ assert(lldp);
+
+ lldp->timer_event_source = sd_event_source_unref(lldp->timer_event_source);
+
+ lldp_reset(lldp);
+ sd_lldp_detach_event(lldp);
+ lldp_flush_neighbors(lldp);
+
+ hashmap_free(lldp->neighbor_by_id);
+ prioq_free(lldp->neighbor_by_expiry);
+ return mfree(lldp);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_lldp, sd_lldp, lldp_free);
+
+_public_ int sd_lldp_new(sd_lldp **ret) {
+ _cleanup_(sd_lldp_unrefp) sd_lldp *lldp = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ lldp = new(sd_lldp, 1);
+ if (!lldp)
+ return -ENOMEM;
+
+ *lldp = (sd_lldp) {
+ .n_ref = 1,
+ .fd = -1,
+ .neighbors_max = LLDP_DEFAULT_NEIGHBORS_MAX,
+ .capability_mask = (uint16_t) -1,
+ };
+
+ lldp->neighbor_by_id = hashmap_new(&lldp_neighbor_hash_ops);
+ if (!lldp->neighbor_by_id)
+ return -ENOMEM;
+
+ r = prioq_ensure_allocated(&lldp->neighbor_by_expiry, lldp_neighbor_prioq_compare_func);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(lldp);
+
+ return 0;
+}
+
+static int neighbor_compare_func(sd_lldp_neighbor * const *a, sd_lldp_neighbor * const *b) {
+ return lldp_neighbor_id_compare_func(&(*a)->id, &(*b)->id);
+}
+
+static int on_timer_event(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_lldp *lldp = userdata;
+ int r;
+
+ r = lldp_make_space(lldp, 0);
+ if (r < 0)
+ return log_lldp_errno(r, "Failed to make space: %m");
+
+ r = lldp_start_timer(lldp, NULL);
+ if (r < 0)
+ return log_lldp_errno(r, "Failed to restart timer: %m");
+
+ return 0;
+}
+
+static int lldp_start_timer(sd_lldp *lldp, sd_lldp_neighbor *neighbor) {
+ sd_lldp_neighbor *n;
+
+ assert(lldp);
+
+ if (neighbor)
+ lldp_neighbor_start_ttl(neighbor);
+
+ n = prioq_peek(lldp->neighbor_by_expiry);
+ if (!n)
+ return event_source_disable(lldp->timer_event_source);
+
+ if (!lldp->event)
+ return 0;
+
+ return event_reset_time(lldp->event, &lldp->timer_event_source,
+ clock_boottime_or_monotonic(),
+ n->until, 0,
+ on_timer_event, lldp,
+ lldp->event_priority, "lldp-timer", true);
+}
+
+_public_ int sd_lldp_get_neighbors(sd_lldp *lldp, sd_lldp_neighbor ***ret) {
+ sd_lldp_neighbor **l = NULL, *n;
+ Iterator i;
+ int k = 0, r;
+
+ assert_return(lldp, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (hashmap_isempty(lldp->neighbor_by_id)) { /* Special shortcut */
+ *ret = NULL;
+ return 0;
+ }
+
+ l = new0(sd_lldp_neighbor*, hashmap_size(lldp->neighbor_by_id));
+ if (!l)
+ return -ENOMEM;
+
+ r = lldp_start_timer(lldp, NULL);
+ if (r < 0) {
+ free(l);
+ return r;
+ }
+
+ HASHMAP_FOREACH(n, lldp->neighbor_by_id, i)
+ l[k++] = sd_lldp_neighbor_ref(n);
+
+ assert((size_t) k == hashmap_size(lldp->neighbor_by_id));
+
+ /* Return things in a stable order */
+ typesafe_qsort(l, k, neighbor_compare_func);
+ *ret = l;
+
+ return k;
+}
+
+_public_ int sd_lldp_set_neighbors_max(sd_lldp *lldp, uint64_t m) {
+ assert_return(lldp, -EINVAL);
+ assert_return(m <= 0, -EINVAL);
+
+ lldp->neighbors_max = m;
+ lldp_make_space(lldp, 0);
+
+ return 0;
+}
+
+_public_ int sd_lldp_match_capabilities(sd_lldp *lldp, uint16_t mask) {
+ assert_return(lldp, -EINVAL);
+ assert_return(mask != 0, -EINVAL);
+
+ lldp->capability_mask = mask;
+
+ return 0;
+}
+
+_public_ int sd_lldp_set_filter_address(sd_lldp *lldp, const struct ether_addr *addr) {
+ assert_return(lldp, -EINVAL);
+
+ /* In order to deal nicely with bridges that send back our own packets, allow one address to be filtered, so
+ * that our own can be filtered out here. */
+
+ if (addr)
+ lldp->filter_address = *addr;
+ else
+ zero(lldp->filter_address);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/sd-ndisc.c b/src/libsystemd-network/sd-ndisc.c
new file mode 100644
index 0000000..32c20b1
--- /dev/null
+++ b/src/libsystemd-network/sd-ndisc.c
@@ -0,0 +1,388 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/icmp6.h>
+#include <netinet/in.h>
+
+#include "sd-ndisc.h"
+
+#include "alloc-util.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "icmp6-util.h"
+#include "in-addr-util.h"
+#include "ndisc-internal.h"
+#include "ndisc-router.h"
+#include "random-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "util.h"
+
+#define NDISC_TIMEOUT_NO_RA_USEC (NDISC_ROUTER_SOLICITATION_INTERVAL * NDISC_MAX_ROUTER_SOLICITATIONS)
+
+static const char * const ndisc_event_table[_SD_NDISC_EVENT_MAX] = {
+ [SD_NDISC_EVENT_TIMEOUT] = "timeout",
+ [SD_NDISC_EVENT_ROUTER] = "router",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(ndisc_event, sd_ndisc_event);
+
+static void ndisc_callback(sd_ndisc *ndisc, sd_ndisc_event event, sd_ndisc_router *rt) {
+ assert(ndisc);
+ assert(event >= 0 && event < _SD_NDISC_EVENT_MAX);
+
+ if (!ndisc->callback) {
+ log_ndisc("Received '%s' event.", ndisc_event_to_string(event));
+ return;
+ }
+
+ log_ndisc("Invoking callback for '%s' event.", ndisc_event_to_string(event));
+ ndisc->callback(ndisc, event, rt, ndisc->userdata);
+}
+
+_public_ int sd_ndisc_set_callback(
+ sd_ndisc *nd,
+ sd_ndisc_callback_t callback,
+ void *userdata) {
+
+ assert_return(nd, -EINVAL);
+
+ nd->callback = callback;
+ nd->userdata = userdata;
+
+ return 0;
+}
+
+_public_ int sd_ndisc_set_ifindex(sd_ndisc *nd, int ifindex) {
+ assert_return(nd, -EINVAL);
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(nd->fd < 0, -EBUSY);
+
+ nd->ifindex = ifindex;
+ return 0;
+}
+
+_public_ int sd_ndisc_set_mac(sd_ndisc *nd, const struct ether_addr *mac_addr) {
+ assert_return(nd, -EINVAL);
+
+ if (mac_addr)
+ nd->mac_addr = *mac_addr;
+ else
+ zero(nd->mac_addr);
+
+ return 0;
+}
+
+_public_ int sd_ndisc_attach_event(sd_ndisc *nd, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(nd, -EINVAL);
+ assert_return(nd->fd < 0, -EBUSY);
+ assert_return(!nd->event, -EBUSY);
+
+ if (event)
+ nd->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&nd->event);
+ if (r < 0)
+ return 0;
+ }
+
+ nd->event_priority = priority;
+
+ return 0;
+}
+
+_public_ int sd_ndisc_detach_event(sd_ndisc *nd) {
+
+ assert_return(nd, -EINVAL);
+ assert_return(nd->fd < 0, -EBUSY);
+
+ nd->event = sd_event_unref(nd->event);
+ return 0;
+}
+
+_public_ sd_event *sd_ndisc_get_event(sd_ndisc *nd) {
+ assert_return(nd, NULL);
+
+ return nd->event;
+}
+
+static void ndisc_reset(sd_ndisc *nd) {
+ assert(nd);
+
+ (void) event_source_disable(nd->timeout_event_source);
+ (void) event_source_disable(nd->timeout_no_ra);
+ nd->retransmit_time = 0;
+ nd->recv_event_source = sd_event_source_unref(nd->recv_event_source);
+ nd->fd = safe_close(nd->fd);
+}
+
+static sd_ndisc *ndisc_free(sd_ndisc *nd) {
+ assert(nd);
+
+ nd->timeout_event_source = sd_event_source_unref(nd->timeout_event_source);
+ nd->timeout_no_ra = sd_event_source_unref(nd->timeout_no_ra);
+
+ ndisc_reset(nd);
+ sd_ndisc_detach_event(nd);
+ return mfree(nd);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_ndisc, sd_ndisc, ndisc_free);
+
+_public_ int sd_ndisc_new(sd_ndisc **ret) {
+ _cleanup_(sd_ndisc_unrefp) sd_ndisc *nd = NULL;
+
+ assert_return(ret, -EINVAL);
+
+ nd = new(sd_ndisc, 1);
+ if (!nd)
+ return -ENOMEM;
+
+ *nd = (sd_ndisc) {
+ .n_ref = 1,
+ .fd = -1,
+ };
+
+ *ret = TAKE_PTR(nd);
+
+ return 0;
+}
+
+_public_ int sd_ndisc_get_mtu(sd_ndisc *nd, uint32_t *mtu) {
+ assert_return(nd, -EINVAL);
+ assert_return(mtu, -EINVAL);
+
+ if (nd->mtu == 0)
+ return -ENODATA;
+
+ *mtu = nd->mtu;
+ return 0;
+}
+
+_public_ int sd_ndisc_get_hop_limit(sd_ndisc *nd, uint8_t *ret) {
+ assert_return(nd, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (nd->hop_limit == 0)
+ return -ENODATA;
+
+ *ret = nd->hop_limit;
+ return 0;
+}
+
+static int ndisc_handle_datagram(sd_ndisc *nd, sd_ndisc_router *rt) {
+ int r;
+
+ assert(nd);
+ assert(rt);
+
+ r = ndisc_router_parse(rt);
+ if (r == -EBADMSG) /* Bad packet */
+ return 0;
+ if (r < 0)
+ return 0;
+
+ /* Update global variables we keep */
+ if (rt->mtu > 0)
+ nd->mtu = rt->mtu;
+ if (rt->hop_limit > 0)
+ nd->hop_limit = rt->hop_limit;
+
+ log_ndisc("Received Router Advertisement: flags %s preference %s lifetime %" PRIu16 " sec",
+ rt->flags & ND_RA_FLAG_MANAGED ? "MANAGED" : rt->flags & ND_RA_FLAG_OTHER ? "OTHER" : "none",
+ rt->preference == SD_NDISC_PREFERENCE_HIGH ? "high" : rt->preference == SD_NDISC_PREFERENCE_LOW ? "low" : "medium",
+ rt->lifetime);
+
+ ndisc_callback(nd, SD_NDISC_EVENT_ROUTER, rt);
+ return 0;
+}
+
+static int ndisc_recv(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(sd_ndisc_router_unrefp) sd_ndisc_router *rt = NULL;
+ sd_ndisc *nd = userdata;
+ ssize_t buflen;
+ int r;
+ _cleanup_free_ char *addr = NULL;
+
+ assert(s);
+ assert(nd);
+ assert(nd->event);
+
+ buflen = next_datagram_size_fd(fd);
+ if (buflen < 0)
+ return log_ndisc_errno(buflen, "Failed to determine datagram size to read: %m");
+
+ rt = ndisc_router_new(buflen);
+ if (!rt)
+ return -ENOMEM;
+
+ r = icmp6_receive(fd, NDISC_ROUTER_RAW(rt), rt->raw_size, &rt->address,
+ &rt->timestamp);
+ if (r < 0) {
+ switch (r) {
+ case -EADDRNOTAVAIL:
+ (void) in_addr_to_string(AF_INET6, (union in_addr_union*) &rt->address, &addr);
+ log_ndisc("Received RA from non-link-local address %s. Ignoring", addr);
+ break;
+
+ case -EMULTIHOP:
+ log_ndisc("Received RA with invalid hop limit. Ignoring.");
+ break;
+
+ case -EPFNOSUPPORT:
+ log_ndisc("Received invalid source address from ICMPv6 socket. Ignoring.");
+ break;
+
+ case -EAGAIN: /* ignore spurious wakeups */
+ break;
+
+ default:
+ log_ndisc_errno(r, "Unexpected error while reading from ICMPv6, ignoring: %m");
+ break;
+ }
+
+ return 0;
+ }
+
+ (void) event_source_disable(nd->timeout_event_source);
+
+ return ndisc_handle_datagram(nd, rt);
+}
+
+static usec_t ndisc_timeout_compute_random(usec_t val) {
+ /* compute a time that is random within ±10% of the given value */
+ return val - val / 10 +
+ (random_u64() % (2 * USEC_PER_SEC)) * val / 10 / USEC_PER_SEC;
+}
+
+static int ndisc_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
+ char time_string[FORMAT_TIMESPAN_MAX];
+ sd_ndisc *nd = userdata;
+ usec_t time_now;
+ int r;
+
+ assert(s);
+ assert(nd);
+ assert(nd->event);
+
+ assert_se(sd_event_now(nd->event, clock_boottime_or_monotonic(), &time_now) >= 0);
+
+ if (!nd->retransmit_time)
+ nd->retransmit_time = ndisc_timeout_compute_random(NDISC_ROUTER_SOLICITATION_INTERVAL);
+ else {
+ if (nd->retransmit_time > NDISC_MAX_ROUTER_SOLICITATION_INTERVAL / 2)
+ nd->retransmit_time = ndisc_timeout_compute_random(NDISC_MAX_ROUTER_SOLICITATION_INTERVAL);
+ else
+ nd->retransmit_time += ndisc_timeout_compute_random(nd->retransmit_time);
+ }
+
+ r = event_reset_time(nd->event, &nd->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ time_now + nd->retransmit_time, 10 * USEC_PER_MSEC,
+ ndisc_timeout, nd,
+ nd->event_priority, "ndisc-timeout-no-ra", true);
+ if (r < 0)
+ goto fail;
+
+ r = icmp6_send_router_solicitation(nd->fd, &nd->mac_addr);
+ if (r < 0) {
+ log_ndisc_errno(r, "Error sending Router Solicitation: %m");
+ goto fail;
+ }
+
+ log_ndisc("Sent Router Solicitation, next solicitation in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX,
+ nd->retransmit_time, USEC_PER_SEC));
+
+ return 0;
+
+fail:
+ (void) sd_ndisc_stop(nd);
+ return 0;
+}
+
+static int ndisc_timeout_no_ra(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_ndisc *nd = userdata;
+
+ assert(s);
+ assert(nd);
+
+ log_ndisc("No RA received before link confirmation timeout");
+
+ (void) event_source_disable(nd->timeout_no_ra);
+ ndisc_callback(nd, SD_NDISC_EVENT_TIMEOUT, NULL);
+
+ return 0;
+}
+
+_public_ int sd_ndisc_stop(sd_ndisc *nd) {
+ assert_return(nd, -EINVAL);
+
+ if (nd->fd < 0)
+ return 0;
+
+ log_ndisc("Stopping IPv6 Router Solicitation client");
+
+ ndisc_reset(nd);
+ return 1;
+}
+
+_public_ int sd_ndisc_start(sd_ndisc *nd) {
+ int r;
+ usec_t time_now;
+
+ assert_return(nd, -EINVAL);
+ assert_return(nd->event, -EINVAL);
+ assert_return(nd->ifindex > 0, -EINVAL);
+
+ if (nd->fd >= 0)
+ return 0;
+
+ assert(!nd->recv_event_source);
+
+ r = sd_event_now(nd->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ goto fail;
+
+ nd->fd = icmp6_bind_router_solicitation(nd->ifindex);
+ if (nd->fd < 0)
+ return nd->fd;
+
+ r = sd_event_add_io(nd->event, &nd->recv_event_source, nd->fd, EPOLLIN, ndisc_recv, nd);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(nd->recv_event_source, nd->event_priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(nd->recv_event_source, "ndisc-receive-message");
+
+ r = event_reset_time(nd->event, &nd->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ 0, 0,
+ ndisc_timeout, nd,
+ nd->event_priority, "ndisc-timeout", true);
+ if (r < 0)
+ goto fail;
+
+ r = event_reset_time(nd->event, &nd->timeout_no_ra,
+ clock_boottime_or_monotonic(),
+ time_now + NDISC_TIMEOUT_NO_RA_USEC, 10 * USEC_PER_MSEC,
+ ndisc_timeout_no_ra, nd,
+ nd->event_priority, "ndisc-timeout-no-ra", true);
+ if (r < 0)
+ goto fail;
+
+ log_ndisc("Started IPv6 Router Solicitation client");
+ return 1;
+
+fail:
+ ndisc_reset(nd);
+ return r;
+}
diff --git a/src/libsystemd-network/sd-radv.c b/src/libsystemd-network/sd-radv.c
new file mode 100644
index 0000000..098e01f
--- /dev/null
+++ b/src/libsystemd-network/sd-radv.c
@@ -0,0 +1,771 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2017 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/icmp6.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include "sd-radv.h"
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "ether-addr-util.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "icmp6-util.h"
+#include "in-addr-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "radv-internal.h"
+#include "random-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+_public_ int sd_radv_new(sd_radv **ret) {
+ _cleanup_(sd_radv_unrefp) sd_radv *ra = NULL;
+
+ assert_return(ret, -EINVAL);
+
+ ra = new(sd_radv, 1);
+ if (!ra)
+ return -ENOMEM;
+
+ *ra = (sd_radv) {
+ .n_ref = 1,
+ .fd = -1,
+ };
+
+ *ret = TAKE_PTR(ra);
+
+ return 0;
+}
+
+_public_ int sd_radv_attach_event(sd_radv *ra, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(ra, -EINVAL);
+ assert_return(!ra->event, -EBUSY);
+
+ if (event)
+ ra->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&ra->event);
+ if (r < 0)
+ return 0;
+ }
+
+ ra->event_priority = priority;
+
+ return 0;
+}
+
+_public_ int sd_radv_detach_event(sd_radv *ra) {
+
+ assert_return(ra, -EINVAL);
+
+ ra->event = sd_event_unref(ra->event);
+ return 0;
+}
+
+_public_ sd_event *sd_radv_get_event(sd_radv *ra) {
+ assert_return(ra, NULL);
+
+ return ra->event;
+}
+
+static void radv_reset(sd_radv *ra) {
+ assert(ra);
+
+ (void) event_source_disable(ra->timeout_event_source);
+
+ ra->recv_event_source =
+ sd_event_source_unref(ra->recv_event_source);
+
+ ra->ra_sent = 0;
+}
+
+static sd_radv *radv_free(sd_radv *ra) {
+ if (!ra)
+ return NULL;
+
+ while (ra->prefixes) {
+ sd_radv_prefix *p = ra->prefixes;
+
+ LIST_REMOVE(prefix, ra->prefixes, p);
+ sd_radv_prefix_unref(p);
+ }
+
+ free(ra->rdnss);
+ free(ra->dnssl);
+
+ ra->timeout_event_source = sd_event_source_unref(ra->timeout_event_source);
+
+ radv_reset(ra);
+
+ sd_radv_detach_event(ra);
+
+ ra->fd = safe_close(ra->fd);
+
+ return mfree(ra);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_radv, sd_radv, radv_free);
+
+static int radv_send(sd_radv *ra, const struct in6_addr *dst, uint32_t router_lifetime) {
+ sd_radv_prefix *p;
+ struct sockaddr_in6 dst_addr = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_ALL_NODES_MULTICAST_INIT,
+ };
+ struct nd_router_advert adv = {};
+ struct {
+ struct nd_opt_hdr opthdr;
+ struct ether_addr slladdr;
+ } _packed_ opt_mac = {
+ .opthdr = {
+ .nd_opt_type = ND_OPT_SOURCE_LINKADDR,
+ .nd_opt_len = (sizeof(struct nd_opt_hdr) +
+ sizeof(struct ether_addr) - 1) /8 + 1,
+ },
+ };
+ struct nd_opt_mtu opt_mtu = {
+ .nd_opt_mtu_type = ND_OPT_MTU,
+ .nd_opt_mtu_len = 1,
+ };
+ /* Reserve iov space for RA header, linkaddr, MTU, N prefixes, RDNSS
+ and DNSSL */
+ struct iovec iov[5 + ra->n_prefixes];
+ struct msghdr msg = {
+ .msg_name = &dst_addr,
+ .msg_namelen = sizeof(dst_addr),
+ .msg_iov = iov,
+ };
+ usec_t time_now;
+ int r;
+
+ assert(ra);
+
+ r = sd_event_now(ra->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return r;
+
+ if (dst && !IN6_IS_ADDR_UNSPECIFIED(dst))
+ dst_addr.sin6_addr = *dst;
+
+ adv.nd_ra_type = ND_ROUTER_ADVERT;
+ adv.nd_ra_curhoplimit = ra->hop_limit;
+ adv.nd_ra_flags_reserved = ra->flags;
+ adv.nd_ra_router_lifetime = htobe16(router_lifetime);
+ iov[msg.msg_iovlen++] = IOVEC_MAKE(&adv, sizeof(adv));
+
+ /* MAC address is optional, either because the link does not use L2
+ addresses or load sharing is desired. See RFC 4861, Section 4.2 */
+ if (!ether_addr_is_null(&ra->mac_addr)) {
+ opt_mac.slladdr = ra->mac_addr;
+ iov[msg.msg_iovlen++] = IOVEC_MAKE(&opt_mac, sizeof(opt_mac));
+ }
+
+ if (ra->mtu) {
+ opt_mtu.nd_opt_mtu_mtu = htobe32(ra->mtu);
+ iov[msg.msg_iovlen++] = IOVEC_MAKE(&opt_mtu, sizeof(opt_mtu));
+ }
+
+ LIST_FOREACH(prefix, p, ra->prefixes) {
+ if (p->valid_until) {
+
+ if (time_now > p->valid_until)
+ p->opt.valid_lifetime = 0;
+ else
+ p->opt.valid_lifetime = htobe32((p->valid_until - time_now) / USEC_PER_SEC);
+
+ if (time_now > p->preferred_until)
+ p->opt.preferred_lifetime = 0;
+ else
+ p->opt.preferred_lifetime = htobe32((p->preferred_until - time_now) / USEC_PER_SEC);
+ }
+ iov[msg.msg_iovlen++] = IOVEC_MAKE(&p->opt, sizeof(p->opt));
+ }
+
+ if (ra->rdnss)
+ iov[msg.msg_iovlen++] = IOVEC_MAKE(ra->rdnss, ra->rdnss->length * 8);
+
+ if (ra->dnssl)
+ iov[msg.msg_iovlen++] = IOVEC_MAKE(ra->dnssl, ra->dnssl->length * 8);
+
+ if (sendmsg(ra->fd, &msg, 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int radv_recv(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_radv *ra = userdata;
+ _cleanup_free_ char *addr = NULL;
+ struct in6_addr src;
+ triple_timestamp timestamp;
+ int r;
+ ssize_t buflen;
+ _cleanup_free_ char *buf = NULL;
+
+ assert(s);
+ assert(ra);
+ assert(ra->event);
+
+ buflen = next_datagram_size_fd(fd);
+ if (buflen < 0)
+ return (int) buflen;
+
+ buf = new0(char, buflen);
+ if (!buf)
+ return -ENOMEM;
+
+ r = icmp6_receive(fd, buf, buflen, &src, &timestamp);
+ if (r < 0) {
+ switch (r) {
+ case -EADDRNOTAVAIL:
+ (void) in_addr_to_string(AF_INET6, (union in_addr_union*) &src, &addr);
+ log_radv("Received RS from non-link-local address %s. Ignoring", addr);
+ break;
+
+ case -EMULTIHOP:
+ log_radv("Received RS with invalid hop limit. Ignoring.");
+ break;
+
+ case -EPFNOSUPPORT:
+ log_radv("Received invalid source address from ICMPv6 socket. Ignoring.");
+ break;
+
+ case -EAGAIN: /* ignore spurious wakeups */
+ break;
+
+ default:
+ log_radv_errno(r, "Unexpected error receiving from ICMPv6 socket: %m");
+ break;
+ }
+
+ return 0;
+ }
+
+ if ((size_t) buflen < sizeof(struct nd_router_solicit)) {
+ log_radv("Too short packet received");
+ return 0;
+ }
+
+ (void) in_addr_to_string(AF_INET6, (union in_addr_union*) &src, &addr);
+
+ r = radv_send(ra, &src, ra->lifetime);
+ if (r < 0)
+ log_radv_errno(r, "Unable to send solicited Router Advertisement to %s: %m", strnull(addr));
+ else
+ log_radv("Sent solicited Router Advertisement to %s", strnull(addr));
+
+ return 0;
+}
+
+static usec_t radv_compute_timeout(usec_t min, usec_t max) {
+ assert_return(min <= max, SD_RADV_DEFAULT_MIN_TIMEOUT_USEC);
+
+ return min + (random_u32() % (max - min));
+}
+
+static int radv_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
+ int r;
+ sd_radv *ra = userdata;
+ usec_t min_timeout = SD_RADV_DEFAULT_MIN_TIMEOUT_USEC;
+ usec_t max_timeout = SD_RADV_DEFAULT_MAX_TIMEOUT_USEC;
+ usec_t time_now, timeout;
+ char time_string[FORMAT_TIMESPAN_MAX];
+
+ assert(s);
+ assert(ra);
+ assert(ra->event);
+
+ r = sd_event_now(ra->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ goto fail;
+
+ r = radv_send(ra, NULL, ra->lifetime);
+ if (r < 0)
+ log_radv_errno(r, "Unable to send Router Advertisement: %m");
+
+ /* RFC 4861, Section 6.2.4, sending initial Router Advertisements */
+ if (ra->ra_sent < SD_RADV_MAX_INITIAL_RTR_ADVERTISEMENTS) {
+ max_timeout = SD_RADV_MAX_INITIAL_RTR_ADVERT_INTERVAL_USEC;
+ min_timeout = SD_RADV_MAX_INITIAL_RTR_ADVERT_INTERVAL_USEC / 3;
+ }
+
+ timeout = radv_compute_timeout(min_timeout, max_timeout);
+
+ log_radv("Next Router Advertisement in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX,
+ timeout, USEC_PER_SEC));
+
+ r = event_reset_time(ra->event, &ra->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ time_now + timeout, MSEC_PER_SEC,
+ radv_timeout, ra,
+ ra->event_priority, "radv-timeout", true);
+ if (r < 0)
+ goto fail;
+
+ ra->ra_sent++;
+
+ return 0;
+
+fail:
+ sd_radv_stop(ra);
+
+ return 0;
+}
+
+_public_ int sd_radv_stop(sd_radv *ra) {
+ int r;
+
+ assert_return(ra, -EINVAL);
+
+ if (ra->state == SD_RADV_STATE_IDLE)
+ return 0;
+
+ log_radv("Stopping IPv6 Router Advertisement daemon");
+
+ /* RFC 4861, Section 6.2.5, send at least one Router Advertisement
+ with zero lifetime */
+ r = radv_send(ra, NULL, 0);
+ if (r < 0)
+ log_radv_errno(r, "Unable to send last Router Advertisement with router lifetime set to zero: %m");
+
+ radv_reset(ra);
+ ra->fd = safe_close(ra->fd);
+ ra->state = SD_RADV_STATE_IDLE;
+
+ return 0;
+}
+
+_public_ int sd_radv_start(sd_radv *ra) {
+ int r;
+
+ assert_return(ra, -EINVAL);
+ assert_return(ra->event, -EINVAL);
+ assert_return(ra->ifindex > 0, -EINVAL);
+
+ if (ra->state != SD_RADV_STATE_IDLE)
+ return 0;
+
+ r = event_reset_time(ra->event, &ra->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ 0, 0,
+ radv_timeout, ra,
+ ra->event_priority, "radv-timeout", true);
+ if (r < 0)
+ goto fail;
+
+ r = icmp6_bind_router_advertisement(ra->ifindex);
+ if (r < 0)
+ goto fail;
+
+ ra->fd = r;
+
+ r = sd_event_add_io(ra->event, &ra->recv_event_source, ra->fd, EPOLLIN, radv_recv, ra);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(ra->recv_event_source, ra->event_priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(ra->recv_event_source, "radv-receive-message");
+
+ ra->state = SD_RADV_STATE_ADVERTISING;
+
+ log_radv("Started IPv6 Router Advertisement daemon");
+
+ return 0;
+
+ fail:
+ radv_reset(ra);
+
+ return r;
+}
+
+_public_ int sd_radv_set_ifindex(sd_radv *ra, int ifindex) {
+ assert_return(ra, -EINVAL);
+ assert_return(ifindex >= -1, -EINVAL);
+
+ if (ra->state != SD_RADV_STATE_IDLE)
+ return -EBUSY;
+
+ ra->ifindex = ifindex;
+
+ return 0;
+}
+
+_public_ int sd_radv_set_mac(sd_radv *ra, const struct ether_addr *mac_addr) {
+ assert_return(ra, -EINVAL);
+
+ if (ra->state != SD_RADV_STATE_IDLE)
+ return -EBUSY;
+
+ if (mac_addr)
+ ra->mac_addr = *mac_addr;
+ else
+ zero(ra->mac_addr);
+
+ return 0;
+}
+
+_public_ int sd_radv_set_mtu(sd_radv *ra, uint32_t mtu) {
+ assert_return(ra, -EINVAL);
+ assert_return(mtu >= 1280, -EINVAL);
+
+ ra->mtu = mtu;
+
+ return 0;
+}
+
+_public_ int sd_radv_set_hop_limit(sd_radv *ra, uint8_t hop_limit) {
+ assert_return(ra, -EINVAL);
+
+ if (ra->state != SD_RADV_STATE_IDLE)
+ return -EBUSY;
+
+ ra->hop_limit = hop_limit;
+
+ return 0;
+}
+
+_public_ int sd_radv_set_router_lifetime(sd_radv *ra, uint32_t router_lifetime) {
+ assert_return(ra, -EINVAL);
+
+ if (ra->state != SD_RADV_STATE_IDLE)
+ return -EBUSY;
+
+ /* RFC 4191, Section 2.2, "...If the Router Lifetime is zero, the
+ preference value MUST be set to (00) by the sender..." */
+ if (router_lifetime == 0 &&
+ (ra->flags & (0x3 << 3)) != (SD_NDISC_PREFERENCE_MEDIUM << 3))
+ return -ETIME;
+
+ ra->lifetime = router_lifetime;
+
+ return 0;
+}
+
+_public_ int sd_radv_set_managed_information(sd_radv *ra, int managed) {
+ assert_return(ra, -EINVAL);
+
+ if (ra->state != SD_RADV_STATE_IDLE)
+ return -EBUSY;
+
+ SET_FLAG(ra->flags, ND_RA_FLAG_MANAGED, managed);
+
+ return 0;
+}
+
+_public_ int sd_radv_set_other_information(sd_radv *ra, int other) {
+ assert_return(ra, -EINVAL);
+
+ if (ra->state != SD_RADV_STATE_IDLE)
+ return -EBUSY;
+
+ SET_FLAG(ra->flags, ND_RA_FLAG_OTHER, other);
+
+ return 0;
+}
+
+_public_ int sd_radv_set_preference(sd_radv *ra, unsigned preference) {
+ int r = 0;
+
+ assert_return(ra, -EINVAL);
+ assert_return(IN_SET(preference,
+ SD_NDISC_PREFERENCE_LOW,
+ SD_NDISC_PREFERENCE_MEDIUM,
+ SD_NDISC_PREFERENCE_HIGH), -EINVAL);
+
+ ra->flags = (ra->flags & ~(0x3 << 3)) | (preference << 3);
+
+ return r;
+}
+
+_public_ int sd_radv_add_prefix(sd_radv *ra, sd_radv_prefix *p, int dynamic) {
+ sd_radv_prefix *cur;
+ int r;
+ _cleanup_free_ char *addr_p = NULL;
+ char time_string_preferred[FORMAT_TIMESPAN_MAX];
+ char time_string_valid[FORMAT_TIMESPAN_MAX];
+ usec_t time_now, valid, preferred, valid_until, preferred_until;
+
+ assert_return(ra, -EINVAL);
+
+ if (!p)
+ return -EINVAL;
+
+ /* Refuse prefixes that don't have a prefix set */
+ if (IN6_IS_ADDR_UNSPECIFIED(&p->opt.in6_addr))
+ return -ENOEXEC;
+
+ LIST_FOREACH(prefix, cur, ra->prefixes) {
+
+ r = in_addr_prefix_intersect(AF_INET6,
+ (union in_addr_union*) &cur->opt.in6_addr,
+ cur->opt.prefixlen,
+ (union in_addr_union*) &p->opt.in6_addr,
+ p->opt.prefixlen);
+ if (r > 0) {
+ _cleanup_free_ char *addr_cur = NULL;
+
+ (void) in_addr_to_string(AF_INET6,
+ (union in_addr_union*) &p->opt.in6_addr,
+ &addr_p);
+
+ if (dynamic && cur->opt.prefixlen == p->opt.prefixlen)
+ goto update;
+
+ (void) in_addr_to_string(AF_INET6,
+ (union in_addr_union*) &cur->opt.in6_addr,
+ &addr_cur);
+ log_radv("IPv6 prefix %s/%u already configured, ignoring %s/%u",
+ addr_cur, cur->opt.prefixlen,
+ addr_p, p->opt.prefixlen);
+
+ return -EEXIST;
+ }
+ }
+
+ p = sd_radv_prefix_ref(p);
+
+ LIST_APPEND(prefix, ra->prefixes, p);
+
+ ra->n_prefixes++;
+
+ (void) in_addr_to_string(AF_INET6, (union in_addr_union*) &p->opt.in6_addr, &addr_p);
+
+ if (!dynamic) {
+ log_radv("Added prefix %s/%d", addr_p, p->opt.prefixlen);
+ return 0;
+ }
+
+ cur = p;
+
+ update:
+ r = sd_event_now(ra->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return r;
+
+ valid = be32toh(p->opt.valid_lifetime) * USEC_PER_SEC;
+ valid_until = usec_add(valid, time_now);
+ if (valid_until == USEC_INFINITY)
+ return -EOVERFLOW;
+
+ preferred = be32toh(p->opt.preferred_lifetime) * USEC_PER_SEC;
+ preferred_until = usec_add(preferred, time_now);
+ if (preferred_until == USEC_INFINITY)
+ return -EOVERFLOW;
+
+ cur->valid_until = valid_until;
+ cur->preferred_until = preferred_until;
+
+ log_radv("%s prefix %s/%u preferred %s valid %s",
+ cur? "Updated": "Added",
+ addr_p, p->opt.prefixlen,
+ format_timespan(time_string_preferred, FORMAT_TIMESPAN_MAX,
+ preferred, USEC_PER_SEC),
+ format_timespan(time_string_valid, FORMAT_TIMESPAN_MAX,
+ valid, USEC_PER_SEC));
+
+ return 0;
+}
+
+_public_ sd_radv_prefix *sd_radv_remove_prefix(sd_radv *ra,
+ const struct in6_addr *prefix,
+ unsigned char prefixlen) {
+ sd_radv_prefix *cur, *next;
+
+ assert_return(ra, NULL);
+ assert_return(prefix, NULL);
+
+ LIST_FOREACH_SAFE(prefix, cur, next, ra->prefixes) {
+ if (prefixlen != cur->opt.prefixlen)
+ continue;
+
+ if (!in_addr_equal(AF_INET6,
+ (union in_addr_union *)prefix,
+ (union in_addr_union *)&cur->opt.in6_addr))
+ continue;
+
+ LIST_REMOVE(prefix, ra->prefixes, cur);
+ ra->n_prefixes--;
+
+ break;
+ }
+
+ return cur;
+}
+
+_public_ int sd_radv_set_rdnss(sd_radv *ra, uint32_t lifetime,
+ const struct in6_addr *dns, size_t n_dns) {
+ _cleanup_free_ struct sd_radv_opt_dns *opt_rdnss = NULL;
+ size_t len;
+
+ assert_return(ra, -EINVAL);
+ assert_return(n_dns < 128, -EINVAL);
+
+ if (!dns || n_dns == 0) {
+ ra->rdnss = mfree(ra->rdnss);
+ ra->n_rdnss = 0;
+
+ return 0;
+ }
+
+ len = sizeof(struct sd_radv_opt_dns) + sizeof(struct in6_addr) * n_dns;
+
+ opt_rdnss = malloc0(len);
+ if (!opt_rdnss)
+ return -ENOMEM;
+
+ opt_rdnss->type = SD_RADV_OPT_RDNSS;
+ opt_rdnss->length = len / 8;
+ opt_rdnss->lifetime = htobe32(lifetime);
+
+ memcpy(opt_rdnss + 1, dns, n_dns * sizeof(struct in6_addr));
+
+ free_and_replace(ra->rdnss, opt_rdnss);
+
+ ra->n_rdnss = n_dns;
+
+ return 0;
+}
+
+_public_ int sd_radv_set_dnssl(sd_radv *ra, uint32_t lifetime,
+ char **search_list) {
+ _cleanup_free_ struct sd_radv_opt_dns *opt_dnssl = NULL;
+ size_t len = 0;
+ char **s;
+ uint8_t *p;
+
+ assert_return(ra, -EINVAL);
+
+ if (strv_isempty(search_list)) {
+ ra->dnssl = mfree(ra->dnssl);
+ return 0;
+ }
+
+ STRV_FOREACH(s, search_list)
+ len += strlen(*s) + 2;
+
+ len = (sizeof(struct sd_radv_opt_dns) + len + 7) & ~0x7;
+
+ opt_dnssl = malloc0(len);
+ if (!opt_dnssl)
+ return -ENOMEM;
+
+ opt_dnssl->type = SD_RADV_OPT_DNSSL;
+ opt_dnssl->length = len / 8;
+ opt_dnssl->lifetime = htobe32(lifetime);
+
+ p = (uint8_t *)(opt_dnssl + 1);
+ len -= sizeof(struct sd_radv_opt_dns);
+
+ STRV_FOREACH(s, search_list) {
+ int r;
+
+ r = dns_name_to_wire_format(*s, p, len, false);
+ if (r < 0)
+ return r;
+
+ if (len < (size_t)r)
+ return -ENOBUFS;
+
+ p += r;
+ len -= r;
+ }
+
+ free_and_replace(ra->dnssl, opt_dnssl);
+
+ return 0;
+}
+
+_public_ int sd_radv_prefix_new(sd_radv_prefix **ret) {
+ sd_radv_prefix *p;
+
+ assert_return(ret, -EINVAL);
+
+ p = new(sd_radv_prefix, 1);
+ if (!p)
+ return -ENOMEM;
+
+ *p = (sd_radv_prefix) {
+ .n_ref = 1,
+
+ .opt.type = ND_OPT_PREFIX_INFORMATION,
+ .opt.length = (sizeof(p->opt) - 1)/8 + 1,
+ .opt.prefixlen = 64,
+
+ /* RFC 4861, Section 6.2.1 */
+ .opt.flags = ND_OPT_PI_FLAG_ONLINK|ND_OPT_PI_FLAG_AUTO,
+
+ .opt.preferred_lifetime = htobe32(604800),
+ .opt.valid_lifetime = htobe32(2592000),
+ };
+
+ *ret = p;
+ return 0;
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_radv_prefix, sd_radv_prefix, mfree);
+
+_public_ int sd_radv_prefix_set_prefix(sd_radv_prefix *p, const struct in6_addr *in6_addr,
+ unsigned char prefixlen) {
+ assert_return(p, -EINVAL);
+ assert_return(in6_addr, -EINVAL);
+
+ if (prefixlen < 3 || prefixlen > 128)
+ return -EINVAL;
+
+ if (prefixlen > 64)
+ /* unusual but allowed, log it */
+ log_radv("Unusual prefix length %d greater than 64", prefixlen);
+
+ p->opt.in6_addr = *in6_addr;
+ p->opt.prefixlen = prefixlen;
+
+ return 0;
+}
+
+_public_ int sd_radv_prefix_set_onlink(sd_radv_prefix *p, int onlink) {
+ assert_return(p, -EINVAL);
+
+ SET_FLAG(p->opt.flags, ND_OPT_PI_FLAG_ONLINK, onlink);
+
+ return 0;
+}
+
+_public_ int sd_radv_prefix_set_address_autoconfiguration(sd_radv_prefix *p,
+ int address_autoconfiguration) {
+ assert_return(p, -EINVAL);
+
+ SET_FLAG(p->opt.flags, ND_OPT_PI_FLAG_AUTO, address_autoconfiguration);
+
+ return 0;
+}
+
+_public_ int sd_radv_prefix_set_valid_lifetime(sd_radv_prefix *p,
+ uint32_t valid_lifetime) {
+ assert_return(p, -EINVAL);
+
+ p->opt.valid_lifetime = htobe32(valid_lifetime);
+
+ return 0;
+}
+
+_public_ int sd_radv_prefix_set_preferred_lifetime(sd_radv_prefix *p,
+ uint32_t preferred_lifetime) {
+ assert_return(p, -EINVAL);
+
+ p->opt.preferred_lifetime = htobe32(preferred_lifetime);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-acd.c b/src/libsystemd-network/test-acd.c
new file mode 100644
index 0000000..302eea2
--- /dev/null
+++ b/src/libsystemd-network/test-acd.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/veth.h>
+#include <net/if.h>
+
+#include "sd-event.h"
+#include "sd-ipv4acd.h"
+#include "sd-netlink.h"
+
+#include "in-addr-util.h"
+#include "netlink-util.h"
+#include "tests.h"
+#include "util.h"
+
+static void acd_handler(sd_ipv4acd *acd, int event, void *userdata) {
+ assert_se(acd);
+
+ switch (event) {
+ case SD_IPV4ACD_EVENT_BIND:
+ log_info("bound");
+ break;
+ case SD_IPV4ACD_EVENT_CONFLICT:
+ log_info("conflict");
+ break;
+ case SD_IPV4ACD_EVENT_STOP:
+ log_error("the client was stopped");
+ break;
+ default:
+ assert_not_reached("invalid ACD event");
+ }
+}
+
+static int client_run(int ifindex, const struct in_addr *pa, const struct ether_addr *ha, sd_event *e) {
+ sd_ipv4acd *acd;
+
+ assert_se(sd_ipv4acd_new(&acd) >= 0);
+ assert_se(sd_ipv4acd_attach_event(acd, e, 0) >= 0);
+
+ assert_se(sd_ipv4acd_set_ifindex(acd, ifindex) >= 0);
+ assert_se(sd_ipv4acd_set_mac(acd, ha) >= 0);
+ assert_se(sd_ipv4acd_set_address(acd, pa) >= 0);
+ assert_se(sd_ipv4acd_set_callback(acd, acd_handler, NULL) >= 0);
+
+ log_info("starting IPv4ACD client");
+
+ assert_se(sd_ipv4acd_start(acd) >= 0);
+
+ assert_se(sd_event_loop(e) >= 0);
+
+ assert_se(!sd_ipv4acd_unref(acd));
+
+ return EXIT_SUCCESS;
+}
+
+static int test_acd(const char *ifname, const char *address) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *reply = NULL;
+ union in_addr_union pa;
+ struct ether_addr ha;
+ int ifindex;
+
+ assert_se(in_addr_from_string(AF_INET, address, &pa) >= 0);
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ assert_se(sd_netlink_attach_event(rtnl, e, 0) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, 0) >= 0);
+ assert_se(sd_netlink_message_append_string(m, IFLA_IFNAME, ifname) >= 0);
+ assert_se(sd_netlink_call(rtnl, m, 0, &reply) >= 0);
+
+ assert_se(sd_rtnl_message_link_get_ifindex(reply, &ifindex) >= 0);
+ assert_se(sd_netlink_message_read_ether_addr(reply, IFLA_ADDRESS, &ha) >= 0);
+
+ client_run(ifindex, &pa.in, &ha, e);
+
+ return EXIT_SUCCESS;
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ if (argc == 3)
+ return test_acd(argv[1], argv[2]);
+ else {
+ log_error("This program takes two arguments.\n"
+ "\t %s <ifname> <IPv4 address>", program_invocation_short_name);
+ return EXIT_FAILURE;
+ }
+}
diff --git a/src/libsystemd-network/test-dhcp-client.c b/src/libsystemd-network/test-dhcp-client.c
new file mode 100644
index 0000000..fe6788d
--- /dev/null
+++ b/src/libsystemd-network/test-dhcp-client.c
@@ -0,0 +1,579 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <net/if.h>
+
+#include "sd-dhcp-client.h"
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "dhcp-identifier.h"
+#include "dhcp-internal.h"
+#include "dhcp-protocol.h"
+#include "fd-util.h"
+#include "random-util.h"
+#include "tests.h"
+#include "util.h"
+
+static uint8_t mac_addr[] = {'A', 'B', 'C', '1', '2', '3'};
+
+typedef int (*test_callback_recv_t)(size_t size, DHCPMessage *dhcp);
+
+static bool verbose = true;
+static int test_fd[2];
+static test_callback_recv_t callback_recv;
+static be32_t xid;
+static sd_event_source *test_hangcheck;
+
+static int test_dhcp_hangcheck(sd_event_source *s, uint64_t usec, void *userdata) {
+ assert_not_reached("Test case should have completed in 2 seconds");
+
+ return 0;
+}
+
+static void test_request_basic(sd_event *e) {
+ int r;
+
+ sd_dhcp_client *client;
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ /* Initialize client without Anonymize settings. */
+ r = sd_dhcp_client_new(&client, false);
+
+ assert_se(r >= 0);
+ assert_se(client);
+
+ r = sd_dhcp_client_attach_event(client, e, 0);
+ assert_se(r >= 0);
+
+ assert_se(sd_dhcp_client_set_request_option(NULL, 0) == -EINVAL);
+ assert_se(sd_dhcp_client_set_request_address(NULL, NULL) == -EINVAL);
+ assert_se(sd_dhcp_client_set_ifindex(NULL, 0) == -EINVAL);
+
+ assert_se(sd_dhcp_client_set_ifindex(client, 15) == 0);
+ assert_se(sd_dhcp_client_set_ifindex(client, -42) == -EINVAL);
+ assert_se(sd_dhcp_client_set_ifindex(client, -1) == -EINVAL);
+ assert_se(sd_dhcp_client_set_ifindex(client, 0) == -EINVAL);
+ assert_se(sd_dhcp_client_set_ifindex(client, 1) == 0);
+
+ assert_se(sd_dhcp_client_set_hostname(client, "host") == 1);
+ assert_se(sd_dhcp_client_set_hostname(client, "host.domain") == 1);
+ assert_se(sd_dhcp_client_set_hostname(client, NULL) == 1);
+ assert_se(sd_dhcp_client_set_hostname(client, "~host") == -EINVAL);
+ assert_se(sd_dhcp_client_set_hostname(client, "~host.domain") == -EINVAL);
+
+ assert_se(sd_dhcp_client_set_request_option(client,
+ SD_DHCP_OPTION_SUBNET_MASK) == -EEXIST);
+ assert_se(sd_dhcp_client_set_request_option(client,
+ SD_DHCP_OPTION_ROUTER) == -EEXIST);
+ /* This PRL option is not set when using Anonymize, but in this test
+ * Anonymize settings are not being used. */
+ assert_se(sd_dhcp_client_set_request_option(client,
+ SD_DHCP_OPTION_HOST_NAME) == -EEXIST);
+ assert_se(sd_dhcp_client_set_request_option(client,
+ SD_DHCP_OPTION_DOMAIN_NAME) == -EEXIST);
+ assert_se(sd_dhcp_client_set_request_option(client,
+ SD_DHCP_OPTION_DOMAIN_NAME_SERVER) == -EEXIST);
+
+ assert_se(sd_dhcp_client_set_request_option(client,
+ SD_DHCP_OPTION_PAD) == -EINVAL);
+ assert_se(sd_dhcp_client_set_request_option(client,
+ SD_DHCP_OPTION_END) == -EINVAL);
+ assert_se(sd_dhcp_client_set_request_option(client,
+ SD_DHCP_OPTION_MESSAGE_TYPE) == -EINVAL);
+ assert_se(sd_dhcp_client_set_request_option(client,
+ SD_DHCP_OPTION_OVERLOAD) == -EINVAL);
+ assert_se(sd_dhcp_client_set_request_option(client,
+ SD_DHCP_OPTION_PARAMETER_REQUEST_LIST)
+ == -EINVAL);
+
+ /* RFC7844: option 33 (SD_DHCP_OPTION_STATIC_ROUTE) is set in the
+ * default PRL when using Anonymize, so it is changed to other option
+ * that is not set by default, to check that it was set successfully.
+ * Options not set by default (using or not anonymize) are option 17
+ * (SD_DHCP_OPTION_ROOT_PATH) and 42 (SD_DHCP_OPTION_NTP_SERVER) */
+ assert_se(sd_dhcp_client_set_request_option(client, 17) == 0);
+ assert_se(sd_dhcp_client_set_request_option(client, 17) == -EEXIST);
+ assert_se(sd_dhcp_client_set_request_option(client, 42) == 0);
+ assert_se(sd_dhcp_client_set_request_option(client, 17) == -EEXIST);
+
+ sd_dhcp_client_unref(client);
+}
+
+static void test_request_anonymize(sd_event *e) {
+ int r;
+
+ sd_dhcp_client *client;
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ /* Initialize client with Anonymize settings. */
+ r = sd_dhcp_client_new(&client, true);
+
+ assert_se(r >= 0);
+ assert_se(client);
+
+ r = sd_dhcp_client_attach_event(client, e, 0);
+ assert_se(r >= 0);
+
+ assert_se(sd_dhcp_client_set_request_option(client,
+ SD_DHCP_OPTION_NETBIOS_NAMESERVER) == -EEXIST);
+ /* This PRL option is not set when using Anonymize */
+ assert_se(sd_dhcp_client_set_request_option(client,
+ SD_DHCP_OPTION_HOST_NAME) == 0);
+ assert_se(sd_dhcp_client_set_request_option(client,
+ SD_DHCP_OPTION_PARAMETER_REQUEST_LIST)
+ == -EINVAL);
+
+ /* RFC7844: option 101 (SD_DHCP_OPTION_NEW_TZDB_TIMEZONE) is not set in the
+ * default PRL when using Anonymize, */
+ assert_se(sd_dhcp_client_set_request_option(client, 101) == 0);
+ assert_se(sd_dhcp_client_set_request_option(client, 101) == -EEXIST);
+
+ sd_dhcp_client_unref(client);
+}
+
+static void test_checksum(void) {
+ uint8_t buf[20] = {
+ 0x45, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00,
+ 0x40, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff
+ };
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ assert_se(dhcp_packet_checksum((uint8_t*)&buf, 20) == be16toh(0x78ae));
+}
+
+static void test_dhcp_identifier_set_iaid(void) {
+ uint32_t iaid_legacy;
+ be32_t iaid;
+ int ifindex;
+
+ for (;;) {
+ char ifname[IFNAMSIZ];
+
+ /* try to find an ifindex which does not exist. I causes dhcp_identifier_set_iaid()
+ * to hash the MAC address. */
+ pseudo_random_bytes(&ifindex, sizeof(ifindex));
+ if (ifindex > 0 && !if_indextoname(ifindex, ifname))
+ break;
+ }
+
+ assert_se(dhcp_identifier_set_iaid(ifindex, mac_addr, sizeof(mac_addr), true, &iaid_legacy) >= 0);
+ assert_se(dhcp_identifier_set_iaid(ifindex, mac_addr, sizeof(mac_addr), false, &iaid) >= 0);
+
+ /* we expect, that the MAC address was hashed. The legacy value is in native
+ * endianness. */
+ assert_se(iaid_legacy == 0x8dde4ba8u);
+ assert_se(iaid == htole32(0x8dde4ba8u));
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ assert_se(iaid == iaid_legacy);
+#else
+ assert_se(iaid == __bswap_32(iaid_legacy));
+#endif
+}
+
+static int check_options(uint8_t code, uint8_t len, const void *option, void *userdata) {
+ switch(code) {
+ case SD_DHCP_OPTION_CLIENT_IDENTIFIER:
+ {
+ uint32_t iaid;
+ struct duid duid;
+ size_t duid_len;
+
+ assert_se(dhcp_identifier_set_duid_en(&duid, &duid_len) >= 0);
+ assert_se(dhcp_identifier_set_iaid(42, mac_addr, ETH_ALEN, true, &iaid) >= 0);
+
+ assert_se(len == sizeof(uint8_t) + sizeof(uint32_t) + duid_len);
+ assert_se(len == 19);
+ assert_se(((uint8_t*) option)[0] == 0xff);
+
+ assert_se(memcmp((uint8_t*) option + 1, &iaid, sizeof(iaid)) == 0);
+ assert_se(memcmp((uint8_t*) option + 5, &duid, duid_len) == 0);
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int dhcp_network_send_raw_socket(int s, const union sockaddr_union *link, const void *packet, size_t len) {
+ size_t size;
+ _cleanup_free_ DHCPPacket *discover;
+ uint16_t ip_check, udp_check;
+
+ assert_se(s >= 0);
+ assert_se(packet);
+
+ size = sizeof(DHCPPacket);
+ assert_se(len > size);
+
+ discover = memdup(packet, len);
+
+ assert_se(discover->ip.ttl == IPDEFTTL);
+ assert_se(discover->ip.protocol == IPPROTO_UDP);
+ assert_se(discover->ip.saddr == INADDR_ANY);
+ assert_se(discover->ip.daddr == INADDR_BROADCAST);
+ assert_se(discover->udp.source == be16toh(DHCP_PORT_CLIENT));
+ assert_se(discover->udp.dest == be16toh(DHCP_PORT_SERVER));
+
+ ip_check = discover->ip.check;
+
+ discover->ip.ttl = 0;
+ discover->ip.check = discover->udp.len;
+
+ udp_check = ~dhcp_packet_checksum((uint8_t*)&discover->ip.ttl, len - 8);
+ assert_se(udp_check == 0xffff);
+
+ discover->ip.ttl = IPDEFTTL;
+ discover->ip.check = ip_check;
+
+ ip_check = ~dhcp_packet_checksum((uint8_t*)&discover->ip, sizeof(discover->ip));
+ assert_se(ip_check == 0xffff);
+
+ assert_se(discover->dhcp.xid);
+ assert_se(memcmp(discover->dhcp.chaddr, &mac_addr, ETH_ALEN) == 0);
+
+ size = len - sizeof(struct iphdr) - sizeof(struct udphdr);
+
+ assert_se(callback_recv);
+ callback_recv(size, &discover->dhcp);
+
+ return 575;
+}
+
+int dhcp_network_bind_raw_socket(
+ int index,
+ union sockaddr_union *link,
+ uint32_t id,
+ const uint8_t *addr, size_t addr_len,
+ uint16_t arp_type, uint16_t port) {
+
+ if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_fd) < 0)
+ return -errno;
+
+ return test_fd[0];
+}
+
+int dhcp_network_bind_udp_socket(int ifindex, be32_t address, uint16_t port) {
+ int fd;
+
+ fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
+int dhcp_network_send_udp_socket(int s, be32_t address, uint16_t port, const void *packet, size_t len) {
+ return 0;
+}
+
+static int test_discover_message_verify(size_t size, struct DHCPMessage *dhcp) {
+ int res;
+
+ res = dhcp_option_parse(dhcp, size, check_options, NULL, NULL);
+ assert_se(res == DHCP_DISCOVER);
+
+ if (verbose)
+ printf(" recv DHCP Discover 0x%08x\n", be32toh(dhcp->xid));
+
+ return 0;
+}
+
+static void test_discover_message(sd_event *e) {
+ sd_dhcp_client *client;
+ int res, r;
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ r = sd_dhcp_client_new(&client, false);
+ assert_se(r >= 0);
+ assert_se(client);
+
+ r = sd_dhcp_client_attach_event(client, e, 0);
+ assert_se(r >= 0);
+
+ assert_se(sd_dhcp_client_set_ifindex(client, 42) >= 0);
+ assert_se(sd_dhcp_client_set_mac(client, mac_addr, ETH_ALEN, ARPHRD_ETHER) >= 0);
+
+ assert_se(sd_dhcp_client_set_request_option(client, 248) >= 0);
+
+ callback_recv = test_discover_message_verify;
+
+ res = sd_dhcp_client_start(client);
+
+ assert_se(IN_SET(res, 0, -EINPROGRESS));
+
+ sd_event_run(e, (uint64_t) -1);
+
+ sd_dhcp_client_stop(client);
+ sd_dhcp_client_unref(client);
+
+ test_fd[1] = safe_close(test_fd[1]);
+
+ callback_recv = NULL;
+}
+
+static uint8_t test_addr_acq_offer[] = {
+ 0x45, 0x10, 0x01, 0x48, 0x00, 0x00, 0x00, 0x00,
+ 0x80, 0x11, 0xb3, 0x84, 0xc0, 0xa8, 0x02, 0x01,
+ 0xc0, 0xa8, 0x02, 0xbf, 0x00, 0x43, 0x00, 0x44,
+ 0x01, 0x34, 0x00, 0x00, 0x02, 0x01, 0x06, 0x00,
+ 0x6f, 0x95, 0x2f, 0x30, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xc0, 0xa8, 0x02, 0xbf,
+ 0xc0, 0xa8, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x63, 0x82, 0x53, 0x63, 0x35, 0x01, 0x02, 0x36,
+ 0x04, 0xc0, 0xa8, 0x02, 0x01, 0x33, 0x04, 0x00,
+ 0x00, 0x02, 0x58, 0x01, 0x04, 0xff, 0xff, 0xff,
+ 0x00, 0x2a, 0x04, 0xc0, 0xa8, 0x02, 0x01, 0x0f,
+ 0x09, 0x6c, 0x61, 0x62, 0x2e, 0x69, 0x6e, 0x74,
+ 0x72, 0x61, 0x03, 0x04, 0xc0, 0xa8, 0x02, 0x01,
+ 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static uint8_t test_addr_acq_ack[] = {
+ 0x45, 0x10, 0x01, 0x48, 0x00, 0x00, 0x00, 0x00,
+ 0x80, 0x11, 0xb3, 0x84, 0xc0, 0xa8, 0x02, 0x01,
+ 0xc0, 0xa8, 0x02, 0xbf, 0x00, 0x43, 0x00, 0x44,
+ 0x01, 0x34, 0x00, 0x00, 0x02, 0x01, 0x06, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xc0, 0xa8, 0x02, 0xbf,
+ 0xc0, 0xa8, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x63, 0x82, 0x53, 0x63, 0x35, 0x01, 0x05, 0x36,
+ 0x04, 0xc0, 0xa8, 0x02, 0x01, 0x33, 0x04, 0x00,
+ 0x00, 0x02, 0x58, 0x01, 0x04, 0xff, 0xff, 0xff,
+ 0x00, 0x2a, 0x04, 0xc0, 0xa8, 0x02, 0x01, 0x0f,
+ 0x09, 0x6c, 0x61, 0x62, 0x2e, 0x69, 0x6e, 0x74,
+ 0x72, 0x61, 0x03, 0x04, 0xc0, 0xa8, 0x02, 0x01,
+ 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static void test_addr_acq_acquired(sd_dhcp_client *client, int event,
+ void *userdata) {
+ sd_event *e = userdata;
+ sd_dhcp_lease *lease;
+ struct in_addr addr;
+
+ assert_se(client);
+ assert_se(event == SD_DHCP_CLIENT_EVENT_IP_ACQUIRE);
+
+ assert_se(sd_dhcp_client_get_lease(client, &lease) >= 0);
+ assert_se(lease);
+
+ assert_se(sd_dhcp_lease_get_address(lease, &addr) >= 0);
+ assert_se(memcmp(&addr.s_addr, &test_addr_acq_ack[44],
+ sizeof(addr.s_addr)) == 0);
+
+ assert_se(sd_dhcp_lease_get_netmask(lease, &addr) >= 0);
+ assert_se(memcmp(&addr.s_addr, &test_addr_acq_ack[285],
+ sizeof(addr.s_addr)) == 0);
+
+ assert_se(sd_dhcp_lease_get_router(lease, &addr) >= 0);
+ assert_se(memcmp(&addr.s_addr, &test_addr_acq_ack[308],
+ sizeof(addr.s_addr)) == 0);
+
+ if (verbose)
+ printf(" DHCP address acquired\n");
+
+ sd_event_exit(e, 0);
+}
+
+static int test_addr_acq_recv_request(size_t size, DHCPMessage *request) {
+ uint16_t udp_check = 0;
+ uint8_t *msg_bytes = (uint8_t *)request;
+ int res;
+
+ res = dhcp_option_parse(request, size, check_options, NULL, NULL);
+ assert_se(res == DHCP_REQUEST);
+ assert_se(xid == request->xid);
+
+ assert_se(msg_bytes[size - 1] == SD_DHCP_OPTION_END);
+
+ if (verbose)
+ printf(" recv DHCP Request 0x%08x\n", be32toh(xid));
+
+ memcpy(&test_addr_acq_ack[26], &udp_check, sizeof(udp_check));
+ memcpy(&test_addr_acq_ack[32], &xid, sizeof(xid));
+ memcpy(&test_addr_acq_ack[56], &mac_addr, ETHER_ADDR_LEN);
+
+ callback_recv = NULL;
+
+ res = write(test_fd[1], test_addr_acq_ack,
+ sizeof(test_addr_acq_ack));
+ assert_se(res == sizeof(test_addr_acq_ack));
+
+ if (verbose)
+ printf(" send DHCP Ack\n");
+
+ return 0;
+};
+
+static int test_addr_acq_recv_discover(size_t size, DHCPMessage *discover) {
+ uint16_t udp_check = 0;
+ uint8_t *msg_bytes = (uint8_t *)discover;
+ int res;
+
+ res = dhcp_option_parse(discover, size, check_options, NULL, NULL);
+ assert_se(res == DHCP_DISCOVER);
+
+ assert_se(msg_bytes[size - 1] == SD_DHCP_OPTION_END);
+
+ xid = discover->xid;
+
+ if (verbose)
+ printf(" recv DHCP Discover 0x%08x\n", be32toh(xid));
+
+ memcpy(&test_addr_acq_offer[26], &udp_check, sizeof(udp_check));
+ memcpy(&test_addr_acq_offer[32], &xid, sizeof(xid));
+ memcpy(&test_addr_acq_offer[56], &mac_addr, ETHER_ADDR_LEN);
+
+ callback_recv = test_addr_acq_recv_request;
+
+ res = write(test_fd[1], test_addr_acq_offer,
+ sizeof(test_addr_acq_offer));
+ assert_se(res == sizeof(test_addr_acq_offer));
+
+ if (verbose)
+ printf(" sent DHCP Offer\n");
+
+ return 0;
+}
+
+static void test_addr_acq(sd_event *e) {
+ usec_t time_now = now(clock_boottime_or_monotonic());
+ sd_dhcp_client *client;
+ int res, r;
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ r = sd_dhcp_client_new(&client, false);
+ assert_se(r >= 0);
+ assert_se(client);
+
+ r = sd_dhcp_client_attach_event(client, e, 0);
+ assert_se(r >= 0);
+
+ assert_se(sd_dhcp_client_set_ifindex(client, 42) >= 0);
+ assert_se(sd_dhcp_client_set_mac(client, mac_addr, ETH_ALEN, ARPHRD_ETHER) >= 0);
+
+ assert_se(sd_dhcp_client_set_callback(client, test_addr_acq_acquired, e) >= 0);
+
+ callback_recv = test_addr_acq_recv_discover;
+
+ assert_se(sd_event_add_time(e, &test_hangcheck,
+ clock_boottime_or_monotonic(),
+ time_now + 2 * USEC_PER_SEC, 0,
+ test_dhcp_hangcheck, NULL) >= 0);
+
+ res = sd_dhcp_client_start(client);
+ assert_se(IN_SET(res, 0, -EINPROGRESS));
+
+ assert_se(sd_event_loop(e) >= 0);
+
+ test_hangcheck = sd_event_source_unref(test_hangcheck);
+
+ assert_se(sd_dhcp_client_set_callback(client, NULL, NULL) >= 0);
+ assert_se(sd_dhcp_client_stop(client) >= 0);
+ sd_dhcp_client_unref(client);
+
+ test_fd[1] = safe_close(test_fd[1]);
+
+ callback_recv = NULL;
+ xid = 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *e;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ test_request_basic(e);
+ test_request_anonymize(e);
+ test_checksum();
+ test_dhcp_identifier_set_iaid();
+
+ test_discover_message(e);
+ test_addr_acq(e);
+
+#if VALGRIND
+ /* Make sure the async_close thread has finished.
+ * valgrind would report some of the phread_* structures
+ * as not cleaned up properly. */
+ sleep(1);
+#endif
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-dhcp-option.c b/src/libsystemd-network/test-dhcp-option.c
new file mode 100644
index 0000000..d84859c
--- /dev/null
+++ b/src/libsystemd-network/test-dhcp-option.c
@@ -0,0 +1,367 @@
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "dhcp-internal.h"
+#include "dhcp-protocol.h"
+#include "macro.h"
+#include "util.h"
+
+struct option_desc {
+ uint8_t sname[64];
+ int snamelen;
+ uint8_t file[128];
+ int filelen;
+ uint8_t options[128];
+ int len;
+ bool success;
+ int filepos;
+ int snamepos;
+ int pos;
+};
+
+static bool verbose = false;
+
+static struct option_desc option_tests[] = {
+ { {}, 0, {}, 0, { 42, 5, 65, 66, 67, 68, 69 }, 7, false, },
+ { {}, 0, {}, 0, { 42, 5, 65, 66, 67, 68, 69, 0, 0,
+ SD_DHCP_OPTION_MESSAGE_TYPE, 1, DHCP_ACK }, 12, true, },
+ { {}, 0, {}, 0, { 8, 255, 70, 71, 72 }, 5, false, },
+ { {}, 0, {}, 0, { 0x35, 0x01, 0x05, 0x36, 0x04, 0x01, 0x00, 0xa8,
+ 0xc0, 0x33, 0x04, 0x00, 0x01, 0x51, 0x80, 0x01,
+ 0x04, 0xff, 0xff, 0xff, 0x00, 0x03, 0x04, 0xc0,
+ 0xa8, 0x00, 0x01, 0x06, 0x04, 0xc0, 0xa8, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, },
+ 40, true, },
+ { {}, 0, {}, 0, { SD_DHCP_OPTION_MESSAGE_TYPE, 1, DHCP_OFFER,
+ 42, 3, 0, 0, 0 }, 8, true, },
+ { {}, 0, {}, 0, { 42, 2, 1, 2, 44 }, 5, false, },
+
+ { {}, 0,
+ { 222, 3, 1, 2, 3, SD_DHCP_OPTION_MESSAGE_TYPE, 1, DHCP_NAK }, 8,
+ { SD_DHCP_OPTION_OVERLOAD, 1, DHCP_OVERLOAD_FILE }, 3, true, },
+
+ { { 1, 4, 1, 2, 3, 4, SD_DHCP_OPTION_MESSAGE_TYPE, 1, DHCP_ACK }, 9,
+ { 222, 3, 1, 2, 3 }, 5,
+ { SD_DHCP_OPTION_OVERLOAD, 1,
+ DHCP_OVERLOAD_FILE|DHCP_OVERLOAD_SNAME }, 3, true, },
+};
+
+static const char *dhcp_type(int type) {
+ switch(type) {
+ case DHCP_DISCOVER:
+ return "DHCPDISCOVER";
+ case DHCP_OFFER:
+ return "DHCPOFFER";
+ case DHCP_REQUEST:
+ return "DHCPREQUEST";
+ case DHCP_DECLINE:
+ return "DHCPDECLINE";
+ case DHCP_ACK:
+ return "DHCPACK";
+ case DHCP_NAK:
+ return "DHCPNAK";
+ case DHCP_RELEASE:
+ return "DHCPRELEASE";
+ default:
+ return "unknown";
+ }
+}
+
+static void test_invalid_buffer_length(void) {
+ DHCPMessage message;
+
+ assert_se(dhcp_option_parse(&message, 0, NULL, NULL, NULL) == -EINVAL);
+ assert_se(dhcp_option_parse(&message, sizeof(DHCPMessage) - 1, NULL, NULL, NULL) == -EINVAL);
+}
+
+static void test_message_init(void) {
+ _cleanup_free_ DHCPMessage *message = NULL;
+ size_t optlen = 4, optoffset;
+ size_t len = sizeof(DHCPMessage) + optlen;
+ uint8_t *magic;
+
+ message = malloc0(len);
+
+ assert_se(dhcp_message_init(message, BOOTREQUEST, 0x12345678,
+ DHCP_DISCOVER, ARPHRD_ETHER, optlen, &optoffset) >= 0);
+
+ assert_se(message->xid == htobe32(0x12345678));
+ assert_se(message->op == BOOTREQUEST);
+
+ magic = (uint8_t*)&message->magic;
+
+ assert_se(magic[0] == 99);
+ assert_se(magic[1] == 130);
+ assert_se(magic[2] == 83);
+ assert_se(magic[3] == 99);
+
+ assert_se(dhcp_option_parse(message, len, NULL, NULL, NULL) >= 0);
+}
+
+static DHCPMessage *create_message(uint8_t *options, uint16_t optlen,
+ uint8_t *file, uint8_t filelen,
+ uint8_t *sname, uint8_t snamelen) {
+ DHCPMessage *message;
+ size_t len = sizeof(DHCPMessage) + optlen;
+
+ message = malloc0(len);
+ assert_se(message);
+
+ memcpy_safe(&message->options, options, optlen);
+ memcpy_safe(&message->file, file, filelen);
+ memcpy_safe(&message->sname, sname, snamelen);
+
+ return message;
+}
+
+static void test_ignore_opts(uint8_t *descoption, int *descpos, int *desclen) {
+ assert(*descpos >= 0);
+
+ while (*descpos < *desclen) {
+ switch(descoption[*descpos]) {
+ case SD_DHCP_OPTION_PAD:
+ *descpos += 1;
+ break;
+
+ case SD_DHCP_OPTION_MESSAGE_TYPE:
+ case SD_DHCP_OPTION_OVERLOAD:
+ *descpos += 3;
+ break;
+
+ default:
+ return;
+ }
+ }
+}
+
+static int test_options_cb(uint8_t code, uint8_t len, const void *option, void *userdata) {
+ struct option_desc *desc = userdata;
+ uint8_t *descoption = NULL;
+ int *desclen = NULL, *descpos = NULL;
+ uint8_t optcode = 0;
+ uint8_t optlen = 0;
+ uint8_t i;
+
+ assert_se((!desc && !code && !len) || desc);
+
+ if (!desc)
+ return -EINVAL;
+
+ assert_se(code != SD_DHCP_OPTION_PAD);
+ assert_se(code != SD_DHCP_OPTION_END);
+ assert_se(code != SD_DHCP_OPTION_MESSAGE_TYPE);
+ assert_se(code != SD_DHCP_OPTION_OVERLOAD);
+
+ while (desc->pos >= 0 || desc->filepos >= 0 || desc->snamepos >= 0) {
+
+ if (desc->pos >= 0) {
+ descoption = &desc->options[0];
+ desclen = &desc->len;
+ descpos = &desc->pos;
+ } else if (desc->filepos >= 0) {
+ descoption = &desc->file[0];
+ desclen = &desc->filelen;
+ descpos = &desc->filepos;
+ } else if (desc->snamepos >= 0) {
+ descoption = &desc->sname[0];
+ desclen = &desc->snamelen;
+ descpos = &desc->snamepos;
+ }
+
+ assert_se(descoption && desclen && descpos);
+
+ if (*desclen)
+ test_ignore_opts(descoption, descpos, desclen);
+
+ if (*descpos < *desclen)
+ break;
+
+ if (*descpos == *desclen)
+ *descpos = -1;
+ }
+
+ assert_se(descpos);
+ assert_se(*descpos != -1);
+
+ optcode = descoption[*descpos];
+ optlen = descoption[*descpos + 1];
+
+ if (verbose)
+ printf("DHCP code %2d(%2d) len %2d(%2d) ", code, optcode,
+ len, optlen);
+
+ assert_se(code == optcode);
+ assert_se(len == optlen);
+
+ for (i = 0; i < len; i++) {
+
+ if (verbose)
+ printf("0x%02x(0x%02x) ", ((uint8_t*) option)[i],
+ descoption[*descpos + 2 + i]);
+
+ assert_se(((uint8_t*) option)[i] == descoption[*descpos + 2 + i]);
+ }
+
+ if (verbose)
+ printf("\n");
+
+ *descpos += optlen + 2;
+
+ test_ignore_opts(descoption, descpos, desclen);
+
+ if (desc->pos != -1 && desc->pos == desc->len)
+ desc->pos = -1;
+
+ if (desc->filepos != -1 && desc->filepos == desc->filelen)
+ desc->filepos = -1;
+
+ if (desc->snamepos != -1 && desc->snamepos == desc->snamelen)
+ desc->snamepos = -1;
+
+ return 0;
+}
+
+static void test_options(struct option_desc *desc) {
+ uint8_t *options = NULL;
+ uint8_t *file = NULL;
+ uint8_t *sname = NULL;
+ int optlen = 0;
+ int filelen = 0;
+ int snamelen = 0;
+ int buflen = 0;
+ _cleanup_free_ DHCPMessage *message = NULL;
+ int res;
+
+ if (desc) {
+ file = &desc->file[0];
+ filelen = desc->filelen;
+ if (!filelen)
+ desc->filepos = -1;
+
+ sname = &desc->sname[0];
+ snamelen = desc->snamelen;
+ if (!snamelen)
+ desc->snamepos = -1;
+
+ options = &desc->options[0];
+ optlen = desc->len;
+ desc->pos = 0;
+ }
+ message = create_message(options, optlen, file, filelen,
+ sname, snamelen);
+
+ buflen = sizeof(DHCPMessage) + optlen;
+
+ if (!desc) {
+ assert_se((res = dhcp_option_parse(message, buflen, test_options_cb, NULL, NULL)) == -ENOMSG);
+ } else if (desc->success) {
+ assert_se((res = dhcp_option_parse(message, buflen, test_options_cb, desc, NULL)) >= 0);
+ assert_se(desc->pos == -1 && desc->filepos == -1 && desc->snamepos == -1);
+ } else
+ assert_se((res = dhcp_option_parse(message, buflen, test_options_cb, desc, NULL)) < 0);
+
+ if (verbose)
+ printf("DHCP type %s\n", dhcp_type(res));
+}
+
+static uint8_t options[64] = {
+ 'A', 'B', 'C', 'D',
+ 160, 2, 0x11, 0x12,
+ 0,
+ 31, 8, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+ 0,
+ 55, 3, 0x51, 0x52, 0x53,
+ 17, 7, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 255
+};
+
+static void test_option_set(void) {
+ _cleanup_free_ DHCPMessage *result = NULL;
+ size_t offset = 0, len, pos;
+ unsigned i;
+
+ result = malloc0(sizeof(DHCPMessage) + 11);
+ assert_se(result);
+
+ result->options[0] = 'A';
+ result->options[1] = 'B';
+ result->options[2] = 'C';
+ result->options[3] = 'D';
+
+ assert_se(dhcp_option_append(result, 0, &offset, 0, SD_DHCP_OPTION_PAD,
+ 0, NULL) == -ENOBUFS);
+ assert_se(offset == 0);
+
+ offset = 4;
+ assert_se(dhcp_option_append(result, 5, &offset, 0, SD_DHCP_OPTION_PAD,
+ 0, NULL) == -ENOBUFS);
+ assert_se(offset == 4);
+ assert_se(dhcp_option_append(result, 6, &offset, 0, SD_DHCP_OPTION_PAD,
+ 0, NULL) >= 0);
+ assert_se(offset == 5);
+
+ offset = pos = 4;
+ len = 11;
+ while (pos < len && options[pos] != SD_DHCP_OPTION_END) {
+ assert_se(dhcp_option_append(result, len, &offset, DHCP_OVERLOAD_SNAME,
+ options[pos],
+ options[pos + 1],
+ &options[pos + 2]) >= 0);
+
+ if (options[pos] == SD_DHCP_OPTION_PAD)
+ pos++;
+ else
+ pos += 2 + options[pos + 1];
+
+ if (pos < len)
+ assert_se(offset == pos);
+ }
+
+ for (i = 0; i < 9; i++) {
+ if (verbose)
+ printf("%2u: 0x%02x(0x%02x) (options)\n", i, result->options[i],
+ options[i]);
+ assert_se(result->options[i] == options[i]);
+ }
+
+ if (verbose)
+ printf("%2d: 0x%02x(0x%02x) (options)\n", 9, result->options[9],
+ SD_DHCP_OPTION_END);
+
+ assert_se(result->options[9] == SD_DHCP_OPTION_END);
+
+ if (verbose)
+ printf("%2d: 0x%02x(0x%02x) (options)\n", 10, result->options[10],
+ SD_DHCP_OPTION_PAD);
+
+ assert_se(result->options[10] == SD_DHCP_OPTION_PAD);
+
+ for (i = 0; i < pos - 8; i++) {
+ if (verbose)
+ printf("%2u: 0x%02x(0x%02x) (sname)\n", i, result->sname[i],
+ options[i + 9]);
+ assert_se(result->sname[i] == options[i + 9]);
+ }
+
+ if (verbose)
+ printf ("\n");
+}
+
+int main(int argc, char *argv[]) {
+ unsigned i;
+
+ test_invalid_buffer_length();
+ test_message_init();
+
+ test_options(NULL);
+
+ for (i = 0; i < ELEMENTSOF(option_tests); i++)
+ test_options(&option_tests[i]);
+
+ test_option_set();
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-dhcp-server.c b/src/libsystemd-network/test-dhcp-server.c
new file mode 100644
index 0000000..ea99893
--- /dev/null
+++ b/src/libsystemd-network/test-dhcp-server.c
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+
+#include "sd-dhcp-server.h"
+#include "sd-event.h"
+
+#include "dhcp-server-internal.h"
+#include "tests.h"
+
+static void test_pool(struct in_addr *address, unsigned size, int ret) {
+ _cleanup_(sd_dhcp_server_unrefp) sd_dhcp_server *server = NULL;
+
+ assert_se(sd_dhcp_server_new(&server, 1) >= 0);
+
+ assert_se(sd_dhcp_server_configure_pool(server, address, 8, 0, size) == ret);
+}
+
+static int test_basic(sd_event *event) {
+ _cleanup_(sd_dhcp_server_unrefp) sd_dhcp_server *server = NULL;
+ struct in_addr address_lo = {
+ .s_addr = htonl(INADDR_LOOPBACK),
+ };
+ struct in_addr address_any = {
+ .s_addr = htonl(INADDR_ANY),
+ };
+ int r;
+
+ /* attach to loopback interface */
+ assert_se(sd_dhcp_server_new(&server, 1) >= 0);
+ assert_se(server);
+
+ assert_se(sd_dhcp_server_attach_event(server, event, 0) >= 0);
+ assert_se(sd_dhcp_server_attach_event(server, event, 0) == -EBUSY);
+ assert_se(sd_dhcp_server_get_event(server) == event);
+ assert_se(sd_dhcp_server_detach_event(server) >= 0);
+ assert_se(!sd_dhcp_server_get_event(server));
+ assert_se(sd_dhcp_server_attach_event(server, NULL, 0) >= 0);
+ assert_se(sd_dhcp_server_attach_event(server, NULL, 0) == -EBUSY);
+
+ assert_se(sd_dhcp_server_ref(server) == server);
+ assert_se(!sd_dhcp_server_unref(server));
+
+ assert_se(sd_dhcp_server_start(server) == -EUNATCH);
+
+ assert_se(sd_dhcp_server_configure_pool(server, &address_any, 28, 0, 0) == -EINVAL);
+ assert_se(sd_dhcp_server_configure_pool(server, &address_lo, 38, 0, 0) == -ERANGE);
+ assert_se(sd_dhcp_server_configure_pool(server, &address_lo, 8, 0, 0) >= 0);
+ assert_se(sd_dhcp_server_configure_pool(server, &address_lo, 8, 0, 0) >= 0);
+
+ test_pool(&address_any, 1, -EINVAL);
+ test_pool(&address_lo, 1, 0);
+
+ r = sd_dhcp_server_start(server);
+ if (r == -EPERM)
+ return log_info_errno(r, "sd_dhcp_server_start failed: %m");
+ assert_se(r >= 0);
+
+ assert_se(sd_dhcp_server_start(server) == -EBUSY);
+ assert_se(sd_dhcp_server_stop(server) >= 0);
+ assert_se(sd_dhcp_server_stop(server) >= 0);
+ assert_se(sd_dhcp_server_start(server) >= 0);
+
+ return 0;
+}
+
+static void test_message_handler(void) {
+ _cleanup_(sd_dhcp_server_unrefp) sd_dhcp_server *server = NULL;
+ struct {
+ DHCPMessage message;
+ struct {
+ uint8_t code;
+ uint8_t length;
+ uint8_t type;
+ } _packed_ option_type;
+ struct {
+ uint8_t code;
+ uint8_t length;
+ be32_t address;
+ } _packed_ option_requested_ip;
+ struct {
+ uint8_t code;
+ uint8_t length;
+ be32_t address;
+ } _packed_ option_server_id;
+ struct {
+ uint8_t code;
+ uint8_t length;
+ uint8_t id[7];
+ } _packed_ option_client_id;
+ uint8_t end;
+ } _packed_ test = {
+ .message.op = BOOTREQUEST,
+ .message.htype = ARPHRD_ETHER,
+ .message.hlen = ETHER_ADDR_LEN,
+ .message.xid = htobe32(0x12345678),
+ .message.chaddr = { 'A', 'B', 'C', 'D', 'E', 'F' },
+ .option_type.code = SD_DHCP_OPTION_MESSAGE_TYPE,
+ .option_type.length = 1,
+ .option_type.type = DHCP_DISCOVER,
+ .end = SD_DHCP_OPTION_END,
+ };
+ struct in_addr address_lo = {
+ .s_addr = htonl(INADDR_LOOPBACK),
+ };
+
+ assert_se(sd_dhcp_server_new(&server, 1) >= 0);
+ assert_se(sd_dhcp_server_configure_pool(server, &address_lo, 8, 0, 0) >= 0);
+ assert_se(sd_dhcp_server_attach_event(server, NULL, 0) >= 0);
+ assert_se(sd_dhcp_server_start(server) >= 0);
+
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_OFFER);
+
+ test.end = 0;
+ /* TODO, shouldn't this fail? */
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_OFFER);
+ test.end = SD_DHCP_OPTION_END;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_OFFER);
+
+ test.option_type.code = 0;
+ test.option_type.length = 0;
+ test.option_type.type = 0;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+ test.option_type.code = SD_DHCP_OPTION_MESSAGE_TYPE;
+ test.option_type.length = 1;
+ test.option_type.type = DHCP_DISCOVER;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_OFFER);
+
+ test.message.op = 0;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+ test.message.op = BOOTREQUEST;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_OFFER);
+
+ test.message.htype = 0;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+ test.message.htype = ARPHRD_ETHER;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_OFFER);
+
+ test.message.hlen = 0;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+ test.message.hlen = ETHER_ADDR_LEN;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_OFFER);
+
+ test.option_type.type = DHCP_REQUEST;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+ test.option_requested_ip.code = SD_DHCP_OPTION_REQUESTED_IP_ADDRESS;
+ test.option_requested_ip.length = 4;
+ test.option_requested_ip.address = htobe32(0x12345678);
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_NAK);
+ test.option_server_id.code = SD_DHCP_OPTION_SERVER_IDENTIFIER;
+ test.option_server_id.length = 4;
+ test.option_server_id.address = htobe32(INADDR_LOOPBACK);
+ test.option_requested_ip.address = htobe32(INADDR_LOOPBACK + 3);
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_ACK);
+
+ test.option_server_id.address = htobe32(0x12345678);
+ test.option_requested_ip.address = htobe32(INADDR_LOOPBACK + 3);
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+ test.option_server_id.address = htobe32(INADDR_LOOPBACK);
+ test.option_requested_ip.address = htobe32(INADDR_LOOPBACK + 4);
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+ test.option_requested_ip.address = htobe32(INADDR_LOOPBACK + 3);
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_ACK);
+
+ test.option_client_id.code = SD_DHCP_OPTION_CLIENT_IDENTIFIER;
+ test.option_client_id.length = 7;
+ test.option_client_id.id[0] = 0x01;
+ test.option_client_id.id[1] = 'A';
+ test.option_client_id.id[2] = 'B';
+ test.option_client_id.id[3] = 'C';
+ test.option_client_id.id[4] = 'D';
+ test.option_client_id.id[5] = 'E';
+ test.option_client_id.id[6] = 'F';
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_ACK);
+
+ test.option_requested_ip.address = htobe32(INADDR_LOOPBACK + 30);
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+}
+
+static uint64_t client_id_hash_helper(DHCPClientId *id, uint8_t key[HASH_KEY_SIZE]) {
+ struct siphash state;
+
+ siphash24_init(&state, key);
+ client_id_hash_func(id, &state);
+
+ return htole64(siphash24_finalize(&state));
+}
+
+static void test_client_id_hash(void) {
+ DHCPClientId a = {
+ .length = 4,
+ }, b = {
+ .length = 4,
+ };
+ uint8_t hash_key[HASH_KEY_SIZE] = {
+ '0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+ };
+
+ a.data = (uint8_t*)strdup("abcd");
+ b.data = (uint8_t*)strdup("abcd");
+
+ assert_se(client_id_compare_func(&a, &b) == 0);
+ assert_se(client_id_hash_helper(&a, hash_key) == client_id_hash_helper(&b, hash_key));
+ a.length = 3;
+ assert_se(client_id_compare_func(&a, &b) != 0);
+ a.length = 4;
+ assert_se(client_id_compare_func(&a, &b) == 0);
+ assert_se(client_id_hash_helper(&a, hash_key) == client_id_hash_helper(&b, hash_key));
+
+ b.length = 3;
+ assert_se(client_id_compare_func(&a, &b) != 0);
+ b.length = 4;
+ assert_se(client_id_compare_func(&a, &b) == 0);
+ assert_se(client_id_hash_helper(&a, hash_key) == client_id_hash_helper(&b, hash_key));
+
+ free(b.data);
+ b.data = (uint8_t*)strdup("abce");
+ assert_se(client_id_compare_func(&a, &b) != 0);
+
+ free(a.data);
+ free(b.data);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *e;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ r = test_basic(e);
+ if (r != 0)
+ return log_tests_skipped("cannot start dhcp server");
+
+ test_message_handler();
+ test_client_id_hash();
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-dhcp6-client.c b/src/libsystemd-network/test-dhcp6-client.c
new file mode 100644
index 0000000..fa94b3c
--- /dev/null
+++ b/src/libsystemd-network/test-dhcp6-client.c
@@ -0,0 +1,949 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <net/ethernet.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-dhcp6-client.h"
+#include "sd-event.h"
+
+#include "dhcp6-internal.h"
+#include "dhcp6-lease-internal.h"
+#include "dhcp6-protocol.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "socket-util.h"
+#include "tests.h"
+#include "util.h"
+#include "virt.h"
+
+static struct ether_addr mac_addr = {
+ .ether_addr_octet = {'A', 'B', 'C', '1', '2', '3'}
+};
+
+static sd_event_source *hangcheck;
+static int test_dhcp_fd[2];
+static int test_index = 42;
+static int test_client_message_num;
+static be32_t test_iaid = 0;
+static uint8_t test_duid[14] = { };
+
+static int test_client_basic(sd_event *e) {
+ sd_dhcp6_client *client;
+ int v;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(sd_dhcp6_client_new(&client) >= 0);
+ assert_se(client);
+
+ assert_se(sd_dhcp6_client_attach_event(client, e, 0) >= 0);
+
+ assert_se(sd_dhcp6_client_set_ifindex(client, 15) == 0);
+ assert_se(sd_dhcp6_client_set_ifindex(client, -42) == -EINVAL);
+ assert_se(sd_dhcp6_client_set_ifindex(client, -1) == 0);
+ assert_se(sd_dhcp6_client_set_ifindex(client, 42) >= 0);
+
+ assert_se(sd_dhcp6_client_set_mac(client, (const uint8_t *) &mac_addr,
+ sizeof (mac_addr),
+ ARPHRD_ETHER) >= 0);
+
+ assert_se(sd_dhcp6_client_set_fqdn(client, "host") == 1);
+ assert_se(sd_dhcp6_client_set_fqdn(client, "host.domain") == 1);
+ assert_se(sd_dhcp6_client_set_fqdn(client, NULL) == 1);
+ assert_se(sd_dhcp6_client_set_fqdn(client, "~host") == -EINVAL);
+ assert_se(sd_dhcp6_client_set_fqdn(client, "~host.domain") == -EINVAL);
+
+ assert_se(sd_dhcp6_client_set_request_option(client, SD_DHCP6_OPTION_CLIENTID) == -EINVAL);
+ assert_se(sd_dhcp6_client_set_request_option(client, SD_DHCP6_OPTION_DNS_SERVERS) == -EEXIST);
+ assert_se(sd_dhcp6_client_set_request_option(client, SD_DHCP6_OPTION_NTP_SERVER) == -EEXIST);
+ assert_se(sd_dhcp6_client_set_request_option(client, SD_DHCP6_OPTION_SNTP_SERVERS) == -EEXIST);
+ assert_se(sd_dhcp6_client_set_request_option(client, SD_DHCP6_OPTION_DOMAIN_LIST) == -EEXIST);
+ assert_se(sd_dhcp6_client_set_request_option(client, 10) == -EINVAL);
+
+ assert_se(sd_dhcp6_client_set_information_request(client, 1) >= 0);
+ v = 0;
+ assert_se(sd_dhcp6_client_get_information_request(client, &v) >= 0);
+ assert_se(v);
+ assert_se(sd_dhcp6_client_set_information_request(client, 0) >= 0);
+ v = 42;
+ assert_se(sd_dhcp6_client_get_information_request(client, &v) >= 0);
+ assert_se(v == 0);
+
+ v = 0;
+ assert_se(sd_dhcp6_client_get_address_request(client, &v) >= 0);
+ assert_se(v);
+ v = 0;
+ assert_se(sd_dhcp6_client_set_address_request(client, 1) >= 0);
+ assert_se(sd_dhcp6_client_get_address_request(client, &v) >= 0);
+ assert_se(v);
+ v = 42;
+ assert_se(sd_dhcp6_client_set_address_request(client, 1) >= 0);
+ assert_se(sd_dhcp6_client_get_address_request(client, &v) >= 0);
+ assert_se(v);
+
+ assert_se(sd_dhcp6_client_set_address_request(client, 1) >= 0);
+ assert_se(sd_dhcp6_client_set_prefix_delegation(client, 1) >= 0);
+ v = 0;
+ assert_se(sd_dhcp6_client_get_address_request(client, &v) >= 0);
+ assert_se(v);
+ v = 0;
+ assert_se(sd_dhcp6_client_get_prefix_delegation(client, &v) >= 0);
+ assert_se(v);
+
+ assert_se(sd_dhcp6_client_set_callback(client, NULL, NULL) >= 0);
+
+ assert_se(sd_dhcp6_client_detach_event(client) >= 0);
+ assert_se(!sd_dhcp6_client_unref(client));
+
+ return 0;
+}
+
+static int test_option(sd_event *e) {
+ uint8_t packet[] = {
+ 'F', 'O', 'O',
+ 0x00, SD_DHCP6_OPTION_ORO, 0x00, 0x07,
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G',
+ 0x00, SD_DHCP6_OPTION_VENDOR_CLASS, 0x00, 0x09,
+ '1', '2', '3', '4', '5', '6', '7', '8', '9',
+ 'B', 'A', 'R',
+ };
+ uint8_t result[] = {
+ 'F', 'O', 'O',
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 'B', 'A', 'R',
+ };
+ uint16_t optcode;
+ size_t optlen;
+ uint8_t *optval, *buf, *out;
+ size_t zero = 0, pos = 3;
+ size_t buflen = sizeof(packet), outlen = sizeof(result);
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(buflen == outlen);
+
+ assert_se(dhcp6_option_parse(&buf, &zero, &optcode, &optlen,
+ &optval) == -ENOMSG);
+
+ buflen -= 3;
+ buf = &packet[3];
+ outlen -= 3;
+ out = &result[3];
+
+ assert_se(dhcp6_option_parse(&buf, &buflen, &optcode, &optlen,
+ &optval) >= 0);
+ pos += 4 + optlen;
+ assert_se(buf == &packet[pos]);
+ assert_se(optcode == SD_DHCP6_OPTION_ORO);
+ assert_se(optlen == 7);
+ assert_se(buflen + pos == sizeof(packet));
+
+ assert_se(dhcp6_option_append(&out, &outlen, optcode, optlen,
+ optval) >= 0);
+ assert_se(out == &result[pos]);
+ assert_se(*out == 0x00);
+
+ assert_se(dhcp6_option_parse(&buf, &buflen, &optcode, &optlen,
+ &optval) >= 0);
+ pos += 4 + optlen;
+ assert_se(buf == &packet[pos]);
+ assert_se(optcode == SD_DHCP6_OPTION_VENDOR_CLASS);
+ assert_se(optlen == 9);
+ assert_se(buflen + pos == sizeof(packet));
+
+ assert_se(dhcp6_option_append(&out, &outlen, optcode, optlen,
+ optval) >= 0);
+ assert_se(out == &result[pos]);
+ assert_se(*out == 'B');
+
+ assert_se(memcmp(packet, result, sizeof(packet)) == 0);
+
+ return 0;
+}
+
+static int test_option_status(sd_event *e) {
+ uint8_t option1[] = {
+ /* IA NA */
+ 0x00, 0x03, 0x00, 0x12, 0x1a, 0x1d, 0x1a, 0x1d,
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x02, 0x00, 0x02,
+ /* status option */
+ 0x00, 0x0d, 0x00, 0x02, 0x00, 0x01,
+ };
+ static const uint8_t option2[] = {
+ /* IA NA */
+ 0x00, 0x03, 0x00, 0x2e, 0x1a, 0x1d, 0x1a, 0x1d,
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x02, 0x00, 0x02,
+ /* IA Addr */
+ 0x00, 0x05, 0x00, 0x1e,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
+ 0x01, 0x02, 0x03, 0x04, 0x0a, 0x0b, 0x0c, 0x0d,
+ /* status option */
+ 0x00, 0x0d, 0x00, 0x02, 0x00, 0x01,
+ };
+ static const uint8_t option3[] = {
+ /* IA NA */
+ 0x00, 0x03, 0x00, 0x34, 0x1a, 0x1d, 0x1a, 0x1d,
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x02, 0x00, 0x02,
+ /* IA Addr */
+ 0x00, 0x05, 0x00, 0x24,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
+ 0x01, 0x02, 0x03, 0x04, 0x0a, 0x0b, 0x0c, 0x0d,
+ /* status option */
+ 0x00, 0x0d, 0x00, 0x08, 0x00, 0x00, 'f', 'o',
+ 'o', 'b', 'a', 'r',
+ };
+ static const uint8_t option4[] = {
+ /* IA PD */
+ 0x00, 0x19, 0x00, 0x2f, 0x1a, 0x1d, 0x1a, 0x1d,
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x02, 0x00, 0x02,
+ /* IA PD Prefix */
+ 0x00, 0x1a, 0x00, 0x1f,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x80, 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe,
+ 0xef, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00,
+ /* status option */
+ 0x00, 0x0d, 0x00, 0x02, 0x00, 0x00,
+ };
+ static const uint8_t option5[] = {
+ /* IA PD */
+ 0x00, 0x19, 0x00, 0x52, 0x1a, 0x1d, 0x1a, 0x1d,
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x02, 0x00, 0x02,
+ /* IA PD Prefix #1 */
+ 0x00, 0x1a, 0x00, 0x1f,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x80, 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe,
+ 0xef, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00,
+ /* status option */
+ 0x00, 0x0d, 0x00, 0x02, 0x00, 0x00,
+ /* IA PD Prefix #2 */
+ 0x00, 0x1a, 0x00, 0x1f,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x80, 0x20, 0x01, 0x0d, 0xb8, 0xc0, 0x0l, 0xd0,
+ 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00,
+ 0x00, 0x0d, 0x00, 0x02, 0x00, 0x00,
+ };
+ DHCP6Option *option;
+ DHCP6IA ia, pd;
+ int r = 0;
+
+ log_debug("/* %s */", __func__);
+
+ zero(ia);
+ option = (DHCP6Option *)option1;
+ assert_se(sizeof(option1) == sizeof(DHCP6Option) + be16toh(option->len));
+
+ r = dhcp6_option_parse_ia(option, &ia);
+ assert_se(r == -EINVAL);
+ assert_se(ia.addresses == NULL);
+
+ option->len = htobe16(17);
+ r = dhcp6_option_parse_ia(option, &ia);
+ assert_se(r == -ENOBUFS);
+ assert_se(ia.addresses == NULL);
+
+ option->len = htobe16(sizeof(DHCP6Option));
+ r = dhcp6_option_parse_ia(option, &ia);
+ assert_se(r == -ENOBUFS);
+ assert_se(ia.addresses == NULL);
+
+ zero(ia);
+ option = (DHCP6Option *)option2;
+ assert_se(sizeof(option2) == sizeof(DHCP6Option) + be16toh(option->len));
+
+ r = dhcp6_option_parse_ia(option, &ia);
+ assert_se(r >= 0);
+ assert_se(ia.addresses == NULL);
+
+ zero(ia);
+ option = (DHCP6Option *)option3;
+ assert_se(sizeof(option3) == sizeof(DHCP6Option) + be16toh(option->len));
+
+ r = dhcp6_option_parse_ia(option, &ia);
+ assert_se(r >= 0);
+ assert_se(ia.addresses != NULL);
+ dhcp6_lease_free_ia(&ia);
+
+ zero(pd);
+ option = (DHCP6Option *)option4;
+ assert_se(sizeof(option4) == sizeof(DHCP6Option) + be16toh(option->len));
+
+ r = dhcp6_option_parse_ia(option, &pd);
+ assert_se(r == 0);
+ assert_se(pd.addresses != NULL);
+ assert_se(memcmp(&pd.ia_pd.id, &option4[4], 4) == 0);
+ assert_se(memcmp(&pd.ia_pd.lifetime_t1, &option4[8], 4) == 0);
+ assert_se(memcmp(&pd.ia_pd.lifetime_t2, &option4[12], 4) == 0);
+ dhcp6_lease_free_ia(&pd);
+
+ zero(pd);
+ option = (DHCP6Option *)option5;
+ assert_se(sizeof(option5) == sizeof(DHCP6Option) + be16toh(option->len));
+
+ r = dhcp6_option_parse_ia(option, &pd);
+ assert_se(r == 0);
+ assert_se(pd.addresses != NULL);
+ dhcp6_lease_free_ia(&pd);
+
+ return 0;
+}
+
+static uint8_t msg_advertise[198] = {
+ 0x02, 0x0f, 0xb4, 0xe5, 0x00, 0x01, 0x00, 0x0e,
+ 0x00, 0x01, 0x00, 0x01, 0x1a, 0x6b, 0xf3, 0x30,
+ 0x3c, 0x97, 0x0e, 0xcf, 0xa3, 0x7d, 0x00, 0x03,
+ 0x00, 0x5e, 0x0e, 0xcf, 0xa3, 0x7d, 0x00, 0x00,
+ 0x00, 0x50, 0x00, 0x00, 0x00, 0x78, 0x00, 0x05,
+ 0x00, 0x18, 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad,
+ 0xbe, 0xef, 0x78, 0xee, 0x1c, 0xf3, 0x09, 0x3c,
+ 0x55, 0xad, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00,
+ 0x00, 0xb4, 0x00, 0x0d, 0x00, 0x32, 0x00, 0x00,
+ 0x41, 0x64, 0x64, 0x72, 0x65, 0x73, 0x73, 0x28,
+ 0x65, 0x73, 0x29, 0x20, 0x72, 0x65, 0x6e, 0x65,
+ 0x77, 0x65, 0x64, 0x2e, 0x20, 0x47, 0x72, 0x65,
+ 0x65, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x66,
+ 0x72, 0x6f, 0x6d, 0x20, 0x70, 0x6c, 0x61, 0x6e,
+ 0x65, 0x74, 0x20, 0x45, 0x61, 0x72, 0x74, 0x68,
+ 0x00, 0x17, 0x00, 0x10, 0x20, 0x01, 0x0d, 0xb8,
+ 0xde, 0xad, 0xbe, 0xef, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x00, 0x18, 0x00, 0x0b,
+ 0x03, 0x6c, 0x61, 0x62, 0x05, 0x69, 0x6e, 0x74,
+ 0x72, 0x61, 0x00, 0x00, 0x1f, 0x00, 0x10, 0x20,
+ 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe, 0xef, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
+ 0x02, 0x00, 0x0e, 0x00, 0x01, 0x00, 0x01, 0x19,
+ 0x40, 0x5c, 0x53, 0x78, 0x2b, 0xcb, 0xb3, 0x6d,
+ 0x53, 0x00, 0x07, 0x00, 0x01, 0x00
+};
+
+static uint8_t msg_reply[173] = {
+ 0x07, 0xf7, 0x4e, 0x57, 0x00, 0x02, 0x00, 0x0e,
+ 0x00, 0x01, 0x00, 0x01, 0x19, 0x40, 0x5c, 0x53,
+ 0x78, 0x2b, 0xcb, 0xb3, 0x6d, 0x53, 0x00, 0x01,
+ 0x00, 0x0e, 0x00, 0x01, 0x00, 0x01, 0x1a, 0x6b,
+ 0xf3, 0x30, 0x3c, 0x97, 0x0e, 0xcf, 0xa3, 0x7d,
+ 0x00, 0x03, 0x00, 0x4a, 0x0e, 0xcf, 0xa3, 0x7d,
+ 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x78,
+ 0x00, 0x05, 0x00, 0x18, 0x20, 0x01, 0x0d, 0xb8,
+ 0xde, 0xad, 0xbe, 0xef, 0x78, 0xee, 0x1c, 0xf3,
+ 0x09, 0x3c, 0x55, 0xad, 0x00, 0x00, 0x00, 0x96,
+ 0x00, 0x00, 0x00, 0xb4, 0x00, 0x0d, 0x00, 0x1e,
+ 0x00, 0x00, 0x41, 0x6c, 0x6c, 0x20, 0x61, 0x64,
+ 0x64, 0x72, 0x65, 0x73, 0x73, 0x65, 0x73, 0x20,
+ 0x77, 0x65, 0x72, 0x65, 0x20, 0x61, 0x73, 0x73,
+ 0x69, 0x67, 0x6e, 0x65, 0x64, 0x2e, 0x00, 0x17,
+ 0x00, 0x10, 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad,
+ 0xbe, 0xef, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x01, 0x00, 0x18, 0x00, 0x0b, 0x03, 0x6c,
+ 0x61, 0x62, 0x05, 0x69, 0x6e, 0x74, 0x72, 0x61,
+ 0x00, 0x00, 0x1f, 0x00, 0x10, 0x20, 0x01, 0x0d,
+ 0xb8, 0xde, 0xad, 0xbe, 0xef, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01
+};
+
+static uint8_t fqdn_wire[16] = {
+ 0x04, 'h', 'o', 's', 't', 0x03, 'l', 'a', 'b',
+ 0x05, 'i', 'n', 't', 'r', 'a', 0x00
+};
+
+static int test_advertise_option(sd_event *e) {
+ _cleanup_(sd_dhcp6_lease_unrefp) sd_dhcp6_lease *lease = NULL;
+ DHCP6Message *advertise = (DHCP6Message *)msg_advertise;
+ size_t len = sizeof(msg_advertise) - sizeof(DHCP6Message), pos = 0;
+ be32_t val;
+ uint8_t preference = 255;
+ struct in6_addr addr;
+ uint32_t lt_pref, lt_valid;
+ int r;
+ uint8_t *opt;
+ bool opt_clientid = false;
+ struct in6_addr *addrs;
+ char **domains;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(len >= sizeof(DHCP6Message));
+
+ assert_se(dhcp6_lease_new(&lease) >= 0);
+
+ assert_se(advertise->type == DHCP6_ADVERTISE);
+ assert_se((be32toh(advertise->transaction_id) & 0x00ffffff) ==
+ 0x0fb4e5);
+
+ while (pos < len) {
+ DHCP6Option *option = (DHCP6Option *)&advertise->options[pos];
+ const uint16_t optcode = be16toh(option->code);
+ const uint16_t optlen = be16toh(option->len);
+ uint8_t *optval = option->data;
+
+ switch(optcode) {
+ case SD_DHCP6_OPTION_CLIENTID:
+ assert_se(optlen == 14);
+
+ opt_clientid = true;
+ break;
+
+ case SD_DHCP6_OPTION_IA_NA:
+ assert_se(optlen == 94);
+ assert_se(!memcmp(optval, &msg_advertise[26], optlen));
+
+ val = htobe32(0x0ecfa37d);
+ assert_se(!memcmp(optval, &val, sizeof(val)));
+
+ val = htobe32(80);
+ assert_se(!memcmp(optval + 4, &val, sizeof(val)));
+
+ val = htobe32(120);
+ assert_se(!memcmp(optval + 8, &val, sizeof(val)));
+
+ assert_se(dhcp6_option_parse_ia(option, &lease->ia) >= 0);
+
+ break;
+
+ case SD_DHCP6_OPTION_SERVERID:
+ assert_se(optlen == 14);
+ assert_se(!memcmp(optval, &msg_advertise[179], optlen));
+
+ assert_se(dhcp6_lease_set_serverid(lease, optval,
+ optlen) >= 0);
+ break;
+
+ case SD_DHCP6_OPTION_PREFERENCE:
+ assert_se(optlen == 1);
+ assert_se(!*optval);
+
+ assert_se(dhcp6_lease_set_preference(lease,
+ *optval) >= 0);
+ break;
+
+ case SD_DHCP6_OPTION_ELAPSED_TIME:
+ assert_se(optlen == 2);
+
+ break;
+
+ case SD_DHCP6_OPTION_DNS_SERVERS:
+ assert_se(optlen == 16);
+ assert_se(dhcp6_lease_set_dns(lease, optval,
+ optlen) >= 0);
+ break;
+
+ case SD_DHCP6_OPTION_DOMAIN_LIST:
+ assert_se(optlen == 11);
+ assert_se(dhcp6_lease_set_domains(lease, optval,
+ optlen) >= 0);
+ break;
+
+ case SD_DHCP6_OPTION_SNTP_SERVERS:
+ assert_se(optlen == 16);
+ assert_se(dhcp6_lease_set_sntp(lease, optval,
+ optlen) >= 0);
+ break;
+
+ default:
+ break;
+ }
+
+ pos += sizeof(*option) + optlen;
+ }
+
+ assert_se(pos == len);
+ assert_se(opt_clientid);
+
+ sd_dhcp6_lease_reset_address_iter(lease);
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) >= 0);
+ assert_se(!memcmp(&addr, &msg_advertise[42], sizeof(addr)));
+ assert_se(lt_pref == 150);
+ assert_se(lt_valid == 180);
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) == -ENOMSG);
+
+ sd_dhcp6_lease_reset_address_iter(lease);
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) >= 0);
+ assert_se(!memcmp(&addr, &msg_advertise[42], sizeof(addr)));
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) == -ENOMSG);
+ sd_dhcp6_lease_reset_address_iter(lease);
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) >= 0);
+ assert_se(!memcmp(&addr, &msg_advertise[42], sizeof(addr)));
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) == -ENOMSG);
+
+ assert_se(dhcp6_lease_get_serverid(lease, &opt, &len) >= 0);
+ assert_se(len == 14);
+ assert_se(!memcmp(opt, &msg_advertise[179], len));
+
+ assert_se(dhcp6_lease_get_preference(lease, &preference) >= 0);
+ assert_se(preference == 0);
+
+ r = sd_dhcp6_lease_get_dns(lease, &addrs);
+ assert_se(r == 1);
+ assert_se(!memcmp(addrs, &msg_advertise[124], r * 16));
+
+ r = sd_dhcp6_lease_get_domains(lease, &domains);
+ assert_se(r == 1);
+ assert_se(!strcmp("lab.intra", domains[0]));
+ assert_se(domains[1] == NULL);
+
+ r = sd_dhcp6_lease_get_ntp_addrs(lease, &addrs);
+ assert_se(r == 1);
+ assert_se(!memcmp(addrs, &msg_advertise[159], r * 16));
+
+ return 0;
+}
+
+static int test_hangcheck(sd_event_source *s, uint64_t usec, void *userdata) {
+ assert_not_reached("Test case should have completed in 2 seconds");
+
+ return 0;
+}
+
+static void test_client_solicit_cb(sd_dhcp6_client *client, int event,
+ void *userdata) {
+ sd_event *e = userdata;
+ sd_dhcp6_lease *lease;
+ struct in6_addr *addrs;
+ char **domains;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(e);
+ assert_se(event == SD_DHCP6_CLIENT_EVENT_IP_ACQUIRE);
+
+ assert_se(sd_dhcp6_client_get_lease(client, &lease) >= 0);
+
+ assert_se(sd_dhcp6_lease_get_domains(lease, &domains) == 1);
+ assert_se(!strcmp("lab.intra", domains[0]));
+ assert_se(domains[1] == NULL);
+
+ assert_se(sd_dhcp6_lease_get_dns(lease, &addrs) == 1);
+ assert_se(!memcmp(addrs, &msg_advertise[124], 16));
+
+ assert_se(sd_dhcp6_lease_get_ntp_addrs(lease, &addrs) == 1);
+ assert_se(!memcmp(addrs, &msg_advertise[159], 16));
+
+ assert_se(sd_dhcp6_client_set_request_option(client, SD_DHCP6_OPTION_DNS_SERVERS) == -EBUSY);
+
+ sd_event_exit(e, 0);
+}
+
+static int test_client_send_reply(DHCP6Message *request) {
+ DHCP6Message reply;
+
+ reply.transaction_id = request->transaction_id;
+ reply.type = DHCP6_REPLY;
+
+ memcpy(msg_reply, &reply.transaction_id, 4);
+
+ memcpy(&msg_reply[26], test_duid, sizeof(test_duid));
+
+ memcpy(&msg_reply[44], &test_iaid, sizeof(test_iaid));
+
+ assert_se(write(test_dhcp_fd[1], msg_reply, sizeof(msg_reply))
+ == sizeof(msg_reply));
+
+ return 0;
+}
+
+static int test_client_verify_request(DHCP6Message *request, size_t len) {
+ _cleanup_(sd_dhcp6_lease_unrefp) sd_dhcp6_lease *lease = NULL;
+ size_t pos = 0;
+ bool found_clientid = false, found_iana = false, found_serverid = false,
+ found_elapsed_time = false, found_fqdn = false;
+ struct in6_addr addr;
+ be32_t val;
+ uint32_t lt_pref, lt_valid;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(request->type == DHCP6_REQUEST);
+ assert_se(dhcp6_lease_new(&lease) >= 0);
+
+ len -= sizeof(DHCP6Message);
+
+ while (pos < len) {
+ DHCP6Option *option = (DHCP6Option *)&request->options[pos];
+ uint16_t optcode = be16toh(option->code);
+ uint16_t optlen = be16toh(option->len);
+ uint8_t *optval = option->data;
+
+ switch(optcode) {
+ case SD_DHCP6_OPTION_CLIENTID:
+ assert_se(!found_clientid);
+ found_clientid = true;
+
+ assert_se(!memcmp(optval, &test_duid,
+ sizeof(test_duid)));
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_NA:
+ assert_se(!found_iana);
+ found_iana = true;
+
+ assert_se(optlen == 40);
+ assert_se(!memcmp(optval, &test_iaid, sizeof(test_iaid)));
+
+ val = htobe32(80);
+ assert_se(!memcmp(optval + 4, &val, sizeof(val)));
+
+ val = htobe32(120);
+ assert_se(!memcmp(optval + 8, &val, sizeof(val)));
+
+ assert_se(!dhcp6_option_parse_ia(option, &lease->ia));
+
+ break;
+
+ case SD_DHCP6_OPTION_SERVERID:
+ assert_se(!found_serverid);
+ found_serverid = true;
+
+ assert_se(optlen == 14);
+ assert_se(!memcmp(&msg_advertise[179], optval, optlen));
+
+ break;
+
+ case SD_DHCP6_OPTION_ELAPSED_TIME:
+ assert_se(!found_elapsed_time);
+ found_elapsed_time = true;
+
+ assert_se(optlen == 2);
+
+ break;
+ case SD_DHCP6_OPTION_FQDN:
+ assert_se(!found_fqdn);
+ found_fqdn = true;
+
+ assert_se(optlen == 17);
+
+ assert_se(optval[0] == 0x01);
+ assert_se(!memcmp(optval + 1, fqdn_wire, sizeof(fqdn_wire)));
+ break;
+ }
+
+ pos += sizeof(*option) + optlen;
+ }
+
+ assert_se(found_clientid && found_iana && found_serverid &&
+ found_elapsed_time);
+
+ sd_dhcp6_lease_reset_address_iter(lease);
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) >= 0);
+ assert_se(!memcmp(&addr, &msg_advertise[42], sizeof(addr)));
+ assert_se(lt_pref == 150);
+ assert_se(lt_valid == 180);
+
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) == -ENOMSG);
+
+ return 0;
+}
+
+static int test_client_send_advertise(DHCP6Message *solicit) {
+ DHCP6Message advertise;
+
+ advertise.transaction_id = solicit->transaction_id;
+ advertise.type = DHCP6_ADVERTISE;
+
+ memcpy(msg_advertise, &advertise.transaction_id, 4);
+
+ memcpy(&msg_advertise[8], test_duid, sizeof(test_duid));
+
+ memcpy(&msg_advertise[26], &test_iaid, sizeof(test_iaid));
+
+ assert_se(write(test_dhcp_fd[1], msg_advertise, sizeof(msg_advertise))
+ == sizeof(msg_advertise));
+
+ return 0;
+}
+
+static int test_client_verify_solicit(DHCP6Message *solicit, size_t len) {
+ bool found_clientid = false, found_iana = false,
+ found_elapsed_time = false, found_fqdn = false;
+ size_t pos = 0;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(solicit->type == DHCP6_SOLICIT);
+
+ len -= sizeof(DHCP6Message);
+
+ while (pos < len) {
+ DHCP6Option *option = (DHCP6Option *)&solicit->options[pos];
+ uint16_t optcode = be16toh(option->code);
+ uint16_t optlen = be16toh(option->len);
+ uint8_t *optval = option->data;
+
+ switch(optcode) {
+ case SD_DHCP6_OPTION_CLIENTID:
+ assert_se(!found_clientid);
+ found_clientid = true;
+
+ assert_se(optlen == sizeof(test_duid));
+ memcpy(&test_duid, optval, sizeof(test_duid));
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_NA:
+ assert_se(!found_iana);
+ found_iana = true;
+
+ assert_se(optlen == 12);
+
+ memcpy(&test_iaid, optval, sizeof(test_iaid));
+
+ break;
+
+ case SD_DHCP6_OPTION_ELAPSED_TIME:
+ assert_se(!found_elapsed_time);
+ found_elapsed_time = true;
+
+ assert_se(optlen == 2);
+
+ break;
+
+ case SD_DHCP6_OPTION_FQDN:
+ assert_se(!found_fqdn);
+ found_fqdn = true;
+
+ assert_se(optlen == 17);
+
+ assert_se(optval[0] == 0x01);
+ assert_se(!memcmp(optval + 1, fqdn_wire, sizeof(fqdn_wire)));
+
+ break;
+ }
+
+ pos += sizeof(*option) + optlen;
+ }
+
+ assert_se(pos == len);
+ assert_se(found_clientid && found_iana && found_elapsed_time);
+
+ return 0;
+}
+
+static void test_client_information_cb(sd_dhcp6_client *client, int event,
+ void *userdata) {
+ sd_event *e = userdata;
+ sd_dhcp6_lease *lease;
+ struct in6_addr *addrs;
+ struct in6_addr address = { { { 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01 } } };
+ char **domains;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(e);
+ assert_se(event == SD_DHCP6_CLIENT_EVENT_INFORMATION_REQUEST);
+
+ assert_se(sd_dhcp6_client_get_lease(client, &lease) >= 0);
+
+ assert_se(sd_dhcp6_lease_get_domains(lease, &domains) == 1);
+ assert_se(!strcmp("lab.intra", domains[0]));
+ assert_se(domains[1] == NULL);
+
+ assert_se(sd_dhcp6_lease_get_dns(lease, &addrs) == 1);
+ assert_se(!memcmp(addrs, &msg_advertise[124], 16));
+
+ assert_se(sd_dhcp6_lease_get_ntp_addrs(lease, &addrs) == 1);
+ assert_se(!memcmp(addrs, &msg_advertise[159], 16));
+
+ assert_se(sd_dhcp6_client_set_information_request(client, false) == -EBUSY);
+ assert_se(sd_dhcp6_client_set_callback(client, NULL, e) >= 0);
+ assert_se(sd_dhcp6_client_stop(client) >= 0);
+ assert_se(sd_dhcp6_client_set_information_request(client, false) >= 0);
+
+ assert_se(sd_dhcp6_client_set_callback(client,
+ test_client_solicit_cb, e) >= 0);
+
+ assert_se(sd_dhcp6_client_set_local_address(client, &address) >= 0);
+
+ assert_se(sd_dhcp6_client_start(client) >= 0);
+
+}
+
+static int test_client_verify_information_request(DHCP6Message *information_request,
+ size_t len) {
+
+ _cleanup_(sd_dhcp6_lease_unrefp) sd_dhcp6_lease *lease = NULL;
+ size_t pos = 0;
+ bool found_clientid = false, found_elapsed_time = false;
+ struct in6_addr addr;
+ uint32_t lt_pref, lt_valid;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(information_request->type == DHCP6_INFORMATION_REQUEST);
+ assert_se(dhcp6_lease_new(&lease) >= 0);
+
+ len -= sizeof(DHCP6Message);
+
+ while (pos < len) {
+ DHCP6Option *option = (DHCP6Option *)&information_request->options[pos];
+ uint16_t optcode = be16toh(option->code);
+ uint16_t optlen = be16toh(option->len);
+ uint8_t *optval = option->data;
+
+ switch(optcode) {
+ case SD_DHCP6_OPTION_CLIENTID:
+ assert_se(!found_clientid);
+ found_clientid = true;
+
+ assert_se(optlen == sizeof(test_duid));
+ memcpy(&test_duid, optval, sizeof(test_duid));
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_NA:
+ assert_not_reached("IA TA option must not be present");
+
+ break;
+
+ case SD_DHCP6_OPTION_SERVERID:
+ assert_not_reached("Server ID option must not be present");
+
+ break;
+
+ case SD_DHCP6_OPTION_ELAPSED_TIME:
+ assert_se(!found_elapsed_time);
+ found_elapsed_time = true;
+
+ assert_se(optlen == 2);
+
+ break;
+ }
+
+ pos += sizeof(*option) + optlen;
+ }
+
+ assert_se(pos == len);
+ assert_se(found_clientid && found_elapsed_time);
+
+ sd_dhcp6_lease_reset_address_iter(lease);
+
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) == -ENOMSG);
+
+ return 0;
+}
+
+int dhcp6_network_send_udp_socket(int s, struct in6_addr *server_address,
+ const void *packet, size_t len) {
+ struct in6_addr mcast =
+ IN6ADDR_ALL_DHCP6_RELAY_AGENTS_AND_SERVERS_INIT;
+ DHCP6Message *message;
+
+ assert_se(s == test_dhcp_fd[0]);
+ assert_se(server_address);
+ assert_se(packet);
+ assert_se(len > sizeof(DHCP6Message) + 4);
+ assert_se(IN6_ARE_ADDR_EQUAL(server_address, &mcast));
+
+ message = (DHCP6Message *)packet;
+
+ assert_se(message->transaction_id & 0x00ffffff);
+
+ if (test_client_message_num == 0) {
+ test_client_verify_information_request(message, len);
+ test_client_send_reply(message);
+ test_client_message_num++;
+ } else if (test_client_message_num == 1) {
+ test_client_verify_solicit(message, len);
+ test_client_send_advertise(message);
+ test_client_message_num++;
+ } else if (test_client_message_num == 2) {
+ test_client_verify_request(message, len);
+ test_client_send_reply(message);
+ test_client_message_num++;
+ }
+
+ return len;
+}
+
+int dhcp6_network_bind_udp_socket(int index, struct in6_addr *local_address) {
+ assert_se(index == test_index);
+
+ if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_dhcp_fd) < 0)
+ return -errno;
+
+ return test_dhcp_fd[0];
+}
+
+static int test_client_solicit(sd_event *e) {
+ sd_dhcp6_client *client;
+ usec_t time_now = now(clock_boottime_or_monotonic());
+ struct in6_addr address = { { { 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01 } } };
+ int val;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(sd_dhcp6_client_new(&client) >= 0);
+ assert_se(client);
+
+ assert_se(sd_dhcp6_client_attach_event(client, e, 0) >= 0);
+
+ assert_se(sd_dhcp6_client_set_ifindex(client, test_index) == 0);
+ assert_se(sd_dhcp6_client_set_mac(client, (const uint8_t *) &mac_addr,
+ sizeof (mac_addr),
+ ARPHRD_ETHER) >= 0);
+ assert_se(sd_dhcp6_client_set_fqdn(client, "host.lab.intra") == 1);
+
+ assert_se(sd_dhcp6_client_get_information_request(client, &val) >= 0);
+ assert_se(val == 0);
+ assert_se(sd_dhcp6_client_set_information_request(client, 42) >= 0);
+ assert_se(sd_dhcp6_client_get_information_request(client, &val) >= 0);
+ assert_se(val);
+
+ assert_se(sd_dhcp6_client_set_callback(client,
+ test_client_information_cb, e) >= 0);
+
+ assert_se(sd_event_add_time(e, &hangcheck, clock_boottime_or_monotonic(),
+ time_now + 2 * USEC_PER_SEC, 0,
+ test_hangcheck, NULL) >= 0);
+
+ assert_se(sd_dhcp6_client_set_local_address(client, &address) >= 0);
+
+ assert_se(sd_dhcp6_client_start(client) >= 0);
+
+ sd_event_loop(e);
+
+ hangcheck = sd_event_source_unref(hangcheck);
+
+ assert_se(!sd_dhcp6_client_unref(client));
+
+ test_dhcp_fd[1] = safe_close(test_dhcp_fd[1]);
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *e;
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_client_basic(e);
+ test_option(e);
+ test_option_status(e);
+ test_advertise_option(e);
+ test_client_solicit(e);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-ipv4ll-manual.c b/src/libsystemd-network/test-ipv4ll-manual.c
new file mode 100644
index 0000000..fd827ff
--- /dev/null
+++ b/src/libsystemd-network/test-ipv4ll-manual.c
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <net/if.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/veth.h>
+
+#include "sd-event.h"
+#include "sd-ipv4ll.h"
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "in-addr-util.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "util.h"
+
+static void ll_handler(sd_ipv4ll *ll, int event, void *userdata) {
+ _cleanup_free_ char *address = NULL;
+ struct in_addr addr = {};
+
+ assert_se(ll);
+
+ if (sd_ipv4ll_get_address(ll, &addr) >= 0)
+ assert_se(in_addr_to_string(AF_INET, (const union in_addr_union*) &addr, &address) >= 0);
+
+ switch (event) {
+ case SD_IPV4LL_EVENT_BIND:
+ log_info("bound %s", strna(address));
+ break;
+ case SD_IPV4LL_EVENT_CONFLICT:
+ log_info("conflict on %s", strna(address));
+ break;
+ case SD_IPV4LL_EVENT_STOP:
+ log_error("the client was stopped with address %s", strna(address));
+ break;
+ default:
+ assert_not_reached("invalid LL event");
+ }
+}
+
+static int client_run(int ifindex, const char *seed_str, const struct ether_addr *ha, sd_event *e) {
+ sd_ipv4ll *ll;
+
+ assert_se(sd_ipv4ll_new(&ll) >= 0);
+ assert_se(sd_ipv4ll_attach_event(ll, e, 0) >= 0);
+
+ assert_se(sd_ipv4ll_set_ifindex(ll, ifindex) >= 0);
+ assert_se(sd_ipv4ll_set_mac(ll, ha) >= 0);
+ assert_se(sd_ipv4ll_set_callback(ll, ll_handler, NULL) >= 0);
+
+ if (seed_str) {
+ unsigned seed;
+
+ assert_se(safe_atou(seed_str, &seed) >= 0);
+
+ assert_se(sd_ipv4ll_set_address_seed(ll, seed) >= 0);
+ }
+
+ log_info("starting IPv4LL client");
+
+ assert_se(sd_ipv4ll_start(ll) >= 0);
+
+ assert_se(sd_event_loop(e) >= 0);
+
+ assert_se(!sd_ipv4ll_unref(ll));
+
+ return EXIT_SUCCESS;
+}
+
+static int test_ll(const char *ifname, const char *seed) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *reply = NULL;
+ struct ether_addr ha;
+ int ifindex;
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ assert_se(sd_netlink_attach_event(rtnl, e, 0) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, 0) >= 0);
+ assert_se(sd_netlink_message_append_string(m, IFLA_IFNAME, ifname) >= 0);
+ assert_se(sd_netlink_call(rtnl, m, 0, &reply) >= 0);
+
+ assert_se(sd_rtnl_message_link_get_ifindex(reply, &ifindex) >= 0);
+ assert_se(sd_netlink_message_read_ether_addr(reply, IFLA_ADDRESS, &ha) >= 0);
+
+ client_run(ifindex, seed, &ha, e);
+
+ return EXIT_SUCCESS;
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ if (argc == 2)
+ return test_ll(argv[1], NULL);
+ else if (argc == 3)
+ return test_ll(argv[1], argv[2]);
+ else {
+ log_error("This program takes one or two arguments.\n"
+ "\t %s <ifname> [<seed>]", program_invocation_short_name);
+ return EXIT_FAILURE;
+ }
+}
diff --git a/src/libsystemd-network/test-ipv4ll.c b/src/libsystemd-network/test-ipv4ll.c
new file mode 100644
index 0000000..2e1488c
--- /dev/null
+++ b/src/libsystemd-network/test-ipv4ll.c
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Axis Communications AB. All rights reserved.
+***/
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-ipv4ll.h"
+
+#include "arp-util.h"
+#include "fd-util.h"
+#include "socket-util.h"
+#include "tests.h"
+#include "util.h"
+
+static bool verbose = false;
+static bool extended = false;
+static int test_fd[2];
+
+static int basic_request_handler_bind = 0;
+static int basic_request_handler_stop = 0;
+static void* basic_request_handler_userdata = (void*) 0xCABCAB;
+
+static void basic_request_handler(sd_ipv4ll *ll, int event, void *userdata) {
+ assert_se(userdata == basic_request_handler_userdata);
+
+ switch(event) {
+ case SD_IPV4LL_EVENT_STOP:
+ basic_request_handler_stop = 1;
+ break;
+ case SD_IPV4LL_EVENT_BIND:
+ basic_request_handler_bind = 1;
+ break;
+ default:
+ assert_se(0);
+ break;
+ }
+}
+
+static int arp_network_send_raw_socket(int fd, int ifindex,
+ const struct ether_arp *arp) {
+ assert_se(arp);
+ assert_se(ifindex > 0);
+ assert_se(fd >= 0);
+
+ if (send(fd, arp, sizeof(struct ether_arp), 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int arp_send_probe(int fd, int ifindex,
+ be32_t pa, const struct ether_addr *ha) {
+ struct ether_arp ea = {};
+
+ assert_se(fd >= 0);
+ assert_se(ifindex > 0);
+ assert_se(pa != 0);
+ assert_se(ha);
+
+ return arp_network_send_raw_socket(fd, ifindex, &ea);
+}
+
+int arp_send_announcement(int fd, int ifindex,
+ be32_t pa, const struct ether_addr *ha) {
+ struct ether_arp ea = {};
+
+ assert_se(fd >= 0);
+ assert_se(ifindex > 0);
+ assert_se(pa != 0);
+ assert_se(ha);
+
+ return arp_network_send_raw_socket(fd, ifindex, &ea);
+}
+
+int arp_network_bind_raw_socket(int index, be32_t address, const struct ether_addr *eth_mac) {
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_fd) < 0)
+ return -errno;
+
+ return test_fd[0];
+}
+
+static void test_public_api_setters(sd_event *e) {
+ struct in_addr address = {};
+ uint64_t seed = 0;
+ sd_ipv4ll *ll;
+ struct ether_addr mac_addr = {
+ .ether_addr_octet = {'A', 'B', 'C', '1', '2', '3'}};
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ assert_se(sd_ipv4ll_new(&ll) == 0);
+ assert_se(ll);
+
+ assert_se(sd_ipv4ll_attach_event(NULL, NULL, 0) == -EINVAL);
+ assert_se(sd_ipv4ll_attach_event(ll, e, 0) == 0);
+ assert_se(sd_ipv4ll_attach_event(ll, e, 0) == -EBUSY);
+
+ assert_se(sd_ipv4ll_set_callback(NULL, NULL, NULL) == -EINVAL);
+ assert_se(sd_ipv4ll_set_callback(ll, NULL, NULL) == 0);
+
+ assert_se(sd_ipv4ll_set_address(ll, &address) == -EINVAL);
+ address.s_addr |= htobe32(169U << 24 | 254U << 16);
+ assert_se(sd_ipv4ll_set_address(ll, &address) == -EINVAL);
+ address.s_addr |= htobe32(0x00FF);
+ assert_se(sd_ipv4ll_set_address(ll, &address) == -EINVAL);
+ address.s_addr |= htobe32(0xF000);
+ assert_se(sd_ipv4ll_set_address(ll, &address) == 0);
+ address.s_addr |= htobe32(0x0F00);
+ assert_se(sd_ipv4ll_set_address(ll, &address) == -EINVAL);
+
+ assert_se(sd_ipv4ll_set_address_seed(NULL, seed) == -EINVAL);
+ assert_se(sd_ipv4ll_set_address_seed(ll, seed) == 0);
+
+ assert_se(sd_ipv4ll_set_mac(NULL, NULL) == -EINVAL);
+ assert_se(sd_ipv4ll_set_mac(ll, NULL) == -EINVAL);
+ assert_se(sd_ipv4ll_set_mac(ll, &mac_addr) == 0);
+
+ assert_se(sd_ipv4ll_set_ifindex(NULL, -1) == -EINVAL);
+ assert_se(sd_ipv4ll_set_ifindex(ll, -1) == -EINVAL);
+ assert_se(sd_ipv4ll_set_ifindex(ll, -99) == -EINVAL);
+ assert_se(sd_ipv4ll_set_ifindex(ll, 1) == 0);
+ assert_se(sd_ipv4ll_set_ifindex(ll, 99) == 0);
+
+ assert_se(sd_ipv4ll_ref(ll) == ll);
+ assert_se(sd_ipv4ll_unref(ll) == NULL);
+
+ /* Cleanup */
+ assert_se(sd_ipv4ll_unref(ll) == NULL);
+}
+
+static void test_basic_request(sd_event *e) {
+
+ sd_ipv4ll *ll;
+ struct ether_arp arp;
+ struct ether_addr mac_addr = {
+ .ether_addr_octet = {'A', 'B', 'C', '1', '2', '3'}};
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ assert_se(sd_ipv4ll_new(&ll) == 0);
+ assert_se(sd_ipv4ll_start(ll) == -EINVAL);
+
+ assert_se(sd_ipv4ll_attach_event(ll, e, 0) == 0);
+ assert_se(sd_ipv4ll_start(ll) == -EINVAL);
+
+ assert_se(sd_ipv4ll_set_mac(ll, &mac_addr) == 0);
+ assert_se(sd_ipv4ll_start(ll) == -EINVAL);
+
+ assert_se(sd_ipv4ll_set_callback(ll, basic_request_handler,
+ basic_request_handler_userdata) == 0);
+ assert_se(sd_ipv4ll_start(ll) == -EINVAL);
+
+ assert_se(sd_ipv4ll_set_ifindex(ll, 1) == 0);
+ assert_se(sd_ipv4ll_start(ll) == 0);
+
+ sd_event_run(e, (uint64_t) -1);
+ assert_se(sd_ipv4ll_start(ll) == -EBUSY);
+
+ assert_se(sd_ipv4ll_is_running(ll));
+
+ /* PROBE */
+ sd_event_run(e, (uint64_t) -1);
+ assert_se(recv(test_fd[1], &arp, sizeof(struct ether_arp), 0) == sizeof(struct ether_arp));
+
+ if (extended) {
+ /* PROBE */
+ sd_event_run(e, (uint64_t) -1);
+ assert_se(recv(test_fd[1], &arp, sizeof(struct ether_arp), 0) == sizeof(struct ether_arp));
+
+ /* PROBE */
+ sd_event_run(e, (uint64_t) -1);
+ assert_se(recv(test_fd[1], &arp, sizeof(struct ether_arp), 0) == sizeof(struct ether_arp));
+
+ sd_event_run(e, (uint64_t) -1);
+ assert_se(basic_request_handler_bind == 1);
+ }
+
+ sd_ipv4ll_stop(ll);
+ assert_se(basic_request_handler_stop == 1);
+
+ /* Cleanup */
+ assert_se(sd_ipv4ll_unref(ll) == NULL);
+ safe_close(test_fd[1]);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ test_public_api_setters(e);
+ test_basic_request(e);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-lldp.c b/src/libsystemd-network/test-lldp.c
new file mode 100644
index 0000000..b6c896f
--- /dev/null
+++ b/src/libsystemd-network/test-lldp.c
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <net/ethernet.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+#include "sd-lldp.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "lldp-network.h"
+#include "macro.h"
+#include "string-util.h"
+
+#define TEST_LLDP_PORT "em1"
+#define TEST_LLDP_TYPE_SYSTEM_NAME "systemd-lldp"
+#define TEST_LLDP_TYPE_SYSTEM_DESC "systemd-lldp-desc"
+
+static int test_fd[2] = { -1, -1 };
+static int lldp_handler_calls;
+
+int lldp_network_bind_raw_socket(int ifindex) {
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_fd) < 0)
+ return -errno;
+
+ return test_fd[0];
+}
+
+static void lldp_handler(sd_lldp *lldp, sd_lldp_event event, sd_lldp_neighbor *n, void *userdata) {
+ lldp_handler_calls++;
+}
+
+static int start_lldp(sd_lldp **lldp, sd_event *e, sd_lldp_callback_t cb, void *cb_data) {
+ int r;
+
+ r = sd_lldp_new(lldp);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_set_ifindex(*lldp, 42);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_set_callback(*lldp, cb, cb_data);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_attach_event(*lldp, e, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_start(*lldp);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int stop_lldp(sd_lldp *lldp) {
+ int r;
+
+ r = sd_lldp_stop(lldp);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_detach_event(lldp);
+ if (r < 0)
+ return r;
+
+ sd_lldp_unref(lldp);
+ safe_close(test_fd[1]);
+
+ return 0;
+}
+
+static void test_receive_basic_packet(sd_event *e) {
+
+ static const uint8_t frame[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x07, 0x04, 0x00, 0x01, 0x02, /* Chassis: MAC, 00:01:02:03:04:05 */
+ 0x03, 0x04, 0x05,
+ 0x04, 0x04, 0x05, 0x31, 0x2f, 0x33, /* Port: interface name, "1/3" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ /* LLDP optional TLVs */
+ 0x08, 0x04, 0x50, 0x6f, 0x72, 0x74, /* Port Description: "Port" */
+ 0x0a, 0x03, 0x53, 0x59, 0x53, /* System Name: "SYS" */
+ 0x0c, 0x04, 0x66, 0x6f, 0x6f, 0x00, /* System Description: "foo" (NULL-terminated) */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+
+ sd_lldp *lldp;
+ sd_lldp_neighbor **neighbors;
+ uint8_t type;
+ const void *data;
+ uint16_t ttl;
+ size_t length;
+ const char *str;
+
+ lldp_handler_calls = 0;
+ assert_se(start_lldp(&lldp, e, lldp_handler, NULL) == 0);
+
+ assert_se(write(test_fd[1], frame, sizeof(frame)) == sizeof(frame));
+ sd_event_run(e, 0);
+ assert_se(lldp_handler_calls == 1);
+ assert_se(sd_lldp_get_neighbors(lldp, &neighbors) == 1);
+
+ assert_se(sd_lldp_neighbor_get_chassis_id(neighbors[0], &type, &data, &length) == 0);
+ assert_se(type == SD_LLDP_CHASSIS_SUBTYPE_MAC_ADDRESS);
+ assert_se(length == ETH_ALEN);
+ assert_se(!memcmp(data, "\x00\x01\x02\x03\x04\x05", ETH_ALEN));
+
+ assert_se(sd_lldp_neighbor_get_port_id(neighbors[0], &type, &data, &length) == 0);
+ assert_se(type == SD_LLDP_PORT_SUBTYPE_INTERFACE_NAME);
+ assert_se(length == 3);
+ assert_se(!memcmp(data, "1/3", 3));
+
+ assert_se(sd_lldp_neighbor_get_port_description(neighbors[0], &str) == 0);
+ assert_se(streq(str, "Port"));
+
+ assert_se(sd_lldp_neighbor_get_system_name(neighbors[0], &str) == 0);
+ assert_se(streq(str, "SYS"));
+
+ assert_se(sd_lldp_neighbor_get_system_description(neighbors[0], &str) == 0);
+ assert_se(streq(str, "foo"));
+
+ assert_se(sd_lldp_neighbor_get_ttl(neighbors[0], &ttl) == 0);
+ assert_se(ttl == 120);
+
+ sd_lldp_neighbor_unref(neighbors[0]);
+ free(neighbors);
+
+ assert_se(stop_lldp(lldp) == 0);
+}
+
+static void test_receive_incomplete_packet(sd_event *e) {
+ sd_lldp *lldp;
+ sd_lldp_neighbor **neighbors;
+ uint8_t frame[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x07, 0x04, 0x00, 0x01, 0x02, /* Chassis: MAC, 00:01:02:03:04:05 */
+ 0x03, 0x04, 0x05,
+ 0x04, 0x04, 0x05, 0x31, 0x2f, 0x33, /* Port: interface name, "1/3" */
+ /* Missing TTL */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+
+ lldp_handler_calls = 0;
+ assert_se(start_lldp(&lldp, e, lldp_handler, NULL) == 0);
+
+ assert_se(write(test_fd[1], frame, sizeof(frame)) == sizeof(frame));
+ sd_event_run(e, 0);
+ assert_se(lldp_handler_calls == 0);
+ assert_se(sd_lldp_get_neighbors(lldp, &neighbors) == 0);
+
+ assert_se(stop_lldp(lldp) == 0);
+}
+
+static void test_receive_oui_packet(sd_event *e) {
+ sd_lldp *lldp;
+ sd_lldp_neighbor **neighbors;
+ uint8_t frame[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x07, 0x04, 0x00, 0x01, 0x02, /* Chassis: MAC, 00:01:02:03:04:05 */
+ 0x03, 0x04, 0x05,
+ 0x04, 0x04, 0x05, 0x31, 0x2f, 0x33, /* Port TLV: interface name, "1/3" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ /* LLDP optional TLVs */
+ 0xfe, 0x06, 0x00, 0x80, 0xc2, 0x01, /* Port VLAN ID: 0x1234 */
+ 0x12, 0x34,
+ 0xfe, 0x07, 0x00, 0x80, 0xc2, 0x02, /* Port and protocol: flag 1, PPVID 0x7788 */
+ 0x01, 0x77, 0x88,
+ 0xfe, 0x0d, 0x00, 0x80, 0xc2, 0x03, /* VLAN Name: ID 0x1234, name "Vlan51" */
+ 0x12, 0x34, 0x06, 0x56, 0x6c, 0x61,
+ 0x6e, 0x35, 0x31,
+ 0xfe, 0x06, 0x00, 0x80, 0xc2, 0x06, /* Management VID: 0x0102 */
+ 0x01, 0x02,
+ 0xfe, 0x09, 0x00, 0x80, 0xc2, 0x07, /* Link aggregation: status 1, ID 0x00140012 */
+ 0x01, 0x00, 0x14, 0x00, 0x12,
+ 0xfe, 0x07, 0x00, 0x12, 0x0f, 0x02, /* 802.3 Power via MDI: PSE, MDI enabled */
+ 0x07, 0x01, 0x00,
+ 0x00, 0x00 /* End of LLDPDU */
+ };
+
+ lldp_handler_calls = 0;
+ assert_se(start_lldp(&lldp, e, lldp_handler, NULL) == 0);
+
+ assert_se(write(test_fd[1], frame, sizeof(frame)) == sizeof(frame));
+ sd_event_run(e, 0);
+ assert_se(lldp_handler_calls == 1);
+ assert_se(sd_lldp_get_neighbors(lldp, &neighbors) == 1);
+
+ assert_se(sd_lldp_neighbor_tlv_rewind(neighbors[0]) >= 0);
+ assert_se(sd_lldp_neighbor_tlv_is_type(neighbors[0], SD_LLDP_TYPE_CHASSIS_ID) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_type(neighbors[0], SD_LLDP_TYPE_PORT_ID) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_type(neighbors[0], SD_LLDP_TYPE_TTL) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_oui(neighbors[0], SD_LLDP_OUI_802_1, SD_LLDP_OUI_802_1_SUBTYPE_PORT_VLAN_ID) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_oui(neighbors[0], SD_LLDP_OUI_802_1, SD_LLDP_OUI_802_1_SUBTYPE_PORT_PROTOCOL_VLAN_ID) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_oui(neighbors[0], SD_LLDP_OUI_802_1, SD_LLDP_OUI_802_1_SUBTYPE_VLAN_NAME) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_oui(neighbors[0], SD_LLDP_OUI_802_1, SD_LLDP_OUI_802_1_SUBTYPE_MANAGEMENT_VID) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_oui(neighbors[0], SD_LLDP_OUI_802_1, SD_LLDP_OUI_802_1_SUBTYPE_LINK_AGGREGATION) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_oui(neighbors[0], SD_LLDP_OUI_802_3, SD_LLDP_OUI_802_3_SUBTYPE_POWER_VIA_MDI) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_type(neighbors[0], SD_LLDP_TYPE_END) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) == 0);
+
+ sd_lldp_neighbor_unref(neighbors[0]);
+ free(neighbors);
+
+ assert_se(stop_lldp(lldp) == 0);
+}
+
+static void test_multiple_neighbors_sorted(sd_event *e) {
+
+ static const uint8_t frame1[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x04, 0x01, '1', '/', '2', /* Chassis component: "1/2" */
+ 0x04, 0x04, 0x02, '2', '/', '3', /* Port component: "2/3" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+ static const uint8_t frame2[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x04, 0x01, '2', '/', '1', /* Chassis component: "2/1" */
+ 0x04, 0x04, 0x02, '1', '/', '3', /* Port component: "1/3" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+ static const uint8_t frame3[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x05, 0x01, '2', '/', '1', '0', /* Chassis component: "2/10" */
+ 0x04, 0x04, 0x02, '1', '/', '0', /* Port component: "1/0" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+ static const uint8_t frame4[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x05, 0x01, '2', '/', '1', '9', /* Chassis component: "2/19" */
+ 0x04, 0x04, 0x02, '1', '/', '0', /* Port component: "1/0" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+ static const uint8_t frame5[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x04, 0x01, '1', '/', '2', /* Chassis component: "1/2" */
+ 0x04, 0x05, 0x02, '2', '/', '1', '0', /* Port component: "2/10" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+ static const uint8_t frame6[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x04, 0x01, '1', '/', '2', /* Chassis component: "1/2" */
+ 0x04, 0x05, 0x02, '2', '/', '3', '9', /* Port component: "2/10" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+ static const char* expected[] = {
+ /* ordered pairs of Chassis+Port */
+ "1/2", "2/10",
+ "1/2", "2/3",
+ "1/2", "2/39",
+ "2/1", "1/3",
+ "2/10", "1/0",
+ "2/19", "1/0",
+ };
+
+ sd_lldp *lldp;
+ sd_lldp_neighbor **neighbors;
+ int i;
+ uint8_t type;
+ const void *data;
+ size_t length, expected_length;
+ uint16_t ttl;
+
+ lldp_handler_calls = 0;
+ assert_se(start_lldp(&lldp, e, lldp_handler, NULL) == 0);
+
+ assert_se(write(test_fd[1], frame1, sizeof(frame1)) == sizeof(frame1));
+ sd_event_run(e, 0);
+ assert_se(write(test_fd[1], frame2, sizeof(frame2)) == sizeof(frame2));
+ sd_event_run(e, 0);
+ assert_se(write(test_fd[1], frame3, sizeof(frame3)) == sizeof(frame3));
+ sd_event_run(e, 0);
+ assert_se(write(test_fd[1], frame4, sizeof(frame4)) == sizeof(frame4));
+ sd_event_run(e, 0);
+ assert_se(write(test_fd[1], frame5, sizeof(frame5)) == sizeof(frame5));
+ sd_event_run(e, 0);
+ assert_se(write(test_fd[1], frame6, sizeof(frame6)) == sizeof(frame6));
+ sd_event_run(e, 0);
+ assert_se(lldp_handler_calls == 6);
+
+ assert_se(sd_lldp_get_neighbors(lldp, &neighbors) == 6);
+
+ for (i = 0; i < 6; i++) {
+ assert_se(sd_lldp_neighbor_get_chassis_id(neighbors[i], &type, &data, &length) == 0);
+ assert_se(type == SD_LLDP_CHASSIS_SUBTYPE_CHASSIS_COMPONENT);
+ expected_length = strlen(expected[2 * i]);
+ assert_se(length == expected_length);
+ assert_se(memcmp(data, expected[2 * i], expected_length) == 0);
+
+ assert_se(sd_lldp_neighbor_get_port_id(neighbors[i], &type, &data, &length) == 0);
+ assert_se(type == SD_LLDP_PORT_SUBTYPE_PORT_COMPONENT);
+ expected_length = strlen(expected[2 * i + 1]);
+ assert_se(length == expected_length);
+ assert_se(memcmp(data, expected[2 * i + 1], expected_length) == 0);
+
+ assert_se(sd_lldp_neighbor_get_ttl(neighbors[i], &ttl) == 0);
+ assert_se(ttl == 120);
+ }
+
+ for (i = 0; i < 6; i++)
+ sd_lldp_neighbor_unref(neighbors[i]);
+ free(neighbors);
+
+ assert_se(stop_lldp(lldp) == 0);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+
+ log_set_max_level(LOG_DEBUG);
+
+ /* LLDP reception tests */
+ assert_se(sd_event_new(&e) == 0);
+ test_receive_basic_packet(e);
+ test_receive_incomplete_packet(e);
+ test_receive_oui_packet(e);
+ test_multiple_neighbors_sorted(e);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-ndisc-ra.c b/src/libsystemd-network/test-ndisc-ra.c
new file mode 100644
index 0000000..c4c1c81
--- /dev/null
+++ b/src/libsystemd-network/test-ndisc-ra.c
@@ -0,0 +1,369 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2017 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/icmp6.h>
+#include <arpa/inet.h>
+
+#include "sd-radv.h"
+
+#include "alloc-util.h"
+#include "hexdecoct.h"
+#include "icmp6-util.h"
+#include "socket-util.h"
+#include "strv.h"
+#include "tests.h"
+
+static struct ether_addr mac_addr = {
+ .ether_addr_octet = { 0x78, 0x2b, 0xcb, 0xb3, 0x6d, 0x53 }
+};
+
+static uint8_t advertisement[] = {
+ /* ICMPv6 Router Advertisement, no checksum */
+ 0x86, 0x00, 0x00, 0x00, 0x40, 0xc0, 0x00, 0xb4,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ /* Source Link Layer Address Option */
+ 0x01, 0x01, 0x78, 0x2b, 0xcb, 0xb3, 0x6d, 0x53,
+ /* Prefix Information Option */
+ 0x03, 0x04, 0x40, 0xc0, 0x00, 0x00, 0x01, 0xf4,
+ 0x00, 0x00, 0x01, 0xb8, 0x00, 0x00, 0x00, 0x00,
+ 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe, 0xef,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ /* Prefix Information Option */
+ 0x03, 0x04, 0x40, 0xc0, 0x00, 0x27, 0x8d, 0x00,
+ 0x00, 0x09, 0x3a, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x20, 0x01, 0x0d, 0xb8, 0x0b, 0x16, 0xd0, 0x0d,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ /* Prefix Information Option */
+ 0x03, 0x04, 0x30, 0xc0, 0x00, 0x27, 0x8d, 0x00,
+ 0x00, 0x09, 0x3a, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x20, 0x01, 0x0d, 0xb8, 0xc0, 0x01, 0x0d, 0xad,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ /* Recursive DNS Server Option */
+ 0x19, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c,
+ 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe, 0xef,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+ /* DNS Search List Option */
+ 0x1f, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c,
+ 0x03, 0x6c, 0x61, 0x62, 0x05, 0x69, 0x6e, 0x74,
+ 0x72, 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static sd_event_source *test_hangcheck;
+static bool test_stopped;
+static int test_fd[2];
+static sd_event_source *recv_router_advertisement;
+static struct {
+ struct in6_addr address;
+ unsigned char prefixlen;
+ uint32_t valid;
+ uint32_t preferred;
+ bool succesful;
+} prefix[] = {
+ { { { { 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe, 0xef,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } }, 64,
+ 500, 440, true },
+ { { { { 0x20, 0x01, 0x0d, 0xb8, 0x0b, 0x16, 0xd0, 0x0d,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } }, 64,
+ /* indicate default valid and preferred lifetimes for the test code */
+ 0, 0, true },
+ { { { { 0x20, 0x01, 0x0d, 0xb8, 0x0b, 0x16, 0xd0, 0x0d,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } }, 58,
+ 0, 0,
+ /* indicate that this prefix already exists */
+ false },
+ { { { { 0x20, 0x01, 0x0d, 0xb8, 0x0b, 0x16, 0xd0, 0x0d,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } }, 120,
+ 0, 0,
+ /* indicate that this prefix already exists */
+ false },
+ { { { { 0x20, 0x01, 0x0d, 0xb8, 0x0b, 0x16, 0xd0, 0x0d,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } }, 12,
+ 0, 0,
+ /* indicate that this prefix already exists */
+ false },
+ { { { { 0x20, 0x01, 0x0d, 0xb8, 0xc0, 0x01, 0x0d, 0xad,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } }, 48,
+ 0, 0, true },
+ { { { { 0x20, 0x01, 0x0d, 0xb8, 0xc0, 0x01, 0x0d, 0xad,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } }, 60,
+ 0, 0,
+ /* indicate that this prefix already exists */
+ false },
+};
+
+static const struct in6_addr test_rdnss = { { { 0x20, 0x01, 0x0d, 0xb8,
+ 0xde, 0xad, 0xbe, 0xef,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01 } } };
+static const char *test_dnssl[] = { "lab.intra",
+ NULL };
+
+static int test_rs_hangcheck(sd_event_source *s, uint64_t usec,
+ void *userdata) {
+ assert_se(false);
+
+ return 0;
+}
+
+static void test_radv_prefix(void) {
+ sd_radv_prefix *p;
+
+ printf("* %s\n", __FUNCTION__);
+
+ assert_se(sd_radv_prefix_new(&p) >= 0);
+
+ assert_se(sd_radv_prefix_set_onlink(NULL, true) < 0);
+ assert_se(sd_radv_prefix_set_onlink(p, true) >= 0);
+ assert_se(sd_radv_prefix_set_onlink(p, false) >= 0);
+
+ assert_se(sd_radv_prefix_set_address_autoconfiguration(NULL, true) < 0);
+ assert_se(sd_radv_prefix_set_address_autoconfiguration(p, true) >= 0);
+ assert_se(sd_radv_prefix_set_address_autoconfiguration(p, false) >= 0);
+
+ assert_se(sd_radv_prefix_set_valid_lifetime(NULL, true) < 0);
+ assert_se(sd_radv_prefix_set_valid_lifetime(p, ~0) >= 0);
+ assert_se(sd_radv_prefix_set_valid_lifetime(p, 42) >= 0);
+ assert_se(sd_radv_prefix_set_valid_lifetime(p, 0) >= 0);
+
+ assert_se(sd_radv_prefix_set_preferred_lifetime(NULL, true) < 0);
+ assert_se(sd_radv_prefix_set_preferred_lifetime(p, ~0) >= 0);
+ assert_se(sd_radv_prefix_set_preferred_lifetime(p, 42) >= 0);
+ assert_se(sd_radv_prefix_set_preferred_lifetime(p, 0) >= 0);
+
+ assert_se(sd_radv_prefix_set_prefix(NULL, NULL, 0) < 0);
+ assert_se(sd_radv_prefix_set_prefix(p, NULL, 0) < 0);
+
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 64) >= 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 0) < 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 1) < 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 2) < 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 3) >= 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 125) >= 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 128) >= 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 129) < 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 255) < 0);
+
+ p = sd_radv_prefix_unref(p);
+ assert_se(!p);
+}
+
+static void test_radv(void) {
+ sd_radv *ra;
+
+ printf("* %s\n", __FUNCTION__);
+
+ assert_se(sd_radv_new(&ra) >= 0);
+ assert_se(ra);
+
+ assert_se(sd_radv_set_ifindex(NULL, 0) < 0);
+ assert_se(sd_radv_set_ifindex(ra, 0) >= 0);
+ assert_se(sd_radv_set_ifindex(ra, -1) >= 0);
+ assert_se(sd_radv_set_ifindex(ra, -2) < 0);
+ assert_se(sd_radv_set_ifindex(ra, 42) >= 0);
+
+ assert_se(sd_radv_set_mac(NULL, NULL) < 0);
+ assert_se(sd_radv_set_mac(ra, NULL) >= 0);
+ assert_se(sd_radv_set_mac(ra, &mac_addr) >= 0);
+
+ assert_se(sd_radv_set_mtu(NULL, 0) < 0);
+ assert_se(sd_radv_set_mtu(ra, 0) < 0);
+ assert_se(sd_radv_set_mtu(ra, 1279) < 0);
+ assert_se(sd_radv_set_mtu(ra, 1280) >= 0);
+ assert_se(sd_radv_set_mtu(ra, ~0) >= 0);
+
+ assert_se(sd_radv_set_hop_limit(NULL, 0) < 0);
+ assert_se(sd_radv_set_hop_limit(ra, 0) >= 0);
+ assert_se(sd_radv_set_hop_limit(ra, ~0) >= 0);
+
+ assert_se(sd_radv_set_router_lifetime(NULL, 0) < 0);
+ assert_se(sd_radv_set_router_lifetime(ra, 0) >= 0);
+ assert_se(sd_radv_set_router_lifetime(ra, ~0) >= 0);
+
+ assert_se(sd_radv_set_preference(NULL, 0) < 0);
+ assert_se(sd_radv_set_preference(ra, SD_NDISC_PREFERENCE_LOW) >= 0);
+ assert_se(sd_radv_set_preference(ra, SD_NDISC_PREFERENCE_MEDIUM) >= 0);
+ assert_se(sd_radv_set_preference(ra, SD_NDISC_PREFERENCE_HIGH) >= 0);
+ assert_se(sd_radv_set_preference(ra, ~0) < 0);
+
+ assert_se(sd_radv_set_preference(ra, SD_NDISC_PREFERENCE_HIGH) >= 0);
+ assert_se(sd_radv_set_router_lifetime(ra, 42000) >= 0);
+ assert_se(sd_radv_set_router_lifetime(ra, 0) < 0);
+ assert_se(sd_radv_set_preference(ra, SD_NDISC_PREFERENCE_MEDIUM) >= 0);
+ assert_se(sd_radv_set_router_lifetime(ra, 0) >= 0);
+
+ assert_se(sd_radv_set_managed_information(NULL, true) < 0);
+ assert_se(sd_radv_set_managed_information(ra, true) >= 0);
+ assert_se(sd_radv_set_managed_information(ra, false) >= 0);
+
+ assert_se(sd_radv_set_other_information(NULL, true) < 0);
+ assert_se(sd_radv_set_other_information(ra, true) >= 0);
+ assert_se(sd_radv_set_other_information(ra, false) >= 0);
+
+ assert_se(sd_radv_set_rdnss(NULL, 0, NULL, 0) < 0);
+ assert_se(sd_radv_set_rdnss(ra, 0, NULL, 0) >= 0);
+ assert_se(sd_radv_set_rdnss(ra, 0, NULL, 128) < 0);
+ assert_se(sd_radv_set_rdnss(ra, 600, &test_rdnss, 0) >= 0);
+ assert_se(sd_radv_set_rdnss(ra, 600, &test_rdnss, 1) >= 0);
+ assert_se(sd_radv_set_rdnss(ra, 0, &test_rdnss, 1) >= 0);
+ assert_se(sd_radv_set_rdnss(ra, 0, NULL, 0) >= 0);
+
+ assert_se(sd_radv_set_dnssl(ra, 0, NULL) >= 0);
+ assert_se(sd_radv_set_dnssl(ra, 600, NULL) >= 0);
+ assert_se(sd_radv_set_dnssl(ra, 0, (char **)test_dnssl) >= 0);
+ assert_se(sd_radv_set_dnssl(ra, 600, (char **)test_dnssl) >= 0);
+
+ ra = sd_radv_unref(ra);
+ assert_se(!ra);
+}
+
+int icmp6_bind_router_solicitation(int index) {
+ return -ENOSYS;
+}
+
+int icmp6_bind_router_advertisement(int index) {
+ assert_se(index == 42);
+
+ return test_fd[1];
+}
+
+int icmp6_send_router_solicitation(int s, const struct ether_addr *ether_addr) {
+
+ return 0;
+}
+
+int icmp6_receive(int fd, void *iov_base, size_t iov_len,
+ struct in6_addr *dst, triple_timestamp *timestamp) {
+ assert_se(read (fd, iov_base, iov_len) == (ssize_t)iov_len);
+
+ if (timestamp)
+ triple_timestamp_get(timestamp);
+
+ return 0;
+}
+
+static int radv_recv(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_radv *ra = userdata;
+ unsigned char buf[168];
+ size_t i;
+
+ assert_se(read(test_fd[0], &buf, sizeof(buf)) == sizeof(buf));
+
+ /* router lifetime must be zero when test is stopped */
+ if (test_stopped) {
+ advertisement[6] = 0x00;
+ advertisement[7] = 0x00;
+ }
+
+ printf ("Received Router Advertisement with lifetime %u\n",
+ (advertisement[6] << 8) + advertisement[7]);
+
+ /* test only up to buf size, rest is not yet implemented */
+ for (i = 0; i < sizeof(buf); i++) {
+ if (!(i % 8))
+ printf("%3zd: ", i);
+
+ printf("0x%02x", buf[i]);
+
+ assert_se(buf[i] == advertisement[i]);
+
+ if ((i + 1) % 8)
+ printf(", ");
+ else
+ printf("\n");
+ }
+
+ if (test_stopped) {
+ sd_event *e;
+
+ e = sd_radv_get_event(ra);
+ sd_event_exit(e, 0);
+
+ return 0;
+ }
+
+ assert_se(sd_radv_stop(ra) >= 0);
+ test_stopped = true;
+
+ return 0;
+}
+
+static void test_ra(void) {
+ sd_event *e;
+ sd_radv *ra;
+ usec_t time_now = now(clock_boottime_or_monotonic());
+ unsigned i;
+
+ printf("* %s\n", __FUNCTION__);
+
+ assert_se(socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_fd) >= 0);
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ assert_se(sd_radv_new(&ra) >= 0);
+ assert_se(ra);
+
+ assert_se(sd_radv_attach_event(ra, e, 0) >= 0);
+
+ assert_se(sd_radv_set_ifindex(ra, 42) >= 0);
+ assert_se(sd_radv_set_mac(ra, &mac_addr) >= 0);
+ assert_se(sd_radv_set_router_lifetime(ra, 180) >= 0);
+ assert_se(sd_radv_set_hop_limit(ra, 64) >= 0);
+ assert_se(sd_radv_set_managed_information(ra, true) >= 0);
+ assert_se(sd_radv_set_other_information(ra, true) >= 0);
+ assert_se(sd_radv_set_rdnss(ra, 60, &test_rdnss, 1) >= 0);
+ assert_se(sd_radv_set_dnssl(ra, 60, (char **)test_dnssl) >= 0);
+
+ for (i = 0; i < ELEMENTSOF(prefix); i++) {
+ sd_radv_prefix *p;
+
+ printf("Test prefix %u\n", i);
+ assert_se(sd_radv_prefix_new(&p) >= 0);
+
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[i].address,
+ prefix[i].prefixlen) >= 0);
+ if (prefix[i].valid)
+ assert_se(sd_radv_prefix_set_valid_lifetime(p, prefix[i].valid) >= 0);
+ if (prefix[i].preferred)
+ assert_se(sd_radv_prefix_set_preferred_lifetime(p, prefix[i].preferred) >= 0);
+
+ assert_se((sd_radv_add_prefix(ra, p, false) >= 0) == prefix[i].succesful);
+ assert_se(sd_radv_add_prefix(ra, p, false) < 0);
+
+ p = sd_radv_prefix_unref(p);
+ assert_se(!p);
+ }
+
+ assert_se(sd_event_add_io(e, &recv_router_advertisement, test_fd[0],
+ EPOLLIN, radv_recv, ra) >= 0);
+
+ assert_se(sd_event_add_time(e, &test_hangcheck, clock_boottime_or_monotonic(),
+ time_now + 2 *USEC_PER_SEC, 0,
+ test_rs_hangcheck, NULL) >= 0);
+
+ assert_se(sd_radv_start(ra) >= 0);
+
+ sd_event_loop(e);
+
+ test_hangcheck = sd_event_source_unref(test_hangcheck);
+
+ ra = sd_radv_unref(ra);
+ assert_se(!ra);
+
+ close(test_fd[0]);
+
+ sd_event_unref(e);
+}
+
+int main(int argc, char *argv[]) {
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_radv_prefix();
+ test_radv();
+ test_ra();
+
+ printf("* done\n");
+ return 0;
+}
diff --git a/src/libsystemd-network/test-ndisc-rs.c b/src/libsystemd-network/test-ndisc-rs.c
new file mode 100644
index 0000000..caf94d1
--- /dev/null
+++ b/src/libsystemd-network/test-ndisc-rs.c
@@ -0,0 +1,417 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/icmp6.h>
+#include <arpa/inet.h>
+
+#include "sd-ndisc.h"
+
+#include "alloc-util.h"
+#include "hexdecoct.h"
+#include "icmp6-util.h"
+#include "socket-util.h"
+#include "strv.h"
+#include "ndisc-internal.h"
+#include "tests.h"
+
+static struct ether_addr mac_addr = {
+ .ether_addr_octet = {'A', 'B', 'C', '1', '2', '3'}
+};
+
+static bool verbose = false;
+static sd_event_source *test_hangcheck;
+static int test_fd[2];
+static sd_ndisc *test_timeout_nd;
+
+typedef int (*send_ra_t)(uint8_t flags);
+static send_ra_t send_ra_function;
+
+static void router_dump(sd_ndisc_router *rt) {
+ struct in6_addr addr;
+ char buf[FORMAT_TIMESTAMP_MAX];
+ uint8_t hop_limit;
+ uint64_t t, flags;
+ uint32_t mtu;
+ uint16_t lifetime;
+ unsigned preference;
+ int r;
+
+ assert_se(rt);
+
+ log_info("--");
+ assert_se(sd_ndisc_router_get_address(rt, &addr) == -ENODATA);
+
+ assert_se(sd_ndisc_router_get_timestamp(rt, CLOCK_REALTIME, &t) >= 0);
+ log_info("Timestamp: %s", format_timestamp(buf, sizeof(buf), t));
+
+ assert_se(sd_ndisc_router_get_timestamp(rt, CLOCK_MONOTONIC, &t) >= 0);
+ log_info("Monotonic: %" PRIu64, t);
+
+ if (sd_ndisc_router_get_hop_limit(rt, &hop_limit) < 0)
+ log_info("No hop limit set");
+ else
+ log_info("Hop limit: %u", hop_limit);
+
+ assert_se(sd_ndisc_router_get_flags(rt, &flags) >= 0);
+ log_info("Flags: <%s|%s>",
+ flags & ND_RA_FLAG_OTHER ? "OTHER" : "",
+ flags & ND_RA_FLAG_MANAGED ? "MANAGED" : "");
+
+ assert_se(sd_ndisc_router_get_preference(rt, &preference) >= 0);
+ log_info("Preference: %s",
+ preference == SD_NDISC_PREFERENCE_LOW ? "low" :
+ preference == SD_NDISC_PREFERENCE_HIGH ? "high" : "medium");
+
+ assert_se(sd_ndisc_router_get_lifetime(rt, &lifetime) >= 0);
+ log_info("Lifetime: %" PRIu16, lifetime);
+
+ if (sd_ndisc_router_get_mtu(rt, &mtu) < 0)
+ log_info("No MTU set");
+ else
+ log_info("MTU: %" PRIu32, mtu);
+
+ r = sd_ndisc_router_option_rewind(rt);
+ for (;;) {
+ uint8_t type;
+
+ assert_se(r >= 0);
+
+ if (r == 0)
+ break;
+
+ assert_se(sd_ndisc_router_option_get_type(rt, &type) >= 0);
+
+ log_info(">> Option %u", type);
+
+ switch (type) {
+
+ case SD_NDISC_OPTION_SOURCE_LL_ADDRESS:
+ case SD_NDISC_OPTION_TARGET_LL_ADDRESS: {
+ _cleanup_free_ char *c = NULL;
+ const void *p;
+ size_t n;
+
+ assert_se(sd_ndisc_router_option_get_raw(rt, &p, &n) >= 0);
+ assert_se(n > 2);
+ assert_se(c = hexmem((uint8_t*) p + 2, n - 2));
+
+ log_info("Address: %s", c);
+ break;
+ }
+
+ case SD_NDISC_OPTION_PREFIX_INFORMATION: {
+ uint32_t lifetime_valid, lifetime_preferred;
+ unsigned prefix_len;
+ uint8_t pfl;
+ struct in6_addr a;
+ char buff[INET6_ADDRSTRLEN];
+
+ assert_se(sd_ndisc_router_prefix_get_valid_lifetime(rt, &lifetime_valid) >= 0);
+ log_info("Valid Lifetime: %" PRIu32, lifetime_valid);
+
+ assert_se(sd_ndisc_router_prefix_get_preferred_lifetime(rt, &lifetime_preferred) >= 0);
+ log_info("Preferred Lifetime: %" PRIu32, lifetime_preferred);
+
+ assert_se(sd_ndisc_router_prefix_get_flags(rt, &pfl) >= 0);
+ log_info("Flags: <%s|%s>",
+ pfl & ND_OPT_PI_FLAG_ONLINK ? "ONLINK" : "",
+ pfl & ND_OPT_PI_FLAG_AUTO ? "AUTO" : "");
+
+ assert_se(sd_ndisc_router_prefix_get_prefixlen(rt, &prefix_len) >= 0);
+ log_info("Prefix Length: %u", prefix_len);
+
+ assert_se(sd_ndisc_router_prefix_get_address(rt, &a) >= 0);
+ log_info("Prefix: %s", inet_ntop(AF_INET6, &a, buff, sizeof(buff)));
+
+ break;
+ }
+
+ case SD_NDISC_OPTION_RDNSS: {
+ const struct in6_addr *a;
+ uint32_t lt;
+ int n, i;
+
+ n = sd_ndisc_router_rdnss_get_addresses(rt, &a);
+ assert_se(n > 0);
+
+ for (i = 0; i < n; i++) {
+ char buff[INET6_ADDRSTRLEN];
+ log_info("DNS: %s", inet_ntop(AF_INET6, a + i, buff, sizeof(buff)));
+ }
+
+ assert_se(sd_ndisc_router_rdnss_get_lifetime(rt, &lt) >= 0);
+ log_info("Lifetime: %" PRIu32, lt);
+ break;
+ }
+
+ case SD_NDISC_OPTION_DNSSL: {
+ _cleanup_strv_free_ char **l = NULL;
+ uint32_t lt;
+ int n, i;
+
+ n = sd_ndisc_router_dnssl_get_domains(rt, &l);
+ assert_se(n > 0);
+
+ for (i = 0; i < n; i++)
+ log_info("Domain: %s", l[i]);
+
+ assert_se(sd_ndisc_router_dnssl_get_lifetime(rt, &lt) >= 0);
+ log_info("Lifetime: %" PRIu32, lt);
+ break;
+ }}
+
+ r = sd_ndisc_router_option_next(rt);
+ }
+}
+
+static int test_rs_hangcheck(sd_event_source *s, uint64_t usec,
+ void *userdata) {
+ assert_se(false);
+
+ return 0;
+}
+
+int icmp6_bind_router_solicitation(int index) {
+ assert_se(index == 42);
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_fd) < 0)
+ return -errno;
+
+ return test_fd[0];
+}
+
+int icmp6_bind_router_advertisement(int index) {
+
+ return -ENOSYS;
+}
+
+int icmp6_receive(int fd, void *iov_base, size_t iov_len,
+ struct in6_addr *dst, triple_timestamp *timestamp) {
+ assert_se(read (fd, iov_base, iov_len) == (ssize_t)iov_len);
+
+ if (timestamp)
+ triple_timestamp_get(timestamp);
+
+ return 0;
+}
+
+static int send_ra(uint8_t flags) {
+ uint8_t advertisement[] = {
+ 0x86, 0x00, 0xde, 0x83, 0x40, 0xc0, 0x00, 0xb4,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x04, 0x40, 0xc0, 0x00, 0x00, 0x01, 0xf4,
+ 0x00, 0x00, 0x01, 0xb8, 0x00, 0x00, 0x00, 0x00,
+ 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe, 0xef,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x19, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c,
+ 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe, 0xef,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+ 0x1f, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c,
+ 0x03, 0x6c, 0x61, 0x62, 0x05, 0x69, 0x6e, 0x74,
+ 0x72, 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x01, 0x78, 0x2b, 0xcb, 0xb3, 0x6d, 0x53,
+ };
+
+ advertisement[5] = flags;
+
+ assert_se(write(test_fd[1], advertisement, sizeof(advertisement)) ==
+ sizeof(advertisement));
+
+ if (verbose)
+ printf(" sent RA with flag 0x%02x\n", flags);
+
+ return 0;
+}
+
+int icmp6_send_router_solicitation(int s, const struct ether_addr *ether_addr) {
+ if (!send_ra_function)
+ return 0;
+
+ return send_ra_function(0);
+}
+
+static void test_callback(sd_ndisc *nd, sd_ndisc_event event, sd_ndisc_router *rt, void *userdata) {
+ sd_event *e = userdata;
+ static unsigned idx = 0;
+ uint64_t flags_array[] = {
+ 0,
+ 0,
+ 0,
+ ND_RA_FLAG_OTHER,
+ ND_RA_FLAG_MANAGED
+ };
+ uint64_t flags;
+ uint32_t mtu;
+
+ assert_se(nd);
+
+ if (event != SD_NDISC_EVENT_ROUTER)
+ return;
+
+ router_dump(rt);
+
+ assert_se(sd_ndisc_router_get_flags(rt, &flags) >= 0);
+ assert_se(flags == flags_array[idx]);
+ idx++;
+
+ if (verbose)
+ printf(" got event 0x%02" PRIx64 "\n", flags);
+
+ if (idx < ELEMENTSOF(flags_array)) {
+ send_ra(flags_array[idx]);
+ return;
+ }
+
+ assert_se(sd_ndisc_get_mtu(nd, &mtu) == -ENODATA);
+
+ sd_event_exit(e, 0);
+}
+
+static void test_rs(void) {
+ sd_event *e;
+ sd_ndisc *nd;
+ usec_t time_now = now(clock_boottime_or_monotonic());
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ send_ra_function = send_ra;
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ assert_se(sd_ndisc_new(&nd) >= 0);
+ assert_se(nd);
+
+ assert_se(sd_ndisc_attach_event(nd, e, 0) >= 0);
+
+ assert_se(sd_ndisc_set_ifindex(nd, 42) >= 0);
+ assert_se(sd_ndisc_set_mac(nd, &mac_addr) >= 0);
+ assert_se(sd_ndisc_set_callback(nd, test_callback, e) >= 0);
+
+ assert_se(sd_event_add_time(e, &test_hangcheck, clock_boottime_or_monotonic(),
+ time_now + 2 *USEC_PER_SEC, 0,
+ test_rs_hangcheck, NULL) >= 0);
+
+ assert_se(sd_ndisc_stop(nd) >= 0);
+ assert_se(sd_ndisc_start(nd) >= 0);
+ assert_se(sd_ndisc_stop(nd) >= 0);
+
+ assert_se(sd_ndisc_start(nd) >= 0);
+
+ sd_event_loop(e);
+
+ test_hangcheck = sd_event_source_unref(test_hangcheck);
+
+ nd = sd_ndisc_unref(nd);
+ assert_se(!nd);
+
+ close(test_fd[1]);
+
+ sd_event_unref(e);
+}
+
+static int test_timeout_value(uint8_t flags) {
+ static int count = 0;
+ static usec_t last = 0;
+ sd_ndisc *nd = test_timeout_nd;
+ usec_t min, max;
+ char time_string_min[FORMAT_TIMESPAN_MAX];
+ char time_string_nd[FORMAT_TIMESPAN_MAX];
+ char time_string_max[FORMAT_TIMESPAN_MAX];
+
+ assert_se(nd);
+ assert_se(nd->event);
+
+ if (++count >= 20)
+ sd_event_exit(nd->event, 0);
+
+ if (last == 0) {
+ /* initial RT = IRT + RAND*IRT */
+ min = NDISC_ROUTER_SOLICITATION_INTERVAL -
+ NDISC_ROUTER_SOLICITATION_INTERVAL / 10;
+ max = NDISC_ROUTER_SOLICITATION_INTERVAL +
+ NDISC_ROUTER_SOLICITATION_INTERVAL / 10;
+ } else {
+ /* next RT = 2*RTprev + RAND*RTprev */
+ min = 2 * last - last / 10;
+ max = 2 * last + last / 10;
+ }
+
+ /* final RT > MRT */
+ if (last * 2 > NDISC_MAX_ROUTER_SOLICITATION_INTERVAL) {
+ min = NDISC_MAX_ROUTER_SOLICITATION_INTERVAL -
+ NDISC_MAX_ROUTER_SOLICITATION_INTERVAL / 10;
+ max = NDISC_MAX_ROUTER_SOLICITATION_INTERVAL +
+ NDISC_MAX_ROUTER_SOLICITATION_INTERVAL / 10;
+ }
+
+ format_timespan(time_string_min, FORMAT_TIMESPAN_MAX,
+ min, USEC_PER_MSEC);
+ format_timespan(time_string_nd, FORMAT_TIMESPAN_MAX,
+ nd->retransmit_time, USEC_PER_MSEC);
+ format_timespan(time_string_max, FORMAT_TIMESPAN_MAX,
+ max, USEC_PER_MSEC);
+
+ log_info("backoff timeout interval %2d %s%s <= %s <= %s",
+ count,
+ (last * 2 > NDISC_MAX_ROUTER_SOLICITATION_INTERVAL)? "(max) ": "",
+ time_string_min, time_string_nd, time_string_max);
+
+ assert_se(min <= nd->retransmit_time);
+ assert_se(max >= nd->retransmit_time);
+
+ last = nd->retransmit_time;
+
+ assert_se(sd_event_source_set_time(nd->timeout_event_source, 0) >= 0);
+
+ return 0;
+}
+
+static void test_timeout(void) {
+ sd_event *e;
+ sd_ndisc *nd;
+ usec_t time_now = now(clock_boottime_or_monotonic());
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ send_ra_function = test_timeout_value;
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ assert_se(sd_ndisc_new(&nd) >= 0);
+ assert_se(nd);
+
+ test_timeout_nd = nd;
+
+ assert_se(sd_ndisc_attach_event(nd, e, 0) >= 0);
+
+ assert_se(sd_ndisc_set_ifindex(nd, 42) >= 0);
+ assert_se(sd_ndisc_set_mac(nd, &mac_addr) >= 0);
+
+ assert_se(sd_event_add_time(e, &test_hangcheck, clock_boottime_or_monotonic(),
+ time_now + 2U * USEC_PER_SEC, 0,
+ test_rs_hangcheck, NULL) >= 0);
+
+ assert_se(sd_ndisc_start(nd) >= 0);
+
+ sd_event_loop(e);
+
+ test_hangcheck = sd_event_source_unref(test_hangcheck);
+
+ nd = sd_ndisc_unref(nd);
+
+ sd_event_unref(e);
+}
+
+int main(int argc, char *argv[]) {
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_rs();
+ test_timeout();
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-sd-dhcp-lease.c b/src/libsystemd-network/test-sd-dhcp-lease.c
new file mode 100644
index 0000000..0f88180
--- /dev/null
+++ b/src/libsystemd-network/test-sd-dhcp-lease.c
@@ -0,0 +1,90 @@
+#include <errno.h>
+
+#include "dhcp-lease-internal.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+
+/* According to RFC1035 section 4.1.4, a domain name in a message can be either:
+ * - a sequence of labels ending in a zero octet
+ * - a pointer
+ * - a sequence of labels ending with a pointer
+ */
+static void test_dhcp_lease_parse_search_domains_basic(void) {
+ int r;
+ _cleanup_strv_free_ char **domains = NULL;
+ static const uint8_t optionbuf[] = {
+ 0x03, 'F', 'O', 'O', 0x03, 'B', 'A', 'R', 0x00,
+ 0x04, 'A', 'B', 'C', 'D', 0x03, 'E', 'F', 'G', 0x00,
+ };
+
+ r = dhcp_lease_parse_search_domains(optionbuf, sizeof(optionbuf), &domains);
+ assert_se(r == 2);
+ assert_se(streq(domains[0], "FOO.BAR"));
+ assert_se(streq(domains[1], "ABCD.EFG"));
+}
+
+static void test_dhcp_lease_parse_search_domains_ptr(void) {
+ int r;
+ _cleanup_strv_free_ char **domains = NULL;
+ static const uint8_t optionbuf[] = {
+ 0x03, 'F', 'O', 'O', 0x00, 0xC0, 0x00,
+ };
+
+ r = dhcp_lease_parse_search_domains(optionbuf, sizeof(optionbuf), &domains);
+ assert_se(r == 2);
+ assert_se(streq(domains[0], "FOO"));
+ assert_se(streq(domains[1], "FOO"));
+}
+
+static void test_dhcp_lease_parse_search_domains_labels_and_ptr(void) {
+ int r;
+ _cleanup_strv_free_ char **domains = NULL;
+ static const uint8_t optionbuf[] = {
+ 0x03, 'F', 'O', 'O', 0x03, 'B', 'A', 'R', 0x00,
+ 0x03, 'A', 'B', 'C', 0xC0, 0x04,
+ };
+
+ r = dhcp_lease_parse_search_domains(optionbuf, sizeof(optionbuf), &domains);
+ assert_se(r == 2);
+ assert_se(streq(domains[0], "FOO.BAR"));
+ assert_se(streq(domains[1], "ABC.BAR"));
+}
+
+/* Tests for exceptions. */
+
+static void test_dhcp_lease_parse_search_domains_no_data(void) {
+ _cleanup_strv_free_ char **domains = NULL;
+ static const uint8_t optionbuf[3] = {0, 0, 0};
+
+ assert_se(dhcp_lease_parse_search_domains(NULL, 0, &domains) == -ENODATA);
+ assert_se(dhcp_lease_parse_search_domains(optionbuf, 0, &domains) == -ENODATA);
+}
+
+static void test_dhcp_lease_parse_search_domains_loops(void) {
+ _cleanup_strv_free_ char **domains = NULL;
+ static const uint8_t optionbuf[] = {
+ 0x03, 'F', 'O', 'O', 0x00, 0x03, 'B', 'A', 'R', 0xC0, 0x06,
+ };
+
+ assert_se(dhcp_lease_parse_search_domains(optionbuf, sizeof(optionbuf), &domains) == -EBADMSG);
+}
+
+static void test_dhcp_lease_parse_search_domains_wrong_len(void) {
+ _cleanup_strv_free_ char **domains = NULL;
+ static const uint8_t optionbuf[] = {
+ 0x03, 'F', 'O', 'O', 0x03, 'B', 'A', 'R', 0x00,
+ 0x04, 'A', 'B', 'C', 'D', 0x03, 'E', 'F', 'G', 0x00,
+ };
+
+ assert_se(dhcp_lease_parse_search_domains(optionbuf, sizeof(optionbuf) - 5, &domains) == -EBADMSG);
+}
+
+int main(int argc, char *argv[]) {
+ test_dhcp_lease_parse_search_domains_basic();
+ test_dhcp_lease_parse_search_domains_ptr();
+ test_dhcp_lease_parse_search_domains_labels_and_ptr();
+ test_dhcp_lease_parse_search_domains_no_data();
+ test_dhcp_lease_parse_search_domains_loops();
+ test_dhcp_lease_parse_search_domains_wrong_len();
+}
diff --git a/src/libsystemd/disable-mempool.c b/src/libsystemd/disable-mempool.c
new file mode 100644
index 0000000..034bd24
--- /dev/null
+++ b/src/libsystemd/disable-mempool.c
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "mempool.h"
+
+const bool mempool_use_allowed = false;
diff --git a/src/libsystemd/libsystemd.pc.in b/src/libsystemd/libsystemd.pc.in
new file mode 100644
index 0000000..a010dea
--- /dev/null
+++ b/src/libsystemd/libsystemd.pc.in
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@rootlibdir@
+includedir=@includedir@
+
+Name: systemd
+Description: systemd Library
+URL: @PROJECT_URL@
+Version: @PROJECT_VERSION@
+Libs: -L${libdir} -lsystemd
+Cflags: -I${includedir}
diff --git a/src/libsystemd/libsystemd.sym b/src/libsystemd/libsystemd.sym
new file mode 100644
index 0000000..a6748ce
--- /dev/null
+++ b/src/libsystemd/libsystemd.sym
@@ -0,0 +1,678 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1+
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+***/
+
+LIBSYSTEMD_209 {
+global:
+ /* sd-journal */
+ sd_journal_print;
+ sd_journal_printv;
+ sd_journal_send;
+ sd_journal_sendv;
+ sd_journal_stream_fd;
+ sd_journal_open;
+ sd_journal_close;
+ sd_journal_previous;
+ sd_journal_next;
+ sd_journal_previous_skip;
+ sd_journal_next_skip;
+ sd_journal_get_realtime_usec;
+ sd_journal_get_monotonic_usec;
+ sd_journal_get_data;
+ sd_journal_enumerate_data;
+ sd_journal_restart_data;
+ sd_journal_add_match;
+ sd_journal_flush_matches;
+ sd_journal_seek_head;
+ sd_journal_seek_tail;
+ sd_journal_seek_monotonic_usec;
+ sd_journal_seek_realtime_usec;
+ sd_journal_seek_cursor;
+ sd_journal_get_cursor;
+ sd_journal_get_fd;
+ sd_journal_process;
+ sd_journal_print_with_location;
+ sd_journal_printv_with_location;
+ sd_journal_send_with_location;
+ sd_journal_sendv_with_location;
+ sd_journal_get_cutoff_realtime_usec;
+ sd_journal_get_cutoff_monotonic_usec;
+ sd_journal_wait;
+ sd_journal_open_directory;
+ sd_journal_add_disjunction;
+ sd_journal_perror;
+ sd_journal_perror_with_location;
+ sd_journal_get_usage;
+ sd_journal_test_cursor;
+ sd_journal_query_unique;
+ sd_journal_enumerate_unique;
+ sd_journal_restart_unique;
+ sd_journal_get_catalog;
+ sd_journal_get_catalog_for_message_id;
+ sd_journal_set_data_threshold;
+ sd_journal_get_data_threshold;
+ sd_journal_reliable_fd;
+ sd_journal_get_events;
+ sd_journal_get_timeout;
+ sd_journal_add_conjunction;
+ sd_journal_open_files;
+ sd_journal_open_container;
+
+ /* sd-daemon */
+ sd_booted;
+ sd_is_fifo;
+ sd_is_mq;
+ sd_is_socket;
+ sd_is_socket_inet;
+ sd_is_socket_unix;
+ sd_is_special;
+ sd_listen_fds;
+ sd_notify;
+ sd_notifyf;
+ sd_watchdog_enabled;
+
+ /* sd-id128 */
+ sd_id128_to_string;
+ sd_id128_from_string;
+ sd_id128_randomize;
+ sd_id128_get_machine;
+ sd_id128_get_boot;
+
+ /* sd-login */
+ sd_get_seats;
+ sd_get_sessions;
+ sd_get_uids;
+ sd_login_monitor_flush;
+ sd_login_monitor_get_fd;
+ sd_login_monitor_new;
+ sd_login_monitor_unref;
+ sd_pid_get_owner_uid;
+ sd_pid_get_session;
+ sd_seat_can_multi_session;
+ sd_seat_get_active;
+ sd_seat_get_sessions;
+ sd_session_get_seat;
+ sd_session_get_uid;
+ sd_session_is_active;
+ sd_uid_get_seats;
+ sd_uid_get_sessions;
+ sd_uid_get_state;
+ sd_uid_is_on_seat;
+ sd_pid_get_unit;
+ sd_session_get_service;
+ sd_session_get_type;
+ sd_session_get_class;
+ sd_session_get_display;
+ sd_session_get_state;
+ sd_seat_can_tty;
+ sd_seat_can_graphical;
+ sd_session_get_tty;
+ sd_login_monitor_get_events;
+ sd_login_monitor_get_timeout;
+ sd_pid_get_user_unit;
+ sd_pid_get_machine_name;
+ sd_get_machine_names;
+ sd_pid_get_slice;
+ sd_session_get_vt;
+ sd_session_is_remote;
+ sd_session_get_remote_user;
+ sd_session_get_remote_host;
+local:
+ *;
+};
+
+LIBSYSTEMD_211 {
+global:
+ sd_machine_get_class;
+ sd_peer_get_session;
+ sd_peer_get_owner_uid;
+ sd_peer_get_unit;
+ sd_peer_get_user_unit;
+ sd_peer_get_machine_name;
+ sd_peer_get_slice;
+} LIBSYSTEMD_209;
+
+LIBSYSTEMD_213 {
+global:
+ sd_uid_get_display;
+} LIBSYSTEMD_211;
+
+LIBSYSTEMD_214 {
+global:
+ sd_pid_notify;
+ sd_pid_notifyf;
+} LIBSYSTEMD_213;
+
+LIBSYSTEMD_216 {
+global:
+ sd_machine_get_ifindices;
+} LIBSYSTEMD_214;
+
+LIBSYSTEMD_217 {
+global:
+ sd_session_get_desktop;
+} LIBSYSTEMD_216;
+
+LIBSYSTEMD_219 {
+global:
+ sd_pid_notify_with_fds;
+} LIBSYSTEMD_217;
+
+LIBSYSTEMD_220 {
+global:
+ sd_pid_get_user_slice;
+ sd_peer_get_user_slice;
+} LIBSYSTEMD_219;
+
+LIBSYSTEMD_221 {
+global:
+ /* sd-bus */
+ sd_bus_default;
+ sd_bus_default_user;
+ sd_bus_default_system;
+ sd_bus_open;
+ sd_bus_open_user;
+ sd_bus_open_system;
+ sd_bus_open_system_remote;
+ sd_bus_open_system_machine;
+ sd_bus_new;
+ sd_bus_set_address;
+ sd_bus_set_fd;
+ sd_bus_set_exec;
+ sd_bus_get_address;
+ sd_bus_set_bus_client;
+ sd_bus_is_bus_client;
+ sd_bus_set_server;
+ sd_bus_is_server;
+ sd_bus_set_anonymous;
+ sd_bus_is_anonymous;
+ sd_bus_set_trusted;
+ sd_bus_is_trusted;
+ sd_bus_set_monitor;
+ sd_bus_is_monitor;
+ sd_bus_set_description;
+ sd_bus_get_description;
+ sd_bus_negotiate_creds;
+ sd_bus_negotiate_timestamp;
+ sd_bus_negotiate_fds;
+ sd_bus_can_send;
+ sd_bus_get_creds_mask;
+ sd_bus_set_allow_interactive_authorization;
+ sd_bus_get_allow_interactive_authorization;
+ sd_bus_start;
+ sd_bus_close;
+ sd_bus_try_close;
+ sd_bus_ref;
+ sd_bus_unref;
+ sd_bus_is_open;
+ sd_bus_get_bus_id;
+ sd_bus_get_scope;
+ sd_bus_get_tid;
+ sd_bus_get_owner_creds;
+ sd_bus_send;
+ sd_bus_send_to;
+ sd_bus_call;
+ sd_bus_call_async;
+ sd_bus_get_fd;
+ sd_bus_get_events;
+ sd_bus_get_timeout;
+ sd_bus_process;
+ sd_bus_process_priority;
+ sd_bus_wait;
+ sd_bus_flush;
+ sd_bus_get_current_slot;
+ sd_bus_get_current_message;
+ sd_bus_get_current_handler;
+ sd_bus_get_current_userdata;
+ sd_bus_attach_event;
+ sd_bus_detach_event;
+ sd_bus_get_event;
+ sd_bus_add_filter;
+ sd_bus_add_match;
+ sd_bus_add_object;
+ sd_bus_add_fallback;
+ sd_bus_add_object_vtable;
+ sd_bus_add_fallback_vtable;
+ sd_bus_add_node_enumerator;
+ sd_bus_add_object_manager;
+ sd_bus_slot_ref;
+ sd_bus_slot_unref;
+ sd_bus_slot_get_bus;
+ sd_bus_slot_get_userdata;
+ sd_bus_slot_set_userdata;
+ sd_bus_slot_get_description;
+ sd_bus_slot_set_description;
+ sd_bus_slot_get_current_message;
+ sd_bus_slot_get_current_handler;
+ sd_bus_slot_get_current_userdata;
+ sd_bus_message_new_signal;
+ sd_bus_message_new_method_call;
+ sd_bus_message_new_method_return;
+ sd_bus_message_new_method_error;
+ sd_bus_message_new_method_errorf;
+ sd_bus_message_new_method_errno;
+ sd_bus_message_new_method_errnof;
+ sd_bus_message_ref;
+ sd_bus_message_unref;
+ sd_bus_message_get_type;
+ sd_bus_message_get_cookie;
+ sd_bus_message_get_reply_cookie;
+ sd_bus_message_get_priority;
+ sd_bus_message_get_expect_reply;
+ sd_bus_message_get_auto_start;
+ sd_bus_message_get_allow_interactive_authorization;
+ sd_bus_message_get_signature;
+ sd_bus_message_get_path;
+ sd_bus_message_get_interface;
+ sd_bus_message_get_member;
+ sd_bus_message_get_destination;
+ sd_bus_message_get_sender;
+ sd_bus_message_get_error;
+ sd_bus_message_get_errno;
+ sd_bus_message_get_monotonic_usec;
+ sd_bus_message_get_realtime_usec;
+ sd_bus_message_get_seqnum;
+ sd_bus_message_get_bus;
+ sd_bus_message_get_creds;
+ sd_bus_message_is_signal;
+ sd_bus_message_is_method_call;
+ sd_bus_message_is_method_error;
+ sd_bus_message_is_empty;
+ sd_bus_message_has_signature;
+ sd_bus_message_set_expect_reply;
+ sd_bus_message_set_auto_start;
+ sd_bus_message_set_allow_interactive_authorization;
+ sd_bus_message_set_destination;
+ sd_bus_message_set_priority;
+ sd_bus_message_append;
+ sd_bus_message_append_basic;
+ sd_bus_message_append_array;
+ sd_bus_message_append_array_space;
+ sd_bus_message_append_array_iovec;
+ sd_bus_message_append_array_memfd;
+ sd_bus_message_append_string_space;
+ sd_bus_message_append_string_iovec;
+ sd_bus_message_append_string_memfd;
+ sd_bus_message_append_strv;
+ sd_bus_message_open_container;
+ sd_bus_message_close_container;
+ sd_bus_message_copy;
+ sd_bus_message_read;
+ sd_bus_message_read_basic;
+ sd_bus_message_read_array;
+ sd_bus_message_read_strv;
+ sd_bus_message_skip;
+ sd_bus_message_enter_container;
+ sd_bus_message_exit_container;
+ sd_bus_message_peek_type;
+ sd_bus_message_verify_type;
+ sd_bus_message_at_end;
+ sd_bus_message_rewind;
+ sd_bus_get_unique_name;
+ sd_bus_request_name;
+ sd_bus_release_name;
+ sd_bus_list_names;
+ sd_bus_get_name_creds;
+ sd_bus_get_name_machine_id;
+ sd_bus_call_method;
+ sd_bus_call_method_async;
+ sd_bus_get_property;
+ sd_bus_get_property_trivial;
+ sd_bus_get_property_string;
+ sd_bus_get_property_strv;
+ sd_bus_set_property;
+ sd_bus_reply_method_return;
+ sd_bus_reply_method_error;
+ sd_bus_reply_method_errorf;
+ sd_bus_reply_method_errno;
+ sd_bus_reply_method_errnof;
+ sd_bus_emit_signal;
+ sd_bus_emit_properties_changed_strv;
+ sd_bus_emit_properties_changed;
+ sd_bus_emit_interfaces_added_strv;
+ sd_bus_emit_interfaces_added;
+ sd_bus_emit_interfaces_removed_strv;
+ sd_bus_emit_interfaces_removed;
+ sd_bus_query_sender_creds;
+ sd_bus_query_sender_privilege;
+ sd_bus_creds_new_from_pid;
+ sd_bus_creds_ref;
+ sd_bus_creds_unref;
+ sd_bus_creds_get_mask;
+ sd_bus_creds_get_augmented_mask;
+ sd_bus_creds_get_pid;
+ sd_bus_creds_get_ppid;
+ sd_bus_creds_get_tid;
+ sd_bus_creds_get_uid;
+ sd_bus_creds_get_euid;
+ sd_bus_creds_get_suid;
+ sd_bus_creds_get_fsuid;
+ sd_bus_creds_get_gid;
+ sd_bus_creds_get_egid;
+ sd_bus_creds_get_sgid;
+ sd_bus_creds_get_fsgid;
+ sd_bus_creds_get_supplementary_gids;
+ sd_bus_creds_get_comm;
+ sd_bus_creds_get_tid_comm;
+ sd_bus_creds_get_exe;
+ sd_bus_creds_get_cmdline;
+ sd_bus_creds_get_cgroup;
+ sd_bus_creds_get_unit;
+ sd_bus_creds_get_slice;
+ sd_bus_creds_get_user_unit;
+ sd_bus_creds_get_user_slice;
+ sd_bus_creds_get_session;
+ sd_bus_creds_get_owner_uid;
+ sd_bus_creds_has_effective_cap;
+ sd_bus_creds_has_permitted_cap;
+ sd_bus_creds_has_inheritable_cap;
+ sd_bus_creds_has_bounding_cap;
+ sd_bus_creds_get_selinux_context;
+ sd_bus_creds_get_audit_session_id;
+ sd_bus_creds_get_audit_login_uid;
+ sd_bus_creds_get_tty;
+ sd_bus_creds_get_unique_name;
+ sd_bus_creds_get_well_known_names;
+ sd_bus_creds_get_description;
+ sd_bus_error_free;
+ sd_bus_error_set;
+ sd_bus_error_setf;
+ sd_bus_error_set_const;
+ sd_bus_error_set_errno;
+ sd_bus_error_set_errnof;
+ sd_bus_error_set_errnofv;
+ sd_bus_error_get_errno;
+ sd_bus_error_copy;
+ sd_bus_error_is_set;
+ sd_bus_error_has_name;
+ sd_bus_error_add_map;
+ sd_bus_path_encode;
+ sd_bus_path_decode;
+ sd_bus_track_new;
+ sd_bus_track_ref;
+ sd_bus_track_unref;
+ sd_bus_track_get_bus;
+ sd_bus_track_get_userdata;
+ sd_bus_track_set_userdata;
+ sd_bus_track_add_sender;
+ sd_bus_track_remove_sender;
+ sd_bus_track_add_name;
+ sd_bus_track_remove_name;
+ sd_bus_track_count;
+ sd_bus_track_contains;
+ sd_bus_track_first;
+ sd_bus_track_next;
+
+ /* sd-event */
+ sd_event_default;
+ sd_event_new;
+ sd_event_ref;
+ sd_event_unref;
+ sd_event_add_io;
+ sd_event_add_time;
+ sd_event_add_signal;
+ sd_event_add_child;
+ sd_event_add_defer;
+ sd_event_add_post;
+ sd_event_add_exit;
+ sd_event_prepare;
+ sd_event_wait;
+ sd_event_dispatch;
+ sd_event_run;
+ sd_event_loop;
+ sd_event_exit;
+ sd_event_now;
+ sd_event_get_fd;
+ sd_event_get_state;
+ sd_event_get_tid;
+ sd_event_get_exit_code;
+ sd_event_set_watchdog;
+ sd_event_get_watchdog;
+ sd_event_source_ref;
+ sd_event_source_unref;
+ sd_event_source_get_event;
+ sd_event_source_get_userdata;
+ sd_event_source_set_userdata;
+ sd_event_source_set_description;
+ sd_event_source_get_description;
+ sd_event_source_set_prepare;
+ sd_event_source_get_pending;
+ sd_event_source_get_priority;
+ sd_event_source_set_priority;
+ sd_event_source_get_enabled;
+ sd_event_source_set_enabled;
+ sd_event_source_get_io_fd;
+ sd_event_source_set_io_fd;
+ sd_event_source_get_io_events;
+ sd_event_source_set_io_events;
+ sd_event_source_get_io_revents;
+ sd_event_source_get_time;
+ sd_event_source_set_time;
+ sd_event_source_set_time_accuracy;
+ sd_event_source_get_time_accuracy;
+ sd_event_source_get_time_clock;
+ sd_event_source_get_signal;
+ sd_event_source_get_child_pid;
+} LIBSYSTEMD_220;
+
+LIBSYSTEMD_222 {
+global:
+ /* sd-bus */
+ sd_bus_emit_object_added;
+ sd_bus_emit_object_removed;
+ sd_bus_flush_close_unref;
+} LIBSYSTEMD_221;
+
+LIBSYSTEMD_226 {
+global:
+ sd_pid_get_cgroup;
+ sd_peer_get_cgroup;
+} LIBSYSTEMD_222;
+
+LIBSYSTEMD_227 {
+global:
+ sd_bus_default_flush_close;
+ sd_bus_path_decode_many;
+ sd_bus_path_encode_many;
+ sd_listen_fds_with_names;
+} LIBSYSTEMD_226;
+
+LIBSYSTEMD_229 {
+global:
+ sd_journal_has_runtime_files;
+ sd_journal_has_persistent_files;
+ sd_journal_enumerate_fields;
+ sd_journal_restart_fields;
+} LIBSYSTEMD_227;
+
+LIBSYSTEMD_230 {
+global:
+ sd_journal_open_directory_fd;
+ sd_journal_open_files_fd;
+} LIBSYSTEMD_229;
+
+LIBSYSTEMD_231 {
+global:
+ sd_event_get_iteration;
+} LIBSYSTEMD_230;
+
+LIBSYSTEMD_232 {
+global:
+ sd_bus_track_set_recursive;
+ sd_bus_track_get_recursive;
+ sd_bus_track_count_name;
+ sd_bus_track_count_sender;
+ sd_bus_set_exit_on_disconnect;
+ sd_bus_get_exit_on_disconnect;
+ sd_id128_get_invocation;
+} LIBSYSTEMD_231;
+
+LIBSYSTEMD_233 {
+global:
+ sd_id128_get_machine_app_specific;
+ sd_is_socket_sockaddr;
+} LIBSYSTEMD_232;
+
+LIBSYSTEMD_234 {
+global:
+ sd_bus_message_appendv;
+} LIBSYSTEMD_233;
+
+LIBSYSTEMD_236 {
+global:
+ sd_bus_message_new;
+ sd_bus_message_seal;
+} LIBSYSTEMD_234;
+
+LIBSYSTEMD_237 {
+global:
+ sd_bus_set_watch_bind;
+ sd_bus_get_watch_bind;
+ sd_bus_request_name_async;
+ sd_bus_release_name_async;
+ sd_bus_add_match_async;
+ sd_bus_match_signal;
+ sd_bus_match_signal_async;
+ sd_bus_is_ready;
+ sd_bus_set_connected_signal;
+ sd_bus_get_connected_signal;
+ sd_bus_set_sender;
+ sd_bus_get_sender;
+ sd_bus_message_set_sender;
+ sd_event_source_get_io_fd_own;
+ sd_event_source_set_io_fd_own;
+} LIBSYSTEMD_236;
+
+LIBSYSTEMD_238 {
+global:
+ sd_bus_get_n_queued_read;
+ sd_bus_get_n_queued_write;
+} LIBSYSTEMD_237;
+
+LIBSYSTEMD_239 {
+global:
+ sd_bus_open_with_description;
+ sd_bus_open_user_with_description;
+ sd_bus_open_system_with_description;
+ sd_bus_slot_get_floating;
+ sd_bus_slot_set_floating;
+ sd_bus_slot_get_destroy_callback;
+ sd_bus_slot_set_destroy_callback;
+ sd_bus_track_get_destroy_callback;
+ sd_bus_track_set_destroy_callback;
+ sd_event_add_inotify;
+ sd_event_source_get_inotify_mask;
+ sd_event_source_set_destroy_callback;
+ sd_event_source_get_destroy_callback;
+} LIBSYSTEMD_238;
+
+LIBSYSTEMD_240 {
+global:
+ sd_bus_message_readv;
+ sd_bus_set_method_call_timeout;
+ sd_bus_get_method_call_timeout;
+
+ sd_bus_error_move;
+
+ sd_bus_set_close_on_exit;
+ sd_bus_get_close_on_exit;
+
+ sd_device_ref;
+ sd_device_unref;
+
+ sd_device_new_from_syspath;
+ sd_device_new_from_devnum;
+ sd_device_new_from_subsystem_sysname;
+ sd_device_new_from_device_id;
+
+ sd_device_get_parent;
+ sd_device_get_parent_with_subsystem_devtype;
+
+ sd_device_get_syspath;
+ sd_device_get_subsystem;
+ sd_device_get_devtype;
+ sd_device_get_devnum;
+ sd_device_get_ifindex;
+ sd_device_get_driver;
+ sd_device_get_devpath;
+ sd_device_get_devname;
+ sd_device_get_sysname;
+ sd_device_get_sysnum;
+
+ sd_device_get_is_initialized;
+ sd_device_get_usec_since_initialized;
+
+ sd_device_get_tag_first;
+ sd_device_get_tag_next;
+ sd_device_get_devlink_first;
+ sd_device_get_devlink_next;
+ sd_device_get_property_first;
+ sd_device_get_property_next;
+ sd_device_get_sysattr_first;
+ sd_device_get_sysattr_next;
+
+ sd_device_has_tag;
+ sd_device_get_property_value;
+ sd_device_get_sysattr_value;
+
+ sd_device_set_sysattr_value;
+
+ sd_device_enumerator_new;
+ sd_device_enumerator_ref;
+ sd_device_enumerator_unref;
+
+ sd_device_enumerator_get_device_first;
+ sd_device_enumerator_get_device_next;
+ sd_device_enumerator_get_subsystem_first;
+ sd_device_enumerator_get_subsystem_next;
+
+ sd_device_enumerator_add_match_subsystem;
+ sd_device_enumerator_add_match_sysattr;
+ sd_device_enumerator_add_match_property;
+ sd_device_enumerator_add_match_sysname;
+ sd_device_enumerator_add_match_tag;
+ sd_device_enumerator_add_match_parent;
+ sd_device_enumerator_allow_uninitialized;
+
+ sd_hwdb_ref;
+ sd_hwdb_unref;
+
+ sd_hwdb_new;
+
+ sd_hwdb_get;
+
+ sd_hwdb_seek;
+ sd_hwdb_enumerate;
+
+ sd_id128_get_boot_app_specific;
+
+ sd_device_monitor_new;
+ sd_device_monitor_ref;
+ sd_device_monitor_unref;
+
+ sd_device_monitor_set_receive_buffer_size;
+ sd_device_monitor_attach_event;
+ sd_device_monitor_detach_event;
+ sd_device_monitor_get_event;
+ sd_device_monitor_get_event_source;
+ sd_device_monitor_start;
+ sd_device_monitor_stop;
+
+ sd_device_monitor_filter_add_match_subsystem_devtype;
+ sd_device_monitor_filter_add_match_tag;
+ sd_device_monitor_filter_update;
+ sd_device_monitor_filter_remove;
+
+ sd_event_source_get_floating;
+ sd_event_source_set_floating;
+} LIBSYSTEMD_239;
+
+LIBSYSTEMD_241 {
+global:
+ sd_bus_close_unref;
+} LIBSYSTEMD_240;
diff --git a/src/libsystemd/meson.build b/src/libsystemd/meson.build
new file mode 100644
index 0000000..67add38
--- /dev/null
+++ b/src/libsystemd/meson.build
@@ -0,0 +1,116 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+id128_sources = files('''
+ sd-id128/id128-util.c
+ sd-id128/id128-util.h
+ sd-id128/sd-id128.c
+'''.split())
+
+sd_daemon_sources = files('sd-daemon/sd-daemon.c')
+
+sd_event_sources = files('''
+ sd-event/event-source.h
+ sd-event/event-util.c
+ sd-event/event-util.h
+ sd-event/sd-event.c
+'''.split())
+
+sd_login_sources = files('sd-login/sd-login.c')
+
+libsystemd_sources = files('''
+ sd-bus/bus-common-errors.c
+ sd-bus/bus-common-errors.h
+ sd-bus/bus-container.c
+ sd-bus/bus-container.h
+ sd-bus/bus-control.c
+ sd-bus/bus-control.h
+ sd-bus/bus-convenience.c
+ sd-bus/bus-creds.c
+ sd-bus/bus-creds.h
+ sd-bus/bus-dump.c
+ sd-bus/bus-dump.h
+ sd-bus/bus-error.c
+ sd-bus/bus-error.h
+ sd-bus/bus-gvariant.c
+ sd-bus/bus-gvariant.h
+ sd-bus/bus-internal.c
+ sd-bus/bus-internal.h
+ sd-bus/bus-introspect.c
+ sd-bus/bus-introspect.h
+ sd-bus/bus-kernel.c
+ sd-bus/bus-kernel.h
+ sd-bus/bus-match.c
+ sd-bus/bus-match.h
+ sd-bus/bus-message.c
+ sd-bus/bus-message.h
+ sd-bus/bus-objects.c
+ sd-bus/bus-objects.h
+ sd-bus/bus-protocol.h
+ sd-bus/bus-signature.c
+ sd-bus/bus-signature.h
+ sd-bus/bus-slot.c
+ sd-bus/bus-slot.h
+ sd-bus/bus-socket.c
+ sd-bus/bus-socket.h
+ sd-bus/bus-track.c
+ sd-bus/bus-track.h
+ sd-bus/bus-type.c
+ sd-bus/bus-type.h
+ sd-bus/sd-bus.c
+ sd-device/device-enumerator-private.h
+ sd-device/device-enumerator.c
+ sd-device/device-internal.h
+ sd-device/device-monitor-private.h
+ sd-device/device-monitor.c
+ sd-device/device-private.c
+ sd-device/device-private.h
+ sd-device/device-util.h
+ sd-device/sd-device.c
+ sd-hwdb/hwdb-internal.h
+ sd-hwdb/hwdb-util.c
+ sd-hwdb/hwdb-util.h
+ sd-hwdb/sd-hwdb.c
+ sd-netlink/generic-netlink.c
+ sd-netlink/local-addresses.c
+ sd-netlink/local-addresses.h
+ sd-netlink/netlink-internal.h
+ sd-netlink/netlink-message.c
+ sd-netlink/netlink-slot.c
+ sd-netlink/netlink-slot.h
+ sd-netlink/netlink-socket.c
+ sd-netlink/netlink-types.c
+ sd-netlink/netlink-types.h
+ sd-netlink/netlink-util.c
+ sd-netlink/netlink-util.h
+ sd-netlink/rtnl-message.c
+ sd-netlink/sd-netlink.c
+ sd-network/network-util.c
+ sd-network/network-util.h
+ sd-network/sd-network.c
+ sd-path/sd-path.c
+ sd-resolve/resolve-private.h
+ sd-resolve/sd-resolve.c
+ sd-utf8/sd-utf8.c
+'''.split()) + id128_sources + sd_daemon_sources + sd_event_sources + sd_login_sources
+
+disable_mempool_c = files('disable-mempool.c')
+
+libsystemd_c_args = ['-fvisibility=default']
+
+libsystemd_static = static_library(
+ 'systemd_static',
+ libsystemd_sources,
+ install : false,
+ include_directories : includes,
+ link_with : libbasic,
+ dependencies : [threads,
+ librt],
+ c_args : libsystemd_c_args)
+
+libsystemd_sym = 'src/libsystemd/libsystemd.sym'
+
+configure_file(
+ input : 'libsystemd.pc.in',
+ output : 'libsystemd.pc',
+ configuration : substs,
+ install_dir : pkgconfiglibdir == 'no' ? '' : pkgconfiglibdir)
diff --git a/src/libsystemd/sd-bus/GVARIANT-SERIALIZATION b/src/libsystemd/sd-bus/GVARIANT-SERIALIZATION
new file mode 100644
index 0000000..3110d57
--- /dev/null
+++ b/src/libsystemd/sd-bus/GVARIANT-SERIALIZATION
@@ -0,0 +1,105 @@
+How we use GVariant for serializing D-Bus messages
+--------------------------------------------------
+
+We stay close to the original dbus1 framing as possible, but make
+certain changes to adapt for GVariant. dbus1 has the following
+framing:
+
+ 1. A fixed header of "yyyyuu"
+ 2. Additional header fields of "a(yv)"
+ 3. Padding with NUL bytes to pad up to next 8byte boundary
+ 4. The body
+
+Note that the body is not padded at the end, the complete message
+hence might have a non-aligned size. Reading multiple messages at once
+will hence result in possibly unaligned messages in memory.
+
+The header consists of the following:
+
+ y Endianness, 'l' or 'B'
+ y Message Type
+ y Flags
+ y Protocol version, '1'
+ u Length of the body, i.e. the length of part 4 above
+ u 32bit Serial number
+
+ = 12 bytes
+
+This header is then followed by the fields array, whose first value is
+a 32bit array size.
+
+When using GVariant we keep the basic structure in place, only
+slightly alter the header, and define protocol version '2'. The new
+header:
+
+ y Endianness, 'l' or 'B'
+ y Message Type
+ y Flags
+ y Protocol version, '2'
+ u Reserved, must be 0
+ t 64bit Cookie
+
+ = 16 bytes
+
+This is then followed by the GVariant fields array ("a{tv}"), and
+finally the actual body as variant (v). Putting this altogether a
+packet on dbus2 hence qualifies as a fully compliant GVariant
+structure of (yyyyuta{tv}v).
+
+For details on gvariant, see:
+
+https://people.gnome.org/~desrt/gvariant-serialisation.pdf
+
+Regarding the framing of dbus2, also see:
+
+https://wiki.gnome.org/Projects/GLib/GDBus/Version2
+
+The first four bytes of the header are defined the same way for dbus1
+and dbus2. The first bytes contain the endianess field and the
+protocol version, so that the remainder of the message can be safely
+made sense of just by looking at the first 32bit.
+
+Note that the length of the body is no longer included in the header
+on dbus2! In fact, the message size must be known in advance, from the
+underlying transport in order to parse dbus2 messages, while it is
+directly included in dbus1 message headers. This change of semantics
+is an effect of GVariant's basic design.
+
+The serial number has been renamed cookie and has been extended from
+32bit to 64bit. It is recommended to avoid the higher 32bit of the
+cookie field though, to simplify compatibility with dbus1 peers. Note
+that not only the cookie/serial field in the fixed header, but also
+the reply_cookie/reply_serial additional header field has been
+increased from 32bit to 64bit, too!
+
+The header field identifiers have been extended from 8bit to
+64bit. This has been done to simplify things, and has no effect
+on the serialization size, as due to alignment for each 8bit
+header field identifier 56 bits of padding had to be added.
+
+Note that the header size changed, due to these changes. However,
+consider that on dbus1 the beginning of the fields array contains the
+32bit array size (since that is how arrays are encoded on dbus1),
+thus, if one considers that size part of the header, instead of the
+array, the size of the header on dbus1 and dbus2 stays identical, at
+16 bytes.
+
+ 0 4 8 12 16
+ Common: | E | T | F | V | ...
+
+ dbus1: | (as above) | Body Length | Serial | Fields Length | Fields array ...
+
+ gvariant: | (as above) | Reserved | Cookie | Fields array ...
+
+And that's already it.
+
+Note: To simplify parsing, valid dbus2 messages must include the entire
+ fixed header and additional header fields in a single non-memfd
+ message part. Also, the signature string of the body variant all the
+ way to the end of the message must be in a single non-memfd part
+ too. The parts for this extended header and footer can be the same
+ one, and can also continue any amount of additional body bytes.
+
+Note: The GVariant "MAYBE" type is not supported, so that messages can
+ be fully converted forth and back between dbus1 and gvariant
+ representations.
diff --git a/src/libsystemd/sd-bus/bus-common-errors.c b/src/libsystemd/sd-bus/bus-common-errors.c
new file mode 100644
index 0000000..6e5fe00
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-common-errors.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "sd-bus.h"
+
+#include "bus-common-errors.h"
+#include "bus-error.h"
+
+BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map bus_common_errors[] = {
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_UNIT, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_UNIT_FOR_PID, ESRCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_EXISTS, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_LOAD_FAILED, EIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_BAD_UNIT_SETTING, ENOEXEC),
+ SD_BUS_ERROR_MAP(BUS_ERROR_JOB_FAILED, EREMOTEIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_JOB, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NOT_SUBSCRIBED, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_ALREADY_SUBSCRIBED, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_ONLY_BY_DEPENDENCY, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TRANSACTION_JOBS_CONFLICTING, EDEADLK),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TRANSACTION_ORDER_IS_CYCLIC, EDEADLK),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TRANSACTION_IS_DESTRUCTIVE, EDEADLK),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_MASKED, ERFKILL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_GENERATED, EADDRNOTAVAIL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_LINKED, ELOOP),
+ SD_BUS_ERROR_MAP(BUS_ERROR_JOB_TYPE_NOT_APPLICABLE, EBADR),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_ISOLATION, EPERM),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SHUTTING_DOWN, ECANCELED),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SCOPE_NOT_RUNNING, EHOSTDOWN),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_DYNAMIC_USER, ESRCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NOT_REFERENCED, EUNATCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DISK_FULL, ENOSPC),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_MACHINE, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_IMAGE, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_MACHINE_FOR_PID, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_MACHINE_EXISTS, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_PRIVATE_NETWORKING, ENOSYS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_USER_MAPPING, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_GROUP_MAPPING, ENXIO),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_PORTABLE_IMAGE, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_BAD_PORTABLE_IMAGE_TYPE, EMEDIUMTYPE),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_SESSION, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SESSION_FOR_PID, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_USER, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_USER_FOR_PID, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_SEAT, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SESSION_NOT_ON_SEAT, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NOT_IN_CONTROL, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DEVICE_IS_TAKEN, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DEVICE_NOT_TAKEN, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_OPERATION_IN_PROGRESS, EINPROGRESS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SLEEP_VERB_NOT_SUPPORTED, EOPNOTSUPP),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SESSION_BUSY, EBUSY),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_AUTOMATIC_TIME_SYNC_ENABLED, EALREADY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_NTP_SUPPORT, EOPNOTSUPP),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_PROCESS, ESRCH),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_NAME_SERVERS, ESRCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_INVALID_REPLY, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_RR, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_CNAME_LOOP, EDEADLK),
+ SD_BUS_ERROR_MAP(BUS_ERROR_ABORTED, ECANCELED),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_SERVICE, EUNATCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNSSEC_FAILED, EHOSTUNREACH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_TRUST_ANCHOR, EHOSTUNREACH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_RR_TYPE_UNSUPPORTED, EOPNOTSUPP),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_LINK, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_LINK_BUSY, EBUSY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NETWORK_DOWN, ENETDOWN),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_DNSSD_SERVICE, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNSSD_SERVICE_EXISTS, EEXIST),
+
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "FORMERR", EBADMSG),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "SERVFAIL", EHOSTDOWN),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "NXDOMAIN", ENXIO),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "NOTIMP", ENOSYS),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "REFUSED", EACCES),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "YXDOMAIN", EEXIST),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "YRRSET", EEXIST),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "NXRRSET", ENOENT),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "NOTAUTH", EACCES),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "NOTZONE", EREMOTE),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADVERS", EBADMSG),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADKEY", EKEYREJECTED),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADTIME", EBADMSG),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADMODE", EBADMSG),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADNAME", EBADMSG),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADALG", EBADMSG),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADTRUNC", EBADMSG),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADCOOKIE", EBADR),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_TRANSFER, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TRANSFER_IN_PROGRESS, EBUSY),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_PRODUCT_UUID, EOPNOTSUPP),
+
+ SD_BUS_ERROR_MAP_END
+};
diff --git a/src/libsystemd/sd-bus/bus-common-errors.h b/src/libsystemd/sd-bus/bus-common-errors.h
new file mode 100644
index 0000000..8339feb
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-common-errors.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "bus-error.h"
+
+#define BUS_ERROR_NO_SUCH_UNIT "org.freedesktop.systemd1.NoSuchUnit"
+#define BUS_ERROR_NO_UNIT_FOR_PID "org.freedesktop.systemd1.NoUnitForPID"
+#define BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID "org.freedesktop.systemd1.NoUnitForInvocationID"
+#define BUS_ERROR_UNIT_EXISTS "org.freedesktop.systemd1.UnitExists"
+#define BUS_ERROR_LOAD_FAILED "org.freedesktop.systemd1.LoadFailed"
+#define BUS_ERROR_BAD_UNIT_SETTING "org.freedesktop.systemd1.BadUnitSetting"
+#define BUS_ERROR_JOB_FAILED "org.freedesktop.systemd1.JobFailed"
+#define BUS_ERROR_NO_SUCH_JOB "org.freedesktop.systemd1.NoSuchJob"
+#define BUS_ERROR_NOT_SUBSCRIBED "org.freedesktop.systemd1.NotSubscribed"
+#define BUS_ERROR_ALREADY_SUBSCRIBED "org.freedesktop.systemd1.AlreadySubscribed"
+#define BUS_ERROR_ONLY_BY_DEPENDENCY "org.freedesktop.systemd1.OnlyByDependency"
+#define BUS_ERROR_TRANSACTION_JOBS_CONFLICTING "org.freedesktop.systemd1.TransactionJobsConflicting"
+#define BUS_ERROR_TRANSACTION_ORDER_IS_CYCLIC "org.freedesktop.systemd1.TransactionOrderIsCyclic"
+#define BUS_ERROR_TRANSACTION_IS_DESTRUCTIVE "org.freedesktop.systemd1.TransactionIsDestructive"
+#define BUS_ERROR_UNIT_MASKED "org.freedesktop.systemd1.UnitMasked"
+#define BUS_ERROR_UNIT_GENERATED "org.freedesktop.systemd1.UnitGenerated"
+#define BUS_ERROR_UNIT_LINKED "org.freedesktop.systemd1.UnitLinked"
+#define BUS_ERROR_JOB_TYPE_NOT_APPLICABLE "org.freedesktop.systemd1.JobTypeNotApplicable"
+#define BUS_ERROR_NO_ISOLATION "org.freedesktop.systemd1.NoIsolation"
+#define BUS_ERROR_SHUTTING_DOWN "org.freedesktop.systemd1.ShuttingDown"
+#define BUS_ERROR_SCOPE_NOT_RUNNING "org.freedesktop.systemd1.ScopeNotRunning"
+#define BUS_ERROR_NO_SUCH_DYNAMIC_USER "org.freedesktop.systemd1.NoSuchDynamicUser"
+#define BUS_ERROR_NOT_REFERENCED "org.freedesktop.systemd1.NotReferenced"
+#define BUS_ERROR_DISK_FULL "org.freedesktop.systemd1.DiskFull"
+
+#define BUS_ERROR_NO_SUCH_MACHINE "org.freedesktop.machine1.NoSuchMachine"
+#define BUS_ERROR_NO_SUCH_IMAGE "org.freedesktop.machine1.NoSuchImage"
+#define BUS_ERROR_NO_MACHINE_FOR_PID "org.freedesktop.machine1.NoMachineForPID"
+#define BUS_ERROR_MACHINE_EXISTS "org.freedesktop.machine1.MachineExists"
+#define BUS_ERROR_NO_PRIVATE_NETWORKING "org.freedesktop.machine1.NoPrivateNetworking"
+#define BUS_ERROR_NO_SUCH_USER_MAPPING "org.freedesktop.machine1.NoSuchUserMapping"
+#define BUS_ERROR_NO_SUCH_GROUP_MAPPING "org.freedesktop.machine1.NoSuchGroupMapping"
+
+#define BUS_ERROR_NO_SUCH_PORTABLE_IMAGE "org.freedesktop.portable1.NoSuchImage"
+#define BUS_ERROR_BAD_PORTABLE_IMAGE_TYPE "org.freedesktop.portable1.BadImageType"
+
+#define BUS_ERROR_NO_SUCH_SESSION "org.freedesktop.login1.NoSuchSession"
+#define BUS_ERROR_NO_SESSION_FOR_PID "org.freedesktop.login1.NoSessionForPID"
+#define BUS_ERROR_NO_SUCH_USER "org.freedesktop.login1.NoSuchUser"
+#define BUS_ERROR_NO_USER_FOR_PID "org.freedesktop.login1.NoUserForPID"
+#define BUS_ERROR_NO_SUCH_SEAT "org.freedesktop.login1.NoSuchSeat"
+#define BUS_ERROR_SESSION_NOT_ON_SEAT "org.freedesktop.login1.SessionNotOnSeat"
+#define BUS_ERROR_NOT_IN_CONTROL "org.freedesktop.login1.NotInControl"
+#define BUS_ERROR_DEVICE_IS_TAKEN "org.freedesktop.login1.DeviceIsTaken"
+#define BUS_ERROR_DEVICE_NOT_TAKEN "org.freedesktop.login1.DeviceNotTaken"
+#define BUS_ERROR_OPERATION_IN_PROGRESS "org.freedesktop.login1.OperationInProgress"
+#define BUS_ERROR_SLEEP_VERB_NOT_SUPPORTED "org.freedesktop.login1.SleepVerbNotSupported"
+#define BUS_ERROR_SESSION_BUSY "org.freedesktop.login1.SessionBusy"
+
+#define BUS_ERROR_AUTOMATIC_TIME_SYNC_ENABLED "org.freedesktop.timedate1.AutomaticTimeSyncEnabled"
+#define BUS_ERROR_NO_NTP_SUPPORT "org.freedesktop.timedate1.NoNTPSupport"
+
+#define BUS_ERROR_NO_SUCH_PROCESS "org.freedesktop.systemd1.NoSuchProcess"
+
+#define BUS_ERROR_NO_NAME_SERVERS "org.freedesktop.resolve1.NoNameServers"
+#define BUS_ERROR_INVALID_REPLY "org.freedesktop.resolve1.InvalidReply"
+#define BUS_ERROR_NO_SUCH_RR "org.freedesktop.resolve1.NoSuchRR"
+#define BUS_ERROR_CNAME_LOOP "org.freedesktop.resolve1.CNameLoop"
+#define BUS_ERROR_ABORTED "org.freedesktop.resolve1.Aborted"
+#define BUS_ERROR_NO_SUCH_SERVICE "org.freedesktop.resolve1.NoSuchService"
+#define BUS_ERROR_DNSSEC_FAILED "org.freedesktop.resolve1.DnssecFailed"
+#define BUS_ERROR_NO_TRUST_ANCHOR "org.freedesktop.resolve1.NoTrustAnchor"
+#define BUS_ERROR_RR_TYPE_UNSUPPORTED "org.freedesktop.resolve1.ResourceRecordTypeUnsupported"
+#define BUS_ERROR_NO_SUCH_LINK "org.freedesktop.resolve1.NoSuchLink"
+#define BUS_ERROR_LINK_BUSY "org.freedesktop.resolve1.LinkBusy"
+#define BUS_ERROR_NETWORK_DOWN "org.freedesktop.resolve1.NetworkDown"
+#define BUS_ERROR_NO_SUCH_DNSSD_SERVICE "org.freedesktop.resolve1.NoSuchDnssdService"
+#define BUS_ERROR_DNSSD_SERVICE_EXISTS "org.freedesktop.resolve1.DnssdServiceExists"
+#define _BUS_ERROR_DNS "org.freedesktop.resolve1.DnsError."
+
+#define BUS_ERROR_NO_SUCH_TRANSFER "org.freedesktop.import1.NoSuchTransfer"
+#define BUS_ERROR_TRANSFER_IN_PROGRESS "org.freedesktop.import1.TransferInProgress"
+
+#define BUS_ERROR_NO_PRODUCT_UUID "org.freedesktop.hostname1.NoProductUUID"
+
+BUS_ERROR_MAP_ELF_USE(bus_common_errors);
diff --git a/src/libsystemd/sd-bus/bus-container.c b/src/libsystemd/sd-bus/bus-container.c
new file mode 100644
index 0000000..2cfeefc
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-container.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "bus-container.h"
+#include "bus-internal.h"
+#include "bus-socket.h"
+#include "fd-util.h"
+#include "process-util.h"
+#include "util.h"
+
+int bus_container_connect_socket(sd_bus *b) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1;
+ int r, error_buf = 0;
+ pid_t child;
+ ssize_t n;
+
+ assert(b);
+ assert(b->input_fd < 0);
+ assert(b->output_fd < 0);
+ assert(b->nspid > 0 || b->machine);
+
+ if (b->nspid <= 0) {
+ r = container_get_leader(b->machine, &b->nspid);
+ if (r < 0)
+ return r;
+ }
+
+ r = namespace_open(b->nspid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd);
+ if (r < 0)
+ return r;
+
+ b->input_fd = socket(b->sockaddr.sa.sa_family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (b->input_fd < 0)
+ return -errno;
+
+ b->input_fd = fd_move_above_stdio(b->input_fd);
+
+ b->output_fd = b->input_fd;
+
+ bus_socket_setup(b);
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-buscntrns)", "(sd-buscntr)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ pair[0] = safe_close(pair[0]);
+
+ r = connect(b->input_fd, &b->sockaddr.sa, b->sockaddr_size);
+ if (r < 0) {
+ /* Try to send error up */
+ error_buf = errno;
+ (void) write(pair[1], &error_buf, sizeof(error_buf));
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ r = wait_for_terminate_and_check("(sd-buscntrns)", child, 0);
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EPROTO;
+
+ n = read(pair[0], &error_buf, sizeof(error_buf));
+ if (n < 0)
+ return -errno;
+
+ if (n > 0) {
+ if (n != sizeof(error_buf))
+ return -EIO;
+
+ if (error_buf < 0)
+ return -EIO;
+
+ if (error_buf == EINPROGRESS)
+ return 1;
+
+ if (error_buf > 0)
+ return -error_buf;
+ }
+
+ return bus_socket_start_auth(b);
+}
diff --git a/src/libsystemd/sd-bus/bus-container.h b/src/libsystemd/sd-bus/bus-container.h
new file mode 100644
index 0000000..f6ef688
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-container.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+int bus_container_connect_socket(sd_bus *b);
diff --git a/src/libsystemd/sd-bus/bus-control.c b/src/libsystemd/sd-bus/bus-control.c
new file mode 100644
index 0000000..7775d2b
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-control.c
@@ -0,0 +1,930 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_VALGRIND_MEMCHECK_H
+#include <valgrind/memcheck.h>
+#endif
+
+#include <errno.h>
+#include <stddef.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-control.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "capability-util.h"
+#include "process-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+_public_ int sd_bus_get_unique_name(sd_bus *bus, const char **unique) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(unique, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ return r;
+
+ *unique = bus->unique_name;
+ return 0;
+}
+
+static int validate_request_name_parameters(
+ sd_bus *bus,
+ const char *name,
+ uint64_t flags,
+ uint32_t *ret_param) {
+
+ uint32_t param = 0;
+
+ assert(bus);
+ assert(name);
+ assert(ret_param);
+
+ assert_return(!(flags & ~(SD_BUS_NAME_ALLOW_REPLACEMENT|SD_BUS_NAME_REPLACE_EXISTING|SD_BUS_NAME_QUEUE)), -EINVAL);
+ assert_return(service_name_is_valid(name), -EINVAL);
+ assert_return(name[0] != ':', -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ /* Don't allow requesting the special driver and local names */
+ if (STR_IN_SET(name, "org.freedesktop.DBus", "org.freedesktop.DBus.Local"))
+ return -EINVAL;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (flags & SD_BUS_NAME_ALLOW_REPLACEMENT)
+ param |= BUS_NAME_ALLOW_REPLACEMENT;
+ if (flags & SD_BUS_NAME_REPLACE_EXISTING)
+ param |= BUS_NAME_REPLACE_EXISTING;
+ if (!(flags & SD_BUS_NAME_QUEUE))
+ param |= BUS_NAME_DO_NOT_QUEUE;
+
+ *ret_param = param;
+
+ return 0;
+}
+
+_public_ int sd_bus_request_name(
+ sd_bus *bus,
+ const char *name,
+ uint64_t flags) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ uint32_t ret, param = 0;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ r = validate_request_name_parameters(bus, name, flags, &param);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "RequestName",
+ NULL,
+ &reply,
+ "su",
+ name,
+ param);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "u", &ret);
+ if (r < 0)
+ return r;
+
+ switch (ret) {
+
+ case BUS_NAME_ALREADY_OWNER:
+ return -EALREADY;
+
+ case BUS_NAME_EXISTS:
+ return -EEXIST;
+
+ case BUS_NAME_IN_QUEUE:
+ return 0;
+
+ case BUS_NAME_PRIMARY_OWNER:
+ return 1;
+ }
+
+ return -EIO;
+}
+
+static int default_request_name_handler(
+ sd_bus_message *m,
+ void *userdata,
+ sd_bus_error *ret_error) {
+
+ uint32_t ret;
+ int r;
+
+ assert(m);
+
+ if (sd_bus_message_is_method_error(m, NULL)) {
+ log_debug_errno(sd_bus_message_get_errno(m),
+ "Unable to request name, failing connection: %s",
+ sd_bus_message_get_error(m)->message);
+
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+ }
+
+ r = sd_bus_message_read(m, "u", &ret);
+ if (r < 0)
+ return r;
+
+ switch (ret) {
+
+ case BUS_NAME_ALREADY_OWNER:
+ log_debug("Already owner of requested service name, ignoring.");
+ return 1;
+
+ case BUS_NAME_IN_QUEUE:
+ log_debug("In queue for requested service name.");
+ return 1;
+
+ case BUS_NAME_PRIMARY_OWNER:
+ log_debug("Successfully acquired requested service name.");
+ return 1;
+
+ case BUS_NAME_EXISTS:
+ log_debug("Requested service name already owned, failing connection.");
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+ }
+
+ log_debug("Unexpected response from RequestName(), failing connection.");
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+}
+
+_public_ int sd_bus_request_name_async(
+ sd_bus *bus,
+ sd_bus_slot **ret_slot,
+ const char *name,
+ uint64_t flags,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ uint32_t param = 0;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ r = validate_request_name_parameters(bus, name, flags, &param);
+ if (r < 0)
+ return r;
+
+ return sd_bus_call_method_async(
+ bus,
+ ret_slot,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "RequestName",
+ callback ?: default_request_name_handler,
+ userdata,
+ "su",
+ name,
+ param);
+}
+
+static int validate_release_name_parameters(
+ sd_bus *bus,
+ const char *name) {
+
+ assert(bus);
+ assert(name);
+
+ assert_return(service_name_is_valid(name), -EINVAL);
+ assert_return(name[0] != ':', -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ /* Don't allow releasing the special driver and local names */
+ if (STR_IN_SET(name, "org.freedesktop.DBus", "org.freedesktop.DBus.Local"))
+ return -EINVAL;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ return 0;
+}
+
+_public_ int sd_bus_release_name(
+ sd_bus *bus,
+ const char *name) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ uint32_t ret;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ r = validate_release_name_parameters(bus, name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "ReleaseName",
+ NULL,
+ &reply,
+ "s",
+ name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "u", &ret);
+ if (r < 0)
+ return r;
+
+ switch (ret) {
+
+ case BUS_NAME_NON_EXISTENT:
+ return -ESRCH;
+
+ case BUS_NAME_NOT_OWNER:
+ return -EADDRINUSE;
+
+ case BUS_NAME_RELEASED:
+ return 0;
+ }
+
+ return -EIO;
+}
+
+static int default_release_name_handler(
+ sd_bus_message *m,
+ void *userdata,
+ sd_bus_error *ret_error) {
+
+ uint32_t ret;
+ int r;
+
+ assert(m);
+
+ if (sd_bus_message_is_method_error(m, NULL)) {
+ log_debug_errno(sd_bus_message_get_errno(m),
+ "Unable to release name, failing connection: %s",
+ sd_bus_message_get_error(m)->message);
+
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+ }
+
+ r = sd_bus_message_read(m, "u", &ret);
+ if (r < 0)
+ return r;
+
+ switch (ret) {
+
+ case BUS_NAME_NON_EXISTENT:
+ log_debug("Name asked to release is not taken currently, ignoring.");
+ return 1;
+
+ case BUS_NAME_NOT_OWNER:
+ log_debug("Name asked to release is owned by somebody else, ignoring.");
+ return 1;
+
+ case BUS_NAME_RELEASED:
+ log_debug("Name successfully released.");
+ return 1;
+ }
+
+ log_debug("Unexpected response from ReleaseName(), failing connection.");
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+}
+
+_public_ int sd_bus_release_name_async(
+ sd_bus *bus,
+ sd_bus_slot **ret_slot,
+ const char *name,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ r = validate_release_name_parameters(bus, name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_call_method_async(
+ bus,
+ ret_slot,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "ReleaseName",
+ callback ?: default_release_name_handler,
+ userdata,
+ "s",
+ name);
+}
+
+_public_ int sd_bus_list_names(sd_bus *bus, char ***acquired, char ***activatable) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_strv_free_ char **x = NULL, **y = NULL;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(acquired || activatable, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (acquired) {
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "ListNames",
+ NULL,
+ &reply,
+ NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(reply, &x);
+ if (r < 0)
+ return r;
+
+ reply = sd_bus_message_unref(reply);
+ }
+
+ if (activatable) {
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "ListActivatableNames",
+ NULL,
+ &reply,
+ NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(reply, &y);
+ if (r < 0)
+ return r;
+
+ *activatable = TAKE_PTR(y);
+ }
+
+ if (acquired)
+ *acquired = TAKE_PTR(x);
+
+ return 0;
+}
+
+_public_ int sd_bus_get_name_creds(
+ sd_bus *bus,
+ const char *name,
+ uint64_t mask,
+ sd_bus_creds **creds) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply_unique = NULL, *reply = NULL;
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *c = NULL;
+ const char *unique;
+ pid_t pid = 0;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return((mask & ~SD_BUS_CREDS_AUGMENT) <= _SD_BUS_CREDS_ALL, -EOPNOTSUPP);
+ assert_return(mask == 0 || creds, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+ assert_return(service_name_is_valid(name), -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ /* Turn off augmenting if this isn't a local connection. If the connection is not local, then /proc is not
+ * going to match. */
+ if (!bus->is_local)
+ mask &= ~SD_BUS_CREDS_AUGMENT;
+
+ if (streq(name, "org.freedesktop.DBus.Local"))
+ return -EINVAL;
+
+ if (streq(name, "org.freedesktop.DBus"))
+ return sd_bus_get_owner_creds(bus, mask, creds);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ /* If the name is unique anyway, we can use it directly */
+ unique = name[0] == ':' ? name : NULL;
+
+ /* Only query the owner if the caller wants to know it and the name is not unique anyway, or if the caller just
+ * wants to check whether a name exists */
+ if ((FLAGS_SET(mask, SD_BUS_CREDS_UNIQUE_NAME) && !unique) || mask == 0) {
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetNameOwner",
+ NULL,
+ &reply_unique,
+ "s",
+ name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply_unique, "s", &unique);
+ if (r < 0)
+ return r;
+ }
+
+ if (mask != 0) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool need_pid, need_uid, need_selinux, need_separate_calls;
+
+ c = bus_creds_new();
+ if (!c)
+ return -ENOMEM;
+
+ if ((mask & SD_BUS_CREDS_UNIQUE_NAME) && unique) {
+ c->unique_name = strdup(unique);
+ if (!c->unique_name)
+ return -ENOMEM;
+
+ c->mask |= SD_BUS_CREDS_UNIQUE_NAME;
+ }
+
+ need_pid = (mask & SD_BUS_CREDS_PID) ||
+ ((mask & SD_BUS_CREDS_AUGMENT) &&
+ (mask & (SD_BUS_CREDS_UID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID|
+ SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID|
+ SD_BUS_CREDS_SUPPLEMENTARY_GIDS|
+ SD_BUS_CREDS_COMM|SD_BUS_CREDS_EXE|SD_BUS_CREDS_CMDLINE|
+ SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_OWNER_UID|
+ SD_BUS_CREDS_EFFECTIVE_CAPS|SD_BUS_CREDS_PERMITTED_CAPS|SD_BUS_CREDS_INHERITABLE_CAPS|SD_BUS_CREDS_BOUNDING_CAPS|
+ SD_BUS_CREDS_SELINUX_CONTEXT|
+ SD_BUS_CREDS_AUDIT_SESSION_ID|SD_BUS_CREDS_AUDIT_LOGIN_UID)));
+ need_uid = mask & SD_BUS_CREDS_EUID;
+ need_selinux = mask & SD_BUS_CREDS_SELINUX_CONTEXT;
+
+ if (need_pid + need_uid + need_selinux > 1) {
+
+ /* If we need more than one of the credentials, then use GetConnectionCredentials() */
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetConnectionCredentials",
+ &error,
+ &reply,
+ "s",
+ unique ?: name);
+
+ if (r < 0) {
+
+ if (!sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD))
+ return r;
+
+ /* If we got an unknown method error, fall back to the invidual calls... */
+ need_separate_calls = true;
+ sd_bus_error_free(&error);
+
+ } else {
+ need_separate_calls = false;
+
+ r = sd_bus_message_enter_container(reply, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *m;
+
+ r = sd_bus_message_enter_container(reply, 'e', "sv");
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(reply, "s", &m);
+ if (r < 0)
+ return r;
+
+ if (need_uid && streq(m, "UnixUserID")) {
+ uint32_t u;
+
+ r = sd_bus_message_read(reply, "v", "u", &u);
+ if (r < 0)
+ return r;
+
+ c->euid = u;
+ c->mask |= SD_BUS_CREDS_EUID;
+
+ } else if (need_pid && streq(m, "ProcessID")) {
+ uint32_t p;
+
+ r = sd_bus_message_read(reply, "v", "u", &p);
+ if (r < 0)
+ return r;
+
+ pid = p;
+ if (mask & SD_BUS_CREDS_PID) {
+ c->pid = p;
+ c->mask |= SD_BUS_CREDS_PID;
+ }
+
+ } else if (need_selinux && streq(m, "LinuxSecurityLabel")) {
+ const void *p = NULL;
+ size_t sz = 0;
+
+ r = sd_bus_message_enter_container(reply, 'v', "ay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(reply, 'y', &p, &sz);
+ if (r < 0)
+ return r;
+
+ free(c->label);
+ c->label = strndup(p, sz);
+ if (!c->label)
+ return -ENOMEM;
+
+ c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return r;
+ } else {
+ r = sd_bus_message_skip(reply, "v");
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return r;
+
+ if (need_pid && pid == 0)
+ return -EPROTO;
+ }
+
+ } else /* When we only need a single field, then let's use separate calls */
+ need_separate_calls = true;
+
+ if (need_separate_calls) {
+ if (need_pid) {
+ uint32_t u;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetConnectionUnixProcessID",
+ NULL,
+ &reply,
+ "s",
+ unique ?: name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "u", &u);
+ if (r < 0)
+ return r;
+
+ pid = u;
+ if (mask & SD_BUS_CREDS_PID) {
+ c->pid = u;
+ c->mask |= SD_BUS_CREDS_PID;
+ }
+
+ reply = sd_bus_message_unref(reply);
+ }
+
+ if (need_uid) {
+ uint32_t u;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetConnectionUnixUser",
+ NULL,
+ &reply,
+ "s",
+ unique ?: name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "u", &u);
+ if (r < 0)
+ return r;
+
+ c->euid = u;
+ c->mask |= SD_BUS_CREDS_EUID;
+
+ reply = sd_bus_message_unref(reply);
+ }
+
+ if (need_selinux) {
+ const void *p = NULL;
+ size_t sz = 0;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetConnectionSELinuxSecurityContext",
+ &error,
+ &reply,
+ "s",
+ unique ?: name);
+ if (r < 0) {
+ if (!sd_bus_error_has_name(&error, "org.freedesktop.DBus.Error.SELinuxSecurityContextUnknown"))
+ return r;
+
+ /* no data is fine */
+ } else {
+ r = sd_bus_message_read_array(reply, 'y', &p, &sz);
+ if (r < 0)
+ return r;
+
+ c->label = memdup_suffix0(p, sz);
+ if (!c->label)
+ return -ENOMEM;
+
+ c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+ }
+ }
+
+ r = bus_creds_add_more(c, mask, pid, 0);
+ if (r < 0)
+ return r;
+ }
+
+ if (creds)
+ *creds = TAKE_PTR(c);
+
+ return 0;
+}
+
+_public_ int sd_bus_get_owner_creds(sd_bus *bus, uint64_t mask, sd_bus_creds **ret) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *c = NULL;
+ bool do_label, do_groups;
+ pid_t pid = 0;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return((mask & ~SD_BUS_CREDS_AUGMENT) <= _SD_BUS_CREDS_ALL, -EOPNOTSUPP);
+ assert_return(ret, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (!bus->is_local)
+ mask &= ~SD_BUS_CREDS_AUGMENT;
+
+ do_label = bus->label && (mask & SD_BUS_CREDS_SELINUX_CONTEXT);
+ do_groups = bus->n_groups != (size_t) -1 && (mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS);
+
+ /* Avoid allocating anything if we have no chance of returning useful data */
+ if (!bus->ucred_valid && !do_label && !do_groups)
+ return -ENODATA;
+
+ c = bus_creds_new();
+ if (!c)
+ return -ENOMEM;
+
+ if (bus->ucred_valid) {
+ if (pid_is_valid(bus->ucred.pid)) {
+ pid = c->pid = bus->ucred.pid;
+ c->mask |= SD_BUS_CREDS_PID & mask;
+ }
+
+ if (uid_is_valid(bus->ucred.uid)) {
+ c->euid = bus->ucred.uid;
+ c->mask |= SD_BUS_CREDS_EUID & mask;
+ }
+
+ if (gid_is_valid(bus->ucred.gid)) {
+ c->egid = bus->ucred.gid;
+ c->mask |= SD_BUS_CREDS_EGID & mask;
+ }
+ }
+
+ if (do_label) {
+ c->label = strdup(bus->label);
+ if (!c->label)
+ return -ENOMEM;
+
+ c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+
+ if (do_groups) {
+ c->supplementary_gids = newdup(gid_t, bus->groups, bus->n_groups);
+ if (!c->supplementary_gids)
+ return -ENOMEM;
+
+ c->n_supplementary_gids = bus->n_groups;
+
+ c->mask |= SD_BUS_CREDS_SUPPLEMENTARY_GIDS;
+ }
+
+ r = bus_creds_add_more(c, mask, pid, 0);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(c);
+
+ return 0;
+}
+
+#define append_eavesdrop(bus, m) \
+ ((bus)->is_monitor \
+ ? (isempty(m) ? "eavesdrop='true'" : strjoina((m), ",eavesdrop='true'")) \
+ : (m))
+
+int bus_add_match_internal(
+ sd_bus *bus,
+ const char *match) {
+
+ const char *e;
+
+ assert(bus);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ e = append_eavesdrop(bus, match);
+
+ return sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "AddMatch",
+ NULL,
+ NULL,
+ "s",
+ e);
+}
+int bus_add_match_internal_async(
+ sd_bus *bus,
+ sd_bus_slot **ret_slot,
+ const char *match,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ const char *e;
+
+ assert(bus);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ e = append_eavesdrop(bus, match);
+
+ return sd_bus_call_method_async(
+ bus,
+ ret_slot,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "AddMatch",
+ callback,
+ userdata,
+ "s",
+ e);
+}
+
+int bus_remove_match_internal(
+ sd_bus *bus,
+ const char *match) {
+
+ const char *e;
+
+ assert(bus);
+ assert(match);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ e = append_eavesdrop(bus, match);
+
+ /* Fire and forget */
+
+ return sd_bus_call_method_async(
+ bus,
+ NULL,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "RemoveMatch",
+ NULL,
+ NULL,
+ "s",
+ e);
+}
+
+_public_ int sd_bus_get_name_machine_id(sd_bus *bus, const char *name, sd_id128_t *machine) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *m = NULL;
+ const char *mid;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(machine, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+ assert_return(service_name_is_valid(name), -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (streq_ptr(name, bus->unique_name))
+ return sd_id128_get_machine(machine);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ name,
+ "/",
+ "org.freedesktop.DBus.Peer",
+ "GetMachineId");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_set_auto_start(m, false);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(bus, m, 0, NULL, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "s", &mid);
+ if (r < 0)
+ return r;
+
+ return sd_id128_from_string(mid, machine);
+}
diff --git a/src/libsystemd/sd-bus/bus-control.h b/src/libsystemd/sd-bus/bus-control.h
new file mode 100644
index 0000000..3fb52b6
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-control.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+int bus_add_match_internal(sd_bus *bus, const char *match);
+int bus_add_match_internal_async(sd_bus *bus, sd_bus_slot **ret, const char *match, sd_bus_message_handler_t callback, void *userdata);
+
+int bus_remove_match_internal(sd_bus *bus, const char *match);
diff --git a/src/libsystemd/sd-bus/bus-convenience.c b/src/libsystemd/sd-bus/bus-convenience.c
new file mode 100644
index 0000000..c4d4016
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-convenience.c
@@ -0,0 +1,669 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-signature.h"
+#include "bus-type.h"
+#include "bus-util.h"
+#include "string-util.h"
+
+_public_ int sd_bus_emit_signal(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *member,
+ const char *types, ...) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ r = sd_bus_message_new_signal(bus, &m, path, interface, member);
+ if (r < 0)
+ return r;
+
+ if (!isempty(types)) {
+ va_list ap;
+
+ va_start(ap, types);
+ r = sd_bus_message_appendv(m, types, ap);
+ va_end(ap);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+_public_ int sd_bus_call_method_async(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ void *userdata,
+ const char *types, ...) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ r = sd_bus_message_new_method_call(bus, &m, destination, path, interface, member);
+ if (r < 0)
+ return r;
+
+ if (!isempty(types)) {
+ va_list ap;
+
+ va_start(ap, types);
+ r = sd_bus_message_appendv(m, types, ap);
+ va_end(ap);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_call_async(bus, slot, m, callback, userdata, 0);
+}
+
+_public_ int sd_bus_call_method(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ const char *types, ...) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(!bus_pid_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_message_new_method_call(bus, &m, destination, path, interface, member);
+ if (r < 0)
+ goto fail;
+
+ if (!isempty(types)) {
+ va_list ap;
+
+ va_start(ap, types);
+ r = sd_bus_message_appendv(m, types, ap);
+ va_end(ap);
+ if (r < 0)
+ goto fail;
+ }
+
+ return sd_bus_call(bus, m, 0, error, reply);
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_reply_method_return(
+ sd_bus_message *call,
+ const char *types, ...) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_pid_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ r = sd_bus_message_new_method_return(call, &m);
+ if (r < 0)
+ return r;
+
+ if (!isempty(types)) {
+ va_list ap;
+
+ va_start(ap, types);
+ r = sd_bus_message_appendv(m, types, ap);
+ va_end(ap);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_send(call->bus, m, NULL);
+}
+
+_public_ int sd_bus_reply_method_error(
+ sd_bus_message *call,
+ const sd_bus_error *e) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(sd_bus_error_is_set(e), -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_pid_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ r = sd_bus_message_new_method_error(call, &m, e);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(call->bus, m, NULL);
+}
+
+_public_ int sd_bus_reply_method_errorf(
+ sd_bus_message *call,
+ const char *name,
+ const char *format,
+ ...) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ va_list ap;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_pid_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ va_start(ap, format);
+ bus_error_setfv(&error, name, format, ap);
+ va_end(ap);
+
+ return sd_bus_reply_method_error(call, &error);
+}
+
+_public_ int sd_bus_reply_method_errno(
+ sd_bus_message *call,
+ int error,
+ const sd_bus_error *p) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_pid_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ if (sd_bus_error_is_set(p))
+ return sd_bus_reply_method_error(call, p);
+
+ sd_bus_error_set_errno(&berror, error);
+
+ return sd_bus_reply_method_error(call, &berror);
+}
+
+_public_ int sd_bus_reply_method_errnof(
+ sd_bus_message *call,
+ int error,
+ const char *format,
+ ...) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL;
+ va_list ap;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_pid_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ va_start(ap, format);
+ sd_bus_error_set_errnofv(&berror, error, format, ap);
+ va_end(ap);
+
+ return sd_bus_reply_method_error(call, &berror);
+}
+
+_public_ int sd_bus_get_property(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ const char *type) {
+
+ sd_bus_message *rep = NULL;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(reply, -EINVAL, error);
+ bus_assert_return(signature_is_single(type, false), -EINVAL, error);
+ bus_assert_return(!bus_pid_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_call_method(bus, destination, path, "org.freedesktop.DBus.Properties", "Get", error, &rep, "ss", strempty(interface), member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(rep, 'v', type);
+ if (r < 0) {
+ sd_bus_message_unref(rep);
+ goto fail;
+ }
+
+ *reply = rep;
+ return 0;
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_get_property_trivial(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ char type, void *ptr) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(bus_type_is_trivial(type), -EINVAL, error);
+ bus_assert_return(ptr, -EINVAL, error);
+ bus_assert_return(!bus_pid_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_call_method(bus, destination, path, "org.freedesktop.DBus.Properties", "Get", error, &reply, "ss", strempty(interface), member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(reply, 'v', CHAR_TO_STR(type));
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_read_basic(reply, type, ptr);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_get_property_string(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ char **ret) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *s;
+ char *n;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(ret, -EINVAL, error);
+ bus_assert_return(!bus_pid_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_call_method(bus, destination, path, "org.freedesktop.DBus.Properties", "Get", error, &reply, "ss", strempty(interface), member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(reply, 'v', "s");
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_read_basic(reply, 's', &s);
+ if (r < 0)
+ goto fail;
+
+ n = strdup(s);
+ if (!n) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ *ret = n;
+ return 0;
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_get_property_strv(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ char ***ret) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(ret, -EINVAL, error);
+ bus_assert_return(!bus_pid_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_call_method(bus, destination, path, "org.freedesktop.DBus.Properties", "Get", error, &reply, "ss", strempty(interface), member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(reply, 'v', NULL);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_read_strv(reply, ret);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_set_property(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ const char *type, ...) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ va_list ap;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(signature_is_single(type, false), -EINVAL, error);
+ bus_assert_return(!bus_pid_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_message_new_method_call(bus, &m, destination, path, "org.freedesktop.DBus.Properties", "Set");
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_append(m, "ss", strempty(interface), member);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_open_container(m, 'v', type);
+ if (r < 0)
+ goto fail;
+
+ va_start(ap, type);
+ r = sd_bus_message_appendv(m, type, ap);
+ va_end(ap);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ goto fail;
+
+ return sd_bus_call(bus, m, 0, error, NULL);
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_query_sender_creds(sd_bus_message *call, uint64_t mask, sd_bus_creds **creds) {
+ sd_bus_creds *c;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_pid_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ c = sd_bus_message_get_creds(call);
+
+ /* All data we need? */
+ if (c && (mask & ~c->mask) == 0) {
+ *creds = sd_bus_creds_ref(c);
+ return 0;
+ }
+
+ /* No data passed? Or not enough data passed to retrieve the missing bits? */
+ if (!c || !(c->mask & SD_BUS_CREDS_PID)) {
+ /* We couldn't read anything from the call, let's try
+ * to get it from the sender or peer. */
+
+ if (call->sender)
+ /* There's a sender, but the creds are missing. */
+ return sd_bus_get_name_creds(call->bus, call->sender, mask, creds);
+ else
+ /* There's no sender. For direct connections
+ * the credentials of the AF_UNIX peer matter,
+ * which may be queried via sd_bus_get_owner_creds(). */
+ return sd_bus_get_owner_creds(call->bus, mask, creds);
+ }
+
+ return bus_creds_extend_by_pid(c, mask, creds);
+}
+
+_public_ int sd_bus_query_sender_privilege(sd_bus_message *call, int capability) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ uid_t our_uid;
+ bool know_caps = false;
+ int r;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_pid_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (capability >= 0) {
+
+ r = sd_bus_query_sender_creds(call, SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_EFFECTIVE_CAPS, &creds);
+ if (r < 0)
+ return r;
+
+ /* We cannot use augmented caps for authorization,
+ * since then data is acquired raceful from
+ * /proc. This can never actually happen, but let's
+ * better be safe than sorry, and do an extra check
+ * here. */
+ assert_return((sd_bus_creds_get_augmented_mask(creds) & SD_BUS_CREDS_EFFECTIVE_CAPS) == 0, -EPERM);
+
+ r = sd_bus_creds_has_effective_cap(creds, capability);
+ if (r > 0)
+ return 1;
+ if (r == 0)
+ know_caps = true;
+ } else {
+ r = sd_bus_query_sender_creds(call, SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+ }
+
+ /* Now, check the UID, but only if the capability check wasn't
+ * sufficient */
+ our_uid = getuid();
+ if (our_uid != 0 || !know_caps || capability < 0) {
+ uid_t sender_uid;
+
+ /* We cannot use augmented uid/euid for authorization,
+ * since then data is acquired raceful from
+ * /proc. This can never actually happen, but let's
+ * better be safe than sorry, and do an extra check
+ * here. */
+ assert_return((sd_bus_creds_get_augmented_mask(creds) & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID)) == 0, -EPERM);
+
+ /* Try to use the EUID, if we have it. */
+ r = sd_bus_creds_get_euid(creds, &sender_uid);
+ if (r < 0)
+ r = sd_bus_creds_get_uid(creds, &sender_uid);
+
+ if (r >= 0) {
+ /* Sender has same UID as us, then let's grant access */
+ if (sender_uid == our_uid)
+ return 1;
+
+ /* Sender is root, we are not root. */
+ if (our_uid != 0 && sender_uid == 0)
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+#define make_expression(sender, path, interface, member) \
+ strjoina( \
+ "type='signal'", \
+ sender ? ",sender='" : "", \
+ sender ?: "", \
+ sender ? "'" : "", \
+ path ? ",path='" : "", \
+ path ?: "", \
+ path ? "'" : "", \
+ interface ? ",interface='" : "", \
+ interface ?: "", \
+ interface ? "'" : "", \
+ member ? ",member='" : "", \
+ member ?: "", \
+ member ? "'" : "" \
+ )
+
+_public_ int sd_bus_match_signal(
+ sd_bus *bus,
+ sd_bus_slot **ret,
+ const char *sender,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ const char *expression;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+ assert_return(!sender || service_name_is_valid(sender), -EINVAL);
+ assert_return(!path || object_path_is_valid(path), -EINVAL);
+ assert_return(!interface || interface_name_is_valid(interface), -EINVAL);
+ assert_return(!member || member_name_is_valid(member), -EINVAL);
+
+ expression = make_expression(sender, path, interface, member);
+
+ return sd_bus_add_match(bus, ret, expression, callback, userdata);
+}
+
+_public_ int sd_bus_match_signal_async(
+ sd_bus *bus,
+ sd_bus_slot **ret,
+ const char *sender,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ sd_bus_message_handler_t install_callback,
+ void *userdata) {
+
+ const char *expression;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+ assert_return(!sender || service_name_is_valid(sender), -EINVAL);
+ assert_return(!path || object_path_is_valid(path), -EINVAL);
+ assert_return(!interface || interface_name_is_valid(interface), -EINVAL);
+ assert_return(!member || member_name_is_valid(member), -EINVAL);
+
+ expression = make_expression(sender, path, interface, member);
+
+ return sd_bus_add_match_async(bus, ret, expression, callback, install_callback, userdata);
+}
diff --git a/src/libsystemd/sd-bus/bus-creds.c b/src/libsystemd/sd-bus/bus-creds.c
new file mode 100644
index 0000000..81d97ff
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-creds.c
@@ -0,0 +1,1338 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <linux/capability.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "bus-creds.h"
+#include "bus-label.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "capability-util.h"
+#include "cgroup-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "hexdecoct.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "user-util.h"
+#include "util.h"
+
+enum {
+ CAP_OFFSET_INHERITABLE = 0,
+ CAP_OFFSET_PERMITTED = 1,
+ CAP_OFFSET_EFFECTIVE = 2,
+ CAP_OFFSET_BOUNDING = 3
+};
+
+void bus_creds_done(sd_bus_creds *c) {
+ assert(c);
+
+ /* For internal bus cred structures that are allocated by
+ * something else */
+
+ free(c->session);
+ free(c->unit);
+ free(c->user_unit);
+ free(c->slice);
+ free(c->user_slice);
+ free(c->unescaped_description);
+ free(c->supplementary_gids);
+ free(c->tty);
+
+ free(c->well_known_names); /* note that this is an strv, but
+ * we only free the array, not the
+ * strings the array points to. The
+ * full strv we only free if
+ * c->allocated is set, see
+ * below. */
+
+ strv_free(c->cmdline_array);
+}
+
+_public_ sd_bus_creds *sd_bus_creds_ref(sd_bus_creds *c) {
+
+ if (!c)
+ return NULL;
+
+ if (c->allocated) {
+ assert(c->n_ref > 0);
+ c->n_ref++;
+ } else {
+ sd_bus_message *m;
+
+ /* If this is an embedded creds structure, then
+ * forward ref counting to the message */
+ m = container_of(c, sd_bus_message, creds);
+ sd_bus_message_ref(m);
+ }
+
+ return c;
+}
+
+_public_ sd_bus_creds *sd_bus_creds_unref(sd_bus_creds *c) {
+
+ if (!c)
+ return NULL;
+
+ if (c->allocated) {
+ assert(c->n_ref > 0);
+ c->n_ref--;
+
+ if (c->n_ref == 0) {
+ free(c->comm);
+ free(c->tid_comm);
+ free(c->exe);
+ free(c->cmdline);
+ free(c->cgroup);
+ free(c->capability);
+ free(c->label);
+ free(c->unique_name);
+ free(c->cgroup_root);
+ free(c->description);
+
+ c->supplementary_gids = mfree(c->supplementary_gids);
+
+ c->well_known_names = strv_free(c->well_known_names);
+
+ bus_creds_done(c);
+
+ free(c);
+ }
+ } else {
+ sd_bus_message *m;
+
+ m = container_of(c, sd_bus_message, creds);
+ sd_bus_message_unref(m);
+ }
+
+ return NULL;
+}
+
+_public_ uint64_t sd_bus_creds_get_mask(const sd_bus_creds *c) {
+ assert_return(c, 0);
+
+ return c->mask;
+}
+
+_public_ uint64_t sd_bus_creds_get_augmented_mask(const sd_bus_creds *c) {
+ assert_return(c, 0);
+
+ return c->augmented;
+}
+
+sd_bus_creds* bus_creds_new(void) {
+ sd_bus_creds *c;
+
+ c = new0(sd_bus_creds, 1);
+ if (!c)
+ return NULL;
+
+ c->allocated = true;
+ c->n_ref = 1;
+ return c;
+}
+
+_public_ int sd_bus_creds_new_from_pid(sd_bus_creds **ret, pid_t pid, uint64_t mask) {
+ sd_bus_creds *c;
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(mask <= _SD_BUS_CREDS_ALL, -EOPNOTSUPP);
+ assert_return(ret, -EINVAL);
+
+ if (pid == 0)
+ pid = getpid_cached();
+
+ c = bus_creds_new();
+ if (!c)
+ return -ENOMEM;
+
+ r = bus_creds_add_more(c, mask | SD_BUS_CREDS_AUGMENT, pid, 0);
+ if (r < 0) {
+ sd_bus_creds_unref(c);
+ return r;
+ }
+
+ /* Check if the process existed at all, in case we haven't
+ * figured that out already */
+ if (!pid_is_alive(pid)) {
+ sd_bus_creds_unref(c);
+ return -ESRCH;
+ }
+
+ *ret = c;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_uid(sd_bus_creds *c, uid_t *uid) {
+ assert_return(c, -EINVAL);
+ assert_return(uid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_UID))
+ return -ENODATA;
+
+ *uid = c->uid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_euid(sd_bus_creds *c, uid_t *euid) {
+ assert_return(c, -EINVAL);
+ assert_return(euid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_EUID))
+ return -ENODATA;
+
+ *euid = c->euid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_suid(sd_bus_creds *c, uid_t *suid) {
+ assert_return(c, -EINVAL);
+ assert_return(suid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SUID))
+ return -ENODATA;
+
+ *suid = c->suid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_fsuid(sd_bus_creds *c, uid_t *fsuid) {
+ assert_return(c, -EINVAL);
+ assert_return(fsuid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_FSUID))
+ return -ENODATA;
+
+ *fsuid = c->fsuid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_gid(sd_bus_creds *c, gid_t *gid) {
+ assert_return(c, -EINVAL);
+ assert_return(gid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_GID))
+ return -ENODATA;
+
+ *gid = c->gid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_egid(sd_bus_creds *c, gid_t *egid) {
+ assert_return(c, -EINVAL);
+ assert_return(egid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_EGID))
+ return -ENODATA;
+
+ *egid = c->egid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_sgid(sd_bus_creds *c, gid_t *sgid) {
+ assert_return(c, -EINVAL);
+ assert_return(sgid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SGID))
+ return -ENODATA;
+
+ *sgid = c->sgid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_fsgid(sd_bus_creds *c, gid_t *fsgid) {
+ assert_return(c, -EINVAL);
+ assert_return(fsgid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_FSGID))
+ return -ENODATA;
+
+ *fsgid = c->fsgid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_supplementary_gids(sd_bus_creds *c, const gid_t **gids) {
+ assert_return(c, -EINVAL);
+ assert_return(gids, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS))
+ return -ENODATA;
+
+ *gids = c->supplementary_gids;
+ return (int) c->n_supplementary_gids;
+}
+
+_public_ int sd_bus_creds_get_pid(sd_bus_creds *c, pid_t *pid) {
+ assert_return(c, -EINVAL);
+ assert_return(pid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_PID))
+ return -ENODATA;
+
+ assert(c->pid > 0);
+ *pid = c->pid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_ppid(sd_bus_creds *c, pid_t *ppid) {
+ assert_return(c, -EINVAL);
+ assert_return(ppid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_PPID))
+ return -ENODATA;
+
+ /* PID 1 has no parent process. Let's distinguish the case of
+ * not knowing and not having a parent process by the returned
+ * error code. */
+ if (c->ppid == 0)
+ return -ENXIO;
+
+ *ppid = c->ppid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_tid(sd_bus_creds *c, pid_t *tid) {
+ assert_return(c, -EINVAL);
+ assert_return(tid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_TID))
+ return -ENODATA;
+
+ assert(c->tid > 0);
+ *tid = c->tid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_selinux_context(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SELINUX_CONTEXT))
+ return -ENODATA;
+
+ assert(c->label);
+ *ret = c->label;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_comm(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_COMM))
+ return -ENODATA;
+
+ assert(c->comm);
+ *ret = c->comm;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_tid_comm(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_TID_COMM))
+ return -ENODATA;
+
+ assert(c->tid_comm);
+ *ret = c->tid_comm;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_exe(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_EXE))
+ return -ENODATA;
+
+ if (!c->exe)
+ return -ENXIO;
+
+ *ret = c->exe;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_cgroup(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_CGROUP))
+ return -ENODATA;
+
+ assert(c->cgroup);
+ *ret = c->cgroup;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_unit(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_UNIT))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->unit) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_unit(shifted, (char**) &c->unit);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->unit;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_user_unit(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_USER_UNIT))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->user_unit) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_user_unit(shifted, (char**) &c->user_unit);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->user_unit;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_slice(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SLICE))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->slice) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_slice(shifted, (char**) &c->slice);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->slice;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_user_slice(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_USER_SLICE))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->user_slice) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_user_slice(shifted, (char**) &c->user_slice);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->user_slice;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_session(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SESSION))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->session) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_session(shifted, (char**) &c->session);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->session;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_owner_uid(sd_bus_creds *c, uid_t *uid) {
+ const char *shifted;
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(uid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_OWNER_UID))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_owner_uid(shifted, uid);
+}
+
+_public_ int sd_bus_creds_get_cmdline(sd_bus_creds *c, char ***cmdline) {
+ assert_return(c, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_CMDLINE))
+ return -ENODATA;
+
+ if (!c->cmdline)
+ return -ENXIO;
+
+ if (!c->cmdline_array) {
+ c->cmdline_array = strv_parse_nulstr(c->cmdline, c->cmdline_size);
+ if (!c->cmdline_array)
+ return -ENOMEM;
+ }
+
+ *cmdline = c->cmdline_array;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_audit_session_id(sd_bus_creds *c, uint32_t *sessionid) {
+ assert_return(c, -EINVAL);
+ assert_return(sessionid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_AUDIT_SESSION_ID))
+ return -ENODATA;
+
+ if (!audit_session_is_valid(c->audit_session_id))
+ return -ENXIO;
+
+ *sessionid = c->audit_session_id;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_audit_login_uid(sd_bus_creds *c, uid_t *uid) {
+ assert_return(c, -EINVAL);
+ assert_return(uid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_AUDIT_LOGIN_UID))
+ return -ENODATA;
+
+ if (!uid_is_valid(c->audit_login_uid))
+ return -ENXIO;
+
+ *uid = c->audit_login_uid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_tty(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_TTY))
+ return -ENODATA;
+
+ if (!c->tty)
+ return -ENXIO;
+
+ *ret = c->tty;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_unique_name(sd_bus_creds *c, const char **unique_name) {
+ assert_return(c, -EINVAL);
+ assert_return(unique_name, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_UNIQUE_NAME))
+ return -ENODATA;
+
+ *unique_name = c->unique_name;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_well_known_names(sd_bus_creds *c, char ***well_known_names) {
+ assert_return(c, -EINVAL);
+ assert_return(well_known_names, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_WELL_KNOWN_NAMES))
+ return -ENODATA;
+
+ /* As a special hack we return the bus driver as well-known
+ * names list when this is requested. */
+ if (c->well_known_names_driver) {
+ static const char* const wkn[] = {
+ "org.freedesktop.DBus",
+ NULL
+ };
+
+ *well_known_names = (char**) wkn;
+ return 0;
+ }
+
+ if (c->well_known_names_local) {
+ static const char* const wkn[] = {
+ "org.freedesktop.DBus.Local",
+ NULL
+ };
+
+ *well_known_names = (char**) wkn;
+ return 0;
+ }
+
+ *well_known_names = c->well_known_names;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_description(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_DESCRIPTION))
+ return -ENODATA;
+
+ assert(c->description);
+
+ if (!c->unescaped_description) {
+ c->unescaped_description = bus_label_unescape(c->description);
+ if (!c->unescaped_description)
+ return -ENOMEM;
+ }
+
+ *ret = c->unescaped_description;
+ return 0;
+}
+
+static int has_cap(sd_bus_creds *c, size_t offset, int capability) {
+ unsigned long lc;
+ size_t sz;
+
+ assert(c);
+ assert(capability >= 0);
+ assert(c->capability);
+
+ lc = cap_last_cap();
+
+ if ((unsigned long) capability > lc)
+ return 0;
+
+ sz = DIV_ROUND_UP(lc, 32LU);
+
+ return !!(c->capability[offset * sz + CAP_TO_INDEX((uint32_t) capability)] & CAP_TO_MASK_CORRECTED((uint32_t) capability));
+}
+
+_public_ int sd_bus_creds_has_effective_cap(sd_bus_creds *c, int capability) {
+ assert_return(c, -EINVAL);
+ assert_return(capability >= 0, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_EFFECTIVE_CAPS))
+ return -ENODATA;
+
+ return has_cap(c, CAP_OFFSET_EFFECTIVE, capability);
+}
+
+_public_ int sd_bus_creds_has_permitted_cap(sd_bus_creds *c, int capability) {
+ assert_return(c, -EINVAL);
+ assert_return(capability >= 0, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_PERMITTED_CAPS))
+ return -ENODATA;
+
+ return has_cap(c, CAP_OFFSET_PERMITTED, capability);
+}
+
+_public_ int sd_bus_creds_has_inheritable_cap(sd_bus_creds *c, int capability) {
+ assert_return(c, -EINVAL);
+ assert_return(capability >= 0, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_INHERITABLE_CAPS))
+ return -ENODATA;
+
+ return has_cap(c, CAP_OFFSET_INHERITABLE, capability);
+}
+
+_public_ int sd_bus_creds_has_bounding_cap(sd_bus_creds *c, int capability) {
+ assert_return(c, -EINVAL);
+ assert_return(capability >= 0, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_BOUNDING_CAPS))
+ return -ENODATA;
+
+ return has_cap(c, CAP_OFFSET_BOUNDING, capability);
+}
+
+static int parse_caps(sd_bus_creds *c, unsigned offset, const char *p) {
+ size_t sz, max;
+ unsigned i, j;
+
+ assert(c);
+ assert(p);
+
+ max = DIV_ROUND_UP(cap_last_cap(), 32U);
+ p += strspn(p, WHITESPACE);
+
+ sz = strlen(p);
+ if (sz % 8 != 0)
+ return -EINVAL;
+
+ sz /= 8;
+ if (sz > max)
+ return -EINVAL;
+
+ if (!c->capability) {
+ c->capability = new0(uint32_t, max * 4);
+ if (!c->capability)
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < sz; i ++) {
+ uint32_t v = 0;
+
+ for (j = 0; j < 8; ++j) {
+ int t;
+
+ t = unhexchar(*p++);
+ if (t < 0)
+ return -EINVAL;
+
+ v = (v << 4) | t;
+ }
+
+ c->capability[offset * max + (sz - i - 1)] = v;
+ }
+
+ return 0;
+}
+
+int bus_creds_add_more(sd_bus_creds *c, uint64_t mask, pid_t pid, pid_t tid) {
+ uint64_t missing;
+ int r;
+
+ assert(c);
+ assert(c->allocated);
+
+ if (!(mask & SD_BUS_CREDS_AUGMENT))
+ return 0;
+
+ /* Try to retrieve PID from creds if it wasn't passed to us */
+ if (pid > 0) {
+ c->pid = pid;
+ c->mask |= SD_BUS_CREDS_PID;
+ } else if (c->mask & SD_BUS_CREDS_PID)
+ pid = c->pid;
+ else
+ /* Without pid we cannot do much... */
+ return 0;
+
+ /* Try to retrieve TID from creds if it wasn't passed to us */
+ if (tid <= 0 && (c->mask & SD_BUS_CREDS_TID))
+ tid = c->tid;
+
+ /* Calculate what we shall and can add */
+ missing = mask & ~(c->mask|SD_BUS_CREDS_PID|SD_BUS_CREDS_TID|SD_BUS_CREDS_UNIQUE_NAME|SD_BUS_CREDS_WELL_KNOWN_NAMES|SD_BUS_CREDS_DESCRIPTION|SD_BUS_CREDS_AUGMENT);
+ if (missing == 0)
+ return 0;
+
+ if (tid > 0) {
+ c->tid = tid;
+ c->mask |= SD_BUS_CREDS_TID;
+ }
+
+ if (missing & (SD_BUS_CREDS_PPID |
+ SD_BUS_CREDS_UID | SD_BUS_CREDS_EUID | SD_BUS_CREDS_SUID | SD_BUS_CREDS_FSUID |
+ SD_BUS_CREDS_GID | SD_BUS_CREDS_EGID | SD_BUS_CREDS_SGID | SD_BUS_CREDS_FSGID |
+ SD_BUS_CREDS_SUPPLEMENTARY_GIDS |
+ SD_BUS_CREDS_EFFECTIVE_CAPS | SD_BUS_CREDS_INHERITABLE_CAPS |
+ SD_BUS_CREDS_PERMITTED_CAPS | SD_BUS_CREDS_BOUNDING_CAPS)) {
+
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *p;
+
+ p = procfs_file_alloca(pid, "status");
+
+ f = fopen(p, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return -ESRCH;
+ else if (!IN_SET(errno, EPERM, EACCES))
+ return -errno;
+ } else {
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (missing & SD_BUS_CREDS_PPID) {
+ p = startswith(line, "PPid:");
+ if (p) {
+ p += strspn(p, WHITESPACE);
+
+ /* Explicitly check for PPID 0 (which is the case for PID 1) */
+ if (!streq(p, "0")) {
+ r = parse_pid(p, &c->ppid);
+ if (r < 0)
+ return r;
+
+ } else
+ c->ppid = 0;
+
+ c->mask |= SD_BUS_CREDS_PPID;
+ continue;
+ }
+ }
+
+ if (missing & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID)) {
+ p = startswith(line, "Uid:");
+ if (p) {
+ unsigned long uid, euid, suid, fsuid;
+
+ p += strspn(p, WHITESPACE);
+ if (sscanf(p, "%lu %lu %lu %lu", &uid, &euid, &suid, &fsuid) != 4)
+ return -EIO;
+
+ if (missing & SD_BUS_CREDS_UID)
+ c->uid = (uid_t) uid;
+ if (missing & SD_BUS_CREDS_EUID)
+ c->euid = (uid_t) euid;
+ if (missing & SD_BUS_CREDS_SUID)
+ c->suid = (uid_t) suid;
+ if (missing & SD_BUS_CREDS_FSUID)
+ c->fsuid = (uid_t) fsuid;
+
+ c->mask |= missing & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID);
+ continue;
+ }
+ }
+
+ if (missing & (SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID)) {
+ p = startswith(line, "Gid:");
+ if (p) {
+ unsigned long gid, egid, sgid, fsgid;
+
+ p += strspn(p, WHITESPACE);
+ if (sscanf(p, "%lu %lu %lu %lu", &gid, &egid, &sgid, &fsgid) != 4)
+ return -EIO;
+
+ if (missing & SD_BUS_CREDS_GID)
+ c->gid = (gid_t) gid;
+ if (missing & SD_BUS_CREDS_EGID)
+ c->egid = (gid_t) egid;
+ if (missing & SD_BUS_CREDS_SGID)
+ c->sgid = (gid_t) sgid;
+ if (missing & SD_BUS_CREDS_FSGID)
+ c->fsgid = (gid_t) fsgid;
+
+ c->mask |= missing & (SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID);
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_SUPPLEMENTARY_GIDS) {
+ p = startswith(line, "Groups:");
+ if (p) {
+ size_t allocated = 0;
+
+ for (;;) {
+ unsigned long g;
+ int n = 0;
+
+ p += strspn(p, WHITESPACE);
+ if (*p == 0)
+ break;
+
+ if (sscanf(p, "%lu%n", &g, &n) != 1)
+ return -EIO;
+
+ if (!GREEDY_REALLOC(c->supplementary_gids, allocated, c->n_supplementary_gids+1))
+ return -ENOMEM;
+
+ c->supplementary_gids[c->n_supplementary_gids++] = (gid_t) g;
+ p += n;
+ }
+
+ c->mask |= SD_BUS_CREDS_SUPPLEMENTARY_GIDS;
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_EFFECTIVE_CAPS) {
+ p = startswith(line, "CapEff:");
+ if (p) {
+ r = parse_caps(c, CAP_OFFSET_EFFECTIVE, p);
+ if (r < 0)
+ return r;
+
+ c->mask |= SD_BUS_CREDS_EFFECTIVE_CAPS;
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_PERMITTED_CAPS) {
+ p = startswith(line, "CapPrm:");
+ if (p) {
+ r = parse_caps(c, CAP_OFFSET_PERMITTED, p);
+ if (r < 0)
+ return r;
+
+ c->mask |= SD_BUS_CREDS_PERMITTED_CAPS;
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_INHERITABLE_CAPS) {
+ p = startswith(line, "CapInh:");
+ if (p) {
+ r = parse_caps(c, CAP_OFFSET_INHERITABLE, p);
+ if (r < 0)
+ return r;
+
+ c->mask |= SD_BUS_CREDS_INHERITABLE_CAPS;
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_BOUNDING_CAPS) {
+ p = startswith(line, "CapBnd:");
+ if (p) {
+ r = parse_caps(c, CAP_OFFSET_BOUNDING, p);
+ if (r < 0)
+ return r;
+
+ c->mask |= SD_BUS_CREDS_BOUNDING_CAPS;
+ continue;
+ }
+ }
+ }
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_SELINUX_CONTEXT) {
+ const char *p;
+
+ p = procfs_file_alloca(pid, "attr/current");
+ r = read_one_line_file(p, &c->label);
+ if (r < 0) {
+ if (!IN_SET(r, -ENOENT, -EINVAL, -EPERM, -EACCES))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+
+ if (missing & SD_BUS_CREDS_COMM) {
+ r = get_process_comm(pid, &c->comm);
+ if (r < 0) {
+ if (!IN_SET(r, -EPERM, -EACCES))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_COMM;
+ }
+
+ if (missing & SD_BUS_CREDS_EXE) {
+ r = get_process_exe(pid, &c->exe);
+ if (r == -ESRCH) {
+ /* Unfortunately we cannot really distinguish
+ * the case here where the process does not
+ * exist, and /proc/$PID/exe being unreadable
+ * because $PID is a kernel thread. Hence,
+ * assume it is a kernel thread, and rely on
+ * that this case is caught with a later
+ * call. */
+ c->exe = NULL;
+ c->mask |= SD_BUS_CREDS_EXE;
+ } else if (r < 0) {
+ if (!IN_SET(r, -EPERM, -EACCES))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_EXE;
+ }
+
+ if (missing & SD_BUS_CREDS_CMDLINE) {
+ const char *p;
+
+ p = procfs_file_alloca(pid, "cmdline");
+ r = read_full_file(p, &c->cmdline, &c->cmdline_size);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0) {
+ if (!IN_SET(r, -EPERM, -EACCES))
+ return r;
+ } else {
+ if (c->cmdline_size == 0)
+ c->cmdline = mfree(c->cmdline);
+
+ c->mask |= SD_BUS_CREDS_CMDLINE;
+ }
+ }
+
+ if (tid > 0 && (missing & SD_BUS_CREDS_TID_COMM)) {
+ _cleanup_free_ char *p = NULL;
+
+ if (asprintf(&p, "/proc/"PID_FMT"/task/"PID_FMT"/comm", pid, tid) < 0)
+ return -ENOMEM;
+
+ r = read_one_line_file(p, &c->tid_comm);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0) {
+ if (!IN_SET(r, -EPERM, -EACCES))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_TID_COMM;
+ }
+
+ if (missing & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_OWNER_UID)) {
+
+ if (!c->cgroup) {
+ r = cg_pid_get_path(NULL, pid, &c->cgroup);
+ if (r < 0) {
+ if (!IN_SET(r, -EPERM, -EACCES))
+ return r;
+ }
+ }
+
+ if (!c->cgroup_root) {
+ r = cg_get_root_path(&c->cgroup_root);
+ if (r < 0)
+ return r;
+ }
+
+ if (c->cgroup)
+ c->mask |= missing & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_OWNER_UID);
+ }
+
+ if (missing & SD_BUS_CREDS_AUDIT_SESSION_ID) {
+ r = audit_session_from_pid(pid, &c->audit_session_id);
+ if (r == -ENODATA) {
+ /* ENODATA means: no audit session id assigned */
+ c->audit_session_id = AUDIT_SESSION_INVALID;
+ c->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID;
+ } else if (r < 0) {
+ if (!IN_SET(r, -EOPNOTSUPP, -ENOENT, -EPERM, -EACCES))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID;
+ }
+
+ if (missing & SD_BUS_CREDS_AUDIT_LOGIN_UID) {
+ r = audit_loginuid_from_pid(pid, &c->audit_login_uid);
+ if (r == -ENODATA) {
+ /* ENODATA means: no audit login uid assigned */
+ c->audit_login_uid = UID_INVALID;
+ c->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID;
+ } else if (r < 0) {
+ if (!IN_SET(r, -EOPNOTSUPP, -ENOENT, -EPERM, -EACCES))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID;
+ }
+
+ if (missing & SD_BUS_CREDS_TTY) {
+ r = get_ctty(pid, NULL, &c->tty);
+ if (r == -ENXIO) {
+ /* ENXIO means: process has no controlling TTY */
+ c->tty = NULL;
+ c->mask |= SD_BUS_CREDS_TTY;
+ } else if (r < 0) {
+ if (!IN_SET(r, -EPERM, -EACCES, -ENOENT))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_TTY;
+ }
+
+ /* In case only the exe path was to be read we cannot
+ * distinguish the case where the exe path was unreadable
+ * because the process was a kernel thread, or when the
+ * process didn't exist at all. Hence, let's do a final check,
+ * to be sure. */
+ if (!pid_is_alive(pid))
+ return -ESRCH;
+
+ if (tid > 0 && tid != pid && !pid_is_unwaited(tid))
+ return -ESRCH;
+
+ c->augmented = missing & c->mask;
+
+ return 0;
+}
+
+int bus_creds_extend_by_pid(sd_bus_creds *c, uint64_t mask, sd_bus_creds **ret) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *n = NULL;
+ int r;
+
+ assert(c);
+ assert(ret);
+
+ if ((mask & ~c->mask) == 0 || (!(mask & SD_BUS_CREDS_AUGMENT))) {
+ /* There's already all data we need, or augmentation
+ * wasn't turned on. */
+
+ *ret = sd_bus_creds_ref(c);
+ return 0;
+ }
+
+ n = bus_creds_new();
+ if (!n)
+ return -ENOMEM;
+
+ /* Copy the original data over */
+
+ if (c->mask & mask & SD_BUS_CREDS_PID) {
+ n->pid = c->pid;
+ n->mask |= SD_BUS_CREDS_PID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_TID) {
+ n->tid = c->tid;
+ n->mask |= SD_BUS_CREDS_TID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_PPID) {
+ n->ppid = c->ppid;
+ n->mask |= SD_BUS_CREDS_PPID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_UID) {
+ n->uid = c->uid;
+ n->mask |= SD_BUS_CREDS_UID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_EUID) {
+ n->euid = c->euid;
+ n->mask |= SD_BUS_CREDS_EUID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_SUID) {
+ n->suid = c->suid;
+ n->mask |= SD_BUS_CREDS_SUID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_FSUID) {
+ n->fsuid = c->fsuid;
+ n->mask |= SD_BUS_CREDS_FSUID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_GID) {
+ n->gid = c->gid;
+ n->mask |= SD_BUS_CREDS_GID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_EGID) {
+ n->egid = c->egid;
+ n->mask |= SD_BUS_CREDS_EGID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_SGID) {
+ n->sgid = c->sgid;
+ n->mask |= SD_BUS_CREDS_SGID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_FSGID) {
+ n->fsgid = c->fsgid;
+ n->mask |= SD_BUS_CREDS_FSGID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS) {
+ if (c->supplementary_gids) {
+ n->supplementary_gids = newdup(gid_t, c->supplementary_gids, c->n_supplementary_gids);
+ if (!n->supplementary_gids)
+ return -ENOMEM;
+ n->n_supplementary_gids = c->n_supplementary_gids;
+ } else {
+ n->supplementary_gids = NULL;
+ n->n_supplementary_gids = 0;
+ }
+
+ n->mask |= SD_BUS_CREDS_SUPPLEMENTARY_GIDS;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_COMM) {
+ assert(c->comm);
+
+ n->comm = strdup(c->comm);
+ if (!n->comm)
+ return -ENOMEM;
+
+ n->mask |= SD_BUS_CREDS_COMM;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_TID_COMM) {
+ assert(c->tid_comm);
+
+ n->tid_comm = strdup(c->tid_comm);
+ if (!n->tid_comm)
+ return -ENOMEM;
+
+ n->mask |= SD_BUS_CREDS_TID_COMM;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_EXE) {
+ if (c->exe) {
+ n->exe = strdup(c->exe);
+ if (!n->exe)
+ return -ENOMEM;
+ } else
+ n->exe = NULL;
+
+ n->mask |= SD_BUS_CREDS_EXE;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_CMDLINE) {
+ if (c->cmdline) {
+ n->cmdline = memdup(c->cmdline, c->cmdline_size);
+ if (!n->cmdline)
+ return -ENOMEM;
+
+ n->cmdline_size = c->cmdline_size;
+ } else {
+ n->cmdline = NULL;
+ n->cmdline_size = 0;
+ }
+
+ n->mask |= SD_BUS_CREDS_CMDLINE;
+ }
+
+ if (c->mask & mask & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_OWNER_UID)) {
+ assert(c->cgroup);
+
+ n->cgroup = strdup(c->cgroup);
+ if (!n->cgroup)
+ return -ENOMEM;
+
+ n->cgroup_root = strdup(c->cgroup_root);
+ if (!n->cgroup_root)
+ return -ENOMEM;
+
+ n->mask |= mask & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_OWNER_UID);
+ }
+
+ if (c->mask & mask & (SD_BUS_CREDS_EFFECTIVE_CAPS|SD_BUS_CREDS_PERMITTED_CAPS|SD_BUS_CREDS_INHERITABLE_CAPS|SD_BUS_CREDS_BOUNDING_CAPS)) {
+ assert(c->capability);
+
+ n->capability = memdup(c->capability, DIV_ROUND_UP(cap_last_cap(), 32U) * 4 * 4);
+ if (!n->capability)
+ return -ENOMEM;
+
+ n->mask |= c->mask & mask & (SD_BUS_CREDS_EFFECTIVE_CAPS|SD_BUS_CREDS_PERMITTED_CAPS|SD_BUS_CREDS_INHERITABLE_CAPS|SD_BUS_CREDS_BOUNDING_CAPS);
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_SELINUX_CONTEXT) {
+ assert(c->label);
+
+ n->label = strdup(c->label);
+ if (!n->label)
+ return -ENOMEM;
+ n->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_AUDIT_SESSION_ID) {
+ n->audit_session_id = c->audit_session_id;
+ n->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID;
+ }
+ if (c->mask & mask & SD_BUS_CREDS_AUDIT_LOGIN_UID) {
+ n->audit_login_uid = c->audit_login_uid;
+ n->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_TTY) {
+ if (c->tty) {
+ n->tty = strdup(c->tty);
+ if (!n->tty)
+ return -ENOMEM;
+ } else
+ n->tty = NULL;
+ n->mask |= SD_BUS_CREDS_TTY;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_UNIQUE_NAME) {
+ assert(c->unique_name);
+
+ n->unique_name = strdup(c->unique_name);
+ if (!n->unique_name)
+ return -ENOMEM;
+ n->mask |= SD_BUS_CREDS_UNIQUE_NAME;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_WELL_KNOWN_NAMES) {
+ if (strv_isempty(c->well_known_names))
+ n->well_known_names = NULL;
+ else {
+ n->well_known_names = strv_copy(c->well_known_names);
+ if (!n->well_known_names)
+ return -ENOMEM;
+ }
+ n->well_known_names_driver = c->well_known_names_driver;
+ n->well_known_names_local = c->well_known_names_local;
+ n->mask |= SD_BUS_CREDS_WELL_KNOWN_NAMES;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_DESCRIPTION) {
+ assert(c->description);
+ n->description = strdup(c->description);
+ if (!n->description)
+ return -ENOMEM;
+ n->mask |= SD_BUS_CREDS_DESCRIPTION;
+ }
+
+ n->augmented = c->augmented & n->mask;
+
+ /* Get more data */
+
+ r = bus_creds_add_more(n, mask, 0, 0);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(n);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/bus-creds.h b/src/libsystemd/sd-bus/bus-creds.h
new file mode 100644
index 0000000..508ef9d
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-creds.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-bus.h"
+
+struct sd_bus_creds {
+ bool allocated;
+ unsigned n_ref;
+
+ uint64_t mask;
+ uint64_t augmented;
+
+ uid_t uid;
+ uid_t euid;
+ uid_t suid;
+ uid_t fsuid;
+ gid_t gid;
+ gid_t egid;
+ gid_t sgid;
+ gid_t fsgid;
+
+ gid_t *supplementary_gids;
+ unsigned n_supplementary_gids;
+
+ pid_t ppid;
+ pid_t pid;
+ pid_t tid;
+
+ char *comm;
+ char *tid_comm;
+ char *exe;
+
+ char *cmdline;
+ size_t cmdline_size;
+ char **cmdline_array;
+
+ char *cgroup;
+ char *session;
+ char *unit;
+ char *user_unit;
+ char *slice;
+ char *user_slice;
+
+ char *tty;
+
+ uint32_t *capability;
+
+ uint32_t audit_session_id;
+ uid_t audit_login_uid;
+
+ char *label;
+
+ char *unique_name;
+
+ char **well_known_names;
+ bool well_known_names_driver:1;
+ bool well_known_names_local:1;
+
+ char *cgroup_root;
+
+ char *description, *unescaped_description;
+};
+
+sd_bus_creds* bus_creds_new(void);
+
+void bus_creds_done(sd_bus_creds *c);
+
+int bus_creds_add_more(sd_bus_creds *c, uint64_t mask, pid_t pid, pid_t tid);
+
+int bus_creds_extend_by_pid(sd_bus_creds *c, uint64_t mask, sd_bus_creds **ret);
diff --git a/src/libsystemd/sd-bus/bus-dump.c b/src/libsystemd/sd-bus/bus-dump.c
new file mode 100644
index 0000000..9a6a81d
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-dump.c
@@ -0,0 +1,593 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/time.h>
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-type.h"
+#include "cap-list.h"
+#include "capability-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static char *indent(unsigned level, unsigned flags) {
+ char *p;
+ unsigned n, i = 0;
+
+ n = 0;
+
+ if (flags & BUS_MESSAGE_DUMP_SUBTREE_ONLY && level > 0)
+ level -= 1;
+
+ if (flags & BUS_MESSAGE_DUMP_WITH_HEADER)
+ n += 2;
+
+ p = new(char, n + level*8 + 1);
+ if (!p)
+ return NULL;
+
+ if (flags & BUS_MESSAGE_DUMP_WITH_HEADER) {
+ p[i++] = ' ';
+ p[i++] = ' ';
+ }
+
+ memset(p + i, ' ', level*8);
+ p[i + level*8] = 0;
+
+ return p;
+}
+
+int bus_message_dump(sd_bus_message *m, FILE *f, unsigned flags) {
+ unsigned level = 1;
+ int r;
+
+ assert(m);
+
+ if (!f)
+ f = stdout;
+
+ if (flags & BUS_MESSAGE_DUMP_WITH_HEADER) {
+ fprintf(f,
+ "%s%s%s Type=%s%s%s Endian=%c Flags=%u Version=%u Priority=%"PRIi64,
+ m->header->type == SD_BUS_MESSAGE_METHOD_ERROR ? ansi_highlight_red() :
+ m->header->type == SD_BUS_MESSAGE_METHOD_RETURN ? ansi_highlight_green() :
+ m->header->type != SD_BUS_MESSAGE_SIGNAL ? ansi_highlight() : "",
+ special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET),
+ ansi_normal(),
+
+ ansi_highlight(),
+ bus_message_type_to_string(m->header->type) ?: "(unknown)",
+ ansi_normal(),
+
+ m->header->endian,
+ m->header->flags,
+ m->header->version,
+ m->priority);
+
+ /* Display synthetic message serial number in a more readable
+ * format than (uint32_t) -1 */
+ if (BUS_MESSAGE_COOKIE(m) == 0xFFFFFFFFULL)
+ fprintf(f, " Cookie=-1");
+ else
+ fprintf(f, " Cookie=%" PRIu64, BUS_MESSAGE_COOKIE(m));
+
+ if (m->reply_cookie != 0)
+ fprintf(f, " ReplyCookie=%" PRIu64, m->reply_cookie);
+
+ fputs("\n", f);
+
+ if (m->sender)
+ fprintf(f, " Sender=%s%s%s", ansi_highlight(), m->sender, ansi_normal());
+ if (m->destination)
+ fprintf(f, " Destination=%s%s%s", ansi_highlight(), m->destination, ansi_normal());
+ if (m->path)
+ fprintf(f, " Path=%s%s%s", ansi_highlight(), m->path, ansi_normal());
+ if (m->interface)
+ fprintf(f, " Interface=%s%s%s", ansi_highlight(), m->interface, ansi_normal());
+ if (m->member)
+ fprintf(f, " Member=%s%s%s", ansi_highlight(), m->member, ansi_normal());
+
+ if (m->sender || m->destination || m->path || m->interface || m->member)
+ fputs("\n", f);
+
+ if (sd_bus_error_is_set(&m->error))
+ fprintf(f,
+ " ErrorName=%s%s%s"
+ " ErrorMessage=%s\"%s\"%s\n",
+ ansi_highlight_red(), strna(m->error.name), ansi_normal(),
+ ansi_highlight_red(), strna(m->error.message), ansi_normal());
+
+ if (m->monotonic != 0)
+ fprintf(f, " Monotonic="USEC_FMT, m->monotonic);
+ if (m->realtime != 0)
+ fprintf(f, " Realtime="USEC_FMT, m->realtime);
+ if (m->seqnum != 0)
+ fprintf(f, " SequenceNumber=%"PRIu64, m->seqnum);
+
+ if (m->monotonic != 0 || m->realtime != 0 || m->seqnum != 0)
+ fputs("\n", f);
+
+ bus_creds_dump(&m->creds, f, true);
+ }
+
+ r = sd_bus_message_rewind(m, !(flags & BUS_MESSAGE_DUMP_SUBTREE_ONLY));
+ if (r < 0)
+ return log_error_errno(r, "Failed to rewind: %m");
+
+ if (!(flags & BUS_MESSAGE_DUMP_SUBTREE_ONLY)) {
+ _cleanup_free_ char *prefix = NULL;
+
+ prefix = indent(0, flags);
+ if (!prefix)
+ return log_oom();
+
+ fprintf(f, "%sMESSAGE \"%s\" {\n", prefix, strempty(m->root_container.signature));
+ }
+
+ for (;;) {
+ _cleanup_free_ char *prefix = NULL;
+ const char *contents = NULL;
+ char type;
+ union {
+ uint8_t u8;
+ uint16_t u16;
+ int16_t s16;
+ uint32_t u32;
+ int32_t s32;
+ uint64_t u64;
+ int64_t s64;
+ double d64;
+ const char *string;
+ int i;
+ } basic;
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return log_error_errno(r, "Failed to peek type: %m");
+
+ if (r == 0) {
+ if (level <= 1)
+ break;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to exit container: %m");
+
+ level--;
+
+ prefix = indent(level, flags);
+ if (!prefix)
+ return log_oom();
+
+ fprintf(f, "%s};\n", prefix);
+ continue;
+ }
+
+ prefix = indent(level, flags);
+ if (!prefix)
+ return log_oom();
+
+ if (bus_type_is_container(type) > 0) {
+ r = sd_bus_message_enter_container(m, type, contents);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enter container: %m");
+
+ if (type == SD_BUS_TYPE_ARRAY)
+ fprintf(f, "%sARRAY \"%s\" {\n", prefix, contents);
+ else if (type == SD_BUS_TYPE_VARIANT)
+ fprintf(f, "%sVARIANT \"%s\" {\n", prefix, contents);
+ else if (type == SD_BUS_TYPE_STRUCT)
+ fprintf(f, "%sSTRUCT \"%s\" {\n", prefix, contents);
+ else if (type == SD_BUS_TYPE_DICT_ENTRY)
+ fprintf(f, "%sDICT_ENTRY \"%s\" {\n", prefix, contents);
+
+ level++;
+
+ continue;
+ }
+
+ r = sd_bus_message_read_basic(m, type, &basic);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get basic: %m");
+
+ assert(r > 0);
+
+ switch (type) {
+
+ case SD_BUS_TYPE_BYTE:
+ fprintf(f, "%sBYTE %s%u%s;\n", prefix, ansi_highlight(), basic.u8, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ fprintf(f, "%sBOOLEAN %s%s%s;\n", prefix, ansi_highlight(), true_false(basic.i), ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_INT16:
+ fprintf(f, "%sINT16 %s%i%s;\n", prefix, ansi_highlight(), basic.s16, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_UINT16:
+ fprintf(f, "%sUINT16 %s%u%s;\n", prefix, ansi_highlight(), basic.u16, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_INT32:
+ fprintf(f, "%sINT32 %s%i%s;\n", prefix, ansi_highlight(), basic.s32, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_UINT32:
+ fprintf(f, "%sUINT32 %s%u%s;\n", prefix, ansi_highlight(), basic.u32, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_INT64:
+ fprintf(f, "%sINT64 %s%"PRIi64"%s;\n", prefix, ansi_highlight(), basic.s64, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_UINT64:
+ fprintf(f, "%sUINT64 %s%"PRIu64"%s;\n", prefix, ansi_highlight(), basic.u64, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_DOUBLE:
+ fprintf(f, "%sDOUBLE %s%g%s;\n", prefix, ansi_highlight(), basic.d64, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_STRING:
+ fprintf(f, "%sSTRING \"%s%s%s\";\n", prefix, ansi_highlight(), basic.string, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_OBJECT_PATH:
+ fprintf(f, "%sOBJECT_PATH \"%s%s%s\";\n", prefix, ansi_highlight(), basic.string, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_SIGNATURE:
+ fprintf(f, "%sSIGNATURE \"%s%s%s\";\n", prefix, ansi_highlight(), basic.string, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_UNIX_FD:
+ fprintf(f, "%sUNIX_FD %s%i%s;\n", prefix, ansi_highlight(), basic.i, ansi_normal());
+ break;
+
+ default:
+ assert_not_reached("Unknown basic type.");
+ }
+ }
+
+ if (!(flags & BUS_MESSAGE_DUMP_SUBTREE_ONLY)) {
+ _cleanup_free_ char *prefix = NULL;
+
+ prefix = indent(0, flags);
+ if (!prefix)
+ return log_oom();
+
+ fprintf(f, "%s};\n\n", prefix);
+ }
+
+ return 0;
+}
+
+static void dump_capabilities(
+ sd_bus_creds *c,
+ FILE *f,
+ const char *name,
+ bool terse,
+ int (*has)(sd_bus_creds *c, int capability)) {
+
+ unsigned long i, last_cap;
+ unsigned n = 0;
+ int r;
+
+ assert(c);
+ assert(f);
+ assert(name);
+ assert(has);
+
+ i = 0;
+ r = has(c, i);
+ if (r < 0)
+ return;
+
+ fprintf(f, "%s%s=%s", terse ? " " : "", name, terse ? "" : ansi_highlight());
+ last_cap = cap_last_cap();
+
+ for (;;) {
+ if (r > 0) {
+
+ if (n > 0)
+ fputc(' ', f);
+ if (n % 4 == 3)
+ fprintf(f, terse ? "\n " : "\n ");
+
+ fprintf(f, "%s", strna(capability_to_name(i)));
+ n++;
+ }
+
+ i++;
+
+ if (i > last_cap)
+ break;
+
+ r = has(c, i);
+ }
+
+ fputs("\n", f);
+
+ if (!terse)
+ fputs(ansi_normal(), f);
+}
+
+int bus_creds_dump(sd_bus_creds *c, FILE *f, bool terse) {
+ uid_t owner, audit_loginuid;
+ uint32_t audit_sessionid;
+ char **cmdline = NULL, **well_known = NULL;
+ const char *prefix, *color, *suffix, *s;
+ int r, q, v, w, z;
+
+ assert(c);
+
+ if (!f)
+ f = stdout;
+
+ if (terse) {
+ prefix = " ";
+ suffix = "";
+ color = "";
+ } else {
+ const char *off;
+
+ prefix = "";
+ color = ansi_highlight();
+
+ off = ansi_normal();
+ suffix = strjoina(off, "\n");
+ }
+
+ if (c->mask & SD_BUS_CREDS_PID)
+ fprintf(f, "%sPID=%s"PID_FMT"%s", prefix, color, c->pid, suffix);
+ if (c->mask & SD_BUS_CREDS_TID)
+ fprintf(f, "%sTID=%s"PID_FMT"%s", prefix, color, c->tid, suffix);
+ if (c->mask & SD_BUS_CREDS_PPID) {
+ if (c->ppid == 0)
+ fprintf(f, "%sPPID=%sn/a%s", prefix, color, suffix);
+ else
+ fprintf(f, "%sPPID=%s"PID_FMT"%s", prefix, color, c->ppid, suffix);
+ }
+ if (c->mask & SD_BUS_CREDS_TTY)
+ fprintf(f, "%sTTY=%s%s%s", prefix, color, strna(c->tty), suffix);
+
+ if (terse && ((c->mask & (SD_BUS_CREDS_PID|SD_BUS_CREDS_TID|SD_BUS_CREDS_PPID|SD_BUS_CREDS_TTY))))
+ fputs("\n", f);
+
+ if (c->mask & SD_BUS_CREDS_UID)
+ fprintf(f, "%sUID=%s"UID_FMT"%s", prefix, color, c->uid, suffix);
+ if (c->mask & SD_BUS_CREDS_EUID)
+ fprintf(f, "%sEUID=%s"UID_FMT"%s", prefix, color, c->euid, suffix);
+ if (c->mask & SD_BUS_CREDS_SUID)
+ fprintf(f, "%sSUID=%s"UID_FMT"%s", prefix, color, c->suid, suffix);
+ if (c->mask & SD_BUS_CREDS_FSUID)
+ fprintf(f, "%sFSUID=%s"UID_FMT"%s", prefix, color, c->fsuid, suffix);
+ r = sd_bus_creds_get_owner_uid(c, &owner);
+ if (r >= 0)
+ fprintf(f, "%sOwnerUID=%s"UID_FMT"%s", prefix, color, owner, suffix);
+ if (c->mask & SD_BUS_CREDS_GID)
+ fprintf(f, "%sGID=%s"GID_FMT"%s", prefix, color, c->gid, suffix);
+ if (c->mask & SD_BUS_CREDS_EGID)
+ fprintf(f, "%sEGID=%s"GID_FMT"%s", prefix, color, c->egid, suffix);
+ if (c->mask & SD_BUS_CREDS_SGID)
+ fprintf(f, "%sSGID=%s"GID_FMT"%s", prefix, color, c->sgid, suffix);
+ if (c->mask & SD_BUS_CREDS_FSGID)
+ fprintf(f, "%sFSGID=%s"GID_FMT"%s", prefix, color, c->fsgid, suffix);
+
+ if (c->mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS) {
+ unsigned i;
+
+ fprintf(f, "%sSupplementaryGIDs=%s", prefix, color);
+ for (i = 0; i < c->n_supplementary_gids; i++)
+ fprintf(f, "%s" GID_FMT, i > 0 ? " " : "", c->supplementary_gids[i]);
+ fprintf(f, "%s", suffix);
+ }
+
+ if (terse && ((c->mask & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID|
+ SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID|
+ SD_BUS_CREDS_SUPPLEMENTARY_GIDS)) || r >= 0))
+ fputs("\n", f);
+
+ if (c->mask & SD_BUS_CREDS_COMM)
+ fprintf(f, "%sComm=%s%s%s", prefix, color, c->comm, suffix);
+ if (c->mask & SD_BUS_CREDS_TID_COMM)
+ fprintf(f, "%sTIDComm=%s%s%s", prefix, color, c->tid_comm, suffix);
+ if (c->mask & SD_BUS_CREDS_EXE)
+ fprintf(f, "%sExe=%s%s%s", prefix, color, strna(c->exe), suffix);
+
+ if (terse && (c->mask & (SD_BUS_CREDS_EXE|SD_BUS_CREDS_COMM|SD_BUS_CREDS_TID_COMM)))
+ fputs("\n", f);
+
+ r = sd_bus_creds_get_cmdline(c, &cmdline);
+ if (r >= 0) {
+ char **i;
+
+ fprintf(f, "%sCommandLine=%s", prefix, color);
+ STRV_FOREACH(i, cmdline) {
+ if (i != cmdline)
+ fputc(' ', f);
+
+ fputs(*i, f);
+ }
+
+ fprintf(f, "%s", suffix);
+ } else if (r != -ENODATA)
+ fprintf(f, "%sCommandLine=%sn/a%s", prefix, color, suffix);
+
+ if (c->mask & SD_BUS_CREDS_SELINUX_CONTEXT)
+ fprintf(f, "%sLabel=%s%s%s", prefix, color, c->label, suffix);
+ if (c->mask & SD_BUS_CREDS_DESCRIPTION)
+ fprintf(f, "%sDescription=%s%s%s", prefix, color, c->description, suffix);
+
+ if (terse && (c->mask & (SD_BUS_CREDS_SELINUX_CONTEXT|SD_BUS_CREDS_DESCRIPTION)))
+ fputs("\n", f);
+
+ if (c->mask & SD_BUS_CREDS_CGROUP)
+ fprintf(f, "%sCGroup=%s%s%s", prefix, color, c->cgroup, suffix);
+ s = NULL;
+ r = sd_bus_creds_get_unit(c, &s);
+ if (r != -ENODATA)
+ fprintf(f, "%sUnit=%s%s%s", prefix, color, strna(s), suffix);
+ s = NULL;
+ v = sd_bus_creds_get_slice(c, &s);
+ if (v != -ENODATA)
+ fprintf(f, "%sSlice=%s%s%s", prefix, color, strna(s), suffix);
+ s = NULL;
+ q = sd_bus_creds_get_user_unit(c, &s);
+ if (q != -ENODATA)
+ fprintf(f, "%sUserUnit=%s%s%s", prefix, color, strna(s), suffix);
+ s = NULL;
+ w = sd_bus_creds_get_user_slice(c, &s);
+ if (w != -ENODATA)
+ fprintf(f, "%sUserSlice=%s%s%s", prefix, color, strna(s), suffix);
+ s = NULL;
+ z = sd_bus_creds_get_session(c, &s);
+ if (z != -ENODATA)
+ fprintf(f, "%sSession=%s%s%s", prefix, color, strna(s), suffix);
+
+ if (terse && ((c->mask & SD_BUS_CREDS_CGROUP) || r != -ENODATA || q != -ENODATA || v != -ENODATA || w != -ENODATA || z != -ENODATA))
+ fputs("\n", f);
+
+ r = sd_bus_creds_get_audit_login_uid(c, &audit_loginuid);
+ if (r >= 0)
+ fprintf(f, "%sAuditLoginUID=%s"UID_FMT"%s", prefix, color, audit_loginuid, suffix);
+ else if (r != -ENODATA)
+ fprintf(f, "%sAuditLoginUID=%sn/a%s", prefix, color, suffix);
+ q = sd_bus_creds_get_audit_session_id(c, &audit_sessionid);
+ if (q >= 0)
+ fprintf(f, "%sAuditSessionID=%s%"PRIu32"%s", prefix, color, audit_sessionid, suffix);
+ else if (q != -ENODATA)
+ fprintf(f, "%sAuditSessionID=%sn/a%s", prefix, color, suffix);
+
+ if (terse && (r != -ENODATA || q != -ENODATA))
+ fputs("\n", f);
+
+ if (c->mask & SD_BUS_CREDS_UNIQUE_NAME)
+ fprintf(f, "%sUniqueName=%s%s%s", prefix, color, c->unique_name, suffix);
+
+ if (sd_bus_creds_get_well_known_names(c, &well_known) >= 0) {
+ char **i;
+
+ fprintf(f, "%sWellKnownNames=%s", prefix, color);
+ STRV_FOREACH(i, well_known) {
+ if (i != well_known)
+ fputc(' ', f);
+
+ fputs(*i, f);
+ }
+
+ fprintf(f, "%s", suffix);
+ }
+
+ if (terse && (c->mask & SD_BUS_CREDS_UNIQUE_NAME || well_known))
+ fputc('\n', f);
+
+ dump_capabilities(c, f, "EffectiveCapabilities", terse, sd_bus_creds_has_effective_cap);
+ dump_capabilities(c, f, "PermittedCapabilities", terse, sd_bus_creds_has_permitted_cap);
+ dump_capabilities(c, f, "InheritableCapabilities", terse, sd_bus_creds_has_inheritable_cap);
+ dump_capabilities(c, f, "BoundingCapabilities", terse, sd_bus_creds_has_bounding_cap);
+
+ return 0;
+}
+
+/*
+ * For details about the file format, see:
+ *
+ * http://wiki.wireshark.org/Development/LibpcapFileFormat
+ */
+
+typedef struct _packed_ pcap_hdr_s {
+ uint32_t magic_number; /* magic number */
+ uint16_t version_major; /* major version number */
+ uint16_t version_minor; /* minor version number */
+ int32_t thiszone; /* GMT to local correction */
+ uint32_t sigfigs; /* accuracy of timestamps */
+ uint32_t snaplen; /* max length of captured packets, in octets */
+ uint32_t network; /* data link type */
+} pcap_hdr_t ;
+
+typedef struct _packed_ pcaprec_hdr_s {
+ uint32_t ts_sec; /* timestamp seconds */
+ uint32_t ts_usec; /* timestamp microseconds */
+ uint32_t incl_len; /* number of octets of packet saved in file */
+ uint32_t orig_len; /* actual length of packet */
+} pcaprec_hdr_t;
+
+int bus_pcap_header(size_t snaplen, FILE *f) {
+
+ pcap_hdr_t hdr = {
+ .magic_number = 0xa1b2c3d4U,
+ .version_major = 2,
+ .version_minor = 4,
+ .thiszone = 0, /* UTC */
+ .sigfigs = 0,
+ .network = 231, /* D-Bus */
+ };
+
+ if (!f)
+ f = stdout;
+
+ assert(snaplen > 0);
+ assert((size_t) (uint32_t) snaplen == snaplen);
+
+ hdr.snaplen = (uint32_t) snaplen;
+
+ fwrite(&hdr, 1, sizeof(hdr), f);
+
+ return fflush_and_check(f);
+}
+
+int bus_message_pcap_frame(sd_bus_message *m, size_t snaplen, FILE *f) {
+ struct bus_body_part *part;
+ pcaprec_hdr_t hdr = {};
+ struct timeval tv;
+ unsigned i;
+ size_t w;
+
+ if (!f)
+ f = stdout;
+
+ assert(m);
+ assert(snaplen > 0);
+ assert((size_t) (uint32_t) snaplen == snaplen);
+
+ if (m->realtime != 0)
+ timeval_store(&tv, m->realtime);
+ else
+ assert_se(gettimeofday(&tv, NULL) >= 0);
+
+ hdr.ts_sec = tv.tv_sec;
+ hdr.ts_usec = tv.tv_usec;
+ hdr.orig_len = BUS_MESSAGE_SIZE(m);
+ hdr.incl_len = MIN(hdr.orig_len, snaplen);
+
+ /* write the pcap header */
+ fwrite(&hdr, 1, sizeof(hdr), f);
+
+ /* write the dbus header */
+ w = MIN(BUS_MESSAGE_BODY_BEGIN(m), snaplen);
+ fwrite(m->header, 1, w, f);
+ snaplen -= w;
+
+ /* write the dbus body */
+ MESSAGE_FOREACH_PART(part, i, m) {
+ if (snaplen <= 0)
+ break;
+
+ w = MIN(part->size, snaplen);
+ fwrite(part->data, 1, w, f);
+ snaplen -= w;
+ }
+
+ return fflush_and_check(f);
+}
diff --git a/src/libsystemd/sd-bus/bus-dump.h b/src/libsystemd/sd-bus/bus-dump.h
new file mode 100644
index 0000000..a1b67c6
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-dump.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "sd-bus.h"
+
+enum {
+ BUS_MESSAGE_DUMP_WITH_HEADER = 1 << 0,
+ BUS_MESSAGE_DUMP_SUBTREE_ONLY = 1 << 1,
+};
+
+int bus_message_dump(sd_bus_message *m, FILE *f, unsigned flags);
+
+int bus_creds_dump(sd_bus_creds *c, FILE *f, bool terse);
+
+int bus_pcap_header(size_t snaplen, FILE *f);
+int bus_message_pcap_frame(sd_bus_message *m, size_t snaplen, FILE *f);
diff --git a/src/libsystemd/sd-bus/bus-error.c b/src/libsystemd/sd-bus/bus-error.c
new file mode 100644
index 0000000..dc95237
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-error.c
@@ -0,0 +1,610 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "errno-list.h"
+#include "string-util.h"
+#include "util.h"
+
+BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map bus_standard_errors[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.Failed", EACCES),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.NoMemory", ENOMEM),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.ServiceUnknown", EHOSTUNREACH),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.NameHasNoOwner", ENXIO),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.NoReply", ETIMEDOUT),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.IOError", EIO),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.BadAddress", EADDRNOTAVAIL),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.NotSupported", EOPNOTSUPP),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.LimitsExceeded", ENOBUFS),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.AccessDenied", EACCES),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.AuthFailed", EACCES),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.InteractiveAuthorizationRequired", EACCES),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.NoServer", EHOSTDOWN),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.Timeout", ETIMEDOUT),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.NoNetwork", ENONET),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.AddressInUse", EADDRINUSE),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.Disconnected", ECONNRESET),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.InvalidArgs", EINVAL),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.FileNotFound", ENOENT),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.FileExists", EEXIST),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.UnknownMethod", EBADR),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.UnknownObject", EBADR),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.UnknownInterface", EBADR),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.UnknownProperty", EBADR),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.PropertyReadOnly", EROFS),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.UnixProcessIdUnknown", ESRCH),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.InvalidSignature", EINVAL),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.InconsistentMessage", EBADMSG),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.TimedOut", ETIMEDOUT),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.MatchRuleInvalid", EINVAL),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.InvalidFileContent", EINVAL),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.MatchRuleNotFound", ENOENT),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.SELinuxSecurityContextUnknown", ESRCH),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.ObjectPathInUse", EBUSY),
+ SD_BUS_ERROR_MAP_END
+};
+
+/* GCC maps this magically to the beginning and end of the BUS_ERROR_MAP section */
+extern const sd_bus_error_map __start_SYSTEMD_BUS_ERROR_MAP[];
+extern const sd_bus_error_map __stop_SYSTEMD_BUS_ERROR_MAP[];
+
+/* Additional maps registered with sd_bus_error_add_map() are in this
+ * NULL terminated array */
+static const sd_bus_error_map **additional_error_maps = NULL;
+
+static int bus_error_name_to_errno(const char *name) {
+ const sd_bus_error_map **map, *m;
+ const char *p;
+ int r;
+
+ if (!name)
+ return EINVAL;
+
+ p = startswith(name, "System.Error.");
+ if (p) {
+ r = errno_from_name(p);
+ if (r < 0)
+ return EIO;
+
+ return r;
+ }
+
+ if (additional_error_maps)
+ for (map = additional_error_maps; *map; map++)
+ for (m = *map;; m++) {
+ /* For additional error maps the end marker is actually the end marker */
+ if (m->code == BUS_ERROR_MAP_END_MARKER)
+ break;
+
+ if (streq(m->name, name))
+ return m->code;
+ }
+
+ m = ALIGN_TO_PTR(__start_SYSTEMD_BUS_ERROR_MAP, sizeof(void*));
+ while (m < __stop_SYSTEMD_BUS_ERROR_MAP) {
+ /* For magic ELF error maps, the end marker might
+ * appear in the middle of things, since multiple maps
+ * might appear in the same section. Hence, let's skip
+ * over it, but realign the pointer to the next 8 byte
+ * boundary, which is the selected alignment for the
+ * arrays. */
+ if (m->code == BUS_ERROR_MAP_END_MARKER) {
+ m = ALIGN_TO_PTR(m + 1, sizeof(void*));
+ continue;
+ }
+
+ if (streq(m->name, name))
+ return m->code;
+
+ m++;
+ }
+
+ return EIO;
+}
+
+static sd_bus_error errno_to_bus_error_const(int error) {
+
+ if (error < 0)
+ error = -error;
+
+ switch (error) {
+
+ case ENOMEM:
+ return BUS_ERROR_OOM;
+
+ case EPERM:
+ case EACCES:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_ACCESS_DENIED, "Access denied");
+
+ case EINVAL:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_INVALID_ARGS, "Invalid argument");
+
+ case ESRCH:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_UNIX_PROCESS_ID_UNKNOWN, "No such process");
+
+ case ENOENT:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FILE_NOT_FOUND, "File not found");
+
+ case EEXIST:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FILE_EXISTS, "File exists");
+
+ case ETIMEDOUT:
+ case ETIME:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_TIMEOUT, "Timed out");
+
+ case EIO:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_IO_ERROR, "Input/output error");
+
+ case ENETRESET:
+ case ECONNABORTED:
+ case ECONNRESET:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_DISCONNECTED, "Disconnected");
+
+ case EOPNOTSUPP:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NOT_SUPPORTED, "Not supported");
+
+ case EADDRNOTAVAIL:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_BAD_ADDRESS, "Address not available");
+
+ case ENOBUFS:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_LIMITS_EXCEEDED, "Limits exceeded");
+
+ case EADDRINUSE:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_ADDRESS_IN_USE, "Address in use");
+
+ case EBADMSG:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_INCONSISTENT_MESSAGE, "Inconsistent message");
+ }
+
+ return SD_BUS_ERROR_NULL;
+}
+
+static int errno_to_bus_error_name_new(int error, char **ret) {
+ const char *name;
+ char *n;
+
+ if (error < 0)
+ error = -error;
+
+ name = errno_to_name(error);
+ if (!name)
+ return 0;
+
+ n = strappend("System.Error.", name);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 1;
+}
+
+bool bus_error_is_dirty(sd_bus_error *e) {
+ if (!e)
+ return false;
+
+ return e->name || e->message || e->_need_free != 0;
+}
+
+_public_ void sd_bus_error_free(sd_bus_error *e) {
+ if (!e)
+ return;
+
+ if (e->_need_free > 0) {
+ free((void*) e->name);
+ free((void*) e->message);
+ }
+
+ *e = SD_BUS_ERROR_NULL;
+}
+
+_public_ int sd_bus_error_set(sd_bus_error *e, const char *name, const char *message) {
+
+ if (!name)
+ return 0;
+ if (!e)
+ goto finish;
+
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ e->name = strdup(name);
+ if (!e->name) {
+ *e = BUS_ERROR_OOM;
+ return -ENOMEM;
+ }
+
+ if (message)
+ e->message = strdup(message);
+
+ e->_need_free = 1;
+
+finish:
+ return -bus_error_name_to_errno(name);
+}
+
+int bus_error_setfv(sd_bus_error *e, const char *name, const char *format, va_list ap) {
+
+ if (!name)
+ return 0;
+
+ if (e) {
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ e->name = strdup(name);
+ if (!e->name) {
+ *e = BUS_ERROR_OOM;
+ return -ENOMEM;
+ }
+
+ /* If we hit OOM on formatting the pretty message, we ignore
+ * this, since we at least managed to write the error name */
+ if (format)
+ (void) vasprintf((char**) &e->message, format, ap);
+
+ e->_need_free = 1;
+ }
+
+ return -bus_error_name_to_errno(name);
+}
+
+_public_ int sd_bus_error_setf(sd_bus_error *e, const char *name, const char *format, ...) {
+
+ if (format) {
+ int r;
+ va_list ap;
+
+ va_start(ap, format);
+ r = bus_error_setfv(e, name, format, ap);
+ va_end(ap);
+
+ return r;
+ }
+
+ return sd_bus_error_set(e, name, NULL);
+}
+
+_public_ int sd_bus_error_copy(sd_bus_error *dest, const sd_bus_error *e) {
+
+ if (!sd_bus_error_is_set(e))
+ return 0;
+ if (!dest)
+ goto finish;
+
+ assert_return(!bus_error_is_dirty(dest), -EINVAL);
+
+ /*
+ * _need_free < 0 indicates that the error is temporarily const, needs deep copying
+ * _need_free == 0 indicates that the error is perpetually const, needs no deep copying
+ * _need_free > 0 indicates that the error is fully dynamic, needs deep copying
+ */
+
+ if (e->_need_free == 0)
+ *dest = *e;
+ else {
+ dest->name = strdup(e->name);
+ if (!dest->name) {
+ *dest = BUS_ERROR_OOM;
+ return -ENOMEM;
+ }
+
+ if (e->message)
+ dest->message = strdup(e->message);
+
+ dest->_need_free = 1;
+ }
+
+finish:
+ return -bus_error_name_to_errno(e->name);
+}
+
+_public_ int sd_bus_error_move(sd_bus_error *dest, sd_bus_error *e) {
+ int r;
+
+ if (!sd_bus_error_is_set(e)) {
+
+ if (dest)
+ *dest = SD_BUS_ERROR_NULL;
+
+ return 0;
+ }
+
+ r = -bus_error_name_to_errno(e->name);
+
+ if (dest) {
+ *dest = *e;
+ *e = SD_BUS_ERROR_NULL;
+ } else
+ sd_bus_error_free(e);
+
+ return r;
+}
+
+_public_ int sd_bus_error_set_const(sd_bus_error *e, const char *name, const char *message) {
+ if (!name)
+ return 0;
+ if (!e)
+ goto finish;
+
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ *e = SD_BUS_ERROR_MAKE_CONST(name, message);
+
+finish:
+ return -bus_error_name_to_errno(name);
+}
+
+_public_ int sd_bus_error_is_set(const sd_bus_error *e) {
+ if (!e)
+ return 0;
+
+ return !!e->name;
+}
+
+_public_ int sd_bus_error_has_name(const sd_bus_error *e, const char *name) {
+ if (!e)
+ return 0;
+
+ return streq_ptr(e->name, name);
+}
+
+_public_ int sd_bus_error_get_errno(const sd_bus_error* e) {
+ if (!e)
+ return 0;
+
+ if (!e->name)
+ return 0;
+
+ return bus_error_name_to_errno(e->name);
+}
+
+static void bus_error_strerror(sd_bus_error *e, int error) {
+ size_t k = 64;
+ char *m;
+
+ assert(e);
+
+ for (;;) {
+ char *x;
+
+ m = new(char, k);
+ if (!m)
+ return;
+
+ errno = 0;
+ x = strerror_r(error, m, k);
+ if (errno == ERANGE || strlen(x) >= k - 1) {
+ free(m);
+ k *= 2;
+ continue;
+ }
+
+ if (errno) {
+ free(m);
+ return;
+ }
+
+ if (x == m) {
+ if (e->_need_free > 0) {
+ /* Error is already dynamic, let's just update the message */
+ free((char*) e->message);
+ e->message = x;
+
+ } else {
+ char *t;
+ /* Error was const so far, let's make it dynamic, if we can */
+
+ t = strdup(e->name);
+ if (!t) {
+ free(m);
+ return;
+ }
+
+ e->_need_free = 1;
+ e->name = t;
+ e->message = x;
+ }
+ } else {
+ free(m);
+
+ if (e->_need_free > 0) {
+ char *t;
+
+ /* Error is dynamic, let's hence make the message also dynamic */
+ t = strdup(x);
+ if (!t)
+ return;
+
+ free((char*) e->message);
+ e->message = t;
+ } else {
+ /* Error is const, hence we can just override */
+ e->message = x;
+ }
+ }
+
+ return;
+ }
+}
+
+_public_ int sd_bus_error_set_errno(sd_bus_error *e, int error) {
+
+ if (error < 0)
+ error = -error;
+
+ if (!e)
+ return -error;
+ if (error == 0)
+ return -error;
+
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ /* First, try a const translation */
+ *e = errno_to_bus_error_const(error);
+
+ if (!sd_bus_error_is_set(e)) {
+ int k;
+
+ /* If that didn't work, try a dynamic one. */
+
+ k = errno_to_bus_error_name_new(error, (char**) &e->name);
+ if (k > 0)
+ e->_need_free = 1;
+ else if (k < 0) {
+ *e = BUS_ERROR_OOM;
+ return -error;
+ } else
+ *e = BUS_ERROR_FAILED;
+ }
+
+ /* Now, fill in the message from strerror() if we can */
+ bus_error_strerror(e, error);
+ return -error;
+}
+
+_public_ int sd_bus_error_set_errnofv(sd_bus_error *e, int error, const char *format, va_list ap) {
+ PROTECT_ERRNO;
+ int r;
+
+ if (error < 0)
+ error = -error;
+
+ if (!e)
+ return -error;
+ if (error == 0)
+ return 0;
+
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ /* First, try a const translation */
+ *e = errno_to_bus_error_const(error);
+
+ if (!sd_bus_error_is_set(e)) {
+ int k;
+
+ /* If that didn't work, try a dynamic one */
+
+ k = errno_to_bus_error_name_new(error, (char**) &e->name);
+ if (k > 0)
+ e->_need_free = 1;
+ else if (k < 0) {
+ *e = BUS_ERROR_OOM;
+ return -ENOMEM;
+ } else
+ *e = BUS_ERROR_FAILED;
+ }
+
+ if (format) {
+ char *m;
+
+ /* Then, let's try to fill in the supplied message */
+
+ errno = error; /* Make sure that %m resolves to the specified error */
+ r = vasprintf(&m, format, ap);
+ if (r >= 0) {
+
+ if (e->_need_free <= 0) {
+ char *t;
+
+ t = strdup(e->name);
+ if (t) {
+ e->_need_free = 1;
+ e->name = t;
+ e->message = m;
+ return -error;
+ }
+
+ free(m);
+ } else {
+ free((char*) e->message);
+ e->message = m;
+ return -error;
+ }
+ }
+ }
+
+ /* If that didn't work, use strerror() for the message */
+ bus_error_strerror(e, error);
+ return -error;
+}
+
+_public_ int sd_bus_error_set_errnof(sd_bus_error *e, int error, const char *format, ...) {
+ int r;
+
+ if (error < 0)
+ error = -error;
+
+ if (!e)
+ return -error;
+ if (error == 0)
+ return 0;
+
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ if (format) {
+ va_list ap;
+
+ va_start(ap, format);
+ r = sd_bus_error_set_errnofv(e, error, format, ap);
+ va_end(ap);
+
+ return r;
+ }
+
+ return sd_bus_error_set_errno(e, error);
+}
+
+const char *bus_error_message(const sd_bus_error *e, int error) {
+
+ if (e) {
+ /* Sometimes, the D-Bus server is a little bit too verbose with
+ * its error messages, so let's override them here */
+ if (sd_bus_error_has_name(e, SD_BUS_ERROR_ACCESS_DENIED))
+ return "Access denied";
+
+ if (e->message)
+ return e->message;
+ }
+
+ if (error < 0)
+ error = -error;
+
+ return strerror(error);
+}
+
+static bool map_ok(const sd_bus_error_map *map) {
+ for (; map->code != BUS_ERROR_MAP_END_MARKER; map++)
+ if (!map->name || map->code <=0)
+ return false;
+ return true;
+}
+
+_public_ int sd_bus_error_add_map(const sd_bus_error_map *map) {
+ const sd_bus_error_map **maps = NULL;
+ unsigned n = 0;
+
+ assert_return(map, -EINVAL);
+ assert_return(map_ok(map), -EINVAL);
+
+ if (additional_error_maps)
+ for (; additional_error_maps[n] != NULL; n++)
+ if (additional_error_maps[n] == map)
+ return 0;
+
+ maps = reallocarray(additional_error_maps, n + 2, sizeof(struct sd_bus_error_map*));
+ if (!maps)
+ return -ENOMEM;
+
+ maps[n] = map;
+ maps[n+1] = NULL;
+
+ additional_error_maps = maps;
+ return 1;
+}
diff --git a/src/libsystemd/sd-bus/bus-error.h b/src/libsystemd/sd-bus/bus-error.h
new file mode 100644
index 0000000..a6523e5
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-error.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-bus.h"
+
+#include "macro.h"
+
+bool bus_error_is_dirty(sd_bus_error *e);
+
+const char *bus_error_message(const sd_bus_error *e, int error);
+
+int bus_error_setfv(sd_bus_error *e, const char *name, const char *format, va_list ap) _printf_(3,0);
+int bus_error_set_errnofv(sd_bus_error *e, int error, const char *format, va_list ap) _printf_(3,0);
+
+#define BUS_ERROR_OOM SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NO_MEMORY, "Out of memory")
+#define BUS_ERROR_FAILED SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FAILED, "Operation failed")
+
+/*
+ * There are two ways to register error maps with the error translation
+ * logic: by using BUS_ERROR_MAP_ELF_REGISTER, which however only
+ * works when linked into the same ELF module, or via
+ * sd_bus_error_add_map() which is the official, external API, that
+ * works from any module.
+ *
+ * Note that BUS_ERROR_MAP_ELF_REGISTER has to be used as decorator in
+ * the bus error table, and BUS_ERROR_MAP_ELF_USE has to be used at
+ * least once per compilation unit (i.e. per library), to ensure that
+ * the error map is really added to the final binary.
+ */
+
+#define BUS_ERROR_MAP_ELF_REGISTER \
+ _section_("SYSTEMD_BUS_ERROR_MAP") \
+ _used_ \
+ _alignptr_ \
+ _variable_no_sanitize_address_
+
+#define BUS_ERROR_MAP_ELF_USE(errors) \
+ extern const sd_bus_error_map errors[]; \
+ _used_ \
+ static const sd_bus_error_map * const CONCATENATE(errors ## _copy_, __COUNTER__) = errors;
+
+/* We use something exotic as end marker, to ensure people build the
+ * maps using the macsd-ros. */
+#define BUS_ERROR_MAP_END_MARKER -'x'
+
+BUS_ERROR_MAP_ELF_USE(bus_standard_errors);
diff --git a/src/libsystemd/sd-bus/bus-gvariant.c b/src/libsystemd/sd-bus/bus-gvariant.c
new file mode 100644
index 0000000..ba503b3
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-gvariant.c
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "sd-bus.h"
+
+#include "bus-gvariant.h"
+#include "bus-signature.h"
+#include "bus-type.h"
+
+int bus_gvariant_get_size(const char *signature) {
+ const char *p;
+ int sum = 0, r;
+
+ /* For fixed size structs. Fails for variable size structs. */
+
+ p = signature;
+ while (*p != 0) {
+ size_t n;
+
+ r = signature_element_length(p, &n);
+ if (r < 0)
+ return r;
+ else {
+ char t[n+1];
+
+ memcpy(t, p, n);
+ t[n] = 0;
+
+ r = bus_gvariant_get_alignment(t);
+ if (r < 0)
+ return r;
+
+ sum = ALIGN_TO(sum, r);
+ }
+
+ switch (*p) {
+
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_BYTE:
+ sum += 1;
+ break;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ sum += 2;
+ break;
+
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_UNIX_FD:
+ sum += 4;
+ break;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ sum += 8;
+ break;
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ if (n == 2) {
+ /* unary type () has fixed size of 1 */
+ r = 1;
+ } else {
+ char t[n-1];
+
+ memcpy(t, p + 1, n - 2);
+ t[n - 2] = 0;
+
+ r = bus_gvariant_get_size(t);
+ if (r < 0)
+ return r;
+ }
+
+ sum += r;
+ break;
+ }
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_ARRAY:
+ case SD_BUS_TYPE_VARIANT:
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown signature type");
+ }
+
+ p += n;
+ }
+
+ r = bus_gvariant_get_alignment(signature);
+ if (r < 0)
+ return r;
+
+ return ALIGN_TO(sum, r);
+}
+
+int bus_gvariant_get_alignment(const char *signature) {
+ size_t alignment = 1;
+ const char *p;
+ int r;
+
+ p = signature;
+ while (*p != 0 && alignment < 8) {
+ size_t n;
+ int a;
+
+ r = signature_element_length(p, &n);
+ if (r < 0)
+ return r;
+
+ switch (*p) {
+
+ case SD_BUS_TYPE_BYTE:
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE:
+ a = 1;
+ break;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ a = 2;
+ break;
+
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_UNIX_FD:
+ a = 4;
+ break;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ case SD_BUS_TYPE_VARIANT:
+ a = 8;
+ break;
+
+ case SD_BUS_TYPE_ARRAY: {
+ char t[n];
+
+ memcpy(t, p + 1, n - 1);
+ t[n - 1] = 0;
+
+ a = bus_gvariant_get_alignment(t);
+ break;
+ }
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ char t[n-1];
+
+ memcpy(t, p + 1, n - 2);
+ t[n - 2] = 0;
+
+ a = bus_gvariant_get_alignment(t);
+ break;
+ }
+
+ default:
+ assert_not_reached("Unknown signature type");
+ }
+
+ if (a < 0)
+ return a;
+
+ assert(a > 0 && a <= 8);
+ if ((size_t) a > alignment)
+ alignment = (size_t) a;
+
+ p += n;
+ }
+
+ return alignment;
+}
+
+int bus_gvariant_is_fixed_size(const char *signature) {
+ const char *p;
+ int r;
+
+ assert(signature);
+
+ p = signature;
+ while (*p != 0) {
+ size_t n;
+
+ r = signature_element_length(p, &n);
+ if (r < 0)
+ return r;
+
+ switch (*p) {
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_ARRAY:
+ case SD_BUS_TYPE_VARIANT:
+ return 0;
+
+ case SD_BUS_TYPE_BYTE:
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_UNIX_FD:
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ break;
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ char t[n-1];
+
+ memcpy(t, p + 1, n - 2);
+ t[n - 2] = 0;
+
+ r = bus_gvariant_is_fixed_size(t);
+ if (r <= 0)
+ return r;
+ break;
+ }
+
+ default:
+ assert_not_reached("Unknown signature type");
+ }
+
+ p += n;
+ }
+
+ return true;
+}
+
+size_t bus_gvariant_determine_word_size(size_t sz, size_t extra) {
+ if (sz + extra <= 0xFF)
+ return 1;
+ else if (sz + extra*2 <= 0xFFFF)
+ return 2;
+ else if (sz + extra*4 <= 0xFFFFFFFF)
+ return 4;
+ else
+ return 8;
+}
+
+size_t bus_gvariant_read_word_le(void *p, size_t sz) {
+ union {
+ uint16_t u16;
+ uint32_t u32;
+ uint64_t u64;
+ } x;
+
+ assert(p);
+
+ if (sz == 1)
+ return *(uint8_t*) p;
+
+ memcpy(&x, p, sz);
+
+ if (sz == 2)
+ return le16toh(x.u16);
+ else if (sz == 4)
+ return le32toh(x.u32);
+ else if (sz == 8)
+ return le64toh(x.u64);
+
+ assert_not_reached("unknown word width");
+}
+
+void bus_gvariant_write_word_le(void *p, size_t sz, size_t value) {
+ union {
+ uint16_t u16;
+ uint32_t u32;
+ uint64_t u64;
+ } x;
+
+ assert(p);
+ assert(sz == 8 || (value < (1ULL << (sz*8))));
+
+ if (sz == 1) {
+ *(uint8_t*) p = value;
+ return;
+ } else if (sz == 2)
+ x.u16 = htole16((uint16_t) value);
+ else if (sz == 4)
+ x.u32 = htole32((uint32_t) value);
+ else if (sz == 8)
+ x.u64 = htole64((uint64_t) value);
+ else
+ assert_not_reached("unknown word width");
+
+ memcpy(p, &x, sz);
+}
diff --git a/src/libsystemd/sd-bus/bus-gvariant.h b/src/libsystemd/sd-bus/bus-gvariant.h
new file mode 100644
index 0000000..644b5f4
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-gvariant.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "macro.h"
+
+int bus_gvariant_get_size(const char *signature) _pure_;
+int bus_gvariant_get_alignment(const char *signature) _pure_;
+int bus_gvariant_is_fixed_size(const char *signature) _pure_;
+
+size_t bus_gvariant_determine_word_size(size_t sz, size_t extra);
+void bus_gvariant_write_word_le(void *p, size_t sz, size_t value);
+size_t bus_gvariant_read_word_le(void *p, size_t sz);
diff --git a/src/libsystemd/sd-bus/bus-internal.c b/src/libsystemd/sd-bus/bus-internal.c
new file mode 100644
index 0000000..40acae2
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-internal.c
@@ -0,0 +1,342 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "hexdecoct.h"
+#include "string-util.h"
+
+bool object_path_is_valid(const char *p) {
+ const char *q;
+ bool slash;
+
+ if (!p)
+ return false;
+
+ if (p[0] != '/')
+ return false;
+
+ if (p[1] == 0)
+ return true;
+
+ for (slash = true, q = p+1; *q; q++)
+ if (*q == '/') {
+ if (slash)
+ return false;
+
+ slash = true;
+ } else {
+ bool good;
+
+ good =
+ (*q >= 'a' && *q <= 'z') ||
+ (*q >= 'A' && *q <= 'Z') ||
+ (*q >= '0' && *q <= '9') ||
+ *q == '_';
+
+ if (!good)
+ return false;
+
+ slash = false;
+ }
+
+ if (slash)
+ return false;
+
+ return true;
+}
+
+char* object_path_startswith(const char *a, const char *b) {
+ const char *p;
+
+ if (!object_path_is_valid(a) ||
+ !object_path_is_valid(b))
+ return NULL;
+
+ if (streq(b, "/"))
+ return (char*) a + 1;
+
+ p = startswith(a, b);
+ if (!p)
+ return NULL;
+
+ if (*p == 0)
+ return (char*) p;
+
+ if (*p == '/')
+ return (char*) p + 1;
+
+ return NULL;
+}
+
+bool interface_name_is_valid(const char *p) {
+ const char *q;
+ bool dot, found_dot = false;
+
+ if (isempty(p))
+ return false;
+
+ for (dot = true, q = p; *q; q++)
+ if (*q == '.') {
+ if (dot)
+ return false;
+
+ found_dot = dot = true;
+ } else {
+ bool good;
+
+ good =
+ (*q >= 'a' && *q <= 'z') ||
+ (*q >= 'A' && *q <= 'Z') ||
+ (!dot && *q >= '0' && *q <= '9') ||
+ *q == '_';
+
+ if (!good)
+ return false;
+
+ dot = false;
+ }
+
+ if (q - p > 255)
+ return false;
+
+ if (dot)
+ return false;
+
+ if (!found_dot)
+ return false;
+
+ return true;
+}
+
+bool service_name_is_valid(const char *p) {
+ const char *q;
+ bool dot, found_dot = false, unique;
+
+ if (isempty(p))
+ return false;
+
+ unique = p[0] == ':';
+
+ for (dot = true, q = unique ? p+1 : p; *q; q++)
+ if (*q == '.') {
+ if (dot)
+ return false;
+
+ found_dot = dot = true;
+ } else {
+ bool good;
+
+ good =
+ (*q >= 'a' && *q <= 'z') ||
+ (*q >= 'A' && *q <= 'Z') ||
+ ((!dot || unique) && *q >= '0' && *q <= '9') ||
+ IN_SET(*q, '_', '-');
+
+ if (!good)
+ return false;
+
+ dot = false;
+ }
+
+ if (q - p > 255)
+ return false;
+
+ if (dot)
+ return false;
+
+ if (!found_dot)
+ return false;
+
+ return true;
+}
+
+bool member_name_is_valid(const char *p) {
+ const char *q;
+
+ if (isempty(p))
+ return false;
+
+ for (q = p; *q; q++) {
+ bool good;
+
+ good =
+ (*q >= 'a' && *q <= 'z') ||
+ (*q >= 'A' && *q <= 'Z') ||
+ (*q >= '0' && *q <= '9') ||
+ *q == '_';
+
+ if (!good)
+ return false;
+ }
+
+ if (q - p > 255)
+ return false;
+
+ return true;
+}
+
+/*
+ * Complex pattern match
+ * This checks whether @a is a 'complex-prefix' of @b, or @b is a
+ * 'complex-prefix' of @a, based on strings that consist of labels with @c as
+ * spearator. This function returns true if:
+ * - both strings are equal
+ * - either is a prefix of the other and ends with @c
+ * The second rule makes sure that either string needs to be fully included in
+ * the other, and the string which is considered the prefix needs to end with a
+ * separator.
+ */
+static bool complex_pattern_check(char c, const char *a, const char *b) {
+ bool separator = false;
+
+ if (!a && !b)
+ return true;
+
+ if (!a || !b)
+ return false;
+
+ for (;;) {
+ if (*a != *b)
+ return (separator && (*a == 0 || *b == 0));
+
+ if (*a == 0)
+ return true;
+
+ separator = *a == c;
+
+ a++, b++;
+ }
+}
+
+bool namespace_complex_pattern(const char *pattern, const char *value) {
+ return complex_pattern_check('.', pattern, value);
+}
+
+bool path_complex_pattern(const char *pattern, const char *value) {
+ return complex_pattern_check('/', pattern, value);
+}
+
+/*
+ * Simple pattern match
+ * This checks whether @a is a 'simple-prefix' of @b, based on strings that
+ * consist of labels with @c as separator. This function returns true, if:
+ * - if @a and @b are equal
+ * - if @a is a prefix of @b, and the first following character in @b (or the
+ * last character in @a) is @c
+ * The second rule basically makes sure that if @a is a prefix of @b, then @b
+ * must follow with a new label separated by @c. It cannot extend the label.
+ */
+static bool simple_pattern_check(char c, const char *a, const char *b) {
+ bool separator = false;
+
+ if (!a && !b)
+ return true;
+
+ if (!a || !b)
+ return false;
+
+ for (;;) {
+ if (*a != *b)
+ return *a == 0 && (*b == c || separator);
+
+ if (*a == 0)
+ return true;
+
+ separator = *a == c;
+
+ a++, b++;
+ }
+}
+
+bool namespace_simple_pattern(const char *pattern, const char *value) {
+ return simple_pattern_check('.', pattern, value);
+}
+
+bool path_simple_pattern(const char *pattern, const char *value) {
+ return simple_pattern_check('/', pattern, value);
+}
+
+int bus_message_type_from_string(const char *s, uint8_t *u) {
+ if (streq(s, "signal"))
+ *u = SD_BUS_MESSAGE_SIGNAL;
+ else if (streq(s, "method_call"))
+ *u = SD_BUS_MESSAGE_METHOD_CALL;
+ else if (streq(s, "error"))
+ *u = SD_BUS_MESSAGE_METHOD_ERROR;
+ else if (streq(s, "method_return"))
+ *u = SD_BUS_MESSAGE_METHOD_RETURN;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+const char *bus_message_type_to_string(uint8_t u) {
+ if (u == SD_BUS_MESSAGE_SIGNAL)
+ return "signal";
+ else if (u == SD_BUS_MESSAGE_METHOD_CALL)
+ return "method_call";
+ else if (u == SD_BUS_MESSAGE_METHOD_ERROR)
+ return "error";
+ else if (u == SD_BUS_MESSAGE_METHOD_RETURN)
+ return "method_return";
+ else
+ return NULL;
+}
+
+char *bus_address_escape(const char *v) {
+ const char *a;
+ char *r, *b;
+
+ r = new(char, strlen(v)*3+1);
+ if (!r)
+ return NULL;
+
+ for (a = v, b = r; *a; a++) {
+
+ if ((*a >= '0' && *a <= '9') ||
+ (*a >= 'a' && *a <= 'z') ||
+ (*a >= 'A' && *a <= 'Z') ||
+ strchr("_-/.", *a))
+ *(b++) = *a;
+ else {
+ *(b++) = '%';
+ *(b++) = hexchar(*a >> 4);
+ *(b++) = hexchar(*a & 0xF);
+ }
+ }
+
+ *b = 0;
+ return r;
+}
+
+int bus_maybe_reply_error(sd_bus_message *m, int r, sd_bus_error *error) {
+ assert(m);
+
+ if (r < 0) {
+ if (m->header->type == SD_BUS_MESSAGE_METHOD_CALL)
+ sd_bus_reply_method_errno(m, r, error);
+
+ } else if (sd_bus_error_is_set(error)) {
+ if (m->header->type == SD_BUS_MESSAGE_METHOD_CALL)
+ sd_bus_reply_method_error(m, error);
+ } else
+ return r;
+
+ log_debug("Failed to process message type=%s sender=%s destination=%s path=%s interface=%s member=%s cookie=%" PRIu64 " reply_cookie=%" PRIu64 " signature=%s error-name=%s error-message=%s: %s",
+ bus_message_type_to_string(m->header->type),
+ strna(sd_bus_message_get_sender(m)),
+ strna(sd_bus_message_get_destination(m)),
+ strna(sd_bus_message_get_path(m)),
+ strna(sd_bus_message_get_interface(m)),
+ strna(sd_bus_message_get_member(m)),
+ BUS_MESSAGE_COOKIE(m),
+ m->reply_cookie,
+ strna(m->root_container.signature),
+ strna(m->error.name),
+ strna(m->error.message),
+ bus_error_message(error, r));
+
+ return 1;
+}
diff --git a/src/libsystemd/sd-bus/bus-internal.h b/src/libsystemd/sd-bus/bus-internal.h
new file mode 100644
index 0000000..f208b29
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-internal.h
@@ -0,0 +1,407 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <pthread.h>
+#include <sys/socket.h>
+
+#include "sd-bus.h"
+
+#include "bus-error.h"
+#include "bus-kernel.h"
+#include "bus-match.h"
+#include "def.h"
+#include "hashmap.h"
+#include "list.h"
+#include "prioq.h"
+#include "refcnt.h"
+#include "socket-util.h"
+#include "util.h"
+
+struct reply_callback {
+ sd_bus_message_handler_t callback;
+ usec_t timeout_usec; /* this is a relative timeout until we reach the BUS_HELLO state, and an absolute one right after */
+ uint64_t cookie;
+ unsigned prioq_idx;
+};
+
+struct filter_callback {
+ sd_bus_message_handler_t callback;
+
+ unsigned last_iteration;
+
+ LIST_FIELDS(struct filter_callback, callbacks);
+};
+
+struct match_callback {
+ sd_bus_message_handler_t callback;
+ sd_bus_message_handler_t install_callback;
+
+ sd_bus_slot *install_slot; /* The AddMatch() call */
+
+ unsigned last_iteration;
+
+ char *match_string;
+
+ struct bus_match_node *match_node;
+};
+
+struct node {
+ char *path;
+ struct node *parent;
+ LIST_HEAD(struct node, child);
+ LIST_FIELDS(struct node, siblings);
+
+ LIST_HEAD(struct node_callback, callbacks);
+ LIST_HEAD(struct node_vtable, vtables);
+ LIST_HEAD(struct node_enumerator, enumerators);
+ LIST_HEAD(struct node_object_manager, object_managers);
+};
+
+struct node_callback {
+ struct node *node;
+
+ bool is_fallback;
+ sd_bus_message_handler_t callback;
+
+ unsigned last_iteration;
+
+ LIST_FIELDS(struct node_callback, callbacks);
+};
+
+struct node_enumerator {
+ struct node *node;
+
+ sd_bus_node_enumerator_t callback;
+
+ unsigned last_iteration;
+
+ LIST_FIELDS(struct node_enumerator, enumerators);
+};
+
+struct node_object_manager {
+ struct node *node;
+
+ LIST_FIELDS(struct node_object_manager, object_managers);
+};
+
+struct node_vtable {
+ struct node *node;
+
+ char *interface;
+ bool is_fallback;
+ const sd_bus_vtable *vtable;
+ sd_bus_object_find_t find;
+
+ unsigned last_iteration;
+
+ LIST_FIELDS(struct node_vtable, vtables);
+};
+
+struct vtable_member {
+ const char *path;
+ const char *interface;
+ const char *member;
+ struct node_vtable *parent;
+ unsigned last_iteration;
+ const sd_bus_vtable *vtable;
+};
+
+typedef enum BusSlotType {
+ BUS_REPLY_CALLBACK,
+ BUS_FILTER_CALLBACK,
+ BUS_MATCH_CALLBACK,
+ BUS_NODE_CALLBACK,
+ BUS_NODE_ENUMERATOR,
+ BUS_NODE_VTABLE,
+ BUS_NODE_OBJECT_MANAGER,
+ _BUS_SLOT_INVALID = -1,
+} BusSlotType;
+
+struct sd_bus_slot {
+ unsigned n_ref;
+ sd_bus *bus;
+ void *userdata;
+ sd_bus_destroy_t destroy_callback;
+ BusSlotType type:5;
+
+ /* Slots can be "floating" or not. If they are not floating (the usual case) then they reference the bus object
+ * they are associated with. This means the bus object stays allocated at least as long as there is a slot
+ * around associated with it. If it is floating, then the slot's lifecycle is bound to the lifecycle of the
+ * bus: it will be disconnected from the bus when the bus is destroyed, and it keeping the slot reffed hence
+ * won't mean the bus stays reffed too. Internally this means the reference direction is reversed: floating
+ * slots objects are referenced by the bus object, and not vice versa. */
+ bool floating:1;
+
+ bool match_added:1;
+ char *description;
+
+ LIST_FIELDS(sd_bus_slot, slots);
+
+ union {
+ struct reply_callback reply_callback;
+ struct filter_callback filter_callback;
+ struct match_callback match_callback;
+ struct node_callback node_callback;
+ struct node_enumerator node_enumerator;
+ struct node_object_manager node_object_manager;
+ struct node_vtable node_vtable;
+ };
+};
+
+enum bus_state {
+ BUS_UNSET,
+ BUS_WATCH_BIND, /* waiting for the socket to appear via inotify */
+ BUS_OPENING, /* the kernel's connect() is still not ready */
+ BUS_AUTHENTICATING, /* we are currently in the "SASL" authorization phase of dbus */
+ BUS_HELLO, /* we are waiting for the Hello() response */
+ BUS_RUNNING,
+ BUS_CLOSING,
+ BUS_CLOSED,
+ _BUS_STATE_MAX,
+};
+
+static inline bool BUS_IS_OPEN(enum bus_state state) {
+ return state > BUS_UNSET && state < BUS_CLOSING;
+}
+
+enum bus_auth {
+ _BUS_AUTH_INVALID,
+ BUS_AUTH_EXTERNAL,
+ BUS_AUTH_ANONYMOUS
+};
+
+struct sd_bus {
+ /* We use atomic ref counting here since sd_bus_message
+ objects retain references to their originating sd_bus but
+ we want to allow them to be processed in a different
+ thread. We won't provide full thread safety, but only the
+ bare minimum that makes it possible to use sd_bus and
+ sd_bus_message objects independently and on different
+ threads as long as each object is used only once at the
+ same time. */
+ RefCount n_ref;
+
+ enum bus_state state;
+ int input_fd, output_fd;
+ int inotify_fd;
+ int message_version;
+ int message_endian;
+
+ bool can_fds:1;
+ bool bus_client:1;
+ bool ucred_valid:1;
+ bool is_server:1;
+ bool anonymous_auth:1;
+ bool prefer_readv:1;
+ bool prefer_writev:1;
+ bool match_callbacks_modified:1;
+ bool filter_callbacks_modified:1;
+ bool nodes_modified:1;
+ bool trusted:1;
+ bool manual_peer_interface:1;
+ bool is_system:1;
+ bool is_user:1;
+ bool allow_interactive_authorization:1;
+ bool exit_on_disconnect:1;
+ bool exited:1;
+ bool exit_triggered:1;
+ bool is_local:1;
+ bool watch_bind:1;
+ bool is_monitor:1;
+ bool accept_fd:1;
+ bool attach_timestamp:1;
+ bool connected_signal:1;
+ bool close_on_exit:1;
+
+ int use_memfd;
+
+ void *rbuffer;
+ size_t rbuffer_size;
+
+ sd_bus_message **rqueue;
+ unsigned rqueue_size;
+ size_t rqueue_allocated;
+
+ sd_bus_message **wqueue;
+ unsigned wqueue_size;
+ size_t windex;
+ size_t wqueue_allocated;
+
+ uint64_t cookie;
+
+ char *unique_name;
+ uint64_t unique_id;
+
+ struct bus_match_node match_callbacks;
+ Prioq *reply_callbacks_prioq;
+ OrderedHashmap *reply_callbacks;
+ LIST_HEAD(struct filter_callback, filter_callbacks);
+
+ Hashmap *nodes;
+ Hashmap *vtable_methods;
+ Hashmap *vtable_properties;
+
+ union sockaddr_union sockaddr;
+ socklen_t sockaddr_size;
+
+ char *machine;
+ pid_t nspid;
+
+ sd_id128_t server_id;
+
+ char *address;
+ unsigned address_index;
+
+ int last_connect_error;
+
+ enum bus_auth auth;
+ size_t auth_rbegin;
+ struct iovec auth_iovec[3];
+ unsigned auth_index;
+ char *auth_buffer;
+ usec_t auth_timeout;
+
+ struct ucred ucred;
+ char *label;
+ gid_t *groups;
+ size_t n_groups;
+
+ uint64_t creds_mask;
+
+ int *fds;
+ size_t n_fds;
+
+ char *exec_path;
+ char **exec_argv;
+
+ unsigned iteration_counter;
+
+ /* We do locking around the memfd cache, since we want to
+ * allow people to process a sd_bus_message in a different
+ * thread then it was generated on and free it there. Since
+ * adding something to the memfd cache might happen when a
+ * message is released, we hence need to protect this bit with
+ * a mutex. */
+ pthread_mutex_t memfd_cache_mutex;
+ struct memfd_cache memfd_cache[MEMFD_CACHE_MAX];
+ unsigned n_memfd_cache;
+
+ pid_t original_pid;
+ pid_t busexec_pid;
+
+ sd_event_source *input_io_event_source;
+ sd_event_source *output_io_event_source;
+ sd_event_source *time_event_source;
+ sd_event_source *quit_event_source;
+ sd_event_source *inotify_event_source;
+ sd_event *event;
+ int event_priority;
+
+ sd_bus_message *current_message;
+ sd_bus_slot *current_slot;
+ sd_bus_message_handler_t current_handler;
+ void *current_userdata;
+
+ sd_bus **default_bus_ptr;
+ pid_t tid;
+
+ char *description;
+ char *patch_sender;
+
+ sd_bus_track *track_queue;
+
+ LIST_HEAD(sd_bus_slot, slots);
+ LIST_HEAD(sd_bus_track, tracks);
+
+ int *inotify_watches;
+ size_t n_inotify_watches;
+
+ /* zero means use value specified by $SYSTEMD_BUS_TIMEOUT= environment variable or built-in default */
+ usec_t method_call_timeout;
+};
+
+/* For method calls we timeout at 25s, like in the D-Bus reference implementation */
+#define BUS_DEFAULT_TIMEOUT ((usec_t) (25 * USEC_PER_SEC))
+
+/* For the authentication phase we grant 90s, to provide extra room during boot, when RNGs and such are not filled up
+ * with enough entropy yet and might delay the boot */
+#define BUS_AUTH_TIMEOUT ((usec_t) DEFAULT_TIMEOUT_USEC)
+
+#define BUS_WQUEUE_MAX (192*1024)
+#define BUS_RQUEUE_MAX (192*1024)
+
+#define BUS_MESSAGE_SIZE_MAX (128*1024*1024)
+#define BUS_AUTH_SIZE_MAX (64*1024)
+
+#define BUS_CONTAINER_DEPTH 128
+
+/* Defined by the specification as maximum size of an array in bytes */
+#define BUS_ARRAY_MAX_SIZE 67108864
+
+#define BUS_FDS_MAX 1024
+
+#define BUS_EXEC_ARGV_MAX 256
+
+bool interface_name_is_valid(const char *p) _pure_;
+bool service_name_is_valid(const char *p) _pure_;
+bool member_name_is_valid(const char *p) _pure_;
+bool object_path_is_valid(const char *p) _pure_;
+char *object_path_startswith(const char *a, const char *b) _pure_;
+
+bool namespace_complex_pattern(const char *pattern, const char *value) _pure_;
+bool path_complex_pattern(const char *pattern, const char *value) _pure_;
+
+bool namespace_simple_pattern(const char *pattern, const char *value) _pure_;
+bool path_simple_pattern(const char *pattern, const char *value) _pure_;
+
+int bus_message_type_from_string(const char *s, uint8_t *u) _pure_;
+const char *bus_message_type_to_string(uint8_t u) _pure_;
+
+#define error_name_is_valid interface_name_is_valid
+
+sd_bus *bus_resolve(sd_bus *bus);
+
+int bus_ensure_running(sd_bus *bus);
+int bus_start_running(sd_bus *bus);
+int bus_next_address(sd_bus *bus);
+
+int bus_seal_synthetic_message(sd_bus *b, sd_bus_message *m);
+
+int bus_rqueue_make_room(sd_bus *bus);
+
+bool bus_pid_changed(sd_bus *bus);
+
+char *bus_address_escape(const char *v);
+
+int bus_attach_io_events(sd_bus *b);
+int bus_attach_inotify_event(sd_bus *b);
+
+void bus_close_inotify_fd(sd_bus *b);
+void bus_close_io_fds(sd_bus *b);
+
+#define OBJECT_PATH_FOREACH_PREFIX(prefix, path) \
+ for (char *_slash = ({ strcpy((prefix), (path)); streq((prefix), "/") ? NULL : strrchr((prefix), '/'); }) ; \
+ _slash && ((_slash[(_slash) == (prefix)] = 0), true); \
+ _slash = streq((prefix), "/") ? NULL : strrchr((prefix), '/'))
+
+/* If we are invoking callbacks of a bus object, ensure unreffing the
+ * bus from the callback doesn't destroy the object we are working on */
+#define BUS_DONT_DESTROY(bus) \
+ _cleanup_(sd_bus_unrefp) _unused_ sd_bus *_dont_destroy_##bus = sd_bus_ref(bus)
+
+int bus_set_address_system(sd_bus *bus);
+int bus_set_address_user(sd_bus *bus);
+int bus_set_address_system_remote(sd_bus *b, const char *host);
+int bus_set_address_system_machine(sd_bus *b, const char *machine);
+
+int bus_maybe_reply_error(sd_bus_message *m, int r, sd_bus_error *error);
+
+#define bus_assert_return(expr, r, error) \
+ do { \
+ if (!assert_log(expr, #expr)) \
+ return sd_bus_error_set_errno(error, r); \
+ } while (false)
+
+void bus_enter_closing(sd_bus *bus);
+
+void bus_set_state(sd_bus *bus, enum bus_state state);
diff --git a/src/libsystemd/sd-bus/bus-introspect.c b/src/libsystemd/sd-bus/bus-introspect.c
new file mode 100644
index 0000000..f623dd9
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-introspect.c
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio_ext.h>
+
+#include "bus-internal.h"
+#include "bus-introspect.h"
+#include "bus-protocol.h"
+#include "bus-signature.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "string-util.h"
+#include "util.h"
+
+int introspect_begin(struct introspect *i, bool trusted) {
+ assert(i);
+
+ zero(*i);
+ i->trusted = trusted;
+
+ i->f = open_memstream(&i->introspection, &i->size);
+ if (!i->f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(i->f, FSETLOCKING_BYCALLER);
+
+ fputs(BUS_INTROSPECT_DOCTYPE
+ "<node>\n", i->f);
+
+ return 0;
+}
+
+int introspect_write_default_interfaces(struct introspect *i, bool object_manager) {
+ assert(i);
+
+ fputs(BUS_INTROSPECT_INTERFACE_PEER
+ BUS_INTROSPECT_INTERFACE_INTROSPECTABLE
+ BUS_INTROSPECT_INTERFACE_PROPERTIES, i->f);
+
+ if (object_manager)
+ fputs(BUS_INTROSPECT_INTERFACE_OBJECT_MANAGER, i->f);
+
+ return 0;
+}
+
+int introspect_write_child_nodes(struct introspect *i, Set *s, const char *prefix) {
+ char *node;
+
+ assert(i);
+ assert(prefix);
+
+ while ((node = set_steal_first(s))) {
+ const char *e;
+
+ e = object_path_startswith(node, prefix);
+ if (e && e[0])
+ fprintf(i->f, " <node name=\"%s\"/>\n", e);
+
+ free(node);
+ }
+
+ return 0;
+}
+
+static void introspect_write_flags(struct introspect *i, int type, uint64_t flags) {
+ if (flags & SD_BUS_VTABLE_DEPRECATED)
+ fputs(" <annotation name=\"org.freedesktop.DBus.Deprecated\" value=\"true\"/>\n", i->f);
+
+ if (type == _SD_BUS_VTABLE_METHOD && (flags & SD_BUS_VTABLE_METHOD_NO_REPLY))
+ fputs(" <annotation name=\"org.freedesktop.DBus.Method.NoReply\" value=\"true\"/>\n", i->f);
+
+ if (IN_SET(type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY)) {
+ if (flags & SD_BUS_VTABLE_PROPERTY_EXPLICIT)
+ fputs(" <annotation name=\"org.freedesktop.systemd1.Explicit\" value=\"true\"/>\n", i->f);
+
+ if (flags & SD_BUS_VTABLE_PROPERTY_CONST)
+ fputs(" <annotation name=\"org.freedesktop.DBus.Property.EmitsChangedSignal\" value=\"const\"/>\n", i->f);
+ else if (flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION)
+ fputs(" <annotation name=\"org.freedesktop.DBus.Property.EmitsChangedSignal\" value=\"invalidates\"/>\n", i->f);
+ else if (!(flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE))
+ fputs(" <annotation name=\"org.freedesktop.DBus.Property.EmitsChangedSignal\" value=\"false\"/>\n", i->f);
+ }
+
+ if (!i->trusted &&
+ IN_SET(type, _SD_BUS_VTABLE_METHOD, _SD_BUS_VTABLE_WRITABLE_PROPERTY) &&
+ !(flags & SD_BUS_VTABLE_UNPRIVILEGED))
+ fputs(" <annotation name=\"org.freedesktop.systemd1.Privileged\" value=\"true\"/>\n", i->f);
+}
+
+static int introspect_write_arguments(struct introspect *i, const char *signature, const char *direction) {
+ int r;
+
+ for (;;) {
+ size_t l;
+
+ if (!*signature)
+ return 0;
+
+ r = signature_element_length(signature, &l);
+ if (r < 0)
+ return r;
+
+ fprintf(i->f, " <arg type=\"%.*s\"", (int) l, signature);
+
+ if (direction)
+ fprintf(i->f, " direction=\"%s\"/>\n", direction);
+ else
+ fputs("/>\n", i->f);
+
+ signature += l;
+ }
+}
+
+int introspect_write_interface(struct introspect *i, const sd_bus_vtable *v) {
+ assert(i);
+ assert(v);
+
+ for (; v->type != _SD_BUS_VTABLE_END; v++) {
+
+ /* Ignore methods, signals and properties that are
+ * marked "hidden", but do show the interface
+ * itself */
+
+ if (v->type != _SD_BUS_VTABLE_START && (v->flags & SD_BUS_VTABLE_HIDDEN))
+ continue;
+
+ switch (v->type) {
+
+ case _SD_BUS_VTABLE_START:
+ if (v->flags & SD_BUS_VTABLE_DEPRECATED)
+ fputs(" <annotation name=\"org.freedesktop.DBus.Deprecated\" value=\"true\"/>\n", i->f);
+ break;
+
+ case _SD_BUS_VTABLE_METHOD:
+ fprintf(i->f, " <method name=\"%s\">\n", v->x.method.member);
+ introspect_write_arguments(i, strempty(v->x.method.signature), "in");
+ introspect_write_arguments(i, strempty(v->x.method.result), "out");
+ introspect_write_flags(i, v->type, v->flags);
+ fputs(" </method>\n", i->f);
+ break;
+
+ case _SD_BUS_VTABLE_PROPERTY:
+ case _SD_BUS_VTABLE_WRITABLE_PROPERTY:
+ fprintf(i->f, " <property name=\"%s\" type=\"%s\" access=\"%s\">\n",
+ v->x.property.member,
+ v->x.property.signature,
+ v->type == _SD_BUS_VTABLE_WRITABLE_PROPERTY ? "readwrite" : "read");
+ introspect_write_flags(i, v->type, v->flags);
+ fputs(" </property>\n", i->f);
+ break;
+
+ case _SD_BUS_VTABLE_SIGNAL:
+ fprintf(i->f, " <signal name=\"%s\">\n", v->x.signal.member);
+ introspect_write_arguments(i, strempty(v->x.signal.signature), NULL);
+ introspect_write_flags(i, v->type, v->flags);
+ fputs(" </signal>\n", i->f);
+ break;
+ }
+
+ }
+
+ return 0;
+}
+
+int introspect_finish(struct introspect *i, sd_bus *bus, sd_bus_message *m, sd_bus_message **reply) {
+ sd_bus_message *q;
+ int r;
+
+ assert(i);
+ assert(m);
+ assert(reply);
+
+ fputs("</node>\n", i->f);
+
+ r = fflush_and_check(i->f);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(m, &q);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(q, "s", i->introspection);
+ if (r < 0) {
+ sd_bus_message_unref(q);
+ return r;
+ }
+
+ *reply = q;
+ return 0;
+}
+
+void introspect_free(struct introspect *i) {
+ assert(i);
+
+ safe_fclose(i->f);
+
+ free(i->introspection);
+ zero(*i);
+}
diff --git a/src/libsystemd/sd-bus/bus-introspect.h b/src/libsystemd/sd-bus/bus-introspect.h
new file mode 100644
index 0000000..bb2dd7e
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-introspect.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdio.h>
+
+#include "sd-bus.h"
+
+#include "set.h"
+
+struct introspect {
+ FILE *f;
+ char *introspection;
+ size_t size;
+ bool trusted;
+};
+
+int introspect_begin(struct introspect *i, bool trusted);
+int introspect_write_default_interfaces(struct introspect *i, bool object_manager);
+int introspect_write_child_nodes(struct introspect *i, Set *s, const char *prefix);
+int introspect_write_interface(struct introspect *i, const sd_bus_vtable *v);
+int introspect_finish(struct introspect *i, sd_bus *bus, sd_bus_message *m, sd_bus_message **reply);
+void introspect_free(struct introspect *i);
diff --git a/src/libsystemd/sd-bus/bus-kernel.c b/src/libsystemd/sd-bus/bus-kernel.c
new file mode 100644
index 0000000..34c8a9f
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-kernel.c
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_VALGRIND_MEMCHECK_H
+#include <valgrind/memcheck.h>
+#endif
+
+#include <fcntl.h>
+#include <malloc.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+/* When we include libgen.h because we need dirname() we immediately
+ * undefine basename() since libgen.h defines it as a macro to the POSIX
+ * version which is really broken. We prefer GNU basename(). */
+#include <libgen.h>
+#undef basename
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-kernel.h"
+#include "bus-label.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "capability-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "memfd-util.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+void close_and_munmap(int fd, void *address, size_t size) {
+ if (size > 0)
+ assert_se(munmap(address, PAGE_ALIGN(size)) >= 0);
+
+ safe_close(fd);
+}
+
+void bus_flush_memfd(sd_bus *b) {
+ unsigned i;
+
+ assert(b);
+
+ for (i = 0; i < b->n_memfd_cache; i++)
+ close_and_munmap(b->memfd_cache[i].fd, b->memfd_cache[i].address, b->memfd_cache[i].mapped);
+}
diff --git a/src/libsystemd/sd-bus/bus-kernel.h b/src/libsystemd/sd-bus/bus-kernel.h
new file mode 100644
index 0000000..fbbc43f
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-kernel.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+#define MEMFD_CACHE_MAX 32
+
+/* When we cache a memfd block for reuse, we will truncate blocks
+ * longer than this in order not to keep too much data around. */
+#define MEMFD_CACHE_ITEM_SIZE_MAX (128*1024)
+
+/* This determines at which minimum size we prefer sending memfds over
+ * sending vectors */
+#define MEMFD_MIN_SIZE (512*1024)
+
+struct memfd_cache {
+ int fd;
+ void *address;
+ size_t mapped;
+ size_t allocated;
+};
+
+void close_and_munmap(int fd, void *address, size_t size);
+void bus_flush_memfd(sd_bus *bus);
diff --git a/src/libsystemd/sd-bus/bus-match.c b/src/libsystemd/sd-bus/bus-match.c
new file mode 100644
index 0000000..9642de1
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-match.c
@@ -0,0 +1,1097 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio_ext.h>
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-match.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hexdecoct.h"
+#include "string-util.h"
+#include "strv.h"
+
+/* Example:
+ *
+ * A: type=signal,sender=foo,interface=bar
+ * B: type=signal,sender=quux,interface=fips
+ * C: type=signal,sender=quux,interface=waldo
+ * D: type=signal,member=test
+ * E: sender=miau
+ * F: type=signal
+ * G: type=signal
+ *
+ * results in this tree:
+ *
+ * BUS_MATCH_ROOT
+ * + BUS_MATCH_MESSAGE_TYPE
+ * | ` BUS_MATCH_VALUE: value == signal
+ * | + DBUS_MATCH_SENDER
+ * | | + BUS_MATCH_VALUE: value == foo
+ * | | | ` DBUS_MATCH_INTERFACE
+ * | | | ` BUS_MATCH_VALUE: value == bar
+ * | | | ` BUS_MATCH_LEAF: A
+ * | | ` BUS_MATCH_VALUE: value == quux
+ * | | ` DBUS_MATCH_INTERFACE
+ * | | | BUS_MATCH_VALUE: value == fips
+ * | | | ` BUS_MATCH_LEAF: B
+ * | | ` BUS_MATCH_VALUE: value == waldo
+ * | | ` BUS_MATCH_LEAF: C
+ * | + DBUS_MATCH_MEMBER
+ * | | ` BUS_MATCH_VALUE: value == test
+ * | | ` BUS_MATCH_LEAF: D
+ * | + BUS_MATCH_LEAF: F
+ * | ` BUS_MATCH_LEAF: G
+ * ` BUS_MATCH_SENDER
+ * ` BUS_MATCH_VALUE: value == miau
+ * ` BUS_MATCH_LEAF: E
+ */
+
+static bool BUS_MATCH_IS_COMPARE(enum bus_match_node_type t) {
+ return t >= BUS_MATCH_SENDER && t <= BUS_MATCH_ARG_HAS_LAST;
+}
+
+static bool BUS_MATCH_CAN_HASH(enum bus_match_node_type t) {
+ return (t >= BUS_MATCH_MESSAGE_TYPE && t <= BUS_MATCH_PATH) ||
+ (t >= BUS_MATCH_ARG && t <= BUS_MATCH_ARG_LAST) ||
+ (t >= BUS_MATCH_ARG_HAS && t <= BUS_MATCH_ARG_HAS_LAST);
+}
+
+static void bus_match_node_free(struct bus_match_node *node) {
+ assert(node);
+ assert(node->parent);
+ assert(!node->child);
+ assert(node->type != BUS_MATCH_ROOT);
+ assert(node->type < _BUS_MATCH_NODE_TYPE_MAX);
+
+ if (node->parent->child) {
+ /* We are apparently linked into the parent's child
+ * list. Let's remove us from there. */
+ if (node->prev) {
+ assert(node->prev->next == node);
+ node->prev->next = node->next;
+ } else {
+ assert(node->parent->child == node);
+ node->parent->child = node->next;
+ }
+
+ if (node->next)
+ node->next->prev = node->prev;
+ }
+
+ if (node->type == BUS_MATCH_VALUE) {
+ /* We might be in the parent's hash table, so clean
+ * this up */
+
+ if (node->parent->type == BUS_MATCH_MESSAGE_TYPE)
+ hashmap_remove(node->parent->compare.children, UINT_TO_PTR(node->value.u8));
+ else if (BUS_MATCH_CAN_HASH(node->parent->type) && node->value.str)
+ hashmap_remove(node->parent->compare.children, node->value.str);
+
+ free(node->value.str);
+ }
+
+ if (BUS_MATCH_IS_COMPARE(node->type)) {
+ assert(hashmap_isempty(node->compare.children));
+ hashmap_free(node->compare.children);
+ }
+
+ free(node);
+}
+
+static bool bus_match_node_maybe_free(struct bus_match_node *node) {
+ assert(node);
+
+ if (node->type == BUS_MATCH_ROOT)
+ return false;
+
+ if (node->child)
+ return false;
+
+ if (BUS_MATCH_IS_COMPARE(node->type) && !hashmap_isempty(node->compare.children))
+ return true;
+
+ bus_match_node_free(node);
+ return true;
+}
+
+static bool value_node_test(
+ struct bus_match_node *node,
+ enum bus_match_node_type parent_type,
+ uint8_t value_u8,
+ const char *value_str,
+ char **value_strv,
+ sd_bus_message *m) {
+
+ assert(node);
+ assert(node->type == BUS_MATCH_VALUE);
+
+ /* Tests parameters against this value node, doing prefix
+ * magic and stuff. */
+
+ switch (parent_type) {
+
+ case BUS_MATCH_MESSAGE_TYPE:
+ return node->value.u8 == value_u8;
+
+ case BUS_MATCH_SENDER:
+ if (streq_ptr(node->value.str, value_str))
+ return true;
+
+ if (m->creds.mask & SD_BUS_CREDS_WELL_KNOWN_NAMES) {
+ char **i;
+
+ /* on kdbus we have the well known names list
+ * in the credentials, let's make use of that
+ * for an accurate match */
+
+ STRV_FOREACH(i, m->creds.well_known_names)
+ if (streq_ptr(node->value.str, *i))
+ return true;
+
+ } else {
+
+ /* If we don't have kdbus, we don't know the
+ * well-known names of the senders. In that,
+ * let's just hope that dbus-daemon doesn't
+ * send us stuff we didn't want. */
+
+ if (node->value.str[0] != ':' && value_str && value_str[0] == ':')
+ return true;
+ }
+
+ return false;
+
+ case BUS_MATCH_DESTINATION:
+ case BUS_MATCH_INTERFACE:
+ case BUS_MATCH_MEMBER:
+ case BUS_MATCH_PATH:
+ case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST:
+
+ if (value_str)
+ return streq_ptr(node->value.str, value_str);
+
+ return false;
+
+ case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST: {
+ char **i;
+
+ STRV_FOREACH(i, value_strv)
+ if (streq_ptr(node->value.str, *i))
+ return true;
+
+ return false;
+ }
+
+ case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST:
+ if (value_str)
+ return namespace_simple_pattern(node->value.str, value_str);
+
+ return false;
+
+ case BUS_MATCH_PATH_NAMESPACE:
+ return path_simple_pattern(node->value.str, value_str);
+
+ case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST:
+ if (value_str)
+ return path_complex_pattern(node->value.str, value_str);
+
+ return false;
+
+ default:
+ assert_not_reached("Invalid node type");
+ }
+}
+
+static bool value_node_same(
+ struct bus_match_node *node,
+ enum bus_match_node_type parent_type,
+ uint8_t value_u8,
+ const char *value_str) {
+
+ /* Tests parameters against this value node, not doing prefix
+ * magic and stuff, i.e. this one actually compares the match
+ * itself. */
+
+ assert(node);
+ assert(node->type == BUS_MATCH_VALUE);
+
+ switch (parent_type) {
+
+ case BUS_MATCH_MESSAGE_TYPE:
+ return node->value.u8 == value_u8;
+
+ case BUS_MATCH_SENDER:
+ case BUS_MATCH_DESTINATION:
+ case BUS_MATCH_INTERFACE:
+ case BUS_MATCH_MEMBER:
+ case BUS_MATCH_PATH:
+ case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST:
+ case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST:
+ case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST:
+ case BUS_MATCH_PATH_NAMESPACE:
+ case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST:
+ return streq(node->value.str, value_str);
+
+ default:
+ assert_not_reached("Invalid node type");
+ }
+}
+
+int bus_match_run(
+ sd_bus *bus,
+ struct bus_match_node *node,
+ sd_bus_message *m) {
+
+ _cleanup_strv_free_ char **test_strv = NULL;
+ const char *test_str = NULL;
+ uint8_t test_u8 = 0;
+ int r;
+
+ assert(m);
+
+ if (!node)
+ return 0;
+
+ if (bus && bus->match_callbacks_modified)
+ return 0;
+
+ /* Not these special semantics: when traversing the tree we
+ * usually let bus_match_run() when called for a node
+ * recursively invoke bus_match_run(). There's are two
+ * exceptions here though, which are BUS_NODE_ROOT (which
+ * cannot have a sibling), and BUS_NODE_VALUE (whose siblings
+ * are invoked anyway by its parent. */
+
+ switch (node->type) {
+
+ case BUS_MATCH_ROOT:
+
+ /* Run all children. Since we cannot have any siblings
+ * we won't call any. The children of the root node
+ * are compares or leaves, they will automatically
+ * call their siblings. */
+ return bus_match_run(bus, node->child, m);
+
+ case BUS_MATCH_VALUE:
+
+ /* Run all children. We don't execute any siblings, we
+ * assume our caller does that. The children of value
+ * nodes are compares or leaves, they will
+ * automatically call their siblings */
+
+ assert(node->child);
+ return bus_match_run(bus, node->child, m);
+
+ case BUS_MATCH_LEAF:
+
+ if (bus) {
+ if (node->leaf.callback->last_iteration == bus->iteration_counter)
+ return 0;
+
+ node->leaf.callback->last_iteration = bus->iteration_counter;
+ }
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ /* Run the callback. And then invoke siblings. */
+ if (node->leaf.callback->callback) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ sd_bus_slot *slot;
+
+ slot = container_of(node->leaf.callback, sd_bus_slot, match_callback);
+ if (bus) {
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = node->leaf.callback->callback;
+ bus->current_userdata = slot->userdata;
+ }
+ r = node->leaf.callback->callback(m, slot->userdata, &error_buffer);
+ if (bus) {
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+ }
+
+ r = bus_maybe_reply_error(m, r, &error_buffer);
+ if (r != 0)
+ return r;
+
+ if (bus && bus->match_callbacks_modified)
+ return 0;
+ }
+
+ return bus_match_run(bus, node->next, m);
+
+ case BUS_MATCH_MESSAGE_TYPE:
+ test_u8 = m->header->type;
+ break;
+
+ case BUS_MATCH_SENDER:
+ test_str = m->sender;
+ /* FIXME: resolve test_str from a well-known to a unique name first */
+ break;
+
+ case BUS_MATCH_DESTINATION:
+ test_str = m->destination;
+ break;
+
+ case BUS_MATCH_INTERFACE:
+ test_str = m->interface;
+ break;
+
+ case BUS_MATCH_MEMBER:
+ test_str = m->member;
+ break;
+
+ case BUS_MATCH_PATH:
+ case BUS_MATCH_PATH_NAMESPACE:
+ test_str = m->path;
+ break;
+
+ case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST:
+ (void) bus_message_get_arg(m, node->type - BUS_MATCH_ARG, &test_str);
+ break;
+
+ case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST:
+ (void) bus_message_get_arg(m, node->type - BUS_MATCH_ARG_PATH, &test_str);
+ break;
+
+ case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST:
+ (void) bus_message_get_arg(m, node->type - BUS_MATCH_ARG_NAMESPACE, &test_str);
+ break;
+
+ case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST:
+ (void) bus_message_get_arg_strv(m, node->type - BUS_MATCH_ARG_HAS, &test_strv);
+ break;
+
+ default:
+ assert_not_reached("Unknown match type.");
+ }
+
+ if (BUS_MATCH_CAN_HASH(node->type)) {
+ struct bus_match_node *found;
+
+ /* Lookup via hash table, nice! So let's jump directly. */
+
+ if (test_str)
+ found = hashmap_get(node->compare.children, test_str);
+ else if (test_strv) {
+ char **i;
+
+ STRV_FOREACH(i, test_strv) {
+ found = hashmap_get(node->compare.children, *i);
+ if (found) {
+ r = bus_match_run(bus, found, m);
+ if (r != 0)
+ return r;
+ }
+ }
+
+ found = NULL;
+ } else if (node->type == BUS_MATCH_MESSAGE_TYPE)
+ found = hashmap_get(node->compare.children, UINT_TO_PTR(test_u8));
+ else
+ found = NULL;
+
+ if (found) {
+ r = bus_match_run(bus, found, m);
+ if (r != 0)
+ return r;
+ }
+ } else {
+ struct bus_match_node *c;
+
+ /* No hash table, so let's iterate manually... */
+
+ for (c = node->child; c; c = c->next) {
+ if (!value_node_test(c, node->type, test_u8, test_str, test_strv, m))
+ continue;
+
+ r = bus_match_run(bus, c, m);
+ if (r != 0)
+ return r;
+
+ if (bus && bus->match_callbacks_modified)
+ return 0;
+ }
+ }
+
+ if (bus && bus->match_callbacks_modified)
+ return 0;
+
+ /* And now, let's invoke our siblings */
+ return bus_match_run(bus, node->next, m);
+}
+
+static int bus_match_add_compare_value(
+ struct bus_match_node *where,
+ enum bus_match_node_type t,
+ uint8_t value_u8,
+ const char *value_str,
+ struct bus_match_node **ret) {
+
+ struct bus_match_node *c = NULL, *n = NULL;
+ int r;
+
+ assert(where);
+ assert(IN_SET(where->type, BUS_MATCH_ROOT, BUS_MATCH_VALUE));
+ assert(BUS_MATCH_IS_COMPARE(t));
+ assert(ret);
+
+ for (c = where->child; c && c->type != t; c = c->next)
+ ;
+
+ if (c) {
+ /* Comparison node already exists? Then let's see if
+ * the value node exists too. */
+
+ if (t == BUS_MATCH_MESSAGE_TYPE)
+ n = hashmap_get(c->compare.children, UINT_TO_PTR(value_u8));
+ else if (BUS_MATCH_CAN_HASH(t))
+ n = hashmap_get(c->compare.children, value_str);
+ else {
+ for (n = c->child; n && !value_node_same(n, t, value_u8, value_str); n = n->next)
+ ;
+ }
+
+ if (n) {
+ *ret = n;
+ return 0;
+ }
+ } else {
+ /* Comparison node, doesn't exist yet? Then let's
+ * create it. */
+
+ c = new0(struct bus_match_node, 1);
+ if (!c) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ c->type = t;
+ c->parent = where;
+ c->next = where->child;
+ if (c->next)
+ c->next->prev = c;
+ where->child = c;
+
+ if (t == BUS_MATCH_MESSAGE_TYPE) {
+ c->compare.children = hashmap_new(NULL);
+ if (!c->compare.children) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ } else if (BUS_MATCH_CAN_HASH(t)) {
+ c->compare.children = hashmap_new(&string_hash_ops);
+ if (!c->compare.children) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+ }
+
+ n = new0(struct bus_match_node, 1);
+ if (!n) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ n->type = BUS_MATCH_VALUE;
+ n->value.u8 = value_u8;
+ if (value_str) {
+ n->value.str = strdup(value_str);
+ if (!n->value.str) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ n->parent = c;
+ if (c->compare.children) {
+
+ if (t == BUS_MATCH_MESSAGE_TYPE)
+ r = hashmap_put(c->compare.children, UINT_TO_PTR(value_u8), n);
+ else
+ r = hashmap_put(c->compare.children, n->value.str, n);
+
+ if (r < 0)
+ goto fail;
+ } else {
+ n->next = c->child;
+ if (n->next)
+ n->next->prev = n;
+ c->child = n;
+ }
+
+ *ret = n;
+ return 1;
+
+fail:
+ if (c)
+ bus_match_node_maybe_free(c);
+
+ if (n) {
+ free(n->value.str);
+ free(n);
+ }
+
+ return r;
+}
+
+static int bus_match_add_leaf(
+ struct bus_match_node *where,
+ struct match_callback *callback) {
+
+ struct bus_match_node *n;
+
+ assert(where);
+ assert(IN_SET(where->type, BUS_MATCH_ROOT, BUS_MATCH_VALUE));
+ assert(callback);
+
+ n = new0(struct bus_match_node, 1);
+ if (!n)
+ return -ENOMEM;
+
+ n->type = BUS_MATCH_LEAF;
+ n->parent = where;
+ n->next = where->child;
+ if (n->next)
+ n->next->prev = n;
+
+ n->leaf.callback = callback;
+ callback->match_node = n;
+
+ where->child = n;
+
+ return 1;
+}
+
+enum bus_match_node_type bus_match_node_type_from_string(const char *k, size_t n) {
+ assert(k);
+
+ if (n == 4 && startswith(k, "type"))
+ return BUS_MATCH_MESSAGE_TYPE;
+ if (n == 6 && startswith(k, "sender"))
+ return BUS_MATCH_SENDER;
+ if (n == 11 && startswith(k, "destination"))
+ return BUS_MATCH_DESTINATION;
+ if (n == 9 && startswith(k, "interface"))
+ return BUS_MATCH_INTERFACE;
+ if (n == 6 && startswith(k, "member"))
+ return BUS_MATCH_MEMBER;
+ if (n == 4 && startswith(k, "path"))
+ return BUS_MATCH_PATH;
+ if (n == 14 && startswith(k, "path_namespace"))
+ return BUS_MATCH_PATH_NAMESPACE;
+
+ if (n == 4 && startswith(k, "arg")) {
+ int j;
+
+ j = undecchar(k[3]);
+ if (j < 0)
+ return -EINVAL;
+
+ return BUS_MATCH_ARG + j;
+ }
+
+ if (n == 5 && startswith(k, "arg")) {
+ int a, b;
+ enum bus_match_node_type t;
+
+ a = undecchar(k[3]);
+ b = undecchar(k[4]);
+ if (a <= 0 || b < 0)
+ return -EINVAL;
+
+ t = BUS_MATCH_ARG + a * 10 + b;
+ if (t > BUS_MATCH_ARG_LAST)
+ return -EINVAL;
+
+ return t;
+ }
+
+ if (n == 8 && startswith(k, "arg") && startswith(k + 4, "path")) {
+ int j;
+
+ j = undecchar(k[3]);
+ if (j < 0)
+ return -EINVAL;
+
+ return BUS_MATCH_ARG_PATH + j;
+ }
+
+ if (n == 9 && startswith(k, "arg") && startswith(k + 5, "path")) {
+ enum bus_match_node_type t;
+ int a, b;
+
+ a = undecchar(k[3]);
+ b = undecchar(k[4]);
+ if (a <= 0 || b < 0)
+ return -EINVAL;
+
+ t = BUS_MATCH_ARG_PATH + a * 10 + b;
+ if (t > BUS_MATCH_ARG_PATH_LAST)
+ return -EINVAL;
+
+ return t;
+ }
+
+ if (n == 13 && startswith(k, "arg") && startswith(k + 4, "namespace")) {
+ int j;
+
+ j = undecchar(k[3]);
+ if (j < 0)
+ return -EINVAL;
+
+ return BUS_MATCH_ARG_NAMESPACE + j;
+ }
+
+ if (n == 14 && startswith(k, "arg") && startswith(k + 5, "namespace")) {
+ enum bus_match_node_type t;
+ int a, b;
+
+ a = undecchar(k[3]);
+ b = undecchar(k[4]);
+ if (a <= 0 || b < 0)
+ return -EINVAL;
+
+ t = BUS_MATCH_ARG_NAMESPACE + a * 10 + b;
+ if (t > BUS_MATCH_ARG_NAMESPACE_LAST)
+ return -EINVAL;
+
+ return t;
+ }
+
+ if (n == 7 && startswith(k, "arg") && startswith(k + 4, "has")) {
+ int j;
+
+ j = undecchar(k[3]);
+ if (j < 0)
+ return -EINVAL;
+
+ return BUS_MATCH_ARG_HAS + j;
+ }
+
+ if (n == 8 && startswith(k, "arg") && startswith(k + 5, "has")) {
+ enum bus_match_node_type t;
+ int a, b;
+
+ a = undecchar(k[3]);
+ b = undecchar(k[4]);
+ if (a <= 0 || b < 0)
+ return -EINVAL;
+
+ t = BUS_MATCH_ARG_HAS + a * 10 + b;
+ if (t > BUS_MATCH_ARG_HAS_LAST)
+ return -EINVAL;
+
+ return t;
+ }
+
+ return -EINVAL;
+}
+
+static int match_component_compare(const struct bus_match_component *a, const struct bus_match_component *b) {
+ return CMP(a->type, b->type);
+}
+
+void bus_match_parse_free(struct bus_match_component *components, unsigned n_components) {
+ unsigned i;
+
+ for (i = 0; i < n_components; i++)
+ free(components[i].value_str);
+
+ free(components);
+}
+
+int bus_match_parse(
+ const char *match,
+ struct bus_match_component **_components,
+ unsigned *_n_components) {
+
+ const char *p = match;
+ struct bus_match_component *components = NULL;
+ size_t components_allocated = 0;
+ unsigned n_components = 0, i;
+ _cleanup_free_ char *value = NULL;
+ int r;
+
+ assert(match);
+ assert(_components);
+ assert(_n_components);
+
+ while (*p != 0) {
+ const char *eq, *q;
+ enum bus_match_node_type t;
+ unsigned j = 0;
+ size_t value_allocated = 0;
+ bool escaped = false, quoted;
+ uint8_t u;
+
+ /* Avahi's match rules appear to include whitespace, skip over it */
+ p += strspn(p, " ");
+
+ eq = strchr(p, '=');
+ if (!eq)
+ return -EINVAL;
+
+ t = bus_match_node_type_from_string(p, eq - p);
+ if (t < 0)
+ return -EINVAL;
+
+ quoted = eq[1] == '\'';
+
+ for (q = eq + 1 + quoted;; q++) {
+
+ if (*q == 0) {
+
+ if (quoted) {
+ r = -EINVAL;
+ goto fail;
+ } else {
+ if (value)
+ value[j] = 0;
+ break;
+ }
+ }
+
+ if (!escaped) {
+ if (*q == '\\') {
+ escaped = true;
+ continue;
+ }
+
+ if (quoted) {
+ if (*q == '\'') {
+ if (value)
+ value[j] = 0;
+ break;
+ }
+ } else {
+ if (*q == ',') {
+ if (value)
+ value[j] = 0;
+
+ break;
+ }
+ }
+ }
+
+ if (!GREEDY_REALLOC(value, value_allocated, j + 2)) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ value[j++] = *q;
+ escaped = false;
+ }
+
+ if (!value) {
+ value = strdup("");
+ if (!value) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (t == BUS_MATCH_MESSAGE_TYPE) {
+ r = bus_message_type_from_string(value, &u);
+ if (r < 0)
+ goto fail;
+
+ value = mfree(value);
+ } else
+ u = 0;
+
+ if (!GREEDY_REALLOC(components, components_allocated, n_components + 1)) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ components[n_components].type = t;
+ components[n_components].value_str = TAKE_PTR(value);
+ components[n_components].value_u8 = u;
+ n_components++;
+
+ if (q[quoted] == 0)
+ break;
+
+ if (q[quoted] != ',') {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ p = q + 1 + quoted;
+ }
+
+ /* Order the whole thing, so that we always generate the same tree */
+ typesafe_qsort(components, n_components, match_component_compare);
+
+ /* Check for duplicates */
+ for (i = 0; i+1 < n_components; i++)
+ if (components[i].type == components[i+1].type) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ *_components = components;
+ *_n_components = n_components;
+
+ return 0;
+
+fail:
+ bus_match_parse_free(components, n_components);
+ return r;
+}
+
+char *bus_match_to_string(struct bus_match_component *components, unsigned n_components) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char *buffer = NULL;
+ size_t size = 0;
+ unsigned i;
+ int r;
+
+ if (n_components <= 0)
+ return strdup("");
+
+ assert(components);
+
+ f = open_memstream(&buffer, &size);
+ if (!f)
+ return NULL;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ for (i = 0; i < n_components; i++) {
+ char buf[32];
+
+ if (i != 0)
+ fputc(',', f);
+
+ fputs(bus_match_node_type_to_string(components[i].type, buf, sizeof(buf)), f);
+ fputc('=', f);
+ fputc('\'', f);
+
+ if (components[i].type == BUS_MATCH_MESSAGE_TYPE)
+ fputs(bus_message_type_to_string(components[i].value_u8), f);
+ else
+ fputs(components[i].value_str, f);
+
+ fputc('\'', f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return NULL;
+
+ return buffer;
+}
+
+int bus_match_add(
+ struct bus_match_node *root,
+ struct bus_match_component *components,
+ unsigned n_components,
+ struct match_callback *callback) {
+
+ unsigned i;
+ struct bus_match_node *n;
+ int r;
+
+ assert(root);
+ assert(callback);
+
+ n = root;
+ for (i = 0; i < n_components; i++) {
+ r = bus_match_add_compare_value(
+ n, components[i].type,
+ components[i].value_u8, components[i].value_str, &n);
+ if (r < 0)
+ return r;
+ }
+
+ return bus_match_add_leaf(n, callback);
+}
+
+int bus_match_remove(
+ struct bus_match_node *root,
+ struct match_callback *callback) {
+
+ struct bus_match_node *node, *pp;
+
+ assert(root);
+ assert(callback);
+
+ node = callback->match_node;
+ if (!node)
+ return 0;
+
+ assert(node->type == BUS_MATCH_LEAF);
+
+ callback->match_node = NULL;
+
+ /* Free the leaf */
+ pp = node->parent;
+ bus_match_node_free(node);
+
+ /* Prune the tree above */
+ while (pp) {
+ node = pp;
+ pp = node->parent;
+
+ if (!bus_match_node_maybe_free(node))
+ break;
+ }
+
+ return 1;
+}
+
+void bus_match_free(struct bus_match_node *node) {
+ struct bus_match_node *c;
+
+ if (!node)
+ return;
+
+ if (BUS_MATCH_CAN_HASH(node->type)) {
+ Iterator i;
+
+ HASHMAP_FOREACH(c, node->compare.children, i)
+ bus_match_free(c);
+
+ assert(hashmap_isempty(node->compare.children));
+ }
+
+ while ((c = node->child))
+ bus_match_free(c);
+
+ if (node->type != BUS_MATCH_ROOT)
+ bus_match_node_free(node);
+}
+
+const char* bus_match_node_type_to_string(enum bus_match_node_type t, char buf[], size_t l) {
+ switch (t) {
+
+ case BUS_MATCH_ROOT:
+ return "root";
+
+ case BUS_MATCH_VALUE:
+ return "value";
+
+ case BUS_MATCH_LEAF:
+ return "leaf";
+
+ case BUS_MATCH_MESSAGE_TYPE:
+ return "type";
+
+ case BUS_MATCH_SENDER:
+ return "sender";
+
+ case BUS_MATCH_DESTINATION:
+ return "destination";
+
+ case BUS_MATCH_INTERFACE:
+ return "interface";
+
+ case BUS_MATCH_MEMBER:
+ return "member";
+
+ case BUS_MATCH_PATH:
+ return "path";
+
+ case BUS_MATCH_PATH_NAMESPACE:
+ return "path_namespace";
+
+ case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST:
+ snprintf(buf, l, "arg%i", t - BUS_MATCH_ARG);
+ return buf;
+
+ case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST:
+ snprintf(buf, l, "arg%ipath", t - BUS_MATCH_ARG_PATH);
+ return buf;
+
+ case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST:
+ snprintf(buf, l, "arg%inamespace", t - BUS_MATCH_ARG_NAMESPACE);
+ return buf;
+
+ case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST:
+ snprintf(buf, l, "arg%ihas", t - BUS_MATCH_ARG_HAS);
+ return buf;
+
+ default:
+ return NULL;
+ }
+}
+
+void bus_match_dump(struct bus_match_node *node, unsigned level) {
+ struct bus_match_node *c;
+ _cleanup_free_ char *pfx = NULL;
+ char buf[32];
+
+ if (!node)
+ return;
+
+ pfx = strrep(" ", level);
+ printf("%s[%s]", strempty(pfx), bus_match_node_type_to_string(node->type, buf, sizeof(buf)));
+
+ if (node->type == BUS_MATCH_VALUE) {
+ if (node->parent->type == BUS_MATCH_MESSAGE_TYPE)
+ printf(" <%u>\n", node->value.u8);
+ else
+ printf(" <%s>\n", node->value.str);
+ } else if (node->type == BUS_MATCH_ROOT)
+ puts(" root");
+ else if (node->type == BUS_MATCH_LEAF)
+ printf(" %p/%p\n", node->leaf.callback->callback, container_of(node->leaf.callback, sd_bus_slot, match_callback)->userdata);
+ else
+ putchar('\n');
+
+ if (BUS_MATCH_CAN_HASH(node->type)) {
+ Iterator i;
+
+ HASHMAP_FOREACH(c, node->compare.children, i)
+ bus_match_dump(c, level + 1);
+ }
+
+ for (c = node->child; c; c = c->next)
+ bus_match_dump(c, level + 1);
+}
+
+enum bus_match_scope bus_match_get_scope(const struct bus_match_component *components, unsigned n_components) {
+ bool found_driver = false;
+ unsigned i;
+
+ if (n_components <= 0)
+ return BUS_MATCH_GENERIC;
+
+ assert(components);
+
+ /* Checks whether the specified match can only match the
+ * pseudo-service for local messages, which we detect by
+ * sender, interface or path. If a match is not restricted to
+ * local messages, then we check if it only matches on the
+ * driver. */
+
+ for (i = 0; i < n_components; i++) {
+ const struct bus_match_component *c = components + i;
+
+ if (c->type == BUS_MATCH_SENDER) {
+ if (streq_ptr(c->value_str, "org.freedesktop.DBus.Local"))
+ return BUS_MATCH_LOCAL;
+
+ if (streq_ptr(c->value_str, "org.freedesktop.DBus"))
+ found_driver = true;
+ }
+
+ if (c->type == BUS_MATCH_INTERFACE && streq_ptr(c->value_str, "org.freedesktop.DBus.Local"))
+ return BUS_MATCH_LOCAL;
+
+ if (c->type == BUS_MATCH_PATH && streq_ptr(c->value_str, "/org/freedesktop/DBus/Local"))
+ return BUS_MATCH_LOCAL;
+ }
+
+ return found_driver ? BUS_MATCH_DRIVER : BUS_MATCH_GENERIC;
+
+}
diff --git a/src/libsystemd/sd-bus/bus-match.h b/src/libsystemd/sd-bus/bus-match.h
new file mode 100644
index 0000000..a6f67ce
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-match.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "hashmap.h"
+
+enum bus_match_node_type {
+ BUS_MATCH_ROOT,
+ BUS_MATCH_VALUE,
+ BUS_MATCH_LEAF,
+
+ /* The following are all different kinds of compare nodes */
+ BUS_MATCH_SENDER,
+ BUS_MATCH_MESSAGE_TYPE,
+ BUS_MATCH_DESTINATION,
+ BUS_MATCH_INTERFACE,
+ BUS_MATCH_MEMBER,
+ BUS_MATCH_PATH,
+ BUS_MATCH_PATH_NAMESPACE,
+ BUS_MATCH_ARG,
+ BUS_MATCH_ARG_LAST = BUS_MATCH_ARG + 63,
+ BUS_MATCH_ARG_PATH,
+ BUS_MATCH_ARG_PATH_LAST = BUS_MATCH_ARG_PATH + 63,
+ BUS_MATCH_ARG_NAMESPACE,
+ BUS_MATCH_ARG_NAMESPACE_LAST = BUS_MATCH_ARG_NAMESPACE + 63,
+ BUS_MATCH_ARG_HAS,
+ BUS_MATCH_ARG_HAS_LAST = BUS_MATCH_ARG_HAS + 63,
+ _BUS_MATCH_NODE_TYPE_MAX,
+ _BUS_MATCH_NODE_TYPE_INVALID = -1
+};
+
+struct bus_match_node {
+ enum bus_match_node_type type;
+ struct bus_match_node *parent, *next, *prev, *child;
+
+ union {
+ struct {
+ char *str;
+ uint8_t u8;
+ } value;
+ struct {
+ struct match_callback *callback;
+ } leaf;
+ struct {
+ /* If this is set, then the child is NULL */
+ Hashmap *children;
+ } compare;
+ };
+};
+
+struct bus_match_component {
+ enum bus_match_node_type type;
+ uint8_t value_u8;
+ char *value_str;
+};
+
+enum bus_match_scope {
+ BUS_MATCH_GENERIC,
+ BUS_MATCH_LOCAL,
+ BUS_MATCH_DRIVER,
+};
+
+int bus_match_run(sd_bus *bus, struct bus_match_node *root, sd_bus_message *m);
+
+int bus_match_add(struct bus_match_node *root, struct bus_match_component *components, unsigned n_components, struct match_callback *callback);
+int bus_match_remove(struct bus_match_node *root, struct match_callback *callback);
+
+void bus_match_free(struct bus_match_node *node);
+
+void bus_match_dump(struct bus_match_node *node, unsigned level);
+
+const char* bus_match_node_type_to_string(enum bus_match_node_type t, char buf[], size_t l);
+enum bus_match_node_type bus_match_node_type_from_string(const char *k, size_t n);
+
+int bus_match_parse(const char *match, struct bus_match_component **_components, unsigned *_n_components);
+void bus_match_parse_free(struct bus_match_component *components, unsigned n_components);
+char *bus_match_to_string(struct bus_match_component *components, unsigned n_components);
+
+enum bus_match_scope bus_match_get_scope(const struct bus_match_component *components, unsigned n_components);
diff --git a/src/libsystemd/sd-bus/bus-message.c b/src/libsystemd/sd-bus/bus-message.c
new file mode 100644
index 0000000..bb7e09c
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-message.c
@@ -0,0 +1,5846 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-gvariant.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-signature.h"
+#include "bus-type.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "memfd-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+
+static int message_append_basic(sd_bus_message *m, char type, const void *p, const void **stored);
+
+static void *adjust_pointer(const void *p, void *old_base, size_t sz, void *new_base) {
+
+ if (p == NULL)
+ return NULL;
+
+ if (old_base == new_base)
+ return (void*) p;
+
+ if ((uint8_t*) p < (uint8_t*) old_base)
+ return (void*) p;
+
+ if ((uint8_t*) p >= (uint8_t*) old_base + sz)
+ return (void*) p;
+
+ return (uint8_t*) new_base + ((uint8_t*) p - (uint8_t*) old_base);
+}
+
+static void message_free_part(sd_bus_message *m, struct bus_body_part *part) {
+ assert(m);
+ assert(part);
+
+ if (part->memfd >= 0)
+ close_and_munmap(part->memfd, part->mmap_begin, part->mapped);
+ else if (part->munmap_this)
+ munmap(part->mmap_begin, part->mapped);
+ else if (part->free_this)
+ free(part->data);
+
+ if (part != &m->body)
+ free(part);
+}
+
+static void message_reset_parts(sd_bus_message *m) {
+ struct bus_body_part *part;
+
+ assert(m);
+
+ part = &m->body;
+ while (m->n_body_parts > 0) {
+ struct bus_body_part *next = part->next;
+ message_free_part(m, part);
+ part = next;
+ m->n_body_parts--;
+ }
+
+ m->body_end = NULL;
+
+ m->cached_rindex_part = NULL;
+ m->cached_rindex_part_begin = 0;
+}
+
+static struct bus_container *message_get_last_container(sd_bus_message *m) {
+ assert(m);
+
+ if (m->n_containers == 0)
+ return &m->root_container;
+
+ assert(m->containers);
+ return m->containers + m->n_containers - 1;
+}
+
+static void message_free_last_container(sd_bus_message *m) {
+ struct bus_container *c;
+
+ c = message_get_last_container(m);
+
+ free(c->signature);
+ free(c->peeked_signature);
+ free(c->offsets);
+
+ /* Move to previous container, but not if we are on root container */
+ if (m->n_containers > 0)
+ m->n_containers--;
+}
+
+static void message_reset_containers(sd_bus_message *m) {
+ assert(m);
+
+ while (m->n_containers > 0)
+ message_free_last_container(m);
+
+ m->containers = mfree(m->containers);
+ m->containers_allocated = 0;
+ m->root_container.index = 0;
+}
+
+static sd_bus_message* message_free(sd_bus_message *m) {
+ assert(m);
+
+ if (m->free_header)
+ free(m->header);
+
+ message_reset_parts(m);
+
+ sd_bus_unref(m->bus);
+
+ if (m->free_fds) {
+ close_many(m->fds, m->n_fds);
+ free(m->fds);
+ }
+
+ if (m->iovec != m->iovec_fixed)
+ free(m->iovec);
+
+ message_reset_containers(m);
+ assert(m->n_containers == 0);
+ message_free_last_container(m);
+
+ bus_creds_done(&m->creds);
+ return mfree(m);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(sd_bus_message*, message_free);
+
+static void *message_extend_fields(sd_bus_message *m, size_t align, size_t sz, bool add_offset) {
+ void *op, *np;
+ size_t old_size, new_size, start;
+
+ assert(m);
+
+ if (m->poisoned)
+ return NULL;
+
+ old_size = sizeof(struct bus_header) + m->fields_size;
+ start = ALIGN_TO(old_size, align);
+ new_size = start + sz;
+
+ if (new_size < start ||
+ new_size > (size_t) ((uint32_t) -1))
+ goto poison;
+
+ if (old_size == new_size)
+ return (uint8_t*) m->header + old_size;
+
+ if (m->free_header) {
+ np = realloc(m->header, ALIGN8(new_size));
+ if (!np)
+ goto poison;
+ } else {
+ /* Initially, the header is allocated as part of
+ * the sd_bus_message itself, let's replace it by
+ * dynamic data */
+
+ np = malloc(ALIGN8(new_size));
+ if (!np)
+ goto poison;
+
+ memcpy(np, m->header, sizeof(struct bus_header));
+ }
+
+ /* Zero out padding */
+ if (start > old_size)
+ memzero((uint8_t*) np + old_size, start - old_size);
+
+ op = m->header;
+ m->header = np;
+ m->fields_size = new_size - sizeof(struct bus_header);
+
+ /* Adjust quick access pointers */
+ m->path = adjust_pointer(m->path, op, old_size, m->header);
+ m->interface = adjust_pointer(m->interface, op, old_size, m->header);
+ m->member = adjust_pointer(m->member, op, old_size, m->header);
+ m->destination = adjust_pointer(m->destination, op, old_size, m->header);
+ m->sender = adjust_pointer(m->sender, op, old_size, m->header);
+ m->error.name = adjust_pointer(m->error.name, op, old_size, m->header);
+
+ m->free_header = true;
+
+ if (add_offset) {
+ if (m->n_header_offsets >= ELEMENTSOF(m->header_offsets))
+ goto poison;
+
+ m->header_offsets[m->n_header_offsets++] = new_size - sizeof(struct bus_header);
+ }
+
+ return (uint8_t*) np + start;
+
+poison:
+ m->poisoned = true;
+ return NULL;
+}
+
+static int message_append_field_string(
+ sd_bus_message *m,
+ uint64_t h,
+ char type,
+ const char *s,
+ const char **ret) {
+
+ size_t l;
+ uint8_t *p;
+
+ assert(m);
+
+ /* dbus1 only allows 8bit header field ids */
+ if (h > 0xFF)
+ return -EINVAL;
+
+ /* dbus1 doesn't allow strings over 32bit, let's enforce this
+ * globally, to not risk convertability */
+ l = strlen(s);
+ if (l > UINT32_MAX)
+ return -EINVAL;
+
+ /* Signature "(yv)" where the variant contains "s" */
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+
+ /* (field id 64bit, ((string + NUL) + NUL + signature string 's') */
+ p = message_extend_fields(m, 8, 8 + l + 1 + 1 + 1, true);
+ if (!p)
+ return -ENOMEM;
+
+ *((uint64_t*) p) = h;
+ memcpy(p+8, s, l);
+ p[8+l] = 0;
+ p[8+l+1] = 0;
+ p[8+l+2] = type;
+
+ if (ret)
+ *ret = (char*) p + 8;
+
+ } else {
+ /* (field id byte + (signature length + signature 's' + NUL) + (string length + string + NUL)) */
+ p = message_extend_fields(m, 8, 4 + 4 + l + 1, false);
+ if (!p)
+ return -ENOMEM;
+
+ p[0] = (uint8_t) h;
+ p[1] = 1;
+ p[2] = type;
+ p[3] = 0;
+
+ ((uint32_t*) p)[1] = l;
+ memcpy(p + 8, s, l + 1);
+
+ if (ret)
+ *ret = (char*) p + 8;
+ }
+
+ return 0;
+}
+
+static int message_append_field_signature(
+ sd_bus_message *m,
+ uint64_t h,
+ const char *s,
+ const char **ret) {
+
+ size_t l;
+ uint8_t *p;
+
+ assert(m);
+
+ /* dbus1 only allows 8bit header field ids */
+ if (h > 0xFF)
+ return -EINVAL;
+
+ /* dbus1 doesn't allow signatures over 8bit, let's enforce
+ * this globally, to not risk convertability */
+ l = strlen(s);
+ if (l > 255)
+ return -EINVAL;
+
+ /* Signature "(yv)" where the variant contains "g" */
+
+ if (BUS_MESSAGE_IS_GVARIANT(m))
+ /* For gvariant the serialization is the same as for normal strings */
+ return message_append_field_string(m, h, 'g', s, ret);
+ else {
+ /* (field id byte + (signature length + signature 'g' + NUL) + (string length + string + NUL)) */
+ p = message_extend_fields(m, 8, 4 + 1 + l + 1, false);
+ if (!p)
+ return -ENOMEM;
+
+ p[0] = (uint8_t) h;
+ p[1] = 1;
+ p[2] = SD_BUS_TYPE_SIGNATURE;
+ p[3] = 0;
+ p[4] = l;
+ memcpy(p + 5, s, l + 1);
+
+ if (ret)
+ *ret = (const char*) p + 5;
+ }
+
+ return 0;
+}
+
+static int message_append_field_uint32(sd_bus_message *m, uint64_t h, uint32_t x) {
+ uint8_t *p;
+
+ assert(m);
+
+ /* dbus1 only allows 8bit header field ids */
+ if (h > 0xFF)
+ return -EINVAL;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ /* (field id 64bit + ((value + NUL + signature string 'u') */
+
+ p = message_extend_fields(m, 8, 8 + 4 + 1 + 1, true);
+ if (!p)
+ return -ENOMEM;
+
+ *((uint64_t*) p) = h;
+ *((uint32_t*) (p + 8)) = x;
+ p[12] = 0;
+ p[13] = 'u';
+ } else {
+ /* (field id byte + (signature length + signature 'u' + NUL) + value) */
+ p = message_extend_fields(m, 8, 4 + 4, false);
+ if (!p)
+ return -ENOMEM;
+
+ p[0] = (uint8_t) h;
+ p[1] = 1;
+ p[2] = 'u';
+ p[3] = 0;
+
+ ((uint32_t*) p)[1] = x;
+ }
+
+ return 0;
+}
+
+static int message_append_field_uint64(sd_bus_message *m, uint64_t h, uint64_t x) {
+ uint8_t *p;
+
+ assert(m);
+
+ /* dbus1 only allows 8bit header field ids */
+ if (h > 0xFF)
+ return -EINVAL;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ /* (field id 64bit + ((value + NUL + signature string 't') */
+
+ p = message_extend_fields(m, 8, 8 + 8 + 1 + 1, true);
+ if (!p)
+ return -ENOMEM;
+
+ *((uint64_t*) p) = h;
+ *((uint64_t*) (p + 8)) = x;
+ p[16] = 0;
+ p[17] = 't';
+ } else {
+ /* (field id byte + (signature length + signature 't' + NUL) + 4 byte padding + value) */
+ p = message_extend_fields(m, 8, 4 + 4 + 8, false);
+ if (!p)
+ return -ENOMEM;
+
+ p[0] = (uint8_t) h;
+ p[1] = 1;
+ p[2] = 't';
+ p[3] = 0;
+ p[4] = 0;
+ p[5] = 0;
+ p[6] = 0;
+ p[7] = 0;
+
+ ((uint64_t*) p)[1] = x;
+ }
+
+ return 0;
+}
+
+static int message_append_reply_cookie(sd_bus_message *m, uint64_t cookie) {
+ assert(m);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m))
+ return message_append_field_uint64(m, BUS_MESSAGE_HEADER_REPLY_SERIAL, cookie);
+ else {
+ /* 64bit cookies are not supported on dbus1 */
+ if (cookie > 0xffffffffUL)
+ return -EOPNOTSUPP;
+
+ return message_append_field_uint32(m, BUS_MESSAGE_HEADER_REPLY_SERIAL, (uint32_t) cookie);
+ }
+}
+
+int bus_message_from_header(
+ sd_bus *bus,
+ void *header,
+ size_t header_accessible,
+ void *footer,
+ size_t footer_accessible,
+ size_t message_size,
+ int *fds,
+ size_t n_fds,
+ const char *label,
+ size_t extra,
+ sd_bus_message **ret) {
+
+ _cleanup_free_ sd_bus_message *m = NULL;
+ struct bus_header *h;
+ size_t a, label_sz;
+
+ assert(bus);
+ assert(header || header_accessible <= 0);
+ assert(footer || footer_accessible <= 0);
+ assert(fds || n_fds <= 0);
+ assert(ret);
+
+ if (header_accessible < sizeof(struct bus_header))
+ return -EBADMSG;
+
+ if (header_accessible > message_size)
+ return -EBADMSG;
+ if (footer_accessible > message_size)
+ return -EBADMSG;
+
+ h = header;
+ if (!IN_SET(h->version, 1, 2))
+ return -EBADMSG;
+
+ if (h->type == _SD_BUS_MESSAGE_TYPE_INVALID)
+ return -EBADMSG;
+
+ if (!IN_SET(h->endian, BUS_LITTLE_ENDIAN, BUS_BIG_ENDIAN))
+ return -EBADMSG;
+
+ /* Note that we are happy with unknown flags in the flags header! */
+
+ a = ALIGN(sizeof(sd_bus_message)) + ALIGN(extra);
+
+ if (label) {
+ label_sz = strlen(label);
+ a += label_sz + 1;
+ }
+
+ m = malloc0(a);
+ if (!m)
+ return -ENOMEM;
+
+ m->n_ref = 1;
+ m->sealed = true;
+ m->header = header;
+ m->header_accessible = header_accessible;
+ m->footer = footer;
+ m->footer_accessible = footer_accessible;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ size_t ws;
+
+ if (h->dbus2.cookie == 0)
+ return -EBADMSG;
+
+ /* dbus2 derives the sizes from the message size and
+ the offset table at the end, since it is formatted as
+ gvariant "yyyyuta{tv}v". Since the message itself is a
+ structure with precisely to variable sized entries,
+ there's only one offset in the table, which marks the
+ end of the fields array. */
+
+ ws = bus_gvariant_determine_word_size(message_size, 0);
+ if (footer_accessible < ws)
+ return -EBADMSG;
+
+ m->fields_size = bus_gvariant_read_word_le((uint8_t*) footer + footer_accessible - ws, ws);
+ if (ALIGN8(m->fields_size) > message_size - ws)
+ return -EBADMSG;
+ if (m->fields_size < sizeof(struct bus_header))
+ return -EBADMSG;
+
+ m->fields_size -= sizeof(struct bus_header);
+ m->body_size = message_size - (sizeof(struct bus_header) + ALIGN8(m->fields_size));
+ } else {
+ if (h->dbus1.serial == 0)
+ return -EBADMSG;
+
+ /* dbus1 has the sizes in the header */
+ m->fields_size = BUS_MESSAGE_BSWAP32(m, h->dbus1.fields_size);
+ m->body_size = BUS_MESSAGE_BSWAP32(m, h->dbus1.body_size);
+
+ if (sizeof(struct bus_header) + ALIGN8(m->fields_size) + m->body_size != message_size)
+ return -EBADMSG;
+ }
+
+ m->fds = fds;
+ m->n_fds = n_fds;
+
+ if (label) {
+ m->creds.label = (char*) m + ALIGN(sizeof(sd_bus_message)) + ALIGN(extra);
+ memcpy(m->creds.label, label, label_sz + 1);
+
+ m->creds.mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+
+ m->bus = sd_bus_ref(bus);
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+int bus_message_from_malloc(
+ sd_bus *bus,
+ void *buffer,
+ size_t length,
+ int *fds,
+ size_t n_fds,
+ const char *label,
+ sd_bus_message **ret) {
+
+ _cleanup_(message_freep) sd_bus_message *m = NULL;
+ size_t sz;
+ int r;
+
+ r = bus_message_from_header(
+ bus,
+ buffer, length, /* in this case the initial bytes and the final bytes are the same */
+ buffer, length,
+ length,
+ fds, n_fds,
+ label,
+ 0, &m);
+ if (r < 0)
+ return r;
+
+ sz = length - sizeof(struct bus_header) - ALIGN8(m->fields_size);
+ if (sz > 0) {
+ m->n_body_parts = 1;
+ m->body.data = (uint8_t*) buffer + sizeof(struct bus_header) + ALIGN8(m->fields_size);
+ m->body.size = sz;
+ m->body.sealed = true;
+ m->body.memfd = -1;
+ }
+
+ m->n_iovec = 1;
+ m->iovec = m->iovec_fixed;
+ m->iovec[0] = IOVEC_MAKE(buffer, length);
+
+ r = bus_message_parse_fields(m);
+ if (r < 0)
+ return r;
+
+ /* We take possession of the memory and fds now */
+ m->free_header = true;
+ m->free_fds = true;
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+_public_ int sd_bus_message_new(
+ sd_bus *bus,
+ sd_bus_message **m,
+ uint8_t type) {
+
+ sd_bus_message *t;
+
+ assert_return(bus, -ENOTCONN);
+ assert_return(bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(m, -EINVAL);
+ assert_return(type < _SD_BUS_MESSAGE_TYPE_MAX, -EINVAL);
+
+ t = malloc0(ALIGN(sizeof(sd_bus_message)) + sizeof(struct bus_header));
+ if (!t)
+ return -ENOMEM;
+
+ t->n_ref = 1;
+ t->header = (struct bus_header*) ((uint8_t*) t + ALIGN(sizeof(struct sd_bus_message)));
+ t->header->endian = BUS_NATIVE_ENDIAN;
+ t->header->type = type;
+ t->header->version = bus->message_version;
+ t->allow_fds = bus->can_fds || !IN_SET(bus->state, BUS_HELLO, BUS_RUNNING);
+ t->root_container.need_offsets = BUS_MESSAGE_IS_GVARIANT(t);
+ t->bus = sd_bus_ref(bus);
+
+ if (bus->allow_interactive_authorization)
+ t->header->flags |= BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION;
+
+ *m = t;
+ return 0;
+}
+
+_public_ int sd_bus_message_new_signal(
+ sd_bus *bus,
+ sd_bus_message **m,
+ const char *path,
+ const char *interface,
+ const char *member) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL;
+ int r;
+
+ assert_return(bus, -ENOTCONN);
+ assert_return(bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(interface_name_is_valid(interface), -EINVAL);
+ assert_return(member_name_is_valid(member), -EINVAL);
+ assert_return(m, -EINVAL);
+
+ r = sd_bus_message_new(bus, &t, SD_BUS_MESSAGE_SIGNAL);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(t);
+
+ t->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_PATH, SD_BUS_TYPE_OBJECT_PATH, path, &t->path);
+ if (r < 0)
+ return r;
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_INTERFACE, SD_BUS_TYPE_STRING, interface, &t->interface);
+ if (r < 0)
+ return r;
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_MEMBER, SD_BUS_TYPE_STRING, member, &t->member);
+ if (r < 0)
+ return r;
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+_public_ int sd_bus_message_new_method_call(
+ sd_bus *bus,
+ sd_bus_message **m,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member) {
+
+ _cleanup_(message_freep) sd_bus_message *t = NULL;
+ int r;
+
+ assert_return(bus, -ENOTCONN);
+ assert_return(bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(!destination || service_name_is_valid(destination), -EINVAL);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!interface || interface_name_is_valid(interface), -EINVAL);
+ assert_return(member_name_is_valid(member), -EINVAL);
+ assert_return(m, -EINVAL);
+
+ r = sd_bus_message_new(bus, &t, SD_BUS_MESSAGE_METHOD_CALL);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(t);
+
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_PATH, SD_BUS_TYPE_OBJECT_PATH, path, &t->path);
+ if (r < 0)
+ return r;
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_MEMBER, SD_BUS_TYPE_STRING, member, &t->member);
+ if (r < 0)
+ return r;
+
+ if (interface) {
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_INTERFACE, SD_BUS_TYPE_STRING, interface, &t->interface);
+ if (r < 0)
+ return r;
+ }
+
+ if (destination) {
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, destination, &t->destination);
+ if (r < 0)
+ return r;
+ }
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+static int message_new_reply(
+ sd_bus_message *call,
+ uint8_t type,
+ sd_bus_message **m) {
+
+ _cleanup_(message_freep) sd_bus_message *t = NULL;
+ uint64_t cookie;
+ int r;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(m, -EINVAL);
+
+ cookie = BUS_MESSAGE_COOKIE(call);
+ if (cookie == 0)
+ return -EOPNOTSUPP;
+
+ r = sd_bus_message_new(call->bus, &t, type);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(t);
+
+ t->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+ t->reply_cookie = cookie;
+ r = message_append_reply_cookie(t, t->reply_cookie);
+ if (r < 0)
+ return r;
+
+ if (call->sender) {
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, call->sender, &t->destination);
+ if (r < 0)
+ return r;
+ }
+
+ t->dont_send = !!(call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED);
+ t->enforced_reply_signature = call->enforced_reply_signature;
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+_public_ int sd_bus_message_new_method_return(
+ sd_bus_message *call,
+ sd_bus_message **m) {
+
+ return message_new_reply(call, SD_BUS_MESSAGE_METHOD_RETURN, m);
+}
+
+_public_ int sd_bus_message_new_method_error(
+ sd_bus_message *call,
+ sd_bus_message **m,
+ const sd_bus_error *e) {
+
+ _cleanup_(message_freep) sd_bus_message *t = NULL;
+ int r;
+
+ assert_return(sd_bus_error_is_set(e), -EINVAL);
+ assert_return(m, -EINVAL);
+
+ r = message_new_reply(call, SD_BUS_MESSAGE_METHOD_ERROR, &t);
+ if (r < 0)
+ return r;
+
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_ERROR_NAME, SD_BUS_TYPE_STRING, e->name, &t->error.name);
+ if (r < 0)
+ return r;
+
+ if (e->message) {
+ r = message_append_basic(t, SD_BUS_TYPE_STRING, e->message, (const void**) &t->error.message);
+ if (r < 0)
+ return r;
+ }
+
+ t->error._need_free = -1;
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+_public_ int sd_bus_message_new_method_errorf(
+ sd_bus_message *call,
+ sd_bus_message **m,
+ const char *name,
+ const char *format,
+ ...) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ va_list ap;
+
+ assert_return(name, -EINVAL);
+ assert_return(m, -EINVAL);
+
+ va_start(ap, format);
+ bus_error_setfv(&error, name, format, ap);
+ va_end(ap);
+
+ return sd_bus_message_new_method_error(call, m, &error);
+}
+
+_public_ int sd_bus_message_new_method_errno(
+ sd_bus_message *call,
+ sd_bus_message **m,
+ int error,
+ const sd_bus_error *p) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL;
+
+ if (sd_bus_error_is_set(p))
+ return sd_bus_message_new_method_error(call, m, p);
+
+ sd_bus_error_set_errno(&berror, error);
+
+ return sd_bus_message_new_method_error(call, m, &berror);
+}
+
+_public_ int sd_bus_message_new_method_errnof(
+ sd_bus_message *call,
+ sd_bus_message **m,
+ int error,
+ const char *format,
+ ...) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL;
+ va_list ap;
+
+ va_start(ap, format);
+ sd_bus_error_set_errnofv(&berror, error, format, ap);
+ va_end(ap);
+
+ return sd_bus_message_new_method_error(call, m, &berror);
+}
+
+void bus_message_set_sender_local(sd_bus *bus, sd_bus_message *m) {
+ assert(bus);
+ assert(m);
+
+ m->sender = m->creds.unique_name = (char*) "org.freedesktop.DBus.Local";
+ m->creds.well_known_names_local = true;
+ m->creds.mask |= (SD_BUS_CREDS_UNIQUE_NAME|SD_BUS_CREDS_WELL_KNOWN_NAMES) & bus->creds_mask;
+}
+
+void bus_message_set_sender_driver(sd_bus *bus, sd_bus_message *m) {
+ assert(bus);
+ assert(m);
+
+ m->sender = m->creds.unique_name = (char*) "org.freedesktop.DBus";
+ m->creds.well_known_names_driver = true;
+ m->creds.mask |= (SD_BUS_CREDS_UNIQUE_NAME|SD_BUS_CREDS_WELL_KNOWN_NAMES) & bus->creds_mask;
+}
+
+int bus_message_new_synthetic_error(
+ sd_bus *bus,
+ uint64_t cookie,
+ const sd_bus_error *e,
+ sd_bus_message **m) {
+
+ _cleanup_(message_freep) sd_bus_message *t = NULL;
+ int r;
+
+ assert(bus);
+ assert(sd_bus_error_is_set(e));
+ assert(m);
+
+ r = sd_bus_message_new(bus, &t, SD_BUS_MESSAGE_METHOD_ERROR);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(t);
+
+ t->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+ t->reply_cookie = cookie;
+
+ r = message_append_reply_cookie(t, t->reply_cookie);
+ if (r < 0)
+ return r;
+
+ if (bus && bus->unique_name) {
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, bus->unique_name, &t->destination);
+ if (r < 0)
+ return r;
+ }
+
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_ERROR_NAME, SD_BUS_TYPE_STRING, e->name, &t->error.name);
+ if (r < 0)
+ return r;
+
+ if (e->message) {
+ r = message_append_basic(t, SD_BUS_TYPE_STRING, e->message, (const void**) &t->error.message);
+ if (r < 0)
+ return r;
+ }
+
+ t->error._need_free = -1;
+
+ bus_message_set_sender_driver(bus, t);
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_bus_message, sd_bus_message, message_free);
+
+_public_ int sd_bus_message_get_type(sd_bus_message *m, uint8_t *type) {
+ assert_return(m, -EINVAL);
+ assert_return(type, -EINVAL);
+
+ *type = m->header->type;
+ return 0;
+}
+
+_public_ int sd_bus_message_get_cookie(sd_bus_message *m, uint64_t *cookie) {
+ uint64_t c;
+
+ assert_return(m, -EINVAL);
+ assert_return(cookie, -EINVAL);
+
+ c = BUS_MESSAGE_COOKIE(m);
+ if (c == 0)
+ return -ENODATA;
+
+ *cookie = BUS_MESSAGE_COOKIE(m);
+ return 0;
+}
+
+_public_ int sd_bus_message_get_reply_cookie(sd_bus_message *m, uint64_t *cookie) {
+ assert_return(m, -EINVAL);
+ assert_return(cookie, -EINVAL);
+
+ if (m->reply_cookie == 0)
+ return -ENODATA;
+
+ *cookie = m->reply_cookie;
+ return 0;
+}
+
+_public_ int sd_bus_message_get_expect_reply(sd_bus_message *m) {
+ assert_return(m, -EINVAL);
+
+ return m->header->type == SD_BUS_MESSAGE_METHOD_CALL &&
+ !(m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED);
+}
+
+_public_ int sd_bus_message_get_auto_start(sd_bus_message *m) {
+ assert_return(m, -EINVAL);
+
+ return !(m->header->flags & BUS_MESSAGE_NO_AUTO_START);
+}
+
+_public_ int sd_bus_message_get_allow_interactive_authorization(sd_bus_message *m) {
+ assert_return(m, -EINVAL);
+
+ return m->header->type == SD_BUS_MESSAGE_METHOD_CALL &&
+ (m->header->flags & BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION);
+}
+
+_public_ const char *sd_bus_message_get_path(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->path;
+}
+
+_public_ const char *sd_bus_message_get_interface(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->interface;
+}
+
+_public_ const char *sd_bus_message_get_member(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->member;
+}
+
+_public_ const char *sd_bus_message_get_destination(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->destination;
+}
+
+_public_ const char *sd_bus_message_get_sender(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->sender;
+}
+
+_public_ const sd_bus_error *sd_bus_message_get_error(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ if (!sd_bus_error_is_set(&m->error))
+ return NULL;
+
+ return &m->error;
+}
+
+_public_ int sd_bus_message_get_monotonic_usec(sd_bus_message *m, uint64_t *usec) {
+ assert_return(m, -EINVAL);
+ assert_return(usec, -EINVAL);
+
+ if (m->monotonic <= 0)
+ return -ENODATA;
+
+ *usec = m->monotonic;
+ return 0;
+}
+
+_public_ int sd_bus_message_get_realtime_usec(sd_bus_message *m, uint64_t *usec) {
+ assert_return(m, -EINVAL);
+ assert_return(usec, -EINVAL);
+
+ if (m->realtime <= 0)
+ return -ENODATA;
+
+ *usec = m->realtime;
+ return 0;
+}
+
+_public_ int sd_bus_message_get_seqnum(sd_bus_message *m, uint64_t *seqnum) {
+ assert_return(m, -EINVAL);
+ assert_return(seqnum, -EINVAL);
+
+ if (m->seqnum <= 0)
+ return -ENODATA;
+
+ *seqnum = m->seqnum;
+ return 0;
+}
+
+_public_ sd_bus_creds *sd_bus_message_get_creds(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ if (m->creds.mask == 0)
+ return NULL;
+
+ return &m->creds;
+}
+
+_public_ int sd_bus_message_is_signal(
+ sd_bus_message *m,
+ const char *interface,
+ const char *member) {
+
+ assert_return(m, -EINVAL);
+
+ if (m->header->type != SD_BUS_MESSAGE_SIGNAL)
+ return 0;
+
+ if (interface && !streq_ptr(m->interface, interface))
+ return 0;
+
+ if (member && !streq_ptr(m->member, member))
+ return 0;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_is_method_call(
+ sd_bus_message *m,
+ const char *interface,
+ const char *member) {
+
+ assert_return(m, -EINVAL);
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL)
+ return 0;
+
+ if (interface && !streq_ptr(m->interface, interface))
+ return 0;
+
+ if (member && !streq_ptr(m->member, member))
+ return 0;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_is_method_error(sd_bus_message *m, const char *name) {
+ assert_return(m, -EINVAL);
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_ERROR)
+ return 0;
+
+ if (name && !streq_ptr(m->error.name, name))
+ return 0;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_set_expect_reply(sd_bus_message *m, int b) {
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EPERM);
+
+ SET_FLAG(m->header->flags, BUS_MESSAGE_NO_REPLY_EXPECTED, !b);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_set_auto_start(sd_bus_message *m, int b) {
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ SET_FLAG(m->header->flags, BUS_MESSAGE_NO_AUTO_START, !b);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_set_allow_interactive_authorization(sd_bus_message *m, int b) {
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ SET_FLAG(m->header->flags, BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION, b);
+
+ return 0;
+}
+
+struct bus_body_part *message_append_part(sd_bus_message *m) {
+ struct bus_body_part *part;
+
+ assert(m);
+
+ if (m->poisoned)
+ return NULL;
+
+ if (m->n_body_parts <= 0) {
+ part = &m->body;
+ zero(*part);
+ } else {
+ assert(m->body_end);
+
+ part = new0(struct bus_body_part, 1);
+ if (!part) {
+ m->poisoned = true;
+ return NULL;
+ }
+
+ m->body_end->next = part;
+ }
+
+ part->memfd = -1;
+ m->body_end = part;
+ m->n_body_parts++;
+
+ return part;
+}
+
+static void part_zero(struct bus_body_part *part, size_t sz) {
+ assert(part);
+ assert(sz > 0);
+ assert(sz < 8);
+
+ /* All other fields can be left in their defaults */
+ assert(!part->data);
+ assert(part->memfd < 0);
+
+ part->size = sz;
+ part->is_zero = true;
+ part->sealed = true;
+}
+
+static int part_make_space(
+ struct sd_bus_message *m,
+ struct bus_body_part *part,
+ size_t sz,
+ void **q) {
+
+ void *n;
+
+ assert(m);
+ assert(part);
+ assert(!part->sealed);
+
+ if (m->poisoned)
+ return -ENOMEM;
+
+ if (part->allocated == 0 || sz > part->allocated) {
+ size_t new_allocated;
+
+ new_allocated = sz > 0 ? 2 * sz : 64;
+ n = realloc(part->data, new_allocated);
+ if (!n) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ part->data = n;
+ part->allocated = new_allocated;
+ part->free_this = true;
+ }
+
+ if (q)
+ *q = part->data ? (uint8_t*) part->data + part->size : NULL;
+
+ part->size = sz;
+ return 0;
+}
+
+static int message_add_offset(sd_bus_message *m, size_t offset) {
+ struct bus_container *c;
+
+ assert(m);
+ assert(BUS_MESSAGE_IS_GVARIANT(m));
+
+ /* Add offset to current container, unless this is the first
+ * item in it, which will have the 0 offset, which we can
+ * ignore. */
+ c = message_get_last_container(m);
+
+ if (!c->need_offsets)
+ return 0;
+
+ if (!GREEDY_REALLOC(c->offsets, c->offsets_allocated, c->n_offsets + 1))
+ return -ENOMEM;
+
+ c->offsets[c->n_offsets++] = offset;
+ return 0;
+}
+
+static void message_extend_containers(sd_bus_message *m, size_t expand) {
+ struct bus_container *c;
+
+ assert(m);
+
+ if (expand <= 0)
+ return;
+
+ /* Update counters */
+ for (c = m->containers; c < m->containers + m->n_containers; c++) {
+
+ if (c->array_size)
+ *c->array_size += expand;
+ }
+}
+
+static void *message_extend_body(
+ sd_bus_message *m,
+ size_t align,
+ size_t sz,
+ bool add_offset,
+ bool force_inline) {
+
+ size_t start_body, end_body, padding, added;
+ void *p;
+ int r;
+
+ assert(m);
+ assert(align > 0);
+ assert(!m->sealed);
+
+ if (m->poisoned)
+ return NULL;
+
+ start_body = ALIGN_TO((size_t) m->body_size, align);
+ end_body = start_body + sz;
+
+ padding = start_body - m->body_size;
+ added = padding + sz;
+
+ /* Check for 32bit overflows */
+ if (end_body > (size_t) ((uint32_t) -1) ||
+ end_body < start_body) {
+ m->poisoned = true;
+ return NULL;
+ }
+
+ if (added > 0) {
+ struct bus_body_part *part = NULL;
+ bool add_new_part;
+
+ add_new_part =
+ m->n_body_parts <= 0 ||
+ m->body_end->sealed ||
+ (padding != ALIGN_TO(m->body_end->size, align) - m->body_end->size) ||
+ (force_inline && m->body_end->size > MEMFD_MIN_SIZE);
+ /* If this must be an inlined extension, let's create a new part if
+ * the previous part is large enough to be inlined. */
+
+ if (add_new_part) {
+ if (padding > 0) {
+ part = message_append_part(m);
+ if (!part)
+ return NULL;
+
+ part_zero(part, padding);
+ }
+
+ part = message_append_part(m);
+ if (!part)
+ return NULL;
+
+ r = part_make_space(m, part, sz, &p);
+ if (r < 0)
+ return NULL;
+ } else {
+ struct bus_container *c;
+ void *op;
+ size_t os, start_part, end_part;
+
+ part = m->body_end;
+ op = part->data;
+ os = part->size;
+
+ start_part = ALIGN_TO(part->size, align);
+ end_part = start_part + sz;
+
+ r = part_make_space(m, part, end_part, &p);
+ if (r < 0)
+ return NULL;
+
+ if (padding > 0) {
+ memzero(p, padding);
+ p = (uint8_t*) p + padding;
+ }
+
+ /* Readjust pointers */
+ for (c = m->containers; c < m->containers + m->n_containers; c++)
+ c->array_size = adjust_pointer(c->array_size, op, os, part->data);
+
+ m->error.message = (const char*) adjust_pointer(m->error.message, op, os, part->data);
+ }
+ } else
+ /* Return something that is not NULL and is aligned */
+ p = (uint8_t*) align;
+
+ m->body_size = end_body;
+ message_extend_containers(m, added);
+
+ if (add_offset) {
+ r = message_add_offset(m, end_body);
+ if (r < 0) {
+ m->poisoned = true;
+ return NULL;
+ }
+ }
+
+ return p;
+}
+
+static int message_push_fd(sd_bus_message *m, int fd) {
+ int *f, copy;
+
+ assert(m);
+
+ if (fd < 0)
+ return -EINVAL;
+
+ if (!m->allow_fds)
+ return -EOPNOTSUPP;
+
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0)
+ return -errno;
+
+ f = reallocarray(m->fds, sizeof(int), m->n_fds + 1);
+ if (!f) {
+ m->poisoned = true;
+ safe_close(copy);
+ return -ENOMEM;
+ }
+
+ m->fds = f;
+ m->fds[m->n_fds] = copy;
+ m->free_fds = true;
+
+ return copy;
+}
+
+int message_append_basic(sd_bus_message *m, char type, const void *p, const void **stored) {
+ _cleanup_close_ int fd = -1;
+ struct bus_container *c;
+ ssize_t align, sz;
+ void *a;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(bus_type_is_basic(type), -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ c = message_get_last_container(m);
+
+ if (c->signature && c->signature[c->index]) {
+ /* Container signature is already set */
+
+ if (c->signature[c->index] != type)
+ return -ENXIO;
+ } else {
+ char *e;
+
+ /* Maybe we can append to the signature? But only if this is the top-level container */
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(type), NULL);
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+ }
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ uint8_t u8;
+ uint32_t u32;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_STRING:
+ p = strempty(p);
+
+ _fallthrough_;
+ case SD_BUS_TYPE_OBJECT_PATH:
+ if (!p)
+ return -EINVAL;
+
+ align = 1;
+ sz = strlen(p) + 1;
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+
+ u8 = p && *(int*) p;
+ p = &u8;
+
+ align = sz = 1;
+ break;
+
+ case SD_BUS_TYPE_UNIX_FD:
+
+ if (!p)
+ return -EINVAL;
+
+ fd = message_push_fd(m, *(int*) p);
+ if (fd < 0)
+ return fd;
+
+ u32 = m->n_fds;
+ p = &u32;
+
+ align = sz = 4;
+ break;
+
+ default:
+ align = bus_gvariant_get_alignment(CHAR_TO_STR(type));
+ sz = bus_gvariant_get_size(CHAR_TO_STR(type));
+ break;
+ }
+
+ assert(align > 0);
+ assert(sz > 0);
+
+ a = message_extend_body(m, align, sz, true, false);
+ if (!a)
+ return -ENOMEM;
+
+ memcpy(a, p, sz);
+
+ if (stored)
+ *stored = (const uint8_t*) a;
+
+ } else {
+ uint32_t u32;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_STRING:
+ /* To make things easy we'll serialize a NULL string
+ * into the empty string */
+ p = strempty(p);
+
+ _fallthrough_;
+ case SD_BUS_TYPE_OBJECT_PATH:
+
+ if (!p)
+ return -EINVAL;
+
+ align = 4;
+ sz = 4 + strlen(p) + 1;
+ break;
+
+ case SD_BUS_TYPE_SIGNATURE:
+
+ p = strempty(p);
+
+ align = 1;
+ sz = 1 + strlen(p) + 1;
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+
+ u32 = p && *(int*) p;
+ p = &u32;
+
+ align = sz = 4;
+ break;
+
+ case SD_BUS_TYPE_UNIX_FD:
+
+ if (!p)
+ return -EINVAL;
+
+ fd = message_push_fd(m, *(int*) p);
+ if (fd < 0)
+ return fd;
+
+ u32 = m->n_fds;
+ p = &u32;
+
+ align = sz = 4;
+ break;
+
+ default:
+ align = bus_type_get_alignment(type);
+ sz = bus_type_get_size(type);
+ break;
+ }
+
+ assert(align > 0);
+ assert(sz > 0);
+
+ a = message_extend_body(m, align, sz, false, false);
+ if (!a)
+ return -ENOMEM;
+
+ if (IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH)) {
+ *(uint32_t*) a = sz - 5;
+ memcpy((uint8_t*) a + 4, p, sz - 4);
+
+ if (stored)
+ *stored = (const uint8_t*) a + 4;
+
+ } else if (type == SD_BUS_TYPE_SIGNATURE) {
+ *(uint8_t*) a = sz - 2;
+ memcpy((uint8_t*) a + 1, p, sz - 1);
+
+ if (stored)
+ *stored = (const uint8_t*) a + 1;
+ } else {
+ memcpy(a, p, sz);
+
+ if (stored)
+ *stored = a;
+ }
+ }
+
+ if (type == SD_BUS_TYPE_UNIX_FD)
+ m->n_fds++;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ fd = -1;
+ return 0;
+}
+
+_public_ int sd_bus_message_append_basic(sd_bus_message *m, char type, const void *p) {
+ return message_append_basic(m, type, p, NULL);
+}
+
+_public_ int sd_bus_message_append_string_space(
+ sd_bus_message *m,
+ size_t size,
+ char **s) {
+
+ struct bus_container *c;
+ void *a;
+
+ assert_return(m, -EINVAL);
+ assert_return(s, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ c = message_get_last_container(m);
+
+ if (c->signature && c->signature[c->index]) {
+ /* Container signature is already set */
+
+ if (c->signature[c->index] != SD_BUS_TYPE_STRING)
+ return -ENXIO;
+ } else {
+ char *e;
+
+ /* Maybe we can append to the signature? But only if this is the top-level container */
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_STRING), NULL);
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+ }
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ a = message_extend_body(m, 1, size + 1, true, false);
+ if (!a)
+ return -ENOMEM;
+
+ *s = a;
+ } else {
+ a = message_extend_body(m, 4, 4 + size + 1, false, false);
+ if (!a)
+ return -ENOMEM;
+
+ *(uint32_t*) a = size;
+ *s = (char*) a + 4;
+ }
+
+ (*s)[size] = 0;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 0;
+}
+
+_public_ int sd_bus_message_append_string_iovec(
+ sd_bus_message *m,
+ const struct iovec *iov,
+ unsigned n /* should be size_t, but is API now… 😞 */) {
+
+ size_t size;
+ unsigned i;
+ char *p;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(iov || n == 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ size = IOVEC_TOTAL_SIZE(iov, n);
+
+ r = sd_bus_message_append_string_space(m, size, &p);
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < n; i++) {
+
+ if (iov[i].iov_base)
+ memcpy(p, iov[i].iov_base, iov[i].iov_len);
+ else
+ memset(p, ' ', iov[i].iov_len);
+
+ p += iov[i].iov_len;
+ }
+
+ return 0;
+}
+
+static int bus_message_open_array(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ uint32_t **array_size,
+ size_t *begin,
+ bool *need_offsets) {
+
+ unsigned nindex;
+ int alignment, r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(array_size);
+ assert(begin);
+ assert(need_offsets);
+
+ if (!signature_is_single(contents, true))
+ return -EINVAL;
+
+ if (c->signature && c->signature[c->index]) {
+
+ /* Verify the existing signature */
+
+ if (c->signature[c->index] != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+
+ if (!startswith(c->signature + c->index + 1, contents))
+ return -ENXIO;
+
+ nindex = c->index + 1 + strlen(contents);
+ } else {
+ char *e;
+
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ /* Extend the existing signature */
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_ARRAY), contents, NULL);
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ nindex = e - c->signature;
+ }
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ alignment = bus_gvariant_get_alignment(contents);
+ if (alignment < 0)
+ return alignment;
+
+ /* Add alignment padding and add to offset list */
+ if (!message_extend_body(m, alignment, 0, false, false))
+ return -ENOMEM;
+
+ r = bus_gvariant_is_fixed_size(contents);
+ if (r < 0)
+ return r;
+
+ *begin = m->body_size;
+ *need_offsets = r == 0;
+ } else {
+ void *a, *op;
+ size_t os;
+ struct bus_body_part *o;
+
+ alignment = bus_type_get_alignment(contents[0]);
+ if (alignment < 0)
+ return alignment;
+
+ a = message_extend_body(m, 4, 4, false, false);
+ if (!a)
+ return -ENOMEM;
+
+ o = m->body_end;
+ op = m->body_end->data;
+ os = m->body_end->size;
+
+ /* Add alignment between size and first element */
+ if (!message_extend_body(m, alignment, 0, false, false))
+ return -ENOMEM;
+
+ /* location of array size might have changed so let's readjust a */
+ if (o == m->body_end)
+ a = adjust_pointer(a, op, os, m->body_end->data);
+
+ *(uint32_t*) a = 0;
+ *array_size = a;
+ }
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index = nindex;
+
+ return 0;
+}
+
+static int bus_message_open_variant(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents) {
+
+ assert(m);
+ assert(c);
+ assert(contents);
+
+ if (!signature_is_single(contents, false))
+ return -EINVAL;
+
+ if (*contents == SD_BUS_TYPE_DICT_ENTRY_BEGIN)
+ return -EINVAL;
+
+ if (c->signature && c->signature[c->index]) {
+
+ if (c->signature[c->index] != SD_BUS_TYPE_VARIANT)
+ return -ENXIO;
+
+ } else {
+ char *e;
+
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_VARIANT), NULL);
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+ }
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ /* Variants are always aligned to 8 */
+
+ if (!message_extend_body(m, 8, 0, false, false))
+ return -ENOMEM;
+
+ } else {
+ size_t l;
+ void *a;
+
+ l = strlen(contents);
+ a = message_extend_body(m, 1, 1 + l + 1, false, false);
+ if (!a)
+ return -ENOMEM;
+
+ *(uint8_t*) a = l;
+ memcpy((uint8_t*) a + 1, contents, l + 1);
+ }
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 0;
+}
+
+static int bus_message_open_struct(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ size_t *begin,
+ bool *need_offsets) {
+
+ size_t nindex;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(begin);
+ assert(need_offsets);
+
+ if (!signature_is_valid(contents, false))
+ return -EINVAL;
+
+ if (c->signature && c->signature[c->index]) {
+ size_t l;
+
+ l = strlen(contents);
+
+ if (c->signature[c->index] != SD_BUS_TYPE_STRUCT_BEGIN ||
+ !startswith(c->signature + c->index + 1, contents) ||
+ c->signature[c->index + 1 + l] != SD_BUS_TYPE_STRUCT_END)
+ return -ENXIO;
+
+ nindex = c->index + 1 + l + 1;
+ } else {
+ char *e;
+
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_STRUCT_BEGIN), contents, CHAR_TO_STR(SD_BUS_TYPE_STRUCT_END), NULL);
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ nindex = e - c->signature;
+ }
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ int alignment;
+
+ alignment = bus_gvariant_get_alignment(contents);
+ if (alignment < 0)
+ return alignment;
+
+ if (!message_extend_body(m, alignment, 0, false, false))
+ return -ENOMEM;
+
+ r = bus_gvariant_is_fixed_size(contents);
+ if (r < 0)
+ return r;
+
+ *begin = m->body_size;
+ *need_offsets = r == 0;
+ } else {
+ /* Align contents to 8 byte boundary */
+ if (!message_extend_body(m, 8, 0, false, false))
+ return -ENOMEM;
+ }
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index = nindex;
+
+ return 0;
+}
+
+static int bus_message_open_dict_entry(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ size_t *begin,
+ bool *need_offsets) {
+
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(begin);
+ assert(need_offsets);
+
+ if (!signature_is_pair(contents))
+ return -EINVAL;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+
+ if (c->signature && c->signature[c->index]) {
+ size_t l;
+
+ l = strlen(contents);
+
+ if (c->signature[c->index] != SD_BUS_TYPE_DICT_ENTRY_BEGIN ||
+ !startswith(c->signature + c->index + 1, contents) ||
+ c->signature[c->index + 1 + l] != SD_BUS_TYPE_DICT_ENTRY_END)
+ return -ENXIO;
+ } else
+ return -ENXIO;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ int alignment;
+
+ alignment = bus_gvariant_get_alignment(contents);
+ if (alignment < 0)
+ return alignment;
+
+ if (!message_extend_body(m, alignment, 0, false, false))
+ return -ENOMEM;
+
+ r = bus_gvariant_is_fixed_size(contents);
+ if (r < 0)
+ return r;
+
+ *begin = m->body_size;
+ *need_offsets = r == 0;
+ } else {
+ /* Align contents to 8 byte boundary */
+ if (!message_extend_body(m, 8, 0, false, false))
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_message_open_container(
+ sd_bus_message *m,
+ char type,
+ const char *contents) {
+
+ struct bus_container *c;
+ uint32_t *array_size = NULL;
+ _cleanup_free_ char *signature = NULL;
+ size_t before, begin = 0;
+ bool need_offsets = false;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(contents, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ /* Make sure we have space for one more container */
+ if (!GREEDY_REALLOC(m->containers, m->containers_allocated, m->n_containers + 1)) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ c = message_get_last_container(m);
+
+ signature = strdup(contents);
+ if (!signature) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ /* Save old index in the parent container, in case we have to
+ * abort this container */
+ c->saved_index = c->index;
+ before = m->body_size;
+
+ if (type == SD_BUS_TYPE_ARRAY)
+ r = bus_message_open_array(m, c, contents, &array_size, &begin, &need_offsets);
+ else if (type == SD_BUS_TYPE_VARIANT)
+ r = bus_message_open_variant(m, c, contents);
+ else if (type == SD_BUS_TYPE_STRUCT)
+ r = bus_message_open_struct(m, c, contents, &begin, &need_offsets);
+ else if (type == SD_BUS_TYPE_DICT_ENTRY)
+ r = bus_message_open_dict_entry(m, c, contents, &begin, &need_offsets);
+ else
+ r = -EINVAL;
+ if (r < 0)
+ return r;
+
+ /* OK, let's fill it in */
+ m->containers[m->n_containers++] = (struct bus_container) {
+ .enclosing = type,
+ .signature = TAKE_PTR(signature),
+ .array_size = array_size,
+ .before = before,
+ .begin = begin,
+ .need_offsets = need_offsets,
+ };
+
+ return 0;
+}
+
+static int bus_message_close_array(sd_bus_message *m, struct bus_container *c) {
+
+ assert(m);
+ assert(c);
+
+ if (!BUS_MESSAGE_IS_GVARIANT(m))
+ return 0;
+
+ if (c->need_offsets) {
+ size_t payload, sz, i;
+ uint8_t *a;
+
+ /* Variable-width arrays */
+
+ payload = c->n_offsets > 0 ? c->offsets[c->n_offsets-1] - c->begin : 0;
+ sz = bus_gvariant_determine_word_size(payload, c->n_offsets);
+
+ a = message_extend_body(m, 1, sz * c->n_offsets, true, false);
+ if (!a)
+ return -ENOMEM;
+
+ for (i = 0; i < c->n_offsets; i++)
+ bus_gvariant_write_word_le(a + sz*i, sz, c->offsets[i] - c->begin);
+ } else {
+ void *a;
+
+ /* Fixed-width or empty arrays */
+
+ a = message_extend_body(m, 1, 0, true, false); /* let's add offset to parent */
+ if (!a)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int bus_message_close_variant(sd_bus_message *m, struct bus_container *c) {
+ uint8_t *a;
+ size_t l;
+
+ assert(m);
+ assert(c);
+ assert(c->signature);
+
+ if (!BUS_MESSAGE_IS_GVARIANT(m))
+ return 0;
+
+ l = strlen(c->signature);
+
+ a = message_extend_body(m, 1, 1 + l, true, false);
+ if (!a)
+ return -ENOMEM;
+
+ a[0] = 0;
+ memcpy(a+1, c->signature, l);
+
+ return 0;
+}
+
+static int bus_message_close_struct(sd_bus_message *m, struct bus_container *c, bool add_offset) {
+ bool fixed_size = true;
+ size_t n_variable = 0;
+ unsigned i = 0;
+ const char *p;
+ uint8_t *a;
+ int r;
+
+ assert(m);
+ assert(c);
+
+ if (!BUS_MESSAGE_IS_GVARIANT(m))
+ return 0;
+
+ p = strempty(c->signature);
+ while (*p != 0) {
+ size_t n;
+
+ r = signature_element_length(p, &n);
+ if (r < 0)
+ return r;
+ else {
+ char t[n+1];
+
+ memcpy(t, p, n);
+ t[n] = 0;
+
+ r = bus_gvariant_is_fixed_size(t);
+ if (r < 0)
+ return r;
+ }
+
+ assert(!c->need_offsets || i <= c->n_offsets);
+
+ /* We need to add an offset for each item that has a
+ * variable size and that is not the last one in the
+ * list */
+ if (r == 0)
+ fixed_size = false;
+ if (r == 0 && p[n] != 0)
+ n_variable++;
+
+ i++;
+ p += n;
+ }
+
+ assert(!c->need_offsets || i == c->n_offsets);
+ assert(c->need_offsets || n_variable == 0);
+
+ if (isempty(c->signature)) {
+ /* The unary type is encoded as fixed 1 byte padding */
+ a = message_extend_body(m, 1, 1, add_offset, false);
+ if (!a)
+ return -ENOMEM;
+
+ *a = 0;
+ } else if (n_variable <= 0) {
+ int alignment = 1;
+
+ /* Structures with fixed-size members only have to be
+ * fixed-size themselves. But gvariant requires all fixed-size
+ * elements to be sized a multiple of their alignment. Hence,
+ * we must *always* add final padding after the last member so
+ * the overall size of the structure is properly aligned. */
+ if (fixed_size)
+ alignment = bus_gvariant_get_alignment(strempty(c->signature));
+
+ assert(alignment > 0);
+
+ a = message_extend_body(m, alignment, 0, add_offset, false);
+ if (!a)
+ return -ENOMEM;
+ } else {
+ size_t sz;
+ unsigned j;
+
+ assert(c->offsets[c->n_offsets-1] == m->body_size);
+
+ sz = bus_gvariant_determine_word_size(m->body_size - c->begin, n_variable);
+
+ a = message_extend_body(m, 1, sz * n_variable, add_offset, false);
+ if (!a)
+ return -ENOMEM;
+
+ p = strempty(c->signature);
+ for (i = 0, j = 0; i < c->n_offsets; i++) {
+ unsigned k;
+ size_t n;
+
+ r = signature_element_length(p, &n);
+ if (r < 0)
+ return r;
+ else {
+ char t[n+1];
+
+ memcpy(t, p, n);
+ t[n] = 0;
+
+ p += n;
+
+ r = bus_gvariant_is_fixed_size(t);
+ if (r < 0)
+ return r;
+ if (r > 0 || p[0] == 0)
+ continue;
+ }
+
+ k = n_variable - 1 - j;
+
+ bus_gvariant_write_word_le(a + k * sz, sz, c->offsets[i] - c->begin);
+
+ j++;
+ }
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_message_close_container(sd_bus_message *m) {
+ struct bus_container *c;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers > 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ c = message_get_last_container(m);
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ if (c->signature && c->signature[c->index] != 0)
+ return -EINVAL;
+
+ m->n_containers--;
+
+ if (c->enclosing == SD_BUS_TYPE_ARRAY)
+ r = bus_message_close_array(m, c);
+ else if (c->enclosing == SD_BUS_TYPE_VARIANT)
+ r = bus_message_close_variant(m, c);
+ else if (IN_SET(c->enclosing, SD_BUS_TYPE_STRUCT, SD_BUS_TYPE_DICT_ENTRY))
+ r = bus_message_close_struct(m, c, true);
+ else
+ assert_not_reached("Unknown container type");
+
+ free(c->signature);
+ free(c->offsets);
+
+ return r;
+}
+
+typedef struct {
+ const char *types;
+ unsigned n_struct;
+ unsigned n_array;
+} TypeStack;
+
+static int type_stack_push(TypeStack *stack, unsigned max, unsigned *i, const char *types, unsigned n_struct, unsigned n_array) {
+ assert(stack);
+ assert(max > 0);
+
+ if (*i >= max)
+ return -EINVAL;
+
+ stack[*i].types = types;
+ stack[*i].n_struct = n_struct;
+ stack[*i].n_array = n_array;
+ (*i)++;
+
+ return 0;
+}
+
+static int type_stack_pop(TypeStack *stack, unsigned max, unsigned *i, const char **types, unsigned *n_struct, unsigned *n_array) {
+ assert(stack);
+ assert(max > 0);
+ assert(types);
+ assert(n_struct);
+ assert(n_array);
+
+ if (*i <= 0)
+ return 0;
+
+ (*i)--;
+ *types = stack[*i].types;
+ *n_struct = stack[*i].n_struct;
+ *n_array = stack[*i].n_array;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_appendv(
+ sd_bus_message *m,
+ const char *types,
+ va_list ap) {
+
+ unsigned n_array, n_struct;
+ TypeStack stack[BUS_CONTAINER_DEPTH];
+ unsigned stack_ptr = 0;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(types, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ n_array = (unsigned) -1;
+ n_struct = strlen(types);
+
+ for (;;) {
+ const char *t;
+
+ if (n_array == 0 || (n_array == (unsigned) -1 && n_struct == 0)) {
+ r = type_stack_pop(stack, ELEMENTSOF(stack), &stack_ptr, &types, &n_struct, &n_array);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ t = types;
+ if (n_array != (unsigned) -1)
+ n_array--;
+ else {
+ types++;
+ n_struct--;
+ }
+
+ switch (*t) {
+
+ case SD_BUS_TYPE_BYTE: {
+ uint8_t x;
+
+ x = (uint8_t) va_arg(ap, int);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_UNIX_FD: {
+ uint32_t x;
+
+ /* We assume a boolean is the same as int32_t */
+ assert_cc(sizeof(int32_t) == sizeof(int));
+
+ x = va_arg(ap, uint32_t);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16: {
+ uint16_t x;
+
+ x = (uint16_t) va_arg(ap, int);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64: {
+ uint64_t x;
+
+ x = va_arg(ap, uint64_t);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_DOUBLE: {
+ double x;
+
+ x = va_arg(ap, double);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE: {
+ const char *x;
+
+ x = va_arg(ap, const char*);
+ r = sd_bus_message_append_basic(m, *t, x);
+ break;
+ }
+
+ case SD_BUS_TYPE_ARRAY: {
+ size_t k;
+
+ r = signature_element_length(t + 1, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k + 1];
+ memcpy(s, t + 1, k);
+ s[k] = 0;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, s);
+ if (r < 0)
+ return r;
+ }
+
+ if (n_array == (unsigned) -1) {
+ types += k;
+ n_struct -= k;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = t + 1;
+ n_struct = k;
+ n_array = va_arg(ap, unsigned);
+
+ break;
+ }
+
+ case SD_BUS_TYPE_VARIANT: {
+ const char *s;
+
+ s = va_arg(ap, const char*);
+ if (!s)
+ return -EINVAL;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_VARIANT, s);
+ if (r < 0)
+ return r;
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = s;
+ n_struct = strlen(s);
+ n_array = (unsigned) -1;
+
+ break;
+ }
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ size_t k;
+
+ r = signature_element_length(t, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k - 1];
+
+ memcpy(s, t + 1, k - 2);
+ s[k - 2] = 0;
+
+ r = sd_bus_message_open_container(m, *t == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY, s);
+ if (r < 0)
+ return r;
+ }
+
+ if (n_array == (unsigned) -1) {
+ types += k - 1;
+ n_struct -= k - 1;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = t + 1;
+ n_struct = k - 2;
+ n_array = (unsigned) -1;
+
+ break;
+ }
+
+ default:
+ r = -EINVAL;
+ }
+
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+_public_ int sd_bus_message_append(sd_bus_message *m, const char *types, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_message_appendv(m, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_message_append_array_space(
+ sd_bus_message *m,
+ char type,
+ size_t size,
+ void **ptr) {
+
+ ssize_t align, sz;
+ void *a;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(bus_type_is_trivial(type) && type != SD_BUS_TYPE_BOOLEAN, -EINVAL);
+ assert_return(ptr || size == 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ /* alignment and size of the trivial types (except bool) is
+ * identical for gvariant and dbus1 marshalling */
+ align = bus_type_get_alignment(type);
+ sz = bus_type_get_size(type);
+
+ assert_se(align > 0);
+ assert_se(sz > 0);
+
+ if (size % sz != 0)
+ return -EINVAL;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, CHAR_TO_STR(type));
+ if (r < 0)
+ return r;
+
+ a = message_extend_body(m, align, size, false, false);
+ if (!a)
+ return -ENOMEM;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ *ptr = a;
+ return 0;
+}
+
+_public_ int sd_bus_message_append_array(
+ sd_bus_message *m,
+ char type,
+ const void *ptr,
+ size_t size) {
+ int r;
+ void *p;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(bus_type_is_trivial(type), -EINVAL);
+ assert_return(ptr || size == 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ r = sd_bus_message_append_array_space(m, type, size, &p);
+ if (r < 0)
+ return r;
+
+ memcpy_safe(p, ptr, size);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_append_array_iovec(
+ sd_bus_message *m,
+ char type,
+ const struct iovec *iov,
+ unsigned n /* should be size_t, but is API now… 😞 */) {
+
+ size_t size;
+ unsigned i;
+ void *p;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(bus_type_is_trivial(type), -EINVAL);
+ assert_return(iov || n == 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ size = IOVEC_TOTAL_SIZE(iov, n);
+
+ r = sd_bus_message_append_array_space(m, type, size, &p);
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < n; i++) {
+
+ if (iov[i].iov_base)
+ memcpy(p, iov[i].iov_base, iov[i].iov_len);
+ else
+ memzero(p, iov[i].iov_len);
+
+ p = (uint8_t*) p + iov[i].iov_len;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_message_append_array_memfd(
+ sd_bus_message *m,
+ char type,
+ int memfd,
+ uint64_t offset,
+ uint64_t size) {
+
+ _cleanup_close_ int copy_fd = -1;
+ struct bus_body_part *part;
+ ssize_t align, sz;
+ uint64_t real_size;
+ void *a;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(memfd >= 0, -EBADF);
+ assert_return(bus_type_is_trivial(type), -EINVAL);
+ assert_return(size > 0, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ r = memfd_set_sealed(memfd);
+ if (r < 0)
+ return r;
+
+ copy_fd = fcntl(memfd, F_DUPFD_CLOEXEC, 3);
+ if (copy_fd < 0)
+ return copy_fd;
+
+ r = memfd_get_size(memfd, &real_size);
+ if (r < 0)
+ return r;
+
+ if (offset == 0 && size == (uint64_t) -1)
+ size = real_size;
+ else if (offset + size > real_size)
+ return -EMSGSIZE;
+
+ align = bus_type_get_alignment(type);
+ sz = bus_type_get_size(type);
+
+ assert_se(align > 0);
+ assert_se(sz > 0);
+
+ if (offset % align != 0)
+ return -EINVAL;
+
+ if (size % sz != 0)
+ return -EINVAL;
+
+ if (size > (uint64_t) (uint32_t) -1)
+ return -EINVAL;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, CHAR_TO_STR(type));
+ if (r < 0)
+ return r;
+
+ a = message_extend_body(m, align, 0, false, false);
+ if (!a)
+ return -ENOMEM;
+
+ part = message_append_part(m);
+ if (!part)
+ return -ENOMEM;
+
+ part->memfd = copy_fd;
+ part->memfd_offset = offset;
+ part->sealed = true;
+ part->size = size;
+ copy_fd = -1;
+
+ m->body_size += size;
+ message_extend_containers(m, size);
+
+ return sd_bus_message_close_container(m);
+}
+
+_public_ int sd_bus_message_append_string_memfd(
+ sd_bus_message *m,
+ int memfd,
+ uint64_t offset,
+ uint64_t size) {
+
+ _cleanup_close_ int copy_fd = -1;
+ struct bus_body_part *part;
+ struct bus_container *c;
+ uint64_t real_size;
+ void *a;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(memfd >= 0, -EBADF);
+ assert_return(size > 0, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ r = memfd_set_sealed(memfd);
+ if (r < 0)
+ return r;
+
+ copy_fd = fcntl(memfd, FD_CLOEXEC, 3);
+ if (copy_fd < 0)
+ return copy_fd;
+
+ r = memfd_get_size(memfd, &real_size);
+ if (r < 0)
+ return r;
+
+ if (offset == 0 && size == (uint64_t) -1)
+ size = real_size;
+ else if (offset + size > real_size)
+ return -EMSGSIZE;
+
+ /* We require this to be NUL terminated */
+ if (size == 0)
+ return -EINVAL;
+
+ if (size > (uint64_t) (uint32_t) -1)
+ return -EINVAL;
+
+ c = message_get_last_container(m);
+ if (c->signature && c->signature[c->index]) {
+ /* Container signature is already set */
+
+ if (c->signature[c->index] != SD_BUS_TYPE_STRING)
+ return -ENXIO;
+ } else {
+ char *e;
+
+ /* Maybe we can append to the signature? But only if this is the top-level container */
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_STRING), NULL);
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+ }
+
+ if (!BUS_MESSAGE_IS_GVARIANT(m)) {
+ a = message_extend_body(m, 4, 4, false, false);
+ if (!a)
+ return -ENOMEM;
+
+ *(uint32_t*) a = size - 1;
+ }
+
+ part = message_append_part(m);
+ if (!part)
+ return -ENOMEM;
+
+ part->memfd = copy_fd;
+ part->memfd_offset = offset;
+ part->sealed = true;
+ part->size = size;
+ copy_fd = -1;
+
+ m->body_size += size;
+ message_extend_containers(m, size);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ r = message_add_offset(m, m->body_size);
+ if (r < 0) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+ }
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 0;
+}
+
+_public_ int sd_bus_message_append_strv(sd_bus_message *m, char **l) {
+ char **i;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, l) {
+ r = sd_bus_message_append_basic(m, 's', *i);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(m);
+}
+
+static int bus_message_close_header(sd_bus_message *m) {
+
+ assert(m);
+
+ /* The actual user data is finished now, we just complete the
+ variant and struct now (at least on gvariant). Remember
+ this position, so that during parsing we know where to
+ put the outer container end. */
+ m->user_body_size = m->body_size;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ const char *signature;
+ size_t sz, l;
+ void *d;
+
+ /* Add offset table to end of fields array */
+ if (m->n_header_offsets >= 1) {
+ uint8_t *a;
+ unsigned i;
+
+ assert(m->fields_size == m->header_offsets[m->n_header_offsets-1]);
+
+ sz = bus_gvariant_determine_word_size(m->fields_size, m->n_header_offsets);
+ a = message_extend_fields(m, 1, sz * m->n_header_offsets, false);
+ if (!a)
+ return -ENOMEM;
+
+ for (i = 0; i < m->n_header_offsets; i++)
+ bus_gvariant_write_word_le(a + sz*i, sz, m->header_offsets[i]);
+ }
+
+ /* Add gvariant NUL byte plus signature to the end of
+ * the body, followed by the final offset pointing to
+ * the end of the fields array */
+
+ signature = strempty(m->root_container.signature);
+ l = strlen(signature);
+
+ sz = bus_gvariant_determine_word_size(sizeof(struct bus_header) + ALIGN8(m->fields_size) + m->body_size + 1 + l + 2, 1);
+ d = message_extend_body(m, 1, 1 + l + 2 + sz, false, true);
+ if (!d)
+ return -ENOMEM;
+
+ *(uint8_t*) d = 0;
+ *((uint8_t*) d + 1) = SD_BUS_TYPE_STRUCT_BEGIN;
+ memcpy((uint8_t*) d + 2, signature, l);
+ *((uint8_t*) d + 1 + l + 1) = SD_BUS_TYPE_STRUCT_END;
+
+ bus_gvariant_write_word_le((uint8_t*) d + 1 + l + 2, sz, sizeof(struct bus_header) + m->fields_size);
+
+ m->footer = d;
+ m->footer_accessible = 1 + l + 2 + sz;
+ } else {
+ m->header->dbus1.fields_size = m->fields_size;
+ m->header->dbus1.body_size = m->body_size;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_message_seal(sd_bus_message *m, uint64_t cookie, uint64_t timeout_usec) {
+ struct bus_body_part *part;
+ size_t a;
+ unsigned i;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ if (m->sealed)
+ return -EPERM;
+
+ if (m->n_containers > 0)
+ return -EBADMSG;
+
+ if (m->poisoned)
+ return -ESTALE;
+
+ if (cookie > 0xffffffffULL &&
+ !BUS_MESSAGE_IS_GVARIANT(m))
+ return -EOPNOTSUPP;
+
+ /* In vtables the return signature of method calls is listed,
+ * let's check if they match if this is a response */
+ if (m->header->type == SD_BUS_MESSAGE_METHOD_RETURN &&
+ m->enforced_reply_signature &&
+ !streq(strempty(m->root_container.signature), m->enforced_reply_signature))
+ return -ENOMSG;
+
+ /* If gvariant marshalling is used we need to close the body structure */
+ r = bus_message_close_struct(m, &m->root_container, false);
+ if (r < 0)
+ return r;
+
+ /* If there's a non-trivial signature set, then add it in
+ * here, but only on dbus1 */
+ if (!isempty(m->root_container.signature) && !BUS_MESSAGE_IS_GVARIANT(m)) {
+ r = message_append_field_signature(m, BUS_MESSAGE_HEADER_SIGNATURE, m->root_container.signature, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ if (m->n_fds > 0) {
+ r = message_append_field_uint32(m, BUS_MESSAGE_HEADER_UNIX_FDS, m->n_fds);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_message_close_header(m);
+ if (r < 0)
+ return r;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m))
+ m->header->dbus2.cookie = cookie;
+ else
+ m->header->dbus1.serial = (uint32_t) cookie;
+
+ m->timeout = m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED ? 0 : timeout_usec;
+
+ /* Add padding at the end of the fields part, since we know
+ * the body needs to start at an 8 byte alignment. We made
+ * sure we allocated enough space for this, so all we need to
+ * do here is to zero it out. */
+ a = ALIGN8(m->fields_size) - m->fields_size;
+ if (a > 0)
+ memzero((uint8_t*) BUS_MESSAGE_FIELDS(m) + m->fields_size, a);
+
+ /* If this is something we can send as memfd, then let's seal
+ the memfd now. Note that we can send memfds as payload only
+ for directed messages, and not for broadcasts. */
+ if (m->destination && m->bus->use_memfd) {
+ MESSAGE_FOREACH_PART(part, i, m)
+ if (part->memfd >= 0 &&
+ !part->sealed &&
+ (part->size > MEMFD_MIN_SIZE || m->bus->use_memfd < 0) &&
+ part != m->body_end) { /* The last part may never be sent as memfd */
+ uint64_t sz;
+
+ /* Try to seal it if that makes
+ * sense. First, unmap our own map to
+ * make sure we don't keep it busy. */
+ bus_body_part_unmap(part);
+
+ /* Then, sync up real memfd size */
+ sz = part->size;
+ r = memfd_set_size(part->memfd, sz);
+ if (r < 0)
+ return r;
+
+ /* Finally, try to seal */
+ if (memfd_set_sealed(part->memfd) >= 0)
+ part->sealed = true;
+ }
+ }
+
+ m->root_container.end = m->user_body_size;
+ m->root_container.index = 0;
+ m->root_container.offset_index = 0;
+ m->root_container.item_size = m->root_container.n_offsets > 0 ? m->root_container.offsets[0] : 0;
+
+ m->sealed = true;
+
+ return 0;
+}
+
+int bus_body_part_map(struct bus_body_part *part) {
+ void *p;
+ size_t psz, shift;
+
+ assert_se(part);
+
+ if (part->data)
+ return 0;
+
+ if (part->size <= 0)
+ return 0;
+
+ /* For smaller zero parts (as used for padding) we don't need to map anything... */
+ if (part->memfd < 0 && part->is_zero && part->size < 8) {
+ static const uint8_t zeroes[7] = { };
+ part->data = (void*) zeroes;
+ return 0;
+ }
+
+ shift = part->memfd_offset - ((part->memfd_offset / page_size()) * page_size());
+ psz = PAGE_ALIGN(part->size + shift);
+
+ if (part->memfd >= 0)
+ p = mmap(NULL, psz, PROT_READ, MAP_PRIVATE, part->memfd, part->memfd_offset - shift);
+ else if (part->is_zero)
+ p = mmap(NULL, psz, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ else
+ return -EINVAL;
+
+ if (p == MAP_FAILED)
+ return -errno;
+
+ part->mapped = psz;
+ part->mmap_begin = p;
+ part->data = (uint8_t*) p + shift;
+ part->munmap_this = true;
+
+ return 0;
+}
+
+void bus_body_part_unmap(struct bus_body_part *part) {
+
+ assert_se(part);
+
+ if (part->memfd < 0)
+ return;
+
+ if (!part->mmap_begin)
+ return;
+
+ if (!part->munmap_this)
+ return;
+
+ assert_se(munmap(part->mmap_begin, part->mapped) == 0);
+
+ part->mmap_begin = NULL;
+ part->data = NULL;
+ part->mapped = 0;
+ part->munmap_this = false;
+
+ return;
+}
+
+static int buffer_peek(const void *p, uint32_t sz, size_t *rindex, size_t align, size_t nbytes, void **r) {
+ size_t k, start, end;
+
+ assert(rindex);
+ assert(align > 0);
+
+ start = ALIGN_TO((size_t) *rindex, align);
+ end = start + nbytes;
+
+ if (end > sz)
+ return -EBADMSG;
+
+ /* Verify that padding is 0 */
+ for (k = *rindex; k < start; k++)
+ if (((const uint8_t*) p)[k] != 0)
+ return -EBADMSG;
+
+ if (r)
+ *r = (uint8_t*) p + start;
+
+ *rindex = end;
+
+ return 1;
+}
+
+static bool message_end_of_signature(sd_bus_message *m) {
+ struct bus_container *c;
+
+ assert(m);
+
+ c = message_get_last_container(m);
+ return !c->signature || c->signature[c->index] == 0;
+}
+
+static bool message_end_of_array(sd_bus_message *m, size_t index) {
+ struct bus_container *c;
+
+ assert(m);
+
+ c = message_get_last_container(m);
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ return false;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m))
+ return index >= c->end;
+ else {
+ assert(c->array_size);
+ return index >= c->begin + BUS_MESSAGE_BSWAP32(m, *c->array_size);
+ }
+}
+
+_public_ int sd_bus_message_at_end(sd_bus_message *m, int complete) {
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+
+ if (complete && m->n_containers > 0)
+ return false;
+
+ if (message_end_of_signature(m))
+ return true;
+
+ if (message_end_of_array(m, m->rindex))
+ return true;
+
+ return false;
+}
+
+static struct bus_body_part* find_part(sd_bus_message *m, size_t index, size_t sz, void **p) {
+ struct bus_body_part *part;
+ size_t begin;
+ int r;
+
+ assert(m);
+
+ if (m->cached_rindex_part && index >= m->cached_rindex_part_begin) {
+ part = m->cached_rindex_part;
+ begin = m->cached_rindex_part_begin;
+ } else {
+ part = &m->body;
+ begin = 0;
+ }
+
+ while (part) {
+ if (index < begin)
+ return NULL;
+
+ if (index + sz <= begin + part->size) {
+
+ r = bus_body_part_map(part);
+ if (r < 0)
+ return NULL;
+
+ if (p)
+ *p = (uint8_t*) part->data + index - begin;
+
+ m->cached_rindex_part = part;
+ m->cached_rindex_part_begin = begin;
+
+ return part;
+ }
+
+ begin += part->size;
+ part = part->next;
+ }
+
+ return NULL;
+}
+
+static int container_next_item(sd_bus_message *m, struct bus_container *c, size_t *rindex) {
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(rindex);
+
+ if (!BUS_MESSAGE_IS_GVARIANT(m))
+ return 0;
+
+ if (c->enclosing == SD_BUS_TYPE_ARRAY) {
+ int sz;
+
+ sz = bus_gvariant_get_size(c->signature);
+ if (sz < 0) {
+ int alignment;
+
+ if (c->offset_index+1 >= c->n_offsets)
+ goto end;
+
+ /* Variable-size array */
+
+ alignment = bus_gvariant_get_alignment(c->signature);
+ assert(alignment > 0);
+
+ *rindex = ALIGN_TO(c->offsets[c->offset_index], alignment);
+ assert(c->offsets[c->offset_index+1] >= *rindex);
+ c->item_size = c->offsets[c->offset_index+1] - *rindex;
+ } else {
+
+ if (c->offset_index+1 >= (c->end-c->begin)/sz)
+ goto end;
+
+ /* Fixed-size array */
+ *rindex = c->begin + (c->offset_index+1) * sz;
+ c->item_size = sz;
+ }
+
+ c->offset_index++;
+
+ } else if (IN_SET(c->enclosing, 0, SD_BUS_TYPE_STRUCT, SD_BUS_TYPE_DICT_ENTRY)) {
+
+ int alignment;
+ size_t n, j;
+
+ if (c->offset_index+1 >= c->n_offsets)
+ goto end;
+
+ r = signature_element_length(c->signature + c->index, &n);
+ if (r < 0)
+ return r;
+
+ r = signature_element_length(c->signature + c->index + n, &j);
+ if (r < 0)
+ return r;
+ else {
+ char t[j+1];
+ memcpy(t, c->signature + c->index + n, j);
+ t[j] = 0;
+
+ alignment = bus_gvariant_get_alignment(t);
+ }
+
+ assert(alignment > 0);
+
+ *rindex = ALIGN_TO(c->offsets[c->offset_index], alignment);
+ assert(c->offsets[c->offset_index+1] >= *rindex);
+ c->item_size = c->offsets[c->offset_index+1] - *rindex;
+
+ c->offset_index++;
+
+ } else if (c->enclosing == SD_BUS_TYPE_VARIANT)
+ goto end;
+ else
+ assert_not_reached("Unknown container type");
+
+ return 0;
+
+end:
+ /* Reached the end */
+ *rindex = c->end;
+ c->item_size = 0;
+ return 0;
+}
+
+static int message_peek_body(
+ sd_bus_message *m,
+ size_t *rindex,
+ size_t align,
+ size_t nbytes,
+ void **ret) {
+
+ size_t k, start, end, padding;
+ struct bus_body_part *part;
+ uint8_t *q;
+
+ assert(m);
+ assert(rindex);
+ assert(align > 0);
+
+ start = ALIGN_TO((size_t) *rindex, align);
+ padding = start - *rindex;
+ end = start + nbytes;
+
+ if (end > m->user_body_size)
+ return -EBADMSG;
+
+ part = find_part(m, *rindex, padding, (void**) &q);
+ if (!part)
+ return -EBADMSG;
+
+ if (q) {
+ /* Verify padding */
+ for (k = 0; k < padding; k++)
+ if (q[k] != 0)
+ return -EBADMSG;
+ }
+
+ part = find_part(m, start, nbytes, (void**) &q);
+ if (!part || (nbytes > 0 && !q))
+ return -EBADMSG;
+
+ *rindex = end;
+
+ if (ret)
+ *ret = q;
+
+ return 0;
+}
+
+static bool validate_nul(const char *s, size_t l) {
+
+ /* Check for NUL chars in the string */
+ if (memchr(s, 0, l))
+ return false;
+
+ /* Check for NUL termination */
+ if (s[l] != 0)
+ return false;
+
+ return true;
+}
+
+static bool validate_string(const char *s, size_t l) {
+
+ if (!validate_nul(s, l))
+ return false;
+
+ /* Check if valid UTF8 */
+ if (!utf8_is_valid(s))
+ return false;
+
+ return true;
+}
+
+static bool validate_signature(const char *s, size_t l) {
+
+ if (!validate_nul(s, l))
+ return false;
+
+ /* Check if valid signature */
+ if (!signature_is_valid(s, true))
+ return false;
+
+ return true;
+}
+
+static bool validate_object_path(const char *s, size_t l) {
+
+ if (!validate_nul(s, l))
+ return false;
+
+ if (!object_path_is_valid(s))
+ return false;
+
+ return true;
+}
+
+_public_ int sd_bus_message_read_basic(sd_bus_message *m, char type, void *p) {
+ struct bus_container *c;
+ size_t rindex;
+ void *q;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(bus_type_is_basic(type), -EINVAL);
+
+ if (message_end_of_signature(m))
+ return -ENXIO;
+
+ if (message_end_of_array(m, m->rindex))
+ return 0;
+
+ c = message_get_last_container(m);
+ if (c->signature[c->index] != type)
+ return -ENXIO;
+
+ rindex = m->rindex;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+
+ if (IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_SIGNATURE)) {
+ bool ok;
+
+ /* D-Bus spec: The marshalling formats for the string-like types all end
+ * with a single zero (NUL) byte, but that byte is not considered to be part
+ * of the text. */
+ if (c->item_size == 0)
+ return -EBADMSG;
+
+ r = message_peek_body(m, &rindex, 1, c->item_size, &q);
+ if (r < 0)
+ return r;
+
+ if (type == SD_BUS_TYPE_STRING)
+ ok = validate_string(q, c->item_size-1);
+ else if (type == SD_BUS_TYPE_OBJECT_PATH)
+ ok = validate_object_path(q, c->item_size-1);
+ else
+ ok = validate_signature(q, c->item_size-1);
+
+ if (!ok)
+ return -EBADMSG;
+
+ if (p)
+ *(const char**) p = q;
+ } else {
+ int sz, align;
+
+ sz = bus_gvariant_get_size(CHAR_TO_STR(type));
+ assert(sz > 0);
+ if ((size_t) sz != c->item_size)
+ return -EBADMSG;
+
+ align = bus_gvariant_get_alignment(CHAR_TO_STR(type));
+ assert(align > 0);
+
+ r = message_peek_body(m, &rindex, align, c->item_size, &q);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_BYTE:
+ if (p)
+ *(uint8_t*) p = *(uint8_t*) q;
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ if (p)
+ *(int*) p = !!*(uint8_t*) q;
+ break;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ if (p)
+ *(uint16_t*) p = BUS_MESSAGE_BSWAP16(m, *(uint16_t*) q);
+ break;
+
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ if (p)
+ *(uint32_t*) p = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+ break;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ if (p)
+ *(uint64_t*) p = BUS_MESSAGE_BSWAP64(m, *(uint64_t*) q);
+ break;
+
+ case SD_BUS_TYPE_UNIX_FD: {
+ uint32_t j;
+
+ j = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+ if (j >= m->n_fds)
+ return -EBADMSG;
+
+ if (p)
+ *(int*) p = m->fds[j];
+
+ break;
+ }
+
+ default:
+ assert_not_reached("unexpected type");
+ }
+ }
+
+ r = container_next_item(m, c, &rindex);
+ if (r < 0)
+ return r;
+ } else {
+
+ if (IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH)) {
+ uint32_t l;
+ bool ok;
+
+ r = message_peek_body(m, &rindex, 4, 4, &q);
+ if (r < 0)
+ return r;
+
+ l = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+ if (l == UINT32_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_body(m, &rindex, 1, l+1, &q);
+ if (r < 0)
+ return r;
+
+ if (type == SD_BUS_TYPE_OBJECT_PATH)
+ ok = validate_object_path(q, l);
+ else
+ ok = validate_string(q, l);
+ if (!ok)
+ return -EBADMSG;
+
+ if (p)
+ *(const char**) p = q;
+
+ } else if (type == SD_BUS_TYPE_SIGNATURE) {
+ uint8_t l;
+
+ r = message_peek_body(m, &rindex, 1, 1, &q);
+ if (r < 0)
+ return r;
+
+ l = *(uint8_t*) q;
+ if (l == UINT8_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_body(m, &rindex, 1, l+1, &q);
+ if (r < 0)
+ return r;
+
+ if (!validate_signature(q, l))
+ return -EBADMSG;
+
+ if (p)
+ *(const char**) p = q;
+
+ } else {
+ ssize_t sz, align;
+
+ align = bus_type_get_alignment(type);
+ assert(align > 0);
+
+ sz = bus_type_get_size(type);
+ assert(sz > 0);
+
+ r = message_peek_body(m, &rindex, align, sz, &q);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_BYTE:
+ if (p)
+ *(uint8_t*) p = *(uint8_t*) q;
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ if (p)
+ *(int*) p = !!*(uint32_t*) q;
+ break;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ if (p)
+ *(uint16_t*) p = BUS_MESSAGE_BSWAP16(m, *(uint16_t*) q);
+ break;
+
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ if (p)
+ *(uint32_t*) p = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+ break;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ if (p)
+ *(uint64_t*) p = BUS_MESSAGE_BSWAP64(m, *(uint64_t*) q);
+ break;
+
+ case SD_BUS_TYPE_UNIX_FD: {
+ uint32_t j;
+
+ j = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+ if (j >= m->n_fds)
+ return -EBADMSG;
+
+ if (p)
+ *(int*) p = m->fds[j];
+ break;
+ }
+
+ default:
+ assert_not_reached("Unknown basic type...");
+ }
+ }
+ }
+
+ m->rindex = rindex;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 1;
+}
+
+static int bus_message_enter_array(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ uint32_t **array_size,
+ size_t *item_size,
+ size_t **offsets,
+ size_t *n_offsets) {
+
+ size_t rindex;
+ void *q;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(array_size);
+ assert(item_size);
+ assert(offsets);
+ assert(n_offsets);
+
+ if (!signature_is_single(contents, true))
+ return -EINVAL;
+
+ if (!c->signature || c->signature[c->index] == 0)
+ return -ENXIO;
+
+ if (c->signature[c->index] != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+
+ if (!startswith(c->signature + c->index + 1, contents))
+ return -ENXIO;
+
+ rindex = m->rindex;
+
+ if (!BUS_MESSAGE_IS_GVARIANT(m)) {
+ /* dbus1 */
+ int alignment;
+
+ r = message_peek_body(m, &rindex, 4, 4, &q);
+ if (r < 0)
+ return r;
+
+ if (BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q) > BUS_ARRAY_MAX_SIZE)
+ return -EBADMSG;
+
+ alignment = bus_type_get_alignment(contents[0]);
+ if (alignment < 0)
+ return alignment;
+
+ r = message_peek_body(m, &rindex, alignment, 0, NULL);
+ if (r < 0)
+ return r;
+
+ *array_size = (uint32_t*) q;
+
+ } else if (c->item_size <= 0) {
+
+ /* gvariant: empty array */
+ *item_size = 0;
+ *offsets = NULL;
+ *n_offsets = 0;
+
+ } else if (bus_gvariant_is_fixed_size(contents)) {
+
+ /* gvariant: fixed length array */
+ *item_size = bus_gvariant_get_size(contents);
+ *offsets = NULL;
+ *n_offsets = 0;
+
+ } else {
+ size_t where, previous = 0, framing, sz;
+ int alignment;
+ unsigned i;
+
+ /* gvariant: variable length array */
+ sz = bus_gvariant_determine_word_size(c->item_size, 0);
+
+ where = rindex + c->item_size - sz;
+ r = message_peek_body(m, &where, 1, sz, &q);
+ if (r < 0)
+ return r;
+
+ framing = bus_gvariant_read_word_le(q, sz);
+ if (framing > c->item_size - sz)
+ return -EBADMSG;
+ if ((c->item_size - framing) % sz != 0)
+ return -EBADMSG;
+
+ *n_offsets = (c->item_size - framing) / sz;
+
+ where = rindex + framing;
+ r = message_peek_body(m, &where, 1, *n_offsets * sz, &q);
+ if (r < 0)
+ return r;
+
+ *offsets = new(size_t, *n_offsets);
+ if (!*offsets)
+ return -ENOMEM;
+
+ alignment = bus_gvariant_get_alignment(c->signature);
+ assert(alignment > 0);
+
+ for (i = 0; i < *n_offsets; i++) {
+ size_t x, start;
+
+ start = ALIGN_TO(previous, alignment);
+
+ x = bus_gvariant_read_word_le((uint8_t*) q + i * sz, sz);
+ if (x > c->item_size - sz)
+ return -EBADMSG;
+ if (x < start)
+ return -EBADMSG;
+
+ (*offsets)[i] = rindex + x;
+ previous = x;
+ }
+
+ *item_size = (*offsets)[0] - rindex;
+ }
+
+ m->rindex = rindex;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index += 1 + strlen(contents);
+
+ return 1;
+}
+
+static int bus_message_enter_variant(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ size_t *item_size) {
+
+ size_t rindex;
+ uint8_t l;
+ void *q;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(item_size);
+
+ if (!signature_is_single(contents, false))
+ return -EINVAL;
+
+ if (*contents == SD_BUS_TYPE_DICT_ENTRY_BEGIN)
+ return -EINVAL;
+
+ if (!c->signature || c->signature[c->index] == 0)
+ return -ENXIO;
+
+ if (c->signature[c->index] != SD_BUS_TYPE_VARIANT)
+ return -ENXIO;
+
+ rindex = m->rindex;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ size_t k, where;
+
+ k = strlen(contents);
+ if (1+k > c->item_size)
+ return -EBADMSG;
+
+ where = rindex + c->item_size - (1+k);
+ r = message_peek_body(m, &where, 1, 1+k, &q);
+ if (r < 0)
+ return r;
+
+ if (*(char*) q != 0)
+ return -EBADMSG;
+
+ if (memcmp((uint8_t*) q+1, contents, k))
+ return -ENXIO;
+
+ *item_size = c->item_size - (1+k);
+
+ } else {
+ r = message_peek_body(m, &rindex, 1, 1, &q);
+ if (r < 0)
+ return r;
+
+ l = *(uint8_t*) q;
+ if (l == UINT8_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_body(m, &rindex, 1, l+1, &q);
+ if (r < 0)
+ return r;
+
+ if (!validate_signature(q, l))
+ return -EBADMSG;
+
+ if (!streq(q, contents))
+ return -ENXIO;
+ }
+
+ m->rindex = rindex;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 1;
+}
+
+static int build_struct_offsets(
+ sd_bus_message *m,
+ const char *signature,
+ size_t size,
+ size_t *item_size,
+ size_t **offsets,
+ size_t *n_offsets) {
+
+ unsigned n_variable = 0, n_total = 0, v;
+ size_t previous, where;
+ const char *p;
+ size_t sz;
+ void *q;
+ int r;
+
+ assert(m);
+ assert(item_size);
+ assert(offsets);
+ assert(n_offsets);
+
+ if (isempty(signature)) {
+ /* Unary type is encoded as *fixed* 1 byte padding */
+ r = message_peek_body(m, &m->rindex, 1, 1, &q);
+ if (r < 0)
+ return r;
+
+ if (*(uint8_t *) q != 0)
+ return -EBADMSG;
+
+ *item_size = 0;
+ *offsets = NULL;
+ *n_offsets = 0;
+ return 0;
+ }
+
+ sz = bus_gvariant_determine_word_size(size, 0);
+ if (sz <= 0)
+ return -EBADMSG;
+
+ /* First, loop over signature and count variable elements and
+ * elements in general. We use this to know how large the
+ * offset array is at the end of the structure. Note that
+ * GVariant only stores offsets for all variable size elements
+ * that are not the last item. */
+
+ p = signature;
+ while (*p != 0) {
+ size_t n;
+
+ r = signature_element_length(p, &n);
+ if (r < 0)
+ return r;
+ else {
+ char t[n+1];
+
+ memcpy(t, p, n);
+ t[n] = 0;
+
+ r = bus_gvariant_is_fixed_size(t);
+ }
+
+ if (r < 0)
+ return r;
+ if (r == 0 && p[n] != 0) /* except the last item */
+ n_variable++;
+ n_total++;
+
+ p += n;
+ }
+
+ if (size < n_variable * sz)
+ return -EBADMSG;
+
+ where = m->rindex + size - (n_variable * sz);
+ r = message_peek_body(m, &where, 1, n_variable * sz, &q);
+ if (r < 0)
+ return r;
+
+ v = n_variable;
+
+ *offsets = new(size_t, n_total);
+ if (!*offsets)
+ return -ENOMEM;
+
+ *n_offsets = 0;
+
+ /* Second, loop again and build an offset table */
+ p = signature;
+ previous = m->rindex;
+ while (*p != 0) {
+ size_t n, offset;
+ int k;
+
+ r = signature_element_length(p, &n);
+ if (r < 0)
+ return r;
+ else {
+ char t[n+1];
+
+ memcpy(t, p, n);
+ t[n] = 0;
+
+ size_t align = bus_gvariant_get_alignment(t);
+ assert(align > 0);
+
+ /* The possible start of this member after including alignment */
+ size_t start = ALIGN_TO(previous, align);
+
+ k = bus_gvariant_get_size(t);
+ if (k < 0) {
+ size_t x;
+
+ /* Variable size */
+ if (v > 0) {
+ v--;
+
+ x = bus_gvariant_read_word_le((uint8_t*) q + v*sz, sz);
+ if (x >= size)
+ return -EBADMSG;
+ } else
+ /* The last item's end is determined
+ * from the start of the offset array */
+ x = size - (n_variable * sz);
+
+ offset = m->rindex + x;
+ if (offset < start)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "For type %s with alignment %zu, message specifies offset %zu which is smaller than previous end %zu + alignment = %zu",
+ t, align,
+ offset,
+ previous,
+ start);
+ } else
+ /* Fixed size */
+ offset = start + k;
+ }
+
+ previous = (*offsets)[(*n_offsets)++] = offset;
+ p += n;
+ }
+
+ assert(v == 0);
+ assert(*n_offsets == n_total);
+
+ *item_size = (*offsets)[0] - m->rindex;
+ return 0;
+}
+
+static int enter_struct_or_dict_entry(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ size_t *item_size,
+ size_t **offsets,
+ size_t *n_offsets) {
+
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(item_size);
+ assert(offsets);
+ assert(n_offsets);
+
+ if (!BUS_MESSAGE_IS_GVARIANT(m)) {
+
+ /* dbus1 */
+ r = message_peek_body(m, &m->rindex, 8, 0, NULL);
+ if (r < 0)
+ return r;
+
+ } else
+ /* gvariant with contents */
+ return build_struct_offsets(m, contents, c->item_size, item_size, offsets, n_offsets);
+
+ return 0;
+}
+
+static int bus_message_enter_struct(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ size_t *item_size,
+ size_t **offsets,
+ size_t *n_offsets) {
+
+ size_t l;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(item_size);
+ assert(offsets);
+ assert(n_offsets);
+
+ if (!signature_is_valid(contents, false))
+ return -EINVAL;
+
+ if (!c->signature || c->signature[c->index] == 0)
+ return -ENXIO;
+
+ l = strlen(contents);
+
+ if (c->signature[c->index] != SD_BUS_TYPE_STRUCT_BEGIN ||
+ !startswith(c->signature + c->index + 1, contents) ||
+ c->signature[c->index + 1 + l] != SD_BUS_TYPE_STRUCT_END)
+ return -ENXIO;
+
+ r = enter_struct_or_dict_entry(m, c, contents, item_size, offsets, n_offsets);
+ if (r < 0)
+ return r;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index += 1 + l + 1;
+
+ return 1;
+}
+
+static int bus_message_enter_dict_entry(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ size_t *item_size,
+ size_t **offsets,
+ size_t *n_offsets) {
+
+ size_t l;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+
+ if (!signature_is_pair(contents))
+ return -EINVAL;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+
+ if (!c->signature || c->signature[c->index] == 0)
+ return 0;
+
+ l = strlen(contents);
+
+ if (c->signature[c->index] != SD_BUS_TYPE_DICT_ENTRY_BEGIN ||
+ !startswith(c->signature + c->index + 1, contents) ||
+ c->signature[c->index + 1 + l] != SD_BUS_TYPE_DICT_ENTRY_END)
+ return -ENXIO;
+
+ r = enter_struct_or_dict_entry(m, c, contents, item_size, offsets, n_offsets);
+ if (r < 0)
+ return r;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index += 1 + l + 1;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_enter_container(sd_bus_message *m,
+ char type,
+ const char *contents) {
+ struct bus_container *c;
+ uint32_t *array_size = NULL;
+ _cleanup_free_ char *signature = NULL;
+ size_t before, end;
+ _cleanup_free_ size_t *offsets = NULL;
+ size_t n_offsets = 0, item_size = 0;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(type != 0 || !contents, -EINVAL);
+
+ if (type == 0 || !contents) {
+ const char *cc;
+ char tt;
+
+ /* Allow entering into anonymous containers */
+ r = sd_bus_message_peek_type(m, &tt, &cc);
+ if (r < 0)
+ return r;
+
+ if (type != 0 && type != tt)
+ return -ENXIO;
+
+ if (contents && !streq(contents, cc))
+ return -ENXIO;
+
+ type = tt;
+ contents = cc;
+ }
+
+ /*
+ * We enforce a global limit on container depth, that is much
+ * higher than the 32 structs and 32 arrays the specification
+ * mandates. This is simpler to implement for us, and we need
+ * this only to ensure our container array doesn't grow
+ * without bounds. We are happy to return any data from a
+ * message as long as the data itself is valid, even if the
+ * overall message might be not.
+ *
+ * Note that the message signature is validated when
+ * parsing the headers, and that validation does check the
+ * 32/32 limit.
+ *
+ * Note that the specification defines no limits on the depth
+ * of stacked variants, but we do.
+ */
+ if (m->n_containers >= BUS_CONTAINER_DEPTH)
+ return -EBADMSG;
+
+ if (!GREEDY_REALLOC(m->containers, m->containers_allocated, m->n_containers + 1))
+ return -ENOMEM;
+
+ if (message_end_of_signature(m))
+ return -ENXIO;
+
+ if (message_end_of_array(m, m->rindex))
+ return 0;
+
+ c = message_get_last_container(m);
+
+ signature = strdup(contents);
+ if (!signature)
+ return -ENOMEM;
+
+ c->saved_index = c->index;
+ before = m->rindex;
+
+ if (type == SD_BUS_TYPE_ARRAY)
+ r = bus_message_enter_array(m, c, contents, &array_size, &item_size, &offsets, &n_offsets);
+ else if (type == SD_BUS_TYPE_VARIANT)
+ r = bus_message_enter_variant(m, c, contents, &item_size);
+ else if (type == SD_BUS_TYPE_STRUCT)
+ r = bus_message_enter_struct(m, c, contents, &item_size, &offsets, &n_offsets);
+ else if (type == SD_BUS_TYPE_DICT_ENTRY)
+ r = bus_message_enter_dict_entry(m, c, contents, &item_size, &offsets, &n_offsets);
+ else
+ r = -EINVAL;
+ if (r <= 0)
+ return r;
+
+ /* OK, let's fill it in */
+ if (BUS_MESSAGE_IS_GVARIANT(m) &&
+ type == SD_BUS_TYPE_STRUCT &&
+ isempty(signature))
+ end = m->rindex + 0;
+ else
+ end = m->rindex + c->item_size;
+
+ m->containers[m->n_containers++] = (struct bus_container) {
+ .enclosing = type,
+ .signature = TAKE_PTR(signature),
+
+ .before = before,
+ .begin = m->rindex,
+ /* Unary type has fixed size of 1, but virtual size of 0 */
+ .end = end,
+ .array_size = array_size,
+ .item_size = item_size,
+ .offsets = TAKE_PTR(offsets),
+ .n_offsets = n_offsets,
+ };
+
+ return 1;
+}
+
+_public_ int sd_bus_message_exit_container(sd_bus_message *m) {
+ struct bus_container *c;
+ unsigned saved;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(m->n_containers > 0, -ENXIO);
+
+ c = message_get_last_container(m);
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY) {
+ if (c->signature && c->signature[c->index] != 0)
+ return -EBUSY;
+ }
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ if (m->rindex < c->end)
+ return -EBUSY;
+
+ } else if (c->enclosing == SD_BUS_TYPE_ARRAY) {
+ uint32_t l;
+
+ l = BUS_MESSAGE_BSWAP32(m, *c->array_size);
+ if (c->begin + l != m->rindex)
+ return -EBUSY;
+ }
+
+ message_free_last_container(m);
+
+ c = message_get_last_container(m);
+ saved = c->index;
+ c->index = c->saved_index;
+ r = container_next_item(m, c, &m->rindex);
+ c->index = saved;
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static void message_quit_container(sd_bus_message *m) {
+ struct bus_container *c;
+
+ assert(m);
+ assert(m->sealed);
+ assert(m->n_containers > 0);
+
+ /* Undo seeks */
+ c = message_get_last_container(m);
+ assert(m->rindex >= c->before);
+ m->rindex = c->before;
+
+ /* Free container */
+ message_free_last_container(m);
+
+ /* Correct index of new top-level container */
+ c = message_get_last_container(m);
+ c->index = c->saved_index;
+}
+
+_public_ int sd_bus_message_peek_type(sd_bus_message *m, char *type, const char **contents) {
+ struct bus_container *c;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+
+ if (message_end_of_signature(m))
+ goto eof;
+
+ if (message_end_of_array(m, m->rindex))
+ goto eof;
+
+ c = message_get_last_container(m);
+
+ if (bus_type_is_basic(c->signature[c->index])) {
+ if (contents)
+ *contents = NULL;
+ if (type)
+ *type = c->signature[c->index];
+ return 1;
+ }
+
+ if (c->signature[c->index] == SD_BUS_TYPE_ARRAY) {
+
+ if (contents) {
+ size_t l;
+
+ r = signature_element_length(c->signature+c->index+1, &l);
+ if (r < 0)
+ return r;
+
+ /* signature_element_length does verification internally */
+
+ /* The array element must not be empty */
+ assert(l >= 1);
+ if (free_and_strndup(&c->peeked_signature,
+ c->signature + c->index + 1, l) < 0)
+ return -ENOMEM;
+
+ *contents = c->peeked_signature;
+ }
+
+ if (type)
+ *type = SD_BUS_TYPE_ARRAY;
+
+ return 1;
+ }
+
+ if (IN_SET(c->signature[c->index], SD_BUS_TYPE_STRUCT_BEGIN, SD_BUS_TYPE_DICT_ENTRY_BEGIN)) {
+
+ if (contents) {
+ size_t l;
+
+ r = signature_element_length(c->signature+c->index, &l);
+ if (r < 0)
+ return r;
+
+ assert(l >= 3);
+ if (free_and_strndup(&c->peeked_signature,
+ c->signature + c->index + 1, l - 2) < 0)
+ return -ENOMEM;
+
+ *contents = c->peeked_signature;
+ }
+
+ if (type)
+ *type = c->signature[c->index] == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY;
+
+ return 1;
+ }
+
+ if (c->signature[c->index] == SD_BUS_TYPE_VARIANT) {
+ if (contents) {
+ void *q;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ size_t k;
+
+ if (c->item_size < 2)
+ return -EBADMSG;
+
+ /* Look for the NUL delimiter that
+ separates the payload from the
+ signature. Since the body might be
+ in a different part that then the
+ signature we map byte by byte. */
+
+ for (k = 2; k <= c->item_size; k++) {
+ size_t where;
+
+ where = m->rindex + c->item_size - k;
+ r = message_peek_body(m, &where, 1, k, &q);
+ if (r < 0)
+ return r;
+
+ if (*(char*) q == 0)
+ break;
+ }
+
+ if (k > c->item_size)
+ return -EBADMSG;
+
+ if (free_and_strndup(&c->peeked_signature,
+ (char*) q + 1, k - 1) < 0)
+ return -ENOMEM;
+
+ if (!signature_is_valid(c->peeked_signature, true))
+ return -EBADMSG;
+
+ *contents = c->peeked_signature;
+ } else {
+ size_t rindex, l;
+
+ rindex = m->rindex;
+ r = message_peek_body(m, &rindex, 1, 1, &q);
+ if (r < 0)
+ return r;
+
+ l = *(uint8_t*) q;
+ if (l == UINT8_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_body(m, &rindex, 1, l+1, &q);
+ if (r < 0)
+ return r;
+
+ if (!validate_signature(q, l))
+ return -EBADMSG;
+
+ *contents = q;
+ }
+ }
+
+ if (type)
+ *type = SD_BUS_TYPE_VARIANT;
+
+ return 1;
+ }
+
+ return -EINVAL;
+
+eof:
+ if (type)
+ *type = 0;
+ if (contents)
+ *contents = NULL;
+ return 0;
+}
+
+_public_ int sd_bus_message_rewind(sd_bus_message *m, int complete) {
+ struct bus_container *c;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+
+ if (complete) {
+ message_reset_containers(m);
+ m->rindex = 0;
+
+ c = message_get_last_container(m);
+ } else {
+ c = message_get_last_container(m);
+
+ c->index = 0;
+ m->rindex = c->begin;
+ }
+
+ c->offset_index = 0;
+ c->item_size = (c->n_offsets > 0 ? c->offsets[0] : c->end) - c->begin;
+
+ return !isempty(c->signature);
+}
+
+_public_ int sd_bus_message_readv(
+ sd_bus_message *m,
+ const char *types,
+ va_list ap) {
+
+ unsigned n_array, n_struct;
+ TypeStack stack[BUS_CONTAINER_DEPTH];
+ unsigned stack_ptr = 0;
+ unsigned n_loop = 0;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(types, -EINVAL);
+
+ if (isempty(types))
+ return 0;
+
+ /* Ideally, we'd just call ourselves recursively on every
+ * complex type. However, the state of a va_list that is
+ * passed to a function is undefined after that function
+ * returns. This means we need to decode the va_list linearly
+ * in a single stackframe. We hence implement our own
+ * home-grown stack in an array. */
+
+ n_array = (unsigned) -1; /* length of current array entries */
+ n_struct = strlen(types); /* length of current struct contents signature */
+
+ for (;;) {
+ const char *t;
+
+ n_loop++;
+
+ if (n_array == 0 || (n_array == (unsigned) -1 && n_struct == 0)) {
+ r = type_stack_pop(stack, ELEMENTSOF(stack), &stack_ptr, &types, &n_struct, &n_array);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ t = types;
+ if (n_array != (unsigned) -1)
+ n_array--;
+ else {
+ types++;
+ n_struct--;
+ }
+
+ switch (*t) {
+
+ case SD_BUS_TYPE_BYTE:
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_UNIX_FD: {
+ void *p;
+
+ p = va_arg(ap, void*);
+ r = sd_bus_message_read_basic(m, *t, p);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (n_loop <= 1)
+ return 0;
+
+ return -ENXIO;
+ }
+
+ break;
+ }
+
+ case SD_BUS_TYPE_ARRAY: {
+ size_t k;
+
+ r = signature_element_length(t + 1, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k + 1];
+ memcpy(s, t + 1, k);
+ s[k] = 0;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, s);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (n_loop <= 1)
+ return 0;
+
+ return -ENXIO;
+ }
+ }
+
+ if (n_array == (unsigned) -1) {
+ types += k;
+ n_struct -= k;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = t + 1;
+ n_struct = k;
+ n_array = va_arg(ap, unsigned);
+
+ break;
+ }
+
+ case SD_BUS_TYPE_VARIANT: {
+ const char *s;
+
+ s = va_arg(ap, const char *);
+ if (!s)
+ return -EINVAL;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, s);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (n_loop <= 1)
+ return 0;
+
+ return -ENXIO;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = s;
+ n_struct = strlen(s);
+ n_array = (unsigned) -1;
+
+ break;
+ }
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ size_t k;
+
+ r = signature_element_length(t, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k - 1];
+ memcpy(s, t + 1, k - 2);
+ s[k - 2] = 0;
+
+ r = sd_bus_message_enter_container(m, *t == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY, s);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (n_loop <= 1)
+ return 0;
+ return -ENXIO;
+ }
+ }
+
+ if (n_array == (unsigned) -1) {
+ types += k - 1;
+ n_struct -= k - 1;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = t + 1;
+ n_struct = k - 2;
+ n_array = (unsigned) -1;
+
+ break;
+ }
+
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return 1;
+}
+
+_public_ int sd_bus_message_read(sd_bus_message *m, const char *types, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_message_readv(m, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_message_skip(sd_bus_message *m, const char *types) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+
+ /* If types is NULL, read exactly one element */
+ if (!types) {
+ struct bus_container *c;
+ size_t l;
+
+ if (message_end_of_signature(m))
+ return -ENXIO;
+
+ if (message_end_of_array(m, m->rindex))
+ return 0;
+
+ c = message_get_last_container(m);
+
+ r = signature_element_length(c->signature + c->index, &l);
+ if (r < 0)
+ return r;
+
+ types = strndupa(c->signature + c->index, l);
+ }
+
+ switch (*types) {
+
+ case 0: /* Nothing to drop */
+ return 0;
+
+ case SD_BUS_TYPE_BYTE:
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_UNIX_FD:
+
+ r = sd_bus_message_read_basic(m, *types, NULL);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_message_skip(m, types + 1);
+ if (r < 0)
+ return r;
+
+ return 1;
+
+ case SD_BUS_TYPE_ARRAY: {
+ size_t k;
+
+ r = signature_element_length(types + 1, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k+1];
+ memcpy(s, types+1, k);
+ s[k] = 0;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, s);
+ if (r <= 0)
+ return r;
+
+ for (;;) {
+ r = sd_bus_message_skip(m, s);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_skip(m, types + 1 + k);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_VARIANT: {
+ const char *contents;
+ char x;
+
+ r = sd_bus_message_peek_type(m, &x, &contents);
+ if (r <= 0)
+ return r;
+
+ if (x != SD_BUS_TYPE_VARIANT)
+ return -ENXIO;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_message_skip(m, contents);
+ if (r < 0)
+ return r;
+ assert(r != 0);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_skip(m, types + 1);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ size_t k;
+
+ r = signature_element_length(types, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k-1];
+ memcpy(s, types+1, k-2);
+ s[k-2] = 0;
+
+ r = sd_bus_message_enter_container(m, *types == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY, s);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_message_skip(m, s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_skip(m, types + k);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ default:
+ return -EINVAL;
+ }
+}
+
+_public_ int sd_bus_message_read_array(
+ sd_bus_message *m,
+ char type,
+ const void **ptr,
+ size_t *size) {
+
+ struct bus_container *c;
+ void *p;
+ size_t sz;
+ ssize_t align;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(bus_type_is_trivial(type), -EINVAL);
+ assert_return(ptr, -EINVAL);
+ assert_return(size, -EINVAL);
+ assert_return(!BUS_MESSAGE_NEED_BSWAP(m), -EOPNOTSUPP);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, CHAR_TO_STR(type));
+ if (r <= 0)
+ return r;
+
+ c = message_get_last_container(m);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ align = bus_gvariant_get_alignment(CHAR_TO_STR(type));
+ if (align < 0)
+ return align;
+
+ sz = c->end - c->begin;
+ } else {
+ align = bus_type_get_alignment(type);
+ if (align < 0)
+ return align;
+
+ sz = BUS_MESSAGE_BSWAP32(m, *c->array_size);
+ }
+
+ if (sz == 0)
+ /* Zero length array, let's return some aligned
+ * pointer that is not NULL */
+ p = (uint8_t*) align;
+ else {
+ r = message_peek_body(m, &m->rindex, align, sz, &p);
+ if (r < 0)
+ goto fail;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ goto fail;
+
+ *ptr = (const void*) p;
+ *size = sz;
+
+ return 1;
+
+fail:
+ message_quit_container(m);
+ return r;
+}
+
+static int message_peek_fields(
+ sd_bus_message *m,
+ size_t *rindex,
+ size_t align,
+ size_t nbytes,
+ void **ret) {
+
+ assert(m);
+ assert(rindex);
+ assert(align > 0);
+
+ return buffer_peek(BUS_MESSAGE_FIELDS(m), m->fields_size, rindex, align, nbytes, ret);
+}
+
+static int message_peek_field_uint32(
+ sd_bus_message *m,
+ size_t *ri,
+ size_t item_size,
+ uint32_t *ret) {
+
+ int r;
+ void *q;
+
+ assert(m);
+ assert(ri);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m) && item_size != 4)
+ return -EBADMSG;
+
+ /* identical for gvariant and dbus1 */
+
+ r = message_peek_fields(m, ri, 4, 4, &q);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+
+ return 0;
+}
+
+static int message_peek_field_uint64(
+ sd_bus_message *m,
+ size_t *ri,
+ size_t item_size,
+ uint64_t *ret) {
+
+ int r;
+ void *q;
+
+ assert(m);
+ assert(ri);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m) && item_size != 8)
+ return -EBADMSG;
+
+ /* identical for gvariant and dbus1 */
+
+ r = message_peek_fields(m, ri, 8, 8, &q);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = BUS_MESSAGE_BSWAP64(m, *(uint64_t*) q);
+
+ return 0;
+}
+
+static int message_peek_field_string(
+ sd_bus_message *m,
+ bool (*validate)(const char *p),
+ size_t *ri,
+ size_t item_size,
+ const char **ret) {
+
+ uint32_t l;
+ int r;
+ void *q;
+
+ assert(m);
+ assert(ri);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+
+ if (item_size <= 0)
+ return -EBADMSG;
+
+ r = message_peek_fields(m, ri, 1, item_size, &q);
+ if (r < 0)
+ return r;
+
+ l = item_size - 1;
+ } else {
+ r = message_peek_field_uint32(m, ri, 4, &l);
+ if (r < 0)
+ return r;
+
+ if (l == UINT32_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_fields(m, ri, 1, l+1, &q);
+ if (r < 0)
+ return r;
+ }
+
+ if (validate) {
+ if (!validate_nul(q, l))
+ return -EBADMSG;
+
+ if (!validate(q))
+ return -EBADMSG;
+ } else {
+ if (!validate_string(q, l))
+ return -EBADMSG;
+ }
+
+ if (ret)
+ *ret = q;
+
+ return 0;
+}
+
+static int message_peek_field_signature(
+ sd_bus_message *m,
+ size_t *ri,
+ size_t item_size,
+ const char **ret) {
+
+ size_t l;
+ int r;
+ void *q;
+
+ assert(m);
+ assert(ri);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+
+ if (item_size <= 0)
+ return -EBADMSG;
+
+ r = message_peek_fields(m, ri, 1, item_size, &q);
+ if (r < 0)
+ return r;
+
+ l = item_size - 1;
+ } else {
+ r = message_peek_fields(m, ri, 1, 1, &q);
+ if (r < 0)
+ return r;
+
+ l = *(uint8_t*) q;
+ if (l == UINT8_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_fields(m, ri, 1, l+1, &q);
+ if (r < 0)
+ return r;
+ }
+
+ if (!validate_signature(q, l))
+ return -EBADMSG;
+
+ if (ret)
+ *ret = q;
+
+ return 0;
+}
+
+static int message_skip_fields(
+ sd_bus_message *m,
+ size_t *ri,
+ uint32_t array_size,
+ const char **signature) {
+
+ size_t original_index;
+ int r;
+
+ assert(m);
+ assert(ri);
+ assert(signature);
+ assert(!BUS_MESSAGE_IS_GVARIANT(m));
+
+ original_index = *ri;
+
+ for (;;) {
+ char t;
+ size_t l;
+
+ if (array_size != (uint32_t) -1 &&
+ array_size <= *ri - original_index)
+ return 0;
+
+ t = **signature;
+ if (!t)
+ return 0;
+
+ if (t == SD_BUS_TYPE_STRING) {
+
+ r = message_peek_field_string(m, NULL, ri, 0, NULL);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (t == SD_BUS_TYPE_OBJECT_PATH) {
+
+ r = message_peek_field_string(m, object_path_is_valid, ri, 0, NULL);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (t == SD_BUS_TYPE_SIGNATURE) {
+
+ r = message_peek_field_signature(m, ri, 0, NULL);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (bus_type_is_basic(t)) {
+ ssize_t align, k;
+
+ align = bus_type_get_alignment(t);
+ k = bus_type_get_size(t);
+ assert(align > 0 && k > 0);
+
+ r = message_peek_fields(m, ri, align, k, NULL);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (t == SD_BUS_TYPE_ARRAY) {
+
+ r = signature_element_length(*signature + 1, &l);
+ if (r < 0)
+ return r;
+
+ assert(l >= 1);
+ {
+ char sig[l + 1], *s = sig;
+ uint32_t nas;
+ int alignment;
+
+ strncpy(sig, *signature + 1, l);
+ sig[l] = '\0';
+
+ alignment = bus_type_get_alignment(sig[0]);
+ if (alignment < 0)
+ return alignment;
+
+ r = message_peek_field_uint32(m, ri, 0, &nas);
+ if (r < 0)
+ return r;
+ if (nas > BUS_ARRAY_MAX_SIZE)
+ return -EBADMSG;
+
+ r = message_peek_fields(m, ri, alignment, 0, NULL);
+ if (r < 0)
+ return r;
+
+ r = message_skip_fields(m, ri, nas, (const char**) &s);
+ if (r < 0)
+ return r;
+ }
+
+ (*signature) += 1 + l;
+
+ } else if (t == SD_BUS_TYPE_VARIANT) {
+ const char *s;
+
+ r = message_peek_field_signature(m, ri, 0, &s);
+ if (r < 0)
+ return r;
+
+ r = message_skip_fields(m, ri, (uint32_t) -1, (const char**) &s);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (IN_SET(t, SD_BUS_TYPE_STRUCT, SD_BUS_TYPE_DICT_ENTRY)) {
+
+ r = signature_element_length(*signature, &l);
+ if (r < 0)
+ return r;
+
+ assert(l >= 2);
+ {
+ char sig[l + 1], *s = sig;
+ strncpy(sig, *signature + 1, l);
+ sig[l] = '\0';
+
+ r = message_skip_fields(m, ri, (uint32_t) -1, (const char**) &s);
+ if (r < 0)
+ return r;
+ }
+
+ *signature += l;
+ } else
+ return -EBADMSG;
+ }
+}
+
+int bus_message_parse_fields(sd_bus_message *m) {
+ size_t ri;
+ int r;
+ uint32_t unix_fds = 0;
+ bool unix_fds_set = false;
+ void *offsets = NULL;
+ unsigned n_offsets = 0;
+ size_t sz = 0;
+ unsigned i = 0;
+
+ assert(m);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ char *p;
+
+ /* Read the signature from the end of the body variant first */
+ sz = bus_gvariant_determine_word_size(BUS_MESSAGE_SIZE(m), 0);
+ if (m->footer_accessible < 1 + sz)
+ return -EBADMSG;
+
+ p = (char*) m->footer + m->footer_accessible - (1 + sz);
+ for (;;) {
+ if (p < (char*) m->footer)
+ return -EBADMSG;
+
+ if (*p == 0) {
+ char *k;
+ size_t l;
+
+ /* We found the beginning of the signature
+ * string, yay! We require the body to be a
+ * structure, so verify it and then strip the
+ * opening/closing brackets. */
+
+ l = (char*) m->footer + m->footer_accessible - p - (1 + sz);
+ if (l < 2 ||
+ p[1] != SD_BUS_TYPE_STRUCT_BEGIN ||
+ p[1 + l - 1] != SD_BUS_TYPE_STRUCT_END)
+ return -EBADMSG;
+
+ k = memdup_suffix0(p + 1 + 1, l - 2);
+ if (!k)
+ return -ENOMEM;
+
+ free_and_replace(m->root_container.signature, k);
+ break;
+ }
+
+ p--;
+ }
+
+ /* Calculate the actual user body size, by removing
+ * the trailing variant signature and struct offset
+ * table */
+ m->user_body_size = m->body_size - ((char*) m->footer + m->footer_accessible - p);
+
+ /* Pull out the offset table for the fields array */
+ sz = bus_gvariant_determine_word_size(m->fields_size, 0);
+ if (sz > 0) {
+ size_t framing;
+ void *q;
+
+ ri = m->fields_size - sz;
+ r = message_peek_fields(m, &ri, 1, sz, &q);
+ if (r < 0)
+ return r;
+
+ framing = bus_gvariant_read_word_le(q, sz);
+ if (framing >= m->fields_size - sz)
+ return -EBADMSG;
+ if ((m->fields_size - framing) % sz != 0)
+ return -EBADMSG;
+
+ ri = framing;
+ r = message_peek_fields(m, &ri, 1, m->fields_size - framing, &offsets);
+ if (r < 0)
+ return r;
+
+ n_offsets = (m->fields_size - framing) / sz;
+ }
+ } else
+ m->user_body_size = m->body_size;
+
+ ri = 0;
+ while (ri < m->fields_size) {
+ _cleanup_free_ char *sig = NULL;
+ const char *signature;
+ uint64_t field_type;
+ size_t item_size = (size_t) -1;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ uint64_t *u64;
+
+ if (i >= n_offsets)
+ break;
+
+ if (i == 0)
+ ri = 0;
+ else
+ ri = ALIGN_TO(bus_gvariant_read_word_le((uint8_t*) offsets + (i-1)*sz, sz), 8);
+
+ r = message_peek_fields(m, &ri, 8, 8, (void**) &u64);
+ if (r < 0)
+ return r;
+
+ field_type = BUS_MESSAGE_BSWAP64(m, *u64);
+ } else {
+ uint8_t *u8;
+
+ r = message_peek_fields(m, &ri, 8, 1, (void**) &u8);
+ if (r < 0)
+ return r;
+
+ field_type = *u8;
+ }
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ size_t where, end;
+ char *b;
+ void *q;
+
+ end = bus_gvariant_read_word_le((uint8_t*) offsets + i*sz, sz);
+
+ if (end < ri)
+ return -EBADMSG;
+
+ where = ri = ALIGN_TO(ri, 8);
+ item_size = end - ri;
+ r = message_peek_fields(m, &where, 1, item_size, &q);
+ if (r < 0)
+ return r;
+
+ b = memrchr(q, 0, item_size);
+ if (!b)
+ return -EBADMSG;
+
+ sig = strndup(b+1, item_size - (b+1-(char*) q));
+ if (!sig)
+ return -ENOMEM;
+
+ signature = sig;
+ item_size = b - (char*) q;
+ } else {
+ r = message_peek_field_signature(m, &ri, 0, &signature);
+ if (r < 0)
+ return r;
+ }
+
+ switch (field_type) {
+
+ case _BUS_MESSAGE_HEADER_INVALID:
+ return -EBADMSG;
+
+ case BUS_MESSAGE_HEADER_PATH:
+
+ if (m->path)
+ return -EBADMSG;
+
+ if (!streq(signature, "o"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, object_path_is_valid, &ri, item_size, &m->path);
+ break;
+
+ case BUS_MESSAGE_HEADER_INTERFACE:
+
+ if (m->interface)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, interface_name_is_valid, &ri, item_size, &m->interface);
+ break;
+
+ case BUS_MESSAGE_HEADER_MEMBER:
+
+ if (m->member)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, member_name_is_valid, &ri, item_size, &m->member);
+ break;
+
+ case BUS_MESSAGE_HEADER_ERROR_NAME:
+
+ if (m->error.name)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, error_name_is_valid, &ri, item_size, &m->error.name);
+ if (r >= 0)
+ m->error._need_free = -1;
+
+ break;
+
+ case BUS_MESSAGE_HEADER_DESTINATION:
+
+ if (m->destination)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, service_name_is_valid, &ri, item_size, &m->destination);
+ break;
+
+ case BUS_MESSAGE_HEADER_SENDER:
+
+ if (m->sender)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, service_name_is_valid, &ri, item_size, &m->sender);
+
+ if (r >= 0 && m->sender[0] == ':' && m->bus->bus_client) {
+ m->creds.unique_name = (char*) m->sender;
+ m->creds.mask |= SD_BUS_CREDS_UNIQUE_NAME & m->bus->creds_mask;
+ }
+
+ break;
+
+ case BUS_MESSAGE_HEADER_SIGNATURE: {
+ const char *s;
+ char *c;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) /* only applies to dbus1 */
+ return -EBADMSG;
+
+ if (m->root_container.signature)
+ return -EBADMSG;
+
+ if (!streq(signature, "g"))
+ return -EBADMSG;
+
+ r = message_peek_field_signature(m, &ri, item_size, &s);
+ if (r < 0)
+ return r;
+
+ c = strdup(s);
+ if (!c)
+ return -ENOMEM;
+
+ free_and_replace(m->root_container.signature, c);
+ break;
+ }
+
+ case BUS_MESSAGE_HEADER_REPLY_SERIAL:
+
+ if (m->reply_cookie != 0)
+ return -EBADMSG;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ /* 64bit on dbus2 */
+
+ if (!streq(signature, "t"))
+ return -EBADMSG;
+
+ r = message_peek_field_uint64(m, &ri, item_size, &m->reply_cookie);
+ if (r < 0)
+ return r;
+ } else {
+ /* 32bit on dbus1 */
+ uint32_t serial;
+
+ if (!streq(signature, "u"))
+ return -EBADMSG;
+
+ r = message_peek_field_uint32(m, &ri, item_size, &serial);
+ if (r < 0)
+ return r;
+
+ m->reply_cookie = serial;
+ }
+
+ if (m->reply_cookie == 0)
+ return -EBADMSG;
+
+ break;
+
+ case BUS_MESSAGE_HEADER_UNIX_FDS:
+ if (unix_fds_set)
+ return -EBADMSG;
+
+ if (!streq(signature, "u"))
+ return -EBADMSG;
+
+ r = message_peek_field_uint32(m, &ri, item_size, &unix_fds);
+ if (r < 0)
+ return -EBADMSG;
+
+ unix_fds_set = true;
+ break;
+
+ default:
+ if (!BUS_MESSAGE_IS_GVARIANT(m))
+ r = message_skip_fields(m, &ri, (uint32_t) -1, (const char **) &signature);
+ }
+
+ if (r < 0)
+ return r;
+
+ i++;
+ }
+
+ if (m->n_fds != unix_fds)
+ return -EBADMSG;
+
+ switch (m->header->type) {
+
+ case SD_BUS_MESSAGE_SIGNAL:
+ if (!m->path || !m->interface || !m->member)
+ return -EBADMSG;
+
+ if (m->reply_cookie != 0)
+ return -EBADMSG;
+
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_CALL:
+
+ if (!m->path || !m->member)
+ return -EBADMSG;
+
+ if (m->reply_cookie != 0)
+ return -EBADMSG;
+
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_RETURN:
+
+ if (m->reply_cookie == 0)
+ return -EBADMSG;
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_ERROR:
+
+ if (m->reply_cookie == 0 || !m->error.name)
+ return -EBADMSG;
+ break;
+ }
+
+ /* Refuse non-local messages that claim they are local */
+ if (streq_ptr(m->path, "/org/freedesktop/DBus/Local"))
+ return -EBADMSG;
+ if (streq_ptr(m->interface, "org.freedesktop.DBus.Local"))
+ return -EBADMSG;
+ if (streq_ptr(m->sender, "org.freedesktop.DBus.Local"))
+ return -EBADMSG;
+
+ m->root_container.end = m->user_body_size;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ r = build_struct_offsets(
+ m,
+ m->root_container.signature,
+ m->user_body_size,
+ &m->root_container.item_size,
+ &m->root_container.offsets,
+ &m->root_container.n_offsets);
+ if (r == -EINVAL)
+ return -EBADMSG;
+ if (r < 0)
+ return r;
+ }
+
+ /* Try to read the error message, but if we can't it's a non-issue */
+ if (m->header->type == SD_BUS_MESSAGE_METHOD_ERROR)
+ (void) sd_bus_message_read(m, "s", &m->error.message);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_set_destination(sd_bus_message *m, const char *destination) {
+ assert_return(m, -EINVAL);
+ assert_return(destination, -EINVAL);
+ assert_return(service_name_is_valid(destination), -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->destination, -EEXIST);
+
+ return message_append_field_string(m, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, destination, &m->destination);
+}
+
+_public_ int sd_bus_message_set_sender(sd_bus_message *m, const char *sender) {
+ assert_return(m, -EINVAL);
+ assert_return(sender, -EINVAL);
+ assert_return(service_name_is_valid(sender), -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->sender, -EEXIST);
+
+ return message_append_field_string(m, BUS_MESSAGE_HEADER_SENDER, SD_BUS_TYPE_STRING, sender, &m->sender);
+}
+
+int bus_message_get_blob(sd_bus_message *m, void **buffer, size_t *sz) {
+ size_t total;
+ void *p, *e;
+ size_t i;
+ struct bus_body_part *part;
+
+ assert(m);
+ assert(buffer);
+ assert(sz);
+
+ total = BUS_MESSAGE_SIZE(m);
+
+ p = malloc(total);
+ if (!p)
+ return -ENOMEM;
+
+ e = mempcpy(p, m->header, BUS_MESSAGE_BODY_BEGIN(m));
+ MESSAGE_FOREACH_PART(part, i, m)
+ e = mempcpy(e, part->data, part->size);
+
+ assert(total == (size_t) ((uint8_t*) e - (uint8_t*) p));
+
+ *buffer = p;
+ *sz = total;
+
+ return 0;
+}
+
+int bus_message_read_strv_extend(sd_bus_message *m, char ***l) {
+ const char *s;
+ int r;
+
+ assert(m);
+ assert(l);
+
+ r = sd_bus_message_enter_container(m, 'a', "s");
+ if (r <= 0)
+ return r;
+
+ while ((r = sd_bus_message_read_basic(m, 's', &s)) > 0) {
+ r = strv_extend(l, s);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_read_strv(sd_bus_message *m, char ***l) {
+ _cleanup_strv_free_ char **strv = NULL;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(l, -EINVAL);
+
+ r = bus_message_read_strv_extend(m, &strv);
+ if (r <= 0)
+ return r;
+
+ *l = TAKE_PTR(strv);
+ return 1;
+}
+
+static int bus_message_get_arg_skip(
+ sd_bus_message *m,
+ unsigned i,
+ char *_type,
+ const char **_contents) {
+
+ unsigned j;
+ int r;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ for (j = 0;; j++) {
+ const char *contents;
+ char type;
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENXIO;
+
+ /* Don't match against arguments after the first one we don't understand */
+ if (!IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_SIGNATURE) &&
+ !(type == SD_BUS_TYPE_ARRAY && STR_IN_SET(contents, "s", "o", "g")))
+ return -ENXIO;
+
+ if (j >= i) {
+ if (_contents)
+ *_contents = contents;
+ if (_type)
+ *_type = type;
+ return 0;
+ }
+
+ r = sd_bus_message_skip(m, NULL);
+ if (r < 0)
+ return r;
+ }
+
+}
+
+int bus_message_get_arg(sd_bus_message *m, unsigned i, const char **str) {
+ char type;
+ int r;
+
+ assert(m);
+ assert(str);
+
+ r = bus_message_get_arg_skip(m, i, &type, NULL);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_SIGNATURE))
+ return -ENXIO;
+
+ return sd_bus_message_read_basic(m, type, str);
+}
+
+int bus_message_get_arg_strv(sd_bus_message *m, unsigned i, char ***strv) {
+ const char *contents;
+ char type;
+ int r;
+
+ assert(m);
+ assert(strv);
+
+ r = bus_message_get_arg_skip(m, i, &type, &contents);
+ if (r < 0)
+ return r;
+
+ if (type != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+ if (!STR_IN_SET(contents, "s", "o", "g"))
+ return -ENXIO;
+
+ return sd_bus_message_read_strv(m, strv);
+}
+
+_public_ int sd_bus_message_get_errno(sd_bus_message *m) {
+ assert_return(m, EINVAL);
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_ERROR)
+ return 0;
+
+ return sd_bus_error_get_errno(&m->error);
+}
+
+_public_ const char* sd_bus_message_get_signature(sd_bus_message *m, int complete) {
+ struct bus_container *c;
+
+ assert_return(m, NULL);
+
+ c = complete ? &m->root_container : message_get_last_container(m);
+ return strempty(c->signature);
+}
+
+_public_ int sd_bus_message_is_empty(sd_bus_message *m) {
+ assert_return(m, -EINVAL);
+
+ return isempty(m->root_container.signature);
+}
+
+_public_ int sd_bus_message_has_signature(sd_bus_message *m, const char *signature) {
+ assert_return(m, -EINVAL);
+
+ return streq(strempty(m->root_container.signature), strempty(signature));
+}
+
+_public_ int sd_bus_message_copy(sd_bus_message *m, sd_bus_message *source, int all) {
+ bool done_something = false;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(source, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(source->sealed, -EPERM);
+
+ do {
+ const char *contents;
+ char type;
+ union {
+ uint8_t u8;
+ uint16_t u16;
+ int16_t s16;
+ uint32_t u32;
+ int32_t s32;
+ uint64_t u64;
+ int64_t s64;
+ double d64;
+ const char *string;
+ int i;
+ } basic;
+
+ r = sd_bus_message_peek_type(source, &type, &contents);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ done_something = true;
+
+ if (bus_type_is_container(type) > 0) {
+
+ r = sd_bus_message_enter_container(source, type, contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, type, contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_copy(m, source, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(source);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ r = sd_bus_message_read_basic(source, type, &basic);
+ if (r < 0)
+ return r;
+
+ assert(r > 0);
+
+ if (IN_SET(type, SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_SIGNATURE, SD_BUS_TYPE_STRING))
+ r = sd_bus_message_append_basic(m, type, basic.string);
+ else
+ r = sd_bus_message_append_basic(m, type, &basic);
+
+ if (r < 0)
+ return r;
+
+ } while (all);
+
+ return done_something;
+}
+
+_public_ int sd_bus_message_verify_type(sd_bus_message *m, char type, const char *contents) {
+ const char *c;
+ char t;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(!type || bus_type_is_valid(type), -EINVAL);
+ assert_return(!contents || signature_is_valid(contents, true), -EINVAL);
+ assert_return(type || contents, -EINVAL);
+ assert_return(!contents || !type || bus_type_is_container(type), -EINVAL);
+
+ r = sd_bus_message_peek_type(m, &t, &c);
+ if (r <= 0)
+ return r;
+
+ if (type != 0 && type != t)
+ return 0;
+
+ if (contents && !streq_ptr(contents, c))
+ return 0;
+
+ return 1;
+}
+
+_public_ sd_bus *sd_bus_message_get_bus(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->bus;
+}
+
+int bus_message_remarshal(sd_bus *bus, sd_bus_message **m) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *n = NULL;
+ usec_t timeout;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(*m);
+
+ switch ((*m)->header->type) {
+
+ case SD_BUS_MESSAGE_SIGNAL:
+ r = sd_bus_message_new_signal(bus, &n, (*m)->path, (*m)->interface, (*m)->member);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_CALL:
+ r = sd_bus_message_new_method_call(bus, &n, (*m)->destination, (*m)->path, (*m)->interface, (*m)->member);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_RETURN:
+ case SD_BUS_MESSAGE_METHOD_ERROR:
+
+ r = sd_bus_message_new(bus, &n, (*m)->header->type);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(n);
+
+ n->reply_cookie = (*m)->reply_cookie;
+
+ r = message_append_reply_cookie(n, n->reply_cookie);
+ if (r < 0)
+ return r;
+
+ if ((*m)->header->type == SD_BUS_MESSAGE_METHOD_ERROR && (*m)->error.name) {
+ r = message_append_field_string(n, BUS_MESSAGE_HEADER_ERROR_NAME, SD_BUS_TYPE_STRING, (*m)->error.name, &n->error.message);
+ if (r < 0)
+ return r;
+
+ n->error._need_free = -1;
+ }
+
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if ((*m)->destination && !n->destination) {
+ r = message_append_field_string(n, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, (*m)->destination, &n->destination);
+ if (r < 0)
+ return r;
+ }
+
+ if ((*m)->sender && !n->sender) {
+ r = message_append_field_string(n, BUS_MESSAGE_HEADER_SENDER, SD_BUS_TYPE_STRING, (*m)->sender, &n->sender);
+ if (r < 0)
+ return r;
+ }
+
+ n->header->flags |= (*m)->header->flags & (BUS_MESSAGE_NO_REPLY_EXPECTED|BUS_MESSAGE_NO_AUTO_START);
+
+ r = sd_bus_message_copy(n, *m, true);
+ if (r < 0)
+ return r;
+
+ timeout = (*m)->timeout;
+ if (timeout == 0 && !((*m)->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)) {
+ r = sd_bus_get_method_call_timeout(bus, &timeout);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_seal(n, BUS_MESSAGE_COOKIE(*m), timeout);
+ if (r < 0)
+ return r;
+
+ sd_bus_message_unref(*m);
+ *m = TAKE_PTR(n);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_get_priority(sd_bus_message *m, int64_t *priority) {
+ assert_return(m, -EINVAL);
+ assert_return(priority, -EINVAL);
+
+ *priority = m->priority;
+ return 0;
+}
+
+_public_ int sd_bus_message_set_priority(sd_bus_message *m, int64_t priority) {
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ m->priority = priority;
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/bus-message.h b/src/libsystemd/sd-bus/bus-message.h
new file mode 100644
index 0000000..0115437
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-message.h
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <byteswap.h>
+#include <stdbool.h>
+#include <sys/socket.h>
+
+#include "sd-bus.h"
+
+#include "bus-creds.h"
+#include "bus-protocol.h"
+#include "macro.h"
+#include "time-util.h"
+
+struct bus_container {
+ char enclosing;
+ bool need_offsets:1;
+
+ /* Indexes into the signature string */
+ unsigned index, saved_index;
+ char *signature;
+
+ size_t before, begin, end;
+
+ /* dbus1: pointer to the array size value, if this is a value */
+ uint32_t *array_size;
+
+ /* gvariant: list of offsets to end of children if this is struct/dict entry/array */
+ size_t *offsets, n_offsets, offsets_allocated, offset_index;
+ size_t item_size;
+
+ char *peeked_signature;
+};
+
+struct bus_body_part {
+ struct bus_body_part *next;
+ void *data;
+ void *mmap_begin;
+ size_t size;
+ size_t mapped;
+ size_t allocated;
+ uint64_t memfd_offset;
+ int memfd;
+ bool free_this:1;
+ bool munmap_this:1;
+ bool sealed:1;
+ bool is_zero:1;
+};
+
+struct sd_bus_message {
+ unsigned n_ref;
+
+ sd_bus *bus;
+
+ uint64_t reply_cookie;
+
+ const char *path;
+ const char *interface;
+ const char *member;
+ const char *destination;
+ const char *sender;
+
+ sd_bus_error error;
+
+ sd_bus_creds creds;
+
+ usec_t monotonic;
+ usec_t realtime;
+ uint64_t seqnum;
+ int64_t priority;
+ uint64_t verify_destination_id;
+
+ bool sealed:1;
+ bool dont_send:1;
+ bool allow_fds:1;
+ bool free_header:1;
+ bool free_fds:1;
+ bool poisoned:1;
+
+ /* The first and last bytes of the message */
+ struct bus_header *header;
+ void *footer;
+
+ /* How many bytes are accessible in the above pointers */
+ size_t header_accessible;
+ size_t footer_accessible;
+
+ size_t fields_size;
+ size_t body_size;
+ size_t user_body_size;
+
+ struct bus_body_part body;
+ struct bus_body_part *body_end;
+ unsigned n_body_parts;
+
+ size_t rindex;
+ struct bus_body_part *cached_rindex_part;
+ size_t cached_rindex_part_begin;
+
+ uint32_t n_fds;
+ int *fds;
+
+ struct bus_container root_container, *containers;
+ size_t n_containers;
+ size_t containers_allocated;
+
+ struct iovec *iovec;
+ struct iovec iovec_fixed[2];
+ unsigned n_iovec;
+
+ char *peeked_signature;
+
+ /* If set replies to this message must carry the signature
+ * specified here to successfully seal. This is initialized
+ * from the vtable data */
+ const char *enforced_reply_signature;
+
+ usec_t timeout;
+
+ size_t header_offsets[_BUS_MESSAGE_HEADER_MAX];
+ unsigned n_header_offsets;
+};
+
+static inline bool BUS_MESSAGE_NEED_BSWAP(sd_bus_message *m) {
+ return m->header->endian != BUS_NATIVE_ENDIAN;
+}
+
+static inline uint16_t BUS_MESSAGE_BSWAP16(sd_bus_message *m, uint16_t u) {
+ return BUS_MESSAGE_NEED_BSWAP(m) ? bswap_16(u) : u;
+}
+
+static inline uint32_t BUS_MESSAGE_BSWAP32(sd_bus_message *m, uint32_t u) {
+ return BUS_MESSAGE_NEED_BSWAP(m) ? bswap_32(u) : u;
+}
+
+static inline uint64_t BUS_MESSAGE_BSWAP64(sd_bus_message *m, uint64_t u) {
+ return BUS_MESSAGE_NEED_BSWAP(m) ? bswap_64(u) : u;
+}
+
+static inline uint64_t BUS_MESSAGE_COOKIE(sd_bus_message *m) {
+ if (m->header->version == 2)
+ return BUS_MESSAGE_BSWAP64(m, m->header->dbus2.cookie);
+
+ return BUS_MESSAGE_BSWAP32(m, m->header->dbus1.serial);
+}
+
+static inline size_t BUS_MESSAGE_SIZE(sd_bus_message *m) {
+ return
+ sizeof(struct bus_header) +
+ ALIGN8(m->fields_size) +
+ m->body_size;
+}
+
+static inline size_t BUS_MESSAGE_BODY_BEGIN(sd_bus_message *m) {
+ return
+ sizeof(struct bus_header) +
+ ALIGN8(m->fields_size);
+}
+
+static inline void* BUS_MESSAGE_FIELDS(sd_bus_message *m) {
+ return (uint8_t*) m->header + sizeof(struct bus_header);
+}
+
+static inline bool BUS_MESSAGE_IS_GVARIANT(sd_bus_message *m) {
+ return m->header->version == 2;
+}
+
+int bus_message_get_blob(sd_bus_message *m, void **buffer, size_t *sz);
+int bus_message_read_strv_extend(sd_bus_message *m, char ***l);
+
+int bus_message_from_header(
+ sd_bus *bus,
+ void *header,
+ size_t header_accessible,
+ void *footer,
+ size_t footer_accessible,
+ size_t message_size,
+ int *fds,
+ size_t n_fds,
+ const char *label,
+ size_t extra,
+ sd_bus_message **ret);
+
+int bus_message_from_malloc(
+ sd_bus *bus,
+ void *buffer,
+ size_t length,
+ int *fds,
+ size_t n_fds,
+ const char *label,
+ sd_bus_message **ret);
+
+int bus_message_get_arg(sd_bus_message *m, unsigned i, const char **str);
+int bus_message_get_arg_strv(sd_bus_message *m, unsigned i, char ***strv);
+
+int bus_message_parse_fields(sd_bus_message *m);
+
+struct bus_body_part *message_append_part(sd_bus_message *m);
+
+#define MESSAGE_FOREACH_PART(part, i, m) \
+ for ((i) = 0, (part) = &(m)->body; (i) < (m)->n_body_parts; (i)++, (part) = (part)->next)
+
+int bus_body_part_map(struct bus_body_part *part);
+void bus_body_part_unmap(struct bus_body_part *part);
+
+int bus_message_to_errno(sd_bus_message *m);
+
+int bus_message_new_synthetic_error(sd_bus *bus, uint64_t serial, const sd_bus_error *e, sd_bus_message **m);
+
+int bus_message_remarshal(sd_bus *bus, sd_bus_message **m);
+
+void bus_message_set_sender_driver(sd_bus *bus, sd_bus_message *m);
+void bus_message_set_sender_local(sd_bus *bus, sd_bus_message *m);
diff --git a/src/libsystemd/sd-bus/bus-objects.c b/src/libsystemd/sd-bus/bus-objects.c
new file mode 100644
index 0000000..58329f3
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-objects.c
@@ -0,0 +1,2816 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-introspect.h"
+#include "bus-message.h"
+#include "bus-objects.h"
+#include "bus-signature.h"
+#include "bus-slot.h"
+#include "bus-type.h"
+#include "bus-util.h"
+#include "missing_capability.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+
+static int node_vtable_get_userdata(
+ sd_bus *bus,
+ const char *path,
+ struct node_vtable *c,
+ void **userdata,
+ sd_bus_error *error) {
+
+ sd_bus_slot *s;
+ void *u, *found_u;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(c);
+
+ s = container_of(c, sd_bus_slot, node_vtable);
+ u = s->userdata;
+ if (c->find) {
+ bus->current_slot = sd_bus_slot_ref(s);
+ bus->current_userdata = u;
+ r = c->find(bus, path, c->interface, u, &found_u, error);
+ bus->current_userdata = NULL;
+ bus->current_slot = sd_bus_slot_unref(s);
+
+ if (r < 0)
+ return r;
+ if (sd_bus_error_is_set(error))
+ return -sd_bus_error_get_errno(error);
+ if (r == 0)
+ return r;
+ } else
+ found_u = u;
+
+ if (userdata)
+ *userdata = found_u;
+
+ return 1;
+}
+
+static void *vtable_method_convert_userdata(const sd_bus_vtable *p, void *u) {
+ assert(p);
+
+ return (uint8_t*) u + p->x.method.offset;
+}
+
+static void *vtable_property_convert_userdata(const sd_bus_vtable *p, void *u) {
+ assert(p);
+
+ return (uint8_t*) u + p->x.property.offset;
+}
+
+static int vtable_property_get_userdata(
+ sd_bus *bus,
+ const char *path,
+ struct vtable_member *p,
+ void **userdata,
+ sd_bus_error *error) {
+
+ void *u;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(p);
+ assert(userdata);
+
+ r = node_vtable_get_userdata(bus, path, p->parent, &u, error);
+ if (r <= 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ *userdata = vtable_property_convert_userdata(p->vtable, u);
+ return 1;
+}
+
+static int add_enumerated_to_set(
+ sd_bus *bus,
+ const char *prefix,
+ struct node_enumerator *first,
+ Set *s,
+ sd_bus_error *error) {
+
+ struct node_enumerator *c;
+ int r;
+
+ assert(bus);
+ assert(prefix);
+ assert(s);
+
+ LIST_FOREACH(enumerators, c, first) {
+ char **children = NULL, **k;
+ sd_bus_slot *slot;
+
+ if (bus->nodes_modified)
+ return 0;
+
+ slot = container_of(c, sd_bus_slot, node_enumerator);
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_userdata = slot->userdata;
+ r = c->callback(bus, prefix, slot->userdata, &children, error);
+ bus->current_userdata = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ if (r < 0)
+ return r;
+ if (sd_bus_error_is_set(error))
+ return -sd_bus_error_get_errno(error);
+
+ STRV_FOREACH(k, children) {
+ if (r < 0) {
+ free(*k);
+ continue;
+ }
+
+ if (!object_path_is_valid(*k)) {
+ free(*k);
+ r = -EINVAL;
+ continue;
+ }
+
+ if (!object_path_startswith(*k, prefix)) {
+ free(*k);
+ continue;
+ }
+
+ r = set_consume(s, *k);
+ if (r == -EEXIST)
+ r = 0;
+ }
+
+ free(children);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+enum {
+ /* if set, add_subtree() works recursively */
+ CHILDREN_RECURSIVE = 1 << 0,
+ /* if set, add_subtree() scans object-manager hierarchies recursively */
+ CHILDREN_SUBHIERARCHIES = 1 << 1,
+};
+
+static int add_subtree_to_set(
+ sd_bus *bus,
+ const char *prefix,
+ struct node *n,
+ unsigned flags,
+ Set *s,
+ sd_bus_error *error) {
+
+ struct node *i;
+ int r;
+
+ assert(bus);
+ assert(prefix);
+ assert(n);
+ assert(s);
+
+ r = add_enumerated_to_set(bus, prefix, n->enumerators, s, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ LIST_FOREACH(siblings, i, n->child) {
+ char *t;
+
+ if (!object_path_startswith(i->path, prefix))
+ continue;
+
+ t = strdup(i->path);
+ if (!t)
+ return -ENOMEM;
+
+ r = set_consume(s, t);
+ if (r < 0 && r != -EEXIST)
+ return r;
+
+ if ((flags & CHILDREN_RECURSIVE) &&
+ ((flags & CHILDREN_SUBHIERARCHIES) || !i->object_managers)) {
+ r = add_subtree_to_set(bus, prefix, i, flags, s, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+static int get_child_nodes(
+ sd_bus *bus,
+ const char *prefix,
+ struct node *n,
+ unsigned flags,
+ Set **_s,
+ sd_bus_error *error) {
+
+ Set *s = NULL;
+ int r;
+
+ assert(bus);
+ assert(prefix);
+ assert(n);
+ assert(_s);
+
+ s = set_new(&string_hash_ops);
+ if (!s)
+ return -ENOMEM;
+
+ r = add_subtree_to_set(bus, prefix, n, flags, s, error);
+ if (r < 0) {
+ set_free_free(s);
+ return r;
+ }
+
+ *_s = s;
+ return 0;
+}
+
+static int node_callbacks_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct node_callback *first,
+ bool require_fallback,
+ bool *found_object) {
+
+ struct node_callback *c;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(found_object);
+
+ LIST_FOREACH(callbacks, c, first) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ sd_bus_slot *slot;
+
+ if (bus->nodes_modified)
+ return 0;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ *found_object = true;
+
+ if (c->last_iteration == bus->iteration_counter)
+ continue;
+
+ c->last_iteration = bus->iteration_counter;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ slot = container_of(c, sd_bus_slot, node_callback);
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->callback;
+ bus->current_userdata = slot->userdata;
+ r = c->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ r = bus_maybe_reply_error(m, r, &error_buffer);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+#define CAPABILITY_SHIFT(x) (((x) >> __builtin_ctzll(_SD_BUS_VTABLE_CAPABILITY_MASK)) & 0xFFFF)
+
+static int check_access(sd_bus *bus, sd_bus_message *m, struct vtable_member *c, sd_bus_error *error) {
+ uint64_t cap;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(c);
+
+ /* If the entire bus is trusted let's grant access */
+ if (bus->trusted)
+ return 0;
+
+ /* If the member is marked UNPRIVILEGED let's grant access */
+ if (c->vtable->flags & SD_BUS_VTABLE_UNPRIVILEGED)
+ return 0;
+
+ /* Check have the caller has the requested capability
+ * set. Note that the flags value contains the capability
+ * number plus one, which we need to subtract here. We do this
+ * so that we have 0 as special value for "default
+ * capability". */
+ cap = CAPABILITY_SHIFT(c->vtable->flags);
+ if (cap == 0)
+ cap = CAPABILITY_SHIFT(c->parent->vtable[0].flags);
+ if (cap == 0)
+ cap = CAP_SYS_ADMIN;
+ else
+ cap--;
+
+ r = sd_bus_query_sender_privilege(m, cap);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Access to %s.%s() not permitted.", c->interface, c->member);
+}
+
+static int method_callbacks_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct vtable_member *c,
+ bool require_fallback,
+ bool *found_object) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *signature;
+ void *u;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(c);
+ assert(found_object);
+
+ if (require_fallback && !c->parent->is_fallback)
+ return 0;
+
+ r = check_access(bus, m, c, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ r = node_vtable_get_userdata(bus, m->path, c->parent, &u, &error);
+ if (r <= 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+
+ u = vtable_method_convert_userdata(c->vtable, u);
+
+ *found_object = true;
+
+ if (c->last_iteration == bus->iteration_counter)
+ return 0;
+
+ c->last_iteration = bus->iteration_counter;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ signature = sd_bus_message_get_signature(m, true);
+ if (!signature)
+ return -EINVAL;
+
+ if (!streq(strempty(c->vtable->x.method.signature), signature))
+ return sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_INVALID_ARGS,
+ "Invalid arguments '%s' to call %s.%s(), expecting '%s'.",
+ signature, c->interface, c->member, strempty(c->vtable->x.method.signature));
+
+ /* Keep track what the signature of the reply to this message
+ * should be, so that this can be enforced when sealing the
+ * reply. */
+ m->enforced_reply_signature = strempty(c->vtable->x.method.result);
+
+ if (c->vtable->x.method.handler) {
+ sd_bus_slot *slot;
+
+ slot = container_of(c->parent, sd_bus_slot, node_vtable);
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->vtable->x.method.handler;
+ bus->current_userdata = u;
+ r = c->vtable->x.method.handler(m, u, &error);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ return bus_maybe_reply_error(m, r, &error);
+ }
+
+ /* If the method callback is NULL, make this a successful NOP */
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int invoke_property_get(
+ sd_bus *bus,
+ sd_bus_slot *slot,
+ const sd_bus_vtable *v,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ const void *p;
+ int r;
+
+ assert(bus);
+ assert(slot);
+ assert(v);
+ assert(path);
+ assert(interface);
+ assert(property);
+ assert(reply);
+
+ if (v->x.property.get) {
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_userdata = userdata;
+ r = v->x.property.get(bus, path, interface, property, reply, userdata, error);
+ bus->current_userdata = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ if (r < 0)
+ return r;
+ if (sd_bus_error_is_set(error))
+ return -sd_bus_error_get_errno(error);
+ return r;
+ }
+
+ /* Automatic handling if no callback is defined. */
+
+ if (streq(v->x.property.signature, "as"))
+ return sd_bus_message_append_strv(reply, *(char***) userdata);
+
+ assert(signature_is_single(v->x.property.signature, false));
+ assert(bus_type_is_basic(v->x.property.signature[0]));
+
+ switch (v->x.property.signature[0]) {
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_SIGNATURE:
+ p = strempty(*(char**) userdata);
+ break;
+
+ case SD_BUS_TYPE_OBJECT_PATH:
+ p = *(char**) userdata;
+ assert(p);
+ break;
+
+ default:
+ p = userdata;
+ break;
+ }
+
+ return sd_bus_message_append_basic(reply, v->x.property.signature[0], p);
+}
+
+static int invoke_property_set(
+ sd_bus *bus,
+ sd_bus_slot *slot,
+ const sd_bus_vtable *v,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(bus);
+ assert(slot);
+ assert(v);
+ assert(path);
+ assert(interface);
+ assert(property);
+ assert(value);
+
+ if (v->x.property.set) {
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_userdata = userdata;
+ r = v->x.property.set(bus, path, interface, property, value, userdata, error);
+ bus->current_userdata = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ if (r < 0)
+ return r;
+ if (sd_bus_error_is_set(error))
+ return -sd_bus_error_get_errno(error);
+ return r;
+ }
+
+ /* Automatic handling if no callback is defined. */
+
+ assert(signature_is_single(v->x.property.signature, false));
+ assert(bus_type_is_basic(v->x.property.signature[0]));
+
+ switch (v->x.property.signature[0]) {
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE: {
+ const char *p;
+ char *n;
+
+ r = sd_bus_message_read_basic(value, v->x.property.signature[0], &p);
+ if (r < 0)
+ return r;
+
+ n = strdup(p);
+ if (!n)
+ return -ENOMEM;
+
+ free(*(char**) userdata);
+ *(char**) userdata = n;
+
+ break;
+ }
+
+ default:
+ r = sd_bus_message_read_basic(value, v->x.property.signature[0], userdata);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ return 1;
+}
+
+static int property_get_set_callbacks_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct vtable_member *c,
+ bool require_fallback,
+ bool is_get,
+ bool *found_object) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ sd_bus_slot *slot;
+ void *u = NULL;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(c);
+ assert(found_object);
+
+ if (require_fallback && !c->parent->is_fallback)
+ return 0;
+
+ r = vtable_property_get_userdata(bus, m->path, c, &u, &error);
+ if (r <= 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+
+ slot = container_of(c->parent, sd_bus_slot, node_vtable);
+
+ *found_object = true;
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ if (is_get) {
+ /* Note that we do not protect against reexecution
+ * here (using the last_iteration check, see below),
+ * should the node tree have changed and we got called
+ * again. We assume that property Get() calls are
+ * ultimately without side-effects or if they aren't
+ * then at least idempotent. */
+
+ r = sd_bus_message_open_container(reply, 'v', c->vtable->x.property.signature);
+ if (r < 0)
+ return r;
+
+ /* Note that we do not do an access check here. Read
+ * access to properties is always unrestricted, since
+ * PropertiesChanged signals broadcast contents
+ * anyway. */
+
+ r = invoke_property_get(bus, slot, c->vtable, m->path, c->interface, c->member, reply, u, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ if (bus->nodes_modified)
+ return 0;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ } else {
+ const char *signature = NULL;
+ char type = 0;
+
+ if (c->vtable->type != _SD_BUS_VTABLE_WRITABLE_PROPERTY)
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_PROPERTY_READ_ONLY, "Property '%s' is not writable.", c->member);
+
+ /* Avoid that we call the set routine more than once
+ * if the processing of this message got restarted
+ * because the node tree changed. */
+ if (c->last_iteration == bus->iteration_counter)
+ return 0;
+
+ c->last_iteration = bus->iteration_counter;
+
+ r = sd_bus_message_peek_type(m, &type, &signature);
+ if (r < 0)
+ return r;
+
+ if (type != 'v' || !streq(strempty(signature), strempty(c->vtable->x.property.signature)))
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Incorrect parameters for property '%s', expected '%s', got '%s'.", c->member, strempty(c->vtable->x.property.signature), strempty(signature));
+
+ r = sd_bus_message_enter_container(m, 'v', c->vtable->x.property.signature);
+ if (r < 0)
+ return r;
+
+ r = check_access(bus, m, c, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ r = invoke_property_set(bus, slot, c->vtable, m->path, c->interface, c->member, m, u, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ if (bus->nodes_modified)
+ return 0;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int vtable_append_one_property(
+ sd_bus *bus,
+ sd_bus_message *reply,
+ const char *path,
+ struct node_vtable *c,
+ const sd_bus_vtable *v,
+ void *userdata,
+ sd_bus_error *error) {
+
+ sd_bus_slot *slot;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(path);
+ assert(c);
+ assert(v);
+
+ r = sd_bus_message_open_container(reply, 'e', "sv");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", v->x.property.member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'v', v->x.property.signature);
+ if (r < 0)
+ return r;
+
+ slot = container_of(c, sd_bus_slot, node_vtable);
+
+ r = invoke_property_get(bus, slot, v, path, c->interface, v->x.property.member, reply, vtable_property_convert_userdata(v, userdata), error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int vtable_append_all_properties(
+ sd_bus *bus,
+ sd_bus_message *reply,
+ const char *path,
+ struct node_vtable *c,
+ void *userdata,
+ sd_bus_error *error) {
+
+ const sd_bus_vtable *v;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(path);
+ assert(c);
+
+ if (c->vtable[0].flags & SD_BUS_VTABLE_HIDDEN)
+ return 1;
+
+ for (v = c->vtable+1; v->type != _SD_BUS_VTABLE_END; v++) {
+ if (!IN_SET(v->type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY))
+ continue;
+
+ if (v->flags & SD_BUS_VTABLE_HIDDEN)
+ continue;
+
+ if (v->flags & SD_BUS_VTABLE_PROPERTY_EXPLICIT)
+ continue;
+
+ r = vtable_append_one_property(bus, reply, path, c, v, userdata, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 1;
+}
+
+static int property_get_all_callbacks_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct node_vtable *first,
+ bool require_fallback,
+ const char *iface,
+ bool *found_object) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ struct node_vtable *c;
+ bool found_interface;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(found_object);
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ found_interface = !iface ||
+ streq(iface, "org.freedesktop.DBus.Properties") ||
+ streq(iface, "org.freedesktop.DBus.Peer") ||
+ streq(iface, "org.freedesktop.DBus.Introspectable");
+
+ LIST_FOREACH(vtables, c, first) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ void *u;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, m->path, c, &u, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ *found_object = true;
+
+ if (iface && !streq(c->interface, iface))
+ continue;
+ found_interface = true;
+
+ r = vtable_append_all_properties(bus, reply, m->path, c, u, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ if (!*found_object)
+ return 0;
+
+ if (!found_interface) {
+ r = sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_UNKNOWN_INTERFACE,
+ "Unknown interface '%s'.", iface);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int bus_node_exists(
+ sd_bus *bus,
+ struct node *n,
+ const char *path,
+ bool require_fallback) {
+
+ struct node_vtable *c;
+ struct node_callback *k;
+ int r;
+
+ assert(bus);
+ assert(n);
+ assert(path);
+
+ /* Tests if there's anything attached directly to this node
+ * for the specified path */
+
+ if (!require_fallback && (n->enumerators || n->object_managers))
+ return true;
+
+ LIST_FOREACH(callbacks, k, n->callbacks) {
+ if (require_fallback && !k->is_fallback)
+ continue;
+
+ return 1;
+ }
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, NULL, &error);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 0;
+}
+
+static int process_introspect(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct node *n,
+ bool require_fallback,
+ bool *found_object) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_set_free_free_ Set *s = NULL;
+ const char *previous_interface = NULL;
+ struct introspect intro;
+ struct node_vtable *c;
+ bool empty;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(n);
+ assert(found_object);
+
+ r = get_child_nodes(bus, m->path, n, 0, &s, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+
+ r = introspect_begin(&intro, bus->trusted);
+ if (r < 0)
+ return r;
+
+ r = introspect_write_default_interfaces(&intro, !require_fallback && n->object_managers);
+ if (r < 0)
+ return r;
+
+ empty = set_isempty(s);
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, m->path, c, NULL, &error);
+ if (r < 0) {
+ r = bus_maybe_reply_error(m, r, &error);
+ goto finish;
+ }
+ if (bus->nodes_modified) {
+ r = 0;
+ goto finish;
+ }
+ if (r == 0)
+ continue;
+
+ empty = false;
+
+ if (c->vtable[0].flags & SD_BUS_VTABLE_HIDDEN)
+ continue;
+
+ if (!streq_ptr(previous_interface, c->interface)) {
+
+ if (previous_interface)
+ fputs(" </interface>\n", intro.f);
+
+ fprintf(intro.f, " <interface name=\"%s\">\n", c->interface);
+ }
+
+ r = introspect_write_interface(&intro, c->vtable);
+ if (r < 0)
+ goto finish;
+
+ previous_interface = c->interface;
+ }
+
+ if (previous_interface)
+ fputs(" </interface>\n", intro.f);
+
+ if (empty) {
+ /* Nothing?, let's see if we exist at all, and if not
+ * refuse to do anything */
+ r = bus_node_exists(bus, n, m->path, require_fallback);
+ if (r <= 0) {
+ r = bus_maybe_reply_error(m, r, &error);
+ goto finish;
+ }
+ if (bus->nodes_modified) {
+ r = 0;
+ goto finish;
+ }
+ }
+
+ *found_object = true;
+
+ r = introspect_write_child_nodes(&intro, s, m->path);
+ if (r < 0)
+ goto finish;
+
+ r = introspect_finish(&intro, bus, m, &reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ goto finish;
+
+ r = 1;
+
+finish:
+ introspect_free(&intro);
+ return r;
+}
+
+static int object_manager_serialize_path(
+ sd_bus *bus,
+ sd_bus_message *reply,
+ const char *prefix,
+ const char *path,
+ bool require_fallback,
+ sd_bus_error *error) {
+
+ const char *previous_interface = NULL;
+ bool found_something = false;
+ struct node_vtable *i;
+ struct node *n;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(prefix);
+ assert(path);
+ assert(error);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ LIST_FOREACH(vtables, i, n->vtables) {
+ void *u;
+
+ if (require_fallback && !i->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, i, &u, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ if (!found_something) {
+
+ /* Open the object part */
+
+ r = sd_bus_message_open_container(reply, 'e', "oa{sa{sv}}");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "o", path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{sa{sv}}");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "{sa{sv}}", "org.freedesktop.DBus.Peer", 0);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "{sa{sv}}", "org.freedesktop.DBus.Introspectable", 0);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "{sa{sv}}", "org.freedesktop.DBus.Properties", 0);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "{sa{sv}}", "org.freedesktop.DBus.ObjectManager", 0);
+ if (r < 0)
+ return r;
+
+ found_something = true;
+ }
+
+ if (!streq_ptr(previous_interface, i->interface)) {
+
+ /* Maybe close the previous interface part */
+
+ if (previous_interface) {
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ /* Open the new interface part */
+
+ r = sd_bus_message_open_container(reply, 'e', "sa{sv}");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", i->interface);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{sv}");
+ if (r < 0)
+ return r;
+ }
+
+ r = vtable_append_all_properties(bus, reply, path, i, u, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ previous_interface = i->interface;
+ }
+
+ if (previous_interface) {
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ if (found_something) {
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+static int object_manager_serialize_path_and_fallbacks(
+ sd_bus *bus,
+ sd_bus_message *reply,
+ const char *path,
+ sd_bus_error *error) {
+
+ char *prefix;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(path);
+ assert(error);
+
+ /* First, add all vtables registered for this path */
+ r = object_manager_serialize_path(bus, reply, path, path, false, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ /* Second, add fallback vtables registered for any of the prefixes */
+ prefix = newa(char, strlen(path) + 1);
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = object_manager_serialize_path(bus, reply, prefix, path, true, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 0;
+}
+
+static int process_get_managed_objects(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct node *n,
+ bool require_fallback,
+ bool *found_object) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_set_free_free_ Set *s = NULL;
+ Iterator i;
+ char *path;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(n);
+ assert(found_object);
+
+ /* Spec says, GetManagedObjects() is only implemented on the root of a
+ * sub-tree. Therefore, we require a registered object-manager on
+ * exactly the queried path, otherwise, we refuse to respond. */
+
+ if (require_fallback || !n->object_managers)
+ return 0;
+
+ r = get_child_nodes(bus, m->path, n, CHILDREN_RECURSIVE, &s, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{oa{sa{sv}}}");
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(path, s, i) {
+ r = object_manager_serialize_path_and_fallbacks(bus, reply, path, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int object_find_and_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ const char *p,
+ bool require_fallback,
+ bool *found_object) {
+
+ struct node *n;
+ struct vtable_member vtable_key, *v;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(p);
+ assert(found_object);
+
+ n = hashmap_get(bus->nodes, p);
+ if (!n)
+ return 0;
+
+ /* First, try object callbacks */
+ r = node_callbacks_run(bus, m, n->callbacks, require_fallback, found_object);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ if (!m->interface || !m->member)
+ return 0;
+
+ /* Then, look for a known method */
+ vtable_key.path = (char*) p;
+ vtable_key.interface = m->interface;
+ vtable_key.member = m->member;
+
+ v = hashmap_get(bus->vtable_methods, &vtable_key);
+ if (v) {
+ r = method_callbacks_run(bus, m, v, require_fallback, found_object);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ /* Then, look for a known property */
+ if (streq(m->interface, "org.freedesktop.DBus.Properties")) {
+ bool get = false;
+
+ get = streq(m->member, "Get");
+
+ if (get || streq(m->member, "Set")) {
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ vtable_key.path = (char*) p;
+
+ r = sd_bus_message_read(m, "ss", &vtable_key.interface, &vtable_key.member);
+ if (r < 0)
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected interface and member parameters");
+
+ v = hashmap_get(bus->vtable_properties, &vtable_key);
+ if (v) {
+ r = property_get_set_callbacks_run(bus, m, v, require_fallback, get, found_object);
+ if (r != 0)
+ return r;
+ }
+
+ } else if (streq(m->member, "GetAll")) {
+ const char *iface;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "s", &iface);
+ if (r < 0)
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected interface parameter");
+
+ if (iface[0] == 0)
+ iface = NULL;
+
+ r = property_get_all_callbacks_run(bus, m, n->vtables, require_fallback, iface, found_object);
+ if (r != 0)
+ return r;
+ }
+
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.DBus.Introspectable", "Introspect")) {
+
+ if (!isempty(sd_bus_message_get_signature(m, true)))
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected no parameters");
+
+ r = process_introspect(bus, m, n, require_fallback, found_object);
+ if (r != 0)
+ return r;
+
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.DBus.ObjectManager", "GetManagedObjects")) {
+
+ if (!isempty(sd_bus_message_get_signature(m, true)))
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected no parameters");
+
+ r = process_get_managed_objects(bus, m, n, require_fallback, found_object);
+ if (r != 0)
+ return r;
+ }
+
+ if (bus->nodes_modified)
+ return 0;
+
+ if (!*found_object) {
+ r = bus_node_exists(bus, n, m->path, require_fallback);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, NULL);
+ if (bus->nodes_modified)
+ return 0;
+ if (r > 0)
+ *found_object = true;
+ }
+
+ return 0;
+}
+
+int bus_process_object(sd_bus *bus, sd_bus_message *m) {
+ int r;
+ size_t pl;
+ bool found_object = false;
+
+ assert(bus);
+ assert(m);
+
+ if (bus->is_monitor)
+ return 0;
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL)
+ return 0;
+
+ if (hashmap_isempty(bus->nodes))
+ return 0;
+
+ /* Never respond to broadcast messages */
+ if (bus->bus_client && !m->destination)
+ return 0;
+
+ assert(m->path);
+ assert(m->member);
+
+ pl = strlen(m->path);
+ do {
+ char prefix[pl+1];
+
+ bus->nodes_modified = false;
+
+ r = object_find_and_run(bus, m, m->path, false, &found_object);
+ if (r != 0)
+ return r;
+
+ /* Look for fallback prefixes */
+ OBJECT_PATH_FOREACH_PREFIX(prefix, m->path) {
+
+ if (bus->nodes_modified)
+ break;
+
+ r = object_find_and_run(bus, m, prefix, true, &found_object);
+ if (r != 0)
+ return r;
+ }
+
+ } while (bus->nodes_modified);
+
+ if (!found_object)
+ return 0;
+
+ if (sd_bus_message_is_method_call(m, "org.freedesktop.DBus.Properties", "Get") ||
+ sd_bus_message_is_method_call(m, "org.freedesktop.DBus.Properties", "Set"))
+ r = sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_UNKNOWN_PROPERTY,
+ "Unknown property or interface.");
+ else
+ r = sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_UNKNOWN_METHOD,
+ "Unknown method '%s' or interface '%s'.", m->member, m->interface);
+
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static struct node *bus_node_allocate(sd_bus *bus, const char *path) {
+ struct node *n, *parent;
+ const char *e;
+ _cleanup_free_ char *s = NULL;
+ char *p;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(path[0] == '/');
+
+ n = hashmap_get(bus->nodes, path);
+ if (n)
+ return n;
+
+ r = hashmap_ensure_allocated(&bus->nodes, &string_hash_ops);
+ if (r < 0)
+ return NULL;
+
+ s = strdup(path);
+ if (!s)
+ return NULL;
+
+ if (streq(path, "/"))
+ parent = NULL;
+ else {
+ e = strrchr(path, '/');
+ assert(e);
+
+ p = strndupa(path, MAX(1, e - path));
+
+ parent = bus_node_allocate(bus, p);
+ if (!parent)
+ return NULL;
+ }
+
+ n = new0(struct node, 1);
+ if (!n)
+ return NULL;
+
+ n->parent = parent;
+ n->path = TAKE_PTR(s);
+
+ r = hashmap_put(bus->nodes, n->path, n);
+ if (r < 0) {
+ free(n->path);
+ return mfree(n);
+ }
+
+ if (parent)
+ LIST_PREPEND(siblings, parent->child, n);
+
+ return n;
+}
+
+void bus_node_gc(sd_bus *b, struct node *n) {
+ assert(b);
+
+ if (!n)
+ return;
+
+ if (n->child ||
+ n->callbacks ||
+ n->vtables ||
+ n->enumerators ||
+ n->object_managers)
+ return;
+
+ assert_se(hashmap_remove(b->nodes, n->path) == n);
+
+ if (n->parent)
+ LIST_REMOVE(siblings, n->parent->child, n);
+
+ free(n->path);
+ bus_node_gc(b, n->parent);
+ free(n);
+}
+
+static int bus_find_parent_object_manager(sd_bus *bus, struct node **out, const char *path) {
+ struct node *n;
+
+ assert(bus);
+ assert(path);
+
+ n = hashmap_get(bus->nodes, path);
+ if (!n) {
+ char *prefix;
+
+ prefix = newa(char, strlen(path) + 1);
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ n = hashmap_get(bus->nodes, prefix);
+ if (n)
+ break;
+ }
+ }
+
+ while (n && !n->object_managers)
+ n = n->parent;
+
+ if (out)
+ *out = n;
+ return !!n;
+}
+
+static int bus_add_object(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ bool fallback,
+ const char *path,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ sd_bus_slot *s;
+ struct node *n;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ n = bus_node_allocate(bus, path);
+ if (!n)
+ return -ENOMEM;
+
+ s = bus_slot_allocate(bus, !slot, BUS_NODE_CALLBACK, sizeof(struct node_callback), userdata);
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ s->node_callback.callback = callback;
+ s->node_callback.is_fallback = fallback;
+
+ s->node_callback.node = n;
+ LIST_PREPEND(callbacks, n->callbacks, &s->node_callback);
+ bus->nodes_modified = true;
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+
+fail:
+ sd_bus_slot_unref(s);
+ bus_node_gc(bus, n);
+
+ return r;
+}
+
+_public_ int sd_bus_add_object(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *path,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ return bus_add_object(bus, slot, false, path, callback, userdata);
+}
+
+_public_ int sd_bus_add_fallback(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *prefix,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ return bus_add_object(bus, slot, true, prefix, callback, userdata);
+}
+
+static void vtable_member_hash_func(const struct vtable_member *m, struct siphash *state) {
+ assert(m);
+
+ string_hash_func(m->path, state);
+ string_hash_func(m->interface, state);
+ string_hash_func(m->member, state);
+}
+
+static int vtable_member_compare_func(const struct vtable_member *x, const struct vtable_member *y) {
+ int r;
+
+ assert(x);
+ assert(y);
+
+ r = strcmp(x->path, y->path);
+ if (r != 0)
+ return r;
+
+ r = strcmp(x->interface, y->interface);
+ if (r != 0)
+ return r;
+
+ return strcmp(x->member, y->member);
+}
+
+DEFINE_PRIVATE_HASH_OPS(vtable_member_hash_ops, struct vtable_member, vtable_member_hash_func, vtable_member_compare_func);
+
+static int add_object_vtable_internal(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *path,
+ const char *interface,
+ const sd_bus_vtable *vtable,
+ bool fallback,
+ sd_bus_object_find_t find,
+ void *userdata) {
+
+ sd_bus_slot *s = NULL;
+ struct node_vtable *i, *existing = NULL;
+ const sd_bus_vtable *v;
+ struct node *n;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(interface_name_is_valid(interface), -EINVAL);
+ assert_return(vtable, -EINVAL);
+ assert_return(vtable[0].type == _SD_BUS_VTABLE_START, -EINVAL);
+ assert_return(vtable[0].x.start.element_size == sizeof(struct sd_bus_vtable), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+ assert_return(!streq(interface, "org.freedesktop.DBus.Properties") &&
+ !streq(interface, "org.freedesktop.DBus.Introspectable") &&
+ !streq(interface, "org.freedesktop.DBus.Peer") &&
+ !streq(interface, "org.freedesktop.DBus.ObjectManager"), -EINVAL);
+
+ r = hashmap_ensure_allocated(&bus->vtable_methods, &vtable_member_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&bus->vtable_properties, &vtable_member_hash_ops);
+ if (r < 0)
+ return r;
+
+ n = bus_node_allocate(bus, path);
+ if (!n)
+ return -ENOMEM;
+
+ LIST_FOREACH(vtables, i, n->vtables) {
+ if (i->is_fallback != fallback) {
+ r = -EPROTOTYPE;
+ goto fail;
+ }
+
+ if (streq(i->interface, interface)) {
+
+ if (i->vtable == vtable) {
+ r = -EEXIST;
+ goto fail;
+ }
+
+ existing = i;
+ }
+ }
+
+ s = bus_slot_allocate(bus, !slot, BUS_NODE_VTABLE, sizeof(struct node_vtable), userdata);
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ s->node_vtable.is_fallback = fallback;
+ s->node_vtable.vtable = vtable;
+ s->node_vtable.find = find;
+
+ s->node_vtable.interface = strdup(interface);
+ if (!s->node_vtable.interface) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ for (v = s->node_vtable.vtable+1; v->type != _SD_BUS_VTABLE_END; v++) {
+
+ switch (v->type) {
+
+ case _SD_BUS_VTABLE_METHOD: {
+ struct vtable_member *m;
+
+ if (!member_name_is_valid(v->x.method.member) ||
+ !signature_is_valid(strempty(v->x.method.signature), false) ||
+ !signature_is_valid(strempty(v->x.method.result), false) ||
+ !(v->x.method.handler || (isempty(v->x.method.signature) && isempty(v->x.method.result))) ||
+ v->flags & (SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE|SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION)) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ m = new0(struct vtable_member, 1);
+ if (!m) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ m->parent = &s->node_vtable;
+ m->path = n->path;
+ m->interface = s->node_vtable.interface;
+ m->member = v->x.method.member;
+ m->vtable = v;
+
+ r = hashmap_put(bus->vtable_methods, m, m);
+ if (r < 0) {
+ free(m);
+ goto fail;
+ }
+
+ break;
+ }
+
+ case _SD_BUS_VTABLE_WRITABLE_PROPERTY:
+
+ if (!(v->x.property.set || bus_type_is_basic(v->x.property.signature[0]))) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ if (v->flags & SD_BUS_VTABLE_PROPERTY_CONST) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ _fallthrough_;
+ case _SD_BUS_VTABLE_PROPERTY: {
+ struct vtable_member *m;
+
+ if (!member_name_is_valid(v->x.property.member) ||
+ !signature_is_single(v->x.property.signature, false) ||
+ !(v->x.property.get || bus_type_is_basic(v->x.property.signature[0]) || streq(v->x.property.signature, "as")) ||
+ (v->flags & SD_BUS_VTABLE_METHOD_NO_REPLY) ||
+ (!!(v->flags & SD_BUS_VTABLE_PROPERTY_CONST) + !!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE) + !!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION)) > 1 ||
+ ((v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE) && (v->flags & SD_BUS_VTABLE_PROPERTY_EXPLICIT)) ||
+ (v->flags & SD_BUS_VTABLE_UNPRIVILEGED && v->type == _SD_BUS_VTABLE_PROPERTY)) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ m = new0(struct vtable_member, 1);
+ if (!m) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ m->parent = &s->node_vtable;
+ m->path = n->path;
+ m->interface = s->node_vtable.interface;
+ m->member = v->x.property.member;
+ m->vtable = v;
+
+ r = hashmap_put(bus->vtable_properties, m, m);
+ if (r < 0) {
+ free(m);
+ goto fail;
+ }
+
+ break;
+ }
+
+ case _SD_BUS_VTABLE_SIGNAL:
+
+ if (!member_name_is_valid(v->x.signal.member) ||
+ !signature_is_valid(strempty(v->x.signal.signature), false) ||
+ v->flags & SD_BUS_VTABLE_UNPRIVILEGED) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ break;
+
+ default:
+ r = -EINVAL;
+ goto fail;
+ }
+ }
+
+ s->node_vtable.node = n;
+ LIST_INSERT_AFTER(vtables, n->vtables, existing, &s->node_vtable);
+ bus->nodes_modified = true;
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+
+fail:
+ sd_bus_slot_unref(s);
+ bus_node_gc(bus, n);
+
+ return r;
+}
+
+_public_ int sd_bus_add_object_vtable(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *path,
+ const char *interface,
+ const sd_bus_vtable *vtable,
+ void *userdata) {
+
+ return add_object_vtable_internal(bus, slot, path, interface, vtable, false, NULL, userdata);
+}
+
+_public_ int sd_bus_add_fallback_vtable(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *prefix,
+ const char *interface,
+ const sd_bus_vtable *vtable,
+ sd_bus_object_find_t find,
+ void *userdata) {
+
+ return add_object_vtable_internal(bus, slot, prefix, interface, vtable, true, find, userdata);
+}
+
+_public_ int sd_bus_add_node_enumerator(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *path,
+ sd_bus_node_enumerator_t callback,
+ void *userdata) {
+
+ sd_bus_slot *s;
+ struct node *n;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ n = bus_node_allocate(bus, path);
+ if (!n)
+ return -ENOMEM;
+
+ s = bus_slot_allocate(bus, !slot, BUS_NODE_ENUMERATOR, sizeof(struct node_enumerator), userdata);
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ s->node_enumerator.callback = callback;
+
+ s->node_enumerator.node = n;
+ LIST_PREPEND(enumerators, n->enumerators, &s->node_enumerator);
+ bus->nodes_modified = true;
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+
+fail:
+ sd_bus_slot_unref(s);
+ bus_node_gc(bus, n);
+
+ return r;
+}
+
+static int emit_properties_changed_on_interface(
+ sd_bus *bus,
+ const char *prefix,
+ const char *path,
+ const char *interface,
+ bool require_fallback,
+ bool *found_interface,
+ char **names) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ bool has_invalidating = false, has_changing = false;
+ struct vtable_member key = {};
+ struct node_vtable *c;
+ struct node *n;
+ char **property;
+ void *u = NULL;
+ int r;
+
+ assert(bus);
+ assert(prefix);
+ assert(path);
+ assert(interface);
+ assert(found_interface);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ r = sd_bus_message_new_signal(bus, &m, path, "org.freedesktop.DBus.Properties", "PropertiesChanged");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "s", interface);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ key.path = prefix;
+ key.interface = interface;
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ if (!streq(c->interface, interface))
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ *found_interface = true;
+
+ if (names) {
+ /* If the caller specified a list of
+ * properties we include exactly those in the
+ * PropertiesChanged message */
+
+ STRV_FOREACH(property, names) {
+ struct vtable_member *v;
+
+ assert_return(member_name_is_valid(*property), -EINVAL);
+
+ key.member = *property;
+ v = hashmap_get(bus->vtable_properties, &key);
+ if (!v)
+ return -ENOENT;
+
+ /* If there are two vtables for the same
+ * interface, let's handle this property when
+ * we come to that vtable. */
+ if (c != v->parent)
+ continue;
+
+ assert_return(v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE ||
+ v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION, -EDOM);
+
+ assert_return(!(v->vtable->flags & SD_BUS_VTABLE_HIDDEN), -EDOM);
+
+ if (v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION) {
+ has_invalidating = true;
+ continue;
+ }
+
+ has_changing = true;
+
+ r = vtable_append_one_property(bus, m, m->path, c, v->vtable, u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+ } else {
+ const sd_bus_vtable *v;
+
+ /* If the caller specified no properties list
+ * we include all properties that are marked
+ * as changing in the message. */
+
+ for (v = c->vtable+1; v->type != _SD_BUS_VTABLE_END; v++) {
+ if (!IN_SET(v->type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY))
+ continue;
+
+ if (v->flags & SD_BUS_VTABLE_HIDDEN)
+ continue;
+
+ if (v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION) {
+ has_invalidating = true;
+ continue;
+ }
+
+ if (!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE))
+ continue;
+
+ has_changing = true;
+
+ r = vtable_append_one_property(bus, m, m->path, c, v, u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+ }
+ }
+
+ if (!has_invalidating && !has_changing)
+ return 0;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return r;
+
+ if (has_invalidating) {
+ LIST_FOREACH(vtables, c, n->vtables) {
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ if (!streq(c->interface, interface))
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ if (names) {
+ STRV_FOREACH(property, names) {
+ struct vtable_member *v;
+
+ key.member = *property;
+ assert_se(v = hashmap_get(bus->vtable_properties, &key));
+ assert(c == v->parent);
+
+ if (!(v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION))
+ continue;
+
+ r = sd_bus_message_append(m, "s", *property);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ const sd_bus_vtable *v;
+
+ for (v = c->vtable+1; v->type != _SD_BUS_VTABLE_END; v++) {
+ if (!IN_SET(v->type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY))
+ continue;
+
+ if (v->flags & SD_BUS_VTABLE_HIDDEN)
+ continue;
+
+ if (!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION))
+ continue;
+
+ r = sd_bus_message_append(m, "s", v->x.property.member);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_send(bus, m, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+_public_ int sd_bus_emit_properties_changed_strv(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ char **names) {
+
+ bool found_interface = false;
+ char *prefix;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(interface_name_is_valid(interface), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ /* A non-NULL but empty names list means nothing needs to be
+ generated. A NULL list OTOH indicates that all properties
+ that are set to EMITS_CHANGE or EMITS_INVALIDATION shall be
+ included in the PropertiesChanged message. */
+ if (names && names[0] == NULL)
+ return 0;
+
+ BUS_DONT_DESTROY(bus);
+
+ do {
+ bus->nodes_modified = false;
+
+ r = emit_properties_changed_on_interface(bus, path, path, interface, false, &found_interface, names);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ continue;
+
+ prefix = newa(char, strlen(path) + 1);
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = emit_properties_changed_on_interface(bus, prefix, path, interface, true, &found_interface, names);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ break;
+ }
+
+ } while (bus->nodes_modified);
+
+ return found_interface ? 0 : -ENOENT;
+}
+
+_public_ int sd_bus_emit_properties_changed(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *name, ...) {
+
+ char **names;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(interface_name_is_valid(interface), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (!name)
+ return 0;
+
+ names = strv_from_stdarg_alloca(name);
+
+ return sd_bus_emit_properties_changed_strv(bus, path, interface, names);
+}
+
+static int object_added_append_all_prefix(
+ sd_bus *bus,
+ sd_bus_message *m,
+ Set *s,
+ const char *prefix,
+ const char *path,
+ bool require_fallback) {
+
+ const char *previous_interface = NULL;
+ struct node_vtable *c;
+ struct node *n;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(s);
+ assert(prefix);
+ assert(path);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ void *u = NULL;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ if (!streq_ptr(c->interface, previous_interface)) {
+ /* If a child-node already handled this interface, we
+ * skip it on any of its parents. The child vtables
+ * always fully override any conflicting vtables of
+ * any parent node. */
+ if (set_get(s, c->interface))
+ continue;
+
+ r = set_put(s, c->interface);
+ if (r < 0)
+ return r;
+
+ if (previous_interface) {
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_open_container(m, 'e', "sa{sv}");
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "s", c->interface);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_open_container(m, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ previous_interface = c->interface;
+ }
+
+ r = vtable_append_all_properties(bus, m, path, c, u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ if (previous_interface) {
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int object_added_append_all(sd_bus *bus, sd_bus_message *m, const char *path) {
+ _cleanup_set_free_ Set *s = NULL;
+ char *prefix;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(path);
+
+ /*
+ * This appends all interfaces registered on path @path. We first add
+ * the builtin interfaces, which are always available and handled by
+ * sd-bus. Then, we add all interfaces registered on the exact node,
+ * followed by all fallback interfaces registered on any parent prefix.
+ *
+ * If an interface is registered multiple times on the same node with
+ * different vtables, we merge all the properties across all vtables.
+ * However, if a child node has the same interface registered as one of
+ * its parent nodes has as fallback, we make the child overwrite the
+ * parent instead of extending it. Therefore, we keep a "Set" of all
+ * handled interfaces during parent traversal, so we skip interfaces on
+ * a parent that were overwritten by a child.
+ */
+
+ s = set_new(&string_hash_ops);
+ if (!s)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.Peer", 0);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.Introspectable", 0);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.Properties", 0);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.ObjectManager", 0);
+ if (r < 0)
+ return r;
+
+ r = object_added_append_all_prefix(bus, m, s, path, path, false);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ prefix = newa(char, strlen(path) + 1);
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = object_added_append_all_prefix(bus, m, s, prefix, path, true);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_emit_object_added(sd_bus *bus, const char *path) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ struct node *object_manager;
+ int r;
+
+ /*
+ * This emits an InterfacesAdded signal on the given path, by iterating
+ * all registered vtables and fallback vtables on the path. All
+ * properties are queried and included in the signal.
+ * This call is equivalent to sd_bus_emit_interfaces_added() with an
+ * explicit list of registered interfaces. However, unlike
+ * interfaces_added(), this call can figure out the list of supported
+ * interfaces itself. Furthermore, it properly adds the builtin
+ * org.freedesktop.DBus.* interfaces.
+ */
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ r = bus_find_parent_object_manager(bus, &object_manager, path);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ESRCH;
+
+ BUS_DONT_DESTROY(bus);
+
+ do {
+ bus->nodes_modified = false;
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_basic(m, 'o', path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "{sa{sv}}");
+ if (r < 0)
+ return r;
+
+ r = object_added_append_all(bus, m, path);
+ if (r < 0)
+ return r;
+
+ if (bus->nodes_modified)
+ continue;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ } while (bus->nodes_modified);
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+static int object_removed_append_all_prefix(
+ sd_bus *bus,
+ sd_bus_message *m,
+ Set *s,
+ const char *prefix,
+ const char *path,
+ bool require_fallback) {
+
+ const char *previous_interface = NULL;
+ struct node_vtable *c;
+ struct node *n;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(s);
+ assert(prefix);
+ assert(path);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ void *u = NULL;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+ if (streq_ptr(c->interface, previous_interface))
+ continue;
+
+ /* If a child-node already handled this interface, we
+ * skip it on any of its parents. The child vtables
+ * always fully override any conflicting vtables of
+ * any parent node. */
+ if (set_get(s, c->interface))
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ r = set_put(s, c->interface);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "s", c->interface);
+ if (r < 0)
+ return r;
+
+ previous_interface = c->interface;
+ }
+
+ return 0;
+}
+
+static int object_removed_append_all(sd_bus *bus, sd_bus_message *m, const char *path) {
+ _cleanup_set_free_ Set *s = NULL;
+ char *prefix;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(path);
+
+ /* see sd_bus_emit_object_added() for details */
+
+ s = set_new(&string_hash_ops);
+ if (!s)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.Peer");
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.Introspectable");
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.Properties");
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.ObjectManager");
+ if (r < 0)
+ return r;
+
+ r = object_removed_append_all_prefix(bus, m, s, path, path, false);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ prefix = newa(char, strlen(path) + 1);
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = object_removed_append_all_prefix(bus, m, s, prefix, path, true);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_emit_object_removed(sd_bus *bus, const char *path) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ struct node *object_manager;
+ int r;
+
+ /*
+ * This is like sd_bus_emit_object_added(), but emits an
+ * InterfacesRemoved signal on the given path. This only includes any
+ * registered interfaces but skips the properties. Note that this will
+ * call into the find() callbacks of any registered vtable. Therefore,
+ * you must call this function before destroying/unlinking your object.
+ * Otherwise, the list of interfaces will be incomplete. However, note
+ * that this will *NOT* call into any property callback. Therefore, the
+ * object might be in an "destructed" state, as long as we can find it.
+ */
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ r = bus_find_parent_object_manager(bus, &object_manager, path);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ESRCH;
+
+ BUS_DONT_DESTROY(bus);
+
+ do {
+ bus->nodes_modified = false;
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_basic(m, 'o', path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return r;
+
+ r = object_removed_append_all(bus, m, path);
+ if (r < 0)
+ return r;
+
+ if (bus->nodes_modified)
+ continue;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ } while (bus->nodes_modified);
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+static int interfaces_added_append_one_prefix(
+ sd_bus *bus,
+ sd_bus_message *m,
+ const char *prefix,
+ const char *path,
+ const char *interface,
+ bool require_fallback) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool found_interface = false;
+ struct node_vtable *c;
+ struct node *n;
+ void *u = NULL;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(prefix);
+ assert(path);
+ assert(interface);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ if (!streq(c->interface, interface))
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ if (!found_interface) {
+ r = sd_bus_message_append_basic(m, 's', interface);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ found_interface = true;
+ }
+
+ r = vtable_append_all_properties(bus, m, path, c, u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ if (found_interface) {
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ return found_interface;
+}
+
+static int interfaces_added_append_one(
+ sd_bus *bus,
+ sd_bus_message *m,
+ const char *path,
+ const char *interface) {
+
+ char *prefix;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(path);
+ assert(interface);
+
+ r = interfaces_added_append_one_prefix(bus, m, path, path, interface, false);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ prefix = newa(char, strlen(path) + 1);
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = interfaces_added_append_one_prefix(bus, m, prefix, path, interface, true);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+_public_ int sd_bus_emit_interfaces_added_strv(sd_bus *bus, const char *path, char **interfaces) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ struct node *object_manager;
+ char **i;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (strv_isempty(interfaces))
+ return 0;
+
+ r = bus_find_parent_object_manager(bus, &object_manager, path);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ESRCH;
+
+ BUS_DONT_DESTROY(bus);
+
+ do {
+ bus->nodes_modified = false;
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_basic(m, 'o', path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "{sa{sv}}");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, interfaces) {
+ assert_return(interface_name_is_valid(*i), -EINVAL);
+
+ r = sd_bus_message_open_container(m, 'e', "sa{sv}");
+ if (r < 0)
+ return r;
+
+ r = interfaces_added_append_one(bus, m, path, *i);
+ if (r < 0)
+ return r;
+
+ if (bus->nodes_modified)
+ break;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ if (bus->nodes_modified)
+ continue;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ } while (bus->nodes_modified);
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+_public_ int sd_bus_emit_interfaces_added(sd_bus *bus, const char *path, const char *interface, ...) {
+ char **interfaces;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ interfaces = strv_from_stdarg_alloca(interface);
+
+ return sd_bus_emit_interfaces_added_strv(bus, path, interfaces);
+}
+
+_public_ int sd_bus_emit_interfaces_removed_strv(sd_bus *bus, const char *path, char **interfaces) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ struct node *object_manager;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (strv_isempty(interfaces))
+ return 0;
+
+ r = bus_find_parent_object_manager(bus, &object_manager, path);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ESRCH;
+
+ r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_basic(m, 'o', path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(m, interfaces);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+_public_ int sd_bus_emit_interfaces_removed(sd_bus *bus, const char *path, const char *interface, ...) {
+ char **interfaces;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ interfaces = strv_from_stdarg_alloca(interface);
+
+ return sd_bus_emit_interfaces_removed_strv(bus, path, interfaces);
+}
+
+_public_ int sd_bus_add_object_manager(sd_bus *bus, sd_bus_slot **slot, const char *path) {
+ sd_bus_slot *s;
+ struct node *n;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ n = bus_node_allocate(bus, path);
+ if (!n)
+ return -ENOMEM;
+
+ s = bus_slot_allocate(bus, !slot, BUS_NODE_OBJECT_MANAGER, sizeof(struct node_object_manager), NULL);
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ s->node_object_manager.node = n;
+ LIST_PREPEND(object_managers, n->object_managers, &s->node_object_manager);
+ bus->nodes_modified = true;
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+
+fail:
+ sd_bus_slot_unref(s);
+ bus_node_gc(bus, n);
+
+ return r;
+}
diff --git a/src/libsystemd/sd-bus/bus-objects.h b/src/libsystemd/sd-bus/bus-objects.h
new file mode 100644
index 0000000..a119ff9
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-objects.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "bus-internal.h"
+
+int bus_process_object(sd_bus *bus, sd_bus_message *m);
+void bus_node_gc(sd_bus *b, struct node *n);
diff --git a/src/libsystemd/sd-bus/bus-protocol.h b/src/libsystemd/sd-bus/bus-protocol.h
new file mode 100644
index 0000000..d01fd8e
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-protocol.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <endian.h>
+
+#include "macro.h"
+
+/* Packet header */
+
+struct _packed_ bus_header {
+ /* The first four fields are identical for dbus1, and dbus2 */
+ uint8_t endian;
+ uint8_t type;
+ uint8_t flags;
+ uint8_t version;
+
+ union _packed_ {
+ /* dbus1: Used for SOCK_STREAM connections */
+ struct _packed_ {
+ uint32_t body_size;
+
+ /* Note that what the bus spec calls "serial" we'll call
+ "cookie" instead, because we don't want to imply that the
+ cookie was in any way monotonically increasing. */
+ uint32_t serial;
+ uint32_t fields_size;
+ } dbus1;
+
+ /* dbus2: Used for kdbus connections */
+ struct _packed_ {
+ uint32_t _reserved;
+ uint64_t cookie;
+ } dbus2;
+
+ /* Note that both header versions have the same size! */
+ };
+};
+
+/* Endianness */
+
+enum {
+ _BUS_INVALID_ENDIAN = 0,
+ BUS_LITTLE_ENDIAN = 'l',
+ BUS_BIG_ENDIAN = 'B',
+#if __BYTE_ORDER == __BIG_ENDIAN
+ BUS_NATIVE_ENDIAN = BUS_BIG_ENDIAN,
+ BUS_REVERSE_ENDIAN = BUS_LITTLE_ENDIAN
+#else
+ BUS_NATIVE_ENDIAN = BUS_LITTLE_ENDIAN,
+ BUS_REVERSE_ENDIAN = BUS_BIG_ENDIAN
+#endif
+};
+
+/* Flags */
+
+enum {
+ BUS_MESSAGE_NO_REPLY_EXPECTED = 1 << 0,
+ BUS_MESSAGE_NO_AUTO_START = 1 << 1,
+ BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION = 1 << 2,
+};
+
+/* Header fields */
+
+enum {
+ _BUS_MESSAGE_HEADER_INVALID = 0,
+ BUS_MESSAGE_HEADER_PATH,
+ BUS_MESSAGE_HEADER_INTERFACE,
+ BUS_MESSAGE_HEADER_MEMBER,
+ BUS_MESSAGE_HEADER_ERROR_NAME,
+ BUS_MESSAGE_HEADER_REPLY_SERIAL,
+ BUS_MESSAGE_HEADER_DESTINATION,
+ BUS_MESSAGE_HEADER_SENDER,
+ BUS_MESSAGE_HEADER_SIGNATURE,
+ BUS_MESSAGE_HEADER_UNIX_FDS,
+ _BUS_MESSAGE_HEADER_MAX
+};
+
+/* RequestName parameters */
+
+enum {
+ BUS_NAME_ALLOW_REPLACEMENT = 1 << 0,
+ BUS_NAME_REPLACE_EXISTING = 1 << 1,
+ BUS_NAME_DO_NOT_QUEUE = 1 << 2,
+};
+
+/* RequestName returns */
+enum {
+ BUS_NAME_PRIMARY_OWNER = 1,
+ BUS_NAME_IN_QUEUE = 2,
+ BUS_NAME_EXISTS = 3,
+ BUS_NAME_ALREADY_OWNER = 4
+};
+
+/* ReleaseName returns */
+enum {
+ BUS_NAME_RELEASED = 1,
+ BUS_NAME_NON_EXISTENT = 2,
+ BUS_NAME_NOT_OWNER = 3,
+};
+
+/* StartServiceByName returns */
+enum {
+ BUS_START_REPLY_SUCCESS = 1,
+ BUS_START_REPLY_ALREADY_RUNNING = 2,
+};
+
+#define BUS_INTROSPECT_DOCTYPE \
+ "<!DOCTYPE node PUBLIC \"-//freedesktop//DTD D-BUS Object Introspection 1.0//EN\"\n" \
+ "\"http://www.freedesktop.org/standards/dbus/1.0/introspect.dtd\">\n"
+
+#define BUS_INTROSPECT_INTERFACE_PEER \
+ " <interface name=\"org.freedesktop.DBus.Peer\">\n" \
+ " <method name=\"Ping\"/>\n" \
+ " <method name=\"GetMachineId\">\n" \
+ " <arg type=\"s\" name=\"machine_uuid\" direction=\"out\"/>\n" \
+ " </method>\n" \
+ " </interface>\n"
+
+#define BUS_INTROSPECT_INTERFACE_INTROSPECTABLE \
+ " <interface name=\"org.freedesktop.DBus.Introspectable\">\n" \
+ " <method name=\"Introspect\">\n" \
+ " <arg name=\"data\" type=\"s\" direction=\"out\"/>\n" \
+ " </method>\n" \
+ " </interface>\n"
+
+#define BUS_INTROSPECT_INTERFACE_PROPERTIES \
+ " <interface name=\"org.freedesktop.DBus.Properties\">\n" \
+ " <method name=\"Get\">\n" \
+ " <arg name=\"interface\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"property\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"value\" direction=\"out\" type=\"v\"/>\n" \
+ " </method>\n" \
+ " <method name=\"GetAll\">\n" \
+ " <arg name=\"interface\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"properties\" direction=\"out\" type=\"a{sv}\"/>\n" \
+ " </method>\n" \
+ " <method name=\"Set\">\n" \
+ " <arg name=\"interface\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"property\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"value\" direction=\"in\" type=\"v\"/>\n" \
+ " </method>\n" \
+ " <signal name=\"PropertiesChanged\">\n" \
+ " <arg type=\"s\" name=\"interface\"/>\n" \
+ " <arg type=\"a{sv}\" name=\"changed_properties\"/>\n" \
+ " <arg type=\"as\" name=\"invalidated_properties\"/>\n" \
+ " </signal>\n" \
+ " </interface>\n"
+
+#define BUS_INTROSPECT_INTERFACE_OBJECT_MANAGER \
+ " <interface name=\"org.freedesktop.DBus.ObjectManager\">\n" \
+ " <method name=\"GetManagedObjects\">\n" \
+ " <arg type=\"a{oa{sa{sv}}}\" name=\"object_paths_interfaces_and_properties\" direction=\"out\"/>\n" \
+ " </method>\n" \
+ " <signal name=\"InterfacesAdded\">\n" \
+ " <arg type=\"o\" name=\"object_path\"/>\n" \
+ " <arg type=\"a{sa{sv}}\" name=\"interfaces_and_properties\"/>\n" \
+ " </signal>\n" \
+ " <signal name=\"InterfacesRemoved\">\n" \
+ " <arg type=\"o\" name=\"object_path\"/>\n" \
+ " <arg type=\"as\" name=\"interfaces\"/>\n" \
+ " </signal>\n" \
+ " </interface>\n"
diff --git a/src/libsystemd/sd-bus/bus-signature.c b/src/libsystemd/sd-bus/bus-signature.c
new file mode 100644
index 0000000..1ecd6e8
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-signature.c
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <util.h>
+
+#include "sd-bus.h"
+
+#include "bus-signature.h"
+#include "bus-type.h"
+
+static int signature_element_length_internal(
+ const char *s,
+ bool allow_dict_entry,
+ unsigned array_depth,
+ unsigned struct_depth,
+ size_t *l) {
+
+ int r;
+
+ if (!s)
+ return -EINVAL;
+
+ assert(l);
+
+ if (bus_type_is_basic(*s) || *s == SD_BUS_TYPE_VARIANT) {
+ *l = 1;
+ return 0;
+ }
+
+ if (*s == SD_BUS_TYPE_ARRAY) {
+ size_t t;
+
+ if (array_depth >= 32)
+ return -EINVAL;
+
+ r = signature_element_length_internal(s + 1, true, array_depth+1, struct_depth, &t);
+ if (r < 0)
+ return r;
+
+ *l = t + 1;
+ return 0;
+ }
+
+ if (*s == SD_BUS_TYPE_STRUCT_BEGIN) {
+ const char *p = s + 1;
+
+ if (struct_depth >= 32)
+ return -EINVAL;
+
+ while (*p != SD_BUS_TYPE_STRUCT_END) {
+ size_t t;
+
+ r = signature_element_length_internal(p, false, array_depth, struct_depth+1, &t);
+ if (r < 0)
+ return r;
+
+ p += t;
+ }
+
+ if (p - s < 2)
+ /* D-Bus spec: Empty structures are not allowed; there
+ * must be at least one type code between the parentheses.
+ */
+ return -EINVAL;
+
+ *l = p - s + 1;
+ return 0;
+ }
+
+ if (*s == SD_BUS_TYPE_DICT_ENTRY_BEGIN && allow_dict_entry) {
+ const char *p = s + 1;
+ unsigned n = 0;
+
+ if (struct_depth >= 32)
+ return -EINVAL;
+
+ while (*p != SD_BUS_TYPE_DICT_ENTRY_END) {
+ size_t t;
+
+ if (n == 0 && !bus_type_is_basic(*p))
+ return -EINVAL;
+
+ r = signature_element_length_internal(p, false, array_depth, struct_depth+1, &t);
+ if (r < 0)
+ return r;
+
+ p += t;
+ n++;
+ }
+
+ if (n != 2)
+ return -EINVAL;
+
+ *l = p - s + 1;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+int signature_element_length(const char *s, size_t *l) {
+ return signature_element_length_internal(s, true, 0, 0, l);
+}
+
+bool signature_is_single(const char *s, bool allow_dict_entry) {
+ int r;
+ size_t t;
+
+ if (!s)
+ return false;
+
+ r = signature_element_length_internal(s, allow_dict_entry, 0, 0, &t);
+ if (r < 0)
+ return false;
+
+ return s[t] == 0;
+}
+
+bool signature_is_pair(const char *s) {
+
+ if (!s)
+ return false;
+
+ if (!bus_type_is_basic(*s))
+ return false;
+
+ return signature_is_single(s + 1, false);
+}
+
+bool signature_is_valid(const char *s, bool allow_dict_entry) {
+ const char *p;
+ int r;
+
+ if (!s)
+ return false;
+
+ p = s;
+ while (*p) {
+ size_t t;
+
+ r = signature_element_length_internal(p, allow_dict_entry, 0, 0, &t);
+ if (r < 0)
+ return false;
+
+ p += t;
+ }
+
+ return p - s <= 255;
+}
diff --git a/src/libsystemd/sd-bus/bus-signature.h b/src/libsystemd/sd-bus/bus-signature.h
new file mode 100644
index 0000000..b87bec8
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-signature.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+bool signature_is_single(const char *s, bool allow_dict_entry);
+bool signature_is_pair(const char *s);
+bool signature_is_valid(const char *s, bool allow_dict_entry);
+
+int signature_element_length(const char *s, size_t *l);
diff --git a/src/libsystemd/sd-bus/bus-slot.c b/src/libsystemd/sd-bus/bus-slot.c
new file mode 100644
index 0000000..c9aca07
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-slot.c
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-control.h"
+#include "bus-objects.h"
+#include "bus-slot.h"
+#include "string-util.h"
+
+sd_bus_slot *bus_slot_allocate(
+ sd_bus *bus,
+ bool floating,
+ BusSlotType type,
+ size_t extra,
+ void *userdata) {
+
+ sd_bus_slot *slot;
+
+ assert(bus);
+
+ slot = malloc0(offsetof(sd_bus_slot, reply_callback) + extra);
+ if (!slot)
+ return NULL;
+
+ slot->n_ref = 1;
+ slot->type = type;
+ slot->bus = bus;
+ slot->floating = floating;
+ slot->userdata = userdata;
+
+ if (!floating)
+ sd_bus_ref(bus);
+
+ LIST_PREPEND(slots, bus->slots, slot);
+
+ return slot;
+}
+
+void bus_slot_disconnect(sd_bus_slot *slot, bool unref) {
+ sd_bus *bus;
+
+ assert(slot);
+
+ if (!slot->bus)
+ return;
+
+ switch (slot->type) {
+
+ case BUS_REPLY_CALLBACK:
+
+ if (slot->reply_callback.cookie != 0)
+ ordered_hashmap_remove(slot->bus->reply_callbacks, &slot->reply_callback.cookie);
+
+ if (slot->reply_callback.timeout_usec != 0)
+ prioq_remove(slot->bus->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);
+
+ break;
+
+ case BUS_FILTER_CALLBACK:
+ slot->bus->filter_callbacks_modified = true;
+ LIST_REMOVE(callbacks, slot->bus->filter_callbacks, &slot->filter_callback);
+ break;
+
+ case BUS_MATCH_CALLBACK:
+
+ if (slot->match_added)
+ (void) bus_remove_match_internal(slot->bus, slot->match_callback.match_string);
+
+ if (slot->match_callback.install_slot) {
+ bus_slot_disconnect(slot->match_callback.install_slot, true);
+ slot->match_callback.install_slot = sd_bus_slot_unref(slot->match_callback.install_slot);
+ }
+
+ slot->bus->match_callbacks_modified = true;
+ bus_match_remove(&slot->bus->match_callbacks, &slot->match_callback);
+
+ slot->match_callback.match_string = mfree(slot->match_callback.match_string);
+
+ break;
+
+ case BUS_NODE_CALLBACK:
+
+ if (slot->node_callback.node) {
+ LIST_REMOVE(callbacks, slot->node_callback.node->callbacks, &slot->node_callback);
+ slot->bus->nodes_modified = true;
+
+ bus_node_gc(slot->bus, slot->node_callback.node);
+ }
+
+ break;
+
+ case BUS_NODE_ENUMERATOR:
+
+ if (slot->node_enumerator.node) {
+ LIST_REMOVE(enumerators, slot->node_enumerator.node->enumerators, &slot->node_enumerator);
+ slot->bus->nodes_modified = true;
+
+ bus_node_gc(slot->bus, slot->node_enumerator.node);
+ }
+
+ break;
+
+ case BUS_NODE_OBJECT_MANAGER:
+
+ if (slot->node_object_manager.node) {
+ LIST_REMOVE(object_managers, slot->node_object_manager.node->object_managers, &slot->node_object_manager);
+ slot->bus->nodes_modified = true;
+
+ bus_node_gc(slot->bus, slot->node_object_manager.node);
+ }
+
+ break;
+
+ case BUS_NODE_VTABLE:
+
+ if (slot->node_vtable.node && slot->node_vtable.interface && slot->node_vtable.vtable) {
+ const sd_bus_vtable *v;
+
+ for (v = slot->node_vtable.vtable; v->type != _SD_BUS_VTABLE_END; v++) {
+ struct vtable_member *x = NULL;
+
+ switch (v->type) {
+
+ case _SD_BUS_VTABLE_METHOD: {
+ struct vtable_member key;
+
+ key.path = slot->node_vtable.node->path;
+ key.interface = slot->node_vtable.interface;
+ key.member = v->x.method.member;
+
+ x = hashmap_remove(slot->bus->vtable_methods, &key);
+ break;
+ }
+
+ case _SD_BUS_VTABLE_PROPERTY:
+ case _SD_BUS_VTABLE_WRITABLE_PROPERTY: {
+ struct vtable_member key;
+
+ key.path = slot->node_vtable.node->path;
+ key.interface = slot->node_vtable.interface;
+ key.member = v->x.method.member;
+
+ x = hashmap_remove(slot->bus->vtable_properties, &key);
+ break;
+ }}
+
+ free(x);
+ }
+ }
+
+ slot->node_vtable.interface = mfree(slot->node_vtable.interface);
+
+ if (slot->node_vtable.node) {
+ LIST_REMOVE(vtables, slot->node_vtable.node->vtables, &slot->node_vtable);
+ slot->bus->nodes_modified = true;
+
+ bus_node_gc(slot->bus, slot->node_vtable.node);
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Wut? Unknown slot type?");
+ }
+
+ bus = slot->bus;
+
+ slot->type = _BUS_SLOT_INVALID;
+ slot->bus = NULL;
+ LIST_REMOVE(slots, bus->slots, slot);
+
+ if (!slot->floating)
+ sd_bus_unref(bus);
+ else if (unref)
+ sd_bus_slot_unref(slot);
+}
+
+static sd_bus_slot* bus_slot_free(sd_bus_slot *slot) {
+ assert(slot);
+
+ bus_slot_disconnect(slot, false);
+
+ if (slot->destroy_callback)
+ slot->destroy_callback(slot->userdata);
+
+ free(slot->description);
+ return mfree(slot);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_bus_slot, sd_bus_slot, bus_slot_free);
+
+_public_ sd_bus* sd_bus_slot_get_bus(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+
+ return slot->bus;
+}
+
+_public_ void *sd_bus_slot_get_userdata(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+
+ return slot->userdata;
+}
+
+_public_ void *sd_bus_slot_set_userdata(sd_bus_slot *slot, void *userdata) {
+ void *ret;
+
+ assert_return(slot, NULL);
+
+ ret = slot->userdata;
+ slot->userdata = userdata;
+
+ return ret;
+}
+
+_public_ int sd_bus_slot_set_destroy_callback(sd_bus_slot *slot, sd_bus_destroy_t callback) {
+ assert_return(slot, -EINVAL);
+
+ slot->destroy_callback = callback;
+ return 0;
+}
+
+_public_ int sd_bus_slot_get_destroy_callback(sd_bus_slot *slot, sd_bus_destroy_t *callback) {
+ assert_return(slot, -EINVAL);
+
+ if (callback)
+ *callback = slot->destroy_callback;
+
+ return !!slot->destroy_callback;
+}
+
+_public_ sd_bus_message *sd_bus_slot_get_current_message(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+ assert_return(slot->type >= 0, NULL);
+
+ if (slot->bus->current_slot != slot)
+ return NULL;
+
+ return slot->bus->current_message;
+}
+
+_public_ sd_bus_message_handler_t sd_bus_slot_get_current_handler(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+ assert_return(slot->type >= 0, NULL);
+
+ if (slot->bus->current_slot != slot)
+ return NULL;
+
+ return slot->bus->current_handler;
+}
+
+_public_ void* sd_bus_slot_get_current_userdata(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+ assert_return(slot->type >= 0, NULL);
+
+ if (slot->bus->current_slot != slot)
+ return NULL;
+
+ return slot->bus->current_userdata;
+}
+
+_public_ int sd_bus_slot_get_floating(sd_bus_slot *slot) {
+ assert_return(slot, -EINVAL);
+
+ return slot->floating;
+}
+
+_public_ int sd_bus_slot_set_floating(sd_bus_slot *slot, int b) {
+ assert_return(slot, -EINVAL);
+
+ if (slot->floating == !!b)
+ return 0;
+
+ if (!slot->bus) /* already disconnected slots can't be reconnected */
+ return -ESTALE;
+
+ slot->floating = b;
+
+ /* When a slot is "floating" then the bus references the slot. Otherwise the slot references the bus. Hence,
+ * when we move from one to the other, let's increase one reference and decrease the other. */
+
+ if (b) {
+ sd_bus_slot_ref(slot);
+ sd_bus_unref(slot->bus);
+ } else {
+ sd_bus_ref(slot->bus);
+ sd_bus_slot_unref(slot);
+ }
+
+ return 1;
+}
+
+_public_ int sd_bus_slot_set_description(sd_bus_slot *slot, const char *description) {
+ assert_return(slot, -EINVAL);
+
+ return free_and_strdup(&slot->description, description);
+}
+
+_public_ int sd_bus_slot_get_description(sd_bus_slot *slot, const char **description) {
+ assert_return(slot, -EINVAL);
+ assert_return(description, -EINVAL);
+
+ if (slot->description)
+ *description = slot->description;
+ else if (slot->type == BUS_MATCH_CALLBACK)
+ *description = slot->match_callback.match_string;
+ else
+ return -ENXIO;
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/bus-slot.h b/src/libsystemd/sd-bus/bus-slot.h
new file mode 100644
index 0000000..48eb045
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-slot.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "bus-internal.h"
+
+sd_bus_slot *bus_slot_allocate(sd_bus *bus, bool floating, BusSlotType type, size_t extra, void *userdata);
+
+void bus_slot_disconnect(sd_bus_slot *slot, bool unref);
diff --git a/src/libsystemd/sd-bus/bus-socket.c b/src/libsystemd/sd-bus/bus-socket.c
new file mode 100644
index 0000000..441b4a8
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-socket.c
@@ -0,0 +1,1292 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <endian.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-socket.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "io-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "user-util.h"
+#include "utf8.h"
+#include "util.h"
+
+#define SNDBUF_SIZE (8*1024*1024)
+
+static void iovec_advance(struct iovec iov[], unsigned *idx, size_t size) {
+
+ while (size > 0) {
+ struct iovec *i = iov + *idx;
+
+ if (i->iov_len > size) {
+ i->iov_base = (uint8_t*) i->iov_base + size;
+ i->iov_len -= size;
+ return;
+ }
+
+ size -= i->iov_len;
+
+ *i = IOVEC_MAKE(NULL, 0);
+
+ (*idx)++;
+ }
+}
+
+static int append_iovec(sd_bus_message *m, const void *p, size_t sz) {
+ assert(m);
+ assert(p);
+ assert(sz > 0);
+
+ m->iovec[m->n_iovec++] = IOVEC_MAKE((void*) p, sz);
+
+ return 0;
+}
+
+static int bus_message_setup_iovec(sd_bus_message *m) {
+ struct bus_body_part *part;
+ unsigned n, i;
+ int r;
+
+ assert(m);
+ assert(m->sealed);
+
+ if (m->n_iovec > 0)
+ return 0;
+
+ assert(!m->iovec);
+
+ n = 1 + m->n_body_parts;
+ if (n < ELEMENTSOF(m->iovec_fixed))
+ m->iovec = m->iovec_fixed;
+ else {
+ m->iovec = new(struct iovec, n);
+ if (!m->iovec) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ r = append_iovec(m, m->header, BUS_MESSAGE_BODY_BEGIN(m));
+ if (r < 0)
+ goto fail;
+
+ MESSAGE_FOREACH_PART(part, i, m) {
+ r = bus_body_part_map(part);
+ if (r < 0)
+ goto fail;
+
+ r = append_iovec(m, part->data, part->size);
+ if (r < 0)
+ goto fail;
+ }
+
+ assert(n == m->n_iovec);
+
+ return 0;
+
+fail:
+ m->poisoned = true;
+ return r;
+}
+
+bool bus_socket_auth_needs_write(sd_bus *b) {
+
+ unsigned i;
+
+ if (b->auth_index >= ELEMENTSOF(b->auth_iovec))
+ return false;
+
+ for (i = b->auth_index; i < ELEMENTSOF(b->auth_iovec); i++) {
+ struct iovec *j = b->auth_iovec + i;
+
+ if (j->iov_len > 0)
+ return true;
+ }
+
+ return false;
+}
+
+static int bus_socket_write_auth(sd_bus *b) {
+ ssize_t k;
+
+ assert(b);
+ assert(b->state == BUS_AUTHENTICATING);
+
+ if (!bus_socket_auth_needs_write(b))
+ return 0;
+
+ if (b->prefer_writev)
+ k = writev(b->output_fd, b->auth_iovec + b->auth_index, ELEMENTSOF(b->auth_iovec) - b->auth_index);
+ else {
+ struct msghdr mh;
+ zero(mh);
+
+ mh.msg_iov = b->auth_iovec + b->auth_index;
+ mh.msg_iovlen = ELEMENTSOF(b->auth_iovec) - b->auth_index;
+
+ k = sendmsg(b->output_fd, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
+ if (k < 0 && errno == ENOTSOCK) {
+ b->prefer_writev = true;
+ k = writev(b->output_fd, b->auth_iovec + b->auth_index, ELEMENTSOF(b->auth_iovec) - b->auth_index);
+ }
+ }
+
+ if (k < 0)
+ return errno == EAGAIN ? 0 : -errno;
+
+ iovec_advance(b->auth_iovec, &b->auth_index, (size_t) k);
+ return 1;
+}
+
+static int bus_socket_auth_verify_client(sd_bus *b) {
+ char *e, *f, *start;
+ sd_id128_t peer;
+ unsigned i;
+ int r;
+
+ assert(b);
+
+ /* We expect two response lines: "OK" and possibly
+ * "AGREE_UNIX_FD" */
+
+ e = memmem_safe(b->rbuffer, b->rbuffer_size, "\r\n", 2);
+ if (!e)
+ return 0;
+
+ if (b->accept_fd) {
+ f = memmem(e + 2, b->rbuffer_size - (e - (char*) b->rbuffer) - 2, "\r\n", 2);
+ if (!f)
+ return 0;
+
+ start = f + 2;
+ } else {
+ f = NULL;
+ start = e + 2;
+ }
+
+ /* Nice! We got all the lines we need. First check the OK
+ * line */
+
+ if (e - (char*) b->rbuffer != 3 + 32)
+ return -EPERM;
+
+ if (memcmp(b->rbuffer, "OK ", 3))
+ return -EPERM;
+
+ b->auth = b->anonymous_auth ? BUS_AUTH_ANONYMOUS : BUS_AUTH_EXTERNAL;
+
+ for (i = 0; i < 32; i += 2) {
+ int x, y;
+
+ x = unhexchar(((char*) b->rbuffer)[3 + i]);
+ y = unhexchar(((char*) b->rbuffer)[3 + i + 1]);
+
+ if (x < 0 || y < 0)
+ return -EINVAL;
+
+ peer.bytes[i/2] = ((uint8_t) x << 4 | (uint8_t) y);
+ }
+
+ if (!sd_id128_is_null(b->server_id) &&
+ !sd_id128_equal(b->server_id, peer))
+ return -EPERM;
+
+ b->server_id = peer;
+
+ /* And possibly check the second line, too */
+
+ if (f)
+ b->can_fds =
+ (f - e == STRLEN("\r\nAGREE_UNIX_FD")) &&
+ memcmp(e + 2, "AGREE_UNIX_FD",
+ STRLEN("AGREE_UNIX_FD")) == 0;
+
+ b->rbuffer_size -= (start - (char*) b->rbuffer);
+ memmove(b->rbuffer, start, b->rbuffer_size);
+
+ r = bus_start_running(b);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static bool line_equals(const char *s, size_t m, const char *line) {
+ size_t l;
+
+ l = strlen(line);
+ if (l != m)
+ return false;
+
+ return memcmp(s, line, l) == 0;
+}
+
+static bool line_begins(const char *s, size_t m, const char *word) {
+ const char *p;
+
+ p = memory_startswith(s, m, word);
+ return p && (p == (s + m) || *p == ' ');
+}
+
+static int verify_anonymous_token(sd_bus *b, const char *p, size_t l) {
+ _cleanup_free_ char *token = NULL;
+ size_t len;
+ int r;
+
+ if (!b->anonymous_auth)
+ return 0;
+
+ if (l <= 0)
+ return 1;
+
+ assert(p[0] == ' ');
+ p++; l--;
+
+ if (l % 2 != 0)
+ return 0;
+
+ r = unhexmem(p, l, (void **) &token, &len);
+ if (r < 0)
+ return 0;
+
+ if (memchr(token, 0, len))
+ return 0;
+
+ return !!utf8_is_valid(token);
+}
+
+static int verify_external_token(sd_bus *b, const char *p, size_t l) {
+ _cleanup_free_ char *token = NULL;
+ size_t len;
+ uid_t u;
+ int r;
+
+ /* We don't do any real authentication here. Instead, we if
+ * the owner of this bus wanted authentication he should have
+ * checked SO_PEERCRED before even creating the bus object. */
+
+ if (!b->anonymous_auth && !b->ucred_valid)
+ return 0;
+
+ if (l <= 0)
+ return 1;
+
+ assert(p[0] == ' ');
+ p++; l--;
+
+ if (l % 2 != 0)
+ return 0;
+
+ r = unhexmem(p, l, (void**) &token, &len);
+ if (r < 0)
+ return 0;
+
+ if (memchr(token, 0, len))
+ return 0;
+
+ r = parse_uid(token, &u);
+ if (r < 0)
+ return 0;
+
+ /* We ignore the passed value if anonymous authentication is
+ * on anyway. */
+ if (!b->anonymous_auth && u != b->ucred.uid)
+ return 0;
+
+ return 1;
+}
+
+static int bus_socket_auth_write(sd_bus *b, const char *t) {
+ char *p;
+ size_t l;
+
+ assert(b);
+ assert(t);
+
+ /* We only make use of the first iovec */
+ assert(IN_SET(b->auth_index, 0, 1));
+
+ l = strlen(t);
+ p = malloc(b->auth_iovec[0].iov_len + l);
+ if (!p)
+ return -ENOMEM;
+
+ memcpy_safe(p, b->auth_iovec[0].iov_base, b->auth_iovec[0].iov_len);
+ memcpy(p + b->auth_iovec[0].iov_len, t, l);
+
+ b->auth_iovec[0].iov_base = p;
+ b->auth_iovec[0].iov_len += l;
+
+ free(b->auth_buffer);
+ b->auth_buffer = p;
+ b->auth_index = 0;
+ return 0;
+}
+
+static int bus_socket_auth_write_ok(sd_bus *b) {
+ char t[3 + 32 + 2 + 1];
+
+ assert(b);
+
+ xsprintf(t, "OK " SD_ID128_FORMAT_STR "\r\n", SD_ID128_FORMAT_VAL(b->server_id));
+
+ return bus_socket_auth_write(b, t);
+}
+
+static int bus_socket_auth_verify_server(sd_bus *b) {
+ char *e;
+ const char *line;
+ size_t l;
+ bool processed = false;
+ int r;
+
+ assert(b);
+
+ if (b->rbuffer_size < 1)
+ return 0;
+
+ /* First char must be a NUL byte */
+ if (*(char*) b->rbuffer != 0)
+ return -EIO;
+
+ if (b->rbuffer_size < 3)
+ return 0;
+
+ /* Begin with the first line */
+ if (b->auth_rbegin <= 0)
+ b->auth_rbegin = 1;
+
+ for (;;) {
+ /* Check if line is complete */
+ line = (char*) b->rbuffer + b->auth_rbegin;
+ e = memmem(line, b->rbuffer_size - b->auth_rbegin, "\r\n", 2);
+ if (!e)
+ return processed;
+
+ l = e - line;
+
+ if (line_begins(line, l, "AUTH ANONYMOUS")) {
+
+ r = verify_anonymous_token(b, line + 14, l - 14);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ r = bus_socket_auth_write(b, "REJECTED\r\n");
+ else {
+ b->auth = BUS_AUTH_ANONYMOUS;
+ r = bus_socket_auth_write_ok(b);
+ }
+
+ } else if (line_begins(line, l, "AUTH EXTERNAL")) {
+
+ r = verify_external_token(b, line + 13, l - 13);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ r = bus_socket_auth_write(b, "REJECTED\r\n");
+ else {
+ b->auth = BUS_AUTH_EXTERNAL;
+ r = bus_socket_auth_write_ok(b);
+ }
+
+ } else if (line_begins(line, l, "AUTH"))
+ r = bus_socket_auth_write(b, "REJECTED EXTERNAL ANONYMOUS\r\n");
+ else if (line_equals(line, l, "CANCEL") ||
+ line_begins(line, l, "ERROR")) {
+
+ b->auth = _BUS_AUTH_INVALID;
+ r = bus_socket_auth_write(b, "REJECTED\r\n");
+
+ } else if (line_equals(line, l, "BEGIN")) {
+
+ if (b->auth == _BUS_AUTH_INVALID)
+ r = bus_socket_auth_write(b, "ERROR\r\n");
+ else {
+ /* We can't leave from the auth phase
+ * before we haven't written
+ * everything queued, so let's check
+ * that */
+
+ if (bus_socket_auth_needs_write(b))
+ return 1;
+
+ b->rbuffer_size -= (e + 2 - (char*) b->rbuffer);
+ memmove(b->rbuffer, e + 2, b->rbuffer_size);
+ return bus_start_running(b);
+ }
+
+ } else if (line_begins(line, l, "DATA")) {
+
+ if (b->auth == _BUS_AUTH_INVALID)
+ r = bus_socket_auth_write(b, "ERROR\r\n");
+ else {
+ if (b->auth == BUS_AUTH_ANONYMOUS)
+ r = verify_anonymous_token(b, line + 4, l - 4);
+ else
+ r = verify_external_token(b, line + 4, l - 4);
+
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ b->auth = _BUS_AUTH_INVALID;
+ r = bus_socket_auth_write(b, "REJECTED\r\n");
+ } else
+ r = bus_socket_auth_write_ok(b);
+ }
+ } else if (line_equals(line, l, "NEGOTIATE_UNIX_FD")) {
+ if (b->auth == _BUS_AUTH_INVALID || !b->accept_fd)
+ r = bus_socket_auth_write(b, "ERROR\r\n");
+ else {
+ b->can_fds = true;
+ r = bus_socket_auth_write(b, "AGREE_UNIX_FD\r\n");
+ }
+ } else
+ r = bus_socket_auth_write(b, "ERROR\r\n");
+
+ if (r < 0)
+ return r;
+
+ b->auth_rbegin = e + 2 - (char*) b->rbuffer;
+
+ processed = true;
+ }
+}
+
+static int bus_socket_auth_verify(sd_bus *b) {
+ assert(b);
+
+ if (b->is_server)
+ return bus_socket_auth_verify_server(b);
+ else
+ return bus_socket_auth_verify_client(b);
+}
+
+static int bus_socket_read_auth(sd_bus *b) {
+ struct msghdr mh;
+ struct iovec iov = {};
+ size_t n;
+ ssize_t k;
+ int r;
+ void *p;
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int) * BUS_FDS_MAX)];
+ } control;
+ bool handle_cmsg = false;
+
+ assert(b);
+ assert(b->state == BUS_AUTHENTICATING);
+
+ r = bus_socket_auth_verify(b);
+ if (r != 0)
+ return r;
+
+ n = MAX(256u, b->rbuffer_size * 2);
+
+ if (n > BUS_AUTH_SIZE_MAX)
+ n = BUS_AUTH_SIZE_MAX;
+
+ if (b->rbuffer_size >= n)
+ return -ENOBUFS;
+
+ p = realloc(b->rbuffer, n);
+ if (!p)
+ return -ENOMEM;
+
+ b->rbuffer = p;
+
+ iov = IOVEC_MAKE((uint8_t *)b->rbuffer + b->rbuffer_size, n - b->rbuffer_size);
+
+ if (b->prefer_readv)
+ k = readv(b->input_fd, &iov, 1);
+ else {
+ zero(mh);
+ mh.msg_iov = &iov;
+ mh.msg_iovlen = 1;
+ mh.msg_control = &control;
+ mh.msg_controllen = sizeof(control);
+
+ k = recvmsg(b->input_fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (k < 0 && errno == ENOTSOCK) {
+ b->prefer_readv = true;
+ k = readv(b->input_fd, &iov, 1);
+ } else
+ handle_cmsg = true;
+ }
+ if (k < 0)
+ return errno == EAGAIN ? 0 : -errno;
+ if (k == 0)
+ return -ECONNRESET;
+
+ b->rbuffer_size += k;
+
+ if (handle_cmsg) {
+ struct cmsghdr *cmsg;
+
+ CMSG_FOREACH(cmsg, &mh)
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+ int j;
+
+ /* Whut? We received fds during the auth
+ * protocol? Somebody is playing games with
+ * us. Close them all, and fail */
+ j = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+ close_many((int*) CMSG_DATA(cmsg), j);
+ return -EIO;
+ } else
+ log_debug("Got unexpected auxiliary data with level=%d and type=%d",
+ cmsg->cmsg_level, cmsg->cmsg_type);
+ }
+
+ r = bus_socket_auth_verify(b);
+ if (r != 0)
+ return r;
+
+ return 1;
+}
+
+void bus_socket_setup(sd_bus *b) {
+ assert(b);
+
+ /* Increase the buffers to 8 MB */
+ (void) fd_inc_rcvbuf(b->input_fd, SNDBUF_SIZE);
+ (void) fd_inc_sndbuf(b->output_fd, SNDBUF_SIZE);
+
+ b->message_version = 1;
+ b->message_endian = 0;
+}
+
+static void bus_get_peercred(sd_bus *b) {
+ int r;
+
+ assert(b);
+ assert(!b->ucred_valid);
+ assert(!b->label);
+ assert(b->n_groups == (size_t) -1);
+
+ /* Get the peer for socketpair() sockets */
+ b->ucred_valid = getpeercred(b->input_fd, &b->ucred) >= 0;
+
+ /* Get the SELinux context of the peer */
+ r = getpeersec(b->input_fd, &b->label);
+ if (r < 0 && !IN_SET(r, -EOPNOTSUPP, -ENOPROTOOPT))
+ log_debug_errno(r, "Failed to determine peer security context: %m");
+
+ /* Get the list of auxiliary groups of the peer */
+ r = getpeergroups(b->input_fd, &b->groups);
+ if (r >= 0)
+ b->n_groups = (size_t) r;
+ else if (!IN_SET(r, -EOPNOTSUPP, -ENOPROTOOPT))
+ log_debug_errno(r, "Failed to determine peer's group list: %m");
+}
+
+static int bus_socket_start_auth_client(sd_bus *b) {
+ size_t l;
+ const char *auth_suffix, *auth_prefix;
+
+ assert(b);
+
+ if (b->anonymous_auth) {
+ auth_prefix = "\0AUTH ANONYMOUS ";
+
+ /* For ANONYMOUS auth we send some arbitrary "trace" string */
+ l = 9;
+ b->auth_buffer = hexmem("anonymous", l);
+ } else {
+ char text[DECIMAL_STR_MAX(uid_t) + 1];
+
+ auth_prefix = "\0AUTH EXTERNAL ";
+
+ xsprintf(text, UID_FMT, geteuid());
+
+ l = strlen(text);
+ b->auth_buffer = hexmem(text, l);
+ }
+
+ if (!b->auth_buffer)
+ return -ENOMEM;
+
+ if (b->accept_fd)
+ auth_suffix = "\r\nNEGOTIATE_UNIX_FD\r\nBEGIN\r\n";
+ else
+ auth_suffix = "\r\nBEGIN\r\n";
+
+ b->auth_iovec[0] = IOVEC_MAKE((void*) auth_prefix, 1 + strlen(auth_prefix + 1));
+ b->auth_iovec[1] = IOVEC_MAKE(b->auth_buffer, l * 2);
+ b->auth_iovec[2] = IOVEC_MAKE_STRING(auth_suffix);
+
+ return bus_socket_write_auth(b);
+}
+
+int bus_socket_start_auth(sd_bus *b) {
+ assert(b);
+
+ bus_get_peercred(b);
+
+ bus_set_state(b, BUS_AUTHENTICATING);
+ b->auth_timeout = now(CLOCK_MONOTONIC) + BUS_AUTH_TIMEOUT;
+
+ if (sd_is_socket(b->input_fd, AF_UNIX, 0, 0) <= 0)
+ b->accept_fd = false;
+
+ if (b->output_fd != b->input_fd)
+ if (sd_is_socket(b->output_fd, AF_UNIX, 0, 0) <= 0)
+ b->accept_fd = false;
+
+ if (b->is_server)
+ return bus_socket_read_auth(b);
+ else
+ return bus_socket_start_auth_client(b);
+}
+
+static int bus_socket_inotify_setup(sd_bus *b) {
+ _cleanup_free_ int *new_watches = NULL;
+ _cleanup_free_ char *absolute = NULL;
+ size_t n_allocated = 0, n = 0, done = 0, i;
+ unsigned max_follow = 32;
+ const char *p;
+ int wd, r;
+
+ assert(b);
+ assert(b->watch_bind);
+ assert(b->sockaddr.sa.sa_family == AF_UNIX);
+ assert(b->sockaddr.un.sun_path[0] != 0);
+
+ /* Sets up an inotify fd in case watch_bind is enabled: wait until the configured AF_UNIX file system socket
+ * appears before connecting to it. The implemented is pretty simplistic: we just subscribe to relevant changes
+ * to all prefix components of the path, and every time we get an event for that we try to reconnect again,
+ * without actually caring what precisely the event we got told us. If we still can't connect we re-subscribe
+ * to all relevant changes of anything in the path, so that our watches include any possibly newly created path
+ * components. */
+
+ if (b->inotify_fd < 0) {
+ b->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (b->inotify_fd < 0)
+ return -errno;
+
+ b->inotify_fd = fd_move_above_stdio(b->inotify_fd);
+ }
+
+ /* Make sure the path is NUL terminated */
+ p = strndupa(b->sockaddr.un.sun_path, sizeof(b->sockaddr.un.sun_path));
+
+ /* Make sure the path is absolute */
+ r = path_make_absolute_cwd(p, &absolute);
+ if (r < 0)
+ goto fail;
+
+ /* Watch all parent directories, and don't mind any prefix that doesn't exist yet. For the innermost directory
+ * that exists we want to know when files are created or moved into it. For all parents of it we just care if
+ * they are removed or renamed. */
+
+ if (!GREEDY_REALLOC(new_watches, n_allocated, n + 1)) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ /* Start with the top-level directory, which is a bit simpler than the rest, since it can't be a symlink, and
+ * always exists */
+ wd = inotify_add_watch(b->inotify_fd, "/", IN_CREATE|IN_MOVED_TO);
+ if (wd < 0) {
+ r = log_debug_errno(errno, "Failed to add inotify watch on /: %m");
+ goto fail;
+ } else
+ new_watches[n++] = wd;
+
+ for (;;) {
+ _cleanup_free_ char *component = NULL, *prefix = NULL, *destination = NULL;
+ size_t n_slashes, n_component;
+ char *c = NULL;
+
+ n_slashes = strspn(absolute + done, "/");
+ n_component = n_slashes + strcspn(absolute + done + n_slashes, "/");
+
+ if (n_component == 0) /* The end */
+ break;
+
+ component = strndup(absolute + done, n_component);
+ if (!component) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ /* A trailing slash? That's a directory, and not a socket then */
+ if (path_equal(component, "/")) {
+ r = -EISDIR;
+ goto fail;
+ }
+
+ /* A single dot? Let's eat this up */
+ if (path_equal(component, "/.")) {
+ done += n_component;
+ continue;
+ }
+
+ prefix = strndup(absolute, done + n_component);
+ if (!prefix) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (!GREEDY_REALLOC(new_watches, n_allocated, n + 1)) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ wd = inotify_add_watch(b->inotify_fd, prefix, IN_DELETE_SELF|IN_MOVE_SELF|IN_ATTRIB|IN_CREATE|IN_MOVED_TO|IN_DONT_FOLLOW);
+ log_debug("Added inotify watch for %s on bus %s: %i", prefix, strna(b->description), wd);
+
+ if (wd < 0) {
+ if (IN_SET(errno, ENOENT, ELOOP))
+ break; /* This component doesn't exist yet, or the path contains a cyclic symlink right now */
+
+ r = log_debug_errno(errno, "Failed to add inotify watch on %s: %m", empty_to_root(prefix));
+ goto fail;
+ } else
+ new_watches[n++] = wd;
+
+ /* Check if this is possibly a symlink. If so, let's follow it and watch it too. */
+ r = readlink_malloc(prefix, &destination);
+ if (r == -EINVAL) { /* not a symlink */
+ done += n_component;
+ continue;
+ }
+ if (r < 0)
+ goto fail;
+
+ if (isempty(destination)) { /* Empty symlink target? Yuck! */
+ r = -EINVAL;
+ goto fail;
+ }
+
+ if (max_follow <= 0) { /* Let's make sure we don't follow symlinks forever */
+ r = -ELOOP;
+ goto fail;
+ }
+
+ if (path_is_absolute(destination)) {
+ /* For absolute symlinks we build the new path and start anew */
+ c = strjoin(destination, absolute + done + n_component);
+ done = 0;
+ } else {
+ _cleanup_free_ char *t = NULL;
+
+ /* For relative symlinks we replace the last component, and try again */
+ t = strndup(absolute, done);
+ if (!t)
+ return -ENOMEM;
+
+ c = strjoin(t, "/", destination, absolute + done + n_component);
+ }
+ if (!c) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ free(absolute);
+ absolute = c;
+
+ max_follow--;
+ }
+
+ /* And now, let's remove all watches from the previous iteration we don't need anymore */
+ for (i = 0; i < b->n_inotify_watches; i++) {
+ bool found = false;
+ size_t j;
+
+ for (j = 0; j < n; j++)
+ if (new_watches[j] == b->inotify_watches[i]) {
+ found = true;
+ break;
+ }
+
+ if (found)
+ continue;
+
+ (void) inotify_rm_watch(b->inotify_fd, b->inotify_watches[i]);
+ }
+
+ free_and_replace(b->inotify_watches, new_watches);
+ b->n_inotify_watches = n;
+
+ return 0;
+
+fail:
+ bus_close_inotify_fd(b);
+ return r;
+}
+
+int bus_socket_connect(sd_bus *b) {
+ bool inotify_done = false;
+ int r;
+
+ assert(b);
+
+ for (;;) {
+ assert(b->input_fd < 0);
+ assert(b->output_fd < 0);
+ assert(b->sockaddr.sa.sa_family != AF_UNSPEC);
+
+ b->input_fd = socket(b->sockaddr.sa.sa_family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (b->input_fd < 0)
+ return -errno;
+
+ b->input_fd = fd_move_above_stdio(b->input_fd);
+
+ b->output_fd = b->input_fd;
+ bus_socket_setup(b);
+
+ if (connect(b->input_fd, &b->sockaddr.sa, b->sockaddr_size) < 0) {
+ if (errno == EINPROGRESS) {
+
+ /* If we have any inotify watches open, close them now, we don't need them anymore, as
+ * we have successfully initiated a connection */
+ bus_close_inotify_fd(b);
+
+ /* Note that very likely we are already in BUS_OPENING state here, as we enter it when
+ * we start parsing the address string. The only reason we set the state explicitly
+ * here, is to undo BUS_WATCH_BIND, in case we did the inotify magic. */
+ bus_set_state(b, BUS_OPENING);
+ return 1;
+ }
+
+ if (IN_SET(errno, ENOENT, ECONNREFUSED) && /* ENOENT → unix socket doesn't exist at all; ECONNREFUSED → unix socket stale */
+ b->watch_bind &&
+ b->sockaddr.sa.sa_family == AF_UNIX &&
+ b->sockaddr.un.sun_path[0] != 0) {
+
+ /* This connection attempt failed, let's release the socket for now, and start with a
+ * fresh one when reconnecting. */
+ bus_close_io_fds(b);
+
+ if (inotify_done) {
+ /* inotify set up already, don't do it again, just return now, and remember
+ * that we are waiting for inotify events now. */
+ bus_set_state(b, BUS_WATCH_BIND);
+ return 1;
+ }
+
+ /* This is a file system socket, and the inotify logic is enabled. Let's create the necessary inotify fd. */
+ r = bus_socket_inotify_setup(b);
+ if (r < 0)
+ return r;
+
+ /* Let's now try to connect a second time, because in theory there's otherwise a race
+ * here: the socket might have been created in the time between our first connect() and
+ * the time we set up the inotify logic. But let's remember that we set up inotify now,
+ * so that we don't do the connect() more than twice. */
+ inotify_done = true;
+
+ } else
+ return -errno;
+ } else
+ break;
+ }
+
+ /* Yay, established, we don't need no inotify anymore! */
+ bus_close_inotify_fd(b);
+
+ return bus_socket_start_auth(b);
+}
+
+int bus_socket_exec(sd_bus *b) {
+ int s[2], r;
+
+ assert(b);
+ assert(b->input_fd < 0);
+ assert(b->output_fd < 0);
+ assert(b->exec_path);
+ assert(b->busexec_pid == 0);
+
+ r = socketpair(AF_UNIX, SOCK_STREAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, s);
+ if (r < 0)
+ return -errno;
+
+ r = safe_fork_full("(sd-busexec)", s+1, 1, FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS, &b->busexec_pid);
+ if (r < 0) {
+ safe_close_pair(s);
+ return r;
+ }
+ if (r == 0) {
+ /* Child */
+
+ if (rearrange_stdio(s[1], s[1], STDERR_FILENO) < 0)
+ _exit(EXIT_FAILURE);
+
+ (void) rlimit_nofile_safe();
+
+ if (b->exec_argv)
+ execvp(b->exec_path, b->exec_argv);
+ else {
+ const char *argv[] = { b->exec_path, NULL };
+ execvp(b->exec_path, (char**) argv);
+ }
+
+ _exit(EXIT_FAILURE);
+ }
+
+ safe_close(s[1]);
+ b->output_fd = b->input_fd = fd_move_above_stdio(s[0]);
+
+ bus_socket_setup(b);
+
+ return bus_socket_start_auth(b);
+}
+
+int bus_socket_take_fd(sd_bus *b) {
+ assert(b);
+
+ bus_socket_setup(b);
+
+ return bus_socket_start_auth(b);
+}
+
+int bus_socket_write_message(sd_bus *bus, sd_bus_message *m, size_t *idx) {
+ struct iovec *iov;
+ ssize_t k;
+ size_t n;
+ unsigned j;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(idx);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ if (*idx >= BUS_MESSAGE_SIZE(m))
+ return 0;
+
+ r = bus_message_setup_iovec(m);
+ if (r < 0)
+ return r;
+
+ n = m->n_iovec * sizeof(struct iovec);
+ iov = newa(struct iovec, n);
+ memcpy_safe(iov, m->iovec, n);
+
+ j = 0;
+ iovec_advance(iov, &j, *idx);
+
+ if (bus->prefer_writev)
+ k = writev(bus->output_fd, iov, m->n_iovec);
+ else {
+ struct msghdr mh = {
+ .msg_iov = iov,
+ .msg_iovlen = m->n_iovec,
+ };
+
+ if (m->n_fds > 0 && *idx == 0) {
+ struct cmsghdr *control;
+
+ mh.msg_control = control = alloca(CMSG_SPACE(sizeof(int) * m->n_fds));
+ mh.msg_controllen = control->cmsg_len = CMSG_LEN(sizeof(int) * m->n_fds);
+ control->cmsg_level = SOL_SOCKET;
+ control->cmsg_type = SCM_RIGHTS;
+ memcpy(CMSG_DATA(control), m->fds, sizeof(int) * m->n_fds);
+ }
+
+ k = sendmsg(bus->output_fd, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
+ if (k < 0 && errno == ENOTSOCK) {
+ bus->prefer_writev = true;
+ k = writev(bus->output_fd, iov, m->n_iovec);
+ }
+ }
+
+ if (k < 0)
+ return errno == EAGAIN ? 0 : -errno;
+
+ *idx += (size_t) k;
+ return 1;
+}
+
+static int bus_socket_read_message_need(sd_bus *bus, size_t *need) {
+ uint32_t a, b;
+ uint8_t e;
+ uint64_t sum;
+
+ assert(bus);
+ assert(need);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ if (bus->rbuffer_size < sizeof(struct bus_header)) {
+ *need = sizeof(struct bus_header) + 8;
+
+ /* Minimum message size:
+ *
+ * Header +
+ *
+ * Method Call: +2 string headers
+ * Signal: +3 string headers
+ * Method Error: +1 string headers
+ * +1 uint32 headers
+ * Method Reply: +1 uint32 headers
+ *
+ * A string header is at least 9 bytes
+ * A uint32 header is at least 8 bytes
+ *
+ * Hence the minimum message size of a valid message
+ * is header + 8 bytes */
+
+ return 0;
+ }
+
+ a = ((const uint32_t*) bus->rbuffer)[1];
+ b = ((const uint32_t*) bus->rbuffer)[3];
+
+ e = ((const uint8_t*) bus->rbuffer)[0];
+ if (e == BUS_LITTLE_ENDIAN) {
+ a = le32toh(a);
+ b = le32toh(b);
+ } else if (e == BUS_BIG_ENDIAN) {
+ a = be32toh(a);
+ b = be32toh(b);
+ } else
+ return -EBADMSG;
+
+ sum = (uint64_t) sizeof(struct bus_header) + (uint64_t) ALIGN_TO(b, 8) + (uint64_t) a;
+ if (sum >= BUS_MESSAGE_SIZE_MAX)
+ return -ENOBUFS;
+
+ *need = (size_t) sum;
+ return 0;
+}
+
+static int bus_socket_make_message(sd_bus *bus, size_t size) {
+ sd_bus_message *t = NULL;
+ void *b;
+ int r;
+
+ assert(bus);
+ assert(bus->rbuffer_size >= size);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ r = bus_rqueue_make_room(bus);
+ if (r < 0)
+ return r;
+
+ if (bus->rbuffer_size > size) {
+ b = memdup((const uint8_t*) bus->rbuffer + size,
+ bus->rbuffer_size - size);
+ if (!b)
+ return -ENOMEM;
+ } else
+ b = NULL;
+
+ r = bus_message_from_malloc(bus,
+ bus->rbuffer, size,
+ bus->fds, bus->n_fds,
+ NULL,
+ &t);
+ if (r == -EBADMSG)
+ log_debug_errno(r, "Received invalid message from connection %s, dropping.", strna(bus->description));
+ else if (r < 0) {
+ free(b);
+ return r;
+ }
+
+ bus->rbuffer = b;
+ bus->rbuffer_size -= size;
+
+ bus->fds = NULL;
+ bus->n_fds = 0;
+
+ if (t)
+ bus->rqueue[bus->rqueue_size++] = t;
+
+ return 1;
+}
+
+int bus_socket_read_message(sd_bus *bus) {
+ struct msghdr mh;
+ struct iovec iov = {};
+ ssize_t k;
+ size_t need;
+ int r;
+ void *b;
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int) * BUS_FDS_MAX)];
+ } control;
+ bool handle_cmsg = false;
+
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ r = bus_socket_read_message_need(bus, &need);
+ if (r < 0)
+ return r;
+
+ if (bus->rbuffer_size >= need)
+ return bus_socket_make_message(bus, need);
+
+ b = realloc(bus->rbuffer, need);
+ if (!b)
+ return -ENOMEM;
+
+ bus->rbuffer = b;
+
+ iov = IOVEC_MAKE((uint8_t *)bus->rbuffer + bus->rbuffer_size, need - bus->rbuffer_size);
+
+ if (bus->prefer_readv)
+ k = readv(bus->input_fd, &iov, 1);
+ else {
+ zero(mh);
+ mh.msg_iov = &iov;
+ mh.msg_iovlen = 1;
+ mh.msg_control = &control;
+ mh.msg_controllen = sizeof(control);
+
+ k = recvmsg(bus->input_fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (k < 0 && errno == ENOTSOCK) {
+ bus->prefer_readv = true;
+ k = readv(bus->input_fd, &iov, 1);
+ } else
+ handle_cmsg = true;
+ }
+ if (k < 0)
+ return errno == EAGAIN ? 0 : -errno;
+ if (k == 0)
+ return -ECONNRESET;
+
+ bus->rbuffer_size += k;
+
+ if (handle_cmsg) {
+ struct cmsghdr *cmsg;
+
+ CMSG_FOREACH(cmsg, &mh)
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+ int n, *f, i;
+
+ n = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+ if (!bus->can_fds) {
+ /* Whut? We received fds but this
+ * isn't actually enabled? Close them,
+ * and fail */
+
+ close_many((int*) CMSG_DATA(cmsg), n);
+ return -EIO;
+ }
+
+ f = reallocarray(bus->fds, bus->n_fds + n, sizeof(int));
+ if (!f) {
+ close_many((int*) CMSG_DATA(cmsg), n);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < n; i++)
+ f[bus->n_fds++] = fd_move_above_stdio(((int*) CMSG_DATA(cmsg))[i]);
+ bus->fds = f;
+ } else
+ log_debug("Got unexpected auxiliary data with level=%d and type=%d",
+ cmsg->cmsg_level, cmsg->cmsg_type);
+ }
+
+ r = bus_socket_read_message_need(bus, &need);
+ if (r < 0)
+ return r;
+
+ if (bus->rbuffer_size >= need)
+ return bus_socket_make_message(bus, need);
+
+ return 1;
+}
+
+int bus_socket_process_opening(sd_bus *b) {
+ int error = 0;
+ socklen_t slen = sizeof(error);
+ struct pollfd p = {
+ .fd = b->output_fd,
+ .events = POLLOUT,
+ };
+ int r;
+
+ assert(b->state == BUS_OPENING);
+
+ r = poll(&p, 1, 0);
+ if (r < 0)
+ return -errno;
+
+ if (!(p.revents & (POLLOUT|POLLERR|POLLHUP)))
+ return 0;
+
+ r = getsockopt(b->output_fd, SOL_SOCKET, SO_ERROR, &error, &slen);
+ if (r < 0)
+ b->last_connect_error = errno;
+ else if (error != 0)
+ b->last_connect_error = error;
+ else if (p.revents & (POLLERR|POLLHUP))
+ b->last_connect_error = ECONNREFUSED;
+ else
+ return bus_socket_start_auth(b);
+
+ return bus_next_address(b);
+}
+
+int bus_socket_process_authenticating(sd_bus *b) {
+ int r;
+
+ assert(b);
+ assert(b->state == BUS_AUTHENTICATING);
+
+ if (now(CLOCK_MONOTONIC) >= b->auth_timeout)
+ return -ETIMEDOUT;
+
+ r = bus_socket_write_auth(b);
+ if (r != 0)
+ return r;
+
+ return bus_socket_read_auth(b);
+}
+
+int bus_socket_process_watch_bind(sd_bus *b) {
+ int r, q;
+
+ assert(b);
+ assert(b->state == BUS_WATCH_BIND);
+ assert(b->inotify_fd >= 0);
+
+ r = flush_fd(b->inotify_fd);
+ if (r <= 0)
+ return r;
+
+ log_debug("Got inotify event on bus %s.", strna(b->description));
+
+ /* We flushed events out of the inotify fd. In that case, maybe the socket is valid now? Let's try to connect
+ * to it again */
+
+ r = bus_socket_connect(b);
+ if (r < 0)
+ return r;
+
+ q = bus_attach_io_events(b);
+ if (q < 0)
+ return q;
+
+ q = bus_attach_inotify_event(b);
+ if (q < 0)
+ return q;
+
+ return r;
+}
diff --git a/src/libsystemd/sd-bus/bus-socket.h b/src/libsystemd/sd-bus/bus-socket.h
new file mode 100644
index 0000000..f8d2455
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-socket.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+void bus_socket_setup(sd_bus *b);
+
+int bus_socket_connect(sd_bus *b);
+int bus_socket_exec(sd_bus *b);
+int bus_socket_take_fd(sd_bus *b);
+int bus_socket_start_auth(sd_bus *b);
+
+int bus_socket_write_message(sd_bus *bus, sd_bus_message *m, size_t *idx);
+int bus_socket_read_message(sd_bus *bus);
+
+int bus_socket_process_opening(sd_bus *b);
+int bus_socket_process_authenticating(sd_bus *b);
+int bus_socket_process_watch_bind(sd_bus *b);
+
+bool bus_socket_auth_needs_write(sd_bus *b);
diff --git a/src/libsystemd/sd-bus/bus-track.c b/src/libsystemd/sd-bus/bus-track.c
new file mode 100644
index 0000000..efbd3ed
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-track.c
@@ -0,0 +1,492 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-track.h"
+#include "bus-util.h"
+
+struct track_item {
+ unsigned n_ref;
+ char *name;
+ sd_bus_slot *slot;
+};
+
+struct sd_bus_track {
+ unsigned n_ref;
+ unsigned n_adding; /* are we in the process of adding a new name? */
+ sd_bus *bus;
+ sd_bus_track_handler_t handler;
+ void *userdata;
+ Hashmap *names;
+ LIST_FIELDS(sd_bus_track, queue);
+ Iterator iterator;
+ bool in_list:1; /* In bus->tracks? */
+ bool in_queue:1; /* In bus->track_queue? */
+ bool modified:1;
+ bool recursive:1;
+ sd_bus_destroy_t destroy_callback;
+
+ LIST_FIELDS(sd_bus_track, tracks);
+};
+
+#define MATCH_FOR_NAME(name) \
+ strjoina("type='signal'," \
+ "sender='org.freedesktop.DBus'," \
+ "path='/org/freedesktop/DBus'," \
+ "interface='org.freedesktop.DBus'," \
+ "member='NameOwnerChanged'," \
+ "arg0='", name, "'")
+
+static struct track_item* track_item_free(struct track_item *i) {
+
+ if (!i)
+ return NULL;
+
+ sd_bus_slot_unref(i->slot);
+ free(i->name);
+ return mfree(i);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct track_item*, track_item_free);
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(track_item_hash_ops, char, string_hash_func, string_compare_func,
+ struct track_item, track_item_free);
+
+static void bus_track_add_to_queue(sd_bus_track *track) {
+ assert(track);
+
+ /* Adds the bus track object to the queue of objects we should dispatch next, subject to a number of
+ * conditions. */
+
+ /* Already in the queue? */
+ if (track->in_queue)
+ return;
+
+ /* if we are currently in the process of adding a new name, then let's not enqueue this just yet, let's wait
+ * until the addition is complete. */
+ if (track->n_adding > 0)
+ return;
+
+ /* still referenced? */
+ if (hashmap_size(track->names) > 0)
+ return;
+
+ /* Nothing to call? */
+ if (!track->handler)
+ return;
+
+ /* Already closed? */
+ if (!track->in_list)
+ return;
+
+ LIST_PREPEND(queue, track->bus->track_queue, track);
+ track->in_queue = true;
+}
+
+static void bus_track_remove_from_queue(sd_bus_track *track) {
+ assert(track);
+
+ if (!track->in_queue)
+ return;
+
+ LIST_REMOVE(queue, track->bus->track_queue, track);
+ track->in_queue = false;
+}
+
+static int bus_track_remove_name_fully(sd_bus_track *track, const char *name) {
+ struct track_item *i;
+
+ assert(track);
+ assert(name);
+
+ i = hashmap_remove(track->names, name);
+ if (!i)
+ return 0;
+
+ track_item_free(i);
+
+ bus_track_add_to_queue(track);
+
+ track->modified = true;
+ return 1;
+}
+
+_public_ int sd_bus_track_new(
+ sd_bus *bus,
+ sd_bus_track **track,
+ sd_bus_track_handler_t handler,
+ void *userdata) {
+
+ sd_bus_track *t;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(track, -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ t = new0(sd_bus_track, 1);
+ if (!t)
+ return -ENOMEM;
+
+ t->n_ref = 1;
+ t->handler = handler;
+ t->userdata = userdata;
+ t->bus = sd_bus_ref(bus);
+
+ LIST_PREPEND(tracks, bus->tracks, t);
+ t->in_list = true;
+
+ bus_track_add_to_queue(t);
+
+ *track = t;
+ return 0;
+}
+
+static sd_bus_track *track_free(sd_bus_track *track) {
+ assert(track);
+
+ if (track->in_list)
+ LIST_REMOVE(tracks, track->bus->tracks, track);
+
+ bus_track_remove_from_queue(track);
+ track->names = hashmap_free(track->names);
+ track->bus = sd_bus_unref(track->bus);
+
+ if (track->destroy_callback)
+ track->destroy_callback(track->userdata);
+
+ return mfree(track);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_bus_track, sd_bus_track, track_free);
+
+static int on_name_owner_changed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ sd_bus_track *track = userdata;
+ const char *name, *old, *new;
+ int r;
+
+ assert(message);
+ assert(track);
+
+ r = sd_bus_message_read(message, "sss", &name, &old, &new);
+ if (r < 0)
+ return 0;
+
+ bus_track_remove_name_fully(track, name);
+ return 0;
+}
+
+_public_ int sd_bus_track_add_name(sd_bus_track *track, const char *name) {
+ _cleanup_(track_item_freep) struct track_item *n = NULL;
+ struct track_item *i;
+ const char *match;
+ int r;
+
+ assert_return(track, -EINVAL);
+ assert_return(service_name_is_valid(name), -EINVAL);
+
+ i = hashmap_get(track->names, name);
+ if (i) {
+ if (track->recursive) {
+ unsigned k = track->n_ref + 1;
+
+ if (k < track->n_ref) /* Check for overflow */
+ return -EOVERFLOW;
+
+ track->n_ref = k;
+ }
+
+ bus_track_remove_from_queue(track);
+ return 0;
+ }
+
+ r = hashmap_ensure_allocated(&track->names, &track_item_hash_ops);
+ if (r < 0)
+ return r;
+
+ n = new0(struct track_item, 1);
+ if (!n)
+ return -ENOMEM;
+ n->name = strdup(name);
+ if (!n->name)
+ return -ENOMEM;
+
+ /* First, subscribe to this name */
+ match = MATCH_FOR_NAME(name);
+
+ bus_track_remove_from_queue(track); /* don't dispatch this while we work in it */
+
+ r = sd_bus_add_match_async(track->bus, &n->slot, match, on_name_owner_changed, NULL, track);
+ if (r < 0) {
+ bus_track_add_to_queue(track);
+ return r;
+ }
+
+ r = hashmap_put(track->names, n->name, n);
+ if (r < 0) {
+ bus_track_add_to_queue(track);
+ return r;
+ }
+
+ /* Second, check if it is currently existing, or maybe doesn't, or maybe disappeared already. */
+ track->n_adding++; /* again, make sure this isn't dispatch while we are working in it */
+ r = sd_bus_get_name_creds(track->bus, name, 0, NULL);
+ track->n_adding--;
+ if (r < 0) {
+ hashmap_remove(track->names, name);
+ bus_track_add_to_queue(track);
+ return r;
+ }
+
+ n->n_ref = 1;
+ n = NULL;
+
+ bus_track_remove_from_queue(track);
+ track->modified = true;
+
+ return 1;
+}
+
+_public_ int sd_bus_track_remove_name(sd_bus_track *track, const char *name) {
+ struct track_item *i;
+
+ assert_return(name, -EINVAL);
+
+ if (!track) /* Treat a NULL track object as an empty track object */
+ return 0;
+
+ if (!track->recursive)
+ return bus_track_remove_name_fully(track, name);
+
+ i = hashmap_get(track->names, name);
+ if (!i)
+ return -EUNATCH;
+ if (i->n_ref <= 0)
+ return -EUNATCH;
+
+ i->n_ref--;
+
+ if (i->n_ref <= 0)
+ return bus_track_remove_name_fully(track, name);
+
+ return 1;
+}
+
+_public_ unsigned sd_bus_track_count(sd_bus_track *track) {
+
+ if (!track) /* Let's consider a NULL object equivalent to an empty object */
+ return 0;
+
+ /* This signature really should have returned an int, so that we can propagate errors. But well, ... Also, note
+ * that this returns the number of names being watched, and multiple references to the same name are not
+ * counted. */
+
+ return hashmap_size(track->names);
+}
+
+_public_ const char* sd_bus_track_contains(sd_bus_track *track, const char *name) {
+ assert_return(name, NULL);
+
+ if (!track) /* Let's consider a NULL object equivalent to an empty object */
+ return NULL;
+
+ return hashmap_get(track->names, (void*) name) ? name : NULL;
+}
+
+_public_ const char* sd_bus_track_first(sd_bus_track *track) {
+ const char *n = NULL;
+
+ if (!track)
+ return NULL;
+
+ track->modified = false;
+ track->iterator = ITERATOR_FIRST;
+
+ hashmap_iterate(track->names, &track->iterator, NULL, (const void**) &n);
+ return n;
+}
+
+_public_ const char* sd_bus_track_next(sd_bus_track *track) {
+ const char *n = NULL;
+
+ if (!track)
+ return NULL;
+
+ if (track->modified)
+ return NULL;
+
+ hashmap_iterate(track->names, &track->iterator, NULL, (const void**) &n);
+ return n;
+}
+
+_public_ int sd_bus_track_add_sender(sd_bus_track *track, sd_bus_message *m) {
+ const char *sender;
+
+ assert_return(track, -EINVAL);
+ assert_return(m, -EINVAL);
+
+ if (sd_bus_message_get_bus(m) != track->bus)
+ return -EINVAL;
+
+ sender = sd_bus_message_get_sender(m);
+ if (!sender)
+ return -EINVAL;
+
+ return sd_bus_track_add_name(track, sender);
+}
+
+_public_ int sd_bus_track_remove_sender(sd_bus_track *track, sd_bus_message *m) {
+ const char *sender;
+
+ assert_return(m, -EINVAL);
+
+ if (!track) /* Treat a NULL track object as an empty track object */
+ return 0;
+
+ if (sd_bus_message_get_bus(m) != track->bus)
+ return -EINVAL;
+
+ sender = sd_bus_message_get_sender(m);
+ if (!sender)
+ return -EINVAL;
+
+ return sd_bus_track_remove_name(track, sender);
+}
+
+_public_ sd_bus* sd_bus_track_get_bus(sd_bus_track *track) {
+ assert_return(track, NULL);
+
+ return track->bus;
+}
+
+void bus_track_dispatch(sd_bus_track *track) {
+ int r;
+
+ assert(track);
+ assert(track->handler);
+
+ bus_track_remove_from_queue(track);
+
+ sd_bus_track_ref(track);
+
+ r = track->handler(track, track->userdata);
+ if (r < 0)
+ log_debug_errno(r, "Failed to process track handler: %m");
+ else if (r == 0)
+ bus_track_add_to_queue(track);
+
+ sd_bus_track_unref(track);
+}
+
+void bus_track_close(sd_bus_track *track) {
+ assert(track);
+
+ /* Called whenever our bus connected is closed. If so, and our track object is non-empty, dispatch it
+ * immediately, as we are closing now, but first flush out all names. */
+
+ if (!track->in_list)
+ return; /* We already closed this one, don't close it again. */
+
+ /* Remember that this one is closed now */
+ LIST_REMOVE(tracks, track->bus->tracks, track);
+ track->in_list = false;
+
+ /* If there's no name in this one anyway, we don't have to dispatch */
+ if (hashmap_isempty(track->names))
+ return;
+
+ /* Let's flush out all names */
+ hashmap_clear(track->names);
+
+ /* Invoke handler */
+ if (track->handler)
+ bus_track_dispatch(track);
+}
+
+_public_ void *sd_bus_track_get_userdata(sd_bus_track *track) {
+ assert_return(track, NULL);
+
+ return track->userdata;
+}
+
+_public_ void *sd_bus_track_set_userdata(sd_bus_track *track, void *userdata) {
+ void *ret;
+
+ assert_return(track, NULL);
+
+ ret = track->userdata;
+ track->userdata = userdata;
+
+ return ret;
+}
+
+_public_ int sd_bus_track_set_destroy_callback(sd_bus_track *track, sd_bus_destroy_t callback) {
+ assert_return(track, -EINVAL);
+
+ track->destroy_callback = callback;
+ return 0;
+}
+
+_public_ int sd_bus_track_get_destroy_callback(sd_bus_track *track, sd_bus_destroy_t *ret) {
+ assert_return(track, -EINVAL);
+
+ if (ret)
+ *ret = track->destroy_callback;
+
+ return !!track->destroy_callback;
+}
+
+_public_ int sd_bus_track_set_recursive(sd_bus_track *track, int b) {
+ assert_return(track, -EINVAL);
+
+ if (track->recursive == !!b)
+ return 0;
+
+ if (!hashmap_isempty(track->names))
+ return -EBUSY;
+
+ track->recursive = b;
+ return 0;
+}
+
+_public_ int sd_bus_track_get_recursive(sd_bus_track *track) {
+ assert_return(track, -EINVAL);
+
+ return track->recursive;
+}
+
+_public_ int sd_bus_track_count_sender(sd_bus_track *track, sd_bus_message *m) {
+ const char *sender;
+
+ assert_return(m, -EINVAL);
+
+ if (!track) /* Let's consider a NULL object equivalent to an empty object */
+ return 0;
+
+ if (sd_bus_message_get_bus(m) != track->bus)
+ return -EINVAL;
+
+ sender = sd_bus_message_get_sender(m);
+ if (!sender)
+ return -EINVAL;
+
+ return sd_bus_track_count_name(track, sender);
+}
+
+_public_ int sd_bus_track_count_name(sd_bus_track *track, const char *name) {
+ struct track_item *i;
+
+ assert_return(service_name_is_valid(name), -EINVAL);
+
+ if (!track) /* Let's consider a NULL object equivalent to an empty object */
+ return 0;
+
+ i = hashmap_get(track->names, name);
+ if (!i)
+ return 0;
+
+ return i->n_ref;
+}
diff --git a/src/libsystemd/sd-bus/bus-track.h b/src/libsystemd/sd-bus/bus-track.h
new file mode 100644
index 0000000..209b989
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-track.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+void bus_track_dispatch(sd_bus_track *track);
+void bus_track_close(sd_bus_track *track);
diff --git a/src/libsystemd/sd-bus/bus-type.c b/src/libsystemd/sd-bus/bus-type.c
new file mode 100644
index 0000000..18564a5
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-type.c
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "sd-bus.h"
+
+#include "bus-type.h"
+
+bool bus_type_is_valid(char c) {
+ static const char valid[] = {
+ SD_BUS_TYPE_BYTE,
+ SD_BUS_TYPE_BOOLEAN,
+ SD_BUS_TYPE_INT16,
+ SD_BUS_TYPE_UINT16,
+ SD_BUS_TYPE_INT32,
+ SD_BUS_TYPE_UINT32,
+ SD_BUS_TYPE_INT64,
+ SD_BUS_TYPE_UINT64,
+ SD_BUS_TYPE_DOUBLE,
+ SD_BUS_TYPE_STRING,
+ SD_BUS_TYPE_OBJECT_PATH,
+ SD_BUS_TYPE_SIGNATURE,
+ SD_BUS_TYPE_ARRAY,
+ SD_BUS_TYPE_VARIANT,
+ SD_BUS_TYPE_STRUCT,
+ SD_BUS_TYPE_DICT_ENTRY,
+ SD_BUS_TYPE_UNIX_FD
+ };
+
+ return !!memchr(valid, c, sizeof(valid));
+}
+
+bool bus_type_is_basic(char c) {
+ static const char valid[] = {
+ SD_BUS_TYPE_BYTE,
+ SD_BUS_TYPE_BOOLEAN,
+ SD_BUS_TYPE_INT16,
+ SD_BUS_TYPE_UINT16,
+ SD_BUS_TYPE_INT32,
+ SD_BUS_TYPE_UINT32,
+ SD_BUS_TYPE_INT64,
+ SD_BUS_TYPE_UINT64,
+ SD_BUS_TYPE_DOUBLE,
+ SD_BUS_TYPE_STRING,
+ SD_BUS_TYPE_OBJECT_PATH,
+ SD_BUS_TYPE_SIGNATURE,
+ SD_BUS_TYPE_UNIX_FD
+ };
+
+ return !!memchr(valid, c, sizeof(valid));
+}
+
+bool bus_type_is_trivial(char c) {
+ static const char valid[] = {
+ SD_BUS_TYPE_BYTE,
+ SD_BUS_TYPE_BOOLEAN,
+ SD_BUS_TYPE_INT16,
+ SD_BUS_TYPE_UINT16,
+ SD_BUS_TYPE_INT32,
+ SD_BUS_TYPE_UINT32,
+ SD_BUS_TYPE_INT64,
+ SD_BUS_TYPE_UINT64,
+ SD_BUS_TYPE_DOUBLE
+ };
+
+ return !!memchr(valid, c, sizeof(valid));
+}
+
+bool bus_type_is_container(char c) {
+ static const char valid[] = {
+ SD_BUS_TYPE_ARRAY,
+ SD_BUS_TYPE_VARIANT,
+ SD_BUS_TYPE_STRUCT,
+ SD_BUS_TYPE_DICT_ENTRY
+ };
+
+ return !!memchr(valid, c, sizeof(valid));
+}
+
+int bus_type_get_alignment(char c) {
+
+ switch (c) {
+ case SD_BUS_TYPE_BYTE:
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_VARIANT:
+ return 1;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ return 2;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_ARRAY:
+ case SD_BUS_TYPE_UNIX_FD:
+ return 4;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ case SD_BUS_TYPE_STRUCT:
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN:
+ return 8;
+ }
+
+ return -EINVAL;
+}
+
+int bus_type_get_size(char c) {
+
+ switch (c) {
+ case SD_BUS_TYPE_BYTE:
+ return 1;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ return 2;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_UNIX_FD:
+ return 4;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ return 8;
+ }
+
+ return -EINVAL;
+}
diff --git a/src/libsystemd/sd-bus/bus-type.h b/src/libsystemd/sd-bus/bus-type.h
new file mode 100644
index 0000000..0ecd851
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-type.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+bool bus_type_is_valid(char c) _const_;
+bool bus_type_is_basic(char c) _const_;
+/* "trivial" is systemd's term for what the D-Bus Specification calls
+ * a "fixed type": that is, a basic type of fixed length */
+bool bus_type_is_trivial(char c) _const_;
+bool bus_type_is_container(char c) _const_;
+
+int bus_type_get_alignment(char c) _const_;
+int bus_type_get_size(char c) _const_;
diff --git a/src/libsystemd/sd-bus/sd-bus.c b/src/libsystemd/sd-bus/sd-bus.c
new file mode 100644
index 0000000..1ff858f
--- /dev/null
+++ b/src/libsystemd/sd-bus/sd-bus.c
@@ -0,0 +1,4146 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <endian.h>
+#include <netdb.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-container.h"
+#include "bus-control.h"
+#include "bus-internal.h"
+#include "bus-kernel.h"
+#include "bus-label.h"
+#include "bus-message.h"
+#include "bus-objects.h"
+#include "bus-protocol.h"
+#include "bus-slot.h"
+#include "bus-socket.h"
+#include "bus-track.h"
+#include "bus-type.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+#define log_debug_bus_message(m) \
+ do { \
+ sd_bus_message *_mm = (m); \
+ log_debug("Got message type=%s sender=%s destination=%s path=%s interface=%s member=%s cookie=%" PRIu64 " reply_cookie=%" PRIu64 " signature=%s error-name=%s error-message=%s", \
+ bus_message_type_to_string(_mm->header->type), \
+ strna(sd_bus_message_get_sender(_mm)), \
+ strna(sd_bus_message_get_destination(_mm)), \
+ strna(sd_bus_message_get_path(_mm)), \
+ strna(sd_bus_message_get_interface(_mm)), \
+ strna(sd_bus_message_get_member(_mm)), \
+ BUS_MESSAGE_COOKIE(_mm), \
+ _mm->reply_cookie, \
+ strna(_mm->root_container.signature), \
+ strna(_mm->error.name), \
+ strna(_mm->error.message)); \
+ } while (false)
+
+static int bus_poll(sd_bus *bus, bool need_more, uint64_t timeout_usec);
+static void bus_detach_io_events(sd_bus *b);
+static void bus_detach_inotify_event(sd_bus *b);
+
+static thread_local sd_bus *default_system_bus = NULL;
+static thread_local sd_bus *default_user_bus = NULL;
+static thread_local sd_bus *default_starter_bus = NULL;
+
+static sd_bus **bus_choose_default(int (**bus_open)(sd_bus **)) {
+ const char *e;
+
+ /* Let's try our best to reuse another cached connection. If
+ * the starter bus type is set, connect via our normal
+ * connection logic, ignoring $DBUS_STARTER_ADDRESS, so that
+ * we can share the connection with the user/system default
+ * bus. */
+
+ e = secure_getenv("DBUS_STARTER_BUS_TYPE");
+ if (e) {
+ if (streq(e, "system")) {
+ if (bus_open)
+ *bus_open = sd_bus_open_system;
+ return &default_system_bus;
+ } else if (STR_IN_SET(e, "user", "session")) {
+ if (bus_open)
+ *bus_open = sd_bus_open_user;
+ return &default_user_bus;
+ }
+ }
+
+ /* No type is specified, so we have not other option than to
+ * use the starter address if it is set. */
+ e = secure_getenv("DBUS_STARTER_ADDRESS");
+ if (e) {
+ if (bus_open)
+ *bus_open = sd_bus_open;
+ return &default_starter_bus;
+ }
+
+ /* Finally, if nothing is set use the cached connection for
+ * the right scope */
+
+ if (cg_pid_get_owner_uid(0, NULL) >= 0) {
+ if (bus_open)
+ *bus_open = sd_bus_open_user;
+ return &default_user_bus;
+ } else {
+ if (bus_open)
+ *bus_open = sd_bus_open_system;
+ return &default_system_bus;
+ }
+}
+
+sd_bus *bus_resolve(sd_bus *bus) {
+ switch ((uintptr_t) bus) {
+ case (uintptr_t) SD_BUS_DEFAULT:
+ return *(bus_choose_default(NULL));
+ case (uintptr_t) SD_BUS_DEFAULT_USER:
+ return default_user_bus;
+ case (uintptr_t) SD_BUS_DEFAULT_SYSTEM:
+ return default_system_bus;
+ default:
+ return bus;
+ }
+}
+
+void bus_close_io_fds(sd_bus *b) {
+ assert(b);
+
+ bus_detach_io_events(b);
+
+ if (b->input_fd != b->output_fd)
+ safe_close(b->output_fd);
+ b->output_fd = b->input_fd = safe_close(b->input_fd);
+}
+
+void bus_close_inotify_fd(sd_bus *b) {
+ assert(b);
+
+ bus_detach_inotify_event(b);
+
+ b->inotify_fd = safe_close(b->inotify_fd);
+ b->inotify_watches = mfree(b->inotify_watches);
+ b->n_inotify_watches = 0;
+}
+
+static void bus_reset_queues(sd_bus *b) {
+ assert(b);
+
+ while (b->rqueue_size > 0)
+ sd_bus_message_unref(b->rqueue[--b->rqueue_size]);
+
+ b->rqueue = mfree(b->rqueue);
+ b->rqueue_allocated = 0;
+
+ while (b->wqueue_size > 0)
+ sd_bus_message_unref(b->wqueue[--b->wqueue_size]);
+
+ b->wqueue = mfree(b->wqueue);
+ b->wqueue_allocated = 0;
+}
+
+static sd_bus* bus_free(sd_bus *b) {
+ sd_bus_slot *s;
+
+ assert(b);
+ assert(!b->track_queue);
+ assert(!b->tracks);
+
+ b->state = BUS_CLOSED;
+
+ sd_bus_detach_event(b);
+
+ while ((s = b->slots)) {
+ /* At this point only floating slots can still be
+ * around, because the non-floating ones keep a
+ * reference to the bus, and we thus couldn't be
+ * destructing right now... We forcibly disconnect the
+ * slots here, so that they still can be referenced by
+ * apps, but are dead. */
+
+ assert(s->floating);
+ bus_slot_disconnect(s, true);
+ }
+
+ if (b->default_bus_ptr)
+ *b->default_bus_ptr = NULL;
+
+ bus_close_io_fds(b);
+ bus_close_inotify_fd(b);
+
+ free(b->label);
+ free(b->groups);
+ free(b->rbuffer);
+ free(b->unique_name);
+ free(b->auth_buffer);
+ free(b->address);
+ free(b->machine);
+ free(b->description);
+ free(b->patch_sender);
+
+ free(b->exec_path);
+ strv_free(b->exec_argv);
+
+ close_many(b->fds, b->n_fds);
+ free(b->fds);
+
+ bus_reset_queues(b);
+
+ ordered_hashmap_free_free(b->reply_callbacks);
+ prioq_free(b->reply_callbacks_prioq);
+
+ assert(b->match_callbacks.type == BUS_MATCH_ROOT);
+ bus_match_free(&b->match_callbacks);
+
+ hashmap_free_free(b->vtable_methods);
+ hashmap_free_free(b->vtable_properties);
+
+ assert(hashmap_isempty(b->nodes));
+ hashmap_free(b->nodes);
+
+ bus_flush_memfd(b);
+
+ assert_se(pthread_mutex_destroy(&b->memfd_cache_mutex) == 0);
+
+ return mfree(b);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(sd_bus*, bus_free);
+
+_public_ int sd_bus_new(sd_bus **ret) {
+ _cleanup_free_ sd_bus *b = NULL;
+
+ assert_return(ret, -EINVAL);
+
+ b = new(sd_bus, 1);
+ if (!b)
+ return -ENOMEM;
+
+ *b = (sd_bus) {
+ .n_ref = REFCNT_INIT,
+ .input_fd = -1,
+ .output_fd = -1,
+ .inotify_fd = -1,
+ .message_version = 1,
+ .creds_mask = SD_BUS_CREDS_WELL_KNOWN_NAMES|SD_BUS_CREDS_UNIQUE_NAME,
+ .accept_fd = true,
+ .original_pid = getpid_cached(),
+ .n_groups = (size_t) -1,
+ .close_on_exit = true,
+ };
+
+ assert_se(pthread_mutex_init(&b->memfd_cache_mutex, NULL) == 0);
+
+ /* We guarantee that wqueue always has space for at least one entry */
+ if (!GREEDY_REALLOC(b->wqueue, b->wqueue_allocated, 1))
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ int sd_bus_set_address(sd_bus *bus, const char *address) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(address, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return free_and_strdup(&bus->address, address);
+}
+
+_public_ int sd_bus_set_fd(sd_bus *bus, int input_fd, int output_fd) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(input_fd >= 0, -EBADF);
+ assert_return(output_fd >= 0, -EBADF);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->input_fd = input_fd;
+ bus->output_fd = output_fd;
+ return 0;
+}
+
+_public_ int sd_bus_set_exec(sd_bus *bus, const char *path, char *const argv[]) {
+ _cleanup_strv_free_ char **a = NULL;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(path, -EINVAL);
+ assert_return(!strv_isempty(argv), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ a = strv_copy(argv);
+ if (!a)
+ return -ENOMEM;
+
+ r = free_and_strdup(&bus->exec_path, path);
+ if (r < 0)
+ return r;
+
+ return strv_free_and_replace(bus->exec_argv, a);
+}
+
+_public_ int sd_bus_set_bus_client(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus->patch_sender, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->bus_client = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_set_monitor(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->is_monitor = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_negotiate_fds(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->accept_fd = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_negotiate_timestamp(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!IN_SET(bus->state, BUS_CLOSING, BUS_CLOSED), -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ /* This is not actually supported by any of our transports these days, but we do honour it for synthetic
+ * replies, and maybe one day classic D-Bus learns this too */
+ bus->attach_timestamp = !!b;
+
+ return 0;
+}
+
+_public_ int sd_bus_negotiate_creds(sd_bus *bus, int b, uint64_t mask) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(mask <= _SD_BUS_CREDS_ALL, -EINVAL);
+ assert_return(!IN_SET(bus->state, BUS_CLOSING, BUS_CLOSED), -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ SET_FLAG(bus->creds_mask, mask, b);
+
+ /* The well knowns we need unconditionally, so that matches can work */
+ bus->creds_mask |= SD_BUS_CREDS_WELL_KNOWN_NAMES|SD_BUS_CREDS_UNIQUE_NAME;
+
+ return 0;
+}
+
+_public_ int sd_bus_set_server(sd_bus *bus, int b, sd_id128_t server_id) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(b || sd_id128_equal(server_id, SD_ID128_NULL), -EINVAL);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->is_server = !!b;
+ bus->server_id = server_id;
+ return 0;
+}
+
+_public_ int sd_bus_set_anonymous(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->anonymous_auth = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_set_trusted(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->trusted = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_set_description(sd_bus *bus, const char *description) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return free_and_strdup(&bus->description, description);
+}
+
+_public_ int sd_bus_set_allow_interactive_authorization(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->allow_interactive_authorization = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_get_allow_interactive_authorization(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->allow_interactive_authorization;
+}
+
+_public_ int sd_bus_set_watch_bind(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->watch_bind = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_get_watch_bind(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->watch_bind;
+}
+
+_public_ int sd_bus_set_connected_signal(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->connected_signal = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_get_connected_signal(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->connected_signal;
+}
+
+static int synthesize_connected_signal(sd_bus *bus) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+
+ /* If enabled, synthesizes a local "Connected" signal mirroring the local "Disconnected" signal. This is called
+ * whenever we fully established a connection, i.e. after the authorization phase, and after receiving the
+ * Hello() reply. Or in other words, whenver we enter BUS_RUNNING state.
+ *
+ * This is useful so that clients can start doing stuff whenver the connection is fully established in a way
+ * that works independently from whether we connected to a full bus or just a direct connection. */
+
+ if (!bus->connected_signal)
+ return 0;
+
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/org/freedesktop/DBus/Local",
+ "org.freedesktop.DBus.Local",
+ "Connected");
+ if (r < 0)
+ return r;
+
+ bus_message_set_sender_local(bus, m);
+
+ r = bus_seal_synthetic_message(bus, m);
+ if (r < 0)
+ return r;
+
+ r = bus_rqueue_make_room(bus);
+ if (r < 0)
+ return r;
+
+ /* Insert at the very front */
+ memmove(bus->rqueue + 1, bus->rqueue, sizeof(sd_bus_message*) * bus->rqueue_size);
+ bus->rqueue[0] = TAKE_PTR(m);
+ bus->rqueue_size++;
+
+ return 0;
+}
+
+void bus_set_state(sd_bus *bus, enum bus_state state) {
+
+ static const char * const table[_BUS_STATE_MAX] = {
+ [BUS_UNSET] = "UNSET",
+ [BUS_WATCH_BIND] = "WATCH_BIND",
+ [BUS_OPENING] = "OPENING",
+ [BUS_AUTHENTICATING] = "AUTHENTICATING",
+ [BUS_HELLO] = "HELLO",
+ [BUS_RUNNING] = "RUNNING",
+ [BUS_CLOSING] = "CLOSING",
+ [BUS_CLOSED] = "CLOSED",
+ };
+
+ assert(bus);
+ assert(state < _BUS_STATE_MAX);
+
+ if (state == bus->state)
+ return;
+
+ log_debug("Bus %s: changing state %s → %s", strna(bus->description), table[bus->state], table[state]);
+ bus->state = state;
+}
+
+static int hello_callback(sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ const char *s;
+ sd_bus *bus;
+ int r;
+
+ assert(reply);
+ bus = reply->bus;
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_HELLO, BUS_CLOSING));
+
+ r = sd_bus_message_get_errno(reply);
+ if (r > 0)
+ return -r;
+
+ r = sd_bus_message_read(reply, "s", &s);
+ if (r < 0)
+ return r;
+
+ if (!service_name_is_valid(s) || s[0] != ':')
+ return -EBADMSG;
+
+ r = free_and_strdup(&bus->unique_name, s);
+ if (r < 0)
+ return r;
+
+ if (bus->state == BUS_HELLO) {
+ bus_set_state(bus, BUS_RUNNING);
+
+ r = synthesize_connected_signal(bus);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+static int bus_send_hello(sd_bus *bus) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+
+ if (!bus->bus_client)
+ return 0;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "Hello");
+ if (r < 0)
+ return r;
+
+ return sd_bus_call_async(bus, NULL, m, hello_callback, NULL, 0);
+}
+
+int bus_start_running(sd_bus *bus) {
+ struct reply_callback *c;
+ Iterator i;
+ usec_t n;
+ int r;
+
+ assert(bus);
+ assert(bus->state < BUS_HELLO);
+
+ /* We start all method call timeouts when we enter BUS_HELLO or BUS_RUNNING mode. At this point let's convert
+ * all relative to absolute timestamps. Note that we do not reshuffle the reply callback priority queue since
+ * adding a fixed value to all entries should not alter the internal order. */
+
+ n = now(CLOCK_MONOTONIC);
+ ORDERED_HASHMAP_FOREACH(c, bus->reply_callbacks, i) {
+ if (c->timeout_usec == 0)
+ continue;
+
+ c->timeout_usec = usec_add(n, c->timeout_usec);
+ }
+
+ if (bus->bus_client) {
+ bus_set_state(bus, BUS_HELLO);
+ return 1;
+ }
+
+ bus_set_state(bus, BUS_RUNNING);
+
+ r = synthesize_connected_signal(bus);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int parse_address_key(const char **p, const char *key, char **value) {
+ size_t l, n = 0, allocated = 0;
+ _cleanup_free_ char *r = NULL;
+ const char *a;
+
+ assert(p);
+ assert(*p);
+ assert(value);
+
+ if (key) {
+ l = strlen(key);
+ if (strncmp(*p, key, l) != 0)
+ return 0;
+
+ if ((*p)[l] != '=')
+ return 0;
+
+ if (*value)
+ return -EINVAL;
+
+ a = *p + l + 1;
+ } else
+ a = *p;
+
+ while (!IN_SET(*a, ';', ',', 0)) {
+ char c;
+
+ if (*a == '%') {
+ int x, y;
+
+ x = unhexchar(a[1]);
+ if (x < 0)
+ return x;
+
+ y = unhexchar(a[2]);
+ if (y < 0)
+ return y;
+
+ c = (char) ((x << 4) | y);
+ a += 3;
+ } else {
+ c = *a;
+ a++;
+ }
+
+ if (!GREEDY_REALLOC(r, allocated, n + 2))
+ return -ENOMEM;
+
+ r[n++] = c;
+ }
+
+ if (!r) {
+ r = strdup("");
+ if (!r)
+ return -ENOMEM;
+ } else
+ r[n] = 0;
+
+ if (*a == ',')
+ a++;
+
+ *p = a;
+
+ free_and_replace(*value, r);
+
+ return 1;
+}
+
+static void skip_address_key(const char **p) {
+ assert(p);
+ assert(*p);
+
+ *p += strcspn(*p, ",");
+
+ if (**p == ',')
+ (*p)++;
+}
+
+static int parse_unix_address(sd_bus *b, const char **p, char **guid) {
+ _cleanup_free_ char *path = NULL, *abstract = NULL;
+ size_t l;
+ int r;
+
+ assert(b);
+ assert(p);
+ assert(*p);
+ assert(guid);
+
+ while (!IN_SET(**p, 0, ';')) {
+ r = parse_address_key(p, "guid", guid);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "path", &path);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "abstract", &abstract);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ skip_address_key(p);
+ }
+
+ if (!path && !abstract)
+ return -EINVAL;
+
+ if (path && abstract)
+ return -EINVAL;
+
+ if (path) {
+ l = strlen(path);
+ if (l >= sizeof(b->sockaddr.un.sun_path)) /* We insist on NUL termination */
+ return -E2BIG;
+
+ b->sockaddr.un = (struct sockaddr_un) {
+ .sun_family = AF_UNIX,
+ };
+
+ memcpy(b->sockaddr.un.sun_path, path, l);
+ b->sockaddr_size = offsetof(struct sockaddr_un, sun_path) + l + 1;
+
+ } else {
+ assert(abstract);
+
+ l = strlen(abstract);
+ if (l >= sizeof(b->sockaddr.un.sun_path) - 1) /* We insist on NUL termination */
+ return -E2BIG;
+
+ b->sockaddr.un = (struct sockaddr_un) {
+ .sun_family = AF_UNIX,
+ };
+
+ memcpy(b->sockaddr.un.sun_path+1, abstract, l);
+ b->sockaddr_size = offsetof(struct sockaddr_un, sun_path) + 1 + l;
+ }
+
+ b->is_local = true;
+
+ return 0;
+}
+
+static int parse_tcp_address(sd_bus *b, const char **p, char **guid) {
+ _cleanup_free_ char *host = NULL, *port = NULL, *family = NULL;
+ int r;
+ struct addrinfo *result, hints = {
+ .ai_socktype = SOCK_STREAM,
+ .ai_flags = AI_ADDRCONFIG,
+ };
+
+ assert(b);
+ assert(p);
+ assert(*p);
+ assert(guid);
+
+ while (!IN_SET(**p, 0, ';')) {
+ r = parse_address_key(p, "guid", guid);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "host", &host);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "port", &port);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "family", &family);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ skip_address_key(p);
+ }
+
+ if (!host || !port)
+ return -EINVAL;
+
+ if (family) {
+ if (streq(family, "ipv4"))
+ hints.ai_family = AF_INET;
+ else if (streq(family, "ipv6"))
+ hints.ai_family = AF_INET6;
+ else
+ return -EINVAL;
+ }
+
+ r = getaddrinfo(host, port, &hints, &result);
+ if (r == EAI_SYSTEM)
+ return -errno;
+ else if (r != 0)
+ return -EADDRNOTAVAIL;
+
+ memcpy(&b->sockaddr, result->ai_addr, result->ai_addrlen);
+ b->sockaddr_size = result->ai_addrlen;
+
+ freeaddrinfo(result);
+
+ b->is_local = false;
+
+ return 0;
+}
+
+static int parse_exec_address(sd_bus *b, const char **p, char **guid) {
+ char *path = NULL;
+ unsigned n_argv = 0, j;
+ char **argv = NULL;
+ size_t allocated = 0;
+ int r;
+
+ assert(b);
+ assert(p);
+ assert(*p);
+ assert(guid);
+
+ while (!IN_SET(**p, 0, ';')) {
+ r = parse_address_key(p, "guid", guid);
+ if (r < 0)
+ goto fail;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "path", &path);
+ if (r < 0)
+ goto fail;
+ else if (r > 0)
+ continue;
+
+ if (startswith(*p, "argv")) {
+ unsigned ul;
+
+ errno = 0;
+ ul = strtoul(*p + 4, (char**) p, 10);
+ if (errno > 0 || **p != '=' || ul > 256) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ (*p)++;
+
+ if (ul >= n_argv) {
+ if (!GREEDY_REALLOC0(argv, allocated, ul + 2)) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ n_argv = ul + 1;
+ }
+
+ r = parse_address_key(p, NULL, argv + ul);
+ if (r < 0)
+ goto fail;
+
+ continue;
+ }
+
+ skip_address_key(p);
+ }
+
+ if (!path) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ /* Make sure there are no holes in the array, with the
+ * exception of argv[0] */
+ for (j = 1; j < n_argv; j++)
+ if (!argv[j]) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ if (argv && argv[0] == NULL) {
+ argv[0] = strdup(path);
+ if (!argv[0]) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ b->exec_path = path;
+ b->exec_argv = argv;
+
+ b->is_local = false;
+
+ return 0;
+
+fail:
+ for (j = 0; j < n_argv; j++)
+ free(argv[j]);
+
+ free(argv);
+ free(path);
+ return r;
+}
+
+static int parse_container_unix_address(sd_bus *b, const char **p, char **guid) {
+ _cleanup_free_ char *machine = NULL, *pid = NULL;
+ int r;
+
+ assert(b);
+ assert(p);
+ assert(*p);
+ assert(guid);
+
+ while (!IN_SET(**p, 0, ';')) {
+ r = parse_address_key(p, "guid", guid);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "machine", &machine);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "pid", &pid);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ skip_address_key(p);
+ }
+
+ if (!machine == !pid)
+ return -EINVAL;
+
+ if (machine) {
+ if (!streq(machine, ".host") && !machine_name_is_valid(machine))
+ return -EINVAL;
+
+ free_and_replace(b->machine, machine);
+ } else {
+ b->machine = mfree(b->machine);
+ }
+
+ if (pid) {
+ r = parse_pid(pid, &b->nspid);
+ if (r < 0)
+ return r;
+ } else
+ b->nspid = 0;
+
+ b->sockaddr.un = (struct sockaddr_un) {
+ .sun_family = AF_UNIX,
+ /* Note that we use the old /var/run prefix here, to increase compatibility with really old containers */
+ .sun_path = "/var/run/dbus/system_bus_socket",
+ };
+ b->sockaddr_size = SOCKADDR_UN_LEN(b->sockaddr.un);
+ b->is_local = false;
+
+ return 0;
+}
+
+static void bus_reset_parsed_address(sd_bus *b) {
+ assert(b);
+
+ zero(b->sockaddr);
+ b->sockaddr_size = 0;
+ b->exec_argv = strv_free(b->exec_argv);
+ b->exec_path = mfree(b->exec_path);
+ b->server_id = SD_ID128_NULL;
+ b->machine = mfree(b->machine);
+ b->nspid = 0;
+}
+
+static int bus_parse_next_address(sd_bus *b) {
+ _cleanup_free_ char *guid = NULL;
+ const char *a;
+ int r;
+
+ assert(b);
+
+ if (!b->address)
+ return 0;
+ if (b->address[b->address_index] == 0)
+ return 0;
+
+ bus_reset_parsed_address(b);
+
+ a = b->address + b->address_index;
+
+ while (*a != 0) {
+
+ if (*a == ';') {
+ a++;
+ continue;
+ }
+
+ if (startswith(a, "unix:")) {
+ a += 5;
+
+ r = parse_unix_address(b, &a, &guid);
+ if (r < 0)
+ return r;
+ break;
+
+ } else if (startswith(a, "tcp:")) {
+
+ a += 4;
+ r = parse_tcp_address(b, &a, &guid);
+ if (r < 0)
+ return r;
+
+ break;
+
+ } else if (startswith(a, "unixexec:")) {
+
+ a += 9;
+ r = parse_exec_address(b, &a, &guid);
+ if (r < 0)
+ return r;
+
+ break;
+
+ } else if (startswith(a, "x-machine-unix:")) {
+
+ a += 15;
+ r = parse_container_unix_address(b, &a, &guid);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ a = strchr(a, ';');
+ if (!a)
+ return 0;
+ }
+
+ if (guid) {
+ r = sd_id128_from_string(guid, &b->server_id);
+ if (r < 0)
+ return r;
+ }
+
+ b->address_index = a - b->address;
+ return 1;
+}
+
+static void bus_kill_exec(sd_bus *bus) {
+ if (pid_is_valid(bus->busexec_pid) > 0) {
+ sigterm_wait(bus->busexec_pid);
+ bus->busexec_pid = 0;
+ }
+}
+
+static int bus_start_address(sd_bus *b) {
+ int r;
+
+ assert(b);
+
+ for (;;) {
+ bus_close_io_fds(b);
+ bus_close_inotify_fd(b);
+
+ bus_kill_exec(b);
+
+ /* If you provide multiple different bus-addresses, we
+ * try all of them in order and use the first one that
+ * succeeds. */
+
+ if (b->exec_path)
+ r = bus_socket_exec(b);
+ else if ((b->nspid > 0 || b->machine) && b->sockaddr.sa.sa_family != AF_UNSPEC)
+ r = bus_container_connect_socket(b);
+ else if (b->sockaddr.sa.sa_family != AF_UNSPEC)
+ r = bus_socket_connect(b);
+ else
+ goto next;
+
+ if (r >= 0) {
+ int q;
+
+ q = bus_attach_io_events(b);
+ if (q < 0)
+ return q;
+
+ q = bus_attach_inotify_event(b);
+ if (q < 0)
+ return q;
+
+ return r;
+ }
+
+ b->last_connect_error = -r;
+
+ next:
+ r = bus_parse_next_address(b);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return b->last_connect_error > 0 ? -b->last_connect_error : -ECONNREFUSED;
+ }
+}
+
+int bus_next_address(sd_bus *b) {
+ assert(b);
+
+ bus_reset_parsed_address(b);
+ return bus_start_address(b);
+}
+
+static int bus_start_fd(sd_bus *b) {
+ struct stat st;
+ int r;
+
+ assert(b);
+ assert(b->input_fd >= 0);
+ assert(b->output_fd >= 0);
+
+ r = fd_nonblock(b->input_fd, true);
+ if (r < 0)
+ return r;
+
+ r = fd_cloexec(b->input_fd, true);
+ if (r < 0)
+ return r;
+
+ if (b->input_fd != b->output_fd) {
+ r = fd_nonblock(b->output_fd, true);
+ if (r < 0)
+ return r;
+
+ r = fd_cloexec(b->output_fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (fstat(b->input_fd, &st) < 0)
+ return -errno;
+
+ return bus_socket_take_fd(b);
+}
+
+_public_ int sd_bus_start(sd_bus *bus) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus_set_state(bus, BUS_OPENING);
+
+ if (bus->is_server && bus->bus_client)
+ return -EINVAL;
+
+ if (bus->input_fd >= 0)
+ r = bus_start_fd(bus);
+ else if (bus->address || bus->sockaddr.sa.sa_family != AF_UNSPEC || bus->exec_path || bus->machine)
+ r = bus_start_address(bus);
+ else
+ return -EINVAL;
+
+ if (r < 0) {
+ sd_bus_close(bus);
+ return r;
+ }
+
+ return bus_send_hello(bus);
+}
+
+_public_ int sd_bus_open_with_description(sd_bus **ret, const char *description) {
+ const char *e;
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ /* Let's connect to the starter bus if it is set, and
+ * otherwise to the bus that is appropropriate for the scope
+ * we are running in */
+
+ e = secure_getenv("DBUS_STARTER_BUS_TYPE");
+ if (e) {
+ if (streq(e, "system"))
+ return sd_bus_open_system_with_description(ret, description);
+ else if (STR_IN_SET(e, "session", "user"))
+ return sd_bus_open_user_with_description(ret, description);
+ }
+
+ e = secure_getenv("DBUS_STARTER_ADDRESS");
+ if (!e) {
+ if (cg_pid_get_owner_uid(0, NULL) >= 0)
+ return sd_bus_open_user_with_description(ret, description);
+ else
+ return sd_bus_open_system_with_description(ret, description);
+ }
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_address(b, e);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+
+ /* We don't know whether the bus is trusted or not, so better
+ * be safe, and authenticate everything */
+ b->trusted = false;
+ b->is_local = false;
+ b->creds_mask |= SD_BUS_CREDS_UID | SD_BUS_CREDS_EUID | SD_BUS_CREDS_EFFECTIVE_CAPS;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ int sd_bus_open(sd_bus **ret) {
+ return sd_bus_open_with_description(ret, NULL);
+}
+
+int bus_set_address_system(sd_bus *b) {
+ const char *e;
+ assert(b);
+
+ e = secure_getenv("DBUS_SYSTEM_BUS_ADDRESS");
+ if (e)
+ return sd_bus_set_address(b, e);
+
+ return sd_bus_set_address(b, DEFAULT_SYSTEM_BUS_ADDRESS);
+}
+
+_public_ int sd_bus_open_system_with_description(sd_bus **ret, const char *description) {
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ if (description) {
+ r = sd_bus_set_description(b, description);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_set_address_system(b);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+ b->is_system = true;
+
+ /* Let's do per-method access control on the system bus. We
+ * need the caller's UID and capability set for that. */
+ b->trusted = false;
+ b->creds_mask |= SD_BUS_CREDS_UID | SD_BUS_CREDS_EUID | SD_BUS_CREDS_EFFECTIVE_CAPS;
+ b->is_local = true;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ int sd_bus_open_system(sd_bus **ret) {
+ return sd_bus_open_system_with_description(ret, NULL);
+}
+
+int bus_set_address_user(sd_bus *b) {
+ const char *e;
+ _cleanup_free_ char *ee = NULL, *s = NULL;
+
+ assert(b);
+
+ e = secure_getenv("DBUS_SESSION_BUS_ADDRESS");
+ if (e)
+ return sd_bus_set_address(b, e);
+
+ e = secure_getenv("XDG_RUNTIME_DIR");
+ if (!e)
+ return -ENOENT;
+
+ ee = bus_address_escape(e);
+ if (!ee)
+ return -ENOMEM;
+
+ if (asprintf(&s, DEFAULT_USER_BUS_ADDRESS_FMT, ee) < 0)
+ return -ENOMEM;
+
+ b->address = TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_bus_open_user_with_description(sd_bus **ret, const char *description) {
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ if (description) {
+ r = sd_bus_set_description(b, description);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_set_address_user(b);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+ b->is_user = true;
+
+ /* We don't do any per-method access control on the user
+ * bus. */
+ b->trusted = true;
+ b->is_local = true;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ int sd_bus_open_user(sd_bus **ret) {
+ return sd_bus_open_user_with_description(ret, NULL);
+}
+
+int bus_set_address_system_remote(sd_bus *b, const char *host) {
+ _cleanup_free_ char *e = NULL;
+ char *m = NULL, *c = NULL, *a, *rbracket = NULL, *p = NULL;
+
+ assert(b);
+ assert(host);
+
+ /* Skip ":"s in ipv6 addresses */
+ if (*host == '[') {
+ char *t;
+
+ rbracket = strchr(host, ']');
+ if (!rbracket)
+ return -EINVAL;
+ t = strndupa(host + 1, rbracket - host - 1);
+ e = bus_address_escape(t);
+ if (!e)
+ return -ENOMEM;
+ } else if ((a = strchr(host, '@'))) {
+ if (*(a + 1) == '[') {
+ _cleanup_free_ char *t = NULL;
+
+ rbracket = strchr(a + 1, ']');
+ if (!rbracket)
+ return -EINVAL;
+ t = new0(char, strlen(host));
+ if (!t)
+ return -ENOMEM;
+ strncat(t, host, a - host + 1);
+ strncat(t, a + 2, rbracket - a - 2);
+ e = bus_address_escape(t);
+ if (!e)
+ return -ENOMEM;
+ } else if (*(a + 1) == '\0' || strchr(a + 1, '@'))
+ return -EINVAL;
+ }
+
+ /* Let's see if a port was given */
+ m = strchr(rbracket ? rbracket + 1 : host, ':');
+ if (m) {
+ char *t;
+ bool got_forward_slash = false;
+
+ p = m + 1;
+
+ t = strchr(p, '/');
+ if (t) {
+ p = strndupa(p, t - p);
+ got_forward_slash = true;
+ }
+
+ if (!in_charset(p, "0123456789") || *p == '\0') {
+ if (!machine_name_is_valid(p) || got_forward_slash)
+ return -EINVAL;
+
+ m = TAKE_PTR(p);
+ goto interpret_port_as_machine_old_syntax;
+ }
+ }
+
+ /* Let's see if a machine was given */
+ m = strchr(rbracket ? rbracket + 1 : host, '/');
+ if (m) {
+ m++;
+interpret_port_as_machine_old_syntax:
+ /* Let's make sure this is not a port of some kind,
+ * and is a valid machine name. */
+ if (!in_charset(m, "0123456789") && machine_name_is_valid(m))
+ c = strjoina(",argv", p ? "7" : "5", "=--machine=", m);
+ }
+
+ if (!e) {
+ char *t;
+
+ t = strndupa(host, strcspn(host, ":/"));
+
+ e = bus_address_escape(t);
+ if (!e)
+ return -ENOMEM;
+ }
+
+ a = strjoin("unixexec:path=ssh,argv1=-xT", p ? ",argv2=-p,argv3=" : "", strempty(p),
+ ",argv", p ? "4" : "2", "=--,argv", p ? "5" : "3", "=", e,
+ ",argv", p ? "6" : "4", "=systemd-stdio-bridge", c);
+ if (!a)
+ return -ENOMEM;
+
+ return free_and_replace(b->address, a);
+}
+
+_public_ int sd_bus_open_system_remote(sd_bus **ret, const char *host) {
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(host, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ r = bus_set_address_system_remote(b, host);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+ b->trusted = false;
+ b->is_system = true;
+ b->is_local = false;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+int bus_set_address_system_machine(sd_bus *b, const char *machine) {
+ _cleanup_free_ char *e = NULL;
+ char *a;
+
+ assert(b);
+ assert(machine);
+
+ e = bus_address_escape(machine);
+ if (!e)
+ return -ENOMEM;
+
+ a = strjoin("x-machine-unix:machine=", e);
+ if (!a)
+ return -ENOMEM;
+
+ return free_and_replace(b->address, a);
+}
+
+_public_ int sd_bus_open_system_machine(sd_bus **ret, const char *machine) {
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(machine, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(streq(machine, ".host") || machine_name_is_valid(machine), -EINVAL);
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ r = bus_set_address_system_machine(b, machine);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+ b->trusted = false;
+ b->is_system = true;
+ b->is_local = false;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ void sd_bus_close(sd_bus *bus) {
+ if (!bus)
+ return;
+ if (bus->state == BUS_CLOSED)
+ return;
+ if (bus_pid_changed(bus))
+ return;
+
+ /* Don't leave ssh hanging around */
+ bus_kill_exec(bus);
+
+ bus_set_state(bus, BUS_CLOSED);
+
+ sd_bus_detach_event(bus);
+
+ /* Drop all queued messages so that they drop references to
+ * the bus object and the bus may be freed */
+ bus_reset_queues(bus);
+
+ bus_close_io_fds(bus);
+ bus_close_inotify_fd(bus);
+}
+
+_public_ sd_bus *sd_bus_close_unref(sd_bus *bus) {
+ if (!bus)
+ return NULL;
+
+ sd_bus_close(bus);
+
+ return sd_bus_unref(bus);
+}
+
+_public_ sd_bus* sd_bus_flush_close_unref(sd_bus *bus) {
+ if (!bus)
+ return NULL;
+
+ /* Have to do this before flush() to prevent hang */
+ bus_kill_exec(bus);
+ sd_bus_flush(bus);
+
+ return sd_bus_close_unref(bus);
+}
+
+void bus_enter_closing(sd_bus *bus) {
+ assert(bus);
+
+ if (!IN_SET(bus->state, BUS_WATCH_BIND, BUS_OPENING, BUS_AUTHENTICATING, BUS_HELLO, BUS_RUNNING))
+ return;
+
+ bus_set_state(bus, BUS_CLOSING);
+}
+
+DEFINE_PUBLIC_ATOMIC_REF_UNREF_FUNC(sd_bus, sd_bus, bus_free);
+
+_public_ int sd_bus_is_open(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return BUS_IS_OPEN(bus->state);
+}
+
+_public_ int sd_bus_is_ready(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->state == BUS_RUNNING;
+}
+
+_public_ int sd_bus_can_send(sd_bus *bus, char type) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (bus->is_monitor)
+ return 0;
+
+ if (type == SD_BUS_TYPE_UNIX_FD) {
+ if (!bus->accept_fd)
+ return 0;
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ return r;
+
+ return bus->can_fds;
+ }
+
+ return bus_type_is_valid(type);
+}
+
+_public_ int sd_bus_get_bus_id(sd_bus *bus, sd_id128_t *id) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(id, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ return r;
+
+ *id = bus->server_id;
+ return 0;
+}
+
+static int bus_seal_message(sd_bus *b, sd_bus_message *m, usec_t timeout) {
+ int r;
+
+ assert(b);
+ assert(m);
+
+ if (m->sealed) {
+ /* If we copy the same message to multiple
+ * destinations, avoid using the same cookie
+ * numbers. */
+ b->cookie = MAX(b->cookie, BUS_MESSAGE_COOKIE(m));
+ return 0;
+ }
+
+ if (timeout == 0) {
+ r = sd_bus_get_method_call_timeout(b, &timeout);
+ if (r < 0)
+ return r;
+ }
+
+ if (!m->sender && b->patch_sender) {
+ r = sd_bus_message_set_sender(m, b->patch_sender);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_seal(m, ++b->cookie, timeout);
+}
+
+static int bus_remarshal_message(sd_bus *b, sd_bus_message **m) {
+ bool remarshal = false;
+
+ assert(b);
+
+ /* wrong packet version */
+ if (b->message_version != 0 && b->message_version != (*m)->header->version)
+ remarshal = true;
+
+ /* wrong packet endianness */
+ if (b->message_endian != 0 && b->message_endian != (*m)->header->endian)
+ remarshal = true;
+
+ return remarshal ? bus_message_remarshal(b, m) : 0;
+}
+
+int bus_seal_synthetic_message(sd_bus *b, sd_bus_message *m) {
+ assert(b);
+ assert(m);
+
+ /* Fake some timestamps, if they were requested, and not
+ * already initialized */
+ if (b->attach_timestamp) {
+ if (m->realtime <= 0)
+ m->realtime = now(CLOCK_REALTIME);
+
+ if (m->monotonic <= 0)
+ m->monotonic = now(CLOCK_MONOTONIC);
+ }
+
+ /* The bus specification says the serial number cannot be 0,
+ * hence let's fill something in for synthetic messages. Since
+ * synthetic messages might have a fake sender and we don't
+ * want to interfere with the real sender's serial numbers we
+ * pick a fixed, artificial one. We use (uint32_t) -1 rather
+ * than (uint64_t) -1 since dbus1 only had 32bit identifiers,
+ * even though kdbus can do 64bit. */
+ return sd_bus_message_seal(m, 0xFFFFFFFFULL, 0);
+}
+
+static int bus_write_message(sd_bus *bus, sd_bus_message *m, size_t *idx) {
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ r = bus_socket_write_message(bus, m, idx);
+ if (r <= 0)
+ return r;
+
+ if (*idx >= BUS_MESSAGE_SIZE(m))
+ log_debug("Sent message type=%s sender=%s destination=%s path=%s interface=%s member=%s cookie=%" PRIu64 " reply_cookie=%" PRIu64 " signature=%s error-name=%s error-message=%s",
+ bus_message_type_to_string(m->header->type),
+ strna(sd_bus_message_get_sender(m)),
+ strna(sd_bus_message_get_destination(m)),
+ strna(sd_bus_message_get_path(m)),
+ strna(sd_bus_message_get_interface(m)),
+ strna(sd_bus_message_get_member(m)),
+ BUS_MESSAGE_COOKIE(m),
+ m->reply_cookie,
+ strna(m->root_container.signature),
+ strna(m->error.name),
+ strna(m->error.message));
+
+ return r;
+}
+
+static int dispatch_wqueue(sd_bus *bus) {
+ int r, ret = 0;
+
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ while (bus->wqueue_size > 0) {
+
+ r = bus_write_message(bus, bus->wqueue[0], &bus->windex);
+ if (r < 0)
+ return r;
+ else if (r == 0)
+ /* Didn't do anything this time */
+ return ret;
+ else if (bus->windex >= BUS_MESSAGE_SIZE(bus->wqueue[0])) {
+ /* Fully written. Let's drop the entry from
+ * the queue.
+ *
+ * This isn't particularly optimized, but
+ * well, this is supposed to be our worst-case
+ * buffer only, and the socket buffer is
+ * supposed to be our primary buffer, and if
+ * it got full, then all bets are off
+ * anyway. */
+
+ bus->wqueue_size--;
+ sd_bus_message_unref(bus->wqueue[0]);
+ memmove(bus->wqueue, bus->wqueue + 1, sizeof(sd_bus_message*) * bus->wqueue_size);
+ bus->windex = 0;
+
+ ret = 1;
+ }
+ }
+
+ return ret;
+}
+
+static int bus_read_message(sd_bus *bus, bool hint_priority, int64_t priority) {
+ assert(bus);
+
+ return bus_socket_read_message(bus);
+}
+
+int bus_rqueue_make_room(sd_bus *bus) {
+ assert(bus);
+
+ if (bus->rqueue_size >= BUS_RQUEUE_MAX)
+ return -ENOBUFS;
+
+ if (!GREEDY_REALLOC(bus->rqueue, bus->rqueue_allocated, bus->rqueue_size + 1))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int dispatch_rqueue(sd_bus *bus, bool hint_priority, int64_t priority, sd_bus_message **m) {
+ int r, ret = 0;
+
+ assert(bus);
+ assert(m);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ /* Note that the priority logic is only available on kdbus,
+ * where the rqueue is unused. We check the rqueue here
+ * anyway, because it's simple... */
+
+ for (;;) {
+ if (bus->rqueue_size > 0) {
+ /* Dispatch a queued message */
+
+ *m = bus->rqueue[0];
+ bus->rqueue_size--;
+ memmove(bus->rqueue, bus->rqueue + 1, sizeof(sd_bus_message*) * bus->rqueue_size);
+ return 1;
+ }
+
+ /* Try to read a new message */
+ r = bus_read_message(bus, hint_priority, priority);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return ret;
+
+ ret = 1;
+ }
+}
+
+_public_ int sd_bus_send(sd_bus *bus, sd_bus_message *_m, uint64_t *cookie) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = sd_bus_message_ref(_m);
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ if (!bus)
+ bus = m->bus;
+
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (m->n_fds > 0) {
+ r = sd_bus_can_send(bus, SD_BUS_TYPE_UNIX_FD);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EOPNOTSUPP;
+ }
+
+ /* If the cookie number isn't kept, then we know that no reply
+ * is expected */
+ if (!cookie && !m->sealed)
+ m->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+
+ r = bus_seal_message(bus, m, 0);
+ if (r < 0)
+ return r;
+
+ /* Remarshall if we have to. This will possibly unref the
+ * message and place a replacement in m */
+ r = bus_remarshal_message(bus, &m);
+ if (r < 0)
+ return r;
+
+ /* If this is a reply and no reply was requested, then let's
+ * suppress this, if we can */
+ if (m->dont_send)
+ goto finish;
+
+ if (IN_SET(bus->state, BUS_RUNNING, BUS_HELLO) && bus->wqueue_size <= 0) {
+ size_t idx = 0;
+
+ r = bus_write_message(bus, m, &idx);
+ if (r < 0) {
+ if (IN_SET(r, -ENOTCONN, -ECONNRESET, -EPIPE, -ESHUTDOWN)) {
+ bus_enter_closing(bus);
+ return -ECONNRESET;
+ }
+
+ return r;
+ }
+
+ if (idx < BUS_MESSAGE_SIZE(m)) {
+ /* Wasn't fully written. So let's remember how
+ * much was written. Note that the first entry
+ * of the wqueue array is always allocated so
+ * that we always can remember how much was
+ * written. */
+ bus->wqueue[0] = sd_bus_message_ref(m);
+ bus->wqueue_size = 1;
+ bus->windex = idx;
+ }
+
+ } else {
+ /* Just append it to the queue. */
+
+ if (bus->wqueue_size >= BUS_WQUEUE_MAX)
+ return -ENOBUFS;
+
+ if (!GREEDY_REALLOC(bus->wqueue, bus->wqueue_allocated, bus->wqueue_size + 1))
+ return -ENOMEM;
+
+ bus->wqueue[bus->wqueue_size++] = sd_bus_message_ref(m);
+ }
+
+finish:
+ if (cookie)
+ *cookie = BUS_MESSAGE_COOKIE(m);
+
+ return 1;
+}
+
+_public_ int sd_bus_send_to(sd_bus *bus, sd_bus_message *m, const char *destination, uint64_t *cookie) {
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ if (!bus)
+ bus = m->bus;
+
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (!streq_ptr(m->destination, destination)) {
+
+ if (!destination)
+ return -EEXIST;
+
+ r = sd_bus_message_set_destination(m, destination);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_send(bus, m, cookie);
+}
+
+static usec_t calc_elapse(sd_bus *bus, uint64_t usec) {
+ assert(bus);
+
+ if (usec == (uint64_t) -1)
+ return 0;
+
+ /* We start all timeouts the instant we enter BUS_HELLO/BUS_RUNNING state, so that the don't run in parallel
+ * with any connection setup states. Hence, if a method callback is started earlier than that we just store the
+ * relative timestamp, and afterwards the absolute one. */
+
+ if (IN_SET(bus->state, BUS_WATCH_BIND, BUS_OPENING, BUS_AUTHENTICATING))
+ return usec;
+ else
+ return now(CLOCK_MONOTONIC) + usec;
+}
+
+static int timeout_compare(const void *a, const void *b) {
+ const struct reply_callback *x = a, *y = b;
+
+ if (x->timeout_usec != 0 && y->timeout_usec == 0)
+ return -1;
+
+ if (x->timeout_usec == 0 && y->timeout_usec != 0)
+ return 1;
+
+ return CMP(x->timeout_usec, y->timeout_usec);
+}
+
+_public_ int sd_bus_call_async(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ sd_bus_message *_m,
+ sd_bus_message_handler_t callback,
+ void *userdata,
+ uint64_t usec) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = sd_bus_message_ref(_m);
+ _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *s = NULL;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(!m->sealed || (!!callback == !(m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)), -EINVAL);
+
+ if (!bus)
+ bus = m->bus;
+
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ /* If no callback is specified and there's no interest in a slot, then there's no reason to ask for a reply */
+ if (!callback && !slot && !m->sealed)
+ m->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+
+ r = ordered_hashmap_ensure_allocated(&bus->reply_callbacks, &uint64_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = prioq_ensure_allocated(&bus->reply_callbacks_prioq, timeout_compare);
+ if (r < 0)
+ return r;
+
+ r = bus_seal_message(bus, m, usec);
+ if (r < 0)
+ return r;
+
+ r = bus_remarshal_message(bus, &m);
+ if (r < 0)
+ return r;
+
+ if (slot || callback) {
+ s = bus_slot_allocate(bus, !slot, BUS_REPLY_CALLBACK, sizeof(struct reply_callback), userdata);
+ if (!s)
+ return -ENOMEM;
+
+ s->reply_callback.callback = callback;
+
+ s->reply_callback.cookie = BUS_MESSAGE_COOKIE(m);
+ r = ordered_hashmap_put(bus->reply_callbacks, &s->reply_callback.cookie, &s->reply_callback);
+ if (r < 0) {
+ s->reply_callback.cookie = 0;
+ return r;
+ }
+
+ s->reply_callback.timeout_usec = calc_elapse(bus, m->timeout);
+ if (s->reply_callback.timeout_usec != 0) {
+ r = prioq_put(bus->reply_callbacks_prioq, &s->reply_callback, &s->reply_callback.prioq_idx);
+ if (r < 0) {
+ s->reply_callback.timeout_usec = 0;
+ return r;
+ }
+ }
+ }
+
+ r = sd_bus_send(bus, m, s ? &s->reply_callback.cookie : NULL);
+ if (r < 0)
+ return r;
+
+ if (slot)
+ *slot = s;
+ s = NULL;
+
+ return r;
+}
+
+int bus_ensure_running(sd_bus *bus) {
+ int r;
+
+ assert(bus);
+
+ if (IN_SET(bus->state, BUS_UNSET, BUS_CLOSED, BUS_CLOSING))
+ return -ENOTCONN;
+ if (bus->state == BUS_RUNNING)
+ return 1;
+
+ for (;;) {
+ r = sd_bus_process(bus, NULL);
+ if (r < 0)
+ return r;
+ if (bus->state == BUS_RUNNING)
+ return 1;
+ if (r > 0)
+ continue;
+
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0)
+ return r;
+ }
+}
+
+_public_ int sd_bus_call(
+ sd_bus *bus,
+ sd_bus_message *_m,
+ uint64_t usec,
+ sd_bus_error *error,
+ sd_bus_message **reply) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = sd_bus_message_ref(_m);
+ usec_t timeout;
+ uint64_t cookie;
+ unsigned i;
+ int r;
+
+ bus_assert_return(m, -EINVAL, error);
+ bus_assert_return(m->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL, error);
+ bus_assert_return(!(m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED), -EINVAL, error);
+ bus_assert_return(!bus_error_is_dirty(error), -EINVAL, error);
+
+ if (!bus)
+ bus = m->bus;
+
+ bus_assert_return(!bus_pid_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ goto fail;
+
+ i = bus->rqueue_size;
+
+ r = bus_seal_message(bus, m, usec);
+ if (r < 0)
+ goto fail;
+
+ r = bus_remarshal_message(bus, &m);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_send(bus, m, &cookie);
+ if (r < 0)
+ goto fail;
+
+ timeout = calc_elapse(bus, m->timeout);
+
+ for (;;) {
+ usec_t left;
+
+ while (i < bus->rqueue_size) {
+ sd_bus_message *incoming = NULL;
+
+ incoming = bus->rqueue[i];
+
+ if (incoming->reply_cookie == cookie) {
+ /* Found a match! */
+
+ memmove(bus->rqueue + i, bus->rqueue + i + 1, sizeof(sd_bus_message*) * (bus->rqueue_size - i - 1));
+ bus->rqueue_size--;
+ log_debug_bus_message(incoming);
+
+ if (incoming->header->type == SD_BUS_MESSAGE_METHOD_RETURN) {
+
+ if (incoming->n_fds <= 0 || bus->accept_fd) {
+ if (reply)
+ *reply = incoming;
+ else
+ sd_bus_message_unref(incoming);
+
+ return 1;
+ }
+
+ r = sd_bus_error_setf(error, SD_BUS_ERROR_INCONSISTENT_MESSAGE, "Reply message contained file descriptors which I couldn't accept. Sorry.");
+ sd_bus_message_unref(incoming);
+ return r;
+
+ } else if (incoming->header->type == SD_BUS_MESSAGE_METHOD_ERROR) {
+ r = sd_bus_error_copy(error, &incoming->error);
+ sd_bus_message_unref(incoming);
+ return r;
+ } else {
+ r = -EIO;
+ goto fail;
+ }
+
+ } else if (BUS_MESSAGE_COOKIE(incoming) == cookie &&
+ bus->unique_name &&
+ incoming->sender &&
+ streq(bus->unique_name, incoming->sender)) {
+
+ memmove(bus->rqueue + i, bus->rqueue + i + 1, sizeof(sd_bus_message*) * (bus->rqueue_size - i - 1));
+ bus->rqueue_size--;
+
+ /* Our own message? Somebody is trying
+ * to send its own client a message,
+ * let's not dead-lock, let's fail
+ * immediately. */
+
+ sd_bus_message_unref(incoming);
+ r = -ELOOP;
+ goto fail;
+ }
+
+ /* Try to read more, right-away */
+ i++;
+ }
+
+ r = bus_read_message(bus, false, 0);
+ if (r < 0) {
+ if (IN_SET(r, -ENOTCONN, -ECONNRESET, -EPIPE, -ESHUTDOWN)) {
+ bus_enter_closing(bus);
+ r = -ECONNRESET;
+ }
+
+ goto fail;
+ }
+ if (r > 0)
+ continue;
+
+ if (timeout > 0) {
+ usec_t n;
+
+ n = now(CLOCK_MONOTONIC);
+ if (n >= timeout) {
+ r = -ETIMEDOUT;
+ goto fail;
+ }
+
+ left = timeout - n;
+ } else
+ left = (uint64_t) -1;
+
+ r = bus_poll(bus, true, left);
+ if (r < 0)
+ goto fail;
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto fail;
+ }
+
+ r = dispatch_wqueue(bus);
+ if (r < 0) {
+ if (IN_SET(r, -ENOTCONN, -ECONNRESET, -EPIPE, -ESHUTDOWN)) {
+ bus_enter_closing(bus);
+ r = -ECONNRESET;
+ }
+
+ goto fail;
+ }
+ }
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_get_fd(sd_bus *bus) {
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->input_fd == bus->output_fd, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (bus->state == BUS_CLOSED)
+ return -ENOTCONN;
+
+ if (bus->inotify_fd >= 0)
+ return bus->inotify_fd;
+
+ if (bus->input_fd >= 0)
+ return bus->input_fd;
+
+ return -ENOTCONN;
+}
+
+_public_ int sd_bus_get_events(sd_bus *bus) {
+ int flags = 0;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ switch (bus->state) {
+
+ case BUS_UNSET:
+ case BUS_CLOSED:
+ return -ENOTCONN;
+
+ case BUS_WATCH_BIND:
+ flags |= POLLIN;
+ break;
+
+ case BUS_OPENING:
+ flags |= POLLOUT;
+ break;
+
+ case BUS_AUTHENTICATING:
+ if (bus_socket_auth_needs_write(bus))
+ flags |= POLLOUT;
+
+ flags |= POLLIN;
+ break;
+
+ case BUS_RUNNING:
+ case BUS_HELLO:
+ if (bus->rqueue_size <= 0)
+ flags |= POLLIN;
+ if (bus->wqueue_size > 0)
+ flags |= POLLOUT;
+ break;
+
+ case BUS_CLOSING:
+ break;
+
+ default:
+ assert_not_reached("Unknown state");
+ }
+
+ return flags;
+}
+
+_public_ int sd_bus_get_timeout(sd_bus *bus, uint64_t *timeout_usec) {
+ struct reply_callback *c;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(timeout_usec, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state) && bus->state != BUS_CLOSING)
+ return -ENOTCONN;
+
+ if (bus->track_queue) {
+ *timeout_usec = 0;
+ return 1;
+ }
+
+ switch (bus->state) {
+
+ case BUS_AUTHENTICATING:
+ *timeout_usec = bus->auth_timeout;
+ return 1;
+
+ case BUS_RUNNING:
+ case BUS_HELLO:
+ if (bus->rqueue_size > 0) {
+ *timeout_usec = 0;
+ return 1;
+ }
+
+ c = prioq_peek(bus->reply_callbacks_prioq);
+ if (!c) {
+ *timeout_usec = (uint64_t) -1;
+ return 0;
+ }
+
+ if (c->timeout_usec == 0) {
+ *timeout_usec = (uint64_t) -1;
+ return 0;
+ }
+
+ *timeout_usec = c->timeout_usec;
+ return 1;
+
+ case BUS_CLOSING:
+ *timeout_usec = 0;
+ return 1;
+
+ case BUS_WATCH_BIND:
+ case BUS_OPENING:
+ *timeout_usec = (uint64_t) -1;
+ return 0;
+
+ default:
+ assert_not_reached("Unknown or unexpected stat");
+ }
+}
+
+static int process_timeout(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* m = NULL;
+ struct reply_callback *c;
+ sd_bus_slot *slot;
+ bool is_hello;
+ usec_t n;
+ int r;
+
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ c = prioq_peek(bus->reply_callbacks_prioq);
+ if (!c)
+ return 0;
+
+ n = now(CLOCK_MONOTONIC);
+ if (c->timeout_usec > n)
+ return 0;
+
+ r = bus_message_new_synthetic_error(
+ bus,
+ c->cookie,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NO_REPLY, "Method call timed out"),
+ &m);
+ if (r < 0)
+ return r;
+
+ r = bus_seal_synthetic_message(bus, m);
+ if (r < 0)
+ return r;
+
+ assert_se(prioq_pop(bus->reply_callbacks_prioq) == c);
+ c->timeout_usec = 0;
+
+ ordered_hashmap_remove(bus->reply_callbacks, &c->cookie);
+ c->cookie = 0;
+
+ slot = container_of(c, sd_bus_slot, reply_callback);
+
+ bus->iteration_counter++;
+
+ is_hello = bus->state == BUS_HELLO && c->callback == hello_callback;
+
+ bus->current_message = m;
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->callback;
+ bus->current_userdata = slot->userdata;
+ r = c->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = NULL;
+ bus->current_message = NULL;
+
+ if (slot->floating)
+ bus_slot_disconnect(slot, true);
+
+ sd_bus_slot_unref(slot);
+
+ /* When this is the hello message and it timed out, then make sure to propagate the error up, don't just log
+ * and ignore the callback handler's return value. */
+ if (is_hello)
+ return r;
+
+ return bus_maybe_reply_error(m, r, &error_buffer);
+}
+
+static int process_hello(sd_bus *bus, sd_bus_message *m) {
+ assert(bus);
+ assert(m);
+
+ if (bus->state != BUS_HELLO)
+ return 0;
+
+ /* Let's make sure the first message on the bus is the HELLO
+ * reply. But note that we don't actually parse the message
+ * here (we leave that to the usual handling), we just verify
+ * we don't let any earlier msg through. */
+
+ if (!IN_SET(m->header->type, SD_BUS_MESSAGE_METHOD_RETURN, SD_BUS_MESSAGE_METHOD_ERROR))
+ return -EIO;
+
+ if (m->reply_cookie != 1)
+ return -EIO;
+
+ return 0;
+}
+
+static int process_reply(sd_bus *bus, sd_bus_message *m) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *synthetic_reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ struct reply_callback *c;
+ sd_bus_slot *slot;
+ bool is_hello;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ if (!IN_SET(m->header->type, SD_BUS_MESSAGE_METHOD_RETURN, SD_BUS_MESSAGE_METHOD_ERROR))
+ return 0;
+
+ if (m->destination && bus->unique_name && !streq_ptr(m->destination, bus->unique_name))
+ return 0;
+
+ c = ordered_hashmap_remove(bus->reply_callbacks, &m->reply_cookie);
+ if (!c)
+ return 0;
+
+ c->cookie = 0;
+
+ slot = container_of(c, sd_bus_slot, reply_callback);
+
+ if (m->n_fds > 0 && !bus->accept_fd) {
+
+ /* If the reply contained a file descriptor which we
+ * didn't want we pass an error instead. */
+
+ r = bus_message_new_synthetic_error(
+ bus,
+ m->reply_cookie,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_INCONSISTENT_MESSAGE, "Reply message contained file descriptor"),
+ &synthetic_reply);
+ if (r < 0)
+ return r;
+
+ /* Copy over original timestamp */
+ synthetic_reply->realtime = m->realtime;
+ synthetic_reply->monotonic = m->monotonic;
+ synthetic_reply->seqnum = m->seqnum;
+
+ r = bus_seal_synthetic_message(bus, synthetic_reply);
+ if (r < 0)
+ return r;
+
+ m = synthetic_reply;
+ } else {
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (c->timeout_usec != 0) {
+ prioq_remove(bus->reply_callbacks_prioq, c, &c->prioq_idx);
+ c->timeout_usec = 0;
+ }
+
+ is_hello = bus->state == BUS_HELLO && c->callback == hello_callback;
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->callback;
+ bus->current_userdata = slot->userdata;
+ r = c->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = NULL;
+
+ if (slot->floating)
+ bus_slot_disconnect(slot, true);
+
+ sd_bus_slot_unref(slot);
+
+ /* When this is the hello message and it failed, then make sure to propagate the error up, don't just log and
+ * ignore the callback handler's return value. */
+ if (is_hello)
+ return r;
+
+ return bus_maybe_reply_error(m, r, &error_buffer);
+}
+
+static int process_filter(sd_bus *bus, sd_bus_message *m) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ struct filter_callback *l;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ do {
+ bus->filter_callbacks_modified = false;
+
+ LIST_FOREACH(callbacks, l, bus->filter_callbacks) {
+ sd_bus_slot *slot;
+
+ if (bus->filter_callbacks_modified)
+ break;
+
+ /* Don't run this more than once per iteration */
+ if (l->last_iteration == bus->iteration_counter)
+ continue;
+
+ l->last_iteration = bus->iteration_counter;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ slot = container_of(l, sd_bus_slot, filter_callback);
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = l->callback;
+ bus->current_userdata = slot->userdata;
+ r = l->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ r = bus_maybe_reply_error(m, r, &error_buffer);
+ if (r != 0)
+ return r;
+
+ }
+
+ } while (bus->filter_callbacks_modified);
+
+ return 0;
+}
+
+static int process_match(sd_bus *bus, sd_bus_message *m) {
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ do {
+ bus->match_callbacks_modified = false;
+
+ r = bus_match_run(bus, &bus->match_callbacks, m);
+ if (r != 0)
+ return r;
+
+ } while (bus->match_callbacks_modified);
+
+ return 0;
+}
+
+static int process_builtin(sd_bus *bus, sd_bus_message *m) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ if (bus->is_monitor)
+ return 0;
+
+ if (bus->manual_peer_interface)
+ return 0;
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL)
+ return 0;
+
+ if (!streq_ptr(m->interface, "org.freedesktop.DBus.Peer"))
+ return 0;
+
+ if (m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 1;
+
+ if (streq_ptr(m->member, "Ping"))
+ r = sd_bus_message_new_method_return(m, &reply);
+ else if (streq_ptr(m->member, "GetMachineId")) {
+ sd_id128_t id;
+ char sid[33];
+
+ r = sd_id128_get_machine(&id);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", sd_id128_to_string(id, sid));
+ } else {
+ r = sd_bus_message_new_method_errorf(
+ m, &reply,
+ SD_BUS_ERROR_UNKNOWN_METHOD,
+ "Unknown method '%s' on interface '%s'.", m->member, m->interface);
+ }
+
+ if (r < 0)
+ return r;
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int process_fd_check(sd_bus *bus, sd_bus_message *m) {
+ assert(bus);
+ assert(m);
+
+ /* If we got a message with a file descriptor which we didn't
+ * want to accept, then let's drop it. How can this even
+ * happen? For example, when the kernel queues a message into
+ * an activatable names's queue which allows fds, and then is
+ * delivered to us later even though we ourselves did not
+ * negotiate it. */
+
+ if (bus->is_monitor)
+ return 0;
+
+ if (m->n_fds <= 0)
+ return 0;
+
+ if (bus->accept_fd)
+ return 0;
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL)
+ return 1; /* just eat it up */
+
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INCONSISTENT_MESSAGE, "Message contains file descriptors, which I cannot accept. Sorry.");
+}
+
+static int process_message(sd_bus *bus, sd_bus_message *m) {
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ bus->current_message = m;
+ bus->iteration_counter++;
+
+ log_debug_bus_message(m);
+
+ r = process_hello(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_reply(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_fd_check(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_filter(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_match(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_builtin(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = bus_process_object(bus, m);
+
+finish:
+ bus->current_message = NULL;
+ return r;
+}
+
+static int dispatch_track(sd_bus *bus) {
+ assert(bus);
+
+ if (!bus->track_queue)
+ return 0;
+
+ bus_track_dispatch(bus->track_queue);
+ return 1;
+}
+
+static int process_running(sd_bus *bus, bool hint_priority, int64_t priority, sd_bus_message **ret) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ r = process_timeout(bus);
+ if (r != 0)
+ goto null_message;
+
+ r = dispatch_wqueue(bus);
+ if (r != 0)
+ goto null_message;
+
+ r = dispatch_track(bus);
+ if (r != 0)
+ goto null_message;
+
+ r = dispatch_rqueue(bus, hint_priority, priority, &m);
+ if (r < 0)
+ return r;
+ if (!m)
+ goto null_message;
+
+ r = process_message(bus, m);
+ if (r != 0)
+ goto null_message;
+
+ if (ret) {
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+
+ return 1;
+ }
+
+ if (m->header->type == SD_BUS_MESSAGE_METHOD_CALL) {
+
+ log_debug("Unprocessed message call sender=%s object=%s interface=%s member=%s",
+ strna(sd_bus_message_get_sender(m)),
+ strna(sd_bus_message_get_path(m)),
+ strna(sd_bus_message_get_interface(m)),
+ strna(sd_bus_message_get_member(m)));
+
+ r = sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_UNKNOWN_OBJECT,
+ "Unknown object '%s'.", m->path);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+
+null_message:
+ if (r >= 0 && ret)
+ *ret = NULL;
+
+ return r;
+}
+
+static int bus_exit_now(sd_bus *bus) {
+ assert(bus);
+
+ /* Exit due to close, if this is requested. If this is bus object is attached to an event source, invokes
+ * sd_event_exit(), otherwise invokes libc exit(). */
+
+ if (bus->exited) /* did we already exit? */
+ return 0;
+ if (!bus->exit_triggered) /* was the exit condition triggered? */
+ return 0;
+ if (!bus->exit_on_disconnect) /* Shall we actually exit on disconnection? */
+ return 0;
+
+ bus->exited = true; /* never exit more than once */
+
+ log_debug("Bus connection disconnected, exiting.");
+
+ if (bus->event)
+ return sd_event_exit(bus->event, EXIT_FAILURE);
+ else
+ exit(EXIT_FAILURE);
+
+ assert_not_reached("exit() didn't exit?");
+}
+
+static int process_closing_reply_callback(sd_bus *bus, struct reply_callback *c) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ sd_bus_slot *slot;
+ int r;
+
+ assert(bus);
+ assert(c);
+
+ r = bus_message_new_synthetic_error(
+ bus,
+ c->cookie,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NO_REPLY, "Connection terminated"),
+ &m);
+ if (r < 0)
+ return r;
+
+ r = bus_seal_synthetic_message(bus, m);
+ if (r < 0)
+ return r;
+
+ if (c->timeout_usec != 0) {
+ prioq_remove(bus->reply_callbacks_prioq, c, &c->prioq_idx);
+ c->timeout_usec = 0;
+ }
+
+ ordered_hashmap_remove(bus->reply_callbacks, &c->cookie);
+ c->cookie = 0;
+
+ slot = container_of(c, sd_bus_slot, reply_callback);
+
+ bus->iteration_counter++;
+
+ bus->current_message = m;
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->callback;
+ bus->current_userdata = slot->userdata;
+ r = c->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = NULL;
+ bus->current_message = NULL;
+
+ if (slot->floating)
+ bus_slot_disconnect(slot, true);
+
+ sd_bus_slot_unref(slot);
+
+ return bus_maybe_reply_error(m, r, &error_buffer);
+}
+
+static int process_closing(sd_bus *bus, sd_bus_message **ret) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ struct reply_callback *c;
+ int r;
+
+ assert(bus);
+ assert(bus->state == BUS_CLOSING);
+
+ /* First, fail all outstanding method calls */
+ c = ordered_hashmap_first(bus->reply_callbacks);
+ if (c)
+ return process_closing_reply_callback(bus, c);
+
+ /* Then, fake-drop all remaining bus tracking references */
+ if (bus->tracks) {
+ bus_track_close(bus->tracks);
+ return 1;
+ }
+
+ /* Then, synthesize a Disconnected message */
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/org/freedesktop/DBus/Local",
+ "org.freedesktop.DBus.Local",
+ "Disconnected");
+ if (r < 0)
+ return r;
+
+ bus_message_set_sender_local(bus, m);
+
+ r = bus_seal_synthetic_message(bus, m);
+ if (r < 0)
+ return r;
+
+ sd_bus_close(bus);
+
+ bus->current_message = m;
+ bus->iteration_counter++;
+
+ r = process_filter(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_match(bus, m);
+ if (r != 0)
+ goto finish;
+
+ /* Nothing else to do, exit now, if the condition holds */
+ bus->exit_triggered = true;
+ (void) bus_exit_now(bus);
+
+ if (ret)
+ *ret = TAKE_PTR(m);
+
+ r = 1;
+
+finish:
+ bus->current_message = NULL;
+
+ return r;
+}
+
+static int bus_process_internal(sd_bus *bus, bool hint_priority, int64_t priority, sd_bus_message **ret) {
+ int r;
+
+ /* Returns 0 when we didn't do anything. This should cause the
+ * caller to invoke sd_bus_wait() before returning the next
+ * time. Returns > 0 when we did something, which possibly
+ * means *ret is filled in with an unprocessed message. */
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ /* We don't allow recursively invoking sd_bus_process(). */
+ assert_return(!bus->current_message, -EBUSY);
+ assert(!bus->current_slot); /* This should be NULL whenever bus->current_message is */
+
+ BUS_DONT_DESTROY(bus);
+
+ switch (bus->state) {
+
+ case BUS_UNSET:
+ return -ENOTCONN;
+
+ case BUS_CLOSED:
+ return -ECONNRESET;
+
+ case BUS_WATCH_BIND:
+ r = bus_socket_process_watch_bind(bus);
+ break;
+
+ case BUS_OPENING:
+ r = bus_socket_process_opening(bus);
+ break;
+
+ case BUS_AUTHENTICATING:
+ r = bus_socket_process_authenticating(bus);
+ break;
+
+ case BUS_RUNNING:
+ case BUS_HELLO:
+ r = process_running(bus, hint_priority, priority, ret);
+ if (r >= 0)
+ return r;
+
+ /* This branch initializes *ret, hence we don't use the generic error checking below */
+ break;
+
+ case BUS_CLOSING:
+ return process_closing(bus, ret);
+
+ default:
+ assert_not_reached("Unknown state");
+ }
+
+ if (IN_SET(r, -ENOTCONN, -ECONNRESET, -EPIPE, -ESHUTDOWN)) {
+ bus_enter_closing(bus);
+ r = 1;
+ } else if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = NULL;
+
+ return r;
+}
+
+_public_ int sd_bus_process(sd_bus *bus, sd_bus_message **ret) {
+ return bus_process_internal(bus, false, 0, ret);
+}
+
+_public_ int sd_bus_process_priority(sd_bus *bus, int64_t priority, sd_bus_message **ret) {
+ return bus_process_internal(bus, true, priority, ret);
+}
+
+static int bus_poll(sd_bus *bus, bool need_more, uint64_t timeout_usec) {
+ struct pollfd p[2] = {};
+ int r, n;
+ struct timespec ts;
+ usec_t m = USEC_INFINITY;
+
+ assert(bus);
+
+ if (bus->state == BUS_CLOSING)
+ return 1;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (bus->state == BUS_WATCH_BIND) {
+ assert(bus->inotify_fd >= 0);
+
+ p[0].events = POLLIN;
+ p[0].fd = bus->inotify_fd;
+ n = 1;
+ } else {
+ int e;
+
+ e = sd_bus_get_events(bus);
+ if (e < 0)
+ return e;
+
+ if (need_more)
+ /* The caller really needs some more data, he doesn't
+ * care about what's already read, or any timeouts
+ * except its own. */
+ e |= POLLIN;
+ else {
+ usec_t until;
+ /* The caller wants to process if there's something to
+ * process, but doesn't care otherwise */
+
+ r = sd_bus_get_timeout(bus, &until);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ m = usec_sub_unsigned(until, now(CLOCK_MONOTONIC));
+ }
+
+ p[0].fd = bus->input_fd;
+ if (bus->output_fd == bus->input_fd) {
+ p[0].events = e;
+ n = 1;
+ } else {
+ p[0].events = e & POLLIN;
+ p[1].fd = bus->output_fd;
+ p[1].events = e & POLLOUT;
+ n = 2;
+ }
+ }
+
+ if (timeout_usec != (uint64_t) -1 && (m == USEC_INFINITY || timeout_usec < m))
+ m = timeout_usec;
+
+ r = ppoll(p, n, m == USEC_INFINITY ? NULL : timespec_store(&ts, m), NULL);
+ if (r < 0)
+ return -errno;
+
+ return r > 0 ? 1 : 0;
+}
+
+_public_ int sd_bus_wait(sd_bus *bus, uint64_t timeout_usec) {
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (bus->state == BUS_CLOSING)
+ return 0;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (bus->rqueue_size > 0)
+ return 0;
+
+ return bus_poll(bus, false, timeout_usec);
+}
+
+_public_ int sd_bus_flush(sd_bus *bus) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (bus->state == BUS_CLOSING)
+ return 0;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ /* We never were connected? Don't hang in inotify for good, as there's no timeout set for it */
+ if (bus->state == BUS_WATCH_BIND)
+ return -EUNATCH;
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ return r;
+
+ if (bus->wqueue_size <= 0)
+ return 0;
+
+ for (;;) {
+ r = dispatch_wqueue(bus);
+ if (r < 0) {
+ if (IN_SET(r, -ENOTCONN, -ECONNRESET, -EPIPE, -ESHUTDOWN)) {
+ bus_enter_closing(bus);
+ return -ECONNRESET;
+ }
+
+ return r;
+ }
+
+ if (bus->wqueue_size <= 0)
+ return 0;
+
+ r = bus_poll(bus, false, (uint64_t) -1);
+ if (r < 0)
+ return r;
+ }
+}
+
+_public_ int sd_bus_add_filter(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ sd_bus_slot *s;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(callback, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ s = bus_slot_allocate(bus, !slot, BUS_FILTER_CALLBACK, sizeof(struct filter_callback), userdata);
+ if (!s)
+ return -ENOMEM;
+
+ s->filter_callback.callback = callback;
+
+ bus->filter_callbacks_modified = true;
+ LIST_PREPEND(callbacks, bus->filter_callbacks, &s->filter_callback);
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+}
+
+static int add_match_callback(
+ sd_bus_message *m,
+ void *userdata,
+ sd_bus_error *ret_error) {
+
+ sd_bus_slot *match_slot = userdata;
+ bool failed = false;
+ int r;
+
+ assert(m);
+ assert(match_slot);
+
+ sd_bus_slot_ref(match_slot);
+
+ if (sd_bus_message_is_method_error(m, NULL)) {
+ log_debug_errno(sd_bus_message_get_errno(m),
+ "Unable to add match %s, failing connection: %s",
+ match_slot->match_callback.match_string,
+ sd_bus_message_get_error(m)->message);
+
+ failed = true;
+ } else
+ log_debug("Match %s successfully installed.", match_slot->match_callback.match_string);
+
+ if (match_slot->match_callback.install_callback) {
+ sd_bus *bus;
+
+ bus = sd_bus_message_get_bus(m);
+
+ /* This function has been called as slot handler, and we want to call another slot handler. Let's
+ * update the slot callback metadata temporarily with our own data, and then revert back to the old
+ * values. */
+
+ assert(bus->current_slot == match_slot->match_callback.install_slot);
+ assert(bus->current_handler == add_match_callback);
+ assert(bus->current_userdata == userdata);
+
+ bus->current_slot = match_slot;
+ bus->current_handler = match_slot->match_callback.install_callback;
+ bus->current_userdata = match_slot->userdata;
+
+ r = match_slot->match_callback.install_callback(m, match_slot->userdata, ret_error);
+
+ bus->current_slot = match_slot->match_callback.install_slot;
+ bus->current_handler = add_match_callback;
+ bus->current_userdata = userdata;
+
+ match_slot->match_callback.install_slot = sd_bus_slot_unref(match_slot->match_callback.install_slot);
+ } else {
+ if (failed) /* Generic failure handling: destroy the connection */
+ bus_enter_closing(sd_bus_message_get_bus(m));
+
+ r = 1;
+ }
+
+ if (failed && match_slot->floating)
+ bus_slot_disconnect(match_slot, true);
+
+ sd_bus_slot_unref(match_slot);
+
+ return r;
+}
+
+static int bus_add_match_full(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ bool asynchronous,
+ const char *match,
+ sd_bus_message_handler_t callback,
+ sd_bus_message_handler_t install_callback,
+ void *userdata) {
+
+ struct bus_match_component *components = NULL;
+ unsigned n_components = 0;
+ sd_bus_slot *s = NULL;
+ int r = 0;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(match, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ r = bus_match_parse(match, &components, &n_components);
+ if (r < 0)
+ goto finish;
+
+ s = bus_slot_allocate(bus, !slot, BUS_MATCH_CALLBACK, sizeof(struct match_callback), userdata);
+ if (!s) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ s->match_callback.callback = callback;
+ s->match_callback.install_callback = install_callback;
+
+ if (bus->bus_client) {
+ enum bus_match_scope scope;
+
+ scope = bus_match_get_scope(components, n_components);
+
+ /* Do not install server-side matches for matches against the local service, interface or bus path. */
+ if (scope != BUS_MATCH_LOCAL) {
+
+ /* We store the original match string, so that we can use it to remove the match again. */
+
+ s->match_callback.match_string = strdup(match);
+ if (!s->match_callback.match_string) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ if (asynchronous) {
+ r = bus_add_match_internal_async(bus,
+ &s->match_callback.install_slot,
+ s->match_callback.match_string,
+ add_match_callback,
+ s);
+
+ if (r < 0)
+ return r;
+
+ /* Make the slot of the match call floating now. We need the reference, but we don't
+ * want that this match pins the bus object, hence we first create it non-floating, but
+ * then make it floating. */
+ r = sd_bus_slot_set_floating(s->match_callback.install_slot, true);
+ } else
+ r = bus_add_match_internal(bus, s->match_callback.match_string);
+ if (r < 0)
+ goto finish;
+
+ s->match_added = true;
+ }
+ }
+
+ bus->match_callbacks_modified = true;
+ r = bus_match_add(&bus->match_callbacks, components, n_components, &s->match_callback);
+ if (r < 0)
+ goto finish;
+
+ if (slot)
+ *slot = s;
+ s = NULL;
+
+finish:
+ bus_match_parse_free(components, n_components);
+ sd_bus_slot_unref(s);
+
+ return r;
+}
+
+_public_ int sd_bus_add_match(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *match,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ return bus_add_match_full(bus, slot, false, match, callback, NULL, userdata);
+}
+
+_public_ int sd_bus_add_match_async(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *match,
+ sd_bus_message_handler_t callback,
+ sd_bus_message_handler_t install_callback,
+ void *userdata) {
+
+ return bus_add_match_full(bus, slot, true, match, callback, install_callback, userdata);
+}
+
+bool bus_pid_changed(sd_bus *bus) {
+ assert(bus);
+
+ /* We don't support people creating a bus connection and
+ * keeping it around over a fork(). Let's complain. */
+
+ return bus->original_pid != getpid_cached();
+}
+
+static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ /* Note that this is called both on input_fd, output_fd as well as inotify_fd events */
+
+ r = sd_bus_process(bus, NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Processing of bus failed, closing down: %m");
+ bus_enter_closing(bus);
+ }
+
+ return 1;
+}
+
+static int time_callback(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ r = sd_bus_process(bus, NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Processing of bus failed, closing down: %m");
+ bus_enter_closing(bus);
+ }
+
+ return 1;
+}
+
+static int prepare_callback(sd_event_source *s, void *userdata) {
+ sd_bus *bus = userdata;
+ int r, e;
+ usec_t until;
+
+ assert(s);
+ assert(bus);
+
+ e = sd_bus_get_events(bus);
+ if (e < 0) {
+ r = e;
+ goto fail;
+ }
+
+ if (bus->output_fd != bus->input_fd) {
+
+ r = sd_event_source_set_io_events(bus->input_io_event_source, e & POLLIN);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_io_events(bus->output_io_event_source, e & POLLOUT);
+ } else
+ r = sd_event_source_set_io_events(bus->input_io_event_source, e);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_get_timeout(bus, &until);
+ if (r < 0)
+ goto fail;
+ if (r > 0) {
+ int j;
+
+ j = sd_event_source_set_time(bus->time_event_source, until);
+ if (j < 0) {
+ r = j;
+ goto fail;
+ }
+ }
+
+ r = sd_event_source_set_enabled(bus->time_event_source, r > 0);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ log_debug_errno(r, "Preparing of bus events failed, closing down: %m");
+ bus_enter_closing(bus);
+
+ return 1;
+}
+
+static int quit_callback(sd_event_source *event, void *userdata) {
+ sd_bus *bus = userdata;
+
+ assert(event);
+
+ if (bus->close_on_exit) {
+ sd_bus_flush(bus);
+ sd_bus_close(bus);
+ }
+
+ return 1;
+}
+
+int bus_attach_io_events(sd_bus *bus) {
+ int r;
+
+ assert(bus);
+
+ if (bus->input_fd < 0)
+ return 0;
+
+ if (!bus->event)
+ return 0;
+
+ if (!bus->input_io_event_source) {
+ r = sd_event_add_io(bus->event, &bus->input_io_event_source, bus->input_fd, 0, io_callback, bus);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_prepare(bus->input_io_event_source, prepare_callback);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(bus->input_io_event_source, bus->event_priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_description(bus->input_io_event_source, "bus-input");
+ } else
+ r = sd_event_source_set_io_fd(bus->input_io_event_source, bus->input_fd);
+
+ if (r < 0)
+ return r;
+
+ if (bus->output_fd != bus->input_fd) {
+ assert(bus->output_fd >= 0);
+
+ if (!bus->output_io_event_source) {
+ r = sd_event_add_io(bus->event, &bus->output_io_event_source, bus->output_fd, 0, io_callback, bus);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(bus->output_io_event_source, bus->event_priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_description(bus->input_io_event_source, "bus-output");
+ } else
+ r = sd_event_source_set_io_fd(bus->output_io_event_source, bus->output_fd);
+
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static void bus_detach_io_events(sd_bus *bus) {
+ assert(bus);
+
+ if (bus->input_io_event_source) {
+ sd_event_source_set_enabled(bus->input_io_event_source, SD_EVENT_OFF);
+ bus->input_io_event_source = sd_event_source_unref(bus->input_io_event_source);
+ }
+
+ if (bus->output_io_event_source) {
+ sd_event_source_set_enabled(bus->output_io_event_source, SD_EVENT_OFF);
+ bus->output_io_event_source = sd_event_source_unref(bus->output_io_event_source);
+ }
+}
+
+int bus_attach_inotify_event(sd_bus *bus) {
+ int r;
+
+ assert(bus);
+
+ if (bus->inotify_fd < 0)
+ return 0;
+
+ if (!bus->event)
+ return 0;
+
+ if (!bus->inotify_event_source) {
+ r = sd_event_add_io(bus->event, &bus->inotify_event_source, bus->inotify_fd, EPOLLIN, io_callback, bus);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(bus->inotify_event_source, bus->event_priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_description(bus->inotify_event_source, "bus-inotify");
+ } else
+ r = sd_event_source_set_io_fd(bus->inotify_event_source, bus->inotify_fd);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void bus_detach_inotify_event(sd_bus *bus) {
+ assert(bus);
+
+ if (bus->inotify_event_source) {
+ sd_event_source_set_enabled(bus->inotify_event_source, SD_EVENT_OFF);
+ bus->inotify_event_source = sd_event_source_unref(bus->inotify_event_source);
+ }
+}
+
+_public_ int sd_bus_attach_event(sd_bus *bus, sd_event *event, int priority) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus->event, -EBUSY);
+
+ assert(!bus->input_io_event_source);
+ assert(!bus->output_io_event_source);
+ assert(!bus->time_event_source);
+
+ if (event)
+ bus->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&bus->event);
+ if (r < 0)
+ return r;
+ }
+
+ bus->event_priority = priority;
+
+ r = sd_event_add_time(bus->event, &bus->time_event_source, CLOCK_MONOTONIC, 0, 0, time_callback, bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(bus->time_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_description(bus->time_event_source, "bus-time");
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_add_exit(bus->event, &bus->quit_event_source, quit_callback, bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_description(bus->quit_event_source, "bus-exit");
+ if (r < 0)
+ goto fail;
+
+ r = bus_attach_io_events(bus);
+ if (r < 0)
+ goto fail;
+
+ r = bus_attach_inotify_event(bus);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ sd_bus_detach_event(bus);
+ return r;
+}
+
+_public_ int sd_bus_detach_event(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ if (!bus->event)
+ return 0;
+
+ bus_detach_io_events(bus);
+ bus_detach_inotify_event(bus);
+
+ if (bus->time_event_source) {
+ sd_event_source_set_enabled(bus->time_event_source, SD_EVENT_OFF);
+ bus->time_event_source = sd_event_source_unref(bus->time_event_source);
+ }
+
+ if (bus->quit_event_source) {
+ sd_event_source_set_enabled(bus->quit_event_source, SD_EVENT_OFF);
+ bus->quit_event_source = sd_event_source_unref(bus->quit_event_source);
+ }
+
+ bus->event = sd_event_unref(bus->event);
+ return 1;
+}
+
+_public_ sd_event* sd_bus_get_event(sd_bus *bus) {
+ assert_return(bus, NULL);
+
+ return bus->event;
+}
+
+_public_ sd_bus_message* sd_bus_get_current_message(sd_bus *bus) {
+ assert_return(bus, NULL);
+
+ return bus->current_message;
+}
+
+_public_ sd_bus_slot* sd_bus_get_current_slot(sd_bus *bus) {
+ assert_return(bus, NULL);
+
+ return bus->current_slot;
+}
+
+_public_ sd_bus_message_handler_t sd_bus_get_current_handler(sd_bus *bus) {
+ assert_return(bus, NULL);
+
+ return bus->current_handler;
+}
+
+_public_ void* sd_bus_get_current_userdata(sd_bus *bus) {
+ assert_return(bus, NULL);
+
+ return bus->current_userdata;
+}
+
+static int bus_default(int (*bus_open)(sd_bus **), sd_bus **default_bus, sd_bus **ret) {
+ sd_bus *b = NULL;
+ int r;
+
+ assert(bus_open);
+ assert(default_bus);
+
+ if (!ret)
+ return !!*default_bus;
+
+ if (*default_bus) {
+ *ret = sd_bus_ref(*default_bus);
+ return 0;
+ }
+
+ r = bus_open(&b);
+ if (r < 0)
+ return r;
+
+ b->default_bus_ptr = default_bus;
+ b->tid = gettid();
+ *default_bus = b;
+
+ *ret = b;
+ return 1;
+}
+
+_public_ int sd_bus_default_system(sd_bus **ret) {
+ return bus_default(sd_bus_open_system, &default_system_bus, ret);
+}
+
+_public_ int sd_bus_default_user(sd_bus **ret) {
+ return bus_default(sd_bus_open_user, &default_user_bus, ret);
+}
+
+_public_ int sd_bus_default(sd_bus **ret) {
+ int (*bus_open)(sd_bus **) = NULL;
+ sd_bus **busp;
+
+ busp = bus_choose_default(&bus_open);
+ return bus_default(bus_open, busp, ret);
+}
+
+_public_ int sd_bus_get_tid(sd_bus *b, pid_t *tid) {
+ assert_return(b, -EINVAL);
+ assert_return(tid, -EINVAL);
+ assert_return(!bus_pid_changed(b), -ECHILD);
+
+ if (b->tid != 0) {
+ *tid = b->tid;
+ return 0;
+ }
+
+ if (b->event)
+ return sd_event_get_tid(b->event, tid);
+
+ return -ENXIO;
+}
+
+_public_ int sd_bus_path_encode(const char *prefix, const char *external_id, char **ret_path) {
+ _cleanup_free_ char *e = NULL;
+ char *ret;
+
+ assert_return(object_path_is_valid(prefix), -EINVAL);
+ assert_return(external_id, -EINVAL);
+ assert_return(ret_path, -EINVAL);
+
+ e = bus_label_escape(external_id);
+ if (!e)
+ return -ENOMEM;
+
+ ret = strjoin(prefix, "/", e);
+ if (!ret)
+ return -ENOMEM;
+
+ *ret_path = ret;
+ return 0;
+}
+
+_public_ int sd_bus_path_decode(const char *path, const char *prefix, char **external_id) {
+ const char *e;
+ char *ret;
+
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(object_path_is_valid(prefix), -EINVAL);
+ assert_return(external_id, -EINVAL);
+
+ e = object_path_startswith(path, prefix);
+ if (!e) {
+ *external_id = NULL;
+ return 0;
+ }
+
+ ret = bus_label_unescape(e);
+ if (!ret)
+ return -ENOMEM;
+
+ *external_id = ret;
+ return 1;
+}
+
+_public_ int sd_bus_path_encode_many(char **out, const char *path_template, ...) {
+ _cleanup_strv_free_ char **labels = NULL;
+ char *path, *path_pos, **label_pos;
+ const char *sep, *template_pos;
+ size_t path_length;
+ va_list list;
+ int r;
+
+ assert_return(out, -EINVAL);
+ assert_return(path_template, -EINVAL);
+
+ path_length = strlen(path_template);
+
+ va_start(list, path_template);
+ for (sep = strchr(path_template, '%'); sep; sep = strchr(sep + 1, '%')) {
+ const char *arg;
+ char *label;
+
+ arg = va_arg(list, const char *);
+ if (!arg) {
+ va_end(list);
+ return -EINVAL;
+ }
+
+ label = bus_label_escape(arg);
+ if (!label) {
+ va_end(list);
+ return -ENOMEM;
+ }
+
+ r = strv_consume(&labels, label);
+ if (r < 0) {
+ va_end(list);
+ return r;
+ }
+
+ /* add label length, but account for the format character */
+ path_length += strlen(label) - 1;
+ }
+ va_end(list);
+
+ path = malloc(path_length + 1);
+ if (!path)
+ return -ENOMEM;
+
+ path_pos = path;
+ label_pos = labels;
+
+ for (template_pos = path_template; *template_pos; ) {
+ sep = strchrnul(template_pos, '%');
+ path_pos = mempcpy(path_pos, template_pos, sep - template_pos);
+ if (!*sep)
+ break;
+
+ path_pos = stpcpy(path_pos, *label_pos++);
+ template_pos = sep + 1;
+ }
+
+ *path_pos = 0;
+ *out = path;
+ return 0;
+}
+
+_public_ int sd_bus_path_decode_many(const char *path, const char *path_template, ...) {
+ _cleanup_strv_free_ char **labels = NULL;
+ const char *template_pos, *path_pos;
+ char **label_pos;
+ va_list list;
+ int r;
+
+ /*
+ * This decodes an object-path based on a template argument. The
+ * template consists of a verbatim path, optionally including special
+ * directives:
+ *
+ * - Each occurrence of '%' in the template matches an arbitrary
+ * substring of a label in the given path. At most one such
+ * directive is allowed per label. For each such directive, the
+ * caller must provide an output parameter (char **) via va_arg. If
+ * NULL is passed, the given label is verified, but not returned.
+ * For each matched label, the *decoded* label is stored in the
+ * passed output argument, and the caller is responsible to free
+ * it. Note that the output arguments are only modified if the
+ * actualy path matched the template. Otherwise, they're left
+ * untouched.
+ *
+ * This function returns <0 on error, 0 if the path does not match the
+ * template, 1 if it matched.
+ */
+
+ assert_return(path, -EINVAL);
+ assert_return(path_template, -EINVAL);
+
+ path_pos = path;
+
+ for (template_pos = path_template; *template_pos; ) {
+ const char *sep;
+ size_t length;
+ char *label;
+
+ /* verify everything until the next '%' matches verbatim */
+ sep = strchrnul(template_pos, '%');
+ length = sep - template_pos;
+ if (strncmp(path_pos, template_pos, length))
+ return 0;
+
+ path_pos += length;
+ template_pos += length;
+
+ if (!*template_pos)
+ break;
+
+ /* We found the next '%' character. Everything up until here
+ * matched. We now skip ahead to the end of this label and make
+ * sure it matches the tail of the label in the path. Then we
+ * decode the string in-between and save it for later use. */
+
+ ++template_pos; /* skip over '%' */
+
+ sep = strchrnul(template_pos, '/');
+ length = sep - template_pos; /* length of suffix to match verbatim */
+
+ /* verify the suffixes match */
+ sep = strchrnul(path_pos, '/');
+ if (sep - path_pos < (ssize_t)length ||
+ strncmp(sep - length, template_pos, length))
+ return 0;
+
+ template_pos += length; /* skip over matched label */
+ length = sep - path_pos - length; /* length of sub-label to decode */
+
+ /* store unescaped label for later use */
+ label = bus_label_unescape_n(path_pos, length);
+ if (!label)
+ return -ENOMEM;
+
+ r = strv_consume(&labels, label);
+ if (r < 0)
+ return r;
+
+ path_pos = sep; /* skip decoded label and suffix */
+ }
+
+ /* end of template must match end of path */
+ if (*path_pos)
+ return 0;
+
+ /* copy the labels over to the caller */
+ va_start(list, path_template);
+ for (label_pos = labels; label_pos && *label_pos; ++label_pos) {
+ char **arg;
+
+ arg = va_arg(list, char **);
+ if (arg)
+ *arg = *label_pos;
+ else
+ free(*label_pos);
+ }
+ va_end(list);
+
+ labels = mfree(labels);
+ return 1;
+}
+
+_public_ int sd_bus_try_close(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return -EOPNOTSUPP;
+}
+
+_public_ int sd_bus_get_description(sd_bus *bus, const char **description) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(description, -EINVAL);
+ assert_return(bus->description, -ENXIO);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (bus->description)
+ *description = bus->description;
+ else if (bus->is_system)
+ *description = "system";
+ else if (bus->is_user)
+ *description = "user";
+ else
+ *description = NULL;
+
+ return 0;
+}
+
+_public_ int sd_bus_get_scope(sd_bus *bus, const char **scope) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(scope, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (bus->is_user) {
+ *scope = "user";
+ return 0;
+ }
+
+ if (bus->is_system) {
+ *scope = "system";
+ return 0;
+ }
+
+ return -ENODATA;
+}
+
+_public_ int sd_bus_get_address(sd_bus *bus, const char **address) {
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(address, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (bus->address) {
+ *address = bus->address;
+ return 0;
+ }
+
+ return -ENODATA;
+}
+
+_public_ int sd_bus_get_creds_mask(sd_bus *bus, uint64_t *mask) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(mask, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ *mask = bus->creds_mask;
+ return 0;
+}
+
+_public_ int sd_bus_is_bus_client(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->bus_client;
+}
+
+_public_ int sd_bus_is_server(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->is_server;
+}
+
+_public_ int sd_bus_is_anonymous(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->anonymous_auth;
+}
+
+_public_ int sd_bus_is_trusted(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->trusted;
+}
+
+_public_ int sd_bus_is_monitor(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->is_monitor;
+}
+
+static void flush_close(sd_bus *bus) {
+ if (!bus)
+ return;
+
+ /* Flushes and closes the specified bus. We take a ref before,
+ * to ensure the flushing does not cause the bus to be
+ * unreferenced. */
+
+ sd_bus_flush_close_unref(sd_bus_ref(bus));
+}
+
+_public_ void sd_bus_default_flush_close(void) {
+ flush_close(default_starter_bus);
+ flush_close(default_user_bus);
+ flush_close(default_system_bus);
+}
+
+_public_ int sd_bus_set_exit_on_disconnect(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ /* Turns on exit-on-disconnect, and triggers it immediately if the bus connection was already
+ * disconnected. Note that this is triggered exclusively on disconnections triggered by the server side, never
+ * from the client side. */
+ bus->exit_on_disconnect = b;
+
+ /* If the exit condition was triggered already, exit immediately. */
+ return bus_exit_now(bus);
+}
+
+_public_ int sd_bus_get_exit_on_disconnect(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ return bus->exit_on_disconnect;
+}
+
+_public_ int sd_bus_set_sender(sd_bus *bus, const char *sender) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus->bus_client, -EPERM);
+ assert_return(!sender || service_name_is_valid(sender), -EINVAL);
+
+ return free_and_strdup(&bus->patch_sender, sender);
+}
+
+_public_ int sd_bus_get_sender(sd_bus *bus, const char **ret) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(ret, -EINVAL);
+
+ if (!bus->patch_sender)
+ return -ENODATA;
+
+ *ret = bus->patch_sender;
+ return 0;
+}
+
+_public_ int sd_bus_get_n_queued_read(sd_bus *bus, uint64_t *ret) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+ assert_return(ret, -EINVAL);
+
+ *ret = bus->rqueue_size;
+ return 0;
+}
+
+_public_ int sd_bus_get_n_queued_write(sd_bus *bus, uint64_t *ret) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+ assert_return(ret, -EINVAL);
+
+ *ret = bus->wqueue_size;
+ return 0;
+}
+
+_public_ int sd_bus_set_method_call_timeout(sd_bus *bus, uint64_t usec) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ bus->method_call_timeout = usec;
+ return 0;
+}
+
+_public_ int sd_bus_get_method_call_timeout(sd_bus *bus, uint64_t *ret) {
+ const char *e;
+ usec_t usec;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(ret, -EINVAL);
+
+ if (bus->method_call_timeout != 0) {
+ *ret = bus->method_call_timeout;
+ return 0;
+ }
+
+ e = secure_getenv("SYSTEMD_BUS_TIMEOUT");
+ if (e && parse_sec(e, &usec) >= 0 && usec != 0) {
+ /* Save the parsed value to avoid multiple parsing. To change the timeout value,
+ * use sd_bus_set_method_call_timeout() instead of setenv(). */
+ *ret = bus->method_call_timeout = usec;
+ return 0;
+ }
+
+ *ret = bus->method_call_timeout = BUS_DEFAULT_TIMEOUT;
+ return 0;
+}
+
+_public_ int sd_bus_set_close_on_exit(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ bus->close_on_exit = b;
+ return 0;
+}
+
+_public_ int sd_bus_get_close_on_exit(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ return bus->close_on_exit;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-address.c b/src/libsystemd/sd-bus/test-bus-address.c
new file mode 100644
index 0000000..db5ff72
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-address.c
@@ -0,0 +1,69 @@
+#include "sd-bus.h"
+
+#include "bus-internal.h"
+#include "log.h"
+#include "string-util.h"
+#include "strv.h"
+
+static void test_one_address(sd_bus *b,
+ const char *host,
+ int result, const char *expected) {
+ int r;
+
+ r = bus_set_address_system_remote(b, host);
+ log_info("\"%s\" → %d, \"%s\"", host, r, strna(r >= 0 ? b->address : NULL));
+ if (result < 0 || expected) {
+ assert(r == result);
+ if (r >= 0)
+ assert_se(streq(b->address, expected));
+ }
+}
+
+static void test_bus_set_address_system_remote(char **args) {
+ _cleanup_(sd_bus_unrefp) sd_bus *b = NULL;
+
+ assert_se(sd_bus_new(&b) >= 0);
+ if (!strv_isempty(args)) {
+ char **a;
+ STRV_FOREACH(a, args)
+ test_one_address(b, *a, 0, NULL);
+ return;
+ };
+
+ test_one_address(b, "host",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=host,argv4=systemd-stdio-bridge");
+ test_one_address(b, "host:123",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=-p,argv3=123,argv4=--,argv5=host,argv6=systemd-stdio-bridge");
+ test_one_address(b, "host:123:123",
+ -EINVAL, NULL);
+ test_one_address(b, "host:",
+ -EINVAL, NULL);
+ test_one_address(b, "user@host",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=user%40host,argv4=systemd-stdio-bridge");
+ test_one_address(b, "user@host@host",
+ -EINVAL, NULL);
+ test_one_address(b, "[::1]",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=%3a%3a1,argv4=systemd-stdio-bridge");
+ test_one_address(b, "user@[::1]",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=user%40%3a%3a1,argv4=systemd-stdio-bridge");
+ test_one_address(b, "user@[::1]:99",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=-p,argv3=99,argv4=--,argv5=user%40%3a%3a1,argv6=systemd-stdio-bridge");
+ test_one_address(b, "user@[::1]:",
+ -EINVAL, NULL);
+ test_one_address(b, "user@[::1:",
+ -EINVAL, NULL);
+ test_one_address(b, "user@",
+ -EINVAL, NULL);
+ test_one_address(b, "user@@",
+ -EINVAL, NULL);
+}
+
+int main(int argc, char *argv[]) {
+ log_set_max_level(LOG_INFO);
+ log_parse_environment();
+ log_open();
+
+ test_bus_set_address_system_remote(argv + 1);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-benchmark.c b/src/libsystemd/sd-bus/test-bus-benchmark.c
new file mode 100644
index 0000000..2dd3d41
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-benchmark.c
@@ -0,0 +1,323 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/wait.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-kernel.h"
+#include "bus-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "missing_resource.h"
+#include "time-util.h"
+#include "util.h"
+
+#define MAX_SIZE (2*1024*1024)
+
+static usec_t arg_loop_usec = 100 * USEC_PER_MSEC;
+
+typedef enum Type {
+ TYPE_LEGACY,
+ TYPE_DIRECT,
+} Type;
+
+static void server(sd_bus *b, size_t *result) {
+ int r;
+
+ for (;;) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = sd_bus_process(b, &m);
+ assert_se(r >= 0);
+
+ if (r == 0)
+ assert_se(sd_bus_wait(b, USEC_INFINITY) >= 0);
+ if (!m)
+ continue;
+
+ if (sd_bus_message_is_method_call(m, "benchmark.server", "Ping"))
+ assert_se(sd_bus_reply_method_return(m, NULL) >= 0);
+ else if (sd_bus_message_is_method_call(m, "benchmark.server", "Work")) {
+ const void *p;
+ size_t sz;
+
+ /* Make sure the mmap is mapped */
+ assert_se(sd_bus_message_read_array(m, 'y', &p, &sz) > 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+ } else if (sd_bus_message_is_method_call(m, "benchmark.server", "Exit")) {
+ uint64_t res;
+ assert_se(sd_bus_message_read(m, "t", &res) > 0);
+
+ *result = res;
+ return;
+
+ } else if (!sd_bus_message_is_signal(m, NULL, NULL))
+ assert_not_reached("Unknown method");
+ }
+}
+
+static void transaction(sd_bus *b, size_t sz, const char *server_name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ uint8_t *p;
+
+ assert_se(sd_bus_message_new_method_call(b, &m, server_name, "/", "benchmark.server", "Work") >= 0);
+ assert_se(sd_bus_message_append_array_space(m, 'y', sz, (void**) &p) >= 0);
+
+ memset(p, 0x80, sz);
+
+ assert_se(sd_bus_call(b, m, 0, NULL, &reply) >= 0);
+}
+
+static void client_bisect(const char *address, const char *server_name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *x = NULL;
+ size_t lsize, rsize, csize;
+ sd_bus *b;
+ int r;
+
+ r = sd_bus_new(&b);
+ assert_se(r >= 0);
+
+ r = sd_bus_set_address(b, address);
+ assert_se(r >= 0);
+
+ r = sd_bus_start(b);
+ assert_se(r >= 0);
+
+ r = sd_bus_call_method(b, server_name, "/", "benchmark.server", "Ping", NULL, NULL, NULL);
+ assert_se(r >= 0);
+
+ lsize = 1;
+ rsize = MAX_SIZE;
+
+ printf("SIZE\tCOPY\tMEMFD\n");
+
+ for (;;) {
+ usec_t t;
+ unsigned n_copying, n_memfd;
+
+ csize = (lsize + rsize) / 2;
+
+ if (csize <= lsize)
+ break;
+
+ if (csize <= 0)
+ break;
+
+ printf("%zu\t", csize);
+
+ b->use_memfd = 0;
+
+ t = now(CLOCK_MONOTONIC);
+ for (n_copying = 0;; n_copying++) {
+ transaction(b, csize, server_name);
+ if (now(CLOCK_MONOTONIC) >= t + arg_loop_usec)
+ break;
+ }
+ printf("%u\t", (unsigned) ((n_copying * USEC_PER_SEC) / arg_loop_usec));
+
+ b->use_memfd = -1;
+
+ t = now(CLOCK_MONOTONIC);
+ for (n_memfd = 0;; n_memfd++) {
+ transaction(b, csize, server_name);
+ if (now(CLOCK_MONOTONIC) >= t + arg_loop_usec)
+ break;
+ }
+ printf("%u\n", (unsigned) ((n_memfd * USEC_PER_SEC) / arg_loop_usec));
+
+ if (n_copying == n_memfd)
+ break;
+
+ if (n_copying > n_memfd)
+ lsize = csize;
+ else
+ rsize = csize;
+ }
+
+ b->use_memfd = 1;
+ assert_se(sd_bus_message_new_method_call(b, &x, server_name, "/", "benchmark.server", "Exit") >= 0);
+ assert_se(sd_bus_message_append(x, "t", csize) >= 0);
+ assert_se(sd_bus_send(b, x, NULL) >= 0);
+
+ sd_bus_unref(b);
+}
+
+static void client_chart(Type type, const char *address, const char *server_name, int fd) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *x = NULL;
+ size_t csize;
+ sd_bus *b;
+ int r;
+
+ r = sd_bus_new(&b);
+ assert_se(r >= 0);
+
+ if (type == TYPE_DIRECT) {
+ r = sd_bus_set_fd(b, fd, fd);
+ assert_se(r >= 0);
+ } else {
+ r = sd_bus_set_address(b, address);
+ assert_se(r >= 0);
+
+ r = sd_bus_set_bus_client(b, true);
+ assert_se(r >= 0);
+ }
+
+ r = sd_bus_start(b);
+ assert_se(r >= 0);
+
+ r = sd_bus_call_method(b, server_name, "/", "benchmark.server", "Ping", NULL, NULL, NULL);
+ assert_se(r >= 0);
+
+ switch (type) {
+ case TYPE_LEGACY:
+ printf("SIZE\tLEGACY\n");
+ break;
+ case TYPE_DIRECT:
+ printf("SIZE\tDIRECT\n");
+ break;
+ }
+
+ for (csize = 1; csize <= MAX_SIZE; csize *= 2) {
+ usec_t t;
+ unsigned n_memfd;
+
+ printf("%zu\t", csize);
+
+ t = now(CLOCK_MONOTONIC);
+ for (n_memfd = 0;; n_memfd++) {
+ transaction(b, csize, server_name);
+ if (now(CLOCK_MONOTONIC) >= t + arg_loop_usec)
+ break;
+ }
+
+ printf("%u\n", (unsigned) ((n_memfd * USEC_PER_SEC) / arg_loop_usec));
+ }
+
+ b->use_memfd = 1;
+ assert_se(sd_bus_message_new_method_call(b, &x, server_name, "/", "benchmark.server", "Exit") >= 0);
+ assert_se(sd_bus_message_append(x, "t", csize) >= 0);
+ assert_se(sd_bus_send(b, x, NULL) >= 0);
+
+ sd_bus_unref(b);
+}
+
+int main(int argc, char *argv[]) {
+ enum {
+ MODE_BISECT,
+ MODE_CHART,
+ } mode = MODE_BISECT;
+ Type type = TYPE_LEGACY;
+ int i, pair[2] = { -1, -1 };
+ _cleanup_free_ char *address = NULL, *server_name = NULL;
+ _cleanup_close_ int bus_ref = -1;
+ const char *unique;
+ cpu_set_t cpuset;
+ size_t result;
+ sd_bus *b;
+ pid_t pid;
+ int r;
+
+ for (i = 1; i < argc; i++) {
+ if (streq(argv[i], "chart")) {
+ mode = MODE_CHART;
+ continue;
+ } else if (streq(argv[i], "legacy")) {
+ type = TYPE_LEGACY;
+ continue;
+ } else if (streq(argv[i], "direct")) {
+ type = TYPE_DIRECT;
+ continue;
+ }
+
+ assert_se(parse_sec(argv[i], &arg_loop_usec) >= 0);
+ }
+
+ assert_se(arg_loop_usec > 0);
+
+ if (type == TYPE_LEGACY) {
+ const char *e;
+
+ e = secure_getenv("DBUS_SESSION_BUS_ADDRESS");
+ assert_se(e);
+
+ address = strdup(e);
+ assert_se(address);
+ }
+
+ r = sd_bus_new(&b);
+ assert_se(r >= 0);
+
+ if (type == TYPE_DIRECT) {
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, pair) >= 0);
+
+ r = sd_bus_set_fd(b, pair[0], pair[0]);
+ assert_se(r >= 0);
+
+ r = sd_bus_set_server(b, true, SD_ID128_NULL);
+ assert_se(r >= 0);
+ } else {
+ r = sd_bus_set_address(b, address);
+ assert_se(r >= 0);
+
+ r = sd_bus_set_bus_client(b, true);
+ assert_se(r >= 0);
+ }
+
+ r = sd_bus_start(b);
+ assert_se(r >= 0);
+
+ if (type != TYPE_DIRECT) {
+ r = sd_bus_get_unique_name(b, &unique);
+ assert_se(r >= 0);
+
+ server_name = strdup(unique);
+ assert_se(server_name);
+ }
+
+ sync();
+ setpriority(PRIO_PROCESS, 0, -19);
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
+
+ safe_close(bus_ref);
+ sd_bus_unref(b);
+
+ switch (mode) {
+ case MODE_BISECT:
+ client_bisect(address, server_name);
+ break;
+
+ case MODE_CHART:
+ client_chart(type, address, server_name, pair[1]);
+ break;
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
+
+ server(b, &result);
+
+ if (mode == MODE_BISECT)
+ printf("Copying/memfd are equally fast at %zu bytes\n", result);
+
+ assert_se(waitpid(pid, NULL, 0) == pid);
+
+ safe_close(pair[1]);
+ sd_bus_unref(b);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-chat.c b/src/libsystemd/sd-bus/test-bus-chat.c
new file mode 100644
index 0000000..6181fb1
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-chat.c
@@ -0,0 +1,547 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-internal.h"
+#include "bus-match.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "log.h"
+#include "macro.h"
+#include "tests.h"
+#include "util.h"
+
+static int match_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ log_info("Match triggered! interface=%s member=%s", strna(sd_bus_message_get_interface(m)), strna(sd_bus_message_get_member(m)));
+ return 0;
+}
+
+static int object_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ int r;
+
+ if (sd_bus_message_is_method_error(m, NULL))
+ return 0;
+
+ if (sd_bus_message_is_method_call(m, "org.object.test", "Foobar")) {
+ log_info("Invoked Foobar() on %s", sd_bus_message_get_path(m));
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send reply: %m");
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int server_init(sd_bus **_bus) {
+ sd_bus *bus = NULL;
+ sd_id128_t id;
+ int r;
+ const char *unique, *desc;
+
+ assert_se(_bus);
+
+ r = sd_bus_open_user_with_description(&bus, "my bus!");
+ if (r < 0) {
+ log_error_errno(r, "Failed to connect to user bus: %m");
+ goto fail;
+ }
+
+ r = sd_bus_get_bus_id(bus, &id);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get server ID: %m");
+ goto fail;
+ }
+
+ r = sd_bus_get_unique_name(bus, &unique);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get unique name: %m");
+ goto fail;
+ }
+
+ r = sd_bus_get_description(bus, &desc);
+ assert_se(streq(desc, "my bus!"));
+
+ log_info("Peer ID is " SD_ID128_FORMAT_STR ".", SD_ID128_FORMAT_VAL(id));
+ log_info("Unique ID: %s", unique);
+ log_info("Can send file handles: %i", sd_bus_can_send(bus, 'h'));
+
+ r = sd_bus_request_name(bus, "org.freedesktop.systemd.test", 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to acquire name: %m");
+ goto fail;
+ }
+
+ r = sd_bus_add_fallback(bus, NULL, "/foo/bar", object_callback, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add object: %m");
+ goto fail;
+ }
+
+ r = sd_bus_match_signal(bus, NULL, NULL, NULL, "foo.bar", "Notify", match_callback, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to request match: %m");
+ goto fail;
+ }
+
+ r = sd_bus_add_match(bus, NULL, "type='signal',interface='org.freedesktop.DBus',member='NameOwnerChanged'", match_callback, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add match: %m");
+ goto fail;
+ }
+
+ bus_match_dump(&bus->match_callbacks, 0);
+
+ *_bus = bus;
+ return 0;
+
+fail:
+ sd_bus_unref(bus);
+ return r;
+}
+
+static int server(sd_bus *bus) {
+ int r;
+ bool client1_gone = false, client2_gone = false;
+
+ while (!client1_gone || !client2_gone) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ pid_t pid = 0;
+ const char *label = NULL;
+
+ r = sd_bus_process(bus, &m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to process requests: %m");
+ goto fail;
+ }
+
+ if (r == 0) {
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to wait: %m");
+ goto fail;
+ }
+
+ continue;
+ }
+
+ if (!m)
+ continue;
+
+ sd_bus_creds_get_pid(sd_bus_message_get_creds(m), &pid);
+ sd_bus_creds_get_selinux_context(sd_bus_message_get_creds(m), &label);
+ log_info("Got message! member=%s pid="PID_FMT" label=%s",
+ strna(sd_bus_message_get_member(m)),
+ pid,
+ strna(label));
+ /* bus_message_dump(m); */
+ /* sd_bus_message_rewind(m, true); */
+
+ if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "LowerCase")) {
+ const char *hello;
+ _cleanup_free_ char *lowercase = NULL;
+
+ r = sd_bus_message_read(m, "s", &hello);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get parameter: %m");
+ goto fail;
+ }
+
+ lowercase = strdup(hello);
+ if (!lowercase) {
+ r = log_oom();
+ goto fail;
+ }
+
+ ascii_strlower(lowercase);
+
+ r = sd_bus_reply_method_return(m, "s", lowercase);
+ if (r < 0) {
+ log_error_errno(r, "Failed to send reply: %m");
+ goto fail;
+ }
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "ExitClient1")) {
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to send reply: %m");
+ goto fail;
+ }
+
+ client1_gone = true;
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "ExitClient2")) {
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to send reply: %m");
+ goto fail;
+ }
+
+ client2_gone = true;
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "Slow")) {
+
+ sleep(1);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to send reply: %m");
+ goto fail;
+ }
+
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "FileDescriptor")) {
+ int fd;
+ static const char x = 'X';
+
+ r = sd_bus_message_read(m, "h", &fd);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get parameter: %m");
+ goto fail;
+ }
+
+ log_info("Received fd=%d", fd);
+
+ if (write(fd, &x, 1) < 0) {
+ log_error_errno(errno, "Failed to write to fd: %m");
+ safe_close(fd);
+ goto fail;
+ }
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to send reply: %m");
+ goto fail;
+ }
+
+ } else if (sd_bus_message_is_method_call(m, NULL, NULL)) {
+
+ r = sd_bus_reply_method_error(
+ m,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_UNKNOWN_METHOD, "Unknown method."));
+ if (r < 0) {
+ log_error_errno(r, "Failed to send reply: %m");
+ goto fail;
+ }
+ }
+ }
+
+ r = 0;
+
+fail:
+ if (bus) {
+ sd_bus_flush(bus);
+ sd_bus_unref(bus);
+ }
+
+ return r;
+}
+
+static void* client1(void *p) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *hello;
+ int r;
+ _cleanup_close_pair_ int pp[2] = { -1, -1 };
+ char x;
+
+ r = sd_bus_open_user(&bus);
+ if (r < 0) {
+ log_error_errno(r, "Failed to connect to user bus: %m");
+ goto finish;
+ }
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "LowerCase",
+ &error,
+ &reply,
+ "s",
+ "HELLO");
+ if (r < 0) {
+ log_error_errno(r, "Failed to issue method call: %m");
+ goto finish;
+ }
+
+ r = sd_bus_message_read(reply, "s", &hello);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get string: %m");
+ goto finish;
+ }
+
+ assert_se(streq(hello, "hello"));
+
+ if (pipe2(pp, O_CLOEXEC|O_NONBLOCK) < 0) {
+ log_error_errno(errno, "Failed to allocate pipe: %m");
+ r = -errno;
+ goto finish;
+ }
+
+ log_info("Sending fd=%d", pp[1]);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "FileDescriptor",
+ &error,
+ NULL,
+ "h",
+ pp[1]);
+ if (r < 0) {
+ log_error_errno(r, "Failed to issue method call: %m");
+ goto finish;
+ }
+
+ errno = 0;
+ if (read(pp[0], &x, 1) <= 0) {
+ log_error("Failed to read from pipe: %s", errno ? strerror(errno) : "early read");
+ goto finish;
+ }
+
+ r = 0;
+
+finish:
+ if (bus) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *q;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &q,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "ExitClient1");
+ if (r < 0)
+ log_error_errno(r, "Failed to allocate method call: %m");
+ else
+ sd_bus_send(bus, q, NULL);
+
+ }
+
+ return INT_TO_PTR(r);
+}
+
+static int quit_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ bool *x = userdata;
+
+ log_error_errno(sd_bus_message_get_errno(m), "Quit callback: %m");
+
+ *x = 1;
+ return 1;
+}
+
+static void* client2(void *p) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool quit = false;
+ const char *mid;
+ int r;
+
+ r = sd_bus_open_user(&bus);
+ if (r < 0) {
+ log_error_errno(r, "Failed to connect to user bus: %m");
+ goto finish;
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/foo/bar/waldo/piep",
+ "org.object.test",
+ "Foobar");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ r = sd_bus_send(bus, m, NULL);
+ if (r < 0) {
+ log_error("Failed to issue method call: %s", bus_error_message(&error, -r));
+ goto finish;
+ }
+
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/foobar",
+ "foo.bar",
+ "Notify");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate signal: %m");
+ goto finish;
+ }
+
+ r = sd_bus_send(bus, m, NULL);
+ if (r < 0) {
+ log_error("Failed to issue signal: %s", bus_error_message(&error, -r));
+ goto finish;
+ }
+
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.DBus.Peer",
+ "GetMachineId");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0) {
+ log_error("Failed to issue method call: %s", bus_error_message(&error, -r));
+ goto finish;
+ }
+
+ r = sd_bus_message_read(reply, "s", &mid);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse machine ID: %m");
+ goto finish;
+ }
+
+ log_info("Machine ID is %s.", mid);
+
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "Slow");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call(bus, m, 200 * USEC_PER_MSEC, &error, &reply);
+ if (r < 0)
+ log_info("Failed to issue method call: %s", bus_error_message(&error, -r));
+ else
+ log_info("Slow call succeed.");
+
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "Slow");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ r = sd_bus_call_async(bus, NULL, m, quit_callback, &quit, 200 * USEC_PER_MSEC);
+ if (r < 0) {
+ log_info("Failed to issue method call: %s", bus_error_message(&error, -r));
+ goto finish;
+ }
+
+ while (!quit) {
+ r = sd_bus_process(bus, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to process requests: %m");
+ goto finish;
+ }
+ if (r == 0) {
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to wait: %m");
+ goto finish;
+ }
+ }
+ }
+
+ r = 0;
+
+finish:
+ if (bus) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *q;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &q,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "ExitClient2");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ (void) sd_bus_send(bus, q, NULL);
+ }
+
+ return INT_TO_PTR(r);
+}
+
+int main(int argc, char *argv[]) {
+ pthread_t c1, c2;
+ sd_bus *bus;
+ void *p;
+ int q, r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = server_init(&bus);
+ if (r < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ log_info("Initialized...");
+
+ r = pthread_create(&c1, NULL, client1, bus);
+ if (r != 0)
+ return EXIT_FAILURE;
+
+ r = pthread_create(&c2, NULL, client2, bus);
+ if (r != 0)
+ return EXIT_FAILURE;
+
+ r = server(bus);
+
+ q = pthread_join(c1, &p);
+ if (q != 0)
+ return EXIT_FAILURE;
+ if (PTR_TO_INT(p) < 0)
+ return EXIT_FAILURE;
+
+ q = pthread_join(c2, &p);
+ if (q != 0)
+ return EXIT_FAILURE;
+ if (PTR_TO_INT(p) < 0)
+ return EXIT_FAILURE;
+
+ if (r < 0)
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-cleanup.c b/src/libsystemd/sd-bus/test-bus-cleanup.c
new file mode 100644
index 0000000..bea722b
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-cleanup.c
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+
+#include "sd-bus.h"
+
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "refcnt.h"
+#include "tests.h"
+
+static bool use_system_bus = false;
+
+static void test_bus_new(void) {
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+
+ assert_se(sd_bus_new(&bus) == 0);
+ printf("after new: refcount %u\n", REFCNT_GET(bus->n_ref));
+}
+
+static int test_bus_open(void) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ r = sd_bus_open_user(&bus);
+ if (IN_SET(r, -ECONNREFUSED, -ENOENT)) {
+ r = sd_bus_open_system(&bus);
+ if (IN_SET(r, -ECONNREFUSED, -ENOENT))
+ return r;
+ use_system_bus = true;
+ }
+
+ assert_se(r >= 0);
+ printf("after open: refcount %u\n", REFCNT_GET(bus->n_ref));
+
+ return 0;
+}
+
+static void test_bus_new_method_call(void) {
+ sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ assert_se(use_system_bus ? sd_bus_open_system(&bus) >= 0 : sd_bus_open_user(&bus) >= 0);
+
+ assert_se(sd_bus_message_new_method_call(bus, &m, "a.service.name", "/an/object/path", "an.interface.name", "AMethodName") >= 0);
+
+ printf("after message_new_method_call: refcount %u\n", REFCNT_GET(bus->n_ref));
+
+ sd_bus_flush_close_unref(bus);
+ printf("after bus_flush_close_unref: refcount %u\n", m->n_ref);
+}
+
+static void test_bus_new_signal(void) {
+ sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ assert_se(use_system_bus ? sd_bus_open_system(&bus) >= 0 : sd_bus_open_user(&bus) >= 0);
+
+ assert_se(sd_bus_message_new_signal(bus, &m, "/an/object/path", "an.interface.name", "Name") >= 0);
+
+ printf("after message_new_signal: refcount %u\n", REFCNT_GET(bus->n_ref));
+
+ sd_bus_flush_close_unref(bus);
+ printf("after bus_flush_close_unref: refcount %u\n", m->n_ref);
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_INFO);
+
+ test_bus_new();
+
+ if (test_bus_open() < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ test_bus_new_method_call();
+ test_bus_new_signal();
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-creds.c b/src/libsystemd/sd-bus/test-bus-creds.c
new file mode 100644
index 0000000..c02c459
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-creds.c
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-bus.h"
+
+#include "bus-dump.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ if (cg_unified_flush() == -ENOMEDIUM)
+ return log_tests_skipped("/sys/fs/cgroup/ not available");
+
+ r = sd_bus_creds_new_from_pid(&creds, 0, _SD_BUS_CREDS_ALL);
+ log_full_errno(r < 0 ? LOG_ERR : LOG_DEBUG, r, "sd_bus_creds_new_from_pid: %m");
+ assert_se(r >= 0);
+
+ bus_creds_dump(creds, NULL, true);
+
+ creds = sd_bus_creds_unref(creds);
+
+ r = sd_bus_creds_new_from_pid(&creds, 1, _SD_BUS_CREDS_ALL);
+ if (r != -EACCES) {
+ assert_se(r >= 0);
+ putchar('\n');
+ bus_creds_dump(creds, NULL, true);
+ }
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-error.c b/src/libsystemd/sd-bus/test-bus-error.c
new file mode 100644
index 0000000..f464b5b
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-error.c
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-bus.h"
+
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "errno-list.h"
+
+static void test_error(void) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL, second = SD_BUS_ERROR_NULL;
+ const sd_bus_error const_error = SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FILE_EXISTS, "const error");
+ const sd_bus_error temporarily_const_error = {
+ .name = SD_BUS_ERROR_ACCESS_DENIED,
+ .message = "oh! no",
+ ._need_free = -1
+ };
+
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set(&error, SD_BUS_ERROR_NOT_SUPPORTED, "xxx") == -EOPNOTSUPP);
+ assert_se(streq(error.name, SD_BUS_ERROR_NOT_SUPPORTED));
+ assert_se(streq(error.message, "xxx"));
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_NOT_SUPPORTED));
+ assert_se(sd_bus_error_get_errno(&error) == EOPNOTSUPP);
+ assert_se(sd_bus_error_is_set(&error));
+ sd_bus_error_free(&error);
+
+ /* Check with no error */
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_setf(&error, NULL, "yyy %i", -1) == 0);
+ assert_se(error.name == NULL);
+ assert_se(error.message == NULL);
+ assert_se(!sd_bus_error_has_name(&error, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_get_errno(&error) == 0);
+ assert_se(!sd_bus_error_is_set(&error));
+
+ assert_se(sd_bus_error_setf(&error, SD_BUS_ERROR_FILE_NOT_FOUND, "yyy %i", -1) == -ENOENT);
+ assert_se(streq(error.name, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(streq(error.message, "yyy -1"));
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_get_errno(&error) == ENOENT);
+ assert_se(sd_bus_error_is_set(&error));
+
+ assert_se(!sd_bus_error_is_set(&second));
+ assert_se(second._need_free == 0);
+ assert_se(error._need_free > 0);
+ assert_se(sd_bus_error_copy(&second, &error) == -ENOENT);
+ assert_se(second._need_free > 0);
+ assert_se(streq(error.name, second.name));
+ assert_se(streq(error.message, second.message));
+ assert_se(sd_bus_error_get_errno(&second) == ENOENT);
+ assert_se(sd_bus_error_has_name(&second, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_is_set(&second));
+
+ sd_bus_error_free(&error);
+ sd_bus_error_free(&second);
+
+ assert_se(!sd_bus_error_is_set(&second));
+ assert_se(const_error._need_free == 0);
+ assert_se(sd_bus_error_copy(&second, &const_error) == -EEXIST);
+ assert_se(second._need_free == 0);
+ assert_se(streq(const_error.name, second.name));
+ assert_se(streq(const_error.message, second.message));
+ assert_se(sd_bus_error_get_errno(&second) == EEXIST);
+ assert_se(sd_bus_error_has_name(&second, SD_BUS_ERROR_FILE_EXISTS));
+ assert_se(sd_bus_error_is_set(&second));
+ sd_bus_error_free(&second);
+
+ assert_se(!sd_bus_error_is_set(&second));
+ assert_se(temporarily_const_error._need_free < 0);
+ assert_se(sd_bus_error_copy(&second, &temporarily_const_error) == -EACCES);
+ assert_se(second._need_free > 0);
+ assert_se(streq(temporarily_const_error.name, second.name));
+ assert_se(streq(temporarily_const_error.message, second.message));
+ assert_se(sd_bus_error_get_errno(&second) == EACCES);
+ assert_se(sd_bus_error_has_name(&second, SD_BUS_ERROR_ACCESS_DENIED));
+ assert_se(sd_bus_error_is_set(&second));
+
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set_const(&error, "System.Error.EUCLEAN", "Hallo") == -EUCLEAN);
+ assert_se(streq(error.name, "System.Error.EUCLEAN"));
+ assert_se(streq(error.message, "Hallo"));
+ assert_se(sd_bus_error_has_name(&error, "System.Error.EUCLEAN"));
+ assert_se(sd_bus_error_get_errno(&error) == EUCLEAN);
+ assert_se(sd_bus_error_is_set(&error));
+ sd_bus_error_free(&error);
+
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set_errno(&error, EBUSY) == -EBUSY);
+ assert_se(streq(error.name, "System.Error.EBUSY"));
+ assert_se(streq(error.message, strerror(EBUSY)));
+ assert_se(sd_bus_error_has_name(&error, "System.Error.EBUSY"));
+ assert_se(sd_bus_error_get_errno(&error) == EBUSY);
+ assert_se(sd_bus_error_is_set(&error));
+ sd_bus_error_free(&error);
+
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set_errnof(&error, EIO, "Waldi %c", 'X') == -EIO);
+ assert_se(streq(error.name, SD_BUS_ERROR_IO_ERROR));
+ assert_se(streq(error.message, "Waldi X"));
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_IO_ERROR));
+ assert_se(sd_bus_error_get_errno(&error) == EIO);
+ assert_se(sd_bus_error_is_set(&error));
+ sd_bus_error_free(&error);
+
+ /* Check with no error */
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set_errnof(&error, 0, "Waldi %c", 'X') == 0);
+ assert_se(error.name == NULL);
+ assert_se(error.message == NULL);
+ assert_se(!sd_bus_error_has_name(&error, SD_BUS_ERROR_IO_ERROR));
+ assert_se(sd_bus_error_get_errno(&error) == 0);
+ assert_se(!sd_bus_error_is_set(&error));
+}
+
+extern const sd_bus_error_map __start_SYSTEMD_BUS_ERROR_MAP[];
+extern const sd_bus_error_map __stop_SYSTEMD_BUS_ERROR_MAP[];
+
+static void dump_mapping_table(void) {
+ const sd_bus_error_map *m;
+
+ printf("----- errno mappings ------\n");
+ m = ALIGN_TO_PTR(__start_SYSTEMD_BUS_ERROR_MAP, sizeof(void*));
+ while (m < __stop_SYSTEMD_BUS_ERROR_MAP) {
+
+ if (m->code == BUS_ERROR_MAP_END_MARKER) {
+ m = ALIGN_TO_PTR(m + 1, sizeof(void*));
+ continue;
+ }
+
+ printf("%s -> %i/%s\n", strna(m->name), m->code, strna(errno_to_name(m->code)));
+ m++;
+ }
+ printf("---------------------------\n");
+}
+
+static void test_errno_mapping_standard(void) {
+ assert_se(sd_bus_error_set(NULL, "System.Error.EUCLEAN", NULL) == -EUCLEAN);
+ assert_se(sd_bus_error_set(NULL, "System.Error.EBUSY", NULL) == -EBUSY);
+ assert_se(sd_bus_error_set(NULL, "System.Error.EINVAL", NULL) == -EINVAL);
+ assert_se(sd_bus_error_set(NULL, "System.Error.WHATSIT", NULL) == -EIO);
+}
+
+BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map test_errors[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error", 5),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-2", 52),
+ SD_BUS_ERROR_MAP_END
+};
+
+BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map test_errors2[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-3", 33),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-4", 44),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-33", 333),
+ SD_BUS_ERROR_MAP_END
+};
+
+static const sd_bus_error_map test_errors3[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-88", 888),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-99", 999),
+ SD_BUS_ERROR_MAP_END
+};
+
+static const sd_bus_error_map test_errors4[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-77", 777),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-78", 778),
+ SD_BUS_ERROR_MAP_END
+};
+
+static const sd_bus_error_map test_errors_bad1[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-1", 0),
+ SD_BUS_ERROR_MAP_END
+};
+
+static const sd_bus_error_map test_errors_bad2[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-1", -1),
+ SD_BUS_ERROR_MAP_END
+};
+
+static void test_errno_mapping_custom(void) {
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error", NULL) == -5);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-2", NULL) == -52);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-x", NULL) == -EIO);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-33", NULL) == -333);
+
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-88", NULL) == -EIO);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-99", NULL) == -EIO);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-77", NULL) == -EIO);
+
+ assert_se(sd_bus_error_add_map(test_errors3) > 0);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-88", NULL) == -888);
+ assert_se(sd_bus_error_add_map(test_errors4) > 0);
+ assert_se(sd_bus_error_add_map(test_errors4) == 0);
+ assert_se(sd_bus_error_add_map(test_errors3) == 0);
+
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-99", NULL) == -999);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-77", NULL) == -777);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-78", NULL) == -778);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-2", NULL) == -52);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-y", NULL) == -EIO);
+
+ assert_se(sd_bus_error_set(NULL, BUS_ERROR_NO_SUCH_UNIT, NULL) == -ENOENT);
+
+ assert_se(sd_bus_error_add_map(test_errors_bad1) == -EINVAL);
+ assert_se(sd_bus_error_add_map(test_errors_bad2) == -EINVAL);
+}
+
+int main(int argc, char *argv[]) {
+ dump_mapping_table();
+
+ test_error();
+ test_errno_mapping_standard();
+ test_errno_mapping_custom();
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-gvariant.c b/src/libsystemd/sd-bus/test-bus-gvariant.c
new file mode 100644
index 0000000..1a9a35d
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-gvariant.c
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_GLIB
+#include <glib.h>
+#endif
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-gvariant.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "macro.h"
+#include "tests.h"
+#include "util.h"
+
+static void test_bus_gvariant_is_fixed_size(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(bus_gvariant_is_fixed_size("") > 0);
+ assert_se(bus_gvariant_is_fixed_size("()") == -EINVAL);
+ assert_se(bus_gvariant_is_fixed_size("y") > 0);
+ assert_se(bus_gvariant_is_fixed_size("u") > 0);
+ assert_se(bus_gvariant_is_fixed_size("b") > 0);
+ assert_se(bus_gvariant_is_fixed_size("n") > 0);
+ assert_se(bus_gvariant_is_fixed_size("q") > 0);
+ assert_se(bus_gvariant_is_fixed_size("i") > 0);
+ assert_se(bus_gvariant_is_fixed_size("t") > 0);
+ assert_se(bus_gvariant_is_fixed_size("d") > 0);
+ assert_se(bus_gvariant_is_fixed_size("s") == 0);
+ assert_se(bus_gvariant_is_fixed_size("o") == 0);
+ assert_se(bus_gvariant_is_fixed_size("g") == 0);
+ assert_se(bus_gvariant_is_fixed_size("h") > 0);
+ assert_se(bus_gvariant_is_fixed_size("ay") == 0);
+ assert_se(bus_gvariant_is_fixed_size("v") == 0);
+ assert_se(bus_gvariant_is_fixed_size("(u)") > 0);
+ assert_se(bus_gvariant_is_fixed_size("(uuuuy)") > 0);
+ assert_se(bus_gvariant_is_fixed_size("(uusuuy)") == 0);
+ assert_se(bus_gvariant_is_fixed_size("a{ss}") == 0);
+ assert_se(bus_gvariant_is_fixed_size("((u)yyy(b(iiii)))") > 0);
+ assert_se(bus_gvariant_is_fixed_size("((u)yyy(b(iiivi)))") == 0);
+}
+
+static void test_bus_gvariant_get_size(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(bus_gvariant_get_size("") == 0);
+ assert_se(bus_gvariant_get_size("()") == -EINVAL);
+ assert_se(bus_gvariant_get_size("y") == 1);
+ assert_se(bus_gvariant_get_size("u") == 4);
+ assert_se(bus_gvariant_get_size("b") == 1);
+ assert_se(bus_gvariant_get_size("n") == 2);
+ assert_se(bus_gvariant_get_size("q") == 2);
+ assert_se(bus_gvariant_get_size("i") == 4);
+ assert_se(bus_gvariant_get_size("t") == 8);
+ assert_se(bus_gvariant_get_size("d") == 8);
+ assert_se(bus_gvariant_get_size("s") < 0);
+ assert_se(bus_gvariant_get_size("o") < 0);
+ assert_se(bus_gvariant_get_size("g") < 0);
+ assert_se(bus_gvariant_get_size("h") == 4);
+ assert_se(bus_gvariant_get_size("ay") < 0);
+ assert_se(bus_gvariant_get_size("v") < 0);
+ assert_se(bus_gvariant_get_size("(u)") == 4);
+ assert_se(bus_gvariant_get_size("(uuuuy)") == 20);
+ assert_se(bus_gvariant_get_size("(uusuuy)") < 0);
+ assert_se(bus_gvariant_get_size("a{ss}") < 0);
+ assert_se(bus_gvariant_get_size("((u)yyy(b(iiii)))") == 28);
+ assert_se(bus_gvariant_get_size("((u)yyy(b(iiivi)))") < 0);
+ assert_se(bus_gvariant_get_size("((b)(t))") == 16);
+ assert_se(bus_gvariant_get_size("((b)(b)(t))") == 16);
+ assert_se(bus_gvariant_get_size("(bt)") == 16);
+ assert_se(bus_gvariant_get_size("((t)(b))") == 16);
+ assert_se(bus_gvariant_get_size("(tb)") == 16);
+ assert_se(bus_gvariant_get_size("((b)(b))") == 2);
+ assert_se(bus_gvariant_get_size("((t)(t))") == 16);
+}
+
+static void test_bus_gvariant_get_alignment(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(bus_gvariant_get_alignment("") == 1);
+ assert_se(bus_gvariant_get_alignment("()") == -EINVAL);
+ assert_se(bus_gvariant_get_alignment("y") == 1);
+ assert_se(bus_gvariant_get_alignment("b") == 1);
+ assert_se(bus_gvariant_get_alignment("u") == 4);
+ assert_se(bus_gvariant_get_alignment("s") == 1);
+ assert_se(bus_gvariant_get_alignment("o") == 1);
+ assert_se(bus_gvariant_get_alignment("g") == 1);
+ assert_se(bus_gvariant_get_alignment("v") == 8);
+ assert_se(bus_gvariant_get_alignment("h") == 4);
+ assert_se(bus_gvariant_get_alignment("i") == 4);
+ assert_se(bus_gvariant_get_alignment("t") == 8);
+ assert_se(bus_gvariant_get_alignment("x") == 8);
+ assert_se(bus_gvariant_get_alignment("q") == 2);
+ assert_se(bus_gvariant_get_alignment("n") == 2);
+ assert_se(bus_gvariant_get_alignment("d") == 8);
+ assert_se(bus_gvariant_get_alignment("ay") == 1);
+ assert_se(bus_gvariant_get_alignment("as") == 1);
+ assert_se(bus_gvariant_get_alignment("au") == 4);
+ assert_se(bus_gvariant_get_alignment("an") == 2);
+ assert_se(bus_gvariant_get_alignment("ans") == 2);
+ assert_se(bus_gvariant_get_alignment("ant") == 8);
+ assert_se(bus_gvariant_get_alignment("(ss)") == 1);
+ assert_se(bus_gvariant_get_alignment("(ssu)") == 4);
+ assert_se(bus_gvariant_get_alignment("a(ssu)") == 4);
+ assert_se(bus_gvariant_get_alignment("(u)") == 4);
+ assert_se(bus_gvariant_get_alignment("(uuuuy)") == 4);
+ assert_se(bus_gvariant_get_alignment("(uusuuy)") == 4);
+ assert_se(bus_gvariant_get_alignment("a{ss}") == 1);
+ assert_se(bus_gvariant_get_alignment("((u)yyy(b(iiii)))") == 4);
+ assert_se(bus_gvariant_get_alignment("((u)yyy(b(iiivi)))") == 8);
+ assert_se(bus_gvariant_get_alignment("((b)(t))") == 8);
+ assert_se(bus_gvariant_get_alignment("((b)(b)(t))") == 8);
+ assert_se(bus_gvariant_get_alignment("(bt)") == 8);
+ assert_se(bus_gvariant_get_alignment("((t)(b))") == 8);
+ assert_se(bus_gvariant_get_alignment("(tb)") == 8);
+ assert_se(bus_gvariant_get_alignment("((b)(b))") == 1);
+ assert_se(bus_gvariant_get_alignment("((t)(t))") == 8);
+}
+
+static int test_marshal(void) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *n = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ void *blob = NULL;
+ size_t sz;
+ int r;
+
+ r = sd_bus_open_user(&bus);
+ if (r < 0)
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_tests_skipped_errno(r, "Failed to connect to bus");
+
+ bus->message_version = 2; /* dirty hack to enable gvariant */
+
+ r = sd_bus_message_new_method_call(bus, &m, "a.service.name",
+ "/an/object/path/which/is/really/really/long/so/that/we/hit/the/eight/bit/boundary/by/quite/some/margin/to/test/this/stuff/that/it/really/works",
+ "an.interface.name", "AMethodName");
+ assert_se(r >= 0);
+
+ assert_cc(sizeof(struct bus_header) == 16);
+
+ assert_se(sd_bus_message_append(m,
+ "a(usv)", 3,
+ 4711, "first-string-parameter", "(st)", "X", (uint64_t) 1111,
+ 4712, "second-string-parameter", "(a(si))", 2, "Y", 5, "Z", 6,
+ 4713, "third-string-parameter", "(uu)", 1, 2) >= 0);
+
+ assert_se(sd_bus_message_seal(m, 4711, 0) >= 0);
+
+#if HAVE_GLIB
+ {
+ GVariant *v;
+ char *t;
+
+#if !defined(GLIB_VERSION_2_36)
+ g_type_init();
+#endif
+
+ v = g_variant_new_from_data(G_VARIANT_TYPE("(yyyyuta{tv})"), m->header, sizeof(struct bus_header) + m->fields_size, false, NULL, NULL);
+ assert_se(g_variant_is_normal_form(v));
+ t = g_variant_print(v, TRUE);
+ printf("%s\n", t);
+ g_free(t);
+ g_variant_unref(v);
+
+ v = g_variant_new_from_data(G_VARIANT_TYPE("(a(usv))"), m->body.data, m->user_body_size, false, NULL, NULL);
+ assert_se(g_variant_is_normal_form(v));
+ t = g_variant_print(v, TRUE);
+ printf("%s\n", t);
+ g_free(t);
+ g_variant_unref(v);
+ }
+#endif
+
+ assert_se(bus_message_dump(m, NULL, BUS_MESSAGE_DUMP_WITH_HEADER) >= 0);
+
+ assert_se(bus_message_get_blob(m, &blob, &sz) >= 0);
+
+#if HAVE_GLIB
+ {
+ GVariant *v;
+ char *t;
+
+ v = g_variant_new_from_data(G_VARIANT_TYPE("(yyyyuta{tv}v)"), blob, sz, false, NULL, NULL);
+ assert_se(g_variant_is_normal_form(v));
+ t = g_variant_print(v, TRUE);
+ printf("%s\n", t);
+ g_free(t);
+ g_variant_unref(v);
+ }
+#endif
+
+ assert_se(bus_message_from_malloc(bus, blob, sz, NULL, 0, NULL, &n) >= 0);
+ blob = NULL;
+
+ assert_se(bus_message_dump(n, NULL, BUS_MESSAGE_DUMP_WITH_HEADER) >= 0);
+
+ m = sd_bus_message_unref(m);
+
+ assert_se(sd_bus_message_new_method_call(bus, &m, "a.x", "/a/x", "a.x", "Ax") >= 0);
+
+ assert_se(sd_bus_message_append(m, "as", 0) >= 0);
+
+ assert_se(sd_bus_message_seal(m, 4712, 0) >= 0);
+ assert_se(bus_message_dump(m, NULL, BUS_MESSAGE_DUMP_WITH_HEADER) >= 0);
+
+ return EXIT_SUCCESS;
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_bus_gvariant_is_fixed_size();
+ test_bus_gvariant_get_size();
+ test_bus_gvariant_get_alignment();
+
+ return test_marshal();
+}
diff --git a/src/libsystemd/sd-bus/test-bus-introspect.c b/src/libsystemd/sd-bus/test-bus-introspect.c
new file mode 100644
index 0000000..940dbfe
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-introspect.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "bus-introspect.h"
+#include "log.h"
+
+static int prop_get(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ return -EINVAL;
+}
+
+static int prop_set(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ return -EINVAL;
+}
+
+static const sd_bus_vtable vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("Hello", "ssas", "a(uu)", NULL, 0),
+ SD_BUS_METHOD("DeprecatedHello", "", "", NULL, SD_BUS_VTABLE_DEPRECATED),
+ SD_BUS_METHOD("DeprecatedHelloNoReply", "", "", NULL, SD_BUS_VTABLE_DEPRECATED|SD_BUS_VTABLE_METHOD_NO_REPLY),
+ SD_BUS_SIGNAL("Wowza", "sss", 0),
+ SD_BUS_SIGNAL("DeprecatedWowza", "ut", SD_BUS_VTABLE_DEPRECATED),
+ SD_BUS_WRITABLE_PROPERTY("AProperty", "s", prop_get, prop_set, 0, 0),
+ SD_BUS_PROPERTY("AReadOnlyDeprecatedProperty", "(ut)", prop_get, 0, SD_BUS_VTABLE_DEPRECATED),
+ SD_BUS_PROPERTY("ChangingProperty", "t", prop_get, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Invalidating", "t", prop_get, 0, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("Constant", "t", prop_get, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_PROPERTY_EXPLICIT),
+ SD_BUS_VTABLE_END
+};
+
+int main(int argc, char *argv[]) {
+ struct introspect intro;
+
+ log_set_max_level(LOG_DEBUG);
+
+ assert_se(introspect_begin(&intro, false) >= 0);
+
+ fprintf(intro.f, " <interface name=\"org.foo\">\n");
+ assert_se(introspect_write_interface(&intro, vtable) >= 0);
+ fputs(" </interface>\n", intro.f);
+
+ fflush(intro.f);
+ fputs(intro.introspection, stdout);
+
+ introspect_free(&intro);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-marshal.c b/src/libsystemd/sd-bus/test-bus-marshal.c
new file mode 100644
index 0000000..1e9810c
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-marshal.c
@@ -0,0 +1,420 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <math.h>
+#include <stdlib.h>
+
+#if HAVE_GLIB
+#include <gio/gio.h>
+#endif
+
+#if HAVE_DBUS
+#include <dbus/dbus.h>
+#endif
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-label.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "log.h"
+#include "tests.h"
+#include "util.h"
+
+static void test_bus_path_encode_unique(void) {
+ _cleanup_free_ char *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL;
+
+ assert_se(bus_path_encode_unique(NULL, "/foo/bar", "some.sender", "a.suffix", &a) >= 0 && streq_ptr(a, "/foo/bar/some_2esender/a_2esuffix"));
+ assert_se(bus_path_decode_unique(a, "/foo/bar", &b, &c) > 0 && streq_ptr(b, "some.sender") && streq_ptr(c, "a.suffix"));
+ assert_se(bus_path_decode_unique(a, "/bar/foo", &d, &d) == 0 && !d);
+ assert_se(bus_path_decode_unique("/foo/bar/onlyOneSuffix", "/foo/bar", &d, &d) == 0 && !d);
+ assert_se(bus_path_decode_unique("/foo/bar/_/_", "/foo/bar", &d, &e) > 0 && streq_ptr(d, "") && streq_ptr(e, ""));
+}
+
+static void test_bus_path_encode(void) {
+ _cleanup_free_ char *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *f = NULL;
+
+ assert_se(sd_bus_path_encode("/foo/bar", "waldo", &a) >= 0 && streq(a, "/foo/bar/waldo"));
+ assert_se(sd_bus_path_decode(a, "/waldo", &b) == 0 && b == NULL);
+ assert_se(sd_bus_path_decode(a, "/foo/bar", &b) > 0 && streq(b, "waldo"));
+
+ assert_se(sd_bus_path_encode("xxxx", "waldo", &c) < 0);
+ assert_se(sd_bus_path_encode("/foo/", "waldo", &c) < 0);
+
+ assert_se(sd_bus_path_encode("/foo/bar", "", &c) >= 0 && streq(c, "/foo/bar/_"));
+ assert_se(sd_bus_path_decode(c, "/foo/bar", &d) > 0 && streq(d, ""));
+
+ assert_se(sd_bus_path_encode("/foo/bar", "foo.bar", &e) >= 0 && streq(e, "/foo/bar/foo_2ebar"));
+ assert_se(sd_bus_path_decode(e, "/foo/bar", &f) > 0 && streq(f, "foo.bar"));
+}
+
+static void test_bus_path_encode_many(void) {
+ _cleanup_free_ char *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *f = NULL;
+
+ assert_se(sd_bus_path_decode_many("/foo/bar", "/prefix/%", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/prefix/bar", "/prefix/%bar", NULL) == 1);
+ assert_se(sd_bus_path_decode_many("/foo/bar", "/prefix/%/suffix", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/prefix/foobar/suffix", "/prefix/%/suffix", &a) == 1 && streq_ptr(a, "foobar"));
+ assert_se(sd_bus_path_decode_many("/prefix/one_foo_two/mid/three_bar_four/suffix", "/prefix/one_%_two/mid/three_%_four/suffix", &b, &c) == 1 && streq_ptr(b, "foo") && streq_ptr(c, "bar"));
+ assert_se(sd_bus_path_decode_many("/prefix/one_foo_two/mid/three_bar_four/suffix", "/prefix/one_%_two/mid/three_%_four/suffix", NULL, &d) == 1 && streq_ptr(d, "bar"));
+
+ assert_se(sd_bus_path_decode_many("/foo/bar", "/foo/bar/%", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/bar%", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/bar", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%bar", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/bar/suffix") == 1);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%%/suffix", NULL, NULL) == 0); /* multiple '%' are treated verbatim */
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/suffi", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/suffix", &e) == 1 && streq_ptr(e, "bar"));
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/%", NULL, NULL) == 1);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/%/%", NULL, NULL, NULL) == 1);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "%/%/%", NULL, NULL, NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/%", NULL, NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/%/", NULL, NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "%", NULL) == 0);
+
+ assert_se(sd_bus_path_encode_many(&f, "/prefix/one_%_two/mid/three_%_four/suffix", "foo", "bar") >= 0 && streq_ptr(f, "/prefix/one_foo_two/mid/three_bar_four/suffix"));
+}
+
+static void test_bus_label_escape_one(const char *a, const char *b) {
+ _cleanup_free_ char *t = NULL, *x = NULL, *y = NULL;
+
+ assert_se(t = bus_label_escape(a));
+ assert_se(streq(t, b));
+
+ assert_se(x = bus_label_unescape(t));
+ assert_se(streq(a, x));
+
+ assert_se(y = bus_label_unescape(b));
+ assert_se(streq(a, y));
+}
+
+static void test_bus_label_escape(void) {
+ test_bus_label_escape_one("foo123bar", "foo123bar");
+ test_bus_label_escape_one("foo.bar", "foo_2ebar");
+ test_bus_label_escape_one("foo_2ebar", "foo_5f2ebar");
+ test_bus_label_escape_one("", "_");
+ test_bus_label_escape_one("_", "_5f");
+ test_bus_label_escape_one("1", "_31");
+ test_bus_label_escape_one(":1", "_3a1");
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *copy = NULL;
+ int r, boolean;
+ const char *x, *x2, *y, *z, *a, *b, *c, *d, *a_signature;
+ uint8_t u, v;
+ void *buffer = NULL;
+ size_t sz;
+ _cleanup_free_ char *h = NULL;
+ const int32_t integer_array[] = { -1, -2, 0, 1, 2 }, *return_array;
+ char *s;
+ _cleanup_free_ char *first = NULL, *second = NULL, *third = NULL;
+ _cleanup_fclose_ FILE *ms = NULL;
+ size_t first_size = 0, second_size = 0, third_size = 0;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ double dbl;
+ uint64_t u64;
+
+ test_setup_logging(LOG_INFO);
+
+ r = sd_bus_default_user(&bus);
+ if (r < 0)
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ r = sd_bus_message_new_method_call(bus, &m, "foobar.waldo", "/", "foobar.waldo", "Piep");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "s", "a string");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "s", NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "asg", 2, "string #1", "string #2", "sba(tt)ss");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "sass", "foobar", 5, "foo", "bar", "waldo", "piep", "pap", "after");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "a{yv}", 2, 3, "s", "foo", 5, "s", "waldo");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "y(ty)y(yt)y", 8, 777ULL, 7, 9, 77, 7777ULL, 10);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "()");
+ assert_se(r == -EINVAL);
+
+ r = sd_bus_message_append(m, "ba(ss)", 255, 3, "aaa", "1", "bbb", "2", "ccc", "3");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append_basic(m, 's', "foobar");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append_basic(m, 's', "waldo");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_close_container(m);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append_string_space(m, 5, &s);
+ assert_se(r >= 0);
+ strcpy(s, "hallo");
+
+ r = sd_bus_message_append_array(m, 'i', integer_array, sizeof(integer_array));
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append_array(m, 'u', NULL, 0);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "a(stdo)", 1, "foo", 815ULL, 47.0, "/");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_seal(m, 4711, 0);
+ assert_se(r >= 0);
+
+ bus_message_dump(m, stdout, BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ ms = open_memstream(&first, &first_size);
+ bus_message_dump(m, ms, 0);
+ fflush(ms);
+ assert_se(!ferror(ms));
+
+ r = bus_message_get_blob(m, &buffer, &sz);
+ assert_se(r >= 0);
+
+ h = cescape_length(buffer, sz);
+ assert_se(h);
+ log_info("message size = %zu, contents =\n%s", sz, h);
+
+#if HAVE_GLIB
+#if !HAS_FEATURE_ADDRESS_SANITIZER
+ {
+ GDBusMessage *g;
+ char *p;
+
+#if !defined(GLIB_VERSION_2_36)
+ g_type_init();
+#endif
+
+ g = g_dbus_message_new_from_blob(buffer, sz, 0, NULL);
+ p = g_dbus_message_print(g, 0);
+ log_info("%s", p);
+ g_free(p);
+ g_object_unref(g);
+ }
+#endif
+#endif
+
+#if HAVE_DBUS
+ {
+ DBusMessage *w;
+ DBusError error;
+
+ dbus_error_init(&error);
+
+ w = dbus_message_demarshal(buffer, sz, &error);
+ if (!w)
+ log_error("%s", error.message);
+ else
+ dbus_message_unref(w);
+
+ dbus_error_free(&error);
+ }
+#endif
+
+ m = sd_bus_message_unref(m);
+
+ r = bus_message_from_malloc(bus, buffer, sz, NULL, 0, NULL, &m);
+ assert_se(r >= 0);
+
+ bus_message_dump(m, stdout, BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ fclose(ms);
+ ms = open_memstream(&second, &second_size);
+ bus_message_dump(m, ms, 0);
+ fflush(ms);
+ assert_se(!ferror(ms));
+ assert_se(first_size == second_size);
+ assert_se(memcmp(first, second, first_size) == 0);
+
+ assert_se(sd_bus_message_rewind(m, true) >= 0);
+
+ r = sd_bus_message_read(m, "ssasg", &x, &x2, 2, &y, &z, &a_signature);
+ assert_se(r > 0);
+ assert_se(streq(x, "a string"));
+ assert_se(streq(x2, ""));
+ assert_se(streq(y, "string #1"));
+ assert_se(streq(z, "string #2"));
+ assert_se(streq(a_signature, "sba(tt)ss"));
+
+ r = sd_bus_message_read(m, "sass", &x, 5, &y, &z, &a, &b, &c, &d);
+ assert_se(r > 0);
+ assert_se(streq(x, "foobar"));
+ assert_se(streq(y, "foo"));
+ assert_se(streq(z, "bar"));
+ assert_se(streq(a, "waldo"));
+ assert_se(streq(b, "piep"));
+ assert_se(streq(c, "pap"));
+ assert_se(streq(d, "after"));
+
+ r = sd_bus_message_read(m, "a{yv}", 2, &u, "s", &x, &v, "s", &y);
+ assert_se(r > 0);
+ assert_se(u == 3);
+ assert_se(streq(x, "foo"));
+ assert_se(v == 5);
+ assert_se(streq(y, "waldo"));
+
+ r = sd_bus_message_read(m, "y(ty)", &v, &u64, &u);
+ assert_se(r > 0);
+ assert_se(v == 8);
+ assert_se(u64 == 777);
+ assert_se(u == 7);
+
+ r = sd_bus_message_read(m, "y(yt)", &v, &u, &u64);
+ assert_se(r > 0);
+ assert_se(v == 9);
+ assert_se(u == 77);
+ assert_se(u64 == 7777);
+
+ r = sd_bus_message_read(m, "y", &v);
+ assert_se(r > 0);
+ assert_se(v == 10);
+
+ r = sd_bus_message_read(m, "()");
+ assert_se(r < 0);
+
+ r = sd_bus_message_read(m, "ba(ss)", &boolean, 3, &x, &y, &a, &b, &c, &d);
+ assert_se(r > 0);
+ assert_se(boolean);
+ assert_se(streq(x, "aaa"));
+ assert_se(streq(y, "1"));
+ assert_se(streq(a, "bbb"));
+ assert_se(streq(b, "2"));
+ assert_se(streq(c, "ccc"));
+ assert_se(streq(d, "3"));
+
+ assert_se(sd_bus_message_verify_type(m, 'a', "s") > 0);
+
+ r = sd_bus_message_read(m, "as", 2, &x, &y);
+ assert_se(r > 0);
+ assert_se(streq(x, "foobar"));
+ assert_se(streq(y, "waldo"));
+
+ r = sd_bus_message_read_basic(m, 's', &s);
+ assert_se(r > 0);
+ assert_se(streq(s, "hallo"));
+
+ r = sd_bus_message_read_array(m, 'i', (const void**) &return_array, &sz);
+ assert_se(r > 0);
+ assert_se(sz == sizeof(integer_array));
+ assert_se(memcmp(integer_array, return_array, sz) == 0);
+
+ r = sd_bus_message_read_array(m, 'u', (const void**) &return_array, &sz);
+ assert_se(r > 0);
+ assert_se(sz == 0);
+
+ r = sd_bus_message_read(m, "a(stdo)", 1, &x, &u64, &dbl, &y);
+ assert_se(r > 0);
+ assert_se(streq(x, "foo"));
+ assert_se(u64 == 815ULL);
+ assert_se(fabs(dbl - 47.0) < 0.1);
+ assert_se(streq(y, "/"));
+
+ r = sd_bus_message_peek_type(m, NULL, NULL);
+ assert_se(r == 0);
+
+ r = sd_bus_message_new_method_call(bus, &copy, "foobar.waldo", "/", "foobar.waldo", "Piep");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_rewind(m, true);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_copy(copy, m, true);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_seal(copy, 4712, 0);
+ assert_se(r >= 0);
+
+ fclose(ms);
+ ms = open_memstream(&third, &third_size);
+ bus_message_dump(copy, ms, 0);
+ fflush(ms);
+ assert_se(!ferror(ms));
+
+ printf("<%.*s>\n", (int) first_size, first);
+ printf("<%.*s>\n", (int) third_size, third);
+
+ assert_se(first_size == third_size);
+ assert_se(memcmp(first, third, third_size) == 0);
+
+ r = sd_bus_message_rewind(m, true);
+ assert_se(r >= 0);
+
+ assert_se(sd_bus_message_verify_type(m, 's', NULL) > 0);
+
+ r = sd_bus_message_skip(m, "ssasg");
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_verify_type(m, 's', NULL) > 0);
+
+ r = sd_bus_message_skip(m, "sass");
+ assert_se(r >= 0);
+
+ assert_se(sd_bus_message_verify_type(m, 'a', "{yv}") > 0);
+
+ r = sd_bus_message_skip(m, "a{yv}y(ty)y(yt)y");
+ assert_se(r >= 0);
+
+ assert_se(sd_bus_message_verify_type(m, 'b', NULL) > 0);
+
+ r = sd_bus_message_read(m, "b", &boolean);
+ assert_se(r > 0);
+ assert_se(boolean);
+
+ r = sd_bus_message_enter_container(m, 0, NULL);
+ assert_se(r > 0);
+
+ r = sd_bus_message_read(m, "(ss)", &x, &y);
+ assert_se(r > 0);
+
+ r = sd_bus_message_read(m, "(ss)", &a, &b);
+ assert_se(r > 0);
+
+ r = sd_bus_message_read(m, "(ss)", &c, &d);
+ assert_se(r > 0);
+
+ r = sd_bus_message_read(m, "(ss)", &x, &y);
+ assert_se(r == 0);
+
+ r = sd_bus_message_exit_container(m);
+ assert_se(r >= 0);
+
+ assert_se(streq(x, "aaa"));
+ assert_se(streq(y, "1"));
+ assert_se(streq(a, "bbb"));
+ assert_se(streq(b, "2"));
+ assert_se(streq(c, "ccc"));
+ assert_se(streq(d, "3"));
+
+ test_bus_label_escape();
+ test_bus_path_encode();
+ test_bus_path_encode_unique();
+ test_bus_path_encode_many();
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-match.c b/src/libsystemd/sd-bus/test-bus-match.c
new file mode 100644
index 0000000..0949d8d
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-match.c
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "bus-match.h"
+#include "bus-message.h"
+#include "bus-slot.h"
+#include "bus-util.h"
+#include "log.h"
+#include "macro.h"
+#include "tests.h"
+
+static bool mask[32];
+
+static int filter(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ log_info("Ran %u", PTR_TO_UINT(userdata));
+ assert_se(PTR_TO_UINT(userdata) < ELEMENTSOF(mask));
+ mask[PTR_TO_UINT(userdata)] = true;
+ return 0;
+}
+
+static bool mask_contains(unsigned a[], unsigned n) {
+ unsigned i, j;
+
+ for (i = 0; i < ELEMENTSOF(mask); i++) {
+ bool found = false;
+
+ for (j = 0; j < n; j++)
+ if (a[j] == i) {
+ found = true;
+ break;
+ }
+
+ if (found != mask[i])
+ return false;
+ }
+
+ return true;
+}
+
+static int match_add(sd_bus_slot *slots, struct bus_match_node *root, const char *match, int value) {
+ struct bus_match_component *components = NULL;
+ unsigned n_components = 0;
+ sd_bus_slot *s;
+ int r;
+
+ s = slots + value;
+ zero(*s);
+
+ r = bus_match_parse(match, &components, &n_components);
+ if (r < 0)
+ return r;
+
+ s->userdata = INT_TO_PTR(value);
+ s->match_callback.callback = filter;
+
+ r = bus_match_add(root, components, n_components, &s->match_callback);
+ bus_match_parse_free(components, n_components);
+
+ return r;
+}
+
+static void test_match_scope(const char *match, enum bus_match_scope scope) {
+ struct bus_match_component *components = NULL;
+ unsigned n_components = 0;
+
+ assert_se(bus_match_parse(match, &components, &n_components) >= 0);
+ assert_se(bus_match_get_scope(components, n_components) == scope);
+ bus_match_parse_free(components, n_components);
+}
+
+int main(int argc, char *argv[]) {
+ struct bus_match_node root = {
+ .type = BUS_MATCH_ROOT,
+ };
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ enum bus_match_node_type i;
+ sd_bus_slot slots[19];
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = sd_bus_open_user(&bus);
+ if (r < 0)
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ assert_se(match_add(slots, &root, "arg2='wal\\'do',sender='foo',type='signal',interface='bar.x',", 1) >= 0);
+ assert_se(match_add(slots, &root, "arg2='wal\\'do2',sender='foo',type='signal',interface='bar.x',", 2) >= 0);
+ assert_se(match_add(slots, &root, "arg3='test',sender='foo',type='signal',interface='bar.x',", 3) >= 0);
+ assert_se(match_add(slots, &root, "arg3='test',sender='foo',type='method_call',interface='bar.x',", 4) >= 0);
+ assert_se(match_add(slots, &root, "", 5) >= 0);
+ assert_se(match_add(slots, &root, "interface='quux.x'", 6) >= 0);
+ assert_se(match_add(slots, &root, "interface='bar.x'", 7) >= 0);
+ assert_se(match_add(slots, &root, "member='waldo',path='/foo/bar'", 8) >= 0);
+ assert_se(match_add(slots, &root, "path='/foo/bar'", 9) >= 0);
+ assert_se(match_add(slots, &root, "path_namespace='/foo'", 10) >= 0);
+ assert_se(match_add(slots, &root, "path_namespace='/foo/quux'", 11) >= 0);
+ assert_se(match_add(slots, &root, "arg1='two'", 12) >= 0);
+ assert_se(match_add(slots, &root, "member='waldo',arg2path='/prefix/'", 13) >= 0);
+ assert_se(match_add(slots, &root, "member=waldo,path='/foo/bar',arg3namespace='prefix'", 14) >= 0);
+ assert_se(match_add(slots, &root, "arg4has='pi'", 15) >= 0);
+ assert_se(match_add(slots, &root, "arg4has='pa'", 16) >= 0);
+ assert_se(match_add(slots, &root, "arg4has='po'", 17) >= 0);
+ assert_se(match_add(slots, &root, "arg4='pi'", 18) >= 0);
+
+ bus_match_dump(&root, 0);
+
+ assert_se(sd_bus_message_new_signal(bus, &m, "/foo/bar", "bar.x", "waldo") >= 0);
+ assert_se(sd_bus_message_append(m, "ssssas", "one", "two", "/prefix/three", "prefix.four", 3, "pi", "pa", "po") >= 0);
+ assert_se(sd_bus_message_seal(m, 1, 0) >= 0);
+
+ zero(mask);
+ assert_se(bus_match_run(NULL, &root, m) == 0);
+ assert_se(mask_contains((unsigned[]) { 9, 8, 7, 5, 10, 12, 13, 14, 15, 16, 17 }, 11));
+
+ assert_se(bus_match_remove(&root, &slots[8].match_callback) >= 0);
+ assert_se(bus_match_remove(&root, &slots[13].match_callback) >= 0);
+
+ bus_match_dump(&root, 0);
+
+ zero(mask);
+ assert_se(bus_match_run(NULL, &root, m) == 0);
+ assert_se(mask_contains((unsigned[]) { 9, 5, 10, 12, 14, 7, 15, 16, 17 }, 9));
+
+ for (i = 0; i < _BUS_MATCH_NODE_TYPE_MAX; i++) {
+ char buf[32];
+ const char *x;
+
+ assert_se(x = bus_match_node_type_to_string(i, buf, sizeof(buf)));
+
+ if (i >= BUS_MATCH_MESSAGE_TYPE)
+ assert_se(bus_match_node_type_from_string(x, strlen(x)) == i);
+ }
+
+ bus_match_free(&root);
+
+ test_match_scope("interface='foobar'", BUS_MATCH_GENERIC);
+ test_match_scope("", BUS_MATCH_GENERIC);
+ test_match_scope("interface='org.freedesktop.DBus.Local'", BUS_MATCH_LOCAL);
+ test_match_scope("sender='org.freedesktop.DBus.Local'", BUS_MATCH_LOCAL);
+ test_match_scope("member='gurke',path='/org/freedesktop/DBus/Local'", BUS_MATCH_LOCAL);
+ test_match_scope("arg2='piep',sender='org.freedesktop.DBus',member='waldo'", BUS_MATCH_DRIVER);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-objects.c b/src/libsystemd/sd-bus/test-bus-objects.c
new file mode 100644
index 0000000..3c5bb88
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-objects.c
@@ -0,0 +1,536 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <pthread.h>
+#include <stdlib.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "log.h"
+#include "macro.h"
+#include "strv.h"
+#include "util.h"
+
+struct context {
+ int fds[2];
+ bool quit;
+ char *something;
+ char *automatic_string_property;
+ uint32_t automatic_integer_property;
+};
+
+static int something_handler(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ struct context *c = userdata;
+ const char *s;
+ char *n = NULL;
+ int r;
+
+ r = sd_bus_message_read(m, "s", &s);
+ assert_se(r > 0);
+
+ n = strjoin("<<<", s, ">>>");
+ assert_se(n);
+
+ free(c->something);
+ c->something = n;
+
+ log_info("AlterSomething() called, got %s, returning %s", s, n);
+
+ /* This should fail, since the return type doesn't match */
+ assert_se(sd_bus_reply_method_return(m, "u", 4711) == -ENOMSG);
+
+ r = sd_bus_reply_method_return(m, "s", n);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int exit_handler(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ struct context *c = userdata;
+ int r;
+
+ c->quit = true;
+
+ log_info("Exit called");
+
+ r = sd_bus_reply_method_return(m, "");
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int get_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ struct context *c = userdata;
+ int r;
+
+ log_info("property get for %s called, returning \"%s\".", property, c->something);
+
+ r = sd_bus_message_append(reply, "s", c->something);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int set_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error) {
+ struct context *c = userdata;
+ const char *s;
+ char *n;
+ int r;
+
+ log_info("property set for %s called", property);
+
+ r = sd_bus_message_read(value, "s", &s);
+ assert_se(r >= 0);
+
+ n = strdup(s);
+ assert_se(n);
+
+ free(c->something);
+ c->something = n;
+
+ return 1;
+}
+
+static int value_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *s = NULL;
+ const char *x;
+ int r;
+
+ assert_se(asprintf(&s, "object %p, path %s", userdata, path) >= 0);
+ r = sd_bus_message_append(reply, "s", s);
+ assert_se(r >= 0);
+
+ assert_se(x = startswith(path, "/value/"));
+
+ assert_se(PTR_TO_UINT(userdata) == 30);
+
+ return 1;
+}
+
+static int notify_test(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_properties_changed(sd_bus_message_get_bus(m), m->path, "org.freedesktop.systemd.ValueTest", "Value", NULL) >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int notify_test2(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_properties_changed_strv(sd_bus_message_get_bus(m), m->path, "org.freedesktop.systemd.ValueTest", NULL) >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int emit_interfaces_added(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_interfaces_added(sd_bus_message_get_bus(m), "/value/a/x", "org.freedesktop.systemd.ValueTest", NULL) >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int emit_interfaces_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_interfaces_removed(sd_bus_message_get_bus(m), "/value/a/x", "org.freedesktop.systemd.ValueTest", NULL) >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int emit_object_added(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_object_added(sd_bus_message_get_bus(m), "/value/a/x") >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int emit_object_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_object_removed(sd_bus_message_get_bus(m), "/value/a/x") >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static const sd_bus_vtable vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("AlterSomething", "s", "s", something_handler, 0),
+ SD_BUS_METHOD("Exit", "", "", exit_handler, 0),
+ SD_BUS_WRITABLE_PROPERTY("Something", "s", get_handler, set_handler, 0, 0),
+ SD_BUS_WRITABLE_PROPERTY("AutomaticStringProperty", "s", NULL, NULL, offsetof(struct context, automatic_string_property), 0),
+ SD_BUS_WRITABLE_PROPERTY("AutomaticIntegerProperty", "u", NULL, NULL, offsetof(struct context, automatic_integer_property), 0),
+ SD_BUS_METHOD("NoOperation", NULL, NULL, NULL, 0),
+ SD_BUS_METHOD("EmitInterfacesAdded", NULL, NULL, emit_interfaces_added, 0),
+ SD_BUS_METHOD("EmitInterfacesRemoved", NULL, NULL, emit_interfaces_removed, 0),
+ SD_BUS_METHOD("EmitObjectAdded", NULL, NULL, emit_object_added, 0),
+ SD_BUS_METHOD("EmitObjectRemoved", NULL, NULL, emit_object_removed, 0),
+ SD_BUS_VTABLE_END
+};
+
+static const sd_bus_vtable vtable2[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("NotifyTest", "", "", notify_test, 0),
+ SD_BUS_METHOD("NotifyTest2", "", "", notify_test2, 0),
+ SD_BUS_PROPERTY("Value", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Value2", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("Value3", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Value4", "s", value_handler, 10, 0),
+ SD_BUS_PROPERTY("AnExplicitProperty", "s", NULL, offsetof(struct context, something), SD_BUS_VTABLE_PROPERTY_EXPLICIT|SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_VTABLE_END
+};
+
+static int enumerator_callback(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+
+ if (object_path_startswith("/value", path))
+ assert_se(*nodes = strv_new("/value/a", "/value/b", "/value/c"));
+
+ return 1;
+}
+
+static int enumerator2_callback(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+
+ if (object_path_startswith("/value/a", path))
+ assert_se(*nodes = strv_new("/value/a/x", "/value/a/y", "/value/a/z"));
+
+ return 1;
+}
+
+static void *server(void *p) {
+ struct context *c = p;
+ sd_bus *bus = NULL;
+ sd_id128_t id;
+ int r;
+
+ c->quit = false;
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_fd(bus, c->fds[0], c->fds[0]) >= 0);
+ assert_se(sd_bus_set_server(bus, 1, id) >= 0);
+
+ assert_se(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.test", vtable, c) >= 0);
+ assert_se(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.test2", vtable, c) >= 0);
+ assert_se(sd_bus_add_fallback_vtable(bus, NULL, "/value", "org.freedesktop.systemd.ValueTest", vtable2, NULL, UINT_TO_PTR(20)) >= 0);
+ assert_se(sd_bus_add_node_enumerator(bus, NULL, "/value", enumerator_callback, NULL) >= 0);
+ assert_se(sd_bus_add_node_enumerator(bus, NULL, "/value/a", enumerator2_callback, NULL) >= 0);
+ assert_se(sd_bus_add_object_manager(bus, NULL, "/value") >= 0);
+ assert_se(sd_bus_add_object_manager(bus, NULL, "/value/a") >= 0);
+
+ assert_se(sd_bus_start(bus) >= 0);
+
+ log_error("Entering event loop on server");
+
+ while (!c->quit) {
+ log_error("Loop!");
+
+ r = sd_bus_process(bus, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to process requests: %m");
+ goto fail;
+ }
+
+ if (r == 0) {
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to wait: %m");
+ goto fail;
+ }
+
+ continue;
+ }
+ }
+
+ r = 0;
+
+fail:
+ if (bus) {
+ sd_bus_flush(bus);
+ sd_bus_unref(bus);
+ }
+
+ return INT_TO_PTR(r);
+}
+
+static int client(struct context *c) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *s;
+ int r;
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_fd(bus, c->fds[1], c->fds[1]) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "NoOperation", &error, NULL, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AlterSomething", &error, &reply, "s", "hallo");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ assert_se(streq(s, "<<<hallo>>>"));
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Doesntexist", &error, &reply, "");
+ assert_se(r < 0);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD));
+
+ sd_bus_error_free(&error);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AlterSomething", &error, &reply, "as", 1, "hallo");
+ assert_se(r < 0);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_INVALID_ARGS));
+
+ sd_bus_error_free(&error);
+
+ r = sd_bus_get_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Something", &error, &reply, "s");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ assert_se(streq(s, "<<<hallo>>>"));
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_set_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Something", &error, "s", "test");
+ assert_se(r >= 0);
+
+ r = sd_bus_get_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Something", &error, &reply, "s");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ assert_se(streq(s, "test"));
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_set_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AutomaticIntegerProperty", &error, "u", 815);
+ assert_se(r >= 0);
+
+ assert_se(c->automatic_integer_property == 815);
+
+ r = sd_bus_set_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AutomaticStringProperty", &error, "s", "Du Dödel, Du!");
+ assert_se(r >= 0);
+
+ assert_se(streq(c->automatic_string_property, "Du Dödel, Du!"));
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ fputs(s, stdout);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_get_property(bus, "org.freedesktop.systemd.test", "/value/xuzz", "org.freedesktop.systemd.ValueTest", "Value", &error, &reply, "s");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ log_info("read %s", s);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ fputs(s, stdout);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ fputs(s, stdout);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ fputs(s, stdout);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.DBus.Properties", "GetAll", &error, &reply, "s", "");
+ assert_se(r >= 0);
+
+ bus_message_dump(reply, stdout, BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.DBus.Properties", "GetAll", &error, &reply, "s", "org.freedesktop.systemd.ValueTest2");
+ assert_se(r < 0);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_INTERFACE));
+ sd_bus_error_free(&error);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.DBus.ObjectManager", "GetManagedObjects", &error, &reply, "");
+ assert_se(r < 0);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD));
+ sd_bus_error_free(&error);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value", "org.freedesktop.DBus.ObjectManager", "GetManagedObjects", &error, &reply, "");
+ assert_se(r >= 0);
+
+ bus_message_dump(reply, stdout, BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.systemd.ValueTest", "NotifyTest", &error, NULL, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.Properties", "PropertiesChanged"));
+ bus_message_dump(reply, stdout, BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.systemd.ValueTest", "NotifyTest2", &error, NULL, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.Properties", "PropertiesChanged"));
+ bus_message_dump(reply, stdout, BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitInterfacesAdded", &error, NULL, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded"));
+ bus_message_dump(reply, stdout, BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitInterfacesRemoved", &error, NULL, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved"));
+ bus_message_dump(reply, stdout, BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitObjectAdded", &error, NULL, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded"));
+ bus_message_dump(reply, stdout, BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitObjectRemoved", &error, NULL, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved"));
+ bus_message_dump(reply, stdout, BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Exit", &error, NULL, "");
+ assert_se(r >= 0);
+
+ sd_bus_flush(bus);
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ struct context c = {};
+ pthread_t s;
+ void *p;
+ int r, q;
+
+ c.automatic_integer_property = 4711;
+ assert_se(c.automatic_string_property = strdup("dudeldu"));
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, c.fds) >= 0);
+
+ r = pthread_create(&s, NULL, server, &c);
+ if (r != 0)
+ return -r;
+
+ r = client(&c);
+
+ q = pthread_join(s, &p);
+ if (q != 0)
+ return -q;
+
+ if (r < 0)
+ return r;
+
+ if (PTR_TO_INT(p) < 0)
+ return PTR_TO_INT(p);
+
+ free(c.something);
+ free(c.automatic_string_property);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-server.c b/src/libsystemd/sd-bus/test-bus-server.c
new file mode 100644
index 0000000..0f1b964
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-server.c
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <pthread.h>
+#include <stdlib.h>
+
+#include "sd-bus.h"
+
+#include "bus-internal.h"
+#include "bus-util.h"
+#include "log.h"
+#include "macro.h"
+#include "util.h"
+
+struct context {
+ int fds[2];
+
+ bool client_negotiate_unix_fds;
+ bool server_negotiate_unix_fds;
+
+ bool client_anonymous_auth;
+ bool server_anonymous_auth;
+};
+
+static void *server(void *p) {
+ struct context *c = p;
+ sd_bus *bus = NULL;
+ sd_id128_t id;
+ bool quit = false;
+ int r;
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_fd(bus, c->fds[0], c->fds[0]) >= 0);
+ assert_se(sd_bus_set_server(bus, 1, id) >= 0);
+ assert_se(sd_bus_set_anonymous(bus, c->server_anonymous_auth) >= 0);
+ assert_se(sd_bus_negotiate_fds(bus, c->server_negotiate_unix_fds) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ while (!quit) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+
+ r = sd_bus_process(bus, &m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to process requests: %m");
+ goto fail;
+ }
+
+ if (r == 0) {
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to wait: %m");
+ goto fail;
+ }
+
+ continue;
+ }
+
+ if (!m)
+ continue;
+
+ log_info("Got message! member=%s", strna(sd_bus_message_get_member(m)));
+
+ if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "Exit")) {
+
+ assert_se((sd_bus_can_send(bus, 'h') >= 1) ==
+ (c->server_negotiate_unix_fds && c->client_negotiate_unix_fds));
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate return: %m");
+ goto fail;
+ }
+
+ quit = true;
+
+ } else if (sd_bus_message_is_method_call(m, NULL, NULL)) {
+ r = sd_bus_message_new_method_error(
+ m,
+ &reply,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_UNKNOWN_METHOD, "Unknown method."));
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate return: %m");
+ goto fail;
+ }
+ }
+
+ if (reply) {
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to send reply: %m");
+ goto fail;
+ }
+ }
+ }
+
+ r = 0;
+
+fail:
+ if (bus) {
+ sd_bus_flush(bus);
+ sd_bus_unref(bus);
+ }
+
+ return INT_TO_PTR(r);
+}
+
+static int client(struct context *c) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_fd(bus, c->fds[1], c->fds[1]) >= 0);
+ assert_se(sd_bus_negotiate_fds(bus, c->client_negotiate_unix_fds) >= 0);
+ assert_se(sd_bus_set_anonymous(bus, c->client_anonymous_auth) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "Exit");
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate method call: %m");
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, -r));
+
+ return 0;
+}
+
+static int test_one(bool client_negotiate_unix_fds, bool server_negotiate_unix_fds,
+ bool client_anonymous_auth, bool server_anonymous_auth) {
+
+ struct context c;
+ pthread_t s;
+ void *p;
+ int r, q;
+
+ zero(c);
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, c.fds) >= 0);
+
+ c.client_negotiate_unix_fds = client_negotiate_unix_fds;
+ c.server_negotiate_unix_fds = server_negotiate_unix_fds;
+ c.client_anonymous_auth = client_anonymous_auth;
+ c.server_anonymous_auth = server_anonymous_auth;
+
+ r = pthread_create(&s, NULL, server, &c);
+ if (r != 0)
+ return -r;
+
+ r = client(&c);
+
+ q = pthread_join(s, &p);
+ if (q != 0)
+ return -q;
+
+ if (r < 0)
+ return r;
+
+ if (PTR_TO_INT(p) < 0)
+ return PTR_TO_INT(p);
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ int r;
+
+ r = test_one(true, true, false, false);
+ assert_se(r >= 0);
+
+ r = test_one(true, false, false, false);
+ assert_se(r >= 0);
+
+ r = test_one(false, true, false, false);
+ assert_se(r >= 0);
+
+ r = test_one(false, false, false, false);
+ assert_se(r >= 0);
+
+ r = test_one(true, true, true, true);
+ assert_se(r >= 0);
+
+ r = test_one(true, true, false, true);
+ assert_se(r >= 0);
+
+ r = test_one(true, true, true, false);
+ assert_se(r == -EPERM);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-signature.c b/src/libsystemd/sd-bus/test-bus-signature.c
new file mode 100644
index 0000000..84648db
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-signature.c
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "bus-internal.h"
+#include "bus-signature.h"
+#include "log.h"
+#include "string-util.h"
+
+int main(int argc, char *argv[]) {
+ char prefix[256];
+ int r;
+
+ assert_se(signature_is_single("y", false));
+ assert_se(signature_is_single("u", false));
+ assert_se(signature_is_single("v", false));
+ assert_se(signature_is_single("as", false));
+ assert_se(signature_is_single("(ss)", false));
+ assert_se(!signature_is_single("()", false));
+ assert_se(!signature_is_single("(()()()()())", false));
+ assert_se(!signature_is_single("(((())))", false));
+ assert_se(signature_is_single("((((s))))", false));
+ assert_se(signature_is_single("{ss}", true));
+ assert_se(signature_is_single("a{ss}", false));
+ assert_se(!signature_is_single("uu", false));
+ assert_se(!signature_is_single("", false));
+ assert_se(!signature_is_single("(", false));
+ assert_se(!signature_is_single(")", false));
+ assert_se(!signature_is_single("())", false));
+ assert_se(!signature_is_single("((())", false));
+ assert_se(!signature_is_single("{)", false));
+ assert_se(!signature_is_single("{}", true));
+ assert_se(!signature_is_single("{sss}", true));
+ assert_se(!signature_is_single("{s}", true));
+ assert_se(!signature_is_single("{ss}", false));
+ assert_se(!signature_is_single("{ass}", true));
+ assert_se(!signature_is_single("a}", true));
+
+ assert_se(signature_is_pair("yy"));
+ assert_se(signature_is_pair("ss"));
+ assert_se(signature_is_pair("sas"));
+ assert_se(signature_is_pair("sv"));
+ assert_se(signature_is_pair("sa(vs)"));
+ assert_se(!signature_is_pair(""));
+ assert_se(!signature_is_pair("va"));
+ assert_se(!signature_is_pair("sss"));
+ assert_se(!signature_is_pair("{s}ss"));
+
+ assert_se(signature_is_valid("ssa{ss}sssub", true));
+ assert_se(signature_is_valid("ssa{ss}sssub", false));
+ assert_se(signature_is_valid("{ss}", true));
+ assert_se(!signature_is_valid("{ss}", false));
+ assert_se(signature_is_valid("", true));
+ assert_se(signature_is_valid("", false));
+
+ assert_se(signature_is_valid("sssusa(uuubbba(uu)uuuu)a{u(uuuvas)}", false));
+
+ assert_se(!signature_is_valid("a", false));
+ assert_se(signature_is_valid("as", false));
+ assert_se(signature_is_valid("aas", false));
+ assert_se(signature_is_valid("aaas", false));
+ assert_se(signature_is_valid("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaad", false));
+ assert_se(signature_is_valid("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaas", false));
+ assert_se(!signature_is_valid("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaau", false));
+
+ assert_se(signature_is_valid("((((((((((((((((((((((((((((((((s))))))))))))))))))))))))))))))))", false));
+ assert_se(!signature_is_valid("((((((((((((((((((((((((((((((((()))))))))))))))))))))))))))))))))", false));
+
+ assert_se(namespace_complex_pattern("", ""));
+ assert_se(namespace_complex_pattern("foobar", "foobar"));
+ assert_se(namespace_complex_pattern("foobar.waldo", "foobar.waldo"));
+ assert_se(namespace_complex_pattern("foobar.", "foobar.waldo"));
+ assert_se(namespace_complex_pattern("foobar.waldo", "foobar."));
+ assert_se(!namespace_complex_pattern("foobar.waldo", "foobar"));
+ assert_se(!namespace_complex_pattern("foobar", "foobar.waldo"));
+ assert_se(!namespace_complex_pattern("", "foo"));
+ assert_se(!namespace_complex_pattern("foo", ""));
+ assert_se(!namespace_complex_pattern("foo.", ""));
+
+ assert_se(path_complex_pattern("", ""));
+ assert_se(!path_complex_pattern("", "/"));
+ assert_se(!path_complex_pattern("/", ""));
+ assert_se(path_complex_pattern("/", "/"));
+ assert_se(path_complex_pattern("/foobar/", "/"));
+ assert_se(!path_complex_pattern("/foobar/", "/foobar"));
+ assert_se(path_complex_pattern("/foobar", "/foobar"));
+ assert_se(!path_complex_pattern("/foobar", "/foobar/"));
+ assert_se(!path_complex_pattern("/foobar", "/foobar/waldo"));
+ assert_se(path_complex_pattern("/foobar/", "/foobar/waldo"));
+ assert_se(path_complex_pattern("/foobar/waldo", "/foobar/"));
+
+ assert_se(path_simple_pattern("/foo/", "/foo/bar/waldo"));
+
+ assert_se(namespace_simple_pattern("", ""));
+ assert_se(namespace_simple_pattern("", ".foobar"));
+ assert_se(namespace_simple_pattern("foobar", "foobar"));
+ assert_se(namespace_simple_pattern("foobar.waldo", "foobar.waldo"));
+ assert_se(namespace_simple_pattern("foobar", "foobar.waldo"));
+ assert_se(!namespace_simple_pattern("foobar.waldo", "foobar"));
+ assert_se(!namespace_simple_pattern("", "foo"));
+ assert_se(!namespace_simple_pattern("foo", ""));
+ assert_se(namespace_simple_pattern("foo.", "foo.bar.waldo"));
+
+ assert_se(streq(object_path_startswith("/foo/bar", "/foo"), "bar"));
+ assert_se(streq(object_path_startswith("/foo", "/foo"), ""));
+ assert_se(streq(object_path_startswith("/foo", "/"), "foo"));
+ assert_se(streq(object_path_startswith("/", "/"), ""));
+ assert_se(!object_path_startswith("/foo", "/bar"));
+ assert_se(!object_path_startswith("/", "/bar"));
+ assert_se(!object_path_startswith("/foo", ""));
+
+ assert_se(object_path_is_valid("/foo/bar"));
+ assert_se(object_path_is_valid("/foo"));
+ assert_se(object_path_is_valid("/"));
+ assert_se(object_path_is_valid("/foo5"));
+ assert_se(object_path_is_valid("/foo_5"));
+ assert_se(!object_path_is_valid(""));
+ assert_se(!object_path_is_valid("/foo/"));
+ assert_se(!object_path_is_valid("//"));
+ assert_se(!object_path_is_valid("//foo"));
+ assert_se(!object_path_is_valid("/foo//bar"));
+ assert_se(!object_path_is_valid("/foo/aaaäöä"));
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, "/") {
+ log_info("<%s>", prefix);
+ assert_not_reached("???");
+ }
+
+ r = 0;
+ OBJECT_PATH_FOREACH_PREFIX(prefix, "/xxx") {
+ log_info("<%s>", prefix);
+ assert_se(streq(prefix, "/"));
+ assert_se(r == 0);
+ r++;
+ }
+ assert_se(r == 1);
+
+ r = 0;
+ OBJECT_PATH_FOREACH_PREFIX(prefix, "/xxx/yyy/zzz") {
+ log_info("<%s>", prefix);
+ assert_se(r != 0 || streq(prefix, "/xxx/yyy"));
+ assert_se(r != 1 || streq(prefix, "/xxx"));
+ assert_se(r != 2 || streq(prefix, "/"));
+ r++;
+ }
+ assert_se(r == 3);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-track.c b/src/libsystemd/sd-bus/test-bus-track.c
new file mode 100644
index 0000000..68a0010
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-track.c
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sys/socket.h>
+
+#include "sd-bus.h"
+
+#include "macro.h"
+#include "tests.h"
+
+static bool track_cb_called_x = false;
+static bool track_cb_called_y = false;
+
+static int track_cb_x(sd_bus_track *t, void *userdata) {
+
+ log_error("TRACK CB X");
+
+ assert_se(!track_cb_called_x);
+ track_cb_called_x = true;
+
+ /* This means b's name disappeared. Let's now disconnect, to make sure the track handling on disconnect works
+ * as it should. */
+
+ assert_se(shutdown(sd_bus_get_fd(sd_bus_track_get_bus(t)), SHUT_RDWR) >= 0);
+ return 1;
+}
+
+static int track_cb_y(sd_bus_track *t, void *userdata) {
+ int r;
+
+ log_error("TRACK CB Y");
+
+ assert_se(!track_cb_called_y);
+ track_cb_called_y = true;
+
+ /* We got disconnected, let's close everything */
+
+ r = sd_event_exit(sd_bus_get_event(sd_bus_track_get_bus(t)), EXIT_SUCCESS);
+ assert_se(r >= 0);
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(sd_bus_track_unrefp) sd_bus_track *x = NULL, *y = NULL;
+ _cleanup_(sd_bus_unrefp) sd_bus *a = NULL, *b = NULL;
+ bool use_system_bus = false;
+ const char *unique;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = sd_event_default(&event);
+ assert_se(r >= 0);
+
+ r = sd_bus_open_user(&a);
+ if (IN_SET(r, -ECONNREFUSED, -ENOENT)) {
+ r = sd_bus_open_system(&a);
+ if (IN_SET(r, -ECONNREFUSED, -ENOENT))
+ return log_tests_skipped("Failed to connect to bus");
+ use_system_bus = true;
+ }
+ assert_se(r >= 0);
+
+ r = sd_bus_attach_event(a, event, SD_EVENT_PRIORITY_NORMAL);
+ assert_se(r >= 0);
+
+ if (use_system_bus)
+ r = sd_bus_open_system(&b);
+ else
+ r = sd_bus_open_user(&b);
+ assert_se(r >= 0);
+
+ r = sd_bus_attach_event(b, event, SD_EVENT_PRIORITY_NORMAL);
+ assert_se(r >= 0);
+
+ /* Watch b's name from a */
+ r = sd_bus_track_new(a, &x, track_cb_x, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_get_unique_name(b, &unique);
+ assert_se(r >= 0);
+
+ r = sd_bus_track_add_name(x, unique);
+ assert_se(r >= 0);
+
+ /* Watch's a's own name from a */
+ r = sd_bus_track_new(a, &y, track_cb_y, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_get_unique_name(a, &unique);
+ assert_se(r >= 0);
+
+ r = sd_bus_track_add_name(y, unique);
+ assert_se(r >= 0);
+
+ /* Now make b's name disappear */
+ sd_bus_close(b);
+
+ r = sd_event_loop(event);
+ assert_se(r >= 0);
+
+ assert_se(track_cb_called_x);
+ assert_se(track_cb_called_y);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-vtable-cc.cc b/src/libsystemd/sd-bus/test-bus-vtable-cc.cc
new file mode 120000
index 0000000..abee398
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-vtable-cc.cc
@@ -0,0 +1 @@
+test-bus-vtable.c \ No newline at end of file
diff --git a/src/libsystemd/sd-bus/test-bus-vtable.c b/src/libsystemd/sd-bus/test-bus-vtable.c
new file mode 100644
index 0000000..fd9ad81
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-vtable.c
@@ -0,0 +1,81 @@
+#include <stdbool.h>
+#include <stddef.h>
+
+/* We use system assert.h here, because we don't want to keep macro.h and log.h C++ compatible */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+
+#include "sd-bus-vtable.h"
+
+#define DEFAULT_BUS_PATH "unix:path=/run/dbus/system_bus_socket"
+
+struct context {
+ bool quit;
+ char *something;
+ char *automatic_string_property;
+ uint32_t automatic_integer_property;
+};
+
+static int handler(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ return 1;
+}
+
+static int value_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ return 1;
+}
+
+static int get_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ return 1;
+}
+
+static int set_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error) {
+ return 1;
+}
+
+static const sd_bus_vtable vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("AlterSomething", "s", "s", handler, 0),
+ SD_BUS_METHOD("Exit", "", "", handler, 0),
+ SD_BUS_METHOD_WITH_OFFSET("AlterSomething2", "s", "s", handler, 200, 0),
+ SD_BUS_METHOD_WITH_OFFSET("Exit2", "", "", handler, 200, 0),
+ SD_BUS_PROPERTY("Value", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Value2", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("Value3", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Value4", "s", value_handler, 10, 0),
+ SD_BUS_PROPERTY("AnExplicitProperty", "s", NULL, offsetof(struct context, something),
+ SD_BUS_VTABLE_PROPERTY_EXPLICIT|SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_WRITABLE_PROPERTY("Something", "s", get_handler, set_handler, 0, 0),
+ SD_BUS_WRITABLE_PROPERTY("AutomaticStringProperty", "s", NULL, NULL,
+ offsetof(struct context, automatic_string_property), 0),
+ SD_BUS_WRITABLE_PROPERTY("AutomaticIntegerProperty", "u", NULL, NULL,
+ offsetof(struct context, automatic_integer_property), 0),
+ SD_BUS_METHOD("NoOperation", NULL, NULL, NULL, 0),
+ SD_BUS_SIGNAL("DummySignal", "b", 0),
+ SD_BUS_SIGNAL("DummySignal2", "so", 0),
+ SD_BUS_VTABLE_END
+};
+
+static void test_vtable(void) {
+ sd_bus *bus = NULL;
+ struct context c = {};
+ int r;
+
+ assert(sd_bus_new(&bus) >= 0);
+
+ assert(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.testVtable", vtable, &c) >= 0);
+ assert(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.testVtable2", vtable, &c) >= 0);
+
+ assert(sd_bus_set_address(bus, DEFAULT_BUS_PATH) >= 0);
+ r = sd_bus_start(bus);
+ assert(r == 0 || /* success */
+ r == -ENOENT /* dbus is inactive */ );
+
+ sd_bus_unref(bus);
+}
+
+int main(int argc, char **argv) {
+ test_vtable();
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-watch-bind.c b/src/libsystemd/sd-bus/test-bus-watch-bind.c
new file mode 100644
index 0000000..4b3da30
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-watch-bind.c
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <pthread.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "random-util.h"
+#include "rm-rf.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+
+static int method_foobar(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ log_info("Got Foobar() call.");
+
+ assert_se(sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), 0) >= 0);
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_exit(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ log_info("Got Exit() call");
+ assert_se(sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), 1) >= 0);
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static const sd_bus_vtable vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("Foobar", NULL, NULL, method_foobar, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Exit", NULL, NULL, method_exit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END,
+};
+
+static void* thread_server(void *p) {
+ _cleanup_free_ char *suffixed = NULL, *suffixed2 = NULL, *d = NULL;
+ _cleanup_close_ int fd = -1;
+ union sockaddr_union u = {};
+ const char *path = p;
+ int salen;
+
+ log_debug("Initializing server");
+
+ /* Let's play some games, by slowly creating the socket directory, and renaming it in the middle */
+ (void) usleep(100 * USEC_PER_MSEC);
+
+ assert_se(mkdir_parents(path, 0755) >= 0);
+ (void) usleep(100 * USEC_PER_MSEC);
+
+ d = dirname_malloc(path);
+ assert_se(d);
+ assert_se(asprintf(&suffixed, "%s.%" PRIx64, d, random_u64()) >= 0);
+ assert_se(rename(d, suffixed) >= 0);
+ (void) usleep(100 * USEC_PER_MSEC);
+
+ assert_se(asprintf(&suffixed2, "%s.%" PRIx64, d, random_u64()) >= 0);
+ assert_se(symlink(suffixed2, d) >= 0);
+ (void) usleep(100 * USEC_PER_MSEC);
+
+ assert_se(symlink(basename(suffixed), suffixed2) >= 0);
+ (void) usleep(100 * USEC_PER_MSEC);
+
+ salen = sockaddr_un_set_path(&u.un, path);
+ assert_se(salen >= 0);
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+ assert_se(fd >= 0);
+
+ assert_se(bind(fd, &u.sa, salen) >= 0);
+ usleep(100 * USEC_PER_MSEC);
+
+ assert_se(listen(fd, SOMAXCONN) >= 0);
+ usleep(100 * USEC_PER_MSEC);
+
+ assert_se(touch(path) >= 0);
+ usleep(100 * USEC_PER_MSEC);
+
+ log_debug("Initialized server");
+
+ for (;;) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ sd_id128_t id;
+ int bus_fd, code;
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+
+ assert_se(sd_event_new(&event) >= 0);
+
+ bus_fd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ assert_se(bus_fd >= 0);
+
+ log_debug("Accepted server connection");
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_description(bus, "server") >= 0);
+ assert_se(sd_bus_set_fd(bus, bus_fd, bus_fd) >= 0);
+ assert_se(sd_bus_set_server(bus, true, id) >= 0);
+ /* assert_se(sd_bus_set_anonymous(bus, true) >= 0); */
+
+ assert_se(sd_bus_attach_event(bus, event, 0) >= 0);
+
+ assert_se(sd_bus_add_object_vtable(bus, NULL, "/foo", "foo.TestInterface", vtable, NULL) >= 0);
+
+ assert_se(sd_bus_start(bus) >= 0);
+
+ assert_se(sd_event_loop(event) >= 0);
+
+ assert_se(sd_event_get_exit_code(event, &code) >= 0);
+
+ if (code > 0)
+ break;
+ }
+
+ log_debug("Server done");
+
+ return NULL;
+}
+
+static void* thread_client1(void *p) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ const char *path = p, *t;
+ int r;
+
+ log_debug("Initializing client1");
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_description(bus, "client1") >= 0);
+
+ t = strjoina("unix:path=", path);
+ assert_se(sd_bus_set_address(bus, t) >= 0);
+ assert_se(sd_bus_set_watch_bind(bus, true) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ r = sd_bus_call_method(bus, "foo.bar", "/foo", "foo.TestInterface", "Foobar", &error, NULL, NULL);
+ assert_se(r >= 0);
+
+ log_debug("Client1 done");
+
+ return NULL;
+}
+
+static int client2_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ assert_se(sd_bus_message_is_method_error(m, NULL) == 0);
+ assert_se(sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), 0) >= 0);
+ return 0;
+}
+
+static void* thread_client2(void *p) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ const char *path = p, *t;
+
+ log_debug("Initializing client2");
+
+ assert_se(sd_event_new(&event) >= 0);
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_description(bus, "client2") >= 0);
+
+ t = strjoina("unix:path=", path);
+ assert_se(sd_bus_set_address(bus, t) >= 0);
+ assert_se(sd_bus_set_watch_bind(bus, true) >= 0);
+ assert_se(sd_bus_attach_event(bus, event, 0) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ assert_se(sd_bus_call_method_async(bus, NULL, "foo.bar", "/foo", "foo.TestInterface", "Foobar", client2_callback, NULL, NULL) >= 0);
+
+ assert_se(sd_event_loop(event) >= 0);
+
+ log_debug("Client2 done");
+
+ return NULL;
+}
+
+static void request_exit(const char *path) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ const char *t;
+
+ assert_se(sd_bus_new(&bus) >= 0);
+
+ t = strjoina("unix:path=", path);
+ assert_se(sd_bus_set_address(bus, t) >= 0);
+ assert_se(sd_bus_set_watch_bind(bus, true) >= 0);
+ assert_se(sd_bus_set_description(bus, "request-exit") >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ assert_se(sd_bus_call_method(bus, "foo.bar", "/foo", "foo.TestInterface", "Exit", NULL, NULL, NULL) >= 0);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *d = NULL;
+ pthread_t server, client1, client2;
+ char *path;
+
+ log_set_max_level(LOG_DEBUG);
+
+ /* We use /dev/shm here rather than /tmp, since some weird distros might set up /tmp as some weird fs that
+ * doesn't support inotify properly. */
+ assert_se(mkdtemp_malloc("/dev/shm/systemd-watch-bind-XXXXXX", &d) >= 0);
+
+ path = strjoina(d, "/this/is/a/socket");
+
+ assert_se(pthread_create(&server, NULL, thread_server, path) == 0);
+ assert_se(pthread_create(&client1, NULL, thread_client1, path) == 0);
+ assert_se(pthread_create(&client2, NULL, thread_client2, path) == 0);
+
+ assert_se(pthread_join(client1, NULL) == 0);
+ assert_se(pthread_join(client2, NULL) == 0);
+
+ request_exit(path);
+
+ assert_se(pthread_join(server, NULL) == 0);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-daemon/sd-daemon.c b/src/libsystemd/sd-daemon/sd-daemon.c
new file mode 100644
index 0000000..9e8f0a7
--- /dev/null
+++ b/src/libsystemd/sd-daemon/sd-daemon.c
@@ -0,0 +1,660 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <mqueue.h>
+#include <netinet/in.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "strv.h"
+#include "util.h"
+
+#define SNDBUF_SIZE (8*1024*1024)
+
+static void unsetenv_all(bool unset_environment) {
+
+ if (!unset_environment)
+ return;
+
+ unsetenv("LISTEN_PID");
+ unsetenv("LISTEN_FDS");
+ unsetenv("LISTEN_FDNAMES");
+}
+
+_public_ int sd_listen_fds(int unset_environment) {
+ const char *e;
+ int n, r, fd;
+ pid_t pid;
+
+ e = getenv("LISTEN_PID");
+ if (!e) {
+ r = 0;
+ goto finish;
+ }
+
+ r = parse_pid(e, &pid);
+ if (r < 0)
+ goto finish;
+
+ /* Is this for us? */
+ if (getpid_cached() != pid) {
+ r = 0;
+ goto finish;
+ }
+
+ e = getenv("LISTEN_FDS");
+ if (!e) {
+ r = 0;
+ goto finish;
+ }
+
+ r = safe_atoi(e, &n);
+ if (r < 0)
+ goto finish;
+
+ assert_cc(SD_LISTEN_FDS_START < INT_MAX);
+ if (n <= 0 || n > INT_MAX - SD_LISTEN_FDS_START) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd ++) {
+ r = fd_cloexec(fd, true);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = n;
+
+finish:
+ unsetenv_all(unset_environment);
+ return r;
+}
+
+_public_ int sd_listen_fds_with_names(int unset_environment, char ***names) {
+ _cleanup_strv_free_ char **l = NULL;
+ bool have_names;
+ int n_names = 0, n_fds;
+ const char *e;
+ int r;
+
+ if (!names)
+ return sd_listen_fds(unset_environment);
+
+ e = getenv("LISTEN_FDNAMES");
+ if (e) {
+ n_names = strv_split_extract(&l, e, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (n_names < 0) {
+ unsetenv_all(unset_environment);
+ return n_names;
+ }
+
+ have_names = true;
+ } else
+ have_names = false;
+
+ n_fds = sd_listen_fds(unset_environment);
+ if (n_fds <= 0)
+ return n_fds;
+
+ if (have_names) {
+ if (n_names != n_fds)
+ return -EINVAL;
+ } else {
+ r = strv_extend_n(&l, "unknown", n_fds);
+ if (r < 0)
+ return r;
+ }
+
+ *names = TAKE_PTR(l);
+
+ return n_fds;
+}
+
+_public_ int sd_is_fifo(int fd, const char *path) {
+ struct stat st_fd;
+
+ assert_return(fd >= 0, -EBADF);
+
+ if (fstat(fd, &st_fd) < 0)
+ return -errno;
+
+ if (!S_ISFIFO(st_fd.st_mode))
+ return 0;
+
+ if (path) {
+ struct stat st_path;
+
+ if (stat(path, &st_path) < 0) {
+
+ if (IN_SET(errno, ENOENT, ENOTDIR))
+ return 0;
+
+ return -errno;
+ }
+
+ return
+ st_path.st_dev == st_fd.st_dev &&
+ st_path.st_ino == st_fd.st_ino;
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_special(int fd, const char *path) {
+ struct stat st_fd;
+
+ assert_return(fd >= 0, -EBADF);
+
+ if (fstat(fd, &st_fd) < 0)
+ return -errno;
+
+ if (!S_ISREG(st_fd.st_mode) && !S_ISCHR(st_fd.st_mode))
+ return 0;
+
+ if (path) {
+ struct stat st_path;
+
+ if (stat(path, &st_path) < 0) {
+
+ if (IN_SET(errno, ENOENT, ENOTDIR))
+ return 0;
+
+ return -errno;
+ }
+
+ if (S_ISREG(st_fd.st_mode) && S_ISREG(st_path.st_mode))
+ return
+ st_path.st_dev == st_fd.st_dev &&
+ st_path.st_ino == st_fd.st_ino;
+ else if (S_ISCHR(st_fd.st_mode) && S_ISCHR(st_path.st_mode))
+ return st_path.st_rdev == st_fd.st_rdev;
+ else
+ return 0;
+ }
+
+ return 1;
+}
+
+static int sd_is_socket_internal(int fd, int type, int listening) {
+ struct stat st_fd;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(type >= 0, -EINVAL);
+
+ if (fstat(fd, &st_fd) < 0)
+ return -errno;
+
+ if (!S_ISSOCK(st_fd.st_mode))
+ return 0;
+
+ if (type != 0) {
+ int other_type = 0;
+ socklen_t l = sizeof(other_type);
+
+ if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &other_type, &l) < 0)
+ return -errno;
+
+ if (l != sizeof(other_type))
+ return -EINVAL;
+
+ if (other_type != type)
+ return 0;
+ }
+
+ if (listening >= 0) {
+ int accepting = 0;
+ socklen_t l = sizeof(accepting);
+
+ if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &accepting, &l) < 0)
+ return -errno;
+
+ if (l != sizeof(accepting))
+ return -EINVAL;
+
+ if (!accepting != !listening)
+ return 0;
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_socket(int fd, int family, int type, int listening) {
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(family >= 0, -EINVAL);
+
+ r = sd_is_socket_internal(fd, type, listening);
+ if (r <= 0)
+ return r;
+
+ if (family > 0) {
+ union sockaddr_union sockaddr = {};
+ socklen_t l = sizeof(sockaddr);
+
+ if (getsockname(fd, &sockaddr.sa, &l) < 0)
+ return -errno;
+
+ if (l < sizeof(sa_family_t))
+ return -EINVAL;
+
+ return sockaddr.sa.sa_family == family;
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_socket_inet(int fd, int family, int type, int listening, uint16_t port) {
+ union sockaddr_union sockaddr = {};
+ socklen_t l = sizeof(sockaddr);
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(IN_SET(family, 0, AF_INET, AF_INET6), -EINVAL);
+
+ r = sd_is_socket_internal(fd, type, listening);
+ if (r <= 0)
+ return r;
+
+ if (getsockname(fd, &sockaddr.sa, &l) < 0)
+ return -errno;
+
+ if (l < sizeof(sa_family_t))
+ return -EINVAL;
+
+ if (!IN_SET(sockaddr.sa.sa_family, AF_INET, AF_INET6))
+ return 0;
+
+ if (family != 0)
+ if (sockaddr.sa.sa_family != family)
+ return 0;
+
+ if (port > 0) {
+ unsigned sa_port;
+
+ r = sockaddr_port(&sockaddr.sa, &sa_port);
+ if (r < 0)
+ return r;
+
+ return port == sa_port;
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_socket_sockaddr(int fd, int type, const struct sockaddr* addr, unsigned addr_len, int listening) {
+ union sockaddr_union sockaddr = {};
+ socklen_t l = sizeof(sockaddr);
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(addr, -EINVAL);
+ assert_return(addr_len >= sizeof(sa_family_t), -ENOBUFS);
+ assert_return(IN_SET(addr->sa_family, AF_INET, AF_INET6), -EPFNOSUPPORT);
+
+ r = sd_is_socket_internal(fd, type, listening);
+ if (r <= 0)
+ return r;
+
+ if (getsockname(fd, &sockaddr.sa, &l) < 0)
+ return -errno;
+
+ if (l < sizeof(sa_family_t))
+ return -EINVAL;
+
+ if (sockaddr.sa.sa_family != addr->sa_family)
+ return 0;
+
+ if (sockaddr.sa.sa_family == AF_INET) {
+ const struct sockaddr_in *in = (const struct sockaddr_in *) addr;
+
+ if (l < sizeof(struct sockaddr_in) || addr_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ if (in->sin_port != 0 &&
+ sockaddr.in.sin_port != in->sin_port)
+ return false;
+
+ return sockaddr.in.sin_addr.s_addr == in->sin_addr.s_addr;
+
+ } else {
+ const struct sockaddr_in6 *in = (const struct sockaddr_in6 *) addr;
+
+ if (l < sizeof(struct sockaddr_in6) || addr_len < sizeof(struct sockaddr_in6))
+ return -EINVAL;
+
+ if (in->sin6_port != 0 &&
+ sockaddr.in6.sin6_port != in->sin6_port)
+ return false;
+
+ if (in->sin6_flowinfo != 0 &&
+ sockaddr.in6.sin6_flowinfo != in->sin6_flowinfo)
+ return false;
+
+ if (in->sin6_scope_id != 0 &&
+ sockaddr.in6.sin6_scope_id != in->sin6_scope_id)
+ return false;
+
+ return memcmp(sockaddr.in6.sin6_addr.s6_addr, in->sin6_addr.s6_addr,
+ sizeof(in->sin6_addr.s6_addr)) == 0;
+ }
+}
+
+_public_ int sd_is_socket_unix(int fd, int type, int listening, const char *path, size_t length) {
+ union sockaddr_union sockaddr = {};
+ socklen_t l = sizeof(sockaddr);
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+
+ r = sd_is_socket_internal(fd, type, listening);
+ if (r <= 0)
+ return r;
+
+ if (getsockname(fd, &sockaddr.sa, &l) < 0)
+ return -errno;
+
+ if (l < sizeof(sa_family_t))
+ return -EINVAL;
+
+ if (sockaddr.sa.sa_family != AF_UNIX)
+ return 0;
+
+ if (path) {
+ if (length == 0)
+ length = strlen(path);
+
+ if (length == 0)
+ /* Unnamed socket */
+ return l == offsetof(struct sockaddr_un, sun_path);
+
+ if (path[0])
+ /* Normal path socket */
+ return
+ (l >= offsetof(struct sockaddr_un, sun_path) + length + 1) &&
+ memcmp(path, sockaddr.un.sun_path, length+1) == 0;
+ else
+ /* Abstract namespace socket */
+ return
+ (l == offsetof(struct sockaddr_un, sun_path) + length) &&
+ memcmp(path, sockaddr.un.sun_path, length) == 0;
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_mq(int fd, const char *path) {
+ struct mq_attr attr;
+
+ /* Check that the fd is valid */
+ assert_return(fcntl(fd, F_GETFD) >= 0, -errno);
+
+ if (mq_getattr(fd, &attr) < 0) {
+ if (errno == EBADF)
+ /* A non-mq fd (or an invalid one, but we ruled that out above) */
+ return 0;
+ return -errno;
+ }
+
+ if (path) {
+ char fpath[PATH_MAX];
+ struct stat a, b;
+
+ assert_return(path_is_absolute(path), -EINVAL);
+
+ if (fstat(fd, &a) < 0)
+ return -errno;
+
+ strncpy(stpcpy(fpath, "/dev/mqueue"), path, sizeof(fpath) - 12);
+ fpath[sizeof(fpath)-1] = 0;
+
+ if (stat(fpath, &b) < 0)
+ return -errno;
+
+ if (a.st_dev != b.st_dev ||
+ a.st_ino != b.st_ino)
+ return 0;
+ }
+
+ return 1;
+}
+
+_public_ int sd_pid_notify_with_fds(
+ pid_t pid,
+ int unset_environment,
+ const char *state,
+ const int *fds,
+ unsigned n_fds) {
+
+ union sockaddr_union sockaddr = {};
+ struct iovec iovec;
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_name = &sockaddr,
+ };
+ _cleanup_close_ int fd = -1;
+ struct cmsghdr *cmsg = NULL;
+ const char *e;
+ bool send_ucred;
+ int r, salen;
+
+ if (!state) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (n_fds > 0 && !fds) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ e = getenv("NOTIFY_SOCKET");
+ if (!e)
+ return 0;
+
+ salen = sockaddr_un_set_path(&sockaddr.un, e);
+ if (salen < 0) {
+ r = salen;
+ goto finish;
+ }
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
+ if (fd < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
+
+ iovec = IOVEC_MAKE_STRING(state);
+ msghdr.msg_namelen = salen;
+
+ send_ucred =
+ (pid != 0 && pid != getpid_cached()) ||
+ getuid() != geteuid() ||
+ getgid() != getegid();
+
+ if (n_fds > 0 || send_ucred) {
+ /* CMSG_SPACE(0) may return value different than zero, which results in miscalculated controllen. */
+ msghdr.msg_controllen =
+ (n_fds > 0 ? CMSG_SPACE(sizeof(int) * n_fds) : 0) +
+ (send_ucred ? CMSG_SPACE(sizeof(struct ucred)) : 0);
+
+ msghdr.msg_control = alloca0(msghdr.msg_controllen);
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ if (n_fds > 0) {
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int) * n_fds);
+
+ memcpy(CMSG_DATA(cmsg), fds, sizeof(int) * n_fds);
+
+ if (send_ucred)
+ assert_se(cmsg = CMSG_NXTHDR(&msghdr, cmsg));
+ }
+
+ if (send_ucred) {
+ struct ucred *ucred;
+
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_CREDENTIALS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
+
+ ucred = (struct ucred*) CMSG_DATA(cmsg);
+ ucred->pid = pid != 0 ? pid : getpid_cached();
+ ucred->uid = getuid();
+ ucred->gid = getgid();
+ }
+ }
+
+ /* First try with fake ucred data, as requested */
+ if (sendmsg(fd, &msghdr, MSG_NOSIGNAL) >= 0) {
+ r = 1;
+ goto finish;
+ }
+
+ /* If that failed, try with our own ucred instead */
+ if (send_ucred) {
+ msghdr.msg_controllen -= CMSG_SPACE(sizeof(struct ucred));
+ if (msghdr.msg_controllen == 0)
+ msghdr.msg_control = NULL;
+
+ if (sendmsg(fd, &msghdr, MSG_NOSIGNAL) >= 0) {
+ r = 1;
+ goto finish;
+ }
+ }
+
+ r = -errno;
+
+finish:
+ if (unset_environment)
+ unsetenv("NOTIFY_SOCKET");
+
+ return r;
+}
+
+_public_ int sd_pid_notify(pid_t pid, int unset_environment, const char *state) {
+ return sd_pid_notify_with_fds(pid, unset_environment, state, NULL, 0);
+}
+
+_public_ int sd_notify(int unset_environment, const char *state) {
+ return sd_pid_notify_with_fds(0, unset_environment, state, NULL, 0);
+}
+
+_public_ int sd_pid_notifyf(pid_t pid, int unset_environment, const char *format, ...) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ if (format) {
+ va_list ap;
+
+ va_start(ap, format);
+ r = vasprintf(&p, format, ap);
+ va_end(ap);
+
+ if (r < 0 || !p)
+ return -ENOMEM;
+ }
+
+ return sd_pid_notify(pid, unset_environment, p);
+}
+
+_public_ int sd_notifyf(int unset_environment, const char *format, ...) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ if (format) {
+ va_list ap;
+
+ va_start(ap, format);
+ r = vasprintf(&p, format, ap);
+ va_end(ap);
+
+ if (r < 0 || !p)
+ return -ENOMEM;
+ }
+
+ return sd_pid_notify(0, unset_environment, p);
+}
+
+_public_ int sd_booted(void) {
+ /* We test whether the runtime unit file directory has been
+ * created. This takes place in mount-setup.c, so is
+ * guaranteed to happen very early during boot. */
+
+ if (laccess("/run/systemd/system/", F_OK) >= 0)
+ return true;
+
+ if (errno == ENOENT)
+ return false;
+
+ return -errno;
+}
+
+_public_ int sd_watchdog_enabled(int unset_environment, uint64_t *usec) {
+ const char *s, *p = ""; /* p is set to dummy value to do unsetting */
+ uint64_t u;
+ int r = 0;
+
+ s = getenv("WATCHDOG_USEC");
+ if (!s)
+ goto finish;
+
+ r = safe_atou64(s, &u);
+ if (r < 0)
+ goto finish;
+ if (u <= 0 || u >= USEC_INFINITY) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ p = getenv("WATCHDOG_PID");
+ if (p) {
+ pid_t pid;
+
+ r = parse_pid(p, &pid);
+ if (r < 0)
+ goto finish;
+
+ /* Is this for us? */
+ if (getpid_cached() != pid) {
+ r = 0;
+ goto finish;
+ }
+ }
+
+ if (usec)
+ *usec = u;
+
+ r = 1;
+
+finish:
+ if (unset_environment && s)
+ unsetenv("WATCHDOG_USEC");
+ if (unset_environment && p)
+ unsetenv("WATCHDOG_PID");
+
+ return r;
+}
diff --git a/src/libsystemd/sd-device/device-enumerator-private.h b/src/libsystemd/sd-device/device-enumerator-private.h
new file mode 100644
index 0000000..87411bf
--- /dev/null
+++ b/src/libsystemd/sd-device/device-enumerator-private.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-device.h"
+
+int device_enumerator_scan_devices(sd_device_enumerator *enumeartor);
+int device_enumerator_scan_subsystems(sd_device_enumerator *enumeartor);
+int device_enumerator_add_device(sd_device_enumerator *enumerator, sd_device *device);
+int device_enumerator_add_match_is_initialized(sd_device_enumerator *enumerator);
+sd_device *device_enumerator_get_first(sd_device_enumerator *enumerator);
+sd_device *device_enumerator_get_next(sd_device_enumerator *enumerator);
+sd_device **device_enumerator_get_devices(sd_device_enumerator *enumerator, size_t *ret_n_devices);
+
+#define FOREACH_DEVICE_AND_SUBSYSTEM(enumerator, device) \
+ for (device = device_enumerator_get_first(enumerator); \
+ device; \
+ device = device_enumerator_get_next(enumerator))
diff --git a/src/libsystemd/sd-device/device-enumerator.c b/src/libsystemd/sd-device/device-enumerator.c
new file mode 100644
index 0000000..20529aa
--- /dev/null
+++ b/src/libsystemd/sd-device/device-enumerator.c
@@ -0,0 +1,1004 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-enumerator-private.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+#define DEVICE_ENUMERATE_MAX_DEPTH 256
+
+typedef enum DeviceEnumerationType {
+ DEVICE_ENUMERATION_TYPE_DEVICES,
+ DEVICE_ENUMERATION_TYPE_SUBSYSTEMS,
+ _DEVICE_ENUMERATION_TYPE_MAX,
+ _DEVICE_ENUMERATION_TYPE_INVALID = -1,
+} DeviceEnumerationType;
+
+struct sd_device_enumerator {
+ unsigned n_ref;
+
+ DeviceEnumerationType type;
+ sd_device **devices;
+ size_t n_devices, n_allocated, current_device_index;
+ bool scan_uptodate;
+
+ Set *match_subsystem;
+ Set *nomatch_subsystem;
+ Hashmap *match_sysattr;
+ Hashmap *nomatch_sysattr;
+ Hashmap *match_property;
+ Set *match_sysname;
+ Set *match_tag;
+ sd_device *match_parent;
+ bool match_allow_uninitialized;
+};
+
+_public_ int sd_device_enumerator_new(sd_device_enumerator **ret) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *enumerator = NULL;
+
+ assert(ret);
+
+ enumerator = new(sd_device_enumerator, 1);
+ if (!enumerator)
+ return -ENOMEM;
+
+ *enumerator = (sd_device_enumerator) {
+ .n_ref = 1,
+ .type = _DEVICE_ENUMERATION_TYPE_INVALID,
+ };
+
+ *ret = TAKE_PTR(enumerator);
+
+ return 0;
+}
+
+static sd_device_enumerator *device_enumerator_free(sd_device_enumerator *enumerator) {
+ size_t i;
+
+ assert(enumerator);
+
+ for (i = 0; i < enumerator->n_devices; i++)
+ sd_device_unref(enumerator->devices[i]);
+
+ free(enumerator->devices);
+ set_free_free(enumerator->match_subsystem);
+ set_free_free(enumerator->nomatch_subsystem);
+ hashmap_free_free_free(enumerator->match_sysattr);
+ hashmap_free_free_free(enumerator->nomatch_sysattr);
+ hashmap_free_free_free(enumerator->match_property);
+ set_free_free(enumerator->match_sysname);
+ set_free_free(enumerator->match_tag);
+ sd_device_unref(enumerator->match_parent);
+
+ return mfree(enumerator);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_device_enumerator, sd_device_enumerator, device_enumerator_free);
+
+_public_ int sd_device_enumerator_add_match_subsystem(sd_device_enumerator *enumerator, const char *subsystem, int match) {
+ Set **set;
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(subsystem, -EINVAL);
+
+ if (match)
+ set = &enumerator->match_subsystem;
+ else
+ set = &enumerator->nomatch_subsystem;
+
+ r = set_ensure_allocated(set, NULL);
+ if (r < 0)
+ return r;
+
+ r = set_put_strdup(*set, subsystem);
+ if (r < 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 0;
+}
+
+_public_ int sd_device_enumerator_add_match_sysattr(sd_device_enumerator *enumerator, const char *_sysattr, const char *_value, int match) {
+ _cleanup_free_ char *sysattr = NULL, *value = NULL;
+ Hashmap **hashmap;
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(_sysattr, -EINVAL);
+
+ if (match)
+ hashmap = &enumerator->match_sysattr;
+ else
+ hashmap = &enumerator->nomatch_sysattr;
+
+ r = hashmap_ensure_allocated(hashmap, NULL);
+ if (r < 0)
+ return r;
+
+ sysattr = strdup(_sysattr);
+ if (!sysattr)
+ return -ENOMEM;
+
+ if (_value) {
+ value = strdup(_value);
+ if (!value)
+ return -ENOMEM;
+ }
+
+ r = hashmap_put(*hashmap, sysattr, value);
+ if (r < 0)
+ return r;
+
+ sysattr = NULL;
+ value = NULL;
+
+ enumerator->scan_uptodate = false;
+
+ return 0;
+}
+
+_public_ int sd_device_enumerator_add_match_property(sd_device_enumerator *enumerator, const char *_property, const char *_value) {
+ _cleanup_free_ char *property = NULL, *value = NULL;
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(_property, -EINVAL);
+
+ r = hashmap_ensure_allocated(&enumerator->match_property, NULL);
+ if (r < 0)
+ return r;
+
+ property = strdup(_property);
+ if (!property)
+ return -ENOMEM;
+
+ if (_value) {
+ value = strdup(_value);
+ if (!value)
+ return -ENOMEM;
+ }
+
+ r = hashmap_put(enumerator->match_property, property, value);
+ if (r < 0)
+ return r;
+
+ property = NULL;
+ value = NULL;
+
+ enumerator->scan_uptodate = false;
+
+ return 0;
+}
+
+_public_ int sd_device_enumerator_add_match_sysname(sd_device_enumerator *enumerator, const char *sysname) {
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(sysname, -EINVAL);
+
+ r = set_ensure_allocated(&enumerator->match_sysname, NULL);
+ if (r < 0)
+ return r;
+
+ r = set_put_strdup(enumerator->match_sysname, sysname);
+ if (r < 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 0;
+}
+
+_public_ int sd_device_enumerator_add_match_tag(sd_device_enumerator *enumerator, const char *tag) {
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(tag, -EINVAL);
+
+ r = set_ensure_allocated(&enumerator->match_tag, NULL);
+ if (r < 0)
+ return r;
+
+ r = set_put_strdup(enumerator->match_tag, tag);
+ if (r < 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 0;
+}
+
+_public_ int sd_device_enumerator_add_match_parent(sd_device_enumerator *enumerator, sd_device *parent) {
+ assert_return(enumerator, -EINVAL);
+ assert_return(parent, -EINVAL);
+
+ sd_device_unref(enumerator->match_parent);
+ enumerator->match_parent = sd_device_ref(parent);
+
+ enumerator->scan_uptodate = false;
+
+ return 0;
+}
+
+_public_ int sd_device_enumerator_allow_uninitialized(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, -EINVAL);
+
+ enumerator->match_allow_uninitialized = true;
+
+ enumerator->scan_uptodate = false;
+
+ return 0;
+}
+
+int device_enumerator_add_match_is_initialized(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, -EINVAL);
+
+ enumerator->match_allow_uninitialized = false;
+
+ enumerator->scan_uptodate = false;
+
+ return 0;
+}
+
+static int device_compare(sd_device * const *_a, sd_device * const *_b) {
+ sd_device *a = *(sd_device **)_a, *b = *(sd_device **)_b;
+ const char *devpath_a, *devpath_b, *sound_a;
+ bool delay_a, delay_b;
+ int r;
+
+ assert_se(sd_device_get_devpath(a, &devpath_a) >= 0);
+ assert_se(sd_device_get_devpath(b, &devpath_b) >= 0);
+
+ sound_a = strstr(devpath_a, "/sound/card");
+ if (sound_a) {
+ /* For sound cards the control device must be enumerated last to
+ * make sure it's the final device node that gets ACLs applied.
+ * Applications rely on this fact and use ACL changes on the
+ * control node as an indicator that the ACL change of the
+ * entire sound card completed. The kernel makes this guarantee
+ * when creating those devices, and hence we should too when
+ * enumerating them. */
+ sound_a += STRLEN("/sound/card");
+ sound_a = strchr(sound_a, '/');
+
+ if (sound_a) {
+ unsigned prefix_len;
+
+ prefix_len = sound_a - devpath_a;
+
+ if (strncmp(devpath_a, devpath_b, prefix_len) == 0) {
+ const char *sound_b;
+
+ sound_b = devpath_b + prefix_len;
+
+ if (startswith(sound_a, "/controlC") &&
+ !startswith(sound_b, "/contolC"))
+ return 1;
+
+ if (!startswith(sound_a, "/controlC") &&
+ startswith(sound_b, "/controlC"))
+ return -1;
+ }
+ }
+ }
+
+ /* md and dm devices are enumerated after all other devices */
+ delay_a = strstr(devpath_a, "/block/md") || strstr(devpath_a, "/block/dm-");
+ delay_b = strstr(devpath_b, "/block/md") || strstr(devpath_b, "/block/dm-");
+ r = CMP(delay_a, delay_b);
+ if (r != 0)
+ return r;
+
+ return strcmp(devpath_a, devpath_b);
+}
+
+int device_enumerator_add_device(sd_device_enumerator *enumerator, sd_device *device) {
+ assert_return(enumerator, -EINVAL);
+ assert_return(device, -EINVAL);
+
+ if (!GREEDY_REALLOC(enumerator->devices, enumerator->n_allocated, enumerator->n_devices + 1))
+ return -ENOMEM;
+
+ enumerator->devices[enumerator->n_devices++] = sd_device_ref(device);
+
+ return 0;
+}
+
+static bool match_sysattr_value(sd_device *device, const char *sysattr, const char *match_value) {
+ const char *value;
+ int r;
+
+ assert(device);
+ assert(sysattr);
+
+ r = sd_device_get_sysattr_value(device, sysattr, &value);
+ if (r < 0)
+ return false;
+
+ if (!match_value)
+ return true;
+
+ if (fnmatch(match_value, value, 0) == 0)
+ return true;
+
+ return false;
+}
+
+static bool match_sysattr(sd_device_enumerator *enumerator, sd_device *device) {
+ const char *sysattr;
+ const char *value;
+ Iterator i;
+
+ assert(enumerator);
+ assert(device);
+
+ HASHMAP_FOREACH_KEY(value, sysattr, enumerator->nomatch_sysattr, i)
+ if (match_sysattr_value(device, sysattr, value))
+ return false;
+
+ HASHMAP_FOREACH_KEY(value, sysattr, enumerator->match_sysattr, i)
+ if (!match_sysattr_value(device, sysattr, value))
+ return false;
+
+ return true;
+}
+
+static bool match_property(sd_device_enumerator *enumerator, sd_device *device) {
+ const char *property;
+ const char *value;
+ Iterator i;
+
+ assert(enumerator);
+ assert(device);
+
+ if (hashmap_isempty(enumerator->match_property))
+ return true;
+
+ HASHMAP_FOREACH_KEY(value, property, enumerator->match_property, i) {
+ const char *property_dev, *value_dev;
+
+ FOREACH_DEVICE_PROPERTY(device, property_dev, value_dev) {
+ if (fnmatch(property, property_dev, 0) != 0)
+ continue;
+
+ if (!value && !value_dev)
+ return true;
+
+ if (!value || !value_dev)
+ continue;
+
+ if (fnmatch(value, value_dev, 0) == 0)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool match_tag(sd_device_enumerator *enumerator, sd_device *device) {
+ const char *tag;
+ Iterator i;
+
+ assert(enumerator);
+ assert(device);
+
+ SET_FOREACH(tag, enumerator->match_tag, i)
+ if (!sd_device_has_tag(device, tag))
+ return false;
+
+ return true;
+}
+
+static bool match_parent(sd_device_enumerator *enumerator, sd_device *device) {
+ const char *devpath, *devpath_dev;
+ int r;
+
+ assert(enumerator);
+ assert(device);
+
+ if (!enumerator->match_parent)
+ return true;
+
+ r = sd_device_get_devpath(enumerator->match_parent, &devpath);
+ assert(r >= 0);
+
+ r = sd_device_get_devpath(device, &devpath_dev);
+ assert(r >= 0);
+
+ return startswith(devpath_dev, devpath);
+}
+
+static bool match_sysname(sd_device_enumerator *enumerator, const char *sysname) {
+ const char *sysname_match;
+ Iterator i;
+
+ assert(enumerator);
+ assert(sysname);
+
+ if (set_isempty(enumerator->match_sysname))
+ return true;
+
+ SET_FOREACH(sysname_match, enumerator->match_sysname, i)
+ if (fnmatch(sysname_match, sysname, 0) == 0)
+ return true;
+
+ return false;
+}
+
+static int enumerator_scan_dir_and_add_devices(sd_device_enumerator *enumerator, const char *basedir, const char *subdir1, const char *subdir2) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ char *path;
+ struct dirent *dent;
+ int r = 0;
+
+ assert(enumerator);
+ assert(basedir);
+
+ path = strjoina("/sys/", basedir, "/");
+
+ if (subdir1)
+ path = strjoina(path, subdir1, "/");
+
+ if (subdir2)
+ path = strjoina(path, subdir2, "/");
+
+ dir = opendir(path);
+ if (!dir)
+ return -errno;
+
+ FOREACH_DIRENT_ALL(dent, dir, return -errno) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ char syspath[strlen(path) + 1 + strlen(dent->d_name) + 1];
+ int initialized, k;
+
+ if (dent->d_name[0] == '.')
+ continue;
+
+ if (!match_sysname(enumerator, dent->d_name))
+ continue;
+
+ (void) sprintf(syspath, "%s%s", path, dent->d_name);
+
+ k = sd_device_new_from_syspath(&device, syspath);
+ if (k < 0) {
+ if (k != -ENODEV)
+ /* this is necessarily racey, so ignore missing devices */
+ r = k;
+
+ continue;
+ }
+
+ initialized = sd_device_get_is_initialized(device);
+ if (initialized < 0) {
+ r = initialized;
+ continue;
+ }
+
+ /*
+ * All devices with a device node or network interfaces
+ * possibly need udev to adjust the device node permission
+ * or context, or rename the interface before it can be
+ * reliably used from other processes.
+ *
+ * For now, we can only check these types of devices, we
+ * might not store a database, and have no way to find out
+ * for all other types of devices.
+ */
+ if (!enumerator->match_allow_uninitialized &&
+ !initialized &&
+ (sd_device_get_devnum(device, NULL) >= 0 ||
+ sd_device_get_ifindex(device, NULL) >= 0))
+ continue;
+
+ if (!match_parent(enumerator, device))
+ continue;
+
+ if (!match_tag(enumerator, device))
+ continue;
+
+ if (!match_property(enumerator, device))
+ continue;
+
+ if (!match_sysattr(enumerator, device))
+ continue;
+
+ k = device_enumerator_add_device(enumerator, device);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static bool match_subsystem(sd_device_enumerator *enumerator, const char *subsystem) {
+ const char *subsystem_match;
+ Iterator i;
+
+ assert(enumerator);
+
+ if (!subsystem)
+ return false;
+
+ SET_FOREACH(subsystem_match, enumerator->nomatch_subsystem, i)
+ if (fnmatch(subsystem_match, subsystem, 0) == 0)
+ return false;
+
+ if (set_isempty(enumerator->match_subsystem))
+ return true;
+
+ SET_FOREACH(subsystem_match, enumerator->match_subsystem, i)
+ if (fnmatch(subsystem_match, subsystem, 0) == 0)
+ return true;
+
+ return false;
+}
+
+static int enumerator_scan_dir(sd_device_enumerator *enumerator, const char *basedir, const char *subdir, const char *subsystem) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ char *path;
+ struct dirent *dent;
+ int r = 0;
+
+ path = strjoina("/sys/", basedir);
+
+ dir = opendir(path);
+ if (!dir)
+ return -errno;
+
+ log_debug("sd-device-enumerator: Scanning %s", path);
+
+ FOREACH_DIRENT_ALL(dent, dir, return -errno) {
+ int k;
+
+ if (dent->d_name[0] == '.')
+ continue;
+
+ if (!match_subsystem(enumerator, subsystem ? : dent->d_name))
+ continue;
+
+ k = enumerator_scan_dir_and_add_devices(enumerator, basedir, dent->d_name, subdir);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int enumerator_scan_devices_tag(sd_device_enumerator *enumerator, const char *tag) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ char *path;
+ struct dirent *dent;
+ int r = 0;
+
+ assert(enumerator);
+ assert(tag);
+
+ path = strjoina("/run/udev/tags/", tag);
+
+ dir = opendir(path);
+ if (!dir) {
+ if (errno != ENOENT)
+ return log_debug_errno(errno, "sd-device-enumerator: Failed to open tags directory %s: %m", path);
+ return 0;
+ }
+
+ /* TODO: filter away subsystems? */
+
+ FOREACH_DIRENT_ALL(dent, dir, return -errno) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ const char *subsystem, *sysname;
+ int k;
+
+ if (dent->d_name[0] == '.')
+ continue;
+
+ k = sd_device_new_from_device_id(&device, dent->d_name);
+ if (k < 0) {
+ if (k != -ENODEV)
+ /* this is necessarily racy, so ignore missing devices */
+ r = k;
+
+ continue;
+ }
+
+ k = sd_device_get_subsystem(device, &subsystem);
+ if (k < 0) {
+ r = k;
+ continue;
+ }
+
+ if (!match_subsystem(enumerator, subsystem))
+ continue;
+
+ k = sd_device_get_sysname(device, &sysname);
+ if (k < 0) {
+ r = k;
+ continue;
+ }
+
+ if (!match_sysname(enumerator, sysname))
+ continue;
+
+ if (!match_parent(enumerator, device))
+ continue;
+
+ if (!match_property(enumerator, device))
+ continue;
+
+ if (!match_sysattr(enumerator, device))
+ continue;
+
+ k = device_enumerator_add_device(enumerator, device);
+ if (k < 0) {
+ r = k;
+ continue;
+ }
+ }
+
+ return r;
+}
+
+static int enumerator_scan_devices_tags(sd_device_enumerator *enumerator) {
+ const char *tag;
+ Iterator i;
+ int r = 0;
+
+ assert(enumerator);
+
+ SET_FOREACH(tag, enumerator->match_tag, i) {
+ int k;
+
+ k = enumerator_scan_devices_tag(enumerator, tag);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int parent_add_child(sd_device_enumerator *enumerator, const char *path) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ const char *subsystem, *sysname;
+ int r;
+
+ r = sd_device_new_from_syspath(&device, path);
+ if (r == -ENODEV)
+ /* this is necessarily racy, so ignore missing devices */
+ return 0;
+ else if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ if (!match_subsystem(enumerator, subsystem))
+ return 0;
+
+ r = sd_device_get_sysname(device, &sysname);
+ if (r < 0)
+ return r;
+
+ if (!match_sysname(enumerator, sysname))
+ return 0;
+
+ if (!match_property(enumerator, device))
+ return 0;
+
+ if (!match_sysattr(enumerator, device))
+ return 0;
+
+ r = device_enumerator_add_device(enumerator, device);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int parent_crawl_children(sd_device_enumerator *enumerator, const char *path, unsigned maxdepth) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *dent;
+ int r = 0;
+
+ dir = opendir(path);
+ if (!dir)
+ return log_debug_errno(errno, "sd-device-enumerator: Failed to open parent directory %s: %m", path);
+
+ FOREACH_DIRENT_ALL(dent, dir, return -errno) {
+ _cleanup_free_ char *child = NULL;
+ int k;
+
+ if (dent->d_name[0] == '.')
+ continue;
+
+ if (dent->d_type != DT_DIR)
+ continue;
+
+ child = strjoin(path, "/", dent->d_name);
+ if (!child)
+ return -ENOMEM;
+
+ k = parent_add_child(enumerator, child);
+ if (k < 0)
+ r = k;
+
+ if (maxdepth > 0)
+ parent_crawl_children(enumerator, child, maxdepth - 1);
+ else
+ log_debug("sd-device-enumerator: Max depth reached, %s: ignoring devices", child);
+ }
+
+ return r;
+}
+
+static int enumerator_scan_devices_children(sd_device_enumerator *enumerator) {
+ const char *path;
+ int r = 0, k;
+
+ r = sd_device_get_syspath(enumerator->match_parent, &path);
+ if (r < 0)
+ return r;
+
+ k = parent_add_child(enumerator, path);
+ if (k < 0)
+ r = k;
+
+ k = parent_crawl_children(enumerator, path, DEVICE_ENUMERATE_MAX_DEPTH);
+ if (k < 0)
+ r = k;
+
+ return r;
+}
+
+static int enumerator_scan_devices_all(sd_device_enumerator *enumerator) {
+ int r = 0;
+
+ log_debug("sd-device-enumerator: Scan all dirs");
+
+ if (access("/sys/subsystem", F_OK) >= 0) {
+ /* we have /subsystem/, forget all the old stuff */
+ r = enumerator_scan_dir(enumerator, "subsystem", "devices", NULL);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device-enumerator: Failed to scan /sys/subsystem: %m");
+ } else {
+ int k;
+
+ k = enumerator_scan_dir(enumerator, "bus", "devices", NULL);
+ if (k < 0) {
+ log_debug_errno(k, "sd-device-enumerator: Failed to scan /sys/bus: %m");
+ r = k;
+ }
+
+ k = enumerator_scan_dir(enumerator, "class", NULL, NULL);
+ if (k < 0) {
+ log_debug_errno(k, "sd-device-enumerator: Failed to scan /sys/class: %m");
+ r = k;
+ }
+ }
+
+ return r;
+}
+
+static void device_enumerator_dedup_devices(sd_device_enumerator *enumerator) {
+ sd_device **a, **b, **end;
+
+ assert(enumerator);
+
+ if (enumerator->n_devices <= 1)
+ return;
+
+ a = enumerator->devices + 1;
+ b = enumerator->devices;
+ end = enumerator->devices + enumerator->n_devices;
+
+ for (; a < end; a++) {
+ const char *devpath_a, *devpath_b;
+
+ assert_se(sd_device_get_devpath(*a, &devpath_a) >= 0);
+ assert_se(sd_device_get_devpath(*b, &devpath_b) >= 0);
+
+ if (path_equal(devpath_a, devpath_b))
+ sd_device_unref(*a);
+ else
+ *(++b) = *a;
+ }
+
+ enumerator->n_devices = b - enumerator->devices + 1;
+}
+
+int device_enumerator_scan_devices(sd_device_enumerator *enumerator) {
+ int r = 0, k;
+ size_t i;
+
+ assert(enumerator);
+
+ if (enumerator->scan_uptodate &&
+ enumerator->type == DEVICE_ENUMERATION_TYPE_DEVICES)
+ return 0;
+
+ for (i = 0; i < enumerator->n_devices; i++)
+ sd_device_unref(enumerator->devices[i]);
+
+ enumerator->n_devices = 0;
+
+ if (!set_isempty(enumerator->match_tag)) {
+ k = enumerator_scan_devices_tags(enumerator);
+ if (k < 0)
+ r = k;
+ } else if (enumerator->match_parent) {
+ k = enumerator_scan_devices_children(enumerator);
+ if (k < 0)
+ r = k;
+ } else {
+ k = enumerator_scan_devices_all(enumerator);
+ if (k < 0)
+ r = k;
+ }
+
+ typesafe_qsort(enumerator->devices, enumerator->n_devices, device_compare);
+ device_enumerator_dedup_devices(enumerator);
+
+ enumerator->scan_uptodate = true;
+ enumerator->type = DEVICE_ENUMERATION_TYPE_DEVICES;
+
+ return r;
+}
+
+_public_ sd_device *sd_device_enumerator_get_device_first(sd_device_enumerator *enumerator) {
+ int r;
+
+ assert_return(enumerator, NULL);
+
+ r = device_enumerator_scan_devices(enumerator);
+ if (r < 0)
+ return NULL;
+
+ enumerator->current_device_index = 0;
+
+ if (enumerator->n_devices == 0)
+ return NULL;
+
+ return enumerator->devices[0];
+}
+
+_public_ sd_device *sd_device_enumerator_get_device_next(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, NULL);
+
+ if (!enumerator->scan_uptodate ||
+ enumerator->type != DEVICE_ENUMERATION_TYPE_DEVICES ||
+ enumerator->current_device_index + 1 >= enumerator->n_devices)
+ return NULL;
+
+ return enumerator->devices[++enumerator->current_device_index];
+}
+
+int device_enumerator_scan_subsystems(sd_device_enumerator *enumerator) {
+ const char *subsysdir;
+ int r = 0, k;
+ size_t i;
+
+ assert(enumerator);
+
+ if (enumerator->scan_uptodate &&
+ enumerator->type == DEVICE_ENUMERATION_TYPE_SUBSYSTEMS)
+ return 0;
+
+ for (i = 0; i < enumerator->n_devices; i++)
+ sd_device_unref(enumerator->devices[i]);
+
+ enumerator->n_devices = 0;
+
+ /* modules */
+ if (match_subsystem(enumerator, "module")) {
+ k = enumerator_scan_dir_and_add_devices(enumerator, "module", NULL, NULL);
+ if (k < 0) {
+ log_debug_errno(k, "sd-device-enumerator: Failed to scan modules: %m");
+ r = k;
+ }
+ }
+
+ if (access("/sys/subsystem", F_OK) >= 0)
+ subsysdir = "subsystem";
+ else
+ subsysdir = "bus";
+
+ /* subsystems (only buses support coldplug) */
+ if (match_subsystem(enumerator, "subsystem")) {
+ k = enumerator_scan_dir_and_add_devices(enumerator, subsysdir, NULL, NULL);
+ if (k < 0) {
+ log_debug_errno(k, "sd-device-enumerator: Failed to scan subsystems: %m");
+ r = k;
+ }
+ }
+
+ /* subsystem drivers */
+ if (match_subsystem(enumerator, "drivers")) {
+ k = enumerator_scan_dir(enumerator, subsysdir, "drivers", "drivers");
+ if (k < 0) {
+ log_debug_errno(k, "sd-device-enumerator: Failed to scan drivers: %m");
+ r = k;
+ }
+ }
+
+ typesafe_qsort(enumerator->devices, enumerator->n_devices, device_compare);
+ device_enumerator_dedup_devices(enumerator);
+
+ enumerator->scan_uptodate = true;
+ enumerator->type = DEVICE_ENUMERATION_TYPE_SUBSYSTEMS;
+
+ return r;
+}
+
+_public_ sd_device *sd_device_enumerator_get_subsystem_first(sd_device_enumerator *enumerator) {
+ int r;
+
+ assert_return(enumerator, NULL);
+
+ r = device_enumerator_scan_subsystems(enumerator);
+ if (r < 0)
+ return NULL;
+
+ enumerator->current_device_index = 0;
+
+ if (enumerator->n_devices == 0)
+ return NULL;
+
+ return enumerator->devices[0];
+}
+
+_public_ sd_device *sd_device_enumerator_get_subsystem_next(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, NULL);
+
+ if (!enumerator->scan_uptodate ||
+ enumerator->type != DEVICE_ENUMERATION_TYPE_SUBSYSTEMS ||
+ enumerator->current_device_index + 1 >= enumerator->n_devices)
+ return NULL;
+
+ return enumerator->devices[++enumerator->current_device_index];
+}
+
+sd_device *device_enumerator_get_first(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, NULL);
+
+ if (!enumerator->scan_uptodate)
+ return NULL;
+
+ enumerator->current_device_index = 0;
+
+ if (enumerator->n_devices == 0)
+ return NULL;
+
+ return enumerator->devices[0];
+}
+
+sd_device *device_enumerator_get_next(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, NULL);
+
+ if (!enumerator->scan_uptodate ||
+ enumerator->current_device_index + 1 >= enumerator->n_devices)
+ return NULL;
+
+ return enumerator->devices[++enumerator->current_device_index];
+}
+
+sd_device **device_enumerator_get_devices(sd_device_enumerator *enumerator, size_t *ret_n_devices) {
+ assert(enumerator);
+ assert(ret_n_devices);
+
+ if (!enumerator->scan_uptodate)
+ return NULL;
+
+ *ret_n_devices = enumerator->n_devices;
+ return enumerator->devices;
+}
diff --git a/src/libsystemd/sd-device/device-internal.h b/src/libsystemd/sd-device/device-internal.h
new file mode 100644
index 0000000..3ffca35
--- /dev/null
+++ b/src/libsystemd/sd-device/device-internal.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-device.h"
+
+#include "hashmap.h"
+#include "set.h"
+#include "time-util.h"
+
+struct sd_device {
+ unsigned n_ref;
+
+ int watch_handle;
+
+ sd_device *parent;
+
+ OrderedHashmap *properties;
+ Iterator properties_iterator;
+ uint64_t properties_generation; /* changes whenever the properties are changed */
+ uint64_t properties_iterator_generation; /* generation when iteration was started */
+
+ /* the subset of the properties that should be written to the db */
+ OrderedHashmap *properties_db;
+
+ Hashmap *sysattr_values; /* cached sysattr values */
+
+ Set *sysattrs; /* names of sysattrs */
+ Iterator sysattrs_iterator;
+
+ Set *tags;
+ Iterator tags_iterator;
+ uint64_t tags_generation; /* changes whenever the tags are changed */
+ uint64_t tags_iterator_generation; /* generation when iteration was started */
+
+ Set *devlinks;
+ Iterator devlinks_iterator;
+ uint64_t devlinks_generation; /* changes whenever the devlinks are changed */
+ uint64_t devlinks_iterator_generation; /* generation when iteration was started */
+ int devlink_priority;
+
+ int ifindex;
+ char *devtype;
+ char *devname;
+ dev_t devnum;
+
+ char **properties_strv; /* the properties hashmap as a strv */
+ uint8_t *properties_nulstr; /* the same as a nulstr */
+ size_t properties_nulstr_len;
+
+ char *syspath;
+ const char *devpath;
+ const char *sysnum;
+ char *sysname;
+
+ char *subsystem;
+ char *driver_subsystem; /* only set for the 'drivers' subsystem */
+ char *driver;
+
+ char *id_filename;
+
+ uint64_t usec_initialized;
+
+ mode_t devmode;
+ uid_t devuid;
+ gid_t devgid;
+
+ bool parent_set:1; /* no need to try to reload parent */
+ bool sysattrs_read:1; /* don't try to re-read sysattrs once read */
+ bool property_tags_outdated:1; /* need to update TAGS= property */
+ bool property_devlinks_outdated:1; /* need to update DEVLINKS= property */
+ bool properties_buf_outdated:1; /* need to reread hashmap */
+ bool sysname_set:1; /* don't reread sysname */
+ bool subsystem_set:1; /* don't reread subsystem */
+ bool driver_subsystem_set:1; /* don't reread subsystem */
+ bool driver_set:1; /* don't reread driver */
+ bool uevent_loaded:1; /* don't reread uevent */
+ bool db_loaded; /* don't reread db */
+
+ bool is_initialized:1;
+ bool sealed:1; /* don't read more information from uevent/db */
+ bool db_persist:1; /* don't clean up the db when switching from initrd to real root */
+};
+
+typedef enum DeviceAction {
+ DEVICE_ACTION_ADD,
+ DEVICE_ACTION_REMOVE,
+ DEVICE_ACTION_CHANGE,
+ DEVICE_ACTION_MOVE,
+ DEVICE_ACTION_ONLINE,
+ DEVICE_ACTION_OFFLINE,
+ DEVICE_ACTION_BIND,
+ DEVICE_ACTION_UNBIND,
+ _DEVICE_ACTION_MAX,
+ _DEVICE_ACTION_INVALID = -1,
+} DeviceAction;
+
+int device_new_aux(sd_device **ret);
+int device_add_property_aux(sd_device *device, const char *key, const char *value, bool db);
+int device_add_property_internal(sd_device *device, const char *key, const char *value);
+int device_read_uevent_file(sd_device *device);
+
+int device_set_syspath(sd_device *device, const char *_syspath, bool verify);
+int device_set_ifindex(sd_device *device, const char *ifindex);
+int device_set_devmode(sd_device *device, const char *devmode);
+int device_set_devname(sd_device *device, const char *_devname);
+int device_set_devtype(sd_device *device, const char *_devtype);
+int device_set_devnum(sd_device *device, const char *major, const char *minor);
+int device_set_subsystem(sd_device *device, const char *_subsystem);
+int device_set_driver(sd_device *device, const char *_driver);
+int device_set_usec_initialized(sd_device *device, usec_t when);
+
+DeviceAction device_action_from_string(const char *s) _pure_;
+const char *device_action_to_string(DeviceAction a) _const_;
diff --git a/src/libsystemd/sd-device/device-monitor-private.h b/src/libsystemd/sd-device/device-monitor-private.h
new file mode 100644
index 0000000..2659cc3
--- /dev/null
+++ b/src/libsystemd/sd-device/device-monitor-private.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-device.h"
+
+typedef enum MonitorNetlinkGroup {
+ MONITOR_GROUP_NONE,
+ MONITOR_GROUP_KERNEL,
+ MONITOR_GROUP_UDEV,
+ _MONITOR_NETLINK_GROUP_MAX,
+ _MONITOR_NETLINK_GROUP_INVALID = -1,
+} MonitorNetlinkGroup;
+
+int device_monitor_new_full(sd_device_monitor **ret, MonitorNetlinkGroup group, int fd);
+int device_monitor_disconnect(sd_device_monitor *m);
+int device_monitor_allow_unicast_sender(sd_device_monitor *m, sd_device_monitor *sender);
+int device_monitor_enable_receiving(sd_device_monitor *m);
+int device_monitor_get_fd(sd_device_monitor *m);
+int device_monitor_send_device(sd_device_monitor *m, sd_device_monitor *destination, sd_device *device);
+int device_monitor_receive_device(sd_device_monitor *m, sd_device **ret);
diff --git a/src/libsystemd/sd-device/device-monitor.c b/src/libsystemd/sd-device/device-monitor.c
new file mode 100644
index 0000000..27d0af5
--- /dev/null
+++ b/src/libsystemd/sd-device/device-monitor.c
@@ -0,0 +1,756 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <linux/filter.h>
+#include <linux/netlink.h>
+#include <sys/socket.h>
+
+#include "sd-device.h"
+#include "sd-event.h"
+
+#include "MurmurHash2.h"
+#include "alloc-util.h"
+#include "device-monitor-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "hashmap.h"
+#include "io-util.h"
+#include "missing.h"
+#include "mountpoint-util.h"
+#include "set.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+struct sd_device_monitor {
+ unsigned n_ref;
+
+ int sock;
+ union sockaddr_union snl;
+ union sockaddr_union snl_trusted_sender;
+ bool bound;
+
+ Hashmap *subsystem_filter;
+ Set *tag_filter;
+ bool filter_uptodate;
+
+ sd_event *event;
+ sd_event_source *event_source;
+ sd_device_monitor_handler_t callback;
+ void *userdata;
+};
+
+#define UDEV_MONITOR_MAGIC 0xfeedcafe
+
+typedef struct monitor_netlink_header {
+ /* "libudev" prefix to distinguish libudev and kernel messages */
+ char prefix[8];
+ /* Magic to protect against daemon <-> Library message format mismatch
+ * Used in the kernel from socket filter rules; needs to be stored in network order */
+ unsigned magic;
+ /* Total length of header structure known to the sender */
+ unsigned header_size;
+ /* Properties string buffer */
+ unsigned properties_off;
+ unsigned properties_len;
+ /* Hashes of primary device properties strings, to let libudev subscribers
+ * use in-kernel socket filters; values need to be stored in network order */
+ unsigned filter_subsystem_hash;
+ unsigned filter_devtype_hash;
+ unsigned filter_tag_bloom_hi;
+ unsigned filter_tag_bloom_lo;
+} monitor_netlink_header;
+
+static int monitor_set_nl_address(sd_device_monitor *m) {
+ union sockaddr_union snl;
+ socklen_t addrlen;
+
+ assert(m);
+
+ /* Get the address the kernel has assigned us.
+ * It is usually, but not necessarily the pid. */
+ addrlen = sizeof(struct sockaddr_nl);
+ if (getsockname(m->sock, &snl.sa, &addrlen) < 0)
+ return -errno;
+
+ m->snl.nl.nl_pid = snl.nl.nl_pid;
+ return 0;
+}
+
+int device_monitor_allow_unicast_sender(sd_device_monitor *m, sd_device_monitor *sender) {
+ assert_return(m, -EINVAL);
+ assert_return(sender, -EINVAL);
+
+ m->snl_trusted_sender.nl.nl_pid = sender->snl.nl.nl_pid;
+ return 0;
+}
+
+_public_ int sd_device_monitor_set_receive_buffer_size(sd_device_monitor *m, size_t size) {
+ int r, n = (int) size;
+
+ assert_return(m, -EINVAL);
+ assert_return((size_t) n == size, -EINVAL);
+
+ if (setsockopt_int(m->sock, SOL_SOCKET, SO_RCVBUFFORCE, n) < 0) {
+ r = setsockopt_int(m->sock, SOL_SOCKET, SO_RCVBUF, n);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int device_monitor_disconnect(sd_device_monitor *m) {
+ assert(m);
+
+ m->sock = safe_close(m->sock);
+ return 0;
+}
+
+int device_monitor_get_fd(sd_device_monitor *m) {
+ assert_return(m, -EINVAL);
+
+ return m->sock;
+}
+
+int device_monitor_new_full(sd_device_monitor **ret, MonitorNetlinkGroup group, int fd) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *m = NULL;
+ _cleanup_close_ int sock = -1;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(group >= 0 && group < _MONITOR_NETLINK_GROUP_MAX, -EINVAL);
+
+ if (group == MONITOR_GROUP_UDEV &&
+ access("/run/udev/control", F_OK) < 0 &&
+ dev_is_devtmpfs() <= 0) {
+
+ /*
+ * We do not support subscribing to uevents if no instance of
+ * udev is running. Uevents would otherwise broadcast the
+ * processing data of the host into containers, which is not
+ * desired.
+ *
+ * Containers will currently not get any udev uevents, until
+ * a supporting infrastructure is available.
+ *
+ * We do not set a netlink multicast group here, so the socket
+ * will not receive any messages.
+ */
+
+ log_debug("sd-device-monitor: The udev service seems not to be active, disabling the monitor");
+ group = MONITOR_GROUP_NONE;
+ }
+
+ if (fd < 0) {
+ sock = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_KOBJECT_UEVENT);
+ if (sock < 0)
+ return log_debug_errno(errno, "sd-device-monitor: Failed to create socket: %m");
+ }
+
+ m = new(sd_device_monitor, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (sd_device_monitor) {
+ .n_ref = 1,
+ .sock = fd >= 0 ? fd : TAKE_FD(sock),
+ .bound = fd >= 0,
+ .snl.nl.nl_family = AF_NETLINK,
+ .snl.nl.nl_groups = group,
+ };
+
+ if (fd >= 0) {
+ r = monitor_set_nl_address(m);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device-monitor: Failed to set netlink address: %m");
+ }
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+_public_ int sd_device_monitor_new(sd_device_monitor **ret) {
+ return device_monitor_new_full(ret, MONITOR_GROUP_UDEV, -1);
+}
+
+_public_ int sd_device_monitor_stop(sd_device_monitor *m) {
+ assert_return(m, -EINVAL);
+
+ m->event_source = sd_event_source_unref(m->event_source);
+ (void) device_monitor_disconnect(m);
+
+ return 0;
+}
+
+static int device_monitor_event_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ sd_device_monitor *m = userdata;
+
+ assert(m);
+
+ if (device_monitor_receive_device(m, &device) <= 0)
+ return 0;
+
+ if (m->callback)
+ return m->callback(m, device, m->userdata);
+
+ return 0;
+}
+
+_public_ int sd_device_monitor_start(sd_device_monitor *m, sd_device_monitor_handler_t callback, void *userdata) {
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ if (!m->event) {
+ r = sd_device_monitor_attach_event(m, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = device_monitor_enable_receiving(m);
+ if (r < 0)
+ return r;
+
+ m->callback = callback;
+ m->userdata = userdata;
+
+ r = sd_event_add_io(m->event, &m->event_source, m->sock, EPOLLIN, device_monitor_event_handler, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->event_source, "sd-device-monitor");
+
+ return 0;
+}
+
+_public_ int sd_device_monitor_detach_event(sd_device_monitor *m) {
+ assert_return(m, -EINVAL);
+
+ (void) sd_device_monitor_stop(m);
+ m->event = sd_event_unref(m->event);
+
+ return 0;
+}
+
+_public_ int sd_device_monitor_attach_event(sd_device_monitor *m, sd_event *event) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->event, -EBUSY);
+
+ if (event)
+ m->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+_public_ sd_event *sd_device_monitor_get_event(sd_device_monitor *m) {
+ assert_return(m, NULL);
+
+ return m->event;
+}
+
+_public_ sd_event_source *sd_device_monitor_get_event_source(sd_device_monitor *m) {
+ assert_return(m, NULL);
+
+ return m->event_source;
+}
+
+int device_monitor_enable_receiving(sd_device_monitor *m) {
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = sd_device_monitor_filter_update(m);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device-monitor: Failed to update filter: %m");
+
+ if (!m->bound) {
+ /* enable receiving of sender credentials */
+ r = setsockopt_int(m->sock, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device-monitor: Failed to set socket option SO_PASSCRED: %m");
+
+ if (bind(m->sock, &m->snl.sa, sizeof(struct sockaddr_nl)) < 0)
+ return log_debug_errno(errno, "sd-device-monitor: Failed to bind monitoring socket: %m");
+
+ m->bound = true;
+
+ r = monitor_set_nl_address(m);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device-monitor: Failed to set address: %m");
+ }
+
+ return 0;
+}
+
+static sd_device_monitor *device_monitor_free(sd_device_monitor *m) {
+ assert(m);
+
+ (void) sd_device_monitor_detach_event(m);
+
+ hashmap_free_free_free(m->subsystem_filter);
+ set_free_free(m->tag_filter);
+
+ return mfree(m);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_device_monitor, sd_device_monitor, device_monitor_free);
+
+static int passes_filter(sd_device_monitor *m, sd_device *device) {
+ const char *tag, *subsystem, *devtype, *s, *d = NULL;
+ Iterator i;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(device, -EINVAL);
+
+ if (hashmap_isempty(m->subsystem_filter))
+ goto tag;
+
+ r = sd_device_get_subsystem(device, &s);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devtype(device, &d);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ HASHMAP_FOREACH_KEY(devtype, subsystem, m->subsystem_filter, i) {
+ if (!streq(s, subsystem))
+ continue;
+
+ if (!devtype)
+ goto tag;
+
+ if (!d)
+ continue;
+
+ if (streq(d, devtype))
+ goto tag;
+ }
+
+ return 0;
+
+tag:
+ if (set_isempty(m->tag_filter))
+ return 1;
+
+ SET_FOREACH(tag, m->tag_filter, i)
+ if (sd_device_has_tag(device, tag) > 0)
+ return 1;
+
+ return 0;
+}
+
+int device_monitor_receive_device(sd_device_monitor *m, sd_device **ret) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ union {
+ monitor_netlink_header nlh;
+ char raw[8192];
+ } buf;
+ struct iovec iov = {
+ .iov_base = &buf,
+ .iov_len = sizeof(buf)
+ };
+ char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
+ union sockaddr_union snl;
+ struct msghdr smsg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = cred_msg,
+ .msg_controllen = sizeof(cred_msg),
+ .msg_name = &snl,
+ .msg_namelen = sizeof(snl),
+ };
+ struct cmsghdr *cmsg;
+ struct ucred *cred;
+ ssize_t buflen, bufpos;
+ bool is_initialized = false;
+ int r;
+
+ assert(ret);
+
+ buflen = recvmsg(m->sock, &smsg, 0);
+ if (buflen < 0) {
+ if (errno != EINTR)
+ log_debug_errno(errno, "sd-device-monitor: Failed to receive message: %m");
+ return -errno;
+ }
+
+ if (buflen < 32 || (smsg.msg_flags & MSG_TRUNC))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "sd-device-monitor: Invalid message length.");
+
+ if (snl.nl.nl_groups == MONITOR_GROUP_NONE) {
+ /* unicast message, check if we trust the sender */
+ if (m->snl_trusted_sender.nl.nl_pid == 0 ||
+ snl.nl.nl_pid != m->snl_trusted_sender.nl.nl_pid)
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: Unicast netlink message ignored.");
+
+ } else if (snl.nl.nl_groups == MONITOR_GROUP_KERNEL) {
+ if (snl.nl.nl_pid > 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: Multicast kernel netlink message from PID %"PRIu32" ignored.", snl.nl.nl_pid);
+ }
+
+ cmsg = CMSG_FIRSTHDR(&smsg);
+ if (!cmsg || cmsg->cmsg_type != SCM_CREDENTIALS)
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: No sender credentials received, message ignored.");
+
+ cred = (struct ucred*) CMSG_DATA(cmsg);
+ if (cred->uid != 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: Sender uid="UID_FMT", message ignored.", cred->uid);
+
+ if (streq(buf.raw, "libudev")) {
+ /* udev message needs proper version magic */
+ if (buf.nlh.magic != htobe32(UDEV_MONITOR_MAGIC))
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: Invalid message signature (%x != %x)",
+ buf.nlh.magic, htobe32(UDEV_MONITOR_MAGIC));
+
+ if (buf.nlh.properties_off+32 > (size_t) buflen)
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: Invalid message length (%u > %zd)",
+ buf.nlh.properties_off+32, buflen);
+
+ bufpos = buf.nlh.properties_off;
+
+ /* devices received from udev are always initialized */
+ is_initialized = true;
+
+ } else {
+ /* kernel message with header */
+ bufpos = strlen(buf.raw) + 1;
+ if ((size_t) bufpos < sizeof("a@/d") || bufpos >= buflen)
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: Invalid message length");
+
+ /* check message header */
+ if (!strstr(buf.raw, "@/"))
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: Invalid message header");
+ }
+
+ r = device_new_from_nulstr(&device, (uint8_t*) &buf.raw[bufpos], buflen - bufpos);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device-monitor: Failed to create device from received message: %m");
+
+ if (is_initialized)
+ device_set_is_initialized(device);
+
+ /* Skip device, if it does not pass the current filter */
+ r = passes_filter(m, device);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device-monitor: Failed to check received device passing filter: %m");
+ if (r == 0)
+ log_device_debug(device, "sd-device-monitor: Received device does not pass filter, ignoring");
+ else
+ *ret = TAKE_PTR(device);
+
+ return r;
+}
+
+static uint32_t string_hash32(const char *str) {
+ return MurmurHash2(str, strlen(str), 0);
+}
+
+/* Get a bunch of bit numbers out of the hash, and set the bits in our bit field */
+static uint64_t string_bloom64(const char *str) {
+ uint64_t bits = 0;
+ uint32_t hash = string_hash32(str);
+
+ bits |= 1LLU << (hash & 63);
+ bits |= 1LLU << ((hash >> 6) & 63);
+ bits |= 1LLU << ((hash >> 12) & 63);
+ bits |= 1LLU << ((hash >> 18) & 63);
+ return bits;
+}
+
+int device_monitor_send_device(
+ sd_device_monitor *m,
+ sd_device_monitor *destination,
+ sd_device *device) {
+
+ monitor_netlink_header nlh = {
+ .prefix = "libudev",
+ .magic = htobe32(UDEV_MONITOR_MAGIC),
+ .header_size = sizeof nlh,
+ };
+ struct iovec iov[2] = {
+ { .iov_base = &nlh, .iov_len = sizeof nlh },
+ };
+ struct msghdr smsg = {
+ .msg_iov = iov,
+ .msg_iovlen = 2,
+ };
+ /* default destination for sending */
+ union sockaddr_union default_destination = {
+ .nl.nl_family = AF_NETLINK,
+ .nl.nl_groups = MONITOR_GROUP_UDEV,
+ };
+ uint64_t tag_bloom_bits;
+ const char *buf, *val;
+ ssize_t count;
+ size_t blen;
+ int r;
+
+ assert(m);
+ assert(device);
+
+ r = device_get_properties_nulstr(device, (const uint8_t **) &buf, &blen);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device-monitor: Failed to get device properties: %m");
+ if (blen < 32) {
+ log_device_debug(device, "sd-device-monitor: Length of device property nulstr is too small to contain valid device information");
+ return -EINVAL;
+ }
+
+ /* fill in versioned header */
+ r = sd_device_get_subsystem(device, &val);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device-monitor: Failed to get device subsystem: %m");
+ nlh.filter_subsystem_hash = htobe32(string_hash32(val));
+
+ if (sd_device_get_devtype(device, &val) >= 0)
+ nlh.filter_devtype_hash = htobe32(string_hash32(val));
+
+ /* add tag bloom filter */
+ tag_bloom_bits = 0;
+ FOREACH_DEVICE_TAG(device, val)
+ tag_bloom_bits |= string_bloom64(val);
+
+ if (tag_bloom_bits > 0) {
+ nlh.filter_tag_bloom_hi = htobe32(tag_bloom_bits >> 32);
+ nlh.filter_tag_bloom_lo = htobe32(tag_bloom_bits & 0xffffffff);
+ }
+
+ /* add properties list */
+ nlh.properties_off = iov[0].iov_len;
+ nlh.properties_len = blen;
+ iov[1] = IOVEC_MAKE((char*) buf, blen);
+
+ /*
+ * Use custom address for target, or the default one.
+ *
+ * If we send to a multicast group, we will get
+ * ECONNREFUSED, which is expected.
+ */
+ smsg.msg_name = destination ? &destination->snl : &default_destination;
+ smsg.msg_namelen = sizeof(struct sockaddr_nl);
+ count = sendmsg(m->sock, &smsg, 0);
+ if (count < 0) {
+ if (!destination && errno == ECONNREFUSED) {
+ log_device_debug(device, "sd-device-monitor: Passed to netlink monitor");
+ return 0;
+ } else
+ return log_device_debug_errno(device, errno, "sd-device-monitor: Failed to send device to netlink monitor: %m");
+ }
+
+ log_device_debug(device, "sd-device-monitor: Passed %zi byte to netlink monitor", count);
+ return count;
+}
+
+static void bpf_stmt(struct sock_filter *ins, unsigned *i,
+ unsigned short code, unsigned data) {
+ ins[(*i)++] = (struct sock_filter) {
+ .code = code,
+ .k = data,
+ };
+}
+
+static void bpf_jmp(struct sock_filter *ins, unsigned *i,
+ unsigned short code, unsigned data,
+ unsigned short jt, unsigned short jf) {
+ ins[(*i)++] = (struct sock_filter) {
+ .code = code,
+ .jt = jt,
+ .jf = jf,
+ .k = data,
+ };
+}
+
+_public_ int sd_device_monitor_filter_update(sd_device_monitor *m) {
+ struct sock_filter ins[512] = {};
+ struct sock_fprog filter;
+ const char *subsystem, *devtype, *tag;
+ unsigned i = 0;
+ Iterator it;
+
+ assert_return(m, -EINVAL);
+
+ if (m->filter_uptodate)
+ return 0;
+
+ if (hashmap_isempty(m->subsystem_filter) &&
+ set_isempty(m->tag_filter)) {
+ m->filter_uptodate = true;
+ return 0;
+ }
+
+ /* load magic in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, magic));
+ /* jump if magic matches */
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, UDEV_MONITOR_MAGIC, 1, 0);
+ /* wrong magic, pass packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff);
+
+ if (!set_isempty(m->tag_filter)) {
+ int tag_matches = set_size(m->tag_filter);
+
+ /* add all tags matches */
+ SET_FOREACH(tag, m->tag_filter, it) {
+ uint64_t tag_bloom_bits = string_bloom64(tag);
+ uint32_t tag_bloom_hi = tag_bloom_bits >> 32;
+ uint32_t tag_bloom_lo = tag_bloom_bits & 0xffffffff;
+
+ /* load device bloom bits in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_tag_bloom_hi));
+ /* clear bits (tag bits & bloom bits) */
+ bpf_stmt(ins, &i, BPF_ALU|BPF_AND|BPF_K, tag_bloom_hi);
+ /* jump to next tag if it does not match */
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, tag_bloom_hi, 0, 3);
+
+ /* load device bloom bits in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_tag_bloom_lo));
+ /* clear bits (tag bits & bloom bits) */
+ bpf_stmt(ins, &i, BPF_ALU|BPF_AND|BPF_K, tag_bloom_lo);
+ /* jump behind end of tag match block if tag matches */
+ tag_matches--;
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, tag_bloom_lo, 1 + (tag_matches * 6), 0);
+ }
+
+ /* nothing matched, drop packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0);
+ }
+
+ /* add all subsystem matches */
+ if (!hashmap_isempty(m->subsystem_filter)) {
+ HASHMAP_FOREACH_KEY(devtype, subsystem, m->subsystem_filter, it) {
+ uint32_t hash = string_hash32(subsystem);
+
+ /* load device subsystem value in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_subsystem_hash));
+ if (!devtype) {
+ /* jump if subsystem does not match */
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 1);
+ } else {
+ /* jump if subsystem does not match */
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 3);
+ /* load device devtype value in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_devtype_hash));
+ /* jump if value does not match */
+ hash = string_hash32(devtype);
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 1);
+ }
+
+ /* matched, pass packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff);
+
+ if (i+1 >= ELEMENTSOF(ins))
+ return -E2BIG;
+ }
+
+ /* nothing matched, drop packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0);
+ }
+
+ /* matched, pass packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff);
+
+ /* install filter */
+ filter = (struct sock_fprog) {
+ .len = i,
+ .filter = ins,
+ };
+ if (setsockopt(m->sock, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)) < 0)
+ return -errno;
+
+ m->filter_uptodate = true;
+ return 0;
+}
+
+_public_ int sd_device_monitor_filter_add_match_subsystem_devtype(sd_device_monitor *m, const char *subsystem, const char *devtype) {
+ _cleanup_free_ char *s = NULL, *d = NULL;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(subsystem, -EINVAL);
+
+ s = strdup(subsystem);
+ if (!s)
+ return -ENOMEM;
+
+ if (devtype) {
+ d = strdup(devtype);
+ if (!d)
+ return -ENOMEM;
+ }
+
+ r = hashmap_ensure_allocated(&m->subsystem_filter, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->subsystem_filter, s, d);
+ if (r < 0)
+ return r;
+
+ s = d = NULL;
+ m->filter_uptodate = false;
+
+ return 0;
+}
+
+_public_ int sd_device_monitor_filter_add_match_tag(sd_device_monitor *m, const char *tag) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(tag, -EINVAL);
+
+ t = strdup(tag);
+ if (!t)
+ return -ENOMEM;
+
+ r = set_ensure_allocated(&m->tag_filter, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = set_put(m->tag_filter, t);
+ if (r == -EEXIST)
+ return 0;
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(t);
+ m->filter_uptodate = false;
+
+ return 0;
+}
+
+_public_ int sd_device_monitor_filter_remove(sd_device_monitor *m) {
+ static const struct sock_fprog filter = { 0, NULL };
+
+ assert_return(m, -EINVAL);
+
+ m->subsystem_filter = hashmap_free_free_free(m->subsystem_filter);
+ m->tag_filter = set_free_free(m->tag_filter);
+
+ if (setsockopt(m->sock, SOL_SOCKET, SO_DETACH_FILTER, &filter, sizeof(filter)) < 0)
+ return -errno;
+
+ m->filter_uptodate = true;
+ return 0;
+}
diff --git a/src/libsystemd/sd-device/device-private.c b/src/libsystemd/sd-device/device-private.c
new file mode 100644
index 0000000..76267a1
--- /dev/null
+++ b/src/libsystemd/sd-device/device-private.c
@@ -0,0 +1,956 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <net/if.h>
+#include <sys/types.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-internal.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "refcnt.h"
+#include "set.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+int device_add_property(sd_device *device, const char *key, const char *value) {
+ int r;
+
+ assert(device);
+ assert(key);
+
+ r = device_add_property_aux(device, key, value, false);
+ if (r < 0)
+ return r;
+
+ if (key[0] != '.') {
+ r = device_add_property_aux(device, key, value, true);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+void device_set_devlink_priority(sd_device *device, int priority) {
+ assert(device);
+
+ device->devlink_priority = priority;
+}
+
+void device_set_is_initialized(sd_device *device) {
+ assert(device);
+
+ device->is_initialized = true;
+}
+
+int device_ensure_usec_initialized(sd_device *device, sd_device *device_old) {
+ usec_t when;
+
+ assert(device);
+
+ if (device_old && device_old->usec_initialized > 0)
+ when = device_old->usec_initialized;
+ else
+ when = now(CLOCK_MONOTONIC);
+
+ return device_set_usec_initialized(device, when);
+}
+
+uint64_t device_get_properties_generation(sd_device *device) {
+ assert(device);
+
+ return device->properties_generation;
+}
+
+uint64_t device_get_tags_generation(sd_device *device) {
+ assert(device);
+
+ return device->tags_generation;
+}
+
+uint64_t device_get_devlinks_generation(sd_device *device) {
+ assert(device);
+
+ return device->devlinks_generation;
+}
+
+int device_get_devnode_mode(sd_device *device, mode_t *mode) {
+ int r;
+
+ assert(device);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (device->devmode == (mode_t) -1)
+ return -ENOENT;
+
+ if (mode)
+ *mode = device->devmode;
+
+ return 0;
+}
+
+int device_get_devnode_uid(sd_device *device, uid_t *uid) {
+ int r;
+
+ assert(device);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (device->devuid == (uid_t) -1)
+ return -ENOENT;
+
+ if (uid)
+ *uid = device->devuid;
+
+ return 0;
+}
+
+static int device_set_devuid(sd_device *device, const char *uid) {
+ unsigned u;
+ int r;
+
+ assert(device);
+ assert(uid);
+
+ r = safe_atou(uid, &u);
+ if (r < 0)
+ return r;
+
+ r = device_add_property_internal(device, "DEVUID", uid);
+ if (r < 0)
+ return r;
+
+ device->devuid = u;
+
+ return 0;
+}
+
+int device_get_devnode_gid(sd_device *device, gid_t *gid) {
+ int r;
+
+ assert(device);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (device->devgid == (gid_t) -1)
+ return -ENOENT;
+
+ if (gid)
+ *gid = device->devgid;
+
+ return 0;
+}
+
+static int device_set_devgid(sd_device *device, const char *gid) {
+ unsigned g;
+ int r;
+
+ assert(device);
+ assert(gid);
+
+ r = safe_atou(gid, &g);
+ if (r < 0)
+ return r;
+
+ r = device_add_property_internal(device, "DEVGID", gid);
+ if (r < 0)
+ return r;
+
+ device->devgid = g;
+
+ return 0;
+}
+
+static int device_amend(sd_device *device, const char *key, const char *value) {
+ int r;
+
+ assert(device);
+ assert(key);
+ assert(value);
+
+ if (streq(key, "DEVPATH")) {
+ char *path;
+
+ path = strjoina("/sys", value);
+
+ /* the caller must verify or trust this data (e.g., if it comes from the kernel) */
+ r = device_set_syspath(device, path, false);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set syspath to '%s': %m", path);
+ } else if (streq(key, "SUBSYSTEM")) {
+ r = device_set_subsystem(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set subsystem to '%s': %m", value);
+ } else if (streq(key, "DEVTYPE")) {
+ r = device_set_devtype(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devtype to '%s': %m", value);
+ } else if (streq(key, "DEVNAME")) {
+ r = device_set_devname(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devname to '%s': %m", value);
+ } else if (streq(key, "USEC_INITIALIZED")) {
+ usec_t t;
+
+ r = safe_atou64(value, &t);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to parse timestamp '%s': %m", value);
+
+ r = device_set_usec_initialized(device, t);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set usec-initialized to '%s': %m", value);
+ } else if (streq(key, "DRIVER")) {
+ r = device_set_driver(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set driver to '%s': %m", value);
+ } else if (streq(key, "IFINDEX")) {
+ r = device_set_ifindex(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set ifindex to '%s': %m", value);
+ } else if (streq(key, "DEVMODE")) {
+ r = device_set_devmode(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devmode to '%s': %m", value);
+ } else if (streq(key, "DEVUID")) {
+ r = device_set_devuid(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devuid to '%s': %m", value);
+ } else if (streq(key, "DEVGID")) {
+ r = device_set_devgid(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devgid to '%s': %m", value);
+ } else if (streq(key, "DEVLINKS")) {
+ const char *word, *state;
+ size_t l;
+
+ FOREACH_WORD(word, l, value, state) {
+ char devlink[l + 1];
+
+ strncpy(devlink, word, l);
+ devlink[l] = '\0';
+
+ r = device_add_devlink(device, devlink);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to add devlink '%s': %m", devlink);
+ }
+ } else if (streq(key, "TAGS")) {
+ const char *word, *state;
+ size_t l;
+
+ FOREACH_WORD_SEPARATOR(word, l, value, ":", state) {
+ char tag[l + 1];
+
+ (void) strncpy(tag, word, l);
+ tag[l] = '\0';
+
+ r = device_add_tag(device, tag);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to add tag '%s': %m", tag);
+ }
+ } else {
+ r = device_add_property_internal(device, key, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to add property '%s=%s': %m", key, value);
+ }
+
+ return 0;
+}
+
+static const char* const device_action_table[_DEVICE_ACTION_MAX] = {
+ [DEVICE_ACTION_ADD] = "add",
+ [DEVICE_ACTION_REMOVE] = "remove",
+ [DEVICE_ACTION_CHANGE] = "change",
+ [DEVICE_ACTION_MOVE] = "move",
+ [DEVICE_ACTION_ONLINE] = "online",
+ [DEVICE_ACTION_OFFLINE] = "offline",
+ [DEVICE_ACTION_BIND] = "bind",
+ [DEVICE_ACTION_UNBIND] = "unbind",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(device_action, DeviceAction);
+
+static int device_append(sd_device *device, char *key, const char **_major, const char **_minor, uint64_t *_seqnum,
+ DeviceAction *_action) {
+ DeviceAction action = _DEVICE_ACTION_INVALID;
+ uint64_t seqnum = 0;
+ const char *major = NULL, *minor = NULL;
+ char *value;
+ int r;
+
+ assert(device);
+ assert(key);
+ assert(_major);
+ assert(_minor);
+ assert(_seqnum);
+ assert(_action);
+
+ value = strchr(key, '=');
+ if (!value) {
+ log_device_debug(device, "sd-device: Not a key-value pair: '%s'", key);
+ return -EINVAL;
+ }
+
+ *value = '\0';
+
+ value++;
+
+ if (streq(key, "MAJOR"))
+ major = value;
+ else if (streq(key, "MINOR"))
+ minor = value;
+ else {
+ if (streq(key, "ACTION")) {
+ action = device_action_from_string(value);
+ if (action == _DEVICE_ACTION_INVALID)
+ return -EINVAL;
+ } else if (streq(key, "SEQNUM")) {
+ r = safe_atou64(value, &seqnum);
+ if (r < 0)
+ return r;
+ else if (seqnum == 0)
+ /* kernel only sends seqnum > 0 */
+ return -EINVAL;
+ }
+
+ r = device_amend(device, key, value);
+ if (r < 0)
+ return r;
+ }
+
+ if (major != 0)
+ *_major = major;
+
+ if (minor != 0)
+ *_minor = minor;
+
+ if (action != _DEVICE_ACTION_INVALID)
+ *_action = action;
+
+ if (seqnum > 0)
+ *_seqnum = seqnum;
+
+ return 0;
+}
+
+void device_seal(sd_device *device) {
+ assert(device);
+
+ device->sealed = true;
+}
+
+static int device_verify(sd_device *device, DeviceAction action, uint64_t seqnum) {
+ assert(device);
+
+ if (!device->devpath || !device->subsystem || action == _DEVICE_ACTION_INVALID || seqnum == 0) {
+ log_device_debug(device, "sd-device: Device created from strv or nulstr lacks devpath, subsystem, action or seqnum.");
+ return -EINVAL;
+ }
+
+ device->sealed = true;
+
+ return 0;
+}
+
+int device_new_from_strv(sd_device **ret, char **strv) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ char **key;
+ const char *major = NULL, *minor = NULL;
+ DeviceAction action = _DEVICE_ACTION_INVALID;
+ uint64_t seqnum = 0;
+ int r;
+
+ assert(ret);
+ assert(strv);
+
+ r = device_new_aux(&device);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(key, strv) {
+ r = device_append(device, *key, &major, &minor, &seqnum, &action);
+ if (r < 0)
+ return r;
+ }
+
+ if (major) {
+ r = device_set_devnum(device, major, minor);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devnum %s:%s: %m", major, minor);
+ }
+
+ r = device_verify(device, action, seqnum);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(device);
+
+ return 0;
+}
+
+int device_new_from_nulstr(sd_device **ret, uint8_t *nulstr, size_t len) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ const char *major = NULL, *minor = NULL;
+ DeviceAction action = _DEVICE_ACTION_INVALID;
+ uint64_t seqnum = 0;
+ unsigned i = 0;
+ int r;
+
+ assert(ret);
+ assert(nulstr);
+ assert(len);
+
+ r = device_new_aux(&device);
+ if (r < 0)
+ return r;
+
+ while (i < len) {
+ char *key;
+ const char *end;
+
+ key = (char*)&nulstr[i];
+ end = memchr(key, '\0', len - i);
+ if (!end) {
+ log_device_debug(device, "sd-device: Failed to parse nulstr");
+ return -EINVAL;
+ }
+ i += end - key + 1;
+
+ r = device_append(device, key, &major, &minor, &seqnum, &action);
+ if (r < 0)
+ return r;
+ }
+
+ if (major) {
+ r = device_set_devnum(device, major, minor);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devnum %s:%s: %m", major, minor);
+ }
+
+ r = device_verify(device, action, seqnum);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(device);
+
+ return 0;
+}
+
+static int device_update_properties_bufs(sd_device *device) {
+ const char *val, *prop;
+ _cleanup_free_ char **buf_strv = NULL;
+ _cleanup_free_ uint8_t *buf_nulstr = NULL;
+ size_t allocated_nulstr = 0;
+ size_t nulstr_len = 0, num = 0, i = 0;
+
+ assert(device);
+
+ if (!device->properties_buf_outdated)
+ return 0;
+
+ FOREACH_DEVICE_PROPERTY(device, prop, val) {
+ size_t len = 0;
+
+ len = strlen(prop) + 1 + strlen(val);
+
+ buf_nulstr = GREEDY_REALLOC0(buf_nulstr, allocated_nulstr, nulstr_len + len + 2);
+ if (!buf_nulstr)
+ return -ENOMEM;
+
+ strscpyl((char *)buf_nulstr + nulstr_len, len + 1, prop, "=", val, NULL);
+ nulstr_len += len + 1;
+ ++num;
+ }
+
+ /* build buf_strv from buf_nulstr */
+ buf_strv = new0(char *, num + 1);
+ if (!buf_strv)
+ return -ENOMEM;
+
+ NULSTR_FOREACH(val, (char*) buf_nulstr) {
+ buf_strv[i] = (char *) val;
+ assert(i < num);
+ i++;
+ }
+
+ free_and_replace(device->properties_nulstr, buf_nulstr);
+ device->properties_nulstr_len = nulstr_len;
+ free_and_replace(device->properties_strv, buf_strv);
+
+ device->properties_buf_outdated = false;
+
+ return 0;
+}
+
+int device_get_properties_nulstr(sd_device *device, const uint8_t **nulstr, size_t *len) {
+ int r;
+
+ assert(device);
+ assert(nulstr);
+ assert(len);
+
+ r = device_update_properties_bufs(device);
+ if (r < 0)
+ return r;
+
+ *nulstr = device->properties_nulstr;
+ *len = device->properties_nulstr_len;
+
+ return 0;
+}
+
+int device_get_properties_strv(sd_device *device, char ***strv) {
+ int r;
+
+ assert(device);
+ assert(strv);
+
+ r = device_update_properties_bufs(device);
+ if (r < 0)
+ return r;
+
+ *strv = device->properties_strv;
+
+ return 0;
+}
+
+int device_get_devlink_priority(sd_device *device, int *priority) {
+ int r;
+
+ assert(device);
+ assert(priority);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ *priority = device->devlink_priority;
+
+ return 0;
+}
+
+int device_get_watch_handle(sd_device *device, int *handle) {
+ int r;
+
+ assert(device);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (device->watch_handle < 0)
+ return -ENOENT;
+
+ if (handle)
+ *handle = device->watch_handle;
+
+ return 0;
+}
+
+void device_set_watch_handle(sd_device *device, int handle) {
+ assert(device);
+
+ device->watch_handle = handle;
+}
+
+int device_rename(sd_device *device, const char *name) {
+ _cleanup_free_ char *dirname = NULL;
+ char *new_syspath;
+ const char *interface;
+ int r;
+
+ assert(device);
+ assert(name);
+
+ dirname = dirname_malloc(device->syspath);
+ if (!dirname)
+ return -ENOMEM;
+
+ new_syspath = strjoina(dirname, "/", name);
+
+ /* the user must trust that the new name is correct */
+ r = device_set_syspath(device, new_syspath, false);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_property_value(device, "INTERFACE", &interface);
+ if (r >= 0) {
+ /* like DEVPATH_OLD, INTERFACE_OLD is not saved to the db, but only stays around for the current event */
+ r = device_add_property_internal(device, "INTERFACE_OLD", interface);
+ if (r < 0)
+ return r;
+
+ r = device_add_property_internal(device, "INTERFACE", name);
+ if (r < 0)
+ return r;
+ } else if (r != -ENOENT)
+ return r;
+
+ return 0;
+}
+
+int device_shallow_clone(sd_device *old_device, sd_device **new_device) {
+ _cleanup_(sd_device_unrefp) sd_device *ret = NULL;
+ int r;
+
+ assert(old_device);
+ assert(new_device);
+
+ r = device_new_aux(&ret);
+ if (r < 0)
+ return r;
+
+ r = device_set_syspath(ret, old_device->syspath, false);
+ if (r < 0)
+ return r;
+
+ r = device_set_subsystem(ret, old_device->subsystem);
+ if (r < 0)
+ return r;
+
+ ret->devnum = old_device->devnum;
+
+ *new_device = TAKE_PTR(ret);
+
+ return 0;
+}
+
+int device_clone_with_db(sd_device *old_device, sd_device **new_device) {
+ _cleanup_(sd_device_unrefp) sd_device *ret = NULL;
+ int r;
+
+ assert(old_device);
+ assert(new_device);
+
+ r = device_shallow_clone(old_device, &ret);
+ if (r < 0)
+ return r;
+
+ r = device_read_db(ret);
+ if (r < 0)
+ return r;
+
+ ret->sealed = true;
+
+ *new_device = TAKE_PTR(ret);
+
+ return 0;
+}
+
+int device_new_from_synthetic_event(sd_device **new_device, const char *syspath, const char *action) {
+ _cleanup_(sd_device_unrefp) sd_device *ret = NULL;
+ int r;
+
+ assert(new_device);
+ assert(syspath);
+ assert(action);
+
+ r = sd_device_new_from_syspath(&ret, syspath);
+ if (r < 0)
+ return r;
+
+ r = device_read_uevent_file(ret);
+ if (r < 0)
+ return r;
+
+ r = device_add_property_internal(ret, "ACTION", action);
+ if (r < 0)
+ return r;
+
+ *new_device = TAKE_PTR(ret);
+
+ return 0;
+}
+
+int device_new_from_stat_rdev(sd_device **ret, const struct stat *st) {
+ char type;
+
+ assert(ret);
+ assert(st);
+
+ if (S_ISBLK(st->st_mode))
+ type = 'b';
+ else if (S_ISCHR(st->st_mode))
+ type = 'c';
+ else
+ return -ENOTTY;
+
+ return sd_device_new_from_devnum(ret, type, st->st_rdev);
+}
+
+int device_copy_properties(sd_device *device_dst, sd_device *device_src) {
+ const char *property, *value;
+ Iterator i;
+ int r;
+
+ assert(device_dst);
+ assert(device_src);
+
+ r = device_properties_prepare(device_src);
+ if (r < 0)
+ return r;
+
+ ORDERED_HASHMAP_FOREACH_KEY(value, property, device_src->properties_db, i) {
+ r = device_add_property_aux(device_dst, property, value, true);
+ if (r < 0)
+ return r;
+ }
+
+ ORDERED_HASHMAP_FOREACH_KEY(value, property, device_src->properties, i) {
+ r = device_add_property_aux(device_dst, property, value, false);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+void device_cleanup_tags(sd_device *device) {
+ assert(device);
+
+ set_free_free(device->tags);
+ device->tags = NULL;
+ device->property_tags_outdated = true;
+ device->tags_generation++;
+}
+
+void device_cleanup_devlinks(sd_device *device) {
+ assert(device);
+
+ set_free_free(device->devlinks);
+ device->devlinks = NULL;
+ device->property_devlinks_outdated = true;
+ device->devlinks_generation++;
+}
+
+void device_remove_tag(sd_device *device, const char *tag) {
+ assert(device);
+ assert(tag);
+
+ free(set_remove(device->tags, tag));
+ device->property_tags_outdated = true;
+ device->tags_generation++;
+}
+
+static int device_tag(sd_device *device, const char *tag, bool add) {
+ const char *id;
+ char *path;
+ int r;
+
+ assert(device);
+ assert(tag);
+
+ r = device_get_id_filename(device, &id);
+ if (r < 0)
+ return r;
+
+ path = strjoina("/run/udev/tags/", tag, "/", id);
+
+ if (add) {
+ r = touch_file(path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, 0444);
+ if (r < 0)
+ return r;
+ } else {
+ r = unlink(path);
+ if (r < 0 && errno != ENOENT)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int device_tag_index(sd_device *device, sd_device *device_old, bool add) {
+ const char *tag;
+ int r = 0, k;
+
+ if (add && device_old) {
+ /* delete possible left-over tags */
+ FOREACH_DEVICE_TAG(device_old, tag) {
+ if (!sd_device_has_tag(device, tag)) {
+ k = device_tag(device_old, tag, false);
+ if (r >= 0 && k < 0)
+ r = k;
+ }
+ }
+ }
+
+ FOREACH_DEVICE_TAG(device, tag) {
+ k = device_tag(device, tag, add);
+ if (r >= 0 && k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static bool device_has_info(sd_device *device) {
+ assert(device);
+
+ if (!set_isempty(device->devlinks))
+ return true;
+
+ if (device->devlink_priority != 0)
+ return true;
+
+ if (!ordered_hashmap_isempty(device->properties_db))
+ return true;
+
+ if (!set_isempty(device->tags))
+ return true;
+
+ if (device->watch_handle >= 0)
+ return true;
+
+ return false;
+}
+
+void device_set_db_persist(sd_device *device) {
+ assert(device);
+
+ device->db_persist = true;
+}
+
+int device_update_db(sd_device *device) {
+ const char *id;
+ char *path;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *path_tmp = NULL;
+ bool has_info;
+ int r;
+
+ assert(device);
+
+ has_info = device_has_info(device);
+
+ r = device_get_id_filename(device, &id);
+ if (r < 0)
+ return r;
+
+ path = strjoina("/run/udev/data/", id);
+
+ /* do not store anything for otherwise empty devices */
+ if (!has_info && major(device->devnum) == 0 && device->ifindex == 0) {
+ r = unlink(path);
+ if (r < 0 && errno != ENOENT)
+ return -errno;
+
+ return 0;
+ }
+
+ /* write a database file */
+ r = mkdir_parents(path, 0755);
+ if (r < 0)
+ return r;
+
+ r = fopen_temporary(path, &f, &path_tmp);
+ if (r < 0)
+ return r;
+
+ /*
+ * set 'sticky' bit to indicate that we should not clean the
+ * database when we transition from initramfs to the real root
+ */
+ if (device->db_persist) {
+ r = fchmod(fileno(f), 01644);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+ } else {
+ r = fchmod(fileno(f), 0644);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+ }
+
+ if (has_info) {
+ const char *property, *value, *tag;
+ Iterator i;
+
+ if (major(device->devnum) > 0) {
+ const char *devlink;
+
+ FOREACH_DEVICE_DEVLINK(device, devlink)
+ fprintf(f, "S:%s\n", devlink + STRLEN("/dev/"));
+
+ if (device->devlink_priority != 0)
+ fprintf(f, "L:%i\n", device->devlink_priority);
+
+ if (device->watch_handle >= 0)
+ fprintf(f, "W:%i\n", device->watch_handle);
+ }
+
+ if (device->usec_initialized > 0)
+ fprintf(f, "I:"USEC_FMT"\n", device->usec_initialized);
+
+ ORDERED_HASHMAP_FOREACH_KEY(value, property, device->properties_db, i)
+ fprintf(f, "E:%s=%s\n", property, value);
+
+ FOREACH_DEVICE_TAG(device, tag)
+ fprintf(f, "G:%s\n", tag);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ r = rename(path_tmp, path);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ log_device_debug(device, "sd-device: Created %s file '%s' for '%s'", has_info ? "db" : "empty",
+ path, device->devpath);
+
+ return 0;
+
+fail:
+ (void) unlink(path);
+ (void) unlink(path_tmp);
+
+ return log_device_debug_errno(device, r, "sd-device: Failed to create %s file '%s' for '%s'", has_info ? "db" : "empty", path, device->devpath);
+}
+
+int device_delete_db(sd_device *device) {
+ const char *id;
+ char *path;
+ int r;
+
+ assert(device);
+
+ r = device_get_id_filename(device, &id);
+ if (r < 0)
+ return r;
+
+ path = strjoina("/run/udev/data/", id);
+
+ r = unlink(path);
+ if (r < 0 && errno != ENOENT)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-device/device-private.h b/src/libsystemd/sd-device/device-private.h
new file mode 100644
index 0000000..062bfd6
--- /dev/null
+++ b/src/libsystemd/sd-device/device-private.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "sd-device.h"
+
+int device_new_from_nulstr(sd_device **ret, uint8_t *nulstr, size_t len);
+int device_new_from_strv(sd_device **ret, char **strv);
+int device_new_from_stat_rdev(sd_device **ret, const struct stat *st);
+
+int device_get_id_filename(sd_device *device, const char **ret);
+
+int device_get_devlink_priority(sd_device *device, int *priority);
+int device_get_watch_handle(sd_device *device, int *handle);
+int device_get_devnode_mode(sd_device *device, mode_t *mode);
+int device_get_devnode_uid(sd_device *device, uid_t *uid);
+int device_get_devnode_gid(sd_device *device, gid_t *gid);
+
+void device_seal(sd_device *device);
+void device_set_is_initialized(sd_device *device);
+void device_set_watch_handle(sd_device *device, int fd);
+void device_set_db_persist(sd_device *device);
+void device_set_devlink_priority(sd_device *device, int priority);
+int device_ensure_usec_initialized(sd_device *device, sd_device *device_old);
+int device_add_devlink(sd_device *device, const char *devlink);
+int device_add_property(sd_device *device, const char *property, const char *value);
+int device_add_tag(sd_device *device, const char *tag);
+void device_remove_tag(sd_device *device, const char *tag);
+void device_cleanup_tags(sd_device *device);
+void device_cleanup_devlinks(sd_device *device);
+
+uint64_t device_get_properties_generation(sd_device *device);
+uint64_t device_get_tags_generation(sd_device *device);
+uint64_t device_get_devlinks_generation(sd_device *device);
+
+int device_properties_prepare(sd_device *device);
+int device_get_properties_nulstr(sd_device *device, const uint8_t **nulstr, size_t *len);
+int device_get_properties_strv(sd_device *device, char ***strv);
+
+int device_rename(sd_device *device, const char *name);
+int device_shallow_clone(sd_device *old_device, sd_device **new_device);
+int device_clone_with_db(sd_device *old_device, sd_device **new_device);
+int device_copy_properties(sd_device *device_dst, sd_device *device_src);
+int device_new_from_synthetic_event(sd_device **new_device, const char *syspath, const char *action);
+
+int device_tag_index(sd_device *dev, sd_device *dev_old, bool add);
+int device_update_db(sd_device *device);
+int device_delete_db(sd_device *device);
+int device_read_db_internal(sd_device *device, bool force);
+static inline int device_read_db(sd_device *device) {
+ return device_read_db_internal(device, false);
+}
diff --git a/src/libsystemd/sd-device/device-util.h b/src/libsystemd/sd-device/device-util.h
new file mode 100644
index 0000000..94f6174
--- /dev/null
+++ b/src/libsystemd/sd-device/device-util.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#define FOREACH_DEVICE_PROPERTY(device, key, value) \
+ for (key = sd_device_get_property_first(device, &(value)); \
+ key; \
+ key = sd_device_get_property_next(device, &(value)))
+
+#define FOREACH_DEVICE_TAG(device, tag) \
+ for (tag = sd_device_get_tag_first(device); \
+ tag; \
+ tag = sd_device_get_tag_next(device))
+
+#define FOREACH_DEVICE_SYSATTR(device, attr) \
+ for (attr = sd_device_get_sysattr_first(device); \
+ attr; \
+ attr = sd_device_get_sysattr_next(device))
+
+#define FOREACH_DEVICE_DEVLINK(device, devlink) \
+ for (devlink = sd_device_get_devlink_first(device); \
+ devlink; \
+ devlink = sd_device_get_devlink_next(device))
+
+#define FOREACH_DEVICE(enumerator, device) \
+ for (device = sd_device_enumerator_get_device_first(enumerator); \
+ device; \
+ device = sd_device_enumerator_get_device_next(enumerator))
+
+#define FOREACH_SUBSYSTEM(enumerator, device) \
+ for (device = sd_device_enumerator_get_subsystem_first(enumerator); \
+ device; \
+ device = sd_device_enumerator_get_subsystem_next(enumerator))
+
+#define log_device_full(device, level, error, ...) \
+ ({ \
+ const char *_sysname = NULL; \
+ sd_device *_d = (device); \
+ int _level = (level), _error = (error); \
+ \
+ if (_d && _unlikely_(log_get_max_level() >= _level)) \
+ (void) sd_device_get_sysname(_d, &_sysname); \
+ log_object_internal(_level, _error, __FILE__, __LINE__, __func__, \
+ _sysname ? "DEVICE=" : NULL, _sysname, \
+ NULL, NULL, ##__VA_ARGS__); \
+ })
+
+#define log_device_debug(device, ...) log_device_full(device, LOG_DEBUG, 0, ##__VA_ARGS__)
+#define log_device_info(device, ...) log_device_full(device, LOG_INFO, 0, ##__VA_ARGS__)
+#define log_device_notice(device, ...) log_device_full(device, LOG_NOTICE, 0, ##__VA_ARGS__)
+#define log_device_warning(device, ...) log_device_full(device, LOG_WARNING, 0, ##__VA_ARGS__)
+#define log_device_error(device, ...) log_device_full(device, LOG_ERR, 0, ##__VA_ARGS__)
+
+#define log_device_debug_errno(device, error, ...) log_device_full(device, LOG_DEBUG, error, ##__VA_ARGS__)
+#define log_device_info_errno(device, error, ...) log_device_full(device, LOG_INFO, error, ##__VA_ARGS__)
+#define log_device_notice_errno(device, error, ...) log_device_full(device, LOG_NOTICE, error, ##__VA_ARGS__)
+#define log_device_warning_errno(device, error, ...) log_device_full(device, LOG_WARNING, error, ##__VA_ARGS__)
+#define log_device_error_errno(device, error, ...) log_device_full(device, LOG_ERR, error, ##__VA_ARGS__)
diff --git a/src/libsystemd/sd-device/sd-device.c b/src/libsystemd/sd-device/sd-device.c
new file mode 100644
index 0000000..2a69f2e
--- /dev/null
+++ b/src/libsystemd/sd-device/sd-device.c
@@ -0,0 +1,1864 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-internal.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "util.h"
+
+int device_new_aux(sd_device **ret) {
+ sd_device *device;
+
+ assert(ret);
+
+ device = new(sd_device, 1);
+ if (!device)
+ return -ENOMEM;
+
+ *device = (sd_device) {
+ .n_ref = 1,
+ .watch_handle = -1,
+ .devmode = (mode_t) -1,
+ .devuid = (uid_t) -1,
+ .devgid = (gid_t) -1,
+ };
+
+ *ret = device;
+ return 0;
+}
+
+static sd_device *device_free(sd_device *device) {
+ assert(device);
+
+ sd_device_unref(device->parent);
+ free(device->syspath);
+ free(device->sysname);
+ free(device->devtype);
+ free(device->devname);
+ free(device->subsystem);
+ free(device->driver_subsystem);
+ free(device->driver);
+ free(device->id_filename);
+ free(device->properties_strv);
+ free(device->properties_nulstr);
+
+ ordered_hashmap_free_free_free(device->properties);
+ ordered_hashmap_free_free_free(device->properties_db);
+ hashmap_free_free_free(device->sysattr_values);
+ set_free_free(device->sysattrs);
+ set_free_free(device->tags);
+ set_free_free(device->devlinks);
+
+ return mfree(device);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_device, sd_device, device_free);
+
+int device_add_property_aux(sd_device *device, const char *_key, const char *_value, bool db) {
+ OrderedHashmap **properties;
+
+ assert(device);
+ assert(_key);
+
+ if (db)
+ properties = &device->properties_db;
+ else
+ properties = &device->properties;
+
+ if (_value) {
+ _cleanup_free_ char *key = NULL, *value = NULL, *old_key = NULL, *old_value = NULL;
+ int r;
+
+ r = ordered_hashmap_ensure_allocated(properties, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ key = strdup(_key);
+ if (!key)
+ return -ENOMEM;
+
+ value = strdup(_value);
+ if (!value)
+ return -ENOMEM;
+
+ old_value = ordered_hashmap_get2(*properties, key, (void**) &old_key);
+
+ r = ordered_hashmap_replace(*properties, key, value);
+ if (r < 0)
+ return r;
+
+ key = NULL;
+ value = NULL;
+ } else {
+ _cleanup_free_ char *key = NULL;
+ _cleanup_free_ char *value = NULL;
+
+ value = ordered_hashmap_remove2(*properties, _key, (void**) &key);
+ }
+
+ if (!db) {
+ device->properties_generation++;
+ device->properties_buf_outdated = true;
+ }
+
+ return 0;
+}
+
+int device_add_property_internal(sd_device *device, const char *key, const char *value) {
+ return device_add_property_aux(device, key, value, false);
+}
+
+int device_set_syspath(sd_device *device, const char *_syspath, bool verify) {
+ _cleanup_free_ char *syspath = NULL;
+ const char *devpath;
+ int r;
+
+ assert(device);
+ assert(_syspath);
+
+ /* must be a subdirectory of /sys */
+ if (!path_startswith(_syspath, "/sys/"))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "sd-device: Syspath '%s' is not a subdirectory of /sys",
+ _syspath);
+
+ if (verify) {
+ r = chase_symlinks(_syspath, NULL, 0, &syspath);
+ if (r == -ENOENT)
+ return -ENODEV; /* the device does not exist (any more?) */
+ if (r < 0)
+ return log_debug_errno(r, "sd-device: Failed to get target of '%s': %m", _syspath);
+
+ if (!path_startswith(syspath, "/sys")) {
+ _cleanup_free_ char *real_sys = NULL, *new_syspath = NULL;
+ char *p;
+
+ /* /sys is a symlink to somewhere sysfs is mounted on? In that case, we convert the path to real sysfs to "/sys". */
+ r = chase_symlinks("/sys", NULL, 0, &real_sys);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device: Failed to chase symlink /sys: %m");
+
+ p = path_startswith(syspath, real_sys);
+ if (!p)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODEV),
+ "sd-device: Canonicalized path '%s' does not starts with sysfs mount point '%s'",
+ syspath, real_sys);
+
+ new_syspath = strjoin("/sys/", p);
+ if (!new_syspath)
+ return -ENOMEM;
+
+ free_and_replace(syspath, new_syspath);
+ path_simplify(syspath, false);
+ }
+
+ if (path_startswith(syspath, "/sys/devices/")) {
+ char *path;
+
+ /* all 'devices' require an 'uevent' file */
+ path = strjoina(syspath, "/uevent");
+ r = access(path, F_OK);
+ if (r < 0) {
+ if (errno == ENOENT)
+ /* this is not a valid device */
+ return -ENODEV;
+
+ return log_debug_errno(errno, "sd-device: %s does not have an uevent file: %m", syspath);
+ }
+ } else {
+ /* everything else just needs to be a directory */
+ if (!is_dir(syspath, false))
+ return -ENODEV;
+ }
+ } else {
+ syspath = strdup(_syspath);
+ if (!syspath)
+ return -ENOMEM;
+ }
+
+ devpath = syspath + STRLEN("/sys");
+
+ r = device_add_property_internal(device, "DEVPATH", devpath);
+ if (r < 0)
+ return r;
+
+ free_and_replace(device->syspath, syspath);
+ device->devpath = devpath;
+ return 0;
+}
+
+_public_ int sd_device_new_from_syspath(sd_device **ret, const char *syspath) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(syspath, -EINVAL);
+
+ r = device_new_aux(&device);
+ if (r < 0)
+ return r;
+
+ r = device_set_syspath(device, syspath, true);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(device);
+ return 0;
+}
+
+_public_ int sd_device_new_from_devnum(sd_device **ret, char type, dev_t devnum) {
+ char *syspath;
+ char id[DECIMAL_STR_MAX(unsigned) * 2 + 1];
+
+ assert_return(ret, -EINVAL);
+ assert_return(IN_SET(type, 'b', 'c'), -EINVAL);
+
+ /* use /sys/dev/{block,char}/<maj>:<min> link */
+ snprintf(id, sizeof(id), "%u:%u", major(devnum), minor(devnum));
+
+ syspath = strjoina("/sys/dev/", (type == 'b' ? "block" : "char"), "/", id);
+
+ return sd_device_new_from_syspath(ret, syspath);
+}
+
+_public_ int sd_device_new_from_subsystem_sysname(sd_device **ret, const char *subsystem, const char *sysname) {
+ char *name, *syspath;
+ size_t len = 0;
+
+ assert_return(ret, -EINVAL);
+ assert_return(subsystem, -EINVAL);
+ assert_return(sysname, -EINVAL);
+
+ if (streq(subsystem, "subsystem")) {
+ syspath = strjoina("/sys/subsystem/", sysname);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+
+ syspath = strjoina("/sys/bus/", sysname);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+
+ syspath = strjoina("/sys/class/", sysname);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+ } else if (streq(subsystem, "module")) {
+ syspath = strjoina("/sys/module/", sysname);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+ } else if (streq(subsystem, "drivers")) {
+ char subsys[PATH_MAX];
+ char *driver;
+
+ strscpy(subsys, sizeof(subsys), sysname);
+ driver = strchr(subsys, ':');
+ if (driver) {
+ driver[0] = '\0';
+ driver++;
+
+ syspath = strjoina("/sys/subsystem/", subsys, "/drivers/", driver);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+
+ syspath = strjoina("/sys/bus/", subsys, "/drivers/", driver);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+ }
+ }
+
+ /* translate sysname back to sysfs filename */
+ name = strdupa(sysname);
+ while (name[len] != '\0') {
+ if (name[len] == '/')
+ name[len] = '!';
+
+ len++;
+ }
+
+ syspath = strjoina("/sys/subsystem/", subsystem, "/devices/", name);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+
+ syspath = strjoina("/sys/bus/", subsystem, "/devices/", name);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+
+ syspath = strjoina("/sys/class/", subsystem, "/", name);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+
+ syspath = strjoina("/sys/firmware/", subsystem, "/", sysname);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+
+ return -ENODEV;
+}
+
+int device_set_devtype(sd_device *device, const char *_devtype) {
+ _cleanup_free_ char *devtype = NULL;
+ int r;
+
+ assert(device);
+ assert(_devtype);
+
+ devtype = strdup(_devtype);
+ if (!devtype)
+ return -ENOMEM;
+
+ r = device_add_property_internal(device, "DEVTYPE", devtype);
+ if (r < 0)
+ return r;
+
+ free_and_replace(device->devtype, devtype);
+
+ return 0;
+}
+
+int device_set_ifindex(sd_device *device, const char *_ifindex) {
+ int ifindex, r;
+
+ assert(device);
+ assert(_ifindex);
+
+ r = parse_ifindex(_ifindex, &ifindex);
+ if (r < 0)
+ return r;
+
+ r = device_add_property_internal(device, "IFINDEX", _ifindex);
+ if (r < 0)
+ return r;
+
+ device->ifindex = ifindex;
+
+ return 0;
+}
+
+int device_set_devname(sd_device *device, const char *_devname) {
+ _cleanup_free_ char *devname = NULL;
+ int r;
+
+ assert(device);
+ assert(_devname);
+
+ if (_devname[0] != '/') {
+ r = asprintf(&devname, "/dev/%s", _devname);
+ if (r < 0)
+ return -ENOMEM;
+ } else {
+ devname = strdup(_devname);
+ if (!devname)
+ return -ENOMEM;
+ }
+
+ r = device_add_property_internal(device, "DEVNAME", devname);
+ if (r < 0)
+ return r;
+
+ free_and_replace(device->devname, devname);
+
+ return 0;
+}
+
+int device_set_devmode(sd_device *device, const char *_devmode) {
+ unsigned devmode;
+ int r;
+
+ assert(device);
+ assert(_devmode);
+
+ r = safe_atou(_devmode, &devmode);
+ if (r < 0)
+ return r;
+
+ if (devmode > 07777)
+ return -EINVAL;
+
+ r = device_add_property_internal(device, "DEVMODE", _devmode);
+ if (r < 0)
+ return r;
+
+ device->devmode = devmode;
+
+ return 0;
+}
+
+int device_set_devnum(sd_device *device, const char *major, const char *minor) {
+ unsigned maj = 0, min = 0;
+ int r;
+
+ assert(device);
+ assert(major);
+
+ r = safe_atou(major, &maj);
+ if (r < 0)
+ return r;
+ if (!maj)
+ return 0;
+
+ if (minor) {
+ r = safe_atou(minor, &min);
+ if (r < 0)
+ return r;
+ }
+
+ r = device_add_property_internal(device, "MAJOR", major);
+ if (r < 0)
+ return r;
+
+ if (minor) {
+ r = device_add_property_internal(device, "MINOR", minor);
+ if (r < 0)
+ return r;
+ }
+
+ device->devnum = makedev(maj, min);
+
+ return 0;
+}
+
+static int handle_uevent_line(sd_device *device, const char *key, const char *value, const char **major, const char **minor) {
+ int r;
+
+ assert(device);
+ assert(key);
+ assert(value);
+ assert(major);
+ assert(minor);
+
+ if (streq(key, "DEVTYPE")) {
+ r = device_set_devtype(device, value);
+ if (r < 0)
+ return r;
+ } else if (streq(key, "IFINDEX")) {
+ r = device_set_ifindex(device, value);
+ if (r < 0)
+ return r;
+ } else if (streq(key, "DEVNAME")) {
+ r = device_set_devname(device, value);
+ if (r < 0)
+ return r;
+ } else if (streq(key, "DEVMODE")) {
+ r = device_set_devmode(device, value);
+ if (r < 0)
+ return r;
+ } else if (streq(key, "MAJOR"))
+ *major = value;
+ else if (streq(key, "MINOR"))
+ *minor = value;
+ else {
+ r = device_add_property_internal(device, key, value);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int device_read_uevent_file(sd_device *device) {
+ _cleanup_free_ char *uevent = NULL;
+ const char *syspath, *key = NULL, *value = NULL, *major = NULL, *minor = NULL;
+ char *path;
+ size_t uevent_len;
+ unsigned i;
+ int r;
+
+ enum {
+ PRE_KEY,
+ KEY,
+ PRE_VALUE,
+ VALUE,
+ INVALID_LINE,
+ } state = PRE_KEY;
+
+ assert(device);
+
+ if (device->uevent_loaded || device->sealed)
+ return 0;
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ path = strjoina(syspath, "/uevent");
+
+ r = read_full_file(path, &uevent, &uevent_len);
+ if (r == -EACCES) {
+ /* empty uevent files may be write-only */
+ device->uevent_loaded = true;
+ return 0;
+ }
+ if (r == -ENOENT)
+ /* some devices may not have uevent files, see set_syspath() */
+ return 0;
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to read uevent file '%s': %m", path);
+
+ device->uevent_loaded = true;
+
+ for (i = 0; i < uevent_len; i++)
+ switch (state) {
+ case PRE_KEY:
+ if (!strchr(NEWLINE, uevent[i])) {
+ key = &uevent[i];
+
+ state = KEY;
+ }
+
+ break;
+ case KEY:
+ if (uevent[i] == '=') {
+ uevent[i] = '\0';
+
+ state = PRE_VALUE;
+ } else if (strchr(NEWLINE, uevent[i])) {
+ uevent[i] = '\0';
+ log_device_debug(device, "sd-device: Invalid uevent line '%s', ignoring", key);
+
+ state = PRE_KEY;
+ }
+
+ break;
+ case PRE_VALUE:
+ value = &uevent[i];
+ state = VALUE;
+
+ _fallthrough_; /* to handle empty property */
+ case VALUE:
+ if (strchr(NEWLINE, uevent[i])) {
+ uevent[i] = '\0';
+
+ r = handle_uevent_line(device, key, value, &major, &minor);
+ if (r < 0)
+ log_device_debug_errno(device, r, "sd-device: Failed to handle uevent entry '%s=%s', ignoring: %m", key, value);
+
+ state = PRE_KEY;
+ }
+
+ break;
+ default:
+ assert_not_reached("Invalid state when parsing uevent file");
+ }
+
+ if (major) {
+ r = device_set_devnum(device, major, minor);
+ if (r < 0)
+ log_device_debug_errno(device, r, "sd-device: Failed to set 'MAJOR=%s' or 'MINOR=%s' from '%s', ignoring: %m", major, minor, path);
+ }
+
+ return 0;
+}
+
+_public_ int sd_device_get_ifindex(sd_device *device, int *ifindex) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ if (device->ifindex <= 0)
+ return -ENOENT;
+
+ if (ifindex)
+ *ifindex = device->ifindex;
+
+ return 0;
+}
+
+_public_ int sd_device_new_from_device_id(sd_device **ret, const char *id) {
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(id, -EINVAL);
+
+ switch (id[0]) {
+ case 'b':
+ case 'c': {
+ dev_t devt;
+
+ if (isempty(id))
+ return -EINVAL;
+
+ r = parse_dev(id + 1, &devt);
+ if (r < 0)
+ return r;
+
+ return sd_device_new_from_devnum(ret, id[0], devt);
+ }
+
+ case 'n': {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ _cleanup_close_ int sk = -1;
+ struct ifreq ifr = {};
+ int ifindex;
+
+ r = parse_ifindex(&id[1], &ifr.ifr_ifindex);
+ if (r < 0)
+ return r;
+
+ sk = socket_ioctl_fd();
+ if (sk < 0)
+ return sk;
+
+ r = ioctl(sk, SIOCGIFNAME, &ifr);
+ if (r < 0)
+ return -errno;
+
+ r = sd_device_new_from_subsystem_sysname(&device, "net", ifr.ifr_name);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_ifindex(device, &ifindex);
+ if (r < 0)
+ return r;
+
+ /* this is racey, so we might end up with the wrong device */
+ if (ifr.ifr_ifindex != ifindex)
+ return -ENODEV;
+
+ *ret = TAKE_PTR(device);
+ return 0;
+ }
+
+ case '+': {
+ char subsys[PATH_MAX];
+ char *sysname;
+
+ (void) strscpy(subsys, sizeof(subsys), id + 1);
+ sysname = strchr(subsys, ':');
+ if (!sysname)
+ return -EINVAL;
+
+ sysname[0] = '\0';
+ sysname++;
+
+ return sd_device_new_from_subsystem_sysname(ret, subsys, sysname);
+ }
+
+ default:
+ return -EINVAL;
+ }
+}
+
+_public_ int sd_device_get_syspath(sd_device *device, const char **ret) {
+ assert_return(device, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ assert(path_startswith(device->syspath, "/sys/"));
+
+ *ret = device->syspath;
+
+ return 0;
+}
+
+static int device_new_from_child(sd_device **ret, sd_device *child) {
+ _cleanup_free_ char *path = NULL;
+ const char *subdir, *syspath;
+ int r;
+
+ assert(ret);
+ assert(child);
+
+ r = sd_device_get_syspath(child, &syspath);
+ if (r < 0)
+ return r;
+
+ path = strdup(syspath);
+ if (!path)
+ return -ENOMEM;
+ subdir = path + STRLEN("/sys");
+
+ for (;;) {
+ char *pos;
+
+ pos = strrchr(subdir, '/');
+ if (!pos || pos < subdir + 2)
+ break;
+
+ *pos = '\0';
+
+ r = sd_device_new_from_syspath(ret, path);
+ if (r < 0)
+ continue;
+
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+_public_ int sd_device_get_parent(sd_device *child, sd_device **ret) {
+
+ assert_return(ret, -EINVAL);
+ assert_return(child, -EINVAL);
+
+ if (!child->parent_set) {
+ child->parent_set = true;
+
+ (void) device_new_from_child(&child->parent, child);
+ }
+
+ if (!child->parent)
+ return -ENOENT;
+
+ *ret = child->parent;
+ return 0;
+}
+
+int device_set_subsystem(sd_device *device, const char *_subsystem) {
+ _cleanup_free_ char *subsystem = NULL;
+ int r;
+
+ assert(device);
+ assert(_subsystem);
+
+ subsystem = strdup(_subsystem);
+ if (!subsystem)
+ return -ENOMEM;
+
+ r = device_add_property_internal(device, "SUBSYSTEM", subsystem);
+ if (r < 0)
+ return r;
+
+ device->subsystem_set = true;
+ return free_and_replace(device->subsystem, subsystem);
+}
+
+static int device_set_drivers_subsystem(sd_device *device, const char *_subsystem) {
+ _cleanup_free_ char *subsystem = NULL;
+ int r;
+
+ assert(device);
+ assert(_subsystem);
+ assert(*_subsystem);
+
+ subsystem = strdup(_subsystem);
+ if (!subsystem)
+ return -ENOMEM;
+
+ r = device_set_subsystem(device, "drivers");
+ if (r < 0)
+ return r;
+
+ return free_and_replace(device->driver_subsystem, subsystem);
+}
+
+_public_ int sd_device_get_subsystem(sd_device *device, const char **ret) {
+ const char *syspath, *drivers = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(device, -EINVAL);
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ if (!device->subsystem_set) {
+ _cleanup_free_ char *subsystem = NULL;
+ char *path;
+
+ /* read 'subsystem' link */
+ path = strjoina(syspath, "/subsystem");
+ r = readlink_value(path, &subsystem);
+ if (r >= 0)
+ r = device_set_subsystem(device, subsystem);
+ /* use implicit names */
+ else if (path_startswith(device->devpath, "/module/"))
+ r = device_set_subsystem(device, "module");
+ else if (!(drivers = strstr(syspath, "/drivers/")) &&
+ PATH_STARTSWITH_SET(device->devpath, "/subsystem/",
+ "/class/",
+ "/bus/"))
+ r = device_set_subsystem(device, "subsystem");
+ if (r < 0 && r != -ENOENT)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set subsystem for %s: %m", device->devpath);
+
+ device->subsystem_set = true;
+ } else if (!device->driver_subsystem_set)
+ drivers = strstr(syspath, "/drivers/");
+
+ if (!device->driver_subsystem_set) {
+ if (drivers) {
+ _cleanup_free_ char *subpath = NULL;
+
+ subpath = strndup(syspath, drivers - syspath);
+ if (!subpath)
+ r = -ENOMEM;
+ else {
+ const char *subsys;
+
+ subsys = strrchr(subpath, '/');
+ if (!subsys)
+ r = -EINVAL;
+ else
+ r = device_set_drivers_subsystem(device, subsys + 1);
+ }
+ if (r < 0 && r != -ENOENT)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set subsystem for driver %s: %m", device->devpath);
+ }
+
+ device->driver_subsystem_set = true;
+ }
+
+ if (!device->subsystem)
+ return -ENOENT;
+
+ *ret = device->subsystem;
+ return 0;
+}
+
+_public_ int sd_device_get_devtype(sd_device *device, const char **devtype) {
+ int r;
+
+ assert(devtype);
+ assert(device);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ if (!device->devtype)
+ return -ENOENT;
+
+ *devtype = device->devtype;
+
+ return 0;
+}
+
+_public_ int sd_device_get_parent_with_subsystem_devtype(sd_device *child, const char *subsystem, const char *devtype, sd_device **ret) {
+ sd_device *parent = NULL;
+ int r;
+
+ assert_return(child, -EINVAL);
+ assert_return(subsystem, -EINVAL);
+
+ r = sd_device_get_parent(child, &parent);
+ while (r >= 0) {
+ const char *parent_subsystem = NULL;
+ const char *parent_devtype = NULL;
+
+ (void) sd_device_get_subsystem(parent, &parent_subsystem);
+ if (streq_ptr(parent_subsystem, subsystem)) {
+ if (!devtype)
+ break;
+
+ (void) sd_device_get_devtype(parent, &parent_devtype);
+ if (streq_ptr(parent_devtype, devtype))
+ break;
+ }
+ r = sd_device_get_parent(parent, &parent);
+ }
+
+ if (r < 0)
+ return r;
+
+ *ret = parent;
+ return 0;
+}
+
+_public_ int sd_device_get_devnum(sd_device *device, dev_t *devnum) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ if (major(device->devnum) <= 0)
+ return -ENOENT;
+
+ if (devnum)
+ *devnum = device->devnum;
+
+ return 0;
+}
+
+int device_set_driver(sd_device *device, const char *_driver) {
+ _cleanup_free_ char *driver = NULL;
+ int r;
+
+ assert(device);
+ assert(_driver);
+
+ driver = strdup(_driver);
+ if (!driver)
+ return -ENOMEM;
+
+ r = device_add_property_internal(device, "DRIVER", driver);
+ if (r < 0)
+ return r;
+
+ device->driver_set = true;
+ return free_and_replace(device->driver, driver);
+}
+
+_public_ int sd_device_get_driver(sd_device *device, const char **ret) {
+ assert_return(device, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!device->driver_set) {
+ _cleanup_free_ char *driver = NULL;
+ const char *syspath;
+ char *path;
+ int r;
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ path = strjoina(syspath, "/driver");
+ r = readlink_value(path, &driver);
+ if (r >= 0) {
+ r = device_set_driver(device, driver);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set driver for %s: %m", device->devpath);
+ } else if (r == -ENOENT)
+ device->driver_set = true;
+ else
+ return log_device_debug_errno(device, r, "sd-device: Failed to set driver for %s: %m", device->devpath);
+ }
+
+ if (!device->driver)
+ return -ENOENT;
+
+ *ret = device->driver;
+ return 0;
+}
+
+_public_ int sd_device_get_devpath(sd_device *device, const char **devpath) {
+ assert_return(device, -EINVAL);
+ assert_return(devpath, -EINVAL);
+
+ assert(device->devpath);
+ assert(device->devpath[0] == '/');
+
+ *devpath = device->devpath;
+ return 0;
+}
+
+_public_ int sd_device_get_devname(sd_device *device, const char **devname) {
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(devname, -EINVAL);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ if (!device->devname)
+ return -ENOENT;
+
+ assert(path_startswith(device->devname, "/dev/"));
+
+ *devname = device->devname;
+ return 0;
+}
+
+static int device_set_sysname(sd_device *device) {
+ _cleanup_free_ char *sysname = NULL;
+ const char *sysnum = NULL;
+ const char *pos;
+ size_t len = 0;
+
+ if (!device->devpath)
+ return -EINVAL;
+
+ pos = strrchr(device->devpath, '/');
+ if (!pos)
+ return -EINVAL;
+ pos++;
+
+ /* devpath is not a root directory */
+ if (*pos == '\0' || pos <= device->devpath)
+ return -EINVAL;
+
+ sysname = strdup(pos);
+ if (!sysname)
+ return -ENOMEM;
+
+ /* some devices have '!' in their name, change that to '/' */
+ while (sysname[len] != '\0') {
+ if (sysname[len] == '!')
+ sysname[len] = '/';
+
+ len++;
+ }
+
+ /* trailing number */
+ while (len > 0 && isdigit(sysname[--len]))
+ sysnum = &sysname[len];
+
+ if (len == 0)
+ sysnum = NULL;
+
+ device->sysname_set = true;
+ device->sysnum = sysnum;
+ return free_and_replace(device->sysname, sysname);
+}
+
+_public_ int sd_device_get_sysname(sd_device *device, const char **ret) {
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!device->sysname_set) {
+ r = device_set_sysname(device);
+ if (r < 0)
+ return r;
+ }
+
+ assert_return(device->sysname, -ENOENT);
+
+ *ret = device->sysname;
+ return 0;
+}
+
+_public_ int sd_device_get_sysnum(sd_device *device, const char **ret) {
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!device->sysname_set) {
+ r = device_set_sysname(device);
+ if (r < 0)
+ return r;
+ }
+
+ if (!device->sysnum)
+ return -ENOENT;
+
+ *ret = device->sysnum;
+ return 0;
+}
+
+static bool is_valid_tag(const char *tag) {
+ assert(tag);
+
+ return !strchr(tag, ':') && !strchr(tag, ' ');
+}
+
+int device_add_tag(sd_device *device, const char *tag) {
+ int r;
+
+ assert(device);
+ assert(tag);
+
+ if (!is_valid_tag(tag))
+ return -EINVAL;
+
+ r = set_ensure_allocated(&device->tags, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = set_put_strdup(device->tags, tag);
+ if (r < 0)
+ return r;
+
+ device->tags_generation++;
+ device->property_tags_outdated = true;
+
+ return 0;
+}
+
+int device_add_devlink(sd_device *device, const char *devlink) {
+ int r;
+
+ assert(device);
+ assert(devlink);
+
+ r = set_ensure_allocated(&device->devlinks, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = set_put_strdup(device->devlinks, devlink);
+ if (r < 0)
+ return r;
+
+ device->devlinks_generation++;
+ device->property_devlinks_outdated = true;
+
+ return 0;
+}
+
+static int device_add_property_internal_from_string(sd_device *device, const char *str) {
+ _cleanup_free_ char *key = NULL;
+ char *value;
+
+ assert(device);
+ assert(str);
+
+ key = strdup(str);
+ if (!key)
+ return -ENOMEM;
+
+ value = strchr(key, '=');
+ if (!value)
+ return -EINVAL;
+
+ *value = '\0';
+
+ if (isempty(++value))
+ value = NULL;
+
+ return device_add_property_internal(device, key, value);
+}
+
+int device_set_usec_initialized(sd_device *device, usec_t when) {
+ char s[DECIMAL_STR_MAX(usec_t)];
+ int r;
+
+ assert(device);
+
+ xsprintf(s, USEC_FMT, when);
+
+ r = device_add_property_internal(device, "USEC_INITIALIZED", s);
+ if (r < 0)
+ return r;
+
+ device->usec_initialized = when;
+ return 0;
+}
+
+static int handle_db_line(sd_device *device, char key, const char *value) {
+ char *path;
+ int r;
+
+ assert(device);
+ assert(value);
+
+ switch (key) {
+ case 'G':
+ r = device_add_tag(device, value);
+ if (r < 0)
+ return r;
+
+ break;
+ case 'S':
+ path = strjoina("/dev/", value);
+ r = device_add_devlink(device, path);
+ if (r < 0)
+ return r;
+
+ break;
+ case 'E':
+ r = device_add_property_internal_from_string(device, value);
+ if (r < 0)
+ return r;
+
+ break;
+ case 'I': {
+ usec_t t;
+
+ r = safe_atou64(value, &t);
+ if (r < 0)
+ return r;
+
+ r = device_set_usec_initialized(device, t);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+ case 'L':
+ r = safe_atoi(value, &device->devlink_priority);
+ if (r < 0)
+ return r;
+
+ break;
+ case 'W':
+ r = safe_atoi(value, &device->watch_handle);
+ if (r < 0)
+ return r;
+
+ break;
+ default:
+ log_device_debug(device, "sd-device: Unknown key '%c' in device db, ignoring", key);
+ }
+
+ return 0;
+}
+
+int device_get_id_filename(sd_device *device, const char **ret) {
+ assert(device);
+ assert(ret);
+
+ if (!device->id_filename) {
+ _cleanup_free_ char *id = NULL;
+ const char *subsystem;
+ dev_t devnum;
+ int ifindex, r;
+
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r < 0)
+ return r;
+
+ if (sd_device_get_devnum(device, &devnum) >= 0) {
+ assert(subsystem);
+
+ /* use dev_t — b259:131072, c254:0 */
+ r = asprintf(&id, "%c%u:%u",
+ streq(subsystem, "block") ? 'b' : 'c',
+ major(devnum), minor(devnum));
+ if (r < 0)
+ return -ENOMEM;
+ } else if (sd_device_get_ifindex(device, &ifindex) >= 0) {
+ /* use netdev ifindex — n3 */
+ r = asprintf(&id, "n%u", (unsigned) ifindex);
+ if (r < 0)
+ return -ENOMEM;
+ } else {
+ /* use $subsys:$sysname — pci:0000:00:1f.2
+ * sysname() has '!' translated, get it from devpath
+ */
+ const char *sysname;
+
+ sysname = basename(device->devpath);
+ if (!sysname)
+ return -EINVAL;
+
+ if (!subsystem)
+ return -EINVAL;
+
+ if (streq(subsystem, "drivers")) {
+ /* the 'drivers' pseudo-subsystem is special, and needs the real subsystem
+ * encoded as well */
+ r = asprintf(&id, "+drivers:%s:%s", device->driver_subsystem, sysname);
+ if (r < 0)
+ return -ENOMEM;
+ } else {
+ r = asprintf(&id, "+%s:%s", subsystem, sysname);
+ if (r < 0)
+ return -ENOMEM;
+ }
+ }
+
+ device->id_filename = TAKE_PTR(id);
+ }
+
+ *ret = device->id_filename;
+ return 0;
+}
+
+int device_read_db_internal(sd_device *device, bool force) {
+ _cleanup_free_ char *db = NULL;
+ char *path;
+ const char *id, *value;
+ char key;
+ size_t db_len;
+ unsigned i;
+ int r;
+
+ enum {
+ PRE_KEY,
+ KEY,
+ PRE_VALUE,
+ VALUE,
+ INVALID_LINE,
+ } state = PRE_KEY;
+
+ assert(device);
+
+ if (device->db_loaded || (!force && device->sealed))
+ return 0;
+
+ r = device_get_id_filename(device, &id);
+ if (r < 0)
+ return r;
+
+ path = strjoina("/run/udev/data/", id);
+
+ r = read_full_file(path, &db, &db_len);
+ if (r < 0) {
+ if (r == -ENOENT)
+ return 0;
+ else
+ return log_device_debug_errno(device, r, "sd-device: Failed to read db '%s': %m", path);
+ }
+
+ /* devices with a database entry are initialized */
+ device->is_initialized = true;
+
+ device->db_loaded = true;
+
+ for (i = 0; i < db_len; i++) {
+ switch (state) {
+ case PRE_KEY:
+ if (!strchr(NEWLINE, db[i])) {
+ key = db[i];
+
+ state = KEY;
+ }
+
+ break;
+ case KEY:
+ if (db[i] != ':') {
+ log_device_debug(device, "sd-device: Invalid db entry with key '%c', ignoring", key);
+
+ state = INVALID_LINE;
+ } else {
+ db[i] = '\0';
+
+ state = PRE_VALUE;
+ }
+
+ break;
+ case PRE_VALUE:
+ value = &db[i];
+
+ state = VALUE;
+
+ break;
+ case INVALID_LINE:
+ if (strchr(NEWLINE, db[i]))
+ state = PRE_KEY;
+
+ break;
+ case VALUE:
+ if (strchr(NEWLINE, db[i])) {
+ db[i] = '\0';
+ r = handle_db_line(device, key, value);
+ if (r < 0)
+ log_device_debug_errno(device, r, "sd-device: Failed to handle db entry '%c:%s', ignoring: %m", key, value);
+
+ state = PRE_KEY;
+ }
+
+ break;
+ default:
+ assert_not_reached("Invalid state when parsing db");
+ }
+ }
+
+ return 0;
+}
+
+_public_ int sd_device_get_is_initialized(sd_device *device) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ return device->is_initialized;
+}
+
+_public_ int sd_device_get_usec_since_initialized(sd_device *device, uint64_t *usec) {
+ usec_t now_ts;
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(usec, -EINVAL);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (!device->is_initialized)
+ return -EBUSY;
+
+ if (!device->usec_initialized)
+ return -ENODATA;
+
+ now_ts = now(clock_boottime_or_monotonic());
+
+ if (now_ts < device->usec_initialized)
+ return -EIO;
+
+ *usec = now_ts - device->usec_initialized;
+ return 0;
+}
+
+_public_ const char *sd_device_get_tag_first(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ (void) device_read_db(device);
+
+ device->tags_iterator_generation = device->tags_generation;
+ device->tags_iterator = ITERATOR_FIRST;
+
+ (void) set_iterate(device->tags, &device->tags_iterator, &v);
+ return v;
+}
+
+_public_ const char *sd_device_get_tag_next(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ (void) device_read_db(device);
+
+ if (device->tags_iterator_generation != device->tags_generation)
+ return NULL;
+
+ (void) set_iterate(device->tags, &device->tags_iterator, &v);
+ return v;
+}
+
+_public_ const char *sd_device_get_devlink_first(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ (void) device_read_db(device);
+
+ device->devlinks_iterator_generation = device->devlinks_generation;
+ device->devlinks_iterator = ITERATOR_FIRST;
+
+ (void) set_iterate(device->devlinks, &device->devlinks_iterator, &v);
+ return v;
+}
+
+_public_ const char *sd_device_get_devlink_next(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ (void) device_read_db(device);
+
+ if (device->devlinks_iterator_generation != device->devlinks_generation)
+ return NULL;
+
+ (void) set_iterate(device->devlinks, &device->devlinks_iterator, &v);
+ return v;
+}
+
+int device_properties_prepare(sd_device *device) {
+ int r;
+
+ assert(device);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (device->property_devlinks_outdated) {
+ _cleanup_free_ char *devlinks = NULL;
+ size_t devlinks_allocated = 0, devlinks_len = 0;
+ const char *devlink;
+
+ for (devlink = sd_device_get_devlink_first(device); devlink; devlink = sd_device_get_devlink_next(device)) {
+ char *e;
+
+ if (!GREEDY_REALLOC(devlinks, devlinks_allocated, devlinks_len + strlen(devlink) + 2))
+ return -ENOMEM;
+ if (devlinks_len > 0)
+ stpcpy(devlinks + devlinks_len++, " ");
+ e = stpcpy(devlinks + devlinks_len, devlink);
+ devlinks_len = e - devlinks;
+ }
+
+ r = device_add_property_internal(device, "DEVLINKS", devlinks);
+ if (r < 0)
+ return r;
+
+ device->property_devlinks_outdated = false;
+ }
+
+ if (device->property_tags_outdated) {
+ _cleanup_free_ char *tags = NULL;
+ size_t tags_allocated = 0, tags_len = 0;
+ const char *tag;
+
+ if (!GREEDY_REALLOC(tags, tags_allocated, 2))
+ return -ENOMEM;
+ stpcpy(tags, ":");
+ tags_len++;
+
+ for (tag = sd_device_get_tag_first(device); tag; tag = sd_device_get_tag_next(device)) {
+ char *e;
+
+ if (!GREEDY_REALLOC(tags, tags_allocated, tags_len + strlen(tag) + 2))
+ return -ENOMEM;
+ e = stpcpy(stpcpy(tags + tags_len, tag), ":");
+ tags_len = e - tags;
+ }
+
+ r = device_add_property_internal(device, "TAGS", tags);
+ if (r < 0)
+ return r;
+
+ device->property_tags_outdated = false;
+ }
+
+ return 0;
+}
+
+_public_ const char *sd_device_get_property_first(sd_device *device, const char **_value) {
+ const char *key;
+ const char *value;
+ int r;
+
+ assert_return(device, NULL);
+
+ r = device_properties_prepare(device);
+ if (r < 0)
+ return NULL;
+
+ device->properties_iterator_generation = device->properties_generation;
+ device->properties_iterator = ITERATOR_FIRST;
+
+ ordered_hashmap_iterate(device->properties, &device->properties_iterator, (void**)&value, (const void**)&key);
+
+ if (_value)
+ *_value = value;
+ return key;
+}
+
+_public_ const char *sd_device_get_property_next(sd_device *device, const char **_value) {
+ const char *key;
+ const char *value;
+ int r;
+
+ assert_return(device, NULL);
+
+ r = device_properties_prepare(device);
+ if (r < 0)
+ return NULL;
+
+ if (device->properties_iterator_generation != device->properties_generation)
+ return NULL;
+
+ ordered_hashmap_iterate(device->properties, &device->properties_iterator, (void**)&value, (const void**)&key);
+
+ if (_value)
+ *_value = value;
+ return key;
+}
+
+static int device_sysattrs_read_all(sd_device *device) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ const char *syspath;
+ struct dirent *dent;
+ int r;
+
+ assert(device);
+
+ if (device->sysattrs_read)
+ return 0;
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ dir = opendir(syspath);
+ if (!dir)
+ return -errno;
+
+ r = set_ensure_allocated(&device->sysattrs, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ FOREACH_DIRENT_ALL(dent, dir, return -errno) {
+ char *path;
+ struct stat statbuf;
+
+ /* only handle symlinks and regular files */
+ if (!IN_SET(dent->d_type, DT_LNK, DT_REG))
+ continue;
+
+ path = strjoina(syspath, "/", dent->d_name);
+
+ if (lstat(path, &statbuf) != 0)
+ continue;
+
+ if (!(statbuf.st_mode & S_IRUSR))
+ continue;
+
+ r = set_put_strdup(device->sysattrs, dent->d_name);
+ if (r < 0)
+ return r;
+ }
+
+ device->sysattrs_read = true;
+
+ return 0;
+}
+
+_public_ const char *sd_device_get_sysattr_first(sd_device *device) {
+ void *v;
+ int r;
+
+ assert_return(device, NULL);
+
+ if (!device->sysattrs_read) {
+ r = device_sysattrs_read_all(device);
+ if (r < 0) {
+ errno = -r;
+ return NULL;
+ }
+ }
+
+ device->sysattrs_iterator = ITERATOR_FIRST;
+
+ (void) set_iterate(device->sysattrs, &device->sysattrs_iterator, &v);
+ return v;
+}
+
+_public_ const char *sd_device_get_sysattr_next(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ if (!device->sysattrs_read)
+ return NULL;
+
+ (void) set_iterate(device->sysattrs, &device->sysattrs_iterator, &v);
+ return v;
+}
+
+_public_ int sd_device_has_tag(sd_device *device, const char *tag) {
+ assert_return(device, -EINVAL);
+ assert_return(tag, -EINVAL);
+
+ (void) device_read_db(device);
+
+ return !!set_contains(device->tags, tag);
+}
+
+_public_ int sd_device_get_property_value(sd_device *device, const char *key, const char **_value) {
+ char *value;
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(key, -EINVAL);
+
+ r = device_properties_prepare(device);
+ if (r < 0)
+ return r;
+
+ value = ordered_hashmap_get(device->properties, key);
+ if (!value)
+ return -ENOENT;
+
+ if (_value)
+ *_value = value;
+ return 0;
+}
+
+/* replaces the value if it already exists */
+static int device_add_sysattr_value(sd_device *device, const char *_key, char *value) {
+ _cleanup_free_ char *key = NULL;
+ _cleanup_free_ char *value_old = NULL;
+ int r;
+
+ assert(device);
+ assert(_key);
+
+ r = hashmap_ensure_allocated(&device->sysattr_values, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ value_old = hashmap_remove2(device->sysattr_values, _key, (void **)&key);
+ if (!key) {
+ key = strdup(_key);
+ if (!key)
+ return -ENOMEM;
+ }
+
+ r = hashmap_put(device->sysattr_values, key, value);
+ if (r < 0)
+ return r;
+ TAKE_PTR(key);
+
+ return 0;
+}
+
+static int device_get_sysattr_value(sd_device *device, const char *_key, const char **_value) {
+ const char *key = NULL, *value;
+
+ assert(device);
+ assert(_key);
+
+ value = hashmap_get2(device->sysattr_values, _key, (void **) &key);
+ if (!key)
+ return -ENOENT;
+
+ if (_value)
+ *_value = value;
+ return 0;
+}
+
+/* We cache all sysattr lookups. If an attribute does not exist, it is stored
+ * with a NULL value in the cache, otherwise the returned string is stored */
+_public_ int sd_device_get_sysattr_value(sd_device *device, const char *sysattr, const char **_value) {
+ _cleanup_free_ char *value = NULL;
+ const char *syspath, *cached_value = NULL;
+ char *path;
+ struct stat statbuf;
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(sysattr, -EINVAL);
+
+ /* look for possibly already cached result */
+ r = device_get_sysattr_value(device, sysattr, &cached_value);
+ if (r != -ENOENT) {
+ if (r < 0)
+ return r;
+
+ if (!cached_value)
+ /* we looked up the sysattr before and it did not exist */
+ return -ENOENT;
+
+ if (_value)
+ *_value = cached_value;
+
+ return 0;
+ }
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ path = strjoina(syspath, "/", sysattr);
+ r = lstat(path, &statbuf);
+ if (r < 0) {
+ /* remember that we could not access the sysattr */
+ r = device_add_sysattr_value(device, sysattr, NULL);
+ if (r < 0)
+ return r;
+
+ return -ENOENT;
+ } else if (S_ISLNK(statbuf.st_mode)) {
+ /* Some core links return only the last element of the target path,
+ * these are just values, the paths should not be exposed. */
+ if (STR_IN_SET(sysattr, "driver", "subsystem", "module")) {
+ r = readlink_value(path, &value);
+ if (r < 0)
+ return r;
+ } else
+ return -EINVAL;
+ } else if (S_ISDIR(statbuf.st_mode)) {
+ /* skip directories */
+ return -EINVAL;
+ } else if (!(statbuf.st_mode & S_IRUSR)) {
+ /* skip non-readable files */
+ return -EPERM;
+ } else {
+ size_t size;
+
+ /* read attribute value */
+ r = read_full_file(path, &value, &size);
+ if (r < 0)
+ return r;
+
+ /* drop trailing newlines */
+ while (size > 0 && value[--size] == '\n')
+ value[size] = '\0';
+ }
+
+ r = device_add_sysattr_value(device, sysattr, value);
+ if (r < 0)
+ return r;
+
+ *_value = TAKE_PTR(value);
+
+ return 0;
+}
+
+static void device_remove_sysattr_value(sd_device *device, const char *_key) {
+ _cleanup_free_ char *key = NULL;
+
+ assert(device);
+ assert(_key);
+
+ free(hashmap_remove2(device->sysattr_values, _key, (void **) &key));
+}
+
+/* set the attribute and save it in the cache. If a NULL value is passed the
+ * attribute is cleared from the cache */
+_public_ int sd_device_set_sysattr_value(sd_device *device, const char *sysattr, const char *_value) {
+ _cleanup_free_ char *value = NULL;
+ const char *syspath, *path;
+ size_t len;
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(sysattr, -EINVAL);
+
+ if (!_value) {
+ device_remove_sysattr_value(device, sysattr);
+ return 0;
+ }
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ path = strjoina(syspath, "/", sysattr);
+
+ len = strlen(_value);
+
+ /* drop trailing newlines */
+ while (len > 0 && _value[len - 1] == '\n')
+ len --;
+
+ /* value length is limited to 4k */
+ if (len > 4096)
+ return -EINVAL;
+
+ value = strndup(_value, len);
+ if (!value)
+ return -ENOMEM;
+
+ r = write_string_file(path, value, WRITE_STRING_FILE_DISABLE_BUFFER | WRITE_STRING_FILE_NOFOLLOW);
+ if (r < 0) {
+ if (r == -ELOOP)
+ return -EINVAL;
+ if (r == -EISDIR)
+ return r;
+
+ r = free_and_strdup(&value, "");
+ if (r < 0)
+ return r;
+
+ r = device_add_sysattr_value(device, sysattr, value);
+ if (r < 0)
+ return r;
+ TAKE_PTR(value);
+
+ return -ENXIO;
+ }
+
+ r = device_add_sysattr_value(device, sysattr, value);
+ if (r < 0)
+ return r;
+ TAKE_PTR(value);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-device/test-sd-device-monitor.c b/src/libsystemd/sd-device/test-sd-device-monitor.c
new file mode 100644
index 0000000..aa1edaa
--- /dev/null
+++ b/src/libsystemd/sd-device/test-sd-device-monitor.c
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+#include "sd-event.h"
+
+#include "device-monitor-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "tests.h"
+#include "util.h"
+#include "virt.h"
+
+static int monitor_handler(sd_device_monitor *m, sd_device *d, void *userdata) {
+ const char *s, *syspath = userdata;
+
+ assert_se(sd_device_get_syspath(d, &s) >= 0);
+ assert_se(streq(s, syspath));
+
+ return sd_event_exit(sd_device_monitor_get_event(m), 100);
+}
+
+static int test_receive_device_fail(void) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *loopback = NULL;
+ const char *syspath;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ /* Try to send device with invalid action and without seqnum. */
+ assert_se(sd_device_new_from_syspath(&loopback, "/sys/class/net/lo") >= 0);
+ assert_se(device_add_property(loopback, "ACTION", "hoge") >= 0);
+
+ assert_se(sd_device_get_syspath(loopback, &syspath) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_server), "sender") >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_client), "receiver") >= 0);
+
+ /* Do not use assert_se() here. */
+ r = device_monitor_send_device(monitor_server, monitor_client, loopback);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send loopback device: %m");
+
+ assert_se(sd_event_run(sd_device_monitor_get_event(monitor_client), 0) >= 0);
+
+ return 0;
+}
+
+static void test_send_receive_one(sd_device *device, bool subsystem_filter, bool tag_filter, bool use_bpf) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ const char *syspath, *subsystem, *tag, *devtype = NULL;
+
+ log_device_info(device, "/* %s(subsystem_filter=%s, tag_filter=%s, use_bpf=%s) */", __func__,
+ true_false(subsystem_filter), true_false(tag_filter), true_false(use_bpf));
+
+ assert_se(sd_device_get_syspath(device, &syspath) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_server), "sender") >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_client), "receiver") >= 0);
+
+ if (subsystem_filter) {
+ assert_se(sd_device_get_subsystem(device, &subsystem) >= 0);
+ (void) sd_device_get_devtype(device, &devtype);
+ assert_se(sd_device_monitor_filter_add_match_subsystem_devtype(monitor_client, subsystem, devtype) >= 0);
+ }
+
+ if (tag_filter)
+ FOREACH_DEVICE_TAG(device, tag)
+ assert_se(sd_device_monitor_filter_add_match_tag(monitor_client, tag) >= 0);
+
+ if ((subsystem_filter || tag_filter) && use_bpf)
+ assert_se(sd_device_monitor_filter_update(monitor_client) >= 0);
+
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100);
+}
+
+static void test_subsystem_filter(sd_device *device) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ const char *syspath, *subsystem, *p, *s;
+ sd_device *d;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(sd_device_get_syspath(device, &syspath) >= 0);
+ assert_se(sd_device_get_subsystem(device, &subsystem) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_server), "sender") >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_filter_add_match_subsystem_devtype(monitor_client, subsystem, NULL) >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_client), "receiver") >= 0);
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_add_match_subsystem(e, subsystem, false) >= 0);
+ FOREACH_DEVICE(e, d) {
+ assert_se(sd_device_get_syspath(d, &p) >= 0);
+ assert_se(sd_device_get_subsystem(d, &s) >= 0);
+
+ log_info("Sending device subsystem:%s syspath:%s", s, p);
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, d) >= 0);
+ }
+
+ log_info("Sending device subsystem:%s syspath:%s", subsystem, syspath);
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100);
+}
+
+static void test_sd_device_monitor_filter_remove(sd_device *device) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ const char *syspath;
+
+ log_device_info(device, "/* %s */", __func__);
+
+ assert_se(sd_device_get_syspath(device, &syspath) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_server), "sender") >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_client), "receiver") >= 0);
+
+ assert_se(sd_device_monitor_filter_add_match_subsystem_devtype(monitor_client, "hoge", NULL) >= 0);
+ assert_se(sd_device_monitor_filter_update(monitor_client) >= 0);
+
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_run(sd_device_monitor_get_event(monitor_client), 0) >= 0);
+
+ assert_se(sd_device_monitor_filter_remove(monitor_client) >= 0);
+
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100);
+}
+
+static void test_device_copy_properties(sd_device *device) {
+ _cleanup_(sd_device_unrefp) sd_device *copy = NULL;
+
+ assert_se(device_shallow_clone(device, &copy) >= 0);
+ assert_se(device_copy_properties(copy, device) >= 0);
+
+ test_send_receive_one(copy, false, false, false);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_device_unrefp) sd_device *loopback = NULL, *sda = NULL;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ if (getuid() != 0)
+ return log_tests_skipped("not root");
+
+ r = test_receive_device_fail();
+ if (r < 0) {
+ assert_se(r == -EPERM && detect_container() > 0);
+ return log_tests_skipped("Running in container? Skipping remaining tests");
+ }
+
+ assert_se(sd_device_new_from_syspath(&loopback, "/sys/class/net/lo") >= 0);
+ assert_se(device_add_property(loopback, "ACTION", "add") >= 0);
+ assert_se(device_add_property(loopback, "SEQNUM", "10") >= 0);
+
+ test_send_receive_one(loopback, false, false, false);
+ test_send_receive_one(loopback, true, false, false);
+ test_send_receive_one(loopback, false, true, false);
+ test_send_receive_one(loopback, true, true, false);
+ test_send_receive_one(loopback, true, false, true);
+ test_send_receive_one(loopback, false, true, true);
+ test_send_receive_one(loopback, true, true, true);
+
+ test_subsystem_filter(loopback);
+ test_sd_device_monitor_filter_remove(loopback);
+ test_device_copy_properties(loopback);
+
+ r = sd_device_new_from_subsystem_sysname(&sda, "block", "sda");
+ if (r < 0) {
+ log_info_errno(r, "Failed to create sd_device for sda, skipping remaining tests: %m");
+ return 0;
+ }
+
+ assert_se(device_add_property(sda, "ACTION", "change") >= 0);
+ assert_se(device_add_property(sda, "SEQNUM", "11") >= 0);
+
+ test_send_receive_one(sda, false, false, false);
+ test_send_receive_one(sda, true, false, false);
+ test_send_receive_one(sda, false, true, false);
+ test_send_receive_one(sda, true, true, false);
+ test_send_receive_one(sda, true, false, true);
+ test_send_receive_one(sda, false, true, true);
+ test_send_receive_one(sda, true, true, true);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-device/test-sd-device-thread.c b/src/libsystemd/sd-device/test-sd-device-thread.c
new file mode 100644
index 0000000..9f1c023
--- /dev/null
+++ b/src/libsystemd/sd-device/test-sd-device-thread.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sd-device.h"
+
+#include "device-util.h"
+#include "macro.h"
+
+static void* thread(void *p) {
+ sd_device **d = p;
+
+ assert_se(!(*d = sd_device_unref(*d)));
+
+ return NULL;
+}
+
+int main(int argc, char *argv[]) {
+ sd_device *loopback;
+ pthread_t t;
+ const char *key, *value;
+
+ assert_se(unsetenv("SYSTEMD_MEMPOOL") == 0);
+
+ assert_se(sd_device_new_from_syspath(&loopback, "/sys/class/net/lo") >= 0);
+
+ FOREACH_DEVICE_PROPERTY(loopback, key, value)
+ printf("%s=%s\n", key, value);
+
+ assert_se(pthread_create(&t, NULL, thread, &loopback) == 0);
+ assert_se(pthread_join(t, NULL) == 0);
+
+ assert_se(!loopback);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-device/test-sd-device.c b/src/libsystemd/sd-device/test-sd-device.c
new file mode 100644
index 0000000..e459b13
--- /dev/null
+++ b/src/libsystemd/sd-device/test-sd-device.c
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "device-enumerator-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "hashmap.h"
+#include "string-util.h"
+#include "tests.h"
+#include "util.h"
+
+static void test_sd_device_one(sd_device *d) {
+ const char *syspath, *subsystem, *val;
+ dev_t devnum;
+ usec_t usec;
+ int i, r;
+
+ assert_se(sd_device_get_syspath(d, &syspath) >= 0);
+
+ r = sd_device_get_subsystem(d, &subsystem);
+ assert_se(r >= 0 || r == -ENOENT);
+
+ r = sd_device_get_devtype(d, &val);
+ assert_se(r >= 0 || r == -ENOENT);
+
+ r = sd_device_get_devnum(d, &devnum);
+ assert_se((r >= 0 && major(devnum) > 0) || r == -ENOENT);
+
+ r = sd_device_get_ifindex(d, &i);
+ assert_se((r >= 0 && i > 0) || r == -ENOENT);
+
+ r = sd_device_get_driver(d, &val);
+ assert_se(r >= 0 || r == -ENOENT);
+
+ assert_se(sd_device_get_devpath(d, &val) >= 0);
+
+ r = sd_device_get_devname(d, &val);
+ assert_se(r >= 0 || r == -ENOENT);
+
+ assert_se(sd_device_get_sysname(d, &val) >= 0);
+
+ r = sd_device_get_sysnum(d, &val);
+ assert_se(r >= 0 || r == -ENOENT);
+
+ i = sd_device_get_is_initialized(d);
+ assert_se(i >= 0);
+ if (i > 0) {
+ r = sd_device_get_usec_since_initialized(d, &usec);
+ assert_se((r >= 0 && usec > 0) || r == -ENODATA);
+ }
+
+ r = sd_device_get_sysattr_value(d, "name_assign_type", &val);
+ assert_se(r >= 0 || IN_SET(r, -ENOENT, -EINVAL));
+
+ r = sd_device_get_property_value(d, "ID_NET_DRIVER", &val);
+ assert_se(r >= 0 || r == -ENOENT);
+
+ log_info("syspath:%s subsystem:%s initialized:%s", syspath, strna(subsystem), yes_no(i));
+}
+
+static void test_sd_device_enumerator_devices(void) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0);
+ FOREACH_DEVICE(e, d)
+ test_sd_device_one(d);
+}
+
+static void test_sd_device_enumerator_subsystems(void) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0);
+ FOREACH_SUBSYSTEM(e, d)
+ test_sd_device_one(d);
+}
+
+static void test_sd_device_enumerator_filter_subsystem_one(const char *subsystem, Hashmap *h) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d, *t;
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_add_match_subsystem(e, subsystem, true) >= 0);
+
+ FOREACH_DEVICE(e, d) {
+ const char *syspath;
+
+ assert_se(sd_device_get_syspath(d, &syspath) >= 0);
+ assert_se(t = hashmap_remove(h, syspath));
+ assert_se(!sd_device_unref(t));
+
+ log_debug("Removed subsystem:%s syspath:%s", subsystem, syspath);
+ }
+
+ assert_se(hashmap_isempty(h));
+}
+
+static void test_sd_device_enumerator_filter_subsystem(void) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ _cleanup_(hashmap_freep) Hashmap *subsystems;
+ sd_device *d;
+ Hashmap *h;
+ char *s;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(subsystems = hashmap_new(&string_hash_ops));
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+
+ FOREACH_DEVICE(e, d) {
+ const char *syspath, *subsystem;
+ int r;
+
+ assert_se(sd_device_get_syspath(d, &syspath) >= 0);
+
+ r = sd_device_get_subsystem(d, &subsystem);
+ assert_se(r >= 0 || r == -ENOENT);
+ if (r < 0)
+ continue;
+
+ h = hashmap_get(subsystems, subsystem);
+ if (!h) {
+ char *str;
+ assert_se(str = strdup(subsystem));
+ assert_se(h = hashmap_new(&string_hash_ops));
+ assert_se(hashmap_put(subsystems, str, h) >= 0);
+ }
+
+ assert_se(hashmap_put(h, syspath, d) >= 0);
+ assert_se(sd_device_ref(d));
+
+ log_debug("Added subsystem:%s syspath:%s", subsystem, syspath);
+ }
+
+ while ((h = hashmap_steal_first_key_and_value(subsystems, (void**) &s))) {
+ test_sd_device_enumerator_filter_subsystem_one(s, h);
+ hashmap_free(h);
+ free(s);
+ }
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_INFO);
+
+ test_sd_device_enumerator_devices();
+ test_sd_device_enumerator_subsystems();
+ test_sd_device_enumerator_filter_subsystem();
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-device/test-udev-device-thread.c b/src/libsystemd/sd-device/test-udev-device-thread.c
new file mode 100644
index 0000000..2fc0f59
--- /dev/null
+++ b/src/libsystemd/sd-device/test-udev-device-thread.c
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "libudev.h"
+
+#include "macro.h"
+
+static void* thread(void *p) {
+ struct udev_device **d = p;
+
+ assert_se(!(*d = udev_device_unref(*d)));
+
+ return NULL;
+}
+
+int main(int argc, char *argv[]) {
+ struct udev_device *loopback;
+ pthread_t t;
+
+ assert_se(unsetenv("SYSTEMD_MEMPOOL") == 0);
+
+ assert_se(loopback = udev_device_new_from_syspath(NULL, "/sys/class/net/lo"));
+
+ assert_se(udev_device_get_properties_list_entry(loopback));
+
+ assert_se(pthread_create(&t, NULL, thread, &loopback) == 0);
+ assert_se(pthread_join(t, NULL) == 0);
+
+ assert_se(!loopback);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-event/event-source.h b/src/libsystemd/sd-event/event-source.h
new file mode 100644
index 0000000..99ab8fc
--- /dev/null
+++ b/src/libsystemd/sd-event/event-source.h
@@ -0,0 +1,206 @@
+#pragma once
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/epoll.h>
+#include <sys/timerfd.h>
+#include <sys/wait.h>
+
+#include "sd-event.h"
+
+#include "fs-util.h"
+#include "hashmap.h"
+#include "list.h"
+#include "prioq.h"
+
+typedef enum EventSourceType {
+ SOURCE_IO,
+ SOURCE_TIME_REALTIME,
+ SOURCE_TIME_BOOTTIME,
+ SOURCE_TIME_MONOTONIC,
+ SOURCE_TIME_REALTIME_ALARM,
+ SOURCE_TIME_BOOTTIME_ALARM,
+ SOURCE_SIGNAL,
+ SOURCE_CHILD,
+ SOURCE_DEFER,
+ SOURCE_POST,
+ SOURCE_EXIT,
+ SOURCE_WATCHDOG,
+ SOURCE_INOTIFY,
+ _SOURCE_EVENT_SOURCE_TYPE_MAX,
+ _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
+} EventSourceType;
+
+/* All objects we use in epoll events start with this value, so that
+ * we know how to dispatch it */
+typedef enum WakeupType {
+ WAKEUP_NONE,
+ WAKEUP_EVENT_SOURCE,
+ WAKEUP_CLOCK_DATA,
+ WAKEUP_SIGNAL_DATA,
+ WAKEUP_INOTIFY_DATA,
+ _WAKEUP_TYPE_MAX,
+ _WAKEUP_TYPE_INVALID = -1,
+} WakeupType;
+
+struct inode_data;
+
+struct sd_event_source {
+ WakeupType wakeup;
+
+ unsigned n_ref;
+
+ sd_event *event;
+ void *userdata;
+ sd_event_handler_t prepare;
+
+ char *description;
+
+ EventSourceType type:5;
+ signed int enabled:3;
+ bool pending:1;
+ bool dispatching:1;
+ bool floating:1;
+
+ int64_t priority;
+ unsigned pending_index;
+ unsigned prepare_index;
+ uint64_t pending_iteration;
+ uint64_t prepare_iteration;
+
+ sd_event_destroy_t destroy_callback;
+
+ LIST_FIELDS(sd_event_source, sources);
+
+ union {
+ struct {
+ sd_event_io_handler_t callback;
+ int fd;
+ uint32_t events;
+ uint32_t revents;
+ bool registered:1;
+ bool owned:1;
+ } io;
+ struct {
+ sd_event_time_handler_t callback;
+ usec_t next, accuracy;
+ unsigned earliest_index;
+ unsigned latest_index;
+ } time;
+ struct {
+ sd_event_signal_handler_t callback;
+ struct signalfd_siginfo siginfo;
+ int sig;
+ } signal;
+ struct {
+ sd_event_child_handler_t callback;
+ siginfo_t siginfo;
+ pid_t pid;
+ int options;
+ } child;
+ struct {
+ sd_event_handler_t callback;
+ } defer;
+ struct {
+ sd_event_handler_t callback;
+ } post;
+ struct {
+ sd_event_handler_t callback;
+ unsigned prioq_index;
+ } exit;
+ struct {
+ sd_event_inotify_handler_t callback;
+ uint32_t mask;
+ struct inode_data *inode_data;
+ LIST_FIELDS(sd_event_source, by_inode_data);
+ } inotify;
+ };
+};
+
+struct clock_data {
+ WakeupType wakeup;
+ int fd;
+
+ /* For all clocks we maintain two priority queues each, one
+ * ordered for the earliest times the events may be
+ * dispatched, and one ordered by the latest times they must
+ * have been dispatched. The range between the top entries in
+ * the two prioqs is the time window we can freely schedule
+ * wakeups in */
+
+ Prioq *earliest;
+ Prioq *latest;
+ usec_t next;
+
+ bool needs_rearm:1;
+};
+
+struct signal_data {
+ WakeupType wakeup;
+
+ /* For each priority we maintain one signal fd, so that we
+ * only have to dequeue a single event per priority at a
+ * time. */
+
+ int fd;
+ int64_t priority;
+ sigset_t sigset;
+ sd_event_source *current;
+};
+
+/* A structure listing all event sources currently watching a specific inode */
+struct inode_data {
+ /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
+ ino_t ino;
+ dev_t dev;
+
+ /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that we can
+ * rearrange the priority still until then, as we need the original inode to change the priority as we need to
+ * add a watch descriptor to the right inotify for the priority which we can only do if we have a handle to the
+ * original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see below) of
+ * the sd-event object, so that it is efficient to close everything, before entering the next event loop
+ * iteration. */
+ int fd;
+
+ /* The inotify "watch descriptor" */
+ int wd;
+
+ /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has
+ * most recently been set on the watch descriptor. */
+ uint32_t combined_mask;
+
+ /* All event sources subscribed to this inode */
+ LIST_HEAD(sd_event_source, event_sources);
+
+ /* The inotify object we watch this inode with */
+ struct inotify_data *inotify_data;
+
+ /* A linked list of all inode data objects with fds to close (see above) */
+ LIST_FIELDS(struct inode_data, to_close);
+};
+
+/* A structure encapsulating an inotify fd */
+struct inotify_data {
+ WakeupType wakeup;
+
+ /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
+ * a time */
+
+ int fd;
+ int64_t priority;
+
+ Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
+ Hashmap *wd; /* The inode_data structures keyed by the watch descriptor for each */
+
+ /* The buffer we read inotify events into */
+ union inotify_event_buffer buffer;
+ size_t buffer_filled; /* fill level of the buffer */
+
+ /* How many event sources are currently marked pending for this inotify. We won't read new events off the
+ * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
+ * the events locally if they can't be coalesced). */
+ unsigned n_pending;
+
+ /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
+ * to make it efficient to figure out what inotify objects to process data on next. */
+ LIST_FIELDS(struct inotify_data, buffered);
+};
diff --git a/src/libsystemd/sd-event/event-util.c b/src/libsystemd/sd-event/event-util.c
new file mode 100644
index 0000000..43e73d5
--- /dev/null
+++ b/src/libsystemd/sd-event/event-util.c
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "event-source.h"
+#include "event-util.h"
+#include "log.h"
+#include "string-util.h"
+
+int event_reset_time(
+ sd_event *e,
+ sd_event_source **s,
+ clockid_t clock,
+ uint64_t usec,
+ uint64_t accuracy,
+ sd_event_time_handler_t callback,
+ void *userdata,
+ int64_t priority,
+ const char *description,
+ bool force_reset) {
+
+ bool created = false;
+ int enabled, r;
+ clockid_t c;
+
+ assert(e);
+ assert(s);
+
+ if (*s) {
+ if (!force_reset) {
+ r = sd_event_source_get_enabled(*s, &enabled);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to query whether event source \"%s\" is enabled or not: %m",
+ strna((*s)->description ?: description));
+
+ if (enabled != SD_EVENT_OFF)
+ return 0;
+ }
+
+ r = sd_event_source_get_time_clock(*s, &c);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to get clock id of event source \"%s\": %m", strna((*s)->description ?: description));
+
+ if (c != clock)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "sd-event: Current clock id %i of event source \"%s\" is different from specified one %i.",
+ (int)c,
+ strna((*s)->description ? : description),
+ (int)clock);
+
+ r = sd_event_source_set_time(*s, usec);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to set time for event source \"%s\": %m", strna((*s)->description ?: description));
+
+ r = sd_event_source_set_time_accuracy(*s, accuracy);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to set accuracy for event source \"%s\": %m", strna((*s)->description ?: description));
+
+ /* callback function is not updated, as we do not have sd_event_source_set_time_callback(). */
+
+ (void) sd_event_source_set_userdata(*s, userdata);
+
+ r = sd_event_source_set_enabled(*s, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to enable event source \"%s\": %m", strna((*s)->description ?: description));
+ } else {
+ r = sd_event_add_time(e, s, clock, usec, accuracy, callback, userdata);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to create timer event \"%s\": %m", strna(description));
+
+ created = true;
+ }
+
+ r = sd_event_source_set_priority(*s, priority);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to set priority for event source \"%s\": %m", strna((*s)->description ?: description));
+
+ if (description) {
+ r = sd_event_source_set_description(*s, description);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to set description for event source \"%s\": %m", description);
+ }
+
+ return created;
+}
+
+int event_source_disable(sd_event_source *s) {
+ if (!s)
+ return 0;
+
+ return sd_event_source_set_enabled(s, SD_EVENT_OFF);
+}
+
+int event_source_is_enabled(sd_event_source *s) {
+ if (!s)
+ return false;
+
+ return sd_event_source_get_enabled(s, NULL);
+}
diff --git a/src/libsystemd/sd-event/event-util.h b/src/libsystemd/sd-event/event-util.h
new file mode 100644
index 0000000..0018095
--- /dev/null
+++ b/src/libsystemd/sd-event/event-util.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-event.h"
+
+int event_reset_time(sd_event *e, sd_event_source **s,
+ clockid_t clock, uint64_t usec, uint64_t accuracy,
+ sd_event_time_handler_t callback, void *userdata,
+ int64_t priority, const char *description, bool force_reset);
+int event_source_disable(sd_event_source *s);
+int event_source_is_enabled(sd_event_source *s);
diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c
new file mode 100644
index 0000000..04ba7e2
--- /dev/null
+++ b/src/libsystemd/sd-event/sd-event.c
@@ -0,0 +1,3541 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/epoll.h>
+#include <sys/timerfd.h>
+#include <sys/wait.h>
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "event-source.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "list.h"
+#include "macro.h"
+#include "missing.h"
+#include "prioq.h"
+#include "process-util.h"
+#include "set.h"
+#include "signal-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "util.h"
+
+#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
+
+static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
+ [SOURCE_IO] = "io",
+ [SOURCE_TIME_REALTIME] = "realtime",
+ [SOURCE_TIME_BOOTTIME] = "bootime",
+ [SOURCE_TIME_MONOTONIC] = "monotonic",
+ [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
+ [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
+ [SOURCE_SIGNAL] = "signal",
+ [SOURCE_CHILD] = "child",
+ [SOURCE_DEFER] = "defer",
+ [SOURCE_POST] = "post",
+ [SOURCE_EXIT] = "exit",
+ [SOURCE_WATCHDOG] = "watchdog",
+ [SOURCE_INOTIFY] = "inotify",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
+
+#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
+
+struct sd_event {
+ unsigned n_ref;
+
+ int epoll_fd;
+ int watchdog_fd;
+
+ Prioq *pending;
+ Prioq *prepare;
+
+ /* timerfd_create() only supports these five clocks so far. We
+ * can add support for more clocks when the kernel learns to
+ * deal with them, too. */
+ struct clock_data realtime;
+ struct clock_data boottime;
+ struct clock_data monotonic;
+ struct clock_data realtime_alarm;
+ struct clock_data boottime_alarm;
+
+ usec_t perturb;
+
+ sd_event_source **signal_sources; /* indexed by signal number */
+ Hashmap *signal_data; /* indexed by priority */
+
+ Hashmap *child_sources;
+ unsigned n_enabled_child_sources;
+
+ Set *post_sources;
+
+ Prioq *exit;
+
+ Hashmap *inotify_data; /* indexed by priority */
+
+ /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
+ LIST_HEAD(struct inode_data, inode_data_to_close);
+
+ /* A list of inotify objects that already have events buffered which aren't processed yet */
+ LIST_HEAD(struct inotify_data, inotify_data_buffered);
+
+ pid_t original_pid;
+
+ uint64_t iteration;
+ triple_timestamp timestamp;
+ int state;
+
+ bool exit_requested:1;
+ bool need_process_child:1;
+ bool watchdog:1;
+ bool profile_delays:1;
+
+ int exit_code;
+
+ pid_t tid;
+ sd_event **default_event_ptr;
+
+ usec_t watchdog_last, watchdog_period;
+
+ unsigned n_sources;
+
+ LIST_HEAD(sd_event_source, sources);
+
+ usec_t last_run, last_log;
+ unsigned delays[sizeof(usec_t) * 8];
+};
+
+static thread_local sd_event *default_event = NULL;
+
+static void source_disconnect(sd_event_source *s);
+static void event_gc_inode_data(sd_event *e, struct inode_data *d);
+
+static sd_event *event_resolve(sd_event *e) {
+ return e == SD_EVENT_DEFAULT ? default_event : e;
+}
+
+static int pending_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+ int r;
+
+ assert(x->pending);
+ assert(y->pending);
+
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
+ return -1;
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
+ return 1;
+
+ /* Lower priority values first */
+ r = CMP(x->priority, y->priority);
+ if (r != 0)
+ return r;
+
+ /* Older entries first */
+ return CMP(x->pending_iteration, y->pending_iteration);
+}
+
+static int prepare_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+ int r;
+
+ assert(x->prepare);
+ assert(y->prepare);
+
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
+ return -1;
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
+ return 1;
+
+ /* Move most recently prepared ones last, so that we can stop
+ * preparing as soon as we hit one that has already been
+ * prepared in the current iteration */
+ r = CMP(x->prepare_iteration, y->prepare_iteration);
+ if (r != 0)
+ return r;
+
+ /* Lower priority values first */
+ return CMP(x->priority, y->priority);
+}
+
+static int earliest_time_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+
+ assert(EVENT_SOURCE_IS_TIME(x->type));
+ assert(x->type == y->type);
+
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
+ return -1;
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
+ return 1;
+
+ /* Move the pending ones to the end */
+ if (!x->pending && y->pending)
+ return -1;
+ if (x->pending && !y->pending)
+ return 1;
+
+ /* Order by time */
+ return CMP(x->time.next, y->time.next);
+}
+
+static usec_t time_event_source_latest(const sd_event_source *s) {
+ return usec_add(s->time.next, s->time.accuracy);
+}
+
+static int latest_time_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+
+ assert(EVENT_SOURCE_IS_TIME(x->type));
+ assert(x->type == y->type);
+
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
+ return -1;
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
+ return 1;
+
+ /* Move the pending ones to the end */
+ if (!x->pending && y->pending)
+ return -1;
+ if (x->pending && !y->pending)
+ return 1;
+
+ /* Order by time */
+ return CMP(time_event_source_latest(x), time_event_source_latest(y));
+}
+
+static int exit_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+
+ assert(x->type == SOURCE_EXIT);
+ assert(y->type == SOURCE_EXIT);
+
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
+ return -1;
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
+ return 1;
+
+ /* Lower priority values first */
+ return CMP(x->priority, y->priority);
+}
+
+static void free_clock_data(struct clock_data *d) {
+ assert(d);
+ assert(d->wakeup == WAKEUP_CLOCK_DATA);
+
+ safe_close(d->fd);
+ prioq_free(d->earliest);
+ prioq_free(d->latest);
+}
+
+static sd_event *event_free(sd_event *e) {
+ sd_event_source *s;
+
+ assert(e);
+
+ while ((s = e->sources)) {
+ assert(s->floating);
+ source_disconnect(s);
+ sd_event_source_unref(s);
+ }
+
+ assert(e->n_sources == 0);
+
+ if (e->default_event_ptr)
+ *(e->default_event_ptr) = NULL;
+
+ safe_close(e->epoll_fd);
+ safe_close(e->watchdog_fd);
+
+ free_clock_data(&e->realtime);
+ free_clock_data(&e->boottime);
+ free_clock_data(&e->monotonic);
+ free_clock_data(&e->realtime_alarm);
+ free_clock_data(&e->boottime_alarm);
+
+ prioq_free(e->pending);
+ prioq_free(e->prepare);
+ prioq_free(e->exit);
+
+ free(e->signal_sources);
+ hashmap_free(e->signal_data);
+
+ hashmap_free(e->inotify_data);
+
+ hashmap_free(e->child_sources);
+ set_free(e->post_sources);
+
+ return mfree(e);
+}
+
+_public_ int sd_event_new(sd_event** ret) {
+ sd_event *e;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ e = new(sd_event, 1);
+ if (!e)
+ return -ENOMEM;
+
+ *e = (sd_event) {
+ .n_ref = 1,
+ .epoll_fd = -1,
+ .watchdog_fd = -1,
+ .realtime.wakeup = WAKEUP_CLOCK_DATA,
+ .realtime.fd = -1,
+ .realtime.next = USEC_INFINITY,
+ .boottime.wakeup = WAKEUP_CLOCK_DATA,
+ .boottime.fd = -1,
+ .boottime.next = USEC_INFINITY,
+ .monotonic.wakeup = WAKEUP_CLOCK_DATA,
+ .monotonic.fd = -1,
+ .monotonic.next = USEC_INFINITY,
+ .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
+ .realtime_alarm.fd = -1,
+ .realtime_alarm.next = USEC_INFINITY,
+ .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
+ .boottime_alarm.fd = -1,
+ .boottime_alarm.next = USEC_INFINITY,
+ .perturb = USEC_INFINITY,
+ .original_pid = getpid_cached(),
+ };
+
+ r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
+ if (r < 0)
+ goto fail;
+
+ e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (e->epoll_fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
+
+ if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
+ log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
+ e->profile_delays = true;
+ }
+
+ *ret = e;
+ return 0;
+
+fail:
+ event_free(e);
+ return r;
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
+
+static bool event_pid_changed(sd_event *e) {
+ assert(e);
+
+ /* We don't support people creating an event loop and keeping
+ * it around over a fork(). Let's complain. */
+
+ return e->original_pid != getpid_cached();
+}
+
+static void source_io_unregister(sd_event_source *s) {
+ int r;
+
+ assert(s);
+ assert(s->type == SOURCE_IO);
+
+ if (event_pid_changed(s->event))
+ return;
+
+ if (!s->io.registered)
+ return;
+
+ r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
+ if (r < 0)
+ log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
+ strna(s->description), event_source_type_to_string(s->type));
+
+ s->io.registered = false;
+}
+
+static int source_io_register(
+ sd_event_source *s,
+ int enabled,
+ uint32_t events) {
+
+ struct epoll_event ev;
+ int r;
+
+ assert(s);
+ assert(s->type == SOURCE_IO);
+ assert(enabled != SD_EVENT_OFF);
+
+ ev = (struct epoll_event) {
+ .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
+ .data.ptr = s,
+ };
+
+ if (s->io.registered)
+ r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
+ else
+ r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
+ if (r < 0)
+ return -errno;
+
+ s->io.registered = true;
+
+ return 0;
+}
+
+static clockid_t event_source_type_to_clock(EventSourceType t) {
+
+ switch (t) {
+
+ case SOURCE_TIME_REALTIME:
+ return CLOCK_REALTIME;
+
+ case SOURCE_TIME_BOOTTIME:
+ return CLOCK_BOOTTIME;
+
+ case SOURCE_TIME_MONOTONIC:
+ return CLOCK_MONOTONIC;
+
+ case SOURCE_TIME_REALTIME_ALARM:
+ return CLOCK_REALTIME_ALARM;
+
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ return CLOCK_BOOTTIME_ALARM;
+
+ default:
+ return (clockid_t) -1;
+ }
+}
+
+static EventSourceType clock_to_event_source_type(clockid_t clock) {
+
+ switch (clock) {
+
+ case CLOCK_REALTIME:
+ return SOURCE_TIME_REALTIME;
+
+ case CLOCK_BOOTTIME:
+ return SOURCE_TIME_BOOTTIME;
+
+ case CLOCK_MONOTONIC:
+ return SOURCE_TIME_MONOTONIC;
+
+ case CLOCK_REALTIME_ALARM:
+ return SOURCE_TIME_REALTIME_ALARM;
+
+ case CLOCK_BOOTTIME_ALARM:
+ return SOURCE_TIME_BOOTTIME_ALARM;
+
+ default:
+ return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
+ }
+}
+
+static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
+ assert(e);
+
+ switch (t) {
+
+ case SOURCE_TIME_REALTIME:
+ return &e->realtime;
+
+ case SOURCE_TIME_BOOTTIME:
+ return &e->boottime;
+
+ case SOURCE_TIME_MONOTONIC:
+ return &e->monotonic;
+
+ case SOURCE_TIME_REALTIME_ALARM:
+ return &e->realtime_alarm;
+
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ return &e->boottime_alarm;
+
+ default:
+ return NULL;
+ }
+}
+
+static void event_free_signal_data(sd_event *e, struct signal_data *d) {
+ assert(e);
+
+ if (!d)
+ return;
+
+ hashmap_remove(e->signal_data, &d->priority);
+ safe_close(d->fd);
+ free(d);
+}
+
+static int event_make_signal_data(
+ sd_event *e,
+ int sig,
+ struct signal_data **ret) {
+
+ struct epoll_event ev;
+ struct signal_data *d;
+ bool added = false;
+ sigset_t ss_copy;
+ int64_t priority;
+ int r;
+
+ assert(e);
+
+ if (event_pid_changed(e))
+ return -ECHILD;
+
+ if (e->signal_sources && e->signal_sources[sig])
+ priority = e->signal_sources[sig]->priority;
+ else
+ priority = SD_EVENT_PRIORITY_NORMAL;
+
+ d = hashmap_get(e->signal_data, &priority);
+ if (d) {
+ if (sigismember(&d->sigset, sig) > 0) {
+ if (ret)
+ *ret = d;
+ return 0;
+ }
+ } else {
+ r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
+ if (r < 0)
+ return r;
+
+ d = new(struct signal_data, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (struct signal_data) {
+ .wakeup = WAKEUP_SIGNAL_DATA,
+ .fd = -1,
+ .priority = priority,
+ };
+
+ r = hashmap_put(e->signal_data, &d->priority, d);
+ if (r < 0) {
+ free(d);
+ return r;
+ }
+
+ added = true;
+ }
+
+ ss_copy = d->sigset;
+ assert_se(sigaddset(&ss_copy, sig) >= 0);
+
+ r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ d->sigset = ss_copy;
+
+ if (d->fd >= 0) {
+ if (ret)
+ *ret = d;
+ return 0;
+ }
+
+ d->fd = fd_move_above_stdio(r);
+
+ ev = (struct epoll_event) {
+ .events = EPOLLIN,
+ .data.ptr = d,
+ };
+
+ r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (ret)
+ *ret = d;
+
+ return 0;
+
+fail:
+ if (added)
+ event_free_signal_data(e, d);
+
+ return r;
+}
+
+static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
+ assert(e);
+ assert(d);
+
+ /* Turns off the specified signal in the signal data
+ * object. If the signal mask of the object becomes empty that
+ * way removes it. */
+
+ if (sigismember(&d->sigset, sig) == 0)
+ return;
+
+ assert_se(sigdelset(&d->sigset, sig) >= 0);
+
+ if (sigisemptyset(&d->sigset)) {
+ /* If all the mask is all-zero we can get rid of the structure */
+ event_free_signal_data(e, d);
+ return;
+ }
+
+ assert(d->fd >= 0);
+
+ if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
+ log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
+}
+
+static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
+ struct signal_data *d;
+ static const int64_t zero_priority = 0;
+
+ assert(e);
+
+ /* Rechecks if the specified signal is still something we are
+ * interested in. If not, we'll unmask it, and possibly drop
+ * the signalfd for it. */
+
+ if (sig == SIGCHLD &&
+ e->n_enabled_child_sources > 0)
+ return;
+
+ if (e->signal_sources &&
+ e->signal_sources[sig] &&
+ e->signal_sources[sig]->enabled != SD_EVENT_OFF)
+ return;
+
+ /*
+ * The specified signal might be enabled in three different queues:
+ *
+ * 1) the one that belongs to the priority passed (if it is non-NULL)
+ * 2) the one that belongs to the priority of the event source of the signal (if there is one)
+ * 3) the 0 priority (to cover the SIGCHLD case)
+ *
+ * Hence, let's remove it from all three here.
+ */
+
+ if (priority) {
+ d = hashmap_get(e->signal_data, priority);
+ if (d)
+ event_unmask_signal_data(e, d, sig);
+ }
+
+ if (e->signal_sources && e->signal_sources[sig]) {
+ d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
+ if (d)
+ event_unmask_signal_data(e, d, sig);
+ }
+
+ d = hashmap_get(e->signal_data, &zero_priority);
+ if (d)
+ event_unmask_signal_data(e, d, sig);
+}
+
+static void source_disconnect(sd_event_source *s) {
+ sd_event *event;
+
+ assert(s);
+
+ if (!s->event)
+ return;
+
+ assert(s->event->n_sources > 0);
+
+ switch (s->type) {
+
+ case SOURCE_IO:
+ if (s->io.fd >= 0)
+ source_io_unregister(s);
+
+ break;
+
+ case SOURCE_TIME_REALTIME:
+ case SOURCE_TIME_BOOTTIME:
+ case SOURCE_TIME_MONOTONIC:
+ case SOURCE_TIME_REALTIME_ALARM:
+ case SOURCE_TIME_BOOTTIME_ALARM: {
+ struct clock_data *d;
+
+ d = event_get_clock_data(s->event, s->type);
+ assert(d);
+
+ prioq_remove(d->earliest, s, &s->time.earliest_index);
+ prioq_remove(d->latest, s, &s->time.latest_index);
+ d->needs_rearm = true;
+ break;
+ }
+
+ case SOURCE_SIGNAL:
+ if (s->signal.sig > 0) {
+
+ if (s->event->signal_sources)
+ s->event->signal_sources[s->signal.sig] = NULL;
+
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
+ }
+
+ break;
+
+ case SOURCE_CHILD:
+ if (s->child.pid > 0) {
+ if (s->enabled != SD_EVENT_OFF) {
+ assert(s->event->n_enabled_child_sources > 0);
+ s->event->n_enabled_child_sources--;
+ }
+
+ (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
+ }
+
+ break;
+
+ case SOURCE_DEFER:
+ /* nothing */
+ break;
+
+ case SOURCE_POST:
+ set_remove(s->event->post_sources, s);
+ break;
+
+ case SOURCE_EXIT:
+ prioq_remove(s->event->exit, s, &s->exit.prioq_index);
+ break;
+
+ case SOURCE_INOTIFY: {
+ struct inode_data *inode_data;
+
+ inode_data = s->inotify.inode_data;
+ if (inode_data) {
+ struct inotify_data *inotify_data;
+ assert_se(inotify_data = inode_data->inotify_data);
+
+ /* Detach this event source from the inode object */
+ LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
+ s->inotify.inode_data = NULL;
+
+ if (s->pending) {
+ assert(inotify_data->n_pending > 0);
+ inotify_data->n_pending--;
+ }
+
+ /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode is
+ * continued to being watched. That's because inotify doesn't really have an API for that: we
+ * can only change watch masks with access to the original inode either by fd or by path. But
+ * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
+ * continuously and keeping the mount busy which we can't really do. We could reconstruct the
+ * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
+ * there), but given the need for open_by_handle_at() which is privileged and not universally
+ * available this would be quite an incomplete solution. Hence we go the other way, leave the
+ * mask set, even if it is not minimized now, and ignore all events we aren't interested in
+ * anymore after reception. Yes, this sucks, but … Linux … */
+
+ /* Maybe release the inode data (and its inotify) */
+ event_gc_inode_data(s->event, inode_data);
+ }
+
+ break;
+ }
+
+ default:
+ assert_not_reached("Wut? I shouldn't exist.");
+ }
+
+ if (s->pending)
+ prioq_remove(s->event->pending, s, &s->pending_index);
+
+ if (s->prepare)
+ prioq_remove(s->event->prepare, s, &s->prepare_index);
+
+ event = s->event;
+
+ s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
+ s->event = NULL;
+ LIST_REMOVE(sources, event->sources, s);
+ event->n_sources--;
+
+ if (!s->floating)
+ sd_event_unref(event);
+}
+
+static void source_free(sd_event_source *s) {
+ assert(s);
+
+ source_disconnect(s);
+
+ if (s->type == SOURCE_IO && s->io.owned)
+ s->io.fd = safe_close(s->io.fd);
+
+ if (s->destroy_callback)
+ s->destroy_callback(s->userdata);
+
+ free(s->description);
+ free(s);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
+
+static int source_set_pending(sd_event_source *s, bool b) {
+ int r;
+
+ assert(s);
+ assert(s->type != SOURCE_EXIT);
+
+ if (s->pending == b)
+ return 0;
+
+ s->pending = b;
+
+ if (b) {
+ s->pending_iteration = s->event->iteration;
+
+ r = prioq_put(s->event->pending, s, &s->pending_index);
+ if (r < 0) {
+ s->pending = false;
+ return r;
+ }
+ } else
+ assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
+
+ if (EVENT_SOURCE_IS_TIME(s->type)) {
+ struct clock_data *d;
+
+ d = event_get_clock_data(s->event, s->type);
+ assert(d);
+
+ prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(d->latest, s, &s->time.latest_index);
+ d->needs_rearm = true;
+ }
+
+ if (s->type == SOURCE_SIGNAL && !b) {
+ struct signal_data *d;
+
+ d = hashmap_get(s->event->signal_data, &s->priority);
+ if (d && d->current == s)
+ d->current = NULL;
+ }
+
+ if (s->type == SOURCE_INOTIFY) {
+
+ assert(s->inotify.inode_data);
+ assert(s->inotify.inode_data->inotify_data);
+
+ if (b)
+ s->inotify.inode_data->inotify_data->n_pending ++;
+ else {
+ assert(s->inotify.inode_data->inotify_data->n_pending > 0);
+ s->inotify.inode_data->inotify_data->n_pending --;
+ }
+ }
+
+ return 0;
+}
+
+static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
+ sd_event_source *s;
+
+ assert(e);
+
+ s = new(sd_event_source, 1);
+ if (!s)
+ return NULL;
+
+ *s = (struct sd_event_source) {
+ .n_ref = 1,
+ .event = e,
+ .floating = floating,
+ .type = type,
+ .pending_index = PRIOQ_IDX_NULL,
+ .prepare_index = PRIOQ_IDX_NULL,
+ };
+
+ if (!floating)
+ sd_event_ref(e);
+
+ LIST_PREPEND(sources, e->sources, s);
+ e->n_sources++;
+
+ return s;
+}
+
+_public_ int sd_event_add_io(
+ sd_event *e,
+ sd_event_source **ret,
+ int fd,
+ uint32_t events,
+ sd_event_io_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(fd >= 0, -EBADF);
+ assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ s = source_new(e, !ret, SOURCE_IO);
+ if (!s)
+ return -ENOMEM;
+
+ s->wakeup = WAKEUP_EVENT_SOURCE;
+ s->io.fd = fd;
+ s->io.events = events;
+ s->io.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ON;
+
+ r = source_io_register(s, s->enabled, events);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+static void initialize_perturb(sd_event *e) {
+ sd_id128_t bootid = {};
+
+ /* When we sleep for longer, we try to realign the wakeup to
+ the same time within each minute/second/250ms, so that
+ events all across the system can be coalesced into a single
+ CPU wakeup. However, let's take some system-specific
+ randomness for this value, so that in a network of systems
+ with synced clocks timer events are distributed a
+ bit. Here, we calculate a perturbation usec offset from the
+ boot ID. */
+
+ if (_likely_(e->perturb != USEC_INFINITY))
+ return;
+
+ if (sd_id128_get_boot(&bootid) >= 0)
+ e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
+}
+
+static int event_setup_timer_fd(
+ sd_event *e,
+ struct clock_data *d,
+ clockid_t clock) {
+
+ struct epoll_event ev;
+ int r, fd;
+
+ assert(e);
+ assert(d);
+
+ if (_likely_(d->fd >= 0))
+ return 0;
+
+ fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ fd = fd_move_above_stdio(fd);
+
+ ev = (struct epoll_event) {
+ .events = EPOLLIN,
+ .data.ptr = d,
+ };
+
+ r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
+ if (r < 0) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ d->fd = fd;
+ return 0;
+}
+
+static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
+ assert(s);
+
+ return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
+}
+
+_public_ int sd_event_add_time(
+ sd_event *e,
+ sd_event_source **ret,
+ clockid_t clock,
+ uint64_t usec,
+ uint64_t accuracy,
+ sd_event_time_handler_t callback,
+ void *userdata) {
+
+ EventSourceType type;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ struct clock_data *d;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(accuracy != (uint64_t) -1, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
+ return -EOPNOTSUPP;
+
+ type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
+ if (type < 0)
+ return -EOPNOTSUPP;
+
+ if (!callback)
+ callback = time_exit_callback;
+
+ d = event_get_clock_data(e, type);
+ assert(d);
+
+ r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
+ if (r < 0)
+ return r;
+
+ r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
+ if (r < 0)
+ return r;
+
+ if (d->fd < 0) {
+ r = event_setup_timer_fd(e, d, clock);
+ if (r < 0)
+ return r;
+ }
+
+ s = source_new(e, !ret, type);
+ if (!s)
+ return -ENOMEM;
+
+ s->time.next = usec;
+ s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
+ s->time.callback = callback;
+ s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ d->needs_rearm = true;
+
+ r = prioq_put(d->earliest, s, &s->time.earliest_index);
+ if (r < 0)
+ return r;
+
+ r = prioq_put(d->latest, s, &s->time.latest_index);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ assert(s);
+
+ return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
+}
+
+_public_ int sd_event_add_signal(
+ sd_event *e,
+ sd_event_source **ret,
+ int sig,
+ sd_event_signal_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ struct signal_data *d;
+ sigset_t ss;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(SIGNAL_VALID(sig), -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = signal_exit_callback;
+
+ r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
+ if (r != 0)
+ return -r;
+
+ if (!sigismember(&ss, sig))
+ return -EBUSY;
+
+ if (!e->signal_sources) {
+ e->signal_sources = new0(sd_event_source*, _NSIG);
+ if (!e->signal_sources)
+ return -ENOMEM;
+ } else if (e->signal_sources[sig])
+ return -EBUSY;
+
+ s = source_new(e, !ret, SOURCE_SIGNAL);
+ if (!s)
+ return -ENOMEM;
+
+ s->signal.sig = sig;
+ s->signal.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ON;
+
+ e->signal_sources[sig] = s;
+
+ r = event_make_signal_data(e, sig, &d);
+ if (r < 0)
+ return r;
+
+ /* Use the signal name as description for the event source by default */
+ (void) sd_event_source_set_description(s, signal_to_string(sig));
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_event_add_child(
+ sd_event *e,
+ sd_event_source **ret,
+ pid_t pid,
+ int options,
+ sd_event_child_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(pid > 1, -EINVAL);
+ assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
+ assert_return(options != 0, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ r = hashmap_ensure_allocated(&e->child_sources, NULL);
+ if (r < 0)
+ return r;
+
+ if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
+ return -EBUSY;
+
+ s = source_new(e, !ret, SOURCE_CHILD);
+ if (!s)
+ return -ENOMEM;
+
+ s->child.pid = pid;
+ s->child.options = options;
+ s->child.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
+ if (r < 0)
+ return r;
+
+ e->n_enabled_child_sources++;
+
+ r = event_make_signal_data(e, SIGCHLD, NULL);
+ if (r < 0) {
+ e->n_enabled_child_sources--;
+ return r;
+ }
+
+ e->need_process_child = true;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_event_add_defer(
+ sd_event *e,
+ sd_event_source **ret,
+ sd_event_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(callback, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ s = source_new(e, !ret, SOURCE_DEFER);
+ if (!s)
+ return -ENOMEM;
+
+ s->defer.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_event_add_post(
+ sd_event *e,
+ sd_event_source **ret,
+ sd_event_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(callback, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ r = set_ensure_allocated(&e->post_sources, NULL);
+ if (r < 0)
+ return r;
+
+ s = source_new(e, !ret, SOURCE_POST);
+ if (!s)
+ return -ENOMEM;
+
+ s->post.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ON;
+
+ r = set_put(e->post_sources, s);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_event_add_exit(
+ sd_event *e,
+ sd_event_source **ret,
+ sd_event_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(callback, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
+ if (r < 0)
+ return r;
+
+ s = source_new(e, !ret, SOURCE_EXIT);
+ if (!s)
+ return -ENOMEM;
+
+ s->exit.callback = callback;
+ s->userdata = userdata;
+ s->exit.prioq_index = PRIOQ_IDX_NULL;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
+ assert(e);
+
+ if (!d)
+ return;
+
+ assert(hashmap_isempty(d->inodes));
+ assert(hashmap_isempty(d->wd));
+
+ if (d->buffer_filled > 0)
+ LIST_REMOVE(buffered, e->inotify_data_buffered, d);
+
+ hashmap_free(d->inodes);
+ hashmap_free(d->wd);
+
+ assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
+
+ if (d->fd >= 0) {
+ if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
+ log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
+
+ safe_close(d->fd);
+ }
+ free(d);
+}
+
+static int event_make_inotify_data(
+ sd_event *e,
+ int64_t priority,
+ struct inotify_data **ret) {
+
+ _cleanup_close_ int fd = -1;
+ struct inotify_data *d;
+ struct epoll_event ev;
+ int r;
+
+ assert(e);
+
+ d = hashmap_get(e->inotify_data, &priority);
+ if (d) {
+ if (ret)
+ *ret = d;
+ return 0;
+ }
+
+ fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ fd = fd_move_above_stdio(fd);
+
+ r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
+ if (r < 0)
+ return r;
+
+ d = new(struct inotify_data, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (struct inotify_data) {
+ .wakeup = WAKEUP_INOTIFY_DATA,
+ .fd = TAKE_FD(fd),
+ .priority = priority,
+ };
+
+ r = hashmap_put(e->inotify_data, &d->priority, d);
+ if (r < 0) {
+ d->fd = safe_close(d->fd);
+ free(d);
+ return r;
+ }
+
+ ev = (struct epoll_event) {
+ .events = EPOLLIN,
+ .data.ptr = d,
+ };
+
+ if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
+ r = -errno;
+ d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
+ * remove the fd from the epoll first, which we don't want as we couldn't
+ * add it in the first place. */
+ event_free_inotify_data(e, d);
+ return r;
+ }
+
+ if (ret)
+ *ret = d;
+
+ return 1;
+}
+
+static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
+ int r;
+
+ assert(x);
+ assert(y);
+
+ r = CMP(x->dev, y->dev);
+ if (r != 0)
+ return r;
+
+ return CMP(x->ino, y->ino);
+}
+
+static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
+ assert(d);
+
+ siphash24_compress(&d->dev, sizeof(d->dev), state);
+ siphash24_compress(&d->ino, sizeof(d->ino), state);
+}
+
+DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
+
+static void event_free_inode_data(
+ sd_event *e,
+ struct inode_data *d) {
+
+ assert(e);
+
+ if (!d)
+ return;
+
+ assert(!d->event_sources);
+
+ if (d->fd >= 0) {
+ LIST_REMOVE(to_close, e->inode_data_to_close, d);
+ safe_close(d->fd);
+ }
+
+ if (d->inotify_data) {
+
+ if (d->wd >= 0) {
+ if (d->inotify_data->fd >= 0) {
+ /* So here's a problem. At the time this runs the watch descriptor might already be
+ * invalidated, because an IN_IGNORED event might be queued right the moment we enter
+ * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
+ * likely case to happen. */
+
+ if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
+ log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
+ }
+
+ assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
+ }
+
+ assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
+ }
+
+ free(d);
+}
+
+static void event_gc_inode_data(
+ sd_event *e,
+ struct inode_data *d) {
+
+ struct inotify_data *inotify_data;
+
+ assert(e);
+
+ if (!d)
+ return;
+
+ if (d->event_sources)
+ return;
+
+ inotify_data = d->inotify_data;
+ event_free_inode_data(e, d);
+
+ if (inotify_data && hashmap_isempty(inotify_data->inodes))
+ event_free_inotify_data(e, inotify_data);
+}
+
+static int event_make_inode_data(
+ sd_event *e,
+ struct inotify_data *inotify_data,
+ dev_t dev,
+ ino_t ino,
+ struct inode_data **ret) {
+
+ struct inode_data *d, key;
+ int r;
+
+ assert(e);
+ assert(inotify_data);
+
+ key = (struct inode_data) {
+ .ino = ino,
+ .dev = dev,
+ };
+
+ d = hashmap_get(inotify_data->inodes, &key);
+ if (d) {
+ if (ret)
+ *ret = d;
+
+ return 0;
+ }
+
+ r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
+ if (r < 0)
+ return r;
+
+ d = new(struct inode_data, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (struct inode_data) {
+ .dev = dev,
+ .ino = ino,
+ .wd = -1,
+ .fd = -1,
+ .inotify_data = inotify_data,
+ };
+
+ r = hashmap_put(inotify_data->inodes, d, d);
+ if (r < 0) {
+ free(d);
+ return r;
+ }
+
+ if (ret)
+ *ret = d;
+
+ return 1;
+}
+
+static uint32_t inode_data_determine_mask(struct inode_data *d) {
+ bool excl_unlink = true;
+ uint32_t combined = 0;
+ sd_event_source *s;
+
+ assert(d);
+
+ /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
+ * the IN_EXCL_UNLINK flag is ANDed instead.
+ *
+ * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
+ * because we cannot change the mask anymore after the event source was created once, since the kernel has no
+ * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
+ * events we don't care for client-side. */
+
+ LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
+
+ if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
+ excl_unlink = false;
+
+ combined |= s->inotify.mask;
+ }
+
+ return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
+}
+
+static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
+ uint32_t combined_mask;
+ int wd, r;
+
+ assert(d);
+ assert(d->fd >= 0);
+
+ combined_mask = inode_data_determine_mask(d);
+
+ if (d->wd >= 0 && combined_mask == d->combined_mask)
+ return 0;
+
+ r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
+ if (r < 0)
+ return r;
+
+ wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
+ if (wd < 0)
+ return -errno;
+
+ if (d->wd < 0) {
+ r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
+ if (r < 0) {
+ (void) inotify_rm_watch(d->inotify_data->fd, wd);
+ return r;
+ }
+
+ d->wd = wd;
+
+ } else if (d->wd != wd) {
+
+ log_debug("Weird, the watch descriptor we already knew for this inode changed?");
+ (void) inotify_rm_watch(d->fd, wd);
+ return -EINVAL;
+ }
+
+ d->combined_mask = combined_mask;
+ return 1;
+}
+
+_public_ int sd_event_add_inotify(
+ sd_event *e,
+ sd_event_source **ret,
+ const char *path,
+ uint32_t mask,
+ sd_event_inotify_handler_t callback,
+ void *userdata) {
+
+ struct inotify_data *inotify_data = NULL;
+ struct inode_data *inode_data = NULL;
+ _cleanup_close_ int fd = -1;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ struct stat st;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(path, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
+ * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
+ * the user can't use them for us. */
+ if (mask & IN_MASK_ADD)
+ return -EINVAL;
+
+ fd = open(path, O_PATH|O_CLOEXEC|
+ (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
+ (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ s = source_new(e, !ret, SOURCE_INOTIFY);
+ if (!s)
+ return -ENOMEM;
+
+ s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
+ s->inotify.mask = mask;
+ s->inotify.callback = callback;
+ s->userdata = userdata;
+
+ /* Allocate an inotify object for this priority, and an inode object within it */
+ r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
+ if (r < 0)
+ return r;
+
+ r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
+ if (r < 0) {
+ event_free_inotify_data(e, inotify_data);
+ return r;
+ }
+
+ /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
+ * the event source, until then, for which we need the original inode. */
+ if (inode_data->fd < 0) {
+ inode_data->fd = TAKE_FD(fd);
+ LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
+ }
+
+ /* Link our event source to the inode data object */
+ LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
+ s->inotify.inode_data = inode_data;
+
+ /* Actually realize the watch now */
+ r = inode_data_realize_watch(e, inode_data);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s, path);
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+static sd_event_source* event_source_free(sd_event_source *s) {
+ if (!s)
+ return NULL;
+
+ /* Here's a special hack: when we are called from a
+ * dispatch handler we won't free the event source
+ * immediately, but we will detach the fd from the
+ * epoll. This way it is safe for the caller to unref
+ * the event source and immediately close the fd, but
+ * we still retain a valid event source object after
+ * the callback. */
+
+ if (s->dispatching) {
+ if (s->type == SOURCE_IO)
+ source_io_unregister(s);
+
+ source_disconnect(s);
+ } else
+ source_free(s);
+
+ return NULL;
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
+
+_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ return free_and_strdup(&s->description, description);
+}
+
+_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
+ assert_return(s, -EINVAL);
+ assert_return(description, -EINVAL);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ if (!s->description)
+ return -ENXIO;
+
+ *description = s->description;
+ return 0;
+}
+
+_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
+ assert_return(s, NULL);
+
+ return s->event;
+}
+
+_public_ int sd_event_source_get_pending(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type != SOURCE_EXIT, -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ return s->pending;
+}
+
+_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ return s->io.fd;
+}
+
+_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ if (s->io.fd == fd)
+ return 0;
+
+ if (s->enabled == SD_EVENT_OFF) {
+ s->io.fd = fd;
+ s->io.registered = false;
+ } else {
+ int saved_fd;
+
+ saved_fd = s->io.fd;
+ assert(s->io.registered);
+
+ s->io.fd = fd;
+ s->io.registered = false;
+
+ r = source_io_register(s, s->enabled, s->io.events);
+ if (r < 0) {
+ s->io.fd = saved_fd;
+ s->io.registered = true;
+ return r;
+ }
+
+ epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
+ }
+
+ return 0;
+}
+
+_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+
+ return s->io.owned;
+}
+
+_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+
+ s->io.owned = own;
+ return 0;
+}
+
+_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
+ assert_return(s, -EINVAL);
+ assert_return(events, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *events = s->io.events;
+ return 0;
+}
+
+_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ /* edge-triggered updates are never skipped, so we can reset edges */
+ if (s->io.events == events && !(events & EPOLLET))
+ return 0;
+
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+
+ if (s->enabled != SD_EVENT_OFF) {
+ r = source_io_register(s, s->enabled, events);
+ if (r < 0)
+ return r;
+ }
+
+ s->io.events = events;
+
+ return 0;
+}
+
+_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
+ assert_return(s, -EINVAL);
+ assert_return(revents, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(s->pending, -ENODATA);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *revents = s->io.revents;
+ return 0;
+}
+
+_public_ int sd_event_source_get_signal(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_SIGNAL, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ return s->signal.sig;
+}
+
+_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *priority = s->priority;
+ return 0;
+}
+
+_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
+ bool rm_inotify = false, rm_inode = false;
+ struct inotify_data *new_inotify_data = NULL;
+ struct inode_data *new_inode_data = NULL;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ if (s->priority == priority)
+ return 0;
+
+ if (s->type == SOURCE_INOTIFY) {
+ struct inode_data *old_inode_data;
+
+ assert(s->inotify.inode_data);
+ old_inode_data = s->inotify.inode_data;
+
+ /* We need the original fd to change the priority. If we don't have it we can't change the priority,
+ * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
+ * events we allow priority changes only until the first following iteration. */
+ if (old_inode_data->fd < 0)
+ return -EOPNOTSUPP;
+
+ r = event_make_inotify_data(s->event, priority, &new_inotify_data);
+ if (r < 0)
+ return r;
+ rm_inotify = r > 0;
+
+ r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
+ if (r < 0)
+ goto fail;
+ rm_inode = r > 0;
+
+ if (new_inode_data->fd < 0) {
+ /* Duplicate the fd for the new inode object if we don't have any yet */
+ new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
+ if (new_inode_data->fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
+ }
+
+ /* Move the event source to the new inode data structure */
+ LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
+ LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
+ s->inotify.inode_data = new_inode_data;
+
+ /* Now create the new watch */
+ r = inode_data_realize_watch(s->event, new_inode_data);
+ if (r < 0) {
+ /* Move it back */
+ LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
+ LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
+ s->inotify.inode_data = old_inode_data;
+ goto fail;
+ }
+
+ s->priority = priority;
+
+ event_gc_inode_data(s->event, old_inode_data);
+
+ } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
+ struct signal_data *old, *d;
+
+ /* Move us from the signalfd belonging to the old
+ * priority to the signalfd of the new priority */
+
+ assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
+
+ s->priority = priority;
+
+ r = event_make_signal_data(s->event, s->signal.sig, &d);
+ if (r < 0) {
+ s->priority = old->priority;
+ return r;
+ }
+
+ event_unmask_signal_data(s->event, old, s->signal.sig);
+ } else
+ s->priority = priority;
+
+ if (s->pending)
+ prioq_reshuffle(s->event->pending, s, &s->pending_index);
+
+ if (s->prepare)
+ prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
+
+ if (s->type == SOURCE_EXIT)
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
+
+ return 0;
+
+fail:
+ if (rm_inode)
+ event_free_inode_data(s->event, new_inode_data);
+
+ if (rm_inotify)
+ event_free_inotify_data(s->event, new_inotify_data);
+
+ return r;
+}
+
+_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ if (m)
+ *m = s->enabled;
+ return s->enabled != SD_EVENT_OFF;
+}
+
+_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ /* If we are dead anyway, we are fine with turning off
+ * sources, but everything else needs to fail. */
+ if (s->event->state == SD_EVENT_FINISHED)
+ return m == SD_EVENT_OFF ? 0 : -ESTALE;
+
+ if (s->enabled == m)
+ return 0;
+
+ if (m == SD_EVENT_OFF) {
+
+ /* Unset the pending flag when this event source is disabled */
+ if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+ }
+
+ switch (s->type) {
+
+ case SOURCE_IO:
+ source_io_unregister(s);
+ s->enabled = m;
+ break;
+
+ case SOURCE_TIME_REALTIME:
+ case SOURCE_TIME_BOOTTIME:
+ case SOURCE_TIME_MONOTONIC:
+ case SOURCE_TIME_REALTIME_ALARM:
+ case SOURCE_TIME_BOOTTIME_ALARM: {
+ struct clock_data *d;
+
+ s->enabled = m;
+ d = event_get_clock_data(s->event, s->type);
+ assert(d);
+
+ prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(d->latest, s, &s->time.latest_index);
+ d->needs_rearm = true;
+ break;
+ }
+
+ case SOURCE_SIGNAL:
+ s->enabled = m;
+
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
+ break;
+
+ case SOURCE_CHILD:
+ s->enabled = m;
+
+ assert(s->event->n_enabled_child_sources > 0);
+ s->event->n_enabled_child_sources--;
+
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
+ break;
+
+ case SOURCE_EXIT:
+ s->enabled = m;
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
+ break;
+
+ case SOURCE_DEFER:
+ case SOURCE_POST:
+ case SOURCE_INOTIFY:
+ s->enabled = m;
+ break;
+
+ default:
+ assert_not_reached("Wut? I shouldn't exist.");
+ }
+
+ } else {
+
+ /* Unset the pending flag when this event source is enabled */
+ if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+ }
+
+ switch (s->type) {
+
+ case SOURCE_IO:
+ r = source_io_register(s, m, s->io.events);
+ if (r < 0)
+ return r;
+
+ s->enabled = m;
+ break;
+
+ case SOURCE_TIME_REALTIME:
+ case SOURCE_TIME_BOOTTIME:
+ case SOURCE_TIME_MONOTONIC:
+ case SOURCE_TIME_REALTIME_ALARM:
+ case SOURCE_TIME_BOOTTIME_ALARM: {
+ struct clock_data *d;
+
+ s->enabled = m;
+ d = event_get_clock_data(s->event, s->type);
+ assert(d);
+
+ prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(d->latest, s, &s->time.latest_index);
+ d->needs_rearm = true;
+ break;
+ }
+
+ case SOURCE_SIGNAL:
+
+ s->enabled = m;
+
+ r = event_make_signal_data(s->event, s->signal.sig, NULL);
+ if (r < 0) {
+ s->enabled = SD_EVENT_OFF;
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
+ return r;
+ }
+
+ break;
+
+ case SOURCE_CHILD:
+
+ if (s->enabled == SD_EVENT_OFF)
+ s->event->n_enabled_child_sources++;
+
+ s->enabled = m;
+
+ r = event_make_signal_data(s->event, SIGCHLD, NULL);
+ if (r < 0) {
+ s->enabled = SD_EVENT_OFF;
+ s->event->n_enabled_child_sources--;
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
+ return r;
+ }
+
+ break;
+
+ case SOURCE_EXIT:
+ s->enabled = m;
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
+ break;
+
+ case SOURCE_DEFER:
+ case SOURCE_POST:
+ case SOURCE_INOTIFY:
+ s->enabled = m;
+ break;
+
+ default:
+ assert_not_reached("Wut? I shouldn't exist.");
+ }
+ }
+
+ if (s->pending)
+ prioq_reshuffle(s->event->pending, s, &s->pending_index);
+
+ if (s->prepare)
+ prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
+
+ return 0;
+}
+
+_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
+ assert_return(s, -EINVAL);
+ assert_return(usec, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *usec = s->time.next;
+ return 0;
+}
+
+_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
+ struct clock_data *d;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+
+ s->time.next = usec;
+
+ d = event_get_clock_data(s->event, s->type);
+ assert(d);
+
+ prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(d->latest, s, &s->time.latest_index);
+ d->needs_rearm = true;
+
+ return 0;
+}
+
+_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
+ assert_return(s, -EINVAL);
+ assert_return(usec, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *usec = s->time.accuracy;
+ return 0;
+}
+
+_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
+ struct clock_data *d;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(usec != (uint64_t) -1, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+
+ if (usec == 0)
+ usec = DEFAULT_ACCURACY_USEC;
+
+ s->time.accuracy = usec;
+
+ d = event_get_clock_data(s->event, s->type);
+ assert(d);
+
+ prioq_reshuffle(d->latest, s, &s->time.latest_index);
+ d->needs_rearm = true;
+
+ return 0;
+}
+
+_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
+ assert_return(s, -EINVAL);
+ assert_return(clock, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *clock = event_source_type_to_clock(s->type);
+ return 0;
+}
+
+_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
+ assert_return(s, -EINVAL);
+ assert_return(pid, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *pid = s->child.pid;
+ return 0;
+}
+
+_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
+ assert_return(s, -EINVAL);
+ assert_return(mask, -EINVAL);
+ assert_return(s->type == SOURCE_INOTIFY, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *mask = s->inotify.mask;
+ return 0;
+}
+
+_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(s->type != SOURCE_EXIT, -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ if (s->prepare == callback)
+ return 0;
+
+ if (callback && s->prepare) {
+ s->prepare = callback;
+ return 0;
+ }
+
+ r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
+ if (r < 0)
+ return r;
+
+ s->prepare = callback;
+
+ if (callback) {
+ r = prioq_put(s->event->prepare, s, &s->prepare_index);
+ if (r < 0)
+ return r;
+ } else
+ prioq_remove(s->event->prepare, s, &s->prepare_index);
+
+ return 0;
+}
+
+_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
+ assert_return(s, NULL);
+
+ return s->userdata;
+}
+
+_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
+ void *ret;
+
+ assert_return(s, NULL);
+
+ ret = s->userdata;
+ s->userdata = userdata;
+
+ return ret;
+}
+
+static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
+ usec_t c;
+ assert(e);
+ assert(a <= b);
+
+ if (a <= 0)
+ return 0;
+ if (a >= USEC_INFINITY)
+ return USEC_INFINITY;
+
+ if (b <= a + 1)
+ return a;
+
+ initialize_perturb(e);
+
+ /*
+ Find a good time to wake up again between times a and b. We
+ have two goals here:
+
+ a) We want to wake up as seldom as possible, hence prefer
+ later times over earlier times.
+
+ b) But if we have to wake up, then let's make sure to
+ dispatch as much as possible on the entire system.
+
+ We implement this by waking up everywhere at the same time
+ within any given minute if we can, synchronised via the
+ perturbation value determined from the boot ID. If we can't,
+ then we try to find the same spot in every 10s, then 1s and
+ then 250ms step. Otherwise, we pick the last possible time
+ to wake up.
+ */
+
+ c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_MINUTE))
+ return b;
+
+ c -= USEC_PER_MINUTE;
+ }
+
+ if (c >= a)
+ return c;
+
+ c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_SEC*10))
+ return b;
+
+ c -= USEC_PER_SEC*10;
+ }
+
+ if (c >= a)
+ return c;
+
+ c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_SEC))
+ return b;
+
+ c -= USEC_PER_SEC;
+ }
+
+ if (c >= a)
+ return c;
+
+ c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_MSEC*250))
+ return b;
+
+ c -= USEC_PER_MSEC*250;
+ }
+
+ if (c >= a)
+ return c;
+
+ return b;
+}
+
+static int event_arm_timer(
+ sd_event *e,
+ struct clock_data *d) {
+
+ struct itimerspec its = {};
+ sd_event_source *a, *b;
+ usec_t t;
+ int r;
+
+ assert(e);
+ assert(d);
+
+ if (!d->needs_rearm)
+ return 0;
+ else
+ d->needs_rearm = false;
+
+ a = prioq_peek(d->earliest);
+ if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
+
+ if (d->fd < 0)
+ return 0;
+
+ if (d->next == USEC_INFINITY)
+ return 0;
+
+ /* disarm */
+ r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
+ if (r < 0)
+ return r;
+
+ d->next = USEC_INFINITY;
+ return 0;
+ }
+
+ b = prioq_peek(d->latest);
+ assert_se(b && b->enabled != SD_EVENT_OFF);
+
+ t = sleep_between(e, a->time.next, time_event_source_latest(b));
+ if (d->next == t)
+ return 0;
+
+ assert_se(d->fd >= 0);
+
+ if (t == 0) {
+ /* We don' want to disarm here, just mean some time looooong ago. */
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 1;
+ } else
+ timespec_store(&its.it_value, t);
+
+ r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
+ if (r < 0)
+ return -errno;
+
+ d->next = t;
+ return 0;
+}
+
+static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
+ assert(e);
+ assert(s);
+ assert(s->type == SOURCE_IO);
+
+ /* If the event source was already pending, we just OR in the
+ * new revents, otherwise we reset the value. The ORing is
+ * necessary to handle EPOLLONESHOT events properly where
+ * readability might happen independently of writability, and
+ * we need to keep track of both */
+
+ if (s->pending)
+ s->io.revents |= revents;
+ else
+ s->io.revents = revents;
+
+ return source_set_pending(s, true);
+}
+
+static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
+ uint64_t x;
+ ssize_t ss;
+
+ assert(e);
+ assert(fd >= 0);
+
+ assert_return(events == EPOLLIN, -EIO);
+
+ ss = read(fd, &x, sizeof(x));
+ if (ss < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return -errno;
+ }
+
+ if (_unlikely_(ss != sizeof(x)))
+ return -EIO;
+
+ if (next)
+ *next = USEC_INFINITY;
+
+ return 0;
+}
+
+static int process_timer(
+ sd_event *e,
+ usec_t n,
+ struct clock_data *d) {
+
+ sd_event_source *s;
+ int r;
+
+ assert(e);
+ assert(d);
+
+ for (;;) {
+ s = prioq_peek(d->earliest);
+ if (!s ||
+ s->time.next > n ||
+ s->enabled == SD_EVENT_OFF ||
+ s->pending)
+ break;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+
+ prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(d->latest, s, &s->time.latest_index);
+ d->needs_rearm = true;
+ }
+
+ return 0;
+}
+
+static int process_child(sd_event *e) {
+ sd_event_source *s;
+ Iterator i;
+ int r;
+
+ assert(e);
+
+ e->need_process_child = false;
+
+ /*
+ So, this is ugly. We iteratively invoke waitid() with P_PID
+ + WNOHANG for each PID we wait for, instead of using
+ P_ALL. This is because we only want to get child
+ information of very specific child processes, and not all
+ of them. We might not have processed the SIGCHLD even of a
+ previous invocation and we don't want to maintain a
+ unbounded *per-child* event queue, hence we really don't
+ want anything flushed out of the kernel's queue that we
+ don't care about. Since this is O(n) this means that if you
+ have a lot of processes you probably want to handle SIGCHLD
+ yourself.
+
+ We do not reap the children here (by using WNOWAIT), this
+ is only done after the event source is dispatched so that
+ the callback still sees the process as a zombie.
+ */
+
+ HASHMAP_FOREACH(s, e->child_sources, i) {
+ assert(s->type == SOURCE_CHILD);
+
+ if (s->pending)
+ continue;
+
+ if (s->enabled == SD_EVENT_OFF)
+ continue;
+
+ zero(s->child.siginfo);
+ r = waitid(P_PID, s->child.pid, &s->child.siginfo,
+ WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
+ if (r < 0)
+ return -errno;
+
+ if (s->child.siginfo.si_pid != 0) {
+ bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
+
+ if (!zombie && (s->child.options & WEXITED)) {
+ /* If the child isn't dead then let's
+ * immediately remove the state change
+ * from the queue, since there's no
+ * benefit in leaving it queued */
+
+ assert(s->child.options & (WSTOPPED|WCONTINUED));
+ waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
+ }
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
+ bool read_one = false;
+ int r;
+
+ assert(e);
+ assert(d);
+ assert_return(events == EPOLLIN, -EIO);
+
+ /* If there's a signal queued on this priority and SIGCHLD is
+ on this priority too, then make sure to recheck the
+ children we watch. This is because we only ever dequeue
+ the first signal per priority, and if we dequeue one, and
+ SIGCHLD might be enqueued later we wouldn't know, but we
+ might have higher priority children we care about hence we
+ need to check that explicitly. */
+
+ if (sigismember(&d->sigset, SIGCHLD))
+ e->need_process_child = true;
+
+ /* If there's already an event source pending for this
+ * priority we don't read another */
+ if (d->current)
+ return 0;
+
+ for (;;) {
+ struct signalfd_siginfo si;
+ ssize_t n;
+ sd_event_source *s = NULL;
+
+ n = read(d->fd, &si, sizeof(si));
+ if (n < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return read_one;
+
+ return -errno;
+ }
+
+ if (_unlikely_(n != sizeof(si)))
+ return -EIO;
+
+ assert(SIGNAL_VALID(si.ssi_signo));
+
+ read_one = true;
+
+ if (e->signal_sources)
+ s = e->signal_sources[si.ssi_signo];
+ if (!s)
+ continue;
+ if (s->pending)
+ continue;
+
+ s->signal.siginfo = si;
+ d->current = s;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+}
+
+static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
+ ssize_t n;
+
+ assert(e);
+ assert(d);
+
+ assert_return(revents == EPOLLIN, -EIO);
+
+ /* If there's already an event source pending for this priority, don't read another */
+ if (d->n_pending > 0)
+ return 0;
+
+ /* Is the read buffer non-empty? If so, let's not read more */
+ if (d->buffer_filled > 0)
+ return 0;
+
+ n = read(d->fd, &d->buffer, sizeof(d->buffer));
+ if (n < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return -errno;
+ }
+
+ assert(n > 0);
+ d->buffer_filled = (size_t) n;
+ LIST_PREPEND(buffered, e->inotify_data_buffered, d);
+
+ return 1;
+}
+
+static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
+ assert(e);
+ assert(d);
+ assert(sz <= d->buffer_filled);
+
+ if (sz == 0)
+ return;
+
+ /* Move the rest to the buffer to the front, in order to get things properly aligned again */
+ memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
+ d->buffer_filled -= sz;
+
+ if (d->buffer_filled == 0)
+ LIST_REMOVE(buffered, e->inotify_data_buffered, d);
+}
+
+static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
+ int r;
+
+ assert(e);
+ assert(d);
+
+ /* If there's already an event source pending for this priority, don't read another */
+ if (d->n_pending > 0)
+ return 0;
+
+ while (d->buffer_filled > 0) {
+ size_t sz;
+
+ /* Let's validate that the event structures are complete */
+ if (d->buffer_filled < offsetof(struct inotify_event, name))
+ return -EIO;
+
+ sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
+ if (d->buffer_filled < sz)
+ return -EIO;
+
+ if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
+ struct inode_data *inode_data;
+ Iterator i;
+
+ /* The queue overran, let's pass this event to all event sources connected to this inotify
+ * object */
+
+ HASHMAP_FOREACH(inode_data, d->inodes, i) {
+ sd_event_source *s;
+
+ LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
+
+ if (s->enabled == SD_EVENT_OFF)
+ continue;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+ }
+ }
+ } else {
+ struct inode_data *inode_data;
+ sd_event_source *s;
+
+ /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
+ * our watch descriptor table. */
+ if (d->buffer.ev.mask & IN_IGNORED) {
+
+ inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
+ if (!inode_data) {
+ event_inotify_data_drop(e, d, sz);
+ continue;
+ }
+
+ /* The watch descriptor was removed by the kernel, let's drop it here too */
+ inode_data->wd = -1;
+ } else {
+ inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
+ if (!inode_data) {
+ event_inotify_data_drop(e, d, sz);
+ continue;
+ }
+ }
+
+ /* Trigger all event sources that are interested in these events. Also trigger all event
+ * sources if IN_IGNORED or IN_UNMOUNT is set. */
+ LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
+
+ if (s->enabled == SD_EVENT_OFF)
+ continue;
+
+ if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
+ (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
+ continue;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* Something pending now? If so, let's finish, otherwise let's read more. */
+ if (d->n_pending > 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int process_inotify(sd_event *e) {
+ struct inotify_data *d;
+ int r, done = 0;
+
+ assert(e);
+
+ LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
+ r = event_inotify_data_process(e, d);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ done ++;
+ }
+
+ return done;
+}
+
+static int source_dispatch(sd_event_source *s) {
+ EventSourceType saved_type;
+ int r = 0;
+
+ assert(s);
+ assert(s->pending || s->type == SOURCE_EXIT);
+
+ /* Save the event source type, here, so that we still know it after the event callback which might invalidate
+ * the event. */
+ saved_type = s->type;
+
+ if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+ }
+
+ if (s->type != SOURCE_POST) {
+ sd_event_source *z;
+ Iterator i;
+
+ /* If we execute a non-post source, let's mark all
+ * post sources as pending */
+
+ SET_FOREACH(z, s->event->post_sources, i) {
+ if (z->enabled == SD_EVENT_OFF)
+ continue;
+
+ r = source_set_pending(z, true);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (s->enabled == SD_EVENT_ONESHOT) {
+ r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+ }
+
+ s->dispatching = true;
+
+ switch (s->type) {
+
+ case SOURCE_IO:
+ r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
+ break;
+
+ case SOURCE_TIME_REALTIME:
+ case SOURCE_TIME_BOOTTIME:
+ case SOURCE_TIME_MONOTONIC:
+ case SOURCE_TIME_REALTIME_ALARM:
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ r = s->time.callback(s, s->time.next, s->userdata);
+ break;
+
+ case SOURCE_SIGNAL:
+ r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
+ break;
+
+ case SOURCE_CHILD: {
+ bool zombie;
+
+ zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
+
+ r = s->child.callback(s, &s->child.siginfo, s->userdata);
+
+ /* Now, reap the PID for good. */
+ if (zombie)
+ (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
+
+ break;
+ }
+
+ case SOURCE_DEFER:
+ r = s->defer.callback(s, s->userdata);
+ break;
+
+ case SOURCE_POST:
+ r = s->post.callback(s, s->userdata);
+ break;
+
+ case SOURCE_EXIT:
+ r = s->exit.callback(s, s->userdata);
+ break;
+
+ case SOURCE_INOTIFY: {
+ struct sd_event *e = s->event;
+ struct inotify_data *d;
+ size_t sz;
+
+ assert(s->inotify.inode_data);
+ assert_se(d = s->inotify.inode_data->inotify_data);
+
+ assert(d->buffer_filled >= offsetof(struct inotify_event, name));
+ sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
+ assert(d->buffer_filled >= sz);
+
+ r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
+
+ /* When no event is pending anymore on this inotify object, then let's drop the event from the
+ * buffer. */
+ if (d->n_pending == 0)
+ event_inotify_data_drop(e, d, sz);
+
+ break;
+ }
+
+ case SOURCE_WATCHDOG:
+ case _SOURCE_EVENT_SOURCE_TYPE_MAX:
+ case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
+ assert_not_reached("Wut? I shouldn't exist.");
+ }
+
+ s->dispatching = false;
+
+ if (r < 0)
+ log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
+ strna(s->description), event_source_type_to_string(saved_type));
+
+ if (s->n_ref == 0)
+ source_free(s);
+ else if (r < 0)
+ sd_event_source_set_enabled(s, SD_EVENT_OFF);
+
+ return 1;
+}
+
+static int event_prepare(sd_event *e) {
+ int r;
+
+ assert(e);
+
+ for (;;) {
+ sd_event_source *s;
+
+ s = prioq_peek(e->prepare);
+ if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
+ break;
+
+ s->prepare_iteration = e->iteration;
+ r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
+ if (r < 0)
+ return r;
+
+ assert(s->prepare);
+
+ s->dispatching = true;
+ r = s->prepare(s, s->userdata);
+ s->dispatching = false;
+
+ if (r < 0)
+ log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
+ strna(s->description), event_source_type_to_string(s->type));
+
+ if (s->n_ref == 0)
+ source_free(s);
+ else if (r < 0)
+ sd_event_source_set_enabled(s, SD_EVENT_OFF);
+ }
+
+ return 0;
+}
+
+static int dispatch_exit(sd_event *e) {
+ sd_event_source *p;
+ _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
+ int r;
+
+ assert(e);
+
+ p = prioq_peek(e->exit);
+ if (!p || p->enabled == SD_EVENT_OFF) {
+ e->state = SD_EVENT_FINISHED;
+ return 0;
+ }
+
+ ref = sd_event_ref(e);
+ e->iteration++;
+ e->state = SD_EVENT_EXITING;
+ r = source_dispatch(p);
+ e->state = SD_EVENT_INITIAL;
+ return r;
+}
+
+static sd_event_source* event_next_pending(sd_event *e) {
+ sd_event_source *p;
+
+ assert(e);
+
+ p = prioq_peek(e->pending);
+ if (!p)
+ return NULL;
+
+ if (p->enabled == SD_EVENT_OFF)
+ return NULL;
+
+ return p;
+}
+
+static int arm_watchdog(sd_event *e) {
+ struct itimerspec its = {};
+ usec_t t;
+ int r;
+
+ assert(e);
+ assert(e->watchdog_fd >= 0);
+
+ t = sleep_between(e,
+ e->watchdog_last + (e->watchdog_period / 2),
+ e->watchdog_last + (e->watchdog_period * 3 / 4));
+
+ timespec_store(&its.it_value, t);
+
+ /* Make sure we never set the watchdog to 0, which tells the
+ * kernel to disable it. */
+ if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
+ its.it_value.tv_nsec = 1;
+
+ r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int process_watchdog(sd_event *e) {
+ assert(e);
+
+ if (!e->watchdog)
+ return 0;
+
+ /* Don't notify watchdog too often */
+ if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
+ return 0;
+
+ sd_notify(false, "WATCHDOG=1");
+ e->watchdog_last = e->timestamp.monotonic;
+
+ return arm_watchdog(e);
+}
+
+static void event_close_inode_data_fds(sd_event *e) {
+ struct inode_data *d;
+
+ assert(e);
+
+ /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
+ * filesystems. But we can't close them right-away as we need them as long as the user still wants to make
+ * adjustments to the even source, such as changing the priority (which requires us to remove and readd a watch
+ * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
+ * compromise. */
+
+ while ((d = e->inode_data_to_close)) {
+ assert(d->fd >= 0);
+ d->fd = safe_close(d->fd);
+
+ LIST_REMOVE(to_close, e->inode_data_to_close, d);
+ }
+}
+
+_public_ int sd_event_prepare(sd_event *e) {
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
+
+ if (e->exit_requested)
+ goto pending;
+
+ e->iteration++;
+
+ e->state = SD_EVENT_PREPARING;
+ r = event_prepare(e);
+ e->state = SD_EVENT_INITIAL;
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->realtime);
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->boottime);
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->monotonic);
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->realtime_alarm);
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->boottime_alarm);
+ if (r < 0)
+ return r;
+
+ event_close_inode_data_fds(e);
+
+ if (event_next_pending(e) || e->need_process_child)
+ goto pending;
+
+ e->state = SD_EVENT_ARMED;
+
+ return 0;
+
+pending:
+ e->state = SD_EVENT_ARMED;
+ r = sd_event_wait(e, 0);
+ if (r == 0)
+ e->state = SD_EVENT_ARMED;
+
+ return r;
+}
+
+_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
+ struct epoll_event *ev_queue;
+ unsigned ev_queue_max;
+ int r, m, i;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
+
+ if (e->exit_requested) {
+ e->state = SD_EVENT_PENDING;
+ return 1;
+ }
+
+ ev_queue_max = MAX(e->n_sources, 1u);
+ ev_queue = newa(struct epoll_event, ev_queue_max);
+
+ /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
+ if (e->inotify_data_buffered)
+ timeout = 0;
+
+ m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
+ timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
+ if (m < 0) {
+ if (errno == EINTR) {
+ e->state = SD_EVENT_PENDING;
+ return 1;
+ }
+
+ r = -errno;
+ goto finish;
+ }
+
+ triple_timestamp_get(&e->timestamp);
+
+ for (i = 0; i < m; i++) {
+
+ if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
+ r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
+ else {
+ WakeupType *t = ev_queue[i].data.ptr;
+
+ switch (*t) {
+
+ case WAKEUP_EVENT_SOURCE:
+ r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
+ break;
+
+ case WAKEUP_CLOCK_DATA: {
+ struct clock_data *d = ev_queue[i].data.ptr;
+ r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
+ break;
+ }
+
+ case WAKEUP_SIGNAL_DATA:
+ r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
+ break;
+
+ case WAKEUP_INOTIFY_DATA:
+ r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
+ break;
+
+ default:
+ assert_not_reached("Invalid wake-up pointer");
+ }
+ }
+ if (r < 0)
+ goto finish;
+ }
+
+ r = process_watchdog(e);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.realtime, &e->realtime);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.boottime, &e->boottime);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
+ if (r < 0)
+ goto finish;
+
+ if (e->need_process_child) {
+ r = process_child(e);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = process_inotify(e);
+ if (r < 0)
+ goto finish;
+
+ if (event_next_pending(e)) {
+ e->state = SD_EVENT_PENDING;
+
+ return 1;
+ }
+
+ r = 0;
+
+finish:
+ e->state = SD_EVENT_INITIAL;
+
+ return r;
+}
+
+_public_ int sd_event_dispatch(sd_event *e) {
+ sd_event_source *p;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
+
+ if (e->exit_requested)
+ return dispatch_exit(e);
+
+ p = event_next_pending(e);
+ if (p) {
+ _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
+
+ ref = sd_event_ref(e);
+ e->state = SD_EVENT_RUNNING;
+ r = source_dispatch(p);
+ e->state = SD_EVENT_INITIAL;
+ return r;
+ }
+
+ e->state = SD_EVENT_INITIAL;
+
+ return 1;
+}
+
+static void event_log_delays(sd_event *e) {
+ char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
+ unsigned i;
+ int o;
+
+ for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
+ o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
+ e->delays[i] = 0;
+ }
+ log_debug("Event loop iterations: %.*s", o, b);
+}
+
+_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
+
+ if (e->profile_delays && e->last_run) {
+ usec_t this_run;
+ unsigned l;
+
+ this_run = now(CLOCK_MONOTONIC);
+
+ l = u64log2(this_run - e->last_run);
+ assert(l < sizeof(e->delays));
+ e->delays[l]++;
+
+ if (this_run - e->last_log >= 5*USEC_PER_SEC) {
+ event_log_delays(e);
+ e->last_log = this_run;
+ }
+ }
+
+ r = sd_event_prepare(e);
+ if (r == 0)
+ /* There was nothing? Then wait... */
+ r = sd_event_wait(e, timeout);
+
+ if (e->profile_delays)
+ e->last_run = now(CLOCK_MONOTONIC);
+
+ if (r > 0) {
+ /* There's something now, then let's dispatch it */
+ r = sd_event_dispatch(e);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ return r;
+}
+
+_public_ int sd_event_loop(sd_event *e) {
+ _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+ assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
+
+ ref = sd_event_ref(e);
+
+ while (e->state != SD_EVENT_FINISHED) {
+ r = sd_event_run(e, (uint64_t) -1);
+ if (r < 0)
+ return r;
+ }
+
+ return e->exit_code;
+}
+
+_public_ int sd_event_get_fd(sd_event *e) {
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ return e->epoll_fd;
+}
+
+_public_ int sd_event_get_state(sd_event *e) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ return e->state;
+}
+
+_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(code, -EINVAL);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!e->exit_requested)
+ return -ENODATA;
+
+ *code = e->exit_code;
+ return 0;
+}
+
+_public_ int sd_event_exit(sd_event *e, int code) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ e->exit_requested = true;
+ e->exit_code = code;
+
+ return 0;
+}
+
+_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(usec, -EINVAL);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
+ return -EOPNOTSUPP;
+
+ /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that don't use clock_supported() here,
+ * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
+ * the purpose of getting the time this doesn't matter. */
+ if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
+ return -EOPNOTSUPP;
+
+ if (!triple_timestamp_is_set(&e->timestamp)) {
+ /* Implicitly fall back to now() if we never ran
+ * before and thus have no cached time. */
+ *usec = now(clock);
+ return 1;
+ }
+
+ *usec = triple_timestamp_by_clock(&e->timestamp, clock);
+ return 0;
+}
+
+_public_ int sd_event_default(sd_event **ret) {
+ sd_event *e = NULL;
+ int r;
+
+ if (!ret)
+ return !!default_event;
+
+ if (default_event) {
+ *ret = sd_event_ref(default_event);
+ return 0;
+ }
+
+ r = sd_event_new(&e);
+ if (r < 0)
+ return r;
+
+ e->default_event_ptr = &default_event;
+ e->tid = gettid();
+ default_event = e;
+
+ *ret = e;
+ return 1;
+}
+
+_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(tid, -EINVAL);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (e->tid != 0) {
+ *tid = e->tid;
+ return 0;
+ }
+
+ return -ENXIO;
+}
+
+_public_ int sd_event_set_watchdog(sd_event *e, int b) {
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (e->watchdog == !!b)
+ return e->watchdog;
+
+ if (b) {
+ struct epoll_event ev;
+
+ r = sd_watchdog_enabled(false, &e->watchdog_period);
+ if (r <= 0)
+ return r;
+
+ /* Issue first ping immediately */
+ sd_notify(false, "WATCHDOG=1");
+ e->watchdog_last = now(CLOCK_MONOTONIC);
+
+ e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
+ if (e->watchdog_fd < 0)
+ return -errno;
+
+ r = arm_watchdog(e);
+ if (r < 0)
+ goto fail;
+
+ ev = (struct epoll_event) {
+ .events = EPOLLIN,
+ .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
+ };
+
+ r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ } else {
+ if (e->watchdog_fd >= 0) {
+ epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
+ e->watchdog_fd = safe_close(e->watchdog_fd);
+ }
+ }
+
+ e->watchdog = !!b;
+ return e->watchdog;
+
+fail:
+ e->watchdog_fd = safe_close(e->watchdog_fd);
+ return r;
+}
+
+_public_ int sd_event_get_watchdog(sd_event *e) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ return e->watchdog;
+}
+
+_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ *ret = e->iteration;
+ return 0;
+}
+
+_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
+ assert_return(s, -EINVAL);
+
+ s->destroy_callback = callback;
+ return 0;
+}
+
+_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
+ assert_return(s, -EINVAL);
+
+ if (ret)
+ *ret = s->destroy_callback;
+
+ return !!s->destroy_callback;
+}
+
+_public_ int sd_event_source_get_floating(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+
+ return s->floating;
+}
+
+_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
+ assert_return(s, -EINVAL);
+
+ if (s->floating == !!b)
+ return 0;
+
+ if (!s->event) /* Already disconnected */
+ return -ESTALE;
+
+ s->floating = b;
+
+ if (b) {
+ sd_event_source_ref(s);
+ sd_event_unref(s->event);
+ } else {
+ sd_event_ref(s->event);
+ sd_event_source_unref(s);
+ }
+
+ return 1;
+}
diff --git a/src/libsystemd/sd-event/test-event.c b/src/libsystemd/sd-event/test-event.c
new file mode 100644
index 0000000..6fd6d9f
--- /dev/null
+++ b/src/libsystemd/sd-event/test-event.c
@@ -0,0 +1,495 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/wait.h>
+
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "rm-rf.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static int prepare_handler(sd_event_source *s, void *userdata) {
+ log_info("preparing %c", PTR_TO_INT(userdata));
+ return 1;
+}
+
+static bool got_a, got_b, got_c, got_unref;
+static unsigned got_d;
+
+static int unref_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_event_source_unref(s);
+ got_unref = true;
+ return 0;
+}
+
+static int io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+
+ log_info("got IO on %c", PTR_TO_INT(userdata));
+
+ if (userdata == INT_TO_PTR('a')) {
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
+ assert_se(!got_a);
+ got_a = true;
+ } else if (userdata == INT_TO_PTR('b')) {
+ assert_se(!got_b);
+ got_b = true;
+ } else if (userdata == INT_TO_PTR('d')) {
+ got_d++;
+ if (got_d < 2)
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_ONESHOT) >= 0);
+ else
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
+ } else
+ assert_not_reached("Yuck!");
+
+ return 1;
+}
+
+static int child_handler(sd_event_source *s, const siginfo_t *si, void *userdata) {
+
+ assert_se(s);
+ assert_se(si);
+
+ log_info("got child on %c", PTR_TO_INT(userdata));
+
+ assert_se(userdata == INT_TO_PTR('f'));
+
+ assert_se(sd_event_exit(sd_event_source_get_event(s), 0) >= 0);
+ sd_event_source_unref(s);
+
+ return 1;
+}
+
+static int signal_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ sd_event_source *p = NULL;
+ pid_t pid;
+
+ assert_se(s);
+ assert_se(si);
+
+ log_info("got signal on %c", PTR_TO_INT(userdata));
+
+ assert_se(userdata == INT_TO_PTR('e'));
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0)
+ _exit(EXIT_SUCCESS);
+
+ assert_se(sd_event_add_child(sd_event_source_get_event(s), &p, pid, WEXITED, child_handler, INT_TO_PTR('f')) >= 0);
+ assert_se(sd_event_source_set_enabled(p, SD_EVENT_ONESHOT) >= 0);
+
+ sd_event_source_unref(s);
+
+ return 1;
+}
+
+static int defer_handler(sd_event_source *s, void *userdata) {
+ sd_event_source *p = NULL;
+
+ assert_se(s);
+
+ log_info("got defer on %c", PTR_TO_INT(userdata));
+
+ assert_se(userdata == INT_TO_PTR('d'));
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGUSR1, -1) >= 0);
+
+ assert_se(sd_event_add_signal(sd_event_source_get_event(s), &p, SIGUSR1, signal_handler, INT_TO_PTR('e')) >= 0);
+ assert_se(sd_event_source_set_enabled(p, SD_EVENT_ONESHOT) >= 0);
+ raise(SIGUSR1);
+
+ sd_event_source_unref(s);
+
+ return 1;
+}
+
+static bool do_quit = false;
+
+static int time_handler(sd_event_source *s, uint64_t usec, void *userdata) {
+ log_info("got timer on %c", PTR_TO_INT(userdata));
+
+ if (userdata == INT_TO_PTR('c')) {
+
+ if (do_quit) {
+ sd_event_source *p;
+
+ assert_se(sd_event_add_defer(sd_event_source_get_event(s), &p, defer_handler, INT_TO_PTR('d')) >= 0);
+ assert_se(sd_event_source_set_enabled(p, SD_EVENT_ONESHOT) >= 0);
+ } else {
+ assert_se(!got_c);
+ got_c = true;
+ }
+ } else
+ assert_not_reached("Huh?");
+
+ return 2;
+}
+
+static bool got_exit = false;
+
+static int exit_handler(sd_event_source *s, void *userdata) {
+ log_info("got quit handler on %c", PTR_TO_INT(userdata));
+
+ got_exit = true;
+
+ return 3;
+}
+
+static bool got_post = false;
+
+static int post_handler(sd_event_source *s, void *userdata) {
+ log_info("got post handler");
+
+ got_post = true;
+
+ return 2;
+}
+
+static void test_basic(void) {
+ sd_event *e = NULL;
+ sd_event_source *w = NULL, *x = NULL, *y = NULL, *z = NULL, *q = NULL, *t = NULL;
+ static const char ch = 'x';
+ int a[2] = { -1, -1 }, b[2] = { -1, -1}, d[2] = { -1, -1}, k[2] = { -1, -1 };
+ uint64_t event_now;
+ int64_t priority;
+
+ assert_se(pipe(a) >= 0);
+ assert_se(pipe(b) >= 0);
+ assert_se(pipe(d) >= 0);
+ assert_se(pipe(k) >= 0);
+
+ assert_se(sd_event_default(&e) >= 0);
+ assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) > 0);
+
+ assert_se(sd_event_set_watchdog(e, true) >= 0);
+
+ /* Test whether we cleanly can destroy an io event source from its own handler */
+ got_unref = false;
+ assert_se(sd_event_add_io(e, &t, k[0], EPOLLIN, unref_handler, NULL) >= 0);
+ assert_se(write(k[1], &ch, 1) == 1);
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(got_unref);
+
+ got_a = false, got_b = false, got_c = false, got_d = 0;
+
+ /* Add a oneshot handler, trigger it, reenable it, and trigger
+ * it again. */
+ assert_se(sd_event_add_io(e, &w, d[0], EPOLLIN, io_handler, INT_TO_PTR('d')) >= 0);
+ assert_se(sd_event_source_set_enabled(w, SD_EVENT_ONESHOT) >= 0);
+ assert_se(write(d[1], &ch, 1) >= 0);
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(got_d == 1);
+ assert_se(write(d[1], &ch, 1) >= 0);
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(got_d == 2);
+
+ assert_se(sd_event_add_io(e, &x, a[0], EPOLLIN, io_handler, INT_TO_PTR('a')) >= 0);
+ assert_se(sd_event_add_io(e, &y, b[0], EPOLLIN, io_handler, INT_TO_PTR('b')) >= 0);
+ assert_se(sd_event_add_time(e, &z, CLOCK_MONOTONIC, 0, 0, time_handler, INT_TO_PTR('c')) >= 0);
+ assert_se(sd_event_add_exit(e, &q, exit_handler, INT_TO_PTR('g')) >= 0);
+
+ assert_se(sd_event_source_set_priority(x, 99) >= 0);
+ assert_se(sd_event_source_get_priority(x, &priority) >= 0);
+ assert_se(priority == 99);
+ assert_se(sd_event_source_set_enabled(y, SD_EVENT_ONESHOT) >= 0);
+ assert_se(sd_event_source_set_prepare(x, prepare_handler) >= 0);
+ assert_se(sd_event_source_set_priority(z, 50) >= 0);
+ assert_se(sd_event_source_set_enabled(z, SD_EVENT_ONESHOT) >= 0);
+ assert_se(sd_event_source_set_prepare(z, prepare_handler) >= 0);
+
+ /* Test for floating event sources */
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGRTMIN+1, -1) >= 0);
+ assert_se(sd_event_add_signal(e, NULL, SIGRTMIN+1, NULL, NULL) >= 0);
+
+ assert_se(write(a[1], &ch, 1) >= 0);
+ assert_se(write(b[1], &ch, 1) >= 0);
+
+ assert_se(!got_a && !got_b && !got_c);
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+
+ assert_se(!got_a && got_b && !got_c);
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+
+ assert_se(!got_a && got_b && got_c);
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+
+ assert_se(got_a && got_b && got_c);
+
+ sd_event_source_unref(x);
+ sd_event_source_unref(y);
+
+ do_quit = true;
+ assert_se(sd_event_add_post(e, NULL, post_handler, NULL) >= 0);
+ assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) == 0);
+ assert_se(sd_event_source_set_time(z, event_now + 200 * USEC_PER_MSEC) >= 0);
+ assert_se(sd_event_source_set_enabled(z, SD_EVENT_ONESHOT) >= 0);
+
+ assert_se(sd_event_loop(e) >= 0);
+ assert_se(got_post);
+ assert_se(got_exit);
+
+ sd_event_source_unref(z);
+ sd_event_source_unref(q);
+
+ sd_event_source_unref(w);
+
+ sd_event_unref(e);
+
+ safe_close_pair(a);
+ safe_close_pair(b);
+ safe_close_pair(d);
+ safe_close_pair(k);
+}
+
+static void test_sd_event_now(void) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ uint64_t event_now;
+
+ assert_se(sd_event_new(&e) >= 0);
+ assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) > 0);
+ assert_se(sd_event_now(e, CLOCK_REALTIME, &event_now) > 0);
+ assert_se(sd_event_now(e, CLOCK_REALTIME_ALARM, &event_now) > 0);
+ if (clock_boottime_supported()) {
+ assert_se(sd_event_now(e, CLOCK_BOOTTIME, &event_now) > 0);
+ assert_se(sd_event_now(e, CLOCK_BOOTTIME_ALARM, &event_now) > 0);
+ }
+ assert_se(sd_event_now(e, -1, &event_now) == -EOPNOTSUPP);
+ assert_se(sd_event_now(e, 900 /* arbitrary big number */, &event_now) == -EOPNOTSUPP);
+
+ assert_se(sd_event_run(e, 0) == 0);
+
+ assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) == 0);
+ assert_se(sd_event_now(e, CLOCK_REALTIME, &event_now) == 0);
+ assert_se(sd_event_now(e, CLOCK_REALTIME_ALARM, &event_now) == 0);
+ if (clock_boottime_supported()) {
+ assert_se(sd_event_now(e, CLOCK_BOOTTIME, &event_now) == 0);
+ assert_se(sd_event_now(e, CLOCK_BOOTTIME_ALARM, &event_now) == 0);
+ }
+ assert_se(sd_event_now(e, -1, &event_now) == -EOPNOTSUPP);
+ assert_se(sd_event_now(e, 900 /* arbitrary big number */, &event_now) == -EOPNOTSUPP);
+}
+
+static int last_rtqueue_sigval = 0;
+static int n_rtqueue = 0;
+
+static int rtqueue_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ last_rtqueue_sigval = si->ssi_int;
+ n_rtqueue++;
+ return 0;
+}
+
+static void test_rtqueue(void) {
+ sd_event_source *u = NULL, *v = NULL, *s = NULL;
+ sd_event *e = NULL;
+
+ assert_se(sd_event_default(&e) >= 0);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGRTMIN+2, SIGRTMIN+3, SIGUSR2, -1) >= 0);
+ assert_se(sd_event_add_signal(e, &u, SIGRTMIN+2, rtqueue_handler, NULL) >= 0);
+ assert_se(sd_event_add_signal(e, &v, SIGRTMIN+3, rtqueue_handler, NULL) >= 0);
+ assert_se(sd_event_add_signal(e, &s, SIGUSR2, rtqueue_handler, NULL) >= 0);
+
+ assert_se(sd_event_source_set_priority(v, -10) >= 0);
+
+ assert_se(sigqueue(getpid_cached(), SIGRTMIN+2, (union sigval) { .sival_int = 1 }) >= 0);
+ assert_se(sigqueue(getpid_cached(), SIGRTMIN+3, (union sigval) { .sival_int = 2 }) >= 0);
+ assert_se(sigqueue(getpid_cached(), SIGUSR2, (union sigval) { .sival_int = 3 }) >= 0);
+ assert_se(sigqueue(getpid_cached(), SIGRTMIN+3, (union sigval) { .sival_int = 4 }) >= 0);
+ assert_se(sigqueue(getpid_cached(), SIGUSR2, (union sigval) { .sival_int = 5 }) >= 0);
+
+ assert_se(n_rtqueue == 0);
+ assert_se(last_rtqueue_sigval == 0);
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(n_rtqueue == 1);
+ assert_se(last_rtqueue_sigval == 2); /* first SIGRTMIN+3 */
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(n_rtqueue == 2);
+ assert_se(last_rtqueue_sigval == 4); /* second SIGRTMIN+3 */
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(n_rtqueue == 3);
+ assert_se(last_rtqueue_sigval == 3); /* first SIGUSR2 */
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(n_rtqueue == 4);
+ assert_se(last_rtqueue_sigval == 1); /* SIGRTMIN+2 */
+
+ assert_se(sd_event_run(e, 0) == 0); /* the other SIGUSR2 is dropped, because the first one was still queued */
+ assert_se(n_rtqueue == 4);
+ assert_se(last_rtqueue_sigval == 1);
+
+ sd_event_source_unref(u);
+ sd_event_source_unref(v);
+ sd_event_source_unref(s);
+
+ sd_event_unref(e);
+}
+
+#define CREATE_EVENTS_MAX (70000U)
+
+struct inotify_context {
+ bool delete_self_handler_called;
+ unsigned create_called[CREATE_EVENTS_MAX];
+ unsigned create_overflow;
+ unsigned n_create_events;
+};
+
+static void maybe_exit(sd_event_source *s, struct inotify_context *c) {
+ unsigned n;
+
+ assert(s);
+ assert(c);
+
+ if (!c->delete_self_handler_called)
+ return;
+
+ for (n = 0; n < 3; n++) {
+ unsigned i;
+
+ if (c->create_overflow & (1U << n))
+ continue;
+
+ for (i = 0; i < c->n_create_events; i++)
+ if (!(c->create_called[i] & (1U << n)))
+ return;
+ }
+
+ sd_event_exit(sd_event_source_get_event(s), 0);
+}
+
+static int inotify_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
+ struct inotify_context *c = userdata;
+ const char *description;
+ unsigned bit, n;
+
+ assert_se(sd_event_source_get_description(s, &description) >= 0);
+ assert_se(safe_atou(description, &n) >= 0);
+
+ assert_se(n <= 3);
+ bit = 1U << n;
+
+ if (ev->mask & IN_Q_OVERFLOW) {
+ log_info("inotify-handler <%s>: overflow", description);
+ c->create_overflow |= bit;
+ } else if (ev->mask & IN_CREATE) {
+ unsigned i;
+
+ log_info("inotify-handler <%s>: create on %s", description, ev->name);
+
+ if (!streq(ev->name, "sub")) {
+ assert_se(safe_atou(ev->name, &i) >= 0);
+
+ assert_se(i < c->n_create_events);
+ c->create_called[i] |= bit;
+ }
+ } else if (ev->mask & IN_DELETE) {
+ log_info("inotify-handler <%s>: delete of %s", description, ev->name);
+ assert_se(streq(ev->name, "sub"));
+ } else
+ assert_not_reached("unexpected inotify event");
+
+ maybe_exit(s, c);
+ return 1;
+}
+
+static int delete_self_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
+ struct inotify_context *c = userdata;
+
+ if (ev->mask & IN_Q_OVERFLOW) {
+ log_info("delete-self-handler: overflow");
+ c->delete_self_handler_called = true;
+ } else if (ev->mask & IN_DELETE_SELF) {
+ log_info("delete-self-handler: delete-self");
+ c->delete_self_handler_called = true;
+ } else if (ev->mask & IN_IGNORED) {
+ log_info("delete-self-handler: ignore");
+ } else
+ assert_not_reached("unexpected inotify event (delete-self)");
+
+ maybe_exit(s, c);
+ return 1;
+}
+
+static void test_inotify(unsigned n_create_events) {
+ _cleanup_(rm_rf_physical_and_freep) char *p = NULL;
+ sd_event_source *a = NULL, *b = NULL, *c = NULL, *d = NULL;
+ struct inotify_context context = {
+ .n_create_events = n_create_events,
+ };
+ sd_event *e = NULL;
+ const char *q;
+ unsigned i;
+
+ assert_se(sd_event_default(&e) >= 0);
+
+ assert_se(mkdtemp_malloc("/tmp/test-inotify-XXXXXX", &p) >= 0);
+
+ assert_se(sd_event_add_inotify(e, &a, p, IN_CREATE|IN_ONLYDIR, inotify_handler, &context) >= 0);
+ assert_se(sd_event_add_inotify(e, &b, p, IN_CREATE|IN_DELETE|IN_DONT_FOLLOW, inotify_handler, &context) >= 0);
+ assert_se(sd_event_source_set_priority(b, SD_EVENT_PRIORITY_IDLE) >= 0);
+ assert_se(sd_event_source_set_priority(b, SD_EVENT_PRIORITY_NORMAL) >= 0);
+ assert_se(sd_event_add_inotify(e, &c, p, IN_CREATE|IN_DELETE|IN_EXCL_UNLINK, inotify_handler, &context) >= 0);
+ assert_se(sd_event_source_set_priority(c, SD_EVENT_PRIORITY_IDLE) >= 0);
+
+ assert_se(sd_event_source_set_description(a, "0") >= 0);
+ assert_se(sd_event_source_set_description(b, "1") >= 0);
+ assert_se(sd_event_source_set_description(c, "2") >= 0);
+
+ q = strjoina(p, "/sub");
+ assert_se(touch(q) >= 0);
+ assert_se(sd_event_add_inotify(e, &d, q, IN_DELETE_SELF, delete_self_handler, &context) >= 0);
+
+ for (i = 0; i < n_create_events; i++) {
+ char buf[DECIMAL_STR_MAX(unsigned)+1];
+ _cleanup_free_ char *z;
+
+ xsprintf(buf, "%u", i);
+ assert_se(z = strjoin(p, "/", buf));
+
+ assert_se(touch(z) >= 0);
+ }
+
+ assert_se(unlink(q) >= 0);
+
+ assert_se(sd_event_loop(e) >= 0);
+
+ sd_event_source_unref(a);
+ sd_event_source_unref(b);
+ sd_event_source_unref(c);
+ sd_event_source_unref(d);
+
+ sd_event_unref(e);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_basic();
+ test_sd_event_now();
+ test_rtqueue();
+
+ test_inotify(100); /* should work without overflow */
+ test_inotify(33000); /* should trigger a q overflow */
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-hwdb/hwdb-internal.h b/src/libsystemd/sd-hwdb/hwdb-internal.h
new file mode 100644
index 0000000..d82b8c1
--- /dev/null
+++ b/src/libsystemd/sd-hwdb/hwdb-internal.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdint.h>
+
+#include "sparse-endian.h"
+
+#define HWDB_SIG { 'K', 'S', 'L', 'P', 'H', 'H', 'R', 'H' }
+
+/* on-disk trie objects */
+struct trie_header_f {
+ uint8_t signature[8];
+
+ /* version of tool which created the file */
+ le64_t tool_version;
+ le64_t file_size;
+
+ /* size of structures to allow them to grow */
+ le64_t header_size;
+ le64_t node_size;
+ le64_t child_entry_size;
+ le64_t value_entry_size;
+
+ /* offset of the root trie node */
+ le64_t nodes_root_off;
+
+ /* size of the nodes and string section */
+ le64_t nodes_len;
+ le64_t strings_len;
+} _packed_;
+
+struct trie_node_f {
+ /* prefix of lookup string, shared by all children */
+ le64_t prefix_off;
+ /* size of children entry array appended to the node */
+ uint8_t children_count;
+ uint8_t padding[7];
+ /* size of value entry array appended to the node */
+ le64_t values_count;
+} _packed_;
+
+/* array of child entries, follows directly the node record */
+struct trie_child_entry_f {
+ /* index of the child node */
+ uint8_t c;
+ uint8_t padding[7];
+ /* offset of the child node */
+ le64_t child_off;
+} _packed_;
+
+/* array of value entries, follows directly the node record/child array */
+struct trie_value_entry_f {
+ le64_t key_off;
+ le64_t value_off;
+} _packed_;
+
+/* v2 extends v1 with filename and line-number */
+struct trie_value_entry2_f {
+ le64_t key_off;
+ le64_t value_off;
+ le64_t filename_off;
+ le32_t line_number;
+ le16_t file_priority;
+ le16_t padding;
+} _packed_;
diff --git a/src/libsystemd/sd-hwdb/hwdb-util.c b/src/libsystemd/sd-hwdb/hwdb-util.c
new file mode 100644
index 0000000..f852967
--- /dev/null
+++ b/src/libsystemd/sd-hwdb/hwdb-util.c
@@ -0,0 +1,686 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hwdb-internal.h"
+#include "hwdb-util.h"
+#include "label.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "strbuf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+static const char *default_hwdb_bin_dir = "/etc/udev";
+static const char * const conf_file_dirs[] = {
+ "/etc/udev/hwdb.d",
+ UDEVLIBEXECDIR "/hwdb.d",
+ NULL
+};
+
+/*
+ * Generic udev properties, key-value database based on modalias strings.
+ * Uses a Patricia/radix trie to index all matches for efficient lookup.
+ */
+
+/* in-memory trie objects */
+struct trie {
+ struct trie_node *root;
+ struct strbuf *strings;
+
+ size_t nodes_count;
+ size_t children_count;
+ size_t values_count;
+};
+
+struct trie_node {
+ /* prefix, common part for all children of this node */
+ size_t prefix_off;
+
+ /* sorted array of pointers to children nodes */
+ struct trie_child_entry *children;
+ uint8_t children_count;
+
+ /* sorted array of key-value pairs */
+ struct trie_value_entry *values;
+ size_t values_count;
+};
+
+/* children array item with char (0-255) index */
+struct trie_child_entry {
+ uint8_t c;
+ struct trie_node *child;
+};
+
+/* value array item with key-value pairs */
+struct trie_value_entry {
+ size_t key_off;
+ size_t value_off;
+ size_t filename_off;
+ uint32_t line_number;
+ uint16_t file_priority;
+};
+
+static int trie_children_cmp(const struct trie_child_entry *a, const struct trie_child_entry *b) {
+ return CMP(a->c, b->c);
+}
+
+static int node_add_child(struct trie *trie, struct trie_node *node, struct trie_node *node_child, uint8_t c) {
+ struct trie_child_entry *child;
+
+ /* extend array, add new entry, sort for bisection */
+ child = reallocarray(node->children, node->children_count + 1, sizeof(struct trie_child_entry));
+ if (!child)
+ return -ENOMEM;
+
+ node->children = child;
+ trie->children_count++;
+ node->children[node->children_count].c = c;
+ node->children[node->children_count].child = node_child;
+ node->children_count++;
+ typesafe_qsort(node->children, node->children_count, trie_children_cmp);
+ trie->nodes_count++;
+
+ return 0;
+}
+
+static struct trie_node *node_lookup(const struct trie_node *node, uint8_t c) {
+ struct trie_child_entry *child;
+ struct trie_child_entry search;
+
+ search.c = c;
+ child = typesafe_bsearch(&search, node->children, node->children_count, trie_children_cmp);
+ if (child)
+ return child->child;
+ return NULL;
+}
+
+static void trie_node_cleanup(struct trie_node *node) {
+ size_t i;
+
+ if (!node)
+ return;
+
+ for (i = 0; i < node->children_count; i++)
+ trie_node_cleanup(node->children[i].child);
+ free(node->children);
+ free(node->values);
+ free(node);
+}
+
+static void trie_free(struct trie *trie) {
+ if (!trie)
+ return;
+
+ trie_node_cleanup(trie->root);
+ strbuf_cleanup(trie->strings);
+ free(trie);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct trie*, trie_free);
+
+static int trie_values_cmp(const struct trie_value_entry *a, const struct trie_value_entry *b, struct trie *trie) {
+ return strcmp(trie->strings->buf + a->key_off,
+ trie->strings->buf + b->key_off);
+}
+
+static int trie_node_add_value(struct trie *trie, struct trie_node *node,
+ const char *key, const char *value,
+ const char *filename, uint16_t file_priority, uint32_t line_number, bool compat) {
+ ssize_t k, v, fn = 0;
+ struct trie_value_entry *val;
+
+ k = strbuf_add_string(trie->strings, key, strlen(key));
+ if (k < 0)
+ return k;
+ v = strbuf_add_string(trie->strings, value, strlen(value));
+ if (v < 0)
+ return v;
+
+ if (!compat) {
+ fn = strbuf_add_string(trie->strings, filename, strlen(filename));
+ if (fn < 0)
+ return fn;
+ }
+
+ if (node->values_count) {
+ struct trie_value_entry search = {
+ .key_off = k,
+ .value_off = v,
+ };
+
+ val = typesafe_bsearch_r(&search, node->values, node->values_count, trie_values_cmp, trie);
+ if (val) {
+ /* At this point we have 2 identical properties on the same match-string.
+ * Since we process files in order, we just replace the previous value. */
+ val->value_off = v;
+ val->filename_off = fn;
+ val->file_priority = file_priority;
+ val->line_number = line_number;
+ return 0;
+ }
+ }
+
+ /* extend array, add new entry, sort for bisection */
+ val = reallocarray(node->values, node->values_count + 1, sizeof(struct trie_value_entry));
+ if (!val)
+ return -ENOMEM;
+ trie->values_count++;
+ node->values = val;
+ node->values[node->values_count] = (struct trie_value_entry) {
+ .key_off = k,
+ .value_off = v,
+ .filename_off = fn,
+ .file_priority = file_priority,
+ .line_number = line_number,
+ };
+ node->values_count++;
+ typesafe_qsort_r(node->values, node->values_count, trie_values_cmp, trie);
+ return 0;
+}
+
+static int trie_insert(struct trie *trie, struct trie_node *node, const char *search,
+ const char *key, const char *value,
+ const char *filename, uint16_t file_priority, uint32_t line_number, bool compat) {
+ size_t i = 0;
+ int r = 0;
+
+ for (;;) {
+ size_t p;
+ uint8_t c;
+ struct trie_node *child;
+
+ for (p = 0; (c = trie->strings->buf[node->prefix_off + p]); p++) {
+ _cleanup_free_ struct trie_node *new_child = NULL;
+ _cleanup_free_ char *s = NULL;
+ ssize_t off;
+
+ if (c == search[i + p])
+ continue;
+
+ /* split node */
+ new_child = new(struct trie_node, 1);
+ if (!new_child)
+ return -ENOMEM;
+
+ /* move values from parent to child */
+ *new_child = (struct trie_node) {
+ .prefix_off = node->prefix_off + p+1,
+ .children = node->children,
+ .children_count = node->children_count,
+ .values = node->values,
+ .values_count = node->values_count,
+ };
+
+ /* update parent; use strdup() because the source gets realloc()d */
+ s = strndup(trie->strings->buf + node->prefix_off, p);
+ if (!s)
+ return -ENOMEM;
+
+ off = strbuf_add_string(trie->strings, s, p);
+ if (off < 0)
+ return off;
+
+ *node = (struct trie_node) {
+ .prefix_off = off,
+ };
+ r = node_add_child(trie, node, new_child, c);
+ if (r < 0)
+ return r;
+
+ new_child = NULL; /* avoid cleanup */
+ break;
+ }
+ i += p;
+
+ c = search[i];
+ if (c == '\0')
+ return trie_node_add_value(trie, node, key, value, filename, file_priority, line_number, compat);
+
+ child = node_lookup(node, c);
+ if (!child) {
+ _cleanup_free_ struct trie_node *new_child = NULL;
+ ssize_t off;
+
+ /* new child */
+ new_child = new(struct trie_node, 1);
+ if (!new_child)
+ return -ENOMEM;
+
+ off = strbuf_add_string(trie->strings, search + i+1, strlen(search + i+1));
+ if (off < 0)
+ return off;
+
+ *new_child = (struct trie_node) {
+ .prefix_off = off,
+ };
+
+ r = node_add_child(trie, node, new_child, c);
+ if (r < 0)
+ return r;
+
+ child = TAKE_PTR(new_child);
+ return trie_node_add_value(trie, child, key, value, filename, file_priority, line_number, compat);
+ }
+
+ node = child;
+ i++;
+ }
+}
+
+struct trie_f {
+ FILE *f;
+ struct trie *trie;
+ uint64_t strings_off;
+
+ uint64_t nodes_count;
+ uint64_t children_count;
+ uint64_t values_count;
+};
+
+/* calculate the storage space for the nodes, children arrays, value arrays */
+static void trie_store_nodes_size(struct trie_f *trie, struct trie_node *node, bool compat) {
+ uint64_t i;
+
+ for (i = 0; i < node->children_count; i++)
+ trie_store_nodes_size(trie, node->children[i].child, compat);
+
+ trie->strings_off += sizeof(struct trie_node_f);
+ for (i = 0; i < node->children_count; i++)
+ trie->strings_off += sizeof(struct trie_child_entry_f);
+ for (i = 0; i < node->values_count; i++)
+ trie->strings_off += compat ? sizeof(struct trie_value_entry_f) : sizeof(struct trie_value_entry2_f);
+}
+
+static int64_t trie_store_nodes(struct trie_f *trie, struct trie_node *node, bool compat) {
+ uint64_t i;
+ struct trie_node_f n = {
+ .prefix_off = htole64(trie->strings_off + node->prefix_off),
+ .children_count = node->children_count,
+ .values_count = htole64(node->values_count),
+ };
+ _cleanup_free_ struct trie_child_entry_f *children = NULL;
+ int64_t node_off;
+
+ if (node->children_count) {
+ children = new(struct trie_child_entry_f, node->children_count);
+ if (!children)
+ return -ENOMEM;
+ }
+
+ /* post-order recursion */
+ for (i = 0; i < node->children_count; i++) {
+ int64_t child_off;
+
+ child_off = trie_store_nodes(trie, node->children[i].child, compat);
+ if (child_off < 0)
+ return child_off;
+
+ children[i] = (struct trie_child_entry_f) {
+ .c = node->children[i].c,
+ .child_off = htole64(child_off),
+ };
+ }
+
+ /* write node */
+ node_off = ftello(trie->f);
+ fwrite(&n, sizeof(struct trie_node_f), 1, trie->f);
+ trie->nodes_count++;
+
+ /* append children array */
+ if (node->children_count) {
+ fwrite(children, sizeof(struct trie_child_entry_f), node->children_count, trie->f);
+ trie->children_count += node->children_count;
+ }
+
+ /* append values array */
+ for (i = 0; i < node->values_count; i++) {
+ struct trie_value_entry2_f v = {
+ .key_off = htole64(trie->strings_off + node->values[i].key_off),
+ .value_off = htole64(trie->strings_off + node->values[i].value_off),
+ .filename_off = htole64(trie->strings_off + node->values[i].filename_off),
+ .line_number = htole32(node->values[i].line_number),
+ .file_priority = htole16(node->values[i].file_priority),
+ };
+
+ fwrite(&v, compat ? sizeof(struct trie_value_entry_f) : sizeof(struct trie_value_entry2_f), 1, trie->f);
+ }
+ trie->values_count += node->values_count;
+
+ return node_off;
+}
+
+static int trie_store(struct trie *trie, const char *filename, bool compat) {
+ struct trie_f t = {
+ .trie = trie,
+ };
+ _cleanup_free_ char *filename_tmp = NULL;
+ int64_t pos;
+ int64_t root_off;
+ int64_t size;
+ struct trie_header_f h = {
+ .signature = HWDB_SIG,
+ .tool_version = htole64(PROJECT_VERSION),
+ .header_size = htole64(sizeof(struct trie_header_f)),
+ .node_size = htole64(sizeof(struct trie_node_f)),
+ .child_entry_size = htole64(sizeof(struct trie_child_entry_f)),
+ .value_entry_size = htole64(compat ? sizeof(struct trie_value_entry_f) : sizeof(struct trie_value_entry2_f)),
+ };
+ int r;
+
+ /* calculate size of header, nodes, children entries, value entries */
+ t.strings_off = sizeof(struct trie_header_f);
+ trie_store_nodes_size(&t, trie->root, compat);
+
+ r = fopen_temporary(filename, &t.f, &filename_tmp);
+ if (r < 0)
+ return r;
+ fchmod(fileno(t.f), 0444);
+
+ /* write nodes */
+ if (fseeko(t.f, sizeof(struct trie_header_f), SEEK_SET) < 0)
+ goto error_fclose;
+
+ root_off = trie_store_nodes(&t, trie->root, compat);
+ h.nodes_root_off = htole64(root_off);
+ pos = ftello(t.f);
+ h.nodes_len = htole64(pos - sizeof(struct trie_header_f));
+
+ /* write string buffer */
+ fwrite(trie->strings->buf, trie->strings->len, 1, t.f);
+ h.strings_len = htole64(trie->strings->len);
+
+ /* write header */
+ size = ftello(t.f);
+ h.file_size = htole64(size);
+ if (fseeko(t.f, 0, SEEK_SET) < 0)
+ goto error_fclose;
+ fwrite(&h, sizeof(struct trie_header_f), 1, t.f);
+
+ if (ferror(t.f))
+ goto error_fclose;
+ if (fflush(t.f) < 0)
+ goto error_fclose;
+ if (fsync(fileno(t.f)) < 0)
+ goto error_fclose;
+ if (rename(filename_tmp, filename) < 0)
+ goto error_fclose;
+
+ /* write succeeded */
+ fclose(t.f);
+
+ log_debug("=== trie on-disk ===");
+ log_debug("size: %8"PRIi64" bytes", size);
+ log_debug("header: %8zu bytes", sizeof(struct trie_header_f));
+ log_debug("nodes: %8"PRIu64" bytes (%8"PRIu64")",
+ t.nodes_count * sizeof(struct trie_node_f), t.nodes_count);
+ log_debug("child pointers: %8"PRIu64" bytes (%8"PRIu64")",
+ t.children_count * sizeof(struct trie_child_entry_f), t.children_count);
+ log_debug("value pointers: %8"PRIu64" bytes (%8"PRIu64")",
+ t.values_count * (compat ? sizeof(struct trie_value_entry_f) : sizeof(struct trie_value_entry2_f)), t.values_count);
+ log_debug("string store: %8zu bytes", trie->strings->len);
+ log_debug("strings start: %8"PRIu64, t.strings_off);
+ return 0;
+
+ error_fclose:
+ r = -errno;
+ fclose(t.f);
+ unlink(filename_tmp);
+ return r;
+}
+
+static int insert_data(struct trie *trie, char **match_list, char *line, const char *filename,
+ uint16_t file_priority, uint32_t line_number, bool compat) {
+ char *value, **entry;
+
+ assert(line[0] == ' ');
+
+ value = strchr(line, '=');
+ if (!value)
+ return log_syntax(NULL, LOG_WARNING, filename, line_number, EINVAL,
+ "Key-value pair expected but got \"%s\", ignoring", line);
+
+ value[0] = '\0';
+ value++;
+
+ /* Replace multiple leading spaces by a single space */
+ while (isblank(line[0]) && isblank(line[1]))
+ line++;
+
+ if (isempty(line + 1) || isempty(value))
+ return log_syntax(NULL, LOG_WARNING, filename, line_number, EINVAL,
+ "Empty %s in \"%s=%s\", ignoring",
+ isempty(line + 1) ? "key" : "value",
+ line, value);
+
+ STRV_FOREACH(entry, match_list)
+ trie_insert(trie, trie->root, *entry, line, value, filename, file_priority, line_number, compat);
+
+ return 0;
+}
+
+static int import_file(struct trie *trie, const char *filename, uint16_t file_priority, bool compat) {
+ enum {
+ HW_NONE,
+ HW_MATCH,
+ HW_DATA,
+ } state = HW_NONE;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **match_list = NULL;
+ uint32_t line_number = 0;
+ char *match = NULL;
+ int r = 0, err;
+
+ f = fopen(filename, "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ size_t len;
+ char *pos;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ ++line_number;
+
+ /* comment line */
+ if (line[0] == '#')
+ continue;
+
+ /* strip trailing comment */
+ pos = strchr(line, '#');
+ if (pos)
+ pos[0] = '\0';
+
+ /* strip trailing whitespace */
+ len = strlen(line);
+ while (len > 0 && isspace(line[len-1]))
+ len--;
+ line[len] = '\0';
+
+ switch (state) {
+ case HW_NONE:
+ if (len == 0)
+ break;
+
+ if (line[0] == ' ') {
+ log_syntax(NULL, LOG_WARNING, filename, line_number, EINVAL,
+ "Match expected but got indented property \"%s\", ignoring line", line);
+ r = -EINVAL;
+ break;
+ }
+
+ /* start of record, first match */
+ state = HW_MATCH;
+
+ match = strdup(line);
+ if (!match)
+ return -ENOMEM;
+
+ err = strv_consume(&match_list, match);
+ if (err < 0)
+ return err;
+
+ break;
+
+ case HW_MATCH:
+ if (len == 0) {
+ log_syntax(NULL, LOG_WARNING, filename, line_number, EINVAL,
+ "Property expected, ignoring record with no properties");
+ r = -EINVAL;
+ state = HW_NONE;
+ strv_clear(match_list);
+ break;
+ }
+
+ if (line[0] != ' ') {
+ /* another match */
+ match = strdup(line);
+ if (!match)
+ return -ENOMEM;
+
+ err = strv_consume(&match_list, match);
+ if (err < 0)
+ return err;
+
+ break;
+ }
+
+ /* first data */
+ state = HW_DATA;
+ err = insert_data(trie, match_list, line, filename, file_priority, line_number, compat);
+ if (err < 0)
+ r = err;
+ break;
+
+ case HW_DATA:
+ if (len == 0) {
+ /* end of record */
+ state = HW_NONE;
+ strv_clear(match_list);
+ break;
+ }
+
+ if (line[0] != ' ') {
+ log_syntax(NULL, LOG_WARNING, filename, line_number, EINVAL,
+ "Property or empty line expected, got \"%s\", ignoring record", line);
+ r = -EINVAL;
+ state = HW_NONE;
+ strv_clear(match_list);
+ break;
+ }
+
+ err = insert_data(trie, match_list, line, filename, file_priority, line_number, compat);
+ if (err < 0)
+ r = err;
+ break;
+ };
+ }
+
+ if (state == HW_MATCH)
+ log_syntax(NULL, LOG_WARNING, filename, line_number, EINVAL,
+ "Property expected, ignoring record with no properties");
+
+ return r;
+}
+
+int hwdb_update(const char *root, const char *hwdb_bin_dir, bool strict, bool compat) {
+ _cleanup_free_ char *hwdb_bin = NULL;
+ _cleanup_(trie_freep) struct trie *trie = NULL;
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ uint16_t file_priority = 1;
+ int r = 0, err;
+
+ /* The argument 'compat' controls the format version of database. If false, then hwdb.bin will be created with
+ * additional information such that priority, line number, and filename of database source. If true, then hwdb.bin
+ * will be created without the information. systemd-hwdb command should set the argument false, and 'udevadm hwdb'
+ * command should set it true. */
+
+ trie = new0(struct trie, 1);
+ if (!trie)
+ return -ENOMEM;
+
+ /* string store */
+ trie->strings = strbuf_new();
+ if (!trie->strings)
+ return -ENOMEM;
+
+ /* index */
+ trie->root = new0(struct trie_node, 1);
+ if (!trie->root)
+ return -ENOMEM;
+
+ trie->nodes_count++;
+
+ err = conf_files_list_strv(&files, ".hwdb", root, 0, conf_file_dirs);
+ if (err < 0)
+ return log_error_errno(err, "Failed to enumerate hwdb files: %m");
+
+ STRV_FOREACH(f, files) {
+ log_debug("Reading file \"%s\"", *f);
+ err = import_file(trie, *f, file_priority++, compat);
+ if (err < 0 && strict)
+ r = err;
+ }
+
+ strbuf_complete(trie->strings);
+
+ log_debug("=== trie in-memory ===");
+ log_debug("nodes: %8zu bytes (%8zu)",
+ trie->nodes_count * sizeof(struct trie_node), trie->nodes_count);
+ log_debug("children arrays: %8zu bytes (%8zu)",
+ trie->children_count * sizeof(struct trie_child_entry), trie->children_count);
+ log_debug("values arrays: %8zu bytes (%8zu)",
+ trie->values_count * sizeof(struct trie_value_entry), trie->values_count);
+ log_debug("strings: %8zu bytes",
+ trie->strings->len);
+ log_debug("strings incoming: %8zu bytes (%8zu)",
+ trie->strings->in_len, trie->strings->in_count);
+ log_debug("strings dedup'ed: %8zu bytes (%8zu)",
+ trie->strings->dedup_len, trie->strings->dedup_count);
+
+ hwdb_bin = path_join(root, hwdb_bin_dir ?: default_hwdb_bin_dir, "hwdb.bin");
+ if (!hwdb_bin)
+ return -ENOMEM;
+
+ mkdir_parents_label(hwdb_bin, 0755);
+ err = trie_store(trie, hwdb_bin, compat);
+ if (err < 0)
+ return log_error_errno(err, "Failed to write database %s: %m", hwdb_bin);
+
+ err = label_fix(hwdb_bin, 0);
+ if (err < 0)
+ return err;
+
+ return r;
+}
+
+int hwdb_query(const char *modalias) {
+ _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb = NULL;
+ const char *key, *value;
+ int r;
+
+ assert(modalias);
+
+ r = sd_hwdb_new(&hwdb);
+ if (r < 0)
+ return r;
+
+ SD_HWDB_FOREACH_PROPERTY(hwdb, modalias, key, value)
+ printf("%s=%s\n", key, value);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-hwdb/hwdb-util.h b/src/libsystemd/sd-hwdb/hwdb-util.h
new file mode 100644
index 0000000..425b4b3
--- /dev/null
+++ b/src/libsystemd/sd-hwdb/hwdb-util.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-hwdb.h"
+
+bool hwdb_validate(sd_hwdb *hwdb);
+int hwdb_update(const char *root, const char *hwdb_bin_dir, bool strict, bool compat);
+int hwdb_query(const char *modalias);
diff --git a/src/libsystemd/sd-hwdb/sd-hwdb.c b/src/libsystemd/sd-hwdb/sd-hwdb.c
new file mode 100644
index 0000000..233944c
--- /dev/null
+++ b/src/libsystemd/sd-hwdb/sd-hwdb.c
@@ -0,0 +1,468 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2008 Alan Jenkins <alan.christopher.jenkins@googlemail.com>
+***/
+
+#include <errno.h>
+#include <fnmatch.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "sd-hwdb.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "hwdb-internal.h"
+#include "hwdb-util.h"
+#include "refcnt.h"
+#include "string-util.h"
+#include "util.h"
+
+struct sd_hwdb {
+ RefCount n_ref;
+
+ FILE *f;
+ struct stat st;
+ union {
+ struct trie_header_f *head;
+ const char *map;
+ };
+
+ OrderedHashmap *properties;
+ Iterator properties_iterator;
+ bool properties_modified;
+};
+
+struct linebuf {
+ char bytes[LINE_MAX];
+ size_t size;
+ size_t len;
+};
+
+static void linebuf_init(struct linebuf *buf) {
+ buf->size = 0;
+ buf->len = 0;
+}
+
+static const char *linebuf_get(struct linebuf *buf) {
+ if (buf->len + 1 >= sizeof(buf->bytes))
+ return NULL;
+ buf->bytes[buf->len] = '\0';
+ return buf->bytes;
+}
+
+static bool linebuf_add(struct linebuf *buf, const char *s, size_t len) {
+ if (buf->len + len >= sizeof(buf->bytes))
+ return false;
+ memcpy(buf->bytes + buf->len, s, len);
+ buf->len += len;
+ return true;
+}
+
+static bool linebuf_add_char(struct linebuf *buf, char c) {
+ if (buf->len + 1 >= sizeof(buf->bytes))
+ return false;
+ buf->bytes[buf->len++] = c;
+ return true;
+}
+
+static void linebuf_rem(struct linebuf *buf, size_t count) {
+ assert(buf->len >= count);
+ buf->len -= count;
+}
+
+static void linebuf_rem_char(struct linebuf *buf) {
+ linebuf_rem(buf, 1);
+}
+
+static const struct trie_child_entry_f *trie_node_child(sd_hwdb *hwdb, const struct trie_node_f *node, size_t idx) {
+ const char *base = (const char *)node;
+
+ base += le64toh(hwdb->head->node_size);
+ base += idx * le64toh(hwdb->head->child_entry_size);
+ return (const struct trie_child_entry_f *)base;
+}
+
+static const struct trie_value_entry_f *trie_node_value(sd_hwdb *hwdb, const struct trie_node_f *node, size_t idx) {
+ const char *base = (const char *)node;
+
+ base += le64toh(hwdb->head->node_size);
+ base += node->children_count * le64toh(hwdb->head->child_entry_size);
+ base += idx * le64toh(hwdb->head->value_entry_size);
+ return (const struct trie_value_entry_f *)base;
+}
+
+static const struct trie_node_f *trie_node_from_off(sd_hwdb *hwdb, le64_t off) {
+ return (const struct trie_node_f *)(hwdb->map + le64toh(off));
+}
+
+static const char *trie_string(sd_hwdb *hwdb, le64_t off) {
+ return hwdb->map + le64toh(off);
+}
+
+static int trie_children_cmp_f(const void *v1, const void *v2) {
+ const struct trie_child_entry_f *n1 = v1;
+ const struct trie_child_entry_f *n2 = v2;
+
+ return n1->c - n2->c;
+}
+
+static const struct trie_node_f *node_lookup_f(sd_hwdb *hwdb, const struct trie_node_f *node, uint8_t c) {
+ struct trie_child_entry_f *child;
+ struct trie_child_entry_f search;
+
+ search.c = c;
+ child = bsearch(&search, (const char *)node + le64toh(hwdb->head->node_size), node->children_count,
+ le64toh(hwdb->head->child_entry_size), trie_children_cmp_f);
+ if (child)
+ return trie_node_from_off(hwdb, child->child_off);
+ return NULL;
+}
+
+static int hwdb_add_property(sd_hwdb *hwdb, const struct trie_value_entry_f *entry) {
+ const char *key;
+ int r;
+
+ assert(hwdb);
+
+ key = trie_string(hwdb, entry->key_off);
+
+ /*
+ * Silently ignore all properties which do not start with a
+ * space; future extensions might use additional prefixes.
+ */
+ if (key[0] != ' ')
+ return 0;
+
+ key++;
+
+ if (le64toh(hwdb->head->value_entry_size) >= sizeof(struct trie_value_entry2_f)) {
+ const struct trie_value_entry2_f *old, *entry2;
+
+ entry2 = (const struct trie_value_entry2_f *)entry;
+ old = ordered_hashmap_get(hwdb->properties, key);
+ if (old) {
+ /* On duplicates, we order by filename priority and line-number.
+ *
+ * v2 of the format had 64 bits for the line number.
+ * v3 reuses top 32 bits of line_number to store the priority.
+ * We check the top bits — if they are zero we have v2 format.
+ * This means that v2 clients will print wrong line numbers with
+ * v3 data.
+ *
+ * For v3 data: we compare the priority (of the source file)
+ * and the line number.
+ *
+ * For v2 data: we rely on the fact that the filenames in the hwdb
+ * are added in the order of priority (higher later), because they
+ * are *processed* in the order of priority. So we compare the
+ * indices to determine which file had higher priority. Comparing
+ * the strings alphabetically would be useless, because those are
+ * full paths, and e.g. /usr/lib would sort after /etc, even
+ * though it has lower priority. This is not reliable because of
+ * suffix compression, but should work for the most common case of
+ * /usr/lib/udev/hwbd.d and /etc/udev/hwdb.d, and is better than
+ * not doing the comparison at all.
+ */
+ bool lower;
+
+ if (entry2->file_priority == 0)
+ lower = entry2->filename_off < old->filename_off ||
+ (entry2->filename_off == old->filename_off && entry2->line_number < old->line_number);
+ else
+ lower = entry2->file_priority < old->file_priority ||
+ (entry2->file_priority == old->file_priority && entry2->line_number < old->line_number);
+ if (lower)
+ return 0;
+ }
+ }
+
+ r = ordered_hashmap_ensure_allocated(&hwdb->properties, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_replace(hwdb->properties, key, (void *)entry);
+ if (r < 0)
+ return r;
+
+ hwdb->properties_modified = true;
+
+ return 0;
+}
+
+static int trie_fnmatch_f(sd_hwdb *hwdb, const struct trie_node_f *node, size_t p,
+ struct linebuf *buf, const char *search) {
+ size_t len;
+ size_t i;
+ const char *prefix;
+ int err;
+
+ prefix = trie_string(hwdb, node->prefix_off);
+ len = strlen(prefix + p);
+ linebuf_add(buf, prefix + p, len);
+
+ for (i = 0; i < node->children_count; i++) {
+ const struct trie_child_entry_f *child = trie_node_child(hwdb, node, i);
+
+ linebuf_add_char(buf, child->c);
+ err = trie_fnmatch_f(hwdb, trie_node_from_off(hwdb, child->child_off), 0, buf, search);
+ if (err < 0)
+ return err;
+ linebuf_rem_char(buf);
+ }
+
+ if (le64toh(node->values_count) && fnmatch(linebuf_get(buf), search, 0) == 0)
+ for (i = 0; i < le64toh(node->values_count); i++) {
+ err = hwdb_add_property(hwdb, trie_node_value(hwdb, node, i));
+ if (err < 0)
+ return err;
+ }
+
+ linebuf_rem(buf, len);
+ return 0;
+}
+
+static int trie_search_f(sd_hwdb *hwdb, const char *search) {
+ struct linebuf buf;
+ const struct trie_node_f *node;
+ size_t i = 0;
+ int err;
+
+ linebuf_init(&buf);
+
+ node = trie_node_from_off(hwdb, hwdb->head->nodes_root_off);
+ while (node) {
+ const struct trie_node_f *child;
+ size_t p = 0;
+
+ if (node->prefix_off) {
+ char c;
+
+ for (; (c = trie_string(hwdb, node->prefix_off)[p]); p++) {
+ if (IN_SET(c, '*', '?', '['))
+ return trie_fnmatch_f(hwdb, node, p, &buf, search + i + p);
+ if (c != search[i + p])
+ return 0;
+ }
+ i += p;
+ }
+
+ child = node_lookup_f(hwdb, node, '*');
+ if (child) {
+ linebuf_add_char(&buf, '*');
+ err = trie_fnmatch_f(hwdb, child, 0, &buf, search + i);
+ if (err < 0)
+ return err;
+ linebuf_rem_char(&buf);
+ }
+
+ child = node_lookup_f(hwdb, node, '?');
+ if (child) {
+ linebuf_add_char(&buf, '?');
+ err = trie_fnmatch_f(hwdb, child, 0, &buf, search + i);
+ if (err < 0)
+ return err;
+ linebuf_rem_char(&buf);
+ }
+
+ child = node_lookup_f(hwdb, node, '[');
+ if (child) {
+ linebuf_add_char(&buf, '[');
+ err = trie_fnmatch_f(hwdb, child, 0, &buf, search + i);
+ if (err < 0)
+ return err;
+ linebuf_rem_char(&buf);
+ }
+
+ if (search[i] == '\0') {
+ size_t n;
+
+ for (n = 0; n < le64toh(node->values_count); n++) {
+ err = hwdb_add_property(hwdb, trie_node_value(hwdb, node, n));
+ if (err < 0)
+ return err;
+ }
+ return 0;
+ }
+
+ child = node_lookup_f(hwdb, node, search[i]);
+ node = child;
+ i++;
+ }
+ return 0;
+}
+
+static const char hwdb_bin_paths[] =
+ "/etc/systemd/hwdb/hwdb.bin\0"
+ "/etc/udev/hwdb.bin\0"
+ "/usr/lib/systemd/hwdb/hwdb.bin\0"
+#if HAVE_SPLIT_USR
+ "/lib/systemd/hwdb/hwdb.bin\0"
+#endif
+ UDEVLIBEXECDIR "/hwdb.bin\0";
+
+_public_ int sd_hwdb_new(sd_hwdb **ret) {
+ _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb = NULL;
+ const char *hwdb_bin_path;
+ const char sig[] = HWDB_SIG;
+
+ assert_return(ret, -EINVAL);
+
+ hwdb = new0(sd_hwdb, 1);
+ if (!hwdb)
+ return -ENOMEM;
+
+ hwdb->n_ref = REFCNT_INIT;
+
+ /* find hwdb.bin in hwdb_bin_paths */
+ NULSTR_FOREACH(hwdb_bin_path, hwdb_bin_paths) {
+ hwdb->f = fopen(hwdb_bin_path, "re");
+ if (hwdb->f)
+ break;
+ else if (errno == ENOENT)
+ continue;
+ else
+ return log_debug_errno(errno, "Failed to open %s: %m", hwdb_bin_path);
+ }
+
+ if (!hwdb->f) {
+ log_debug("hwdb.bin does not exist, please run 'systemd-hwdb update'");
+ return -ENOENT;
+ }
+
+ if (fstat(fileno(hwdb->f), &hwdb->st) < 0 ||
+ (size_t) hwdb->st.st_size < offsetof(struct trie_header_f, strings_len) + 8)
+ return log_debug_errno(errno, "Failed to read %s: %m", hwdb_bin_path);
+
+ hwdb->map = mmap(0, hwdb->st.st_size, PROT_READ, MAP_SHARED, fileno(hwdb->f), 0);
+ if (hwdb->map == MAP_FAILED)
+ return log_debug_errno(errno, "Failed to map %s: %m", hwdb_bin_path);
+
+ if (memcmp(hwdb->map, sig, sizeof(hwdb->head->signature)) != 0 ||
+ (size_t) hwdb->st.st_size != le64toh(hwdb->head->file_size)) {
+ log_debug("Failed to recognize the format of %s", hwdb_bin_path);
+ return -EINVAL;
+ }
+
+ log_debug("=== trie on-disk ===");
+ log_debug("tool version: %"PRIu64, le64toh(hwdb->head->tool_version));
+ log_debug("file size: %8"PRIi64" bytes", hwdb->st.st_size);
+ log_debug("header size %8"PRIu64" bytes", le64toh(hwdb->head->header_size));
+ log_debug("strings %8"PRIu64" bytes", le64toh(hwdb->head->strings_len));
+ log_debug("nodes %8"PRIu64" bytes", le64toh(hwdb->head->nodes_len));
+
+ *ret = TAKE_PTR(hwdb);
+
+ return 0;
+}
+
+static sd_hwdb *hwdb_free(sd_hwdb *hwdb) {
+ assert(hwdb);
+
+ if (hwdb->map)
+ munmap((void *)hwdb->map, hwdb->st.st_size);
+ safe_fclose(hwdb->f);
+ ordered_hashmap_free(hwdb->properties);
+ return mfree(hwdb);
+}
+
+DEFINE_PUBLIC_ATOMIC_REF_UNREF_FUNC(sd_hwdb, sd_hwdb, hwdb_free)
+
+bool hwdb_validate(sd_hwdb *hwdb) {
+ bool found = false;
+ const char* p;
+ struct stat st;
+
+ if (!hwdb)
+ return false;
+ if (!hwdb->f)
+ return false;
+
+ /* if hwdb.bin doesn't exist anywhere, we need to update */
+ NULSTR_FOREACH(p, hwdb_bin_paths) {
+ if (stat(p, &st) >= 0) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return true;
+
+ if (timespec_load(&hwdb->st.st_mtim) != timespec_load(&st.st_mtim))
+ return true;
+ return false;
+}
+
+static int properties_prepare(sd_hwdb *hwdb, const char *modalias) {
+ assert(hwdb);
+ assert(modalias);
+
+ ordered_hashmap_clear(hwdb->properties);
+ hwdb->properties_modified = true;
+
+ return trie_search_f(hwdb, modalias);
+}
+
+_public_ int sd_hwdb_get(sd_hwdb *hwdb, const char *modalias, const char *key, const char **_value) {
+ const struct trie_value_entry_f *entry;
+ int r;
+
+ assert_return(hwdb, -EINVAL);
+ assert_return(hwdb->f, -EINVAL);
+ assert_return(modalias, -EINVAL);
+ assert_return(_value, -EINVAL);
+
+ r = properties_prepare(hwdb, modalias);
+ if (r < 0)
+ return r;
+
+ entry = ordered_hashmap_get(hwdb->properties, key);
+ if (!entry)
+ return -ENOENT;
+
+ *_value = trie_string(hwdb, entry->value_off);
+
+ return 0;
+}
+
+_public_ int sd_hwdb_seek(sd_hwdb *hwdb, const char *modalias) {
+ int r;
+
+ assert_return(hwdb, -EINVAL);
+ assert_return(hwdb->f, -EINVAL);
+ assert_return(modalias, -EINVAL);
+
+ r = properties_prepare(hwdb, modalias);
+ if (r < 0)
+ return r;
+
+ hwdb->properties_modified = false;
+ hwdb->properties_iterator = ITERATOR_FIRST;
+
+ return 0;
+}
+
+_public_ int sd_hwdb_enumerate(sd_hwdb *hwdb, const char **key, const char **value) {
+ const struct trie_value_entry_f *entry;
+ const void *k;
+
+ assert_return(hwdb, -EINVAL);
+ assert_return(key, -EINVAL);
+ assert_return(value, -EINVAL);
+
+ if (hwdb->properties_modified)
+ return -EAGAIN;
+
+ ordered_hashmap_iterate(hwdb->properties, &hwdb->properties_iterator, (void **)&entry, &k);
+ if (!k)
+ return 0;
+
+ *key = k;
+ *value = trie_string(hwdb, entry->value_off);
+
+ return 1;
+}
diff --git a/src/libsystemd/sd-id128/id128-util.c b/src/libsystemd/sd-id128/id128-util.c
new file mode 100644
index 0000000..9ffd594
--- /dev/null
+++ b/src/libsystemd/sd-id128/id128-util.c
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "stdio-util.h"
+
+char *id128_to_uuid_string(sd_id128_t id, char s[37]) {
+ unsigned n, k = 0;
+
+ assert(s);
+
+ /* Similar to sd_id128_to_string() but formats the result as UUID instead of plain hex chars */
+
+ for (n = 0; n < 16; n++) {
+
+ if (IN_SET(n, 4, 6, 8, 10))
+ s[k++] = '-';
+
+ s[k++] = hexchar(id.bytes[n] >> 4);
+ s[k++] = hexchar(id.bytes[n] & 0xF);
+ }
+
+ assert(k == 36);
+
+ s[k] = 0;
+
+ return s;
+}
+
+bool id128_is_valid(const char *s) {
+ size_t i, l;
+
+ assert(s);
+
+ l = strlen(s);
+ if (l == 32) {
+
+ /* Plain formatted 128bit hex string */
+
+ for (i = 0; i < l; i++) {
+ char c = s[i];
+
+ if (!(c >= '0' && c <= '9') &&
+ !(c >= 'a' && c <= 'z') &&
+ !(c >= 'A' && c <= 'Z'))
+ return false;
+ }
+
+ } else if (l == 36) {
+
+ /* Formatted UUID */
+
+ for (i = 0; i < l; i++) {
+ char c = s[i];
+
+ if (IN_SET(i, 8, 13, 18, 23)) {
+ if (c != '-')
+ return false;
+ } else {
+ if (!(c >= '0' && c <= '9') &&
+ !(c >= 'a' && c <= 'z') &&
+ !(c >= 'A' && c <= 'Z'))
+ return false;
+ }
+ }
+
+ } else
+ return false;
+
+ return true;
+}
+
+int id128_read_fd(int fd, Id128Format f, sd_id128_t *ret) {
+ char buffer[36 + 2];
+ ssize_t l;
+
+ assert(fd >= 0);
+ assert(f < _ID128_FORMAT_MAX);
+
+ /* Reads an 128bit ID from a file, which may either be in plain format (32 hex digits), or in UUID format, both
+ * optionally followed by a newline and nothing else. ID files should really be newline terminated, but if they
+ * aren't that's OK too, following the rule of "Be conservative in what you send, be liberal in what you
+ * accept". */
+
+ l = loop_read(fd, buffer, sizeof(buffer), false); /* we expect a short read of either 32/33 or 36/37 chars */
+ if (l < 0)
+ return (int) l;
+ if (l == 0) /* empty? */
+ return -ENOMEDIUM;
+
+ switch (l) {
+
+ case 33: /* plain UUID with trailing newline */
+ if (buffer[32] != '\n')
+ return -EINVAL;
+
+ _fallthrough_;
+ case 32: /* plain UUID without trailing newline */
+ if (f == ID128_UUID)
+ return -EINVAL;
+
+ buffer[32] = 0;
+ break;
+
+ case 37: /* RFC UUID with trailing newline */
+ if (buffer[36] != '\n')
+ return -EINVAL;
+
+ _fallthrough_;
+ case 36: /* RFC UUID without trailing newline */
+ if (f == ID128_PLAIN)
+ return -EINVAL;
+
+ buffer[36] = 0;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return sd_id128_from_string(buffer, ret);
+}
+
+int id128_read(const char *p, Id128Format f, sd_id128_t *ret) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return -errno;
+
+ return id128_read_fd(fd, f, ret);
+}
+
+int id128_write_fd(int fd, Id128Format f, sd_id128_t id, bool do_sync) {
+ char buffer[36 + 2];
+ size_t sz;
+ int r;
+
+ assert(fd >= 0);
+ assert(f < _ID128_FORMAT_MAX);
+
+ if (f != ID128_UUID) {
+ sd_id128_to_string(id, buffer);
+ buffer[32] = '\n';
+ sz = 33;
+ } else {
+ id128_to_uuid_string(id, buffer);
+ buffer[36] = '\n';
+ sz = 37;
+ }
+
+ r = loop_write(fd, buffer, sz, false);
+ if (r < 0)
+ return r;
+
+ if (do_sync) {
+ if (fsync(fd) < 0)
+ return -errno;
+
+ r = fsync_directory_of_file(fd);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int id128_write(const char *p, Id128Format f, sd_id128_t id, bool do_sync) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(p, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_TRUNC, 0444);
+ if (fd < 0)
+ return -errno;
+
+ return id128_write_fd(fd, f, id, do_sync);
+}
+
+void id128_hash_func(const sd_id128_t *p, struct siphash *state) {
+ siphash24_compress(p, sizeof(sd_id128_t), state);
+}
+
+int id128_compare_func(const sd_id128_t *a, const sd_id128_t *b) {
+ return memcmp(a, b, 16);
+}
+
+DEFINE_HASH_OPS(id128_hash_ops, sd_id128_t, id128_hash_func, id128_compare_func);
diff --git a/src/libsystemd/sd-id128/id128-util.h b/src/libsystemd/sd-id128/id128-util.h
new file mode 100644
index 0000000..65f14ab
--- /dev/null
+++ b/src/libsystemd/sd-id128/id128-util.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-id128.h"
+
+#include "hash-funcs.h"
+#include "macro.h"
+
+char *id128_to_uuid_string(sd_id128_t id, char s[37]);
+
+/* Like SD_ID128_FORMAT_STR, but formats as UUID, not in plain format */
+#define ID128_UUID_FORMAT_STR "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x"
+
+bool id128_is_valid(const char *s) _pure_;
+
+typedef enum Id128Format {
+ ID128_ANY,
+ ID128_PLAIN, /* formatted as 32 hex chars as-is */
+ ID128_UUID, /* formatted as 36 character uuid string */
+ _ID128_FORMAT_MAX,
+} Id128Format;
+
+int id128_read_fd(int fd, Id128Format f, sd_id128_t *ret);
+int id128_read(const char *p, Id128Format f, sd_id128_t *ret);
+
+int id128_write_fd(int fd, Id128Format f, sd_id128_t id, bool do_sync);
+int id128_write(const char *p, Id128Format f, sd_id128_t id, bool do_sync);
+
+void id128_hash_func(const sd_id128_t *p, struct siphash *state);
+int id128_compare_func(const sd_id128_t *a, const sd_id128_t *b) _pure_;
+extern const struct hash_ops id128_hash_ops;
diff --git a/src/libsystemd/sd-id128/sd-id128.c b/src/libsystemd/sd-id128/sd-id128.c
new file mode 100644
index 0000000..e72af15
--- /dev/null
+++ b/src/libsystemd/sd-id128/sd-id128.c
@@ -0,0 +1,340 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hexdecoct.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "khash.h"
+#include "macro.h"
+#include "missing.h"
+#include "random-util.h"
+#include "user-util.h"
+#include "util.h"
+
+_public_ char *sd_id128_to_string(sd_id128_t id, char s[_SD_ARRAY_STATIC SD_ID128_STRING_MAX]) {
+ unsigned n;
+
+ assert_return(s, NULL);
+
+ for (n = 0; n < 16; n++) {
+ s[n*2] = hexchar(id.bytes[n] >> 4);
+ s[n*2+1] = hexchar(id.bytes[n] & 0xF);
+ }
+
+ s[32] = 0;
+
+ return s;
+}
+
+_public_ int sd_id128_from_string(const char s[], sd_id128_t *ret) {
+ unsigned n, i;
+ sd_id128_t t;
+ bool is_guid = false;
+
+ assert_return(s, -EINVAL);
+
+ for (n = 0, i = 0; n < 16;) {
+ int a, b;
+
+ if (s[i] == '-') {
+ /* Is this a GUID? Then be nice, and skip over
+ * the dashes */
+
+ if (i == 8)
+ is_guid = true;
+ else if (IN_SET(i, 13, 18, 23)) {
+ if (!is_guid)
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ i++;
+ continue;
+ }
+
+ a = unhexchar(s[i++]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unhexchar(s[i++]);
+ if (b < 0)
+ return -EINVAL;
+
+ t.bytes[n++] = (a << 4) | b;
+ }
+
+ if (i != (is_guid ? 36 : 32))
+ return -EINVAL;
+
+ if (s[i] != 0)
+ return -EINVAL;
+
+ if (ret)
+ *ret = t;
+ return 0;
+}
+
+_public_ int sd_id128_get_machine(sd_id128_t *ret) {
+ static thread_local sd_id128_t saved_machine_id = {};
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ if (sd_id128_is_null(saved_machine_id)) {
+ r = id128_read("/etc/machine-id", ID128_PLAIN, &saved_machine_id);
+ if (r < 0)
+ return r;
+
+ if (sd_id128_is_null(saved_machine_id))
+ return -ENOMEDIUM;
+ }
+
+ *ret = saved_machine_id;
+ return 0;
+}
+
+_public_ int sd_id128_get_boot(sd_id128_t *ret) {
+ static thread_local sd_id128_t saved_boot_id = {};
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ if (sd_id128_is_null(saved_boot_id)) {
+ r = id128_read("/proc/sys/kernel/random/boot_id", ID128_UUID, &saved_boot_id);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = saved_boot_id;
+ return 0;
+}
+
+static int get_invocation_from_keyring(sd_id128_t *ret) {
+
+ _cleanup_free_ char *description = NULL;
+ char *d, *p, *g, *u, *e;
+ unsigned long perms;
+ key_serial_t key;
+ size_t sz = 256;
+ uid_t uid;
+ gid_t gid;
+ int r, c;
+
+#define MAX_PERMS ((unsigned long) (KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH| \
+ KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH))
+
+ assert(ret);
+
+ key = request_key("user", "invocation_id", NULL, 0);
+ if (key == -1) {
+ /* Keyring support not available? No invocation key stored? */
+ if (IN_SET(errno, ENOSYS, ENOKEY))
+ return 0;
+
+ return -errno;
+ }
+
+ for (;;) {
+ description = new(char, sz);
+ if (!description)
+ return -ENOMEM;
+
+ c = keyctl(KEYCTL_DESCRIBE, key, (unsigned long) description, sz, 0);
+ if (c < 0)
+ return -errno;
+
+ if ((size_t) c <= sz)
+ break;
+
+ sz = c;
+ free(description);
+ }
+
+ /* The kernel returns a final NUL in the string, verify that. */
+ assert(description[c-1] == 0);
+
+ /* Chop off the final description string */
+ d = strrchr(description, ';');
+ if (!d)
+ return -EIO;
+ *d = 0;
+
+ /* Look for the permissions */
+ p = strrchr(description, ';');
+ if (!p)
+ return -EIO;
+
+ errno = 0;
+ perms = strtoul(p + 1, &e, 16);
+ if (errno > 0)
+ return -errno;
+ if (e == p + 1) /* Read at least one character */
+ return -EIO;
+ if (e != d) /* Must reached the end */
+ return -EIO;
+
+ if ((perms & ~MAX_PERMS) != 0)
+ return -EPERM;
+
+ *p = 0;
+
+ /* Look for the group ID */
+ g = strrchr(description, ';');
+ if (!g)
+ return -EIO;
+ r = parse_gid(g + 1, &gid);
+ if (r < 0)
+ return r;
+ if (gid != 0)
+ return -EPERM;
+ *g = 0;
+
+ /* Look for the user ID */
+ u = strrchr(description, ';');
+ if (!u)
+ return -EIO;
+ r = parse_uid(u + 1, &uid);
+ if (r < 0)
+ return r;
+ if (uid != 0)
+ return -EPERM;
+
+ c = keyctl(KEYCTL_READ, key, (unsigned long) ret, sizeof(sd_id128_t), 0);
+ if (c < 0)
+ return -errno;
+ if (c != sizeof(sd_id128_t))
+ return -EIO;
+
+ return 1;
+}
+
+_public_ int sd_id128_get_invocation(sd_id128_t *ret) {
+ static thread_local sd_id128_t saved_invocation_id = {};
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ if (sd_id128_is_null(saved_invocation_id)) {
+
+ /* We first try to read the invocation ID from the kernel keyring. This has the benefit that it is not
+ * fakeable by unprivileged code. If the information is not available in the keyring, we use
+ * $INVOCATION_ID but ignore the data if our process was called by less privileged code
+ * (i.e. secure_getenv() instead of getenv()).
+ *
+ * The kernel keyring is only relevant for system services (as for user services we don't store the
+ * invocation ID in the keyring, as there'd be no trust benefit in that). The environment variable is
+ * primarily relevant for user services, and sufficiently safe as no privilege boundary is involved. */
+
+ r = get_invocation_from_keyring(&saved_invocation_id);
+ if (r < 0)
+ return r;
+
+ if (r == 0) {
+ const char *e;
+
+ e = secure_getenv("INVOCATION_ID");
+ if (!e)
+ return -ENXIO;
+
+ r = sd_id128_from_string(e, &saved_invocation_id);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ *ret = saved_invocation_id;
+ return 0;
+}
+
+static sd_id128_t make_v4_uuid(sd_id128_t id) {
+ /* Stolen from generate_random_uuid() of drivers/char/random.c
+ * in the kernel sources */
+
+ /* Set UUID version to 4 --- truly random generation */
+ id.bytes[6] = (id.bytes[6] & 0x0F) | 0x40;
+
+ /* Set the UUID variant to DCE */
+ id.bytes[8] = (id.bytes[8] & 0x3F) | 0x80;
+
+ return id;
+}
+
+_public_ int sd_id128_randomize(sd_id128_t *ret) {
+ sd_id128_t t;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ /* We allow usage if x86-64 RDRAND here. It might not be trusted enough for keeping secrets, but it should be
+ * fine for UUIDS. */
+ r = genuine_random_bytes(&t, sizeof t, RANDOM_ALLOW_RDRAND);
+ if (r < 0)
+ return r;
+
+ /* Turn this into a valid v4 UUID, to be nice. Note that we
+ * only guarantee this for newly generated UUIDs, not for
+ * pre-existing ones. */
+
+ *ret = make_v4_uuid(t);
+ return 0;
+}
+
+static int get_app_specific(sd_id128_t base, sd_id128_t app_id, sd_id128_t *ret) {
+ _cleanup_(khash_unrefp) khash *h = NULL;
+ sd_id128_t result;
+ const void *p;
+ int r;
+
+ assert(ret);
+
+ r = khash_new_with_key(&h, "hmac(sha256)", &base, sizeof(base));
+ if (r < 0)
+ return r;
+
+ r = khash_put(h, &app_id, sizeof(app_id));
+ if (r < 0)
+ return r;
+
+ r = khash_digest_data(h, &p);
+ if (r < 0)
+ return r;
+
+ /* We chop off the trailing 16 bytes */
+ memcpy(&result, p, MIN(khash_get_size(h), sizeof(result)));
+
+ *ret = make_v4_uuid(result);
+ return 0;
+}
+
+_public_ int sd_id128_get_machine_app_specific(sd_id128_t app_id, sd_id128_t *ret) {
+ sd_id128_t id;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = sd_id128_get_machine(&id);
+ if (r < 0)
+ return r;
+
+ return get_app_specific(id, app_id, ret);
+}
+
+_public_ int sd_id128_get_boot_app_specific(sd_id128_t app_id, sd_id128_t *ret) {
+ sd_id128_t id;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = sd_id128_get_boot(&id);
+ if (r < 0)
+ return r;
+
+ return get_app_specific(id, app_id, ret);
+}
diff --git a/src/libsystemd/sd-login/sd-login.c b/src/libsystemd/sd-login/sd-login.c
new file mode 100644
index 0000000..07f21e8
--- /dev/null
+++ b/src/libsystemd/sd-login/sd-login.c
@@ -0,0 +1,1060 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <poll.h>
+#include <string.h>
+#include <sys/inotify.h>
+#include <unistd.h>
+
+#include "sd-login.h"
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "dirent-util.h"
+#include "env-file.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "io-util.h"
+#include "login-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+/* Error codes:
+ *
+ * invalid input parameters → -EINVAL
+ * invalid fd → -EBADF
+ * process does not exist → -ESRCH
+ * cgroup does not exist → -ENOENT
+ * machine, session does not exist → -ENXIO
+ * requested metadata on object is missing → -ENODATA
+ */
+
+_public_ int sd_pid_get_session(pid_t pid, char **session) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(session, -EINVAL);
+
+ r = cg_pid_get_session(pid, session);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_unit(pid_t pid, char **unit) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(unit, -EINVAL);
+
+ r = cg_pid_get_unit(pid, unit);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_user_unit(pid_t pid, char **unit) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(unit, -EINVAL);
+
+ r = cg_pid_get_user_unit(pid, unit);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_machine_name(pid_t pid, char **name) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(name, -EINVAL);
+
+ r = cg_pid_get_machine_name(pid, name);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_slice(pid_t pid, char **slice) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(slice, -EINVAL);
+
+ r = cg_pid_get_slice(pid, slice);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_user_slice(pid_t pid, char **slice) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(slice, -EINVAL);
+
+ r = cg_pid_get_user_slice(pid, slice);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_owner_uid(pid_t pid, uid_t *uid) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(uid, -EINVAL);
+
+ r = cg_pid_get_owner_uid(pid, uid);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_cgroup(pid_t pid, char **cgroup) {
+ char *c;
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(cgroup, -EINVAL);
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &c);
+ if (r < 0)
+ return r;
+
+ /* The internal APIs return the empty string for the root
+ * cgroup, let's return the "/" in the public APIs instead, as
+ * that's easier and less ambiguous for people to grok. */
+ if (isempty(c)) {
+ free(c);
+ c = strdup("/");
+ if (!c)
+ return -ENOMEM;
+
+ }
+
+ *cgroup = c;
+ return 0;
+}
+
+_public_ int sd_peer_get_session(int fd, char **session) {
+ struct ucred ucred = {};
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(session, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_session(ucred.pid, session);
+}
+
+_public_ int sd_peer_get_owner_uid(int fd, uid_t *uid) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(uid, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_owner_uid(ucred.pid, uid);
+}
+
+_public_ int sd_peer_get_unit(int fd, char **unit) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(unit, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_unit(ucred.pid, unit);
+}
+
+_public_ int sd_peer_get_user_unit(int fd, char **unit) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(unit, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_user_unit(ucred.pid, unit);
+}
+
+_public_ int sd_peer_get_machine_name(int fd, char **machine) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(machine, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_machine_name(ucred.pid, machine);
+}
+
+_public_ int sd_peer_get_slice(int fd, char **slice) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(slice, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_slice(ucred.pid, slice);
+}
+
+_public_ int sd_peer_get_user_slice(int fd, char **slice) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(slice, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_user_slice(ucred.pid, slice);
+}
+
+_public_ int sd_peer_get_cgroup(int fd, char **cgroup) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(cgroup, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return sd_pid_get_cgroup(ucred.pid, cgroup);
+}
+
+static int file_of_uid(uid_t uid, char **p) {
+
+ assert_return(uid_is_valid(uid), -EINVAL);
+ assert(p);
+
+ if (asprintf(p, "/run/systemd/users/" UID_FMT, uid) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+_public_ int sd_uid_get_state(uid_t uid, char**state) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert_return(state, -EINVAL);
+
+ r = file_of_uid(uid, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "STATE", &s);
+ if (r == -ENOENT) {
+ r = free_and_strdup(&s, "offline");
+ if (r < 0)
+ return r;
+ } else if (r < 0)
+ return r;
+ else if (isempty(s))
+ return -EIO;
+
+ *state = TAKE_PTR(s);
+ return 0;
+}
+
+_public_ int sd_uid_get_display(uid_t uid, char **session) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert_return(session, -EINVAL);
+
+ r = file_of_uid(uid, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "DISPLAY", &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ *session = TAKE_PTR(s);
+
+ return 0;
+}
+
+static int file_of_seat(const char *seat, char **_p) {
+ char *p;
+ int r;
+
+ assert(_p);
+
+ if (seat) {
+ if (!filename_is_valid(seat))
+ return -EINVAL;
+
+ p = strappend("/run/systemd/seats/", seat);
+ } else {
+ _cleanup_free_ char *buf = NULL;
+
+ r = sd_session_get_seat(NULL, &buf);
+ if (r < 0)
+ return r;
+
+ p = strappend("/run/systemd/seats/", buf);
+ }
+
+ if (!p)
+ return -ENOMEM;
+
+ *_p = TAKE_PTR(p);
+ return 0;
+}
+
+_public_ int sd_uid_is_on_seat(uid_t uid, int require_active, const char *seat) {
+ _cleanup_free_ char *t = NULL, *s = NULL, *p = NULL;
+ size_t l;
+ int r;
+ const char *word, *variable, *state;
+
+ assert_return(uid_is_valid(uid), -EINVAL);
+
+ r = file_of_seat(seat, &p);
+ if (r < 0)
+ return r;
+
+ variable = require_active ? "ACTIVE_UID" : "UIDS";
+
+ r = parse_env_file(NULL, p, variable, &s);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return 0;
+
+ if (asprintf(&t, UID_FMT, uid) < 0)
+ return -ENOMEM;
+
+ FOREACH_WORD(word, l, s, state)
+ if (strneq(t, word, l))
+ return 1;
+
+ return 0;
+}
+
+static int uid_get_array(uid_t uid, const char *variable, char ***array) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ char **a;
+ int r;
+
+ assert(variable);
+
+ r = file_of_uid(uid, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, variable, &s);
+ if (r == -ENOENT || (r >= 0 && isempty(s))) {
+ if (array)
+ *array = NULL;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ a = strv_split(s, " ");
+ if (!a)
+ return -ENOMEM;
+
+ strv_uniq(a);
+ r = (int) strv_length(a);
+
+ if (array)
+ *array = a;
+ else
+ strv_free(a);
+
+ return r;
+}
+
+_public_ int sd_uid_get_sessions(uid_t uid, int require_active, char ***sessions) {
+ return uid_get_array(
+ uid,
+ require_active == 0 ? "ONLINE_SESSIONS" :
+ require_active > 0 ? "ACTIVE_SESSIONS" :
+ "SESSIONS",
+ sessions);
+}
+
+_public_ int sd_uid_get_seats(uid_t uid, int require_active, char ***seats) {
+ return uid_get_array(
+ uid,
+ require_active == 0 ? "ONLINE_SEATS" :
+ require_active > 0 ? "ACTIVE_SEATS" :
+ "SEATS",
+ seats);
+}
+
+static int file_of_session(const char *session, char **_p) {
+ char *p;
+ int r;
+
+ assert(_p);
+
+ if (session) {
+ if (!session_id_valid(session))
+ return -EINVAL;
+
+ p = strappend("/run/systemd/sessions/", session);
+ } else {
+ _cleanup_free_ char *buf = NULL;
+
+ r = sd_pid_get_session(0, &buf);
+ if (r < 0)
+ return r;
+
+ p = strappend("/run/systemd/sessions/", buf);
+ }
+
+ if (!p)
+ return -ENOMEM;
+
+ *_p = p;
+ return 0;
+}
+
+_public_ int sd_session_is_active(const char *session) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "ACTIVE", &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -EIO;
+
+ return parse_boolean(s);
+}
+
+_public_ int sd_session_is_remote(const char *session) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "REMOTE", &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ return parse_boolean(s);
+}
+
+_public_ int sd_session_get_state(const char *session, char **state) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert_return(state, -EINVAL);
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "STATE", &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -EIO;
+
+ *state = TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_session_get_uid(const char *session, uid_t *uid) {
+ int r;
+ _cleanup_free_ char *p = NULL, *s = NULL;
+
+ assert_return(uid, -EINVAL);
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "UID", &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -EIO;
+
+ return parse_uid(s, uid);
+}
+
+static int session_get_string(const char *session, const char *field, char **value) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert_return(value, -EINVAL);
+ assert(field);
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, field, &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ *value = TAKE_PTR(s);
+ return 0;
+}
+
+_public_ int sd_session_get_seat(const char *session, char **seat) {
+ return session_get_string(session, "SEAT", seat);
+}
+
+_public_ int sd_session_get_tty(const char *session, char **tty) {
+ return session_get_string(session, "TTY", tty);
+}
+
+_public_ int sd_session_get_vt(const char *session, unsigned *vtnr) {
+ _cleanup_free_ char *vtnr_string = NULL;
+ unsigned u;
+ int r;
+
+ assert_return(vtnr, -EINVAL);
+
+ r = session_get_string(session, "VTNR", &vtnr_string);
+ if (r < 0)
+ return r;
+
+ r = safe_atou(vtnr_string, &u);
+ if (r < 0)
+ return r;
+
+ *vtnr = u;
+ return 0;
+}
+
+_public_ int sd_session_get_service(const char *session, char **service) {
+ return session_get_string(session, "SERVICE", service);
+}
+
+_public_ int sd_session_get_type(const char *session, char **type) {
+ return session_get_string(session, "TYPE", type);
+}
+
+_public_ int sd_session_get_class(const char *session, char **class) {
+ return session_get_string(session, "CLASS", class);
+}
+
+_public_ int sd_session_get_desktop(const char *session, char **desktop) {
+ _cleanup_free_ char *escaped = NULL;
+ char *t;
+ int r;
+
+ assert_return(desktop, -EINVAL);
+
+ r = session_get_string(session, "DESKTOP", &escaped);
+ if (r < 0)
+ return r;
+
+ r = cunescape(escaped, 0, &t);
+ if (r < 0)
+ return r;
+
+ *desktop = t;
+ return 0;
+}
+
+_public_ int sd_session_get_display(const char *session, char **display) {
+ return session_get_string(session, "DISPLAY", display);
+}
+
+_public_ int sd_session_get_remote_user(const char *session, char **remote_user) {
+ return session_get_string(session, "REMOTE_USER", remote_user);
+}
+
+_public_ int sd_session_get_remote_host(const char *session, char **remote_host) {
+ return session_get_string(session, "REMOTE_HOST", remote_host);
+}
+
+_public_ int sd_seat_get_active(const char *seat, char **session, uid_t *uid) {
+ _cleanup_free_ char *p = NULL, *s = NULL, *t = NULL;
+ int r;
+
+ assert_return(session || uid, -EINVAL);
+
+ r = file_of_seat(seat, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p,
+ "ACTIVE", &s,
+ "ACTIVE_UID", &t);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+
+ if (session && !s)
+ return -ENODATA;
+
+ if (uid && !t)
+ return -ENODATA;
+
+ if (uid && t) {
+ r = parse_uid(t, uid);
+ if (r < 0)
+ return r;
+ }
+
+ if (session && s)
+ *session = TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_seat_get_sessions(const char *seat, char ***sessions, uid_t **uids, unsigned *n_uids) {
+ _cleanup_free_ char *p = NULL, *s = NULL, *t = NULL;
+ _cleanup_strv_free_ char **a = NULL;
+ _cleanup_free_ uid_t *b = NULL;
+ unsigned n = 0;
+ int r;
+
+ r = file_of_seat(seat, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p,
+ "SESSIONS", &s,
+ "UIDS", &t);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+
+ if (s) {
+ a = strv_split(s, " ");
+ if (!a)
+ return -ENOMEM;
+ }
+
+ if (uids && t) {
+ const char *word, *state;
+ size_t l;
+
+ FOREACH_WORD(word, l, t, state)
+ n++;
+
+ if (n > 0) {
+ unsigned i = 0;
+
+ b = new(uid_t, n);
+ if (!b)
+ return -ENOMEM;
+
+ FOREACH_WORD(word, l, t, state) {
+ _cleanup_free_ char *k = NULL;
+
+ k = strndup(word, l);
+ if (!k)
+ return -ENOMEM;
+
+ r = parse_uid(k, b + i);
+ if (r < 0)
+ return r;
+
+ i++;
+ }
+ }
+ }
+
+ r = (int) strv_length(a);
+
+ if (sessions)
+ *sessions = TAKE_PTR(a);
+
+ if (uids)
+ *uids = TAKE_PTR(b);
+
+ if (n_uids)
+ *n_uids = n;
+
+ return r;
+}
+
+static int seat_get_can(const char *seat, const char *variable) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert(variable);
+
+ r = file_of_seat(seat, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p,
+ variable, &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ return parse_boolean(s);
+}
+
+_public_ int sd_seat_can_multi_session(const char *seat) {
+ return seat_get_can(seat, "CAN_MULTI_SESSION");
+}
+
+_public_ int sd_seat_can_tty(const char *seat) {
+ return seat_get_can(seat, "CAN_TTY");
+}
+
+_public_ int sd_seat_can_graphical(const char *seat) {
+ return seat_get_can(seat, "CAN_GRAPHICAL");
+}
+
+_public_ int sd_get_seats(char ***seats) {
+ int r;
+
+ r = get_files_in_directory("/run/systemd/seats/", seats);
+ if (r == -ENOENT) {
+ if (seats)
+ *seats = NULL;
+ return 0;
+ }
+ return r;
+}
+
+_public_ int sd_get_sessions(char ***sessions) {
+ int r;
+
+ r = get_files_in_directory("/run/systemd/sessions/", sessions);
+ if (r == -ENOENT) {
+ if (sessions)
+ *sessions = NULL;
+ return 0;
+ }
+ return r;
+}
+
+_public_ int sd_get_uids(uid_t **users) {
+ _cleanup_closedir_ DIR *d;
+ struct dirent *de;
+ int r = 0;
+ unsigned n = 0;
+ _cleanup_free_ uid_t *l = NULL;
+
+ d = opendir("/run/systemd/users/");
+ if (!d) {
+ if (errno == ENOENT) {
+ if (users)
+ *users = NULL;
+ return 0;
+ }
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ int k;
+ uid_t uid;
+
+ dirent_ensure_type(d, de);
+
+ if (!dirent_is_file(de))
+ continue;
+
+ k = parse_uid(de->d_name, &uid);
+ if (k < 0)
+ continue;
+
+ if (users) {
+ if ((unsigned) r >= n) {
+ uid_t *t;
+
+ n = MAX(16, 2*r);
+ t = realloc(l, sizeof(uid_t) * n);
+ if (!t)
+ return -ENOMEM;
+
+ l = t;
+ }
+
+ assert((unsigned) r < n);
+ l[r++] = uid;
+ } else
+ r++;
+ }
+
+ if (users)
+ *users = TAKE_PTR(l);
+
+ return r;
+}
+
+_public_ int sd_get_machine_names(char ***machines) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **a, **b;
+ int r;
+
+ r = get_files_in_directory("/run/systemd/machines/", &l);
+ if (r == -ENOENT) {
+ if (machines)
+ *machines = NULL;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ if (l) {
+ r = 0;
+
+ /* Filter out the unit: symlinks */
+ for (a = b = l; *a; a++) {
+ if (startswith(*a, "unit:") || !machine_name_is_valid(*a))
+ free(*a);
+ else {
+ *b = *a;
+ b++;
+ r++;
+ }
+ }
+
+ *b = NULL;
+ }
+
+ if (machines)
+ *machines = TAKE_PTR(l);
+
+ return r;
+}
+
+_public_ int sd_machine_get_class(const char *machine, char **class) {
+ _cleanup_free_ char *c = NULL;
+ const char *p;
+ int r;
+
+ assert_return(class, -EINVAL);
+
+ if (streq(machine, ".host")) {
+ c = strdup("host");
+ if (!c)
+ return -ENOMEM;
+ } else {
+ if (!machine_name_is_valid(machine))
+ return -EINVAL;
+
+ p = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, p, "CLASS", &c);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (!c)
+ return -EIO;
+ }
+
+ *class = TAKE_PTR(c);
+ return 0;
+}
+
+_public_ int sd_machine_get_ifindices(const char *machine, int **ifindices) {
+ _cleanup_free_ char *netif = NULL;
+ size_t l, allocated = 0, nr = 0;
+ int *ni = NULL;
+ const char *p, *word, *state;
+ int r;
+
+ assert_return(machine_name_is_valid(machine), -EINVAL);
+ assert_return(ifindices, -EINVAL);
+
+ p = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, p, "NETIF", &netif);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (!netif) {
+ *ifindices = NULL;
+ return 0;
+ }
+
+ FOREACH_WORD(word, l, netif, state) {
+ char buf[l+1];
+ int ifi;
+
+ *(char*) (mempcpy(buf, word, l)) = 0;
+
+ if (parse_ifindex(buf, &ifi) < 0)
+ continue;
+
+ if (!GREEDY_REALLOC(ni, allocated, nr+1)) {
+ free(ni);
+ return -ENOMEM;
+ }
+
+ ni[nr++] = ifi;
+ }
+
+ *ifindices = ni;
+ return nr;
+}
+
+static int MONITOR_TO_FD(sd_login_monitor *m) {
+ return (int) (unsigned long) m - 1;
+}
+
+static sd_login_monitor* FD_TO_MONITOR(int fd) {
+ return (sd_login_monitor*) (unsigned long) (fd + 1);
+}
+
+_public_ int sd_login_monitor_new(const char *category, sd_login_monitor **m) {
+ _cleanup_close_ int fd = -1;
+ bool good = false;
+ int k;
+
+ assert_return(m, -EINVAL);
+
+ fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (!category || streq(category, "seat")) {
+ k = inotify_add_watch(fd, "/run/systemd/seats/", IN_MOVED_TO|IN_DELETE);
+ if (k < 0)
+ return -errno;
+
+ good = true;
+ }
+
+ if (!category || streq(category, "session")) {
+ k = inotify_add_watch(fd, "/run/systemd/sessions/", IN_MOVED_TO|IN_DELETE);
+ if (k < 0)
+ return -errno;
+
+ good = true;
+ }
+
+ if (!category || streq(category, "uid")) {
+ k = inotify_add_watch(fd, "/run/systemd/users/", IN_MOVED_TO|IN_DELETE);
+ if (k < 0)
+ return -errno;
+
+ good = true;
+ }
+
+ if (!category || streq(category, "machine")) {
+ k = inotify_add_watch(fd, "/run/systemd/machines/", IN_MOVED_TO|IN_DELETE);
+ if (k < 0)
+ return -errno;
+
+ good = true;
+ }
+
+ if (!good)
+ return -EINVAL;
+
+ *m = FD_TO_MONITOR(fd);
+ fd = -1;
+
+ return 0;
+}
+
+_public_ sd_login_monitor* sd_login_monitor_unref(sd_login_monitor *m) {
+ int fd;
+
+ if (!m)
+ return NULL;
+
+ fd = MONITOR_TO_FD(m);
+ close_nointr(fd);
+
+ return NULL;
+}
+
+_public_ int sd_login_monitor_flush(sd_login_monitor *m) {
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = flush_fd(MONITOR_TO_FD(m));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+_public_ int sd_login_monitor_get_fd(sd_login_monitor *m) {
+
+ assert_return(m, -EINVAL);
+
+ return MONITOR_TO_FD(m);
+}
+
+_public_ int sd_login_monitor_get_events(sd_login_monitor *m) {
+
+ assert_return(m, -EINVAL);
+
+ /* For now we will only return POLLIN here, since we don't
+ * need anything else ever for inotify. However, let's have
+ * this API to keep our options open should we later on need
+ * it. */
+ return POLLIN;
+}
+
+_public_ int sd_login_monitor_get_timeout(sd_login_monitor *m, uint64_t *timeout_usec) {
+
+ assert_return(m, -EINVAL);
+ assert_return(timeout_usec, -EINVAL);
+
+ /* For now we will only return (uint64_t) -1, since we don't
+ * need any timeout. However, let's have this API to keep our
+ * options open should we later on need it. */
+ *timeout_usec = (uint64_t) -1;
+ return 0;
+}
diff --git a/src/libsystemd/sd-login/test-login.c b/src/libsystemd/sd-login/test-login.c
new file mode 100644
index 0000000..1789e54
--- /dev/null
+++ b/src/libsystemd/sd-login/test-login.c
@@ -0,0 +1,287 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <poll.h>
+#include <string.h>
+
+#include "sd-login.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "log.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static char* format_uids(char **buf, uid_t* uids, int count) {
+ int pos = 0, k, inc;
+ size_t size = (DECIMAL_STR_MAX(uid_t) + 1) * count + 1;
+
+ assert_se(*buf = malloc(size));
+
+ for (k = 0; k < count; k++) {
+ sprintf(*buf + pos, "%s"UID_FMT"%n", k > 0 ? " " : "", uids[k], &inc);
+ pos += inc;
+ }
+
+ assert_se(pos < (ssize_t)size);
+ (*buf)[pos] = '\0';
+
+ return *buf;
+}
+
+static void test_login(void) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ _cleanup_free_ char *pp = NULL, *qq = NULL,
+ *display_session = NULL, *cgroup = NULL,
+ *display = NULL, *remote_user = NULL, *remote_host = NULL,
+ *type = NULL, *class = NULL, *state = NULL, *state2 = NULL,
+ *seat = NULL, *session = NULL,
+ *unit = NULL, *user_unit = NULL, *slice = NULL;
+ int r;
+ uid_t u, u2;
+ char *t, **seats, **sessions;
+
+ r = sd_pid_get_unit(0, &unit);
+ assert_se(r >= 0 || r == -ENODATA);
+ log_info("sd_pid_get_unit(0, …) → \"%s\"", strna(unit));
+
+ r = sd_pid_get_user_unit(0, &user_unit);
+ assert_se(r >= 0 || r == -ENODATA);
+ log_info("sd_pid_get_user_unit(0, …) → \"%s\"", strna(user_unit));
+
+ r = sd_pid_get_slice(0, &slice);
+ assert_se(r >= 0 || r == -ENODATA);
+ log_info("sd_pid_get_slice(0, …) → \"%s\"", strna(slice));
+
+ r = sd_pid_get_session(0, &session);
+ if (r < 0) {
+ log_warning_errno(r, "sd_pid_get_session(0, …): %m");
+ if (r == -ENODATA)
+ log_info("Seems we are not running in a session, skipping some tests.");
+ } else {
+ log_info("sd_pid_get_session(0, …) → \"%s\"", session);
+
+ assert_se(sd_pid_get_owner_uid(0, &u2) == 0);
+ log_info("sd_pid_get_owner_uid(0, …) → "UID_FMT, u2);
+
+ assert_se(sd_pid_get_cgroup(0, &cgroup) == 0);
+ log_info("sd_pid_get_cgroup(0, …) → \"%s\"", cgroup);
+
+ r = sd_uid_get_display(u2, &display_session);
+ assert_se(r >= 0 || r == -ENODATA);
+ log_info("sd_uid_get_display("UID_FMT", …) → \"%s\"",
+ u2, strnull(display_session));
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == 0);
+ sd_peer_get_session(pair[0], &pp);
+ sd_peer_get_session(pair[1], &qq);
+ assert_se(streq_ptr(pp, qq));
+
+ r = sd_uid_get_sessions(u2, false, &sessions);
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(sessions));
+ assert_se(t = strv_join(sessions, " "));
+ strv_free(sessions);
+ log_info("sd_uid_get_sessions("UID_FMT", …) → [%i] \"%s\"", u2, r, t);
+ free(t);
+
+ assert_se(r == sd_uid_get_sessions(u2, false, NULL));
+
+ r = sd_uid_get_seats(u2, false, &seats);
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(seats));
+ assert_se(t = strv_join(seats, " "));
+ strv_free(seats);
+ log_info("sd_uid_get_seats("UID_FMT", …) → [%i] \"%s\"", u2, r, t);
+ free(t);
+
+ assert_se(r == sd_uid_get_seats(u2, false, NULL));
+ }
+
+ if (session) {
+ r = sd_session_is_active(session);
+ assert_se(r >= 0);
+ log_info("sd_session_is_active(\"%s\") → %s", session, yes_no(r));
+
+ r = sd_session_is_remote(session);
+ assert_se(r >= 0);
+ log_info("sd_session_is_remote(\"%s\") → %s", session, yes_no(r));
+
+ r = sd_session_get_state(session, &state);
+ assert_se(r >= 0);
+ log_info("sd_session_get_state(\"%s\") → \"%s\"", session, state);
+
+ assert_se(sd_session_get_uid(session, &u) >= 0);
+ log_info("sd_session_get_uid(\"%s\") → "UID_FMT, session, u);
+ assert_se(u == u2);
+
+ assert_se(sd_session_get_type(session, &type) >= 0);
+ log_info("sd_session_get_type(\"%s\") → \"%s\"", session, type);
+
+ assert_se(sd_session_get_class(session, &class) >= 0);
+ log_info("sd_session_get_class(\"%s\") → \"%s\"", session, class);
+
+ r = sd_session_get_display(session, &display);
+ assert_se(r >= 0 || r == -ENODATA);
+ log_info("sd_session_get_display(\"%s\") → \"%s\"", session, strna(display));
+
+ r = sd_session_get_remote_user(session, &remote_user);
+ assert_se(r >= 0 || r == -ENODATA);
+ log_info("sd_session_get_remote_user(\"%s\") → \"%s\"",
+ session, strna(remote_user));
+
+ r = sd_session_get_remote_host(session, &remote_host);
+ assert_se(r >= 0 || r == -ENODATA);
+ log_info("sd_session_get_remote_host(\"%s\") → \"%s\"",
+ session, strna(remote_host));
+
+ r = sd_session_get_seat(session, &seat);
+ if (r >= 0) {
+ assert_se(seat);
+
+ log_info("sd_session_get_seat(\"%s\") → \"%s\"", session, seat);
+
+ r = sd_seat_can_multi_session(seat);
+ assert_se(r >= 0);
+ log_info("sd_session_can_multi_seat(\"%s\") → %s", seat, yes_no(r));
+
+ r = sd_seat_can_tty(seat);
+ assert_se(r >= 0);
+ log_info("sd_session_can_tty(\"%s\") → %s", seat, yes_no(r));
+
+ r = sd_seat_can_graphical(seat);
+ assert_se(r >= 0);
+ log_info("sd_session_can_graphical(\"%s\") → %s", seat, yes_no(r));
+ } else {
+ log_info_errno(r, "sd_session_get_seat(\"%s\"): %m", session);
+ assert_se(r == -ENODATA);
+ }
+
+ assert_se(sd_uid_get_state(u, &state2) >= 0);
+ log_info("sd_uid_get_state("UID_FMT", …) → %s", u, state2);
+ }
+
+ if (seat) {
+ _cleanup_free_ char *session2 = NULL, *buf = NULL;
+ _cleanup_free_ uid_t *uids = NULL;
+ unsigned n;
+
+ assert_se(sd_uid_is_on_seat(u, 0, seat) > 0);
+
+ r = sd_seat_get_active(seat, &session2, &u2);
+ assert_se(r >= 0);
+ log_info("sd_seat_get_active(\"%s\", …) → \"%s\", "UID_FMT, seat, session2, u2);
+
+ r = sd_uid_is_on_seat(u, 1, seat);
+ assert_se(r >= 0);
+ assert_se(!!r == streq(session, session2));
+
+ r = sd_seat_get_sessions(seat, &sessions, &uids, &n);
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(sessions));
+ assert_se(t = strv_join(sessions, " "));
+ strv_free(sessions);
+ log_info("sd_seat_get_sessions(\"%s\", …) → %i, \"%s\", [%i] {%s}",
+ seat, r, t, n, format_uids(&buf, uids, n));
+ free(t);
+
+ assert_se(sd_seat_get_sessions(seat, NULL, NULL, NULL) == r);
+ }
+
+ r = sd_get_seats(&seats);
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(seats));
+ assert_se(t = strv_join(seats, ", "));
+ strv_free(seats);
+ log_info("sd_get_seats(…) → [%i] \"%s\"", r, t);
+ t = mfree(t);
+
+ assert_se(sd_get_seats(NULL) == r);
+
+ r = sd_seat_get_active(NULL, &t, NULL);
+ assert_se(IN_SET(r, 0, -ENODATA));
+ log_info("sd_seat_get_active(NULL, …) (active session on current seat) → %s", strnull(t));
+ free(t);
+
+ r = sd_get_sessions(&sessions);
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(sessions));
+ assert_se(t = strv_join(sessions, ", "));
+ strv_free(sessions);
+ log_info("sd_get_sessions(…) → [%i] \"%s\"", r, t);
+ free(t);
+
+ assert_se(sd_get_sessions(NULL) == r);
+
+ {
+ _cleanup_free_ uid_t *uids = NULL;
+ _cleanup_free_ char *buf = NULL;
+
+ r = sd_get_uids(&uids);
+ assert_se(r >= 0);
+ log_info("sd_get_uids(…) → [%i] {%s}", r, format_uids(&buf, uids, r));
+
+ assert_se(sd_get_uids(NULL) == r);
+ }
+
+ {
+ _cleanup_strv_free_ char **machines = NULL;
+ _cleanup_free_ char *buf = NULL;
+
+ r = sd_get_machine_names(&machines);
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(machines));
+ assert_se(buf = strv_join(machines, " "));
+ log_info("sd_get_machines(…) → [%i] \"%s\"", r, buf);
+
+ assert_se(sd_get_machine_names(NULL) == r);
+ }
+}
+
+static void test_monitor(void) {
+ sd_login_monitor *m = NULL;
+ unsigned n;
+ int r;
+
+ r = sd_login_monitor_new("session", &m);
+ assert_se(r >= 0);
+
+ for (n = 0; n < 5; n++) {
+ struct pollfd pollfd = {};
+ usec_t timeout, nw;
+
+ assert_se((pollfd.fd = sd_login_monitor_get_fd(m)) >= 0);
+ assert_se((pollfd.events = sd_login_monitor_get_events(m)) >= 0);
+
+ assert_se(sd_login_monitor_get_timeout(m, &timeout) >= 0);
+
+ nw = now(CLOCK_MONOTONIC);
+
+ r = poll(&pollfd, 1,
+ timeout == (uint64_t) -1 ? -1 :
+ timeout > nw ? (int) ((timeout - nw) / 1000) :
+ 0);
+
+ assert_se(r >= 0);
+
+ sd_login_monitor_flush(m);
+ printf("Wake!\n");
+ }
+
+ sd_login_monitor_unref(m);
+}
+
+int main(int argc, char* argv[]) {
+ log_parse_environment();
+ log_open();
+
+ log_info("/* Information printed is from the live system */");
+
+ test_login();
+
+ if (streq_ptr(argv[1], "-m"))
+ test_monitor();
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-netlink/generic-netlink.c b/src/libsystemd/sd-netlink/generic-netlink.c
new file mode 100644
index 0000000..3445757
--- /dev/null
+++ b/src/libsystemd/sd-netlink/generic-netlink.c
@@ -0,0 +1,97 @@
+#include <linux/genetlink.h>
+
+#include "sd-netlink.h"
+#include "netlink-internal.h"
+#include "alloc-util.h"
+
+typedef struct {
+ const char* name;
+ uint8_t version;
+} genl_family;
+
+static const genl_family genl_families[] = {
+ [SD_GENL_ID_CTRL] = { .name = "", .version = 1 },
+ [SD_GENL_WIREGUARD] = { .name = "wireguard", .version = 1 },
+ [SD_GENL_FOU] = { .name = "fou", .version = 1 },
+};
+
+int sd_genl_socket_open(sd_netlink **ret) {
+ return netlink_open_family(ret, NETLINK_GENERIC);
+}
+static int lookup_id(sd_netlink *nl, sd_genl_family family, uint16_t *id);
+
+static int genl_message_new(sd_netlink *nl, sd_genl_family family, uint16_t nlmsg_type, uint8_t cmd, sd_netlink_message **ret) {
+ int r;
+ struct genlmsghdr *genl;
+ const NLType *genl_cmd_type, *nl_type;
+ const NLTypeSystem *type_system;
+ size_t size;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+
+ assert_return(nl->protocol == NETLINK_GENERIC, -EINVAL);
+
+ r = type_system_get_type(&genl_family_type_system_root, &genl_cmd_type, family);
+ if (r < 0)
+ return r;
+
+ r = message_new_empty(nl, &m);
+ if (r < 0)
+ return r;
+
+ size = NLMSG_SPACE(sizeof(struct genlmsghdr));
+ m->hdr = malloc0(size);
+ if (!m->hdr)
+ return -ENOMEM;
+
+ m->hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+
+ type_get_type_system(genl_cmd_type, &type_system);
+
+ r = type_system_get_type(type_system, &nl_type, cmd);
+ if (r < 0)
+ return r;
+
+ m->hdr->nlmsg_len = size;
+ m->hdr->nlmsg_type = nlmsg_type;
+
+ type_get_type_system(nl_type, &m->containers[0].type_system);
+ genl = NLMSG_DATA(m->hdr);
+ genl->cmd = cmd;
+ genl->version = genl_families[family].version;
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+int sd_genl_message_new(sd_netlink *nl, sd_genl_family family, uint8_t cmd, sd_netlink_message **ret) {
+ int r;
+ uint16_t id = GENL_ID_CTRL;
+
+ if (family != SD_GENL_ID_CTRL) {
+ r = lookup_id(nl, family, &id);
+ if (r < 0)
+ return r;
+ }
+
+ return genl_message_new(nl, family, id, cmd, ret);
+}
+
+static int lookup_id(sd_netlink *nl, sd_genl_family family, uint16_t *id) {
+ int r;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+
+ r = sd_genl_message_new(nl, SD_GENL_ID_CTRL, CTRL_CMD_GETFAMILY, &req);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(req, CTRL_ATTR_FAMILY_NAME, genl_families[family].name);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(nl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ return sd_netlink_message_read_u16(reply, CTRL_ATTR_FAMILY_ID, id);
+}
diff --git a/src/libsystemd/sd-netlink/local-addresses.c b/src/libsystemd/sd-netlink/local-addresses.c
new file mode 100644
index 0000000..5c37279
--- /dev/null
+++ b/src/libsystemd/sd-netlink/local-addresses.c
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "local-addresses.h"
+#include "macro.h"
+#include "netlink-util.h"
+
+static int address_compare(const struct local_address *a, const struct local_address *b) {
+ int r;
+
+ /* Order lowest scope first, IPv4 before IPv6, lowest interface index first */
+
+ if (a->family == AF_INET && b->family == AF_INET6)
+ return -1;
+ if (a->family == AF_INET6 && b->family == AF_INET)
+ return 1;
+
+ r = CMP(a->scope, b->scope);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->metric, b->metric);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->ifindex, b->ifindex);
+ if (r != 0)
+ return r;
+
+ return memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
+}
+
+int local_addresses(sd_netlink *context, int ifindex, int af, struct local_address **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_free_ struct local_address *list = NULL;
+ size_t n_list = 0, n_allocated = 0;
+ sd_netlink_message *m;
+ int r;
+
+ assert(ret);
+
+ if (context)
+ rtnl = sd_netlink_ref(context);
+ else {
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_addr(rtnl, &req, RTM_GETADDR, 0, af);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (m = reply; m; m = sd_netlink_message_next(m)) {
+ struct local_address *a;
+ unsigned char flags;
+ uint16_t type;
+ int ifi, family;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+ if (type != RTM_NEWADDR)
+ continue;
+
+ r = sd_rtnl_message_addr_get_ifindex(m, &ifi);
+ if (r < 0)
+ return r;
+ if (ifindex > 0 && ifi != ifindex)
+ continue;
+
+ r = sd_rtnl_message_addr_get_family(m, &family);
+ if (r < 0)
+ return r;
+ if (af != AF_UNSPEC && af != family)
+ continue;
+
+ r = sd_rtnl_message_addr_get_flags(m, &flags);
+ if (r < 0)
+ return r;
+ if (flags & IFA_F_DEPRECATED)
+ continue;
+
+ if (!GREEDY_REALLOC0(list, n_allocated, n_list+1))
+ return -ENOMEM;
+
+ a = list + n_list;
+
+ r = sd_rtnl_message_addr_get_scope(m, &a->scope);
+ if (r < 0)
+ return r;
+
+ if (ifindex == 0 && IN_SET(a->scope, RT_SCOPE_HOST, RT_SCOPE_NOWHERE))
+ continue;
+
+ switch (family) {
+
+ case AF_INET:
+ r = sd_netlink_message_read_in_addr(m, IFA_LOCAL, &a->address.in);
+ if (r < 0) {
+ r = sd_netlink_message_read_in_addr(m, IFA_ADDRESS, &a->address.in);
+ if (r < 0)
+ continue;
+ }
+ break;
+
+ case AF_INET6:
+ r = sd_netlink_message_read_in6_addr(m, IFA_LOCAL, &a->address.in6);
+ if (r < 0) {
+ r = sd_netlink_message_read_in6_addr(m, IFA_ADDRESS, &a->address.in6);
+ if (r < 0)
+ continue;
+ }
+ break;
+
+ default:
+ continue;
+ }
+
+ a->ifindex = ifi;
+ a->family = family;
+
+ n_list++;
+ };
+
+ typesafe_qsort(list, n_list, address_compare);
+
+ *ret = TAKE_PTR(list);
+
+ return (int) n_list;
+}
+
+int local_gateways(sd_netlink *context, int ifindex, int af, struct local_address **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_free_ struct local_address *list = NULL;
+ sd_netlink_message *m = NULL;
+ size_t n_list = 0, n_allocated = 0;
+ int r;
+
+ assert(ret);
+
+ if (context)
+ rtnl = sd_netlink_ref(context);
+ else {
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_route(rtnl, &req, RTM_GETROUTE, af, RTPROT_UNSPEC);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (m = reply; m; m = sd_netlink_message_next(m)) {
+ struct local_address *a;
+ uint16_t type;
+ unsigned char dst_len, src_len;
+ uint32_t ifi;
+ int family;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+ if (type != RTM_NEWROUTE)
+ continue;
+
+ /* We only care for default routes */
+ r = sd_rtnl_message_route_get_dst_prefixlen(m, &dst_len);
+ if (r < 0)
+ return r;
+ if (dst_len != 0)
+ continue;
+
+ r = sd_rtnl_message_route_get_src_prefixlen(m, &src_len);
+ if (r < 0)
+ return r;
+ if (src_len != 0)
+ continue;
+
+ r = sd_netlink_message_read_u32(m, RTA_OIF, &ifi);
+ if (r == -ENODATA) /* Not all routes have an RTA_OIF attribute (for example nexthop ones) */
+ continue;
+ if (r < 0)
+ return r;
+ if (ifindex > 0 && (int) ifi != ifindex)
+ continue;
+
+ r = sd_rtnl_message_route_get_family(m, &family);
+ if (r < 0)
+ return r;
+ if (af != AF_UNSPEC && af != family)
+ continue;
+
+ if (!GREEDY_REALLOC0(list, n_allocated, n_list + 1))
+ return -ENOMEM;
+
+ a = list + n_list;
+
+ switch (family) {
+ case AF_INET:
+ r = sd_netlink_message_read_in_addr(m, RTA_GATEWAY, &a->address.in);
+ if (r < 0)
+ continue;
+
+ break;
+ case AF_INET6:
+ r = sd_netlink_message_read_in6_addr(m, RTA_GATEWAY, &a->address.in6);
+ if (r < 0)
+ continue;
+
+ break;
+ default:
+ continue;
+ }
+
+ sd_netlink_message_read_u32(m, RTA_PRIORITY, &a->metric);
+
+ a->ifindex = ifi;
+ a->family = family;
+
+ n_list++;
+ }
+
+ typesafe_qsort(list, n_list, address_compare);
+
+ *ret = TAKE_PTR(list);
+
+ return (int) n_list;
+}
diff --git a/src/libsystemd/sd-netlink/local-addresses.h b/src/libsystemd/sd-netlink/local-addresses.h
new file mode 100644
index 0000000..e88c5e5
--- /dev/null
+++ b/src/libsystemd/sd-netlink/local-addresses.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-netlink.h"
+
+#include "in-addr-util.h"
+
+struct local_address {
+ int family, ifindex;
+ unsigned char scope;
+ uint32_t metric;
+ union in_addr_union address;
+};
+
+int local_addresses(sd_netlink *rtnl, int ifindex, int af, struct local_address **ret);
+
+int local_gateways(sd_netlink *rtnl, int ifindex, int af, struct local_address **ret);
diff --git a/src/libsystemd/sd-netlink/netlink-internal.h b/src/libsystemd/sd-netlink/netlink-internal.h
new file mode 100644
index 0000000..98de4f0
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-internal.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/netlink.h>
+
+#include "sd-netlink.h"
+
+#include "list.h"
+#include "netlink-types.h"
+#include "prioq.h"
+#include "refcnt.h"
+
+#define RTNL_DEFAULT_TIMEOUT ((usec_t) (25 * USEC_PER_SEC))
+
+#define RTNL_WQUEUE_MAX 1024
+#define RTNL_RQUEUE_MAX 64*1024
+
+#define RTNL_CONTAINER_DEPTH 32
+
+struct reply_callback {
+ sd_netlink_message_handler_t callback;
+ usec_t timeout;
+ uint64_t serial;
+ unsigned prioq_idx;
+};
+
+struct match_callback {
+ sd_netlink_message_handler_t callback;
+ uint16_t type;
+
+ LIST_FIELDS(struct match_callback, match_callbacks);
+};
+
+typedef enum NetlinkSlotType {
+ NETLINK_REPLY_CALLBACK,
+ NETLINK_MATCH_CALLBACK,
+ _NETLINK_SLOT_INVALID = -1,
+} NetlinkSlotType;
+
+struct sd_netlink_slot {
+ unsigned n_ref;
+ sd_netlink *netlink;
+ void *userdata;
+ sd_netlink_destroy_t destroy_callback;
+ NetlinkSlotType type:2;
+
+ bool floating:1;
+ char *description;
+
+ LIST_FIELDS(sd_netlink_slot, slots);
+
+ union {
+ struct reply_callback reply_callback;
+ struct match_callback match_callback;
+ };
+};
+
+struct sd_netlink {
+ RefCount n_ref;
+
+ int fd;
+
+ union {
+ struct sockaddr sa;
+ struct sockaddr_nl nl;
+ } sockaddr;
+
+ int protocol;
+
+ Hashmap *broadcast_group_refs;
+ bool broadcast_group_dont_leave:1; /* until we can rely on 4.2 */
+
+ sd_netlink_message **rqueue;
+ unsigned rqueue_size;
+ size_t rqueue_allocated;
+
+ sd_netlink_message **rqueue_partial;
+ unsigned rqueue_partial_size;
+ size_t rqueue_partial_allocated;
+
+ struct nlmsghdr *rbuffer;
+ size_t rbuffer_allocated;
+
+ bool processing:1;
+
+ uint32_t serial;
+
+ struct Prioq *reply_callbacks_prioq;
+ Hashmap *reply_callbacks;
+
+ LIST_HEAD(struct match_callback, match_callbacks);
+
+ LIST_HEAD(sd_netlink_slot, slots);
+
+ pid_t original_pid;
+
+ sd_event_source *io_event_source;
+ sd_event_source *time_event_source;
+ sd_event_source *exit_event_source;
+ sd_event *event;
+};
+
+struct netlink_attribute {
+ size_t offset; /* offset from hdr to attribute */
+ bool nested:1;
+ bool net_byteorder:1;
+};
+
+struct netlink_container {
+ const struct NLTypeSystem *type_system; /* the type system of the container */
+ size_t offset; /* offset from hdr to the start of the container */
+ struct netlink_attribute *attributes;
+ unsigned short n_attributes; /* number of attributes in container */
+};
+
+struct sd_netlink_message {
+ RefCount n_ref;
+
+ sd_netlink *rtnl;
+
+ int protocol;
+
+ struct nlmsghdr *hdr;
+ struct netlink_container containers[RTNL_CONTAINER_DEPTH];
+ unsigned n_containers; /* number of containers */
+ bool sealed:1;
+ bool broadcast:1;
+
+ sd_netlink_message *next; /* next in a chain of multi-part messages */
+};
+
+int message_new(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t type);
+int message_new_empty(sd_netlink *rtnl, sd_netlink_message **ret);
+
+int netlink_open_family(sd_netlink **ret, int family);
+
+int socket_open(int family);
+int socket_bind(sd_netlink *nl);
+int socket_broadcast_group_ref(sd_netlink *nl, unsigned group);
+int socket_broadcast_group_unref(sd_netlink *nl, unsigned group);
+int socket_write_message(sd_netlink *nl, sd_netlink_message *m);
+int socket_read_message(sd_netlink *nl);
+
+int rtnl_rqueue_make_room(sd_netlink *rtnl);
+int rtnl_rqueue_partial_make_room(sd_netlink *rtnl);
+
+/* Make sure callbacks don't destroy the rtnl connection */
+#define NETLINK_DONT_DESTROY(rtnl) \
+ _cleanup_(sd_netlink_unrefp) _unused_ sd_netlink *_dont_destroy_##rtnl = sd_netlink_ref(rtnl)
diff --git a/src/libsystemd/sd-netlink/netlink-message.c b/src/libsystemd/sd-netlink/netlink-message.c
new file mode 100644
index 0000000..5e9bc45
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-message.c
@@ -0,0 +1,1042 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "format-util.h"
+#include "missing.h"
+#include "netlink-internal.h"
+#include "netlink-types.h"
+#include "netlink-util.h"
+#include "refcnt.h"
+#include "socket-util.h"
+#include "util.h"
+
+#define GET_CONTAINER(m, i) ((i) < (m)->n_containers ? (struct rtattr*)((uint8_t*)(m)->hdr + (m)->containers[i].offset) : NULL)
+#define PUSH_CONTAINER(m, new) (m)->container_offsets[(m)->n_containers++] = (uint8_t*)(new) - (uint8_t*)(m)->hdr;
+
+#define RTA_TYPE(rta) ((rta)->rta_type & NLA_TYPE_MASK)
+#define RTA_FLAGS(rta) ((rta)->rta_type & ~NLA_TYPE_MASK)
+
+int message_new_empty(sd_netlink *rtnl, sd_netlink_message **ret) {
+ sd_netlink_message *m;
+
+ assert_return(ret, -EINVAL);
+
+ /* Note that 'rtnl' is currently unused, if we start using it internally
+ we must take care to avoid problems due to mutual references between
+ buses and their queued messages. See sd-bus.
+ */
+
+ m = new0(sd_netlink_message, 1);
+ if (!m)
+ return -ENOMEM;
+
+ m->n_ref = REFCNT_INIT;
+ m->protocol = rtnl->protocol;
+ m->sealed = false;
+
+ *ret = m;
+
+ return 0;
+}
+
+int message_new(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t type) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ const NLType *nl_type;
+ const NLTypeSystem *type_system_root;
+ size_t size;
+ int r;
+
+ assert_return(rtnl, -EINVAL);
+
+ type_system_root = type_system_get_root(rtnl->protocol);
+
+ r = type_system_get_type(type_system_root, &nl_type, type);
+ if (r < 0)
+ return r;
+
+ if (type_get_type(nl_type) != NETLINK_TYPE_NESTED)
+ return -EINVAL;
+
+ r = message_new_empty(rtnl, &m);
+ if (r < 0)
+ return r;
+
+ size = NLMSG_SPACE(type_get_size(nl_type));
+
+ assert(size >= sizeof(struct nlmsghdr));
+ m->hdr = malloc0(size);
+ if (!m->hdr)
+ return -ENOMEM;
+
+ m->hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+
+ type_get_type_system(nl_type, &m->containers[0].type_system);
+ m->hdr->nlmsg_len = size;
+ m->hdr->nlmsg_type = type;
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+int sd_netlink_message_request_dump(sd_netlink_message *m, int dump) {
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+
+ assert_return(IN_SET(m->hdr->nlmsg_type, RTM_GETLINK, RTM_GETADDR, RTM_GETROUTE, RTM_GETNEIGH, RTM_GETRULE, RTM_GETADDRLABEL), -EINVAL);
+
+ SET_FLAG(m->hdr->nlmsg_flags, NLM_F_DUMP, dump);
+
+ return 0;
+}
+
+DEFINE_ATOMIC_REF_FUNC(sd_netlink_message, sd_netlink_message);
+
+sd_netlink_message *sd_netlink_message_unref(sd_netlink_message *m) {
+ sd_netlink_message *t;
+
+ while (m && REFCNT_DEC(m->n_ref) == 0) {
+ unsigned i;
+
+ free(m->hdr);
+
+ for (i = 0; i <= m->n_containers; i++)
+ free(m->containers[i].attributes);
+
+ t = m;
+ m = m->next;
+ free(t);
+ }
+
+ return NULL;
+}
+
+int sd_netlink_message_get_type(sd_netlink_message *m, uint16_t *type) {
+ assert_return(m, -EINVAL);
+ assert_return(type, -EINVAL);
+
+ *type = m->hdr->nlmsg_type;
+
+ return 0;
+}
+
+int sd_netlink_message_set_flags(sd_netlink_message *m, uint16_t flags) {
+ assert_return(m, -EINVAL);
+ assert_return(flags, -EINVAL);
+
+ m->hdr->nlmsg_flags = flags;
+
+ return 0;
+}
+
+int sd_netlink_message_is_broadcast(sd_netlink_message *m) {
+ assert_return(m, -EINVAL);
+
+ return m->broadcast;
+}
+
+/* If successful the updated message will be correctly aligned, if
+ unsuccessful the old message is untouched. */
+static int add_rtattr(sd_netlink_message *m, unsigned short type, const void *data, size_t data_length) {
+ uint32_t rta_length;
+ size_t message_length, padding_length;
+ struct nlmsghdr *new_hdr;
+ struct rtattr *rta;
+ char *padding;
+ unsigned i;
+ int offset;
+
+ assert(m);
+ assert(m->hdr);
+ assert(!m->sealed);
+ assert(NLMSG_ALIGN(m->hdr->nlmsg_len) == m->hdr->nlmsg_len);
+ assert(!data || data_length);
+
+ /* get offset of the new attribute */
+ offset = m->hdr->nlmsg_len;
+
+ /* get the size of the new rta attribute (with padding at the end) */
+ rta_length = RTA_LENGTH(data_length);
+
+ /* get the new message size (with padding at the end) */
+ message_length = offset + RTA_ALIGN(rta_length);
+
+ /* buffer should be smaller than both one page or 8K to be accepted by the kernel */
+ if (message_length > MIN(page_size(), 8192UL))
+ return -ENOBUFS;
+
+ /* realloc to fit the new attribute */
+ new_hdr = realloc(m->hdr, message_length);
+ if (!new_hdr)
+ return -ENOMEM;
+ m->hdr = new_hdr;
+
+ /* get pointer to the attribute we are about to add */
+ rta = (struct rtattr *) ((uint8_t *) m->hdr + offset);
+
+ /* if we are inside containers, extend them */
+ for (i = 0; i < m->n_containers; i++)
+ GET_CONTAINER(m, i)->rta_len += message_length - offset;
+
+ /* fill in the attribute */
+ rta->rta_type = type;
+ rta->rta_len = rta_length;
+ if (data)
+ /* we don't deal with the case where the user lies about the type
+ * and gives us too little data (so don't do that)
+ */
+ padding = mempcpy(RTA_DATA(rta), data, data_length);
+
+ else
+ /* if no data was passed, make sure we still initialize the padding
+ note that we can have data_length > 0 (used by some containers) */
+ padding = RTA_DATA(rta);
+
+ /* make sure also the padding at the end of the message is initialized */
+ padding_length = (uint8_t*)m->hdr + message_length - (uint8_t*)padding;
+ memzero(padding, padding_length);
+
+ /* update message size */
+ m->hdr->nlmsg_len = message_length;
+
+ return offset;
+}
+
+static int message_attribute_has_type(sd_netlink_message *m, size_t *out_size, uint16_t attribute_type, uint16_t data_type) {
+ const NLType *type;
+ int r;
+
+ assert(m);
+
+ r = type_system_get_type(m->containers[m->n_containers].type_system, &type, attribute_type);
+ if (r < 0)
+ return r;
+
+ if (type_get_type(type) != data_type)
+ return -EINVAL;
+
+ if (out_size)
+ *out_size = type_get_size(type);
+ return 0;
+}
+
+int sd_netlink_message_append_string(sd_netlink_message *m, unsigned short type, const char *data) {
+ size_t length, size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+
+ r = message_attribute_has_type(m, &size, type, NETLINK_TYPE_STRING);
+ if (r < 0)
+ return r;
+
+ if (size) {
+ length = strnlen(data, size+1);
+ if (length > size)
+ return -EINVAL;
+ } else
+ length = strlen(data);
+
+ r = add_rtattr(m, type, data, length + 1);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_flag(sd_netlink_message *m, unsigned short type) {
+ size_t size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, &size, type, NETLINK_TYPE_FLAG);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, NULL, 0);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_u8(sd_netlink_message *m, unsigned short type, uint8_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_U8);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, &data, sizeof(uint8_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_u16(sd_netlink_message *m, unsigned short type, uint16_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_U16);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, &data, sizeof(uint16_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_u32(sd_netlink_message *m, unsigned short type, uint32_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_U32);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, &data, sizeof(uint32_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_data(sd_netlink_message *m, unsigned short type, const void *data, size_t len) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = add_rtattr(m, type, data, len);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_in_addr(sd_netlink_message *m, unsigned short type, const struct in_addr *data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_IN_ADDR);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, data, sizeof(struct in_addr));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_in6_addr(sd_netlink_message *m, unsigned short type, const struct in6_addr *data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_IN_ADDR);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, data, sizeof(struct in6_addr));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_sockaddr_in(sd_netlink_message *m, unsigned short type, const struct sockaddr_in *data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_SOCKADDR);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, data, sizeof(struct sockaddr_in));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_sockaddr_in6(sd_netlink_message *m, unsigned short type, const struct sockaddr_in6 *data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_SOCKADDR);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, data, sizeof(struct sockaddr_in6));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_ether_addr(sd_netlink_message *m, unsigned short type, const struct ether_addr *data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_ETHER_ADDR);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, data, ETH_ALEN);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_cache_info(sd_netlink_message *m, unsigned short type, const struct ifa_cacheinfo *info) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(info, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_CACHE_INFO);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, info, sizeof(struct ifa_cacheinfo));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_open_container(sd_netlink_message *m, unsigned short type) {
+ size_t size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers < RTNL_CONTAINER_DEPTH, -ERANGE);
+
+ r = message_attribute_has_type(m, &size, type, NETLINK_TYPE_NESTED);
+ if (r < 0) {
+ const NLTypeSystemUnion *type_system_union;
+ int family;
+
+ r = message_attribute_has_type(m, &size, type, NETLINK_TYPE_UNION);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_get_family(m, &family);
+ if (r < 0)
+ return r;
+
+ r = type_system_get_type_system_union(m->containers[m->n_containers].type_system, &type_system_union, type);
+ if (r < 0)
+ return r;
+
+ r = type_system_union_protocol_get_type_system(type_system_union,
+ &m->containers[m->n_containers + 1].type_system,
+ family);
+ if (r < 0)
+ return r;
+ } else {
+ r = type_system_get_type_system(m->containers[m->n_containers].type_system,
+ &m->containers[m->n_containers + 1].type_system,
+ type);
+ if (r < 0)
+ return r;
+ }
+
+ r = add_rtattr(m, type | NLA_F_NESTED, NULL, size);
+ if (r < 0)
+ return r;
+
+ m->containers[m->n_containers++].offset = r;
+
+ return 0;
+}
+
+int sd_netlink_message_open_container_union(sd_netlink_message *m, unsigned short type, const char *key) {
+ const NLTypeSystemUnion *type_system_union;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = type_system_get_type_system_union(m->containers[m->n_containers].type_system, &type_system_union, type);
+ if (r < 0)
+ return r;
+
+ r = type_system_union_get_type_system(type_system_union,
+ &m->containers[m->n_containers + 1].type_system,
+ key);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, type_system_union->match, key);
+ if (r < 0)
+ return r;
+
+ /* do we ever need non-null size */
+ r = add_rtattr(m, type | NLA_F_NESTED, NULL, 0);
+ if (r < 0)
+ return r;
+
+ m->containers[m->n_containers++].offset = r;
+
+ return 0;
+}
+
+int sd_netlink_message_close_container(sd_netlink_message *m) {
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers > 0, -EINVAL);
+
+ m->containers[m->n_containers].type_system = NULL;
+ m->containers[m->n_containers].offset = 0;
+ m->n_containers--;
+
+ return 0;
+}
+
+int sd_netlink_message_open_array(sd_netlink_message *m, uint16_t type) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers > 0, -EINVAL);
+
+ r = add_rtattr(m, type | NLA_F_NESTED, NULL, 0);
+ if (r < 0)
+ return r;
+
+ m->containers[m->n_containers].offset = r;
+ m->n_containers++;
+ m->containers[m->n_containers].type_system = m->containers[m->n_containers - 1].type_system;
+
+ return 0;
+}
+
+int sd_netlink_message_cancel_array(sd_netlink_message *m) {
+ unsigned i;
+ uint32_t rta_len;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers > 1, -EINVAL);
+
+ rta_len = GET_CONTAINER(m, (m->n_containers - 1))->rta_len;
+
+ for (i = 0; i < m->n_containers; i++)
+ GET_CONTAINER(m, i)->rta_len -= rta_len;
+
+ m->hdr->nlmsg_len -= rta_len;
+
+ m->n_containers--;
+ m->containers[m->n_containers].type_system = NULL;
+
+ return 0;
+}
+
+static int netlink_message_read_internal(sd_netlink_message *m, unsigned short type, void **data, bool *net_byteorder) {
+ struct netlink_attribute *attribute;
+ struct rtattr *rta;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+
+ assert(m->n_containers < RTNL_CONTAINER_DEPTH);
+ assert(m->containers[m->n_containers].attributes);
+ assert(type < m->containers[m->n_containers].n_attributes);
+
+ attribute = &m->containers[m->n_containers].attributes[type];
+
+ if (attribute->offset == 0)
+ return -ENODATA;
+
+ rta = (struct rtattr*)((uint8_t *) m->hdr + attribute->offset);
+
+ *data = RTA_DATA(rta);
+
+ if (net_byteorder)
+ *net_byteorder = attribute->net_byteorder;
+
+ return RTA_PAYLOAD(rta);
+}
+
+int sd_netlink_message_read(sd_netlink_message *m, unsigned short type, size_t size, void *data) {
+ void *attr_data;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+
+ if ((size_t) r < size)
+ return -EIO;
+
+ if (data)
+ memcpy(data, attr_data, size);
+
+ return 0;
+}
+
+int sd_netlink_message_read_string(sd_netlink_message *m, unsigned short type, const char **data) {
+ int r;
+ void *attr_data;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_STRING);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+ else if (strnlen(attr_data, r) >= (size_t) r)
+ return -EIO;
+
+ if (data)
+ *data = (const char *) attr_data;
+
+ return 0;
+}
+
+int sd_netlink_message_read_u8(sd_netlink_message *m, unsigned short type, uint8_t *data) {
+ int r;
+ void *attr_data;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_U8);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+ else if ((size_t) r < sizeof(uint8_t))
+ return -EIO;
+
+ if (data)
+ *data = *(uint8_t *) attr_data;
+
+ return 0;
+}
+
+int sd_netlink_message_read_u16(sd_netlink_message *m, unsigned short type, uint16_t *data) {
+ void *attr_data;
+ bool net_byteorder;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_U16);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, &net_byteorder);
+ if (r < 0)
+ return r;
+ else if ((size_t) r < sizeof(uint16_t))
+ return -EIO;
+
+ if (data) {
+ if (net_byteorder)
+ *data = be16toh(*(uint16_t *) attr_data);
+ else
+ *data = *(uint16_t *) attr_data;
+ }
+
+ return 0;
+}
+
+int sd_netlink_message_read_u32(sd_netlink_message *m, unsigned short type, uint32_t *data) {
+ void *attr_data;
+ bool net_byteorder;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_U32);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, &net_byteorder);
+ if (r < 0)
+ return r;
+ else if ((size_t)r < sizeof(uint32_t))
+ return -EIO;
+
+ if (data) {
+ if (net_byteorder)
+ *data = be32toh(*(uint32_t *) attr_data);
+ else
+ *data = *(uint32_t *) attr_data;
+ }
+
+ return 0;
+}
+
+int sd_netlink_message_read_ether_addr(sd_netlink_message *m, unsigned short type, struct ether_addr *data) {
+ int r;
+ void *attr_data;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_ETHER_ADDR);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+ else if ((size_t)r < sizeof(struct ether_addr))
+ return -EIO;
+
+ if (data)
+ memcpy(data, attr_data, sizeof(struct ether_addr));
+
+ return 0;
+}
+
+int sd_netlink_message_read_cache_info(sd_netlink_message *m, unsigned short type, struct ifa_cacheinfo *info) {
+ int r;
+ void *attr_data;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_CACHE_INFO);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+ else if ((size_t)r < sizeof(struct ifa_cacheinfo))
+ return -EIO;
+
+ if (info)
+ memcpy(info, attr_data, sizeof(struct ifa_cacheinfo));
+
+ return 0;
+}
+
+int sd_netlink_message_read_in_addr(sd_netlink_message *m, unsigned short type, struct in_addr *data) {
+ int r;
+ void *attr_data;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_IN_ADDR);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+ else if ((size_t)r < sizeof(struct in_addr))
+ return -EIO;
+
+ if (data)
+ memcpy(data, attr_data, sizeof(struct in_addr));
+
+ return 0;
+}
+
+int sd_netlink_message_read_in6_addr(sd_netlink_message *m, unsigned short type, struct in6_addr *data) {
+ int r;
+ void *attr_data;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_IN_ADDR);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+ else if ((size_t)r < sizeof(struct in6_addr))
+ return -EIO;
+
+ if (data)
+ memcpy(data, attr_data, sizeof(struct in6_addr));
+
+ return 0;
+}
+
+static int netlink_container_parse(sd_netlink_message *m,
+ struct netlink_container *container,
+ int count,
+ struct rtattr *rta,
+ unsigned rt_len) {
+ _cleanup_free_ struct netlink_attribute *attributes = NULL;
+
+ attributes = new0(struct netlink_attribute, count);
+ if (!attributes)
+ return -ENOMEM;
+
+ for (; RTA_OK(rta, rt_len); rta = RTA_NEXT(rta, rt_len)) {
+ unsigned short type;
+
+ type = RTA_TYPE(rta);
+
+ /* if the kernel is newer than the headers we used
+ when building, we ignore out-of-range attributes */
+ if (type >= count)
+ continue;
+
+ if (attributes[type].offset != 0)
+ log_debug("rtnl: message parse - overwriting repeated attribute");
+
+ attributes[type].offset = (uint8_t *) rta - (uint8_t *) m->hdr;
+ attributes[type].nested = RTA_FLAGS(rta) & NLA_F_NESTED;
+ attributes[type].net_byteorder = RTA_FLAGS(rta) & NLA_F_NET_BYTEORDER;
+ }
+
+ container->attributes = TAKE_PTR(attributes);
+ container->n_attributes = count;
+
+ return 0;
+}
+
+int sd_netlink_message_enter_container(sd_netlink_message *m, unsigned short type_id) {
+ const NLType *nl_type;
+ const NLTypeSystem *type_system;
+ void *container;
+ uint16_t type;
+ size_t size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->n_containers < RTNL_CONTAINER_DEPTH, -EINVAL);
+
+ r = type_system_get_type(m->containers[m->n_containers].type_system,
+ &nl_type,
+ type_id);
+ if (r < 0)
+ return r;
+
+ type = type_get_type(nl_type);
+
+ if (type == NETLINK_TYPE_NESTED) {
+ r = type_system_get_type_system(m->containers[m->n_containers].type_system,
+ &type_system,
+ type_id);
+ if (r < 0)
+ return r;
+ } else if (type == NETLINK_TYPE_UNION) {
+ const NLTypeSystemUnion *type_system_union;
+
+ r = type_system_get_type_system_union(m->containers[m->n_containers].type_system,
+ &type_system_union,
+ type_id);
+ if (r < 0)
+ return r;
+
+ switch (type_system_union->match_type) {
+ case NL_MATCH_SIBLING:
+ {
+ const char *key;
+
+ r = sd_netlink_message_read_string(m, type_system_union->match, &key);
+ if (r < 0)
+ return r;
+
+ r = type_system_union_get_type_system(type_system_union,
+ &type_system,
+ key);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+ case NL_MATCH_PROTOCOL:
+ {
+ int family;
+
+ r = sd_rtnl_message_get_family(m, &family);
+ if (r < 0)
+ return r;
+
+ r = type_system_union_protocol_get_type_system(type_system_union,
+ &type_system,
+ family);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+ default:
+ assert_not_reached("sd-netlink: invalid type system union type");
+ }
+ } else
+ return -EINVAL;
+
+ r = netlink_message_read_internal(m, type_id, &container, NULL);
+ if (r < 0)
+ return r;
+ else
+ size = (size_t)r;
+
+ m->n_containers++;
+
+ r = netlink_container_parse(m,
+ &m->containers[m->n_containers],
+ type_system_get_count(type_system),
+ container,
+ size);
+ if (r < 0) {
+ m->n_containers--;
+ return r;
+ }
+
+ m->containers[m->n_containers].type_system = type_system;
+
+ return 0;
+}
+
+int sd_netlink_message_exit_container(sd_netlink_message *m) {
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EINVAL);
+ assert_return(m->n_containers > 0, -EINVAL);
+
+ m->containers[m->n_containers].attributes = mfree(m->containers[m->n_containers].attributes);
+ m->containers[m->n_containers].type_system = NULL;
+
+ m->n_containers--;
+
+ return 0;
+}
+
+uint32_t rtnl_message_get_serial(sd_netlink_message *m) {
+ assert(m);
+ assert(m->hdr);
+
+ return m->hdr->nlmsg_seq;
+}
+
+int sd_netlink_message_is_error(sd_netlink_message *m) {
+ assert_return(m, 0);
+ assert_return(m->hdr, 0);
+
+ return m->hdr->nlmsg_type == NLMSG_ERROR;
+}
+
+int sd_netlink_message_get_errno(sd_netlink_message *m) {
+ struct nlmsgerr *err;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+
+ if (!sd_netlink_message_is_error(m))
+ return 0;
+
+ err = NLMSG_DATA(m->hdr);
+
+ return err->error;
+}
+
+int sd_netlink_message_rewind(sd_netlink_message *m) {
+ const NLType *nl_type;
+ const NLTypeSystem *type_system_root;
+ uint16_t type;
+ size_t size;
+ unsigned i;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ /* don't allow appending to message once parsed */
+ if (!m->sealed)
+ rtnl_message_seal(m);
+
+ type_system_root = type_system_get_root(m->protocol);
+
+ for (i = 1; i <= m->n_containers; i++)
+ m->containers[i].attributes = mfree(m->containers[i].attributes);
+
+ m->n_containers = 0;
+
+ if (m->containers[0].attributes)
+ /* top-level attributes have already been parsed */
+ return 0;
+
+ assert(m->hdr);
+
+ r = type_system_get_type(type_system_root, &nl_type, m->hdr->nlmsg_type);
+ if (r < 0)
+ return r;
+
+ type = type_get_type(nl_type);
+ size = type_get_size(nl_type);
+
+ if (type == NETLINK_TYPE_NESTED) {
+ const NLTypeSystem *type_system;
+
+ type_get_type_system(nl_type, &type_system);
+
+ m->containers[0].type_system = type_system;
+
+ r = netlink_container_parse(m,
+ &m->containers[m->n_containers],
+ type_system_get_count(type_system),
+ (struct rtattr*)((uint8_t*)NLMSG_DATA(m->hdr) + NLMSG_ALIGN(size)),
+ NLMSG_PAYLOAD(m->hdr, size));
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+void rtnl_message_seal(sd_netlink_message *m) {
+ assert(m);
+ assert(!m->sealed);
+
+ m->sealed = true;
+}
+
+sd_netlink_message *sd_netlink_message_next(sd_netlink_message *m) {
+ assert_return(m, NULL);
+
+ return m->next;
+}
diff --git a/src/libsystemd/sd-netlink/netlink-slot.c b/src/libsystemd/sd-netlink/netlink-slot.c
new file mode 100644
index 0000000..2b8675d
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-slot.c
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "netlink-internal.h"
+#include "netlink-slot.h"
+#include "string-util.h"
+
+int netlink_slot_allocate(
+ sd_netlink *nl,
+ bool floating,
+ NetlinkSlotType type,
+ size_t extra,
+ void *userdata,
+ const char *description,
+ sd_netlink_slot **ret) {
+
+ _cleanup_free_ sd_netlink_slot *slot = NULL;
+
+ assert(nl);
+ assert(ret);
+
+ slot = malloc0(offsetof(sd_netlink_slot, reply_callback) + extra);
+ if (!slot)
+ return -ENOMEM;
+
+ slot->n_ref = 1;
+ slot->netlink = nl;
+ slot->userdata = userdata;
+ slot->type = type;
+ slot->floating = floating;
+
+ if (description) {
+ slot->description = strdup(description);
+ if (!slot->description)
+ return -ENOMEM;
+ }
+
+ if (!floating)
+ sd_netlink_ref(nl);
+
+ LIST_PREPEND(slots, nl->slots, slot);
+
+ *ret = TAKE_PTR(slot);
+
+ return 0;
+}
+
+void netlink_slot_disconnect(sd_netlink_slot *slot, bool unref) {
+ sd_netlink *nl;
+
+ assert(slot);
+
+ nl = slot->netlink;
+ if (!nl)
+ return;
+
+ switch (slot->type) {
+
+ case NETLINK_REPLY_CALLBACK:
+ (void) hashmap_remove(nl->reply_callbacks, &slot->reply_callback.serial);
+
+ if (slot->reply_callback.timeout != 0)
+ prioq_remove(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);
+
+ break;
+ case NETLINK_MATCH_CALLBACK:
+ LIST_REMOVE(match_callbacks, nl->match_callbacks, &slot->match_callback);
+
+ switch (slot->match_callback.type) {
+ case RTM_NEWLINK:
+ case RTM_DELLINK:
+ (void) socket_broadcast_group_unref(nl, RTNLGRP_LINK);
+
+ break;
+ case RTM_NEWADDR:
+ case RTM_DELADDR:
+ (void) socket_broadcast_group_unref(nl, RTNLGRP_IPV4_IFADDR);
+ (void) socket_broadcast_group_unref(nl, RTNLGRP_IPV6_IFADDR);
+
+ break;
+ case RTM_NEWROUTE:
+ case RTM_DELROUTE:
+ (void) socket_broadcast_group_unref(nl, RTNLGRP_IPV4_ROUTE);
+ (void) socket_broadcast_group_unref(nl, RTNLGRP_IPV6_ROUTE);
+
+ break;
+ }
+
+ break;
+ default:
+ assert_not_reached("Wut? Unknown slot type?");
+ }
+
+ slot->type = _NETLINK_SLOT_INVALID;
+ slot->netlink = NULL;
+ LIST_REMOVE(slots, nl->slots, slot);
+
+ if (!slot->floating)
+ sd_netlink_unref(nl);
+ else if (unref)
+ sd_netlink_slot_unref(slot);
+}
+
+static sd_netlink_slot* netlink_slot_free(sd_netlink_slot *slot) {
+ assert(slot);
+
+ netlink_slot_disconnect(slot, false);
+
+ if (slot->destroy_callback)
+ slot->destroy_callback(slot->userdata);
+
+ free(slot->description);
+ return mfree(slot);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_netlink_slot, sd_netlink_slot, netlink_slot_free);
+
+sd_netlink *sd_netlink_slot_get_netlink(sd_netlink_slot *slot) {
+ assert_return(slot, NULL);
+
+ return slot->netlink;
+}
+
+void *sd_netlink_slot_get_userdata(sd_netlink_slot *slot) {
+ assert_return(slot, NULL);
+
+ return slot->userdata;
+}
+
+void *sd_netlink_slot_set_userdata(sd_netlink_slot *slot, void *userdata) {
+ void *ret;
+
+ assert_return(slot, NULL);
+
+ ret = slot->userdata;
+ slot->userdata = userdata;
+
+ return ret;
+}
+
+int sd_netlink_slot_get_destroy_callback(sd_netlink_slot *slot, sd_netlink_destroy_t *callback) {
+ assert_return(slot, -EINVAL);
+
+ if (callback)
+ *callback = slot->destroy_callback;
+
+ return !!slot->destroy_callback;
+}
+
+int sd_netlink_slot_set_destroy_callback(sd_netlink_slot *slot, sd_netlink_destroy_t callback) {
+ assert_return(slot, -EINVAL);
+
+ slot->destroy_callback = callback;
+ return 0;
+}
+
+int sd_netlink_slot_get_floating(sd_netlink_slot *slot) {
+ assert_return(slot, -EINVAL);
+
+ return slot->floating;
+}
+
+int sd_netlink_slot_set_floating(sd_netlink_slot *slot, int b) {
+ assert_return(slot, -EINVAL);
+
+ if (slot->floating == !!b)
+ return 0;
+
+ if (!slot->netlink) /* Already disconnected */
+ return -ESTALE;
+
+ slot->floating = b;
+
+ if (b) {
+ sd_netlink_slot_ref(slot);
+ sd_netlink_unref(slot->netlink);
+ } else {
+ sd_netlink_ref(slot->netlink);
+ sd_netlink_slot_unref(slot);
+ }
+
+ return 1;
+}
+
+int sd_netlink_slot_get_description(sd_netlink_slot *slot, const char **description) {
+ assert_return(slot, -EINVAL);
+
+ if (description)
+ *description = slot->description;
+
+ return !!slot->description;
+}
+
+int sd_netlink_slot_set_description(sd_netlink_slot *slot, const char *description) {
+ assert_return(slot, -EINVAL);
+
+ return free_and_strdup(&slot->description, description);
+}
diff --git a/src/libsystemd/sd-netlink/netlink-slot.h b/src/libsystemd/sd-netlink/netlink-slot.h
new file mode 100644
index 0000000..2641ec6
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-slot.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-netlink.h"
+
+int netlink_slot_allocate(
+ sd_netlink *nl,
+ bool floating,
+ NetlinkSlotType type,
+ size_t extra,
+ void *userdata,
+ const char *description,
+ sd_netlink_slot **ret);
+void netlink_slot_disconnect(sd_netlink_slot *slot, bool unref);
diff --git a/src/libsystemd/sd-netlink/netlink-socket.c b/src/libsystemd/sd-netlink/netlink-socket.c
new file mode 100644
index 0000000..432e8e8
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-socket.c
@@ -0,0 +1,458 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "missing.h"
+#include "netlink-internal.h"
+#include "netlink-types.h"
+#include "netlink-util.h"
+#include "refcnt.h"
+#include "socket-util.h"
+#include "util.h"
+
+int socket_open(int family) {
+ int fd;
+
+ fd = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, family);
+ if (fd < 0)
+ return -errno;
+
+ return fd_move_above_stdio(fd);
+}
+
+static int broadcast_groups_get(sd_netlink *nl) {
+ _cleanup_free_ uint32_t *groups = NULL;
+ socklen_t len = 0, old_len;
+ unsigned i, j;
+ int r;
+
+ assert(nl);
+ assert(nl->fd >= 0);
+
+ r = getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &len);
+ if (r < 0) {
+ if (errno == ENOPROTOOPT) {
+ nl->broadcast_group_dont_leave = true;
+ return 0;
+ } else
+ return -errno;
+ }
+
+ if (len == 0)
+ return 0;
+
+ groups = new0(uint32_t, len);
+ if (!groups)
+ return -ENOMEM;
+
+ old_len = len;
+
+ r = getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, groups, &len);
+ if (r < 0)
+ return -errno;
+
+ if (old_len != len)
+ return -EIO;
+
+ r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL);
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < len; i++) {
+ for (j = 0; j < sizeof(uint32_t) * 8; j++) {
+ uint32_t offset;
+ unsigned group;
+
+ offset = 1U << j;
+
+ if (!(groups[i] & offset))
+ continue;
+
+ group = i * sizeof(uint32_t) * 8 + j + 1;
+
+ r = hashmap_put(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(1));
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+int socket_bind(sd_netlink *nl) {
+ socklen_t addrlen;
+ int r;
+
+ r = setsockopt_int(nl->fd, SOL_NETLINK, NETLINK_PKTINFO, true);
+ if (r < 0)
+ return r;
+
+ addrlen = sizeof(nl->sockaddr);
+
+ r = bind(nl->fd, &nl->sockaddr.sa, addrlen);
+ /* ignore EINVAL to allow opening an already bound socket */
+ if (r < 0 && errno != EINVAL)
+ return -errno;
+
+ r = getsockname(nl->fd, &nl->sockaddr.sa, &addrlen);
+ if (r < 0)
+ return -errno;
+
+ r = broadcast_groups_get(nl);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static unsigned broadcast_group_get_ref(sd_netlink *nl, unsigned group) {
+ assert(nl);
+
+ return PTR_TO_UINT(hashmap_get(nl->broadcast_group_refs, UINT_TO_PTR(group)));
+}
+
+static int broadcast_group_set_ref(sd_netlink *nl, unsigned group, unsigned n_ref) {
+ int r;
+
+ assert(nl);
+
+ r = hashmap_replace(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(n_ref));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int broadcast_group_join(sd_netlink *nl, unsigned group) {
+ int r;
+
+ assert(nl);
+ assert(nl->fd >= 0);
+ assert(group > 0);
+
+ r = setsockopt(nl->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &group, sizeof(group));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int socket_broadcast_group_ref(sd_netlink *nl, unsigned group) {
+ unsigned n_ref;
+ int r;
+
+ assert(nl);
+
+ n_ref = broadcast_group_get_ref(nl, group);
+
+ n_ref++;
+
+ r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL);
+ if (r < 0)
+ return r;
+
+ r = broadcast_group_set_ref(nl, group, n_ref);
+ if (r < 0)
+ return r;
+
+ if (n_ref > 1)
+ /* not yet in the group */
+ return 0;
+
+ r = broadcast_group_join(nl, group);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int broadcast_group_leave(sd_netlink *nl, unsigned group) {
+ int r;
+
+ assert(nl);
+ assert(nl->fd >= 0);
+ assert(group > 0);
+
+ if (nl->broadcast_group_dont_leave)
+ return 0;
+
+ r = setsockopt(nl->fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, &group, sizeof(group));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int socket_broadcast_group_unref(sd_netlink *nl, unsigned group) {
+ unsigned n_ref;
+ int r;
+
+ assert(nl);
+
+ n_ref = broadcast_group_get_ref(nl, group);
+
+ assert(n_ref > 0);
+
+ n_ref--;
+
+ r = broadcast_group_set_ref(nl, group, n_ref);
+ if (r < 0)
+ return r;
+
+ if (n_ref > 0)
+ /* still refs left */
+ return 0;
+
+ r = broadcast_group_leave(nl, group);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+/* returns the number of bytes sent, or a negative error code */
+int socket_write_message(sd_netlink *nl, sd_netlink_message *m) {
+ union {
+ struct sockaddr sa;
+ struct sockaddr_nl nl;
+ } addr = {
+ .nl.nl_family = AF_NETLINK,
+ };
+ ssize_t k;
+
+ assert(nl);
+ assert(m);
+ assert(m->hdr);
+
+ k = sendto(nl->fd, m->hdr, m->hdr->nlmsg_len,
+ 0, &addr.sa, sizeof(addr));
+ if (k < 0)
+ return -errno;
+
+ return k;
+}
+
+static int socket_recv_message(int fd, struct iovec *iov, uint32_t *_group, bool peek) {
+ union sockaddr_union sender;
+ uint8_t cmsg_buffer[CMSG_SPACE(sizeof(struct nl_pktinfo))];
+ struct msghdr msg = {
+ .msg_iov = iov,
+ .msg_iovlen = 1,
+ .msg_name = &sender,
+ .msg_namelen = sizeof(sender),
+ .msg_control = cmsg_buffer,
+ .msg_controllen = sizeof(cmsg_buffer),
+ };
+ struct cmsghdr *cmsg;
+ uint32_t group = 0;
+ ssize_t n;
+
+ assert(fd >= 0);
+ assert(iov);
+
+ n = recvmsg(fd, &msg, MSG_TRUNC | (peek ? MSG_PEEK : 0));
+ if (n < 0) {
+ /* no data */
+ if (errno == ENOBUFS)
+ log_debug("rtnl: kernel receive buffer overrun");
+ else if (errno == EAGAIN)
+ log_debug("rtnl: no data in socket");
+
+ return IN_SET(errno, EAGAIN, EINTR) ? 0 : -errno;
+ }
+
+ if (sender.nl.nl_pid != 0) {
+ /* not from the kernel, ignore */
+ log_debug("rtnl: ignoring message from portid %"PRIu32, sender.nl.nl_pid);
+
+ if (peek) {
+ /* drop the message */
+ n = recvmsg(fd, &msg, 0);
+ if (n < 0)
+ return IN_SET(errno, EAGAIN, EINTR) ? 0 : -errno;
+ }
+
+ return 0;
+ }
+
+ CMSG_FOREACH(cmsg, &msg) {
+ if (cmsg->cmsg_level == SOL_NETLINK &&
+ cmsg->cmsg_type == NETLINK_PKTINFO &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct nl_pktinfo))) {
+ struct nl_pktinfo *pktinfo = (void *)CMSG_DATA(cmsg);
+
+ /* multi-cast group */
+ group = pktinfo->group;
+ }
+ }
+
+ if (_group)
+ *_group = group;
+
+ return (int) n;
+}
+
+/* On success, the number of bytes received is returned and *ret points to the received message
+ * which has a valid header and the correct size.
+ * If nothing useful was received 0 is returned.
+ * On failure, a negative error code is returned.
+ */
+int socket_read_message(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *first = NULL;
+ struct iovec iov = {};
+ uint32_t group = 0;
+ bool multi_part = false, done = false;
+ struct nlmsghdr *new_msg;
+ size_t len;
+ int r;
+ unsigned i = 0;
+ const NLTypeSystem *type_system_root;
+
+ assert(rtnl);
+ assert(rtnl->rbuffer);
+ assert(rtnl->rbuffer_allocated >= sizeof(struct nlmsghdr));
+
+ type_system_root = type_system_get_root(rtnl->protocol);
+
+ /* read nothing, just get the pending message size */
+ r = socket_recv_message(rtnl->fd, &iov, NULL, true);
+ if (r <= 0)
+ return r;
+ else
+ len = (size_t) r;
+
+ /* make room for the pending message */
+ if (!greedy_realloc((void **)&rtnl->rbuffer,
+ &rtnl->rbuffer_allocated,
+ len, sizeof(uint8_t)))
+ return -ENOMEM;
+
+ iov = IOVEC_MAKE(rtnl->rbuffer, rtnl->rbuffer_allocated);
+
+ /* read the pending message */
+ r = socket_recv_message(rtnl->fd, &iov, &group, false);
+ if (r <= 0)
+ return r;
+ else
+ len = (size_t) r;
+
+ if (len > rtnl->rbuffer_allocated)
+ /* message did not fit in read buffer */
+ return -EIO;
+
+ if (NLMSG_OK(rtnl->rbuffer, len) && rtnl->rbuffer->nlmsg_flags & NLM_F_MULTI) {
+ multi_part = true;
+
+ for (i = 0; i < rtnl->rqueue_partial_size; i++) {
+ if (rtnl_message_get_serial(rtnl->rqueue_partial[i]) ==
+ rtnl->rbuffer->nlmsg_seq) {
+ first = rtnl->rqueue_partial[i];
+ break;
+ }
+ }
+ }
+
+ for (new_msg = rtnl->rbuffer; NLMSG_OK(new_msg, len) && !done; new_msg = NLMSG_NEXT(new_msg, len)) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ const NLType *nl_type;
+
+ if (!group && new_msg->nlmsg_pid != rtnl->sockaddr.nl.nl_pid)
+ /* not broadcast and not for us */
+ continue;
+
+ if (new_msg->nlmsg_type == NLMSG_NOOP)
+ /* silently drop noop messages */
+ continue;
+
+ if (new_msg->nlmsg_type == NLMSG_DONE) {
+ /* finished reading multi-part message */
+ done = true;
+
+ /* if first is not defined, put NLMSG_DONE into the receive queue. */
+ if (first)
+ continue;
+ }
+
+ /* check that we support this message type */
+ r = type_system_get_type(type_system_root, &nl_type, new_msg->nlmsg_type);
+ if (r < 0) {
+ if (r == -EOPNOTSUPP)
+ log_debug("sd-netlink: ignored message with unknown type: %i",
+ new_msg->nlmsg_type);
+
+ continue;
+ }
+
+ /* check that the size matches the message type */
+ if (new_msg->nlmsg_len < NLMSG_LENGTH(type_get_size(nl_type))) {
+ log_debug("sd-netlink: message larger than expected, dropping");
+ continue;
+ }
+
+ r = message_new_empty(rtnl, &m);
+ if (r < 0)
+ return r;
+
+ m->broadcast = !!group;
+
+ m->hdr = memdup(new_msg, new_msg->nlmsg_len);
+ if (!m->hdr)
+ return -ENOMEM;
+
+ /* seal and parse the top-level message */
+ r = sd_netlink_message_rewind(m);
+ if (r < 0)
+ return r;
+
+ /* push the message onto the multi-part message stack */
+ if (first)
+ m->next = first;
+ first = TAKE_PTR(m);
+ }
+
+ if (len > 0)
+ log_debug("sd-netlink: discarding %zu bytes of incoming message", len);
+
+ if (!first)
+ return 0;
+
+ if (!multi_part || done) {
+ /* we got a complete message, push it on the read queue */
+ r = rtnl_rqueue_make_room(rtnl);
+ if (r < 0)
+ return r;
+
+ rtnl->rqueue[rtnl->rqueue_size++] = TAKE_PTR(first);
+
+ if (multi_part && (i < rtnl->rqueue_partial_size)) {
+ /* remove the message form the partial read queue */
+ memmove(rtnl->rqueue_partial + i,rtnl->rqueue_partial + i + 1,
+ sizeof(sd_netlink_message*) * (rtnl->rqueue_partial_size - i - 1));
+ rtnl->rqueue_partial_size--;
+ }
+
+ return 1;
+ } else {
+ /* we only got a partial multi-part message, push it on the
+ partial read queue */
+ if (i < rtnl->rqueue_partial_size)
+ rtnl->rqueue_partial[i] = TAKE_PTR(first);
+ else {
+ r = rtnl_rqueue_partial_make_room(rtnl);
+ if (r < 0)
+ return r;
+
+ rtnl->rqueue_partial[rtnl->rqueue_partial_size++] = TAKE_PTR(first);
+ }
+
+ return 0;
+ }
+}
diff --git a/src/libsystemd/sd-netlink/netlink-types.c b/src/libsystemd/sd-netlink/netlink-types.c
new file mode 100644
index 0000000..9dcd3f2
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-types.c
@@ -0,0 +1,949 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/in.h>
+#include <stdint.h>
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/genetlink.h>
+#include <linux/ip.h>
+#include <linux/if.h>
+#include <linux/can/netlink.h>
+#include <linux/fib_rules.h>
+#include <linux/if_addr.h>
+#include <linux/if_addrlabel.h>
+#include <linux/if_bridge.h>
+#include <linux/if_link.h>
+#include <linux/if_tunnel.h>
+#include <linux/veth.h>
+
+#if HAVE_LINUX_FOU_H
+#include <linux/fou.h>
+#endif
+
+#if HAVE_LINUX_CAN_VXCAN_H
+#include <linux/can/vxcan.h>
+#endif
+
+#include "macro.h"
+#include "missing.h"
+#include "netlink-types.h"
+#include "sd-netlink.h"
+#include "string-table.h"
+#include "util.h"
+#include "wireguard-netlink.h"
+
+/* Maximum ARP IP target defined in kernel */
+#define BOND_MAX_ARP_TARGETS 16
+
+typedef enum {
+ BOND_ARP_TARGETS_0,
+ BOND_ARP_TARGETS_1,
+ BOND_ARP_TARGETS_2,
+ BOND_ARP_TARGETS_3,
+ BOND_ARP_TARGETS_4,
+ BOND_ARP_TARGETS_5,
+ BOND_ARP_TARGETS_6,
+ BOND_ARP_TARGETS_7,
+ BOND_ARP_TARGETS_8,
+ BOND_ARP_TARGETS_9,
+ BOND_ARP_TARGETS_10,
+ BOND_ARP_TARGETS_11,
+ BOND_ARP_TARGETS_12,
+ BOND_ARP_TARGETS_13,
+ BOND_ARP_TARGETS_14,
+ BOND_ARP_TARGETS_MAX = BOND_MAX_ARP_TARGETS,
+} BondArpTargets;
+
+struct NLType {
+ uint16_t type;
+ size_t size;
+ const NLTypeSystem *type_system;
+ const NLTypeSystemUnion *type_system_union;
+};
+
+struct NLTypeSystem {
+ uint16_t count;
+ const NLType *types;
+};
+
+static const NLTypeSystem rtnl_link_type_system;
+
+static const NLType empty_types[1] = {
+ /* fake array to avoid .types==NULL, which denotes invalid type-systems */
+};
+
+static const NLTypeSystem empty_type_system = {
+ .count = 0,
+ .types = empty_types,
+};
+
+static const NLType rtnl_link_info_data_veth_types[] = {
+ [VETH_INFO_PEER] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+};
+
+static const NLType rtnl_link_info_data_vxcan_types[] = {
+ [VXCAN_INFO_PEER] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+};
+
+static const NLType rtnl_link_info_data_ipvlan_types[] = {
+ [IFLA_IPVLAN_MODE] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPVLAN_FLAGS] = { .type = NETLINK_TYPE_U16 },
+};
+
+static const NLType rtnl_link_info_data_macvlan_types[] = {
+ [IFLA_MACVLAN_MODE] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_MACVLAN_FLAGS] = { .type = NETLINK_TYPE_U16 },
+};
+
+static const NLType rtnl_link_info_data_bridge_types[] = {
+ [IFLA_BR_FORWARD_DELAY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_HELLO_TIME] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_MAX_AGE] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_AGEING_TIME] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_STP_STATE] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_PRIORITY] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_VLAN_FILTERING] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_VLAN_PROTOCOL] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_GROUP_FWD_MASK] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_ROOT_PORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_ROOT_PATH_COST] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_TOPOLOGY_CHANGE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_TOPOLOGY_CHANGE_DETECTED] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_HELLO_TIMER] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_TCN_TIMER] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_TOPOLOGY_CHANGE_TIMER] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_GC_TIMER] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BR_GROUP_ADDR] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_FDB_FLUSH] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_MCAST_ROUTER] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_MCAST_SNOOPING] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_MCAST_QUERY_USE_IFADDR] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_MCAST_QUERIER] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_MCAST_HASH_ELASTICITY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_MCAST_HASH_MAX] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_MCAST_LAST_MEMBER_CNT] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_MCAST_STARTUP_QUERY_CNT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_MCAST_LAST_MEMBER_INTVL] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BR_MCAST_MEMBERSHIP_INTVL] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BR_MCAST_QUERIER_INTVL] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BR_MCAST_QUERY_INTVL] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BR_MCAST_QUERY_RESPONSE_INTVL] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BR_MCAST_STARTUP_QUERY_INTVL] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BR_NF_CALL_IPTABLES] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_NF_CALL_IP6TABLES] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_NF_CALL_ARPTABLES] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NETLINK_TYPE_U16 },
+};
+
+static const NLType rtnl_link_info_data_vlan_types[] = {
+ [IFLA_VLAN_ID] = { .type = NETLINK_TYPE_U16 },
+/*
+ [IFLA_VLAN_FLAGS] = { .len = sizeof(struct ifla_vlan_flags) },
+ [IFLA_VLAN_EGRESS_QOS] = { .type = NETLINK_TYPE_NESTED },
+ [IFLA_VLAN_INGRESS_QOS] = { .type = NETLINK_TYPE_NESTED },
+*/
+ [IFLA_VLAN_PROTOCOL] = { .type = NETLINK_TYPE_U16 },
+};
+
+static const NLType rtnl_link_info_data_vxlan_types[] = {
+ [IFLA_VXLAN_ID] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VXLAN_GROUP] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_VXLAN_LINK] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VXLAN_LOCAL] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_VXLAN_TTL] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_TOS] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_LEARNING] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_AGEING] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VXLAN_LIMIT] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VXLAN_PORT_RANGE] = { .type = NETLINK_TYPE_U32},
+ [IFLA_VXLAN_PROXY] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_RSC] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_L2MISS] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_L3MISS] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_PORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_VXLAN_GROUP6] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_VXLAN_LOCAL6] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_VXLAN_UDP_CSUM] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_UDP_ZERO_CSUM6_TX] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_REMCSUM_TX] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_REMCSUM_RX] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_GBP] = { .type = NETLINK_TYPE_FLAG },
+ [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NETLINK_TYPE_FLAG },
+ [IFLA_VXLAN_COLLECT_METADATA] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_LABEL] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VXLAN_GPE] = { .type = NETLINK_TYPE_FLAG },
+};
+
+static const NLType rtnl_bond_arp_target_types[] = {
+ [BOND_ARP_TARGETS_0] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_1] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_2] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_3] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_4] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_5] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_6] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_7] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_8] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_9] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_10] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_11] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_12] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_13] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_14] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_MAX] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_bond_arp_type_system = {
+ .count = ELEMENTSOF(rtnl_bond_arp_target_types),
+ .types = rtnl_bond_arp_target_types,
+};
+
+static const NLType rtnl_link_info_data_bond_types[] = {
+ [IFLA_BOND_MODE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_ACTIVE_SLAVE] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_MIIMON] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_UPDELAY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_DOWNDELAY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_USE_CARRIER] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_ARP_INTERVAL] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_ARP_IP_TARGET] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_bond_arp_type_system },
+ [IFLA_BOND_ARP_VALIDATE] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_ARP_ALL_TARGETS] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_PRIMARY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_PRIMARY_RESELECT] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_FAIL_OVER_MAC] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_XMIT_HASH_POLICY] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_RESEND_IGMP] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_NUM_PEER_NOTIF] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_ALL_SLAVES_ACTIVE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_MIN_LINKS] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_LP_INTERVAL] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_PACKETS_PER_SLAVE] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_AD_LACP_RATE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_AD_SELECT] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_AD_INFO] = { .type = NETLINK_TYPE_NESTED },
+ [IFLA_BOND_AD_ACTOR_SYS_PRIO] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BOND_AD_USER_PORT_KEY] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BOND_AD_ACTOR_SYSTEM] = { .type = NETLINK_TYPE_ETHER_ADDR },
+ [IFLA_BOND_TLB_DYNAMIC_LB] = { .type = NETLINK_TYPE_U8 },
+};
+
+static const NLType rtnl_link_info_data_iptun_types[] = {
+ [IFLA_IPTUN_LINK] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_IPTUN_LOCAL] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_IPTUN_REMOTE] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_IPTUN_TTL] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_IPTUN_TOS] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_IPTUN_PMTUDISC] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_IPTUN_FLAGS] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPTUN_PROTO] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_IPTUN_6RD_PREFIX] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPTUN_ENCAP_TYPE] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPTUN_ENCAP_SPORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPTUN_ENCAP_DPORT] = { .type = NETLINK_TYPE_U16 },
+};
+
+static const NLType rtnl_link_info_data_ipgre_types[] = {
+ [IFLA_GRE_LINK] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_GRE_IFLAGS] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_GRE_OFLAGS] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_GRE_IKEY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_GRE_OKEY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_GRE_LOCAL] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_GRE_REMOTE] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_GRE_TTL] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GRE_TOS] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GRE_PMTUDISC] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GRE_FLOWINFO] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_GRE_FLAGS] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_GRE_ENCAP_TYPE] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_GRE_ENCAP_FLAGS] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_GRE_ENCAP_SPORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_GRE_ENCAP_DPORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_GRE_ERSPAN_INDEX] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_link_info_data_ipvti_types[] = {
+ [IFLA_VTI_LINK] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VTI_IKEY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VTI_OKEY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VTI_LOCAL] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_VTI_REMOTE] = { .type = NETLINK_TYPE_IN_ADDR },
+};
+
+static const NLType rtnl_link_info_data_ip6tnl_types[] = {
+ [IFLA_IPTUN_LINK] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_IPTUN_LOCAL] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_IPTUN_REMOTE] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_IPTUN_TTL] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_IPTUN_FLAGS] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_IPTUN_PROTO] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_IPTUN_ENCAP_LIMIT] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_IPTUN_FLOWINFO] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_link_info_data_vrf_types[] = {
+ [IFLA_VRF_TABLE] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_link_info_data_geneve_types[] = {
+ [IFLA_GENEVE_ID] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_GENEVE_TTL] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GENEVE_TOS] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GENEVE_PORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_GENEVE_REMOTE] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_GENEVE_REMOTE6] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_GENEVE_UDP_CSUM] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GENEVE_LABEL] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_link_info_data_can_types[] = {
+ [IFLA_CAN_BITTIMING] = { .size = sizeof(struct can_bittiming) },
+ [IFLA_CAN_RESTART_MS] = { .type = NETLINK_TYPE_U32 },
+};
+
+/* these strings must match the .kind entries in the kernel */
+static const char* const nl_union_link_info_data_table[] = {
+ [NL_UNION_LINK_INFO_DATA_BOND] = "bond",
+ [NL_UNION_LINK_INFO_DATA_BRIDGE] = "bridge",
+ [NL_UNION_LINK_INFO_DATA_VLAN] = "vlan",
+ [NL_UNION_LINK_INFO_DATA_VETH] = "veth",
+ [NL_UNION_LINK_INFO_DATA_DUMMY] = "dummy",
+ [NL_UNION_LINK_INFO_DATA_MACVLAN] = "macvlan",
+ [NL_UNION_LINK_INFO_DATA_MACVTAP] = "macvtap",
+ [NL_UNION_LINK_INFO_DATA_IPVLAN] = "ipvlan",
+ [NL_UNION_LINK_INFO_DATA_VXLAN] = "vxlan",
+ [NL_UNION_LINK_INFO_DATA_IPIP_TUNNEL] = "ipip",
+ [NL_UNION_LINK_INFO_DATA_IPGRE_TUNNEL] = "gre",
+ [NL_UNION_LINK_INFO_DATA_ERSPAN] = "erspan",
+ [NL_UNION_LINK_INFO_DATA_IPGRETAP_TUNNEL] = "gretap",
+ [NL_UNION_LINK_INFO_DATA_IP6GRE_TUNNEL] = "ip6gre",
+ [NL_UNION_LINK_INFO_DATA_IP6GRETAP_TUNNEL] = "ip6gretap",
+ [NL_UNION_LINK_INFO_DATA_SIT_TUNNEL] = "sit",
+ [NL_UNION_LINK_INFO_DATA_VTI_TUNNEL] = "vti",
+ [NL_UNION_LINK_INFO_DATA_VTI6_TUNNEL] = "vti6",
+ [NL_UNION_LINK_INFO_DATA_IP6TNL_TUNNEL] = "ip6tnl",
+ [NL_UNION_LINK_INFO_DATA_VRF] = "vrf",
+ [NL_UNION_LINK_INFO_DATA_VCAN] = "vcan",
+ [NL_UNION_LINK_INFO_DATA_GENEVE] = "geneve",
+ [NL_UNION_LINK_INFO_DATA_VXCAN] = "vxcan",
+ [NL_UNION_LINK_INFO_DATA_WIREGUARD] = "wireguard",
+ [NL_UNION_LINK_INFO_DATA_NETDEVSIM] = "netdevsim",
+ [NL_UNION_LINK_INFO_DATA_CAN] = "can",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(nl_union_link_info_data, NLUnionLinkInfoData);
+
+static const NLTypeSystem rtnl_link_info_data_type_systems[] = {
+ [NL_UNION_LINK_INFO_DATA_BOND] = { .count = ELEMENTSOF(rtnl_link_info_data_bond_types),
+ .types = rtnl_link_info_data_bond_types },
+ [NL_UNION_LINK_INFO_DATA_BRIDGE] = { .count = ELEMENTSOF(rtnl_link_info_data_bridge_types),
+ .types = rtnl_link_info_data_bridge_types },
+ [NL_UNION_LINK_INFO_DATA_VLAN] = { .count = ELEMENTSOF(rtnl_link_info_data_vlan_types),
+ .types = rtnl_link_info_data_vlan_types },
+ [NL_UNION_LINK_INFO_DATA_VETH] = { .count = ELEMENTSOF(rtnl_link_info_data_veth_types),
+ .types = rtnl_link_info_data_veth_types },
+ [NL_UNION_LINK_INFO_DATA_MACVLAN] = { .count = ELEMENTSOF(rtnl_link_info_data_macvlan_types),
+ .types = rtnl_link_info_data_macvlan_types },
+ [NL_UNION_LINK_INFO_DATA_MACVTAP] = { .count = ELEMENTSOF(rtnl_link_info_data_macvlan_types),
+ .types = rtnl_link_info_data_macvlan_types },
+ [NL_UNION_LINK_INFO_DATA_IPVLAN] = { .count = ELEMENTSOF(rtnl_link_info_data_ipvlan_types),
+ .types = rtnl_link_info_data_ipvlan_types },
+ [NL_UNION_LINK_INFO_DATA_VXLAN] = { .count = ELEMENTSOF(rtnl_link_info_data_vxlan_types),
+ .types = rtnl_link_info_data_vxlan_types },
+ [NL_UNION_LINK_INFO_DATA_IPIP_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_iptun_types),
+ .types = rtnl_link_info_data_iptun_types },
+ [NL_UNION_LINK_INFO_DATA_IPGRE_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_ipgre_types),
+ .types = rtnl_link_info_data_ipgre_types },
+ [NL_UNION_LINK_INFO_DATA_ERSPAN] = { .count = ELEMENTSOF(rtnl_link_info_data_ipgre_types),
+ .types = rtnl_link_info_data_ipgre_types },
+ [NL_UNION_LINK_INFO_DATA_IPGRETAP_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_ipgre_types),
+ .types = rtnl_link_info_data_ipgre_types },
+ [NL_UNION_LINK_INFO_DATA_IP6GRE_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_ipgre_types),
+ .types = rtnl_link_info_data_ipgre_types },
+ [NL_UNION_LINK_INFO_DATA_IP6GRETAP_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_ipgre_types),
+ .types = rtnl_link_info_data_ipgre_types },
+ [NL_UNION_LINK_INFO_DATA_SIT_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_iptun_types),
+ .types = rtnl_link_info_data_iptun_types },
+ [NL_UNION_LINK_INFO_DATA_VTI_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_ipvti_types),
+ .types = rtnl_link_info_data_ipvti_types },
+ [NL_UNION_LINK_INFO_DATA_VTI6_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_ipvti_types),
+ .types = rtnl_link_info_data_ipvti_types },
+ [NL_UNION_LINK_INFO_DATA_IP6TNL_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_ip6tnl_types),
+ .types = rtnl_link_info_data_ip6tnl_types },
+ [NL_UNION_LINK_INFO_DATA_VRF] = { .count = ELEMENTSOF(rtnl_link_info_data_vrf_types),
+ .types = rtnl_link_info_data_vrf_types },
+ [NL_UNION_LINK_INFO_DATA_GENEVE] = { .count = ELEMENTSOF(rtnl_link_info_data_geneve_types),
+ .types = rtnl_link_info_data_geneve_types },
+ [NL_UNION_LINK_INFO_DATA_VXCAN] = { .count = ELEMENTSOF(rtnl_link_info_data_vxcan_types),
+ .types = rtnl_link_info_data_vxcan_types },
+ [NL_UNION_LINK_INFO_DATA_CAN] = { .count = ELEMENTSOF(rtnl_link_info_data_can_types),
+ .types = rtnl_link_info_data_can_types },
+};
+
+static const NLTypeSystemUnion rtnl_link_info_data_type_system_union = {
+ .num = _NL_UNION_LINK_INFO_DATA_MAX,
+ .lookup = nl_union_link_info_data_from_string,
+ .type_systems = rtnl_link_info_data_type_systems,
+ .match_type = NL_MATCH_SIBLING,
+ .match = IFLA_INFO_KIND,
+};
+
+static const NLType rtnl_link_info_types[] = {
+ [IFLA_INFO_KIND] = { .type = NETLINK_TYPE_STRING },
+ [IFLA_INFO_DATA] = { .type = NETLINK_TYPE_UNION, .type_system_union = &rtnl_link_info_data_type_system_union},
+/*
+ [IFLA_INFO_XSTATS],
+ [IFLA_INFO_SLAVE_KIND] = { .type = NETLINK_TYPE_STRING },
+ [IFLA_INFO_SLAVE_DATA] = { .type = NETLINK_TYPE_NESTED },
+*/
+};
+
+static const NLTypeSystem rtnl_link_info_type_system = {
+ .count = ELEMENTSOF(rtnl_link_info_types),
+ .types = rtnl_link_info_types,
+};
+
+static const struct NLType rtnl_prot_info_bridge_port_types[] = {
+ [IFLA_BRPORT_STATE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_COST] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BRPORT_PRIORITY] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BRPORT_MODE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_GUARD] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_PROTECT] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_FAST_LEAVE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_LEARNING] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_PROXYARP] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_LEARNING_SYNC] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_ROOT_ID] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_BRIDGE_ID] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_DESIGNATED_PORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BRPORT_DESIGNATED_COST] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BRPORT_ID] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BRPORT_NO] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BRPORT_TOPOLOGY_CHANGE_ACK] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_CONFIG_PENDING] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_MESSAGE_AGE_TIMER] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BRPORT_FORWARD_DELAY_TIMER] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BRPORT_HOLD_TIMER] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BRPORT_FLUSH] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_MULTICAST_ROUTER] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_PAD] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_MCAST_FLOOD] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_VLAN_TUNNEL] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_BCAST_FLOOD] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_ISOLATED] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_BACKUP_PORT] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_prot_info_type_systems[] = {
+ [AF_BRIDGE] = { .count = ELEMENTSOF(rtnl_prot_info_bridge_port_types),
+ .types = rtnl_prot_info_bridge_port_types },
+};
+
+static const NLTypeSystemUnion rtnl_prot_info_type_system_union = {
+ .num = AF_MAX,
+ .type_systems = rtnl_prot_info_type_systems,
+ .match_type = NL_MATCH_PROTOCOL,
+};
+
+static const struct NLType rtnl_af_spec_inet6_types[] = {
+ [IFLA_INET6_FLAGS] = { .type = NETLINK_TYPE_U32 },
+/*
+ IFLA_INET6_CONF,
+ IFLA_INET6_STATS,
+ IFLA_INET6_MCAST,
+ IFLA_INET6_CACHEINFO,
+ IFLA_INET6_ICMP6STATS,
+*/
+ [IFLA_INET6_TOKEN] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_INET6_ADDR_GEN_MODE] = { .type = NETLINK_TYPE_U8 },
+};
+
+static const NLTypeSystem rtnl_af_spec_inet6_type_system = {
+ .count = ELEMENTSOF(rtnl_af_spec_inet6_types),
+ .types = rtnl_af_spec_inet6_types,
+};
+
+static const NLType rtnl_af_spec_types[] = {
+ [AF_INET6] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_af_spec_inet6_type_system },
+};
+
+static const NLTypeSystem rtnl_af_spec_type_system = {
+ .count = ELEMENTSOF(rtnl_af_spec_types),
+ .types = rtnl_af_spec_types,
+};
+
+static const NLType rtnl_link_types[] = {
+ [IFLA_ADDRESS] = { .type = NETLINK_TYPE_ETHER_ADDR },
+ [IFLA_BROADCAST] = { .type = NETLINK_TYPE_ETHER_ADDR },
+ [IFLA_IFNAME] = { .type = NETLINK_TYPE_STRING, .size = IFNAMSIZ - 1 },
+ [IFLA_MTU] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_LINK] = { .type = NETLINK_TYPE_U32 },
+/*
+ [IFLA_QDISC],
+ [IFLA_STATS],
+ [IFLA_COST],
+ [IFLA_PRIORITY],
+*/
+ [IFLA_MASTER] = { .type = NETLINK_TYPE_U32 },
+/*
+ [IFLA_WIRELESS],
+*/
+ [IFLA_PROTINFO] = { .type = NETLINK_TYPE_UNION, .type_system_union = &rtnl_prot_info_type_system_union },
+ [IFLA_TXQLEN] = { .type = NETLINK_TYPE_U32 },
+/*
+ [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) },
+*/
+ [IFLA_WEIGHT] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_OPERSTATE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_LINKMODE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_LINKINFO] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_info_type_system },
+ [IFLA_NET_NS_PID] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_IFALIAS] = { .type = NETLINK_TYPE_STRING, .size = IFALIASZ - 1 },
+/*
+ [IFLA_NUM_VF],
+ [IFLA_VFINFO_LIST] = {. type = NETLINK_TYPE_NESTED, },
+ [IFLA_STATS64],
+ [IFLA_VF_PORTS] = { .type = NETLINK_TYPE_NESTED },
+ [IFLA_PORT_SELF] = { .type = NETLINK_TYPE_NESTED },
+*/
+ [IFLA_AF_SPEC] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_af_spec_type_system },
+/*
+ [IFLA_VF_PORTS],
+ [IFLA_PORT_SELF],
+ [IFLA_AF_SPEC],
+*/
+ [IFLA_GROUP] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_NET_NS_FD] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_EXT_MASK] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_PROMISCUITY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_NUM_TX_QUEUES] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_NUM_RX_QUEUES] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_CARRIER] = { .type = NETLINK_TYPE_U8 },
+/*
+ [IFLA_PHYS_PORT_ID] = { .type = NETLINK_TYPE_BINARY, .len = MAX_PHYS_PORT_ID_LEN },
+*/
+};
+
+static const NLTypeSystem rtnl_link_type_system = {
+ .count = ELEMENTSOF(rtnl_link_types),
+ .types = rtnl_link_types,
+};
+
+/* IFA_FLAGS was defined in kernel 3.14, but we still support older
+ * kernels where IFA_MAX is lower. */
+static const NLType rtnl_address_types[] = {
+ [IFA_ADDRESS] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFA_LOCAL] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFA_LABEL] = { .type = NETLINK_TYPE_STRING, .size = IFNAMSIZ - 1 },
+ [IFA_BROADCAST] = { .type = NETLINK_TYPE_IN_ADDR }, /* 6? */
+ [IFA_CACHEINFO] = { .type = NETLINK_TYPE_CACHE_INFO, .size = sizeof(struct ifa_cacheinfo) },
+/*
+ [IFA_ANYCAST],
+ [IFA_MULTICAST],
+*/
+ [IFA_FLAGS] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_address_type_system = {
+ .count = ELEMENTSOF(rtnl_address_types),
+ .types = rtnl_address_types,
+};
+
+/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */
+
+static const NLType rtnl_route_metrics_types[] = {
+ [RTAX_MTU] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_WINDOW] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_RTT] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_RTTVAR] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_SSTHRESH] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_CWND] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_ADVMSS] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_REORDERING] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_HOPLIMIT] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_INITCWND] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_FEATURES] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_RTO_MIN] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_INITRWND] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_QUICKACK] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_route_metrics_type_system = {
+ .count = ELEMENTSOF(rtnl_route_metrics_types),
+ .types = rtnl_route_metrics_types,
+};
+
+static const NLType rtnl_route_types[] = {
+ [RTA_DST] = { .type = NETLINK_TYPE_IN_ADDR }, /* 6? */
+ [RTA_SRC] = { .type = NETLINK_TYPE_IN_ADDR }, /* 6? */
+ [RTA_IIF] = { .type = NETLINK_TYPE_U32 },
+ [RTA_OIF] = { .type = NETLINK_TYPE_U32 },
+ [RTA_GATEWAY] = { .type = NETLINK_TYPE_IN_ADDR },
+ [RTA_PRIORITY] = { .type = NETLINK_TYPE_U32 },
+ [RTA_PREFSRC] = { .type = NETLINK_TYPE_IN_ADDR }, /* 6? */
+ [RTA_METRICS] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_route_metrics_type_system},
+/* [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
+*/
+ [RTA_FLOW] = { .type = NETLINK_TYPE_U32 }, /* 6? */
+/*
+ RTA_CACHEINFO,
+ RTA_TABLE,
+ RTA_MARK,
+ RTA_MFC_STATS,
+ RTA_VIA,
+ RTA_NEWDST,
+*/
+ [RTA_PREF] = { .type = NETLINK_TYPE_U8 },
+/*
+ RTA_ENCAP_TYPE,
+ RTA_ENCAP,
+ */
+ [RTA_EXPIRES] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_route_type_system = {
+ .count = ELEMENTSOF(rtnl_route_types),
+ .types = rtnl_route_types,
+};
+
+static const NLType rtnl_neigh_types[] = {
+ [NDA_DST] = { .type = NETLINK_TYPE_IN_ADDR },
+ [NDA_LLADDR] = { .type = NETLINK_TYPE_ETHER_ADDR },
+ [NDA_CACHEINFO] = { .type = NETLINK_TYPE_CACHE_INFO, .size = sizeof(struct nda_cacheinfo) },
+ [NDA_PROBES] = { .type = NETLINK_TYPE_U32 },
+ [NDA_VLAN] = { .type = NETLINK_TYPE_U16 },
+ [NDA_PORT] = { .type = NETLINK_TYPE_U16 },
+ [NDA_VNI] = { .type = NETLINK_TYPE_U32 },
+ [NDA_IFINDEX] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_neigh_type_system = {
+ .count = ELEMENTSOF(rtnl_neigh_types),
+ .types = rtnl_neigh_types,
+};
+
+static const NLType rtnl_addrlabel_types[] = {
+ [IFAL_ADDRESS] = { .type = NETLINK_TYPE_IN_ADDR, .size = sizeof(struct in6_addr) },
+ [IFAL_LABEL] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_addrlabel_type_system = {
+ .count = ELEMENTSOF(rtnl_addrlabel_types),
+ .types = rtnl_addrlabel_types,
+};
+
+static const NLType rtnl_routing_policy_rule_types[] = {
+ [FRA_DST] = { .type = NETLINK_TYPE_IN_ADDR },
+ [FRA_SRC] = { .type = NETLINK_TYPE_IN_ADDR },
+ [FRA_IIFNAME] = { .type = NETLINK_TYPE_STRING },
+ [RTA_OIF] = { .type = NETLINK_TYPE_U32 },
+ [RTA_GATEWAY] = { .type = NETLINK_TYPE_IN_ADDR },
+ [FRA_PRIORITY] = { .type = NETLINK_TYPE_U32 },
+ [FRA_FWMARK] = { .type = NETLINK_TYPE_U32 },
+ [FRA_FLOW] = { .type = NETLINK_TYPE_U32 },
+ [FRA_TUN_ID] = { .type = NETLINK_TYPE_U32 },
+ [FRA_SUPPRESS_IFGROUP] = { .type = NETLINK_TYPE_U32 },
+ [FRA_SUPPRESS_PREFIXLEN] = { .type = NETLINK_TYPE_U32 },
+ [FRA_TABLE] = { .type = NETLINK_TYPE_U32 },
+ [FRA_FWMASK] = { .type = NETLINK_TYPE_U32 },
+ [FRA_OIFNAME] = { .type = NETLINK_TYPE_STRING },
+ [FRA_PAD] = { .type = NETLINK_TYPE_U32 },
+ [FRA_L3MDEV] = { .type = NETLINK_TYPE_U64 },
+ [FRA_UID_RANGE] = { .size = sizeof(struct fib_rule_uid_range) },
+ [FRA_PROTOCOL] = { .type = NETLINK_TYPE_U8 },
+ [FRA_IP_PROTO] = { .type = NETLINK_TYPE_U8 },
+ [FRA_SPORT_RANGE] = { .size = sizeof(struct fib_rule_port_range) },
+ [FRA_DPORT_RANGE] = { .size = sizeof(struct fib_rule_port_range) },
+};
+
+static const NLTypeSystem rtnl_routing_policy_rule_type_system = {
+ .count = ELEMENTSOF(rtnl_routing_policy_rule_types),
+ .types = rtnl_routing_policy_rule_types,
+};
+
+static const NLType rtnl_types[] = {
+ [NLMSG_DONE] = { .type = NETLINK_TYPE_NESTED, .type_system = &empty_type_system, .size = 0 },
+ [NLMSG_ERROR] = { .type = NETLINK_TYPE_NESTED, .type_system = &empty_type_system, .size = sizeof(struct nlmsgerr) },
+ [RTM_NEWLINK] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+ [RTM_DELLINK] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+ [RTM_GETLINK] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+ [RTM_SETLINK] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+ [RTM_NEWADDR] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_address_type_system, .size = sizeof(struct ifaddrmsg) },
+ [RTM_DELADDR] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_address_type_system, .size = sizeof(struct ifaddrmsg) },
+ [RTM_GETADDR] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_address_type_system, .size = sizeof(struct ifaddrmsg) },
+ [RTM_NEWROUTE] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_route_type_system, .size = sizeof(struct rtmsg) },
+ [RTM_DELROUTE] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_route_type_system, .size = sizeof(struct rtmsg) },
+ [RTM_GETROUTE] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_route_type_system, .size = sizeof(struct rtmsg) },
+ [RTM_NEWNEIGH] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_neigh_type_system, .size = sizeof(struct ndmsg) },
+ [RTM_DELNEIGH] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_neigh_type_system, .size = sizeof(struct ndmsg) },
+ [RTM_GETNEIGH] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_neigh_type_system, .size = sizeof(struct ndmsg) },
+ [RTM_NEWADDRLABEL] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_addrlabel_type_system, .size = sizeof(struct ifaddrlblmsg) },
+ [RTM_DELADDRLABEL] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_addrlabel_type_system, .size = sizeof(struct ifaddrlblmsg) },
+ [RTM_GETADDRLABEL] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_addrlabel_type_system, .size = sizeof(struct ifaddrlblmsg) },
+ [RTM_NEWRULE] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_routing_policy_rule_type_system, .size = sizeof(struct rtmsg) },
+ [RTM_DELRULE] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_routing_policy_rule_type_system, .size = sizeof(struct rtmsg) },
+ [RTM_GETRULE] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_routing_policy_rule_type_system, .size = sizeof(struct rtmsg) },
+};
+
+const NLTypeSystem rtnl_type_system_root = {
+ .count = ELEMENTSOF(rtnl_types),
+ .types = rtnl_types,
+};
+
+static const NLType genl_wireguard_allowedip_types[] = {
+ [WGALLOWEDIP_A_FAMILY] = { .type = NETLINK_TYPE_U16 },
+ [WGALLOWEDIP_A_IPADDR] = { .type = NETLINK_TYPE_IN_ADDR },
+ [WGALLOWEDIP_A_CIDR_MASK] = { .type = NETLINK_TYPE_U8 },
+};
+
+static const NLTypeSystem genl_wireguard_allowedip_type_system = {
+ .count = ELEMENTSOF(genl_wireguard_allowedip_types),
+ .types = genl_wireguard_allowedip_types,
+};
+
+static const NLType genl_wireguard_peer_types[] = {
+ [WGPEER_A_PUBLIC_KEY] = { .size = WG_KEY_LEN },
+ [WGPEER_A_FLAGS] = { .type = NETLINK_TYPE_U32 },
+ [WGPEER_A_PRESHARED_KEY] = { .size = WG_KEY_LEN },
+ [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NETLINK_TYPE_U16 },
+ [WGPEER_A_ENDPOINT] = { .type = NETLINK_TYPE_SOCKADDR },
+ [WGPEER_A_ALLOWEDIPS] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_wireguard_allowedip_type_system },
+};
+
+static const NLTypeSystem genl_wireguard_peer_type_system = {
+ .count = ELEMENTSOF(genl_wireguard_peer_types),
+ .types = genl_wireguard_peer_types,
+};
+
+static const NLType genl_wireguard_set_device_types[] = {
+ [WGDEVICE_A_IFINDEX] = { .type = NETLINK_TYPE_U32 },
+ [WGDEVICE_A_IFNAME] = { .type = NETLINK_TYPE_STRING, .size = IFNAMSIZ-1 },
+ [WGDEVICE_A_FLAGS] = { .type = NETLINK_TYPE_U32 },
+ [WGDEVICE_A_PRIVATE_KEY] = { .size = WG_KEY_LEN },
+ [WGDEVICE_A_LISTEN_PORT] = { .type = NETLINK_TYPE_U16 },
+ [WGDEVICE_A_FWMARK] = { .type = NETLINK_TYPE_U32 },
+ [WGDEVICE_A_PEERS] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_wireguard_peer_type_system },
+};
+
+static const NLTypeSystem genl_wireguard_set_device_type_system = {
+ .count = ELEMENTSOF(genl_wireguard_set_device_types),
+ .types = genl_wireguard_set_device_types,
+};
+
+static const NLType genl_wireguard_cmds[] = {
+ [WG_CMD_SET_DEVICE] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_wireguard_set_device_type_system },
+};
+
+static const NLTypeSystem genl_wireguard_type_system = {
+ .count = ELEMENTSOF(genl_wireguard_cmds),
+ .types = genl_wireguard_cmds,
+};
+
+static const NLType genl_get_family_types[] = {
+ [CTRL_ATTR_FAMILY_NAME] = { .type = NETLINK_TYPE_STRING },
+ [CTRL_ATTR_FAMILY_ID] = { .type = NETLINK_TYPE_U16 },
+};
+
+static const NLTypeSystem genl_get_family_type_system = {
+ .count = ELEMENTSOF(genl_get_family_types),
+ .types = genl_get_family_types,
+};
+
+static const NLType genl_ctrl_id_ctrl_cmds[] = {
+ [CTRL_CMD_GETFAMILY] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_get_family_type_system },
+};
+
+static const NLTypeSystem genl_ctrl_id_ctrl_type_system = {
+ .count = ELEMENTSOF(genl_ctrl_id_ctrl_cmds),
+ .types = genl_ctrl_id_ctrl_cmds,
+};
+
+static const NLType genl_fou_types[] = {
+ [FOU_ATTR_PORT] = { .type = NETLINK_TYPE_U16 },
+ [FOU_ATTR_AF] = { .type = NETLINK_TYPE_U8 },
+ [FOU_ATTR_IPPROTO] = { .type = NETLINK_TYPE_U8 },
+ [FOU_ATTR_TYPE] = { .type = NETLINK_TYPE_U8 },
+ [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NETLINK_TYPE_FLAG },
+};
+
+static const NLTypeSystem genl_fou_type_system = {
+ .count = ELEMENTSOF(genl_fou_types),
+ .types = genl_fou_types,
+};
+
+static const NLType genl_fou_cmds[] = {
+ [FOU_CMD_ADD] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_fou_type_system },
+ [FOU_CMD_DEL] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_fou_type_system },
+ [FOU_CMD_GET] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_fou_type_system },
+};
+
+static const NLTypeSystem genl_fou_cmds_type_system = {
+ .count = ELEMENTSOF(genl_fou_cmds),
+ .types = genl_fou_cmds,
+};
+
+static const NLType genl_families[] = {
+ [SD_GENL_ID_CTRL] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_ctrl_id_ctrl_type_system },
+ [SD_GENL_WIREGUARD] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_wireguard_type_system },
+ [SD_GENL_FOU] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_fou_cmds_type_system},
+};
+
+const NLTypeSystem genl_family_type_system_root = {
+ .count = ELEMENTSOF(genl_families),
+ .types = genl_families,
+};
+
+static const NLType genl_types[] = {
+ [GENL_ID_CTRL] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_get_family_type_system, .size = sizeof(struct genlmsghdr) },
+};
+
+const NLTypeSystem genl_type_system_root = {
+ .count = ELEMENTSOF(genl_types),
+ .types = genl_types,
+};
+
+uint16_t type_get_type(const NLType *type) {
+ assert(type);
+ return type->type;
+}
+
+size_t type_get_size(const NLType *type) {
+ assert(type);
+ return type->size;
+}
+
+void type_get_type_system(const NLType *nl_type, const NLTypeSystem **ret) {
+ assert(nl_type);
+ assert(ret);
+ assert(nl_type->type == NETLINK_TYPE_NESTED);
+ assert(nl_type->type_system);
+
+ *ret = nl_type->type_system;
+}
+
+void type_get_type_system_union(const NLType *nl_type, const NLTypeSystemUnion **ret) {
+ assert(nl_type);
+ assert(ret);
+ assert(nl_type->type == NETLINK_TYPE_UNION);
+ assert(nl_type->type_system_union);
+
+ *ret = nl_type->type_system_union;
+}
+
+uint16_t type_system_get_count(const NLTypeSystem *type_system) {
+ assert(type_system);
+ return type_system->count;
+}
+
+const NLTypeSystem *type_system_get_root(int protocol) {
+ switch (protocol) {
+ case NETLINK_GENERIC:
+ return &genl_type_system_root;
+ default: /* NETLINK_ROUTE: */
+ return &rtnl_type_system_root;
+ }
+}
+
+int type_system_get_type(const NLTypeSystem *type_system, const NLType **ret, uint16_t type) {
+ const NLType *nl_type;
+
+ assert(ret);
+ assert(type_system);
+ assert(type_system->types);
+
+ if (type >= type_system->count)
+ return -EOPNOTSUPP;
+
+ nl_type = &type_system->types[type];
+
+ if (nl_type->type == NETLINK_TYPE_UNSPEC)
+ return -EOPNOTSUPP;
+
+ *ret = nl_type;
+
+ return 0;
+}
+
+int type_system_get_type_system(const NLTypeSystem *type_system, const NLTypeSystem **ret, uint16_t type) {
+ const NLType *nl_type;
+ int r;
+
+ assert(ret);
+
+ r = type_system_get_type(type_system, &nl_type, type);
+ if (r < 0)
+ return r;
+
+ type_get_type_system(nl_type, ret);
+ return 0;
+}
+
+int type_system_get_type_system_union(const NLTypeSystem *type_system, const NLTypeSystemUnion **ret, uint16_t type) {
+ const NLType *nl_type;
+ int r;
+
+ assert(ret);
+
+ r = type_system_get_type(type_system, &nl_type, type);
+ if (r < 0)
+ return r;
+
+ type_get_type_system_union(nl_type, ret);
+ return 0;
+}
+
+int type_system_union_get_type_system(const NLTypeSystemUnion *type_system_union, const NLTypeSystem **ret, const char *key) {
+ int type;
+
+ assert(type_system_union);
+ assert(type_system_union->match_type == NL_MATCH_SIBLING);
+ assert(type_system_union->lookup);
+ assert(type_system_union->type_systems);
+ assert(ret);
+ assert(key);
+
+ type = type_system_union->lookup(key);
+ if (type < 0)
+ return -EOPNOTSUPP;
+
+ assert(type < type_system_union->num);
+
+ *ret = &type_system_union->type_systems[type];
+
+ return 0;
+}
+
+int type_system_union_protocol_get_type_system(const NLTypeSystemUnion *type_system_union, const NLTypeSystem **ret, uint16_t protocol) {
+ const NLTypeSystem *type_system;
+
+ assert(type_system_union);
+ assert(type_system_union->type_systems);
+ assert(type_system_union->match_type == NL_MATCH_PROTOCOL);
+ assert(ret);
+
+ if (protocol >= type_system_union->num)
+ return -EOPNOTSUPP;
+
+ type_system = &type_system_union->type_systems[protocol];
+ if (!type_system->types)
+ return -EOPNOTSUPP;
+
+ *ret = type_system;
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-netlink/netlink-types.h b/src/libsystemd/sd-netlink/netlink-types.h
new file mode 100644
index 0000000..b84fa47
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-types.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "macro.h"
+
+enum {
+ NETLINK_TYPE_UNSPEC,
+ NETLINK_TYPE_U8, /* NLA_U8 */
+ NETLINK_TYPE_U16, /* NLA_U16 */
+ NETLINK_TYPE_U32, /* NLA_U32 */
+ NETLINK_TYPE_U64, /* NLA_U64 */
+ NETLINK_TYPE_STRING, /* NLA_STRING */
+ NETLINK_TYPE_FLAG, /* NLA_FLAG */
+ NETLINK_TYPE_IN_ADDR,
+ NETLINK_TYPE_ETHER_ADDR,
+ NETLINK_TYPE_CACHE_INFO,
+ NETLINK_TYPE_NESTED, /* NLA_NESTED */
+ NETLINK_TYPE_UNION,
+ NETLINK_TYPE_SOCKADDR,
+};
+
+typedef enum NLMatchType {
+ NL_MATCH_SIBLING,
+ NL_MATCH_PROTOCOL,
+} NLMatchType;
+
+typedef struct NLTypeSystemUnion NLTypeSystemUnion;
+typedef struct NLTypeSystem NLTypeSystem;
+typedef struct NLType NLType;
+
+struct NLTypeSystemUnion {
+ int num;
+ NLMatchType match_type;
+ uint16_t match;
+ int (*lookup)(const char *);
+ const NLTypeSystem *type_systems;
+};
+
+extern const NLTypeSystem rtnl_type_system_root;
+extern const NLTypeSystem genl_type_system_root;
+extern const NLTypeSystem genl_family_type_system_root;
+
+uint16_t type_get_type(const NLType *type);
+size_t type_get_size(const NLType *type);
+void type_get_type_system(const NLType *type, const NLTypeSystem **ret);
+void type_get_type_system_union(const NLType *type, const NLTypeSystemUnion **ret);
+
+const NLTypeSystem* type_system_get_root(int protocol);
+uint16_t type_system_get_count(const NLTypeSystem *type_system);
+int type_system_get_type(const NLTypeSystem *type_system, const NLType **ret, uint16_t type);
+int type_system_get_type_system(const NLTypeSystem *type_system, const NLTypeSystem **ret, uint16_t type);
+int type_system_get_type_system_union(const NLTypeSystem *type_system, const NLTypeSystemUnion **ret, uint16_t type);
+int type_system_union_get_type_system(const NLTypeSystemUnion *type_system_union, const NLTypeSystem **ret, const char *key);
+int type_system_union_protocol_get_type_system(const NLTypeSystemUnion *type_system_union, const NLTypeSystem **ret, uint16_t protocol);
+
+typedef enum NLUnionLinkInfoData {
+ NL_UNION_LINK_INFO_DATA_BOND,
+ NL_UNION_LINK_INFO_DATA_BRIDGE,
+ NL_UNION_LINK_INFO_DATA_VLAN,
+ NL_UNION_LINK_INFO_DATA_VETH,
+ NL_UNION_LINK_INFO_DATA_DUMMY,
+ NL_UNION_LINK_INFO_DATA_MACVLAN,
+ NL_UNION_LINK_INFO_DATA_MACVTAP,
+ NL_UNION_LINK_INFO_DATA_IPVLAN,
+ NL_UNION_LINK_INFO_DATA_VXLAN,
+ NL_UNION_LINK_INFO_DATA_IPIP_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_IPGRE_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_ERSPAN,
+ NL_UNION_LINK_INFO_DATA_IPGRETAP_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_IP6GRE_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_IP6GRETAP_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_SIT_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_VTI_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_VTI6_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_IP6TNL_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_VRF,
+ NL_UNION_LINK_INFO_DATA_VCAN,
+ NL_UNION_LINK_INFO_DATA_GENEVE,
+ NL_UNION_LINK_INFO_DATA_VXCAN,
+ NL_UNION_LINK_INFO_DATA_WIREGUARD,
+ NL_UNION_LINK_INFO_DATA_NETDEVSIM,
+ NL_UNION_LINK_INFO_DATA_CAN,
+ _NL_UNION_LINK_INFO_DATA_MAX,
+ _NL_UNION_LINK_INFO_DATA_INVALID = -1
+} NLUnionLinkInfoData;
+
+const char *nl_union_link_info_data_to_string(NLUnionLinkInfoData p) _const_;
+NLUnionLinkInfoData nl_union_link_info_data_from_string(const char *p) _pure_;
diff --git a/src/libsystemd/sd-netlink/netlink-util.c b/src/libsystemd/sd-netlink/netlink-util.c
new file mode 100644
index 0000000..3928dfb
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-util.c
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-netlink.h"
+
+#include "netlink-internal.h"
+#include "netlink-util.h"
+
+int rtnl_set_link_name(sd_netlink **rtnl, int ifindex, const char *name) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(ifindex > 0);
+ assert(name);
+
+ if (!*rtnl) {
+ r = sd_netlink_open(rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, RTM_SETLINK, ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(message, IFLA_IFNAME, name);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(*rtnl, message, 0, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int rtnl_set_link_properties(sd_netlink **rtnl, int ifindex, const char *alias,
+ const struct ether_addr *mac, uint32_t mtu) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(ifindex > 0);
+
+ if (!alias && !mac && mtu == 0)
+ return 0;
+
+ if (!*rtnl) {
+ r = sd_netlink_open(rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, RTM_SETLINK, ifindex);
+ if (r < 0)
+ return r;
+
+ if (alias) {
+ r = sd_netlink_message_append_string(message, IFLA_IFALIAS, alias);
+ if (r < 0)
+ return r;
+ }
+
+ if (mac) {
+ r = sd_netlink_message_append_ether_addr(message, IFLA_ADDRESS, mac);
+ if (r < 0)
+ return r;
+ }
+
+ if (mtu != 0) {
+ r = sd_netlink_message_append_u32(message, IFLA_MTU, mtu);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_netlink_call(*rtnl, message, 0, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int rtnl_message_new_synthetic_error(sd_netlink *rtnl, int error, uint32_t serial, sd_netlink_message **ret) {
+ struct nlmsgerr *err;
+ int r;
+
+ assert(error <= 0);
+
+ r = message_new(rtnl, ret, NLMSG_ERROR);
+ if (r < 0)
+ return r;
+
+ (*ret)->hdr->nlmsg_seq = serial;
+
+ err = NLMSG_DATA((*ret)->hdr);
+
+ err->error = error;
+
+ return 0;
+}
+
+int rtnl_log_parse_error(int r) {
+ return log_error_errno(r, "Failed to parse netlink message: %m");
+}
+
+int rtnl_log_create_error(int r) {
+ return log_error_errno(r, "Failed to create netlink message: %m");
+}
diff --git a/src/libsystemd/sd-netlink/netlink-util.h b/src/libsystemd/sd-netlink/netlink-util.h
new file mode 100644
index 0000000..d272328
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-util.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-netlink.h"
+
+#include "util.h"
+
+int rtnl_message_new_synthetic_error(sd_netlink *rtnl, int error, uint32_t serial, sd_netlink_message **ret);
+uint32_t rtnl_message_get_serial(sd_netlink_message *m);
+void rtnl_message_seal(sd_netlink_message *m);
+
+static inline bool rtnl_message_type_is_neigh(uint16_t type) {
+ return IN_SET(type, RTM_NEWNEIGH, RTM_GETNEIGH, RTM_DELNEIGH);
+}
+
+static inline bool rtnl_message_type_is_route(uint16_t type) {
+ return IN_SET(type, RTM_NEWROUTE, RTM_GETROUTE, RTM_DELROUTE);
+}
+
+static inline bool rtnl_message_type_is_link(uint16_t type) {
+ return IN_SET(type, RTM_NEWLINK, RTM_SETLINK, RTM_GETLINK, RTM_DELLINK);
+}
+
+static inline bool rtnl_message_type_is_addr(uint16_t type) {
+ return IN_SET(type, RTM_NEWADDR, RTM_GETADDR, RTM_DELADDR);
+}
+
+static inline bool rtnl_message_type_is_addrlabel(uint16_t type) {
+ return IN_SET(type, RTM_NEWADDRLABEL, RTM_DELADDRLABEL, RTM_GETADDRLABEL);
+}
+
+static inline bool rtnl_message_type_is_routing_policy_rule(uint16_t type) {
+ return IN_SET(type, RTM_NEWRULE, RTM_DELRULE, RTM_GETRULE);
+}
+
+int rtnl_set_link_name(sd_netlink **rtnl, int ifindex, const char *name);
+int rtnl_set_link_properties(sd_netlink **rtnl, int ifindex, const char *alias, const struct ether_addr *mac, uint32_t mtu);
+
+int rtnl_log_parse_error(int r);
+int rtnl_log_create_error(int r);
+
+#define netlink_call_async(nl, ret_slot, message, callback, destroy_callback, userdata) \
+ ({ \
+ int (*_callback_)(sd_netlink *, sd_netlink_message *, typeof(userdata)) = callback; \
+ void (*_destroy_)(typeof(userdata)) = destroy_callback; \
+ sd_netlink_call_async(nl, ret_slot, message, \
+ (sd_netlink_message_handler_t) _callback_, \
+ (sd_netlink_destroy_t) _destroy_, \
+ userdata, 0, __func__); \
+ })
+
+#define netlink_add_match(nl, ret_slot, metch, callback, destroy_callback, userdata) \
+ ({ \
+ int (*_callback_)(sd_netlink *, sd_netlink_message *, typeof(userdata)) = callback; \
+ void (*_destroy_)(typeof(userdata)) = destroy_callback; \
+ sd_netlink_add_match(nl, ret_slot, match, \
+ (sd_netlink_message_handler_t) _callback_, \
+ (sd_netlink_destroy_t) _destroy_, \
+ userdata, __func__); \
+ })
diff --git a/src/libsystemd/sd-netlink/rtnl-message.c b/src/libsystemd/sd-netlink/rtnl-message.c
new file mode 100644
index 0000000..2d4d00e
--- /dev/null
+++ b/src/libsystemd/sd-netlink/rtnl-message.c
@@ -0,0 +1,963 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/in.h>
+#include <linux/if_addrlabel.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "sd-netlink.h"
+
+#include "format-util.h"
+#include "missing.h"
+#include "netlink-internal.h"
+#include "netlink-types.h"
+#include "netlink-util.h"
+#include "refcnt.h"
+#include "socket-util.h"
+#include "util.h"
+
+int sd_rtnl_message_route_set_dst_prefixlen(sd_netlink_message *m, unsigned char prefixlen) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ if ((rtm->rtm_family == AF_INET && prefixlen > 32) ||
+ (rtm->rtm_family == AF_INET6 && prefixlen > 128))
+ return -ERANGE;
+
+ rtm->rtm_dst_len = prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_src_prefixlen(sd_netlink_message *m, unsigned char prefixlen) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ if ((rtm->rtm_family == AF_INET && prefixlen > 32) ||
+ (rtm->rtm_family == AF_INET6 && prefixlen > 128))
+ return -ERANGE;
+
+ rtm->rtm_src_len = prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_scope(sd_netlink_message *m, unsigned char scope) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ rtm->rtm_scope = scope;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_flags(sd_netlink_message *m, unsigned flags) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ rtm->rtm_flags = flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_flags(sd_netlink_message *m, unsigned *flags) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(flags, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *flags = rtm->rtm_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_table(sd_netlink_message *m, unsigned char table) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ rtm->rtm_table = table;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_family(sd_netlink_message *m, int *family) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(family, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *family = rtm->rtm_family;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_family(sd_netlink_message *m, int family) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ rtm->rtm_family = family;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_type(sd_netlink_message *m, unsigned char *type) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(type, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *type = rtm->rtm_type;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_type(sd_netlink_message *m, unsigned char type) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ rtm->rtm_type = type;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_protocol(sd_netlink_message *m, unsigned char *protocol) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(protocol, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *protocol = rtm->rtm_protocol;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_scope(sd_netlink_message *m, unsigned char *scope) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(scope, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *scope = rtm->rtm_scope;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_tos(sd_netlink_message *m, unsigned char *tos) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(tos, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *tos = rtm->rtm_tos;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_table(sd_netlink_message *m, unsigned char *table) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(table, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *table = rtm->rtm_table;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_dst_prefixlen(sd_netlink_message *m, unsigned char *dst_len) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(dst_len, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *dst_len = rtm->rtm_dst_len;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_src_prefixlen(sd_netlink_message *m, unsigned char *src_len) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(src_len, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *src_len = rtm->rtm_src_len;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_route(sd_netlink *rtnl, sd_netlink_message **ret,
+ uint16_t nlmsg_type, int rtm_family,
+ unsigned char rtm_protocol) {
+ struct rtmsg *rtm;
+ int r;
+
+ assert_return(rtnl_message_type_is_route(nlmsg_type), -EINVAL);
+ assert_return((nlmsg_type == RTM_GETROUTE && rtm_family == AF_UNSPEC) ||
+ IN_SET(rtm_family, AF_INET, AF_INET6), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWROUTE)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_APPEND;
+
+ rtm = NLMSG_DATA((*ret)->hdr);
+
+ rtm->rtm_family = rtm_family;
+ rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+ rtm->rtm_type = RTN_UNICAST;
+ rtm->rtm_table = RT_TABLE_MAIN;
+ rtm->rtm_protocol = rtm_protocol;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_set_flags(sd_netlink_message *m, uint8_t flags) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+ ndm->ndm_flags |= flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_set_state(sd_netlink_message *m, uint16_t state) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+ ndm->ndm_state |= state;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_get_flags(sd_netlink_message *m, uint8_t *flags) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+ *flags = ndm->ndm_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_get_state(sd_netlink_message *m, uint16_t *state) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+ *state = ndm->ndm_state;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_get_family(sd_netlink_message *m, int *family) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(family, -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+
+ *family = ndm->ndm_family;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_get_ifindex(sd_netlink_message *m, int *index) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(index, -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+
+ *index = ndm->ndm_ifindex;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_neigh(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int index, int ndm_family) {
+ struct ndmsg *ndm;
+ int r;
+
+ assert_return(rtnl_message_type_is_neigh(nlmsg_type), -EINVAL);
+ assert_return(IN_SET(ndm_family, AF_INET, AF_INET6, PF_BRIDGE), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWNEIGH)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_APPEND;
+
+ ndm = NLMSG_DATA((*ret)->hdr);
+
+ ndm->ndm_family = ndm_family;
+ ndm->ndm_ifindex = index;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_set_flags(sd_netlink_message *m, unsigned flags, unsigned change) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(change, -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ ifi->ifi_flags = flags;
+ ifi->ifi_change = change;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_set_type(sd_netlink_message *m, unsigned type) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ ifi->ifi_type = type;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_set_family(sd_netlink_message *m, unsigned family) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ ifi->ifi_family = family;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_link(sd_netlink *rtnl, sd_netlink_message **ret,
+ uint16_t nlmsg_type, int index) {
+ struct ifinfomsg *ifi;
+ int r;
+
+ assert_return(rtnl_message_type_is_link(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWLINK)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+
+ ifi = NLMSG_DATA((*ret)->hdr);
+
+ ifi->ifi_family = AF_UNSPEC;
+ ifi->ifi_index = index;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_set_prefixlen(sd_netlink_message *m, unsigned char prefixlen) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ if ((ifa->ifa_family == AF_INET && prefixlen > 32) ||
+ (ifa->ifa_family == AF_INET6 && prefixlen > 128))
+ return -ERANGE;
+
+ ifa->ifa_prefixlen = prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_set_flags(sd_netlink_message *m, unsigned char flags) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ ifa->ifa_flags = flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_set_scope(sd_netlink_message *m, unsigned char scope) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ ifa->ifa_scope = scope;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_family(sd_netlink_message *m, int *family) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(family, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *family = ifa->ifa_family;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_prefixlen(sd_netlink_message *m, unsigned char *prefixlen) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(prefixlen, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *prefixlen = ifa->ifa_prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_scope(sd_netlink_message *m, unsigned char *scope) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(scope, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *scope = ifa->ifa_scope;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_flags(sd_netlink_message *m, unsigned char *flags) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(flags, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *flags = ifa->ifa_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_ifindex(sd_netlink_message *m, int *ifindex) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(ifindex, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *ifindex = ifa->ifa_index;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_addr(sd_netlink *rtnl, sd_netlink_message **ret,
+ uint16_t nlmsg_type, int index,
+ int family) {
+ struct ifaddrmsg *ifa;
+ int r;
+
+ assert_return(rtnl_message_type_is_addr(nlmsg_type), -EINVAL);
+ assert_return((nlmsg_type == RTM_GETADDR && index == 0) ||
+ index > 0, -EINVAL);
+ assert_return((nlmsg_type == RTM_GETADDR && family == AF_UNSPEC) ||
+ IN_SET(family, AF_INET, AF_INET6), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_GETADDR)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_DUMP;
+
+ ifa = NLMSG_DATA((*ret)->hdr);
+
+ ifa->ifa_index = index;
+ ifa->ifa_family = family;
+ if (family == AF_INET)
+ ifa->ifa_prefixlen = 32;
+ else if (family == AF_INET6)
+ ifa->ifa_prefixlen = 128;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_addr_update(sd_netlink *rtnl, sd_netlink_message **ret,
+ int index, int family) {
+ int r;
+
+ r = sd_rtnl_message_new_addr(rtnl, ret, RTM_NEWADDR, index, family);
+ if (r < 0)
+ return r;
+
+ (*ret)->hdr->nlmsg_flags |= NLM_F_REPLACE;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_get_ifindex(sd_netlink_message *m, int *ifindex) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(ifindex, -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ *ifindex = ifi->ifi_index;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_get_flags(sd_netlink_message *m, unsigned *flags) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(flags, -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ *flags = ifi->ifi_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_get_type(sd_netlink_message *m, unsigned short *type) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(type, -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ *type = ifi->ifi_type;
+
+ return 0;
+}
+
+int sd_rtnl_message_get_family(sd_netlink_message *m, int *family) {
+ assert_return(m, -EINVAL);
+ assert_return(family, -EINVAL);
+
+ assert(m->hdr);
+
+ if (rtnl_message_type_is_link(m->hdr->nlmsg_type)) {
+ struct ifinfomsg *ifi;
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ *family = ifi->ifi_family;
+
+ return 0;
+ } else if (rtnl_message_type_is_route(m->hdr->nlmsg_type)) {
+ struct rtmsg *rtm;
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *family = rtm->rtm_family;
+
+ return 0;
+ } else if (rtnl_message_type_is_neigh(m->hdr->nlmsg_type)) {
+ struct ndmsg *ndm;
+
+ ndm = NLMSG_DATA(m->hdr);
+
+ *family = ndm->ndm_family;
+
+ return 0;
+ } else if (rtnl_message_type_is_addr(m->hdr->nlmsg_type)) {
+ struct ifaddrmsg *ifa;
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *family = ifa->ifa_family;
+
+ return 0;
+ } else if (rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type)) {
+ struct rtmsg *rtm;
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *family = rtm->rtm_family;
+
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+int sd_rtnl_message_new_addrlabel(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int ifindex, int ifal_family) {
+ struct ifaddrlblmsg *addrlabel;
+ int r;
+
+ assert_return(rtnl_message_type_is_addrlabel(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWADDRLABEL)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+
+ addrlabel = NLMSG_DATA((*ret)->hdr);
+
+ addrlabel->ifal_family = ifal_family;
+ addrlabel->ifal_index = ifindex;
+
+ return 0;
+}
+
+int sd_rtnl_message_addrlabel_set_prefixlen(sd_netlink_message *m, unsigned char prefixlen) {
+ struct ifaddrlblmsg *addrlabel;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addrlabel(m->hdr->nlmsg_type), -EINVAL);
+
+ addrlabel = NLMSG_DATA(m->hdr);
+
+ if (prefixlen > 128)
+ return -ERANGE;
+
+ addrlabel->ifal_prefixlen = prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_addrlabel_get_prefixlen(sd_netlink_message *m, unsigned char *prefixlen) {
+ struct ifaddrlblmsg *addrlabel;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addrlabel(m->hdr->nlmsg_type), -EINVAL);
+
+ addrlabel = NLMSG_DATA(m->hdr);
+
+ *prefixlen = addrlabel->ifal_prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_routing_policy_rule(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int ifal_family) {
+ struct rtmsg *rtm;
+ int r;
+
+ assert_return(rtnl_message_type_is_routing_policy_rule(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWRULE)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+
+ rtm = NLMSG_DATA((*ret)->hdr);
+ rtm->rtm_family = ifal_family;
+ rtm->rtm_protocol = RTPROT_BOOT;
+ rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+ rtm->rtm_type = RTN_UNICAST;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_tos(sd_netlink_message *m, unsigned char tos) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ routing_policy_rule->rtm_tos = tos;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_tos(sd_netlink_message *m, unsigned char *tos) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ *tos = routing_policy_rule->rtm_tos;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_table(sd_netlink_message *m, unsigned char table) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ routing_policy_rule->rtm_table = table;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_table(sd_netlink_message *m, unsigned char *table) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ *table = routing_policy_rule->rtm_table;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_flags(sd_netlink_message *m, unsigned flags) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+ routing_policy_rule->rtm_flags |= flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_flags(sd_netlink_message *m, unsigned *flags) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+ *flags = routing_policy_rule->rtm_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_rtm_type(sd_netlink_message *m, unsigned char type) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ routing_policy_rule->rtm_type = type;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_rtm_type(sd_netlink_message *m, unsigned char *type) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ *type = routing_policy_rule->rtm_type;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_rtm_dst_prefixlen(sd_netlink_message *m, unsigned char len) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ routing_policy_rule->rtm_dst_len = len;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_rtm_dst_prefixlen(sd_netlink_message *m, unsigned char *len) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ *len = routing_policy_rule->rtm_dst_len;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_rtm_src_prefixlen(sd_netlink_message *m, unsigned char len) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ routing_policy_rule->rtm_src_len = len;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_rtm_src_prefixlen(sd_netlink_message *m, unsigned char *len) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ *len = routing_policy_rule->rtm_src_len;
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-netlink/sd-netlink.c b/src/libsystemd/sd-netlink/sd-netlink.c
new file mode 100644
index 0000000..d83952d
--- /dev/null
+++ b/src/libsystemd/sd-netlink/sd-netlink.c
@@ -0,0 +1,907 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <poll.h>
+#include <sys/socket.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "missing.h"
+#include "netlink-internal.h"
+#include "netlink-slot.h"
+#include "netlink-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "util.h"
+
+static int sd_netlink_new(sd_netlink **ret) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+
+ assert_return(ret, -EINVAL);
+
+ rtnl = new(sd_netlink, 1);
+ if (!rtnl)
+ return -ENOMEM;
+
+ *rtnl = (sd_netlink) {
+ .n_ref = REFCNT_INIT,
+ .fd = -1,
+ .sockaddr.nl.nl_family = AF_NETLINK,
+ .original_pid = getpid_cached(),
+ .protocol = -1,
+
+ /* Change notification responses have sequence 0, so we must
+ * start our request sequence numbers at 1, or we may confuse our
+ * responses with notifications from the kernel */
+ .serial = 1,
+
+ };
+
+ /* We guarantee that the read buffer has at least space for
+ * a message header */
+ if (!greedy_realloc((void**)&rtnl->rbuffer, &rtnl->rbuffer_allocated,
+ sizeof(struct nlmsghdr), sizeof(uint8_t)))
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(rtnl);
+
+ return 0;
+}
+
+int sd_netlink_new_from_netlink(sd_netlink **ret, int fd) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ socklen_t addrlen;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = sd_netlink_new(&rtnl);
+ if (r < 0)
+ return r;
+
+ addrlen = sizeof(rtnl->sockaddr);
+
+ r = getsockname(fd, &rtnl->sockaddr.sa, &addrlen);
+ if (r < 0)
+ return -errno;
+
+ if (rtnl->sockaddr.nl.nl_family != AF_NETLINK)
+ return -EINVAL;
+
+ rtnl->fd = fd;
+
+ *ret = TAKE_PTR(rtnl);
+
+ return 0;
+}
+
+static bool rtnl_pid_changed(sd_netlink *rtnl) {
+ assert(rtnl);
+
+ /* We don't support people creating an rtnl connection and
+ * keeping it around over a fork(). Let's complain. */
+
+ return rtnl->original_pid != getpid_cached();
+}
+
+int sd_netlink_open_fd(sd_netlink **ret, int fd) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ int r;
+ int protocol;
+ socklen_t l;
+
+ assert_return(ret, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+
+ r = sd_netlink_new(&rtnl);
+ if (r < 0)
+ return r;
+
+ l = sizeof(protocol);
+ r = getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &l);
+ if (r < 0)
+ return r;
+
+ rtnl->fd = fd;
+ rtnl->protocol = protocol;
+
+ r = socket_bind(rtnl);
+ if (r < 0) {
+ rtnl->fd = -1; /* on failure, the caller remains owner of the fd, hence don't close it here */
+ rtnl->protocol = -1;
+ return r;
+ }
+
+ *ret = TAKE_PTR(rtnl);
+
+ return 0;
+}
+
+int netlink_open_family(sd_netlink **ret, int family) {
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ fd = socket_open(family);
+ if (fd < 0)
+ return fd;
+
+ r = sd_netlink_open_fd(ret, fd);
+ if (r < 0)
+ return r;
+
+ fd = -1;
+
+ return 0;
+}
+
+int sd_netlink_open(sd_netlink **ret) {
+ return netlink_open_family(ret, NETLINK_ROUTE);
+}
+
+int sd_netlink_inc_rcvbuf(sd_netlink *rtnl, size_t size) {
+ assert_return(rtnl, -EINVAL);
+ assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
+
+ return fd_inc_rcvbuf(rtnl->fd, size);
+}
+
+static sd_netlink *netlink_free(sd_netlink *rtnl) {
+ sd_netlink_slot *s;
+ unsigned i;
+
+ assert(rtnl);
+
+ for (i = 0; i < rtnl->rqueue_size; i++)
+ sd_netlink_message_unref(rtnl->rqueue[i]);
+ free(rtnl->rqueue);
+
+ for (i = 0; i < rtnl->rqueue_partial_size; i++)
+ sd_netlink_message_unref(rtnl->rqueue_partial[i]);
+ free(rtnl->rqueue_partial);
+
+ free(rtnl->rbuffer);
+
+ while ((s = rtnl->slots)) {
+ assert(s->floating);
+ netlink_slot_disconnect(s, true);
+ }
+ hashmap_free(rtnl->reply_callbacks);
+ prioq_free(rtnl->reply_callbacks_prioq);
+
+ sd_event_source_unref(rtnl->io_event_source);
+ sd_event_source_unref(rtnl->time_event_source);
+ sd_event_unref(rtnl->event);
+
+ hashmap_free(rtnl->broadcast_group_refs);
+
+ safe_close(rtnl->fd);
+ return mfree(rtnl);
+}
+
+DEFINE_ATOMIC_REF_UNREF_FUNC(sd_netlink, sd_netlink, netlink_free);
+
+static void rtnl_seal_message(sd_netlink *rtnl, sd_netlink_message *m) {
+ assert(rtnl);
+ assert(!rtnl_pid_changed(rtnl));
+ assert(m);
+ assert(m->hdr);
+
+ /* don't use seq == 0, as that is used for broadcasts, so we
+ would get confused by replies to such messages */
+ m->hdr->nlmsg_seq = rtnl->serial++ ? : rtnl->serial++;
+
+ rtnl_message_seal(m);
+
+ return;
+}
+
+int sd_netlink_send(sd_netlink *nl,
+ sd_netlink_message *message,
+ uint32_t *serial) {
+ int r;
+
+ assert_return(nl, -EINVAL);
+ assert_return(!rtnl_pid_changed(nl), -ECHILD);
+ assert_return(message, -EINVAL);
+ assert_return(!message->sealed, -EPERM);
+
+ rtnl_seal_message(nl, message);
+
+ r = socket_write_message(nl, message);
+ if (r < 0)
+ return r;
+
+ if (serial)
+ *serial = rtnl_message_get_serial(message);
+
+ return 1;
+}
+
+int rtnl_rqueue_make_room(sd_netlink *rtnl) {
+ assert(rtnl);
+
+ if (rtnl->rqueue_size >= RTNL_RQUEUE_MAX)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOBUFS),
+ "rtnl: exhausted the read queue size (%d)",
+ RTNL_RQUEUE_MAX);
+
+ if (!GREEDY_REALLOC(rtnl->rqueue, rtnl->rqueue_allocated, rtnl->rqueue_size + 1))
+ return -ENOMEM;
+
+ return 0;
+}
+
+int rtnl_rqueue_partial_make_room(sd_netlink *rtnl) {
+ assert(rtnl);
+
+ if (rtnl->rqueue_partial_size >= RTNL_RQUEUE_MAX)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOBUFS),
+ "rtnl: exhausted the partial read queue size (%d)",
+ RTNL_RQUEUE_MAX);
+
+ if (!GREEDY_REALLOC(rtnl->rqueue_partial, rtnl->rqueue_partial_allocated,
+ rtnl->rqueue_partial_size + 1))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int dispatch_rqueue(sd_netlink *rtnl, sd_netlink_message **message) {
+ int r;
+
+ assert(rtnl);
+ assert(message);
+
+ if (rtnl->rqueue_size <= 0) {
+ /* Try to read a new message */
+ r = socket_read_message(rtnl);
+ if (r == -ENOBUFS) { /* FIXME: ignore buffer overruns for now */
+ log_debug_errno(r, "Got ENOBUFS from netlink socket, ignoring.");
+ return 1;
+ }
+ if (r <= 0)
+ return r;
+ }
+
+ /* Dispatch a queued message */
+ *message = rtnl->rqueue[0];
+ rtnl->rqueue_size--;
+ memmove(rtnl->rqueue, rtnl->rqueue + 1, sizeof(sd_netlink_message*) * rtnl->rqueue_size);
+
+ return 1;
+}
+
+static int process_timeout(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ struct reply_callback *c;
+ sd_netlink_slot *slot;
+ usec_t n;
+ int r;
+
+ assert(rtnl);
+
+ c = prioq_peek(rtnl->reply_callbacks_prioq);
+ if (!c)
+ return 0;
+
+ n = now(CLOCK_MONOTONIC);
+ if (c->timeout > n)
+ return 0;
+
+ r = rtnl_message_new_synthetic_error(rtnl, -ETIMEDOUT, c->serial, &m);
+ if (r < 0)
+ return r;
+
+ assert_se(prioq_pop(rtnl->reply_callbacks_prioq) == c);
+ c->timeout = 0;
+ hashmap_remove(rtnl->reply_callbacks, &c->serial);
+
+ slot = container_of(c, sd_netlink_slot, reply_callback);
+
+ r = c->callback(rtnl, m, slot->userdata);
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: timedout callback %s%s%sfailed: %m",
+ slot->description ? "'" : "",
+ strempty(slot->description),
+ slot->description ? "' " : "");
+
+ if (slot->floating)
+ netlink_slot_disconnect(slot, true);
+
+ return 1;
+}
+
+static int process_reply(sd_netlink *rtnl, sd_netlink_message *m) {
+ struct reply_callback *c;
+ sd_netlink_slot *slot;
+ uint64_t serial;
+ uint16_t type;
+ int r;
+
+ assert(rtnl);
+ assert(m);
+
+ serial = rtnl_message_get_serial(m);
+ c = hashmap_remove(rtnl->reply_callbacks, &serial);
+ if (!c)
+ return 0;
+
+ if (c->timeout != 0) {
+ prioq_remove(rtnl->reply_callbacks_prioq, c, &c->prioq_idx);
+ c->timeout = 0;
+ }
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+
+ if (type == NLMSG_DONE)
+ m = NULL;
+
+ slot = container_of(c, sd_netlink_slot, reply_callback);
+
+ r = c->callback(rtnl, m, slot->userdata);
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: reply callback %s%s%sfailed: %m",
+ slot->description ? "'" : "",
+ strempty(slot->description),
+ slot->description ? "' " : "");
+
+ if (slot->floating)
+ netlink_slot_disconnect(slot, true);
+
+ return 1;
+}
+
+static int process_match(sd_netlink *rtnl, sd_netlink_message *m) {
+ struct match_callback *c;
+ sd_netlink_slot *slot;
+ uint16_t type;
+ int r;
+
+ assert(rtnl);
+ assert(m);
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(match_callbacks, c, rtnl->match_callbacks) {
+ if (type == c->type) {
+ slot = container_of(c, sd_netlink_slot, match_callback);
+
+ r = c->callback(rtnl, m, slot->userdata);
+ if (r != 0) {
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: match callback %s%s%sfailed: %m",
+ slot->description ? "'" : "",
+ strempty(slot->description),
+ slot->description ? "' " : "");
+
+ break;
+ }
+ }
+ }
+
+ return 1;
+}
+
+static int process_running(sd_netlink *rtnl, sd_netlink_message **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(rtnl);
+
+ r = process_timeout(rtnl);
+ if (r != 0)
+ goto null_message;
+
+ r = dispatch_rqueue(rtnl, &m);
+ if (r < 0)
+ return r;
+ if (!m)
+ goto null_message;
+
+ if (sd_netlink_message_is_broadcast(m)) {
+ r = process_match(rtnl, m);
+ if (r != 0)
+ goto null_message;
+ } else {
+ r = process_reply(rtnl, m);
+ if (r != 0)
+ goto null_message;
+ }
+
+ if (ret) {
+ *ret = TAKE_PTR(m);
+
+ return 1;
+ }
+
+ return 1;
+
+null_message:
+ if (r >= 0 && ret)
+ *ret = NULL;
+
+ return r;
+}
+
+int sd_netlink_process(sd_netlink *rtnl, sd_netlink_message **ret) {
+ NETLINK_DONT_DESTROY(rtnl);
+ int r;
+
+ assert_return(rtnl, -EINVAL);
+ assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
+ assert_return(!rtnl->processing, -EBUSY);
+
+ rtnl->processing = true;
+ r = process_running(rtnl, ret);
+ rtnl->processing = false;
+
+ return r;
+}
+
+static usec_t calc_elapse(uint64_t usec) {
+ if (usec == (uint64_t) -1)
+ return 0;
+
+ if (usec == 0)
+ usec = RTNL_DEFAULT_TIMEOUT;
+
+ return now(CLOCK_MONOTONIC) + usec;
+}
+
+static int rtnl_poll(sd_netlink *rtnl, bool need_more, uint64_t timeout_usec) {
+ struct pollfd p[1] = {};
+ struct timespec ts;
+ usec_t m = USEC_INFINITY;
+ int r, e;
+
+ assert(rtnl);
+
+ e = sd_netlink_get_events(rtnl);
+ if (e < 0)
+ return e;
+
+ if (need_more)
+ /* Caller wants more data, and doesn't care about
+ * what's been read or any other timeouts. */
+ e |= POLLIN;
+ else {
+ usec_t until;
+ /* Caller wants to process if there is something to
+ * process, but doesn't care otherwise */
+
+ r = sd_netlink_get_timeout(rtnl, &until);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ usec_t nw;
+ nw = now(CLOCK_MONOTONIC);
+ m = until > nw ? until - nw : 0;
+ }
+ }
+
+ if (timeout_usec != (uint64_t) -1 && (m == (uint64_t) -1 || timeout_usec < m))
+ m = timeout_usec;
+
+ p[0].fd = rtnl->fd;
+ p[0].events = e;
+
+ r = ppoll(p, 1, m == (uint64_t) -1 ? NULL : timespec_store(&ts, m), NULL);
+ if (r < 0)
+ return -errno;
+
+ return r > 0 ? 1 : 0;
+}
+
+int sd_netlink_wait(sd_netlink *nl, uint64_t timeout_usec) {
+ assert_return(nl, -EINVAL);
+ assert_return(!rtnl_pid_changed(nl), -ECHILD);
+
+ if (nl->rqueue_size > 0)
+ return 0;
+
+ return rtnl_poll(nl, false, timeout_usec);
+}
+
+static int timeout_compare(const void *a, const void *b) {
+ const struct reply_callback *x = a, *y = b;
+
+ if (x->timeout != 0 && y->timeout == 0)
+ return -1;
+
+ if (x->timeout == 0 && y->timeout != 0)
+ return 1;
+
+ return CMP(x->timeout, y->timeout);
+}
+
+int sd_netlink_call_async(
+ sd_netlink *nl,
+ sd_netlink_slot **ret_slot,
+ sd_netlink_message *m,
+ sd_netlink_message_handler_t callback,
+ sd_netlink_destroy_t destroy_callback,
+ void *userdata,
+ uint64_t usec,
+ const char *description) {
+ _cleanup_free_ sd_netlink_slot *slot = NULL;
+ uint32_t s;
+ int r, k;
+
+ assert_return(nl, -EINVAL);
+ assert_return(m, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!rtnl_pid_changed(nl), -ECHILD);
+
+ r = hashmap_ensure_allocated(&nl->reply_callbacks, &uint64_hash_ops);
+ if (r < 0)
+ return r;
+
+ if (usec != (uint64_t) -1) {
+ r = prioq_ensure_allocated(&nl->reply_callbacks_prioq, timeout_compare);
+ if (r < 0)
+ return r;
+ }
+
+ r = netlink_slot_allocate(nl, !ret_slot, NETLINK_REPLY_CALLBACK, sizeof(struct reply_callback), userdata, description, &slot);
+ if (r < 0)
+ return r;
+
+ slot->reply_callback.callback = callback;
+ slot->reply_callback.timeout = calc_elapse(usec);
+
+ k = sd_netlink_send(nl, m, &s);
+ if (k < 0)
+ return k;
+
+ slot->reply_callback.serial = s;
+
+ r = hashmap_put(nl->reply_callbacks, &slot->reply_callback.serial, &slot->reply_callback);
+ if (r < 0)
+ return r;
+
+ if (slot->reply_callback.timeout != 0) {
+ r = prioq_put(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);
+ if (r < 0) {
+ (void) hashmap_remove(nl->reply_callbacks, &slot->reply_callback.serial);
+ return r;
+ }
+ }
+
+ /* Set this at last. Otherwise, some failures in above call the destroy callback but some do not. */
+ slot->destroy_callback = destroy_callback;
+
+ if (ret_slot)
+ *ret_slot = slot;
+
+ TAKE_PTR(slot);
+
+ return k;
+}
+
+int sd_netlink_call(sd_netlink *rtnl,
+ sd_netlink_message *message,
+ uint64_t usec,
+ sd_netlink_message **ret) {
+ usec_t timeout;
+ uint32_t serial;
+ int r;
+
+ assert_return(rtnl, -EINVAL);
+ assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
+ assert_return(message, -EINVAL);
+
+ r = sd_netlink_send(rtnl, message, &serial);
+ if (r < 0)
+ return r;
+
+ timeout = calc_elapse(usec);
+
+ for (;;) {
+ usec_t left;
+ unsigned i;
+
+ for (i = 0; i < rtnl->rqueue_size; i++) {
+ uint32_t received_serial;
+
+ received_serial = rtnl_message_get_serial(rtnl->rqueue[i]);
+
+ if (received_serial == serial) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *incoming = NULL;
+ uint16_t type;
+
+ incoming = rtnl->rqueue[i];
+
+ /* found a match, remove from rqueue and return it */
+ memmove(rtnl->rqueue + i,rtnl->rqueue + i + 1,
+ sizeof(sd_netlink_message*) * (rtnl->rqueue_size - i - 1));
+ rtnl->rqueue_size--;
+
+ r = sd_netlink_message_get_errno(incoming);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_get_type(incoming, &type);
+ if (r < 0)
+ return r;
+
+ if (type == NLMSG_DONE) {
+ *ret = NULL;
+ return 0;
+ }
+
+ if (ret)
+ *ret = TAKE_PTR(incoming);
+
+ return 1;
+ }
+ }
+
+ r = socket_read_message(rtnl);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ /* received message, so try to process straight away */
+ continue;
+
+ if (timeout > 0) {
+ usec_t n;
+
+ n = now(CLOCK_MONOTONIC);
+ if (n >= timeout)
+ return -ETIMEDOUT;
+
+ left = timeout - n;
+ } else
+ left = (uint64_t) -1;
+
+ r = rtnl_poll(rtnl, true, left);
+ if (r < 0)
+ return r;
+ else if (r == 0)
+ return -ETIMEDOUT;
+ }
+}
+
+int sd_netlink_get_events(sd_netlink *rtnl) {
+ assert_return(rtnl, -EINVAL);
+ assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
+
+ if (rtnl->rqueue_size == 0)
+ return POLLIN;
+ else
+ return 0;
+}
+
+int sd_netlink_get_timeout(sd_netlink *rtnl, uint64_t *timeout_usec) {
+ struct reply_callback *c;
+
+ assert_return(rtnl, -EINVAL);
+ assert_return(timeout_usec, -EINVAL);
+ assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
+
+ if (rtnl->rqueue_size > 0) {
+ *timeout_usec = 0;
+ return 1;
+ }
+
+ c = prioq_peek(rtnl->reply_callbacks_prioq);
+ if (!c) {
+ *timeout_usec = (uint64_t) -1;
+ return 0;
+ }
+
+ *timeout_usec = c->timeout;
+
+ return 1;
+}
+
+static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_netlink *rtnl = userdata;
+ int r;
+
+ assert(rtnl);
+
+ r = sd_netlink_process(rtnl, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int time_callback(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_netlink *rtnl = userdata;
+ int r;
+
+ assert(rtnl);
+
+ r = sd_netlink_process(rtnl, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int prepare_callback(sd_event_source *s, void *userdata) {
+ sd_netlink *rtnl = userdata;
+ int r, e;
+ usec_t until;
+
+ assert(s);
+ assert(rtnl);
+
+ e = sd_netlink_get_events(rtnl);
+ if (e < 0)
+ return e;
+
+ r = sd_event_source_set_io_events(rtnl->io_event_source, e);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_get_timeout(rtnl, &until);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ int j;
+
+ j = sd_event_source_set_time(rtnl->time_event_source, until);
+ if (j < 0)
+ return j;
+ }
+
+ r = sd_event_source_set_enabled(rtnl->time_event_source, r > 0);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int sd_netlink_attach_event(sd_netlink *rtnl, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(rtnl, -EINVAL);
+ assert_return(!rtnl->event, -EBUSY);
+
+ assert(!rtnl->io_event_source);
+ assert(!rtnl->time_event_source);
+
+ if (event)
+ rtnl->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&rtnl->event);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_add_io(rtnl->event, &rtnl->io_event_source, rtnl->fd, 0, io_callback, rtnl);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(rtnl->io_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_description(rtnl->io_event_source, "rtnl-receive-message");
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_prepare(rtnl->io_event_source, prepare_callback);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_add_time(rtnl->event, &rtnl->time_event_source, CLOCK_MONOTONIC, 0, 0, time_callback, rtnl);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(rtnl->time_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_description(rtnl->time_event_source, "rtnl-timer");
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ sd_netlink_detach_event(rtnl);
+ return r;
+}
+
+int sd_netlink_detach_event(sd_netlink *rtnl) {
+ assert_return(rtnl, -EINVAL);
+ assert_return(rtnl->event, -ENXIO);
+
+ rtnl->io_event_source = sd_event_source_unref(rtnl->io_event_source);
+
+ rtnl->time_event_source = sd_event_source_unref(rtnl->time_event_source);
+
+ rtnl->event = sd_event_unref(rtnl->event);
+
+ return 0;
+}
+
+int sd_netlink_add_match(
+ sd_netlink *rtnl,
+ sd_netlink_slot **ret_slot,
+ uint16_t type,
+ sd_netlink_message_handler_t callback,
+ sd_netlink_destroy_t destroy_callback,
+ void *userdata,
+ const char *description) {
+ _cleanup_free_ sd_netlink_slot *slot = NULL;
+ int r;
+
+ assert_return(rtnl, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
+
+ r = netlink_slot_allocate(rtnl, !ret_slot, NETLINK_MATCH_CALLBACK, sizeof(struct match_callback), userdata, description, &slot);
+ if (r < 0)
+ return r;
+
+ slot->match_callback.callback = callback;
+ slot->match_callback.type = type;
+
+ switch (type) {
+ case RTM_NEWLINK:
+ case RTM_DELLINK:
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_LINK);
+ if (r < 0)
+ return r;
+
+ break;
+ case RTM_NEWADDR:
+ case RTM_DELADDR:
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_IPV4_IFADDR);
+ if (r < 0)
+ return r;
+
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_IPV6_IFADDR);
+ if (r < 0)
+ return r;
+
+ break;
+ case RTM_NEWROUTE:
+ case RTM_DELROUTE:
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_IPV4_ROUTE);
+ if (r < 0)
+ return r;
+
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_IPV6_ROUTE);
+ if (r < 0)
+ return r;
+ break;
+ case RTM_NEWRULE:
+ case RTM_DELRULE:
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_IPV4_RULE);
+ if (r < 0)
+ return r;
+
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_IPV6_RULE);
+ if (r < 0)
+ return r;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ LIST_PREPEND(match_callbacks, rtnl->match_callbacks, &slot->match_callback);
+
+ /* Set this at last. Otherwise, some failures in above call the destroy callback but some do not. */
+ slot->destroy_callback = destroy_callback;
+
+ if (ret_slot)
+ *ret_slot = slot;
+
+ TAKE_PTR(slot);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-netlink/test-local-addresses.c b/src/libsystemd/sd-netlink/test-local-addresses.c
new file mode 100644
index 0000000..1711426
--- /dev/null
+++ b/src/libsystemd/sd-netlink/test-local-addresses.c
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "in-addr-util.h"
+#include "local-addresses.h"
+#include "tests.h"
+
+static void print_local_addresses(struct local_address *a, unsigned n) {
+ unsigned i;
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *b = NULL;
+
+ assert_se(in_addr_to_string(a[i].family, &a[i].address, &b) >= 0);
+ printf("%s if%i scope=%i metric=%u address=%s\n", af_to_name(a[i].family), a[i].ifindex, a[i].scope, a[i].metric, b);
+ }
+}
+
+int main(int argc, char *argv[]) {
+ struct local_address *a;
+ int n;
+
+ test_setup_logging(LOG_DEBUG);
+
+ a = NULL;
+ n = local_addresses(NULL, 0, AF_UNSPEC, &a);
+ assert_se(n >= 0);
+
+ printf("Local Addresses:\n");
+ print_local_addresses(a, (unsigned) n);
+ a = mfree(a);
+
+ n = local_gateways(NULL, 0, AF_UNSPEC, &a);
+ assert_se(n >= 0);
+
+ printf("Local Gateways:\n");
+ print_local_addresses(a, (unsigned) n);
+ free(a);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-netlink/test-netlink.c b/src/libsystemd/sd-netlink/test-netlink.c
new file mode 100644
index 0000000..b13fa22
--- /dev/null
+++ b/src/libsystemd/sd-netlink/test-netlink.c
@@ -0,0 +1,567 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+#include <netinet/ether.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "ether-addr-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "netlink-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "util.h"
+
+static void test_message_link_bridge(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ uint32_t cost;
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &message, RTM_NEWLINK, 1) >= 0);
+ assert_se(sd_rtnl_message_link_set_family(message, PF_BRIDGE) >= 0);
+ assert_se(sd_netlink_message_open_container(message, IFLA_PROTINFO) >= 0);
+ assert_se(sd_netlink_message_append_u32(message, IFLA_BRPORT_COST, 10) >= 0);
+ assert_se(sd_netlink_message_close_container(message) >= 0);
+
+ assert_se(sd_netlink_message_rewind(message) >= 0);
+
+ assert_se(sd_netlink_message_enter_container(message, IFLA_PROTINFO) >= 0);
+ assert_se(sd_netlink_message_read_u32(message, IFLA_BRPORT_COST, &cost) >= 0);
+ assert_se(cost == 10);
+ assert_se(sd_netlink_message_exit_container(message) >= 0);
+}
+
+static void test_link_configure(sd_netlink *rtnl, int ifindex) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ const char *mac = "98:fe:94:3f:c6:18", *name = "test";
+ char buffer[ETHER_ADDR_TO_STRING_MAX];
+ uint32_t mtu = 1450, mtu_out;
+ const char *name_out;
+ struct ether_addr mac_out;
+
+ /* we'd really like to test NEWLINK, but let's not mess with the running kernel */
+ assert_se(sd_rtnl_message_new_link(rtnl, &message, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_netlink_message_append_string(message, IFLA_IFNAME, name) >= 0);
+ assert_se(sd_netlink_message_append_ether_addr(message, IFLA_ADDRESS, ether_aton(mac)) >= 0);
+ assert_se(sd_netlink_message_append_u32(message, IFLA_MTU, mtu) >= 0);
+
+ assert_se(sd_netlink_call(rtnl, message, 0, NULL) == 1);
+ assert_se(sd_netlink_message_rewind(message) >= 0);
+
+ assert_se(sd_netlink_message_read_string(message, IFLA_IFNAME, &name_out) >= 0);
+ assert_se(streq(name, name_out));
+
+ assert_se(sd_netlink_message_read_ether_addr(message, IFLA_ADDRESS, &mac_out) >= 0);
+ assert_se(streq(mac, ether_addr_to_string(&mac_out, buffer)));
+
+ assert_se(sd_netlink_message_read_u32(message, IFLA_MTU, &mtu_out) >= 0);
+ assert_se(mtu == mtu_out);
+}
+
+static void test_link_get(sd_netlink *rtnl, int ifindex) {
+ sd_netlink_message *m;
+ sd_netlink_message *r;
+ uint32_t mtu = 1500;
+ const char *str_data;
+ uint8_t u8_data;
+ uint32_t u32_data;
+ struct ether_addr eth_data;
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+ assert_se(m);
+
+ /* u8 test cases */
+ assert_se(sd_netlink_message_append_u8(m, IFLA_CARRIER, 0) >= 0);
+ assert_se(sd_netlink_message_append_u8(m, IFLA_OPERSTATE, 0) >= 0);
+ assert_se(sd_netlink_message_append_u8(m, IFLA_LINKMODE, 0) >= 0);
+
+ /* u32 test cases */
+ assert_se(sd_netlink_message_append_u32(m, IFLA_MTU, mtu) >= 0);
+ assert_se(sd_netlink_message_append_u32(m, IFLA_GROUP, 0) >= 0);
+ assert_se(sd_netlink_message_append_u32(m, IFLA_TXQLEN, 0) >= 0);
+ assert_se(sd_netlink_message_append_u32(m, IFLA_NUM_TX_QUEUES, 0) >= 0);
+ assert_se(sd_netlink_message_append_u32(m, IFLA_NUM_RX_QUEUES, 0) >= 0);
+
+ assert_se(sd_netlink_call(rtnl, m, -1, &r) == 1);
+
+ assert_se(sd_netlink_message_read_string(r, IFLA_IFNAME, &str_data) == 0);
+
+ assert_se(sd_netlink_message_read_u8(r, IFLA_CARRIER, &u8_data) == 0);
+ assert_se(sd_netlink_message_read_u8(r, IFLA_OPERSTATE, &u8_data) == 0);
+ assert_se(sd_netlink_message_read_u8(r, IFLA_LINKMODE, &u8_data) == 0);
+
+ assert_se(sd_netlink_message_read_u32(r, IFLA_MTU, &u32_data) == 0);
+ assert_se(sd_netlink_message_read_u32(r, IFLA_GROUP, &u32_data) == 0);
+ assert_se(sd_netlink_message_read_u32(r, IFLA_TXQLEN, &u32_data) == 0);
+ assert_se(sd_netlink_message_read_u32(r, IFLA_NUM_TX_QUEUES, &u32_data) == 0);
+ assert_se(sd_netlink_message_read_u32(r, IFLA_NUM_RX_QUEUES, &u32_data) == 0);
+
+ assert_se(sd_netlink_message_read_ether_addr(r, IFLA_ADDRESS, &eth_data) == 0);
+
+ assert_se((m = sd_netlink_message_unref(m)) == NULL);
+ assert_se((r = sd_netlink_message_unref(r)) == NULL);
+}
+
+static void test_address_get(sd_netlink *rtnl, int ifindex) {
+ sd_netlink_message *m;
+ sd_netlink_message *r;
+ struct in_addr in_data;
+ struct ifa_cacheinfo cache;
+ const char *label;
+
+ assert_se(sd_rtnl_message_new_addr(rtnl, &m, RTM_GETADDR, ifindex, AF_INET) >= 0);
+ assert_se(m);
+
+ assert_se(sd_netlink_call(rtnl, m, -1, &r) == 1);
+
+ assert_se(sd_netlink_message_read_in_addr(r, IFA_LOCAL, &in_data) == 0);
+ assert_se(sd_netlink_message_read_in_addr(r, IFA_ADDRESS, &in_data) == 0);
+ assert_se(sd_netlink_message_read_string(r, IFA_LABEL, &label) == 0);
+ assert_se(sd_netlink_message_read_cache_info(r, IFA_CACHEINFO, &cache) == 0);
+
+ assert_se((m = sd_netlink_message_unref(m)) == NULL);
+ assert_se((r = sd_netlink_message_unref(r)) == NULL);
+
+}
+
+static void test_route(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req;
+ struct in_addr addr, addr_data;
+ uint32_t index = 2, u32_data;
+ int r;
+
+ r = sd_rtnl_message_new_route(rtnl, &req, RTM_NEWROUTE, AF_INET, RTPROT_STATIC);
+ if (r < 0) {
+ log_error_errno(r, "Could not create RTM_NEWROUTE message: %m");
+ return;
+ }
+
+ addr.s_addr = htonl(INADDR_LOOPBACK);
+
+ r = sd_netlink_message_append_in_addr(req, RTA_GATEWAY, &addr);
+ if (r < 0) {
+ log_error_errno(r, "Could not append RTA_GATEWAY attribute: %m");
+ return;
+ }
+
+ r = sd_netlink_message_append_u32(req, RTA_OIF, index);
+ if (r < 0) {
+ log_error_errno(r, "Could not append RTA_OIF attribute: %m");
+ return;
+ }
+
+ assert_se(sd_netlink_message_rewind(req) >= 0);
+
+ assert_se(sd_netlink_message_read_in_addr(req, RTA_GATEWAY, &addr_data) >= 0);
+ assert_se(addr_data.s_addr == addr.s_addr);
+
+ assert_se(sd_netlink_message_read_u32(req, RTA_OIF, &u32_data) >= 0);
+ assert_se(u32_data == index);
+
+ assert_se((req = sd_netlink_message_unref(req)) == NULL);
+}
+
+static void test_multiple(void) {
+ sd_netlink *rtnl1, *rtnl2;
+
+ assert_se(sd_netlink_open(&rtnl1) >= 0);
+ assert_se(sd_netlink_open(&rtnl2) >= 0);
+
+ rtnl1 = sd_netlink_unref(rtnl1);
+ rtnl2 = sd_netlink_unref(rtnl2);
+}
+
+static int link_handler(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
+ char *ifname = userdata;
+ const char *data;
+
+ assert_se(rtnl);
+ assert_se(m);
+ assert_se(userdata);
+
+ log_info("%s: got link info about %s", __func__, ifname);
+ free(ifname);
+
+ assert_se(sd_netlink_message_read_string(m, IFLA_IFNAME, &data) >= 0);
+ assert_se(streq(data, "lo"));
+
+ return 1;
+}
+
+static void test_event_loop(int ifindex) {
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ char *ifname;
+
+ ifname = strdup("lo2");
+ assert_se(ifname);
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+
+ assert_se(sd_netlink_call_async(rtnl, NULL, m, link_handler, NULL, ifname, 0, NULL) >= 0);
+
+ assert_se(sd_event_default(&event) >= 0);
+
+ assert_se(sd_netlink_attach_event(rtnl, event, 0) >= 0);
+
+ assert_se(sd_event_run(event, 0) >= 0);
+
+ assert_se(sd_netlink_detach_event(rtnl) >= 0);
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+}
+
+static void test_async_destroy(void *userdata) {
+}
+
+static void test_async(int ifindex) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *r = NULL;
+ _cleanup_(sd_netlink_slot_unrefp) sd_netlink_slot *slot = NULL;
+ sd_netlink_destroy_t destroy_callback;
+ const char *description;
+ char *ifname;
+
+ ifname = strdup("lo");
+ assert_se(ifname);
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+
+ assert_se(sd_netlink_call_async(rtnl, &slot, m, link_handler, test_async_destroy, ifname, 0, "hogehoge") >= 0);
+
+ assert_se(sd_netlink_slot_get_netlink(slot) == rtnl);
+ assert_se(sd_netlink_slot_get_userdata(slot) == ifname);
+ assert_se(sd_netlink_slot_get_destroy_callback(slot, &destroy_callback) == 1);
+ assert_se(destroy_callback == test_async_destroy);
+ assert_se(sd_netlink_slot_get_floating(slot) == 0);
+ assert_se(sd_netlink_slot_get_description(slot, &description) == 1);
+ assert_se(streq(description, "hogehoge"));
+
+ assert_se(sd_netlink_wait(rtnl, 0) >= 0);
+ assert_se(sd_netlink_process(rtnl, &r) >= 0);
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+}
+
+static void test_slot_set(int ifindex) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *r = NULL;
+ _cleanup_(sd_netlink_slot_unrefp) sd_netlink_slot *slot = NULL;
+ sd_netlink_destroy_t destroy_callback;
+ const char *description;
+ char *ifname;
+
+ ifname = strdup("lo");
+ assert_se(ifname);
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+
+ assert_se(sd_netlink_call_async(rtnl, &slot, m, link_handler, NULL, NULL, 0, NULL) >= 0);
+
+ assert_se(sd_netlink_slot_get_netlink(slot) == rtnl);
+ assert_se(!sd_netlink_slot_get_userdata(slot));
+ assert_se(!sd_netlink_slot_set_userdata(slot, ifname));
+ assert_se(sd_netlink_slot_get_userdata(slot) == ifname);
+ assert_se(sd_netlink_slot_get_destroy_callback(slot, NULL) == 0);
+ assert_se(sd_netlink_slot_set_destroy_callback(slot, test_async_destroy) >= 0);
+ assert_se(sd_netlink_slot_get_destroy_callback(slot, &destroy_callback) == 1);
+ assert_se(destroy_callback == test_async_destroy);
+ assert_se(sd_netlink_slot_get_floating(slot) == 0);
+ assert_se(sd_netlink_slot_set_floating(slot, 1) == 1);
+ assert_se(sd_netlink_slot_get_floating(slot) == 1);
+ assert_se(sd_netlink_slot_get_description(slot, NULL) == 0);
+ assert_se(sd_netlink_slot_set_description(slot, "hogehoge") >= 0);
+ assert_se(sd_netlink_slot_get_description(slot, &description) == 1);
+ assert_se(streq(description, "hogehoge"));
+
+ assert_se(sd_netlink_wait(rtnl, 0) >= 0);
+ assert_se(sd_netlink_process(rtnl, &r) >= 0);
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+}
+
+struct test_async_object {
+ unsigned n_ref;
+ char *ifname;
+};
+
+static struct test_async_object *test_async_object_free(struct test_async_object *t) {
+ assert(t);
+
+ free(t->ifname);
+ return mfree(t);
+}
+
+DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(struct test_async_object, test_async_object, test_async_object_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct test_async_object *, test_async_object_unref);
+
+static int link_handler2(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
+ struct test_async_object *t = userdata;
+ const char *data;
+
+ assert_se(rtnl);
+ assert_se(m);
+ assert_se(userdata);
+
+ log_info("%s: got link info about %s", __func__, t->ifname);
+
+ assert_se(sd_netlink_message_read_string(m, IFLA_IFNAME, &data) >= 0);
+ assert_se(streq(data, "lo"));
+
+ return 1;
+}
+
+static void test_async_object_destroy(void *userdata) {
+ struct test_async_object *t = userdata;
+
+ assert(userdata);
+
+ log_info("%s: n_ref=%u", __func__, t->n_ref);
+ test_async_object_unref(t);
+}
+
+static void test_async_destroy_callback(int ifindex) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *r = NULL;
+ _cleanup_(test_async_object_unrefp) struct test_async_object *t = NULL;
+ _cleanup_(sd_netlink_slot_unrefp) sd_netlink_slot *slot = NULL;
+ char *ifname;
+
+ assert_se(t = new(struct test_async_object, 1));
+ assert_se(ifname = strdup("lo"));
+ *t = (struct test_async_object) {
+ .n_ref = 1,
+ .ifname = ifname,
+ };
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ /* destroy callback is called after processing message */
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_netlink_call_async(rtnl, NULL, m, link_handler2, test_async_object_destroy, t, 0, NULL) >= 0);
+
+ assert_se(t->n_ref == 1);
+ assert_se(test_async_object_ref(t));
+ assert_se(t->n_ref == 2);
+
+ assert_se(sd_netlink_wait(rtnl, 0) >= 0);
+ assert_se(sd_netlink_process(rtnl, &r) == 1);
+ assert_se(t->n_ref == 1);
+
+ assert_se(!sd_netlink_message_unref(m));
+
+ /* destroy callback is called when asynchronous call is cancelled, that is, slot is freed. */
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_netlink_call_async(rtnl, &slot, m, link_handler2, test_async_object_destroy, t, 0, NULL) >= 0);
+
+ assert_se(t->n_ref == 1);
+ assert_se(test_async_object_ref(t));
+ assert_se(t->n_ref == 2);
+
+ assert_se(!(slot = sd_netlink_slot_unref(slot)));
+ assert_se(t->n_ref == 1);
+
+ assert_se(!sd_netlink_message_unref(m));
+
+ /* destroy callback is also called by sd_netlink_unref() */
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_netlink_call_async(rtnl, NULL, m, link_handler2, test_async_object_destroy, t, 0, NULL) >= 0);
+
+ assert_se(t->n_ref == 1);
+ assert_se(test_async_object_ref(t));
+ assert_se(t->n_ref == 2);
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+ assert_se(t->n_ref == 1);
+}
+
+static int pipe_handler(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
+ int *counter = userdata;
+ int r;
+
+ (*counter)--;
+
+ r = sd_netlink_message_get_errno(m);
+
+ log_info_errno(r, "%d left in pipe. got reply: %m", *counter);
+
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static void test_pipe(int ifindex) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m1 = NULL, *m2 = NULL;
+ int counter = 0;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m1, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_rtnl_message_new_link(rtnl, &m2, RTM_GETLINK, ifindex) >= 0);
+
+ counter++;
+ assert_se(sd_netlink_call_async(rtnl, NULL, m1, pipe_handler, NULL, &counter, 0, NULL) >= 0);
+
+ counter++;
+ assert_se(sd_netlink_call_async(rtnl, NULL, m2, pipe_handler, NULL, &counter, 0, NULL) >= 0);
+
+ while (counter > 0) {
+ assert_se(sd_netlink_wait(rtnl, 0) >= 0);
+ assert_se(sd_netlink_process(rtnl, NULL) >= 0);
+ }
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+}
+
+static void test_container(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ uint16_t u16_data;
+ uint32_t u32_data;
+ const char *string_data;
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0) >= 0);
+
+ assert_se(sd_netlink_message_open_container(m, IFLA_LINKINFO) >= 0);
+ assert_se(sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "vlan") >= 0);
+ assert_se(sd_netlink_message_append_u16(m, IFLA_VLAN_ID, 100) >= 0);
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+ assert_se(sd_netlink_message_append_string(m, IFLA_INFO_KIND, "vlan") >= 0);
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+ assert_se(sd_netlink_message_close_container(m) == -EINVAL);
+
+ assert_se(sd_netlink_message_rewind(m) >= 0);
+
+ assert_se(sd_netlink_message_enter_container(m, IFLA_LINKINFO) >= 0);
+ assert_se(sd_netlink_message_read_string(m, IFLA_INFO_KIND, &string_data) >= 0);
+ assert_se(streq("vlan", string_data));
+
+ assert_se(sd_netlink_message_enter_container(m, IFLA_INFO_DATA) >= 0);
+ assert_se(sd_netlink_message_read_u16(m, IFLA_VLAN_ID, &u16_data) >= 0);
+ assert_se(sd_netlink_message_exit_container(m) >= 0);
+
+ assert_se(sd_netlink_message_read_string(m, IFLA_INFO_KIND, &string_data) >= 0);
+ assert_se(streq("vlan", string_data));
+ assert_se(sd_netlink_message_exit_container(m) >= 0);
+
+ assert_se(sd_netlink_message_read_u32(m, IFLA_LINKINFO, &u32_data) < 0);
+
+ assert_se(sd_netlink_message_exit_container(m) == -EINVAL);
+}
+
+static void test_match(void) {
+ _cleanup_(sd_netlink_slot_unrefp) sd_netlink_slot *s1 = NULL, *s2 = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_netlink_add_match(rtnl, &s1, RTM_NEWLINK, link_handler, NULL, NULL, NULL) >= 0);
+ assert_se(sd_netlink_add_match(rtnl, &s2, RTM_NEWLINK, link_handler, NULL, NULL, NULL) >= 0);
+ assert_se(sd_netlink_add_match(rtnl, NULL, RTM_NEWLINK, link_handler, NULL, NULL, NULL) >= 0);
+
+ assert_se(!(s1 = sd_netlink_slot_unref(s1)));
+ assert_se(!(s2 = sd_netlink_slot_unref(s2)));
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+}
+
+static void test_get_addresses(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ sd_netlink_message *m;
+
+ assert_se(sd_rtnl_message_new_addr(rtnl, &req, RTM_GETADDR, 0, AF_UNSPEC) >= 0);
+
+ assert_se(sd_netlink_call(rtnl, req, 0, &reply) >= 0);
+
+ for (m = reply; m; m = sd_netlink_message_next(m)) {
+ uint16_t type;
+ unsigned char scope, flags;
+ int family, ifindex;
+
+ assert_se(sd_netlink_message_get_type(m, &type) >= 0);
+ assert_se(type == RTM_NEWADDR);
+
+ assert_se(sd_rtnl_message_addr_get_ifindex(m, &ifindex) >= 0);
+ assert_se(sd_rtnl_message_addr_get_family(m, &family) >= 0);
+ assert_se(sd_rtnl_message_addr_get_scope(m, &scope) >= 0);
+ assert_se(sd_rtnl_message_addr_get_flags(m, &flags) >= 0);
+
+ assert_se(ifindex > 0);
+ assert_se(IN_SET(family, AF_INET, AF_INET6));
+
+ log_info("got IPv%u address on ifindex %i", family == AF_INET ? 4: 6, ifindex);
+ }
+}
+
+static void test_message(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+
+ assert_se(rtnl_message_new_synthetic_error(rtnl, -ETIMEDOUT, 1, &m) >= 0);
+ assert_se(sd_netlink_message_get_errno(m) == -ETIMEDOUT);
+}
+
+int main(void) {
+ sd_netlink *rtnl;
+ sd_netlink_message *m;
+ sd_netlink_message *r;
+ const char *string_data;
+ int if_loopback;
+ uint16_t type;
+
+ test_match();
+ test_multiple();
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ assert_se(rtnl);
+
+ test_route(rtnl);
+ test_message(rtnl);
+ test_container(rtnl);
+
+ if_loopback = (int) if_nametoindex("lo");
+ assert_se(if_loopback > 0);
+
+ test_async(if_loopback);
+ test_slot_set(if_loopback);
+ test_async_destroy_callback(if_loopback);
+ test_pipe(if_loopback);
+ test_event_loop(if_loopback);
+ test_link_configure(rtnl, if_loopback);
+
+ test_get_addresses(rtnl);
+ test_message_link_bridge(rtnl);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, if_loopback) >= 0);
+ assert_se(m);
+
+ assert_se(sd_netlink_message_get_type(m, &type) >= 0);
+ assert_se(type == RTM_GETLINK);
+
+ assert_se(sd_netlink_message_read_string(m, IFLA_IFNAME, &string_data) == -EPERM);
+
+ assert_se(sd_netlink_call(rtnl, m, 0, &r) == 1);
+ assert_se(sd_netlink_message_get_type(r, &type) >= 0);
+ assert_se(type == RTM_NEWLINK);
+
+ assert_se((r = sd_netlink_message_unref(r)) == NULL);
+
+ assert_se(sd_netlink_call(rtnl, m, -1, &r) == -EPERM);
+ assert_se((m = sd_netlink_message_unref(m)) == NULL);
+ assert_se((r = sd_netlink_message_unref(r)) == NULL);
+
+ test_link_get(rtnl, if_loopback);
+ test_address_get(rtnl, if_loopback);
+
+ assert_se((m = sd_netlink_message_unref(m)) == NULL);
+ assert_se((r = sd_netlink_message_unref(r)) == NULL);
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-network/network-util.c b/src/libsystemd/sd-network/network-util.c
new file mode 100644
index 0000000..df5ce86
--- /dev/null
+++ b/src/libsystemd/sd-network/network-util.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "network-util.h"
+#include "strv.h"
+
+bool network_is_online(void) {
+ _cleanup_free_ char *state = NULL;
+ int r;
+
+ r = sd_network_get_operational_state(&state);
+ if (r < 0) /* if we don't know anything, we consider the system online */
+ return true;
+
+ if (STR_IN_SET(state, "routable", "degraded"))
+ return true;
+
+ return false;
+}
diff --git a/src/libsystemd/sd-network/network-util.h b/src/libsystemd/sd-network/network-util.h
new file mode 100644
index 0000000..e868bc2
--- /dev/null
+++ b/src/libsystemd/sd-network/network-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-network.h"
+
+bool network_is_online(void);
diff --git a/src/libsystemd/sd-network/sd-network.c b/src/libsystemd/sd-network/sd-network.c
new file mode 100644
index 0000000..812826f
--- /dev/null
+++ b/src/libsystemd/sd-network/sd-network.c
@@ -0,0 +1,409 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <poll.h>
+#include <string.h>
+#include <sys/inotify.h>
+
+#include "sd-network.h"
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+_public_ int sd_network_get_operational_state(char **state) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(state, -EINVAL);
+
+ r = parse_env_file(NULL, "/run/systemd/netif/state", "OPER_STATE", &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ *state = TAKE_PTR(s);
+
+ return 0;
+}
+
+static int network_get_strv(const char *key, char ***ret) {
+ _cleanup_strv_free_ char **a = NULL;
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = parse_env_file(NULL, "/run/systemd/netif/state", key, &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+ if (isempty(s)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ a = strv_split(s, " ");
+ if (!a)
+ return -ENOMEM;
+
+ strv_uniq(a);
+ r = (int) strv_length(a);
+
+ *ret = TAKE_PTR(a);
+
+ return r;
+}
+
+_public_ int sd_network_get_dns(char ***ret) {
+ return network_get_strv("DNS", ret);
+}
+
+_public_ int sd_network_get_ntp(char ***ret) {
+ return network_get_strv("NTP", ret);
+}
+
+_public_ int sd_network_get_search_domains(char ***ret) {
+ return network_get_strv("DOMAINS", ret);
+}
+
+_public_ int sd_network_get_route_domains(char ***ret) {
+ return network_get_strv("ROUTE_DOMAINS", ret);
+}
+
+static int network_link_get_string(int ifindex, const char *field, char **ret) {
+ char path[STRLEN("/run/systemd/netif/links/") + DECIMAL_STR_MAX(ifindex) + 1];
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ xsprintf(path, "/run/systemd/netif/links/%i", ifindex);
+
+ r = parse_env_file(NULL, path, field, &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+static int network_link_get_strv(int ifindex, const char *key, char ***ret) {
+ char path[STRLEN("/run/systemd/netif/links/") + DECIMAL_STR_MAX(ifindex) + 1];
+ _cleanup_strv_free_ char **a = NULL;
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ xsprintf(path, "/run/systemd/netif/links/%i", ifindex);
+ r = parse_env_file(NULL, path, key, &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+ if (isempty(s)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ a = strv_split(s, " ");
+ if (!a)
+ return -ENOMEM;
+
+ strv_uniq(a);
+ r = (int) strv_length(a);
+
+ *ret = TAKE_PTR(a);
+
+ return r;
+}
+
+_public_ int sd_network_link_get_setup_state(int ifindex, char **state) {
+ return network_link_get_string(ifindex, "ADMIN_STATE", state);
+}
+
+_public_ int sd_network_link_get_network_file(int ifindex, char **filename) {
+ return network_link_get_string(ifindex, "NETWORK_FILE", filename);
+}
+
+_public_ int sd_network_link_get_operational_state(int ifindex, char **state) {
+ return network_link_get_string(ifindex, "OPER_STATE", state);
+}
+
+_public_ int sd_network_link_get_required_for_online(int ifindex) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ r = network_link_get_string(ifindex, "REQUIRED_FOR_ONLINE", &s);
+ if (r < 0) {
+ /* Handle -ENODATA as RequiredForOnline=yes, for compatibility */
+ if (r == -ENODATA)
+ return true;
+ return r;
+ }
+
+ return parse_boolean(s);
+}
+
+_public_ int sd_network_link_get_llmnr(int ifindex, char **llmnr) {
+ return network_link_get_string(ifindex, "LLMNR", llmnr);
+}
+
+_public_ int sd_network_link_get_mdns(int ifindex, char **mdns) {
+ return network_link_get_string(ifindex, "MDNS", mdns);
+}
+
+_public_ int sd_network_link_get_dns_over_tls(int ifindex, char **dns_over_tls) {
+ return network_link_get_string(ifindex, "DNS_OVER_TLS", dns_over_tls);
+}
+
+_public_ int sd_network_link_get_dnssec(int ifindex, char **dnssec) {
+ return network_link_get_string(ifindex, "DNSSEC", dnssec);
+}
+
+_public_ int sd_network_link_get_dnssec_negative_trust_anchors(int ifindex, char ***nta) {
+ return network_link_get_strv(ifindex, "DNSSEC_NTA", nta);
+}
+
+_public_ int sd_network_link_get_timezone(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "TIMEZONE", ret);
+}
+
+_public_ int sd_network_link_get_dns(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "DNS", ret);
+}
+
+_public_ int sd_network_link_get_ntp(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "NTP", ret);
+}
+
+_public_ int sd_network_link_get_search_domains(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "DOMAINS", ret);
+}
+
+_public_ int sd_network_link_get_route_domains(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "ROUTE_DOMAINS", ret);
+}
+
+_public_ int sd_network_link_get_dns_default_route(int ifindex) {
+ char path[STRLEN("/run/systemd/netif/links/") + DECIMAL_STR_MAX(ifindex) + 1];
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(ifindex > 0, -EINVAL);
+
+ xsprintf(path, "/run/systemd/netif/links/%i", ifindex);
+
+ r = parse_env_file(NULL, path, "DNS_DEFAULT_ROUTE", &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+ return parse_boolean(s);
+}
+
+static int network_link_get_ifindexes(int ifindex, const char *key, int **ret) {
+ char path[STRLEN("/run/systemd/netif/links/") + DECIMAL_STR_MAX(ifindex) + 1];
+ _cleanup_free_ int *ifis = NULL;
+ _cleanup_free_ char *s = NULL;
+ size_t allocated = 0, c = 0;
+ const char *x;
+ int r;
+
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ xsprintf(path, "/run/systemd/netif/links/%i", ifindex);
+ r = parse_env_file(NULL, path, key, &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ for (x = s;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&x, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = parse_ifindex(word, &ifindex);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC(ifis, allocated, c + 2))
+ return -ENOMEM;
+
+ ifis[c++] = ifindex;
+ }
+
+ if (ifis)
+ ifis[c] = 0; /* Let's add a 0 ifindex to the end, to be nice */
+
+ *ret = TAKE_PTR(ifis);
+
+ return c;
+}
+
+_public_ int sd_network_link_get_carrier_bound_to(int ifindex, int **ret) {
+ return network_link_get_ifindexes(ifindex, "CARRIER_BOUND_TO", ret);
+}
+
+_public_ int sd_network_link_get_carrier_bound_by(int ifindex, int **ret) {
+ return network_link_get_ifindexes(ifindex, "CARRIER_BOUND_BY", ret);
+}
+
+static int MONITOR_TO_FD(sd_network_monitor *m) {
+ return (int) (unsigned long) m - 1;
+}
+
+static sd_network_monitor* FD_TO_MONITOR(int fd) {
+ return (sd_network_monitor*) (unsigned long) (fd + 1);
+}
+
+static int monitor_add_inotify_watch(int fd) {
+ int k;
+
+ k = inotify_add_watch(fd, "/run/systemd/netif/links/", IN_MOVED_TO|IN_DELETE);
+ if (k >= 0)
+ return 0;
+ else if (errno != ENOENT)
+ return -errno;
+
+ k = inotify_add_watch(fd, "/run/systemd/netif/", IN_CREATE|IN_ISDIR);
+ if (k >= 0)
+ return 0;
+ else if (errno != ENOENT)
+ return -errno;
+
+ k = inotify_add_watch(fd, "/run/systemd/", IN_CREATE|IN_ISDIR);
+ if (k < 0)
+ return -errno;
+
+ return 0;
+}
+
+_public_ int sd_network_monitor_new(sd_network_monitor **m, const char *category) {
+ _cleanup_close_ int fd = -1;
+ int k;
+ bool good = false;
+
+ assert_return(m, -EINVAL);
+
+ fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (!category || streq(category, "links")) {
+ k = monitor_add_inotify_watch(fd);
+ if (k < 0)
+ return k;
+
+ good = true;
+ }
+
+ if (!good)
+ return -EINVAL;
+
+ *m = FD_TO_MONITOR(fd);
+ fd = -1;
+
+ return 0;
+}
+
+_public_ sd_network_monitor* sd_network_monitor_unref(sd_network_monitor *m) {
+ int fd;
+
+ if (m) {
+ fd = MONITOR_TO_FD(m);
+ close_nointr(fd);
+ }
+
+ return NULL;
+}
+
+_public_ int sd_network_monitor_flush(sd_network_monitor *m) {
+ union inotify_event_buffer buffer;
+ struct inotify_event *e;
+ ssize_t l;
+ int fd, k;
+
+ assert_return(m, -EINVAL);
+
+ fd = MONITOR_TO_FD(m);
+
+ l = read(fd, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return -errno;
+ }
+
+ FOREACH_INOTIFY_EVENT(e, buffer, l) {
+ if (e->mask & IN_ISDIR) {
+ k = monitor_add_inotify_watch(fd);
+ if (k < 0)
+ return k;
+
+ k = inotify_rm_watch(fd, e->wd);
+ if (k < 0)
+ return -errno;
+ }
+ }
+
+ return 0;
+}
+
+_public_ int sd_network_monitor_get_fd(sd_network_monitor *m) {
+
+ assert_return(m, -EINVAL);
+
+ return MONITOR_TO_FD(m);
+}
+
+_public_ int sd_network_monitor_get_events(sd_network_monitor *m) {
+
+ assert_return(m, -EINVAL);
+
+ /* For now we will only return POLLIN here, since we don't
+ * need anything else ever for inotify. However, let's have
+ * this API to keep our options open should we later on need
+ * it. */
+ return POLLIN;
+}
+
+_public_ int sd_network_monitor_get_timeout(sd_network_monitor *m, uint64_t *timeout_usec) {
+
+ assert_return(m, -EINVAL);
+ assert_return(timeout_usec, -EINVAL);
+
+ /* For now we will only return (uint64_t) -1, since we don't
+ * need any timeout. However, let's have this API to keep our
+ * options open should we later on need it. */
+ *timeout_usec = (uint64_t) -1;
+ return 0;
+}
diff --git a/src/libsystemd/sd-path/sd-path.c b/src/libsystemd/sd-path/sd-path.c
new file mode 100644
index 0000000..9949c23
--- /dev/null
+++ b/src/libsystemd/sd-path/sd-path.c
@@ -0,0 +1,639 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-path.h"
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "missing.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+static int from_environment(const char *envname, const char *fallback, const char **ret) {
+ assert(ret);
+
+ if (envname) {
+ const char *e;
+
+ e = secure_getenv(envname);
+ if (e && path_is_absolute(e)) {
+ *ret = e;
+ return 0;
+ }
+ }
+
+ if (fallback) {
+ *ret = fallback;
+ return 0;
+ }
+
+ return -ENXIO;
+}
+
+static int from_home_dir(const char *envname, const char *suffix, char **buffer, const char **ret) {
+ _cleanup_free_ char *h = NULL;
+ char *cc = NULL;
+ int r;
+
+ assert(suffix);
+ assert(buffer);
+ assert(ret);
+
+ if (envname) {
+ const char *e = NULL;
+
+ e = secure_getenv(envname);
+ if (e && path_is_absolute(e)) {
+ *ret = e;
+ return 0;
+ }
+ }
+
+ r = get_home_dir(&h);
+ if (r < 0)
+ return r;
+
+ if (endswith(h, "/"))
+ cc = strappend(h, suffix);
+ else
+ cc = strjoin(h, "/", suffix);
+ if (!cc)
+ return -ENOMEM;
+
+ *buffer = cc;
+ *ret = cc;
+ return 0;
+}
+
+static int from_user_dir(const char *field, char **buffer, const char **ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *b = NULL;
+ _cleanup_free_ const char *fn = NULL;
+ const char *c = NULL;
+ size_t n;
+ int r;
+
+ assert(field);
+ assert(buffer);
+ assert(ret);
+
+ r = from_home_dir("XDG_CONFIG_HOME", ".config", &b, &c);
+ if (r < 0)
+ return r;
+
+ fn = strappend(c, "/user-dirs.dirs");
+ if (!fn)
+ return -ENOMEM;
+
+ f = fopen(fn, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ goto fallback;
+
+ return -errno;
+ }
+
+ /* This is an awful parse, but it follows closely what
+ * xdg-user-dirs does upstream */
+
+ n = strlen(field);
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l, *p, *e;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+
+ if (!strneq(l, field, n))
+ continue;
+
+ p = l + n;
+ p += strspn(p, WHITESPACE);
+
+ if (*p != '=')
+ continue;
+ p++;
+
+ p += strspn(p, WHITESPACE);
+
+ if (*p != '"')
+ continue;
+ p++;
+
+ e = strrchr(p, '"');
+ if (!e)
+ continue;
+ *e = 0;
+
+ /* Three syntaxes permitted: relative to $HOME, $HOME itself, and absolute path */
+ if (startswith(p, "$HOME/")) {
+ _cleanup_free_ char *h = NULL;
+ char *cc;
+
+ r = get_home_dir(&h);
+ if (r < 0)
+ return r;
+
+ cc = strappend(h, p+5);
+ if (!cc)
+ return -ENOMEM;
+
+ *buffer = cc;
+ *ret = cc;
+ return 0;
+ } else if (streq(p, "$HOME")) {
+
+ r = get_home_dir(buffer);
+ if (r < 0)
+ return r;
+
+ *ret = *buffer;
+ return 0;
+ } else if (path_is_absolute(p)) {
+ char *copy;
+
+ copy = strdup(p);
+ if (!copy)
+ return -ENOMEM;
+
+ *buffer = copy;
+ *ret = copy;
+ return 0;
+ }
+ }
+
+fallback:
+ /* The desktop directory defaults to $HOME/Desktop, the others to $HOME */
+ if (streq(field, "XDG_DESKTOP_DIR")) {
+ _cleanup_free_ char *h = NULL;
+ char *cc;
+
+ r = get_home_dir(&h);
+ if (r < 0)
+ return r;
+
+ cc = strappend(h, "/Desktop");
+ if (!cc)
+ return -ENOMEM;
+
+ *buffer = cc;
+ *ret = cc;
+ } else {
+
+ r = get_home_dir(buffer);
+ if (r < 0)
+ return r;
+
+ *ret = *buffer;
+ }
+
+ return 0;
+}
+
+static int get_path(uint64_t type, char **buffer, const char **ret) {
+ int r;
+
+ assert(buffer);
+ assert(ret);
+
+ switch (type) {
+
+ case SD_PATH_TEMPORARY:
+ return tmp_dir(ret);
+
+ case SD_PATH_TEMPORARY_LARGE:
+ return var_tmp_dir(ret);
+
+ case SD_PATH_SYSTEM_BINARIES:
+ *ret = "/usr/bin";
+ return 0;
+
+ case SD_PATH_SYSTEM_INCLUDE:
+ *ret = "/usr/include";
+ return 0;
+
+ case SD_PATH_SYSTEM_LIBRARY_PRIVATE:
+ *ret = "/usr/lib";
+ return 0;
+
+ case SD_PATH_SYSTEM_LIBRARY_ARCH:
+ *ret = LIBDIR;
+ return 0;
+
+ case SD_PATH_SYSTEM_SHARED:
+ *ret = "/usr/share";
+ return 0;
+
+ case SD_PATH_SYSTEM_CONFIGURATION_FACTORY:
+ *ret = "/usr/share/factory/etc";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_FACTORY:
+ *ret = "/usr/share/factory/var";
+ return 0;
+
+ case SD_PATH_SYSTEM_CONFIGURATION:
+ *ret = "/etc";
+ return 0;
+
+ case SD_PATH_SYSTEM_RUNTIME:
+ *ret = "/run";
+ return 0;
+
+ case SD_PATH_SYSTEM_RUNTIME_LOGS:
+ *ret = "/run/log";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_PRIVATE:
+ *ret = "/var/lib";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_LOGS:
+ *ret = "/var/log";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_CACHE:
+ *ret = "/var/cache";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_SPOOL:
+ *ret = "/var/spool";
+ return 0;
+
+ case SD_PATH_USER_BINARIES:
+ return from_home_dir(NULL, ".local/bin", buffer, ret);
+
+ case SD_PATH_USER_LIBRARY_PRIVATE:
+ return from_home_dir(NULL, ".local/lib", buffer, ret);
+
+ case SD_PATH_USER_LIBRARY_ARCH:
+ return from_home_dir(NULL, ".local/lib/" LIB_ARCH_TUPLE, buffer, ret);
+
+ case SD_PATH_USER_SHARED:
+ return from_home_dir("XDG_DATA_HOME", ".local/share", buffer, ret);
+
+ case SD_PATH_USER_CONFIGURATION:
+ return from_home_dir("XDG_CONFIG_HOME", ".config", buffer, ret);
+
+ case SD_PATH_USER_RUNTIME:
+ return from_environment("XDG_RUNTIME_DIR", NULL, ret);
+
+ case SD_PATH_USER_STATE_CACHE:
+ return from_home_dir("XDG_CACHE_HOME", ".cache", buffer, ret);
+
+ case SD_PATH_USER:
+ r = get_home_dir(buffer);
+ if (r < 0)
+ return r;
+
+ *ret = *buffer;
+ return 0;
+
+ case SD_PATH_USER_DOCUMENTS:
+ return from_user_dir("XDG_DOCUMENTS_DIR", buffer, ret);
+
+ case SD_PATH_USER_MUSIC:
+ return from_user_dir("XDG_MUSIC_DIR", buffer, ret);
+
+ case SD_PATH_USER_PICTURES:
+ return from_user_dir("XDG_PICTURES_DIR", buffer, ret);
+
+ case SD_PATH_USER_VIDEOS:
+ return from_user_dir("XDG_VIDEOS_DIR", buffer, ret);
+
+ case SD_PATH_USER_DOWNLOAD:
+ return from_user_dir("XDG_DOWNLOAD_DIR", buffer, ret);
+
+ case SD_PATH_USER_PUBLIC:
+ return from_user_dir("XDG_PUBLICSHARE_DIR", buffer, ret);
+
+ case SD_PATH_USER_TEMPLATES:
+ return from_user_dir("XDG_TEMPLATES_DIR", buffer, ret);
+
+ case SD_PATH_USER_DESKTOP:
+ return from_user_dir("XDG_DESKTOP_DIR", buffer, ret);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+_public_ int sd_path_home(uint64_t type, const char *suffix, char **path) {
+ char *buffer = NULL, *cc;
+ const char *ret;
+ int r;
+
+ assert_return(path, -EINVAL);
+
+ if (IN_SET(type,
+ SD_PATH_SEARCH_BINARIES,
+ SD_PATH_SEARCH_BINARIES_DEFAULT,
+ SD_PATH_SEARCH_LIBRARY_PRIVATE,
+ SD_PATH_SEARCH_LIBRARY_ARCH,
+ SD_PATH_SEARCH_SHARED,
+ SD_PATH_SEARCH_CONFIGURATION_FACTORY,
+ SD_PATH_SEARCH_STATE_FACTORY,
+ SD_PATH_SEARCH_CONFIGURATION)) {
+
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_path_search(type, suffix, &l);
+ if (r < 0)
+ return r;
+
+ buffer = strv_join(l, ":");
+ if (!buffer)
+ return -ENOMEM;
+
+ *path = buffer;
+ return 0;
+ }
+
+ r = get_path(type, &buffer, &ret);
+ if (r < 0)
+ return r;
+
+ if (!suffix) {
+ if (!buffer) {
+ buffer = strdup(ret);
+ if (!buffer)
+ return -ENOMEM;
+ }
+
+ *path = buffer;
+ return 0;
+ }
+
+ suffix += strspn(suffix, "/");
+
+ if (endswith(ret, "/"))
+ cc = strappend(ret, suffix);
+ else
+ cc = strjoin(ret, "/", suffix);
+
+ free(buffer);
+
+ if (!cc)
+ return -ENOMEM;
+
+ *path = cc;
+ return 0;
+}
+
+static int search_from_environment(
+ char ***list,
+ const char *env_home,
+ const char *home_suffix,
+ const char *env_search,
+ bool env_search_sufficient,
+ const char *first, ...) {
+
+ const char *e;
+ char *h = NULL;
+ char **l = NULL;
+ int r;
+
+ assert(list);
+
+ if (env_search) {
+ e = secure_getenv(env_search);
+ if (e) {
+ l = strv_split(e, ":");
+ if (!l)
+ return -ENOMEM;
+
+ if (env_search_sufficient) {
+ *list = l;
+ return 0;
+ }
+ }
+ }
+
+ if (!l && first) {
+ va_list ap;
+
+ va_start(ap, first);
+ l = strv_new_ap(first, ap);
+ va_end(ap);
+
+ if (!l)
+ return -ENOMEM;
+ }
+
+ if (env_home) {
+ e = secure_getenv(env_home);
+ if (e && path_is_absolute(e)) {
+ h = strdup(e);
+ if (!h) {
+ strv_free(l);
+ return -ENOMEM;
+ }
+ }
+ }
+
+ if (!h && home_suffix) {
+ e = secure_getenv("HOME");
+ if (e && path_is_absolute(e)) {
+ if (endswith(e, "/"))
+ h = strappend(e, home_suffix);
+ else
+ h = strjoin(e, "/", home_suffix);
+
+ if (!h) {
+ strv_free(l);
+ return -ENOMEM;
+ }
+ }
+ }
+
+ if (h) {
+ r = strv_consume_prepend(&l, h);
+ if (r < 0) {
+ strv_free(l);
+ return -ENOMEM;
+ }
+ }
+
+ *list = l;
+ return 0;
+}
+
+#if HAVE_SPLIT_BIN
+# define ARRAY_SBIN_BIN(x) x "sbin", x "bin"
+#else
+# define ARRAY_SBIN_BIN(x) x "bin"
+#endif
+
+static int get_search(uint64_t type, char ***list) {
+
+ assert(list);
+
+ switch(type) {
+
+ case SD_PATH_SEARCH_BINARIES:
+ return search_from_environment(list,
+ NULL,
+ ".local/bin",
+ "PATH",
+ true,
+ ARRAY_SBIN_BIN("/usr/local/"),
+ ARRAY_SBIN_BIN("/usr/"),
+#if HAVE_SPLIT_USR
+ ARRAY_SBIN_BIN("/"),
+#endif
+ NULL);
+
+ case SD_PATH_SEARCH_LIBRARY_PRIVATE:
+ return search_from_environment(list,
+ NULL,
+ ".local/lib",
+ NULL,
+ false,
+ "/usr/local/lib",
+ "/usr/lib",
+#if HAVE_SPLIT_USR
+ "/lib",
+#endif
+ NULL);
+
+ case SD_PATH_SEARCH_LIBRARY_ARCH:
+ return search_from_environment(list,
+ NULL,
+ ".local/lib/" LIB_ARCH_TUPLE,
+ "LD_LIBRARY_PATH",
+ true,
+ LIBDIR,
+#if HAVE_SPLIT_USR
+ ROOTLIBDIR,
+#endif
+ NULL);
+
+ case SD_PATH_SEARCH_SHARED:
+ return search_from_environment(list,
+ "XDG_DATA_HOME",
+ ".local/share",
+ "XDG_DATA_DIRS",
+ false,
+ "/usr/local/share",
+ "/usr/share",
+ NULL);
+
+ case SD_PATH_SEARCH_CONFIGURATION_FACTORY:
+ return search_from_environment(list,
+ NULL,
+ NULL,
+ NULL,
+ false,
+ "/usr/local/share/factory/etc",
+ "/usr/share/factory/etc",
+ NULL);
+
+ case SD_PATH_SEARCH_STATE_FACTORY:
+ return search_from_environment(list,
+ NULL,
+ NULL,
+ NULL,
+ false,
+ "/usr/local/share/factory/var",
+ "/usr/share/factory/var",
+ NULL);
+
+ case SD_PATH_SEARCH_CONFIGURATION:
+ return search_from_environment(list,
+ "XDG_CONFIG_HOME",
+ ".config",
+ "XDG_CONFIG_DIRS",
+ false,
+ "/etc",
+ NULL);
+
+ case SD_PATH_SEARCH_BINARIES_DEFAULT: {
+ char **t;
+
+ t = strv_split_nulstr(DEFAULT_PATH_NULSTR);
+ if (!t)
+ return -ENOMEM;
+
+ *list = t;
+ return 0;
+ }}
+
+ return -EOPNOTSUPP;
+}
+
+_public_ int sd_path_search(uint64_t type, const char *suffix, char ***paths) {
+ char **i, **j;
+ _cleanup_strv_free_ char **l = NULL, **n = NULL;
+ int r;
+
+ assert_return(paths, -EINVAL);
+
+ if (!IN_SET(type,
+ SD_PATH_SEARCH_BINARIES,
+ SD_PATH_SEARCH_BINARIES_DEFAULT,
+ SD_PATH_SEARCH_LIBRARY_PRIVATE,
+ SD_PATH_SEARCH_LIBRARY_ARCH,
+ SD_PATH_SEARCH_SHARED,
+ SD_PATH_SEARCH_CONFIGURATION_FACTORY,
+ SD_PATH_SEARCH_STATE_FACTORY,
+ SD_PATH_SEARCH_CONFIGURATION)) {
+
+ char *p;
+
+ r = sd_path_home(type, suffix, &p);
+ if (r < 0)
+ return r;
+
+ l = new(char*, 2);
+ if (!l) {
+ free(p);
+ return -ENOMEM;
+ }
+
+ l[0] = p;
+ l[1] = NULL;
+
+ *paths = TAKE_PTR(l);
+ return 0;
+ }
+
+ r = get_search(type, &l);
+ if (r < 0)
+ return r;
+
+ if (!suffix) {
+ *paths = TAKE_PTR(l);
+ return 0;
+ }
+
+ n = new(char*, strv_length(l)+1);
+ if (!n)
+ return -ENOMEM;
+
+ j = n;
+ STRV_FOREACH(i, l) {
+
+ if (endswith(*i, "/"))
+ *j = strappend(*i, suffix);
+ else
+ *j = strjoin(*i, "/", suffix);
+
+ if (!*j)
+ return -ENOMEM;
+
+ j++;
+ }
+
+ *j = NULL;
+ *paths = TAKE_PTR(n);
+ return 0;
+}
diff --git a/src/libsystemd/sd-resolve/resolve-private.h b/src/libsystemd/sd-resolve/resolve-private.h
new file mode 100644
index 0000000..a0feb36
--- /dev/null
+++ b/src/libsystemd/sd-resolve/resolve-private.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-resolve.h"
+
+int resolve_getaddrinfo_with_destroy_callback(
+ sd_resolve *resolve, sd_resolve_query **q,
+ const char *node, const char *service, const struct addrinfo *hints,
+ sd_resolve_getaddrinfo_handler_t callback,
+ sd_resolve_destroy_t destroy_callback, void *userdata);
+int resolve_getnameinfo_with_destroy_callback(
+ sd_resolve *resolve, sd_resolve_query **q,
+ const struct sockaddr *sa, socklen_t salen, int flags, uint64_t get,
+ sd_resolve_getnameinfo_handler_t callback,
+ sd_resolve_destroy_t destroy_callback, void *userdata);
+
+#define resolve_getaddrinfo(resolve, ret_query, node, service, hints, callback, destroy_callback, userdata) \
+ ({ \
+ int (*_callback_)(sd_resolve_query*, int, const struct addrinfo*, typeof(userdata)) = callback; \
+ void (*_destroy_)(typeof(userdata)) = destroy_callback; \
+ resolve_getaddrinfo_with_destroy_callback( \
+ resolve, ret_query, \
+ node, service, hints, \
+ (sd_resolve_getaddrinfo_handler_t) _callback_, \
+ (sd_resolve_destroy_t) _destroy_, \
+ userdata); \
+ })
+
+#define resolve_getnameinfo(resolve, ret_query, sa, salen, flags, get, callback, destroy_callback, userdata) \
+ ({ \
+ int (*_callback_)(sd_resolve_query*, int, const char*, const char*, typeof(userdata)) = callback; \
+ void (*_destroy_)(typeof(userdata)) = destroy_callback; \
+ resolve_getaddrinfo_with_destroy_callback( \
+ resolve, ret_query, \
+ sa, salen, flags, get, \
+ (sd_resolve_getnameinfo_handler_t) _callback_, \
+ (sd_resolve_destroy_t) _destroy_, \
+ userdata); \
+ })
diff --git a/src/libsystemd/sd-resolve/sd-resolve.c b/src/libsystemd/sd-resolve/sd-resolve.c
new file mode 100644
index 0000000..36b9c8d
--- /dev/null
+++ b/src/libsystemd/sd-resolve/sd-resolve.c
@@ -0,0 +1,1290 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <poll.h>
+#include <pthread.h>
+#include <resolv.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "sd-resolve.h"
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "list.h"
+#include "missing.h"
+#include "resolve-private.h"
+#include "socket-util.h"
+#include "util.h"
+#include "process-util.h"
+
+#define WORKERS_MIN 1U
+#define WORKERS_MAX 16U
+#define QUERIES_MAX 256U
+#define BUFSIZE 10240U
+
+typedef enum {
+ REQUEST_ADDRINFO,
+ RESPONSE_ADDRINFO,
+ REQUEST_NAMEINFO,
+ RESPONSE_NAMEINFO,
+ REQUEST_TERMINATE,
+ RESPONSE_DIED
+} QueryType;
+
+enum {
+ REQUEST_RECV_FD,
+ REQUEST_SEND_FD,
+ RESPONSE_RECV_FD,
+ RESPONSE_SEND_FD,
+ _FD_MAX
+};
+
+struct sd_resolve {
+ unsigned n_ref;
+
+ bool dead:1;
+ pid_t original_pid;
+
+ int fds[_FD_MAX];
+
+ pthread_t workers[WORKERS_MAX];
+ unsigned n_valid_workers;
+
+ unsigned current_id;
+ sd_resolve_query* query_array[QUERIES_MAX];
+ unsigned n_queries, n_done, n_outstanding;
+
+ sd_event_source *event_source;
+ sd_event *event;
+
+ sd_resolve_query *current;
+
+ sd_resolve **default_resolve_ptr;
+ pid_t tid;
+
+ LIST_HEAD(sd_resolve_query, queries);
+};
+
+struct sd_resolve_query {
+ unsigned n_ref;
+
+ sd_resolve *resolve;
+
+ QueryType type:4;
+ bool done:1;
+ bool floating:1;
+ unsigned id;
+
+ int ret;
+ int _errno;
+ int _h_errno;
+ struct addrinfo *addrinfo;
+ char *serv, *host;
+
+ union {
+ sd_resolve_getaddrinfo_handler_t getaddrinfo_handler;
+ sd_resolve_getnameinfo_handler_t getnameinfo_handler;
+ };
+
+ void *userdata;
+ sd_resolve_destroy_t destroy_callback;
+
+ LIST_FIELDS(sd_resolve_query, queries);
+};
+
+typedef struct RHeader {
+ QueryType type;
+ unsigned id;
+ size_t length;
+} RHeader;
+
+typedef struct AddrInfoRequest {
+ struct RHeader header;
+ bool hints_valid;
+ int ai_flags;
+ int ai_family;
+ int ai_socktype;
+ int ai_protocol;
+ size_t node_len, service_len;
+} AddrInfoRequest;
+
+typedef struct AddrInfoResponse {
+ struct RHeader header;
+ int ret;
+ int _errno;
+ int _h_errno;
+ /* followed by addrinfo_serialization[] */
+} AddrInfoResponse;
+
+typedef struct AddrInfoSerialization {
+ int ai_flags;
+ int ai_family;
+ int ai_socktype;
+ int ai_protocol;
+ size_t ai_addrlen;
+ size_t canonname_len;
+ /* Followed by ai_addr amd ai_canonname with variable lengths */
+} AddrInfoSerialization;
+
+typedef struct NameInfoRequest {
+ struct RHeader header;
+ int flags;
+ socklen_t sockaddr_len;
+ bool gethost:1, getserv:1;
+} NameInfoRequest;
+
+typedef struct NameInfoResponse {
+ struct RHeader header;
+ size_t hostlen, servlen;
+ int ret;
+ int _errno;
+ int _h_errno;
+} NameInfoResponse;
+
+typedef union Packet {
+ RHeader rheader;
+ AddrInfoRequest addrinfo_request;
+ AddrInfoResponse addrinfo_response;
+ NameInfoRequest nameinfo_request;
+ NameInfoResponse nameinfo_response;
+} Packet;
+
+static int getaddrinfo_done(sd_resolve_query* q);
+static int getnameinfo_done(sd_resolve_query *q);
+
+static void resolve_query_disconnect(sd_resolve_query *q);
+
+#define RESOLVE_DONT_DESTROY(resolve) \
+ _cleanup_(sd_resolve_unrefp) _unused_ sd_resolve *_dont_destroy_##resolve = sd_resolve_ref(resolve)
+
+static void query_assign_errno(sd_resolve_query *q, int ret, int error, int h_error) {
+ assert(q);
+
+ q->ret = ret;
+ q->_errno = abs(error);
+ q->_h_errno = h_error;
+}
+
+static int send_died(int out_fd) {
+ RHeader rh = {
+ .type = RESPONSE_DIED,
+ .length = sizeof(RHeader),
+ };
+
+ assert(out_fd >= 0);
+
+ if (send(out_fd, &rh, rh.length, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static void *serialize_addrinfo(void *p, const struct addrinfo *ai, size_t *length, size_t maxlength) {
+ AddrInfoSerialization s;
+ size_t cnl, l;
+
+ assert(p);
+ assert(ai);
+ assert(length);
+ assert(*length <= maxlength);
+
+ cnl = ai->ai_canonname ? strlen(ai->ai_canonname)+1 : 0;
+ l = sizeof(AddrInfoSerialization) + ai->ai_addrlen + cnl;
+
+ if (*length + l > maxlength)
+ return NULL;
+
+ s = (AddrInfoSerialization) {
+ .ai_flags = ai->ai_flags,
+ .ai_family = ai->ai_family,
+ .ai_socktype = ai->ai_socktype,
+ .ai_protocol = ai->ai_protocol,
+ .ai_addrlen = ai->ai_addrlen,
+ .canonname_len = cnl,
+ };
+
+ memcpy((uint8_t*) p, &s, sizeof(AddrInfoSerialization));
+ memcpy((uint8_t*) p + sizeof(AddrInfoSerialization), ai->ai_addr, ai->ai_addrlen);
+ memcpy_safe((char*) p + sizeof(AddrInfoSerialization) + ai->ai_addrlen,
+ ai->ai_canonname, cnl);
+
+ *length += l;
+ return (uint8_t*) p + l;
+}
+
+static int send_addrinfo_reply(
+ int out_fd,
+ unsigned id,
+ int ret,
+ struct addrinfo *ai,
+ int _errno,
+ int _h_errno) {
+
+ AddrInfoResponse resp = {};
+ union {
+ AddrInfoSerialization ais;
+ uint8_t space[BUFSIZE];
+ } buffer;
+ struct iovec iov[2];
+ struct msghdr mh;
+
+ assert(out_fd >= 0);
+
+ resp = (AddrInfoResponse) {
+ .header.type = RESPONSE_ADDRINFO,
+ .header.id = id,
+ .header.length = sizeof(AddrInfoResponse),
+ .ret = ret,
+ ._errno = _errno,
+ ._h_errno = _h_errno,
+ };
+
+ if (ret == 0 && ai) {
+ void *p = &buffer;
+ struct addrinfo *k;
+
+ for (k = ai; k; k = k->ai_next) {
+ p = serialize_addrinfo(p, k, &resp.header.length, (uint8_t*) &buffer + BUFSIZE - (uint8_t*) p);
+ if (!p) {
+ freeaddrinfo(ai);
+ return -ENOBUFS;
+ }
+ }
+ }
+
+ if (ai)
+ freeaddrinfo(ai);
+
+ iov[0] = IOVEC_MAKE(&resp, sizeof(AddrInfoResponse));
+ iov[1] = IOVEC_MAKE(&buffer, resp.header.length - sizeof(AddrInfoResponse));
+
+ mh = (struct msghdr) {
+ .msg_iov = iov,
+ .msg_iovlen = ELEMENTSOF(iov)
+ };
+
+ if (sendmsg(out_fd, &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int send_nameinfo_reply(
+ int out_fd,
+ unsigned id,
+ int ret,
+ const char *host,
+ const char *serv,
+ int _errno,
+ int _h_errno) {
+
+ NameInfoResponse resp = {};
+ struct iovec iov[3];
+ struct msghdr mh;
+ size_t hl, sl;
+
+ assert(out_fd >= 0);
+
+ sl = serv ? strlen(serv)+1 : 0;
+ hl = host ? strlen(host)+1 : 0;
+
+ resp = (NameInfoResponse) {
+ .header.type = RESPONSE_NAMEINFO,
+ .header.id = id,
+ .header.length = sizeof(NameInfoResponse) + hl + sl,
+ .hostlen = hl,
+ .servlen = sl,
+ .ret = ret,
+ ._errno = _errno,
+ ._h_errno = _h_errno,
+ };
+
+ iov[0] = IOVEC_MAKE(&resp, sizeof(NameInfoResponse));
+ iov[1] = IOVEC_MAKE((void*) host, hl);
+ iov[2] = IOVEC_MAKE((void*) serv, sl);
+
+ mh = (struct msghdr) {
+ .msg_iov = iov,
+ .msg_iovlen = ELEMENTSOF(iov)
+ };
+
+ if (sendmsg(out_fd, &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int handle_request(int out_fd, const Packet *packet, size_t length) {
+ const RHeader *req;
+
+ assert(out_fd >= 0);
+ assert(packet);
+
+ req = &packet->rheader;
+
+ assert_return(length >= sizeof(RHeader), -EIO);
+ assert_return(length == req->length, -EIO);
+
+ switch (req->type) {
+
+ case REQUEST_ADDRINFO: {
+ const AddrInfoRequest *ai_req = &packet->addrinfo_request;
+ struct addrinfo hints, *result = NULL;
+ const char *node, *service;
+ int ret;
+
+ assert_return(length >= sizeof(AddrInfoRequest), -EBADMSG);
+ assert_return(length == sizeof(AddrInfoRequest) + ai_req->node_len + ai_req->service_len, -EBADMSG);
+
+ hints = (struct addrinfo) {
+ .ai_flags = ai_req->ai_flags,
+ .ai_family = ai_req->ai_family,
+ .ai_socktype = ai_req->ai_socktype,
+ .ai_protocol = ai_req->ai_protocol,
+ };
+
+ node = ai_req->node_len ? (const char*) ai_req + sizeof(AddrInfoRequest) : NULL;
+ service = ai_req->service_len ? (const char*) ai_req + sizeof(AddrInfoRequest) + ai_req->node_len : NULL;
+
+ ret = getaddrinfo(node, service,
+ ai_req->hints_valid ? &hints : NULL,
+ &result);
+
+ /* send_addrinfo_reply() frees result */
+ return send_addrinfo_reply(out_fd, req->id, ret, result, errno, h_errno);
+ }
+
+ case REQUEST_NAMEINFO: {
+ const NameInfoRequest *ni_req = &packet->nameinfo_request;
+ char hostbuf[NI_MAXHOST], servbuf[NI_MAXSERV];
+ union sockaddr_union sa;
+ int ret;
+
+ assert_return(length >= sizeof(NameInfoRequest), -EBADMSG);
+ assert_return(length == sizeof(NameInfoRequest) + ni_req->sockaddr_len, -EBADMSG);
+ assert_return(ni_req->sockaddr_len <= sizeof(sa), -EBADMSG);
+
+ memcpy(&sa, (const uint8_t *) ni_req + sizeof(NameInfoRequest), ni_req->sockaddr_len);
+
+ ret = getnameinfo(&sa.sa, ni_req->sockaddr_len,
+ ni_req->gethost ? hostbuf : NULL, ni_req->gethost ? sizeof(hostbuf) : 0,
+ ni_req->getserv ? servbuf : NULL, ni_req->getserv ? sizeof(servbuf) : 0,
+ ni_req->flags);
+
+ return send_nameinfo_reply(out_fd, req->id, ret,
+ ret == 0 && ni_req->gethost ? hostbuf : NULL,
+ ret == 0 && ni_req->getserv ? servbuf : NULL,
+ errno, h_errno);
+ }
+
+ case REQUEST_TERMINATE:
+ /* Quit */
+ return -ECONNRESET;
+
+ default:
+ assert_not_reached("Unknown request");
+ }
+
+ return 0;
+}
+
+static void* thread_worker(void *p) {
+ sd_resolve *resolve = p;
+
+ /* Assign a pretty name to this thread */
+ (void) pthread_setname_np(pthread_self(), "sd-resolve");
+
+ while (!resolve->dead) {
+ union {
+ Packet packet;
+ uint8_t space[BUFSIZE];
+ } buf;
+ ssize_t length;
+
+ length = recv(resolve->fds[REQUEST_RECV_FD], &buf, sizeof buf, 0);
+ if (length < 0) {
+ if (errno == EINTR)
+ continue;
+
+ break;
+ }
+ if (length == 0)
+ break;
+
+ if (handle_request(resolve->fds[RESPONSE_SEND_FD], &buf.packet, (size_t) length) < 0)
+ break;
+ }
+
+ send_died(resolve->fds[RESPONSE_SEND_FD]);
+
+ return NULL;
+}
+
+static int start_threads(sd_resolve *resolve, unsigned extra) {
+ sigset_t ss, saved_ss;
+ unsigned n;
+ int r, k;
+
+ assert_se(sigfillset(&ss) >= 0);
+
+ /* No signals in forked off threads please. We set the mask before forking, so that the threads never exist
+ * with a different mask than a fully blocked one */
+ r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss);
+ if (r > 0)
+ return -r;
+
+ n = resolve->n_outstanding + extra;
+ n = CLAMP(n, WORKERS_MIN, WORKERS_MAX);
+
+ while (resolve->n_valid_workers < n) {
+ r = pthread_create(&resolve->workers[resolve->n_valid_workers], NULL, thread_worker, resolve);
+ if (r > 0) {
+ r = -r;
+ goto finish;
+ }
+
+ resolve->n_valid_workers++;
+ }
+
+ r = 0;
+
+finish:
+ k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL);
+ if (k > 0 && r >= 0)
+ r = -k;
+
+ return r;
+}
+
+static bool resolve_pid_changed(sd_resolve *r) {
+ assert(r);
+
+ /* We don't support people creating a resolver and keeping it
+ * around after fork(). Let's complain. */
+
+ return r->original_pid != getpid_cached();
+}
+
+_public_ int sd_resolve_new(sd_resolve **ret) {
+ _cleanup_(sd_resolve_unrefp) sd_resolve *resolve = NULL;
+ int i;
+
+ assert_return(ret, -EINVAL);
+
+ resolve = new0(sd_resolve, 1);
+ if (!resolve)
+ return -ENOMEM;
+
+ resolve->n_ref = 1;
+ resolve->original_pid = getpid_cached();
+
+ for (i = 0; i < _FD_MAX; i++)
+ resolve->fds[i] = -1;
+
+ if (socketpair(PF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, resolve->fds + REQUEST_RECV_FD) < 0)
+ return -errno;
+
+ if (socketpair(PF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, resolve->fds + RESPONSE_RECV_FD) < 0)
+ return -errno;
+
+ for (i = 0; i < _FD_MAX; i++)
+ resolve->fds[i] = fd_move_above_stdio(resolve->fds[i]);
+
+ (void) fd_inc_sndbuf(resolve->fds[REQUEST_SEND_FD], QUERIES_MAX * BUFSIZE);
+ (void) fd_inc_rcvbuf(resolve->fds[REQUEST_RECV_FD], QUERIES_MAX * BUFSIZE);
+ (void) fd_inc_sndbuf(resolve->fds[RESPONSE_SEND_FD], QUERIES_MAX * BUFSIZE);
+ (void) fd_inc_rcvbuf(resolve->fds[RESPONSE_RECV_FD], QUERIES_MAX * BUFSIZE);
+
+ (void) fd_nonblock(resolve->fds[RESPONSE_RECV_FD], true);
+
+ *ret = TAKE_PTR(resolve);
+ return 0;
+}
+
+_public_ int sd_resolve_default(sd_resolve **ret) {
+ static thread_local sd_resolve *default_resolve = NULL;
+ sd_resolve *e = NULL;
+ int r;
+
+ if (!ret)
+ return !!default_resolve;
+
+ if (default_resolve) {
+ *ret = sd_resolve_ref(default_resolve);
+ return 0;
+ }
+
+ r = sd_resolve_new(&e);
+ if (r < 0)
+ return r;
+
+ e->default_resolve_ptr = &default_resolve;
+ e->tid = gettid();
+ default_resolve = e;
+
+ *ret = e;
+ return 1;
+}
+
+_public_ int sd_resolve_get_tid(sd_resolve *resolve, pid_t *tid) {
+ assert_return(resolve, -EINVAL);
+ assert_return(tid, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ if (resolve->tid != 0) {
+ *tid = resolve->tid;
+ return 0;
+ }
+
+ if (resolve->event)
+ return sd_event_get_tid(resolve->event, tid);
+
+ return -ENXIO;
+}
+
+static sd_resolve *resolve_free(sd_resolve *resolve) {
+ PROTECT_ERRNO;
+ sd_resolve_query *q;
+ unsigned i;
+
+ assert(resolve);
+
+ while ((q = resolve->queries)) {
+ assert(q->floating);
+ resolve_query_disconnect(q);
+ sd_resolve_query_unref(q);
+ }
+
+ if (resolve->default_resolve_ptr)
+ *(resolve->default_resolve_ptr) = NULL;
+
+ resolve->dead = true;
+
+ sd_resolve_detach_event(resolve);
+
+ if (resolve->fds[REQUEST_SEND_FD] >= 0) {
+
+ RHeader req = {
+ .type = REQUEST_TERMINATE,
+ .length = sizeof req,
+ };
+
+ /* Send one termination packet for each worker */
+ for (i = 0; i < resolve->n_valid_workers; i++)
+ (void) send(resolve->fds[REQUEST_SEND_FD], &req, req.length, MSG_NOSIGNAL);
+ }
+
+ /* Now terminate them and wait until they are gone.
+ If we get an error than most likely the thread already exited. */
+ for (i = 0; i < resolve->n_valid_workers; i++)
+ (void) pthread_join(resolve->workers[i], NULL);
+
+ /* Close all communication channels */
+ close_many(resolve->fds, _FD_MAX);
+
+ return mfree(resolve);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_resolve, sd_resolve, resolve_free);
+
+_public_ int sd_resolve_get_fd(sd_resolve *resolve) {
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ return resolve->fds[RESPONSE_RECV_FD];
+}
+
+_public_ int sd_resolve_get_events(sd_resolve *resolve) {
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ return resolve->n_queries > resolve->n_done ? POLLIN : 0;
+}
+
+_public_ int sd_resolve_get_timeout(sd_resolve *resolve, uint64_t *usec) {
+ assert_return(resolve, -EINVAL);
+ assert_return(usec, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ *usec = (uint64_t) -1;
+ return 0;
+}
+
+static sd_resolve_query *lookup_query(sd_resolve *resolve, unsigned id) {
+ sd_resolve_query *q;
+
+ assert(resolve);
+
+ q = resolve->query_array[id % QUERIES_MAX];
+ if (q)
+ if (q->id == id)
+ return q;
+
+ return NULL;
+}
+
+static int complete_query(sd_resolve *resolve, sd_resolve_query *q) {
+ int r;
+
+ assert(q);
+ assert(!q->done);
+ assert(q->resolve == resolve);
+
+ q->done = true;
+ resolve->n_done++;
+
+ resolve->current = sd_resolve_query_ref(q);
+
+ switch (q->type) {
+
+ case REQUEST_ADDRINFO:
+ r = getaddrinfo_done(q);
+ break;
+
+ case REQUEST_NAMEINFO:
+ r = getnameinfo_done(q);
+ break;
+
+ default:
+ assert_not_reached("Cannot complete unknown query type");
+ }
+
+ resolve->current = NULL;
+
+ if (q->floating) {
+ resolve_query_disconnect(q);
+ sd_resolve_query_unref(q);
+ }
+
+ sd_resolve_query_unref(q);
+
+ return r;
+}
+
+static int unserialize_addrinfo(const void **p, size_t *length, struct addrinfo **ret_ai) {
+ AddrInfoSerialization s;
+ struct addrinfo *ai;
+ size_t l;
+
+ assert(p);
+ assert(*p);
+ assert(ret_ai);
+ assert(length);
+
+ if (*length < sizeof(AddrInfoSerialization))
+ return -EBADMSG;
+
+ memcpy(&s, *p, sizeof(s));
+
+ l = sizeof(AddrInfoSerialization) + s.ai_addrlen + s.canonname_len;
+ if (*length < l)
+ return -EBADMSG;
+
+ ai = new(struct addrinfo, 1);
+ if (!ai)
+ return -ENOMEM;
+
+ *ai = (struct addrinfo) {
+ .ai_flags = s.ai_flags,
+ .ai_family = s.ai_family,
+ .ai_socktype = s.ai_socktype,
+ .ai_protocol = s.ai_protocol,
+ .ai_addrlen = s.ai_addrlen,
+ };
+
+ if (s.ai_addrlen > 0) {
+ ai->ai_addr = memdup((const uint8_t*) *p + sizeof(AddrInfoSerialization), s.ai_addrlen);
+ if (!ai->ai_addr) {
+ free(ai);
+ return -ENOMEM;
+ }
+ }
+
+ if (s.canonname_len > 0) {
+ ai->ai_canonname = memdup((const uint8_t*) *p + sizeof(AddrInfoSerialization) + s.ai_addrlen, s.canonname_len);
+ if (!ai->ai_canonname) {
+ free(ai->ai_addr);
+ free(ai);
+ return -ENOMEM;
+ }
+ }
+
+ *length -= l;
+ *ret_ai = ai;
+ *p = ((const uint8_t*) *p) + l;
+
+ return 0;
+}
+
+static int handle_response(sd_resolve *resolve, const Packet *packet, size_t length) {
+ const RHeader *resp;
+ sd_resolve_query *q;
+ int r;
+
+ assert(resolve);
+ assert(packet);
+
+ resp = &packet->rheader;
+ assert_return(length >= sizeof(RHeader), -EIO);
+ assert_return(length == resp->length, -EIO);
+
+ if (resp->type == RESPONSE_DIED) {
+ resolve->dead = true;
+ return 0;
+ }
+
+ assert(resolve->n_outstanding > 0);
+ resolve->n_outstanding--;
+
+ q = lookup_query(resolve, resp->id);
+ if (!q)
+ return 0;
+
+ switch (resp->type) {
+
+ case RESPONSE_ADDRINFO: {
+ const AddrInfoResponse *ai_resp = &packet->addrinfo_response;
+ const void *p;
+ size_t l;
+ struct addrinfo *prev = NULL;
+
+ assert_return(length >= sizeof(AddrInfoResponse), -EBADMSG);
+ assert_return(q->type == REQUEST_ADDRINFO, -EBADMSG);
+
+ query_assign_errno(q, ai_resp->ret, ai_resp->_errno, ai_resp->_h_errno);
+
+ l = length - sizeof(AddrInfoResponse);
+ p = (const uint8_t*) resp + sizeof(AddrInfoResponse);
+
+ while (l > 0 && p) {
+ struct addrinfo *ai = NULL;
+
+ r = unserialize_addrinfo(&p, &l, &ai);
+ if (r < 0) {
+ query_assign_errno(q, EAI_SYSTEM, r, 0);
+ freeaddrinfo(q->addrinfo);
+ q->addrinfo = NULL;
+ break;
+ }
+
+ if (prev)
+ prev->ai_next = ai;
+ else
+ q->addrinfo = ai;
+
+ prev = ai;
+ }
+
+ return complete_query(resolve, q);
+ }
+
+ case RESPONSE_NAMEINFO: {
+ const NameInfoResponse *ni_resp = &packet->nameinfo_response;
+
+ assert_return(length >= sizeof(NameInfoResponse), -EBADMSG);
+ assert_return(q->type == REQUEST_NAMEINFO, -EBADMSG);
+
+ if (ni_resp->hostlen > DNS_HOSTNAME_MAX ||
+ ni_resp->servlen > DNS_HOSTNAME_MAX ||
+ sizeof(NameInfoResponse) + ni_resp->hostlen + ni_resp->servlen > length)
+ query_assign_errno(q, EAI_SYSTEM, EIO, 0);
+ else {
+ query_assign_errno(q, ni_resp->ret, ni_resp->_errno, ni_resp->_h_errno);
+
+ if (ni_resp->hostlen > 0) {
+ q->host = strndup((const char*) ni_resp + sizeof(NameInfoResponse),
+ ni_resp->hostlen-1);
+ if (!q->host)
+ query_assign_errno(q, EAI_MEMORY, ENOMEM, 0);
+ }
+
+ if (ni_resp->servlen > 0) {
+ q->serv = strndup((const char*) ni_resp + sizeof(NameInfoResponse) + ni_resp->hostlen,
+ ni_resp->servlen-1);
+ if (!q->serv)
+ query_assign_errno(q, EAI_MEMORY, ENOMEM, 0);
+ }
+ }
+
+ return complete_query(resolve, q);
+ }
+
+ default:
+ return 0;
+ }
+}
+
+_public_ int sd_resolve_process(sd_resolve *resolve) {
+ RESOLVE_DONT_DESTROY(resolve);
+
+ union {
+ Packet packet;
+ uint8_t space[BUFSIZE];
+ } buf;
+ ssize_t l;
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ /* We don't allow recursively invoking sd_resolve_process(). */
+ assert_return(!resolve->current, -EBUSY);
+
+ l = recv(resolve->fds[RESPONSE_RECV_FD], &buf, sizeof buf, 0);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ return -errno;
+ }
+ if (l == 0)
+ return -ECONNREFUSED;
+
+ r = handle_response(resolve, &buf.packet, (size_t) l);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+_public_ int sd_resolve_wait(sd_resolve *resolve, uint64_t timeout_usec) {
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ if (resolve->n_done >= resolve->n_queries)
+ return 0;
+
+ do {
+ r = fd_wait_for_event(resolve->fds[RESPONSE_RECV_FD], POLLIN, timeout_usec);
+ } while (r == -EINTR);
+
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ETIMEDOUT;
+
+ return sd_resolve_process(resolve);
+}
+
+static int alloc_query(sd_resolve *resolve, bool floating, sd_resolve_query **_q) {
+ sd_resolve_query *q;
+ int r;
+
+ assert(resolve);
+ assert(_q);
+
+ if (resolve->n_queries >= QUERIES_MAX)
+ return -ENOBUFS;
+
+ r = start_threads(resolve, 1);
+ if (r < 0)
+ return r;
+
+ while (resolve->query_array[resolve->current_id % QUERIES_MAX])
+ resolve->current_id++;
+
+ q = resolve->query_array[resolve->current_id % QUERIES_MAX] = new0(sd_resolve_query, 1);
+ if (!q)
+ return -ENOMEM;
+
+ q->n_ref = 1;
+ q->resolve = resolve;
+ q->floating = floating;
+ q->id = resolve->current_id++;
+
+ if (!floating)
+ sd_resolve_ref(resolve);
+
+ LIST_PREPEND(queries, resolve->queries, q);
+ resolve->n_queries++;
+
+ *_q = q;
+ return 0;
+}
+
+int resolve_getaddrinfo_with_destroy_callback(
+ sd_resolve *resolve,
+ sd_resolve_query **ret_query,
+ const char *node, const char *service,
+ const struct addrinfo *hints,
+ sd_resolve_getaddrinfo_handler_t callback,
+ sd_resolve_destroy_t destroy_callback,
+ void *userdata) {
+
+ _cleanup_(sd_resolve_query_unrefp) sd_resolve_query *q = NULL;
+ size_t node_len, service_len;
+ AddrInfoRequest req = {};
+ struct iovec iov[3];
+ struct msghdr mh = {};
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(node || service, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ r = alloc_query(resolve, !ret_query, &q);
+ if (r < 0)
+ return r;
+
+ q->type = REQUEST_ADDRINFO;
+ q->getaddrinfo_handler = callback;
+ q->userdata = userdata;
+
+ node_len = node ? strlen(node) + 1 : 0;
+ service_len = service ? strlen(service) + 1 : 0;
+
+ req = (AddrInfoRequest) {
+ .node_len = node_len,
+ .service_len = service_len,
+
+ .header.id = q->id,
+ .header.type = REQUEST_ADDRINFO,
+ .header.length = sizeof(AddrInfoRequest) + node_len + service_len,
+
+ .hints_valid = hints,
+ .ai_flags = hints ? hints->ai_flags : 0,
+ .ai_family = hints ? hints->ai_family : 0,
+ .ai_socktype = hints ? hints->ai_socktype : 0,
+ .ai_protocol = hints ? hints->ai_protocol : 0,
+ };
+
+ iov[mh.msg_iovlen++] = IOVEC_MAKE(&req, sizeof(AddrInfoRequest));
+ if (node)
+ iov[mh.msg_iovlen++] = IOVEC_MAKE((void*) node, req.node_len);
+ if (service)
+ iov[mh.msg_iovlen++] = IOVEC_MAKE((void*) service, req.service_len);
+ mh.msg_iov = iov;
+
+ if (sendmsg(resolve->fds[REQUEST_SEND_FD], &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ resolve->n_outstanding++;
+ q->destroy_callback = destroy_callback;
+
+ if (ret_query)
+ *ret_query = q;
+
+ TAKE_PTR(q);
+
+ return 0;
+}
+
+_public_ int sd_resolve_getaddrinfo(
+ sd_resolve *resolve,
+ sd_resolve_query **ret_query,
+ const char *node, const char *service,
+ const struct addrinfo *hints,
+ sd_resolve_getaddrinfo_handler_t callback,
+ void *userdata) {
+
+ return resolve_getaddrinfo_with_destroy_callback(resolve, ret_query, node, service, hints, callback, NULL, userdata);
+}
+
+static int getaddrinfo_done(sd_resolve_query* q) {
+ assert(q);
+ assert(q->done);
+ assert(q->getaddrinfo_handler);
+
+ errno = q->_errno;
+ h_errno = q->_h_errno;
+
+ return q->getaddrinfo_handler(q, q->ret, q->addrinfo, q->userdata);
+}
+
+int resolve_getnameinfo_with_destroy_callback(
+ sd_resolve *resolve,
+ sd_resolve_query **ret_query,
+ const struct sockaddr *sa, socklen_t salen,
+ int flags,
+ uint64_t get,
+ sd_resolve_getnameinfo_handler_t callback,
+ sd_resolve_destroy_t destroy_callback,
+ void *userdata) {
+
+ _cleanup_(sd_resolve_query_unrefp) sd_resolve_query *q = NULL;
+ NameInfoRequest req = {};
+ struct iovec iov[2];
+ struct msghdr mh;
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(sa, -EINVAL);
+ assert_return(salen >= sizeof(struct sockaddr), -EINVAL);
+ assert_return(salen <= sizeof(union sockaddr_union), -EINVAL);
+ assert_return((get & ~SD_RESOLVE_GET_BOTH) == 0, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ r = alloc_query(resolve, !ret_query, &q);
+ if (r < 0)
+ return r;
+
+ q->type = REQUEST_NAMEINFO;
+ q->getnameinfo_handler = callback;
+ q->userdata = userdata;
+
+ req = (NameInfoRequest) {
+ .header.id = q->id,
+ .header.type = REQUEST_NAMEINFO,
+ .header.length = sizeof(NameInfoRequest) + salen,
+
+ .flags = flags,
+ .sockaddr_len = salen,
+ .gethost = !!(get & SD_RESOLVE_GET_HOST),
+ .getserv = !!(get & SD_RESOLVE_GET_SERVICE),
+ };
+
+ iov[0] = IOVEC_MAKE(&req, sizeof(NameInfoRequest));
+ iov[1] = IOVEC_MAKE((void*) sa, salen);
+
+ mh = (struct msghdr) {
+ .msg_iov = iov,
+ .msg_iovlen = ELEMENTSOF(iov)
+ };
+
+ if (sendmsg(resolve->fds[REQUEST_SEND_FD], &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ resolve->n_outstanding++;
+ q->destroy_callback = destroy_callback;
+
+ if (ret_query)
+ *ret_query = q;
+
+ TAKE_PTR(q);
+
+ return 0;
+}
+
+_public_ int sd_resolve_getnameinfo(
+ sd_resolve *resolve,
+ sd_resolve_query **ret_query,
+ const struct sockaddr *sa, socklen_t salen,
+ int flags,
+ uint64_t get,
+ sd_resolve_getnameinfo_handler_t callback,
+ void *userdata) {
+
+ return resolve_getnameinfo_with_destroy_callback(resolve, ret_query, sa, salen, flags, get, callback, NULL, userdata);
+}
+
+static int getnameinfo_done(sd_resolve_query *q) {
+
+ assert(q);
+ assert(q->done);
+ assert(q->getnameinfo_handler);
+
+ errno = q->_errno;
+ h_errno = q->_h_errno;
+
+ return q->getnameinfo_handler(q, q->ret, q->host, q->serv, q->userdata);
+}
+
+static void resolve_freeaddrinfo(struct addrinfo *ai) {
+ while (ai) {
+ struct addrinfo *next = ai->ai_next;
+
+ free(ai->ai_addr);
+ free(ai->ai_canonname);
+ free(ai);
+ ai = next;
+ }
+}
+
+static void resolve_query_disconnect(sd_resolve_query *q) {
+ sd_resolve *resolve;
+ unsigned i;
+
+ assert(q);
+
+ if (!q->resolve)
+ return;
+
+ resolve = q->resolve;
+ assert(resolve->n_queries > 0);
+
+ if (q->done) {
+ assert(resolve->n_done > 0);
+ resolve->n_done--;
+ }
+
+ i = q->id % QUERIES_MAX;
+ assert(resolve->query_array[i] == q);
+ resolve->query_array[i] = NULL;
+ LIST_REMOVE(queries, resolve->queries, q);
+ resolve->n_queries--;
+
+ q->resolve = NULL;
+ if (!q->floating)
+ sd_resolve_unref(resolve);
+}
+
+static sd_resolve_query *resolve_query_free(sd_resolve_query *q) {
+ assert(q);
+
+ resolve_query_disconnect(q);
+
+ if (q->destroy_callback)
+ q->destroy_callback(q->userdata);
+
+ resolve_freeaddrinfo(q->addrinfo);
+ free(q->host);
+ free(q->serv);
+
+ return mfree(q);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_resolve_query, sd_resolve_query, resolve_query_free);
+
+_public_ int sd_resolve_query_is_done(sd_resolve_query *q) {
+ assert_return(q, -EINVAL);
+ assert_return(!resolve_pid_changed(q->resolve), -ECHILD);
+
+ return q->done;
+}
+
+_public_ void* sd_resolve_query_set_userdata(sd_resolve_query *q, void *userdata) {
+ void *ret;
+
+ assert_return(q, NULL);
+ assert_return(!resolve_pid_changed(q->resolve), NULL);
+
+ ret = q->userdata;
+ q->userdata = userdata;
+
+ return ret;
+}
+
+_public_ void* sd_resolve_query_get_userdata(sd_resolve_query *q) {
+ assert_return(q, NULL);
+ assert_return(!resolve_pid_changed(q->resolve), NULL);
+
+ return q->userdata;
+}
+
+_public_ sd_resolve *sd_resolve_query_get_resolve(sd_resolve_query *q) {
+ assert_return(q, NULL);
+ assert_return(!resolve_pid_changed(q->resolve), NULL);
+
+ return q->resolve;
+}
+
+_public_ int sd_resolve_query_get_destroy_callback(sd_resolve_query *q, sd_resolve_destroy_t *destroy_callback) {
+ assert_return(q, -EINVAL);
+
+ if (destroy_callback)
+ *destroy_callback = q->destroy_callback;
+
+ return !!q->destroy_callback;
+}
+
+_public_ int sd_resolve_query_set_destroy_callback(sd_resolve_query *q, sd_resolve_destroy_t destroy_callback) {
+ assert_return(q, -EINVAL);
+
+ q->destroy_callback = destroy_callback;
+ return 0;
+}
+
+_public_ int sd_resolve_query_get_floating(sd_resolve_query *q) {
+ assert_return(q, -EINVAL);
+
+ return q->floating;
+}
+
+_public_ int sd_resolve_query_set_floating(sd_resolve_query *q, int b) {
+ assert_return(q, -EINVAL);
+
+ if (q->floating == !!b)
+ return 0;
+
+ if (!q->resolve) /* Already disconnected */
+ return -ESTALE;
+
+ q->floating = b;
+
+ if (b) {
+ sd_resolve_query_ref(q);
+ sd_resolve_unref(q->resolve);
+ } else {
+ sd_resolve_ref(q->resolve);
+ sd_resolve_query_unref(q);
+ }
+
+ return 1;
+}
+
+static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_resolve *resolve = userdata;
+ int r;
+
+ assert(resolve);
+
+ r = sd_resolve_process(resolve);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+_public_ int sd_resolve_attach_event(sd_resolve *resolve, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve->event, -EBUSY);
+
+ assert(!resolve->event_source);
+
+ if (event)
+ resolve->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&resolve->event);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_add_io(resolve->event, &resolve->event_source, resolve->fds[RESPONSE_RECV_FD], POLLIN, io_callback, resolve);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(resolve->event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ sd_resolve_detach_event(resolve);
+ return r;
+}
+
+_public_ int sd_resolve_detach_event(sd_resolve *resolve) {
+ assert_return(resolve, -EINVAL);
+
+ if (!resolve->event)
+ return 0;
+
+ if (resolve->event_source) {
+ sd_event_source_set_enabled(resolve->event_source, SD_EVENT_OFF);
+ resolve->event_source = sd_event_source_unref(resolve->event_source);
+ }
+
+ resolve->event = sd_event_unref(resolve->event);
+ return 1;
+}
+
+_public_ sd_event *sd_resolve_get_event(sd_resolve *resolve) {
+ assert_return(resolve, NULL);
+
+ return resolve->event;
+}
diff --git a/src/libsystemd/sd-resolve/test-resolve.c b/src/libsystemd/sd-resolve/test-resolve.c
new file mode 100644
index 0000000..8de7adc
--- /dev/null
+++ b/src/libsystemd/sd-resolve/test-resolve.c
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <resolv.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include "sd-resolve.h"
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+#define TEST_TIMEOUT_USEC (20*USEC_PER_SEC)
+
+static int getaddrinfo_handler(sd_resolve_query *q, int ret, const struct addrinfo *ai, void *userdata) {
+ const struct addrinfo *i;
+
+ assert_se(q);
+
+ if (ret != 0) {
+ log_error("getaddrinfo error: %s %i", gai_strerror(ret), ret);
+ return 0;
+ }
+
+ for (i = ai; i; i = i->ai_next) {
+ _cleanup_free_ char *addr = NULL;
+
+ assert_se(sockaddr_pretty(i->ai_addr, i->ai_addrlen, false, true, &addr) == 0);
+ puts(addr);
+ }
+
+ printf("canonical name: %s\n", strna(ai->ai_canonname));
+
+ return 0;
+}
+
+static int getnameinfo_handler(sd_resolve_query *q, int ret, const char *host, const char *serv, void *userdata) {
+ assert_se(q);
+
+ if (ret != 0) {
+ log_error("getnameinfo error: %s %i", gai_strerror(ret), ret);
+ return 0;
+ }
+
+ printf("Host: %s — Serv: %s\n", strna(host), strna(serv));
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_resolve_query_unrefp) sd_resolve_query *q1 = NULL, *q2 = NULL;
+ _cleanup_(sd_resolve_unrefp) sd_resolve *resolve = NULL;
+ int r = 0;
+
+ struct addrinfo hints = {
+ .ai_family = PF_UNSPEC,
+ .ai_socktype = SOCK_STREAM,
+ .ai_flags = AI_CANONNAME
+ };
+
+ struct sockaddr_in sa = {
+ .sin_family = AF_INET,
+ .sin_port = htons(80)
+ };
+
+ assert_se(sd_resolve_default(&resolve) >= 0);
+
+ /* Test a floating resolver query */
+ r = sd_resolve_getaddrinfo(resolve, NULL, "redhat.com", "http", NULL, getaddrinfo_handler, NULL);
+ if (r < 0)
+ log_error_errno(r, "sd_resolve_getaddrinfo(): %m");
+
+ /* Make a name -> address query */
+ r = sd_resolve_getaddrinfo(resolve, &q1, argc >= 2 ? argv[1] : "www.heise.de", NULL, &hints, getaddrinfo_handler, NULL);
+ if (r < 0)
+ log_error_errno(r, "sd_resolve_getaddrinfo(): %m");
+
+ /* Make an address -> name query */
+ sa.sin_addr.s_addr = inet_addr(argc >= 3 ? argv[2] : "193.99.144.71");
+ r = sd_resolve_getnameinfo(resolve, &q2, (struct sockaddr*) &sa, sizeof(sa), 0, SD_RESOLVE_GET_BOTH, getnameinfo_handler, NULL);
+ if (r < 0)
+ log_error_errno(r, "sd_resolve_getnameinfo(): %m");
+
+ /* Wait until all queries are completed */
+ for (;;) {
+ r = sd_resolve_wait(resolve, TEST_TIMEOUT_USEC);
+ if (r == 0)
+ break;
+ if (r == -ETIMEDOUT) {
+ /* Let's catch timeouts here, so that we can run safely in a CI that has no reliable DNS. Note
+ * that we invoke exit() directly here, as the stuck NSS call will not allow us to exit
+ * cleanly. */
+
+ log_notice_errno(r, "sd_resolve_wait() timed out, but that's OK");
+ exit(EXIT_SUCCESS);
+ }
+ if (r < 0) {
+ log_error_errno(r, "sd_resolve_wait(): %m");
+ assert_not_reached("sd_resolve_wait() failed");
+ }
+ }
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-utf8/sd-utf8.c b/src/libsystemd/sd-utf8/sd-utf8.c
new file mode 100644
index 0000000..78323cf
--- /dev/null
+++ b/src/libsystemd/sd-utf8/sd-utf8.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-utf8.h"
+
+#include "utf8.h"
+#include "util.h"
+
+_public_ const char *sd_utf8_is_valid(const char *s) {
+ assert_return(s, NULL);
+
+ return utf8_is_valid(s);
+}
+
+_public_ const char *sd_ascii_is_valid(const char *s) {
+ assert_return(s, NULL);
+
+ return ascii_is_valid(s);
+}
diff --git a/src/libudev/libudev-device-internal.h b/src/libudev/libudev-device-internal.h
new file mode 100644
index 0000000..8a6e5a4
--- /dev/null
+++ b/src/libudev/libudev-device-internal.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "libudev.h"
+#include "sd-device.h"
+
+#include "libudev-list-internal.h"
+
+/**
+ * udev_device:
+ *
+ * Opaque object representing one kernel sys device.
+ */
+struct udev_device {
+ struct udev *udev;
+
+ /* real device object */
+ sd_device *device;
+
+ /* legacy */
+ unsigned n_ref;
+
+ struct udev_device *parent;
+ bool parent_set;
+
+ struct udev_list properties;
+ uint64_t properties_generation;
+ struct udev_list tags;
+ uint64_t tags_generation;
+ struct udev_list devlinks;
+ uint64_t devlinks_generation;
+ bool properties_read:1;
+ bool tags_read:1;
+ bool devlinks_read:1;
+ struct udev_list sysattrs;
+ bool sysattrs_read;
+};
+
+struct udev_device *udev_device_new(struct udev *udev, sd_device *device);
diff --git a/src/libudev/libudev-device.c b/src/libudev/libudev-device.c
new file mode 100644
index 0000000..1e7d774
--- /dev/null
+++ b/src/libudev/libudev-device.c
@@ -0,0 +1,836 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "libudev.h"
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "libudev-device-internal.h"
+#include "parse-util.h"
+#include "time-util.h"
+
+/**
+ * SECTION:libudev-device
+ * @short_description: kernel sys devices
+ *
+ * Representation of kernel sys devices. Devices are uniquely identified
+ * by their syspath, every device has exactly one path in the kernel sys
+ * filesystem. Devices usually belong to a kernel subsystem, and have
+ * a unique name inside that subsystem.
+ */
+
+/**
+ * udev_device_get_seqnum:
+ * @udev_device: udev device
+ *
+ * This is only valid if the device was received through a monitor. Devices read from
+ * sys do not have a sequence number.
+ *
+ * Returns: the kernel event sequence number, or 0 if there is no sequence number available.
+ **/
+_public_ unsigned long long int udev_device_get_seqnum(struct udev_device *udev_device) {
+ const char *seqnum;
+ unsigned long long ret;
+ int r;
+
+ assert_return_errno(udev_device, 0, EINVAL);
+
+ r = sd_device_get_property_value(udev_device->device, "SEQNUM", &seqnum);
+ if (r == -ENOENT)
+ return 0;
+ else if (r < 0)
+ return_with_errno(0, r);
+
+ r = safe_atollu(seqnum, &ret);
+ if (r < 0)
+ return_with_errno(0, r);
+
+ return ret;
+}
+
+/**
+ * udev_device_get_devnum:
+ * @udev_device: udev device
+ *
+ * Get the device major/minor number.
+ *
+ * Returns: the dev_t number.
+ **/
+_public_ dev_t udev_device_get_devnum(struct udev_device *udev_device) {
+ dev_t devnum;
+ int r;
+
+ assert_return_errno(udev_device, makedev(0, 0), EINVAL);
+
+ r = sd_device_get_devnum(udev_device->device, &devnum);
+ if (r == -ENOENT)
+ return makedev(0, 0);
+ if (r < 0)
+ return_with_errno(makedev(0, 0), r);
+
+ return devnum;
+}
+
+/**
+ * udev_device_get_driver:
+ * @udev_device: udev device
+ *
+ * Get the kernel driver name.
+ *
+ * Returns: the driver name string, or #NULL if there is no driver attached.
+ **/
+_public_ const char *udev_device_get_driver(struct udev_device *udev_device) {
+ const char *driver;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_driver(udev_device->device, &driver);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return driver;
+}
+
+/**
+ * udev_device_get_devtype:
+ * @udev_device: udev device
+ *
+ * Retrieve the devtype string of the udev device.
+ *
+ * Returns: the devtype name of the udev device, or #NULL if it cannot be determined
+ **/
+_public_ const char *udev_device_get_devtype(struct udev_device *udev_device) {
+ const char *devtype;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_devtype(udev_device->device, &devtype);
+ if (r == -ENOENT)
+ return NULL;
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return devtype;
+}
+
+/**
+ * udev_device_get_subsystem:
+ * @udev_device: udev device
+ *
+ * Retrieve the subsystem string of the udev device. The string does not
+ * contain any "/".
+ *
+ * Returns: the subsystem name of the udev device, or #NULL if it cannot be determined
+ **/
+_public_ const char *udev_device_get_subsystem(struct udev_device *udev_device) {
+ const char *subsystem;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_subsystem(udev_device->device, &subsystem);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return subsystem;
+}
+
+/**
+ * udev_device_get_property_value:
+ * @udev_device: udev device
+ * @key: property name
+ *
+ * Get the value of a given property.
+ *
+ * Returns: the property string, or #NULL if there is no such property.
+ **/
+_public_ const char *udev_device_get_property_value(struct udev_device *udev_device, const char *key) {
+ const char *value;
+ int r;
+
+ assert_return_errno(udev_device && key, NULL, EINVAL);
+
+ r = sd_device_get_property_value(udev_device->device, key, &value);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return value;
+}
+
+struct udev_device *udev_device_new(struct udev *udev, sd_device *device) {
+ struct udev_device *udev_device;
+
+ assert(device);
+
+ udev_device = new(struct udev_device, 1);
+ if (!udev_device)
+ return_with_errno(NULL, ENOMEM);
+
+ *udev_device = (struct udev_device) {
+ .n_ref = 1,
+ .udev = udev,
+ .device = sd_device_ref(device),
+ };
+
+ udev_list_init(&udev_device->properties, true);
+ udev_list_init(&udev_device->tags, true);
+ udev_list_init(&udev_device->sysattrs, true);
+ udev_list_init(&udev_device->devlinks, true);
+
+ return udev_device;
+}
+
+/**
+ * udev_device_new_from_syspath:
+ * @udev: udev library context
+ * @syspath: sys device path including sys directory
+ *
+ * Create new udev device, and fill in information from the sys
+ * device and the udev database entry. The syspath is the absolute
+ * path to the device, including the sys mount point.
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev device.
+ *
+ * Returns: a new udev device, or #NULL, if it does not exist
+ **/
+_public_ struct udev_device *udev_device_new_from_syspath(struct udev *udev, const char *syspath) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ r = sd_device_new_from_syspath(&device, syspath);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return udev_device_new(udev, device);
+}
+
+/**
+ * udev_device_new_from_devnum:
+ * @udev: udev library context
+ * @type: char or block device
+ * @devnum: device major/minor number
+ *
+ * Create new udev device, and fill in information from the sys
+ * device and the udev database entry. The device is looked-up
+ * by its major/minor number and type. Character and block device
+ * numbers are not unique across the two types.
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev device.
+ *
+ * Returns: a new udev device, or #NULL, if it does not exist
+ **/
+_public_ struct udev_device *udev_device_new_from_devnum(struct udev *udev, char type, dev_t devnum) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ r = sd_device_new_from_devnum(&device, type, devnum);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return udev_device_new(udev, device);
+}
+
+/**
+ * udev_device_new_from_device_id:
+ * @udev: udev library context
+ * @id: text string identifying a kernel device
+ *
+ * Create new udev device, and fill in information from the sys
+ * device and the udev database entry. The device is looked-up
+ * by a special string:
+ * b8:2 - block device major:minor
+ * c128:1 - char device major:minor
+ * n3 - network device ifindex
+ * +sound:card29 - kernel driver core subsystem:device name
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev device.
+ *
+ * Returns: a new udev device, or #NULL, if it does not exist
+ **/
+_public_ struct udev_device *udev_device_new_from_device_id(struct udev *udev, const char *id) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ r = sd_device_new_from_device_id(&device, id);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return udev_device_new(udev, device);
+}
+
+/**
+ * udev_device_new_from_subsystem_sysname:
+ * @udev: udev library context
+ * @subsystem: the subsystem of the device
+ * @sysname: the name of the device
+ *
+ * Create new udev device, and fill in information from the sys device
+ * and the udev database entry. The device is looked up by the subsystem
+ * and name string of the device, like "mem" / "zero", or "block" / "sda".
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev device.
+ *
+ * Returns: a new udev device, or #NULL, if it does not exist
+ **/
+_public_ struct udev_device *udev_device_new_from_subsystem_sysname(struct udev *udev, const char *subsystem, const char *sysname) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ r = sd_device_new_from_subsystem_sysname(&device, subsystem, sysname);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return udev_device_new(udev, device);
+}
+
+/**
+ * udev_device_new_from_environment
+ * @udev: udev library context
+ *
+ * Create new udev device, and fill in information from the
+ * current process environment. This only works reliable if
+ * the process is called from a udev rule. It is usually used
+ * for tools executed from IMPORT= rules.
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev device.
+ *
+ * Returns: a new udev device, or #NULL, if it does not exist
+ **/
+_public_ struct udev_device *udev_device_new_from_environment(struct udev *udev) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ r = device_new_from_strv(&device, environ);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return udev_device_new(udev, device);
+}
+
+static struct udev_device *device_new_from_parent(struct udev_device *child) {
+ sd_device *parent;
+ int r;
+
+ assert_return_errno(child, NULL, EINVAL);
+
+ r = sd_device_get_parent(child->device, &parent);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return udev_device_new(child->udev, parent);
+}
+
+/**
+ * udev_device_get_parent:
+ * @udev_device: the device to start searching from
+ *
+ * Find the next parent device, and fill in information from the sys
+ * device and the udev database entry.
+ *
+ * Returned device is not referenced. It is attached to the child
+ * device, and will be cleaned up when the child device is cleaned up.
+ *
+ * It is not necessarily just the upper level directory, empty or not
+ * recognized sys directories are ignored.
+ *
+ * It can be called as many times as needed, without caring about
+ * references.
+ *
+ * Returns: a new udev device, or #NULL, if it no parent exist.
+ **/
+_public_ struct udev_device *udev_device_get_parent(struct udev_device *udev_device) {
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ if (!udev_device->parent_set) {
+ udev_device->parent_set = true;
+ udev_device->parent = device_new_from_parent(udev_device);
+ }
+
+ /* TODO: errno will differ here in case parent == NULL */
+ return udev_device->parent;
+}
+
+/**
+ * udev_device_get_parent_with_subsystem_devtype:
+ * @udev_device: udev device to start searching from
+ * @subsystem: the subsystem of the device
+ * @devtype: the type (DEVTYPE) of the device
+ *
+ * Find the next parent device, with a matching subsystem and devtype
+ * value, and fill in information from the sys device and the udev
+ * database entry.
+ *
+ * If devtype is #NULL, only subsystem is checked, and any devtype will
+ * match.
+ *
+ * Returned device is not referenced. It is attached to the child
+ * device, and will be cleaned up when the child device is cleaned up.
+ *
+ * It can be called as many times as needed, without caring about
+ * references.
+ *
+ * Returns: a new udev device, or #NULL if no matching parent exists.
+ **/
+_public_ struct udev_device *udev_device_get_parent_with_subsystem_devtype(struct udev_device *udev_device, const char *subsystem, const char *devtype) {
+ sd_device *parent;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ /* this relies on the fact that finding the subdevice of a parent or the
+ parent of a subdevice commute */
+
+ /* first find the correct sd_device */
+ r = sd_device_get_parent_with_subsystem_devtype(udev_device->device, subsystem, devtype, &parent);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ /* then walk the chain of udev_device parents until the corresponding
+ one is found */
+ while ((udev_device = udev_device_get_parent(udev_device)))
+ if (udev_device->device == parent)
+ return udev_device;
+
+ return_with_errno(NULL, ENOENT);
+}
+
+/**
+ * udev_device_get_udev:
+ * @udev_device: udev device
+ *
+ * Retrieve the udev library context the device was created with.
+ *
+ * Returns: the udev library context
+ **/
+_public_ struct udev *udev_device_get_udev(struct udev_device *udev_device) {
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ return udev_device->udev;
+}
+
+static struct udev_device *udev_device_free(struct udev_device *udev_device) {
+ assert(udev_device);
+
+ sd_device_unref(udev_device->device);
+ udev_device_unref(udev_device->parent);
+
+ udev_list_cleanup(&udev_device->properties);
+ udev_list_cleanup(&udev_device->sysattrs);
+ udev_list_cleanup(&udev_device->tags);
+ udev_list_cleanup(&udev_device->devlinks);
+
+ return mfree(udev_device);
+}
+
+/**
+ * udev_device_ref:
+ * @udev_device: udev device
+ *
+ * Take a reference of a udev device.
+ *
+ * Returns: the passed udev device
+ **/
+
+/**
+ * udev_device_unref:
+ * @udev_device: udev device
+ *
+ * Drop a reference of a udev device. If the refcount reaches zero,
+ * the resources of the device will be released.
+ *
+ * Returns: #NULL
+ **/
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(struct udev_device, udev_device, udev_device_free);
+
+/**
+ * udev_device_get_devpath:
+ * @udev_device: udev device
+ *
+ * Retrieve the kernel devpath value of the udev device. The path
+ * does not contain the sys mount point, and starts with a '/'.
+ *
+ * Returns: the devpath of the udev device
+ **/
+_public_ const char *udev_device_get_devpath(struct udev_device *udev_device) {
+ const char *devpath;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_devpath(udev_device->device, &devpath);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return devpath;
+}
+
+/**
+ * udev_device_get_syspath:
+ * @udev_device: udev device
+ *
+ * Retrieve the sys path of the udev device. The path is an
+ * absolute path and starts with the sys mount point.
+ *
+ * Returns: the sys path of the udev device
+ **/
+_public_ const char *udev_device_get_syspath(struct udev_device *udev_device) {
+ const char *syspath;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_syspath(udev_device->device, &syspath);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return syspath;
+}
+
+/**
+ * udev_device_get_sysname:
+ * @udev_device: udev device
+ *
+ * Get the kernel device name in /sys.
+ *
+ * Returns: the name string of the device
+ **/
+_public_ const char *udev_device_get_sysname(struct udev_device *udev_device) {
+ const char *sysname;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_sysname(udev_device->device, &sysname);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return sysname;
+}
+
+/**
+ * udev_device_get_sysnum:
+ * @udev_device: udev device
+ *
+ * Get the instance number of the device.
+ *
+ * Returns: the trailing number string of the device name
+ **/
+_public_ const char *udev_device_get_sysnum(struct udev_device *udev_device) {
+ const char *sysnum;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_sysnum(udev_device->device, &sysnum);
+ if (r == -ENOENT)
+ return NULL;
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return sysnum;
+}
+
+/**
+ * udev_device_get_devnode:
+ * @udev_device: udev device
+ *
+ * Retrieve the device node file name belonging to the udev device.
+ * The path is an absolute path, and starts with the device directory.
+ *
+ * Returns: the device node file name of the udev device, or #NULL if no device node exists
+ **/
+_public_ const char *udev_device_get_devnode(struct udev_device *udev_device) {
+ const char *devnode;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_devname(udev_device->device, &devnode);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return devnode;
+}
+
+/**
+ * udev_device_get_devlinks_list_entry:
+ * @udev_device: udev device
+ *
+ * Retrieve the list of device links pointing to the device file of
+ * the udev device. The next list entry can be retrieved with
+ * udev_list_entry_get_next(), which returns #NULL if no more entries exist.
+ * The devlink path can be retrieved from the list entry by
+ * udev_list_entry_get_name(). The path is an absolute path, and starts with
+ * the device directory.
+ *
+ * Returns: the first entry of the device node link list
+ **/
+_public_ struct udev_list_entry *udev_device_get_devlinks_list_entry(struct udev_device *udev_device) {
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ if (device_get_devlinks_generation(udev_device->device) != udev_device->devlinks_generation ||
+ !udev_device->devlinks_read) {
+ const char *devlink;
+
+ udev_list_cleanup(&udev_device->devlinks);
+
+ FOREACH_DEVICE_DEVLINK(udev_device->device, devlink)
+ if (!udev_list_entry_add(&udev_device->devlinks, devlink, NULL))
+ return_with_errno(NULL, ENOMEM);
+
+ udev_device->devlinks_read = true;
+ udev_device->devlinks_generation = device_get_devlinks_generation(udev_device->device);
+ }
+
+ return udev_list_get_entry(&udev_device->devlinks);
+}
+
+/**
+ * udev_device_get_event_properties_entry:
+ * @udev_device: udev device
+ *
+ * Retrieve the list of key/value device properties of the udev
+ * device. The next list entry can be retrieved with udev_list_entry_get_next(),
+ * which returns #NULL if no more entries exist. The property name
+ * can be retrieved from the list entry by udev_list_entry_get_name(),
+ * the property value by udev_list_entry_get_value().
+ *
+ * Returns: the first entry of the property list
+ **/
+_public_ struct udev_list_entry *udev_device_get_properties_list_entry(struct udev_device *udev_device) {
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ if (device_get_properties_generation(udev_device->device) != udev_device->properties_generation ||
+ !udev_device->properties_read) {
+ const char *key, *value;
+
+ udev_list_cleanup(&udev_device->properties);
+
+ FOREACH_DEVICE_PROPERTY(udev_device->device, key, value)
+ if (!udev_list_entry_add(&udev_device->properties, key, value))
+ return_with_errno(NULL, ENOMEM);
+
+ udev_device->properties_read = true;
+ udev_device->properties_generation = device_get_properties_generation(udev_device->device);
+ }
+
+ return udev_list_get_entry(&udev_device->properties);
+}
+
+/**
+ * udev_device_get_action:
+ * @udev_device: udev device
+ *
+ * This is only valid if the device was received through a monitor. Devices read from
+ * sys do not have an action string. Usual actions are: add, remove, change, online,
+ * offline.
+ *
+ * Returns: the kernel action value, or #NULL if there is no action value available.
+ **/
+_public_ const char *udev_device_get_action(struct udev_device *udev_device) {
+ const char *action;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_property_value(udev_device->device, "ACTION", &action);
+ if (r == -ENOENT)
+ return NULL;
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return action;
+}
+
+/**
+ * udev_device_get_usec_since_initialized:
+ * @udev_device: udev device
+ *
+ * Return the number of microseconds passed since udev set up the
+ * device for the first time.
+ *
+ * This is only implemented for devices with need to store properties
+ * in the udev database. All other devices return 0 here.
+ *
+ * Returns: the number of microseconds since the device was first seen.
+ **/
+_public_ unsigned long long int udev_device_get_usec_since_initialized(struct udev_device *udev_device) {
+ usec_t ts;
+ int r;
+
+ assert_return(udev_device, -EINVAL);
+
+ r = sd_device_get_usec_since_initialized(udev_device->device, &ts);
+ if (r < 0)
+ return_with_errno(0, r);
+
+ return ts;
+}
+
+/**
+ * udev_device_get_sysattr_value:
+ * @udev_device: udev device
+ * @sysattr: attribute name
+ *
+ * The retrieved value is cached in the device. Repeated calls will return the same
+ * value and not open the attribute again.
+ *
+ * Returns: the content of a sys attribute file, or #NULL if there is no sys attribute value.
+ **/
+_public_ const char *udev_device_get_sysattr_value(struct udev_device *udev_device, const char *sysattr) {
+ const char *value;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_sysattr_value(udev_device->device, sysattr, &value);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return value;
+}
+
+/**
+ * udev_device_set_sysattr_value:
+ * @udev_device: udev device
+ * @sysattr: attribute name
+ * @value: new value to be set
+ *
+ * Update the contents of the sys attribute and the cached value of the device.
+ *
+ * Returns: Negative error code on failure or 0 on success.
+ **/
+_public_ int udev_device_set_sysattr_value(struct udev_device *udev_device, const char *sysattr, const char *value) {
+ int r;
+
+ assert_return(udev_device, -EINVAL);
+
+ r = sd_device_set_sysattr_value(udev_device->device, sysattr, value);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+/**
+ * udev_device_get_sysattr_list_entry:
+ * @udev_device: udev device
+ *
+ * Retrieve the list of available sysattrs, with value being empty;
+ * This just return all available sysfs attributes for a particular
+ * device without reading their values.
+ *
+ * Returns: the first entry of the property list
+ **/
+_public_ struct udev_list_entry *udev_device_get_sysattr_list_entry(struct udev_device *udev_device) {
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ if (!udev_device->sysattrs_read) {
+ const char *sysattr;
+
+ udev_list_cleanup(&udev_device->sysattrs);
+
+ FOREACH_DEVICE_SYSATTR(udev_device->device, sysattr)
+ if (!udev_list_entry_add(&udev_device->sysattrs, sysattr, NULL))
+ return_with_errno(NULL, ENOMEM);
+
+ udev_device->sysattrs_read = true;
+ }
+
+ return udev_list_get_entry(&udev_device->sysattrs);
+}
+
+/**
+ * udev_device_get_is_initialized:
+ * @udev_device: udev device
+ *
+ * Check if udev has already handled the device and has set up
+ * device node permissions and context, or has renamed a network
+ * device.
+ *
+ * This is only implemented for devices with a device node
+ * or network interfaces. All other devices return 1 here.
+ *
+ * Returns: 1 if the device is set up. 0 otherwise.
+ **/
+_public_ int udev_device_get_is_initialized(struct udev_device *udev_device) {
+ int r;
+
+ assert_return(udev_device, -EINVAL);
+
+ r = sd_device_get_is_initialized(udev_device->device);
+ if (r < 0)
+ return_with_errno(0, r);
+
+ return r;
+}
+
+/**
+ * udev_device_get_tags_list_entry:
+ * @udev_device: udev device
+ *
+ * Retrieve the list of tags attached to the udev device. The next
+ * list entry can be retrieved with udev_list_entry_get_next(),
+ * which returns #NULL if no more entries exist. The tag string
+ * can be retrieved from the list entry by udev_list_entry_get_name().
+ *
+ * Returns: the first entry of the tag list
+ **/
+_public_ struct udev_list_entry *udev_device_get_tags_list_entry(struct udev_device *udev_device) {
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ if (device_get_tags_generation(udev_device->device) != udev_device->tags_generation ||
+ !udev_device->tags_read) {
+ const char *tag;
+
+ udev_list_cleanup(&udev_device->tags);
+
+ FOREACH_DEVICE_TAG(udev_device->device, tag)
+ if (!udev_list_entry_add(&udev_device->tags, tag, NULL))
+ return_with_errno(NULL, ENOMEM);
+
+ udev_device->tags_read = true;
+ udev_device->tags_generation = device_get_tags_generation(udev_device->device);
+ }
+
+ return udev_list_get_entry(&udev_device->tags);
+}
+
+/**
+ * udev_device_has_tag:
+ * @udev_device: udev device
+ * @tag: tag name
+ *
+ * Check if a given device has a certain tag associated.
+ *
+ * Returns: 1 if the tag is found. 0 otherwise.
+ **/
+_public_ int udev_device_has_tag(struct udev_device *udev_device, const char *tag) {
+ assert_return(udev_device, 0);
+
+ return sd_device_has_tag(udev_device->device, tag) > 0;
+}
diff --git a/src/libudev/libudev-enumerate.c b/src/libudev/libudev-enumerate.c
new file mode 100644
index 0000000..e54ee57
--- /dev/null
+++ b/src/libudev/libudev-enumerate.c
@@ -0,0 +1,392 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fnmatch.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include "libudev.h"
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-enumerator-private.h"
+#include "device-util.h"
+#include "libudev-device-internal.h"
+
+/**
+ * SECTION:libudev-enumerate
+ * @short_description: lookup and sort sys devices
+ *
+ * Lookup devices in the sys filesystem, filter devices by properties,
+ * and return a sorted list of devices.
+ */
+
+/**
+ * udev_enumerate:
+ *
+ * Opaque object representing one device lookup/sort context.
+ */
+struct udev_enumerate {
+ struct udev *udev;
+ unsigned n_ref;
+ struct udev_list devices_list;
+ bool devices_uptodate:1;
+
+ sd_device_enumerator *enumerator;
+};
+
+/**
+ * udev_enumerate_new:
+ * @udev: udev library context
+ *
+ * Create an enumeration context to scan /sys.
+ *
+ * Returns: an enumeration context.
+ **/
+_public_ struct udev_enumerate *udev_enumerate_new(struct udev *udev) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ struct udev_enumerate *udev_enumerate;
+ int r;
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ udev_enumerate = new(struct udev_enumerate, 1);
+ if (!udev_enumerate)
+ return_with_errno(NULL, ENOMEM);
+
+ *udev_enumerate = (struct udev_enumerate) {
+ .udev = udev,
+ .n_ref = 1,
+ .enumerator = TAKE_PTR(e),
+ };
+
+ udev_list_init(&udev_enumerate->devices_list, false);
+
+ return udev_enumerate;
+}
+
+static struct udev_enumerate *udev_enumerate_free(struct udev_enumerate *udev_enumerate) {
+ assert(udev_enumerate);
+
+ udev_list_cleanup(&udev_enumerate->devices_list);
+ sd_device_enumerator_unref(udev_enumerate->enumerator);
+ return mfree(udev_enumerate);
+}
+
+/**
+ * udev_enumerate_ref:
+ * @udev_enumerate: context
+ *
+ * Take a reference of a enumeration context.
+ *
+ * Returns: the passed enumeration context
+ **/
+
+/**
+ * udev_enumerate_unref:
+ * @udev_enumerate: context
+ *
+ * Drop a reference of an enumeration context. If the refcount reaches zero,
+ * all resources of the enumeration context will be released.
+ *
+ * Returns: #NULL
+ **/
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(struct udev_enumerate, udev_enumerate, udev_enumerate_free);
+
+/**
+ * udev_enumerate_get_udev:
+ * @udev_enumerate: context
+ *
+ * Get the udev library context.
+ *
+ * Returns: a pointer to the context.
+ */
+_public_ struct udev *udev_enumerate_get_udev(struct udev_enumerate *udev_enumerate) {
+ assert_return_errno(udev_enumerate, NULL, EINVAL);
+
+ return udev_enumerate->udev;
+}
+
+/**
+ * udev_enumerate_get_list_entry:
+ * @udev_enumerate: context
+ *
+ * Get the first entry of the sorted list of device paths.
+ *
+ * Returns: a udev_list_entry.
+ */
+_public_ struct udev_list_entry *udev_enumerate_get_list_entry(struct udev_enumerate *udev_enumerate) {
+ struct udev_list_entry *e;
+
+ assert_return_errno(udev_enumerate, NULL, EINVAL);
+
+ if (!udev_enumerate->devices_uptodate) {
+ sd_device *device;
+
+ udev_list_cleanup(&udev_enumerate->devices_list);
+
+ FOREACH_DEVICE_AND_SUBSYSTEM(udev_enumerate->enumerator, device) {
+ const char *syspath;
+ int r;
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ if (!udev_list_entry_add(&udev_enumerate->devices_list, syspath, NULL))
+ return_with_errno(NULL, ENOMEM);
+ }
+
+ udev_enumerate->devices_uptodate = true;
+ }
+
+ e = udev_list_get_entry(&udev_enumerate->devices_list);
+ if (!e)
+ return_with_errno(NULL, ENODATA);
+
+ return e;
+}
+
+/**
+ * udev_enumerate_add_match_subsystem:
+ * @udev_enumerate: context
+ * @subsystem: filter for a subsystem of the device to include in the list
+ *
+ * Match only devices belonging to a certain kernel subsystem.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_match_subsystem(struct udev_enumerate *udev_enumerate, const char *subsystem) {
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!subsystem)
+ return 0;
+
+ return sd_device_enumerator_add_match_subsystem(udev_enumerate->enumerator, subsystem, true);
+}
+
+/**
+ * udev_enumerate_add_nomatch_subsystem:
+ * @udev_enumerate: context
+ * @subsystem: filter for a subsystem of the device to exclude from the list
+ *
+ * Match only devices not belonging to a certain kernel subsystem.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_nomatch_subsystem(struct udev_enumerate *udev_enumerate, const char *subsystem) {
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!subsystem)
+ return 0;
+
+ return sd_device_enumerator_add_match_subsystem(udev_enumerate->enumerator, subsystem, false);
+}
+
+/**
+ * udev_enumerate_add_match_sysattr:
+ * @udev_enumerate: context
+ * @sysattr: filter for a sys attribute at the device to include in the list
+ * @value: optional value of the sys attribute
+ *
+ * Match only devices with a certain /sys device attribute.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_match_sysattr(struct udev_enumerate *udev_enumerate, const char *sysattr, const char *value) {
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!sysattr)
+ return 0;
+
+ return sd_device_enumerator_add_match_sysattr(udev_enumerate->enumerator, sysattr, value, true);
+}
+
+/**
+ * udev_enumerate_add_nomatch_sysattr:
+ * @udev_enumerate: context
+ * @sysattr: filter for a sys attribute at the device to exclude from the list
+ * @value: optional value of the sys attribute
+ *
+ * Match only devices not having a certain /sys device attribute.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_nomatch_sysattr(struct udev_enumerate *udev_enumerate, const char *sysattr, const char *value) {
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!sysattr)
+ return 0;
+
+ return sd_device_enumerator_add_match_sysattr(udev_enumerate->enumerator, sysattr, value, false);
+}
+
+/**
+ * udev_enumerate_add_match_property:
+ * @udev_enumerate: context
+ * @property: filter for a property of the device to include in the list
+ * @value: value of the property
+ *
+ * Match only devices with a certain property.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_match_property(struct udev_enumerate *udev_enumerate, const char *property, const char *value) {
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!property)
+ return 0;
+
+ return sd_device_enumerator_add_match_property(udev_enumerate->enumerator, property, value);
+}
+
+/**
+ * udev_enumerate_add_match_tag:
+ * @udev_enumerate: context
+ * @tag: filter for a tag of the device to include in the list
+ *
+ * Match only devices with a certain tag.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_match_tag(struct udev_enumerate *udev_enumerate, const char *tag) {
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!tag)
+ return 0;
+
+ return sd_device_enumerator_add_match_tag(udev_enumerate->enumerator, tag);
+}
+
+/**
+ * udev_enumerate_add_match_parent:
+ * @udev_enumerate: context
+ * @parent: parent device where to start searching
+ *
+ * Return the devices on the subtree of one given device. The parent
+ * itself is included in the list.
+ *
+ * A reference for the device is held until the udev_enumerate context
+ * is cleaned up.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_match_parent(struct udev_enumerate *udev_enumerate, struct udev_device *parent) {
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!parent)
+ return 0;
+
+ return sd_device_enumerator_add_match_parent(udev_enumerate->enumerator, parent->device);
+}
+
+/**
+ * udev_enumerate_add_match_is_initialized:
+ * @udev_enumerate: context
+ *
+ * Match only devices which udev has set up already. This makes
+ * sure, that the device node permissions and context are properly set
+ * and that network devices are fully renamed.
+ *
+ * Usually, devices which are found in the kernel but not already
+ * handled by udev, have still pending events. Services should subscribe
+ * to monitor events and wait for these devices to become ready, instead
+ * of using uninitialized devices.
+ *
+ * For now, this will not affect devices which do not have a device node
+ * and are not network interfaces.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_match_is_initialized(struct udev_enumerate *udev_enumerate) {
+ assert_return(udev_enumerate, -EINVAL);
+
+ return device_enumerator_add_match_is_initialized(udev_enumerate->enumerator);
+}
+
+/**
+ * udev_enumerate_add_match_sysname:
+ * @udev_enumerate: context
+ * @sysname: filter for the name of the device to include in the list
+ *
+ * Match only devices with a given /sys device name.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_match_sysname(struct udev_enumerate *udev_enumerate, const char *sysname) {
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!sysname)
+ return 0;
+
+ return sd_device_enumerator_add_match_sysname(udev_enumerate->enumerator, sysname);
+}
+
+/**
+ * udev_enumerate_add_syspath:
+ * @udev_enumerate: context
+ * @syspath: path of a device
+ *
+ * Add a device to the list of devices, to retrieve it back sorted in dependency order.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_syspath(struct udev_enumerate *udev_enumerate, const char *syspath) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!syspath)
+ return 0;
+
+ r = sd_device_new_from_syspath(&device, syspath);
+ if (r < 0)
+ return r;
+
+ r = device_enumerator_add_device(udev_enumerate->enumerator, device);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+/**
+ * udev_enumerate_scan_devices:
+ * @udev_enumerate: udev enumeration context
+ *
+ * Scan /sys for all devices which match the given filters. No matches
+ * will return all currently available devices.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ **/
+_public_ int udev_enumerate_scan_devices(struct udev_enumerate *udev_enumerate) {
+ assert_return(udev_enumerate, -EINVAL);
+
+ return device_enumerator_scan_devices(udev_enumerate->enumerator);
+}
+
+/**
+ * udev_enumerate_scan_subsystems:
+ * @udev_enumerate: udev enumeration context
+ *
+ * Scan /sys for all kernel subsystems, including buses, classes, drivers.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ **/
+_public_ int udev_enumerate_scan_subsystems(struct udev_enumerate *udev_enumerate) {
+ assert_return(udev_enumerate, -EINVAL);
+
+ return device_enumerator_scan_subsystems(udev_enumerate->enumerator);
+}
diff --git a/src/libudev/libudev-hwdb.c b/src/libudev/libudev-hwdb.c
new file mode 100644
index 0000000..ed755e5
--- /dev/null
+++ b/src/libudev/libudev-hwdb.c
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "sd-hwdb.h"
+
+#include "alloc-util.h"
+#include "hwdb-util.h"
+#include "libudev-list-internal.h"
+
+/**
+ * SECTION:libudev-hwdb
+ * @short_description: retrieve properties from the hardware database
+ *
+ * Libudev hardware database interface.
+ */
+
+/**
+ * udev_hwdb:
+ *
+ * Opaque object representing the hardware database.
+ */
+struct udev_hwdb {
+ unsigned n_ref;
+ sd_hwdb *hwdb;
+ struct udev_list properties_list;
+};
+
+/**
+ * udev_hwdb_new:
+ * @udev: udev library context (unused)
+ *
+ * Create a hardware database context to query properties for devices.
+ *
+ * Returns: a hwdb context.
+ **/
+_public_ struct udev_hwdb *udev_hwdb_new(struct udev *udev) {
+ _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb_internal = NULL;
+ struct udev_hwdb *hwdb;
+ int r;
+
+ r = sd_hwdb_new(&hwdb_internal);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ hwdb = new(struct udev_hwdb, 1);
+ if (!hwdb)
+ return_with_errno(NULL, ENOMEM);
+
+ *hwdb = (struct udev_hwdb) {
+ .n_ref = 1,
+ .hwdb = TAKE_PTR(hwdb_internal),
+ };
+
+ udev_list_init(&hwdb->properties_list, true);
+
+ return hwdb;
+}
+
+static struct udev_hwdb *udev_hwdb_free(struct udev_hwdb *hwdb) {
+ assert(hwdb);
+
+ sd_hwdb_unref(hwdb->hwdb);
+ udev_list_cleanup(&hwdb->properties_list);
+ return mfree(hwdb);
+}
+
+/**
+ * udev_hwdb_ref:
+ * @hwdb: context
+ *
+ * Take a reference of a hwdb context.
+ *
+ * Returns: the passed enumeration context
+ **/
+
+/**
+ * udev_hwdb_unref:
+ * @hwdb: context
+ *
+ * Drop a reference of a hwdb context. If the refcount reaches zero,
+ * all resources of the hwdb context will be released.
+ *
+ * Returns: #NULL
+ **/
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(struct udev_hwdb, udev_hwdb, udev_hwdb_free);
+
+/**
+ * udev_hwdb_get_properties_list_entry:
+ * @hwdb: context
+ * @modalias: modalias string
+ * @flags: (unused)
+ *
+ * Lookup a matching device in the hardware database. The lookup key is a
+ * modalias string, whose formats are defined for the Linux kernel modules.
+ * Examples are: pci:v00008086d00001C2D*, usb:v04F2pB221*. The first entry
+ * of a list of retrieved properties is returned.
+ *
+ * Returns: a udev_list_entry.
+ */
+_public_ struct udev_list_entry *udev_hwdb_get_properties_list_entry(struct udev_hwdb *hwdb, const char *modalias, unsigned flags) {
+ const char *key, *value;
+ struct udev_list_entry *e;
+
+ assert_return_errno(hwdb, NULL, EINVAL);
+ assert_return_errno(modalias, NULL, EINVAL);
+
+ udev_list_cleanup(&hwdb->properties_list);
+
+ SD_HWDB_FOREACH_PROPERTY(hwdb->hwdb, modalias, key, value)
+ if (!udev_list_entry_add(&hwdb->properties_list, key, value))
+ return_with_errno(NULL, ENOMEM);
+
+ e = udev_list_get_entry(&hwdb->properties_list);
+ if (!e)
+ return_with_errno(NULL, ENODATA);
+
+ return e;
+}
diff --git a/src/libudev/libudev-list-internal.h b/src/libudev/libudev-list-internal.h
new file mode 100644
index 0000000..4e1632c
--- /dev/null
+++ b/src/libudev/libudev-list-internal.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "libudev.h"
+
+struct udev_list_node {
+ struct udev_list_node *next, *prev;
+};
+
+struct udev_list {
+ struct udev_list_node node;
+ struct udev_list_entry **entries;
+ unsigned entries_cur;
+ unsigned entries_max;
+ bool unique;
+};
+
+void udev_list_init(struct udev_list *list, bool unique);
+void udev_list_cleanup(struct udev_list *list);
+struct udev_list_entry *udev_list_get_entry(struct udev_list *list);
+struct udev_list_entry *udev_list_entry_add(struct udev_list *list, const char *name, const char *value);
diff --git a/src/libudev/libudev-list.c b/src/libudev/libudev-list.c
new file mode 100644
index 0000000..2737326
--- /dev/null
+++ b/src/libudev/libudev-list.c
@@ -0,0 +1,301 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "libudev-list-internal.h"
+#include "util.h"
+
+/**
+ * SECTION:libudev-list
+ * @short_description: list operation
+ *
+ * Libudev list operations.
+ */
+
+/**
+ * udev_list_entry:
+ *
+ * Opaque object representing one entry in a list. An entry contains
+ * contains a name, and optionally a value.
+ */
+struct udev_list_entry {
+ struct udev_list_node node;
+ struct udev_list *list;
+ char *name;
+ char *value;
+ int num;
+};
+
+/* the list's head points to itself if empty */
+static void udev_list_node_init(struct udev_list_node *list) {
+ list->next = list;
+ list->prev = list;
+}
+
+static int udev_list_node_is_empty(struct udev_list_node *list) {
+ return list->next == list;
+}
+
+static void udev_list_node_insert_between(struct udev_list_node *new,
+ struct udev_list_node *prev,
+ struct udev_list_node *next) {
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+static void udev_list_node_remove(struct udev_list_node *entry) {
+ struct udev_list_node *prev = entry->prev;
+ struct udev_list_node *next = entry->next;
+
+ next->prev = prev;
+ prev->next = next;
+
+ entry->prev = NULL;
+ entry->next = NULL;
+}
+
+/* return list entry which embeds this node */
+static struct udev_list_entry *list_node_to_entry(struct udev_list_node *node) {
+ return container_of(node, struct udev_list_entry, node);
+}
+
+void udev_list_init(struct udev_list *list, bool unique) {
+ memzero(list, sizeof(struct udev_list));
+ list->unique = unique;
+ udev_list_node_init(&list->node);
+}
+
+/* insert entry into a list as the last element */
+static void udev_list_entry_append(struct udev_list_entry *new, struct udev_list *list) {
+ /* inserting before the list head make the node the last node in the list */
+ udev_list_node_insert_between(&new->node, list->node.prev, &list->node);
+ new->list = list;
+}
+
+/* insert entry into a list, before a given existing entry */
+static void udev_list_entry_insert_before(struct udev_list_entry *new, struct udev_list_entry *entry) {
+ udev_list_node_insert_between(&new->node, entry->node.prev, &entry->node);
+ new->list = entry->list;
+}
+
+/* binary search in sorted array */
+static int list_search(struct udev_list *list, const char *name) {
+ unsigned first, last;
+
+ first = 0;
+ last = list->entries_cur;
+ while (first < last) {
+ unsigned i;
+ int cmp;
+
+ i = (first + last)/2;
+ cmp = strcmp(name, list->entries[i]->name);
+ if (cmp < 0)
+ last = i;
+ else if (cmp > 0)
+ first = i+1;
+ else
+ return i;
+ }
+
+ /* not found, return negative insertion-index+1 */
+ return -(first+1);
+}
+
+struct udev_list_entry *udev_list_entry_add(struct udev_list *list, const char *name, const char *value) {
+ struct udev_list_entry *entry;
+ int i = 0;
+
+ if (list->unique) {
+ /* lookup existing name or insertion-index */
+ i = list_search(list, name);
+ if (i >= 0) {
+ entry = list->entries[i];
+
+ free(entry->value);
+ if (!value) {
+ entry->value = NULL;
+ return entry;
+ }
+ entry->value = strdup(value);
+ if (!entry->value)
+ return NULL;
+ return entry;
+ }
+ }
+
+ /* add new name */
+ entry = new0(struct udev_list_entry, 1);
+ if (!entry)
+ return NULL;
+
+ entry->name = strdup(name);
+ if (!entry->name)
+ return mfree(entry);
+
+ if (value) {
+ entry->value = strdup(value);
+ if (!entry->value) {
+ free(entry->name);
+ return mfree(entry);
+ }
+ }
+
+ if (list->unique) {
+ /* allocate or enlarge sorted array if needed */
+ if (list->entries_cur >= list->entries_max) {
+ struct udev_list_entry **entries;
+ unsigned add;
+
+ add = list->entries_max;
+ if (add < 1)
+ add = 64;
+ entries = reallocarray(list->entries, list->entries_max + add, sizeof(struct udev_list_entry *));
+ if (!entries) {
+ free(entry->name);
+ free(entry->value);
+ return mfree(entry);
+ }
+ list->entries = entries;
+ list->entries_max += add;
+ }
+
+ /* the negative i returned the insertion index */
+ i = (-i)-1;
+
+ /* insert into sorted list */
+ if ((unsigned)i < list->entries_cur)
+ udev_list_entry_insert_before(entry, list->entries[i]);
+ else
+ udev_list_entry_append(entry, list);
+
+ /* insert into sorted array */
+ memmove(&list->entries[i+1], &list->entries[i],
+ (list->entries_cur - i) * sizeof(struct udev_list_entry *));
+ list->entries[i] = entry;
+ list->entries_cur++;
+ } else
+ udev_list_entry_append(entry, list);
+
+ return entry;
+}
+
+static void udev_list_entry_delete(struct udev_list_entry *entry) {
+ if (entry->list->entries) {
+ int i;
+ struct udev_list *list = entry->list;
+
+ /* remove entry from sorted array */
+ i = list_search(list, entry->name);
+ if (i >= 0) {
+ memmove(&list->entries[i], &list->entries[i+1],
+ ((list->entries_cur-1) - i) * sizeof(struct udev_list_entry *));
+ list->entries_cur--;
+ }
+ }
+
+ udev_list_node_remove(&entry->node);
+ free(entry->name);
+ free(entry->value);
+ free(entry);
+}
+
+#define udev_list_entry_foreach_safe(entry, tmp, first) \
+ for (entry = first, tmp = udev_list_entry_get_next(entry); \
+ entry; \
+ entry = tmp, tmp = udev_list_entry_get_next(tmp))
+
+void udev_list_cleanup(struct udev_list *list) {
+ struct udev_list_entry *entry_loop;
+ struct udev_list_entry *entry_tmp;
+
+ list->entries = mfree(list->entries);
+ list->entries_cur = 0;
+ list->entries_max = 0;
+ udev_list_entry_foreach_safe(entry_loop, entry_tmp, udev_list_get_entry(list))
+ udev_list_entry_delete(entry_loop);
+}
+
+struct udev_list_entry *udev_list_get_entry(struct udev_list *list) {
+ if (udev_list_node_is_empty(&list->node))
+ return NULL;
+ return list_node_to_entry(list->node.next);
+}
+
+/**
+ * udev_list_entry_get_next:
+ * @list_entry: current entry
+ *
+ * Get the next entry from the list.
+ *
+ * Returns: udev_list_entry, #NULL if no more entries are available.
+ */
+_public_ struct udev_list_entry *udev_list_entry_get_next(struct udev_list_entry *list_entry) {
+ struct udev_list_node *next;
+
+ if (!list_entry)
+ return NULL;
+ next = list_entry->node.next;
+ /* empty list or no more entries */
+ if (next == &list_entry->list->node)
+ return NULL;
+ return list_node_to_entry(next);
+}
+
+/**
+ * udev_list_entry_get_by_name:
+ * @list_entry: current entry
+ * @name: name string to match
+ *
+ * Lookup an entry in the list with a certain name.
+ *
+ * Returns: udev_list_entry, #NULL if no matching entry is found.
+ */
+_public_ struct udev_list_entry *udev_list_entry_get_by_name(struct udev_list_entry *list_entry, const char *name) {
+ int i;
+
+ if (!list_entry)
+ return NULL;
+
+ if (!list_entry->list->unique)
+ return NULL;
+
+ i = list_search(list_entry->list, name);
+ if (i < 0)
+ return NULL;
+ return list_entry->list->entries[i];
+}
+
+/**
+ * udev_list_entry_get_name:
+ * @list_entry: current entry
+ *
+ * Get the name of a list entry.
+ *
+ * Returns: the name string of this entry.
+ */
+_public_ const char *udev_list_entry_get_name(struct udev_list_entry *list_entry) {
+ if (!list_entry)
+ return NULL;
+ return list_entry->name;
+}
+
+/**
+ * udev_list_entry_get_value:
+ * @list_entry: current entry
+ *
+ * Get the value of list entry.
+ *
+ * Returns: the value string of this entry.
+ */
+_public_ const char *udev_list_entry_get_value(struct udev_list_entry *list_entry) {
+ if (!list_entry)
+ return NULL;
+ return list_entry->value;
+}
diff --git a/src/libudev/libudev-monitor.c b/src/libudev/libudev-monitor.c
new file mode 100644
index 0000000..70036f5
--- /dev/null
+++ b/src/libudev/libudev-monitor.c
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <poll.h>
+
+#include "libudev.h"
+
+#include "alloc-util.h"
+#include "device-monitor-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "libudev-device-internal.h"
+#include "string-util.h"
+
+/**
+ * SECTION:libudev-monitor
+ * @short_description: device event source
+ *
+ * Connects to a device event source.
+ */
+
+/**
+ * udev_monitor:
+ *
+ * Opaque object handling an event source.
+ */
+struct udev_monitor {
+ struct udev *udev;
+ unsigned n_ref;
+ sd_device_monitor *monitor;
+};
+
+static MonitorNetlinkGroup monitor_netlink_group_from_string(const char *name) {
+ if (!name)
+ return MONITOR_GROUP_NONE;
+ if (streq(name, "udev"))
+ return MONITOR_GROUP_UDEV;
+ if (streq(name, "kernel"))
+ return MONITOR_GROUP_KERNEL;
+ return _MONITOR_NETLINK_GROUP_INVALID;
+}
+
+/**
+ * udev_monitor_new_from_netlink:
+ * @udev: udev library context
+ * @name: name of event source
+ *
+ * Create new udev monitor and connect to a specified event
+ * source. Valid sources identifiers are "udev" and "kernel".
+ *
+ * Applications should usually not connect directly to the
+ * "kernel" events, because the devices might not be useable
+ * at that time, before udev has configured them, and created
+ * device nodes. Accessing devices at the same time as udev,
+ * might result in unpredictable behavior. The "udev" events
+ * are sent out after udev has finished its event processing,
+ * all rules have been processed, and needed device nodes are
+ * created.
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev monitor.
+ *
+ * Returns: a new udev monitor, or #NULL, in case of an error
+ **/
+_public_ struct udev_monitor *udev_monitor_new_from_netlink(struct udev *udev, const char *name) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *m = NULL;
+ struct udev_monitor *udev_monitor;
+ MonitorNetlinkGroup g;
+ int r;
+
+ g = monitor_netlink_group_from_string(name);
+ if (g < 0)
+ return_with_errno(NULL, EINVAL);
+
+ r = device_monitor_new_full(&m, g, -1);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ udev_monitor = new(struct udev_monitor, 1);
+ if (!udev_monitor)
+ return_with_errno(NULL, ENOMEM);
+
+ *udev_monitor = (struct udev_monitor) {
+ .udev = udev,
+ .n_ref = 1,
+ .monitor = TAKE_PTR(m),
+ };
+
+ return udev_monitor;
+}
+
+/**
+ * udev_monitor_filter_update:
+ * @udev_monitor: monitor
+ *
+ * Update the installed socket filter. This is only needed,
+ * if the filter was removed or changed.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_monitor_filter_update(struct udev_monitor *udev_monitor) {
+ assert_return(udev_monitor, -EINVAL);
+
+ return sd_device_monitor_filter_update(udev_monitor->monitor);
+}
+
+/**
+ * udev_monitor_enable_receiving:
+ * @udev_monitor: the monitor which should receive events
+ *
+ * Binds the @udev_monitor socket to the event source.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_monitor_enable_receiving(struct udev_monitor *udev_monitor) {
+ assert_return(udev_monitor, -EINVAL);
+
+ return device_monitor_enable_receiving(udev_monitor->monitor);
+}
+
+/**
+ * udev_monitor_set_receive_buffer_size:
+ * @udev_monitor: the monitor which should receive events
+ * @size: the size in bytes
+ *
+ * Set the size of the kernel socket buffer. This call needs the
+ * appropriate privileges to succeed.
+ *
+ * Returns: 0 on success, otherwise -1 on error.
+ */
+_public_ int udev_monitor_set_receive_buffer_size(struct udev_monitor *udev_monitor, int size) {
+ assert_return(udev_monitor, -EINVAL);
+
+ return sd_device_monitor_set_receive_buffer_size(udev_monitor->monitor, (size_t) size);
+}
+
+static struct udev_monitor *udev_monitor_free(struct udev_monitor *udev_monitor) {
+ assert(udev_monitor);
+
+ sd_device_monitor_unref(udev_monitor->monitor);
+ return mfree(udev_monitor);
+}
+
+/**
+ * udev_monitor_ref:
+ * @udev_monitor: udev monitor
+ *
+ * Take a reference of a udev monitor.
+ *
+ * Returns: the passed udev monitor
+ **/
+
+/**
+ * udev_monitor_unref:
+ * @udev_monitor: udev monitor
+ *
+ * Drop a reference of a udev monitor. If the refcount reaches zero,
+ * the bound socket will be closed, and the resources of the monitor
+ * will be released.
+ *
+ * Returns: #NULL
+ **/
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(struct udev_monitor, udev_monitor, udev_monitor_free);
+
+/**
+ * udev_monitor_get_udev:
+ * @udev_monitor: udev monitor
+ *
+ * Retrieve the udev library context the monitor was created with.
+ *
+ * Returns: the udev library context
+ **/
+_public_ struct udev *udev_monitor_get_udev(struct udev_monitor *udev_monitor) {
+ assert_return(udev_monitor, NULL);
+
+ return udev_monitor->udev;
+}
+
+/**
+ * udev_monitor_get_fd:
+ * @udev_monitor: udev monitor
+ *
+ * Retrieve the socket file descriptor associated with the monitor.
+ *
+ * Returns: the socket file descriptor
+ **/
+_public_ int udev_monitor_get_fd(struct udev_monitor *udev_monitor) {
+ assert_return(udev_monitor, -EINVAL);
+
+ return device_monitor_get_fd(udev_monitor->monitor);
+}
+
+static int udev_monitor_receive_sd_device(struct udev_monitor *udev_monitor, sd_device **ret) {
+ struct pollfd pfd;
+ int r;
+
+ assert(udev_monitor);
+ assert(ret);
+
+ pfd = (struct pollfd) {
+ .fd = device_monitor_get_fd(udev_monitor->monitor),
+ .events = POLLIN,
+ };
+
+ for (;;) {
+ /* r == 0 means a device is received but it does not pass the current filter. */
+ r = device_monitor_receive_device(udev_monitor->monitor, ret);
+ if (r != 0)
+ return r;
+
+ for (;;) {
+ /* wait next message */
+ r = poll(&pfd, 1, 0);
+ if (r < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ continue;
+
+ return -errno;
+ } else if (r == 0)
+ return -EAGAIN;
+
+ /* receive next message */
+ break;
+ }
+ }
+}
+
+/**
+ * udev_monitor_receive_device:
+ * @udev_monitor: udev monitor
+ *
+ * Receive data from the udev monitor socket, allocate a new udev
+ * device, fill in the received data, and return the device.
+ *
+ * Only socket connections with uid=0 are accepted.
+ *
+ * The monitor socket is by default set to NONBLOCK. A variant of poll() on
+ * the file descriptor returned by udev_monitor_get_fd() should to be used to
+ * wake up when new devices arrive, or alternatively the file descriptor
+ * switched into blocking mode.
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev device.
+ *
+ * Returns: a new udev device, or #NULL, in case of an error
+ **/
+_public_ struct udev_device *udev_monitor_receive_device(struct udev_monitor *udev_monitor) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ assert_return(udev_monitor, NULL);
+
+ r = udev_monitor_receive_sd_device(udev_monitor, &device);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return udev_device_new(udev_monitor->udev, device);
+}
+
+/**
+ * udev_monitor_filter_add_match_subsystem_devtype:
+ * @udev_monitor: the monitor
+ * @subsystem: the subsystem value to match the incoming devices against
+ * @devtype: the devtype value to match the incoming devices against
+ *
+ * This filter is efficiently executed inside the kernel, and libudev subscribers
+ * will usually not be woken up for devices which do not match.
+ *
+ * The filter must be installed before the monitor is switched to listening mode.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_monitor_filter_add_match_subsystem_devtype(struct udev_monitor *udev_monitor, const char *subsystem, const char *devtype) {
+ assert_return(udev_monitor, -EINVAL);
+
+ return sd_device_monitor_filter_add_match_subsystem_devtype(udev_monitor->monitor, subsystem, devtype);
+}
+
+/**
+ * udev_monitor_filter_add_match_tag:
+ * @udev_monitor: the monitor
+ * @tag: the name of a tag
+ *
+ * This filter is efficiently executed inside the kernel, and libudev subscribers
+ * will usually not be woken up for devices which do not match.
+ *
+ * The filter must be installed before the monitor is switched to listening mode.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_monitor_filter_add_match_tag(struct udev_monitor *udev_monitor, const char *tag) {
+ assert_return(udev_monitor, -EINVAL);
+
+ return sd_device_monitor_filter_add_match_tag(udev_monitor->monitor, tag);
+}
+
+/**
+ * udev_monitor_filter_remove:
+ * @udev_monitor: monitor
+ *
+ * Remove all filters from monitor.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_monitor_filter_remove(struct udev_monitor *udev_monitor) {
+ assert_return(udev_monitor, -EINVAL);
+
+ return sd_device_monitor_filter_remove(udev_monitor->monitor);
+}
diff --git a/src/libudev/libudev-queue.c b/src/libudev/libudev-queue.c
new file mode 100644
index 0000000..4e055bb
--- /dev/null
+++ b/src/libudev/libudev-queue.c
@@ -0,0 +1,236 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2009 Alan Jenkins <alan-jenkins@tuffmail.co.uk>
+***/
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/inotify.h>
+#include <unistd.h>
+
+#include "libudev.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+
+/**
+ * SECTION:libudev-queue
+ * @short_description: access to currently active events
+ *
+ * This exports the current state of the udev processing queue.
+ */
+
+/**
+ * udev_queue:
+ *
+ * Opaque object representing the current event queue in the udev daemon.
+ */
+struct udev_queue {
+ struct udev *udev;
+ unsigned n_ref;
+ int fd;
+};
+
+/**
+ * udev_queue_new:
+ * @udev: udev library context
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev queue context.
+ *
+ * Returns: the udev queue context, or #NULL on error.
+ **/
+_public_ struct udev_queue *udev_queue_new(struct udev *udev) {
+ struct udev_queue *udev_queue;
+
+ udev_queue = new(struct udev_queue, 1);
+ if (!udev_queue)
+ return_with_errno(NULL, ENOMEM);
+
+ *udev_queue = (struct udev_queue) {
+ .udev = udev,
+ .n_ref = 1,
+ .fd = -1,
+ };
+
+ return udev_queue;
+}
+
+static struct udev_queue *udev_queue_free(struct udev_queue *udev_queue) {
+ assert(udev_queue);
+
+ safe_close(udev_queue->fd);
+ return mfree(udev_queue);
+}
+
+/**
+ * udev_queue_ref:
+ * @udev_queue: udev queue context
+ *
+ * Take a reference of a udev queue context.
+ *
+ * Returns: the same udev queue context.
+ **/
+
+/**
+ * udev_queue_unref:
+ * @udev_queue: udev queue context
+ *
+ * Drop a reference of a udev queue context. If the refcount reaches zero,
+ * the resources of the queue context will be released.
+ *
+ * Returns: #NULL
+ **/
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(struct udev_queue, udev_queue, udev_queue_free);
+
+/**
+ * udev_queue_get_udev:
+ * @udev_queue: udev queue context
+ *
+ * Retrieve the udev library context the queue context was created with.
+ *
+ * Returns: the udev library context.
+ **/
+_public_ struct udev *udev_queue_get_udev(struct udev_queue *udev_queue) {
+ assert_return_errno(udev_queue, NULL, EINVAL);
+
+ return udev_queue->udev;
+}
+
+/**
+ * udev_queue_get_kernel_seqnum:
+ * @udev_queue: udev queue context
+ *
+ * This function is deprecated.
+ *
+ * Returns: 0.
+ **/
+_public_ unsigned long long int udev_queue_get_kernel_seqnum(struct udev_queue *udev_queue) {
+ return 0;
+}
+
+/**
+ * udev_queue_get_udev_seqnum:
+ * @udev_queue: udev queue context
+ *
+ * This function is deprecated.
+ *
+ * Returns: 0.
+ **/
+_public_ unsigned long long int udev_queue_get_udev_seqnum(struct udev_queue *udev_queue) {
+ return 0;
+}
+
+/**
+ * udev_queue_get_udev_is_active:
+ * @udev_queue: udev queue context
+ *
+ * Check if udev is active on the system.
+ *
+ * Returns: a flag indicating if udev is active.
+ **/
+_public_ int udev_queue_get_udev_is_active(struct udev_queue *udev_queue) {
+ return access("/run/udev/control", F_OK) >= 0;
+}
+
+/**
+ * udev_queue_get_queue_is_empty:
+ * @udev_queue: udev queue context
+ *
+ * Check if udev is currently processing any events.
+ *
+ * Returns: a flag indicating if udev is currently handling events.
+ **/
+_public_ int udev_queue_get_queue_is_empty(struct udev_queue *udev_queue) {
+ return access("/run/udev/queue", F_OK) < 0;
+}
+
+/**
+ * udev_queue_get_seqnum_sequence_is_finished:
+ * @udev_queue: udev queue context
+ * @start: first event sequence number
+ * @end: last event sequence number
+ *
+ * This function is deprecated, it just returns the result of
+ * udev_queue_get_queue_is_empty().
+ *
+ * Returns: a flag indicating if udev is currently handling events.
+ **/
+_public_ int udev_queue_get_seqnum_sequence_is_finished(struct udev_queue *udev_queue,
+ unsigned long long int start, unsigned long long int end) {
+ return udev_queue_get_queue_is_empty(udev_queue);
+}
+
+/**
+ * udev_queue_get_seqnum_is_finished:
+ * @udev_queue: udev queue context
+ * @seqnum: sequence number
+ *
+ * This function is deprecated, it just returns the result of
+ * udev_queue_get_queue_is_empty().
+ *
+ * Returns: a flag indicating if udev is currently handling events.
+ **/
+_public_ int udev_queue_get_seqnum_is_finished(struct udev_queue *udev_queue, unsigned long long int seqnum) {
+ return udev_queue_get_queue_is_empty(udev_queue);
+}
+
+/**
+ * udev_queue_get_queued_list_entry:
+ * @udev_queue: udev queue context
+ *
+ * This function is deprecated.
+ *
+ * Returns: NULL.
+ **/
+_public_ struct udev_list_entry *udev_queue_get_queued_list_entry(struct udev_queue *udev_queue) {
+ return_with_errno(NULL, ENODATA);
+}
+
+/**
+ * udev_queue_get_fd:
+ * @udev_queue: udev queue context
+ *
+ * Returns: a file descriptor to watch for a queue to become empty.
+ */
+_public_ int udev_queue_get_fd(struct udev_queue *udev_queue) {
+ _cleanup_close_ int fd = -1;
+
+ assert_return(udev_queue, -EINVAL);
+
+ if (udev_queue->fd >= 0)
+ return udev_queue->fd;
+
+ fd = inotify_init1(IN_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (inotify_add_watch(fd, "/run/udev" , IN_DELETE) < 0)
+ return -errno;
+
+ udev_queue->fd = TAKE_FD(fd);
+ return udev_queue->fd;
+}
+
+/**
+ * udev_queue_flush:
+ * @udev_queue: udev queue context
+ *
+ * Returns: the result of clearing the watch for queue changes.
+ */
+_public_ int udev_queue_flush(struct udev_queue *udev_queue) {
+ int r;
+
+ assert_return(udev_queue, -EINVAL);
+
+ if (udev_queue->fd < 0)
+ return -EINVAL;
+
+ r = flush_fd(udev_queue->fd);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
diff --git a/src/libudev/libudev-util.c b/src/libudev/libudev-util.c
new file mode 100644
index 0000000..7e21719
--- /dev/null
+++ b/src/libudev/libudev-util.c
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <errno.h>
+
+#include "sd-device.h"
+
+#include "device-nodes.h"
+#include "libudev-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "utf8.h"
+
+/**
+ * SECTION:libudev-util
+ * @short_description: utils
+ *
+ * Utilities useful when dealing with devices and device node names.
+ */
+
+/* handle "[<SUBSYSTEM>/<KERNEL>]<attribute>" format */
+int util_resolve_subsys_kernel(const char *string, char *result, size_t maxsize, bool read_value) {
+ char temp[UTIL_PATH_SIZE], *subsys, *sysname, *attr;
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ const char *val;
+ int r;
+
+ if (string[0] != '[')
+ return -EINVAL;
+
+ strscpy(temp, sizeof(temp), string);
+
+ subsys = &temp[1];
+
+ sysname = strchr(subsys, '/');
+ if (!sysname)
+ return -EINVAL;
+ sysname[0] = '\0';
+ sysname = &sysname[1];
+
+ attr = strchr(sysname, ']');
+ if (!attr)
+ return -EINVAL;
+ attr[0] = '\0';
+ attr = &attr[1];
+ if (attr[0] == '/')
+ attr = &attr[1];
+ if (attr[0] == '\0')
+ attr = NULL;
+
+ if (read_value && !attr)
+ return -EINVAL;
+
+ r = sd_device_new_from_subsystem_sysname(&dev, subsys, sysname);
+ if (r < 0)
+ return r;
+
+ if (read_value) {
+ r = sd_device_get_sysattr_value(dev, attr, &val);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ if (r == -ENOENT)
+ result[0] = '\0';
+ else
+ strscpy(result, maxsize, val);
+ log_debug("value '[%s/%s]%s' is '%s'", subsys, sysname, attr, result);
+ } else {
+ r = sd_device_get_syspath(dev, &val);
+ if (r < 0)
+ return r;
+
+ strscpyl(result, maxsize, val, attr ? "/" : NULL, attr ?: NULL, NULL);
+ log_debug("path '[%s/%s]%s' is '%s'", subsys, sysname, strempty(attr), result);
+ }
+ return 0;
+}
+
+size_t util_path_encode(const char *src, char *dest, size_t size) {
+ size_t i, j;
+
+ assert(src);
+ assert(dest);
+
+ for (i = 0, j = 0; src[i] != '\0'; i++) {
+ if (src[i] == '/') {
+ if (j+4 >= size) {
+ j = 0;
+ break;
+ }
+ memcpy(&dest[j], "\\x2f", 4);
+ j += 4;
+ } else if (src[i] == '\\') {
+ if (j+4 >= size) {
+ j = 0;
+ break;
+ }
+ memcpy(&dest[j], "\\x5c", 4);
+ j += 4;
+ } else {
+ if (j+1 >= size) {
+ j = 0;
+ break;
+ }
+ dest[j] = src[i];
+ j++;
+ }
+ }
+ dest[j] = '\0';
+ return j;
+}
+
+/*
+ * Copy from 'str' to 'to', while removing all leading and trailing whitespace,
+ * and replacing each run of consecutive whitespace with a single underscore.
+ * The chars from 'str' are copied up to the \0 at the end of the string, or
+ * at most 'len' chars. This appends \0 to 'to', at the end of the copied
+ * characters.
+ *
+ * If 'len' chars are copied into 'to', the final \0 is placed at len+1
+ * (i.e. 'to[len] = \0'), so the 'to' buffer must have at least len+1
+ * chars available.
+ *
+ * Note this may be called with 'str' == 'to', i.e. to replace whitespace
+ * in-place in a buffer. This function can handle that situation.
+ *
+ * Note that only 'len' characters are read from 'str'.
+ */
+size_t util_replace_whitespace(const char *str, char *to, size_t len) {
+ bool is_space = false;
+ size_t i, j;
+
+ assert(str);
+ assert(to);
+
+ i = strspn(str, WHITESPACE);
+
+ for (j = 0; j < len && i < len && str[i] != '\0'; i++) {
+ if (isspace(str[i])) {
+ is_space = true;
+ continue;
+ }
+
+ if (is_space) {
+ if (j + 1 >= len)
+ break;
+
+ to[j++] = '_';
+ is_space = false;
+ }
+ to[j++] = str[i];
+ }
+
+ to[j] = '\0';
+ return j;
+}
+
+/* allow chars in whitelist, plain ascii, hex-escaping and valid utf8 */
+size_t util_replace_chars(char *str, const char *white) {
+ size_t i = 0, replaced = 0;
+
+ assert(str);
+
+ while (str[i] != '\0') {
+ int len;
+
+ if (whitelisted_char_for_devnode(str[i], white)) {
+ i++;
+ continue;
+ }
+
+ /* accept hex encoding */
+ if (str[i] == '\\' && str[i+1] == 'x') {
+ i += 2;
+ continue;
+ }
+
+ /* accept valid utf8 */
+ len = utf8_encoded_valid_unichar(&str[i]);
+ if (len > 1) {
+ i += len;
+ continue;
+ }
+
+ /* if space is allowed, replace whitespace with ordinary space */
+ if (isspace(str[i]) && white && strchr(white, ' ')) {
+ str[i] = ' ';
+ i++;
+ replaced++;
+ continue;
+ }
+
+ /* everything else is replaced with '_' */
+ str[i] = '_';
+ i++;
+ replaced++;
+ }
+ return replaced;
+}
+
+/**
+ * udev_util_encode_string:
+ * @str: input string to be encoded
+ * @str_enc: output string to store the encoded input string
+ * @len: maximum size of the output string, which may be
+ * four times as long as the input string
+ *
+ * Encode all potentially unsafe characters of a string to the
+ * corresponding 2 char hex value prefixed by '\x'.
+ *
+ * Returns: 0 if the entire string was copied, non-zero otherwise.
+ **/
+_public_ int udev_util_encode_string(const char *str, char *str_enc, size_t len) {
+ return encode_devnode_name(str, str_enc, len);
+}
diff --git a/src/libudev/libudev-util.h b/src/libudev/libudev-util.h
new file mode 100644
index 0000000..32b626e
--- /dev/null
+++ b/src/libudev/libudev-util.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "libudev.h"
+
+#include "macro.h"
+
+/* libudev-util.c */
+#define UTIL_PATH_SIZE 1024
+#define UTIL_NAME_SIZE 512
+#define UTIL_LINE_SIZE 16384
+#define UDEV_ALLOWED_CHARS_INPUT "/ $%?,"
+size_t util_path_encode(const char *src, char *dest, size_t size);
+size_t util_replace_whitespace(const char *str, char *to, size_t len);
+size_t util_replace_chars(char *str, const char *white);
+int util_resolve_subsys_kernel(const char *string, char *result, size_t maxsize, bool read_value);
+
+/* Cleanup functions */
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev*, udev_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_device*, udev_device_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_enumerate*, udev_enumerate_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_monitor*, udev_monitor_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_hwdb*, udev_hwdb_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_queue*, udev_queue_unref);
diff --git a/src/libudev/libudev.c b/src/libudev/libudev.c
new file mode 100644
index 0000000..d11e7a9
--- /dev/null
+++ b/src/libudev/libudev.c
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libudev.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "missing.h"
+#include "string-util.h"
+
+/**
+ * SECTION:libudev
+ * @short_description: libudev context
+ */
+
+/**
+ * udev:
+ *
+ * Opaque object representing the library context.
+ */
+struct udev {
+ unsigned n_ref;
+ void *userdata;
+};
+
+/**
+ * udev_get_userdata:
+ * @udev: udev library context
+ *
+ * Retrieve stored data pointer from library context. This might be useful
+ * to access from callbacks.
+ *
+ * Returns: stored userdata
+ **/
+_public_ void *udev_get_userdata(struct udev *udev) {
+ assert_return(udev, NULL);
+
+ return udev->userdata;
+}
+
+/**
+ * udev_set_userdata:
+ * @udev: udev library context
+ * @userdata: data pointer
+ *
+ * Store custom @userdata in the library context.
+ **/
+_public_ void udev_set_userdata(struct udev *udev, void *userdata) {
+ if (!udev)
+ return;
+
+ udev->userdata = userdata;
+}
+
+/**
+ * udev_new:
+ *
+ * Create udev library context. This only allocates the basic data structure.
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev library context.
+ *
+ * Returns: a new udev library context
+ **/
+_public_ struct udev *udev_new(void) {
+ struct udev *udev;
+
+ udev = new(struct udev, 1);
+ if (!udev)
+ return_with_errno(NULL, ENOMEM);
+
+ *udev = (struct udev) {
+ .n_ref = 1,
+ };
+
+ return udev;
+}
+
+/**
+ * udev_ref:
+ * @udev: udev library context
+ *
+ * Take a reference of the udev library context.
+ *
+ * Returns: the passed udev library context
+ **/
+DEFINE_PUBLIC_TRIVIAL_REF_FUNC(struct udev, udev);
+
+/**
+ * udev_unref:
+ * @udev: udev library context
+ *
+ * Drop a reference of the udev library context. If the refcount
+ * reaches zero, the resources of the context will be released.
+ *
+ * Returns: the passed udev library context if it has still an active reference, or #NULL otherwise.
+ **/
+_public_ struct udev *udev_unref(struct udev *udev) {
+ if (!udev)
+ return NULL;
+
+ assert(udev->n_ref > 0);
+ udev->n_ref--;
+ if (udev->n_ref > 0)
+ /* This is different from our convetion, but let's keep backward
+ * compatibility. So, do not use DEFINE_PUBLIC_TRIVIAL_UNREF_FUNC()
+ * macro to define this function. */
+ return udev;
+
+ return mfree(udev);
+}
+
+/**
+ * udev_set_log_fn:
+ * @udev: udev library context
+ * @log_fn: function to be called for log messages
+ *
+ * This function is deprecated.
+ *
+ **/
+_public_ void udev_set_log_fn(
+ struct udev *udev,
+ void (*log_fn)(struct udev *udev,
+ int priority, const char *file, int line, const char *fn,
+ const char *format, va_list args)) {
+ return;
+}
+
+/**
+ * udev_get_log_priority:
+ * @udev: udev library context
+ *
+ * This function is deprecated.
+ *
+ **/
+_public_ int udev_get_log_priority(struct udev *udev) {
+ return log_get_max_level();
+}
+
+/**
+ * udev_set_log_priority:
+ * @udev: udev library context
+ * @priority: the new log priority
+ *
+ * This function is deprecated.
+ *
+ **/
+_public_ void udev_set_log_priority(struct udev *udev, int priority) {
+ log_set_max_level(priority);
+}
diff --git a/src/libudev/libudev.h b/src/libudev/libudev.h
new file mode 100644
index 0000000..02c2e5e
--- /dev/null
+++ b/src/libudev/libudev.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#ifndef _LIBUDEV_H_
+#define _LIBUDEV_H_
+
+#include <stdarg.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * udev - library context
+ *
+ * reads the udev config and system environment
+ * allows custom logging
+ */
+struct udev;
+struct udev *udev_ref(struct udev *udev);
+struct udev *udev_unref(struct udev *udev);
+struct udev *udev_new(void);
+void udev_set_log_fn(struct udev *udev,
+ void (*log_fn)(struct udev *udev,
+ int priority, const char *file, int line, const char *fn,
+ const char *format, va_list args)) __attribute__((__deprecated__));
+int udev_get_log_priority(struct udev *udev) __attribute__((__deprecated__));
+void udev_set_log_priority(struct udev *udev, int priority) __attribute__((__deprecated__));
+void *udev_get_userdata(struct udev *udev);
+void udev_set_userdata(struct udev *udev, void *userdata);
+
+/*
+ * udev_list
+ *
+ * access to libudev generated lists
+ */
+struct udev_list_entry;
+struct udev_list_entry *udev_list_entry_get_next(struct udev_list_entry *list_entry);
+struct udev_list_entry *udev_list_entry_get_by_name(struct udev_list_entry *list_entry, const char *name);
+const char *udev_list_entry_get_name(struct udev_list_entry *list_entry);
+const char *udev_list_entry_get_value(struct udev_list_entry *list_entry);
+/**
+ * udev_list_entry_foreach:
+ * @list_entry: entry to store the current position
+ * @first_entry: first entry to start with
+ *
+ * Helper to iterate over all entries of a list.
+ */
+#define udev_list_entry_foreach(list_entry, first_entry) \
+ for (list_entry = first_entry; \
+ list_entry; \
+ list_entry = udev_list_entry_get_next(list_entry))
+
+/*
+ * udev_device
+ *
+ * access to sysfs/kernel devices
+ */
+struct udev_device;
+struct udev_device *udev_device_ref(struct udev_device *udev_device);
+struct udev_device *udev_device_unref(struct udev_device *udev_device);
+struct udev *udev_device_get_udev(struct udev_device *udev_device);
+struct udev_device *udev_device_new_from_syspath(struct udev *udev, const char *syspath);
+struct udev_device *udev_device_new_from_devnum(struct udev *udev, char type, dev_t devnum);
+struct udev_device *udev_device_new_from_subsystem_sysname(struct udev *udev, const char *subsystem, const char *sysname);
+struct udev_device *udev_device_new_from_device_id(struct udev *udev, const char *id);
+struct udev_device *udev_device_new_from_environment(struct udev *udev);
+/* udev_device_get_parent_*() does not take a reference on the returned device, it is automatically unref'd with the parent */
+struct udev_device *udev_device_get_parent(struct udev_device *udev_device);
+struct udev_device *udev_device_get_parent_with_subsystem_devtype(struct udev_device *udev_device,
+ const char *subsystem, const char *devtype);
+/* retrieve device properties */
+const char *udev_device_get_devpath(struct udev_device *udev_device);
+const char *udev_device_get_subsystem(struct udev_device *udev_device);
+const char *udev_device_get_devtype(struct udev_device *udev_device);
+const char *udev_device_get_syspath(struct udev_device *udev_device);
+const char *udev_device_get_sysname(struct udev_device *udev_device);
+const char *udev_device_get_sysnum(struct udev_device *udev_device);
+const char *udev_device_get_devnode(struct udev_device *udev_device);
+int udev_device_get_is_initialized(struct udev_device *udev_device);
+struct udev_list_entry *udev_device_get_devlinks_list_entry(struct udev_device *udev_device);
+struct udev_list_entry *udev_device_get_properties_list_entry(struct udev_device *udev_device);
+struct udev_list_entry *udev_device_get_tags_list_entry(struct udev_device *udev_device);
+struct udev_list_entry *udev_device_get_sysattr_list_entry(struct udev_device *udev_device);
+const char *udev_device_get_property_value(struct udev_device *udev_device, const char *key);
+const char *udev_device_get_driver(struct udev_device *udev_device);
+dev_t udev_device_get_devnum(struct udev_device *udev_device);
+const char *udev_device_get_action(struct udev_device *udev_device);
+unsigned long long int udev_device_get_seqnum(struct udev_device *udev_device);
+unsigned long long int udev_device_get_usec_since_initialized(struct udev_device *udev_device);
+const char *udev_device_get_sysattr_value(struct udev_device *udev_device, const char *sysattr);
+int udev_device_set_sysattr_value(struct udev_device *udev_device, const char *sysattr, const char *value);
+int udev_device_has_tag(struct udev_device *udev_device, const char *tag);
+
+/*
+ * udev_monitor
+ *
+ * access to kernel uevents and udev events
+ */
+struct udev_monitor;
+struct udev_monitor *udev_monitor_ref(struct udev_monitor *udev_monitor);
+struct udev_monitor *udev_monitor_unref(struct udev_monitor *udev_monitor);
+struct udev *udev_monitor_get_udev(struct udev_monitor *udev_monitor);
+/* kernel and udev generated events over netlink */
+struct udev_monitor *udev_monitor_new_from_netlink(struct udev *udev, const char *name);
+/* bind socket */
+int udev_monitor_enable_receiving(struct udev_monitor *udev_monitor);
+int udev_monitor_set_receive_buffer_size(struct udev_monitor *udev_monitor, int size);
+int udev_monitor_get_fd(struct udev_monitor *udev_monitor);
+struct udev_device *udev_monitor_receive_device(struct udev_monitor *udev_monitor);
+/* in-kernel socket filters to select messages that get delivered to a listener */
+int udev_monitor_filter_add_match_subsystem_devtype(struct udev_monitor *udev_monitor,
+ const char *subsystem, const char *devtype);
+int udev_monitor_filter_add_match_tag(struct udev_monitor *udev_monitor, const char *tag);
+int udev_monitor_filter_update(struct udev_monitor *udev_monitor);
+int udev_monitor_filter_remove(struct udev_monitor *udev_monitor);
+
+/*
+ * udev_enumerate
+ *
+ * search sysfs for specific devices and provide a sorted list
+ */
+struct udev_enumerate;
+struct udev_enumerate *udev_enumerate_ref(struct udev_enumerate *udev_enumerate);
+struct udev_enumerate *udev_enumerate_unref(struct udev_enumerate *udev_enumerate);
+struct udev *udev_enumerate_get_udev(struct udev_enumerate *udev_enumerate);
+struct udev_enumerate *udev_enumerate_new(struct udev *udev);
+/* device properties filter */
+int udev_enumerate_add_match_subsystem(struct udev_enumerate *udev_enumerate, const char *subsystem);
+int udev_enumerate_add_nomatch_subsystem(struct udev_enumerate *udev_enumerate, const char *subsystem);
+int udev_enumerate_add_match_sysattr(struct udev_enumerate *udev_enumerate, const char *sysattr, const char *value);
+int udev_enumerate_add_nomatch_sysattr(struct udev_enumerate *udev_enumerate, const char *sysattr, const char *value);
+int udev_enumerate_add_match_property(struct udev_enumerate *udev_enumerate, const char *property, const char *value);
+int udev_enumerate_add_match_sysname(struct udev_enumerate *udev_enumerate, const char *sysname);
+int udev_enumerate_add_match_tag(struct udev_enumerate *udev_enumerate, const char *tag);
+int udev_enumerate_add_match_parent(struct udev_enumerate *udev_enumerate, struct udev_device *parent);
+int udev_enumerate_add_match_is_initialized(struct udev_enumerate *udev_enumerate);
+int udev_enumerate_add_syspath(struct udev_enumerate *udev_enumerate, const char *syspath);
+/* run enumeration with active filters */
+int udev_enumerate_scan_devices(struct udev_enumerate *udev_enumerate);
+int udev_enumerate_scan_subsystems(struct udev_enumerate *udev_enumerate);
+/* return device list */
+struct udev_list_entry *udev_enumerate_get_list_entry(struct udev_enumerate *udev_enumerate);
+
+/*
+ * udev_queue
+ *
+ * access to the currently running udev events
+ */
+struct udev_queue;
+struct udev_queue *udev_queue_ref(struct udev_queue *udev_queue);
+struct udev_queue *udev_queue_unref(struct udev_queue *udev_queue);
+struct udev *udev_queue_get_udev(struct udev_queue *udev_queue);
+struct udev_queue *udev_queue_new(struct udev *udev);
+unsigned long long int udev_queue_get_kernel_seqnum(struct udev_queue *udev_queue) __attribute__((__deprecated__));
+ unsigned long long int udev_queue_get_udev_seqnum(struct udev_queue *udev_queue) __attribute__((__deprecated__));
+int udev_queue_get_udev_is_active(struct udev_queue *udev_queue);
+int udev_queue_get_queue_is_empty(struct udev_queue *udev_queue);
+int udev_queue_get_seqnum_is_finished(struct udev_queue *udev_queue, unsigned long long int seqnum) __attribute__((__deprecated__));
+int udev_queue_get_seqnum_sequence_is_finished(struct udev_queue *udev_queue,
+ unsigned long long int start, unsigned long long int end) __attribute__((__deprecated__));
+int udev_queue_get_fd(struct udev_queue *udev_queue);
+int udev_queue_flush(struct udev_queue *udev_queue);
+struct udev_list_entry *udev_queue_get_queued_list_entry(struct udev_queue *udev_queue) __attribute__((__deprecated__));
+
+/*
+ * udev_hwdb
+ *
+ * access to the static hardware properties database
+ */
+struct udev_hwdb;
+struct udev_hwdb *udev_hwdb_new(struct udev *udev);
+struct udev_hwdb *udev_hwdb_ref(struct udev_hwdb *hwdb);
+struct udev_hwdb *udev_hwdb_unref(struct udev_hwdb *hwdb);
+struct udev_list_entry *udev_hwdb_get_properties_list_entry(struct udev_hwdb *hwdb, const char *modalias, unsigned flags);
+
+/*
+ * udev_util
+ *
+ * udev specific utilities
+ */
+int udev_util_encode_string(const char *str, char *str_enc, size_t len);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/src/libudev/libudev.pc.in b/src/libudev/libudev.pc.in
new file mode 100644
index 0000000..40b3403
--- /dev/null
+++ b/src/libudev/libudev.pc.in
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@rootlibdir@
+includedir=@includedir@
+
+Name: libudev
+Description: Library to access udev device information
+Version: @PROJECT_VERSION@
+Libs: -L${libdir} -ludev
+Cflags: -I${includedir}
diff --git a/src/libudev/libudev.sym b/src/libudev/libudev.sym
new file mode 100644
index 0000000..fb2e03e
--- /dev/null
+++ b/src/libudev/libudev.sym
@@ -0,0 +1,120 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1+
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+***/
+
+LIBUDEV_183 {
+global:
+ udev_device_get_action;
+ udev_device_get_devlinks_list_entry;
+ udev_device_get_devnode;
+ udev_device_get_devnum;
+ udev_device_get_devpath;
+ udev_device_get_devtype;
+ udev_device_get_driver;
+ udev_device_get_is_initialized;
+ udev_device_get_parent;
+ udev_device_get_parent_with_subsystem_devtype;
+ udev_device_get_properties_list_entry;
+ udev_device_get_property_value;
+ udev_device_get_seqnum;
+ udev_device_get_subsystem;
+ udev_device_get_sysattr_list_entry;
+ udev_device_get_sysattr_value;
+ udev_device_get_sysname;
+ udev_device_get_sysnum;
+ udev_device_get_syspath;
+ udev_device_get_tags_list_entry;
+ udev_device_get_udev;
+ udev_device_get_usec_since_initialized;
+ udev_device_has_tag;
+ udev_device_new_from_devnum;
+ udev_device_new_from_environment;
+ udev_device_new_from_subsystem_sysname;
+ udev_device_new_from_syspath;
+ udev_device_ref;
+ udev_device_unref;
+ udev_enumerate_add_match_is_initialized;
+ udev_enumerate_add_match_parent;
+ udev_enumerate_add_match_property;
+ udev_enumerate_add_match_subsystem;
+ udev_enumerate_add_match_sysattr;
+ udev_enumerate_add_match_sysname;
+ udev_enumerate_add_match_tag;
+ udev_enumerate_add_nomatch_subsystem;
+ udev_enumerate_add_nomatch_sysattr;
+ udev_enumerate_add_syspath;
+ udev_enumerate_get_list_entry;
+ udev_enumerate_get_udev;
+ udev_enumerate_new;
+ udev_enumerate_ref;
+ udev_enumerate_scan_devices;
+ udev_enumerate_scan_subsystems;
+ udev_enumerate_unref;
+ udev_get_log_priority;
+ udev_get_userdata;
+ udev_list_entry_get_by_name;
+ udev_list_entry_get_name;
+ udev_list_entry_get_next;
+ udev_list_entry_get_value;
+ udev_monitor_enable_receiving;
+ udev_monitor_filter_add_match_subsystem_devtype;
+ udev_monitor_filter_add_match_tag;
+ udev_monitor_filter_remove;
+ udev_monitor_filter_update;
+ udev_monitor_get_fd;
+ udev_monitor_get_udev;
+ udev_monitor_new_from_netlink;
+ udev_monitor_receive_device;
+ udev_monitor_ref;
+ udev_monitor_set_receive_buffer_size;
+ udev_monitor_unref;
+ udev_new;
+ udev_queue_get_kernel_seqnum;
+ udev_queue_get_queue_is_empty;
+ udev_queue_get_queued_list_entry;
+ udev_queue_get_seqnum_is_finished;
+ udev_queue_get_seqnum_sequence_is_finished;
+ udev_queue_get_udev;
+ udev_queue_get_udev_is_active;
+ udev_queue_get_udev_seqnum;
+ udev_queue_new;
+ udev_queue_ref;
+ udev_queue_unref;
+ udev_ref;
+ udev_set_log_fn;
+ udev_set_log_priority;
+ udev_set_userdata;
+ udev_unref;
+ udev_util_encode_string;
+local:
+ *;
+};
+
+LIBUDEV_189 {
+global:
+ udev_device_new_from_device_id;
+} LIBUDEV_183;
+
+LIBUDEV_196 {
+global:
+ udev_hwdb_new;
+ udev_hwdb_ref;
+ udev_hwdb_unref;
+ udev_hwdb_get_properties_list_entry;
+} LIBUDEV_189;
+
+LIBUDEV_199 {
+global:
+ udev_device_set_sysattr_value;
+} LIBUDEV_196;
+
+LIBUDEV_215 {
+global:
+ udev_queue_flush;
+ udev_queue_get_fd;
+} LIBUDEV_199;
diff --git a/src/libudev/meson.build b/src/libudev/meson.build
new file mode 100644
index 0000000..8818974
--- /dev/null
+++ b/src/libudev/meson.build
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+libudev_sources = files('''
+ libudev.c
+ libudev-device.c
+ libudev-device-internal.h
+ libudev-enumerate.c
+ libudev-hwdb.c
+ libudev-list.c
+ libudev-list-internal.h
+ libudev-monitor.c
+ libudev-queue.c
+ libudev-util.c
+ libudev-util.h
+'''.split())
+
+############################################################
+
+libudev_sym = files('libudev.sym')
+libudev_sym_path = meson.current_source_dir() + '/libudev.sym'
+
+install_headers('libudev.h')
+libudev_h_path = '@0@/libudev.h'.format(meson.current_source_dir())
+
+configure_file(
+ input : 'libudev.pc.in',
+ output : 'libudev.pc',
+ configuration : substs,
+ install_dir : pkgconfiglibdir == 'no' ? '' : pkgconfiglibdir)
diff --git a/src/locale/kbd-model-map b/src/locale/kbd-model-map
new file mode 100644
index 0000000..8fa984f
--- /dev/null
+++ b/src/locale/kbd-model-map
@@ -0,0 +1,68 @@
+# Generated from system-config-keyboard's model list
+# consolelayout xlayout xmodel xvariant xoptions
+sg ch pc105 de_nodeadkeys terminate:ctrl_alt_bksp
+nl nl pc105 - terminate:ctrl_alt_bksp
+mk-utf mk,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+trq tr pc105 - terminate:ctrl_alt_bksp
+uk gb pc105 - terminate:ctrl_alt_bksp
+is-latin1 is pc105 - terminate:ctrl_alt_bksp
+de de pc105 - terminate:ctrl_alt_bksp
+la-latin1 latam pc105 - terminate:ctrl_alt_bksp
+us us pc105+inet - terminate:ctrl_alt_bksp
+ko kr pc105 - terminate:ctrl_alt_bksp
+ro-std ro pc105 std terminate:ctrl_alt_bksp
+de-latin1 de pc105 - terminate:ctrl_alt_bksp
+slovene si pc105 - terminate:ctrl_alt_bksp
+hu101 hu pc105 qwerty terminate:ctrl_alt_bksp
+jp106 jp jp106 - terminate:ctrl_alt_bksp
+croat hr pc105 - terminate:ctrl_alt_bksp
+it2 it pc105 - terminate:ctrl_alt_bksp
+hu hu pc105 - terminate:ctrl_alt_bksp
+sr-latin rs pc105 latin terminate:ctrl_alt_bksp
+fi fi pc105 - terminate:ctrl_alt_bksp
+fr_CH ch pc105 fr terminate:ctrl_alt_bksp
+dk-latin1 dk pc105 - terminate:ctrl_alt_bksp
+fr fr pc105 - terminate:ctrl_alt_bksp
+it it pc105 - terminate:ctrl_alt_bksp
+ua-utf ua,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+fr-latin1 fr pc105 - terminate:ctrl_alt_bksp
+sg-latin1 ch pc105 de_nodeadkeys terminate:ctrl_alt_bksp
+be-latin1 be pc105 - terminate:ctrl_alt_bksp
+dk dk pc105 - terminate:ctrl_alt_bksp
+fr-pc fr pc105 - terminate:ctrl_alt_bksp
+bg_pho-utf8 bg,us pc105 ,phonetic terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+it-ibm it pc105 - terminate:ctrl_alt_bksp
+cz-us-qwertz cz,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+br-abnt2 br abnt2 - terminate:ctrl_alt_bksp
+ro ro pc105 - terminate:ctrl_alt_bksp
+us-acentos us pc105 intl terminate:ctrl_alt_bksp
+pt-latin1 pt pc105 - terminate:ctrl_alt_bksp
+ro-std-cedilla ro pc105 std_cedilla terminate:ctrl_alt_bksp
+tj_alt-UTF8 tj pc105 - terminate:ctrl_alt_bksp
+de-latin1-nodeadkeys de pc105 nodeadkeys terminate:ctrl_alt_bksp
+no no pc105 - terminate:ctrl_alt_bksp
+bg_bds-utf8 bg,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+dvorak us pc105 dvorak terminate:ctrl_alt_bksp
+dvorak us pc105 dvorak-alt-intl terminate:ctrl_alt_bksp
+ru ru,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+cz-lat2 cz pc105 qwerty terminate:ctrl_alt_bksp
+pl2 pl pc105 - terminate:ctrl_alt_bksp
+es es pc105 - terminate:ctrl_alt_bksp
+ro-cedilla ro pc105 cedilla terminate:ctrl_alt_bksp
+ie ie pc105 - terminate:ctrl_alt_bksp
+et ee pc105 - terminate:ctrl_alt_bksp
+sk-qwerty sk pc105 - terminate:ctrl_alt_bksp,qwerty
+sk-qwertz sk pc105 - terminate:ctrl_alt_bksp
+fr-latin9 fr pc105 latin9 terminate:ctrl_alt_bksp
+fr_CH-latin1 ch pc105 fr terminate:ctrl_alt_bksp
+cf ca pc105 - terminate:ctrl_alt_bksp
+sv-latin1 se pc105 - terminate:ctrl_alt_bksp
+sr-cy rs pc105 - terminate:ctrl_alt_bksp
+gr gr,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+by by,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+il il pc105 - terminate:ctrl_alt_bksp
+kazakh kz,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+lt.baltic lt pc105 - terminate:ctrl_alt_bksp
+lt.l4 lt pc105 - terminate:ctrl_alt_bksp
+lt lt pc105 - terminate:ctrl_alt_bksp
+khmer kh,us pc105 - terminate:ctrl_alt_bksp
diff --git a/src/locale/keymap-util.c b/src/locale/keymap-util.c
new file mode 100644
index 0000000..6b6b32a
--- /dev/null
+++ b/src/locale/keymap-util.c
@@ -0,0 +1,788 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio_ext.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "bus-util.h"
+#include "def.h"
+#include "env-file.h"
+#include "env-file-label.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio-label.h"
+#include "fileio.h"
+#include "keymap-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+static bool startswith_comma(const char *s, const char *prefix) {
+ s = startswith(s, prefix);
+ if (!s)
+ return false;
+
+ return IN_SET(*s, ',', '\0');
+}
+
+static const char* strnulldash(const char *s) {
+ return isempty(s) || streq(s, "-") ? NULL : s;
+}
+
+static const char* systemd_kbd_model_map(void) {
+ const char* s;
+
+ s = getenv("SYSTEMD_KBD_MODEL_MAP");
+ if (s)
+ return s;
+
+ return SYSTEMD_KBD_MODEL_MAP;
+}
+
+static const char* systemd_language_fallback_map(void) {
+ const char* s;
+
+ s = getenv("SYSTEMD_LANGUAGE_FALLBACK_MAP");
+ if (s)
+ return s;
+
+ return SYSTEMD_LANGUAGE_FALLBACK_MAP;
+}
+
+static void context_free_x11(Context *c) {
+ c->x11_layout = mfree(c->x11_layout);
+ c->x11_options = mfree(c->x11_options);
+ c->x11_model = mfree(c->x11_model);
+ c->x11_variant = mfree(c->x11_variant);
+}
+
+static void context_free_vconsole(Context *c) {
+ c->vc_keymap = mfree(c->vc_keymap);
+ c->vc_keymap_toggle = mfree(c->vc_keymap_toggle);
+}
+
+static void context_free_locale(Context *c) {
+ int p;
+
+ for (p = 0; p < _VARIABLE_LC_MAX; p++)
+ c->locale[p] = mfree(c->locale[p]);
+}
+
+void context_clear(Context *c) {
+ context_free_locale(c);
+ context_free_x11(c);
+ context_free_vconsole(c);
+
+ sd_bus_message_unref(c->locale_cache);
+ sd_bus_message_unref(c->x11_cache);
+ sd_bus_message_unref(c->vc_cache);
+
+ bus_verify_polkit_async_registry_free(c->polkit_registry);
+};
+
+void locale_simplify(char *locale[_VARIABLE_LC_MAX]) {
+ int p;
+
+ for (p = VARIABLE_LANG+1; p < _VARIABLE_LC_MAX; p++)
+ if (isempty(locale[p]) || streq_ptr(locale[VARIABLE_LANG], locale[p]))
+ locale[p] = mfree(locale[p]);
+}
+
+int locale_read_data(Context *c, sd_bus_message *m) {
+ struct stat st;
+ int r;
+
+ /* Do not try to re-read the file within single bus operation. */
+ if (m) {
+ if (m == c->locale_cache)
+ return 0;
+
+ sd_bus_message_unref(c->locale_cache);
+ c->locale_cache = sd_bus_message_ref(m);
+ }
+
+ r = stat("/etc/locale.conf", &st);
+ if (r < 0 && errno != ENOENT)
+ return -errno;
+
+ if (r >= 0) {
+ usec_t t;
+
+ /* If mtime is not changed, then we do not need to re-read the file. */
+ t = timespec_load(&st.st_mtim);
+ if (c->locale_mtime != USEC_INFINITY && t == c->locale_mtime)
+ return 0;
+
+ c->locale_mtime = t;
+ context_free_locale(c);
+
+ r = parse_env_file(NULL, "/etc/locale.conf",
+ "LANG", &c->locale[VARIABLE_LANG],
+ "LANGUAGE", &c->locale[VARIABLE_LANGUAGE],
+ "LC_CTYPE", &c->locale[VARIABLE_LC_CTYPE],
+ "LC_NUMERIC", &c->locale[VARIABLE_LC_NUMERIC],
+ "LC_TIME", &c->locale[VARIABLE_LC_TIME],
+ "LC_COLLATE", &c->locale[VARIABLE_LC_COLLATE],
+ "LC_MONETARY", &c->locale[VARIABLE_LC_MONETARY],
+ "LC_MESSAGES", &c->locale[VARIABLE_LC_MESSAGES],
+ "LC_PAPER", &c->locale[VARIABLE_LC_PAPER],
+ "LC_NAME", &c->locale[VARIABLE_LC_NAME],
+ "LC_ADDRESS", &c->locale[VARIABLE_LC_ADDRESS],
+ "LC_TELEPHONE", &c->locale[VARIABLE_LC_TELEPHONE],
+ "LC_MEASUREMENT", &c->locale[VARIABLE_LC_MEASUREMENT],
+ "LC_IDENTIFICATION", &c->locale[VARIABLE_LC_IDENTIFICATION]);
+ if (r < 0)
+ return r;
+ } else {
+ int p;
+
+ c->locale_mtime = USEC_INFINITY;
+ context_free_locale(c);
+
+ /* Fill in what we got passed from systemd. */
+ for (p = 0; p < _VARIABLE_LC_MAX; p++) {
+ const char *name;
+
+ name = locale_variable_to_string(p);
+ assert(name);
+
+ r = free_and_strdup(&c->locale[p], empty_to_null(getenv(name)));
+ if (r < 0)
+ return r;
+ }
+ }
+
+ locale_simplify(c->locale);
+ return 0;
+}
+
+int vconsole_read_data(Context *c, sd_bus_message *m) {
+ struct stat st;
+ usec_t t;
+ int r;
+
+ /* Do not try to re-read the file within single bus operation. */
+ if (m) {
+ if (m == c->vc_cache)
+ return 0;
+
+ sd_bus_message_unref(c->vc_cache);
+ c->vc_cache = sd_bus_message_ref(m);
+ }
+
+ if (stat("/etc/vconsole.conf", &st) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ c->vc_mtime = USEC_INFINITY;
+ context_free_vconsole(c);
+ return 0;
+ }
+
+ /* If mtime is not changed, then we do not need to re-read */
+ t = timespec_load(&st.st_mtim);
+ if (c->vc_mtime != USEC_INFINITY && t == c->vc_mtime)
+ return 0;
+
+ c->vc_mtime = t;
+ context_free_vconsole(c);
+
+ r = parse_env_file(NULL, "/etc/vconsole.conf",
+ "KEYMAP", &c->vc_keymap,
+ "KEYMAP_TOGGLE", &c->vc_keymap_toggle);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int x11_read_data(Context *c, sd_bus_message *m) {
+ _cleanup_fclose_ FILE *f = NULL;
+ bool in_section = false;
+ struct stat st;
+ usec_t t;
+ int r;
+
+ /* Do not try to re-read the file within single bus operation. */
+ if (m) {
+ if (m == c->x11_cache)
+ return 0;
+
+ sd_bus_message_unref(c->x11_cache);
+ c->x11_cache = sd_bus_message_ref(m);
+ }
+
+ if (stat("/etc/X11/xorg.conf.d/00-keyboard.conf", &st) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ c->x11_mtime = USEC_INFINITY;
+ context_free_x11(c);
+ return 0;
+ }
+
+ /* If mtime is not changed, then we do not need to re-read */
+ t = timespec_load(&st.st_mtim);
+ if (c->x11_mtime != USEC_INFINITY && t == c->x11_mtime)
+ return 0;
+
+ c->x11_mtime = t;
+ context_free_x11(c);
+
+ f = fopen("/etc/X11/xorg.conf.d/00-keyboard.conf", "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+ if (IN_SET(l[0], 0, '#'))
+ continue;
+
+ if (in_section && first_word(l, "Option")) {
+ _cleanup_strv_free_ char **a = NULL;
+
+ r = strv_split_extract(&a, l, WHITESPACE, EXTRACT_QUOTES);
+ if (r < 0)
+ return r;
+
+ if (strv_length(a) == 3) {
+ char **p = NULL;
+
+ if (streq(a[1], "XkbLayout"))
+ p = &c->x11_layout;
+ else if (streq(a[1], "XkbModel"))
+ p = &c->x11_model;
+ else if (streq(a[1], "XkbVariant"))
+ p = &c->x11_variant;
+ else if (streq(a[1], "XkbOptions"))
+ p = &c->x11_options;
+
+ if (p) {
+ free_and_replace(*p, a[2]);
+ }
+ }
+
+ } else if (!in_section && first_word(l, "Section")) {
+ _cleanup_strv_free_ char **a = NULL;
+
+ r = strv_split_extract(&a, l, WHITESPACE, EXTRACT_QUOTES);
+ if (r < 0)
+ return -ENOMEM;
+
+ if (strv_length(a) == 2 && streq(a[1], "InputClass"))
+ in_section = true;
+
+ } else if (in_section && first_word(l, "EndSection"))
+ in_section = false;
+ }
+
+ return 0;
+}
+
+int locale_write_data(Context *c, char ***settings) {
+ _cleanup_strv_free_ char **l = NULL;
+ struct stat st;
+ int r, p;
+
+ /* Set values will be returned as strv in *settings on success. */
+
+ for (p = 0; p < _VARIABLE_LC_MAX; p++) {
+ _cleanup_free_ char *t = NULL;
+ char **u;
+ const char *name;
+
+ name = locale_variable_to_string(p);
+ assert(name);
+
+ if (isempty(c->locale[p]))
+ continue;
+
+ if (asprintf(&t, "%s=%s", name, c->locale[p]) < 0)
+ return -ENOMEM;
+
+ u = strv_env_set(l, t);
+ if (!u)
+ return -ENOMEM;
+
+ strv_free_and_replace(l, u);
+ }
+
+ if (strv_isempty(l)) {
+ if (unlink("/etc/locale.conf") < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ c->locale_mtime = USEC_INFINITY;
+ return 0;
+ }
+
+ r = write_env_file_label("/etc/locale.conf", l);
+ if (r < 0)
+ return r;
+
+ *settings = TAKE_PTR(l);
+
+ if (stat("/etc/locale.conf", &st) >= 0)
+ c->locale_mtime = timespec_load(&st.st_mtim);
+
+ return 0;
+}
+
+int vconsole_write_data(Context *c) {
+ _cleanup_strv_free_ char **l = NULL;
+ struct stat st;
+ int r;
+
+ r = load_env_file(NULL, "/etc/vconsole.conf", &l);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ if (isempty(c->vc_keymap))
+ l = strv_env_unset(l, "KEYMAP");
+ else {
+ _cleanup_free_ char *s = NULL;
+ char **u;
+
+ s = strappend("KEYMAP=", c->vc_keymap);
+ if (!s)
+ return -ENOMEM;
+
+ u = strv_env_set(l, s);
+ if (!u)
+ return -ENOMEM;
+
+ strv_free_and_replace(l, u);
+ }
+
+ if (isempty(c->vc_keymap_toggle))
+ l = strv_env_unset(l, "KEYMAP_TOGGLE");
+ else {
+ _cleanup_free_ char *s = NULL;
+ char **u;
+
+ s = strappend("KEYMAP_TOGGLE=", c->vc_keymap_toggle);
+ if (!s)
+ return -ENOMEM;
+
+ u = strv_env_set(l, s);
+ if (!u)
+ return -ENOMEM;
+
+ strv_free_and_replace(l, u);
+ }
+
+ if (strv_isempty(l)) {
+ if (unlink("/etc/vconsole.conf") < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ c->vc_mtime = USEC_INFINITY;
+ return 0;
+ }
+
+ r = write_env_file_label("/etc/vconsole.conf", l);
+ if (r < 0)
+ return r;
+
+ if (stat("/etc/vconsole.conf", &st) >= 0)
+ c->vc_mtime = timespec_load(&st.st_mtim);
+
+ return 0;
+}
+
+int x11_write_data(Context *c) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *temp_path = NULL;
+ struct stat st;
+ int r;
+
+ if (isempty(c->x11_layout) &&
+ isempty(c->x11_model) &&
+ isempty(c->x11_variant) &&
+ isempty(c->x11_options)) {
+
+ if (unlink("/etc/X11/xorg.conf.d/00-keyboard.conf") < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ c->vc_mtime = USEC_INFINITY;
+ return 0;
+ }
+
+ mkdir_p_label("/etc/X11/xorg.conf.d", 0755);
+
+ r = fopen_temporary("/etc/X11/xorg.conf.d/00-keyboard.conf", &f, &temp_path);
+ if (r < 0)
+ return r;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+ (void) fchmod(fileno(f), 0644);
+
+ fputs("# Written by systemd-localed(8), read by systemd-localed and Xorg. It's\n"
+ "# probably wise not to edit this file manually. Use localectl(1) to\n"
+ "# instruct systemd-localed to update it.\n"
+ "Section \"InputClass\"\n"
+ " Identifier \"system-keyboard\"\n"
+ " MatchIsKeyboard \"on\"\n", f);
+
+ if (!isempty(c->x11_layout))
+ fprintf(f, " Option \"XkbLayout\" \"%s\"\n", c->x11_layout);
+
+ if (!isempty(c->x11_model))
+ fprintf(f, " Option \"XkbModel\" \"%s\"\n", c->x11_model);
+
+ if (!isempty(c->x11_variant))
+ fprintf(f, " Option \"XkbVariant\" \"%s\"\n", c->x11_variant);
+
+ if (!isempty(c->x11_options))
+ fprintf(f, " Option \"XkbOptions\" \"%s\"\n", c->x11_options);
+
+ fputs("EndSection\n", f);
+
+ r = fflush_sync_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, "/etc/X11/xorg.conf.d/00-keyboard.conf") < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (stat("/etc/X11/xorg.conf.d/00-keyboard.conf", &st) >= 0)
+ c->x11_mtime = timespec_load(&st.st_mtim);
+
+ return 0;
+
+fail:
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return r;
+}
+
+static int read_next_mapping(const char* filename,
+ unsigned min_fields, unsigned max_fields,
+ FILE *f, unsigned *n, char ***a) {
+ assert(f);
+ assert(n);
+ assert(a);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ size_t length;
+ char *l, **b;
+ int r;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ (*n)++;
+
+ l = strstrip(line);
+ if (IN_SET(l[0], 0, '#'))
+ continue;
+
+ r = strv_split_extract(&b, l, WHITESPACE, EXTRACT_QUOTES);
+ if (r < 0)
+ return r;
+
+ length = strv_length(b);
+ if (length < min_fields || length > max_fields) {
+ log_error("Invalid line %s:%u, ignoring.", filename, *n);
+ strv_free(b);
+ continue;
+
+ }
+
+ *a = b;
+ return 1;
+ }
+
+ return 0;
+}
+
+int vconsole_convert_to_x11(Context *c) {
+ const char *map;
+ int modified = -1;
+
+ map = systemd_kbd_model_map();
+
+ if (isempty(c->vc_keymap)) {
+ modified =
+ !isempty(c->x11_layout) ||
+ !isempty(c->x11_model) ||
+ !isempty(c->x11_variant) ||
+ !isempty(c->x11_options);
+
+ context_free_x11(c);
+ } else {
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned n = 0;
+
+ f = fopen(map, "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_strv_free_ char **a = NULL;
+ int r;
+
+ r = read_next_mapping(map, 5, UINT_MAX, f, &n, &a);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (!streq(c->vc_keymap, a[0]))
+ continue;
+
+ if (!streq_ptr(c->x11_layout, strnulldash(a[1])) ||
+ !streq_ptr(c->x11_model, strnulldash(a[2])) ||
+ !streq_ptr(c->x11_variant, strnulldash(a[3])) ||
+ !streq_ptr(c->x11_options, strnulldash(a[4]))) {
+
+ if (free_and_strdup(&c->x11_layout, strnulldash(a[1])) < 0 ||
+ free_and_strdup(&c->x11_model, strnulldash(a[2])) < 0 ||
+ free_and_strdup(&c->x11_variant, strnulldash(a[3])) < 0 ||
+ free_and_strdup(&c->x11_options, strnulldash(a[4])) < 0)
+ return -ENOMEM;
+
+ modified = true;
+ }
+
+ break;
+ }
+ }
+
+ if (modified > 0)
+ log_info("Changing X11 keyboard layout to '%s' model '%s' variant '%s' options '%s'",
+ strempty(c->x11_layout),
+ strempty(c->x11_model),
+ strempty(c->x11_variant),
+ strempty(c->x11_options));
+ else if (modified < 0)
+ log_notice("X11 keyboard layout was not modified: no conversion found for \"%s\".",
+ c->vc_keymap);
+ else
+ log_debug("X11 keyboard layout did not need to be modified.");
+
+ return modified > 0;
+}
+
+int find_converted_keymap(const char *x11_layout, const char *x11_variant, char **new_keymap) {
+ const char *dir;
+ _cleanup_free_ char *n;
+
+ if (x11_variant)
+ n = strjoin(x11_layout, "-", x11_variant);
+ else
+ n = strdup(x11_layout);
+ if (!n)
+ return -ENOMEM;
+
+ NULSTR_FOREACH(dir, KBD_KEYMAP_DIRS) {
+ _cleanup_free_ char *p = NULL, *pz = NULL;
+ bool uncompressed;
+
+ p = strjoin(dir, "xkb/", n, ".map");
+ pz = strjoin(dir, "xkb/", n, ".map.gz");
+ if (!p || !pz)
+ return -ENOMEM;
+
+ uncompressed = access(p, F_OK) == 0;
+ if (uncompressed || access(pz, F_OK) == 0) {
+ log_debug("Found converted keymap %s at %s",
+ n, uncompressed ? p : pz);
+
+ *new_keymap = TAKE_PTR(n);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int find_legacy_keymap(Context *c, char **ret) {
+ const char *map;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *new_keymap = NULL;
+ unsigned n = 0;
+ unsigned best_matching = 0;
+ int r;
+
+ assert(!isempty(c->x11_layout));
+
+ map = systemd_kbd_model_map();
+
+ f = fopen(map, "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_strv_free_ char **a = NULL;
+ unsigned matching = 0;
+
+ r = read_next_mapping(map, 5, UINT_MAX, f, &n, &a);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ /* Determine how well matching this entry is */
+ if (streq(c->x11_layout, a[1]))
+ /* If we got an exact match, this is best */
+ matching = 10;
+ else {
+ /* We have multiple X layouts, look for an
+ * entry that matches our key with everything
+ * but the first layout stripped off. */
+ if (startswith_comma(c->x11_layout, a[1]))
+ matching = 5;
+ else {
+ char *x;
+
+ /* If that didn't work, strip off the
+ * other layouts from the entry, too */
+ x = strndupa(a[1], strcspn(a[1], ","));
+ if (startswith_comma(c->x11_layout, x))
+ matching = 1;
+ }
+ }
+
+ if (matching > 0) {
+ if (isempty(c->x11_model) || streq_ptr(c->x11_model, a[2])) {
+ matching++;
+
+ if (streq_ptr(c->x11_variant, a[3])) {
+ matching++;
+
+ if (streq_ptr(c->x11_options, a[4]))
+ matching++;
+ }
+ }
+ }
+
+ /* The best matching entry so far, then let's save that */
+ if (matching >= MAX(best_matching, 1u)) {
+ log_debug("Found legacy keymap %s with score %u",
+ a[0], matching);
+
+ if (matching > best_matching) {
+ best_matching = matching;
+
+ r = free_and_strdup(&new_keymap, a[0]);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ if (best_matching < 10 && c->x11_layout) {
+ /* The best match is only the first part of the X11
+ * keymap. Check if we have a converted map which
+ * matches just the first layout.
+ */
+ char *l, *v = NULL, *converted;
+
+ l = strndupa(c->x11_layout, strcspn(c->x11_layout, ","));
+ if (c->x11_variant)
+ v = strndupa(c->x11_variant, strcspn(c->x11_variant, ","));
+ r = find_converted_keymap(l, v, &converted);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ free_and_replace(new_keymap, converted);
+ }
+
+ *ret = TAKE_PTR(new_keymap);
+ return (bool) *ret;
+}
+
+int find_language_fallback(const char *lang, char **language) {
+ const char *map;
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned n = 0;
+
+ assert(lang);
+ assert(language);
+
+ map = systemd_language_fallback_map();
+
+ f = fopen(map, "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_strv_free_ char **a = NULL;
+ int r;
+
+ r = read_next_mapping(map, 2, 2, f, &n, &a);
+ if (r <= 0)
+ return r;
+
+ if (streq(lang, a[0])) {
+ assert(strv_length(a) == 2);
+ *language = TAKE_PTR(a[1]);
+ return 1;
+ }
+ }
+
+ assert_not_reached("should not be here");
+}
+
+int x11_convert_to_vconsole(Context *c) {
+ bool modified = false;
+
+ if (isempty(c->x11_layout)) {
+ modified =
+ !isempty(c->vc_keymap) ||
+ !isempty(c->vc_keymap_toggle);
+
+ context_free_vconsole(c);
+ } else {
+ _cleanup_free_ char *new_keymap = NULL;
+ int r;
+
+ r = find_converted_keymap(c->x11_layout, c->x11_variant, &new_keymap);
+ if (r < 0)
+ return r;
+ else if (r == 0) {
+ r = find_legacy_keymap(c, &new_keymap);
+ if (r < 0)
+ return r;
+ }
+ if (r == 0)
+ /* We search for layout-variant match first, but then we also look
+ * for anything which matches just the layout. So it's accurate to say
+ * that we couldn't find anything which matches the layout. */
+ log_notice("No conversion to virtual console map found for \"%s\".",
+ c->x11_layout);
+
+ if (!streq_ptr(c->vc_keymap, new_keymap)) {
+ free_and_replace(c->vc_keymap, new_keymap);
+ c->vc_keymap_toggle = mfree(c->vc_keymap_toggle);
+ modified = true;
+ }
+ }
+
+ if (modified)
+ log_info("Changing virtual console keymap to '%s' toggle '%s'",
+ strempty(c->vc_keymap), strempty(c->vc_keymap_toggle));
+ else
+ log_debug("Virtual console keymap was not modified.");
+
+ return modified;
+}
diff --git a/src/locale/keymap-util.h b/src/locale/keymap-util.h
new file mode 100644
index 0000000..6eced84
--- /dev/null
+++ b/src/locale/keymap-util.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "hashmap.h"
+#include "locale-util.h"
+#include "time-util.h"
+
+typedef struct Context {
+ sd_bus_message *locale_cache;
+ usec_t locale_mtime;
+ char *locale[_VARIABLE_LC_MAX];
+
+ sd_bus_message *x11_cache;
+ usec_t x11_mtime;
+ char *x11_layout;
+ char *x11_model;
+ char *x11_variant;
+ char *x11_options;
+
+ sd_bus_message *vc_cache;
+ usec_t vc_mtime;
+ char *vc_keymap;
+ char *vc_keymap_toggle;
+
+ Hashmap *polkit_registry;
+} Context;
+
+int find_converted_keymap(const char *x11_layout, const char *x11_variant, char **new_keymap);
+int find_legacy_keymap(Context *c, char **new_keymap);
+int find_language_fallback(const char *lang, char **language);
+
+int locale_read_data(Context *c, sd_bus_message *m);
+int vconsole_read_data(Context *c, sd_bus_message *m);
+int x11_read_data(Context *c, sd_bus_message *m);
+
+void context_clear(Context *c);
+int vconsole_convert_to_x11(Context *c);
+int vconsole_write_data(Context *c);
+int x11_convert_to_vconsole(Context *c);
+int x11_write_data(Context *c);
+void locale_simplify(char *locale[_VARIABLE_LC_MAX]);
+int locale_write_data(Context *c, char ***settings);
diff --git a/src/locale/language-fallback-map b/src/locale/language-fallback-map
new file mode 100644
index 0000000..d0b02a6
--- /dev/null
+++ b/src/locale/language-fallback-map
@@ -0,0 +1,13 @@
+csb_PL csb:pl
+en_AU en_AU:en_GB
+en_IE en_IE:en_GB
+en_NZ en_NZ:en_GB
+en_ZA en_ZA:en_GB
+fr_BE fr_BE:fr_FR
+fr_CA fr_CA:fr_FR
+fr_CH fr_CH:fr_FR
+fr_LU fr_LU:fr_FR
+it_CH it_CH:it_IT
+mai_IN mai:hi
+nds_DE nds:de
+szl_PL szl:pl
diff --git a/src/locale/localectl.c b/src/locale/localectl.c
new file mode 100644
index 0000000..69f5667
--- /dev/null
+++ b/src/locale/localectl.c
@@ -0,0 +1,527 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ftw.h>
+#include <getopt.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sd-bus.h"
+
+#include "bus-error.h"
+#include "bus-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "locale-util.h"
+#include "main-func.h"
+#include "pager.h"
+#include "pretty-print.h"
+#include "proc-cmdline.h"
+#include "set.h"
+#include "spawn-polkit-agent.h"
+#include "strv.h"
+#include "util.h"
+#include "verbs.h"
+#include "virt.h"
+
+static PagerFlags arg_pager_flags = 0;
+static bool arg_ask_password = true;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static const char *arg_host = NULL;
+static bool arg_convert = true;
+
+typedef struct StatusInfo {
+ char **locale;
+ const char *vconsole_keymap;
+ const char *vconsole_keymap_toggle;
+ const char *x11_layout;
+ const char *x11_model;
+ const char *x11_variant;
+ const char *x11_options;
+} StatusInfo;
+
+static void status_info_clear(StatusInfo *info) {
+ if (info) {
+ strv_free(info->locale);
+ zero(*info);
+ }
+}
+
+static void print_overridden_variables(void) {
+ _cleanup_(locale_variables_freep) char *variables[_VARIABLE_LC_MAX] = {};
+ bool print_warning = true;
+ LocaleVariable j;
+ int r;
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ return;
+
+ r = proc_cmdline_get_key_many(
+ PROC_CMDLINE_STRIP_RD_PREFIX,
+ "locale.LANG", &variables[VARIABLE_LANG],
+ "locale.LANGUAGE", &variables[VARIABLE_LANGUAGE],
+ "locale.LC_CTYPE", &variables[VARIABLE_LC_CTYPE],
+ "locale.LC_NUMERIC", &variables[VARIABLE_LC_NUMERIC],
+ "locale.LC_TIME", &variables[VARIABLE_LC_TIME],
+ "locale.LC_COLLATE", &variables[VARIABLE_LC_COLLATE],
+ "locale.LC_MONETARY", &variables[VARIABLE_LC_MONETARY],
+ "locale.LC_MESSAGES", &variables[VARIABLE_LC_MESSAGES],
+ "locale.LC_PAPER", &variables[VARIABLE_LC_PAPER],
+ "locale.LC_NAME", &variables[VARIABLE_LC_NAME],
+ "locale.LC_ADDRESS", &variables[VARIABLE_LC_ADDRESS],
+ "locale.LC_TELEPHONE", &variables[VARIABLE_LC_TELEPHONE],
+ "locale.LC_MEASUREMENT", &variables[VARIABLE_LC_MEASUREMENT],
+ "locale.LC_IDENTIFICATION", &variables[VARIABLE_LC_IDENTIFICATION]);
+ if (r < 0 && r != -ENOENT) {
+ log_warning_errno(r, "Failed to read /proc/cmdline: %m");
+ return;
+ }
+
+ for (j = 0; j < _VARIABLE_LC_MAX; j++)
+ if (variables[j]) {
+ if (print_warning) {
+ log_warning("Warning: Settings on kernel command line override system locale settings in /etc/locale.conf.\n"
+ " Command Line: %s=%s", locale_variable_to_string(j), variables[j]);
+
+ print_warning = false;
+ } else
+ log_warning(" %s=%s", locale_variable_to_string(j), variables[j]);
+ }
+}
+
+static void print_status_info(StatusInfo *i) {
+ assert(i);
+
+ if (strv_isempty(i->locale))
+ puts(" System Locale: n/a");
+ else {
+ char **j;
+
+ printf(" System Locale: %s\n", i->locale[0]);
+ STRV_FOREACH(j, i->locale + 1)
+ printf(" %s\n", *j);
+ }
+
+ printf(" VC Keymap: %s\n", strna(i->vconsole_keymap));
+ if (!isempty(i->vconsole_keymap_toggle))
+ printf("VC Toggle Keymap: %s\n", i->vconsole_keymap_toggle);
+
+ printf(" X11 Layout: %s\n", strna(i->x11_layout));
+ if (!isempty(i->x11_model))
+ printf(" X11 Model: %s\n", i->x11_model);
+ if (!isempty(i->x11_variant))
+ printf(" X11 Variant: %s\n", i->x11_variant);
+ if (!isempty(i->x11_options))
+ printf(" X11 Options: %s\n", i->x11_options);
+}
+
+static int show_status(int argc, char **argv, void *userdata) {
+ _cleanup_(status_info_clear) StatusInfo info = {};
+ static const struct bus_properties_map map[] = {
+ { "VConsoleKeymap", "s", NULL, offsetof(StatusInfo, vconsole_keymap) },
+ { "VConsoleKeymapToggle", "s", NULL, offsetof(StatusInfo, vconsole_keymap_toggle) },
+ { "X11Layout", "s", NULL, offsetof(StatusInfo, x11_layout) },
+ { "X11Model", "s", NULL, offsetof(StatusInfo, x11_model) },
+ { "X11Variant", "s", NULL, offsetof(StatusInfo, x11_variant) },
+ { "X11Options", "s", NULL, offsetof(StatusInfo, x11_options) },
+ { "Locale", "as", NULL, offsetof(StatusInfo, locale) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.locale1",
+ "/org/freedesktop/locale1",
+ map,
+ 0,
+ &error,
+ &m,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Could not get properties: %s", bus_error_message(&error, r));
+
+ print_overridden_variables();
+ print_status_info(&info);
+
+ return r;
+}
+
+static int set_locale(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.locale1",
+ "/org/freedesktop/locale1",
+ "org.freedesktop.locale1",
+ "SetLocale");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, argv + 1);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "b", arg_ask_password);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, -r));
+
+ return 0;
+}
+
+static int list_locales(int argc, char **argv, void *userdata) {
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+ r = get_locales(&l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read list of locales: %m");
+
+ (void) pager_open(arg_pager_flags);
+ strv_print(l);
+
+ return 0;
+}
+
+static int set_vconsole_keymap(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *map, *toggle_map;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ map = argv[1];
+ toggle_map = argc > 2 ? argv[2] : "";
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.locale1",
+ "/org/freedesktop/locale1",
+ "org.freedesktop.locale1",
+ "SetVConsoleKeyboard",
+ &error,
+ NULL,
+ "ssbb", map, toggle_map, arg_convert, arg_ask_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set keymap: %s", bus_error_message(&error, -r));
+
+ return 0;
+}
+
+static int list_vconsole_keymaps(int argc, char **argv, void *userdata) {
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+ r = get_keymaps(&l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read list of keymaps: %m");
+
+ (void) pager_open(arg_pager_flags);
+
+ strv_print(l);
+
+ return 0;
+}
+
+static int set_x11_keymap(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *layout, *model, *variant, *options;
+ sd_bus *bus = userdata;
+ int r;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ layout = argv[1];
+ model = argc > 2 ? argv[2] : "";
+ variant = argc > 3 ? argv[3] : "";
+ options = argc > 4 ? argv[4] : "";
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.locale1",
+ "/org/freedesktop/locale1",
+ "org.freedesktop.locale1",
+ "SetX11Keyboard",
+ &error,
+ NULL,
+ "ssssbb", layout, model, variant, options,
+ arg_convert, arg_ask_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set keymap: %s", bus_error_message(&error, -r));
+
+ return 0;
+}
+
+static int list_x11_keymaps(int argc, char **argv, void *userdata) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **list = NULL;
+ enum {
+ NONE,
+ MODELS,
+ LAYOUTS,
+ VARIANTS,
+ OPTIONS
+ } state = NONE, look_for;
+ int r;
+
+ f = fopen("/usr/share/X11/xkb/rules/base.lst", "re");
+ if (!f)
+ return log_error_errno(errno, "Failed to open keyboard mapping list. %m");
+
+ if (streq(argv[0], "list-x11-keymap-models"))
+ look_for = MODELS;
+ else if (streq(argv[0], "list-x11-keymap-layouts"))
+ look_for = LAYOUTS;
+ else if (streq(argv[0], "list-x11-keymap-variants"))
+ look_for = VARIANTS;
+ else if (streq(argv[0], "list-x11-keymap-options"))
+ look_for = OPTIONS;
+ else
+ assert_not_reached("Wrong parameter");
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l, *w;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read keyboard mapping list: %m");
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+
+ if (isempty(l))
+ continue;
+
+ if (l[0] == '!') {
+ if (startswith(l, "! model"))
+ state = MODELS;
+ else if (startswith(l, "! layout"))
+ state = LAYOUTS;
+ else if (startswith(l, "! variant"))
+ state = VARIANTS;
+ else if (startswith(l, "! option"))
+ state = OPTIONS;
+ else
+ state = NONE;
+
+ continue;
+ }
+
+ if (state != look_for)
+ continue;
+
+ w = l + strcspn(l, WHITESPACE);
+
+ if (argc > 1) {
+ char *e;
+
+ if (*w == 0)
+ continue;
+
+ *w = 0;
+ w++;
+ w += strspn(w, WHITESPACE);
+
+ e = strchr(w, ':');
+ if (!e)
+ continue;
+
+ *e = 0;
+
+ if (!streq(w, argv[1]))
+ continue;
+ } else
+ *w = 0;
+
+ r = strv_extend(&list, l);
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (strv_isempty(list))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Couldn't find any entries.");
+
+ strv_sort(list);
+ strv_uniq(list);
+
+ (void) pager_open(arg_pager_flags);
+
+ strv_print(list);
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("localectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n\n"
+ "Query or change system locale and keyboard settings.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-ask-password Do not prompt for password\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --no-convert Don't convert keyboard mappings\n\n"
+ "Commands:\n"
+ " status Show current locale settings\n"
+ " set-locale LOCALE... Set system locale\n"
+ " list-locales Show known locales\n"
+ " set-keymap MAP [MAP] Set console and X11 keyboard mappings\n"
+ " list-keymaps Show known virtual console keyboard mappings\n"
+ " set-x11-keymap LAYOUT [MODEL [VARIANT [OPTIONS]]]\n"
+ " Set X11 and console keyboard mappings\n"
+ " list-x11-keymap-models Show known X11 keyboard mapping models\n"
+ " list-x11-keymap-layouts Show known X11 keyboard mapping layouts\n"
+ " list-x11-keymap-variants [LAYOUT]\n"
+ " Show known X11 keyboard mapping variants\n"
+ " list-x11-keymap-options Show known X11 keyboard mapping options\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int verb_help(int argc, char **argv, void *userdata) {
+ return help();
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_CONVERT,
+ ARG_NO_ASK_PASSWORD
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "no-convert", no_argument, NULL, ARG_NO_CONVERT },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hH:M:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_CONVERT:
+ arg_convert = false;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int localectl_main(sd_bus *bus, int argc, char *argv[]) {
+
+ static const Verb verbs[] = {
+ { "status", VERB_ANY, 1, VERB_DEFAULT, show_status },
+ { "set-locale", 2, VERB_ANY, 0, set_locale },
+ { "list-locales", VERB_ANY, 1, 0, list_locales },
+ { "set-keymap", 2, 3, 0, set_vconsole_keymap },
+ { "list-keymaps", VERB_ANY, 1, 0, list_vconsole_keymaps },
+ { "set-x11-keymap", 2, 5, 0, set_x11_keymap },
+ { "list-x11-keymap-models", VERB_ANY, 1, 0, list_x11_keymaps },
+ { "list-x11-keymap-layouts", VERB_ANY, 1, 0, list_x11_keymaps },
+ { "list-x11-keymap-variants", VERB_ANY, 2, 0, list_x11_keymaps },
+ { "list-x11-keymap-options", VERB_ANY, 1, 0, list_x11_keymaps },
+ { "help", VERB_ANY, VERB_ANY, 0, verb_help }, /* Not documented, but supported since it is created. */
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, bus);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = bus_connect_transport(arg_transport, arg_host, false, &bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ return localectl_main(bus, argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/locale/localed.c b/src/locale/localed.c
new file mode 100644
index 0000000..f851d35
--- /dev/null
+++ b/src/locale/localed.c
@@ -0,0 +1,758 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#if HAVE_XKBCOMMON
+#include <xkbcommon/xkbcommon.h>
+#include <dlfcn.h>
+#endif
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "def.h"
+#include "keymap-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "main-func.h"
+#include "missing_capability.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int locale_update_system_manager(Context *c, sd_bus *bus) {
+ _cleanup_free_ char **l_unset = NULL;
+ _cleanup_strv_free_ char **l_set = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ sd_bus_error error = SD_BUS_ERROR_NULL;
+ size_t c_set, c_unset;
+ LocaleVariable p;
+ int r;
+
+ assert(bus);
+
+ l_unset = new0(char*, _VARIABLE_LC_MAX);
+ if (!l_unset)
+ return log_oom();
+
+ l_set = new0(char*, _VARIABLE_LC_MAX);
+ if (!l_set)
+ return log_oom();
+
+ for (p = 0, c_set = 0, c_unset = 0; p < _VARIABLE_LC_MAX; p++) {
+ const char *name;
+
+ name = locale_variable_to_string(p);
+ assert(name);
+
+ if (isempty(c->locale[p]))
+ l_unset[c_set++] = (char*) name;
+ else {
+ char *s;
+
+ s = strjoin(name, "=", c->locale[p]);
+ if (!s)
+ return log_oom();
+
+ l_set[c_unset++] = s;
+ }
+ }
+
+ assert(c_set + c_unset == _VARIABLE_LC_MAX);
+ r = sd_bus_message_new_method_call(bus, &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "UnsetAndSetEnvironment");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, l_unset);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, l_set);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update the manager environment: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int vconsole_reload(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "RestartUnit",
+ &error,
+ NULL,
+ "ss", "systemd-vconsole-setup.service", "replace");
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int vconsole_convert_to_x11_and_emit(Context *c, sd_bus_message *m) {
+ int r;
+
+ assert(m);
+
+ r = x11_read_data(c, m);
+ if (r < 0)
+ return r;
+
+ r = vconsole_convert_to_x11(c);
+ if (r <= 0)
+ return r;
+
+ /* modified */
+ r = x11_write_data(c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write X11 keyboard layout: %m");
+
+ sd_bus_emit_properties_changed(sd_bus_message_get_bus(m),
+ "/org/freedesktop/locale1",
+ "org.freedesktop.locale1",
+ "X11Layout", "X11Model", "X11Variant", "X11Options", NULL);
+
+ return 1;
+}
+
+static int x11_convert_to_vconsole_and_emit(Context *c, sd_bus_message *m) {
+ int r;
+
+ assert(m);
+
+ r = vconsole_read_data(c, m);
+ if (r < 0)
+ return r;
+
+ r = x11_convert_to_vconsole(c);
+ if (r <= 0)
+ return r;
+
+ /* modified */
+ r = vconsole_write_data(c);
+ if (r < 0)
+ log_error_errno(r, "Failed to save virtual console keymap: %m");
+
+ sd_bus_emit_properties_changed(sd_bus_message_get_bus(m),
+ "/org/freedesktop/locale1",
+ "org.freedesktop.locale1",
+ "VConsoleKeymap", "VConsoleKeymapToggle", NULL);
+
+ return vconsole_reload(sd_bus_message_get_bus(m));
+}
+
+static int property_get_locale(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Context *c = userdata;
+ _cleanup_strv_free_ char **l = NULL;
+ int p, q, r;
+
+ r = locale_read_data(c, reply);
+ if (r < 0)
+ return r;
+
+ l = new0(char*, _VARIABLE_LC_MAX+1);
+ if (!l)
+ return -ENOMEM;
+
+ for (p = 0, q = 0; p < _VARIABLE_LC_MAX; p++) {
+ char *t;
+ const char *name;
+
+ name = locale_variable_to_string(p);
+ assert(name);
+
+ if (isempty(c->locale[p]))
+ continue;
+
+ if (asprintf(&t, "%s=%s", name, c->locale[p]) < 0)
+ return -ENOMEM;
+
+ l[q++] = t;
+ }
+
+ return sd_bus_message_append_strv(reply, l);
+}
+
+static int property_get_vconsole(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Context *c = userdata;
+ int r;
+
+ r = vconsole_read_data(c, reply);
+ if (r < 0)
+ return r;
+
+ if (streq(property, "VConsoleKeymap"))
+ return sd_bus_message_append_basic(reply, 's', c->vc_keymap);
+ else if (streq(property, "VConsoleKeymapToggle"))
+ return sd_bus_message_append_basic(reply, 's', c->vc_keymap_toggle);
+
+ return -EINVAL;
+}
+
+static int property_get_xkb(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Context *c = userdata;
+ int r;
+
+ r = x11_read_data(c, reply);
+ if (r < 0)
+ return r;
+
+ if (streq(property, "X11Layout"))
+ return sd_bus_message_append_basic(reply, 's', c->x11_layout);
+ else if (streq(property, "X11Model"))
+ return sd_bus_message_append_basic(reply, 's', c->x11_model);
+ else if (streq(property, "X11Variant"))
+ return sd_bus_message_append_basic(reply, 's', c->x11_variant);
+ else if (streq(property, "X11Options"))
+ return sd_bus_message_append_basic(reply, 's', c->x11_options);
+
+ return -EINVAL;
+}
+
+static int method_set_locale(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ _cleanup_(locale_variables_freep) char *new_locale[_VARIABLE_LC_MAX] = {};
+ _cleanup_strv_free_ char **settings = NULL, **l = NULL;
+ Context *c = userdata;
+ bool modified = false;
+ int interactive, p, r;
+ char **i;
+
+ assert(m);
+ assert(c);
+
+ r = bus_message_read_strv_extend(m, &l);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_basic(m, 'b', &interactive);
+ if (r < 0)
+ return r;
+
+ /* If single locale without variable name is provided, then we assume it is LANG=. */
+ if (strv_length(l) == 1 && !strchr(*l, '=')) {
+ if (!locale_is_valid(*l))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid Locale data.");
+
+ new_locale[VARIABLE_LANG] = strdup(*l);
+ if (!new_locale[VARIABLE_LANG])
+ return -ENOMEM;
+
+ l = strv_free(l);
+ }
+
+ /* Check whether a variable is valid */
+ STRV_FOREACH(i, l) {
+ bool valid = false;
+
+ for (p = 0; p < _VARIABLE_LC_MAX; p++) {
+ size_t k;
+ const char *name;
+
+ name = locale_variable_to_string(p);
+ assert(name);
+
+ k = strlen(name);
+ if (startswith(*i, name) &&
+ (*i)[k] == '=' &&
+ locale_is_valid((*i) + k + 1)) {
+ valid = true;
+
+ new_locale[p] = strdup((*i) + k + 1);
+ if (!new_locale[p])
+ return -ENOMEM;
+
+ break;
+ }
+ }
+
+ if (!valid)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid Locale data.");
+ }
+
+ /* If LANG was specified, but not LANGUAGE, check if we should
+ * set it based on the language fallback table. */
+ if (!isempty(new_locale[VARIABLE_LANG]) &&
+ isempty(new_locale[VARIABLE_LANGUAGE])) {
+ _cleanup_free_ char *language = NULL;
+
+ (void) find_language_fallback(new_locale[VARIABLE_LANG], &language);
+ if (language) {
+ log_debug("Converted LANG=%s to LANGUAGE=%s", new_locale[VARIABLE_LANG], language);
+ free_and_replace(new_locale[VARIABLE_LANGUAGE], language);
+ }
+ }
+
+ r = locale_read_data(c, m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read locale data: %m");
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Failed to read locale data");
+ }
+
+ /* Merge with the current settings */
+ for (p = 0; p < _VARIABLE_LC_MAX; p++)
+ if (!isempty(c->locale[p]) && isempty(new_locale[p])) {
+ new_locale[p] = strdup(c->locale[p]);
+ if (!new_locale[p])
+ return -ENOMEM;
+ }
+
+ locale_simplify(new_locale);
+
+ for (p = 0; p < _VARIABLE_LC_MAX; p++)
+ if (!streq_ptr(c->locale[p], new_locale[p])) {
+ modified = true;
+ break;
+ }
+
+ if (!modified) {
+ log_debug("Locale settings were not modified.");
+ return sd_bus_reply_method_return(m, NULL);
+ }
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.locale1.set-locale",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ for (p = 0; p < _VARIABLE_LC_MAX; p++)
+ free_and_replace(c->locale[p], new_locale[p]);
+
+ r = locale_write_data(c, &settings);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set locale: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set locale: %m");
+ }
+
+ (void) locale_update_system_manager(c, sd_bus_message_get_bus(m));
+
+ if (settings) {
+ _cleanup_free_ char *line;
+
+ line = strv_join(settings, ", ");
+ log_info("Changed locale to %s.", strnull(line));
+ } else
+ log_info("Changed locale to unset.");
+
+ (void) sd_bus_emit_properties_changed(
+ sd_bus_message_get_bus(m),
+ "/org/freedesktop/locale1",
+ "org.freedesktop.locale1",
+ "Locale", NULL);
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_set_vc_keyboard(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ Context *c = userdata;
+ const char *keymap, *keymap_toggle;
+ int convert, interactive, r;
+
+ assert(m);
+ assert(c);
+
+ r = sd_bus_message_read(m, "ssbb", &keymap, &keymap_toggle, &convert, &interactive);
+ if (r < 0)
+ return r;
+
+ keymap = empty_to_null(keymap);
+ keymap_toggle = empty_to_null(keymap_toggle);
+
+ r = vconsole_read_data(c, m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read virtual console keymap data: %m");
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Failed to read virtual console keymap data");
+ }
+
+ if (streq_ptr(keymap, c->vc_keymap) &&
+ streq_ptr(keymap_toggle, c->vc_keymap_toggle))
+ return sd_bus_reply_method_return(m, NULL);
+
+ if ((keymap && (!filename_is_valid(keymap) || !string_is_safe(keymap))) ||
+ (keymap_toggle && (!filename_is_valid(keymap_toggle) || !string_is_safe(keymap_toggle))))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Received invalid keymap data");
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.locale1.set-keyboard",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ if (free_and_strdup(&c->vc_keymap, keymap) < 0 ||
+ free_and_strdup(&c->vc_keymap_toggle, keymap_toggle) < 0)
+ return -ENOMEM;
+
+ r = vconsole_write_data(c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set virtual console keymap: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set virtual console keymap: %m");
+ }
+
+ log_info("Changed virtual console keymap to '%s' toggle '%s'",
+ strempty(c->vc_keymap), strempty(c->vc_keymap_toggle));
+
+ (void) vconsole_reload(sd_bus_message_get_bus(m));
+
+ (void) sd_bus_emit_properties_changed(
+ sd_bus_message_get_bus(m),
+ "/org/freedesktop/locale1",
+ "org.freedesktop.locale1",
+ "VConsoleKeymap", "VConsoleKeymapToggle", NULL);
+
+ if (convert) {
+ r = vconsole_convert_to_x11_and_emit(c, m);
+ if (r < 0)
+ log_error_errno(r, "Failed to convert keymap data: %m");
+ }
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+#if HAVE_XKBCOMMON
+
+_printf_(3, 0)
+static void log_xkb(struct xkb_context *ctx, enum xkb_log_level lvl, const char *format, va_list args) {
+ const char *fmt;
+
+ fmt = strjoina("libxkbcommon: ", format);
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+ log_internalv(LOG_DEBUG, 0, __FILE__, __LINE__, __func__, fmt, args);
+#pragma GCC diagnostic pop
+}
+
+#define LOAD_SYMBOL(symbol, dl, name) \
+ ({ \
+ (symbol) = (typeof(symbol)) dlvsym((dl), (name), "V_0.5.0"); \
+ (symbol) ? 0 : -EOPNOTSUPP; \
+ })
+
+static int verify_xkb_rmlvo(const char *model, const char *layout, const char *variant, const char *options) {
+
+ /* We dlopen() the library in order to make the dependency soft. The library (and what it pulls in) is huge
+ * after all, hence let's support XKB maps when the library is around, and refuse otherwise. The function
+ * pointers to the shared library are below: */
+
+ struct xkb_context* (*symbol_xkb_context_new)(enum xkb_context_flags flags) = NULL;
+ void (*symbol_xkb_context_unref)(struct xkb_context *context) = NULL;
+ void (*symbol_xkb_context_set_log_fn)(struct xkb_context *context, void (*log_fn)(struct xkb_context *context, enum xkb_log_level level, const char *format, va_list args)) = NULL;
+ struct xkb_keymap* (*symbol_xkb_keymap_new_from_names)(struct xkb_context *context, const struct xkb_rule_names *names, enum xkb_keymap_compile_flags flags) = NULL;
+ void (*symbol_xkb_keymap_unref)(struct xkb_keymap *keymap) = NULL;
+
+ const struct xkb_rule_names rmlvo = {
+ .model = model,
+ .layout = layout,
+ .variant = variant,
+ .options = options,
+ };
+ struct xkb_context *ctx = NULL;
+ struct xkb_keymap *km = NULL;
+ void *dl;
+ int r;
+
+ /* Compile keymap from RMLVO information to check out its validity */
+
+ dl = dlopen("libxkbcommon.so.0", RTLD_LAZY);
+ if (!dl)
+ return -EOPNOTSUPP;
+
+ r = LOAD_SYMBOL(symbol_xkb_context_new, dl, "xkb_context_new");
+ if (r < 0)
+ goto finish;
+
+ r = LOAD_SYMBOL(symbol_xkb_context_unref, dl, "xkb_context_unref");
+ if (r < 0)
+ goto finish;
+
+ r = LOAD_SYMBOL(symbol_xkb_context_set_log_fn, dl, "xkb_context_set_log_fn");
+ if (r < 0)
+ goto finish;
+
+ r = LOAD_SYMBOL(symbol_xkb_keymap_new_from_names, dl, "xkb_keymap_new_from_names");
+ if (r < 0)
+ goto finish;
+
+ r = LOAD_SYMBOL(symbol_xkb_keymap_unref, dl, "xkb_keymap_unref");
+ if (r < 0)
+ goto finish;
+
+ ctx = symbol_xkb_context_new(XKB_CONTEXT_NO_ENVIRONMENT_NAMES);
+ if (!ctx) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ symbol_xkb_context_set_log_fn(ctx, log_xkb);
+
+ km = symbol_xkb_keymap_new_from_names(ctx, &rmlvo, XKB_KEYMAP_COMPILE_NO_FLAGS);
+ if (!km) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = 0;
+
+finish:
+ if (symbol_xkb_keymap_unref && km)
+ symbol_xkb_keymap_unref(km);
+
+ if (symbol_xkb_context_unref && ctx)
+ symbol_xkb_context_unref(ctx);
+
+ (void) dlclose(dl);
+ return r;
+}
+
+#else
+
+static int verify_xkb_rmlvo(const char *model, const char *layout, const char *variant, const char *options) {
+ return 0;
+}
+
+#endif
+
+static int method_set_x11_keyboard(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ Context *c = userdata;
+ const char *layout, *model, *variant, *options;
+ int convert, interactive, r;
+
+ assert(m);
+ assert(c);
+
+ r = sd_bus_message_read(m, "ssssbb", &layout, &model, &variant, &options, &convert, &interactive);
+ if (r < 0)
+ return r;
+
+ layout = empty_to_null(layout);
+ model = empty_to_null(model);
+ variant = empty_to_null(variant);
+ options = empty_to_null(options);
+
+ r = x11_read_data(c, m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read x11 keyboard layout data: %m");
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Failed to read x11 keyboard layout data");
+ }
+
+ if (streq_ptr(layout, c->x11_layout) &&
+ streq_ptr(model, c->x11_model) &&
+ streq_ptr(variant, c->x11_variant) &&
+ streq_ptr(options, c->x11_options))
+ return sd_bus_reply_method_return(m, NULL);
+
+ if ((layout && !string_is_safe(layout)) ||
+ (model && !string_is_safe(model)) ||
+ (variant && !string_is_safe(variant)) ||
+ (options && !string_is_safe(options)))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Received invalid keyboard data");
+
+ r = verify_xkb_rmlvo(model, layout, variant, options);
+ if (r < 0) {
+ log_error_errno(r, "Cannot compile XKB keymap for new x11 keyboard layout ('%s' / '%s' / '%s' / '%s'): %m",
+ strempty(model), strempty(layout), strempty(variant), strempty(options));
+
+ if (r == -EOPNOTSUPP)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Local keyboard configuration not supported on this system.");
+
+ return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Specified keymap cannot be compiled, refusing as invalid.");
+ }
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.locale1.set-keyboard",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ if (free_and_strdup(&c->x11_layout, layout) < 0 ||
+ free_and_strdup(&c->x11_model, model) < 0 ||
+ free_and_strdup(&c->x11_variant, variant) < 0 ||
+ free_and_strdup(&c->x11_options, options) < 0)
+ return -ENOMEM;
+
+ r = x11_write_data(c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set X11 keyboard layout: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set X11 keyboard layout: %m");
+ }
+
+ log_info("Changed X11 keyboard layout to '%s' model '%s' variant '%s' options '%s'",
+ strempty(c->x11_layout),
+ strempty(c->x11_model),
+ strempty(c->x11_variant),
+ strempty(c->x11_options));
+
+ (void) sd_bus_emit_properties_changed(
+ sd_bus_message_get_bus(m),
+ "/org/freedesktop/locale1",
+ "org.freedesktop.locale1",
+ "X11Layout", "X11Model", "X11Variant", "X11Options", NULL);
+
+ if (convert) {
+ r = x11_convert_to_vconsole_and_emit(c, m);
+ if (r < 0)
+ log_error_errno(r, "Failed to convert keymap data: %m");
+ }
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static const sd_bus_vtable locale_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Locale", "as", property_get_locale, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("X11Layout", "s", property_get_xkb, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("X11Model", "s", property_get_xkb, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("X11Variant", "s", property_get_xkb, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("X11Options", "s", property_get_xkb, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("VConsoleKeymap", "s", property_get_vconsole, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("VConsoleKeymapToggle", "s", property_get_vconsole, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_METHOD("SetLocale", "asb", NULL, method_set_locale, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetVConsoleKeyboard", "ssbb", NULL, method_set_vc_keyboard, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetX11Keyboard", "ssssbb", NULL, method_set_x11_keyboard, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END
+};
+
+static int connect_bus(Context *c, sd_event *event, sd_bus **_bus) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(c);
+ assert(event);
+ assert(_bus);
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get system bus connection: %m");
+
+ r = sd_bus_add_object_vtable(bus, NULL, "/org/freedesktop/locale1", "org.freedesktop.locale1", locale_vtable, c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register object: %m");
+
+ r = sd_bus_request_name_async(bus, NULL, "org.freedesktop.locale1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ *_bus = TAKE_PTR(bus);
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(context_clear) Context context = {
+ .locale_mtime = USEC_INFINITY,
+ .vc_mtime = USEC_INFINITY,
+ .x11_mtime = USEC_INFINITY,
+ };
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+ mac_selinux_init();
+
+ if (argc != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments.");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ (void) sd_event_set_watchdog(event, true);
+
+ r = sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to install SIGINT handler: %m");
+
+ r = sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to install SIGTERM handler: %m");
+
+ r = connect_bus(&context, event, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_event_loop_with_idle(event, bus, "org.freedesktop.locale1", DEFAULT_EXIT_USEC, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/locale/meson.build b/src/locale/meson.build
new file mode 100644
index 0000000..e87a10e
--- /dev/null
+++ b/src/locale/meson.build
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+systemd_localed_sources = files('''
+ localed.c
+ keymap-util.c
+ keymap-util.h
+'''.split())
+
+localectl_sources = files('localectl.c')
+
+if conf.get('ENABLE_LOCALED') == 1
+ install_data('org.freedesktop.locale1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.locale1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.locale1.policy',
+ install_dir : polkitpolicydir)
+endif
+
+# If you know a way that allows the same variables to be used
+# in sources list and concatenated to a string for test_env,
+# let me know.
+kbd_model_map = join_paths(meson.current_source_dir(), 'kbd-model-map')
+language_fallback_map = join_paths(meson.current_source_dir(), 'language-fallback-map')
+
+if conf.get('ENABLE_LOCALED') == 1
+ install_data('kbd-model-map',
+ 'language-fallback-map',
+ install_dir : pkgdatadir)
+endif
+
+tests += [
+ [['src/locale/test-keymap-util.c',
+ 'src/locale/keymap-util.c',
+ 'src/locale/keymap-util.h'],
+ [libshared],
+ [libdl]],
+]
diff --git a/src/locale/org.freedesktop.locale1.conf b/src/locale/org.freedesktop.locale1.conf
new file mode 100644
index 0000000..d74cbc1
--- /dev/null
+++ b/src/locale/org.freedesktop.locale1.conf
@@ -0,0 +1,29 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1+
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.locale1"/>
+ <allow send_destination="org.freedesktop.locale1"/>
+ <allow receive_sender="org.freedesktop.locale1"/>
+ </policy>
+
+ <policy context="default">
+ <allow send_destination="org.freedesktop.locale1"/>
+ <allow receive_sender="org.freedesktop.locale1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/locale/org.freedesktop.locale1.policy b/src/locale/org.freedesktop.locale1.policy
new file mode 100644
index 0000000..f924174
--- /dev/null
+++ b/src/locale/org.freedesktop.locale1.policy
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1+
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.locale1.set-locale">
+ <description gettext-domain="systemd">Set system locale</description>
+ <message gettext-domain="systemd">Authentication is required to set the system locale.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.locale1.set-keyboard</annotate>
+ </action>
+
+ <action id="org.freedesktop.locale1.set-keyboard">
+ <description gettext-domain="systemd">Set system keyboard settings</description>
+ <message gettext-domain="systemd">Authentication is required to set the system keyboard settings.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/locale/org.freedesktop.locale1.service b/src/locale/org.freedesktop.locale1.service
new file mode 100644
index 0000000..b15d395
--- /dev/null
+++ b/src/locale/org.freedesktop.locale1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.locale1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.locale1.service
diff --git a/src/locale/test-keymap-util.c b/src/locale/test-keymap-util.c
new file mode 100644
index 0000000..2f82891
--- /dev/null
+++ b/src/locale/test-keymap-util.c
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "keymap-util.h"
+#include "log.h"
+#include "string-util.h"
+
+static void test_find_language_fallback(void) {
+ _cleanup_free_ char *ans = NULL, *ans2 = NULL;
+
+ log_info("/*** %s ***/", __func__);
+
+ assert_se(find_language_fallback("foobar", &ans) == 0);
+ assert_se(ans == NULL);
+
+ assert_se(find_language_fallback("csb", &ans) == 0);
+ assert_se(ans == NULL);
+
+ assert_se(find_language_fallback("csb_PL", &ans) == 1);
+ assert_se(streq(ans, "csb:pl"));
+
+ assert_se(find_language_fallback("szl_PL", &ans2) == 1);
+ assert_se(streq(ans2, "szl:pl"));
+}
+
+static void test_find_converted_keymap(void) {
+ _cleanup_free_ char *ans = NULL, *ans2 = NULL;
+ int r;
+
+ log_info("/*** %s ***/", __func__);
+
+ assert_se(find_converted_keymap("pl", "foobar", &ans) == 0);
+ assert_se(ans == NULL);
+
+ r = find_converted_keymap("pl", NULL, &ans);
+ if (r == 0) {
+ log_info("Skipping rest of %s: keymaps are not installed", __func__);
+ return;
+ }
+
+ assert_se(r == 1);
+ assert_se(streq(ans, "pl"));
+
+ assert_se(find_converted_keymap("pl", "dvorak", &ans2) == 1);
+ assert_se(streq(ans2, "pl-dvorak"));
+}
+
+static void test_find_legacy_keymap(void) {
+ Context c = {};
+ _cleanup_free_ char *ans = NULL, *ans2 = NULL;
+
+ log_info("/*** %s ***/", __func__);
+
+ c.x11_layout = (char*) "foobar";
+ assert_se(find_legacy_keymap(&c, &ans) == 0);
+ assert_se(ans == NULL);
+
+ c.x11_layout = (char*) "pl";
+ assert_se(find_legacy_keymap(&c, &ans) == 1);
+ assert_se(streq(ans, "pl2"));
+
+ c.x11_layout = (char*) "pl,ru";
+ assert_se(find_legacy_keymap(&c, &ans2) == 1);
+ assert_se(streq(ans, "pl2"));
+}
+
+static void test_vconsole_convert_to_x11(void) {
+ _cleanup_(context_clear) Context c = {};
+
+ log_info("/*** %s ***/", __func__);
+
+ log_info("/* test emptying first (:) */");
+ assert_se(free_and_strdup(&c.x11_layout, "foo") >= 0);
+ assert_se(free_and_strdup(&c.x11_variant, "bar") >= 0);
+ assert_se(vconsole_convert_to_x11(&c) == 1);
+ assert_se(c.x11_layout == NULL);
+ assert_se(c.x11_variant == NULL);
+
+ log_info("/* test emptying second (:) */");
+
+ assert_se(vconsole_convert_to_x11(&c) == 0);
+ assert_se(c.x11_layout == NULL);
+ assert_se(c.x11_variant == NULL);
+
+ log_info("/* test without variant, new mapping (es:) */");
+ assert_se(free_and_strdup(&c.vc_keymap, "es") >= 0);
+
+ assert_se(vconsole_convert_to_x11(&c) == 1);
+ assert_se(streq(c.x11_layout, "es"));
+ assert_se(c.x11_variant == NULL);
+
+ log_info("/* test with known variant, new mapping (es:dvorak) */");
+ assert_se(free_and_strdup(&c.vc_keymap, "es-dvorak") >= 0);
+
+ assert_se(vconsole_convert_to_x11(&c) == 0); // FIXME
+ assert_se(streq(c.x11_layout, "es"));
+ assert_se(c.x11_variant == NULL); // FIXME: "dvorak"
+
+ log_info("/* test with old mapping (fr:latin9) */");
+ assert_se(free_and_strdup(&c.vc_keymap, "fr-latin9") >= 0);
+
+ assert_se(vconsole_convert_to_x11(&c) == 1);
+ assert_se(streq(c.x11_layout, "fr"));
+ assert_se(streq(c.x11_variant, "latin9"));
+
+ log_info("/* test with a compound mapping (ru,us) */");
+ assert_se(free_and_strdup(&c.vc_keymap, "ru") >= 0);
+
+ assert_se(vconsole_convert_to_x11(&c) == 1);
+ assert_se(streq(c.x11_layout, "ru,us"));
+ assert_se(c.x11_variant == NULL);
+
+ log_info("/* test with a simple mapping (us) */");
+ assert_se(free_and_strdup(&c.vc_keymap, "us") >= 0);
+
+ assert_se(vconsole_convert_to_x11(&c) == 1);
+ assert_se(streq(c.x11_layout, "us"));
+ assert_se(c.x11_variant == NULL);
+}
+
+static void test_x11_convert_to_vconsole(void) {
+ _cleanup_(context_clear) Context c = {};
+ int r;
+
+ log_info("/*** %s ***/", __func__);
+
+ log_info("/* test emptying first (:) */");
+ assert_se(free_and_strdup(&c.vc_keymap, "foobar") >= 0);
+ assert_se(x11_convert_to_vconsole(&c) == 1);
+ assert_se(c.vc_keymap == NULL);
+
+ log_info("/* test emptying second (:) */");
+
+ assert_se(x11_convert_to_vconsole(&c) == 0);
+ assert_se(c.vc_keymap == NULL);
+
+ log_info("/* test without variant, new mapping (es:) */");
+ assert_se(free_and_strdup(&c.x11_layout, "es") >= 0);
+
+ assert_se(x11_convert_to_vconsole(&c) == 1);
+ assert_se(streq(c.vc_keymap, "es"));
+
+ log_info("/* test with unknown variant, new mapping (es:foobar) */");
+ assert_se(free_and_strdup(&c.x11_variant, "foobar") >= 0);
+
+ assert_se(x11_convert_to_vconsole(&c) == 0);
+ assert_se(streq(c.vc_keymap, "es"));
+
+ log_info("/* test with known variant, new mapping (es:dvorak) */");
+ assert_se(free_and_strdup(&c.x11_variant, "dvorak") >= 0);
+
+ r = x11_convert_to_vconsole(&c);
+ if (r == 0) {
+ log_info("Skipping rest of %s: keymaps are not installed", __func__);
+ return;
+ }
+
+ assert_se(r == 1);
+ assert_se(streq(c.vc_keymap, "es-dvorak"));
+
+ log_info("/* test with old mapping (fr:latin9) */");
+ assert_se(free_and_strdup(&c.x11_layout, "fr") >= 0);
+ assert_se(free_and_strdup(&c.x11_variant, "latin9") >= 0);
+
+ assert_se(x11_convert_to_vconsole(&c) == 1);
+ assert_se(streq(c.vc_keymap, "fr-latin9"));
+
+ log_info("/* test with a compound mapping (us,ru:) */");
+ assert_se(free_and_strdup(&c.x11_layout, "us,ru") >= 0);
+ assert_se(free_and_strdup(&c.x11_variant, NULL) >= 0);
+
+ assert_se(x11_convert_to_vconsole(&c) == 1);
+ assert_se(streq(c.vc_keymap, "us"));
+
+ log_info("/* test with a compound mapping (ru,us:) */");
+ assert_se(free_and_strdup(&c.x11_layout, "ru,us") >= 0);
+ assert_se(free_and_strdup(&c.x11_variant, NULL) >= 0);
+
+ assert_se(x11_convert_to_vconsole(&c) == 1);
+ assert_se(streq(c.vc_keymap, "ru"));
+
+ /* https://bugzilla.redhat.com/show_bug.cgi?id=1333998 */
+ log_info("/* test with a simple new mapping (ru:) */");
+ assert_se(free_and_strdup(&c.x11_layout, "ru") >= 0);
+ assert_se(free_and_strdup(&c.x11_variant, NULL) >= 0);
+
+ assert_se(x11_convert_to_vconsole(&c) == 0);
+ assert_se(streq(c.vc_keymap, "ru"));
+}
+
+int main(int argc, char **argv) {
+ log_set_max_level(LOG_DEBUG);
+ log_parse_environment();
+
+ test_find_language_fallback();
+ test_find_converted_keymap();
+ test_find_legacy_keymap();
+
+ test_vconsole_convert_to_x11();
+ test_x11_convert_to_vconsole();
+
+ return 0;
+}
diff --git a/src/login/70-power-switch.rules b/src/login/70-power-switch.rules
new file mode 100644
index 0000000..d69e65b
--- /dev/null
+++ b/src/login/70-power-switch.rules
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+ACTION=="remove", GOTO="power_switch_end"
+
+SUBSYSTEM=="input", KERNEL=="event*", ENV{ID_INPUT_SWITCH}=="1", TAG+="power-switch"
+SUBSYSTEM=="input", KERNEL=="event*", ENV{ID_INPUT_KEY}=="1", TAG+="power-switch"
+
+LABEL="power_switch_end"
diff --git a/src/login/70-uaccess.rules.m4 b/src/login/70-uaccess.rules.m4
new file mode 100644
index 0000000..d55e5bf
--- /dev/null
+++ b/src/login/70-uaccess.rules.m4
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+ACTION=="remove", GOTO="uaccess_end"
+ENV{MAJOR}=="", GOTO="uaccess_end"
+
+# PTP/MTP protocol devices, cameras, portable media players
+SUBSYSTEM=="usb", ENV{ID_USB_INTERFACES}=="*:060101:*", TAG+="uaccess"
+
+# Digicams with proprietary protocol
+ENV{ID_GPHOTO2}=="?*", TAG+="uaccess"
+
+# SCSI and USB scanners
+ENV{libsane_matched}=="yes", TAG+="uaccess"
+
+# HPLIP devices (necessary for ink level check and HP tool maintenance)
+ENV{ID_HPLIP}=="1", TAG+="uaccess"
+
+# optical drives
+SUBSYSTEM=="block", ENV{ID_CDROM}=="1", TAG+="uaccess"
+SUBSYSTEM=="scsi_generic", SUBSYSTEMS=="scsi", ATTRS{type}=="4|5", TAG+="uaccess"
+
+# Sound devices
+SUBSYSTEM=="sound", TAG+="uaccess", \
+ OPTIONS+="static_node=snd/timer", OPTIONS+="static_node=snd/seq"
+
+# ffado is an userspace driver for firewire sound cards
+SUBSYSTEM=="firewire", ENV{ID_FFADO}=="1", TAG+="uaccess"
+
+# Webcams, frame grabber, TV cards
+SUBSYSTEM=="video4linux", TAG+="uaccess"
+SUBSYSTEM=="dvb", TAG+="uaccess"
+
+# IIDC devices: industrial cameras and some webcams
+SUBSYSTEM=="firewire", ATTR{units}=="*0x00a02d:0x00010*", TAG+="uaccess"
+SUBSYSTEM=="firewire", ATTR{units}=="*0x00b09d:0x00010*", TAG+="uaccess"
+# AV/C devices: camcorders, set-top boxes, TV sets, audio devices, and more
+SUBSYSTEM=="firewire", ATTR{units}=="*0x00a02d:0x010001*", TAG+="uaccess"
+SUBSYSTEM=="firewire", ATTR{units}=="*0x00a02d:0x014001*", TAG+="uaccess"
+
+# DRI video devices
+SUBSYSTEM=="drm", KERNEL=="card*", TAG+="uaccess"
+m4_ifdef(`DEV_KVM_UACCESS',``
+# KVM
+SUBSYSTEM=="misc", KERNEL=="kvm", TAG+="uaccess"''
+)m4_dnl
+
+# smart-card readers
+ENV{ID_SMARTCARD_READER}=="?*", TAG+="uaccess"
+
+# (USB) authentication devices
+ENV{ID_SECURITY_TOKEN}=="?*", TAG+="uaccess"
+
+# PDA devices
+ENV{ID_PDA}=="?*", TAG+="uaccess"
+
+# Programmable remote control
+ENV{ID_REMOTE_CONTROL}=="1", TAG+="uaccess"
+
+# joysticks
+SUBSYSTEM=="input", ENV{ID_INPUT_JOYSTICK}=="?*", TAG+="uaccess"
+
+# color measurement devices
+ENV{COLOR_MEASUREMENT_DEVICE}=="?*", TAG+="uaccess"
+
+# DDC/CI device, usually high-end monitors such as the DreamColor
+ENV{DDC_DEVICE}=="?*", TAG+="uaccess"
+
+# media player raw devices (for user-mode drivers, Android SDK, etc.)
+SUBSYSTEM=="usb", ENV{ID_MEDIA_PLAYER}=="?*", TAG+="uaccess"
+
+# software-defined radio communication devices
+ENV{ID_SOFTWARE_RADIO}=="?*", TAG+="uaccess"
+
+# 3D printers, CNC machines, laser cutters, 3D scanners, etc.
+ENV{ID_MAKER_TOOL}=="?*", TAG+="uaccess"
+
+LABEL="uaccess_end"
diff --git a/src/login/71-seat.rules.in b/src/login/71-seat.rules.in
new file mode 100644
index 0000000..1f30900
--- /dev/null
+++ b/src/login/71-seat.rules.in
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+ACTION=="remove", GOTO="seat_end"
+
+TAG=="uaccess", SUBSYSTEM!="sound", TAG+="seat"
+SUBSYSTEM=="sound", KERNEL=="card*", TAG+="seat"
+SUBSYSTEM=="input", KERNEL=="input*", TAG+="seat"
+SUBSYSTEM=="graphics", KERNEL=="fb[0-9]*", TAG+="seat"
+SUBSYSTEM=="drm", KERNEL=="card[0-9]*", TAG+="seat", TAG+="master-of-seat"
+SUBSYSTEM=="usb", ATTR{bDeviceClass}=="09", TAG+="seat"
+
+# 'Plugable' USB hub, sound, network, graphics adapter
+SUBSYSTEM=="usb", ATTR{idVendor}=="2230", ATTR{idProduct}=="000[13]", ENV{ID_AUTOSEAT}="1"
+
+# qemu (version 2.4+) has a PCI-PCI bridge (-device pci-bridge-seat) to group
+# devices belonging to one seat. See:
+# http://git.qemu.org/?p=qemu.git;a=blob;f=docs/multiseat.txt
+SUBSYSTEM=="pci", ATTR{vendor}=="0x1b36", ATTR{device}=="0x000a", TAG+="seat", ENV{ID_AUTOSEAT}="1"
+
+# Mimo 720, with integrated USB hub, displaylink graphics, and e2i
+# touchscreen. This device carries no proper VID/PID in the USB hub,
+# but it does carry good ID data in the graphics component, hence we
+# check it from the parent. There's a bit of a race here however,
+# given that the child devices might not exist yet at the time this
+# rule is executed. To work around this we'll trigger the parent from
+# the child if we notice that the parent wasn't recognized yet.
+
+# Match parent
+SUBSYSTEM=="usb", ATTR{idVendor}=="058f", ATTR{idProduct}=="6254", \
+ ATTR{%k.2/idVendor}=="17e9", ATTR{%k.2/idProduct}=="401a", ATTR{%k.2/product}=="mimo inc", \
+ ENV{ID_AUTOSEAT}="1", ENV{ID_AVOID_LOOP}="1"
+
+# Match child, look for parent's ID_AVOID_LOOP
+SUBSYSTEM=="usb", ATTR{idVendor}=="17e9", ATTR{idProduct}=="401a", ATTR{product}=="mimo inc", \
+ ATTR{../idVendor}=="058f", ATTR{../idProduct}=="6254", \
+ IMPORT{parent}="ID_AVOID_LOOP"
+
+# Match child, retrigger parent
+SUBSYSTEM=="usb", ATTR{idVendor}=="17e9", ATTR{idProduct}=="401a", ATTR{product}=="mimo inc", \
+ ATTR{../idVendor}=="058f", ATTR{../idProduct}=="6254", \
+ ENV{ID_AVOID_LOOP}=="", \
+ RUN+="@rootbindir@/udevadm trigger --parent-match=%p/.."
+
+TAG=="seat", ENV{ID_PATH}=="", IMPORT{builtin}="path_id"
+TAG=="seat", ENV{ID_FOR_SEAT}=="", ENV{ID_PATH_TAG}!="", ENV{ID_FOR_SEAT}="$env{SUBSYSTEM}-$env{ID_PATH_TAG}"
+
+SUBSYSTEM=="input", ATTR{name}=="Wiebetech LLC Wiebetech", RUN+="@rootbindir@/loginctl lock-sessions"
+
+LABEL="seat_end"
diff --git a/src/login/73-seat-late.rules.m4 b/src/login/73-seat-late.rules.m4
new file mode 100644
index 0000000..4db8d4d
--- /dev/null
+++ b/src/login/73-seat-late.rules.m4
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+ACTION=="remove", GOTO="seat_late_end"
+
+ENV{ID_SEAT}=="", ENV{ID_AUTOSEAT}=="1", ENV{ID_FOR_SEAT}!="", ENV{ID_SEAT}="seat-$env{ID_FOR_SEAT}"
+ENV{ID_SEAT}=="", IMPORT{parent}="ID_SEAT"
+
+ENV{ID_SEAT}!="", TAG+="$env{ID_SEAT}"
+m4_ifdef(`HAVE_ACL',``
+TAG=="uaccess", ENV{MAJOR}!="", RUN{builtin}+="uaccess"''
+)m4_dnl
+
+LABEL="seat_late_end"
diff --git a/src/login/inhibit.c b/src/login/inhibit.c
new file mode 100644
index 0000000..f574d42
--- /dev/null
+++ b/src/login/inhibit.c
@@ -0,0 +1,321 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "format-table.h"
+#include "format-util.h"
+#include "main-func.h"
+#include "pager.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+static const char* arg_what = "idle:sleep:shutdown";
+static const char* arg_who = NULL;
+static const char* arg_why = "Unknown reason";
+static const char* arg_mode = NULL;
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+
+static enum {
+ ACTION_INHIBIT,
+ ACTION_LIST
+} arg_action = ACTION_INHIBIT;
+
+static int inhibit(sd_bus *bus, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+ int fd;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "Inhibit",
+ error,
+ &reply,
+ "ssss", arg_what, arg_who, arg_why, arg_mode);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_basic(reply, SD_BUS_TYPE_UNIX_FD, &fd);
+ if (r < 0)
+ return r;
+
+ r = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (r < 0)
+ return -errno;
+
+ return r;
+}
+
+static int print_inhibitors(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "ListInhibitors",
+ &error,
+ &reply,
+ "");
+ if (r < 0)
+ return log_error_errno(r, "Could not get active inhibitors: %s", bus_error_message(&error, r));
+
+ table = table_new("who", "uid", "user", "pid", "comm", "what", "why", "mode");
+ if (!table)
+ return log_oom();
+
+ /* If there's not enough space, shorten the "WHY" column, as it's little more than an explaining comment. */
+ (void) table_set_weight(table, TABLE_HEADER_CELL(6), 20);
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssuu)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *comm = NULL, *u = NULL;
+ const char *what, *who, *why, *mode;
+ uint32_t uid, pid;
+
+ r = sd_bus_message_read(reply, "(ssssuu)", &what, &who, &why, &mode, &uid, &pid);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ if (arg_mode && !streq(mode, arg_mode))
+ continue;
+
+ (void) get_process_comm(pid, &comm);
+ u = uid_to_name(uid);
+
+ r = table_add_many(table,
+ TABLE_STRING, who,
+ TABLE_UINT32, uid,
+ TABLE_STRING, strna(u),
+ TABLE_UINT32, pid,
+ TABLE_STRING, strna(comm),
+ TABLE_STRING, what,
+ TABLE_STRING, why,
+ TABLE_STRING, mode);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add table row: %m");
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (table_get_rows(table) > 1) {
+ r = table_set_sort(table, (size_t) 1, (size_t) 0, (size_t) 5, (size_t) 6, (size_t) -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to sort table: %m");
+
+ table_set_header(table, arg_legend);
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to show table: %m");
+ }
+
+ if (arg_legend) {
+ if (table_get_rows(table) > 1)
+ printf("\n%zu inhibitors listed.\n", table_get_rows(table) - 1);
+ else
+ printf("No inhibitors.\n");
+ }
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-inhibit", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Execute a process while inhibiting shutdown/sleep/idle.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers and footers\n"
+ " --what=WHAT Operations to inhibit, colon separated list of:\n"
+ " shutdown, sleep, idle, handle-power-key,\n"
+ " handle-suspend-key, handle-hibernate-key,\n"
+ " handle-lid-switch\n"
+ " --who=STRING A descriptive string who is inhibiting\n"
+ " --why=STRING A descriptive string why is being inhibited\n"
+ " --mode=MODE One of block or delay\n"
+ " --list List active inhibitors\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_WHAT,
+ ARG_WHO,
+ ARG_WHY,
+ ARG_MODE,
+ ARG_LIST,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "what", required_argument, NULL, ARG_WHAT },
+ { "who", required_argument, NULL, ARG_WHO },
+ { "why", required_argument, NULL, ARG_WHY },
+ { "mode", required_argument, NULL, ARG_MODE },
+ { "list", no_argument, NULL, ARG_LIST },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "+h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_WHAT:
+ arg_what = optarg;
+ break;
+
+ case ARG_WHO:
+ arg_who = optarg;
+ break;
+
+ case ARG_WHY:
+ arg_why = optarg;
+ break;
+
+ case ARG_MODE:
+ arg_mode = optarg;
+ break;
+
+ case ARG_LIST:
+ arg_action = ACTION_LIST;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_action == ACTION_INHIBIT && optind == argc)
+ arg_action = ACTION_LIST;
+
+ else if (arg_action == ACTION_INHIBIT && optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Missing command line to execute.");
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to bus: %m");
+
+ if (arg_action == ACTION_LIST)
+ return print_inhibitors(bus);
+
+ else {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *w = NULL;
+ pid_t pid;
+
+ /* Ignore SIGINT and allow the forked process to receive it */
+ (void) ignore_signals(SIGINT, -1);
+
+ if (!arg_who)
+ arg_who = w = strv_join(argv + optind, " ");
+
+ if (!arg_mode)
+ arg_mode = "block";
+
+ fd = inhibit(bus, &error);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to inhibit: %s", bus_error_message(&error, fd));
+
+ r = safe_fork("(inhibit)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execvp(argv[optind], argv + optind);
+ log_open();
+ log_error_errno(errno, "Failed to execute %s: %m", argv[optind]);
+ _exit(EXIT_FAILURE);
+ }
+
+ return wait_for_terminate_and_check(argv[optind], pid, WAIT_LOG);
+ }
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/login/loginctl.c b/src/login/loginctl.c
new file mode 100644
index 0000000..ab1b562
--- /dev/null
+++ b/src/login/loginctl.c
@@ -0,0 +1,1553 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <locale.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "cgroup-show.h"
+#include "cgroup-util.h"
+#include "format-table.h"
+#include "log.h"
+#include "logs-show.h"
+#include "macro.h"
+#include "main-func.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "sigbus.h"
+#include "signal-util.h"
+#include "spawn-polkit-agent.h"
+#include "string-table.h"
+#include "strv.h"
+#include "sysfs-show.h"
+#include "terminal-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "util.h"
+#include "verbs.h"
+
+static char **arg_property = NULL;
+static bool arg_all = false;
+static bool arg_value = false;
+static bool arg_full = false;
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+static const char *arg_kill_who = NULL;
+static int arg_signal = SIGTERM;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static char *arg_host = NULL;
+static bool arg_ask_password = true;
+static unsigned arg_lines = 10;
+static OutputMode arg_output = OUTPUT_SHORT;
+
+STATIC_DESTRUCTOR_REGISTER(arg_property, strv_freep);
+
+static OutputFlags get_output_flags(void) {
+
+ return
+ arg_all * OUTPUT_SHOW_ALL |
+ (arg_full || !on_tty() || pager_have()) * OUTPUT_FULL_WIDTH |
+ colors_enabled() * OUTPUT_COLOR;
+}
+
+static int get_session_path(sd_bus *bus, const char *session_id, sd_bus_error *error, char **path) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+ char *ans;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "GetSession",
+ error, &reply,
+ "s", session_id);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "o", &ans);
+ if (r < 0)
+ return r;
+
+ ans = strdup(ans);
+ if (!ans)
+ return -ENOMEM;
+
+ *path = ans;
+ return 0;
+}
+
+static int show_table(Table *table, const char *word) {
+ int r;
+
+ assert(table);
+ assert(word);
+
+ if (table_get_rows(table) > 1) {
+ r = table_set_sort(table, (size_t) 0, (size_t) -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to sort table: %m");
+
+ table_set_header(table, arg_legend);
+
+ if (OUTPUT_MODE_IS_JSON(arg_output))
+ r = table_print_json(table, NULL, output_mode_to_json_format_flags(arg_output) | JSON_FORMAT_COLOR_AUTO);
+ else
+ r = table_print(table, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to show table: %m");
+ }
+
+ if (arg_legend) {
+ if (table_get_rows(table) > 1)
+ printf("\n%zu %s listed.\n", table_get_rows(table) - 1, word);
+ else
+ printf("No %s.\n", word);
+ }
+
+ return 0;
+}
+
+static int list_sessions(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ (void) pager_open(arg_pager_flags);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "ListSessions",
+ &error, &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list sessions: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "(susso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ table = table_new("session", "uid", "user", "seat", "tty");
+ if (!table)
+ return log_oom();
+
+ /* Right-align the first two fields (since they are numeric) */
+ (void) table_set_align_percent(table, TABLE_HEADER_CELL(0), 100);
+ (void) table_set_align_percent(table, TABLE_HEADER_CELL(1), 100);
+
+ for (;;) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_tty = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply_tty = NULL;
+ const char *id, *user, *seat, *object, *tty = NULL;
+ uint32_t uid;
+
+ r = sd_bus_message_read(reply, "(susso)", &id, &uid, &user, &seat, &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ r = sd_bus_get_property(
+ bus,
+ "org.freedesktop.login1",
+ object,
+ "org.freedesktop.login1.Session",
+ "TTY",
+ &error_tty,
+ &reply_tty,
+ "s");
+ if (r < 0)
+ log_warning_errno(r, "Failed to get TTY for session %s: %s", id, bus_error_message(&error_tty, r));
+ else {
+ r = sd_bus_message_read(reply_tty, "s", &tty);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ r = table_add_many(table,
+ TABLE_STRING, id,
+ TABLE_UINT32, uid,
+ TABLE_STRING, user,
+ TABLE_STRING, seat,
+ TABLE_STRING, strna(tty));
+ if (r < 0)
+ return log_error_errno(r, "Failed to add row to table: %m");
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return show_table(table, "sessions");
+}
+
+static int list_users(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ (void) pager_open(arg_pager_flags);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "ListUsers",
+ &error, &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list users: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "(uso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ table = table_new("uid", "user");
+ if (!table)
+ return log_oom();
+
+ (void) table_set_align_percent(table, TABLE_HEADER_CELL(0), 100);
+
+ for (;;) {
+ const char *user;
+ uint32_t uid;
+
+ r = sd_bus_message_read(reply, "(uso)", &uid, &user, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ r = table_add_many(table,
+ TABLE_UINT32, uid,
+ TABLE_STRING, user);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add row to table: %m");
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return show_table(table, "users");
+}
+
+static int list_seats(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ (void) pager_open(arg_pager_flags);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "ListSeats",
+ &error, &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list seats: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "(so)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ table = table_new("seat");
+ if (!table)
+ return log_oom();
+
+ for (;;) {
+ const char *seat;
+
+ r = sd_bus_message_read(reply, "(so)", &seat, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ r = table_add_cell(table, NULL, TABLE_STRING, seat);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add row to table: %m");
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return show_table(table, "seats");
+}
+
+static int show_unit_cgroup(sd_bus *bus, const char *interface, const char *unit, pid_t leader) {
+ _cleanup_free_ char *cgroup = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ unsigned c;
+ int r;
+
+ assert(bus);
+ assert(unit);
+
+ r = show_cgroup_get_unit_path_and_warn(bus, unit, &cgroup);
+ if (r < 0)
+ return r;
+
+ if (isempty(cgroup))
+ return 0;
+
+ c = columns();
+ if (c > 18)
+ c -= 18;
+ else
+ c = 0;
+
+ r = unit_show_processes(bus, unit, cgroup, "\t\t ", c, get_output_flags(), &error);
+ if (r == -EBADR) {
+
+ if (arg_transport == BUS_TRANSPORT_REMOTE)
+ return 0;
+
+ /* Fallback for older systemd versions where the GetUnitProcesses() call is not yet available */
+
+ if (cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, cgroup) != 0 && leader <= 0)
+ return 0;
+
+ show_cgroup_and_extra(SYSTEMD_CGROUP_CONTROLLER, cgroup, "\t\t ", c, &leader, leader > 0, get_output_flags());
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to dump process list: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+typedef struct SessionStatusInfo {
+ const char *id;
+ uid_t uid;
+ const char *name;
+ struct dual_timestamp timestamp;
+ unsigned vtnr;
+ const char *seat;
+ const char *tty;
+ const char *display;
+ bool remote;
+ const char *remote_host;
+ const char *remote_user;
+ const char *service;
+ pid_t leader;
+ const char *type;
+ const char *class;
+ const char *state;
+ const char *scope;
+ const char *desktop;
+} SessionStatusInfo;
+
+typedef struct UserStatusInfo {
+ uid_t uid;
+ bool linger;
+ const char *name;
+ struct dual_timestamp timestamp;
+ const char *state;
+ char **sessions;
+ const char *display;
+ const char *slice;
+} UserStatusInfo;
+
+typedef struct SeatStatusInfo {
+ const char *id;
+ const char *active_session;
+ char **sessions;
+} SeatStatusInfo;
+
+static void user_status_info_clear(UserStatusInfo *info) {
+ if (info) {
+ strv_free(info->sessions);
+ zero(*info);
+ }
+}
+
+static void seat_status_info_clear(SeatStatusInfo *info) {
+ if (info) {
+ strv_free(info->sessions);
+ zero(*info);
+ }
+}
+
+static int prop_map_first_of_struct(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ const char *contents;
+ int r;
+
+ r = sd_bus_message_peek_type(m, NULL, &contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_STRUCT, contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_basic(m, contents[0], userdata);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_skip(m, contents+1);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int prop_map_sessions_strv(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ const char *name;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ r = sd_bus_message_enter_container(m, 'a', "(so)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(m, "(so)", &name, NULL)) > 0) {
+ r = strv_extend(userdata, name);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_exit_container(m);
+}
+
+static int print_session_status_info(sd_bus *bus, const char *path, bool *new_line) {
+
+ static const struct bus_properties_map map[] = {
+ { "Id", "s", NULL, offsetof(SessionStatusInfo, id) },
+ { "Name", "s", NULL, offsetof(SessionStatusInfo, name) },
+ { "TTY", "s", NULL, offsetof(SessionStatusInfo, tty) },
+ { "Display", "s", NULL, offsetof(SessionStatusInfo, display) },
+ { "RemoteHost", "s", NULL, offsetof(SessionStatusInfo, remote_host) },
+ { "RemoteUser", "s", NULL, offsetof(SessionStatusInfo, remote_user) },
+ { "Service", "s", NULL, offsetof(SessionStatusInfo, service) },
+ { "Desktop", "s", NULL, offsetof(SessionStatusInfo, desktop) },
+ { "Type", "s", NULL, offsetof(SessionStatusInfo, type) },
+ { "Class", "s", NULL, offsetof(SessionStatusInfo, class) },
+ { "Scope", "s", NULL, offsetof(SessionStatusInfo, scope) },
+ { "State", "s", NULL, offsetof(SessionStatusInfo, state) },
+ { "VTNr", "u", NULL, offsetof(SessionStatusInfo, vtnr) },
+ { "Leader", "u", NULL, offsetof(SessionStatusInfo, leader) },
+ { "Remote", "b", NULL, offsetof(SessionStatusInfo, remote) },
+ { "Timestamp", "t", NULL, offsetof(SessionStatusInfo, timestamp.realtime) },
+ { "TimestampMonotonic", "t", NULL, offsetof(SessionStatusInfo, timestamp.monotonic) },
+ { "User", "(uo)", prop_map_first_of_struct, offsetof(SessionStatusInfo, uid) },
+ { "Seat", "(so)", prop_map_first_of_struct, offsetof(SessionStatusInfo, seat) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ char since1[FORMAT_TIMESTAMP_RELATIVE_MAX];
+ char since2[FORMAT_TIMESTAMP_MAX];
+ const char *s1, *s2;
+ SessionStatusInfo i = {};
+ int r;
+
+ r = bus_map_all_properties(bus, "org.freedesktop.login1", path, map, BUS_MAP_BOOLEAN_AS_BOOL, &error, &m, &i);
+ if (r < 0)
+ return log_error_errno(r, "Could not get properties: %s", bus_error_message(&error, r));
+
+ if (*new_line)
+ printf("\n");
+
+ *new_line = true;
+
+ printf("%s - ", strna(i.id));
+
+ if (i.name)
+ printf("%s (%"PRIu32")\n", i.name, i.uid);
+ else
+ printf("%"PRIu32"\n", i.uid);
+
+ s1 = format_timestamp_relative(since1, sizeof(since1), i.timestamp.realtime);
+ s2 = format_timestamp(since2, sizeof(since2), i.timestamp.realtime);
+
+ if (s1)
+ printf("\t Since: %s; %s\n", s2, s1);
+ else if (s2)
+ printf("\t Since: %s\n", s2);
+
+ if (i.leader > 0) {
+ _cleanup_free_ char *t = NULL;
+
+ printf("\t Leader: %"PRIu32, i.leader);
+
+ get_process_comm(i.leader, &t);
+ if (t)
+ printf(" (%s)", t);
+
+ printf("\n");
+ }
+
+ if (!isempty(i.seat)) {
+ printf("\t Seat: %s", i.seat);
+
+ if (i.vtnr > 0)
+ printf("; vc%u", i.vtnr);
+
+ printf("\n");
+ }
+
+ if (i.tty)
+ printf("\t TTY: %s\n", i.tty);
+ else if (i.display)
+ printf("\t Display: %s\n", i.display);
+
+ if (i.remote_host && i.remote_user)
+ printf("\t Remote: %s@%s\n", i.remote_user, i.remote_host);
+ else if (i.remote_host)
+ printf("\t Remote: %s\n", i.remote_host);
+ else if (i.remote_user)
+ printf("\t Remote: user %s\n", i.remote_user);
+ else if (i.remote)
+ printf("\t Remote: Yes\n");
+
+ if (i.service) {
+ printf("\t Service: %s", i.service);
+
+ if (i.type)
+ printf("; type %s", i.type);
+
+ if (i.class)
+ printf("; class %s", i.class);
+
+ printf("\n");
+ } else if (i.type) {
+ printf("\t Type: %s", i.type);
+
+ if (i.class)
+ printf("; class %s", i.class);
+
+ printf("\n");
+ } else if (i.class)
+ printf("\t Class: %s\n", i.class);
+
+ if (!isempty(i.desktop))
+ printf("\t Desktop: %s\n", i.desktop);
+
+ if (i.state)
+ printf("\t State: %s\n", i.state);
+
+ if (i.scope) {
+ printf("\t Unit: %s\n", i.scope);
+ show_unit_cgroup(bus, "org.freedesktop.systemd1.Scope", i.scope, i.leader);
+
+ if (arg_transport == BUS_TRANSPORT_LOCAL) {
+
+ show_journal_by_unit(
+ stdout,
+ i.scope,
+ arg_output,
+ 0,
+ i.timestamp.monotonic,
+ arg_lines,
+ 0,
+ get_output_flags() | OUTPUT_BEGIN_NEWLINE,
+ SD_JOURNAL_LOCAL_ONLY,
+ true,
+ NULL);
+ }
+ }
+
+ return 0;
+}
+
+static int print_user_status_info(sd_bus *bus, const char *path, bool *new_line) {
+
+ static const struct bus_properties_map map[] = {
+ { "Name", "s", NULL, offsetof(UserStatusInfo, name) },
+ { "Linger", "b", NULL, offsetof(UserStatusInfo, linger) },
+ { "Slice", "s", NULL, offsetof(UserStatusInfo, slice) },
+ { "State", "s", NULL, offsetof(UserStatusInfo, state) },
+ { "UID", "u", NULL, offsetof(UserStatusInfo, uid) },
+ { "Timestamp", "t", NULL, offsetof(UserStatusInfo, timestamp.realtime) },
+ { "TimestampMonotonic", "t", NULL, offsetof(UserStatusInfo, timestamp.monotonic) },
+ { "Display", "(so)", prop_map_first_of_struct, offsetof(UserStatusInfo, display) },
+ { "Sessions", "a(so)", prop_map_sessions_strv, offsetof(UserStatusInfo, sessions) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ char since1[FORMAT_TIMESTAMP_RELATIVE_MAX];
+ char since2[FORMAT_TIMESTAMP_MAX];
+ const char *s1, *s2;
+ _cleanup_(user_status_info_clear) UserStatusInfo i = {};
+ int r;
+
+ r = bus_map_all_properties(bus, "org.freedesktop.login1", path, map, BUS_MAP_BOOLEAN_AS_BOOL, &error, &m, &i);
+ if (r < 0)
+ return log_error_errno(r, "Could not get properties: %s", bus_error_message(&error, r));
+
+ if (*new_line)
+ printf("\n");
+
+ *new_line = true;
+
+ if (i.name)
+ printf("%s (%"PRIu32")\n", i.name, i.uid);
+ else
+ printf("%"PRIu32"\n", i.uid);
+
+ s1 = format_timestamp_relative(since1, sizeof(since1), i.timestamp.realtime);
+ s2 = format_timestamp(since2, sizeof(since2), i.timestamp.realtime);
+
+ if (s1)
+ printf("\t Since: %s; %s\n", s2, s1);
+ else if (s2)
+ printf("\t Since: %s\n", s2);
+
+ if (!isempty(i.state))
+ printf("\t State: %s\n", i.state);
+
+ if (!strv_isempty(i.sessions)) {
+ char **l;
+ printf("\tSessions:");
+
+ STRV_FOREACH(l, i.sessions)
+ printf(" %s%s",
+ streq_ptr(*l, i.display) ? "*" : "",
+ *l);
+
+ printf("\n");
+ }
+
+ printf("\t Linger: %s\n", yes_no(i.linger));
+
+ if (i.slice) {
+ printf("\t Unit: %s\n", i.slice);
+ show_unit_cgroup(bus, "org.freedesktop.systemd1.Slice", i.slice, 0);
+
+ show_journal_by_unit(
+ stdout,
+ i.slice,
+ arg_output,
+ 0,
+ i.timestamp.monotonic,
+ arg_lines,
+ 0,
+ get_output_flags() | OUTPUT_BEGIN_NEWLINE,
+ SD_JOURNAL_LOCAL_ONLY,
+ true,
+ NULL);
+ }
+
+ return 0;
+}
+
+static int print_seat_status_info(sd_bus *bus, const char *path, bool *new_line) {
+
+ static const struct bus_properties_map map[] = {
+ { "Id", "s", NULL, offsetof(SeatStatusInfo, id) },
+ { "ActiveSession", "(so)", prop_map_first_of_struct, offsetof(SeatStatusInfo, active_session) },
+ { "Sessions", "a(so)", prop_map_sessions_strv, offsetof(SeatStatusInfo, sessions) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(seat_status_info_clear) SeatStatusInfo i = {};
+ int r;
+
+ r = bus_map_all_properties(bus, "org.freedesktop.login1", path, map, 0, &error, &m, &i);
+ if (r < 0)
+ return log_error_errno(r, "Could not get properties: %s", bus_error_message(&error, r));
+
+ if (*new_line)
+ printf("\n");
+
+ *new_line = true;
+
+ printf("%s\n", strna(i.id));
+
+ if (!strv_isempty(i.sessions)) {
+ char **l;
+ printf("\tSessions:");
+
+ STRV_FOREACH(l, i.sessions) {
+ if (streq_ptr(*l, i.active_session))
+ printf(" *%s", *l);
+ else
+ printf(" %s", *l);
+ }
+
+ printf("\n");
+ }
+
+ if (arg_transport == BUS_TRANSPORT_LOCAL) {
+ unsigned c;
+
+ c = columns();
+ if (c > 21)
+ c -= 21;
+ else
+ c = 0;
+
+ printf("\t Devices:\n");
+
+ show_sysfs(i.id, "\t\t ", c, get_output_flags());
+ }
+
+ return 0;
+}
+
+static int print_property(const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all) {
+ char type;
+ const char *contents;
+ int r;
+
+ assert(name);
+ assert(m);
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_STRUCT:
+
+ if (contents[0] == SD_BUS_TYPE_STRING && STR_IN_SET(name, "Display", "Seat", "ActiveSession")) {
+ const char *s;
+
+ r = sd_bus_message_read(m, "(so)", &s, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (all || !isempty(s))
+ bus_print_property_value(name, expected_value, value, "%s", s);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_UINT32 && streq(name, "User")) {
+ uint32_t uid;
+
+ r = sd_bus_message_read(m, "(uo)", &uid, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!uid_is_valid(uid))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid user ID: " UID_FMT,
+ uid);
+
+ bus_print_property_value(name, expected_value, value, UID_FMT, uid);
+ return 1;
+ }
+ break;
+
+ case SD_BUS_TYPE_ARRAY:
+
+ if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "Sessions")) {
+ const char *s;
+ bool space = false;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(so)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!value)
+ printf("%s=", name);
+
+ while ((r = sd_bus_message_read(m, "(so)", &s, NULL)) > 0) {
+ printf("%s%s", space ? " " : "", s);
+ space = true;
+ }
+
+ if (space || !value)
+ printf("\n");
+
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+static int show_properties(sd_bus *bus, const char *path, bool *new_line) {
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(new_line);
+
+ if (*new_line)
+ printf("\n");
+
+ *new_line = true;
+
+ r = bus_print_all_properties(bus, "org.freedesktop.login1", path, print_property, arg_property, arg_value, arg_all, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
+static int show_session(int argc, char *argv[], void *userdata) {
+ bool properties, new_line = false;
+ sd_bus *bus = userdata;
+ int r, i;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *path = NULL;
+
+ assert(bus);
+ assert(argv);
+
+ properties = !strstr(argv[0], "status");
+
+ (void) pager_open(arg_pager_flags);
+
+ if (argc <= 1) {
+ const char *session, *p = "/org/freedesktop/login1/session/self";
+
+ if (properties)
+ /* If no argument is specified inspect the manager itself */
+ return show_properties(bus, "/org/freedesktop/login1", &new_line);
+
+ /* And in the pretty case, show data of the calling session */
+ session = getenv("XDG_SESSION_ID");
+ if (session) {
+ r = get_session_path(bus, session, &error, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get session path: %s", bus_error_message(&error, r));
+
+ p = path;
+ }
+
+ return print_session_status_info(bus, p, &new_line);
+ }
+
+ for (i = 1; i < argc; i++) {
+ r = get_session_path(bus, argv[i], &error, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get session path: %s", bus_error_message(&error, r));
+
+ if (properties)
+ r = show_properties(bus, path, &new_line);
+ else
+ r = print_session_status_info(bus, path, &new_line);
+
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int show_user(int argc, char *argv[], void *userdata) {
+ bool properties, new_line = false;
+ sd_bus *bus = userdata;
+ int r, i;
+
+ assert(bus);
+ assert(argv);
+
+ properties = !strstr(argv[0], "status");
+
+ (void) pager_open(arg_pager_flags);
+
+ if (argc <= 1) {
+ /* If not argument is specified inspect the manager
+ * itself */
+ if (properties)
+ return show_properties(bus, "/org/freedesktop/login1", &new_line);
+
+ return print_user_status_info(bus, "/org/freedesktop/login1/user/self", &new_line);
+ }
+
+ for (i = 1; i < argc; i++) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message * reply = NULL;
+ const char *path = NULL;
+ uid_t uid;
+
+ r = get_user_creds((const char**) (argv+i), &uid, NULL, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to look up user %s: %m", argv[i]);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "GetUser",
+ &error, &reply,
+ "u", (uint32_t) uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get user: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (properties)
+ r = show_properties(bus, path, &new_line);
+ else
+ r = print_user_status_info(bus, path, &new_line);
+
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int show_seat(int argc, char *argv[], void *userdata) {
+ bool properties, new_line = false;
+ sd_bus *bus = userdata;
+ int r, i;
+
+ assert(bus);
+ assert(argv);
+
+ properties = !strstr(argv[0], "status");
+
+ (void) pager_open(arg_pager_flags);
+
+ if (argc <= 1) {
+ /* If not argument is specified inspect the manager
+ * itself */
+ if (properties)
+ return show_properties(bus, "/org/freedesktop/login1", &new_line);
+
+ return print_seat_status_info(bus, "/org/freedesktop/login1/seat/self", &new_line);
+ }
+
+ for (i = 1; i < argc; i++) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message * reply = NULL;
+ const char *path = NULL;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "GetSeat",
+ &error, &reply,
+ "s", argv[i]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get seat: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (properties)
+ r = show_properties(bus, path, &new_line);
+ else
+ r = print_seat_status_info(bus, path, &new_line);
+
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int activate(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ char *short_argv[3];
+ int r, i;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ if (argc < 2) {
+ /* No argument? Let's either use $XDG_SESSION_ID (if specified), or an empty
+ * session name, in which case logind will try to guess our session. */
+
+ short_argv[0] = argv[0];
+ short_argv[1] = getenv("XDG_SESSION_ID") ?: (char*) "";
+ short_argv[2] = NULL;
+
+ argv = short_argv;
+ argc = 2;
+ }
+
+ for (i = 1; i < argc; i++) {
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ streq(argv[0], "lock-session") ? "LockSession" :
+ streq(argv[0], "unlock-session") ? "UnlockSession" :
+ streq(argv[0], "terminate-session") ? "TerminateSession" :
+ "ActivateSession",
+ &error, NULL,
+ "s", argv[i]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, -r));
+ }
+
+ return 0;
+}
+
+static int kill_session(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r, i;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ if (!arg_kill_who)
+ arg_kill_who = "all";
+
+ for (i = 1; i < argc; i++) {
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "KillSession",
+ &error, NULL,
+ "ssi", argv[i], arg_kill_who, arg_signal);
+ if (r < 0)
+ return log_error_errno(r, "Could not kill session: %s", bus_error_message(&error, -r));
+ }
+
+ return 0;
+}
+
+static int enable_linger(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ char* short_argv[3];
+ bool b;
+ int r, i;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ b = streq(argv[0], "enable-linger");
+
+ if (argc < 2) {
+ /* No argument? Let's use an empty user name,
+ * then logind will use our user. */
+
+ short_argv[0] = argv[0];
+ short_argv[1] = (char*) "";
+ short_argv[2] = NULL;
+ argv = short_argv;
+ argc = 2;
+ }
+
+ for (i = 1; i < argc; i++) {
+ uid_t uid;
+
+ if (isempty(argv[i]))
+ uid = UID_INVALID;
+ else {
+ r = get_user_creds((const char**) (argv+i), &uid, NULL, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to look up user %s: %m", argv[i]);
+ }
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "SetUserLinger",
+ &error, NULL,
+ "ubb", (uint32_t) uid, b, true);
+ if (r < 0)
+ return log_error_errno(r, "Could not enable linger: %s", bus_error_message(&error, -r));
+ }
+
+ return 0;
+}
+
+static int terminate_user(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r, i;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ for (i = 1; i < argc; i++) {
+ uid_t uid;
+
+ r = get_user_creds((const char**) (argv+i), &uid, NULL, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to look up user %s: %m", argv[i]);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "TerminateUser",
+ &error, NULL,
+ "u", (uint32_t) uid);
+ if (r < 0)
+ return log_error_errno(r, "Could not terminate user: %s", bus_error_message(&error, -r));
+ }
+
+ return 0;
+}
+
+static int kill_user(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r, i;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ if (!arg_kill_who)
+ arg_kill_who = "all";
+
+ for (i = 1; i < argc; i++) {
+ uid_t uid;
+
+ r = get_user_creds((const char**) (argv+i), &uid, NULL, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to look up user %s: %m", argv[i]);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "KillUser",
+ &error, NULL,
+ "ui", (uint32_t) uid, arg_signal);
+ if (r < 0)
+ return log_error_errno(r, "Could not kill user: %s", bus_error_message(&error, -r));
+ }
+
+ return 0;
+}
+
+static int attach(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r, i;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ for (i = 2; i < argc; i++) {
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "AttachDevice",
+ &error, NULL,
+ "ssb", argv[1], argv[i], true);
+
+ if (r < 0)
+ return log_error_errno(r, "Could not attach device: %s", bus_error_message(&error, -r));
+ }
+
+ return 0;
+}
+
+static int flush_devices(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "FlushDevices",
+ &error, NULL,
+ "b", true);
+ if (r < 0)
+ return log_error_errno(r, "Could not flush devices: %s", bus_error_message(&error, -r));
+
+ return 0;
+}
+
+static int lock_sessions(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ streq(argv[0], "lock-sessions") ? "LockSessions" : "UnlockSessions",
+ &error, NULL,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not lock sessions: %s", bus_error_message(&error, -r));
+
+ return 0;
+}
+
+static int terminate_seat(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r, i;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ for (i = 1; i < argc; i++) {
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "TerminateSeat",
+ &error, NULL,
+ "s", argv[i]);
+ if (r < 0)
+ return log_error_errno(r, "Could not terminate seat: %s", bus_error_message(&error, -r));
+ }
+
+ return 0;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("loginctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Send control commands to or query the login manager.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers and footers\n"
+ " --no-ask-password Don't prompt for password\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " -p --property=NAME Show only properties by this name\n"
+ " -a --all Show all properties, including empty ones\n"
+ " --value When showing properties, only print the value\n"
+ " -l --full Do not ellipsize output\n"
+ " --kill-who=WHO Who to send signal to\n"
+ " -s --signal=SIGNAL Which signal to send\n"
+ " -n --lines=INTEGER Number of journal entries to show\n"
+ " -o --output=STRING Change journal output mode (short, short-precise,\n"
+ " short-iso, short-iso-precise, short-full,\n"
+ " short-monotonic, short-unix, verbose, export,\n"
+ " json, json-pretty, json-sse, json-seq, cat,\n"
+ " with-unit)\n"
+ "Session Commands:\n"
+ " list-sessions List sessions\n"
+ " session-status [ID...] Show session status\n"
+ " show-session [ID...] Show properties of sessions or the manager\n"
+ " activate [ID] Activate a session\n"
+ " lock-session [ID...] Screen lock one or more sessions\n"
+ " unlock-session [ID...] Screen unlock one or more sessions\n"
+ " lock-sessions Screen lock all current sessions\n"
+ " unlock-sessions Screen unlock all current sessions\n"
+ " terminate-session ID... Terminate one or more sessions\n"
+ " kill-session ID... Send signal to processes of a session\n\n"
+ "User Commands:\n"
+ " list-users List users\n"
+ " user-status [USER...] Show user status\n"
+ " show-user [USER...] Show properties of users or the manager\n"
+ " enable-linger [USER...] Enable linger state of one or more users\n"
+ " disable-linger [USER...] Disable linger state of one or more users\n"
+ " terminate-user USER... Terminate all sessions of one or more users\n"
+ " kill-user USER... Send signal to processes of a user\n\n"
+ "Seat Commands:\n"
+ " list-seats List seats\n"
+ " seat-status [NAME...] Show seat status\n"
+ " show-seat [NAME...] Show properties of seats or the manager\n"
+ " attach NAME DEVICE... Attach one or more devices to a seat\n"
+ " flush-devices Flush all device associations\n"
+ " terminate-seat NAME... Terminate all sessions on one or more seats\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_VALUE,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ ARG_KILL_WHO,
+ ARG_NO_ASK_PASSWORD,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "property", required_argument, NULL, 'p' },
+ { "all", no_argument, NULL, 'a' },
+ { "value", no_argument, NULL, ARG_VALUE },
+ { "full", no_argument, NULL, 'l' },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "kill-who", required_argument, NULL, ARG_KILL_WHO },
+ { "signal", required_argument, NULL, 's' },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "lines", required_argument, NULL, 'n' },
+ { "output", required_argument, NULL, 'o' },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hp:als:H:M:n:o:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case 'p': {
+ r = strv_extend(&arg_property, optarg);
+ if (r < 0)
+ return log_oom();
+
+ /* If the user asked for a particular
+ * property, show it to him, even if it is
+ * empty. */
+ arg_all = true;
+ break;
+ }
+
+ case 'a':
+ arg_all = true;
+ break;
+
+ case ARG_VALUE:
+ arg_value = true;
+ break;
+
+ case 'l':
+ arg_full = true;
+ break;
+
+ case 'n':
+ if (safe_atou(optarg, &arg_lines) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse lines '%s'", optarg);
+ break;
+
+ case 'o':
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(output_mode, OutputMode, _OUTPUT_MODE_MAX);
+ return 0;
+ }
+
+ arg_output = output_mode_from_string(optarg);
+ if (arg_output < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown output '%s'.", optarg);
+
+ if (OUTPUT_MODE_IS_JSON(arg_output))
+ arg_legend = false;
+
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case ARG_KILL_WHO:
+ arg_kill_who = optarg;
+ break;
+
+ case 's':
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(signal, int, _NSIG);
+ return 0;
+ }
+
+ arg_signal = signal_from_string(optarg);
+ if (arg_signal < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse signal string %s.", optarg);
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int loginctl_main(int argc, char *argv[], sd_bus *bus) {
+
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "list-sessions", VERB_ANY, 1, VERB_DEFAULT, list_sessions },
+ { "session-status", VERB_ANY, VERB_ANY, 0, show_session },
+ { "show-session", VERB_ANY, VERB_ANY, 0, show_session },
+ { "activate", VERB_ANY, 2, 0, activate },
+ { "lock-session", VERB_ANY, VERB_ANY, 0, activate },
+ { "unlock-session", VERB_ANY, VERB_ANY, 0, activate },
+ { "lock-sessions", VERB_ANY, 1, 0, lock_sessions },
+ { "unlock-sessions", VERB_ANY, 1, 0, lock_sessions },
+ { "terminate-session", 2, VERB_ANY, 0, activate },
+ { "kill-session", 2, VERB_ANY, 0, kill_session },
+ { "list-users", VERB_ANY, 1, 0, list_users },
+ { "user-status", VERB_ANY, VERB_ANY, 0, show_user },
+ { "show-user", VERB_ANY, VERB_ANY, 0, show_user },
+ { "enable-linger", VERB_ANY, VERB_ANY, 0, enable_linger },
+ { "disable-linger", VERB_ANY, VERB_ANY, 0, enable_linger },
+ { "terminate-user", 2, VERB_ANY, 0, terminate_user },
+ { "kill-user", 2, VERB_ANY, 0, kill_user },
+ { "list-seats", VERB_ANY, 1, 0, list_seats },
+ { "seat-status", VERB_ANY, VERB_ANY, 0, show_seat },
+ { "show-seat", VERB_ANY, VERB_ANY, 0, show_seat },
+ { "attach", 3, VERB_ANY, 0, attach },
+ { "flush-devices", VERB_ANY, 1, 0, flush_devices },
+ { "terminate-seat", 2, VERB_ANY, 0, terminate_seat },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, bus);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ /* The journal merging logic potentially needs a lot of fds. */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ sigbus_install();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = bus_connect_transport(arg_transport, arg_host, false, &bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ (void) sd_bus_set_allow_interactive_authorization(bus, arg_ask_password);
+
+ return loginctl_main(argc, argv, bus);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/login/logind-acl.c b/src/login/logind-acl.c
new file mode 100644
index 0000000..d2f4e60
--- /dev/null
+++ b/src/login/logind-acl.c
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "sd-device.h"
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "logind-acl.h"
+#include "set.h"
+#include "string-util.h"
+#include "util.h"
+
+static int flush_acl(acl_t acl) {
+ acl_entry_t i;
+ int found;
+ bool changed = false;
+
+ assert(acl);
+
+ for (found = acl_get_entry(acl, ACL_FIRST_ENTRY, &i);
+ found > 0;
+ found = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) {
+
+ acl_tag_t tag;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if (tag != ACL_USER)
+ continue;
+
+ if (acl_delete_entry(acl, i) < 0)
+ return -errno;
+
+ changed = true;
+ }
+
+ if (found < 0)
+ return -errno;
+
+ return changed;
+}
+
+int devnode_acl(const char *path,
+ bool flush,
+ bool del, uid_t old_uid,
+ bool add, uid_t new_uid) {
+
+ acl_t acl;
+ int r = 0;
+ bool changed = false;
+
+ assert(path);
+
+ acl = acl_get_file(path, ACL_TYPE_ACCESS);
+ if (!acl)
+ return -errno;
+
+ if (flush) {
+
+ r = flush_acl(acl);
+ if (r < 0)
+ goto finish;
+ if (r > 0)
+ changed = true;
+
+ } else if (del && old_uid > 0) {
+ acl_entry_t entry;
+
+ r = acl_find_uid(acl, old_uid, &entry);
+ if (r < 0)
+ goto finish;
+
+ if (r > 0) {
+ if (acl_delete_entry(acl, entry) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ changed = true;
+ }
+ }
+
+ if (add && new_uid > 0) {
+ acl_entry_t entry;
+ acl_permset_t permset;
+ int rd, wt;
+
+ r = acl_find_uid(acl, new_uid, &entry);
+ if (r < 0)
+ goto finish;
+
+ if (r == 0) {
+ if (acl_create_entry(&acl, &entry) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ if (acl_set_tag_type(entry, ACL_USER) < 0 ||
+ acl_set_qualifier(entry, &new_uid) < 0) {
+ r = -errno;
+ goto finish;
+ }
+ }
+
+ if (acl_get_permset(entry, &permset) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ rd = acl_get_perm(permset, ACL_READ);
+ if (rd < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ wt = acl_get_perm(permset, ACL_WRITE);
+ if (wt < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ if (!rd || !wt) {
+
+ if (acl_add_perm(permset, ACL_READ|ACL_WRITE) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ changed = true;
+ }
+ }
+
+ if (!changed)
+ goto finish;
+
+ if (acl_calc_mask(&acl) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ if (acl_set_file(path, ACL_TYPE_ACCESS, acl) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ r = 0;
+
+finish:
+ acl_free(acl);
+
+ return r;
+}
+
+int devnode_acl_all(const char *seat,
+ bool flush,
+ bool del, uid_t old_uid,
+ bool add, uid_t new_uid) {
+
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ _cleanup_set_free_free_ Set *nodes = NULL;
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *dent;
+ sd_device *d;
+ Iterator i;
+ char *n;
+ int r;
+
+ nodes = set_new(&path_hash_ops);
+ if (!nodes)
+ return -ENOMEM;
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ if (isempty(seat))
+ seat = "seat0";
+
+ /* We can only match by one tag in libudev. We choose
+ * "uaccess" for that. If we could match for two tags here we
+ * could add the seat name as second match tag, but this would
+ * be hardly optimizable in libudev, and hence checking the
+ * second tag manually in our loop is a good solution. */
+ r = sd_device_enumerator_add_match_tag(e, "uaccess");
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ const char *node, *sn;
+
+ if (sd_device_get_property_value(d, "ID_SEAT", &sn) < 0 || isempty(sn))
+ sn = "seat0";
+
+ if (!streq(seat, sn))
+ continue;
+
+ /* In case people mistag devices with nodes, we need to ignore this */
+ if (sd_device_get_devname(d, &node) < 0)
+ continue;
+
+ log_device_debug(d, "Found udev node %s for seat %s", node, seat);
+ r = set_put_strdup(nodes, node);
+ if (r < 0)
+ return r;
+ }
+
+ /* udev exports "dead" device nodes to allow module on-demand loading,
+ * these devices are not known to the kernel at this moment */
+ dir = opendir("/run/udev/static_node-tags/uaccess");
+ if (dir) {
+ FOREACH_DIRENT(dent, dir, return -errno) {
+ _cleanup_free_ char *unescaped_devname = NULL;
+
+ if (cunescape(dent->d_name, UNESCAPE_RELAX, &unescaped_devname) < 0)
+ return -ENOMEM;
+
+ n = strappend("/dev/", unescaped_devname);
+ if (!n)
+ return -ENOMEM;
+
+ log_debug("Found static node %s for seat %s", n, seat);
+ r = set_consume(nodes, n);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ r = 0;
+ SET_FOREACH(n, nodes, i) {
+ int k;
+
+ log_debug("Changing ACLs at %s for seat %s (uid "UID_FMT"→"UID_FMT"%s%s)",
+ n, seat, old_uid, new_uid,
+ del ? " del" : "", add ? " add" : "");
+
+ k = devnode_acl(n, flush, del, old_uid, add, new_uid);
+ if (k == -ENOENT)
+ log_debug("Device %s disappeared while setting ACLs", n);
+ else if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
diff --git a/src/login/logind-acl.h b/src/login/logind-acl.h
new file mode 100644
index 0000000..00e286d
--- /dev/null
+++ b/src/login/logind-acl.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#if HAVE_ACL
+
+int devnode_acl(const char *path,
+ bool flush,
+ bool del, uid_t old_uid,
+ bool add, uid_t new_uid);
+
+int devnode_acl_all(const char *seat,
+ bool flush,
+ bool del, uid_t old_uid,
+ bool add, uid_t new_uid);
+#else
+
+static inline int devnode_acl(const char *path,
+ bool flush,
+ bool del, uid_t old_uid,
+ bool add, uid_t new_uid) {
+ return 0;
+}
+
+static inline int devnode_acl_all(const char *seat,
+ bool flush,
+ bool del, uid_t old_uid,
+ bool add, uid_t new_uid) {
+ return 0;
+}
+
+#endif
diff --git a/src/login/logind-action.c b/src/login/logind-action.c
new file mode 100644
index 0000000..6c93667
--- /dev/null
+++ b/src/login/logind-action.c
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "conf-parser.h"
+#include "format-util.h"
+#include "logind-action.h"
+#include "process-util.h"
+#include "sleep-config.h"
+#include "special.h"
+#include "string-table.h"
+#include "terminal-util.h"
+#include "user-util.h"
+
+const char* manager_target_for_action(HandleAction handle) {
+ static const char * const target_table[_HANDLE_ACTION_MAX] = {
+ [HANDLE_POWEROFF] = SPECIAL_POWEROFF_TARGET,
+ [HANDLE_REBOOT] = SPECIAL_REBOOT_TARGET,
+ [HANDLE_HALT] = SPECIAL_HALT_TARGET,
+ [HANDLE_KEXEC] = SPECIAL_KEXEC_TARGET,
+ [HANDLE_SUSPEND] = SPECIAL_SUSPEND_TARGET,
+ [HANDLE_HIBERNATE] = SPECIAL_HIBERNATE_TARGET,
+ [HANDLE_HYBRID_SLEEP] = SPECIAL_HYBRID_SLEEP_TARGET,
+ [HANDLE_SUSPEND_THEN_HIBERNATE] = SPECIAL_SUSPEND_THEN_HIBERNATE_TARGET,
+ };
+
+ assert(handle >= 0);
+ if (handle < (ssize_t) ELEMENTSOF(target_table))
+ return target_table[handle];
+ return NULL;
+}
+
+int manager_handle_action(
+ Manager *m,
+ InhibitWhat inhibit_key,
+ HandleAction handle,
+ bool ignore_inhibited,
+ bool is_edge) {
+
+ static const char * const message_table[_HANDLE_ACTION_MAX] = {
+ [HANDLE_POWEROFF] = "Powering Off...",
+ [HANDLE_REBOOT] = "Rebooting...",
+ [HANDLE_HALT] = "Halting...",
+ [HANDLE_KEXEC] = "Rebooting via kexec...",
+ [HANDLE_SUSPEND] = "Suspending...",
+ [HANDLE_HIBERNATE] = "Hibernating...",
+ [HANDLE_HYBRID_SLEEP] = "Hibernating and suspending...",
+ [HANDLE_SUSPEND_THEN_HIBERNATE] = "Suspending, then hibernating...",
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ InhibitWhat inhibit_operation;
+ Inhibitor *offending = NULL;
+ bool supported;
+ const char *target;
+ int r;
+
+ assert(m);
+
+ /* If the key handling is turned off, don't do anything */
+ if (handle == HANDLE_IGNORE) {
+ log_debug("Refusing operation, as it is turned off.");
+ return 0;
+ }
+
+ if (inhibit_key == INHIBIT_HANDLE_LID_SWITCH) {
+ /* If the last system suspend or startup is too close,
+ * let's not suspend for now, to give USB docking
+ * stations some time to settle so that we can
+ * properly watch its displays. */
+ if (m->lid_switch_ignore_event_source) {
+ log_debug("Ignoring lid switch request, system startup or resume too close.");
+ return 0;
+ }
+ }
+
+ /* If the key handling is inhibited, don't do anything */
+ if (inhibit_key > 0) {
+ if (manager_is_inhibited(m, inhibit_key, INHIBIT_BLOCK, NULL, true, false, 0, NULL)) {
+ log_debug("Refusing operation, %s is inhibited.", inhibit_what_to_string(inhibit_key));
+ return 0;
+ }
+ }
+
+ /* Locking is handled differently from the rest. */
+ if (handle == HANDLE_LOCK) {
+ if (!is_edge)
+ return 0;
+
+ log_info("Locking sessions...");
+ session_send_lock_all(m, true);
+ return 1;
+ }
+
+ if (handle == HANDLE_SUSPEND)
+ supported = can_sleep("suspend") > 0;
+ else if (handle == HANDLE_HIBERNATE)
+ supported = can_sleep("hibernate") > 0;
+ else if (handle == HANDLE_HYBRID_SLEEP)
+ supported = can_sleep("hybrid-sleep") > 0;
+ else if (handle == HANDLE_SUSPEND_THEN_HIBERNATE)
+ supported = can_sleep("suspend-then-hibernate") > 0;
+ else if (handle == HANDLE_KEXEC)
+ supported = access(KEXEC, X_OK) >= 0;
+ else
+ supported = true;
+
+ if (!supported && IN_SET(handle, HANDLE_HIBERNATE, HANDLE_HYBRID_SLEEP, HANDLE_SUSPEND_THEN_HIBERNATE)) {
+ supported = can_sleep("suspend") > 0;
+ if (supported) {
+ log_notice("Operation '%s' requested but not supported, using regular suspend instead.", handle_action_to_string(handle));
+ handle = HANDLE_SUSPEND;
+ }
+ }
+
+ if (!supported) {
+ log_warning("Requested operation not supported, ignoring.");
+ return -EOPNOTSUPP;
+ }
+
+ if (m->action_what > 0) {
+ log_debug("Action already in progress, ignoring.");
+ return -EALREADY;
+ }
+
+ assert_se(target = manager_target_for_action(handle));
+
+ inhibit_operation = IN_SET(handle, HANDLE_SUSPEND, HANDLE_HIBERNATE,
+ HANDLE_HYBRID_SLEEP,
+ HANDLE_SUSPEND_THEN_HIBERNATE) ? INHIBIT_SLEEP : INHIBIT_SHUTDOWN;
+
+ /* If the actual operation is inhibited, warn and fail */
+ if (!ignore_inhibited &&
+ manager_is_inhibited(m, inhibit_operation, INHIBIT_BLOCK, NULL, false, false, 0, &offending)) {
+ _cleanup_free_ char *comm = NULL, *u = NULL;
+
+ (void) get_process_comm(offending->pid, &comm);
+ u = uid_to_name(offending->uid);
+
+ /* If this is just a recheck of the lid switch then don't warn about anything */
+ if (!is_edge) {
+ log_debug("Refusing operation, %s is inhibited by UID "UID_FMT"/%s, PID "PID_FMT"/%s.",
+ inhibit_what_to_string(inhibit_operation),
+ offending->uid, strna(u),
+ offending->pid, strna(comm));
+ return 0;
+ }
+
+ log_error("Refusing operation, %s is inhibited by UID "UID_FMT"/%s, PID "PID_FMT"/%s.",
+ inhibit_what_to_string(inhibit_operation),
+ offending->uid, strna(u),
+ offending->pid, strna(comm));
+
+ return -EPERM;
+ }
+
+ log_info("%s", message_table[handle]);
+
+ r = bus_manager_shutdown_or_sleep_now_or_later(m, target, inhibit_operation, &error);
+ if (r < 0)
+ return log_error_errno(r, "Failed to execute operation: %s", bus_error_message(&error, r));
+
+ return 1;
+}
+
+static const char* const handle_action_table[_HANDLE_ACTION_MAX] = {
+ [HANDLE_IGNORE] = "ignore",
+ [HANDLE_POWEROFF] = "poweroff",
+ [HANDLE_REBOOT] = "reboot",
+ [HANDLE_HALT] = "halt",
+ [HANDLE_KEXEC] = "kexec",
+ [HANDLE_SUSPEND] = "suspend",
+ [HANDLE_HIBERNATE] = "hibernate",
+ [HANDLE_HYBRID_SLEEP] = "hybrid-sleep",
+ [HANDLE_SUSPEND_THEN_HIBERNATE] = "suspend-then-hibernate",
+ [HANDLE_LOCK] = "lock",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(handle_action, HandleAction);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_handle_action, handle_action, HandleAction, "Failed to parse handle action setting");
diff --git a/src/login/logind-action.h b/src/login/logind-action.h
new file mode 100644
index 0000000..a45aab7
--- /dev/null
+++ b/src/login/logind-action.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "conf-parser.h"
+
+typedef enum HandleAction {
+ HANDLE_IGNORE,
+ HANDLE_POWEROFF,
+ HANDLE_REBOOT,
+ HANDLE_HALT,
+ HANDLE_KEXEC,
+ HANDLE_SUSPEND,
+ HANDLE_HIBERNATE,
+ HANDLE_HYBRID_SLEEP,
+ HANDLE_SUSPEND_THEN_HIBERNATE,
+ HANDLE_LOCK,
+ _HANDLE_ACTION_MAX,
+ _HANDLE_ACTION_INVALID = -1
+} HandleAction;
+
+#include "logind-inhibit.h"
+#include "logind.h"
+
+int manager_handle_action(
+ Manager *m,
+ InhibitWhat inhibit_key,
+ HandleAction handle,
+ bool ignore_inhibited,
+ bool is_edge);
+
+const char* handle_action_to_string(HandleAction h) _const_;
+HandleAction handle_action_from_string(const char *s) _pure_;
+
+const char* manager_target_for_action(HandleAction handle);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_handle_action);
diff --git a/src/login/logind-button.c b/src/login/logind-button.c
new file mode 100644
index 0000000..daffbf0
--- /dev/null
+++ b/src/login/logind-button.c
@@ -0,0 +1,377 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <linux/input.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "logind-button.h"
+#include "missing_input.h"
+#include "string-util.h"
+#include "util.h"
+
+#define CONST_MAX4(a, b, c, d) CONST_MAX(CONST_MAX(a, b), CONST_MAX(c, d))
+
+#define ULONG_BITS (sizeof(unsigned long)*8)
+
+static bool bitset_get(const unsigned long *bits, unsigned i) {
+ return (bits[i / ULONG_BITS] >> (i % ULONG_BITS)) & 1UL;
+}
+
+static void bitset_put(unsigned long *bits, unsigned i) {
+ bits[i / ULONG_BITS] |= (unsigned long) 1 << (i % ULONG_BITS);
+}
+
+Button* button_new(Manager *m, const char *name) {
+ Button *b;
+
+ assert(m);
+ assert(name);
+
+ b = new0(Button, 1);
+ if (!b)
+ return NULL;
+
+ b->name = strdup(name);
+ if (!b->name)
+ return mfree(b);
+
+ if (hashmap_put(m->buttons, b->name, b) < 0) {
+ free(b->name);
+ return mfree(b);
+ }
+
+ b->manager = m;
+ b->fd = -1;
+
+ return b;
+}
+
+void button_free(Button *b) {
+ assert(b);
+
+ hashmap_remove(b->manager->buttons, b->name);
+
+ sd_event_source_unref(b->io_event_source);
+ sd_event_source_unref(b->check_event_source);
+
+ if (b->fd >= 0)
+ /* If the device has been unplugged close() returns
+ * ENODEV, let's ignore this, hence we don't use
+ * safe_close() */
+ (void) close(b->fd);
+
+ free(b->name);
+ free(b->seat);
+ free(b);
+}
+
+int button_set_seat(Button *b, const char *sn) {
+ char *s;
+
+ assert(b);
+ assert(sn);
+
+ s = strdup(sn);
+ if (!s)
+ return -ENOMEM;
+
+ free(b->seat);
+ b->seat = s;
+
+ return 0;
+}
+
+static void button_lid_switch_handle_action(Manager *manager, bool is_edge) {
+ HandleAction handle_action;
+
+ assert(manager);
+
+ /* If we are docked or on external power, handle the lid switch
+ * differently */
+ if (manager_is_docked_or_external_displays(manager))
+ handle_action = manager->handle_lid_switch_docked;
+ else if (manager->handle_lid_switch_ep != _HANDLE_ACTION_INVALID &&
+ manager_is_on_external_power())
+ handle_action = manager->handle_lid_switch_ep;
+ else
+ handle_action = manager->handle_lid_switch;
+
+ manager_handle_action(manager, INHIBIT_HANDLE_LID_SWITCH, handle_action, manager->lid_switch_ignore_inhibited, is_edge);
+}
+
+static int button_recheck(sd_event_source *e, void *userdata) {
+ Button *b = userdata;
+
+ assert(b);
+ assert(b->lid_closed);
+
+ button_lid_switch_handle_action(b->manager, false);
+ return 1;
+}
+
+static int button_install_check_event_source(Button *b) {
+ int r;
+ assert(b);
+
+ /* Install a post handler, so that we keep rechecking as long as the lid is closed. */
+
+ if (b->check_event_source)
+ return 0;
+
+ r = sd_event_add_post(b->manager->event, &b->check_event_source, button_recheck, b);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_priority(b->check_event_source, SD_EVENT_PRIORITY_IDLE+1);
+}
+
+static int button_dispatch(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Button *b = userdata;
+ struct input_event ev;
+ ssize_t l;
+
+ assert(s);
+ assert(fd == b->fd);
+ assert(b);
+
+ l = read(b->fd, &ev, sizeof(ev));
+ if (l < 0)
+ return errno != EAGAIN ? -errno : 0;
+ if ((size_t) l < sizeof(ev))
+ return -EIO;
+
+ if (ev.type == EV_KEY && ev.value > 0) {
+
+ switch (ev.code) {
+
+ case KEY_POWER:
+ case KEY_POWER2:
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("Power key pressed."),
+ "MESSAGE_ID=" SD_MESSAGE_POWER_KEY_STR);
+
+ manager_handle_action(b->manager, INHIBIT_HANDLE_POWER_KEY, b->manager->handle_power_key, b->manager->power_key_ignore_inhibited, true);
+ break;
+
+ /* The kernel is a bit confused here:
+
+ KEY_SLEEP = suspend-to-ram, which everybody else calls "suspend"
+ KEY_SUSPEND = suspend-to-disk, which everybody else calls "hibernate"
+ */
+
+ case KEY_SLEEP:
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("Suspend key pressed."),
+ "MESSAGE_ID=" SD_MESSAGE_SUSPEND_KEY_STR);
+
+ manager_handle_action(b->manager, INHIBIT_HANDLE_SUSPEND_KEY, b->manager->handle_suspend_key, b->manager->suspend_key_ignore_inhibited, true);
+ break;
+
+ case KEY_SUSPEND:
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("Hibernate key pressed."),
+ "MESSAGE_ID=" SD_MESSAGE_HIBERNATE_KEY_STR);
+
+ manager_handle_action(b->manager, INHIBIT_HANDLE_HIBERNATE_KEY, b->manager->handle_hibernate_key, b->manager->hibernate_key_ignore_inhibited, true);
+ break;
+ }
+
+ } else if (ev.type == EV_SW && ev.value > 0) {
+
+ if (ev.code == SW_LID) {
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("Lid closed."),
+ "MESSAGE_ID=" SD_MESSAGE_LID_CLOSED_STR);
+
+ b->lid_closed = true;
+ button_lid_switch_handle_action(b->manager, true);
+ button_install_check_event_source(b);
+
+ } else if (ev.code == SW_DOCK) {
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("System docked."),
+ "MESSAGE_ID=" SD_MESSAGE_SYSTEM_DOCKED_STR);
+
+ b->docked = true;
+ }
+
+ } else if (ev.type == EV_SW && ev.value == 0) {
+
+ if (ev.code == SW_LID) {
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("Lid opened."),
+ "MESSAGE_ID=" SD_MESSAGE_LID_OPENED_STR);
+
+ b->lid_closed = false;
+ b->check_event_source = sd_event_source_unref(b->check_event_source);
+
+ } else if (ev.code == SW_DOCK) {
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("System undocked."),
+ "MESSAGE_ID=" SD_MESSAGE_SYSTEM_UNDOCKED_STR);
+
+ b->docked = false;
+ }
+ }
+
+ return 0;
+}
+
+static int button_suitable(Button *b) {
+ unsigned long types[CONST_MAX(EV_KEY, EV_SW)/ULONG_BITS+1];
+
+ assert(b);
+ assert(b->fd);
+
+ if (ioctl(b->fd, EVIOCGBIT(EV_SYN, sizeof(types)), types) < 0)
+ return -errno;
+
+ if (bitset_get(types, EV_KEY)) {
+ unsigned long keys[CONST_MAX4(KEY_POWER, KEY_POWER2, KEY_SLEEP, KEY_SUSPEND)/ULONG_BITS+1];
+
+ if (ioctl(b->fd, EVIOCGBIT(EV_KEY, sizeof(keys)), keys) < 0)
+ return -errno;
+
+ if (bitset_get(keys, KEY_POWER) ||
+ bitset_get(keys, KEY_POWER2) ||
+ bitset_get(keys, KEY_SLEEP) ||
+ bitset_get(keys, KEY_SUSPEND))
+ return true;
+ }
+
+ if (bitset_get(types, EV_SW)) {
+ unsigned long switches[CONST_MAX(SW_LID, SW_DOCK)/ULONG_BITS+1];
+
+ if (ioctl(b->fd, EVIOCGBIT(EV_SW, sizeof(switches)), switches) < 0)
+ return -errno;
+
+ if (bitset_get(switches, SW_LID) ||
+ bitset_get(switches, SW_DOCK))
+ return true;
+ }
+
+ return false;
+}
+
+static int button_set_mask(Button *b) {
+ unsigned long
+ types[CONST_MAX(EV_KEY, EV_SW)/ULONG_BITS+1] = {},
+ keys[CONST_MAX4(KEY_POWER, KEY_POWER2, KEY_SLEEP, KEY_SUSPEND)/ULONG_BITS+1] = {},
+ switches[CONST_MAX(SW_LID, SW_DOCK)/ULONG_BITS+1] = {};
+ struct input_mask mask;
+
+ assert(b);
+ assert(b->fd >= 0);
+
+ bitset_put(types, EV_KEY);
+ bitset_put(types, EV_SW);
+
+ mask = (struct input_mask) {
+ .type = EV_SYN,
+ .codes_size = sizeof(types),
+ .codes_ptr = PTR_TO_UINT64(types),
+ };
+
+ if (ioctl(b->fd, EVIOCSMASK, &mask) < 0)
+ /* Log only at debug level if the kernel doesn't do EVIOCSMASK yet */
+ return log_full_errno(IN_SET(errno, ENOTTY, EOPNOTSUPP, EINVAL) ? LOG_DEBUG : LOG_WARNING,
+ errno, "Failed to set EV_SYN event mask on /dev/input/%s: %m", b->name);
+
+ bitset_put(keys, KEY_POWER);
+ bitset_put(keys, KEY_POWER2);
+ bitset_put(keys, KEY_SLEEP);
+ bitset_put(keys, KEY_SUSPEND);
+
+ mask = (struct input_mask) {
+ .type = EV_KEY,
+ .codes_size = sizeof(keys),
+ .codes_ptr = PTR_TO_UINT64(keys),
+ };
+
+ if (ioctl(b->fd, EVIOCSMASK, &mask) < 0)
+ return log_warning_errno(errno, "Failed to set EV_KEY event mask on /dev/input/%s: %m", b->name);
+
+ bitset_put(switches, SW_LID);
+ bitset_put(switches, SW_DOCK);
+
+ mask = (struct input_mask) {
+ .type = EV_SW,
+ .codes_size = sizeof(switches),
+ .codes_ptr = PTR_TO_UINT64(switches),
+ };
+
+ if (ioctl(b->fd, EVIOCSMASK, &mask) < 0)
+ return log_warning_errno(errno, "Failed to set EV_SW event mask on /dev/input/%s: %m", b->name);
+
+ return 0;
+}
+
+int button_open(Button *b) {
+ char *p, name[256];
+ int r;
+
+ assert(b);
+
+ b->fd = safe_close(b->fd);
+
+ p = strjoina("/dev/input/", b->name);
+
+ b->fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (b->fd < 0)
+ return log_warning_errno(errno, "Failed to open %s: %m", p);
+
+ r = button_suitable(b);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to determine whether input device is relevant to us: %m");
+ if (r == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EADDRNOTAVAIL),
+ "Device %s does not expose keys or switches relevant to us, ignoring.",
+ p);
+
+ if (ioctl(b->fd, EVIOCGNAME(sizeof(name)), name) < 0) {
+ r = log_error_errno(errno, "Failed to get input name: %m");
+ goto fail;
+ }
+
+ (void) button_set_mask(b);
+
+ r = sd_event_add_io(b->manager->event, &b->io_event_source, b->fd, EPOLLIN, button_dispatch, b);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add button event: %m");
+ goto fail;
+ }
+
+ log_info("Watching system buttons on /dev/input/%s (%s)", b->name, name);
+
+ return 0;
+
+fail:
+ b->fd = safe_close(b->fd);
+ return r;
+}
+
+int button_check_switches(Button *b) {
+ unsigned long switches[CONST_MAX(SW_LID, SW_DOCK)/ULONG_BITS+1] = {};
+ assert(b);
+
+ if (b->fd < 0)
+ return -EINVAL;
+
+ if (ioctl(b->fd, EVIOCGSW(sizeof(switches)), switches) < 0)
+ return -errno;
+
+ b->lid_closed = bitset_get(switches, SW_LID);
+ b->docked = bitset_get(switches, SW_DOCK);
+
+ if (b->lid_closed)
+ button_install_check_event_source(b);
+
+ return 0;
+}
diff --git a/src/login/logind-button.h b/src/login/logind-button.h
new file mode 100644
index 0000000..d009851
--- /dev/null
+++ b/src/login/logind-button.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Button Button;
+
+#include "logind.h"
+
+struct Button {
+ Manager *manager;
+
+ sd_event_source *io_event_source;
+ sd_event_source *check_event_source;
+
+ char *name;
+ char *seat;
+ int fd;
+
+ bool lid_closed;
+ bool docked;
+};
+
+Button* button_new(Manager *m, const char *name);
+void button_free(Button *b);
+int button_open(Button *b);
+int button_set_seat(Button *b, const char *sn);
+int button_check_switches(Button *b);
diff --git a/src/login/logind-core.c b/src/login/logind-core.c
new file mode 100644
index 0000000..60831bb
--- /dev/null
+++ b/src/login/logind-core.c
@@ -0,0 +1,843 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <pwd.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <linux/vt.h>
+#if ENABLE_UTMP
+#include <utmpx.h>
+#endif
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "conf-parser.h"
+#include "device-util.h"
+#include "fd-util.h"
+#include "logind.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "user-util.h"
+
+void manager_reset_config(Manager *m) {
+ assert(m);
+
+ m->n_autovts = 6;
+ m->reserve_vt = 6;
+ m->remove_ipc = true;
+ m->inhibit_delay_max = 5 * USEC_PER_SEC;
+ m->user_stop_delay = 10 * USEC_PER_SEC;
+
+ m->handle_power_key = HANDLE_POWEROFF;
+ m->handle_suspend_key = HANDLE_SUSPEND;
+ m->handle_hibernate_key = HANDLE_HIBERNATE;
+ m->handle_lid_switch = HANDLE_SUSPEND;
+ m->handle_lid_switch_ep = _HANDLE_ACTION_INVALID;
+ m->handle_lid_switch_docked = HANDLE_IGNORE;
+ m->power_key_ignore_inhibited = false;
+ m->suspend_key_ignore_inhibited = false;
+ m->hibernate_key_ignore_inhibited = false;
+ m->lid_switch_ignore_inhibited = true;
+
+ m->holdoff_timeout_usec = 30 * USEC_PER_SEC;
+
+ m->idle_action_usec = 30 * USEC_PER_MINUTE;
+ m->idle_action = HANDLE_IGNORE;
+
+ m->runtime_dir_size = physical_memory_scale(10U, 100U); /* 10% */
+ m->user_tasks_max = system_tasks_max_scale(DEFAULT_USER_TASKS_MAX_PERCENTAGE, 100U); /* 33% */
+ m->sessions_max = 8192;
+ m->inhibitors_max = 8192;
+
+ m->kill_user_processes = KILL_USER_PROCESSES;
+
+ m->kill_only_users = strv_free(m->kill_only_users);
+ m->kill_exclude_users = strv_free(m->kill_exclude_users);
+}
+
+int manager_parse_config_file(Manager *m) {
+ assert(m);
+
+ return config_parse_many_nulstr(PKGSYSCONFDIR "/logind.conf",
+ CONF_PATHS_NULSTR("systemd/logind.conf.d"),
+ "Login\0",
+ config_item_perf_lookup, logind_gperf_lookup,
+ CONFIG_PARSE_WARN, m);
+}
+
+int manager_add_device(Manager *m, const char *sysfs, bool master, Device **_device) {
+ Device *d;
+
+ assert(m);
+ assert(sysfs);
+
+ d = hashmap_get(m->devices, sysfs);
+ if (d)
+ /* we support adding master-flags, but not removing them */
+ d->master = d->master || master;
+ else {
+ d = device_new(m, sysfs, master);
+ if (!d)
+ return -ENOMEM;
+ }
+
+ if (_device)
+ *_device = d;
+
+ return 0;
+}
+
+int manager_add_seat(Manager *m, const char *id, Seat **_seat) {
+ Seat *s;
+ int r;
+
+ assert(m);
+ assert(id);
+
+ s = hashmap_get(m->seats, id);
+ if (!s) {
+ r = seat_new(&s, m, id);
+ if (r < 0)
+ return r;
+ }
+
+ if (_seat)
+ *_seat = s;
+
+ return 0;
+}
+
+int manager_add_session(Manager *m, const char *id, Session **_session) {
+ Session *s;
+ int r;
+
+ assert(m);
+ assert(id);
+
+ s = hashmap_get(m->sessions, id);
+ if (!s) {
+ r = session_new(&s, m, id);
+ if (r < 0)
+ return r;
+ }
+
+ if (_session)
+ *_session = s;
+
+ return 0;
+}
+
+int manager_add_user(
+ Manager *m,
+ uid_t uid,
+ gid_t gid,
+ const char *name,
+ const char *home,
+ User **_user) {
+
+ User *u;
+ int r;
+
+ assert(m);
+ assert(name);
+
+ u = hashmap_get(m->users, UID_TO_PTR(uid));
+ if (!u) {
+ r = user_new(&u, m, uid, gid, name, home);
+ if (r < 0)
+ return r;
+ }
+
+ if (_user)
+ *_user = u;
+
+ return 0;
+}
+
+int manager_add_user_by_name(
+ Manager *m,
+ const char *name,
+ User **_user) {
+
+ const char *home = NULL;
+ uid_t uid;
+ gid_t gid;
+ int r;
+
+ assert(m);
+ assert(name);
+
+ r = get_user_creds(&name, &uid, &gid, &home, NULL, 0);
+ if (r < 0)
+ return r;
+
+ return manager_add_user(m, uid, gid, name, home, _user);
+}
+
+int manager_add_user_by_uid(Manager *m, uid_t uid, User **_user) {
+ struct passwd *p;
+
+ assert(m);
+
+ errno = 0;
+ p = getpwuid(uid);
+ if (!p)
+ return errno > 0 ? -errno : -ENOENT;
+
+ return manager_add_user(m, uid, p->pw_gid, p->pw_name, p->pw_dir, _user);
+}
+
+int manager_add_inhibitor(Manager *m, const char* id, Inhibitor **_inhibitor) {
+ Inhibitor *i;
+
+ assert(m);
+ assert(id);
+
+ i = hashmap_get(m->inhibitors, id);
+ if (i) {
+ if (_inhibitor)
+ *_inhibitor = i;
+
+ return 0;
+ }
+
+ i = inhibitor_new(m, id);
+ if (!i)
+ return -ENOMEM;
+
+ if (_inhibitor)
+ *_inhibitor = i;
+
+ return 0;
+}
+
+int manager_add_button(Manager *m, const char *name, Button **_button) {
+ Button *b;
+
+ assert(m);
+ assert(name);
+
+ b = hashmap_get(m->buttons, name);
+ if (!b) {
+ b = button_new(m, name);
+ if (!b)
+ return -ENOMEM;
+ }
+
+ if (_button)
+ *_button = b;
+
+ return 0;
+}
+
+int manager_process_seat_device(Manager *m, sd_device *d) {
+ const char *action;
+ Device *device;
+ int r;
+
+ assert(m);
+
+ if (sd_device_get_property_value(d, "ACTION", &action) >= 0 &&
+ streq(action, "remove")) {
+ const char *syspath;
+
+ r = sd_device_get_syspath(d, &syspath);
+ if (r < 0)
+ return 0;
+
+ device = hashmap_get(m->devices, syspath);
+ if (!device)
+ return 0;
+
+ seat_add_to_gc_queue(device->seat);
+ device_free(device);
+
+ } else {
+ const char *sn, *syspath;
+ bool master;
+ Seat *seat;
+
+ if (sd_device_get_property_value(d, "ID_SEAT", &sn) < 0 || isempty(sn))
+ sn = "seat0";
+
+ if (!seat_name_is_valid(sn)) {
+ log_device_warning(d, "Device with invalid seat name %s found, ignoring.", sn);
+ return 0;
+ }
+
+ seat = hashmap_get(m->seats, sn);
+ master = sd_device_has_tag(d, "master-of-seat") > 0;
+
+ /* Ignore non-master devices for unknown seats */
+ if (!master && !seat)
+ return 0;
+
+ r = sd_device_get_syspath(d, &syspath);
+ if (r < 0)
+ return r;
+
+ r = manager_add_device(m, syspath, master, &device);
+ if (r < 0)
+ return r;
+
+ if (!seat) {
+ r = manager_add_seat(m, sn, &seat);
+ if (r < 0) {
+ if (!device->seat)
+ device_free(device);
+
+ return r;
+ }
+ }
+
+ device_attach(device, seat);
+ seat_start(seat);
+ }
+
+ return 0;
+}
+
+int manager_process_button_device(Manager *m, sd_device *d) {
+ const char *action, *sysname;
+ Button *b;
+ int r;
+
+ assert(m);
+
+ r = sd_device_get_sysname(d, &sysname);
+ if (r < 0)
+ return r;
+
+ if (sd_device_get_property_value(d, "ACTION", &action) >= 0 &&
+ streq(action, "remove")) {
+
+ b = hashmap_get(m->buttons, sysname);
+ if (!b)
+ return 0;
+
+ button_free(b);
+
+ } else {
+ const char *sn;
+
+ r = manager_add_button(m, sysname, &b);
+ if (r < 0)
+ return r;
+
+ if (sd_device_get_property_value(d, "ID_SEAT", &sn) < 0 || isempty(sn))
+ sn = "seat0";
+
+ button_set_seat(b, sn);
+
+ r = button_open(b);
+ if (r < 0) /* event device doesn't have any keys or switches relevant to us? (or any other error
+ * opening the device?) let's close the button again. */
+ button_free(b);
+ }
+
+ return 0;
+}
+
+int manager_get_session_by_pid(Manager *m, pid_t pid, Session **ret) {
+ _cleanup_free_ char *unit = NULL;
+ Session *s;
+ int r;
+
+ assert(m);
+
+ if (!pid_is_valid(pid))
+ return -EINVAL;
+
+ s = hashmap_get(m->sessions_by_leader, PID_TO_PTR(pid));
+ if (!s) {
+ r = cg_pid_get_unit(pid, &unit);
+ if (r < 0)
+ goto not_found;
+
+ s = hashmap_get(m->session_units, unit);
+ if (!s)
+ goto not_found;
+ }
+
+ if (ret)
+ *ret = s;
+
+ return 1;
+
+not_found:
+ if (ret)
+ *ret = NULL;
+ return 0;
+}
+
+int manager_get_user_by_pid(Manager *m, pid_t pid, User **ret) {
+ _cleanup_free_ char *unit = NULL;
+ User *u;
+ int r;
+
+ assert(m);
+
+ if (!pid_is_valid(pid))
+ return -EINVAL;
+
+ r = cg_pid_get_slice(pid, &unit);
+ if (r < 0)
+ goto not_found;
+
+ u = hashmap_get(m->user_units, unit);
+ if (!u)
+ goto not_found;
+
+ if (ret)
+ *ret = u;
+
+ return 1;
+
+not_found:
+ if (ret)
+ *ret = NULL;
+
+ return 0;
+}
+
+int manager_get_idle_hint(Manager *m, dual_timestamp *t) {
+ Session *s;
+ bool idle_hint;
+ dual_timestamp ts = DUAL_TIMESTAMP_NULL;
+ Iterator i;
+
+ assert(m);
+
+ idle_hint = !manager_is_inhibited(m, INHIBIT_IDLE, INHIBIT_BLOCK, t, false, false, 0, NULL);
+
+ HASHMAP_FOREACH(s, m->sessions, i) {
+ dual_timestamp k;
+ int ih;
+
+ ih = session_get_idle_hint(s, &k);
+ if (ih < 0)
+ return ih;
+
+ if (!ih) {
+ if (!idle_hint) {
+ if (k.monotonic < ts.monotonic)
+ ts = k;
+ } else {
+ idle_hint = false;
+ ts = k;
+ }
+ } else if (idle_hint) {
+
+ if (k.monotonic > ts.monotonic)
+ ts = k;
+ }
+ }
+
+ if (t)
+ *t = ts;
+
+ return idle_hint;
+}
+
+bool manager_shall_kill(Manager *m, const char *user) {
+ assert(m);
+ assert(user);
+
+ if (!m->kill_exclude_users && streq(user, "root"))
+ return false;
+
+ if (strv_contains(m->kill_exclude_users, user))
+ return false;
+
+ if (!strv_isempty(m->kill_only_users))
+ return strv_contains(m->kill_only_users, user);
+
+ return m->kill_user_processes;
+}
+
+int config_parse_n_autovts(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ unsigned *n = data;
+ unsigned o;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou(rvalue, &o);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse number of autovts, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (o > 15) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "A maximum of 15 autovts are supported, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *n = o;
+ return 0;
+}
+
+static int vt_is_busy(unsigned vtnr) {
+ struct vt_stat vt_stat;
+ int r = 0;
+ _cleanup_close_ int fd;
+
+ assert(vtnr >= 1);
+
+ /* VT_GETSTATE "cannot return state for more than 16 VTs, since v_state is short" */
+ assert(vtnr <= 15);
+
+ /* We explicitly open /dev/tty1 here instead of /dev/tty0. If
+ * we'd open the latter we'd open the foreground tty which
+ * hence would be unconditionally busy. By opening /dev/tty1
+ * we avoid this. Since tty1 is special and needs to be an
+ * explicitly loaded getty or DM this is safe. */
+
+ fd = open_terminal("/dev/tty1", O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (ioctl(fd, VT_GETSTATE, &vt_stat) < 0)
+ r = -errno;
+ else
+ r = !!(vt_stat.v_state & (1 << vtnr));
+
+ return r;
+}
+
+int manager_spawn_autovt(Manager *m, unsigned vtnr) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char name[sizeof("autovt@tty.service") + DECIMAL_STR_MAX(unsigned)];
+ int r;
+
+ assert(m);
+ assert(vtnr >= 1);
+
+ if (vtnr > m->n_autovts &&
+ vtnr != m->reserve_vt)
+ return 0;
+
+ if (vtnr != m->reserve_vt) {
+ /* If this is the reserved TTY, we'll start the getty
+ * on it in any case, but otherwise only if it is not
+ * busy. */
+
+ r = vt_is_busy(vtnr);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ return -EBUSY;
+ }
+
+ snprintf(name, sizeof(name), "autovt@tty%u.service", vtnr);
+ r = sd_bus_call_method(
+ m->bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartUnit",
+ &error,
+ NULL,
+ "ss", name, "fail");
+ if (r < 0)
+ return log_error_errno(r, "Failed to start %s: %s", name, bus_error_message(&error, r));
+
+ return 0;
+}
+
+bool manager_is_lid_closed(Manager *m) {
+ Iterator i;
+ Button *b;
+
+ HASHMAP_FOREACH(b, m->buttons, i)
+ if (b->lid_closed)
+ return true;
+
+ return false;
+}
+
+static bool manager_is_docked(Manager *m) {
+ Iterator i;
+ Button *b;
+
+ HASHMAP_FOREACH(b, m->buttons, i)
+ if (b->docked)
+ return true;
+
+ return false;
+}
+
+static int manager_count_external_displays(Manager *m) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+ int r, n = 0;
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_subsystem(e, "drm", true);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ const char *status, *enabled, *dash, *nn, *subsys;
+ sd_device *p;
+
+ if (sd_device_get_parent(d, &p) < 0)
+ continue;
+
+ /* If the parent shares the same subsystem as the
+ * device we are looking at then it is a connector,
+ * which is what we are interested in. */
+ if (sd_device_get_subsystem(p, &subsys) < 0 || !streq(subsys, "drm"))
+ continue;
+
+ if (sd_device_get_sysname(d, &nn) < 0)
+ continue;
+
+ /* Ignore internal displays: the type is encoded in the sysfs name, as the second dash separated item
+ * (the first is the card name, the last the connector number). We implement a blacklist of external
+ * displays here, rather than a whitelist of internal ones, to ensure we don't block suspends too
+ * eagerly. */
+ dash = strchr(nn, '-');
+ if (!dash)
+ continue;
+
+ dash++;
+ if (!STARTSWITH_SET(dash,
+ "VGA-", "DVI-I-", "DVI-D-", "DVI-A-"
+ "Composite-", "SVIDEO-", "Component-",
+ "DIN-", "DP-", "HDMI-A-", "HDMI-B-", "TV-"))
+ continue;
+
+ /* Ignore ports that are not enabled */
+ if (sd_device_get_sysattr_value(d, "enabled", &enabled) < 0 || !streq(enabled, "enabled"))
+ continue;
+
+ /* We count any connector which is not explicitly
+ * "disconnected" as connected. */
+ if (sd_device_get_sysattr_value(d, "status", &status) < 0 || !streq(status, "disconnected"))
+ n++;
+ }
+
+ return n;
+}
+
+bool manager_is_docked_or_external_displays(Manager *m) {
+ int n;
+
+ /* If we are docked don't react to lid closing */
+ if (manager_is_docked(m)) {
+ log_debug("System is docked.");
+ return true;
+ }
+
+ /* If we have more than one display connected,
+ * assume that we are docked. */
+ n = manager_count_external_displays(m);
+ if (n < 0)
+ log_warning_errno(n, "Display counting failed: %m");
+ else if (n >= 1) {
+ log_debug("External (%i) displays connected.", n);
+ return true;
+ }
+
+ return false;
+}
+
+bool manager_is_on_external_power(void) {
+ int r;
+
+ /* For now we only check for AC power, but 'external power' can apply to anything that isn't an internal
+ * battery */
+ r = on_ac_power();
+ if (r < 0)
+ log_warning_errno(r, "Failed to read AC power status: %m");
+
+ return r != 0; /* Treat failure as 'on AC' */
+}
+
+bool manager_all_buttons_ignored(Manager *m) {
+ assert(m);
+
+ if (m->handle_power_key != HANDLE_IGNORE)
+ return false;
+ if (m->handle_suspend_key != HANDLE_IGNORE)
+ return false;
+ if (m->handle_hibernate_key != HANDLE_IGNORE)
+ return false;
+ if (m->handle_lid_switch != HANDLE_IGNORE)
+ return false;
+ if (m->handle_lid_switch_ep != _HANDLE_ACTION_INVALID &&
+ m->handle_lid_switch_ep != HANDLE_IGNORE)
+ return false;
+ if (m->handle_lid_switch_docked != HANDLE_IGNORE)
+ return false;
+
+ return true;
+}
+
+int manager_read_utmp(Manager *m) {
+#if ENABLE_UTMP
+ int r;
+
+ assert(m);
+
+ if (utmpxname(_PATH_UTMPX) < 0)
+ return log_error_errno(errno, "Failed to set utmp path to " _PATH_UTMPX ": %m");
+
+ setutxent();
+
+ for (;;) {
+ _cleanup_free_ char *t = NULL;
+ struct utmpx *u;
+ const char *c;
+ Session *s;
+
+ errno = 0;
+ u = getutxent();
+ if (!u) {
+ if (errno != 0)
+ log_warning_errno(errno, "Failed to read " _PATH_UTMPX ", ignoring: %m");
+ r = 0;
+ break;
+ }
+
+ if (u->ut_type != USER_PROCESS)
+ continue;
+
+ if (!pid_is_valid(u->ut_pid))
+ continue;
+
+ t = strndup(u->ut_line, sizeof(u->ut_line));
+ if (!t) {
+ r = log_oom();
+ break;
+ }
+
+ c = path_startswith(t, "/dev/");
+ if (c) {
+ r = free_and_strdup(&t, c);
+ if (r < 0) {
+ log_oom();
+ break;
+ }
+ }
+
+ if (isempty(t))
+ continue;
+
+ s = hashmap_get(m->sessions_by_leader, PID_TO_PTR(u->ut_pid));
+ if (!s)
+ continue;
+
+ if (s->tty_validity == TTY_FROM_UTMP && !streq_ptr(s->tty, t)) {
+ /* This may happen on multiplexed SSH connection (i.e. 'SSH connection sharing'). In
+ * this case PAM and utmp sessions don't match. In such a case let's invalidate the TTY
+ * information and never acquire it again. */
+
+ s->tty = mfree(s->tty);
+ s->tty_validity = TTY_UTMP_INCONSISTENT;
+ log_debug("Session '%s' has inconsistent TTY information, dropping TTY information.", s->id);
+ continue;
+ }
+
+ /* Never override what we figured out once */
+ if (s->tty || s->tty_validity >= 0)
+ continue;
+
+ s->tty = TAKE_PTR(t);
+ s->tty_validity = TTY_FROM_UTMP;
+ log_debug("Acquired TTY information '%s' from utmp for session '%s'.", s->tty, s->id);
+ }
+
+ endutxent();
+ return r;
+#else
+ return 0;
+#endif
+}
+
+#if ENABLE_UTMP
+static int manager_dispatch_utmp(sd_event_source *s, const struct inotify_event *event, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+
+ /* If there's indication the file itself might have been removed or became otherwise unavailable, then let's
+ * reestablish the watch on whatever there's now. */
+ if ((event->mask & (IN_ATTRIB|IN_DELETE_SELF|IN_MOVE_SELF|IN_Q_OVERFLOW|IN_UNMOUNT)) != 0)
+ manager_connect_utmp(m);
+
+ (void) manager_read_utmp(m);
+ return 0;
+}
+#endif
+
+void manager_connect_utmp(Manager *m) {
+#if ENABLE_UTMP
+ sd_event_source *s = NULL;
+ int r;
+
+ assert(m);
+
+ /* Watch utmp for changes via inotify. We do this to deal with tools such as ssh, which will register the PAM
+ * session early, and acquire a TTY only much later for the connection. Thus during PAM the TTY won't be known
+ * yet. ssh will register itself with utmp when it finally acquired the TTY. Hence, let's make use of this, and
+ * watch utmp for the TTY asynchronously. We use the PAM session's leader PID as key, to find the right entry.
+ *
+ * Yes, relying on utmp is pretty ugly, but it's good enough for informational purposes, as well as idle
+ * detection (which, for tty sessions, relies on the TTY used) */
+
+ r = sd_event_add_inotify(m->event, &s, _PATH_UTMPX, IN_MODIFY|IN_MOVE_SELF|IN_DELETE_SELF|IN_ATTRIB, manager_dispatch_utmp, m);
+ if (r < 0)
+ log_full_errno(r == -ENOENT ? LOG_DEBUG: LOG_WARNING, r, "Failed to create inotify watch on " _PATH_UTMPX ", ignoring: %m");
+ else {
+ r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ log_warning_errno(r, "Failed to adjust utmp event source priority, ignoring: %m");
+
+ (void) sd_event_source_set_description(s, "utmp");
+ }
+
+ sd_event_source_unref(m->utmp_event_source);
+ m->utmp_event_source = s;
+#endif
+}
+
+void manager_reconnect_utmp(Manager *m) {
+#if ENABLE_UTMP
+ assert(m);
+
+ if (m->utmp_event_source)
+ return;
+
+ manager_connect_utmp(m);
+#endif
+}
diff --git a/src/login/logind-dbus.c b/src/login/logind-dbus.c
new file mode 100644
index 0000000..8ab498f
--- /dev/null
+++ b/src/login/logind-dbus.c
@@ -0,0 +1,3215 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <pwd.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "efivars.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio-label.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "logind.h"
+#include "missing_capability.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "sleep-config.h"
+#include "special.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "utmp-wtmp.h"
+
+static int get_sender_session(Manager *m, sd_bus_message *message, sd_bus_error *error, Session **ret) {
+
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ const char *name;
+ Session *session;
+ int r;
+
+ /* Get client login session. This is not what you are looking for these days,
+ * as apps may instead belong to a user service unit. This includes terminal
+ * emulators and hence command-line apps. */
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_SESSION|SD_BUS_CREDS_AUGMENT, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_session(creds, &name);
+ if (r == -ENXIO)
+ goto err_no_session;
+ if (r < 0)
+ return r;
+
+ session = hashmap_get(m->sessions, name);
+ if (!session)
+ goto err_no_session;
+
+ *ret = session;
+ return 0;
+
+err_no_session:
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SESSION_FOR_PID,
+ "Caller does not belong to any known session");
+}
+
+int manager_get_session_from_creds(Manager *m, sd_bus_message *message, const char *name, sd_bus_error *error, Session **ret) {
+ Session *session;
+
+ assert(m);
+ assert(message);
+ assert(ret);
+
+ if (isempty(name))
+ return get_sender_session(m, message, error, ret);
+
+ session = hashmap_get(m->sessions, name);
+ if (!session)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_SESSION, "No session '%s' known", name);
+
+ *ret = session;
+ return 0;
+}
+
+static int get_sender_user(Manager *m, sd_bus_message *message, sd_bus_error *error, User **ret) {
+
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ uid_t uid;
+ User *user;
+ int r;
+
+ /* Note that we get the owner UID of the session, not the actual client UID here! */
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_OWNER_UID|SD_BUS_CREDS_AUGMENT, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_owner_uid(creds, &uid);
+ if (r == -ENXIO)
+ goto err_no_user;
+ if (r < 0)
+ return r;
+
+ user = hashmap_get(m->users, UID_TO_PTR(uid));
+ if (!user)
+ goto err_no_user;
+
+ *ret = user;
+ return 0;
+
+err_no_user:
+ return sd_bus_error_setf(error, BUS_ERROR_NO_USER_FOR_PID, "Caller does not belong to any logged in user or lingering user");
+}
+
+int manager_get_user_from_creds(Manager *m, sd_bus_message *message, uid_t uid, sd_bus_error *error, User **ret) {
+ User *user;
+
+ assert(m);
+ assert(message);
+ assert(ret);
+
+ if (!uid_is_valid(uid))
+ return get_sender_user(m, message, error, ret);
+
+ user = hashmap_get(m->users, UID_TO_PTR(uid));
+ if (!user)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_USER, "User ID "UID_FMT" is not logged in or lingering", uid);
+
+ *ret = user;
+ return 0;
+}
+
+int manager_get_seat_from_creds(Manager *m, sd_bus_message *message, const char *name, sd_bus_error *error, Seat **ret) {
+ Seat *seat;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(ret);
+
+ if (isempty(name)) {
+ Session *session;
+
+ r = manager_get_session_from_creds(m, message, NULL, error, &session);
+ if (r < 0)
+ return r;
+
+ seat = session->seat;
+ if (!seat)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_SEAT, "Session has no seat.");
+ } else {
+ seat = hashmap_get(m->seats, name);
+ if (!seat)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_SEAT, "No seat '%s' known", name);
+ }
+
+ *ret = seat;
+ return 0;
+}
+
+static int property_get_idle_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ return sd_bus_message_append(reply, "b", manager_get_idle_hint(m, NULL) > 0);
+}
+
+static int property_get_idle_since_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ dual_timestamp t = DUAL_TIMESTAMP_NULL;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ manager_get_idle_hint(m, &t);
+
+ return sd_bus_message_append(reply, "t", streq(property, "IdleSinceHint") ? t.realtime : t.monotonic);
+}
+
+static int property_get_inhibited(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ InhibitWhat w;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ w = manager_inhibit_what(m, streq(property, "BlockInhibited") ? INHIBIT_BLOCK : INHIBIT_DELAY);
+
+ return sd_bus_message_append(reply, "s", inhibit_what_to_string(w));
+}
+
+static int property_get_preparing(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ bool b;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ if (streq(property, "PreparingForShutdown"))
+ b = m->action_what & INHIBIT_SHUTDOWN;
+ else
+ b = m->action_what & INHIBIT_SLEEP;
+
+ return sd_bus_message_append(reply, "b", b);
+}
+
+static int property_get_scheduled_shutdown(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ r = sd_bus_message_open_container(reply, 'r', "st");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "st", m->scheduled_shutdown_type, m->scheduled_shutdown_timeout);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_handle_action, handle_action, HandleAction);
+static BUS_DEFINE_PROPERTY_GET(property_get_docked, "b", Manager, manager_is_docked_or_external_displays);
+static BUS_DEFINE_PROPERTY_GET(property_get_lid_closed, "b", Manager, manager_is_lid_closed);
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_on_external_power, "b", manager_is_on_external_power);
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_compat_user_tasks_max, "t", CGROUP_LIMIT_MAX);
+static BUS_DEFINE_PROPERTY_GET_REF(property_get_hashmap_size, "t", Hashmap *, (uint64_t) hashmap_size);
+
+static int method_get_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ const char *name;
+ Session *session;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_session_from_creds(m, message, name, error, &session);
+ if (r < 0)
+ return r;
+
+ p = session_bus_path(session);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+/* Get login session of a process. This is not what you are looking for these days,
+ * as apps may instead belong to a user service unit. This includes terminal
+ * emulators and hence command-line apps. */
+static int method_get_session_by_pid(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Session *session = NULL;
+ Manager *m = userdata;
+ pid_t pid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(pid_t) == sizeof(uint32_t));
+
+ r = sd_bus_message_read(message, "u", &pid);
+ if (r < 0)
+ return r;
+ if (pid < 0)
+ return -EINVAL;
+
+ if (pid == 0) {
+ r = manager_get_session_from_creds(m, message, NULL, error, &session);
+ if (r < 0)
+ return r;
+ } else {
+ r = manager_get_session_by_pid(m, pid, &session);
+ if (r < 0)
+ return r;
+
+ if (!session)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SESSION_FOR_PID, "PID "PID_FMT" does not belong to any known session", pid);
+ }
+
+ p = session_bus_path(session);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_get_user(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ uint32_t uid;
+ User *user;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "u", &uid);
+ if (r < 0)
+ return r;
+
+ r = manager_get_user_from_creds(m, message, uid, error, &user);
+ if (r < 0)
+ return r;
+
+ p = user_bus_path(user);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_get_user_by_pid(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ User *user = NULL;
+ pid_t pid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(pid_t) == sizeof(uint32_t));
+
+ r = sd_bus_message_read(message, "u", &pid);
+ if (r < 0)
+ return r;
+ if (pid < 0)
+ return -EINVAL;
+
+ if (pid == 0) {
+ r = manager_get_user_from_creds(m, message, UID_INVALID, error, &user);
+ if (r < 0)
+ return r;
+ } else {
+ r = manager_get_user_by_pid(m, pid, &user);
+ if (r < 0)
+ return r;
+ if (!user)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_USER_FOR_PID,
+ "PID "PID_FMT" does not belong to any logged in user or lingering user",
+ pid);
+ }
+
+ p = user_bus_path(user);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_get_seat(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ const char *name;
+ Seat *seat;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_seat_from_creds(m, message, name, error, &seat);
+ if (r < 0)
+ return r;
+
+ p = seat_bus_path(seat);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_list_sessions(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ Session *session;
+ Iterator i;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(susso)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(session, m->sessions, i) {
+ _cleanup_free_ char *p = NULL;
+
+ p = session_bus_path(session);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(susso)",
+ session->id,
+ (uint32_t) session->user->uid,
+ session->user->name,
+ session->seat ? session->seat->id : "",
+ p);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_list_users(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ User *user;
+ Iterator i;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(uso)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(user, m->users, i) {
+ _cleanup_free_ char *p = NULL;
+
+ p = user_bus_path(user);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(uso)",
+ (uint32_t) user->uid,
+ user->name,
+ p);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_list_seats(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ Seat *seat;
+ Iterator i;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(so)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(seat, m->seats, i) {
+ _cleanup_free_ char *p = NULL;
+
+ p = seat_bus_path(seat);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(so)", seat->id, p);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_list_inhibitors(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ Inhibitor *inhibitor;
+ Iterator i;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssssuu)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(inhibitor, m->inhibitors, i) {
+
+ r = sd_bus_message_append(reply, "(ssssuu)",
+ strempty(inhibit_what_to_string(inhibitor->what)),
+ strempty(inhibitor->who),
+ strempty(inhibitor->why),
+ strempty(inhibit_mode_to_string(inhibitor->mode)),
+ (uint32_t) inhibitor->uid,
+ (uint32_t) inhibitor->pid);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_create_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *service, *type, *class, *cseat, *tty, *display, *remote_user, *remote_host, *desktop;
+ _cleanup_free_ char *id = NULL;
+ Session *session = NULL;
+ uint32_t audit_id = 0;
+ Manager *m = userdata;
+ User *user = NULL;
+ Seat *seat = NULL;
+ pid_t leader;
+ uid_t uid;
+ int remote;
+ uint32_t vtnr = 0;
+ SessionType t;
+ SessionClass c;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(pid_t) == sizeof(uint32_t));
+ assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+
+ r = sd_bus_message_read(message, "uusssssussbss", &uid, &leader, &service, &type, &class, &desktop, &cseat, &vtnr, &tty, &display, &remote, &remote_user, &remote_host);
+ if (r < 0)
+ return r;
+
+ if (!uid_is_valid(uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid UID");
+ if (leader < 0 || leader == 1 || leader == getpid_cached())
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid leader PID");
+
+ if (isempty(type))
+ t = _SESSION_TYPE_INVALID;
+ else {
+ t = session_type_from_string(type);
+ if (t < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid session type %s", type);
+ }
+
+ if (isempty(class))
+ c = _SESSION_CLASS_INVALID;
+ else {
+ c = session_class_from_string(class);
+ if (c < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid session class %s", class);
+ }
+
+ if (isempty(desktop))
+ desktop = NULL;
+ else {
+ if (!string_is_safe(desktop))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid desktop string %s", desktop);
+ }
+
+ if (isempty(cseat))
+ seat = NULL;
+ else {
+ seat = hashmap_get(m->seats, cseat);
+ if (!seat)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_SEAT, "No seat '%s' known", cseat);
+ }
+
+ if (tty_is_vc(tty)) {
+ int v;
+
+ if (!seat)
+ seat = m->seat0;
+ else if (seat != m->seat0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "TTY %s is virtual console but seat %s is not seat0", tty, seat->id);
+
+ v = vtnr_from_tty(tty);
+ if (v <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Cannot determine VT number from virtual console TTY %s", tty);
+
+ if (vtnr == 0)
+ vtnr = (uint32_t) v;
+ else if (vtnr != (uint32_t) v)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Specified TTY and VT number do not match");
+
+ } else if (tty_is_console(tty)) {
+
+ if (!seat)
+ seat = m->seat0;
+ else if (seat != m->seat0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Console TTY specified but seat is not seat0");
+
+ if (vtnr != 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Console TTY specified but VT number is not 0");
+ }
+
+ if (seat) {
+ if (seat_has_vts(seat)) {
+ if (vtnr <= 0 || vtnr > 63)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "VT number out of range");
+ } else {
+ if (vtnr != 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Seat has no VTs but VT number not 0");
+ }
+ }
+
+ if (t == _SESSION_TYPE_INVALID) {
+ if (!isempty(display))
+ t = SESSION_X11;
+ else if (!isempty(tty))
+ t = SESSION_TTY;
+ else
+ t = SESSION_UNSPECIFIED;
+ }
+
+ if (c == _SESSION_CLASS_INVALID) {
+ if (t == SESSION_UNSPECIFIED)
+ c = SESSION_BACKGROUND;
+ else
+ c = SESSION_USER;
+ }
+
+ if (leader == 0) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, (pid_t*) &leader);
+ if (r < 0)
+ return r;
+ }
+
+ /* Check if we are already in a logind session. Or if we are in user@.service which is a special PAM session
+ * that avoids creating a logind session. */
+ r = manager_get_user_by_pid(m, leader, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return sd_bus_error_setf(error, BUS_ERROR_SESSION_BUSY, "Already running in a session or user slice");
+
+ /*
+ * Old gdm and lightdm start the user-session on the same VT as
+ * the greeter session. But they destroy the greeter session
+ * after the user-session and want the user-session to take
+ * over the VT. We need to support this for
+ * backwards-compatibility, so make sure we allow new sessions
+ * on a VT that a greeter is running on. Furthermore, to allow
+ * re-logins, we have to allow a greeter to take over a used VT for
+ * the exact same reasons.
+ */
+ if (c != SESSION_GREETER &&
+ vtnr > 0 &&
+ vtnr < m->seat0->position_count &&
+ m->seat0->positions[vtnr] &&
+ m->seat0->positions[vtnr]->class != SESSION_GREETER)
+ return sd_bus_error_setf(error, BUS_ERROR_SESSION_BUSY, "Already occupied by a session");
+
+ if (hashmap_size(m->sessions) >= m->sessions_max)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Maximum number of sessions (%" PRIu64 ") reached, refusing further sessions.", m->sessions_max);
+
+ (void) audit_session_from_pid(leader, &audit_id);
+ if (audit_session_is_valid(audit_id)) {
+ /* Keep our session IDs and the audit session IDs in sync */
+
+ if (asprintf(&id, "%"PRIu32, audit_id) < 0)
+ return -ENOMEM;
+
+ /* Wut? There's already a session by this name and we
+ * didn't find it above? Weird, then let's not trust
+ * the audit data and let's better register a new
+ * ID */
+ if (hashmap_get(m->sessions, id)) {
+ log_warning("Existing logind session ID %s used by new audit session, ignoring.", id);
+ audit_id = AUDIT_SESSION_INVALID;
+ id = mfree(id);
+ }
+ }
+
+ if (!id) {
+ do {
+ id = mfree(id);
+
+ if (asprintf(&id, "c%lu", ++m->session_counter) < 0)
+ return -ENOMEM;
+
+ } while (hashmap_get(m->sessions, id));
+ }
+
+ /* If we are not watching utmp aleady, try again */
+ manager_reconnect_utmp(m);
+
+ r = manager_add_user_by_uid(m, uid, &user);
+ if (r < 0)
+ goto fail;
+
+ r = manager_add_session(m, id, &session);
+ if (r < 0)
+ goto fail;
+
+ session_set_user(session, user);
+ session_set_leader(session, leader);
+
+ session->type = t;
+ session->class = c;
+ session->remote = remote;
+ session->vtnr = vtnr;
+
+ if (!isempty(tty)) {
+ session->tty = strdup(tty);
+ if (!session->tty) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ session->tty_validity = TTY_FROM_PAM;
+ }
+
+ if (!isempty(display)) {
+ session->display = strdup(display);
+ if (!session->display) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!isempty(remote_user)) {
+ session->remote_user = strdup(remote_user);
+ if (!session->remote_user) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!isempty(remote_host)) {
+ session->remote_host = strdup(remote_host);
+ if (!session->remote_host) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!isempty(service)) {
+ session->service = strdup(service);
+ if (!session->service) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!isempty(desktop)) {
+ session->desktop = strdup(desktop);
+ if (!session->desktop) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (seat) {
+ r = seat_attach_session(seat, session);
+ if (r < 0)
+ goto fail;
+ }
+
+ r = sd_bus_message_enter_container(message, 'a', "(sv)");
+ if (r < 0)
+ goto fail;
+
+ r = session_start(session, message, error);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ goto fail;
+
+ session->create_message = sd_bus_message_ref(message);
+
+ /* Now, let's wait until the slice unit and stuff got created. We send the reply back from
+ * session_send_create_reply(). */
+
+ return 1;
+
+fail:
+ if (session)
+ session_add_to_gc_queue(session);
+
+ if (user)
+ user_add_to_gc_queue(user);
+
+ return r;
+}
+
+static int method_release_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ Session *session;
+ const char *name;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_session_from_creds(m, message, name, error, &session);
+ if (r < 0)
+ return r;
+
+ r = session_release(session);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_activate_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ Session *session;
+ const char *name;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_session_from_creds(m, message, name, error, &session);
+ if (r < 0)
+ return r;
+
+ return bus_session_method_activate(message, session, error);
+}
+
+static int method_activate_session_on_seat(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *session_name, *seat_name;
+ Manager *m = userdata;
+ Session *session;
+ Seat *seat;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Same as ActivateSession() but refuses to work if
+ * the seat doesn't match */
+
+ r = sd_bus_message_read(message, "ss", &session_name, &seat_name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_session_from_creds(m, message, session_name, error, &session);
+ if (r < 0)
+ return r;
+
+ r = manager_get_seat_from_creds(m, message, seat_name, error, &seat);
+ if (r < 0)
+ return r;
+
+ if (session->seat != seat)
+ return sd_bus_error_setf(error, BUS_ERROR_SESSION_NOT_ON_SEAT, "Session %s not on seat %s", session_name, seat_name);
+
+ r = session_activate(session);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_lock_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ Session *session;
+ const char *name;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_session_from_creds(m, message, name, error, &session);
+ if (r < 0)
+ return r;
+
+ return bus_session_method_lock(message, session, error);
+}
+
+static int method_lock_sessions(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.lock-sessions",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = session_send_lock_all(m, streq(sd_bus_message_get_member(message), "LockSessions"));
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_kill_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *name;
+ Manager *m = userdata;
+ Session *session;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_session_from_creds(m, message, name, error, &session);
+ if (r < 0)
+ return r;
+
+ return bus_session_method_kill(message, session, error);
+}
+
+static int method_kill_user(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ uint32_t uid;
+ User *user;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "u", &uid);
+ if (r < 0)
+ return r;
+
+ r = manager_get_user_from_creds(m, message, uid, error, &user);
+ if (r < 0)
+ return r;
+
+ return bus_user_method_kill(message, user, error);
+}
+
+static int method_terminate_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Session *session;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_session_from_creds(m, message, name, error, &session);
+ if (r < 0)
+ return r;
+
+ return bus_session_method_terminate(message, session, error);
+}
+
+static int method_terminate_user(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ uint32_t uid;
+ User *user;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "u", &uid);
+ if (r < 0)
+ return r;
+
+ r = manager_get_user_from_creds(m, message, uid, error, &user);
+ if (r < 0)
+ return r;
+
+ return bus_user_method_terminate(message, user, error);
+}
+
+static int method_terminate_seat(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Seat *seat;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_seat_from_creds(m, message, name, error, &seat);
+ if (r < 0)
+ return r;
+
+ return bus_seat_method_terminate(message, seat, error);
+}
+
+static int method_set_user_linger(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ _cleanup_free_ char *cc = NULL;
+ Manager *m = userdata;
+ int r, b, interactive;
+ struct passwd *pw;
+ const char *path;
+ uint32_t uid, auth_uid;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "ubb", &uid, &b, &interactive);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID |
+ SD_BUS_CREDS_OWNER_UID|SD_BUS_CREDS_AUGMENT, &creds);
+ if (r < 0)
+ return r;
+
+ if (!uid_is_valid(uid)) {
+ /* Note that we get the owner UID of the session or user unit,
+ * not the actual client UID here! */
+ r = sd_bus_creds_get_owner_uid(creds, &uid);
+ if (r < 0)
+ return r;
+ }
+
+ /* owner_uid is racy, so for authorization we must use euid */
+ r = sd_bus_creds_get_euid(creds, &auth_uid);
+ if (r < 0)
+ return r;
+
+ errno = 0;
+ pw = getpwuid(uid);
+ if (!pw)
+ return errno > 0 ? -errno : -ENOENT;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ uid == auth_uid ? "org.freedesktop.login1.set-self-linger" :
+ "org.freedesktop.login1.set-user-linger",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ mkdir_p_label("/var/lib/systemd", 0755);
+
+ r = mkdir_safe_label("/var/lib/systemd/linger", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ return r;
+
+ cc = cescape(pw->pw_name);
+ if (!cc)
+ return -ENOMEM;
+
+ path = strjoina("/var/lib/systemd/linger/", cc);
+ if (b) {
+ User *u;
+
+ r = touch(path);
+ if (r < 0)
+ return r;
+
+ if (manager_add_user_by_uid(m, uid, &u) >= 0)
+ user_start(u);
+
+ } else {
+ User *u;
+
+ r = unlink(path);
+ if (r < 0 && errno != ENOENT)
+ return -errno;
+
+ u = hashmap_get(m->users, UID_TO_PTR(uid));
+ if (u)
+ user_add_to_gc_queue(u);
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int trigger_device(Manager *m, sd_device *d) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ int r;
+
+ assert(m);
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ if (d) {
+ r = sd_device_enumerator_add_match_parent(e, d);
+ if (r < 0)
+ return r;
+ }
+
+ FOREACH_DEVICE(e, d) {
+ _cleanup_free_ char *t = NULL;
+ const char *p;
+
+ r = sd_device_get_syspath(d, &p);
+ if (r < 0)
+ return r;
+
+ t = strappend(p, "/uevent");
+ if (!t)
+ return -ENOMEM;
+
+ (void) write_string_file(t, "change", WRITE_STRING_FILE_DISABLE_BUFFER);
+ }
+
+ return 0;
+}
+
+static int attach_device(Manager *m, const char *seat, const char *sysfs) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_free_ char *rule = NULL, *file = NULL;
+ const char *id_for_seat;
+ int r;
+
+ assert(m);
+ assert(seat);
+ assert(sysfs);
+
+ r = sd_device_new_from_syspath(&d, sysfs);
+ if (r < 0)
+ return r;
+
+ if (sd_device_has_tag(d, "seat") <= 0)
+ return -ENODEV;
+
+ if (sd_device_get_property_value(d, "ID_FOR_SEAT", &id_for_seat) < 0)
+ return -ENODEV;
+
+ if (asprintf(&file, "/etc/udev/rules.d/72-seat-%s.rules", id_for_seat) < 0)
+ return -ENOMEM;
+
+ if (asprintf(&rule, "TAG==\"seat\", ENV{ID_FOR_SEAT}==\"%s\", ENV{ID_SEAT}=\"%s\"", id_for_seat, seat) < 0)
+ return -ENOMEM;
+
+ (void) mkdir_p_label("/etc/udev/rules.d", 0755);
+ r = write_string_file_atomic_label(file, rule);
+ if (r < 0)
+ return r;
+
+ return trigger_device(m, d);
+}
+
+static int flush_devices(Manager *m) {
+ _cleanup_closedir_ DIR *d;
+
+ assert(m);
+
+ d = opendir("/etc/udev/rules.d");
+ if (!d) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open /etc/udev/rules.d: %m");
+ } else {
+ struct dirent *de;
+
+ FOREACH_DIRENT_ALL(de, d, break) {
+ if (!dirent_is_file(de))
+ continue;
+
+ if (!startswith(de->d_name, "72-seat-"))
+ continue;
+
+ if (!endswith(de->d_name, ".rules"))
+ continue;
+
+ if (unlinkat(dirfd(d), de->d_name, 0) < 0)
+ log_warning_errno(errno, "Failed to unlink %s: %m", de->d_name);
+ }
+ }
+
+ return trigger_device(m, NULL);
+}
+
+static int method_attach_device(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *sysfs, *seat;
+ Manager *m = userdata;
+ int interactive, r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "ssb", &seat, &sysfs, &interactive);
+ if (r < 0)
+ return r;
+
+ if (!path_startswith(sysfs, "/sys"))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not in /sys", sysfs);
+
+ if (!seat_name_is_valid(seat))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Seat %s is not valid", seat);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.attach-device",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = attach_device(m, seat, sysfs);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_flush_devices(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int interactive, r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "b", &interactive);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.flush-devices",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = flush_devices(m);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int have_multiple_sessions(
+ Manager *m,
+ uid_t uid) {
+
+ Session *session;
+ Iterator i;
+
+ assert(m);
+
+ /* Check for other users' sessions. Greeter sessions do not
+ * count, and non-login sessions do not count either. */
+ HASHMAP_FOREACH(session, m->sessions, i)
+ if (session->class == SESSION_USER &&
+ session->user->uid != uid)
+ return true;
+
+ return false;
+}
+
+static int bus_manager_log_shutdown(
+ Manager *m,
+ const char *unit_name) {
+
+ const char *p, *q;
+
+ assert(m);
+ assert(unit_name);
+
+ if (streq(unit_name, SPECIAL_POWEROFF_TARGET)) {
+ p = "MESSAGE=System is powering down";
+ q = "SHUTDOWN=power-off";
+ } else if (streq(unit_name, SPECIAL_REBOOT_TARGET)) {
+ p = "MESSAGE=System is rebooting";
+ q = "SHUTDOWN=reboot";
+ } else if (streq(unit_name, SPECIAL_HALT_TARGET)) {
+ p = "MESSAGE=System is halting";
+ q = "SHUTDOWN=halt";
+ } else if (streq(unit_name, SPECIAL_KEXEC_TARGET)) {
+ p = "MESSAGE=System is rebooting with kexec";
+ q = "SHUTDOWN=kexec";
+ } else {
+ p = "MESSAGE=System is shutting down";
+ q = NULL;
+ }
+
+ if (isempty(m->wall_message))
+ p = strjoina(p, ".");
+ else
+ p = strjoina(p, " (", m->wall_message, ").");
+
+ return log_struct(LOG_NOTICE,
+ "MESSAGE_ID=" SD_MESSAGE_SHUTDOWN_STR,
+ p,
+ q);
+}
+
+static int lid_switch_ignore_handler(sd_event_source *e, uint64_t usec, void *userdata) {
+ Manager *m = userdata;
+
+ assert(e);
+ assert(m);
+
+ m->lid_switch_ignore_event_source = sd_event_source_unref(m->lid_switch_ignore_event_source);
+ return 0;
+}
+
+int manager_set_lid_switch_ignore(Manager *m, usec_t until) {
+ int r;
+
+ assert(m);
+
+ if (until <= now(CLOCK_MONOTONIC))
+ return 0;
+
+ /* We want to ignore the lid switch for a while after each
+ * suspend, and after boot-up. Hence let's install a timer for
+ * this. As long as the event source exists we ignore the lid
+ * switch. */
+
+ if (m->lid_switch_ignore_event_source) {
+ usec_t u;
+
+ r = sd_event_source_get_time(m->lid_switch_ignore_event_source, &u);
+ if (r < 0)
+ return r;
+
+ if (until <= u)
+ return 0;
+
+ r = sd_event_source_set_time(m->lid_switch_ignore_event_source, until);
+ } else
+ r = sd_event_add_time(
+ m->event,
+ &m->lid_switch_ignore_event_source,
+ CLOCK_MONOTONIC,
+ until, 0,
+ lid_switch_ignore_handler, m);
+
+ return r;
+}
+
+static int send_prepare_for(Manager *m, InhibitWhat w, bool _active) {
+ int active = _active;
+
+ assert(m);
+ assert(IN_SET(w, INHIBIT_SHUTDOWN, INHIBIT_SLEEP));
+
+ return sd_bus_emit_signal(m->bus,
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ w == INHIBIT_SHUTDOWN ? "PrepareForShutdown" : "PrepareForSleep",
+ "b",
+ active);
+}
+
+static int execute_shutdown_or_sleep(
+ Manager *m,
+ InhibitWhat w,
+ const char *unit_name,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *p;
+ int r;
+
+ assert(m);
+ assert(w > 0);
+ assert(w < _INHIBIT_WHAT_MAX);
+ assert(unit_name);
+
+ if (w == INHIBIT_SHUTDOWN)
+ bus_manager_log_shutdown(m, unit_name);
+
+ r = sd_bus_call_method(
+ m->bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartUnit",
+ error,
+ &reply,
+ "ss", unit_name, "replace-irreversibly");
+ if (r < 0)
+ goto error;
+
+ r = sd_bus_message_read(reply, "o", &p);
+ if (r < 0)
+ goto error;
+
+ r = free_and_strdup(&m->action_job, p);
+ if (r < 0)
+ goto error;
+
+ m->action_unit = unit_name;
+ m->action_what = w;
+
+ /* Make sure the lid switch is ignored for a while */
+ manager_set_lid_switch_ignore(m, now(CLOCK_MONOTONIC) + m->holdoff_timeout_usec);
+
+ return 0;
+
+error:
+ /* Tell people that they now may take a lock again */
+ (void) send_prepare_for(m, w, false);
+
+ return r;
+}
+
+int manager_dispatch_delayed(Manager *manager, bool timeout) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Inhibitor *offending = NULL;
+ int r;
+
+ assert(manager);
+
+ if (manager->action_what == 0 || manager->action_job)
+ return 0;
+
+ if (manager_is_inhibited(manager, manager->action_what, INHIBIT_DELAY, NULL, false, false, 0, &offending)) {
+ _cleanup_free_ char *comm = NULL, *u = NULL;
+
+ if (!timeout)
+ return 0;
+
+ (void) get_process_comm(offending->pid, &comm);
+ u = uid_to_name(offending->uid);
+
+ log_notice("Delay lock is active (UID "UID_FMT"/%s, PID "PID_FMT"/%s) but inhibitor timeout is reached.",
+ offending->uid, strna(u),
+ offending->pid, strna(comm));
+ }
+
+ /* Actually do the operation */
+ r = execute_shutdown_or_sleep(manager, manager->action_what, manager->action_unit, &error);
+ if (r < 0) {
+ log_warning("Error during inhibitor-delayed operation (already returned success to client): %s",
+ bus_error_message(&error, r));
+
+ manager->action_unit = NULL;
+ manager->action_what = 0;
+ return r;
+ }
+
+ return 1;
+}
+
+static int manager_inhibit_timeout_handler(
+ sd_event_source *s,
+ uint64_t usec,
+ void *userdata) {
+
+ Manager *manager = userdata;
+ int r;
+
+ assert(manager);
+ assert(manager->inhibit_timeout_source == s);
+
+ r = manager_dispatch_delayed(manager, true);
+ return (r < 0) ? r : 0;
+}
+
+static int delay_shutdown_or_sleep(
+ Manager *m,
+ InhibitWhat w,
+ const char *unit_name) {
+
+ int r;
+ usec_t timeout_val;
+
+ assert(m);
+ assert(w >= 0);
+ assert(w < _INHIBIT_WHAT_MAX);
+ assert(unit_name);
+
+ timeout_val = now(CLOCK_MONOTONIC) + m->inhibit_delay_max;
+
+ if (m->inhibit_timeout_source) {
+ r = sd_event_source_set_time(m->inhibit_timeout_source, timeout_val);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_time() failed: %m");
+
+ r = sd_event_source_set_enabled(m->inhibit_timeout_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_enabled() failed: %m");
+ } else {
+ r = sd_event_add_time(m->event, &m->inhibit_timeout_source, CLOCK_MONOTONIC,
+ timeout_val, 0, manager_inhibit_timeout_handler, m);
+ if (r < 0)
+ return r;
+ }
+
+ m->action_unit = unit_name;
+ m->action_what = w;
+
+ return 0;
+}
+
+int bus_manager_shutdown_or_sleep_now_or_later(
+ Manager *m,
+ const char *unit_name,
+ InhibitWhat w,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *load_state = NULL;
+ bool delayed;
+ int r;
+
+ assert(m);
+ assert(unit_name);
+ assert(w > 0);
+ assert(w < _INHIBIT_WHAT_MAX);
+ assert(!m->action_job);
+
+ r = unit_load_state(m->bus, unit_name, &load_state);
+ if (r < 0)
+ return r;
+
+ if (!streq(load_state, "loaded"))
+ return log_notice_errno(SYNTHETIC_ERRNO(EACCES),
+ "Unit %s is %s, refusing operation.",
+ unit_name, load_state);
+
+ /* Tell everybody to prepare for shutdown/sleep */
+ (void) send_prepare_for(m, w, true);
+
+ delayed =
+ m->inhibit_delay_max > 0 &&
+ manager_is_inhibited(m, w, INHIBIT_DELAY, NULL, false, false, 0, NULL);
+
+ if (delayed)
+ /* Shutdown is delayed, keep in mind what we
+ * want to do, and start a timeout */
+ r = delay_shutdown_or_sleep(m, w, unit_name);
+ else
+ /* Shutdown is not delayed, execute it
+ * immediately */
+ r = execute_shutdown_or_sleep(m, w, unit_name, error);
+
+ return r;
+}
+
+static int verify_shutdown_creds(
+ Manager *m,
+ sd_bus_message *message,
+ InhibitWhat w,
+ bool interactive,
+ const char *action,
+ const char *action_multiple_sessions,
+ const char *action_ignore_inhibit,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ bool multiple_sessions, blocked;
+ uid_t uid;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(w >= 0);
+ assert(w <= _INHIBIT_WHAT_MAX);
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r < 0)
+ return r;
+
+ r = have_multiple_sessions(m, uid);
+ if (r < 0)
+ return r;
+
+ multiple_sessions = r > 0;
+ blocked = manager_is_inhibited(m, w, INHIBIT_BLOCK, NULL, false, true, uid, NULL);
+
+ if (multiple_sessions && action_multiple_sessions) {
+ r = bus_verify_polkit_async(message, CAP_SYS_BOOT, action_multiple_sessions, NULL, interactive, UID_INVALID, &m->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+ }
+
+ if (blocked && action_ignore_inhibit) {
+ r = bus_verify_polkit_async(message, CAP_SYS_BOOT, action_ignore_inhibit, NULL, interactive, UID_INVALID, &m->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+ }
+
+ if (!multiple_sessions && !blocked && action) {
+ r = bus_verify_polkit_async(message, CAP_SYS_BOOT, action, NULL, interactive, UID_INVALID, &m->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+ }
+
+ return 0;
+}
+
+static int method_do_shutdown_or_sleep(
+ Manager *m,
+ sd_bus_message *message,
+ const char *unit_name,
+ InhibitWhat w,
+ const char *action,
+ const char *action_multiple_sessions,
+ const char *action_ignore_inhibit,
+ const char *sleep_verb,
+ sd_bus_error *error) {
+
+ int interactive, r;
+
+ assert(m);
+ assert(message);
+ assert(unit_name);
+ assert(w >= 0);
+ assert(w <= _INHIBIT_WHAT_MAX);
+
+ r = sd_bus_message_read(message, "b", &interactive);
+ if (r < 0)
+ return r;
+
+ /* Don't allow multiple jobs being executed at the same time */
+ if (m->action_what > 0)
+ return sd_bus_error_setf(error, BUS_ERROR_OPERATION_IN_PROGRESS, "There's already a shutdown or sleep operation in progress");
+
+ if (sleep_verb) {
+ r = can_sleep(sleep_verb);
+ if (r == -ENOSPC)
+ return sd_bus_error_set(error, BUS_ERROR_SLEEP_VERB_NOT_SUPPORTED,
+ "Not enough swap space for hibernation");
+ if (r == 0)
+ return sd_bus_error_setf(error, BUS_ERROR_SLEEP_VERB_NOT_SUPPORTED,
+ "Sleep verb \"%s\" not supported", sleep_verb);
+ if (r < 0)
+ return r;
+ }
+
+ r = verify_shutdown_creds(m, message, w, interactive, action, action_multiple_sessions,
+ action_ignore_inhibit, error);
+ if (r != 0)
+ return r;
+
+ r = bus_manager_shutdown_or_sleep_now_or_later(m, unit_name, w, error);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_poweroff(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_do_shutdown_or_sleep(
+ m, message,
+ SPECIAL_POWEROFF_TARGET,
+ INHIBIT_SHUTDOWN,
+ "org.freedesktop.login1.power-off",
+ "org.freedesktop.login1.power-off-multiple-sessions",
+ "org.freedesktop.login1.power-off-ignore-inhibit",
+ NULL,
+ error);
+}
+
+static int method_reboot(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_do_shutdown_or_sleep(
+ m, message,
+ SPECIAL_REBOOT_TARGET,
+ INHIBIT_SHUTDOWN,
+ "org.freedesktop.login1.reboot",
+ "org.freedesktop.login1.reboot-multiple-sessions",
+ "org.freedesktop.login1.reboot-ignore-inhibit",
+ NULL,
+ error);
+}
+
+static int method_halt(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_do_shutdown_or_sleep(
+ m, message,
+ SPECIAL_HALT_TARGET,
+ INHIBIT_SHUTDOWN,
+ "org.freedesktop.login1.halt",
+ "org.freedesktop.login1.halt-multiple-sessions",
+ "org.freedesktop.login1.halt-ignore-inhibit",
+ NULL,
+ error);
+}
+
+static int method_suspend(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_do_shutdown_or_sleep(
+ m, message,
+ SPECIAL_SUSPEND_TARGET,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.suspend",
+ "org.freedesktop.login1.suspend-multiple-sessions",
+ "org.freedesktop.login1.suspend-ignore-inhibit",
+ "suspend",
+ error);
+}
+
+static int method_hibernate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_do_shutdown_or_sleep(
+ m, message,
+ SPECIAL_HIBERNATE_TARGET,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.hibernate",
+ "org.freedesktop.login1.hibernate-multiple-sessions",
+ "org.freedesktop.login1.hibernate-ignore-inhibit",
+ "hibernate",
+ error);
+}
+
+static int method_hybrid_sleep(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_do_shutdown_or_sleep(
+ m, message,
+ SPECIAL_HYBRID_SLEEP_TARGET,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.hibernate",
+ "org.freedesktop.login1.hibernate-multiple-sessions",
+ "org.freedesktop.login1.hibernate-ignore-inhibit",
+ "hybrid-sleep",
+ error);
+}
+
+static int method_suspend_then_hibernate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_do_shutdown_or_sleep(
+ m, message,
+ SPECIAL_SUSPEND_THEN_HIBERNATE_TARGET,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.hibernate",
+ "org.freedesktop.login1.hibernate-multiple-sessions",
+ "org.freedesktop.login1.hibernate-ignore-inhibit",
+ "hybrid-sleep",
+ error);
+}
+
+static int nologin_timeout_handler(
+ sd_event_source *s,
+ uint64_t usec,
+ void *userdata) {
+
+ Manager *m = userdata;
+
+ log_info("Creating /run/nologin, blocking further logins...");
+
+ m->unlink_nologin =
+ create_shutdown_run_nologin_or_warn() >= 0;
+
+ return 0;
+}
+
+static int update_schedule_file(Manager *m) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(m);
+
+ r = mkdir_safe_label("/run/systemd/shutdown", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create shutdown subdirectory: %m");
+
+ r = fopen_temporary("/run/systemd/shutdown/scheduled", &f, &temp_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to save information about scheduled shutdowns: %m");
+
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "USEC="USEC_FMT"\n"
+ "WARN_WALL=%i\n"
+ "MODE=%s\n",
+ m->scheduled_shutdown_timeout,
+ m->enable_wall_messages,
+ m->scheduled_shutdown_type);
+
+ if (!isempty(m->wall_message)) {
+ _cleanup_free_ char *t;
+
+ t = cescape(m->wall_message);
+ if (!t) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "WALL_MESSAGE=%s\n", t);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, "/run/systemd/shutdown/scheduled") < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(temp_path);
+ (void) unlink("/run/systemd/shutdown/scheduled");
+
+ return log_error_errno(r, "Failed to write information about scheduled shutdowns: %m");
+}
+
+static void reset_scheduled_shutdown(Manager *m) {
+ assert(m);
+
+ m->scheduled_shutdown_timeout_source = sd_event_source_unref(m->scheduled_shutdown_timeout_source);
+ m->wall_message_timeout_source = sd_event_source_unref(m->wall_message_timeout_source);
+ m->nologin_timeout_source = sd_event_source_unref(m->nologin_timeout_source);
+
+ m->scheduled_shutdown_type = mfree(m->scheduled_shutdown_type);
+ m->scheduled_shutdown_timeout = 0;
+ m->shutdown_dry_run = false;
+
+ if (m->unlink_nologin) {
+ (void) unlink_or_warn("/run/nologin");
+ m->unlink_nologin = false;
+ }
+
+ (void) unlink("/run/systemd/shutdown/scheduled");
+}
+
+static int manager_scheduled_shutdown_handler(
+ sd_event_source *s,
+ uint64_t usec,
+ void *userdata) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Manager *m = userdata;
+ const char *target;
+ int r;
+
+ assert(m);
+
+ if (isempty(m->scheduled_shutdown_type))
+ return 0;
+
+ if (streq(m->scheduled_shutdown_type, "poweroff"))
+ target = SPECIAL_POWEROFF_TARGET;
+ else if (streq(m->scheduled_shutdown_type, "reboot"))
+ target = SPECIAL_REBOOT_TARGET;
+ else if (streq(m->scheduled_shutdown_type, "halt"))
+ target = SPECIAL_HALT_TARGET;
+ else
+ assert_not_reached("unexpected shutdown type");
+
+ /* Don't allow multiple jobs being executed at the same time */
+ if (m->action_what > 0) {
+ r = -EALREADY;
+ log_error("Scheduled shutdown to %s failed: shutdown or sleep operation already in progress", target);
+ goto error;
+ }
+
+ if (m->shutdown_dry_run) {
+ /* We do not process delay inhibitors here. Otherwise, we
+ * would have to be considered "in progress" (like the check
+ * above) for some seconds after our admin has seen the final
+ * wall message. */
+
+ bus_manager_log_shutdown(m, target);
+ log_info("Running in dry run, suppressing action.");
+ reset_scheduled_shutdown(m);
+
+ return 0;
+ }
+
+ r = bus_manager_shutdown_or_sleep_now_or_later(m, target, INHIBIT_SHUTDOWN, &error);
+ if (r < 0) {
+ log_error_errno(r, "Scheduled shutdown to %s failed: %m", target);
+ goto error;
+ }
+
+ return 0;
+
+error:
+ reset_scheduled_shutdown(m);
+ return r;
+}
+
+static int method_schedule_shutdown(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ const char *action_multiple_sessions = NULL;
+ const char *action_ignore_inhibit = NULL;
+ const char *action = NULL;
+ uint64_t elapse;
+ char *type;
+ int r;
+ bool dry_run = false;
+
+ assert(m);
+ assert(message);
+
+ r = sd_bus_message_read(message, "st", &type, &elapse);
+ if (r < 0)
+ return r;
+
+ if (startswith(type, "dry-")) {
+ type += 4;
+ dry_run = true;
+ }
+
+ if (streq(type, "poweroff")) {
+ action = "org.freedesktop.login1.power-off";
+ action_multiple_sessions = "org.freedesktop.login1.power-off-multiple-sessions";
+ action_ignore_inhibit = "org.freedesktop.login1.power-off-ignore-inhibit";
+ } else if (streq(type, "reboot")) {
+ action = "org.freedesktop.login1.reboot";
+ action_multiple_sessions = "org.freedesktop.login1.reboot-multiple-sessions";
+ action_ignore_inhibit = "org.freedesktop.login1.reboot-ignore-inhibit";
+ } else if (streq(type, "halt")) {
+ action = "org.freedesktop.login1.halt";
+ action_multiple_sessions = "org.freedesktop.login1.halt-multiple-sessions";
+ action_ignore_inhibit = "org.freedesktop.login1.halt-ignore-inhibit";
+ } else
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unsupported shutdown type");
+
+ r = verify_shutdown_creds(m, message, INHIBIT_SHUTDOWN, false,
+ action, action_multiple_sessions, action_ignore_inhibit, error);
+ if (r != 0)
+ return r;
+
+ if (m->scheduled_shutdown_timeout_source) {
+ r = sd_event_source_set_time(m->scheduled_shutdown_timeout_source, elapse);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_time() failed: %m");
+
+ r = sd_event_source_set_enabled(m->scheduled_shutdown_timeout_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_enabled() failed: %m");
+ } else {
+ r = sd_event_add_time(m->event, &m->scheduled_shutdown_timeout_source,
+ CLOCK_REALTIME, elapse, 0, manager_scheduled_shutdown_handler, m);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_add_time() failed: %m");
+ }
+
+ r = free_and_strdup(&m->scheduled_shutdown_type, type);
+ if (r < 0) {
+ m->scheduled_shutdown_timeout_source = sd_event_source_unref(m->scheduled_shutdown_timeout_source);
+ return log_oom();
+ }
+
+ m->shutdown_dry_run = dry_run;
+
+ if (m->nologin_timeout_source) {
+ r = sd_event_source_set_time(m->nologin_timeout_source, elapse);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_time() failed: %m");
+
+ r = sd_event_source_set_enabled(m->nologin_timeout_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_enabled() failed: %m");
+ } else {
+ r = sd_event_add_time(m->event, &m->nologin_timeout_source,
+ CLOCK_REALTIME, elapse - 5 * USEC_PER_MINUTE, 0, nologin_timeout_handler, m);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_add_time() failed: %m");
+ }
+
+ m->scheduled_shutdown_timeout = elapse;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_AUGMENT|SD_BUS_CREDS_TTY|SD_BUS_CREDS_UID, &creds);
+ if (r >= 0) {
+ const char *tty = NULL;
+
+ (void) sd_bus_creds_get_uid(creds, &m->scheduled_shutdown_uid);
+ (void) sd_bus_creds_get_tty(creds, &tty);
+
+ r = free_and_strdup(&m->scheduled_shutdown_tty, tty);
+ if (r < 0) {
+ m->scheduled_shutdown_timeout_source = sd_event_source_unref(m->scheduled_shutdown_timeout_source);
+ return log_oom();
+ }
+ }
+
+ r = manager_setup_wall_message_timer(m);
+ if (r < 0)
+ return r;
+
+ r = update_schedule_file(m);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_cancel_scheduled_shutdown(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ bool cancelled;
+
+ assert(m);
+ assert(message);
+
+ cancelled = m->scheduled_shutdown_type != NULL;
+ reset_scheduled_shutdown(m);
+
+ if (cancelled && m->enable_wall_messages) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ _cleanup_free_ char *username = NULL;
+ const char *tty = NULL;
+ uid_t uid = 0;
+ int r;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_AUGMENT|SD_BUS_CREDS_TTY|SD_BUS_CREDS_UID, &creds);
+ if (r >= 0) {
+ (void) sd_bus_creds_get_uid(creds, &uid);
+ (void) sd_bus_creds_get_tty(creds, &tty);
+ }
+
+ username = uid_to_name(uid);
+ utmp_wall("The system shutdown has been cancelled",
+ username, tty, logind_wall_tty_filter, m);
+ }
+
+ return sd_bus_reply_method_return(message, "b", cancelled);
+}
+
+static int method_can_shutdown_or_sleep(
+ Manager *m,
+ sd_bus_message *message,
+ InhibitWhat w,
+ const char *action,
+ const char *action_multiple_sessions,
+ const char *action_ignore_inhibit,
+ const char *sleep_verb,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ HandleAction handle;
+ bool multiple_sessions, challenge, blocked;
+ const char *result = NULL;
+ uid_t uid;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(w >= 0);
+ assert(w <= _INHIBIT_WHAT_MAX);
+ assert(action);
+ assert(action_multiple_sessions);
+ assert(action_ignore_inhibit);
+
+ if (sleep_verb) {
+ r = can_sleep(sleep_verb);
+ if (IN_SET(r, 0, -ENOSPC))
+ return sd_bus_reply_method_return(message, "s", "na");
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r < 0)
+ return r;
+
+ r = have_multiple_sessions(m, uid);
+ if (r < 0)
+ return r;
+
+ multiple_sessions = r > 0;
+ blocked = manager_is_inhibited(m, w, INHIBIT_BLOCK, NULL, false, true, uid, NULL);
+
+ handle = handle_action_from_string(sleep_verb);
+ if (handle >= 0) {
+ const char *target;
+
+ target = manager_target_for_action(handle);
+ if (target) {
+ _cleanup_free_ char *load_state = NULL;
+
+ r = unit_load_state(m->bus, target, &load_state);
+ if (r < 0)
+ return r;
+
+ if (!streq(load_state, "loaded")) {
+ result = "no";
+ goto finish;
+ }
+ }
+ }
+
+ if (multiple_sessions) {
+ r = bus_test_polkit(message, CAP_SYS_BOOT, action_multiple_sessions, NULL, UID_INVALID, &challenge, error);
+ if (r < 0)
+ return r;
+
+ if (r > 0)
+ result = "yes";
+ else if (challenge)
+ result = "challenge";
+ else
+ result = "no";
+ }
+
+ if (blocked) {
+ r = bus_test_polkit(message, CAP_SYS_BOOT, action_ignore_inhibit, NULL, UID_INVALID, &challenge, error);
+ if (r < 0)
+ return r;
+
+ if (r > 0) {
+ if (!result)
+ result = "yes";
+ } else if (challenge) {
+ if (!result || streq(result, "yes"))
+ result = "challenge";
+ } else
+ result = "no";
+ }
+
+ if (!multiple_sessions && !blocked) {
+ /* If neither inhibit nor multiple sessions
+ * apply then just check the normal policy */
+
+ r = bus_test_polkit(message, CAP_SYS_BOOT, action, NULL, UID_INVALID, &challenge, error);
+ if (r < 0)
+ return r;
+
+ if (r > 0)
+ result = "yes";
+ else if (challenge)
+ result = "challenge";
+ else
+ result = "no";
+ }
+
+ finish:
+ return sd_bus_reply_method_return(message, "s", result);
+}
+
+static int method_can_poweroff(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_can_shutdown_or_sleep(
+ m, message,
+ INHIBIT_SHUTDOWN,
+ "org.freedesktop.login1.power-off",
+ "org.freedesktop.login1.power-off-multiple-sessions",
+ "org.freedesktop.login1.power-off-ignore-inhibit",
+ NULL,
+ error);
+}
+
+static int method_can_reboot(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_can_shutdown_or_sleep(
+ m, message,
+ INHIBIT_SHUTDOWN,
+ "org.freedesktop.login1.reboot",
+ "org.freedesktop.login1.reboot-multiple-sessions",
+ "org.freedesktop.login1.reboot-ignore-inhibit",
+ NULL,
+ error);
+}
+
+static int method_can_halt(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_can_shutdown_or_sleep(
+ m, message,
+ INHIBIT_SHUTDOWN,
+ "org.freedesktop.login1.halt",
+ "org.freedesktop.login1.halt-multiple-sessions",
+ "org.freedesktop.login1.halt-ignore-inhibit",
+ NULL,
+ error);
+}
+
+static int method_can_suspend(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_can_shutdown_or_sleep(
+ m, message,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.suspend",
+ "org.freedesktop.login1.suspend-multiple-sessions",
+ "org.freedesktop.login1.suspend-ignore-inhibit",
+ "suspend",
+ error);
+}
+
+static int method_can_hibernate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_can_shutdown_or_sleep(
+ m, message,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.hibernate",
+ "org.freedesktop.login1.hibernate-multiple-sessions",
+ "org.freedesktop.login1.hibernate-ignore-inhibit",
+ "hibernate",
+ error);
+}
+
+static int method_can_hybrid_sleep(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_can_shutdown_or_sleep(
+ m, message,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.hibernate",
+ "org.freedesktop.login1.hibernate-multiple-sessions",
+ "org.freedesktop.login1.hibernate-ignore-inhibit",
+ "hybrid-sleep",
+ error);
+}
+
+static int method_can_suspend_then_hibernate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_can_shutdown_or_sleep(
+ m, message,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.hibernate",
+ "org.freedesktop.login1.hibernate-multiple-sessions",
+ "org.freedesktop.login1.hibernate-ignore-inhibit",
+ "suspend-then-hibernate",
+ error);
+}
+
+static int property_get_reboot_to_firmware_setup(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(userdata);
+
+ r = efi_get_reboot_to_firmware();
+ if (r < 0 && r != -EOPNOTSUPP)
+ log_warning_errno(r, "Failed to determine reboot-to-firmware state: %m");
+
+ return sd_bus_message_append(reply, "b", r > 0);
+}
+
+static int method_set_reboot_to_firmware_setup(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int b, r;
+ Manager *m = userdata;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.set-reboot-to-firmware-setup",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = efi_set_reboot_to_firmware(b);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_can_reboot_to_firmware_setup(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int r;
+ bool challenge;
+ const char *result;
+ Manager *m = userdata;
+
+ assert(message);
+ assert(m);
+
+ r = efi_reboot_to_firmware_supported();
+ if (r < 0) {
+ if (r != -EOPNOTSUPP)
+ log_warning_errno(r, "Failed to determine whether reboot to firmware is supported: %m");
+
+ return sd_bus_reply_method_return(message, "s", "na");
+ }
+
+ r = bus_test_polkit(message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.set-reboot-to-firmware-setup",
+ NULL,
+ UID_INVALID,
+ &challenge,
+ error);
+ if (r < 0)
+ return r;
+
+ if (r > 0)
+ result = "yes";
+ else if (challenge)
+ result = "challenge";
+ else
+ result = "no";
+
+ return sd_bus_reply_method_return(message, "s", result);
+}
+
+static int method_set_wall_message(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int r;
+ Manager *m = userdata;
+ char *wall_message;
+ unsigned enable_wall_messages;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "sb", &wall_message, &enable_wall_messages);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.set-wall-message",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = free_and_strdup(&m->wall_message, empty_to_null(wall_message));
+ if (r < 0)
+ return log_oom();
+
+ m->enable_wall_messages = enable_wall_messages;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_inhibit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ const char *who, *why, *what, *mode;
+ _cleanup_free_ char *id = NULL;
+ _cleanup_close_ int fifo_fd = -1;
+ Manager *m = userdata;
+ Inhibitor *i = NULL;
+ InhibitMode mm;
+ InhibitWhat w;
+ pid_t pid;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "ssss", &what, &who, &why, &mode);
+ if (r < 0)
+ return r;
+
+ w = inhibit_what_from_string(what);
+ if (w <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid what specification %s", what);
+
+ mm = inhibit_mode_from_string(mode);
+ if (mm < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid mode specification %s", mode);
+
+ /* Delay is only supported for shutdown/sleep */
+ if (mm == INHIBIT_DELAY && (w & ~(INHIBIT_SHUTDOWN|INHIBIT_SLEEP)))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delay inhibitors only supported for shutdown and sleep");
+
+ /* Don't allow taking delay locks while we are already
+ * executing the operation. We shouldn't create the impression
+ * that the lock was successful if the machine is about to go
+ * down/suspend any moment. */
+ if (m->action_what & w)
+ return sd_bus_error_setf(error, BUS_ERROR_OPERATION_IN_PROGRESS, "The operation inhibition has been requested for is already running");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_BOOT,
+ w == INHIBIT_SHUTDOWN ? (mm == INHIBIT_BLOCK ? "org.freedesktop.login1.inhibit-block-shutdown" : "org.freedesktop.login1.inhibit-delay-shutdown") :
+ w == INHIBIT_SLEEP ? (mm == INHIBIT_BLOCK ? "org.freedesktop.login1.inhibit-block-sleep" : "org.freedesktop.login1.inhibit-delay-sleep") :
+ w == INHIBIT_IDLE ? "org.freedesktop.login1.inhibit-block-idle" :
+ w == INHIBIT_HANDLE_POWER_KEY ? "org.freedesktop.login1.inhibit-handle-power-key" :
+ w == INHIBIT_HANDLE_SUSPEND_KEY ? "org.freedesktop.login1.inhibit-handle-suspend-key" :
+ w == INHIBIT_HANDLE_HIBERNATE_KEY ? "org.freedesktop.login1.inhibit-handle-hibernate-key" :
+ "org.freedesktop.login1.inhibit-handle-lid-switch",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID|SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+
+ if (hashmap_size(m->inhibitors) >= m->inhibitors_max)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Maximum number of inhibitors (%" PRIu64 ") reached, refusing further inhibitors.", m->inhibitors_max);
+
+ do {
+ id = mfree(id);
+
+ if (asprintf(&id, "%lu", ++m->inhibit_counter) < 0)
+ return -ENOMEM;
+
+ } while (hashmap_get(m->inhibitors, id));
+
+ r = manager_add_inhibitor(m, id, &i);
+ if (r < 0)
+ return r;
+
+ i->what = w;
+ i->mode = mm;
+ i->pid = pid;
+ i->uid = uid;
+ i->why = strdup(why);
+ i->who = strdup(who);
+
+ if (!i->why || !i->who) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fifo_fd = inhibitor_create_fifo(i);
+ if (fifo_fd < 0) {
+ r = fifo_fd;
+ goto fail;
+ }
+
+ inhibitor_start(i);
+
+ return sd_bus_reply_method_return(message, "h", fifo_fd);
+
+fail:
+ if (i)
+ inhibitor_free(i);
+
+ return r;
+}
+
+const sd_bus_vtable manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_WRITABLE_PROPERTY("EnableWallMessages", "b", NULL, NULL, offsetof(Manager, enable_wall_messages), 0),
+ SD_BUS_WRITABLE_PROPERTY("WallMessage", "s", NULL, NULL, offsetof(Manager, wall_message), 0),
+
+ SD_BUS_PROPERTY("NAutoVTs", "u", NULL, offsetof(Manager, n_autovts), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KillOnlyUsers", "as", NULL, offsetof(Manager, kill_only_users), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KillExcludeUsers", "as", NULL, offsetof(Manager, kill_exclude_users), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KillUserProcesses", "b", NULL, offsetof(Manager, kill_user_processes), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RebootToFirmwareSetup", "b", property_get_reboot_to_firmware_setup, 0, 0),
+ SD_BUS_PROPERTY("IdleHint", "b", property_get_idle_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHint", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHintMonotonic", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("BlockInhibited", "s", property_get_inhibited, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("DelayInhibited", "s", property_get_inhibited, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("InhibitDelayMaxUSec", "t", NULL, offsetof(Manager, inhibit_delay_max), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UserStopDelayUSec", "t", NULL, offsetof(Manager, user_stop_delay), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HandlePowerKey", "s", property_get_handle_action, offsetof(Manager, handle_power_key), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HandleSuspendKey", "s", property_get_handle_action, offsetof(Manager, handle_suspend_key), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HandleHibernateKey", "s", property_get_handle_action, offsetof(Manager, handle_hibernate_key), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HandleLidSwitch", "s", property_get_handle_action, offsetof(Manager, handle_lid_switch), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HandleLidSwitchExternalPower", "s", property_get_handle_action, offsetof(Manager, handle_lid_switch_ep), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HandleLidSwitchDocked", "s", property_get_handle_action, offsetof(Manager, handle_lid_switch_docked), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HoldoffTimeoutUSec", "t", NULL, offsetof(Manager, holdoff_timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IdleAction", "s", property_get_handle_action, offsetof(Manager, idle_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IdleActionUSec", "t", NULL, offsetof(Manager, idle_action_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PreparingForShutdown", "b", property_get_preparing, 0, 0),
+ SD_BUS_PROPERTY("PreparingForSleep", "b", property_get_preparing, 0, 0),
+ SD_BUS_PROPERTY("ScheduledShutdown", "(st)", property_get_scheduled_shutdown, 0, 0),
+ SD_BUS_PROPERTY("Docked", "b", property_get_docked, 0, 0),
+ SD_BUS_PROPERTY("LidClosed", "b", property_get_lid_closed, 0, 0),
+ SD_BUS_PROPERTY("OnExternalPower", "b", property_get_on_external_power, 0, 0),
+ SD_BUS_PROPERTY("RemoveIPC", "b", bus_property_get_bool, offsetof(Manager, remove_ipc), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RuntimeDirectorySize", "t", NULL, offsetof(Manager, runtime_dir_size), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("InhibitorsMax", "t", NULL, offsetof(Manager, inhibitors_max), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NCurrentInhibitors", "t", property_get_hashmap_size, offsetof(Manager, inhibitors), 0),
+ SD_BUS_PROPERTY("SessionsMax", "t", NULL, offsetof(Manager, sessions_max), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NCurrentSessions", "t", property_get_hashmap_size, offsetof(Manager, sessions), 0),
+ SD_BUS_PROPERTY("UserTasksMax", "t", property_get_compat_user_tasks_max, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+
+ SD_BUS_METHOD("GetSession", "s", "o", method_get_session, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetSessionByPID", "u", "o", method_get_session_by_pid, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetUser", "u", "o", method_get_user, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetUserByPID", "u", "o", method_get_user_by_pid, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetSeat", "s", "o", method_get_seat, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListSessions", NULL, "a(susso)", method_list_sessions, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListUsers", NULL, "a(uso)", method_list_users, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListSeats", NULL, "a(so)", method_list_seats, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListInhibitors", NULL, "a(ssssuu)", method_list_inhibitors, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CreateSession", "uusssssussbssa(sv)", "soshusub", method_create_session, 0),
+ SD_BUS_METHOD("ReleaseSession", "s", NULL, method_release_session, 0),
+ SD_BUS_METHOD("ActivateSession", "s", NULL, method_activate_session, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ActivateSessionOnSeat", "ss", NULL, method_activate_session_on_seat, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("LockSession", "s", NULL, method_lock_session, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("UnlockSession", "s", NULL, method_lock_session, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("LockSessions", NULL, NULL, method_lock_sessions, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("UnlockSessions", NULL, NULL, method_lock_sessions, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("KillSession", "ssi", NULL, method_kill_session, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("KillUser", "ui", NULL, method_kill_user, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("TerminateSession", "s", NULL, method_terminate_session, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("TerminateUser", "u", NULL, method_terminate_user, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("TerminateSeat", "s", NULL, method_terminate_seat, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetUserLinger", "ubb", NULL, method_set_user_linger, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("AttachDevice", "ssb", NULL, method_attach_device, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("FlushDevices", "b", NULL, method_flush_devices, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("PowerOff", "b", NULL, method_poweroff, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Reboot", "b", NULL, method_reboot, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Halt", "b", NULL, method_halt, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Suspend", "b", NULL, method_suspend, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Hibernate", "b", NULL, method_hibernate, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("HybridSleep", "b", NULL, method_hybrid_sleep, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SuspendThenHibernate", "b", NULL, method_suspend_then_hibernate, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CanPowerOff", NULL, "s", method_can_poweroff, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CanReboot", NULL, "s", method_can_reboot, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CanHalt", NULL, "s", method_can_halt, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CanSuspend", NULL, "s", method_can_suspend, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CanHibernate", NULL, "s", method_can_hibernate, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CanHybridSleep", NULL, "s", method_can_hybrid_sleep, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CanSuspendThenHibernate", NULL, "s", method_can_suspend_then_hibernate, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ScheduleShutdown", "st", NULL, method_schedule_shutdown, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CancelScheduledShutdown", NULL, "b", method_cancel_scheduled_shutdown, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Inhibit", "ssss", "h", method_inhibit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CanRebootToFirmwareSetup", NULL, "s", method_can_reboot_to_firmware_setup, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetRebootToFirmwareSetup", "b", NULL, method_set_reboot_to_firmware_setup, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetWallMessage", "sb", NULL, method_set_wall_message, SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_SIGNAL("SessionNew", "so", 0),
+ SD_BUS_SIGNAL("SessionRemoved", "so", 0),
+ SD_BUS_SIGNAL("UserNew", "uo", 0),
+ SD_BUS_SIGNAL("UserRemoved", "uo", 0),
+ SD_BUS_SIGNAL("SeatNew", "so", 0),
+ SD_BUS_SIGNAL("SeatRemoved", "so", 0),
+ SD_BUS_SIGNAL("PrepareForShutdown", "b", 0),
+ SD_BUS_SIGNAL("PrepareForSleep", "b", 0),
+
+ SD_BUS_VTABLE_END
+};
+
+static int session_jobs_reply(Session *s, const char *unit, const char *result) {
+ assert(s);
+ assert(unit);
+
+ if (!s->started)
+ return 0;
+
+ if (result && !streq(result, "done")) {
+ _cleanup_(sd_bus_error_free) sd_bus_error e = SD_BUS_ERROR_NULL;
+
+ sd_bus_error_setf(&e, BUS_ERROR_JOB_FAILED, "Start job for unit '%s' failed with '%s'", unit, result);
+ return session_send_create_reply(s, &e);
+ }
+
+ return session_send_create_reply(s, NULL);
+}
+
+int match_job_removed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *path, *result, *unit;
+ Manager *m = userdata;
+ Session *session;
+ uint32_t id;
+ User *user;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "uoss", &id, &path, &unit, &result);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ if (m->action_job && streq(m->action_job, path)) {
+ log_info("Operation '%s' finished.", inhibit_what_to_string(m->action_what));
+
+ /* Tell people that they now may take a lock again */
+ (void) send_prepare_for(m, m->action_what, false);
+
+ m->action_job = mfree(m->action_job);
+ m->action_unit = NULL;
+ m->action_what = 0;
+ return 0;
+ }
+
+ session = hashmap_get(m->session_units, unit);
+ if (session) {
+ if (streq_ptr(path, session->scope_job)) {
+ session->scope_job = mfree(session->scope_job);
+ (void) session_jobs_reply(session, unit, result);
+
+ session_save(session);
+ user_save(session->user);
+ }
+
+ session_add_to_gc_queue(session);
+ }
+
+ user = hashmap_get(m->user_units, unit);
+ if (user) {
+ if (streq_ptr(path, user->service_job)) {
+ user->service_job = mfree(user->service_job);
+
+ LIST_FOREACH(sessions_by_user, session, user->sessions)
+ (void) session_jobs_reply(session, unit, NULL /* don't propagate user service failures to the client */);
+
+ user_save(user);
+ }
+
+ user_add_to_gc_queue(user);
+ }
+
+ return 0;
+}
+
+int match_unit_removed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *path, *unit;
+ Manager *m = userdata;
+ Session *session;
+ User *user;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "so", &unit, &path);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ session = hashmap_get(m->session_units, unit);
+ if (session)
+ session_add_to_gc_queue(session);
+
+ user = hashmap_get(m->user_units, unit);
+ if (user)
+ user_add_to_gc_queue(user);
+
+ return 0;
+}
+
+int match_properties_changed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *unit = NULL;
+ Manager *m = userdata;
+ const char *path;
+ Session *session;
+ User *user;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ path = sd_bus_message_get_path(message);
+ if (!path)
+ return 0;
+
+ r = unit_name_from_dbus_path(path, &unit);
+ if (r == -EINVAL) /* not a unit */
+ return 0;
+ if (r < 0) {
+ log_oom();
+ return 0;
+ }
+
+ session = hashmap_get(m->session_units, unit);
+ if (session)
+ session_add_to_gc_queue(session);
+
+ user = hashmap_get(m->user_units, unit);
+ if (user)
+ user_add_to_gc_queue(user);
+
+ return 0;
+}
+
+int match_reloading(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ Session *session;
+ Iterator i;
+ int b, r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ if (b)
+ return 0;
+
+ /* systemd finished reloading, let's recheck all our sessions */
+ log_debug("System manager has been reloaded, rechecking sessions...");
+
+ HASHMAP_FOREACH(session, m->sessions, i)
+ session_add_to_gc_queue(session);
+
+ return 0;
+}
+
+int manager_send_changed(Manager *manager, const char *property, ...) {
+ char **l;
+
+ assert(manager);
+
+ l = strv_from_stdarg_alloca(property);
+
+ return sd_bus_emit_properties_changed_strv(
+ manager->bus,
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ l);
+}
+
+static int strdup_job(sd_bus_message *reply, char **job) {
+ const char *j;
+ char *copy;
+ int r;
+
+ r = sd_bus_message_read(reply, "o", &j);
+ if (r < 0)
+ return r;
+
+ copy = strdup(j);
+ if (!copy)
+ return -ENOMEM;
+
+ *job = copy;
+ return 1;
+}
+
+int manager_start_scope(
+ Manager *manager,
+ const char *scope,
+ pid_t pid,
+ const char *slice,
+ const char *description,
+ char **wants,
+ char **after,
+ const char *requires_mounts_for,
+ sd_bus_message *more_properties,
+ sd_bus_error *error,
+ char **job) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ char **i;
+ int r;
+
+ assert(manager);
+ assert(scope);
+ assert(pid > 1);
+ assert(job);
+
+ r = sd_bus_message_new_method_call(
+ manager->bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartTransientUnit");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "ss", strempty(scope), "fail");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return r;
+
+ if (!isempty(slice)) {
+ r = sd_bus_message_append(m, "(sv)", "Slice", "s", slice);
+ if (r < 0)
+ return r;
+ }
+
+ if (!isempty(description)) {
+ r = sd_bus_message_append(m, "(sv)", "Description", "s", description);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(i, wants) {
+ r = sd_bus_message_append(m, "(sv)", "Wants", "as", 1, *i);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(i, after) {
+ r = sd_bus_message_append(m, "(sv)", "After", "as", 1, *i);
+ if (r < 0)
+ return r;
+ }
+
+ if (!empty_or_root(requires_mounts_for)) {
+ r = sd_bus_message_append(m, "(sv)", "RequiresMountsFor", "as", 1, requires_mounts_for);
+ if (r < 0)
+ return r;
+ }
+
+ /* Make sure that the session shells are terminated with SIGHUP since bash and friends tend to ignore
+ * SIGTERM */
+ r = sd_bus_message_append(m, "(sv)", "SendSIGHUP", "b", true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "(sv)", "PIDs", "au", 1, pid);
+ if (r < 0)
+ return r;
+
+ /* disable TasksMax= for the session scope, rely on the slice setting for it */
+ r = sd_bus_message_append(m, "(sv)", "TasksMax", "t", (uint64_t)-1);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (more_properties) {
+ /* If TasksMax also appears here, it will overwrite the default value set above */
+ r = sd_bus_message_copy(m, more_properties, true);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "a(sa(sv))", 0);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(manager->bus, m, 0, error, &reply);
+ if (r < 0)
+ return r;
+
+ return strdup_job(reply, job);
+}
+
+int manager_start_unit(Manager *manager, const char *unit, sd_bus_error *error, char **job) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(manager);
+ assert(unit);
+ assert(job);
+
+ r = sd_bus_call_method(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartUnit",
+ error,
+ &reply,
+ "ss", unit, "replace");
+ if (r < 0)
+ return r;
+
+ return strdup_job(reply, job);
+}
+
+int manager_stop_unit(Manager *manager, const char *unit, sd_bus_error *error, char **job) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(manager);
+ assert(unit);
+ assert(job);
+
+ r = sd_bus_call_method(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StopUnit",
+ error,
+ &reply,
+ "ss", unit, "fail");
+ if (r < 0) {
+ if (sd_bus_error_has_name(error, BUS_ERROR_NO_SUCH_UNIT) ||
+ sd_bus_error_has_name(error, BUS_ERROR_LOAD_FAILED)) {
+
+ *job = NULL;
+ sd_bus_error_free(error);
+ return 0;
+ }
+
+ return r;
+ }
+
+ return strdup_job(reply, job);
+}
+
+int manager_abandon_scope(Manager *manager, const char *scope, sd_bus_error *ret_error) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ assert(manager);
+ assert(scope);
+
+ path = unit_dbus_path_from_name(scope);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_call_method(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Scope",
+ "Abandon",
+ &error,
+ NULL,
+ NULL);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_UNIT) ||
+ sd_bus_error_has_name(&error, BUS_ERROR_LOAD_FAILED) ||
+ sd_bus_error_has_name(&error, BUS_ERROR_SCOPE_NOT_RUNNING))
+ return 0;
+
+ sd_bus_error_move(ret_error, &error);
+ return r;
+ }
+
+ return 1;
+}
+
+int manager_kill_unit(Manager *manager, const char *unit, KillWho who, int signo, sd_bus_error *error) {
+ assert(manager);
+ assert(unit);
+
+ return sd_bus_call_method(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "KillUnit",
+ error,
+ NULL,
+ "ssi", unit, who == KILL_LEADER ? "main" : "all", signo);
+}
+
+int manager_unit_is_active(Manager *manager, const char *unit, sd_bus_error *ret_error) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *path = NULL;
+ const char *state;
+ int r;
+
+ assert(manager);
+ assert(unit);
+
+ path = unit_dbus_path_from_name(unit);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_get_property(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "ActiveState",
+ &error,
+ &reply,
+ "s");
+ if (r < 0) {
+ /* systemd might have droppped off momentarily, let's
+ * not make this an error */
+ if (sd_bus_error_has_name(&error, SD_BUS_ERROR_NO_REPLY) ||
+ sd_bus_error_has_name(&error, SD_BUS_ERROR_DISCONNECTED))
+ return true;
+
+ /* If the unit is already unloaded then it's not
+ * active */
+ if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_UNIT) ||
+ sd_bus_error_has_name(&error, BUS_ERROR_LOAD_FAILED))
+ return false;
+
+ sd_bus_error_move(ret_error, &error);
+ return r;
+ }
+
+ r = sd_bus_message_read(reply, "s", &state);
+ if (r < 0)
+ return r;
+
+ return !STR_IN_SET(state, "inactive", "failed");
+}
+
+int manager_job_is_active(Manager *manager, const char *path, sd_bus_error *ret_error) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(manager);
+ assert(path);
+
+ r = sd_bus_get_property(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Job",
+ "State",
+ &error,
+ &reply,
+ "s");
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, SD_BUS_ERROR_NO_REPLY) ||
+ sd_bus_error_has_name(&error, SD_BUS_ERROR_DISCONNECTED))
+ return true;
+
+ if (sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_OBJECT))
+ return false;
+
+ sd_bus_error_move(ret_error, &error);
+ return r;
+ }
+
+ /* We don't actually care about the state really. The fact
+ * that we could read the job state is enough for us */
+
+ return true;
+}
diff --git a/src/login/logind-device.c b/src/login/logind-device.c
new file mode 100644
index 0000000..e724365
--- /dev/null
+++ b/src/login/logind-device.c
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+
+#include "alloc-util.h"
+#include "logind-device.h"
+#include "util.h"
+
+Device* device_new(Manager *m, const char *sysfs, bool master) {
+ Device *d;
+
+ assert(m);
+ assert(sysfs);
+
+ d = new0(Device, 1);
+ if (!d)
+ return NULL;
+
+ d->sysfs = strdup(sysfs);
+ if (!d->sysfs)
+ return mfree(d);
+
+ if (hashmap_put(m->devices, d->sysfs, d) < 0) {
+ free(d->sysfs);
+ return mfree(d);
+ }
+
+ d->manager = m;
+ d->master = master;
+ dual_timestamp_get(&d->timestamp);
+
+ return d;
+}
+
+static void device_detach(Device *d) {
+ Seat *s;
+ SessionDevice *sd;
+
+ assert(d);
+
+ if (!d->seat)
+ return;
+
+ while ((sd = d->session_devices))
+ session_device_free(sd);
+
+ s = d->seat;
+ LIST_REMOVE(devices, d->seat->devices, d);
+ d->seat = NULL;
+
+ if (!seat_has_master_device(s)) {
+ seat_add_to_gc_queue(s);
+ seat_send_changed(s, "CanGraphical", NULL);
+ }
+}
+
+void device_free(Device *d) {
+ assert(d);
+
+ device_detach(d);
+
+ hashmap_remove(d->manager->devices, d->sysfs);
+
+ free(d->sysfs);
+ free(d);
+}
+
+void device_attach(Device *d, Seat *s) {
+ Device *i;
+ bool had_master;
+
+ assert(d);
+ assert(s);
+
+ if (d->seat == s)
+ return;
+
+ if (d->seat)
+ device_detach(d);
+
+ d->seat = s;
+ had_master = seat_has_master_device(s);
+
+ /* We keep the device list sorted by the "master" flag. That is, master
+ * devices are at the front, other devices at the tail. As there is no
+ * way to easily add devices at the list-tail, we need to iterate the
+ * list to find the first non-master device when adding non-master
+ * devices. We assume there is only a few (normally 1) master devices
+ * per seat, so we iterate only a few times. */
+
+ if (d->master || !s->devices)
+ LIST_PREPEND(devices, s->devices, d);
+ else {
+ LIST_FOREACH(devices, i, s->devices) {
+ if (!i->devices_next || !i->master) {
+ LIST_INSERT_AFTER(devices, s->devices, i, d);
+ break;
+ }
+ }
+ }
+
+ if (!had_master && d->master && s->started) {
+ seat_save(s);
+ seat_send_changed(s, "CanGraphical", NULL);
+ }
+}
diff --git a/src/login/logind-device.h b/src/login/logind-device.h
new file mode 100644
index 0000000..cc6e523
--- /dev/null
+++ b/src/login/logind-device.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Device Device;
+
+#include "list.h"
+#include "logind-seat.h"
+#include "logind-session-device.h"
+
+struct Device {
+ Manager *manager;
+
+ char *sysfs;
+ Seat *seat;
+ bool master;
+
+ dual_timestamp timestamp;
+
+ LIST_FIELDS(struct Device, devices);
+ LIST_HEAD(SessionDevice, session_devices);
+};
+
+Device* device_new(Manager *m, const char *sysfs, bool master);
+void device_free(Device *d);
+void device_attach(Device *d, Seat *s);
diff --git a/src/login/logind-gperf.gperf b/src/login/logind-gperf.gperf
new file mode 100644
index 0000000..8829ce7
--- /dev/null
+++ b/src/login/logind-gperf.gperf
@@ -0,0 +1,44 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "logind.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name logind_gperf_hash
+%define lookup-function-name logind_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Login.NAutoVTs, config_parse_n_autovts, 0, offsetof(Manager, n_autovts)
+Login.ReserveVT, config_parse_unsigned, 0, offsetof(Manager, reserve_vt)
+Login.KillUserProcesses, config_parse_bool, 0, offsetof(Manager, kill_user_processes)
+Login.KillOnlyUsers, config_parse_strv, 0, offsetof(Manager, kill_only_users)
+Login.KillExcludeUsers, config_parse_strv, 0, offsetof(Manager, kill_exclude_users)
+Login.InhibitDelayMaxSec, config_parse_sec, 0, offsetof(Manager, inhibit_delay_max)
+Login.UserStopDelaySec, config_parse_sec, 0, offsetof(Manager, user_stop_delay)
+Login.HandlePowerKey, config_parse_handle_action, 0, offsetof(Manager, handle_power_key)
+Login.HandleSuspendKey, config_parse_handle_action, 0, offsetof(Manager, handle_suspend_key)
+Login.HandleHibernateKey, config_parse_handle_action, 0, offsetof(Manager, handle_hibernate_key)
+Login.HandleLidSwitch, config_parse_handle_action, 0, offsetof(Manager, handle_lid_switch)
+Login.HandleLidSwitchExternalPower, config_parse_handle_action, 0, offsetof(Manager, handle_lid_switch_ep)
+Login.HandleLidSwitchDocked, config_parse_handle_action, 0, offsetof(Manager, handle_lid_switch_docked)
+Login.PowerKeyIgnoreInhibited, config_parse_bool, 0, offsetof(Manager, power_key_ignore_inhibited)
+Login.SuspendKeyIgnoreInhibited, config_parse_bool, 0, offsetof(Manager, suspend_key_ignore_inhibited)
+Login.HibernateKeyIgnoreInhibited, config_parse_bool, 0, offsetof(Manager, hibernate_key_ignore_inhibited)
+Login.LidSwitchIgnoreInhibited, config_parse_bool, 0, offsetof(Manager, lid_switch_ignore_inhibited)
+Login.HoldoffTimeoutSec, config_parse_sec, 0, offsetof(Manager, holdoff_timeout_usec)
+Login.IdleAction, config_parse_handle_action, 0, offsetof(Manager, idle_action)
+Login.IdleActionSec, config_parse_sec, 0, offsetof(Manager, idle_action_usec)
+Login.RuntimeDirectorySize, config_parse_tmpfs_size, 0, offsetof(Manager, runtime_dir_size)
+Login.RemoveIPC, config_parse_bool, 0, offsetof(Manager, remove_ipc)
+Login.InhibitorsMax, config_parse_uint64, 0, offsetof(Manager, inhibitors_max)
+Login.SessionsMax, config_parse_uint64, 0, offsetof(Manager, sessions_max)
+Login.UserTasksMax, config_parse_compat_user_tasks_max, 0, offsetof(Manager, user_tasks_max)
diff --git a/src/login/logind-inhibit.c b/src/login/logind-inhibit.c
new file mode 100644
index 0000000..415c26b
--- /dev/null
+++ b/src/login/logind-inhibit.c
@@ -0,0 +1,471 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "logind-inhibit.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+Inhibitor* inhibitor_new(Manager *m, const char* id) {
+ Inhibitor *i;
+
+ assert(m);
+
+ i = new0(Inhibitor, 1);
+ if (!i)
+ return NULL;
+
+ i->state_file = strappend("/run/systemd/inhibit/", id);
+ if (!i->state_file)
+ return mfree(i);
+
+ i->id = basename(i->state_file);
+
+ if (hashmap_put(m->inhibitors, i->id, i) < 0) {
+ free(i->state_file);
+ return mfree(i);
+ }
+
+ i->manager = m;
+ i->fifo_fd = -1;
+
+ return i;
+}
+
+void inhibitor_free(Inhibitor *i) {
+ assert(i);
+
+ hashmap_remove(i->manager->inhibitors, i->id);
+
+ inhibitor_remove_fifo(i);
+
+ free(i->who);
+ free(i->why);
+
+ if (i->state_file) {
+ unlink(i->state_file);
+ free(i->state_file);
+ }
+
+ free(i);
+}
+
+int inhibitor_save(Inhibitor *i) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(i);
+
+ r = mkdir_safe_label("/run/systemd/inhibit", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ goto fail;
+
+ r = fopen_temporary(i->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "WHAT=%s\n"
+ "MODE=%s\n"
+ "UID="UID_FMT"\n"
+ "PID="PID_FMT"\n",
+ inhibit_what_to_string(i->what),
+ inhibit_mode_to_string(i->mode),
+ i->uid,
+ i->pid);
+
+ if (i->who) {
+ _cleanup_free_ char *cc = NULL;
+
+ cc = cescape(i->who);
+ if (!cc) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "WHO=%s\n", cc);
+ }
+
+ if (i->why) {
+ _cleanup_free_ char *cc = NULL;
+
+ cc = cescape(i->why);
+ if (!cc) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "WHY=%s\n", cc);
+ }
+
+ if (i->fifo_path)
+ fprintf(f, "FIFO=%s\n", i->fifo_path);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, i->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(i->state_file);
+
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save inhibit data %s: %m", i->state_file);
+}
+
+int inhibitor_start(Inhibitor *i) {
+ assert(i);
+
+ if (i->started)
+ return 0;
+
+ dual_timestamp_get(&i->since);
+
+ log_debug("Inhibitor %s (%s) pid="PID_FMT" uid="UID_FMT" mode=%s started.",
+ strna(i->who), strna(i->why),
+ i->pid, i->uid,
+ inhibit_mode_to_string(i->mode));
+
+ inhibitor_save(i);
+
+ i->started = true;
+
+ manager_send_changed(i->manager, i->mode == INHIBIT_BLOCK ? "BlockInhibited" : "DelayInhibited", NULL);
+
+ return 0;
+}
+
+int inhibitor_stop(Inhibitor *i) {
+ assert(i);
+
+ if (i->started)
+ log_debug("Inhibitor %s (%s) pid="PID_FMT" uid="UID_FMT" mode=%s stopped.",
+ strna(i->who), strna(i->why),
+ i->pid, i->uid,
+ inhibit_mode_to_string(i->mode));
+
+ if (i->state_file)
+ unlink(i->state_file);
+
+ i->started = false;
+
+ manager_send_changed(i->manager, i->mode == INHIBIT_BLOCK ? "BlockInhibited" : "DelayInhibited", NULL);
+
+ return 0;
+}
+
+int inhibitor_load(Inhibitor *i) {
+
+ _cleanup_free_ char
+ *what = NULL,
+ *uid = NULL,
+ *pid = NULL,
+ *who = NULL,
+ *why = NULL,
+ *mode = NULL;
+
+ InhibitWhat w;
+ InhibitMode mm;
+ char *cc;
+ int r;
+
+ r = parse_env_file(NULL, i->state_file,
+ "WHAT", &what,
+ "UID", &uid,
+ "PID", &pid,
+ "WHO", &who,
+ "WHY", &why,
+ "MODE", &mode,
+ "FIFO", &i->fifo_path);
+ if (r < 0)
+ return r;
+
+ w = what ? inhibit_what_from_string(what) : 0;
+ if (w >= 0)
+ i->what = w;
+
+ mm = mode ? inhibit_mode_from_string(mode) : INHIBIT_BLOCK;
+ if (mm >= 0)
+ i->mode = mm;
+
+ if (uid) {
+ r = parse_uid(uid, &i->uid);
+ if (r < 0)
+ return r;
+ }
+
+ if (pid) {
+ r = parse_pid(pid, &i->pid);
+ if (r < 0)
+ return r;
+ }
+
+ if (who) {
+ r = cunescape(who, 0, &cc);
+ if (r < 0)
+ return r;
+
+ free(i->who);
+ i->who = cc;
+ }
+
+ if (why) {
+ r = cunescape(why, 0, &cc);
+ if (r < 0)
+ return r;
+
+ free(i->why);
+ i->why = cc;
+ }
+
+ if (i->fifo_path) {
+ int fd;
+
+ fd = inhibitor_create_fifo(i);
+ safe_close(fd);
+ }
+
+ return 0;
+}
+
+static int inhibitor_dispatch_fifo(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Inhibitor *i = userdata;
+
+ assert(s);
+ assert(fd == i->fifo_fd);
+ assert(i);
+
+ inhibitor_stop(i);
+ inhibitor_free(i);
+
+ return 0;
+}
+
+int inhibitor_create_fifo(Inhibitor *i) {
+ int r;
+
+ assert(i);
+
+ /* Create FIFO */
+ if (!i->fifo_path) {
+ r = mkdir_safe_label("/run/systemd/inhibit", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ return r;
+
+ i->fifo_path = strjoin("/run/systemd/inhibit/", i->id, ".ref");
+ if (!i->fifo_path)
+ return -ENOMEM;
+
+ if (mkfifo(i->fifo_path, 0600) < 0 && errno != EEXIST)
+ return -errno;
+ }
+
+ /* Open reading side */
+ if (i->fifo_fd < 0) {
+ i->fifo_fd = open(i->fifo_path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (i->fifo_fd < 0)
+ return -errno;
+ }
+
+ if (!i->event_source) {
+ r = sd_event_add_io(i->manager->event, &i->event_source, i->fifo_fd, 0, inhibitor_dispatch_fifo, i);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(i->event_source, SD_EVENT_PRIORITY_IDLE-10);
+ if (r < 0)
+ return r;
+ }
+
+ /* Open writing side */
+ r = open(i->fifo_path, O_WRONLY|O_CLOEXEC|O_NONBLOCK);
+ if (r < 0)
+ return -errno;
+
+ return r;
+}
+
+void inhibitor_remove_fifo(Inhibitor *i) {
+ assert(i);
+
+ i->event_source = sd_event_source_unref(i->event_source);
+ i->fifo_fd = safe_close(i->fifo_fd);
+
+ if (i->fifo_path) {
+ unlink(i->fifo_path);
+ i->fifo_path = mfree(i->fifo_path);
+ }
+}
+
+InhibitWhat manager_inhibit_what(Manager *m, InhibitMode mm) {
+ Inhibitor *i;
+ Iterator j;
+ InhibitWhat what = 0;
+
+ assert(m);
+
+ HASHMAP_FOREACH(i, m->inhibitors, j)
+ if (i->mode == mm && i->started)
+ what |= i->what;
+
+ return what;
+}
+
+static int pid_is_active(Manager *m, pid_t pid) {
+ Session *s;
+ int r;
+
+ /* Get client session. This is not what you are looking for these days.
+ * FIXME #6852 */
+ r = manager_get_session_by_pid(m, pid, &s);
+ if (r < 0)
+ return r;
+
+ /* If there's no session assigned to it, then it's globally
+ * active on all ttys */
+ if (r == 0)
+ return 1;
+
+ return session_is_active(s);
+}
+
+bool manager_is_inhibited(
+ Manager *m,
+ InhibitWhat w,
+ InhibitMode mm,
+ dual_timestamp *since,
+ bool ignore_inactive,
+ bool ignore_uid,
+ uid_t uid,
+ Inhibitor **offending) {
+
+ Inhibitor *i;
+ Iterator j;
+ struct dual_timestamp ts = DUAL_TIMESTAMP_NULL;
+ bool inhibited = false;
+
+ assert(m);
+ assert(w > 0 && w < _INHIBIT_WHAT_MAX);
+
+ HASHMAP_FOREACH(i, m->inhibitors, j) {
+ if (!i->started)
+ continue;
+
+ if (!(i->what & w))
+ continue;
+
+ if (i->mode != mm)
+ continue;
+
+ if (ignore_inactive && pid_is_active(m, i->pid) <= 0)
+ continue;
+
+ if (ignore_uid && i->uid == uid)
+ continue;
+
+ if (!inhibited ||
+ i->since.monotonic < ts.monotonic)
+ ts = i->since;
+
+ inhibited = true;
+
+ if (offending)
+ *offending = i;
+ }
+
+ if (since)
+ *since = ts;
+
+ return inhibited;
+}
+
+const char *inhibit_what_to_string(InhibitWhat w) {
+ static thread_local char buffer[97];
+ char *p;
+
+ if (w < 0 || w >= _INHIBIT_WHAT_MAX)
+ return NULL;
+
+ p = buffer;
+ if (w & INHIBIT_SHUTDOWN)
+ p = stpcpy(p, "shutdown:");
+ if (w & INHIBIT_SLEEP)
+ p = stpcpy(p, "sleep:");
+ if (w & INHIBIT_IDLE)
+ p = stpcpy(p, "idle:");
+ if (w & INHIBIT_HANDLE_POWER_KEY)
+ p = stpcpy(p, "handle-power-key:");
+ if (w & INHIBIT_HANDLE_SUSPEND_KEY)
+ p = stpcpy(p, "handle-suspend-key:");
+ if (w & INHIBIT_HANDLE_HIBERNATE_KEY)
+ p = stpcpy(p, "handle-hibernate-key:");
+ if (w & INHIBIT_HANDLE_LID_SWITCH)
+ p = stpcpy(p, "handle-lid-switch:");
+
+ if (p > buffer)
+ *(p-1) = 0;
+ else
+ *p = 0;
+
+ return buffer;
+}
+
+InhibitWhat inhibit_what_from_string(const char *s) {
+ InhibitWhat what = 0;
+ const char *word, *state;
+ size_t l;
+
+ FOREACH_WORD_SEPARATOR(word, l, s, ":", state) {
+ if (l == 8 && strneq(word, "shutdown", l))
+ what |= INHIBIT_SHUTDOWN;
+ else if (l == 5 && strneq(word, "sleep", l))
+ what |= INHIBIT_SLEEP;
+ else if (l == 4 && strneq(word, "idle", l))
+ what |= INHIBIT_IDLE;
+ else if (l == 16 && strneq(word, "handle-power-key", l))
+ what |= INHIBIT_HANDLE_POWER_KEY;
+ else if (l == 18 && strneq(word, "handle-suspend-key", l))
+ what |= INHIBIT_HANDLE_SUSPEND_KEY;
+ else if (l == 20 && strneq(word, "handle-hibernate-key", l))
+ what |= INHIBIT_HANDLE_HIBERNATE_KEY;
+ else if (l == 17 && strneq(word, "handle-lid-switch", l))
+ what |= INHIBIT_HANDLE_LID_SWITCH;
+ else
+ return _INHIBIT_WHAT_INVALID;
+ }
+
+ return what;
+}
+
+static const char* const inhibit_mode_table[_INHIBIT_MODE_MAX] = {
+ [INHIBIT_BLOCK] = "block",
+ [INHIBIT_DELAY] = "delay"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(inhibit_mode, InhibitMode);
diff --git a/src/login/logind-inhibit.h b/src/login/logind-inhibit.h
new file mode 100644
index 0000000..6505871
--- /dev/null
+++ b/src/login/logind-inhibit.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Inhibitor Inhibitor;
+
+typedef enum InhibitWhat {
+ INHIBIT_SHUTDOWN = 1 << 0,
+ INHIBIT_SLEEP = 1 << 1,
+ INHIBIT_IDLE = 1 << 2,
+ INHIBIT_HANDLE_POWER_KEY = 1 << 3,
+ INHIBIT_HANDLE_SUSPEND_KEY = 1 << 4,
+ INHIBIT_HANDLE_HIBERNATE_KEY = 1 << 5,
+ INHIBIT_HANDLE_LID_SWITCH = 1 << 6,
+ _INHIBIT_WHAT_MAX = 1 << 7,
+ _INHIBIT_WHAT_INVALID = -1
+} InhibitWhat;
+
+typedef enum InhibitMode {
+ INHIBIT_BLOCK,
+ INHIBIT_DELAY,
+ _INHIBIT_MODE_MAX,
+ _INHIBIT_MODE_INVALID = -1
+} InhibitMode;
+
+#include "logind.h"
+
+struct Inhibitor {
+ Manager *manager;
+
+ sd_event_source *event_source;
+
+ char *id;
+ char *state_file;
+
+ bool started;
+
+ InhibitWhat what;
+ char *who;
+ char *why;
+ InhibitMode mode;
+
+ pid_t pid;
+ uid_t uid;
+
+ dual_timestamp since;
+
+ char *fifo_path;
+ int fifo_fd;
+};
+
+Inhibitor* inhibitor_new(Manager *m, const char *id);
+void inhibitor_free(Inhibitor *i);
+
+int inhibitor_save(Inhibitor *i);
+int inhibitor_load(Inhibitor *i);
+
+int inhibitor_start(Inhibitor *i);
+int inhibitor_stop(Inhibitor *i);
+
+int inhibitor_create_fifo(Inhibitor *i);
+void inhibitor_remove_fifo(Inhibitor *i);
+
+InhibitWhat manager_inhibit_what(Manager *m, InhibitMode mm);
+bool manager_is_inhibited(Manager *m, InhibitWhat w, InhibitMode mm, dual_timestamp *since, bool ignore_inactive, bool ignore_uid, uid_t uid, Inhibitor **offending);
+
+const char *inhibit_what_to_string(InhibitWhat k);
+InhibitWhat inhibit_what_from_string(const char *s);
+
+const char *inhibit_mode_to_string(InhibitMode k);
+InhibitMode inhibit_mode_from_string(const char *s);
diff --git a/src/login/logind-seat-dbus.c b/src/login/logind-seat-dbus.c
new file mode 100644
index 0000000..6ee5a1c
--- /dev/null
+++ b/src/login/logind-seat-dbus.c
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-label.h"
+#include "bus-util.h"
+#include "logind-seat.h"
+#include "logind.h"
+#include "missing_capability.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+static BUS_DEFINE_PROPERTY_GET(property_get_can_multi_session, "b", Seat, seat_can_multi_session);
+static BUS_DEFINE_PROPERTY_GET(property_get_can_tty, "b", Seat, seat_can_tty);
+static BUS_DEFINE_PROPERTY_GET(property_get_can_graphical, "b", Seat, seat_can_graphical);
+
+static int property_get_active_session(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *p = NULL;
+ Seat *s = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ p = s->active ? session_bus_path(s->active) : strdup("/");
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_message_append(reply, "(so)", s->active ? s->active->id : "", p);
+}
+
+static int property_get_sessions(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Seat *s = userdata;
+ Session *session;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ r = sd_bus_message_open_container(reply, 'a', "(so)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(sessions_by_seat, session, s->sessions) {
+ _cleanup_free_ char *p = NULL;
+
+ p = session_bus_path(session);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(so)", session->id, p);
+ if (r < 0)
+ return r;
+
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int property_get_idle_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Seat *s = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ return sd_bus_message_append(reply, "b", seat_get_idle_hint(s, NULL) > 0);
+}
+
+static int property_get_idle_since_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Seat *s = userdata;
+ dual_timestamp t;
+ uint64_t u;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ r = seat_get_idle_hint(s, &t);
+ if (r < 0)
+ return r;
+
+ u = streq(property, "IdleSinceHint") ? t.realtime : t.monotonic;
+
+ return sd_bus_message_append(reply, "t", u);
+}
+
+int bus_seat_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Seat *s = userdata;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.login1.manage",
+ NULL,
+ false,
+ UID_INVALID,
+ &s->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = seat_stop_sessions(s, true);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_activate_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Seat *s = userdata;
+ const char *name;
+ Session *session;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ session = hashmap_get(s->manager->sessions, name);
+ if (!session)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_SESSION, "No session '%s' known", name);
+
+ if (session->seat != s)
+ return sd_bus_error_setf(error, BUS_ERROR_SESSION_NOT_ON_SEAT, "Session %s not on seat %s", name, s->id);
+
+ r = session_activate(session);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_switch_to(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Seat *s = userdata;
+ unsigned to;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "u", &to);
+ if (r < 0)
+ return r;
+
+ if (to <= 0)
+ return -EINVAL;
+
+ r = seat_switch_to(s, to);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_switch_to_next(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Seat *s = userdata;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = seat_switch_to_next(s);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_switch_to_previous(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Seat *s = userdata;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = seat_switch_to_previous(s);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+const sd_bus_vtable seat_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("Id", "s", NULL, offsetof(Seat, id), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ActiveSession", "(so)", property_get_active_session, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("CanMultiSession", "b", property_get_can_multi_session, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CanTTY", "b", property_get_can_tty, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CanGraphical", "b", property_get_can_graphical, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Sessions", "a(so)", property_get_sessions, 0, 0),
+ SD_BUS_PROPERTY("IdleHint", "b", property_get_idle_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHint", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHintMonotonic", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
+ SD_BUS_METHOD("Terminate", NULL, NULL, bus_seat_method_terminate, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ActivateSession", "s", NULL, method_activate_session, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SwitchTo", "u", NULL, method_switch_to, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SwitchToNext", NULL, NULL, method_switch_to_next, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SwitchToPrevious", NULL, NULL, method_switch_to_previous, SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_VTABLE_END
+};
+
+int seat_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ Seat *seat;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ if (streq(path, "/org/freedesktop/login1/seat/self")) {
+ sd_bus_message *message;
+
+ message = sd_bus_get_current_message(bus);
+ if (!message)
+ return 0;
+
+ r = manager_get_seat_from_creds(m, message, NULL, error, &seat);
+ if (r < 0)
+ return r;
+ } else {
+ _cleanup_free_ char *e = NULL;
+ const char *p;
+
+ p = startswith(path, "/org/freedesktop/login1/seat/");
+ if (!p)
+ return 0;
+
+ e = bus_label_unescape(p);
+ if (!e)
+ return -ENOMEM;
+
+ seat = hashmap_get(m->seats, e);
+ if (!seat)
+ return 0;
+ }
+
+ *found = seat;
+ return 1;
+}
+
+char *seat_bus_path(Seat *s) {
+ _cleanup_free_ char *t = NULL;
+
+ assert(s);
+
+ t = bus_label_escape(s->id);
+ if (!t)
+ return NULL;
+
+ return strappend("/org/freedesktop/login1/seat/", t);
+}
+
+int seat_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ sd_bus_message *message;
+ Manager *m = userdata;
+ Seat *seat;
+ Iterator i;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(nodes);
+
+ HASHMAP_FOREACH(seat, m->seats, i) {
+ char *p;
+
+ p = seat_bus_path(seat);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_consume(&l, p);
+ if (r < 0)
+ return r;
+ }
+
+ message = sd_bus_get_current_message(bus);
+ if (message) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ const char *name;
+ Session *session;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_SESSION|SD_BUS_CREDS_AUGMENT, &creds);
+ if (r >= 0) {
+ r = sd_bus_creds_get_session(creds, &name);
+ if (r >= 0) {
+ session = hashmap_get(m->sessions, name);
+ if (session && session->seat) {
+ r = strv_extend(&l, "/org/freedesktop/login1/seat/self");
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+int seat_send_signal(Seat *s, bool new_seat) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(s);
+
+ p = seat_bus_path(s);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_emit_signal(
+ s->manager->bus,
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ new_seat ? "SeatNew" : "SeatRemoved",
+ "so", s->id, p);
+}
+
+int seat_send_changed(Seat *s, const char *properties, ...) {
+ _cleanup_free_ char *p = NULL;
+ char **l;
+
+ assert(s);
+
+ if (!s->started)
+ return 0;
+
+ p = seat_bus_path(s);
+ if (!p)
+ return -ENOMEM;
+
+ l = strv_from_stdarg_alloca(properties);
+
+ return sd_bus_emit_properties_changed_strv(s->manager->bus, p, "org.freedesktop.login1.Seat", l);
+}
diff --git a/src/login/logind-seat.c b/src/login/logind-seat.c
new file mode 100644
index 0000000..a6d88f8
--- /dev/null
+++ b/src/login/logind-seat.c
@@ -0,0 +1,669 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio_ext.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "logind-acl.h"
+#include "logind-seat.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+int seat_new(Seat** ret, Manager *m, const char *id) {
+ _cleanup_(seat_freep) Seat *s = NULL;
+ int r;
+
+ assert(ret);
+ assert(m);
+ assert(id);
+
+ if (!seat_name_is_valid(id))
+ return -EINVAL;
+
+ s = new(Seat, 1);
+ if (!s)
+ return -ENOMEM;
+
+ *s = (Seat) {
+ .manager = m,
+ };
+
+ s->state_file = strappend("/run/systemd/seats/", id);
+ if (!s->state_file)
+ return -ENOMEM;
+
+ s->id = basename(s->state_file);
+
+ r = hashmap_put(m->seats, s->id, s);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+Seat* seat_free(Seat *s) {
+ if (!s)
+ return NULL;
+
+ if (s->in_gc_queue)
+ LIST_REMOVE(gc_queue, s->manager->seat_gc_queue, s);
+
+ while (s->sessions)
+ session_free(s->sessions);
+
+ assert(!s->active);
+
+ while (s->devices)
+ device_free(s->devices);
+
+ hashmap_remove(s->manager->seats, s->id);
+
+ free(s->positions);
+ free(s->state_file);
+
+ return mfree(s);
+}
+
+int seat_save(Seat *s) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(s);
+
+ if (!s->started)
+ return 0;
+
+ r = mkdir_safe_label("/run/systemd/seats", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ goto fail;
+
+ r = fopen_temporary(s->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "IS_SEAT0=%i\n"
+ "CAN_MULTI_SESSION=%i\n"
+ "CAN_TTY=%i\n"
+ "CAN_GRAPHICAL=%i\n",
+ seat_is_seat0(s),
+ seat_can_multi_session(s),
+ seat_can_tty(s),
+ seat_can_graphical(s));
+
+ if (s->active) {
+ assert(s->active->user);
+
+ fprintf(f,
+ "ACTIVE=%s\n"
+ "ACTIVE_UID="UID_FMT"\n",
+ s->active->id,
+ s->active->user->uid);
+ }
+
+ if (s->sessions) {
+ Session *i;
+
+ fputs("SESSIONS=", f);
+ LIST_FOREACH(sessions_by_seat, i, s->sessions) {
+ fprintf(f,
+ "%s%c",
+ i->id,
+ i->sessions_by_seat_next ? ' ' : '\n');
+ }
+
+ fputs("UIDS=", f);
+ LIST_FOREACH(sessions_by_seat, i, s->sessions)
+ fprintf(f,
+ UID_FMT"%c",
+ i->user->uid,
+ i->sessions_by_seat_next ? ' ' : '\n');
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, s->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(s->state_file);
+
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save seat data %s: %m", s->state_file);
+}
+
+int seat_load(Seat *s) {
+ assert(s);
+
+ /* There isn't actually anything to read here ... */
+
+ return 0;
+}
+
+static int vt_allocate(unsigned vtnr) {
+ char p[sizeof("/dev/tty") + DECIMAL_STR_MAX(unsigned)];
+ _cleanup_close_ int fd = -1;
+
+ assert(vtnr >= 1);
+
+ xsprintf(p, "/dev/tty%u", vtnr);
+ fd = open_terminal(p, O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ return 0;
+}
+
+int seat_preallocate_vts(Seat *s) {
+ int r = 0;
+ unsigned i;
+
+ assert(s);
+ assert(s->manager);
+
+ log_debug("Preallocating VTs...");
+
+ if (s->manager->n_autovts <= 0)
+ return 0;
+
+ if (!seat_has_vts(s))
+ return 0;
+
+ for (i = 1; i <= s->manager->n_autovts; i++) {
+ int q;
+
+ q = vt_allocate(i);
+ if (q < 0)
+ r = log_error_errno(q, "Failed to preallocate VT %u: %m", i);
+ }
+
+ return r;
+}
+
+int seat_apply_acls(Seat *s, Session *old_active) {
+ int r;
+
+ assert(s);
+
+ r = devnode_acl_all(s->id,
+ false,
+ !!old_active, old_active ? old_active->user->uid : 0,
+ !!s->active, s->active ? s->active->user->uid : 0);
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to apply ACLs: %m");
+
+ return 0;
+}
+
+int seat_set_active(Seat *s, Session *session) {
+ Session *old_active;
+
+ assert(s);
+ assert(!session || session->seat == s);
+
+ if (session == s->active)
+ return 0;
+
+ old_active = s->active;
+ s->active = session;
+
+ if (old_active) {
+ session_device_pause_all(old_active);
+ session_send_changed(old_active, "Active", NULL);
+ }
+
+ (void) seat_apply_acls(s, old_active);
+
+ if (session && session->started) {
+ session_send_changed(session, "Active", NULL);
+ session_device_resume_all(session);
+ }
+
+ if (!session || session->started)
+ seat_send_changed(s, "ActiveSession", NULL);
+
+ seat_save(s);
+
+ if (session) {
+ session_save(session);
+ user_save(session->user);
+ }
+
+ if (old_active) {
+ session_save(old_active);
+ if (!session || session->user != old_active->user)
+ user_save(old_active->user);
+ }
+
+ return 0;
+}
+
+int seat_switch_to(Seat *s, unsigned num) {
+ /* Public session positions skip 0 (there is only F1-F12). Maybe it
+ * will get reassigned in the future, so return error for now. */
+ if (num == 0)
+ return -EINVAL;
+
+ if (num >= s->position_count || !s->positions[num]) {
+ /* allow switching to unused VTs to trigger auto-activate */
+ if (seat_has_vts(s) && num < 64)
+ return chvt(num);
+
+ return -EINVAL;
+ }
+
+ return session_activate(s->positions[num]);
+}
+
+int seat_switch_to_next(Seat *s) {
+ unsigned start, i;
+
+ if (s->position_count == 0)
+ return -EINVAL;
+
+ start = 1;
+ if (s->active && s->active->position > 0)
+ start = s->active->position;
+
+ for (i = start + 1; i < s->position_count; ++i)
+ if (s->positions[i])
+ return session_activate(s->positions[i]);
+
+ for (i = 1; i < start; ++i)
+ if (s->positions[i])
+ return session_activate(s->positions[i]);
+
+ return -EINVAL;
+}
+
+int seat_switch_to_previous(Seat *s) {
+ unsigned start, i;
+
+ if (s->position_count == 0)
+ return -EINVAL;
+
+ start = 1;
+ if (s->active && s->active->position > 0)
+ start = s->active->position;
+
+ for (i = start - 1; i > 0; --i)
+ if (s->positions[i])
+ return session_activate(s->positions[i]);
+
+ for (i = s->position_count - 1; i > start; --i)
+ if (s->positions[i])
+ return session_activate(s->positions[i]);
+
+ return -EINVAL;
+}
+
+int seat_active_vt_changed(Seat *s, unsigned vtnr) {
+ Session *i, *new_active = NULL;
+ int r;
+
+ assert(s);
+ assert(vtnr >= 1);
+
+ if (!seat_has_vts(s))
+ return -EINVAL;
+
+ log_debug("VT changed to %u", vtnr);
+
+ /* we might have earlier closing sessions on the same VT, so try to
+ * find a running one first */
+ LIST_FOREACH(sessions_by_seat, i, s->sessions)
+ if (i->vtnr == vtnr && !i->stopping) {
+ new_active = i;
+ break;
+ }
+
+ if (!new_active) {
+ /* no running one? then we can't decide which one is the
+ * active one, let the first one win */
+ LIST_FOREACH(sessions_by_seat, i, s->sessions)
+ if (i->vtnr == vtnr) {
+ new_active = i;
+ break;
+ }
+ }
+
+ r = seat_set_active(s, new_active);
+ manager_spawn_autovt(s->manager, vtnr);
+
+ return r;
+}
+
+int seat_read_active_vt(Seat *s) {
+ char t[64];
+ ssize_t k;
+ int vtnr;
+
+ assert(s);
+
+ if (!seat_has_vts(s))
+ return 0;
+
+ if (lseek(s->manager->console_active_fd, SEEK_SET, 0) < 0)
+ return log_error_errno(errno, "lseek on console_active_fd failed: %m");
+
+ k = read(s->manager->console_active_fd, t, sizeof(t)-1);
+ if (k <= 0) {
+ log_error("Failed to read current console: %s", k < 0 ? strerror(errno) : "EOF");
+ return k < 0 ? -errno : -EIO;
+ }
+
+ t[k] = 0;
+ truncate_nl(t);
+
+ vtnr = vtnr_from_tty(t);
+ if (vtnr < 0) {
+ log_error_errno(vtnr, "Hm, /sys/class/tty/tty0/active is badly formatted: %m");
+ return -EIO;
+ }
+
+ return seat_active_vt_changed(s, vtnr);
+}
+
+int seat_start(Seat *s) {
+ assert(s);
+
+ if (s->started)
+ return 0;
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_SEAT_START_STR,
+ "SEAT_ID=%s", s->id,
+ LOG_MESSAGE("New seat %s.", s->id));
+
+ /* Initialize VT magic stuff */
+ seat_preallocate_vts(s);
+
+ /* Read current VT */
+ seat_read_active_vt(s);
+
+ s->started = true;
+
+ /* Save seat data */
+ seat_save(s);
+
+ seat_send_signal(s, true);
+
+ return 0;
+}
+
+int seat_stop(Seat *s, bool force) {
+ int r;
+
+ assert(s);
+
+ if (s->started)
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_SEAT_STOP_STR,
+ "SEAT_ID=%s", s->id,
+ LOG_MESSAGE("Removed seat %s.", s->id));
+
+ r = seat_stop_sessions(s, force);
+
+ (void) unlink(s->state_file);
+ seat_add_to_gc_queue(s);
+
+ if (s->started)
+ seat_send_signal(s, false);
+
+ s->started = false;
+
+ return r;
+}
+
+int seat_stop_sessions(Seat *s, bool force) {
+ Session *session;
+ int r = 0, k;
+
+ assert(s);
+
+ LIST_FOREACH(sessions_by_seat, session, s->sessions) {
+ k = session_stop(session, force);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+void seat_evict_position(Seat *s, Session *session) {
+ Session *iter;
+ unsigned pos = session->position;
+
+ session->position = 0;
+
+ if (pos == 0)
+ return;
+
+ if (pos < s->position_count && s->positions[pos] == session) {
+ s->positions[pos] = NULL;
+
+ /* There might be another session claiming the same
+ * position (eg., during gdm->session transition), so let's look
+ * for it and set it on the free slot. */
+ LIST_FOREACH(sessions_by_seat, iter, s->sessions) {
+ if (iter->position == pos && session_get_state(iter) != SESSION_CLOSING) {
+ s->positions[pos] = iter;
+ break;
+ }
+ }
+ }
+}
+
+void seat_claim_position(Seat *s, Session *session, unsigned pos) {
+ /* with VTs, the position is always the same as the VTnr */
+ if (seat_has_vts(s))
+ pos = session->vtnr;
+
+ if (!GREEDY_REALLOC0(s->positions, s->position_count, pos + 1))
+ return;
+
+ seat_evict_position(s, session);
+
+ session->position = pos;
+ if (pos > 0)
+ s->positions[pos] = session;
+}
+
+static void seat_assign_position(Seat *s, Session *session) {
+ unsigned pos;
+
+ if (session->position > 0)
+ return;
+
+ for (pos = 1; pos < s->position_count; ++pos)
+ if (!s->positions[pos])
+ break;
+
+ seat_claim_position(s, session, pos);
+}
+
+int seat_attach_session(Seat *s, Session *session) {
+ assert(s);
+ assert(session);
+ assert(!session->seat);
+
+ if (!seat_has_vts(s) != !session->vtnr)
+ return -EINVAL;
+
+ session->seat = s;
+ LIST_PREPEND(sessions_by_seat, s->sessions, session);
+ seat_assign_position(s, session);
+
+ /* On seats with VTs, the VT logic defines which session is active. On
+ * seats without VTs, we automatically activate new sessions. */
+ if (!seat_has_vts(s))
+ seat_set_active(s, session);
+
+ return 0;
+}
+
+void seat_complete_switch(Seat *s) {
+ Session *session;
+
+ assert(s);
+
+ /* if no session-switch is pending or if it got canceled, do nothing */
+ if (!s->pending_switch)
+ return;
+
+ session = TAKE_PTR(s->pending_switch);
+
+ seat_set_active(s, session);
+}
+
+bool seat_has_vts(Seat *s) {
+ assert(s);
+
+ return seat_is_seat0(s) && s->manager->console_active_fd >= 0;
+}
+
+bool seat_is_seat0(Seat *s) {
+ assert(s);
+
+ return s->manager->seat0 == s;
+}
+
+bool seat_can_multi_session(Seat *s) {
+ assert(s);
+
+ return seat_has_vts(s);
+}
+
+bool seat_can_tty(Seat *s) {
+ assert(s);
+
+ return seat_has_vts(s);
+}
+
+bool seat_has_master_device(Seat *s) {
+ assert(s);
+
+ /* device list is ordered by "master" flag */
+ return !!s->devices && s->devices->master;
+}
+
+bool seat_can_graphical(Seat *s) {
+ assert(s);
+
+ return seat_has_master_device(s);
+}
+
+int seat_get_idle_hint(Seat *s, dual_timestamp *t) {
+ Session *session;
+ bool idle_hint = true;
+ dual_timestamp ts = DUAL_TIMESTAMP_NULL;
+
+ assert(s);
+
+ LIST_FOREACH(sessions_by_seat, session, s->sessions) {
+ dual_timestamp k;
+ int ih;
+
+ ih = session_get_idle_hint(session, &k);
+ if (ih < 0)
+ return ih;
+
+ if (!ih) {
+ if (!idle_hint) {
+ if (k.monotonic > ts.monotonic)
+ ts = k;
+ } else {
+ idle_hint = false;
+ ts = k;
+ }
+ } else if (idle_hint) {
+
+ if (k.monotonic > ts.monotonic)
+ ts = k;
+ }
+ }
+
+ if (t)
+ *t = ts;
+
+ return idle_hint;
+}
+
+bool seat_may_gc(Seat *s, bool drop_not_started) {
+ assert(s);
+
+ if (drop_not_started && !s->started)
+ return true;
+
+ if (seat_is_seat0(s))
+ return false;
+
+ return !seat_has_master_device(s);
+}
+
+void seat_add_to_gc_queue(Seat *s) {
+ assert(s);
+
+ if (s->in_gc_queue)
+ return;
+
+ LIST_PREPEND(gc_queue, s->manager->seat_gc_queue, s);
+ s->in_gc_queue = true;
+}
+
+static bool seat_name_valid_char(char c) {
+ return
+ (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ IN_SET(c, '-', '_');
+}
+
+bool seat_name_is_valid(const char *name) {
+ const char *p;
+
+ assert(name);
+
+ if (!startswith(name, "seat"))
+ return false;
+
+ if (!name[4])
+ return false;
+
+ for (p = name; *p; p++)
+ if (!seat_name_valid_char(*p))
+ return false;
+
+ if (strlen(name) > 255)
+ return false;
+
+ return true;
+}
diff --git a/src/login/logind-seat.h b/src/login/logind-seat.h
new file mode 100644
index 0000000..6236f13
--- /dev/null
+++ b/src/login/logind-seat.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Seat Seat;
+
+#include "list.h"
+#include "logind-session.h"
+
+struct Seat {
+ Manager *manager;
+ char *id;
+
+ char *state_file;
+
+ LIST_HEAD(Device, devices);
+
+ Session *active;
+ Session *pending_switch;
+ LIST_HEAD(Session, sessions);
+
+ Session **positions;
+ size_t position_count;
+
+ bool in_gc_queue:1;
+ bool started:1;
+
+ LIST_FIELDS(Seat, gc_queue);
+};
+
+int seat_new(Seat **ret, Manager *m, const char *id);
+Seat* seat_free(Seat *s);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Seat *, seat_free);
+
+int seat_save(Seat *s);
+int seat_load(Seat *s);
+
+int seat_apply_acls(Seat *s, Session *old_active);
+int seat_set_active(Seat *s, Session *session);
+int seat_switch_to(Seat *s, unsigned num);
+int seat_switch_to_next(Seat *s);
+int seat_switch_to_previous(Seat *s);
+int seat_active_vt_changed(Seat *s, unsigned vtnr);
+int seat_read_active_vt(Seat *s);
+int seat_preallocate_vts(Seat *s);
+
+int seat_attach_session(Seat *s, Session *session);
+void seat_complete_switch(Seat *s);
+void seat_evict_position(Seat *s, Session *session);
+void seat_claim_position(Seat *s, Session *session, unsigned pos);
+
+bool seat_has_vts(Seat *s);
+bool seat_is_seat0(Seat *s);
+bool seat_can_multi_session(Seat *s);
+bool seat_can_tty(Seat *s);
+bool seat_has_master_device(Seat *s);
+bool seat_can_graphical(Seat *s);
+
+int seat_get_idle_hint(Seat *s, dual_timestamp *t);
+
+int seat_start(Seat *s);
+int seat_stop(Seat *s, bool force);
+int seat_stop_sessions(Seat *s, bool force);
+
+bool seat_may_gc(Seat *s, bool drop_not_started);
+void seat_add_to_gc_queue(Seat *s);
+
+bool seat_name_is_valid(const char *name);
+
+extern const sd_bus_vtable seat_vtable[];
+
+int seat_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error);
+int seat_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error);
+char *seat_bus_path(Seat *s);
+
+int seat_send_signal(Seat *s, bool new_seat);
+int seat_send_changed(Seat *s, const char *properties, ...) _sentinel_;
+
+int bus_seat_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error);
diff --git a/src/login/logind-session-dbus.c b/src/login/logind-session-dbus.c
new file mode 100644
index 0000000..df5bfba
--- /dev/null
+++ b/src/login/logind-session-dbus.c
@@ -0,0 +1,764 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-label.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "logind-session-device.h"
+#include "logind-session.h"
+#include "logind.h"
+#include "missing_capability.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "strv.h"
+#include "util.h"
+
+static int property_get_user(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *p = NULL;
+ Session *s = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ p = user_bus_path(s->user);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_message_append(reply, "(uo)", (uint32_t) s->user->uid, p);
+}
+
+static int property_get_name(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Session *s = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ return sd_bus_message_append(reply, "s", s->user->name);
+}
+
+static int property_get_seat(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *p = NULL;
+ Session *s = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ p = s->seat ? seat_bus_path(s->seat) : strdup("/");
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_message_append(reply, "(so)", s->seat ? s->seat->id : "", p);
+}
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, session_type, SessionType);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_class, session_class, SessionClass);
+static BUS_DEFINE_PROPERTY_GET(property_get_active, "b", Session, session_is_active);
+static BUS_DEFINE_PROPERTY_GET2(property_get_state, "s", Session, session_get_state, session_state_to_string);
+
+static int property_get_idle_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Session *s = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ return sd_bus_message_append(reply, "b", session_get_idle_hint(s, NULL) > 0);
+}
+
+static int property_get_idle_since_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Session *s = userdata;
+ dual_timestamp t = DUAL_TIMESTAMP_NULL;
+ uint64_t u;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ r = session_get_idle_hint(s, &t);
+ if (r < 0)
+ return r;
+
+ u = streq(property, "IdleSinceHint") ? t.realtime : t.monotonic;
+
+ return sd_bus_message_append(reply, "t", u);
+}
+
+static int property_get_locked_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Session *s = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ return sd_bus_message_append(reply, "b", session_get_locked_hint(s) > 0);
+}
+
+int bus_session_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.login1.manage",
+ NULL,
+ false,
+ s->user->uid,
+ &s->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = session_stop(s, true);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_session_method_activate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = session_activate(s);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_session_method_lock(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.lock-sessions",
+ NULL,
+ false,
+ s->user->uid,
+ &s->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = session_send_lock(s, strstr(sd_bus_message_get_member(message), "Lock"));
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_set_idle_hint(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ Session *s = userdata;
+ uid_t uid;
+ int r, b;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r < 0)
+ return r;
+
+ if (uid != 0 && uid != s->user->uid)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Only owner of session may set idle hint");
+
+ session_set_idle_hint(s, b);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_set_locked_hint(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ Session *s = userdata;
+ uid_t uid;
+ int r, b;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r < 0)
+ return r;
+
+ if (uid != 0 && uid != s->user->uid)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Only owner of session may set locked hint");
+
+ session_set_locked_hint(s, b);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_session_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ const char *swho;
+ int32_t signo;
+ KillWho who;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "si", &swho, &signo);
+ if (r < 0)
+ return r;
+
+ if (isempty(swho))
+ who = KILL_ALL;
+ else {
+ who = kill_who_from_string(swho);
+ if (who < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid kill parameter '%s'", swho);
+ }
+
+ if (!SIGNAL_VALID(signo))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid signal %i", signo);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.login1.manage",
+ NULL,
+ false,
+ s->user->uid,
+ &s->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = session_kill(s, who, signo);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_take_control(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ Session *s = userdata;
+ int r, force;
+ uid_t uid;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "b", &force);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r < 0)
+ return r;
+
+ if (uid != 0 && (force || uid != s->user->uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Only owner of session may take control");
+
+ r = session_set_controller(s, sd_bus_message_get_sender(message), force, true);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_release_control(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+
+ assert(message);
+ assert(s);
+
+ if (!session_is_controller(s, sd_bus_message_get_sender(message)))
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_IN_CONTROL, "You are not in control of this session");
+
+ session_drop_controller(s);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_take_device(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ uint32_t major, minor;
+ SessionDevice *sd;
+ dev_t dev;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "uu", &major, &minor);
+ if (r < 0)
+ return r;
+
+ if (!DEVICE_MAJOR_VALID(major) || !DEVICE_MINOR_VALID(minor))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Device major/minor is not valid.");
+
+ if (!session_is_controller(s, sd_bus_message_get_sender(message)))
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_IN_CONTROL, "You are not in control of this session");
+
+ dev = makedev(major, minor);
+ sd = hashmap_get(s->devices, &dev);
+ if (sd)
+ /* We don't allow retrieving a device multiple times.
+ * The related ReleaseDevice call is not ref-counted.
+ * The caller should use dup() if it requires more
+ * than one fd (it would be functionally
+ * equivalent). */
+ return sd_bus_error_setf(error, BUS_ERROR_DEVICE_IS_TAKEN, "Device already taken");
+
+ r = session_device_new(s, dev, true, &sd);
+ if (r < 0)
+ return r;
+
+ r = session_device_save(sd);
+ if (r < 0)
+ goto error;
+
+ r = sd_bus_reply_method_return(message, "hb", sd->fd, !sd->active);
+ if (r < 0)
+ goto error;
+
+ session_save(s);
+ return 1;
+
+error:
+ session_device_free(sd);
+ return r;
+}
+
+static int method_release_device(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ uint32_t major, minor;
+ SessionDevice *sd;
+ dev_t dev;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "uu", &major, &minor);
+ if (r < 0)
+ return r;
+
+ if (!DEVICE_MAJOR_VALID(major) || !DEVICE_MINOR_VALID(minor))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Device major/minor is not valid.");
+
+ if (!session_is_controller(s, sd_bus_message_get_sender(message)))
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_IN_CONTROL, "You are not in control of this session");
+
+ dev = makedev(major, minor);
+ sd = hashmap_get(s->devices, &dev);
+ if (!sd)
+ return sd_bus_error_setf(error, BUS_ERROR_DEVICE_NOT_TAKEN, "Device not taken");
+
+ session_device_free(sd);
+ session_save(s);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_pause_device_complete(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ uint32_t major, minor;
+ SessionDevice *sd;
+ dev_t dev;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "uu", &major, &minor);
+ if (r < 0)
+ return r;
+
+ if (!DEVICE_MAJOR_VALID(major) || !DEVICE_MINOR_VALID(minor))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Device major/minor is not valid.");
+
+ if (!session_is_controller(s, sd_bus_message_get_sender(message)))
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_IN_CONTROL, "You are not in control of this session");
+
+ dev = makedev(major, minor);
+ sd = hashmap_get(s->devices, &dev);
+ if (!sd)
+ return sd_bus_error_setf(error, BUS_ERROR_DEVICE_NOT_TAKEN, "Device not taken");
+
+ session_device_complete_pause(sd);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+const sd_bus_vtable session_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("Id", "s", NULL, offsetof(Session, id), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("User", "(uo)", property_get_user, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Name", "s", property_get_name, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("Timestamp", offsetof(Session, timestamp), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("VTNr", "u", NULL, offsetof(Session, vtnr), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Seat", "(so)", property_get_seat, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TTY", "s", NULL, offsetof(Session, tty), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Display", "s", NULL, offsetof(Session, display), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Remote", "b", bus_property_get_bool, offsetof(Session, remote), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RemoteHost", "s", NULL, offsetof(Session, remote_host), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RemoteUser", "s", NULL, offsetof(Session, remote_user), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Service", "s", NULL, offsetof(Session, service), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Desktop", "s", NULL, offsetof(Session, desktop), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Scope", "s", NULL, offsetof(Session, scope), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Leader", "u", bus_property_get_pid, offsetof(Session, leader), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Audit", "u", NULL, offsetof(Session, audit_id), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Session, type), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Class", "s", property_get_class, offsetof(Session, class), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Active", "b", property_get_active, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("State", "s", property_get_state, 0, 0),
+ SD_BUS_PROPERTY("IdleHint", "b", property_get_idle_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHint", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHintMonotonic", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("LockedHint", "b", property_get_locked_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
+ SD_BUS_METHOD("Terminate", NULL, NULL, bus_session_method_terminate, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Activate", NULL, NULL, bus_session_method_activate, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Lock", NULL, NULL, bus_session_method_lock, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Unlock", NULL, NULL, bus_session_method_lock, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetIdleHint", "b", NULL, method_set_idle_hint, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLockedHint", "b", NULL, method_set_locked_hint, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Kill", "si", NULL, bus_session_method_kill, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("TakeControl", "b", NULL, method_take_control, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ReleaseControl", NULL, NULL, method_release_control, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("TakeDevice", "uu", "hb", method_take_device, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ReleaseDevice", "uu", NULL, method_release_device, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("PauseDeviceComplete", "uu", NULL, method_pause_device_complete, SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_SIGNAL("PauseDevice", "uus", 0),
+ SD_BUS_SIGNAL("ResumeDevice", "uuh", 0),
+ SD_BUS_SIGNAL("Lock", NULL, 0),
+ SD_BUS_SIGNAL("Unlock", NULL, 0),
+
+ SD_BUS_VTABLE_END
+};
+
+int session_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ Session *session;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ if (streq(path, "/org/freedesktop/login1/session/self")) {
+ sd_bus_message *message;
+
+ message = sd_bus_get_current_message(bus);
+ if (!message)
+ return 0;
+
+ r = manager_get_session_from_creds(m, message, NULL, error, &session);
+ if (r < 0)
+ return r;
+ } else {
+ _cleanup_free_ char *e = NULL;
+ const char *p;
+
+ p = startswith(path, "/org/freedesktop/login1/session/");
+ if (!p)
+ return 0;
+
+ e = bus_label_unescape(p);
+ if (!e)
+ return -ENOMEM;
+
+ session = hashmap_get(m->sessions, e);
+ if (!session)
+ return 0;
+ }
+
+ *found = session;
+ return 1;
+}
+
+char *session_bus_path(Session *s) {
+ _cleanup_free_ char *t = NULL;
+
+ assert(s);
+
+ t = bus_label_escape(s->id);
+ if (!t)
+ return NULL;
+
+ return strappend("/org/freedesktop/login1/session/", t);
+}
+
+int session_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ sd_bus_message *message;
+ Manager *m = userdata;
+ Session *session;
+ Iterator i;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(nodes);
+
+ HASHMAP_FOREACH(session, m->sessions, i) {
+ char *p;
+
+ p = session_bus_path(session);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_consume(&l, p);
+ if (r < 0)
+ return r;
+ }
+
+ message = sd_bus_get_current_message(bus);
+ if (message) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ const char *name;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_SESSION|SD_BUS_CREDS_AUGMENT, &creds);
+ if (r >= 0) {
+ r = sd_bus_creds_get_session(creds, &name);
+ if (r >= 0) {
+ session = hashmap_get(m->sessions, name);
+ if (session) {
+ r = strv_extend(&l, "/org/freedesktop/login1/session/self");
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+int session_send_signal(Session *s, bool new_session) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(s);
+
+ p = session_bus_path(s);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_emit_signal(
+ s->manager->bus,
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ new_session ? "SessionNew" : "SessionRemoved",
+ "so", s->id, p);
+}
+
+int session_send_changed(Session *s, const char *properties, ...) {
+ _cleanup_free_ char *p = NULL;
+ char **l;
+
+ assert(s);
+
+ if (!s->started)
+ return 0;
+
+ p = session_bus_path(s);
+ if (!p)
+ return -ENOMEM;
+
+ l = strv_from_stdarg_alloca(properties);
+
+ return sd_bus_emit_properties_changed_strv(s->manager->bus, p, "org.freedesktop.login1.Session", l);
+}
+
+int session_send_lock(Session *s, bool lock) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(s);
+
+ p = session_bus_path(s);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_emit_signal(
+ s->manager->bus,
+ p,
+ "org.freedesktop.login1.Session",
+ lock ? "Lock" : "Unlock",
+ NULL);
+}
+
+int session_send_lock_all(Manager *m, bool lock) {
+ Session *session;
+ Iterator i;
+ int r = 0;
+
+ assert(m);
+
+ HASHMAP_FOREACH(session, m->sessions, i) {
+ int k;
+
+ k = session_send_lock(session, lock);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static bool session_ready(Session *s) {
+ assert(s);
+
+ /* Returns true when the session is ready, i.e. all jobs we enqueued for it are done (regardless if successful or not) */
+
+ return !s->scope_job &&
+ !s->user->service_job;
+}
+
+int session_send_create_reply(Session *s, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *c = NULL;
+ _cleanup_close_ int fifo_fd = -1;
+ _cleanup_free_ char *p = NULL;
+
+ assert(s);
+
+ /* This is called after the session scope and the user service were successfully created, and finishes where
+ * bus_manager_create_session() left off. */
+
+ if (!s->create_message)
+ return 0;
+
+ if (!sd_bus_error_is_set(error) && !session_ready(s))
+ return 0;
+
+ c = TAKE_PTR(s->create_message);
+ if (error)
+ return sd_bus_reply_method_error(c, error);
+
+ fifo_fd = session_create_fifo(s);
+ if (fifo_fd < 0)
+ return fifo_fd;
+
+ /* Update the session state file before we notify the client about the result. */
+ session_save(s);
+
+ p = session_bus_path(s);
+ if (!p)
+ return -ENOMEM;
+
+ log_debug("Sending reply about created session: "
+ "id=%s object_path=%s uid=%u runtime_path=%s "
+ "session_fd=%d seat=%s vtnr=%u",
+ s->id,
+ p,
+ (uint32_t) s->user->uid,
+ s->user->runtime_path,
+ fifo_fd,
+ s->seat ? s->seat->id : "",
+ (uint32_t) s->vtnr);
+
+ return sd_bus_reply_method_return(
+ c, "soshusub",
+ s->id,
+ p,
+ s->user->runtime_path,
+ fifo_fd,
+ (uint32_t) s->user->uid,
+ s->seat ? s->seat->id : "",
+ (uint32_t) s->vtnr,
+ false);
+}
diff --git a/src/login/logind-session-device.c b/src/login/logind-session-device.c
new file mode 100644
index 0000000..f358524
--- /dev/null
+++ b/src/login/logind-session-device.c
@@ -0,0 +1,532 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <linux/input.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "logind-session-device.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "sd-daemon.h"
+#include "util.h"
+
+enum SessionDeviceNotifications {
+ SESSION_DEVICE_RESUME,
+ SESSION_DEVICE_TRY_PAUSE,
+ SESSION_DEVICE_PAUSE,
+ SESSION_DEVICE_RELEASE,
+};
+
+static int session_device_notify(SessionDevice *sd, enum SessionDeviceNotifications type) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *path = NULL;
+ const char *t = NULL;
+ uint32_t major, minor;
+ int r;
+
+ assert(sd);
+
+ major = major(sd->dev);
+ minor = minor(sd->dev);
+
+ if (!sd->session->controller)
+ return 0;
+
+ path = session_bus_path(sd->session);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_message_new_signal(
+ sd->session->manager->bus,
+ &m, path,
+ "org.freedesktop.login1.Session",
+ (type == SESSION_DEVICE_RESUME) ? "ResumeDevice" : "PauseDevice");
+ if (!m)
+ return r;
+
+ r = sd_bus_message_set_destination(m, sd->session->controller);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SESSION_DEVICE_RESUME:
+ r = sd_bus_message_append(m, "uuh", major, minor, sd->fd);
+ if (r < 0)
+ return r;
+ break;
+
+ case SESSION_DEVICE_TRY_PAUSE:
+ t = "pause";
+ break;
+
+ case SESSION_DEVICE_PAUSE:
+ t = "force";
+ break;
+
+ case SESSION_DEVICE_RELEASE:
+ t = "gone";
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if (t) {
+ r = sd_bus_message_append(m, "uus", major, minor, t);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_send(sd->session->manager->bus, m, NULL);
+}
+
+static void sd_eviocrevoke(int fd) {
+ static bool warned = false;
+
+ assert(fd >= 0);
+
+ if (ioctl(fd, EVIOCREVOKE, NULL) < 0) {
+
+ if (errno == EINVAL && !warned) {
+ log_warning_errno(errno, "Kernel does not support evdev-revocation: %m");
+ warned = true;
+ }
+ }
+}
+
+static int sd_drmsetmaster(int fd) {
+ assert(fd >= 0);
+
+ if (ioctl(fd, DRM_IOCTL_SET_MASTER, 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int sd_drmdropmaster(int fd) {
+ assert(fd >= 0);
+
+ if (ioctl(fd, DRM_IOCTL_DROP_MASTER, 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int session_device_open(SessionDevice *sd, bool active) {
+ int fd, r;
+
+ assert(sd);
+ assert(sd->type != DEVICE_TYPE_UNKNOWN);
+ assert(sd->node);
+
+ /* open device and try to get an udev_device from it */
+ fd = open(sd->node, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (fd < 0)
+ return -errno;
+
+ switch (sd->type) {
+
+ case DEVICE_TYPE_DRM:
+ if (active) {
+ /* Weird legacy DRM semantics might return an error even though we're master. No way to detect
+ * that so fail at all times and let caller retry in inactive state. */
+ r = sd_drmsetmaster(fd);
+ if (r < 0) {
+ close_nointr(fd);
+ return r;
+ }
+ } else
+ /* DRM-Master is granted to the first user who opens a device automatically (ughh,
+ * racy!). Hence, we just drop DRM-Master in case we were the first. */
+ (void) sd_drmdropmaster(fd);
+ break;
+
+ case DEVICE_TYPE_EVDEV:
+ if (!active)
+ sd_eviocrevoke(fd);
+ break;
+
+ case DEVICE_TYPE_UNKNOWN:
+ default:
+ /* fallback for devices wihout synchronizations */
+ break;
+ }
+
+ return fd;
+}
+
+static int session_device_start(SessionDevice *sd) {
+ int r;
+
+ assert(sd);
+ assert(session_is_active(sd->session));
+
+ if (sd->active)
+ return 0;
+
+ switch (sd->type) {
+
+ case DEVICE_TYPE_DRM:
+ if (sd->fd < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADF),
+ "Failed to re-activate DRM fd, as the fd was lost (maybe logind restart went wrong?)");
+
+ /* Device is kept open. Simply call drmSetMaster() and hope there is no-one else. In case it fails, we
+ * keep the device paused. Maybe at some point we have a drmStealMaster(). */
+ r = sd_drmsetmaster(sd->fd);
+ if (r < 0)
+ return r;
+ break;
+
+ case DEVICE_TYPE_EVDEV:
+ /* Evdev devices are revoked while inactive. Reopen it and we are fine. */
+ r = session_device_open(sd, true);
+ if (r < 0)
+ return r;
+
+ /* For evdev devices, the file descriptor might be left uninitialized. This might happen while resuming
+ * into a session and logind has been restarted right before. */
+ safe_close(sd->fd);
+ sd->fd = r;
+ break;
+
+ case DEVICE_TYPE_UNKNOWN:
+ default:
+ /* fallback for devices without synchronizations */
+ break;
+ }
+
+ sd->active = true;
+ return 0;
+}
+
+static void session_device_stop(SessionDevice *sd) {
+ assert(sd);
+
+ if (!sd->active)
+ return;
+
+ switch (sd->type) {
+
+ case DEVICE_TYPE_DRM:
+ if (sd->fd < 0) {
+ log_error("Failed to de-activate DRM fd, as the fd was lost (maybe logind restart went wrong?)");
+ return;
+ }
+
+ /* On DRM devices we simply drop DRM-Master but keep it open.
+ * This allows the user to keep resources allocated. The
+ * CAP_SYS_ADMIN restriction to DRM-Master prevents users from
+ * circumventing this. */
+ sd_drmdropmaster(sd->fd);
+ break;
+
+ case DEVICE_TYPE_EVDEV:
+ /* Revoke access on evdev file-descriptors during deactivation.
+ * This will basically prevent any operations on the fd and
+ * cannot be undone. Good side is: it needs no CAP_SYS_ADMIN
+ * protection this way. */
+ sd_eviocrevoke(sd->fd);
+ break;
+
+ case DEVICE_TYPE_UNKNOWN:
+ default:
+ /* fallback for devices without synchronization */
+ break;
+ }
+
+ sd->active = false;
+}
+
+static DeviceType detect_device_type(sd_device *dev) {
+ const char *sysname, *subsystem;
+ DeviceType type = DEVICE_TYPE_UNKNOWN;
+
+ if (sd_device_get_sysname(dev, &sysname) < 0 ||
+ sd_device_get_subsystem(dev, &subsystem) < 0)
+ return type;
+
+ if (streq(subsystem, "drm")) {
+ if (startswith(sysname, "card"))
+ type = DEVICE_TYPE_DRM;
+ } else if (streq(subsystem, "input")) {
+ if (startswith(sysname, "event"))
+ type = DEVICE_TYPE_EVDEV;
+ }
+
+ return type;
+}
+
+static int session_device_verify(SessionDevice *sd) {
+ _cleanup_(sd_device_unrefp) sd_device *p = NULL;
+ const char *sp, *node;
+ sd_device *dev;
+ int r;
+
+ r = sd_device_new_from_devnum(&p, 'c', sd->dev);
+ if (r < 0)
+ return r;
+
+ dev = p;
+
+ if (sd_device_get_syspath(dev, &sp) < 0 ||
+ sd_device_get_devname(dev, &node) < 0)
+ return -EINVAL;
+
+ /* detect device type so we can find the correct sysfs parent */
+ sd->type = detect_device_type(dev);
+ if (sd->type == DEVICE_TYPE_UNKNOWN)
+ return -ENODEV;
+
+ else if (sd->type == DEVICE_TYPE_EVDEV) {
+ /* for evdev devices we need the parent node as device */
+ if (sd_device_get_parent_with_subsystem_devtype(p, "input", NULL, &dev) < 0)
+ return -ENODEV;
+ if (sd_device_get_syspath(dev, &sp) < 0)
+ return -ENODEV;
+
+ } else if (sd->type != DEVICE_TYPE_DRM)
+ /* Prevent opening unsupported devices. Especially devices of
+ * subsystem "input" must be opened via the evdev node as
+ * we require EVIOCREVOKE. */
+ return -ENODEV;
+
+ /* search for an existing seat device and return it if available */
+ sd->device = hashmap_get(sd->session->manager->devices, sp);
+ if (!sd->device) {
+ /* The caller might have gotten the udev event before we were
+ * able to process it. Hence, fake the "add" event and let the
+ * logind-manager handle the new device. */
+ r = manager_process_seat_device(sd->session->manager, dev);
+ if (r < 0)
+ return r;
+
+ /* if it's still not available, then the device is invalid */
+ sd->device = hashmap_get(sd->session->manager->devices, sp);
+ if (!sd->device)
+ return -ENODEV;
+ }
+
+ if (sd->device->seat != sd->session->seat)
+ return -EPERM;
+
+ sd->node = strdup(node);
+ if (!sd->node)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int session_device_new(Session *s, dev_t dev, bool open_device, SessionDevice **out) {
+ SessionDevice *sd;
+ int r;
+
+ assert(s);
+ assert(out);
+
+ if (!s->seat)
+ return -EPERM;
+
+ sd = new0(SessionDevice, 1);
+ if (!sd)
+ return -ENOMEM;
+
+ sd->session = s;
+ sd->dev = dev;
+ sd->fd = -1;
+ sd->type = DEVICE_TYPE_UNKNOWN;
+
+ r = session_device_verify(sd);
+ if (r < 0)
+ goto error;
+
+ r = hashmap_put(s->devices, &sd->dev, sd);
+ if (r < 0)
+ goto error;
+
+ if (open_device) {
+ /* Open the device for the first time. We need a valid fd to pass back
+ * to the caller. If the session is not active, this _might_ immediately
+ * revoke access and thus invalidate the fd. But this is still needed
+ * to pass a valid fd back. */
+ sd->active = session_is_active(s);
+ r = session_device_open(sd, sd->active);
+ if (r < 0) {
+ /* EINVAL _may_ mean a master is active; retry inactive */
+ if (sd->active && r == -EINVAL) {
+ sd->active = false;
+ r = session_device_open(sd, false);
+ }
+ if (r < 0)
+ goto error;
+ }
+ sd->fd = r;
+ }
+
+ LIST_PREPEND(sd_by_device, sd->device->session_devices, sd);
+
+ *out = sd;
+ return 0;
+
+error:
+ hashmap_remove(s->devices, &sd->dev);
+ free(sd->node);
+ free(sd);
+ return r;
+}
+
+void session_device_free(SessionDevice *sd) {
+ assert(sd);
+
+ /* Make sure to remove the pushed fd. */
+ if (sd->pushed_fd) {
+ _cleanup_free_ char *m = NULL;
+ const char *id;
+ int r;
+
+ /* Session ID does not contain separators. */
+ id = sd->session->id;
+ assert(*(id + strcspn(id, "-\n")) == '\0');
+
+ r = asprintf(&m, "FDSTOREREMOVE=1\n"
+ "FDNAME=session-%s-device-%u-%u\n",
+ id, major(sd->dev), minor(sd->dev));
+ if (r >= 0)
+ (void) sd_notify(false, m);
+ }
+
+ session_device_stop(sd);
+ session_device_notify(sd, SESSION_DEVICE_RELEASE);
+ safe_close(sd->fd);
+
+ LIST_REMOVE(sd_by_device, sd->device->session_devices, sd);
+
+ hashmap_remove(sd->session->devices, &sd->dev);
+
+ free(sd->node);
+ free(sd);
+}
+
+void session_device_complete_pause(SessionDevice *sd) {
+ SessionDevice *iter;
+ Iterator i;
+
+ if (!sd->active)
+ return;
+
+ session_device_stop(sd);
+
+ /* if not all devices are paused, wait for further completion events */
+ HASHMAP_FOREACH(iter, sd->session->devices, i)
+ if (iter->active)
+ return;
+
+ /* complete any pending session switch */
+ seat_complete_switch(sd->session->seat);
+}
+
+void session_device_resume_all(Session *s) {
+ SessionDevice *sd;
+ Iterator i;
+
+ assert(s);
+
+ HASHMAP_FOREACH(sd, s->devices, i) {
+ if (sd->active)
+ continue;
+
+ if (session_device_start(sd) < 0)
+ continue;
+ if (session_device_save(sd) < 0)
+ continue;
+
+ session_device_notify(sd, SESSION_DEVICE_RESUME);
+ }
+}
+
+void session_device_pause_all(Session *s) {
+ SessionDevice *sd;
+ Iterator i;
+
+ assert(s);
+
+ HASHMAP_FOREACH(sd, s->devices, i) {
+ if (!sd->active)
+ continue;
+
+ session_device_stop(sd);
+ session_device_notify(sd, SESSION_DEVICE_PAUSE);
+ }
+}
+
+unsigned session_device_try_pause_all(Session *s) {
+ unsigned num_pending = 0;
+ SessionDevice *sd;
+ Iterator i;
+
+ assert(s);
+
+ HASHMAP_FOREACH(sd, s->devices, i) {
+ if (!sd->active)
+ continue;
+
+ session_device_notify(sd, SESSION_DEVICE_TRY_PAUSE);
+ num_pending++;
+ }
+
+ return num_pending;
+}
+
+int session_device_save(SessionDevice *sd) {
+ _cleanup_free_ char *m = NULL;
+ const char *id;
+ int r;
+
+ assert(sd);
+
+ /* Store device fd in PID1. It will send it back to us on restart so revocation will continue to work. To make
+ * things simple, send fds for all type of devices even if they don't support the revocation mechanism so we
+ * don't have to handle them differently later.
+ *
+ * Note: for device supporting revocation, PID1 will drop a stored fd automatically if the corresponding device
+ * is revoked. */
+
+ if (sd->pushed_fd)
+ return 0;
+
+ /* Session ID does not contain separators. */
+ id = sd->session->id;
+ assert(*(id + strcspn(id, "-\n")) == '\0');
+
+ r = asprintf(&m, "FDSTORE=1\n"
+ "FDNAME=session-%s-device-%u-%u\n",
+ id, major(sd->dev), minor(sd->dev));
+ if (r < 0)
+ return r;
+
+ r = sd_pid_notify_with_fds(0, false, m, &sd->fd, 1);
+ if (r < 0)
+ return r;
+
+ sd->pushed_fd = true;
+ return 1;
+}
+
+void session_device_attach_fd(SessionDevice *sd, int fd, bool active) {
+ assert(fd >= 0);
+ assert(sd);
+ assert(sd->fd < 0);
+ assert(!sd->active);
+
+ sd->fd = fd;
+ sd->pushed_fd = true;
+ sd->active = active;
+}
diff --git a/src/login/logind-session-device.h b/src/login/logind-session-device.h
new file mode 100644
index 0000000..6c20403
--- /dev/null
+++ b/src/login/logind-session-device.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef enum DeviceType DeviceType;
+typedef struct SessionDevice SessionDevice;
+
+#include "list.h"
+#include "logind.h"
+
+enum DeviceType {
+ DEVICE_TYPE_UNKNOWN,
+ DEVICE_TYPE_DRM,
+ DEVICE_TYPE_EVDEV,
+};
+
+struct SessionDevice {
+ Session *session;
+ Device *device;
+
+ dev_t dev;
+ char *node;
+ int fd;
+ DeviceType type:3;
+ bool active:1;
+ bool pushed_fd:1;
+
+ LIST_FIELDS(struct SessionDevice, sd_by_device);
+};
+
+int session_device_new(Session *s, dev_t dev, bool open_device, SessionDevice **out);
+void session_device_free(SessionDevice *sd);
+void session_device_complete_pause(SessionDevice *sd);
+
+void session_device_resume_all(Session *s);
+void session_device_pause_all(Session *s);
+unsigned session_device_try_pause_all(Session *s);
+
+int session_device_save(SessionDevice *sd);
+void session_device_attach_fd(SessionDevice *sd, int fd, bool active);
diff --git a/src/login/logind-session.c b/src/login/logind-session.c
new file mode 100644
index 0000000..90a9108
--- /dev/null
+++ b/src/login/logind-session.c
@@ -0,0 +1,1420 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/kd.h>
+#include <linux/vt.h>
+#include <signal.h>
+#include <stdio_ext.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "env-file.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "logind-session.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "string-table.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+#define RELEASE_USEC (20*USEC_PER_SEC)
+
+static void session_remove_fifo(Session *s);
+static void session_restore_vt(Session *s);
+
+int session_new(Session **ret, Manager *m, const char *id) {
+ _cleanup_(session_freep) Session *s = NULL;
+ int r;
+
+ assert(ret);
+ assert(m);
+ assert(id);
+
+ if (!session_id_valid(id))
+ return -EINVAL;
+
+ s = new(Session, 1);
+ if (!s)
+ return -ENOMEM;
+
+ *s = (Session) {
+ .manager = m,
+ .fifo_fd = -1,
+ .vtfd = -1,
+ .audit_id = AUDIT_SESSION_INVALID,
+ .tty_validity = _TTY_VALIDITY_INVALID,
+ };
+
+ s->state_file = strappend("/run/systemd/sessions/", id);
+ if (!s->state_file)
+ return -ENOMEM;
+
+ s->id = basename(s->state_file);
+
+ s->devices = hashmap_new(&devt_hash_ops);
+ if (!s->devices)
+ return -ENOMEM;
+
+ r = hashmap_put(m->sessions, s->id, s);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+Session* session_free(Session *s) {
+ SessionDevice *sd;
+
+ if (!s)
+ return NULL;
+
+ if (s->in_gc_queue)
+ LIST_REMOVE(gc_queue, s->manager->session_gc_queue, s);
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+ session_remove_fifo(s);
+
+ session_drop_controller(s);
+
+ while ((sd = hashmap_first(s->devices)))
+ session_device_free(sd);
+
+ hashmap_free(s->devices);
+
+ if (s->user) {
+ LIST_REMOVE(sessions_by_user, s->user->sessions, s);
+
+ if (s->user->display == s)
+ s->user->display = NULL;
+
+ user_update_last_session_timer(s->user);
+ }
+
+ if (s->seat) {
+ if (s->seat->active == s)
+ s->seat->active = NULL;
+ if (s->seat->pending_switch == s)
+ s->seat->pending_switch = NULL;
+
+ seat_evict_position(s->seat, s);
+ LIST_REMOVE(sessions_by_seat, s->seat->sessions, s);
+ }
+
+ if (s->scope) {
+ hashmap_remove(s->manager->session_units, s->scope);
+ free(s->scope);
+ }
+
+ if (pid_is_valid(s->leader))
+ (void) hashmap_remove_value(s->manager->sessions_by_leader, PID_TO_PTR(s->leader), s);
+
+ free(s->scope_job);
+
+ sd_bus_message_unref(s->create_message);
+
+ free(s->tty);
+ free(s->display);
+ free(s->remote_host);
+ free(s->remote_user);
+ free(s->service);
+ free(s->desktop);
+
+ hashmap_remove(s->manager->sessions, s->id);
+
+ free(s->state_file);
+
+ return mfree(s);
+}
+
+void session_set_user(Session *s, User *u) {
+ assert(s);
+ assert(!s->user);
+
+ s->user = u;
+ LIST_PREPEND(sessions_by_user, u->sessions, s);
+
+ user_update_last_session_timer(u);
+}
+
+int session_set_leader(Session *s, pid_t pid) {
+ int r;
+
+ assert(s);
+
+ if (!pid_is_valid(pid))
+ return -EINVAL;
+
+ if (s->leader == pid)
+ return 0;
+
+ r = hashmap_put(s->manager->sessions_by_leader, PID_TO_PTR(pid), s);
+ if (r < 0)
+ return r;
+
+ if (pid_is_valid(s->leader))
+ (void) hashmap_remove_value(s->manager->sessions_by_leader, PID_TO_PTR(s->leader), s);
+
+ s->leader = pid;
+ (void) audit_session_from_pid(pid, &s->audit_id);
+
+ return 1;
+}
+
+static void session_save_devices(Session *s, FILE *f) {
+ SessionDevice *sd;
+ Iterator i;
+
+ if (!hashmap_isempty(s->devices)) {
+ fprintf(f, "DEVICES=");
+ HASHMAP_FOREACH(sd, s->devices, i)
+ fprintf(f, "%u:%u ", major(sd->dev), minor(sd->dev));
+ fprintf(f, "\n");
+ }
+}
+
+int session_save(Session *s) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r = 0;
+
+ assert(s);
+
+ if (!s->user)
+ return -ESTALE;
+
+ if (!s->started)
+ return 0;
+
+ r = mkdir_safe_label("/run/systemd/sessions", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ goto fail;
+
+ r = fopen_temporary(s->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "UID="UID_FMT"\n"
+ "USER=%s\n"
+ "ACTIVE=%i\n"
+ "IS_DISPLAY=%i\n"
+ "STATE=%s\n"
+ "REMOTE=%i\n",
+ s->user->uid,
+ s->user->name,
+ session_is_active(s),
+ s->user->display == s,
+ session_state_to_string(session_get_state(s)),
+ s->remote);
+
+ if (s->type >= 0)
+ fprintf(f, "TYPE=%s\n", session_type_to_string(s->type));
+
+ if (s->class >= 0)
+ fprintf(f, "CLASS=%s\n", session_class_to_string(s->class));
+
+ if (s->scope)
+ fprintf(f, "SCOPE=%s\n", s->scope);
+ if (s->scope_job)
+ fprintf(f, "SCOPE_JOB=%s\n", s->scope_job);
+
+ if (s->fifo_path)
+ fprintf(f, "FIFO=%s\n", s->fifo_path);
+
+ if (s->seat)
+ fprintf(f, "SEAT=%s\n", s->seat->id);
+
+ if (s->tty)
+ fprintf(f, "TTY=%s\n", s->tty);
+
+ if (s->tty_validity >= 0)
+ fprintf(f, "TTY_VALIDITY=%s\n", tty_validity_to_string(s->tty_validity));
+
+ if (s->display)
+ fprintf(f, "DISPLAY=%s\n", s->display);
+
+ if (s->remote_host) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(s->remote_host);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "REMOTE_HOST=%s\n", escaped);
+ }
+
+ if (s->remote_user) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(s->remote_user);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "REMOTE_USER=%s\n", escaped);
+ }
+
+ if (s->service) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(s->service);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "SERVICE=%s\n", escaped);
+ }
+
+ if (s->desktop) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(s->desktop);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "DESKTOP=%s\n", escaped);
+ }
+
+ if (s->seat && seat_has_vts(s->seat))
+ fprintf(f, "VTNR=%u\n", s->vtnr);
+
+ if (!s->vtnr)
+ fprintf(f, "POSITION=%u\n", s->position);
+
+ if (pid_is_valid(s->leader))
+ fprintf(f, "LEADER="PID_FMT"\n", s->leader);
+
+ if (audit_session_is_valid(s->audit_id))
+ fprintf(f, "AUDIT=%"PRIu32"\n", s->audit_id);
+
+ if (dual_timestamp_is_set(&s->timestamp))
+ fprintf(f,
+ "REALTIME="USEC_FMT"\n"
+ "MONOTONIC="USEC_FMT"\n",
+ s->timestamp.realtime,
+ s->timestamp.monotonic);
+
+ if (s->controller) {
+ fprintf(f, "CONTROLLER=%s\n", s->controller);
+ session_save_devices(s, f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, s->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(s->state_file);
+
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save session data %s: %m", s->state_file);
+}
+
+static int session_load_devices(Session *s, const char *devices) {
+ const char *p;
+ int r = 0;
+
+ assert(s);
+
+ for (p = devices;;) {
+ _cleanup_free_ char *word = NULL;
+ SessionDevice *sd;
+ dev_t dev;
+ int k;
+
+ k = extract_first_word(&p, &word, NULL, 0);
+ if (k == 0)
+ break;
+ if (k < 0) {
+ r = k;
+ break;
+ }
+
+ k = parse_dev(word, &dev);
+ if (k < 0) {
+ r = k;
+ continue;
+ }
+
+ /* The file descriptors for loaded devices will be reattached later. */
+ k = session_device_new(s, dev, false, &sd);
+ if (k < 0)
+ r = k;
+ }
+
+ if (r < 0)
+ log_error_errno(r, "Loading session devices for session %s failed: %m", s->id);
+
+ return r;
+}
+
+int session_load(Session *s) {
+ _cleanup_free_ char *remote = NULL,
+ *seat = NULL,
+ *tty_validity = NULL,
+ *vtnr = NULL,
+ *state = NULL,
+ *position = NULL,
+ *leader = NULL,
+ *type = NULL,
+ *class = NULL,
+ *uid = NULL,
+ *realtime = NULL,
+ *monotonic = NULL,
+ *controller = NULL,
+ *active = NULL,
+ *devices = NULL,
+ *is_display = NULL;
+
+ int k, r;
+
+ assert(s);
+
+ r = parse_env_file(NULL, s->state_file,
+ "REMOTE", &remote,
+ "SCOPE", &s->scope,
+ "SCOPE_JOB", &s->scope_job,
+ "FIFO", &s->fifo_path,
+ "SEAT", &seat,
+ "TTY", &s->tty,
+ "TTY_VALIDITY", &tty_validity,
+ "DISPLAY", &s->display,
+ "REMOTE_HOST", &s->remote_host,
+ "REMOTE_USER", &s->remote_user,
+ "SERVICE", &s->service,
+ "DESKTOP", &s->desktop,
+ "VTNR", &vtnr,
+ "STATE", &state,
+ "POSITION", &position,
+ "LEADER", &leader,
+ "TYPE", &type,
+ "CLASS", &class,
+ "UID", &uid,
+ "REALTIME", &realtime,
+ "MONOTONIC", &monotonic,
+ "CONTROLLER", &controller,
+ "ACTIVE", &active,
+ "DEVICES", &devices,
+ "IS_DISPLAY", &is_display);
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to read %s: %m", s->state_file);
+
+ if (!s->user) {
+ uid_t u;
+ User *user;
+
+ if (!uid)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "UID not specified for session %s",
+ s->id);
+
+ r = parse_uid(uid, &u);
+ if (r < 0) {
+ log_error("Failed to parse UID value %s for session %s.", uid, s->id);
+ return r;
+ }
+
+ user = hashmap_get(s->manager->users, UID_TO_PTR(u));
+ if (!user)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "User of session %s not known.",
+ s->id);
+
+ session_set_user(s, user);
+ }
+
+ if (remote) {
+ k = parse_boolean(remote);
+ if (k >= 0)
+ s->remote = k;
+ }
+
+ if (vtnr)
+ safe_atou(vtnr, &s->vtnr);
+
+ if (seat && !s->seat) {
+ Seat *o;
+
+ o = hashmap_get(s->manager->seats, seat);
+ if (o)
+ r = seat_attach_session(o, s);
+ if (!o || r < 0)
+ log_error("Cannot attach session %s to seat %s", s->id, seat);
+ }
+
+ if (!s->seat || !seat_has_vts(s->seat))
+ s->vtnr = 0;
+
+ if (position && s->seat) {
+ unsigned npos;
+
+ safe_atou(position, &npos);
+ seat_claim_position(s->seat, s, npos);
+ }
+
+ if (tty_validity) {
+ TTYValidity v;
+
+ v = tty_validity_from_string(tty_validity);
+ if (v < 0)
+ log_debug("Failed to parse TTY validity: %s", tty_validity);
+ else
+ s->tty_validity = v;
+ }
+
+ if (leader) {
+ pid_t pid;
+
+ r = parse_pid(leader, &pid);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse leader PID of session: %s", leader);
+ else {
+ r = session_set_leader(s, pid);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set session leader PID, ignoring: %m");
+ }
+ }
+
+ if (type) {
+ SessionType t;
+
+ t = session_type_from_string(type);
+ if (t >= 0)
+ s->type = t;
+ }
+
+ if (class) {
+ SessionClass c;
+
+ c = session_class_from_string(class);
+ if (c >= 0)
+ s->class = c;
+ }
+
+ if (state && streq(state, "closing"))
+ s->stopping = true;
+
+ if (s->fifo_path) {
+ int fd;
+
+ /* If we open an unopened pipe for reading we will not
+ get an EOF. to trigger an EOF we hence open it for
+ writing, but close it right away which then will
+ trigger the EOF. This will happen immediately if no
+ other process has the FIFO open for writing, i. e.
+ when the session died before logind (re)started. */
+
+ fd = session_create_fifo(s);
+ safe_close(fd);
+ }
+
+ if (realtime)
+ (void) deserialize_usec(realtime, &s->timestamp.realtime);
+ if (monotonic)
+ (void) deserialize_usec(monotonic, &s->timestamp.monotonic);
+
+ if (active) {
+ k = parse_boolean(active);
+ if (k >= 0)
+ s->was_active = k;
+ }
+
+ if (is_display) {
+ /* Note that when enumerating users are loaded before sessions, hence the display session to use is
+ * something we have to store along with the session and not the user, as in that case we couldn't
+ * apply it at the time we load the user. */
+
+ k = parse_boolean(is_display);
+ if (k < 0)
+ log_warning_errno(k, "Failed to parse IS_DISPLAY session property: %m");
+ else if (k > 0)
+ s->user->display = s;
+ }
+
+ if (controller) {
+ if (bus_name_has_owner(s->manager->bus, controller, NULL) > 0) {
+ session_set_controller(s, controller, false, false);
+ session_load_devices(s, devices);
+ } else
+ session_restore_vt(s);
+ }
+
+ return r;
+}
+
+int session_activate(Session *s) {
+ unsigned num_pending;
+
+ assert(s);
+ assert(s->user);
+
+ if (!s->seat)
+ return -EOPNOTSUPP;
+
+ if (s->seat->active == s)
+ return 0;
+
+ /* on seats with VTs, we let VTs manage session-switching */
+ if (seat_has_vts(s->seat)) {
+ if (s->vtnr == 0)
+ return -EOPNOTSUPP;
+
+ return chvt(s->vtnr);
+ }
+
+ /* On seats without VTs, we implement session-switching in logind. We
+ * try to pause all session-devices and wait until the session
+ * controller acknowledged them. Once all devices are asleep, we simply
+ * switch the active session and be done.
+ * We save the session we want to switch to in seat->pending_switch and
+ * seat_complete_switch() will perform the final switch. */
+
+ s->seat->pending_switch = s;
+
+ /* if no devices are running, immediately perform the session switch */
+ num_pending = session_device_try_pause_all(s);
+ if (!num_pending)
+ seat_complete_switch(s->seat);
+
+ return 0;
+}
+
+static int session_start_scope(Session *s, sd_bus_message *properties, sd_bus_error *error) {
+ int r;
+
+ assert(s);
+ assert(s->user);
+
+ if (!s->scope) {
+ _cleanup_free_ char *scope = NULL;
+ const char *description;
+
+ s->scope_job = mfree(s->scope_job);
+
+ scope = strjoin("session-", s->id, ".scope");
+ if (!scope)
+ return log_oom();
+
+ description = strjoina("Session ", s->id, " of user ", s->user->name);
+
+ r = manager_start_scope(
+ s->manager,
+ scope,
+ s->leader,
+ s->user->slice,
+ description,
+ STRV_MAKE(s->user->runtime_dir_service, s->user->service), /* These two have StopWhenUnneeded= set, hence add a dep towards them */
+ STRV_MAKE("systemd-logind.service", "systemd-user-sessions.service", s->user->runtime_dir_service, s->user->service), /* And order us after some more */
+ s->user->home,
+ properties,
+ error,
+ &s->scope_job);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start session scope %s: %s", scope, bus_error_message(error, r));
+
+ s->scope = TAKE_PTR(scope);
+ }
+
+ if (s->scope)
+ (void) hashmap_put(s->manager->session_units, s->scope, s);
+
+ return 0;
+}
+
+int session_start(Session *s, sd_bus_message *properties, sd_bus_error *error) {
+ int r;
+
+ assert(s);
+
+ if (!s->user)
+ return -ESTALE;
+
+ if (s->stopping)
+ return -EINVAL;
+
+ if (s->started)
+ return 0;
+
+ r = user_start(s->user);
+ if (r < 0)
+ return r;
+
+ r = session_start_scope(s, properties, error);
+ if (r < 0)
+ return r;
+
+ log_struct(s->class == SESSION_BACKGROUND ? LOG_DEBUG : LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_SESSION_START_STR,
+ "SESSION_ID=%s", s->id,
+ "USER_ID=%s", s->user->name,
+ "LEADER="PID_FMT, s->leader,
+ LOG_MESSAGE("New session %s of user %s.", s->id, s->user->name));
+
+ if (!dual_timestamp_is_set(&s->timestamp))
+ dual_timestamp_get(&s->timestamp);
+
+ if (s->seat)
+ seat_read_active_vt(s->seat);
+
+ s->started = true;
+
+ user_elect_display(s->user);
+
+ /* Save data */
+ session_save(s);
+ user_save(s->user);
+ if (s->seat)
+ seat_save(s->seat);
+
+ /* Send signals */
+ session_send_signal(s, true);
+ user_send_changed(s->user, "Display", NULL);
+ if (s->seat) {
+ if (s->seat->active == s)
+ seat_send_changed(s->seat, "ActiveSession", NULL);
+ }
+
+ return 0;
+}
+
+static int session_stop_scope(Session *s, bool force) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(s);
+
+ if (!s->scope)
+ return 0;
+
+ /* Let's always abandon the scope first. This tells systemd that we are not interested anymore, and everything
+ * that is left in the scope is "left-over". Informing systemd about this has the benefit that it will log
+ * when killing any processes left after this point. */
+ r = manager_abandon_scope(s->manager, s->scope, &error);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to abandon session scope, ignoring: %s", bus_error_message(&error, r));
+ sd_bus_error_free(&error);
+ }
+
+ s->scope_job = mfree(s->scope_job);
+
+ /* Optionally, let's kill everything that's left now. */
+ if (force || manager_shall_kill(s->manager, s->user->name)) {
+
+ r = manager_stop_unit(s->manager, s->scope, &error, &s->scope_job);
+ if (r < 0) {
+ if (force)
+ return log_error_errno(r, "Failed to stop session scope: %s", bus_error_message(&error, r));
+
+ log_warning_errno(r, "Failed to stop session scope, ignoring: %s", bus_error_message(&error, r));
+ }
+ } else {
+
+ /* With no killing, this session is allowed to persist in "closing" state indefinitely.
+ * Therefore session stop and session removal may be two distinct events.
+ * Session stop is quite significant on its own, let's log it. */
+ log_struct(s->class == SESSION_BACKGROUND ? LOG_DEBUG : LOG_INFO,
+ "SESSION_ID=%s", s->id,
+ "USER_ID=%s", s->user->name,
+ "LEADER="PID_FMT, s->leader,
+ LOG_MESSAGE("Session %s logged out. Waiting for processes to exit.", s->id));
+ }
+
+ return 0;
+}
+
+int session_stop(Session *s, bool force) {
+ int r;
+
+ assert(s);
+
+ /* This is called whenever we begin with tearing down a session record. It's called in four cases: explicit API
+ * request via the bus (either directly for the session object or for the seat or user object this session
+ * belongs to; 'force' is true), or due to automatic GC (i.e. scope vanished; 'force' is false), or because the
+ * session FIFO saw an EOF ('force' is false), or because the release timer hit ('force' is false). */
+
+ if (!s->user)
+ return -ESTALE;
+ if (!s->started)
+ return 0;
+ if (s->stopping)
+ return 0;
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+ if (s->seat)
+ seat_evict_position(s->seat, s);
+
+ /* We are going down, don't care about FIFOs anymore */
+ session_remove_fifo(s);
+
+ /* Kill cgroup */
+ r = session_stop_scope(s, force);
+
+ s->stopping = true;
+
+ user_elect_display(s->user);
+
+ session_save(s);
+ user_save(s->user);
+
+ return r;
+}
+
+int session_finalize(Session *s) {
+ SessionDevice *sd;
+
+ assert(s);
+
+ if (!s->user)
+ return -ESTALE;
+
+ if (s->started)
+ log_struct(s->class == SESSION_BACKGROUND ? LOG_DEBUG : LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_SESSION_STOP_STR,
+ "SESSION_ID=%s", s->id,
+ "USER_ID=%s", s->user->name,
+ "LEADER="PID_FMT, s->leader,
+ LOG_MESSAGE("Removed session %s.", s->id));
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+ if (s->seat)
+ seat_evict_position(s->seat, s);
+
+ /* Kill session devices */
+ while ((sd = hashmap_first(s->devices)))
+ session_device_free(sd);
+
+ (void) unlink(s->state_file);
+ session_add_to_gc_queue(s);
+ user_add_to_gc_queue(s->user);
+
+ if (s->started) {
+ session_send_signal(s, false);
+ s->started = false;
+ }
+
+ if (s->seat) {
+ if (s->seat->active == s)
+ seat_set_active(s->seat, NULL);
+
+ seat_save(s->seat);
+ }
+
+ user_save(s->user);
+ user_send_changed(s->user, "Display", NULL);
+
+ return 0;
+}
+
+static int release_timeout_callback(sd_event_source *es, uint64_t usec, void *userdata) {
+ Session *s = userdata;
+
+ assert(es);
+ assert(s);
+
+ session_stop(s, false);
+ return 0;
+}
+
+int session_release(Session *s) {
+ assert(s);
+
+ if (!s->started || s->stopping)
+ return 0;
+
+ if (s->timer_event_source)
+ return 0;
+
+ return sd_event_add_time(s->manager->event,
+ &s->timer_event_source,
+ CLOCK_MONOTONIC,
+ usec_add(now(CLOCK_MONOTONIC), RELEASE_USEC), 0,
+ release_timeout_callback, s);
+}
+
+bool session_is_active(Session *s) {
+ assert(s);
+
+ if (!s->seat)
+ return true;
+
+ return s->seat->active == s;
+}
+
+static int get_tty_atime(const char *tty, usec_t *atime) {
+ _cleanup_free_ char *p = NULL;
+ struct stat st;
+
+ assert(tty);
+ assert(atime);
+
+ if (!path_is_absolute(tty)) {
+ p = strappend("/dev/", tty);
+ if (!p)
+ return -ENOMEM;
+
+ tty = p;
+ } else if (!path_startswith(tty, "/dev/"))
+ return -ENOENT;
+
+ if (lstat(tty, &st) < 0)
+ return -errno;
+
+ *atime = timespec_load(&st.st_atim);
+ return 0;
+}
+
+static int get_process_ctty_atime(pid_t pid, usec_t *atime) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(pid > 0);
+ assert(atime);
+
+ r = get_ctty(pid, NULL, &p);
+ if (r < 0)
+ return r;
+
+ return get_tty_atime(p, atime);
+}
+
+int session_get_idle_hint(Session *s, dual_timestamp *t) {
+ usec_t atime = 0, n;
+ int r;
+
+ assert(s);
+
+ /* Explicit idle hint is set */
+ if (s->idle_hint) {
+ if (t)
+ *t = s->idle_hint_timestamp;
+
+ return s->idle_hint;
+ }
+
+ /* Graphical sessions should really implement a real
+ * idle hint logic */
+ if (SESSION_TYPE_IS_GRAPHICAL(s->type))
+ goto dont_know;
+
+ /* For sessions with an explicitly configured tty, let's check
+ * its atime */
+ if (s->tty) {
+ r = get_tty_atime(s->tty, &atime);
+ if (r >= 0)
+ goto found_atime;
+ }
+
+ /* For sessions with a leader but no explicitly configured
+ * tty, let's check the controlling tty of the leader */
+ if (pid_is_valid(s->leader)) {
+ r = get_process_ctty_atime(s->leader, &atime);
+ if (r >= 0)
+ goto found_atime;
+ }
+
+dont_know:
+ if (t)
+ *t = s->idle_hint_timestamp;
+
+ return 0;
+
+found_atime:
+ if (t)
+ dual_timestamp_from_realtime(t, atime);
+
+ n = now(CLOCK_REALTIME);
+
+ if (s->manager->idle_action_usec <= 0)
+ return 0;
+
+ return atime + s->manager->idle_action_usec <= n;
+}
+
+void session_set_idle_hint(Session *s, bool b) {
+ assert(s);
+
+ if (s->idle_hint == b)
+ return;
+
+ s->idle_hint = b;
+ dual_timestamp_get(&s->idle_hint_timestamp);
+
+ session_send_changed(s, "IdleHint", "IdleSinceHint", "IdleSinceHintMonotonic", NULL);
+
+ if (s->seat)
+ seat_send_changed(s->seat, "IdleHint", "IdleSinceHint", "IdleSinceHintMonotonic", NULL);
+
+ user_send_changed(s->user, "IdleHint", "IdleSinceHint", "IdleSinceHintMonotonic", NULL);
+ manager_send_changed(s->manager, "IdleHint", "IdleSinceHint", "IdleSinceHintMonotonic", NULL);
+}
+
+int session_get_locked_hint(Session *s) {
+ assert(s);
+
+ return s->locked_hint;
+}
+
+void session_set_locked_hint(Session *s, bool b) {
+ assert(s);
+
+ if (s->locked_hint == b)
+ return;
+
+ s->locked_hint = b;
+
+ session_send_changed(s, "LockedHint", NULL);
+}
+
+static int session_dispatch_fifo(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ Session *s = userdata;
+
+ assert(s);
+ assert(s->fifo_fd == fd);
+
+ /* EOF on the FIFO means the session died abnormally. */
+
+ session_remove_fifo(s);
+ session_stop(s, false);
+
+ return 1;
+}
+
+int session_create_fifo(Session *s) {
+ int r;
+
+ assert(s);
+
+ /* Create FIFO */
+ if (!s->fifo_path) {
+ r = mkdir_safe_label("/run/systemd/sessions", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ return r;
+
+ s->fifo_path = strjoin("/run/systemd/sessions/", s->id, ".ref");
+ if (!s->fifo_path)
+ return -ENOMEM;
+
+ if (mkfifo(s->fifo_path, 0600) < 0 && errno != EEXIST)
+ return -errno;
+ }
+
+ /* Open reading side */
+ if (s->fifo_fd < 0) {
+ s->fifo_fd = open(s->fifo_path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (s->fifo_fd < 0)
+ return -errno;
+ }
+
+ if (!s->fifo_event_source) {
+ r = sd_event_add_io(s->manager->event, &s->fifo_event_source, s->fifo_fd, 0, session_dispatch_fifo, s);
+ if (r < 0)
+ return r;
+
+ /* Let's make sure we noticed dead sessions before we process new bus requests (which might create new
+ * sessions). */
+ r = sd_event_source_set_priority(s->fifo_event_source, SD_EVENT_PRIORITY_NORMAL-10);
+ if (r < 0)
+ return r;
+ }
+
+ /* Open writing side */
+ r = open(s->fifo_path, O_WRONLY|O_CLOEXEC|O_NONBLOCK);
+ if (r < 0)
+ return -errno;
+
+ return r;
+}
+
+static void session_remove_fifo(Session *s) {
+ assert(s);
+
+ s->fifo_event_source = sd_event_source_unref(s->fifo_event_source);
+ s->fifo_fd = safe_close(s->fifo_fd);
+
+ if (s->fifo_path) {
+ (void) unlink(s->fifo_path);
+ s->fifo_path = mfree(s->fifo_path);
+ }
+}
+
+bool session_may_gc(Session *s, bool drop_not_started) {
+ int r;
+
+ assert(s);
+
+ if (drop_not_started && !s->started)
+ return true;
+
+ if (!s->user)
+ return true;
+
+ if (s->fifo_fd >= 0) {
+ if (pipe_eof(s->fifo_fd) <= 0)
+ return false;
+ }
+
+ if (s->scope_job) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = manager_job_is_active(s->manager, s->scope_job, &error);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine whether job '%s' is pending, ignoring: %s", s->scope_job, bus_error_message(&error, r));
+ if (r != 0)
+ return false;
+ }
+
+ if (s->scope) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = manager_unit_is_active(s->manager, s->scope, &error);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine whether unit '%s' is active, ignoring: %s", s->scope, bus_error_message(&error, r));
+ if (r != 0)
+ return false;
+ }
+
+ return true;
+}
+
+void session_add_to_gc_queue(Session *s) {
+ assert(s);
+
+ if (s->in_gc_queue)
+ return;
+
+ LIST_PREPEND(gc_queue, s->manager->session_gc_queue, s);
+ s->in_gc_queue = true;
+}
+
+SessionState session_get_state(Session *s) {
+ assert(s);
+
+ /* always check closing first */
+ if (s->stopping || s->timer_event_source)
+ return SESSION_CLOSING;
+
+ if (s->scope_job || s->fifo_fd < 0)
+ return SESSION_OPENING;
+
+ if (session_is_active(s))
+ return SESSION_ACTIVE;
+
+ return SESSION_ONLINE;
+}
+
+int session_kill(Session *s, KillWho who, int signo) {
+ assert(s);
+
+ if (!s->scope)
+ return -ESRCH;
+
+ return manager_kill_unit(s->manager, s->scope, who, signo, NULL);
+}
+
+static int session_open_vt(Session *s) {
+ char path[sizeof("/dev/tty") + DECIMAL_STR_MAX(s->vtnr)];
+
+ if (s->vtnr < 1)
+ return -ENODEV;
+
+ if (s->vtfd >= 0)
+ return s->vtfd;
+
+ sprintf(path, "/dev/tty%u", s->vtnr);
+ s->vtfd = open_terminal(path, O_RDWR | O_CLOEXEC | O_NONBLOCK | O_NOCTTY);
+ if (s->vtfd < 0)
+ return log_error_errno(s->vtfd, "cannot open VT %s of session %s: %m", path, s->id);
+
+ return s->vtfd;
+}
+
+int session_prepare_vt(Session *s) {
+ int vt, r;
+ struct vt_mode mode = { 0 };
+
+ if (s->vtnr < 1)
+ return 0;
+
+ vt = session_open_vt(s);
+ if (vt < 0)
+ return vt;
+
+ r = fchown(vt, s->user->uid, -1);
+ if (r < 0) {
+ r = log_error_errno(errno,
+ "Cannot change owner of /dev/tty%u: %m",
+ s->vtnr);
+ goto error;
+ }
+
+ r = ioctl(vt, KDSKBMODE, K_OFF);
+ if (r < 0) {
+ r = log_error_errno(errno,
+ "Cannot set K_OFF on /dev/tty%u: %m",
+ s->vtnr);
+ goto error;
+ }
+
+ r = ioctl(vt, KDSETMODE, KD_GRAPHICS);
+ if (r < 0) {
+ r = log_error_errno(errno,
+ "Cannot set KD_GRAPHICS on /dev/tty%u: %m",
+ s->vtnr);
+ goto error;
+ }
+
+ /* Oh, thanks to the VT layer, VT_AUTO does not work with KD_GRAPHICS.
+ * So we need a dummy handler here which just acknowledges *all* VT
+ * switch requests. */
+ mode.mode = VT_PROCESS;
+ mode.relsig = SIGRTMIN;
+ mode.acqsig = SIGRTMIN + 1;
+ r = ioctl(vt, VT_SETMODE, &mode);
+ if (r < 0) {
+ r = log_error_errno(errno,
+ "Cannot set VT_PROCESS on /dev/tty%u: %m",
+ s->vtnr);
+ goto error;
+ }
+
+ return 0;
+
+error:
+ session_restore_vt(s);
+ return r;
+}
+
+static void session_restore_vt(Session *s) {
+ int r, vt, old_fd;
+
+ /* We need to get a fresh handle to the virtual terminal,
+ * since the old file-descriptor is potentially in a hung-up
+ * state after the controlling process exited; we do a
+ * little dance to avoid having the terminal be available
+ * for reuse before we've cleaned it up.
+ */
+ old_fd = TAKE_FD(s->vtfd);
+
+ vt = session_open_vt(s);
+ safe_close(old_fd);
+
+ if (vt < 0)
+ return;
+
+ r = vt_restore(vt);
+ if (r < 0)
+ log_warning_errno(r, "Failed to restore VT, ignoring: %m");
+
+ s->vtfd = safe_close(s->vtfd);
+}
+
+void session_leave_vt(Session *s) {
+ int r;
+
+ assert(s);
+
+ /* This is called whenever we get a VT-switch signal from the kernel.
+ * We acknowledge all of them unconditionally. Note that session are
+ * free to overwrite those handlers and we only register them for
+ * sessions with controllers. Legacy sessions are not affected.
+ * However, if we switch from a non-legacy to a legacy session, we must
+ * make sure to pause all device before acknowledging the switch. We
+ * process the real switch only after we are notified via sysfs, so the
+ * legacy session might have already started using the devices. If we
+ * don't pause the devices before the switch, we might confuse the
+ * session we switch to. */
+
+ if (s->vtfd < 0)
+ return;
+
+ session_device_pause_all(s);
+ r = vt_release(s->vtfd, false);
+ if (r < 0)
+ log_debug_errno(r, "Cannot release VT of session %s: %m", s->id);
+}
+
+bool session_is_controller(Session *s, const char *sender) {
+ assert(s);
+
+ return streq_ptr(s->controller, sender);
+}
+
+static void session_release_controller(Session *s, bool notify) {
+ _cleanup_free_ char *name = NULL;
+ SessionDevice *sd;
+
+ if (!s->controller)
+ return;
+
+ name = s->controller;
+
+ /* By resetting the controller before releasing the devices, we won't
+ * send notification signals. This avoids sending useless notifications
+ * if the controller is released on disconnects. */
+ if (!notify)
+ s->controller = NULL;
+
+ while ((sd = hashmap_first(s->devices)))
+ session_device_free(sd);
+
+ s->controller = NULL;
+ s->track = sd_bus_track_unref(s->track);
+}
+
+static int on_bus_track(sd_bus_track *track, void *userdata) {
+ Session *s = userdata;
+
+ assert(track);
+ assert(s);
+
+ session_drop_controller(s);
+
+ return 0;
+}
+
+int session_set_controller(Session *s, const char *sender, bool force, bool prepare) {
+ _cleanup_free_ char *name = NULL;
+ int r;
+
+ assert(s);
+ assert(sender);
+
+ if (session_is_controller(s, sender))
+ return 0;
+ if (s->controller && !force)
+ return -EBUSY;
+
+ name = strdup(sender);
+ if (!name)
+ return -ENOMEM;
+
+ s->track = sd_bus_track_unref(s->track);
+ r = sd_bus_track_new(s->manager->bus, &s->track, on_bus_track, s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_track_add_name(s->track, name);
+ if (r < 0)
+ return r;
+
+ /* When setting a session controller, we forcibly mute the VT and set
+ * it into graphics-mode. Applications can override that by changing
+ * VT state after calling TakeControl(). However, this serves as a good
+ * default and well-behaving controllers can now ignore VTs entirely.
+ * Note that we reset the VT on ReleaseControl() and if the controller
+ * exits.
+ * If logind crashes/restarts, we restore the controller during restart
+ * (without preparing the VT since the controller has probably overridden
+ * VT state by now) or reset the VT in case it crashed/exited, too. */
+ if (prepare) {
+ r = session_prepare_vt(s);
+ if (r < 0) {
+ s->track = sd_bus_track_unref(s->track);
+ return r;
+ }
+ }
+
+ session_release_controller(s, true);
+ s->controller = TAKE_PTR(name);
+ session_save(s);
+
+ return 0;
+}
+
+void session_drop_controller(Session *s) {
+ assert(s);
+
+ if (!s->controller)
+ return;
+
+ s->track = sd_bus_track_unref(s->track);
+ session_release_controller(s, false);
+ session_save(s);
+ session_restore_vt(s);
+}
+
+static const char* const session_state_table[_SESSION_STATE_MAX] = {
+ [SESSION_OPENING] = "opening",
+ [SESSION_ONLINE] = "online",
+ [SESSION_ACTIVE] = "active",
+ [SESSION_CLOSING] = "closing"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(session_state, SessionState);
+
+static const char* const session_type_table[_SESSION_TYPE_MAX] = {
+ [SESSION_UNSPECIFIED] = "unspecified",
+ [SESSION_TTY] = "tty",
+ [SESSION_X11] = "x11",
+ [SESSION_WAYLAND] = "wayland",
+ [SESSION_MIR] = "mir",
+ [SESSION_WEB] = "web",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(session_type, SessionType);
+
+static const char* const session_class_table[_SESSION_CLASS_MAX] = {
+ [SESSION_USER] = "user",
+ [SESSION_GREETER] = "greeter",
+ [SESSION_LOCK_SCREEN] = "lock-screen",
+ [SESSION_BACKGROUND] = "background"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(session_class, SessionClass);
+
+static const char* const kill_who_table[_KILL_WHO_MAX] = {
+ [KILL_LEADER] = "leader",
+ [KILL_ALL] = "all"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);
+
+static const char* const tty_validity_table[_TTY_VALIDITY_MAX] = {
+ [TTY_FROM_PAM] = "from-pam",
+ [TTY_FROM_UTMP] = "from-utmp",
+ [TTY_UTMP_INCONSISTENT] = "utmp-inconsistent",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(tty_validity, TTYValidity);
diff --git a/src/login/logind-session.h b/src/login/logind-session.h
new file mode 100644
index 0000000..f3c17a8
--- /dev/null
+++ b/src/login/logind-session.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Session Session;
+typedef enum KillWho KillWho;
+
+#include "list.h"
+#include "login-util.h"
+#include "logind-user.h"
+
+typedef enum SessionState {
+ SESSION_OPENING, /* Session scope is being created */
+ SESSION_ONLINE, /* Logged in */
+ SESSION_ACTIVE, /* Logged in and in the fg */
+ SESSION_CLOSING, /* Logged out, but scope is still there */
+ _SESSION_STATE_MAX,
+ _SESSION_STATE_INVALID = -1
+} SessionState;
+
+typedef enum SessionClass {
+ SESSION_USER,
+ SESSION_GREETER,
+ SESSION_LOCK_SCREEN,
+ SESSION_BACKGROUND,
+ _SESSION_CLASS_MAX,
+ _SESSION_CLASS_INVALID = -1
+} SessionClass;
+
+typedef enum SessionType {
+ SESSION_UNSPECIFIED,
+ SESSION_TTY,
+ SESSION_X11,
+ SESSION_WAYLAND,
+ SESSION_MIR,
+ SESSION_WEB,
+ _SESSION_TYPE_MAX,
+ _SESSION_TYPE_INVALID = -1
+} SessionType;
+
+#define SESSION_TYPE_IS_GRAPHICAL(type) IN_SET(type, SESSION_X11, SESSION_WAYLAND, SESSION_MIR)
+
+enum KillWho {
+ KILL_LEADER,
+ KILL_ALL,
+ _KILL_WHO_MAX,
+ _KILL_WHO_INVALID = -1
+};
+
+typedef enum TTYValidity {
+ TTY_FROM_PAM,
+ TTY_FROM_UTMP,
+ TTY_UTMP_INCONSISTENT, /* may happen on ssh sessions with multiplexed TTYs */
+ _TTY_VALIDITY_MAX,
+ _TTY_VALIDITY_INVALID = -1,
+} TTYValidity;
+
+struct Session {
+ Manager *manager;
+
+ const char *id;
+ unsigned position;
+ SessionType type;
+ SessionClass class;
+
+ char *state_file;
+
+ User *user;
+
+ dual_timestamp timestamp;
+
+ char *display;
+ char *tty;
+ TTYValidity tty_validity;
+
+ bool remote;
+ char *remote_user;
+ char *remote_host;
+ char *service;
+ char *desktop;
+
+ char *scope;
+ char *scope_job;
+
+ Seat *seat;
+ unsigned vtnr;
+ int vtfd;
+
+ pid_t leader;
+ uint32_t audit_id;
+
+ int fifo_fd;
+ char *fifo_path;
+
+ sd_event_source *fifo_event_source;
+
+ bool idle_hint;
+ dual_timestamp idle_hint_timestamp;
+
+ bool locked_hint;
+
+ bool in_gc_queue:1;
+ bool started:1;
+ bool stopping:1;
+
+ bool was_active:1;
+
+ sd_bus_message *create_message;
+
+ /* Set up when a client requested to release the session via the bus */
+ sd_event_source *timer_event_source;
+
+ char *controller;
+ Hashmap *devices;
+ sd_bus_track *track;
+
+ LIST_FIELDS(Session, sessions_by_user);
+ LIST_FIELDS(Session, sessions_by_seat);
+
+ LIST_FIELDS(Session, gc_queue);
+};
+
+int session_new(Session **ret, Manager *m, const char *id);
+Session* session_free(Session *s);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Session *, session_free);
+
+void session_set_user(Session *s, User *u);
+int session_set_leader(Session *s, pid_t pid);
+bool session_may_gc(Session *s, bool drop_not_started);
+void session_add_to_gc_queue(Session *s);
+int session_activate(Session *s);
+bool session_is_active(Session *s);
+int session_get_idle_hint(Session *s, dual_timestamp *t);
+void session_set_idle_hint(Session *s, bool b);
+int session_get_locked_hint(Session *s);
+void session_set_locked_hint(Session *s, bool b);
+int session_create_fifo(Session *s);
+int session_start(Session *s, sd_bus_message *properties, sd_bus_error *error);
+int session_stop(Session *s, bool force);
+int session_finalize(Session *s);
+int session_release(Session *s);
+int session_save(Session *s);
+int session_load(Session *s);
+int session_kill(Session *s, KillWho who, int signo);
+
+SessionState session_get_state(Session *u);
+
+extern const sd_bus_vtable session_vtable[];
+int session_node_enumerator(sd_bus *bus, const char *path,void *userdata, char ***nodes, sd_bus_error *error);
+int session_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error);
+char *session_bus_path(Session *s);
+
+int session_send_signal(Session *s, bool new_session);
+int session_send_changed(Session *s, const char *properties, ...) _sentinel_;
+int session_send_lock(Session *s, bool lock);
+int session_send_lock_all(Manager *m, bool lock);
+
+int session_send_create_reply(Session *s, sd_bus_error *error);
+
+const char* session_state_to_string(SessionState t) _const_;
+SessionState session_state_from_string(const char *s) _pure_;
+
+const char* session_type_to_string(SessionType t) _const_;
+SessionType session_type_from_string(const char *s) _pure_;
+
+const char* session_class_to_string(SessionClass t) _const_;
+SessionClass session_class_from_string(const char *s) _pure_;
+
+const char *kill_who_to_string(KillWho k) _const_;
+KillWho kill_who_from_string(const char *s) _pure_;
+
+const char* tty_validity_to_string(TTYValidity t) _const_;
+TTYValidity tty_validity_from_string(const char *s) _pure_;
+
+int session_prepare_vt(Session *s);
+void session_leave_vt(Session *s);
+
+bool session_is_controller(Session *s, const char *sender);
+int session_set_controller(Session *s, const char *sender, bool force, bool prepare);
+void session_drop_controller(Session *s);
+
+int bus_session_method_activate(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_session_method_lock(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_session_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_session_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error);
diff --git a/src/login/logind-user-dbus.c b/src/login/logind-user-dbus.c
new file mode 100644
index 0000000..fcaeba1
--- /dev/null
+++ b/src/login/logind-user-dbus.c
@@ -0,0 +1,362 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "format-util.h"
+#include "logind-user.h"
+#include "logind.h"
+#include "missing_capability.h"
+#include "signal-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static BUS_DEFINE_PROPERTY_GET2(property_get_state, "s", User, user_get_state, user_state_to_string);
+
+static int property_get_display(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *p = NULL;
+ User *u = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ p = u->display ? session_bus_path(u->display) : strdup("/");
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_message_append(reply, "(so)", u->display ? u->display->id : "", p);
+}
+
+static int property_get_sessions(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ User *u = userdata;
+ Session *session;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = sd_bus_message_open_container(reply, 'a', "(so)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(sessions_by_user, session, u->sessions) {
+ _cleanup_free_ char *p = NULL;
+
+ p = session_bus_path(session);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(so)", session->id, p);
+ if (r < 0)
+ return r;
+
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_idle_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ User *u = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ return sd_bus_message_append(reply, "b", user_get_idle_hint(u, NULL) > 0);
+}
+
+static int property_get_idle_since_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ User *u = userdata;
+ dual_timestamp t = DUAL_TIMESTAMP_NULL;
+ uint64_t k;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ (void) user_get_idle_hint(u, &t);
+ k = streq(property, "IdleSinceHint") ? t.realtime : t.monotonic;
+
+ return sd_bus_message_append(reply, "t", k);
+}
+
+static int property_get_linger(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ User *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = user_check_linger_file(u);
+
+ return sd_bus_message_append(reply, "b", r > 0);
+}
+
+int bus_user_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ User *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.login1.manage",
+ NULL,
+ false,
+ u->uid,
+ &u->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = user_stop(u, true);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_user_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ User *u = userdata;
+ int32_t signo;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.login1.manage",
+ NULL,
+ false,
+ u->uid,
+ &u->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = sd_bus_message_read(message, "i", &signo);
+ if (r < 0)
+ return r;
+
+ if (!SIGNAL_VALID(signo))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid signal %i", signo);
+
+ r = user_kill(u, signo);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+const sd_bus_vtable user_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(User, uid), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(User, gid), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Name", "s", NULL, offsetof(User, name), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("Timestamp", offsetof(User, timestamp), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RuntimePath", "s", NULL, offsetof(User, runtime_path), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Service", "s", NULL, offsetof(User, service), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Slice", "s", NULL, offsetof(User, slice), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Display", "(so)", property_get_display, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("State", "s", property_get_state, 0, 0),
+ SD_BUS_PROPERTY("Sessions", "a(so)", property_get_sessions, 0, 0),
+ SD_BUS_PROPERTY("IdleHint", "b", property_get_idle_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHint", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHintMonotonic", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Linger", "b", property_get_linger, 0, 0),
+
+ SD_BUS_METHOD("Terminate", NULL, NULL, bus_user_method_terminate, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Kill", "i", NULL, bus_user_method_kill, SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_VTABLE_END
+};
+
+int user_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ uid_t uid;
+ User *user;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ if (streq(path, "/org/freedesktop/login1/user/self")) {
+ sd_bus_message *message;
+
+ message = sd_bus_get_current_message(bus);
+ if (!message)
+ return 0;
+
+ r = manager_get_user_from_creds(m, message, UID_INVALID, error, &user);
+ if (r < 0)
+ return r;
+ } else {
+ const char *p;
+
+ p = startswith(path, "/org/freedesktop/login1/user/_");
+ if (!p)
+ return 0;
+
+ r = parse_uid(p, &uid);
+ if (r < 0)
+ return 0;
+
+ user = hashmap_get(m->users, UID_TO_PTR(uid));
+ if (!user)
+ return 0;
+ }
+
+ *found = user;
+ return 1;
+}
+
+char *user_bus_path(User *u) {
+ char *s;
+
+ assert(u);
+
+ if (asprintf(&s, "/org/freedesktop/login1/user/_"UID_FMT, u->uid) < 0)
+ return NULL;
+
+ return s;
+}
+
+int user_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ sd_bus_message *message;
+ Manager *m = userdata;
+ User *user;
+ Iterator i;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(nodes);
+
+ HASHMAP_FOREACH(user, m->users, i) {
+ char *p;
+
+ p = user_bus_path(user);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_consume(&l, p);
+ if (r < 0)
+ return r;
+ }
+
+ message = sd_bus_get_current_message(bus);
+ if (message) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ uid_t uid;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_OWNER_UID|SD_BUS_CREDS_AUGMENT, &creds);
+ if (r >= 0) {
+ r = sd_bus_creds_get_owner_uid(creds, &uid);
+ if (r >= 0) {
+ user = hashmap_get(m->users, UID_TO_PTR(uid));
+ if (user) {
+ r = strv_extend(&l, "/org/freedesktop/login1/user/self");
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+int user_send_signal(User *u, bool new_user) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(u);
+
+ p = user_bus_path(u);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_emit_signal(
+ u->manager->bus,
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ new_user ? "UserNew" : "UserRemoved",
+ "uo", (uint32_t) u->uid, p);
+}
+
+int user_send_changed(User *u, const char *properties, ...) {
+ _cleanup_free_ char *p = NULL;
+ char **l;
+
+ assert(u);
+
+ if (!u->started)
+ return 0;
+
+ p = user_bus_path(u);
+ if (!p)
+ return -ENOMEM;
+
+ l = strv_from_stdarg_alloca(properties);
+
+ return sd_bus_emit_properties_changed_strv(u->manager->bus, p, "org.freedesktop.login1.User", l);
+}
diff --git a/src/login/logind-user.c b/src/login/logind-user.c
new file mode 100644
index 0000000..ae27bfb
--- /dev/null
+++ b/src/login/logind-user.c
@@ -0,0 +1,859 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio_ext.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "clean-ipc.h"
+#include "env-file.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "label.h"
+#include "logind-user.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "serialize.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "util.h"
+
+int user_new(User **ret,
+ Manager *m,
+ uid_t uid,
+ gid_t gid,
+ const char *name,
+ const char *home) {
+
+ _cleanup_(user_freep) User *u = NULL;
+ char lu[DECIMAL_STR_MAX(uid_t) + 1];
+ int r;
+
+ assert(ret);
+ assert(m);
+ assert(name);
+
+ u = new(User, 1);
+ if (!u)
+ return -ENOMEM;
+
+ *u = (User) {
+ .manager = m,
+ .uid = uid,
+ .gid = gid,
+ .last_session_timestamp = USEC_INFINITY,
+ };
+
+ u->name = strdup(name);
+ if (!u->name)
+ return -ENOMEM;
+
+ u->home = strdup(home);
+ if (!u->home)
+ return -ENOMEM;
+
+ if (asprintf(&u->state_file, "/run/systemd/users/"UID_FMT, uid) < 0)
+ return -ENOMEM;
+
+ if (asprintf(&u->runtime_path, "/run/user/"UID_FMT, uid) < 0)
+ return -ENOMEM;
+
+ xsprintf(lu, UID_FMT, uid);
+ r = slice_build_subslice(SPECIAL_USER_SLICE, lu, &u->slice);
+ if (r < 0)
+ return r;
+
+ r = unit_name_build("user", lu, ".service", &u->service);
+ if (r < 0)
+ return r;
+
+ r = unit_name_build("user-runtime-dir", lu, ".service", &u->runtime_dir_service);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->users, UID_TO_PTR(uid), u);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->user_units, u->slice, u);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->user_units, u->service, u);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->user_units, u->runtime_dir_service, u);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(u);
+ return 0;
+}
+
+User *user_free(User *u) {
+ if (!u)
+ return NULL;
+
+ if (u->in_gc_queue)
+ LIST_REMOVE(gc_queue, u->manager->user_gc_queue, u);
+
+ while (u->sessions)
+ session_free(u->sessions);
+
+ if (u->service)
+ hashmap_remove_value(u->manager->user_units, u->service, u);
+
+ if (u->runtime_dir_service)
+ hashmap_remove_value(u->manager->user_units, u->runtime_dir_service, u);
+
+ if (u->slice)
+ hashmap_remove_value(u->manager->user_units, u->slice, u);
+
+ hashmap_remove_value(u->manager->users, UID_TO_PTR(u->uid), u);
+
+ (void) sd_event_source_unref(u->timer_event_source);
+
+ u->service_job = mfree(u->service_job);
+
+ u->service = mfree(u->service);
+ u->runtime_dir_service = mfree(u->runtime_dir_service);
+ u->slice = mfree(u->slice);
+ u->runtime_path = mfree(u->runtime_path);
+ u->state_file = mfree(u->state_file);
+ u->name = mfree(u->name);
+ u->home = mfree(u->home);
+
+ return mfree(u);
+}
+
+static int user_save_internal(User *u) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(u);
+ assert(u->state_file);
+
+ r = mkdir_safe_label("/run/systemd/users", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ goto fail;
+
+ r = fopen_temporary(u->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "NAME=%s\n"
+ "STATE=%s\n" /* friendly user-facing state */
+ "STOPPING=%s\n", /* low-level state */
+ u->name,
+ user_state_to_string(user_get_state(u)),
+ yes_no(u->stopping));
+
+ /* LEGACY: no-one reads RUNTIME= anymore, drop it at some point */
+ if (u->runtime_path)
+ fprintf(f, "RUNTIME=%s\n", u->runtime_path);
+
+ if (u->service_job)
+ fprintf(f, "SERVICE_JOB=%s\n", u->service_job);
+
+ if (u->display)
+ fprintf(f, "DISPLAY=%s\n", u->display->id);
+
+ if (dual_timestamp_is_set(&u->timestamp))
+ fprintf(f,
+ "REALTIME="USEC_FMT"\n"
+ "MONOTONIC="USEC_FMT"\n",
+ u->timestamp.realtime,
+ u->timestamp.monotonic);
+
+ if (u->last_session_timestamp != USEC_INFINITY)
+ fprintf(f, "LAST_SESSION_TIMESTAMP=" USEC_FMT "\n",
+ u->last_session_timestamp);
+
+ if (u->sessions) {
+ Session *i;
+ bool first;
+
+ fputs("SESSIONS=", f);
+ first = true;
+ LIST_FOREACH(sessions_by_user, i, u->sessions) {
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ fputs(i->id, f);
+ }
+
+ fputs("\nSEATS=", f);
+ first = true;
+ LIST_FOREACH(sessions_by_user, i, u->sessions) {
+ if (!i->seat)
+ continue;
+
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ fputs(i->seat->id, f);
+ }
+
+ fputs("\nACTIVE_SESSIONS=", f);
+ first = true;
+ LIST_FOREACH(sessions_by_user, i, u->sessions) {
+ if (!session_is_active(i))
+ continue;
+
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ fputs(i->id, f);
+ }
+
+ fputs("\nONLINE_SESSIONS=", f);
+ first = true;
+ LIST_FOREACH(sessions_by_user, i, u->sessions) {
+ if (session_get_state(i) == SESSION_CLOSING)
+ continue;
+
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ fputs(i->id, f);
+ }
+
+ fputs("\nACTIVE_SEATS=", f);
+ first = true;
+ LIST_FOREACH(sessions_by_user, i, u->sessions) {
+ if (!session_is_active(i) || !i->seat)
+ continue;
+
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ fputs(i->seat->id, f);
+ }
+
+ fputs("\nONLINE_SEATS=", f);
+ first = true;
+ LIST_FOREACH(sessions_by_user, i, u->sessions) {
+ if (session_get_state(i) == SESSION_CLOSING || !i->seat)
+ continue;
+
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ fputs(i->seat->id, f);
+ }
+ fputc('\n', f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, u->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(u->state_file);
+
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save user data %s: %m", u->state_file);
+}
+
+int user_save(User *u) {
+ assert(u);
+
+ if (!u->started)
+ return 0;
+
+ return user_save_internal(u);
+}
+
+int user_load(User *u) {
+ _cleanup_free_ char *realtime = NULL, *monotonic = NULL, *stopping = NULL, *last_session_timestamp = NULL;
+ int r;
+
+ assert(u);
+
+ r = parse_env_file(NULL, u->state_file,
+ "SERVICE_JOB", &u->service_job,
+ "STOPPING", &stopping,
+ "REALTIME", &realtime,
+ "MONOTONIC", &monotonic,
+ "LAST_SESSION_TIMESTAMP", &last_session_timestamp);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to read %s: %m", u->state_file);
+
+ if (stopping) {
+ r = parse_boolean(stopping);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse 'STOPPING' boolean: %s", stopping);
+ else
+ u->stopping = r;
+ }
+
+ if (realtime)
+ (void) deserialize_usec(realtime, &u->timestamp.realtime);
+ if (monotonic)
+ (void) deserialize_usec(monotonic, &u->timestamp.monotonic);
+ if (last_session_timestamp)
+ (void) deserialize_usec(last_session_timestamp, &u->last_session_timestamp);
+
+ return 0;
+}
+
+static void user_start_service(User *u) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(u);
+
+ /* Start the service containing the "systemd --user" instance (user@.service). Note that we don't explicitly
+ * start the per-user slice or the systemd-runtime-dir@.service instance, as those are pulled in both by
+ * user@.service and the session scopes as dependencies. */
+
+ u->service_job = mfree(u->service_job);
+
+ r = manager_start_unit(u->manager, u->service, &error, &u->service_job);
+ if (r < 0)
+ log_warning_errno(r, "Failed to start user service '%s', ignoring: %s", u->service, bus_error_message(&error, r));
+}
+
+int user_start(User *u) {
+ assert(u);
+
+ if (u->started && !u->stopping)
+ return 0;
+
+ /* If u->stopping is set, the user is marked for removal and service stop-jobs are queued. We have to clear
+ * that flag before queing the start-jobs again. If they succeed, the user object can be re-used just fine
+ * (pid1 takes care of job-ordering and proper restart), but if they fail, we want to force another user_stop()
+ * so possibly pending units are stopped. */
+ u->stopping = false;
+
+ if (!u->started)
+ log_debug("Starting services for new user %s.", u->name);
+
+ /* Save the user data so far, because pam_systemd will read the XDG_RUNTIME_DIR out of it while starting up
+ * systemd --user. We need to do user_save_internal() because we have not "officially" started yet. */
+ user_save_internal(u);
+
+ /* Start user@UID.service */
+ user_start_service(u);
+
+ if (!u->started) {
+ if (!dual_timestamp_is_set(&u->timestamp))
+ dual_timestamp_get(&u->timestamp);
+ user_send_signal(u, true);
+ u->started = true;
+ }
+
+ /* Save new user data */
+ user_save(u);
+
+ return 0;
+}
+
+static void user_stop_service(User *u) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(u);
+ assert(u->service);
+
+ /* The reverse of user_start_service(). Note that we only stop user@UID.service here, and let StopWhenUnneeded=
+ * deal with the slice and the user-runtime-dir@.service instance. */
+
+ u->service_job = mfree(u->service_job);
+
+ r = manager_stop_unit(u->manager, u->service, &error, &u->service_job);
+ if (r < 0)
+ log_warning_errno(r, "Failed to stop user service '%s', ignoring: %s", u->service, bus_error_message(&error, r));
+}
+
+int user_stop(User *u, bool force) {
+ Session *s;
+ int r = 0;
+ assert(u);
+
+ /* This is called whenever we begin with tearing down a user record. It's called in two cases: explicit API
+ * request to do so via the bus (in which case 'force' is true) and automatically due to GC, if there's no
+ * session left pinning it (in which case 'force' is false). Note that this just initiates tearing down of the
+ * user, the User object will remain in memory until user_finalize() is called, see below. */
+
+ if (!u->started)
+ return 0;
+
+ if (u->stopping) { /* Stop jobs have already been queued */
+ user_save(u);
+ return 0;
+ }
+
+ LIST_FOREACH(sessions_by_user, s, u->sessions) {
+ int k;
+
+ k = session_stop(s, force);
+ if (k < 0)
+ r = k;
+ }
+
+ user_stop_service(u);
+
+ u->stopping = true;
+
+ user_save(u);
+
+ return r;
+}
+
+int user_finalize(User *u) {
+ Session *s;
+ int r = 0, k;
+
+ assert(u);
+
+ /* Called when the user is really ready to be freed, i.e. when all unit stop jobs and suchlike for it are
+ * done. This is called as a result of an earlier user_done() when all jobs are completed. */
+
+ if (u->started)
+ log_debug("User %s logged out.", u->name);
+
+ LIST_FOREACH(sessions_by_user, s, u->sessions) {
+ k = session_finalize(s);
+ if (k < 0)
+ r = k;
+ }
+
+ /* Clean SysV + POSIX IPC objects, but only if this is not a system user. Background: in many setups cronjobs
+ * are run in full PAM and thus logind sessions, even if the code run doesn't belong to actual users but to
+ * system components. Since enable RemoveIPC= globally for all users, we need to be a bit careful with such
+ * cases, as we shouldn't accidentally remove a system service's IPC objects while it is running, just because
+ * a cronjob running as the same user just finished. Hence: exclude system users generally from IPC clean-up,
+ * and do it only for normal users. */
+ if (u->manager->remove_ipc && !uid_is_system(u->uid)) {
+ k = clean_ipc_by_uid(u->uid);
+ if (k < 0)
+ r = k;
+ }
+
+ (void) unlink(u->state_file);
+ user_add_to_gc_queue(u);
+
+ if (u->started) {
+ user_send_signal(u, false);
+ u->started = false;
+ }
+
+ return r;
+}
+
+int user_get_idle_hint(User *u, dual_timestamp *t) {
+ Session *s;
+ bool idle_hint = true;
+ dual_timestamp ts = DUAL_TIMESTAMP_NULL;
+
+ assert(u);
+
+ LIST_FOREACH(sessions_by_user, s, u->sessions) {
+ dual_timestamp k;
+ int ih;
+
+ ih = session_get_idle_hint(s, &k);
+ if (ih < 0)
+ return ih;
+
+ if (!ih) {
+ if (!idle_hint) {
+ if (k.monotonic < ts.monotonic)
+ ts = k;
+ } else {
+ idle_hint = false;
+ ts = k;
+ }
+ } else if (idle_hint) {
+
+ if (k.monotonic > ts.monotonic)
+ ts = k;
+ }
+ }
+
+ if (t)
+ *t = ts;
+
+ return idle_hint;
+}
+
+int user_check_linger_file(User *u) {
+ _cleanup_free_ char *cc = NULL;
+ char *p = NULL;
+
+ cc = cescape(u->name);
+ if (!cc)
+ return -ENOMEM;
+
+ p = strjoina("/var/lib/systemd/linger/", cc);
+ if (access(p, F_OK) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ return false;
+ }
+
+ return true;
+}
+
+static bool user_unit_active(User *u) {
+ const char *i;
+ int r;
+
+ assert(u->service);
+ assert(u->runtime_dir_service);
+ assert(u->slice);
+
+ FOREACH_STRING(i, u->service, u->runtime_dir_service, u->slice) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = manager_unit_is_active(u->manager, i, &error);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine whether unit '%s' is active, ignoring: %s", u->service, bus_error_message(&error, r));
+ if (r != 0)
+ return true;
+ }
+
+ return false;
+}
+
+bool user_may_gc(User *u, bool drop_not_started) {
+ int r;
+
+ assert(u);
+
+ if (drop_not_started && !u->started)
+ return true;
+
+ if (u->sessions)
+ return false;
+
+ if (u->last_session_timestamp != USEC_INFINITY) {
+ /* All sessions have been closed. Let's see if we shall leave the user record around for a bit */
+
+ if (u->manager->user_stop_delay == USEC_INFINITY)
+ return false; /* Leave it around forever! */
+ if (u->manager->user_stop_delay > 0 &&
+ now(CLOCK_MONOTONIC) < usec_add(u->last_session_timestamp, u->manager->user_stop_delay))
+ return false; /* Leave it around for a bit longer. */
+ }
+
+ /* Is this a user that shall stay around forever ("linger")? Before we say "no" to GC'ing for lingering users, let's check
+ * if any of the three units that we maintain for this user is still around. If none of them is,
+ * there's no need to keep this user around even if lingering is enabled. */
+ if (user_check_linger_file(u) > 0 && user_unit_active(u))
+ return false;
+
+ /* Check if our job is still pending */
+ if (u->service_job) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = manager_job_is_active(u->manager, u->service_job, &error);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine whether job '%s' is pending, ignoring: %s", u->service_job, bus_error_message(&error, r));
+ if (r != 0)
+ return false;
+ }
+
+ /* Note that we don't care if the three units we manage for each user object are up or not, as we are managing
+ * their state rather than tracking it. */
+
+ return true;
+}
+
+void user_add_to_gc_queue(User *u) {
+ assert(u);
+
+ if (u->in_gc_queue)
+ return;
+
+ LIST_PREPEND(gc_queue, u->manager->user_gc_queue, u);
+ u->in_gc_queue = true;
+}
+
+UserState user_get_state(User *u) {
+ Session *i;
+
+ assert(u);
+
+ if (u->stopping)
+ return USER_CLOSING;
+
+ if (!u->started || u->service_job)
+ return USER_OPENING;
+
+ if (u->sessions) {
+ bool all_closing = true;
+
+ LIST_FOREACH(sessions_by_user, i, u->sessions) {
+ SessionState state;
+
+ state = session_get_state(i);
+ if (state == SESSION_ACTIVE)
+ return USER_ACTIVE;
+ if (state != SESSION_CLOSING)
+ all_closing = false;
+ }
+
+ return all_closing ? USER_CLOSING : USER_ONLINE;
+ }
+
+ if (user_check_linger_file(u) > 0 && user_unit_active(u))
+ return USER_LINGERING;
+
+ return USER_CLOSING;
+}
+
+int user_kill(User *u, int signo) {
+ assert(u);
+
+ return manager_kill_unit(u->manager, u->slice, KILL_ALL, signo, NULL);
+}
+
+static bool elect_display_filter(Session *s) {
+ /* Return true if the session is a candidate for the user’s ‘primary session’ or ‘display’. */
+ assert(s);
+
+ return s->class == SESSION_USER && s->started && !s->stopping;
+}
+
+static int elect_display_compare(Session *s1, Session *s2) {
+ /* Indexed by SessionType. Lower numbers mean more preferred. */
+ const int type_ranks[_SESSION_TYPE_MAX] = {
+ [SESSION_UNSPECIFIED] = 0,
+ [SESSION_TTY] = -2,
+ [SESSION_X11] = -3,
+ [SESSION_WAYLAND] = -3,
+ [SESSION_MIR] = -3,
+ [SESSION_WEB] = -1,
+ };
+
+ /* Calculate the partial order relationship between s1 and s2,
+ * returning < 0 if s1 is preferred as the user’s ‘primary session’,
+ * 0 if s1 and s2 are equally preferred or incomparable, or > 0 if s2
+ * is preferred.
+ *
+ * s1 or s2 may be NULL. */
+ if (!s1 && !s2)
+ return 0;
+
+ if ((s1 == NULL) != (s2 == NULL))
+ return (s1 == NULL) - (s2 == NULL);
+
+ if (s1->stopping != s2->stopping)
+ return s1->stopping - s2->stopping;
+
+ if ((s1->class != SESSION_USER) != (s2->class != SESSION_USER))
+ return (s1->class != SESSION_USER) - (s2->class != SESSION_USER);
+
+ if ((s1->type == _SESSION_TYPE_INVALID) != (s2->type == _SESSION_TYPE_INVALID))
+ return (s1->type == _SESSION_TYPE_INVALID) - (s2->type == _SESSION_TYPE_INVALID);
+
+ if (s1->type != s2->type)
+ return type_ranks[s1->type] - type_ranks[s2->type];
+
+ return 0;
+}
+
+void user_elect_display(User *u) {
+ Session *s;
+
+ assert(u);
+
+ /* This elects a primary session for each user, which we call the "display". We try to keep the assignment
+ * stable, but we "upgrade" to better choices. */
+ log_debug("Electing new display for user %s", u->name);
+
+ LIST_FOREACH(sessions_by_user, s, u->sessions) {
+ if (!elect_display_filter(s)) {
+ log_debug("Ignoring session %s", s->id);
+ continue;
+ }
+
+ if (elect_display_compare(s, u->display) < 0) {
+ log_debug("Choosing session %s in preference to %s", s->id, u->display ? u->display->id : "-");
+ u->display = s;
+ }
+ }
+}
+
+static int user_stop_timeout_callback(sd_event_source *es, uint64_t usec, void *userdata) {
+ User *u = userdata;
+
+ assert(u);
+ user_add_to_gc_queue(u);
+
+ return 0;
+}
+
+void user_update_last_session_timer(User *u) {
+ int r;
+
+ assert(u);
+
+ if (u->sessions) {
+ /* There are sessions, turn off the timer */
+ u->last_session_timestamp = USEC_INFINITY;
+ u->timer_event_source = sd_event_source_unref(u->timer_event_source);
+ return;
+ }
+
+ if (u->last_session_timestamp != USEC_INFINITY)
+ return; /* Timer already started */
+
+ u->last_session_timestamp = now(CLOCK_MONOTONIC);
+
+ assert(!u->timer_event_source);
+
+ if (u->manager->user_stop_delay == 0 || u->manager->user_stop_delay == USEC_INFINITY)
+ return;
+
+ if (sd_event_get_state(u->manager->event) == SD_EVENT_FINISHED) {
+ log_debug("Not allocating user stop timeout, since we are already exiting.");
+ return;
+ }
+
+ r = sd_event_add_time(u->manager->event,
+ &u->timer_event_source,
+ CLOCK_MONOTONIC,
+ usec_add(u->last_session_timestamp, u->manager->user_stop_delay), 0,
+ user_stop_timeout_callback, u);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enqueue user stop event source, ignoring: %m");
+
+ if (DEBUG_LOGGING) {
+ char s[FORMAT_TIMESPAN_MAX];
+
+ log_debug("Last session of user '%s' logged out, terminating user context in %s.",
+ u->name,
+ format_timespan(s, sizeof(s), u->manager->user_stop_delay, USEC_PER_MSEC));
+ }
+}
+
+static const char* const user_state_table[_USER_STATE_MAX] = {
+ [USER_OFFLINE] = "offline",
+ [USER_OPENING] = "opening",
+ [USER_LINGERING] = "lingering",
+ [USER_ONLINE] = "online",
+ [USER_ACTIVE] = "active",
+ [USER_CLOSING] = "closing"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(user_state, UserState);
+
+int config_parse_tmpfs_size(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *sz = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* First, try to parse as percentage */
+ r = parse_permille(rvalue);
+ if (r > 0 && r < 1000)
+ *sz = physical_memory_scale(r, 1000U);
+ else {
+ uint64_t k;
+
+ /* If the passed argument was not a percentage, or out of range, parse as byte size */
+
+ r = parse_size(rvalue, 1024, &k);
+ if (r >= 0 && (k <= 0 || (uint64_t) (size_t) k != k))
+ r = -ERANGE;
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse size value '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ *sz = PAGE_ALIGN((size_t) k);
+ }
+
+ return 0;
+}
+
+int config_parse_compat_user_tasks_max(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ log_syntax(unit, LOG_NOTICE, filename, line, 0,
+ "Support for option %s= has been removed.",
+ lvalue);
+ log_info("Hint: try creating /etc/systemd/system/user-.slice.d/50-limits.conf with:\n"
+ " [Slice]\n"
+ " TasksMax=%s",
+ rvalue);
+ return 0;
+}
diff --git a/src/login/logind-user.h b/src/login/logind-user.h
new file mode 100644
index 0000000..c41973e
--- /dev/null
+++ b/src/login/logind-user.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct User User;
+
+#include "conf-parser.h"
+#include "list.h"
+#include "logind.h"
+
+typedef enum UserState {
+ USER_OFFLINE, /* Not logged in at all */
+ USER_OPENING, /* Is logging in */
+ USER_LINGERING, /* Lingering has been enabled by the admin for this user */
+ USER_ONLINE, /* User logged in */
+ USER_ACTIVE, /* User logged in and has a session in the fg */
+ USER_CLOSING, /* User logged out, but processes still remain and lingering is not enabled */
+ _USER_STATE_MAX,
+ _USER_STATE_INVALID = -1
+} UserState;
+
+struct User {
+ Manager *manager;
+ uid_t uid;
+ gid_t gid;
+ char *name;
+ char *home;
+ char *state_file;
+ char *runtime_path;
+
+ char *slice; /* user-UID.slice */
+ char *service; /* user@UID.service */
+ char *runtime_dir_service; /* user-runtime-dir@UID.service */
+
+ char *service_job;
+
+ Session *display;
+
+ dual_timestamp timestamp; /* When this User object was 'started' the first time */
+ usec_t last_session_timestamp; /* When the number of sessions of this user went from 1 to 0 the last time */
+
+ /* Set up when the last session of the user logs out */
+ sd_event_source *timer_event_source;
+
+ bool in_gc_queue:1;
+
+ bool started:1; /* Whenever the user being started, has been started or is being stopped again. */
+ bool stopping:1; /* Whenever the user is being stopped or has been stopped. */
+
+ LIST_HEAD(Session, sessions);
+ LIST_FIELDS(User, gc_queue);
+};
+
+int user_new(User **out, Manager *m, uid_t uid, gid_t gid, const char *name, const char *home);
+User *user_free(User *u);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(User *, user_free);
+
+bool user_may_gc(User *u, bool drop_not_started);
+void user_add_to_gc_queue(User *u);
+int user_start(User *u);
+int user_stop(User *u, bool force);
+int user_finalize(User *u);
+UserState user_get_state(User *u);
+int user_get_idle_hint(User *u, dual_timestamp *t);
+int user_save(User *u);
+int user_load(User *u);
+int user_kill(User *u, int signo);
+int user_check_linger_file(User *u);
+void user_elect_display(User *u);
+void user_update_last_session_timer(User *u);
+
+extern const sd_bus_vtable user_vtable[];
+int user_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error);
+int user_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error);
+char *user_bus_path(User *s);
+
+int user_send_signal(User *u, bool new_user);
+int user_send_changed(User *u, const char *properties, ...) _sentinel_;
+
+const char* user_state_to_string(UserState s) _const_;
+UserState user_state_from_string(const char *s) _pure_;
+
+int bus_user_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_user_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_compat_user_tasks_max);
diff --git a/src/login/logind-utmp.c b/src/login/logind-utmp.c
new file mode 100644
index 0000000..3a5a333
--- /dev/null
+++ b/src/login/logind-utmp.c
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <pwd.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "format-util.h"
+#include "logind.h"
+#include "path-util.h"
+#include "special.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "utmp-wtmp.h"
+
+_const_ static usec_t when_wall(usec_t n, usec_t elapse) {
+
+ usec_t left;
+ unsigned i;
+ static const int wall_timers[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 25, 40, 55, 70, 100, 130, 150, 180,
+ };
+
+ /* If the time is already passed, then don't announce */
+ if (n >= elapse)
+ return 0;
+
+ left = elapse - n;
+
+ for (i = 1; i < ELEMENTSOF(wall_timers); i++)
+ if (wall_timers[i] * USEC_PER_MINUTE >= left)
+ return left - wall_timers[i-1] * USEC_PER_MINUTE;
+
+ return left % USEC_PER_HOUR;
+}
+
+bool logind_wall_tty_filter(const char *tty, void *userdata) {
+ Manager *m = userdata;
+ const char *p;
+
+ assert(m);
+
+ if (!m->scheduled_shutdown_tty)
+ return true;
+
+ p = path_startswith(tty, "/dev/");
+ if (!p)
+ return true;
+
+ return !streq(p, m->scheduled_shutdown_tty);
+}
+
+static int warn_wall(Manager *m, usec_t n) {
+ char date[FORMAT_TIMESTAMP_MAX] = {};
+ _cleanup_free_ char *l = NULL, *username = NULL;
+ usec_t left;
+ int r;
+
+ assert(m);
+
+ if (!m->enable_wall_messages)
+ return 0;
+
+ left = m->scheduled_shutdown_timeout > n;
+
+ r = asprintf(&l, "%s%sThe system is going down for %s %s%s!",
+ strempty(m->wall_message),
+ isempty(m->wall_message) ? "" : "\n",
+ m->scheduled_shutdown_type,
+ left ? "at " : "NOW",
+ left ? format_timestamp(date, sizeof(date), m->scheduled_shutdown_timeout) : "");
+ if (r < 0) {
+ log_oom();
+ return 0;
+ }
+
+ username = uid_to_name(m->scheduled_shutdown_uid);
+ utmp_wall(l, username, m->scheduled_shutdown_tty, logind_wall_tty_filter, m);
+
+ return 1;
+}
+
+static int wall_message_timeout_handler(
+ sd_event_source *s,
+ uint64_t usec,
+ void *userdata) {
+
+ Manager *m = userdata;
+ usec_t n, next;
+ int r;
+
+ assert(m);
+ assert(s == m->wall_message_timeout_source);
+
+ n = now(CLOCK_REALTIME);
+
+ r = warn_wall(m, n);
+ if (r == 0)
+ return 0;
+
+ next = when_wall(n, m->scheduled_shutdown_timeout);
+ if (next > 0) {
+ r = sd_event_source_set_time(s, n + next);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_time() failed. %m");
+
+ r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_enabled() failed. %m");
+ }
+
+ return 0;
+}
+
+int manager_setup_wall_message_timer(Manager *m) {
+
+ usec_t n, elapse;
+ int r;
+
+ assert(m);
+
+ n = now(CLOCK_REALTIME);
+ elapse = m->scheduled_shutdown_timeout;
+
+ /* wall message handling */
+
+ if (isempty(m->scheduled_shutdown_type)) {
+ warn_wall(m, n);
+ return 0;
+ }
+
+ if (elapse < n)
+ return 0;
+
+ /* Warn immediately if less than 15 minutes are left */
+ if (elapse - n < 15 * USEC_PER_MINUTE) {
+ r = warn_wall(m, n);
+ if (r == 0)
+ return 0;
+ }
+
+ elapse = when_wall(n, elapse);
+ if (elapse == 0)
+ return 0;
+
+ if (m->wall_message_timeout_source) {
+ r = sd_event_source_set_time(m->wall_message_timeout_source, n + elapse);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_time() failed. %m");
+
+ r = sd_event_source_set_enabled(m->wall_message_timeout_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_enabled() failed. %m");
+ } else {
+ r = sd_event_add_time(m->event, &m->wall_message_timeout_source,
+ CLOCK_REALTIME, n + elapse, 0, wall_message_timeout_handler, m);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_add_time() failed. %m");
+ }
+
+ return 0;
+}
diff --git a/src/login/logind.c b/src/login/logind.c
new file mode 100644
index 0000000..95ec0a5
--- /dev/null
+++ b/src/login/logind.c
@@ -0,0 +1,1245 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "def.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "logind.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+
+static Manager* manager_unref(Manager *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_unref);
+
+static int manager_new(Manager **ret) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ int r;
+
+ assert(ret);
+
+ m = new(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (Manager) {
+ .console_active_fd = -1,
+ .reserve_vt_fd = -1,
+ };
+
+ m->idle_action_not_before_usec = now(CLOCK_MONOTONIC);
+
+ m->devices = hashmap_new(&string_hash_ops);
+ m->seats = hashmap_new(&string_hash_ops);
+ m->sessions = hashmap_new(&string_hash_ops);
+ m->sessions_by_leader = hashmap_new(NULL);
+ m->users = hashmap_new(NULL);
+ m->inhibitors = hashmap_new(&string_hash_ops);
+ m->buttons = hashmap_new(&string_hash_ops);
+
+ m->user_units = hashmap_new(&string_hash_ops);
+ m->session_units = hashmap_new(&string_hash_ops);
+
+ if (!m->devices || !m->seats || !m->sessions || !m->sessions_by_leader || !m->users || !m->inhibitors || !m->buttons || !m->user_units || !m->session_units)
+ return -ENOMEM;
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_set_watchdog(m->event, true);
+
+ manager_reset_config(m);
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+static Manager* manager_unref(Manager *m) {
+ Session *session;
+ User *u;
+ Device *d;
+ Seat *s;
+ Inhibitor *i;
+ Button *b;
+
+ if (!m)
+ return NULL;
+
+ while ((session = hashmap_first(m->sessions)))
+ session_free(session);
+
+ while ((u = hashmap_first(m->users)))
+ user_free(u);
+
+ while ((d = hashmap_first(m->devices)))
+ device_free(d);
+
+ while ((s = hashmap_first(m->seats)))
+ seat_free(s);
+
+ while ((i = hashmap_first(m->inhibitors)))
+ inhibitor_free(i);
+
+ while ((b = hashmap_first(m->buttons)))
+ button_free(b);
+
+ hashmap_free(m->devices);
+ hashmap_free(m->seats);
+ hashmap_free(m->sessions);
+ hashmap_free(m->sessions_by_leader);
+ hashmap_free(m->users);
+ hashmap_free(m->inhibitors);
+ hashmap_free(m->buttons);
+
+ hashmap_free(m->user_units);
+ hashmap_free(m->session_units);
+
+ sd_event_source_unref(m->idle_action_event_source);
+ sd_event_source_unref(m->inhibit_timeout_source);
+ sd_event_source_unref(m->scheduled_shutdown_timeout_source);
+ sd_event_source_unref(m->nologin_timeout_source);
+ sd_event_source_unref(m->wall_message_timeout_source);
+
+ sd_event_source_unref(m->console_active_event_source);
+ sd_event_source_unref(m->lid_switch_ignore_event_source);
+
+#if ENABLE_UTMP
+ sd_event_source_unref(m->utmp_event_source);
+#endif
+
+ safe_close(m->console_active_fd);
+
+ sd_device_monitor_unref(m->device_seat_monitor);
+ sd_device_monitor_unref(m->device_monitor);
+ sd_device_monitor_unref(m->device_vcsa_monitor);
+ sd_device_monitor_unref(m->device_button_monitor);
+
+ if (m->unlink_nologin)
+ (void) unlink_or_warn("/run/nologin");
+
+ bus_verify_polkit_async_registry_free(m->polkit_registry);
+
+ sd_bus_flush_close_unref(m->bus);
+ sd_event_unref(m->event);
+
+ safe_close(m->reserve_vt_fd);
+
+ strv_free(m->kill_only_users);
+ strv_free(m->kill_exclude_users);
+
+ free(m->scheduled_shutdown_type);
+ free(m->scheduled_shutdown_tty);
+ free(m->wall_message);
+ free(m->action_job);
+
+ return mfree(m);
+}
+
+static int manager_enumerate_devices(Manager *m) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+ int r;
+
+ assert(m);
+
+ /* Loads devices from udev and creates seats for them as
+ * necessary */
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_tag(e, "master-of-seat");
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ int k;
+
+ k = manager_process_seat_device(m, d);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int manager_enumerate_buttons(Manager *m) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+ int r;
+
+ assert(m);
+
+ /* Loads buttons from udev */
+
+ if (manager_all_buttons_ignored(m))
+ return 0;
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_subsystem(e, "input", true);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_tag(e, "power-switch");
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ int k;
+
+ k = manager_process_button_device(m, d);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int manager_enumerate_seats(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(m);
+
+ /* This loads data about seats stored on disk, but does not
+ * actually create any seats. Removes data of seats that no
+ * longer exist. */
+
+ d = opendir("/run/systemd/seats");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open /run/systemd/seats: %m");
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ Seat *s;
+ int k;
+
+ if (!dirent_is_file(de))
+ continue;
+
+ s = hashmap_get(m->seats, de->d_name);
+ if (!s) {
+ if (unlinkat(dirfd(d), de->d_name, 0) < 0)
+ log_warning("Failed to remove /run/systemd/seats/%s: %m",
+ de->d_name);
+ continue;
+ }
+
+ k = seat_load(s);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int manager_enumerate_linger_users(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(m);
+
+ d = opendir("/var/lib/systemd/linger");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open /var/lib/systemd/linger/: %m");
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ int k;
+
+ if (!dirent_is_file(de))
+ continue;
+
+ k = manager_add_user_by_name(m, de->d_name, NULL);
+ if (k < 0) {
+ log_notice_errno(k, "Couldn't add lingering user %s: %m", de->d_name);
+ r = k;
+ }
+ }
+
+ return r;
+}
+
+static int manager_enumerate_users(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r, k;
+
+ assert(m);
+
+ /* Add lingering users */
+ r = manager_enumerate_linger_users(m);
+
+ /* Read in user data stored on disk */
+ d = opendir("/run/systemd/users");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open /run/systemd/users: %m");
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ User *u;
+
+ if (!dirent_is_file(de))
+ continue;
+
+ k = manager_add_user_by_name(m, de->d_name, &u);
+ if (k < 0) {
+ log_error_errno(k, "Failed to add user by file name %s: %m", de->d_name);
+
+ r = k;
+ continue;
+ }
+
+ user_add_to_gc_queue(u);
+
+ k = user_load(u);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int parse_fdname(const char *fdname, char **session_id, dev_t *dev) {
+ _cleanup_strv_free_ char **parts = NULL;
+ _cleanup_free_ char *id = NULL;
+ unsigned major, minor;
+ int r;
+
+ parts = strv_split(fdname, "-");
+ if (!parts)
+ return -ENOMEM;
+ if (strv_length(parts) != 5)
+ return -EINVAL;
+
+ if (!streq(parts[0], "session"))
+ return -EINVAL;
+
+ id = strdup(parts[1]);
+ if (!id)
+ return -ENOMEM;
+
+ if (!streq(parts[2], "device"))
+ return -EINVAL;
+
+ r = safe_atou(parts[3], &major);
+ if (r < 0)
+ return r;
+ r = safe_atou(parts[4], &minor);
+ if (r < 0)
+ return r;
+
+ *dev = makedev(major, minor);
+ *session_id = TAKE_PTR(id);
+
+ return 0;
+}
+
+static int manager_attach_fds(Manager *m) {
+ _cleanup_strv_free_ char **fdnames = NULL;
+ int n, i, fd;
+
+ /* Upon restart, PID1 will send us back all fds of session devices
+ * that we previously opened. Each file descriptor is associated
+ * with a given session. The session ids are passed through FDNAMES. */
+
+ n = sd_listen_fds_with_names(true, &fdnames);
+ if (n <= 0)
+ return n;
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *id = NULL;
+ dev_t dev;
+ struct stat st;
+ SessionDevice *sd;
+ Session *s;
+ int r;
+
+ fd = SD_LISTEN_FDS_START + i;
+
+ r = parse_fdname(fdnames[i], &id, &dev);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse fd name %s: %m", fdnames[i]);
+ close_nointr(fd);
+ continue;
+ }
+
+ s = hashmap_get(m->sessions, id);
+ if (!s) {
+ /* If the session doesn't exist anymore, the associated session
+ * device attached to this fd doesn't either. Let's simply close
+ * this fd. */
+ log_debug("Failed to attach fd for unknown session: %s", id);
+ close_nointr(fd);
+ continue;
+ }
+
+ if (fstat(fd, &st) < 0) {
+ /* The device is allowed to go away at a random point, in which
+ * case fstat failing is expected. */
+ log_debug_errno(errno, "Failed to stat device fd for session %s: %m", id);
+ close_nointr(fd);
+ continue;
+ }
+
+ if (!S_ISCHR(st.st_mode) || st.st_rdev != dev) {
+ log_debug("Device fd doesn't point to the expected character device node");
+ close_nointr(fd);
+ continue;
+ }
+
+ sd = hashmap_get(s->devices, &dev);
+ if (!sd) {
+ /* Weird, we got an fd for a session device which wasn't
+ * recorded in the session state file... */
+ log_warning("Got fd for missing session device [%u:%u] in session %s",
+ major(dev), minor(dev), s->id);
+ close_nointr(fd);
+ continue;
+ }
+
+ log_debug("Attaching fd to session device [%u:%u] for session %s",
+ major(dev), minor(dev), s->id);
+
+ session_device_attach_fd(sd, fd, s->was_active);
+ }
+
+ return 0;
+}
+
+static int manager_enumerate_sessions(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0, k;
+
+ assert(m);
+
+ /* Read in session data stored on disk */
+ d = opendir("/run/systemd/sessions");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open /run/systemd/sessions: %m");
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ struct Session *s;
+
+ if (!dirent_is_file(de))
+ continue;
+
+ if (!session_id_valid(de->d_name)) {
+ log_warning("Invalid session file name '%s', ignoring.", de->d_name);
+ r = -EINVAL;
+ continue;
+ }
+
+ k = manager_add_session(m, de->d_name, &s);
+ if (k < 0) {
+ log_error_errno(k, "Failed to add session by file name %s: %m", de->d_name);
+ r = k;
+ continue;
+ }
+
+ session_add_to_gc_queue(s);
+
+ k = session_load(s);
+ if (k < 0)
+ r = k;
+ }
+
+ /* We might be restarted and PID1 could have sent us back the
+ * session device fds we previously saved. */
+ k = manager_attach_fds(m);
+ if (k < 0)
+ log_warning_errno(k, "Failed to reattach session device fds: %m");
+
+ return r;
+}
+
+static int manager_enumerate_inhibitors(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(m);
+
+ d = opendir("/run/systemd/inhibit");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open /run/systemd/inhibit: %m");
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ int k;
+ Inhibitor *i;
+
+ if (!dirent_is_file(de))
+ continue;
+
+ k = manager_add_inhibitor(m, de->d_name, &i);
+ if (k < 0) {
+ log_notice_errno(k, "Couldn't add inhibitor %s: %m", de->d_name);
+ r = k;
+ continue;
+ }
+
+ k = inhibitor_load(i);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int manager_dispatch_seat_udev(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ assert(device);
+
+ manager_process_seat_device(m, device);
+ return 0;
+}
+
+static int manager_dispatch_device_udev(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ assert(device);
+
+ manager_process_seat_device(m, device);
+ return 0;
+}
+
+static int manager_dispatch_vcsa_udev(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ Manager *m = userdata;
+ const char *name, *action;
+
+ assert(m);
+ assert(device);
+
+ /* Whenever a VCSA device is removed try to reallocate our
+ * VTs, to make sure our auto VTs never go away. */
+
+ if (sd_device_get_sysname(device, &name) >= 0 &&
+ startswith(name, "vcsa") &&
+ sd_device_get_property_value(device, "ACTION", &action) >= 0 &&
+ streq(action, "remove"))
+ seat_preallocate_vts(m->seat0);
+
+ return 0;
+}
+
+static int manager_dispatch_button_udev(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ assert(device);
+
+ manager_process_button_device(m, device);
+ return 0;
+}
+
+static int manager_dispatch_console(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ assert(m->seat0);
+ assert(m->console_active_fd == fd);
+
+ seat_read_active_vt(m->seat0);
+ return 0;
+}
+
+static int manager_reserve_vt(Manager *m) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(m);
+
+ if (m->reserve_vt <= 0)
+ return 0;
+
+ if (asprintf(&p, "/dev/tty%u", m->reserve_vt) < 0)
+ return log_oom();
+
+ m->reserve_vt_fd = open(p, O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (m->reserve_vt_fd < 0) {
+
+ /* Don't complain on VT-less systems */
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to pin reserved VT: %m");
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int manager_connect_bus(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(!m->bus);
+
+ r = sd_bus_default_system(&m->bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = sd_bus_add_object_vtable(m->bus, NULL, "/org/freedesktop/login1", "org.freedesktop.login1.Manager", manager_vtable, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add manager object vtable: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/login1/seat", "org.freedesktop.login1.Seat", seat_vtable, seat_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add seat object vtable: %m");
+
+ r = sd_bus_add_node_enumerator(m->bus, NULL, "/org/freedesktop/login1/seat", seat_node_enumerator, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add seat enumerator: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/login1/session", "org.freedesktop.login1.Session", session_vtable, session_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add session object vtable: %m");
+
+ r = sd_bus_add_node_enumerator(m->bus, NULL, "/org/freedesktop/login1/session", session_node_enumerator, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add session enumerator: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/login1/user", "org.freedesktop.login1.User", user_vtable, user_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add user object vtable: %m");
+
+ r = sd_bus_add_node_enumerator(m->bus, NULL, "/org/freedesktop/login1/user", user_node_enumerator, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add user enumerator: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "JobRemoved",
+ match_job_removed, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for JobRemoved: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "UnitRemoved",
+ match_unit_removed, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for UnitRemoved: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ NULL,
+ "org.freedesktop.DBus.Properties",
+ "PropertiesChanged",
+ match_properties_changed, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for PropertiesChanged: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Reloading",
+ match_reloading, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for Reloading: %m");
+
+ r = sd_bus_call_method_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Subscribe",
+ NULL, NULL,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable subscription: %m");
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.login1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ return 0;
+}
+
+static int manager_vt_switch(sd_event_source *src, const struct signalfd_siginfo *si, void *data) {
+ Manager *m = data;
+ Session *active, *iter;
+
+ /*
+ * We got a VT-switch signal and we have to acknowledge it immediately.
+ * Preferably, we'd just use m->seat0->active->vtfd, but unfortunately,
+ * old user-space might run multiple sessions on a single VT, *sigh*.
+ * Therefore, we have to iterate all sessions and find one with a vtfd
+ * on the requested VT.
+ * As only VTs with active controllers have VT_PROCESS set, our current
+ * notion of the active VT might be wrong (for instance if the switch
+ * happens while we setup VT_PROCESS). Therefore, read the current VT
+ * first and then use s->active->vtnr as reference. Note that this is
+ * not racy, as no further VT-switch can happen as long as we're in
+ * synchronous VT_PROCESS mode.
+ */
+
+ assert(m->seat0);
+ seat_read_active_vt(m->seat0);
+
+ active = m->seat0->active;
+ if (!active || active->vtnr < 1) {
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ /* We are requested to acknowledge the VT-switch signal by the kernel but
+ * there's no registered sessions for the current VT. Normally this
+ * shouldn't happen but something wrong might have happened when we tried
+ * to release the VT. Better be safe than sorry, and try to release the VT
+ * one more time otherwise the user will be locked with the current VT. */
+
+ log_warning("Received VT_PROCESS signal without a registered session, restoring VT.");
+
+ /* At this point we only have the kernel mapping for referring to the
+ * current VT. */
+ fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0) {
+ log_warning_errno(fd, "Failed to open, ignoring: %m");
+ return 0;
+ }
+
+ r = vt_release(fd, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to release VT, ignoring: %m");
+
+ return 0;
+ }
+
+ if (active->vtfd >= 0) {
+ session_leave_vt(active);
+ } else {
+ LIST_FOREACH(sessions_by_seat, iter, m->seat0->sessions) {
+ if (iter->vtnr == active->vtnr && iter->vtfd >= 0) {
+ session_leave_vt(iter);
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int manager_connect_console(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(m->console_active_fd < 0);
+
+ /* On certain systems (such as S390, Xen, and containers) /dev/tty0 does not exist (as there is no VC), so
+ * don't fail if we can't open it. */
+
+ if (access("/dev/tty0", F_OK) < 0)
+ return 0;
+
+ m->console_active_fd = open("/sys/class/tty/tty0/active", O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (m->console_active_fd < 0) {
+
+ /* On some systems /dev/tty0 may exist even though /sys/class/tty/tty0 does not. These are broken, but
+ * common. Let's complain but continue anyway. */
+ if (errno == ENOENT) {
+ log_warning_errno(errno, "System has /dev/tty0 but not /sys/class/tty/tty0/active which is broken, ignoring: %m");
+ return 0;
+ }
+
+ return log_error_errno(errno, "Failed to open /sys/class/tty/tty0/active: %m");
+ }
+
+ r = sd_event_add_io(m->event, &m->console_active_event_source, m->console_active_fd, 0, manager_dispatch_console, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch foreground console: %m");
+
+ /*
+ * SIGRTMIN is used as global VT-release signal, SIGRTMIN + 1 is used
+ * as VT-acquire signal. We ignore any acquire-events (yes, we still
+ * have to provide a valid signal-number for it!) and acknowledge all
+ * release events immediately.
+ */
+
+ if (SIGRTMIN + 1 > SIGRTMAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not enough real-time signals available: %u-%u",
+ SIGRTMIN, SIGRTMAX);
+
+ assert_se(ignore_signals(SIGRTMIN + 1, -1) >= 0);
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGRTMIN, -1) >= 0);
+
+ r = sd_event_add_signal(m->event, NULL, SIGRTMIN, manager_vt_switch, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to subscribe to signal: %m");
+
+ return 0;
+}
+
+static int manager_connect_udev(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(!m->device_seat_monitor);
+ assert(!m->device_monitor);
+ assert(!m->device_vcsa_monitor);
+ assert(!m->device_button_monitor);
+
+ r = sd_device_monitor_new(&m->device_seat_monitor);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_tag(m->device_seat_monitor, "master-of-seat");
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_attach_event(m->device_seat_monitor, m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_start(m->device_seat_monitor, manager_dispatch_seat_udev, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(sd_device_monitor_get_event_source(m->device_seat_monitor), "logind-seat-monitor");
+
+ r = sd_device_monitor_new(&m->device_monitor);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_monitor, "input", NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_monitor, "graphics", NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_monitor, "drm", NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_attach_event(m->device_monitor, m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_start(m->device_monitor, manager_dispatch_device_udev, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(sd_device_monitor_get_event_source(m->device_monitor), "logind-device-monitor");
+
+ /* Don't watch keys if nobody cares */
+ if (!manager_all_buttons_ignored(m)) {
+ r = sd_device_monitor_new(&m->device_button_monitor);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_tag(m->device_button_monitor, "power-switch");
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_button_monitor, "input", NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_attach_event(m->device_button_monitor, m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_start(m->device_button_monitor, manager_dispatch_button_udev, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(sd_device_monitor_get_event_source(m->device_button_monitor), "logind-button-monitor");
+ }
+
+ /* Don't bother watching VCSA devices, if nobody cares */
+ if (m->n_autovts > 0 && m->console_active_fd >= 0) {
+
+ r = sd_device_monitor_new(&m->device_vcsa_monitor);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_vcsa_monitor, "vc", NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_attach_event(m->device_vcsa_monitor, m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_start(m->device_vcsa_monitor, manager_dispatch_vcsa_udev, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(sd_device_monitor_get_event_source(m->device_vcsa_monitor), "logind-vcsa-monitor");
+ }
+
+ return 0;
+}
+
+static void manager_gc(Manager *m, bool drop_not_started) {
+ Seat *seat;
+ Session *session;
+ User *user;
+
+ assert(m);
+
+ while ((seat = m->seat_gc_queue)) {
+ LIST_REMOVE(gc_queue, m->seat_gc_queue, seat);
+ seat->in_gc_queue = false;
+
+ if (seat_may_gc(seat, drop_not_started)) {
+ seat_stop(seat, false);
+ seat_free(seat);
+ }
+ }
+
+ while ((session = m->session_gc_queue)) {
+ LIST_REMOVE(gc_queue, m->session_gc_queue, session);
+ session->in_gc_queue = false;
+
+ /* First, if we are not closing yet, initiate stopping */
+ if (session_may_gc(session, drop_not_started) &&
+ session_get_state(session) != SESSION_CLOSING)
+ (void) session_stop(session, false);
+
+ /* Normally, this should make the session referenced
+ * again, if it doesn't then let's get rid of it
+ * immediately */
+ if (session_may_gc(session, drop_not_started)) {
+ (void) session_finalize(session);
+ session_free(session);
+ }
+ }
+
+ while ((user = m->user_gc_queue)) {
+ LIST_REMOVE(gc_queue, m->user_gc_queue, user);
+ user->in_gc_queue = false;
+
+ /* First step: queue stop jobs */
+ if (user_may_gc(user, drop_not_started))
+ (void) user_stop(user, false);
+
+ /* Second step: finalize user */
+ if (user_may_gc(user, drop_not_started)) {
+ (void) user_finalize(user);
+ user_free(user);
+ }
+ }
+}
+
+static int manager_dispatch_idle_action(sd_event_source *s, uint64_t t, void *userdata) {
+ Manager *m = userdata;
+ struct dual_timestamp since;
+ usec_t n, elapse;
+ int r;
+
+ assert(m);
+
+ if (m->idle_action == HANDLE_IGNORE ||
+ m->idle_action_usec <= 0)
+ return 0;
+
+ n = now(CLOCK_MONOTONIC);
+
+ r = manager_get_idle_hint(m, &since);
+ if (r <= 0)
+ /* Not idle. Let's check if after a timeout it might be idle then. */
+ elapse = n + m->idle_action_usec;
+ else {
+ /* Idle! Let's see if it's time to do something, or if
+ * we shall sleep for longer. */
+
+ if (n >= since.monotonic + m->idle_action_usec &&
+ (m->idle_action_not_before_usec <= 0 || n >= m->idle_action_not_before_usec + m->idle_action_usec)) {
+ log_info("System idle. Taking action.");
+
+ manager_handle_action(m, 0, m->idle_action, false, false);
+ m->idle_action_not_before_usec = n;
+ }
+
+ elapse = MAX(since.monotonic, m->idle_action_not_before_usec) + m->idle_action_usec;
+ }
+
+ if (!m->idle_action_event_source) {
+
+ r = sd_event_add_time(
+ m->event,
+ &m->idle_action_event_source,
+ CLOCK_MONOTONIC,
+ elapse, USEC_PER_SEC*30,
+ manager_dispatch_idle_action, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add idle event source: %m");
+
+ r = sd_event_source_set_priority(m->idle_action_event_source, SD_EVENT_PRIORITY_IDLE+10);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set idle event source priority: %m");
+ } else {
+ r = sd_event_source_set_time(m->idle_action_event_source, elapse);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set idle event timer: %m");
+
+ r = sd_event_source_set_enabled(m->idle_action_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable idle event timer: %m");
+ }
+
+ return 0;
+}
+
+static int manager_dispatch_reload_signal(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *m = userdata;
+ int r;
+
+ manager_reset_config(m);
+ r = manager_parse_config_file(m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse config file, using defaults: %m");
+ else
+ log_info("Config file reloaded.");
+
+ return 0;
+}
+
+static int manager_startup(Manager *m) {
+ int r;
+ Seat *seat;
+ Session *session;
+ User *user;
+ Button *button;
+ Inhibitor *inhibitor;
+ Iterator i;
+
+ assert(m);
+
+ r = sd_event_add_signal(m->event, NULL, SIGHUP, manager_dispatch_reload_signal, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register SIGHUP handler: %m");
+
+ /* Connect to utmp */
+ manager_connect_utmp(m);
+
+ /* Connect to console */
+ r = manager_connect_console(m);
+ if (r < 0)
+ return r;
+
+ /* Connect to udev */
+ r = manager_connect_udev(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create udev watchers: %m");
+
+ /* Connect to the bus */
+ r = manager_connect_bus(m);
+ if (r < 0)
+ return r;
+
+ /* Instantiate magic seat 0 */
+ r = manager_add_seat(m, "seat0", &m->seat0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add seat0: %m");
+
+ r = manager_set_lid_switch_ignore(m, 0 + m->holdoff_timeout_usec);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set up lid switch ignore event source: %m");
+
+ /* Deserialize state */
+ r = manager_enumerate_devices(m);
+ if (r < 0)
+ log_warning_errno(r, "Device enumeration failed: %m");
+
+ r = manager_enumerate_seats(m);
+ if (r < 0)
+ log_warning_errno(r, "Seat enumeration failed: %m");
+
+ r = manager_enumerate_users(m);
+ if (r < 0)
+ log_warning_errno(r, "User enumeration failed: %m");
+
+ r = manager_enumerate_sessions(m);
+ if (r < 0)
+ log_warning_errno(r, "Session enumeration failed: %m");
+
+ r = manager_enumerate_inhibitors(m);
+ if (r < 0)
+ log_warning_errno(r, "Inhibitor enumeration failed: %m");
+
+ r = manager_enumerate_buttons(m);
+ if (r < 0)
+ log_warning_errno(r, "Button enumeration failed: %m");
+
+ /* Remove stale objects before we start them */
+ manager_gc(m, false);
+
+ /* Reserve the special reserved VT */
+ manager_reserve_vt(m);
+
+ /* Read in utmp if it exists */
+ manager_read_utmp(m);
+
+ /* And start everything */
+ HASHMAP_FOREACH(seat, m->seats, i)
+ (void) seat_start(seat);
+
+ HASHMAP_FOREACH(user, m->users, i)
+ (void) user_start(user);
+
+ HASHMAP_FOREACH(session, m->sessions, i)
+ (void) session_start(session, NULL, NULL);
+
+ HASHMAP_FOREACH(inhibitor, m->inhibitors, i)
+ inhibitor_start(inhibitor);
+
+ HASHMAP_FOREACH(button, m->buttons, i)
+ button_check_switches(button);
+
+ manager_dispatch_idle_action(NULL, 0, m);
+
+ return 0;
+}
+
+static int manager_run(Manager *m) {
+ int r;
+
+ assert(m);
+
+ for (;;) {
+ r = sd_event_get_state(m->event);
+ if (r < 0)
+ return r;
+ if (r == SD_EVENT_FINISHED)
+ return 0;
+
+ manager_gc(m, true);
+
+ r = manager_dispatch_delayed(m, false);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ r = sd_event_run(m->event, (uint64_t) -1);
+ if (r < 0)
+ return r;
+ }
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ int r;
+
+ log_set_facility(LOG_AUTH);
+ log_setup_service();
+
+ umask(0022);
+
+ if (argc != 1) {
+ log_error("This program takes no arguments.");
+ return -EINVAL;
+ }
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return log_error_errno(r, "Could not initialize labelling: %m");
+
+ /* Always create the directories people can create inotify watches in. Note that some applications might check
+ * for the existence of /run/systemd/seats/ to determine whether logind is available, so please always make
+ * sure these directories are created early on and unconditionally. */
+ (void) mkdir_label("/run/systemd/seats", 0755);
+ (void) mkdir_label("/run/systemd/users", 0755);
+ (void) mkdir_label("/run/systemd/sessions", 0755);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGHUP, SIGTERM, SIGINT, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate manager object: %m");
+
+ (void) manager_parse_config_file(m);
+
+ r = manager_startup(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fully start up daemon: %m");
+
+ log_debug("systemd-logind running as pid "PID_FMT, getpid_cached());
+ (void) sd_notify(false,
+ "READY=1\n"
+ "STATUS=Processing requests...");
+
+ r = manager_run(m);
+
+ log_debug("systemd-logind stopped as pid "PID_FMT, getpid_cached());
+ (void) sd_notify(false,
+ "STOPPING=1\n"
+ "STATUS=Shutting down...");
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/login/logind.conf.in b/src/login/logind.conf.in
new file mode 100644
index 0000000..1029e29
--- /dev/null
+++ b/src/login/logind.conf.in
@@ -0,0 +1,37 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See logind.conf(5) for details.
+
+[Login]
+#NAutoVTs=6
+#ReserveVT=6
+#KillUserProcesses=@KILL_USER_PROCESSES@
+#KillOnlyUsers=
+#KillExcludeUsers=root
+#InhibitDelayMaxSec=5
+#HandlePowerKey=poweroff
+#HandleSuspendKey=suspend
+#HandleHibernateKey=hibernate
+#HandleLidSwitch=suspend
+#HandleLidSwitchExternalPower=suspend
+#HandleLidSwitchDocked=ignore
+#PowerKeyIgnoreInhibited=no
+#SuspendKeyIgnoreInhibited=no
+#HibernateKeyIgnoreInhibited=no
+#LidSwitchIgnoreInhibited=yes
+#HoldoffTimeoutSec=30s
+#IdleAction=ignore
+#IdleActionSec=30min
+#RuntimeDirectorySize=10%
+#RemoveIPC=yes
+#InhibitorsMax=8192
+#SessionsMax=8192
diff --git a/src/login/logind.h b/src/login/logind.h
new file mode 100644
index 0000000..7b774f4
--- /dev/null
+++ b/src/login/logind.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-bus.h"
+#include "sd-device.h"
+#include "sd-event.h"
+
+#include "conf-parser.h"
+#include "hashmap.h"
+#include "list.h"
+#include "set.h"
+
+typedef struct Manager Manager;
+
+#include "logind-action.h"
+#include "logind-button.h"
+#include "logind-device.h"
+#include "logind-inhibit.h"
+
+struct Manager {
+ sd_event *event;
+ sd_bus *bus;
+
+ Hashmap *devices;
+ Hashmap *seats;
+ Hashmap *sessions;
+ Hashmap *sessions_by_leader;
+ Hashmap *users;
+ Hashmap *inhibitors;
+ Hashmap *buttons;
+
+ LIST_HEAD(Seat, seat_gc_queue);
+ LIST_HEAD(Session, session_gc_queue);
+ LIST_HEAD(User, user_gc_queue);
+
+ sd_device_monitor *device_seat_monitor, *device_monitor, *device_vcsa_monitor, *device_button_monitor;
+
+ sd_event_source *console_active_event_source;
+
+#if ENABLE_UTMP
+ sd_event_source *utmp_event_source;
+#endif
+
+ int console_active_fd;
+
+ unsigned n_autovts;
+
+ unsigned reserve_vt;
+ int reserve_vt_fd;
+
+ Seat *seat0;
+
+ char **kill_only_users, **kill_exclude_users;
+ bool kill_user_processes;
+
+ unsigned long session_counter;
+ unsigned long inhibit_counter;
+
+ Hashmap *session_units;
+ Hashmap *user_units;
+
+ usec_t inhibit_delay_max;
+ usec_t user_stop_delay;
+
+ /* If an action is currently being executed or is delayed,
+ * this is != 0 and encodes what is being done */
+ InhibitWhat action_what;
+
+ /* If a shutdown/suspend was delayed due to a inhibitor this
+ contains the unit name we are supposed to start after the
+ delay is over */
+ const char *action_unit;
+
+ /* If a shutdown/suspend is currently executed, then this is
+ * the job of it */
+ char *action_job;
+ sd_event_source *inhibit_timeout_source;
+
+ char *scheduled_shutdown_type;
+ usec_t scheduled_shutdown_timeout;
+ sd_event_source *scheduled_shutdown_timeout_source;
+ uid_t scheduled_shutdown_uid;
+ char *scheduled_shutdown_tty;
+ sd_event_source *nologin_timeout_source;
+ bool unlink_nologin;
+
+ char *wall_message;
+ unsigned enable_wall_messages;
+ sd_event_source *wall_message_timeout_source;
+
+ bool shutdown_dry_run;
+
+ sd_event_source *idle_action_event_source;
+ usec_t idle_action_usec;
+ usec_t idle_action_not_before_usec;
+ HandleAction idle_action;
+
+ HandleAction handle_power_key;
+ HandleAction handle_suspend_key;
+ HandleAction handle_hibernate_key;
+ HandleAction handle_lid_switch;
+ HandleAction handle_lid_switch_ep;
+ HandleAction handle_lid_switch_docked;
+
+ bool power_key_ignore_inhibited;
+ bool suspend_key_ignore_inhibited;
+ bool hibernate_key_ignore_inhibited;
+ bool lid_switch_ignore_inhibited;
+
+ bool remove_ipc;
+
+ Hashmap *polkit_registry;
+
+ usec_t holdoff_timeout_usec;
+ sd_event_source *lid_switch_ignore_event_source;
+
+ uint64_t runtime_dir_size;
+ uint64_t user_tasks_max;
+ uint64_t sessions_max;
+ uint64_t inhibitors_max;
+};
+
+void manager_reset_config(Manager *m);
+int manager_parse_config_file(Manager *m);
+
+int manager_add_device(Manager *m, const char *sysfs, bool master, Device **_device);
+int manager_add_button(Manager *m, const char *name, Button **_button);
+int manager_add_seat(Manager *m, const char *id, Seat **_seat);
+int manager_add_session(Manager *m, const char *id, Session **_session);
+int manager_add_user(Manager *m, uid_t uid, gid_t gid, const char *name, const char *home, User **_user);
+int manager_add_user_by_name(Manager *m, const char *name, User **_user);
+int manager_add_user_by_uid(Manager *m, uid_t uid, User **_user);
+int manager_add_inhibitor(Manager *m, const char* id, Inhibitor **_inhibitor);
+
+int manager_process_seat_device(Manager *m, sd_device *d);
+int manager_process_button_device(Manager *m, sd_device *d);
+
+int manager_spawn_autovt(Manager *m, unsigned vtnr);
+
+bool manager_shall_kill(Manager *m, const char *user);
+
+int manager_get_idle_hint(Manager *m, dual_timestamp *t);
+
+int manager_get_user_by_pid(Manager *m, pid_t pid, User **user);
+int manager_get_session_by_pid(Manager *m, pid_t pid, Session **session);
+
+bool manager_is_lid_closed(Manager *m);
+bool manager_is_docked_or_external_displays(Manager *m);
+bool manager_is_on_external_power(void);
+bool manager_all_buttons_ignored(Manager *m);
+
+int manager_read_utmp(Manager *m);
+void manager_connect_utmp(Manager *m);
+void manager_reconnect_utmp(Manager *m);
+
+extern const sd_bus_vtable manager_vtable[];
+
+int match_job_removed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int match_unit_removed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int match_properties_changed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int match_reloading(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int match_name_owner_changed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+int bus_manager_shutdown_or_sleep_now_or_later(Manager *m, const char *unit_name, InhibitWhat w, sd_bus_error *error);
+
+int manager_send_changed(Manager *manager, const char *property, ...) _sentinel_;
+
+int manager_start_scope(Manager *manager, const char *scope, pid_t pid, const char *slice, const char *description, char **wants, char **after, const char *requires_mounts_for, sd_bus_message *more_properties, sd_bus_error *error, char **job);
+int manager_start_unit(Manager *manager, const char *unit, sd_bus_error *error, char **job);
+int manager_stop_unit(Manager *manager, const char *unit, sd_bus_error *error, char **job);
+int manager_abandon_scope(Manager *manager, const char *scope, sd_bus_error *error);
+int manager_kill_unit(Manager *manager, const char *unit, KillWho who, int signo, sd_bus_error *error);
+int manager_unit_is_active(Manager *manager, const char *unit, sd_bus_error *error);
+int manager_job_is_active(Manager *manager, const char *path, sd_bus_error *error);
+
+/* gperf lookup function */
+const struct ConfigPerfItem* logind_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+int manager_set_lid_switch_ignore(Manager *m, usec_t until);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_n_autovts);
+CONFIG_PARSER_PROTOTYPE(config_parse_tmpfs_size);
+
+int manager_get_session_from_creds(Manager *m, sd_bus_message *message, const char *name, sd_bus_error *error, Session **ret);
+int manager_get_user_from_creds(Manager *m, sd_bus_message *message, uid_t uid, sd_bus_error *error, User **ret);
+int manager_get_seat_from_creds(Manager *m, sd_bus_message *message, const char *name, sd_bus_error *error, Seat **ret);
+
+int manager_setup_wall_message_timer(Manager *m);
+bool logind_wall_tty_filter(const char *tty, void *userdata);
+
+int manager_dispatch_delayed(Manager *manager, bool timeout);
diff --git a/src/login/meson.build b/src/login/meson.build
new file mode 100644
index 0000000..1cc75fd
--- /dev/null
+++ b/src/login/meson.build
@@ -0,0 +1,116 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+systemd_logind_sources = files('''
+ logind.c
+ logind.h
+'''.split())
+
+logind_gperf_c = custom_target(
+ 'logind_gperf.c',
+ input : 'logind-gperf.gperf',
+ output : 'logind-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+liblogind_core_sources = files('''
+ logind-core.c
+ logind-device.c
+ logind-device.h
+ logind-button.c
+ logind-button.h
+ logind-action.c
+ logind-action.h
+ logind-seat.c
+ logind-seat.h
+ logind-session.c
+ logind-session.h
+ logind-session-device.c
+ logind-session-device.h
+ logind-user.c
+ logind-user.h
+ logind-inhibit.c
+ logind-inhibit.h
+ logind-dbus.c
+ logind-session-dbus.c
+ logind-seat-dbus.c
+ logind-user-dbus.c
+ logind-utmp.c
+ logind-acl.h
+'''.split())
+
+liblogind_core_sources += [logind_gperf_c]
+
+logind_acl_c = files('logind-acl.c')
+if conf.get('HAVE_ACL') == 1
+ liblogind_core_sources += logind_acl_c
+endif
+
+liblogind_core = static_library(
+ 'logind-core',
+ liblogind_core_sources,
+ include_directories : includes,
+ dependencies : [libacl])
+
+loginctl_sources = files('''
+ loginctl.c
+ sysfs-show.h
+ sysfs-show.c
+'''.split())
+
+user_runtime_dir_sources = files('''
+ user-runtime-dir.c
+'''.split())
+
+if conf.get('ENABLE_LOGIND') == 1
+ logind_conf = configure_file(
+ input : 'logind.conf.in',
+ output : 'logind.conf',
+ configuration : substs)
+ install_data(logind_conf,
+ install_dir : pkgsysconfdir)
+
+ pam_systemd_sym = 'src/login/pam_systemd.sym'
+ pam_systemd_c = files('pam_systemd.c')
+
+ install_data('org.freedesktop.login1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.login1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.login1.policy',
+ install_dir : polkitpolicydir)
+
+ install_data('70-power-switch.rules', install_dir : udevrulesdir)
+
+ seat_rules = configure_file(
+ input : '71-seat.rules.in',
+ output : '71-seat.rules',
+ configuration : substs)
+ install_data(seat_rules,
+ install_dir : udevrulesdir)
+
+ custom_target(
+ '70-uaccess.rules',
+ input : '70-uaccess.rules.m4',
+ output: '70-uaccess.rules',
+ command : [meson_apply_m4, config_h, '@INPUT@'],
+ capture : true,
+ install : conf.get('HAVE_ACL') == 1,
+ install_dir : udevrulesdir)
+
+ custom_target(
+ '73-seat-late.rules',
+ input : '73-seat-late.rules.m4',
+ output: '73-seat-late.rules',
+ command : [meson_apply_m4, config_h, '@INPUT@'],
+ capture : true,
+ install : true,
+ install_dir : udevrulesdir)
+
+ custom_target(
+ 'systemd-user',
+ input : 'systemd-user.m4',
+ output: 'systemd-user',
+ command : [meson_apply_m4, config_h, '@INPUT@'],
+ capture : true,
+ install : pamconfdir != 'no',
+ install_dir : pamconfdir)
+endif
diff --git a/src/login/org.freedesktop.login1.conf b/src/login/org.freedesktop.login1.conf
new file mode 100644
index 0000000..f880f3e
--- /dev/null
+++ b/src/login/org.freedesktop.login1.conf
@@ -0,0 +1,292 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1+
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.login1"/>
+ <allow send_destination="org.freedesktop.login1"/>
+ <allow receive_sender="org.freedesktop.login1"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.login1"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="GetSession"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="GetSessionByPID"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="GetUser"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="GetUserByPID"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="GetSeat"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="ListSessions"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="ListUsers"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="ListSeats"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="ListInhibitors"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="Inhibit"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="SetUserLinger"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="ActivateSession"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="ActivateSessionOnSeat"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="LockSession"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="UnlockSession"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="LockSessions"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="UnlockSessions"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="KillSession"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="KillUser"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="TerminateSession"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="TerminateUser"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="TerminateSeat"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="PowerOff"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="Reboot"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="Halt"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="Suspend"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="Hibernate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="HybridSleep"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="SuspendThenHibernate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanPowerOff"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanReboot"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanHalt"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanSuspend"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanHibernate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanHybridSleep"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanSuspendThenHibernate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="ScheduleShutdown"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CancelScheduledShutdown"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanRebootToFirmwareSetup"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="SetRebootToFirmwareSetup"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="SetWallMessage"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="AttachDevice"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="FlushDevices"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Seat"
+ send_member="Terminate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Seat"
+ send_member="ActivateSession"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Seat"
+ send_member="SwitchTo"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Seat"
+ send_member="SwitchToPrevious"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Seat"
+ send_member="SwitchToNext"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="Terminate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="Activate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="Lock"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="Unlock"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="SetIdleHint"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="SetLockedHint"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="Kill"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="TakeControl"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="ReleaseControl"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="TakeDevice"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="ReleaseDevice"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="PauseDeviceComplete"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.User"
+ send_member="Terminate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.User"
+ send_member="Kill"/>
+
+ <allow receive_sender="org.freedesktop.login1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/login/org.freedesktop.login1.policy b/src/login/org.freedesktop.login1.policy
new file mode 100644
index 0000000..5ee62ab
--- /dev/null
+++ b/src/login/org.freedesktop.login1.policy
@@ -0,0 +1,361 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1+
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.login1.inhibit-block-shutdown">
+ <description gettext-domain="systemd">Allow applications to inhibit system shutdown</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit system shutdown.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.inhibit-delay-shutdown org.freedesktop.login1.inhibit-block-sleep org.freedesktop.login1.inhibit-delay-sleep org.freedesktop.login1.inhibit-block-idle</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-delay-shutdown">
+ <description gettext-domain="systemd">Allow applications to delay system shutdown</description>
+ <message gettext-domain="systemd">Authentication is required for an application to delay system shutdown.</message>
+ <defaults>
+ <allow_any>yes</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.inhibit-delay-sleep</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-block-sleep">
+ <description gettext-domain="systemd">Allow applications to inhibit system sleep</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit system sleep.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.inhibit-delay-sleep org.freedesktop.login1.inhibit-block-idle</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-delay-sleep">
+ <description gettext-domain="systemd">Allow applications to delay system sleep</description>
+ <message gettext-domain="systemd">Authentication is required for an application to delay system sleep.</message>
+ <defaults>
+ <allow_any>yes</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-block-idle">
+ <description gettext-domain="systemd">Allow applications to inhibit automatic system suspend</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit automatic system suspend.</message>
+ <defaults>
+ <allow_any>yes</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-handle-power-key">
+ <description gettext-domain="systemd">Allow applications to inhibit system handling of the power key</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit system handling of the power key.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.inhibit-handle-suspend-key org.freedesktop.login1.inhibit-handle-hibernate-key org.freedesktop.login1.inhibit-handle-lid-switch</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-handle-suspend-key">
+ <description gettext-domain="systemd">Allow applications to inhibit system handling of the suspend key</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit system handling of the suspend key.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.inhibit-handle-hibernate-key org.freedesktop.login1.inhibit-handle-lid-switch</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-handle-hibernate-key">
+ <description gettext-domain="systemd">Allow applications to inhibit system handling of the hibernate key</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit system handling of the hibernate key.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-handle-lid-switch">
+ <description gettext-domain="systemd">Allow applications to inhibit system handling of the lid switch</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit system handling of the lid switch.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.set-self-linger">
+ <description gettext-domain="systemd">Allow non-logged-in user to run programs</description>
+ <message gettext-domain="systemd">Explicit request is required to run programs as a non-logged-in user.</message>
+ <defaults>
+ <allow_any>yes</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.set-user-linger">
+ <description gettext-domain="systemd">Allow non-logged-in users to run programs</description>
+ <message gettext-domain="systemd">Authentication is required to run programs as a non-logged-in user.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.attach-device">
+ <description gettext-domain="systemd">Allow attaching devices to seats</description>
+ <message gettext-domain="systemd">Authentication is required for attaching a device to a seat.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.flush-devices</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.flush-devices">
+ <description gettext-domain="systemd">Flush device to seat attachments</description>
+ <message gettext-domain="systemd">Authentication is required for resetting how devices are attached to seats.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.power-off">
+ <description gettext-domain="systemd">Power off the system</description>
+ <message gettext-domain="systemd">Authentication is required for powering off the system.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.set-wall-message</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.power-off-multiple-sessions">
+ <description gettext-domain="systemd">Power off the system while other users are logged in</description>
+ <message gettext-domain="systemd">Authentication is required for powering off the system while other users are logged in.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.power-off</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.power-off-ignore-inhibit">
+ <description gettext-domain="systemd">Power off the system while an application asked to inhibit it</description>
+ <message gettext-domain="systemd">Authentication is required for powering off the system while an application asked to inhibit it.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.power-off</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.reboot">
+ <description gettext-domain="systemd">Reboot the system</description>
+ <message gettext-domain="systemd">Authentication is required for rebooting the system.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.set-wall-message</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.reboot-multiple-sessions">
+ <description gettext-domain="systemd">Reboot the system while other users are logged in</description>
+ <message gettext-domain="systemd">Authentication is required for rebooting the system while other users are logged in.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.reboot</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.reboot-ignore-inhibit">
+ <description gettext-domain="systemd">Reboot the system while an application asked to inhibit it</description>
+ <message gettext-domain="systemd">Authentication is required for rebooting the system while an application asked to inhibit it.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.reboot</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.halt">
+ <description gettext-domain="systemd">Halt the system</description>
+ <message gettext-domain="systemd">Authentication is required for halting the system.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.set-wall-message</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.halt-multiple-sessions">
+ <description gettext-domain="systemd">Halt the system while other users are logged in</description>
+ <message gettext-domain="systemd">Authentication is required for halting the system while other users are logged in.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.halt</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.halt-ignore-inhibit">
+ <description gettext-domain="systemd">Halt the system while an application asked to inhibit it</description>
+ <message gettext-domain="systemd">Authentication is required for halting the system while an application asked to inhibit it.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.halt</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.suspend">
+ <description gettext-domain="systemd">Suspend the system</description>
+ <message gettext-domain="systemd">Authentication is required for suspending the system.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.suspend-multiple-sessions">
+ <description gettext-domain="systemd">Suspend the system while other users are logged in</description>
+ <message gettext-domain="systemd">Authentication is required for suspending the system while other users are logged in.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.suspend</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.suspend-ignore-inhibit">
+ <description gettext-domain="systemd">Suspend the system while an application asked to inhibit it</description>
+ <message gettext-domain="systemd">Authentication is required for suspending the system while an application asked to inhibit it.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.suspend</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.hibernate">
+ <description gettext-domain="systemd">Hibernate the system</description>
+ <message gettext-domain="systemd">Authentication is required for hibernating the system.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.hibernate-multiple-sessions">
+ <description gettext-domain="systemd">Hibernate the system while other users are logged in</description>
+ <message gettext-domain="systemd">Authentication is required for hibernating the system while other users are logged in.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.hibernate</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.hibernate-ignore-inhibit">
+ <description gettext-domain="systemd">Hibernate the system while an application asked to inhibit it</description>
+ <message gettext-domain="systemd">Authentication is required for hibernating the system while an application asked to inhibit it.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.hibernate</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.manage">
+ <description gettext-domain="systemd">Manage active sessions, users and seats</description>
+ <message gettext-domain="systemd">Authentication is required for managing active sessions, users and seats.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.lock-sessions">
+ <description gettext-domain="systemd">Lock or unlock active sessions</description>
+ <message gettext-domain="systemd">Authentication is required to lock or unlock active sessions.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.set-reboot-to-firmware-setup">
+ <description gettext-domain="systemd">Allow indication to the firmware to boot to setup interface</description>
+ <message gettext-domain="systemd">Authentication is required to indicate to the firmware to boot to setup interface.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.reboot</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.set-wall-message">
+ <description gettext-domain="systemd">Set a wall message</description>
+ <message gettext-domain="systemd">Authentication is required to set a wall message</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/login/org.freedesktop.login1.service b/src/login/org.freedesktop.login1.service
new file mode 100644
index 0000000..68f1ed0
--- /dev/null
+++ b/src/login/org.freedesktop.login1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.login1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.login1.service
diff --git a/src/login/pam_systemd.c b/src/login/pam_systemd.c
new file mode 100644
index 0000000..997b74e
--- /dev/null
+++ b/src/login/pam_systemd.c
@@ -0,0 +1,770 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <endian.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pwd.h>
+#include <security/_pam_macros.h>
+#include <security/pam_ext.h>
+#include <security/pam_misc.h>
+#include <security/pam_modules.h>
+#include <security/pam_modutil.h>
+#include <sys/file.h>
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "hostname-util.h"
+#include "login-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static int parse_argv(
+ pam_handle_t *handle,
+ int argc, const char **argv,
+ const char **class,
+ const char **type,
+ const char **desktop,
+ bool *debug) {
+
+ unsigned i;
+
+ assert(argc >= 0);
+ assert(argc == 0 || argv);
+
+ for (i = 0; i < (unsigned) argc; i++) {
+ if (startswith(argv[i], "class=")) {
+ if (class)
+ *class = argv[i] + 6;
+
+ } else if (startswith(argv[i], "type=")) {
+ if (type)
+ *type = argv[i] + 5;
+
+ } else if (startswith(argv[i], "desktop=")) {
+ if (desktop)
+ *desktop = argv[i] + 8;
+
+ } else if (streq(argv[i], "debug")) {
+ if (debug)
+ *debug = true;
+
+ } else if (startswith(argv[i], "debug=")) {
+ int k;
+
+ k = parse_boolean(argv[i] + 6);
+ if (k < 0)
+ pam_syslog(handle, LOG_WARNING, "Failed to parse debug= argument, ignoring.");
+ else if (debug)
+ *debug = k;
+
+ } else
+ pam_syslog(handle, LOG_WARNING, "Unknown parameter '%s', ignoring", argv[i]);
+ }
+
+ return 0;
+}
+
+static int get_user_data(
+ pam_handle_t *handle,
+ const char **ret_username,
+ struct passwd **ret_pw) {
+
+ const char *username = NULL;
+ struct passwd *pw = NULL;
+ int r;
+
+ assert(handle);
+ assert(ret_username);
+ assert(ret_pw);
+
+ r = pam_get_user(handle, &username, NULL);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to get user name.");
+ return r;
+ }
+
+ if (isempty(username)) {
+ pam_syslog(handle, LOG_ERR, "User name not valid.");
+ return PAM_AUTH_ERR;
+ }
+
+ pw = pam_modutil_getpwnam(handle, username);
+ if (!pw) {
+ pam_syslog(handle, LOG_ERR, "Failed to get user data.");
+ return PAM_USER_UNKNOWN;
+ }
+
+ *ret_pw = pw;
+ *ret_username = username;
+
+ return PAM_SUCCESS;
+}
+
+static int socket_from_display(const char *display, char **path) {
+ size_t k;
+ char *f, *c;
+
+ assert(display);
+ assert(path);
+
+ if (!display_is_local(display))
+ return -EINVAL;
+
+ k = strspn(display+1, "0123456789");
+
+ f = new(char, STRLEN("/tmp/.X11-unix/X") + k + 1);
+ if (!f)
+ return -ENOMEM;
+
+ c = stpcpy(f, "/tmp/.X11-unix/X");
+ memcpy(c, display+1, k);
+ c[k] = 0;
+
+ *path = f;
+
+ return 0;
+}
+
+static int get_seat_from_display(const char *display, const char **seat, uint32_t *vtnr) {
+ union sockaddr_union sa = {};
+ _cleanup_free_ char *p = NULL, *tty = NULL;
+ _cleanup_close_ int fd = -1;
+ struct ucred ucred;
+ int v, r, salen;
+
+ assert(display);
+ assert(vtnr);
+
+ /* We deduce the X11 socket from the display name, then use
+ * SO_PEERCRED to determine the X11 server process, ask for
+ * the controlling tty of that and if it's a VC then we know
+ * the seat and the virtual terminal. Sounds ugly, is only
+ * semi-ugly. */
+
+ r = socket_from_display(display, &p);
+ if (r < 0)
+ return r;
+ salen = sockaddr_un_set_path(&sa.un, p);
+ if (salen < 0)
+ return salen;
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return -errno;
+
+ if (connect(fd, &sa.sa, salen) < 0)
+ return -errno;
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ r = get_ctty(ucred.pid, NULL, &tty);
+ if (r < 0)
+ return r;
+
+ v = vtnr_from_tty(tty);
+ if (v < 0)
+ return v;
+ else if (v == 0)
+ return -ENOENT;
+
+ if (seat)
+ *seat = "seat0";
+ *vtnr = (uint32_t) v;
+
+ return 0;
+}
+
+static int export_legacy_dbus_address(
+ pam_handle_t *handle,
+ uid_t uid,
+ const char *runtime) {
+
+ const char *s;
+ _cleanup_free_ char *t = NULL;
+ int r = PAM_BUF_ERR;
+
+ /* We need to export $DBUS_SESSION_BUS_ADDRESS because various applications will not connect
+ * correctly to the bus without it. This setting matches what dbus.socket does for the user
+ * session using 'systemctl --user set-environment'. We want to have the same configuration
+ * in processes started from the PAM session.
+ *
+ * The setting of the address is guarded by the access() check because it is also possible to compile
+ * dbus without --enable-user-session, in which case this socket is not used, and
+ * $DBUS_SESSION_BUS_ADDRESS should not be set. An alternative approach would to not do the access()
+ * check here, and let applications try on their own, by using "unix:path=%s/bus;autolaunch:". But we
+ * expect the socket to be present by the time we do this check, so we can just as well check once
+ * here. */
+
+ s = strjoina(runtime, "/bus");
+ if (access(s, F_OK) < 0)
+ return PAM_SUCCESS;
+
+ if (asprintf(&t, DEFAULT_USER_BUS_ADDRESS_FMT, runtime) < 0)
+ goto error;
+
+ r = pam_misc_setenv(handle, "DBUS_SESSION_BUS_ADDRESS", t, 0);
+ if (r != PAM_SUCCESS)
+ goto error;
+
+ return PAM_SUCCESS;
+
+error:
+ pam_syslog(handle, LOG_ERR, "Failed to set bus variable.");
+ return r;
+}
+
+static int append_session_memory_max(pam_handle_t *handle, sd_bus_message *m, const char *limit) {
+ uint64_t val;
+ int r;
+
+ if (isempty(limit))
+ return 0;
+
+ if (streq(limit, "infinity")) {
+ r = sd_bus_message_append(m, "(sv)", "MemoryMax", "t", (uint64_t)-1);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to append to bus message: %s", strerror(-r));
+ return r;
+ }
+ } else {
+ r = parse_permille(limit);
+ if (r >= 0) {
+ r = sd_bus_message_append(m, "(sv)", "MemoryMaxScale", "u", (uint32_t) (((uint64_t) r * UINT32_MAX) / 1000U));
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to append to bus message: %s", strerror(-r));
+ return r;
+ }
+ } else {
+ r = parse_size(limit, 1024, &val);
+ if (r >= 0) {
+ r = sd_bus_message_append(m, "(sv)", "MemoryMax", "t", val);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to append to bus message: %s", strerror(-r));
+ return r;
+ }
+ } else
+ pam_syslog(handle, LOG_WARNING, "Failed to parse systemd.limit: %s, ignoring.", limit);
+ }
+ }
+
+ return 0;
+}
+
+static int append_session_tasks_max(pam_handle_t *handle, sd_bus_message *m, const char *limit) {
+ uint64_t val;
+ int r;
+
+ /* No need to parse "infinity" here, it will be set unconditionally later in manager_start_scope() */
+ if (isempty(limit) || streq(limit, "infinity"))
+ return 0;
+
+ r = safe_atou64(limit, &val);
+ if (r >= 0) {
+ r = sd_bus_message_append(m, "(sv)", "TasksMax", "t", val);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to append to bus message: %s", strerror(-r));
+ return r;
+ }
+ } else
+ pam_syslog(handle, LOG_WARNING, "Failed to parse systemd.limit: %s, ignoring.", limit);
+
+ return 0;
+}
+
+static int append_session_cg_weight(pam_handle_t *handle, sd_bus_message *m, const char *limit, const char *field) {
+ uint64_t val;
+ int r;
+
+ if (isempty(limit))
+ return 0;
+
+ r = cg_weight_parse(limit, &val);
+ if (r >= 0) {
+ r = sd_bus_message_append(m, "(sv)", field, "t", val);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to append to bus message: %s", strerror(-r));
+ return r;
+ }
+ } else if (streq(field, "CPUWeight"))
+ pam_syslog(handle, LOG_WARNING, "Failed to parse systemd.cpu_weight: %s, ignoring.", limit);
+ else
+ pam_syslog(handle, LOG_WARNING, "Failed to parse systemd.io_weight: %s, ignoring.", limit);
+
+ return 0;
+}
+
+static const char* getenv_harder(pam_handle_t *handle, const char *key, const char *fallback) {
+ const char *v;
+
+ assert(handle);
+ assert(key);
+
+ /* Looks for an environment variable, preferrably in the environment block associated with the specified PAM
+ * handle, falling back to the process' block instead. */
+
+ v = pam_getenv(handle, key);
+ if (!isempty(v))
+ return v;
+
+ v = getenv(key);
+ if (!isempty(v))
+ return v;
+
+ return fallback;
+}
+
+static int update_environment(pam_handle_t *handle, const char *key, const char *value) {
+ int r;
+
+ assert(handle);
+ assert(key);
+
+ /* Updates the environment, but only if there's actually a value set. Also, log about errors */
+
+ if (isempty(value))
+ return PAM_SUCCESS;
+
+ r = pam_misc_setenv(handle, key, value, 0);
+ if (r != PAM_SUCCESS)
+ pam_syslog(handle, LOG_ERR, "Failed to set environment variable %s.", key);
+
+ return r;
+}
+
+static bool validate_runtime_directory(pam_handle_t *handle, const char *path, uid_t uid) {
+ struct stat st;
+
+ assert(path);
+
+ /* Just some extra paranoia: let's not set $XDG_RUNTIME_DIR if the directory we'd set it to isn't actually set
+ * up properly for us. */
+
+ if (lstat(path, &st) < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to stat() runtime directory '%s': %s", path, strerror(errno));
+ goto fail;
+ }
+
+ if (!S_ISDIR(st.st_mode)) {
+ pam_syslog(handle, LOG_ERR, "Runtime directory '%s' is not actually a directory.", path);
+ goto fail;
+ }
+
+ if (st.st_uid != uid) {
+ pam_syslog(handle, LOG_ERR, "Runtime directory '%s' is not owned by UID " UID_FMT ", as it should.", path, uid);
+ goto fail;
+ }
+
+ return true;
+
+fail:
+ pam_syslog(handle, LOG_WARNING, "Not setting $XDG_RUNTIME_DIR, as the directory is not in order.");
+ return false;
+}
+
+_public_ PAM_EXTERN int pam_sm_open_session(
+ pam_handle_t *handle,
+ int flags,
+ int argc, const char **argv) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ const char
+ *username, *id, *object_path, *runtime_path,
+ *service = NULL,
+ *tty = NULL, *display = NULL,
+ *remote_user = NULL, *remote_host = NULL,
+ *seat = NULL,
+ *type = NULL, *class = NULL,
+ *class_pam = NULL, *type_pam = NULL, *cvtnr = NULL, *desktop = NULL, *desktop_pam = NULL,
+ *memory_max = NULL, *tasks_max = NULL, *cpu_weight = NULL, *io_weight = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int session_fd = -1, existing, r;
+ bool debug = false, remote;
+ struct passwd *pw;
+ uint32_t vtnr = 0;
+ uid_t original_uid;
+
+ assert(handle);
+
+ /* Make this a NOP on non-logind systems */
+ if (!logind_running())
+ return PAM_SUCCESS;
+
+ if (parse_argv(handle,
+ argc, argv,
+ &class_pam,
+ &type_pam,
+ &desktop_pam,
+ &debug) < 0)
+ return PAM_SESSION_ERR;
+
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "pam-systemd initializing");
+
+ r = get_user_data(handle, &username, &pw);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to get user data.");
+ return r;
+ }
+
+ /* Make sure we don't enter a loop by talking to
+ * systemd-logind when it is actually waiting for the
+ * background to finish start-up. If the service is
+ * "systemd-user" we simply set XDG_RUNTIME_DIR and
+ * leave. */
+
+ pam_get_item(handle, PAM_SERVICE, (const void**) &service);
+ if (streq_ptr(service, "systemd-user")) {
+ char rt[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)];
+
+ xsprintf(rt, "/run/user/"UID_FMT, pw->pw_uid);
+ if (validate_runtime_directory(handle, rt, pw->pw_uid)) {
+ r = pam_misc_setenv(handle, "XDG_RUNTIME_DIR", rt, 0);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to set runtime dir.");
+ return r;
+ }
+ }
+
+ r = export_legacy_dbus_address(handle, pw->pw_uid, rt);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ return PAM_SUCCESS;
+ }
+
+ /* Otherwise, we ask logind to create a session for us */
+
+ pam_get_item(handle, PAM_XDISPLAY, (const void**) &display);
+ pam_get_item(handle, PAM_TTY, (const void**) &tty);
+ pam_get_item(handle, PAM_RUSER, (const void**) &remote_user);
+ pam_get_item(handle, PAM_RHOST, (const void**) &remote_host);
+
+ seat = getenv_harder(handle, "XDG_SEAT", NULL);
+ cvtnr = getenv_harder(handle, "XDG_VTNR", NULL);
+ type = getenv_harder(handle, "XDG_SESSION_TYPE", type_pam);
+ class = getenv_harder(handle, "XDG_SESSION_CLASS", class_pam);
+ desktop = getenv_harder(handle, "XDG_SESSION_DESKTOP", desktop_pam);
+
+ tty = strempty(tty);
+
+ if (strchr(tty, ':')) {
+ /* A tty with a colon is usually an X11 display, placed there to show up in utmp. We rearrange things
+ * and don't pretend that an X display was a tty. */
+ if (isempty(display))
+ display = tty;
+ tty = NULL;
+
+ } else if (streq(tty, "cron")) {
+ /* cron is setting PAM_TTY to "cron" for some reason (the commit carries no information why, but
+ * probably because it wants to set it to something as pam_time/pam_access/… require PAM_TTY to be set
+ * (as they otherwise even try to update it!) — but cron doesn't actually allocate a TTY for its forked
+ * off processes.) */
+ type = "unspecified";
+ class = "background";
+ tty = NULL;
+
+ } else if (streq(tty, "ssh")) {
+ /* ssh has been setting PAM_TTY to "ssh" (for the same reason as cron does this, see above. For further
+ * details look for "PAM_TTY_KLUDGE" in the openssh sources). */
+ type ="tty";
+ class = "user";
+ tty = NULL; /* This one is particularly sad, as this means that ssh sessions — even though usually
+ * associated with a pty — won't be tracked by their tty in logind. This is because ssh
+ * does the PAM session registration early for new connections, and registers a pty only
+ * much later (this is because it doesn't know yet if it needs one at all, as whether to
+ * register a pty or not is negotiated much later in the protocol). */
+
+ } else
+ /* Chop off leading /dev prefix that some clients specify, but others do not. */
+ tty = skip_dev_prefix(tty);
+
+ /* If this fails vtnr will be 0, that's intended */
+ if (!isempty(cvtnr))
+ (void) safe_atou32(cvtnr, &vtnr);
+
+ if (!isempty(display) && !vtnr) {
+ if (isempty(seat))
+ (void) get_seat_from_display(display, &seat, &vtnr);
+ else if (streq(seat, "seat0"))
+ (void) get_seat_from_display(display, NULL, &vtnr);
+ }
+
+ if (seat && !streq(seat, "seat0") && vtnr != 0) {
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "Ignoring vtnr %"PRIu32" for %s which is not seat0", vtnr, seat);
+ vtnr = 0;
+ }
+
+ if (isempty(type))
+ type = !isempty(display) ? "x11" :
+ !isempty(tty) ? "tty" : "unspecified";
+
+ if (isempty(class))
+ class = streq(type, "unspecified") ? "background" : "user";
+
+ remote = !isempty(remote_host) && !is_localhost(remote_host);
+
+ (void) pam_get_data(handle, "systemd.memory_max", (const void **)&memory_max);
+ (void) pam_get_data(handle, "systemd.tasks_max", (const void **)&tasks_max);
+ (void) pam_get_data(handle, "systemd.cpu_weight", (const void **)&cpu_weight);
+ (void) pam_get_data(handle, "systemd.io_weight", (const void **)&io_weight);
+
+ /* Talk to logind over the message bus */
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to connect to system bus: %s", strerror(-r));
+ return PAM_SESSION_ERR;
+ }
+
+ if (debug) {
+ pam_syslog(handle, LOG_DEBUG, "Asking logind to create session: "
+ "uid="UID_FMT" pid="PID_FMT" service=%s type=%s class=%s desktop=%s seat=%s vtnr=%"PRIu32" tty=%s display=%s remote=%s remote_user=%s remote_host=%s",
+ pw->pw_uid, getpid_cached(),
+ strempty(service),
+ type, class, strempty(desktop),
+ strempty(seat), vtnr, strempty(tty), strempty(display),
+ yes_no(remote), strempty(remote_user), strempty(remote_host));
+ pam_syslog(handle, LOG_DEBUG, "Session limits: "
+ "memory_max=%s tasks_max=%s cpu_weight=%s io_weight=%s",
+ strna(memory_max), strna(tasks_max), strna(cpu_weight), strna(io_weight));
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "CreateSession");
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to create CreateSession method call: %s", strerror(-r));
+ return PAM_SESSION_ERR;
+ }
+
+ r = sd_bus_message_append(m, "uusssssussbss",
+ (uint32_t) pw->pw_uid,
+ 0,
+ service,
+ type,
+ class,
+ desktop,
+ seat,
+ vtnr,
+ tty,
+ display,
+ remote,
+ remote_user,
+ remote_host);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to append to bus message: %s", strerror(-r));
+ return PAM_SESSION_ERR;
+ }
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to open message container: %s", strerror(-r));
+ return PAM_SYSTEM_ERR;
+ }
+
+ r = append_session_memory_max(handle, m, memory_max);
+ if (r < 0)
+ return PAM_SESSION_ERR;
+
+ r = append_session_tasks_max(handle, m, tasks_max);
+ if (r < 0)
+ return PAM_SESSION_ERR;
+
+ r = append_session_cg_weight(handle, m, cpu_weight, "CPUWeight");
+ if (r < 0)
+ return PAM_SESSION_ERR;
+
+ r = append_session_cg_weight(handle, m, io_weight, "IOWeight");
+ if (r < 0)
+ return PAM_SESSION_ERR;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to close message container: %s", strerror(-r));
+ return PAM_SYSTEM_ERR;
+ }
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_SESSION_BUSY)) {
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "Not creating session: %s", bus_error_message(&error, r));
+ return PAM_SUCCESS;
+ } else {
+ pam_syslog(handle, LOG_ERR, "Failed to create session: %s", bus_error_message(&error, r));
+ return PAM_SYSTEM_ERR;
+ }
+ }
+
+ r = sd_bus_message_read(reply,
+ "soshusub",
+ &id,
+ &object_path,
+ &runtime_path,
+ &session_fd,
+ &original_uid,
+ &seat,
+ &vtnr,
+ &existing);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to parse message: %s", strerror(-r));
+ return PAM_SESSION_ERR;
+ }
+
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "Reply from logind: "
+ "id=%s object_path=%s runtime_path=%s session_fd=%d seat=%s vtnr=%u original_uid=%u",
+ id, object_path, runtime_path, session_fd, seat, vtnr, original_uid);
+
+ r = update_environment(handle, "XDG_SESSION_ID", id);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ if (original_uid == pw->pw_uid) {
+ /* Don't set $XDG_RUNTIME_DIR if the user we now
+ * authenticated for does not match the original user
+ * of the session. We do this in order not to result
+ * in privileged apps clobbering the runtime directory
+ * unnecessarily. */
+
+ if (validate_runtime_directory(handle, runtime_path, pw->pw_uid)) {
+ r = update_environment(handle, "XDG_RUNTIME_DIR", runtime_path);
+ if (r != PAM_SUCCESS)
+ return r;
+ }
+
+ r = export_legacy_dbus_address(handle, pw->pw_uid, runtime_path);
+ if (r != PAM_SUCCESS)
+ return r;
+ }
+
+ /* Most likely we got the session/type/class from environment variables, but might have gotten the data
+ * somewhere else (for example PAM module parameters). Let's now update the environment variables, so that this
+ * data is inherited into the session processes, and programs can rely on them to be initialized. */
+
+ r = update_environment(handle, "XDG_SESSION_TYPE", type);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = update_environment(handle, "XDG_SESSION_CLASS", class);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = update_environment(handle, "XDG_SESSION_DESKTOP", desktop);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = update_environment(handle, "XDG_SEAT", seat);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ if (vtnr > 0) {
+ char buf[DECIMAL_STR_MAX(vtnr)];
+ sprintf(buf, "%u", vtnr);
+
+ r = update_environment(handle, "XDG_VTNR", buf);
+ if (r != PAM_SUCCESS)
+ return r;
+ }
+
+ r = pam_set_data(handle, "systemd.existing", INT_TO_PTR(!!existing), NULL);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to install existing flag.");
+ return r;
+ }
+
+ if (session_fd >= 0) {
+ session_fd = fcntl(session_fd, F_DUPFD_CLOEXEC, 3);
+ if (session_fd < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to dup session fd: %m");
+ return PAM_SESSION_ERR;
+ }
+
+ r = pam_set_data(handle, "systemd.session-fd", FD_TO_PTR(session_fd), NULL);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to install session fd.");
+ safe_close(session_fd);
+ return r;
+ }
+ }
+
+ return PAM_SUCCESS;
+}
+
+_public_ PAM_EXTERN int pam_sm_close_session(
+ pam_handle_t *handle,
+ int flags,
+ int argc, const char **argv) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ const void *existing = NULL;
+ const char *id;
+ int r;
+
+ assert(handle);
+
+ /* Only release session if it wasn't pre-existing when we
+ * tried to create it */
+ (void) pam_get_data(handle, "systemd.existing", &existing);
+
+ id = pam_getenv(handle, "XDG_SESSION_ID");
+ if (id && !existing) {
+
+ /* Before we go and close the FIFO we need to tell
+ * logind that this is a clean session shutdown, so
+ * that it doesn't just go and slaughter us
+ * immediately after closing the fd */
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to connect to system bus: %s", strerror(-r));
+ return PAM_SESSION_ERR;
+ }
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "ReleaseSession",
+ &error,
+ NULL,
+ "s",
+ id);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to release session: %s", bus_error_message(&error, r));
+ return PAM_SESSION_ERR;
+ }
+ }
+
+ /* Note that we are knowingly leaking the FIFO fd here. This
+ * way, logind can watch us die. If we closed it here it would
+ * not have any clue when that is completed. Given that one
+ * cannot really have multiple PAM sessions open from the same
+ * process this means we will leak one FD at max. */
+
+ return PAM_SUCCESS;
+}
diff --git a/src/login/pam_systemd.sym b/src/login/pam_systemd.sym
new file mode 100644
index 0000000..62d5d26
--- /dev/null
+++ b/src/login/pam_systemd.sym
@@ -0,0 +1,15 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1+
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+***/
+
+{
+global:
+ pam_sm_close_session;
+ pam_sm_open_session;
+local: *;
+};
diff --git a/src/login/sysfs-show.c b/src/login/sysfs-show.c
new file mode 100644
index 0000000..a41d83b
--- /dev/null
+++ b/src/login/sysfs-show.c
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-enumerator-private.h"
+#include "locale-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "sysfs-show.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static int show_sysfs_one(
+ const char *seat,
+ sd_device **dev_list,
+ size_t *i_dev,
+ size_t n_dev,
+ const char *sub,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags) {
+
+ size_t max_width;
+ int r;
+
+ assert(seat);
+ assert(dev_list);
+ assert(i_dev);
+ assert(prefix);
+
+ if (flags & OUTPUT_FULL_WIDTH)
+ max_width = (size_t) -1;
+ else if (n_columns < 10)
+ max_width = 10;
+ else
+ max_width = n_columns;
+
+ while (*i_dev < n_dev) {
+ const char *sysfs, *sn, *name = NULL, *subsystem, *sysname;
+ _cleanup_free_ char *k = NULL, *l = NULL;
+ size_t lookahead;
+ bool is_master;
+
+ if (sd_device_get_syspath(dev_list[*i_dev], &sysfs) < 0 ||
+ !path_startswith(sysfs, sub))
+ return 0;
+
+ if (sd_device_get_property_value(dev_list[*i_dev], "ID_SEAT", &sn) < 0 || isempty(sn))
+ sn = "seat0";
+
+ /* Explicitly also check for tag 'seat' here */
+ if (!streq(seat, sn) ||
+ sd_device_has_tag(dev_list[*i_dev], "seat") <= 0 ||
+ sd_device_get_subsystem(dev_list[*i_dev], &subsystem) < 0 ||
+ sd_device_get_sysname(dev_list[*i_dev], &sysname) < 0) {
+ (*i_dev)++;
+ continue;
+ }
+
+ is_master = sd_device_has_tag(dev_list[*i_dev], "master-of-seat") > 0;
+
+ if (sd_device_get_sysattr_value(dev_list[*i_dev], "name", &name) < 0)
+ (void) sd_device_get_sysattr_value(dev_list[*i_dev], "id", &name);
+
+ /* Look if there's more coming after this */
+ for (lookahead = *i_dev + 1; lookahead < n_dev; lookahead++) {
+ const char *lookahead_sysfs;
+
+ if (sd_device_get_syspath(dev_list[lookahead], &lookahead_sysfs) < 0)
+ continue;
+
+ if (path_startswith(lookahead_sysfs, sub) &&
+ !path_startswith(lookahead_sysfs, sysfs)) {
+ const char *lookahead_sn;
+
+ if (sd_device_get_property_value(dev_list[lookahead], "ID_SEAT", &lookahead_sn) < 0 ||
+ isempty(lookahead_sn))
+ lookahead_sn = "seat0";
+
+ if (streq(seat, lookahead_sn) && sd_device_has_tag(dev_list[lookahead], "seat") > 0)
+ break;
+ }
+ }
+
+ k = ellipsize(sysfs, max_width, 20);
+ if (!k)
+ return -ENOMEM;
+
+ printf("%s%s%s\n", prefix, special_glyph(lookahead < n_dev ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT), k);
+
+ if (asprintf(&l,
+ "%s%s:%s%s%s%s",
+ is_master ? "[MASTER] " : "",
+ subsystem, sysname,
+ name ? " \"" : "", strempty(name), name ? "\"" : "") < 0)
+ return -ENOMEM;
+
+ free(k);
+ k = ellipsize(l, max_width, 70);
+ if (!k)
+ return -ENOMEM;
+
+ printf("%s%s%s\n", prefix, lookahead < n_dev ? special_glyph(SPECIAL_GLYPH_TREE_VERTICAL) : " ", k);
+
+ if (++(*i_dev) < n_dev) {
+ _cleanup_free_ char *p = NULL;
+
+ p = strappend(prefix, lookahead < n_dev ? special_glyph(SPECIAL_GLYPH_TREE_VERTICAL) : " ");
+ if (!p)
+ return -ENOMEM;
+
+ r = show_sysfs_one(seat, dev_list, i_dev, n_dev, sysfs, p,
+ n_columns == (unsigned) -1 || n_columns < 2 ? n_columns : n_columns - 2,
+ flags);
+ if (r < 0)
+ return r;
+ }
+
+ }
+
+ return 0;
+}
+
+int show_sysfs(const char *seat, const char *prefix, unsigned n_columns, OutputFlags flags) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ size_t n_dev = 0, i = 0;
+ sd_device **dev_list;
+ int r;
+
+ if (n_columns <= 0)
+ n_columns = columns();
+
+ prefix = strempty(prefix);
+
+ if (isempty(seat))
+ seat = "seat0";
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_tag(e, streq(seat, "seat0") ? "seat" : seat);
+ if (r < 0)
+ return r;
+
+ r = device_enumerator_scan_devices(e);
+ if (r < 0)
+ return r;
+
+ dev_list = device_enumerator_get_devices(e, &n_dev);
+
+ if (dev_list && n_dev > 0)
+ show_sysfs_one(seat, dev_list, &i, n_dev, "/", prefix, n_columns, flags);
+ else
+ printf("%s%s%s\n", prefix, special_glyph(SPECIAL_GLYPH_TREE_RIGHT), "(none)");
+
+ return 0;
+}
diff --git a/src/login/sysfs-show.h b/src/login/sysfs-show.h
new file mode 100644
index 0000000..c05b977
--- /dev/null
+++ b/src/login/sysfs-show.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+#include "output-mode.h"
+
+int show_sysfs(const char *seat, const char *prefix, unsigned columns, OutputFlags flags);
diff --git a/src/login/systemd-user.m4 b/src/login/systemd-user.m4
new file mode 100644
index 0000000..4f85b4b
--- /dev/null
+++ b/src/login/systemd-user.m4
@@ -0,0 +1,12 @@
+# This file is part of systemd.
+#
+# Used by systemd --user instances.
+
+account required pam_unix.so
+m4_ifdef(`HAVE_SELINUX',
+session required pam_selinux.so close
+session required pam_selinux.so nottys open
+)m4_dnl
+session required pam_loginuid.so
+session optional pam_keyinit.so force revoke
+session optional pam_systemd.so
diff --git a/src/login/test-inhibit.c b/src/login/test-inhibit.c
new file mode 100644
index 0000000..a891b0a
--- /dev/null
+++ b/src/login/test-inhibit.c
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "bus-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "util.h"
+
+static int inhibit(sd_bus *bus, const char *what) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *who = "Test Tool", *reason = "Just because!", *mode = "block";
+ int fd;
+ int r;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "Inhibit",
+ &error,
+ &reply,
+ "ssss", what, who, reason, mode);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read_basic(reply, SD_BUS_TYPE_UNIX_FD, &fd);
+ assert_se(r >= 0);
+ assert_se(fd >= 0);
+
+ return dup(fd);
+}
+
+static void print_inhibitors(sd_bus *bus) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *what, *who, *why, *mode;
+ uint32_t uid, pid;
+ unsigned n = 0;
+ int r;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "ListInhibitors",
+ &error,
+ &reply,
+ "");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssuu)");
+ assert_se(r >= 0);
+
+ while ((r = sd_bus_message_read(reply, "(ssssuu)", &what, &who, &why, &mode, &uid, &pid)) > 0) {
+ printf("what=<%s> who=<%s> why=<%s> mode=<%s> uid=<%"PRIu32"> pid=<%"PRIu32">\n",
+ what, who, why, mode, uid, pid);
+
+ n++;
+ }
+ assert_se(r >= 0);
+
+ printf("%u inhibitors\n", n);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ int fd1, fd2;
+ int r;
+
+ r = sd_bus_open_system(&bus);
+ assert_se(r >= 0);
+
+ print_inhibitors(bus);
+
+ fd1 = inhibit(bus, "sleep");
+ assert_se(fd1 >= 0);
+ print_inhibitors(bus);
+
+ fd2 = inhibit(bus, "idle:shutdown");
+ assert_se(fd2 >= 0);
+ print_inhibitors(bus);
+
+ safe_close(fd1);
+ sleep(1);
+ print_inhibitors(bus);
+
+ safe_close(fd2);
+ sleep(1);
+ print_inhibitors(bus);
+
+ return 0;
+}
diff --git a/src/login/test-login-shared.c b/src/login/test-login-shared.c
new file mode 100644
index 0000000..02ff57a
--- /dev/null
+++ b/src/login/test-login-shared.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "login-util.h"
+#include "macro.h"
+
+static void test_session_id_valid(void) {
+ assert_se(session_id_valid("c1"));
+ assert_se(session_id_valid("1234"));
+
+ assert_se(!session_id_valid("1-2"));
+ assert_se(!session_id_valid(""));
+ assert_se(!session_id_valid("\tid"));
+}
+
+int main(int argc, char* argv[]) {
+ log_parse_environment();
+ log_open();
+
+ test_session_id_valid();
+
+ return 0;
+}
diff --git a/src/login/test-login-tables.c b/src/login/test-login-tables.c
new file mode 100644
index 0000000..02b8719
--- /dev/null
+++ b/src/login/test-login-tables.c
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "logind-action.h"
+#include "logind-session.h"
+#include "test-tables.h"
+
+int main(int argc, char **argv) {
+ test_table(handle_action, HANDLE_ACTION);
+ test_table(inhibit_mode, INHIBIT_MODE);
+ test_table(kill_who, KILL_WHO);
+ test_table(session_class, SESSION_CLASS);
+ test_table(session_state, SESSION_STATE);
+ test_table(session_type, SESSION_TYPE);
+ test_table(user_state, USER_STATE);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/login/user-runtime-dir.c b/src/login/user-runtime-dir.c
new file mode 100644
index 0000000..eb66e18
--- /dev/null
+++ b/src/login/user-runtime-dir.c
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdint.h>
+#include <sys/mount.h>
+
+#include "sd-bus.h"
+
+#include "bus-error.h"
+#include "fs-util.h"
+#include "label.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int acquire_runtime_dir_size(uint64_t *ret) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = sd_bus_get_property_trivial(bus, "org.freedesktop.login1", "/org/freedesktop/login1", "org.freedesktop.login1.Manager", "RuntimeDirectorySize", &error, 't', ret);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire runtime directory size: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int user_mkdir_runtime_path(
+ const char *runtime_path,
+ uid_t uid,
+ gid_t gid,
+ uint64_t runtime_dir_size) {
+
+ int r;
+
+ assert(runtime_path);
+ assert(path_is_absolute(runtime_path));
+ assert(uid_is_valid(uid));
+ assert(gid_is_valid(gid));
+
+ r = mkdir_safe_label("/run/user", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /run/user: %m");
+
+ if (path_is_mount_point(runtime_path, NULL, 0) >= 0)
+ log_debug("%s is already a mount point", runtime_path);
+ else {
+ char options[sizeof("mode=0700,uid=,gid=,size=,smackfsroot=*")
+ + DECIMAL_STR_MAX(uid_t)
+ + DECIMAL_STR_MAX(gid_t)
+ + DECIMAL_STR_MAX(uint64_t)];
+
+ xsprintf(options,
+ "mode=0700,uid=" UID_FMT ",gid=" GID_FMT ",size=%" PRIu64 "%s",
+ uid, gid, runtime_dir_size,
+ mac_smack_use() ? ",smackfsroot=*" : "");
+
+ (void) mkdir_label(runtime_path, 0700);
+
+ r = mount("tmpfs", runtime_path, "tmpfs", MS_NODEV|MS_NOSUID, options);
+ if (r < 0) {
+ if (!IN_SET(errno, EPERM, EACCES)) {
+ r = log_error_errno(errno, "Failed to mount per-user tmpfs directory %s: %m", runtime_path);
+ goto fail;
+ }
+
+ log_debug_errno(errno, "Failed to mount per-user tmpfs directory %s.\n"
+ "Assuming containerized execution, ignoring: %m", runtime_path);
+
+ r = chmod_and_chown(runtime_path, 0700, uid, gid);
+ if (r < 0) {
+ log_error_errno(r, "Failed to change ownership and mode of \"%s\": %m", runtime_path);
+ goto fail;
+ }
+ }
+
+ r = label_fix(runtime_path, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to fix label of \"%s\", ignoring: %m", runtime_path);
+ }
+
+ return 0;
+
+fail:
+ /* Try to clean up, but ignore errors */
+ (void) rmdir(runtime_path);
+ return r;
+}
+
+static int user_remove_runtime_path(const char *runtime_path) {
+ int r;
+
+ assert(runtime_path);
+ assert(path_is_absolute(runtime_path));
+
+ r = rm_rf(runtime_path, 0);
+ if (r < 0)
+ log_debug_errno(r, "Failed to remove runtime directory %s (before unmounting), ignoring: %m", runtime_path);
+
+ /* Ignore cases where the directory isn't mounted, as that's quite possible, if we lacked the permissions to
+ * mount something */
+ r = umount2(runtime_path, MNT_DETACH);
+ if (r < 0 && !IN_SET(errno, EINVAL, ENOENT))
+ log_debug_errno(errno, "Failed to unmount user runtime directory %s, ignoring: %m", runtime_path);
+
+ r = rm_rf(runtime_path, REMOVE_ROOT);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to remove runtime directory %s (after unmounting): %m", runtime_path);
+
+ return 0;
+}
+
+static int do_mount(const char *user) {
+ char runtime_path[sizeof("/run/user") + DECIMAL_STR_MAX(uid_t)];
+ uint64_t runtime_dir_size;
+ uid_t uid;
+ gid_t gid;
+ int r;
+
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r,
+ r == -ESRCH ? "No such user \"%s\"" :
+ r == -ENOMSG ? "UID \"%s\" is invalid or has an invalid main group"
+ : "Failed to look up user \"%s\": %m",
+ user);
+
+ r = acquire_runtime_dir_size(&runtime_dir_size);
+ if (r < 0)
+ return r;
+
+ xsprintf(runtime_path, "/run/user/" UID_FMT, uid);
+
+ log_debug("Will mount %s owned by "UID_FMT":"GID_FMT, runtime_path, uid, gid);
+ return user_mkdir_runtime_path(runtime_path, uid, gid, runtime_dir_size);
+}
+
+static int do_umount(const char *user) {
+ char runtime_path[sizeof("/run/user") + DECIMAL_STR_MAX(uid_t)];
+ uid_t uid;
+ int r;
+
+ /* The user may be already removed. So, first try to parse the string by parse_uid(),
+ * and if it fails, fallback to get_user_creds().*/
+ if (parse_uid(user, &uid) < 0) {
+ r = get_user_creds(&user, &uid, NULL, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r,
+ r == -ESRCH ? "No such user \"%s\"" :
+ r == -ENOMSG ? "UID \"%s\" is invalid or has an invalid main group"
+ : "Failed to look up user \"%s\": %m",
+ user);
+ }
+
+ xsprintf(runtime_path, "/run/user/" UID_FMT, uid);
+
+ log_debug("Will remove %s", runtime_path);
+ return user_remove_runtime_path(runtime_path);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ if (argc != 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program takes two arguments.");
+ if (!STR_IN_SET(argv[1], "start", "stop"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "First argument must be either \"start\" or \"stop\".");
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return log_error_errno(r, "Could not initialize labelling: %m\n");
+
+ umask(0022);
+
+ if (streq(argv[1], "start"))
+ return do_mount(argv[2]);
+ if (streq(argv[1], "stop"))
+ return do_umount(argv[2]);
+ assert_not_reached("Unknown verb!");
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/machine-id-setup/machine-id-setup-main.c b/src/machine-id-setup/machine-id-setup-main.c
new file mode 100644
index 0000000..1b575d7
--- /dev/null
+++ b/src/machine-id-setup/machine-id-setup-main.c
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "id128-util.h"
+#include "log.h"
+#include "machine-id-setup.h"
+#include "main-func.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "util.h"
+
+static char *arg_root = NULL;
+static bool arg_commit = false;
+static bool arg_print = false;
+
+STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-machine-id-setup", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Initialize /etc/machine-id from a random source.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --root=ROOT Filesystem root\n"
+ " --commit Commit transient ID\n"
+ " --print Print used machine ID\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_ROOT,
+ ARG_COMMIT,
+ ARG_PRINT,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "commit", no_argument, NULL, ARG_COMMIT },
+ { "print", no_argument, NULL, ARG_PRINT },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hqcv", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_ROOT:
+ r = parse_path_argument_and_warn(optarg, true, &arg_root);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_COMMIT:
+ arg_commit = true;
+ break;
+
+ case ARG_PRINT:
+ arg_print = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Extraneous arguments");
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ char buf[SD_ID128_STRING_MAX];
+ sd_id128_t id;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_commit) {
+ const char *etc_machine_id;
+
+ r = machine_id_commit(arg_root);
+ if (r < 0)
+ return r;
+
+ etc_machine_id = prefix_roota(arg_root, "/etc/machine-id");
+ r = id128_read(etc_machine_id, ID128_PLAIN, &id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read machine ID back: %m");
+ } else {
+ r = machine_id_setup(arg_root, SD_ID128_NULL, &id);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_print)
+ puts(sd_id128_to_string(id, buf));
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/machine/image-dbus.c b/src/machine/image-dbus.c
new file mode 100644
index 0000000..7e7f0d5
--- /dev/null
+++ b/src/machine/image-dbus.c
@@ -0,0 +1,498 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/file.h>
+#include <sys/mount.h>
+
+#include "alloc-util.h"
+#include "bus-label.h"
+#include "bus-util.h"
+#include "copy.h"
+#include "dissect-image.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "image-dbus.h"
+#include "io-util.h"
+#include "loop-util.h"
+#include "machine-image.h"
+#include "missing_capability.h"
+#include "mount-util.h"
+#include "process-util.h"
+#include "raw-clone.h"
+#include "strv.h"
+#include "user-util.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, image_type, ImageType);
+
+int bus_image_method_remove(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
+ Image *image = userdata;
+ Manager *m = image->userdata;
+ pid_t child;
+ int r;
+
+ assert(message);
+ assert(image);
+
+ if (m->n_operations >= OPERATIONS_MAX)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many ongoing operations.");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m");
+
+ r = safe_fork("(sd-imgrm)", FORK_RESET_SIGNALS, &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
+
+ r = image_remove(image);
+ if (r < 0) {
+ (void) write(errno_pipe_fd[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+ r = operation_new(m, NULL, child, message, errno_pipe_fd[0], NULL);
+ if (r < 0) {
+ (void) sigkill_wait(child);
+ return r;
+ }
+
+ errno_pipe_fd[0] = -1;
+
+ return 1;
+}
+
+int bus_image_method_rename(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Image *image = userdata;
+ Manager *m = image->userdata;
+ const char *new_name;
+ int r;
+
+ assert(message);
+ assert(image);
+
+ r = sd_bus_message_read(message, "s", &new_name);
+ if (r < 0)
+ return r;
+
+ if (!image_name_is_valid(new_name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Image name '%s' is invalid.", new_name);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = image_rename(image, new_name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_image_method_clone(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
+ Image *image = userdata;
+ Manager *m = image->userdata;
+ const char *new_name;
+ int r, read_only;
+ pid_t child;
+
+ assert(message);
+ assert(image);
+ assert(m);
+
+ if (m->n_operations >= OPERATIONS_MAX)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many ongoing operations.");
+
+ r = sd_bus_message_read(message, "sb", &new_name, &read_only);
+ if (r < 0)
+ return r;
+
+ if (!image_name_is_valid(new_name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Image name '%s' is invalid.", new_name);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m");
+
+ r = safe_fork("(sd-imgclone)", FORK_RESET_SIGNALS, &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
+
+ r = image_clone(image, new_name, read_only);
+ if (r < 0) {
+ (void) write(errno_pipe_fd[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+ r = operation_new(m, NULL, child, message, errno_pipe_fd[0], NULL);
+ if (r < 0) {
+ (void) sigkill_wait(child);
+ return r;
+ }
+
+ errno_pipe_fd[0] = -1;
+
+ return 1;
+}
+
+int bus_image_method_mark_read_only(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Image *image = userdata;
+ Manager *m = image->userdata;
+ int r, read_only;
+
+ assert(message);
+
+ r = sd_bus_message_read(message, "b", &read_only);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = image_read_only(image, read_only);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_image_method_set_limit(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Image *image = userdata;
+ Manager *m = image->userdata;
+ uint64_t limit;
+ int r;
+
+ assert(message);
+
+ r = sd_bus_message_read(message, "t", &limit);
+ if (r < 0)
+ return r;
+ if (!FILE_SIZE_VALID_OR_INFINITY(limit))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "New limit out of range");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = image_set_limit(image, limit);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_image_method_get_hostname(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Image *image = userdata;
+ int r;
+
+ if (!image->metadata_valid) {
+ r = image_read_metadata(image);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to read image metadata: %m");
+ }
+
+ return sd_bus_reply_method_return(message, "s", image->hostname);
+}
+
+int bus_image_method_get_machine_id(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Image *image = userdata;
+ int r;
+
+ if (!image->metadata_valid) {
+ r = image_read_metadata(image);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to read image metadata: %m");
+ }
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ if (sd_id128_is_null(image->machine_id)) /* Add an empty array if the ID is zero */
+ r = sd_bus_message_append(reply, "ay", 0);
+ else
+ r = sd_bus_message_append_array(reply, 'y', image->machine_id.bytes, 16);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+int bus_image_method_get_machine_info(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Image *image = userdata;
+ int r;
+
+ if (!image->metadata_valid) {
+ r = image_read_metadata(image);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to read image metadata: %m");
+ }
+
+ return bus_reply_pair_array(message, image->machine_info);
+}
+
+int bus_image_method_get_os_release(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Image *image = userdata;
+ int r;
+
+ if (!image->metadata_valid) {
+ r = image_read_metadata(image);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to read image metadata: %m");
+ }
+
+ return bus_reply_pair_array(message, image->os_release);
+}
+
+const sd_bus_vtable image_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Name", "s", NULL, offsetof(Image, name), 0),
+ SD_BUS_PROPERTY("Path", "s", NULL, offsetof(Image, path), 0),
+ SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Image, type), 0),
+ SD_BUS_PROPERTY("ReadOnly", "b", bus_property_get_bool, offsetof(Image, read_only), 0),
+ SD_BUS_PROPERTY("CreationTimestamp", "t", NULL, offsetof(Image, crtime), 0),
+ SD_BUS_PROPERTY("ModificationTimestamp", "t", NULL, offsetof(Image, mtime), 0),
+ SD_BUS_PROPERTY("Usage", "t", NULL, offsetof(Image, usage), 0),
+ SD_BUS_PROPERTY("Limit", "t", NULL, offsetof(Image, limit), 0),
+ SD_BUS_PROPERTY("UsageExclusive", "t", NULL, offsetof(Image, usage_exclusive), 0),
+ SD_BUS_PROPERTY("LimitExclusive", "t", NULL, offsetof(Image, limit_exclusive), 0),
+ SD_BUS_METHOD("Remove", NULL, NULL, bus_image_method_remove, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Rename", "s", NULL, bus_image_method_rename, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Clone", "sb", NULL, bus_image_method_clone, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("MarkReadOnly", "b", NULL, bus_image_method_mark_read_only, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLimit", "t", NULL, bus_image_method_set_limit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetHostname", NULL, "s", bus_image_method_get_hostname, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetMachineID", NULL, "ay", bus_image_method_get_machine_id, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetMachineInfo", NULL, "a{ss}", bus_image_method_get_machine_info, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetOSRelease", NULL, "a{ss}", bus_image_method_get_os_release, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END
+};
+
+static int image_flush_cache(sd_event_source *s, void *userdata) {
+ Manager *m = userdata;
+
+ assert(s);
+ assert(m);
+
+ hashmap_clear(m->image_cache);
+ return 0;
+}
+
+int image_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ _cleanup_free_ char *e = NULL;
+ Manager *m = userdata;
+ Image *image = NULL;
+ const char *p;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+
+ p = startswith(path, "/org/freedesktop/machine1/image/");
+ if (!p)
+ return 0;
+
+ e = bus_label_unescape(p);
+ if (!e)
+ return -ENOMEM;
+
+ image = hashmap_get(m->image_cache, e);
+ if (image) {
+ *found = image;
+ return 1;
+ }
+
+ r = hashmap_ensure_allocated(&m->image_cache, &image_hash_ops);
+ if (r < 0)
+ return r;
+
+ if (!m->image_cache_defer_event) {
+ r = sd_event_add_defer(m->event, &m->image_cache_defer_event, image_flush_cache, m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(m->image_cache_defer_event, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_source_set_enabled(m->image_cache_defer_event, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return r;
+
+ r = image_find(IMAGE_MACHINE, e, &image);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ image->userdata = m;
+
+ r = hashmap_put(m->image_cache, image->name, image);
+ if (r < 0) {
+ image_unref(image);
+ return r;
+ }
+
+ *found = image;
+ return 1;
+}
+
+char *image_bus_path(const char *name) {
+ _cleanup_free_ char *e = NULL;
+
+ assert(name);
+
+ e = bus_label_escape(name);
+ if (!e)
+ return NULL;
+
+ return strappend("/org/freedesktop/machine1/image/", e);
+}
+
+int image_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_hashmap_free_ Hashmap *images = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ Image *image;
+ Iterator i;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(nodes);
+
+ images = hashmap_new(&image_hash_ops);
+ if (!images)
+ return -ENOMEM;
+
+ r = image_discover(IMAGE_MACHINE, images);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(image, images, i) {
+ char *p;
+
+ p = image_bus_path(image->name);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_consume(&l, p);
+ if (r < 0)
+ return r;
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
diff --git a/src/machine/image-dbus.h b/src/machine/image-dbus.h
new file mode 100644
index 0000000..a918b77
--- /dev/null
+++ b/src/machine/image-dbus.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "machined.h"
+
+extern const sd_bus_vtable image_vtable[];
+
+char *image_bus_path(const char *name);
+
+int image_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error);
+int image_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error);
+
+int bus_image_method_remove(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_rename(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_clone(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_mark_read_only(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_set_limit(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_get_hostname(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_get_machine_id(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_get_machine_info(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_get_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error);
diff --git a/src/machine/machine-dbus.c b/src/machine/machine-dbus.c
new file mode 100644
index 0000000..7a558df
--- /dev/null
+++ b/src/machine/machine-dbus.c
@@ -0,0 +1,1469 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+
+/* When we include libgen.h because we need dirname() we immediately
+ * undefine basename() since libgen.h defines it as a macro to the POSIX
+ * version which is really broken. We prefer GNU basename(). */
+#include <libgen.h>
+#undef basename
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-internal.h"
+#include "bus-label.h"
+#include "bus-util.h"
+#include "copy.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "in-addr-util.h"
+#include "io-util.h"
+#include "local-addresses.h"
+#include "machine-dbus.h"
+#include "machine.h"
+#include "missing_capability.h"
+#include "mkdir.h"
+#include "os-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_class, machine_class, MachineClass);
+static BUS_DEFINE_PROPERTY_GET2(property_get_state, "s", Machine, machine_get_state, machine_state_to_string);
+
+static int property_get_netif(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Machine *m = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ return sd_bus_message_append_array(reply, 'i', m->netif, m->n_netif * sizeof(int));
+}
+
+int bus_machine_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Machine *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = machine_stop(m);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_machine_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Machine *m = userdata;
+ const char *swho;
+ int32_t signo;
+ KillWho who;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "si", &swho, &signo);
+ if (r < 0)
+ return r;
+
+ if (isempty(swho))
+ who = KILL_ALL;
+ else {
+ who = kill_who_from_string(swho);
+ if (who < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid kill parameter '%s'", swho);
+ }
+
+ if (!SIGNAL_VALID(signo))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid signal %i", signo);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = machine_kill(m, who, signo);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_machine_method_get_addresses(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Machine *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(iay)");
+ if (r < 0)
+ return r;
+
+ switch (m->class) {
+
+ case MACHINE_HOST: {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ struct local_address *a;
+ int n, i;
+
+ n = local_addresses(NULL, 0, AF_UNSPEC, &addresses);
+ if (n < 0)
+ return n;
+
+ for (a = addresses, i = 0; i < n; a++, i++) {
+
+ r = sd_bus_message_open_container(reply, 'r', "iay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "i", addresses[i].family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &addresses[i].address, FAMILY_ADDRESS_SIZE(addresses[i].family));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+ }
+
+ case MACHINE_CONTAINER: {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ _cleanup_free_ char *us = NULL, *them = NULL;
+ _cleanup_close_ int netns_fd = -1;
+ const char *p;
+ pid_t child;
+
+ r = readlink_malloc("/proc/self/ns/net", &us);
+ if (r < 0)
+ return r;
+
+ p = procfs_file_alloca(m->leader, "ns/net");
+ r = readlink_malloc(p, &them);
+ if (r < 0)
+ return r;
+
+ if (streq(us, them))
+ return sd_bus_error_setf(error, BUS_ERROR_NO_PRIVATE_NETWORKING, "Machine %s does not use private networking", m->name);
+
+ r = namespace_open(m->leader, NULL, NULL, &netns_fd, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-addrns)", "(sd-addr)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ -1, -1, netns_fd, -1, -1, &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ struct local_address *a;
+ int i, n;
+
+ pair[0] = safe_close(pair[0]);
+
+ n = local_addresses(NULL, 0, AF_UNSPEC, &addresses);
+ if (n < 0)
+ _exit(EXIT_FAILURE);
+
+ for (a = addresses, i = 0; i < n; a++, i++) {
+ struct iovec iov[2] = {
+ { .iov_base = &a->family, .iov_len = sizeof(a->family) },
+ { .iov_base = &a->address, .iov_len = FAMILY_ADDRESS_SIZE(a->family) },
+ };
+
+ r = writev(pair[1], iov, 2);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ for (;;) {
+ int family;
+ ssize_t n;
+ union in_addr_union in_addr;
+ struct iovec iov[2];
+ struct msghdr mh = {
+ .msg_iov = iov,
+ .msg_iovlen = 2,
+ };
+
+ iov[0] = IOVEC_MAKE(&family, sizeof(family));
+ iov[1] = IOVEC_MAKE(&in_addr, sizeof(in_addr));
+
+ n = recvmsg(pair[0], &mh, 0);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n < sizeof(family))
+ break;
+
+ r = sd_bus_message_open_container(reply, 'r', "iay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "i", family);
+ if (r < 0)
+ return r;
+
+ switch (family) {
+
+ case AF_INET:
+ if (n != sizeof(struct in_addr) + sizeof(family))
+ return -EIO;
+
+ r = sd_bus_message_append_array(reply, 'y', &in_addr.in, sizeof(in_addr.in));
+ break;
+
+ case AF_INET6:
+ if (n != sizeof(struct in6_addr) + sizeof(family))
+ return -EIO;
+
+ r = sd_bus_message_append_array(reply, 'y', &in_addr.in6, sizeof(in_addr.in6));
+ break;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ r = wait_for_terminate_and_check("(sd-addrns)", child, 0);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to wait for child: %m");
+ if (r != EXIT_SUCCESS)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Child died abnormally.");
+ break;
+ }
+
+ default:
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Requesting IP address data is only supported on container machines.");
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+#define EXIT_NOT_FOUND 2
+
+int bus_machine_method_get_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Machine *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ switch (m->class) {
+
+ case MACHINE_HOST:
+ r = load_os_release_pairs(NULL, &l);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case MACHINE_CONTAINER: {
+ _cleanup_close_ int mntns_fd = -1, root_fd = -1, pidns_fd = -1;
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t child;
+
+ r = namespace_open(m->leader, &pidns_fd, &mntns_fd, NULL, NULL, &root_fd);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-osrelns)", "(sd-osrel)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ pidns_fd, mntns_fd, -1, -1, root_fd,
+ &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ int fd = -1;
+
+ pair[0] = safe_close(pair[0]);
+
+ r = open_os_release(NULL, NULL, &fd);
+ if (r == -ENOENT)
+ _exit(EXIT_NOT_FOUND);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ r = copy_bytes(fd, pair[1], (uint64_t) -1, 0);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ f = fdopen(pair[0], "r");
+ if (!f)
+ return -errno;
+
+ pair[0] = -1;
+
+ r = load_env_file_pairs(f, "/etc/os-release", &l);
+ if (r < 0)
+ return r;
+
+ r = wait_for_terminate_and_check("(sd-osrelns)", child, 0);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to wait for child: %m");
+ if (r == EXIT_NOT_FOUND)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Machine does not contain OS release information");
+ if (r != EXIT_SUCCESS)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Child died abnormally.");
+
+ break;
+ }
+
+ default:
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Requesting OS release data is only supported on container machines.");
+ }
+
+ return bus_reply_pair_array(message, l);
+}
+
+int bus_machine_method_open_pty(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *pty_name = NULL;
+ _cleanup_close_ int master = -1;
+ Machine *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ m->class == MACHINE_HOST ? "org.freedesktop.machine1.host-open-pty" : "org.freedesktop.machine1.open-pty",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ master = machine_openpt(m, O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (master < 0)
+ return master;
+
+ r = ptsname_namespace(master, &pty_name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "hs", master, pty_name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int container_bus_new(Machine *m, sd_bus_error *error, sd_bus **ret) {
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ switch (m->class) {
+
+ case MACHINE_HOST:
+ *ret = NULL;
+ break;
+
+ case MACHINE_CONTAINER: {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ char *address;
+
+ r = sd_bus_new(&bus);
+ if (r < 0)
+ return r;
+
+ if (asprintf(&address, "x-machine-kernel:pid=%1$" PID_PRI ";x-machine-unix:pid=%1$" PID_PRI, m->leader) < 0)
+ return -ENOMEM;
+
+ bus->address = address;
+ bus->bus_client = true;
+ bus->trusted = false;
+ bus->is_system = true;
+
+ r = sd_bus_start(bus);
+ if (r == -ENOENT)
+ return sd_bus_error_set_errnof(error, r, "There is no system bus in container %s.", m->name);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(bus);
+ break;
+ }
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int bus_machine_method_open_login(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *pty_name = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *allocated_bus = NULL;
+ _cleanup_close_ int master = -1;
+ sd_bus *container_bus = NULL;
+ Machine *m = userdata;
+ const char *p, *getty;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ m->class == MACHINE_HOST ? "org.freedesktop.machine1.host-login" : "org.freedesktop.machine1.login",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ master = machine_openpt(m, O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (master < 0)
+ return master;
+
+ r = ptsname_namespace(master, &pty_name);
+ if (r < 0)
+ return r;
+
+ p = path_startswith(pty_name, "/dev/pts/");
+ if (!p)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "PTS name %s is invalid", pty_name);
+
+ r = container_bus_new(m, error, &allocated_bus);
+ if (r < 0)
+ return r;
+
+ container_bus = allocated_bus ?: m->manager->bus;
+
+ getty = strjoina("container-getty@", p, ".service");
+
+ r = sd_bus_call_method(
+ container_bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartUnit",
+ error, NULL,
+ "ss", getty, "replace");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "hs", master, pty_name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *tm = NULL;
+ _cleanup_free_ char *pty_name = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *allocated_bus = NULL;
+ sd_bus *container_bus = NULL;
+ _cleanup_close_ int master = -1, slave = -1;
+ _cleanup_strv_free_ char **env = NULL, **args_wire = NULL, **args = NULL;
+ Machine *m = userdata;
+ const char *p, *unit, *user, *path, *description, *utmp_id;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "ss", &user, &path);
+ if (r < 0)
+ return r;
+ user = empty_to_null(user);
+ r = sd_bus_message_read_strv(message, &args_wire);
+ if (r < 0)
+ return r;
+ if (isempty(path)) {
+ path = "/bin/sh";
+
+ args = new0(char*, 3 + 1);
+ if (!args)
+ return -ENOMEM;
+ args[0] = strdup("sh");
+ if (!args[0])
+ return -ENOMEM;
+ args[1] = strdup("-c");
+ if (!args[1])
+ return -ENOMEM;
+ r = asprintf(&args[2],
+ "shell=$(getent passwd %s 2>/dev/null | { IFS=: read _ _ _ _ _ _ x; echo \"$x\"; })\n"\
+ "exec \"${shell:-/bin/sh}\" -l", /* -l is means --login */
+ isempty(user) ? "root" : user);
+ if (r < 0) {
+ args[2] = NULL;
+ return -ENOMEM;
+ }
+ } else {
+ if (!path_is_absolute(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Specified path '%s' is not absolute", path);
+ args = TAKE_PTR(args_wire);
+ if (strv_isempty(args)) {
+ args = strv_free(args);
+
+ args = strv_new(path);
+ if (!args)
+ return -ENOMEM;
+ }
+ }
+
+ r = sd_bus_message_read_strv(message, &env);
+ if (r < 0)
+ return r;
+ if (!strv_env_is_valid(env))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment assignments");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ m->class == MACHINE_HOST ? "org.freedesktop.machine1.host-shell" : "org.freedesktop.machine1.shell",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ master = machine_openpt(m, O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (master < 0)
+ return master;
+
+ r = ptsname_namespace(master, &pty_name);
+ if (r < 0)
+ return r;
+
+ p = path_startswith(pty_name, "/dev/pts/");
+ assert(p);
+
+ slave = machine_open_terminal(m, pty_name, O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (slave < 0)
+ return slave;
+
+ utmp_id = path_startswith(pty_name, "/dev/");
+ assert(utmp_id);
+
+ r = container_bus_new(m, error, &allocated_bus);
+ if (r < 0)
+ return r;
+
+ container_bus = allocated_bus ?: m->manager->bus;
+
+ r = sd_bus_message_new_method_call(
+ container_bus,
+ &tm,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartTransientUnit");
+ if (r < 0)
+ return r;
+
+ /* Name and mode */
+ unit = strjoina("container-shell@", p, ".service");
+ r = sd_bus_message_append(tm, "ss", unit, "fail");
+ if (r < 0)
+ return r;
+
+ /* Properties */
+ r = sd_bus_message_open_container(tm, 'a', "(sv)");
+ if (r < 0)
+ return r;
+
+ description = strjoina("Shell for User ", isempty(user) ? "root" : user);
+ r = sd_bus_message_append(tm,
+ "(sv)(sv)(sv)(sv)(sv)(sv)(sv)(sv)(sv)(sv)(sv)(sv)",
+ "Description", "s", description,
+ "StandardInputFileDescriptor", "h", slave,
+ "StandardOutputFileDescriptor", "h", slave,
+ "StandardErrorFileDescriptor", "h", slave,
+ "SendSIGHUP", "b", true,
+ "IgnoreSIGPIPE", "b", false,
+ "KillMode", "s", "mixed",
+ "TTYReset", "b", true,
+ "UtmpIdentifier", "s", utmp_id,
+ "UtmpMode", "s", "user",
+ "PAMName", "s", "login",
+ "WorkingDirectory", "s", "-~");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(tm, "(sv)", "User", "s", isempty(user) ? "root" : user);
+ if (r < 0)
+ return r;
+
+ if (!strv_isempty(env)) {
+ r = sd_bus_message_open_container(tm, 'r', "sv");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(tm, "s", "Environment");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(tm, 'v', "as");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(tm, env);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(tm);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(tm);
+ if (r < 0)
+ return r;
+ }
+
+ /* Exec container */
+ r = sd_bus_message_open_container(tm, 'r', "sv");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(tm, "s", "ExecStart");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(tm, 'v', "a(sasb)");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(tm, 'a', "(sasb)");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(tm, 'r', "sasb");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(tm, "s", path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(tm, args);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(tm, "b", true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(tm);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(tm);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(tm);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(tm);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(tm);
+ if (r < 0)
+ return r;
+
+ /* Auxiliary units */
+ r = sd_bus_message_append(tm, "a(sa(sv))", 0);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(container_bus, tm, 0, error, NULL);
+ if (r < 0)
+ return r;
+
+ slave = safe_close(slave);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "hs", master, pty_name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+int bus_machine_method_bind_mount(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
+ char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p;
+ bool mount_slave_created = false, mount_slave_mounted = false,
+ mount_tmp_created = false, mount_tmp_mounted = false,
+ mount_outside_created = false, mount_outside_mounted = false;
+ _cleanup_free_ char *chased_src = NULL;
+ int read_only, make_file_or_directory;
+ const char *dest, *src;
+ Machine *m = userdata;
+ struct stat st;
+ pid_t child;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ if (m->class != MACHINE_CONTAINER)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Bind mounting is only supported on container machines.");
+
+ r = sd_bus_message_read(message, "ssbb", &src, &dest, &read_only, &make_file_or_directory);
+ if (r < 0)
+ return r;
+
+ if (!path_is_absolute(src) || !path_is_normalized(src))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path must be absolute and not contain ../.");
+
+ if (isempty(dest))
+ dest = src;
+ else if (!path_is_absolute(dest) || !path_is_normalized(dest))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path must be absolute and not contain ../.");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = machine_get_uid_shift(m, &uid);
+ if (r < 0)
+ return r;
+ if (uid != 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Can't bind mount on container with user namespacing applied.");
+
+ /* One day, when bind mounting /proc/self/fd/n works across
+ * namespace boundaries we should rework this logic to make
+ * use of it... */
+
+ p = strjoina("/run/systemd/nspawn/propagate/", m->name, "/");
+ if (laccess(p, F_OK) < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Container does not allow propagation of mount points.");
+
+ r = chase_symlinks(src, NULL, CHASE_TRAIL_SLASH, &chased_src);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to resolve source path: %m");
+
+ if (lstat(chased_src, &st) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to stat() source path: %m");
+ if (S_ISLNK(st.st_mode)) /* This shouldn't really happen, given that we just chased the symlinks above, but let's better be safe… */
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Source directory can't be a symbolic link");
+
+ /* Our goal is to install a new bind mount into the container,
+ possibly read-only. This is irritatingly complex
+ unfortunately, currently.
+
+ First, we start by creating a private playground in /tmp,
+ that we can mount MS_SLAVE. (Which is necessary, since
+ MS_MOVE cannot be applied to mounts with MS_SHARED parent
+ mounts.) */
+
+ if (!mkdtemp(mount_slave))
+ return sd_bus_error_set_errnof(error, errno, "Failed to create playground %s: %m", mount_slave);
+
+ mount_slave_created = true;
+
+ if (mount(mount_slave, mount_slave, NULL, MS_BIND, NULL) < 0) {
+ r = sd_bus_error_set_errnof(error, errno, "Failed to make bind mount %s: %m", mount_slave);
+ goto finish;
+ }
+
+ mount_slave_mounted = true;
+
+ if (mount(NULL, mount_slave, NULL, MS_SLAVE, NULL) < 0) {
+ r = sd_bus_error_set_errnof(error, errno, "Failed to remount slave %s: %m", mount_slave);
+ goto finish;
+ }
+
+ /* Second, we mount the source file or directory to a directory inside of our MS_SLAVE playground. */
+ mount_tmp = strjoina(mount_slave, "/mount");
+ if (S_ISDIR(st.st_mode))
+ r = mkdir_errno_wrapper(mount_tmp, 0700);
+ else
+ r = touch(mount_tmp);
+ if (r < 0) {
+ sd_bus_error_set_errnof(error, errno, "Failed to create temporary mount point %s: %m", mount_tmp);
+ goto finish;
+ }
+
+ mount_tmp_created = true;
+
+ if (mount(chased_src, mount_tmp, NULL, MS_BIND, NULL) < 0) {
+ r = sd_bus_error_set_errnof(error, errno, "Failed to mount %s: %m", chased_src);
+ goto finish;
+ }
+
+ mount_tmp_mounted = true;
+
+ /* Third, we remount the new bind mount read-only if requested. */
+ if (read_only)
+ if (mount(NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) {
+ r = sd_bus_error_set_errnof(error, errno, "Failed to remount read-only %s: %m", mount_tmp);
+ goto finish;
+ }
+
+ /* Fourth, we move the new bind mount into the propagation directory. This way it will appear there read-only
+ * right-away. */
+
+ mount_outside = strjoina("/run/systemd/nspawn/propagate/", m->name, "/XXXXXX");
+ if (S_ISDIR(st.st_mode))
+ r = mkdtemp(mount_outside) ? 0 : -errno;
+ else {
+ r = mkostemp_safe(mount_outside);
+ safe_close(r);
+ }
+ if (r < 0) {
+ sd_bus_error_set_errnof(error, errno, "Cannot create propagation file or directory %s: %m", mount_outside);
+ goto finish;
+ }
+
+ mount_outside_created = true;
+
+ if (mount(mount_tmp, mount_outside, NULL, MS_MOVE, NULL) < 0) {
+ r = sd_bus_error_set_errnof(error, errno, "Failed to move %s to %s: %m", mount_tmp, mount_outside);
+ goto finish;
+ }
+
+ mount_outside_mounted = true;
+ mount_tmp_mounted = false;
+
+ if (S_ISDIR(st.st_mode))
+ (void) rmdir(mount_tmp);
+ else
+ (void) unlink(mount_tmp);
+ mount_tmp_created = false;
+
+ (void) umount(mount_slave);
+ mount_slave_mounted = false;
+
+ (void) rmdir(mount_slave);
+ mount_slave_created = false;
+
+ if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0) {
+ r = sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m");
+ goto finish;
+ }
+
+ r = safe_fork("(sd-bindmnt)", FORK_RESET_SIGNALS, &child);
+ if (r < 0) {
+ sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ goto finish;
+ }
+ if (r == 0) {
+ const char *mount_inside;
+ int mntfd;
+ const char *q;
+
+ errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
+
+ q = procfs_file_alloca(m->leader, "ns/mnt");
+ mntfd = open(q, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (mntfd < 0) {
+ r = log_error_errno(errno, "Failed to open mount namespace of leader: %m");
+ goto child_fail;
+ }
+
+ if (setns(mntfd, CLONE_NEWNS) < 0) {
+ r = log_error_errno(errno, "Failed to join namespace of leader: %m");
+ goto child_fail;
+ }
+
+ if (make_file_or_directory) {
+ if (S_ISDIR(st.st_mode))
+ (void) mkdir_p(dest, 0755);
+ else {
+ (void) mkdir_parents(dest, 0755);
+ safe_close(open(dest, O_CREAT|O_EXCL|O_WRONLY|O_CLOEXEC|O_NOCTTY, 0600));
+ }
+ }
+
+ /* Fifth, move the mount to the right place inside */
+ mount_inside = strjoina("/run/systemd/nspawn/incoming/", basename(mount_outside));
+ if (mount(mount_inside, dest, NULL, MS_MOVE, NULL) < 0) {
+ r = log_error_errno(errno, "Failed to mount: %m");
+ goto child_fail;
+ }
+
+ _exit(EXIT_SUCCESS);
+
+ child_fail:
+ (void) write(errno_pipe_fd[1], &r, sizeof(r));
+ errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+ _exit(EXIT_FAILURE);
+ }
+
+ errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+ r = wait_for_terminate_and_check("(sd-bindmnt)", child, 0);
+ if (r < 0) {
+ r = sd_bus_error_set_errnof(error, r, "Failed to wait for child: %m");
+ goto finish;
+ }
+ if (r != EXIT_SUCCESS) {
+ if (read(errno_pipe_fd[0], &r, sizeof(r)) == sizeof(r))
+ r = sd_bus_error_set_errnof(error, r, "Failed to mount: %m");
+ else
+ r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Child failed.");
+ goto finish;
+ }
+
+ r = sd_bus_reply_method_return(message, NULL);
+
+finish:
+ if (mount_outside_mounted)
+ (void) umount(mount_outside);
+ if (mount_outside_created) {
+ if (S_ISDIR(st.st_mode))
+ (void) rmdir(mount_outside);
+ else
+ (void) unlink(mount_outside);
+ }
+
+ if (mount_tmp_mounted)
+ (void) umount(mount_tmp);
+ if (mount_tmp_created) {
+ if (S_ISDIR(st.st_mode))
+ (void) rmdir(mount_tmp);
+ else
+ (void) unlink(mount_tmp);
+ }
+
+ if (mount_slave_mounted)
+ (void) umount(mount_slave);
+ if (mount_slave_created)
+ (void) rmdir(mount_slave);
+
+ return r;
+}
+
+int bus_machine_method_copy(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *src, *dest, *host_path, *container_path, *host_basename, *container_basename, *container_dirname;
+ _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
+ CopyFlags copy_flags = COPY_REFLINK|COPY_MERGE;
+ _cleanup_close_ int hostfd = -1;
+ Machine *m = userdata;
+ bool copy_from;
+ pid_t child;
+ uid_t uid_shift;
+ char *t;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ if (m->manager->n_operations >= OPERATIONS_MAX)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many ongoing copies.");
+
+ if (m->class != MACHINE_CONTAINER)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Copying files is only supported on container machines.");
+
+ r = sd_bus_message_read(message, "ss", &src, &dest);
+ if (r < 0)
+ return r;
+
+ if (!path_is_absolute(src))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path must be absolute.");
+
+ if (isempty(dest))
+ dest = src;
+ else if (!path_is_absolute(dest))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path must be absolute.");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = machine_get_uid_shift(m, &uid_shift);
+ if (r < 0)
+ return r;
+
+ copy_from = strstr(sd_bus_message_get_member(message), "CopyFrom");
+
+ if (copy_from) {
+ container_path = src;
+ host_path = dest;
+ } else {
+ host_path = src;
+ container_path = dest;
+ }
+
+ host_basename = basename(host_path);
+
+ container_basename = basename(container_path);
+ t = strdupa(container_path);
+ container_dirname = dirname(t);
+
+ hostfd = open_parent(host_path, O_CLOEXEC, 0);
+ if (hostfd < 0)
+ return sd_bus_error_set_errnof(error, hostfd, "Failed to open host directory %s: %m", host_path);
+
+ if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m");
+
+ r = safe_fork("(sd-copy)", FORK_RESET_SIGNALS, &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ int containerfd;
+ const char *q;
+ int mntfd;
+
+ errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
+
+ q = procfs_file_alloca(m->leader, "ns/mnt");
+ mntfd = open(q, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (mntfd < 0) {
+ r = log_error_errno(errno, "Failed to open mount namespace of leader: %m");
+ goto child_fail;
+ }
+
+ if (setns(mntfd, CLONE_NEWNS) < 0) {
+ r = log_error_errno(errno, "Failed to join namespace of leader: %m");
+ goto child_fail;
+ }
+
+ containerfd = open(container_dirname, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_DIRECTORY);
+ if (containerfd < 0) {
+ r = log_error_errno(errno, "Failed to open destination directory: %m");
+ goto child_fail;
+ }
+
+ /* Run the actual copy operation. Note that when an UID shift is set we'll either clamp the UID/GID to
+ * 0 or to the actual UID shift depending on the direction we copy. If no UID shift is set we'll copy
+ * the UID/GIDs as they are. */
+ if (copy_from)
+ r = copy_tree_at(containerfd, container_basename, hostfd, host_basename, uid_shift == 0 ? UID_INVALID : 0, uid_shift == 0 ? GID_INVALID : 0, copy_flags);
+ else
+ r = copy_tree_at(hostfd, host_basename, containerfd, container_basename, uid_shift == 0 ? UID_INVALID : uid_shift, uid_shift == 0 ? GID_INVALID : uid_shift, copy_flags);
+
+ hostfd = safe_close(hostfd);
+ containerfd = safe_close(containerfd);
+
+ if (r < 0) {
+ r = log_error_errno(r, "Failed to copy tree: %m");
+ goto child_fail;
+ }
+
+ _exit(EXIT_SUCCESS);
+
+ child_fail:
+ (void) write(errno_pipe_fd[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+ /* Copying might take a while, hence install a watch on the child, and return */
+
+ r = operation_new(m->manager, m, child, message, errno_pipe_fd[0], NULL);
+ if (r < 0) {
+ (void) sigkill_wait(child);
+ return r;
+ }
+ errno_pipe_fd[0] = -1;
+
+ return 1;
+}
+
+int bus_machine_method_open_root_directory(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_close_ int fd = -1;
+ Machine *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ switch (m->class) {
+
+ case MACHINE_HOST:
+ fd = open("/", O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ break;
+
+ case MACHINE_CONTAINER: {
+ _cleanup_close_ int mntns_fd = -1, root_fd = -1;
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ pid_t child;
+
+ r = namespace_open(m->leader, NULL, &mntns_fd, NULL, NULL, &root_fd);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-openrootns)", "(sd-openroot)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ -1, mntns_fd, -1, -1, root_fd, &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ _cleanup_close_ int dfd = -1;
+
+ pair[0] = safe_close(pair[0]);
+
+ dfd = open("/", O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (dfd < 0)
+ _exit(EXIT_FAILURE);
+
+ r = send_one_fd(pair[1], dfd, 0);
+ dfd = safe_close(dfd);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ r = wait_for_terminate_and_check("(sd-openrootns)", child, 0);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to wait for child: %m");
+ if (r != EXIT_SUCCESS)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Child died abnormally.");
+
+ fd = receive_one_fd(pair[0], MSG_DONTWAIT);
+ if (fd < 0)
+ return fd;
+
+ break;
+ }
+
+ default:
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Opening the root directory is only supported on container machines.");
+ }
+
+ return sd_bus_reply_method_return(message, "h", fd);
+}
+
+int bus_machine_method_get_uid_shift(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Machine *m = userdata;
+ uid_t shift = 0;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* You wonder why this is a method and not a property? Well, properties are not supposed to return errors, but
+ * we kinda have to for this. */
+
+ if (m->class == MACHINE_HOST)
+ return sd_bus_reply_method_return(message, "u", UINT32_C(0));
+
+ if (m->class != MACHINE_CONTAINER)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "UID/GID shift may only be determined for container machines.");
+
+ r = machine_get_uid_shift(m, &shift);
+ if (r == -ENXIO)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Machine %s uses a complex UID/GID mapping, cannot determine shift", m->name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "u", (uint32_t) shift);
+}
+
+const sd_bus_vtable machine_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Name", "s", NULL, offsetof(Machine, name), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Id", "ay", bus_property_get_id128, offsetof(Machine, id), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("Timestamp", offsetof(Machine, timestamp), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Service", "s", NULL, offsetof(Machine, service), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Unit", "s", NULL, offsetof(Machine, unit), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Scope", "s", NULL, offsetof(Machine, unit), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("Leader", "u", NULL, offsetof(Machine, leader), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Class", "s", property_get_class, offsetof(Machine, class), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RootDirectory", "s", NULL, offsetof(Machine, root_directory), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NetworkInterfaces", "ai", property_get_netif, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("State", "s", property_get_state, 0, 0),
+ SD_BUS_METHOD("Terminate", NULL, NULL, bus_machine_method_terminate, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Kill", "si", NULL, bus_machine_method_kill, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetAddresses", NULL, "a(iay)", bus_machine_method_get_addresses, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetOSRelease", NULL, "a{ss}", bus_machine_method_get_os_release, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetUIDShift", NULL, "u", bus_machine_method_get_uid_shift, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("OpenPTY", NULL, "hs", bus_machine_method_open_pty, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("OpenLogin", NULL, "hs", bus_machine_method_open_login, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("OpenShell", "ssasas", "hs", bus_machine_method_open_shell, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("BindMount", "ssbb", NULL, bus_machine_method_bind_mount, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CopyFrom", "ss", NULL, bus_machine_method_copy, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CopyTo", "ss", NULL, bus_machine_method_copy, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("OpenRootDirectory", NULL, "h", bus_machine_method_open_root_directory, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END
+};
+
+int machine_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ Machine *machine;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ if (streq(path, "/org/freedesktop/machine1/machine/self")) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ sd_bus_message *message;
+ pid_t pid;
+
+ message = sd_bus_get_current_message(bus);
+ if (!message)
+ return 0;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+
+ r = manager_get_machine_by_pid(m, pid, &machine);
+ if (r <= 0)
+ return 0;
+ } else {
+ _cleanup_free_ char *e = NULL;
+ const char *p;
+
+ p = startswith(path, "/org/freedesktop/machine1/machine/");
+ if (!p)
+ return 0;
+
+ e = bus_label_unescape(p);
+ if (!e)
+ return -ENOMEM;
+
+ machine = hashmap_get(m->machines, e);
+ if (!machine)
+ return 0;
+ }
+
+ *found = machine;
+ return 1;
+}
+
+char *machine_bus_path(Machine *m) {
+ _cleanup_free_ char *e = NULL;
+
+ assert(m);
+
+ e = bus_label_escape(m->name);
+ if (!e)
+ return NULL;
+
+ return strappend("/org/freedesktop/machine1/machine/", e);
+}
+
+int machine_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Machine *machine = NULL;
+ Manager *m = userdata;
+ Iterator i;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(nodes);
+
+ HASHMAP_FOREACH(machine, m->machines, i) {
+ char *p;
+
+ p = machine_bus_path(machine);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_consume(&l, p);
+ if (r < 0)
+ return r;
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+int machine_send_signal(Machine *m, bool new_machine) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(m);
+
+ p = machine_bus_path(m);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_emit_signal(
+ m->manager->bus,
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ new_machine ? "MachineNew" : "MachineRemoved",
+ "so", m->name, p);
+}
+
+int machine_send_create_reply(Machine *m, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *c = NULL;
+ _cleanup_free_ char *p = NULL;
+
+ assert(m);
+
+ if (!m->create_message)
+ return 0;
+
+ c = TAKE_PTR(m->create_message);
+
+ if (error)
+ return sd_bus_reply_method_error(c, error);
+
+ /* Update the machine state file before we notify the client
+ * about the result. */
+ machine_save(m);
+
+ p = machine_bus_path(m);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(c, "o", p);
+}
diff --git a/src/machine/machine-dbus.h b/src/machine/machine-dbus.h
new file mode 100644
index 0000000..f880803
--- /dev/null
+++ b/src/machine/machine-dbus.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "machine.h"
+
+extern const sd_bus_vtable machine_vtable[];
+
+char *machine_bus_path(Machine *s);
+int machine_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error);
+int machine_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error);
+
+int bus_machine_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_get_addresses(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_get_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_open_pty(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_open_login(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_bind_mount(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_copy(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_open_root_directory(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_get_uid_shift(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+int machine_send_signal(Machine *m, bool new_machine);
+int machine_send_create_reply(Machine *m, sd_bus_error *error);
diff --git a/src/machine/machine.c b/src/machine/machine.c
new file mode 100644
index 0000000..4f89ac0
--- /dev/null
+++ b/src/machine/machine.c
@@ -0,0 +1,709 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio_ext.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "env-file.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "hashmap.h"
+#include "machine-dbus.h"
+#include "machine.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "util.h"
+
+Machine* machine_new(Manager *manager, MachineClass class, const char *name) {
+ Machine *m;
+
+ assert(manager);
+ assert(class < _MACHINE_CLASS_MAX);
+ assert(name);
+
+ /* Passing class == _MACHINE_CLASS_INVALID here is fine. It
+ * means as much as "we don't know yet", and that we'll figure
+ * it out later when loading the state file. */
+
+ m = new0(Machine, 1);
+ if (!m)
+ return NULL;
+
+ m->name = strdup(name);
+ if (!m->name)
+ goto fail;
+
+ if (class != MACHINE_HOST) {
+ m->state_file = strappend("/run/systemd/machines/", m->name);
+ if (!m->state_file)
+ goto fail;
+ }
+
+ m->class = class;
+
+ if (hashmap_put(manager->machines, m->name, m) < 0)
+ goto fail;
+
+ m->manager = manager;
+
+ return m;
+
+fail:
+ free(m->state_file);
+ free(m->name);
+ return mfree(m);
+}
+
+Machine* machine_free(Machine *m) {
+ if (!m)
+ return NULL;
+
+ while (m->operations)
+ operation_free(m->operations);
+
+ if (m->in_gc_queue)
+ LIST_REMOVE(gc_queue, m->manager->machine_gc_queue, m);
+
+ machine_release_unit(m);
+
+ free(m->scope_job);
+
+ (void) hashmap_remove(m->manager->machines, m->name);
+
+ if (m->manager->host_machine == m)
+ m->manager->host_machine = NULL;
+
+ if (m->leader > 0)
+ (void) hashmap_remove_value(m->manager->machine_leaders, PID_TO_PTR(m->leader), m);
+
+ sd_bus_message_unref(m->create_message);
+
+ free(m->name);
+ free(m->state_file);
+ free(m->service);
+ free(m->root_directory);
+ free(m->netif);
+ return mfree(m);
+}
+
+int machine_save(Machine *m) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(m);
+
+ if (!m->state_file)
+ return 0;
+
+ if (!m->started)
+ return 0;
+
+ r = mkdir_safe_label("/run/systemd/machines", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ goto fail;
+
+ r = fopen_temporary(m->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "NAME=%s\n",
+ m->name);
+
+ if (m->unit) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(m->unit);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "SCOPE=%s\n", escaped); /* We continue to call this "SCOPE=" because it is internal only, and we want to stay compatible with old files */
+ }
+
+ if (m->scope_job)
+ fprintf(f, "SCOPE_JOB=%s\n", m->scope_job);
+
+ if (m->service) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(m->service);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ fprintf(f, "SERVICE=%s\n", escaped);
+ }
+
+ if (m->root_directory) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(m->root_directory);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ fprintf(f, "ROOT=%s\n", escaped);
+ }
+
+ if (!sd_id128_is_null(m->id))
+ fprintf(f, "ID=" SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(m->id));
+
+ if (m->leader != 0)
+ fprintf(f, "LEADER="PID_FMT"\n", m->leader);
+
+ if (m->class != _MACHINE_CLASS_INVALID)
+ fprintf(f, "CLASS=%s\n", machine_class_to_string(m->class));
+
+ if (dual_timestamp_is_set(&m->timestamp))
+ fprintf(f,
+ "REALTIME="USEC_FMT"\n"
+ "MONOTONIC="USEC_FMT"\n",
+ m->timestamp.realtime,
+ m->timestamp.monotonic);
+
+ if (m->n_netif > 0) {
+ size_t i;
+
+ fputs("NETIF=", f);
+
+ for (i = 0; i < m->n_netif; i++) {
+ if (i != 0)
+ fputc(' ', f);
+
+ fprintf(f, "%i", m->netif[i]);
+ }
+
+ fputc('\n', f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, m->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (m->unit) {
+ char *sl;
+
+ /* Create a symlink from the unit name to the machine
+ * name, so that we can quickly find the machine for
+ * each given unit. Ignore error. */
+ sl = strjoina("/run/systemd/machines/unit:", m->unit);
+ (void) symlink(m->name, sl);
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(m->state_file);
+
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save machine data %s: %m", m->state_file);
+}
+
+static void machine_unlink(Machine *m) {
+ assert(m);
+
+ if (m->unit) {
+ char *sl;
+
+ sl = strjoina("/run/systemd/machines/unit:", m->unit);
+ (void) unlink(sl);
+ }
+
+ if (m->state_file)
+ (void) unlink(m->state_file);
+}
+
+int machine_load(Machine *m) {
+ _cleanup_free_ char *realtime = NULL, *monotonic = NULL, *id = NULL, *leader = NULL, *class = NULL, *netif = NULL;
+ int r;
+
+ assert(m);
+
+ if (!m->state_file)
+ return 0;
+
+ r = parse_env_file(NULL, m->state_file,
+ "SCOPE", &m->unit,
+ "SCOPE_JOB", &m->scope_job,
+ "SERVICE", &m->service,
+ "ROOT", &m->root_directory,
+ "ID", &id,
+ "LEADER", &leader,
+ "CLASS", &class,
+ "REALTIME", &realtime,
+ "MONOTONIC", &monotonic,
+ "NETIF", &netif);
+ if (r < 0) {
+ if (r == -ENOENT)
+ return 0;
+
+ return log_error_errno(r, "Failed to read %s: %m", m->state_file);
+ }
+
+ if (id)
+ sd_id128_from_string(id, &m->id);
+
+ if (leader)
+ parse_pid(leader, &m->leader);
+
+ if (class) {
+ MachineClass c;
+
+ c = machine_class_from_string(class);
+ if (c >= 0)
+ m->class = c;
+ }
+
+ if (realtime)
+ (void) deserialize_usec(realtime, &m->timestamp.realtime);
+ if (monotonic)
+ (void) deserialize_usec(monotonic, &m->timestamp.monotonic);
+
+ if (netif) {
+ size_t allocated = 0, nr = 0;
+ const char *p;
+ int *ni = NULL;
+
+ p = netif;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ int ifi;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse NETIF: %s", netif);
+ break;
+ }
+
+ if (parse_ifindex(word, &ifi) < 0)
+ continue;
+
+ if (!GREEDY_REALLOC(ni, allocated, nr+1)) {
+ free(ni);
+ return log_oom();
+ }
+
+ ni[nr++] = ifi;
+ }
+
+ free(m->netif);
+ m->netif = ni;
+ m->n_netif = nr;
+ }
+
+ return r;
+}
+
+static int machine_start_scope(Machine *m, sd_bus_message *properties, sd_bus_error *error) {
+ assert(m);
+ assert(m->class != MACHINE_HOST);
+
+ if (!m->unit) {
+ _cleanup_free_ char *escaped = NULL, *scope = NULL;
+ char *description, *job = NULL;
+ int r;
+
+ escaped = unit_name_escape(m->name);
+ if (!escaped)
+ return log_oom();
+
+ scope = strjoin("machine-", escaped, ".scope");
+ if (!scope)
+ return log_oom();
+
+ description = strjoina(m->class == MACHINE_VM ? "Virtual Machine " : "Container ", m->name);
+
+ r = manager_start_scope(m->manager, scope, m->leader, SPECIAL_MACHINE_SLICE, description, properties, error, &job);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start machine scope: %s", bus_error_message(error, r));
+
+ m->unit = TAKE_PTR(scope);
+ free_and_replace(m->scope_job, job);
+ }
+
+ if (m->unit)
+ hashmap_put(m->manager->machine_units, m->unit, m);
+
+ return 0;
+}
+
+int machine_start(Machine *m, sd_bus_message *properties, sd_bus_error *error) {
+ int r;
+
+ assert(m);
+
+ if (!IN_SET(m->class, MACHINE_CONTAINER, MACHINE_VM))
+ return -EOPNOTSUPP;
+
+ if (m->started)
+ return 0;
+
+ r = hashmap_put(m->manager->machine_leaders, PID_TO_PTR(m->leader), m);
+ if (r < 0)
+ return r;
+
+ /* Create cgroup */
+ r = machine_start_scope(m, properties, error);
+ if (r < 0)
+ return r;
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_MACHINE_START_STR,
+ "NAME=%s", m->name,
+ "LEADER="PID_FMT, m->leader,
+ LOG_MESSAGE("New machine %s.", m->name));
+
+ if (!dual_timestamp_is_set(&m->timestamp))
+ dual_timestamp_get(&m->timestamp);
+
+ m->started = true;
+
+ /* Save new machine data */
+ machine_save(m);
+
+ machine_send_signal(m, true);
+ (void) manager_enqueue_nscd_cache_flush(m->manager);
+
+ return 0;
+}
+
+static int machine_stop_scope(Machine *m) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char *job = NULL;
+ int r, q;
+
+ assert(m);
+ assert(m->class != MACHINE_HOST);
+
+ if (!m->unit)
+ return 0;
+
+ r = manager_stop_unit(m->manager, m->unit, &error, &job);
+ if (r < 0) {
+ log_error_errno(r, "Failed to stop machine scope: %s", bus_error_message(&error, r));
+ sd_bus_error_free(&error);
+ } else
+ free_and_replace(m->scope_job, job);
+
+ q = manager_unref_unit(m->manager, m->unit, &error);
+ if (q < 0)
+ log_warning_errno(q, "Failed to drop reference to machine scope, ignoring: %s", bus_error_message(&error, r));
+
+ return r;
+}
+
+int machine_stop(Machine *m) {
+ int r;
+ assert(m);
+
+ if (!IN_SET(m->class, MACHINE_CONTAINER, MACHINE_VM))
+ return -EOPNOTSUPP;
+
+ r = machine_stop_scope(m);
+
+ m->stopping = true;
+
+ machine_save(m);
+ (void) manager_enqueue_nscd_cache_flush(m->manager);
+
+ return r;
+}
+
+int machine_finalize(Machine *m) {
+ assert(m);
+
+ if (m->started)
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_MACHINE_STOP_STR,
+ "NAME=%s", m->name,
+ "LEADER="PID_FMT, m->leader,
+ LOG_MESSAGE("Machine %s terminated.", m->name));
+
+ machine_unlink(m);
+ machine_add_to_gc_queue(m);
+
+ if (m->started) {
+ machine_send_signal(m, false);
+ m->started = false;
+ }
+
+ return 0;
+}
+
+bool machine_may_gc(Machine *m, bool drop_not_started) {
+ assert(m);
+
+ if (m->class == MACHINE_HOST)
+ return false;
+
+ if (drop_not_started && !m->started)
+ return true;
+
+ if (m->scope_job && manager_job_is_active(m->manager, m->scope_job))
+ return false;
+
+ if (m->unit && manager_unit_is_active(m->manager, m->unit))
+ return false;
+
+ return true;
+}
+
+void machine_add_to_gc_queue(Machine *m) {
+ assert(m);
+
+ if (m->in_gc_queue)
+ return;
+
+ LIST_PREPEND(gc_queue, m->manager->machine_gc_queue, m);
+ m->in_gc_queue = true;
+}
+
+MachineState machine_get_state(Machine *s) {
+ assert(s);
+
+ if (s->class == MACHINE_HOST)
+ return MACHINE_RUNNING;
+
+ if (s->stopping)
+ return MACHINE_CLOSING;
+
+ if (s->scope_job)
+ return MACHINE_OPENING;
+
+ return MACHINE_RUNNING;
+}
+
+int machine_kill(Machine *m, KillWho who, int signo) {
+ assert(m);
+
+ if (!IN_SET(m->class, MACHINE_VM, MACHINE_CONTAINER))
+ return -EOPNOTSUPP;
+
+ if (!m->unit)
+ return -ESRCH;
+
+ if (who == KILL_LEADER) {
+ /* If we shall simply kill the leader, do so directly */
+
+ if (kill(m->leader, signo) < 0)
+ return -errno;
+
+ return 0;
+ }
+
+ /* Otherwise, make PID 1 do it for us, for the entire cgroup */
+ return manager_kill_unit(m->manager, m->unit, signo, NULL);
+}
+
+int machine_openpt(Machine *m, int flags) {
+ assert(m);
+
+ switch (m->class) {
+
+ case MACHINE_HOST: {
+ int fd;
+
+ fd = posix_openpt(flags);
+ if (fd < 0)
+ return -errno;
+
+ if (unlockpt(fd) < 0)
+ return -errno;
+
+ return fd;
+ }
+
+ case MACHINE_CONTAINER:
+ if (m->leader <= 0)
+ return -EINVAL;
+
+ return openpt_in_namespace(m->leader, flags);
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int machine_open_terminal(Machine *m, const char *path, int mode) {
+ assert(m);
+
+ switch (m->class) {
+
+ case MACHINE_HOST:
+ return open_terminal(path, mode);
+
+ case MACHINE_CONTAINER:
+ if (m->leader <= 0)
+ return -EINVAL;
+
+ return open_terminal_in_namespace(m->leader, path, mode);
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+void machine_release_unit(Machine *m) {
+ assert(m);
+
+ if (!m->unit)
+ return;
+
+ (void) hashmap_remove(m->manager->machine_units, m->unit);
+ m->unit = mfree(m->unit);
+}
+
+int machine_get_uid_shift(Machine *m, uid_t *ret) {
+ char p[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t) + 1];
+ uid_t uid_base, uid_shift, uid_range;
+ gid_t gid_base, gid_shift, gid_range;
+ _cleanup_fclose_ FILE *f = NULL;
+ int k, r;
+
+ assert(m);
+ assert(ret);
+
+ /* Return the base UID/GID of the specified machine. Note that this only works for containers with simple
+ * mappings. In most cases setups should be simple like this, and administrators should only care about the
+ * basic offset a container has relative to the host. This is what this function exposes.
+ *
+ * If we encounter any more complex mappings we politely refuse this with ENXIO. */
+
+ if (m->class == MACHINE_HOST) {
+ *ret = 0;
+ return 0;
+ }
+
+ if (m->class != MACHINE_CONTAINER)
+ return -EOPNOTSUPP;
+
+ xsprintf(p, "/proc/" PID_FMT "/uid_map", m->leader);
+ f = fopen(p, "re");
+ if (!f) {
+ if (errno == ENOENT) {
+ /* If the file doesn't exist, user namespacing is off in the kernel, return a zero mapping hence. */
+ *ret = 0;
+ return 0;
+ }
+
+ return -errno;
+ }
+
+ /* Read the first line. There's at least one. */
+ errno = 0;
+ k = fscanf(f, UID_FMT " " UID_FMT " " UID_FMT "\n", &uid_base, &uid_shift, &uid_range);
+ if (k != 3) {
+ if (ferror(f))
+ return -errno;
+
+ return -EBADMSG;
+ }
+
+ /* Not a mapping starting at 0? Then it's a complex mapping we can't expose here. */
+ if (uid_base != 0)
+ return -ENXIO;
+ /* Insist that at least the nobody user is mapped, everything else is weird, and hence complex, and we don't support it */
+ if (uid_range < UID_NOBODY)
+ return -ENXIO;
+
+ /* If there's more than one line, then we don't support this mapping. */
+ r = safe_fgetc(f, NULL);
+ if (r < 0)
+ return r;
+ if (r != 0) /* Insist on EOF */
+ return -ENXIO;
+
+ fclose(f);
+
+ xsprintf(p, "/proc/" PID_FMT "/gid_map", m->leader);
+ f = fopen(p, "re");
+ if (!f)
+ return -errno;
+
+ /* Read the first line. There's at least one. */
+ errno = 0;
+ k = fscanf(f, GID_FMT " " GID_FMT " " GID_FMT "\n", &gid_base, &gid_shift, &gid_range);
+ if (k != 3) {
+ if (ferror(f))
+ return -errno;
+
+ return -EBADMSG;
+ }
+
+ /* If there's more than one line, then we don't support this file. */
+ r = safe_fgetc(f, NULL);
+ if (r < 0)
+ return r;
+ if (r != 0) /* Insist on EOF */
+ return -ENXIO;
+
+ /* If the UID and GID mapping doesn't match, we don't support this mapping. */
+ if (uid_base != (uid_t) gid_base)
+ return -ENXIO;
+ if (uid_shift != (uid_t) gid_shift)
+ return -ENXIO;
+ if (uid_range != (uid_t) gid_range)
+ return -ENXIO;
+
+ *ret = uid_shift;
+ return 0;
+}
+
+static const char* const machine_class_table[_MACHINE_CLASS_MAX] = {
+ [MACHINE_CONTAINER] = "container",
+ [MACHINE_VM] = "vm",
+ [MACHINE_HOST] = "host",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(machine_class, MachineClass);
+
+static const char* const machine_state_table[_MACHINE_STATE_MAX] = {
+ [MACHINE_OPENING] = "opening",
+ [MACHINE_RUNNING] = "running",
+ [MACHINE_CLOSING] = "closing"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(machine_state, MachineState);
+
+static const char* const kill_who_table[_KILL_WHO_MAX] = {
+ [KILL_LEADER] = "leader",
+ [KILL_ALL] = "all"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);
diff --git a/src/machine/machine.h b/src/machine/machine.h
new file mode 100644
index 0000000..31527d0
--- /dev/null
+++ b/src/machine/machine.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Machine Machine;
+typedef enum KillWho KillWho;
+
+#include "list.h"
+#include "machined.h"
+#include "operation.h"
+
+typedef enum MachineState {
+ MACHINE_OPENING, /* Machine is being registered */
+ MACHINE_RUNNING, /* Machine is running */
+ MACHINE_CLOSING, /* Machine is terminating */
+ _MACHINE_STATE_MAX,
+ _MACHINE_STATE_INVALID = -1
+} MachineState;
+
+typedef enum MachineClass {
+ MACHINE_CONTAINER,
+ MACHINE_VM,
+ MACHINE_HOST,
+ _MACHINE_CLASS_MAX,
+ _MACHINE_CLASS_INVALID = -1
+} MachineClass;
+
+enum KillWho {
+ KILL_LEADER,
+ KILL_ALL,
+ _KILL_WHO_MAX,
+ _KILL_WHO_INVALID = -1
+};
+
+struct Machine {
+ Manager *manager;
+
+ char *name;
+ sd_id128_t id;
+
+ MachineClass class;
+
+ char *state_file;
+ char *service;
+ char *root_directory;
+
+ char *unit;
+ char *scope_job;
+
+ pid_t leader;
+
+ dual_timestamp timestamp;
+
+ bool in_gc_queue:1;
+ bool started:1;
+ bool stopping:1;
+
+ sd_bus_message *create_message;
+
+ int *netif;
+ size_t n_netif;
+
+ LIST_HEAD(Operation, operations);
+
+ LIST_FIELDS(Machine, gc_queue);
+};
+
+Machine* machine_new(Manager *manager, MachineClass class, const char *name);
+Machine* machine_free(Machine *m);
+bool machine_may_gc(Machine *m, bool drop_not_started);
+void machine_add_to_gc_queue(Machine *m);
+int machine_start(Machine *m, sd_bus_message *properties, sd_bus_error *error);
+int machine_stop(Machine *m);
+int machine_finalize(Machine *m);
+int machine_save(Machine *m);
+int machine_load(Machine *m);
+int machine_kill(Machine *m, KillWho who, int signo);
+
+void machine_release_unit(Machine *m);
+
+MachineState machine_get_state(Machine *u);
+
+const char* machine_class_to_string(MachineClass t) _const_;
+MachineClass machine_class_from_string(const char *s) _pure_;
+
+const char* machine_state_to_string(MachineState t) _const_;
+MachineState machine_state_from_string(const char *s) _pure_;
+
+const char *kill_who_to_string(KillWho k) _const_;
+KillWho kill_who_from_string(const char *s) _pure_;
+
+int machine_openpt(Machine *m, int flags);
+int machine_open_terminal(Machine *m, const char *path, int mode);
+
+int machine_get_uid_shift(Machine *m, uid_t *ret);
diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c
new file mode 100644
index 0000000..30f2e26
--- /dev/null
+++ b/src/machine/machinectl.c
@@ -0,0 +1,3127 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <locale.h>
+#include <math.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "cgroup-show.h"
+#include "cgroup-util.h"
+#include "copy.h"
+#include "def.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "format-table.h"
+#include "hostname-util.h"
+#include "import-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "logs-show.h"
+#include "macro.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "ptyfwd.h"
+#include "rlimit-util.h"
+#include "sigbus.h"
+#include "signal-util.h"
+#include "spawn-polkit-agent.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "unit-name.h"
+#include "util.h"
+#include "verbs.h"
+#include "web-util.h"
+
+#define ALL_IP_ADDRESSES -1
+
+static char **arg_property = NULL;
+static bool arg_all = false;
+static bool arg_value = false;
+static bool arg_full = false;
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+static const char *arg_kill_who = NULL;
+static int arg_signal = SIGTERM;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static const char *arg_host = NULL;
+static bool arg_read_only = false;
+static bool arg_mkdir = false;
+static bool arg_quiet = false;
+static bool arg_ask_password = true;
+static unsigned arg_lines = 10;
+static OutputMode arg_output = OUTPUT_SHORT;
+static bool arg_force = false;
+static ImportVerify arg_verify = IMPORT_VERIFY_SIGNATURE;
+static const char* arg_format = NULL;
+static const char *arg_uid = NULL;
+static char **arg_setenv = NULL;
+static int arg_addrs = 1;
+
+STATIC_DESTRUCTOR_REGISTER(arg_property, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_setenv, strv_freep);
+
+static OutputFlags get_output_flags(void) {
+ return
+ arg_all * OUTPUT_SHOW_ALL |
+ (arg_full || !on_tty() || pager_have()) * OUTPUT_FULL_WIDTH |
+ colors_enabled() * OUTPUT_COLOR |
+ !arg_quiet * OUTPUT_WARN_CUTOFF;
+}
+
+static int call_get_os_release(sd_bus *bus, const char *method, const char *name, const char *query, ...) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *k, *v, *iter, **query_res = NULL;
+ size_t count = 0, awaited_args = 0;
+ va_list ap;
+ int r;
+
+ assert(bus);
+ assert(name);
+ assert(query);
+
+ NULSTR_FOREACH(iter, query)
+ awaited_args++;
+ query_res = newa0(const char *, awaited_args);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ method,
+ &error,
+ &reply, "s", name);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to call '%s()': %s", method, bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "{ss}");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "{ss}", &k, &v)) > 0) {
+ count = 0;
+ NULSTR_FOREACH(iter, query) {
+ if (streq(k, iter)) {
+ query_res[count] = v;
+ break;
+ }
+ count++;
+ }
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ va_start(ap, query);
+ for (count = 0; count < awaited_args; count++) {
+ char *val, **out;
+
+ out = va_arg(ap, char **);
+ assert(out);
+ if (query_res[count]) {
+ val = strdup(query_res[count]);
+ if (!val) {
+ va_end(ap);
+ return -ENOMEM;
+ }
+ *out = val;
+ }
+ }
+ va_end(ap);
+
+ return 0;
+}
+
+static int call_get_addresses(sd_bus *bus, const char *name, int ifi, const char *prefix, const char *prefix2, int n_addr, char **ret) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *addresses = NULL;
+ bool truncate = false;
+ unsigned n = 0;
+ int r;
+
+ assert(bus);
+ assert(name);
+ assert(prefix);
+ assert(prefix2);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "GetMachineAddresses",
+ NULL,
+ &reply,
+ "s", name);
+ if (r < 0)
+ return log_debug_errno(r, "Could not get addresses: %s", bus_error_message(&error, r));
+
+ addresses = strdup(prefix);
+ if (!addresses)
+ return log_oom();
+ prefix = "";
+
+ r = sd_bus_message_enter_container(reply, 'a', "(iay)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_enter_container(reply, 'r', "iay")) > 0) {
+ int family;
+ const void *a;
+ size_t sz;
+ char buf_ifi[DECIMAL_STR_MAX(int) + 2], buffer[MAX(INET6_ADDRSTRLEN, INET_ADDRSTRLEN)];
+
+ r = sd_bus_message_read(reply, "i", &family);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(reply, 'y', &a, &sz);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (n_addr != 0) {
+ if (family == AF_INET6 && ifi > 0)
+ xsprintf(buf_ifi, "%%%i", ifi);
+ else
+ strcpy(buf_ifi, "");
+
+ if (!strextend(&addresses, prefix, inet_ntop(family, a, buffer, sizeof(buffer)), buf_ifi, NULL))
+ return log_oom();
+ } else
+ truncate = true;
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ prefix = prefix2;
+
+ if (n_addr > 0)
+ n_addr --;
+
+ n++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (truncate) {
+
+ if (!strextend(&addresses, special_glyph(SPECIAL_GLYPH_ELLIPSIS), NULL))
+ return -ENOMEM;
+
+ }
+
+ *ret = TAKE_PTR(addresses);
+ return (int) n;
+}
+
+static int show_table(Table *table, const char *word) {
+ int r;
+
+ assert(table);
+ assert(word);
+
+ if (table_get_rows(table) > 1) {
+ r = table_set_sort(table, (size_t) 0, (size_t) -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to sort table: %m");
+
+ table_set_header(table, arg_legend);
+
+ if (OUTPUT_MODE_IS_JSON(arg_output))
+ r = table_print_json(table, NULL, output_mode_to_json_format_flags(arg_output) | JSON_FORMAT_COLOR_AUTO);
+ else
+ r = table_print(table, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to show table: %m");
+ }
+
+ if (arg_legend) {
+ if (table_get_rows(table) > 1)
+ printf("\n%zu %s listed.\n", table_get_rows(table) - 1, word);
+ else
+ printf("No %s.\n", word);
+ }
+
+ return 0;
+}
+
+static int list_machines(int argc, char *argv[], void *userdata) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ (void) pager_open(arg_pager_flags);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "ListMachines",
+ &error,
+ &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not get machines: %s", bus_error_message(&error, r));
+
+ table = table_new("machine", "class", "service", "os", "version", "addresses");
+ if (!table)
+ return log_oom();
+
+ r = sd_bus_message_enter_container(reply, 'a', "(ssso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *os = NULL, *version_id = NULL, *addresses = NULL;
+ const char *name, *class, *service;
+
+ r = sd_bus_message_read(reply, "(ssso)", &name, &class, &service, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ if (name[0] == '.' && !arg_all)
+ continue;
+
+ (void) call_get_os_release(
+ bus,
+ "GetMachineOSRelease",
+ name,
+ "ID\0"
+ "VERSION_ID\0",
+ &os,
+ &version_id);
+
+ (void) call_get_addresses(
+ bus,
+ name,
+ 0,
+ "",
+ " ",
+ arg_addrs,
+ &addresses);
+
+ r = table_add_many(table,
+ TABLE_STRING, name,
+ TABLE_STRING, class,
+ TABLE_STRING, empty_to_dash(service),
+ TABLE_STRING, empty_to_dash(os),
+ TABLE_STRING, empty_to_dash(version_id),
+ TABLE_STRING, empty_to_dash(addresses));
+ if (r < 0)
+ return log_error_errno(r, "Failed to add table row: %m");
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return show_table(table, "machines");
+}
+
+static int list_images(int argc, char *argv[], void *userdata) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ (void) pager_open(arg_pager_flags);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "ListImages",
+ &error,
+ &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not get images: %s", bus_error_message(&error, r));
+
+ table = table_new("name", "type", "ro", "usage", "created", "modified");
+ if (!table)
+ return log_oom();
+
+ (void) table_set_align_percent(table, TABLE_HEADER_CELL(3), 100);
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssbttto)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ uint64_t crtime, mtime, size;
+ const char *name, *type;
+ TableCell *cell;
+ bool ro_bool;
+ int ro_int;
+
+ r = sd_bus_message_read(reply, "(ssbttto)", &name, &type, &ro_int, &crtime, &mtime, &size, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ if (name[0] == '.' && !arg_all)
+ continue;
+
+ r = table_add_many(table,
+ TABLE_STRING, name,
+ TABLE_STRING, type);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add table row: %m");
+
+ ro_bool = ro_int;
+ r = table_add_cell(table, &cell, TABLE_BOOLEAN, &ro_bool);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add table cell: %m");
+
+ if (ro_bool) {
+ r = table_set_color(table, cell, ansi_highlight_red());
+ if (r < 0)
+ return log_error_errno(r, "Failed to set table cell color: %m");
+ }
+
+ r = table_add_many(table,
+ TABLE_SIZE, size,
+ TABLE_TIMESTAMP, crtime,
+ TABLE_TIMESTAMP, mtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add table row: %m");
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return show_table(table, "images");
+}
+
+static int show_unit_cgroup(sd_bus *bus, const char *unit, pid_t leader) {
+ _cleanup_free_ char *cgroup = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+ unsigned c;
+
+ assert(bus);
+ assert(unit);
+
+ r = show_cgroup_get_unit_path_and_warn(bus, unit, &cgroup);
+ if (r < 0)
+ return r;
+
+ if (isempty(cgroup))
+ return 0;
+
+ c = columns();
+ if (c > 18)
+ c -= 18;
+ else
+ c = 0;
+
+ r = unit_show_processes(bus, unit, cgroup, "\t\t ", c, get_output_flags(), &error);
+ if (r == -EBADR) {
+
+ if (arg_transport == BUS_TRANSPORT_REMOTE)
+ return 0;
+
+ /* Fallback for older systemd versions where the GetUnitProcesses() call is not yet available */
+
+ if (cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, cgroup) != 0 && leader <= 0)
+ return 0;
+
+ show_cgroup_and_extra(SYSTEMD_CGROUP_CONTROLLER, cgroup, "\t\t ", c, &leader, leader > 0, get_output_flags());
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to dump process list: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int print_os_release(sd_bus *bus, const char *method, const char *name, const char *prefix) {
+ _cleanup_free_ char *pretty = NULL;
+ int r;
+
+ assert(bus);
+ assert(name);
+ assert(prefix);
+
+ r = call_get_os_release(bus, method, name, "PRETTY_NAME\0", &pretty, NULL);
+ if (r < 0)
+ return r;
+
+ if (pretty)
+ printf("%s%s\n", prefix, pretty);
+
+ return 0;
+}
+
+static int print_uid_shift(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ uint32_t shift;
+ int r;
+
+ assert(bus);
+ assert(name);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "GetMachineUIDShift",
+ &error,
+ &reply,
+ "s", name);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to query UID/GID shift: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "u", &shift);
+ if (r < 0)
+ return r;
+
+ if (shift == 0) /* Don't show trivial mappings */
+ return 0;
+
+ printf(" UID Shift: %" PRIu32 "\n", shift);
+ return 0;
+}
+
+typedef struct MachineStatusInfo {
+ const char *name;
+ sd_id128_t id;
+ const char *class;
+ const char *service;
+ const char *unit;
+ const char *root_directory;
+ pid_t leader;
+ struct dual_timestamp timestamp;
+ int *netif;
+ size_t n_netif;
+} MachineStatusInfo;
+
+static void machine_status_info_clear(MachineStatusInfo *info) {
+ if (info) {
+ free(info->netif);
+ zero(*info);
+ }
+}
+
+static void print_machine_status_info(sd_bus *bus, MachineStatusInfo *i) {
+ char since1[FORMAT_TIMESTAMP_RELATIVE_MAX];
+ char since2[FORMAT_TIMESTAMP_MAX];
+ _cleanup_free_ char *addresses = NULL;
+ const char *s1, *s2;
+ int ifi = -1;
+
+ assert(bus);
+ assert(i);
+
+ fputs(strna(i->name), stdout);
+
+ if (!sd_id128_is_null(i->id))
+ printf("(" SD_ID128_FORMAT_STR ")\n", SD_ID128_FORMAT_VAL(i->id));
+ else
+ putchar('\n');
+
+ s1 = format_timestamp_relative(since1, sizeof(since1), i->timestamp.realtime);
+ s2 = format_timestamp(since2, sizeof(since2), i->timestamp.realtime);
+
+ if (s1)
+ printf("\t Since: %s; %s\n", s2, s1);
+ else if (s2)
+ printf("\t Since: %s\n", s2);
+
+ if (i->leader > 0) {
+ _cleanup_free_ char *t = NULL;
+
+ printf("\t Leader: %u", (unsigned) i->leader);
+
+ get_process_comm(i->leader, &t);
+ if (t)
+ printf(" (%s)", t);
+
+ putchar('\n');
+ }
+
+ if (i->service) {
+ printf("\t Service: %s", i->service);
+
+ if (i->class)
+ printf("; class %s", i->class);
+
+ putchar('\n');
+ } else if (i->class)
+ printf("\t Class: %s\n", i->class);
+
+ if (i->root_directory)
+ printf("\t Root: %s\n", i->root_directory);
+
+ if (i->n_netif > 0) {
+ size_t c;
+
+ fputs("\t Iface:", stdout);
+
+ for (c = 0; c < i->n_netif; c++) {
+ char name[IF_NAMESIZE+1] = "";
+
+ if (if_indextoname(i->netif[c], name)) {
+ fputc(' ', stdout);
+ fputs(name, stdout);
+
+ if (ifi < 0)
+ ifi = i->netif[c];
+ else
+ ifi = 0;
+ } else
+ printf(" %i", i->netif[c]);
+ }
+
+ fputc('\n', stdout);
+ }
+
+ if (call_get_addresses(bus, i->name, ifi,
+ "\t Address: ", "\n\t ", ALL_IP_ADDRESSES,
+ &addresses) > 0) {
+ fputs(addresses, stdout);
+ fputc('\n', stdout);
+ }
+
+ print_os_release(bus, "GetMachineOSRelease", i->name, "\t OS: ");
+
+ print_uid_shift(bus, i->name);
+
+ if (i->unit) {
+ printf("\t Unit: %s\n", i->unit);
+ show_unit_cgroup(bus, i->unit, i->leader);
+
+ if (arg_transport == BUS_TRANSPORT_LOCAL)
+
+ show_journal_by_unit(
+ stdout,
+ i->unit,
+ arg_output,
+ 0,
+ i->timestamp.monotonic,
+ arg_lines,
+ 0,
+ get_output_flags() | OUTPUT_BEGIN_NEWLINE,
+ SD_JOURNAL_LOCAL_ONLY,
+ true,
+ NULL);
+ }
+}
+
+static int map_netif(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ MachineStatusInfo *i = userdata;
+ size_t l;
+ const void *v;
+ int r;
+
+ assert_cc(sizeof(int32_t) == sizeof(int));
+ r = sd_bus_message_read_array(m, SD_BUS_TYPE_INT32, &v, &l);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EBADMSG;
+
+ i->n_netif = l / sizeof(int32_t);
+ i->netif = memdup(v, l);
+ if (!i->netif)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int show_machine_info(const char *verb, sd_bus *bus, const char *path, bool *new_line) {
+
+ static const struct bus_properties_map map[] = {
+ { "Name", "s", NULL, offsetof(MachineStatusInfo, name) },
+ { "Class", "s", NULL, offsetof(MachineStatusInfo, class) },
+ { "Service", "s", NULL, offsetof(MachineStatusInfo, service) },
+ { "Unit", "s", NULL, offsetof(MachineStatusInfo, unit) },
+ { "RootDirectory", "s", NULL, offsetof(MachineStatusInfo, root_directory) },
+ { "Leader", "u", NULL, offsetof(MachineStatusInfo, leader) },
+ { "Timestamp", "t", NULL, offsetof(MachineStatusInfo, timestamp.realtime) },
+ { "TimestampMonotonic", "t", NULL, offsetof(MachineStatusInfo, timestamp.monotonic) },
+ { "Id", "ay", bus_map_id128, offsetof(MachineStatusInfo, id) },
+ { "NetworkInterfaces", "ai", map_netif, 0 },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(machine_status_info_clear) MachineStatusInfo info = {};
+ int r;
+
+ assert(verb);
+ assert(bus);
+ assert(path);
+ assert(new_line);
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.machine1",
+ path,
+ map,
+ 0,
+ &error,
+ &m,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Could not get properties: %s", bus_error_message(&error, r));
+
+ if (*new_line)
+ printf("\n");
+ *new_line = true;
+
+ print_machine_status_info(bus, &info);
+
+ return r;
+}
+
+static int show_machine_properties(sd_bus *bus, const char *path, bool *new_line) {
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(new_line);
+
+ if (*new_line)
+ printf("\n");
+
+ *new_line = true;
+
+ r = bus_print_all_properties(bus, "org.freedesktop.machine1", path, NULL, arg_property, arg_value, arg_all, NULL);
+ if (r < 0)
+ log_error_errno(r, "Could not get properties: %m");
+
+ return r;
+}
+
+static int show_machine(int argc, char *argv[], void *userdata) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ bool properties, new_line = false;
+ sd_bus *bus = userdata;
+ int r = 0, i;
+
+ assert(bus);
+
+ properties = !strstr(argv[0], "status");
+
+ (void) pager_open(arg_pager_flags);
+
+ if (properties && argc <= 1) {
+
+ /* If no argument is specified, inspect the manager
+ * itself */
+ r = show_machine_properties(bus, "/org/freedesktop/machine1", &new_line);
+ if (r < 0)
+ return r;
+ }
+
+ for (i = 1; i < argc; i++) {
+ const char *path = NULL;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "GetMachine",
+ &error,
+ &reply,
+ "s", argv[i]);
+ if (r < 0)
+ return log_error_errno(r, "Could not get path to machine: %s", bus_error_message(&error, -r));
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (properties)
+ r = show_machine_properties(bus, path, &new_line);
+ else
+ r = show_machine_info(argv[0], bus, path, &new_line);
+ }
+
+ return r;
+}
+
+static int print_image_hostname(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *hn;
+ int r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "GetImageHostname",
+ NULL, &reply, "s", name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "s", &hn);
+ if (r < 0)
+ return r;
+
+ if (!isempty(hn))
+ printf("\tHostname: %s\n", hn);
+
+ return 0;
+}
+
+static int print_image_machine_id(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ sd_id128_t id = SD_ID128_NULL;
+ const void *p;
+ size_t size;
+ int r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "GetImageMachineID",
+ NULL, &reply, "s", name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(reply, 'y', &p, &size);
+ if (r < 0)
+ return r;
+
+ if (size == sizeof(sd_id128_t))
+ memcpy(&id, p, size);
+
+ if (!sd_id128_is_null(id))
+ printf(" Machine ID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(id));
+
+ return 0;
+}
+
+static int print_image_machine_info(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "GetImageMachineInfo",
+ NULL, &reply, "s", name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(reply, 'a', "{ss}");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *p, *q;
+
+ r = sd_bus_message_read(reply, "{ss}", &p, &q);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (streq(p, "DEPLOYMENT"))
+ printf(" Deployment: %s\n", q);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+typedef struct ImageStatusInfo {
+ const char *name;
+ const char *path;
+ const char *type;
+ bool read_only;
+ usec_t crtime;
+ usec_t mtime;
+ uint64_t usage;
+ uint64_t limit;
+ uint64_t usage_exclusive;
+ uint64_t limit_exclusive;
+} ImageStatusInfo;
+
+static void print_image_status_info(sd_bus *bus, ImageStatusInfo *i) {
+ char ts_relative[FORMAT_TIMESTAMP_RELATIVE_MAX];
+ char ts_absolute[FORMAT_TIMESTAMP_MAX];
+ char bs[FORMAT_BYTES_MAX];
+ char bs_exclusive[FORMAT_BYTES_MAX];
+ const char *s1, *s2, *s3, *s4;
+
+ assert(bus);
+ assert(i);
+
+ if (i->name) {
+ fputs(i->name, stdout);
+ putchar('\n');
+ }
+
+ if (i->type)
+ printf("\t Type: %s\n", i->type);
+
+ if (i->path)
+ printf("\t Path: %s\n", i->path);
+
+ (void) print_image_hostname(bus, i->name);
+ (void) print_image_machine_id(bus, i->name);
+ (void) print_image_machine_info(bus, i->name);
+
+ print_os_release(bus, "GetImageOSRelease", i->name, "\t OS: ");
+
+ printf("\t RO: %s%s%s\n",
+ i->read_only ? ansi_highlight_red() : "",
+ i->read_only ? "read-only" : "writable",
+ i->read_only ? ansi_normal() : "");
+
+ s1 = format_timestamp_relative(ts_relative, sizeof(ts_relative), i->crtime);
+ s2 = format_timestamp(ts_absolute, sizeof(ts_absolute), i->crtime);
+ if (s1 && s2)
+ printf("\t Created: %s; %s\n", s2, s1);
+ else if (s2)
+ printf("\t Created: %s\n", s2);
+
+ s1 = format_timestamp_relative(ts_relative, sizeof(ts_relative), i->mtime);
+ s2 = format_timestamp(ts_absolute, sizeof(ts_absolute), i->mtime);
+ if (s1 && s2)
+ printf("\tModified: %s; %s\n", s2, s1);
+ else if (s2)
+ printf("\tModified: %s\n", s2);
+
+ s3 = format_bytes(bs, sizeof(bs), i->usage);
+ s4 = i->usage_exclusive != i->usage ? format_bytes(bs_exclusive, sizeof(bs_exclusive), i->usage_exclusive) : NULL;
+ if (s3 && s4)
+ printf("\t Usage: %s (exclusive: %s)\n", s3, s4);
+ else if (s3)
+ printf("\t Usage: %s\n", s3);
+
+ s3 = format_bytes(bs, sizeof(bs), i->limit);
+ s4 = i->limit_exclusive != i->limit ? format_bytes(bs_exclusive, sizeof(bs_exclusive), i->limit_exclusive) : NULL;
+ if (s3 && s4)
+ printf("\t Limit: %s (exclusive: %s)\n", s3, s4);
+ else if (s3)
+ printf("\t Limit: %s\n", s3);
+}
+
+static int show_image_info(sd_bus *bus, const char *path, bool *new_line) {
+
+ static const struct bus_properties_map map[] = {
+ { "Name", "s", NULL, offsetof(ImageStatusInfo, name) },
+ { "Path", "s", NULL, offsetof(ImageStatusInfo, path) },
+ { "Type", "s", NULL, offsetof(ImageStatusInfo, type) },
+ { "ReadOnly", "b", NULL, offsetof(ImageStatusInfo, read_only) },
+ { "CreationTimestamp", "t", NULL, offsetof(ImageStatusInfo, crtime) },
+ { "ModificationTimestamp", "t", NULL, offsetof(ImageStatusInfo, mtime) },
+ { "Usage", "t", NULL, offsetof(ImageStatusInfo, usage) },
+ { "Limit", "t", NULL, offsetof(ImageStatusInfo, limit) },
+ { "UsageExclusive", "t", NULL, offsetof(ImageStatusInfo, usage_exclusive) },
+ { "LimitExclusive", "t", NULL, offsetof(ImageStatusInfo, limit_exclusive) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ ImageStatusInfo info = {};
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(new_line);
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.machine1",
+ path,
+ map,
+ BUS_MAP_BOOLEAN_AS_BOOL,
+ &error,
+ &m,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Could not get properties: %s", bus_error_message(&error, r));
+
+ if (*new_line)
+ printf("\n");
+ *new_line = true;
+
+ print_image_status_info(bus, &info);
+
+ return r;
+}
+
+typedef struct PoolStatusInfo {
+ const char *path;
+ uint64_t usage;
+ uint64_t limit;
+} PoolStatusInfo;
+
+static void print_pool_status_info(sd_bus *bus, PoolStatusInfo *i) {
+ char bs[FORMAT_BYTES_MAX], *s;
+
+ if (i->path)
+ printf("\t Path: %s\n", i->path);
+
+ s = format_bytes(bs, sizeof(bs), i->usage);
+ if (s)
+ printf("\t Usage: %s\n", s);
+
+ s = format_bytes(bs, sizeof(bs), i->limit);
+ if (s)
+ printf("\t Limit: %s\n", s);
+}
+
+static int show_pool_info(sd_bus *bus) {
+
+ static const struct bus_properties_map map[] = {
+ { "PoolPath", "s", NULL, offsetof(PoolStatusInfo, path) },
+ { "PoolUsage", "t", NULL, offsetof(PoolStatusInfo, usage) },
+ { "PoolLimit", "t", NULL, offsetof(PoolStatusInfo, limit) },
+ {}
+ };
+
+ PoolStatusInfo info = {
+ .usage = (uint64_t) -1,
+ .limit = (uint64_t) -1,
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ map,
+ 0,
+ &error,
+ &m,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Could not get properties: %s", bus_error_message(&error, r));
+
+ print_pool_status_info(bus, &info);
+
+ return 0;
+}
+
+static int show_image_properties(sd_bus *bus, const char *path, bool *new_line) {
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(new_line);
+
+ if (*new_line)
+ printf("\n");
+
+ *new_line = true;
+
+ r = bus_print_all_properties(bus, "org.freedesktop.machine1", path, NULL, arg_property, arg_value, arg_all, NULL);
+ if (r < 0)
+ log_error_errno(r, "Could not get properties: %m");
+
+ return r;
+}
+
+static int show_image(int argc, char *argv[], void *userdata) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ bool properties, new_line = false;
+ sd_bus *bus = userdata;
+ int r = 0, i;
+
+ assert(bus);
+
+ properties = !strstr(argv[0], "status");
+
+ (void) pager_open(arg_pager_flags);
+
+ if (argc <= 1) {
+
+ /* If no argument is specified, inspect the manager
+ * itself */
+
+ if (properties)
+ r = show_image_properties(bus, "/org/freedesktop/machine1", &new_line);
+ else
+ r = show_pool_info(bus);
+ if (r < 0)
+ return r;
+ }
+
+ for (i = 1; i < argc; i++) {
+ const char *path = NULL;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "GetImage",
+ &error,
+ &reply,
+ "s", argv[i]);
+ if (r < 0)
+ return log_error_errno(r, "Could not get path to image: %s", bus_error_message(&error, -r));
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (properties)
+ r = show_image_properties(bus, path, &new_line);
+ else
+ r = show_image_info(bus, path, &new_line);
+ }
+
+ return r;
+}
+
+static int kill_machine(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r, i;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ if (!arg_kill_who)
+ arg_kill_who = "all";
+
+ for (i = 1; i < argc; i++) {
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "KillMachine",
+ &error,
+ NULL,
+ "ssi", argv[i], arg_kill_who, arg_signal);
+ if (r < 0)
+ return log_error_errno(r, "Could not kill machine: %s", bus_error_message(&error, -r));
+ }
+
+ return 0;
+}
+
+static int reboot_machine(int argc, char *argv[], void *userdata) {
+ arg_kill_who = "leader";
+ arg_signal = SIGINT; /* sysvinit + systemd */
+
+ return kill_machine(argc, argv, userdata);
+}
+
+static int poweroff_machine(int argc, char *argv[], void *userdata) {
+ arg_kill_who = "leader";
+ arg_signal = SIGRTMIN+4; /* only systemd */
+
+ return kill_machine(argc, argv, userdata);
+}
+
+static int terminate_machine(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r, i;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ for (i = 1; i < argc; i++) {
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "TerminateMachine",
+ &error,
+ NULL,
+ "s", argv[i]);
+ if (r < 0)
+ return log_error_errno(r, "Could not terminate machine: %s", bus_error_message(&error, -r));
+ }
+
+ return 0;
+}
+
+static int copy_files(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *abs_host_path = NULL;
+ char *dest, *host_path, *container_path;
+ sd_bus *bus = userdata;
+ bool copy_from;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ copy_from = streq(argv[0], "copy-from");
+ dest = argv[3] ?: argv[2];
+ host_path = copy_from ? dest : argv[2];
+ container_path = copy_from ? argv[2] : dest;
+
+ if (!path_is_absolute(host_path)) {
+ r = path_make_absolute_cwd(host_path, &abs_host_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make path absolute: %m");
+
+ host_path = abs_host_path;
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ copy_from ? "CopyFromMachine" : "CopyToMachine");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "sss",
+ argv[1],
+ copy_from ? container_path : host_path,
+ copy_from ? host_path : container_path);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* This is a slow operation, hence turn off any method call timeouts */
+ r = sd_bus_call(bus, m, USEC_INFINITY, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int bind_mount(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "BindMountMachine",
+ &error,
+ NULL,
+ "sssbb",
+ argv[1],
+ argv[2],
+ argv[3],
+ arg_read_only,
+ arg_mkdir);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind mount: %s", bus_error_message(&error, -r));
+
+ return 0;
+}
+
+static int on_machine_removed(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ PTYForward ** forward = (PTYForward**) userdata;
+ int r;
+
+ assert(m);
+ assert(forward);
+
+ if (*forward) {
+ /* If the forwarder is already initialized, tell it to
+ * exit on the next vhangup(), so that we still flush
+ * out what might be queued and exit then. */
+
+ r = pty_forward_set_ignore_vhangup(*forward, false);
+ if (r >= 0)
+ return 0;
+
+ log_error_errno(r, "Failed to set ignore_vhangup flag: %m");
+ }
+
+ /* On error, or when the forwarder is not initialized yet, quit immediately */
+ sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), EXIT_FAILURE);
+ return 0;
+}
+
+static int process_forward(sd_event *event, PTYForward **forward, int master, PTYForwardFlags flags, const char *name) {
+ char last_char = 0;
+ bool machine_died;
+ int r;
+
+ assert(event);
+ assert(master >= 0);
+ assert(name);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGWINCH, SIGTERM, SIGINT, -1) >= 0);
+
+ if (!arg_quiet) {
+ if (streq(name, ".host"))
+ log_info("Connected to the local host. Press ^] three times within 1s to exit session.");
+ else
+ log_info("Connected to machine %s. Press ^] three times within 1s to exit session.", name);
+ }
+
+ (void) sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+
+ r = pty_forward_new(event, master, flags, forward);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create PTY forwarder: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ pty_forward_get_last_char(*forward, &last_char);
+
+ machine_died =
+ (flags & PTY_FORWARD_IGNORE_VHANGUP) &&
+ pty_forward_get_ignore_vhangup(*forward) == 0;
+
+ *forward = pty_forward_free(*forward);
+
+ if (last_char != '\n')
+ fputc('\n', stdout);
+
+ if (!arg_quiet) {
+ if (machine_died)
+ log_info("Machine %s terminated.", name);
+ else if (streq(name, ".host"))
+ log_info("Connection to the local host terminated.");
+ else
+ log_info("Connection to machine %s terminated.", name);
+ }
+
+ return 0;
+}
+
+static int parse_machine_uid(const char *spec, const char **machine, char **uid) {
+ /*
+ * Whatever is specified in the spec takes priority over global arguments.
+ */
+ char *_uid = NULL;
+ const char *_machine = NULL;
+
+ if (spec) {
+ const char *at;
+
+ at = strchr(spec, '@');
+ if (at) {
+ if (at == spec)
+ /* Do the same as ssh and refuse "@host". */
+ return -EINVAL;
+
+ _machine = at + 1;
+ _uid = strndup(spec, at - spec);
+ if (!_uid)
+ return -ENOMEM;
+ } else
+ _machine = spec;
+ };
+
+ if (arg_uid && !_uid) {
+ _uid = strdup(arg_uid);
+ if (!_uid)
+ return -ENOMEM;
+ }
+
+ *uid = _uid;
+ *machine = isempty(_machine) ? ".host" : _machine;
+ return 0;
+}
+
+static int login_machine(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(pty_forward_freep) PTYForward *forward = NULL;
+ _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *slot = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ int master = -1, r;
+ sd_bus *bus = userdata;
+ const char *match, *machine;
+
+ assert(bus);
+
+ if (!strv_isempty(arg_setenv) || arg_uid) {
+ log_error("--setenv= and --uid= are not supported for 'login'. Use 'shell' instead.");
+ return -EINVAL;
+ }
+
+ if (!IN_SET(arg_transport, BUS_TRANSPORT_LOCAL, BUS_TRANSPORT_MACHINE)) {
+ log_error("Login only supported on local machines.");
+ return -EOPNOTSUPP;
+ }
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get event loop: %m");
+
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ machine = argc < 2 || isempty(argv[1]) ? ".host" : argv[1];
+
+ match = strjoina("type='signal',"
+ "sender='org.freedesktop.machine1',"
+ "path='/org/freedesktop/machine1',",
+ "interface='org.freedesktop.machine1.Manager',"
+ "member='MachineRemoved',"
+ "arg0='", machine, "'");
+
+ r = sd_bus_add_match_async(bus, &slot, match, on_machine_removed, NULL, &forward);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request machine removal match: %m");
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "OpenMachineLogin",
+ &error,
+ &reply,
+ "s", machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get login PTY: %s", bus_error_message(&error, -r));
+
+ r = sd_bus_message_read(reply, "hs", &master, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return process_forward(event, &forward, master, PTY_FORWARD_IGNORE_VHANGUP, machine);
+}
+
+static int shell_machine(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(pty_forward_freep) PTYForward *forward = NULL;
+ _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *slot = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ int master = -1, r;
+ sd_bus *bus = userdata;
+ const char *match, *machine, *path;
+ _cleanup_free_ char *uid = NULL;
+
+ assert(bus);
+
+ if (!IN_SET(arg_transport, BUS_TRANSPORT_LOCAL, BUS_TRANSPORT_MACHINE)) {
+ log_error("Shell only supported on local machines.");
+ return -EOPNOTSUPP;
+ }
+
+ /* Pass $TERM to shell session, if not explicitly specified. */
+ if (!strv_find_prefix(arg_setenv, "TERM=")) {
+ const char *t;
+
+ t = strv_find_prefix(environ, "TERM=");
+ if (t) {
+ if (strv_extend(&arg_setenv, t) < 0)
+ return log_oom();
+ }
+ }
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get event loop: %m");
+
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ r = parse_machine_uid(argc >= 2 ? argv[1] : NULL, &machine, &uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse machine specification: %m");
+
+ match = strjoina("type='signal',"
+ "sender='org.freedesktop.machine1',"
+ "path='/org/freedesktop/machine1',",
+ "interface='org.freedesktop.machine1.Manager',"
+ "member='MachineRemoved',"
+ "arg0='", machine, "'");
+
+ r = sd_bus_add_match_async(bus, &slot, match, on_machine_removed, NULL, &forward);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request machine removal match: %m");
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "OpenMachineShell");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ path = argc < 3 || isempty(argv[2]) ? NULL : argv[2];
+
+ r = sd_bus_message_append(m, "sss", machine, uid, path);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, strv_length(argv) <= 3 ? NULL : argv + 2);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, arg_setenv);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get shell PTY: %s", bus_error_message(&error, -r));
+
+ r = sd_bus_message_read(reply, "hs", &master, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return process_forward(event, &forward, master, 0, machine);
+}
+
+static int remove_image(int argc, char *argv[], void *userdata) {
+ sd_bus *bus = userdata;
+ int r, i;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ for (i = 1; i < argc; i++) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "RemoveImage");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", argv[i]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* This is a slow operation, hence turn off any method call timeouts */
+ r = sd_bus_call(bus, m, USEC_INFINITY, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not remove image: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int rename_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "RenameImage",
+ &error,
+ NULL,
+ "ss", argv[1], argv[2]);
+ if (r < 0)
+ return log_error_errno(r, "Could not rename image: %s", bus_error_message(&error, -r));
+
+ return 0;
+}
+
+static int clone_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "CloneImage");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "ssb", argv[1], argv[2], arg_read_only);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* This is a slow operation, hence turn off any method call timeouts */
+ r = sd_bus_call(bus, m, USEC_INFINITY, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not clone image: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int read_only_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int b = true, r;
+
+ assert(bus);
+
+ if (argc > 2) {
+ b = parse_boolean(argv[2]);
+ if (b < 0) {
+ log_error("Failed to parse boolean argument: %s", argv[2]);
+ return -EINVAL;
+ }
+ }
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "MarkImageReadOnly",
+ &error,
+ NULL,
+ "sb", argv[1], b);
+ if (r < 0)
+ return log_error_errno(r, "Could not mark image read-only: %s", bus_error_message(&error, -r));
+
+ return 0;
+}
+
+static int image_exists(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(name);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "GetImage",
+ &error,
+ NULL,
+ "s", name);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_IMAGE))
+ return 0;
+
+ return log_error_errno(r, "Failed to check whether image %s exists: %s", name, bus_error_message(&error, -r));
+ }
+
+ return 1;
+}
+
+static int make_service_name(const char *name, char **ret) {
+ int r;
+
+ assert(name);
+ assert(ret);
+
+ if (!machine_name_is_valid(name))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid machine name %s.", name);
+
+ r = unit_name_build("systemd-nspawn", name, ".service", ret);
+ if (r < 0)
+ return log_error_errno(r, "Failed to build unit name: %m");
+
+ return 0;
+}
+
+static int start_machine(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ sd_bus *bus = userdata;
+ int r, i;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_oom();
+
+ for (i = 1; i < argc; i++) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *unit = NULL;
+ const char *object;
+
+ r = make_service_name(argv[i], &unit);
+ if (r < 0)
+ return r;
+
+ r = image_exists(bus, argv[i]);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ log_error("Machine image '%s' does not exist.", argv[i]);
+ return -ENXIO;
+ }
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartUnit",
+ &error,
+ &reply,
+ "ss", unit, "fail");
+ if (r < 0)
+ return log_error_errno(r, "Failed to start unit: %s", bus_error_message(&error, -r));
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_add(w, object);
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = bus_wait_for_jobs(w, arg_quiet, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int enable_machine(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ const char *method = NULL;
+ sd_bus *bus = userdata;
+ int r, i;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ method = streq(argv[0], "enable") ? "EnableUnitFiles" : "DisableUnitFiles";
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ method);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (i = 1; i < argc; i++) {
+ _cleanup_free_ char *unit = NULL;
+
+ r = make_service_name(argv[i], &unit);
+ if (r < 0)
+ return r;
+
+ r = image_exists(bus, argv[i]);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ log_error("Machine image '%s' does not exist.", argv[i]);
+ return -ENXIO;
+ }
+
+ r = sd_bus_message_append(m, "s", unit);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (streq(argv[0], "enable"))
+ r = sd_bus_message_append(m, "bb", false, false);
+ else
+ r = sd_bus_message_append(m, "b", false);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable or disable unit: %s", bus_error_message(&error, -r));
+
+ if (streq(argv[0], "enable")) {
+ r = sd_bus_message_read(reply, "b", NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ r = bus_deserialize_and_dump_unit_file_changes(reply, arg_quiet, &changes, &n_changes);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Reload",
+ &error,
+ NULL,
+ NULL);
+ if (r < 0) {
+ log_error("Failed to reload daemon: %s", bus_error_message(&error, -r));
+ goto finish;
+ }
+
+ r = 0;
+
+finish:
+ unit_file_changes_free(changes, n_changes);
+
+ return r;
+}
+
+static int match_log_message(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ const char **our_path = userdata, *line;
+ unsigned priority;
+ int r;
+
+ assert(m);
+ assert(our_path);
+
+ r = sd_bus_message_read(m, "us", &priority, &line);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ if (!streq_ptr(*our_path, sd_bus_message_get_path(m)))
+ return 0;
+
+ if (arg_quiet && LOG_PRI(priority) >= LOG_INFO)
+ return 0;
+
+ log_full(priority, "%s", line);
+ return 0;
+}
+
+static int match_transfer_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ const char **our_path = userdata, *path, *result;
+ uint32_t id;
+ int r;
+
+ assert(m);
+ assert(our_path);
+
+ r = sd_bus_message_read(m, "uos", &id, &path, &result);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ if (!streq_ptr(*our_path, path))
+ return 0;
+
+ sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), !streq_ptr(result, "done"));
+ return 0;
+}
+
+static int transfer_signal_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ assert(s);
+ assert(si);
+
+ if (!arg_quiet)
+ log_info("Continuing download in the background. Use \"machinectl cancel-transfer %" PRIu32 "\" to abort transfer.", PTR_TO_UINT32(userdata));
+
+ sd_event_exit(sd_event_source_get_event(s), EINTR);
+ return 0;
+}
+
+static int transfer_image_common(sd_bus *bus, sd_bus_message *m) {
+ _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *slot_job_removed = NULL, *slot_log_message = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_event_unrefp) sd_event* event = NULL;
+ const char *path = NULL;
+ uint32_t id;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get event loop: %m");
+
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ r = sd_bus_match_signal_async(
+ bus,
+ &slot_job_removed,
+ "org.freedesktop.import1",
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ "TransferRemoved",
+ match_transfer_removed, NULL, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match: %m");
+
+ r = sd_bus_match_signal_async(
+ bus,
+ &slot_log_message,
+ "org.freedesktop.import1",
+ NULL,
+ "org.freedesktop.import1.Transfer",
+ "LogMessage",
+ match_log_message, NULL, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match: %m");
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transfer image: %s", bus_error_message(&error, -r));
+
+ r = sd_bus_message_read(reply, "uo", &id, &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ if (!arg_quiet)
+ log_info("Enqueued transfer job %u. Press C-c to continue download in background.", id);
+
+ (void) sd_event_add_signal(event, NULL, SIGINT, transfer_signal_handler, UINT32_TO_PTR(id));
+ (void) sd_event_add_signal(event, NULL, SIGTERM, transfer_signal_handler, UINT32_TO_PTR(id));
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ return -r;
+}
+
+static const char *nullify_dash(const char *p) {
+ if (isempty(p))
+ return NULL;
+
+ if (streq(p, "-"))
+ return NULL;
+
+ return p;
+}
+
+static int import_tar(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *ll = NULL, *fn = NULL;
+ const char *local = NULL, *path = NULL;
+ _cleanup_close_ int fd = -1;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2)
+ path = nullify_dash(argv[1]);
+
+ if (argc >= 3)
+ local = nullify_dash(argv[2]);
+ else if (path) {
+ r = path_extract_filename(path, &fn);
+ if (r < 0)
+ return log_error_errno(r, "Cannot extract container name from filename: %m");
+
+ local = fn;
+ }
+ if (!local) {
+ log_error("Need either path or local name.");
+ return -EINVAL;
+ }
+
+ r = tar_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local)) {
+ log_error("Local name %s is not a suitable machine name.", local);
+ return -EINVAL;
+ }
+
+ if (path) {
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", path);
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.import1",
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ "ImportTar");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "hsbb",
+ fd >= 0 ? fd : STDIN_FILENO,
+ local,
+ arg_force,
+ arg_read_only);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return transfer_image_common(bus, m);
+}
+
+static int import_raw(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *ll = NULL, *fn = NULL;
+ const char *local = NULL, *path = NULL;
+ _cleanup_close_ int fd = -1;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2)
+ path = nullify_dash(argv[1]);
+
+ if (argc >= 3)
+ local = nullify_dash(argv[2]);
+ else if (path) {
+ r = path_extract_filename(path, &fn);
+ if (r < 0)
+ return log_error_errno(r, "Cannot extract container name from filename: %m");
+
+ local = fn;
+ }
+ if (!local) {
+ log_error("Need either path or local name.");
+ return -EINVAL;
+ }
+
+ r = raw_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local)) {
+ log_error("Local name %s is not a suitable machine name.", local);
+ return -EINVAL;
+ }
+
+ if (path) {
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", path);
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.import1",
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ "ImportRaw");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "hsbb",
+ fd >= 0 ? fd : STDIN_FILENO,
+ local,
+ arg_force,
+ arg_read_only);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return transfer_image_common(bus, m);
+}
+
+static int import_fs(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ const char *local = NULL, *path = NULL;
+ _cleanup_free_ char *fn = NULL;
+ _cleanup_close_ int fd = -1;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2)
+ path = nullify_dash(argv[1]);
+
+ if (argc >= 3)
+ local = nullify_dash(argv[2]);
+ else if (path) {
+ r = path_extract_filename(path, &fn);
+ if (r < 0)
+ return log_error_errno(r, "Cannot extract container name from filename: %m");
+
+ local = fn;
+ }
+ if (!local) {
+ log_error("Need either path or local name.");
+ return -EINVAL;
+ }
+
+ if (!machine_name_is_valid(local)) {
+ log_error("Local name %s is not a suitable machine name.", local);
+ return -EINVAL;
+ }
+
+ if (path) {
+ fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open directory '%s': %m", path);
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.import1",
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ "ImportFileSystem");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "hsbb",
+ fd >= 0 ? fd : STDIN_FILENO,
+ local,
+ arg_force,
+ arg_read_only);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return transfer_image_common(bus, m);
+}
+
+static void determine_compression_from_filename(const char *p) {
+ if (arg_format)
+ return;
+
+ if (!p)
+ return;
+
+ if (endswith(p, ".xz"))
+ arg_format = "xz";
+ else if (endswith(p, ".gz"))
+ arg_format = "gzip";
+ else if (endswith(p, ".bz2"))
+ arg_format = "bzip2";
+}
+
+static int export_tar(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *local = NULL, *path = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ local = argv[1];
+ if (!machine_name_is_valid(local)) {
+ log_error("Machine name %s is not valid.", local);
+ return -EINVAL;
+ }
+
+ if (argc >= 3)
+ path = argv[2];
+ if (isempty(path) || streq(path, "-"))
+ path = NULL;
+
+ if (path) {
+ determine_compression_from_filename(path);
+
+ fd = open(path, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC|O_NOCTTY, 0666);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", path);
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.import1",
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ "ExportTar");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "shs",
+ local,
+ fd >= 0 ? fd : STDOUT_FILENO,
+ arg_format);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return transfer_image_common(bus, m);
+}
+
+static int export_raw(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *local = NULL, *path = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ local = argv[1];
+ if (!machine_name_is_valid(local)) {
+ log_error("Machine name %s is not valid.", local);
+ return -EINVAL;
+ }
+
+ if (argc >= 3)
+ path = argv[2];
+ if (isempty(path) || streq(path, "-"))
+ path = NULL;
+
+ if (path) {
+ determine_compression_from_filename(path);
+
+ fd = open(path, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC|O_NOCTTY, 0666);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", path);
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.import1",
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ "ExportRaw");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "shs",
+ local,
+ fd >= 0 ? fd : STDOUT_FILENO,
+ arg_format);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return transfer_image_common(bus, m);
+}
+
+static int pull_tar(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *l = NULL, *ll = NULL;
+ const char *local, *remote;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ remote = argv[1];
+ if (!http_url_is_valid(remote)) {
+ log_error("URL '%s' is not valid.", remote);
+ return -EINVAL;
+ }
+
+ if (argc >= 3)
+ local = argv[2];
+ else {
+ r = import_url_last_component(remote, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get final component of URL: %m");
+
+ local = l;
+ }
+
+ if (isempty(local) || streq(local, "-"))
+ local = NULL;
+
+ if (local) {
+ r = tar_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local)) {
+ log_error("Local name %s is not a suitable machine name.", local);
+ return -EINVAL;
+ }
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.import1",
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ "PullTar");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "sssb",
+ remote,
+ local,
+ import_verify_to_string(arg_verify),
+ arg_force);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return transfer_image_common(bus, m);
+}
+
+static int pull_raw(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *l = NULL, *ll = NULL;
+ const char *local, *remote;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ remote = argv[1];
+ if (!http_url_is_valid(remote)) {
+ log_error("URL '%s' is not valid.", remote);
+ return -EINVAL;
+ }
+
+ if (argc >= 3)
+ local = argv[2];
+ else {
+ r = import_url_last_component(remote, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get final component of URL: %m");
+
+ local = l;
+ }
+
+ if (isempty(local) || streq(local, "-"))
+ local = NULL;
+
+ if (local) {
+ r = raw_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local)) {
+ log_error("Local name %s is not a suitable machine name.", local);
+ return -EINVAL;
+ }
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.import1",
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ "PullRaw");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "sssb",
+ remote,
+ local,
+ import_verify_to_string(arg_verify),
+ arg_force);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return transfer_image_common(bus, m);
+}
+
+typedef struct TransferInfo {
+ uint32_t id;
+ const char *type;
+ const char *remote;
+ const char *local;
+ double progress;
+} TransferInfo;
+
+static int compare_transfer_info(const TransferInfo *a, const TransferInfo *b) {
+ return strcmp(a->local, b->local);
+}
+
+static int list_transfers(int argc, char *argv[], void *userdata) {
+ size_t max_type = STRLEN("TYPE"), max_local = STRLEN("LOCAL"), max_remote = STRLEN("REMOTE");
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ TransferInfo *transfers = NULL;
+ size_t n_transfers = 0, n_allocated = 0, j;
+ const char *type, *remote, *local;
+ sd_bus *bus = userdata;
+ uint32_t id, max_id = 0;
+ double progress;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.import1",
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ "ListTransfers",
+ &error,
+ &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not get transfers: %s", bus_error_message(&error, -r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "(usssdo)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "(usssdo)", &id, &type, &remote, &local, &progress, NULL)) > 0) {
+ size_t l;
+
+ if (!GREEDY_REALLOC(transfers, n_allocated, n_transfers + 1))
+ return log_oom();
+
+ transfers[n_transfers].id = id;
+ transfers[n_transfers].type = type;
+ transfers[n_transfers].remote = remote;
+ transfers[n_transfers].local = local;
+ transfers[n_transfers].progress = progress;
+
+ l = strlen(type);
+ if (l > max_type)
+ max_type = l;
+
+ l = strlen(remote);
+ if (l > max_remote)
+ max_remote = l;
+
+ l = strlen(local);
+ if (l > max_local)
+ max_local = l;
+
+ if (id > max_id)
+ max_id = id;
+
+ n_transfers++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ typesafe_qsort(transfers, n_transfers, compare_transfer_info);
+
+ if (arg_legend && n_transfers > 0)
+ printf("%-*s %-*s %-*s %-*s %-*s\n",
+ (int) MAX(2U, DECIMAL_STR_WIDTH(max_id)), "ID",
+ (int) 7, "PERCENT",
+ (int) max_type, "TYPE",
+ (int) max_local, "LOCAL",
+ (int) max_remote, "REMOTE");
+
+ for (j = 0; j < n_transfers; j++)
+
+ if (transfers[j].progress < 0)
+ printf("%*" PRIu32 " %*s %-*s %-*s %-*s\n",
+ (int) MAX(2U, DECIMAL_STR_WIDTH(max_id)), transfers[j].id,
+ (int) 7, "n/a",
+ (int) max_type, transfers[j].type,
+ (int) max_local, transfers[j].local,
+ (int) max_remote, transfers[j].remote);
+ else
+ printf("%*" PRIu32 " %*u%% %-*s %-*s %-*s\n",
+ (int) MAX(2U, DECIMAL_STR_WIDTH(max_id)), transfers[j].id,
+ (int) 6, (unsigned) (transfers[j].progress * 100),
+ (int) max_type, transfers[j].type,
+ (int) max_local, transfers[j].local,
+ (int) max_remote, transfers[j].remote);
+
+ if (arg_legend) {
+ if (n_transfers > 0)
+ printf("\n%zu transfers listed.\n", n_transfers);
+ else
+ printf("No transfers.\n");
+ }
+
+ return 0;
+}
+
+static int cancel_transfer(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r, i;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ for (i = 1; i < argc; i++) {
+ uint32_t id;
+
+ r = safe_atou32(argv[i], &id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse transfer id: %s", argv[i]);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.import1",
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ "CancelTransfer",
+ &error,
+ NULL,
+ "u", id);
+ if (r < 0)
+ return log_error_errno(r, "Could not cancel transfer: %s", bus_error_message(&error, -r));
+ }
+
+ return 0;
+}
+
+static int set_limit(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ uint64_t limit;
+ int r;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ if (STR_IN_SET(argv[argc-1], "-", "none", "infinity"))
+ limit = (uint64_t) -1;
+ else {
+ r = parse_size(argv[argc-1], 1024, &limit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse size: %s", argv[argc-1]);
+ }
+
+ if (argc > 2)
+ /* With two arguments changes the quota limit of the
+ * specified image */
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "SetImageLimit",
+ &error,
+ NULL,
+ "st", argv[1], limit);
+ else
+ /* With one argument changes the pool quota limit */
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "SetPoolLimit",
+ &error,
+ NULL,
+ "t", limit);
+
+ if (r < 0)
+ return log_error_errno(r, "Could not set limit: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int clean_images(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ uint64_t usage, total = 0;
+ char fb[FORMAT_BYTES_MAX];
+ sd_bus *bus = userdata;
+ const char *name;
+ unsigned c = 0;
+ int r;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "CleanPool");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", arg_all ? "all" : "hidden");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* This is a slow operation, hence permit a longer time for completion. */
+ r = sd_bus_call(bus, m, USEC_INFINITY, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Could not clean pool: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "(st)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "(st)", &name, &usage)) > 0) {
+ log_info("Removed image '%s'. Freed exclusive disk space: %s",
+ name, format_bytes(fb, sizeof(fb), usage));
+
+ total += usage;
+ c++;
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ log_info("Removed %u images in total. Total freed exclusive disk space %s.",
+ c, format_bytes(fb, sizeof(fb), total));
+
+ return 0;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("machinectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Send control commands to or query the virtual machine and container\n"
+ "registration manager.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers and footers\n"
+ " --no-ask-password Do not ask for system passwords\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " -p --property=NAME Show only properties by this name\n"
+ " -q --quiet Suppress output\n"
+ " -a --all Show all properties, including empty ones\n"
+ " --value When showing properties, only print the value\n"
+ " -l --full Do not ellipsize output\n"
+ " --kill-who=WHO Who to send signal to\n"
+ " -s --signal=SIGNAL Which signal to send\n"
+ " --uid=USER Specify user ID to invoke shell as\n"
+ " -E --setenv=VAR=VALUE Add an environment variable for shell\n"
+ " --read-only Create read-only bind mount\n"
+ " --mkdir Create directory before bind mounting, if missing\n"
+ " -n --lines=INTEGER Number of journal entries to show\n"
+ " --max-addresses=INTEGER Number of internet addresses to show at most\n"
+ " -o --output=STRING Change journal output mode (short, short-precise,\n"
+ " short-iso, short-iso-precise, short-full,\n"
+ " short-monotonic, short-unix, verbose, export,\n"
+ " json, json-pretty, json-sse, json-seq, cat,\n"
+ " with-unit)\n"
+ " --verify=MODE Verification mode for downloaded images (no,\n"
+ " checksum, signature)\n"
+ " --force Download image even if already exists\n\n"
+ "Machine Commands:\n"
+ " list List running VMs and containers\n"
+ " status NAME... Show VM/container details\n"
+ " show [NAME...] Show properties of one or more VMs/containers\n"
+ " start NAME... Start container as a service\n"
+ " login [NAME] Get a login prompt in a container or on the\n"
+ " local host\n"
+ " shell [[USER@]NAME [COMMAND...]]\n"
+ " Invoke a shell (or other command) in a container\n"
+ " or on the local host\n"
+ " enable NAME... Enable automatic container start at boot\n"
+ " disable NAME... Disable automatic container start at boot\n"
+ " poweroff NAME... Power off one or more containers\n"
+ " reboot NAME... Reboot one or more containers\n"
+ " terminate NAME... Terminate one or more VMs/containers\n"
+ " kill NAME... Send signal to processes of a VM/container\n"
+ " copy-to NAME PATH [PATH] Copy files from the host to a container\n"
+ " copy-from NAME PATH [PATH] Copy files from a container to the host\n"
+ " bind NAME PATH [PATH] Bind mount a path from the host into a container\n\n"
+ "Image Commands:\n"
+ " list-images Show available container and VM images\n"
+ " image-status [NAME...] Show image details\n"
+ " show-image [NAME...] Show properties of image\n"
+ " clone NAME NAME Clone an image\n"
+ " rename NAME NAME Rename an image\n"
+ " read-only NAME [BOOL] Mark or unmark image read-only\n"
+ " remove NAME... Remove an image\n"
+ " set-limit [NAME] BYTES Set image or pool size limit (disk quota)\n"
+ " clean Remove hidden (or all) images\n\n"
+ "Image Transfer Commands:\n"
+ " pull-tar URL [NAME] Download a TAR container image\n"
+ " pull-raw URL [NAME] Download a RAW container or VM image\n"
+ " import-tar FILE [NAME] Import a local TAR container image\n"
+ " import-raw FILE [NAME] Import a local RAW container or VM image\n"
+ " import-fs DIRECTORY [NAME] Import a local directory container image\n"
+ " export-tar NAME [FILE] Export a TAR container image locally\n"
+ " export-raw NAME [FILE] Export a RAW container or VM image locally\n"
+ " list-transfers Show list of downloads in progress\n"
+ " cancel-transfer Cancel a download\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ ARG_VALUE,
+ ARG_KILL_WHO,
+ ARG_READ_ONLY,
+ ARG_MKDIR,
+ ARG_NO_ASK_PASSWORD,
+ ARG_VERIFY,
+ ARG_FORCE,
+ ARG_FORMAT,
+ ARG_UID,
+ ARG_NUMBER_IPS,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "property", required_argument, NULL, 'p' },
+ { "all", no_argument, NULL, 'a' },
+ { "value", no_argument, NULL, ARG_VALUE },
+ { "full", no_argument, NULL, 'l' },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "kill-who", required_argument, NULL, ARG_KILL_WHO },
+ { "signal", required_argument, NULL, 's' },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "read-only", no_argument, NULL, ARG_READ_ONLY },
+ { "mkdir", no_argument, NULL, ARG_MKDIR },
+ { "quiet", no_argument, NULL, 'q' },
+ { "lines", required_argument, NULL, 'n' },
+ { "output", required_argument, NULL, 'o' },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "verify", required_argument, NULL, ARG_VERIFY },
+ { "force", no_argument, NULL, ARG_FORCE },
+ { "format", required_argument, NULL, ARG_FORMAT },
+ { "uid", required_argument, NULL, ARG_UID },
+ { "setenv", required_argument, NULL, 'E' },
+ { "max-addresses", required_argument, NULL, ARG_NUMBER_IPS },
+ {}
+ };
+
+ bool reorder = false;
+ int c, r, shell = -1;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ for (;;) {
+ static const char option_string[] = "-hp:als:H:M:qn:o:E:";
+
+ c = getopt_long(argc, argv, option_string + reorder, options, NULL);
+ if (c < 0)
+ break;
+
+ switch (c) {
+
+ case 1: /* getopt_long() returns 1 if "-" was the first character of the option string, and a
+ * non-option argument was discovered. */
+
+ assert(!reorder);
+
+ /* We generally are fine with the fact that getopt_long() reorders the command line, and looks
+ * for switches after the main verb. However, for "shell" we really don't want that, since we
+ * want that switches specified after the machine name are passed to the program to execute,
+ * and not processed by us. To make this possible, we'll first invoke getopt_long() with
+ * reordering disabled (i.e. with the "-" prefix in the option string), looking for the first
+ * non-option parameter. If it's the verb "shell" we remember its position and continue
+ * processing options. In this case, as soon as we hit the next non-option argument we found
+ * the machine name, and stop further processing. If the first non-option argument is any other
+ * verb than "shell" we switch to normal reordering mode and continue processing arguments
+ * normally. */
+
+ if (shell >= 0) {
+ /* If we already found the "shell" verb on the command line, and now found the next
+ * non-option argument, then this is the machine name and we should stop processing
+ * further arguments. */
+ optind --; /* don't process this argument, go one step back */
+ goto done;
+ }
+ if (streq(optarg, "shell"))
+ /* Remember the position of the "shell" verb, and continue processing normally. */
+ shell = optind - 1;
+ else {
+ int saved_optind;
+
+ /* OK, this is some other verb. In this case, turn on reordering again, and continue
+ * processing normally. */
+ reorder = true;
+
+ /* We changed the option string. getopt_long() only looks at it again if we invoke it
+ * at least once with a reset option index. Hence, let's reset the option index here,
+ * then invoke getopt_long() again (ignoring what it has to say, after all we most
+ * likely already processed it), and the bump the option index so that we read the
+ * intended argument again. */
+ saved_optind = optind;
+ optind = 0;
+ (void) getopt_long(argc, argv, option_string + reorder, options, NULL);
+ optind = saved_optind - 1; /* go one step back, process this argument again */
+ }
+
+ break;
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case 'p':
+ r = strv_extend(&arg_property, optarg);
+ if (r < 0)
+ return log_oom();
+
+ /* If the user asked for a particular
+ * property, show it to him, even if it is
+ * empty. */
+ arg_all = true;
+ break;
+
+ case 'a':
+ arg_all = true;
+ break;
+
+ case ARG_VALUE:
+ arg_value = true;
+ break;
+
+ case 'l':
+ arg_full = true;
+ break;
+
+ case 'n':
+ if (safe_atou(optarg, &arg_lines) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse lines '%s'", optarg);
+ break;
+
+ case 'o':
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(output_mode, OutputMode, _OUTPUT_MODE_MAX);
+ return 0;
+ }
+
+ arg_output = output_mode_from_string(optarg);
+ if (arg_output < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown output '%s'.", optarg);
+
+ if (OUTPUT_MODE_IS_JSON(arg_output))
+ arg_legend = false;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case ARG_KILL_WHO:
+ arg_kill_who = optarg;
+ break;
+
+ case 's':
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(signal, int, _NSIG);
+ return 0;
+ }
+
+ arg_signal = signal_from_string(optarg);
+ if (arg_signal < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse signal string %s.", optarg);
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case ARG_READ_ONLY:
+ arg_read_only = true;
+ break;
+
+ case ARG_MKDIR:
+ arg_mkdir = true;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case ARG_VERIFY:
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(import_verify, ImportVerify, _IMPORT_VERIFY_MAX);
+ return 0;
+ }
+
+ arg_verify = import_verify_from_string(optarg);
+ if (arg_verify < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --verify= setting: %s", optarg);
+ break;
+
+ case ARG_FORCE:
+ arg_force = true;
+ break;
+
+ case ARG_FORMAT:
+ if (!STR_IN_SET(optarg, "uncompressed", "xz", "gzip", "bzip2"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown format: %s", optarg);
+
+ arg_format = optarg;
+ break;
+
+ case ARG_UID:
+ arg_uid = optarg;
+ break;
+
+ case 'E':
+ if (!env_assignment_is_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Environment assignment invalid: %s", optarg);
+
+ r = strv_extend(&arg_setenv, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case ARG_NUMBER_IPS:
+ if (streq(optarg, "all"))
+ arg_addrs = ALL_IP_ADDRESSES;
+ else if (safe_atoi(optarg, &arg_addrs) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid number of IPs");
+ else if (arg_addrs < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Number of IPs cannot be negative");
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+ }
+
+done:
+ if (shell >= 0) {
+ char *t;
+ int i;
+
+ /* We found the "shell" verb while processing the argument list. Since we turned off reordering of the
+ * argument list initially let's readjust it now, and move the "shell" verb to the back. */
+
+ optind -= 1; /* place the option index where the "shell" verb will be placed */
+
+ t = argv[shell];
+ for (i = shell; i < optind; i++)
+ argv[i] = argv[i+1];
+ argv[optind] = t;
+ }
+
+ return 1;
+}
+
+static int machinectl_main(int argc, char *argv[], sd_bus *bus) {
+
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "list", VERB_ANY, 1, VERB_DEFAULT, list_machines },
+ { "list-images", VERB_ANY, 1, 0, list_images },
+ { "status", 2, VERB_ANY, 0, show_machine },
+ { "image-status", VERB_ANY, VERB_ANY, 0, show_image },
+ { "show", VERB_ANY, VERB_ANY, 0, show_machine },
+ { "show-image", VERB_ANY, VERB_ANY, 0, show_image },
+ { "terminate", 2, VERB_ANY, 0, terminate_machine },
+ { "reboot", 2, VERB_ANY, 0, reboot_machine },
+ { "poweroff", 2, VERB_ANY, 0, poweroff_machine },
+ { "stop", 2, VERB_ANY, 0, poweroff_machine }, /* Convenience alias */
+ { "kill", 2, VERB_ANY, 0, kill_machine },
+ { "login", VERB_ANY, 2, 0, login_machine },
+ { "shell", VERB_ANY, VERB_ANY, 0, shell_machine },
+ { "bind", 3, 4, 0, bind_mount },
+ { "copy-to", 3, 4, 0, copy_files },
+ { "copy-from", 3, 4, 0, copy_files },
+ { "remove", 2, VERB_ANY, 0, remove_image },
+ { "rename", 3, 3, 0, rename_image },
+ { "clone", 3, 3, 0, clone_image },
+ { "read-only", 2, 3, 0, read_only_image },
+ { "start", 2, VERB_ANY, 0, start_machine },
+ { "enable", 2, VERB_ANY, 0, enable_machine },
+ { "disable", 2, VERB_ANY, 0, enable_machine },
+ { "import-tar", 2, 3, 0, import_tar },
+ { "import-raw", 2, 3, 0, import_raw },
+ { "import-fs", 2, 3, 0, import_fs },
+ { "export-tar", 2, 3, 0, export_tar },
+ { "export-raw", 2, 3, 0, export_raw },
+ { "pull-tar", 2, 3, 0, pull_tar },
+ { "pull-raw", 2, 3, 0, pull_raw },
+ { "list-transfers", VERB_ANY, 1, 0, list_transfers },
+ { "cancel-transfer", 2, VERB_ANY, 0, cancel_transfer },
+ { "set-limit", 2, 3, 0, set_limit },
+ { "clean", VERB_ANY, 1, 0, clean_images },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, bus);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ /* The journal merging logic potentially needs a lot of fds. */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ sigbus_install();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = bus_connect_transport(arg_transport, arg_host, false, &bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ (void) sd_bus_set_allow_interactive_authorization(bus, arg_ask_password);
+
+ return machinectl_main(argc, argv, bus);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/machine/machined-core.c b/src/machine/machined-core.c
new file mode 100644
index 0000000..6a40480
--- /dev/null
+++ b/src/machine/machined-core.c
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "machined.h"
+#include "nscd-flush.h"
+#include "strv.h"
+
+static int on_nscd_cache_flush_event(sd_event_source *s, void *userdata) {
+ /* Let's ask glibc's nscd daemon to flush its caches. We request this for the three database machines may show
+ * up in: the hosts database (for resolvable machine names) and the user and group databases (for the user ns
+ * ranges). */
+
+ (void) nscd_flush_cache(STRV_MAKE("passwd", "group", "hosts"));
+ return 0;
+}
+
+int manager_enqueue_nscd_cache_flush(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (!m->nscd_cache_flush_event) {
+ r = sd_event_add_defer(m->event, &m->nscd_cache_flush_event, on_nscd_cache_flush_event, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate NSCD cache flush event: %m");
+
+ sd_event_source_set_description(m->nscd_cache_flush_event, "nscd-cache-flush");
+ }
+
+ r = sd_event_source_set_enabled(m->nscd_cache_flush_event, SD_EVENT_ONESHOT);
+ if (r < 0) {
+ m->nscd_cache_flush_event = sd_event_source_unref(m->nscd_cache_flush_event);
+ return log_error_errno(r, "Failed to enable NSCD cache flush event: %m");
+ }
+
+ return 0;
+}
diff --git a/src/machine/machined-dbus.c b/src/machine/machined-dbus.c
new file mode 100644
index 0000000..fea9cc2
--- /dev/null
+++ b/src/machine/machined-dbus.c
@@ -0,0 +1,1583 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "bus-common-errors.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "hostname-util.h"
+#include "image-dbus.h"
+#include "io-util.h"
+#include "machine-dbus.h"
+#include "machine-image.h"
+#include "machine-pool.h"
+#include "machined.h"
+#include "missing_capability.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_pool_path, "s", "/var/lib/machines");
+
+static int property_get_pool_usage(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_close_ int fd = -1;
+ uint64_t usage = (uint64_t) -1;
+
+ assert(bus);
+ assert(reply);
+
+ fd = open("/var/lib/machines", O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd >= 0) {
+ BtrfsQuotaInfo q;
+
+ if (btrfs_subvol_get_subtree_quota_fd(fd, 0, &q) >= 0)
+ usage = q.referenced;
+ }
+
+ return sd_bus_message_append(reply, "t", usage);
+}
+
+static int property_get_pool_limit(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_close_ int fd = -1;
+ uint64_t size = (uint64_t) -1;
+
+ assert(bus);
+ assert(reply);
+
+ fd = open("/var/lib/machines", O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd >= 0) {
+ BtrfsQuotaInfo q;
+
+ if (btrfs_subvol_get_subtree_quota_fd(fd, 0, &q) >= 0)
+ size = q.referenced_max;
+ }
+
+ return sd_bus_message_append(reply, "t", size);
+}
+
+static int method_get_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ Machine *machine;
+ const char *name;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ machine = hashmap_get(m->machines, name);
+ if (!machine)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_MACHINE, "No machine '%s' known", name);
+
+ p = machine_bus_path(machine);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_get_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ const char *name;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = image_find(IMAGE_MACHINE, name, NULL);
+ if (r == -ENOENT)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_IMAGE, "No image '%s' known", name);
+ if (r < 0)
+ return r;
+
+ p = image_bus_path(name);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_get_machine_by_pid(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ Machine *machine = NULL;
+ pid_t pid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(pid_t) == sizeof(uint32_t));
+
+ r = sd_bus_message_read(message, "u", &pid);
+ if (r < 0)
+ return r;
+
+ if (pid < 0)
+ return -EINVAL;
+
+ if (pid == 0) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+ }
+
+ r = manager_get_machine_by_pid(m, pid, &machine);
+ if (r < 0)
+ return r;
+ if (!machine)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_MACHINE_FOR_PID, "PID "PID_FMT" does not belong to any known machine", pid);
+
+ p = machine_bus_path(machine);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_list_machines(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ Machine *machine;
+ Iterator i;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return sd_bus_error_set_errno(error, r);
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssso)");
+ if (r < 0)
+ return sd_bus_error_set_errno(error, r);
+
+ HASHMAP_FOREACH(machine, m->machines, i) {
+ _cleanup_free_ char *p = NULL;
+
+ p = machine_bus_path(machine);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(ssso)",
+ machine->name,
+ strempty(machine_class_to_string(machine->class)),
+ machine->service,
+ p);
+ if (r < 0)
+ return sd_bus_error_set_errno(error, r);
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return sd_bus_error_set_errno(error, r);
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_create_or_register_machine(Manager *manager, sd_bus_message *message, bool read_network, Machine **_m, sd_bus_error *error) {
+ const char *name, *service, *class, *root_directory;
+ const int32_t *netif = NULL;
+ MachineClass c;
+ uint32_t leader;
+ sd_id128_t id;
+ const void *v;
+ Machine *m;
+ size_t n, n_netif = 0;
+ int r;
+
+ assert(manager);
+ assert(message);
+ assert(_m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+ if (!machine_name_is_valid(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid machine name");
+
+ r = sd_bus_message_read_array(message, 'y', &v, &n);
+ if (r < 0)
+ return r;
+ if (n == 0)
+ id = SD_ID128_NULL;
+ else if (n == 16)
+ memcpy(&id, v, n);
+ else
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid machine ID parameter");
+
+ r = sd_bus_message_read(message, "ssus", &service, &class, &leader, &root_directory);
+ if (r < 0)
+ return r;
+
+ if (read_network) {
+ size_t i;
+
+ r = sd_bus_message_read_array(message, 'i', (const void**) &netif, &n_netif);
+ if (r < 0)
+ return r;
+
+ n_netif /= sizeof(int32_t);
+
+ for (i = 0; i < n_netif; i++) {
+ if (netif[i] <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid network interface index %i", netif[i]);
+ }
+ }
+
+ if (isempty(class))
+ c = _MACHINE_CLASS_INVALID;
+ else {
+ c = machine_class_from_string(class);
+ if (c < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid machine class parameter");
+ }
+
+ if (leader == 1)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid leader PID");
+
+ if (!isempty(root_directory) && !path_is_absolute(root_directory))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Root directory must be empty or an absolute path");
+
+ if (leader == 0) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ assert_cc(sizeof(uint32_t) == sizeof(pid_t));
+
+ r = sd_bus_creds_get_pid(creds, (pid_t*) &leader);
+ if (r < 0)
+ return r;
+ }
+
+ if (hashmap_get(manager->machines, name))
+ return sd_bus_error_setf(error, BUS_ERROR_MACHINE_EXISTS, "Machine '%s' already exists", name);
+
+ r = manager_add_machine(manager, name, &m);
+ if (r < 0)
+ return r;
+
+ m->leader = leader;
+ m->class = c;
+ m->id = id;
+
+ if (!isempty(service)) {
+ m->service = strdup(service);
+ if (!m->service) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!isempty(root_directory)) {
+ m->root_directory = strdup(root_directory);
+ if (!m->root_directory) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (n_netif > 0) {
+ assert_cc(sizeof(int32_t) == sizeof(int));
+ m->netif = memdup(netif, sizeof(int32_t) * n_netif);
+ if (!m->netif) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ m->n_netif = n_netif;
+ }
+
+ *_m = m;
+
+ return 1;
+
+fail:
+ machine_add_to_gc_queue(m);
+ return r;
+}
+
+static int method_create_machine_internal(sd_bus_message *message, bool read_network, void *userdata, sd_bus_error *error) {
+ Manager *manager = userdata;
+ Machine *m = NULL;
+ int r;
+
+ assert(message);
+ assert(manager);
+
+ r = method_create_or_register_machine(manager, message, read_network, &m, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(message, 'a', "(sv)");
+ if (r < 0)
+ goto fail;
+
+ r = machine_start(m, message, error);
+ if (r < 0)
+ goto fail;
+
+ m->create_message = sd_bus_message_ref(message);
+ return 1;
+
+fail:
+ machine_add_to_gc_queue(m);
+ return r;
+}
+
+static int method_create_machine_with_network(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_create_machine_internal(message, true, userdata, error);
+}
+
+static int method_create_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_create_machine_internal(message, false, userdata, error);
+}
+
+static int method_register_machine_internal(sd_bus_message *message, bool read_network, void *userdata, sd_bus_error *error) {
+ Manager *manager = userdata;
+ _cleanup_free_ char *p = NULL;
+ Machine *m = NULL;
+ int r;
+
+ assert(message);
+ assert(manager);
+
+ r = method_create_or_register_machine(manager, message, read_network, &m, error);
+ if (r < 0)
+ return r;
+
+ r = cg_pid_get_unit(m->leader, &m->unit);
+ if (r < 0) {
+ r = sd_bus_error_set_errnof(error, r,
+ "Failed to determine unit of process "PID_FMT" : %m",
+ m->leader);
+ goto fail;
+ }
+
+ r = machine_start(m, NULL, error);
+ if (r < 0)
+ goto fail;
+
+ p = machine_bus_path(m);
+ if (!p) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ return sd_bus_reply_method_return(message, "o", p);
+
+fail:
+ machine_add_to_gc_queue(m);
+ return r;
+}
+
+static int method_register_machine_with_network(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_register_machine_internal(message, true, userdata, error);
+}
+
+static int method_register_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_register_machine_internal(message, false, userdata, error);
+}
+
+static int redirect_method_to_machine(sd_bus_message *message, Manager *m, sd_bus_error *error, sd_bus_message_handler_t method) {
+ Machine *machine;
+ const char *name;
+ int r;
+
+ assert(message);
+ assert(m);
+ assert(method);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return sd_bus_error_set_errno(error, r);
+
+ machine = hashmap_get(m->machines, name);
+ if (!machine)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_MACHINE, "No machine '%s' known", name);
+
+ return method(message, machine, error);
+}
+
+static int method_terminate_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_terminate);
+}
+
+static int method_kill_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_kill);
+}
+
+static int method_get_machine_addresses(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_get_addresses);
+}
+
+static int method_get_machine_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_get_os_release);
+}
+
+static int method_list_images(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_hashmap_free_ Hashmap *images = NULL;
+ Manager *m = userdata;
+ Image *image;
+ Iterator i;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ images = hashmap_new(&image_hash_ops);
+ if (!images)
+ return -ENOMEM;
+
+ r = image_discover(IMAGE_MACHINE, images);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssbttto)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(image, images, i) {
+ _cleanup_free_ char *p = NULL;
+
+ p = image_bus_path(image->name);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(ssbttto)",
+ image->name,
+ image_type_to_string(image->type),
+ image->read_only,
+ image->crtime,
+ image->mtime,
+ image->usage,
+ p);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_open_machine_pty(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_open_pty);
+}
+
+static int method_open_machine_login(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_open_login);
+}
+
+static int method_open_machine_shell(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_open_shell);
+}
+
+static int method_bind_mount_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_bind_mount);
+}
+
+static int method_copy_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_copy);
+}
+
+static int method_open_machine_root_directory(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_open_root_directory);
+}
+
+static int method_get_machine_uid_shift(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_get_uid_shift);
+}
+
+static int redirect_method_to_image(sd_bus_message *message, Manager *m, sd_bus_error *error, sd_bus_message_handler_t method) {
+ _cleanup_(image_unrefp) Image* i = NULL;
+ const char *name;
+ int r;
+
+ assert(message);
+ assert(m);
+ assert(method);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ if (!image_name_is_valid(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Image name '%s' is invalid.", name);
+
+ r = image_find(IMAGE_MACHINE, name, &i);
+ if (r == -ENOENT)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_IMAGE, "No image '%s' known", name);
+ if (r < 0)
+ return r;
+
+ i->userdata = m;
+ return method(message, i, error);
+}
+
+static int method_remove_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_remove);
+}
+
+static int method_rename_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_rename);
+}
+
+static int method_clone_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_clone);
+}
+
+static int method_mark_image_read_only(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_mark_read_only);
+}
+
+static int method_get_image_hostname(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_get_hostname);
+}
+
+static int method_get_image_machine_id(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_get_machine_id);
+}
+
+static int method_get_image_machine_info(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_get_machine_info);
+}
+
+static int method_get_image_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_get_os_release);
+}
+
+static int clean_pool_done(Operation *operation, int ret, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ bool success;
+ size_t n;
+ int r;
+
+ assert(operation);
+ assert(operation->extra_fd >= 0);
+
+ if (lseek(operation->extra_fd, 0, SEEK_SET) == (off_t) -1)
+ return -errno;
+
+ f = fdopen(operation->extra_fd, "r");
+ if (!f)
+ return -errno;
+
+ operation->extra_fd = -1;
+
+ /* The resulting temporary file starts with a boolean value that indicates success or not. */
+ errno = 0;
+ n = fread(&success, 1, sizeof(success), f);
+ if (n != sizeof(success))
+ return ret < 0 ? ret : (errno != 0 ? -errno : -EIO);
+
+ if (ret < 0) {
+ _cleanup_free_ char *name = NULL;
+
+ /* The clean-up operation failed. In this case the resulting temporary file should contain a boolean
+ * set to false followed by the name of the failed image. Let's try to read this and use it for the
+ * error message. If we can't read it, don't mind, and return the naked error. */
+
+ if (success) /* The resulting temporary file could not be updated, ignore it. */
+ return ret;
+
+ r = read_nul_string(f, LONG_LINE_MAX, &name);
+ if (r <= 0) /* Same here... */
+ return ret;
+
+ return sd_bus_error_set_errnof(error, ret, "Failed to remove image %s: %m", name);
+ }
+
+ assert(success);
+
+ r = sd_bus_message_new_method_return(operation->message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ /* On success the resulting temporary file will contain a list of image names that were removed followed by
+ * their size on disk. Let's read that and turn it into a bus message. */
+ for (;;) {
+ _cleanup_free_ char *name = NULL;
+ uint64_t size;
+
+ r = read_nul_string(f, LONG_LINE_MAX, &name);
+ if (r < 0)
+ return r;
+ if (r == 0) /* reached the end */
+ break;
+
+ errno = 0;
+ n = fread(&size, 1, sizeof(size), f);
+ if (n != sizeof(size))
+ return errno != 0 ? -errno : -EIO;
+
+ r = sd_bus_message_append(reply, "(st)", name, size);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_clean_pool(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ enum {
+ REMOVE_ALL,
+ REMOVE_HIDDEN,
+ } mode;
+
+ _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
+ _cleanup_close_ int result_fd = -1;
+ Manager *m = userdata;
+ Operation *operation;
+ const char *mm;
+ pid_t child;
+ int r;
+
+ assert(message);
+
+ if (m->n_operations >= OPERATIONS_MAX)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many ongoing operations.");
+
+ r = sd_bus_message_read(message, "s", &mm);
+ if (r < 0)
+ return r;
+
+ if (streq(mm, "all"))
+ mode = REMOVE_ALL;
+ else if (streq(mm, "hidden"))
+ mode = REMOVE_HIDDEN;
+ else
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown mode '%s'.", mm);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m");
+
+ /* Create a temporary file we can dump information about deleted images into. We use a temporary file for this
+ * instead of a pipe or so, since this might grow quit large in theory and we don't want to process this
+ * continuously */
+ result_fd = open_tmpfile_unlinkable(NULL, O_RDWR|O_CLOEXEC);
+ if (result_fd < 0)
+ return -errno;
+
+ /* This might be a slow operation, run it asynchronously in a background process */
+ r = safe_fork("(sd-clean)", FORK_RESET_SIGNALS, &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ _cleanup_hashmap_free_ Hashmap *images = NULL;
+ bool success = true;
+ Image *image;
+ Iterator i;
+ ssize_t l;
+
+ errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
+
+ images = hashmap_new(&image_hash_ops);
+ if (!images) {
+ r = -ENOMEM;
+ goto child_fail;
+ }
+
+ r = image_discover(IMAGE_MACHINE, images);
+ if (r < 0)
+ goto child_fail;
+
+ l = write(result_fd, &success, sizeof(success));
+ if (l < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ HASHMAP_FOREACH(image, images, i) {
+
+ /* We can't remove vendor images (i.e. those in /usr) */
+ if (IMAGE_IS_VENDOR(image))
+ continue;
+
+ if (IMAGE_IS_HOST(image))
+ continue;
+
+ if (mode == REMOVE_HIDDEN && !IMAGE_IS_HIDDEN(image))
+ continue;
+
+ r = image_remove(image);
+ if (r == -EBUSY) /* keep images that are currently being used. */
+ continue;
+ if (r < 0) {
+ /* If the operation failed, let's override everything we wrote, and instead write there at which image we failed. */
+ success = false;
+ (void) ftruncate(result_fd, 0);
+ (void) lseek(result_fd, 0, SEEK_SET);
+ (void) write(result_fd, &success, sizeof(success));
+ (void) write(result_fd, image->name, strlen(image->name)+1);
+ goto child_fail;
+ }
+
+ l = write(result_fd, image->name, strlen(image->name)+1);
+ if (l < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ l = write(result_fd, &image->usage_exclusive, sizeof(image->usage_exclusive));
+ if (l < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+ }
+
+ result_fd = safe_close(result_fd);
+ _exit(EXIT_SUCCESS);
+
+ child_fail:
+ (void) write(errno_pipe_fd[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+ /* The clean-up might take a while, hence install a watch on the child and return */
+
+ r = operation_new(m, NULL, child, message, errno_pipe_fd[0], &operation);
+ if (r < 0) {
+ (void) sigkill_wait(child);
+ return r;
+ }
+
+ operation->extra_fd = result_fd;
+ operation->done = clean_pool_done;
+
+ result_fd = -1;
+ errno_pipe_fd[0] = -1;
+
+ return 1;
+}
+
+static int method_set_pool_limit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ uint64_t limit;
+ int r;
+
+ assert(message);
+
+ r = sd_bus_message_read(message, "t", &limit);
+ if (r < 0)
+ return r;
+ if (!FILE_SIZE_VALID_OR_INFINITY(limit))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "New limit out of range");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ /* Set up the machine directory if necessary */
+ r = setup_machine_directory(error);
+ if (r < 0)
+ return r;
+
+ (void) btrfs_qgroup_set_limit("/var/lib/machines", 0, limit);
+
+ r = btrfs_subvol_set_subtree_quota_limit("/var/lib/machines", 0, limit);
+ if (r == -ENOTTY)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Quota is only supported on btrfs.");
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to adjust quota limit: %m");
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_set_image_limit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_set_limit);
+}
+
+static int method_map_from_machine_user(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_fclose_ FILE *f = NULL;
+ Manager *m = userdata;
+ const char *name, *p;
+ Machine *machine;
+ uint32_t uid;
+ int r;
+
+ r = sd_bus_message_read(message, "su", &name, &uid);
+ if (r < 0)
+ return r;
+
+ if (!uid_is_valid(uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid user ID " UID_FMT, uid);
+
+ machine = hashmap_get(m->machines, name);
+ if (!machine)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_MACHINE, "No machine '%s' known", name);
+
+ if (machine->class != MACHINE_CONTAINER)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Not supported for non-container machines.");
+
+ p = procfs_file_alloca(machine->leader, "uid_map");
+ f = fopen(p, "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ uid_t uid_base, uid_shift, uid_range, converted;
+ int k;
+
+ errno = 0;
+ k = fscanf(f, UID_FMT " " UID_FMT " " UID_FMT, &uid_base, &uid_shift, &uid_range);
+ if (k < 0 && feof(f))
+ break;
+ if (k != 3) {
+ if (ferror(f) && errno > 0)
+ return -errno;
+
+ return -EIO;
+ }
+
+ if (uid < uid_base || uid >= uid_base + uid_range)
+ continue;
+
+ converted = uid - uid_base + uid_shift;
+ if (!uid_is_valid(converted))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid user ID " UID_FMT, uid);
+
+ return sd_bus_reply_method_return(message, "u", (uint32_t) converted);
+ }
+
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_USER_MAPPING, "Machine '%s' has no matching user mappings.", name);
+}
+
+static int method_map_to_machine_user(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ Machine *machine;
+ uid_t uid;
+ Iterator i;
+ int r;
+
+ r = sd_bus_message_read(message, "u", &uid);
+ if (r < 0)
+ return r;
+ if (!uid_is_valid(uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid user ID " UID_FMT, uid);
+ if (uid < 0x10000)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_USER_MAPPING, "User " UID_FMT " belongs to host UID range", uid);
+
+ HASHMAP_FOREACH(machine, m->machines, i) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char p[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t) + 1];
+
+ if (machine->class != MACHINE_CONTAINER)
+ continue;
+
+ xsprintf(p, "/proc/" UID_FMT "/uid_map", machine->leader);
+ f = fopen(p, "re");
+ if (!f) {
+ log_warning_errno(errno, "Failed to open %s, ignoring,", p);
+ continue;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *o = NULL;
+ uid_t uid_base, uid_shift, uid_range, converted;
+ int k;
+
+ errno = 0;
+ k = fscanf(f, UID_FMT " " UID_FMT " " UID_FMT, &uid_base, &uid_shift, &uid_range);
+ if (k < 0 && feof(f))
+ break;
+ if (k != 3) {
+ if (ferror(f) && errno > 0)
+ return -errno;
+
+ return -EIO;
+ }
+
+ /* The private user namespace is disabled, ignoring. */
+ if (uid_shift == 0)
+ continue;
+
+ if (uid < uid_shift || uid >= uid_shift + uid_range)
+ continue;
+
+ converted = (uid - uid_shift + uid_base);
+ if (!uid_is_valid(converted))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid user ID " UID_FMT, uid);
+
+ o = machine_bus_path(machine);
+ if (!o)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "sou", machine->name, o, (uint32_t) converted);
+ }
+ }
+
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_USER_MAPPING, "No matching user mapping for " UID_FMT ".", uid);
+}
+
+static int method_map_from_machine_group(sd_bus_message *message, void *groupdata, sd_bus_error *error) {
+ _cleanup_fclose_ FILE *f = NULL;
+ Manager *m = groupdata;
+ const char *name, *p;
+ Machine *machine;
+ uint32_t gid;
+ int r;
+
+ r = sd_bus_message_read(message, "su", &name, &gid);
+ if (r < 0)
+ return r;
+
+ if (!gid_is_valid(gid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid group ID " GID_FMT, gid);
+
+ machine = hashmap_get(m->machines, name);
+ if (!machine)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_MACHINE, "No machine '%s' known", name);
+
+ if (machine->class != MACHINE_CONTAINER)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Not supported for non-container machines.");
+
+ p = procfs_file_alloca(machine->leader, "gid_map");
+ f = fopen(p, "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ gid_t gid_base, gid_shift, gid_range, converted;
+ int k;
+
+ errno = 0;
+ k = fscanf(f, GID_FMT " " GID_FMT " " GID_FMT, &gid_base, &gid_shift, &gid_range);
+ if (k < 0 && feof(f))
+ break;
+ if (k != 3) {
+ if (ferror(f) && errno > 0)
+ return -errno;
+
+ return -EIO;
+ }
+
+ if (gid < gid_base || gid >= gid_base + gid_range)
+ continue;
+
+ converted = gid - gid_base + gid_shift;
+ if (!gid_is_valid(converted))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid group ID " GID_FMT, gid);
+
+ return sd_bus_reply_method_return(message, "u", (uint32_t) converted);
+ }
+
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_GROUP_MAPPING, "Machine '%s' has no matching group mappings.", name);
+}
+
+static int method_map_to_machine_group(sd_bus_message *message, void *groupdata, sd_bus_error *error) {
+ Manager *m = groupdata;
+ Machine *machine;
+ gid_t gid;
+ Iterator i;
+ int r;
+
+ r = sd_bus_message_read(message, "u", &gid);
+ if (r < 0)
+ return r;
+ if (!gid_is_valid(gid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid group ID " GID_FMT, gid);
+ if (gid < 0x10000)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_GROUP_MAPPING, "Group " GID_FMT " belongs to host GID range", gid);
+
+ HASHMAP_FOREACH(machine, m->machines, i) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char p[STRLEN("/proc//gid_map") + DECIMAL_STR_MAX(pid_t) + 1];
+
+ if (machine->class != MACHINE_CONTAINER)
+ continue;
+
+ xsprintf(p, "/proc/" GID_FMT "/gid_map", machine->leader);
+ f = fopen(p, "re");
+ if (!f) {
+ log_warning_errno(errno, "Failed to open %s, ignoring,", p);
+ continue;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *o = NULL;
+ gid_t gid_base, gid_shift, gid_range, converted;
+ int k;
+
+ errno = 0;
+ k = fscanf(f, GID_FMT " " GID_FMT " " GID_FMT, &gid_base, &gid_shift, &gid_range);
+ if (k < 0 && feof(f))
+ break;
+ if (k != 3) {
+ if (ferror(f) && errno > 0)
+ return -errno;
+
+ return -EIO;
+ }
+
+ /* The private user namespace is disabled, ignoring. */
+ if (gid_shift == 0)
+ continue;
+
+ if (gid < gid_shift || gid >= gid_shift + gid_range)
+ continue;
+
+ converted = (gid - gid_shift + gid_base);
+ if (!gid_is_valid(converted))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid group ID " GID_FMT, gid);
+
+ o = machine_bus_path(machine);
+ if (!o)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "sou", machine->name, o, (uint32_t) converted);
+ }
+ }
+
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_GROUP_MAPPING, "No matching group mapping for " GID_FMT ".", gid);
+}
+
+const sd_bus_vtable manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("PoolPath", "s", property_get_pool_path, 0, 0),
+ SD_BUS_PROPERTY("PoolUsage", "t", property_get_pool_usage, 0, 0),
+ SD_BUS_PROPERTY("PoolLimit", "t", property_get_pool_limit, 0, 0),
+ SD_BUS_METHOD("GetMachine", "s", "o", method_get_machine, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetImage", "s", "o", method_get_image, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetMachineByPID", "u", "o", method_get_machine_by_pid, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListMachines", NULL, "a(ssso)", method_list_machines, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListImages", NULL, "a(ssbttto)", method_list_images, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CreateMachine", "sayssusa(sv)", "o", method_create_machine, 0),
+ SD_BUS_METHOD("CreateMachineWithNetwork", "sayssusaia(sv)", "o", method_create_machine_with_network, 0),
+ SD_BUS_METHOD("RegisterMachine", "sayssus", "o", method_register_machine, 0),
+ SD_BUS_METHOD("RegisterMachineWithNetwork", "sayssusai", "o", method_register_machine_with_network, 0),
+ SD_BUS_METHOD("TerminateMachine", "s", NULL, method_terminate_machine, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("KillMachine", "ssi", NULL, method_kill_machine, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetMachineAddresses", "s", "a(iay)", method_get_machine_addresses, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetMachineOSRelease", "s", "a{ss}", method_get_machine_os_release, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("OpenMachinePTY", "s", "hs", method_open_machine_pty, 0),
+ SD_BUS_METHOD("OpenMachineLogin", "s", "hs", method_open_machine_login, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("OpenMachineShell", "sssasas", "hs", method_open_machine_shell, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("BindMountMachine", "sssbb", NULL, method_bind_mount_machine, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CopyFromMachine", "sss", NULL, method_copy_machine, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CopyToMachine", "sss", NULL, method_copy_machine, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("OpenMachineRootDirectory", "s", "h", method_open_machine_root_directory, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetMachineUIDShift", "s", "u", method_get_machine_uid_shift, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("RemoveImage", "s", NULL, method_remove_image, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("RenameImage", "ss", NULL, method_rename_image, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CloneImage", "ssb", NULL, method_clone_image, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("MarkImageReadOnly", "sb", NULL, method_mark_image_read_only, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetImageHostname", "s", "s", method_get_image_hostname, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetImageMachineID", "s", "ay", method_get_image_machine_id, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetImageMachineInfo", "s", "a{ss}", method_get_image_machine_info, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetImageOSRelease", "s", "a{ss}", method_get_image_os_release, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetPoolLimit", "t", NULL, method_set_pool_limit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetImageLimit", "st", NULL, method_set_image_limit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("CleanPool", "s", "a(st)", method_clean_pool, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("MapFromMachineUser", "su", "u", method_map_from_machine_user, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("MapToMachineUser", "u", "sou", method_map_to_machine_user, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("MapFromMachineGroup", "su", "u", method_map_from_machine_group, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("MapToMachineGroup", "u", "sou", method_map_to_machine_group, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_SIGNAL("MachineNew", "so", 0),
+ SD_BUS_SIGNAL("MachineRemoved", "so", 0),
+ SD_BUS_VTABLE_END
+};
+
+int match_job_removed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *path, *result, *unit;
+ Manager *m = userdata;
+ Machine *machine;
+ uint32_t id;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "uoss", &id, &path, &unit, &result);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ machine = hashmap_get(m->machine_units, unit);
+ if (!machine)
+ return 0;
+
+ if (streq_ptr(path, machine->scope_job)) {
+ machine->scope_job = mfree(machine->scope_job);
+
+ if (machine->started) {
+ if (streq(result, "done"))
+ machine_send_create_reply(machine, NULL);
+ else {
+ _cleanup_(sd_bus_error_free) sd_bus_error e = SD_BUS_ERROR_NULL;
+
+ sd_bus_error_setf(&e, BUS_ERROR_JOB_FAILED, "Start job for unit %s failed with '%s'", unit, result);
+
+ machine_send_create_reply(machine, &e);
+ }
+ }
+
+ machine_save(machine);
+ }
+
+ machine_add_to_gc_queue(machine);
+ return 0;
+}
+
+int match_properties_changed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *unit = NULL;
+ const char *path;
+ Manager *m = userdata;
+ Machine *machine;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ path = sd_bus_message_get_path(message);
+ if (!path)
+ return 0;
+
+ r = unit_name_from_dbus_path(path, &unit);
+ if (r == -EINVAL) /* not for a unit */
+ return 0;
+ if (r < 0) {
+ log_oom();
+ return 0;
+ }
+
+ machine = hashmap_get(m->machine_units, unit);
+ if (!machine)
+ return 0;
+
+ machine_add_to_gc_queue(machine);
+ return 0;
+}
+
+int match_unit_removed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *path, *unit;
+ Manager *m = userdata;
+ Machine *machine;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "so", &unit, &path);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ machine = hashmap_get(m->machine_units, unit);
+ if (!machine)
+ return 0;
+
+ machine_add_to_gc_queue(machine);
+ return 0;
+}
+
+int match_reloading(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ Machine *machine;
+ Iterator i;
+ int b, r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+ if (b)
+ return 0;
+
+ /* systemd finished reloading, let's recheck all our machines */
+ log_debug("System manager has been reloaded, rechecking machines...");
+
+ HASHMAP_FOREACH(machine, m->machines, i)
+ machine_add_to_gc_queue(machine);
+
+ return 0;
+}
+
+int manager_start_scope(
+ Manager *manager,
+ const char *scope,
+ pid_t pid,
+ const char *slice,
+ const char *description,
+ sd_bus_message *more_properties,
+ sd_bus_error *error,
+ char **job) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ int r;
+
+ assert(manager);
+ assert(scope);
+ assert(pid > 1);
+
+ r = sd_bus_message_new_method_call(
+ manager->bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartTransientUnit");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "ss", strempty(scope), "fail");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return r;
+
+ if (!isempty(slice)) {
+ r = sd_bus_message_append(m, "(sv)", "Slice", "s", slice);
+ if (r < 0)
+ return r;
+ }
+
+ if (!isempty(description)) {
+ r = sd_bus_message_append(m, "(sv)", "Description", "s", description);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_append(m, "(sv)(sv)(sv)(sv)(sv)",
+ "PIDs", "au", 1, pid,
+ "Delegate", "b", 1,
+ "CollectMode", "s", "inactive-or-failed",
+ "AddRef", "b", 1,
+ "TasksMax", "t", UINT64_C(16384));
+ if (r < 0)
+ return r;
+
+ if (more_properties) {
+ r = sd_bus_message_copy(m, more_properties, true);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "a(sa(sv))", 0);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(manager->bus, m, 0, error, &reply);
+ if (r < 0)
+ return r;
+
+ if (job) {
+ const char *j;
+ char *copy;
+
+ r = sd_bus_message_read(reply, "o", &j);
+ if (r < 0)
+ return r;
+
+ copy = strdup(j);
+ if (!copy)
+ return -ENOMEM;
+
+ *job = copy;
+ }
+
+ return 1;
+}
+
+int manager_unref_unit(
+ Manager *m,
+ const char *unit,
+ sd_bus_error *error) {
+
+ assert(m);
+ assert(unit);
+
+ return sd_bus_call_method(
+ m->bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "UnrefUnit",
+ error,
+ NULL,
+ "s",
+ unit);
+}
+
+int manager_stop_unit(Manager *manager, const char *unit, sd_bus_error *error, char **job) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(manager);
+ assert(unit);
+
+ r = sd_bus_call_method(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StopUnit",
+ error,
+ &reply,
+ "ss", unit, "fail");
+ if (r < 0) {
+ if (sd_bus_error_has_name(error, BUS_ERROR_NO_SUCH_UNIT) ||
+ sd_bus_error_has_name(error, BUS_ERROR_LOAD_FAILED)) {
+
+ if (job)
+ *job = NULL;
+
+ sd_bus_error_free(error);
+ return 0;
+ }
+
+ return r;
+ }
+
+ if (job) {
+ const char *j;
+ char *copy;
+
+ r = sd_bus_message_read(reply, "o", &j);
+ if (r < 0)
+ return r;
+
+ copy = strdup(j);
+ if (!copy)
+ return -ENOMEM;
+
+ *job = copy;
+ }
+
+ return 1;
+}
+
+int manager_kill_unit(Manager *manager, const char *unit, int signo, sd_bus_error *error) {
+ assert(manager);
+ assert(unit);
+
+ return sd_bus_call_method(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "KillUnit",
+ error,
+ NULL,
+ "ssi", unit, "all", signo);
+}
+
+int manager_unit_is_active(Manager *manager, const char *unit) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *path = NULL;
+ const char *state;
+ int r;
+
+ assert(manager);
+ assert(unit);
+
+ path = unit_dbus_path_from_name(unit);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_get_property(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "ActiveState",
+ &error,
+ &reply,
+ "s");
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, SD_BUS_ERROR_NO_REPLY) ||
+ sd_bus_error_has_name(&error, SD_BUS_ERROR_DISCONNECTED))
+ return true;
+
+ if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_UNIT) ||
+ sd_bus_error_has_name(&error, BUS_ERROR_LOAD_FAILED))
+ return false;
+
+ return r;
+ }
+
+ r = sd_bus_message_read(reply, "s", &state);
+ if (r < 0)
+ return -EINVAL;
+
+ return !STR_IN_SET(state, "inactive", "failed");
+}
+
+int manager_job_is_active(Manager *manager, const char *path) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(manager);
+ assert(path);
+
+ r = sd_bus_get_property(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Job",
+ "State",
+ &error,
+ &reply,
+ "s");
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, SD_BUS_ERROR_NO_REPLY) ||
+ sd_bus_error_has_name(&error, SD_BUS_ERROR_DISCONNECTED))
+ return true;
+
+ if (sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_OBJECT))
+ return false;
+
+ return r;
+ }
+
+ /* We don't actually care about the state really. The fact
+ * that we could read the job state is enough for us */
+
+ return true;
+}
+
+int manager_get_machine_by_pid(Manager *m, pid_t pid, Machine **machine) {
+ Machine *mm;
+ int r;
+
+ assert(m);
+ assert(pid >= 1);
+ assert(machine);
+
+ mm = hashmap_get(m->machine_leaders, PID_TO_PTR(pid));
+ if (!mm) {
+ _cleanup_free_ char *unit = NULL;
+
+ r = cg_pid_get_unit(pid, &unit);
+ if (r >= 0)
+ mm = hashmap_get(m->machine_units, unit);
+ }
+ if (!mm)
+ return 0;
+
+ *machine = mm;
+ return 1;
+}
+
+int manager_add_machine(Manager *m, const char *name, Machine **_machine) {
+ Machine *machine;
+
+ assert(m);
+ assert(name);
+
+ machine = hashmap_get(m->machines, name);
+ if (!machine) {
+ machine = machine_new(m, _MACHINE_CLASS_INVALID, name);
+ if (!machine)
+ return -ENOMEM;
+ }
+
+ if (_machine)
+ *_machine = machine;
+
+ return 0;
+}
diff --git a/src/machine/machined.c b/src/machine/machined.c
new file mode 100644
index 0000000..0b92b1c
--- /dev/null
+++ b/src/machine/machined.c
@@ -0,0 +1,390 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "hostname-util.h"
+#include "label.h"
+#include "machine-image.h"
+#include "machined.h"
+#include "main-func.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "special.h"
+
+static Manager* manager_unref(Manager *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_unref);
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(machine_hash_ops, char, string_hash_func, string_compare_func, Machine, machine_free);
+
+static int manager_new(Manager **ret) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ int r;
+
+ assert(ret);
+
+ m = new0(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ m->machines = hashmap_new(&machine_hash_ops);
+ m->machine_units = hashmap_new(&string_hash_ops);
+ m->machine_leaders = hashmap_new(NULL);
+
+ if (!m->machines || !m->machine_units || !m->machine_leaders)
+ return -ENOMEM;
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_set_watchdog(m->event, true);
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+static Manager* manager_unref(Manager *m) {
+ if (!m)
+ return NULL;
+
+ while (m->operations)
+ operation_free(m->operations);
+
+ assert(m->n_operations == 0);
+
+ hashmap_free(m->machines); /* This will free all machines, so that the machine_units/machine_leaders is empty */
+ hashmap_free(m->machine_units);
+ hashmap_free(m->machine_leaders);
+ hashmap_free(m->image_cache);
+
+ sd_event_source_unref(m->image_cache_defer_event);
+ sd_event_source_unref(m->nscd_cache_flush_event);
+
+ bus_verify_polkit_async_registry_free(m->polkit_registry);
+
+ sd_bus_flush_close_unref(m->bus);
+ sd_event_unref(m->event);
+
+ return mfree(m);
+}
+
+static int manager_add_host_machine(Manager *m) {
+ _cleanup_free_ char *rd = NULL, *unit = NULL;
+ sd_id128_t mid;
+ Machine *t;
+ int r;
+
+ if (m->host_machine)
+ return 0;
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get machine ID: %m");
+
+ rd = strdup("/");
+ if (!rd)
+ return log_oom();
+
+ unit = strdup(SPECIAL_ROOT_SLICE);
+ if (!unit)
+ return log_oom();
+
+ t = machine_new(m, MACHINE_HOST, ".host");
+ if (!t)
+ return log_oom();
+
+ t->leader = 1;
+ t->id = mid;
+
+ t->root_directory = TAKE_PTR(rd);
+ t->unit = TAKE_PTR(unit);
+
+ dual_timestamp_from_boottime_or_monotonic(&t->timestamp, 0);
+
+ m->host_machine = t;
+
+ return 0;
+}
+
+static int manager_enumerate_machines(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(m);
+
+ r = manager_add_host_machine(m);
+ if (r < 0)
+ return r;
+
+ /* Read in machine data stored on disk */
+ d = opendir("/run/systemd/machines");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open /run/systemd/machines: %m");
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ struct Machine *machine;
+ int k;
+
+ if (!dirent_is_file(de))
+ continue;
+
+ /* Ignore symlinks that map the unit name to the machine */
+ if (startswith(de->d_name, "unit:"))
+ continue;
+
+ if (!machine_name_is_valid(de->d_name))
+ continue;
+
+ k = manager_add_machine(m, de->d_name, &machine);
+ if (k < 0) {
+ r = log_error_errno(k, "Failed to add machine by file name %s: %m", de->d_name);
+ continue;
+ }
+
+ machine_add_to_gc_queue(machine);
+
+ k = machine_load(machine);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int manager_connect_bus(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(!m->bus);
+
+ r = sd_bus_default_system(&m->bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = sd_bus_add_object_vtable(m->bus, NULL, "/org/freedesktop/machine1", "org.freedesktop.machine1.Manager", manager_vtable, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add manager object vtable: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/machine1/machine", "org.freedesktop.machine1.Machine", machine_vtable, machine_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add machine object vtable: %m");
+
+ r = sd_bus_add_node_enumerator(m->bus, NULL, "/org/freedesktop/machine1/machine", machine_node_enumerator, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add machine enumerator: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/machine1/image", "org.freedesktop.machine1.Image", image_vtable, image_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add image object vtable: %m");
+
+ r = sd_bus_add_node_enumerator(m->bus, NULL, "/org/freedesktop/machine1/image", image_node_enumerator, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add image enumerator: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "JobRemoved",
+ match_job_removed, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match for JobRemoved: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "UnitRemoved",
+ match_unit_removed, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for UnitRemoved: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ NULL,
+ "org.freedesktop.DBus.Properties",
+ "PropertiesChanged",
+ match_properties_changed, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for PropertiesChanged: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Reloading",
+ match_reloading, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for Reloading: %m");
+
+ r = sd_bus_call_method_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Subscribe",
+ NULL, NULL,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable subscription: %m");
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.machine1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ return 0;
+}
+
+static void manager_gc(Manager *m, bool drop_not_started) {
+ Machine *machine;
+
+ assert(m);
+
+ while ((machine = m->machine_gc_queue)) {
+ LIST_REMOVE(gc_queue, m->machine_gc_queue, machine);
+ machine->in_gc_queue = false;
+
+ /* First, if we are not closing yet, initiate stopping */
+ if (machine_may_gc(machine, drop_not_started) &&
+ machine_get_state(machine) != MACHINE_CLOSING)
+ machine_stop(machine);
+
+ /* Now, the stop probably made this referenced
+ * again, but if it didn't, then it's time to let it
+ * go entirely. */
+ if (machine_may_gc(machine, drop_not_started)) {
+ machine_finalize(machine);
+ machine_free(machine);
+ }
+ }
+}
+
+static int manager_startup(Manager *m) {
+ Machine *machine;
+ Iterator i;
+ int r;
+
+ assert(m);
+
+ /* Connect to the bus */
+ r = manager_connect_bus(m);
+ if (r < 0)
+ return r;
+
+ /* Deserialize state */
+ manager_enumerate_machines(m);
+
+ /* Remove stale objects before we start them */
+ manager_gc(m, false);
+
+ /* And start everything */
+ HASHMAP_FOREACH(machine, m->machines, i)
+ machine_start(machine, NULL, NULL);
+
+ return 0;
+}
+
+static bool check_idle(void *userdata) {
+ Manager *m = userdata;
+
+ if (m->operations)
+ return false;
+
+ manager_gc(m, true);
+
+ return hashmap_isempty(m->machines);
+}
+
+static int manager_run(Manager *m) {
+ assert(m);
+
+ return bus_event_loop_with_idle(
+ m->event,
+ m->bus,
+ "org.freedesktop.machine1",
+ DEFAULT_EXIT_USEC,
+ check_idle, m);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ int r;
+
+ log_set_facility(LOG_AUTH);
+ log_setup_service();
+
+ umask(0022);
+
+ if (argc != 1) {
+ log_error("This program takes no arguments.");
+ return -EINVAL;
+ }
+
+ /* Always create the directories people can create inotify watches in. Note that some applications might check
+ * for the existence of /run/systemd/machines/ to determine whether machined is available, so please always
+ * make sure this check stays in. */
+ (void) mkdir_label("/run/systemd/machines", 0755);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGTERM, SIGINT, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate manager object: %m");
+
+ r = manager_startup(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fully start up daemon: %m");
+
+ log_debug("systemd-machined running as pid "PID_FMT, getpid_cached());
+ (void) sd_notify(false,
+ "READY=1\n"
+ "STATUS=Processing requests...");
+
+ r = manager_run(m);
+
+ log_debug("systemd-machined stopped as pid "PID_FMT, getpid_cached());
+ (void) sd_notify(false,
+ "STOPPING=1\n"
+ "STATUS=Shutting down...");
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/machine/machined.h b/src/machine/machined.h
new file mode 100644
index 0000000..2298a65
--- /dev/null
+++ b/src/machine/machined.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+#include "hashmap.h"
+#include "list.h"
+
+typedef struct Manager Manager;
+
+#include "image-dbus.h"
+#include "machine-dbus.h"
+#include "machine.h"
+#include "operation.h"
+
+struct Manager {
+ sd_event *event;
+ sd_bus *bus;
+
+ Hashmap *machines;
+ Hashmap *machine_units;
+ Hashmap *machine_leaders;
+
+ Hashmap *polkit_registry;
+
+ Hashmap *image_cache;
+ sd_event_source *image_cache_defer_event;
+
+ LIST_HEAD(Machine, machine_gc_queue);
+
+ Machine *host_machine;
+
+ LIST_HEAD(Operation, operations);
+ unsigned n_operations;
+
+ sd_event_source *nscd_cache_flush_event;
+};
+
+int manager_add_machine(Manager *m, const char *name, Machine **_machine);
+int manager_get_machine_by_pid(Manager *m, pid_t pid, Machine **machine);
+
+extern const sd_bus_vtable manager_vtable[];
+
+int match_reloading(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int match_unit_removed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int match_properties_changed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int match_job_removed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+int manager_start_scope(Manager *manager, const char *scope, pid_t pid, const char *slice, const char *description, sd_bus_message *more_properties, sd_bus_error *error, char **job);
+int manager_stop_unit(Manager *manager, const char *unit, sd_bus_error *error, char **job);
+int manager_kill_unit(Manager *manager, const char *unit, int signo, sd_bus_error *error);
+int manager_unref_unit(Manager *m, const char *unit, sd_bus_error *error);
+int manager_unit_is_active(Manager *manager, const char *unit);
+int manager_job_is_active(Manager *manager, const char *path);
+
+int manager_enqueue_nscd_cache_flush(Manager *m);
diff --git a/src/machine/meson.build b/src/machine/meson.build
new file mode 100644
index 0000000..bc67071
--- /dev/null
+++ b/src/machine/meson.build
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+systemd_machined_sources = files('''
+ machined.c
+ machined.h
+'''.split())
+
+libmachine_core_sources = files('''
+ machine.c
+ machine.h
+ machined-dbus.c
+ machined-core.c
+ machine-dbus.c
+ machine-dbus.h
+ image-dbus.c
+ image-dbus.h
+ operation.c
+ operation.h
+'''.split())
+
+libmachine_core = static_library(
+ 'machine-core',
+ libmachine_core_sources,
+ include_directories : includes,
+ dependencies : [threads])
+
+if conf.get('ENABLE_MACHINED') == 1
+ install_data('org.freedesktop.machine1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.machine1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.machine1.policy',
+ install_dir : polkitpolicydir)
+endif
+
+tests += [
+ [['src/machine/test-machine-tables.c'],
+ [libmachine_core,
+ libshared],
+ [threads],
+ 'ENABLE_MACHINED'],
+]
diff --git a/src/machine/operation.c b/src/machine/operation.c
new file mode 100644
index 0000000..42c51f3
--- /dev/null
+++ b/src/machine/operation.c
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/wait.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "operation.h"
+#include "process-util.h"
+
+static int operation_done(sd_event_source *s, const siginfo_t *si, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Operation *o = userdata;
+ int r;
+
+ assert(o);
+ assert(si);
+
+ log_debug("Operating " PID_FMT " is now complete with code=%s status=%i",
+ o->pid,
+ sigchld_code_to_string(si->si_code), si->si_status);
+
+ o->pid = 0;
+
+ if (si->si_code != CLD_EXITED) {
+ r = sd_bus_error_setf(&error, SD_BUS_ERROR_FAILED, "Child died abnormally.");
+ goto fail;
+ }
+
+ if (si->si_status == EXIT_SUCCESS)
+ r = 0;
+ else if (read(o->errno_fd, &r, sizeof(r)) != sizeof(r)) { /* Try to acquire error code for failed operation */
+ r = sd_bus_error_setf(&error, SD_BUS_ERROR_FAILED, "Child failed.");
+ goto fail;
+ }
+
+ if (o->done) {
+ /* A completion routine is set for this operation, call it. */
+ r = o->done(o, r, &error);
+ if (r < 0) {
+ if (!sd_bus_error_is_set(&error))
+ sd_bus_error_set_errno(&error, r);
+
+ goto fail;
+ }
+
+ } else {
+ /* The default operation when done is to simply return an error on failure or an empty success
+ * message on success. */
+ if (r < 0) {
+ sd_bus_error_set_errno(&error, r);
+ goto fail;
+ }
+
+ r = sd_bus_reply_method_return(o->message, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to reply to message: %m");
+ }
+
+ operation_free(o);
+ return 0;
+
+fail:
+ r = sd_bus_reply_method_error(o->message, &error);
+ if (r < 0)
+ log_error_errno(r, "Failed to reply to message: %m");
+
+ operation_free(o);
+ return 0;
+}
+
+int operation_new(Manager *manager, Machine *machine, pid_t child, sd_bus_message *message, int errno_fd, Operation **ret) {
+ Operation *o;
+ int r;
+
+ assert(manager);
+ assert(child > 1);
+ assert(message);
+ assert(errno_fd >= 0);
+
+ o = new0(Operation, 1);
+ if (!o)
+ return -ENOMEM;
+
+ o->extra_fd = -1;
+
+ r = sd_event_add_child(manager->event, &o->event_source, child, WEXITED, operation_done, o);
+ if (r < 0) {
+ free(o);
+ return r;
+ }
+
+ o->pid = child;
+ o->message = sd_bus_message_ref(message);
+ o->errno_fd = errno_fd;
+
+ LIST_PREPEND(operations, manager->operations, o);
+ manager->n_operations++;
+ o->manager = manager;
+
+ if (machine) {
+ LIST_PREPEND(operations_by_machine, machine->operations, o);
+ o->machine = machine;
+ }
+
+ log_debug("Started new operation " PID_FMT ".", child);
+
+ /* At this point we took ownership of both the child and the errno file descriptor! */
+
+ if (ret)
+ *ret = o;
+
+ return 0;
+}
+
+Operation *operation_free(Operation *o) {
+ if (!o)
+ return NULL;
+
+ sd_event_source_unref(o->event_source);
+
+ safe_close(o->errno_fd);
+ safe_close(o->extra_fd);
+
+ if (o->pid > 1)
+ (void) sigkill_wait(o->pid);
+
+ sd_bus_message_unref(o->message);
+
+ if (o->manager) {
+ LIST_REMOVE(operations, o->manager->operations, o);
+ o->manager->n_operations--;
+ }
+
+ if (o->machine)
+ LIST_REMOVE(operations_by_machine, o->machine->operations, o);
+
+ return mfree(o);
+}
diff --git a/src/machine/operation.h b/src/machine/operation.h
new file mode 100644
index 0000000..0e33028
--- /dev/null
+++ b/src/machine/operation.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+#include "list.h"
+
+typedef struct Operation Operation;
+
+#include "machined.h"
+
+#define OPERATIONS_MAX 64
+
+struct Operation {
+ Manager *manager;
+ Machine *machine;
+ pid_t pid;
+ sd_bus_message *message;
+ int errno_fd;
+ int extra_fd;
+ sd_event_source *event_source;
+ int (*done)(Operation *o, int ret, sd_bus_error *error);
+ LIST_FIELDS(Operation, operations);
+ LIST_FIELDS(Operation, operations_by_machine);
+};
+
+int operation_new(Manager *manager, Machine *machine, pid_t child, sd_bus_message *message, int errno_fd, Operation **ret);
+Operation *operation_free(Operation *o);
diff --git a/src/machine/org.freedesktop.machine1.conf b/src/machine/org.freedesktop.machine1.conf
new file mode 100644
index 0000000..73b7e07
--- /dev/null
+++ b/src/machine/org.freedesktop.machine1.conf
@@ -0,0 +1,238 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.machine1"/>
+ <allow send_destination="org.freedesktop.machine1"/>
+ <allow receive_sender="org.freedesktop.machine1"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.machine1"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="ListMachines"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="ListImages"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetMachine"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetMachineByPID"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetImage"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetMachineAddresses"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetMachineOSRelease"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetMachineUIDShift"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="OpenMachineLogin"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="OpenMachineShell"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="TerminateMachine"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="KillMachine"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="BindMountMachine"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="CopyFromMachine"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="CopyToMachine"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="RemoveImage"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="RenameImage"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="CloneImage"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="MarkImageReadOnly"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="SetPoolLimit"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="SetImageLimit"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetImageHostname"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetImageMachineID"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetImageMachineInfo"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetImageOSRelease"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="CleanPool"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="MapFromMachineUser"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="MapToMachineUser"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="MapFromMachineGroup"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="MapToMachineGroup"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="GetAddresses"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="GetOSRelease"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="GetUIDShift"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="OpenLogin"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="OpenShell"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="Terminate"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="Kill"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="BindMount"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="CopyFrom"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="CopyTo"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="Remove"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="Rename"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="Clone"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="SetLimit"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="MarkReadOnly"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="GetHostname"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="GetMachineID"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="GetMachineInfo"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="GetOSRelease"/>
+
+ <allow receive_sender="org.freedesktop.machine1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/machine/org.freedesktop.machine1.policy b/src/machine/org.freedesktop.machine1.policy
new file mode 100644
index 0000000..039c3d4
--- /dev/null
+++ b/src/machine/org.freedesktop.machine1.policy
@@ -0,0 +1,104 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1+
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.machine1.login">
+ <description gettext-domain="systemd">Log into a local container</description>
+ <message gettext-domain="systemd">Authentication is required to log into a local container.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.machine1.host-login">
+ <description gettext-domain="systemd">Log into the local host</description>
+ <message gettext-domain="systemd">Authentication is required to log into the local host.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.machine1.shell">
+ <description gettext-domain="systemd">Acquire a shell in a local container</description>
+ <message gettext-domain="systemd">Authentication is required to acquire a shell in a local container.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.login</annotate>
+ </action>
+
+ <action id="org.freedesktop.machine1.host-shell">
+ <description gettext-domain="systemd">Acquire a shell on the local host</description>
+ <message gettext-domain="systemd">Authentication is required to acquire a shell on the local host.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.host-login</annotate>
+ </action>
+
+ <action id="org.freedesktop.machine1.open-pty">
+ <description gettext-domain="systemd">Acquire a pseudo TTY in a local container</description>
+ <message gettext-domain="systemd">Authentication is required to acquire a pseudo TTY in a local container.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.machine1.host-open-pty">
+ <description gettext-domain="systemd">Acquire a pseudo TTY on the local host</description>
+ <message gettext-domain="systemd">Authentication is required to acquire a pseudo TTY on the local host.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.machine1.manage-machines">
+ <description gettext-domain="systemd">Manage local virtual machines and containers</description>
+ <message gettext-domain="systemd">Authentication is required to manage local virtual machines and containers.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.shell org.freedesktop.login1.login</annotate>
+ </action>
+
+ <action id="org.freedesktop.machine1.manage-images">
+ <description gettext-domain="systemd">Manage local virtual machine and container images</description>
+ <message gettext-domain="systemd">Authentication is required to manage local virtual machine and container images.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/machine/org.freedesktop.machine1.service b/src/machine/org.freedesktop.machine1.service
new file mode 100644
index 0000000..d07bcae
--- /dev/null
+++ b/src/machine/org.freedesktop.machine1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.machine1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.machine1.service
diff --git a/src/machine/test-machine-tables.c b/src/machine/test-machine-tables.c
new file mode 100644
index 0000000..04db7d8
--- /dev/null
+++ b/src/machine/test-machine-tables.c
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "machine.h"
+#include "test-tables.h"
+
+int main(int argc, char **argv) {
+ test_table(kill_who, KILL_WHO);
+ test_table(machine_class, MACHINE_CLASS);
+ test_table(machine_state, MACHINE_STATE);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/modules-load/modules-load.c b/src/modules-load/modules-load.c
new file mode 100644
index 0000000..36bda34
--- /dev/null
+++ b/src/modules-load/modules-load.c
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <libkmod.h>
+#include <limits.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include "conf-files.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "main-func.h"
+#include "module-util.h"
+#include "pretty-print.h"
+#include "proc-cmdline.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static char **arg_proc_cmdline_modules = NULL;
+static const char conf_file_dirs[] = CONF_PATHS_NULSTR("modules-load.d");
+
+STATIC_DESTRUCTOR_REGISTER(arg_proc_cmdline_modules, strv_freep);
+
+static void systemd_kmod_log(void *data, int priority, const char *file, int line,
+ const char *fn, const char *format, va_list args) {
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ log_internalv(priority, 0, file, line, fn, format, args);
+ REENABLE_WARNING;
+}
+
+static int add_modules(const char *p) {
+ _cleanup_strv_free_ char **k = NULL;
+
+ k = strv_split(p, ",");
+ if (!k)
+ return log_oom();
+
+ if (strv_extend_strv(&arg_proc_cmdline_modules, k, true) < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r;
+
+ if (proc_cmdline_key_streq(key, "modules_load")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = add_modules(value);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int apply_file(struct kmod_ctx *ctx, const char *path, bool ignore_enoent) {
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(ctx);
+ assert(path);
+
+ r = search_and_fopen_nulstr(path, "re", NULL, conf_file_dirs, &f);
+ if (r < 0) {
+ if (ignore_enoent && r == -ENOENT)
+ return 0;
+
+ return log_error_errno(r, "Failed to open %s: %m", path);
+ }
+
+ log_debug("apply: %s", path);
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l;
+ int k;
+
+ k = read_line(f, LONG_LINE_MAX, &line);
+ if (k < 0)
+ return log_error_errno(k, "Failed to read file '%s': %m", path);
+ if (k == 0)
+ break;
+
+ l = strstrip(line);
+ if (isempty(l))
+ continue;
+ if (strchr(COMMENTS, *l))
+ continue;
+
+ k = module_load_and_warn(ctx, l, true);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-modules-load.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [CONFIGURATION FILE...]\n\n"
+ "Loads statically configured kernel modules.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(kmod_unrefp) struct kmod_ctx *ctx = NULL;
+ int r, k;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ ctx = kmod_new(NULL, NULL);
+ if (!ctx) {
+ log_error("Failed to allocate memory for kmod.");
+ return -ENOMEM;
+ }
+
+ kmod_load_resources(ctx);
+ kmod_set_log_fn(ctx, systemd_kmod_log, NULL);
+
+ r = 0;
+
+ if (argc > optind) {
+ int i;
+
+ for (i = optind; i < argc; i++) {
+ k = apply_file(ctx, argv[i], false);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ } else {
+ _cleanup_strv_free_ char **files = NULL;
+ char **fn, **i;
+
+ STRV_FOREACH(i, arg_proc_cmdline_modules) {
+ k = module_load_and_warn(ctx, *i, true);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ k = conf_files_list_nulstr(&files, ".conf", NULL, 0, conf_file_dirs);
+ if (k < 0) {
+ log_error_errno(k, "Failed to enumerate modules-load.d files: %m");
+ if (r == 0)
+ r = k;
+ return r;
+ }
+
+ STRV_FOREACH(fn, files) {
+ k = apply_file(ctx, *fn, true);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+ }
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/mount/mount-tool.c b/src/mount/mount-tool.c
new file mode 100644
index 0000000..bbbc91c
--- /dev/null
+++ b/src/mount/mount-tool.c
@@ -0,0 +1,1620 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+
+#include "sd-bus.h"
+#include "sd-device.h"
+
+#include "bus-error.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "fstab-util.h"
+#include "main-func.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "spawn-polkit-agent.h"
+#include "stat-util.h"
+#include "strv.h"
+#include "unit-def.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "terminal-util.h"
+
+enum {
+ ACTION_DEFAULT,
+ ACTION_MOUNT,
+ ACTION_AUTOMOUNT,
+ ACTION_UMOUNT,
+ ACTION_LIST,
+} arg_action = ACTION_DEFAULT;
+
+static bool arg_no_block = false;
+static PagerFlags arg_pager_flags = 0;
+static bool arg_ask_password = true;
+static bool arg_quiet = false;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static bool arg_user = false;
+static const char *arg_host = NULL;
+static bool arg_discover = false;
+static char *arg_mount_what = NULL;
+static char *arg_mount_where = NULL;
+static char *arg_mount_type = NULL;
+static char *arg_mount_options = NULL;
+static char *arg_description = NULL;
+static char **arg_property = NULL;
+static usec_t arg_timeout_idle = USEC_INFINITY;
+static bool arg_timeout_idle_set = false;
+static char **arg_automount_property = NULL;
+static int arg_bind_device = -1;
+static uid_t arg_uid = UID_INVALID;
+static gid_t arg_gid = GID_INVALID;
+static bool arg_fsck = true;
+static bool arg_aggressive_gc = false;
+
+STATIC_DESTRUCTOR_REGISTER(arg_mount_what, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_mount_where, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_mount_type, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_mount_options, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_description, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_property, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_automount_property, strv_freep);
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-mount", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("systemd-mount [OPTIONS...] WHAT [WHERE]\n"
+ "systemd-mount [OPTIONS...] --list\n"
+ "%s [OPTIONS...] %sWHAT|WHERE...\n\n"
+ "Establish a mount or auto-mount point transiently.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-block Do not wait until operation finished\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-ask-password Do not prompt for password\n"
+ " -q --quiet Suppress information messages during runtime\n"
+ " --user Run as user unit\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --discover Discover mount device metadata\n"
+ " -t --type=TYPE File system type\n"
+ " -o --options=OPTIONS Mount options\n"
+ " --owner=USER Add uid= and gid= options for USER\n"
+ " --fsck=no Don't run file system check before mount\n"
+ " --description=TEXT Description for unit\n"
+ " -p --property=NAME=VALUE Set mount unit property\n"
+ " -A --automount=BOOL Create an auto-mount point\n"
+ " --timeout-idle-sec=SEC Specify automount idle timeout\n"
+ " --automount-property=NAME=VALUE\n"
+ " Set automount unit property\n"
+ " --bind-device Bind automount unit to device\n"
+ " --list List mountable block devices\n"
+ " -u --umount Unmount mount points\n"
+ " -G --collect Unload unit after it stopped, even when failed\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , streq(program_invocation_short_name, "systemd-umount") ? "" : "--umount "
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_BLOCK,
+ ARG_NO_PAGER,
+ ARG_NO_ASK_PASSWORD,
+ ARG_USER,
+ ARG_SYSTEM,
+ ARG_DISCOVER,
+ ARG_MOUNT_TYPE,
+ ARG_MOUNT_OPTIONS,
+ ARG_OWNER,
+ ARG_FSCK,
+ ARG_DESCRIPTION,
+ ARG_TIMEOUT_IDLE,
+ ARG_AUTOMOUNT,
+ ARG_AUTOMOUNT_PROPERTY,
+ ARG_BIND_DEVICE,
+ ARG_LIST,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-block", no_argument, NULL, ARG_NO_BLOCK },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "quiet", no_argument, NULL, 'q' },
+ { "user", no_argument, NULL, ARG_USER },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "discover", no_argument, NULL, ARG_DISCOVER },
+ { "type", required_argument, NULL, 't' },
+ { "options", required_argument, NULL, 'o' },
+ { "owner", required_argument, NULL, ARG_OWNER },
+ { "fsck", required_argument, NULL, ARG_FSCK },
+ { "description", required_argument, NULL, ARG_DESCRIPTION },
+ { "property", required_argument, NULL, 'p' },
+ { "automount", required_argument, NULL, ARG_AUTOMOUNT },
+ { "timeout-idle-sec", required_argument, NULL, ARG_TIMEOUT_IDLE },
+ { "automount-property", required_argument, NULL, ARG_AUTOMOUNT_PROPERTY },
+ { "bind-device", no_argument, NULL, ARG_BIND_DEVICE },
+ { "list", no_argument, NULL, ARG_LIST },
+ { "umount", no_argument, NULL, 'u' },
+ { "unmount", no_argument, NULL, 'u' },
+ { "collect", no_argument, NULL, 'G' },
+ {},
+ };
+
+ int r, c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ if (strstr(program_invocation_short_name, "systemd-umount"))
+ arg_action = ACTION_UMOUNT;
+
+ while ((c = getopt_long(argc, argv, "hqH:M:t:o:p:AuG", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_BLOCK:
+ arg_no_block = true;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case ARG_USER:
+ arg_user = true;
+ break;
+
+ case ARG_SYSTEM:
+ arg_user = false;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case ARG_DISCOVER:
+ arg_discover = true;
+ break;
+
+ case 't':
+ if (free_and_strdup(&arg_mount_type, optarg) < 0)
+ return log_oom();
+ break;
+
+ case 'o':
+ if (free_and_strdup(&arg_mount_options, optarg) < 0)
+ return log_oom();
+ break;
+
+ case ARG_OWNER: {
+ const char *user = optarg;
+
+ r = get_user_creds(&user, &arg_uid, &arg_gid, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r,
+ r == -EBADMSG ? "UID or GID of user %s are invalid."
+ : "Cannot use \"%s\" as owner: %m",
+ optarg);
+ break;
+ }
+
+ case ARG_FSCK:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --fsck= argument: %s", optarg);
+
+ arg_fsck = r;
+ break;
+
+ case ARG_DESCRIPTION:
+ if (free_and_strdup(&arg_description, optarg) < 0)
+ return log_oom();
+ break;
+
+ case 'p':
+ if (strv_extend(&arg_property, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case 'A':
+ arg_action = ACTION_AUTOMOUNT;
+ break;
+
+ case ARG_AUTOMOUNT:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "--automount= expects a valid boolean parameter: %s", optarg);
+
+ arg_action = r ? ACTION_AUTOMOUNT : ACTION_MOUNT;
+ break;
+
+ case ARG_TIMEOUT_IDLE:
+ r = parse_sec(optarg, &arg_timeout_idle);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse timeout: %s", optarg);
+
+ break;
+
+ case ARG_AUTOMOUNT_PROPERTY:
+ if (strv_extend(&arg_automount_property, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_BIND_DEVICE:
+ arg_bind_device = true;
+ break;
+
+ case ARG_LIST:
+ arg_action = ACTION_LIST;
+ break;
+
+ case 'u':
+ arg_action = ACTION_UMOUNT;
+ break;
+
+ case 'G':
+ arg_aggressive_gc = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_user && arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Execution in user context is not supported on non-local systems.");
+
+ if (arg_action == ACTION_LIST) {
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments.");
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Listing devices only supported locally.");
+ } else if (arg_action == ACTION_UMOUNT) {
+ if (optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "At least one argument required.");
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL) {
+ int i;
+
+ for (i = optind; i < argc; i++)
+ if (!path_is_absolute(argv[i]) )
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Only absolute path is supported: %s", argv[i]);
+ }
+ } else {
+ if (optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "At least one argument required.");
+
+ if (argc > optind+2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "At most two arguments required.");
+
+ if (arg_mount_type && (fstype_is_api_vfs(arg_mount_type) || fstype_is_network(arg_mount_type))) {
+ arg_mount_what = strdup(argv[optind]);
+ if (!arg_mount_what)
+ return log_oom();
+
+ } else if (arg_transport == BUS_TRANSPORT_LOCAL) {
+ _cleanup_free_ char *u = NULL;
+
+ u = fstab_node_to_udev_node(argv[optind]);
+ if (!u)
+ return log_oom();
+
+ r = chase_symlinks(u, NULL, 0, &arg_mount_what);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make path %s absolute: %m", u);
+ } else {
+ arg_mount_what = strdup(argv[optind]);
+ if (!arg_mount_what)
+ return log_oom();
+
+ path_simplify(arg_mount_what, false);
+
+ if (!path_is_absolute(arg_mount_what))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Only absolute path is supported: %s", arg_mount_what);
+ }
+
+ if (argc > optind+1) {
+ if (arg_transport == BUS_TRANSPORT_LOCAL) {
+ r = chase_symlinks(argv[optind+1], NULL, CHASE_NONEXISTENT, &arg_mount_where);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make path %s absolute: %m", argv[optind+1]);
+ } else {
+ arg_mount_where = strdup(argv[optind+1]);
+ if (!arg_mount_where)
+ return log_oom();
+
+ path_simplify(arg_mount_where, false);
+
+ if (!path_is_absolute(arg_mount_where))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Only absolute path is supported: %s", arg_mount_where);
+ }
+ } else
+ arg_discover = true;
+
+ if (arg_discover && arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Automatic mount location discovery is only supported locally.");
+ }
+
+ return 1;
+}
+
+static int transient_unit_set_properties(sd_bus_message *m, UnitType t, char **properties) {
+ int r;
+
+ if (!isempty(arg_description)) {
+ r = sd_bus_message_append(m, "(sv)", "Description", "s", arg_description);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_bind_device && is_device_path(arg_mount_what)) {
+ _cleanup_free_ char *device_unit = NULL;
+
+ r = unit_name_from_path(arg_mount_what, ".device", &device_unit);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "(sv)(sv)",
+ "After", "as", 1, device_unit,
+ "BindsTo", "as", 1, device_unit);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_aggressive_gc) {
+ r = sd_bus_message_append(m, "(sv)", "CollectMode", "s", "inactive-or-failed");
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_append_unit_property_assignment_many(m, t, properties);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int transient_mount_set_properties(sd_bus_message *m) {
+ _cleanup_free_ char *options = NULL;
+ int r;
+
+ assert(m);
+
+ r = transient_unit_set_properties(m, UNIT_MOUNT, arg_property);
+ if (r < 0)
+ return r;
+
+ if (arg_mount_what) {
+ r = sd_bus_message_append(m, "(sv)", "What", "s", arg_mount_what);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_mount_type) {
+ r = sd_bus_message_append(m, "(sv)", "Type", "s", arg_mount_type);
+ if (r < 0)
+ return r;
+ }
+
+ /* Prepend uid=…,gid=… if arg_uid is set */
+ if (arg_uid != UID_INVALID) {
+ r = asprintf(&options,
+ "uid=" UID_FMT ",gid=" GID_FMT "%s%s",
+ arg_uid, arg_gid,
+ arg_mount_options ? "," : "", strempty(arg_mount_options));
+ if (r < 0)
+ return -ENOMEM;
+ }
+
+ if (options || arg_mount_options) {
+ log_debug("Using mount options: %s", options ?: arg_mount_options);
+
+ r = sd_bus_message_append(m, "(sv)", "Options", "s", options ?: arg_mount_options);
+ if (r < 0)
+ return r;
+ } else
+ log_debug("Not using any mount options");
+
+ if (arg_fsck) {
+ _cleanup_free_ char *fsck = NULL;
+
+ r = unit_name_from_path_instance("systemd-fsck", arg_mount_what, ".service", &fsck);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m,
+ "(sv)(sv)",
+ "Requires", "as", 1, fsck,
+ "After", "as", 1, fsck);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int transient_automount_set_properties(sd_bus_message *m) {
+ int r;
+
+ assert(m);
+
+ r = transient_unit_set_properties(m, UNIT_AUTOMOUNT, arg_automount_property);
+ if (r < 0)
+ return r;
+
+ if (arg_timeout_idle != USEC_INFINITY) {
+ r = sd_bus_message_append(m, "(sv)", "TimeoutIdleUSec", "t", arg_timeout_idle);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int start_transient_mount(
+ sd_bus *bus,
+ char **argv) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_free_ char *mount_unit = NULL;
+ int r;
+
+ if (!arg_no_block) {
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch jobs: %m");
+ }
+
+ r = unit_name_from_path(arg_mount_where, ".mount", &mount_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make mount unit name: %m");
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartTransientUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Name and mode */
+ r = sd_bus_message_append(m, "ss", mount_unit, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Properties */
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = transient_mount_set_properties(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Auxiliary units */
+ r = sd_bus_message_append(m, "a(sa(sv))", 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start transient mount unit: %s", bus_error_message(&error, r));
+
+ if (w) {
+ const char *object;
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, arg_quiet);
+ if (r < 0)
+ return r;
+ }
+
+ if (!arg_quiet)
+ log_info("Started unit %s%s%s for mount point: %s%s%s",
+ ansi_highlight(), mount_unit, ansi_normal(),
+ ansi_highlight(), arg_mount_where, ansi_normal());
+
+ return 0;
+}
+
+static int start_transient_automount(
+ sd_bus *bus,
+ char **argv) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_free_ char *automount_unit = NULL, *mount_unit = NULL;
+ int r;
+
+ if (!arg_no_block) {
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch jobs: %m");
+ }
+
+ r = unit_name_from_path(arg_mount_where, ".automount", &automount_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make automount unit name: %m");
+
+ r = unit_name_from_path(arg_mount_where, ".mount", &mount_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make mount unit name: %m");
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartTransientUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Name and mode */
+ r = sd_bus_message_append(m, "ss", automount_unit, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Properties */
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = transient_automount_set_properties(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Auxiliary units */
+ r = sd_bus_message_open_container(m, 'a', "(sa(sv))");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'r', "sa(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", mount_unit);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = transient_mount_set_properties(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start transient automount unit: %s", bus_error_message(&error, r));
+
+ if (w) {
+ const char *object;
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, arg_quiet);
+ if (r < 0)
+ return r;
+ }
+
+ if (!arg_quiet)
+ log_info("Started unit %s%s%s for mount point: %s%s%s",
+ ansi_highlight(), automount_unit, ansi_normal(),
+ ansi_highlight(), arg_mount_where, ansi_normal());
+
+ return 0;
+}
+
+static int find_mount_points(const char *what, char ***list) {
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ size_t bufsize = 0, n = 0;
+
+ assert(what);
+ assert(list);
+
+ /* Returns all mount points obtained from /proc/self/mountinfo in *list,
+ * and the number of mount points as return value. */
+
+ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!proc_self_mountinfo)
+ return log_error_errno(errno, "Can't open /proc/self/mountinfo: %m");
+
+ for (;;) {
+ _cleanup_free_ char *path = NULL, *where = NULL, *dev = NULL;
+ int r;
+
+ r = fscanf(proc_self_mountinfo,
+ "%*s " /* (1) mount id */
+ "%*s " /* (2) parent id */
+ "%*s " /* (3) major:minor */
+ "%*s " /* (4) root */
+ "%ms " /* (5) mount point */
+ "%*s" /* (6) mount options */
+ "%*[^-]" /* (7) optional fields */
+ "- " /* (8) separator */
+ "%*s " /* (9) file system type */
+ "%ms" /* (10) mount source */
+ "%*s" /* (11) mount options 2 */
+ "%*[^\n]", /* some rubbish at the end */
+ &path, &dev);
+ if (r != 2) {
+ if (r == EOF)
+ break;
+
+ continue;
+ }
+
+ if (!streq(what, dev))
+ continue;
+
+ r = cunescape(path, UNESCAPE_RELAX, &where);
+ if (r < 0)
+ continue;
+
+ /* one extra slot is needed for the terminating NULL */
+ if (!GREEDY_REALLOC(l, bufsize, n + 2))
+ return log_oom();
+
+ l[n++] = TAKE_PTR(where);
+ }
+
+ if (!GREEDY_REALLOC(l, bufsize, n + 1))
+ return log_oom();
+
+ l[n] = NULL;
+ *list = TAKE_PTR(l);
+
+ return n;
+}
+
+static int find_loop_device(const char *backing_file, char **loop_dev) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ _cleanup_free_ char *l = NULL;
+
+ assert(backing_file);
+ assert(loop_dev);
+
+ d = opendir("/sys/devices/virtual/block");
+ if (!d)
+ return -errno;
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ _cleanup_free_ char *sys = NULL, *fname = NULL;
+ int r;
+
+ dirent_ensure_type(d, de);
+
+ if (de->d_type != DT_DIR)
+ continue;
+
+ if (!startswith(de->d_name, "loop"))
+ continue;
+
+ sys = strjoin("/sys/devices/virtual/block/", de->d_name, "/loop/backing_file");
+ if (!sys)
+ return -ENOMEM;
+
+ r = read_one_line_file(sys, &fname);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to read %s, ignoring: %m", sys);
+ continue;
+ }
+
+ if (files_same(fname, backing_file, 0) <= 0)
+ continue;
+
+ l = strjoin("/dev/", de->d_name);
+ if (!l)
+ return -ENOMEM;
+
+ break;
+ }
+
+ if (!l)
+ return -ENXIO;
+
+ *loop_dev = TAKE_PTR(l);
+
+ return 0;
+}
+
+static int stop_mount(
+ sd_bus *bus,
+ const char *where,
+ const char *suffix) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_free_ char *mount_unit = NULL;
+ int r;
+
+ if (!arg_no_block) {
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch jobs: %m");
+ }
+
+ r = unit_name_from_path(where, suffix, &mount_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make %s unit name from path %s: %m", suffix + 1, where);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StopUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Name and mode */
+ r = sd_bus_message_append(m, "ss", mount_unit, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0) {
+ if (streq(suffix, ".automount") &&
+ sd_bus_error_has_name(&error, "org.freedesktop.systemd1.NoSuchUnit"))
+ return 0;
+ return log_error_errno(r, "Failed to stop %s unit: %s", suffix + 1, bus_error_message(&error, r));
+ }
+
+ if (w) {
+ const char *object;
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, arg_quiet);
+ if (r < 0)
+ return r;
+ }
+
+ if (!arg_quiet)
+ log_info("Stopped unit %s%s%s for mount point: %s%s%s",
+ ansi_highlight(), mount_unit, ansi_normal(),
+ ansi_highlight(), where, ansi_normal());
+
+ return 0;
+}
+
+static int stop_mounts(
+ sd_bus *bus,
+ const char *where) {
+
+ int r;
+
+ if (path_equal(where, "/"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Refusing to operate on root directory: %s", where);
+
+ if (!path_is_normalized(where))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path contains non-normalized components: %s", where);
+
+ r = stop_mount(bus, where, ".mount");
+ if (r < 0)
+ return r;
+
+ r = stop_mount(bus, where, ".automount");
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int umount_by_device(sd_bus *bus, const char *what) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_strv_free_ char **list = NULL;
+ struct stat st;
+ const char *v;
+ char **l;
+ int r, r2 = 0;
+
+ assert(what);
+
+ if (stat(what, &st) < 0)
+ return log_error_errno(errno, "Can't stat %s: %m", what);
+
+ if (!S_ISBLK(st.st_mode)) {
+ log_error("Not a block device: %s", what);
+ return -ENOTBLK;
+ }
+
+ r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device from device number: %m");
+
+ r = sd_device_get_property_value(d, "ID_FS_USAGE", &v);
+ if (r < 0)
+ return log_device_error_errno(d, r, "Failed to get device property: %m");
+
+ if (!streq(v, "filesystem")) {
+ log_device_error(d, "%s does not contain a known file system.", what);
+ return -EINVAL;
+ }
+
+ if (sd_device_get_property_value(d, "SYSTEMD_MOUNT_WHERE", &v) >= 0)
+ r2 = stop_mounts(bus, v);
+
+ r = find_mount_points(what, &list);
+ if (r < 0)
+ return r;
+
+ for (l = list; *l; l++) {
+ r = stop_mounts(bus, *l);
+ if (r < 0)
+ r2 = r;
+ }
+
+ return r2;
+}
+
+static int umount_loop(sd_bus *bus, const char *backing_file) {
+ _cleanup_free_ char *loop_dev = NULL;
+ int r;
+
+ assert(backing_file);
+
+ r = find_loop_device(backing_file, &loop_dev);
+ if (r < 0)
+ return log_error_errno(r, r == -ENXIO ? "File %s is not mounted." : "Can't get loop device for %s: %m", backing_file);
+
+ return umount_by_device(bus, loop_dev);
+}
+
+static int action_umount(
+ sd_bus *bus,
+ int argc,
+ char **argv) {
+
+ int i, r, r2 = 0;
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL) {
+ for (i = optind; i < argc; i++) {
+ _cleanup_free_ char *p = NULL;
+
+ p = strdup(argv[i]);
+ if (!p)
+ return log_oom();
+
+ path_simplify(p, false);
+
+ r = stop_mounts(bus, p);
+ if (r < 0)
+ r2 = r;
+ }
+ return r2;
+ }
+
+ for (i = optind; i < argc; i++) {
+ _cleanup_free_ char *u = NULL, *p = NULL;
+ struct stat st;
+
+ u = fstab_node_to_udev_node(argv[i]);
+ if (!u)
+ return log_oom();
+
+ r = chase_symlinks(u, NULL, 0, &p);
+ if (r < 0) {
+ r2 = log_error_errno(r, "Failed to make path %s absolute: %m", argv[i]);
+ continue;
+ }
+
+ if (stat(p, &st) < 0)
+ return log_error_errno(errno, "Can't stat %s (from %s): %m", p, argv[i]);
+
+ if (S_ISBLK(st.st_mode))
+ r = umount_by_device(bus, p);
+ else if (S_ISREG(st.st_mode))
+ r = umount_loop(bus, p);
+ else if (S_ISDIR(st.st_mode))
+ r = stop_mounts(bus, p);
+ else {
+ log_error("Invalid file type: %s (from %s)", p, argv[i]);
+ r = -EINVAL;
+ }
+
+ if (r < 0)
+ r2 = r;
+ }
+
+ return r2;
+}
+
+static int acquire_mount_type(sd_device *d) {
+ const char *v;
+
+ assert(d);
+
+ if (arg_mount_type)
+ return 0;
+
+ if (sd_device_get_property_value(d, "ID_FS_TYPE", &v) < 0)
+ return 0;
+
+ arg_mount_type = strdup(v);
+ if (!arg_mount_type)
+ return log_oom();
+
+ log_debug("Discovered type=%s", arg_mount_type);
+ return 1;
+}
+
+static int acquire_mount_options(sd_device *d) {
+ const char *v;
+
+ assert(d);
+
+ if (arg_mount_options)
+ return 0;
+
+ if (sd_device_get_property_value(d, "SYSTEMD_MOUNT_OPTIONS", &v) < 0)
+ return 0;
+
+ arg_mount_options = strdup(v);
+ if (!arg_mount_options)
+ return log_oom();
+
+ log_debug("Discovered options=%s", arg_mount_options);
+ return 1;
+}
+
+static const char *get_model(sd_device *d) {
+ const char *model;
+
+ assert(d);
+
+ if (sd_device_get_property_value(d, "ID_MODEL_FROM_DATABASE", &model) >= 0)
+ return model;
+
+ if (sd_device_get_property_value(d, "ID_MODEL", &model) >= 0)
+ return model;
+
+ return NULL;
+}
+
+static const char* get_label(sd_device *d) {
+ const char *label;
+
+ assert(d);
+
+ if (sd_device_get_property_value(d, "ID_FS_LABEL", &label) >= 0)
+ return label;
+
+ if (sd_device_get_property_value(d, "ID_PART_ENTRY_NAME", &label) >= 0)
+ return label;
+
+ return NULL;
+}
+
+static int acquire_mount_where(sd_device *d) {
+ const char *v;
+
+ if (arg_mount_where)
+ return 0;
+
+ if (sd_device_get_property_value(d, "SYSTEMD_MOUNT_WHERE", &v) < 0) {
+ _cleanup_free_ char *escaped = NULL;
+ const char *name;
+
+ name = get_label(d);
+ if (!name)
+ name = get_model(d);
+ if (!name) {
+ const char *dn;
+
+ if (sd_device_get_devname(d, &dn) < 0)
+ return 0;
+
+ name = basename(dn);
+ }
+
+ escaped = xescape(name, "\\");
+ if (!escaped)
+ return log_oom();
+ if (!filename_is_valid(escaped))
+ return 0;
+
+ arg_mount_where = strjoin("/run/media/system/", escaped);
+ } else
+ arg_mount_where = strdup(v);
+
+ if (!arg_mount_where)
+ return log_oom();
+
+ log_debug("Discovered where=%s", arg_mount_where);
+ return 1;
+}
+
+static int acquire_mount_where_for_loop_dev(const char *loop_dev) {
+ _cleanup_strv_free_ char **list = NULL;
+ int r;
+
+ if (arg_mount_where)
+ return 0;
+
+ r = find_mount_points(loop_dev, &list);
+ if (r < 0)
+ return r;
+ else if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Can't find mount point of %s. It is expected that %s is already mounted on a place.",
+ loop_dev, loop_dev);
+ else if (r >= 2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s is mounted on %d places. It is expected that %s is mounted on a place.",
+ loop_dev, r, loop_dev);
+
+ arg_mount_where = strdup(list[0]);
+ if (!arg_mount_where)
+ return log_oom();
+
+ log_debug("Discovered where=%s", arg_mount_where);
+ return 1;
+}
+
+static int acquire_description(sd_device *d) {
+ const char *model, *label;
+
+ if (arg_description)
+ return 0;
+
+ model = get_model(d);
+
+ label = get_label(d);
+ if (!label)
+ (void) sd_device_get_property_value(d, "ID_PART_ENTRY_NUMBER", &label);
+
+ if (model && label)
+ arg_description = strjoin(model, " ", label);
+ else if (label)
+ arg_description = strdup(label);
+ else if (model)
+ arg_description = strdup(model);
+ else
+ return 0;
+
+ if (!arg_description)
+ return log_oom();
+
+ log_debug("Discovered description=%s", arg_description);
+ return 1;
+}
+
+static int acquire_removable(sd_device *d) {
+ const char *v;
+
+ /* Shortcut this if there's no reason to check it */
+ if (arg_action != ACTION_DEFAULT && arg_timeout_idle_set && arg_bind_device >= 0)
+ return 0;
+
+ for (;;) {
+ if (sd_device_get_sysattr_value(d, "removable", &v) > 0)
+ break;
+
+ if (sd_device_get_parent(d, &d) < 0)
+ return 0;
+
+ if (sd_device_get_subsystem(d, &v) < 0 || !streq(v, "block"))
+ return 0;
+ }
+
+ if (parse_boolean(v) <= 0)
+ return 0;
+
+ log_debug("Discovered removable device.");
+
+ if (arg_action == ACTION_DEFAULT) {
+ log_debug("Automatically turning on automount.");
+ arg_action = ACTION_AUTOMOUNT;
+ }
+
+ if (!arg_timeout_idle_set) {
+ log_debug("Setting idle timeout to 1s.");
+ arg_timeout_idle = USEC_PER_SEC;
+ }
+
+ if (arg_bind_device < 0) {
+ log_debug("Binding automount unit to device.");
+ arg_bind_device = true;
+ }
+
+ return 1;
+}
+
+static int discover_loop_backing_file(void) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_free_ char *loop_dev = NULL;
+ struct stat st;
+ const char *v;
+ int r;
+
+ r = find_loop_device(arg_mount_what, &loop_dev);
+ if (r < 0 && r != -ENXIO)
+ return log_error_errno(errno, "Can't get loop device for %s: %m", arg_mount_what);
+
+ if (r == -ENXIO) {
+ _cleanup_free_ char *escaped = NULL;
+
+ if (arg_mount_where)
+ return 0;
+
+ escaped = xescape(basename(arg_mount_what), "\\");
+ if (!escaped)
+ return log_oom();
+ if (!filename_is_valid(escaped)) {
+ log_error("Escaped name %s is not a valid filename.", escaped);
+ return -EINVAL;
+ }
+
+ arg_mount_where = strjoin("/run/media/system/", escaped);
+ if (!arg_mount_where)
+ return log_oom();
+
+ log_debug("Discovered where=%s", arg_mount_where);
+ return 0;
+ }
+
+ if (stat(loop_dev, &st) < 0)
+ return log_error_errno(errno, "Can't stat %s: %m", loop_dev);
+
+ if (!S_ISBLK(st.st_mode)) {
+ log_error("Invalid file type: %s", loop_dev);
+ return -EINVAL;
+ }
+
+ r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device from device number: %m");
+
+ if (sd_device_get_property_value(d, "ID_FS_USAGE", &v) < 0 || !streq(v, "filesystem")) {
+ log_device_error(d, "%s does not contain a known file system.", arg_mount_what);
+ return -EINVAL;
+ }
+
+ r = acquire_mount_type(d);
+ if (r < 0)
+ return r;
+
+ r = acquire_mount_options(d);
+ if (r < 0)
+ return r;
+
+ r = acquire_mount_where_for_loop_dev(loop_dev);
+ if (r < 0)
+ return r;
+
+ r = acquire_description(d);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int discover_device(void) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ struct stat st;
+ const char *v;
+ int r;
+
+ if (stat(arg_mount_what, &st) < 0)
+ return log_error_errno(errno, "Can't stat %s: %m", arg_mount_what);
+
+ if (S_ISREG(st.st_mode))
+ return discover_loop_backing_file();
+
+ if (!S_ISBLK(st.st_mode)) {
+ log_error("Invalid file type: %s", arg_mount_what);
+ return -EINVAL;
+ }
+
+ r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device from device number: %m");
+
+ if (sd_device_get_property_value(d, "ID_FS_USAGE", &v) < 0 || !streq(v, "filesystem")) {
+ log_error("%s does not contain a known file system.", arg_mount_what);
+ return -EINVAL;
+ }
+
+ r = acquire_mount_type(d);
+ if (r < 0)
+ return r;
+
+ r = acquire_mount_options(d);
+ if (r < 0)
+ return r;
+
+ r = acquire_mount_where(d);
+ if (r < 0)
+ return r;
+
+ r = acquire_description(d);
+ if (r < 0)
+ return r;
+
+ r = acquire_removable(d);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+enum {
+ COLUMN_NODE,
+ COLUMN_PATH,
+ COLUMN_MODEL,
+ COLUMN_WWN,
+ COLUMN_FSTYPE,
+ COLUMN_LABEL,
+ COLUMN_UUID,
+ _COLUMN_MAX,
+};
+
+struct item {
+ char* columns[_COLUMN_MAX];
+};
+
+static int compare_item(const struct item *a, const struct item *b) {
+ if (a->columns[COLUMN_NODE] == b->columns[COLUMN_NODE])
+ return 0;
+ if (!a->columns[COLUMN_NODE])
+ return 1;
+ if (!b->columns[COLUMN_NODE])
+ return -1;
+
+ return path_compare(a->columns[COLUMN_NODE], b->columns[COLUMN_NODE]);
+}
+
+static int list_devices(void) {
+
+ static const char * const titles[_COLUMN_MAX] = {
+ [COLUMN_NODE] = "NODE",
+ [COLUMN_PATH] = "PATH",
+ [COLUMN_MODEL] = "MODEL",
+ [COLUMN_WWN] = "WWN",
+ [COLUMN_FSTYPE] = "TYPE",
+ [COLUMN_LABEL] = "LABEL",
+ [COLUMN_UUID] = "UUID"
+ };
+
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ size_t n_allocated = 0, n = 0, i;
+ size_t column_width[_COLUMN_MAX];
+ struct item *items = NULL;
+ sd_device *d;
+ unsigned c;
+ int r;
+
+ for (c = 0; c < _COLUMN_MAX; c++)
+ column_width[c] = strlen(titles[c]);
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return log_oom();
+
+ r = sd_device_enumerator_add_match_subsystem(e, "block", true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add block match: %m");
+
+ r = sd_device_enumerator_add_match_property(e, "ID_FS_USAGE", "filesystem");
+ if (r < 0)
+ return log_error_errno(r, "Failed to add property match: %m");
+
+ FOREACH_DEVICE(e, d) {
+ struct item *j;
+
+ if (!GREEDY_REALLOC0(items, n_allocated, n+1)) {
+ r = log_oom();
+ goto finish;
+ }
+
+ j = items + n++;
+
+ for (c = 0; c < _COLUMN_MAX; c++) {
+ const char *x = NULL;
+ size_t k;
+
+ switch (c) {
+
+ case COLUMN_NODE:
+ (void) sd_device_get_devname(d, &x);
+ break;
+
+ case COLUMN_PATH:
+ (void) sd_device_get_property_value(d, "ID_PATH", &x);
+ break;
+
+ case COLUMN_MODEL:
+ x = get_model(d);
+ break;
+
+ case COLUMN_WWN:
+ (void) sd_device_get_property_value(d, "ID_WWN", &x);
+ break;
+
+ case COLUMN_FSTYPE:
+ (void) sd_device_get_property_value(d, "ID_FS_TYPE", &x);
+ break;
+
+ case COLUMN_LABEL:
+ x = get_label(d);
+ break;
+
+ case COLUMN_UUID:
+ (void) sd_device_get_property_value(d, "ID_FS_UUID", &x);
+ break;
+ }
+
+ if (isempty(x))
+ continue;
+
+ j->columns[c] = strdup(x);
+ if (!j->columns[c]) {
+ r = log_oom();
+ goto finish;
+ }
+
+ k = strlen(x);
+ if (k > column_width[c])
+ column_width[c] = k;
+ }
+ }
+
+ if (n == 0) {
+ log_info("No devices found.");
+ goto finish;
+ }
+
+ typesafe_qsort(items, n, compare_item);
+
+ (void) pager_open(arg_pager_flags);
+
+ fputs(ansi_underline(), stdout);
+ for (c = 0; c < _COLUMN_MAX; c++) {
+ if (c > 0)
+ fputc(' ', stdout);
+
+ printf("%-*s", (int) column_width[c], titles[c]);
+ }
+ fputs(ansi_normal(), stdout);
+ fputc('\n', stdout);
+
+ for (i = 0; i < n; i++) {
+ for (c = 0; c < _COLUMN_MAX; c++) {
+ if (c > 0)
+ fputc(' ', stdout);
+
+ printf("%-*s", (int) column_width[c], strna(items[i].columns[c]));
+ }
+ fputc('\n', stdout);
+ }
+
+ r = 0;
+
+finish:
+ for (i = 0; i < n; i++)
+ for (c = 0; c < _COLUMN_MAX; c++)
+ free(items[i].columns[c]);
+
+ free(items);
+ return r;
+}
+
+static int run(int argc, char* argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_action == ACTION_LIST)
+ return list_devices();
+
+ r = bus_connect_transport_systemd(arg_transport, arg_host, arg_user, &bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ if (arg_action == ACTION_UMOUNT)
+ return action_umount(bus, argc, argv);
+
+ if (!path_is_normalized(arg_mount_what)) {
+ log_error("Path contains non-normalized components: %s", arg_mount_what);
+ return -EINVAL;
+ }
+
+ if (arg_discover) {
+ r = discover_device();
+ if (r < 0)
+ return r;
+ }
+
+ if (!arg_mount_where) {
+ log_error("Can't figure out where to mount %s.", arg_mount_what);
+ return -EINVAL;
+ }
+
+ if (path_equal(arg_mount_where, "/")) {
+ log_error("Refusing to operate on root directory.");
+ return -EINVAL;
+ }
+
+ if (!path_is_normalized(arg_mount_where)) {
+ log_error("Path contains non-normalized components: %s", arg_mount_where);
+ return -EINVAL;
+ }
+
+ if (streq_ptr(arg_mount_type, "auto"))
+ arg_mount_type = mfree(arg_mount_type);
+ if (streq_ptr(arg_mount_options, "defaults"))
+ arg_mount_options = mfree(arg_mount_options);
+
+ if (!is_device_path(arg_mount_what))
+ arg_fsck = false;
+
+ if (arg_fsck && arg_mount_type && arg_transport == BUS_TRANSPORT_LOCAL) {
+ r = fsck_exists(arg_mount_type);
+ if (r < 0)
+ log_warning_errno(r, "Couldn't determine whether fsck for %s exists, proceeding anyway.", arg_mount_type);
+ else if (r == 0) {
+ log_debug("Disabling file system check as fsck for %s doesn't exist.", arg_mount_type);
+ arg_fsck = false; /* fsck doesn't exist, let's not attempt it */
+ }
+ }
+
+ /* The kernel (properly) refuses mounting file systems with unknown uid=,gid= options,
+ * but not for all filesystem types. Let's try to catch the cases where the option
+ * would be used if the file system does not support it. It is also possible to
+ * autodetect the file system, but that's only possible with disk-based file systems
+ * which incidentally seem to be implemented more carefully and reject unknown options,
+ * so it's probably OK that we do the check only when the type is specified.
+ */
+ if (arg_mount_type &&
+ !streq(arg_mount_type, "auto") &&
+ arg_uid != UID_INVALID &&
+ !fstype_can_uid_gid(arg_mount_type)) {
+ log_error("File system type %s is not known to support uid=/gid=, refusing.",
+ arg_mount_type);
+ return -EOPNOTSUPP;
+ }
+
+ switch (arg_action) {
+
+ case ACTION_MOUNT:
+ case ACTION_DEFAULT:
+ r = start_transient_mount(bus, argv + optind);
+ break;
+
+ case ACTION_AUTOMOUNT:
+ r = start_transient_automount(bus, argv + optind);
+ break;
+
+ default:
+ assert_not_reached("Unexpected action.");
+ }
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/network/fuzz-netdev-parser.c b/src/network/fuzz-netdev-parser.c
new file mode 100644
index 0000000..37f7cf8
--- /dev/null
+++ b/src/network/fuzz-netdev-parser.c
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "fuzz.h"
+#include "networkd-manager.h"
+#include "tmpfile-util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(manager_freep) Manager *manager = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(unlink_tempfilep) char netdev_config[] = "/tmp/fuzz-networkd.XXXXXX";
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se(fmkostemp_safe(netdev_config, "r+", &f) == 0);
+ if (size != 0)
+ assert_se(fwrite(data, size, 1, f) == 1);
+
+ rewind(f);
+ assert_se(manager_new(&manager) >= 0);
+ (void) netdev_load_one(manager, netdev_config);
+ return 0;
+}
diff --git a/src/network/fuzz-network-parser.c b/src/network/fuzz-network-parser.c
new file mode 100644
index 0000000..4b79500
--- /dev/null
+++ b/src/network/fuzz-network-parser.c
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "fuzz.h"
+#include "networkd-manager.h"
+#include "tmpfile-util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(manager_freep) Manager *manager = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(unlink_tempfilep) char network_config[] = "/tmp/fuzz-networkd.XXXXXX";
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se(fmkostemp_safe(network_config, "r+", &f) == 0);
+ if (size != 0)
+ assert_se(fwrite(data, size, 1, f) == 1);
+
+ rewind(f);
+ assert_se(manager_new(&manager) >= 0);
+ (void) network_load_one(manager, network_config);
+ return 0;
+}
diff --git a/src/network/meson.build b/src/network/meson.build
new file mode 100644
index 0000000..8fe4854
--- /dev/null
+++ b/src/network/meson.build
@@ -0,0 +1,191 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+sources = files('''
+ netdev/bond.c
+ netdev/bond.h
+ netdev/bridge.c
+ netdev/bridge.h
+ netdev/dummy.c
+ netdev/dummy.h
+ netdev/ipvlan.c
+ netdev/ipvlan.h
+ netdev/macvlan.c
+ netdev/macvlan.h
+ netdev/netdev.c
+ netdev/netdev.h
+ netdev/tunnel.c
+ netdev/tunnel.h
+ netdev/tuntap.c
+ netdev/tuntap.h
+ netdev/vcan.c
+ netdev/vcan.h
+ netdev/veth.c
+ netdev/veth.h
+ netdev/vlan.c
+ netdev/vlan.h
+ netdev/vrf.c
+ netdev/vrf.h
+ netdev/vxlan.c
+ netdev/vxlan.h
+ netdev/geneve.c
+ netdev/geneve.h
+ netdev/vxcan.c
+ netdev/vxcan.h
+ netdev/wireguard.c
+ netdev/wireguard.h
+ netdev/netdevsim.c
+ netdev/netdevsim.h
+ netdev/fou-tunnel.c
+ netdev/fou-tunnel.h
+ networkd-address-label.c
+ networkd-address-label.h
+ networkd-address-pool.c
+ networkd-address-pool.h
+ networkd-address.c
+ networkd-address.h
+ networkd-brvlan.c
+ networkd-brvlan.h
+ networkd-conf.c
+ networkd-conf.h
+ networkd-dhcp4.c
+ networkd-dhcp6.c
+ networkd-fdb.c
+ networkd-fdb.h
+ networkd-ipv4ll.c
+ networkd-ipv6-proxy-ndp.c
+ networkd-ipv6-proxy-ndp.h
+ networkd-link-bus.c
+ networkd-link.c
+ networkd-link.h
+ networkd-lldp-tx.c
+ networkd-lldp-tx.h
+ networkd-manager-bus.c
+ networkd-manager.c
+ networkd-manager.h
+ networkd-ndisc.c
+ networkd-ndisc.h
+ networkd-neighbor.c
+ networkd-neighbor.h
+ networkd-radv.c
+ networkd-radv.h
+ networkd-network-bus.c
+ networkd-network.c
+ networkd-network.h
+ networkd-route.c
+ networkd-route.h
+ networkd-routing-policy-rule.c
+ networkd-routing-policy-rule.h
+ networkd-util.c
+ networkd-util.h
+'''.split())
+
+systemd_networkd_sources = files('networkd.c')
+
+systemd_networkd_wait_online_sources = files('''
+ wait-online/link.c
+ wait-online/link.h
+ wait-online/manager.c
+ wait-online/manager.h
+ wait-online/wait-online.c
+'''.split()) + network_internal_h
+
+networkctl_sources = files('networkctl.c')
+
+network_include_dir = include_directories('.')
+
+if conf.get('ENABLE_NETWORKD') == 1
+ networkd_gperf_c = custom_target(
+ 'networkd-gperf.c',
+ input : 'networkd-gperf.gperf',
+ output : 'networkd-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+ networkd_network_gperf_c = custom_target(
+ 'networkd-network-gperf.c',
+ input : 'networkd-network-gperf.gperf',
+ output : 'networkd-network-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+ netdev_gperf_c = custom_target(
+ 'netdev-gperf.c',
+ input : 'netdev/netdev-gperf.gperf',
+ output : 'netdev-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+ libnetworkd_core = static_library(
+ 'networkd-core',
+ sources,
+ network_internal_h,
+ networkd_gperf_c,
+ networkd_network_gperf_c,
+ netdev_gperf_c,
+ include_directories : includes,
+ link_with : [libshared])
+
+ install_data('org.freedesktop.network1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.network1.service',
+ install_dir : dbussystemservicedir)
+ if install_polkit
+ install_data('systemd-networkd.rules',
+ install_dir : polkitrulesdir)
+ endif
+ if install_polkit_pkla
+ install_data('systemd-networkd.pkla',
+ install_dir : polkitpkladir)
+ endif
+
+ install_data('networkd.conf',
+ install_dir : pkgsysconfdir)
+
+ fuzzers += [
+ [['src/network/fuzz-netdev-parser.c',
+ 'src/fuzz/fuzz.h'],
+ [libnetworkd_core,
+ libudev_static,
+ libsystemd_network,
+ libshared],
+ [threads]],
+
+ [['src/network/fuzz-network-parser.c',
+ 'src/fuzz/fuzz.h'],
+ [libnetworkd_core,
+ libudev_static,
+ libsystemd_network,
+ libshared],
+ [threads]]
+ ]
+
+ tests += [
+ [['src/network/test-networkd-conf.c'],
+ [libnetworkd_core,
+ libsystemd_network,
+ libudev],
+ []],
+
+ [['src/network/test-network.c'],
+ [libnetworkd_core,
+ libudev_static,
+ libsystemd_network,
+ libshared],
+ [threads]],
+
+ [['src/network/test-routing-policy-rule.c'],
+ [libnetworkd_core,
+ libsystemd_network,
+ libudev],
+ []],
+
+ [['src/network/test-network-tables.c',
+ 'src/network/test-network-tables.c',
+ test_tables_h],
+ [libnetworkd_core,
+ libudev_static,
+ libudev_core,
+ libsystemd_network,
+ libshared],
+ [threads],
+ '', '', [],
+ [network_include_dir] + libudev_core_includes],
+ ]
+endif
diff --git a/src/network/netdev/bond.c b/src/network/netdev/bond.c
new file mode 100644
index 0000000..550a7f8
--- /dev/null
+++ b/src/network/netdev/bond.c
@@ -0,0 +1,561 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/ether.h>
+#include <linux/if_bonding.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "ether-addr-util.h"
+#include "extract-word.h"
+#include "missing.h"
+#include "netdev/bond.h"
+#include "string-table.h"
+#include "string-util.h"
+
+/*
+ * Number of seconds between instances where the bonding
+ * driver sends learning packets to each slaves peer switch
+ */
+#define LEARNING_PACKETS_INTERVAL_MIN_SEC (1 * USEC_PER_SEC)
+#define LEARNING_PACKETS_INTERVAL_MAX_SEC (0x7fffffff * USEC_PER_SEC)
+
+/* Number of IGMP membership reports to be issued after
+ * a failover event.
+ */
+#define RESEND_IGMP_MIN 0
+#define RESEND_IGMP_MAX 255
+#define RESEND_IGMP_DEFAULT 1
+
+/*
+ * Number of packets to transmit through a slave before
+ * moving to the next one.
+ */
+#define PACKETS_PER_SLAVE_MIN 0
+#define PACKETS_PER_SLAVE_MAX 65535
+#define PACKETS_PER_SLAVE_DEFAULT 1
+
+/*
+ * Number of peer notifications (gratuitous ARPs and
+ * unsolicited IPv6 Neighbor Advertisements) to be issued after a
+ * failover event.
+ */
+#define GRATUITOUS_ARP_MIN 0
+#define GRATUITOUS_ARP_MAX 255
+#define GRATUITOUS_ARP_DEFAULT 1
+
+static const char* const bond_mode_table[_NETDEV_BOND_MODE_MAX] = {
+ [NETDEV_BOND_MODE_BALANCE_RR] = "balance-rr",
+ [NETDEV_BOND_MODE_ACTIVE_BACKUP] = "active-backup",
+ [NETDEV_BOND_MODE_BALANCE_XOR] = "balance-xor",
+ [NETDEV_BOND_MODE_BROADCAST] = "broadcast",
+ [NETDEV_BOND_MODE_802_3AD] = "802.3ad",
+ [NETDEV_BOND_MODE_BALANCE_TLB] = "balance-tlb",
+ [NETDEV_BOND_MODE_BALANCE_ALB] = "balance-alb",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_mode, BondMode);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_mode, bond_mode, BondMode, "Failed to parse bond mode");
+
+static const char* const bond_xmit_hash_policy_table[_NETDEV_BOND_XMIT_HASH_POLICY_MAX] = {
+ [NETDEV_BOND_XMIT_HASH_POLICY_LAYER2] = "layer2",
+ [NETDEV_BOND_XMIT_HASH_POLICY_LAYER34] = "layer3+4",
+ [NETDEV_BOND_XMIT_HASH_POLICY_LAYER23] = "layer2+3",
+ [NETDEV_BOND_XMIT_HASH_POLICY_ENCAP23] = "encap2+3",
+ [NETDEV_BOND_XMIT_HASH_POLICY_ENCAP34] = "encap3+4",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_xmit_hash_policy, BondXmitHashPolicy);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_xmit_hash_policy,
+ bond_xmit_hash_policy,
+ BondXmitHashPolicy,
+ "Failed to parse bond transmit hash policy")
+
+static const char* const bond_lacp_rate_table[_NETDEV_BOND_LACP_RATE_MAX] = {
+ [NETDEV_BOND_LACP_RATE_SLOW] = "slow",
+ [NETDEV_BOND_LACP_RATE_FAST] = "fast",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_lacp_rate, BondLacpRate);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_lacp_rate, bond_lacp_rate, BondLacpRate, "Failed to parse bond lacp rate")
+
+static const char* const bond_ad_select_table[_NETDEV_BOND_AD_SELECT_MAX] = {
+ [NETDEV_BOND_AD_SELECT_STABLE] = "stable",
+ [NETDEV_BOND_AD_SELECT_BANDWIDTH] = "bandwidth",
+ [NETDEV_BOND_AD_SELECT_COUNT] = "count",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_ad_select, BondAdSelect);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_ad_select, bond_ad_select, BondAdSelect, "Failed to parse bond AD select");
+
+static const char* const bond_fail_over_mac_table[_NETDEV_BOND_FAIL_OVER_MAC_MAX] = {
+ [NETDEV_BOND_FAIL_OVER_MAC_NONE] = "none",
+ [NETDEV_BOND_FAIL_OVER_MAC_ACTIVE] = "active",
+ [NETDEV_BOND_FAIL_OVER_MAC_FOLLOW] = "follow",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_fail_over_mac, BondFailOverMac);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_fail_over_mac, bond_fail_over_mac, BondFailOverMac, "Failed to parse bond fail over MAC");
+
+static const char *const bond_arp_validate_table[_NETDEV_BOND_ARP_VALIDATE_MAX] = {
+ [NETDEV_BOND_ARP_VALIDATE_NONE] = "none",
+ [NETDEV_BOND_ARP_VALIDATE_ACTIVE]= "active",
+ [NETDEV_BOND_ARP_VALIDATE_BACKUP]= "backup",
+ [NETDEV_BOND_ARP_VALIDATE_ALL]= "all",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_arp_validate, BondArpValidate);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_arp_validate, bond_arp_validate, BondArpValidate, "Failed to parse bond arp validate");
+
+static const char *const bond_arp_all_targets_table[_NETDEV_BOND_ARP_ALL_TARGETS_MAX] = {
+ [NETDEV_BOND_ARP_ALL_TARGETS_ANY] = "any",
+ [NETDEV_BOND_ARP_ALL_TARGETS_ALL] = "all",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_arp_all_targets, BondArpAllTargets);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_arp_all_targets, bond_arp_all_targets, BondArpAllTargets, "Failed to parse bond Arp all targets");
+
+static const char *bond_primary_reselect_table[_NETDEV_BOND_PRIMARY_RESELECT_MAX] = {
+ [NETDEV_BOND_PRIMARY_RESELECT_ALWAYS] = "always",
+ [NETDEV_BOND_PRIMARY_RESELECT_BETTER]= "better",
+ [NETDEV_BOND_PRIMARY_RESELECT_FAILURE]= "failure",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_primary_reselect, BondPrimaryReselect);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_primary_reselect, bond_primary_reselect, BondPrimaryReselect, "Failed to parse bond primary reselect");
+
+static uint8_t bond_mode_to_kernel(BondMode mode) {
+ switch (mode) {
+ case NETDEV_BOND_MODE_BALANCE_RR:
+ return BOND_MODE_ROUNDROBIN;
+ case NETDEV_BOND_MODE_ACTIVE_BACKUP:
+ return BOND_MODE_ACTIVEBACKUP;
+ case NETDEV_BOND_MODE_BALANCE_XOR:
+ return BOND_MODE_XOR;
+ case NETDEV_BOND_MODE_BROADCAST:
+ return BOND_MODE_BROADCAST;
+ case NETDEV_BOND_MODE_802_3AD:
+ return BOND_MODE_8023AD;
+ case NETDEV_BOND_MODE_BALANCE_TLB:
+ return BOND_MODE_TLB;
+ case NETDEV_BOND_MODE_BALANCE_ALB:
+ return BOND_MODE_ALB;
+ default:
+ return (uint8_t) -1;
+ }
+}
+
+static uint8_t bond_xmit_hash_policy_to_kernel(BondXmitHashPolicy policy) {
+ switch (policy) {
+ case NETDEV_BOND_XMIT_HASH_POLICY_LAYER2:
+ return BOND_XMIT_POLICY_LAYER2;
+ case NETDEV_BOND_XMIT_HASH_POLICY_LAYER34:
+ return BOND_XMIT_POLICY_LAYER34;
+ case NETDEV_BOND_XMIT_HASH_POLICY_LAYER23:
+ return BOND_XMIT_POLICY_LAYER23;
+ case NETDEV_BOND_XMIT_HASH_POLICY_ENCAP23:
+ return BOND_XMIT_POLICY_ENCAP23;
+ case NETDEV_BOND_XMIT_HASH_POLICY_ENCAP34:
+ return BOND_XMIT_POLICY_ENCAP34;
+ default:
+ return (uint8_t) -1;
+ }
+}
+
+static int netdev_bond_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Bond *b;
+ ArpIpTarget *target = NULL;
+ int r, i = 0;
+
+ assert(netdev);
+ assert(!link);
+ assert(m);
+
+ b = BOND(netdev);
+
+ assert(b);
+
+ if (b->mode != _NETDEV_BOND_MODE_INVALID) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_MODE,
+ bond_mode_to_kernel(b->mode));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_MODE attribute: %m");
+ }
+
+ if (b->xmit_hash_policy != _NETDEV_BOND_XMIT_HASH_POLICY_INVALID) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_XMIT_HASH_POLICY,
+ bond_xmit_hash_policy_to_kernel(b->xmit_hash_policy));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_XMIT_HASH_POLICY attribute: %m");
+ }
+
+ if (b->lacp_rate != _NETDEV_BOND_LACP_RATE_INVALID &&
+ b->mode == NETDEV_BOND_MODE_802_3AD) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_AD_LACP_RATE, b->lacp_rate );
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_AD_LACP_RATE attribute: %m");
+ }
+
+ if (b->miimon != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_MIIMON, b->miimon / USEC_PER_MSEC);
+ if (r < 0)
+ log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_BOND_MIIMON attribute: %m");
+ }
+
+ if (b->downdelay != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_DOWNDELAY, b->downdelay / USEC_PER_MSEC);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_DOWNDELAY attribute: %m");
+ }
+
+ if (b->updelay != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_UPDELAY, b->updelay / USEC_PER_MSEC);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_UPDELAY attribute: %m");
+ }
+
+ if (b->arp_interval != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_ARP_INTERVAL, b->arp_interval / USEC_PER_MSEC);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_ARP_INTERVAL attribute: %m");
+
+ if ((b->lp_interval >= LEARNING_PACKETS_INTERVAL_MIN_SEC) &&
+ (b->lp_interval <= LEARNING_PACKETS_INTERVAL_MAX_SEC)) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_LP_INTERVAL, b->lp_interval / USEC_PER_SEC);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_LP_INTERVAL attribute: %m");
+ }
+ }
+
+ if (b->ad_select != _NETDEV_BOND_AD_SELECT_INVALID &&
+ b->mode == NETDEV_BOND_MODE_802_3AD) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_AD_SELECT, b->ad_select);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_AD_SELECT attribute: %m");
+ }
+
+ if (b->fail_over_mac != _NETDEV_BOND_FAIL_OVER_MAC_INVALID &&
+ b->mode == NETDEV_BOND_MODE_ACTIVE_BACKUP) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_FAIL_OVER_MAC, b->fail_over_mac);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_FAIL_OVER_MAC attribute: %m");
+ }
+
+ if (b->arp_validate != _NETDEV_BOND_ARP_VALIDATE_INVALID) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_ARP_VALIDATE, b->arp_validate);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_ARP_VALIDATE attribute: %m");
+ }
+
+ if (b->arp_all_targets != _NETDEV_BOND_ARP_ALL_TARGETS_INVALID) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_ARP_ALL_TARGETS, b->arp_all_targets);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_ARP_ALL_TARGETS attribute: %m");
+ }
+
+ if (b->primary_reselect != _NETDEV_BOND_PRIMARY_RESELECT_INVALID) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_PRIMARY_RESELECT, b->primary_reselect);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_PRIMARY_RESELECT attribute: %m");
+ }
+
+ if (b->resend_igmp <= RESEND_IGMP_MAX) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_RESEND_IGMP, b->resend_igmp);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_RESEND_IGMP attribute: %m");
+ }
+
+ if (b->packets_per_slave <= PACKETS_PER_SLAVE_MAX &&
+ b->mode == NETDEV_BOND_MODE_BALANCE_RR) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_PACKETS_PER_SLAVE, b->packets_per_slave);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_PACKETS_PER_SLAVE attribute: %m");
+ }
+
+ if (b->num_grat_arp <= GRATUITOUS_ARP_MAX) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_NUM_PEER_NOTIF, b->num_grat_arp);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_NUM_PEER_NOTIF attribute: %m");
+ }
+
+ if (b->min_links != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_MIN_LINKS, b->min_links);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_MIN_LINKS attribute: %m");
+ }
+
+ if (b->ad_actor_sys_prio != 0) {
+ r = sd_netlink_message_append_u16(m, IFLA_BOND_AD_ACTOR_SYS_PRIO, b->ad_actor_sys_prio);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_AD_ACTOR_SYS_PRIO attribute: %m");
+ }
+
+ if (b->ad_user_port_key != 0) {
+ r = sd_netlink_message_append_u16(m, IFLA_BOND_AD_USER_PORT_KEY, b->ad_user_port_key);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_AD_USER_PORT_KEY attribute: %m");
+ }
+
+ if (b->ad_actor_system) {
+ r = sd_netlink_message_append_ether_addr(m, IFLA_BOND_AD_ACTOR_SYSTEM, b->ad_actor_system);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_AD_ACTOR_SYSTEM attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_ALL_SLAVES_ACTIVE, b->all_slaves_active);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_ALL_SLAVES_ACTIVE attribute: %m");
+
+ if (b->tlb_dynamic_lb >= 0) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_TLB_DYNAMIC_LB, b->tlb_dynamic_lb);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_TLB_DYNAMIC_LB attribute: %m");
+ }
+
+ if (b->arp_interval > 0) {
+ if (b->n_arp_ip_targets > 0) {
+
+ r = sd_netlink_message_open_container(m, IFLA_BOND_ARP_IP_TARGET);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not open contaniner IFLA_BOND_ARP_IP_TARGET : %m");
+
+ LIST_FOREACH(arp_ip_target, target, b->arp_ip_targets) {
+ r = sd_netlink_message_append_u32(m, i++, target->ip.in.s_addr);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_ARP_ALL_TARGETS attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not close contaniner IFLA_BOND_ARP_IP_TARGET : %m");
+ }
+ }
+
+ return 0;
+}
+
+int config_parse_arp_ip_target_address(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Bond *b = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ for (;;) {
+ _cleanup_free_ ArpIpTarget *buffer = NULL;
+ _cleanup_free_ char *n = NULL;
+ int f;
+
+ r = extract_first_word(&rvalue, &n, NULL, 0);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Bond ARP ip target address, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ if (r == 0)
+ break;
+
+ buffer = new0(ArpIpTarget, 1);
+ if (!buffer)
+ return -ENOMEM;
+
+ r = in_addr_from_string_auto(n, &f, &buffer->ip);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Bond ARP ip target address is invalid, ignoring assignment: %s", n);
+ return 0;
+ }
+
+ if (f != AF_INET) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Bond ARP ip target address is invalid, ignoring assignment: %s", n);
+ return 0;
+ }
+
+ LIST_PREPEND(arp_ip_target, b->arp_ip_targets, TAKE_PTR(buffer));
+ b->n_arp_ip_targets++;
+ }
+
+ if (b->n_arp_ip_targets > NETDEV_BOND_ARP_TARGETS_MAX)
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "More than the maximum number of kernel-supported ARP ip targets specified: %d > %d",
+ b->n_arp_ip_targets, NETDEV_BOND_ARP_TARGETS_MAX);
+
+ return 0;
+}
+
+int config_parse_ad_actor_sys_prio(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Bond *b = userdata;
+ uint16_t v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou16(rvalue, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse actor system priority '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (v == 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse actor system priority '%s'. Range is [1,65535], ignoring.", rvalue);
+ return 0;
+ }
+
+ b->ad_actor_sys_prio = v;
+
+ return 0;
+}
+
+int config_parse_ad_user_port_key(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Bond *b = userdata;
+ uint16_t v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou16(rvalue, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse user port key '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (v > 1023) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse user port key '%s'. Range is [0,1023], ignoring.", rvalue);
+ return 0;
+ }
+
+ b->ad_user_port_key = v;
+
+ return 0;
+}
+
+int config_parse_ad_actor_system(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Bond *b = userdata;
+ _cleanup_free_ struct ether_addr *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ n = new0(struct ether_addr, 1);
+ if (!n)
+ return log_oom();
+
+ r = ether_addr_from_string(rvalue, n);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Not a valid MAC address %s. Ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ if (ether_addr_is_null(n) || (n->ether_addr_octet[0] & 0x01)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Not a valid MAC address %s, can not be null or multicast. Ignoring assignment.", rvalue);
+ return 0;
+ }
+
+ free_and_replace(b->ad_actor_system, n);
+
+ return 0;
+}
+
+static void bond_done(NetDev *netdev) {
+ ArpIpTarget *t = NULL, *n = NULL;
+ Bond *b;
+
+ assert(netdev);
+
+ b = BOND(netdev);
+
+ assert(b);
+
+ free(b->ad_actor_system);
+
+ LIST_FOREACH_SAFE(arp_ip_target, t, n, b->arp_ip_targets)
+ free(t);
+
+ b->arp_ip_targets = NULL;
+}
+
+static void bond_init(NetDev *netdev) {
+ Bond *b;
+
+ assert(netdev);
+
+ b = BOND(netdev);
+
+ assert(b);
+
+ b->mode = _NETDEV_BOND_MODE_INVALID;
+ b->xmit_hash_policy = _NETDEV_BOND_XMIT_HASH_POLICY_INVALID;
+ b->lacp_rate = _NETDEV_BOND_LACP_RATE_INVALID;
+ b->ad_select = _NETDEV_BOND_AD_SELECT_INVALID;
+ b->fail_over_mac = _NETDEV_BOND_FAIL_OVER_MAC_INVALID;
+ b->arp_validate = _NETDEV_BOND_ARP_VALIDATE_INVALID;
+ b->arp_all_targets = _NETDEV_BOND_ARP_ALL_TARGETS_INVALID;
+ b->primary_reselect = _NETDEV_BOND_PRIMARY_RESELECT_INVALID;
+
+ b->all_slaves_active = false;
+ b->tlb_dynamic_lb = -1;
+
+ b->resend_igmp = RESEND_IGMP_DEFAULT;
+ b->packets_per_slave = PACKETS_PER_SLAVE_DEFAULT;
+ b->num_grat_arp = GRATUITOUS_ARP_DEFAULT;
+ b->lp_interval = LEARNING_PACKETS_INTERVAL_MIN_SEC;
+
+ LIST_HEAD_INIT(b->arp_ip_targets);
+ b->n_arp_ip_targets = 0;
+}
+
+const NetDevVTable bond_vtable = {
+ .object_size = sizeof(Bond),
+ .init = bond_init,
+ .done = bond_done,
+ .sections = "Match\0NetDev\0Bond\0",
+ .fill_message_create = netdev_bond_fill_message_create,
+ .create_type = NETDEV_CREATE_MASTER,
+};
diff --git a/src/network/netdev/bond.h b/src/network/netdev/bond.h
new file mode 100644
index 0000000..31b922b
--- /dev/null
+++ b/src/network/netdev/bond.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "in-addr-util.h"
+#include "list.h"
+
+#include "netdev/netdev.h"
+
+/*
+ * Maximum number of targets supported by the kernel for a single
+ * bond netdev.
+ */
+#define NETDEV_BOND_ARP_TARGETS_MAX 16
+
+typedef enum BondMode {
+ NETDEV_BOND_MODE_BALANCE_RR,
+ NETDEV_BOND_MODE_ACTIVE_BACKUP,
+ NETDEV_BOND_MODE_BALANCE_XOR,
+ NETDEV_BOND_MODE_BROADCAST,
+ NETDEV_BOND_MODE_802_3AD,
+ NETDEV_BOND_MODE_BALANCE_TLB,
+ NETDEV_BOND_MODE_BALANCE_ALB,
+ _NETDEV_BOND_MODE_MAX,
+ _NETDEV_BOND_MODE_INVALID = -1
+} BondMode;
+
+typedef enum BondXmitHashPolicy {
+ NETDEV_BOND_XMIT_HASH_POLICY_LAYER2,
+ NETDEV_BOND_XMIT_HASH_POLICY_LAYER34,
+ NETDEV_BOND_XMIT_HASH_POLICY_LAYER23,
+ NETDEV_BOND_XMIT_HASH_POLICY_ENCAP23,
+ NETDEV_BOND_XMIT_HASH_POLICY_ENCAP34,
+ _NETDEV_BOND_XMIT_HASH_POLICY_MAX,
+ _NETDEV_BOND_XMIT_HASH_POLICY_INVALID = -1
+} BondXmitHashPolicy;
+
+typedef enum BondLacpRate {
+ NETDEV_BOND_LACP_RATE_SLOW,
+ NETDEV_BOND_LACP_RATE_FAST,
+ _NETDEV_BOND_LACP_RATE_MAX,
+ _NETDEV_BOND_LACP_RATE_INVALID = -1,
+} BondLacpRate;
+
+typedef enum BondAdSelect {
+ NETDEV_BOND_AD_SELECT_STABLE,
+ NETDEV_BOND_AD_SELECT_BANDWIDTH,
+ NETDEV_BOND_AD_SELECT_COUNT,
+ _NETDEV_BOND_AD_SELECT_MAX,
+ _NETDEV_BOND_AD_SELECT_INVALID = -1,
+} BondAdSelect;
+
+typedef enum BondFailOverMac {
+ NETDEV_BOND_FAIL_OVER_MAC_NONE,
+ NETDEV_BOND_FAIL_OVER_MAC_ACTIVE,
+ NETDEV_BOND_FAIL_OVER_MAC_FOLLOW,
+ _NETDEV_BOND_FAIL_OVER_MAC_MAX,
+ _NETDEV_BOND_FAIL_OVER_MAC_INVALID = -1,
+} BondFailOverMac;
+
+typedef enum BondArpValidate {
+ NETDEV_BOND_ARP_VALIDATE_NONE,
+ NETDEV_BOND_ARP_VALIDATE_ACTIVE,
+ NETDEV_BOND_ARP_VALIDATE_BACKUP,
+ NETDEV_BOND_ARP_VALIDATE_ALL,
+ _NETDEV_BOND_ARP_VALIDATE_MAX,
+ _NETDEV_BOND_ARP_VALIDATE_INVALID = -1,
+} BondArpValidate;
+
+typedef enum BondArpAllTargets {
+ NETDEV_BOND_ARP_ALL_TARGETS_ANY,
+ NETDEV_BOND_ARP_ALL_TARGETS_ALL,
+ _NETDEV_BOND_ARP_ALL_TARGETS_MAX,
+ _NETDEV_BOND_ARP_ALL_TARGETS_INVALID = -1,
+} BondArpAllTargets;
+
+typedef enum BondPrimaryReselect {
+ NETDEV_BOND_PRIMARY_RESELECT_ALWAYS,
+ NETDEV_BOND_PRIMARY_RESELECT_BETTER,
+ NETDEV_BOND_PRIMARY_RESELECT_FAILURE,
+ _NETDEV_BOND_PRIMARY_RESELECT_MAX,
+ _NETDEV_BOND_PRIMARY_RESELECT_INVALID = -1,
+} BondPrimaryReselect;
+
+typedef struct ArpIpTarget {
+ union in_addr_union ip;
+
+ LIST_FIELDS(struct ArpIpTarget, arp_ip_target);
+} ArpIpTarget;
+
+typedef struct Bond {
+ NetDev meta;
+
+ BondMode mode;
+ BondXmitHashPolicy xmit_hash_policy;
+ BondLacpRate lacp_rate;
+ BondAdSelect ad_select;
+ BondFailOverMac fail_over_mac;
+ BondArpValidate arp_validate;
+ BondArpAllTargets arp_all_targets;
+ BondPrimaryReselect primary_reselect;
+
+ int tlb_dynamic_lb;
+
+ bool all_slaves_active;
+
+ unsigned resend_igmp;
+ unsigned packets_per_slave;
+ unsigned num_grat_arp;
+ unsigned min_links;
+
+ uint16_t ad_actor_sys_prio;
+ uint16_t ad_user_port_key;
+ struct ether_addr *ad_actor_system;
+
+ usec_t miimon;
+ usec_t updelay;
+ usec_t downdelay;
+ usec_t arp_interval;
+ usec_t lp_interval;
+
+ int n_arp_ip_targets;
+ ArpIpTarget *arp_ip_targets;
+} Bond;
+
+DEFINE_NETDEV_CAST(BOND, Bond);
+extern const NetDevVTable bond_vtable;
+
+const char *bond_mode_to_string(BondMode d) _const_;
+BondMode bond_mode_from_string(const char *d) _pure_;
+
+const char *bond_xmit_hash_policy_to_string(BondXmitHashPolicy d) _const_;
+BondXmitHashPolicy bond_xmit_hash_policy_from_string(const char *d) _pure_;
+
+const char *bond_lacp_rate_to_string(BondLacpRate d) _const_;
+BondLacpRate bond_lacp_rate_from_string(const char *d) _pure_;
+
+const char *bond_fail_over_mac_to_string(BondFailOverMac d) _const_;
+BondFailOverMac bond_fail_over_mac_from_string(const char *d) _pure_;
+
+const char *bond_ad_select_to_string(BondAdSelect d) _const_;
+BondAdSelect bond_ad_select_from_string(const char *d) _pure_;
+
+const char *bond_arp_validate_to_string(BondArpValidate d) _const_;
+BondArpValidate bond_arp_validate_from_string(const char *d) _pure_;
+
+const char *bond_arp_all_targets_to_string(BondArpAllTargets d) _const_;
+BondArpAllTargets bond_arp_all_targets_from_string(const char *d) _pure_;
+
+const char *bond_primary_reselect_to_string(BondPrimaryReselect d) _const_;
+BondPrimaryReselect bond_primary_reselect_from_string(const char *d) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_xmit_hash_policy);
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_lacp_rate);
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_ad_select);
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_fail_over_mac);
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_arp_validate);
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_arp_all_targets);
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_primary_reselect);
+CONFIG_PARSER_PROTOTYPE(config_parse_arp_ip_target_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_ad_actor_sys_prio);
+CONFIG_PARSER_PROTOTYPE(config_parse_ad_user_port_key);
+CONFIG_PARSER_PROTOTYPE(config_parse_ad_actor_system);
diff --git a/src/network/netdev/bridge.c b/src/network/netdev/bridge.c
new file mode 100644
index 0000000..aadb3ab
--- /dev/null
+++ b/src/network/netdev/bridge.c
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+
+#include "missing.h"
+#include "netlink-util.h"
+#include "netdev/bridge.h"
+#include "networkd-manager.h"
+#include "vlan-util.h"
+
+/* callback for brige netdev's parameter set */
+static int netdev_bridge_set_handler(sd_netlink *rtnl, sd_netlink_message *m, NetDev *netdev) {
+ int r;
+
+ assert(netdev);
+ assert(m);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0) {
+ log_netdev_warning_errno(netdev, r, "Bridge parameters could not be set: %m");
+ return 1;
+ }
+
+ log_netdev_debug(netdev, "Bridge parameters set success");
+
+ return 1;
+}
+
+static int netdev_bridge_post_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ Bridge *b;
+ int r;
+
+ assert(netdev);
+
+ b = BRIDGE(netdev);
+
+ assert(b);
+
+ r = sd_rtnl_message_new_link(netdev->manager->rtnl, &req, RTM_NEWLINK, netdev->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_netlink_message_set_flags(req, NLM_F_REQUEST | NLM_F_ACK);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set netlink flags: %m");
+
+ r = sd_netlink_message_open_container(req, IFLA_LINKINFO);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = sd_netlink_message_open_container_union(req, IFLA_INFO_DATA, netdev_kind_to_string(netdev->kind));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ /* convert to jiffes */
+ if (b->forward_delay != USEC_INFINITY) {
+ r = sd_netlink_message_append_u32(req, IFLA_BR_FORWARD_DELAY, usec_to_jiffies(b->forward_delay));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_FORWARD_DELAY attribute: %m");
+ }
+
+ if (b->hello_time > 0) {
+ r = sd_netlink_message_append_u32(req, IFLA_BR_HELLO_TIME, usec_to_jiffies(b->hello_time));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_HELLO_TIME attribute: %m");
+ }
+
+ if (b->max_age > 0) {
+ r = sd_netlink_message_append_u32(req, IFLA_BR_MAX_AGE, usec_to_jiffies(b->max_age));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_MAX_AGE attribute: %m");
+ }
+
+ if (b->ageing_time != USEC_INFINITY) {
+ r = sd_netlink_message_append_u32(req, IFLA_BR_AGEING_TIME, usec_to_jiffies(b->ageing_time));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_AGEING_TIME attribute: %m");
+ }
+
+ if (b->priority > 0) {
+ r = sd_netlink_message_append_u16(req, IFLA_BR_PRIORITY, b->priority);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_PRIORITY attribute: %m");
+ }
+
+ if (b->group_fwd_mask > 0) {
+ r = sd_netlink_message_append_u16(req, IFLA_BR_GROUP_FWD_MASK, b->group_fwd_mask);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_GROUP_FWD_MASK attribute: %m");
+ }
+
+ if (b->default_pvid != VLANID_INVALID) {
+ r = sd_netlink_message_append_u16(req, IFLA_BR_VLAN_DEFAULT_PVID, b->default_pvid);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_VLAN_DEFAULT_PVID attribute: %m");
+ }
+
+ if (b->mcast_querier >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BR_MCAST_QUERIER, b->mcast_querier);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_MCAST_QUERIER attribute: %m");
+ }
+
+ if (b->mcast_snooping >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BR_MCAST_SNOOPING, b->mcast_snooping);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_MCAST_SNOOPING attribute: %m");
+ }
+
+ if (b->vlan_filtering >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BR_VLAN_FILTERING, b->vlan_filtering);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_VLAN_FILTERING attribute: %m");
+ }
+
+ if (b->stp >= 0) {
+ r = sd_netlink_message_append_u32(req, IFLA_BR_STP_STATE, b->stp);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_STP_STATE attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ r = netlink_call_async(netdev->manager->rtnl, NULL, req, netdev_bridge_set_handler,
+ netdev_destroy_callback, netdev);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not send rtnetlink message: %m");
+
+ netdev_ref(netdev);
+
+ return r;
+}
+
+static void bridge_init(NetDev *n) {
+ Bridge *b;
+
+ b = BRIDGE(n);
+
+ assert(b);
+
+ b->mcast_querier = -1;
+ b->mcast_snooping = -1;
+ b->vlan_filtering = -1;
+ b->stp = -1;
+ b->default_pvid = VLANID_INVALID;
+ b->forward_delay = USEC_INFINITY;
+ b->ageing_time = USEC_INFINITY;
+}
+
+const NetDevVTable bridge_vtable = {
+ .object_size = sizeof(Bridge),
+ .init = bridge_init,
+ .sections = "Match\0NetDev\0Bridge\0",
+ .post_create = netdev_bridge_post_create,
+ .create_type = NETDEV_CREATE_MASTER,
+};
diff --git a/src/network/netdev/bridge.h b/src/network/netdev/bridge.h
new file mode 100644
index 0000000..3edc93a
--- /dev/null
+++ b/src/network/netdev/bridge.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "netdev/netdev.h"
+
+typedef struct Bridge {
+ NetDev meta;
+
+ int mcast_querier;
+ int mcast_snooping;
+ int vlan_filtering;
+ int stp;
+ uint16_t priority;
+ uint16_t group_fwd_mask;
+ uint16_t default_pvid;
+
+ usec_t forward_delay;
+ usec_t hello_time;
+ usec_t max_age;
+ usec_t ageing_time;
+} Bridge;
+
+DEFINE_NETDEV_CAST(BRIDGE, Bridge);
+extern const NetDevVTable bridge_vtable;
diff --git a/src/network/netdev/dummy.c b/src/network/netdev/dummy.c
new file mode 100644
index 0000000..aded1c5
--- /dev/null
+++ b/src/network/netdev/dummy.c
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "netdev/dummy.h"
+
+const NetDevVTable dummy_vtable = {
+ .object_size = sizeof(Dummy),
+ .sections = "Match\0NetDev\0",
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+};
diff --git a/src/network/netdev/dummy.h b/src/network/netdev/dummy.h
new file mode 100644
index 0000000..93e0651
--- /dev/null
+++ b/src/network/netdev/dummy.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "netdev/netdev.h"
+
+typedef struct Dummy {
+ NetDev meta;
+} Dummy;
+
+DEFINE_NETDEV_CAST(DUMMY, Dummy);
+extern const NetDevVTable dummy_vtable;
diff --git a/src/network/netdev/fou-tunnel.c b/src/network/netdev/fou-tunnel.c
new file mode 100644
index 0000000..65dad38
--- /dev/null
+++ b/src/network/netdev/fou-tunnel.c
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <linux/ip.h>
+
+#include "conf-parser.h"
+#include "missing.h"
+#include "netdev/fou-tunnel.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "sd-netlink.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "util.h"
+
+static const char* const fou_encap_type_table[_NETDEV_FOO_OVER_UDP_ENCAP_MAX] = {
+ [NETDEV_FOO_OVER_UDP_ENCAP_DIRECT] = "FooOverUDP",
+ [NETDEV_FOO_OVER_UDP_ENCAP_GUE] = "GenericUDPEncapsulation",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(fou_encap_type, FooOverUDPEncapType);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_fou_encap_type, fou_encap_type, FooOverUDPEncapType,
+ "Failed to parse Encapsulation=");
+
+static int netdev_fill_fou_tunnel_message(NetDev *netdev, sd_netlink_message **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ FouTunnel *t;
+ int r;
+
+ assert(netdev);
+
+ t = FOU(netdev);
+
+ assert(t);
+
+ r = sd_genl_message_new(netdev->manager->genl, SD_GENL_FOU, FOU_CMD_ADD, &m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to allocate generic netlink message: %m");
+
+ r = sd_netlink_message_append_u16(m, FOU_ATTR_PORT, htobe16(t->port));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append FOU_ATTR_PORT attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, FOU_ATTR_TYPE, FOU_ENCAP_GUE);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append FOU_ATTR_TYPE attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, FOU_ATTR_AF, AF_INET);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append FOU_ATTR_AF attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, FOU_ATTR_IPPROTO, t->fou_protocol);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append FOU_ATTR_IPPROTO attribute: %m");
+
+ *ret = m;
+ m = NULL;
+
+ return 0;
+}
+
+static int netdev_fou_tunnel_create(NetDev *netdev) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ uint32_t serial;
+ FouTunnel *t;
+ int r;
+
+ assert(netdev);
+
+ t = FOU(netdev);
+
+ assert(t);
+
+ r = netdev_fill_fou_tunnel_message(netdev, &m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_send(netdev->manager->genl, m, &serial);
+ if (r < 0 && r != -EADDRINUSE)
+ return log_netdev_error_errno(netdev, r, "Failed to add FooOverUDP tunnel: %m");
+
+ return 0;
+}
+
+static int netdev_fou_tunnel_verify(NetDev *netdev, const char *filename) {
+ FouTunnel *t;
+
+ assert(netdev);
+ assert(filename);
+
+ t = FOU(netdev);
+
+ assert(t);
+
+ if (t->fou_encap_type == NETDEV_FOO_OVER_UDP_ENCAP_DIRECT && t->fou_protocol <= 0) {
+ log_netdev_error(netdev, "FooOverUDP protocol not configured in %s. Rejecting configuration.", filename);
+ return -EINVAL;
+ }
+
+ if (t->fou_encap_type == NETDEV_FOO_OVER_UDP_ENCAP_GUE && t->fou_protocol > 0) {
+ log_netdev_error(netdev, "FooOverUDP GUE can't be set with protocol configured in %s. Rejecting configuration.", filename);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void fou_tunnel_init(NetDev *netdev) {
+ FouTunnel *t;
+
+ assert(netdev);
+
+ t = FOU(netdev);
+
+ assert(t);
+
+ t->fou_encap_type = NETDEV_FOO_OVER_UDP_ENCAP_DIRECT;
+}
+
+const NetDevVTable foutnl_vtable = {
+ .object_size = sizeof(FouTunnel),
+ .init = fou_tunnel_init,
+ .sections = "Match\0NetDev\0FooOverUDP\0",
+ .create = netdev_fou_tunnel_create,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .config_verify = netdev_fou_tunnel_verify,
+};
diff --git a/src/network/netdev/fou-tunnel.h b/src/network/netdev/fou-tunnel.h
new file mode 100644
index 0000000..b8abed1
--- /dev/null
+++ b/src/network/netdev/fou-tunnel.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if HAVE_LINUX_FOU_H
+#include <linux/fou.h>
+#endif
+
+#include "in-addr-util.h"
+#include "missing_fou.h"
+#include "netdev/netdev.h"
+
+typedef enum FooOverUDPEncapType {
+ NETDEV_FOO_OVER_UDP_ENCAP_UNSPEC = FOU_ENCAP_UNSPEC,
+ NETDEV_FOO_OVER_UDP_ENCAP_DIRECT = FOU_ENCAP_DIRECT,
+ NETDEV_FOO_OVER_UDP_ENCAP_GUE = FOU_ENCAP_GUE,
+ _NETDEV_FOO_OVER_UDP_ENCAP_MAX,
+ _NETDEV_FOO_OVER_UDP_ENCAP_INVALID = -1,
+} FooOverUDPEncapType;
+
+typedef struct FouTunnel {
+ NetDev meta;
+
+ uint8_t fou_protocol;
+
+ uint16_t port;
+
+ FooOverUDPEncapType fou_encap_type;
+} FouTunnel;
+
+DEFINE_NETDEV_CAST(FOU, FouTunnel);
+extern const NetDevVTable foutnl_vtable;
+
+const char *fou_encap_type_to_string(FooOverUDPEncapType d) _const_;
+FooOverUDPEncapType fou_encap_type_from_string(const char *d) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_fou_encap_type);
diff --git a/src/network/netdev/geneve.c b/src/network/netdev/geneve.c
new file mode 100644
index 0000000..089bbfe
--- /dev/null
+++ b/src/network/netdev/geneve.c
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "extract-word.h"
+#include "geneve.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "missing.h"
+#include "networkd-manager.h"
+
+#define GENEVE_FLOW_LABEL_MAX_MASK 0xFFFFFU
+#define DEFAULT_GENEVE_DESTINATION_PORT 6081
+
+/* callback for geneve netdev's created without a backing Link */
+static int geneve_netdev_create_handler(sd_netlink *rtnl, sd_netlink_message *m, NetDev *netdev) {
+ int r;
+
+ assert(netdev);
+ assert(netdev->state != _NETDEV_STATE_INVALID);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r == -EEXIST)
+ log_netdev_info(netdev, "Geneve netdev exists, using existing without changing its parameters");
+ else if (r < 0) {
+ log_netdev_warning_errno(netdev, r, "Geneve netdev could not be created: %m");
+ netdev_drop(netdev);
+
+ return 1;
+ }
+
+ log_netdev_debug(netdev, "Geneve created");
+
+ return 1;
+}
+
+static int netdev_geneve_create(NetDev *netdev) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ Geneve *v;
+ int r;
+
+ assert(netdev);
+
+ v = GENEVE(netdev);
+
+ r = sd_rtnl_message_new_link(netdev->manager->rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not allocate RTM_NEWLINK message: %m");
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, netdev->ifname);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IFNAME, attribute: %m");
+
+ if (netdev->mac) {
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, netdev->mac);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_ADDRESS attribute: %m");
+ }
+
+ if (netdev->mtu != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_MTU, netdev->mtu);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_MTU attribute: %m");
+ }
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, netdev_kind_to_string(netdev->kind));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ if (v->id <= GENEVE_VID_MAX) {
+ r = sd_netlink_message_append_u32(m, IFLA_GENEVE_ID, v->id);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_ID attribute: %m");
+ }
+
+ if (!in_addr_is_null(v->remote_family, &v->remote)) {
+
+ if (v->remote_family == AF_INET)
+ r = sd_netlink_message_append_in_addr(m, IFLA_GENEVE_REMOTE, &v->remote.in);
+ else
+ r = sd_netlink_message_append_in6_addr(m, IFLA_GENEVE_REMOTE6, &v->remote.in6);
+
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_GROUP attribute: %m");
+
+ }
+
+ if (v->ttl) {
+ r = sd_netlink_message_append_u8(m, IFLA_GENEVE_TTL, v->ttl);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_TTL attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u8(m, IFLA_GENEVE_TOS, v->tos);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_TOS attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_GENEVE_UDP_CSUM, v->udpcsum);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_UDP_CSUM attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, v->udp6zerocsumtx);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_UDP_ZERO_CSUM6_TX attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, v->udp6zerocsumrx);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_UDP_ZERO_CSUM6_RX attribute: %m");
+
+ if (v->dest_port != DEFAULT_GENEVE_DESTINATION_PORT) {
+ r = sd_netlink_message_append_u16(m, IFLA_GENEVE_PORT, htobe16(v->dest_port));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_PORT attribute: %m");
+ }
+
+ if (v->flow_label > 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_GENEVE_LABEL, htobe32(v->flow_label));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_LABEL attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = netlink_call_async(netdev->manager->rtnl, NULL, m, geneve_netdev_create_handler,
+ netdev_destroy_callback, netdev);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not send rtnetlink message: %m");
+
+ netdev_ref(netdev);
+ netdev->state = NETDEV_STATE_CREATING;
+
+ log_netdev_debug(netdev, "Creating");
+
+ return r;
+}
+
+int config_parse_geneve_vni(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Geneve *v = userdata;
+ uint32_t f;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou32(rvalue, &f);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Geneve VNI '%s'.", rvalue);
+ return 0;
+ }
+
+ if (f > GENEVE_VID_MAX){
+ log_syntax(unit, LOG_ERR, filename, line, r, "Geneve VNI out is of range '%s'.", rvalue);
+ return 0;
+ }
+
+ v->id = f;
+
+ return 0;
+}
+
+int config_parse_geneve_address(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Geneve *v = userdata;
+ union in_addr_union *addr = data, buffer;
+ int r, f;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = in_addr_from_string_auto(rvalue, &f, &buffer);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "geneve '%s' address is invalid, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ r = in_addr_is_multicast(f, &buffer);
+ if (r > 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "geneve invalid multicast '%s' address, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ v->remote_family = f;
+ *addr = buffer;
+
+ return 0;
+}
+
+int config_parse_geneve_flow_label(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Geneve *v = userdata;
+ uint32_t f;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou32(rvalue, &f);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Geneve flow label '%s'.", rvalue);
+ return 0;
+ }
+
+ if (f & ~GENEVE_FLOW_LABEL_MAX_MASK) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Geneve flow label '%s' not valid. Flow label range should be [0-1048575].", rvalue);
+ return 0;
+ }
+
+ v->flow_label = f;
+
+ return 0;
+}
+
+static int netdev_geneve_verify(NetDev *netdev, const char *filename) {
+ Geneve *v = GENEVE(netdev);
+
+ assert(netdev);
+ assert(v);
+ assert(filename);
+
+ if (v->ttl == 0) {
+ log_warning("Invalid Geneve TTL value '0' configured in '%s'. Ignoring", filename);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void geneve_init(NetDev *netdev) {
+ Geneve *v;
+
+ assert(netdev);
+
+ v = GENEVE(netdev);
+
+ assert(v);
+
+ v->id = GENEVE_VID_MAX + 1;
+ v->dest_port = DEFAULT_GENEVE_DESTINATION_PORT;
+ v->udpcsum = false;
+ v->udp6zerocsumtx = false;
+ v->udp6zerocsumrx = false;
+}
+
+const NetDevVTable geneve_vtable = {
+ .object_size = sizeof(Geneve),
+ .init = geneve_init,
+ .sections = "Match\0NetDev\0GENEVE\0",
+ .create = netdev_geneve_create,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .config_verify = netdev_geneve_verify,
+};
diff --git a/src/network/netdev/geneve.h b/src/network/netdev/geneve.h
new file mode 100644
index 0000000..c201981
--- /dev/null
+++ b/src/network/netdev/geneve.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Geneve Geneve;
+
+#include "in-addr-util.h"
+#include "netdev.h"
+#include "networkd-link.h"
+#include "networkd-network.h"
+
+#define GENEVE_VID_MAX (1u << 24) - 1
+
+struct Geneve {
+ NetDev meta;
+
+ uint32_t id;
+ uint32_t flow_label;
+
+ int remote_family;
+
+ uint8_t tos;
+ uint8_t ttl;
+
+ uint16_t dest_port;
+
+ bool udpcsum;
+ bool udp6zerocsumtx;
+ bool udp6zerocsumrx;
+
+ union in_addr_union remote;
+};
+
+DEFINE_NETDEV_CAST(GENEVE, Geneve);
+extern const NetDevVTable geneve_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_geneve_vni);
+CONFIG_PARSER_PROTOTYPE(config_parse_geneve_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_geneve_flow_label);
diff --git a/src/network/netdev/ipvlan.c b/src/network/netdev/ipvlan.c
new file mode 100644
index 0000000..5bb6a5b
--- /dev/null
+++ b/src/network/netdev/ipvlan.c
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+
+#include "conf-parser.h"
+#include "netdev/ipvlan.h"
+#include "string-table.h"
+
+static const char* const ipvlan_mode_table[_NETDEV_IPVLAN_MODE_MAX] = {
+ [NETDEV_IPVLAN_MODE_L2] = "L2",
+ [NETDEV_IPVLAN_MODE_L3] = "L3",
+ [NETDEV_IPVLAN_MODE_L3S] = "L3S",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(ipvlan_mode, IPVlanMode);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_ipvlan_mode, ipvlan_mode, IPVlanMode, "Failed to parse ipvlan mode");
+
+static const char* const ipvlan_flags_table[_NETDEV_IPVLAN_FLAGS_MAX] = {
+ [NETDEV_IPVLAN_FLAGS_BRIGDE] = "bridge",
+ [NETDEV_IPVLAN_FLAGS_PRIVATE] = "private",
+ [NETDEV_IPVLAN_FLAGS_VEPA] = "vepa",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(ipvlan_flags, IPVlanFlags);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_ipvlan_flags, ipvlan_flags, IPVlanFlags, "Failed to parse ipvlan flags");
+
+static int netdev_ipvlan_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *req) {
+ IPVlan *m;
+ int r;
+
+ assert(netdev);
+ assert(link);
+ assert(netdev->ifname);
+
+ m = IPVLAN(netdev);
+
+ assert(m);
+
+ if (m->mode != _NETDEV_IPVLAN_MODE_INVALID) {
+ r = sd_netlink_message_append_u16(req, IFLA_IPVLAN_MODE, m->mode);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPVLAN_MODE attribute: %m");
+ }
+
+ if (m->flags != _NETDEV_IPVLAN_FLAGS_INVALID) {
+ r = sd_netlink_message_append_u16(req, IFLA_IPVLAN_FLAGS, m->flags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPVLAN_FLAGS attribute: %m");
+ }
+
+ return 0;
+}
+
+static void ipvlan_init(NetDev *n) {
+ IPVlan *m;
+
+ assert(n);
+
+ m = IPVLAN(n);
+
+ assert(m);
+
+ m->mode = _NETDEV_IPVLAN_MODE_INVALID;
+ m->flags = _NETDEV_IPVLAN_FLAGS_INVALID;
+}
+
+const NetDevVTable ipvlan_vtable = {
+ .object_size = sizeof(IPVlan),
+ .init = ipvlan_init,
+ .sections = "Match\0NetDev\0IPVLAN\0",
+ .fill_message_create = netdev_ipvlan_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+};
diff --git a/src/network/netdev/ipvlan.h b/src/network/netdev/ipvlan.h
new file mode 100644
index 0000000..fb426d3
--- /dev/null
+++ b/src/network/netdev/ipvlan.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/if_link.h>
+
+#include "missing_if_link.h"
+#include "netdev/netdev.h"
+
+typedef enum IPVlanMode {
+ NETDEV_IPVLAN_MODE_L2 = IPVLAN_MODE_L2,
+ NETDEV_IPVLAN_MODE_L3 = IPVLAN_MODE_L3,
+ NETDEV_IPVLAN_MODE_L3S = IPVLAN_MODE_L3S,
+ _NETDEV_IPVLAN_MODE_MAX,
+ _NETDEV_IPVLAN_MODE_INVALID = -1
+} IPVlanMode;
+
+typedef enum IPVlanFlags {
+ NETDEV_IPVLAN_FLAGS_BRIGDE,
+ NETDEV_IPVLAN_FLAGS_PRIVATE = IPVLAN_F_PRIVATE,
+ NETDEV_IPVLAN_FLAGS_VEPA = IPVLAN_F_VEPA,
+ _NETDEV_IPVLAN_FLAGS_MAX,
+ _NETDEV_IPVLAN_FLAGS_INVALID = -1
+} IPVlanFlags;
+
+typedef struct IPVlan {
+ NetDev meta;
+
+ IPVlanMode mode;
+ IPVlanFlags flags;
+} IPVlan;
+
+DEFINE_NETDEV_CAST(IPVLAN, IPVlan);
+extern const NetDevVTable ipvlan_vtable;
+
+const char *ipvlan_mode_to_string(IPVlanMode d) _const_;
+IPVlanMode ipvlan_mode_from_string(const char *d) _pure_;
+
+const char *ipvlan_flags_to_string(IPVlanFlags d) _const_;
+IPVlanFlags ipvlan_flags_from_string(const char *d) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ipvlan_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_ipvlan_flags);
diff --git a/src/network/netdev/macvlan.c b/src/network/netdev/macvlan.c
new file mode 100644
index 0000000..871f020
--- /dev/null
+++ b/src/network/netdev/macvlan.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+
+#include "conf-parser.h"
+#include "netdev/macvlan.h"
+#include "string-table.h"
+
+static const char* const macvlan_mode_table[_NETDEV_MACVLAN_MODE_MAX] = {
+ [NETDEV_MACVLAN_MODE_PRIVATE] = "private",
+ [NETDEV_MACVLAN_MODE_VEPA] = "vepa",
+ [NETDEV_MACVLAN_MODE_BRIDGE] = "bridge",
+ [NETDEV_MACVLAN_MODE_PASSTHRU] = "passthru",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(macvlan_mode, MacVlanMode);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_macvlan_mode, macvlan_mode, MacVlanMode, "Failed to parse macvlan mode");
+
+static int netdev_macvlan_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *req) {
+ MacVlan *m;
+ int r;
+
+ assert(netdev);
+ assert(link);
+ assert(netdev->ifname);
+
+ if (netdev->kind == NETDEV_KIND_MACVLAN)
+ m = MACVLAN(netdev);
+ else
+ m = MACVTAP(netdev);
+
+ assert(m);
+
+ if (m->mode != _NETDEV_MACVLAN_MODE_INVALID) {
+ r = sd_netlink_message_append_u32(req, IFLA_MACVLAN_MODE, m->mode);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_MACVLAN_MODE attribute: %m");
+ }
+
+ return 0;
+}
+
+static void macvlan_init(NetDev *n) {
+ MacVlan *m;
+
+ assert(n);
+
+ if (n->kind == NETDEV_KIND_MACVLAN)
+ m = MACVLAN(n);
+ else
+ m = MACVTAP(n);
+
+ assert(m);
+
+ m->mode = _NETDEV_MACVLAN_MODE_INVALID;
+}
+
+const NetDevVTable macvtap_vtable = {
+ .object_size = sizeof(MacVlan),
+ .init = macvlan_init,
+ .sections = "Match\0NetDev\0MACVTAP\0",
+ .fill_message_create = netdev_macvlan_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+};
+
+const NetDevVTable macvlan_vtable = {
+ .object_size = sizeof(MacVlan),
+ .init = macvlan_init,
+ .sections = "Match\0NetDev\0MACVLAN\0",
+ .fill_message_create = netdev_macvlan_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+};
diff --git a/src/network/netdev/macvlan.h b/src/network/netdev/macvlan.h
new file mode 100644
index 0000000..b473f1e
--- /dev/null
+++ b/src/network/netdev/macvlan.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct MacVlan MacVlan;
+
+#include "netdev/netdev.h"
+
+typedef enum MacVlanMode {
+ NETDEV_MACVLAN_MODE_PRIVATE = MACVLAN_MODE_PRIVATE,
+ NETDEV_MACVLAN_MODE_VEPA = MACVLAN_MODE_VEPA,
+ NETDEV_MACVLAN_MODE_BRIDGE = MACVLAN_MODE_BRIDGE,
+ NETDEV_MACVLAN_MODE_PASSTHRU = MACVLAN_MODE_PASSTHRU,
+ _NETDEV_MACVLAN_MODE_MAX,
+ _NETDEV_MACVLAN_MODE_INVALID = -1
+} MacVlanMode;
+
+struct MacVlan {
+ NetDev meta;
+
+ MacVlanMode mode;
+};
+
+DEFINE_NETDEV_CAST(MACVLAN, MacVlan);
+DEFINE_NETDEV_CAST(MACVTAP, MacVlan);
+extern const NetDevVTable macvlan_vtable;
+extern const NetDevVTable macvtap_vtable;
+
+const char *macvlan_mode_to_string(MacVlanMode d) _const_;
+MacVlanMode macvlan_mode_from_string(const char *d) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_macvlan_mode);
diff --git a/src/network/netdev/netdev-gperf.gperf b/src/network/netdev/netdev-gperf.gperf
new file mode 100644
index 0000000..f7ca98f
--- /dev/null
+++ b/src/network/netdev/netdev-gperf.gperf
@@ -0,0 +1,174 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "network-internal.h"
+#include "netdev/bond.h"
+#include "netdev/bridge.h"
+#include "netdev/geneve.h"
+#include "netdev/ipvlan.h"
+#include "netdev/macvlan.h"
+#include "netdev/tunnel.h"
+#include "netdev/tuntap.h"
+#include "netdev/veth.h"
+#include "netdev/vlan.h"
+#include "netdev/vxlan.h"
+#include "netdev/vrf.h"
+#include "netdev/netdev.h"
+#include "netdev/vxcan.h"
+#include "netdev/wireguard.h"
+#include "netdev/fou-tunnel.h"
+#include "vlan-util.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name network_netdev_gperf_hash
+%define lookup-function-name network_netdev_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Match.Host, config_parse_net_condition, CONDITION_HOST, offsetof(NetDev, match_host)
+Match.Virtualization, config_parse_net_condition, CONDITION_VIRTUALIZATION, offsetof(NetDev, match_virt)
+Match.KernelCommandLine, config_parse_net_condition, CONDITION_KERNEL_COMMAND_LINE, offsetof(NetDev, match_kernel_cmdline)
+Match.KernelVersion, config_parse_net_condition, CONDITION_KERNEL_VERSION, offsetof(NetDev, match_kernel_version)
+Match.Architecture, config_parse_net_condition, CONDITION_ARCHITECTURE, offsetof(NetDev, match_arch)
+NetDev.Description, config_parse_string, 0, offsetof(NetDev, description)
+NetDev.Name, config_parse_ifname, 0, offsetof(NetDev, ifname)
+NetDev.Kind, config_parse_netdev_kind, 0, offsetof(NetDev, kind)
+NetDev.MTUBytes, config_parse_mtu, AF_UNSPEC, offsetof(NetDev, mtu)
+NetDev.MACAddress, config_parse_hwaddr, 0, offsetof(NetDev, mac)
+VLAN.Id, config_parse_vlanid, 0, offsetof(VLan, id)
+VLAN.GVRP, config_parse_tristate, 0, offsetof(VLan, gvrp)
+VLAN.MVRP, config_parse_tristate, 0, offsetof(VLan, mvrp)
+VLAN.LooseBinding, config_parse_tristate, 0, offsetof(VLan, loose_binding)
+VLAN.ReorderHeader, config_parse_tristate, 0, offsetof(VLan, reorder_hdr)
+MACVLAN.Mode, config_parse_macvlan_mode, 0, offsetof(MacVlan, mode)
+MACVTAP.Mode, config_parse_macvlan_mode, 0, offsetof(MacVlan, mode)
+IPVLAN.Mode, config_parse_ipvlan_mode, 0, offsetof(IPVlan, mode)
+IPVLAN.Flags, config_parse_ipvlan_flags, 0, offsetof(IPVlan, flags)
+Tunnel.Local, config_parse_tunnel_address, 0, offsetof(Tunnel, local)
+Tunnel.Remote, config_parse_tunnel_address, 0, offsetof(Tunnel, remote)
+Tunnel.TOS, config_parse_unsigned, 0, offsetof(Tunnel, tos)
+Tunnel.TTL, config_parse_unsigned, 0, offsetof(Tunnel, ttl)
+Tunnel.Key, config_parse_tunnel_key, 0, offsetof(Tunnel, key)
+Tunnel.InputKey, config_parse_tunnel_key, 0, offsetof(Tunnel, ikey)
+Tunnel.OutputKey, config_parse_tunnel_key, 0, offsetof(Tunnel, okey)
+Tunnel.DiscoverPathMTU, config_parse_bool, 0, offsetof(Tunnel, pmtudisc)
+Tunnel.Mode, config_parse_ip6tnl_mode, 0, offsetof(Tunnel, ip6tnl_mode)
+Tunnel.IPv6FlowLabel, config_parse_ipv6_flowlabel, 0, offsetof(Tunnel, ipv6_flowlabel)
+Tunnel.CopyDSCP, config_parse_bool, 0, offsetof(Tunnel, copy_dscp)
+Tunnel.EncapsulationLimit, config_parse_encap_limit, 0, offsetof(Tunnel, encap_limit)
+Tunnel.Independent, config_parse_bool, 0, offsetof(Tunnel, independent)
+Tunnel.AllowLocalRemote, config_parse_tristate, 0, offsetof(Tunnel, allow_localremote)
+Tunnel.FooOverUDP, config_parse_bool, 0, offsetof(Tunnel, fou_tunnel)
+Tunnel.FOUDestinationPort, config_parse_ip_port, 0, offsetof(Tunnel, fou_destination_port)
+Tunnel.FOUSourcePort, config_parse_ip_port, 0, offsetof(Tunnel, encap_src_port)
+Tunnel.Encapsulation, config_parse_fou_encap_type, 0, offsetof(Tunnel, fou_encap_type)
+Tunnel.IPv6RapidDeploymentPrefix, config_parse_6rd_prefix, 0, 0
+Tunnel.ERSPANIndex, config_parse_uint32, 0, offsetof(Tunnel, erspan_index)
+Tunnel.SerializeTunneledPackets, config_parse_tristate, 0, offsetof(Tunnel, erspan_sequence)
+Tunnel.ISATAP, config_parse_tristate, 0, offsetof(Tunnel, isatap)
+FooOverUDP.Protocol, config_parse_uint8, 0, offsetof(FouTunnel, fou_protocol)
+FooOverUDP.Encapsulation, config_parse_fou_encap_type, 0, offsetof(FouTunnel, fou_encap_type)
+FooOverUDP.Port, config_parse_ip_port, 0, offsetof(FouTunnel, port)
+Peer.Name, config_parse_ifname, 0, offsetof(Veth, ifname_peer)
+Peer.MACAddress, config_parse_hwaddr, 0, offsetof(Veth, mac_peer)
+VXCAN.Peer, config_parse_ifname, 0, offsetof(VxCan, ifname_peer)
+VXLAN.Id, config_parse_uint64, 0, offsetof(VxLan, id)
+VXLAN.Group, config_parse_vxlan_address, 0, offsetof(VxLan, remote)
+VXLAN.Local, config_parse_vxlan_address, 0, offsetof(VxLan, local)
+VXLAN.Remote, config_parse_vxlan_address, 0, offsetof(VxLan, remote)
+VXLAN.TOS, config_parse_unsigned, 0, offsetof(VxLan, tos)
+VXLAN.TTL, config_parse_unsigned, 0, offsetof(VxLan, ttl)
+VXLAN.MacLearning, config_parse_bool, 0, offsetof(VxLan, learning)
+VXLAN.ARPProxy, config_parse_bool, 0, offsetof(VxLan, arp_proxy)
+VXLAN.ReduceARPProxy, config_parse_bool, 0, offsetof(VxLan, arp_proxy)
+VXLAN.L2MissNotification, config_parse_bool, 0, offsetof(VxLan, l2miss)
+VXLAN.L3MissNotification, config_parse_bool, 0, offsetof(VxLan, l3miss)
+VXLAN.RouteShortCircuit, config_parse_bool, 0, offsetof(VxLan, route_short_circuit)
+VXLAN.UDPCheckSum, config_parse_bool, 0, offsetof(VxLan, udpcsum)
+VXLAN.UDPChecksum, config_parse_bool, 0, offsetof(VxLan, udpcsum)
+VXLAN.UDP6ZeroCheckSumRx, config_parse_bool, 0, offsetof(VxLan, udp6zerocsumrx)
+VXLAN.UDP6ZeroChecksumRx, config_parse_bool, 0, offsetof(VxLan, udp6zerocsumrx)
+VXLAN.UDP6ZeroCheckSumTx, config_parse_bool, 0, offsetof(VxLan, udp6zerocsumtx)
+VXLAN.UDP6ZeroChecksumTx, config_parse_bool, 0, offsetof(VxLan, udp6zerocsumtx)
+VXLAN.RemoteChecksumTx, config_parse_bool, 0, offsetof(VxLan, remote_csum_tx)
+VXLAN.RemoteChecksumRx, config_parse_bool, 0, offsetof(VxLan, remote_csum_rx)
+VXLAN.FDBAgeingSec, config_parse_sec, 0, offsetof(VxLan, fdb_ageing)
+VXLAN.GroupPolicyExtension, config_parse_bool, 0, offsetof(VxLan, group_policy)
+VXLAN.MaximumFDBEntries, config_parse_unsigned, 0, offsetof(VxLan, max_fdb)
+VXLAN.PortRange, config_parse_port_range, 0, 0
+VXLAN.DestinationPort, config_parse_ip_port, 0, offsetof(VxLan, dest_port)
+VXLAN.FlowLabel, config_parse_flow_label, 0, 0
+GENEVE.Id, config_parse_geneve_vni, 0, offsetof(Geneve, id)
+GENEVE.Remote, config_parse_geneve_address, 0, offsetof(Geneve, remote)
+GENEVE.TOS, config_parse_uint8, 0, offsetof(Geneve, tos)
+GENEVE.TTL, config_parse_uint8, 0, offsetof(Geneve, ttl)
+GENEVE.UDPChecksum, config_parse_bool, 0, offsetof(Geneve, udpcsum)
+GENEVE.UDP6ZeroCheckSumRx, config_parse_bool, 0, offsetof(Geneve, udp6zerocsumrx)
+GENEVE.UDP6ZeroChecksumRx, config_parse_bool, 0, offsetof(Geneve, udp6zerocsumrx)
+GENEVE.UDP6ZeroCheckSumTx, config_parse_bool, 0, offsetof(Geneve, udp6zerocsumtx)
+GENEVE.UDP6ZeroChecksumTx, config_parse_bool, 0, offsetof(Geneve, udp6zerocsumtx)
+GENEVE.DestinationPort, config_parse_ip_port, 0, offsetof(Geneve, dest_port)
+GENEVE.FlowLabel, config_parse_geneve_flow_label, 0, 0
+Tun.OneQueue, config_parse_bool, 0, offsetof(TunTap, one_queue)
+Tun.MultiQueue, config_parse_bool, 0, offsetof(TunTap, multi_queue)
+Tun.PacketInfo, config_parse_bool, 0, offsetof(TunTap, packet_info)
+Tun.User, config_parse_string, 0, offsetof(TunTap, user_name)
+Tun.Group, config_parse_string, 0, offsetof(TunTap, group_name)
+Tap.OneQueue, config_parse_bool, 0, offsetof(TunTap, one_queue)
+Tap.MultiQueue, config_parse_bool, 0, offsetof(TunTap, multi_queue)
+Tap.PacketInfo, config_parse_bool, 0, offsetof(TunTap, packet_info)
+Tap.VNetHeader, config_parse_bool, 0, offsetof(TunTap, vnet_hdr)
+Tap.User, config_parse_string, 0, offsetof(TunTap, user_name)
+Tap.Group, config_parse_string, 0, offsetof(TunTap, group_name)
+Bond.Mode, config_parse_bond_mode, 0, offsetof(Bond, mode)
+Bond.TransmitHashPolicy, config_parse_bond_xmit_hash_policy, 0, offsetof(Bond, xmit_hash_policy)
+Bond.LACPTransmitRate, config_parse_bond_lacp_rate, 0, offsetof(Bond, lacp_rate)
+Bond.AdSelect, config_parse_bond_ad_select, 0, offsetof(Bond, ad_select)
+Bond.FailOverMACPolicy, config_parse_bond_fail_over_mac, 0, offsetof(Bond, fail_over_mac)
+Bond.ARPIPTargets, config_parse_arp_ip_target_address, 0, 0
+Bond.ARPValidate, config_parse_bond_arp_validate, 0, offsetof(Bond, arp_validate)
+Bond.ARPAllTargets, config_parse_bond_arp_all_targets, 0, offsetof(Bond, arp_all_targets)
+Bond.PrimaryReselectPolicy, config_parse_bond_primary_reselect, 0, offsetof(Bond, primary_reselect)
+Bond.ResendIGMP, config_parse_unsigned, 0, offsetof(Bond, resend_igmp)
+Bond.PacketsPerSlave, config_parse_unsigned, 0, offsetof(Bond, packets_per_slave)
+Bond.GratuitousARP, config_parse_unsigned, 0, offsetof(Bond, num_grat_arp)
+Bond.AllSlavesActive, config_parse_bool, 0, offsetof(Bond, all_slaves_active)
+Bond.DynamicTransmitLoadBalancing, config_parse_tristate, 0, offsetof(Bond, tlb_dynamic_lb)
+Bond.MinLinks, config_parse_unsigned, 0, offsetof(Bond, min_links)
+Bond.MIIMonitorSec, config_parse_sec, 0, offsetof(Bond, miimon)
+Bond.UpDelaySec, config_parse_sec, 0, offsetof(Bond, updelay)
+Bond.DownDelaySec, config_parse_sec, 0, offsetof(Bond, downdelay)
+Bond.ARPIntervalSec, config_parse_sec, 0, offsetof(Bond, arp_interval)
+Bond.LearnPacketIntervalSec, config_parse_sec, 0, offsetof(Bond, lp_interval)
+Bond.AdActorSystemPriority, config_parse_ad_actor_sys_prio, 0, offsetof(Bond, ad_actor_sys_prio)
+Bond.AdUserPortKey, config_parse_ad_user_port_key, 0, offsetof(Bond, ad_user_port_key)
+Bond.AdActorSystem, config_parse_ad_actor_system, 0, offsetof(Bond, ad_actor_system)
+Bridge.HelloTimeSec, config_parse_sec, 0, offsetof(Bridge, hello_time)
+Bridge.MaxAgeSec, config_parse_sec, 0, offsetof(Bridge, max_age)
+Bridge.AgeingTimeSec, config_parse_sec, 0, offsetof(Bridge, ageing_time)
+Bridge.ForwardDelaySec, config_parse_sec, 0, offsetof(Bridge, forward_delay)
+Bridge.Priority, config_parse_uint16, 0, offsetof(Bridge, priority)
+Bridge.GroupForwardMask, config_parse_uint16, 0, offsetof(Bridge, group_fwd_mask)
+Bridge.DefaultPVID, config_parse_default_port_vlanid, 0, offsetof(Bridge, default_pvid)
+Bridge.MulticastQuerier, config_parse_tristate, 0, offsetof(Bridge, mcast_querier)
+Bridge.MulticastSnooping, config_parse_tristate, 0, offsetof(Bridge, mcast_snooping)
+Bridge.VLANFiltering, config_parse_tristate, 0, offsetof(Bridge, vlan_filtering)
+Bridge.STP, config_parse_tristate, 0, offsetof(Bridge, stp)
+VRF.TableId, config_parse_uint32, 0, offsetof(Vrf, table) /* deprecated */
+VRF.Table, config_parse_uint32, 0, offsetof(Vrf, table)
+WireGuard.FwMark, config_parse_unsigned, 0, offsetof(Wireguard, fwmark)
+WireGuard.ListenPort, config_parse_wireguard_listen_port, 0, offsetof(Wireguard, port)
+WireGuard.PrivateKey, config_parse_wireguard_private_key, 0, 0
+WireGuardPeer.AllowedIPs, config_parse_wireguard_allowed_ips, 0, 0
+WireGuardPeer.Endpoint, config_parse_wireguard_endpoint, 0, 0
+WireGuardPeer.PublicKey, config_parse_wireguard_public_key, 0, 0
+WireGuardPeer.PresharedKey, config_parse_wireguard_preshared_key, 0, 0
+WireGuardPeer.PersistentKeepalive, config_parse_wireguard_keepalive, 0, 0
diff --git a/src/network/netdev/netdev.c b/src/network/netdev/netdev.c
new file mode 100644
index 0000000..0263917
--- /dev/null
+++ b/src/network/netdev/netdev.c
@@ -0,0 +1,816 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "fd-util.h"
+#include "list.h"
+#include "netlink-util.h"
+#include "network-internal.h"
+#include "netdev/netdev.h"
+#include "networkd-manager.h"
+#include "networkd-link.h"
+#include "siphash24.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+
+#include "netdev/bridge.h"
+#include "netdev/bond.h"
+#include "netdev/geneve.h"
+#include "netdev/vlan.h"
+#include "netdev/macvlan.h"
+#include "netdev/ipvlan.h"
+#include "netdev/vxlan.h"
+#include "netdev/tunnel.h"
+#include "netdev/tuntap.h"
+#include "netdev/veth.h"
+#include "netdev/dummy.h"
+#include "netdev/vrf.h"
+#include "netdev/vcan.h"
+#include "netdev/vxcan.h"
+#include "netdev/wireguard.h"
+#include "netdev/netdevsim.h"
+#include "netdev/fou-tunnel.h"
+
+const NetDevVTable * const netdev_vtable[_NETDEV_KIND_MAX] = {
+ [NETDEV_KIND_BRIDGE] = &bridge_vtable,
+ [NETDEV_KIND_BOND] = &bond_vtable,
+ [NETDEV_KIND_VLAN] = &vlan_vtable,
+ [NETDEV_KIND_MACVLAN] = &macvlan_vtable,
+ [NETDEV_KIND_MACVTAP] = &macvtap_vtable,
+ [NETDEV_KIND_IPVLAN] = &ipvlan_vtable,
+ [NETDEV_KIND_VXLAN] = &vxlan_vtable,
+ [NETDEV_KIND_IPIP] = &ipip_vtable,
+ [NETDEV_KIND_GRE] = &gre_vtable,
+ [NETDEV_KIND_GRETAP] = &gretap_vtable,
+ [NETDEV_KIND_IP6GRE] = &ip6gre_vtable,
+ [NETDEV_KIND_IP6GRETAP] = &ip6gretap_vtable,
+ [NETDEV_KIND_SIT] = &sit_vtable,
+ [NETDEV_KIND_VTI] = &vti_vtable,
+ [NETDEV_KIND_VTI6] = &vti6_vtable,
+ [NETDEV_KIND_VETH] = &veth_vtable,
+ [NETDEV_KIND_DUMMY] = &dummy_vtable,
+ [NETDEV_KIND_TUN] = &tun_vtable,
+ [NETDEV_KIND_TAP] = &tap_vtable,
+ [NETDEV_KIND_IP6TNL] = &ip6tnl_vtable,
+ [NETDEV_KIND_VRF] = &vrf_vtable,
+ [NETDEV_KIND_VCAN] = &vcan_vtable,
+ [NETDEV_KIND_GENEVE] = &geneve_vtable,
+ [NETDEV_KIND_VXCAN] = &vxcan_vtable,
+ [NETDEV_KIND_WIREGUARD] = &wireguard_vtable,
+ [NETDEV_KIND_NETDEVSIM] = &netdevsim_vtable,
+ [NETDEV_KIND_FOU] = &foutnl_vtable,
+ [NETDEV_KIND_ERSPAN] = &erspan_vtable,
+};
+
+static const char* const netdev_kind_table[_NETDEV_KIND_MAX] = {
+ [NETDEV_KIND_BRIDGE] = "bridge",
+ [NETDEV_KIND_BOND] = "bond",
+ [NETDEV_KIND_VLAN] = "vlan",
+ [NETDEV_KIND_MACVLAN] = "macvlan",
+ [NETDEV_KIND_MACVTAP] = "macvtap",
+ [NETDEV_KIND_IPVLAN] = "ipvlan",
+ [NETDEV_KIND_VXLAN] = "vxlan",
+ [NETDEV_KIND_IPIP] = "ipip",
+ [NETDEV_KIND_GRE] = "gre",
+ [NETDEV_KIND_GRETAP] = "gretap",
+ [NETDEV_KIND_IP6GRE] = "ip6gre",
+ [NETDEV_KIND_IP6GRETAP] = "ip6gretap",
+ [NETDEV_KIND_SIT] = "sit",
+ [NETDEV_KIND_VETH] = "veth",
+ [NETDEV_KIND_VTI] = "vti",
+ [NETDEV_KIND_VTI6] = "vti6",
+ [NETDEV_KIND_DUMMY] = "dummy",
+ [NETDEV_KIND_TUN] = "tun",
+ [NETDEV_KIND_TAP] = "tap",
+ [NETDEV_KIND_IP6TNL] = "ip6tnl",
+ [NETDEV_KIND_VRF] = "vrf",
+ [NETDEV_KIND_VCAN] = "vcan",
+ [NETDEV_KIND_GENEVE] = "geneve",
+ [NETDEV_KIND_VXCAN] = "vxcan",
+ [NETDEV_KIND_WIREGUARD] = "wireguard",
+ [NETDEV_KIND_NETDEVSIM] = "netdevsim",
+ [NETDEV_KIND_FOU] = "fou",
+ [NETDEV_KIND_ERSPAN] = "erspan",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(netdev_kind, NetDevKind);
+
+int config_parse_netdev_kind(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ NetDevKind k, *kind = data;
+
+ assert(rvalue);
+ assert(data);
+
+ k = netdev_kind_from_string(rvalue);
+ if (k < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse netdev kind, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ if (*kind != _NETDEV_KIND_INVALID && *kind != k) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Specified netdev kind is different from the previous value '%s', ignoring assignment: %s",
+ netdev_kind_to_string(*kind), rvalue);
+ return 0;
+ }
+
+ *kind = k;
+
+ return 0;
+}
+
+static void netdev_callbacks_clear(NetDev *netdev) {
+ netdev_join_callback *callback;
+
+ if (!netdev)
+ return;
+
+ while ((callback = netdev->callbacks)) {
+ LIST_REMOVE(callbacks, netdev->callbacks, callback);
+ link_unref(callback->link);
+ free(callback);
+ }
+}
+
+bool netdev_is_managed(NetDev *netdev) {
+ if (!netdev || !netdev->manager || !netdev->ifname)
+ return false;
+
+ return hashmap_get(netdev->manager->netdevs, netdev->ifname) == netdev;
+}
+
+static void netdev_detach_from_manager(NetDev *netdev) {
+ if (netdev->ifname && netdev->manager)
+ hashmap_remove(netdev->manager->netdevs, netdev->ifname);
+}
+
+static NetDev *netdev_free(NetDev *netdev) {
+ assert(netdev);
+
+ netdev_callbacks_clear(netdev);
+
+ netdev_detach_from_manager(netdev);
+
+ free(netdev->filename);
+
+ free(netdev->description);
+ free(netdev->ifname);
+ free(netdev->mac);
+
+ condition_free_list(netdev->match_host);
+ condition_free_list(netdev->match_virt);
+ condition_free_list(netdev->match_kernel_cmdline);
+ condition_free_list(netdev->match_kernel_version);
+ condition_free_list(netdev->match_arch);
+
+ /* Invoke the per-kind done() destructor, but only if the state field is initialized. We conditionalize that
+ * because we parse .netdev files twice: once to determine the kind (with a short, minimal NetDev structure
+ * allocation, with no room for per-kind fields), and once to read the kind's properties (with a full,
+ * comprehensive NetDev structure allocation with enough space for whatever the specific kind needs). Now, in
+ * the first case we shouldn't try to destruct the per-kind NetDev fields on destruction, in the second case we
+ * should. We use the state field to discern the two cases: it's _NETDEV_STATE_INVALID on the first "raw"
+ * call. */
+ if (netdev->state != _NETDEV_STATE_INVALID &&
+ NETDEV_VTABLE(netdev) &&
+ NETDEV_VTABLE(netdev)->done)
+ NETDEV_VTABLE(netdev)->done(netdev);
+
+ return mfree(netdev);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(NetDev, netdev, netdev_free);
+
+void netdev_drop(NetDev *netdev) {
+ if (!netdev || netdev->state == NETDEV_STATE_LINGER)
+ return;
+
+ netdev->state = NETDEV_STATE_LINGER;
+
+ log_netdev_debug(netdev, "netdev removed");
+
+ netdev_callbacks_clear(netdev);
+
+ netdev_detach_from_manager(netdev);
+
+ netdev_unref(netdev);
+
+ return;
+}
+
+int netdev_get(Manager *manager, const char *name, NetDev **ret) {
+ NetDev *netdev;
+
+ assert(manager);
+ assert(name);
+ assert(ret);
+
+ netdev = hashmap_get(manager->netdevs, name);
+ if (!netdev) {
+ *ret = NULL;
+ return -ENOENT;
+ }
+
+ *ret = netdev;
+
+ return 0;
+}
+
+static int netdev_enter_failed(NetDev *netdev) {
+ netdev->state = NETDEV_STATE_FAILED;
+
+ netdev_callbacks_clear(netdev);
+
+ return 0;
+}
+
+static int netdev_enslave_ready(NetDev *netdev, Link* link, link_netlink_message_handler_t callback) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(netdev);
+ assert(netdev->state == NETDEV_STATE_READY);
+ assert(netdev->manager);
+ assert(netdev->manager->rtnl);
+ assert(IN_SET(netdev->kind, NETDEV_KIND_BRIDGE, NETDEV_KIND_BOND, NETDEV_KIND_VRF));
+ assert(link);
+ assert(callback);
+
+ if (link->flags & IFF_UP && netdev->kind == NETDEV_KIND_BOND) {
+ log_netdev_debug(netdev, "Link '%s' was up when attempting to enslave it. Bringing link down.", link->ifname);
+ r = link_down(link);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not bring link down: %m");
+ }
+
+ r = sd_rtnl_message_new_link(netdev->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_netlink_message_append_u32(req, IFLA_MASTER, netdev->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_MASTER attribute: %m");
+
+ r = netlink_call_async(netdev->manager->rtnl, NULL, req, callback,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ log_netdev_debug(netdev, "Enslaving link '%s'", link->ifname);
+
+ return 0;
+}
+
+static int netdev_enter_ready(NetDev *netdev) {
+ netdev_join_callback *callback, *callback_next;
+ int r;
+
+ assert(netdev);
+ assert(netdev->ifname);
+
+ if (netdev->state != NETDEV_STATE_CREATING)
+ return 0;
+
+ netdev->state = NETDEV_STATE_READY;
+
+ log_netdev_info(netdev, "netdev ready");
+
+ LIST_FOREACH_SAFE(callbacks, callback, callback_next, netdev->callbacks) {
+ /* enslave the links that were attempted to be enslaved before the
+ * link was ready */
+ r = netdev_enslave_ready(netdev, callback->link, callback->callback);
+ if (r < 0)
+ return r;
+
+ LIST_REMOVE(callbacks, netdev->callbacks, callback);
+ link_unref(callback->link);
+ free(callback);
+ }
+
+ if (NETDEV_VTABLE(netdev)->post_create)
+ NETDEV_VTABLE(netdev)->post_create(netdev, NULL, NULL);
+
+ return 0;
+}
+
+/* callback for netdev's created without a backing Link */
+static int netdev_create_handler(sd_netlink *rtnl, sd_netlink_message *m, NetDev *netdev) {
+ int r;
+
+ assert(netdev);
+ assert(netdev->state != _NETDEV_STATE_INVALID);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r == -EEXIST)
+ log_netdev_info(netdev, "netdev exists, using existing without changing its parameters");
+ else if (r < 0) {
+ log_netdev_warning_errno(netdev, r, "netdev could not be created: %m");
+ netdev_drop(netdev);
+
+ return 1;
+ }
+
+ log_netdev_debug(netdev, "Created");
+
+ return 1;
+}
+
+static int netdev_enslave(NetDev *netdev, Link *link, link_netlink_message_handler_t callback) {
+ int r;
+
+ assert(netdev);
+ assert(netdev->manager);
+ assert(netdev->manager->rtnl);
+ assert(IN_SET(netdev->kind, NETDEV_KIND_BRIDGE, NETDEV_KIND_BOND, NETDEV_KIND_VRF));
+
+ if (netdev->state == NETDEV_STATE_READY) {
+ r = netdev_enslave_ready(netdev, link, callback);
+ if (r < 0)
+ return r;
+ } else if (IN_SET(netdev->state, NETDEV_STATE_LINGER, NETDEV_STATE_FAILED)) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+
+ r = rtnl_message_new_synthetic_error(netdev->manager->rtnl, -ENODEV, 0, &m);
+ if (r >= 0)
+ callback(netdev->manager->rtnl, m, link);
+ } else {
+ /* the netdev is not yet read, save this request for when it is */
+ netdev_join_callback *cb;
+
+ cb = new(netdev_join_callback, 1);
+ if (!cb)
+ return log_oom();
+
+ *cb = (netdev_join_callback) {
+ .callback = callback,
+ .link = link_ref(link),
+ };
+
+ LIST_PREPEND(callbacks, netdev->callbacks, cb);
+
+ log_netdev_debug(netdev, "Will enslave '%s', when ready", link->ifname);
+ }
+
+ return 0;
+}
+
+int netdev_set_ifindex(NetDev *netdev, sd_netlink_message *message) {
+ uint16_t type;
+ const char *kind;
+ const char *received_kind;
+ const char *received_name;
+ int r, ifindex;
+
+ assert(netdev);
+ assert(message);
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not get rtnl message type: %m");
+
+ if (type != RTM_NEWLINK) {
+ log_netdev_error(netdev, "Cannot set ifindex from unexpected rtnl message type.");
+ return -EINVAL;
+ }
+
+ r = sd_rtnl_message_link_get_ifindex(message, &ifindex);
+ if (r < 0) {
+ log_netdev_error_errno(netdev, r, "Could not get ifindex: %m");
+ netdev_enter_failed(netdev);
+ return r;
+ } else if (ifindex <= 0) {
+ log_netdev_error(netdev, "Got invalid ifindex: %d", ifindex);
+ netdev_enter_failed(netdev);
+ return -EINVAL;
+ }
+
+ if (netdev->ifindex > 0) {
+ if (netdev->ifindex != ifindex) {
+ log_netdev_error(netdev, "Could not set ifindex to %d, already set to %d",
+ ifindex, netdev->ifindex);
+ netdev_enter_failed(netdev);
+ return -EEXIST;
+ } else
+ /* ifindex already set to the same for this netdev */
+ return 0;
+ }
+
+ r = sd_netlink_message_read_string(message, IFLA_IFNAME, &received_name);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not get IFNAME: %m");
+
+ if (!streq(netdev->ifname, received_name)) {
+ log_netdev_error(netdev, "Received newlink with wrong IFNAME %s", received_name);
+ netdev_enter_failed(netdev);
+ return r;
+ }
+
+ r = sd_netlink_message_enter_container(message, IFLA_LINKINFO);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not get LINKINFO: %m");
+
+ r = sd_netlink_message_read_string(message, IFLA_INFO_KIND, &received_kind);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not get KIND: %m");
+
+ r = sd_netlink_message_exit_container(message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not exit container: %m");
+
+ if (netdev->kind == NETDEV_KIND_TAP)
+ /* the kernel does not distinguish between tun and tap */
+ kind = "tun";
+ else {
+ kind = netdev_kind_to_string(netdev->kind);
+ if (!kind) {
+ log_netdev_error(netdev, "Could not get kind");
+ netdev_enter_failed(netdev);
+ return -EINVAL;
+ }
+ }
+
+ if (!streq(kind, received_kind)) {
+ log_netdev_error(netdev,
+ "Received newlink with wrong KIND %s, "
+ "expected %s", received_kind, kind);
+ netdev_enter_failed(netdev);
+ return r;
+ }
+
+ netdev->ifindex = ifindex;
+
+ log_netdev_debug(netdev, "netdev has index %d", netdev->ifindex);
+
+ netdev_enter_ready(netdev);
+
+ return 0;
+}
+
+#define HASH_KEY SD_ID128_MAKE(52,e1,45,bd,00,6f,29,96,21,c6,30,6d,83,71,04,48)
+
+int netdev_get_mac(const char *ifname, struct ether_addr **ret) {
+ _cleanup_free_ struct ether_addr *mac = NULL;
+ uint64_t result;
+ size_t l, sz;
+ uint8_t *v;
+ int r;
+
+ assert(ifname);
+ assert(ret);
+
+ mac = new0(struct ether_addr, 1);
+ if (!mac)
+ return -ENOMEM;
+
+ l = strlen(ifname);
+ sz = sizeof(sd_id128_t) + l;
+ v = newa(uint8_t, sz);
+
+ /* fetch some persistent data unique to the machine */
+ r = sd_id128_get_machine((sd_id128_t*) v);
+ if (r < 0)
+ return r;
+
+ /* combine with some data unique (on this machine) to this
+ * netdev */
+ memcpy(v + sizeof(sd_id128_t), ifname, l);
+
+ /* Let's hash the host machine ID plus the container name. We
+ * use a fixed, but originally randomly created hash key here. */
+ result = siphash24(v, sz, HASH_KEY.bytes);
+
+ assert_cc(ETH_ALEN <= sizeof(result));
+ memcpy(mac->ether_addr_octet, &result, ETH_ALEN);
+
+ /* see eth_random_addr in the kernel */
+ mac->ether_addr_octet[0] &= 0xfe; /* clear multicast bit */
+ mac->ether_addr_octet[0] |= 0x02; /* set local assignment bit (IEEE802) */
+
+ *ret = TAKE_PTR(mac);
+
+ return 0;
+}
+
+static int netdev_create(NetDev *netdev, Link *link, link_netlink_message_handler_t callback) {
+ int r;
+
+ assert(netdev);
+ assert(!link || callback);
+
+ /* create netdev */
+ if (NETDEV_VTABLE(netdev)->create) {
+ assert(!link);
+
+ r = NETDEV_VTABLE(netdev)->create(netdev);
+ if (r < 0)
+ return r;
+
+ log_netdev_debug(netdev, "Created");
+ } else {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+
+ r = sd_rtnl_message_new_link(netdev->manager->rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not allocate RTM_NEWLINK message: %m");
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, netdev->ifname);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IFNAME, attribute: %m");
+
+ if (netdev->mac) {
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, netdev->mac);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_ADDRESS attribute: %m");
+ }
+
+ if (netdev->mtu) {
+ r = sd_netlink_message_append_u32(m, IFLA_MTU, netdev->mtu);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_MTU attribute: %m");
+ }
+
+ if (link) {
+ r = sd_netlink_message_append_u32(m, IFLA_LINK, link->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINK attribute: %m");
+ }
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, netdev_kind_to_string(netdev->kind));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ if (NETDEV_VTABLE(netdev)->fill_message_create) {
+ r = NETDEV_VTABLE(netdev)->fill_message_create(netdev, link, m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ if (link) {
+ r = netlink_call_async(netdev->manager->rtnl, NULL, m, callback,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+ } else {
+ r = netlink_call_async(netdev->manager->rtnl, NULL, m, netdev_create_handler,
+ netdev_destroy_callback, netdev);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not send rtnetlink message: %m");
+
+ netdev_ref(netdev);
+ }
+
+ netdev->state = NETDEV_STATE_CREATING;
+
+ log_netdev_debug(netdev, "Creating");
+ }
+
+ return 0;
+}
+
+/* the callback must be called, possibly after a timeout, as otherwise the Link will hang */
+int netdev_join(NetDev *netdev, Link *link, link_netlink_message_handler_t callback) {
+ int r;
+
+ assert(netdev);
+ assert(netdev->manager);
+ assert(netdev->manager->rtnl);
+ assert(NETDEV_VTABLE(netdev));
+
+ switch (NETDEV_VTABLE(netdev)->create_type) {
+ case NETDEV_CREATE_MASTER:
+ r = netdev_enslave(netdev, link, callback);
+ if (r < 0)
+ return r;
+
+ break;
+ case NETDEV_CREATE_STACKED:
+ r = netdev_create(netdev, link, callback);
+ if (r < 0)
+ return r;
+
+ break;
+ default:
+ assert_not_reached("Can not join independent netdev");
+ }
+
+ return 0;
+}
+
+int netdev_load_one(Manager *manager, const char *filename) {
+ _cleanup_(netdev_unrefp) NetDev *netdev_raw = NULL, *netdev = NULL;
+ _cleanup_fclose_ FILE *file = NULL;
+ const char *dropin_dirname;
+ bool independent = false;
+ int r;
+
+ assert(manager);
+ assert(filename);
+
+ file = fopen(filename, "re");
+ if (!file) {
+ if (errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ if (null_or_empty_fd(fileno(file))) {
+ log_debug("Skipping empty file: %s", filename);
+ return 0;
+ }
+
+ netdev_raw = new(NetDev, 1);
+ if (!netdev_raw)
+ return log_oom();
+
+ *netdev_raw = (NetDev) {
+ .n_ref = 1,
+ .kind = _NETDEV_KIND_INVALID,
+ .state = _NETDEV_STATE_INVALID, /* an invalid state means done() of the implementation won't be called on destruction */
+ };
+
+ dropin_dirname = strjoina(basename(filename), ".d");
+ r = config_parse_many(filename, network_dirs, dropin_dirname,
+ "Match\0NetDev\0",
+ config_item_perf_lookup, network_netdev_gperf_lookup,
+ CONFIG_PARSE_WARN|CONFIG_PARSE_RELAXED, netdev_raw);
+ if (r < 0)
+ return r;
+
+ /* skip out early if configuration does not match the environment */
+ if (net_match_config(NULL, NULL, NULL, NULL, NULL,
+ netdev_raw->match_host, netdev_raw->match_virt,
+ netdev_raw->match_kernel_cmdline, netdev_raw->match_kernel_version,
+ netdev_raw->match_arch,
+ NULL, NULL, NULL, NULL, NULL, NULL) <= 0)
+ return 0;
+
+ if (netdev_raw->kind == _NETDEV_KIND_INVALID) {
+ log_warning("NetDev has no Kind configured in %s. Ignoring", filename);
+ return 0;
+ }
+
+ if (!netdev_raw->ifname) {
+ log_warning("NetDev without Name configured in %s. Ignoring", filename);
+ return 0;
+ }
+
+ r = fseek(file, 0, SEEK_SET);
+ if (r < 0)
+ return -errno;
+
+ netdev = malloc0(NETDEV_VTABLE(netdev_raw)->object_size);
+ if (!netdev)
+ return log_oom();
+
+ netdev->n_ref = 1;
+ netdev->manager = manager;
+ netdev->kind = netdev_raw->kind;
+ netdev->state = NETDEV_STATE_LOADING; /* we initialize the state here for the first time, so that done() will be called on destruction */
+
+ if (NETDEV_VTABLE(netdev)->init)
+ NETDEV_VTABLE(netdev)->init(netdev);
+
+ r = config_parse_many(filename, network_dirs, dropin_dirname,
+ NETDEV_VTABLE(netdev)->sections,
+ config_item_perf_lookup, network_netdev_gperf_lookup,
+ CONFIG_PARSE_WARN, netdev);
+ if (r < 0)
+ return r;
+
+ /* verify configuration */
+ if (NETDEV_VTABLE(netdev)->config_verify) {
+ r = NETDEV_VTABLE(netdev)->config_verify(netdev, filename);
+ if (r < 0)
+ return 0;
+ }
+
+ netdev->filename = strdup(filename);
+ if (!netdev->filename)
+ return log_oom();
+
+ if (!netdev->mac && netdev->kind != NETDEV_KIND_VLAN) {
+ r = netdev_get_mac(netdev->ifname, &netdev->mac);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate predictable MAC address for %s: %m", netdev->ifname);
+ }
+
+ r = hashmap_ensure_allocated(&netdev->manager->netdevs, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(netdev->manager->netdevs, netdev->ifname, netdev);
+ if (r < 0)
+ return r;
+
+ LIST_HEAD_INIT(netdev->callbacks);
+
+ log_netdev_debug(netdev, "loaded %s", netdev_kind_to_string(netdev->kind));
+
+ switch (NETDEV_VTABLE(netdev)->create_type) {
+ case NETDEV_CREATE_MASTER:
+ case NETDEV_CREATE_INDEPENDENT:
+ r = netdev_create(netdev, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ break;
+ default:
+ break;
+ }
+
+ switch (netdev->kind) {
+ case NETDEV_KIND_IPIP:
+ independent = IPIP(netdev)->independent;
+ break;
+ case NETDEV_KIND_GRE:
+ independent = GRE(netdev)->independent;
+ break;
+ case NETDEV_KIND_GRETAP:
+ independent = GRETAP(netdev)->independent;
+ break;
+ case NETDEV_KIND_IP6GRE:
+ independent = IP6GRE(netdev)->independent;
+ break;
+ case NETDEV_KIND_IP6GRETAP:
+ independent = IP6GRETAP(netdev)->independent;
+ break;
+ case NETDEV_KIND_SIT:
+ independent = SIT(netdev)->independent;
+ break;
+ case NETDEV_KIND_VTI:
+ independent = VTI(netdev)->independent;
+ break;
+ case NETDEV_KIND_VTI6:
+ independent = VTI6(netdev)->independent;
+ break;
+ case NETDEV_KIND_IP6TNL:
+ independent = IP6TNL(netdev)->independent;
+ break;
+ default:
+ break;
+ }
+
+ if (independent) {
+ r = netdev_create(netdev, NULL, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ netdev = NULL;
+
+ return 0;
+}
+
+int netdev_load(Manager *manager) {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ int r;
+
+ assert(manager);
+
+ hashmap_clear_with_destructor(manager->netdevs, netdev_unref);
+
+ r = conf_files_list_strv(&files, ".netdev", NULL, 0, network_dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate netdev files: %m");
+
+ STRV_FOREACH_BACKWARDS(f, files) {
+ r = netdev_load_one(manager, *f);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/network/netdev/netdev.h b/src/network/netdev/netdev.h
new file mode 100644
index 0000000..d6524da
--- /dev/null
+++ b/src/network/netdev/netdev.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-netlink.h"
+
+#include "conf-parser.h"
+#include "list.h"
+#include "../networkd-link.h"
+#include "time-util.h"
+
+typedef struct netdev_join_callback netdev_join_callback;
+
+struct netdev_join_callback {
+ link_netlink_message_handler_t callback;
+ Link *link;
+
+ LIST_FIELDS(netdev_join_callback, callbacks);
+};
+
+typedef enum NetDevKind {
+ NETDEV_KIND_BRIDGE,
+ NETDEV_KIND_BOND,
+ NETDEV_KIND_VLAN,
+ NETDEV_KIND_MACVLAN,
+ NETDEV_KIND_MACVTAP,
+ NETDEV_KIND_IPVLAN,
+ NETDEV_KIND_VXLAN,
+ NETDEV_KIND_IPIP,
+ NETDEV_KIND_GRE,
+ NETDEV_KIND_GRETAP,
+ NETDEV_KIND_IP6GRE,
+ NETDEV_KIND_IP6GRETAP,
+ NETDEV_KIND_SIT,
+ NETDEV_KIND_VETH,
+ NETDEV_KIND_VTI,
+ NETDEV_KIND_VTI6,
+ NETDEV_KIND_IP6TNL,
+ NETDEV_KIND_DUMMY,
+ NETDEV_KIND_TUN,
+ NETDEV_KIND_TAP,
+ NETDEV_KIND_VRF,
+ NETDEV_KIND_VCAN,
+ NETDEV_KIND_GENEVE,
+ NETDEV_KIND_VXCAN,
+ NETDEV_KIND_WIREGUARD,
+ NETDEV_KIND_NETDEVSIM,
+ NETDEV_KIND_FOU,
+ NETDEV_KIND_ERSPAN,
+ _NETDEV_KIND_MAX,
+ _NETDEV_KIND_INVALID = -1
+} NetDevKind;
+
+typedef enum NetDevState {
+ NETDEV_STATE_LOADING,
+ NETDEV_STATE_FAILED,
+ NETDEV_STATE_CREATING,
+ NETDEV_STATE_READY,
+ NETDEV_STATE_LINGER,
+ _NETDEV_STATE_MAX,
+ _NETDEV_STATE_INVALID = -1,
+} NetDevState;
+
+typedef enum NetDevCreateType {
+ NETDEV_CREATE_INDEPENDENT,
+ NETDEV_CREATE_MASTER,
+ NETDEV_CREATE_STACKED,
+ _NETDEV_CREATE_MAX,
+ _NETDEV_CREATE_INVALID = -1,
+} NetDevCreateType;
+
+typedef struct Manager Manager;
+typedef struct Condition Condition;
+
+typedef struct NetDev {
+ Manager *manager;
+
+ unsigned n_ref;
+
+ char *filename;
+
+ Condition *match_host;
+ Condition *match_virt;
+ Condition *match_kernel_cmdline;
+ Condition *match_kernel_version;
+ Condition *match_arch;
+
+ NetDevState state;
+ NetDevKind kind;
+ char *description;
+ char *ifname;
+ struct ether_addr *mac;
+ uint32_t mtu;
+ int ifindex;
+
+ LIST_HEAD(netdev_join_callback, callbacks);
+} NetDev;
+
+typedef struct NetDevVTable {
+ /* How much memory does an object of this unit type need */
+ size_t object_size;
+
+ /* Config file sections this netdev kind understands, separated
+ * by NUL chars */
+ const char *sections;
+
+ /* This should reset all type-specific variables. This should
+ * not allocate memory, and is called with zero-initialized
+ * data. It should hence only initialize variables that need
+ * to be set != 0. */
+ void (*init)(NetDev *n);
+
+ /* This should free all kind-specific variables. It should be
+ * idempotent. */
+ void (*done)(NetDev *n);
+
+ /* fill in message to create netdev */
+ int (*fill_message_create)(NetDev *netdev, Link *link, sd_netlink_message *message);
+
+ /* specifies if netdev is independent, or a master device or a stacked device */
+ NetDevCreateType create_type;
+
+ /* create netdev, if not done via rtnl */
+ int (*create)(NetDev *netdev);
+
+ /* perform additional configuration after netdev has been createad */
+ int (*post_create)(NetDev *netdev, Link *link, sd_netlink_message *message);
+
+ /* verify that compulsory configuration options were specified */
+ int (*config_verify)(NetDev *netdev, const char *filename);
+} NetDevVTable;
+
+extern const NetDevVTable * const netdev_vtable[_NETDEV_KIND_MAX];
+
+#define NETDEV_VTABLE(n) ((n)->kind != _NETDEV_KIND_INVALID ? netdev_vtable[(n)->kind] : NULL)
+
+/* For casting a netdev into the various netdev kinds */
+#define DEFINE_NETDEV_CAST(UPPERCASE, MixedCase) \
+ static inline MixedCase* UPPERCASE(NetDev *n) { \
+ if (_unlikely_(!n || \
+ n->kind != NETDEV_KIND_##UPPERCASE) || \
+ n->state == _NETDEV_STATE_INVALID) \
+ return NULL; \
+ \
+ return (MixedCase*) n; \
+ }
+
+/* For casting the various netdev kinds into a netdev */
+#define NETDEV(n) (&(n)->meta)
+
+int netdev_load(Manager *manager);
+int netdev_load_one(Manager *manager, const char *filename);
+void netdev_drop(NetDev *netdev);
+
+NetDev *netdev_unref(NetDev *netdev);
+NetDev *netdev_ref(NetDev *netdev);
+DEFINE_TRIVIAL_DESTRUCTOR(netdev_destroy_callback, NetDev, netdev_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(NetDev*, netdev_unref);
+
+bool netdev_is_managed(NetDev *netdev);
+int netdev_get(Manager *manager, const char *name, NetDev **ret);
+int netdev_set_ifindex(NetDev *netdev, sd_netlink_message *newlink);
+int netdev_get_mac(const char *ifname, struct ether_addr **ret);
+int netdev_join(NetDev *netdev, Link *link, link_netlink_message_handler_t cb);
+
+const char *netdev_kind_to_string(NetDevKind d) _const_;
+NetDevKind netdev_kind_from_string(const char *d) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_netdev_kind);
+
+/* gperf */
+const struct ConfigPerfItem* network_netdev_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+/* Macros which append INTERFACE= to the message */
+
+#define log_netdev_full(netdev, level, error, ...) \
+ ({ \
+ const NetDev *_n = (netdev); \
+ _n ? log_object_internal(level, error, __FILE__, __LINE__, __func__, "INTERFACE=", _n->ifname, NULL, NULL, ##__VA_ARGS__) : \
+ log_internal(level, error, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
+ })
+
+#define log_netdev_debug(netdev, ...) log_netdev_full(netdev, LOG_DEBUG, 0, ##__VA_ARGS__)
+#define log_netdev_info(netdev, ...) log_netdev_full(netdev, LOG_INFO, 0, ##__VA_ARGS__)
+#define log_netdev_notice(netdev, ...) log_netdev_full(netdev, LOG_NOTICE, 0, ##__VA_ARGS__)
+#define log_netdev_warning(netdev, ...) log_netdev_full(netdev, LOG_WARNING, 0, ## __VA_ARGS__)
+#define log_netdev_error(netdev, ...) log_netdev_full(netdev, LOG_ERR, 0, ##__VA_ARGS__)
+
+#define log_netdev_debug_errno(netdev, error, ...) log_netdev_full(netdev, LOG_DEBUG, error, ##__VA_ARGS__)
+#define log_netdev_info_errno(netdev, error, ...) log_netdev_full(netdev, LOG_INFO, error, ##__VA_ARGS__)
+#define log_netdev_notice_errno(netdev, error, ...) log_netdev_full(netdev, LOG_NOTICE, error, ##__VA_ARGS__)
+#define log_netdev_warning_errno(netdev, error, ...) log_netdev_full(netdev, LOG_WARNING, error, ##__VA_ARGS__)
+#define log_netdev_error_errno(netdev, error, ...) log_netdev_full(netdev, LOG_ERR, error, ##__VA_ARGS__)
+
+#define LOG_NETDEV_MESSAGE(netdev, fmt, ...) "MESSAGE=%s: " fmt, (netdev)->ifname, ##__VA_ARGS__
+#define LOG_NETDEV_INTERFACE(netdev) "INTERFACE=%s", (netdev)->ifname
diff --git a/src/network/netdev/netdevsim.c b/src/network/netdev/netdevsim.c
new file mode 100644
index 0000000..8caba67
--- /dev/null
+++ b/src/network/netdev/netdevsim.c
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "netdev/netdevsim.h"
+#include "missing.h"
+
+const NetDevVTable netdevsim_vtable = {
+ .object_size = sizeof(NetDevSim),
+ .sections = "Match\0NetDev\0",
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+};
diff --git a/src/network/netdev/netdevsim.h b/src/network/netdev/netdevsim.h
new file mode 100644
index 0000000..d3ed0c0
--- /dev/null
+++ b/src/network/netdev/netdevsim.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct NetDevSim NetDevSim;
+
+#include "netdev/netdev.h"
+
+struct NetDevSim {
+ NetDev meta;
+};
+
+DEFINE_NETDEV_CAST(NETDEVSIM, NetDevSim);
+extern const NetDevVTable netdevsim_vtable;
diff --git a/src/network/netdev/tunnel.c b/src/network/netdev/tunnel.c
new file mode 100644
index 0000000..684eddd
--- /dev/null
+++ b/src/network/netdev/tunnel.c
@@ -0,0 +1,936 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <linux/ip.h>
+#include <linux/if_tunnel.h>
+#include <linux/ip6_tunnel.h>
+
+#if HAVE_LINUX_FOU_H
+#include <linux/fou.h>
+#endif
+
+#include "sd-netlink.h"
+
+#include "conf-parser.h"
+#include "missing.h"
+#include "networkd-link.h"
+#include "netdev/tunnel.h"
+#include "parse-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "util.h"
+
+#define DEFAULT_TNL_HOP_LIMIT 64
+#define IP6_FLOWINFO_FLOWLABEL htobe32(0x000FFFFF)
+#define IP6_TNL_F_ALLOW_LOCAL_REMOTE 0x40
+
+static const char* const ip6tnl_mode_table[_NETDEV_IP6_TNL_MODE_MAX] = {
+ [NETDEV_IP6_TNL_MODE_IP6IP6] = "ip6ip6",
+ [NETDEV_IP6_TNL_MODE_IPIP6] = "ipip6",
+ [NETDEV_IP6_TNL_MODE_ANYIP6] = "any",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(ip6tnl_mode, Ip6TnlMode);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_ip6tnl_mode, ip6tnl_mode, Ip6TnlMode, "Failed to parse ip6 tunnel Mode");
+
+static int netdev_ipip_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Tunnel *t = IPIP(netdev);
+ int r;
+
+ assert(netdev);
+ assert(m);
+ assert(t);
+ assert(IN_SET(t->family, AF_INET, AF_UNSPEC));
+
+ if (link) {
+ r = sd_netlink_message_append_u32(m, IFLA_IPTUN_LINK, link->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_LINK attribute: %m");
+ }
+
+ r = sd_netlink_message_append_in_addr(m, IFLA_IPTUN_LOCAL, &t->local.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_LOCAL attribute: %m");
+
+ r = sd_netlink_message_append_in_addr(m, IFLA_IPTUN_REMOTE, &t->remote.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_REMOTE attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_IPTUN_TTL, t->ttl);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_TTL attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_IPTUN_PMTUDISC, t->pmtudisc);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_PMTUDISC attribute: %m");
+
+ if (t->fou_tunnel) {
+
+ r = sd_netlink_message_append_u16(m, IFLA_IPTUN_ENCAP_TYPE, t->fou_encap_type);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_ENCAP_TYPE attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_IPTUN_ENCAP_SPORT, htobe16(t->encap_src_port));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_ENCAP_SPORT attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_IPTUN_ENCAP_DPORT, htobe16(t->fou_destination_port));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_ENCAP_DPORT attribute: %m");
+ }
+
+ return r;
+}
+
+static int netdev_sit_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Tunnel *t = SIT(netdev);
+ int r;
+
+ assert(netdev);
+ assert(m);
+ assert(t);
+ assert(IN_SET(t->family, AF_INET, AF_UNSPEC));
+
+ if (link) {
+ r = sd_netlink_message_append_u32(m, IFLA_IPTUN_LINK, link->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_LINK attribute: %m");
+ }
+
+ r = sd_netlink_message_append_in_addr(m, IFLA_IPTUN_LOCAL, &t->local.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_LOCAL attribute: %m");
+
+ r = sd_netlink_message_append_in_addr(m, IFLA_IPTUN_REMOTE, &t->remote.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_REMOTE attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_IPTUN_TTL, t->ttl);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_TTL attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_IPTUN_PMTUDISC, t->pmtudisc);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_PMTUDISC attribute: %m");
+
+ if (t->sixrd_prefixlen > 0) {
+ r = sd_netlink_message_append_in6_addr(m, IFLA_IPTUN_6RD_PREFIX, &t->sixrd_prefix);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_6RD_PREFIX attribute: %m");
+
+ /* u16 is deliberate here, even though we're passing a netmask that can never be >128. The kernel is
+ * expecting to receive the prefixlen as a u16.
+ */
+ r = sd_netlink_message_append_u16(m, IFLA_IPTUN_6RD_PREFIXLEN, t->sixrd_prefixlen);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_6RD_PREFIXLEN attribute: %m");
+ }
+
+ if (t->isatap >= 0) {
+ uint16_t flags = 0;
+
+ SET_FLAG(flags, SIT_ISATAP, t->isatap);
+
+ r = sd_netlink_message_append_u16(m, IFLA_IPTUN_FLAGS, flags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_FLAGS attribute: %m");
+ }
+
+ return r;
+}
+
+static int netdev_gre_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Tunnel *t;
+ int r;
+
+ assert(netdev);
+
+ if (netdev->kind == NETDEV_KIND_GRE)
+ t = GRE(netdev);
+ else
+ t = GRETAP(netdev);
+
+ assert(t);
+ assert(IN_SET(t->family, AF_INET, AF_UNSPEC));
+ assert(m);
+
+ if (link) {
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_LINK, link->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_LINK attribute: %m");
+ }
+
+ r = sd_netlink_message_append_in_addr(m, IFLA_GRE_LOCAL, &t->local.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_LOCAL attribute: %m");
+
+ r = sd_netlink_message_append_in_addr(m, IFLA_GRE_REMOTE, &t->remote.in);
+ if (r < 0)
+ log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_REMOTE attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_GRE_TTL, t->ttl);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_TTL attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_GRE_TOS, t->tos);
+ if (r < 0)
+ log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_TOS attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_GRE_PMTUDISC, t->pmtudisc);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_PMTUDISC attribute: %m");
+
+ return r;
+}
+
+static int netdev_erspan_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ uint32_t ikey = 0;
+ uint32_t okey = 0;
+ uint16_t iflags = 0;
+ uint16_t oflags = 0;
+ Tunnel *t;
+ int r;
+
+ assert(netdev);
+
+ t = ERSPAN(netdev);
+
+ assert(t);
+ assert(IN_SET(t->family, AF_INET, AF_UNSPEC));
+ assert(m);
+
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_ERSPAN_INDEX, t->erspan_index);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_ERSPAN_INDEX attribute: %m");
+
+ if (t->key != 0) {
+ ikey = okey = htobe32(t->key);
+ iflags |= GRE_KEY;
+ oflags |= GRE_KEY;
+ }
+
+ if (t->ikey != 0) {
+ ikey = htobe32(t->ikey);
+ iflags |= GRE_KEY;
+ }
+
+ if (t->okey != 0) {
+ okey = htobe32(t->okey);
+ oflags |= GRE_KEY;
+ }
+
+ if (t->erspan_sequence > 0) {
+ iflags |= GRE_SEQ;
+ oflags |= GRE_SEQ;
+ } else if (t->erspan_sequence == 0) {
+ iflags &= ~GRE_SEQ;
+ oflags &= ~GRE_SEQ;
+ }
+
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_IKEY, ikey);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_IKEY attribute: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_OKEY, okey);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_OKEY attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_GRE_IFLAGS, iflags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_IFLAGS attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_GRE_OFLAGS, oflags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_OFLAGS, attribute: %m");
+
+ r = sd_netlink_message_append_in_addr(m, IFLA_GRE_LOCAL, &t->local.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_LOCAL attribute: %m");
+
+ r = sd_netlink_message_append_in_addr(m, IFLA_GRE_REMOTE, &t->remote.in);
+ if (r < 0)
+ log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_REMOTE attribute: %m");
+
+ return r;
+}
+
+static int netdev_ip6gre_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Tunnel *t;
+ int r;
+
+ assert(netdev);
+
+ if (netdev->kind == NETDEV_KIND_IP6GRE)
+ t = IP6GRE(netdev);
+ else
+ t = IP6GRETAP(netdev);
+
+ assert(t);
+ assert(t->family == AF_INET6);
+ assert(m);
+
+ if (link) {
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_LINK, link->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_LINK attribute: %m");
+ }
+
+ r = sd_netlink_message_append_in6_addr(m, IFLA_GRE_LOCAL, &t->local.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_LOCAL attribute: %m");
+
+ r = sd_netlink_message_append_in6_addr(m, IFLA_GRE_REMOTE, &t->remote.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_REMOTE attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_GRE_TTL, t->ttl);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_TTL attribute: %m");
+
+ if (t->ipv6_flowlabel != _NETDEV_IPV6_FLOWLABEL_INVALID) {
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_FLOWINFO, t->ipv6_flowlabel);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_FLOWINFO attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_FLAGS, t->flags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_FLAGS attribute: %m");
+
+ return r;
+}
+
+static int netdev_vti_fill_message_key(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ uint32_t ikey, okey;
+ Tunnel *t;
+ int r;
+
+ assert(m);
+
+ if (netdev->kind == NETDEV_KIND_VTI)
+ t = VTI(netdev);
+ else
+ t = VTI6(netdev);
+
+ assert(t);
+
+ if (t->key != 0)
+ ikey = okey = htobe32(t->key);
+ else {
+ ikey = htobe32(t->ikey);
+ okey = htobe32(t->okey);
+ }
+
+ r = sd_netlink_message_append_u32(m, IFLA_VTI_IKEY, ikey);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VTI_IKEY attribute: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_VTI_OKEY, okey);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VTI_OKEY attribute: %m");
+
+ return 0;
+}
+
+static int netdev_vti_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Tunnel *t = VTI(netdev);
+ int r;
+
+ assert(netdev);
+ assert(m);
+ assert(t);
+ assert(t->family == AF_INET);
+
+ if (link) {
+ r = sd_netlink_message_append_u32(m, IFLA_VTI_LINK, link->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_LINK attribute: %m");
+ }
+
+ r = netdev_vti_fill_message_key(netdev, link, m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_in_addr(m, IFLA_VTI_LOCAL, &t->local.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_LOCAL attribute: %m");
+
+ r = sd_netlink_message_append_in_addr(m, IFLA_VTI_REMOTE, &t->remote.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_REMOTE attribute: %m");
+
+ return r;
+}
+
+static int netdev_vti6_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Tunnel *t = VTI6(netdev);
+ int r;
+
+ assert(netdev);
+ assert(m);
+ assert(t);
+ assert(t->family == AF_INET6);
+
+ if (link) {
+ r = sd_netlink_message_append_u32(m, IFLA_VTI_LINK, link->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_LINK attribute: %m");
+ }
+
+ r = netdev_vti_fill_message_key(netdev, link, m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_in6_addr(m, IFLA_VTI_LOCAL, &t->local.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_LOCAL attribute: %m");
+
+ r = sd_netlink_message_append_in6_addr(m, IFLA_VTI_REMOTE, &t->remote.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_REMOTE attribute: %m");
+
+ return r;
+}
+
+static int netdev_ip6tnl_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Tunnel *t = IP6TNL(netdev);
+ uint8_t proto;
+ int r;
+
+ assert(netdev);
+ assert(m);
+ assert(t);
+ assert(t->family == AF_INET6);
+
+ if (link) {
+ r = sd_netlink_message_append_u32(m, IFLA_IPTUN_LINK, link->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_LINK attribute: %m");
+ }
+
+ r = sd_netlink_message_append_in6_addr(m, IFLA_IPTUN_LOCAL, &t->local.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_LOCAL attribute: %m");
+
+ r = sd_netlink_message_append_in6_addr(m, IFLA_IPTUN_REMOTE, &t->remote.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_REMOTE attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_IPTUN_TTL, t->ttl);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_TTL attribute: %m");
+
+ if (t->ipv6_flowlabel != _NETDEV_IPV6_FLOWLABEL_INVALID) {
+ r = sd_netlink_message_append_u32(m, IFLA_IPTUN_FLOWINFO, t->ipv6_flowlabel);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_FLOWINFO attribute: %m");
+ }
+
+ if (t->copy_dscp)
+ t->flags |= IP6_TNL_F_RCV_DSCP_COPY;
+
+ if (t->allow_localremote != -1)
+ SET_FLAG(t->flags, IP6_TNL_F_ALLOW_LOCAL_REMOTE, t->allow_localremote);
+
+ if (t->encap_limit != IPV6_DEFAULT_TNL_ENCAP_LIMIT) {
+ r = sd_netlink_message_append_u8(m, IFLA_IPTUN_ENCAP_LIMIT, t->encap_limit);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_ENCAP_LIMIT attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u32(m, IFLA_IPTUN_FLAGS, t->flags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_FLAGS attribute: %m");
+
+ switch (t->ip6tnl_mode) {
+ case NETDEV_IP6_TNL_MODE_IP6IP6:
+ proto = IPPROTO_IPV6;
+ break;
+ case NETDEV_IP6_TNL_MODE_IPIP6:
+ proto = IPPROTO_IPIP;
+ break;
+ case NETDEV_IP6_TNL_MODE_ANYIP6:
+ default:
+ proto = 0;
+ break;
+ }
+
+ r = sd_netlink_message_append_u8(m, IFLA_IPTUN_PROTO, proto);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_MODE attribute: %m");
+
+ return r;
+}
+
+static int netdev_tunnel_verify(NetDev *netdev, const char *filename) {
+ Tunnel *t = NULL;
+
+ assert(netdev);
+ assert(filename);
+
+ switch (netdev->kind) {
+ case NETDEV_KIND_IPIP:
+ t = IPIP(netdev);
+ break;
+ case NETDEV_KIND_SIT:
+ t = SIT(netdev);
+ break;
+ case NETDEV_KIND_GRE:
+ t = GRE(netdev);
+ break;
+ case NETDEV_KIND_GRETAP:
+ t = GRETAP(netdev);
+ break;
+ case NETDEV_KIND_IP6GRE:
+ t = IP6GRE(netdev);
+ break;
+ case NETDEV_KIND_IP6GRETAP:
+ t = IP6GRETAP(netdev);
+ break;
+ case NETDEV_KIND_VTI:
+ t = VTI(netdev);
+ break;
+ case NETDEV_KIND_VTI6:
+ t = VTI6(netdev);
+ break;
+ case NETDEV_KIND_IP6TNL:
+ t = IP6TNL(netdev);
+ break;
+ case NETDEV_KIND_ERSPAN:
+ t = ERSPAN(netdev);
+ break;
+ default:
+ assert_not_reached("Invalid tunnel kind");
+ }
+
+ assert(t);
+
+ if (!IN_SET(t->family, AF_INET, AF_INET6, AF_UNSPEC)) {
+ log_netdev_error(netdev,
+ "Tunnel with invalid address family configured in %s. Ignoring", filename);
+ return -EINVAL;
+ }
+
+ if (IN_SET(netdev->kind, NETDEV_KIND_VTI, NETDEV_KIND_IPIP, NETDEV_KIND_SIT, NETDEV_KIND_GRE, NETDEV_KIND_GRETAP, NETDEV_KIND_ERSPAN) &&
+ (t->family != AF_INET || in_addr_is_null(t->family, &t->local))) {
+ log_netdev_error(netdev,
+ "vti/ipip/sit/gre/gretap/erspan tunnel without a local IPv4 address configured in %s. Ignoring", filename);
+ return -EINVAL;
+ }
+
+ if (IN_SET(netdev->kind, NETDEV_KIND_VTI6, NETDEV_KIND_IP6TNL, NETDEV_KIND_IP6GRE, NETDEV_KIND_IP6GRETAP) &&
+ (t->family != AF_INET6 || in_addr_is_null(t->family, &t->local))) {
+ log_netdev_error(netdev,
+ "vti6/ip6tnl/ip6gre/ip6gretap tunnel without a local IPv6 address configured in %s. Ignoring", filename);
+ return -EINVAL;
+ }
+
+ if (netdev->kind == NETDEV_KIND_IP6TNL &&
+ t->ip6tnl_mode == _NETDEV_IP6_TNL_MODE_INVALID) {
+ log_netdev_error(netdev,
+ "ip6tnl without mode configured in %s. Ignoring", filename);
+ return -EINVAL;
+ }
+
+ if (t->fou_tunnel && t->fou_destination_port <= 0) {
+ log_netdev_error(netdev, "FooOverUDP missing port configured in %s. Ignoring", filename);
+ return -EINVAL;
+ }
+
+ if (netdev->kind == NETDEV_KIND_ERSPAN && (t->erspan_index >= (1 << 20) || t->erspan_index == 0)) {
+ log_netdev_error(netdev, "Invalid erspan index %d. Ignoring", t->erspan_index);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int config_parse_tunnel_address(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Tunnel *t = userdata;
+ union in_addr_union *addr = data, buffer;
+ int r, f;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* This is used to parse addresses on both local and remote ends of the tunnel.
+ * Address families must match.
+ *
+ * "any" is a special value which means that the address is unspecified.
+ */
+
+ if (streq(rvalue, "any")) {
+ *addr = IN_ADDR_NULL;
+
+ /* As a special case, if both the local and remote addresses are
+ * unspecified, also clear the address family.
+ */
+ if (t->family != AF_UNSPEC &&
+ in_addr_is_null(t->family, &t->local) &&
+ in_addr_is_null(t->family, &t->remote))
+ t->family = AF_UNSPEC;
+ return 0;
+ }
+
+ r = in_addr_from_string_auto(rvalue, &f, &buffer);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Tunnel address \"%s\" invalid, ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ if (t->family != AF_UNSPEC && t->family != f) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Tunnel addresses incompatible, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ t->family = f;
+ *addr = buffer;
+ return 0;
+}
+
+int config_parse_tunnel_key(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ union in_addr_union buffer;
+ Tunnel *t = userdata;
+ uint32_t k;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = in_addr_from_string(AF_INET, rvalue, &buffer);
+ if (r < 0) {
+ r = safe_atou32(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse tunnel key ignoring assignment: %s", rvalue);
+ return 0;
+ }
+ } else
+ k = be32toh(buffer.in.s_addr);
+
+ if (streq(lvalue, "Key"))
+ t->key = k;
+ else if (streq(lvalue, "InputKey"))
+ t->ikey = k;
+ else
+ t->okey = k;
+
+ return 0;
+}
+
+int config_parse_ipv6_flowlabel(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ IPv6FlowLabel *ipv6_flowlabel = data;
+ Tunnel *t = userdata;
+ int k = 0;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(ipv6_flowlabel);
+
+ if (streq(rvalue, "inherit")) {
+ *ipv6_flowlabel = IP6_FLOWINFO_FLOWLABEL;
+ t->flags |= IP6_TNL_F_USE_ORIG_FLOWLABEL;
+ } else {
+ r = config_parse_int(unit, filename, line, section, section_line, lvalue, ltype, rvalue, &k, userdata);
+ if (r < 0)
+ return r;
+
+ if (k > 0xFFFFF)
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse IPv6 flowlabel option, ignoring: %s", rvalue);
+ else {
+ *ipv6_flowlabel = htobe32(k) & IP6_FLOWINFO_FLOWLABEL;
+ t->flags &= ~IP6_TNL_F_USE_ORIG_FLOWLABEL;
+ }
+ }
+
+ return 0;
+}
+
+int config_parse_encap_limit(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Tunnel *t = userdata;
+ int k = 0;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (streq(rvalue, "none"))
+ t->flags |= IP6_TNL_F_IGN_ENCAP_LIMIT;
+ else {
+ r = safe_atoi(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Tunnel Encapsulation Limit option, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (k > 255 || k < 0)
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid Tunnel Encapsulation value, ignoring: %d", k);
+ else {
+ t->encap_limit = k;
+ t->flags &= ~IP6_TNL_F_IGN_ENCAP_LIMIT;
+ }
+ }
+
+ return 0;
+}
+
+int config_parse_6rd_prefix(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Tunnel *t = userdata;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ union in_addr_union p;
+ uint8_t l;
+ int r;
+
+ r = in_addr_prefix_from_string(rvalue, AF_INET6, &p, &l);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse 6rd prefix \"%s\", ignoring: %m", rvalue);
+ return 0;
+ }
+ if (l == 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "6rd prefix length of \"%s\" must be greater than zero, ignoring", rvalue);
+ return 0;
+ }
+
+ t->sixrd_prefix = p.in6;
+ t->sixrd_prefixlen = l;
+
+ return 0;
+}
+
+static void ipip_init(NetDev *n) {
+ Tunnel *t = IPIP(n);
+
+ assert(n);
+ assert(t);
+
+ t->pmtudisc = true;
+ t->fou_encap_type = FOU_ENCAP_DIRECT;
+}
+
+static void sit_init(NetDev *n) {
+ Tunnel *t = SIT(n);
+
+ assert(n);
+ assert(t);
+
+ t->pmtudisc = true;
+ t->isatap = -1;
+}
+
+static void vti_init(NetDev *n) {
+ Tunnel *t;
+
+ assert(n);
+
+ if (n->kind == NETDEV_KIND_VTI)
+ t = VTI(n);
+ else
+ t = VTI6(n);
+
+ assert(t);
+
+ t->pmtudisc = true;
+}
+
+static void gre_init(NetDev *n) {
+ Tunnel *t;
+
+ assert(n);
+
+ if (n->kind == NETDEV_KIND_GRE)
+ t = GRE(n);
+ else
+ t = GRETAP(n);
+
+ assert(t);
+
+ t->pmtudisc = true;
+}
+
+static void ip6gre_init(NetDev *n) {
+ Tunnel *t;
+
+ assert(n);
+
+ if (n->kind == NETDEV_KIND_IP6GRE)
+ t = IP6GRE(n);
+ else
+ t = IP6GRETAP(n);
+
+ assert(t);
+
+ t->ttl = DEFAULT_TNL_HOP_LIMIT;
+}
+
+static void erspan_init(NetDev *n) {
+ Tunnel *t;
+
+ assert(n);
+
+ t = ERSPAN(n);
+
+ assert(t);
+
+ t->erspan_sequence = -1;
+}
+
+static void ip6tnl_init(NetDev *n) {
+ Tunnel *t = IP6TNL(n);
+
+ assert(n);
+ assert(t);
+
+ t->ttl = DEFAULT_TNL_HOP_LIMIT;
+ t->encap_limit = IPV6_DEFAULT_TNL_ENCAP_LIMIT;
+ t->ip6tnl_mode = _NETDEV_IP6_TNL_MODE_INVALID;
+ t->ipv6_flowlabel = _NETDEV_IPV6_FLOWLABEL_INVALID;
+ t->allow_localremote = -1;
+}
+
+const NetDevVTable ipip_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = ipip_init,
+ .sections = "Match\0NetDev\0Tunnel\0",
+ .fill_message_create = netdev_ipip_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+};
+
+const NetDevVTable sit_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = sit_init,
+ .sections = "Match\0NetDev\0Tunnel\0",
+ .fill_message_create = netdev_sit_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+};
+
+const NetDevVTable vti_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = vti_init,
+ .sections = "Match\0NetDev\0Tunnel\0",
+ .fill_message_create = netdev_vti_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+};
+
+const NetDevVTable vti6_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = vti_init,
+ .sections = "Match\0NetDev\0Tunnel\0",
+ .fill_message_create = netdev_vti6_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+};
+
+const NetDevVTable gre_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = gre_init,
+ .sections = "Match\0NetDev\0Tunnel\0",
+ .fill_message_create = netdev_gre_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+};
+
+const NetDevVTable gretap_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = gre_init,
+ .sections = "Match\0NetDev\0Tunnel\0",
+ .fill_message_create = netdev_gre_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+};
+
+const NetDevVTable ip6gre_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = ip6gre_init,
+ .sections = "Match\0NetDev\0Tunnel\0",
+ .fill_message_create = netdev_ip6gre_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+};
+
+const NetDevVTable ip6gretap_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = ip6gre_init,
+ .sections = "Match\0NetDev\0Tunnel\0",
+ .fill_message_create = netdev_ip6gre_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+};
+
+const NetDevVTable ip6tnl_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = ip6tnl_init,
+ .sections = "Match\0NetDev\0Tunnel\0",
+ .fill_message_create = netdev_ip6tnl_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+};
+
+const NetDevVTable erspan_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = erspan_init,
+ .sections = "Match\0NetDev\0Tunnel\0",
+ .fill_message_create = netdev_erspan_fill_message_create,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .config_verify = netdev_tunnel_verify,
+};
diff --git a/src/network/netdev/tunnel.h b/src/network/netdev/tunnel.h
new file mode 100644
index 0000000..8f511dd
--- /dev/null
+++ b/src/network/netdev/tunnel.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "in-addr-util.h"
+
+#include "conf-parser.h"
+#include "netdev/netdev.h"
+#include "netdev/fou-tunnel.h"
+
+typedef enum Ip6TnlMode {
+ NETDEV_IP6_TNL_MODE_IP6IP6,
+ NETDEV_IP6_TNL_MODE_IPIP6,
+ NETDEV_IP6_TNL_MODE_ANYIP6,
+ _NETDEV_IP6_TNL_MODE_MAX,
+ _NETDEV_IP6_TNL_MODE_INVALID = -1,
+} Ip6TnlMode;
+
+typedef enum IPv6FlowLabel {
+ NETDEV_IPV6_FLOWLABEL_INHERIT = 0xFFFFF + 1,
+ _NETDEV_IPV6_FLOWLABEL_MAX,
+ _NETDEV_IPV6_FLOWLABEL_INVALID = -1,
+} IPv6FlowLabel;
+
+typedef struct Tunnel {
+ NetDev meta;
+
+ uint8_t encap_limit;
+
+ int family;
+ int ipv6_flowlabel;
+ int allow_localremote;
+ int erspan_sequence;
+ int isatap;
+
+ unsigned ttl;
+ unsigned tos;
+ unsigned flags;
+
+ uint32_t key;
+ uint32_t ikey;
+ uint32_t okey;
+ uint32_t erspan_index;
+
+ union in_addr_union local;
+ union in_addr_union remote;
+
+ Ip6TnlMode ip6tnl_mode;
+ FooOverUDPEncapType fou_encap_type;
+
+ bool pmtudisc;
+ bool copy_dscp;
+ bool independent;
+ bool fou_tunnel;
+
+ uint16_t encap_src_port;
+ uint16_t fou_destination_port;
+
+ struct in6_addr sixrd_prefix;
+ uint8_t sixrd_prefixlen;
+} Tunnel;
+
+DEFINE_NETDEV_CAST(IPIP, Tunnel);
+DEFINE_NETDEV_CAST(GRE, Tunnel);
+DEFINE_NETDEV_CAST(GRETAP, Tunnel);
+DEFINE_NETDEV_CAST(IP6GRE, Tunnel);
+DEFINE_NETDEV_CAST(IP6GRETAP, Tunnel);
+DEFINE_NETDEV_CAST(SIT, Tunnel);
+DEFINE_NETDEV_CAST(VTI, Tunnel);
+DEFINE_NETDEV_CAST(VTI6, Tunnel);
+DEFINE_NETDEV_CAST(IP6TNL, Tunnel);
+DEFINE_NETDEV_CAST(ERSPAN, Tunnel);
+extern const NetDevVTable ipip_vtable;
+extern const NetDevVTable sit_vtable;
+extern const NetDevVTable vti_vtable;
+extern const NetDevVTable vti6_vtable;
+extern const NetDevVTable gre_vtable;
+extern const NetDevVTable gretap_vtable;
+extern const NetDevVTable ip6gre_vtable;
+extern const NetDevVTable ip6gretap_vtable;
+extern const NetDevVTable ip6tnl_vtable;
+extern const NetDevVTable erspan_vtable;
+
+const char *ip6tnl_mode_to_string(Ip6TnlMode d) _const_;
+Ip6TnlMode ip6tnl_mode_from_string(const char *d) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ip6tnl_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_tunnel_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_ipv6_flowlabel);
+CONFIG_PARSER_PROTOTYPE(config_parse_encap_limit);
+CONFIG_PARSER_PROTOTYPE(config_parse_tunnel_key);
+CONFIG_PARSER_PROTOTYPE(config_parse_6rd_prefix);
diff --git a/src/network/netdev/tuntap.c b/src/network/netdev/tuntap.c
new file mode 100644
index 0000000..951138d
--- /dev/null
+++ b/src/network/netdev/tuntap.c
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/if_tun.h>
+#include <net/if.h>
+#include <netinet/if_ether.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "netdev/tuntap.h"
+#include "user-util.h"
+
+#define TUN_DEV "/dev/net/tun"
+
+static int netdev_fill_tuntap_message(NetDev *netdev, struct ifreq *ifr) {
+ TunTap *t;
+
+ assert(netdev);
+ assert(netdev->ifname);
+ assert(ifr);
+
+ if (netdev->kind == NETDEV_KIND_TAP) {
+ t = TAP(netdev);
+ ifr->ifr_flags |= IFF_TAP;
+ } else {
+ t = TUN(netdev);
+ ifr->ifr_flags |= IFF_TUN;
+ }
+
+ if (!t->packet_info)
+ ifr->ifr_flags |= IFF_NO_PI;
+
+ if (t->one_queue)
+ ifr->ifr_flags |= IFF_ONE_QUEUE;
+
+ if (t->multi_queue)
+ ifr->ifr_flags |= IFF_MULTI_QUEUE;
+
+ if (t->vnet_hdr)
+ ifr->ifr_flags |= IFF_VNET_HDR;
+
+ strncpy(ifr->ifr_name, netdev->ifname, IFNAMSIZ-1);
+
+ return 0;
+}
+
+static int netdev_tuntap_add(NetDev *netdev, struct ifreq *ifr) {
+ _cleanup_close_ int fd;
+ TunTap *t = NULL;
+ const char *user;
+ const char *group;
+ uid_t uid;
+ gid_t gid;
+ int r;
+
+ assert(netdev);
+ assert(ifr);
+
+ fd = open(TUN_DEV, O_RDWR|O_CLOEXEC);
+ if (fd < 0)
+ return log_netdev_error_errno(netdev, -errno, "Failed to open tun dev: %m");
+
+ if (ioctl(fd, TUNSETIFF, ifr) < 0)
+ return log_netdev_error_errno(netdev, -errno, "TUNSETIFF failed on tun dev: %m");
+
+ if (netdev->kind == NETDEV_KIND_TAP)
+ t = TAP(netdev);
+ else
+ t = TUN(netdev);
+
+ assert(t);
+
+ if (t->user_name) {
+ user = t->user_name;
+
+ r = get_user_creds(&user, &uid, NULL, NULL, NULL, USER_CREDS_ALLOW_MISSING);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Cannot resolve user name %s: %m", t->user_name);
+
+ if (ioctl(fd, TUNSETOWNER, uid) < 0)
+ return log_netdev_error_errno(netdev, -errno, "TUNSETOWNER failed on tun dev: %m");
+ }
+
+ if (t->group_name) {
+ group = t->group_name;
+
+ r = get_group_creds(&group, &gid, USER_CREDS_ALLOW_MISSING);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Cannot resolve group name %s: %m", t->group_name);
+
+ if (ioctl(fd, TUNSETGROUP, gid) < 0)
+ return log_netdev_error_errno(netdev, -errno, "TUNSETGROUP failed on tun dev: %m");
+
+ }
+
+ if (ioctl(fd, TUNSETPERSIST, 1) < 0)
+ return log_netdev_error_errno(netdev, -errno, "TUNSETPERSIST failed on tun dev: %m");
+
+ return 0;
+}
+
+static int netdev_create_tuntap(NetDev *netdev) {
+ struct ifreq ifr = {};
+ int r;
+
+ r = netdev_fill_tuntap_message(netdev, &ifr);
+ if (r < 0)
+ return r;
+
+ return netdev_tuntap_add(netdev, &ifr);
+}
+
+static void tuntap_done(NetDev *netdev) {
+ TunTap *t = NULL;
+
+ assert(netdev);
+
+ if (netdev->kind == NETDEV_KIND_TUN)
+ t = TUN(netdev);
+ else
+ t = TAP(netdev);
+
+ assert(t);
+
+ t->user_name = mfree(t->user_name);
+ t->group_name = mfree(t->group_name);
+}
+
+static int tuntap_verify(NetDev *netdev, const char *filename) {
+ assert(netdev);
+
+ if (netdev->mtu != 0)
+ log_netdev_warning(netdev, "MTU configured for %s, ignoring", netdev_kind_to_string(netdev->kind));
+
+ if (netdev->mac)
+ log_netdev_warning(netdev, "MAC configured for %s, ignoring", netdev_kind_to_string(netdev->kind));
+
+ return 0;
+}
+
+const NetDevVTable tun_vtable = {
+ .object_size = sizeof(TunTap),
+ .sections = "Match\0NetDev\0Tun\0",
+ .config_verify = tuntap_verify,
+ .done = tuntap_done,
+ .create = netdev_create_tuntap,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+};
+
+const NetDevVTable tap_vtable = {
+ .object_size = sizeof(TunTap),
+ .sections = "Match\0NetDev\0Tap\0",
+ .config_verify = tuntap_verify,
+ .done = tuntap_done,
+ .create = netdev_create_tuntap,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+};
diff --git a/src/network/netdev/tuntap.h b/src/network/netdev/tuntap.h
new file mode 100644
index 0000000..9d9f992
--- /dev/null
+++ b/src/network/netdev/tuntap.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct TunTap TunTap;
+
+#include "netdev/netdev.h"
+
+struct TunTap {
+ NetDev meta;
+
+ char *user_name;
+ char *group_name;
+ bool one_queue;
+ bool multi_queue;
+ bool packet_info;
+ bool vnet_hdr;
+};
+
+DEFINE_NETDEV_CAST(TUN, TunTap);
+DEFINE_NETDEV_CAST(TAP, TunTap);
+extern const NetDevVTable tun_vtable;
+extern const NetDevVTable tap_vtable;
diff --git a/src/network/netdev/vcan.c b/src/network/netdev/vcan.c
new file mode 100644
index 0000000..574d1ca
--- /dev/null
+++ b/src/network/netdev/vcan.c
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "netdev/vcan.h"
+
+const NetDevVTable vcan_vtable = {
+ .object_size = sizeof(VCan),
+ .sections = "Match\0NetDev\0",
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+};
diff --git a/src/network/netdev/vcan.h b/src/network/netdev/vcan.h
new file mode 100644
index 0000000..6f62686
--- /dev/null
+++ b/src/network/netdev/vcan.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct VCan VCan;
+
+#include <linux/can/netlink.h>
+
+#include "netdev/netdev.h"
+
+struct VCan {
+ NetDev meta;
+};
+
+DEFINE_NETDEV_CAST(VCAN, VCan);
+
+extern const NetDevVTable vcan_vtable;
diff --git a/src/network/netdev/veth.c b/src/network/netdev/veth.c
new file mode 100644
index 0000000..3ad95ad
--- /dev/null
+++ b/src/network/netdev/veth.c
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <linux/veth.h>
+#include <net/if.h>
+
+#include "sd-netlink.h"
+
+#include "netdev/veth.h"
+
+static int netdev_veth_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Veth *v;
+ int r;
+
+ assert(netdev);
+ assert(!link);
+ assert(m);
+
+ v = VETH(netdev);
+
+ assert(v);
+
+ r = sd_netlink_message_open_container(m, VETH_INFO_PEER);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append VETH_INFO_PEER attribute: %m");
+
+ if (v->ifname_peer) {
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, v->ifname_peer);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+ }
+
+ if (v->mac_peer) {
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, v->mac_peer);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_ADDRESS attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ return r;
+}
+
+static int netdev_veth_verify(NetDev *netdev, const char *filename) {
+ Veth *v;
+ int r;
+
+ assert(netdev);
+ assert(filename);
+
+ v = VETH(netdev);
+
+ assert(v);
+
+ if (!v->ifname_peer) {
+ log_warning("Veth NetDev without peer name configured in %s. Ignoring",
+ filename);
+ return -EINVAL;
+ }
+
+ if (!v->mac_peer) {
+ r = netdev_get_mac(v->ifname_peer, &v->mac_peer);
+ if (r < 0) {
+ log_warning("Failed to generate predictable MAC address for %s. Ignoring",
+ v->ifname_peer);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static void veth_done(NetDev *n) {
+ Veth *v;
+
+ assert(n);
+
+ v = VETH(n);
+
+ assert(v);
+
+ free(v->ifname_peer);
+ free(v->mac_peer);
+}
+
+const NetDevVTable veth_vtable = {
+ .object_size = sizeof(Veth),
+ .sections = "Match\0NetDev\0Peer\0",
+ .done = veth_done,
+ .fill_message_create = netdev_veth_fill_message_create,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .config_verify = netdev_veth_verify,
+};
diff --git a/src/network/netdev/veth.h b/src/network/netdev/veth.h
new file mode 100644
index 0000000..0bb9947
--- /dev/null
+++ b/src/network/netdev/veth.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Veth Veth;
+
+#include "netdev/netdev.h"
+
+struct Veth {
+ NetDev meta;
+
+ char *ifname_peer;
+ struct ether_addr *mac_peer;
+};
+
+DEFINE_NETDEV_CAST(VETH, Veth);
+extern const NetDevVTable veth_vtable;
diff --git a/src/network/netdev/vlan.c b/src/network/netdev/vlan.c
new file mode 100644
index 0000000..dd548b3
--- /dev/null
+++ b/src/network/netdev/vlan.c
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <linux/if_vlan.h>
+#include <net/if.h>
+
+#include "netdev/vlan.h"
+#include "vlan-util.h"
+
+static int netdev_vlan_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *req) {
+ struct ifla_vlan_flags flags = {};
+ VLan *v;
+ int r;
+
+ assert(netdev);
+ assert(link);
+ assert(req);
+
+ v = VLAN(netdev);
+
+ assert(v);
+
+ r = sd_netlink_message_append_u16(req, IFLA_VLAN_ID, v->id);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VLAN_ID attribute: %m");
+
+ if (v->gvrp != -1) {
+ flags.mask |= VLAN_FLAG_GVRP;
+ SET_FLAG(flags.flags, VLAN_FLAG_GVRP, v->gvrp);
+ }
+
+ if (v->mvrp != -1) {
+ flags.mask |= VLAN_FLAG_MVRP;
+ SET_FLAG(flags.flags, VLAN_FLAG_MVRP, v->mvrp);
+ }
+
+ if (v->reorder_hdr != -1) {
+ flags.mask |= VLAN_FLAG_REORDER_HDR;
+ SET_FLAG(flags.flags, VLAN_FLAG_REORDER_HDR, v->reorder_hdr);
+ }
+
+ if (v->loose_binding != -1) {
+ flags.mask |= VLAN_FLAG_LOOSE_BINDING;
+ SET_FLAG(flags.flags, VLAN_FLAG_LOOSE_BINDING, v->loose_binding);
+ }
+
+ r = sd_netlink_message_append_data(req, IFLA_VLAN_FLAGS, &flags, sizeof(struct ifla_vlan_flags));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VLAN_FLAGS attribute: %m");
+
+ return 0;
+}
+
+static int netdev_vlan_verify(NetDev *netdev, const char *filename) {
+ VLan *v;
+
+ assert(netdev);
+ assert(filename);
+
+ v = VLAN(netdev);
+
+ assert(v);
+
+ if (v->id == VLANID_INVALID) {
+ log_warning("VLAN without valid Id (%"PRIu16") configured in %s.", v->id, filename);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void vlan_init(NetDev *netdev) {
+ VLan *v = VLAN(netdev);
+
+ assert(netdev);
+ assert(v);
+
+ v->id = VLANID_INVALID;
+ v->gvrp = -1;
+ v->mvrp = -1;
+ v->loose_binding = -1;
+ v->reorder_hdr = -1;
+}
+
+const NetDevVTable vlan_vtable = {
+ .object_size = sizeof(VLan),
+ .init = vlan_init,
+ .sections = "Match\0NetDev\0VLAN\0",
+ .fill_message_create = netdev_vlan_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_vlan_verify,
+};
diff --git a/src/network/netdev/vlan.h b/src/network/netdev/vlan.h
new file mode 100644
index 0000000..b815e03
--- /dev/null
+++ b/src/network/netdev/vlan.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct VLan VLan;
+
+#include "netdev/netdev.h"
+
+struct VLan {
+ NetDev meta;
+
+ uint16_t id;
+
+ int gvrp;
+ int mvrp;
+ int loose_binding;
+ int reorder_hdr;
+};
+
+DEFINE_NETDEV_CAST(VLAN, VLan);
+extern const NetDevVTable vlan_vtable;
diff --git a/src/network/netdev/vrf.c b/src/network/netdev/vrf.c
new file mode 100644
index 0000000..5efab53
--- /dev/null
+++ b/src/network/netdev/vrf.c
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+
+#include "sd-netlink.h"
+#include "missing.h"
+#include "netdev/vrf.h"
+
+static int netdev_vrf_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Vrf *v;
+ int r;
+
+ assert(netdev);
+ assert(!link);
+ assert(m);
+
+ v = VRF(netdev);
+
+ assert(v);
+
+ r = sd_netlink_message_append_u32(m, IFLA_VRF_TABLE, v->table);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IPLA_VRF_TABLE attribute: %m");
+
+ return r;
+}
+
+const NetDevVTable vrf_vtable = {
+ .object_size = sizeof(Vrf),
+ .sections = "Match\0NetDev\0VRF\0",
+ .fill_message_create = netdev_vrf_fill_message_create,
+ .create_type = NETDEV_CREATE_MASTER,
+};
diff --git a/src/network/netdev/vrf.h b/src/network/netdev/vrf.h
new file mode 100644
index 0000000..05b3937
--- /dev/null
+++ b/src/network/netdev/vrf.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct Vrf Vrf;
+
+#include "netdev/netdev.h"
+
+struct Vrf {
+ NetDev meta;
+
+ uint32_t table;
+};
+
+DEFINE_NETDEV_CAST(VRF, Vrf);
+extern const NetDevVTable vrf_vtable;
diff --git a/src/network/netdev/vxcan.c b/src/network/netdev/vxcan.c
new file mode 100644
index 0000000..e8ea70a
--- /dev/null
+++ b/src/network/netdev/vxcan.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_LINUX_CAN_VXCAN_H
+#include <linux/can/vxcan.h>
+#endif
+
+#include "missing.h"
+#include "netdev/vxcan.h"
+
+static int netdev_vxcan_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ VxCan *v;
+ int r;
+
+ assert(netdev);
+ assert(!link);
+ assert(m);
+
+ v = VXCAN(netdev);
+
+ assert(v);
+
+ r = sd_netlink_message_open_container(m, VXCAN_INFO_PEER);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append VXCAN_INFO_PEER attribute: %m");
+
+ if (v->ifname_peer) {
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, v->ifname_peer);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add vxcan netlink interface peer name: %m");
+ }
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append VXCAN_INFO_PEER attribute: %m");
+
+ return r;
+}
+
+static int netdev_vxcan_verify(NetDev *netdev, const char *filename) {
+ VxCan *v;
+
+ assert(netdev);
+ assert(filename);
+
+ v = VXCAN(netdev);
+
+ assert(v);
+
+ if (!v->ifname_peer) {
+ log_warning("VxCan NetDev without peer name configured in %s. Ignoring", filename);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void vxcan_done(NetDev *n) {
+ VxCan *v;
+
+ assert(n);
+
+ v = VXCAN(n);
+
+ assert(v);
+
+ free(v->ifname_peer);
+}
+
+const NetDevVTable vxcan_vtable = {
+ .object_size = sizeof(VxCan),
+ .sections = "Match\0NetDev\0VXCAN\0",
+ .done = vxcan_done,
+ .fill_message_create = netdev_vxcan_fill_message_create,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .config_verify = netdev_vxcan_verify,
+};
diff --git a/src/network/netdev/vxcan.h b/src/network/netdev/vxcan.h
new file mode 100644
index 0000000..b5de197
--- /dev/null
+++ b/src/network/netdev/vxcan.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct VxCan VxCan;
+
+#include "netdev/netdev.h"
+
+struct VxCan {
+ NetDev meta;
+
+ char *ifname_peer;
+};
+
+DEFINE_NETDEV_CAST(VXCAN, VxCan);
+
+extern const NetDevVTable vxcan_vtable;
diff --git a/src/network/netdev/vxlan.c b/src/network/netdev/vxlan.c
new file mode 100644
index 0000000..4cb2eca
--- /dev/null
+++ b/src/network/netdev/vxlan.c
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+
+#include "sd-netlink.h"
+
+#include "conf-parser.h"
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "string-util.h"
+#include "strv.h"
+#include "parse-util.h"
+#include "missing.h"
+
+#include "networkd-link.h"
+#include "netdev/vxlan.h"
+
+static int netdev_vxlan_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ VxLan *v;
+ int r;
+
+ assert(netdev);
+ assert(link);
+ assert(m);
+
+ v = VXLAN(netdev);
+
+ assert(v);
+
+ if (v->id <= VXLAN_VID_MAX) {
+ r = sd_netlink_message_append_u32(m, IFLA_VXLAN_ID, v->id);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_ID attribute: %m");
+ }
+
+ if (!in_addr_is_null(v->remote_family, &v->remote)) {
+
+ if (v->remote_family == AF_INET)
+ r = sd_netlink_message_append_in_addr(m, IFLA_VXLAN_GROUP, &v->remote.in);
+ else
+ r = sd_netlink_message_append_in6_addr(m, IFLA_VXLAN_GROUP6, &v->remote.in6);
+
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_GROUP attribute: %m");
+ }
+
+ if (!in_addr_is_null(v->local_family, &v->local)) {
+
+ if (v->local_family == AF_INET)
+ r = sd_netlink_message_append_in_addr(m, IFLA_VXLAN_LOCAL, &v->local.in);
+ else
+ r = sd_netlink_message_append_in6_addr(m, IFLA_VXLAN_LOCAL6, &v->local.in6);
+
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_LOCAL attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u32(m, IFLA_VXLAN_LINK, link->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_LINK attribute: %m");
+
+ if (v->ttl != 0) {
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_TTL, v->ttl);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_TTL attribute: %m");
+ }
+
+ if (v->tos != 0) {
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_TOS, v->tos);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_TOS attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_LEARNING, v->learning);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_LEARNING attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_RSC, v->route_short_circuit);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_RSC attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_PROXY, v->arp_proxy);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_PROXY attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_L2MISS, v->l2miss);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_L2MISS attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_L3MISS, v->l3miss);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_L3MISS attribute: %m");
+
+ if (v->fdb_ageing != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_VXLAN_AGEING, v->fdb_ageing / USEC_PER_SEC);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_AGEING attribute: %m");
+ }
+
+ if (v->max_fdb != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_VXLAN_LIMIT, v->max_fdb);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_LIMIT attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_UDP_CSUM, v->udpcsum);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_UDP_CSUM attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, v->udp6zerocsumtx);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_UDP_ZERO_CSUM6_TX attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, v->udp6zerocsumrx);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_UDP_ZERO_CSUM6_RX attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_REMCSUM_TX, v->remote_csum_tx);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_REMCSUM_TX attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_REMCSUM_RX, v->remote_csum_rx);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_REMCSUM_RX attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_VXLAN_PORT, htobe16(v->dest_port));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_PORT attribute: %m");
+
+ if (v->port_range.low != 0 || v->port_range.high != 0) {
+ struct ifla_vxlan_port_range port_range;
+
+ port_range.low = htobe16(v->port_range.low);
+ port_range.high = htobe16(v->port_range.high);
+
+ r = sd_netlink_message_append_data(m, IFLA_VXLAN_PORT_RANGE, &port_range, sizeof(port_range));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_PORT_RANGE attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u32(m, IFLA_VXLAN_LABEL, htobe32(v->flow_label));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_LABEL attribute: %m");
+
+ if (v->group_policy) {
+ r = sd_netlink_message_append_flag(m, IFLA_VXLAN_GBP);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_GBP attribute: %m");
+ }
+
+ return r;
+}
+
+int config_parse_vxlan_address(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ VxLan *v = userdata;
+ union in_addr_union *addr = data, buffer;
+ int r, f;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = in_addr_from_string_auto(rvalue, &f, &buffer);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "vxlan '%s' address is invalid, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ r = in_addr_is_multicast(f, &buffer);
+
+ if (streq(lvalue, "Group")) {
+ if (r <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "vxlan %s invalid multicast address, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ v->remote_family = f;
+ } else {
+ if (r > 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "vxlan %s cannot be a multicast address, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "Remote"))
+ v->remote_family = f;
+ else
+ v->local_family = f;
+ }
+
+ *addr = buffer;
+
+ return 0;
+}
+
+int config_parse_port_range(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ VxLan *v = userdata;
+ uint16_t low, high;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_ip_port_range(rvalue, &low, &high);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to parse VXLAN port range '%s'. Port should be greater than 0 and less than 65535.", rvalue);
+ return 0;
+ }
+
+ v->port_range.low = low;
+ v->port_range.high = high;
+
+ return 0;
+}
+
+int config_parse_flow_label(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ VxLan *v = userdata;
+ unsigned f;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou(rvalue, &f);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse VXLAN flow label '%s'.", rvalue);
+ return 0;
+ }
+
+ if (f & ~VXLAN_FLOW_LABEL_MAX_MASK) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "VXLAN flow label '%s' not valid. Flow label range should be [0-1048575].", rvalue);
+ return 0;
+ }
+
+ v->flow_label = f;
+
+ return 0;
+}
+
+static int netdev_vxlan_verify(NetDev *netdev, const char *filename) {
+ VxLan *v = VXLAN(netdev);
+
+ assert(netdev);
+ assert(v);
+ assert(filename);
+
+ if (v->id > VXLAN_VID_MAX) {
+ log_warning("VXLAN without valid Id configured in %s. Ignoring", filename);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void vxlan_init(NetDev *netdev) {
+ VxLan *v;
+
+ assert(netdev);
+
+ v = VXLAN(netdev);
+
+ assert(v);
+
+ v->id = VXLAN_VID_MAX + 1;
+ v->learning = true;
+ v->udpcsum = false;
+ v->udp6zerocsumtx = false;
+ v->udp6zerocsumrx = false;
+}
+
+const NetDevVTable vxlan_vtable = {
+ .object_size = sizeof(VxLan),
+ .init = vxlan_init,
+ .sections = "Match\0NetDev\0VXLAN\0",
+ .fill_message_create = netdev_vxlan_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_vxlan_verify,
+};
diff --git a/src/network/netdev/vxlan.h b/src/network/netdev/vxlan.h
new file mode 100644
index 0000000..3b273e9
--- /dev/null
+++ b/src/network/netdev/vxlan.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct VxLan VxLan;
+
+#include "in-addr-util.h"
+#include "netdev/netdev.h"
+
+#define VXLAN_VID_MAX (1u << 24) - 1
+#define VXLAN_FLOW_LABEL_MAX_MASK 0xFFFFFU
+
+struct VxLan {
+ NetDev meta;
+
+ uint64_t id;
+
+ int remote_family;
+ int local_family;
+
+ union in_addr_union remote;
+ union in_addr_union local;
+
+ unsigned tos;
+ unsigned ttl;
+ unsigned max_fdb;
+ unsigned flow_label;
+
+ uint16_t dest_port;
+
+ usec_t fdb_ageing;
+
+ bool learning;
+ bool arp_proxy;
+ bool route_short_circuit;
+ bool l2miss;
+ bool l3miss;
+ bool udpcsum;
+ bool udp6zerocsumtx;
+ bool udp6zerocsumrx;
+ bool remote_csum_tx;
+ bool remote_csum_rx;
+ bool group_policy;
+
+ struct ifla_vxlan_port_range port_range;
+};
+
+DEFINE_NETDEV_CAST(VXLAN, VxLan);
+extern const NetDevVTable vxlan_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_vxlan_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_port_range);
+CONFIG_PARSER_PROTOTYPE(config_parse_flow_label);
diff --git a/src/network/netdev/wireguard.c b/src/network/netdev/wireguard.c
new file mode 100644
index 0000000..0c0b16d
--- /dev/null
+++ b/src/network/netdev/wireguard.c
@@ -0,0 +1,778 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+***/
+
+#include <sys/ioctl.h>
+#include <net/if.h>
+
+#include "sd-resolve.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hexdecoct.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-util.h"
+#include "parse-util.h"
+#include "resolve-private.h"
+#include "string-util.h"
+#include "strv.h"
+#include "wireguard.h"
+#include "wireguard-netlink.h"
+
+static void resolve_endpoints(NetDev *netdev);
+
+static WireguardPeer *wireguard_peer_new(Wireguard *w, unsigned section) {
+ WireguardPeer *peer;
+
+ assert(w);
+
+ if (w->last_peer_section == section && w->peers)
+ return w->peers;
+
+ peer = new(WireguardPeer, 1);
+ if (!peer)
+ return NULL;
+
+ *peer = (WireguardPeer) {
+ .flags = WGPEER_F_REPLACE_ALLOWEDIPS,
+ };
+
+ LIST_PREPEND(peers, w->peers, peer);
+ w->last_peer_section = section;
+
+ return peer;
+}
+
+static int wireguard_set_ipmask_one(NetDev *netdev, sd_netlink_message *message, const WireguardIPmask *mask, uint16_t index) {
+ int r;
+
+ assert(message);
+ assert(mask);
+ assert(index > 0);
+
+ /* This returns 1 on success, 0 on recoverable error, and negative errno on failure. */
+
+ r = sd_netlink_message_open_array(message, index);
+ if (r < 0)
+ return 0;
+
+ r = sd_netlink_message_append_u16(message, WGALLOWEDIP_A_FAMILY, mask->family);
+ if (r < 0)
+ goto cancel;
+
+ if (mask->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(message, WGALLOWEDIP_A_IPADDR, &mask->ip.in);
+ else if (mask->family == AF_INET6)
+ r = sd_netlink_message_append_in6_addr(message, WGALLOWEDIP_A_IPADDR, &mask->ip.in6);
+ if (r < 0)
+ goto cancel;
+
+ r = sd_netlink_message_append_u8(message, WGALLOWEDIP_A_CIDR_MASK, mask->cidr);
+ if (r < 0)
+ goto cancel;
+
+ r = sd_netlink_message_close_container(message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not add wireguard allowed ip: %m");
+
+ return 1;
+
+cancel:
+ r = sd_netlink_message_cancel_array(message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not cancel wireguard allowed ip message attribute: %m");
+
+ return 0;
+}
+
+static int wireguard_set_peer_one(NetDev *netdev, sd_netlink_message *message, const WireguardPeer *peer, uint16_t index, WireguardIPmask **mask_start) {
+ WireguardIPmask *mask, *start;
+ uint16_t j = 0;
+ int r;
+
+ assert(message);
+ assert(peer);
+ assert(index > 0);
+ assert(mask_start);
+
+ /* This returns 1 on success, 0 on recoverable error, and negative errno on failure. */
+
+ start = *mask_start ?: peer->ipmasks;
+
+ r = sd_netlink_message_open_array(message, index);
+ if (r < 0)
+ return 0;
+
+ r = sd_netlink_message_append_data(message, WGPEER_A_PUBLIC_KEY, &peer->public_key, sizeof(peer->public_key));
+ if (r < 0)
+ goto cancel;
+
+ if (!*mask_start) {
+ r = sd_netlink_message_append_data(message, WGPEER_A_PRESHARED_KEY, &peer->preshared_key, WG_KEY_LEN);
+ if (r < 0)
+ goto cancel;
+
+ r = sd_netlink_message_append_u32(message, WGPEER_A_FLAGS, peer->flags);
+ if (r < 0)
+ goto cancel;
+
+ r = sd_netlink_message_append_u16(message, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, peer->persistent_keepalive_interval);
+ if (r < 0)
+ goto cancel;
+
+ if (peer->endpoint.sa.sa_family == AF_INET)
+ r = sd_netlink_message_append_sockaddr_in(message, WGPEER_A_ENDPOINT, &peer->endpoint.in);
+ else if (peer->endpoint.sa.sa_family == AF_INET6)
+ r = sd_netlink_message_append_sockaddr_in6(message, WGPEER_A_ENDPOINT, &peer->endpoint.in6);
+ if (r < 0)
+ goto cancel;
+ }
+
+ r = sd_netlink_message_open_container(message, WGPEER_A_ALLOWEDIPS);
+ if (r < 0)
+ goto cancel;
+
+ LIST_FOREACH(ipmasks, mask, start) {
+ r = wireguard_set_ipmask_one(netdev, message, mask, ++j);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ r = sd_netlink_message_close_container(message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not add wireguard allowed ip: %m");
+
+ r = sd_netlink_message_close_container(message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not add wireguard peer: %m");
+
+ *mask_start = mask; /* Start next cycle from this mask. */
+ return !mask;
+
+cancel:
+ r = sd_netlink_message_cancel_array(message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not cancel wireguard peers: %m");
+
+ return 0;
+}
+
+static int wireguard_set_interface(NetDev *netdev) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ WireguardIPmask *mask_start = NULL;
+ WireguardPeer *peer, *peer_start;
+ uint32_t serial;
+ Wireguard *w;
+ int r;
+
+ assert(netdev);
+ w = WIREGUARD(netdev);
+ assert(w);
+
+ for (peer_start = w->peers; peer_start; ) {
+ uint16_t i = 0;
+
+ message = sd_netlink_message_unref(message);
+
+ r = sd_genl_message_new(netdev->manager->genl, SD_GENL_WIREGUARD, WG_CMD_SET_DEVICE, &message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to allocate generic netlink message: %m");
+
+ r = sd_netlink_message_append_string(message, WGDEVICE_A_IFNAME, netdev->ifname);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append wireguard interface name: %m");
+
+ if (peer_start == w->peers) {
+ r = sd_netlink_message_append_data(message, WGDEVICE_A_PRIVATE_KEY, &w->private_key, WG_KEY_LEN);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append wireguard private key: %m");
+
+ r = sd_netlink_message_append_u16(message, WGDEVICE_A_LISTEN_PORT, w->port);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append wireguard port: %m");
+
+ r = sd_netlink_message_append_u32(message, WGDEVICE_A_FWMARK, w->fwmark);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append wireguard fwmark: %m");
+
+ r = sd_netlink_message_append_u32(message, WGDEVICE_A_FLAGS, w->flags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append wireguard flags: %m");
+ }
+
+ r = sd_netlink_message_open_container(message, WGDEVICE_A_PEERS);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append wireguard peer attributes: %m");
+
+ LIST_FOREACH(peers, peer, peer_start) {
+ r = wireguard_set_peer_one(netdev, message, peer, ++i, &mask_start);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+ peer_start = peer; /* Start next cycle from this peer. */
+
+ r = sd_netlink_message_close_container(message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not close wireguard container: %m");
+
+ r = sd_netlink_send(netdev->manager->genl, message, &serial);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not set wireguard device: %m");
+ }
+
+ return 0;
+}
+
+static WireguardEndpoint* wireguard_endpoint_free(WireguardEndpoint *e) {
+ if (!e)
+ return NULL;
+ e->host = mfree(e->host);
+ e->port = mfree(e->port);
+ return mfree(e);
+}
+
+static void wireguard_endpoint_destroy_callback(WireguardEndpoint *e) {
+ assert(e);
+ assert(e->netdev);
+
+ netdev_unref(e->netdev);
+ wireguard_endpoint_free(e);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(WireguardEndpoint*, wireguard_endpoint_free);
+
+static int on_resolve_retry(sd_event_source *s, usec_t usec, void *userdata) {
+ NetDev *netdev = userdata;
+ Wireguard *w;
+
+ assert(netdev);
+ w = WIREGUARD(netdev);
+ assert(w);
+
+ if (!netdev_is_managed(netdev))
+ return 0;
+
+ assert(!w->unresolved_endpoints);
+ w->unresolved_endpoints = TAKE_PTR(w->failed_endpoints);
+
+ resolve_endpoints(netdev);
+
+ return 0;
+}
+
+/*
+ * Given the number of retries this function will return will an exponential
+ * increasing time in milliseconds to wait starting at 200ms and capped at 25 seconds.
+ */
+static int exponential_backoff_milliseconds(unsigned n_retries) {
+ return (2 << MAX(n_retries, 7U)) * 100 * USEC_PER_MSEC;
+}
+
+static int wireguard_resolve_handler(sd_resolve_query *q,
+ int ret,
+ const struct addrinfo *ai,
+ WireguardEndpoint *e) {
+ _cleanup_(netdev_unrefp) NetDev *netdev_will_unrefed = NULL;
+ NetDev *netdev;
+ Wireguard *w;
+ int r;
+
+ assert(e);
+ assert(e->netdev);
+
+ netdev = e->netdev;
+ w = WIREGUARD(netdev);
+ assert(w);
+
+ if (!netdev_is_managed(netdev))
+ return 0;
+
+ if (ret != 0) {
+ log_netdev_error(netdev, "Failed to resolve host '%s:%s': %s", e->host, e->port, gai_strerror(ret));
+ LIST_PREPEND(endpoints, w->failed_endpoints, e);
+ (void) sd_resolve_query_set_destroy_callback(q, NULL); /* Avoid freeing endpoint by destroy callback. */
+ netdev_will_unrefed = netdev; /* But netdev needs to be unrefed. */
+ } else if ((ai->ai_family == AF_INET && ai->ai_addrlen == sizeof(struct sockaddr_in)) ||
+ (ai->ai_family == AF_INET6 && ai->ai_addrlen == sizeof(struct sockaddr_in6)))
+ memcpy(&e->peer->endpoint, ai->ai_addr, ai->ai_addrlen);
+ else
+ log_netdev_error(netdev, "Neither IPv4 nor IPv6 address found for peer endpoint: %s:%s", e->host, e->port);
+
+ if (w->unresolved_endpoints) {
+ resolve_endpoints(netdev);
+ return 0;
+ }
+
+ (void) wireguard_set_interface(netdev);
+ if (w->failed_endpoints) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
+
+ w->n_retries++;
+ r = sd_event_add_time(netdev->manager->event,
+ &s,
+ CLOCK_MONOTONIC,
+ now(CLOCK_MONOTONIC) + exponential_backoff_milliseconds(w->n_retries),
+ 0,
+ on_resolve_retry,
+ netdev);
+ if (r < 0) {
+ log_netdev_warning_errno(netdev, r, "Could not arm resolve retry handler: %m");
+ return 0;
+ }
+
+ r = sd_event_source_set_destroy_callback(s, (sd_event_destroy_t) netdev_destroy_callback);
+ if (r < 0) {
+ log_netdev_warning_errno(netdev, r, "Failed to set destroy callback to event source: %m");
+ return 0;
+ }
+
+ (void) sd_event_source_set_floating(s, true);
+ netdev_ref(netdev);
+ }
+
+ return 0;
+}
+
+static void resolve_endpoints(NetDev *netdev) {
+ static const struct addrinfo hints = {
+ .ai_family = AF_UNSPEC,
+ .ai_socktype = SOCK_DGRAM,
+ .ai_protocol = IPPROTO_UDP
+ };
+ WireguardEndpoint *endpoint;
+ Wireguard *w;
+ int r = 0;
+
+ assert(netdev);
+ w = WIREGUARD(netdev);
+ assert(w);
+
+ LIST_FOREACH(endpoints, endpoint, w->unresolved_endpoints) {
+ r = resolve_getaddrinfo(netdev->manager->resolve,
+ NULL,
+ endpoint->host,
+ endpoint->port,
+ &hints,
+ wireguard_resolve_handler,
+ wireguard_endpoint_destroy_callback,
+ endpoint);
+
+ if (r == -ENOBUFS)
+ break;
+ if (r < 0) {
+ log_netdev_error_errno(netdev, r, "Failed to create resolver: %m");
+ continue;
+ }
+
+ /* Avoid freeing netdev. It will be unrefed by the destroy callback. */
+ netdev_ref(netdev);
+
+ LIST_REMOVE(endpoints, w->unresolved_endpoints, endpoint);
+ }
+}
+
+static int netdev_wireguard_post_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Wireguard *w;
+
+ assert(netdev);
+ w = WIREGUARD(netdev);
+ assert(w);
+
+ (void) wireguard_set_interface(netdev);
+ resolve_endpoints(netdev);
+ return 0;
+}
+
+int config_parse_wireguard_listen_port(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ uint16_t *s = data;
+ uint16_t port = 0;
+ int r;
+
+ assert(rvalue);
+ assert(data);
+
+ if (!streq(rvalue, "auto")) {
+ r = parse_ip_port(rvalue, &port);
+ if (r < 0)
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid port specification, ignoring assignment: %s", rvalue);
+ }
+
+ *s = port;
+
+ return 0;
+}
+
+static int parse_wireguard_key(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ _cleanup_free_ void *key = NULL;
+ size_t len;
+ int r;
+
+ assert(filename);
+ assert(rvalue);
+ assert(userdata);
+
+ r = unbase64mem(rvalue, strlen(rvalue), &key, &len);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Could not parse wireguard key \"%s\", ignoring assignment: %m", rvalue);
+ return 0;
+ }
+ if (len != WG_KEY_LEN) {
+ log_syntax(unit, LOG_ERR, filename, line, EINVAL,
+ "Wireguard key is too short, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ memcpy(userdata, key, WG_KEY_LEN);
+ return true;
+}
+
+int config_parse_wireguard_private_key(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Wireguard *w;
+
+ assert(data);
+
+ w = WIREGUARD(data);
+
+ assert(w);
+
+ return parse_wireguard_key(unit,
+ filename,
+ line,
+ section,
+ section_line,
+ lvalue,
+ ltype,
+ rvalue,
+ data,
+ &w->private_key);
+
+}
+
+int config_parse_wireguard_preshared_key(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Wireguard *w;
+ WireguardPeer *peer;
+
+ assert(data);
+
+ w = WIREGUARD(data);
+
+ assert(w);
+
+ peer = wireguard_peer_new(w, section_line);
+ if (!peer)
+ return log_oom();
+
+ return parse_wireguard_key(unit,
+ filename,
+ line,
+ section,
+ section_line,
+ lvalue,
+ ltype,
+ rvalue,
+ data,
+ peer->preshared_key);
+}
+
+int config_parse_wireguard_public_key(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Wireguard *w;
+ WireguardPeer *peer;
+
+ assert(data);
+
+ w = WIREGUARD(data);
+
+ assert(w);
+
+ peer = wireguard_peer_new(w, section_line);
+ if (!peer)
+ return log_oom();
+
+ return parse_wireguard_key(unit,
+ filename,
+ line,
+ section,
+ section_line,
+ lvalue,
+ ltype,
+ rvalue,
+ data,
+ peer->public_key);
+}
+
+int config_parse_wireguard_allowed_ips(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ union in_addr_union addr;
+ unsigned char prefixlen;
+ int r, family;
+ Wireguard *w;
+ WireguardPeer *peer;
+ WireguardIPmask *ipmask;
+
+ assert(rvalue);
+ assert(data);
+
+ w = WIREGUARD(data);
+
+ peer = wireguard_peer_new(w, section_line);
+ if (!peer)
+ return log_oom();
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&rvalue, &word, "," WHITESPACE, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to split allowed ips \"%s\" option: %m", rvalue);
+ break;
+ }
+
+ r = in_addr_prefix_from_string_auto(word, &family, &addr, &prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Network address is invalid, ignoring assignment: %s", word);
+ return 0;
+ }
+
+ ipmask = new(WireguardIPmask, 1);
+ if (!ipmask)
+ return log_oom();
+
+ *ipmask = (WireguardIPmask) {
+ .family = family,
+ .ip.in6 = addr.in6,
+ .cidr = prefixlen,
+ };
+
+ LIST_PREPEND(ipmasks, peer->ipmasks, ipmask);
+ }
+
+ return 0;
+}
+
+int config_parse_wireguard_endpoint(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Wireguard *w;
+ WireguardPeer *peer;
+ size_t len;
+ const char *begin, *end = NULL;
+ _cleanup_free_ char *host = NULL, *port = NULL;
+ _cleanup_(wireguard_endpoint_freep) WireguardEndpoint *endpoint = NULL;
+
+ assert(data);
+ assert(rvalue);
+
+ w = WIREGUARD(data);
+
+ assert(w);
+
+ peer = wireguard_peer_new(w, section_line);
+ if (!peer)
+ return log_oom();
+
+ if (rvalue[0] == '[') {
+ begin = &rvalue[1];
+ end = strchr(rvalue, ']');
+ if (!end) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Unable to find matching brace of endpoint, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+ len = end - begin;
+ ++end;
+ if (*end != ':' || !*(end + 1)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Unable to find port of endpoint: %s", rvalue);
+ return 0;
+ }
+ ++end;
+ } else {
+ begin = rvalue;
+ end = strrchr(rvalue, ':');
+ if (!end || !*(end + 1)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Unable to find port of endpoint: %s", rvalue);
+ return 0;
+ }
+ len = end - begin;
+ ++end;
+ }
+
+ host = strndup(begin, len);
+ if (!host)
+ return log_oom();
+
+ port = strdup(end);
+ if (!port)
+ return log_oom();
+
+ endpoint = new(WireguardEndpoint, 1);
+ if (!endpoint)
+ return log_oom();
+
+ *endpoint = (WireguardEndpoint) {
+ .peer = TAKE_PTR(peer),
+ .host = TAKE_PTR(host),
+ .port = TAKE_PTR(port),
+ .netdev = data,
+ };
+ LIST_PREPEND(endpoints, w->unresolved_endpoints, TAKE_PTR(endpoint));
+
+ return 0;
+}
+
+int config_parse_wireguard_keepalive(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ int r;
+ uint16_t keepalive = 0;
+ Wireguard *w;
+ WireguardPeer *peer;
+
+ assert(rvalue);
+ assert(data);
+
+ w = WIREGUARD(data);
+
+ assert(w);
+
+ peer = wireguard_peer_new(w, section_line);
+ if (!peer)
+ return log_oom();
+
+ if (streq(rvalue, "off"))
+ keepalive = 0;
+ else {
+ r = safe_atou16(rvalue, &keepalive);
+ if (r < 0)
+ log_syntax(unit, LOG_ERR, filename, line, r, "The persistent keepalive interval must be 0-65535. Ignore assignment: %s", rvalue);
+ }
+
+ peer->persistent_keepalive_interval = keepalive;
+ return 0;
+}
+
+static void wireguard_init(NetDev *netdev) {
+ Wireguard *w;
+
+ assert(netdev);
+
+ w = WIREGUARD(netdev);
+
+ assert(w);
+
+ w->flags = WGDEVICE_F_REPLACE_PEERS;
+}
+
+static void wireguard_done(NetDev *netdev) {
+ Wireguard *w;
+ WireguardPeer *peer;
+ WireguardIPmask *mask;
+ WireguardEndpoint *e;
+
+ assert(netdev);
+ w = WIREGUARD(netdev);
+ assert(w);
+
+ while ((peer = w->peers)) {
+ LIST_REMOVE(peers, w->peers, peer);
+ while ((mask = peer->ipmasks)) {
+ LIST_REMOVE(ipmasks, peer->ipmasks, mask);
+ free(mask);
+ }
+ free(peer);
+ }
+
+ while ((e = w->unresolved_endpoints)) {
+ LIST_REMOVE(endpoints, w->unresolved_endpoints, e);
+ wireguard_endpoint_free(e);
+ }
+
+ while ((e = w->failed_endpoints)) {
+ LIST_REMOVE(endpoints, w->failed_endpoints, e);
+ wireguard_endpoint_free(e);
+ }
+}
+
+const NetDevVTable wireguard_vtable = {
+ .object_size = sizeof(Wireguard),
+ .sections = "Match\0NetDev\0WireGuard\0WireGuardPeer\0",
+ .post_create = netdev_wireguard_post_create,
+ .init = wireguard_init,
+ .done = wireguard_done,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+};
diff --git a/src/network/netdev/wireguard.h b/src/network/netdev/wireguard.h
new file mode 100644
index 0000000..143e93c
--- /dev/null
+++ b/src/network/netdev/wireguard.h
@@ -0,0 +1,73 @@
+#pragma once
+
+typedef struct Wireguard Wireguard;
+
+#include "in-addr-util.h"
+#include "netdev.h"
+#include "socket-util.h"
+#include "wireguard-netlink.h"
+
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
+typedef struct WireguardIPmask {
+ uint16_t family;
+ union in_addr_union ip;
+ uint8_t cidr;
+
+ LIST_FIELDS(struct WireguardIPmask, ipmasks);
+} WireguardIPmask;
+
+typedef struct WireguardPeer {
+ uint8_t public_key[WG_KEY_LEN];
+ uint8_t preshared_key[WG_KEY_LEN];
+ uint32_t flags;
+
+ union sockaddr_union endpoint;
+
+ uint16_t persistent_keepalive_interval;
+
+ LIST_HEAD(WireguardIPmask, ipmasks);
+ LIST_FIELDS(struct WireguardPeer, peers);
+} WireguardPeer;
+
+typedef struct WireguardEndpoint {
+ char *host;
+ char *port;
+
+ NetDev *netdev;
+ WireguardPeer *peer;
+
+ LIST_FIELDS(struct WireguardEndpoint, endpoints);
+} WireguardEndpoint;
+
+struct Wireguard {
+ NetDev meta;
+ unsigned last_peer_section;
+
+ uint32_t flags;
+
+ uint8_t private_key[WG_KEY_LEN];
+ uint32_t fwmark;
+
+ uint16_t port;
+
+ LIST_HEAD(WireguardPeer, peers);
+
+ LIST_HEAD(WireguardEndpoint, unresolved_endpoints);
+ LIST_HEAD(WireguardEndpoint, failed_endpoints);
+ unsigned n_retries;
+};
+
+DEFINE_NETDEV_CAST(WIREGUARD, Wireguard);
+extern const NetDevVTable wireguard_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_allowed_ips);
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_endpoint);
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_listen_port);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_public_key);
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_private_key);
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_preshared_key);
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_keepalive);
diff --git a/src/network/networkctl.c b/src/network/networkctl.c
new file mode 100644
index 0000000..b14b81c
--- /dev/null
+++ b/src/network/networkctl.c
@@ -0,0 +1,1183 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+#include <linux/if_addrlabel.h>
+#include <net/if.h>
+#include <stdbool.h>
+
+#include "sd-device.h"
+#include "sd-hwdb.h"
+#include "sd-lldp.h"
+#include "sd-netlink.h"
+#include "sd-network.h"
+
+#include "alloc-util.h"
+#include "arphrd-list.h"
+#include "device-util.h"
+#include "ether-addr-util.h"
+#include "fd-util.h"
+#include "hwdb-util.h"
+#include "local-addresses.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "main-func.h"
+#include "netlink-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "socket-util.h"
+#include "sparse-endian.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "terminal-util.h"
+#include "util.h"
+#include "verbs.h"
+
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+static bool arg_all = false;
+
+static char *link_get_type_string(unsigned short iftype, sd_device *d) {
+ const char *t, *devtype;
+ char *p;
+
+ if (d &&
+ sd_device_get_devtype(d, &devtype) >= 0 &&
+ !isempty(devtype))
+ return strdup(devtype);
+
+ t = arphrd_to_name(iftype);
+ if (!t)
+ return NULL;
+
+ p = strdup(t);
+ if (!p)
+ return NULL;
+
+ ascii_strlower(p);
+ return p;
+}
+
+static void operational_state_to_color(const char *state, const char **on, const char **off) {
+ assert(on);
+ assert(off);
+
+ if (streq_ptr(state, "routable")) {
+ *on = ansi_highlight_green();
+ *off = ansi_normal();
+ } else if (streq_ptr(state, "degraded")) {
+ *on = ansi_highlight_yellow();
+ *off = ansi_normal();
+ } else
+ *on = *off = "";
+}
+
+static void setup_state_to_color(const char *state, const char **on, const char **off) {
+ assert(on);
+ assert(off);
+
+ if (streq_ptr(state, "configured")) {
+ *on = ansi_highlight_green();
+ *off = ansi_normal();
+ } else if (streq_ptr(state, "configuring")) {
+ *on = ansi_highlight_yellow();
+ *off = ansi_normal();
+ } else if (STRPTR_IN_SET(state, "failed", "linger")) {
+ *on = ansi_highlight_red();
+ *off = ansi_normal();
+ } else
+ *on = *off = "";
+}
+
+typedef struct LinkInfo {
+ char name[IFNAMSIZ+1];
+ int ifindex;
+ unsigned short iftype;
+ struct ether_addr mac_address;
+ uint32_t mtu;
+
+ bool has_mac_address:1;
+ bool has_mtu:1;
+} LinkInfo;
+
+static int link_info_compare(const LinkInfo *a, const LinkInfo *b) {
+ return CMP(a->ifindex, b->ifindex);
+}
+
+static int decode_link(sd_netlink_message *m, LinkInfo *info) {
+ const char *name;
+ uint16_t type;
+ int r;
+
+ assert(m);
+ assert(info);
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+
+ if (type != RTM_NEWLINK)
+ return 0;
+
+ r = sd_rtnl_message_link_get_ifindex(m, &info->ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_string(m, IFLA_IFNAME, &name);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_link_get_type(m, &info->iftype);
+ if (r < 0)
+ return r;
+
+ strscpy(info->name, sizeof info->name, name);
+
+ info->has_mac_address =
+ sd_netlink_message_read_ether_addr(m, IFLA_ADDRESS, &info->mac_address) >= 0 &&
+ memcmp(&info->mac_address, &ETHER_ADDR_NULL, sizeof(struct ether_addr)) != 0;
+
+ info->has_mtu =
+ sd_netlink_message_read_u32(m, IFLA_MTU, &info->mtu) &&
+ info->mtu > 0;
+
+ return 1;
+}
+
+static int acquire_link_info_strv(sd_netlink *rtnl, char **l, LinkInfo **ret) {
+ _cleanup_free_ LinkInfo *links = NULL;
+ char **i;
+ size_t c = 0;
+ int r;
+
+ assert(rtnl);
+ assert(ret);
+
+ links = new(LinkInfo, strv_length(l));
+ if (!links)
+ return log_oom();
+
+ STRV_FOREACH(i, l) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ int ifindex;
+
+ if (parse_ifindex(*i, &ifindex) >= 0)
+ r = sd_rtnl_message_new_link(rtnl, &req, RTM_GETLINK, ifindex);
+ else {
+ r = sd_rtnl_message_new_link(rtnl, &req, RTM_GETLINK, 0);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = sd_netlink_message_append_string(req, IFLA_IFNAME, *i);
+ }
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request link: %m");
+
+ r = decode_link(reply, links + c);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ c++;
+ }
+
+ typesafe_qsort(links, c, link_info_compare);
+
+ *ret = TAKE_PTR(links);
+
+ return (int) c;
+}
+
+static int acquire_link_info_all(sd_netlink *rtnl, LinkInfo **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ _cleanup_free_ LinkInfo *links = NULL;
+ size_t allocated = 0, c = 0;
+ sd_netlink_message *i;
+ int r;
+
+ assert(rtnl);
+ assert(ret);
+
+ r = sd_rtnl_message_new_link(rtnl, &req, RTM_GETLINK, 0);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate links: %m");
+
+ for (i = reply; i; i = sd_netlink_message_next(i)) {
+ if (!GREEDY_REALLOC(links, allocated, c+1))
+ return -ENOMEM;
+
+ r = decode_link(i, links + c);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ c++;
+ }
+
+ typesafe_qsort(links, c, link_info_compare);
+
+ *ret = TAKE_PTR(links);
+
+ return (int) c;
+}
+
+static int list_links(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_free_ LinkInfo *links = NULL;
+ int c, i, r;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ if (argc > 1)
+ c = acquire_link_info_strv(rtnl, argv + 1, &links);
+ else
+ c = acquire_link_info_all(rtnl, &links);
+ if (c < 0)
+ return c;
+
+ (void) pager_open(arg_pager_flags);
+
+ if (arg_legend)
+ printf("%3s %-16s %-18s %-11s %-10s\n",
+ "IDX",
+ "LINK",
+ "TYPE",
+ "OPERATIONAL",
+ "SETUP");
+
+ for (i = 0; i < c; i++) {
+ _cleanup_free_ char *setup_state = NULL, *operational_state = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ const char *on_color_operational, *off_color_operational,
+ *on_color_setup, *off_color_setup;
+ char devid[2 + DECIMAL_STR_MAX(int)];
+ _cleanup_free_ char *t = NULL;
+
+ (void) sd_network_link_get_operational_state(links[i].ifindex, &operational_state);
+ operational_state_to_color(operational_state, &on_color_operational, &off_color_operational);
+
+ r = sd_network_link_get_setup_state(links[i].ifindex, &setup_state);
+ if (r == -ENODATA) /* If there's no info available about this iface, it's unmanaged by networkd */
+ setup_state = strdup("unmanaged");
+ setup_state_to_color(setup_state, &on_color_setup, &off_color_setup);
+
+ xsprintf(devid, "n%i", links[i].ifindex);
+ (void) sd_device_new_from_device_id(&d, devid);
+
+ t = link_get_type_string(links[i].iftype, d);
+
+ printf("%3i %-16s %-18s %s%-11s%s %s%-10s%s\n",
+ links[i].ifindex, links[i].name, strna(t),
+ on_color_operational, strna(operational_state), off_color_operational,
+ on_color_setup, strna(setup_state), off_color_setup);
+ }
+
+ if (arg_legend)
+ printf("\n%i links listed.\n", c);
+
+ return 0;
+}
+
+/* IEEE Organizationally Unique Identifier vendor string */
+static int ieee_oui(sd_hwdb *hwdb, const struct ether_addr *mac, char **ret) {
+ const char *description;
+ char modalias[STRLEN("OUI:XXYYXXYYXXYY") + 1], *desc;
+ int r;
+
+ assert(ret);
+
+ if (!hwdb)
+ return -EINVAL;
+
+ if (!mac)
+ return -EINVAL;
+
+ /* skip commonly misused 00:00:00 (Xerox) prefix */
+ if (memcmp(mac, "\0\0\0", 3) == 0)
+ return -EINVAL;
+
+ xsprintf(modalias, "OUI:" ETHER_ADDR_FORMAT_STR,
+ ETHER_ADDR_FORMAT_VAL(*mac));
+
+ r = sd_hwdb_get(hwdb, modalias, "ID_OUI_FROM_DATABASE", &description);
+ if (r < 0)
+ return r;
+
+ desc = strdup(description);
+ if (!desc)
+ return -ENOMEM;
+
+ *ret = desc;
+
+ return 0;
+}
+
+static int get_gateway_description(
+ sd_netlink *rtnl,
+ sd_hwdb *hwdb,
+ int ifindex,
+ int family,
+ union in_addr_union *gateway,
+ char **gateway_description) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ sd_netlink_message *m;
+ int r;
+
+ assert(rtnl);
+ assert(ifindex >= 0);
+ assert(IN_SET(family, AF_INET, AF_INET6));
+ assert(gateway);
+ assert(gateway_description);
+
+ r = sd_rtnl_message_new_neigh(rtnl, &req, RTM_GETNEIGH, ifindex, family);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (m = reply; m; m = sd_netlink_message_next(m)) {
+ union in_addr_union gw = {};
+ struct ether_addr mac = {};
+ uint16_t type;
+ int ifi, fam;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0) {
+ log_error_errno(r, "got error: %m");
+ continue;
+ }
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0) {
+ log_error_errno(r, "could not get type: %m");
+ continue;
+ }
+
+ if (type != RTM_NEWNEIGH) {
+ log_error("type is not RTM_NEWNEIGH");
+ continue;
+ }
+
+ r = sd_rtnl_message_neigh_get_family(m, &fam);
+ if (r < 0) {
+ log_error_errno(r, "could not get family: %m");
+ continue;
+ }
+
+ if (fam != family) {
+ log_error("family is not correct");
+ continue;
+ }
+
+ r = sd_rtnl_message_neigh_get_ifindex(m, &ifi);
+ if (r < 0) {
+ log_error_errno(r, "could not get ifindex: %m");
+ continue;
+ }
+
+ if (ifindex > 0 && ifi != ifindex)
+ continue;
+
+ switch (fam) {
+ case AF_INET:
+ r = sd_netlink_message_read_in_addr(m, NDA_DST, &gw.in);
+ if (r < 0)
+ continue;
+
+ break;
+ case AF_INET6:
+ r = sd_netlink_message_read_in6_addr(m, NDA_DST, &gw.in6);
+ if (r < 0)
+ continue;
+
+ break;
+ default:
+ continue;
+ }
+
+ if (!in_addr_equal(fam, &gw, gateway))
+ continue;
+
+ r = sd_netlink_message_read_ether_addr(m, NDA_LLADDR, &mac);
+ if (r < 0)
+ continue;
+
+ r = ieee_oui(hwdb, &mac, gateway_description);
+ if (r < 0)
+ continue;
+
+ return 0;
+ }
+
+ return -ENODATA;
+}
+
+static int dump_gateways(
+ sd_netlink *rtnl,
+ sd_hwdb *hwdb,
+ const char *prefix,
+ int ifindex) {
+ _cleanup_free_ struct local_address *local = NULL;
+ int r, n, i;
+
+ assert(rtnl);
+ assert(prefix);
+
+ n = local_gateways(rtnl, ifindex, AF_UNSPEC, &local);
+ if (n < 0)
+ return n;
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *gateway = NULL, *description = NULL;
+
+ r = in_addr_to_string(local[i].family, &local[i].address, &gateway);
+ if (r < 0)
+ return r;
+
+ r = get_gateway_description(rtnl, hwdb, local[i].ifindex, local[i].family, &local[i].address, &description);
+ if (r < 0)
+ log_debug_errno(r, "Could not get description of gateway: %m");
+
+ printf("%*s%s",
+ (int) strlen(prefix),
+ i == 0 ? prefix : "",
+ gateway);
+
+ if (description)
+ printf(" (%s)", description);
+
+ /* Show interface name for the entry if we show
+ * entries for all interfaces */
+ if (ifindex <= 0) {
+ char name[IF_NAMESIZE+1];
+
+ if (if_indextoname(local[i].ifindex, name)) {
+ fputs(" on ", stdout);
+ fputs(name, stdout);
+ } else
+ printf(" on %%%i", local[i].ifindex);
+ }
+
+ fputc('\n', stdout);
+ }
+
+ return 0;
+}
+
+static int dump_addresses(
+ sd_netlink *rtnl,
+ const char *prefix,
+ int ifindex) {
+
+ _cleanup_free_ struct local_address *local = NULL;
+ int r, n, i;
+
+ assert(rtnl);
+ assert(prefix);
+
+ n = local_addresses(rtnl, ifindex, AF_UNSPEC, &local);
+ if (n < 0)
+ return n;
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *pretty = NULL;
+
+ r = in_addr_to_string(local[i].family, &local[i].address, &pretty);
+ if (r < 0)
+ return r;
+
+ printf("%*s%s",
+ (int) strlen(prefix),
+ i == 0 ? prefix : "",
+ pretty);
+
+ if (ifindex <= 0) {
+ char name[IF_NAMESIZE+1];
+
+ if (if_indextoname(local[i].ifindex, name)) {
+ fputs(" on ", stdout);
+ fputs(name, stdout);
+ } else
+ printf(" on %%%i", local[i].ifindex);
+ }
+
+ fputc('\n', stdout);
+ }
+
+ return 0;
+}
+
+static int dump_address_labels(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ sd_netlink_message *m;
+ int r;
+
+ assert(rtnl);
+
+ r = sd_rtnl_message_new_addrlabel(rtnl, &req, RTM_GETADDRLABEL, 0, AF_INET6);
+ if (r < 0)
+ return log_error_errno(r, "Could not allocate RTM_GETADDRLABEL message: %m");
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ printf("%10s/%s %30s\n", "Prefix", "Prefixlen", "Label");
+
+ for (m = reply; m; m = sd_netlink_message_next(m)) {
+ _cleanup_free_ char *pretty = NULL;
+ union in_addr_union prefix = {};
+ uint8_t prefixlen;
+ uint32_t label;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0) {
+ log_error_errno(r, "got error: %m");
+ continue;
+ }
+
+ r = sd_netlink_message_read_u32(m, IFAL_LABEL, &label);
+ if (r < 0 && r != -ENODATA) {
+ log_error_errno(r, "Could not read IFAL_LABEL, ignoring: %m");
+ continue;
+ }
+
+ r = sd_netlink_message_read_in6_addr(m, IFAL_ADDRESS, &prefix.in6);
+ if (r < 0)
+ continue;
+
+ r = in_addr_to_string(AF_INET6, &prefix, &pretty);
+ if (r < 0)
+ continue;
+
+ r = sd_rtnl_message_addrlabel_get_prefixlen(m, &prefixlen);
+ if (r < 0)
+ continue;
+
+ printf("%10s/%-5u %30u\n", pretty, prefixlen, label);
+ }
+
+ return 0;
+}
+
+static int list_address_labels(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ int r;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ dump_address_labels(rtnl);
+
+ return 0;
+}
+
+static int open_lldp_neighbors(int ifindex, FILE **ret) {
+ _cleanup_free_ char *p = NULL;
+ FILE *f;
+
+ if (asprintf(&p, "/run/systemd/netif/lldp/%i", ifindex) < 0)
+ return -ENOMEM;
+
+ f = fopen(p, "re");
+ if (!f)
+ return -errno;
+
+ *ret = f;
+ return 0;
+}
+
+static int next_lldp_neighbor(FILE *f, sd_lldp_neighbor **ret) {
+ _cleanup_free_ void *raw = NULL;
+ size_t l;
+ le64_t u;
+ int r;
+
+ assert(f);
+ assert(ret);
+
+ l = fread(&u, 1, sizeof(u), f);
+ if (l == 0 && feof(f))
+ return 0;
+ if (l != sizeof(u))
+ return -EBADMSG;
+
+ /* each LLDP packet is at most MTU size, but let's allow up to 4KiB just in case */
+ if (le64toh(u) >= 4096)
+ return -EBADMSG;
+
+ raw = new(uint8_t, le64toh(u));
+ if (!raw)
+ return -ENOMEM;
+
+ if (fread(raw, 1, le64toh(u), f) != le64toh(u))
+ return -EBADMSG;
+
+ r = sd_lldp_neighbor_from_raw(ret, raw, le64toh(u));
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int dump_lldp_neighbors(const char *prefix, int ifindex) {
+ _cleanup_fclose_ FILE *f = NULL;
+ int r, c = 0;
+
+ assert(prefix);
+ assert(ifindex > 0);
+
+ r = open_lldp_neighbors(ifindex, &f);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *system_name = NULL, *port_id = NULL, *port_description = NULL;
+ _cleanup_(sd_lldp_neighbor_unrefp) sd_lldp_neighbor *n = NULL;
+
+ r = next_lldp_neighbor(f, &n);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ printf("%*s",
+ (int) strlen(prefix),
+ c == 0 ? prefix : "");
+
+ (void) sd_lldp_neighbor_get_system_name(n, &system_name);
+ (void) sd_lldp_neighbor_get_port_id_as_string(n, &port_id);
+ (void) sd_lldp_neighbor_get_port_description(n, &port_description);
+
+ printf("%s on port %s", strna(system_name), strna(port_id));
+
+ if (!isempty(port_description))
+ printf(" (%s)", port_description);
+
+ putchar('\n');
+
+ c++;
+ }
+
+ return c;
+}
+
+static void dump_ifindexes(const char *prefix, const int *ifindexes) {
+ unsigned c;
+
+ assert(prefix);
+
+ if (!ifindexes || ifindexes[0] <= 0)
+ return;
+
+ for (c = 0; ifindexes[c] > 0; c++) {
+ char name[IF_NAMESIZE+1];
+
+ printf("%*s",
+ (int) strlen(prefix),
+ c == 0 ? prefix : "");
+
+ if (if_indextoname(ifindexes[c], name))
+ fputs(name, stdout);
+ else
+ printf("%i", ifindexes[c]);
+
+ fputc('\n', stdout);
+ }
+}
+
+static void dump_list(const char *prefix, char **l) {
+ char **i;
+
+ if (strv_isempty(l))
+ return;
+
+ STRV_FOREACH(i, l) {
+ printf("%*s%s\n",
+ (int) strlen(prefix),
+ i == l ? prefix : "",
+ *i);
+ }
+}
+
+static int link_status_one(
+ sd_netlink *rtnl,
+ sd_hwdb *hwdb,
+ const LinkInfo *info) {
+
+ _cleanup_strv_free_ char **dns = NULL, **ntp = NULL, **search_domains = NULL, **route_domains = NULL;
+ _cleanup_free_ char *setup_state = NULL, *operational_state = NULL, *tz = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ char devid[2 + DECIMAL_STR_MAX(int)];
+ _cleanup_free_ char *t = NULL, *network = NULL;
+ const char *driver = NULL, *path = NULL, *vendor = NULL, *model = NULL, *link = NULL;
+ const char *on_color_operational, *off_color_operational,
+ *on_color_setup, *off_color_setup;
+ _cleanup_free_ int *carrier_bound_to = NULL, *carrier_bound_by = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(info);
+
+ (void) sd_network_link_get_operational_state(info->ifindex, &operational_state);
+ operational_state_to_color(operational_state, &on_color_operational, &off_color_operational);
+
+ r = sd_network_link_get_setup_state(info->ifindex, &setup_state);
+ if (r == -ENODATA) /* If there's no info available about this iface, it's unmanaged by networkd */
+ setup_state = strdup("unmanaged");
+ setup_state_to_color(setup_state, &on_color_setup, &off_color_setup);
+
+ (void) sd_network_link_get_dns(info->ifindex, &dns);
+ (void) sd_network_link_get_search_domains(info->ifindex, &search_domains);
+ (void) sd_network_link_get_route_domains(info->ifindex, &route_domains);
+ (void) sd_network_link_get_ntp(info->ifindex, &ntp);
+
+ xsprintf(devid, "n%i", info->ifindex);
+
+ (void) sd_device_new_from_device_id(&d, devid);
+
+ if (d) {
+ (void) sd_device_get_property_value(d, "ID_NET_LINK_FILE", &link);
+ (void) sd_device_get_property_value(d, "ID_NET_DRIVER", &driver);
+ (void) sd_device_get_property_value(d, "ID_PATH", &path);
+
+ if (sd_device_get_property_value(d, "ID_VENDOR_FROM_DATABASE", &vendor) < 0)
+ (void) sd_device_get_property_value(d, "ID_VENDOR", &vendor);
+
+ if (sd_device_get_property_value(d, "ID_MODEL_FROM_DATABASE", &model) < 0)
+ (void) sd_device_get_property_value(d, "ID_MODEL", &model);
+ }
+
+ t = link_get_type_string(info->iftype, d);
+
+ (void) sd_network_link_get_network_file(info->ifindex, &network);
+
+ (void) sd_network_link_get_carrier_bound_to(info->ifindex, &carrier_bound_to);
+ (void) sd_network_link_get_carrier_bound_by(info->ifindex, &carrier_bound_by);
+
+ printf("%s%s%s %i: %s\n", on_color_operational, special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE), off_color_operational, info->ifindex, info->name);
+
+ printf(" Link File: %s\n"
+ " Network File: %s\n"
+ " Type: %s\n"
+ " State: %s%s%s (%s%s%s)\n",
+ strna(link),
+ strna(network),
+ strna(t),
+ on_color_operational, strna(operational_state), off_color_operational,
+ on_color_setup, strna(setup_state), off_color_setup);
+
+ if (path)
+ printf(" Path: %s\n", path);
+ if (driver)
+ printf(" Driver: %s\n", driver);
+ if (vendor)
+ printf(" Vendor: %s\n", vendor);
+ if (model)
+ printf(" Model: %s\n", model);
+
+ if (info->has_mac_address) {
+ _cleanup_free_ char *description = NULL;
+ char ea[ETHER_ADDR_TO_STRING_MAX];
+
+ (void) ieee_oui(hwdb, &info->mac_address, &description);
+
+ if (description)
+ printf(" HW Address: %s (%s)\n", ether_addr_to_string(&info->mac_address, ea), description);
+ else
+ printf(" HW Address: %s\n", ether_addr_to_string(&info->mac_address, ea));
+ }
+
+ if (info->has_mtu)
+ printf(" MTU: %" PRIu32 "\n", info->mtu);
+
+ (void) dump_addresses(rtnl, " Address: ", info->ifindex);
+ (void) dump_gateways(rtnl, hwdb, " Gateway: ", info->ifindex);
+
+ dump_list(" DNS: ", dns);
+ dump_list(" Search Domains: ", search_domains);
+ dump_list(" Route Domains: ", route_domains);
+
+ dump_list(" NTP: ", ntp);
+
+ dump_ifindexes("Carrier Bound To: ", carrier_bound_to);
+ dump_ifindexes("Carrier Bound By: ", carrier_bound_by);
+
+ (void) sd_network_link_get_timezone(info->ifindex, &tz);
+ if (tz)
+ printf(" Time Zone: %s\n", tz);
+
+ (void) dump_lldp_neighbors(" Connected To: ", info->ifindex);
+
+ return 0;
+}
+
+static int system_status(sd_netlink *rtnl, sd_hwdb *hwdb) {
+ _cleanup_free_ char *operational_state = NULL;
+ _cleanup_strv_free_ char **dns = NULL, **ntp = NULL, **search_domains = NULL, **route_domains = NULL;
+ const char *on_color_operational, *off_color_operational;
+
+ assert(rtnl);
+
+ (void) sd_network_get_operational_state(&operational_state);
+ operational_state_to_color(operational_state, &on_color_operational, &off_color_operational);
+
+ printf("%s%s%s State: %s%s%s\n",
+ on_color_operational, special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE), off_color_operational,
+ on_color_operational, strna(operational_state), off_color_operational);
+
+ (void) dump_addresses(rtnl, " Address: ", 0);
+ (void) dump_gateways(rtnl, hwdb, " Gateway: ", 0);
+
+ (void) sd_network_get_dns(&dns);
+ dump_list(" DNS: ", dns);
+
+ (void) sd_network_get_search_domains(&search_domains);
+ dump_list("Search Domains: ", search_domains);
+
+ (void) sd_network_get_route_domains(&route_domains);
+ dump_list(" Route Domains: ", route_domains);
+
+ (void) sd_network_get_ntp(&ntp);
+ dump_list(" NTP: ", ntp);
+
+ return 0;
+}
+
+static int link_status(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb = NULL;
+ _cleanup_free_ LinkInfo *links = NULL;
+ int r, c, i;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ r = sd_hwdb_new(&hwdb);
+ if (r < 0)
+ log_debug_errno(r, "Failed to open hardware database: %m");
+
+ if (arg_all)
+ c = acquire_link_info_all(rtnl, &links);
+ else if (argc <= 1)
+ return system_status(rtnl, hwdb);
+ else
+ c = acquire_link_info_strv(rtnl, argv + 1, &links);
+ if (c < 0)
+ return c;
+
+ for (i = 0; i < c; i++) {
+ if (i > 0)
+ fputc('\n', stdout);
+
+ link_status_one(rtnl, hwdb, links + i);
+ }
+
+ return 0;
+}
+
+static char *lldp_capabilities_to_string(uint16_t x) {
+ static const char characters[] = {
+ 'o', 'p', 'b', 'w', 'r', 't', 'd', 'a', 'c', 's', 'm',
+ };
+ char *ret;
+ unsigned i;
+
+ ret = new(char, ELEMENTSOF(characters) + 1);
+ if (!ret)
+ return NULL;
+
+ for (i = 0; i < ELEMENTSOF(characters); i++)
+ ret[i] = (x & (1U << i)) ? characters[i] : '.';
+
+ ret[i] = 0;
+ return ret;
+}
+
+static void lldp_capabilities_legend(uint16_t x) {
+ unsigned w, i, cols = columns();
+ static const char* const table[] = {
+ "o - Other",
+ "p - Repeater",
+ "b - Bridge",
+ "w - WLAN Access Point",
+ "r - Router",
+ "t - Telephone",
+ "d - DOCSIS cable device",
+ "a - Station",
+ "c - Customer VLAN",
+ "s - Service VLAN",
+ "m - Two-port MAC Relay (TPMR)",
+ };
+
+ if (x == 0)
+ return;
+
+ printf("\nCapability Flags:\n");
+ for (w = 0, i = 0; i < ELEMENTSOF(table); i++)
+ if (x & (1U << i) || arg_all) {
+ bool newline;
+
+ newline = w + strlen(table[i]) + (w == 0 ? 0 : 2) > cols;
+ if (newline)
+ w = 0;
+ w += printf("%s%s%s", newline ? "\n" : "", w == 0 ? "" : "; ", table[i]);
+ }
+ puts("");
+}
+
+static int link_lldp_status(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_free_ LinkInfo *links = NULL;
+ int i, r, c, m = 0;
+ uint16_t all = 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ if (argc > 1)
+ c = acquire_link_info_strv(rtnl, argv + 1, &links);
+ else
+ c = acquire_link_info_all(rtnl, &links);
+ if (c < 0)
+ return c;
+
+ (void) pager_open(arg_pager_flags);
+
+ if (arg_legend)
+ printf("%-16s %-17s %-16s %-11s %-17s %-16s\n",
+ "LINK",
+ "CHASSIS ID",
+ "SYSTEM NAME",
+ "CAPS",
+ "PORT ID",
+ "PORT DESCRIPTION");
+
+ for (i = 0; i < c; i++) {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ r = open_lldp_neighbors(links[i].ifindex, &f);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0) {
+ log_warning_errno(r, "Failed to open LLDP data for %i, ignoring: %m", links[i].ifindex);
+ continue;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *cid = NULL, *pid = NULL, *sname = NULL, *pdesc = NULL;
+ const char *chassis_id = NULL, *port_id = NULL, *system_name = NULL, *port_description = NULL, *capabilities = NULL;
+ _cleanup_(sd_lldp_neighbor_unrefp) sd_lldp_neighbor *n = NULL;
+ uint16_t cc;
+
+ r = next_lldp_neighbor(f, &n);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read neighbor data: %m");
+ break;
+ }
+ if (r == 0)
+ break;
+
+ (void) sd_lldp_neighbor_get_chassis_id_as_string(n, &chassis_id);
+ (void) sd_lldp_neighbor_get_port_id_as_string(n, &port_id);
+ (void) sd_lldp_neighbor_get_system_name(n, &system_name);
+ (void) sd_lldp_neighbor_get_port_description(n, &port_description);
+
+ if (chassis_id) {
+ cid = ellipsize(chassis_id, 17, 100);
+ if (cid)
+ chassis_id = cid;
+ }
+
+ if (port_id) {
+ pid = ellipsize(port_id, 17, 100);
+ if (pid)
+ port_id = pid;
+ }
+
+ if (system_name) {
+ sname = ellipsize(system_name, 16, 100);
+ if (sname)
+ system_name = sname;
+ }
+
+ if (port_description) {
+ pdesc = ellipsize(port_description, 16, 100);
+ if (pdesc)
+ port_description = pdesc;
+ }
+
+ if (sd_lldp_neighbor_get_enabled_capabilities(n, &cc) >= 0) {
+ capabilities = lldp_capabilities_to_string(cc);
+ all |= cc;
+ }
+
+ printf("%-16s %-17s %-16s %-11s %-17s %-16s\n",
+ links[i].name,
+ strna(chassis_id),
+ strna(system_name),
+ strna(capabilities),
+ strna(port_id),
+ strna(port_description));
+
+ m++;
+ }
+ }
+
+ if (arg_legend) {
+ lldp_capabilities_legend(all);
+ printf("\n%i neighbors listed.\n", m);
+ }
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("networkctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Query and control the networking subsystem.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers and footers\n"
+ " -a --all Show status for all links\n\n"
+ "Commands:\n"
+ " list [LINK...] List links\n"
+ " status [LINK...] Show link status\n"
+ " lldp [LINK...] Show LLDP neighbors\n"
+ " label Show current address label entries in the kernel\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "all", no_argument, NULL, 'a' },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "ha", options, NULL)) >= 0) {
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case 'a':
+ arg_all = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+ }
+
+ return 1;
+}
+
+static int networkctl_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "list", VERB_ANY, VERB_ANY, VERB_DEFAULT, list_links },
+ { "status", VERB_ANY, VERB_ANY, 0, link_status },
+ { "lldp", VERB_ANY, VERB_ANY, 0, link_lldp_status },
+ { "label", VERB_ANY, VERB_ANY, 0, list_address_labels },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static void warn_networkd_missing(void) {
+
+ if (access("/run/systemd/netif/state", F_OK) >= 0)
+ return;
+
+ fprintf(stderr, "WARNING: systemd-networkd is not running, output will be incomplete.\n\n");
+}
+
+static int run(int argc, char* argv[]) {
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ warn_networkd_missing();
+
+ return networkctl_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/network/networkd-address-label.c b/src/network/networkd-address-label.c
new file mode 100644
index 0000000..94a12f8
--- /dev/null
+++ b/src/network/networkd-address-label.c
@@ -0,0 +1,229 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+#include <linux/if_addrlabel.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "networkd-address-label.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "socket-util.h"
+
+void address_label_free(AddressLabel *label) {
+ if (!label)
+ return;
+
+ if (label->network) {
+ LIST_REMOVE(labels, label->network->address_labels, label);
+ assert(label->network->n_address_labels > 0);
+ label->network->n_address_labels--;
+
+ if (label->section) {
+ hashmap_remove(label->network->address_labels_by_section, label->section);
+ network_config_section_free(label->section);
+ }
+ }
+
+ free(label);
+}
+
+static int address_label_new_static(Network *network, const char *filename, unsigned section_line, AddressLabel **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(address_label_freep) AddressLabel *label = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(!!filename == (section_line > 0));
+
+ if (filename) {
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ label = hashmap_get(network->address_labels_by_section, n);
+ if (label) {
+ *ret = TAKE_PTR(label);
+
+ return 0;
+ }
+ }
+
+ label = new(AddressLabel, 1);
+ if (!label)
+ return -ENOMEM;
+
+ *label = (AddressLabel) {
+ .network = network,
+ };
+
+ LIST_APPEND(labels, network->address_labels, label);
+ network->n_address_labels++;
+
+ if (filename) {
+ label->section = TAKE_PTR(n);
+
+ r = hashmap_ensure_allocated(&network->address_labels_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->address_labels_by_section, label->section, label);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(label);
+
+ return 0;
+}
+
+static int address_label_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(rtnl);
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+ assert(link->address_label_messages > 0);
+
+ link->address_label_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST)
+ log_link_warning_errno(link, r, "could not set address label: %m");
+ else if (r >= 0)
+ manager_rtnl_process_address(rtnl, m, link->manager);
+
+ if (link->address_label_messages == 0)
+ log_link_debug(link, "Addresses label set");
+
+ return 1;
+}
+
+int address_label_configure(
+ AddressLabel *label,
+ Link *link,
+ link_netlink_message_handler_t callback,
+ bool update) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(label);
+ assert(link);
+ assert(link->ifindex > 0);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ r = sd_rtnl_message_new_addrlabel(link->manager->rtnl, &req, RTM_NEWADDRLABEL,
+ link->ifindex, AF_INET6);
+ if (r < 0)
+ return log_error_errno(r, "Could not allocate RTM_NEWADDR message: %m");
+
+ r = sd_rtnl_message_addrlabel_set_prefixlen(req, label->prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Could not set prefixlen: %m");
+
+ r = sd_netlink_message_append_u32(req, IFAL_LABEL, label->label);
+ if (r < 0)
+ return log_error_errno(r, "Could not append IFAL_LABEL attribute: %m");
+
+ r = sd_netlink_message_append_in6_addr(req, IFA_ADDRESS, &label->in_addr.in6);
+ if (r < 0)
+ return log_error_errno(r, "Could not append IFA_ADDRESS attribute: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req,
+ callback ?: address_label_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_error_errno(r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+int config_parse_address_label_prefix(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(address_label_freep) AddressLabel *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_label_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = in_addr_prefix_from_string(rvalue, AF_INET6, &n->in_addr, &n->prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Address label is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_address_label(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(address_label_freep) AddressLabel *n = NULL;
+ Network *network = userdata;
+ uint32_t k;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_label_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = safe_atou32(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse address label, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (k == 0xffffffffUL) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Address label is invalid, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n->label = k;
+ n = NULL;
+
+ return 0;
+}
diff --git a/src/network/networkd-address-label.h b/src/network/networkd-address-label.h
new file mode 100644
index 0000000..6922cb0
--- /dev/null
+++ b/src/network/networkd-address-label.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+#include "conf-parser.h"
+#include "in-addr-util.h"
+
+typedef struct AddressLabel AddressLabel;
+
+#include "networkd-link.h"
+#include "networkd-network.h"
+
+typedef struct Network Network;
+typedef struct Link Link;
+typedef struct NetworkConfigSection NetworkConfigSection;
+
+struct AddressLabel {
+ Network *network;
+ NetworkConfigSection *section;
+
+ unsigned char prefixlen;
+ uint32_t label;
+
+ union in_addr_union in_addr;
+
+ LIST_FIELDS(AddressLabel, labels);
+};
+
+void address_label_free(AddressLabel *label);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(AddressLabel*, address_label_free);
+
+int address_label_configure(AddressLabel *address, Link *link, link_netlink_message_handler_t callback, bool update);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_address_label);
+CONFIG_PARSER_PROTOTYPE(config_parse_address_label_prefix);
diff --git a/src/network/networkd-address-pool.c b/src/network/networkd-address-pool.c
new file mode 100644
index 0000000..1650515
--- /dev/null
+++ b/src/network/networkd-address-pool.c
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "networkd-address-pool.h"
+#include "networkd-manager.h"
+#include "set.h"
+#include "string-util.h"
+
+int address_pool_new(
+ Manager *m,
+ AddressPool **ret,
+ int family,
+ const union in_addr_union *u,
+ unsigned prefixlen) {
+
+ AddressPool *p;
+
+ assert(m);
+ assert(ret);
+ assert(u);
+
+ p = new(AddressPool, 1);
+ if (!p)
+ return -ENOMEM;
+
+ *p = (AddressPool) {
+ .manager = m,
+ .family = family,
+ .prefixlen = prefixlen,
+ .in_addr = *u,
+ };
+
+ LIST_PREPEND(address_pools, m->address_pools, p);
+
+ *ret = p;
+ return 0;
+}
+
+int address_pool_new_from_string(
+ Manager *m,
+ AddressPool **ret,
+ int family,
+ const char *p,
+ unsigned prefixlen) {
+
+ union in_addr_union u;
+ int r;
+
+ assert(m);
+ assert(ret);
+ assert(p);
+
+ r = in_addr_from_string(family, p, &u);
+ if (r < 0)
+ return r;
+
+ return address_pool_new(m, ret, family, &u, prefixlen);
+}
+
+void address_pool_free(AddressPool *p) {
+
+ if (!p)
+ return;
+
+ if (p->manager)
+ LIST_REMOVE(address_pools, p->manager->address_pools, p);
+
+ free(p);
+}
+
+static bool address_pool_prefix_is_taken(
+ AddressPool *p,
+ const union in_addr_union *u,
+ unsigned prefixlen) {
+
+ Iterator i;
+ Link *l;
+ Network *n;
+
+ assert(p);
+ assert(u);
+
+ HASHMAP_FOREACH(l, p->manager->links, i) {
+ Address *a;
+ Iterator j;
+
+ /* Don't clash with assigned addresses */
+ SET_FOREACH(a, l->addresses, j) {
+ if (a->family != p->family)
+ continue;
+
+ if (in_addr_prefix_intersect(p->family, u, prefixlen, &a->in_addr, a->prefixlen))
+ return true;
+ }
+
+ /* Don't clash with addresses already pulled from the pool, but not assigned yet */
+ LIST_FOREACH(addresses, a, l->pool_addresses) {
+ if (a->family != p->family)
+ continue;
+
+ if (in_addr_prefix_intersect(p->family, u, prefixlen, &a->in_addr, a->prefixlen))
+ return true;
+ }
+ }
+
+ /* And don't clash with configured but un-assigned addresses either */
+ LIST_FOREACH(networks, n, p->manager->networks) {
+ Address *a;
+
+ LIST_FOREACH(addresses, a, n->static_addresses) {
+ if (a->family != p->family)
+ continue;
+
+ if (in_addr_prefix_intersect(p->family, u, prefixlen, &a->in_addr, a->prefixlen))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+int address_pool_acquire(AddressPool *p, unsigned prefixlen, union in_addr_union *found) {
+ union in_addr_union u;
+
+ assert(p);
+ assert(prefixlen > 0);
+ assert(found);
+
+ if (p->prefixlen > prefixlen)
+ return 0;
+
+ u = p->in_addr;
+ for (;;) {
+ if (!address_pool_prefix_is_taken(p, &u, prefixlen)) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ r = in_addr_to_string(p->family, &u, &s);
+ if (r < 0)
+ return r;
+
+ log_debug("Found range %s/%u", strna(s), prefixlen);
+
+ *found = u;
+ return 1;
+ }
+
+ if (!in_addr_prefix_next(p->family, &u, prefixlen))
+ return 0;
+
+ if (!in_addr_prefix_intersect(p->family, &p->in_addr, p->prefixlen, &u, prefixlen))
+ return 0;
+ }
+
+ return 0;
+}
diff --git a/src/network/networkd-address-pool.h b/src/network/networkd-address-pool.h
new file mode 100644
index 0000000..bd479a5
--- /dev/null
+++ b/src/network/networkd-address-pool.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct AddressPool AddressPool;
+
+#include "in-addr-util.h"
+#include "list.h"
+
+typedef struct Manager Manager;
+
+struct AddressPool {
+ Manager *manager;
+
+ int family;
+ unsigned prefixlen;
+
+ union in_addr_union in_addr;
+
+ LIST_FIELDS(AddressPool, address_pools);
+};
+
+int address_pool_new(Manager *m, AddressPool **ret, int family, const union in_addr_union *u, unsigned prefixlen);
+int address_pool_new_from_string(Manager *m, AddressPool **ret, int family, const char *p, unsigned prefixlen);
+void address_pool_free(AddressPool *p);
+
+int address_pool_acquire(AddressPool *p, unsigned prefixlen, union in_addr_union *found);
diff --git a/src/network/networkd-address.c b/src/network/networkd-address.c
new file mode 100644
index 0000000..3cdbd9e
--- /dev/null
+++ b/src/network/networkd-address.c
@@ -0,0 +1,969 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "firewall-util.h"
+#include "missing_network.h"
+#include "netlink-util.h"
+#include "networkd-address.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "set.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+#include "util.h"
+
+#define ADDRESSES_PER_LINK_MAX 2048U
+#define STATIC_ADDRESSES_PER_NETWORK_MAX 1024U
+
+int address_new(Address **ret) {
+ _cleanup_(address_freep) Address *address = NULL;
+
+ address = new(Address, 1);
+ if (!address)
+ return -ENOMEM;
+
+ *address = (Address) {
+ .family = AF_UNSPEC,
+ .scope = RT_SCOPE_UNIVERSE,
+ .cinfo.ifa_prefered = CACHE_INFO_INFINITY_LIFE_TIME,
+ .cinfo.ifa_valid = CACHE_INFO_INFINITY_LIFE_TIME,
+ };
+
+ *ret = TAKE_PTR(address);
+
+ return 0;
+}
+
+int address_new_static(Network *network, const char *filename, unsigned section_line, Address **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(address_freep) Address *address = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(!!filename == (section_line > 0));
+
+ if (filename) {
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ address = hashmap_get(network->addresses_by_section, n);
+ if (address) {
+ *ret = TAKE_PTR(address);
+
+ return 0;
+ }
+ }
+
+ if (network->n_static_addresses >= STATIC_ADDRESSES_PER_NETWORK_MAX)
+ return -E2BIG;
+
+ r = address_new(&address);
+ if (r < 0)
+ return r;
+
+ address->network = network;
+ LIST_APPEND(addresses, network->static_addresses, address);
+ network->n_static_addresses++;
+
+ if (filename) {
+ address->section = TAKE_PTR(n);
+
+ r = hashmap_ensure_allocated(&network->addresses_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->addresses_by_section, address->section, address);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(address);
+
+ return 0;
+}
+
+void address_free(Address *address) {
+ if (!address)
+ return;
+
+ if (address->network) {
+ LIST_REMOVE(addresses, address->network->static_addresses, address);
+ assert(address->network->n_static_addresses > 0);
+ address->network->n_static_addresses--;
+
+ if (address->section)
+ hashmap_remove(address->network->addresses_by_section, address->section);
+ }
+
+ if (address->link) {
+ set_remove(address->link->addresses, address);
+ set_remove(address->link->addresses_foreign, address);
+
+ if (in_addr_equal(AF_INET6, &address->in_addr, (const union in_addr_union *) &address->link->ipv6ll_address))
+ memzero(&address->link->ipv6ll_address, sizeof(struct in6_addr));
+ }
+
+ network_config_section_free(address->section);
+ free(address->label);
+ free(address);
+}
+
+static void address_hash_func(const Address *a, struct siphash *state) {
+ assert(a);
+
+ siphash24_compress(&a->family, sizeof(a->family), state);
+
+ switch (a->family) {
+ case AF_INET:
+ siphash24_compress(&a->prefixlen, sizeof(a->prefixlen), state);
+
+ /* peer prefix */
+ if (a->prefixlen != 0) {
+ uint32_t prefix;
+
+ if (a->in_addr_peer.in.s_addr != 0)
+ prefix = be32toh(a->in_addr_peer.in.s_addr) >> (32 - a->prefixlen);
+ else
+ prefix = be32toh(a->in_addr.in.s_addr) >> (32 - a->prefixlen);
+
+ siphash24_compress(&prefix, sizeof(prefix), state);
+ }
+
+ _fallthrough_;
+ case AF_INET6:
+ /* local address */
+ siphash24_compress(&a->in_addr, FAMILY_ADDRESS_SIZE(a->family), state);
+
+ break;
+ default:
+ /* treat any other address family as AF_UNSPEC */
+ break;
+ }
+}
+
+static int address_compare_func(const Address *a1, const Address *a2) {
+ int r;
+
+ r = CMP(a1->family, a2->family);
+ if (r != 0)
+ return r;
+
+ switch (a1->family) {
+ /* use the same notion of equality as the kernel does */
+ case AF_INET:
+ r = CMP(a1->prefixlen, a2->prefixlen);
+ if (r != 0)
+ return r;
+
+ /* compare the peer prefixes */
+ if (a1->prefixlen != 0) {
+ /* make sure we don't try to shift by 32.
+ * See ISO/IEC 9899:TC3 § 6.5.7.3. */
+ uint32_t b1, b2;
+
+ if (a1->in_addr_peer.in.s_addr != 0)
+ b1 = be32toh(a1->in_addr_peer.in.s_addr) >> (32 - a1->prefixlen);
+ else
+ b1 = be32toh(a1->in_addr.in.s_addr) >> (32 - a1->prefixlen);
+
+ if (a2->in_addr_peer.in.s_addr != 0)
+ b2 = be32toh(a2->in_addr_peer.in.s_addr) >> (32 - a1->prefixlen);
+ else
+ b2 = be32toh(a2->in_addr.in.s_addr) >> (32 - a1->prefixlen);
+
+ r = CMP(b1, b2);
+ if (r != 0)
+ return r;
+ }
+
+ _fallthrough_;
+ case AF_INET6:
+ return memcmp(&a1->in_addr, &a2->in_addr, FAMILY_ADDRESS_SIZE(a1->family));
+ default:
+ /* treat any other address family as AF_UNSPEC */
+ return 0;
+ }
+}
+
+DEFINE_PRIVATE_HASH_OPS(address_hash_ops, Address, address_hash_func, address_compare_func);
+
+bool address_equal(Address *a1, Address *a2) {
+ if (a1 == a2)
+ return true;
+
+ if (!a1 || !a2)
+ return false;
+
+ return address_compare_func(a1, a2) == 0;
+}
+
+static int address_establish(Address *address, Link *link) {
+ bool masq;
+ int r;
+
+ assert(address);
+ assert(link);
+
+ masq = link->network &&
+ link->network->ip_masquerade &&
+ address->family == AF_INET &&
+ address->scope < RT_SCOPE_LINK;
+
+ /* Add firewall entry if this is requested */
+ if (address->ip_masquerade_done != masq) {
+ union in_addr_union masked = address->in_addr;
+ in_addr_mask(address->family, &masked, address->prefixlen);
+
+ r = fw_add_masquerade(masq, AF_INET, 0, &masked, address->prefixlen, NULL, NULL, 0);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not enable IP masquerading: %m");
+
+ address->ip_masquerade_done = masq;
+ }
+
+ return 0;
+}
+
+static int address_add_internal(Link *link, Set **addresses,
+ int family,
+ const union in_addr_union *in_addr,
+ unsigned char prefixlen,
+ Address **ret) {
+ _cleanup_(address_freep) Address *address = NULL;
+ int r;
+
+ assert(link);
+ assert(addresses);
+ assert(in_addr);
+
+ r = address_new(&address);
+ if (r < 0)
+ return r;
+
+ address->family = family;
+ address->in_addr = *in_addr;
+ address->prefixlen = prefixlen;
+ /* Consider address tentative until we get the real flags from the kernel */
+ address->flags = IFA_F_TENTATIVE;
+
+ r = set_ensure_allocated(addresses, &address_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = set_put(*addresses, address);
+ if (r < 0)
+ return r;
+
+ address->link = link;
+
+ if (ret)
+ *ret = address;
+
+ address = NULL;
+
+ return 0;
+}
+
+int address_add_foreign(Link *link, int family, const union in_addr_union *in_addr, unsigned char prefixlen, Address **ret) {
+ return address_add_internal(link, &link->addresses_foreign, family, in_addr, prefixlen, ret);
+}
+
+int address_add(Link *link, int family, const union in_addr_union *in_addr, unsigned char prefixlen, Address **ret) {
+ Address *address;
+ int r;
+
+ r = address_get(link, family, in_addr, prefixlen, &address);
+ if (r == -ENOENT) {
+ /* Address does not exist, create a new one */
+ r = address_add_internal(link, &link->addresses, family, in_addr, prefixlen, &address);
+ if (r < 0)
+ return r;
+ } else if (r == 0) {
+ /* Take over a foreign address */
+ r = set_ensure_allocated(&link->addresses, &address_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = set_put(link->addresses, address);
+ if (r < 0)
+ return r;
+
+ set_remove(link->addresses_foreign, address);
+ } else if (r == 1) {
+ /* Already exists, do nothing */
+ ;
+ } else
+ return r;
+
+ if (ret)
+ *ret = address;
+
+ return 0;
+}
+
+static int address_release(Address *address) {
+ int r;
+
+ assert(address);
+ assert(address->link);
+
+ /* Remove masquerading firewall entry if it was added */
+ if (address->ip_masquerade_done) {
+ union in_addr_union masked = address->in_addr;
+ in_addr_mask(address->family, &masked, address->prefixlen);
+
+ r = fw_add_masquerade(false, AF_INET, 0, &masked, address->prefixlen, NULL, NULL, 0);
+ if (r < 0)
+ log_link_warning_errno(address->link, r, "Failed to disable IP masquerading: %m");
+
+ address->ip_masquerade_done = false;
+ }
+
+ return 0;
+}
+
+int address_update(
+ Address *address,
+ unsigned char flags,
+ unsigned char scope,
+ const struct ifa_cacheinfo *cinfo) {
+
+ bool ready;
+ int r;
+
+ assert(address);
+ assert(cinfo);
+ assert_return(address->link, 1);
+
+ if (IN_SET(address->link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ ready = address_is_ready(address);
+
+ address->flags = flags;
+ address->scope = scope;
+ address->cinfo = *cinfo;
+
+ link_update_operstate(address->link);
+ link_check_ready(address->link);
+
+ if (!ready &&
+ address_is_ready(address) &&
+ address->family == AF_INET6 &&
+ in_addr_is_link_local(AF_INET6, &address->in_addr) > 0 &&
+ in_addr_is_null(AF_INET6, (const union in_addr_union*) &address->link->ipv6ll_address) > 0) {
+
+ r = link_ipv6ll_gained(address->link, &address->in_addr.in6);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int address_drop(Address *address) {
+ Link *link;
+ bool ready;
+
+ assert(address);
+
+ ready = address_is_ready(address);
+ link = address->link;
+
+ address_release(address);
+ address_free(address);
+
+ link_update_operstate(link);
+
+ if (link && !ready)
+ link_check_ready(link);
+
+ return 0;
+}
+
+int address_get(Link *link,
+ int family,
+ const union in_addr_union *in_addr,
+ unsigned char prefixlen,
+ Address **ret) {
+
+ Address address, *existing;
+
+ assert(link);
+ assert(in_addr);
+
+ address = (Address) {
+ .family = family,
+ .in_addr = *in_addr,
+ .prefixlen = prefixlen,
+ };
+
+ existing = set_get(link->addresses, &address);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 1;
+ }
+
+ existing = set_get(link->addresses_foreign, &address);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int address_remove_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EADDRNOTAVAIL)
+ log_link_warning_errno(link, r, "Could not drop address: %m");
+
+ return 1;
+}
+
+int address_remove(
+ Address *address,
+ Link *link,
+ link_netlink_message_handler_t callback) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ _cleanup_free_ char *b = NULL;
+ int r;
+
+ assert(address);
+ assert(IN_SET(address->family, AF_INET, AF_INET6));
+ assert(link);
+ assert(link->ifindex > 0);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ if (DEBUG_LOGGING) {
+ if (in_addr_to_string(address->family, &address->in_addr, &b) >= 0)
+ log_link_debug(link, "Removing address %s", b);
+ }
+
+ r = sd_rtnl_message_new_addr(link->manager->rtnl, &req, RTM_DELADDR,
+ link->ifindex, address->family);
+ if (r < 0)
+ return log_error_errno(r, "Could not allocate RTM_DELADDR message: %m");
+
+ r = sd_rtnl_message_addr_set_prefixlen(req, address->prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Could not set prefixlen: %m");
+
+ if (address->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(req, IFA_LOCAL, &address->in_addr.in);
+ else if (address->family == AF_INET6)
+ r = sd_netlink_message_append_in6_addr(req, IFA_LOCAL, &address->in_addr.in6);
+ if (r < 0)
+ return log_error_errno(r, "Could not append IFA_LOCAL attribute: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req,
+ callback ?: address_remove_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_error_errno(r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int address_acquire(Link *link, Address *original, Address **ret) {
+ union in_addr_union in_addr = {};
+ struct in_addr broadcast = {};
+ _cleanup_(address_freep) Address *na = NULL;
+ int r;
+
+ assert(link);
+ assert(original);
+ assert(ret);
+
+ /* Something useful was configured? just use it */
+ if (in_addr_is_null(original->family, &original->in_addr) <= 0)
+ return 0;
+
+ /* The address is configured to be 0.0.0.0 or [::] by the user?
+ * Then let's acquire something more useful from the pool. */
+ r = manager_address_pool_acquire(link->manager, original->family, original->prefixlen, &in_addr);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to acquire address from pool: %m");
+ if (r == 0) {
+ log_link_error(link, "Couldn't find free address for interface, all taken.");
+ return -EBUSY;
+ }
+
+ if (original->family == AF_INET) {
+ /* Pick first address in range for ourselves ... */
+ in_addr.in.s_addr = in_addr.in.s_addr | htobe32(1);
+
+ /* .. and use last as broadcast address */
+ if (original->prefixlen > 30)
+ broadcast.s_addr = 0;
+ else
+ broadcast.s_addr = in_addr.in.s_addr | htobe32(0xFFFFFFFFUL >> original->prefixlen);
+ } else if (original->family == AF_INET6)
+ in_addr.in6.s6_addr[15] |= 1;
+
+ r = address_new(&na);
+ if (r < 0)
+ return r;
+
+ na->family = original->family;
+ na->prefixlen = original->prefixlen;
+ na->scope = original->scope;
+ na->cinfo = original->cinfo;
+
+ if (original->label) {
+ na->label = strdup(original->label);
+ if (!na->label)
+ return -ENOMEM;
+ }
+
+ na->broadcast = broadcast;
+ na->in_addr = in_addr;
+
+ LIST_PREPEND(addresses, link->pool_addresses, na);
+
+ *ret = TAKE_PTR(na);
+
+ return 0;
+}
+
+int address_configure(
+ Address *address,
+ Link *link,
+ link_netlink_message_handler_t callback,
+ bool update) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(address);
+ assert(IN_SET(address->family, AF_INET, AF_INET6));
+ assert(link);
+ assert(link->ifindex > 0);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+ assert(callback);
+
+ /* If this is a new address, then refuse adding more than the limit */
+ if (address_get(link, address->family, &address->in_addr, address->prefixlen, NULL) <= 0 &&
+ set_size(link->addresses) >= ADDRESSES_PER_LINK_MAX)
+ return -E2BIG;
+
+ r = address_acquire(link, address, &address);
+ if (r < 0)
+ return r;
+
+ if (update)
+ r = sd_rtnl_message_new_addr_update(link->manager->rtnl, &req,
+ link->ifindex, address->family);
+ else
+ r = sd_rtnl_message_new_addr(link->manager->rtnl, &req, RTM_NEWADDR,
+ link->ifindex, address->family);
+ if (r < 0)
+ return log_error_errno(r, "Could not allocate RTM_NEWADDR message: %m");
+
+ r = sd_rtnl_message_addr_set_prefixlen(req, address->prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Could not set prefixlen: %m");
+
+ address->flags |= IFA_F_PERMANENT;
+
+ if (address->home_address)
+ address->flags |= IFA_F_HOMEADDRESS;
+
+ if (address->duplicate_address_detection)
+ address->flags |= IFA_F_NODAD;
+
+ if (address->manage_temporary_address)
+ address->flags |= IFA_F_MANAGETEMPADDR;
+
+ if (address->prefix_route)
+ address->flags |= IFA_F_NOPREFIXROUTE;
+
+ if (address->autojoin)
+ address->flags |= IFA_F_MCAUTOJOIN;
+
+ r = sd_rtnl_message_addr_set_flags(req, (address->flags & 0xff));
+ if (r < 0)
+ return log_error_errno(r, "Could not set flags: %m");
+
+ if (address->flags & ~0xff) {
+ r = sd_netlink_message_append_u32(req, IFA_FLAGS, address->flags);
+ if (r < 0)
+ return log_error_errno(r, "Could not set extended flags: %m");
+ }
+
+ r = sd_rtnl_message_addr_set_scope(req, address->scope);
+ if (r < 0)
+ return log_error_errno(r, "Could not set scope: %m");
+
+ if (address->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(req, IFA_LOCAL, &address->in_addr.in);
+ else if (address->family == AF_INET6)
+ r = sd_netlink_message_append_in6_addr(req, IFA_LOCAL, &address->in_addr.in6);
+ if (r < 0)
+ return log_error_errno(r, "Could not append IFA_LOCAL attribute: %m");
+
+ if (!in_addr_is_null(address->family, &address->in_addr_peer)) {
+ if (address->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(req, IFA_ADDRESS, &address->in_addr_peer.in);
+ else if (address->family == AF_INET6)
+ r = sd_netlink_message_append_in6_addr(req, IFA_ADDRESS, &address->in_addr_peer.in6);
+ if (r < 0)
+ return log_error_errno(r, "Could not append IFA_ADDRESS attribute: %m");
+ } else if (address->family == AF_INET && address->prefixlen <= 30) {
+ r = sd_netlink_message_append_in_addr(req, IFA_BROADCAST, &address->broadcast);
+ if (r < 0)
+ return log_error_errno(r, "Could not append IFA_BROADCAST attribute: %m");
+ }
+
+ if (address->label) {
+ r = sd_netlink_message_append_string(req, IFA_LABEL, address->label);
+ if (r < 0)
+ return log_error_errno(r, "Could not append IFA_LABEL attribute: %m");
+ }
+
+ r = sd_netlink_message_append_cache_info(req, IFA_CACHEINFO, &address->cinfo);
+ if (r < 0)
+ return log_error_errno(r, "Could not append IFA_CACHEINFO attribute: %m");
+
+ r = address_establish(address, link);
+ if (r < 0)
+ return r;
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, callback, link_netlink_destroy_callback, link);
+ if (r < 0) {
+ address_release(address);
+ return log_error_errno(r, "Could not send rtnetlink message: %m");
+ }
+
+ link_ref(link);
+
+ if (address->family == AF_INET6 && !in_addr_is_null(address->family, &address->in_addr_peer))
+ r = address_add(link, address->family, &address->in_addr_peer, address->prefixlen, NULL);
+ else
+ r = address_add(link, address->family, &address->in_addr, address->prefixlen, NULL);
+ if (r < 0) {
+ address_release(address);
+ return log_error_errno(r, "Could not add address: %m");
+ }
+
+ return 0;
+}
+
+int config_parse_broadcast(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(address_freep) Address *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ if (n->family == AF_INET6) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Broadcast is not valid for IPv6 addresses, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = in_addr_from_string(AF_INET, rvalue, (union in_addr_union*) &n->broadcast);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Broadcast is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ n->family = AF_INET;
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_address(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(address_freep) Address *n = NULL;
+ union in_addr_union buffer;
+ unsigned char prefixlen;
+ int r, f;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(section, "Network")) {
+ /* we are not in an Address section, so treat
+ * this as the special '0' section */
+ r = address_new_static(network, NULL, 0, &n);
+ } else
+ r = address_new_static(network, filename, section_line, &n);
+
+ if (r < 0)
+ return r;
+
+ /* Address=address/prefixlen */
+ r = in_addr_prefix_from_string_auto_internal(rvalue, PREFIXLEN_REFUSE, &f, &buffer, &prefixlen);
+ if (r == -ENOANO) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "An address '%s' is specified without prefix length. "
+ "The behavior of parsing addresses without prefix length will be changed in the future release. "
+ "Please specify prefix length explicitly.", rvalue);
+
+ r = in_addr_prefix_from_string_auto_internal(rvalue, PREFIXLEN_LEGACY, &f, &buffer, &prefixlen);
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid address '%s', ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ if (n->family != AF_UNSPEC && f != n->family) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Address is incompatible, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ n->family = f;
+ n->prefixlen = prefixlen;
+
+ if (streq(lvalue, "Address"))
+ n->in_addr = buffer;
+ else
+ n->in_addr_peer = buffer;
+
+ if (n->family == AF_INET && n->broadcast.s_addr == 0)
+ n->broadcast.s_addr = n->in_addr.in.s_addr | htonl(0xfffffffflu >> n->prefixlen);
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_label(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(address_freep) Address *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ if (!address_label_valid(rvalue)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Interface label is too long or invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = free_and_strdup(&n->label, rvalue);
+ if (r < 0)
+ return log_oom();
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_lifetime(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Network *network = userdata;
+ _cleanup_(address_freep) Address *n = NULL;
+ unsigned k;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ if (STR_IN_SET(rvalue, "forever", "infinity")) {
+ n->cinfo.ifa_prefered = CACHE_INFO_INFINITY_LIFE_TIME;
+ n = NULL;
+
+ return 0;
+ }
+
+ r = safe_atou(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse PreferredLifetime, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (k != 0)
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid PreferredLifetime value, ignoring: %d", k);
+ else {
+ n->cinfo.ifa_prefered = k;
+ n = NULL;
+ }
+
+ return 0;
+}
+
+int config_parse_address_flags(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Network *network = userdata;
+ _cleanup_(address_freep) Address *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse address flag, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "HomeAddress"))
+ n->home_address = r;
+ else if (streq(lvalue, "DuplicateAddressDetection"))
+ n->duplicate_address_detection = r;
+ else if (streq(lvalue, "ManageTemporaryAddress"))
+ n->manage_temporary_address = r;
+ else if (streq(lvalue, "PrefixRoute"))
+ n->prefix_route = r;
+ else if (streq(lvalue, "AutoJoin"))
+ n->autojoin = r;
+
+ return 0;
+}
+
+int config_parse_address_scope(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Network *network = userdata;
+ _cleanup_(address_freep) Address *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ if (streq(rvalue, "host"))
+ n->scope = RT_SCOPE_HOST;
+ else if (streq(rvalue, "link"))
+ n->scope = RT_SCOPE_LINK;
+ else if (streq(rvalue, "global"))
+ n->scope = RT_SCOPE_UNIVERSE;
+ else {
+ r = safe_atou8(rvalue , &n->scope);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Could not parse address scope \"%s\", ignoring assignment: %m", rvalue);
+ return 0;
+ }
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+bool address_is_ready(const Address *a) {
+ assert(a);
+
+ if (a->family == AF_INET6)
+ return !(a->flags & IFA_F_TENTATIVE);
+ else
+ return !(a->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED));
+}
diff --git a/src/network/networkd-address.h b/src/network/networkd-address.h
new file mode 100644
index 0000000..4714c07
--- /dev/null
+++ b/src/network/networkd-address.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+#include "conf-parser.h"
+#include "in-addr-util.h"
+
+typedef struct Address Address;
+
+#include "networkd-link.h"
+#include "networkd-network.h"
+
+#define CACHE_INFO_INFINITY_LIFE_TIME 0xFFFFFFFFU
+
+typedef struct Network Network;
+typedef struct Link Link;
+typedef struct NetworkConfigSection NetworkConfigSection;
+
+struct Address {
+ Network *network;
+ NetworkConfigSection *section;
+
+ Link *link;
+
+ int family;
+ unsigned char prefixlen;
+ unsigned char scope;
+ uint32_t flags;
+ char *label;
+
+ struct in_addr broadcast;
+ struct ifa_cacheinfo cinfo;
+
+ union in_addr_union in_addr;
+ union in_addr_union in_addr_peer;
+
+ bool ip_masquerade_done:1;
+ bool duplicate_address_detection;
+ bool manage_temporary_address;
+ bool home_address;
+ bool prefix_route;
+ bool autojoin;
+
+ LIST_FIELDS(Address, addresses);
+};
+
+int address_new_static(Network *network, const char *filename, unsigned section, Address **ret);
+int address_new(Address **ret);
+void address_free(Address *address);
+int address_add_foreign(Link *link, int family, const union in_addr_union *in_addr, unsigned char prefixlen, Address **ret);
+int address_add(Link *link, int family, const union in_addr_union *in_addr, unsigned char prefixlen, Address **ret);
+int address_get(Link *link, int family, const union in_addr_union *in_addr, unsigned char prefixlen, Address **ret);
+int address_update(Address *address, unsigned char flags, unsigned char scope, const struct ifa_cacheinfo *cinfo);
+int address_drop(Address *address);
+int address_configure(Address *address, Link *link, link_netlink_message_handler_t callback, bool update);
+int address_remove(Address *address, Link *link, link_netlink_message_handler_t callback);
+bool address_equal(Address *a1, Address *a2);
+bool address_is_ready(const Address *a);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Address*, address_free);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_broadcast);
+CONFIG_PARSER_PROTOTYPE(config_parse_label);
+CONFIG_PARSER_PROTOTYPE(config_parse_lifetime);
+CONFIG_PARSER_PROTOTYPE(config_parse_address_flags);
+CONFIG_PARSER_PROTOTYPE(config_parse_address_scope);
diff --git a/src/network/networkd-brvlan.c b/src/network/networkd-brvlan.c
new file mode 100644
index 0000000..8377623
--- /dev/null
+++ b/src/network/networkd-brvlan.c
@@ -0,0 +1,340 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2016 BISDN GmbH. All rights reserved.
+***/
+
+#include <netinet/in.h>
+#include <linux/if_bridge.h>
+#include <stdbool.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "missing_if_bridge.h"
+#include "netlink-util.h"
+#include "networkd-brvlan.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "parse-util.h"
+#include "vlan-util.h"
+
+static bool is_bit_set(unsigned bit, uint32_t scope) {
+ assert(bit < sizeof(scope)*8);
+ return scope & (1 << bit);
+}
+
+static void set_bit(unsigned nr, uint32_t *addr) {
+ if (nr < BRIDGE_VLAN_BITMAP_MAX)
+ addr[nr / 32] |= (((uint32_t) 1) << (nr % 32));
+}
+
+static int find_next_bit(int i, uint32_t x) {
+ int j;
+
+ if (i >= 32)
+ return -1;
+
+ /* find first bit */
+ if (i < 0)
+ return BUILTIN_FFS_U32(x);
+
+ /* mask off prior finds to get next */
+ j = __builtin_ffs(x >> i);
+ return j ? j + i : 0;
+}
+
+static int append_vlan_info_data(Link *const link, sd_netlink_message *req, uint16_t pvid, const uint32_t *br_vid_bitmap, const uint32_t *br_untagged_bitmap) {
+ struct bridge_vlan_info br_vlan;
+ int i, j, k, r, done, cnt;
+ uint16_t begin, end;
+ bool untagged = false;
+
+ assert(link);
+ assert(req);
+ assert(br_vid_bitmap);
+ assert(br_untagged_bitmap);
+
+ i = cnt = -1;
+
+ begin = end = UINT16_MAX;
+ for (k = 0; k < BRIDGE_VLAN_BITMAP_LEN; k++) {
+ unsigned base_bit;
+ uint32_t vid_map = br_vid_bitmap[k];
+ uint32_t untagged_map = br_untagged_bitmap[k];
+
+ base_bit = k * 32;
+ i = -1;
+ done = 0;
+ do {
+ j = find_next_bit(i, vid_map);
+ if (j > 0) {
+ /* first hit of any bit */
+ if (begin == UINT16_MAX && end == UINT16_MAX) {
+ begin = end = j - 1 + base_bit;
+ untagged = is_bit_set(j - 1, untagged_map);
+ goto next;
+ }
+
+ /* this bit is a continuation of prior bits */
+ if (j - 2 + base_bit == end && untagged == is_bit_set(j - 1, untagged_map) && (uint16_t)j - 1 + base_bit != pvid && (uint16_t)begin != pvid) {
+ end++;
+ goto next;
+ }
+ } else
+ done = 1;
+
+ if (begin != UINT16_MAX) {
+ cnt++;
+ if (done && k < BRIDGE_VLAN_BITMAP_LEN - 1)
+ break;
+
+ br_vlan.flags = 0;
+ if (untagged)
+ br_vlan.flags |= BRIDGE_VLAN_INFO_UNTAGGED;
+
+ if (begin == end) {
+ br_vlan.vid = begin;
+
+ if (begin == pvid)
+ br_vlan.flags |= BRIDGE_VLAN_INFO_PVID;
+
+ r = sd_netlink_message_append_data(req, IFLA_BRIDGE_VLAN_INFO, &br_vlan, sizeof(br_vlan));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRIDGE_VLAN_INFO attribute: %m");
+ } else {
+ br_vlan.vid = begin;
+ br_vlan.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
+
+ r = sd_netlink_message_append_data(req, IFLA_BRIDGE_VLAN_INFO, &br_vlan, sizeof(br_vlan));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRIDGE_VLAN_INFO attribute: %m");
+
+ br_vlan.vid = end;
+ br_vlan.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
+ br_vlan.flags |= BRIDGE_VLAN_INFO_RANGE_END;
+
+ r = sd_netlink_message_append_data(req, IFLA_BRIDGE_VLAN_INFO, &br_vlan, sizeof(br_vlan));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRIDGE_VLAN_INFO attribute: %m");
+ }
+
+ if (done)
+ break;
+ }
+ if (j > 0) {
+ begin = end = j - 1 + base_bit;
+ untagged = is_bit_set(j - 1, untagged_map);
+ }
+
+ next:
+ i = j;
+ } while (!done);
+ }
+ if (!cnt)
+ return -EINVAL;
+
+ return cnt;
+}
+
+static int set_brvlan_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST)
+ log_link_error_errno(link, r, "Could not add VLAN to bridge port: %m");
+
+ return 1;
+}
+
+int br_vlan_configure(Link *link, uint16_t pvid, uint32_t *br_vid_bitmap, uint32_t *br_untagged_bitmap) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+ uint16_t flags;
+ sd_netlink *rtnl;
+
+ assert(link);
+ assert(link->manager);
+ assert(br_vid_bitmap);
+ assert(br_untagged_bitmap);
+ assert(link->network);
+
+ /* pvid might not be in br_vid_bitmap yet */
+ if (pvid)
+ set_bit(pvid, br_vid_bitmap);
+
+ rtnl = link->manager->rtnl;
+
+ /* create new RTM message */
+ r = sd_rtnl_message_new_link(rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_rtnl_message_link_set_family(req, PF_BRIDGE);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set message family: %m");
+
+ r = sd_netlink_message_open_container(req, IFLA_AF_SPEC);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open IFLA_AF_SPEC container: %m");
+
+ /* master needs flag self */
+ if (!link->network->bridge) {
+ flags = BRIDGE_FLAGS_SELF;
+ sd_netlink_message_append_data(req, IFLA_BRIDGE_FLAGS, &flags, sizeof(uint16_t));
+ }
+
+ /* add vlan info */
+ r = append_vlan_info_data(link, req, pvid, br_vid_bitmap, br_untagged_bitmap);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append VLANs: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close IFLA_AF_SPEC container: %m");
+
+ /* send message to the kernel */
+ r = netlink_call_async(rtnl, NULL, req, set_brvlan_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int parse_vid_range(const char *rvalue, uint16_t *vid, uint16_t *vid_end) {
+ int r;
+ char *p;
+ char *_rvalue = NULL;
+ uint16_t _vid = UINT16_MAX;
+ uint16_t _vid_end = UINT16_MAX;
+
+ assert(rvalue);
+ assert(vid);
+ assert(vid_end);
+
+ _rvalue = strdupa(rvalue);
+ p = strchr(_rvalue, '-');
+ if (p) {
+ *p = '\0';
+ p++;
+ r = parse_vlanid(_rvalue, &_vid);
+ if (r < 0)
+ return r;
+
+ if (_vid == 0)
+ return -ERANGE;
+
+ r = parse_vlanid(p, &_vid_end);
+ if (r < 0)
+ return r;
+
+ if (_vid_end == 0)
+ return -ERANGE;
+ } else {
+ r = parse_vlanid(_rvalue, &_vid);
+ if (r < 0)
+ return r;
+
+ if (_vid == 0)
+ return -ERANGE;
+ }
+
+ *vid = _vid;
+ *vid_end = _vid_end;
+ return r;
+}
+
+int config_parse_brvlan_pvid(const char *unit, const char *filename,
+ unsigned line, const char *section,
+ unsigned section_line, const char *lvalue,
+ int ltype, const char *rvalue, void *data,
+ void *userdata) {
+ Network *network = userdata;
+ int r;
+ uint16_t pvid;
+ r = parse_vlanid(rvalue, &pvid);
+ if (r < 0)
+ return r;
+
+ network->pvid = pvid;
+ network->use_br_vlan = true;
+
+ return 0;
+}
+
+int config_parse_brvlan_vlan(const char *unit, const char *filename,
+ unsigned line, const char *section,
+ unsigned section_line, const char *lvalue,
+ int ltype, const char *rvalue, void *data,
+ void *userdata) {
+ Network *network = userdata;
+ int r;
+ uint16_t vid, vid_end;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_vid_range(rvalue, &vid, &vid_end);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse VLAN, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (UINT16_MAX == vid_end)
+ set_bit(vid++, network->br_vid_bitmap);
+ else {
+ if (vid >= vid_end) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid VLAN range, ignoring %s", rvalue);
+ return 0;
+ }
+ for (; vid <= vid_end; vid++)
+ set_bit(vid, network->br_vid_bitmap);
+ }
+ network->use_br_vlan = true;
+ return 0;
+}
+
+int config_parse_brvlan_untagged(const char *unit, const char *filename,
+ unsigned line, const char *section,
+ unsigned section_line, const char *lvalue,
+ int ltype, const char *rvalue, void *data,
+ void *userdata) {
+ Network *network = userdata;
+ int r;
+ uint16_t vid, vid_end;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_vid_range(rvalue, &vid, &vid_end);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Could not parse VLAN: %s", rvalue);
+ return 0;
+ }
+
+ if (UINT16_MAX == vid_end) {
+ set_bit(vid, network->br_vid_bitmap);
+ set_bit(vid, network->br_untagged_bitmap);
+ } else {
+ if (vid >= vid_end) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid VLAN range, ignoring %s", rvalue);
+ return 0;
+ }
+ for (; vid <= vid_end; vid++) {
+ set_bit(vid, network->br_vid_bitmap);
+ set_bit(vid, network->br_untagged_bitmap);
+ }
+ }
+ network->use_br_vlan = true;
+ return 0;
+}
diff --git a/src/network/networkd-brvlan.h b/src/network/networkd-brvlan.h
new file mode 100644
index 0000000..f6af140
--- /dev/null
+++ b/src/network/networkd-brvlan.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2016 BISDN GmbH. All rights reserved.
+***/
+
+#include <stdint.h>
+
+#include "conf-parser.h"
+
+typedef struct Link Link;
+
+int br_vlan_configure(Link *link, uint16_t pvid, uint32_t *br_vid_bitmap, uint32_t *br_untagged_bitmap);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_brvlan_pvid);
+CONFIG_PARSER_PROTOTYPE(config_parse_brvlan_vlan);
+CONFIG_PARSER_PROTOTYPE(config_parse_brvlan_untagged);
diff --git a/src/network/networkd-conf.c b/src/network/networkd-conf.c
new file mode 100644
index 0000000..334ffc3
--- /dev/null
+++ b/src/network/networkd-conf.c
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Vinay Kulkarni <kulkarniv@vmware.com>
+ ***/
+
+#include <ctype.h>
+
+#include "conf-parser.h"
+#include "def.h"
+#include "dhcp-identifier.h"
+#include "extract-word.h"
+#include "hexdecoct.h"
+#include "networkd-conf.h"
+#include "networkd-network.h"
+#include "string-table.h"
+
+int manager_parse_config_file(Manager *m) {
+ assert(m);
+
+ return config_parse_many_nulstr(PKGSYSCONFDIR "/networkd.conf",
+ CONF_PATHS_NULSTR("systemd/networkd.conf.d"),
+ "DHCP\0",
+ config_item_perf_lookup, networkd_gperf_lookup,
+ CONFIG_PARSE_WARN, m);
+}
+
+static const char* const duid_type_table[_DUID_TYPE_MAX] = {
+ [DUID_TYPE_LLT] = "link-layer-time",
+ [DUID_TYPE_EN] = "vendor",
+ [DUID_TYPE_LL] = "link-layer",
+ [DUID_TYPE_UUID] = "uuid",
+};
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(duid_type, DUIDType);
+
+int config_parse_duid_type(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *type_string = NULL;
+ const char *p = rvalue;
+ DUID *duid = data;
+ DUIDType type;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(duid);
+
+ r = extract_first_word(&p, &type_string, ":", 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract DUID type from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ type = duid_type_from_string(type_string);
+ if (type < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse DUID type '%s', ignoring.", type_string);
+ return 0;
+ }
+
+ if (!isempty(p)) {
+ usec_t u;
+
+ if (type != DUID_TYPE_LLT) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = parse_timestamp(p, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse timestamp, ignoring: %s", p);
+ return 0;
+ }
+
+ duid->llt_time = u;
+ }
+
+ duid->type = type;
+
+ return 0;
+}
+
+int config_parse_duid_rawdata(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ DUID *ret = data;
+ uint8_t raw_data[MAX_DUID_LEN];
+ unsigned count = 0;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(ret);
+
+ /* RawData contains DUID in format "NN:NN:NN..." */
+ for (;;) {
+ int n1, n2, len, r;
+ uint32_t byte;
+ _cleanup_free_ char *cbyte = NULL;
+
+ r = extract_first_word(&rvalue, &cbyte, ":", 0);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to read DUID, ignoring assignment: %s.", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ break;
+ if (count >= MAX_DUID_LEN) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Max DUID length exceeded, ignoring assignment: %s.", rvalue);
+ return 0;
+ }
+
+ len = strlen(cbyte);
+ if (!IN_SET(len, 1, 2)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid length - DUID byte: %s, ignoring assignment: %s.", cbyte, rvalue);
+ return 0;
+ }
+ n1 = unhexchar(cbyte[0]);
+ if (len == 2)
+ n2 = unhexchar(cbyte[1]);
+ else
+ n2 = 0;
+
+ if (n1 < 0 || n2 < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid DUID byte: %s. Ignoring assignment: %s.", cbyte, rvalue);
+ return 0;
+ }
+
+ byte = ((uint8_t) n1 << (4 * (len-1))) | (uint8_t) n2;
+ raw_data[count++] = byte;
+ }
+
+ assert_cc(sizeof(raw_data) == sizeof(ret->raw_data));
+ memcpy(ret->raw_data, raw_data, count);
+ ret->raw_data_len = count;
+ return 0;
+}
diff --git a/src/network/networkd-conf.h b/src/network/networkd-conf.h
new file mode 100644
index 0000000..88a2c64
--- /dev/null
+++ b/src/network/networkd-conf.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2014 Vinay Kulkarni <kulkarniv@vmware.com>
+***/
+
+#include "conf-parser.h"
+
+typedef struct Manager Manager;
+
+int manager_parse_config_file(Manager *m);
+
+const struct ConfigPerfItem* networkd_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_duid_type);
+CONFIG_PARSER_PROTOTYPE(config_parse_duid_rawdata);
diff --git a/src/network/networkd-dhcp4.c b/src/network/networkd-dhcp4.c
new file mode 100644
index 0000000..d8ac455
--- /dev/null
+++ b/src/network/networkd-dhcp4.c
@@ -0,0 +1,822 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/ether.h>
+#include <linux/if.h>
+
+#include "alloc-util.h"
+#include "hostname-util.h"
+#include "parse-util.h"
+#include "netdev/vrf.h"
+#include "network-internal.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "string-util.h"
+#include "sysctl-util.h"
+
+static int dhcp4_route_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->dhcp4_messages > 0);
+
+ link->dhcp4_messages--;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_error_errno(link, r, "Could not set DHCPv4 route: %m");
+ link_enter_failed(link);
+ }
+
+ if (link->dhcp4_messages == 0) {
+ link->dhcp4_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+static int route_scope_from_address(const Route *route, const struct in_addr *self_addr) {
+ assert(route);
+ assert(self_addr);
+
+ if (in_addr_is_localhost(AF_INET, &route->dst) ||
+ (self_addr->s_addr && route->dst.in.s_addr == self_addr->s_addr))
+ return RT_SCOPE_HOST;
+ else if (in4_addr_is_null(&route->gw.in))
+ return RT_SCOPE_LINK;
+ else
+ return RT_SCOPE_UNIVERSE;
+}
+
+static int link_set_dhcp_routes(Link *link) {
+ _cleanup_free_ sd_dhcp_route **static_routes = NULL;
+ bool classless_route = false, static_route = false;
+ struct in_addr gateway, address;
+ int r, n, i;
+ uint32_t table;
+
+ assert(link);
+
+ if (!link->dhcp_lease) /* link went down while we configured the IP addresses? */
+ return 0;
+
+ if (!link->network) /* link went down while we configured the IP addresses? */
+ return 0;
+
+ if (!link->network->dhcp_use_routes)
+ return 0;
+
+ /* When the interface is part of an VRF use the VRFs routing table, unless
+ * there is a another table specified. */
+ table = link->network->dhcp_route_table;
+ if (!link->network->dhcp_route_table_set && link->network->vrf != NULL)
+ table = VRF(link->network->vrf)->table;
+
+ r = sd_dhcp_lease_get_address(link->dhcp_lease, &address);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "DHCP error: could not get address: %m");
+
+ n = sd_dhcp_lease_get_routes(link->dhcp_lease, &static_routes);
+ if (n == -ENODATA)
+ log_link_debug_errno(link, n, "DHCP: No routes received from DHCP server: %m");
+ else if (n < 0)
+ log_link_debug_errno(link, n, "DHCP error: could not get routes: %m");
+
+ for (i = 0; i < n; i++) {
+ switch (sd_dhcp_route_get_option(static_routes[i])) {
+ case SD_DHCP_OPTION_CLASSLESS_STATIC_ROUTE:
+ classless_route = true;
+ break;
+ case SD_DHCP_OPTION_STATIC_ROUTE:
+ static_route = true;
+ break;
+ }
+ }
+
+ for (i = 0; i < n; i++) {
+ _cleanup_(route_freep) Route *route = NULL;
+
+ /* if the DHCP server returns both a Classless Static Routes option and a Static Routes option,
+ the DHCP client MUST ignore the Static Routes option. */
+ if (classless_route &&
+ sd_dhcp_route_get_option(static_routes[i]) != SD_DHCP_OPTION_CLASSLESS_STATIC_ROUTE)
+ continue;
+
+ r = route_new(&route);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate route: %m");
+
+ route->family = AF_INET;
+ route->protocol = RTPROT_DHCP;
+ assert_se(sd_dhcp_route_get_gateway(static_routes[i], &route->gw.in) >= 0);
+ assert_se(sd_dhcp_route_get_destination(static_routes[i], &route->dst.in) >= 0);
+ assert_se(sd_dhcp_route_get_destination_prefix_length(static_routes[i], &route->dst_prefixlen) >= 0);
+ route->priority = link->network->dhcp_route_metric;
+ route->table = table;
+ route->scope = route_scope_from_address(route, &address);
+
+ r = route_configure(route, link, dhcp4_route_handler);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not set host route: %m");
+
+ link->dhcp4_messages++;
+ }
+
+ r = sd_dhcp_lease_get_router(link->dhcp_lease, &gateway);
+ if (r == -ENODATA)
+ log_link_info_errno(link, r, "DHCP: No gateway received from DHCP server: %m");
+ else if (r < 0)
+ log_link_warning_errno(link, r, "DHCP error: could not get gateway: %m");
+
+ /* According to RFC 3442: If the DHCP server returns both a Classless Static Routes option and
+ a Router option, the DHCP client MUST ignore the Router option. */
+ if (classless_route && static_route)
+ log_link_warning(link, "Classless static routes received from DHCP server: ignoring static-route option and router option");
+
+ if (r >= 0 && !classless_route) {
+ _cleanup_(route_freep) Route *route = NULL;
+ _cleanup_(route_freep) Route *route_gw = NULL;
+
+ r = route_new(&route);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate route: %m");
+
+ route->protocol = RTPROT_DHCP;
+
+ r = route_new(&route_gw);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate route: %m");
+
+ /* The dhcp netmask may mask out the gateway. Add an explicit
+ * route for the gw host so that we can route no matter the
+ * netmask or existing kernel route tables. */
+ route_gw->family = AF_INET;
+ route_gw->dst.in = gateway;
+ route_gw->dst_prefixlen = 32;
+ route_gw->prefsrc.in = address;
+ route_gw->scope = RT_SCOPE_LINK;
+ route_gw->protocol = RTPROT_DHCP;
+ route_gw->priority = link->network->dhcp_route_metric;
+ route_gw->table = table;
+
+ r = route_configure(route_gw, link, dhcp4_route_handler);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not set host route: %m");
+
+ link->dhcp4_messages++;
+
+ route->family = AF_INET;
+ route->gw.in = gateway;
+ route->prefsrc.in = address;
+ route->priority = link->network->dhcp_route_metric;
+ route->table = table;
+
+ r = route_configure(route, link, dhcp4_route_handler);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not set routes: %m");
+ link_enter_failed(link);
+ return r;
+ }
+
+ link->dhcp4_messages++;
+ }
+
+ return 0;
+}
+
+static int dhcp_lease_lost(Link *link) {
+ _cleanup_(address_freep) Address *address = NULL;
+ struct in_addr addr;
+ struct in_addr netmask;
+ struct in_addr gateway;
+ unsigned prefixlen = 0;
+ int r;
+
+ assert(link);
+ assert(link->dhcp_lease);
+
+ log_link_warning(link, "DHCP lease lost");
+
+ if (link->network->dhcp_use_routes) {
+ _cleanup_free_ sd_dhcp_route **routes = NULL;
+ int n, i;
+
+ n = sd_dhcp_lease_get_routes(link->dhcp_lease, &routes);
+ if (n >= 0) {
+ for (i = 0; i < n; i++) {
+ _cleanup_(route_freep) Route *route = NULL;
+
+ r = route_new(&route);
+ if (r >= 0) {
+ route->family = AF_INET;
+ assert_se(sd_dhcp_route_get_gateway(routes[i], &route->gw.in) >= 0);
+ assert_se(sd_dhcp_route_get_destination(routes[i], &route->dst.in) >= 0);
+ assert_se(sd_dhcp_route_get_destination_prefix_length(routes[i], &route->dst_prefixlen) >= 0);
+
+ route_remove(route, link, NULL);
+ }
+ }
+ }
+ }
+
+ r = address_new(&address);
+ if (r >= 0) {
+ r = sd_dhcp_lease_get_router(link->dhcp_lease, &gateway);
+ if (r >= 0) {
+ _cleanup_(route_freep) Route *route_gw = NULL;
+ _cleanup_(route_freep) Route *route = NULL;
+
+ r = route_new(&route_gw);
+ if (r >= 0) {
+ route_gw->family = AF_INET;
+ route_gw->dst.in = gateway;
+ route_gw->dst_prefixlen = 32;
+ route_gw->scope = RT_SCOPE_LINK;
+
+ route_remove(route_gw, link, NULL);
+ }
+
+ r = route_new(&route);
+ if (r >= 0) {
+ route->family = AF_INET;
+ route->gw.in = gateway;
+
+ route_remove(route, link, NULL);
+ }
+ }
+
+ r = sd_dhcp_lease_get_address(link->dhcp_lease, &addr);
+ if (r >= 0) {
+ r = sd_dhcp_lease_get_netmask(link->dhcp_lease, &netmask);
+ if (r >= 0)
+ prefixlen = in4_addr_netmask_to_prefixlen(&netmask);
+
+ address->family = AF_INET;
+ address->in_addr.in = addr;
+ address->prefixlen = prefixlen;
+
+ address_remove(address, link, NULL);
+ }
+ }
+
+ if (link->network->dhcp_use_mtu) {
+ uint16_t mtu;
+
+ r = sd_dhcp_lease_get_mtu(link->dhcp_lease, &mtu);
+ if (r >= 0 && link->original_mtu != mtu) {
+ r = link_set_mtu(link, link->original_mtu);
+ if (r < 0) {
+ log_link_warning(link,
+ "DHCP error: could not reset MTU");
+ link_enter_failed(link);
+ return r;
+ }
+ }
+ }
+
+ if (link->network->dhcp_use_hostname) {
+ const char *hostname = NULL;
+
+ if (link->network->dhcp_hostname)
+ hostname = link->network->dhcp_hostname;
+ else
+ (void) sd_dhcp_lease_get_hostname(link->dhcp_lease, &hostname);
+
+ if (hostname) {
+ /* If a hostname was set due to the lease, then unset it now. */
+ r = manager_set_hostname(link->manager, NULL);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Failed to reset transient hostname: %m");
+ }
+ }
+
+ link->dhcp_lease = sd_dhcp_lease_unref(link->dhcp_lease);
+ link_dirty(link);
+ link->dhcp4_configured = false;
+
+ return 0;
+}
+
+static int dhcp4_address_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_error_errno(link, r, "Could not set DHCPv4 address: %m");
+ link_enter_failed(link);
+ } else if (r >= 0)
+ manager_rtnl_process_address(rtnl, m, link->manager);
+
+ link_set_dhcp_routes(link);
+
+ if (link->dhcp4_messages == 0) {
+ link->dhcp4_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+static int dhcp4_update_address(Link *link,
+ struct in_addr *address,
+ struct in_addr *netmask,
+ uint32_t lifetime) {
+ _cleanup_(address_freep) Address *addr = NULL;
+ unsigned prefixlen;
+ int r;
+
+ assert(address);
+ assert(netmask);
+ assert(lifetime);
+
+ prefixlen = in4_addr_netmask_to_prefixlen(netmask);
+
+ r = address_new(&addr);
+ if (r < 0)
+ return r;
+
+ addr->family = AF_INET;
+ addr->in_addr.in.s_addr = address->s_addr;
+ addr->cinfo.ifa_prefered = lifetime;
+ addr->cinfo.ifa_valid = lifetime;
+ addr->prefixlen = prefixlen;
+ addr->broadcast.s_addr = address->s_addr | ~netmask->s_addr;
+
+ /* allow reusing an existing address and simply update its lifetime
+ * in case it already exists */
+ r = address_configure(addr, link, dhcp4_address_handler, true);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int dhcp_lease_renew(sd_dhcp_client *client, Link *link) {
+ sd_dhcp_lease *lease;
+ struct in_addr address;
+ struct in_addr netmask;
+ uint32_t lifetime = CACHE_INFO_INFINITY_LIFE_TIME;
+ int r;
+
+ assert(link);
+ assert(client);
+ assert(link->network);
+
+ r = sd_dhcp_client_get_lease(client, &lease);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "DHCP error: no lease: %m");
+
+ sd_dhcp_lease_unref(link->dhcp_lease);
+ link->dhcp4_configured = false;
+ link->dhcp_lease = sd_dhcp_lease_ref(lease);
+ link_dirty(link);
+
+ r = sd_dhcp_lease_get_address(lease, &address);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "DHCP error: no address: %m");
+
+ r = sd_dhcp_lease_get_netmask(lease, &netmask);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "DHCP error: no netmask: %m");
+
+ if (!link->network->dhcp_critical) {
+ r = sd_dhcp_lease_get_lifetime(link->dhcp_lease, &lifetime);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "DHCP error: no lifetime: %m");
+ }
+
+ r = dhcp4_update_address(link, &address, &netmask, lifetime);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not update IP address: %m");
+ link_enter_failed(link);
+ return r;
+ }
+
+ return 0;
+}
+
+static int dhcp_lease_acquired(sd_dhcp_client *client, Link *link) {
+ sd_dhcp_lease *lease;
+ struct in_addr address;
+ struct in_addr netmask;
+ struct in_addr gateway;
+ unsigned prefixlen;
+ uint32_t lifetime = CACHE_INFO_INFINITY_LIFE_TIME;
+ int r;
+
+ assert(client);
+ assert(link);
+
+ r = sd_dhcp_client_get_lease(client, &lease);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP error: No lease: %m");
+
+ r = sd_dhcp_lease_get_address(lease, &address);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP error: No address: %m");
+
+ r = sd_dhcp_lease_get_netmask(lease, &netmask);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP error: No netmask: %m");
+
+ prefixlen = in4_addr_netmask_to_prefixlen(&netmask);
+
+ r = sd_dhcp_lease_get_router(lease, &gateway);
+ if (r < 0 && r != -ENODATA)
+ return log_link_error_errno(link, r, "DHCP error: Could not get gateway: %m");
+
+ if (r >= 0)
+ log_struct(LOG_INFO,
+ LOG_LINK_INTERFACE(link),
+ LOG_LINK_MESSAGE(link, "DHCPv4 address %u.%u.%u.%u/%u via %u.%u.%u.%u",
+ ADDRESS_FMT_VAL(address),
+ prefixlen,
+ ADDRESS_FMT_VAL(gateway)),
+ "ADDRESS=%u.%u.%u.%u", ADDRESS_FMT_VAL(address),
+ "PREFIXLEN=%u", prefixlen,
+ "GATEWAY=%u.%u.%u.%u", ADDRESS_FMT_VAL(gateway));
+ else
+ log_struct(LOG_INFO,
+ LOG_LINK_INTERFACE(link),
+ LOG_LINK_MESSAGE(link, "DHCPv4 address %u.%u.%u.%u/%u",
+ ADDRESS_FMT_VAL(address),
+ prefixlen),
+ "ADDRESS=%u.%u.%u.%u", ADDRESS_FMT_VAL(address),
+ "PREFIXLEN=%u", prefixlen);
+
+ link->dhcp_lease = sd_dhcp_lease_ref(lease);
+ link_dirty(link);
+
+ if (link->network->dhcp_use_mtu) {
+ uint16_t mtu;
+
+ r = sd_dhcp_lease_get_mtu(lease, &mtu);
+ if (r >= 0) {
+ r = link_set_mtu(link, mtu);
+ if (r < 0)
+ log_link_error_errno(link, r, "Failed to set MTU to %" PRIu16 ": %m", mtu);
+ }
+ }
+
+ if (link->network->dhcp_use_hostname) {
+ const char *dhcpname = NULL;
+ _cleanup_free_ char *hostname = NULL;
+
+ if (link->network->dhcp_hostname)
+ dhcpname = link->network->dhcp_hostname;
+ else
+ (void) sd_dhcp_lease_get_hostname(lease, &dhcpname);
+
+ if (dhcpname) {
+ r = shorten_overlong(dhcpname, &hostname);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Unable to shorten overlong DHCP hostname '%s', ignoring: %m", dhcpname);
+ if (r == 1)
+ log_link_notice(link, "Overlong DCHP hostname received, shortened from '%s' to '%s'", dhcpname, hostname);
+ }
+
+ if (hostname) {
+ r = manager_set_hostname(link->manager, hostname);
+ if (r < 0)
+ log_link_error_errno(link, r, "Failed to set transient hostname to '%s': %m", hostname);
+ }
+ }
+
+ if (link->network->dhcp_use_timezone) {
+ const char *tz = NULL;
+
+ (void) sd_dhcp_lease_get_timezone(link->dhcp_lease, &tz);
+
+ if (tz) {
+ r = manager_set_timezone(link->manager, tz);
+ if (r < 0)
+ log_link_error_errno(link, r, "Failed to set timezone to '%s': %m", tz);
+ }
+ }
+
+ if (!link->network->dhcp_critical) {
+ r = sd_dhcp_lease_get_lifetime(link->dhcp_lease, &lifetime);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "DHCP error: no lifetime: %m");
+ }
+
+ r = dhcp4_update_address(link, &address, &netmask, lifetime);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not update IP address: %m");
+ link_enter_failed(link);
+ return r;
+ }
+
+ return 0;
+}
+static void dhcp4_handler(sd_dhcp_client *client, int event, void *userdata) {
+ Link *link = userdata;
+ int r = 0;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return;
+
+ switch (event) {
+ case SD_DHCP_CLIENT_EVENT_EXPIRED:
+ case SD_DHCP_CLIENT_EVENT_STOP:
+ case SD_DHCP_CLIENT_EVENT_IP_CHANGE:
+ if (link->network->dhcp_critical) {
+ log_link_error(link, "DHCPv4 connection considered system critical, ignoring request to reconfigure it.");
+ return;
+ }
+
+ if (link->dhcp_lease) {
+ r = dhcp_lease_lost(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return;
+ }
+ }
+
+ if (event == SD_DHCP_CLIENT_EVENT_IP_CHANGE) {
+ r = dhcp_lease_acquired(client, link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return;
+ }
+ }
+
+ break;
+ case SD_DHCP_CLIENT_EVENT_RENEW:
+ r = dhcp_lease_renew(client, link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return;
+ }
+ break;
+ case SD_DHCP_CLIENT_EVENT_IP_ACQUIRE:
+ r = dhcp_lease_acquired(client, link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return;
+ }
+ break;
+ default:
+ if (event < 0)
+ log_link_warning_errno(link, event, "DHCP error: Client failed: %m");
+ else
+ log_link_warning(link, "DHCP unknown event: %i", event);
+ break;
+ }
+
+ return;
+}
+
+static int dhcp4_set_hostname(Link *link) {
+ _cleanup_free_ char *hostname = NULL;
+ const char *hn;
+ int r;
+
+ assert(link);
+
+ if (!link->network->dhcp_send_hostname)
+ hn = NULL;
+ else if (link->network->dhcp_hostname)
+ hn = link->network->dhcp_hostname;
+ else {
+ r = gethostname_strict(&hostname);
+ if (r < 0 && r != -ENXIO) /* ENXIO: no hostname set or hostname is "localhost" */
+ return r;
+
+ hn = hostname;
+ }
+
+ r = sd_dhcp_client_set_hostname(link->dhcp_client, hn);
+ if (r == -EINVAL && hostname)
+ /* Ignore error when the machine's hostname is not suitable to send in DHCP packet. */
+ log_link_warning_errno(link, r, "DHCP4 CLIENT: Failed to set hostname from kernel hostname, ignoring: %m");
+ else if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set hostname: %m");
+
+ return 0;
+}
+
+static bool promote_secondaries_enabled(const char *ifname) {
+ _cleanup_free_ char *promote_secondaries_sysctl = NULL;
+ char *promote_secondaries_path;
+ int r;
+
+ promote_secondaries_path = strjoina("net/ipv4/conf/", ifname, "/promote_secondaries");
+ r = sysctl_read(promote_secondaries_path, &promote_secondaries_sysctl);
+ if (r < 0) {
+ log_debug_errno(r, "Cannot read sysctl %s", promote_secondaries_path);
+ return false;
+ }
+
+ truncate_nl(promote_secondaries_sysctl);
+ r = parse_boolean(promote_secondaries_sysctl);
+ if (r < 0)
+ log_warning_errno(r, "Cannot parse sysctl %s with content %s as boolean", promote_secondaries_path, promote_secondaries_sysctl);
+ return r > 0;
+}
+
+/* dhcp4_set_promote_secondaries will ensure this interface has
+ * the "promote_secondaries" option in the kernel set. If this sysctl
+ * is not set DHCP will work only as long as the IP address does not
+ * changes between leases. The kernel will remove all secondary IP
+ * addresses of an interface otherwise. The way systemd-network works
+ * is that the new IP of a lease is added as a secondary IP and when
+ * the primary one expires it relies on the kernel to promote the
+ * secondary IP. See also https://github.com/systemd/systemd/issues/7163
+ */
+int dhcp4_set_promote_secondaries(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->network->dhcp & ADDRESS_FAMILY_IPV4);
+
+ /* check if the kernel has promote_secondaries enabled for our
+ * interface. If it is not globally enabled or enabled for the
+ * specific interface we must either enable it.
+ */
+ if (!(promote_secondaries_enabled("all") || promote_secondaries_enabled(link->ifname))) {
+ char *promote_secondaries_path = NULL;
+
+ log_link_debug(link, "promote_secondaries is unset, setting it");
+ promote_secondaries_path = strjoina("net/ipv4/conf/", link->ifname, "/promote_secondaries");
+ r = sysctl_write(promote_secondaries_path, "1");
+ if (r < 0)
+ log_link_warning_errno(link, r, "cannot set sysctl %s to 1", promote_secondaries_path);
+ return r > 0;
+ }
+
+ return 0;
+}
+
+int dhcp4_set_client_identifier(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->dhcp_client);
+
+ switch (link->network->dhcp_client_identifier) {
+ case DHCP_CLIENT_ID_DUID: {
+ /* If configured, apply user specified DUID and IAID */
+ const DUID *duid = link_get_duid(link);
+
+ if (duid->type == DUID_TYPE_LLT && duid->raw_data_len == 0)
+ r = sd_dhcp_client_set_iaid_duid_llt(link->dhcp_client,
+ link->network->iaid_set,
+ link->network->iaid,
+ duid->llt_time);
+ else
+ r = sd_dhcp_client_set_iaid_duid(link->dhcp_client,
+ link->network->iaid_set,
+ link->network->iaid,
+ duid->type,
+ duid->raw_data_len > 0 ? duid->raw_data : NULL,
+ duid->raw_data_len);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set IAID+DUID: %m");
+ break;
+ }
+ case DHCP_CLIENT_ID_DUID_ONLY: {
+ /* If configured, apply user specified DUID */
+ const DUID *duid = link_get_duid(link);
+
+ if (duid->type == DUID_TYPE_LLT && duid->raw_data_len == 0)
+ r = sd_dhcp_client_set_duid_llt(link->dhcp_client,
+ duid->llt_time);
+ else
+ r = sd_dhcp_client_set_duid(link->dhcp_client,
+ duid->type,
+ duid->raw_data_len > 0 ? duid->raw_data : NULL,
+ duid->raw_data_len);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set DUID: %m");
+ break;
+ }
+ case DHCP_CLIENT_ID_MAC:
+ r = sd_dhcp_client_set_client_id(link->dhcp_client,
+ ARPHRD_ETHER,
+ (const uint8_t *) &link->mac,
+ sizeof(link->mac));
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set client ID: %m");
+ break;
+ default:
+ assert_not_reached("Unknown client identifier type.");
+ }
+
+ return 0;
+}
+
+int dhcp4_configure(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->network->dhcp & ADDRESS_FAMILY_IPV4);
+
+ if (!link->dhcp_client) {
+ r = sd_dhcp_client_new(&link->dhcp_client, link->network->dhcp_anonymize);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to create DHCP4 client: %m");
+ }
+
+ r = sd_dhcp_client_attach_event(link->dhcp_client, NULL, 0);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to attach event: %m");
+
+ r = sd_dhcp_client_set_mac(link->dhcp_client,
+ (const uint8_t *) &link->mac,
+ sizeof (link->mac), ARPHRD_ETHER);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set MAC address: %m");
+
+ r = sd_dhcp_client_set_ifindex(link->dhcp_client, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set ifindex: %m");
+
+ r = sd_dhcp_client_set_callback(link->dhcp_client, dhcp4_handler, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set callback: %m");
+
+ r = sd_dhcp_client_set_request_broadcast(link->dhcp_client,
+ link->network->dhcp_broadcast);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for broadcast: %m");
+
+ if (link->mtu) {
+ r = sd_dhcp_client_set_mtu(link->dhcp_client, link->mtu);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set MTU: %m");
+ }
+
+ if (link->network->dhcp_use_mtu) {
+ r = sd_dhcp_client_set_request_option(link->dhcp_client,
+ SD_DHCP_OPTION_INTERFACE_MTU);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for MTU: %m");
+ }
+
+ /* NOTE: even if this variable is called "use", it also "sends" PRL
+ * options, maybe there should be a different configuration variable
+ * to send or not route options?. */
+ /* NOTE: when using Anonymize=yes, routes PRL options are sent
+ * by default, so they don't need to be added here. */
+ if (link->network->dhcp_use_routes && !link->network->dhcp_anonymize) {
+ r = sd_dhcp_client_set_request_option(link->dhcp_client,
+ SD_DHCP_OPTION_STATIC_ROUTE);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for static route: %m");
+
+ r = sd_dhcp_client_set_request_option(link->dhcp_client,
+ SD_DHCP_OPTION_CLASSLESS_STATIC_ROUTE);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for classless static route: %m");
+ }
+
+ if (link->network->dhcp_use_ntp) {
+ r = sd_dhcp_client_set_request_option(link->dhcp_client, SD_DHCP_OPTION_NTP_SERVER);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for NTP server: %m");
+ }
+
+ if (link->network->dhcp_use_timezone) {
+ r = sd_dhcp_client_set_request_option(link->dhcp_client, SD_DHCP_OPTION_NEW_TZDB_TIMEZONE);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for timezone: %m");
+ }
+
+ r = dhcp4_set_hostname(link);
+ if (r < 0)
+ return r;
+
+ if (link->network->dhcp_vendor_class_identifier) {
+ r = sd_dhcp_client_set_vendor_class_identifier(link->dhcp_client,
+ link->network->dhcp_vendor_class_identifier);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set vendor class identifier: %m");
+ }
+
+ if (link->network->dhcp_user_class) {
+ r = sd_dhcp_client_set_user_class(link->dhcp_client, (const char **) link->network->dhcp_user_class);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set user class: %m");
+ }
+
+ if (link->network->dhcp_client_port) {
+ r = sd_dhcp_client_set_client_port(link->dhcp_client, link->network->dhcp_client_port);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set listen port: %m");
+ }
+
+ return dhcp4_set_client_identifier(link);
+}
diff --git a/src/network/networkd-dhcp6.c b/src/network/networkd-dhcp6.c
new file mode 100644
index 0000000..c1fba03
--- /dev/null
+++ b/src/network/networkd-dhcp6.c
@@ -0,0 +1,702 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/ether.h>
+#include <linux/if.h>
+#include "sd-radv.h"
+
+#include "sd-dhcp6-client.h"
+
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "missing_network.h"
+#include "network-internal.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "siphash24.h"
+#include "string-util.h"
+#include "radv-internal.h"
+
+static int dhcp6_lease_address_acquired(sd_dhcp6_client *client, Link *link);
+
+static bool dhcp6_get_prefix_delegation(Link *link) {
+ if (!link->network)
+ return false;
+
+ return IN_SET(link->network->router_prefix_delegation,
+ RADV_PREFIX_DELEGATION_DHCP6,
+ RADV_PREFIX_DELEGATION_BOTH);
+}
+
+static bool dhcp6_enable_prefix_delegation(Link *dhcp6_link) {
+ Manager *manager;
+ Link *l;
+ Iterator i;
+
+ assert(dhcp6_link);
+
+ manager = dhcp6_link->manager;
+ assert(manager);
+
+ HASHMAP_FOREACH(l, manager->links, i) {
+ if (l == dhcp6_link)
+ continue;
+
+ if (!dhcp6_get_prefix_delegation(l))
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+
+static int dhcp6_lease_information_acquired(sd_dhcp6_client *client,
+ Link *link) {
+ return 0;
+}
+
+static int dhcp6_pd_prefix_assign(Link *link, struct in6_addr *prefix,
+ uint8_t prefix_len,
+ uint32_t lifetime_preferred,
+ uint32_t lifetime_valid) {
+ sd_radv *radv = link->radv;
+ int r;
+ _cleanup_(sd_radv_prefix_unrefp) sd_radv_prefix *p = NULL;
+
+ r = sd_radv_prefix_new(&p);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_prefix_set_prefix(p, prefix, prefix_len);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_prefix_set_preferred_lifetime(p, lifetime_preferred);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_prefix_set_valid_lifetime(p, lifetime_valid);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_stop(radv);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_add_prefix(radv, p, true);
+ if (r < 0 && r != -EEXIST)
+ return r;
+
+ r = manager_dhcp6_prefix_add(link->manager, &p->opt.in6_addr, link);
+ if (r < 0)
+ return r;
+
+ return sd_radv_start(radv);
+}
+
+static int dhcp6_route_remove_handler(sd_netlink *nl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_link_debug_errno(link, r, "Received error on unreachable route removal for DHCPv6 delegated subnetl: %m");
+
+ return 1;
+}
+
+int dhcp6_lease_pd_prefix_lost(sd_dhcp6_client *client, Link* link) {
+ int r;
+ sd_dhcp6_lease *lease;
+ union in_addr_union pd_prefix;
+ uint8_t pd_prefix_len;
+ uint32_t lifetime_preferred, lifetime_valid;
+
+ r = sd_dhcp6_client_get_lease(client, &lease);
+ if (r < 0)
+ return r;
+
+ sd_dhcp6_lease_reset_pd_prefix_iter(lease);
+
+ while (sd_dhcp6_lease_get_pd(lease, &pd_prefix.in6, &pd_prefix_len,
+ &lifetime_preferred,
+ &lifetime_valid) >= 0) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_free_ Route *route = NULL;
+
+ if (pd_prefix_len > 64)
+ continue;
+
+ (void) in_addr_to_string(AF_INET6, &pd_prefix, &buf);
+
+ if (pd_prefix_len < 64) {
+ r = route_new(&route);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Cannot create unreachable route to delete for DHCPv6 delegated subnet %s/%u: %m",
+ strnull(buf),
+ pd_prefix_len);
+ continue;
+ }
+
+ route_add(link, AF_INET6, &pd_prefix, pd_prefix_len,
+ 0, 0, 0, &route);
+ route_update(route, NULL, 0, NULL, NULL, 0, 0,
+ RTN_UNREACHABLE);
+
+ r = route_remove(route, link, dhcp6_route_remove_handler);
+ if (r < 0) {
+ (void) in_addr_to_string(AF_INET6,
+ &pd_prefix, &buf);
+
+ log_link_warning_errno(link, r, "Cannot delete unreachable route for DHCPv6 delegated subnet %s/%u: %m",
+ strnull(buf),
+ pd_prefix_len);
+
+ continue;
+ }
+
+ log_link_debug(link, "Removing unreachable route %s/%u",
+ strnull(buf), pd_prefix_len);
+ }
+ }
+
+ return 0;
+}
+
+static int dhcp6_pd_prefix_distribute(Link *dhcp6_link, Iterator *i,
+ struct in6_addr *pd_prefix,
+ uint8_t pd_prefix_len,
+ uint32_t lifetime_preferred,
+ uint32_t lifetime_valid) {
+ Link *link;
+ Manager *manager = dhcp6_link->manager;
+ union in_addr_union prefix;
+ uint64_t n_prefixes, n_used = 0;
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_free_ char *assigned_buf = NULL;
+ int r;
+
+ assert(manager);
+ assert(pd_prefix_len <= 64);
+
+ prefix.in6 = *pd_prefix;
+
+ r = in_addr_mask(AF_INET6, &prefix, pd_prefix_len);
+ if (r < 0)
+ return r;
+
+ n_prefixes = UINT64_C(1) << (64 - pd_prefix_len);
+
+ (void) in_addr_to_string(AF_INET6, &prefix, &buf);
+ log_link_debug(dhcp6_link, "Assigning up to %" PRIu64 " prefixes from %s/%u",
+ n_prefixes, strnull(buf), pd_prefix_len);
+
+ while (hashmap_iterate(manager->links, i, (void **)&link, NULL)) {
+ Link *assigned_link;
+
+ if (n_used == n_prefixes) {
+ log_link_debug(dhcp6_link, "Assigned %" PRIu64 "/%" PRIu64 " prefixes from %s/%u",
+ n_used, n_prefixes, strnull(buf), pd_prefix_len);
+
+ return -EAGAIN;
+ }
+
+ if (link == dhcp6_link)
+ continue;
+
+ if (!dhcp6_get_prefix_delegation(link))
+ continue;
+
+ assigned_link = manager_dhcp6_prefix_get(manager, &prefix.in6);
+ if (assigned_link != NULL && assigned_link != link)
+ continue;
+
+ (void) in_addr_to_string(AF_INET6, &prefix, &assigned_buf);
+ r = dhcp6_pd_prefix_assign(link, &prefix.in6, 64,
+ lifetime_preferred, lifetime_valid);
+ if (r < 0) {
+ log_link_error_errno(link, r, "Unable to %s prefix %s/64 from %s/%u for link: %m",
+ assigned_link ? "update": "assign",
+ strnull(assigned_buf),
+ strnull(buf), pd_prefix_len);
+
+ if (assigned_link == NULL)
+ continue;
+
+ } else
+ log_link_debug(link, "Assigned prefix %" PRIu64 "/%" PRIu64 " %s/64 from %s/%u to link",
+ n_used + 1, n_prefixes,
+ strnull(assigned_buf),
+ strnull(buf), pd_prefix_len);
+
+ n_used++;
+
+ r = in_addr_prefix_next(AF_INET6, &prefix, 64);
+ if (r < 0 && n_used < n_prefixes)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dhcp6_route_handler(sd_netlink *nl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST)
+ log_link_debug_errno(link, r, "Received error when adding unreachable route for DHCPv6 delegated subnet: %m");
+
+ return 1;
+}
+
+static int dhcp6_lease_pd_prefix_acquired(sd_dhcp6_client *client, Link *link) {
+ int r;
+ sd_dhcp6_lease *lease;
+ union in_addr_union pd_prefix;
+ uint8_t pd_prefix_len;
+ uint32_t lifetime_preferred, lifetime_valid;
+ _cleanup_free_ char *buf = NULL;
+ Iterator i = ITERATOR_FIRST;
+
+ r = sd_dhcp6_client_get_lease(client, &lease);
+ if (r < 0)
+ return r;
+
+ sd_dhcp6_lease_reset_pd_prefix_iter(lease);
+
+ while (sd_dhcp6_lease_get_pd(lease, &pd_prefix.in6, &pd_prefix_len,
+ &lifetime_preferred,
+ &lifetime_valid) >= 0) {
+
+ if (pd_prefix_len > 64) {
+ (void) in_addr_to_string(AF_INET6, &pd_prefix, &buf);
+ log_link_debug(link, "PD Prefix length > 64, ignoring prefix %s/%u",
+ strnull(buf), pd_prefix_len);
+ continue;
+ }
+
+ if (pd_prefix_len < 48) {
+ (void) in_addr_to_string(AF_INET6, &pd_prefix, &buf);
+ log_link_warning(link, "PD Prefix length < 48, looks unusual %s/%u",
+ strnull(buf), pd_prefix_len);
+ }
+
+ if (pd_prefix_len < 64) {
+ Route *route = NULL;
+
+ (void) in_addr_to_string(AF_INET6, &pd_prefix, &buf);
+
+ r = route_new(&route);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Cannot create unreachable route for DHCPv6 delegated subnet %s/%u: %m",
+ strnull(buf),
+ pd_prefix_len);
+ continue;
+ }
+
+ route_add(link, AF_INET6, &pd_prefix, pd_prefix_len,
+ 0, 0, 0, &route);
+ route_update(route, NULL, 0, NULL, NULL, 0, 0,
+ RTN_UNREACHABLE);
+
+ r = route_configure(route, link, dhcp6_route_handler);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Cannot configure unreachable route for delegated subnet %s/%u: %m",
+ strnull(buf),
+ pd_prefix_len);
+ route_free(route);
+ continue;
+ }
+
+ route_free(route);
+
+ log_link_debug(link, "Configuring unreachable route for %s/%u",
+ strnull(buf), pd_prefix_len);
+
+ } else
+ log_link_debug(link, "Not adding a blocking route since distributed prefix is /64");
+
+ r = dhcp6_pd_prefix_distribute(link, &i, &pd_prefix.in6,
+ pd_prefix_len,
+ lifetime_preferred,
+ lifetime_valid);
+ if (r < 0 && r != -EAGAIN)
+ return r;
+
+ if (r >= 0)
+ i = ITERATOR_FIRST;
+ }
+
+ return 0;
+}
+
+int dhcp6_request_prefix_delegation(Link *link) {
+ Link *l;
+ Iterator i;
+
+ assert_return(link, -EINVAL);
+ assert_return(link->manager, -EOPNOTSUPP);
+
+ if (dhcp6_get_prefix_delegation(link) <= 0)
+ return 0;
+
+ log_link_debug(link, "Requesting DHCPv6 prefixes to be delegated for new link");
+
+ HASHMAP_FOREACH(l, link->manager->links, i) {
+ int r, enabled;
+
+ if (l == link)
+ continue;
+
+ if (!l->dhcp6_client)
+ continue;
+
+ r = sd_dhcp6_client_get_prefix_delegation(l->dhcp6_client, &enabled);
+ if (r < 0) {
+ log_link_warning_errno(l, r, "Cannot get prefix delegation when adding new link");
+ continue;
+ }
+
+ if (enabled == 0) {
+ r = sd_dhcp6_client_set_prefix_delegation(l->dhcp6_client, 1);
+ if (r < 0) {
+ log_link_warning_errno(l, r, "Cannot enable prefix delegation when adding new link");
+ continue;
+ }
+ }
+
+ r = sd_dhcp6_client_is_running(l->dhcp6_client);
+ if (r <= 0)
+ continue;
+
+ if (enabled != 0) {
+ log_link_debug(l, "Requesting re-assignment of delegated prefixes after adding new link");
+ (void) dhcp6_lease_pd_prefix_acquired(l->dhcp6_client, l);
+
+ continue;
+ }
+
+ r = sd_dhcp6_client_stop(l->dhcp6_client);
+ if (r < 0) {
+ log_link_warning_errno(l, r, "Cannot stop DHCPv6 prefix delegation client after adding new link");
+ continue;
+ }
+
+ r = sd_dhcp6_client_start(l->dhcp6_client);
+ if (r < 0) {
+ log_link_warning_errno(l, r, "Cannot restart DHCPv6 prefix delegation client after adding new link");
+ continue;
+ }
+
+ log_link_debug(l, "Restarted DHCPv6 client to acquire prefix delegations after adding new link");
+ }
+
+ return 0;
+}
+
+static int dhcp6_address_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ if (link->rtnl_extended_attrs) {
+ log_link_warning(link, "Could not set extended netlink attributes, reverting to fallback mechanism");
+
+ link->rtnl_extended_attrs = false;
+ dhcp6_lease_address_acquired(link->dhcp6_client, link);
+
+ return 1;
+ }
+
+ log_link_error_errno(link, r, "Could not set DHCPv6 address: %m");
+
+ link_enter_failed(link);
+
+ } else if (r >= 0)
+ manager_rtnl_process_address(rtnl, m, link->manager);
+
+ return 1;
+}
+
+static int dhcp6_address_change(
+ Link *link,
+ struct in6_addr *ip6_addr,
+ uint32_t lifetime_preferred,
+ uint32_t lifetime_valid) {
+
+ _cleanup_(address_freep) Address *addr = NULL;
+ char buffer[INET6_ADDRSTRLEN];
+ int r;
+
+ r = address_new(&addr);
+ if (r < 0)
+ return r;
+
+ addr->family = AF_INET6;
+ memcpy(&addr->in_addr.in6, ip6_addr, sizeof(*ip6_addr));
+
+ addr->flags = IFA_F_NOPREFIXROUTE;
+ addr->prefixlen = 128;
+
+ addr->cinfo.ifa_prefered = lifetime_preferred;
+ addr->cinfo.ifa_valid = lifetime_valid;
+
+ log_link_info(link,
+ "DHCPv6 address %s/%d timeout preferred %d valid %d",
+ inet_ntop(AF_INET6, &addr->in_addr.in6, buffer, sizeof(buffer)),
+ addr->prefixlen, lifetime_preferred, lifetime_valid);
+
+ r = address_configure(addr, link, dhcp6_address_handler, true);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not assign DHCPv6 address: %m");
+
+ return r;
+}
+
+static int dhcp6_lease_address_acquired(sd_dhcp6_client *client, Link *link) {
+ int r;
+ sd_dhcp6_lease *lease;
+ struct in6_addr ip6_addr;
+ uint32_t lifetime_preferred, lifetime_valid;
+
+ r = sd_dhcp6_client_get_lease(client, &lease);
+ if (r < 0)
+ return r;
+
+ sd_dhcp6_lease_reset_address_iter(lease);
+
+ while (sd_dhcp6_lease_get_address(lease, &ip6_addr,
+ &lifetime_preferred,
+ &lifetime_valid) >= 0) {
+
+ r = dhcp6_address_change(link, &ip6_addr, lifetime_preferred, lifetime_valid);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static void dhcp6_handler(sd_dhcp6_client *client, int event, void *userdata) {
+ int r;
+ Link *link = userdata;
+
+ assert(link);
+ assert(link->network);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return;
+
+ switch(event) {
+ case SD_DHCP6_CLIENT_EVENT_STOP:
+ case SD_DHCP6_CLIENT_EVENT_RESEND_EXPIRE:
+ case SD_DHCP6_CLIENT_EVENT_RETRANS_MAX:
+ if (sd_dhcp6_client_get_lease(client, NULL) >= 0)
+ log_link_warning(link, "DHCPv6 lease lost");
+
+ (void) dhcp6_lease_pd_prefix_lost(client, link);
+ (void) manager_dhcp6_prefix_remove_all(link->manager, link);
+
+ link->dhcp6_configured = false;
+ break;
+
+ case SD_DHCP6_CLIENT_EVENT_IP_ACQUIRE:
+ r = dhcp6_lease_address_acquired(client, link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return;
+ }
+
+ r = dhcp6_lease_pd_prefix_acquired(client, link);
+ if (r < 0)
+ log_link_debug(link, "DHCPv6 did not receive prefixes to delegate");
+
+ _fallthrough_;
+ case SD_DHCP6_CLIENT_EVENT_INFORMATION_REQUEST:
+ r = dhcp6_lease_information_acquired(client, link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return;
+ }
+
+ link->dhcp6_configured = true;
+ break;
+
+ default:
+ if (event < 0)
+ log_link_warning_errno(link, event, "DHCPv6 error: %m");
+ else
+ log_link_warning(link, "DHCPv6 unknown event: %d", event);
+ return;
+ }
+
+ link_check_ready(link);
+}
+
+int dhcp6_request_address(Link *link, int ir) {
+ int r, inf_req, pd;
+ bool running;
+
+ assert(link);
+ assert(link->dhcp6_client);
+ assert(link->network);
+ assert(in_addr_is_link_local(AF_INET6, (const union in_addr_union*)&link->ipv6ll_address) > 0);
+
+ r = sd_dhcp6_client_is_running(link->dhcp6_client);
+ if (r < 0)
+ return r;
+ else
+ running = r;
+
+ r = sd_dhcp6_client_get_prefix_delegation(link->dhcp6_client, &pd);
+ if (r < 0)
+ return r;
+
+ if (pd && ir && link->network->dhcp6_force_pd_other_information) {
+ log_link_debug(link, "Enabling managed mode to request DHCPv6 PD with 'Other Information' set");
+
+ r = sd_dhcp6_client_set_address_request(link->dhcp6_client,
+ false);
+ if (r < 0 )
+ return r;
+
+ ir = false;
+ }
+
+ if (running) {
+ r = sd_dhcp6_client_get_information_request(link->dhcp6_client, &inf_req);
+ if (r < 0)
+ return r;
+
+ if (inf_req == ir)
+ return 0;
+
+ r = sd_dhcp6_client_stop(link->dhcp6_client);
+ if (r < 0)
+ return r;
+ } else {
+ r = sd_dhcp6_client_set_local_address(link->dhcp6_client, &link->ipv6ll_address);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_dhcp6_client_set_information_request(link->dhcp6_client, ir);
+ if (r < 0)
+ return r;
+
+ r = sd_dhcp6_client_start(link->dhcp6_client);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int dhcp6_set_hostname(sd_dhcp6_client *client, Link *link) {
+ _cleanup_free_ char *hostname = NULL;
+ const char *hn;
+ int r;
+
+ assert(link);
+
+ if (!link->network->dhcp_send_hostname)
+ hn = NULL;
+ else if (link->network->dhcp_hostname)
+ hn = link->network->dhcp_hostname;
+ else {
+ r = gethostname_strict(&hostname);
+ if (r < 0 && r != -ENXIO) /* ENXIO: no hostname set or hostname is "localhost" */
+ return r;
+
+ hn = hostname;
+ }
+
+ r = sd_dhcp6_client_set_fqdn(client, hn);
+ if (r == -EINVAL && hostname)
+ /* Ignore error when the machine's hostname is not suitable to send in DHCP packet. */
+ log_link_warning_errno(link, r, "DHCP6 CLIENT: Failed to set hostname from kernel hostname, ignoring: %m");
+ else if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set hostname: %m");
+
+ return 0;
+}
+
+int dhcp6_configure(Link *link) {
+ _cleanup_(sd_dhcp6_client_unrefp) sd_dhcp6_client *client = NULL;
+ const DUID *duid;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ if (link->dhcp6_client)
+ return 0;
+
+ r = sd_dhcp6_client_new(&client);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to create DHCP6 client: %m");
+
+ r = sd_dhcp6_client_attach_event(client, NULL, 0);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to attach event: %m");
+
+ r = sd_dhcp6_client_set_mac(client,
+ (const uint8_t *) &link->mac,
+ sizeof (link->mac), ARPHRD_ETHER);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set MAC address: %m");
+
+ if (link->network->iaid_set) {
+ r = sd_dhcp6_client_set_iaid(client, link->network->iaid);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set IAID: %m");
+ }
+
+ duid = link_get_duid(link);
+ if (duid->type == DUID_TYPE_LLT && duid->raw_data_len == 0)
+ r = sd_dhcp6_client_set_duid_llt(client, duid->llt_time);
+ else
+ r = sd_dhcp6_client_set_duid(client,
+ duid->type,
+ duid->raw_data_len > 0 ? duid->raw_data : NULL,
+ duid->raw_data_len);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set DUID: %m");
+
+ r = dhcp6_set_hostname(client, link);
+ if (r < 0)
+ return r;
+
+ r = sd_dhcp6_client_set_ifindex(client, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set ifindex: %m");
+
+ if (link->network->rapid_commit) {
+ r = sd_dhcp6_client_set_request_option(client, SD_DHCP6_OPTION_RAPID_COMMIT);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set request flag for rapid commit: %m");
+ }
+
+ r = sd_dhcp6_client_set_callback(client, dhcp6_handler, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set callback: %m");
+
+ if (dhcp6_enable_prefix_delegation(link)) {
+ r = sd_dhcp6_client_set_prefix_delegation(client, true);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set prefix delegation: %m");
+ }
+
+ link->dhcp6_client = TAKE_PTR(client);
+
+ return 0;
+}
diff --git a/src/network/networkd-fdb.c b/src/network/networkd-fdb.c
new file mode 100644
index 0000000..324f6a5
--- /dev/null
+++ b/src/network/networkd-fdb.c
@@ -0,0 +1,260 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <net/ethernet.h>
+#include <net/if.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "netdev/bridge.h"
+#include "netlink-util.h"
+#include "networkd-fdb.h"
+#include "networkd-manager.h"
+#include "util.h"
+#include "vlan-util.h"
+
+#define STATIC_FDB_ENTRIES_PER_NETWORK_MAX 1024U
+
+/* create a new FDB entry or get an existing one. */
+int fdb_entry_new_static(
+ Network *network,
+ const char *filename,
+ unsigned section_line,
+ FdbEntry **ret) {
+
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(fdb_entry_freep) FdbEntry *fdb_entry = NULL;
+ _cleanup_free_ struct ether_addr *mac_addr = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(!!filename == (section_line > 0));
+
+ /* search entry in hashmap first. */
+ if (filename) {
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ fdb_entry = hashmap_get(network->fdb_entries_by_section, n);
+ if (fdb_entry) {
+ *ret = TAKE_PTR(fdb_entry);
+
+ return 0;
+ }
+ }
+
+ if (network->n_static_fdb_entries >= STATIC_FDB_ENTRIES_PER_NETWORK_MAX)
+ return -E2BIG;
+
+ /* allocate space for MAC address. */
+ mac_addr = new0(struct ether_addr, 1);
+ if (!mac_addr)
+ return -ENOMEM;
+
+ /* allocate space for and FDB entry. */
+ fdb_entry = new(FdbEntry, 1);
+ if (!fdb_entry)
+ return -ENOMEM;
+
+ /* init FDB structure. */
+ *fdb_entry = (FdbEntry) {
+ .network = network,
+ .mac_addr = TAKE_PTR(mac_addr),
+ };
+
+ LIST_PREPEND(static_fdb_entries, network->static_fdb_entries, fdb_entry);
+ network->n_static_fdb_entries++;
+
+ if (filename) {
+ fdb_entry->section = TAKE_PTR(n);
+
+ r = hashmap_ensure_allocated(&network->fdb_entries_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->fdb_entries_by_section, fdb_entry->section, fdb_entry);
+ if (r < 0)
+ return r;
+ }
+
+ /* return allocated FDB structure. */
+ *ret = TAKE_PTR(fdb_entry);
+
+ return 0;
+}
+
+static int set_fdb_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST)
+ log_link_error_errno(link, r, "Could not add FDB entry: %m");
+
+ return 1;
+}
+
+/* send a request to the kernel to add a FDB entry in its static MAC table. */
+int fdb_entry_configure(Link *link, FdbEntry *fdb_entry) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ sd_netlink *rtnl;
+ int r;
+ uint8_t flags;
+ Bridge *bridge;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(fdb_entry);
+
+ rtnl = link->manager->rtnl;
+ bridge = BRIDGE(link->network->bridge);
+
+ /* create new RTM message */
+ r = sd_rtnl_message_new_neigh(rtnl, &req, RTM_NEWNEIGH, link->ifindex, PF_BRIDGE);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ if (bridge)
+ flags = NTF_MASTER;
+ else
+ flags = NTF_SELF;
+
+ r = sd_rtnl_message_neigh_set_flags(req, flags);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ /* only NUD_PERMANENT state supported. */
+ r = sd_rtnl_message_neigh_set_state(req, NUD_NOARP | NUD_PERMANENT);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = sd_netlink_message_append_ether_addr(req, NDA_LLADDR, fdb_entry->mac_addr);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ /* VLAN Id is optional. We'll add VLAN Id only if it's specified. */
+ if (0 != fdb_entry->vlan_id) {
+ r = sd_netlink_message_append_u16(req, NDA_VLAN, fdb_entry->vlan_id);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+ }
+
+ /* send message to the kernel to update its internal static MAC table. */
+ r = netlink_call_async(rtnl, NULL, req, set_fdb_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+/* remove and FDB entry. */
+void fdb_entry_free(FdbEntry *fdb_entry) {
+ if (!fdb_entry)
+ return;
+
+ if (fdb_entry->network) {
+ LIST_REMOVE(static_fdb_entries, fdb_entry->network->static_fdb_entries, fdb_entry);
+ assert(fdb_entry->network->n_static_fdb_entries > 0);
+ fdb_entry->network->n_static_fdb_entries--;
+
+ if (fdb_entry->section)
+ hashmap_remove(fdb_entry->network->fdb_entries_by_section, fdb_entry->section);
+ }
+
+ network_config_section_free(fdb_entry->section);
+ free(fdb_entry->mac_addr);
+ free(fdb_entry);
+}
+
+/* parse the HW address from config files. */
+int config_parse_fdb_hwaddr(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(fdb_entry_freep) FdbEntry *fdb_entry = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = fdb_entry_new_static(network, filename, section_line, &fdb_entry);
+ if (r < 0)
+ return log_oom();
+
+ /* read in the MAC address for the FDB table. */
+ r = sscanf(rvalue, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx",
+ &fdb_entry->mac_addr->ether_addr_octet[0],
+ &fdb_entry->mac_addr->ether_addr_octet[1],
+ &fdb_entry->mac_addr->ether_addr_octet[2],
+ &fdb_entry->mac_addr->ether_addr_octet[3],
+ &fdb_entry->mac_addr->ether_addr_octet[4],
+ &fdb_entry->mac_addr->ether_addr_octet[5]);
+
+ if (r != ETHER_ADDR_LEN) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Not a valid MAC address, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ fdb_entry = NULL;
+
+ return 0;
+}
+
+/* parse the VLAN Id from config files. */
+int config_parse_fdb_vlan_id(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(fdb_entry_freep) FdbEntry *fdb_entry = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = fdb_entry_new_static(network, filename, section_line, &fdb_entry);
+ if (r < 0)
+ return log_oom();
+
+ r = config_parse_vlanid(unit, filename, line, section,
+ section_line, lvalue, ltype,
+ rvalue, &fdb_entry->vlan_id, userdata);
+ if (r < 0)
+ return r;
+
+ fdb_entry = NULL;
+
+ return 0;
+}
diff --git a/src/network/networkd-fdb.h b/src/network/networkd-fdb.h
new file mode 100644
index 0000000..55fc3a2
--- /dev/null
+++ b/src/network/networkd-fdb.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include "conf-parser.h"
+#include "list.h"
+#include "macro.h"
+
+typedef struct Network Network;
+typedef struct FdbEntry FdbEntry;
+typedef struct Link Link;
+typedef struct NetworkConfigSection NetworkConfigSection;
+
+struct FdbEntry {
+ Network *network;
+ NetworkConfigSection *section;
+
+ struct ether_addr *mac_addr;
+ uint16_t vlan_id;
+
+ LIST_FIELDS(FdbEntry, static_fdb_entries);
+};
+
+int fdb_entry_new_static(Network *network, const char *filename, unsigned section_line, FdbEntry **ret);
+void fdb_entry_free(FdbEntry *fdb_entry);
+int fdb_entry_configure(Link *link, FdbEntry *fdb_entry);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(FdbEntry*, fdb_entry_free);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_fdb_hwaddr);
+CONFIG_PARSER_PROTOTYPE(config_parse_fdb_vlan_id);
diff --git a/src/network/networkd-gperf.gperf b/src/network/networkd-gperf.gperf
new file mode 100644
index 0000000..cc88fc0
--- /dev/null
+++ b/src/network/networkd-gperf.gperf
@@ -0,0 +1,22 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "networkd-conf.h"
+#include "networkd-manager.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name networkd_gperf_hash
+%define lookup-function-name networkd_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+DHCP.DUIDType, config_parse_duid_type, 0, offsetof(Manager, duid)
+DHCP.DUIDRawData, config_parse_duid_rawdata, 0, offsetof(Manager, duid)
diff --git a/src/network/networkd-ipv4ll.c b/src/network/networkd-ipv4ll.c
new file mode 100644
index 0000000..3562e90
--- /dev/null
+++ b/src/network/networkd-ipv4ll.c
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/ether.h>
+#include <linux/if.h>
+
+#include "network-internal.h"
+#include "networkd-address.h"
+#include "networkd-manager.h"
+#include "networkd-link.h"
+
+static int ipv4ll_address_lost(Link *link) {
+ _cleanup_(address_freep) Address *address = NULL;
+ _cleanup_(route_freep) Route *route = NULL;
+ struct in_addr addr;
+ int r;
+
+ assert(link);
+
+ link->ipv4ll_route = false;
+ link->ipv4ll_address = false;
+
+ r = sd_ipv4ll_get_address(link->ipv4ll, &addr);
+ if (r < 0)
+ return 0;
+
+ log_link_debug(link, "IPv4 link-local release %u.%u.%u.%u", ADDRESS_FMT_VAL(addr));
+
+ r = address_new(&address);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate address: %m");
+
+ address->family = AF_INET;
+ address->in_addr.in = addr;
+ address->prefixlen = 16;
+ address->scope = RT_SCOPE_LINK;
+
+ address_remove(address, link, NULL);
+
+ r = route_new(&route);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate route: %m");
+
+ route->family = AF_INET;
+ route->scope = RT_SCOPE_LINK;
+ route->priority = IPV4LL_ROUTE_METRIC;
+
+ route_remove(route, link, NULL);
+
+ link_check_ready(link);
+
+ return 0;
+}
+
+static int ipv4ll_route_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(!link->ipv4ll_route);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_error_errno(link, r, "could not set ipv4ll route: %m");
+ link_enter_failed(link);
+ }
+
+ link->ipv4ll_route = true;
+
+ if (link->ipv4ll_address == true)
+ link_check_ready(link);
+
+ return 1;
+}
+
+static int ipv4ll_address_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(!link->ipv4ll_address);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_error_errno(link, r, "could not set ipv4ll address: %m");
+ link_enter_failed(link);
+ } else if (r >= 0)
+ manager_rtnl_process_address(rtnl, m, link->manager);
+
+ link->ipv4ll_address = true;
+
+ if (link->ipv4ll_route)
+ link_check_ready(link);
+
+ return 1;
+}
+
+static int ipv4ll_address_claimed(sd_ipv4ll *ll, Link *link) {
+ _cleanup_(address_freep) Address *ll_addr = NULL;
+ _cleanup_(route_freep) Route *route = NULL;
+ struct in_addr address;
+ int r;
+
+ assert(ll);
+ assert(link);
+
+ r = sd_ipv4ll_get_address(ll, &address);
+ if (r == -ENOENT)
+ return 0;
+ else if (r < 0)
+ return r;
+
+ log_link_debug(link, "IPv4 link-local claim %u.%u.%u.%u",
+ ADDRESS_FMT_VAL(address));
+
+ r = address_new(&ll_addr);
+ if (r < 0)
+ return r;
+
+ ll_addr->family = AF_INET;
+ ll_addr->in_addr.in = address;
+ ll_addr->prefixlen = 16;
+ ll_addr->broadcast.s_addr = ll_addr->in_addr.in.s_addr | htobe32(0xfffffffflu >> ll_addr->prefixlen);
+ ll_addr->scope = RT_SCOPE_LINK;
+
+ r = address_configure(ll_addr, link, ipv4ll_address_handler, false);
+ if (r < 0)
+ return r;
+
+ link->ipv4ll_address = false;
+
+ r = route_new(&route);
+ if (r < 0)
+ return r;
+
+ route->family = AF_INET;
+ route->scope = RT_SCOPE_LINK;
+ route->protocol = RTPROT_STATIC;
+ route->priority = IPV4LL_ROUTE_METRIC;
+
+ r = route_configure(route, link, ipv4ll_route_handler);
+ if (r < 0)
+ return r;
+
+ link->ipv4ll_route = false;
+
+ return 0;
+}
+
+static void ipv4ll_handler(sd_ipv4ll *ll, int event, void *userdata) {
+ Link *link = userdata;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return;
+
+ switch(event) {
+ case SD_IPV4LL_EVENT_STOP:
+ r = ipv4ll_address_lost(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return;
+ }
+ break;
+ case SD_IPV4LL_EVENT_CONFLICT:
+ r = ipv4ll_address_lost(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return;
+ }
+
+ r = sd_ipv4ll_restart(ll);
+ if (r < 0)
+ log_link_warning(link, "Could not acquire IPv4 link-local address");
+ break;
+ case SD_IPV4LL_EVENT_BIND:
+ r = ipv4ll_address_claimed(ll, link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return;
+ }
+ break;
+ default:
+ log_link_warning(link, "IPv4 link-local unknown event: %d", event);
+ break;
+ }
+}
+
+int ipv4ll_configure(Link *link) {
+ uint64_t seed;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->network->link_local & ADDRESS_FAMILY_IPV4);
+
+ if (!link->ipv4ll) {
+ r = sd_ipv4ll_new(&link->ipv4ll);
+ if (r < 0)
+ return r;
+ }
+
+ if (link->sd_device &&
+ net_get_unique_predictable_data(link->sd_device, &seed) >= 0) {
+ r = sd_ipv4ll_set_address_seed(link->ipv4ll, seed);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_ipv4ll_attach_event(link->ipv4ll, NULL, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4ll_set_mac(link->ipv4ll, &link->mac);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4ll_set_ifindex(link->ipv4ll, link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4ll_set_callback(link->ipv4ll, ipv4ll_handler, link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
diff --git a/src/network/networkd-ipv6-proxy-ndp.c b/src/network/networkd-ipv6-proxy-ndp.c
new file mode 100644
index 0000000..f594b27
--- /dev/null
+++ b/src/network/networkd-ipv6-proxy-ndp.c
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/ether.h>
+#include <linux/if.h>
+#include <unistd.h>
+
+#include "fileio.h"
+#include "netlink-util.h"
+#include "networkd-ipv6-proxy-ndp.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "string-util.h"
+#include "socket-util.h"
+
+static bool ipv6_proxy_ndp_is_needed(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ if (link->network->ipv6_proxy_ndp >= 0)
+ return link->network->ipv6_proxy_ndp;
+
+ if (link->network->n_ipv6_proxy_ndp_addresses == 0)
+ return false;
+
+ return true;
+}
+
+static int ipv6_proxy_ndp_set(Link *link) {
+ const char *p = NULL;
+ int r, v;
+
+ assert(link);
+
+ if (!socket_ipv6_is_supported())
+ return 0;
+
+ v = ipv6_proxy_ndp_is_needed(link);
+ p = strjoina("/proc/sys/net/ipv6/conf/", link->ifname, "/proxy_ndp");
+
+ r = write_string_file(p, one_zero(v), WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot configure proxy NDP for interface: %m");
+
+ return 0;
+}
+
+int ipv6_proxy_ndp_address_new_static(Network *network, IPv6ProxyNDPAddress **ret) {
+ _cleanup_(ipv6_proxy_ndp_address_freep) IPv6ProxyNDPAddress *ipv6_proxy_ndp_address = NULL;
+
+ assert(network);
+ assert(ret);
+
+ /* allocate space for IPv6ProxyNDPAddress entry */
+ ipv6_proxy_ndp_address = new(IPv6ProxyNDPAddress, 1);
+ if (!ipv6_proxy_ndp_address)
+ return -ENOMEM;
+
+ *ipv6_proxy_ndp_address = (IPv6ProxyNDPAddress) {
+ .network = network,
+ };
+
+ LIST_PREPEND(ipv6_proxy_ndp_addresses, network->ipv6_proxy_ndp_addresses, ipv6_proxy_ndp_address);
+ network->n_ipv6_proxy_ndp_addresses++;
+
+ *ret = TAKE_PTR(ipv6_proxy_ndp_address);
+
+ return 0;
+}
+
+void ipv6_proxy_ndp_address_free(IPv6ProxyNDPAddress *ipv6_proxy_ndp_address) {
+ if (!ipv6_proxy_ndp_address)
+ return;
+
+ if (ipv6_proxy_ndp_address->network) {
+ LIST_REMOVE(ipv6_proxy_ndp_addresses, ipv6_proxy_ndp_address->network->ipv6_proxy_ndp_addresses,
+ ipv6_proxy_ndp_address);
+
+ assert(ipv6_proxy_ndp_address->network->n_ipv6_proxy_ndp_addresses > 0);
+ ipv6_proxy_ndp_address->network->n_ipv6_proxy_ndp_addresses--;
+ }
+
+ free(ipv6_proxy_ndp_address);
+}
+
+int config_parse_ipv6_proxy_ndp_address(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(ipv6_proxy_ndp_address_freep) IPv6ProxyNDPAddress *ipv6_proxy_ndp_address = NULL;
+ int r;
+ union in_addr_union buffer;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = ipv6_proxy_ndp_address_new_static(network, &ipv6_proxy_ndp_address);
+ if (r < 0)
+ return r;
+
+ r = in_addr_from_string(AF_INET6, rvalue, &buffer);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse IPv6 proxy NDP address, ignoring: %s",
+ rvalue);
+ return 0;
+ }
+
+ r = in_addr_is_null(AF_INET6, &buffer);
+ if (r != 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "IPv6 proxy NDP address cannot be the ANY address, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ ipv6_proxy_ndp_address->in_addr = buffer.in6;
+ ipv6_proxy_ndp_address = NULL;
+
+ return 0;
+}
+
+static int set_ipv6_proxy_ndp_address_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST)
+ log_link_error_errno(link, r, "Could not add IPv6 proxy ndp address entry: %m");
+
+ return 1;
+}
+
+/* send a request to the kernel to add a IPv6 Proxy entry to the neighbour table */
+int ipv6_proxy_ndp_address_configure(Link *link, IPv6ProxyNDPAddress *ipv6_proxy_ndp_address) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ sd_netlink *rtnl;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(ipv6_proxy_ndp_address);
+
+ rtnl = link->manager->rtnl;
+
+ /* create new netlink message */
+ r = sd_rtnl_message_new_neigh(rtnl, &req, RTM_NEWNEIGH, link->ifindex, AF_INET6);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = sd_rtnl_message_neigh_set_flags(req, NLM_F_REQUEST | NTF_PROXY);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = sd_netlink_message_append_in6_addr(req, NDA_DST, &ipv6_proxy_ndp_address->in_addr);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = netlink_call_async(rtnl, NULL, req, set_ipv6_proxy_ndp_address_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+/* configure all ipv6 proxy ndp addresses */
+int ipv6_proxy_ndp_addresses_configure(Link *link) {
+ IPv6ProxyNDPAddress *ipv6_proxy_ndp_address;
+ int r;
+
+ assert(link);
+
+ /* enable or disable proxy_ndp itself depending on whether ipv6_proxy_ndp_addresses are set or not */
+ r = ipv6_proxy_ndp_set(link);
+ if (r != 0)
+ return r;
+
+ LIST_FOREACH(ipv6_proxy_ndp_addresses, ipv6_proxy_ndp_address, link->network->ipv6_proxy_ndp_addresses) {
+ r = ipv6_proxy_ndp_address_configure(link, ipv6_proxy_ndp_address);
+ if (r != 0)
+ return r;
+ }
+ return 0;
+}
diff --git a/src/network/networkd-ipv6-proxy-ndp.h b/src/network/networkd-ipv6-proxy-ndp.h
new file mode 100644
index 0000000..546e856
--- /dev/null
+++ b/src/network/networkd-ipv6-proxy-ndp.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "conf-parser.h"
+#include "list.h"
+#include "macro.h"
+
+typedef struct Network Network;
+typedef struct IPv6ProxyNDPAddress IPv6ProxyNDPAddress;
+typedef struct Link Link;
+
+struct IPv6ProxyNDPAddress {
+ Network *network;
+ struct in6_addr in_addr;
+
+ LIST_FIELDS(IPv6ProxyNDPAddress, ipv6_proxy_ndp_addresses);
+};
+
+int ipv6_proxy_ndp_address_new_static(Network *network, IPv6ProxyNDPAddress ** ipv6_proxy_ndp_address);
+void ipv6_proxy_ndp_address_free(IPv6ProxyNDPAddress *ipv6_proxy_ndp_address);
+int ipv6_proxy_ndp_address_configure(Link *link, IPv6ProxyNDPAddress *ipv6_proxy_ndp_address);
+int ipv6_proxy_ndp_addresses_configure(Link *link);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(IPv6ProxyNDPAddress*, ipv6_proxy_ndp_address_free);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ipv6_proxy_ndp_address);
diff --git a/src/network/networkd-link-bus.c b/src/network/networkd-link-bus.c
new file mode 100644
index 0000000..0dbcd86
--- /dev/null
+++ b/src/network/networkd-link-bus.c
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "strv.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_operational_state, link_operstate, LinkOperationalState);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_administrative_state, link_state, LinkState);
+
+const sd_bus_vtable link_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("OperationalState", "s", property_get_operational_state, offsetof(Link, operstate), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("AdministrativeState", "s", property_get_administrative_state, offsetof(Link, state), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
+ SD_BUS_VTABLE_END
+};
+
+static char *link_bus_path(Link *link) {
+ _cleanup_free_ char *ifindex = NULL;
+ char *p;
+ int r;
+
+ assert(link);
+ assert(link->ifindex > 0);
+
+ if (asprintf(&ifindex, "%d", link->ifindex) < 0)
+ return NULL;
+
+ r = sd_bus_path_encode("/org/freedesktop/network1/link", ifindex, &p);
+ if (r < 0)
+ return NULL;
+
+ return p;
+}
+
+int link_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ unsigned c = 0;
+ Link *link;
+ Iterator i;
+
+ assert(bus);
+ assert(path);
+ assert(m);
+ assert(nodes);
+
+ l = new0(char*, hashmap_size(m->links) + 1);
+ if (!l)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(link, m->links, i) {
+ char *p;
+
+ p = link_bus_path(link);
+ if (!p)
+ return -ENOMEM;
+
+ l[c++] = p;
+ }
+
+ l[c] = NULL;
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+int link_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ _cleanup_free_ char *identifier = NULL;
+ Manager *m = userdata;
+ Link *link;
+ int ifindex, r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(m);
+ assert(found);
+
+ r = sd_bus_path_decode(path, "/org/freedesktop/network1/link", &identifier);
+ if (r <= 0)
+ return 0;
+
+ r = parse_ifindex(identifier, &ifindex);
+ if (r < 0)
+ return 0;
+
+ r = link_get(m, ifindex, &link);
+ if (r < 0)
+ return 0;
+
+ *found = link;
+
+ return 1;
+}
+
+int link_send_changed(Link *link, const char *property, ...) {
+ _cleanup_free_ char *p = NULL;
+ char **l;
+
+ assert(link);
+ assert(link->manager);
+
+ if (!link->manager->bus)
+ return 0; /* replace with assert when we have kdbus */
+
+ l = strv_from_stdarg_alloca(property);
+
+ p = link_bus_path(link);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_emit_properties_changed_strv(
+ link->manager->bus,
+ p,
+ "org.freedesktop.network1.Link",
+ l);
+}
diff --git a/src/network/networkd-link.c b/src/network/networkd-link.c
new file mode 100644
index 0000000..22392d7
--- /dev/null
+++ b/src/network/networkd-link.c
@@ -0,0 +1,4184 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/ether.h>
+#include <linux/if.h>
+#include <linux/can/netlink.h>
+#include <unistd.h>
+#include <stdio_ext.h>
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "dhcp-identifier.h"
+#include "dhcp-lease-internal.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "missing_network.h"
+#include "netlink-util.h"
+#include "network-internal.h"
+#include "networkd-ipv6-proxy-ndp.h"
+#include "networkd-lldp-tx.h"
+#include "networkd-manager.h"
+#include "networkd-ndisc.h"
+#include "networkd-neighbor.h"
+#include "networkd-radv.h"
+#include "networkd-routing-policy-rule.h"
+#include "set.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "util.h"
+#include "virt.h"
+
+DUID* link_get_duid(Link *link) {
+ if (link->network->duid.type != _DUID_TYPE_INVALID)
+ return &link->network->duid;
+ else
+ return &link->manager->duid;
+}
+
+static bool link_dhcp6_enabled(Link *link) {
+ assert(link);
+
+ if (!socket_ipv6_is_supported())
+ return false;
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ return link->network->dhcp & ADDRESS_FAMILY_IPV6;
+}
+
+static bool link_dhcp4_enabled(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ return link->network->dhcp & ADDRESS_FAMILY_IPV4;
+}
+
+static bool link_dhcp4_server_enabled(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ return link->network->dhcp_server;
+}
+
+static bool link_ipv4ll_enabled(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ if (streq_ptr(link->kind, "wireguard"))
+ return false;
+
+ return link->network->link_local & ADDRESS_FAMILY_IPV4;
+}
+
+static bool link_ipv6ll_enabled(Link *link) {
+ assert(link);
+
+ if (!socket_ipv6_is_supported())
+ return false;
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ if (streq_ptr(link->kind, "wireguard"))
+ return false;
+
+ return link->network->link_local & ADDRESS_FAMILY_IPV6;
+}
+
+static bool link_ipv6_enabled(Link *link) {
+ assert(link);
+
+ if (!socket_ipv6_is_supported())
+ return false;
+
+ if (link->network->bridge)
+ return false;
+
+ /* DHCPv6 client will not be started if no IPv6 link-local address is configured. */
+ return link_ipv6ll_enabled(link) || network_has_static_ipv6_addresses(link->network);
+}
+
+static bool link_radv_enabled(Link *link) {
+ assert(link);
+
+ if (!link_ipv6ll_enabled(link))
+ return false;
+
+ return link->network->router_prefix_delegation != RADV_PREFIX_DELEGATION_NONE;
+}
+
+static bool link_lldp_rx_enabled(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (link->iftype != ARPHRD_ETHER)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ /* LLDP should be handled on bridge slaves as those have a direct
+ * connection to their peers not on the bridge master. Linux doesn't
+ * even (by default) forward lldp packets to the bridge master.*/
+ if (streq_ptr("bridge", link->kind))
+ return false;
+
+ return link->network->lldp_mode != LLDP_MODE_NO;
+}
+
+static bool link_lldp_emit_enabled(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (link->iftype != ARPHRD_ETHER)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ return link->network->lldp_emit != LLDP_EMIT_NO;
+}
+
+static bool link_ipv4_forward_enabled(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ if (link->network->ip_forward == _ADDRESS_FAMILY_BOOLEAN_INVALID)
+ return false;
+
+ return link->network->ip_forward & ADDRESS_FAMILY_IPV4;
+}
+
+static bool link_ipv6_forward_enabled(Link *link) {
+ assert(link);
+
+ if (!socket_ipv6_is_supported())
+ return false;
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ if (link->network->ip_forward == _ADDRESS_FAMILY_BOOLEAN_INVALID)
+ return false;
+
+ return link->network->ip_forward & ADDRESS_FAMILY_IPV6;
+}
+
+static bool link_proxy_arp_enabled(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ if (link->network->proxy_arp < 0)
+ return false;
+
+ return true;
+}
+
+static bool link_ipv6_accept_ra_enabled(Link *link) {
+ assert(link);
+
+ if (!socket_ipv6_is_supported())
+ return false;
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ if (!link_ipv6ll_enabled(link))
+ return false;
+
+ /* If unset use system default (enabled if local forwarding is disabled.
+ * disabled if local forwarding is enabled).
+ * If set, ignore or enforce RA independent of local forwarding state.
+ */
+ if (link->network->ipv6_accept_ra < 0)
+ /* default to accept RA if ip_forward is disabled and ignore RA if ip_forward is enabled */
+ return !link_ipv6_forward_enabled(link);
+ else if (link->network->ipv6_accept_ra > 0)
+ /* accept RA even if ip_forward is enabled */
+ return true;
+ else
+ /* ignore RA */
+ return false;
+}
+
+static IPv6PrivacyExtensions link_ipv6_privacy_extensions(Link *link) {
+ assert(link);
+
+ if (!socket_ipv6_is_supported())
+ return _IPV6_PRIVACY_EXTENSIONS_INVALID;
+
+ if (link->flags & IFF_LOOPBACK)
+ return _IPV6_PRIVACY_EXTENSIONS_INVALID;
+
+ if (!link->network)
+ return _IPV6_PRIVACY_EXTENSIONS_INVALID;
+
+ return link->network->ipv6_privacy_extensions;
+}
+
+static int link_enable_ipv6(Link *link) {
+ const char *p = NULL;
+ bool disabled;
+ int r;
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ disabled = !link_ipv6_enabled(link);
+
+ p = strjoina("/proc/sys/net/ipv6/conf/", link->ifname, "/disable_ipv6");
+
+ r = write_string_file(p, one_zero(disabled), WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot %s IPv6 for interface %s: %m",
+ enable_disable(!disabled), link->ifname);
+ else
+ log_link_info(link, "IPv6 successfully %sd", enable_disable(!disabled));
+
+ return 0;
+}
+
+void link_update_operstate(Link *link) {
+ LinkOperationalState operstate;
+ assert(link);
+
+ if (link->kernel_operstate == IF_OPER_DORMANT)
+ operstate = LINK_OPERSTATE_DORMANT;
+ else if (link_has_carrier(link)) {
+ Address *address;
+ uint8_t scope = RT_SCOPE_NOWHERE;
+ Iterator i;
+
+ /* if we have carrier, check what addresses we have */
+ SET_FOREACH(address, link->addresses, i) {
+ if (!address_is_ready(address))
+ continue;
+
+ if (address->scope < scope)
+ scope = address->scope;
+ }
+
+ /* for operstate we also take foreign addresses into account */
+ SET_FOREACH(address, link->addresses_foreign, i) {
+ if (!address_is_ready(address))
+ continue;
+
+ if (address->scope < scope)
+ scope = address->scope;
+ }
+
+ if (scope < RT_SCOPE_SITE)
+ /* universally accessible addresses found */
+ operstate = LINK_OPERSTATE_ROUTABLE;
+ else if (scope < RT_SCOPE_HOST)
+ /* only link or site local addresses found */
+ operstate = LINK_OPERSTATE_DEGRADED;
+ else
+ /* no useful addresses found */
+ operstate = LINK_OPERSTATE_CARRIER;
+ } else if (link->flags & IFF_UP)
+ operstate = LINK_OPERSTATE_NO_CARRIER;
+ else
+ operstate = LINK_OPERSTATE_OFF;
+
+ if (link->operstate != operstate) {
+ link->operstate = operstate;
+ link_send_changed(link, "OperationalState", NULL);
+ link_dirty(link);
+ }
+}
+
+#define FLAG_STRING(string, flag, old, new) \
+ (((old ^ new) & flag) \
+ ? ((old & flag) ? (" -" string) : (" +" string)) \
+ : "")
+
+static int link_update_flags(Link *link, sd_netlink_message *m) {
+ unsigned flags, unknown_flags_added, unknown_flags_removed, unknown_flags;
+ uint8_t operstate;
+ int r;
+
+ assert(link);
+
+ r = sd_rtnl_message_link_get_flags(m, &flags);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not get link flags: %m");
+
+ r = sd_netlink_message_read_u8(m, IFLA_OPERSTATE, &operstate);
+ if (r < 0)
+ /* if we got a message without operstate, take it to mean
+ the state was unchanged */
+ operstate = link->kernel_operstate;
+
+ if ((link->flags == flags) && (link->kernel_operstate == operstate))
+ return 0;
+
+ if (link->flags != flags) {
+ log_link_debug(link, "Flags change:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ FLAG_STRING("LOOPBACK", IFF_LOOPBACK, link->flags, flags),
+ FLAG_STRING("MASTER", IFF_MASTER, link->flags, flags),
+ FLAG_STRING("SLAVE", IFF_SLAVE, link->flags, flags),
+ FLAG_STRING("UP", IFF_UP, link->flags, flags),
+ FLAG_STRING("DORMANT", IFF_DORMANT, link->flags, flags),
+ FLAG_STRING("LOWER_UP", IFF_LOWER_UP, link->flags, flags),
+ FLAG_STRING("RUNNING", IFF_RUNNING, link->flags, flags),
+ FLAG_STRING("MULTICAST", IFF_MULTICAST, link->flags, flags),
+ FLAG_STRING("BROADCAST", IFF_BROADCAST, link->flags, flags),
+ FLAG_STRING("POINTOPOINT", IFF_POINTOPOINT, link->flags, flags),
+ FLAG_STRING("PROMISC", IFF_PROMISC, link->flags, flags),
+ FLAG_STRING("ALLMULTI", IFF_ALLMULTI, link->flags, flags),
+ FLAG_STRING("PORTSEL", IFF_PORTSEL, link->flags, flags),
+ FLAG_STRING("AUTOMEDIA", IFF_AUTOMEDIA, link->flags, flags),
+ FLAG_STRING("DYNAMIC", IFF_DYNAMIC, link->flags, flags),
+ FLAG_STRING("NOARP", IFF_NOARP, link->flags, flags),
+ FLAG_STRING("NOTRAILERS", IFF_NOTRAILERS, link->flags, flags),
+ FLAG_STRING("DEBUG", IFF_DEBUG, link->flags, flags),
+ FLAG_STRING("ECHO", IFF_ECHO, link->flags, flags));
+
+ unknown_flags = ~(IFF_LOOPBACK | IFF_MASTER | IFF_SLAVE | IFF_UP |
+ IFF_DORMANT | IFF_LOWER_UP | IFF_RUNNING |
+ IFF_MULTICAST | IFF_BROADCAST | IFF_POINTOPOINT |
+ IFF_PROMISC | IFF_ALLMULTI | IFF_PORTSEL |
+ IFF_AUTOMEDIA | IFF_DYNAMIC | IFF_NOARP |
+ IFF_NOTRAILERS | IFF_DEBUG | IFF_ECHO);
+ unknown_flags_added = ((link->flags ^ flags) & flags & unknown_flags);
+ unknown_flags_removed = ((link->flags ^ flags) & link->flags & unknown_flags);
+
+ /* link flags are currently at most 18 bits, let's align to
+ * printing 20 */
+ if (unknown_flags_added)
+ log_link_debug(link,
+ "Unknown link flags gained: %#.5x (ignoring)",
+ unknown_flags_added);
+
+ if (unknown_flags_removed)
+ log_link_debug(link,
+ "Unknown link flags lost: %#.5x (ignoring)",
+ unknown_flags_removed);
+ }
+
+ link->flags = flags;
+ link->kernel_operstate = operstate;
+
+ link_update_operstate(link);
+
+ return 0;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Link*, link_unref);
+
+static int link_new(Manager *manager, sd_netlink_message *message, Link **ret) {
+ _cleanup_(link_unrefp) Link *link = NULL;
+ uint16_t type;
+ const char *ifname, *kind = NULL;
+ int r, ifindex;
+ unsigned short iftype;
+
+ assert(manager);
+ assert(message);
+ assert(ret);
+
+ /* check for link kind */
+ r = sd_netlink_message_enter_container(message, IFLA_LINKINFO);
+ if (r == 0) {
+ (void) sd_netlink_message_read_string(message, IFLA_INFO_KIND, &kind);
+ r = sd_netlink_message_exit_container(message);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0)
+ return r;
+ else if (type != RTM_NEWLINK)
+ return -EINVAL;
+
+ r = sd_rtnl_message_link_get_ifindex(message, &ifindex);
+ if (r < 0)
+ return r;
+ else if (ifindex <= 0)
+ return -EINVAL;
+
+ r = sd_rtnl_message_link_get_type(message, &iftype);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_string(message, IFLA_IFNAME, &ifname);
+ if (r < 0)
+ return r;
+
+ link = new(Link, 1);
+ if (!link)
+ return -ENOMEM;
+
+ *link = (Link) {
+ .n_ref = 1,
+ .manager = manager,
+ .state = LINK_STATE_PENDING,
+ .rtnl_extended_attrs = true,
+ .ifindex = ifindex,
+ .iftype = iftype,
+ };
+
+ link->ifname = strdup(ifname);
+ if (!link->ifname)
+ return -ENOMEM;
+
+ if (kind) {
+ link->kind = strdup(kind);
+ if (!link->kind)
+ return -ENOMEM;
+ }
+
+ r = sd_netlink_message_read_u32(message, IFLA_MASTER, (uint32_t *)&link->master_ifindex);
+ if (r < 0)
+ log_link_debug_errno(link, r, "New device has no master, continuing without");
+
+ r = sd_netlink_message_read_ether_addr(message, IFLA_ADDRESS, &link->mac);
+ if (r < 0)
+ log_link_debug_errno(link, r, "MAC address not found for new device, continuing without");
+
+ if (asprintf(&link->state_file, "/run/systemd/netif/links/%d", link->ifindex) < 0)
+ return -ENOMEM;
+
+ if (asprintf(&link->lease_file, "/run/systemd/netif/leases/%d", link->ifindex) < 0)
+ return -ENOMEM;
+
+ if (asprintf(&link->lldp_file, "/run/systemd/netif/lldp/%d", link->ifindex) < 0)
+ return -ENOMEM;
+
+ r = hashmap_ensure_allocated(&manager->links, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(manager->links, INT_TO_PTR(link->ifindex), link);
+ if (r < 0)
+ return r;
+
+ r = link_update_flags(link, message);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(link);
+
+ return 0;
+}
+
+static void link_detach_from_manager(Link *link) {
+ if (!link || !link->manager)
+ return;
+
+ hashmap_remove(link->manager->links, INT_TO_PTR(link->ifindex));
+ set_remove(link->manager->links_requesting_uuid, link);
+ link_clean(link);
+}
+
+static Link *link_free(Link *link) {
+ Address *address;
+ Link *carrier;
+ Route *route;
+ Iterator i;
+
+ assert(link);
+
+ while ((route = set_first(link->routes)))
+ route_free(route);
+
+ while ((route = set_first(link->routes_foreign)))
+ route_free(route);
+
+ link->routes = set_free(link->routes);
+ link->routes_foreign = set_free(link->routes_foreign);
+
+ while ((address = set_first(link->addresses)))
+ address_free(address);
+
+ while ((address = set_first(link->addresses_foreign)))
+ address_free(address);
+
+ link->addresses = set_free(link->addresses);
+ link->addresses_foreign = set_free(link->addresses_foreign);
+
+ while ((address = link->pool_addresses)) {
+ LIST_REMOVE(addresses, link->pool_addresses, address);
+ address_free(address);
+ }
+
+ sd_dhcp_server_unref(link->dhcp_server);
+ sd_dhcp_client_unref(link->dhcp_client);
+ sd_dhcp_lease_unref(link->dhcp_lease);
+
+ link_lldp_emit_stop(link);
+
+ free(link->lease_file);
+
+ sd_lldp_unref(link->lldp);
+ free(link->lldp_file);
+
+ ndisc_flush(link);
+
+ sd_ipv4ll_unref(link->ipv4ll);
+ sd_dhcp6_client_unref(link->dhcp6_client);
+ sd_ndisc_unref(link->ndisc);
+ sd_radv_unref(link->radv);
+
+ link_detach_from_manager(link);
+
+ free(link->ifname);
+
+ free(link->kind);
+
+ (void) unlink(link->state_file);
+ free(link->state_file);
+
+ sd_device_unref(link->sd_device);
+
+ HASHMAP_FOREACH (carrier, link->bound_to_links, i)
+ hashmap_remove(link->bound_to_links, INT_TO_PTR(carrier->ifindex));
+ hashmap_free(link->bound_to_links);
+
+ HASHMAP_FOREACH (carrier, link->bound_by_links, i)
+ hashmap_remove(link->bound_by_links, INT_TO_PTR(carrier->ifindex));
+ hashmap_free(link->bound_by_links);
+
+ return mfree(link);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(Link, link, link_free);
+
+int link_get(Manager *m, int ifindex, Link **ret) {
+ Link *link;
+
+ assert(m);
+ assert(ifindex);
+ assert(ret);
+
+ link = hashmap_get(m->links, INT_TO_PTR(ifindex));
+ if (!link)
+ return -ENODEV;
+
+ *ret = link;
+
+ return 0;
+}
+
+static void link_set_state(Link *link, LinkState state) {
+ assert(link);
+
+ if (link->state == state)
+ return;
+
+ link->state = state;
+
+ link_send_changed(link, "AdministrativeState", NULL);
+}
+
+static void link_enter_unmanaged(Link *link) {
+ assert(link);
+
+ log_link_debug(link, "Unmanaged");
+
+ link_set_state(link, LINK_STATE_UNMANAGED);
+
+ link_dirty(link);
+}
+
+static int link_stop_clients(Link *link) {
+ int r = 0, k;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->event);
+
+ if (link->dhcp_client) {
+ k = sd_dhcp_client_stop(link->dhcp_client);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not stop DHCPv4 client: %m");
+ }
+
+ if (link->ipv4ll) {
+ k = sd_ipv4ll_stop(link->ipv4ll);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not stop IPv4 link-local: %m");
+ }
+
+ if (link->dhcp6_client) {
+ k = sd_dhcp6_client_stop(link->dhcp6_client);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not stop DHCPv6 client: %m");
+ }
+
+ if (link->ndisc) {
+ k = sd_ndisc_stop(link->ndisc);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not stop IPv6 Router Discovery: %m");
+ }
+
+ if (link->radv) {
+ k = sd_radv_stop(link->radv);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not stop IPv6 Router Advertisement: %m");
+ }
+
+ link_lldp_emit_stop(link);
+ return r;
+}
+
+void link_enter_failed(Link *link) {
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return;
+
+ log_link_warning(link, "Failed");
+
+ link_set_state(link, LINK_STATE_FAILED);
+
+ link_stop_clients(link);
+
+ link_dirty(link);
+}
+
+static Address* link_find_dhcp_server_address(Link *link) {
+ Address *address;
+
+ assert(link);
+ assert(link->network);
+
+ /* The first statically configured address if there is any */
+ LIST_FOREACH(addresses, address, link->network->static_addresses) {
+
+ if (address->family != AF_INET)
+ continue;
+
+ if (in_addr_is_null(address->family, &address->in_addr))
+ continue;
+
+ return address;
+ }
+
+ /* If that didn't work, find a suitable address we got from the pool */
+ LIST_FOREACH(addresses, address, link->pool_addresses) {
+ if (address->family != AF_INET)
+ continue;
+
+ return address;
+ }
+
+ return NULL;
+}
+
+static void link_enter_configured(Link *link) {
+ assert(link);
+ assert(link->network);
+
+ if (link->state != LINK_STATE_CONFIGURING)
+ return;
+
+ log_link_info(link, "Configured");
+
+ link_set_state(link, LINK_STATE_CONFIGURED);
+
+ link_dirty(link);
+}
+
+static int link_request_set_routing_policy_rule(Link *link) {
+ RoutingPolicyRule *rule, *rrule = NULL;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ link_set_state(link, LINK_STATE_CONFIGURING);
+ link->routing_policy_rules_configured = false;
+
+ LIST_FOREACH(rules, rule, link->network->rules) {
+ r = routing_policy_rule_get(link->manager, rule->family, &rule->from, rule->from_prefixlen, &rule->to,
+ rule->to_prefixlen, rule->tos, rule->fwmark, rule->table, rule->iif, rule->oif,
+ rule->protocol, &rule->sport, &rule->dport, &rrule);
+ if (r == 0) {
+ (void) routing_policy_rule_make_local(link->manager, rrule);
+ continue;
+ }
+
+ r = routing_policy_rule_configure(rule, link, NULL, false);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not set routing policy rules: %m");
+ link_enter_failed(link);
+ return r;
+ }
+
+ link->routing_policy_rule_messages++;
+ }
+
+ routing_policy_rule_purge(link->manager, link);
+ if (link->routing_policy_rule_messages == 0) {
+ link->routing_policy_rules_configured = true;
+ link_check_ready(link);
+ } else
+ log_link_debug(link, "Setting routing policy rules");
+
+ return 0;
+}
+
+static int route_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->route_messages > 0);
+ assert(IN_SET(link->state, LINK_STATE_CONFIGURING,
+ LINK_STATE_FAILED, LINK_STATE_LINGER));
+
+ link->route_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST)
+ log_link_warning_errno(link, r, "Could not set route: %m");
+
+ if (link->route_messages == 0) {
+ log_link_debug(link, "Routes set");
+ link->static_routes_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+static int link_request_set_routes(Link *link) {
+ enum {
+ PHASE_NON_GATEWAY, /* First phase: Routes without a gateway */
+ PHASE_GATEWAY, /* Second phase: Routes with a gateway */
+ _PHASE_MAX
+ } phase;
+ Route *rt;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->addresses_configured);
+ assert(link->address_messages == 0);
+ assert(link->state != _LINK_STATE_INVALID);
+
+ link_set_state(link, LINK_STATE_CONFIGURING);
+ link->static_routes_configured = false;
+
+ r = link_request_set_routing_policy_rule(link);
+ if (r < 0)
+ return r;
+
+ /* First add the routes that enable us to talk to gateways, then add in the others that need a gateway. */
+ for (phase = 0; phase < _PHASE_MAX; phase++)
+ LIST_FOREACH(routes, rt, link->network->static_routes) {
+
+ if (in_addr_is_null(rt->family, &rt->gw) != (phase == PHASE_NON_GATEWAY))
+ continue;
+
+ r = route_configure(rt, link, route_handler);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not set routes: %m");
+ link_enter_failed(link);
+ return r;
+ }
+
+ link->route_messages++;
+ }
+
+ if (link->route_messages == 0) {
+ link->static_routes_configured = true;
+ link_check_ready(link);
+ } else
+ log_link_debug(link, "Setting routes");
+
+ return 0;
+}
+
+void link_check_ready(Link *link) {
+ Address *a;
+ Iterator i;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return;
+
+ if (!link->network)
+ return;
+
+ if (!link->addresses_configured)
+ return;
+
+ if (!link->neighbors_configured)
+ return;
+
+ SET_FOREACH(a, link->addresses, i)
+ if (!address_is_ready(a))
+ return;
+
+ if (!link->addresses_ready) {
+ link->addresses_ready = true;
+ link_request_set_routes(link);
+ }
+
+ if (!link->static_routes_configured)
+ return;
+
+ if (!link->routing_policy_rules_configured)
+ return;
+
+ if (link_ipv4ll_enabled(link))
+ if (!link->ipv4ll_address ||
+ !link->ipv4ll_route)
+ return;
+
+ if (!link->network->bridge) {
+
+ if (link_ipv6ll_enabled(link))
+ if (in_addr_is_null(AF_INET6, (const union in_addr_union*) &link->ipv6ll_address) > 0)
+ return;
+
+ if ((link_dhcp4_enabled(link) && !link_dhcp6_enabled(link) &&
+ !link->dhcp4_configured) ||
+ (link_dhcp6_enabled(link) && !link_dhcp4_enabled(link) &&
+ !link->dhcp6_configured) ||
+ (link_dhcp4_enabled(link) && link_dhcp6_enabled(link) &&
+ !link->dhcp4_configured && !link->dhcp6_configured))
+ return;
+
+ if (link_ipv6_accept_ra_enabled(link) && !link->ndisc_configured)
+ return;
+ }
+
+ if (link->state != LINK_STATE_CONFIGURED)
+ link_enter_configured(link);
+
+ return;
+}
+
+static int link_request_set_neighbors(Link *link) {
+ Neighbor *neighbor;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->state != _LINK_STATE_INVALID);
+
+ link_set_state(link, LINK_STATE_CONFIGURING);
+ link->neighbors_configured = false;
+
+ LIST_FOREACH(neighbors, neighbor, link->network->neighbors) {
+ r = neighbor_configure(neighbor, link, NULL);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not set neighbor: %m");
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ if (link->neighbor_messages == 0) {
+ link->neighbors_configured = true;
+ link_check_ready(link);
+ } else
+ log_link_debug(link, "Setting neighbors");
+
+ return 0;
+}
+
+static int address_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(rtnl);
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+ assert(link->address_messages > 0);
+ assert(IN_SET(link->state, LINK_STATE_CONFIGURING,
+ LINK_STATE_FAILED, LINK_STATE_LINGER));
+
+ link->address_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST)
+ log_link_warning_errno(link, r, "could not set address: %m");
+ else if (r >= 0)
+ manager_rtnl_process_address(rtnl, m, link->manager);
+
+ if (link->address_messages == 0) {
+ log_link_debug(link, "Addresses set");
+ link->addresses_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+static int link_push_uplink_dns_to_dhcp_server(Link *link, sd_dhcp_server *s) {
+ _cleanup_free_ struct in_addr *addresses = NULL;
+ size_t n_addresses = 0, n_allocated = 0;
+ unsigned i;
+
+ log_debug("Copying DNS server information from %s", link->ifname);
+
+ if (!link->network)
+ return 0;
+
+ for (i = 0; i < link->network->n_dns; i++) {
+ struct in_addr ia;
+
+ /* Only look for IPv4 addresses */
+ if (link->network->dns[i].family != AF_INET)
+ continue;
+
+ ia = link->network->dns[i].address.in;
+
+ /* Never propagate obviously borked data */
+ if (in4_addr_is_null(&ia) || in4_addr_is_localhost(&ia))
+ continue;
+
+ if (!GREEDY_REALLOC(addresses, n_allocated, n_addresses + 1))
+ return log_oom();
+
+ addresses[n_addresses++] = ia;
+ }
+
+ if (link->network->dhcp_use_dns && link->dhcp_lease) {
+ const struct in_addr *da = NULL;
+ int n;
+
+ n = sd_dhcp_lease_get_dns(link->dhcp_lease, &da);
+ if (n > 0) {
+
+ if (!GREEDY_REALLOC(addresses, n_allocated, n_addresses + n))
+ return log_oom();
+
+ memcpy(addresses + n_addresses, da, n * sizeof(struct in_addr));
+ n_addresses += n;
+ }
+ }
+
+ if (n_addresses <= 0)
+ return 0;
+
+ return sd_dhcp_server_set_dns(s, addresses, n_addresses);
+}
+
+static int link_push_uplink_ntp_to_dhcp_server(Link *link, sd_dhcp_server *s) {
+ _cleanup_free_ struct in_addr *addresses = NULL;
+ size_t n_addresses = 0, n_allocated = 0;
+ char **a;
+
+ if (!link->network)
+ return 0;
+
+ log_debug("Copying NTP server information from %s", link->ifname);
+
+ STRV_FOREACH(a, link->network->ntp) {
+ struct in_addr ia;
+
+ /* Only look for IPv4 addresses */
+ if (inet_pton(AF_INET, *a, &ia) <= 0)
+ continue;
+
+ /* Never propagate obviously borked data */
+ if (in4_addr_is_null(&ia) || in4_addr_is_localhost(&ia))
+ continue;
+
+ if (!GREEDY_REALLOC(addresses, n_allocated, n_addresses + 1))
+ return log_oom();
+
+ addresses[n_addresses++] = ia;
+ }
+
+ if (link->network->dhcp_use_ntp && link->dhcp_lease) {
+ const struct in_addr *da = NULL;
+ int n;
+
+ n = sd_dhcp_lease_get_ntp(link->dhcp_lease, &da);
+ if (n > 0) {
+
+ if (!GREEDY_REALLOC(addresses, n_allocated, n_addresses + n))
+ return log_oom();
+
+ memcpy(addresses + n_addresses, da, n * sizeof(struct in_addr));
+ n_addresses += n;
+ }
+ }
+
+ if (n_addresses <= 0)
+ return 0;
+
+ return sd_dhcp_server_set_ntp(s, addresses, n_addresses);
+}
+
+static int link_set_bridge_fdb(Link *link) {
+ FdbEntry *fdb_entry;
+ int r;
+
+ LIST_FOREACH(static_fdb_entries, fdb_entry, link->network->static_fdb_entries) {
+ r = fdb_entry_configure(link, fdb_entry);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to add MAC entry to static MAC table: %m");
+ }
+
+ return 0;
+}
+
+static int link_request_set_addresses(Link *link) {
+ AddressLabel *label;
+ Address *ad;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->state != _LINK_STATE_INVALID);
+
+ link_set_state(link, LINK_STATE_CONFIGURING);
+
+ /* Reset all *_configured flags we are configuring. */
+ link->addresses_configured = false;
+ link->addresses_ready = false;
+ link->neighbors_configured = false;
+ link->static_routes_configured = false;
+ link->routing_policy_rules_configured = false;
+
+ r = link_set_bridge_fdb(link);
+ if (r < 0)
+ return r;
+
+ r = link_request_set_neighbors(link);
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(addresses, ad, link->network->static_addresses) {
+ bool update;
+
+ update = address_get(link, ad->family, &ad->in_addr, ad->prefixlen, NULL) > 0;
+
+ r = address_configure(ad, link, address_handler, update);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not set addresses: %m");
+ link_enter_failed(link);
+ return r;
+ }
+
+ link->address_messages++;
+ }
+
+ LIST_FOREACH(labels, label, link->network->address_labels) {
+ r = address_label_configure(label, link, NULL, false);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not set address label: %m");
+ link_enter_failed(link);
+ return r;
+ }
+
+ link->address_label_messages++;
+ }
+
+ /* now that we can figure out a default address for the dhcp server,
+ start it */
+ if (link_dhcp4_server_enabled(link) && (link->flags & IFF_UP)) {
+ Address *address;
+ Link *uplink = NULL;
+ bool acquired_uplink = false;
+
+ address = link_find_dhcp_server_address(link);
+ if (!address) {
+ log_link_warning(link, "Failed to find suitable address for DHCPv4 server instance.");
+ link_enter_failed(link);
+ return 0;
+ }
+
+ /* use the server address' subnet as the pool */
+ r = sd_dhcp_server_configure_pool(link->dhcp_server, &address->in_addr.in, address->prefixlen,
+ link->network->dhcp_server_pool_offset, link->network->dhcp_server_pool_size);
+ if (r < 0)
+ return r;
+
+ /* TODO:
+ r = sd_dhcp_server_set_router(link->dhcp_server,
+ &main_address->in_addr.in);
+ if (r < 0)
+ return r;
+ */
+
+ if (link->network->dhcp_server_max_lease_time_usec > 0) {
+ r = sd_dhcp_server_set_max_lease_time(
+ link->dhcp_server,
+ DIV_ROUND_UP(link->network->dhcp_server_max_lease_time_usec, USEC_PER_SEC));
+ if (r < 0)
+ return r;
+ }
+
+ if (link->network->dhcp_server_default_lease_time_usec > 0) {
+ r = sd_dhcp_server_set_default_lease_time(
+ link->dhcp_server,
+ DIV_ROUND_UP(link->network->dhcp_server_default_lease_time_usec, USEC_PER_SEC));
+ if (r < 0)
+ return r;
+ }
+
+ if (link->network->dhcp_server_emit_dns) {
+
+ if (link->network->n_dhcp_server_dns > 0)
+ r = sd_dhcp_server_set_dns(link->dhcp_server, link->network->dhcp_server_dns, link->network->n_dhcp_server_dns);
+ else {
+ uplink = manager_find_uplink(link->manager, link);
+ acquired_uplink = true;
+
+ if (!uplink) {
+ log_link_debug(link, "Not emitting DNS server information on link, couldn't find suitable uplink.");
+ r = 0;
+ } else
+ r = link_push_uplink_dns_to_dhcp_server(uplink, link->dhcp_server);
+ }
+ if (r < 0)
+ log_link_warning_errno(link, r, "Failed to set DNS server for DHCP server, ignoring: %m");
+ }
+
+ if (link->network->dhcp_server_emit_ntp) {
+
+ if (link->network->n_dhcp_server_ntp > 0)
+ r = sd_dhcp_server_set_ntp(link->dhcp_server, link->network->dhcp_server_ntp, link->network->n_dhcp_server_ntp);
+ else {
+ if (!acquired_uplink)
+ uplink = manager_find_uplink(link->manager, link);
+
+ if (!uplink) {
+ log_link_debug(link, "Not emitting NTP server information on link, couldn't find suitable uplink.");
+ r = 0;
+ } else
+ r = link_push_uplink_ntp_to_dhcp_server(uplink, link->dhcp_server);
+
+ }
+ if (r < 0)
+ log_link_warning_errno(link, r, "Failed to set NTP server for DHCP server, ignoring: %m");
+ }
+
+ r = sd_dhcp_server_set_emit_router(link->dhcp_server, link->network->dhcp_server_emit_router);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to set router emission for DHCP server: %m");
+
+ if (link->network->dhcp_server_emit_timezone) {
+ _cleanup_free_ char *buffer = NULL;
+ const char *tz = NULL;
+
+ if (link->network->dhcp_server_timezone)
+ tz = link->network->dhcp_server_timezone;
+ else {
+ r = get_timezone(&buffer);
+ if (r < 0)
+ log_warning_errno(r, "Failed to determine timezone: %m");
+ else
+ tz = buffer;
+ }
+
+ if (tz) {
+ r = sd_dhcp_server_set_timezone(link->dhcp_server, tz);
+ if (r < 0)
+ return r;
+ }
+ }
+ if (!sd_dhcp_server_is_running(link->dhcp_server)) {
+ r = sd_dhcp_server_start(link->dhcp_server);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not start DHCPv4 server instance: %m");
+
+ link_enter_failed(link);
+
+ return 0;
+ }
+ }
+
+ log_link_debug(link, "Offering DHCPv4 leases");
+ }
+
+ if (link->address_messages == 0) {
+ link->addresses_configured = true;
+ link_check_ready(link);
+ } else
+ log_link_debug(link, "Setting addresses");
+
+ return 0;
+}
+
+static int link_set_bridge_vlan(Link *link) {
+ int r = 0;
+
+ r = br_vlan_configure(link, link->network->pvid, link->network->br_vid_bitmap, link->network->br_untagged_bitmap);
+ if (r < 0)
+ log_link_error_errno(link, r, "Failed to assign VLANs to bridge port: %m");
+
+ return r;
+}
+
+static int link_set_proxy_arp(Link *link) {
+ const char *p = NULL;
+ int r;
+
+ if (!link_proxy_arp_enabled(link))
+ return 0;
+
+ p = strjoina("/proc/sys/net/ipv4/conf/", link->ifname, "/proxy_arp");
+
+ r = write_string_file(p, one_zero(link->network->proxy_arp), WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot configure proxy ARP for interface: %m");
+
+ return 0;
+}
+
+static int link_set_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ log_link_debug(link, "Set link");
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_error_errno(link, r, "Could not join netdev: %m");
+ link_enter_failed(link);
+ }
+
+ return 1;
+}
+
+static int link_configure_after_setting_mtu(Link *link);
+
+static int set_mtu_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+
+ link->setting_mtu = false;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not set MTU: %m");
+ return 1;
+ }
+
+ log_link_debug(link, "Setting MTU done.");
+
+ if (link->state == LINK_STATE_PENDING)
+ (void) link_configure_after_setting_mtu(link);
+
+ return 1;
+}
+
+int link_set_mtu(Link *link, uint32_t mtu) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ if (link->mtu == mtu || link->setting_mtu)
+ return 0;
+
+ log_link_debug(link, "Setting MTU: %" PRIu32, mtu);
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ /* If IPv6 not configured (no static IPv6 address and IPv6LL autoconfiguration is disabled)
+ * for this interface, or if it is a bridge slave, then disable IPv6 else enable it. */
+ (void) link_enable_ipv6(link);
+
+ /* IPv6 protocol requires a minimum MTU of IPV6_MTU_MIN(1280) bytes
+ * on the interface. Bump up MTU bytes to IPV6_MTU_MIN. */
+ if (link_ipv6_enabled(link) && mtu < IPV6_MIN_MTU) {
+
+ log_link_warning(link, "Bumping MTU to " STRINGIFY(IPV6_MIN_MTU) ", as "
+ "IPv6 is requested and requires a minimum MTU of " STRINGIFY(IPV6_MIN_MTU) " bytes: %m");
+
+ mtu = IPV6_MIN_MTU;
+ }
+
+ r = sd_netlink_message_append_u32(req, IFLA_MTU, mtu);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append MTU: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, set_mtu_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+ link->setting_mtu = true;
+
+ return 0;
+}
+
+static int set_flags_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not set link flags: %m");
+
+ return 1;
+}
+
+static int link_set_flags(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ unsigned ifi_change = 0;
+ unsigned ifi_flags = 0;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ if (!link->network)
+ return 0;
+
+ if (link->network->arp < 0 && link->network->multicast < 0 && link->network->allmulticast < 0)
+ return 0;
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ if (link->network->arp >= 0) {
+ ifi_change |= IFF_NOARP;
+ SET_FLAG(ifi_flags, IFF_NOARP, link->network->arp == 0);
+ }
+
+ if (link->network->multicast >= 0) {
+ ifi_change |= IFF_MULTICAST;
+ SET_FLAG(ifi_flags, IFF_MULTICAST, link->network->multicast);
+ }
+
+ if (link->network->allmulticast >= 0) {
+ ifi_change |= IFF_ALLMULTI;
+ SET_FLAG(ifi_flags, IFF_ALLMULTI, link->network->allmulticast);
+ }
+
+ r = sd_rtnl_message_link_set_flags(req, ifi_flags, ifi_change);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set link flags: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, set_flags_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int link_set_bridge(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_rtnl_message_link_set_family(req, PF_BRIDGE);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set message family: %m");
+
+ r = sd_netlink_message_open_container(req, IFLA_PROTINFO);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_PROTINFO attribute: %m");
+
+ if (link->network->use_bpdu >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_GUARD, link->network->use_bpdu);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_GUARD attribute: %m");
+ }
+
+ if (link->network->hairpin >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_MODE, link->network->hairpin);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_MODE attribute: %m");
+ }
+
+ if (link->network->fast_leave >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_FAST_LEAVE, link->network->fast_leave);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_FAST_LEAVE attribute: %m");
+ }
+
+ if (link->network->allow_port_to_be_root >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_PROTECT, link->network->allow_port_to_be_root);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_PROTECT attribute: %m");
+
+ }
+
+ if (link->network->unicast_flood >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_UNICAST_FLOOD, link->network->unicast_flood);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_UNICAST_FLOOD attribute: %m");
+ }
+
+ if (link->network->multicast_to_unicast >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_MCAST_TO_UCAST, link->network->multicast_to_unicast);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_MCAST_TO_UCAST attribute: %m");
+ }
+
+ if (link->network->cost != 0) {
+ r = sd_netlink_message_append_u32(req, IFLA_BRPORT_COST, link->network->cost);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_COST attribute: %m");
+ }
+
+ if (link->network->priority != LINK_BRIDGE_PORT_PRIORITY_INVALID) {
+ r = sd_netlink_message_append_u16(req, IFLA_BRPORT_PRIORITY, link->network->priority);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_PRIORITY attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_set_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return r;
+}
+
+static int link_bond_set(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_NEWLINK, link->network->bond->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_netlink_message_set_flags(req, NLM_F_REQUEST | NLM_F_ACK);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set netlink flags: %m");
+
+ r = sd_netlink_message_open_container(req, IFLA_LINKINFO);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_PROTINFO attribute: %m");
+
+ r = sd_netlink_message_open_container_union(req, IFLA_INFO_DATA, "bond");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ if (link->network->active_slave) {
+ r = sd_netlink_message_append_u32(req, IFLA_BOND_ACTIVE_SLAVE, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BOND_ACTIVE_SLAVE attribute: %m");
+ }
+
+ if (link->network->primary_slave) {
+ r = sd_netlink_message_append_u32(req, IFLA_BOND_PRIMARY, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BOND_PRIMARY attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, set_flags_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return r;
+}
+
+static int link_lldp_save(Link *link) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ sd_lldp_neighbor **l = NULL;
+ int n = 0, r, i;
+
+ assert(link);
+ assert(link->lldp_file);
+
+ if (!link->lldp) {
+ (void) unlink(link->lldp_file);
+ return 0;
+ }
+
+ r = sd_lldp_get_neighbors(link->lldp, &l);
+ if (r < 0)
+ goto finish;
+ if (r == 0) {
+ (void) unlink(link->lldp_file);
+ goto finish;
+ }
+
+ n = r;
+
+ r = fopen_temporary(link->lldp_file, &f, &temp_path);
+ if (r < 0)
+ goto finish;
+
+ fchmod(fileno(f), 0644);
+
+ for (i = 0; i < n; i++) {
+ const void *p;
+ le64_t u;
+ size_t sz;
+
+ r = sd_lldp_neighbor_get_raw(l[i], &p, &sz);
+ if (r < 0)
+ goto finish;
+
+ u = htole64(sz);
+ (void) fwrite(&u, 1, sizeof(u), f);
+ (void) fwrite(p, 1, sz, f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto finish;
+
+ if (rename(temp_path, link->lldp_file) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+finish:
+ if (r < 0) {
+ (void) unlink(link->lldp_file);
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ log_link_error_errno(link, r, "Failed to save LLDP data to %s: %m", link->lldp_file);
+ }
+
+ if (l) {
+ for (i = 0; i < n; i++)
+ sd_lldp_neighbor_unref(l[i]);
+ free(l);
+ }
+
+ return r;
+}
+
+static void lldp_handler(sd_lldp *lldp, sd_lldp_event event, sd_lldp_neighbor *n, void *userdata) {
+ Link *link = userdata;
+ int r;
+
+ assert(link);
+
+ (void) link_lldp_save(link);
+
+ if (link_lldp_emit_enabled(link) && event == SD_LLDP_EVENT_ADDED) {
+ /* If we received information about a new neighbor, restart the LLDP "fast" logic */
+
+ log_link_debug(link, "Received LLDP datagram from previously unknown neighbor, restarting 'fast' LLDP transmission.");
+
+ r = link_lldp_emit_start(link);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Failed to restart LLDP transmission: %m");
+ }
+}
+
+static int link_acquire_ipv6_conf(Link *link) {
+ int r;
+
+ assert(link);
+
+ if (link_ipv6_accept_ra_enabled(link)) {
+ assert(link->ndisc);
+
+ log_link_debug(link, "Discovering IPv6 routers");
+
+ r = sd_ndisc_start(link->ndisc);
+ if (r < 0 && r != -EBUSY)
+ return log_link_warning_errno(link, r, "Could not start IPv6 Router Discovery: %m");
+ }
+
+ if (link_radv_enabled(link)) {
+ assert(link->radv);
+ assert(in_addr_is_link_local(AF_INET6, (const union in_addr_union*)&link->ipv6ll_address) > 0);
+
+ log_link_debug(link, "Starting IPv6 Router Advertisements");
+
+ r = sd_radv_start(link->radv);
+ if (r < 0 && r != -EBUSY)
+ return log_link_warning_errno(link, r, "Could not start IPv6 Router Advertisement: %m");
+ }
+
+ (void) dhcp6_request_prefix_delegation(link);
+
+ return 0;
+}
+
+static int link_acquire_ipv4_conf(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(link->manager->event);
+
+ if (link_ipv4ll_enabled(link)) {
+ assert(link->ipv4ll);
+
+ log_link_debug(link, "Acquiring IPv4 link-local address");
+
+ r = sd_ipv4ll_start(link->ipv4ll);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not acquire IPv4 link-local address: %m");
+ }
+
+ if (link_dhcp4_enabled(link)) {
+ assert(link->dhcp_client);
+
+ log_link_debug(link, "Acquiring DHCPv4 lease");
+
+ r = sd_dhcp_client_start(link->dhcp_client);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not acquire DHCPv4 lease: %m");
+ }
+
+ return 0;
+}
+
+static int link_acquire_conf(Link *link) {
+ int r;
+
+ assert(link);
+
+ r = link_acquire_ipv4_conf(link);
+ if (r < 0)
+ return r;
+
+ if (in_addr_is_null(AF_INET6, (const union in_addr_union*) &link->ipv6ll_address) == 0) {
+ r = link_acquire_ipv6_conf(link);
+ if (r < 0)
+ return r;
+ }
+
+ if (link_lldp_emit_enabled(link)) {
+ r = link_lldp_emit_start(link);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to start LLDP transmission: %m");
+ }
+
+ return 0;
+}
+
+bool link_has_carrier(Link *link) {
+ /* see Documentation/networking/operstates.txt in the kernel sources */
+
+ if (link->kernel_operstate == IF_OPER_UP)
+ return true;
+
+ if (link->kernel_operstate == IF_OPER_UNKNOWN)
+ /* operstate may not be implemented, so fall back to flags */
+ if ((link->flags & IFF_LOWER_UP) && !(link->flags & IFF_DORMANT))
+ return true;
+
+ return false;
+}
+
+static int link_address_genmode_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not set address genmode for interface: %m");
+
+ return 1;
+}
+
+static int link_configure_addrgen_mode(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ uint8_t ipv6ll_mode;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ log_link_debug(link, "Setting address genmode for link");
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_netlink_message_open_container(req, IFLA_AF_SPEC);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open IFLA_AF_SPEC container: %m");
+
+ r = sd_netlink_message_open_container(req, AF_INET6);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open AF_INET6 container: %m");
+
+ if (!link_ipv6ll_enabled(link))
+ ipv6ll_mode = IN6_ADDR_GEN_MODE_NONE;
+ else {
+ const char *p = NULL;
+ _cleanup_free_ char *stable_secret = NULL;
+
+ p = strjoina("/proc/sys/net/ipv6/conf/", link->ifname, "/stable_secret");
+
+ /* The file may not exist. And event if it exists, when stable_secret is unset,
+ * then reading the file fails and EIO is returned. */
+ r = read_one_line_file(p, &stable_secret);
+ if (r < 0)
+ ipv6ll_mode = IN6_ADDR_GEN_MODE_EUI64;
+ else
+ ipv6ll_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ }
+
+ r = sd_netlink_message_append_u8(req, IFLA_INET6_ADDR_GEN_MODE, ipv6ll_mode);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_INET6_ADDR_GEN_MODE: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close AF_INET6 container: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close IFLA_AF_SPEC container: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_address_genmode_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int link_up_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ /* we warn but don't fail the link, as it may be brought up later */
+ log_link_warning_errno(link, r, "Could not bring up interface: %m");
+
+ return 1;
+}
+
+static int link_up(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ log_link_debug(link, "Bringing link up");
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ /* set it free if not enslaved with networkd */
+ if (!link->network->bridge && !link->network->bond && !link->network->vrf) {
+ r = sd_netlink_message_append_u32(req, IFLA_MASTER, 0);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_MASTER attribute: %m");
+ }
+
+ r = sd_rtnl_message_link_set_flags(req, IFF_UP, IFF_UP);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set link flags: %m");
+
+ if (link->network->mac) {
+ r = sd_netlink_message_append_ether_addr(req, IFLA_ADDRESS, link->network->mac);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set MAC address: %m");
+ }
+
+ if (link_ipv6_enabled(link)) {
+ r = sd_netlink_message_open_container(req, IFLA_AF_SPEC);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open IFLA_AF_SPEC container: %m");
+
+ /* if the kernel lacks ipv6 support setting IFF_UP fails if any ipv6 options are passed */
+ r = sd_netlink_message_open_container(req, AF_INET6);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open AF_INET6 container: %m");
+
+ if (!in_addr_is_null(AF_INET6, &link->network->ipv6_token)) {
+ r = sd_netlink_message_append_in6_addr(req, IFLA_INET6_TOKEN, &link->network->ipv6_token.in6);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_INET6_TOKEN: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close AF_INET6 container: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close IFLA_AF_SPEC container: %m");
+ }
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_up_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int link_up_can(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+
+ log_link_debug(link, "Bringing CAN link up");
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_rtnl_message_link_set_flags(req, IFF_UP, IFF_UP);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set link flags: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_up_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int link_set_can(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ log_link_debug(link, "link_set_can");
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &m, RTM_NEWLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_set_flags(m, NLM_F_REQUEST | NLM_F_ACK);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set netlink flags: %m");
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, link->kind);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ if (link->network->can_bitrate > 0 || link->network->can_sample_point > 0) {
+ struct can_bittiming bt = {
+ .bitrate = link->network->can_bitrate,
+ .sample_point = link->network->can_sample_point,
+ };
+
+ if (link->network->can_bitrate > UINT32_MAX) {
+ log_link_error(link, "bitrate (%zu) too big.", link->network->can_bitrate);
+ return -ERANGE;
+ }
+
+ log_link_debug(link, "Setting bitrate = %d bit/s", bt.bitrate);
+ if (link->network->can_sample_point > 0)
+ log_link_debug(link, "Setting sample point = %d.%d%%", bt.sample_point / 10, bt.sample_point % 10);
+ else
+ log_link_debug(link, "Using default sample point");
+
+ r = sd_netlink_message_append_data(m, IFLA_CAN_BITTIMING, &bt, sizeof(bt));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_CAN_BITTIMING attribute: %m");
+ }
+
+ if (link->network->can_restart_us > 0) {
+ char time_string[FORMAT_TIMESPAN_MAX];
+ uint64_t restart_ms;
+
+ if (link->network->can_restart_us == USEC_INFINITY)
+ restart_ms = 0;
+ else
+ restart_ms = DIV_ROUND_UP(link->network->can_restart_us, USEC_PER_MSEC);
+
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX, restart_ms * 1000, MSEC_PER_SEC);
+
+ if (restart_ms > UINT32_MAX) {
+ log_link_error(link, "restart timeout (%s) too big.", time_string);
+ return -ERANGE;
+ }
+
+ log_link_debug(link, "Setting restart = %s", time_string);
+
+ r = sd_netlink_message_append_u32(m, IFLA_CAN_RESTART_MS, restart_ms);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_CAN_RESTART_MS attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to close netlink container: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, m, link_set_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ if (!(link->flags & IFF_UP)) {
+ r = link_up_can(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ log_link_debug(link, "link_set_can done");
+
+ return r;
+}
+
+static int link_down_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not bring down interface: %m");
+
+ if (streq_ptr(link->kind, "can"))
+ link_set_can(link);
+
+ return 1;
+}
+
+int link_down(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ log_link_debug(link, "Bringing link down");
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req,
+ RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_rtnl_message_link_set_flags(req, 0, IFF_UP);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set link flags: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_down_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int link_handle_bound_to_list(Link *link) {
+ Link *l;
+ Iterator i;
+ int r;
+ bool required_up = false;
+ bool link_is_up = false;
+
+ assert(link);
+
+ if (hashmap_isempty(link->bound_to_links))
+ return 0;
+
+ if (link->flags & IFF_UP)
+ link_is_up = true;
+
+ HASHMAP_FOREACH (l, link->bound_to_links, i)
+ if (link_has_carrier(l)) {
+ required_up = true;
+ break;
+ }
+
+ if (!required_up && link_is_up) {
+ r = link_down(link);
+ if (r < 0)
+ return r;
+ } else if (required_up && !link_is_up) {
+ r = link_up(link);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int link_handle_bound_by_list(Link *link) {
+ Iterator i;
+ Link *l;
+ int r;
+
+ assert(link);
+
+ if (hashmap_isempty(link->bound_by_links))
+ return 0;
+
+ HASHMAP_FOREACH (l, link->bound_by_links, i) {
+ r = link_handle_bound_to_list(l);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int link_put_carrier(Link *link, Link *carrier, Hashmap **h) {
+ int r;
+
+ assert(link);
+ assert(carrier);
+
+ if (link == carrier)
+ return 0;
+
+ if (hashmap_get(*h, INT_TO_PTR(carrier->ifindex)))
+ return 0;
+
+ r = hashmap_ensure_allocated(h, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(*h, INT_TO_PTR(carrier->ifindex), carrier);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int link_new_bound_by_list(Link *link) {
+ Manager *m;
+ Link *carrier;
+ Iterator i;
+ int r;
+ bool list_updated = false;
+
+ assert(link);
+ assert(link->manager);
+
+ m = link->manager;
+
+ HASHMAP_FOREACH(carrier, m->links, i) {
+ if (!carrier->network)
+ continue;
+
+ if (strv_isempty(carrier->network->bind_carrier))
+ continue;
+
+ if (strv_fnmatch(carrier->network->bind_carrier, link->ifname, 0)) {
+ r = link_put_carrier(link, carrier, &link->bound_by_links);
+ if (r < 0)
+ return r;
+
+ list_updated = true;
+ }
+ }
+
+ if (list_updated)
+ link_dirty(link);
+
+ HASHMAP_FOREACH(carrier, link->bound_by_links, i) {
+ r = link_put_carrier(carrier, link, &carrier->bound_to_links);
+ if (r < 0)
+ return r;
+
+ link_dirty(carrier);
+ }
+
+ return 0;
+}
+
+static int link_new_bound_to_list(Link *link) {
+ Manager *m;
+ Link *carrier;
+ Iterator i;
+ int r;
+ bool list_updated = false;
+
+ assert(link);
+ assert(link->manager);
+
+ if (!link->network)
+ return 0;
+
+ if (strv_isempty(link->network->bind_carrier))
+ return 0;
+
+ m = link->manager;
+
+ HASHMAP_FOREACH (carrier, m->links, i) {
+ if (strv_fnmatch(link->network->bind_carrier, carrier->ifname, 0)) {
+ r = link_put_carrier(link, carrier, &link->bound_to_links);
+ if (r < 0)
+ return r;
+
+ list_updated = true;
+ }
+ }
+
+ if (list_updated)
+ link_dirty(link);
+
+ HASHMAP_FOREACH (carrier, link->bound_to_links, i) {
+ r = link_put_carrier(carrier, link, &carrier->bound_by_links);
+ if (r < 0)
+ return r;
+
+ link_dirty(carrier);
+ }
+
+ return 0;
+}
+
+static int link_new_carrier_maps(Link *link) {
+ int r;
+
+ r = link_new_bound_by_list(link);
+ if (r < 0)
+ return r;
+
+ r = link_handle_bound_by_list(link);
+ if (r < 0)
+ return r;
+
+ r = link_new_bound_to_list(link);
+ if (r < 0)
+ return r;
+
+ r = link_handle_bound_to_list(link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void link_free_bound_to_list(Link *link) {
+ Link *bound_to;
+ Iterator i;
+
+ HASHMAP_FOREACH (bound_to, link->bound_to_links, i) {
+ hashmap_remove(link->bound_to_links, INT_TO_PTR(bound_to->ifindex));
+
+ if (hashmap_remove(bound_to->bound_by_links, INT_TO_PTR(link->ifindex)))
+ link_dirty(bound_to);
+ }
+
+ return;
+}
+
+static void link_free_bound_by_list(Link *link) {
+ Link *bound_by;
+ Iterator i;
+
+ HASHMAP_FOREACH (bound_by, link->bound_by_links, i) {
+ hashmap_remove(link->bound_by_links, INT_TO_PTR(bound_by->ifindex));
+
+ if (hashmap_remove(bound_by->bound_to_links, INT_TO_PTR(link->ifindex))) {
+ link_dirty(bound_by);
+ link_handle_bound_to_list(bound_by);
+ }
+ }
+
+ return;
+}
+
+static void link_free_carrier_maps(Link *link) {
+ bool list_updated = false;
+
+ assert(link);
+
+ if (!hashmap_isempty(link->bound_to_links)) {
+ link_free_bound_to_list(link);
+ list_updated = true;
+ }
+
+ if (!hashmap_isempty(link->bound_by_links)) {
+ link_free_bound_by_list(link);
+ list_updated = true;
+ }
+
+ if (list_updated)
+ link_dirty(link);
+
+ return;
+}
+
+void link_drop(Link *link) {
+ if (!link || link->state == LINK_STATE_LINGER)
+ return;
+
+ link_set_state(link, LINK_STATE_LINGER);
+
+ link_free_carrier_maps(link);
+
+ log_link_debug(link, "Link removed");
+
+ (void) unlink(link->state_file);
+
+ link_detach_from_manager(link);
+
+ link_unref(link);
+
+ return;
+}
+
+static int link_joined(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ if (!hashmap_isempty(link->bound_to_links)) {
+ r = link_handle_bound_to_list(link);
+ if (r < 0)
+ return r;
+ } else if (!(link->flags & IFF_UP)) {
+ r = link_up(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ if (link->network->bridge) {
+ r = link_set_bridge(link);
+ if (r < 0)
+ log_link_error_errno(link, r, "Could not set bridge message: %m");
+ }
+
+ if (link->network->bond) {
+ r = link_bond_set(link);
+ if (r < 0)
+ log_link_error_errno(link, r, "Could not set bond message: %m");
+ }
+
+ if (link->network->use_br_vlan &&
+ (link->network->bridge || streq_ptr("bridge", link->kind))) {
+ r = link_set_bridge_vlan(link);
+ if (r < 0)
+ log_link_error_errno(link, r, "Could not set bridge vlan: %m");
+ }
+
+ /* Skip setting up addresses until it gets carrier,
+ or it would try to set addresses twice,
+ which is bad for non-idempotent steps. */
+ if (!link_has_carrier(link) && !link->network->configure_without_carrier)
+ return 0;
+
+ return link_request_set_addresses(link);
+}
+
+static int netdev_join_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ link->enslaving--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_error_errno(link, r, "Could not join netdev: %m");
+ link_enter_failed(link);
+ return 1;
+ } else
+ log_link_debug(link, "Joined netdev");
+
+ if (link->enslaving <= 0)
+ link_joined(link);
+
+ return 1;
+}
+
+static int link_enter_join_netdev(Link *link) {
+ NetDev *netdev;
+ Iterator i;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->state == LINK_STATE_PENDING);
+
+ link_set_state(link, LINK_STATE_CONFIGURING);
+
+ link_dirty(link);
+
+ if (!link->network->bridge &&
+ !link->network->bond &&
+ !link->network->vrf &&
+ hashmap_isempty(link->network->stacked_netdevs))
+ return link_joined(link);
+
+ if (link->network->bond) {
+ if (link->network->bond->state == NETDEV_STATE_READY &&
+ link->network->bond->ifindex == link->master_ifindex)
+ return link_joined(link);
+
+ log_struct(LOG_DEBUG,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(link->network->bond),
+ LOG_LINK_MESSAGE(link, "Enslaving by '%s'", link->network->bond->ifname));
+
+ r = netdev_join(link->network->bond, link, netdev_join_handler);
+ if (r < 0) {
+ log_struct_errno(LOG_WARNING, r,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(link->network->bond),
+ LOG_LINK_MESSAGE(link, "Could not join netdev '%s': %m", link->network->bond->ifname));
+ link_enter_failed(link);
+ return r;
+ }
+
+ link->enslaving++;
+ }
+
+ if (link->network->bridge) {
+ log_struct(LOG_DEBUG,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(link->network->bridge),
+ LOG_LINK_MESSAGE(link, "Enslaving by '%s'", link->network->bridge->ifname));
+
+ r = netdev_join(link->network->bridge, link, netdev_join_handler);
+ if (r < 0) {
+ log_struct_errno(LOG_WARNING, r,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(link->network->bridge),
+ LOG_LINK_MESSAGE(link, "Could not join netdev '%s': %m", link->network->bridge->ifname));
+ link_enter_failed(link);
+ return r;
+ }
+
+ link->enslaving++;
+ }
+
+ if (link->network->vrf) {
+ log_struct(LOG_DEBUG,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(link->network->vrf),
+ LOG_LINK_MESSAGE(link, "Enslaving by '%s'", link->network->vrf->ifname));
+
+ r = netdev_join(link->network->vrf, link, netdev_join_handler);
+ if (r < 0) {
+ log_struct_errno(LOG_WARNING, r,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(link->network->vrf),
+ LOG_LINK_MESSAGE(link, "Could not join netdev '%s': %m", link->network->vrf->ifname));
+ link_enter_failed(link);
+ return r;
+ }
+
+ link->enslaving++;
+ }
+
+ HASHMAP_FOREACH(netdev, link->network->stacked_netdevs, i) {
+
+ if (netdev->ifindex > 0) {
+ link_joined(link);
+ continue;
+ }
+
+ log_struct(LOG_DEBUG,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(netdev),
+ LOG_LINK_MESSAGE(link, "Enslaving by '%s'", netdev->ifname));
+
+ r = netdev_join(netdev, link, netdev_join_handler);
+ if (r < 0) {
+ log_struct_errno(LOG_WARNING, r,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(netdev),
+ LOG_LINK_MESSAGE(link, "Could not join netdev '%s': %m", netdev->ifname));
+ link_enter_failed(link);
+ return r;
+ }
+
+ link->enslaving++;
+ }
+
+ return 0;
+}
+
+static int link_set_ipv4_forward(Link *link) {
+ int r;
+
+ if (!link_ipv4_forward_enabled(link))
+ return 0;
+
+ /* We propagate the forwarding flag from one interface to the
+ * global setting one way. This means: as long as at least one
+ * interface was configured at any time that had IP forwarding
+ * enabled the setting will stay on for good. We do this
+ * primarily to keep IPv4 and IPv6 packet forwarding behaviour
+ * somewhat in sync (see below). */
+
+ r = write_string_file("/proc/sys/net/ipv4/ip_forward", "1", WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot turn on IPv4 packet forwarding, ignoring: %m");
+
+ return 0;
+}
+
+static int link_set_ipv6_forward(Link *link) {
+ int r;
+
+ if (!link_ipv6_forward_enabled(link))
+ return 0;
+
+ /* On Linux, the IPv6 stack does not know a per-interface
+ * packet forwarding setting: either packet forwarding is on
+ * for all, or off for all. We hence don't bother with a
+ * per-interface setting, but simply propagate the interface
+ * flag, if it is set, to the global flag, one-way. Note that
+ * while IPv4 would allow a per-interface flag, we expose the
+ * same behaviour there and also propagate the setting from
+ * one to all, to keep things simple (see above). */
+
+ r = write_string_file("/proc/sys/net/ipv6/conf/all/forwarding", "1", WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot configure IPv6 packet forwarding, ignoring: %m");
+
+ return 0;
+}
+
+static int link_set_ipv6_privacy_extensions(Link *link) {
+ char buf[DECIMAL_STR_MAX(unsigned) + 1];
+ IPv6PrivacyExtensions s;
+ const char *p = NULL;
+ int r;
+
+ s = link_ipv6_privacy_extensions(link);
+ if (s < 0)
+ return 0;
+
+ p = strjoina("/proc/sys/net/ipv6/conf/", link->ifname, "/use_tempaddr");
+ xsprintf(buf, "%u", (unsigned) link->network->ipv6_privacy_extensions);
+
+ r = write_string_file(p, buf, WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot configure IPv6 privacy extension for interface: %m");
+
+ return 0;
+}
+
+static int link_set_ipv6_accept_ra(Link *link) {
+ const char *p = NULL;
+ int r;
+
+ /* Make this a NOP if IPv6 is not available */
+ if (!socket_ipv6_is_supported())
+ return 0;
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ if (!link->network)
+ return 0;
+
+ p = strjoina("/proc/sys/net/ipv6/conf/", link->ifname, "/accept_ra");
+
+ /* We handle router advertisements ourselves, tell the kernel to GTFO */
+ r = write_string_file(p, "0", WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot disable kernel IPv6 accept_ra for interface: %m");
+
+ return 0;
+}
+
+static int link_set_ipv6_dad_transmits(Link *link) {
+ char buf[DECIMAL_STR_MAX(int) + 1];
+ const char *p = NULL;
+ int r;
+
+ /* Make this a NOP if IPv6 is not available */
+ if (!socket_ipv6_is_supported())
+ return 0;
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ if (!link->network)
+ return 0;
+
+ if (link->network->ipv6_dad_transmits < 0)
+ return 0;
+
+ p = strjoina("/proc/sys/net/ipv6/conf/", link->ifname, "/dad_transmits");
+ xsprintf(buf, "%i", link->network->ipv6_dad_transmits);
+
+ r = write_string_file(p, buf, WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot set IPv6 dad transmits for interface: %m");
+
+ return 0;
+}
+
+static int link_set_ipv6_hop_limit(Link *link) {
+ char buf[DECIMAL_STR_MAX(int) + 1];
+ const char *p = NULL;
+ int r;
+
+ /* Make this a NOP if IPv6 is not available */
+ if (!socket_ipv6_is_supported())
+ return 0;
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ if (!link->network)
+ return 0;
+
+ if (link->network->ipv6_hop_limit < 0)
+ return 0;
+
+ p = strjoina("/proc/sys/net/ipv6/conf/", link->ifname, "/hop_limit");
+ xsprintf(buf, "%i", link->network->ipv6_hop_limit);
+
+ r = write_string_file(p, buf, WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot set IPv6 hop limit for interface: %m");
+
+ return 0;
+}
+
+static int link_set_ipv6_mtu(Link *link) {
+ char buf[DECIMAL_STR_MAX(unsigned) + 1];
+ const char *p = NULL;
+ int r;
+
+ /* Make this a NOP if IPv6 is not available */
+ if (!socket_ipv6_is_supported())
+ return 0;
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ if (link->network->ipv6_mtu == 0)
+ return 0;
+
+ p = strjoina("/proc/sys/net/ipv6/conf/", link->ifname, "/mtu");
+
+ xsprintf(buf, "%" PRIu32, link->network->ipv6_mtu);
+
+ r = write_string_file(p, buf, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot set IPv6 MTU for interface: %m");
+
+ return 0;
+}
+
+static bool link_is_static_address_configured(Link *link, Address *address) {
+ Address *net_address;
+
+ assert(link);
+ assert(address);
+
+ if (!link->network)
+ return false;
+
+ LIST_FOREACH(addresses, net_address, link->network->static_addresses)
+ if (address_equal(net_address, address))
+ return true;
+
+ return false;
+}
+
+static bool link_is_static_route_configured(Link *link, Route *route) {
+ Route *net_route;
+
+ assert(link);
+ assert(route);
+
+ if (!link->network)
+ return false;
+
+ LIST_FOREACH(routes, net_route, link->network->static_routes)
+ if (route_equal(net_route, route))
+ return true;
+
+ return false;
+}
+
+static int link_drop_foreign_config(Link *link) {
+ Address *address;
+ Route *route;
+ Iterator i;
+ int r;
+
+ SET_FOREACH(address, link->addresses_foreign, i) {
+ /* we consider IPv6LL addresses to be managed by the kernel */
+ if (address->family == AF_INET6 && in_addr_is_link_local(AF_INET6, &address->in_addr) == 1)
+ continue;
+
+ if (link_is_static_address_configured(link, address)) {
+ r = address_add(link, address->family, &address->in_addr, address->prefixlen, NULL);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to add address: %m");
+ } else {
+ r = address_remove(address, link, NULL);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ SET_FOREACH(route, link->routes_foreign, i) {
+ /* do not touch routes managed by the kernel */
+ if (route->protocol == RTPROT_KERNEL)
+ continue;
+
+ if (link_is_static_route_configured(link, route)) {
+ r = route_add(link, route->family, &route->dst, route->dst_prefixlen, route->tos, route->priority, route->table, NULL);
+ if (r < 0)
+ return r;
+ } else {
+ r = route_remove(route, link, NULL);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int link_drop_config(Link *link) {
+ Address *address, *pool_address;
+ Route *route;
+ Iterator i;
+ int r;
+
+ SET_FOREACH(address, link->addresses, i) {
+ /* we consider IPv6LL addresses to be managed by the kernel */
+ if (address->family == AF_INET6 && in_addr_is_link_local(AF_INET6, &address->in_addr) == 1)
+ continue;
+
+ r = address_remove(address, link, NULL);
+ if (r < 0)
+ return r;
+
+ /* If this address came from an address pool, clean up the pool */
+ LIST_FOREACH(addresses, pool_address, link->pool_addresses) {
+ if (address_equal(address, pool_address)) {
+ LIST_REMOVE(addresses, link->pool_addresses, pool_address);
+ address_free(pool_address);
+ break;
+ }
+ }
+ }
+
+ SET_FOREACH(route, link->routes, i) {
+ /* do not touch routes managed by the kernel */
+ if (route->protocol == RTPROT_KERNEL)
+ continue;
+
+ r = route_remove(route, link, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ ndisc_flush(link);
+
+ return 0;
+}
+
+static int link_update_lldp(Link *link) {
+ int r;
+
+ assert(link);
+
+ if (!link->lldp)
+ return 0;
+
+ if (link->flags & IFF_UP) {
+ r = sd_lldp_start(link->lldp);
+ if (r > 0)
+ log_link_debug(link, "Started LLDP.");
+ } else {
+ r = sd_lldp_stop(link->lldp);
+ if (r > 0)
+ log_link_debug(link, "Stopped LLDP.");
+ }
+
+ return r;
+}
+
+static int link_configure_can(Link *link) {
+ int r;
+
+ if (streq_ptr(link->kind, "can")) {
+ /* The CAN interface must be down to configure bitrate, etc... */
+ if ((link->flags & IFF_UP)) {
+ r = link_down(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+
+ return 0;
+ }
+
+ return link_set_can(link);
+ }
+
+ if (!(link->flags & IFF_UP)) {
+ r = link_up_can(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int link_configure(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->state == LINK_STATE_PENDING);
+
+ if (STRPTR_IN_SET(link->kind, "can", "vcan"))
+ return link_configure_can(link);
+
+ /* Drop foreign config, but ignore loopback or critical devices.
+ * We do not want to remove loopback address or addresses used for root NFS. */
+ if (!(link->flags & IFF_LOOPBACK) && !(link->network->dhcp_critical)) {
+ r = link_drop_foreign_config(link);
+ if (r < 0)
+ return r;
+ }
+
+ r = link_set_proxy_arp(link);
+ if (r < 0)
+ return r;
+
+ r = ipv6_proxy_ndp_addresses_configure(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_ipv4_forward(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_ipv6_forward(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_ipv6_privacy_extensions(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_ipv6_accept_ra(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_ipv6_dad_transmits(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_ipv6_hop_limit(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_flags(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_ipv6_mtu(link);
+ if (r < 0)
+ return r;
+
+ if (link_ipv4ll_enabled(link)) {
+ r = ipv4ll_configure(link);
+ if (r < 0)
+ return r;
+ }
+
+ if (link_dhcp4_enabled(link)) {
+ r = dhcp4_set_promote_secondaries(link);
+ if (r < 0)
+ return r;
+
+ r = dhcp4_configure(link);
+ if (r < 0)
+ return r;
+ }
+
+ if (link_dhcp4_server_enabled(link)) {
+ r = sd_dhcp_server_new(&link->dhcp_server, link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_dhcp_server_attach_event(link->dhcp_server, NULL, 0);
+ if (r < 0)
+ return r;
+ }
+
+ if (link_dhcp6_enabled(link) ||
+ link_ipv6_accept_ra_enabled(link)) {
+ r = dhcp6_configure(link);
+ if (r < 0)
+ return r;
+ }
+
+ if (link_ipv6_accept_ra_enabled(link)) {
+ r = ndisc_configure(link);
+ if (r < 0)
+ return r;
+ }
+
+ if (link_radv_enabled(link)) {
+ r = radv_configure(link);
+ if (r < 0)
+ return r;
+ }
+
+ if (link_lldp_rx_enabled(link)) {
+ r = sd_lldp_new(&link->lldp);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_set_ifindex(link->lldp, link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_match_capabilities(link->lldp,
+ link->network->lldp_mode == LLDP_MODE_ROUTERS_ONLY ?
+ SD_LLDP_SYSTEM_CAPABILITIES_ALL_ROUTERS :
+ SD_LLDP_SYSTEM_CAPABILITIES_ALL);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_set_filter_address(link->lldp, &link->mac);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_attach_event(link->lldp, NULL, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_set_callback(link->lldp, lldp_handler, link);
+ if (r < 0)
+ return r;
+
+ r = link_update_lldp(link);
+ if (r < 0)
+ return r;
+ }
+
+ if (link->network->mtu > 0) {
+ r = link_set_mtu(link, link->network->mtu);
+ if (r < 0)
+ return r;
+ }
+
+ if (socket_ipv6_is_supported()) {
+ r = link_configure_addrgen_mode(link);
+ if (r < 0)
+ return r;
+ }
+
+ return link_configure_after_setting_mtu(link);
+}
+
+static int link_configure_after_setting_mtu(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->state == LINK_STATE_PENDING);
+
+ if (link->setting_mtu)
+ return 0;
+
+ if (link_has_carrier(link) || link->network->configure_without_carrier) {
+ r = link_acquire_conf(link);
+ if (r < 0)
+ return r;
+ }
+
+ return link_enter_join_netdev(link);
+}
+
+static int duid_set_uuid(DUID *duid, sd_id128_t uuid) {
+ assert(duid);
+
+ if (duid->raw_data_len > 0)
+ return 0;
+
+ if (duid->type != DUID_TYPE_UUID)
+ return -EINVAL;
+
+ memcpy(&duid->raw_data, &uuid, sizeof(sd_id128_t));
+ duid->raw_data_len = sizeof(sd_id128_t);
+
+ return 1;
+}
+
+int get_product_uuid_handler(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ Manager *manager = userdata;
+ const sd_bus_error *e;
+ const void *a;
+ size_t sz;
+ DUID *duid;
+ Link *link;
+ int r;
+
+ assert(m);
+ assert(manager);
+
+ e = sd_bus_message_get_error(m);
+ if (e) {
+ log_error_errno(sd_bus_error_get_errno(e),
+ "Could not get product UUID. Falling back to use machine-app-specific ID as DUID-UUID: %s",
+ e->message);
+ goto configure;
+ }
+
+ r = sd_bus_message_read_array(m, 'y', &a, &sz);
+ if (r < 0)
+ goto configure;
+
+ if (sz != sizeof(sd_id128_t)) {
+ log_error("Invalid product UUID. Falling back to use machine-app-specific ID as DUID-UUID.");
+ goto configure;
+ }
+
+ memcpy(&manager->product_uuid, a, sz);
+ while ((duid = set_steal_first(manager->duids_requesting_uuid)))
+ (void) duid_set_uuid(duid, manager->product_uuid);
+
+ manager->duids_requesting_uuid = set_free(manager->duids_requesting_uuid);
+
+configure:
+ while ((link = set_steal_first(manager->links_requesting_uuid))) {
+ r = link_configure(link);
+ if (r < 0)
+ log_link_error_errno(link, r, "Failed to configure link: %m");
+ }
+
+ manager->links_requesting_uuid = set_free(manager->links_requesting_uuid);
+
+ /* To avoid calling GetProductUUID() bus method so frequently, set the flag below
+ * even if the method fails. */
+ manager->has_product_uuid = true;
+
+ return 1;
+}
+
+static bool link_requires_uuid(Link *link) {
+ const DUID *duid;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->network);
+
+ duid = link_get_duid(link);
+ if (duid->type != DUID_TYPE_UUID || duid->raw_data_len != 0)
+ return false;
+
+ if (link_dhcp4_enabled(link) && IN_SET(link->network->dhcp_client_identifier, DHCP_CLIENT_ID_DUID, DHCP_CLIENT_ID_DUID_ONLY))
+ return true;
+
+ if (link_dhcp6_enabled(link) || link_ipv6_accept_ra_enabled(link))
+ return true;
+
+ return false;
+}
+
+static int link_configure_duid(Link *link) {
+ Manager *m;
+ DUID *duid;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->network);
+
+ m = link->manager;
+ duid = link_get_duid(link);
+
+ if (!link_requires_uuid(link))
+ return 1;
+
+ if (m->has_product_uuid) {
+ (void) duid_set_uuid(duid, m->product_uuid);
+ return 1;
+ }
+
+ if (!m->links_requesting_uuid) {
+ r = manager_request_product_uuid(m, link);
+ if (r < 0) {
+ if (r == -ENOMEM)
+ return r;
+
+ log_link_warning_errno(link, r,
+ "Failed to get product UUID. Falling back to use machine-app-specific ID as DUID-UUID: %m");
+ return 1;
+ }
+ } else {
+ r = set_put(m->links_requesting_uuid, link);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(m->duids_requesting_uuid, duid);
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+static int link_initialized_and_synced(Link *link) {
+ Network *network;
+ int r;
+
+ assert(link);
+ assert(link->ifname);
+ assert(link->manager);
+
+ if (link->state != LINK_STATE_PENDING)
+ return 1;
+
+ log_link_debug(link, "Link state is up-to-date");
+
+ r = link_new_bound_by_list(link);
+ if (r < 0)
+ return r;
+
+ r = link_handle_bound_by_list(link);
+ if (r < 0)
+ return r;
+
+ if (!link->network) {
+ r = network_get(link->manager, link->sd_device, link->ifname,
+ &link->mac, &network);
+ if (r == -ENOENT) {
+ link_enter_unmanaged(link);
+ return 1;
+ } else if (r == 0 && network->unmanaged) {
+ link_enter_unmanaged(link);
+ return 0;
+ } else if (r < 0)
+ return r;
+
+ if (link->flags & IFF_LOOPBACK) {
+ if (network->link_local != ADDRESS_FAMILY_NO)
+ log_link_debug(link, "Ignoring link-local autoconfiguration for loopback link");
+
+ if (network->dhcp != ADDRESS_FAMILY_NO)
+ log_link_debug(link, "Ignoring DHCP clients for loopback link");
+
+ if (network->dhcp_server)
+ log_link_debug(link, "Ignoring DHCP server for loopback link");
+ }
+
+ r = network_apply(network, link);
+ if (r < 0)
+ return r;
+ }
+
+ r = link_new_bound_to_list(link);
+ if (r < 0)
+ return r;
+
+ /* link_configure_duid() returns 0 if it requests product UUID. In that case,
+ * link_configure() is called later asynchronously. */
+ r = link_configure_duid(link);
+ if (r <= 0)
+ return r;
+
+ r = link_configure(link);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int link_initialized_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ (void) link_initialized_and_synced(link);
+ return 1;
+}
+
+int link_initialized(Link *link, sd_device *device) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+ assert(device);
+
+ if (link->state != LINK_STATE_PENDING)
+ return 0;
+
+ if (link->sd_device)
+ return 0;
+
+ log_link_debug(link, "udev initialized link");
+
+ link->sd_device = sd_device_ref(device);
+
+ /* udev has initialized the link, but we don't know if we have yet
+ * processed the NEWLINK messages with the latest state. Do a GETLINK,
+ * when it returns we know that the pending NEWLINKs have already been
+ * processed and that we are up-to-date */
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_GETLINK,
+ link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_initialized_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return r;
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int link_load(Link *link) {
+ _cleanup_free_ char *network_file = NULL,
+ *addresses = NULL,
+ *routes = NULL,
+ *dhcp4_address = NULL,
+ *ipv4ll_address = NULL;
+ union in_addr_union address;
+ union in_addr_union route_dst;
+ const char *p;
+ int r;
+
+ assert(link);
+
+ r = parse_env_file(NULL, link->state_file,
+ "NETWORK_FILE", &network_file,
+ "ADDRESSES", &addresses,
+ "ROUTES", &routes,
+ "DHCP4_ADDRESS", &dhcp4_address,
+ "IPV4LL_ADDRESS", &ipv4ll_address);
+ if (r < 0 && r != -ENOENT)
+ return log_link_error_errno(link, r, "Failed to read %s: %m", link->state_file);
+
+ if (network_file) {
+ Network *network;
+ char *suffix;
+
+ /* drop suffix */
+ suffix = strrchr(network_file, '.');
+ if (!suffix) {
+ log_link_debug(link, "Failed to get network name from %s", network_file);
+ goto network_file_fail;
+ }
+ *suffix = '\0';
+
+ r = network_get_by_name(link->manager, basename(network_file), &network);
+ if (r < 0) {
+ log_link_debug_errno(link, r, "Failed to get network %s: %m", basename(network_file));
+ goto network_file_fail;
+ }
+
+ r = network_apply(network, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to apply network %s: %m", basename(network_file));
+ }
+
+network_file_fail:
+
+ if (addresses) {
+ p = addresses;
+
+ for (;;) {
+ _cleanup_free_ char *address_str = NULL;
+ char *prefixlen_str;
+ int family;
+ unsigned char prefixlen;
+
+ r = extract_first_word(&p, &address_str, NULL, 0);
+ if (r < 0) {
+ log_link_debug_errno(link, r, "Failed to extract next address string: %m");
+ continue;
+ }
+ if (r == 0)
+ break;
+
+ prefixlen_str = strchr(address_str, '/');
+ if (!prefixlen_str) {
+ log_link_debug(link, "Failed to parse address and prefix length %s", address_str);
+ continue;
+ }
+
+ *prefixlen_str++ = '\0';
+
+ r = sscanf(prefixlen_str, "%hhu", &prefixlen);
+ if (r != 1) {
+ log_link_error(link, "Failed to parse prefixlen %s", prefixlen_str);
+ continue;
+ }
+
+ r = in_addr_from_string_auto(address_str, &family, &address);
+ if (r < 0) {
+ log_link_debug_errno(link, r, "Failed to parse address %s: %m", address_str);
+ continue;
+ }
+
+ r = address_add(link, family, &address, prefixlen, NULL);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to add address: %m");
+ }
+ }
+
+ if (routes) {
+ p = routes;
+
+ for (;;) {
+ Route *route;
+ _cleanup_free_ char *route_str = NULL;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *expire = NULL;
+ usec_t lifetime;
+ char *prefixlen_str;
+ int family;
+ unsigned char prefixlen, tos, table;
+ uint32_t priority;
+
+ r = extract_first_word(&p, &route_str, NULL, 0);
+ if (r < 0) {
+ log_link_debug_errno(link, r, "Failed to extract next route string: %m");
+ continue;
+ }
+ if (r == 0)
+ break;
+
+ prefixlen_str = strchr(route_str, '/');
+ if (!prefixlen_str) {
+ log_link_debug(link, "Failed to parse route %s", route_str);
+ continue;
+ }
+
+ *prefixlen_str++ = '\0';
+
+ r = sscanf(prefixlen_str, "%hhu/%hhu/%"SCNu32"/%hhu/"USEC_FMT, &prefixlen, &tos, &priority, &table, &lifetime);
+ if (r != 5) {
+ log_link_debug(link,
+ "Failed to parse destination prefix length, tos, priority, table or expiration %s",
+ prefixlen_str);
+ continue;
+ }
+
+ r = in_addr_from_string_auto(route_str, &family, &route_dst);
+ if (r < 0) {
+ log_link_debug_errno(link, r, "Failed to parse route destination %s: %m", route_str);
+ continue;
+ }
+
+ r = route_add(link, family, &route_dst, prefixlen, tos, priority, table, &route);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to add route: %m");
+
+ if (lifetime != USEC_INFINITY && !kernel_route_expiration_supported()) {
+ r = sd_event_add_time(link->manager->event, &expire, clock_boottime_or_monotonic(), lifetime,
+ 0, route_expire_handler, route);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not arm route expiration handler: %m");
+ }
+
+ route->lifetime = lifetime;
+ sd_event_source_unref(route->expire);
+ route->expire = TAKE_PTR(expire);
+ }
+ }
+
+ if (dhcp4_address) {
+ r = in_addr_from_string(AF_INET, dhcp4_address, &address);
+ if (r < 0) {
+ log_link_debug_errno(link, r, "Failed to parse DHCPv4 address %s: %m", dhcp4_address);
+ goto dhcp4_address_fail;
+ }
+
+ r = sd_dhcp_client_new(&link->dhcp_client, link->network ? link->network->dhcp_anonymize : 0);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to create DHCPv4 client: %m");
+
+ r = sd_dhcp_client_set_request_address(link->dhcp_client, &address.in);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set initial DHCPv4 address %s: %m", dhcp4_address);
+ }
+
+dhcp4_address_fail:
+
+ if (ipv4ll_address) {
+ r = in_addr_from_string(AF_INET, ipv4ll_address, &address);
+ if (r < 0) {
+ log_link_debug_errno(link, r, "Failed to parse IPv4LL address %s: %m", ipv4ll_address);
+ goto ipv4ll_address_fail;
+ }
+
+ r = sd_ipv4ll_new(&link->ipv4ll);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to create IPv4LL client: %m");
+
+ r = sd_ipv4ll_set_address(link->ipv4ll, &address.in);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set initial IPv4LL address %s: %m", ipv4ll_address);
+ }
+
+ipv4ll_address_fail:
+
+ return 0;
+}
+
+int link_add(Manager *m, sd_netlink_message *message, Link **ret) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ char ifindex_str[2 + DECIMAL_STR_MAX(int)];
+ Link *link;
+ int r;
+
+ assert(m);
+ assert(m->rtnl);
+ assert(message);
+ assert(ret);
+
+ r = link_new(m, message, ret);
+ if (r < 0)
+ return r;
+
+ link = *ret;
+
+ log_link_debug(link, "Link %d added", link->ifindex);
+
+ r = link_load(link);
+ if (r < 0)
+ return r;
+
+ if (detect_container() <= 0) {
+ /* not in a container, udev will be around */
+ sprintf(ifindex_str, "n%d", link->ifindex);
+ r = sd_device_new_from_device_id(&device, ifindex_str);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not find device: %m");
+ goto failed;
+ }
+
+ r = sd_device_get_is_initialized(device);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not determine whether the device is initialized or not: %m");
+ goto failed;
+ }
+ if (r == 0) {
+ /* not yet ready */
+ log_link_debug(link, "link pending udev initialization...");
+ return 0;
+ }
+
+ r = link_initialized(link, device);
+ if (r < 0)
+ goto failed;
+ } else {
+ r = link_initialized_and_synced(link);
+ if (r < 0)
+ goto failed;
+ }
+
+ return 0;
+failed:
+ link_enter_failed(link);
+ return r;
+}
+
+int link_ipv6ll_gained(Link *link, const struct in6_addr *address) {
+ int r;
+
+ assert(link);
+
+ log_link_info(link, "Gained IPv6LL");
+
+ link->ipv6ll_address = *address;
+ link_check_ready(link);
+
+ if (!IN_SET(link->state, LINK_STATE_PENDING, LINK_STATE_UNMANAGED, LINK_STATE_FAILED)) {
+ r = link_acquire_ipv6_conf(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int link_carrier_gained(Link *link) {
+ int r;
+
+ assert(link);
+
+ if (!IN_SET(link->state, LINK_STATE_PENDING, LINK_STATE_UNMANAGED, LINK_STATE_FAILED)) {
+ r = link_acquire_conf(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+
+ r = link_request_set_addresses(link);
+ if (r < 0)
+ return r;
+ }
+
+ r = link_handle_bound_by_list(link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int link_carrier_lost(Link *link) {
+ int r;
+
+ assert(link);
+
+ /* Some devices reset itself while setting the MTU. This causes the DHCP client fall into a loop.
+ * setting_mtu keep track whether the device got reset because of setting MTU and does not drop the
+ * configuration and stop the clients as well. */
+ if (link->setting_mtu)
+ return 0;
+
+ r = link_stop_clients(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+
+ if (link_dhcp4_server_enabled(link))
+ (void) sd_dhcp_server_stop(link->dhcp_server);
+
+ r = link_drop_config(link);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(link->state, LINK_STATE_UNMANAGED, LINK_STATE_PENDING)) {
+ log_link_debug(link, "State is %s, dropping config", link_state_to_string(link->state));
+ r = link_drop_foreign_config(link);
+ if (r < 0)
+ return r;
+ }
+
+ r = link_handle_bound_by_list(link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int link_carrier_reset(Link *link) {
+ int r;
+
+ assert(link);
+
+ if (link_has_carrier(link)) {
+ r = link_carrier_lost(link);
+ if (r < 0)
+ return r;
+
+ r = link_carrier_gained(link);
+ if (r < 0)
+ return r;
+
+ log_link_info(link, "Reset carrier");
+ }
+
+ return 0;
+}
+
+int link_update(Link *link, sd_netlink_message *m) {
+ struct ether_addr mac;
+ const char *ifname;
+ uint32_t mtu;
+ bool had_carrier, carrier_gained, carrier_lost;
+ int r;
+
+ assert(link);
+ assert(link->ifname);
+ assert(m);
+
+ if (link->state == LINK_STATE_LINGER) {
+ log_link_info(link, "Link readded");
+ link_set_state(link, LINK_STATE_CONFIGURING);
+
+ r = link_new_carrier_maps(link);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_netlink_message_read_string(m, IFLA_IFNAME, &ifname);
+ if (r >= 0 && !streq(ifname, link->ifname)) {
+ log_link_info(link, "Interface name change detected, %s has been renamed to %s.", link->ifname, ifname);
+
+ if (link->state == LINK_STATE_PENDING) {
+ r = free_and_strdup(&link->ifname, ifname);
+ if (r < 0)
+ return r;
+ } else {
+ Manager *manager = link->manager;
+
+ link_drop(link);
+ r = link_add(manager, m, &link);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ r = sd_netlink_message_read_u32(m, IFLA_MTU, &mtu);
+ if (r >= 0 && mtu > 0) {
+ link->mtu = mtu;
+ if (link->original_mtu == 0) {
+ link->original_mtu = mtu;
+ log_link_debug(link, "Saved original MTU: %" PRIu32, link->original_mtu);
+ }
+
+ if (link->dhcp_client) {
+ r = sd_dhcp_client_set_mtu(link->dhcp_client,
+ link->mtu);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update MTU in DHCP client: %m");
+ }
+
+ if (link->radv) {
+ r = sd_radv_set_mtu(link->radv, link->mtu);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not set MTU for Router Advertisement: %m");
+ }
+ }
+
+ /* The kernel may broadcast NEWLINK messages without the MAC address
+ set, simply ignore them. */
+ r = sd_netlink_message_read_ether_addr(m, IFLA_ADDRESS, &mac);
+ if (r >= 0) {
+ if (memcmp(link->mac.ether_addr_octet, mac.ether_addr_octet,
+ ETH_ALEN)) {
+
+ memcpy(link->mac.ether_addr_octet, mac.ether_addr_octet,
+ ETH_ALEN);
+
+ log_link_debug(link, "MAC address: "
+ "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx",
+ mac.ether_addr_octet[0],
+ mac.ether_addr_octet[1],
+ mac.ether_addr_octet[2],
+ mac.ether_addr_octet[3],
+ mac.ether_addr_octet[4],
+ mac.ether_addr_octet[5]);
+
+ if (link->ipv4ll) {
+ r = sd_ipv4ll_set_mac(link->ipv4ll, &link->mac);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update MAC address in IPv4LL client: %m");
+ }
+
+ if (link->dhcp_client) {
+ r = sd_dhcp_client_set_mac(link->dhcp_client,
+ (const uint8_t *) &link->mac,
+ sizeof (link->mac),
+ ARPHRD_ETHER);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update MAC address in DHCP client: %m");
+
+ r = dhcp4_set_client_identifier(link);
+ if (r < 0)
+ return r;
+ }
+
+ if (link->dhcp6_client) {
+ const DUID* duid = link_get_duid(link);
+
+ r = sd_dhcp6_client_set_mac(link->dhcp6_client,
+ (const uint8_t *) &link->mac,
+ sizeof (link->mac),
+ ARPHRD_ETHER);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update MAC address in DHCPv6 client: %m");
+
+ if (link->network->iaid_set) {
+ r = sd_dhcp6_client_set_iaid(link->dhcp6_client,
+ link->network->iaid);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update DHCPv6 IAID: %m");
+ }
+
+ r = sd_dhcp6_client_set_duid(link->dhcp6_client,
+ duid->type,
+ duid->raw_data_len > 0 ? duid->raw_data : NULL,
+ duid->raw_data_len);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update DHCPv6 DUID: %m");
+ }
+
+ if (link->radv) {
+ r = sd_radv_set_mac(link->radv, &link->mac);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update MAC for Router Advertisement: %m");
+ }
+
+ if (link->ndisc) {
+ r = sd_ndisc_set_mac(link->ndisc, &link->mac);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update MAC for ndisc: %m");
+ }
+ }
+ }
+
+ had_carrier = link_has_carrier(link);
+
+ r = link_update_flags(link, m);
+ if (r < 0)
+ return r;
+
+ r = link_update_lldp(link);
+ if (r < 0)
+ return r;
+
+ carrier_gained = !had_carrier && link_has_carrier(link);
+ carrier_lost = had_carrier && !link_has_carrier(link);
+
+ if (carrier_gained) {
+ log_link_info(link, "Gained carrier");
+
+ r = link_carrier_gained(link);
+ if (r < 0)
+ return r;
+ } else if (carrier_lost) {
+ log_link_info(link, "Lost carrier");
+
+ r = link_carrier_lost(link);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static void print_link_hashmap(FILE *f, const char *prefix, Hashmap* h) {
+ bool space = false;
+ Iterator i;
+ Link *link;
+
+ assert(f);
+ assert(prefix);
+
+ if (hashmap_isempty(h))
+ return;
+
+ fputs(prefix, f);
+ HASHMAP_FOREACH(link, h, i) {
+ if (space)
+ fputc(' ', f);
+
+ fprintf(f, "%i", link->ifindex);
+ space = true;
+ }
+
+ fputc('\n', f);
+}
+
+int link_save(Link *link) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *admin_state, *oper_state;
+ Address *a;
+ Route *route;
+ Iterator i;
+ int r;
+
+ assert(link);
+ assert(link->state_file);
+ assert(link->lease_file);
+ assert(link->manager);
+
+ if (link->state == LINK_STATE_LINGER) {
+ unlink(link->state_file);
+ return 0;
+ }
+
+ link_lldp_save(link);
+
+ admin_state = link_state_to_string(link->state);
+ assert(admin_state);
+
+ oper_state = link_operstate_to_string(link->operstate);
+ assert(oper_state);
+
+ r = fopen_temporary(link->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "ADMIN_STATE=%s\n"
+ "OPER_STATE=%s\n",
+ admin_state, oper_state);
+
+ if (link->network) {
+ bool space;
+ sd_dhcp6_lease *dhcp6_lease = NULL;
+ const char *dhcp_domainname = NULL;
+ char **dhcp6_domains = NULL;
+ char **dhcp_domains = NULL;
+ unsigned j;
+
+ fprintf(f, "REQUIRED_FOR_ONLINE=%s\n",
+ yes_no(link->network->required_for_online));
+
+ if (link->dhcp6_client) {
+ r = sd_dhcp6_client_get_lease(link->dhcp6_client, &dhcp6_lease);
+ if (r < 0 && r != -ENOMSG)
+ log_link_debug(link, "No DHCPv6 lease");
+ }
+
+ fprintf(f, "NETWORK_FILE=%s\n", link->network->filename);
+
+ fputs("DNS=", f);
+ space = false;
+
+ for (j = 0; j < link->network->n_dns; j++) {
+ _cleanup_free_ char *b = NULL;
+
+ r = in_addr_to_string(link->network->dns[j].family,
+ &link->network->dns[j].address, &b);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to format address, ignoring: %m");
+ continue;
+ }
+
+ if (space)
+ fputc(' ', f);
+ fputs(b, f);
+ space = true;
+ }
+
+ if (link->network->dhcp_use_dns &&
+ link->dhcp_lease) {
+ const struct in_addr *addresses;
+
+ r = sd_dhcp_lease_get_dns(link->dhcp_lease, &addresses);
+ if (r > 0) {
+ if (space)
+ fputc(' ', f);
+ serialize_in_addrs(f, addresses, r);
+ space = true;
+ }
+ }
+
+ if (link->network->dhcp_use_dns && dhcp6_lease) {
+ struct in6_addr *in6_addrs;
+
+ r = sd_dhcp6_lease_get_dns(dhcp6_lease, &in6_addrs);
+ if (r > 0) {
+ if (space)
+ fputc(' ', f);
+ serialize_in6_addrs(f, in6_addrs, r);
+ space = true;
+ }
+ }
+
+ /* Make sure to flush out old entries before we use the NDISC data */
+ ndisc_vacuum(link);
+
+ if (link->network->ipv6_accept_ra_use_dns && link->ndisc_rdnss) {
+ NDiscRDNSS *dd;
+
+ SET_FOREACH(dd, link->ndisc_rdnss, i) {
+ if (space)
+ fputc(' ', f);
+
+ serialize_in6_addrs(f, &dd->address, 1);
+ space = true;
+ }
+ }
+
+ fputc('\n', f);
+
+ fputs("NTP=", f);
+ space = false;
+ fputstrv(f, link->network->ntp, NULL, &space);
+
+ if (link->network->dhcp_use_ntp &&
+ link->dhcp_lease) {
+ const struct in_addr *addresses;
+
+ r = sd_dhcp_lease_get_ntp(link->dhcp_lease, &addresses);
+ if (r > 0) {
+ if (space)
+ fputc(' ', f);
+ serialize_in_addrs(f, addresses, r);
+ space = true;
+ }
+ }
+
+ if (link->network->dhcp_use_ntp && dhcp6_lease) {
+ struct in6_addr *in6_addrs;
+ char **hosts;
+
+ r = sd_dhcp6_lease_get_ntp_addrs(dhcp6_lease,
+ &in6_addrs);
+ if (r > 0) {
+ if (space)
+ fputc(' ', f);
+ serialize_in6_addrs(f, in6_addrs, r);
+ space = true;
+ }
+
+ r = sd_dhcp6_lease_get_ntp_fqdn(dhcp6_lease, &hosts);
+ if (r > 0)
+ fputstrv(f, hosts, NULL, &space);
+ }
+
+ fputc('\n', f);
+
+ if (link->network->dhcp_use_domains != DHCP_USE_DOMAINS_NO) {
+ if (link->dhcp_lease) {
+ (void) sd_dhcp_lease_get_domainname(link->dhcp_lease, &dhcp_domainname);
+ (void) sd_dhcp_lease_get_search_domains(link->dhcp_lease, &dhcp_domains);
+ }
+ if (dhcp6_lease)
+ (void) sd_dhcp6_lease_get_domains(dhcp6_lease, &dhcp6_domains);
+ }
+
+ fputs("DOMAINS=", f);
+ space = false;
+ fputstrv(f, link->network->search_domains, NULL, &space);
+
+ if (link->network->dhcp_use_domains == DHCP_USE_DOMAINS_YES) {
+ NDiscDNSSL *dd;
+
+ if (dhcp_domainname)
+ fputs_with_space(f, dhcp_domainname, NULL, &space);
+ if (dhcp_domains)
+ fputstrv(f, dhcp_domains, NULL, &space);
+ if (dhcp6_domains)
+ fputstrv(f, dhcp6_domains, NULL, &space);
+
+ SET_FOREACH(dd, link->ndisc_dnssl, i)
+ fputs_with_space(f, NDISC_DNSSL_DOMAIN(dd), NULL, &space);
+ }
+
+ fputc('\n', f);
+
+ fputs("ROUTE_DOMAINS=", f);
+ space = false;
+ fputstrv(f, link->network->route_domains, NULL, &space);
+
+ if (link->network->dhcp_use_domains == DHCP_USE_DOMAINS_ROUTE) {
+ NDiscDNSSL *dd;
+
+ if (dhcp_domainname)
+ fputs_with_space(f, dhcp_domainname, NULL, &space);
+ if (dhcp_domains)
+ fputstrv(f, dhcp_domains, NULL, &space);
+ if (dhcp6_domains)
+ fputstrv(f, dhcp6_domains, NULL, &space);
+
+ SET_FOREACH(dd, link->ndisc_dnssl, i)
+ fputs_with_space(f, NDISC_DNSSL_DOMAIN(dd), NULL, &space);
+ }
+
+ fputc('\n', f);
+
+ fprintf(f, "LLMNR=%s\n",
+ resolve_support_to_string(link->network->llmnr));
+ fprintf(f, "MDNS=%s\n",
+ resolve_support_to_string(link->network->mdns));
+ if (link->network->dns_default_route >= 0)
+ fprintf(f, "DNS_DEFAULT_ROUTE=%s\n", yes_no(link->network->dns_default_route));
+
+ if (link->network->dns_over_tls_mode != _DNS_OVER_TLS_MODE_INVALID)
+ fprintf(f, "DNS_OVER_TLS=%s\n",
+ dns_over_tls_mode_to_string(link->network->dns_over_tls_mode));
+
+ if (link->network->dnssec_mode != _DNSSEC_MODE_INVALID)
+ fprintf(f, "DNSSEC=%s\n",
+ dnssec_mode_to_string(link->network->dnssec_mode));
+
+ if (!set_isempty(link->network->dnssec_negative_trust_anchors)) {
+ const char *n;
+
+ fputs("DNSSEC_NTA=", f);
+ space = false;
+ SET_FOREACH(n, link->network->dnssec_negative_trust_anchors, i)
+ fputs_with_space(f, n, NULL, &space);
+ fputc('\n', f);
+ }
+
+ fputs("ADDRESSES=", f);
+ space = false;
+ SET_FOREACH(a, link->addresses, i) {
+ _cleanup_free_ char *address_str = NULL;
+
+ r = in_addr_to_string(a->family, &a->in_addr, &address_str);
+ if (r < 0)
+ goto fail;
+
+ fprintf(f, "%s%s/%u", space ? " " : "", address_str, a->prefixlen);
+ space = true;
+ }
+ fputc('\n', f);
+
+ fputs("ROUTES=", f);
+ space = false;
+ SET_FOREACH(route, link->routes, i) {
+ _cleanup_free_ char *route_str = NULL;
+
+ r = in_addr_to_string(route->family, &route->dst, &route_str);
+ if (r < 0)
+ goto fail;
+
+ fprintf(f, "%s%s/%hhu/%hhu/%"PRIu32"/%"PRIu32"/"USEC_FMT,
+ space ? " " : "", route_str,
+ route->dst_prefixlen, route->tos, route->priority, route->table, route->lifetime);
+ space = true;
+ }
+
+ fputc('\n', f);
+ }
+
+ print_link_hashmap(f, "CARRIER_BOUND_TO=", link->bound_to_links);
+ print_link_hashmap(f, "CARRIER_BOUND_BY=", link->bound_by_links);
+
+ if (link->dhcp_lease) {
+ struct in_addr address;
+ const char *tz = NULL;
+
+ assert(link->network);
+
+ r = sd_dhcp_lease_get_timezone(link->dhcp_lease, &tz);
+ if (r >= 0)
+ fprintf(f, "TIMEZONE=%s\n", tz);
+
+ r = sd_dhcp_lease_get_address(link->dhcp_lease, &address);
+ if (r >= 0) {
+ fputs("DHCP4_ADDRESS=", f);
+ serialize_in_addrs(f, &address, 1);
+ fputc('\n', f);
+ }
+
+ r = dhcp_lease_save(link->dhcp_lease, link->lease_file);
+ if (r < 0)
+ goto fail;
+
+ fprintf(f,
+ "DHCP_LEASE=%s\n",
+ link->lease_file);
+ } else
+ unlink(link->lease_file);
+
+ if (link->ipv4ll) {
+ struct in_addr address;
+
+ r = sd_ipv4ll_get_address(link->ipv4ll, &address);
+ if (r >= 0) {
+ fputs("IPV4LL_ADDRESS=", f);
+ serialize_in_addrs(f, &address, 1);
+ fputc('\n', f);
+ }
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, link->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(link->state_file);
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_link_error_errno(link, r, "Failed to save link data to %s: %m", link->state_file);
+}
+
+/* The serialized state in /run is no longer up-to-date. */
+void link_dirty(Link *link) {
+ int r;
+
+ assert(link);
+
+ /* mark manager dirty as link is dirty */
+ manager_dirty(link->manager);
+
+ r = set_ensure_allocated(&link->manager->dirty_links, NULL);
+ if (r < 0)
+ /* allocation errors are ignored */
+ return;
+
+ r = set_put(link->manager->dirty_links, link);
+ if (r <= 0)
+ /* don't take another ref if the link was already dirty */
+ return;
+
+ link_ref(link);
+}
+
+/* The serialized state in /run is up-to-date */
+void link_clean(Link *link) {
+ assert(link);
+ assert(link->manager);
+
+ link_unref(set_remove(link->manager->dirty_links, link));
+}
+
+static const char* const link_state_table[_LINK_STATE_MAX] = {
+ [LINK_STATE_PENDING] = "pending",
+ [LINK_STATE_CONFIGURING] = "configuring",
+ [LINK_STATE_CONFIGURED] = "configured",
+ [LINK_STATE_UNMANAGED] = "unmanaged",
+ [LINK_STATE_FAILED] = "failed",
+ [LINK_STATE_LINGER] = "linger",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(link_state, LinkState);
+
+static const char* const link_operstate_table[_LINK_OPERSTATE_MAX] = {
+ [LINK_OPERSTATE_OFF] = "off",
+ [LINK_OPERSTATE_NO_CARRIER] = "no-carrier",
+ [LINK_OPERSTATE_DORMANT] = "dormant",
+ [LINK_OPERSTATE_CARRIER] = "carrier",
+ [LINK_OPERSTATE_DEGRADED] = "degraded",
+ [LINK_OPERSTATE_ROUTABLE] = "routable",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(link_operstate, LinkOperationalState);
diff --git a/src/network/networkd-link.h b/src/network/networkd-link.h
new file mode 100644
index 0000000..dcb1ea6
--- /dev/null
+++ b/src/network/networkd-link.h
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <endian.h>
+
+#include "sd-bus.h"
+#include "sd-device.h"
+#include "sd-dhcp-client.h"
+#include "sd-dhcp-server.h"
+#include "sd-dhcp6-client.h"
+#include "sd-ipv4ll.h"
+#include "sd-lldp.h"
+#include "sd-ndisc.h"
+#include "sd-radv.h"
+#include "sd-netlink.h"
+
+#include "list.h"
+#include "set.h"
+
+typedef enum LinkState {
+ LINK_STATE_PENDING,
+ LINK_STATE_CONFIGURING,
+ LINK_STATE_CONFIGURED,
+ LINK_STATE_UNMANAGED,
+ LINK_STATE_FAILED,
+ LINK_STATE_LINGER,
+ _LINK_STATE_MAX,
+ _LINK_STATE_INVALID = -1
+} LinkState;
+
+typedef enum LinkOperationalState {
+ LINK_OPERSTATE_OFF,
+ LINK_OPERSTATE_NO_CARRIER,
+ LINK_OPERSTATE_DORMANT,
+ LINK_OPERSTATE_CARRIER,
+ LINK_OPERSTATE_DEGRADED,
+ LINK_OPERSTATE_ROUTABLE,
+ _LINK_OPERSTATE_MAX,
+ _LINK_OPERSTATE_INVALID = -1
+} LinkOperationalState;
+
+typedef struct Manager Manager;
+typedef struct Network Network;
+typedef struct Address Address;
+typedef struct DUID DUID;
+
+typedef struct Link {
+ Manager *manager;
+
+ unsigned n_ref;
+
+ int ifindex;
+ int master_ifindex;
+ char *ifname;
+ char *kind;
+ unsigned short iftype;
+ char *state_file;
+ struct ether_addr mac;
+ struct in6_addr ipv6ll_address;
+ uint32_t mtu;
+ sd_device *sd_device;
+
+ unsigned flags;
+ uint8_t kernel_operstate;
+
+ Network *network;
+
+ LinkState state;
+ LinkOperationalState operstate;
+
+ unsigned address_messages;
+ unsigned address_label_messages;
+ unsigned neighbor_messages;
+ unsigned route_messages;
+ unsigned routing_policy_rule_messages;
+ unsigned routing_policy_rule_remove_messages;
+ unsigned enslaving;
+
+ Set *addresses;
+ Set *addresses_foreign;
+ Set *routes;
+ Set *routes_foreign;
+
+ bool addresses_configured;
+ bool addresses_ready;
+
+ sd_dhcp_client *dhcp_client;
+ sd_dhcp_lease *dhcp_lease;
+ char *lease_file;
+ uint32_t original_mtu;
+ unsigned dhcp4_messages;
+ bool dhcp4_configured;
+ bool dhcp6_configured;
+
+ unsigned ndisc_messages;
+ bool ndisc_configured;
+
+ sd_ipv4ll *ipv4ll;
+ bool ipv4ll_address:1;
+ bool ipv4ll_route:1;
+
+ bool neighbors_configured;
+
+ bool static_routes_configured;
+ bool routing_policy_rules_configured;
+ bool setting_mtu;
+
+ LIST_HEAD(Address, pool_addresses);
+
+ sd_dhcp_server *dhcp_server;
+
+ sd_ndisc *ndisc;
+ Set *ndisc_rdnss;
+ Set *ndisc_dnssl;
+
+ sd_radv *radv;
+
+ sd_dhcp6_client *dhcp6_client;
+ bool rtnl_extended_attrs;
+
+ /* This is about LLDP reception */
+ sd_lldp *lldp;
+ char *lldp_file;
+
+ /* This is about LLDP transmission */
+ unsigned lldp_tx_fast; /* The LLDP txFast counter (See 802.1ab-2009, section 9.2.5.18) */
+ sd_event_source *lldp_emit_event_source;
+
+ Hashmap *bound_by_links;
+ Hashmap *bound_to_links;
+} Link;
+
+typedef int (*link_netlink_message_handler_t)(sd_netlink*, sd_netlink_message*, Link*);
+
+DUID *link_get_duid(Link *link);
+int get_product_uuid_handler(sd_bus_message *m, void *userdata, sd_bus_error *ret_error);
+
+Link *link_unref(Link *link);
+Link *link_ref(Link *link);
+DEFINE_TRIVIAL_DESTRUCTOR(link_netlink_destroy_callback, Link, link_unref);
+
+int link_get(Manager *m, int ifindex, Link **ret);
+int link_add(Manager *manager, sd_netlink_message *message, Link **ret);
+void link_drop(Link *link);
+
+int link_down(Link *link);
+
+void link_enter_failed(Link *link);
+int link_initialized(Link *link, sd_device *device);
+
+void link_check_ready(Link *link);
+
+void link_update_operstate(Link *link);
+int link_update(Link *link, sd_netlink_message *message);
+
+void link_dirty(Link *link);
+void link_clean(Link *link);
+int link_save(Link *link);
+
+int link_carrier_reset(Link *link);
+bool link_has_carrier(Link *link);
+
+int link_ipv6ll_gained(Link *link, const struct in6_addr *address);
+
+int link_set_mtu(Link *link, uint32_t mtu);
+
+int ipv4ll_configure(Link *link);
+int dhcp4_configure(Link *link);
+int dhcp4_set_client_identifier(Link *link);
+int dhcp4_set_promote_secondaries(Link *link);
+int dhcp6_request_prefix_delegation(Link *link);
+int dhcp6_configure(Link *link);
+int dhcp6_request_address(Link *link, int ir);
+int dhcp6_lease_pd_prefix_lost(sd_dhcp6_client *client, Link* link);
+
+const char* link_state_to_string(LinkState s) _const_;
+LinkState link_state_from_string(const char *s) _pure_;
+
+const char* link_operstate_to_string(LinkOperationalState s) _const_;
+LinkOperationalState link_operstate_from_string(const char *s) _pure_;
+
+extern const sd_bus_vtable link_vtable[];
+
+int link_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error);
+int link_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error);
+int link_send_changed(Link *link, const char *property, ...) _sentinel_;
+
+/* Macros which append INTERFACE= to the message */
+
+#define log_link_full(link, level, error, ...) \
+ ({ \
+ const Link *_l = (link); \
+ _l ? log_object_internal(level, error, __FILE__, __LINE__, __func__, "INTERFACE=", _l->ifname, NULL, NULL, ##__VA_ARGS__) : \
+ log_internal(level, error, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
+ }) \
+
+#define log_link_debug(link, ...) log_link_full(link, LOG_DEBUG, 0, ##__VA_ARGS__)
+#define log_link_info(link, ...) log_link_full(link, LOG_INFO, 0, ##__VA_ARGS__)
+#define log_link_notice(link, ...) log_link_full(link, LOG_NOTICE, 0, ##__VA_ARGS__)
+#define log_link_warning(link, ...) log_link_full(link, LOG_WARNING, 0, ##__VA_ARGS__)
+#define log_link_error(link, ...) log_link_full(link, LOG_ERR, 0, ##__VA_ARGS__)
+
+#define log_link_debug_errno(link, error, ...) log_link_full(link, LOG_DEBUG, error, ##__VA_ARGS__)
+#define log_link_info_errno(link, error, ...) log_link_full(link, LOG_INFO, error, ##__VA_ARGS__)
+#define log_link_notice_errno(link, error, ...) log_link_full(link, LOG_NOTICE, error, ##__VA_ARGS__)
+#define log_link_warning_errno(link, error, ...) log_link_full(link, LOG_WARNING, error, ##__VA_ARGS__)
+#define log_link_error_errno(link, error, ...) log_link_full(link, LOG_ERR, error, ##__VA_ARGS__)
+
+#define LOG_LINK_MESSAGE(link, fmt, ...) "MESSAGE=%s: " fmt, (link)->ifname, ##__VA_ARGS__
+#define LOG_LINK_INTERFACE(link) "INTERFACE=%s", (link)->ifname
+
+#define ADDRESS_FMT_VAL(address) \
+ be32toh((address).s_addr) >> 24, \
+ (be32toh((address).s_addr) >> 16) & 0xFFu, \
+ (be32toh((address).s_addr) >> 8) & 0xFFu, \
+ be32toh((address).s_addr) & 0xFFu
diff --git a/src/network/networkd-lldp-tx.c b/src/network/networkd-lldp-tx.c
new file mode 100644
index 0000000..8fd6365
--- /dev/null
+++ b/src/network/networkd-lldp-tx.c
@@ -0,0 +1,399 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <endian.h>
+#include <inttypes.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "hostname-util.h"
+#include "missing_network.h"
+#include "networkd-lldp-tx.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "unaligned.h"
+
+/* The LLDP spec calls this "txFastInit", see 9.2.5.19 */
+#define LLDP_TX_FAST_INIT 4U
+
+/* The LLDP spec calls this "msgTxHold", see 9.2.5.6 */
+#define LLDP_TX_HOLD 4U
+
+/* The jitter range to add, see 9.2.2. */
+#define LLDP_JITTER_USEC (400U * USEC_PER_MSEC)
+
+/* The LLDP spec calls this msgTxInterval, but we subtract half the jitter off it. */
+#define LLDP_TX_INTERVAL_USEC (30U * USEC_PER_SEC - LLDP_JITTER_USEC / 2)
+
+/* The LLDP spec calls this msgFastTx, but we subtract half the jitter off it. */
+#define LLDP_FAST_TX_USEC (1U * USEC_PER_SEC - LLDP_JITTER_USEC / 2)
+
+static const struct ether_addr lldp_multicast_addr[_LLDP_EMIT_MAX] = {
+ [LLDP_EMIT_NEAREST_BRIDGE] = {{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x0e }},
+ [LLDP_EMIT_NON_TPMR_BRIDGE] = {{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03 }},
+ [LLDP_EMIT_CUSTOMER_BRIDGE] = {{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }},
+};
+
+static int lldp_write_tlv_header(uint8_t **p, uint8_t id, size_t sz) {
+ assert(p);
+
+ if (id > 127)
+ return -EBADMSG;
+ if (sz > 511)
+ return -ENOBUFS;
+
+ (*p)[0] = (id << 1) | !!(sz & 256);
+ (*p)[1] = sz & 255;
+
+ *p = *p + 2;
+ return 0;
+}
+
+static int lldp_make_packet(
+ LLDPEmit mode,
+ const struct ether_addr *hwaddr,
+ const char *machine_id,
+ const char *ifname,
+ uint16_t ttl,
+ const char *port_description,
+ const char *hostname,
+ const char *pretty_hostname,
+ uint16_t system_capabilities,
+ uint16_t enabled_capabilities,
+ void **ret, size_t *sz) {
+
+ size_t machine_id_length, ifname_length, port_description_length = 0, hostname_length = 0, pretty_hostname_length = 0;
+ _cleanup_free_ void *packet = NULL;
+ struct ether_header *h;
+ uint8_t *p;
+ size_t l;
+ int r;
+
+ assert(mode > LLDP_EMIT_NO);
+ assert(mode < _LLDP_EMIT_MAX);
+ assert(hwaddr);
+ assert(machine_id);
+ assert(ifname);
+ assert(ret);
+ assert(sz);
+
+ machine_id_length = strlen(machine_id);
+ ifname_length = strlen(ifname);
+
+ if (port_description)
+ port_description_length = strlen(port_description);
+
+ if (hostname)
+ hostname_length = strlen(hostname);
+
+ if (pretty_hostname)
+ pretty_hostname_length = strlen(pretty_hostname);
+
+ l = sizeof(struct ether_header) +
+ /* Chassis ID */
+ 2 + 1 + machine_id_length +
+ /* Port ID */
+ 2 + 1 + ifname_length +
+ /* TTL */
+ 2 + 2 +
+ /* System Capabilities */
+ 2 + 4 +
+ /* End */
+ 2;
+
+ /* Port Description */
+ if (port_description)
+ l += 2 + port_description_length;
+
+ /* System Name */
+ if (hostname)
+ l += 2 + hostname_length;
+
+ /* System Description */
+ if (pretty_hostname)
+ l += 2 + pretty_hostname_length;
+
+ packet = malloc(l);
+ if (!packet)
+ return -ENOMEM;
+
+ h = (struct ether_header*) packet;
+ h->ether_type = htobe16(ETHERTYPE_LLDP);
+ memcpy(h->ether_dhost, lldp_multicast_addr + mode, ETH_ALEN);
+ memcpy(h->ether_shost, hwaddr, ETH_ALEN);
+
+ p = (uint8_t*) packet + sizeof(struct ether_header);
+
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_CHASSIS_ID, 1 + machine_id_length);
+ if (r < 0)
+ return r;
+ *(p++) = SD_LLDP_CHASSIS_SUBTYPE_LOCALLY_ASSIGNED;
+ p = mempcpy(p, machine_id, machine_id_length);
+
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_PORT_ID, 1 + ifname_length);
+ if (r < 0)
+ return r;
+ *(p++) = SD_LLDP_PORT_SUBTYPE_INTERFACE_NAME;
+ p = mempcpy(p, ifname, ifname_length);
+
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_TTL, 2);
+ if (r < 0)
+ return r;
+ unaligned_write_be16(p, ttl);
+ p += 2;
+
+ if (port_description) {
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_PORT_DESCRIPTION, port_description_length);
+ if (r < 0)
+ return r;
+ p = mempcpy(p, port_description, port_description_length);
+ }
+
+ if (hostname) {
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_SYSTEM_NAME, hostname_length);
+ if (r < 0)
+ return r;
+ p = mempcpy(p, hostname, hostname_length);
+ }
+
+ if (pretty_hostname) {
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_SYSTEM_DESCRIPTION, pretty_hostname_length);
+ if (r < 0)
+ return r;
+ p = mempcpy(p, pretty_hostname, pretty_hostname_length);
+ }
+
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_SYSTEM_CAPABILITIES, 4);
+ if (r < 0)
+ return r;
+ unaligned_write_be16(p, system_capabilities);
+ p += 2;
+ unaligned_write_be16(p, enabled_capabilities);
+ p += 2;
+
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_END, 0);
+ if (r < 0)
+ return r;
+
+ assert(p == (uint8_t*) packet + l);
+
+ *ret = TAKE_PTR(packet);
+ *sz = l;
+
+ return 0;
+}
+
+static int lldp_send_packet(
+ int ifindex,
+ const struct ether_addr *address,
+ const void *packet,
+ size_t packet_size) {
+
+ union sockaddr_union sa = {
+ .ll.sll_family = AF_PACKET,
+ .ll.sll_protocol = htobe16(ETHERTYPE_LLDP),
+ .ll.sll_ifindex = ifindex,
+ .ll.sll_halen = ETH_ALEN,
+ };
+
+ _cleanup_close_ int fd = -1;
+ ssize_t l;
+
+ assert(ifindex > 0);
+ assert(address);
+ assert(packet || packet_size <= 0);
+
+ memcpy(sa.ll.sll_addr, address, ETH_ALEN);
+
+ fd = socket(PF_PACKET, SOCK_RAW|SOCK_CLOEXEC, IPPROTO_RAW);
+ if (fd < 0)
+ return -errno;
+
+ l = sendto(fd, packet, packet_size, MSG_NOSIGNAL, &sa.sa, sizeof(sa.ll));
+ if (l < 0)
+ return -errno;
+
+ if ((size_t) l != packet_size)
+ return -EIO;
+
+ return 0;
+}
+
+static int link_send_lldp(Link *link) {
+ char machine_id_string[SD_ID128_STRING_MAX];
+ _cleanup_free_ char *hostname = NULL, *pretty_hostname = NULL;
+ _cleanup_free_ void *packet = NULL;
+ size_t packet_size = 0;
+ sd_id128_t machine_id;
+ uint16_t caps;
+ usec_t ttl;
+ int r;
+
+ assert(link);
+
+ if (!link->network || link->network->lldp_emit == LLDP_EMIT_NO)
+ return 0;
+
+ assert(link->network->lldp_emit < _LLDP_EMIT_MAX);
+
+ r = sd_id128_get_machine(&machine_id);
+ if (r < 0)
+ return r;
+
+ (void) gethostname_strict(&hostname);
+ (void) parse_env_file(NULL, "/etc/machine-info", "PRETTY_HOSTNAME", &pretty_hostname);
+
+ assert_cc(LLDP_TX_INTERVAL_USEC * LLDP_TX_HOLD + 1 <= (UINT16_MAX - 1) * USEC_PER_SEC);
+ ttl = DIV_ROUND_UP(LLDP_TX_INTERVAL_USEC * LLDP_TX_HOLD + 1, USEC_PER_SEC);
+
+ caps = (link->network && link->network->ip_forward != ADDRESS_FAMILY_NO) ?
+ SD_LLDP_SYSTEM_CAPABILITIES_ROUTER :
+ SD_LLDP_SYSTEM_CAPABILITIES_STATION;
+
+ r = lldp_make_packet(link->network->lldp_emit,
+ &link->mac,
+ sd_id128_to_string(machine_id, machine_id_string),
+ link->ifname,
+ (uint16_t) ttl,
+ link->network ? link->network->description : NULL,
+ hostname,
+ pretty_hostname,
+ SD_LLDP_SYSTEM_CAPABILITIES_STATION|SD_LLDP_SYSTEM_CAPABILITIES_BRIDGE|SD_LLDP_SYSTEM_CAPABILITIES_ROUTER,
+ caps,
+ &packet, &packet_size);
+ if (r < 0)
+ return r;
+
+ return lldp_send_packet(link->ifindex, lldp_multicast_addr + link->network->lldp_emit, packet, packet_size);
+}
+
+static int on_lldp_timer(sd_event_source *s, usec_t t, void *userdata) {
+ Link *link = userdata;
+ usec_t current, delay, next;
+ int r;
+
+ assert(s);
+ assert(userdata);
+
+ log_link_debug(link, "Sending LLDP packet...");
+
+ r = link_send_lldp(link);
+ if (r < 0)
+ log_link_debug_errno(link, r, "Failed to send LLDP packet, ignoring: %m");
+
+ if (link->lldp_tx_fast > 0)
+ link->lldp_tx_fast--;
+
+ assert_se(sd_event_now(sd_event_source_get_event(s), clock_boottime_or_monotonic(), &current) >= 0);
+
+ delay = link->lldp_tx_fast > 0 ? LLDP_FAST_TX_USEC : LLDP_TX_INTERVAL_USEC;
+ next = usec_add(usec_add(current, delay), (usec_t) random_u64() % LLDP_JITTER_USEC);
+
+ r = sd_event_source_set_time(s, next);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to restart LLDP timer: %m");
+
+ r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to enable LLDP timer: %m");
+
+ return 0;
+}
+
+int link_lldp_emit_start(Link *link) {
+ usec_t next;
+ int r;
+
+ assert(link);
+
+ if (!link->network || link->network->lldp_emit == LLDP_EMIT_NO) {
+ link_lldp_emit_stop(link);
+ return 0;
+ }
+
+ /* Starts the LLDP transmission in "fast" mode. If it is already started, turns "fast" mode back on again. */
+
+ link->lldp_tx_fast = LLDP_TX_FAST_INIT;
+
+ next = usec_add(usec_add(now(clock_boottime_or_monotonic()), LLDP_FAST_TX_USEC),
+ (usec_t) random_u64() % LLDP_JITTER_USEC);
+
+ if (link->lldp_emit_event_source) {
+ usec_t old;
+
+ /* Lower the timeout, maybe */
+ r = sd_event_source_get_time(link->lldp_emit_event_source, &old);
+ if (r < 0)
+ return r;
+
+ if (old <= next)
+ return 0;
+
+ return sd_event_source_set_time(link->lldp_emit_event_source, next);
+ } else {
+ r = sd_event_add_time(
+ link->manager->event,
+ &link->lldp_emit_event_source,
+ clock_boottime_or_monotonic(),
+ next,
+ 0,
+ on_lldp_timer,
+ link);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(link->lldp_emit_event_source, "lldp-tx");
+ }
+
+ return 0;
+}
+
+void link_lldp_emit_stop(Link *link) {
+ assert(link);
+
+ link->lldp_emit_event_source = sd_event_source_unref(link->lldp_emit_event_source);
+}
+
+int config_parse_lldp_emit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ LLDPEmit *emit = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue))
+ *emit = LLDP_EMIT_NO;
+ else if (streq(rvalue, "nearest-bridge"))
+ *emit = LLDP_EMIT_NEAREST_BRIDGE;
+ else if (streq(rvalue, "non-tpmr-bridge"))
+ *emit = LLDP_EMIT_NON_TPMR_BRIDGE;
+ else if (streq(rvalue, "customer-bridge"))
+ *emit = LLDP_EMIT_CUSTOMER_BRIDGE;
+ else {
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse LLDP emission setting, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *emit = r ? LLDP_EMIT_NEAREST_BRIDGE : LLDP_EMIT_NO;
+ }
+
+ return 0;
+}
diff --git a/src/network/networkd-lldp-tx.h b/src/network/networkd-lldp-tx.h
new file mode 100644
index 0000000..6842804
--- /dev/null
+++ b/src/network/networkd-lldp-tx.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "conf-parser.h"
+#include "networkd-link.h"
+
+typedef enum LLDPEmit {
+ LLDP_EMIT_NO,
+ LLDP_EMIT_NEAREST_BRIDGE,
+ LLDP_EMIT_NON_TPMR_BRIDGE,
+ LLDP_EMIT_CUSTOMER_BRIDGE,
+ _LLDP_EMIT_MAX,
+} LLDPEmit;
+
+int link_lldp_emit_start(Link *link);
+void link_lldp_emit_stop(Link *link);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_lldp_emit);
diff --git a/src/network/networkd-manager-bus.c b/src/network/networkd-manager-bus.c
new file mode 100644
index 0000000..8c52783
--- /dev/null
+++ b/src/network/networkd-manager-bus.c
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "networkd-manager.h"
+#include "strv.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_operational_state, link_operstate, LinkOperationalState);
+
+const sd_bus_vtable manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("OperationalState", "s", property_get_operational_state, offsetof(Manager, operational_state), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
+ SD_BUS_VTABLE_END
+};
+
+int manager_send_changed(Manager *manager, const char *property, ...) {
+ char **l;
+
+ assert(manager);
+
+ if (!manager->bus)
+ return 0; /* replace by assert when we have kdbus */
+
+ l = strv_from_stdarg_alloca(property);
+
+ return sd_bus_emit_properties_changed_strv(
+ manager->bus,
+ "/org/freedesktop/network1",
+ "org.freedesktop.network1.Manager",
+ l);
+}
diff --git a/src/network/networkd-manager.c b/src/network/networkd-manager.c
new file mode 100644
index 0000000..c8d369e
--- /dev/null
+++ b/src/network/networkd-manager.c
@@ -0,0 +1,1860 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/socket.h>
+#include <linux/if.h>
+#include <linux/fib_rules.h>
+#include <stdio_ext.h>
+
+#include "sd-daemon.h"
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "device-util.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "local-addresses.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "ordered-set.h"
+#include "path-util.h"
+#include "set.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "virt.h"
+
+/* use 8 MB for receive socket kernel queue. */
+#define RCVBUF_SIZE (8*1024*1024)
+
+const char* const network_dirs[] = {
+ "/etc/systemd/network",
+ "/run/systemd/network",
+ "/usr/lib/systemd/network",
+#if HAVE_SPLIT_USR
+ "/lib/systemd/network",
+#endif
+ NULL};
+
+static int setup_default_address_pool(Manager *m) {
+ AddressPool *p;
+ int r;
+
+ assert(m);
+
+ /* Add in the well-known private address ranges. */
+
+ r = address_pool_new_from_string(m, &p, AF_INET6, "fc00::", 7);
+ if (r < 0)
+ return r;
+
+ r = address_pool_new_from_string(m, &p, AF_INET, "192.168.0.0", 16);
+ if (r < 0)
+ return r;
+
+ r = address_pool_new_from_string(m, &p, AF_INET, "172.16.0.0", 12);
+ if (r < 0)
+ return r;
+
+ r = address_pool_new_from_string(m, &p, AF_INET, "10.0.0.0", 8);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int manager_reset_all(Manager *m) {
+ Link *link;
+ Iterator i;
+ int r;
+
+ assert(m);
+
+ HASHMAP_FOREACH(link, m->links, i) {
+ r = link_carrier_reset(link);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not reset carrier: %m");
+ }
+
+ return 0;
+}
+
+static int match_prepare_for_sleep(sd_bus_message *message, void *userdata, sd_bus_error *ret_error) {
+ Manager *m = userdata;
+ int b, r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse PrepareForSleep signal: %m");
+ return 0;
+ }
+
+ if (b)
+ return 0;
+
+ log_debug("Coming back from suspend, resetting all connections...");
+
+ (void) manager_reset_all(m);
+
+ return 0;
+}
+
+static int on_connected(sd_bus_message *message, void *userdata, sd_bus_error *ret_error) {
+ Manager *m = userdata;
+
+ assert(message);
+ assert(m);
+
+ /* Did we get a timezone or transient hostname from DHCP while D-Bus wasn't up yet? */
+ if (m->dynamic_hostname)
+ (void) manager_set_hostname(m, m->dynamic_hostname);
+ if (m->dynamic_timezone)
+ (void) manager_set_timezone(m, m->dynamic_timezone);
+ if (m->links_requesting_uuid)
+ (void) manager_request_product_uuid(m, NULL);
+
+ return 0;
+}
+
+int manager_connect_bus(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->bus)
+ return 0;
+
+ r = bus_open_system_watch_bind_with_description(&m->bus, "bus-api-network");
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to bus: %m");
+
+ r = sd_bus_add_object_vtable(m->bus, NULL, "/org/freedesktop/network1", "org.freedesktop.network1.Manager", manager_vtable, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add manager object vtable: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/network1/link", "org.freedesktop.network1.Link", link_vtable, link_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add link object vtable: %m");
+
+ r = sd_bus_add_node_enumerator(m->bus, NULL, "/org/freedesktop/network1/link", link_node_enumerator, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add link enumerator: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/network1/network", "org.freedesktop.network1.Network", network_vtable, network_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add network object vtable: %m");
+
+ r = sd_bus_add_node_enumerator(m->bus, NULL, "/org/freedesktop/network1/network", network_node_enumerator, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add network enumerator: %m");
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.network1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.DBus.Local",
+ NULL,
+ "org.freedesktop.DBus.Local",
+ "Connected",
+ on_connected, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match on Connected signal: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "PrepareForSleep",
+ match_prepare_for_sleep, NULL, m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to request match for PrepareForSleep, ignoring: %m");
+
+ return 0;
+}
+
+static int manager_udev_process_link(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ Manager *m = userdata;
+ const char *action;
+ Link *link = NULL;
+ int r, ifindex;
+
+ assert(m);
+ assert(device);
+
+ r = sd_device_get_property_value(device, "ACTION", &action);
+ if (r < 0) {
+ log_device_debug_errno(device, r, "Failed to get 'ACTION' property, ignoring device: %m");
+ return 0;
+ }
+
+ if (!STR_IN_SET(action, "add", "change")) {
+ log_device_debug(device, "Ignoring udev %s event for device.", action);
+ return 0;
+ }
+
+ r = sd_device_get_ifindex(device, &ifindex);
+ if (r < 0) {
+ log_device_debug_errno(device, r, "Ignoring udev ADD event for device without ifindex or with invalid ifindex: %m");
+ return 0;
+ }
+
+ r = link_get(m, ifindex, &link);
+ if (r < 0) {
+ if (r != -ENODEV)
+ log_debug_errno(r, "Failed to get link from ifindex %i, ignoring: %m", ifindex);
+ return 0;
+ }
+
+ (void) link_initialized(link, device);
+
+ return 0;
+}
+
+static int manager_connect_udev(Manager *m) {
+ int r;
+
+ /* udev does not initialize devices inside containers,
+ * so we rely on them being already initialized before
+ * entering the container */
+ if (detect_container() > 0)
+ return 0;
+
+ r = sd_device_monitor_new(&m->device_monitor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize device monitor: %m");
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_monitor, "net", NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not add device monitor filter: %m");
+
+ r = sd_device_monitor_attach_event(m->device_monitor, m->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach event to device monitor: %m");
+
+ r = sd_device_monitor_start(m->device_monitor, manager_udev_process_link, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+
+ return 0;
+}
+
+int manager_rtnl_process_route(sd_netlink *rtnl, sd_netlink_message *message, void *userdata) {
+ Manager *m = userdata;
+ Link *link = NULL;
+ uint16_t type;
+ uint32_t ifindex, priority = 0;
+ unsigned char protocol, scope, tos, table, rt_type;
+ int family;
+ unsigned char dst_prefixlen, src_prefixlen;
+ union in_addr_union dst = {}, gw = {}, src = {}, prefsrc = {};
+ Route *route = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(message);
+ assert(m);
+
+ if (sd_netlink_message_is_error(message)) {
+ r = sd_netlink_message_get_errno(message);
+ if (r < 0)
+ log_warning_errno(r, "rtnl: failed to receive route, ignoring: %m");
+
+ return 0;
+ }
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get message type, ignoring: %m");
+ return 0;
+ } else if (!IN_SET(type, RTM_NEWROUTE, RTM_DELROUTE)) {
+ log_warning("rtnl: received unexpected message type when processing route, ignoring");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_u32(message, RTA_OIF, &ifindex);
+ if (r == -ENODATA) {
+ log_debug("rtnl: received route without ifindex, ignoring");
+ return 0;
+ } else if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get ifindex from route, ignoring: %m");
+ return 0;
+ } else if (ifindex <= 0) {
+ log_warning("rtnl: received route message with invalid ifindex, ignoring: %d", ifindex);
+ return 0;
+ } else {
+ r = link_get(m, ifindex, &link);
+ if (r < 0 || !link) {
+ /* when enumerating we might be out of sync, but we will
+ * get the route again, so just ignore it */
+ if (!m->enumerating)
+ log_warning("rtnl: received route for nonexistent link (%d), ignoring", ifindex);
+ return 0;
+ }
+ }
+
+ r = sd_rtnl_message_route_get_family(message, &family);
+ if (r < 0 || !IN_SET(family, AF_INET, AF_INET6)) {
+ log_link_warning(link, "rtnl: received address with invalid family, ignoring");
+ return 0;
+ }
+
+ r = sd_rtnl_message_route_get_protocol(message, &protocol);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get route protocol: %m");
+ return 0;
+ }
+
+ switch (family) {
+ case AF_INET:
+ r = sd_netlink_message_read_in_addr(message, RTA_DST, &dst.in);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route without valid destination, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_in_addr(message, RTA_GATEWAY, &gw.in);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route with invalid gateway, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_in_addr(message, RTA_SRC, &src.in);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route with invalid source, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_in_addr(message, RTA_PREFSRC, &prefsrc.in);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route with invalid preferred source, ignoring: %m");
+ return 0;
+ }
+
+ break;
+
+ case AF_INET6:
+ r = sd_netlink_message_read_in6_addr(message, RTA_DST, &dst.in6);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route without valid destination, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_in6_addr(message, RTA_GATEWAY, &gw.in6);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route with invalid gateway, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_in6_addr(message, RTA_SRC, &src.in6);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route with invalid source, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_in6_addr(message, RTA_PREFSRC, &prefsrc.in6);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route with invalid preferred source, ignoring: %m");
+ return 0;
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Received unsupported address family");
+ return 0;
+ }
+
+ r = sd_rtnl_message_route_get_dst_prefixlen(message, &dst_prefixlen);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received route with invalid destination prefixlen, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_route_get_src_prefixlen(message, &src_prefixlen);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received route with invalid source prefixlen, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_route_get_scope(message, &scope);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received route with invalid scope, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_route_get_tos(message, &tos);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received route with invalid tos, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_route_get_type(message, &rt_type);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received route with invalid type, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_route_get_table(message, &table);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received route with invalid table, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_u32(message, RTA_PRIORITY, &priority);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route with invalid priority, ignoring: %m");
+ return 0;
+ }
+
+ (void) route_get(link, family, &dst, dst_prefixlen, tos, priority, table, &route);
+
+ switch (type) {
+ case RTM_NEWROUTE:
+ if (!route) {
+ /* A route appeared that we did not request */
+ r = route_add_foreign(link, family, &dst, dst_prefixlen, tos, priority, table, &route);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Failed to add route, ignoring: %m");
+ return 0;
+ }
+ }
+
+ route_update(route, &src, src_prefixlen, &gw, &prefsrc, scope, protocol, rt_type);
+
+ break;
+
+ case RTM_DELROUTE:
+ route_free(route);
+ break;
+
+ default:
+ assert_not_reached("Received invalid RTNL message type");
+ }
+
+ return 1;
+}
+
+int manager_rtnl_process_address(sd_netlink *rtnl, sd_netlink_message *message, void *userdata) {
+ Manager *m = userdata;
+ Link *link = NULL;
+ uint16_t type;
+ unsigned char flags;
+ int family;
+ unsigned char prefixlen;
+ unsigned char scope;
+ union in_addr_union in_addr;
+ struct ifa_cacheinfo cinfo;
+ Address *address = NULL;
+ char buf[INET6_ADDRSTRLEN], valid_buf[FORMAT_TIMESPAN_MAX];
+ const char *valid_str = NULL;
+ int r, ifindex;
+
+ assert(rtnl);
+ assert(message);
+ assert(m);
+
+ if (sd_netlink_message_is_error(message)) {
+ r = sd_netlink_message_get_errno(message);
+ if (r < 0)
+ log_warning_errno(r, "rtnl: failed to receive address, ignoring: %m");
+
+ return 0;
+ }
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get message type, ignoring: %m");
+ return 0;
+ } else if (!IN_SET(type, RTM_NEWADDR, RTM_DELADDR)) {
+ log_warning("rtnl: received unexpected message type when processing address, ignoring");
+ return 0;
+ }
+
+ r = sd_rtnl_message_addr_get_ifindex(message, &ifindex);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get ifindex from address, ignoring: %m");
+ return 0;
+ } else if (ifindex <= 0) {
+ log_warning("rtnl: received address message with invalid ifindex, ignoring: %d", ifindex);
+ return 0;
+ } else {
+ r = link_get(m, ifindex, &link);
+ if (r < 0 || !link) {
+ /* when enumerating we might be out of sync, but we will
+ * get the address again, so just ignore it */
+ if (!m->enumerating)
+ log_warning("rtnl: received address for nonexistent link (%d), ignoring", ifindex);
+ return 0;
+ }
+ }
+
+ r = sd_rtnl_message_addr_get_family(message, &family);
+ if (r < 0 || !IN_SET(family, AF_INET, AF_INET6)) {
+ log_link_warning(link, "rtnl: received address with invalid family, ignoring");
+ return 0;
+ }
+
+ r = sd_rtnl_message_addr_get_prefixlen(message, &prefixlen);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received address with invalid prefixlen, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_addr_get_scope(message, &scope);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received address with invalid scope, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_addr_get_flags(message, &flags);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received address with invalid flags, ignoring: %m");
+ return 0;
+ }
+
+ switch (family) {
+ case AF_INET:
+ r = sd_netlink_message_read_in_addr(message, IFA_LOCAL, &in_addr.in);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received address without valid address, ignoring: %m");
+ return 0;
+ }
+
+ break;
+
+ case AF_INET6:
+ r = sd_netlink_message_read_in6_addr(message, IFA_ADDRESS, &in_addr.in6);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received address without valid address, ignoring: %m");
+ return 0;
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Received unsupported address family");
+ }
+
+ if (!inet_ntop(family, &in_addr, buf, INET6_ADDRSTRLEN)) {
+ log_link_warning(link, "Could not print address, ignoring");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_cache_info(message, IFA_CACHEINFO, &cinfo);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: cannot get IFA_CACHEINFO attribute, ignoring: %m");
+ return 0;
+ } else if (r >= 0) {
+ if (cinfo.ifa_valid != CACHE_INFO_INFINITY_LIFE_TIME)
+ valid_str = format_timespan(valid_buf, FORMAT_TIMESPAN_MAX,
+ cinfo.ifa_valid * USEC_PER_SEC,
+ USEC_PER_SEC);
+ }
+
+ (void) address_get(link, family, &in_addr, prefixlen, &address);
+
+ switch (type) {
+ case RTM_NEWADDR:
+ if (address)
+ log_link_debug(link, "Updating address: %s/%u (valid %s%s)", buf, prefixlen,
+ valid_str ? "for " : "forever", strempty(valid_str));
+ else {
+ /* An address appeared that we did not request */
+ r = address_add_foreign(link, family, &in_addr, prefixlen, &address);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Failed to add address %s/%u, ignoring: %m", buf, prefixlen);
+ return 0;
+ } else
+ log_link_debug(link, "Adding address: %s/%u (valid %s%s)", buf, prefixlen,
+ valid_str ? "for " : "forever", strempty(valid_str));
+ }
+
+ r = address_update(address, flags, scope, &cinfo);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Failed to update address %s/%u, ignoring: %m", buf, prefixlen);
+ return 0;
+ }
+
+ break;
+
+ case RTM_DELADDR:
+
+ if (address) {
+ log_link_debug(link, "Removing address: %s/%u (valid %s%s)", buf, prefixlen,
+ valid_str ? "for " : "forever", strempty(valid_str));
+ (void) address_drop(address);
+ } else
+ log_link_warning(link, "Removing non-existent address: %s/%u (valid %s%s), ignoring", buf, prefixlen,
+ valid_str ? "for " : "forever", strempty(valid_str));
+
+ break;
+ default:
+ assert_not_reached("Received invalid RTNL message type");
+ }
+
+ return 1;
+}
+
+static int manager_rtnl_process_link(sd_netlink *rtnl, sd_netlink_message *message, void *userdata) {
+ Manager *m = userdata;
+ Link *link = NULL;
+ NetDev *netdev = NULL;
+ uint16_t type;
+ const char *name;
+ int r, ifindex;
+
+ assert(rtnl);
+ assert(message);
+ assert(m);
+
+ if (sd_netlink_message_is_error(message)) {
+ r = sd_netlink_message_get_errno(message);
+ if (r < 0)
+ log_warning_errno(r, "rtnl: Could not receive link, ignoring: %m");
+
+ return 0;
+ }
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: Could not get message type, ignoring: %m");
+ return 0;
+ } else if (!IN_SET(type, RTM_NEWLINK, RTM_DELLINK)) {
+ log_warning("rtnl: Received unexpected message type when processing link, ignoring");
+ return 0;
+ }
+
+ r = sd_rtnl_message_link_get_ifindex(message, &ifindex);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: Could not get ifindex from link, ignoring: %m");
+ return 0;
+ } else if (ifindex <= 0) {
+ log_warning("rtnl: received link message with invalid ifindex %d, ignoring", ifindex);
+ return 0;
+ }
+
+ r = sd_netlink_message_read_string(message, IFLA_IFNAME, &name);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: Received link message without ifname, ignoring: %m");
+ return 0;
+ }
+
+ (void) link_get(m, ifindex, &link);
+ (void) netdev_get(m, name, &netdev);
+
+ switch (type) {
+ case RTM_NEWLINK:
+ if (!link) {
+ /* link is new, so add it */
+ r = link_add(m, message, &link);
+ if (r < 0) {
+ log_warning_errno(r, "Could not add new link, ignoring: %m");
+ return 0;
+ }
+ }
+
+ if (netdev) {
+ /* netdev exists, so make sure the ifindex matches */
+ r = netdev_set_ifindex(netdev, message);
+ if (r < 0) {
+ log_warning_errno(r, "Could not set ifindex on netdev, ignoring: %m");
+ return 0;
+ }
+ }
+
+ r = link_update(link, message);
+ if (r < 0) {
+ log_warning_errno(r, "Could not update link, ignoring: %m");
+ return 0;
+ }
+
+ break;
+
+ case RTM_DELLINK:
+ link_drop(link);
+ netdev_drop(netdev);
+
+ break;
+
+ default:
+ assert_not_reached("Received invalid RTNL message type.");
+ }
+
+ return 1;
+}
+
+int manager_rtnl_process_rule(sd_netlink *rtnl, sd_netlink_message *message, void *userdata) {
+ uint8_t tos = 0, to_prefixlen = 0, from_prefixlen = 0, protocol = 0;
+ struct fib_rule_port_range sport = {}, dport = {};
+ union in_addr_union to = {}, from = {};
+ RoutingPolicyRule *rule = NULL;
+ uint32_t fwmark = 0, table = 0;
+ const char *iif = NULL, *oif = NULL;
+ Manager *m = userdata;
+ uint16_t type;
+ int family;
+ int r;
+
+ assert(rtnl);
+ assert(message);
+ assert(m);
+
+ if (sd_netlink_message_is_error(message)) {
+ r = sd_netlink_message_get_errno(message);
+ if (r < 0)
+ log_warning_errno(r, "rtnl: failed to receive rule, ignoring: %m");
+
+ return 0;
+ }
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get message type, ignoring: %m");
+ return 0;
+ } else if (!IN_SET(type, RTM_NEWRULE, RTM_DELRULE)) {
+ log_warning("rtnl: received unexpected message type '%u' when processing rule, ignoring", type);
+ return 0;
+ }
+
+ r = sd_rtnl_message_get_family(message, &family);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get rule family, ignoring: %m");
+ return 0;
+ } else if (!IN_SET(family, AF_INET, AF_INET6)) {
+ log_debug("rtnl: received address with invalid family %u, ignoring", family);
+ return 0;
+ }
+
+ switch (family) {
+ case AF_INET:
+ r = sd_netlink_message_read_in_addr(message, FRA_SRC, &from.in);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_SRC attribute, ignoring: %m");
+ return 0;
+ } else if (r >= 0) {
+ r = sd_rtnl_message_routing_policy_rule_get_rtm_src_prefixlen(message, &from_prefixlen);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: failed to retrieve rule from prefix length, ignoring: %m");
+ return 0;
+ }
+ }
+
+ r = sd_netlink_message_read_in_addr(message, FRA_DST, &to.in);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_DST attribute, ignoring: %m");
+ return 0;
+ } else if (r >= 0) {
+ r = sd_rtnl_message_routing_policy_rule_get_rtm_dst_prefixlen(message, &to_prefixlen);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: failed to retrieve rule to prefix length, ignoring: %m");
+ return 0;
+ }
+ }
+
+ break;
+
+ case AF_INET6:
+ r = sd_netlink_message_read_in6_addr(message, FRA_SRC, &from.in6);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_SRC attribute, ignoring: %m");
+ return 0;
+ } else if (r >= 0) {
+ r = sd_rtnl_message_routing_policy_rule_get_rtm_src_prefixlen(message, &from_prefixlen);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: failed to retrieve rule from prefix length, ignoring: %m");
+ return 0;
+ }
+ }
+
+ r = sd_netlink_message_read_in6_addr(message, FRA_DST, &to.in6);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_DST attribute, ignoring: %m");
+ return 0;
+ } else if (r >= 0) {
+ r = sd_rtnl_message_routing_policy_rule_get_rtm_dst_prefixlen(message, &to_prefixlen);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: failed to retrieve rule to prefix length, ignoring: %m");
+ return 0;
+ }
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Received unsupported address family");
+ }
+
+ if (from_prefixlen == 0 && to_prefixlen == 0)
+ return 0;
+
+ r = sd_netlink_message_read_u32(message, FRA_FWMARK, &fwmark);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_FWMARK attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_u32(message, FRA_TABLE, &table);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_TABLE attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_routing_policy_rule_get_tos(message, &tos);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get ip rule TOS, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_string(message, FRA_IIFNAME, &iif);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_IIFNAME attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_string(message, FRA_OIFNAME, &oif);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_OIFNAME attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_u8(message, FRA_IP_PROTO, &protocol);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_IP_PROTO attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read(message, FRA_SPORT_RANGE, sizeof(sport), (void *) &sport);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_SPORT_RANGE attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read(message, FRA_DPORT_RANGE, sizeof(dport), (void *) &dport);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_DPORT_RANGE attribute, ignoring: %m");
+ return 0;
+ }
+
+ (void) routing_policy_rule_get(m, family, &from, from_prefixlen, &to, to_prefixlen, tos, fwmark, table, iif, oif, protocol, &sport, &dport, &rule);
+
+ switch (type) {
+ case RTM_NEWRULE:
+ if (!rule) {
+ r = routing_policy_rule_add_foreign(m, family, &from, from_prefixlen, &to, to_prefixlen, tos, fwmark, table, iif, oif, protocol, &sport, &dport, &rule);
+ if (r < 0) {
+ log_warning_errno(r, "Could not add rule, ignoring: %m");
+ return 0;
+ }
+ }
+ break;
+ case RTM_DELRULE:
+ routing_policy_rule_free(rule);
+
+ break;
+
+ default:
+ assert_not_reached("Received invalid RTNL message type");
+ }
+
+ return 1;
+}
+
+static int systemd_netlink_fd(void) {
+ int n, fd, rtnl_fd = -EINVAL;
+
+ n = sd_listen_fds(true);
+ if (n <= 0)
+ return -EINVAL;
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd ++) {
+ if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
+ if (rtnl_fd >= 0)
+ return -EINVAL;
+
+ rtnl_fd = fd;
+ }
+ }
+
+ return rtnl_fd;
+}
+
+static int manager_connect_genl(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = sd_genl_socket_open(&m->genl);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_inc_rcvbuf(m->genl, RCVBUF_SIZE);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_attach_event(m->genl, m->event, 0);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int manager_connect_rtnl(Manager *m) {
+ int fd, r;
+
+ assert(m);
+
+ fd = systemd_netlink_fd();
+ if (fd < 0)
+ r = sd_netlink_open(&m->rtnl);
+ else
+ r = sd_netlink_open_fd(&m->rtnl, fd);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_inc_rcvbuf(m->rtnl, RCVBUF_SIZE);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_attach_event(m->rtnl, m->event, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_NEWLINK, &manager_rtnl_process_link, NULL, m, "network-rtnl_process_link");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_DELLINK, &manager_rtnl_process_link, NULL, m, "network-rtnl_process_link");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_NEWADDR, &manager_rtnl_process_address, NULL, m, "network-rtnl_process_address");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_DELADDR, &manager_rtnl_process_address, NULL, m, "network-rtnl_process_address");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_NEWROUTE, &manager_rtnl_process_route, NULL, m, "network-rtnl_process_route");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_DELROUTE, &manager_rtnl_process_route, NULL, m, "network-rtnl_process_route");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_NEWRULE, &manager_rtnl_process_rule, NULL, m, "network-rtnl_process_rule");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_DELRULE, &manager_rtnl_process_rule, NULL, m, "network-rtnl_process_rule");
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int ordered_set_put_in_addr_data(OrderedSet *s, const struct in_addr_data *address) {
+ char *p;
+ int r;
+
+ assert(s);
+ assert(address);
+
+ r = in_addr_to_string(address->family, &address->address, &p);
+ if (r < 0)
+ return r;
+
+ r = ordered_set_consume(s, p);
+ if (r == -EEXIST)
+ return 0;
+
+ return r;
+}
+
+static int ordered_set_put_in_addr_datav(OrderedSet *s, const struct in_addr_data *addresses, unsigned n) {
+ int r, c = 0;
+ unsigned i;
+
+ assert(s);
+ assert(addresses || n == 0);
+
+ for (i = 0; i < n; i++) {
+ r = ordered_set_put_in_addr_data(s, addresses+i);
+ if (r < 0)
+ return r;
+
+ c += r;
+ }
+
+ return c;
+}
+
+static int ordered_set_put_in4_addr(OrderedSet *s, const struct in_addr *address) {
+ char *p;
+ int r;
+
+ assert(s);
+ assert(address);
+
+ r = in_addr_to_string(AF_INET, (const union in_addr_union*) address, &p);
+ if (r < 0)
+ return r;
+
+ r = ordered_set_consume(s, p);
+ if (r == -EEXIST)
+ return 0;
+
+ return r;
+}
+
+static int ordered_set_put_in4_addrv(OrderedSet *s, const struct in_addr *addresses, unsigned n) {
+ int r, c = 0;
+ unsigned i;
+
+ assert(s);
+ assert(n == 0 || addresses);
+
+ for (i = 0; i < n; i++) {
+ r = ordered_set_put_in4_addr(s, addresses+i);
+ if (r < 0)
+ return r;
+
+ c += r;
+ }
+
+ return c;
+}
+
+static void print_string_set(FILE *f, const char *field, OrderedSet *s) {
+ bool space = false;
+ Iterator i;
+ char *p;
+
+ if (ordered_set_isempty(s))
+ return;
+
+ fputs(field, f);
+
+ ORDERED_SET_FOREACH(p, s, i)
+ fputs_with_space(f, p, NULL, &space);
+
+ fputc('\n', f);
+}
+
+static int manager_save(Manager *m) {
+ _cleanup_ordered_set_free_free_ OrderedSet *dns = NULL, *ntp = NULL, *search_domains = NULL, *route_domains = NULL;
+ Link *link;
+ Iterator i;
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ LinkOperationalState operstate = LINK_OPERSTATE_OFF;
+ const char *operstate_str;
+ int r;
+
+ assert(m);
+ assert(m->state_file);
+
+ /* We add all NTP and DNS server to a set, to filter out duplicates */
+ dns = ordered_set_new(&string_hash_ops);
+ if (!dns)
+ return -ENOMEM;
+
+ ntp = ordered_set_new(&string_hash_ops);
+ if (!ntp)
+ return -ENOMEM;
+
+ search_domains = ordered_set_new(&dns_name_hash_ops);
+ if (!search_domains)
+ return -ENOMEM;
+
+ route_domains = ordered_set_new(&dns_name_hash_ops);
+ if (!route_domains)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(link, m->links, i) {
+ if (link->flags & IFF_LOOPBACK)
+ continue;
+
+ if (link->operstate > operstate)
+ operstate = link->operstate;
+
+ if (!link->network)
+ continue;
+
+ /* First add the static configured entries */
+ r = ordered_set_put_in_addr_datav(dns, link->network->dns, link->network->n_dns);
+ if (r < 0)
+ return r;
+
+ r = ordered_set_put_strdupv(ntp, link->network->ntp);
+ if (r < 0)
+ return r;
+
+ r = ordered_set_put_strdupv(search_domains, link->network->search_domains);
+ if (r < 0)
+ return r;
+
+ r = ordered_set_put_strdupv(route_domains, link->network->route_domains);
+ if (r < 0)
+ return r;
+
+ if (!link->dhcp_lease)
+ continue;
+
+ /* Secondly, add the entries acquired via DHCP */
+ if (link->network->dhcp_use_dns) {
+ const struct in_addr *addresses;
+
+ r = sd_dhcp_lease_get_dns(link->dhcp_lease, &addresses);
+ if (r > 0) {
+ r = ordered_set_put_in4_addrv(dns, addresses, r);
+ if (r < 0)
+ return r;
+ } else if (r < 0 && r != -ENODATA)
+ return r;
+ }
+
+ if (link->network->dhcp_use_ntp) {
+ const struct in_addr *addresses;
+
+ r = sd_dhcp_lease_get_ntp(link->dhcp_lease, &addresses);
+ if (r > 0) {
+ r = ordered_set_put_in4_addrv(ntp, addresses, r);
+ if (r < 0)
+ return r;
+ } else if (r < 0 && r != -ENODATA)
+ return r;
+ }
+
+ if (link->network->dhcp_use_domains != DHCP_USE_DOMAINS_NO) {
+ const char *domainname;
+ char **domains = NULL;
+
+ OrderedSet *target_domains = (link->network->dhcp_use_domains == DHCP_USE_DOMAINS_YES) ? search_domains : route_domains;
+ r = sd_dhcp_lease_get_domainname(link->dhcp_lease, &domainname);
+ if (r >= 0) {
+ r = ordered_set_put_strdup(target_domains, domainname);
+ if (r < 0)
+ return r;
+ } else if (r != -ENODATA)
+ return r;
+
+ r = sd_dhcp_lease_get_search_domains(link->dhcp_lease, &domains);
+ if (r >= 0) {
+ r = ordered_set_put_strdupv(target_domains, domains);
+ if (r < 0)
+ return r;
+ } else if (r != -ENODATA)
+ return r;
+ }
+ }
+
+ operstate_str = link_operstate_to_string(operstate);
+ assert(operstate_str);
+
+ r = fopen_temporary(m->state_file, &f, &temp_path);
+ if (r < 0)
+ return r;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "OPER_STATE=%s\n", operstate_str);
+
+ print_string_set(f, "DNS=", dns);
+ print_string_set(f, "NTP=", ntp);
+ print_string_set(f, "DOMAINS=", search_domains);
+ print_string_set(f, "ROUTE_DOMAINS=", route_domains);
+
+ r = routing_policy_serialize_rules(m->rules, f);
+ if (r < 0)
+ goto fail;
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, m->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (m->operational_state != operstate) {
+ m->operational_state = operstate;
+ r = manager_send_changed(m, "OperationalState", NULL);
+ if (r < 0)
+ log_error_errno(r, "Could not emit changed OperationalState: %m");
+ }
+
+ m->dirty = false;
+
+ return 0;
+
+fail:
+ (void) unlink(m->state_file);
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save network state to %s: %m", m->state_file);
+}
+
+static int manager_dirty_handler(sd_event_source *s, void *userdata) {
+ Manager *m = userdata;
+ Link *link;
+ Iterator i;
+
+ assert(m);
+
+ if (m->dirty)
+ manager_save(m);
+
+ SET_FOREACH(link, m->dirty_links, i)
+ if (link_save(link) >= 0)
+ link_clean(link);
+
+ return 1;
+}
+
+Link *manager_dhcp6_prefix_get(Manager *m, struct in6_addr *addr) {
+ assert_return(m, NULL);
+ assert_return(addr, NULL);
+
+ return hashmap_get(m->dhcp6_prefixes, addr);
+}
+
+static int dhcp6_route_add_handler(sd_netlink *nl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST)
+ log_link_debug_errno(link, r, "Received error adding DHCPv6 Prefix Delegation route: %m");
+
+ return 0;
+}
+
+static void dhcp6_prefixes_hash_func(const struct in6_addr *addr, struct siphash *state) {
+ assert(addr);
+
+ siphash24_compress(addr, sizeof(*addr), state);
+}
+
+static int dhcp6_prefixes_compare_func(const struct in6_addr *a, const struct in6_addr *b) {
+ return memcmp(a, b, sizeof(*a));
+}
+
+DEFINE_PRIVATE_HASH_OPS(dhcp6_prefixes_hash_ops, struct in6_addr, dhcp6_prefixes_hash_func, dhcp6_prefixes_compare_func);
+
+int manager_dhcp6_prefix_add(Manager *m, struct in6_addr *addr, Link *link) {
+ _cleanup_free_ char *buf = NULL;
+ Route *route;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ r = route_add(link, AF_INET6, (union in_addr_union *) addr, 64,
+ 0, 0, 0, &route);
+ if (r < 0)
+ return r;
+
+ r = route_configure(route, link, dhcp6_route_add_handler);
+ if (r < 0)
+ return r;
+
+ (void) in_addr_to_string(AF_INET6, (union in_addr_union *) addr, &buf);
+ log_link_debug(link, "Adding prefix route %s/64", strnull(buf));
+
+ r = hashmap_ensure_allocated(&m->dhcp6_prefixes, &dhcp6_prefixes_hash_ops);
+ if (r < 0)
+ return r;
+
+ return hashmap_put(m->dhcp6_prefixes, addr, link);
+}
+
+static int dhcp6_route_remove_handler(sd_netlink *nl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_link_debug_errno(link, r, "Received error on DHCPv6 Prefix Delegation route removal: %m");
+
+ return 1;
+}
+
+static int manager_dhcp6_prefix_remove(Manager *m, struct in6_addr *addr) {
+ _cleanup_free_ char *buf = NULL;
+ Route *route;
+ Link *l;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ l = hashmap_remove(m->dhcp6_prefixes, addr);
+ if (!l)
+ return -EINVAL;
+
+ (void) sd_radv_remove_prefix(l->radv, addr, 64);
+ r = route_get(l, AF_INET6, (union in_addr_union *) addr, 64,
+ 0, 0, 0, &route);
+ if (r < 0)
+ return r;
+
+ r = route_remove(route, l, dhcp6_route_remove_handler);
+ if (r < 0)
+ return r;
+
+ (void) in_addr_to_string(AF_INET6, (union in_addr_union *) addr, &buf);
+ log_link_debug(l, "Removing prefix route %s/64", strnull(buf));
+
+ return 0;
+}
+
+int manager_dhcp6_prefix_remove_all(Manager *m, Link *link) {
+ struct in6_addr *addr;
+ Iterator i;
+ Link *l;
+
+ assert_return(m, -EINVAL);
+ assert_return(link, -EINVAL);
+
+ HASHMAP_FOREACH_KEY(l, addr, m->dhcp6_prefixes, i) {
+ if (l != link)
+ continue;
+
+ (void) manager_dhcp6_prefix_remove(m, addr);
+ }
+
+ return 0;
+}
+
+int manager_new(Manager **ret) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ m = new0(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ m->state_file = strdup("/run/systemd/netif/state");
+ if (!m->state_file)
+ return -ENOMEM;
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_set_watchdog(m->event, true);
+ (void) sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ (void) sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+
+ r = sd_event_add_post(m->event, NULL, manager_dirty_handler, m);
+ if (r < 0)
+ return r;
+
+ r = manager_connect_rtnl(m);
+ if (r < 0)
+ return r;
+
+ r = manager_connect_genl(m);
+ if (r < 0)
+ return r;
+
+ r = manager_connect_udev(m);
+ if (r < 0)
+ return r;
+
+ LIST_HEAD_INIT(m->networks);
+
+ r = sd_resolve_default(&m->resolve);
+ if (r < 0)
+ return r;
+
+ r = sd_resolve_attach_event(m->resolve, m->event, 0);
+ if (r < 0)
+ return r;
+
+ r = setup_default_address_pool(m);
+ if (r < 0)
+ return r;
+
+ m->duid.type = DUID_TYPE_EN;
+
+ (void) routing_policy_load_rules(m->state_file, &m->rules_saved);
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+void manager_free(Manager *m) {
+ AddressPool *pool;
+ Network *network;
+ Link *link;
+
+ if (!m)
+ return;
+
+ free(m->state_file);
+
+ sd_netlink_unref(m->rtnl);
+ sd_netlink_unref(m->genl);
+ sd_resolve_unref(m->resolve);
+
+ while ((network = m->networks))
+ network_free(network);
+
+ while ((link = hashmap_first(m->dhcp6_prefixes)))
+ manager_dhcp6_prefix_remove_all(m, link);
+ hashmap_free(m->dhcp6_prefixes);
+
+ while ((link = hashmap_steal_first(m->links))) {
+ if (link->dhcp6_client)
+ (void) dhcp6_lease_pd_prefix_lost(link->dhcp6_client, link);
+ link_unref(link);
+ }
+
+ m->dirty_links = set_free_with_destructor(m->dirty_links, link_unref);
+ m->links = hashmap_free(m->links);
+ m->links_requesting_uuid = set_free(m->links_requesting_uuid);
+ set_free(m->duids_requesting_uuid);
+
+ hashmap_free(m->networks_by_name);
+
+ m->netdevs = hashmap_free_with_destructor(m->netdevs, netdev_unref);
+
+ while ((pool = m->address_pools))
+ address_pool_free(pool);
+
+ /* routing_policy_rule_free() access m->rules and m->rules_foreign.
+ * So, it is necessary to set NULL after the sets are freed. */
+ m->rules = set_free_with_destructor(m->rules, routing_policy_rule_free);
+ m->rules_foreign = set_free_with_destructor(m->rules_foreign, routing_policy_rule_free);
+ set_free_with_destructor(m->rules_saved, routing_policy_rule_free);
+
+ sd_event_unref(m->event);
+
+ sd_device_monitor_unref(m->device_monitor);
+
+ sd_bus_flush_close_unref(m->bus);
+
+ free(m->dynamic_timezone);
+ free(m->dynamic_hostname);
+
+ free(m);
+}
+
+int manager_start(Manager *m) {
+ Link *link;
+ Iterator i;
+
+ assert(m);
+
+ /* The dirty handler will deal with future serialization, but the first one
+ must be done explicitly. */
+
+ manager_save(m);
+
+ HASHMAP_FOREACH(link, m->links, i)
+ link_save(link);
+
+ return 0;
+}
+
+int manager_load_config(Manager *m) {
+ int r;
+
+ /* update timestamp */
+ paths_check_timestamp(network_dirs, &m->network_dirs_ts_usec, true);
+
+ r = netdev_load(m);
+ if (r < 0)
+ return r;
+
+ r = network_load(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+bool manager_should_reload(Manager *m) {
+ return paths_check_timestamp(network_dirs, &m->network_dirs_ts_usec, false);
+}
+
+int manager_rtnl_enumerate_links(Manager *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ sd_netlink_message *link;
+ int r;
+
+ assert(m);
+ assert(m->rtnl);
+
+ r = sd_rtnl_message_new_link(m->rtnl, &req, RTM_GETLINK, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(m->rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (link = reply; link; link = sd_netlink_message_next(link)) {
+ int k;
+
+ m->enumerating = true;
+
+ k = manager_rtnl_process_link(m->rtnl, link, m);
+ if (k < 0)
+ r = k;
+
+ m->enumerating = false;
+ }
+
+ return r;
+}
+
+int manager_rtnl_enumerate_addresses(Manager *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ sd_netlink_message *addr;
+ int r;
+
+ assert(m);
+ assert(m->rtnl);
+
+ r = sd_rtnl_message_new_addr(m->rtnl, &req, RTM_GETADDR, 0, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(m->rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (addr = reply; addr; addr = sd_netlink_message_next(addr)) {
+ int k;
+
+ m->enumerating = true;
+
+ k = manager_rtnl_process_address(m->rtnl, addr, m);
+ if (k < 0)
+ r = k;
+
+ m->enumerating = false;
+ }
+
+ return r;
+}
+
+int manager_rtnl_enumerate_routes(Manager *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ sd_netlink_message *route;
+ int r;
+
+ assert(m);
+ assert(m->rtnl);
+
+ r = sd_rtnl_message_new_route(m->rtnl, &req, RTM_GETROUTE, 0, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(m->rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (route = reply; route; route = sd_netlink_message_next(route)) {
+ int k;
+
+ m->enumerating = true;
+
+ k = manager_rtnl_process_route(m->rtnl, route, m);
+ if (k < 0)
+ r = k;
+
+ m->enumerating = false;
+ }
+
+ return r;
+}
+
+int manager_rtnl_enumerate_rules(Manager *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ sd_netlink_message *rule;
+ int r;
+
+ assert(m);
+ assert(m->rtnl);
+
+ r = sd_rtnl_message_new_routing_policy_rule(m->rtnl, &req, RTM_GETRULE, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(m->rtnl, req, 0, &reply);
+ if (r < 0) {
+ if (r == -EOPNOTSUPP) {
+ log_debug("FIB Rules are not supported by the kernel. Ignoring.");
+ return 0;
+ }
+
+ return r;
+ }
+
+ for (rule = reply; rule; rule = sd_netlink_message_next(rule)) {
+ int k;
+
+ m->enumerating = true;
+
+ k = manager_rtnl_process_rule(m->rtnl, rule, m);
+ if (k < 0)
+ r = k;
+
+ m->enumerating = false;
+ }
+
+ return r;
+}
+
+int manager_address_pool_acquire(Manager *m, int family, unsigned prefixlen, union in_addr_union *found) {
+ AddressPool *p;
+ int r;
+
+ assert(m);
+ assert(prefixlen > 0);
+ assert(found);
+
+ LIST_FOREACH(address_pools, p, m->address_pools) {
+ if (p->family != family)
+ continue;
+
+ r = address_pool_acquire(p, prefixlen, found);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+Link* manager_find_uplink(Manager *m, Link *exclude) {
+ _cleanup_free_ struct local_address *gateways = NULL;
+ int n, i;
+
+ assert(m);
+
+ /* Looks for a suitable "uplink", via black magic: an
+ * interface that is up and where the default route with the
+ * highest priority points to. */
+
+ n = local_gateways(m->rtnl, 0, AF_UNSPEC, &gateways);
+ if (n < 0) {
+ log_warning_errno(n, "Failed to determine list of default gateways: %m");
+ return NULL;
+ }
+
+ for (i = 0; i < n; i++) {
+ Link *link;
+
+ link = hashmap_get(m->links, INT_TO_PTR(gateways[i].ifindex));
+ if (!link) {
+ log_debug("Weird, found a gateway for a link we don't know. Ignoring.");
+ continue;
+ }
+
+ if (link == exclude)
+ continue;
+
+ if (link->operstate < LINK_OPERSTATE_ROUTABLE)
+ continue;
+
+ return link;
+ }
+
+ return NULL;
+}
+
+void manager_dirty(Manager *manager) {
+ assert(manager);
+
+ /* the serialized state in /run is no longer up-to-date */
+ manager->dirty = true;
+}
+
+static int set_hostname_handler(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ Manager *manager = userdata;
+ const sd_bus_error *e;
+
+ assert(m);
+ assert(manager);
+
+ e = sd_bus_message_get_error(m);
+ if (e)
+ log_warning_errno(sd_bus_error_get_errno(e), "Could not set hostname: %s", e->message);
+
+ return 1;
+}
+
+int manager_set_hostname(Manager *m, const char *hostname) {
+ int r;
+
+ log_debug("Setting transient hostname: '%s'", strna(hostname));
+
+ if (free_and_strdup(&m->dynamic_hostname, hostname) < 0)
+ return log_oom();
+
+ if (!m->bus || sd_bus_is_ready(m->bus) <= 0) {
+ log_debug("Not connected to system bus, setting hostname later.");
+ return 0;
+ }
+
+ r = sd_bus_call_method_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.hostname1",
+ "/org/freedesktop/hostname1",
+ "org.freedesktop.hostname1",
+ "SetHostname",
+ set_hostname_handler,
+ m,
+ "sb",
+ hostname,
+ false);
+
+ if (r < 0)
+ return log_error_errno(r, "Could not set transient hostname: %m");
+
+ return 0;
+}
+
+static int set_timezone_handler(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ Manager *manager = userdata;
+ const sd_bus_error *e;
+
+ assert(m);
+ assert(manager);
+
+ e = sd_bus_message_get_error(m);
+ if (e)
+ log_warning_errno(sd_bus_error_get_errno(e), "Could not set timezone: %s", e->message);
+
+ return 1;
+}
+
+int manager_set_timezone(Manager *m, const char *tz) {
+ int r;
+
+ assert(m);
+ assert(tz);
+
+ log_debug("Setting system timezone: '%s'", tz);
+ if (free_and_strdup(&m->dynamic_timezone, tz) < 0)
+ return log_oom();
+
+ if (!m->bus || sd_bus_is_ready(m->bus) <= 0) {
+ log_debug("Not connected to system bus, setting timezone later.");
+ return 0;
+ }
+
+ r = sd_bus_call_method_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.timedate1",
+ "/org/freedesktop/timedate1",
+ "org.freedesktop.timedate1",
+ "SetTimezone",
+ set_timezone_handler,
+ m,
+ "sb",
+ tz,
+ false);
+ if (r < 0)
+ return log_error_errno(r, "Could not set timezone: %m");
+
+ return 0;
+}
+
+int manager_request_product_uuid(Manager *m, Link *link) {
+ int r;
+
+ assert(m);
+
+ if (m->has_product_uuid)
+ return 0;
+
+ log_debug("Requesting product UUID");
+
+ if (link) {
+ DUID *duid;
+
+ assert_se(duid = link_get_duid(link));
+
+ r = set_ensure_allocated(&m->links_requesting_uuid, NULL);
+ if (r < 0)
+ return log_oom();
+
+ r = set_ensure_allocated(&m->duids_requesting_uuid, NULL);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(m->links_requesting_uuid, link);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(m->duids_requesting_uuid, duid);
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (!m->bus || sd_bus_is_ready(m->bus) <= 0) {
+ log_debug("Not connected to system bus, requesting product UUID later.");
+ return 0;
+ }
+
+ r = sd_bus_call_method_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.hostname1",
+ "/org/freedesktop/hostname1",
+ "org.freedesktop.hostname1",
+ "GetProductUUID",
+ get_product_uuid_handler,
+ m,
+ "b",
+ false);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to get product UUID: %m");
+
+ return 0;
+}
diff --git a/src/network/networkd-manager.h b/src/network/networkd-manager.h
new file mode 100644
index 0000000..289ca96
--- /dev/null
+++ b/src/network/networkd-manager.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <arpa/inet.h>
+
+#include "sd-bus.h"
+#include "sd-device.h"
+#include "sd-event.h"
+#include "sd-id128.h"
+#include "sd-netlink.h"
+#include "sd-resolve.h"
+
+#include "dhcp-identifier.h"
+#include "hashmap.h"
+#include "list.h"
+
+#include "networkd-address-pool.h"
+#include "networkd-link.h"
+#include "networkd-network.h"
+
+extern const char* const network_dirs[];
+
+struct Manager {
+ sd_netlink *rtnl;
+ /* lazy initialized */
+ sd_netlink *genl;
+ sd_event *event;
+ sd_resolve *resolve;
+ sd_bus *bus;
+ sd_device_monitor *device_monitor;
+
+ bool enumerating:1;
+ bool dirty:1;
+
+ Set *dirty_links;
+
+ char *state_file;
+ LinkOperationalState operational_state;
+
+ Hashmap *links;
+ Hashmap *netdevs;
+ Hashmap *networks_by_name;
+ Hashmap *dhcp6_prefixes;
+ LIST_HEAD(Network, networks);
+ LIST_HEAD(AddressPool, address_pools);
+
+ usec_t network_dirs_ts_usec;
+
+ DUID duid;
+ sd_id128_t product_uuid;
+ bool has_product_uuid;
+ Set *links_requesting_uuid;
+ Set *duids_requesting_uuid;
+
+ char* dynamic_hostname;
+ char* dynamic_timezone;
+
+ Set *rules;
+ Set *rules_foreign;
+ Set *rules_saved;
+};
+
+extern const sd_bus_vtable manager_vtable[];
+
+int manager_new(Manager **ret);
+void manager_free(Manager *m);
+
+int manager_connect_bus(Manager *m);
+int manager_start(Manager *m);
+
+int manager_load_config(Manager *m);
+bool manager_should_reload(Manager *m);
+
+int manager_rtnl_enumerate_links(Manager *m);
+int manager_rtnl_enumerate_addresses(Manager *m);
+int manager_rtnl_enumerate_routes(Manager *m);
+int manager_rtnl_enumerate_rules(Manager *m);
+
+int manager_rtnl_process_address(sd_netlink *nl, sd_netlink_message *message, void *userdata);
+int manager_rtnl_process_route(sd_netlink *nl, sd_netlink_message *message, void *userdata);
+int manager_rtnl_process_rule(sd_netlink *nl, sd_netlink_message *message, void *userdata);
+
+int manager_send_changed(Manager *m, const char *property, ...) _sentinel_;
+void manager_dirty(Manager *m);
+
+int manager_address_pool_acquire(Manager *m, int family, unsigned prefixlen, union in_addr_union *found);
+
+Link* manager_find_uplink(Manager *m, Link *exclude);
+
+int manager_set_hostname(Manager *m, const char *hostname);
+int manager_set_timezone(Manager *m, const char *timezone);
+int manager_request_product_uuid(Manager *m, Link *link);
+
+Link *manager_dhcp6_prefix_get(Manager *m, struct in6_addr *addr);
+int manager_dhcp6_prefix_add(Manager *m, struct in6_addr *addr, Link *link);
+int manager_dhcp6_prefix_remove_all(Manager *m, Link *link);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
diff --git a/src/network/networkd-ndisc.c b/src/network/networkd-ndisc.c
new file mode 100644
index 0000000..e5b8d11
--- /dev/null
+++ b/src/network/networkd-ndisc.c
@@ -0,0 +1,671 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/icmp6.h>
+#include <arpa/inet.h>
+
+#include "sd-ndisc.h"
+
+#include "missing_network.h"
+#include "networkd-ndisc.h"
+#include "networkd-route.h"
+#include "strv.h"
+
+#define NDISC_DNSSL_MAX 64U
+#define NDISC_RDNSS_MAX 64U
+#define NDISC_PREFIX_LFT_MIN 7200U
+
+static int ndisc_netlink_message_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->ndisc_messages > 0);
+
+ link->ndisc_messages--;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST)
+ log_link_error_errno(link, r, "Could not set NDisc route or address: %m");
+
+ if (link->ndisc_messages == 0) {
+ link->ndisc_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+static int ndisc_router_process_default(Link *link, sd_ndisc_router *rt) {
+ _cleanup_(route_freep) Route *route = NULL;
+ struct in6_addr gateway;
+ uint16_t lifetime;
+ unsigned preference;
+ uint32_t mtu;
+ usec_t time_now;
+ int r;
+ Address *address;
+ Iterator i;
+
+ assert(link);
+ assert(rt);
+
+ r = sd_ndisc_router_get_lifetime(rt, &lifetime);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get gateway address from RA: %m");
+
+ if (lifetime == 0) /* not a default router */
+ return 0;
+
+ r = sd_ndisc_router_get_address(rt, &gateway);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get gateway address from RA: %m");
+
+ SET_FOREACH(address, link->addresses, i)
+ if (!memcmp(&gateway, &address->in_addr.in6, sizeof(address->in_addr.in6))) {
+ char buffer[INET6_ADDRSTRLEN];
+
+ log_link_debug(link, "No NDisc route added, gateway %s matches local address",
+ inet_ntop(AF_INET6,
+ &address->in_addr.in6,
+ buffer, sizeof(buffer)));
+ return 0;
+ }
+
+ SET_FOREACH(address, link->addresses_foreign, i)
+ if (!memcmp(&gateway, &address->in_addr.in6, sizeof(address->in_addr.in6))) {
+ char buffer[INET6_ADDRSTRLEN];
+
+ log_link_debug(link, "No NDisc route added, gateway %s matches local address",
+ inet_ntop(AF_INET6,
+ &address->in_addr.in6,
+ buffer, sizeof(buffer)));
+ return 0;
+ }
+
+ r = sd_ndisc_router_get_preference(rt, &preference);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get default router preference from RA: %m");
+
+ r = sd_ndisc_router_get_timestamp(rt, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get RA timestamp: %m");
+
+ r = sd_ndisc_router_get_mtu(rt, &mtu);
+ if (r == -ENODATA)
+ mtu = 0;
+ else if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get default router MTU from RA: %m");
+
+ r = route_new(&route);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate route: %m");
+
+ route->family = AF_INET6;
+ route->table = link->network->ipv6_accept_ra_route_table;
+ route->priority = link->network->dhcp_route_metric;
+ route->protocol = RTPROT_RA;
+ route->pref = preference;
+ route->gw.in6 = gateway;
+ route->lifetime = time_now + lifetime * USEC_PER_SEC;
+ route->mtu = mtu;
+
+ r = route_configure(route, link, ndisc_netlink_message_handler);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not set default route: %m");
+ link_enter_failed(link);
+ return r;
+ }
+
+ link->ndisc_messages++;
+
+ return 0;
+}
+
+static int ndisc_router_process_autonomous_prefix(Link *link, sd_ndisc_router *rt) {
+ _cleanup_(address_freep) Address *address = NULL;
+ Address *existing_address;
+ uint32_t lifetime_valid, lifetime_preferred, lifetime_remaining;
+ usec_t time_now;
+ unsigned prefixlen;
+ int r;
+
+ assert(link);
+ assert(rt);
+
+ r = sd_ndisc_router_get_timestamp(rt, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get RA timestamp: %m");
+
+ r = sd_ndisc_router_prefix_get_prefixlen(rt, &prefixlen);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix length: %m");
+
+ r = sd_ndisc_router_prefix_get_valid_lifetime(rt, &lifetime_valid);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix valid lifetime: %m");
+
+ r = sd_ndisc_router_prefix_get_preferred_lifetime(rt, &lifetime_preferred);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix preferred lifetime: %m");
+
+ /* The preferred lifetime is never greater than the valid lifetime */
+ if (lifetime_preferred > lifetime_valid)
+ return 0;
+
+ r = address_new(&address);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate address: %m");
+
+ address->family = AF_INET6;
+ r = sd_ndisc_router_prefix_get_address(rt, &address->in_addr.in6);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix address: %m");
+
+ if (in_addr_is_null(AF_INET6, (const union in_addr_union *) &link->network->ipv6_token) == 0)
+ memcpy(((char *)&address->in_addr.in6) + 8, ((char *)&link->network->ipv6_token) + 8, 8);
+ else {
+ /* see RFC4291 section 2.5.1 */
+ address->in_addr.in6.s6_addr[8] = link->mac.ether_addr_octet[0];
+ address->in_addr.in6.s6_addr[8] ^= 1 << 1;
+ address->in_addr.in6.s6_addr[9] = link->mac.ether_addr_octet[1];
+ address->in_addr.in6.s6_addr[10] = link->mac.ether_addr_octet[2];
+ address->in_addr.in6.s6_addr[11] = 0xff;
+ address->in_addr.in6.s6_addr[12] = 0xfe;
+ address->in_addr.in6.s6_addr[13] = link->mac.ether_addr_octet[3];
+ address->in_addr.in6.s6_addr[14] = link->mac.ether_addr_octet[4];
+ address->in_addr.in6.s6_addr[15] = link->mac.ether_addr_octet[5];
+ }
+ address->prefixlen = prefixlen;
+ address->flags = IFA_F_NOPREFIXROUTE|IFA_F_MANAGETEMPADDR;
+ address->cinfo.ifa_prefered = lifetime_preferred;
+
+ /* see RFC4862 section 5.5.3.e */
+ r = address_get(link, address->family, &address->in_addr, address->prefixlen, &existing_address);
+ if (r > 0) {
+ lifetime_remaining = existing_address->cinfo.tstamp / 100 + existing_address->cinfo.ifa_valid - time_now / USEC_PER_SEC;
+ if (lifetime_valid > NDISC_PREFIX_LFT_MIN || lifetime_valid > lifetime_remaining)
+ address->cinfo.ifa_valid = lifetime_valid;
+ else if (lifetime_remaining <= NDISC_PREFIX_LFT_MIN)
+ address->cinfo.ifa_valid = lifetime_remaining;
+ else
+ address->cinfo.ifa_valid = NDISC_PREFIX_LFT_MIN;
+ } else if (lifetime_valid > 0)
+ address->cinfo.ifa_valid = lifetime_valid;
+ else
+ return 0; /* see RFC4862 section 5.5.3.d */
+
+ if (address->cinfo.ifa_valid == 0)
+ return 0;
+
+ r = address_configure(address, link, ndisc_netlink_message_handler, true);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not set SLAAC address: %m");
+ link_enter_failed(link);
+ return r;
+ }
+
+ link->ndisc_messages++;
+
+ return 0;
+}
+
+static int ndisc_router_process_onlink_prefix(Link *link, sd_ndisc_router *rt) {
+ _cleanup_(route_freep) Route *route = NULL;
+ usec_t time_now;
+ uint32_t lifetime;
+ unsigned prefixlen;
+ int r;
+
+ assert(link);
+ assert(rt);
+
+ r = sd_ndisc_router_get_timestamp(rt, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get RA timestamp: %m");
+
+ r = sd_ndisc_router_prefix_get_prefixlen(rt, &prefixlen);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix length: %m");
+
+ r = sd_ndisc_router_prefix_get_valid_lifetime(rt, &lifetime);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix lifetime: %m");
+
+ r = route_new(&route);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate route: %m");
+
+ route->family = AF_INET6;
+ route->table = link->network->ipv6_accept_ra_route_table;
+ route->priority = link->network->dhcp_route_metric;
+ route->protocol = RTPROT_RA;
+ route->flags = RTM_F_PREFIX;
+ route->dst_prefixlen = prefixlen;
+ route->lifetime = time_now + lifetime * USEC_PER_SEC;
+
+ r = sd_ndisc_router_prefix_get_address(rt, &route->dst.in6);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix address: %m");
+
+ r = route_configure(route, link, ndisc_netlink_message_handler);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not set prefix route: %m");
+ link_enter_failed(link);
+ return r;
+ }
+
+ link->ndisc_messages++;
+
+ return 0;
+}
+
+static int ndisc_router_process_route(Link *link, sd_ndisc_router *rt) {
+ _cleanup_(route_freep) Route *route = NULL;
+ struct in6_addr gateway;
+ uint32_t lifetime;
+ unsigned preference, prefixlen;
+ usec_t time_now;
+ int r;
+
+ assert(link);
+
+ r = sd_ndisc_router_route_get_lifetime(rt, &lifetime);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get gateway address from RA: %m");
+
+ if (lifetime == 0)
+ return 0;
+
+ r = sd_ndisc_router_get_address(rt, &gateway);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get gateway address from RA: %m");
+
+ r = sd_ndisc_router_route_get_prefixlen(rt, &prefixlen);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get route prefix length: %m");
+
+ r = sd_ndisc_router_route_get_preference(rt, &preference);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get default router preference from RA: %m");
+
+ r = sd_ndisc_router_get_timestamp(rt, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get RA timestamp: %m");
+
+ r = route_new(&route);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate route: %m");
+
+ route->family = AF_INET6;
+ route->table = link->network->ipv6_accept_ra_route_table;
+ route->protocol = RTPROT_RA;
+ route->pref = preference;
+ route->gw.in6 = gateway;
+ route->dst_prefixlen = prefixlen;
+ route->lifetime = time_now + lifetime * USEC_PER_SEC;
+
+ r = sd_ndisc_router_route_get_address(rt, &route->dst.in6);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get route address: %m");
+
+ r = route_configure(route, link, ndisc_netlink_message_handler);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not set additional route: %m");
+ link_enter_failed(link);
+ return r;
+ }
+
+ link->ndisc_messages++;
+
+ return 0;
+}
+
+static void ndisc_rdnss_hash_func(const NDiscRDNSS *x, struct siphash *state) {
+ siphash24_compress(&x->address, sizeof(x->address), state);
+}
+
+static int ndisc_rdnss_compare_func(const NDiscRDNSS *a, const NDiscRDNSS *b) {
+ return memcmp(&a->address, &b->address, sizeof(a->address));
+}
+
+DEFINE_PRIVATE_HASH_OPS(ndisc_rdnss_hash_ops, NDiscRDNSS, ndisc_rdnss_hash_func, ndisc_rdnss_compare_func);
+
+static int ndisc_router_process_rdnss(Link *link, sd_ndisc_router *rt) {
+ uint32_t lifetime;
+ const struct in6_addr *a;
+ usec_t time_now;
+ int i, n, r;
+
+ assert(link);
+ assert(rt);
+
+ r = sd_ndisc_router_get_timestamp(rt, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get RA timestamp: %m");
+
+ r = sd_ndisc_router_rdnss_get_lifetime(rt, &lifetime);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get RDNSS lifetime: %m");
+
+ n = sd_ndisc_router_rdnss_get_addresses(rt, &a);
+ if (n < 0)
+ return log_link_warning_errno(link, n, "Failed to get RDNSS addresses: %m");
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ NDiscRDNSS *x = NULL;
+ NDiscRDNSS d = {
+ .address = a[i],
+ }, *y;
+
+ if (lifetime == 0) {
+ (void) set_remove(link->ndisc_rdnss, &d);
+ link_dirty(link);
+ continue;
+ }
+
+ y = set_get(link->ndisc_rdnss, &d);
+ if (y) {
+ y->valid_until = time_now + lifetime * USEC_PER_SEC;
+ continue;
+ }
+
+ ndisc_vacuum(link);
+
+ if (set_size(link->ndisc_rdnss) >= NDISC_RDNSS_MAX) {
+ log_link_warning(link, "Too many RDNSS records per link, ignoring.");
+ continue;
+ }
+
+ r = set_ensure_allocated(&link->ndisc_rdnss, &ndisc_rdnss_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ x = new(NDiscRDNSS, 1);
+ if (!x)
+ return log_oom();
+
+ *x = (NDiscRDNSS) {
+ .address = a[i],
+ .valid_until = time_now + lifetime * USEC_PER_SEC,
+ };
+
+ r = set_put(link->ndisc_rdnss, x);
+ if (r < 0)
+ return log_oom();
+
+ TAKE_PTR(x);
+
+ assert(r > 0);
+ link_dirty(link);
+ }
+
+ return 0;
+}
+
+static void ndisc_dnssl_hash_func(const NDiscDNSSL *x, struct siphash *state) {
+ siphash24_compress(NDISC_DNSSL_DOMAIN(x), strlen(NDISC_DNSSL_DOMAIN(x)), state);
+}
+
+static int ndisc_dnssl_compare_func(const NDiscDNSSL *a, const NDiscDNSSL *b) {
+ return strcmp(NDISC_DNSSL_DOMAIN(a), NDISC_DNSSL_DOMAIN(b));
+}
+
+DEFINE_PRIVATE_HASH_OPS(ndisc_dnssl_hash_ops, NDiscDNSSL, ndisc_dnssl_hash_func, ndisc_dnssl_compare_func);
+
+static void ndisc_router_process_dnssl(Link *link, sd_ndisc_router *rt) {
+ _cleanup_strv_free_ char **l = NULL;
+ uint32_t lifetime;
+ usec_t time_now;
+ char **i;
+ int r;
+
+ assert(link);
+ assert(rt);
+
+ r = sd_ndisc_router_get_timestamp(rt, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Failed to get RA timestamp: %m");
+ return;
+ }
+
+ r = sd_ndisc_router_dnssl_get_lifetime(rt, &lifetime);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Failed to get RDNSS lifetime: %m");
+ return;
+ }
+
+ r = sd_ndisc_router_dnssl_get_domains(rt, &l);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Failed to get RDNSS addresses: %m");
+ return;
+ }
+
+ STRV_FOREACH(i, l) {
+ _cleanup_free_ NDiscDNSSL *s;
+ NDiscDNSSL *x;
+
+ s = malloc0(ALIGN(sizeof(NDiscDNSSL)) + strlen(*i) + 1);
+ if (!s) {
+ log_oom();
+ return;
+ }
+
+ strcpy(NDISC_DNSSL_DOMAIN(s), *i);
+
+ if (lifetime == 0) {
+ (void) set_remove(link->ndisc_dnssl, s);
+ link_dirty(link);
+ continue;
+ }
+
+ x = set_get(link->ndisc_dnssl, s);
+ if (x) {
+ x->valid_until = time_now + lifetime * USEC_PER_SEC;
+ continue;
+ }
+
+ ndisc_vacuum(link);
+
+ if (set_size(link->ndisc_dnssl) >= NDISC_DNSSL_MAX) {
+ log_link_warning(link, "Too many DNSSL records per link, ignoring.");
+ continue;
+ }
+
+ r = set_ensure_allocated(&link->ndisc_dnssl, &ndisc_dnssl_hash_ops);
+ if (r < 0) {
+ log_oom();
+ return;
+ }
+
+ s->valid_until = time_now + lifetime * USEC_PER_SEC;
+
+ r = set_put(link->ndisc_dnssl, s);
+ if (r < 0) {
+ log_oom();
+ return;
+ }
+
+ s = NULL;
+ assert(r > 0);
+ link_dirty(link);
+ }
+}
+
+static void ndisc_router_process_options(Link *link, sd_ndisc_router *rt) {
+ int r;
+
+ assert(link);
+ assert(rt);
+
+ r = sd_ndisc_router_option_rewind(rt);
+ for (;;) {
+ uint8_t type;
+
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Failed to iterate through options: %m");
+ return;
+ }
+ if (r == 0) /* EOF */
+ break;
+
+ r = sd_ndisc_router_option_get_type(rt, &type);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Failed to get RA option type: %m");
+ return;
+ }
+
+ switch (type) {
+
+ case SD_NDISC_OPTION_PREFIX_INFORMATION: {
+ uint8_t flags;
+
+ r = sd_ndisc_router_prefix_get_flags(rt, &flags);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Failed to get RA prefix flags: %m");
+ return;
+ }
+
+ if (flags & ND_OPT_PI_FLAG_ONLINK)
+ (void) ndisc_router_process_onlink_prefix(link, rt);
+ if (flags & ND_OPT_PI_FLAG_AUTO)
+ (void) ndisc_router_process_autonomous_prefix(link, rt);
+
+ break;
+ }
+
+ case SD_NDISC_OPTION_ROUTE_INFORMATION:
+ (void) ndisc_router_process_route(link, rt);
+ break;
+
+ case SD_NDISC_OPTION_RDNSS:
+ if (link->network->ipv6_accept_ra_use_dns)
+ (void) ndisc_router_process_rdnss(link, rt);
+ break;
+
+ case SD_NDISC_OPTION_DNSSL:
+ if (link->network->ipv6_accept_ra_use_dns)
+ (void) ndisc_router_process_dnssl(link, rt);
+ break;
+ }
+
+ r = sd_ndisc_router_option_next(rt);
+ }
+}
+
+static int ndisc_router_handler(Link *link, sd_ndisc_router *rt) {
+ uint64_t flags;
+ int r = 0;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(rt);
+
+ r = sd_ndisc_router_get_flags(rt, &flags);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to get RA flags: %m");
+
+ if (flags & (ND_RA_FLAG_MANAGED | ND_RA_FLAG_OTHER)) {
+ /* (re)start DHCPv6 client in stateful or stateless mode according to RA flags */
+ r = dhcp6_request_address(link, !(flags & ND_RA_FLAG_MANAGED));
+ if (r < 0 && r != -EBUSY)
+ log_link_warning_errno(link, r, "Could not acquire DHCPv6 lease on NDisc request: %m");
+ else {
+ log_link_debug(link, "Acquiring DHCPv6 lease on NDisc request");
+ r = 0;
+ }
+ }
+
+ ndisc_router_process_default(link, rt);
+ ndisc_router_process_options(link, rt);
+
+ return r;
+}
+
+static void ndisc_handler(sd_ndisc *nd, sd_ndisc_event event, sd_ndisc_router *rt, void *userdata) {
+ Link *link = userdata;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return;
+
+ switch (event) {
+
+ case SD_NDISC_EVENT_ROUTER:
+ (void) ndisc_router_handler(link, rt);
+ break;
+
+ case SD_NDISC_EVENT_TIMEOUT:
+ link->ndisc_configured = true;
+ link_check_ready(link);
+
+ break;
+ default:
+ log_link_warning(link, "IPv6 Neighbor Discovery unknown event: %d", event);
+ }
+}
+
+int ndisc_configure(Link *link) {
+ int r;
+
+ assert(link);
+
+ r = sd_ndisc_new(&link->ndisc);
+ if (r < 0)
+ return r;
+
+ r = sd_ndisc_attach_event(link->ndisc, NULL, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_ndisc_set_mac(link->ndisc, &link->mac);
+ if (r < 0)
+ return r;
+
+ r = sd_ndisc_set_ifindex(link->ndisc, link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_ndisc_set_callback(link->ndisc, ndisc_handler, link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+void ndisc_vacuum(Link *link) {
+ NDiscRDNSS *r;
+ NDiscDNSSL *d;
+ Iterator i;
+ usec_t time_now;
+
+ assert(link);
+
+ /* Removes all RDNSS and DNSSL entries whose validity time has passed */
+
+ time_now = now(clock_boottime_or_monotonic());
+
+ SET_FOREACH(r, link->ndisc_rdnss, i)
+ if (r->valid_until < time_now) {
+ free(set_remove(link->ndisc_rdnss, r));
+ link_dirty(link);
+ }
+
+ SET_FOREACH(d, link->ndisc_dnssl, i)
+ if (d->valid_until < time_now) {
+ free(set_remove(link->ndisc_dnssl, d));
+ link_dirty(link);
+ }
+}
+
+void ndisc_flush(Link *link) {
+ assert(link);
+
+ /* Removes all RDNSS and DNSSL entries, without exception */
+
+ link->ndisc_rdnss = set_free_free(link->ndisc_rdnss);
+ link->ndisc_dnssl = set_free_free(link->ndisc_dnssl);
+}
diff --git a/src/network/networkd-ndisc.h b/src/network/networkd-ndisc.h
new file mode 100644
index 0000000..762f42d
--- /dev/null
+++ b/src/network/networkd-ndisc.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "networkd-link.h"
+
+typedef struct NDiscRDNSS {
+ usec_t valid_until;
+ struct in6_addr address;
+} NDiscRDNSS;
+
+typedef struct NDiscDNSSL {
+ usec_t valid_until;
+ /* The domain name follows immediately. */
+} NDiscDNSSL;
+
+static inline char* NDISC_DNSSL_DOMAIN(const NDiscDNSSL *n) {
+ return ((char*) n) + ALIGN(sizeof(NDiscDNSSL));
+}
+
+int ndisc_configure(Link *link);
+void ndisc_vacuum(Link *link);
+void ndisc_flush(Link *link);
diff --git a/src/network/networkd-neighbor.c b/src/network/networkd-neighbor.c
new file mode 100644
index 0000000..254a60b
--- /dev/null
+++ b/src/network/networkd-neighbor.c
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "ether-addr-util.h"
+#include "hashmap.h"
+#include "in-addr-util.h"
+#include "netlink-util.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-neighbor.h"
+
+void neighbor_free(Neighbor *neighbor) {
+ if (!neighbor)
+ return;
+
+ if (neighbor->network) {
+ LIST_REMOVE(neighbors, neighbor->network->neighbors, neighbor);
+ assert(neighbor->network->n_neighbors > 0);
+ neighbor->network->n_neighbors--;
+
+ if (neighbor->section) {
+ hashmap_remove(neighbor->network->neighbors_by_section, neighbor->section);
+ network_config_section_free(neighbor->section);
+ }
+ }
+
+ free(neighbor);
+}
+
+static int neighbor_new_static(Network *network, const char *filename, unsigned section_line, Neighbor **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(neighbor_freep) Neighbor *neighbor = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(!!filename == (section_line > 0));
+
+ if (filename) {
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ neighbor = hashmap_get(network->neighbors_by_section, n);
+ if (neighbor) {
+ *ret = TAKE_PTR(neighbor);
+
+ return 0;
+ }
+ }
+
+ neighbor = new(Neighbor, 1);
+ if (!neighbor)
+ return -ENOMEM;
+
+ *neighbor = (Neighbor) {
+ .network = network,
+ .family = AF_UNSPEC,
+ };
+
+ LIST_APPEND(neighbors, network->neighbors, neighbor);
+ network->n_neighbors++;
+
+ if (filename) {
+ neighbor->section = TAKE_PTR(n);
+
+ r = hashmap_ensure_allocated(&network->neighbors_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->neighbors_by_section, neighbor->section, neighbor);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(neighbor);
+
+ return 0;
+}
+
+static int neighbor_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->neighbor_messages > 0);
+
+ link->neighbor_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST)
+ log_link_warning_errno(link, r, "Could not set neighbor: %m");
+
+ if (link->neighbor_messages == 0) {
+ log_link_debug(link, "Neighbors set");
+ link->neighbors_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+int neighbor_configure(Neighbor *neighbor, Link *link, link_netlink_message_handler_t callback) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(neighbor);
+ assert(link);
+ assert(link->ifindex > 0);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ if (neighbor->family == AF_UNSPEC)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Neighbor without Address= configured");
+ if (!neighbor->mac_configured)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Neighbor without MACAddress= configured");
+
+ r = sd_rtnl_message_new_neigh(link->manager->rtnl, &req, RTM_NEWNEIGH,
+ link->ifindex, neighbor->family);
+ if (r < 0)
+ return log_error_errno(r, "Could not allocate RTM_NEWNEIGH message: %m");
+
+ r = sd_rtnl_message_neigh_set_state(req, NUD_PERMANENT);
+ if (r < 0)
+ return log_error_errno(r, "Could not set state: %m");
+
+ r = sd_netlink_message_set_flags(req, NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE);
+ if (r < 0)
+ return log_error_errno(r, "Could not set flags: %m");
+
+ r = sd_netlink_message_append_ether_addr(req, NDA_LLADDR, &neighbor->mac);
+ if (r < 0)
+ return log_error_errno(r, "Could not append NDA_LLADDR attribute: %m");
+
+ switch (neighbor->family) {
+ case AF_INET6:
+ r = sd_netlink_message_append_in6_addr(req, NDA_DST, &neighbor->in_addr.in6);
+ if (r < 0)
+ return log_error_errno(r, "Could not append NDA_DST attribute: %m");
+ break;
+ case AF_INET:
+ r = sd_netlink_message_append_in_addr(req, NDA_DST, &neighbor->in_addr.in);
+ if (r < 0)
+ return log_error_errno(r, "Could not append NDA_DST attribute: %m");
+ break;
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Neighbor with invalid address family");
+ }
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, callback ?: neighbor_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_error_errno(r, "Could not send rtnetlink message: %m");
+
+ link->neighbor_messages++;
+ link_ref(link);
+
+ return 0;
+}
+
+int config_parse_neighbor_address(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(neighbor_freep) Neighbor *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = neighbor_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = in_addr_from_string_auto(rvalue, &n->family, &n->in_addr);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Neighbor Address is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+
+ return 0;
+}
+
+int config_parse_neighbor_hwaddr(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(neighbor_freep) Neighbor *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = neighbor_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = ether_addr_from_string(rvalue, &n->mac);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Neighbor MACAddress is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ n->mac_configured = true;
+ TAKE_PTR(n);
+
+ return 0;
+}
diff --git a/src/network/networkd-neighbor.h b/src/network/networkd-neighbor.h
new file mode 100644
index 0000000..094bf79
--- /dev/null
+++ b/src/network/networkd-neighbor.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-netlink.h"
+
+#include "conf-parser.h"
+#include "ether-addr-util.h"
+#include "in-addr-util.h"
+#include "list.h"
+#include "macro.h"
+
+typedef struct Neighbor Neighbor;
+
+#include "networkd-link.h"
+#include "networkd-network.h"
+
+struct Neighbor {
+ Network *network;
+ Link *link;
+ NetworkConfigSection *section;
+
+ int family;
+ union in_addr_union in_addr;
+ bool mac_configured;
+ struct ether_addr mac;
+
+ LIST_FIELDS(Neighbor, neighbors);
+};
+
+void neighbor_free(Neighbor *neighbor);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Neighbor*, neighbor_free);
+
+int neighbor_configure(Neighbor *neighbor, Link *link, link_netlink_message_handler_t callback);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_neighbor_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_neighbor_hwaddr);
diff --git a/src/network/networkd-network-bus.c b/src/network/networkd-network-bus.c
new file mode 100644
index 0000000..07e453c
--- /dev/null
+++ b/src/network/networkd-network-bus.c
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "ether-addr-util.h"
+#include "networkd-manager.h"
+#include "string-util.h"
+#include "strv.h"
+
+static int property_get_ether_addrs(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ char buf[ETHER_ADDR_TO_STRING_MAX];
+ const struct ether_addr *p;
+ Iterator i;
+ Set *s;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(userdata);
+
+ s = *(Set **) userdata;
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(p, s, i) {
+ r = sd_bus_message_append(reply, "s", ether_addr_to_string(p, buf));
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+const sd_bus_vtable network_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("Description", "s", NULL, offsetof(Network, description), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SourcePath", "s", NULL, offsetof(Network, filename), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MatchMAC", "as", property_get_ether_addrs, offsetof(Network, match_mac), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MatchPath", "as", NULL, offsetof(Network, match_path), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MatchDriver", "as", NULL, offsetof(Network, match_driver), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MatchType", "as", NULL, offsetof(Network, match_type), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MatchName", "as", NULL, offsetof(Network, match_name), SD_BUS_VTABLE_PROPERTY_CONST),
+
+ SD_BUS_VTABLE_END
+};
+
+static char *network_bus_path(Network *network) {
+ _cleanup_free_ char *name = NULL;
+ char *networkname, *d, *path;
+ int r;
+
+ assert(network);
+ assert(network->filename);
+
+ name = strdup(network->filename);
+ if (!name)
+ return NULL;
+
+ networkname = basename(name);
+
+ d = strrchr(networkname, '.');
+ if (!d)
+ return NULL;
+
+ assert(streq(d, ".network"));
+
+ *d = '\0';
+
+ r = sd_bus_path_encode("/org/freedesktop/network1/network", networkname, &path);
+ if (r < 0)
+ return NULL;
+
+ return path;
+}
+
+int network_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ Network *network;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(m);
+ assert(nodes);
+
+ LIST_FOREACH(networks, network, m->networks) {
+ char *p;
+
+ p = network_bus_path(network);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_consume(&l, p);
+ if (r < 0)
+ return r;
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+int network_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ Network *network;
+ _cleanup_free_ char *name = NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(m);
+ assert(found);
+
+ r = sd_bus_path_decode(path, "/org/freedesktop/network1/network", &name);
+ if (r < 0)
+ return 0;
+
+ r = network_get_by_name(m, name, &network);
+ if (r < 0)
+ return 0;
+
+ *found = network;
+
+ return 1;
+}
diff --git a/src/network/networkd-network-gperf.gperf b/src/network/networkd-network-gperf.gperf
new file mode 100644
index 0000000..5d8aede
--- /dev/null
+++ b/src/network/networkd-network-gperf.gperf
@@ -0,0 +1,201 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "network-internal.h"
+#include "networkd-conf.h"
+#include "networkd-network.h"
+#include "vlan-util.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name network_network_gperf_hash
+%define lookup-function-name network_network_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Match.MACAddress, config_parse_hwaddrs, 0, offsetof(Network, match_mac)
+Match.Path, config_parse_strv, 0, offsetof(Network, match_path)
+Match.Driver, config_parse_strv, 0, offsetof(Network, match_driver)
+Match.Type, config_parse_strv, 0, offsetof(Network, match_type)
+Match.Name, config_parse_ifnames, 0, offsetof(Network, match_name)
+Match.Host, config_parse_net_condition, CONDITION_HOST, offsetof(Network, match_host)
+Match.Virtualization, config_parse_net_condition, CONDITION_VIRTUALIZATION, offsetof(Network, match_virt)
+Match.KernelCommandLine, config_parse_net_condition, CONDITION_KERNEL_COMMAND_LINE, offsetof(Network, match_kernel_cmdline)
+Match.KernelVersion, config_parse_net_condition, CONDITION_KERNEL_VERSION, offsetof(Network, match_kernel_version)
+Match.Architecture, config_parse_net_condition, CONDITION_ARCHITECTURE, offsetof(Network, match_arch)
+Link.MACAddress, config_parse_hwaddr, 0, offsetof(Network, mac)
+Link.MTUBytes, config_parse_mtu, AF_UNSPEC, offsetof(Network, mtu)
+Link.ARP, config_parse_tristate, 0, offsetof(Network, arp)
+Link.Multicast, config_parse_tristate, 0, offsetof(Network, multicast)
+Link.AllMulticast, config_parse_tristate, 0, offsetof(Network, allmulticast)
+Link.Unmanaged, config_parse_bool, 0, offsetof(Network, unmanaged)
+Link.RequiredForOnline, config_parse_bool, 0, offsetof(Network, required_for_online)
+Network.Description, config_parse_string, 0, offsetof(Network, description)
+Network.Bridge, config_parse_netdev, 0, 0
+Network.Bond, config_parse_netdev, 0, 0
+Network.VLAN, config_parse_netdev, 0, 0
+Network.MACVLAN, config_parse_netdev, 0, 0
+Network.MACVTAP, config_parse_netdev, 0, 0
+Network.IPVLAN, config_parse_netdev, 0, 0
+Network.VXLAN, config_parse_netdev, 0, 0
+Network.Tunnel, config_parse_tunnel, 0, 0
+Network.VRF, config_parse_netdev, 0, 0
+Network.DHCP, config_parse_dhcp, 0, offsetof(Network, dhcp)
+Network.DHCPServer, config_parse_bool, 0, offsetof(Network, dhcp_server)
+Network.LinkLocalAddressing, config_parse_address_family_boolean, 0, offsetof(Network, link_local)
+Network.IPv4LLRoute, config_parse_bool, 0, offsetof(Network, ipv4ll_route)
+Network.IPv6Token, config_parse_ipv6token, 0, offsetof(Network, ipv6_token)
+Network.LLDP, config_parse_lldp_mode, 0, offsetof(Network, lldp_mode)
+Network.EmitLLDP, config_parse_lldp_emit, 0, offsetof(Network, lldp_emit)
+Network.Address, config_parse_address, 0, 0
+Network.Gateway, config_parse_gateway, 0, 0
+Network.Domains, config_parse_domains, 0, 0
+Network.DNS, config_parse_dns, 0, 0
+Network.DNSDefaultRoute, config_parse_tristate, 0, offsetof(Network, dns_default_route)
+Network.LLMNR, config_parse_resolve_support, 0, offsetof(Network, llmnr)
+Network.MulticastDNS, config_parse_resolve_support, 0, offsetof(Network, mdns)
+Network.DNSOverTLS, config_parse_dns_over_tls_mode, 0, offsetof(Network, dns_over_tls_mode)
+Network.DNSSEC, config_parse_dnssec_mode, 0, offsetof(Network, dnssec_mode)
+Network.DNSSECNegativeTrustAnchors, config_parse_dnssec_negative_trust_anchors, 0, 0
+Network.NTP, config_parse_ntp, 0, offsetof(Network, ntp)
+Network.IPForward, config_parse_address_family_boolean_with_kernel,0, offsetof(Network, ip_forward)
+Network.IPMasquerade, config_parse_bool, 0, offsetof(Network, ip_masquerade)
+Network.IPv6PrivacyExtensions, config_parse_ipv6_privacy_extensions, 0, offsetof(Network, ipv6_privacy_extensions)
+Network.IPv6AcceptRA, config_parse_tristate, 0, offsetof(Network, ipv6_accept_ra)
+Network.IPv6AcceptRouterAdvertisements, config_parse_tristate, 0, offsetof(Network, ipv6_accept_ra)
+Network.IPv6DuplicateAddressDetection, config_parse_int, 0, offsetof(Network, ipv6_dad_transmits)
+Network.IPv6HopLimit, config_parse_int, 0, offsetof(Network, ipv6_hop_limit)
+Network.IPv6ProxyNDP, config_parse_tristate, 0, offsetof(Network, ipv6_proxy_ndp)
+Network.IPv6MTUBytes, config_parse_mtu, AF_INET6, offsetof(Network, ipv6_mtu)
+Network.ActiveSlave, config_parse_bool, 0, offsetof(Network, active_slave)
+Network.PrimarySlave, config_parse_bool, 0, offsetof(Network, primary_slave)
+Network.IPv4ProxyARP, config_parse_tristate, 0, offsetof(Network, proxy_arp)
+Network.ProxyARP, config_parse_tristate, 0, offsetof(Network, proxy_arp)
+Network.IPv6ProxyNDPAddress, config_parse_ipv6_proxy_ndp_address, 0, 0
+Network.BindCarrier, config_parse_strv, 0, offsetof(Network, bind_carrier)
+Network.ConfigureWithoutCarrier, config_parse_bool, 0, offsetof(Network, configure_without_carrier)
+Address.Address, config_parse_address, 0, 0
+Address.Peer, config_parse_address, 0, 0
+Address.Broadcast, config_parse_broadcast, 0, 0
+Address.Label, config_parse_label, 0, 0
+Address.PreferredLifetime, config_parse_lifetime, 0, 0
+Address.HomeAddress, config_parse_address_flags, 0, 0
+Address.DuplicateAddressDetection, config_parse_address_flags, 0, 0
+Address.ManageTemporaryAddress, config_parse_address_flags, 0, 0
+Address.PrefixRoute, config_parse_address_flags, 0, 0
+Address.AutoJoin, config_parse_address_flags, 0, 0
+Address.Scope, config_parse_address_scope, 0, 0
+IPv6AddressLabel.Prefix, config_parse_address_label_prefix, 0, 0
+IPv6AddressLabel.Label, config_parse_address_label, 0, 0
+Neighbor.Address, config_parse_neighbor_address, 0, 0
+Neighbor.MACAddress, config_parse_neighbor_hwaddr, 0, 0
+RoutingPolicyRule.TypeOfService, config_parse_routing_policy_rule_tos, 0, 0
+RoutingPolicyRule.Priority, config_parse_routing_policy_rule_priority, 0, 0
+RoutingPolicyRule.Table, config_parse_routing_policy_rule_table, 0, 0
+RoutingPolicyRule.FirewallMark, config_parse_routing_policy_rule_fwmark_mask, 0, 0
+RoutingPolicyRule.From, config_parse_routing_policy_rule_prefix, 0, 0
+RoutingPolicyRule.To, config_parse_routing_policy_rule_prefix, 0, 0
+RoutingPolicyRule.IncomingInterface, config_parse_routing_policy_rule_device, 0, 0
+RoutingPolicyRule.OutgoingInterface, config_parse_routing_policy_rule_device, 0, 0
+RoutingPolicyRule.IPProtocol, config_parse_routing_policy_rule_ip_protocol, 0, 0
+RoutingPolicyRule.SourcePort, config_parse_routing_policy_rule_port_range, 0, 0
+RoutingPolicyRule.DestinationPort, config_parse_routing_policy_rule_port_range, 0, 0
+RoutingPolicyRule.InvertRule, config_parse_routing_policy_rule_invert, 0, 0
+Route.Gateway, config_parse_gateway, 0, 0
+Route.Destination, config_parse_destination, 0, 0
+Route.Source, config_parse_destination, 0, 0
+Route.Metric, config_parse_route_priority, 0, 0
+Route.Scope, config_parse_route_scope, 0, 0
+Route.PreferredSource, config_parse_preferred_src, 0, 0
+Route.Table, config_parse_route_table, 0, 0
+Route.MTUBytes, config_parse_route_mtu, AF_UNSPEC, 0
+Route.GatewayOnlink, config_parse_gateway_onlink, 0, 0
+Route.IPv6Preference, config_parse_ipv6_route_preference, 0, 0
+Route.Protocol, config_parse_route_protocol, 0, 0
+Route.Type, config_parse_route_type, 0, 0
+Route.InitialCongestionWindow, config_parse_tcp_window, 0, 0
+Route.InitialAdvertisedReceiveWindow, config_parse_tcp_window, 0, 0
+Route.QuickAck, config_parse_quickack, 0, 0
+DHCP.ClientIdentifier, config_parse_dhcp_client_identifier, 0, offsetof(Network, dhcp_client_identifier)
+DHCP.UseDNS, config_parse_bool, 0, offsetof(Network, dhcp_use_dns)
+DHCP.UseNTP, config_parse_bool, 0, offsetof(Network, dhcp_use_ntp)
+DHCP.UseMTU, config_parse_bool, 0, offsetof(Network, dhcp_use_mtu)
+DHCP.UseHostname, config_parse_bool, 0, offsetof(Network, dhcp_use_hostname)
+DHCP.UseDomains, config_parse_dhcp_use_domains, 0, offsetof(Network, dhcp_use_domains)
+DHCP.UseRoutes, config_parse_bool, 0, offsetof(Network, dhcp_use_routes)
+DHCP.Anonymize, config_parse_bool, 0, offsetof(Network, dhcp_anonymize)
+DHCP.SendHostname, config_parse_bool, 0, offsetof(Network, dhcp_send_hostname)
+DHCP.Hostname, config_parse_hostname, 0, offsetof(Network, dhcp_hostname)
+DHCP.RequestBroadcast, config_parse_bool, 0, offsetof(Network, dhcp_broadcast)
+DHCP.CriticalConnection, config_parse_bool, 0, offsetof(Network, dhcp_critical)
+DHCP.VendorClassIdentifier, config_parse_string, 0, offsetof(Network, dhcp_vendor_class_identifier)
+DHCP.UserClass, config_parse_dhcp_user_class, 0, offsetof(Network, dhcp_user_class)
+DHCP.DUIDType, config_parse_duid_type, 0, offsetof(Network, duid)
+DHCP.DUIDRawData, config_parse_duid_rawdata, 0, offsetof(Network, duid)
+DHCP.RouteMetric, config_parse_unsigned, 0, offsetof(Network, dhcp_route_metric)
+DHCP.RouteTable, config_parse_dhcp_route_table, 0, 0
+DHCP.UseTimezone, config_parse_bool, 0, offsetof(Network, dhcp_use_timezone)
+DHCP.IAID, config_parse_iaid, 0, 0
+DHCP.ListenPort, config_parse_uint16, 0, offsetof(Network, dhcp_client_port)
+DHCP.RapidCommit, config_parse_bool, 0, offsetof(Network, rapid_commit)
+DHCP.ForceDHCPv6PDOtherInformation, config_parse_bool, 0, offsetof(Network, dhcp6_force_pd_other_information)
+IPv6AcceptRA.UseDNS, config_parse_bool, 0, offsetof(Network, ipv6_accept_ra_use_dns)
+IPv6AcceptRA.UseDomains, config_parse_dhcp_use_domains, 0, offsetof(Network, ipv6_accept_ra_use_domains)
+IPv6AcceptRA.RouteTable, config_parse_uint32, 0, offsetof(Network, ipv6_accept_ra_route_table)
+DHCPServer.MaxLeaseTimeSec, config_parse_sec, 0, offsetof(Network, dhcp_server_max_lease_time_usec)
+DHCPServer.DefaultLeaseTimeSec, config_parse_sec, 0, offsetof(Network, dhcp_server_default_lease_time_usec)
+DHCPServer.EmitDNS, config_parse_bool, 0, offsetof(Network, dhcp_server_emit_dns)
+DHCPServer.DNS, config_parse_dhcp_server_dns, 0, 0
+DHCPServer.EmitNTP, config_parse_bool, 0, offsetof(Network, dhcp_server_emit_ntp)
+DHCPServer.NTP, config_parse_dhcp_server_ntp, 0, 0
+DHCPServer.EmitRouter, config_parse_bool, 0, offsetof(Network, dhcp_server_emit_router)
+DHCPServer.EmitTimezone, config_parse_bool, 0, offsetof(Network, dhcp_server_emit_timezone)
+DHCPServer.Timezone, config_parse_timezone, 0, offsetof(Network, dhcp_server_timezone)
+DHCPServer.PoolOffset, config_parse_uint32, 0, offsetof(Network, dhcp_server_pool_offset)
+DHCPServer.PoolSize, config_parse_uint32, 0, offsetof(Network, dhcp_server_pool_size)
+Bridge.Cost, config_parse_uint32, 0, offsetof(Network, cost)
+Bridge.UseBPDU, config_parse_tristate, 0, offsetof(Network, use_bpdu)
+Bridge.HairPin, config_parse_tristate, 0, offsetof(Network, hairpin)
+Bridge.FastLeave, config_parse_tristate, 0, offsetof(Network, fast_leave)
+Bridge.AllowPortToBeRoot, config_parse_tristate, 0, offsetof(Network, allow_port_to_be_root)
+Bridge.UnicastFlood, config_parse_tristate, 0, offsetof(Network, unicast_flood)
+Bridge.MulticastToUnicast, config_parse_tristate, 0, offsetof(Network, multicast_to_unicast)
+Bridge.Priority, config_parse_bridge_port_priority, 0, offsetof(Network, priority)
+BridgeFDB.MACAddress, config_parse_fdb_hwaddr, 0, 0
+BridgeFDB.VLANId, config_parse_fdb_vlan_id, 0, 0
+BridgeVLAN.PVID, config_parse_brvlan_pvid, 0, 0
+BridgeVLAN.VLAN, config_parse_brvlan_vlan, 0, 0
+BridgeVLAN.EgressUntagged, config_parse_brvlan_untagged, 0, 0
+Network.IPv6PrefixDelegation, config_parse_router_prefix_delegation, 0, 0
+IPv6PrefixDelegation.RouterLifetimeSec, config_parse_sec, 0, offsetof(Network, router_lifetime_usec)
+IPv6PrefixDelegation.Managed, config_parse_bool, 0, offsetof(Network, router_managed)
+IPv6PrefixDelegation.OtherInformation, config_parse_bool, 0, offsetof(Network, router_other_information)
+IPv6PrefixDelegation.RouterPreference, config_parse_router_preference, 0, 0
+IPv6PrefixDelegation.EmitDNS, config_parse_bool, 0, offsetof(Network, router_emit_dns)
+IPv6PrefixDelegation.DNS, config_parse_radv_dns, 0, 0
+IPv6PrefixDelegation.EmitDomains, config_parse_bool, 0, offsetof(Network, router_emit_domains)
+IPv6PrefixDelegation.Domains, config_parse_radv_search_domains, 0, 0
+IPv6PrefixDelegation.DNSLifetimeSec, config_parse_sec, 0, offsetof(Network, router_dns_lifetime_usec)
+IPv6Prefix.Prefix, config_parse_prefix, 0, 0
+IPv6Prefix.OnLink, config_parse_prefix_flags, 0, 0
+IPv6Prefix.AddressAutoconfiguration, config_parse_prefix_flags, 0, 0
+IPv6Prefix.ValidLifetimeSec, config_parse_prefix_lifetime, 0, 0
+IPv6Prefix.PreferredLifetimeSec, config_parse_prefix_lifetime, 0, 0
+CAN.BitRate, config_parse_si_size, 0, offsetof(Network, can_bitrate)
+CAN.SamplePoint, config_parse_permille, 0, offsetof(Network, can_sample_point)
+CAN.RestartSec, config_parse_sec, 0, offsetof(Network, can_restart_us)
+/* backwards compatibility: do not add new entries to this section */
+Network.IPv4LL, config_parse_ipv4ll, 0, offsetof(Network, link_local)
+DHCPv4.UseDNS, config_parse_bool, 0, offsetof(Network, dhcp_use_dns)
+DHCPv4.UseMTU, config_parse_bool, 0, offsetof(Network, dhcp_use_mtu)
+DHCPv4.UseHostname, config_parse_bool, 0, offsetof(Network, dhcp_use_hostname)
+DHCP.UseDomainName, config_parse_dhcp_use_domains, 0, offsetof(Network, dhcp_use_domains)
+DHCPv4.UseDomainName, config_parse_dhcp_use_domains, 0, offsetof(Network, dhcp_use_domains)
+DHCPv4.CriticalConnection, config_parse_bool, 0, offsetof(Network, dhcp_critical)
diff --git a/src/network/networkd-network.c b/src/network/networkd-network.c
new file mode 100644
index 0000000..12344ec
--- /dev/null
+++ b/src/network/networkd-network.c
@@ -0,0 +1,1507 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <net/if.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "missing_network.h"
+#include "network-internal.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "parse-util.h"
+#include "set.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static void network_config_hash_func(const NetworkConfigSection *c, struct siphash *state) {
+ siphash24_compress(c->filename, strlen(c->filename), state);
+ siphash24_compress(&c->line, sizeof(c->line), state);
+}
+
+static int network_config_compare_func(const NetworkConfigSection *x, const NetworkConfigSection *y) {
+ int r;
+
+ r = strcmp(x->filename, y->filename);
+ if (r != 0)
+ return r;
+
+ return CMP(x->line, y->line);
+}
+
+DEFINE_HASH_OPS(network_config_hash_ops, NetworkConfigSection, network_config_hash_func, network_config_compare_func);
+
+int network_config_section_new(const char *filename, unsigned line, NetworkConfigSection **s) {
+ NetworkConfigSection *cs;
+
+ cs = malloc0(offsetof(NetworkConfigSection, filename) + strlen(filename) + 1);
+ if (!cs)
+ return -ENOMEM;
+
+ strcpy(cs->filename, filename);
+ cs->line = line;
+
+ *s = TAKE_PTR(cs);
+
+ return 0;
+}
+
+void network_config_section_free(NetworkConfigSection *cs) {
+ free(cs);
+}
+
+/* Set defaults following RFC7844 */
+void network_apply_anonymize_if_set(Network *network) {
+ if (!network->dhcp_anonymize)
+ return;
+ /* RFC7844 3.7
+ SHOULD NOT send the Host Name option */
+ network->dhcp_send_hostname = false;
+ /* RFC7844 section 3.:
+ MAY contain the Client Identifier option
+ Section 3.5:
+ clients MUST use client identifiers based solely
+ on the link-layer address */
+ /* NOTE: Using MAC, as it does not reveal extra information,
+ * and some servers might not answer if this option is not sent */
+ network->dhcp_client_identifier = DHCP_CLIENT_ID_MAC;
+ /* RFC 7844 3.10:
+ SHOULD NOT use the Vendor Class Identifier option */
+ network->dhcp_vendor_class_identifier = mfree(network->dhcp_vendor_class_identifier);
+ /* RFC7844 section 3.6.:
+ The client intending to protect its privacy SHOULD only request a
+ minimal number of options in the PRL and SHOULD also randomly shuffle
+ the ordering of option codes in the PRL. If this random ordering
+ cannot be implemented, the client MAY order the option codes in the
+ PRL by option code number (lowest to highest).
+ */
+ /* NOTE: dhcp_use_mtu is false by default,
+ * though it was not initiallized to any value in network_load_one.
+ * Maybe there should be another var called *send*?
+ * (to use the MTU sent by the server but to do not send
+ * the option in the PRL). */
+ network->dhcp_use_mtu = false;
+ /* NOTE: when Anonymize=yes, the PRL route options are sent by default,
+ * but this is needed to use them. */
+ network->dhcp_use_routes = true;
+ /* RFC7844 section 3.6.
+ * same comments as previous option */
+ network->dhcp_use_timezone = false;
+}
+
+int network_load_one(Manager *manager, const char *filename) {
+ _cleanup_free_ char *fname = NULL, *name = NULL;
+ _cleanup_(network_freep) Network *network = NULL;
+ _cleanup_fclose_ FILE *file = NULL;
+ const char *dropin_dirname;
+ Address *address;
+ Route *route;
+ char *d;
+ int r;
+
+ assert(manager);
+ assert(filename);
+
+ file = fopen(filename, "re");
+ if (!file) {
+ if (errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ if (null_or_empty_fd(fileno(file))) {
+ log_debug("Skipping empty file: %s", filename);
+ return 0;
+ }
+
+ fname = strdup(filename);
+ if (!fname)
+ return log_oom();
+
+ name = strdup(basename(filename));
+ if (!name)
+ return log_oom();
+
+ d = strrchr(name, '.');
+ if (!d)
+ return -EINVAL;
+
+ *d = '\0';
+
+ dropin_dirname = strjoina(name, ".network.d");
+
+ network = new(Network, 1);
+ if (!network)
+ return log_oom();
+
+ *network = (Network) {
+ .manager = manager,
+ .filename = TAKE_PTR(fname),
+ .name = TAKE_PTR(name),
+
+ .required_for_online = true,
+ .dhcp = ADDRESS_FAMILY_NO,
+ .dhcp_use_ntp = true,
+ .dhcp_use_dns = true,
+ .dhcp_use_hostname = true,
+ .dhcp_use_routes = true,
+ /* NOTE: this var might be overwriten by network_apply_anonymize_if_set */
+ .dhcp_send_hostname = true,
+ /* To enable/disable RFC7844 Anonymity Profiles */
+ .dhcp_anonymize = false,
+ .dhcp_route_metric = DHCP_ROUTE_METRIC,
+ /* NOTE: this var might be overwrite by network_apply_anonymize_if_set */
+ .dhcp_client_identifier = DHCP_CLIENT_ID_DUID,
+ .dhcp_route_table = RT_TABLE_MAIN,
+ .dhcp_route_table_set = false,
+ /* NOTE: from man: UseMTU=... Defaults to false*/
+ .dhcp_use_mtu = false,
+ /* NOTE: from man: UseTimezone=... Defaults to "no".*/
+ .dhcp_use_timezone = false,
+ .rapid_commit = true,
+
+ .dhcp_server_emit_dns = true,
+ .dhcp_server_emit_ntp = true,
+ .dhcp_server_emit_router = true,
+ .dhcp_server_emit_timezone = true,
+
+ .router_emit_dns = true,
+ .router_emit_domains = true,
+
+ .use_bpdu = -1,
+ .hairpin = -1,
+ .fast_leave = -1,
+ .allow_port_to_be_root = -1,
+ .unicast_flood = -1,
+ .multicast_to_unicast = -1,
+ .priority = LINK_BRIDGE_PORT_PRIORITY_INVALID,
+
+ .lldp_mode = LLDP_MODE_ROUTERS_ONLY,
+
+ .dns_default_route = -1,
+ .llmnr = RESOLVE_SUPPORT_YES,
+ .mdns = RESOLVE_SUPPORT_NO,
+ .dnssec_mode = _DNSSEC_MODE_INVALID,
+ .dns_over_tls_mode = _DNS_OVER_TLS_MODE_INVALID,
+
+ .link_local = ADDRESS_FAMILY_IPV6,
+
+ .ipv6_privacy_extensions = IPV6_PRIVACY_EXTENSIONS_NO,
+ .ipv6_accept_ra = -1,
+ .ipv6_dad_transmits = -1,
+ .ipv6_hop_limit = -1,
+ .ipv6_proxy_ndp = -1,
+ .duid.type = _DUID_TYPE_INVALID,
+ .proxy_arp = -1,
+ .arp = -1,
+ .multicast = -1,
+ .allmulticast = -1,
+ .ipv6_accept_ra_use_dns = true,
+ .ipv6_accept_ra_route_table = RT_TABLE_MAIN,
+ };
+
+ r = config_parse_many(filename, network_dirs, dropin_dirname,
+ "Match\0"
+ "Link\0"
+ "Network\0"
+ "Address\0"
+ "Neighbor\0"
+ "IPv6AddressLabel\0"
+ "RoutingPolicyRule\0"
+ "Route\0"
+ "DHCP\0"
+ "DHCPv4\0" /* compat */
+ "DHCPServer\0"
+ "IPv6AcceptRA\0"
+ "IPv6NDPProxyAddress\0"
+ "Bridge\0"
+ "BridgeFDB\0"
+ "BridgeVLAN\0"
+ "IPv6PrefixDelegation\0"
+ "IPv6Prefix\0"
+ "CAN\0",
+ config_item_perf_lookup, network_network_gperf_lookup,
+ CONFIG_PARSE_WARN, network);
+ if (r < 0) {
+ /* Unset manager here. Otherwise, LIST_REMOVE() in network_free() fails. */
+ network->manager = NULL;
+ return r;
+ }
+
+ network_apply_anonymize_if_set(network);
+
+ /* IPMasquerade=yes implies IPForward=yes */
+ if (network->ip_masquerade)
+ network->ip_forward |= ADDRESS_FAMILY_IPV4;
+
+ if (network->mtu > 0 && network->dhcp_use_mtu) {
+ log_warning("MTUBytes= in [Link] section and UseMTU= in [DHCP] section are set in %s. "
+ "Disabling UseMTU=.", filename);
+ network->dhcp_use_mtu = false;
+ }
+
+ LIST_PREPEND(networks, manager->networks, network);
+
+ r = hashmap_ensure_allocated(&manager->networks_by_name, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(manager->networks_by_name, network->name, network);
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(routes, route, network->static_routes)
+ if (!route->family) {
+ log_warning("Route section without Gateway field configured in %s. "
+ "Ignoring", filename);
+ return 0;
+ }
+
+ LIST_FOREACH(addresses, address, network->static_addresses)
+ if (!address->family) {
+ log_warning("Address section without Address field configured in %s. "
+ "Ignoring", filename);
+ return 0;
+ }
+
+ network = NULL;
+
+ return 0;
+}
+
+int network_load(Manager *manager) {
+ Network *network;
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ int r;
+
+ assert(manager);
+
+ while ((network = manager->networks))
+ network_free(network);
+
+ r = conf_files_list_strv(&files, ".network", NULL, 0, network_dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate network files: %m");
+
+ STRV_FOREACH_BACKWARDS(f, files) {
+ r = network_load_one(manager, *f);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+void network_free(Network *network) {
+ IPv6ProxyNDPAddress *ipv6_proxy_ndp_address;
+ RoutingPolicyRule *rule;
+ FdbEntry *fdb_entry;
+ Neighbor *neighbor;
+ AddressLabel *label;
+ Prefix *prefix;
+ Address *address;
+ Route *route;
+
+ if (!network)
+ return;
+
+ free(network->filename);
+
+ set_free_free(network->match_mac);
+ strv_free(network->match_path);
+ strv_free(network->match_driver);
+ strv_free(network->match_type);
+ strv_free(network->match_name);
+
+ free(network->description);
+ free(network->dhcp_vendor_class_identifier);
+ strv_free(network->dhcp_user_class);
+ free(network->dhcp_hostname);
+
+ free(network->mac);
+
+ strv_free(network->ntp);
+ free(network->dns);
+ strv_free(network->search_domains);
+ strv_free(network->route_domains);
+ strv_free(network->bind_carrier);
+
+ strv_free(network->router_search_domains);
+ free(network->router_dns);
+
+ netdev_unref(network->bridge);
+ netdev_unref(network->bond);
+ netdev_unref(network->vrf);
+
+ hashmap_free_with_destructor(network->stacked_netdevs, netdev_unref);
+
+ while ((route = network->static_routes))
+ route_free(route);
+
+ while ((address = network->static_addresses))
+ address_free(address);
+
+ while ((fdb_entry = network->static_fdb_entries))
+ fdb_entry_free(fdb_entry);
+
+ while ((ipv6_proxy_ndp_address = network->ipv6_proxy_ndp_addresses))
+ ipv6_proxy_ndp_address_free(ipv6_proxy_ndp_address);
+
+ while ((neighbor = network->neighbors))
+ neighbor_free(neighbor);
+
+ while ((label = network->address_labels))
+ address_label_free(label);
+
+ while ((prefix = network->static_prefixes))
+ prefix_free(prefix);
+
+ while ((rule = network->rules))
+ routing_policy_rule_free(rule);
+
+ hashmap_free(network->addresses_by_section);
+ hashmap_free(network->routes_by_section);
+ hashmap_free(network->fdb_entries_by_section);
+ hashmap_free(network->neighbors_by_section);
+ hashmap_free(network->address_labels_by_section);
+ hashmap_free(network->prefixes_by_section);
+ hashmap_free(network->rules_by_section);
+
+ if (network->manager) {
+ if (network->manager->networks)
+ LIST_REMOVE(networks, network->manager->networks, network);
+
+ if (network->manager->networks_by_name && network->name)
+ hashmap_remove(network->manager->networks_by_name, network->name);
+
+ if (network->manager->duids_requesting_uuid)
+ set_remove(network->manager->duids_requesting_uuid, &network->duid);
+ }
+
+ free(network->name);
+
+ condition_free_list(network->match_host);
+ condition_free_list(network->match_virt);
+ condition_free_list(network->match_kernel_cmdline);
+ condition_free_list(network->match_kernel_version);
+ condition_free_list(network->match_arch);
+
+ free(network->dhcp_server_timezone);
+ free(network->dhcp_server_dns);
+ free(network->dhcp_server_ntp);
+
+ set_free_free(network->dnssec_negative_trust_anchors);
+
+ free(network);
+}
+
+int network_get_by_name(Manager *manager, const char *name, Network **ret) {
+ Network *network;
+
+ assert(manager);
+ assert(name);
+ assert(ret);
+
+ network = hashmap_get(manager->networks_by_name, name);
+ if (!network)
+ return -ENOENT;
+
+ *ret = network;
+
+ return 0;
+}
+
+int network_get(Manager *manager, sd_device *device,
+ const char *ifname, const struct ether_addr *address,
+ Network **ret) {
+ const char *path = NULL, *parent_driver = NULL, *driver = NULL, *devtype = NULL;
+ sd_device *parent;
+ Network *network;
+
+ assert(manager);
+ assert(ret);
+
+ if (device) {
+ (void) sd_device_get_property_value(device, "ID_PATH", &path);
+
+ if (sd_device_get_parent(device, &parent) >= 0)
+ (void) sd_device_get_driver(parent, &parent_driver);
+
+ (void) sd_device_get_property_value(device, "ID_NET_DRIVER", &driver);
+
+ (void) sd_device_get_devtype(device, &devtype);
+ }
+
+ LIST_FOREACH(networks, network, manager->networks) {
+ if (net_match_config(network->match_mac, network->match_path,
+ network->match_driver, network->match_type,
+ network->match_name, network->match_host,
+ network->match_virt, network->match_kernel_cmdline,
+ network->match_kernel_version, network->match_arch,
+ address, path, parent_driver, driver,
+ devtype, ifname)) {
+ if (network->match_name && device) {
+ const char *attr;
+ uint8_t name_assign_type = NET_NAME_UNKNOWN;
+
+ if (sd_device_get_sysattr_value(device, "name_assign_type", &attr) >= 0)
+ (void) safe_atou8(attr, &name_assign_type);
+
+ if (name_assign_type == NET_NAME_ENUM)
+ log_warning("%s: found matching network '%s', based on potentially unpredictable ifname",
+ ifname, network->filename);
+ else
+ log_debug("%s: found matching network '%s'", ifname, network->filename);
+ } else
+ log_debug("%s: found matching network '%s'", ifname, network->filename);
+
+ *ret = network;
+ return 0;
+ }
+ }
+
+ *ret = NULL;
+
+ return -ENOENT;
+}
+
+int network_apply(Network *network, Link *link) {
+ int r;
+
+ assert(network);
+ assert(link);
+
+ link->network = network;
+
+ if (network->ipv4ll_route) {
+ Route *route;
+
+ r = route_new_static(network, NULL, 0, &route);
+ if (r < 0)
+ return r;
+
+ r = inet_pton(AF_INET, "169.254.0.0", &route->dst.in);
+ if (r == 0)
+ return -EINVAL;
+ if (r < 0)
+ return -errno;
+
+ route->family = AF_INET;
+ route->dst_prefixlen = 16;
+ route->scope = RT_SCOPE_LINK;
+ route->priority = IPV4LL_ROUTE_METRIC;
+ route->protocol = RTPROT_STATIC;
+ }
+
+ if (network->n_dns > 0 ||
+ !strv_isempty(network->ntp) ||
+ !strv_isempty(network->search_domains) ||
+ !strv_isempty(network->route_domains))
+ link_dirty(link);
+
+ return 0;
+}
+
+bool network_has_static_ipv6_addresses(Network *network) {
+ Address *address;
+
+ assert(network);
+
+ LIST_FOREACH(addresses, address, network->static_addresses) {
+ if (address->family == AF_INET6)
+ return true;
+ }
+
+ return false;
+}
+
+int config_parse_netdev(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Network *network = userdata;
+ _cleanup_free_ char *kind_string = NULL;
+ char *p;
+ NetDev *netdev;
+ NetDevKind kind;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ kind_string = strdup(lvalue);
+ if (!kind_string)
+ return log_oom();
+
+ /* the keys are CamelCase versions of the kind */
+ for (p = kind_string; *p; p++)
+ *p = tolower(*p);
+
+ kind = netdev_kind_from_string(kind_string);
+ if (kind == _NETDEV_KIND_INVALID) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid NetDev kind: %s", lvalue);
+ return 0;
+ }
+
+ r = netdev_get(network->manager, rvalue, &netdev);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "%s could not be found, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (netdev->kind != kind) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "NetDev is not a %s, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ switch (kind) {
+ case NETDEV_KIND_BRIDGE:
+ network->bridge = netdev_unref(network->bridge);
+ network->bridge = netdev;
+
+ break;
+ case NETDEV_KIND_BOND:
+ network->bond = netdev_unref(network->bond);
+ network->bond = netdev;
+
+ break;
+ case NETDEV_KIND_VRF:
+ network->vrf = netdev_unref(network->vrf);
+ network->vrf = netdev;
+
+ break;
+ case NETDEV_KIND_VLAN:
+ case NETDEV_KIND_MACVLAN:
+ case NETDEV_KIND_MACVTAP:
+ case NETDEV_KIND_IPVLAN:
+ case NETDEV_KIND_VXLAN:
+ case NETDEV_KIND_VCAN:
+ r = hashmap_ensure_allocated(&network->stacked_netdevs, &string_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = hashmap_put(network->stacked_netdevs, netdev->ifname, netdev);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Cannot add NetDev '%s' to network: %m", rvalue);
+ return 0;
+ }
+
+ break;
+ default:
+ assert_not_reached("Cannot parse NetDev");
+ }
+
+ netdev_ref(netdev);
+
+ return 0;
+}
+
+int config_parse_domains(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ const char *p;
+ Network *n = data;
+ int r;
+
+ assert(n);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ n->search_domains = strv_free(n->search_domains);
+ n->route_domains = strv_free(n->route_domains);
+ return 0;
+ }
+
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *w = NULL, *normalized = NULL;
+ const char *domain;
+ bool is_route;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract search or route domain, ignoring: %s", rvalue);
+ break;
+ }
+ if (r == 0)
+ break;
+
+ is_route = w[0] == '~';
+ domain = is_route ? w + 1 : w;
+
+ if (dns_name_is_root(domain) || streq(domain, "*")) {
+ /* If the root domain appears as is, or the special token "*" is found, we'll consider this as
+ * routing domain, unconditionally. */
+ is_route = true;
+ domain = "."; /* make sure we don't allow empty strings, thus write the root domain as "." */
+ } else {
+ r = dns_name_normalize(domain, 0, &normalized);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "'%s' is not a valid domain name, ignoring.", domain);
+ continue;
+ }
+
+ domain = normalized;
+
+ if (is_localhost(domain)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "'localhost' domain names may not be configure as search or route domains, ignoring assignment: %s", domain);
+ continue;
+ }
+ }
+
+ if (is_route)
+ r = strv_extend(&n->route_domains, domain);
+ else
+ r = strv_extend(&n->search_domains, domain);
+ if (r < 0)
+ return log_oom();
+ }
+
+ strv_uniq(n->route_domains);
+ strv_uniq(n->search_domains);
+
+ return 0;
+}
+
+int config_parse_tunnel(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Network *network = userdata;
+ NetDev *netdev;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = netdev_get(network->manager, rvalue, &netdev);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Tunnel is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ if (!IN_SET(netdev->kind,
+ NETDEV_KIND_IPIP,
+ NETDEV_KIND_SIT,
+ NETDEV_KIND_GRE,
+ NETDEV_KIND_GRETAP,
+ NETDEV_KIND_IP6GRE,
+ NETDEV_KIND_IP6GRETAP,
+ NETDEV_KIND_VTI,
+ NETDEV_KIND_VTI6,
+ NETDEV_KIND_IP6TNL)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "NetDev is not a tunnel, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = hashmap_ensure_allocated(&network->stacked_netdevs, &string_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = hashmap_put(network->stacked_netdevs, netdev->ifname, netdev);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Cannot add VLAN '%s' to network, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ netdev_ref(netdev);
+
+ return 0;
+}
+
+int config_parse_ipv4ll(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ AddressFamilyBoolean *link_local = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* Note that this is mostly like
+ * config_parse_address_family_boolean(), except that it
+ * applies only to IPv4 */
+
+ SET_FLAG(*link_local, ADDRESS_FAMILY_IPV4, parse_boolean(rvalue));
+
+ return 0;
+}
+
+int config_parse_dhcp(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ AddressFamilyBoolean *dhcp = data, s;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* Note that this is mostly like
+ * config_parse_address_family_boolean(), except that it
+ * understands some old names for the enum values */
+
+ s = address_family_boolean_from_string(rvalue);
+ if (s < 0) {
+
+ /* Previously, we had a slightly different enum here,
+ * support its values for compatbility. */
+
+ if (streq(rvalue, "none"))
+ s = ADDRESS_FAMILY_NO;
+ else if (streq(rvalue, "v4"))
+ s = ADDRESS_FAMILY_IPV4;
+ else if (streq(rvalue, "v6"))
+ s = ADDRESS_FAMILY_IPV6;
+ else if (streq(rvalue, "both"))
+ s = ADDRESS_FAMILY_YES;
+ else {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse DHCP option, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ *dhcp = s;
+ return 0;
+}
+
+static const char* const dhcp_client_identifier_table[_DHCP_CLIENT_ID_MAX] = {
+ [DHCP_CLIENT_ID_MAC] = "mac",
+ [DHCP_CLIENT_ID_DUID] = "duid",
+ [DHCP_CLIENT_ID_DUID_ONLY] = "duid-only",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(dhcp_client_identifier, DHCPClientIdentifier);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dhcp_client_identifier, dhcp_client_identifier, DHCPClientIdentifier, "Failed to parse client identifier type");
+
+int config_parse_ipv6token(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ union in_addr_union buffer;
+ struct in6_addr *token = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(token);
+
+ r = in_addr_from_string(AF_INET6, rvalue, &buffer);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse IPv6 token, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = in_addr_is_null(AF_INET6, &buffer);
+ if (r != 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "IPv6 token cannot be the ANY address, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if ((buffer.in6.s6_addr32[0] | buffer.in6.s6_addr32[1]) != 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "IPv6 token cannot be longer than 64 bits, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *token = buffer.in6;
+
+ return 0;
+}
+
+static const char* const ipv6_privacy_extensions_table[_IPV6_PRIVACY_EXTENSIONS_MAX] = {
+ [IPV6_PRIVACY_EXTENSIONS_NO] = "no",
+ [IPV6_PRIVACY_EXTENSIONS_PREFER_PUBLIC] = "prefer-public",
+ [IPV6_PRIVACY_EXTENSIONS_YES] = "yes",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(ipv6_privacy_extensions, IPv6PrivacyExtensions);
+
+int config_parse_ipv6_privacy_extensions(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ IPv6PrivacyExtensions *ipv6_privacy_extensions = data;
+ int k;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(ipv6_privacy_extensions);
+
+ /* Our enum shall be a superset of booleans, hence first try
+ * to parse as boolean, and then as enum */
+
+ k = parse_boolean(rvalue);
+ if (k > 0)
+ *ipv6_privacy_extensions = IPV6_PRIVACY_EXTENSIONS_YES;
+ else if (k == 0)
+ *ipv6_privacy_extensions = IPV6_PRIVACY_EXTENSIONS_NO;
+ else {
+ IPv6PrivacyExtensions s;
+
+ s = ipv6_privacy_extensions_from_string(rvalue);
+ if (s < 0) {
+
+ if (streq(rvalue, "kernel"))
+ s = _IPV6_PRIVACY_EXTENSIONS_INVALID;
+ else {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse IPv6 privacy extensions option, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ *ipv6_privacy_extensions = s;
+ }
+
+ return 0;
+}
+
+int config_parse_hostname(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *hn = NULL;
+ char **hostname = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = config_parse_string(unit, filename, line, section, section_line, lvalue, ltype, rvalue, &hn, userdata);
+ if (r < 0)
+ return r;
+
+ if (!hostname_is_valid(hn, false)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Hostname is not valid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = dns_name_is_valid(hn);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to check validity of hostname '%s', ignoring assignment: %m", rvalue);
+ return 0;
+ }
+ if (r == 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Hostname is not a valid DNS domain name, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ return free_and_replace(*hostname, hn);
+}
+
+int config_parse_timezone(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *tz = NULL;
+ char **datap = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = config_parse_string(unit, filename, line, section, section_line, lvalue, ltype, rvalue, &tz, userdata);
+ if (r < 0)
+ return r;
+
+ if (!timezone_is_valid(tz, LOG_ERR)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Timezone is not valid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ return free_and_replace(*datap, tz);
+}
+
+int config_parse_dhcp_server_dns(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *n = data;
+ const char *p = rvalue;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ for (;;) {
+ _cleanup_free_ char *w = NULL;
+ struct in_addr a, *m;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract word, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ break;
+
+ if (inet_pton(AF_INET, w, &a) <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse DNS server address, ignoring: %s", w);
+ continue;
+ }
+
+ m = reallocarray(n->dhcp_server_dns, n->n_dhcp_server_dns + 1, sizeof(struct in_addr));
+ if (!m)
+ return log_oom();
+
+ m[n->n_dhcp_server_dns++] = a;
+ n->dhcp_server_dns = m;
+ }
+
+ return 0;
+}
+
+int config_parse_radv_dns(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *n = data;
+ const char *p = rvalue;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ for (;;) {
+ _cleanup_free_ char *w = NULL;
+ union in_addr_union a;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract word, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ break;
+
+ if (in_addr_from_string(AF_INET6, w, &a) >= 0) {
+ struct in6_addr *m;
+
+ m = reallocarray(n->router_dns, n->n_router_dns + 1, sizeof(struct in6_addr));
+ if (!m)
+ return log_oom();
+
+ m[n->n_router_dns++] = a.in6;
+ n->router_dns = m;
+
+ } else
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse DNS server address, ignoring: %s", w);
+
+ }
+
+ return 0;
+}
+
+int config_parse_radv_search_domains(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *n = data;
+ const char *p = rvalue;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ for (;;) {
+ _cleanup_free_ char *w = NULL, *idna = NULL;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract word, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ break;
+
+ r = dns_name_apply_idna(w, &idna);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to apply IDNA to domain name '%s', ignoring: %m", w);
+ continue;
+ }
+ if (r > 0) {
+ r = strv_push(&n->router_search_domains, idna);
+ if (r >= 0)
+ idna = NULL;
+ } else {
+ r = strv_push(&n->router_search_domains, w);
+ if (r >= 0)
+ w = NULL;
+ }
+ }
+
+ return 0;
+}
+
+int config_parse_dhcp_server_ntp(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *n = data;
+ const char *p = rvalue;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ for (;;) {
+ _cleanup_free_ char *w = NULL;
+ struct in_addr a, *m;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract word, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ if (inet_pton(AF_INET, w, &a) <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse NTP server address, ignoring: %s", w);
+ continue;
+ }
+
+ m = reallocarray(n->dhcp_server_ntp, n->n_dhcp_server_ntp + 1, sizeof(struct in_addr));
+ if (!m)
+ return log_oom();
+
+ m[n->n_dhcp_server_ntp++] = a;
+ n->dhcp_server_ntp = m;
+ }
+}
+
+int config_parse_dns(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *n = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ for (;;) {
+ _cleanup_free_ char *w = NULL;
+ union in_addr_union a;
+ struct in_addr_data *m;
+ int family;
+
+ r = extract_first_word(&rvalue, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ break;
+ }
+ if (r == 0)
+ break;
+
+ r = in_addr_from_string_auto(w, &family, &a);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse dns server address, ignoring: %s", w);
+ continue;
+ }
+
+ m = reallocarray(n->dns, n->n_dns + 1, sizeof(struct in_addr_data));
+ if (!m)
+ return log_oom();
+
+ m[n->n_dns++] = (struct in_addr_data) {
+ .family = family,
+ .address = a,
+ };
+
+ n->dns = m;
+ }
+
+ return 0;
+}
+
+int config_parse_dnssec_negative_trust_anchors(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ const char *p = rvalue;
+ Network *n = data;
+ int r;
+
+ assert(n);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ n->dnssec_negative_trust_anchors = set_free_free(n->dnssec_negative_trust_anchors);
+ return 0;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *w = NULL;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract negative trust anchor domain, ignoring: %s", rvalue);
+ break;
+ }
+ if (r == 0)
+ break;
+
+ r = dns_name_is_valid(w);
+ if (r <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "%s is not a valid domain name, ignoring.", w);
+ continue;
+ }
+
+ r = set_ensure_allocated(&n->dnssec_negative_trust_anchors, &dns_name_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(n->dnssec_negative_trust_anchors, w);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ w = NULL;
+ }
+
+ return 0;
+}
+
+int config_parse_ntp(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***l = data;
+ int r;
+
+ assert(l);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ *l = strv_free(*l);
+ return 0;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *w = NULL;
+
+ r = extract_first_word(&rvalue, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract NTP server name, ignoring: %s", rvalue);
+ break;
+ }
+ if (r == 0)
+ break;
+
+ r = dns_name_is_valid_or_address(w);
+ if (r <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "%s is not a valid domain name or IP address, ignoring.", w);
+ continue;
+ }
+
+ r = strv_push(l, w);
+ if (r < 0)
+ return log_oom();
+
+ w = NULL;
+ }
+
+ return 0;
+}
+
+int config_parse_dhcp_user_class(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***l = data;
+ int r;
+
+ assert(l);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ *l = strv_free(*l);
+ return 0;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *w = NULL;
+
+ r = extract_first_word(&rvalue, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to split user classes option, ignoring: %s", rvalue);
+ break;
+ }
+ if (r == 0)
+ break;
+
+ if (strlen(w) > 255) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "%s length is not in the range 1-255, ignoring.", w);
+ continue;
+ }
+
+ r = strv_push(l, w);
+ if (r < 0)
+ return log_oom();
+
+ w = NULL;
+ }
+
+ return 0;
+}
+
+int config_parse_dhcp_route_table(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Network *network = data;
+ uint32_t rt;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou32(rvalue, &rt);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Unable to read RouteTable, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ network->dhcp_route_table = rt;
+ network->dhcp_route_table_set = true;
+
+ return 0;
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dhcp_use_domains, dhcp_use_domains, DHCPUseDomains, "Failed to parse DHCP use domains setting");
+
+static const char* const dhcp_use_domains_table[_DHCP_USE_DOMAINS_MAX] = {
+ [DHCP_USE_DOMAINS_NO] = "no",
+ [DHCP_USE_DOMAINS_ROUTE] = "route",
+ [DHCP_USE_DOMAINS_YES] = "yes",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dhcp_use_domains, DHCPUseDomains, DHCP_USE_DOMAINS_YES);
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_lldp_mode, lldp_mode, LLDPMode, "Failed to parse LLDP= setting.");
+
+static const char* const lldp_mode_table[_LLDP_MODE_MAX] = {
+ [LLDP_MODE_NO] = "no",
+ [LLDP_MODE_YES] = "yes",
+ [LLDP_MODE_ROUTERS_ONLY] = "routers-only",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(lldp_mode, LLDPMode, LLDP_MODE_YES);
+
+int config_parse_iaid(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Network *network = data;
+ uint32_t iaid;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(network);
+
+ r = safe_atou32(rvalue, &iaid);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Unable to read IAID, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ network->iaid = iaid;
+ network->iaid_set = true;
+
+ return 0;
+}
diff --git a/src/network/networkd-network.h b/src/network/networkd-network.h
new file mode 100644
index 0000000..f6e62cd
--- /dev/null
+++ b/src/network/networkd-network.h
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-device.h"
+
+#include "condition.h"
+#include "conf-parser.h"
+#include "dhcp-identifier.h"
+#include "hashmap.h"
+#include "netdev/netdev.h"
+#include "networkd-address-label.h"
+#include "networkd-address.h"
+#include "networkd-brvlan.h"
+#include "networkd-fdb.h"
+#include "networkd-ipv6-proxy-ndp.h"
+#include "networkd-lldp-tx.h"
+#include "networkd-neighbor.h"
+#include "networkd-radv.h"
+#include "networkd-route.h"
+#include "networkd-routing-policy-rule.h"
+#include "networkd-util.h"
+#include "resolve-util.h"
+
+#define DHCP_ROUTE_METRIC 1024
+#define IPV4LL_ROUTE_METRIC 2048
+
+#define BRIDGE_VLAN_BITMAP_MAX 4096
+#define BRIDGE_VLAN_BITMAP_LEN (BRIDGE_VLAN_BITMAP_MAX / 32)
+
+typedef enum DHCPClientIdentifier {
+ DHCP_CLIENT_ID_MAC,
+ DHCP_CLIENT_ID_DUID,
+ /* The following option may not be good for RFC regarding DHCP (3315 and 4361).
+ * But some setups require this. E.g., Sky Broadband, the second largest provider in the UK
+ * requires the client id to be set to a custom string, reported at
+ * https://github.com/systemd/systemd/issues/7828 */
+ DHCP_CLIENT_ID_DUID_ONLY,
+ _DHCP_CLIENT_ID_MAX,
+ _DHCP_CLIENT_ID_INVALID = -1,
+} DHCPClientIdentifier;
+
+typedef enum IPv6PrivacyExtensions {
+ /* The values map to the kernel's /proc/sys/net/ipv6/conf/xxx/use_tempaddr values */
+ IPV6_PRIVACY_EXTENSIONS_NO,
+ IPV6_PRIVACY_EXTENSIONS_PREFER_PUBLIC,
+ IPV6_PRIVACY_EXTENSIONS_YES, /* aka prefer-temporary */
+ _IPV6_PRIVACY_EXTENSIONS_MAX,
+ _IPV6_PRIVACY_EXTENSIONS_INVALID = -1,
+} IPv6PrivacyExtensions;
+
+typedef enum DHCPUseDomains {
+ DHCP_USE_DOMAINS_NO,
+ DHCP_USE_DOMAINS_YES,
+ DHCP_USE_DOMAINS_ROUTE,
+ _DHCP_USE_DOMAINS_MAX,
+ _DHCP_USE_DOMAINS_INVALID = -1,
+} DHCPUseDomains;
+
+typedef enum LLDPMode {
+ LLDP_MODE_NO = 0,
+ LLDP_MODE_YES = 1,
+ LLDP_MODE_ROUTERS_ONLY = 2,
+ _LLDP_MODE_MAX,
+ _LLDP_MODE_INVALID = -1,
+} LLDPMode;
+
+typedef struct DUID {
+ /* Value of Type in [DHCP] section */
+ DUIDType type;
+
+ uint8_t raw_data_len;
+ uint8_t raw_data[MAX_DUID_LEN];
+ usec_t llt_time;
+} DUID;
+
+typedef enum RADVPrefixDelegation {
+ RADV_PREFIX_DELEGATION_NONE,
+ RADV_PREFIX_DELEGATION_STATIC,
+ RADV_PREFIX_DELEGATION_DHCP6,
+ RADV_PREFIX_DELEGATION_BOTH,
+ _RADV_PREFIX_DELEGATION_MAX,
+ _RADV_PREFIX_DELEGATION_INVALID = -1,
+} RADVPrefixDelegation;
+
+typedef struct NetworkConfigSection {
+ unsigned line;
+ char filename[];
+} NetworkConfigSection;
+
+int network_config_section_new(const char *filename, unsigned line, NetworkConfigSection **s);
+void network_config_section_free(NetworkConfigSection *network);
+DEFINE_TRIVIAL_CLEANUP_FUNC(NetworkConfigSection*, network_config_section_free);
+extern const struct hash_ops network_config_hash_ops;
+
+typedef struct Manager Manager;
+
+struct Network {
+ Manager *manager;
+
+ char *filename;
+ char *name;
+
+ Set *match_mac;
+ char **match_path;
+ char **match_driver;
+ char **match_type;
+ char **match_name;
+
+ Condition *match_host;
+ Condition *match_virt;
+ Condition *match_kernel_cmdline;
+ Condition *match_kernel_version;
+ Condition *match_arch;
+
+ char *description;
+
+ NetDev *bridge;
+ NetDev *bond;
+ NetDev *vrf;
+ Hashmap *stacked_netdevs;
+
+ /* DHCP Client Support */
+ AddressFamilyBoolean dhcp;
+ DHCPClientIdentifier dhcp_client_identifier;
+ char *dhcp_vendor_class_identifier;
+ char **dhcp_user_class;
+ char *dhcp_hostname;
+ unsigned dhcp_route_metric;
+ uint32_t dhcp_route_table;
+ uint16_t dhcp_client_port;
+ bool dhcp_anonymize;
+ bool dhcp_send_hostname;
+ bool dhcp_broadcast;
+ bool dhcp_critical;
+ bool dhcp_use_dns;
+ bool dhcp_use_ntp;
+ bool dhcp_use_mtu;
+ bool dhcp_use_routes;
+ bool dhcp_use_timezone;
+ bool rapid_commit;
+ bool dhcp_use_hostname;
+ bool dhcp_route_table_set;
+ DHCPUseDomains dhcp_use_domains;
+
+ /* DHCP Server Support */
+ bool dhcp_server;
+ bool dhcp_server_emit_dns;
+ struct in_addr *dhcp_server_dns;
+ unsigned n_dhcp_server_dns;
+ bool dhcp_server_emit_ntp;
+ struct in_addr *dhcp_server_ntp;
+ unsigned n_dhcp_server_ntp;
+ bool dhcp_server_emit_router;
+ bool dhcp_server_emit_timezone;
+ char *dhcp_server_timezone;
+ usec_t dhcp_server_default_lease_time_usec, dhcp_server_max_lease_time_usec;
+ uint32_t dhcp_server_pool_offset;
+ uint32_t dhcp_server_pool_size;
+
+ /* IPV4LL Support */
+ AddressFamilyBoolean link_local;
+ bool ipv4ll_route;
+
+ /* IPv6 prefix delegation support */
+ RADVPrefixDelegation router_prefix_delegation;
+ usec_t router_lifetime_usec;
+ uint8_t router_preference;
+ bool router_managed;
+ bool router_other_information;
+ bool router_emit_dns;
+ bool router_emit_domains;
+ usec_t router_dns_lifetime_usec;
+ struct in6_addr *router_dns;
+ unsigned n_router_dns;
+ char **router_search_domains;
+ bool dhcp6_force_pd_other_information; /* Start DHCPv6 PD also when 'O'
+ RA flag is set, see RFC 7084,
+ WPD-4 */
+
+ /* Bridge Support */
+ int use_bpdu;
+ int hairpin;
+ int fast_leave;
+ int allow_port_to_be_root;
+ int unicast_flood;
+ int multicast_to_unicast;
+ uint32_t cost;
+ uint16_t priority;
+
+ bool use_br_vlan;
+ uint16_t pvid;
+ uint32_t br_vid_bitmap[BRIDGE_VLAN_BITMAP_LEN];
+ uint32_t br_untagged_bitmap[BRIDGE_VLAN_BITMAP_LEN];
+
+ /* CAN support */
+ size_t can_bitrate;
+ unsigned can_sample_point;
+ usec_t can_restart_us;
+
+ AddressFamilyBoolean ip_forward;
+ bool ip_masquerade;
+
+ int ipv6_accept_ra;
+ int ipv6_dad_transmits;
+ int ipv6_hop_limit;
+ int ipv6_proxy_ndp;
+ int proxy_arp;
+ uint32_t ipv6_mtu;
+
+ bool ipv6_accept_ra_use_dns;
+ bool active_slave;
+ bool primary_slave;
+ DHCPUseDomains ipv6_accept_ra_use_domains;
+ uint32_t ipv6_accept_ra_route_table;
+
+ union in_addr_union ipv6_token;
+ IPv6PrivacyExtensions ipv6_privacy_extensions;
+
+ struct ether_addr *mac;
+ uint32_t mtu;
+ int arp;
+ int multicast;
+ int allmulticast;
+ bool unmanaged;
+ bool configure_without_carrier;
+ uint32_t iaid;
+ DUID duid;
+
+ bool iaid_set;
+
+ bool required_for_online; /* Is this network required to be considered online? */
+
+ LLDPMode lldp_mode; /* LLDP reception */
+ LLDPEmit lldp_emit; /* LLDP transmission */
+
+ LIST_HEAD(Address, static_addresses);
+ LIST_HEAD(Route, static_routes);
+ LIST_HEAD(FdbEntry, static_fdb_entries);
+ LIST_HEAD(IPv6ProxyNDPAddress, ipv6_proxy_ndp_addresses);
+ LIST_HEAD(Neighbor, neighbors);
+ LIST_HEAD(AddressLabel, address_labels);
+ LIST_HEAD(Prefix, static_prefixes);
+ LIST_HEAD(RoutingPolicyRule, rules);
+
+ unsigned n_static_addresses;
+ unsigned n_static_routes;
+ unsigned n_static_fdb_entries;
+ unsigned n_ipv6_proxy_ndp_addresses;
+ unsigned n_neighbors;
+ unsigned n_address_labels;
+ unsigned n_static_prefixes;
+ unsigned n_rules;
+
+ Hashmap *addresses_by_section;
+ Hashmap *routes_by_section;
+ Hashmap *fdb_entries_by_section;
+ Hashmap *neighbors_by_section;
+ Hashmap *address_labels_by_section;
+ Hashmap *prefixes_by_section;
+ Hashmap *rules_by_section;
+
+ /* All kinds of DNS configuration */
+ struct in_addr_data *dns;
+ unsigned n_dns;
+ char **search_domains, **route_domains;
+ int dns_default_route;
+ ResolveSupport llmnr;
+ ResolveSupport mdns;
+ DnssecMode dnssec_mode;
+ DnsOverTlsMode dns_over_tls_mode;
+ Set *dnssec_negative_trust_anchors;
+
+ char **ntp;
+ char **bind_carrier;
+
+ LIST_FIELDS(Network, networks);
+};
+
+void network_free(Network *network);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Network*, network_free);
+
+int network_load(Manager *manager);
+int network_load_one(Manager *manager, const char *filename);
+
+int network_get_by_name(Manager *manager, const char *name, Network **ret);
+int network_get(Manager *manager, sd_device *device, const char *ifname, const struct ether_addr *mac, Network **ret);
+int network_apply(Network *network, Link *link);
+void network_apply_anonymize_if_set(Network *network);
+
+bool network_has_static_ipv6_addresses(Network *network);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_netdev);
+CONFIG_PARSER_PROTOTYPE(config_parse_domains);
+CONFIG_PARSER_PROTOTYPE(config_parse_tunnel);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp);
+CONFIG_PARSER_PROTOTYPE(config_parse_dns);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_client_identifier);
+CONFIG_PARSER_PROTOTYPE(config_parse_ipv6token);
+CONFIG_PARSER_PROTOTYPE(config_parse_ipv6_privacy_extensions);
+CONFIG_PARSER_PROTOTYPE(config_parse_hostname);
+CONFIG_PARSER_PROTOTYPE(config_parse_timezone);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_server_dns);
+CONFIG_PARSER_PROTOTYPE(config_parse_radv_dns);
+CONFIG_PARSER_PROTOTYPE(config_parse_radv_search_domains);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_server_ntp);
+CONFIG_PARSER_PROTOTYPE(config_parse_dnssec_negative_trust_anchors);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_use_domains);
+CONFIG_PARSER_PROTOTYPE(config_parse_lldp_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_route_table);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_user_class);
+CONFIG_PARSER_PROTOTYPE(config_parse_ntp);
+CONFIG_PARSER_PROTOTYPE(config_parse_iaid);
+/* Legacy IPv4LL support */
+CONFIG_PARSER_PROTOTYPE(config_parse_ipv4ll);
+
+const struct ConfigPerfItem* network_network_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+extern const sd_bus_vtable network_vtable[];
+
+int network_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error);
+int network_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error);
+
+const char* ipv6_privacy_extensions_to_string(IPv6PrivacyExtensions i) _const_;
+IPv6PrivacyExtensions ipv6_privacy_extensions_from_string(const char *s) _pure_;
+
+const char* dhcp_use_domains_to_string(DHCPUseDomains p) _const_;
+DHCPUseDomains dhcp_use_domains_from_string(const char *s) _pure_;
+
+const char* lldp_mode_to_string(LLDPMode m) _const_;
+LLDPMode lldp_mode_from_string(const char *s) _pure_;
+
+const char* radv_prefix_delegation_to_string(RADVPrefixDelegation i) _const_;
+RADVPrefixDelegation radv_prefix_delegation_from_string(const char *s) _pure_;
diff --git a/src/network/networkd-radv.c b/src/network/networkd-radv.c
new file mode 100644
index 0000000..92cead0
--- /dev/null
+++ b/src/network/networkd-radv.c
@@ -0,0 +1,512 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2017 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/icmp6.h>
+#include <arpa/inet.h>
+
+#include "networkd-address.h"
+#include "networkd-manager.h"
+#include "networkd-radv.h"
+#include "parse-util.h"
+#include "sd-radv.h"
+#include "string-util.h"
+#include "string-table.h"
+#include "strv.h"
+
+static const char * const radv_prefix_delegation_table[_RADV_PREFIX_DELEGATION_MAX] = {
+ [RADV_PREFIX_DELEGATION_NONE] = "no",
+ [RADV_PREFIX_DELEGATION_STATIC] = "static",
+ [RADV_PREFIX_DELEGATION_DHCP6] = "dhcpv6",
+ [RADV_PREFIX_DELEGATION_BOTH] = "yes",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(
+ radv_prefix_delegation,
+ RADVPrefixDelegation,
+ RADV_PREFIX_DELEGATION_BOTH);
+
+int config_parse_router_prefix_delegation(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ RADVPrefixDelegation d;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ d = radv_prefix_delegation_from_string(rvalue);
+ if (d < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, -EINVAL, "Invalid router prefix delegation '%s', ignoring assignment.", rvalue);
+ return 0;
+ }
+
+ network->router_prefix_delegation = d;
+
+ return 0;
+}
+
+int config_parse_router_preference(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Network *network = userdata;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(rvalue, "high"))
+ network->router_preference = SD_NDISC_PREFERENCE_HIGH;
+ else if (STR_IN_SET(rvalue, "medium", "normal", "default"))
+ network->router_preference = SD_NDISC_PREFERENCE_MEDIUM;
+ else if (streq(rvalue, "low"))
+ network->router_preference = SD_NDISC_PREFERENCE_LOW;
+ else
+ log_syntax(unit, LOG_ERR, filename, line, -EINVAL, "Router preference '%s' is invalid, ignoring assignment: %m", rvalue);
+
+ return 0;
+}
+
+void prefix_free(Prefix *prefix) {
+ if (!prefix)
+ return;
+
+ if (prefix->network) {
+ LIST_REMOVE(prefixes, prefix->network->static_prefixes, prefix);
+ assert(prefix->network->n_static_prefixes > 0);
+ prefix->network->n_static_prefixes--;
+
+ if (prefix->section)
+ hashmap_remove(prefix->network->prefixes_by_section,
+ prefix->section);
+ }
+
+ network_config_section_free(prefix->section);
+ prefix->radv_prefix = sd_radv_prefix_unref(prefix->radv_prefix);
+
+ free(prefix);
+}
+
+int prefix_new(Prefix **ret) {
+ _cleanup_(prefix_freep) Prefix *prefix = NULL;
+
+ prefix = new0(Prefix, 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ if (sd_radv_prefix_new(&prefix->radv_prefix) < 0)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(prefix);
+
+ return 0;
+}
+
+int prefix_new_static(Network *network, const char *filename,
+ unsigned section_line, Prefix **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(prefix_freep) Prefix *prefix = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(!!filename == (section_line > 0));
+
+ if (filename) {
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ if (section_line) {
+ prefix = hashmap_get(network->prefixes_by_section, n);
+ if (prefix) {
+ *ret = TAKE_PTR(prefix);
+
+ return 0;
+ }
+ }
+ }
+
+ r = prefix_new(&prefix);
+ if (r < 0)
+ return r;
+
+ prefix->network = network;
+ LIST_APPEND(prefixes, network->static_prefixes, prefix);
+ network->n_static_prefixes++;
+
+ if (filename) {
+ prefix->section = TAKE_PTR(n);
+
+ r = hashmap_ensure_allocated(&network->prefixes_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->prefixes_by_section, prefix->section, prefix);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(prefix);
+
+ return 0;
+}
+
+int config_parse_prefix(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(prefix_freep) Prefix *p = NULL;
+ uint8_t prefixlen = 64;
+ union in_addr_union in6addr;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = prefix_new_static(network, filename, section_line, &p);
+ if (r < 0)
+ return r;
+
+ r = in_addr_prefix_from_string(rvalue, AF_INET6, &in6addr, &prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Prefix is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ if (sd_radv_prefix_set_prefix(p->radv_prefix, &in6addr.in6, prefixlen) < 0)
+ return -EADDRNOTAVAIL;
+
+ log_syntax(unit, LOG_INFO, filename, line, r, "Found prefix %s", rvalue);
+
+ p = NULL;
+
+ return 0;
+}
+
+int config_parse_prefix_flags(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Network *network = userdata;
+ _cleanup_(prefix_freep) Prefix *p = NULL;
+ int r, val;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = prefix_new_static(network, filename, section_line, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse address flag, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ val = r;
+
+ if (streq(lvalue, "OnLink"))
+ r = sd_radv_prefix_set_onlink(p->radv_prefix, val);
+ else if (streq(lvalue, "AddressAutoconfiguration"))
+ r = sd_radv_prefix_set_address_autoconfiguration(p->radv_prefix, val);
+ if (r < 0)
+ return r;
+
+ p = NULL;
+
+ return 0;
+}
+
+int config_parse_prefix_lifetime(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Network *network = userdata;
+ _cleanup_(prefix_freep) Prefix *p = NULL;
+ usec_t usec;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = prefix_new_static(network, filename, section_line, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_sec(rvalue, &usec);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Lifetime is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ /* a value of 0xffffffff represents infinity */
+ if (streq(lvalue, "PreferredLifetimeSec"))
+ r = sd_radv_prefix_set_preferred_lifetime(p->radv_prefix,
+ DIV_ROUND_UP(usec, USEC_PER_SEC));
+ else if (streq(lvalue, "ValidLifetimeSec"))
+ r = sd_radv_prefix_set_valid_lifetime(p->radv_prefix,
+ DIV_ROUND_UP(usec, USEC_PER_SEC));
+ if (r < 0)
+ return r;
+
+ p = NULL;
+
+ return 0;
+}
+
+static int radv_get_ip6dns(Network *network, struct in6_addr **dns,
+ size_t *n_dns) {
+ _cleanup_free_ struct in6_addr *addresses = NULL;
+ size_t i, n_addresses = 0, n_allocated = 0;
+
+ assert(network);
+ assert(dns);
+ assert(n_dns);
+
+ for (i = 0; i < network->n_dns; i++) {
+ union in_addr_union *addr;
+
+ if (network->dns[i].family != AF_INET6)
+ continue;
+
+ addr = &network->dns[i].address;
+
+ if (in_addr_is_null(AF_INET6, addr) ||
+ in_addr_is_link_local(AF_INET6, addr) ||
+ in_addr_is_localhost(AF_INET6, addr))
+ continue;
+
+ if (!GREEDY_REALLOC(addresses, n_allocated, n_addresses + 1))
+ return -ENOMEM;
+
+ addresses[n_addresses++] = addr->in6;
+ }
+
+ if (addresses) {
+ *dns = TAKE_PTR(addresses);
+
+ *n_dns = n_addresses;
+ }
+
+ return n_addresses;
+}
+
+static int radv_set_dns(Link *link, Link *uplink) {
+ _cleanup_free_ struct in6_addr *dns = NULL;
+ size_t n_dns;
+ usec_t lifetime_usec;
+ int r;
+
+ if (!link->network->router_emit_dns)
+ return 0;
+
+ if (link->network->router_dns) {
+ dns = newdup(struct in6_addr, link->network->router_dns,
+ link->network->n_router_dns);
+ if (dns == NULL)
+ return -ENOMEM;
+
+ n_dns = link->network->n_router_dns;
+ lifetime_usec = link->network->router_dns_lifetime_usec;
+
+ goto set_dns;
+ }
+
+ lifetime_usec = SD_RADV_DEFAULT_DNS_LIFETIME_USEC;
+
+ r = radv_get_ip6dns(link->network, &dns, &n_dns);
+ if (r > 0)
+ goto set_dns;
+
+ if (uplink) {
+ if (uplink->network == NULL) {
+ log_link_debug(uplink, "Cannot fetch DNS servers as uplink interface is not managed by us");
+ return 0;
+ }
+
+ r = radv_get_ip6dns(uplink->network, &dns, &n_dns);
+ if (r > 0)
+ goto set_dns;
+ }
+
+ return 0;
+
+ set_dns:
+ return sd_radv_set_rdnss(link->radv,
+ DIV_ROUND_UP(lifetime_usec, USEC_PER_SEC),
+ dns, n_dns);
+}
+
+static int radv_set_domains(Link *link, Link *uplink) {
+ char **search_domains;
+ usec_t lifetime_usec;
+
+ if (!link->network->router_emit_domains)
+ return 0;
+
+ search_domains = link->network->router_search_domains;
+ lifetime_usec = link->network->router_dns_lifetime_usec;
+
+ if (search_domains)
+ goto set_domains;
+
+ lifetime_usec = SD_RADV_DEFAULT_DNS_LIFETIME_USEC;
+
+ search_domains = link->network->search_domains;
+ if (search_domains)
+ goto set_domains;
+
+ if (uplink) {
+ if (uplink->network == NULL) {
+ log_link_debug(uplink, "Cannot fetch DNS search domains as uplink interface is not managed by us");
+ return 0;
+ }
+
+ search_domains = uplink->network->search_domains;
+ if (search_domains)
+ goto set_domains;
+ }
+
+ return 0;
+
+ set_domains:
+ return sd_radv_set_dnssl(link->radv,
+ DIV_ROUND_UP(lifetime_usec, USEC_PER_SEC),
+ search_domains);
+
+}
+
+int radv_emit_dns(Link *link) {
+ Link *uplink;
+ int r;
+
+ uplink = manager_find_uplink(link->manager, link);
+
+ r = radv_set_dns(link, uplink);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not set RA DNS: %m");
+
+ r = radv_set_domains(link, uplink);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not set RA Domains: %m");
+
+ return 0;
+}
+
+int radv_configure(Link *link) {
+ int r;
+ Prefix *p;
+
+ assert(link);
+ assert(link->network);
+
+ r = sd_radv_new(&link->radv);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_attach_event(link->radv, NULL, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_set_mac(link->radv, &link->mac);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_set_ifindex(link->radv, link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_set_managed_information(link->radv, link->network->router_managed);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_set_other_information(link->radv, link->network->router_other_information);
+ if (r < 0)
+ return r;
+
+ /* a value of 0xffffffff represents infinity, 0x0 means this host is
+ not a router */
+ r = sd_radv_set_router_lifetime(link->radv,
+ DIV_ROUND_UP(link->network->router_lifetime_usec, USEC_PER_SEC));
+ if (r < 0)
+ return r;
+
+ if (link->network->router_lifetime_usec > 0) {
+ r = sd_radv_set_preference(link->radv,
+ link->network->router_preference);
+ if (r < 0)
+ return r;
+ }
+
+ if (IN_SET(link->network->router_prefix_delegation,
+ RADV_PREFIX_DELEGATION_STATIC,
+ RADV_PREFIX_DELEGATION_BOTH)) {
+
+ LIST_FOREACH(prefixes, p, link->network->static_prefixes) {
+ r = sd_radv_add_prefix(link->radv, p->radv_prefix, false);
+ if (r == -EEXIST)
+ continue;
+ if (r == -ENOEXEC) {
+ log_link_warning_errno(link, r, "[IPv6Prefix] section configured without Prefix= setting, ignoring section.");
+ continue;
+ }
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return radv_emit_dns(link);
+}
diff --git a/src/network/networkd-radv.h b/src/network/networkd-radv.h
new file mode 100644
index 0000000..bb88f8a
--- /dev/null
+++ b/src/network/networkd-radv.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2017 Intel Corporation. All rights reserved.
+***/
+
+#include "conf-parser.h"
+#include "networkd-address.h"
+#include "networkd-link.h"
+
+typedef struct Prefix Prefix;
+
+struct Prefix {
+ Network *network;
+ NetworkConfigSection *section;
+
+ sd_radv_prefix *radv_prefix;
+
+ LIST_FIELDS(Prefix, prefixes);
+};
+
+int prefix_new(Prefix **ret);
+void prefix_free(Prefix *prefix);
+int prefix_new_static(Network *network, const char *filename, unsigned section, Prefix **ret);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Prefix*, prefix_free);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_router_prefix_delegation);
+CONFIG_PARSER_PROTOTYPE(config_parse_router_preference);
+CONFIG_PARSER_PROTOTYPE(config_parse_prefix);
+CONFIG_PARSER_PROTOTYPE(config_parse_prefix_flags);
+CONFIG_PARSER_PROTOTYPE(config_parse_prefix_lifetime);
+
+int radv_emit_dns(Link *link);
+int radv_configure(Link *link);
diff --git a/src/network/networkd-route.c b/src/network/networkd-route.c
new file mode 100644
index 0000000..5553a7e
--- /dev/null
+++ b/src/network/networkd-route.c
@@ -0,0 +1,1192 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <linux/icmpv6.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "missing_network.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "networkd-route.h"
+#include "parse-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "sysctl-util.h"
+#include "util.h"
+
+#define ROUTES_DEFAULT_MAX_PER_FAMILY 4096U
+
+static unsigned routes_max(void) {
+ static thread_local unsigned cached = 0;
+
+ _cleanup_free_ char *s4 = NULL, *s6 = NULL;
+ unsigned val4 = ROUTES_DEFAULT_MAX_PER_FAMILY, val6 = ROUTES_DEFAULT_MAX_PER_FAMILY;
+
+ if (cached > 0)
+ return cached;
+
+ if (sysctl_read("net/ipv4/route/max_size", &s4) >= 0) {
+ truncate_nl(s4);
+ if (safe_atou(s4, &val4) >= 0 &&
+ val4 == 2147483647U)
+ /* This is the default "no limit" value in the kernel */
+ val4 = ROUTES_DEFAULT_MAX_PER_FAMILY;
+ }
+
+ if (sysctl_read("net/ipv6/route/max_size", &s6) >= 0) {
+ truncate_nl(s6);
+ (void) safe_atou(s6, &val6);
+ }
+
+ cached = MAX(ROUTES_DEFAULT_MAX_PER_FAMILY, val4) +
+ MAX(ROUTES_DEFAULT_MAX_PER_FAMILY, val6);
+ return cached;
+}
+
+int route_new(Route **ret) {
+ _cleanup_(route_freep) Route *route = NULL;
+
+ route = new(Route, 1);
+ if (!route)
+ return -ENOMEM;
+
+ *route = (Route) {
+ .family = AF_UNSPEC,
+ .scope = RT_SCOPE_UNIVERSE,
+ .protocol = RTPROT_UNSPEC,
+ .type = RTN_UNICAST,
+ .table = RT_TABLE_MAIN,
+ .lifetime = USEC_INFINITY,
+ .quickack = -1,
+ };
+
+ *ret = TAKE_PTR(route);
+
+ return 0;
+}
+
+int route_new_static(Network *network, const char *filename, unsigned section_line, Route **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(route_freep) Route *route = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(!!filename == (section_line > 0));
+
+ if (filename) {
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ route = hashmap_get(network->routes_by_section, n);
+ if (route) {
+ *ret = TAKE_PTR(route);
+
+ return 0;
+ }
+ }
+
+ if (network->n_static_routes >= routes_max())
+ return -E2BIG;
+
+ r = route_new(&route);
+ if (r < 0)
+ return r;
+
+ route->protocol = RTPROT_STATIC;
+ route->network = network;
+ LIST_PREPEND(routes, network->static_routes, route);
+ network->n_static_routes++;
+
+ if (filename) {
+ route->section = TAKE_PTR(n);
+
+ r = hashmap_ensure_allocated(&network->routes_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->routes_by_section, route->section, route);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(route);
+
+ return 0;
+}
+
+void route_free(Route *route) {
+ if (!route)
+ return;
+
+ if (route->network) {
+ LIST_REMOVE(routes, route->network->static_routes, route);
+
+ assert(route->network->n_static_routes > 0);
+ route->network->n_static_routes--;
+
+ if (route->section)
+ hashmap_remove(route->network->routes_by_section, route->section);
+ }
+
+ network_config_section_free(route->section);
+
+ if (route->link) {
+ set_remove(route->link->routes, route);
+ set_remove(route->link->routes_foreign, route);
+ }
+
+ sd_event_source_unref(route->expire);
+
+ free(route);
+}
+
+static void route_hash_func(const Route *route, struct siphash *state) {
+ assert(route);
+
+ siphash24_compress(&route->family, sizeof(route->family), state);
+
+ switch (route->family) {
+ case AF_INET:
+ case AF_INET6:
+ /* Equality of routes are given by the 4-touple
+ (dst_prefix,dst_prefixlen,tos,priority,table) */
+ siphash24_compress(&route->dst, FAMILY_ADDRESS_SIZE(route->family), state);
+ siphash24_compress(&route->dst_prefixlen, sizeof(route->dst_prefixlen), state);
+ siphash24_compress(&route->tos, sizeof(route->tos), state);
+ siphash24_compress(&route->priority, sizeof(route->priority), state);
+ siphash24_compress(&route->table, sizeof(route->table), state);
+
+ break;
+ default:
+ /* treat any other address family as AF_UNSPEC */
+ break;
+ }
+}
+
+static int route_compare_func(const Route *a, const Route *b) {
+ int r;
+
+ r = CMP(a->family, b->family);
+ if (r != 0)
+ return r;
+
+ switch (a->family) {
+ case AF_INET:
+ case AF_INET6:
+ r = CMP(a->dst_prefixlen, b->dst_prefixlen);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->tos, b->tos);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->priority, b->priority);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->table, b->table);
+ if (r != 0)
+ return r;
+
+ return memcmp(&a->dst, &b->dst, FAMILY_ADDRESS_SIZE(a->family));
+ default:
+ /* treat any other address family as AF_UNSPEC */
+ return 0;
+ }
+}
+
+DEFINE_PRIVATE_HASH_OPS(route_hash_ops, Route, route_hash_func, route_compare_func);
+
+bool route_equal(Route *r1, Route *r2) {
+ if (r1 == r2)
+ return true;
+
+ if (!r1 || !r2)
+ return false;
+
+ return route_compare_func(r1, r2) == 0;
+}
+
+int route_get(Link *link,
+ int family,
+ const union in_addr_union *dst,
+ unsigned char dst_prefixlen,
+ unsigned char tos,
+ uint32_t priority,
+ uint32_t table,
+ Route **ret) {
+
+ Route route, *existing;
+
+ assert(link);
+ assert(dst);
+
+ route = (Route) {
+ .family = family,
+ .dst = *dst,
+ .dst_prefixlen = dst_prefixlen,
+ .tos = tos,
+ .priority = priority,
+ .table = table,
+ };
+
+ existing = set_get(link->routes, &route);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 1;
+ }
+
+ existing = set_get(link->routes_foreign, &route);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int route_add_internal(
+ Link *link,
+ Set **routes,
+ int family,
+ const union in_addr_union *dst,
+ unsigned char dst_prefixlen,
+ unsigned char tos,
+ uint32_t priority,
+ uint32_t table,
+ Route **ret) {
+
+ _cleanup_(route_freep) Route *route = NULL;
+ int r;
+
+ assert(link);
+ assert(routes);
+ assert(dst);
+
+ r = route_new(&route);
+ if (r < 0)
+ return r;
+
+ route->family = family;
+ route->dst = *dst;
+ route->dst_prefixlen = dst_prefixlen;
+ route->tos = tos;
+ route->priority = priority;
+ route->table = table;
+
+ r = set_ensure_allocated(routes, &route_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = set_put(*routes, route);
+ if (r < 0)
+ return r;
+
+ route->link = link;
+
+ if (ret)
+ *ret = route;
+
+ route = NULL;
+
+ return 0;
+}
+
+int route_add_foreign(
+ Link *link,
+ int family,
+ const union in_addr_union *dst,
+ unsigned char dst_prefixlen,
+ unsigned char tos,
+ uint32_t priority,
+ uint32_t table,
+ Route **ret) {
+
+ return route_add_internal(link, &link->routes_foreign, family, dst, dst_prefixlen, tos, priority, table, ret);
+}
+
+int route_add(Link *link,
+ int family,
+ const union in_addr_union *dst,
+ unsigned char dst_prefixlen,
+ unsigned char tos,
+ uint32_t priority,
+ uint32_t table,
+ Route **ret) {
+
+ Route *route;
+ int r;
+
+ r = route_get(link, family, dst, dst_prefixlen, tos, priority, table, &route);
+ if (r == -ENOENT) {
+ /* Route does not exist, create a new one */
+ r = route_add_internal(link, &link->routes, family, dst, dst_prefixlen, tos, priority, table, &route);
+ if (r < 0)
+ return r;
+ } else if (r == 0) {
+ /* Take over a foreign route */
+ r = set_ensure_allocated(&link->routes, &route_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = set_put(link->routes, route);
+ if (r < 0)
+ return r;
+
+ set_remove(link->routes_foreign, route);
+ } else if (r == 1) {
+ /* Route exists, do nothing */
+ ;
+ } else
+ return r;
+
+ if (ret)
+ *ret = route;
+
+ return 0;
+}
+
+void route_update(Route *route,
+ const union in_addr_union *src,
+ unsigned char src_prefixlen,
+ const union in_addr_union *gw,
+ const union in_addr_union *prefsrc,
+ unsigned char scope,
+ unsigned char protocol,
+ unsigned char type) {
+
+ assert(route);
+ assert(src || src_prefixlen == 0);
+
+ route->src = src ? *src : IN_ADDR_NULL;
+ route->src_prefixlen = src_prefixlen;
+ route->gw = gw ? *gw : IN_ADDR_NULL;
+ route->prefsrc = prefsrc ? *prefsrc : IN_ADDR_NULL;
+ route->scope = scope;
+ route->protocol = protocol;
+ route->type = type;
+}
+
+static int route_remove_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -ESRCH)
+ log_link_warning_errno(link, r, "Could not drop route: %m");
+
+ return 1;
+}
+
+int route_remove(Route *route, Link *link,
+ link_netlink_message_handler_t callback) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+ assert(link->ifindex > 0);
+ assert(IN_SET(route->family, AF_INET, AF_INET6));
+
+ r = sd_rtnl_message_new_route(link->manager->rtnl, &req,
+ RTM_DELROUTE, route->family,
+ route->protocol);
+ if (r < 0)
+ return log_error_errno(r, "Could not create RTM_DELROUTE message: %m");
+
+ if (!in_addr_is_null(route->family, &route->gw)) {
+ if (route->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(req, RTA_GATEWAY, &route->gw.in);
+ else if (route->family == AF_INET6)
+ r = sd_netlink_message_append_in6_addr(req, RTA_GATEWAY, &route->gw.in6);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_GATEWAY attribute: %m");
+ }
+
+ if (route->dst_prefixlen) {
+ if (route->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(req, RTA_DST, &route->dst.in);
+ else if (route->family == AF_INET6)
+ r = sd_netlink_message_append_in6_addr(req, RTA_DST, &route->dst.in6);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_DST attribute: %m");
+
+ r = sd_rtnl_message_route_set_dst_prefixlen(req, route->dst_prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Could not set destination prefix length: %m");
+ }
+
+ if (route->src_prefixlen) {
+ if (route->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(req, RTA_SRC, &route->src.in);
+ else if (route->family == AF_INET6)
+ r = sd_netlink_message_append_in6_addr(req, RTA_SRC, &route->src.in6);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_SRC attribute: %m");
+
+ r = sd_rtnl_message_route_set_src_prefixlen(req, route->src_prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Could not set source prefix length: %m");
+ }
+
+ if (!in_addr_is_null(route->family, &route->prefsrc)) {
+ if (route->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(req, RTA_PREFSRC, &route->prefsrc.in);
+ else if (route->family == AF_INET6)
+ r = sd_netlink_message_append_in6_addr(req, RTA_PREFSRC, &route->prefsrc.in6);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_PREFSRC attribute: %m");
+ }
+
+ r = sd_rtnl_message_route_set_scope(req, route->scope);
+ if (r < 0)
+ return log_error_errno(r, "Could not set scope: %m");
+
+ r = sd_netlink_message_append_u32(req, RTA_PRIORITY, route->priority);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_PRIORITY attribute: %m");
+
+ if (!IN_SET(route->type, RTN_UNREACHABLE, RTN_PROHIBIT, RTN_BLACKHOLE, RTN_THROW)) {
+ r = sd_netlink_message_append_u32(req, RTA_OIF, link->ifindex);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_OIF attribute: %m");
+ }
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req,
+ callback ?: route_remove_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_error_errno(r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+int route_expire_handler(sd_event_source *s, uint64_t usec, void *userdata) {
+ Route *route = userdata;
+ int r;
+
+ assert(route);
+
+ r = route_remove(route, route->link, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Could not remove route: %m");
+ else
+ route_free(route);
+
+ return 1;
+}
+
+int route_configure(
+ Route *route,
+ Link *link,
+ link_netlink_message_handler_t callback) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *expire = NULL;
+ usec_t lifetime;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+ assert(link->ifindex > 0);
+ assert(IN_SET(route->family, AF_INET, AF_INET6));
+ assert(callback);
+
+ if (route_get(link, route->family, &route->dst, route->dst_prefixlen, route->tos, route->priority, route->table, NULL) <= 0 &&
+ set_size(link->routes) >= routes_max())
+ return -E2BIG;
+
+ r = sd_rtnl_message_new_route(link->manager->rtnl, &req,
+ RTM_NEWROUTE, route->family,
+ route->protocol);
+ if (r < 0)
+ return log_error_errno(r, "Could not create RTM_NEWROUTE message: %m");
+
+ if (!in_addr_is_null(route->family, &route->gw)) {
+ if (route->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(req, RTA_GATEWAY, &route->gw.in);
+ else if (route->family == AF_INET6)
+ r = sd_netlink_message_append_in6_addr(req, RTA_GATEWAY, &route->gw.in6);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_GATEWAY attribute: %m");
+
+ r = sd_rtnl_message_route_set_family(req, route->family);
+ if (r < 0)
+ return log_error_errno(r, "Could not set route family: %m");
+ }
+
+ if (route->dst_prefixlen) {
+ if (route->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(req, RTA_DST, &route->dst.in);
+ else if (route->family == AF_INET6)
+ r = sd_netlink_message_append_in6_addr(req, RTA_DST, &route->dst.in6);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_DST attribute: %m");
+
+ r = sd_rtnl_message_route_set_dst_prefixlen(req, route->dst_prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Could not set destination prefix length: %m");
+ }
+
+ if (route->src_prefixlen) {
+ if (route->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(req, RTA_SRC, &route->src.in);
+ else if (route->family == AF_INET6)
+ r = sd_netlink_message_append_in6_addr(req, RTA_SRC, &route->src.in6);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_SRC attribute: %m");
+
+ r = sd_rtnl_message_route_set_src_prefixlen(req, route->src_prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Could not set source prefix length: %m");
+ }
+
+ if (!in_addr_is_null(route->family, &route->prefsrc)) {
+ if (route->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(req, RTA_PREFSRC, &route->prefsrc.in);
+ else if (route->family == AF_INET6)
+ r = sd_netlink_message_append_in6_addr(req, RTA_PREFSRC, &route->prefsrc.in6);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_PREFSRC attribute: %m");
+ }
+
+ r = sd_rtnl_message_route_set_scope(req, route->scope);
+ if (r < 0)
+ return log_error_errno(r, "Could not set scope: %m");
+
+ r = sd_rtnl_message_route_set_flags(req, route->flags);
+ if (r < 0)
+ return log_error_errno(r, "Could not set flags: %m");
+
+ if (route->table != RT_TABLE_MAIN) {
+ if (route->table < 256) {
+ r = sd_rtnl_message_route_set_table(req, route->table);
+ if (r < 0)
+ return log_error_errno(r, "Could not set route table: %m");
+ } else {
+ r = sd_rtnl_message_route_set_table(req, RT_TABLE_UNSPEC);
+ if (r < 0)
+ return log_error_errno(r, "Could not set route table: %m");
+
+ /* Table attribute to allow more than 256. */
+ r = sd_netlink_message_append_data(req, RTA_TABLE, &route->table, sizeof(route->table));
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_TABLE attribute: %m");
+ }
+ }
+
+ r = sd_netlink_message_append_u32(req, RTA_PRIORITY, route->priority);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_PRIORITY attribute: %m");
+
+ r = sd_netlink_message_append_u8(req, RTA_PREF, route->pref);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_PREF attribute: %m");
+
+ if (route->lifetime != USEC_INFINITY && kernel_route_expiration_supported()) {
+ r = sd_netlink_message_append_u32(req, RTA_EXPIRES,
+ DIV_ROUND_UP(usec_sub_unsigned(route->lifetime, now(clock_boottime_or_monotonic())), USEC_PER_SEC));
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_EXPIRES attribute: %m");
+ }
+
+ r = sd_rtnl_message_route_set_type(req, route->type);
+ if (r < 0)
+ return log_error_errno(r, "Could not set route type: %m");
+
+ if (!IN_SET(route->type, RTN_UNREACHABLE, RTN_PROHIBIT, RTN_BLACKHOLE, RTN_THROW)) {
+ r = sd_netlink_message_append_u32(req, RTA_OIF, link->ifindex);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_OIF attribute: %m");
+ }
+
+ r = sd_netlink_message_open_container(req, RTA_METRICS);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_METRICS attribute: %m");
+
+ if (route->mtu > 0) {
+ r = sd_netlink_message_append_u32(req, RTAX_MTU, route->mtu);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTAX_MTU attribute: %m");
+ }
+
+ if (route->initcwnd > 0) {
+ r = sd_netlink_message_append_u32(req, RTAX_INITCWND, route->initcwnd);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTAX_INITCWND attribute: %m");
+ }
+
+ if (route->initrwnd > 0) {
+ r = sd_netlink_message_append_u32(req, RTAX_INITRWND, route->initrwnd);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTAX_INITRWND attribute: %m");
+ }
+
+ if (route->quickack != -1) {
+ r = sd_netlink_message_append_u32(req, RTAX_QUICKACK, route->quickack);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTAX_QUICKACK attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_error_errno(r, "Could not append RTA_METRICS attribute: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, callback,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_error_errno(r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ lifetime = route->lifetime;
+
+ r = route_add(link, route->family, &route->dst, route->dst_prefixlen, route->tos, route->priority, route->table, &route);
+ if (r < 0)
+ return log_error_errno(r, "Could not add route: %m");
+
+ /* TODO: drop expiration handling once it can be pushed into the kernel */
+ route->lifetime = lifetime;
+
+ if (route->lifetime != USEC_INFINITY && !kernel_route_expiration_supported()) {
+ r = sd_event_add_time(link->manager->event, &expire, clock_boottime_or_monotonic(),
+ route->lifetime, 0, route_expire_handler, route);
+ if (r < 0)
+ return log_error_errno(r, "Could not arm expiration timer: %m");
+ }
+
+ sd_event_source_unref(route->expire);
+ route->expire = TAKE_PTR(expire);
+
+ return 0;
+}
+
+int config_parse_gateway(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_freep) Route *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(section, "Network")) {
+ /* we are not in an Route section, so treat
+ * this as the special '0' section */
+ r = route_new_static(network, NULL, 0, &n);
+ } else
+ r = route_new_static(network, filename, section_line, &n);
+
+ if (r < 0)
+ return r;
+
+ r = in_addr_from_string_auto(rvalue, &n->family, &n->gw);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Route is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+
+ return 0;
+}
+
+int config_parse_preferred_src(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_freep) Route *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = in_addr_from_string_auto(rvalue, &n->family, &n->prefsrc);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, EINVAL,
+ "Preferred source is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+
+ return 0;
+}
+
+int config_parse_destination(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_freep) Route *n = NULL;
+ union in_addr_union *buffer;
+ unsigned char *prefixlen;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ if (streq(lvalue, "Destination")) {
+ buffer = &n->dst;
+ prefixlen = &n->dst_prefixlen;
+ } else if (streq(lvalue, "Source")) {
+ buffer = &n->src;
+ prefixlen = &n->src_prefixlen;
+ } else
+ assert_not_reached(lvalue);
+
+ r = in_addr_prefix_from_string_auto(rvalue, &n->family, buffer, prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Route %s= prefix is invalid, ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_route_priority(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_freep) Route *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = safe_atou32(rvalue, &n->priority);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Could not parse route priority \"%s\", ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_route_scope(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_freep) Route *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ if (streq(rvalue, "host"))
+ n->scope = RT_SCOPE_HOST;
+ else if (streq(rvalue, "link"))
+ n->scope = RT_SCOPE_LINK;
+ else if (streq(rvalue, "global"))
+ n->scope = RT_SCOPE_UNIVERSE;
+ else {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Unknown route scope: %s", rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_route_table(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(route_freep) Route *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = safe_atou32(rvalue, &n->table);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Could not parse route table number \"%s\", ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_gateway_onlink(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_freep) Route *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Could not parse gateway onlink \"%s\", ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ SET_FLAG(n->flags, RTNH_F_ONLINK, r);
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_ipv6_route_preference(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_freep) Route *n = NULL;
+ int r;
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ if (streq(rvalue, "low"))
+ n->pref = ICMPV6_ROUTER_PREF_LOW;
+ else if (streq(rvalue, "medium"))
+ n->pref = ICMPV6_ROUTER_PREF_MEDIUM;
+ else if (streq(rvalue, "high"))
+ n->pref = ICMPV6_ROUTER_PREF_HIGH;
+ else {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Unknown route preference: %s", rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_route_protocol(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_freep) Route *n = NULL;
+ int r;
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ if (streq(rvalue, "kernel"))
+ n->protocol = RTPROT_KERNEL;
+ else if (streq(rvalue, "boot"))
+ n->protocol = RTPROT_BOOT;
+ else if (streq(rvalue, "static"))
+ n->protocol = RTPROT_STATIC;
+ else {
+ r = safe_atou8(rvalue , &n->protocol);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Could not parse route protocol \"%s\", ignoring assignment: %m", rvalue);
+ return 0;
+ }
+ }
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_route_type(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_freep) Route *n = NULL;
+ int r;
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ if (streq(rvalue, "unicast"))
+ n->type = RTN_UNICAST;
+ else if (streq(rvalue, "blackhole"))
+ n->type = RTN_BLACKHOLE;
+ else if (streq(rvalue, "unreachable"))
+ n->type = RTN_UNREACHABLE;
+ else if (streq(rvalue, "prohibit"))
+ n->type = RTN_PROHIBIT;
+ else if (streq(rvalue, "throw"))
+ n->type = RTN_THROW;
+ else {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Could not parse route type \"%s\", ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_tcp_window(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(route_freep) Route *n = NULL;
+ Network *network = userdata;
+ uint64_t k;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = parse_size(rvalue, 1024, &k);
+ if (r < 0 || k > UINT32_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Could not parse TCP %s \"%s\" bytes, ignoring assignment: %m", rvalue, lvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "InitialCongestionWindow"))
+ n->initcwnd = k;
+ else if (streq(lvalue, "InitialAdvertisedReceiveWindow"))
+ n->initrwnd = k;
+ else {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse TCP %s: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_quickack(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(route_freep) Route *n = NULL;
+ Network *network = userdata;
+ int k, r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ k = parse_boolean(rvalue);
+ if (k < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, k, "Failed to parse TCP quickack, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n->quickack = !!k;
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_route_mtu(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_freep) Route *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = config_parse_mtu(unit, filename, line, section, section_line, lvalue, ltype, rvalue, &n->mtu, userdata);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(n);
+ return 0;
+}
diff --git a/src/network/networkd-route.h b/src/network/networkd-route.h
new file mode 100644
index 0000000..4eddf51
--- /dev/null
+++ b/src/network/networkd-route.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "conf-parser.h"
+
+typedef struct Route Route;
+typedef struct NetworkConfigSection NetworkConfigSection;
+
+#include "networkd-network.h"
+
+struct Route {
+ Network *network;
+ NetworkConfigSection *section;
+
+ Link *link;
+
+ int family;
+ int quickack;
+
+ unsigned char dst_prefixlen;
+ unsigned char src_prefixlen;
+ unsigned char scope;
+ unsigned char protocol; /* RTPROT_* */
+ unsigned char type; /* RTN_* */
+ unsigned char tos;
+ uint32_t priority; /* note that ip(8) calls this 'metric' */
+ uint32_t table;
+ uint32_t mtu;
+ uint32_t initcwnd;
+ uint32_t initrwnd;
+ unsigned char pref;
+ unsigned flags;
+
+ union in_addr_union gw;
+ union in_addr_union dst;
+ union in_addr_union src;
+ union in_addr_union prefsrc;
+
+ usec_t lifetime;
+ sd_event_source *expire;
+
+ LIST_FIELDS(Route, routes);
+};
+
+int route_new_static(Network *network, const char *filename, unsigned section_line, Route **ret);
+int route_new(Route **ret);
+void route_free(Route *route);
+int route_configure(Route *route, Link *link, link_netlink_message_handler_t callback);
+int route_remove(Route *route, Link *link, link_netlink_message_handler_t callback);
+
+int route_get(Link *link, int family, const union in_addr_union *dst, unsigned char dst_prefixlen, unsigned char tos, uint32_t priority, uint32_t table, Route **ret);
+int route_add(Link *link, int family, const union in_addr_union *dst, unsigned char dst_prefixlen, unsigned char tos, uint32_t priority, uint32_t table, Route **ret);
+int route_add_foreign(Link *link, int family, const union in_addr_union *dst, unsigned char dst_prefixlen, unsigned char tos, uint32_t priority, uint32_t table, Route **ret);
+void route_update(Route *route, const union in_addr_union *src, unsigned char src_prefixlen, const union in_addr_union *gw, const union in_addr_union *prefsrc, unsigned char scope, unsigned char protocol, unsigned char type);
+bool route_equal(Route *r1, Route *r2);
+
+int route_expire_handler(sd_event_source *s, uint64_t usec, void *userdata);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Route*, route_free);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_gateway);
+CONFIG_PARSER_PROTOTYPE(config_parse_preferred_src);
+CONFIG_PARSER_PROTOTYPE(config_parse_destination);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_priority);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_scope);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_table);
+CONFIG_PARSER_PROTOTYPE(config_parse_gateway_onlink);
+CONFIG_PARSER_PROTOTYPE(config_parse_ipv6_route_preference);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_protocol);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_type);
+CONFIG_PARSER_PROTOTYPE(config_parse_tcp_window);
+CONFIG_PARSER_PROTOTYPE(config_parse_quickack);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_mtu);
diff --git a/src/network/networkd-routing-policy-rule.c b/src/network/networkd-routing-policy-rule.c
new file mode 100644
index 0000000..2dc7862
--- /dev/null
+++ b/src/network/networkd-routing-policy-rule.c
@@ -0,0 +1,1274 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+#include <linux/fib_rules.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "fileio.h"
+#include "ip-protocol-list.h"
+#include "networkd-routing-policy-rule.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+int routing_policy_rule_new(RoutingPolicyRule **ret) {
+ RoutingPolicyRule *rule;
+
+ rule = new(RoutingPolicyRule, 1);
+ if (!rule)
+ return -ENOMEM;
+
+ *rule = (RoutingPolicyRule) {
+ .family = AF_INET,
+ .table = RT_TABLE_MAIN,
+ };
+
+ *ret = rule;
+ return 0;
+}
+
+void routing_policy_rule_free(RoutingPolicyRule *rule) {
+
+ if (!rule)
+ return;
+
+ if (rule->network) {
+ LIST_REMOVE(rules, rule->network->rules, rule);
+ assert(rule->network->n_rules > 0);
+ rule->network->n_rules--;
+
+ if (rule->section)
+ hashmap_remove(rule->network->rules_by_section, rule->section);
+ }
+
+ if (rule->manager) {
+ set_remove(rule->manager->rules, rule);
+ set_remove(rule->manager->rules_foreign, rule);
+ }
+
+ network_config_section_free(rule->section);
+ free(rule->iif);
+ free(rule->oif);
+ free(rule);
+}
+
+static void routing_policy_rule_hash_func(const RoutingPolicyRule *rule, struct siphash *state) {
+ assert(rule);
+
+ siphash24_compress(&rule->family, sizeof(rule->family), state);
+
+ switch (rule->family) {
+ case AF_INET:
+ case AF_INET6:
+
+ siphash24_compress(&rule->from, FAMILY_ADDRESS_SIZE(rule->family), state);
+ siphash24_compress(&rule->from_prefixlen, sizeof(rule->from_prefixlen), state);
+
+ siphash24_compress(&rule->to, FAMILY_ADDRESS_SIZE(rule->family), state);
+ siphash24_compress(&rule->to_prefixlen, sizeof(rule->to_prefixlen), state);
+
+ siphash24_compress(&rule->tos, sizeof(rule->tos), state);
+ siphash24_compress(&rule->fwmark, sizeof(rule->fwmark), state);
+ siphash24_compress(&rule->table, sizeof(rule->table), state);
+
+ siphash24_compress(&rule->protocol, sizeof(rule->protocol), state);
+ siphash24_compress(&rule->sport, sizeof(rule->sport), state);
+ siphash24_compress(&rule->dport, sizeof(rule->dport), state);
+
+ if (rule->iif)
+ siphash24_compress(rule->iif, strlen(rule->iif), state);
+
+ if (rule->oif)
+ siphash24_compress(rule->oif, strlen(rule->oif), state);
+
+ break;
+ default:
+ /* treat any other address family as AF_UNSPEC */
+ break;
+ }
+}
+
+static int routing_policy_rule_compare_func(const RoutingPolicyRule *a, const RoutingPolicyRule *b) {
+ int r;
+
+ r = CMP(a->family, b->family);
+ if (r != 0)
+ return r;
+
+ switch (a->family) {
+ case AF_INET:
+ case AF_INET6:
+ r = CMP(a->from_prefixlen, b->from_prefixlen);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->to_prefixlen, b->to_prefixlen);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->tos, b->tos);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->fwmask, b->fwmask);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->table, b->table);
+ if (r != 0)
+ return r;
+
+ r = strcmp_ptr(a->iif, b->iif);
+ if (!r)
+ return r;
+
+ r = strcmp_ptr(a->oif, b->oif);
+ if (!r)
+ return r;
+
+ r = CMP(a->protocol, b->protocol);
+ if (r != 0)
+ return r;
+
+ r = memcmp(&a->sport, &b->sport, sizeof(a->sport));
+ if (r != 0)
+ return r;
+
+ r = memcmp(&a->dport, &b->dport, sizeof(a->dport));
+ if (r != 0)
+ return r;
+
+ r = memcmp(&a->from, &b->from, FAMILY_ADDRESS_SIZE(a->family));
+ if (r != 0)
+ return r;
+
+ return memcmp(&a->to, &b->to, FAMILY_ADDRESS_SIZE(a->family));
+
+ default:
+ /* treat any other address family as AF_UNSPEC */
+ return 0;
+ }
+}
+
+DEFINE_PRIVATE_HASH_OPS(routing_policy_rule_hash_ops, RoutingPolicyRule, routing_policy_rule_hash_func, routing_policy_rule_compare_func);
+
+int routing_policy_rule_get(Manager *m,
+ int family,
+ const union in_addr_union *from,
+ uint8_t from_prefixlen,
+ const union in_addr_union *to,
+ uint8_t to_prefixlen,
+ uint8_t tos,
+ uint32_t fwmark,
+ uint32_t table,
+ const char *iif,
+ const char *oif,
+ uint8_t protocol,
+ struct fib_rule_port_range *sport,
+ struct fib_rule_port_range *dport,
+ RoutingPolicyRule **ret) {
+
+ RoutingPolicyRule rule, *existing;
+
+ assert_return(m, -1);
+
+ rule = (RoutingPolicyRule) {
+ .family = family,
+ .from = *from,
+ .from_prefixlen = from_prefixlen,
+ .to = *to,
+ .to_prefixlen = to_prefixlen,
+ .tos = tos,
+ .fwmark = fwmark,
+ .table = table,
+ .iif = (char*) iif,
+ .oif = (char*) oif,
+ .protocol = protocol,
+ .sport = *sport,
+ .dport = *dport,
+ };
+
+ existing = set_get(m->rules, &rule);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 1;
+ }
+
+ existing = set_get(m->rules_foreign, &rule);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+int routing_policy_rule_make_local(Manager *m, RoutingPolicyRule *rule) {
+ int r;
+
+ assert(m);
+
+ if (set_contains(m->rules_foreign, rule)) {
+ set_remove(m->rules_foreign, rule);
+
+ r = set_ensure_allocated(&m->rules, &routing_policy_rule_hash_ops);
+ if (r < 0)
+ return r;
+
+ return set_put(m->rules, rule);
+ }
+
+ return -ENOENT;
+}
+
+static int routing_policy_rule_add_internal(Manager *m,
+ Set **rules,
+ int family,
+ const union in_addr_union *from,
+ uint8_t from_prefixlen,
+ const union in_addr_union *to,
+ uint8_t to_prefixlen,
+ uint8_t tos,
+ uint32_t fwmark,
+ uint32_t table,
+ const char *_iif,
+ const char *_oif,
+ uint8_t protocol,
+ const struct fib_rule_port_range *sport,
+ const struct fib_rule_port_range *dport,
+ RoutingPolicyRule **ret) {
+
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *rule = NULL;
+ _cleanup_free_ char *iif = NULL, *oif = NULL;
+ int r;
+
+ assert_return(rules, -EINVAL);
+
+ if (_iif) {
+ iif = strdup(_iif);
+ if (!iif)
+ return -ENOMEM;
+ }
+
+ if (_oif) {
+ oif = strdup(_oif);
+ if (!oif)
+ return -ENOMEM;
+ }
+
+ r = routing_policy_rule_new(&rule);
+ if (r < 0)
+ return r;
+
+ rule->manager = m;
+ rule->family = family;
+ rule->from = *from;
+ rule->from_prefixlen = from_prefixlen;
+ rule->to = *to;
+ rule->to_prefixlen = to_prefixlen;
+ rule->tos = tos;
+ rule->fwmark = fwmark;
+ rule->table = table;
+ rule->iif = iif;
+ rule->oif = oif;
+ rule->protocol = protocol;
+ rule->sport = *sport;
+ rule->dport = *dport;
+
+ r = set_ensure_allocated(rules, &routing_policy_rule_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = set_put(*rules, rule);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = rule;
+
+ rule = NULL;
+ iif = oif = NULL;
+
+ return 0;
+}
+
+int routing_policy_rule_add(Manager *m,
+ int family,
+ const union in_addr_union *from,
+ uint8_t from_prefixlen,
+ const union in_addr_union *to,
+ uint8_t to_prefixlen,
+ uint8_t tos,
+ uint32_t fwmark,
+ uint32_t table,
+ const char *iif,
+ const char *oif,
+ uint8_t protocol,
+ const struct fib_rule_port_range *sport,
+ const struct fib_rule_port_range *dport,
+ RoutingPolicyRule **ret) {
+
+ return routing_policy_rule_add_internal(m, &m->rules, family, from, from_prefixlen, to, to_prefixlen, tos, fwmark, table, iif, oif, protocol, sport, dport, ret);
+}
+
+int routing_policy_rule_add_foreign(Manager *m,
+ int family,
+ const union in_addr_union *from,
+ uint8_t from_prefixlen,
+ const union in_addr_union *to,
+ uint8_t to_prefixlen,
+ uint8_t tos,
+ uint32_t fwmark,
+ uint32_t table,
+ const char *iif,
+ const char *oif,
+ uint8_t protocol,
+ const struct fib_rule_port_range *sport,
+ const struct fib_rule_port_range *dport,
+ RoutingPolicyRule **ret) {
+ return routing_policy_rule_add_internal(m, &m->rules_foreign, family, from, from_prefixlen, to, to_prefixlen, tos, fwmark, table, iif, oif, protocol, sport, dport, ret);
+}
+
+static int routing_policy_rule_remove_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+
+ link->routing_policy_rule_remove_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not drop routing policy rule: %m");
+
+ return 1;
+}
+
+int routing_policy_rule_remove(RoutingPolicyRule *routing_policy_rule, Link *link, link_netlink_message_handler_t callback) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(routing_policy_rule);
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+ assert(link->ifindex > 0);
+ assert(IN_SET(routing_policy_rule->family, AF_INET, AF_INET6));
+
+ r = sd_rtnl_message_new_routing_policy_rule(link->manager->rtnl, &m, RTM_DELRULE, routing_policy_rule->family);
+ if (r < 0)
+ return log_error_errno(r, "Could not allocate RTM_DELRULE message: %m");
+
+ if (!in_addr_is_null(routing_policy_rule->family, &routing_policy_rule->from)) {
+ if (routing_policy_rule->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(m, FRA_SRC, &routing_policy_rule->from.in);
+ else
+ r = sd_netlink_message_append_in6_addr(m, FRA_SRC, &routing_policy_rule->from.in6);
+
+ if (r < 0)
+ return log_error_errno(r, "Could not append FRA_SRC attribute: %m");
+
+ r = sd_rtnl_message_routing_policy_rule_set_rtm_src_prefixlen(m, routing_policy_rule->from_prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Could not set source prefix length: %m");
+ }
+
+ if (!in_addr_is_null(routing_policy_rule->family, &routing_policy_rule->to)) {
+ if (routing_policy_rule->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(m, FRA_DST, &routing_policy_rule->to.in);
+ else
+ r = sd_netlink_message_append_in6_addr(m, FRA_DST, &routing_policy_rule->to.in6);
+
+ if (r < 0)
+ return log_error_errno(r, "Could not append FRA_DST attribute: %m");
+
+ r = sd_rtnl_message_routing_policy_rule_set_rtm_dst_prefixlen(m, routing_policy_rule->to_prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Could not set destination prefix length: %m");
+ }
+
+ r = netlink_call_async(link->manager->rtnl, NULL, m,
+ callback ?: routing_policy_rule_remove_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_error_errno(r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int routing_policy_rule_new_static(Network *network, const char *filename, unsigned section_line, RoutingPolicyRule **ret) {
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *rule = NULL;
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(!!filename == (section_line > 0));
+
+ if (filename) {
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ rule = hashmap_get(network->rules_by_section, n);
+ if (rule) {
+ *ret = TAKE_PTR(rule);
+
+ return 0;
+ }
+ }
+
+ r = routing_policy_rule_new(&rule);
+ if (r < 0)
+ return r;
+
+ rule->network = network;
+ LIST_APPEND(rules, network->rules, rule);
+ network->n_rules++;
+
+ if (filename) {
+ rule->section = TAKE_PTR(n);
+
+ r = hashmap_ensure_allocated(&network->rules_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->rules_by_section, rule->section, rule);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(rule);
+
+ return 0;
+}
+
+static int routing_policy_rule_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(rtnl);
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+ assert(link->routing_policy_rule_messages > 0);
+
+ link->routing_policy_rule_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST)
+ log_link_warning_errno(link, r, "Could not add routing policy rule: %m");
+
+ if (link->routing_policy_rule_messages == 0) {
+ log_link_debug(link, "Routing policy rule configured");
+ link->routing_policy_rules_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+int routing_policy_rule_configure(RoutingPolicyRule *rule, Link *link, link_netlink_message_handler_t callback, bool update) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(rule);
+ assert(link);
+ assert(link->ifindex > 0);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ r = sd_rtnl_message_new_routing_policy_rule(link->manager->rtnl, &m, RTM_NEWRULE, rule->family);
+ if (r < 0)
+ return log_error_errno(r, "Could not allocate RTM_NEWRULE message: %m");
+
+ if (!in_addr_is_null(rule->family, &rule->from)) {
+ if (rule->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(m, FRA_SRC, &rule->from.in);
+ else
+ r = sd_netlink_message_append_in6_addr(m, FRA_SRC, &rule->from.in6);
+
+ if (r < 0)
+ return log_error_errno(r, "Could not append FRA_SRC attribute: %m");
+
+ r = sd_rtnl_message_routing_policy_rule_set_rtm_src_prefixlen(m, rule->from_prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Could not set source prefix length: %m");
+ }
+
+ if (!in_addr_is_null(rule->family, &rule->to)) {
+ if (rule->family == AF_INET)
+ r = sd_netlink_message_append_in_addr(m, FRA_DST, &rule->to.in);
+ else
+ r = sd_netlink_message_append_in6_addr(m, FRA_DST, &rule->to.in6);
+
+ if (r < 0)
+ return log_error_errno(r, "Could not append FRA_DST attribute: %m");
+
+ r = sd_rtnl_message_routing_policy_rule_set_rtm_dst_prefixlen(m, rule->to_prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Could not set destination prefix length: %m");
+ }
+
+ r = sd_netlink_message_append_u32(m, FRA_PRIORITY, rule->priority);
+ if (r < 0)
+ return log_error_errno(r, "Could not append FRA_PRIORITY attribute: %m");
+
+ if (rule->tos > 0) {
+ r = sd_rtnl_message_routing_policy_rule_set_tos(m, rule->tos);
+ if (r < 0)
+ return log_error_errno(r, "Could not set ip rule tos: %m");
+ }
+
+ if (rule->table < 256) {
+ r = sd_rtnl_message_routing_policy_rule_set_table(m, rule->table);
+ if (r < 0)
+ return log_error_errno(r, "Could not set ip rule table: %m");
+ } else {
+ r = sd_rtnl_message_routing_policy_rule_set_table(m, RT_TABLE_UNSPEC);
+ if (r < 0)
+ return log_error_errno(r, "Could not set ip rule table: %m");
+
+ r = sd_netlink_message_append_u32(m, FRA_TABLE, rule->table);
+ if (r < 0)
+ return log_error_errno(r, "Could not append FRA_TABLE attribute: %m");
+ }
+
+ if (rule->fwmark > 0) {
+ r = sd_netlink_message_append_u32(m, FRA_FWMARK, rule->fwmark);
+ if (r < 0)
+ return log_error_errno(r, "Could not append FRA_FWMARK attribute: %m");
+ }
+
+ if (rule->fwmask > 0) {
+ r = sd_netlink_message_append_u32(m, FRA_FWMASK, rule->fwmask);
+ if (r < 0)
+ return log_error_errno(r, "Could not append FRA_FWMASK attribute: %m");
+ }
+
+ if (rule->iif) {
+ r = sd_netlink_message_append_string(m, FRA_IFNAME, rule->iif);
+ if (r < 0)
+ return log_error_errno(r, "Could not append FRA_IFNAME attribute: %m");
+ }
+
+ if (rule->oif) {
+ r = sd_netlink_message_append_string(m, FRA_OIFNAME, rule->oif);
+ if (r < 0)
+ return log_error_errno(r, "Could not append FRA_OIFNAME attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u8(m, FRA_IP_PROTO, rule->protocol);
+ if (r < 0)
+ return log_error_errno(r, "Could not append FRA_IP_PROTO attribute: %m");
+
+ if (rule->sport.start != 0 || rule->sport.end != 0) {
+ r = sd_netlink_message_append_data(m, FRA_SPORT_RANGE, &rule->sport, sizeof(rule->sport));
+ if (r < 0)
+ return log_error_errno(r, "Could not append FRA_SPORT_RANGE attribute: %m");
+ }
+
+ if (rule->dport.start != 0 || rule->dport.end != 0) {
+ r = sd_netlink_message_append_data(m, FRA_DPORT_RANGE, &rule->dport, sizeof(rule->dport));
+ if (r < 0)
+ return log_error_errno(r, "Could not append FRA_DPORT_RANGE attribute: %m");
+ }
+
+ if (rule->invert_rule) {
+ r = sd_rtnl_message_routing_policy_rule_set_flags(m, FIB_RULE_INVERT);
+ if (r < 0)
+ return log_error_errno(r, "Could not append FIB_RULE_INVERT attribute: %m");
+ }
+
+ rule->link = link;
+
+ r = netlink_call_async(link->manager->rtnl, NULL, m,
+ callback ?: routing_policy_rule_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_error_errno(r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ r = routing_policy_rule_add(link->manager, rule->family, &rule->from, rule->from_prefixlen, &rule->to,
+ rule->to_prefixlen, rule->tos, rule->fwmark, rule->table, rule->iif, rule->oif, rule->protocol, &rule->sport, &rule->dport, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not add rule: %m");
+
+ return 0;
+}
+
+static int parse_fwmark_fwmask(const char *s, uint32_t *fwmark, uint32_t *fwmask) {
+ _cleanup_free_ char *f = NULL;
+ char *p;
+ int r;
+
+ assert(s);
+
+ f = strdup(s);
+ if (!f)
+ return -ENOMEM;
+
+ p = strchr(f, '/');
+ if (p)
+ *p++ = '\0';
+
+ r = safe_atou32(f, fwmark);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse RPDB rule firewall mark, ignoring: %s", f);
+
+ if (p) {
+ r = safe_atou32(p, fwmask);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse RPDB rule mask, ignoring: %s", f);
+ }
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_tos(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = safe_atou8(rvalue, &n->tos);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse RPDB rule tos, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_priority(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = safe_atou32(rvalue, &n->priority);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse RPDB rule priority, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_table(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = safe_atou32(rvalue, &n->table);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse RPDB rule table, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_fwmark_mask(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = parse_fwmark_fwmask(rvalue, &n->fwmark, &n->fwmask);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse RPDB rule firewall mark or mask, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_prefix(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ union in_addr_union *buffer;
+ uint8_t *prefixlen;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ if (streq(lvalue, "To")) {
+ buffer = &n->to;
+ prefixlen = &n->to_prefixlen;
+ } else {
+ buffer = &n->from;
+ prefixlen = &n->from_prefixlen;
+ }
+
+ r = in_addr_prefix_from_string_auto(rvalue, &n->family, buffer, prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "RPDB rule prefix is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_device(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ if (!ifname_valid(rvalue)) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse '%s' interface name, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "IncomingInterface")) {
+ r = free_and_strdup(&n->iif, rvalue);
+ if (r < 0)
+ return log_oom();
+ } else {
+ r = free_and_strdup(&n->oif, rvalue);
+ if (r < 0)
+ return log_oom();
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_port_range(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ uint16_t low, high;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = parse_ip_port_range(rvalue, &low, &high);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse routing policy rule port range '%s'", rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "SourcePort")) {
+ n->sport.start = low;
+ n->sport.end = high;
+ } else {
+ n->dport.start = low;
+ n->dport.end = high;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_ip_protocol(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = parse_ip_protocol(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse IP protocol '%s' for routing policy rule, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ n->protocol = r;
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_invert(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse RPDB rule invert, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n->invert_rule = r;
+
+ n = NULL;
+
+ return 0;
+}
+
+static int routing_policy_rule_read_full_file(const char *state_file, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ size_t size;
+ int r;
+
+ assert(state_file);
+
+ r = read_full_file(state_file, &s, &size);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+ if (size <= 0)
+ return -ENODATA;
+
+ *ret = TAKE_PTR(s);
+
+ return size;
+}
+
+int routing_policy_serialize_rules(Set *rules, FILE *f) {
+ RoutingPolicyRule *rule = NULL;
+ Iterator i;
+ int r;
+
+ assert(f);
+
+ SET_FOREACH(rule, rules, i) {
+ _cleanup_free_ char *from_str = NULL, *to_str = NULL;
+ bool space = false;
+
+ fputs("RULE=", f);
+
+ if (!in_addr_is_null(rule->family, &rule->from)) {
+ r = in_addr_to_string(rule->family, &rule->from, &from_str);
+ if (r < 0)
+ return r;
+
+ fprintf(f, "from=%s/%hhu",
+ from_str, rule->from_prefixlen);
+ space = true;
+ }
+
+ if (!in_addr_is_null(rule->family, &rule->to)) {
+ r = in_addr_to_string(rule->family, &rule->to, &to_str);
+ if (r < 0)
+ return r;
+
+ fprintf(f, "%sto=%s/%hhu",
+ space ? " " : "",
+ to_str, rule->to_prefixlen);
+ space = true;
+ }
+
+ if (rule->tos != 0) {
+ fprintf(f, "%stos=%hhu",
+ space ? " " : "",
+ rule->tos);
+ space = true;
+ }
+
+ if (rule->fwmark != 0) {
+ fprintf(f, "%sfwmark=%"PRIu32"/%"PRIu32,
+ space ? " " : "",
+ rule->fwmark, rule->fwmask);
+ space = true;
+ }
+
+ if (rule->iif) {
+ fprintf(f, "%siif=%s",
+ space ? " " : "",
+ rule->iif);
+ space = true;
+ }
+
+ if (rule->oif) {
+ fprintf(f, "%soif=%s",
+ space ? " " : "",
+ rule->oif);
+ space = true;
+ }
+
+ if (rule->protocol != 0) {
+ fprintf(f, "%sprotocol=%hhu",
+ space ? " " : "",
+ rule->protocol);
+ space = true;
+ }
+
+ if (rule->sport.start != 0 || rule->sport.end != 0) {
+ fprintf(f, "%ssourcesport=%"PRIu16"-%"PRIu16,
+ space ? " " : "",
+ rule->sport.start, rule->sport.end);
+ space = true;
+ }
+
+ if (rule->dport.start != 0 || rule->dport.end != 0) {
+ fprintf(f, "%sdestinationport=%"PRIu16"-%"PRIu16,
+ space ? " " : "",
+ rule->dport.start, rule->dport.end);
+ space = true;
+ }
+
+ fprintf(f, "%stable=%"PRIu32 "\n",
+ space ? " " : "",
+ rule->table);
+ }
+
+ return 0;
+}
+
+int routing_policy_load_rules(const char *state_file, Set **rules) {
+ _cleanup_strv_free_ char **l = NULL;
+ _cleanup_free_ char *data = NULL;
+ uint16_t low = 0, high = 0;
+ const char *p;
+ char **i;
+ int r;
+
+ assert(state_file);
+ assert(rules);
+
+ r = routing_policy_rule_read_full_file(state_file, &data);
+ if (r <= 0)
+ return r;
+
+ l = strv_split_newlines(data);
+ if (!l)
+ return -ENOMEM;
+
+ r = set_ensure_allocated(rules, &routing_policy_rule_hash_ops);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, l) {
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *rule = NULL;
+
+ p = startswith(*i, "RULE=");
+ if (!p)
+ continue;
+
+ r = routing_policy_rule_new(&rule);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *a = NULL, *b = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = split_pair(word, "=", &a, &b);
+ if (r < 0)
+ continue;
+
+ if (STR_IN_SET(a, "from", "to")) {
+ union in_addr_union *buffer;
+ uint8_t *prefixlen;
+
+ if (streq(a, "to")) {
+ buffer = &rule->to;
+ prefixlen = &rule->to_prefixlen;
+ } else {
+ buffer = &rule->from;
+ prefixlen = &rule->from_prefixlen;
+ }
+
+ r = in_addr_prefix_from_string_auto(b, &rule->family, buffer, prefixlen);
+ if (r < 0) {
+ log_error_errno(r, "RPDB rule prefix is invalid, ignoring assignment: %s", b);
+ continue;
+ }
+
+ } else if (streq(a, "tos")) {
+ r = safe_atou8(b, &rule->tos);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse RPDB rule tos, ignoring: %s", b);
+ continue;
+ }
+ } else if (streq(a, "table")) {
+ r = safe_atou32(b, &rule->table);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse RPDB rule table, ignoring: %s", b);
+ continue;
+ }
+ } else if (streq(a, "fwmark")) {
+
+ r = parse_fwmark_fwmask(b, &rule->fwmark, &rule->fwmask);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse RPDB rule firewall mark or mask, ignoring: %s", a);
+ continue;
+ }
+ } else if (streq(a, "iif")) {
+
+ if (free_and_strdup(&rule->iif, b) < 0)
+ return log_oom();
+
+ } else if (streq(a, "oif")) {
+
+ if (free_and_strdup(&rule->oif, b) < 0)
+ return log_oom();
+ } else if (streq(a, "protocol")) {
+ r = safe_atou8(b, &rule->protocol);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse RPDB rule protocol, ignoring: %s", b);
+ continue;
+ }
+ } else if (streq(a, "sourceport")) {
+
+ r = parse_ip_port_range(b, &low, &high);
+ if (r < 0) {
+ log_error_errno(r, "Invalid routing policy rule source port range, ignoring assignment:'%s'", b);
+ continue;
+ }
+
+ rule->sport.start = low;
+ rule->sport.end = high;
+
+ } else if (streq(a, "destinationport")) {
+
+ r = parse_ip_port_range(b, &low, &high);
+ if (r < 0) {
+ log_error_errno(r, "Invalid routing policy rule destination port range, ignoring assignment:'%s'", b);
+ continue;
+ }
+
+ rule->dport.start = low;
+ rule->dport.end = high;
+ }
+ }
+
+ r = set_put(*rules, rule);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to add RPDB rule to saved DB, ignoring: %s", p);
+ continue;
+ }
+
+ rule = NULL;
+ }
+
+ return 0;
+}
+
+void routing_policy_rule_purge(Manager *m, Link *link) {
+ RoutingPolicyRule *rule, *existing;
+ Iterator i;
+ int r;
+
+ assert(m);
+ assert(link);
+
+ SET_FOREACH(rule, m->rules_saved, i) {
+ existing = set_get(m->rules_foreign, rule);
+ if (existing) {
+
+ r = routing_policy_rule_remove(rule, link, NULL);
+ if (r < 0) {
+ log_warning_errno(r, "Could not remove routing policy rules: %m");
+ continue;
+ }
+
+ link->routing_policy_rule_remove_messages++;
+ }
+ }
+}
diff --git a/src/network/networkd-routing-policy-rule.h b/src/network/networkd-routing-policy-rule.h
new file mode 100644
index 0000000..b35126e
--- /dev/null
+++ b/src/network/networkd-routing-policy-rule.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+#include <linux/fib_rules.h>
+#include <stdbool.h>
+
+#include "in-addr-util.h"
+#include "conf-parser.h"
+#include "missing_fib_rules.h"
+
+typedef struct RoutingPolicyRule RoutingPolicyRule;
+
+#include "networkd-link.h"
+#include "networkd-network.h"
+
+typedef struct Network Network;
+typedef struct Link Link;
+typedef struct NetworkConfigSection NetworkConfigSection;
+typedef struct Manager Manager;
+
+struct RoutingPolicyRule {
+ Manager *manager;
+ Network *network;
+ Link *link;
+ NetworkConfigSection *section;
+
+ bool invert_rule;
+
+ uint8_t tos;
+ uint8_t protocol;
+
+ uint32_t table;
+ uint32_t fwmark;
+ uint32_t fwmask;
+ uint32_t priority;
+
+ int family;
+ unsigned char to_prefixlen;
+ unsigned char from_prefixlen;
+
+ char *iif;
+ char *oif;
+
+ union in_addr_union to;
+ union in_addr_union from;
+
+ struct fib_rule_port_range sport;
+ struct fib_rule_port_range dport;
+
+ LIST_FIELDS(RoutingPolicyRule, rules);
+};
+
+int routing_policy_rule_new(RoutingPolicyRule **ret);
+void routing_policy_rule_free(RoutingPolicyRule *rule);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(RoutingPolicyRule*, routing_policy_rule_free);
+
+int routing_policy_rule_configure(RoutingPolicyRule *address, Link *link, link_netlink_message_handler_t callback, bool update);
+int routing_policy_rule_remove(RoutingPolicyRule *routing_policy_rule, Link *link, link_netlink_message_handler_t callback);
+
+int routing_policy_rule_add(Manager *m, int family, const union in_addr_union *from, uint8_t from_prefixlen, const union in_addr_union *to, uint8_t to_prefixlen,
+ uint8_t tos, uint32_t fwmark, uint32_t table, const char *iif, const char *oif, uint8_t protocol, const struct fib_rule_port_range *sport,
+ const struct fib_rule_port_range *dport, RoutingPolicyRule **ret);
+int routing_policy_rule_add_foreign(Manager *m, int family, const union in_addr_union *from, uint8_t from_prefixlen, const union in_addr_union *to, uint8_t to_prefixlen,
+ uint8_t tos, uint32_t fwmark, uint32_t table, const char *iif, const char *oif, uint8_t protocol, const struct fib_rule_port_range *sport,
+ const struct fib_rule_port_range *dport, RoutingPolicyRule **ret);
+int routing_policy_rule_get(Manager *m, int family, const union in_addr_union *from, uint8_t from_prefixlen, const union in_addr_union *to, uint8_t to_prefixlen, uint8_t tos,
+ uint32_t fwmark, uint32_t table, const char *iif, const char *oif, uint8_t protocol, struct fib_rule_port_range *sport,
+ struct fib_rule_port_range *dport, RoutingPolicyRule **ret);
+int routing_policy_rule_make_local(Manager *m, RoutingPolicyRule *rule);
+int routing_policy_serialize_rules(Set *rules, FILE *f);
+int routing_policy_load_rules(const char *state_file, Set **rules);
+void routing_policy_rule_purge(Manager *m, Link *link);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_tos);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_table);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_fwmark_mask);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_prefix);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_priority);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_device);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_port_range);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_ip_protocol);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_invert);
diff --git a/src/network/networkd-util.c b/src/network/networkd-util.c
new file mode 100644
index 0000000..9b6bc12
--- /dev/null
+++ b/src/network/networkd-util.c
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "condition.h"
+#include "conf-parser.h"
+#include "networkd-util.h"
+#include "parse-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "util.h"
+
+const char *address_family_boolean_to_string(AddressFamilyBoolean b) {
+ if (IN_SET(b, ADDRESS_FAMILY_YES, ADDRESS_FAMILY_NO))
+ return yes_no(b == ADDRESS_FAMILY_YES);
+
+ if (b == ADDRESS_FAMILY_IPV4)
+ return "ipv4";
+ if (b == ADDRESS_FAMILY_IPV6)
+ return "ipv6";
+
+ return NULL;
+}
+
+AddressFamilyBoolean address_family_boolean_from_string(const char *s) {
+ int r;
+
+ /* Make this a true superset of a boolean */
+
+ r = parse_boolean(s);
+ if (r > 0)
+ return ADDRESS_FAMILY_YES;
+ if (r == 0)
+ return ADDRESS_FAMILY_NO;
+
+ if (streq(s, "ipv4"))
+ return ADDRESS_FAMILY_IPV4;
+ if (streq(s, "ipv6"))
+ return ADDRESS_FAMILY_IPV6;
+
+ return _ADDRESS_FAMILY_BOOLEAN_INVALID;
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_address_family_boolean, address_family_boolean, AddressFamilyBoolean, "Failed to parse option");
+
+int config_parse_address_family_boolean_with_kernel(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ AddressFamilyBoolean *fwd = data, s;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* This function is mostly obsolete now. It simply redirects
+ * "kernel" to "no". In older networkd versions we used to
+ * distuingish IPForward=off from IPForward=kernel, where the
+ * former would explicitly turn off forwarding while the
+ * latter would simply not touch the setting. But that logic
+ * is gone, hence silently accept the old setting, but turn it
+ * to "no". */
+
+ s = address_family_boolean_from_string(rvalue);
+ if (s < 0) {
+ if (streq(rvalue, "kernel"))
+ s = ADDRESS_FAMILY_NO;
+ else {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse IPForward= option, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ *fwd = s;
+
+ return 0;
+}
+
+/* Router lifetime can be set with netlink interface since kernel >= 4.5
+ * so for the supported kernel we dont need to expire routes in userspace */
+int kernel_route_expiration_supported(void) {
+ static int cached = -1;
+ int r;
+
+ if (cached < 0) {
+ Condition c = {
+ .type = CONDITION_KERNEL_VERSION,
+ .parameter = (char *) ">= 4.5"
+ };
+ r = condition_test(&c);
+ if (r < 0)
+ return r;
+
+ cached = r;
+ }
+ return cached;
+}
diff --git a/src/network/networkd-util.h b/src/network/networkd-util.h
new file mode 100644
index 0000000..435fdb7
--- /dev/null
+++ b/src/network/networkd-util.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "conf-parser.h"
+#include "macro.h"
+
+typedef enum AddressFamilyBoolean {
+ /* This is a bitmask, though it usually doesn't feel that way! */
+ ADDRESS_FAMILY_NO = 0,
+ ADDRESS_FAMILY_IPV4 = 1,
+ ADDRESS_FAMILY_IPV6 = 2,
+ ADDRESS_FAMILY_YES = 3,
+ _ADDRESS_FAMILY_BOOLEAN_MAX,
+ _ADDRESS_FAMILY_BOOLEAN_INVALID = -1,
+} AddressFamilyBoolean;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_address_family_boolean);
+CONFIG_PARSER_PROTOTYPE(config_parse_address_family_boolean_with_kernel);
+
+const char *address_family_boolean_to_string(AddressFamilyBoolean b) _const_;
+AddressFamilyBoolean address_family_boolean_from_string(const char *s) _const_;
+
+int kernel_route_expiration_supported(void);
diff --git a/src/network/networkd.c b/src/network/networkd.c
new file mode 100644
index 0000000..fcecafe
--- /dev/null
+++ b/src/network/networkd.c
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+
+#include "capability-util.h"
+#include "daemon-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "networkd-conf.h"
+#include "networkd-manager.h"
+#include "signal-util.h"
+#include "user-util.h"
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(notify_on_cleanup) const char *notify_message = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ const char *user = "systemd-network";
+ uid_t uid;
+ gid_t gid;
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (argc != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments.");
+
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Cannot resolve user name %s: %m", user);
+
+ /* Create runtime directory. This is not necessary when networkd is
+ * started with "RuntimeDirectory=systemd/netif", or after
+ * systemd-tmpfiles-setup.service. */
+ r = mkdir_safe_label("/run/systemd/netif", 0755, uid, gid, MKDIR_WARN_MODE);
+ if (r < 0)
+ log_warning_errno(r, "Could not create runtime directory: %m");
+
+ /* Drop privileges, but only if we have been started as root. If we are not running as root we assume all
+ * privileges are already dropped. */
+ if (geteuid() == 0) {
+ r = drop_privileges(uid, gid,
+ (1ULL << CAP_NET_ADMIN) |
+ (1ULL << CAP_NET_BIND_SERVICE) |
+ (1ULL << CAP_NET_BROADCAST) |
+ (1ULL << CAP_NET_RAW));
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop privileges: %m");
+ }
+
+ /* Always create the directories people can create inotify watches in.
+ * It is necessary to create the following subdirectories after drop_privileges()
+ * to support old kernels not supporting AmbientCapabilities=. */
+ r = mkdir_safe_label("/run/systemd/netif/links", 0755, uid, gid, MKDIR_WARN_MODE);
+ if (r < 0)
+ log_warning_errno(r, "Could not create runtime directory 'links': %m");
+
+ r = mkdir_safe_label("/run/systemd/netif/leases", 0755, uid, gid, MKDIR_WARN_MODE);
+ if (r < 0)
+ log_warning_errno(r, "Could not create runtime directory 'leases': %m");
+
+ r = mkdir_safe_label("/run/systemd/netif/lldp", 0755, uid, gid, MKDIR_WARN_MODE);
+ if (r < 0)
+ log_warning_errno(r, "Could not create runtime directory 'lldp': %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Could not create manager: %m");
+
+ r = manager_connect_bus(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not connect to bus: %m");
+
+ r = manager_parse_config_file(m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse configuration file: %m");
+
+ r = manager_load_config(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not load configuration files: %m");
+
+ r = manager_rtnl_enumerate_links(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not enumerate links: %m");
+
+ r = manager_rtnl_enumerate_addresses(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not enumerate addresses: %m");
+
+ r = manager_rtnl_enumerate_routes(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not enumerate routes: %m");
+
+ r = manager_rtnl_enumerate_rules(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not enumerate rules: %m");
+
+ r = manager_start(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not start manager: %m");
+
+ log_info("Enumeration completed");
+
+ notify_message = notify_start(NOTIFY_READY, NOTIFY_STOPPING);
+
+ r = sd_event_loop(m->event);
+ if (r < 0)
+ return log_error_errno(r, "Event loop failed: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/network/networkd.conf b/src/network/networkd.conf
new file mode 100644
index 0000000..8dc5676
--- /dev/null
+++ b/src/network/networkd.conf
@@ -0,0 +1,16 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See networkd.conf(5) for details
+
+[DHCP]
+#DUIDType=vendor
+#DUIDRawData=
diff --git a/src/network/org.freedesktop.network1.conf b/src/network/org.freedesktop.network1.conf
new file mode 100644
index 0000000..52dad33
--- /dev/null
+++ b/src/network/org.freedesktop.network1.conf
@@ -0,0 +1,42 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="systemd-network">
+ <allow own="org.freedesktop.network1"/>
+ <allow send_destination="org.freedesktop.network1"/>
+ <allow receive_sender="org.freedesktop.network1"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.network1"/>
+
+ <allow send_destination="org.freedesktop.network1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.network1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.network1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.network1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <allow receive_sender="org.freedesktop.network1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/network/org.freedesktop.network1.service b/src/network/org.freedesktop.network1.service
new file mode 100644
index 0000000..641a1a5
--- /dev/null
+++ b/src/network/org.freedesktop.network1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.network1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.network1.service
diff --git a/src/network/systemd-networkd.pkla b/src/network/systemd-networkd.pkla
new file mode 100644
index 0000000..4d1bb45
--- /dev/null
+++ b/src/network/systemd-networkd.pkla
@@ -0,0 +1,4 @@
+[Allow systemd-networkd to set timezone and transient hostname]
+Identity=unix-user:systemd-network
+Action=org.freedesktop.hostname1.set-hostname;org.freedesktop.hostname1.get-product-uuid;org.freedesktop.timedate1.set-timezone;
+ResultAny=yes
diff --git a/src/network/systemd-networkd.rules b/src/network/systemd-networkd.rules
new file mode 100644
index 0000000..b9077c1
--- /dev/null
+++ b/src/network/systemd-networkd.rules
@@ -0,0 +1,10 @@
+// Allow systemd-networkd to set timezone, get product UUID,
+// and transient hostname
+polkit.addRule(function(action, subject) {
+ if ((action.id == "org.freedesktop.hostname1.set-hostname" ||
+ action.id == "org.freedesktop.hostname1.get-product-uuid" ||
+ action.id == "org.freedesktop.timedate1.set-timezone") &&
+ subject.user == "systemd-network") {
+ return polkit.Result.YES;
+ }
+});
diff --git a/src/network/test-network-tables.c b/src/network/test-network-tables.c
new file mode 100644
index 0000000..6b110b7
--- /dev/null
+++ b/src/network/test-network-tables.c
@@ -0,0 +1,47 @@
+#include "dhcp6-internal.h"
+#include "dhcp6-protocol.h"
+#include "ethtool-util.h"
+#include "lldp-internal.h"
+#include "ndisc-internal.h"
+#include "netdev/bond.h"
+#include "netdev/ipvlan.h"
+#include "netdev/macvlan.h"
+#include "netdev/tunnel.h"
+#include "netlink-internal.h"
+#include "networkd-link.h"
+#include "networkd-network.h"
+#include "networkd-util.h"
+#include "test-tables.h"
+
+int main(int argc, char **argv) {
+ test_table(address_family_boolean, ADDRESS_FAMILY_BOOLEAN);
+ test_table(bond_ad_select, NETDEV_BOND_AD_SELECT);
+ test_table(bond_arp_all_targets, NETDEV_BOND_ARP_ALL_TARGETS);
+ test_table(bond_arp_validate, NETDEV_BOND_ARP_VALIDATE);
+ test_table(bond_fail_over_mac, NETDEV_BOND_FAIL_OVER_MAC);
+ test_table(bond_lacp_rate, NETDEV_BOND_LACP_RATE);
+ test_table(bond_mode, NETDEV_BOND_MODE);
+ test_table(bond_primary_reselect, NETDEV_BOND_PRIMARY_RESELECT);
+ test_table(bond_xmit_hash_policy, NETDEV_BOND_XMIT_HASH_POLICY);
+ test_table(dhcp6_message_status, DHCP6_STATUS);
+ test_table_sparse(dhcp6_message_type, DHCP6_MESSAGE); /* enum starts from 1 */
+ test_table(dhcp_use_domains, DHCP_USE_DOMAINS);
+ test_table(duplex, DUP);
+ test_table(ip6tnl_mode, NETDEV_IP6_TNL_MODE);
+ test_table(ipv6_privacy_extensions, IPV6_PRIVACY_EXTENSIONS);
+ test_table(ipvlan_flags, NETDEV_IPVLAN_FLAGS);
+ test_table(link_operstate, LINK_OPERSTATE);
+ /* test_table(link_state, LINK_STATE); — not a reversible mapping */
+ test_table(lldp_mode, LLDP_MODE);
+ test_table(netdev_kind, NETDEV_KIND);
+ test_table(nl_union_link_info_data, NL_UNION_LINK_INFO_DATA);
+ test_table(radv_prefix_delegation, RADV_PREFIX_DELEGATION);
+ test_table(wol, WOL);
+ test_table(lldp_event, SD_LLDP_EVENT);
+ test_table(ndisc_event, SD_NDISC_EVENT);
+
+ test_table_sparse(ipvlan_mode, NETDEV_IPVLAN_MODE);
+ test_table_sparse(macvlan_mode, NETDEV_MACVLAN_MODE);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/network/test-network.c b/src/network/test-network.c
new file mode 100644
index 0000000..6e169e0
--- /dev/null
+++ b/src/network/test-network.c
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/param.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "dhcp-lease-internal.h"
+#include "hostname-util.h"
+#include "network-internal.h"
+#include "networkd-manager.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void test_deserialize_in_addr(void) {
+ _cleanup_free_ struct in_addr *addresses = NULL;
+ _cleanup_free_ struct in6_addr *addresses6 = NULL;
+ struct in_addr a, b, c;
+ struct in6_addr d, e, f;
+ int size;
+ const char *addresses_string = "192.168.0.1 0:0:0:0:0:FFFF:204.152.189.116 192.168.0.2 ::1 192.168.0.3 1:0:0:0:0:0:0:8";
+
+ assert_se(inet_pton(AF_INET, "0:0:0:0:0:FFFF:204.152.189.116", &a) == 0);
+ assert_se(inet_pton(AF_INET6, "192.168.0.1", &d) == 0);
+
+ assert_se(inet_pton(AF_INET, "192.168.0.1", &a) == 1);
+ assert_se(inet_pton(AF_INET, "192.168.0.2", &b) == 1);
+ assert_se(inet_pton(AF_INET, "192.168.0.3", &c) == 1);
+ assert_se(inet_pton(AF_INET6, "0:0:0:0:0:FFFF:204.152.189.116", &d) == 1);
+ assert_se(inet_pton(AF_INET6, "::1", &e) == 1);
+ assert_se(inet_pton(AF_INET6, "1:0:0:0:0:0:0:8", &f) == 1);
+
+ assert_se((size = deserialize_in_addrs(&addresses, addresses_string)) >= 0);
+ assert_se(size == 3);
+ assert_se(!memcmp(&a, &addresses[0], sizeof(struct in_addr)));
+ assert_se(!memcmp(&b, &addresses[1], sizeof(struct in_addr)));
+ assert_se(!memcmp(&c, &addresses[2], sizeof(struct in_addr)));
+
+ assert_se((size = deserialize_in6_addrs(&addresses6, addresses_string)) >= 0);
+ assert_se(size == 3);
+ assert_se(!memcmp(&d, &addresses6[0], sizeof(struct in6_addr)));
+ assert_se(!memcmp(&e, &addresses6[1], sizeof(struct in6_addr)));
+ assert_se(!memcmp(&f, &addresses6[2], sizeof(struct in6_addr)));
+}
+
+static void test_deserialize_dhcp_routes(void) {
+ size_t size, allocated;
+
+ {
+ _cleanup_free_ struct sd_dhcp_route *routes = NULL;
+ assert_se(deserialize_dhcp_routes(&routes, &size, &allocated, "") >= 0);
+ assert_se(size == 0);
+ }
+
+ {
+ /* no errors */
+ _cleanup_free_ struct sd_dhcp_route *routes = NULL;
+ const char *routes_string = "192.168.0.0/16,192.168.0.1 10.1.2.0/24,10.1.2.1 0.0.0.0/0,10.0.1.1";
+
+ assert_se(deserialize_dhcp_routes(&routes, &size, &allocated, routes_string) >= 0);
+
+ assert_se(size == 3);
+ assert_se(routes[0].dst_addr.s_addr == inet_addr("192.168.0.0"));
+ assert_se(routes[0].gw_addr.s_addr == inet_addr("192.168.0.1"));
+ assert_se(routes[0].dst_prefixlen == 16);
+
+ assert_se(routes[1].dst_addr.s_addr == inet_addr("10.1.2.0"));
+ assert_se(routes[1].gw_addr.s_addr == inet_addr("10.1.2.1"));
+ assert_se(routes[1].dst_prefixlen == 24);
+
+ assert_se(routes[2].dst_addr.s_addr == inet_addr("0.0.0.0"));
+ assert_se(routes[2].gw_addr.s_addr == inet_addr("10.0.1.1"));
+ assert_se(routes[2].dst_prefixlen == 0);
+ }
+
+ {
+ /* error in second word */
+ _cleanup_free_ struct sd_dhcp_route *routes = NULL;
+ const char *routes_string = "192.168.0.0/16,192.168.0.1 10.1.2.0#24,10.1.2.1 0.0.0.0/0,10.0.1.1";
+
+ assert_se(deserialize_dhcp_routes(&routes, &size, &allocated, routes_string) >= 0);
+
+ assert_se(size == 2);
+ assert_se(routes[0].dst_addr.s_addr == inet_addr("192.168.0.0"));
+ assert_se(routes[0].gw_addr.s_addr == inet_addr("192.168.0.1"));
+ assert_se(routes[0].dst_prefixlen == 16);
+
+ assert_se(routes[1].dst_addr.s_addr == inet_addr("0.0.0.0"));
+ assert_se(routes[1].gw_addr.s_addr == inet_addr("10.0.1.1"));
+ assert_se(routes[1].dst_prefixlen == 0);
+ }
+
+ {
+ /* error in every word */
+ _cleanup_free_ struct sd_dhcp_route *routes = NULL;
+ const char *routes_string = "192.168.0.0/55,192.168.0.1 10.1.2.0#24,10.1.2.1 0.0.0.0/0,10.0.1.X";
+
+ assert_se(deserialize_dhcp_routes(&routes, &size, &allocated, routes_string) >= 0);
+ assert_se(size == 0);
+ }
+}
+
+static int test_load_config(Manager *manager) {
+ int r;
+/* TODO: should_reload, is false if the config dirs do not exist, so
+ * so we can't do this test here, move it to a test for paths_check_timestamps
+ * directly
+ *
+ * assert_se(network_should_reload(manager) == true);
+*/
+
+ r = manager_load_config(manager);
+ if (r == -EPERM)
+ return r;
+ assert_se(r >= 0);
+
+ assert_se(manager_should_reload(manager) == false);
+
+ return 0;
+}
+
+static void test_network_get(Manager *manager, sd_device *loopback) {
+ Network *network;
+ const struct ether_addr mac = {};
+
+ /* let's assume that the test machine does not have a .network file
+ that applies to the loopback device... */
+ assert_se(network_get(manager, loopback, "lo", &mac, &network) == -ENOENT);
+ assert_se(!network);
+}
+
+static void test_address_equality(void) {
+ _cleanup_(address_freep) Address *a1 = NULL, *a2 = NULL;
+
+ assert_se(address_new(&a1) >= 0);
+ assert_se(address_new(&a2) >= 0);
+
+ assert_se(address_equal(NULL, NULL));
+ assert_se(!address_equal(a1, NULL));
+ assert_se(!address_equal(NULL, a2));
+ assert_se(address_equal(a1, a2));
+
+ a1->family = AF_INET;
+ assert_se(!address_equal(a1, a2));
+
+ a2->family = AF_INET;
+ assert_se(address_equal(a1, a2));
+
+ assert_se(inet_pton(AF_INET, "192.168.3.9", &a1->in_addr.in));
+ assert_se(!address_equal(a1, a2));
+ assert_se(inet_pton(AF_INET, "192.168.3.9", &a2->in_addr.in));
+ assert_se(address_equal(a1, a2));
+ assert_se(inet_pton(AF_INET, "192.168.3.10", &a1->in_addr_peer.in));
+ assert_se(address_equal(a1, a2));
+ assert_se(inet_pton(AF_INET, "192.168.3.11", &a2->in_addr_peer.in));
+ assert_se(address_equal(a1, a2));
+ a1->prefixlen = 10;
+ assert_se(!address_equal(a1, a2));
+ a2->prefixlen = 10;
+ assert_se(address_equal(a1, a2));
+
+ a1->family = AF_INET6;
+ assert_se(!address_equal(a1, a2));
+
+ a2->family = AF_INET6;
+ assert_se(inet_pton(AF_INET6, "2001:4ca0:4f01::2", &a1->in_addr.in6));
+ assert_se(inet_pton(AF_INET6, "2001:4ca0:4f01::2", &a2->in_addr.in6));
+ assert_se(address_equal(a1, a2));
+
+ a2->prefixlen = 8;
+ assert_se(address_equal(a1, a2));
+
+ assert_se(inet_pton(AF_INET6, "2001:4ca0:4f01::1", &a2->in_addr.in6));
+ assert_se(!address_equal(a1, a2));
+}
+
+static void test_dhcp_hostname_shorten_overlong(void) {
+ int r;
+
+ {
+ /* simple hostname, no actions, no errors */
+ _cleanup_free_ char *shortened = NULL;
+ r = shorten_overlong("name1", &shortened);
+ assert_se(r == 0);
+ assert_se(streq("name1", shortened));
+ }
+
+ {
+ /* simple fqdn, no actions, no errors */
+ _cleanup_free_ char *shortened = NULL;
+ r = shorten_overlong("name1.example.com", &shortened);
+ assert_se(r == 0);
+ assert_se(streq("name1.example.com", shortened));
+ }
+
+ {
+ /* overlong fqdn, cut to first dot, no errors */
+ _cleanup_free_ char *shortened = NULL;
+ r = shorten_overlong("name1.test-dhcp-this-one-here-is-a-very-very-long-domain.example.com", &shortened);
+ assert_se(r == 1);
+ assert_se(streq("name1", shortened));
+ }
+
+ {
+ /* overlong hostname, cut to HOST_MAX_LEN, no errors */
+ _cleanup_free_ char *shortened = NULL;
+ r = shorten_overlong("test-dhcp-this-one-here-is-a-very-very-long-hostname-without-domainname", &shortened);
+ assert_se(r == 1);
+ assert_se(streq("test-dhcp-this-one-here-is-a-very-very-long-hostname-without-dom", shortened));
+ }
+
+ {
+ /* overlong fqdn, cut to first dot, empty result error */
+ _cleanup_free_ char *shortened = NULL;
+ r = shorten_overlong(".test-dhcp-this-one-here-is-a-very-very-long-hostname.example.com", &shortened);
+ assert_se(r == -EDOM);
+ assert_se(shortened == NULL);
+ }
+
+}
+
+int main(void) {
+ _cleanup_(manager_freep) Manager *manager = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *loopback = NULL;
+ int ifindex, r;
+
+ test_setup_logging(LOG_INFO);
+
+ test_deserialize_in_addr();
+ test_deserialize_dhcp_routes();
+ test_address_equality();
+ test_dhcp_hostname_shorten_overlong();
+
+ assert_se(manager_new(&manager) >= 0);
+
+ r = test_load_config(manager);
+ if (r == -EPERM)
+ return log_tests_skipped("Cannot load configuration");
+ assert_se(r == 0);
+
+ assert_se(sd_device_new_from_syspath(&loopback, "/sys/class/net/lo") >= 0);
+ assert_se(loopback);
+ assert_se(sd_device_get_ifindex(loopback, &ifindex) >= 0);
+ assert_se(ifindex == 1);
+
+ test_network_get(manager, loopback);
+
+ assert_se(manager_rtnl_enumerate_links(manager) >= 0);
+}
diff --git a/src/network/test-networkd-conf.c b/src/network/test-networkd-conf.c
new file mode 100644
index 0000000..86d4e7e
--- /dev/null
+++ b/src/network/test-networkd-conf.c
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "ether-addr-util.h"
+#include "hexdecoct.h"
+#include "log.h"
+#include "macro.h"
+#include "set.h"
+#include "string-util.h"
+
+#include "network-internal.h"
+#include "networkd-conf.h"
+#include "networkd-network.h"
+
+static void test_config_parse_duid_type_one(const char *rvalue, int ret, DUIDType expected, usec_t expected_time) {
+ DUID actual = {};
+ int r;
+
+ r = config_parse_duid_type("network", "filename", 1, "section", 1, "lvalue", 0, rvalue, &actual, NULL);
+ log_info_errno(r, "\"%s\" → %d (%m)", rvalue, actual.type);
+ assert_se(r == ret);
+ assert_se(expected == actual.type);
+ if (expected == DUID_TYPE_LLT)
+ assert_se(expected_time == actual.llt_time);
+}
+
+static void test_config_parse_duid_type(void) {
+ test_config_parse_duid_type_one("", 0, 0, 0);
+ test_config_parse_duid_type_one("link-layer-time", 0, DUID_TYPE_LLT, 0);
+ test_config_parse_duid_type_one("link-layer-time:2000-01-01 00:00:00 UTC", 0, DUID_TYPE_LLT, (usec_t) 946684800000000);
+ test_config_parse_duid_type_one("vendor", 0, DUID_TYPE_EN, 0);
+ test_config_parse_duid_type_one("vendor:2000-01-01 00:00:00 UTC", 0, 0, 0);
+ test_config_parse_duid_type_one("link-layer", 0, DUID_TYPE_LL, 0);
+ test_config_parse_duid_type_one("link-layer:2000-01-01 00:00:00 UTC", 0, 0, 0);
+ test_config_parse_duid_type_one("uuid", 0, DUID_TYPE_UUID, 0);
+ test_config_parse_duid_type_one("uuid:2000-01-01 00:00:00 UTC", 0, 0, 0);
+ test_config_parse_duid_type_one("foo", 0, 0, 0);
+ test_config_parse_duid_type_one("foo:2000-01-01 00:00:00 UTC", 0, 0, 0);
+}
+
+static void test_config_parse_duid_rawdata_one(const char *rvalue, int ret, const DUID* expected) {
+ DUID actual = {};
+ int r;
+ _cleanup_free_ char *d = NULL;
+
+ r = config_parse_duid_rawdata("network", "filename", 1, "section", 1, "lvalue", 0, rvalue, &actual, NULL);
+ d = hexmem(actual.raw_data, actual.raw_data_len);
+ log_info_errno(r, "\"%s\" → \"%s\" (%m)",
+ rvalue, strnull(d));
+ assert_se(r == ret);
+ if (expected) {
+ assert_se(actual.raw_data_len == expected->raw_data_len);
+ assert_se(memcmp(actual.raw_data, expected->raw_data, expected->raw_data_len) == 0);
+ }
+}
+
+static void test_config_parse_hwaddr_one(const char *rvalue, int ret, const struct ether_addr* expected) {
+ struct ether_addr *actual = NULL;
+ int r;
+
+ r = config_parse_hwaddr("network", "filename", 1, "section", 1, "lvalue", 0, rvalue, &actual, NULL);
+ assert_se(ret == r);
+ if (expected) {
+ assert_se(actual);
+ assert_se(ether_addr_equal(expected, actual));
+ } else
+ assert_se(actual == NULL);
+
+ free(actual);
+}
+
+static void test_config_parse_hwaddrs_one(const char *rvalue, const struct ether_addr* list, size_t n) {
+ _cleanup_set_free_free_ Set *s = NULL;
+ size_t m;
+
+ assert_se(config_parse_hwaddrs("network", "filename", 1, "section", 1, "lvalue", 0, rvalue, &s, NULL) == 0);
+ assert_se(set_size(s) == n);
+
+ for (m = 0; m < n; m++) {
+ _cleanup_free_ struct ether_addr *q = NULL;
+
+ assert_se(q = set_remove(s, &list[m]));
+ }
+
+ assert_se(set_size(s) == 0);
+}
+
+#define BYTES_0_128 "0:1:2:3:4:5:6:7:8:9:a:b:c:d:e:f:10:11:12:13:14:15:16:17:18:19:1a:1b:1c:1d:1e:1f:20:21:22:23:24:25:26:27:28:29:2a:2b:2c:2d:2e:2f:30:31:32:33:34:35:36:37:38:39:3a:3b:3c:3d:3e:3f:40:41:42:43:44:45:46:47:48:49:4a:4b:4c:4d:4e:4f:50:51:52:53:54:55:56:57:58:59:5a:5b:5c:5d:5e:5f:60:61:62:63:64:65:66:67:68:69:6a:6b:6c:6d:6e:6f:70:71:72:73:74:75:76:77:78:79:7a:7b:7c:7d:7e:7f:80"
+
+#define BYTES_1_128 {0x1,0x2,0x3,0x4,0x5,0x6,0x7,0x8,0x9,0xa,0xb,0xc,0xd,0xe,0xf,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0x80}
+
+static void test_config_parse_duid_rawdata(void) {
+ test_config_parse_duid_rawdata_one("", 0, &(DUID){});
+ test_config_parse_duid_rawdata_one("00:11:22:33:44:55:66:77", 0,
+ &(DUID){0, 8, {0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77}});
+ test_config_parse_duid_rawdata_one("00:11:22:", 0,
+ &(DUID){0, 3, {0x00,0x11,0x22}});
+ test_config_parse_duid_rawdata_one("000:11:22", 0, &(DUID){}); /* error, output is all zeros */
+ test_config_parse_duid_rawdata_one("00:111:22", 0, &(DUID){});
+ test_config_parse_duid_rawdata_one("0:1:2:3:4:5:6:7", 0,
+ &(DUID){0, 8, {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7}});
+ test_config_parse_duid_rawdata_one("11::", 0, &(DUID){0, 1, {0x11}}); /* FIXME: should this be an error? */
+ test_config_parse_duid_rawdata_one("abcdef", 0, &(DUID){});
+ test_config_parse_duid_rawdata_one(BYTES_0_128, 0, &(DUID){});
+ test_config_parse_duid_rawdata_one(BYTES_0_128 + 2, 0, &(DUID){0, 128, BYTES_1_128});
+}
+
+static void test_config_parse_hwaddr(void) {
+ const struct ether_addr t[] = {
+ { .ether_addr_octet = { 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff } },
+ { .ether_addr_octet = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab } },
+ };
+
+ test_config_parse_hwaddr_one("", 0, NULL);
+ test_config_parse_hwaddr_one("no:ta:ma:ca:dd:re", 0, NULL);
+ test_config_parse_hwaddr_one("aa:bb:cc:dd:ee:fx", 0, NULL);
+ test_config_parse_hwaddr_one("aa:bb:cc:dd:ee:ff", 0, &t[0]);
+ test_config_parse_hwaddr_one(" aa:bb:cc:dd:ee:ff", 0, &t[0]);
+ test_config_parse_hwaddr_one("aa:bb:cc:dd:ee:ff \t\n", 0, NULL);
+ test_config_parse_hwaddr_one("aa:bb:cc:dd:ee:ff \t\nxxx", 0, NULL);
+ test_config_parse_hwaddr_one("aa:bb:cc: dd:ee:ff", 0, NULL);
+ test_config_parse_hwaddr_one("aa:bb:cc:d d:ee:ff", 0, NULL);
+ test_config_parse_hwaddr_one("aa:bb:cc:dd:ee", 0, NULL);
+ test_config_parse_hwaddr_one("9:aa:bb:cc:dd:ee:ff", 0, NULL);
+ test_config_parse_hwaddr_one("aa:bb:cc:dd:ee:ff:gg", 0, NULL);
+ test_config_parse_hwaddr_one("aa:Bb:CC:dd:ee:ff", 0, &t[0]);
+ test_config_parse_hwaddr_one("01:23:45:67:89:aB", 0, &t[1]);
+ test_config_parse_hwaddr_one("1:23:45:67:89:aB", 0, &t[1]);
+ test_config_parse_hwaddr_one("aa-bb-cc-dd-ee-ff", 0, &t[0]);
+ test_config_parse_hwaddr_one("AA-BB-CC-DD-EE-FF", 0, &t[0]);
+ test_config_parse_hwaddr_one("01-23-45-67-89-ab", 0, &t[1]);
+ test_config_parse_hwaddr_one("aabb.ccdd.eeff", 0, &t[0]);
+ test_config_parse_hwaddr_one("0123.4567.89ab", 0, &t[1]);
+ test_config_parse_hwaddr_one("123.4567.89ab.", 0, NULL);
+ test_config_parse_hwaddr_one("aabbcc.ddeeff", 0, NULL);
+ test_config_parse_hwaddr_one("aabbccddeeff", 0, NULL);
+ test_config_parse_hwaddr_one("aabbccddee:ff", 0, NULL);
+ test_config_parse_hwaddr_one("012345.6789ab", 0, NULL);
+ test_config_parse_hwaddr_one("123.4567.89ab", 0, &t[1]);
+
+ test_config_parse_hwaddrs_one("", t, 0);
+ test_config_parse_hwaddrs_one("no:ta:ma:ca:dd:re", t, 0);
+ test_config_parse_hwaddrs_one("aa:bb:cc:dd:ee:fx", t, 0);
+ test_config_parse_hwaddrs_one("aa:bb:cc:dd:ee:ff", t, 1);
+ test_config_parse_hwaddrs_one(" aa:bb:cc:dd:ee:ff", t, 1);
+ test_config_parse_hwaddrs_one("aa:bb:cc:dd:ee:ff \t\n", t, 1);
+ test_config_parse_hwaddrs_one("aa:bb:cc:dd:ee:ff \t\nxxx", t, 1);
+ test_config_parse_hwaddrs_one("aa:bb:cc: dd:ee:ff", t, 0);
+ test_config_parse_hwaddrs_one("aa:bb:cc:d d:ee:ff", t, 0);
+ test_config_parse_hwaddrs_one("aa:bb:cc:dd:ee", t, 0);
+ test_config_parse_hwaddrs_one("9:aa:bb:cc:dd:ee:ff", t, 0);
+ test_config_parse_hwaddrs_one("aa:bb:cc:dd:ee:ff:gg", t, 0);
+ test_config_parse_hwaddrs_one("aa:Bb:CC:dd:ee:ff", t, 1);
+ test_config_parse_hwaddrs_one("01:23:45:67:89:aB", &t[1], 1);
+ test_config_parse_hwaddrs_one("1:23:45:67:89:aB", &t[1], 1);
+ test_config_parse_hwaddrs_one("aa-bb-cc-dd-ee-ff", t, 1);
+ test_config_parse_hwaddrs_one("AA-BB-CC-DD-EE-FF", t, 1);
+ test_config_parse_hwaddrs_one("01-23-45-67-89-ab", &t[1], 1);
+ test_config_parse_hwaddrs_one("aabb.ccdd.eeff", t, 1);
+ test_config_parse_hwaddrs_one("0123.4567.89ab", &t[1], 1);
+ test_config_parse_hwaddrs_one("123.4567.89ab.", t, 0);
+ test_config_parse_hwaddrs_one("aabbcc.ddeeff", t, 0);
+ test_config_parse_hwaddrs_one("aabbccddeeff", t, 0);
+ test_config_parse_hwaddrs_one("aabbccddee:ff", t, 0);
+ test_config_parse_hwaddrs_one("012345.6789ab", t, 0);
+ test_config_parse_hwaddrs_one("123.4567.89ab", &t[1], 1);
+
+ test_config_parse_hwaddrs_one("123.4567.89ab aa:bb:cc:dd:ee:ff 01-23-45-67-89-ab aa:Bb:CC:dd:ee:ff", t, 2);
+ test_config_parse_hwaddrs_one("123.4567.89ab aa:bb:cc:dd:ee:fx hogehoge 01-23-45-67-89-ab aaaa aa:Bb:CC:dd:ee:ff", t, 2);
+}
+
+static void test_config_parse_address_one(const char *rvalue, int family, unsigned n_addresses, const union in_addr_union *u, unsigned char prefixlen) {
+ _cleanup_(network_freep) Network *network = NULL;
+
+ assert_se(network = new0(Network, 1));
+ assert_se(config_parse_address("network", "filename", 1, "section", 1, "Address", 0, rvalue, network, network) == 0);
+ assert_se(network->n_static_addresses == n_addresses);
+ if (n_addresses > 0) {
+ assert_se(network->static_addresses);
+ assert_se(network->static_addresses->prefixlen == prefixlen);
+ assert_se(network->static_addresses->family == family);
+ assert_se(in_addr_equal(family, &network->static_addresses->in_addr, u));
+ /* TODO: check Address.in_addr and Address.broadcast */
+ }
+}
+
+static void test_config_parse_address(void) {
+ test_config_parse_address_one("", AF_INET, 0, NULL, 0);
+ test_config_parse_address_one("/", AF_INET, 0, NULL, 0);
+ test_config_parse_address_one("/8", AF_INET, 0, NULL, 0);
+ test_config_parse_address_one("1.2.3.4", AF_INET, 1, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 8);
+ test_config_parse_address_one("1.2.3.4/0", AF_INET, 1, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 0);
+ test_config_parse_address_one("1.2.3.4/1", AF_INET, 1, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 1);
+ test_config_parse_address_one("1.2.3.4/2", AF_INET, 1, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 2);
+ test_config_parse_address_one("1.2.3.4/32", AF_INET, 1, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 32);
+ test_config_parse_address_one("1.2.3.4/33", AF_INET, 0, NULL, 0);
+ test_config_parse_address_one("1.2.3.4/-1", AF_INET, 0, NULL, 0);
+
+ test_config_parse_address_one("", AF_INET6, 0, NULL, 0);
+ test_config_parse_address_one("/", AF_INET6, 0, NULL, 0);
+ test_config_parse_address_one("/8", AF_INET6, 0, NULL, 0);
+ test_config_parse_address_one("::1", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 0);
+ test_config_parse_address_one("::1/0", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 0);
+ test_config_parse_address_one("::1/1", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 1);
+ test_config_parse_address_one("::1/2", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 2);
+ test_config_parse_address_one("::1/32", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 32);
+ test_config_parse_address_one("::1/33", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 33);
+ test_config_parse_address_one("::1/64", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 64);
+ test_config_parse_address_one("::1/128", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 128);
+ test_config_parse_address_one("::1/129", AF_INET6, 0, NULL, 0);
+ test_config_parse_address_one("::1/-1", AF_INET6, 0, NULL, 0);
+}
+
+int main(int argc, char **argv) {
+ log_parse_environment();
+ log_open();
+
+ test_config_parse_duid_type();
+ test_config_parse_duid_rawdata();
+ test_config_parse_hwaddr();
+ test_config_parse_address();
+
+ return 0;
+}
diff --git a/src/network/test-routing-policy-rule.c b/src/network/test-routing-policy-rule.c
new file mode 100644
index 0000000..57bfb6a
--- /dev/null
+++ b/src/network/test-routing-policy-rule.c
@@ -0,0 +1,92 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1+
+***/
+
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "network-internal.h"
+#include "networkd-manager.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+static void test_rule_serialization(const char *title, const char *ruleset, const char *expected) {
+ char pattern[] = "/tmp/systemd-test-routing-policy-rule.XXXXXX",
+ pattern2[] = "/tmp/systemd-test-routing-policy-rule.XXXXXX",
+ pattern3[] = "/tmp/systemd-test-routing-policy-rule.XXXXXX";
+ const char *cmd;
+ int fd, fd2, fd3;
+ _cleanup_fclose_ FILE *f = NULL, *f2 = NULL, *f3 = NULL;
+ Set *rules = NULL;
+ _cleanup_free_ char *buf = NULL;
+ size_t buf_size;
+
+ log_info("========== %s ==========", title);
+ log_info("put:\n%s\n", ruleset);
+
+ fd = mkostemp_safe(pattern);
+ assert_se(fd >= 0);
+ assert_se(f = fdopen(fd, "a+"));
+ assert_se(write_string_stream(f, ruleset, 0) == 0);
+
+ assert_se(routing_policy_load_rules(pattern, &rules) == 0);
+
+ fd2 = mkostemp_safe(pattern2);
+ assert_se(fd2 >= 0);
+ assert_se(f2 = fdopen(fd2, "a+"));
+
+ assert_se(routing_policy_serialize_rules(rules, f2) == 0);
+ assert_se(fflush_and_check(f2) == 0);
+
+ assert_se(read_full_file(pattern2, &buf, &buf_size) == 0);
+
+ log_info("got:\n%s", buf);
+
+ fd3 = mkostemp_safe(pattern3);
+ assert_se(fd3 >= 0);
+ assert_se(f3 = fdopen(fd3, "w"));
+ assert_se(write_string_stream(f3, expected ?: ruleset, 0) == 0);
+
+ cmd = strjoina("diff -u ", pattern3, " ", pattern2);
+ log_info("$ %s", cmd);
+ assert_se(system(cmd) == 0);
+
+ set_free_with_destructor(rules, routing_policy_rule_free);
+}
+
+int main(int argc, char **argv) {
+ _cleanup_free_ char *p = NULL;
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_rule_serialization("basic parsing",
+ "RULE=from=1.2.3.4/32 to=2.3.4.5/32 tos=5 fwmark=1/2 table=10", NULL);
+
+ test_rule_serialization("ignored values",
+ "RULE=something=to=ignore from=1.2.3.4/32 from=1.2.3.4/32"
+ " \t to=2.3.4.5/24 to=2.3.4.5/32 tos=5 fwmark=2 fwmark=1 table=10 table=20",
+ "RULE=from=1.2.3.4/32"
+ " to=2.3.4.5/32 tos=5 fwmark=1/0 table=20");
+
+ test_rule_serialization("ipv6",
+ "RULE=from=1::2/64 to=2::3/64 table=6", NULL);
+
+ assert_se(asprintf(&p, "RULE=from=1::2/64 to=2::3/64 table=%d", RT_TABLE_MAIN) >= 0);
+ test_rule_serialization("default table",
+ "RULE=from=1::2/64 to=2::3/64", p);
+
+ test_rule_serialization("incoming interface",
+ "RULE=from=1::2/64 to=2::3/64 table=1 iif=lo",
+ "RULE=from=1::2/64 to=2::3/64 iif=lo table=1");
+
+ test_rule_serialization("outgoing interface",
+ "RULE=from=1::2/64 to=2::3/64 oif=eth0 table=1", NULL);
+
+ test_rule_serialization("freeing interface names",
+ "RULE=from=1::2/64 to=2::3/64 iif=e0 iif=e1 oif=e0 oif=e1 table=1",
+ "RULE=from=1::2/64 to=2::3/64 iif=e1 oif=e1 table=1");
+
+ return 0;
+}
diff --git a/src/network/wait-online/link.c b/src/network/wait-online/link.c
new file mode 100644
index 0000000..87d54fe
--- /dev/null
+++ b/src/network/wait-online/link.c
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-network.h"
+
+#include "alloc-util.h"
+#include "hashmap.h"
+#include "link.h"
+#include "manager.h"
+#include "string-util.h"
+
+int link_new(Manager *m, Link **ret, int ifindex, const char *ifname) {
+ _cleanup_(link_freep) Link *l = NULL;
+ int r;
+
+ assert(m);
+ assert(ifindex > 0);
+
+ r = hashmap_ensure_allocated(&m->links, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&m->links_by_name, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ l = new0(Link, 1);
+ if (!l)
+ return -ENOMEM;
+
+ l->manager = m;
+
+ l->ifname = strdup(ifname);
+ if (!l->ifname)
+ return -ENOMEM;
+
+ r = hashmap_put(m->links_by_name, l->ifname, l);
+ if (r < 0)
+ return r;
+
+ l->ifindex = ifindex;
+
+ r = hashmap_put(m->links, INT_TO_PTR(ifindex), l);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = l;
+ l = NULL;
+
+ return 0;
+}
+
+Link *link_free(Link *l) {
+
+ if (!l)
+ return NULL;
+
+ if (l->manager) {
+ hashmap_remove(l->manager->links, INT_TO_PTR(l->ifindex));
+ hashmap_remove(l->manager->links_by_name, l->ifname);
+ }
+
+ free(l->ifname);
+ return mfree(l);
+ }
+
+int link_update_rtnl(Link *l, sd_netlink_message *m) {
+ const char *ifname;
+ int r;
+
+ assert(l);
+ assert(l->manager);
+ assert(m);
+
+ r = sd_rtnl_message_link_get_flags(m, &l->flags);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_string(m, IFLA_IFNAME, &ifname);
+ if (r < 0)
+ return r;
+
+ if (!streq(l->ifname, ifname)) {
+ char *new_ifname;
+
+ new_ifname = strdup(ifname);
+ if (!new_ifname)
+ return -ENOMEM;
+
+ hashmap_remove(l->manager->links_by_name, l->ifname);
+ free(l->ifname);
+ l->ifname = new_ifname;
+
+ r = hashmap_put(l->manager->links_by_name, l->ifname, l);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int link_update_monitor(Link *l) {
+ assert(l);
+
+ l->required_for_online = sd_network_link_get_required_for_online(l->ifindex) != 0;
+
+ l->operational_state = mfree(l->operational_state);
+
+ sd_network_link_get_operational_state(l->ifindex, &l->operational_state);
+
+ l->state = mfree(l->state);
+
+ sd_network_link_get_setup_state(l->ifindex, &l->state);
+
+ return 0;
+}
diff --git a/src/network/wait-online/link.h b/src/network/wait-online/link.h
new file mode 100644
index 0000000..57e8f12
--- /dev/null
+++ b/src/network/wait-online/link.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-netlink.h"
+
+typedef struct Link Link;
+typedef struct Manager Manager;
+
+struct Link {
+ Manager *manager;
+
+ int ifindex;
+ char *ifname;
+ unsigned flags;
+
+ bool required_for_online;
+ char *operational_state;
+ char *state;
+};
+
+int link_new(Manager *m, Link **ret, int ifindex, const char *ifname);
+Link *link_free(Link *l);
+int link_update_rtnl(Link *l, sd_netlink_message *m);
+int link_update_monitor(Link *l);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Link*, link_free);
diff --git a/src/network/wait-online/manager.c b/src/network/wait-online/manager.c
new file mode 100644
index 0000000..67218b6
--- /dev/null
+++ b/src/network/wait-online/manager.c
@@ -0,0 +1,316 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/ether.h>
+#include <linux/if.h>
+#include <fnmatch.h>
+
+#include "alloc-util.h"
+#include "link.h"
+#include "manager.h"
+#include "netlink-util.h"
+#include "network-internal.h"
+#include "strv.h"
+#include "time-util.h"
+#include "util.h"
+
+bool manager_ignore_link(Manager *m, Link *link) {
+ assert(m);
+ assert(link);
+
+ /* always ignore the loopback interface */
+ if (link->flags & IFF_LOOPBACK)
+ return true;
+
+ /* if interfaces are given on the command line, ignore all others */
+ if (m->interfaces && !strv_contains(m->interfaces, link->ifname))
+ return true;
+
+ if (!link->required_for_online)
+ return true;
+
+ /* ignore interfaces we explicitly are asked to ignore */
+ return strv_fnmatch(m->ignore, link->ifname, 0);
+}
+
+bool manager_all_configured(Manager *m) {
+ Iterator i;
+ Link *l;
+ char **ifname;
+ bool one_ready = false;
+
+ /* wait for all the links given on the command line to appear */
+ STRV_FOREACH(ifname, m->interfaces) {
+ l = hashmap_get(m->links_by_name, *ifname);
+ if (!l) {
+ log_debug("still waiting for %s", *ifname);
+ return false;
+ }
+ }
+
+ /* wait for all links networkd manages to be in admin state 'configured'
+ and at least one link to gain a carrier */
+ HASHMAP_FOREACH(l, m->links, i) {
+ if (manager_ignore_link(m, l)) {
+ log_info("ignoring: %s", l->ifname);
+ continue;
+ }
+
+ if (!l->state) {
+ log_debug("link %s has not yet been processed by udev",
+ l->ifname);
+ return false;
+ }
+
+ if (STR_IN_SET(l->state, "configuring", "pending")) {
+ log_debug("link %s is being processed by networkd",
+ l->ifname);
+ return false;
+ }
+
+ if (l->operational_state &&
+ STR_IN_SET(l->operational_state, "degraded", "routable"))
+ /* we wait for at least one link to be ready,
+ regardless of who manages it */
+ one_ready = true;
+ }
+
+ return one_ready;
+}
+
+static int manager_process_link(sd_netlink *rtnl, sd_netlink_message *mm, void *userdata) {
+ Manager *m = userdata;
+ uint16_t type;
+ Link *l;
+ const char *ifname;
+ int ifindex, r;
+
+ assert(rtnl);
+ assert(m);
+ assert(mm);
+
+ r = sd_netlink_message_get_type(mm, &type);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: Could not get message type, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_link_get_ifindex(mm, &ifindex);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: Could not get ifindex from link, ignoring: %m");
+ return 0;
+ } else if (ifindex <= 0) {
+ log_warning("rtnl: received link message with invalid ifindex %d, ignoring", ifindex);
+ return 0;
+ }
+
+ r = sd_netlink_message_read_string(mm, IFLA_IFNAME, &ifname);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: Received link message without ifname, ignoring: %m");
+ return 0;
+ }
+
+ l = hashmap_get(m->links, INT_TO_PTR(ifindex));
+
+ switch (type) {
+
+ case RTM_NEWLINK:
+ if (!l) {
+ log_debug("Found link %i", ifindex);
+
+ r = link_new(m, &l, ifindex, ifname);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create link object: %m");
+
+ r = link_update_monitor(l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize link object: %m");
+ }
+
+ r = link_update_rtnl(l, mm);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to process RTNL link message: %m");;
+
+ break;
+
+ case RTM_DELLINK:
+ if (l) {
+ log_debug("Removing link %i", l->ifindex);
+ link_free(l);
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+static int on_rtnl_event(sd_netlink *rtnl, sd_netlink_message *mm, void *userdata) {
+ Manager *m = userdata;
+ int r;
+
+ r = manager_process_link(rtnl, mm, m);
+ if (r < 0)
+ return r;
+
+ if (manager_all_configured(m))
+ sd_event_exit(m->event, 0);
+
+ return 1;
+}
+
+static int manager_rtnl_listen(Manager *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ sd_netlink_message *i;
+ int r;
+
+ assert(m);
+
+ /* First, subscribe to interfaces coming and going */
+ r = sd_netlink_open(&m->rtnl);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_attach_event(m->rtnl, m->event, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_NEWLINK, on_rtnl_event, NULL, m, "wait-online-on-NEWLINK");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_DELLINK, on_rtnl_event, NULL, m, "wait-online-on-DELLINK");
+ if (r < 0)
+ return r;
+
+ /* Then, enumerate all links */
+ r = sd_rtnl_message_new_link(m->rtnl, &req, RTM_GETLINK, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(m->rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (i = reply; i; i = sd_netlink_message_next(i)) {
+ r = manager_process_link(m->rtnl, i, m);
+ if (r < 0)
+ return r;
+ }
+
+ return r;
+}
+
+static int on_network_event(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ Iterator i;
+ Link *l;
+ int r;
+
+ assert(m);
+
+ sd_network_monitor_flush(m->network_monitor);
+
+ HASHMAP_FOREACH(l, m->links, i) {
+ r = link_update_monitor(l);
+ if (r < 0)
+ log_warning_errno(r, "Failed to update monitor information for %i: %m", l->ifindex);
+ }
+
+ if (manager_all_configured(m))
+ sd_event_exit(m->event, 0);
+
+ return 0;
+}
+
+static int manager_network_monitor_listen(Manager *m) {
+ int r, fd, events;
+
+ assert(m);
+
+ r = sd_network_monitor_new(&m->network_monitor, NULL);
+ if (r < 0)
+ return r;
+
+ fd = sd_network_monitor_get_fd(m->network_monitor);
+ if (fd < 0)
+ return fd;
+
+ events = sd_network_monitor_get_events(m->network_monitor);
+ if (events < 0)
+ return events;
+
+ r = sd_event_add_io(m->event, &m->network_monitor_event_source,
+ fd, events, &on_network_event, m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int manager_new(Manager **ret, char **interfaces, char **ignore, usec_t timeout) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ assert(ret);
+
+ m = new0(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ m->interfaces = interfaces;
+ m->ignore = ignore;
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ (void) sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+
+ if (timeout > 0) {
+ usec_t usec;
+
+ usec = now(clock_boottime_or_monotonic()) + timeout;
+
+ r = sd_event_add_time(m->event, NULL, clock_boottime_or_monotonic(), usec, 0, NULL, INT_TO_PTR(-ETIMEDOUT));
+ if (r < 0)
+ return r;
+ }
+
+ sd_event_set_watchdog(m->event, true);
+
+ r = manager_network_monitor_listen(m);
+ if (r < 0)
+ return r;
+
+ r = manager_rtnl_listen(m);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+void manager_free(Manager *m) {
+ if (!m)
+ return;
+
+ hashmap_free_with_destructor(m->links, link_free);
+ hashmap_free(m->links_by_name);
+
+ sd_event_source_unref(m->network_monitor_event_source);
+ sd_network_monitor_unref(m->network_monitor);
+
+ sd_event_source_unref(m->rtnl_event_source);
+ sd_netlink_unref(m->rtnl);
+
+ sd_event_unref(m->event);
+ free(m);
+
+ return;
+}
diff --git a/src/network/wait-online/manager.h b/src/network/wait-online/manager.h
new file mode 100644
index 0000000..0fde272
--- /dev/null
+++ b/src/network/wait-online/manager.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-event.h"
+#include "sd-netlink.h"
+#include "sd-network.h"
+
+#include "hashmap.h"
+
+typedef struct Manager Manager;
+typedef struct Link Link;
+
+struct Manager {
+ Hashmap *links;
+ Hashmap *links_by_name;
+
+ char **interfaces;
+ char **ignore;
+
+ sd_netlink *rtnl;
+ sd_event_source *rtnl_event_source;
+
+ sd_network_monitor *network_monitor;
+ sd_event_source *network_monitor_event_source;
+
+ sd_event *event;
+};
+
+void manager_free(Manager *m);
+int manager_new(Manager **ret, char **interfaces, char **ignore, usec_t timeout);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
+
+bool manager_all_configured(Manager *m);
+bool manager_ignore_link(Manager *m, Link *link);
diff --git a/src/network/wait-online/wait-online.c b/src/network/wait-online/wait-online.c
new file mode 100644
index 0000000..71b6cf6
--- /dev/null
+++ b/src/network/wait-online/wait-online.c
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+
+#include "sd-daemon.h"
+
+#include "daemon-util.h"
+#include "main-func.h"
+#include "manager.h"
+#include "pretty-print.h"
+#include "signal-util.h"
+#include "strv.h"
+
+static bool arg_quiet = false;
+static usec_t arg_timeout = 120 * USEC_PER_SEC;
+static char **arg_interfaces = NULL;
+static char **arg_ignore = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_interfaces, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_ignore, strv_freep);
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-networkd-wait-online.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Block until network is configured.\n\n"
+ " -h --help Show this help\n"
+ " --version Print version string\n"
+ " -q --quiet Do not show status information\n"
+ " -i --interface=INTERFACE Block until at least these interfaces have appeared\n"
+ " --ignore=INTERFACE Don't take these interfaces into account\n"
+ " --timeout=SECS Maximum time to wait for network connectivity\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_IGNORE,
+ ARG_TIMEOUT,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "quiet", no_argument, NULL, 'q' },
+ { "interface", required_argument, NULL, 'i' },
+ { "ignore", required_argument, NULL, ARG_IGNORE },
+ { "timeout", required_argument, NULL, ARG_TIMEOUT },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "+hi:q", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ help();
+ return 0;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case ARG_VERSION:
+ return version();
+
+ case 'i':
+ if (strv_extend(&arg_interfaces, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_IGNORE:
+ if (strv_extend(&arg_ignore, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_TIMEOUT:
+ r = parse_sec(optarg, &arg_timeout);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(notify_on_cleanup) const char *notify_message = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_quiet)
+ log_set_max_level(LOG_WARNING);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = manager_new(&m, arg_interfaces, arg_ignore, arg_timeout);
+ if (r < 0)
+ return log_error_errno(r, "Could not create manager: %m");
+
+ if (manager_all_configured(m))
+ goto success;
+
+ notify_message = notify_start("READY=1\n"
+ "STATUS=Waiting for network connections...",
+ "STATUS=Failed to wait for network connectivity...");
+
+ r = sd_event_loop(m->event);
+ if (r < 0)
+ return log_error_errno(r, "Event loop failed: %m");
+
+success:
+ notify_message = "STATUS=All interfaces configured...";
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/notify/notify.c b/src/notify/notify.c
new file mode 100644
index 0000000..fff5233
--- /dev/null
+++ b/src/notify/notify.c
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "env-util.h"
+#include "format-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+static bool arg_ready = false;
+static pid_t arg_pid = 0;
+static const char *arg_status = NULL;
+static bool arg_booted = false;
+static uid_t arg_uid = UID_INVALID;
+static gid_t arg_gid = GID_INVALID;
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-notify", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [VARIABLE=VALUE...]\n\n"
+ "Notify the init system about service status updates.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --ready Inform the init system about service start-up completion\n"
+ " --pid[=PID] Set main PID of daemon\n"
+ " --uid=USER Set user to send from\n"
+ " --status=TEXT Set status text\n"
+ " --booted Check if the system was booted up with systemd\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_READY = 0x100,
+ ARG_VERSION,
+ ARG_PID,
+ ARG_STATUS,
+ ARG_BOOTED,
+ ARG_UID,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "ready", no_argument, NULL, ARG_READY },
+ { "pid", optional_argument, NULL, ARG_PID },
+ { "status", required_argument, NULL, ARG_STATUS },
+ { "booted", no_argument, NULL, ARG_BOOTED },
+ { "uid", required_argument, NULL, ARG_UID },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0) {
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_READY:
+ arg_ready = true;
+ break;
+
+ case ARG_PID:
+
+ if (optarg) {
+ if (parse_pid(optarg, &arg_pid) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse PID %s.", optarg);
+ } else
+ arg_pid = getppid();
+
+ break;
+
+ case ARG_STATUS:
+ arg_status = optarg;
+ break;
+
+ case ARG_BOOTED:
+ arg_booted = true;
+ break;
+
+ case ARG_UID: {
+ const char *u = optarg;
+
+ r = get_user_creds(&u, &arg_uid, &arg_gid, NULL, NULL, 0);
+ if (r == -ESRCH) /* If the user doesn't exist, then accept it anyway as numeric */
+ r = parse_uid(u, &arg_uid);
+ if (r < 0)
+ return log_error_errno(r, "Can't resolve user %s: %m", optarg);
+
+ break;
+ }
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+ }
+
+ if (optind >= argc &&
+ !arg_ready &&
+ !arg_status &&
+ !arg_pid &&
+ !arg_booted) {
+ help();
+ return -EINVAL;
+ }
+
+ return 1;
+}
+
+static int run(int argc, char* argv[]) {
+ _cleanup_free_ char *status = NULL, *cpid = NULL, *n = NULL;
+ _cleanup_strv_free_ char **final_env = NULL;
+ char* our_env[4];
+ unsigned i = 0;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_booted)
+ return sd_booted() <= 0;
+
+ if (arg_ready)
+ our_env[i++] = (char*) "READY=1";
+
+ if (arg_status) {
+ status = strappend("STATUS=", arg_status);
+ if (!status)
+ return log_oom();
+
+ our_env[i++] = status;
+ }
+
+ if (arg_pid > 0) {
+ if (asprintf(&cpid, "MAINPID="PID_FMT, arg_pid) < 0)
+ return log_oom();
+
+ our_env[i++] = cpid;
+ }
+
+ our_env[i++] = NULL;
+
+ final_env = strv_env_merge(2, our_env, argv + optind);
+ if (!final_env)
+ return log_oom();
+
+ if (strv_isempty(final_env))
+ return 0;
+
+ n = strv_join(final_env, "\n");
+ if (!n)
+ return log_oom();
+
+ /* If this is requested change to the requested UID/GID. Note thta we only change the real UID here, and leave
+ the effective UID in effect (which is 0 for this to work). That's because we want the privileges to fake the
+ ucred data, and sd_pid_notify() uses the real UID for filling in ucred. */
+
+ if (arg_gid != GID_INVALID &&
+ setregid(arg_gid, (gid_t) -1) < 0)
+ return log_error_errno(errno, "Failed to change GID: %m");
+
+ if (arg_uid != UID_INVALID &&
+ setreuid(arg_uid, (uid_t) -1) < 0)
+ return log_error_errno(errno, "Failed to change UID: %m");
+
+ r = sd_pid_notify(arg_pid ? arg_pid : getppid(), false, n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to notify init system: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "No status data could be sent: $NOTIFY_SOCKET was not set");
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/nspawn/meson.build b/src/nspawn/meson.build
new file mode 100644
index 0000000..be54ba3
--- /dev/null
+++ b/src/nspawn/meson.build
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+libnspawn_core_sources = files('''
+ nspawn-cgroup.c
+ nspawn-cgroup.h
+ nspawn-def.h
+ nspawn-expose-ports.c
+ nspawn-expose-ports.h
+ nspawn-mount.c
+ nspawn-mount.h
+ nspawn-network.c
+ nspawn-network.h
+ nspawn-patch-uid.c
+ nspawn-patch-uid.h
+ nspawn-register.c
+ nspawn-register.h
+ nspawn-seccomp.c
+ nspawn-seccomp.h
+ nspawn-settings.c
+ nspawn-settings.h
+ nspawn-setuid.c
+ nspawn-setuid.h
+ nspawn-stub-pid1.c
+ nspawn-stub-pid1.h
+'''.split())
+
+nspawn_gperf_c = custom_target(
+ 'nspawn-gperf.c',
+ input : 'nspawn-gperf.gperf',
+ output : 'nspawn-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+libnspawn_core_sources += [nspawn_gperf_c]
+
+libnspawn_core = static_library(
+ 'nspawn-core',
+ libnspawn_core_sources,
+ include_directories : includes,
+ dependencies : [libacl,
+ libseccomp,
+ libselinux])
+
+systemd_nspawn_sources = files('nspawn.c')
+
+tests += [
+ [['src/nspawn/test-nspawn-tables.c'],
+ [libnspawn_core,
+ libshared],
+ []],
+
+ [['src/nspawn/test-patch-uid.c'],
+ [libnspawn_core,
+ libshared],
+ [libacl],
+ '', 'manual'],
+]
diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c
new file mode 100644
index 0000000..97fa092
--- /dev/null
+++ b/src/nspawn/nspawn-cgroup.c
@@ -0,0 +1,608 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/mount.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "nspawn-cgroup.h"
+#include "nspawn-mount.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+static int chown_cgroup_path(const char *path, uid_t uid_shift) {
+ _cleanup_close_ int fd = -1;
+ const char *fn;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ FOREACH_STRING(fn,
+ ".",
+ "cgroup.clone_children",
+ "cgroup.controllers",
+ "cgroup.events",
+ "cgroup.procs",
+ "cgroup.stat",
+ "cgroup.subtree_control",
+ "cgroup.threads",
+ "notify_on_release",
+ "tasks")
+ if (fchownat(fd, fn, uid_shift, uid_shift, 0) < 0)
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to chown \"%s/%s\", ignoring: %m", path, fn);
+
+ return 0;
+}
+
+int chown_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift) {
+ _cleanup_free_ char *path = NULL, *fs = NULL;
+ int r;
+
+ r = cg_pid_get_path(NULL, pid, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get container cgroup path: %m");
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get file system path for container cgroup: %m");
+
+ r = chown_cgroup_path(fs, uid_shift);
+ if (r < 0)
+ return log_error_errno(r, "Failed to chown() cgroup %s: %m", fs);
+
+ if (unified_requested == CGROUP_UNIFIED_SYSTEMD || (unified_requested == CGROUP_UNIFIED_NONE && cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0)) {
+ _cleanup_free_ char *lfs = NULL;
+ /* Always propagate access rights from unified to legacy controller */
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, NULL, &lfs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get file system path for container cgroup: %m");
+
+ r = chown_cgroup_path(lfs, uid_shift);
+ if (r < 0)
+ return log_error_errno(r, "Failed to chown() cgroup %s: %m", lfs);
+ }
+
+ return 0;
+}
+
+int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift) {
+ _cleanup_free_ char *cgroup = NULL;
+ char tree[] = "/tmp/unifiedXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1];
+ bool undo_mount = false;
+ const char *fn;
+ int r, unified_controller;
+
+ unified_controller = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (unified_controller < 0)
+ return log_error_errno(unified_controller, "Failed to determine whether the systemd hierarchy is unified: %m");
+ if ((unified_controller > 0) == (unified_requested >= CGROUP_UNIFIED_SYSTEMD))
+ return 0;
+
+ /* When the host uses the legacy cgroup setup, but the
+ * container shall use the unified hierarchy, let's make sure
+ * we copy the path from the name=systemd hierarchy into the
+ * unified hierarchy. Similar for the reverse situation. */
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get control group of " PID_FMT ": %m", pid);
+
+ /* In order to access the unified hierarchy we need to mount it */
+ if (!mkdtemp(tree))
+ return log_error_errno(errno, "Failed to generate temporary mount point for unified hierarchy: %m");
+
+ if (unified_controller > 0)
+ r = mount_verbose(LOG_ERR, "cgroup", tree, "cgroup",
+ MS_NOSUID|MS_NOEXEC|MS_NODEV, "none,name=systemd,xattr");
+ else
+ r = mount_verbose(LOG_ERR, "cgroup", tree, "cgroup2",
+ MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+ if (r < 0)
+ goto finish;
+
+ undo_mount = true;
+
+ /* If nspawn dies abruptly the cgroup hierarchy created below
+ * its unit isn't cleaned up. So, let's remove it
+ * https://github.com/systemd/systemd/pull/4223#issuecomment-252519810 */
+ fn = strjoina(tree, cgroup);
+ (void) rm_rf(fn, REMOVE_ROOT|REMOVE_ONLY_DIRECTORIES);
+
+ fn = strjoina(tree, cgroup, "/cgroup.procs");
+ (void) mkdir_parents(fn, 0755);
+
+ sprintf(pid_string, PID_FMT, pid);
+ r = write_string_file(fn, pid_string, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0) {
+ log_error_errno(r, "Failed to move process: %m");
+ goto finish;
+ }
+
+ fn = strjoina(tree, cgroup);
+ r = chown_cgroup_path(fn, uid_shift);
+ if (r < 0)
+ log_error_errno(r, "Failed to chown() cgroup %s: %m", fn);
+finish:
+ if (undo_mount)
+ (void) umount_verbose(tree);
+
+ (void) rmdir(tree);
+ return r;
+}
+
+int create_subcgroup(pid_t pid, bool keep_unit, CGroupUnified unified_requested) {
+ _cleanup_free_ char *cgroup = NULL;
+ CGroupMask supported;
+ const char *payload;
+ int r;
+
+ assert(pid > 1);
+
+ /* In the unified hierarchy inner nodes may only contain subgroups, but not processes. Hence, if we running in
+ * the unified hierarchy and the container does the same, and we did not create a scope unit for the container
+ * move us and the container into two separate subcgroups.
+ *
+ * Moreover, container payloads such as systemd try to manage the cgroup they run in in full (i.e. including
+ * its attributes), while the host systemd will only delegate cgroups for children of the cgroup created for a
+ * delegation unit, instead of the cgroup itself. This means, if we'd pass on the cgroup allocated from the
+ * host systemd directly to the payload, the host and payload systemd might fight for the cgroup
+ * attributes. Hence, let's insert an intermediary cgroup to cover that case too.
+ *
+ * Note that we only bother with the main hierarchy here, not with any secondary ones. On the unified setup
+ * that's fine because there's only one hiearchy anyway and controllers are enabled directly on it. On the
+ * legacy setup, this is fine too, since delegation of controllers is generally not safe there, hence we won't
+ * do it. */
+
+ r = cg_mask_supported(&supported);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine supported controllers: %m");
+
+ if (keep_unit)
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
+ else
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get our control group: %m");
+
+ payload = strjoina(cgroup, "/payload");
+ r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, payload, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create %s subcgroup: %m", payload);
+
+ if (keep_unit) {
+ const char *supervisor;
+
+ supervisor = strjoina(cgroup, "/supervisor");
+ r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, supervisor, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create %s subcgroup: %m", supervisor);
+ }
+
+ /* Try to enable as many controllers as possible for the new payload. */
+ (void) cg_enable_everywhere(supported, supported, cgroup, NULL);
+ return 0;
+}
+
+/* Retrieve existing subsystems. This function is called in a new cgroup
+ * namespace.
+ */
+static int get_process_controllers(Set **ret) {
+ _cleanup_set_free_free_ Set *controllers = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(ret);
+
+ controllers = set_new(&string_hash_ops);
+ if (!controllers)
+ return -ENOMEM;
+
+ f = fopen("/proc/self/cgroup", "re");
+ if (!f)
+ return errno == ENOENT ? -ESRCH : -errno;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *e, *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ l = strchr(line, ':');
+ if (!l)
+ continue;
+
+ l++;
+ e = strchr(l, ':');
+ if (!e)
+ continue;
+
+ *e = 0;
+
+ if (STR_IN_SET(l, "", "name=systemd", "name=unified"))
+ continue;
+
+ r = set_put_strdup(controllers, l);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(controllers);
+
+ return 0;
+}
+
+static int mount_legacy_cgroup_hierarchy(
+ const char *dest,
+ const char *controller,
+ const char *hierarchy,
+ bool read_only) {
+
+ const char *to, *fstype, *opts;
+ int r;
+
+ to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy);
+
+ r = path_is_mount_point(to, dest, 0);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
+ if (r > 0)
+ return 0;
+
+ mkdir_p(to, 0755);
+
+ /* The superblock mount options of the mount point need to be
+ * identical to the hosts', and hence writable... */
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_HYBRID)) {
+ fstype = "cgroup2";
+ opts = NULL;
+ } else if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_LEGACY)) {
+ fstype = "cgroup";
+ opts = "none,name=systemd,xattr";
+ } else {
+ fstype = "cgroup";
+ opts = controller;
+ }
+
+ r = mount_verbose(LOG_ERR, "cgroup", to, fstype, MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
+ if (r < 0)
+ return r;
+
+ /* ... hence let's only make the bind mount read-only, not the superblock. */
+ if (read_only) {
+ r = mount_verbose(LOG_ERR, NULL, to, NULL,
+ MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+/* Mount a legacy cgroup hierarchy when cgroup namespaces are supported. */
+static int mount_legacy_cgns_supported(
+ const char *dest,
+ CGroupUnified unified_requested,
+ bool userns,
+ uid_t uid_shift,
+ uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ _cleanup_set_free_free_ Set *controllers = NULL;
+ const char *cgroup_root = "/sys/fs/cgroup", *c;
+ int r;
+
+ (void) mkdir_p(cgroup_root, 0755);
+
+ /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
+ r = path_is_mount_point(cgroup_root, dest, AT_SYMLINK_FOLLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
+ if (r == 0) {
+ _cleanup_free_ char *options = NULL;
+
+ /* When cgroup namespaces are enabled and user namespaces are
+ * used then the mount of the cgroupfs is done *inside* the new
+ * user namespace. We're root in the new user namespace and the
+ * kernel will happily translate our uid/gid to the correct
+ * uid/gid as seen from e.g. /proc/1/mountinfo. So we simply
+ * pass uid 0 and not uid_shift to tmpfs_patch_options().
+ */
+ r = tmpfs_patch_options("mode=755", 0, selinux_apifs_context, &options);
+ if (r < 0)
+ return log_oom();
+
+ r = mount_verbose(LOG_ERR, "tmpfs", cgroup_root, "tmpfs",
+ MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options);
+ if (r < 0)
+ return r;
+ }
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto skip_controllers;
+
+ r = get_process_controllers(&controllers);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine cgroup controllers: %m");
+
+ for (;;) {
+ _cleanup_free_ const char *controller = NULL;
+
+ controller = set_steal_first(controllers);
+ if (!controller)
+ break;
+
+ r = mount_legacy_cgroup_hierarchy("", controller, controller, !userns);
+ if (r < 0)
+ return r;
+
+ /* When multiple hierarchies are co-mounted, make their
+ * constituting individual hierarchies a symlink to the
+ * co-mount.
+ */
+ c = controller;
+ for (;;) {
+ _cleanup_free_ char *target = NULL, *tok = NULL;
+
+ r = extract_first_word(&c, &tok, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract co-mounted cgroup controller: %m");
+ if (r == 0)
+ break;
+
+ if (streq(controller, tok))
+ break;
+
+ target = prefix_root("/sys/fs/cgroup/", tok);
+ if (!target)
+ return log_oom();
+
+ r = symlink_idempotent(controller, target, false);
+ if (r == -EINVAL)
+ return log_error_errno(r, "Invalid existing symlink for combined hierarchy: %m");
+ if (r < 0)
+ return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
+ }
+ }
+
+skip_controllers:
+ if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+ r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false);
+ if (r < 0)
+ return r;
+ }
+
+ r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false);
+ if (r < 0)
+ return r;
+
+ if (!userns)
+ return mount_verbose(LOG_ERR, NULL, cgroup_root, NULL,
+ MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
+
+ return 0;
+}
+
+/* Mount legacy cgroup hierarchy when cgroup namespaces are unsupported. */
+static int mount_legacy_cgns_unsupported(
+ const char *dest,
+ CGroupUnified unified_requested,
+ bool userns,
+ uid_t uid_shift,
+ uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ _cleanup_set_free_free_ Set *controllers = NULL;
+ const char *cgroup_root;
+ int r;
+
+ cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
+
+ (void) mkdir_p(cgroup_root, 0755);
+
+ /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
+ r = path_is_mount_point(cgroup_root, dest, AT_SYMLINK_FOLLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
+ if (r == 0) {
+ _cleanup_free_ char *options = NULL;
+
+ r = tmpfs_patch_options("mode=755", uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &options);
+ if (r < 0)
+ return log_oom();
+
+ r = mount_verbose(LOG_ERR, "tmpfs", cgroup_root, "tmpfs",
+ MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options);
+ if (r < 0)
+ return r;
+ }
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto skip_controllers;
+
+ r = cg_kernel_controllers(&controllers);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine cgroup controllers: %m");
+
+ for (;;) {
+ _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
+
+ controller = set_steal_first(controllers);
+ if (!controller)
+ break;
+
+ origin = prefix_root("/sys/fs/cgroup/", controller);
+ if (!origin)
+ return log_oom();
+
+ r = readlink_malloc(origin, &combined);
+ if (r == -EINVAL) {
+ /* Not a symbolic link, but directly a single cgroup hierarchy */
+
+ r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
+ if (r < 0)
+ return r;
+
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to read link %s: %m", origin);
+ else {
+ _cleanup_free_ char *target = NULL;
+
+ target = prefix_root(dest, origin);
+ if (!target)
+ return log_oom();
+
+ /* A symbolic link, a combination of controllers in one hierarchy */
+
+ if (!filename_is_valid(combined)) {
+ log_warning("Ignoring invalid combined hierarchy %s.", combined);
+ continue;
+ }
+
+ r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
+ if (r < 0)
+ return r;
+
+ r = symlink_idempotent(combined, target, false);
+ if (r == -EINVAL)
+ return log_error_errno(r, "Invalid existing symlink for combined hierarchy: %m");
+ if (r < 0)
+ return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
+ }
+ }
+
+skip_controllers:
+ if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+ r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false);
+ if (r < 0)
+ return r;
+ }
+
+ r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false);
+ if (r < 0)
+ return r;
+
+ return mount_verbose(LOG_ERR, NULL, cgroup_root, NULL,
+ MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
+}
+
+static int mount_unified_cgroups(const char *dest) {
+ const char *p;
+ int r;
+
+ assert(dest);
+
+ p = prefix_roota(dest, "/sys/fs/cgroup");
+
+ (void) mkdir_p(p, 0755);
+
+ r = path_is_mount_point(p, dest, AT_SYMLINK_FOLLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
+ if (r > 0) {
+ p = prefix_roota(dest, "/sys/fs/cgroup/cgroup.procs");
+ if (access(p, F_OK) >= 0)
+ return 0;
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
+ }
+
+ return mount_verbose(LOG_ERR, "cgroup", p, "cgroup2", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+}
+
+int mount_cgroups(
+ const char *dest,
+ CGroupUnified unified_requested,
+ bool userns,
+ uid_t uid_shift,
+ uid_t uid_range,
+ const char *selinux_apifs_context,
+ bool use_cgns) {
+
+ if (unified_requested >= CGROUP_UNIFIED_ALL)
+ return mount_unified_cgroups(dest);
+ if (use_cgns)
+ return mount_legacy_cgns_supported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context);
+
+ return mount_legacy_cgns_unsupported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context);
+}
+
+static int mount_systemd_cgroup_writable_one(const char *root, const char *own) {
+ int r;
+
+ assert(root);
+ assert(own);
+
+ /* Make our own cgroup a (writable) bind mount */
+ r = mount_verbose(LOG_ERR, own, own, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+ /* And then remount the systemd cgroup root read-only */
+ return mount_verbose(LOG_ERR, NULL, root, NULL,
+ MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL);
+}
+
+int mount_systemd_cgroup_writable(
+ const char *dest,
+ CGroupUnified unified_requested) {
+
+ _cleanup_free_ char *own_cgroup_path = NULL;
+ const char *root, *own;
+ int r;
+
+ assert(dest);
+
+ r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine our own cgroup path: %m");
+
+ /* If we are living in the top-level, then there's nothing to do... */
+ if (path_equal(own_cgroup_path, "/"))
+ return 0;
+
+ if (unified_requested >= CGROUP_UNIFIED_ALL) {
+
+ root = prefix_roota(dest, "/sys/fs/cgroup");
+ own = strjoina(root, own_cgroup_path);
+
+ } else {
+
+ if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+ root = prefix_roota(dest, "/sys/fs/cgroup/unified");
+ own = strjoina(root, own_cgroup_path);
+
+ r = mount_systemd_cgroup_writable_one(root, own);
+ if (r < 0)
+ return r;
+ }
+
+ root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
+ own = strjoina(root, own_cgroup_path);
+ }
+
+ return mount_systemd_cgroup_writable_one(root, own);
+}
diff --git a/src/nspawn/nspawn-cgroup.h b/src/nspawn/nspawn-cgroup.h
new file mode 100644
index 0000000..035e8fb
--- /dev/null
+++ b/src/nspawn/nspawn-cgroup.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "cgroup-util.h"
+
+int chown_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift);
+int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift);
+int create_subcgroup(pid_t pid, bool keep_unit, CGroupUnified unified_requested);
+
+int mount_cgroups(const char *dest, CGroupUnified unified_requested, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context, bool use_cgns);
+int mount_systemd_cgroup_writable(const char *dest, CGroupUnified unified_requested);
diff --git a/src/nspawn/nspawn-def.h b/src/nspawn/nspawn-def.h
new file mode 100644
index 0000000..9b54cda
--- /dev/null
+++ b/src/nspawn/nspawn-def.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include <sys/types.h>
+
+/* While we are chmod()ing a directory tree, we set the top-level UID base to this "busy" base, so that we can always
+ * recognize trees we are were chmod()ing recursively and got interrupted in */
+#define UID_BUSY_BASE ((uid_t) UINT32_C(0xFFFE0000))
+#define UID_BUSY_MASK ((uid_t) UINT32_C(0xFFFF0000))
diff --git a/src/nspawn/nspawn-expose-ports.c b/src/nspawn/nspawn-expose-ports.c
new file mode 100644
index 0000000..13ce849
--- /dev/null
+++ b/src/nspawn/nspawn-expose-ports.c
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "firewall-util.h"
+#include "in-addr-util.h"
+#include "local-addresses.h"
+#include "netlink-util.h"
+#include "nspawn-expose-ports.h"
+#include "parse-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "util.h"
+
+int expose_port_parse(ExposePort **l, const char *s) {
+
+ const char *split, *e;
+ uint16_t container_port, host_port;
+ int protocol;
+ ExposePort *p;
+ int r;
+
+ assert(l);
+ assert(s);
+
+ if ((e = startswith(s, "tcp:")))
+ protocol = IPPROTO_TCP;
+ else if ((e = startswith(s, "udp:")))
+ protocol = IPPROTO_UDP;
+ else {
+ e = s;
+ protocol = IPPROTO_TCP;
+ }
+
+ split = strchr(e, ':');
+ if (split) {
+ char v[split - e + 1];
+
+ memcpy(v, e, split - e);
+ v[split - e] = 0;
+
+ r = parse_ip_port(v, &host_port);
+ if (r < 0)
+ return -EINVAL;
+
+ r = parse_ip_port(split + 1, &container_port);
+ } else {
+ r = parse_ip_port(e, &container_port);
+ host_port = container_port;
+ }
+
+ if (r < 0)
+ return -EINVAL;
+
+ LIST_FOREACH(ports, p, *l)
+ if (p->protocol == protocol && p->host_port == host_port)
+ return -EEXIST;
+
+ p = new(ExposePort, 1);
+ if (!p)
+ return -ENOMEM;
+
+ p->protocol = protocol;
+ p->host_port = host_port;
+ p->container_port = container_port;
+
+ LIST_PREPEND(ports, *l, p);
+
+ return 0;
+}
+
+void expose_port_free_all(ExposePort *p) {
+
+ while (p) {
+ ExposePort *q = p;
+ LIST_REMOVE(ports, p, q);
+ free(q);
+ }
+}
+
+int expose_port_flush(ExposePort* l, union in_addr_union *exposed) {
+ ExposePort *p;
+ int r, af = AF_INET;
+
+ assert(exposed);
+
+ if (!l)
+ return 0;
+
+ if (in_addr_is_null(af, exposed))
+ return 0;
+
+ log_debug("Lost IP address.");
+
+ LIST_FOREACH(ports, p, l) {
+ r = fw_add_local_dnat(false,
+ af,
+ p->protocol,
+ NULL,
+ NULL, 0,
+ NULL, 0,
+ p->host_port,
+ exposed,
+ p->container_port,
+ NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to modify firewall: %m");
+ }
+
+ *exposed = IN_ADDR_NULL;
+ return 0;
+}
+
+int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *exposed) {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ _cleanup_free_ char *pretty = NULL;
+ union in_addr_union new_exposed;
+ ExposePort *p;
+ bool add;
+ int af = AF_INET, r;
+
+ assert(exposed);
+
+ /* Invoked each time an address is added or removed inside the
+ * container */
+
+ if (!l)
+ return 0;
+
+ r = local_addresses(rtnl, 0, af, &addresses);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate local addresses: %m");
+
+ add = r > 0 &&
+ addresses[0].family == af &&
+ addresses[0].scope < RT_SCOPE_LINK;
+
+ if (!add)
+ return expose_port_flush(l, exposed);
+
+ new_exposed = addresses[0].address;
+ if (in_addr_equal(af, exposed, &new_exposed))
+ return 0;
+
+ in_addr_to_string(af, &new_exposed, &pretty);
+ log_debug("New container IP is %s.", strna(pretty));
+
+ LIST_FOREACH(ports, p, l) {
+
+ r = fw_add_local_dnat(true,
+ af,
+ p->protocol,
+ NULL,
+ NULL, 0,
+ NULL, 0,
+ p->host_port,
+ &new_exposed,
+ p->container_port,
+ in_addr_is_null(af, exposed) ? NULL : exposed);
+ if (r < 0)
+ log_warning_errno(r, "Failed to modify firewall: %m");
+ }
+
+ *exposed = new_exposed;
+ return 0;
+}
+
+int expose_port_send_rtnl(int send_fd) {
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(send_fd >= 0);
+
+ fd = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_ROUTE);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to allocate container netlink: %m");
+
+ /* Store away the fd in the socket, so that it stays open as
+ * long as we run the child */
+ r = send_one_fd(send_fd, fd, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send netlink fd: %m");
+
+ return 0;
+}
+
+int expose_port_watch_rtnl(
+ sd_event *event,
+ int recv_fd,
+ sd_netlink_message_handler_t handler,
+ union in_addr_union *exposed,
+ sd_netlink **ret) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ int fd, r;
+
+ assert(event);
+ assert(recv_fd >= 0);
+ assert(ret);
+
+ fd = receive_one_fd(recv_fd, 0);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to recv netlink fd: %m");
+
+ r = sd_netlink_open_fd(&rtnl, fd);
+ if (r < 0) {
+ safe_close(fd);
+ return log_error_errno(r, "Failed to create rtnl object: %m");
+ }
+
+ r = sd_netlink_add_match(rtnl, NULL, RTM_NEWADDR, handler, NULL, exposed, "nspawn-NEWADDR");
+ if (r < 0)
+ return log_error_errno(r, "Failed to subscribe to RTM_NEWADDR messages: %m");
+
+ r = sd_netlink_add_match(rtnl, NULL, RTM_DELADDR, handler, NULL, exposed, "nspawn-DELADDR");
+ if (r < 0)
+ return log_error_errno(r, "Failed to subscribe to RTM_DELADDR messages: %m");
+
+ r = sd_netlink_attach_event(rtnl, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add to even loop: %m");
+
+ *ret = TAKE_PTR(rtnl);
+
+ return 0;
+}
diff --git a/src/nspawn/nspawn-expose-ports.h b/src/nspawn/nspawn-expose-ports.h
new file mode 100644
index 0000000..7ed7bee
--- /dev/null
+++ b/src/nspawn/nspawn-expose-ports.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+
+#include "sd-event.h"
+#include "sd-netlink.h"
+
+#include "in-addr-util.h"
+#include "list.h"
+
+typedef struct ExposePort {
+ int protocol;
+ uint16_t host_port;
+ uint16_t container_port;
+ LIST_FIELDS(struct ExposePort, ports);
+} ExposePort;
+
+void expose_port_free_all(ExposePort *p);
+int expose_port_parse(ExposePort **l, const char *s);
+
+int expose_port_watch_rtnl(sd_event *event, int recv_fd, sd_netlink_message_handler_t handler, union in_addr_union *exposed, sd_netlink **ret);
+int expose_port_send_rtnl(int send_fd);
+
+int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *exposed);
+int expose_port_flush(ExposePort* l, union in_addr_union *exposed);
diff --git a/src/nspawn/nspawn-gperf.gperf b/src/nspawn/nspawn-gperf.gperf
new file mode 100644
index 0000000..dec53a0
--- /dev/null
+++ b/src/nspawn/nspawn-gperf.gperf
@@ -0,0 +1,76 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "nspawn-settings.h"
+#include "nspawn-expose-ports.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name nspawn_gperf_hash
+%define lookup-function-name nspawn_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Exec.Boot, config_parse_boot, 0, 0
+Exec.Ephemeral, config_parse_bool, 0, offsetof(Settings, ephemeral)
+Exec.ProcessTwo, config_parse_pid2, 0, 0
+Exec.Parameters, config_parse_strv, 0, offsetof(Settings, parameters)
+Exec.Environment, config_parse_strv, 0, offsetof(Settings, environment)
+Exec.User, config_parse_string, 0, offsetof(Settings, user)
+Exec.Capability, config_parse_capability, 0, offsetof(Settings, capability)
+Exec.DropCapability, config_parse_capability, 0, offsetof(Settings, drop_capability)
+Exec.KillSignal, config_parse_signal, 0, offsetof(Settings, kill_signal)
+Exec.Personality, config_parse_personality, 0, offsetof(Settings, personality)
+Exec.MachineID, config_parse_id128, 0, offsetof(Settings, machine_id)
+Exec.WorkingDirectory, config_parse_path, 0, offsetof(Settings, working_directory)
+Exec.PivotRoot, config_parse_pivot_root, 0, 0
+Exec.PrivateUsers, config_parse_private_users, 0, 0
+Exec.NotifyReady, config_parse_bool, 0, offsetof(Settings, notify_ready)
+Exec.SystemCallFilter, config_parse_syscall_filter, 0, 0,
+Exec.LimitCPU, config_parse_rlimit, RLIMIT_CPU, offsetof(Settings, rlimit)
+Exec.LimitFSIZE, config_parse_rlimit, RLIMIT_FSIZE, offsetof(Settings, rlimit)
+Exec.LimitDATA, config_parse_rlimit, RLIMIT_DATA, offsetof(Settings, rlimit)
+Exec.LimitSTACK, config_parse_rlimit, RLIMIT_STACK, offsetof(Settings, rlimit)
+Exec.LimitCORE, config_parse_rlimit, RLIMIT_CORE, offsetof(Settings, rlimit)
+Exec.LimitRSS, config_parse_rlimit, RLIMIT_RSS, offsetof(Settings, rlimit)
+Exec.LimitNOFILE, config_parse_rlimit, RLIMIT_NOFILE, offsetof(Settings, rlimit)
+Exec.LimitAS, config_parse_rlimit, RLIMIT_AS, offsetof(Settings, rlimit)
+Exec.LimitNPROC, config_parse_rlimit, RLIMIT_NPROC, offsetof(Settings, rlimit)
+Exec.LimitMEMLOCK, config_parse_rlimit, RLIMIT_MEMLOCK, offsetof(Settings, rlimit)
+Exec.LimitLOCKS, config_parse_rlimit, RLIMIT_LOCKS, offsetof(Settings, rlimit)
+Exec.LimitSIGPENDING, config_parse_rlimit, RLIMIT_SIGPENDING, offsetof(Settings, rlimit)
+Exec.LimitMSGQUEUE, config_parse_rlimit, RLIMIT_MSGQUEUE, offsetof(Settings, rlimit)
+Exec.LimitNICE, config_parse_rlimit, RLIMIT_NICE, offsetof(Settings, rlimit)
+Exec.LimitRTPRIO, config_parse_rlimit, RLIMIT_RTPRIO, offsetof(Settings, rlimit)
+Exec.LimitRTTIME, config_parse_rlimit, RLIMIT_RTTIME, offsetof(Settings, rlimit)
+Exec.Hostname, config_parse_hostname, 0, offsetof(Settings, hostname)
+Exec.NoNewPrivileges, config_parse_tristate, 0, offsetof(Settings, no_new_privileges)
+Exec.OOMScoreAdjust, config_parse_oom_score_adjust, 0, 0
+Exec.CPUAffinity, config_parse_cpu_affinity, 0, 0
+Exec.ResolvConf, config_parse_resolv_conf, 0, offsetof(Settings, resolv_conf)
+Exec.LinkJournal, config_parse_link_journal, 0, 0
+Exec.Timezone, config_parse_timezone, 0, offsetof(Settings, timezone)
+Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only)
+Files.Volatile, config_parse_volatile_mode, 0, offsetof(Settings, volatile_mode)
+Files.Bind, config_parse_bind, 0, 0
+Files.BindReadOnly, config_parse_bind, 1, 0
+Files.TemporaryFileSystem, config_parse_tmpfs, 0, 0
+Files.Overlay, config_parse_overlay, 0, 0
+Files.OverlayReadOnly, config_parse_overlay, 1, 0
+Files.PrivateUsersChown, config_parse_tristate, 0, offsetof(Settings, userns_chown)
+Network.Private, config_parse_tristate, 0, offsetof(Settings, private_network)
+Network.Interface, config_parse_strv, 0, offsetof(Settings, network_interfaces)
+Network.MACVLAN, config_parse_strv, 0, offsetof(Settings, network_macvlan)
+Network.IPVLAN, config_parse_strv, 0, offsetof(Settings, network_ipvlan)
+Network.VirtualEthernet, config_parse_tristate, 0, offsetof(Settings, network_veth)
+Network.VirtualEthernetExtra, config_parse_veth_extra, 0, 0
+Network.Bridge, config_parse_ifname, 0, offsetof(Settings, network_bridge)
+Network.Zone, config_parse_network_zone, 0, 0
+Network.Port, config_parse_expose_port, 0, 0
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c
new file mode 100644
index 0000000..a9af889
--- /dev/null
+++ b/src/nspawn/nspawn-mount.c
@@ -0,0 +1,1075 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/mount.h>
+#include <linux/magic.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "label.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "nspawn-mount.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "set.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+CustomMount* custom_mount_add(CustomMount **l, size_t *n, CustomMountType t) {
+ CustomMount *c, *ret;
+
+ assert(l);
+ assert(n);
+ assert(t >= 0);
+ assert(t < _CUSTOM_MOUNT_TYPE_MAX);
+
+ c = reallocarray(*l, *n + 1, sizeof(CustomMount));
+ if (!c)
+ return NULL;
+
+ *l = c;
+ ret = *l + *n;
+ (*n)++;
+
+ *ret = (CustomMount) { .type = t };
+
+ return ret;
+}
+
+void custom_mount_free_all(CustomMount *l, size_t n) {
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ CustomMount *m = l + i;
+
+ free(m->source);
+ free(m->destination);
+ free(m->options);
+
+ if (m->work_dir) {
+ (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
+ free(m->work_dir);
+ }
+
+ if (m->rm_rf_tmpdir) {
+ (void) rm_rf(m->rm_rf_tmpdir, REMOVE_ROOT|REMOVE_PHYSICAL);
+ free(m->rm_rf_tmpdir);
+ }
+
+ strv_free(m->lower);
+ }
+
+ free(l);
+}
+
+static int custom_mount_compare(const CustomMount *a, const CustomMount *b) {
+ int r;
+
+ r = path_compare(a->destination, b->destination);
+ if (r != 0)
+ return r;
+
+ return CMP(a->type, b->type);
+}
+
+static bool source_path_is_valid(const char *p) {
+ assert(p);
+
+ if (*p == '+')
+ p++;
+
+ return path_is_absolute(p);
+}
+
+static char *resolve_source_path(const char *dest, const char *source) {
+
+ if (!source)
+ return NULL;
+
+ if (source[0] == '+')
+ return prefix_root(dest, source + 1);
+
+ return strdup(source);
+}
+
+int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n) {
+ size_t i;
+ int r;
+
+ /* Prepare all custom mounts. This will make source we know all temporary directories. This is called in the
+ * parent process, so that we know the temporary directories to remove on exit before we fork off the
+ * children. */
+
+ assert(l || n == 0);
+
+ /* Order the custom mounts, and make sure we have a working directory */
+ typesafe_qsort(l, n, custom_mount_compare);
+
+ for (i = 0; i < n; i++) {
+ CustomMount *m = l + i;
+
+ if (m->source) {
+ char *s;
+
+ s = resolve_source_path(dest, m->source);
+ if (!s)
+ return log_oom();
+
+ free_and_replace(m->source, s);
+ } else {
+ /* No source specified? In that case, use a throw-away temporary directory in /var/tmp */
+
+ m->rm_rf_tmpdir = strdup("/var/tmp/nspawn-temp-XXXXXX");
+ if (!m->rm_rf_tmpdir)
+ return log_oom();
+
+ if (!mkdtemp(m->rm_rf_tmpdir)) {
+ m->rm_rf_tmpdir = mfree(m->rm_rf_tmpdir);
+ return log_error_errno(errno, "Failed to acquire temporary directory: %m");
+ }
+
+ m->source = strjoin(m->rm_rf_tmpdir, "/src");
+ if (!m->source)
+ return log_oom();
+
+ if (mkdir(m->source, 0755) < 0)
+ return log_error_errno(errno, "Failed to create %s: %m", m->source);
+ }
+
+ if (m->type == CUSTOM_MOUNT_OVERLAY) {
+ char **j;
+
+ STRV_FOREACH(j, m->lower) {
+ char *s;
+
+ s = resolve_source_path(dest, *j);
+ if (!s)
+ return log_oom();
+
+ free_and_replace(*j, s);
+ }
+
+ if (m->work_dir) {
+ char *s;
+
+ s = resolve_source_path(dest, m->work_dir);
+ if (!s)
+ return log_oom();
+
+ free_and_replace(m->work_dir, s);
+ } else {
+ assert(m->source);
+
+ r = tempfn_random(m->source, NULL, &m->work_dir);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire working directory: %m");
+ }
+
+ (void) mkdir_label(m->work_dir, 0700);
+ }
+ }
+
+ return 0;
+}
+
+int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) {
+ _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
+ const char *p = s;
+ CustomMount *m;
+ int r;
+
+ assert(l);
+ assert(n);
+
+ r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+ if (r == 1) {
+ destination = strdup(source[0] == '+' ? source+1 : source);
+ if (!destination)
+ return -ENOMEM;
+ }
+ if (r == 2 && !isempty(p)) {
+ opts = strdup(p);
+ if (!opts)
+ return -ENOMEM;
+ }
+
+ if (isempty(source))
+ source = NULL;
+ else if (!source_path_is_valid(source))
+ return -EINVAL;
+
+ if (!path_is_absolute(destination))
+ return -EINVAL;
+
+ m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
+ if (!m)
+ return -ENOMEM;
+
+ m->source = source;
+ m->destination = destination;
+ m->read_only = read_only;
+ m->options = opts;
+
+ source = destination = opts = NULL;
+ return 0;
+}
+
+int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s) {
+ _cleanup_free_ char *path = NULL, *opts = NULL;
+ const char *p = s;
+ CustomMount *m;
+ int r;
+
+ assert(l);
+ assert(n);
+ assert(s);
+
+ r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ if (isempty(p))
+ opts = strdup("mode=0755");
+ else
+ opts = strdup(p);
+ if (!opts)
+ return -ENOMEM;
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
+ if (!m)
+ return -ENOMEM;
+
+ m->destination = TAKE_PTR(path);
+ m->options = TAKE_PTR(opts);
+
+ return 0;
+}
+
+int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) {
+ _cleanup_free_ char *upper = NULL, *destination = NULL;
+ _cleanup_strv_free_ char **lower = NULL;
+ CustomMount *m;
+ int k;
+
+ k = strv_split_extract(&lower, s, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (k < 0)
+ return k;
+ if (k < 2)
+ return -EADDRNOTAVAIL;
+ if (k == 2) {
+ /* If two parameters are specified, the first one is the lower, the second one the upper directory. And
+ * we'll also define the destination mount point the same as the upper. */
+
+ if (!source_path_is_valid(lower[0]) ||
+ !source_path_is_valid(lower[1]))
+ return -EINVAL;
+
+ upper = TAKE_PTR(lower[1]);
+
+ destination = strdup(upper[0] == '+' ? upper+1 : upper); /* take the destination without "+" prefix */
+ if (!destination)
+ return -ENOMEM;
+ } else {
+ char **i;
+
+ /* If more than two parameters are specified, the last one is the destination, the second to last one
+ * the "upper", and all before that the "lower" directories. */
+
+ destination = lower[k - 1];
+ upper = TAKE_PTR(lower[k - 2]);
+
+ STRV_FOREACH(i, lower)
+ if (!source_path_is_valid(*i))
+ return -EINVAL;
+
+ /* If the upper directory is unspecified, then let's create it automatically as a throw-away directory
+ * in /var/tmp */
+ if (isempty(upper))
+ upper = NULL;
+ else if (!source_path_is_valid(upper))
+ return -EINVAL;
+
+ if (!path_is_absolute(destination))
+ return -EINVAL;
+ }
+
+ m = custom_mount_add(l, n, CUSTOM_MOUNT_OVERLAY);
+ if (!m)
+ return -ENOMEM;
+
+ m->destination = TAKE_PTR(destination);
+ m->source = TAKE_PTR(upper);
+ m->lower = TAKE_PTR(lower);
+ m->read_only = read_only;
+
+ return 0;
+}
+
+int tmpfs_patch_options(
+ const char *options,
+ uid_t uid_shift,
+ const char *selinux_apifs_context,
+ char **ret) {
+
+ char *buf = NULL;
+
+ if (uid_shift != UID_INVALID) {
+ if (asprintf(&buf, "%s%suid=" UID_FMT ",gid=" UID_FMT,
+ strempty(options), options ? "," : "",
+ uid_shift, uid_shift) < 0)
+ return -ENOMEM;
+
+ options = buf;
+ }
+
+#if HAVE_SELINUX
+ if (selinux_apifs_context) {
+ char *t;
+
+ t = strjoin(strempty(options), options ? "," : "",
+ "context=\"", selinux_apifs_context, "\"");
+ free(buf);
+ if (!t)
+ return -ENOMEM;
+
+ buf = t;
+ }
+#endif
+
+ if (!buf && options) {
+ buf = strdup(options);
+ if (!buf)
+ return -ENOMEM;
+ }
+ *ret = buf;
+
+ return !!buf;
+}
+
+int mount_sysfs(const char *dest, MountSettingsMask mount_settings) {
+ const char *full, *top, *x;
+ int r;
+ unsigned long extra_flags = 0;
+
+ top = prefix_roota(dest, "/sys");
+ r = path_is_fs_type(top, SYSFS_MAGIC);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);
+ /* /sys might already be mounted as sysfs by the outer child in the
+ * !netns case. In this case, it's all good. Don't touch it because we
+ * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555.
+ */
+ if (r > 0)
+ return 0;
+
+ full = prefix_roota(top, "/full");
+
+ (void) mkdir(full, 0755);
+
+ if (mount_settings & MOUNT_APPLY_APIVFS_RO)
+ extra_flags |= MS_RDONLY;
+
+ r = mount_verbose(LOG_ERR, "sysfs", full, "sysfs",
+ MS_NOSUID|MS_NOEXEC|MS_NODEV|extra_flags, NULL);
+ if (r < 0)
+ return r;
+
+ FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
+ _cleanup_free_ char *from = NULL, *to = NULL;
+
+ from = prefix_root(full, x);
+ if (!from)
+ return log_oom();
+
+ to = prefix_root(top, x);
+ if (!to)
+ return log_oom();
+
+ (void) mkdir(to, 0755);
+
+ r = mount_verbose(LOG_ERR, from, to, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+ r = mount_verbose(LOG_ERR, NULL, to, NULL,
+ MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = umount_verbose(full);
+ if (r < 0)
+ return r;
+
+ if (rmdir(full) < 0)
+ return log_error_errno(errno, "Failed to remove %s: %m", full);
+
+ /* Create mountpoint for cgroups. Otherwise we are not allowed since we
+ * remount /sys read-only.
+ */
+ x = prefix_roota(top, "/fs/cgroup");
+ (void) mkdir_p(x, 0755);
+
+ return mount_verbose(LOG_ERR, NULL, top, NULL,
+ MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
+}
+
+static int mkdir_userns(const char *path, mode_t mode, uid_t uid_shift) {
+ int r;
+
+ assert(path);
+
+ r = mkdir_errno_wrapper(path, mode);
+ if (r < 0 && r != -EEXIST)
+ return r;
+
+ if (uid_shift == UID_INVALID)
+ return 0;
+
+ if (lchown(path, uid_shift, uid_shift) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int mkdir_userns_p(const char *prefix, const char *path, mode_t mode, uid_t uid_shift) {
+ const char *p, *e;
+ int r;
+
+ assert(path);
+
+ if (prefix && !path_startswith(path, prefix))
+ return -ENOTDIR;
+
+ /* create every parent directory in the path, except the last component */
+ p = path + strspn(path, "/");
+ for (;;) {
+ char t[strlen(path) + 1];
+
+ e = p + strcspn(p, "/");
+ p = e + strspn(e, "/");
+
+ /* Is this the last component? If so, then we're done */
+ if (*p == 0)
+ break;
+
+ memcpy(t, path, e - path);
+ t[e-path] = 0;
+
+ if (prefix && path_startswith(prefix, t))
+ continue;
+
+ r = mkdir_userns(t, mode, uid_shift);
+ if (r < 0)
+ return r;
+ }
+
+ return mkdir_userns(path, mode, uid_shift);
+}
+
+int mount_all(const char *dest,
+ MountSettingsMask mount_settings,
+ uid_t uid_shift,
+ const char *selinux_apifs_context) {
+
+#define PROC_INACCESSIBLE(path) \
+ { NULL, (path), NULL, NULL, MS_BIND, \
+ MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_INACCESSIBLE_REG }, /* Bind mount first ... */ \
+ { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
+ MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
+
+#define PROC_READ_ONLY(path) \
+ { (path), (path), NULL, NULL, MS_BIND, \
+ MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
+ { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
+ MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
+
+ typedef struct MountPoint {
+ const char *what;
+ const char *where;
+ const char *type;
+ const char *options;
+ unsigned long flags;
+ MountSettingsMask mount_settings;
+ } MountPoint;
+
+ static const MountPoint mount_table[] = {
+ /* First we list inner child mounts (i.e. mounts applied *after* entering user namespacing) */
+ { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ MOUNT_FATAL|MOUNT_IN_USERNS },
+
+ { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND,
+ MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */
+
+ { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND,
+ MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS }, /* (except for this) */
+
+ { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
+ MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* ... then, make it r/o */
+
+ /* Make these files inaccessible to container payloads: they potentially leak information about kernel
+ * internals or the host's execution environment to the container */
+ PROC_INACCESSIBLE("/proc/kallsyms"),
+ PROC_INACCESSIBLE("/proc/kcore"),
+ PROC_INACCESSIBLE("/proc/keys"),
+ PROC_INACCESSIBLE("/proc/sysrq-trigger"),
+ PROC_INACCESSIBLE("/proc/timer_list"),
+
+ /* Make these directories read-only to container payloads: they show hardware information, and in some
+ * cases contain tunables the container really shouldn't have access to. */
+ PROC_READ_ONLY("/proc/acpi"),
+ PROC_READ_ONLY("/proc/apm"),
+ PROC_READ_ONLY("/proc/asound"),
+ PROC_READ_ONLY("/proc/bus"),
+ PROC_READ_ONLY("/proc/fs"),
+ PROC_READ_ONLY("/proc/irq"),
+ PROC_READ_ONLY("/proc/scsi"),
+
+ /* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing) */
+ { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ MOUNT_FATAL|MOUNT_APPLY_TMPFS_TMP },
+ { "tmpfs", "/sys", "tmpfs", "mode=555", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS },
+ { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO }, /* skipped if above was mounted */
+ { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ MOUNT_FATAL }, /* skipped if above was mounted */
+ { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
+ MOUNT_FATAL },
+ { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ MOUNT_FATAL },
+ { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ MOUNT_FATAL },
+ { "mqueue", "/dev/mqueue", "mqueue", NULL, 0,
+ MOUNT_FATAL },
+
+#if HAVE_SELINUX
+ { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,
+ 0 }, /* Bind mount first */
+ { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
+ 0 }, /* Then, make it r/o */
+#endif
+ };
+
+ _cleanup_(unlink_and_freep) char *inaccessible = NULL;
+ bool use_userns = (mount_settings & MOUNT_USE_USERNS);
+ bool netns = (mount_settings & MOUNT_APPLY_APIVFS_NETNS);
+ bool ro = (mount_settings & MOUNT_APPLY_APIVFS_RO);
+ bool in_userns = (mount_settings & MOUNT_IN_USERNS);
+ bool tmpfs_tmp = (mount_settings & MOUNT_APPLY_TMPFS_TMP);
+ size_t k;
+ int r;
+
+ for (k = 0; k < ELEMENTSOF(mount_table); k++) {
+ _cleanup_free_ char *where = NULL, *options = NULL;
+ const char *o, *what;
+ bool fatal = (mount_table[k].mount_settings & MOUNT_FATAL);
+
+ if (in_userns != (bool)(mount_table[k].mount_settings & MOUNT_IN_USERNS))
+ continue;
+
+ if (!netns && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_NETNS))
+ continue;
+
+ if (!ro && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_RO))
+ continue;
+
+ if (!tmpfs_tmp && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_TMPFS_TMP))
+ continue;
+
+ r = chase_symlinks(mount_table[k].where, dest, CHASE_NONEXISTENT|CHASE_PREFIX_ROOT, &where);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, mount_table[k].where);
+
+ if (mount_table[k].mount_settings & MOUNT_INACCESSIBLE_REG) {
+
+ if (!inaccessible) {
+ _cleanup_free_ char *np = NULL;
+
+ r = tempfn_random_child(NULL, "inaccessible", &np);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate inaccessible file node path: %m");
+
+ r = touch_file(np, false, USEC_INFINITY, UID_INVALID, GID_INVALID, 0000);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create inaccessible file node '%s': %m", np);
+
+ inaccessible = TAKE_PTR(np);
+ }
+
+ what = inaccessible;
+ } else
+ what = mount_table[k].what;
+
+ r = path_is_mount_point(where, NULL, 0);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
+
+ /* Skip this entry if it is not a remount. */
+ if (what && r > 0)
+ continue;
+
+ r = mkdir_userns_p(dest, where, 0755, (use_userns && !in_userns) ? uid_shift : UID_INVALID);
+ if (r < 0 && r != -EEXIST) {
+ if (fatal && r != -EROFS)
+ return log_error_errno(r, "Failed to create directory %s: %m", where);
+
+ log_debug_errno(r, "Failed to create directory %s: %m", where);
+ /* If we failed mkdir() or chown() due to the root
+ * directory being read only, attempt to mount this fs
+ * anyway and let mount_verbose log any errors */
+ if (r != -EROFS)
+ continue;
+ }
+
+ o = mount_table[k].options;
+ if (streq_ptr(mount_table[k].type, "tmpfs")) {
+ r = tmpfs_patch_options(o, in_userns ? 0 : uid_shift, selinux_apifs_context, &options);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ o = options;
+ }
+
+ r = mount_verbose(fatal ? LOG_ERR : LOG_DEBUG,
+ what,
+ where,
+ mount_table[k].type,
+ mount_table[k].flags,
+ o);
+ if (r < 0 && fatal)
+ return r;
+ }
+
+ return 0;
+}
+
+static int mount_bind(const char *dest, CustomMount *m) {
+
+ _cleanup_free_ char *where = NULL;
+ struct stat source_st, dest_st;
+ int r;
+
+ assert(dest);
+ assert(m);
+
+ if (stat(m->source, &source_st) < 0)
+ return log_error_errno(errno, "Failed to stat %s: %m", m->source);
+
+ r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
+ if (r > 0) { /* Path exists already? */
+
+ if (stat(where, &dest_st) < 0)
+ return log_error_errno(errno, "Failed to stat %s: %m", where);
+
+ if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot bind mount directory %s on file %s.",
+ m->source, where);
+
+ if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot bind mount file %s on directory %s.",
+ m->source, where);
+
+ } else { /* Path doesn't exist yet? */
+ r = mkdir_parents_label(where, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make parents of %s: %m", where);
+
+ /* Create the mount point. Any non-directory file can be
+ * mounted on any non-directory file (regular, fifo, socket,
+ * char, block).
+ */
+ if (S_ISDIR(source_st.st_mode))
+ r = mkdir_label(where, 0755);
+ else
+ r = touch(where);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create mount point %s: %m", where);
+
+ }
+
+ r = mount_verbose(LOG_ERR, m->source, where, NULL, MS_BIND | MS_REC, m->options);
+ if (r < 0)
+ return r;
+
+ if (m->read_only) {
+ r = bind_remount_recursive(where, true, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Read-only bind mount failed: %m");
+ }
+
+ return 0;
+}
+
+static int mount_tmpfs(
+ const char *dest,
+ CustomMount *m,
+ bool userns, uid_t uid_shift, uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ const char *options;
+ _cleanup_free_ char *buf = NULL, *where = NULL;
+ int r;
+
+ assert(dest);
+ assert(m);
+
+ r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
+ if (r == 0) { /* Doesn't exist yet? */
+ r = mkdir_p_label(where, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
+ }
+
+ r = tmpfs_patch_options(m->options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
+ if (r < 0)
+ return log_oom();
+ options = r > 0 ? buf : m->options;
+
+ return mount_verbose(LOG_ERR, "tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options);
+}
+
+static char *joined_and_escaped_lower_dirs(char **lower) {
+ _cleanup_strv_free_ char **sv = NULL;
+
+ sv = strv_copy(lower);
+ if (!sv)
+ return NULL;
+
+ strv_reverse(sv);
+
+ if (!strv_shell_escape(sv, ",:"))
+ return NULL;
+
+ return strv_join(sv, ":");
+}
+
+static int mount_overlay(const char *dest, CustomMount *m) {
+
+ _cleanup_free_ char *lower = NULL, *where = NULL, *escaped_source = NULL;
+ const char *options;
+ int r;
+
+ assert(dest);
+ assert(m);
+
+ r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
+ if (r == 0) { /* Doesn't exist yet? */
+ r = mkdir_label(where, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
+ }
+
+ (void) mkdir_p_label(m->source, 0755);
+
+ lower = joined_and_escaped_lower_dirs(m->lower);
+ if (!lower)
+ return log_oom();
+
+ escaped_source = shell_escape(m->source, ",:");
+ if (!escaped_source)
+ return log_oom();
+
+ if (m->read_only)
+ options = strjoina("lowerdir=", escaped_source, ":", lower);
+ else {
+ _cleanup_free_ char *escaped_work_dir = NULL;
+
+ escaped_work_dir = shell_escape(m->work_dir, ",:");
+ if (!escaped_work_dir)
+ return log_oom();
+
+ options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
+ }
+
+ return mount_verbose(LOG_ERR, "overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options);
+}
+
+int mount_custom(
+ const char *dest,
+ CustomMount *mounts, size_t n,
+ bool userns, uid_t uid_shift, uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ size_t i;
+ int r;
+
+ assert(dest);
+
+ for (i = 0; i < n; i++) {
+ CustomMount *m = mounts + i;
+
+ switch (m->type) {
+
+ case CUSTOM_MOUNT_BIND:
+ r = mount_bind(dest, m);
+ break;
+
+ case CUSTOM_MOUNT_TMPFS:
+ r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
+ break;
+
+ case CUSTOM_MOUNT_OVERLAY:
+ r = mount_overlay(dest, m);
+ break;
+
+ default:
+ assert_not_reached("Unknown custom mount type");
+ }
+
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int setup_volatile_state(
+ const char *directory,
+ VolatileMode mode,
+ bool userns, uid_t uid_shift, uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ _cleanup_free_ char *buf = NULL;
+ const char *p, *options;
+ int r;
+
+ assert(directory);
+
+ if (mode != VOLATILE_STATE)
+ return 0;
+
+ /* --volatile=state means we simply overmount /var
+ with a tmpfs, and the rest read-only. */
+
+ r = bind_remount_recursive(directory, true, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
+
+ p = prefix_roota(directory, "/var");
+ r = mkdir(p, 0755);
+ if (r < 0 && errno != EEXIST)
+ return log_error_errno(errno, "Failed to create %s: %m", directory);
+
+ options = "mode=755";
+ r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ options = buf;
+
+ return mount_verbose(LOG_ERR, "tmpfs", p, "tmpfs", MS_STRICTATIME, options);
+}
+
+int setup_volatile(
+ const char *directory,
+ VolatileMode mode,
+ bool userns, uid_t uid_shift, uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ bool tmpfs_mounted = false, bind_mounted = false;
+ char template[] = "/tmp/nspawn-volatile-XXXXXX";
+ _cleanup_free_ char *buf = NULL;
+ const char *f, *t, *options;
+ int r;
+
+ assert(directory);
+
+ if (mode != VOLATILE_YES)
+ return 0;
+
+ /* --volatile=yes means we mount a tmpfs to the root dir, and
+ the original /usr to use inside it, and that read-only. */
+
+ if (!mkdtemp(template))
+ return log_error_errno(errno, "Failed to create temporary directory: %m");
+
+ options = "mode=755";
+ r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ options = buf;
+
+ r = mount_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options);
+ if (r < 0)
+ goto fail;
+
+ tmpfs_mounted = true;
+
+ f = prefix_roota(directory, "/usr");
+ t = prefix_roota(template, "/usr");
+
+ r = mkdir(t, 0755);
+ if (r < 0 && errno != EEXIST) {
+ r = log_error_errno(errno, "Failed to create %s: %m", t);
+ goto fail;
+ }
+
+ r = mount_verbose(LOG_ERR, f, t, NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ goto fail;
+
+ bind_mounted = true;
+
+ r = bind_remount_recursive(t, true, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to remount %s read-only: %m", t);
+ goto fail;
+ }
+
+ r = mount_verbose(LOG_ERR, template, directory, NULL, MS_MOVE, NULL);
+ if (r < 0)
+ goto fail;
+
+ (void) rmdir(template);
+
+ return 0;
+
+fail:
+ if (bind_mounted)
+ (void) umount_verbose(t);
+
+ if (tmpfs_mounted)
+ (void) umount_verbose(template);
+ (void) rmdir(template);
+ return r;
+}
+
+/* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. */
+int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) {
+ _cleanup_free_ char *root_new = NULL, *root_old = NULL;
+ const char *p = s;
+ int r;
+
+ assert(pivot_root_new);
+ assert(pivot_root_old);
+
+ r = extract_first_word(&p, &root_new, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ if (isempty(p))
+ root_old = NULL;
+ else {
+ root_old = strdup(p);
+ if (!root_old)
+ return -ENOMEM;
+ }
+
+ if (!path_is_absolute(root_new))
+ return -EINVAL;
+ if (root_old && !path_is_absolute(root_old))
+ return -EINVAL;
+
+ free_and_replace(*pivot_root_new, root_new);
+ free_and_replace(*pivot_root_old, root_old);
+
+ return 0;
+}
+
+int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old) {
+ _cleanup_free_ char *directory_pivot_root_new = NULL;
+ _cleanup_free_ char *pivot_tmp_pivot_root_old = NULL;
+ char pivot_tmp[] = "/tmp/nspawn-pivot-XXXXXX";
+ bool remove_pivot_tmp = false;
+ int r;
+
+ assert(directory);
+
+ if (!pivot_root_new)
+ return 0;
+
+ /* Pivot pivot_root_new to / and the existing / to pivot_root_old.
+ * If pivot_root_old is NULL, the existing / disappears.
+ * This requires a temporary directory, pivot_tmp, which is
+ * not a child of either.
+ *
+ * This is typically used for OSTree-style containers, where
+ * the root partition contains several sysroots which could be
+ * run. Normally, one would be chosen by the bootloader and
+ * pivoted to / by initramfs.
+ *
+ * For example, for an OSTree deployment, pivot_root_new
+ * would be: /ostree/deploy/$os/deploy/$checksum. Note that this
+ * code doesn’t do the /var mount which OSTree expects: use
+ * --bind +/sysroot/ostree/deploy/$os/var:/var for that.
+ *
+ * So in the OSTree case, we’ll end up with something like:
+ * - directory = /tmp/nspawn-root-123456
+ * - pivot_root_new = /ostree/deploy/os/deploy/123abc
+ * - pivot_root_old = /sysroot
+ * - directory_pivot_root_new =
+ * /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc
+ * - pivot_tmp = /tmp/nspawn-pivot-123456
+ * - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot
+ *
+ * Requires all file systems at directory and below to be mounted
+ * MS_PRIVATE or MS_SLAVE so they can be moved.
+ */
+ directory_pivot_root_new = prefix_root(directory, pivot_root_new);
+
+ /* Remount directory_pivot_root_new to make it movable. */
+ r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory_pivot_root_new, NULL, MS_BIND, NULL);
+ if (r < 0)
+ goto done;
+
+ if (pivot_root_old) {
+ if (!mkdtemp(pivot_tmp)) {
+ r = log_error_errno(errno, "Failed to create temporary directory: %m");
+ goto done;
+ }
+
+ remove_pivot_tmp = true;
+ pivot_tmp_pivot_root_old = prefix_root(pivot_tmp, pivot_root_old);
+
+ r = mount_verbose(LOG_ERR, directory_pivot_root_new, pivot_tmp, NULL, MS_MOVE, NULL);
+ if (r < 0)
+ goto done;
+
+ r = mount_verbose(LOG_ERR, directory, pivot_tmp_pivot_root_old, NULL, MS_MOVE, NULL);
+ if (r < 0)
+ goto done;
+
+ r = mount_verbose(LOG_ERR, pivot_tmp, directory, NULL, MS_MOVE, NULL);
+ if (r < 0)
+ goto done;
+ } else {
+ r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory, NULL, MS_MOVE, NULL);
+ if (r < 0)
+ goto done;
+ }
+
+done:
+ if (remove_pivot_tmp)
+ (void) rmdir(pivot_tmp);
+
+ return r;
+}
diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h
new file mode 100644
index 0000000..8051a7d
--- /dev/null
+++ b/src/nspawn/nspawn-mount.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "cgroup-util.h"
+#include "volatile-util.h"
+
+typedef enum MountSettingsMask {
+ MOUNT_FATAL = 1 << 0, /* if set, a mount error is considered fatal */
+ MOUNT_USE_USERNS = 1 << 1, /* if set, mounts are patched considering uid/gid shifts in a user namespace */
+ MOUNT_IN_USERNS = 1 << 2, /* if set, the mount is executed in the inner child, otherwise in the outer child */
+ MOUNT_APPLY_APIVFS_RO = 1 << 3, /* if set, /proc/sys, and /sys will be mounted read-only, otherwise read-write. */
+ MOUNT_APPLY_APIVFS_NETNS = 1 << 4, /* if set, /proc/sys/net will be mounted read-write.
+ Works only if MOUNT_APPLY_APIVFS_RO is also set. */
+ MOUNT_INACCESSIBLE_REG = 1 << 5, /* if set, create an inaccessible regular file first and use as bind mount source */
+ MOUNT_APPLY_TMPFS_TMP = 1 << 6, /* if set, /tmp will be mounted as tmpfs */
+} MountSettingsMask;
+
+typedef enum CustomMountType {
+ CUSTOM_MOUNT_BIND,
+ CUSTOM_MOUNT_TMPFS,
+ CUSTOM_MOUNT_OVERLAY,
+ _CUSTOM_MOUNT_TYPE_MAX,
+ _CUSTOM_MOUNT_TYPE_INVALID = -1
+} CustomMountType;
+
+typedef struct CustomMount {
+ CustomMountType type;
+ bool read_only;
+ char *source; /* for overlayfs this is the upper directory */
+ char *destination;
+ char *options;
+ char *work_dir;
+ char **lower;
+ char *rm_rf_tmpdir;
+} CustomMount;
+
+CustomMount* custom_mount_add(CustomMount **l, size_t *n, CustomMountType t);
+void custom_mount_free_all(CustomMount *l, size_t n);
+int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n);
+
+int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only);
+int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s);
+int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only);
+
+int mount_all(const char *dest, MountSettingsMask mount_settings, uid_t uid_shift, const char *selinux_apifs_context);
+int mount_sysfs(const char *dest, MountSettingsMask mount_settings);
+
+int mount_custom(const char *dest, CustomMount *mounts, size_t n, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
+
+int setup_volatile(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
+int setup_volatile_state(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
+
+int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s);
+int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old);
+
+int tmpfs_patch_options(const char *options,uid_t uid_shift, const char *selinux_apifs_context, char **ret);
diff --git a/src/nspawn/nspawn-network.c b/src/nspawn/nspawn-network.c
new file mode 100644
index 0000000..c028b57
--- /dev/null
+++ b/src/nspawn/nspawn-network.c
@@ -0,0 +1,661 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <linux/veth.h>
+#include <net/if.h>
+#include <sys/file.h>
+
+#include "sd-device.h"
+#include "sd-id128.h"
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "ether-addr-util.h"
+#include "lockfile-util.h"
+#include "missing_network.h"
+#include "netlink-util.h"
+#include "nspawn-network.h"
+#include "siphash24.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+#define HOST_HASH_KEY SD_ID128_MAKE(1a,37,6f,c7,46,ec,45,0b,ad,a3,d5,31,06,60,5d,b1)
+#define CONTAINER_HASH_KEY SD_ID128_MAKE(c3,c4,f9,19,b5,57,b2,1c,e6,cf,14,27,03,9c,ee,a2)
+#define VETH_EXTRA_HOST_HASH_KEY SD_ID128_MAKE(48,c7,f6,b7,ea,9d,4c,9e,b7,28,d4,de,91,d5,bf,66)
+#define VETH_EXTRA_CONTAINER_HASH_KEY SD_ID128_MAKE(af,50,17,61,ce,f9,4d,35,84,0d,2b,20,54,be,ce,59)
+#define MACVLAN_HASH_KEY SD_ID128_MAKE(00,13,6d,bc,66,83,44,81,bb,0c,f9,51,1f,24,a6,6f)
+
+static int remove_one_link(sd_netlink *rtnl, const char *name) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ if (isempty(name))
+ return 0;
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_DELLINK, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r == -ENODEV) /* Already gone */
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to remove interface %s: %m", name);
+
+ return 1;
+}
+
+static int generate_mac(
+ const char *machine_name,
+ struct ether_addr *mac,
+ sd_id128_t hash_key,
+ uint64_t idx) {
+
+ uint64_t result;
+ size_t l, sz;
+ uint8_t *v, *i;
+ int r;
+
+ l = strlen(machine_name);
+ sz = sizeof(sd_id128_t) + l;
+ if (idx > 0)
+ sz += sizeof(idx);
+
+ v = newa(uint8_t, sz);
+
+ /* fetch some persistent data unique to the host */
+ r = sd_id128_get_machine((sd_id128_t*) v);
+ if (r < 0)
+ return r;
+
+ /* combine with some data unique (on this host) to this
+ * container instance */
+ i = mempcpy(v + sizeof(sd_id128_t), machine_name, l);
+ if (idx > 0) {
+ idx = htole64(idx);
+ memcpy(i, &idx, sizeof(idx));
+ }
+
+ /* Let's hash the host machine ID plus the container name. We
+ * use a fixed, but originally randomly created hash key here. */
+ result = htole64(siphash24(v, sz, hash_key.bytes));
+
+ assert_cc(ETH_ALEN <= sizeof(result));
+ memcpy(mac->ether_addr_octet, &result, ETH_ALEN);
+
+ /* see eth_random_addr in the kernel */
+ mac->ether_addr_octet[0] &= 0xfe; /* clear multicast bit */
+ mac->ether_addr_octet[0] |= 0x02; /* set local assignment bit (IEEE802) */
+
+ return 0;
+}
+
+static int add_veth(
+ sd_netlink *rtnl,
+ pid_t pid,
+ const char *ifname_host,
+ const struct ether_addr *mac_host,
+ const char *ifname_container,
+ const struct ether_addr *mac_container) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(ifname_host);
+ assert(mac_host);
+ assert(ifname_container);
+ assert(mac_container);
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, ifname_host);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, mac_host);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink MAC address: %m");
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "veth");
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_open_container(m, VETH_INFO_PEER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, ifname_container);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, mac_container);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink MAC address: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink namespace field: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add new veth interfaces (%s:%s): %m", ifname_host, ifname_container);
+
+ return 0;
+}
+
+int setup_veth(const char *machine_name,
+ pid_t pid,
+ char iface_name[IFNAMSIZ],
+ bool bridge) {
+
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ struct ether_addr mac_host, mac_container;
+ int r, i;
+
+ assert(machine_name);
+ assert(pid > 0);
+ assert(iface_name);
+
+ /* Use two different interface name prefixes depending whether
+ * we are in bridge mode or not. */
+ snprintf(iface_name, IFNAMSIZ - 1, "%s-%s",
+ bridge ? "vb" : "ve", machine_name);
+
+ r = generate_mac(machine_name, &mac_container, CONTAINER_HASH_KEY, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate predictable MAC address for container side: %m");
+
+ r = generate_mac(machine_name, &mac_host, HOST_HASH_KEY, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate predictable MAC address for host side: %m");
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ r = add_veth(rtnl, pid, iface_name, &mac_host, "host0", &mac_container);
+ if (r < 0)
+ return r;
+
+ i = (int) if_nametoindex(iface_name);
+ if (i <= 0)
+ return log_error_errno(errno, "Failed to resolve interface %s: %m", iface_name);
+
+ return i;
+}
+
+int setup_veth_extra(
+ const char *machine_name,
+ pid_t pid,
+ char **pairs) {
+
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ uint64_t idx = 0;
+ char **a, **b;
+ int r;
+
+ assert(machine_name);
+ assert(pid > 0);
+
+ if (strv_isempty(pairs))
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ STRV_FOREACH_PAIR(a, b, pairs) {
+ struct ether_addr mac_host, mac_container;
+
+ r = generate_mac(machine_name, &mac_container, VETH_EXTRA_CONTAINER_HASH_KEY, idx);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate predictable MAC address for container side of extra veth link: %m");
+
+ r = generate_mac(machine_name, &mac_host, VETH_EXTRA_HOST_HASH_KEY, idx);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate predictable MAC address for container side of extra veth link: %m");
+
+ r = add_veth(rtnl, pid, *a, &mac_host, *b, &mac_container);
+ if (r < 0)
+ return r;
+
+ idx++;
+ }
+
+ return 0;
+}
+
+static int join_bridge(sd_netlink *rtnl, const char *veth_name, const char *bridge_name) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r, bridge_ifi;
+
+ assert(rtnl);
+ assert(veth_name);
+ assert(bridge_name);
+
+ bridge_ifi = (int) if_nametoindex(bridge_name);
+ if (bridge_ifi <= 0)
+ return -errno;
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_link_set_flags(m, IFF_UP, IFF_UP);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, veth_name);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_u32(m, IFLA_MASTER, bridge_ifi);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return r;
+
+ return bridge_ifi;
+}
+
+static int create_bridge(sd_netlink *rtnl, const char *bridge_name) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, bridge_name);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "bridge");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int setup_bridge(const char *veth_name, const char *bridge_name, bool create) {
+ _cleanup_(release_lock_file) LockFile bridge_lock = LOCK_FILE_INIT;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ int r, bridge_ifi;
+ unsigned n = 0;
+
+ assert(veth_name);
+ assert(bridge_name);
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ if (create) {
+ /* We take a system-wide lock here, so that we can safely check whether there's still a member in the
+ * bridge before removing it, without risking interference from other nspawn instances. */
+
+ r = make_lock_file("/run/systemd/nspawn-network-zone", LOCK_EX, &bridge_lock);
+ if (r < 0)
+ return log_error_errno(r, "Failed to take network zone lock: %m");
+ }
+
+ for (;;) {
+ bridge_ifi = join_bridge(rtnl, veth_name, bridge_name);
+ if (bridge_ifi >= 0)
+ return bridge_ifi;
+ if (bridge_ifi != -ENODEV || !create || n > 10)
+ return log_error_errno(bridge_ifi, "Failed to add interface %s to bridge %s: %m", veth_name, bridge_name);
+
+ /* Count attempts, so that we don't enter an endless loop here. */
+ n++;
+
+ /* The bridge doesn't exist yet. Let's create it */
+ r = create_bridge(rtnl, bridge_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bridge interface %s: %m", bridge_name);
+
+ /* Try again, now that the bridge exists */
+ }
+}
+
+int remove_bridge(const char *bridge_name) {
+ _cleanup_(release_lock_file) LockFile bridge_lock = LOCK_FILE_INIT;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ const char *path;
+ int r;
+
+ /* Removes the specified bridge, but only if it is currently empty */
+
+ if (isempty(bridge_name))
+ return 0;
+
+ r = make_lock_file("/run/systemd/nspawn-network-zone", LOCK_EX, &bridge_lock);
+ if (r < 0)
+ return log_error_errno(r, "Failed to take network zone lock: %m");
+
+ path = strjoina("/sys/class/net/", bridge_name, "/brif");
+
+ r = dir_is_empty(path);
+ if (r == -ENOENT) /* Already gone? */
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Can't detect if bridge %s is empty: %m", bridge_name);
+ if (r == 0) /* Still populated, leave it around */
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ return remove_one_link(rtnl, bridge_name);
+}
+
+static int parse_interface(const char *name) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ char ifi_str[2 + DECIMAL_STR_MAX(int)];
+ int ifi, r;
+
+ ifi = (int) if_nametoindex(name);
+ if (ifi <= 0)
+ return log_error_errno(errno, "Failed to resolve interface %s: %m", name);
+
+ sprintf(ifi_str, "n%i", ifi);
+ r = sd_device_new_from_device_id(&d, ifi_str);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device for interface %s: %m", name);
+
+ r = sd_device_get_is_initialized(d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether interface %s is initialized or not: %m", name);
+ if (r == 0) {
+ log_error("Network interface %s is not initialized yet.", name);
+ return -EBUSY;
+ }
+
+ return ifi;
+}
+
+int move_network_interfaces(pid_t pid, char **ifaces) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ char **i;
+ int r;
+
+ if (strv_isempty(ifaces))
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ STRV_FOREACH(i, ifaces) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int ifi;
+
+ ifi = parse_interface(*i);
+ if (ifi < 0)
+ return ifi;
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, ifi);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to append namespace PID to netlink message: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to move interface %s to namespace: %m", *i);
+ }
+
+ return 0;
+}
+
+int setup_macvlan(const char *machine_name, pid_t pid, char **ifaces) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ unsigned idx = 0;
+ char **i;
+ int r;
+
+ if (strv_isempty(ifaces))
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ STRV_FOREACH(i, ifaces) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ _cleanup_free_ char *n = NULL;
+ struct ether_addr mac;
+ int ifi;
+
+ ifi = parse_interface(*i);
+ if (ifi < 0)
+ return ifi;
+
+ r = generate_mac(machine_name, &mac, MACVLAN_HASH_KEY, idx++);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create MACVLAN MAC address: %m");
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_LINK, ifi);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface index: %m");
+
+ n = strappend("mv-", *i);
+ if (!n)
+ return log_oom();
+
+ strshorten(n, IFNAMSIZ-1);
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, &mac);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink MAC address: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink namespace field: %m");
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "macvlan");
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_MACVLAN_MODE, MACVLAN_MODE_BRIDGE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to append macvlan mode: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add new macvlan interfaces: %m");
+ }
+
+ return 0;
+}
+
+int setup_ipvlan(const char *machine_name, pid_t pid, char **ifaces) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ char **i;
+ int r;
+
+ if (strv_isempty(ifaces))
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ STRV_FOREACH(i, ifaces) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ _cleanup_free_ char *n = NULL;
+ int ifi;
+
+ ifi = parse_interface(*i);
+ if (ifi < 0)
+ return ifi;
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_LINK, ifi);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface index: %m");
+
+ n = strappend("iv-", *i);
+ if (!n)
+ return log_oom();
+
+ strshorten(n, IFNAMSIZ-1);
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink namespace field: %m");
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "ipvlan");
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_IPVLAN_MODE, IPVLAN_MODE_L2);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add ipvlan mode: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add new ipvlan interfaces: %m");
+ }
+
+ return 0;
+}
+
+int veth_extra_parse(char ***l, const char *p) {
+ _cleanup_free_ char *a = NULL, *b = NULL;
+ int r;
+
+ r = extract_first_word(&p, &a, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0 || !ifname_valid(a))
+ return -EINVAL;
+
+ r = extract_first_word(&p, &b, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0 || !ifname_valid(b)) {
+ free(b);
+ b = strdup(a);
+ if (!b)
+ return -ENOMEM;
+ }
+
+ if (p)
+ return -EINVAL;
+
+ r = strv_push_pair(l, a, b);
+ if (r < 0)
+ return -ENOMEM;
+
+ a = b = NULL;
+ return 0;
+}
+
+int remove_veth_links(const char *primary, char **pairs) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ char **a, **b;
+ int r;
+
+ /* In some cases the kernel might pin the veth links between host and container even after the namespace
+ * died. Hence, let's better remove them explicitly too. */
+
+ if (isempty(primary) && strv_isempty(pairs))
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ remove_one_link(rtnl, primary);
+
+ STRV_FOREACH_PAIR(a, b, pairs)
+ remove_one_link(rtnl, *a);
+
+ return 0;
+}
diff --git a/src/nspawn/nspawn-network.h b/src/nspawn/nspawn-network.h
new file mode 100644
index 0000000..32ea21c
--- /dev/null
+++ b/src/nspawn/nspawn-network.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <net/if.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+int setup_veth(const char *machine_name, pid_t pid, char iface_name[IFNAMSIZ], bool bridge);
+int setup_veth_extra(const char *machine_name, pid_t pid, char **pairs);
+
+int setup_bridge(const char *veth_name, const char *bridge_name, bool create);
+int remove_bridge(const char *bridge_name);
+
+int setup_macvlan(const char *machine_name, pid_t pid, char **ifaces);
+int setup_ipvlan(const char *machine_name, pid_t pid, char **ifaces);
+
+int move_network_interfaces(pid_t pid, char **ifaces);
+
+int veth_extra_parse(char ***l, const char *p);
+
+int remove_veth_links(const char *primary, char **pairs);
diff --git a/src/nspawn/nspawn-patch-uid.c b/src/nspawn/nspawn-patch-uid.c
new file mode 100644
index 0000000..db2f546
--- /dev/null
+++ b/src/nspawn/nspawn-patch-uid.c
@@ -0,0 +1,490 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <linux/magic.h>
+#if HAVE_ACL
+#include <sys/acl.h>
+#endif
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+
+#include "acl-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "missing.h"
+#include "nspawn-def.h"
+#include "nspawn-patch-uid.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+#if HAVE_ACL
+
+static int get_acl(int fd, const char *name, acl_type_t type, acl_t *ret) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+ acl_t acl;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (name) {
+ _cleanup_close_ int child_fd = -1;
+
+ child_fd = openat(fd, name, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (child_fd < 0)
+ return -errno;
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", child_fd);
+ acl = acl_get_file(procfs_path, type);
+ } else if (type == ACL_TYPE_ACCESS)
+ acl = acl_get_fd(fd);
+ else {
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ acl = acl_get_file(procfs_path, type);
+ }
+ if (!acl)
+ return -errno;
+
+ *ret = acl;
+ return 0;
+}
+
+static int set_acl(int fd, const char *name, acl_type_t type, acl_t acl) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+ int r;
+
+ assert(fd >= 0);
+ assert(acl);
+
+ if (name) {
+ _cleanup_close_ int child_fd = -1;
+
+ child_fd = openat(fd, name, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (child_fd < 0)
+ return -errno;
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", child_fd);
+ r = acl_set_file(procfs_path, type, acl);
+ } else if (type == ACL_TYPE_ACCESS)
+ r = acl_set_fd(fd, acl);
+ else {
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ r = acl_set_file(procfs_path, type, acl);
+ }
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int shift_acl(acl_t acl, uid_t shift, acl_t *ret) {
+ _cleanup_(acl_freep) acl_t copy = NULL;
+ acl_entry_t i;
+ int r;
+
+ assert(acl);
+ assert(ret);
+
+ r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i);
+ if (r < 0)
+ return -errno;
+ while (r > 0) {
+ uid_t *old_uid, new_uid;
+ bool modify = false;
+ acl_tag_t tag;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if (IN_SET(tag, ACL_USER, ACL_GROUP)) {
+
+ /* We don't distuingish here between uid_t and gid_t, let's make sure the compiler checks that
+ * this is actually OK */
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+
+ old_uid = acl_get_qualifier(i);
+ if (!old_uid)
+ return -errno;
+
+ new_uid = shift | (*old_uid & UINT32_C(0xFFFF));
+ if (!uid_is_valid(new_uid))
+ return -EINVAL;
+
+ modify = new_uid != *old_uid;
+ if (modify && !copy) {
+ int n;
+
+ /* There's no copy of the ACL yet? if so, let's create one, and start the loop from the
+ * beginning, so that we copy all entries, starting from the first, this time. */
+
+ n = acl_entries(acl);
+ if (n < 0)
+ return -errno;
+
+ copy = acl_init(n);
+ if (!copy)
+ return -errno;
+
+ /* Seek back to the beginning */
+ r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i);
+ if (r < 0)
+ return -errno;
+ continue;
+ }
+ }
+
+ if (copy) {
+ acl_entry_t new_entry;
+
+ if (acl_create_entry(&copy, &new_entry) < 0)
+ return -errno;
+
+ if (acl_copy_entry(new_entry, i) < 0)
+ return -errno;
+
+ if (modify)
+ if (acl_set_qualifier(new_entry, &new_uid) < 0)
+ return -errno;
+ }
+
+ r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i);
+ if (r < 0)
+ return -errno;
+ }
+
+ *ret = TAKE_PTR(copy);
+
+ return !!*ret;
+}
+
+static int patch_acls(int fd, const char *name, const struct stat *st, uid_t shift) {
+ _cleanup_(acl_freep) acl_t acl = NULL, shifted = NULL;
+ bool changed = false;
+ int r;
+
+ assert(fd >= 0);
+ assert(st);
+
+ /* ACLs are not supported on symlinks, there's no point in trying */
+ if (S_ISLNK(st->st_mode))
+ return 0;
+
+ r = get_acl(fd, name, ACL_TYPE_ACCESS, &acl);
+ if (r == -EOPNOTSUPP)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = shift_acl(acl, shift, &shifted);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ r = set_acl(fd, name, ACL_TYPE_ACCESS, shifted);
+ if (r < 0)
+ return r;
+
+ changed = true;
+ }
+
+ if (S_ISDIR(st->st_mode)) {
+ acl_free(acl);
+ acl_free(shifted);
+
+ acl = shifted = NULL;
+
+ r = get_acl(fd, name, ACL_TYPE_DEFAULT, &acl);
+ if (r < 0)
+ return r;
+
+ r = shift_acl(acl, shift, &shifted);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ r = set_acl(fd, name, ACL_TYPE_DEFAULT, shifted);
+ if (r < 0)
+ return r;
+
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+#else
+
+static int patch_acls(int fd, const char *name, const struct stat *st, uid_t shift) {
+ return 0;
+}
+
+#endif
+
+static int patch_fd(int fd, const char *name, const struct stat *st, uid_t shift) {
+ uid_t new_uid;
+ gid_t new_gid;
+ bool changed = false;
+ int r;
+
+ assert(fd >= 0);
+ assert(st);
+
+ new_uid = shift | (st->st_uid & UINT32_C(0xFFFF));
+ new_gid = (gid_t) shift | (st->st_gid & UINT32_C(0xFFFF));
+
+ if (!uid_is_valid(new_uid) || !gid_is_valid(new_gid))
+ return -EINVAL;
+
+ if (st->st_uid != new_uid || st->st_gid != new_gid) {
+ if (name)
+ r = fchownat(fd, name, new_uid, new_gid, AT_SYMLINK_NOFOLLOW);
+ else
+ r = fchown(fd, new_uid, new_gid);
+ if (r < 0)
+ return -errno;
+
+ /* The Linux kernel alters the mode in some cases of chown(). Let's undo this. */
+ if (name) {
+ if (!S_ISLNK(st->st_mode))
+ r = fchmodat(fd, name, st->st_mode, 0);
+ else /* AT_SYMLINK_NOFOLLOW is not available for fchmodat() */
+ r = 0;
+ } else
+ r = fchmod(fd, st->st_mode);
+ if (r < 0)
+ return -errno;
+
+ changed = true;
+ }
+
+ r = patch_acls(fd, name, st, shift);
+ if (r < 0)
+ return r;
+
+ return r > 0 || changed;
+}
+
+/*
+ * Check if the filesystem is fully compatible with user namespaces or
+ * UID/GID patching. Some filesystems in this list can be fully mounted inside
+ * user namespaces, however their inodes may relate to host resources or only
+ * valid in the global user namespace, therefore no patching should be applied.
+ */
+static int is_fs_fully_userns_compatible(const struct statfs *sfs) {
+
+ assert(sfs);
+
+ return F_TYPE_EQUAL(sfs->f_type, BINFMTFS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, CGROUP_SUPER_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, CGROUP2_SUPER_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, DEBUGFS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, DEVPTS_SUPER_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, EFIVARFS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, HUGETLBFS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, MQUEUE_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, PROC_SUPER_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, PSTOREFS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, SELINUX_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, SMACK_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, SECURITYFS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, BPF_FS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, TRACEFS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, SYSFS_MAGIC);
+}
+
+static int recurse_fd(int fd, bool donate_fd, const struct stat *st, uid_t shift, bool is_toplevel) {
+ _cleanup_closedir_ DIR *d = NULL;
+ bool changed = false;
+ struct statfs sfs;
+ int r;
+
+ assert(fd >= 0);
+
+ if (fstatfs(fd, &sfs) < 0)
+ return -errno;
+
+ /* We generally want to permit crossing of mount boundaries when patching the UIDs/GIDs. However, we probably
+ * shouldn't do this for /proc and /sys if that is already mounted into place. Hence, let's stop the recursion
+ * when we hit procfs, sysfs or some other special file systems. */
+
+ r = is_fs_fully_userns_compatible(&sfs);
+ if (r < 0)
+ goto finish;
+ if (r > 0) {
+ r = 0; /* don't recurse */
+ goto finish;
+ }
+
+ /* Also, if we hit a read-only file system, then don't bother, skip the whole subtree */
+ if ((sfs.f_flags & ST_RDONLY) ||
+ access_fd(fd, W_OK) == -EROFS)
+ goto read_only;
+
+ if (S_ISDIR(st->st_mode)) {
+ struct dirent *de;
+
+ if (!donate_fd) {
+ int copy;
+
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ fd = copy;
+ donate_fd = true;
+ }
+
+ d = fdopendir(fd);
+ if (!d) {
+ r = -errno;
+ goto finish;
+ }
+ fd = -1;
+
+ FOREACH_DIRENT_ALL(de, d, r = -errno; goto finish) {
+ struct stat fst;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (fstatat(dirfd(d), de->d_name, &fst, AT_SYMLINK_NOFOLLOW) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ if (S_ISDIR(fst.st_mode)) {
+ int subdir_fd;
+
+ subdir_fd = openat(dirfd(d), de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
+ if (subdir_fd < 0) {
+ r = -errno;
+ goto finish;
+
+ }
+
+ r = recurse_fd(subdir_fd, true, &fst, shift, false);
+ if (r < 0)
+ goto finish;
+ if (r > 0)
+ changed = true;
+
+ } else {
+ r = patch_fd(dirfd(d), de->d_name, &fst, shift);
+ if (r < 0)
+ goto finish;
+ if (r > 0)
+ changed = true;
+ }
+ }
+ }
+
+ /* After we descended, also patch the directory itself. It's key to do this in this order so that the top-level
+ * directory is patched as very last object in the tree, so that we can use it as quick indicator whether the
+ * tree is properly chown()ed already. */
+ r = patch_fd(d ? dirfd(d) : fd, NULL, st, shift);
+ if (r == -EROFS)
+ goto read_only;
+ if (r > 0)
+ changed = true;
+
+ r = changed;
+ goto finish;
+
+read_only:
+ if (!is_toplevel) {
+ _cleanup_free_ char *name = NULL;
+
+ /* When we hit a ready-only subtree we simply skip it, but log about it. */
+ (void) fd_get_path(fd, &name);
+ log_debug("Skippping read-only file or directory %s.", strna(name));
+ r = changed;
+ }
+
+finish:
+ if (donate_fd)
+ safe_close(fd);
+
+ return r;
+}
+
+static int fd_patch_uid_internal(int fd, bool donate_fd, uid_t shift, uid_t range) {
+ struct stat st;
+ int r;
+
+ assert(fd >= 0);
+
+ /* Recursively adjusts the UID/GIDs of all files of a directory tree. This is used to automatically fix up an
+ * OS tree to the used user namespace UID range. Note that this automatic adjustment only works for UID ranges
+ * following the concept that the upper 16bit of a UID identify the container, and the lower 16bit are the actual
+ * UID within the container. */
+
+ if ((shift & 0xFFFF) != 0) {
+ /* We only support containers where the shift starts at a 2^16 boundary */
+ r = -EOPNOTSUPP;
+ goto finish;
+ }
+
+ if (shift == UID_BUSY_BASE) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (range != 0x10000) {
+ /* We only support containers with 16bit UID ranges for the patching logic */
+ r = -EOPNOTSUPP;
+ goto finish;
+ }
+
+ if (fstat(fd, &st) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ if ((uint32_t) st.st_uid >> 16 != (uint32_t) st.st_gid >> 16) {
+ /* We only support containers where the uid/gid container ID match */
+ r = -EBADE;
+ goto finish;
+ }
+
+ /* Try to detect if the range is already right. Of course, this a pretty drastic optimization, as we assume
+ * that if the top-level dir has the right upper 16bit assigned, then everything below will have too... */
+ if (((uint32_t) (st.st_uid ^ shift) >> 16) == 0)
+ return 0;
+
+ /* Before we start recursively chowning, mark the top-level dir as "busy" by chowning it to the "busy"
+ * range. Should we be interrupted in the middle of our work, we'll see it owned by this user and will start
+ * chown()ing it again, unconditionally, as the busy UID is not a valid UID we'd everpick for ourselves. */
+
+ if ((st.st_uid & UID_BUSY_MASK) != UID_BUSY_BASE) {
+ if (fchown(fd,
+ UID_BUSY_BASE | (st.st_uid & ~UID_BUSY_MASK),
+ (gid_t) UID_BUSY_BASE | (st.st_gid & ~(gid_t) UID_BUSY_MASK)) < 0) {
+ r = -errno;
+ goto finish;
+ }
+ }
+
+ return recurse_fd(fd, donate_fd, &st, shift, true);
+
+finish:
+ if (donate_fd)
+ safe_close(fd);
+
+ return r;
+}
+
+int path_patch_uid(const char *path, uid_t shift, uid_t range) {
+ int fd;
+
+ fd = open(path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
+ if (fd < 0)
+ return -errno;
+
+ return fd_patch_uid_internal(fd, true, shift, range);
+}
diff --git a/src/nspawn/nspawn-patch-uid.h b/src/nspawn/nspawn-patch-uid.h
new file mode 100644
index 0000000..b7c6ce2
--- /dev/null
+++ b/src/nspawn/nspawn-patch-uid.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#pragma once
+
+#include <sys/types.h>
+
+int path_patch_uid(const char *path, uid_t shift, uid_t range);
diff --git a/src/nspawn/nspawn-register.c b/src/nspawn/nspawn-register.c
new file mode 100644
index 0000000..a7cdfc1
--- /dev/null
+++ b/src/nspawn/nspawn-register.c
@@ -0,0 +1,388 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-bus.h"
+
+#include "bus-error.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "nspawn-register.h"
+#include "special.h"
+#include "stat-util.h"
+#include "strv.h"
+#include "util.h"
+
+static int append_machine_properties(
+ sd_bus_message *m,
+ CustomMount *mounts,
+ unsigned n_mounts,
+ int kill_signal) {
+
+ unsigned j;
+ int r;
+
+ assert(m);
+
+ r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "closed");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* If you make changes here, also make sure to update systemd-nspawn@.service, to keep the device policies in
+ * sync regardless if we are run with or without the --keep-unit switch. */
+ r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 2,
+ /* Allow the container to
+ * access and create the API
+ * device nodes, so that
+ * PrivateDevices= in the
+ * container can work
+ * fine */
+ "/dev/net/tun", "rwm",
+ /* Allow the container
+ * access to ptys. However,
+ * do not permit the
+ * container to ever create
+ * these device nodes. */
+ "char-pts", "rw");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (j = 0; j < n_mounts; j++) {
+ CustomMount *cm = mounts + j;
+
+ if (cm->type != CUSTOM_MOUNT_BIND)
+ continue;
+
+ r = is_device_node(cm->source);
+ if (r == -ENOENT) {
+ /* The bind source might only appear as the image is put together, hence don't complain */
+ log_debug_errno(r, "Bind mount source %s not found, ignoring: %m", cm->source);
+ continue;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to stat %s: %m", cm->source);
+
+ if (r) {
+ r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 1,
+ cm->source, cm->read_only ? "r" : "rw");
+ if (r < 0)
+ return log_error_errno(r, "Failed to append message arguments: %m");
+ }
+ }
+
+ if (kill_signal != 0) {
+ r = sd_bus_message_append(m, "(sv)", "KillSignal", "i", kill_signal);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "(sv)", "KillMode", "s", "mixed");
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ return 0;
+}
+
+static int append_controller_property(sd_bus *bus, sd_bus_message *m) {
+ const char *unique;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ r = sd_bus_get_unique_name(bus, &unique);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unique name: %m");
+
+ r = sd_bus_message_append(m, "(sv)", "Controller", "s", unique);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 0;
+}
+
+int register_machine(
+ sd_bus *bus,
+ const char *machine_name,
+ pid_t pid,
+ const char *directory,
+ sd_id128_t uuid,
+ int local_ifindex,
+ const char *slice,
+ CustomMount *mounts,
+ unsigned n_mounts,
+ int kill_signal,
+ char **properties,
+ bool keep_unit,
+ const char *service) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+
+ if (keep_unit) {
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "RegisterMachineWithNetwork",
+ &error,
+ NULL,
+ "sayssusai",
+ machine_name,
+ SD_BUS_MESSAGE_APPEND_ID128(uuid),
+ service,
+ "container",
+ (uint32_t) pid,
+ strempty(directory),
+ local_ifindex > 0 ? 1 : 0, local_ifindex);
+ } else {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "CreateMachineWithNetwork");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "sayssusai",
+ machine_name,
+ SD_BUS_MESSAGE_APPEND_ID128(uuid),
+ service,
+ "container",
+ (uint32_t) pid,
+ strempty(directory),
+ local_ifindex > 0 ? 1 : 0, local_ifindex);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (!isempty(slice)) {
+ r = sd_bus_message_append(m, "(sv)", "Slice", "s", slice);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = append_controller_property(bus, m);
+ if (r < 0)
+ return r;
+
+ r = append_machine_properties(
+ m,
+ mounts,
+ n_mounts,
+ kill_signal);
+ if (r < 0)
+ return r;
+
+ r = bus_append_unit_property_assignment_many(m, UNIT_SERVICE, properties);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, NULL);
+ }
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to register machine: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+int terminate_machine(
+ sd_bus *bus,
+ const char *machine_name) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "TerminateMachine",
+ &error,
+ NULL,
+ "s",
+ machine_name);
+ if (r < 0)
+ log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+int allocate_scope(
+ sd_bus *bus,
+ const char *machine_name,
+ pid_t pid,
+ const char *slice,
+ CustomMount *mounts,
+ unsigned n_mounts,
+ int kill_signal,
+ char **properties) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_free_ char *scope = NULL;
+ const char *description, *object;
+ int r;
+
+ assert(bus);
+
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch job: %m");
+
+ r = unit_name_mangle_with_suffix(machine_name, 0, ".scope", &scope);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle scope name: %m");
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartTransientUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "ss", scope, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Properties */
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ description = strjoina("Container ", machine_name);
+
+ r = sd_bus_message_append(m, "(sv)(sv)(sv)(sv)(sv)(sv)",
+ "PIDs", "au", 1, pid,
+ "Description", "s", description,
+ "Delegate", "b", 1,
+ "CollectMode", "s", "inactive-or-failed",
+ "AddRef", "b", 1,
+ "Slice", "s", isempty(slice) ? SPECIAL_MACHINE_SLICE : slice);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = append_controller_property(bus, m);
+ if (r < 0)
+ return r;
+
+ r = append_machine_properties(
+ m,
+ mounts,
+ n_mounts,
+ kill_signal);
+ if (r < 0)
+ return r;
+
+ r = bus_append_unit_property_assignment_many(m, UNIT_SCOPE, properties);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* No auxiliary units */
+ r = sd_bus_message_append(
+ m,
+ "a(sa(sv))",
+ 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate scope: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, false);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int terminate_scope(
+ sd_bus *bus,
+ const char *machine_name) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *scope = NULL;
+ int r;
+
+ r = unit_name_mangle_with_suffix(machine_name, 0, ".scope", &scope);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle scope name: %m");
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "AbandonScope",
+ &error,
+ NULL,
+ "s",
+ scope);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to abandon scope '%s', ignoring: %s", scope, bus_error_message(&error, r));
+ sd_bus_error_free(&error);
+ }
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "KillUnit",
+ &error,
+ NULL,
+ "ssi",
+ scope,
+ "all",
+ (int32_t) SIGKILL);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to SIGKILL scope '%s', ignoring: %s", scope, bus_error_message(&error, r));
+ sd_bus_error_free(&error);
+ }
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "UnrefUnit",
+ &error,
+ NULL,
+ "s",
+ scope);
+ if (r < 0)
+ log_debug_errno(r, "Failed to drop reference to scope '%s', ignoring: %s", scope, bus_error_message(&error, r));
+
+ return 0;
+}
diff --git a/src/nspawn/nspawn-register.h b/src/nspawn/nspawn-register.h
new file mode 100644
index 0000000..05f5776
--- /dev/null
+++ b/src/nspawn/nspawn-register.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+#include "sd-id128.h"
+
+#include "nspawn-mount.h"
+
+int register_machine(sd_bus *bus, const char *machine_name, pid_t pid, const char *directory, sd_id128_t uuid, int local_ifindex, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, bool keep_unit, const char *service);
+int terminate_machine(sd_bus *bus, const char *machine_name);
+
+int allocate_scope(sd_bus *bus, const char *machine_name, pid_t pid, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties);
+int terminate_scope(sd_bus *bus, const char *machine_name);
diff --git a/src/nspawn/nspawn-seccomp.c b/src/nspawn/nspawn-seccomp.c
new file mode 100644
index 0000000..e7ef80f
--- /dev/null
+++ b/src/nspawn/nspawn-seccomp.c
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <linux/netlink.h>
+#include <sys/capability.h>
+#include <sys/types.h>
+
+#if HAVE_SECCOMP
+#include <seccomp.h>
+#endif
+
+#include "alloc-util.h"
+#include "log.h"
+#include "nspawn-seccomp.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "string-util.h"
+#include "strv.h"
+
+#if HAVE_SECCOMP
+
+static int seccomp_add_default_syscall_filter(
+ scmp_filter_ctx ctx,
+ uint32_t arch,
+ uint64_t cap_list_retain,
+ char **syscall_whitelist,
+ char **syscall_blacklist) {
+
+ static const struct {
+ uint64_t capability;
+ const char* name;
+ } whitelist[] = {
+ /* Let's use set names where we can */
+ { 0, "@aio" },
+ { 0, "@basic-io" },
+ { 0, "@chown" },
+ { 0, "@default" },
+ { 0, "@file-system" },
+ { 0, "@io-event" },
+ { 0, "@ipc" },
+ { 0, "@mount" },
+ { 0, "@network-io" },
+ { 0, "@process" },
+ { 0, "@resources" },
+ { 0, "@setuid" },
+ { 0, "@signal" },
+ { 0, "@sync" },
+ { 0, "@timer" },
+
+ /* The following four are sets we optionally enable, in case the caps have been configured for it */
+ { CAP_SYS_TIME, "@clock" },
+ { CAP_SYS_MODULE, "@module" },
+ { CAP_SYS_RAWIO, "@raw-io" },
+ { CAP_IPC_LOCK, "@memlock" },
+
+ /* Plus a good set of additional syscalls which are not part of any of the groups above */
+ { 0, "brk" },
+ { 0, "capget" },
+ { 0, "capset" },
+ { 0, "copy_file_range" },
+ { 0, "fadvise64" },
+ { 0, "fadvise64_64" },
+ { 0, "flock" },
+ { 0, "get_mempolicy" },
+ { 0, "getcpu" },
+ { 0, "getpriority" },
+ { 0, "getrandom" },
+ { 0, "ioctl" },
+ { 0, "ioprio_get" },
+ { 0, "kcmp" },
+ { 0, "madvise" },
+ { 0, "mincore" },
+ { 0, "mprotect" },
+ { 0, "mremap" },
+ { 0, "name_to_handle_at" },
+ { 0, "oldolduname" },
+ { 0, "olduname" },
+ { 0, "personality" },
+ { 0, "readahead" },
+ { 0, "readdir" },
+ { 0, "remap_file_pages" },
+ { 0, "sched_get_priority_max" },
+ { 0, "sched_get_priority_min" },
+ { 0, "sched_getaffinity" },
+ { 0, "sched_getattr" },
+ { 0, "sched_getparam" },
+ { 0, "sched_getscheduler" },
+ { 0, "sched_rr_get_interval" },
+ { 0, "sched_yield" },
+ { 0, "seccomp" },
+ { 0, "sendfile" },
+ { 0, "sendfile64" },
+ { 0, "setdomainname" },
+ { 0, "setfsgid" },
+ { 0, "setfsgid32" },
+ { 0, "setfsuid" },
+ { 0, "setfsuid32" },
+ { 0, "sethostname" },
+ { 0, "setpgid" },
+ { 0, "setsid" },
+ { 0, "splice" },
+ { 0, "sysinfo" },
+ { 0, "tee" },
+ { 0, "umask" },
+ { 0, "uname" },
+ { 0, "userfaultfd" },
+ { 0, "vmsplice" },
+
+ /* The following individual syscalls are added depending on specified caps */
+ { CAP_SYS_PACCT, "acct" },
+ { CAP_SYS_PTRACE, "process_vm_readv" },
+ { CAP_SYS_PTRACE, "process_vm_writev" },
+ { CAP_SYS_PTRACE, "ptrace" },
+ { CAP_SYS_BOOT, "reboot" },
+ { CAP_SYSLOG, "syslog" },
+ { CAP_SYS_TTY_CONFIG, "vhangup" },
+
+ /*
+ * The following syscalls and groups are knowingly excluded:
+ *
+ * @cpu-emulation
+ * @keyring (NB: keyring is not namespaced!)
+ * @obsolete
+ * @swap
+ *
+ * bpf (NB: bpffs is not namespaced!)
+ * fanotify_init
+ * fanotify_mark
+ * kexec_file_load
+ * kexec_load
+ * lookup_dcookie
+ * nfsservctl
+ * open_by_handle_at
+ * perf_event_open
+ * pkey_alloc
+ * pkey_free
+ * pkey_mprotect
+ * quotactl
+ */
+ };
+
+ int r;
+ size_t i;
+ char **p;
+
+ for (i = 0; i < ELEMENTSOF(whitelist); i++) {
+ if (whitelist[i].capability != 0 && (cap_list_retain & (1ULL << whitelist[i].capability)) == 0)
+ continue;
+
+ r = seccomp_add_syscall_filter_item(ctx, whitelist[i].name, SCMP_ACT_ALLOW, syscall_blacklist, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add syscall filter item %s: %m", whitelist[i].name);
+ }
+
+ STRV_FOREACH(p, syscall_whitelist) {
+ r = seccomp_add_syscall_filter_item(ctx, *p, SCMP_ACT_ALLOW, syscall_blacklist, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add rule for system call %s on %s, ignoring: %m",
+ *p, seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int setup_seccomp(uint64_t cap_list_retain, char **syscall_whitelist, char **syscall_blacklist) {
+ uint32_t arch;
+ int r;
+
+ if (!is_seccomp_available()) {
+ log_debug("SECCOMP features not detected in the kernel, disabling SECCOMP filterering");
+ return 0;
+ }
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ log_debug("Applying whitelist on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ERRNO(EPERM));
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate seccomp object: %m");
+
+ r = seccomp_add_default_syscall_filter(seccomp, arch, cap_list_retain, syscall_whitelist, syscall_blacklist);
+ if (r < 0)
+ return r;
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return log_error_errno(r, "Failed to install seccomp filter: %m");
+ if (r < 0)
+ log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ log_debug("Applying NETLINK_AUDIT mask on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate seccomp object: %m");
+
+ /*
+ Audit is broken in containers, much of the userspace audit hookup will fail if running inside a
+ container. We don't care and just turn off creation of audit sockets.
+
+ This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail with EAFNOSUPPORT which audit userspace uses
+ as indication that audit is disabled in the kernel.
+ */
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 2,
+ SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
+ SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add audit seccomp rule, ignoring: %m");
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return log_error_errno(r, "Failed to install seccomp audit filter: %m");
+ if (r < 0)
+ log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+#else
+
+int setup_seccomp(uint64_t cap_list_retain, char **syscall_whitelist, char **syscall_blacklist) {
+ return 0;
+}
+
+#endif
diff --git a/src/nspawn/nspawn-seccomp.h b/src/nspawn/nspawn-seccomp.h
new file mode 100644
index 0000000..d852eef
--- /dev/null
+++ b/src/nspawn/nspawn-seccomp.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+int setup_seccomp(uint64_t cap_list_retain, char **syscall_whitelist, char **syscall_blacklist);
diff --git a/src/nspawn/nspawn-settings.c b/src/nspawn/nspawn-settings.c
new file mode 100644
index 0000000..505e8ca
--- /dev/null
+++ b/src/nspawn/nspawn-settings.c
@@ -0,0 +1,807 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "cap-list.h"
+#include "conf-parser.h"
+#include "cpu-set-util.h"
+#include "hostname-util.h"
+#include "nspawn-network.h"
+#include "nspawn-settings.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+int settings_load(FILE *f, const char *path, Settings **ret) {
+ _cleanup_(settings_freep) Settings *s = NULL;
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ s = new0(Settings, 1);
+ if (!s)
+ return -ENOMEM;
+
+ s->start_mode = _START_MODE_INVALID;
+ s->personality = PERSONALITY_INVALID;
+ s->userns_mode = _USER_NAMESPACE_MODE_INVALID;
+ s->resolv_conf = _RESOLV_CONF_MODE_INVALID;
+ s->link_journal = _LINK_JOURNAL_INVALID;
+ s->timezone = _TIMEZONE_MODE_INVALID;
+ s->uid_shift = UID_INVALID;
+ s->uid_range = UID_INVALID;
+ s->no_new_privileges = -1;
+
+ s->read_only = -1;
+ s->volatile_mode = _VOLATILE_MODE_INVALID;
+ s->userns_chown = -1;
+
+ s->private_network = -1;
+ s->network_veth = -1;
+
+ r = config_parse(NULL, path, f,
+ "Exec\0"
+ "Network\0"
+ "Files\0",
+ config_item_perf_lookup, nspawn_gperf_lookup,
+ CONFIG_PARSE_WARN,
+ s);
+ if (r < 0)
+ return r;
+
+ /* Make sure that if userns_mode is set, userns_chown is set to something appropriate, and vice versa. Either
+ * both fields shall be initialized or neither. */
+ if (s->userns_mode == USER_NAMESPACE_PICK)
+ s->userns_chown = true;
+ else if (s->userns_mode != _USER_NAMESPACE_MODE_INVALID && s->userns_chown < 0)
+ s->userns_chown = false;
+
+ if (s->userns_chown >= 0 && s->userns_mode == _USER_NAMESPACE_MODE_INVALID)
+ s->userns_mode = USER_NAMESPACE_NO;
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+Settings* settings_free(Settings *s) {
+
+ if (!s)
+ return NULL;
+
+ strv_free(s->parameters);
+ strv_free(s->environment);
+ free(s->user);
+ free(s->pivot_root_new);
+ free(s->pivot_root_old);
+ free(s->working_directory);
+ strv_free(s->syscall_whitelist);
+ strv_free(s->syscall_blacklist);
+ rlimit_free_all(s->rlimit);
+ free(s->hostname);
+ s->cpuset = cpu_set_mfree(s->cpuset);
+
+ strv_free(s->network_interfaces);
+ strv_free(s->network_macvlan);
+ strv_free(s->network_ipvlan);
+ strv_free(s->network_veth_extra);
+ free(s->network_bridge);
+ free(s->network_zone);
+ expose_port_free_all(s->expose_ports);
+
+ custom_mount_free_all(s->custom_mounts, s->n_custom_mounts);
+ return mfree(s);
+}
+
+bool settings_private_network(Settings *s) {
+ assert(s);
+
+ return
+ s->private_network > 0 ||
+ s->network_veth > 0 ||
+ s->network_bridge ||
+ s->network_zone ||
+ s->network_interfaces ||
+ s->network_macvlan ||
+ s->network_ipvlan ||
+ s->network_veth_extra;
+}
+
+bool settings_network_veth(Settings *s) {
+ assert(s);
+
+ return
+ s->network_veth > 0 ||
+ s->network_bridge ||
+ s->network_zone;
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_volatile_mode, volatile_mode, VolatileMode, "Failed to parse volatile mode");
+
+int config_parse_expose_port(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = expose_port_parse(&s->expose_ports, rvalue);
+ if (r == -EEXIST) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Duplicate port specification, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse host port %s: %m", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_capability(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t u = 0, *result = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&rvalue, &word, NULL, 0);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract capability string, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ break;
+
+ r = capability_from_name(word);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse capability, ignoring: %s", word);
+ continue;
+ }
+
+ u |= UINT64_C(1) << r;
+ }
+
+ if (u == 0)
+ return 0;
+
+ *result |= u;
+ return 0;
+}
+
+int config_parse_id128(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ sd_id128_t t, *result = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = sd_id128_from_string(rvalue, &t);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse 128bit ID/UUID, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *result = t;
+ return 0;
+}
+
+int config_parse_pivot_root(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = pivot_root_parse(&settings->pivot_root_new, &settings->pivot_root_old, rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid pivot root mount specification %s: %m", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_bind(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = bind_mount_parse(&settings->custom_mounts, &settings->n_custom_mounts, rvalue, ltype);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid bind mount specification %s: %m", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_tmpfs(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = tmpfs_mount_parse(&settings->custom_mounts, &settings->n_custom_mounts, rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid temporary file system specification %s: %m", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_overlay(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = overlay_mount_parse(&settings->custom_mounts, &settings->n_custom_mounts, rvalue, ltype);
+ if (r < 0)
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid overlay file system specification %s, ignoring: %m", rvalue);
+
+ return 0;
+}
+
+int config_parse_veth_extra(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = veth_extra_parse(&settings->network_veth_extra, rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid extra virtual Ethernet link specification %s: %m", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_network_zone(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ _cleanup_free_ char *j = NULL;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ j = strappend("vz-", rvalue);
+ if (!ifname_valid(j)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid network zone name, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ free_and_replace(settings->network_zone, j);
+
+ return 0;
+}
+
+int config_parse_boot(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Boot= parameter %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (r > 0) {
+ if (settings->start_mode == START_PID2)
+ goto conflict;
+
+ settings->start_mode = START_BOOT;
+ } else {
+ if (settings->start_mode == START_BOOT)
+ goto conflict;
+
+ if (settings->start_mode < 0)
+ settings->start_mode = START_PID1;
+ }
+
+ return 0;
+
+conflict:
+ log_syntax(unit, LOG_ERR, filename, line, r, "Conflicting Boot= or ProcessTwo= setting found. Ignoring.");
+ return 0;
+}
+
+int config_parse_pid2(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse ProcessTwo= parameter %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (r > 0) {
+ if (settings->start_mode == START_BOOT)
+ goto conflict;
+
+ settings->start_mode = START_PID2;
+ } else {
+ if (settings->start_mode == START_PID2)
+ goto conflict;
+
+ if (settings->start_mode < 0)
+ settings->start_mode = START_PID1;
+ }
+
+ return 0;
+
+conflict:
+ log_syntax(unit, LOG_ERR, filename, line, r, "Conflicting Boot= or ProcessTwo= setting found. Ignoring.");
+ return 0;
+}
+
+int config_parse_private_users(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = parse_boolean(rvalue);
+ if (r == 0) {
+ /* no: User namespacing off */
+ settings->userns_mode = USER_NAMESPACE_NO;
+ settings->uid_shift = UID_INVALID;
+ settings->uid_range = UINT32_C(0x10000);
+ } else if (r > 0) {
+ /* yes: User namespacing on, UID range is read from root dir */
+ settings->userns_mode = USER_NAMESPACE_FIXED;
+ settings->uid_shift = UID_INVALID;
+ settings->uid_range = UINT32_C(0x10000);
+ } else if (streq(rvalue, "pick")) {
+ /* pick: User namespacing on, UID range is picked randomly */
+ settings->userns_mode = USER_NAMESPACE_PICK;
+ settings->uid_shift = UID_INVALID;
+ settings->uid_range = UINT32_C(0x10000);
+ } else {
+ const char *range, *shift;
+ uid_t sh, rn;
+
+ /* anything else: User namespacing on, UID range is explicitly configured */
+
+ range = strchr(rvalue, ':');
+ if (range) {
+ shift = strndupa(rvalue, range - rvalue);
+ range++;
+
+ r = safe_atou32(range, &rn);
+ if (r < 0 || rn <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "UID/GID range invalid, ignoring: %s", range);
+ return 0;
+ }
+ } else {
+ shift = rvalue;
+ rn = UINT32_C(0x10000);
+ }
+
+ r = parse_uid(shift, &sh);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "UID/GID shift invalid, ignoring: %s", range);
+ return 0;
+ }
+
+ settings->userns_mode = USER_NAMESPACE_FIXED;
+ settings->uid_shift = sh;
+ settings->uid_range = rn;
+ }
+
+ return 0;
+}
+
+int config_parse_syscall_filter(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ bool negative;
+ const char *items;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ negative = rvalue[0] == '~';
+ items = negative ? rvalue + 1 : rvalue;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&items, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse SystemCallFilter= parameter %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (negative)
+ r = strv_extend(&settings->syscall_blacklist, word);
+ else
+ r = strv_extend(&settings->syscall_whitelist, word);
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+int config_parse_hostname(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **s = data;
+
+ assert(rvalue);
+ assert(s);
+
+ if (!hostname_is_valid(rvalue, false)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid hostname, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (free_and_strdup(s, empty_to_null(rvalue)) < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_oom_score_adjust(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int oa, r;
+
+ assert(rvalue);
+ assert(settings);
+
+ if (isempty(rvalue)) {
+ settings->oom_score_adjust_set = false;
+ return 0;
+ }
+
+ r = parse_oom_score_adjust(rvalue, &oa);
+ if (r == -ERANGE) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "OOM score adjust value out of range, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse the OOM score adjust value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ settings->oom_score_adjust = oa;
+ settings->oom_score_adjust_set = true;
+
+ return 0;
+}
+
+int config_parse_cpu_affinity(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_cpu_free_ cpu_set_t *cpuset = NULL;
+ Settings *settings = data;
+ int ncpus;
+
+ assert(rvalue);
+ assert(settings);
+
+ ncpus = parse_cpu_set_and_warn(rvalue, &cpuset, unit, filename, line, lvalue);
+ if (ncpus < 0)
+ return ncpus;
+
+ if (ncpus == 0) {
+ /* An empty assignment resets the CPU list */
+ settings->cpuset = cpu_set_mfree(settings->cpuset);
+ settings->cpuset_ncpus = 0;
+ return 0;
+ }
+
+ if (!settings->cpuset) {
+ settings->cpuset = TAKE_PTR(cpuset);
+ settings->cpuset_ncpus = (unsigned) ncpus;
+ return 0;
+ }
+
+ if (settings->cpuset_ncpus < (unsigned) ncpus) {
+ CPU_OR_S(CPU_ALLOC_SIZE(settings->cpuset_ncpus), cpuset, settings->cpuset, cpuset);
+ CPU_FREE(settings->cpuset);
+ settings->cpuset = TAKE_PTR(cpuset);
+ settings->cpuset_ncpus = (unsigned) ncpus;
+ return 0;
+ }
+
+ CPU_OR_S(CPU_ALLOC_SIZE((unsigned) ncpus), settings->cpuset, settings->cpuset, cpuset);
+
+ return 0;
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_resolv_conf, resolv_conf_mode, ResolvConfMode, "Failed to parse resolv.conf mode");
+
+static const char *const resolv_conf_mode_table[_RESOLV_CONF_MODE_MAX] = {
+ [RESOLV_CONF_OFF] = "off",
+ [RESOLV_CONF_COPY_HOST] = "copy-host",
+ [RESOLV_CONF_COPY_STATIC] = "copy-static",
+ [RESOLV_CONF_BIND_HOST] = "bind-host",
+ [RESOLV_CONF_BIND_STATIC] = "bind-static",
+ [RESOLV_CONF_DELETE] = "delete",
+ [RESOLV_CONF_AUTO] = "auto",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(resolv_conf_mode, ResolvConfMode, RESOLV_CONF_AUTO);
+
+int parse_link_journal(const char *s, LinkJournal *ret_mode, bool *ret_try) {
+ assert(s);
+ assert(ret_mode);
+ assert(ret_try);
+
+ if (streq(s, "auto")) {
+ *ret_mode = LINK_AUTO;
+ *ret_try = false;
+ } else if (streq(s, "no")) {
+ *ret_mode = LINK_NO;
+ *ret_try = false;
+ } else if (streq(s, "guest")) {
+ *ret_mode = LINK_GUEST;
+ *ret_try = false;
+ } else if (streq(s, "host")) {
+ *ret_mode = LINK_HOST;
+ *ret_try = false;
+ } else if (streq(s, "try-guest")) {
+ *ret_mode = LINK_GUEST;
+ *ret_try = true;
+ } else if (streq(s, "try-host")) {
+ *ret_mode = LINK_HOST;
+ *ret_try = true;
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
+int config_parse_link_journal(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(rvalue);
+ assert(settings);
+
+ r = parse_link_journal(rvalue, &settings->link_journal, &settings->link_journal_try);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse link journal mode, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_timezone, timezone_mode, TimezoneMode, "Failed to parse timezone mode");
+
+static const char *const timezone_mode_table[_TIMEZONE_MODE_MAX] = {
+ [TIMEZONE_OFF] = "off",
+ [TIMEZONE_COPY] = "copy",
+ [TIMEZONE_BIND] = "bind",
+ [TIMEZONE_SYMLINK] = "symlink",
+ [TIMEZONE_DELETE] = "delete",
+ [TIMEZONE_AUTO] = "auto",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(timezone_mode, TimezoneMode, TIMEZONE_AUTO);
diff --git a/src/nspawn/nspawn-settings.h b/src/nspawn/nspawn-settings.h
new file mode 100644
index 0000000..a63aa32
--- /dev/null
+++ b/src/nspawn/nspawn-settings.h
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sched.h>
+#include <stdio.h>
+
+#include "sd-id128.h"
+
+#include "conf-parser.h"
+#include "macro.h"
+#include "missing_resource.h"
+#include "nspawn-expose-ports.h"
+#include "nspawn-mount.h"
+
+typedef enum StartMode {
+ START_PID1, /* Run parameters as command line as process 1 */
+ START_PID2, /* Use stub init process as PID 1, run parameters as command line as process 2 */
+ START_BOOT, /* Search for init system, pass arguments as parameters */
+ _START_MODE_MAX,
+ _START_MODE_INVALID = -1
+} StartMode;
+
+typedef enum UserNamespaceMode {
+ USER_NAMESPACE_NO,
+ USER_NAMESPACE_FIXED,
+ USER_NAMESPACE_PICK,
+ _USER_NAMESPACE_MODE_MAX,
+ _USER_NAMESPACE_MODE_INVALID = -1,
+} UserNamespaceMode;
+
+typedef enum ResolvConfMode {
+ RESOLV_CONF_OFF,
+ RESOLV_CONF_COPY_HOST,
+ RESOLV_CONF_COPY_STATIC,
+ RESOLV_CONF_BIND_HOST,
+ RESOLV_CONF_BIND_STATIC,
+ RESOLV_CONF_DELETE,
+ RESOLV_CONF_AUTO,
+ _RESOLV_CONF_MODE_MAX,
+ _RESOLV_CONF_MODE_INVALID = -1
+} ResolvConfMode;
+
+typedef enum LinkJournal {
+ LINK_NO,
+ LINK_AUTO,
+ LINK_HOST,
+ LINK_GUEST,
+ _LINK_JOURNAL_MAX,
+ _LINK_JOURNAL_INVALID = -1
+} LinkJournal;
+
+typedef enum TimezoneMode {
+ TIMEZONE_OFF,
+ TIMEZONE_COPY,
+ TIMEZONE_BIND,
+ TIMEZONE_SYMLINK,
+ TIMEZONE_DELETE,
+ TIMEZONE_AUTO,
+ _TIMEZONE_MODE_MAX,
+ _TIMEZONE_MODE_INVALID = -1
+} TimezoneMode;
+
+typedef enum SettingsMask {
+ SETTING_START_MODE = UINT64_C(1) << 0,
+ SETTING_ENVIRONMENT = UINT64_C(1) << 1,
+ SETTING_USER = UINT64_C(1) << 2,
+ SETTING_CAPABILITY = UINT64_C(1) << 3,
+ SETTING_KILL_SIGNAL = UINT64_C(1) << 4,
+ SETTING_PERSONALITY = UINT64_C(1) << 5,
+ SETTING_MACHINE_ID = UINT64_C(1) << 6,
+ SETTING_NETWORK = UINT64_C(1) << 7,
+ SETTING_EXPOSE_PORTS = UINT64_C(1) << 8,
+ SETTING_READ_ONLY = UINT64_C(1) << 9,
+ SETTING_VOLATILE_MODE = UINT64_C(1) << 10,
+ SETTING_CUSTOM_MOUNTS = UINT64_C(1) << 11,
+ SETTING_WORKING_DIRECTORY = UINT64_C(1) << 12,
+ SETTING_USERNS = UINT64_C(1) << 13,
+ SETTING_NOTIFY_READY = UINT64_C(1) << 14,
+ SETTING_PIVOT_ROOT = UINT64_C(1) << 15,
+ SETTING_SYSCALL_FILTER = UINT64_C(1) << 16,
+ SETTING_HOSTNAME = UINT64_C(1) << 17,
+ SETTING_NO_NEW_PRIVILEGES = UINT64_C(1) << 18,
+ SETTING_OOM_SCORE_ADJUST = UINT64_C(1) << 19,
+ SETTING_CPU_AFFINITY = UINT64_C(1) << 20,
+ SETTING_RESOLV_CONF = UINT64_C(1) << 21,
+ SETTING_LINK_JOURNAL = UINT64_C(1) << 22,
+ SETTING_TIMEZONE = UINT64_C(1) << 23,
+ SETTING_EPHEMERAL = UINT64_C(1) << 24,
+ SETTING_RLIMIT_FIRST = UINT64_C(1) << 25, /* we define one bit per resource limit here */
+ SETTING_RLIMIT_LAST = UINT64_C(1) << (25 + _RLIMIT_MAX - 1),
+ _SETTINGS_MASK_ALL = (UINT64_C(1) << (25 + _RLIMIT_MAX)) -1,
+ _SETTING_FORCE_ENUM_WIDTH = UINT64_MAX
+} SettingsMask;
+
+/* We want to use SETTING_RLIMIT_FIRST in shifts, so make sure it is really 64 bits
+ * when used in expressions. */
+#define SETTING_RLIMIT_FIRST ((uint64_t) SETTING_RLIMIT_FIRST)
+#define SETTING_RLIMIT_LAST ((uint64_t) SETTING_RLIMIT_LAST)
+
+assert_cc(sizeof(SettingsMask) == 8);
+assert_cc(sizeof(SETTING_RLIMIT_FIRST) == 8);
+assert_cc(sizeof(SETTING_RLIMIT_LAST) == 8);
+
+typedef struct Settings {
+ /* [Run] */
+ StartMode start_mode;
+ bool ephemeral;
+ char **parameters;
+ char **environment;
+ char *user;
+ uint64_t capability;
+ uint64_t drop_capability;
+ int kill_signal;
+ unsigned long personality;
+ sd_id128_t machine_id;
+ char *working_directory;
+ char *pivot_root_new;
+ char *pivot_root_old;
+ UserNamespaceMode userns_mode;
+ uid_t uid_shift, uid_range;
+ bool notify_ready;
+ char **syscall_whitelist;
+ char **syscall_blacklist;
+ struct rlimit *rlimit[_RLIMIT_MAX];
+ char *hostname;
+ int no_new_privileges;
+ int oom_score_adjust;
+ bool oom_score_adjust_set;
+ cpu_set_t *cpuset;
+ unsigned cpuset_ncpus;
+ ResolvConfMode resolv_conf;
+ LinkJournal link_journal;
+ bool link_journal_try;
+ TimezoneMode timezone;
+
+ /* [Image] */
+ int read_only;
+ VolatileMode volatile_mode;
+ CustomMount *custom_mounts;
+ size_t n_custom_mounts;
+ int userns_chown;
+
+ /* [Network] */
+ int private_network;
+ int network_veth;
+ char *network_bridge;
+ char *network_zone;
+ char **network_interfaces;
+ char **network_macvlan;
+ char **network_ipvlan;
+ char **network_veth_extra;
+ ExposePort *expose_ports;
+} Settings;
+
+int settings_load(FILE *f, const char *path, Settings **ret);
+Settings* settings_free(Settings *s);
+
+bool settings_network_veth(Settings *s);
+bool settings_private_network(Settings *s);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Settings*, settings_free);
+
+const struct ConfigPerfItem* nspawn_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_capability);
+CONFIG_PARSER_PROTOTYPE(config_parse_id128);
+CONFIG_PARSER_PROTOTYPE(config_parse_expose_port);
+CONFIG_PARSER_PROTOTYPE(config_parse_volatile_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_pivot_root);
+CONFIG_PARSER_PROTOTYPE(config_parse_bind);
+CONFIG_PARSER_PROTOTYPE(config_parse_tmpfs);
+CONFIG_PARSER_PROTOTYPE(config_parse_overlay);
+CONFIG_PARSER_PROTOTYPE(config_parse_veth_extra);
+CONFIG_PARSER_PROTOTYPE(config_parse_network_zone);
+CONFIG_PARSER_PROTOTYPE(config_parse_boot);
+CONFIG_PARSER_PROTOTYPE(config_parse_pid2);
+CONFIG_PARSER_PROTOTYPE(config_parse_private_users);
+CONFIG_PARSER_PROTOTYPE(config_parse_syscall_filter);
+CONFIG_PARSER_PROTOTYPE(config_parse_hostname);
+CONFIG_PARSER_PROTOTYPE(config_parse_oom_score_adjust);
+CONFIG_PARSER_PROTOTYPE(config_parse_cpu_affinity);
+CONFIG_PARSER_PROTOTYPE(config_parse_resolv_conf);
+CONFIG_PARSER_PROTOTYPE(config_parse_link_journal);
+CONFIG_PARSER_PROTOTYPE(config_parse_timezone);
+
+const char *resolv_conf_mode_to_string(ResolvConfMode a) _const_;
+ResolvConfMode resolv_conf_mode_from_string(const char *s) _pure_;
+
+const char *timezone_mode_to_string(TimezoneMode a) _const_;
+TimezoneMode timezone_mode_from_string(const char *s) _pure_;
+
+int parse_link_journal(const char *s, LinkJournal *ret_mode, bool *ret_try);
diff --git a/src/nspawn/nspawn-setuid.c b/src/nspawn/nspawn-setuid.c
new file mode 100644
index 0000000..0026e4e
--- /dev/null
+++ b/src/nspawn/nspawn-setuid.c
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <grp.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "def.h"
+#include "errno.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "mkdir.h"
+#include "nspawn-setuid.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+static int spawn_getent(const char *database, const char *key, pid_t *rpid) {
+ int pipe_fds[2], r;
+ pid_t pid;
+
+ assert(database);
+ assert(key);
+ assert(rpid);
+
+ if (pipe2(pipe_fds, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to allocate pipe: %m");
+
+ r = safe_fork("(getent)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0) {
+ safe_close_pair(pipe_fds);
+ return r;
+ }
+ if (r == 0) {
+ char *empty_env = NULL;
+
+ safe_close(pipe_fds[0]);
+
+ if (rearrange_stdio(-1, pipe_fds[1], -1) < 0)
+ _exit(EXIT_FAILURE);
+
+ close_all_fds(NULL, 0);
+
+ (void) rlimit_nofile_safe();
+
+ execle("/usr/bin/getent", "getent", database, key, NULL, &empty_env);
+ execle("/bin/getent", "getent", database, key, NULL, &empty_env);
+ _exit(EXIT_FAILURE);
+ }
+
+ pipe_fds[1] = safe_close(pipe_fds[1]);
+
+ *rpid = pid;
+
+ return pipe_fds[0];
+}
+
+int change_uid_gid(const char *user, char **_home) {
+ char *x, *u, *g, *h;
+ const char *word, *state;
+ _cleanup_free_ uid_t *uids = NULL;
+ _cleanup_free_ char *home = NULL, *line = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_close_ int fd = -1;
+ unsigned n_uids = 0;
+ size_t sz = 0, l;
+ uid_t uid;
+ gid_t gid;
+ pid_t pid;
+ int r;
+
+ assert(_home);
+
+ if (!user || STR_IN_SET(user, "root", "0")) {
+ /* Reset everything fully to 0, just in case */
+
+ r = reset_uid_gid();
+ if (r < 0)
+ return log_error_errno(r, "Failed to become root: %m");
+
+ *_home = NULL;
+ return 0;
+ }
+
+ /* First, get user credentials */
+ fd = spawn_getent("passwd", user, &pid);
+ if (fd < 0)
+ return fd;
+
+ f = fdopen(fd, "r");
+ if (!f)
+ return log_oom();
+ fd = -1;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
+ "Failed to resolve user %s.", user);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read from getent: %m");
+
+ (void) wait_for_terminate_and_check("getent passwd", pid, WAIT_LOG);
+
+ x = strchr(line, ':');
+ if (!x)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "/etc/passwd entry has invalid user field.");
+
+ u = strchr(x+1, ':');
+ if (!u)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "/etc/passwd entry has invalid password field.");
+
+ u++;
+ g = strchr(u, ':');
+ if (!g)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "/etc/passwd entry has invalid UID field.");
+
+ *g = 0;
+ g++;
+ x = strchr(g, ':');
+ if (!x)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "/etc/passwd entry has invalid GID field.");
+
+ *x = 0;
+ h = strchr(x+1, ':');
+ if (!h)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "/etc/passwd entry has invalid GECOS field.");
+
+ h++;
+ x = strchr(h, ':');
+ if (!x)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "/etc/passwd entry has invalid home directory field.");
+
+ *x = 0;
+
+ r = parse_uid(u, &uid);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to parse UID of user.");
+
+ r = parse_gid(g, &gid);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to parse GID of user.");
+
+ home = strdup(h);
+ if (!home)
+ return log_oom();
+
+ f = safe_fclose(f);
+ line = mfree(line);
+
+ /* Second, get group memberships */
+ fd = spawn_getent("initgroups", user, &pid);
+ if (fd < 0)
+ return fd;
+
+ f = fdopen(fd, "r");
+ if (!f)
+ return log_oom();
+ fd = -1;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
+ "Failed to resolve user %s.", user);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read from getent: %m");
+
+ (void) wait_for_terminate_and_check("getent initgroups", pid, WAIT_LOG);
+
+ /* Skip over the username and subsequent separator whitespace */
+ x = line;
+ x += strcspn(x, WHITESPACE);
+ x += strspn(x, WHITESPACE);
+
+ FOREACH_WORD(word, l, x, state) {
+ char c[l+1];
+
+ memcpy(c, word, l);
+ c[l] = 0;
+
+ if (!GREEDY_REALLOC(uids, sz, n_uids+1))
+ return log_oom();
+
+ r = parse_uid(c, &uids[n_uids++]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse group data from getent: %m");
+ }
+
+ r = mkdir_parents(home, 0775);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make home root directory: %m");
+
+ r = mkdir_safe(home, 0755, uid, gid, 0);
+ if (r < 0 && !IN_SET(r, -EEXIST, -ENOTDIR))
+ return log_error_errno(r, "Failed to make home directory: %m");
+
+ (void) fchown(STDIN_FILENO, uid, gid);
+ (void) fchown(STDOUT_FILENO, uid, gid);
+ (void) fchown(STDERR_FILENO, uid, gid);
+
+ if (setgroups(n_uids, uids) < 0)
+ return log_error_errno(errno, "Failed to set auxiliary groups: %m");
+
+ if (setresgid(gid, gid, gid) < 0)
+ return log_error_errno(errno, "setresgid() failed: %m");
+
+ if (setresuid(uid, uid, uid) < 0)
+ return log_error_errno(errno, "setresuid() failed: %m");
+
+ if (_home)
+ *_home = TAKE_PTR(home);
+
+ return 0;
+}
diff --git a/src/nspawn/nspawn-setuid.h b/src/nspawn/nspawn-setuid.h
new file mode 100644
index 0000000..0ae47cb
--- /dev/null
+++ b/src/nspawn/nspawn-setuid.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int change_uid_gid(const char *user, char **ret);
diff --git a/src/nspawn/nspawn-stub-pid1.c b/src/nspawn/nspawn-stub-pid1.c
new file mode 100644
index 0000000..5d17df3
--- /dev/null
+++ b/src/nspawn/nspawn-stub-pid1.c
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/reboot.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "def.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "log.h"
+#include "missing.h"
+#include "nspawn-stub-pid1.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "time-util.h"
+
+static int reset_environ(const char *new_environment, size_t length) {
+ unsigned long start, end;
+
+ start = (unsigned long) new_environment;
+ end = start + length;
+
+ if (prctl(PR_SET_MM, PR_SET_MM_ENV_START, start, 0, 0) < 0)
+ return -errno;
+
+ if (prctl(PR_SET_MM, PR_SET_MM_ENV_END, end, 0, 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int stub_pid1(sd_id128_t uuid) {
+ enum {
+ STATE_RUNNING,
+ STATE_REBOOT,
+ STATE_POWEROFF,
+ } state = STATE_RUNNING;
+
+ sigset_t fullmask, oldmask, waitmask;
+ usec_t quit_usec = USEC_INFINITY;
+ pid_t pid;
+ int r;
+
+ /* The new environment we set up, on the stack. */
+ char new_environment[] =
+ "container=systemd-nspawn\0"
+ "container_uuid=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
+
+ /* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful
+ * for allowing arbitrary processes run in a container, and still have all zombies reaped. */
+
+ assert_se(sigfillset(&fullmask) >= 0);
+ assert_se(sigprocmask(SIG_BLOCK, &fullmask, &oldmask) >= 0);
+
+ pid = fork();
+ if (pid < 0)
+ return log_error_errno(errno, "Failed to fork child pid: %m");
+
+ if (pid == 0) {
+ /* Return in the child */
+ assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) >= 0);
+ setsid();
+ return 0;
+ }
+
+ reset_all_signal_handlers();
+
+ log_close();
+ close_all_fds(NULL, 0);
+ log_open();
+
+ /* Flush out /proc/self/environ, so that we don't leak the environment from the host into the container. Also,
+ * set $container= and $container_uuid= so that clients in the container that query it from /proc/1/environ
+ * find them set. */
+ sd_id128_to_string(uuid, new_environment + sizeof(new_environment) - SD_ID128_STRING_MAX);
+ reset_environ(new_environment, sizeof(new_environment));
+
+ (void) rename_process("(sd-stubinit)");
+
+ assert_se(sigemptyset(&waitmask) >= 0);
+ assert_se(sigset_add_many(&waitmask,
+ SIGCHLD, /* posix: process died */
+ SIGINT, /* sysv: ctrl-alt-del */
+ SIGRTMIN+3, /* systemd: halt */
+ SIGRTMIN+4, /* systemd: poweroff */
+ SIGRTMIN+5, /* systemd: reboot */
+ SIGRTMIN+6, /* systemd: kexec */
+ SIGRTMIN+13, /* systemd: halt */
+ SIGRTMIN+14, /* systemd: poweroff */
+ SIGRTMIN+15, /* systemd: reboot */
+ SIGRTMIN+16, /* systemd: kexec */
+ -1) >= 0);
+
+ /* Note that we ignore SIGTERM (sysv's reexec), SIGHUP (reload), and all other signals here, since we don't
+ * support reexec/reloading in this stub process. */
+
+ for (;;) {
+ siginfo_t si;
+ usec_t current_usec;
+
+ si.si_pid = 0;
+ r = waitid(P_ALL, 0, &si, WEXITED|WNOHANG);
+ if (r < 0) {
+ r = log_error_errno(errno, "Failed to reap children: %m");
+ goto finish;
+ }
+
+ current_usec = now(CLOCK_MONOTONIC);
+
+ if (si.si_pid == pid || current_usec >= quit_usec) {
+
+ /* The child we started ourselves died or we reached a timeout. */
+
+ if (state == STATE_REBOOT) { /* dispatch a queued reboot */
+ (void) reboot(RB_AUTOBOOT);
+ r = log_error_errno(errno, "Failed to reboot: %m");
+ goto finish;
+
+ } else if (state == STATE_POWEROFF)
+ (void) reboot(RB_POWER_OFF); /* if this fails, fall back to normal exit. */
+
+ if (si.si_pid == pid && si.si_code == CLD_EXITED)
+ r = si.si_status; /* pass on exit code */
+ else
+ r = EXIT_EXCEPTION; /* signal, coredump, timeout, … */
+
+ goto finish;
+ }
+ if (si.si_pid != 0)
+ /* We reaped something. Retry until there's nothing more to reap. */
+ continue;
+
+ if (quit_usec == USEC_INFINITY)
+ r = sigwaitinfo(&waitmask, &si);
+ else {
+ struct timespec ts;
+ r = sigtimedwait(&waitmask, &si, timespec_store(&ts, quit_usec - current_usec));
+ }
+ if (r < 0) {
+ if (errno == EINTR) /* strace -p attach can result in EINTR, let's handle this nicely. */
+ continue;
+ if (errno == EAGAIN) /* timeout reached */
+ continue;
+
+ r = log_error_errno(errno, "Failed to wait for signal: %m");
+ goto finish;
+ }
+
+ if (si.si_signo == SIGCHLD)
+ continue; /* Let's reap this */
+
+ if (state != STATE_RUNNING)
+ continue;
+
+ /* Would love to use a switch() statement here, but SIGRTMIN is actually a function call, not a
+ * constant… */
+
+ if (si.si_signo == SIGRTMIN+3 ||
+ si.si_signo == SIGRTMIN+4 ||
+ si.si_signo == SIGRTMIN+13 ||
+ si.si_signo == SIGRTMIN+14)
+
+ state = STATE_POWEROFF;
+
+ else if (si.si_signo == SIGINT ||
+ si.si_signo == SIGRTMIN+5 ||
+ si.si_signo == SIGRTMIN+6 ||
+ si.si_signo == SIGRTMIN+15 ||
+ si.si_signo == SIGRTMIN+16)
+
+ state = STATE_REBOOT;
+ else
+ assert_not_reached("Got unexpected signal");
+
+ r = kill_and_sigcont(pid, SIGTERM);
+
+ /* Let's send a SIGHUP after the SIGTERM, as shells tend to ignore SIGTERM but do react to SIGHUP. We
+ * do it strictly in this order, so that the SIGTERM is dispatched first, and SIGHUP second for those
+ * processes which handle both. That's because services tend to bind configuration reload or something
+ * else to SIGHUP. */
+
+ if (r != -ESRCH)
+ (void) kill(pid, SIGHUP);
+
+ quit_usec = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC;
+ }
+
+finish:
+ _exit(r < 0 ? EXIT_FAILURE : r);
+}
diff --git a/src/nspawn/nspawn-stub-pid1.h b/src/nspawn/nspawn-stub-pid1.h
new file mode 100644
index 0000000..8982d50
--- /dev/null
+++ b/src/nspawn/nspawn-stub-pid1.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-id128.h"
+
+int stub_pid1(sd_id128_t uuid);
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
new file mode 100644
index 0000000..e0c2d71
--- /dev/null
+++ b/src/nspawn/nspawn.c
@@ -0,0 +1,4613 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_BLKID
+#include <blkid.h>
+#endif
+#include <errno.h>
+#include <getopt.h>
+#include <grp.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+#include <pwd.h>
+#include <sched.h>
+#if HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/personality.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "barrier.h"
+#include "base-filesystem.h"
+#include "blkid-util.h"
+#include "btrfs-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cap-list.h"
+#include "capability-util.h"
+#include "cgroup-util.h"
+#include "copy.h"
+#include "cpu-set-util.h"
+#include "dev-setup.h"
+#include "dissect-image.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fdset.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "gpt.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "log.h"
+#include "loop-util.h"
+#include "loopback-setup.h"
+#include "machine-image.h"
+#include "macro.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "netlink-util.h"
+#include "nspawn-cgroup.h"
+#include "nspawn-def.h"
+#include "nspawn-expose-ports.h"
+#include "nspawn-mount.h"
+#include "nspawn-network.h"
+#include "nspawn-patch-uid.h"
+#include "nspawn-register.h"
+#include "nspawn-seccomp.h"
+#include "nspawn-settings.h"
+#include "nspawn-setuid.h"
+#include "nspawn-stub-pid1.h"
+#include "os-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "ptyfwd.h"
+#include "random-util.h"
+#include "raw-clone.h"
+#include "rlimit-util.h"
+#include "rm-rf.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "util.h"
+
+#if HAVE_SPLIT_USR
+#define STATIC_RESOLV_CONF "/lib/systemd/resolv.conf"
+#else
+#define STATIC_RESOLV_CONF "/usr/lib/systemd/resolv.conf"
+#endif
+
+/* nspawn is listening on the socket at the path in the constant nspawn_notify_socket_path
+ * nspawn_notify_socket_path is relative to the container
+ * the init process in the container pid can send messages to nspawn following the sd_notify(3) protocol */
+#define NSPAWN_NOTIFY_SOCKET_PATH "/run/systemd/nspawn/notify"
+
+#define EXIT_FORCE_RESTART 133
+
+typedef enum ContainerStatus {
+ CONTAINER_TERMINATED,
+ CONTAINER_REBOOTED
+} ContainerStatus;
+
+static char *arg_directory = NULL;
+static char *arg_template = NULL;
+static char *arg_chdir = NULL;
+static char *arg_pivot_root_new = NULL;
+static char *arg_pivot_root_old = NULL;
+static char *arg_user = NULL;
+static sd_id128_t arg_uuid = {};
+static char *arg_machine = NULL; /* The name used by the host to refer to this */
+static char *arg_hostname = NULL; /* The name the payload sees by default */
+static const char *arg_selinux_context = NULL;
+static const char *arg_selinux_apifs_context = NULL;
+static const char *arg_slice = NULL;
+static bool arg_private_network = false;
+static bool arg_read_only = false;
+static StartMode arg_start_mode = START_PID1;
+static bool arg_ephemeral = false;
+static LinkJournal arg_link_journal = LINK_AUTO;
+static bool arg_link_journal_try = false;
+static uint64_t arg_caps_retain =
+ (1ULL << CAP_AUDIT_CONTROL) |
+ (1ULL << CAP_AUDIT_WRITE) |
+ (1ULL << CAP_CHOWN) |
+ (1ULL << CAP_DAC_OVERRIDE) |
+ (1ULL << CAP_DAC_READ_SEARCH) |
+ (1ULL << CAP_FOWNER) |
+ (1ULL << CAP_FSETID) |
+ (1ULL << CAP_IPC_OWNER) |
+ (1ULL << CAP_KILL) |
+ (1ULL << CAP_LEASE) |
+ (1ULL << CAP_LINUX_IMMUTABLE) |
+ (1ULL << CAP_MKNOD) |
+ (1ULL << CAP_NET_BIND_SERVICE) |
+ (1ULL << CAP_NET_BROADCAST) |
+ (1ULL << CAP_NET_RAW) |
+ (1ULL << CAP_SETFCAP) |
+ (1ULL << CAP_SETGID) |
+ (1ULL << CAP_SETPCAP) |
+ (1ULL << CAP_SETUID) |
+ (1ULL << CAP_SYS_ADMIN) |
+ (1ULL << CAP_SYS_BOOT) |
+ (1ULL << CAP_SYS_CHROOT) |
+ (1ULL << CAP_SYS_NICE) |
+ (1ULL << CAP_SYS_PTRACE) |
+ (1ULL << CAP_SYS_RESOURCE) |
+ (1ULL << CAP_SYS_TTY_CONFIG);
+static CustomMount *arg_custom_mounts = NULL;
+static size_t arg_n_custom_mounts = 0;
+static char **arg_setenv = NULL;
+static bool arg_quiet = false;
+static bool arg_register = true;
+static bool arg_keep_unit = false;
+static char **arg_network_interfaces = NULL;
+static char **arg_network_macvlan = NULL;
+static char **arg_network_ipvlan = NULL;
+static bool arg_network_veth = false;
+static char **arg_network_veth_extra = NULL;
+static char *arg_network_bridge = NULL;
+static char *arg_network_zone = NULL;
+static char *arg_network_namespace_path = NULL;
+static unsigned long arg_personality = PERSONALITY_INVALID;
+static char *arg_image = NULL;
+static VolatileMode arg_volatile_mode = VOLATILE_NO;
+static ExposePort *arg_expose_ports = NULL;
+static char **arg_property = NULL;
+static UserNamespaceMode arg_userns_mode = USER_NAMESPACE_NO;
+static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
+static bool arg_userns_chown = false;
+static int arg_kill_signal = 0;
+static CGroupUnified arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_UNKNOWN;
+static SettingsMask arg_settings_mask = 0;
+static int arg_settings_trusted = -1;
+static char **arg_parameters = NULL;
+static const char *arg_container_service_name = "systemd-nspawn";
+static bool arg_notify_ready = false;
+static bool arg_use_cgns = true;
+static unsigned long arg_clone_ns_flags = CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS;
+static MountSettingsMask arg_mount_settings = MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_TMPFS_TMP;
+static void *arg_root_hash = NULL;
+static size_t arg_root_hash_size = 0;
+static char **arg_syscall_whitelist = NULL;
+static char **arg_syscall_blacklist = NULL;
+static struct rlimit *arg_rlimit[_RLIMIT_MAX] = {};
+static bool arg_no_new_privileges = false;
+static int arg_oom_score_adjust = 0;
+static bool arg_oom_score_adjust_set = false;
+static cpu_set_t *arg_cpuset = NULL;
+static unsigned arg_cpuset_ncpus = 0;
+static ResolvConfMode arg_resolv_conf = RESOLV_CONF_AUTO;
+static TimezoneMode arg_timezone = TIMEZONE_AUTO;
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(false);
+
+ r = terminal_urlify_man("systemd-nspawn", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
+ "Spawn a command or OS in a light-weight container.\n\n"
+ " -h --help Show this help\n"
+ " --version Print version string\n"
+ " -q --quiet Do not show status information\n"
+ " -D --directory=PATH Root directory for the container\n"
+ " --template=PATH Initialize root directory from template directory,\n"
+ " if missing\n"
+ " -x --ephemeral Run container with snapshot of root directory, and\n"
+ " remove it after exit\n"
+ " -i --image=PATH File system device or disk image for the container\n"
+ " --root-hash=HASH Specify verity root hash\n"
+ " -a --as-pid2 Maintain a stub init as PID1, invoke binary as PID2\n"
+ " -b --boot Boot up full system (i.e. invoke init)\n"
+ " --chdir=PATH Set working directory in the container\n"
+ " --pivot-root=PATH[:PATH]\n"
+ " Pivot root to given directory in the container\n"
+ " -u --user=USER Run the command under specified user or uid\n"
+ " -M --machine=NAME Set the machine name for the container\n"
+ " --hostname=NAME Override the hostname for the container\n"
+ " --uuid=UUID Set a specific machine UUID for the container\n"
+ " -S --slice=SLICE Place the container in the specified slice\n"
+ " --property=NAME=VALUE Set scope unit property\n"
+ " -U --private-users=pick Run within user namespace, autoselect UID/GID range\n"
+ " --private-users[=UIDBASE[:NUIDS]]\n"
+ " Similar, but with user configured UID/GID range\n"
+ " --private-users-chown Adjust OS tree ownership to private UID/GID range\n"
+ " --private-network Disable network in container\n"
+ " --network-interface=INTERFACE\n"
+ " Assign an existing network interface to the\n"
+ " container\n"
+ " --network-macvlan=INTERFACE\n"
+ " Create a macvlan network interface based on an\n"
+ " existing network interface to the container\n"
+ " --network-ipvlan=INTERFACE\n"
+ " Create a ipvlan network interface based on an\n"
+ " existing network interface to the container\n"
+ " -n --network-veth Add a virtual Ethernet connection between host\n"
+ " and container\n"
+ " --network-veth-extra=HOSTIF[:CONTAINERIF]\n"
+ " Add an additional virtual Ethernet link between\n"
+ " host and container\n"
+ " --network-bridge=INTERFACE\n"
+ " Add a virtual Ethernet connection to the container\n"
+ " and attach it to an existing bridge on the host\n"
+ " --network-zone=NAME Similar, but attach the new interface to an\n"
+ " an automatically managed bridge interface\n"
+ " --network-namespace-path=PATH\n"
+ " Set network namespace to the one represented by\n"
+ " the specified kernel namespace file node\n"
+ " -p --port=[PROTOCOL:]HOSTPORT[:CONTAINERPORT]\n"
+ " Expose a container IP port on the host\n"
+ " -Z --selinux-context=SECLABEL\n"
+ " Set the SELinux security context to be used by\n"
+ " processes in the container\n"
+ " -L --selinux-apifs-context=SECLABEL\n"
+ " Set the SELinux security context to be used by\n"
+ " API/tmpfs file systems in the container\n"
+ " --capability=CAP In addition to the default, retain specified\n"
+ " capability\n"
+ " --drop-capability=CAP Drop the specified capability from the default set\n"
+ " --system-call-filter=LIST|~LIST\n"
+ " Permit/prohibit specific system calls\n"
+ " --rlimit=NAME=LIMIT Set a resource limit for the payload\n"
+ " --oom-score-adjust=VALUE\n"
+ " Adjust the OOM score value for the payload\n"
+ " --cpu-affinity=CPUS Adjust the CPU affinity of the container\n"
+ " --kill-signal=SIGNAL Select signal to use for shutting down PID 1\n"
+ " --link-journal=MODE Link up guest journal, one of no, auto, guest, \n"
+ " host, try-guest, try-host\n"
+ " -j Equivalent to --link-journal=try-guest\n"
+ " --resolv-conf=MODE Select mode of /etc/resolv.conf initialization\n"
+ " --timezone=MODE Select mode of /etc/localtime initialization\n"
+ " --read-only Mount the root directory read-only\n"
+ " --bind=PATH[:PATH[:OPTIONS]]\n"
+ " Bind mount a file or directory from the host into\n"
+ " the container\n"
+ " --bind-ro=PATH[:PATH[:OPTIONS]\n"
+ " Similar, but creates a read-only bind mount\n"
+ " --tmpfs=PATH:[OPTIONS] Mount an empty tmpfs to the specified directory\n"
+ " --overlay=PATH[:PATH...]:PATH\n"
+ " Create an overlay mount from the host to \n"
+ " the container\n"
+ " --overlay-ro=PATH[:PATH...]:PATH\n"
+ " Similar, but creates a read-only overlay mount\n"
+ " -E --setenv=NAME=VALUE Pass an environment variable to PID 1\n"
+ " --register=BOOLEAN Register container as machine\n"
+ " --keep-unit Do not register a scope for the machine, reuse\n"
+ " the service unit nspawn is running in\n"
+ " --volatile[=MODE] Run the system in volatile mode\n"
+ " --settings=BOOLEAN Load additional settings from .nspawn file\n"
+ " --notify-ready=BOOLEAN Receive notifications from the child init process\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int custom_mount_check_all(void) {
+ size_t i;
+
+ for (i = 0; i < arg_n_custom_mounts; i++) {
+ CustomMount *m = &arg_custom_mounts[i];
+
+ if (path_equal(m->destination, "/") && arg_userns_mode != USER_NAMESPACE_NO) {
+ if (arg_userns_chown)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--private-users-chown may not be combined with custom root mounts.");
+ else if (arg_uid_shift == UID_INVALID)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--private-users with automatic UID shift may not be combined with custom root mounts.");
+ }
+ }
+
+ return 0;
+}
+
+static int detect_unified_cgroup_hierarchy_from_environment(void) {
+ const char *e;
+ int r;
+
+ /* Allow the user to control whether the unified hierarchy is used */
+ e = getenv("UNIFIED_CGROUP_HIERARCHY");
+ if (e) {
+ r = parse_boolean(e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse $UNIFIED_CGROUP_HIERARCHY.");
+ if (r > 0)
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL;
+ else
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
+ }
+
+ return 0;
+}
+
+static int detect_unified_cgroup_hierarchy_from_image(const char *directory) {
+ int r;
+
+ /* Let's inherit the mode to use from the host system, but let's take into consideration what systemd in the
+ * image actually supports. */
+ r = cg_all_unified();
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether we are in all unified mode.");
+ if (r > 0) {
+ /* Unified cgroup hierarchy support was added in 230. Unfortunately the detection
+ * routine only detects 231, so we'll have a false negative here for 230. */
+ r = systemd_installation_has_version(directory, 230);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine systemd version in container: %m");
+ if (r > 0)
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL;
+ else
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
+ } else if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
+ /* Mixed cgroup hierarchy support was added in 233 */
+ r = systemd_installation_has_version(directory, 233);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine systemd version in container: %m");
+ if (r > 0)
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_SYSTEMD;
+ else
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
+ } else
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
+
+ log_debug("Using %s hierarchy for container.",
+ arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_NONE ? "legacy" :
+ arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_SYSTEMD ? "hybrid" : "unified");
+
+ return 0;
+}
+
+static void parse_share_ns_env(const char *name, unsigned long ns_flag) {
+ int r;
+
+ r = getenv_bool(name);
+ if (r == -ENXIO)
+ return;
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse %s from environment, defaulting to false.", name);
+ arg_clone_ns_flags = (arg_clone_ns_flags & ~ns_flag) | (r > 0 ? 0 : ns_flag);
+}
+
+static void parse_mount_settings_env(void) {
+ const char *e;
+ int r;
+
+ r = getenv_bool("SYSTEMD_NSPAWN_TMPFS_TMP");
+ if (r >= 0)
+ SET_FLAG(arg_mount_settings, MOUNT_APPLY_TMPFS_TMP, r > 0);
+ else if (r != -ENXIO)
+ log_warning_errno(r, "Failed to parse $SYSTEMD_NSPAWN_TMPFS_TMP, ignoring: %m");
+
+ e = getenv("SYSTEMD_NSPAWN_API_VFS_WRITABLE");
+ if (!e)
+ return;
+
+ if (streq(e, "network")) {
+ arg_mount_settings |= MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS;
+ return;
+ }
+
+ r = parse_boolean(e);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse SYSTEMD_NSPAWN_API_VFS_WRITABLE from environment, ignoring.");
+ return;
+ }
+
+ SET_FLAG(arg_mount_settings, MOUNT_APPLY_APIVFS_RO, r == 0);
+ SET_FLAG(arg_mount_settings, MOUNT_APPLY_APIVFS_NETNS, false);
+}
+
+static void parse_environment(void) {
+ const char *e;
+ int r;
+
+ parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_IPC", CLONE_NEWIPC);
+ parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_PID", CLONE_NEWPID);
+ parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_UTS", CLONE_NEWUTS);
+ parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_SYSTEM", CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS);
+
+ parse_mount_settings_env();
+
+ /* SYSTEMD_NSPAWN_USE_CGNS=0 can be used to disable CLONE_NEWCGROUP use,
+ * even if it is supported. If not supported, it has no effect. */
+ r = getenv_bool("SYSTEMD_NSPAWN_USE_CGNS");
+ if (r == 0 || !cg_ns_supported())
+ arg_use_cgns = false;
+
+ e = getenv("SYSTEMD_NSPAWN_CONTAINER_SERVICE");
+ if (e)
+ arg_container_service_name = e;
+
+ detect_unified_cgroup_hierarchy_from_environment();
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_PRIVATE_NETWORK,
+ ARG_UUID,
+ ARG_READ_ONLY,
+ ARG_CAPABILITY,
+ ARG_DROP_CAPABILITY,
+ ARG_LINK_JOURNAL,
+ ARG_BIND,
+ ARG_BIND_RO,
+ ARG_TMPFS,
+ ARG_OVERLAY,
+ ARG_OVERLAY_RO,
+ ARG_SHARE_SYSTEM,
+ ARG_REGISTER,
+ ARG_KEEP_UNIT,
+ ARG_NETWORK_INTERFACE,
+ ARG_NETWORK_MACVLAN,
+ ARG_NETWORK_IPVLAN,
+ ARG_NETWORK_BRIDGE,
+ ARG_NETWORK_ZONE,
+ ARG_NETWORK_VETH_EXTRA,
+ ARG_NETWORK_NAMESPACE_PATH,
+ ARG_PERSONALITY,
+ ARG_VOLATILE,
+ ARG_TEMPLATE,
+ ARG_PROPERTY,
+ ARG_PRIVATE_USERS,
+ ARG_KILL_SIGNAL,
+ ARG_SETTINGS,
+ ARG_CHDIR,
+ ARG_PIVOT_ROOT,
+ ARG_PRIVATE_USERS_CHOWN,
+ ARG_NOTIFY_READY,
+ ARG_ROOT_HASH,
+ ARG_SYSTEM_CALL_FILTER,
+ ARG_RLIMIT,
+ ARG_HOSTNAME,
+ ARG_NO_NEW_PRIVILEGES,
+ ARG_OOM_SCORE_ADJUST,
+ ARG_CPU_AFFINITY,
+ ARG_RESOLV_CONF,
+ ARG_TIMEZONE,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "directory", required_argument, NULL, 'D' },
+ { "template", required_argument, NULL, ARG_TEMPLATE },
+ { "ephemeral", no_argument, NULL, 'x' },
+ { "user", required_argument, NULL, 'u' },
+ { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
+ { "as-pid2", no_argument, NULL, 'a' },
+ { "boot", no_argument, NULL, 'b' },
+ { "uuid", required_argument, NULL, ARG_UUID },
+ { "read-only", no_argument, NULL, ARG_READ_ONLY },
+ { "capability", required_argument, NULL, ARG_CAPABILITY },
+ { "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY },
+ { "no-new-privileges", required_argument, NULL, ARG_NO_NEW_PRIVILEGES },
+ { "link-journal", required_argument, NULL, ARG_LINK_JOURNAL },
+ { "bind", required_argument, NULL, ARG_BIND },
+ { "bind-ro", required_argument, NULL, ARG_BIND_RO },
+ { "tmpfs", required_argument, NULL, ARG_TMPFS },
+ { "overlay", required_argument, NULL, ARG_OVERLAY },
+ { "overlay-ro", required_argument, NULL, ARG_OVERLAY_RO },
+ { "machine", required_argument, NULL, 'M' },
+ { "hostname", required_argument, NULL, ARG_HOSTNAME },
+ { "slice", required_argument, NULL, 'S' },
+ { "setenv", required_argument, NULL, 'E' },
+ { "selinux-context", required_argument, NULL, 'Z' },
+ { "selinux-apifs-context", required_argument, NULL, 'L' },
+ { "quiet", no_argument, NULL, 'q' },
+ { "share-system", no_argument, NULL, ARG_SHARE_SYSTEM }, /* not documented */
+ { "register", required_argument, NULL, ARG_REGISTER },
+ { "keep-unit", no_argument, NULL, ARG_KEEP_UNIT },
+ { "network-interface", required_argument, NULL, ARG_NETWORK_INTERFACE },
+ { "network-macvlan", required_argument, NULL, ARG_NETWORK_MACVLAN },
+ { "network-ipvlan", required_argument, NULL, ARG_NETWORK_IPVLAN },
+ { "network-veth", no_argument, NULL, 'n' },
+ { "network-veth-extra", required_argument, NULL, ARG_NETWORK_VETH_EXTRA },
+ { "network-bridge", required_argument, NULL, ARG_NETWORK_BRIDGE },
+ { "network-zone", required_argument, NULL, ARG_NETWORK_ZONE },
+ { "network-namespace-path", required_argument, NULL, ARG_NETWORK_NAMESPACE_PATH },
+ { "personality", required_argument, NULL, ARG_PERSONALITY },
+ { "image", required_argument, NULL, 'i' },
+ { "volatile", optional_argument, NULL, ARG_VOLATILE },
+ { "port", required_argument, NULL, 'p' },
+ { "property", required_argument, NULL, ARG_PROPERTY },
+ { "private-users", optional_argument, NULL, ARG_PRIVATE_USERS },
+ { "private-users-chown", optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN },
+ { "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
+ { "settings", required_argument, NULL, ARG_SETTINGS },
+ { "chdir", required_argument, NULL, ARG_CHDIR },
+ { "pivot-root", required_argument, NULL, ARG_PIVOT_ROOT },
+ { "notify-ready", required_argument, NULL, ARG_NOTIFY_READY },
+ { "root-hash", required_argument, NULL, ARG_ROOT_HASH },
+ { "system-call-filter", required_argument, NULL, ARG_SYSTEM_CALL_FILTER },
+ { "rlimit", required_argument, NULL, ARG_RLIMIT },
+ { "oom-score-adjust", required_argument, NULL, ARG_OOM_SCORE_ADJUST },
+ { "cpu-affinity", required_argument, NULL, ARG_CPU_AFFINITY },
+ { "resolv-conf", required_argument, NULL, ARG_RESOLV_CONF },
+ { "timezone", required_argument, NULL, ARG_TIMEZONE },
+ {}
+ };
+
+ int c, r;
+ const char *p;
+ uint64_t plus = 0, minus = 0;
+ bool mask_all_settings = false, mask_no_settings = false;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "+hD:u:abL:M:jS:Z:qi:xp:nUE:", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'D':
+ r = parse_path_argument_and_warn(optarg, false, &arg_directory);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_TEMPLATE:
+ r = parse_path_argument_and_warn(optarg, false, &arg_template);
+ if (r < 0)
+ return r;
+ break;
+
+ case 'i':
+ r = parse_path_argument_and_warn(optarg, false, &arg_image);
+ if (r < 0)
+ return r;
+ break;
+
+ case 'x':
+ arg_ephemeral = true;
+ arg_settings_mask |= SETTING_EPHEMERAL;
+ break;
+
+ case 'u':
+ r = free_and_strdup(&arg_user, optarg);
+ if (r < 0)
+ return log_oom();
+
+ arg_settings_mask |= SETTING_USER;
+ break;
+
+ case ARG_NETWORK_ZONE: {
+ char *j;
+
+ j = strappend("vz-", optarg);
+ if (!j)
+ return log_oom();
+
+ if (!ifname_valid(j)) {
+ log_error("Network zone name not valid: %s", j);
+ free(j);
+ return -EINVAL;
+ }
+
+ free_and_replace(arg_network_zone, j);
+
+ arg_network_veth = true;
+ arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
+ break;
+ }
+
+ case ARG_NETWORK_BRIDGE:
+
+ if (!ifname_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Bridge interface name not valid: %s", optarg);
+
+ r = free_and_strdup(&arg_network_bridge, optarg);
+ if (r < 0)
+ return log_oom();
+
+ _fallthrough_;
+ case 'n':
+ arg_network_veth = true;
+ arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
+ break;
+
+ case ARG_NETWORK_VETH_EXTRA:
+ r = veth_extra_parse(&arg_network_veth_extra, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --network-veth-extra= parameter: %s", optarg);
+
+ arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
+ break;
+
+ case ARG_NETWORK_INTERFACE:
+ if (!ifname_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Network interface name not valid: %s", optarg);
+
+ if (strv_extend(&arg_network_interfaces, optarg) < 0)
+ return log_oom();
+
+ arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
+ break;
+
+ case ARG_NETWORK_MACVLAN:
+
+ if (!ifname_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "MACVLAN network interface name not valid: %s", optarg);
+
+ if (strv_extend(&arg_network_macvlan, optarg) < 0)
+ return log_oom();
+
+ arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
+ break;
+
+ case ARG_NETWORK_IPVLAN:
+
+ if (!ifname_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "IPVLAN network interface name not valid: %s", optarg);
+
+ if (strv_extend(&arg_network_ipvlan, optarg) < 0)
+ return log_oom();
+
+ _fallthrough_;
+ case ARG_PRIVATE_NETWORK:
+ arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
+ break;
+
+ case ARG_NETWORK_NAMESPACE_PATH:
+ r = parse_path_argument_and_warn(optarg, false, &arg_network_namespace_path);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case 'b':
+ if (arg_start_mode == START_PID2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--boot and --as-pid2 may not be combined.");
+
+ arg_start_mode = START_BOOT;
+ arg_settings_mask |= SETTING_START_MODE;
+ break;
+
+ case 'a':
+ if (arg_start_mode == START_BOOT)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--boot and --as-pid2 may not be combined.");
+
+ arg_start_mode = START_PID2;
+ arg_settings_mask |= SETTING_START_MODE;
+ break;
+
+ case ARG_UUID:
+ r = sd_id128_from_string(optarg, &arg_uuid);
+ if (r < 0)
+ return log_error_errno(r, "Invalid UUID: %s", optarg);
+
+ if (sd_id128_is_null(arg_uuid))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Machine UUID may not be all zeroes.");
+
+ arg_settings_mask |= SETTING_MACHINE_ID;
+ break;
+
+ case 'S':
+ arg_slice = optarg;
+ break;
+
+ case 'M':
+ if (isempty(optarg))
+ arg_machine = mfree(arg_machine);
+ else {
+ if (!machine_name_is_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid machine name: %s", optarg);
+
+ r = free_and_strdup(&arg_machine, optarg);
+ if (r < 0)
+ return log_oom();
+ }
+ break;
+
+ case ARG_HOSTNAME:
+ if (isempty(optarg))
+ arg_hostname = mfree(arg_hostname);
+ else {
+ if (!hostname_is_valid(optarg, false))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid hostname: %s", optarg);
+
+ r = free_and_strdup(&arg_hostname, optarg);
+ if (r < 0)
+ return log_oom();
+ }
+
+ arg_settings_mask |= SETTING_HOSTNAME;
+ break;
+
+ case 'Z':
+ arg_selinux_context = optarg;
+ break;
+
+ case 'L':
+ arg_selinux_apifs_context = optarg;
+ break;
+
+ case ARG_READ_ONLY:
+ arg_read_only = true;
+ arg_settings_mask |= SETTING_READ_ONLY;
+ break;
+
+ case ARG_CAPABILITY:
+ case ARG_DROP_CAPABILITY: {
+ p = optarg;
+ for (;;) {
+ _cleanup_free_ char *t = NULL;
+
+ r = extract_first_word(&p, &t, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse capability %s.", t);
+
+ if (r == 0)
+ break;
+
+ if (streq(t, "all")) {
+ if (c == ARG_CAPABILITY)
+ plus = (uint64_t) -1;
+ else
+ minus = (uint64_t) -1;
+ } else {
+ r = capability_from_name(t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse capability %s.", t);
+
+ if (c == ARG_CAPABILITY)
+ plus |= 1ULL << r;
+ else
+ minus |= 1ULL << r;
+ }
+ }
+
+ arg_settings_mask |= SETTING_CAPABILITY;
+ break;
+ }
+
+ case ARG_NO_NEW_PRIVILEGES:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --no-new-privileges= argument: %s", optarg);
+
+ arg_no_new_privileges = r;
+ arg_settings_mask |= SETTING_NO_NEW_PRIVILEGES;
+ break;
+
+ case 'j':
+ arg_link_journal = LINK_GUEST;
+ arg_link_journal_try = true;
+ arg_settings_mask |= SETTING_LINK_JOURNAL;
+ break;
+
+ case ARG_LINK_JOURNAL:
+ r = parse_link_journal(optarg, &arg_link_journal, &arg_link_journal_try);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse link journal mode %s", optarg);
+ return -EINVAL;
+ }
+
+ arg_settings_mask |= SETTING_LINK_JOURNAL;
+ break;
+
+ case ARG_BIND:
+ case ARG_BIND_RO:
+ r = bind_mount_parse(&arg_custom_mounts, &arg_n_custom_mounts, optarg, c == ARG_BIND_RO);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --bind(-ro)= argument %s: %m", optarg);
+
+ arg_settings_mask |= SETTING_CUSTOM_MOUNTS;
+ break;
+
+ case ARG_TMPFS:
+ r = tmpfs_mount_parse(&arg_custom_mounts, &arg_n_custom_mounts, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --tmpfs= argument %s: %m", optarg);
+
+ arg_settings_mask |= SETTING_CUSTOM_MOUNTS;
+ break;
+
+ case ARG_OVERLAY:
+ case ARG_OVERLAY_RO:
+ r = overlay_mount_parse(&arg_custom_mounts, &arg_n_custom_mounts, optarg, c == ARG_OVERLAY_RO);
+ if (r == -EADDRNOTAVAIL)
+ return log_error_errno(r, "--overlay(-ro)= needs at least two colon-separated directories specified.");
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --overlay(-ro)= argument %s: %m", optarg);
+
+ arg_settings_mask |= SETTING_CUSTOM_MOUNTS;
+ break;
+
+ case 'E': {
+ char **n;
+
+ if (!env_assignment_is_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Environment variable assignment '%s' is not valid.", optarg);
+
+ n = strv_env_set(arg_setenv, optarg);
+ if (!n)
+ return log_oom();
+
+ strv_free_and_replace(arg_setenv, n);
+ arg_settings_mask |= SETTING_ENVIRONMENT;
+ break;
+ }
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case ARG_SHARE_SYSTEM:
+ /* We don't officially support this anymore, except for compat reasons. People should use the
+ * $SYSTEMD_NSPAWN_SHARE_* environment variables instead. */
+ log_warning("Please do not use --share-system anymore, use $SYSTEMD_NSPAWN_SHARE_* instead.");
+ arg_clone_ns_flags = 0;
+ break;
+
+ case ARG_REGISTER:
+ r = parse_boolean(optarg);
+ if (r < 0) {
+ log_error("Failed to parse --register= argument: %s", optarg);
+ return r;
+ }
+
+ arg_register = r;
+ break;
+
+ case ARG_KEEP_UNIT:
+ arg_keep_unit = true;
+ break;
+
+ case ARG_PERSONALITY:
+
+ arg_personality = personality_from_string(optarg);
+ if (arg_personality == PERSONALITY_INVALID)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown or unsupported personality '%s'.", optarg);
+
+ arg_settings_mask |= SETTING_PERSONALITY;
+ break;
+
+ case ARG_VOLATILE:
+
+ if (!optarg)
+ arg_volatile_mode = VOLATILE_YES;
+ else if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(volatile_mode, VolatileMode, _VOLATILE_MODE_MAX);
+ return 0;
+ } else {
+ VolatileMode m;
+
+ m = volatile_mode_from_string(optarg);
+ if (m < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --volatile= argument: %s", optarg);
+ else
+ arg_volatile_mode = m;
+ }
+
+ arg_settings_mask |= SETTING_VOLATILE_MODE;
+ break;
+
+ case 'p':
+ r = expose_port_parse(&arg_expose_ports, optarg);
+ if (r == -EEXIST)
+ return log_error_errno(r, "Duplicate port specification: %s", optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse host port %s: %m", optarg);
+
+ arg_settings_mask |= SETTING_EXPOSE_PORTS;
+ break;
+
+ case ARG_PROPERTY:
+ if (strv_extend(&arg_property, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_PRIVATE_USERS: {
+ int boolean = -1;
+
+ if (!optarg)
+ boolean = true;
+ else if (!in_charset(optarg, DIGITS))
+ /* do *not* parse numbers as booleans */
+ boolean = parse_boolean(optarg);
+
+ if (boolean == false) {
+ /* no: User namespacing off */
+ arg_userns_mode = USER_NAMESPACE_NO;
+ arg_uid_shift = UID_INVALID;
+ arg_uid_range = UINT32_C(0x10000);
+ } else if (boolean == true) {
+ /* yes: User namespacing on, UID range is read from root dir */
+ arg_userns_mode = USER_NAMESPACE_FIXED;
+ arg_uid_shift = UID_INVALID;
+ arg_uid_range = UINT32_C(0x10000);
+ } else if (streq(optarg, "pick")) {
+ /* pick: User namespacing on, UID range is picked randomly */
+ arg_userns_mode = USER_NAMESPACE_PICK;
+ arg_uid_shift = UID_INVALID;
+ arg_uid_range = UINT32_C(0x10000);
+ } else {
+ _cleanup_free_ char *buffer = NULL;
+ const char *range, *shift;
+
+ /* anything else: User namespacing on, UID range is explicitly configured */
+
+ range = strchr(optarg, ':');
+ if (range) {
+ buffer = strndup(optarg, range - optarg);
+ if (!buffer)
+ return log_oom();
+ shift = buffer;
+
+ range++;
+ r = safe_atou32(range, &arg_uid_range);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse UID range \"%s\": %m", range);
+ } else
+ shift = optarg;
+
+ r = parse_uid(shift, &arg_uid_shift);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse UID \"%s\": %m", optarg);
+
+ arg_userns_mode = USER_NAMESPACE_FIXED;
+ }
+
+ if (arg_uid_range <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "UID range cannot be 0.");
+
+ arg_settings_mask |= SETTING_USERNS;
+ break;
+ }
+
+ case 'U':
+ if (userns_supported()) {
+ arg_userns_mode = USER_NAMESPACE_PICK;
+ arg_uid_shift = UID_INVALID;
+ arg_uid_range = UINT32_C(0x10000);
+
+ arg_settings_mask |= SETTING_USERNS;
+ }
+
+ break;
+
+ case ARG_PRIVATE_USERS_CHOWN:
+ arg_userns_chown = true;
+
+ arg_settings_mask |= SETTING_USERNS;
+ break;
+
+ case ARG_KILL_SIGNAL:
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(signal, int, _NSIG);
+ return 0;
+ }
+
+ arg_kill_signal = signal_from_string(optarg);
+ if (arg_kill_signal < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot parse signal: %s", optarg);
+
+ arg_settings_mask |= SETTING_KILL_SIGNAL;
+ break;
+
+ case ARG_SETTINGS:
+
+ /* no → do not read files
+ * yes → read files, do not override cmdline, trust only subset
+ * override → read files, override cmdline, trust only subset
+ * trusted → read files, do not override cmdline, trust all
+ */
+
+ r = parse_boolean(optarg);
+ if (r < 0) {
+ if (streq(optarg, "trusted")) {
+ mask_all_settings = false;
+ mask_no_settings = false;
+ arg_settings_trusted = true;
+
+ } else if (streq(optarg, "override")) {
+ mask_all_settings = false;
+ mask_no_settings = true;
+ arg_settings_trusted = -1;
+ } else
+ return log_error_errno(r, "Failed to parse --settings= argument: %s", optarg);
+ } else if (r > 0) {
+ /* yes */
+ mask_all_settings = false;
+ mask_no_settings = false;
+ arg_settings_trusted = -1;
+ } else {
+ /* no */
+ mask_all_settings = true;
+ mask_no_settings = false;
+ arg_settings_trusted = false;
+ }
+
+ break;
+
+ case ARG_CHDIR:
+ if (!path_is_absolute(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Working directory %s is not an absolute path.", optarg);
+
+ r = free_and_strdup(&arg_chdir, optarg);
+ if (r < 0)
+ return log_oom();
+
+ arg_settings_mask |= SETTING_WORKING_DIRECTORY;
+ break;
+
+ case ARG_PIVOT_ROOT:
+ r = pivot_root_parse(&arg_pivot_root_new, &arg_pivot_root_old, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --pivot-root= argument %s: %m", optarg);
+
+ arg_settings_mask |= SETTING_PIVOT_ROOT;
+ break;
+
+ case ARG_NOTIFY_READY:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s is not a valid notify mode. Valid modes are: yes, no, and ready.", optarg);
+ arg_notify_ready = r;
+ arg_settings_mask |= SETTING_NOTIFY_READY;
+ break;
+
+ case ARG_ROOT_HASH: {
+ void *k;
+ size_t l;
+
+ r = unhexmem(optarg, strlen(optarg), &k, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse root hash: %s", optarg);
+ if (l < sizeof(sd_id128_t)) {
+ log_error("Root hash must be at least 128bit long: %s", optarg);
+ free(k);
+ return -EINVAL;
+ }
+
+ free(arg_root_hash);
+ arg_root_hash = k;
+ arg_root_hash_size = l;
+ break;
+ }
+
+ case ARG_SYSTEM_CALL_FILTER: {
+ bool negative;
+ const char *items;
+
+ negative = optarg[0] == '~';
+ items = negative ? optarg + 1 : optarg;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&items, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse system call filter: %m");
+
+ if (negative)
+ r = strv_extend(&arg_syscall_blacklist, word);
+ else
+ r = strv_extend(&arg_syscall_whitelist, word);
+ if (r < 0)
+ return log_oom();
+ }
+
+ arg_settings_mask |= SETTING_SYSCALL_FILTER;
+ break;
+ }
+
+ case ARG_RLIMIT: {
+ const char *eq;
+ char *name;
+ int rl;
+
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(rlimit, int, _RLIMIT_MAX);
+ return 0;
+ }
+
+ eq = strchr(optarg, '=');
+ if (!eq)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--rlimit= expects an '=' assignment.");
+
+ name = strndup(optarg, eq - optarg);
+ if (!name)
+ return log_oom();
+
+ rl = rlimit_from_string_harder(name);
+ if (rl < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown resource limit: %s", name);
+
+ if (!arg_rlimit[rl]) {
+ arg_rlimit[rl] = new0(struct rlimit, 1);
+ if (!arg_rlimit[rl])
+ return log_oom();
+ }
+
+ r = rlimit_parse(rl, eq + 1, arg_rlimit[rl]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse resource limit: %s", eq + 1);
+
+ arg_settings_mask |= SETTING_RLIMIT_FIRST << rl;
+ break;
+ }
+
+ case ARG_OOM_SCORE_ADJUST:
+ r = parse_oom_score_adjust(optarg, &arg_oom_score_adjust);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --oom-score-adjust= parameter: %s", optarg);
+
+ arg_oom_score_adjust_set = true;
+ arg_settings_mask |= SETTING_OOM_SCORE_ADJUST;
+ break;
+
+ case ARG_CPU_AFFINITY: {
+ _cleanup_cpu_free_ cpu_set_t *cpuset = NULL;
+
+ r = parse_cpu_set(optarg, &cpuset);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse CPU affinity mask: %s", optarg);
+
+ if (arg_cpuset)
+ CPU_FREE(arg_cpuset);
+
+ arg_cpuset = TAKE_PTR(cpuset);
+ arg_cpuset_ncpus = r;
+ arg_settings_mask |= SETTING_CPU_AFFINITY;
+ break;
+ }
+
+ case ARG_RESOLV_CONF:
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(resolv_conf_mode, ResolvConfMode, _RESOLV_CONF_MODE_MAX);
+ return 0;
+ }
+
+ arg_resolv_conf = resolv_conf_mode_from_string(optarg);
+ if (arg_resolv_conf < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse /etc/resolv.conf mode: %s", optarg);
+
+ arg_settings_mask |= SETTING_RESOLV_CONF;
+ break;
+
+ case ARG_TIMEZONE:
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(timezone_mode, TimezoneMode, _TIMEZONE_MODE_MAX);
+ return 0;
+ }
+
+ arg_timezone = timezone_mode_from_string(optarg);
+ if (arg_timezone < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse /etc/localtime mode: %s", optarg);
+
+ arg_settings_mask |= SETTING_TIMEZONE;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (argc > optind) {
+ strv_free(arg_parameters);
+ arg_parameters = strv_copy(argv + optind);
+ if (!arg_parameters)
+ return log_oom();
+
+ arg_settings_mask |= SETTING_START_MODE;
+ }
+
+ if (arg_ephemeral && arg_template && !arg_directory)
+ /* User asked for ephemeral execution but specified --template= instead of --directory=. Semantically
+ * such an invocation makes some sense, see https://github.com/systemd/systemd/issues/3667. Let's
+ * accept this here, and silently make "--ephemeral --template=" equivalent to "--ephemeral
+ * --directory=". */
+ arg_directory = TAKE_PTR(arg_template);
+
+ arg_caps_retain = (arg_caps_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
+
+ /* Load all settings from .nspawn files */
+ if (mask_no_settings)
+ arg_settings_mask = 0;
+
+ /* Don't load any settings from .nspawn files */
+ if (mask_all_settings)
+ arg_settings_mask = _SETTINGS_MASK_ALL;
+
+ return 1;
+}
+
+static int verify_arguments(void) {
+ int r;
+
+ if (arg_userns_mode != USER_NAMESPACE_NO)
+ arg_mount_settings |= MOUNT_USE_USERNS;
+
+ if (arg_private_network)
+ arg_mount_settings |= MOUNT_APPLY_APIVFS_NETNS;
+
+ if (!(arg_clone_ns_flags & CLONE_NEWPID) ||
+ !(arg_clone_ns_flags & CLONE_NEWUTS)) {
+ arg_register = false;
+ if (arg_start_mode != START_PID1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--boot cannot be used without namespacing.");
+ }
+
+ if (arg_userns_mode == USER_NAMESPACE_PICK)
+ arg_userns_chown = true;
+
+ if (arg_start_mode == START_BOOT && arg_kill_signal <= 0)
+ arg_kill_signal = SIGRTMIN+3;
+
+ if (arg_keep_unit && arg_register && cg_pid_get_owner_uid(0, NULL) >= 0)
+ /* Save the user from accidentally registering either user-$SESSION.scope or user@.service.
+ * The latter is not technically a user session, but we don't need to labour the point. */
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--keep-unit --register=yes may not be used when invoked from a user session.");
+
+ if (arg_directory && arg_image)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--directory= and --image= may not be combined.");
+
+ if (arg_template && arg_image)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--template= and --image= may not be combined.");
+
+ if (arg_template && !(arg_directory || arg_machine))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--template= needs --directory= or --machine=.");
+
+ if (arg_ephemeral && arg_template)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--ephemeral and --template= may not be combined.");
+
+ if (arg_ephemeral && !IN_SET(arg_link_journal, LINK_NO, LINK_AUTO))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--ephemeral and --link-journal= may not be combined.");
+
+ if (arg_userns_mode != USER_NAMESPACE_NO && !userns_supported())
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "--private-users= is not supported, kernel compiled without user namespace support.");
+
+ if (arg_userns_chown && arg_read_only)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--read-only and --private-users-chown may not be combined.");
+
+ /* If --network-namespace-path is given with any other network-related option,
+ * we need to error out, to avoid conflicts between different network options. */
+ if (arg_network_namespace_path &&
+ (arg_network_interfaces || arg_network_macvlan ||
+ arg_network_ipvlan || arg_network_veth_extra ||
+ arg_network_bridge || arg_network_zone ||
+ arg_network_veth || arg_private_network))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--network-namespace-path cannot be combined with other network options.");
+
+ if (arg_network_bridge && arg_network_zone)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--network-bridge= and --network-zone= may not be combined.");
+
+ if (arg_userns_mode != USER_NAMESPACE_NO && (arg_mount_settings & MOUNT_APPLY_APIVFS_NETNS) && !arg_private_network)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid namespacing settings. Mounting sysfs with --private-users requires --private-network.");
+
+ if (arg_userns_mode != USER_NAMESPACE_NO && !(arg_mount_settings & MOUNT_APPLY_APIVFS_RO))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot combine --private-users with read-write mounts.");
+
+ if (arg_volatile_mode != VOLATILE_NO && arg_read_only)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot combine --read-only with --volatile. Note that --volatile already implies a read-only base hierarchy.");
+
+ if (arg_expose_ports && !arg_private_network)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot use --port= without private networking.");
+
+#if ! HAVE_LIBIPTC
+ if (arg_expose_ports)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "--port= is not supported, compiled without libiptc support.");
+#endif
+
+ r = custom_mount_check_all();
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int userns_lchown(const char *p, uid_t uid, gid_t gid) {
+ assert(p);
+
+ if (arg_userns_mode == USER_NAMESPACE_NO)
+ return 0;
+
+ if (uid == UID_INVALID && gid == GID_INVALID)
+ return 0;
+
+ if (uid != UID_INVALID) {
+ uid += arg_uid_shift;
+
+ if (uid < arg_uid_shift || uid >= arg_uid_shift + arg_uid_range)
+ return -EOVERFLOW;
+ }
+
+ if (gid != GID_INVALID) {
+ gid += (gid_t) arg_uid_shift;
+
+ if (gid < (gid_t) arg_uid_shift || gid >= (gid_t) (arg_uid_shift + arg_uid_range))
+ return -EOVERFLOW;
+ }
+
+ if (lchown(p, uid, gid) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int userns_mkdir(const char *root, const char *path, mode_t mode, uid_t uid, gid_t gid) {
+ const char *q;
+ int r;
+
+ q = prefix_roota(root, path);
+ r = mkdir_errno_wrapper(q, mode);
+ if (r == -EEXIST)
+ return 0;
+ if (r < 0)
+ return r;
+
+ return userns_lchown(q, uid, gid);
+}
+
+static const char *timezone_from_path(const char *path) {
+ return PATH_STARTSWITH_SET(
+ path,
+ "../usr/share/zoneinfo/",
+ "/usr/share/zoneinfo/");
+}
+
+static int setup_timezone(const char *dest) {
+ _cleanup_free_ char *p = NULL, *etc = NULL;
+ const char *where, *check;
+ TimezoneMode m;
+ int r;
+
+ assert(dest);
+
+ if (IN_SET(arg_timezone, TIMEZONE_AUTO, TIMEZONE_SYMLINK)) {
+ r = readlink_malloc("/etc/localtime", &p);
+ if (r == -ENOENT && arg_timezone == TIMEZONE_AUTO)
+ m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? TIMEZONE_OFF : TIMEZONE_DELETE;
+ else if (r == -EINVAL && arg_timezone == TIMEZONE_AUTO) /* regular file? */
+ m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? TIMEZONE_BIND : TIMEZONE_COPY;
+ else if (r < 0) {
+ log_warning_errno(r, "Failed to read host's /etc/localtime symlink, not updating container timezone: %m");
+ /* To handle warning, delete /etc/localtime and replace it with a symbolic link to a time zone data
+ * file.
+ *
+ * Example:
+ * ln -s /usr/share/zoneinfo/UTC /etc/localtime
+ */
+ return 0;
+ } else if (arg_timezone == TIMEZONE_AUTO)
+ m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? TIMEZONE_BIND : TIMEZONE_SYMLINK;
+ else
+ m = arg_timezone;
+ } else
+ m = arg_timezone;
+
+ if (m == TIMEZONE_OFF)
+ return 0;
+
+ r = chase_symlinks("/etc", dest, CHASE_PREFIX_ROOT, &etc);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to resolve /etc path in container, ignoring: %m");
+ return 0;
+ }
+
+ where = strjoina(etc, "/localtime");
+
+ switch (m) {
+
+ case TIMEZONE_DELETE:
+ if (unlink(where) < 0)
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno, "Failed to remove '%s', ignoring: %m", where);
+
+ return 0;
+
+ case TIMEZONE_SYMLINK: {
+ _cleanup_free_ char *q = NULL;
+ const char *z, *what;
+
+ z = timezone_from_path(p);
+ if (!z) {
+ log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
+ return 0;
+ }
+
+ r = readlink_malloc(where, &q);
+ if (r >= 0 && streq_ptr(timezone_from_path(q), z))
+ return 0; /* Already pointing to the right place? Then do nothing .. */
+
+ check = strjoina(dest, "/usr/share/zoneinfo/", z);
+ r = chase_symlinks(check, dest, 0, NULL);
+ if (r < 0)
+ log_debug_errno(r, "Timezone %s does not exist (or is not accessible) in container, not creating symlink: %m", z);
+ else {
+ if (unlink(where) < 0 && errno != ENOENT) {
+ log_full_errno(IN_SET(errno, EROFS, EACCES, EPERM) ? LOG_DEBUG : LOG_WARNING, /* Don't complain on read-only images */
+ errno, "Failed to remove existing timezone info %s in container, ignoring: %m", where);
+ return 0;
+ }
+
+ what = strjoina("../usr/share/zoneinfo/", z);
+ if (symlink(what, where) < 0) {
+ log_full_errno(IN_SET(errno, EROFS, EACCES, EPERM) ? LOG_DEBUG : LOG_WARNING,
+ errno, "Failed to correct timezone of container, ignoring: %m");
+ return 0;
+ }
+
+ break;
+ }
+
+ _fallthrough_;
+ }
+
+ case TIMEZONE_BIND: {
+ _cleanup_free_ char *resolved = NULL;
+ int found;
+
+ found = chase_symlinks(where, dest, CHASE_NONEXISTENT, &resolved);
+ if (found < 0) {
+ log_warning_errno(found, "Failed to resolve /etc/localtime path in container, ignoring: %m");
+ return 0;
+ }
+
+ if (found == 0) /* missing? */
+ (void) touch(resolved);
+
+ r = mount_verbose(LOG_WARNING, "/etc/localtime", resolved, NULL, MS_BIND, NULL);
+ if (r >= 0)
+ return mount_verbose(LOG_ERR, NULL, resolved, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NOSUID|MS_NODEV, NULL);
+
+ _fallthrough_;
+ }
+
+ case TIMEZONE_COPY:
+ /* If mounting failed, try to copy */
+ r = copy_file_atomic("/etc/localtime", where, 0644, 0, COPY_REFLINK|COPY_REPLACE);
+ if (r < 0) {
+ log_full_errno(IN_SET(r, -EROFS, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to copy /etc/localtime to %s, ignoring: %m", where);
+ return 0;
+ }
+
+ break;
+
+ default:
+ assert_not_reached("unexpected mode");
+ }
+
+ /* Fix permissions of the symlink or file copy we just created */
+ r = userns_lchown(where, 0, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to chown /etc/localtime, ignoring: %m");
+
+ return 0;
+}
+
+static int have_resolv_conf(const char *path) {
+ assert(path);
+
+ if (access(path, F_OK) < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_debug_errno(errno, "Failed to determine whether '%s' is available: %m", path);
+ }
+
+ return 1;
+}
+
+static int resolved_listening(void) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *dns_stub_listener_mode = NULL;
+ int r;
+
+ /* Check if resolved is listening */
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to open system bus: %m");
+
+ r = bus_name_has_owner(bus, "org.freedesktop.resolve1", NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to check whether the 'org.freedesktop.resolve1' bus name is taken: %m");
+ if (r == 0)
+ return 0;
+
+ r = sd_bus_get_property_string(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "DNSStubListener",
+ &error,
+ &dns_stub_listener_mode);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to query DNSStubListener property: %s", bus_error_message(&error, r));
+
+ return STR_IN_SET(dns_stub_listener_mode, "udp", "yes");
+}
+
+static int setup_resolv_conf(const char *dest) {
+ _cleanup_free_ char *etc = NULL;
+ const char *where, *what;
+ ResolvConfMode m;
+ int r;
+
+ assert(dest);
+
+ if (arg_resolv_conf == RESOLV_CONF_AUTO) {
+ if (arg_private_network)
+ m = RESOLV_CONF_OFF;
+ else if (have_resolv_conf(STATIC_RESOLV_CONF) > 0 && resolved_listening() > 0)
+ m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? RESOLV_CONF_BIND_STATIC : RESOLV_CONF_COPY_STATIC;
+ else if (have_resolv_conf("/etc/resolv.conf") > 0)
+ m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? RESOLV_CONF_BIND_HOST : RESOLV_CONF_COPY_HOST;
+ else
+ m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? RESOLV_CONF_OFF : RESOLV_CONF_DELETE;
+ } else
+ m = arg_resolv_conf;
+
+ if (m == RESOLV_CONF_OFF)
+ return 0;
+
+ r = chase_symlinks("/etc", dest, CHASE_PREFIX_ROOT, &etc);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to resolve /etc path in container, ignoring: %m");
+ return 0;
+ }
+
+ where = strjoina(etc, "/resolv.conf");
+
+ if (m == RESOLV_CONF_DELETE) {
+ if (unlink(where) < 0)
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno, "Failed to remove '%s', ignoring: %m", where);
+
+ return 0;
+ }
+
+ if (IN_SET(m, RESOLV_CONF_BIND_STATIC, RESOLV_CONF_COPY_STATIC))
+ what = STATIC_RESOLV_CONF;
+ else
+ what = "/etc/resolv.conf";
+
+ if (IN_SET(m, RESOLV_CONF_BIND_HOST, RESOLV_CONF_BIND_STATIC)) {
+ _cleanup_free_ char *resolved = NULL;
+ int found;
+
+ found = chase_symlinks(where, dest, CHASE_NONEXISTENT, &resolved);
+ if (found < 0) {
+ log_warning_errno(found, "Failed to resolve /etc/resolv.conf path in container, ignoring: %m");
+ return 0;
+ }
+
+ if (found == 0) /* missing? */
+ (void) touch(resolved);
+
+ r = mount_verbose(LOG_WARNING, what, resolved, NULL, MS_BIND, NULL);
+ if (r >= 0)
+ return mount_verbose(LOG_ERR, NULL, resolved, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NOSUID|MS_NODEV, NULL);
+ }
+
+ /* If that didn't work, let's copy the file */
+ r = copy_file(what, where, O_TRUNC|O_NOFOLLOW, 0644, 0, COPY_REFLINK);
+ if (r < 0) {
+ /* If the file already exists as symlink, let's suppress the warning, under the assumption that
+ * resolved or something similar runs inside and the symlink points there.
+ *
+ * If the disk image is read-only, there's also no point in complaining.
+ */
+ log_full_errno(!IN_SET(RESOLV_CONF_COPY_HOST, RESOLV_CONF_COPY_STATIC) && IN_SET(r, -ELOOP, -EROFS, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to copy /etc/resolv.conf to %s, ignoring: %m", where);
+ return 0;
+ }
+
+ r = userns_lchown(where, 0, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to chown /etc/resolv.conf, ignoring: %m");
+
+ return 0;
+}
+
+static int setup_boot_id(void) {
+ _cleanup_(unlink_and_freep) char *from = NULL;
+ _cleanup_free_ char *path = NULL;
+ sd_id128_t rnd = SD_ID128_NULL;
+ const char *to;
+ int r;
+
+ /* Generate a new randomized boot ID, so that each boot-up of
+ * the container gets a new one */
+
+ r = tempfn_random_child(NULL, "proc-sys-kernel-random-boot-id", &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate random boot ID path: %m");
+
+ r = sd_id128_randomize(&rnd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate random boot id: %m");
+
+ r = id128_write(path, ID128_UUID, rnd, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write boot id: %m");
+
+ from = TAKE_PTR(path);
+ to = "/proc/sys/kernel/random/boot_id";
+
+ r = mount_verbose(LOG_ERR, from, to, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+ return mount_verbose(LOG_ERR, NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+}
+
+static int copy_devnodes(const char *dest) {
+ static const char devnodes[] =
+ "null\0"
+ "zero\0"
+ "full\0"
+ "random\0"
+ "urandom\0"
+ "tty\0"
+ "net/tun\0";
+
+ const char *d;
+ int r = 0;
+ _cleanup_umask_ mode_t u;
+
+ assert(dest);
+
+ u = umask(0000);
+
+ /* Create /dev/net, so that we can create /dev/net/tun in it */
+ if (userns_mkdir(dest, "/dev/net", 0755, 0, 0) < 0)
+ return log_error_errno(r, "Failed to create /dev/net directory: %m");
+
+ NULSTR_FOREACH(d, devnodes) {
+ _cleanup_free_ char *from = NULL, *to = NULL;
+ struct stat st;
+
+ from = strappend("/dev/", d);
+ if (!from)
+ return log_oom();
+
+ to = prefix_root(dest, from);
+ if (!to)
+ return log_oom();
+
+ if (stat(from, &st) < 0) {
+
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to stat %s: %m", from);
+
+ } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "%s is not a char or block device, cannot copy.", from);
+ else {
+ _cleanup_free_ char *sl = NULL, *prefixed = NULL, *dn = NULL, *t = NULL;
+
+ if (mknod(to, st.st_mode, st.st_rdev) < 0) {
+ /* Explicitly warn the user when /dev is already populated. */
+ if (errno == EEXIST)
+ log_notice("%s/dev is pre-mounted and pre-populated. If a pre-mounted /dev is provided it needs to be an unpopulated file system.", dest);
+ if (errno != EPERM)
+ return log_error_errno(errno, "mknod(%s) failed: %m", to);
+
+ /* Some systems abusively restrict mknod but allow bind mounts. */
+ r = touch(to);
+ if (r < 0)
+ return log_error_errno(r, "touch (%s) failed: %m", to);
+ r = mount_verbose(LOG_DEBUG, from, to, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Both mknod and bind mount (%s) failed: %m", to);
+ }
+
+ r = userns_lchown(to, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "chown() of device node %s failed: %m", to);
+
+ dn = strjoin("/dev/", S_ISCHR(st.st_mode) ? "char" : "block");
+ if (!dn)
+ return log_oom();
+
+ r = userns_mkdir(dest, dn, 0755, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create '%s': %m", dn);
+
+ if (asprintf(&sl, "%s/%u:%u", dn, major(st.st_rdev), minor(st.st_rdev)) < 0)
+ return log_oom();
+
+ prefixed = prefix_root(dest, sl);
+ if (!prefixed)
+ return log_oom();
+
+ t = strjoin("../", d);
+ if (!t)
+ return log_oom();
+
+ if (symlink(t, prefixed) < 0)
+ log_debug_errno(errno, "Failed to symlink '%s' to '%s': %m", t, prefixed);
+ }
+ }
+
+ return r;
+}
+
+static int setup_pts(const char *dest) {
+ _cleanup_free_ char *options = NULL;
+ const char *p;
+ int r;
+
+#if HAVE_SELINUX
+ if (arg_selinux_apifs_context)
+ (void) asprintf(&options,
+ "newinstance,ptmxmode=0666,mode=620,gid=" GID_FMT ",context=\"%s\"",
+ arg_uid_shift + TTY_GID,
+ arg_selinux_apifs_context);
+ else
+#endif
+ (void) asprintf(&options,
+ "newinstance,ptmxmode=0666,mode=620,gid=" GID_FMT,
+ arg_uid_shift + TTY_GID);
+
+ if (!options)
+ return log_oom();
+
+ /* Mount /dev/pts itself */
+ p = prefix_roota(dest, "/dev/pts");
+ r = mkdir_errno_wrapper(p, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /dev/pts: %m");
+
+ r = mount_verbose(LOG_ERR, "devpts", p, "devpts", MS_NOSUID|MS_NOEXEC, options);
+ if (r < 0)
+ return r;
+ r = userns_lchown(p, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to chown /dev/pts: %m");
+
+ /* Create /dev/ptmx symlink */
+ p = prefix_roota(dest, "/dev/ptmx");
+ if (symlink("pts/ptmx", p) < 0)
+ return log_error_errno(errno, "Failed to create /dev/ptmx symlink: %m");
+ r = userns_lchown(p, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to chown /dev/ptmx: %m");
+
+ /* And fix /dev/pts/ptmx ownership */
+ p = prefix_roota(dest, "/dev/pts/ptmx");
+ r = userns_lchown(p, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to chown /dev/pts/ptmx: %m");
+
+ return 0;
+}
+
+static int setup_dev_console(const char *dest, const char *console) {
+ _cleanup_umask_ mode_t u;
+ const char *to;
+ int r;
+
+ assert(dest);
+ assert(console);
+
+ u = umask(0000);
+
+ r = chmod_and_chown(console, 0600, arg_uid_shift, arg_uid_shift);
+ if (r < 0)
+ return log_error_errno(r, "Failed to correct access mode for TTY: %m");
+
+ /* We need to bind mount the right tty to /dev/console since
+ * ptys can only exist on pts file systems. To have something
+ * to bind mount things on we create a empty regular file. */
+
+ to = prefix_roota(dest, "/dev/console");
+ r = touch(to);
+ if (r < 0)
+ return log_error_errno(r, "touch() for /dev/console failed: %m");
+
+ return mount_verbose(LOG_ERR, console, to, NULL, MS_BIND, NULL);
+}
+
+static int setup_keyring(void) {
+ key_serial_t keyring;
+
+ /* Allocate a new session keyring for the container. This makes sure the keyring of the session systemd-nspawn
+ * was invoked from doesn't leak into the container. Note that by default we block keyctl() and request_key()
+ * anyway via seccomp so doing this operation isn't strictly necessary, but in case people explicitly whitelist
+ * these system calls let's make sure we don't leak anything into the container. */
+
+ keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
+ if (keyring == -1) {
+ if (errno == ENOSYS)
+ log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
+ else if (IN_SET(errno, EACCES, EPERM))
+ log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
+ else
+ return log_error_errno(errno, "Setting up kernel keyring failed: %m");
+ }
+
+ return 0;
+}
+
+static int setup_kmsg(int kmsg_socket) {
+ _cleanup_(unlink_and_freep) char *from = NULL;
+ _cleanup_free_ char *fifo = NULL;
+ _cleanup_close_ int fd = -1;
+ _cleanup_umask_ mode_t u;
+ const char *to;
+ int r;
+
+ assert(kmsg_socket >= 0);
+
+ u = umask(0000);
+
+ /* We create the kmsg FIFO as as temporary file in /tmp, but immediately delete it after bind mounting it to
+ * /proc/kmsg. While FIFOs on the reading side behave very similar to /proc/kmsg, their writing side behaves
+ * differently from /dev/kmsg in that writing blocks when nothing is reading. In order to avoid any problems
+ * with containers deadlocking due to this we simply make /dev/kmsg unavailable to the container. */
+
+ r = tempfn_random_child(NULL, "proc-kmsg", &fifo);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate kmsg path: %m");
+
+ if (mkfifo(fifo, 0600) < 0)
+ return log_error_errno(errno, "mkfifo() for /run/kmsg failed: %m");
+
+ from = TAKE_PTR(fifo);
+ to = "/proc/kmsg";
+
+ r = mount_verbose(LOG_ERR, from, to, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+ fd = open(from, O_RDWR|O_NONBLOCK|O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open fifo: %m");
+
+ /* Store away the fd in the socket, so that it stays open as long as we run the child */
+ r = send_one_fd(kmsg_socket, fd, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send FIFO fd: %m");
+
+ return 0;
+}
+
+static int on_address_change(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
+ union in_addr_union *exposed = userdata;
+
+ assert(rtnl);
+ assert(m);
+ assert(exposed);
+
+ expose_port_execute(rtnl, arg_expose_ports, exposed);
+ return 0;
+}
+
+static int setup_hostname(void) {
+ int r;
+
+ if ((arg_clone_ns_flags & CLONE_NEWUTS) == 0)
+ return 0;
+
+ r = sethostname_idempotent(arg_hostname ?: arg_machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set hostname: %m");
+
+ return 0;
+}
+
+static int setup_journal(const char *directory) {
+ _cleanup_free_ char *d = NULL;
+ const char *dirname, *p, *q;
+ sd_id128_t this_id;
+ char id[33];
+ bool try;
+ int r;
+
+ /* Don't link journals in ephemeral mode */
+ if (arg_ephemeral)
+ return 0;
+
+ if (arg_link_journal == LINK_NO)
+ return 0;
+
+ try = arg_link_journal_try || arg_link_journal == LINK_AUTO;
+
+ r = sd_id128_get_machine(&this_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to retrieve machine ID: %m");
+
+ if (sd_id128_equal(arg_uuid, this_id)) {
+ log_full(try ? LOG_WARNING : LOG_ERR,
+ "Host and machine ids are equal (%s): refusing to link journals", sd_id128_to_string(arg_uuid, id));
+ if (try)
+ return 0;
+ return -EEXIST;
+ }
+
+ FOREACH_STRING(dirname, "/var", "/var/log", "/var/log/journal") {
+ r = userns_mkdir(directory, dirname, 0755, 0, 0);
+ if (r < 0) {
+ bool ignore = r == -EROFS && try;
+ log_full_errno(ignore ? LOG_DEBUG : LOG_ERR, r,
+ "Failed to create %s%s: %m", dirname, ignore ? ", ignoring" : "");
+ return ignore ? 0 : r;
+ }
+ }
+
+ (void) sd_id128_to_string(arg_uuid, id);
+
+ p = strjoina("/var/log/journal/", id);
+ q = prefix_roota(directory, p);
+
+ if (path_is_mount_point(p, NULL, 0) > 0) {
+ if (try)
+ return 0;
+
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "%s: already a mount point, refusing to use for journal", p);
+ }
+
+ if (path_is_mount_point(q, NULL, 0) > 0) {
+ if (try)
+ return 0;
+
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "%s: already a mount point, refusing to use for journal", q);
+ }
+
+ r = readlink_and_make_absolute(p, &d);
+ if (r >= 0) {
+ if (IN_SET(arg_link_journal, LINK_GUEST, LINK_AUTO) &&
+ path_equal(d, q)) {
+
+ r = userns_mkdir(directory, p, 0755, 0, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to create directory %s: %m", q);
+ return 0;
+ }
+
+ if (unlink(p) < 0)
+ return log_error_errno(errno, "Failed to remove symlink %s: %m", p);
+ } else if (r == -EINVAL) {
+
+ if (arg_link_journal == LINK_GUEST &&
+ rmdir(p) < 0) {
+
+ if (errno == ENOTDIR) {
+ log_error("%s already exists and is neither a symlink nor a directory", p);
+ return r;
+ } else
+ return log_error_errno(errno, "Failed to remove %s: %m", p);
+ }
+ } else if (r != -ENOENT)
+ return log_error_errno(r, "readlink(%s) failed: %m", p);
+
+ if (arg_link_journal == LINK_GUEST) {
+
+ if (symlink(q, p) < 0) {
+ if (try) {
+ log_debug_errno(errno, "Failed to symlink %s to %s, skipping journal setup: %m", q, p);
+ return 0;
+ } else
+ return log_error_errno(errno, "Failed to symlink %s to %s: %m", q, p);
+ }
+
+ r = userns_mkdir(directory, p, 0755, 0, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to create directory %s: %m", q);
+ return 0;
+ }
+
+ if (arg_link_journal == LINK_HOST) {
+ /* don't create parents here — if the host doesn't have
+ * permanent journal set up, don't force it here */
+
+ r = mkdir_errno_wrapper(p, 0755);
+ if (r < 0 && r != -EEXIST) {
+ if (try) {
+ log_debug_errno(r, "Failed to create %s, skipping journal setup: %m", p);
+ return 0;
+ } else
+ return log_error_errno(r, "Failed to create %s: %m", p);
+ }
+
+ } else if (access(p, F_OK) < 0)
+ return 0;
+
+ if (dir_is_empty(q) == 0)
+ log_warning("%s is not empty, proceeding anyway.", q);
+
+ r = userns_mkdir(directory, p, 0755, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create %s: %m", q);
+
+ r = mount_verbose(LOG_DEBUG, p, q, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to bind mount journal from host into guest: %m");
+
+ return 0;
+}
+
+static int drop_capabilities(void) {
+ return capability_bounding_set_drop(arg_caps_retain, false);
+}
+
+static int reset_audit_loginuid(void) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ if ((arg_clone_ns_flags & CLONE_NEWPID) == 0)
+ return 0;
+
+ r = read_one_line_file("/proc/self/loginuid", &p);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to read /proc/self/loginuid: %m");
+
+ /* Already reset? */
+ if (streq(p, "4294967295"))
+ return 0;
+
+ r = write_string_file("/proc/self/loginuid", "4294967295", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0) {
+ log_error_errno(r,
+ "Failed to reset audit login UID. This probably means that your kernel is too\n"
+ "old and you have audit enabled. Note that the auditing subsystem is known to\n"
+ "be incompatible with containers on old kernels. Please make sure to upgrade\n"
+ "your kernel or to off auditing with 'audit=0' on the kernel command line before\n"
+ "using systemd-nspawn. Sleeping for 5s... (%m)");
+
+ sleep(5);
+ }
+
+ return 0;
+}
+
+static int setup_propagate(const char *root) {
+ const char *p, *q;
+ int r;
+
+ (void) mkdir_p("/run/systemd/nspawn/", 0755);
+ (void) mkdir_p("/run/systemd/nspawn/propagate", 0600);
+ p = strjoina("/run/systemd/nspawn/propagate/", arg_machine);
+ (void) mkdir_p(p, 0600);
+
+ r = userns_mkdir(root, "/run/systemd", 0755, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /run/systemd: %m");
+
+ r = userns_mkdir(root, "/run/systemd/nspawn", 0755, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /run/systemd/nspawn: %m");
+
+ r = userns_mkdir(root, "/run/systemd/nspawn/incoming", 0600, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /run/systemd/nspawn/incoming: %m");
+
+ q = prefix_roota(root, "/run/systemd/nspawn/incoming");
+ r = mount_verbose(LOG_ERR, p, q, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+ r = mount_verbose(LOG_ERR, NULL, q, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
+ if (r < 0)
+ return r;
+
+ /* machined will MS_MOVE into that directory, and that's only
+ * supported for non-shared mounts. */
+ return mount_verbose(LOG_ERR, NULL, q, NULL, MS_SLAVE, NULL);
+}
+
+static int setup_machine_id(const char *directory) {
+ const char *etc_machine_id;
+ sd_id128_t id;
+ int r;
+
+ /* If the UUID in the container is already set, then that's what counts, and we use. If it isn't set, and the
+ * caller passed --uuid=, then we'll pass it in the $container_uuid env var to PID 1 of the container. The
+ * assumption is that PID 1 will then write it to /etc/machine-id to make it persistent. If --uuid= is not
+ * passed we generate a random UUID, and pass it via $container_uuid. In effect this means that /etc/machine-id
+ * in the container and our idea of the container UUID will always be in sync (at least if PID 1 in the
+ * container behaves nicely). */
+
+ etc_machine_id = prefix_roota(directory, "/etc/machine-id");
+
+ r = id128_read(etc_machine_id, ID128_PLAIN, &id);
+ if (r < 0) {
+ if (!IN_SET(r, -ENOENT, -ENOMEDIUM)) /* If the file is missing or empty, we don't mind */
+ return log_error_errno(r, "Failed to read machine ID from container image: %m");
+
+ if (sd_id128_is_null(arg_uuid)) {
+ r = sd_id128_randomize(&arg_uuid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire randomized machine UUID: %m");
+ }
+ } else {
+ if (sd_id128_is_null(id))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Machine ID in container image is zero, refusing.");
+
+ arg_uuid = id;
+ }
+
+ return 0;
+}
+
+static int recursive_chown(const char *directory, uid_t shift, uid_t range) {
+ int r;
+
+ assert(directory);
+
+ if (arg_userns_mode == USER_NAMESPACE_NO || !arg_userns_chown)
+ return 0;
+
+ r = path_patch_uid(directory, arg_uid_shift, arg_uid_range);
+ if (r == -EOPNOTSUPP)
+ return log_error_errno(r, "Automatic UID/GID adjusting is only supported for UID/GID ranges starting at multiples of 2^16 with a range of 2^16.");
+ if (r == -EBADE)
+ return log_error_errno(r, "Upper 16 bits of root directory UID and GID do not match.");
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust UID/GID shift of OS tree: %m");
+ if (r == 0)
+ log_debug("Root directory of image is already owned by the right UID/GID range, skipping recursive chown operation.");
+ else
+ log_debug("Patched directory tree to match UID/GID range.");
+
+ return r;
+}
+
+/*
+ * Return values:
+ * < 0 : wait_for_terminate() failed to get the state of the
+ * container, the container was terminated by a signal, or
+ * failed for an unknown reason. No change is made to the
+ * container argument.
+ * > 0 : The program executed in the container terminated with an
+ * error. The exit code of the program executed in the
+ * container is returned. The container argument has been set
+ * to CONTAINER_TERMINATED.
+ * 0 : The container is being rebooted, has been shut down or exited
+ * successfully. The container argument has been set to either
+ * CONTAINER_TERMINATED or CONTAINER_REBOOTED.
+ *
+ * That is, success is indicated by a return value of zero, and an
+ * error is indicated by a non-zero value.
+ */
+static int wait_for_container(pid_t pid, ContainerStatus *container) {
+ siginfo_t status;
+ int r;
+
+ r = wait_for_terminate(pid, &status);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to wait for container: %m");
+
+ switch (status.si_code) {
+
+ case CLD_EXITED:
+ if (status.si_status == 0)
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s exited successfully.", arg_machine);
+ else
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s failed with error code %i.", arg_machine, status.si_status);
+
+ *container = CONTAINER_TERMINATED;
+ return status.si_status;
+
+ case CLD_KILLED:
+ if (status.si_status == SIGINT) {
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s has been shut down.", arg_machine);
+ *container = CONTAINER_TERMINATED;
+ return 0;
+
+ } else if (status.si_status == SIGHUP) {
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s is being rebooted.", arg_machine);
+ *container = CONTAINER_REBOOTED;
+ return 0;
+ }
+
+ _fallthrough_;
+ case CLD_DUMPED:
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Container %s terminated by signal %s.", arg_machine, signal_to_string(status.si_status));
+
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Container %s failed due to unknown reason.", arg_machine);
+ }
+}
+
+static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ pid_t pid;
+
+ pid = PTR_TO_PID(userdata);
+ if (pid > 0) {
+ if (kill(pid, arg_kill_signal) >= 0) {
+ log_info("Trying to halt container. Send SIGTERM again to trigger immediate termination.");
+ sd_event_source_set_userdata(s, NULL);
+ return 0;
+ }
+ }
+
+ sd_event_exit(sd_event_source_get_event(s), 0);
+ return 0;
+}
+
+static int on_sigchld(sd_event_source *s, const struct signalfd_siginfo *ssi, void *userdata) {
+ pid_t pid;
+
+ assert(s);
+ assert(ssi);
+
+ pid = PTR_TO_PID(userdata);
+
+ for (;;) {
+ siginfo_t si = {};
+
+ if (waitid(P_ALL, 0, &si, WNOHANG|WNOWAIT|WEXITED) < 0)
+ return log_error_errno(errno, "Failed to waitid(): %m");
+ if (si.si_pid == 0) /* No pending children. */
+ break;
+ if (si.si_pid == pid) {
+ /* The main process we care for has exited. Return from
+ * signal handler but leave the zombie. */
+ sd_event_exit(sd_event_source_get_event(s), 0);
+ break;
+ }
+
+ /* Reap all other children. */
+ (void) waitid(P_PID, si.si_pid, &si, WNOHANG|WEXITED);
+ }
+
+ return 0;
+}
+
+static int on_request_stop(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ pid_t pid;
+
+ assert(m);
+
+ pid = PTR_TO_PID(userdata);
+
+ if (arg_kill_signal > 0) {
+ log_info("Container termination requested. Attempting to halt container.");
+ (void) kill(pid, arg_kill_signal);
+ } else {
+ log_info("Container termination requested. Exiting.");
+ sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), 0);
+ }
+
+ return 0;
+}
+
+static int determine_names(void) {
+ int r;
+
+ if (arg_template && !arg_directory && arg_machine) {
+
+ /* If --template= was specified then we should not
+ * search for a machine, but instead create a new one
+ * in /var/lib/machine. */
+
+ arg_directory = strjoin("/var/lib/machines/", arg_machine);
+ if (!arg_directory)
+ return log_oom();
+ }
+
+ if (!arg_image && !arg_directory) {
+ if (arg_machine) {
+ _cleanup_(image_unrefp) Image *i = NULL;
+
+ r = image_find(IMAGE_MACHINE, arg_machine, &i);
+ if (r == -ENOENT)
+ return log_error_errno(r, "No image for machine '%s'.", arg_machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to find image for machine '%s': %m", arg_machine);
+
+ if (IN_SET(i->type, IMAGE_RAW, IMAGE_BLOCK))
+ r = free_and_strdup(&arg_image, i->path);
+ else
+ r = free_and_strdup(&arg_directory, i->path);
+ if (r < 0)
+ return log_oom();
+
+ if (!arg_ephemeral)
+ arg_read_only = arg_read_only || i->read_only;
+ } else {
+ r = safe_getcwd(&arg_directory);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine current directory: %m");
+ }
+
+ if (!arg_directory && !arg_image) {
+ log_error("Failed to determine path, please use -D or -i.");
+ return -EINVAL;
+ }
+ }
+
+ if (!arg_machine) {
+ if (arg_directory && path_equal(arg_directory, "/"))
+ arg_machine = gethostname_malloc();
+ else {
+ if (arg_image) {
+ char *e;
+
+ arg_machine = strdup(basename(arg_image));
+
+ /* Truncate suffix if there is one */
+ e = endswith(arg_machine, ".raw");
+ if (e)
+ *e = 0;
+ } else
+ arg_machine = strdup(basename(arg_directory));
+ }
+ if (!arg_machine)
+ return log_oom();
+
+ hostname_cleanup(arg_machine);
+ if (!machine_name_is_valid(arg_machine)) {
+ log_error("Failed to determine machine name automatically, please use -M.");
+ return -EINVAL;
+ }
+
+ if (arg_ephemeral) {
+ char *b;
+
+ /* Add a random suffix when this is an
+ * ephemeral machine, so that we can run many
+ * instances at once without manually having
+ * to specify -M each time. */
+
+ if (asprintf(&b, "%s-%016" PRIx64, arg_machine, random_u64()) < 0)
+ return log_oom();
+
+ free(arg_machine);
+ arg_machine = b;
+ }
+ }
+
+ return 0;
+}
+
+static int chase_symlinks_and_update(char **p, unsigned flags) {
+ char *chased;
+ int r;
+
+ assert(p);
+
+ if (!*p)
+ return 0;
+
+ r = chase_symlinks(*p, NULL, flags, &chased);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve path %s: %m", *p);
+
+ free_and_replace(*p, chased);
+ return r; /* r might be an fd here in case we ever use CHASE_OPEN in flags */
+}
+
+static int determine_uid_shift(const char *directory) {
+ int r;
+
+ if (arg_userns_mode == USER_NAMESPACE_NO) {
+ arg_uid_shift = 0;
+ return 0;
+ }
+
+ if (arg_uid_shift == UID_INVALID) {
+ struct stat st;
+
+ r = stat(directory, &st);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to determine UID base of %s: %m", directory);
+
+ arg_uid_shift = st.st_uid & UINT32_C(0xffff0000);
+
+ if (arg_uid_shift != (st.st_gid & UINT32_C(0xffff0000)))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "UID and GID base of %s don't match.", directory);
+
+ arg_uid_range = UINT32_C(0x10000);
+ }
+
+ if (arg_uid_shift > (uid_t) -1 - arg_uid_range)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "UID base too high for UID range.");
+
+ return 0;
+}
+
+static int inner_child(
+ Barrier *barrier,
+ const char *directory,
+ bool secondary,
+ int kmsg_socket,
+ int rtnl_socket,
+ FDSet *fds) {
+
+ _cleanup_free_ char *home = NULL;
+ char as_uuid[37];
+ size_t n_env = 1;
+ const char *envp[] = {
+ "PATH=" DEFAULT_PATH_COMPAT,
+ NULL, /* container */
+ NULL, /* TERM */
+ NULL, /* HOME */
+ NULL, /* USER */
+ NULL, /* LOGNAME */
+ NULL, /* container_uuid */
+ NULL, /* LISTEN_FDS */
+ NULL, /* LISTEN_PID */
+ NULL, /* NOTIFY_SOCKET */
+ NULL
+ };
+ const char *exec_target;
+ _cleanup_strv_free_ char **env_use = NULL;
+ int r;
+
+ /* This is the "inner" child process, i.e. the one forked off by the "outer" child process, which is the one
+ * the container manager itself forked off. At the time of clone() it gained its own CLONE_NEWNS, CLONE_NEWPID,
+ * CLONE_NEWUTS, CLONE_NEWIPC, CLONE_NEWUSER namespaces. Note that it has its own CLONE_NEWNS namespace,
+ * separate from the CLONE_NEWNS created for the "outer" child, and also separate from the host's CLONE_NEWNS
+ * namespace. The reason for having two levels of CLONE_NEWNS namespaces is that the "inner" one is owned by
+ * the CLONE_NEWUSER namespace of the container, while the "outer" one is owned by the host's CLONE_NEWUSER
+ * namespace.
+ *
+ * Note at this point we have no CLONE_NEWNET namespace yet. We'll acquire that one later through
+ * unshare(). See below. */
+
+ assert(barrier);
+ assert(directory);
+ assert(kmsg_socket >= 0);
+
+ if (arg_userns_mode != USER_NAMESPACE_NO) {
+ /* Tell the parent, that it now can write the UID map. */
+ (void) barrier_place(barrier); /* #1 */
+
+ /* Wait until the parent wrote the UID map */
+ if (!barrier_place_and_sync(barrier)) /* #2 */
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
+ "Parent died too early");
+ }
+
+ r = reset_uid_gid();
+ if (r < 0)
+ return log_error_errno(r, "Couldn't become new root: %m");
+
+ r = mount_all(NULL,
+ arg_mount_settings | MOUNT_IN_USERNS,
+ arg_uid_shift,
+ arg_selinux_apifs_context);
+ if (r < 0)
+ return r;
+
+ if (!arg_network_namespace_path && arg_private_network) {
+ r = unshare(CLONE_NEWNET);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to unshare network namespace: %m");
+
+ /* Tell the parent that it can setup network interfaces. */
+ (void) barrier_place(barrier); /* #3 */
+ }
+
+ r = mount_sysfs(NULL, arg_mount_settings);
+ if (r < 0)
+ return r;
+
+ /* Wait until we are cgroup-ified, so that we
+ * can mount the right cgroup path writable */
+ if (!barrier_place_and_sync(barrier)) /* #4 */
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
+ "Parent died too early");
+
+ if (arg_use_cgns) {
+ r = unshare(CLONE_NEWCGROUP);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to unshare cgroup namespace: %m");
+ r = mount_cgroups(
+ "",
+ arg_unified_cgroup_hierarchy,
+ arg_userns_mode != USER_NAMESPACE_NO,
+ arg_uid_shift,
+ arg_uid_range,
+ arg_selinux_apifs_context,
+ true);
+ if (r < 0)
+ return r;
+ } else {
+ r = mount_systemd_cgroup_writable("", arg_unified_cgroup_hierarchy);
+ if (r < 0)
+ return r;
+ }
+
+ r = setup_boot_id();
+ if (r < 0)
+ return r;
+
+ r = setup_kmsg(kmsg_socket);
+ if (r < 0)
+ return r;
+ kmsg_socket = safe_close(kmsg_socket);
+
+ if (setsid() < 0)
+ return log_error_errno(errno, "setsid() failed: %m");
+
+ if (arg_private_network)
+ loopback_setup();
+
+ if (arg_expose_ports) {
+ r = expose_port_send_rtnl(rtnl_socket);
+ if (r < 0)
+ return r;
+ rtnl_socket = safe_close(rtnl_socket);
+ }
+
+ if (arg_oom_score_adjust_set) {
+ r = set_oom_score_adjust(arg_oom_score_adjust);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust OOM score: %m");
+ }
+
+ if (arg_cpuset)
+ if (sched_setaffinity(0, CPU_ALLOC_SIZE(arg_cpuset_ncpus), arg_cpuset) < 0)
+ return log_error_errno(errno, "Failed to set CPU affinity: %m");
+
+ r = drop_capabilities();
+ if (r < 0)
+ return log_error_errno(r, "drop_capabilities() failed: %m");
+
+ (void) setup_hostname();
+
+ if (arg_personality != PERSONALITY_INVALID) {
+ r = safe_personality(arg_personality);
+ if (r < 0)
+ return log_error_errno(r, "personality() failed: %m");
+ } else if (secondary) {
+ r = safe_personality(PER_LINUX32);
+ if (r < 0)
+ return log_error_errno(r, "personality() failed: %m");
+ }
+
+#if HAVE_SELINUX
+ if (arg_selinux_context)
+ if (setexeccon(arg_selinux_context) < 0)
+ return log_error_errno(errno, "setexeccon(\"%s\") failed: %m", arg_selinux_context);
+#endif
+
+ r = change_uid_gid(arg_user, &home);
+ if (r < 0)
+ return r;
+
+ if (arg_no_new_privileges)
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0)
+ return log_error_errno(errno, "Failed to disable new privileges: %m");
+
+ /* LXC sets container=lxc, so follow the scheme here */
+ envp[n_env++] = strjoina("container=", arg_container_service_name);
+
+ envp[n_env] = strv_find_prefix(environ, "TERM=");
+ if (envp[n_env])
+ n_env++;
+
+ if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
+ (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
+ (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0))
+ return log_oom();
+
+ assert(!sd_id128_is_null(arg_uuid));
+
+ if (asprintf((char**)(envp + n_env++), "container_uuid=%s", id128_to_uuid_string(arg_uuid, as_uuid)) < 0)
+ return log_oom();
+
+ if (fdset_size(fds) > 0) {
+ r = fdset_cloexec(fds, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unset O_CLOEXEC for file descriptors.");
+
+ if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", fdset_size(fds)) < 0) ||
+ (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0))
+ return log_oom();
+ }
+ if (asprintf((char **)(envp + n_env++), "NOTIFY_SOCKET=%s", NSPAWN_NOTIFY_SOCKET_PATH) < 0)
+ return log_oom();
+
+ env_use = strv_env_merge(2, envp, arg_setenv);
+ if (!env_use)
+ return log_oom();
+
+ /* Let the parent know that we are ready and
+ * wait until the parent is ready with the
+ * setup, too... */
+ if (!barrier_place_and_sync(barrier)) /* #5 */
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
+ "Parent died too early");
+
+ if (arg_chdir)
+ if (chdir(arg_chdir) < 0)
+ return log_error_errno(errno, "Failed to change to specified working directory %s: %m", arg_chdir);
+
+ if (arg_start_mode == START_PID2) {
+ r = stub_pid1(arg_uuid);
+ if (r < 0)
+ return r;
+ }
+
+ /* Now, explicitly close the log, so that we then can close all remaining fds. Closing the log explicitly first
+ * has the benefit that the logging subsystem knows about it, and is thus ready to be reopened should we need
+ * it again. Note that the other fds closed here are at least the locking and barrier fds. */
+ log_close();
+ log_set_open_when_needed(true);
+
+ (void) fdset_close_others(fds);
+
+ if (arg_start_mode == START_BOOT) {
+ char **a;
+ size_t m;
+
+ /* Automatically search for the init system */
+
+ m = strv_length(arg_parameters);
+ a = newa(char*, m + 2);
+ memcpy_safe(a + 1, arg_parameters, m * sizeof(char*));
+ a[1 + m] = NULL;
+
+ a[0] = (char*) "/usr/lib/systemd/systemd";
+ execve(a[0], a, env_use);
+
+ a[0] = (char*) "/lib/systemd/systemd";
+ execve(a[0], a, env_use);
+
+ a[0] = (char*) "/sbin/init";
+ execve(a[0], a, env_use);
+
+ exec_target = "/usr/lib/systemd/systemd, /lib/systemd/systemd, /sbin/init";
+ } else if (!strv_isempty(arg_parameters)) {
+ const char *dollar_path;
+
+ exec_target = arg_parameters[0];
+
+ /* Use the user supplied search $PATH if there is one, or DEFAULT_PATH_COMPAT if not to search the
+ * binary. */
+ dollar_path = strv_env_get(env_use, "PATH");
+ if (dollar_path) {
+ if (putenv((char*) dollar_path) != 0)
+ return log_error_errno(errno, "Failed to update $PATH: %m");
+ }
+
+ execvpe(arg_parameters[0], arg_parameters, env_use);
+ } else {
+ if (!arg_chdir)
+ /* If we cannot change the directory, we'll end up in /, that is expected. */
+ (void) chdir(home ?: "/root");
+
+ execle("/bin/bash", "-bash", NULL, env_use);
+ execle("/bin/sh", "-sh", NULL, env_use);
+
+ exec_target = "/bin/bash, /bin/sh";
+ }
+
+ return log_error_errno(errno, "execv(%s) failed: %m", exec_target);
+}
+
+static int setup_sd_notify_child(void) {
+ _cleanup_close_ int fd = -1;
+ union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = NSPAWN_NOTIFY_SOCKET_PATH,
+ };
+ int r;
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to allocate notification socket: %m");
+
+ (void) mkdir_parents(NSPAWN_NOTIFY_SOCKET_PATH, 0755);
+ (void) sockaddr_un_unlink(&sa.un);
+
+ r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return log_error_errno(errno, "bind(" NSPAWN_NOTIFY_SOCKET_PATH ") failed: %m");
+
+ r = userns_lchown(NSPAWN_NOTIFY_SOCKET_PATH, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to chown " NSPAWN_NOTIFY_SOCKET_PATH ": %m");
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_error_errno(r, "SO_PASSCRED failed: %m");
+
+ return TAKE_FD(fd);
+}
+
+static int outer_child(
+ Barrier *barrier,
+ const char *directory,
+ const char *console,
+ DissectedImage *dissected_image,
+ bool interactive,
+ bool secondary,
+ int pid_socket,
+ int uuid_socket,
+ int notify_socket,
+ int kmsg_socket,
+ int rtnl_socket,
+ int uid_shift_socket,
+ int unified_cgroup_hierarchy_socket,
+ FDSet *fds,
+ int netns_fd) {
+
+ _cleanup_close_ int fd = -1;
+ int r, which_failed;
+ pid_t pid;
+ ssize_t l;
+
+ /* This is the "outer" child process, i.e the one forked off by the container manager itself. It already has
+ * its own CLONE_NEWNS namespace (which was created by the clone()). It still lives in the host's CLONE_NEWPID,
+ * CLONE_NEWUTS, CLONE_NEWIPC, CLONE_NEWUSER and CLONE_NEWNET namespaces. After it completed a number of
+ * initializations a second child (the "inner" one) is forked off it, and it exits. */
+
+ assert(barrier);
+ assert(directory);
+ assert(console);
+ assert(pid_socket >= 0);
+ assert(uuid_socket >= 0);
+ assert(notify_socket >= 0);
+ assert(kmsg_socket >= 0);
+
+ if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
+ return log_error_errno(errno, "PR_SET_PDEATHSIG failed: %m");
+
+ if (interactive) {
+ int terminal;
+
+ terminal = open_terminal(console, O_RDWR);
+ if (terminal < 0)
+ return log_error_errno(terminal, "Failed to open console: %m");
+
+ /* Make sure we can continue logging to the original stderr, even if stderr points elsewhere now */
+ r = log_dup_console();
+ if (r < 0)
+ return log_error_errno(r, "Failed to duplicate stderr: %m");
+
+ r = rearrange_stdio(terminal, terminal, terminal); /* invalidates 'terminal' on success and failure */
+ if (r < 0)
+ return log_error_errno(r, "Failed to move console to stdin/stdout/stderr: %m");
+ }
+
+ r = reset_audit_loginuid();
+ if (r < 0)
+ return r;
+
+ /* Mark everything as slave, so that we still
+ * receive mounts from the real root, but don't
+ * propagate mounts to the real root. */
+ r = mount_verbose(LOG_ERR, NULL, "/", NULL, MS_SLAVE|MS_REC, NULL);
+ if (r < 0)
+ return r;
+
+ if (dissected_image) {
+ /* If we are operating on a disk image, then mount its root directory now, but leave out the rest. We
+ * can read the UID shift from it if we need to. Further down we'll mount the rest, but then with the
+ * uid shift known. That way we can mount VFAT file systems shifted to the right place right away. This
+ * makes sure ESP partitions and userns are compatible. */
+
+ r = dissected_image_mount(dissected_image, directory, arg_uid_shift,
+ DISSECT_IMAGE_MOUNT_ROOT_ONLY|DISSECT_IMAGE_DISCARD_ON_LOOP|
+ (arg_read_only ? DISSECT_IMAGE_READ_ONLY : 0)|
+ (arg_start_mode == START_BOOT ? DISSECT_IMAGE_VALIDATE_OS : 0));
+ if (r < 0)
+ return r;
+ }
+
+ r = determine_uid_shift(directory);
+ if (r < 0)
+ return r;
+
+ if (arg_userns_mode != USER_NAMESPACE_NO) {
+ /* Let the parent know which UID shift we read from the image */
+ l = send(uid_shift_socket, &arg_uid_shift, sizeof(arg_uid_shift), MSG_NOSIGNAL);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to send UID shift: %m");
+ if (l != sizeof(arg_uid_shift))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Short write while sending UID shift.");
+
+ if (arg_userns_mode == USER_NAMESPACE_PICK) {
+ /* When we are supposed to pick the UID shift, the parent will check now whether the UID shift
+ * we just read from the image is available. If yes, it will send the UID shift back to us, if
+ * not it will pick a different one, and send it back to us. */
+
+ l = recv(uid_shift_socket, &arg_uid_shift, sizeof(arg_uid_shift), 0);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to recv UID shift: %m");
+ if (l != sizeof(arg_uid_shift))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Short read while receiving UID shift.");
+ }
+
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO,
+ "Selected user namespace base " UID_FMT " and range " UID_FMT ".", arg_uid_shift, arg_uid_range);
+ }
+
+ if (dissected_image) {
+ /* Now we know the uid shift, let's now mount everything else that might be in the image. */
+ r = dissected_image_mount(dissected_image, directory, arg_uid_shift,
+ DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY|DISSECT_IMAGE_DISCARD_ON_LOOP|(arg_read_only ? DISSECT_IMAGE_READ_ONLY : 0));
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_UNKNOWN) {
+ /* OK, we don't know yet which cgroup mode to use yet. Let's figure it out, and tell the parent. */
+
+ r = detect_unified_cgroup_hierarchy_from_image(directory);
+ if (r < 0)
+ return r;
+
+ l = send(unified_cgroup_hierarchy_socket, &arg_unified_cgroup_hierarchy, sizeof(arg_unified_cgroup_hierarchy), MSG_NOSIGNAL);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to send cgroup mode: %m");
+ if (l != sizeof(arg_unified_cgroup_hierarchy))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Short write while sending cgroup mode.");
+
+ unified_cgroup_hierarchy_socket = safe_close(unified_cgroup_hierarchy_socket);
+ }
+
+ /* Turn directory into bind mount */
+ r = mount_verbose(LOG_ERR, directory, directory, NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ return r;
+
+ r = setup_pivot_root(
+ directory,
+ arg_pivot_root_new,
+ arg_pivot_root_old);
+ if (r < 0)
+ return r;
+
+ r = setup_volatile(
+ directory,
+ arg_volatile_mode,
+ arg_userns_mode != USER_NAMESPACE_NO,
+ arg_uid_shift,
+ arg_uid_range,
+ arg_selinux_context);
+ if (r < 0)
+ return r;
+
+ r = setup_volatile_state(
+ directory,
+ arg_volatile_mode,
+ arg_userns_mode != USER_NAMESPACE_NO,
+ arg_uid_shift,
+ arg_uid_range,
+ arg_selinux_context);
+ if (r < 0)
+ return r;
+
+ /* Mark everything as shared so our mounts get propagated down. This is
+ * required to make new bind mounts available in systemd services
+ * inside the containter that create a new mount namespace.
+ * See https://github.com/systemd/systemd/issues/3860
+ * Further submounts (such as /dev) done after this will inherit the
+ * shared propagation mode. */
+ r = mount_verbose(LOG_ERR, NULL, directory, NULL, MS_SHARED|MS_REC, NULL);
+ if (r < 0)
+ return r;
+
+ r = recursive_chown(directory, arg_uid_shift, arg_uid_range);
+ if (r < 0)
+ return r;
+
+ r = base_filesystem_create(directory, arg_uid_shift, (gid_t) arg_uid_shift);
+ if (r < 0)
+ return r;
+
+ if (arg_read_only) {
+ r = bind_remount_recursive(directory, true, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make tree read-only: %m");
+ }
+
+ r = mount_all(directory,
+ arg_mount_settings,
+ arg_uid_shift,
+ arg_selinux_apifs_context);
+ if (r < 0)
+ return r;
+
+ r = copy_devnodes(directory);
+ if (r < 0)
+ return r;
+
+ dev_setup(directory, arg_uid_shift, arg_uid_shift);
+
+ r = setup_pts(directory);
+ if (r < 0)
+ return r;
+
+ r = setup_propagate(directory);
+ if (r < 0)
+ return r;
+
+ r = setup_dev_console(directory, console);
+ if (r < 0)
+ return r;
+
+ r = setup_keyring();
+ if (r < 0)
+ return r;
+
+ r = setup_seccomp(arg_caps_retain, arg_syscall_whitelist, arg_syscall_blacklist);
+ if (r < 0)
+ return r;
+
+ r = setup_timezone(directory);
+ if (r < 0)
+ return r;
+
+ r = setup_resolv_conf(directory);
+ if (r < 0)
+ return r;
+
+ r = setup_machine_id(directory);
+ if (r < 0)
+ return r;
+
+ r = setup_journal(directory);
+ if (r < 0)
+ return r;
+
+ r = mount_custom(
+ directory,
+ arg_custom_mounts,
+ arg_n_custom_mounts,
+ arg_userns_mode != USER_NAMESPACE_NO,
+ arg_uid_shift,
+ arg_uid_range,
+ arg_selinux_apifs_context);
+ if (r < 0)
+ return r;
+
+ if (!arg_use_cgns) {
+ r = mount_cgroups(
+ directory,
+ arg_unified_cgroup_hierarchy,
+ arg_userns_mode != USER_NAMESPACE_NO,
+ arg_uid_shift,
+ arg_uid_range,
+ arg_selinux_apifs_context,
+ false);
+ if (r < 0)
+ return r;
+ }
+
+ r = mount_move_root(directory);
+ if (r < 0)
+ return log_error_errno(r, "Failed to move root directory: %m");
+
+ fd = setup_sd_notify_child();
+ if (fd < 0)
+ return fd;
+
+ r = setrlimit_closest_all((const struct rlimit *const*) arg_rlimit, &which_failed);
+ if (r < 0)
+ return log_error_errno(r, "Failed to apply resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
+
+ pid = raw_clone(SIGCHLD|CLONE_NEWNS|
+ arg_clone_ns_flags |
+ (arg_userns_mode != USER_NAMESPACE_NO ? CLONE_NEWUSER : 0));
+ if (pid < 0)
+ return log_error_errno(errno, "Failed to fork inner child: %m");
+ if (pid == 0) {
+ pid_socket = safe_close(pid_socket);
+ uuid_socket = safe_close(uuid_socket);
+ notify_socket = safe_close(notify_socket);
+ uid_shift_socket = safe_close(uid_shift_socket);
+
+ /* The inner child has all namespaces that are
+ * requested, so that we all are owned by the user if
+ * user namespaces are turned on. */
+
+ if (arg_network_namespace_path) {
+ r = namespace_enter(-1, -1, netns_fd, -1, -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to join network namespace: %m");
+ }
+
+ r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ l = send(pid_socket, &pid, sizeof(pid), MSG_NOSIGNAL);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to send PID: %m");
+ if (l != sizeof(pid))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Short write while sending PID.");
+
+ l = send(uuid_socket, &arg_uuid, sizeof(arg_uuid), MSG_NOSIGNAL);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to send machine ID: %m");
+ if (l != sizeof(arg_uuid))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Short write while sending machine ID.");
+
+ l = send_one_fd(notify_socket, fd, 0);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to send notify fd: %m");
+
+ pid_socket = safe_close(pid_socket);
+ uuid_socket = safe_close(uuid_socket);
+ notify_socket = safe_close(notify_socket);
+ kmsg_socket = safe_close(kmsg_socket);
+ rtnl_socket = safe_close(rtnl_socket);
+ netns_fd = safe_close(netns_fd);
+
+ return 0;
+}
+
+static int uid_shift_pick(uid_t *shift, LockFile *ret_lock_file) {
+ bool tried_hashed = false;
+ unsigned n_tries = 100;
+ uid_t candidate;
+ int r;
+
+ assert(shift);
+ assert(ret_lock_file);
+ assert(arg_userns_mode == USER_NAMESPACE_PICK);
+ assert(arg_uid_range == 0x10000U);
+
+ candidate = *shift;
+
+ (void) mkdir("/run/systemd/nspawn-uid", 0755);
+
+ for (;;) {
+ char lock_path[STRLEN("/run/systemd/nspawn-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+ _cleanup_(release_lock_file) LockFile lf = LOCK_FILE_INIT;
+
+ if (--n_tries <= 0)
+ return -EBUSY;
+
+ if (candidate < CONTAINER_UID_BASE_MIN || candidate > CONTAINER_UID_BASE_MAX)
+ goto next;
+ if ((candidate & UINT32_C(0xFFFF)) != 0)
+ goto next;
+
+ xsprintf(lock_path, "/run/systemd/nspawn-uid/" UID_FMT, candidate);
+ r = make_lock_file(lock_path, LOCK_EX|LOCK_NB, &lf);
+ if (r == -EBUSY) /* Range already taken by another nspawn instance */
+ goto next;
+ if (r < 0)
+ return r;
+
+ /* Make some superficial checks whether the range is currently known in the user database */
+ if (getpwuid(candidate))
+ goto next;
+ if (getpwuid(candidate + UINT32_C(0xFFFE)))
+ goto next;
+ if (getgrgid(candidate))
+ goto next;
+ if (getgrgid(candidate + UINT32_C(0xFFFE)))
+ goto next;
+
+ *ret_lock_file = lf;
+ lf = (struct LockFile) LOCK_FILE_INIT;
+ *shift = candidate;
+ return 0;
+
+ next:
+ if (arg_machine && !tried_hashed) {
+ /* Try to hash the base from the container name */
+
+ static const uint8_t hash_key[] = {
+ 0xe1, 0x56, 0xe0, 0xf0, 0x4a, 0xf0, 0x41, 0xaf,
+ 0x96, 0x41, 0xcf, 0x41, 0x33, 0x94, 0xff, 0x72
+ };
+
+ candidate = (uid_t) siphash24(arg_machine, strlen(arg_machine), hash_key);
+
+ tried_hashed = true;
+ } else
+ random_bytes(&candidate, sizeof(candidate));
+
+ candidate = (candidate % (CONTAINER_UID_BASE_MAX - CONTAINER_UID_BASE_MIN)) + CONTAINER_UID_BASE_MIN;
+ candidate &= (uid_t) UINT32_C(0xFFFF0000);
+ }
+}
+
+static int setup_uid_map(pid_t pid) {
+ char uid_map[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(uid_t) + 1], line[DECIMAL_STR_MAX(uid_t)*3+3+1];
+ int r;
+
+ assert(pid > 1);
+
+ xsprintf(uid_map, "/proc/" PID_FMT "/uid_map", pid);
+ xsprintf(line, UID_FMT " " UID_FMT " " UID_FMT "\n", 0, arg_uid_shift, arg_uid_range);
+ r = write_string_file(uid_map, line, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write UID map: %m");
+
+ /* We always assign the same UID and GID ranges */
+ xsprintf(uid_map, "/proc/" PID_FMT "/gid_map", pid);
+ r = write_string_file(uid_map, line, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write GID map: %m");
+
+ return 0;
+}
+
+static int nspawn_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ char buf[NOTIFY_BUFFER_MAX+1];
+ char *p = NULL;
+ struct iovec iovec = {
+ .iov_base = buf,
+ .iov_len = sizeof(buf)-1,
+ };
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int) * NOTIFY_FD_MAX)];
+ } control = {};
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct cmsghdr *cmsg;
+ struct ucred *ucred = NULL;
+ ssize_t n;
+ pid_t inner_child_pid;
+ _cleanup_strv_free_ char **tags = NULL;
+
+ assert(userdata);
+
+ inner_child_pid = PTR_TO_PID(userdata);
+
+ if (revents != EPOLLIN) {
+ log_warning("Got unexpected poll event for notify fd.");
+ return 0;
+ }
+
+ n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (n < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return log_warning_errno(errno, "Couldn't read notification socket: %m");
+ }
+ cmsg_close_all(&msghdr);
+
+ CMSG_FOREACH(cmsg, &msghdr) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
+
+ ucred = (struct ucred*) CMSG_DATA(cmsg);
+ }
+ }
+
+ if (!ucred || ucred->pid != inner_child_pid) {
+ log_debug("Received notify message without valid credentials. Ignoring.");
+ return 0;
+ }
+
+ if ((size_t) n >= sizeof(buf)) {
+ log_warning("Received notify message exceeded maximum size. Ignoring.");
+ return 0;
+ }
+
+ buf[n] = 0;
+ tags = strv_split(buf, "\n\r");
+ if (!tags)
+ return log_oom();
+
+ if (strv_find(tags, "READY=1"))
+ sd_notifyf(false, "READY=1\n");
+
+ p = strv_find_startswith(tags, "STATUS=");
+ if (p)
+ sd_notifyf(false, "STATUS=Container running: %s", p);
+
+ return 0;
+}
+
+static int setup_sd_notify_parent(sd_event *event, int fd, pid_t *inner_child_pid, sd_event_source **notify_event_source) {
+ int r;
+
+ r = sd_event_add_io(event, notify_event_source, fd, EPOLLIN, nspawn_dispatch_notify_fd, inner_child_pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate notify event source: %m");
+
+ (void) sd_event_source_set_description(*notify_event_source, "nspawn-notify");
+
+ return 0;
+}
+
+static int merge_settings(Settings *settings, const char *path) {
+ int rl;
+
+ assert(settings);
+ assert(path);
+
+ /* Copy over bits from the settings, unless they have been explicitly masked by command line switches. Note
+ * that this steals the fields of the Settings* structure, and hence modifies it. */
+
+ if ((arg_settings_mask & SETTING_START_MODE) == 0 &&
+ settings->start_mode >= 0) {
+ arg_start_mode = settings->start_mode;
+ strv_free_and_replace(arg_parameters, settings->parameters);
+ }
+
+ if ((arg_settings_mask & SETTING_EPHEMERAL) == 0)
+ arg_ephemeral = settings->ephemeral;
+
+ if ((arg_settings_mask & SETTING_PIVOT_ROOT) == 0 &&
+ settings->pivot_root_new) {
+ free_and_replace(arg_pivot_root_new, settings->pivot_root_new);
+ free_and_replace(arg_pivot_root_old, settings->pivot_root_old);
+ }
+
+ if ((arg_settings_mask & SETTING_WORKING_DIRECTORY) == 0 &&
+ settings->working_directory)
+ free_and_replace(arg_chdir, settings->working_directory);
+
+ if ((arg_settings_mask & SETTING_ENVIRONMENT) == 0 &&
+ settings->environment)
+ strv_free_and_replace(arg_setenv, settings->environment);
+
+ if ((arg_settings_mask & SETTING_USER) == 0 &&
+ settings->user)
+ free_and_replace(arg_user, settings->user);
+
+ if ((arg_settings_mask & SETTING_CAPABILITY) == 0) {
+ uint64_t plus;
+
+ plus = settings->capability;
+ if (settings_private_network(settings))
+ plus |= (1ULL << CAP_NET_ADMIN);
+
+ if (!arg_settings_trusted && plus != 0) {
+ if (settings->capability != 0)
+ log_warning("Ignoring Capability= setting, file %s is not trusted.", path);
+ } else
+ arg_caps_retain |= plus;
+
+ arg_caps_retain &= ~settings->drop_capability;
+ }
+
+ if ((arg_settings_mask & SETTING_KILL_SIGNAL) == 0 &&
+ settings->kill_signal > 0)
+ arg_kill_signal = settings->kill_signal;
+
+ if ((arg_settings_mask & SETTING_PERSONALITY) == 0 &&
+ settings->personality != PERSONALITY_INVALID)
+ arg_personality = settings->personality;
+
+ if ((arg_settings_mask & SETTING_MACHINE_ID) == 0 &&
+ !sd_id128_is_null(settings->machine_id)) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring MachineID= setting, file %s is not trusted.", path);
+ else
+ arg_uuid = settings->machine_id;
+ }
+
+ if ((arg_settings_mask & SETTING_READ_ONLY) == 0 &&
+ settings->read_only >= 0)
+ arg_read_only = settings->read_only;
+
+ if ((arg_settings_mask & SETTING_VOLATILE_MODE) == 0 &&
+ settings->volatile_mode != _VOLATILE_MODE_INVALID)
+ arg_volatile_mode = settings->volatile_mode;
+
+ if ((arg_settings_mask & SETTING_CUSTOM_MOUNTS) == 0 &&
+ settings->n_custom_mounts > 0) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring TemporaryFileSystem=, Bind= and BindReadOnly= settings, file %s is not trusted.", path);
+ else {
+ custom_mount_free_all(arg_custom_mounts, arg_n_custom_mounts);
+ arg_custom_mounts = TAKE_PTR(settings->custom_mounts);
+ arg_n_custom_mounts = settings->n_custom_mounts;
+ settings->n_custom_mounts = 0;
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_NETWORK) == 0 &&
+ (settings->private_network >= 0 ||
+ settings->network_veth >= 0 ||
+ settings->network_bridge ||
+ settings->network_zone ||
+ settings->network_interfaces ||
+ settings->network_macvlan ||
+ settings->network_ipvlan ||
+ settings->network_veth_extra)) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring network settings, file %s is not trusted.", path);
+ else {
+ arg_network_veth = settings_network_veth(settings);
+ arg_private_network = settings_private_network(settings);
+
+ strv_free_and_replace(arg_network_interfaces, settings->network_interfaces);
+ strv_free_and_replace(arg_network_macvlan, settings->network_macvlan);
+ strv_free_and_replace(arg_network_ipvlan, settings->network_ipvlan);
+ strv_free_and_replace(arg_network_veth_extra, settings->network_veth_extra);
+
+ free_and_replace(arg_network_bridge, settings->network_bridge);
+ free_and_replace(arg_network_zone, settings->network_zone);
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_EXPOSE_PORTS) == 0 &&
+ settings->expose_ports) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring Port= setting, file %s is not trusted.", path);
+ else {
+ expose_port_free_all(arg_expose_ports);
+ arg_expose_ports = TAKE_PTR(settings->expose_ports);
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_USERNS) == 0 &&
+ settings->userns_mode != _USER_NAMESPACE_MODE_INVALID) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring PrivateUsers= and PrivateUsersChown= settings, file %s is not trusted.", path);
+ else {
+ arg_userns_mode = settings->userns_mode;
+ arg_uid_shift = settings->uid_shift;
+ arg_uid_range = settings->uid_range;
+ arg_userns_chown = settings->userns_chown;
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_NOTIFY_READY) == 0)
+ arg_notify_ready = settings->notify_ready;
+
+ if ((arg_settings_mask & SETTING_SYSCALL_FILTER) == 0) {
+
+ if (!arg_settings_trusted && !strv_isempty(arg_syscall_whitelist))
+ log_warning("Ignoring SystemCallFilter= settings, file %s is not trusted.", path);
+ else {
+ strv_free_and_replace(arg_syscall_whitelist, settings->syscall_whitelist);
+ strv_free_and_replace(arg_syscall_blacklist, settings->syscall_blacklist);
+ }
+ }
+
+ for (rl = 0; rl < _RLIMIT_MAX; rl ++) {
+ if ((arg_settings_mask & (SETTING_RLIMIT_FIRST << rl)))
+ continue;
+
+ if (!settings->rlimit[rl])
+ continue;
+
+ if (!arg_settings_trusted) {
+ log_warning("Ignoring Limit%s= setting, file '%s' is not trusted.", rlimit_to_string(rl), path);
+ continue;
+ }
+
+ free_and_replace(arg_rlimit[rl], settings->rlimit[rl]);
+ }
+
+ if ((arg_settings_mask & SETTING_HOSTNAME) == 0 &&
+ settings->hostname)
+ free_and_replace(arg_hostname, settings->hostname);
+
+ if ((arg_settings_mask & SETTING_NO_NEW_PRIVILEGES) == 0 &&
+ settings->no_new_privileges >= 0)
+ arg_no_new_privileges = settings->no_new_privileges;
+
+ if ((arg_settings_mask & SETTING_OOM_SCORE_ADJUST) == 0 &&
+ settings->oom_score_adjust_set) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring OOMScoreAdjust= setting, file '%s' is not trusted.", path);
+ else {
+ arg_oom_score_adjust = settings->oom_score_adjust;
+ arg_oom_score_adjust_set = true;
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_CPU_AFFINITY) == 0 &&
+ settings->cpuset) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring CPUAffinity= setting, file '%s' is not trusted.", path);
+ else {
+ if (arg_cpuset)
+ CPU_FREE(arg_cpuset);
+ arg_cpuset = TAKE_PTR(settings->cpuset);
+ arg_cpuset_ncpus = settings->cpuset_ncpus;
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_RESOLV_CONF) == 0 &&
+ settings->resolv_conf != _RESOLV_CONF_MODE_INVALID)
+ arg_resolv_conf = settings->resolv_conf;
+
+ if ((arg_settings_mask & SETTING_LINK_JOURNAL) == 0 &&
+ settings->link_journal != _LINK_JOURNAL_INVALID) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring journal link setting, file '%s' is not trusted.", path);
+ else {
+ arg_link_journal = settings->link_journal;
+ arg_link_journal_try = settings->link_journal_try;
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_TIMEZONE) == 0 &&
+ settings->timezone != _TIMEZONE_MODE_INVALID)
+ arg_timezone = settings->timezone;
+
+ return 0;
+}
+
+static int load_settings(void) {
+ _cleanup_(settings_freep) Settings *settings = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ const char *fn, *i;
+ int r;
+
+ /* If all settings are masked, there's no point in looking for
+ * the settings file */
+ if ((arg_settings_mask & _SETTINGS_MASK_ALL) == _SETTINGS_MASK_ALL)
+ return 0;
+
+ fn = strjoina(arg_machine, ".nspawn");
+
+ /* We first look in the admin's directories in /etc and /run */
+ FOREACH_STRING(i, "/etc/systemd/nspawn", "/run/systemd/nspawn") {
+ _cleanup_free_ char *j = NULL;
+
+ j = strjoin(i, "/", fn);
+ if (!j)
+ return log_oom();
+
+ f = fopen(j, "re");
+ if (f) {
+ p = TAKE_PTR(j);
+
+ /* By default, we trust configuration from /etc and /run */
+ if (arg_settings_trusted < 0)
+ arg_settings_trusted = true;
+
+ break;
+ }
+
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to open %s: %m", j);
+ }
+
+ if (!f) {
+ /* After that, let's look for a file next to the
+ * actual image we shall boot. */
+
+ if (arg_image) {
+ p = file_in_same_dir(arg_image, fn);
+ if (!p)
+ return log_oom();
+ } else if (arg_directory) {
+ p = file_in_same_dir(arg_directory, fn);
+ if (!p)
+ return log_oom();
+ }
+
+ if (p) {
+ f = fopen(p, "re");
+ if (!f && errno != ENOENT)
+ return log_error_errno(errno, "Failed to open %s: %m", p);
+
+ /* By default, we do not trust configuration from /var/lib/machines */
+ if (arg_settings_trusted < 0)
+ arg_settings_trusted = false;
+ }
+ }
+
+ if (!f)
+ return 0;
+
+ log_debug("Settings are trusted: %s", yes_no(arg_settings_trusted));
+
+ r = settings_load(f, p, &settings);
+ if (r < 0)
+ return r;
+
+ return merge_settings(settings, p);
+}
+
+static int run(int master,
+ const char* console,
+ DissectedImage *dissected_image,
+ bool interactive,
+ bool secondary,
+ FDSet *fds,
+ char veth_name[IFNAMSIZ], bool *veth_created,
+ union in_addr_union *exposed,
+ pid_t *pid, int *ret) {
+
+ static const struct sigaction sa = {
+ .sa_handler = nop_signal_handler,
+ .sa_flags = SA_NOCLDSTOP|SA_RESTART,
+ };
+
+ _cleanup_(release_lock_file) LockFile uid_shift_lock = LOCK_FILE_INIT;
+ _cleanup_close_ int etc_passwd_lock = -1;
+ _cleanup_close_pair_ int
+ kmsg_socket_pair[2] = { -1, -1 },
+ rtnl_socket_pair[2] = { -1, -1 },
+ pid_socket_pair[2] = { -1, -1 },
+ uuid_socket_pair[2] = { -1, -1 },
+ notify_socket_pair[2] = { -1, -1 },
+ uid_shift_socket_pair[2] = { -1, -1 },
+ unified_cgroup_hierarchy_socket_pair[2] = { -1, -1};
+
+ _cleanup_close_ int notify_socket= -1;
+ _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *notify_event_source = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(pty_forward_freep) PTYForward *forward = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ ContainerStatus container_status = 0;
+ char last_char = 0;
+ int ifi = 0, r;
+ ssize_t l;
+ sigset_t mask_chld;
+ _cleanup_close_ int netns_fd = -1;
+
+ assert_se(sigemptyset(&mask_chld) == 0);
+ assert_se(sigaddset(&mask_chld, SIGCHLD) == 0);
+
+ if (arg_userns_mode == USER_NAMESPACE_PICK) {
+ /* When we shall pick the UID/GID range, let's first lock /etc/passwd, so that we can safely
+ * check with getpwuid() if the specific user already exists. Note that /etc might be
+ * read-only, in which case this will fail with EROFS. But that's really OK, as in that case we
+ * can be reasonably sure that no users are going to be added. Note that getpwuid() checks are
+ * really just an extra safety net. We kinda assume that the UID range we allocate from is
+ * really ours. */
+
+ etc_passwd_lock = take_etc_passwd_lock(NULL);
+ if (etc_passwd_lock < 0 && etc_passwd_lock != -EROFS)
+ return log_error_errno(etc_passwd_lock, "Failed to take /etc/passwd lock: %m");
+ }
+
+ r = barrier_create(&barrier);
+ if (r < 0)
+ return log_error_errno(r, "Cannot initialize IPC barrier: %m");
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create kmsg socket pair: %m");
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, rtnl_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create rtnl socket pair: %m");
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pid_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create pid socket pair: %m");
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, uuid_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create id socket pair: %m");
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, notify_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create notify socket pair: %m");
+
+ if (arg_userns_mode != USER_NAMESPACE_NO)
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, uid_shift_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create uid shift socket pair: %m");
+
+ if (arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_UNKNOWN)
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, unified_cgroup_hierarchy_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create unified cgroup socket pair: %m");
+
+ /* Child can be killed before execv(), so handle SIGCHLD in order to interrupt
+ * parent's blocking calls and give it a chance to call wait() and terminate. */
+ r = sigprocmask(SIG_UNBLOCK, &mask_chld, NULL);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to change the signal mask: %m");
+
+ r = sigaction(SIGCHLD, &sa, NULL);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to install SIGCHLD handler: %m");
+
+ if (arg_network_namespace_path) {
+ netns_fd = open(arg_network_namespace_path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (netns_fd < 0)
+ return log_error_errno(errno, "Cannot open file %s: %m", arg_network_namespace_path);
+
+ r = fd_is_network_ns(netns_fd);
+ if (r == -EUCLEAN)
+ log_debug_errno(r, "Cannot determine if passed network namespace path '%s' really refers to a network namespace, assuming it does.", arg_network_namespace_path);
+ else if (r < 0)
+ return log_error_errno(r, "Failed to check %s fs type: %m", arg_network_namespace_path);
+ else if (r == 0) {
+ log_error("Path %s doesn't refer to a network namespace, refusing.", arg_network_namespace_path);
+ return -EINVAL;
+ }
+ }
+
+ *pid = raw_clone(SIGCHLD|CLONE_NEWNS);
+ if (*pid < 0)
+ return log_error_errno(errno, "clone() failed%s: %m",
+ errno == EINVAL ?
+ ", do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in)" : "");
+
+ if (*pid == 0) {
+ /* The outer child only has a file system namespace. */
+ barrier_set_role(&barrier, BARRIER_CHILD);
+
+ master = safe_close(master);
+
+ kmsg_socket_pair[0] = safe_close(kmsg_socket_pair[0]);
+ rtnl_socket_pair[0] = safe_close(rtnl_socket_pair[0]);
+ pid_socket_pair[0] = safe_close(pid_socket_pair[0]);
+ uuid_socket_pair[0] = safe_close(uuid_socket_pair[0]);
+ notify_socket_pair[0] = safe_close(notify_socket_pair[0]);
+ uid_shift_socket_pair[0] = safe_close(uid_shift_socket_pair[0]);
+ unified_cgroup_hierarchy_socket_pair[0] = safe_close(unified_cgroup_hierarchy_socket_pair[0]);
+
+ (void) reset_all_signal_handlers();
+ (void) reset_signal_mask();
+
+ r = outer_child(&barrier,
+ arg_directory,
+ console,
+ dissected_image,
+ interactive,
+ secondary,
+ pid_socket_pair[1],
+ uuid_socket_pair[1],
+ notify_socket_pair[1],
+ kmsg_socket_pair[1],
+ rtnl_socket_pair[1],
+ uid_shift_socket_pair[1],
+ unified_cgroup_hierarchy_socket_pair[1],
+ fds,
+ netns_fd);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ barrier_set_role(&barrier, BARRIER_PARENT);
+
+ fds = fdset_free(fds);
+
+ kmsg_socket_pair[1] = safe_close(kmsg_socket_pair[1]);
+ rtnl_socket_pair[1] = safe_close(rtnl_socket_pair[1]);
+ pid_socket_pair[1] = safe_close(pid_socket_pair[1]);
+ uuid_socket_pair[1] = safe_close(uuid_socket_pair[1]);
+ notify_socket_pair[1] = safe_close(notify_socket_pair[1]);
+ uid_shift_socket_pair[1] = safe_close(uid_shift_socket_pair[1]);
+ unified_cgroup_hierarchy_socket_pair[1] = safe_close(unified_cgroup_hierarchy_socket_pair[1]);
+
+ if (arg_userns_mode != USER_NAMESPACE_NO) {
+ /* The child just let us know the UID shift it might have read from the image. */
+ l = recv(uid_shift_socket_pair[0], &arg_uid_shift, sizeof arg_uid_shift, 0);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to read UID shift: %m");
+ if (l != sizeof arg_uid_shift) {
+ log_error("Short read while reading UID shift.");
+ return -EIO;
+ }
+
+ if (arg_userns_mode == USER_NAMESPACE_PICK) {
+ /* If we are supposed to pick the UID shift, let's try to use the shift read from the
+ * image, but if that's already in use, pick a new one, and report back to the child,
+ * which one we now picked. */
+
+ r = uid_shift_pick(&arg_uid_shift, &uid_shift_lock);
+ if (r < 0)
+ return log_error_errno(r, "Failed to pick suitable UID/GID range: %m");
+
+ l = send(uid_shift_socket_pair[0], &arg_uid_shift, sizeof arg_uid_shift, MSG_NOSIGNAL);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to send UID shift: %m");
+ if (l != sizeof arg_uid_shift) {
+ log_error("Short write while writing UID shift.");
+ return -EIO;
+ }
+ }
+ }
+
+ if (arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_UNKNOWN) {
+ /* The child let us know the support cgroup mode it might have read from the image. */
+ l = recv(unified_cgroup_hierarchy_socket_pair[0], &arg_unified_cgroup_hierarchy, sizeof(arg_unified_cgroup_hierarchy), 0);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to read cgroup mode: %m");
+ if (l != sizeof(arg_unified_cgroup_hierarchy)) {
+ log_error("Short read while reading cgroup mode (%zu bytes).%s",
+ l, l == 0 ? " The child is most likely dead." : "");
+ return -EIO;
+ }
+ }
+
+ /* Wait for the outer child. */
+ r = wait_for_terminate_and_check("(sd-namespace)", *pid, WAIT_LOG_ABNORMAL);
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EIO;
+
+ /* And now retrieve the PID of the inner child. */
+ l = recv(pid_socket_pair[0], pid, sizeof *pid, 0);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to read inner child PID: %m");
+ if (l != sizeof *pid) {
+ log_error("Short read while reading inner child PID.");
+ return -EIO;
+ }
+
+ /* We also retrieve container UUID in case it was generated by outer child */
+ l = recv(uuid_socket_pair[0], &arg_uuid, sizeof arg_uuid, 0);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to read container machine ID: %m");
+ if (l != sizeof(arg_uuid)) {
+ log_error("Short read while reading container machined ID.");
+ return -EIO;
+ }
+
+ /* We also retrieve the socket used for notifications generated by outer child */
+ notify_socket = receive_one_fd(notify_socket_pair[0], 0);
+ if (notify_socket < 0)
+ return log_error_errno(notify_socket,
+ "Failed to receive notification socket from the outer child: %m");
+
+ log_debug("Init process invoked as PID "PID_FMT, *pid);
+
+ if (arg_userns_mode != USER_NAMESPACE_NO) {
+ if (!barrier_place_and_sync(&barrier)) { /* #1 */
+ log_error("Child died too early.");
+ return -ESRCH;
+ }
+
+ r = setup_uid_map(*pid);
+ if (r < 0)
+ return r;
+
+ (void) barrier_place(&barrier); /* #2 */
+ }
+
+ if (arg_private_network) {
+ if (!arg_network_namespace_path) {
+ /* Wait until the child has unshared its network namespace. */
+ if (!barrier_place_and_sync(&barrier)) { /* #3 */
+ log_error("Child died too early");
+ return -ESRCH;
+ }
+ }
+
+ r = move_network_interfaces(*pid, arg_network_interfaces);
+ if (r < 0)
+ return r;
+
+ if (arg_network_veth) {
+ r = setup_veth(arg_machine, *pid, veth_name,
+ arg_network_bridge || arg_network_zone);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ ifi = r;
+
+ if (arg_network_bridge) {
+ /* Add the interface to a bridge */
+ r = setup_bridge(veth_name, arg_network_bridge, false);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ ifi = r;
+ } else if (arg_network_zone) {
+ /* Add the interface to a bridge, possibly creating it */
+ r = setup_bridge(veth_name, arg_network_zone, true);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ ifi = r;
+ }
+ }
+
+ r = setup_veth_extra(arg_machine, *pid, arg_network_veth_extra);
+ if (r < 0)
+ return r;
+
+ /* We created the primary and extra veth links now; let's remember this, so that we know to
+ remove them later on. Note that we don't bother with removing veth links that were created
+ here when their setup failed half-way, because in that case the kernel should be able to
+ remove them on its own, since they cannot be referenced by anything yet. */
+ *veth_created = true;
+
+ r = setup_macvlan(arg_machine, *pid, arg_network_macvlan);
+ if (r < 0)
+ return r;
+
+ r = setup_ipvlan(arg_machine, *pid, arg_network_ipvlan);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_register || !arg_keep_unit) {
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open system bus: %m");
+
+ r = sd_bus_set_close_on_exit(bus, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to disable close-on-exit behaviour: %m");
+ }
+
+ if (!arg_keep_unit) {
+ /* When a new scope is created for this container, then we'll be registered as its controller, in which
+ * case PID 1 will send us a friendly RequestStop signal, when it is asked to terminate the
+ * scope. Let's hook into that, and cleanly shut down the container, and print a friendly message. */
+
+ r = sd_bus_match_signal_async(
+ bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ NULL,
+ "org.freedesktop.systemd1.Scope",
+ "RequestStop",
+ on_request_stop, NULL, PID_TO_PTR(*pid));
+ if (r < 0)
+ return log_error_errno(r, "Failed to request RequestStop match: %m");
+ }
+
+ if (arg_register) {
+ r = register_machine(
+ bus,
+ arg_machine,
+ *pid,
+ arg_directory,
+ arg_uuid,
+ ifi,
+ arg_slice,
+ arg_custom_mounts, arg_n_custom_mounts,
+ arg_kill_signal,
+ arg_property,
+ arg_keep_unit,
+ arg_container_service_name);
+ if (r < 0)
+ return r;
+
+ } else if (!arg_keep_unit) {
+ r = allocate_scope(
+ bus,
+ arg_machine,
+ *pid,
+ arg_slice,
+ arg_custom_mounts, arg_n_custom_mounts,
+ arg_kill_signal,
+ arg_property);
+ if (r < 0)
+ return r;
+
+ } else if (arg_slice || arg_property)
+ log_notice("Machine and scope registration turned off, --slice= and --property= settings will have no effect.");
+
+ r = sync_cgroup(*pid, arg_unified_cgroup_hierarchy, arg_uid_shift);
+ if (r < 0)
+ return r;
+
+ r = create_subcgroup(*pid, arg_keep_unit, arg_unified_cgroup_hierarchy);
+ if (r < 0)
+ return r;
+
+ r = chown_cgroup(*pid, arg_unified_cgroup_hierarchy, arg_uid_shift);
+ if (r < 0)
+ return r;
+
+ /* Notify the child that the parent is ready with all
+ * its setup (including cgroup-ification), and that
+ * the child can now hand over control to the code to
+ * run inside the container. */
+ (void) barrier_place(&barrier); /* #4 */
+
+ /* Block SIGCHLD here, before notifying child.
+ * process_pty() will handle it with the other signals. */
+ assert_se(sigprocmask(SIG_BLOCK, &mask_chld, NULL) >= 0);
+
+ /* Reset signal to default */
+ r = default_signals(SIGCHLD, -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reset SIGCHLD: %m");
+
+ r = sd_event_new(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get default event source: %m");
+
+ (void) sd_event_set_watchdog(event, true);
+
+ if (bus) {
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+ }
+
+ r = setup_sd_notify_parent(event, notify_socket, PID_TO_PTR(*pid), &notify_event_source);
+ if (r < 0)
+ return r;
+
+ /* Let the child know that we are ready and wait that the child is completely ready now. */
+ if (!barrier_place_and_sync(&barrier)) { /* #5 */
+ log_error("Child died too early.");
+ return -ESRCH;
+ }
+
+ /* At this point we have made use of the UID we picked, and thus nss-mymachines
+ * will make them appear in getpwuid(), thus we can release the /etc/passwd lock. */
+ etc_passwd_lock = safe_close(etc_passwd_lock);
+
+ sd_notifyf(false,
+ "STATUS=Container running.\n"
+ "X_NSPAWN_LEADER_PID=" PID_FMT, *pid);
+ if (!arg_notify_ready)
+ (void) sd_notify(false, "READY=1\n");
+
+ if (arg_kill_signal > 0) {
+ /* Try to kill the init system on SIGINT or SIGTERM */
+ (void) sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, PID_TO_PTR(*pid));
+ (void) sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, PID_TO_PTR(*pid));
+ } else {
+ /* Immediately exit */
+ (void) sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ }
+
+ /* Exit when the child exits */
+ (void) sd_event_add_signal(event, NULL, SIGCHLD, on_sigchld, PID_TO_PTR(*pid));
+
+ if (arg_expose_ports) {
+ r = expose_port_watch_rtnl(event, rtnl_socket_pair[0], on_address_change, exposed, &rtnl);
+ if (r < 0)
+ return r;
+
+ (void) expose_port_execute(rtnl, arg_expose_ports, exposed);
+ }
+
+ rtnl_socket_pair[0] = safe_close(rtnl_socket_pair[0]);
+
+ r = pty_forward_new(event, master,
+ PTY_FORWARD_IGNORE_VHANGUP | (interactive ? 0 : PTY_FORWARD_READ_ONLY),
+ &forward);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create PTY forwarder: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ pty_forward_get_last_char(forward, &last_char);
+
+ forward = pty_forward_free(forward);
+
+ if (!arg_quiet && last_char != '\n')
+ putc('\n', stdout);
+
+ /* Kill if it is not dead yet anyway */
+ if (bus) {
+ if (arg_register)
+ terminate_machine(bus, arg_machine);
+ else if (!arg_keep_unit)
+ terminate_scope(bus, arg_machine);
+ }
+
+ /* Normally redundant, but better safe than sorry */
+ (void) kill(*pid, SIGKILL);
+
+ r = wait_for_container(*pid, &container_status);
+ *pid = 0;
+
+ if (r < 0)
+ /* We failed to wait for the container, or the container exited abnormally. */
+ return r;
+ if (r > 0 || container_status == CONTAINER_TERMINATED) {
+ /* r > 0 → The container exited with a non-zero status.
+ * As a special case, we need to replace 133 with a different value,
+ * because 133 is special-cased in the service file to reboot the container.
+ * otherwise → The container exited with zero status and a reboot was not requested.
+ */
+ if (r == EXIT_FORCE_RESTART)
+ r = EXIT_FAILURE; /* replace 133 with the general failure code */
+ *ret = r;
+ return 0; /* finito */
+ }
+
+ /* CONTAINER_REBOOTED, loop again */
+
+ if (arg_keep_unit) {
+ /* Special handling if we are running as a service: instead of simply
+ * restarting the machine we want to restart the entire service, so let's
+ * inform systemd about this with the special exit code 133. The service
+ * file uses RestartForceExitStatus=133 so that this results in a full
+ * nspawn restart. This is necessary since we might have cgroup parameters
+ * set we want to have flushed out. */
+ *ret = EXIT_FORCE_RESTART;
+ return 0; /* finito */
+ }
+
+ expose_port_flush(arg_expose_ports, exposed);
+
+ (void) remove_veth_links(veth_name, arg_network_veth_extra);
+ *veth_created = false;
+ return 1; /* loop again */
+}
+
+static int initialize_rlimits(void) {
+ /* The default resource limits the kernel passes to PID 1, as per kernel 4.16. Let's pass our container payload
+ * the same values as the kernel originally passed to PID 1, in order to minimize differences between host and
+ * container execution environments. */
+
+ static const struct rlimit kernel_defaults[_RLIMIT_MAX] = {
+ [RLIMIT_AS] = { RLIM_INFINITY, RLIM_INFINITY },
+ [RLIMIT_CORE] = { 0, RLIM_INFINITY },
+ [RLIMIT_CPU] = { RLIM_INFINITY, RLIM_INFINITY },
+ [RLIMIT_DATA] = { RLIM_INFINITY, RLIM_INFINITY },
+ [RLIMIT_FSIZE] = { RLIM_INFINITY, RLIM_INFINITY },
+ [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY },
+ [RLIMIT_MEMLOCK] = { 65536, 65536 },
+ [RLIMIT_MSGQUEUE] = { 819200, 819200 },
+ [RLIMIT_NICE] = { 0, 0 },
+ [RLIMIT_NOFILE] = { 1024, 4096 },
+ [RLIMIT_RSS] = { RLIM_INFINITY, RLIM_INFINITY },
+ [RLIMIT_RTPRIO] = { 0, 0 },
+ [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY },
+ [RLIMIT_STACK] = { 8388608, RLIM_INFINITY },
+
+ /* The kernel scales the default for RLIMIT_NPROC and RLIMIT_SIGPENDING based on the system's amount of
+ * RAM. To provide best compatibility we'll read these limits off PID 1 instead of hardcoding them
+ * here. This is safe as we know that PID 1 doesn't change these two limits and thus the original
+ * kernel's initialization should still be valid during runtime — at least if PID 1 is systemd. Note
+ * that PID 1 changes a number of other resource limits during early initialization which is why we
+ * don't read the other limits from PID 1 but prefer the static table above. */
+ };
+
+ int rl;
+
+ for (rl = 0; rl < _RLIMIT_MAX; rl++) {
+ /* Let's only fill in what the user hasn't explicitly configured anyway */
+ if ((arg_settings_mask & (SETTING_RLIMIT_FIRST << rl)) == 0) {
+ const struct rlimit *v;
+ struct rlimit buffer;
+
+ if (IN_SET(rl, RLIMIT_NPROC, RLIMIT_SIGPENDING)) {
+ /* For these two let's read the limits off PID 1. See above for an explanation. */
+
+ if (prlimit(1, rl, NULL, &buffer) < 0)
+ return log_error_errno(errno, "Failed to read resource limit RLIMIT_%s of PID 1: %m", rlimit_to_string(rl));
+
+ v = &buffer;
+ } else
+ v = kernel_defaults + rl;
+
+ arg_rlimit[rl] = newdup(struct rlimit, v, 1);
+ if (!arg_rlimit[rl])
+ return log_oom();
+ }
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *k = NULL;
+
+ (void) rlimit_format(arg_rlimit[rl], &k);
+ log_debug("Setting RLIMIT_%s to %s.", rlimit_to_string(rl), k);
+ }
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_free_ char *console = NULL;
+ _cleanup_close_ int master = -1;
+ _cleanup_fdset_free_ FDSet *fds = NULL;
+ int r, n_fd_passed, ret = EXIT_SUCCESS;
+ char veth_name[IFNAMSIZ] = "";
+ bool secondary = false, remove_directory = false, remove_image = false;
+ pid_t pid = 0;
+ union in_addr_union exposed = {};
+ _cleanup_(release_lock_file) LockFile tree_global_lock = LOCK_FILE_INIT, tree_local_lock = LOCK_FILE_INIT;
+ bool interactive, veth_created = false, remove_tmprootdir = false;
+ char tmprootdir[] = "/tmp/nspawn-root-XXXXXX";
+ _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
+
+ log_parse_environment();
+ log_open();
+
+ /* Make sure rename_process() in the stub init process can work */
+ saved_argv = argv;
+ saved_argc = argc;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ goto finish;
+
+ r = must_be_root();
+ if (r < 0)
+ goto finish;
+
+ r = initialize_rlimits();
+ if (r < 0)
+ goto finish;
+
+ r = determine_names();
+ if (r < 0)
+ goto finish;
+
+ r = load_settings();
+ if (r < 0)
+ goto finish;
+
+ parse_environment();
+
+ r = cg_unified_flush();
+ if (r < 0) {
+ log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m");
+ goto finish;
+ }
+
+ r = verify_arguments();
+ if (r < 0)
+ goto finish;
+
+ r = detect_unified_cgroup_hierarchy_from_environment();
+ if (r < 0)
+ goto finish;
+
+ /* Ignore SIGPIPE here, because we use splice() on the ptyfwd stuff and that will generate SIGPIPE if
+ * the result is closed. Note that the container payload child will reset signal mask+handler anyway,
+ * so just turning this off here means we only turn it off in nspawn itself, not any children. */
+ (void) ignore_signals(SIGPIPE, -1);
+
+ n_fd_passed = sd_listen_fds(false);
+ if (n_fd_passed > 0) {
+ r = fdset_new_listen_fds(&fds, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to collect file descriptors: %m");
+ goto finish;
+ }
+ }
+
+ /* The "default" umask. This is appropriate for most file and directory
+ * operations performed by nspawn, and is the umask that will be used for
+ * the child. Functions like copy_devnodes() change the umask temporarily. */
+ umask(0022);
+
+ if (arg_directory) {
+ assert(!arg_image);
+
+ if (path_equal(arg_directory, "/") && !arg_ephemeral) {
+ log_error("Spawning container on root directory is not supported. Consider using --ephemeral.");
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (arg_ephemeral) {
+ _cleanup_free_ char *np = NULL;
+
+ r = chase_symlinks_and_update(&arg_directory, 0);
+ if (r < 0)
+ goto finish;
+
+ /* If the specified path is a mount point we
+ * generate the new snapshot immediately
+ * inside it under a random name. However if
+ * the specified is not a mount point we
+ * create the new snapshot in the parent
+ * directory, just next to it. */
+ r = path_is_mount_point(arg_directory, NULL, 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to determine whether directory %s is mount point: %m", arg_directory);
+ goto finish;
+ }
+ if (r > 0)
+ r = tempfn_random_child(arg_directory, "machine.", &np);
+ else
+ r = tempfn_random(arg_directory, "machine.", &np);
+ if (r < 0) {
+ log_error_errno(r, "Failed to generate name for directory snapshot: %m");
+ goto finish;
+ }
+
+ r = image_path_lock(np, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock);
+ if (r < 0) {
+ log_error_errno(r, "Failed to lock %s: %m", np);
+ goto finish;
+ }
+
+ r = btrfs_subvol_snapshot(arg_directory, np,
+ (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
+ BTRFS_SNAPSHOT_FALLBACK_COPY |
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
+ BTRFS_SNAPSHOT_RECURSIVE |
+ BTRFS_SNAPSHOT_QUOTA);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create snapshot %s from %s: %m", np, arg_directory);
+ goto finish;
+ }
+
+ free_and_replace(arg_directory, np);
+
+ remove_directory = true;
+
+ } else {
+ r = chase_symlinks_and_update(&arg_directory, arg_template ? CHASE_NONEXISTENT : 0);
+ if (r < 0)
+ goto finish;
+
+ r = image_path_lock(arg_directory, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock);
+ if (r == -EBUSY) {
+ log_error_errno(r, "Directory tree %s is currently busy.", arg_directory);
+ goto finish;
+ }
+ if (r < 0) {
+ log_error_errno(r, "Failed to lock %s: %m", arg_directory);
+ goto finish;
+ }
+
+ if (arg_template) {
+ r = chase_symlinks_and_update(&arg_template, 0);
+ if (r < 0)
+ goto finish;
+
+ r = btrfs_subvol_snapshot(arg_template, arg_directory,
+ (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
+ BTRFS_SNAPSHOT_FALLBACK_COPY |
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
+ BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
+ BTRFS_SNAPSHOT_RECURSIVE |
+ BTRFS_SNAPSHOT_QUOTA);
+ if (r == -EEXIST)
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO,
+ "Directory %s already exists, not populating from template %s.", arg_directory, arg_template);
+ else if (r < 0) {
+ log_error_errno(r, "Couldn't create snapshot %s from %s: %m", arg_directory, arg_template);
+ goto finish;
+ } else
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO,
+ "Populated %s from template %s.", arg_directory, arg_template);
+ }
+ }
+
+ if (arg_start_mode == START_BOOT) {
+ const char *p;
+
+ if (arg_pivot_root_new)
+ p = prefix_roota(arg_directory, arg_pivot_root_new);
+ else
+ p = arg_directory;
+
+ if (path_is_os_tree(p) <= 0) {
+ log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", p);
+ r = -EINVAL;
+ goto finish;
+ }
+ } else {
+ const char *p, *q;
+
+ if (arg_pivot_root_new)
+ p = prefix_roota(arg_directory, arg_pivot_root_new);
+ else
+ p = arg_directory;
+
+ q = strjoina(p, "/usr/");
+
+ if (laccess(q, F_OK) < 0) {
+ log_error("Directory %s doesn't look like it has an OS tree. Refusing.", p);
+ r = -EINVAL;
+ goto finish;
+ }
+ }
+
+ } else {
+ assert(arg_image);
+ assert(!arg_template);
+
+ r = chase_symlinks_and_update(&arg_image, 0);
+ if (r < 0)
+ goto finish;
+
+ if (arg_ephemeral) {
+ _cleanup_free_ char *np = NULL;
+
+ r = tempfn_random(arg_image, "machine.", &np);
+ if (r < 0) {
+ log_error_errno(r, "Failed to generate name for image snapshot: %m");
+ goto finish;
+ }
+
+ r = image_path_lock(np, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock);
+ if (r < 0) {
+ r = log_error_errno(r, "Failed to create image lock: %m");
+ goto finish;
+ }
+
+ r = copy_file(arg_image, np, O_EXCL, arg_read_only ? 0400 : 0600, FS_NOCOW_FL, COPY_REFLINK);
+ if (r < 0) {
+ r = log_error_errno(r, "Failed to copy image file: %m");
+ goto finish;
+ }
+
+ free_and_replace(arg_image, np);
+
+ remove_image = true;
+ } else {
+ r = image_path_lock(arg_image, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock);
+ if (r == -EBUSY) {
+ r = log_error_errno(r, "Disk image %s is currently busy.", arg_image);
+ goto finish;
+ }
+ if (r < 0) {
+ r = log_error_errno(r, "Failed to create image lock: %m");
+ goto finish;
+ }
+
+ if (!arg_root_hash) {
+ r = root_hash_load(arg_image, &arg_root_hash, &arg_root_hash_size);
+ if (r < 0) {
+ log_error_errno(r, "Failed to load root hash file for %s: %m", arg_image);
+ goto finish;
+ }
+ }
+ }
+
+ if (!mkdtemp(tmprootdir)) {
+ r = log_error_errno(errno, "Failed to create temporary directory: %m");
+ goto finish;
+ }
+
+ remove_tmprootdir = true;
+
+ arg_directory = strdup(tmprootdir);
+ if (!arg_directory) {
+ r = log_oom();
+ goto finish;
+ }
+
+ r = loop_device_make_by_path(arg_image, arg_read_only ? O_RDONLY : O_RDWR, &loop);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set up loopback block device: %m");
+ goto finish;
+ }
+
+ r = dissect_image_and_warn(
+ loop->fd,
+ arg_image,
+ arg_root_hash, arg_root_hash_size,
+ DISSECT_IMAGE_REQUIRE_ROOT,
+ &dissected_image);
+ if (r == -ENOPKG) {
+ /* dissected_image_and_warn() already printed a brief error message. Extend on that with more details */
+ log_notice("Note that the disk image needs to\n"
+ " a) either contain only a single MBR partition of type 0x83 that is marked bootable\n"
+ " b) or contain a single GPT partition of type 0FC63DAF-8483-4772-8E79-3D69D8477DE4\n"
+ " c) or follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/\n"
+ " d) or contain a file system without a partition table\n"
+ "in order to be bootable with systemd-nspawn.");
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+
+ if (!arg_root_hash && dissected_image->can_verity)
+ log_notice("Note: image %s contains verity information, but no root hash specified! Proceeding without integrity checking.", arg_image);
+
+ r = dissected_image_decrypt_interactively(dissected_image, NULL, arg_root_hash, arg_root_hash_size, 0, &decrypted_image);
+ if (r < 0)
+ goto finish;
+
+ /* Now that we mounted the image, let's try to remove it again, if it is ephemeral */
+ if (remove_image && unlink(arg_image) >= 0)
+ remove_image = false;
+ }
+
+ r = custom_mount_prepare_all(arg_directory, arg_custom_mounts, arg_n_custom_mounts);
+ if (r < 0)
+ goto finish;
+
+ interactive =
+ isatty(STDIN_FILENO) > 0 &&
+ isatty(STDOUT_FILENO) > 0;
+
+ master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (master < 0) {
+ r = log_error_errno(errno, "Failed to acquire pseudo tty: %m");
+ goto finish;
+ }
+
+ r = ptsname_malloc(master, &console);
+ if (r < 0) {
+ r = log_error_errno(r, "Failed to determine tty name: %m");
+ goto finish;
+ }
+
+ if (arg_selinux_apifs_context) {
+ r = mac_selinux_apply(console, arg_selinux_apifs_context);
+ if (r < 0)
+ goto finish;
+ }
+
+ if (unlockpt(master) < 0) {
+ r = log_error_errno(errno, "Failed to unlock tty: %m");
+ goto finish;
+ }
+
+ if (!arg_quiet)
+ log_info("Spawning container %s on %s.\nPress ^] three times within 1s to kill container.",
+ arg_machine, arg_image ?: arg_directory);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1) >= 0);
+
+ if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) < 0) {
+ r = log_error_errno(errno, "Failed to become subreaper: %m");
+ goto finish;
+ }
+
+ for (;;) {
+ r = run(master,
+ console,
+ dissected_image,
+ interactive, secondary,
+ fds,
+ veth_name, &veth_created,
+ &exposed,
+ &pid, &ret);
+ if (r <= 0)
+ break;
+ }
+
+finish:
+ sd_notify(false,
+ r == 0 && ret == EXIT_FORCE_RESTART ? "STOPPING=1\nSTATUS=Restarting..." :
+ "STOPPING=1\nSTATUS=Terminating...");
+
+ if (pid > 0)
+ (void) kill(pid, SIGKILL);
+
+ /* Try to flush whatever is still queued in the pty */
+ if (master >= 0) {
+ (void) copy_bytes(master, STDOUT_FILENO, (uint64_t) -1, 0);
+ master = safe_close(master);
+ }
+
+ if (pid > 0)
+ (void) wait_for_terminate(pid, NULL);
+
+ pager_close();
+
+ if (remove_directory && arg_directory) {
+ int k;
+
+ k = rm_rf(arg_directory, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ if (k < 0)
+ log_warning_errno(k, "Cannot remove '%s', ignoring: %m", arg_directory);
+ }
+
+ if (remove_image && arg_image) {
+ if (unlink(arg_image) < 0)
+ log_warning_errno(errno, "Can't remove image file '%s', ignoring: %m", arg_image);
+ }
+
+ if (remove_tmprootdir) {
+ if (rmdir(tmprootdir) < 0)
+ log_debug_errno(errno, "Can't remove temporary root directory '%s', ignoring: %m", tmprootdir);
+ }
+
+ if (arg_machine) {
+ const char *p;
+
+ p = strjoina("/run/systemd/nspawn/propagate/", arg_machine);
+ (void) rm_rf(p, REMOVE_ROOT);
+ }
+
+ expose_port_flush(arg_expose_ports, &exposed);
+
+ if (veth_created)
+ (void) remove_veth_links(veth_name, arg_network_veth_extra);
+ (void) remove_bridge(arg_network_zone);
+
+ free(arg_directory);
+ free(arg_template);
+ free(arg_image);
+ free(arg_machine);
+ free(arg_hostname);
+ free(arg_user);
+ free(arg_pivot_root_new);
+ free(arg_pivot_root_old);
+ free(arg_chdir);
+ strv_free(arg_setenv);
+ free(arg_network_bridge);
+ strv_free(arg_network_interfaces);
+ strv_free(arg_network_macvlan);
+ strv_free(arg_network_ipvlan);
+ strv_free(arg_network_veth_extra);
+ strv_free(arg_parameters);
+ free(arg_network_zone);
+ free(arg_network_namespace_path);
+ strv_free(arg_property);
+ custom_mount_free_all(arg_custom_mounts, arg_n_custom_mounts);
+ expose_port_free_all(arg_expose_ports);
+ free(arg_root_hash);
+ rlimit_free_all(arg_rlimit);
+ strv_free(arg_syscall_whitelist);
+ strv_free(arg_syscall_blacklist);
+ arg_cpuset = cpu_set_mfree(arg_cpuset);
+
+ return r < 0 ? EXIT_FAILURE : ret;
+}
diff --git a/src/nspawn/test-nspawn-tables.c b/src/nspawn/test-nspawn-tables.c
new file mode 100644
index 0000000..2c5f2ef
--- /dev/null
+++ b/src/nspawn/test-nspawn-tables.c
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "nspawn-settings.h"
+#include "test-tables.h"
+
+int main(int argc, char **argv) {
+ test_table(resolv_conf_mode, RESOLV_CONF_MODE);
+ test_table(timezone_mode, TIMEZONE_MODE);
+
+ return 0;
+}
diff --git a/src/nspawn/test-patch-uid.c b/src/nspawn/test-patch-uid.c
new file mode 100644
index 0000000..b50f099
--- /dev/null
+++ b/src/nspawn/test-patch-uid.c
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdlib.h>
+
+#include "log.h"
+#include "nspawn-patch-uid.h"
+#include "user-util.h"
+#include "tests.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ uid_t shift, range;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ if (argc != 4) {
+ log_error("Expected PATH SHIFT RANGE parameters.");
+ return EXIT_FAILURE;
+ }
+
+ r = parse_uid(argv[2], &shift);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse UID shift %s.", argv[2]);
+ return EXIT_FAILURE;
+ }
+
+ r = parse_gid(argv[3], &range);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse UID range %s.", argv[3]);
+ return EXIT_FAILURE;
+ }
+
+ r = path_patch_uid(argv[1], shift, range);
+ if (r < 0) {
+ log_error_errno(r, "Failed to patch directory tree: %m");
+ return EXIT_FAILURE;
+ }
+
+ log_info("Changed: %s", yes_no(r));
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/nss-myhostname/nss-myhostname.c b/src/nss-myhostname/nss-myhostname.c
new file mode 100644
index 0000000..e491351
--- /dev/null
+++ b/src/nss-myhostname/nss-myhostname.c
@@ -0,0 +1,501 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <nss.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "hostname-util.h"
+#include "local-addresses.h"
+#include "macro.h"
+#include "nss-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "util.h"
+
+/* We use 127.0.0.2 as IPv4 address. This has the advantage over
+ * 127.0.0.1 that it can be translated back to the local hostname. For
+ * IPv6 we use ::1 which unfortunately will not translate back to the
+ * hostname but instead something like "localhost" or so. */
+
+#define LOCALADDRESS_IPV4 (htobe32(0x7F000002))
+#define LOCALADDRESS_IPV6 &in6addr_loopback
+
+NSS_GETHOSTBYNAME_PROTOTYPES(myhostname);
+NSS_GETHOSTBYADDR_PROTOTYPES(myhostname);
+
+enum nss_status _nss_myhostname_gethostbyname4_r(
+ const char *name,
+ struct gaih_addrtuple **pat,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp) {
+
+ struct gaih_addrtuple *r_tuple, *r_tuple_prev = NULL;
+ _cleanup_free_ struct local_address *addresses = NULL;
+ _cleanup_free_ char *hn = NULL;
+ const char *canonical = NULL;
+ int n_addresses = 0;
+ uint32_t local_address_ipv4;
+ struct local_address *a;
+ size_t l, idx, ms;
+ char *r_name;
+ unsigned n;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(pat);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ if (is_localhost(name)) {
+ /* We respond to 'localhost', so that /etc/hosts
+ * is optional */
+
+ canonical = "localhost";
+ local_address_ipv4 = htobe32(INADDR_LOOPBACK);
+
+ } else if (is_gateway_hostname(name)) {
+
+ n_addresses = local_gateways(NULL, 0, AF_UNSPEC, &addresses);
+ if (n_addresses <= 0) {
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+ }
+
+ canonical = "_gateway";
+
+ } else {
+ hn = gethostname_malloc();
+ if (!hn) {
+ UNPROTECT_ERRNO;
+ *errnop = ENOMEM;
+ *h_errnop = NO_RECOVERY;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* We respond to our local host name, our hostname suffixed with a single dot. */
+ if (!streq(name, hn) && !streq_ptr(startswith(name, hn), ".")) {
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+ }
+
+ n_addresses = local_addresses(NULL, 0, AF_UNSPEC, &addresses);
+ if (n_addresses < 0)
+ n_addresses = 0;
+
+ canonical = hn;
+ local_address_ipv4 = LOCALADDRESS_IPV4;
+ }
+
+ l = strlen(canonical);
+ ms = ALIGN(l+1) + ALIGN(sizeof(struct gaih_addrtuple)) * (n_addresses > 0 ? n_addresses : 2);
+ if (buflen < ms) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ *h_errnop = NETDB_INTERNAL;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* First, fill in hostname */
+ r_name = buffer;
+ memcpy(r_name, canonical, l+1);
+ idx = ALIGN(l+1);
+
+ assert(n_addresses >= 0);
+ if (n_addresses == 0) {
+ /* Second, fill in IPv6 tuple */
+ r_tuple = (struct gaih_addrtuple*) (buffer + idx);
+ r_tuple->next = r_tuple_prev;
+ r_tuple->name = r_name;
+ r_tuple->family = AF_INET6;
+ memcpy(r_tuple->addr, LOCALADDRESS_IPV6, 16);
+ r_tuple->scopeid = 0;
+
+ idx += ALIGN(sizeof(struct gaih_addrtuple));
+ r_tuple_prev = r_tuple;
+
+ /* Third, fill in IPv4 tuple */
+ r_tuple = (struct gaih_addrtuple*) (buffer + idx);
+ r_tuple->next = r_tuple_prev;
+ r_tuple->name = r_name;
+ r_tuple->family = AF_INET;
+ *(uint32_t*) r_tuple->addr = local_address_ipv4;
+ r_tuple->scopeid = 0;
+
+ idx += ALIGN(sizeof(struct gaih_addrtuple));
+ r_tuple_prev = r_tuple;
+ }
+
+ /* Fourth, fill actual addresses in, but in backwards order */
+ for (a = addresses + n_addresses - 1, n = 0; (int) n < n_addresses; n++, a--) {
+ r_tuple = (struct gaih_addrtuple*) (buffer + idx);
+ r_tuple->next = r_tuple_prev;
+ r_tuple->name = r_name;
+ r_tuple->family = a->family;
+ r_tuple->scopeid = a->family == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&a->address.in6) ? a->ifindex : 0;
+ memcpy(r_tuple->addr, &a->address, 16);
+
+ idx += ALIGN(sizeof(struct gaih_addrtuple));
+ r_tuple_prev = r_tuple;
+ }
+
+ /* Verify the size matches */
+ assert(idx == ms);
+
+ /* Nscd expects us to store the first record in **pat. */
+ if (*pat)
+ **pat = *r_tuple_prev;
+ else
+ *pat = r_tuple_prev;
+
+ if (ttlp)
+ *ttlp = 0;
+
+ /* Explicitly reset both *h_errnop and h_errno to work around
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1125975 */
+ *h_errnop = NETDB_SUCCESS;
+ h_errno = 0;
+
+ return NSS_STATUS_SUCCESS;
+}
+
+static enum nss_status fill_in_hostent(
+ const char *canonical, const char *additional,
+ int af,
+ struct local_address *addresses, unsigned n_addresses,
+ uint32_t local_address_ipv4,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp,
+ char **canonp) {
+
+ size_t l_canonical, l_additional, idx, ms, alen;
+ char *r_addr, *r_name, *r_aliases, *r_alias = NULL, *r_addr_list;
+ struct local_address *a;
+ unsigned n, c;
+
+ assert(canonical);
+ assert(result);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ PROTECT_ERRNO;
+
+ alen = FAMILY_ADDRESS_SIZE(af);
+
+ for (a = addresses, n = 0, c = 0; n < n_addresses; a++, n++)
+ if (af == a->family)
+ c++;
+
+ l_canonical = strlen(canonical);
+ l_additional = strlen_ptr(additional);
+ ms = ALIGN(l_canonical+1)+
+ (additional ? ALIGN(l_additional+1) : 0) +
+ sizeof(char*) +
+ (additional ? sizeof(char*) : 0) +
+ (c > 0 ? c : 1) * ALIGN(alen) +
+ (c > 0 ? c+1 : 2) * sizeof(char*);
+
+ if (buflen < ms) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ *h_errnop = NETDB_INTERNAL;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* First, fill in hostnames */
+ r_name = buffer;
+ memcpy(r_name, canonical, l_canonical+1);
+ idx = ALIGN(l_canonical+1);
+
+ if (additional) {
+ r_alias = buffer + idx;
+ memcpy(r_alias, additional, l_additional+1);
+ idx += ALIGN(l_additional+1);
+ }
+
+ /* Second, create aliases array */
+ r_aliases = buffer + idx;
+ if (additional) {
+ ((char**) r_aliases)[0] = r_alias;
+ ((char**) r_aliases)[1] = NULL;
+ idx += 2*sizeof(char*);
+ } else {
+ ((char**) r_aliases)[0] = NULL;
+ idx += sizeof(char*);
+ }
+
+ /* Third, add addresses */
+ r_addr = buffer + idx;
+ if (c > 0) {
+ unsigned i = 0;
+
+ for (a = addresses, n = 0; n < n_addresses; a++, n++) {
+ if (af != a->family)
+ continue;
+
+ memcpy(r_addr + i*ALIGN(alen), &a->address, alen);
+ i++;
+ }
+
+ assert(i == c);
+ idx += c*ALIGN(alen);
+ } else {
+ if (af == AF_INET)
+ *(uint32_t*) r_addr = local_address_ipv4;
+ else
+ memcpy(r_addr, LOCALADDRESS_IPV6, 16);
+
+ idx += ALIGN(alen);
+ }
+
+ /* Fourth, add address pointer array */
+ r_addr_list = buffer + idx;
+ if (c > 0) {
+ unsigned i;
+
+ for (i = 0; i < c; i++)
+ ((char**) r_addr_list)[i] = r_addr + i*ALIGN(alen);
+
+ ((char**) r_addr_list)[i] = NULL;
+ idx += (c+1) * sizeof(char*);
+
+ } else {
+ ((char**) r_addr_list)[0] = r_addr;
+ ((char**) r_addr_list)[1] = NULL;
+ idx += 2 * sizeof(char*);
+ }
+
+ /* Verify the size matches */
+ assert(idx == ms);
+
+ result->h_name = r_name;
+ result->h_aliases = (char**) r_aliases;
+ result->h_addrtype = af;
+ result->h_length = alen;
+ result->h_addr_list = (char**) r_addr_list;
+
+ if (ttlp)
+ *ttlp = 0;
+
+ if (canonp)
+ *canonp = r_name;
+
+ /* Explicitly reset both *h_errnop and h_errno to work around
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1125975 */
+ *h_errnop = NETDB_SUCCESS;
+ h_errno = 0;
+
+ return NSS_STATUS_SUCCESS;
+}
+
+enum nss_status _nss_myhostname_gethostbyname3_r(
+ const char *name,
+ int af,
+ struct hostent *host,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp,
+ char **canonp) {
+
+ _cleanup_free_ struct local_address *addresses = NULL;
+ const char *canonical, *additional = NULL;
+ _cleanup_free_ char *hn = NULL;
+ uint32_t local_address_ipv4 = 0;
+ int n_addresses = 0;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(host);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ if (af == AF_UNSPEC)
+ af = AF_INET;
+
+ if (!IN_SET(af, AF_INET, AF_INET6)) {
+ UNPROTECT_ERRNO;
+ *errnop = EAFNOSUPPORT;
+ *h_errnop = NO_DATA;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ if (is_localhost(name)) {
+ canonical = "localhost";
+ local_address_ipv4 = htobe32(INADDR_LOOPBACK);
+
+ } else if (is_gateway_hostname(name)) {
+
+ n_addresses = local_gateways(NULL, 0, af, &addresses);
+ if (n_addresses <= 0) {
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+ }
+
+ canonical = "_gateway";
+
+ } else {
+ hn = gethostname_malloc();
+ if (!hn) {
+ UNPROTECT_ERRNO;
+ *errnop = ENOMEM;
+ *h_errnop = NO_RECOVERY;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ if (!streq(name, hn) && !streq_ptr(startswith(name, hn), ".")) {
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+ }
+
+ n_addresses = local_addresses(NULL, 0, af, &addresses);
+ if (n_addresses < 0)
+ n_addresses = 0;
+
+ canonical = hn;
+ additional = n_addresses <= 0 && af == AF_INET6 ? "localhost" : NULL;
+ local_address_ipv4 = LOCALADDRESS_IPV4;
+ }
+
+ UNPROTECT_ERRNO;
+
+ return fill_in_hostent(
+ canonical, additional,
+ af,
+ addresses, n_addresses,
+ local_address_ipv4,
+ host,
+ buffer, buflen,
+ errnop, h_errnop,
+ ttlp,
+ canonp);
+}
+
+enum nss_status _nss_myhostname_gethostbyaddr2_r(
+ const void* addr, socklen_t len,
+ int af,
+ struct hostent *host,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp) {
+
+ const char *canonical = NULL, *additional = NULL;
+ uint32_t local_address_ipv4 = LOCALADDRESS_IPV4;
+ _cleanup_free_ struct local_address *addresses = NULL;
+ _cleanup_free_ char *hn = NULL;
+ int n_addresses = 0;
+ struct local_address *a;
+ bool additional_from_hostname = false;
+ unsigned n;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(addr);
+ assert(host);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ if (!IN_SET(af, AF_INET, AF_INET6)) {
+ UNPROTECT_ERRNO;
+ *errnop = EAFNOSUPPORT;
+ *h_errnop = NO_DATA;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ if (len != FAMILY_ADDRESS_SIZE(af)) {
+ UNPROTECT_ERRNO;
+ *errnop = EINVAL;
+ *h_errnop = NO_RECOVERY;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ if (af == AF_INET) {
+ if ((*(uint32_t*) addr) == LOCALADDRESS_IPV4)
+ goto found;
+
+ if ((*(uint32_t*) addr) == htobe32(INADDR_LOOPBACK)) {
+ canonical = "localhost";
+ local_address_ipv4 = htobe32(INADDR_LOOPBACK);
+ goto found;
+ }
+
+ } else {
+ assert(af == AF_INET6);
+
+ if (memcmp(addr, LOCALADDRESS_IPV6, 16) == 0) {
+ canonical = "localhost";
+ additional_from_hostname = true;
+ goto found;
+ }
+ }
+
+ n_addresses = local_addresses(NULL, 0, AF_UNSPEC, &addresses);
+ for (a = addresses, n = 0; (int) n < n_addresses; n++, a++) {
+ if (af != a->family)
+ continue;
+
+ if (memcmp(addr, &a->address, FAMILY_ADDRESS_SIZE(af)) == 0)
+ goto found;
+ }
+
+ addresses = mfree(addresses);
+
+ n_addresses = local_gateways(NULL, 0, AF_UNSPEC, &addresses);
+ for (a = addresses, n = 0; (int) n < n_addresses; n++, a++) {
+ if (af != a->family)
+ continue;
+
+ if (memcmp(addr, &a->address, FAMILY_ADDRESS_SIZE(af)) == 0) {
+ canonical = "_gateway";
+ goto found;
+ }
+ }
+
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+
+found:
+ if (!canonical || additional_from_hostname) {
+ hn = gethostname_malloc();
+ if (!hn) {
+ UNPROTECT_ERRNO;
+ *errnop = ENOMEM;
+ *h_errnop = NO_RECOVERY;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ if (!canonical)
+ canonical = hn;
+ else
+ additional = hn;
+ }
+
+ UNPROTECT_ERRNO;
+ return fill_in_hostent(
+ canonical, additional,
+ af,
+ addresses, n_addresses,
+ local_address_ipv4,
+ host,
+ buffer, buflen,
+ errnop, h_errnop,
+ ttlp,
+ NULL);
+}
+
+NSS_GETHOSTBYNAME_FALLBACKS(myhostname);
+NSS_GETHOSTBYADDR_FALLBACKS(myhostname);
diff --git a/src/nss-myhostname/nss-myhostname.sym b/src/nss-myhostname/nss-myhostname.sym
new file mode 100644
index 0000000..fc7aa99
--- /dev/null
+++ b/src/nss-myhostname/nss-myhostname.sym
@@ -0,0 +1,19 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1+
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+***/
+
+{
+global:
+ _nss_myhostname_gethostbyname_r;
+ _nss_myhostname_gethostbyname2_r;
+ _nss_myhostname_gethostbyname3_r;
+ _nss_myhostname_gethostbyname4_r;
+ _nss_myhostname_gethostbyaddr_r;
+ _nss_myhostname_gethostbyaddr2_r;
+local: *;
+};
diff --git a/src/nss-mymachines/nss-mymachines.c b/src/nss-mymachines/nss-mymachines.c
new file mode 100644
index 0000000..486a658
--- /dev/null
+++ b/src/nss-mymachines/nss-mymachines.c
@@ -0,0 +1,776 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netdb.h>
+#include <nss.h>
+
+#include "sd-bus.h"
+#include "sd-login.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "env-util.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "nss-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "user-util.h"
+#include "util.h"
+
+NSS_GETHOSTBYNAME_PROTOTYPES(mymachines);
+NSS_GETPW_PROTOTYPES(mymachines);
+NSS_GETGR_PROTOTYPES(mymachines);
+
+#define HOST_UID_LIMIT ((uid_t) UINT32_C(0x10000))
+#define HOST_GID_LIMIT ((gid_t) UINT32_C(0x10000))
+
+static int count_addresses(sd_bus_message *m, int af, unsigned *ret) {
+ unsigned c = 0;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ while ((r = sd_bus_message_enter_container(m, 'r', "iay")) > 0) {
+ int family;
+
+ r = sd_bus_message_read(m, "i", &family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_skip(m, "ay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ if (af != AF_UNSPEC && family != af)
+ continue;
+
+ c++;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_rewind(m, false);
+ if (r < 0)
+ return r;
+
+ *ret = c;
+ return 0;
+}
+
+static bool avoid_deadlock(void) {
+
+ /* Check whether this lookup might have a chance of deadlocking because we are called from the service manager
+ * code activating systemd-machined.service. After all, we shouldn't synchronously do lookups to
+ * systemd-machined if we are required to finish before it can be started. This of course won't detect all
+ * possible dead locks of this kind, but it should work for the most obvious cases. */
+
+ if (geteuid() != 0) /* Ignore the env vars unless we are privileged. */
+ return false;
+
+ return streq_ptr(getenv("SYSTEMD_ACTIVATION_UNIT"), "systemd-machined.service") &&
+ streq_ptr(getenv("SYSTEMD_ACTIVATION_SCOPE"), "system");
+}
+
+enum nss_status _nss_mymachines_gethostbyname4_r(
+ const char *name,
+ struct gaih_addrtuple **pat,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp) {
+
+ struct gaih_addrtuple *r_tuple, *r_tuple_first = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ int *ifindices = NULL;
+ _cleanup_free_ char *class = NULL;
+ size_t l, ms, idx;
+ unsigned i = 0, c = 0;
+ char *r_name;
+ int n_ifindices, r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(pat);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ r = sd_machine_get_class(name, &class);
+ if (r < 0)
+ goto fail;
+ if (!streq(class, "container")) {
+ r = -ENOTTY;
+ goto fail;
+ }
+
+ n_ifindices = sd_machine_get_ifindices(name, &ifindices);
+ if (n_ifindices < 0) {
+ r = n_ifindices;
+ goto fail;
+ }
+
+ if (avoid_deadlock()) {
+ r = -EDEADLK;
+ goto fail;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "GetMachineAddresses",
+ NULL,
+ &reply,
+ "s", name);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(iay)");
+ if (r < 0)
+ goto fail;
+
+ r = count_addresses(reply, AF_UNSPEC, &c);
+ if (r < 0)
+ goto fail;
+
+ if (c <= 0) {
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+ }
+
+ l = strlen(name);
+ ms = ALIGN(l+1) + ALIGN(sizeof(struct gaih_addrtuple)) * c;
+ if (buflen < ms) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ *h_errnop = NETDB_INTERNAL;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* First, append name */
+ r_name = buffer;
+ memcpy(r_name, name, l+1);
+ idx = ALIGN(l+1);
+
+ /* Second, append addresses */
+ r_tuple_first = (struct gaih_addrtuple*) (buffer + idx);
+ while ((r = sd_bus_message_enter_container(reply, 'r', "iay")) > 0) {
+ int family;
+ const void *a;
+ size_t sz;
+
+ r = sd_bus_message_read(reply, "i", &family);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_read_array(reply, 'y', &a, &sz);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ goto fail;
+
+ if (!IN_SET(family, AF_INET, AF_INET6)) {
+ r = -EAFNOSUPPORT;
+ goto fail;
+ }
+
+ if (sz != FAMILY_ADDRESS_SIZE(family)) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ r_tuple = (struct gaih_addrtuple*) (buffer + idx);
+ r_tuple->next = i == c-1 ? NULL : (struct gaih_addrtuple*) ((char*) r_tuple + ALIGN(sizeof(struct gaih_addrtuple)));
+ r_tuple->name = r_name;
+ r_tuple->family = family;
+ r_tuple->scopeid = n_ifindices == 1 ? ifindices[0] : 0;
+ memcpy(r_tuple->addr, a, sz);
+
+ idx += ALIGN(sizeof(struct gaih_addrtuple));
+ i++;
+ }
+
+ assert(i == c);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ goto fail;
+
+ assert(idx == ms);
+
+ if (*pat)
+ **pat = *r_tuple_first;
+ else
+ *pat = r_tuple_first;
+
+ if (ttlp)
+ *ttlp = 0;
+
+ /* Explicitly reset both *h_errnop and h_errno to work around
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1125975 */
+ *h_errnop = NETDB_SUCCESS;
+ h_errno = 0;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ *h_errnop = NO_DATA;
+ return NSS_STATUS_UNAVAIL;
+}
+
+enum nss_status _nss_mymachines_gethostbyname3_r(
+ const char *name,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp,
+ char **canonp) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *class = NULL;
+ unsigned c = 0, i = 0;
+ char *r_name, *r_aliases, *r_addr, *r_addr_list;
+ size_t l, idx, ms, alen;
+ int r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(result);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ if (af == AF_UNSPEC)
+ af = AF_INET;
+
+ if (af != AF_INET && af != AF_INET6) {
+ r = -EAFNOSUPPORT;
+ goto fail;
+ }
+
+ r = sd_machine_get_class(name, &class);
+ if (r < 0)
+ goto fail;
+ if (!streq(class, "container")) {
+ r = -ENOTTY;
+ goto fail;
+ }
+
+ if (avoid_deadlock()) {
+ r = -EDEADLK;
+ goto fail;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "GetMachineAddresses",
+ NULL,
+ &reply,
+ "s", name);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(iay)");
+ if (r < 0)
+ goto fail;
+
+ r = count_addresses(reply, af, &c);
+ if (r < 0)
+ goto fail;
+
+ if (c <= 0) {
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+ }
+
+ alen = FAMILY_ADDRESS_SIZE(af);
+ l = strlen(name);
+
+ ms = ALIGN(l+1) + c * ALIGN(alen) + (c+2) * sizeof(char*);
+
+ if (buflen < ms) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ *h_errnop = NETDB_INTERNAL;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* First, append name */
+ r_name = buffer;
+ memcpy(r_name, name, l+1);
+ idx = ALIGN(l+1);
+
+ /* Second, create aliases array */
+ r_aliases = buffer + idx;
+ ((char**) r_aliases)[0] = NULL;
+ idx += sizeof(char*);
+
+ /* Third, append addresses */
+ r_addr = buffer + idx;
+ while ((r = sd_bus_message_enter_container(reply, 'r', "iay")) > 0) {
+ int family;
+ const void *a;
+ size_t sz;
+
+ r = sd_bus_message_read(reply, "i", &family);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_read_array(reply, 'y', &a, &sz);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ goto fail;
+
+ if (family != af)
+ continue;
+
+ if (sz != alen) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ memcpy(r_addr + i*ALIGN(alen), a, alen);
+ i++;
+ }
+
+ assert(i == c);
+ idx += c * ALIGN(alen);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ goto fail;
+
+ /* Third, append address pointer array */
+ r_addr_list = buffer + idx;
+ for (i = 0; i < c; i++)
+ ((char**) r_addr_list)[i] = r_addr + i*ALIGN(alen);
+
+ ((char**) r_addr_list)[i] = NULL;
+ idx += (c+1) * sizeof(char*);
+
+ assert(idx == ms);
+
+ result->h_name = r_name;
+ result->h_aliases = (char**) r_aliases;
+ result->h_addrtype = af;
+ result->h_length = alen;
+ result->h_addr_list = (char**) r_addr_list;
+
+ if (ttlp)
+ *ttlp = 0;
+
+ if (canonp)
+ *canonp = r_name;
+
+ /* Explicitly reset both *h_errnop and h_errno to work around
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1125975 */
+ *h_errnop = NETDB_SUCCESS;
+ h_errno = 0;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ *h_errnop = NO_DATA;
+ return NSS_STATUS_UNAVAIL;
+}
+
+NSS_GETHOSTBYNAME_FALLBACKS(mymachines);
+
+enum nss_status _nss_mymachines_getpwnam_r(
+ const char *name,
+ struct passwd *pwd,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ const char *p, *e, *machine;
+ uint32_t mapped;
+ uid_t uid;
+ size_t l;
+ int r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(pwd);
+
+ p = startswith(name, "vu-");
+ if (!p)
+ return NSS_STATUS_NOTFOUND;
+
+ e = strrchr(p, '-');
+ if (!e || e == p)
+ return NSS_STATUS_NOTFOUND;
+
+ if (e - p > HOST_NAME_MAX - 1) /* -1 for the last dash */
+ return NSS_STATUS_NOTFOUND;
+
+ r = parse_uid(e + 1, &uid);
+ if (r < 0)
+ return NSS_STATUS_NOTFOUND;
+
+ machine = strndupa(p, e - p);
+ if (!machine_name_is_valid(machine))
+ return NSS_STATUS_NOTFOUND;
+
+ if (getenv_bool_secure("SYSTEMD_NSS_BYPASS_BUS") > 0)
+ /* Make sure we can't deadlock if we are invoked by dbus-daemon. This way, it won't be able to resolve
+ * these UIDs, but that should be unproblematic as containers should never be able to connect to a bus
+ * running on the host. */
+ return NSS_STATUS_NOTFOUND;
+
+ if (avoid_deadlock()) {
+ r = -EDEADLK;
+ goto fail;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "MapFromMachineUser",
+ &error,
+ &reply,
+ "su",
+ machine, (uint32_t) uid);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_USER_MAPPING))
+ return NSS_STATUS_NOTFOUND;
+
+ goto fail;
+ }
+
+ r = sd_bus_message_read(reply, "u", &mapped);
+ if (r < 0)
+ goto fail;
+
+ /* Refuse to work if the mapped address is in the host UID range, or if there was no mapping at all. */
+ if (mapped < HOST_UID_LIMIT || mapped == uid)
+ return NSS_STATUS_NOTFOUND;
+
+ l = strlen(name);
+ if (buflen < l+1) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ memcpy(buffer, name, l+1);
+
+ pwd->pw_name = buffer;
+ pwd->pw_uid = mapped;
+ pwd->pw_gid = GID_NOBODY;
+ pwd->pw_gecos = buffer;
+ pwd->pw_passwd = (char*) "*"; /* locked */
+ pwd->pw_dir = (char*) "/";
+ pwd->pw_shell = (char*) "/sbin/nologin";
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+}
+
+enum nss_status _nss_mymachines_getpwuid_r(
+ uid_t uid,
+ struct passwd *pwd,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ const char *machine;
+ uint32_t mapped;
+ int r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ if (!uid_is_valid(uid))
+ return NSS_STATUS_NOTFOUND;
+
+ /* We consider all uids < 65536 host uids */
+ if (uid < HOST_UID_LIMIT)
+ return NSS_STATUS_NOTFOUND;
+
+ if (getenv_bool_secure("SYSTEMD_NSS_BYPASS_BUS") > 0)
+ return NSS_STATUS_NOTFOUND;
+
+ if (avoid_deadlock()) {
+ r = -EDEADLK;
+ goto fail;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "MapToMachineUser",
+ &error,
+ &reply,
+ "u",
+ (uint32_t) uid);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_USER_MAPPING))
+ return NSS_STATUS_NOTFOUND;
+
+ goto fail;
+ }
+
+ r = sd_bus_message_read(reply, "sou", &machine, NULL, &mapped);
+ if (r < 0)
+ goto fail;
+
+ if (mapped == uid)
+ return NSS_STATUS_NOTFOUND;
+
+ if (snprintf(buffer, buflen, "vu-%s-" UID_FMT, machine, (uid_t) mapped) >= (int) buflen) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ pwd->pw_name = buffer;
+ pwd->pw_uid = uid;
+ pwd->pw_gid = GID_NOBODY;
+ pwd->pw_gecos = buffer;
+ pwd->pw_passwd = (char*) "*"; /* locked */
+ pwd->pw_dir = (char*) "/";
+ pwd->pw_shell = (char*) "/sbin/nologin";
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+}
+
+#pragma GCC diagnostic ignored "-Wsizeof-pointer-memaccess"
+
+enum nss_status _nss_mymachines_getgrnam_r(
+ const char *name,
+ struct group *gr,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ const char *p, *e, *machine;
+ uint32_t mapped;
+ uid_t gid;
+ size_t l;
+ int r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(gr);
+
+ p = startswith(name, "vg-");
+ if (!p)
+ return NSS_STATUS_NOTFOUND;
+
+ e = strrchr(p, '-');
+ if (!e || e == p)
+ return NSS_STATUS_NOTFOUND;
+
+ if (e - p > HOST_NAME_MAX - 1) /* -1 for the last dash */
+ return NSS_STATUS_NOTFOUND;
+
+ r = parse_gid(e + 1, &gid);
+ if (r < 0)
+ return NSS_STATUS_NOTFOUND;
+
+ machine = strndupa(p, e - p);
+ if (!machine_name_is_valid(machine))
+ return NSS_STATUS_NOTFOUND;
+
+ if (getenv_bool_secure("SYSTEMD_NSS_BYPASS_BUS") > 0)
+ return NSS_STATUS_NOTFOUND;
+
+ if (avoid_deadlock()) {
+ r = -EDEADLK;
+ goto fail;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "MapFromMachineGroup",
+ &error,
+ &reply,
+ "su",
+ machine, (uint32_t) gid);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_GROUP_MAPPING))
+ return NSS_STATUS_NOTFOUND;
+
+ goto fail;
+ }
+
+ r = sd_bus_message_read(reply, "u", &mapped);
+ if (r < 0)
+ goto fail;
+
+ if (mapped < HOST_GID_LIMIT || mapped == gid)
+ return NSS_STATUS_NOTFOUND;
+
+ l = sizeof(char*) + strlen(name) + 1;
+ if (buflen < l) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ memzero(buffer, sizeof(char*));
+ strcpy(buffer + sizeof(char*), name);
+
+ gr->gr_name = buffer + sizeof(char*);
+ gr->gr_gid = mapped;
+ gr->gr_passwd = (char*) "*"; /* locked */
+ gr->gr_mem = (char**) buffer;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+}
+
+enum nss_status _nss_mymachines_getgrgid_r(
+ gid_t gid,
+ struct group *gr,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ const char *machine;
+ uint32_t mapped;
+ int r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ if (!gid_is_valid(gid))
+ return NSS_STATUS_NOTFOUND;
+
+ /* We consider all gids < 65536 host gids */
+ if (gid < HOST_GID_LIMIT)
+ return NSS_STATUS_NOTFOUND;
+
+ if (getenv_bool_secure("SYSTEMD_NSS_BYPASS_BUS") > 0)
+ return NSS_STATUS_NOTFOUND;
+
+ if (avoid_deadlock()) {
+ r = -EDEADLK;
+ goto fail;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "MapToMachineGroup",
+ &error,
+ &reply,
+ "u",
+ (uint32_t) gid);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_GROUP_MAPPING))
+ return NSS_STATUS_NOTFOUND;
+
+ goto fail;
+ }
+
+ r = sd_bus_message_read(reply, "sou", &machine, NULL, &mapped);
+ if (r < 0)
+ goto fail;
+
+ if (mapped == gid)
+ return NSS_STATUS_NOTFOUND;
+
+ if (buflen < sizeof(char*) + 1) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ memzero(buffer, sizeof(char*));
+ if (snprintf(buffer + sizeof(char*), buflen - sizeof(char*), "vg-%s-" GID_FMT, machine, (gid_t) mapped) >= (int) buflen) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ gr->gr_name = buffer + sizeof(char*);
+ gr->gr_gid = gid;
+ gr->gr_passwd = (char*) "*"; /* locked */
+ gr->gr_mem = (char**) buffer;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+}
diff --git a/src/nss-mymachines/nss-mymachines.sym b/src/nss-mymachines/nss-mymachines.sym
new file mode 100644
index 0000000..ade6447
--- /dev/null
+++ b/src/nss-mymachines/nss-mymachines.sym
@@ -0,0 +1,21 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1+
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+***/
+
+{
+global:
+ _nss_mymachines_gethostbyname_r;
+ _nss_mymachines_gethostbyname2_r;
+ _nss_mymachines_gethostbyname3_r;
+ _nss_mymachines_gethostbyname4_r;
+ _nss_mymachines_getpwnam_r;
+ _nss_mymachines_getpwuid_r;
+ _nss_mymachines_getgrnam_r;
+ _nss_mymachines_getgrgid_r;
+local: *;
+};
diff --git a/src/nss-resolve/nss-resolve.c b/src/nss-resolve/nss-resolve.c
new file mode 100644
index 0000000..8370fed
--- /dev/null
+++ b/src/nss-resolve/nss-resolve.c
@@ -0,0 +1,657 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <netdb.h>
+#include <nss.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sd-bus.h"
+
+#include "bus-common-errors.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "nss-util.h"
+#include "resolved-def.h"
+#include "string-util.h"
+#include "util.h"
+#include "signal-util.h"
+
+NSS_GETHOSTBYNAME_PROTOTYPES(resolve);
+NSS_GETHOSTBYADDR_PROTOTYPES(resolve);
+
+static bool bus_error_shall_fallback(sd_bus_error *e) {
+ return sd_bus_error_has_name(e, SD_BUS_ERROR_SERVICE_UNKNOWN) ||
+ sd_bus_error_has_name(e, SD_BUS_ERROR_NAME_HAS_NO_OWNER) ||
+ sd_bus_error_has_name(e, SD_BUS_ERROR_NO_REPLY) ||
+ sd_bus_error_has_name(e, SD_BUS_ERROR_ACCESS_DENIED);
+}
+
+static int count_addresses(sd_bus_message *m, int af, const char **canonical) {
+ int c = 0, r;
+
+ assert(m);
+ assert(canonical);
+
+ r = sd_bus_message_enter_container(m, 'a', "(iiay)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_enter_container(m, 'r', "iiay")) > 0) {
+ int family, ifindex;
+
+ assert_cc(sizeof(int32_t) == sizeof(int));
+
+ r = sd_bus_message_read(m, "ii", &ifindex, &family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_skip(m, "ay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ if (af != AF_UNSPEC && family != af)
+ continue;
+
+ c++;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "s", canonical);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ return c;
+}
+
+static uint32_t ifindex_to_scopeid(int family, const void *a, int ifindex) {
+ struct in6_addr in6;
+
+ if (family != AF_INET6)
+ return 0;
+
+ /* Some apps can't deal with the scope ID attached to non-link-local addresses. Hence, let's suppress that. */
+
+ assert(sizeof(in6) == FAMILY_ADDRESS_SIZE(AF_INET6));
+ memcpy(&in6, a, sizeof(struct in6_addr));
+
+ return IN6_IS_ADDR_LINKLOCAL(&in6) ? ifindex : 0;
+}
+
+static bool avoid_deadlock(void) {
+
+ /* Check whether this lookup might have a chance of deadlocking because we are called from the service manager
+ * code activating systemd-resolved.service. After all, we shouldn't synchronously do lookups to
+ * systemd-resolved if we are required to finish before it can be started. This of course won't detect all
+ * possible dead locks of this kind, but it should work for the most obvious cases. */
+
+ if (geteuid() != 0) /* Ignore the env vars unless we are privileged. */
+ return false;
+
+ return streq_ptr(getenv("SYSTEMD_ACTIVATION_UNIT"), "systemd-resolved.service") &&
+ streq_ptr(getenv("SYSTEMD_ACTIVATION_SCOPE"), "system");
+}
+
+enum nss_status _nss_resolve_gethostbyname4_r(
+ const char *name,
+ struct gaih_addrtuple **pat,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ struct gaih_addrtuple *r_tuple, *r_tuple_first = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ enum nss_status ret = NSS_STATUS_UNAVAIL;
+ const char *canonical = NULL;
+ size_t l, ms, idx;
+ char *r_name;
+ int c, r, i = 0;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(pat);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ if (avoid_deadlock()) {
+ r = -EDEADLK;
+ goto fail;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &req,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "ResolveHostname");
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_set_auto_start(req, false);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_append(req, "isit", 0, name, AF_UNSPEC, (uint64_t) 0);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, _BUS_ERROR_DNS "NXDOMAIN") ||
+ !bus_error_shall_fallback(&error))
+ goto not_found;
+
+ /* Return NSS_STATUS_UNAVAIL when communication with systemd-resolved fails,
+ allowing falling back to other nss modules. Treat all other error conditions as
+ NOTFOUND. This includes DNSSEC errors and suchlike. (We don't use UNAVAIL in this
+ case so that the nsswitch.conf configuration can distuingish such executed but
+ negative replies from complete failure to talk to resolved). */
+ goto fail;
+ }
+
+ c = count_addresses(reply, AF_UNSPEC, &canonical);
+ if (c < 0) {
+ r = c;
+ goto fail;
+ }
+ if (c == 0)
+ goto not_found;
+
+ if (isempty(canonical))
+ canonical = name;
+
+ l = strlen(canonical);
+ ms = ALIGN(l+1) + ALIGN(sizeof(struct gaih_addrtuple)) * c;
+ if (buflen < ms) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ *h_errnop = NETDB_INTERNAL;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* First, append name */
+ r_name = buffer;
+ memcpy(r_name, canonical, l+1);
+ idx = ALIGN(l+1);
+
+ /* Second, append addresses */
+ r_tuple_first = (struct gaih_addrtuple*) (buffer + idx);
+
+ r = sd_bus_message_enter_container(reply, 'a', "(iiay)");
+ if (r < 0)
+ goto fail;
+
+ while ((r = sd_bus_message_enter_container(reply, 'r', "iiay")) > 0) {
+ int family, ifindex;
+ const void *a;
+ size_t sz;
+
+ assert_cc(sizeof(int32_t) == sizeof(int));
+
+ r = sd_bus_message_read(reply, "ii", &ifindex, &family);
+ if (r < 0)
+ goto fail;
+
+ if (ifindex < 0) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ r = sd_bus_message_read_array(reply, 'y', &a, &sz);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ goto fail;
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ continue;
+
+ if (sz != FAMILY_ADDRESS_SIZE(family)) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ r_tuple = (struct gaih_addrtuple*) (buffer + idx);
+ r_tuple->next = i == c-1 ? NULL : (struct gaih_addrtuple*) ((char*) r_tuple + ALIGN(sizeof(struct gaih_addrtuple)));
+ r_tuple->name = r_name;
+ r_tuple->family = family;
+ r_tuple->scopeid = ifindex_to_scopeid(family, a, ifindex);
+ memcpy(r_tuple->addr, a, sz);
+
+ idx += ALIGN(sizeof(struct gaih_addrtuple));
+ i++;
+ }
+ if (r < 0)
+ goto fail;
+
+ assert(i == c);
+ assert(idx == ms);
+
+ if (*pat)
+ **pat = *r_tuple_first;
+ else
+ *pat = r_tuple_first;
+
+ if (ttlp)
+ *ttlp = 0;
+
+ /* Explicitly reset both *h_errnop and h_errno to work around
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1125975 */
+ *h_errnop = NETDB_SUCCESS;
+ h_errno = 0;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ *h_errnop = NO_RECOVERY;
+ return ret;
+
+not_found:
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+}
+
+enum nss_status _nss_resolve_gethostbyname3_r(
+ const char *name,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp,
+ char **canonp) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char *r_name, *r_aliases, *r_addr, *r_addr_list;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ enum nss_status ret = NSS_STATUS_UNAVAIL;
+ size_t l, idx, ms, alen;
+ const char *canonical;
+ int c, r, i = 0;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(result);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ if (af == AF_UNSPEC)
+ af = AF_INET;
+
+ if (!IN_SET(af, AF_INET, AF_INET6)) {
+ r = -EAFNOSUPPORT;
+ goto fail;
+ }
+
+ if (avoid_deadlock()) {
+ r = -EDEADLK;
+ goto fail;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &req,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "ResolveHostname");
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_set_auto_start(req, false);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_append(req, "isit", 0, name, af, (uint64_t) 0);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, _BUS_ERROR_DNS "NXDOMAIN") ||
+ !bus_error_shall_fallback(&error))
+ goto not_found;
+
+ goto fail;
+ }
+
+ c = count_addresses(reply, af, &canonical);
+ if (c < 0) {
+ r = c;
+ goto fail;
+ }
+ if (c == 0)
+ goto not_found;
+
+ if (isempty(canonical))
+ canonical = name;
+
+ alen = FAMILY_ADDRESS_SIZE(af);
+ l = strlen(canonical);
+
+ ms = ALIGN(l+1) + c * ALIGN(alen) + (c+2) * sizeof(char*);
+
+ if (buflen < ms) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ *h_errnop = NETDB_INTERNAL;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* First, append name */
+ r_name = buffer;
+ memcpy(r_name, canonical, l+1);
+ idx = ALIGN(l+1);
+
+ /* Second, create empty aliases array */
+ r_aliases = buffer + idx;
+ ((char**) r_aliases)[0] = NULL;
+ idx += sizeof(char*);
+
+ /* Third, append addresses */
+ r_addr = buffer + idx;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(iiay)");
+ if (r < 0)
+ goto fail;
+
+ while ((r = sd_bus_message_enter_container(reply, 'r', "iiay")) > 0) {
+ int ifindex, family;
+ const void *a;
+ size_t sz;
+
+ r = sd_bus_message_read(reply, "ii", &ifindex, &family);
+ if (r < 0)
+ goto fail;
+
+ if (ifindex < 0) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ r = sd_bus_message_read_array(reply, 'y', &a, &sz);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ goto fail;
+
+ if (family != af)
+ continue;
+
+ if (sz != alen) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ memcpy(r_addr + i*ALIGN(alen), a, alen);
+ i++;
+ }
+ if (r < 0)
+ goto fail;
+
+ assert(i == c);
+ idx += c * ALIGN(alen);
+
+ /* Fourth, append address pointer array */
+ r_addr_list = buffer + idx;
+ for (i = 0; i < c; i++)
+ ((char**) r_addr_list)[i] = r_addr + i*ALIGN(alen);
+
+ ((char**) r_addr_list)[i] = NULL;
+ idx += (c+1) * sizeof(char*);
+
+ assert(idx == ms);
+
+ result->h_name = r_name;
+ result->h_aliases = (char**) r_aliases;
+ result->h_addrtype = af;
+ result->h_length = alen;
+ result->h_addr_list = (char**) r_addr_list;
+
+ if (ttlp)
+ *ttlp = 0;
+
+ if (canonp)
+ *canonp = r_name;
+
+ /* Explicitly reset both *h_errnop and h_errno to work around
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1125975 */
+ *h_errnop = NETDB_SUCCESS;
+ h_errno = 0;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ *h_errnop = NO_RECOVERY;
+ return ret;
+
+not_found:
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+}
+
+enum nss_status _nss_resolve_gethostbyaddr2_r(
+ const void* addr, socklen_t len,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char *r_name, *r_aliases, *r_addr, *r_addr_list;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ enum nss_status ret = NSS_STATUS_UNAVAIL;
+ unsigned c = 0, i = 0;
+ size_t ms = 0, idx;
+ const char *n;
+ int r, ifindex;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(addr);
+ assert(result);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ if (!IN_SET(af, AF_INET, AF_INET6)) {
+ UNPROTECT_ERRNO;
+ *errnop = EAFNOSUPPORT;
+ *h_errnop = NO_DATA;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ if (len != FAMILY_ADDRESS_SIZE(af)) {
+ UNPROTECT_ERRNO;
+ *errnop = EINVAL;
+ *h_errnop = NO_RECOVERY;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ if (avoid_deadlock()) {
+ r = -EDEADLK;
+ goto fail;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &req,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "ResolveAddress");
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_set_auto_start(req, false);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_append(req, "ii", 0, af);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_append_array(req, 'y', addr, len);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_append(req, "t", (uint64_t) 0);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, _BUS_ERROR_DNS "NXDOMAIN") ||
+ !bus_error_shall_fallback(&error))
+ goto not_found;
+
+ goto fail;
+ }
+
+ r = sd_bus_message_enter_container(reply, 'a', "(is)");
+ if (r < 0)
+ goto fail;
+
+ while ((r = sd_bus_message_read(reply, "(is)", &ifindex, &n)) > 0) {
+
+ if (ifindex < 0) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ c++;
+ ms += ALIGN(strlen(n) + 1);
+ }
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_rewind(reply, false);
+ if (r < 0)
+ return r;
+
+ if (c <= 0)
+ goto not_found;
+
+ ms += ALIGN(len) + /* the address */
+ 2 * sizeof(char*) + /* pointers to the address, plus trailing NULL */
+ c * sizeof(char*); /* pointers to aliases, plus trailing NULL */
+
+ if (buflen < ms) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ *h_errnop = NETDB_INTERNAL;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* First, place address */
+ r_addr = buffer;
+ memcpy(r_addr, addr, len);
+ idx = ALIGN(len);
+
+ /* Second, place address list */
+ r_addr_list = buffer + idx;
+ ((char**) r_addr_list)[0] = r_addr;
+ ((char**) r_addr_list)[1] = NULL;
+ idx += sizeof(char*) * 2;
+
+ /* Third, reserve space for the aliases array */
+ r_aliases = buffer + idx;
+ idx += sizeof(char*) * c;
+
+ /* Fourth, place aliases */
+ i = 0;
+ r_name = buffer + idx;
+ while ((r = sd_bus_message_read(reply, "(is)", &ifindex, &n)) > 0) {
+ char *p;
+ size_t l;
+
+ l = strlen(n);
+ p = buffer + idx;
+ memcpy(p, n, l+1);
+
+ if (i > 0)
+ ((char**) r_aliases)[i-1] = p;
+ i++;
+
+ idx += ALIGN(l+1);
+ }
+ if (r < 0)
+ goto fail;
+
+ ((char**) r_aliases)[c-1] = NULL;
+ assert(idx == ms);
+
+ result->h_name = r_name;
+ result->h_aliases = (char**) r_aliases;
+ result->h_addrtype = af;
+ result->h_length = len;
+ result->h_addr_list = (char**) r_addr_list;
+
+ if (ttlp)
+ *ttlp = 0;
+
+ /* Explicitly reset both *h_errnop and h_errno to work around
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1125975 */
+ *h_errnop = NETDB_SUCCESS;
+ h_errno = 0;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ *h_errnop = NO_RECOVERY;
+ return ret;
+
+not_found:
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+}
+
+NSS_GETHOSTBYNAME_FALLBACKS(resolve);
+NSS_GETHOSTBYADDR_FALLBACKS(resolve);
diff --git a/src/nss-resolve/nss-resolve.sym b/src/nss-resolve/nss-resolve.sym
new file mode 100644
index 0000000..e690a00
--- /dev/null
+++ b/src/nss-resolve/nss-resolve.sym
@@ -0,0 +1,19 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1+
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+***/
+
+{
+global:
+ _nss_resolve_gethostbyname_r;
+ _nss_resolve_gethostbyname2_r;
+ _nss_resolve_gethostbyname3_r;
+ _nss_resolve_gethostbyname4_r;
+ _nss_resolve_gethostbyaddr_r;
+ _nss_resolve_gethostbyaddr2_r;
+local: *;
+};
diff --git a/src/nss-systemd/nss-systemd.c b/src/nss-systemd/nss-systemd.c
new file mode 100644
index 0000000..f8db27a
--- /dev/null
+++ b/src/nss-systemd/nss-systemd.c
@@ -0,0 +1,834 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <nss.h>
+#include <pthread.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "dirent-util.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "list.h"
+#include "macro.h"
+#include "nss-util.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "user-util.h"
+#include "util.h"
+
+#define DYNAMIC_USER_GECOS "Dynamic User"
+#define DYNAMIC_USER_PASSWD "*" /* locked */
+#define DYNAMIC_USER_DIR "/"
+#define DYNAMIC_USER_SHELL "/sbin/nologin"
+
+static const struct passwd root_passwd = {
+ .pw_name = (char*) "root",
+ .pw_passwd = (char*) "x", /* see shadow file */
+ .pw_uid = 0,
+ .pw_gid = 0,
+ .pw_gecos = (char*) "Super User",
+ .pw_dir = (char*) "/root",
+ .pw_shell = (char*) "/bin/sh",
+};
+
+static const struct passwd nobody_passwd = {
+ .pw_name = (char*) NOBODY_USER_NAME,
+ .pw_passwd = (char*) "*", /* locked */
+ .pw_uid = UID_NOBODY,
+ .pw_gid = GID_NOBODY,
+ .pw_gecos = (char*) "User Nobody",
+ .pw_dir = (char*) "/",
+ .pw_shell = (char*) "/sbin/nologin",
+};
+
+static const struct group root_group = {
+ .gr_name = (char*) "root",
+ .gr_gid = 0,
+ .gr_passwd = (char*) "x", /* see shadow file */
+ .gr_mem = (char*[]) { NULL },
+};
+
+static const struct group nobody_group = {
+ .gr_name = (char*) NOBODY_GROUP_NAME,
+ .gr_gid = GID_NOBODY,
+ .gr_passwd = (char*) "*", /* locked */
+ .gr_mem = (char*[]) { NULL },
+};
+
+typedef struct UserEntry UserEntry;
+typedef struct GetentData GetentData;
+
+struct UserEntry {
+ uid_t id;
+ char *name;
+
+ GetentData *data;
+ LIST_FIELDS(UserEntry, entries);
+};
+
+struct GetentData {
+ /* As explained in NOTES section of getpwent_r(3) as 'getpwent_r() is not really
+ * reentrant since it shares the reading position in the stream with all other threads',
+ * we need to protect the data in UserEntry from multithreaded programs which may call
+ * setpwent(), getpwent_r(), or endpwent() simultaneously. So, each function locks the
+ * data by using the mutex below. */
+ pthread_mutex_t mutex;
+
+ UserEntry *position;
+ LIST_HEAD(UserEntry, entries);
+};
+
+static GetentData getpwent_data = { PTHREAD_MUTEX_INITIALIZER, NULL, NULL };
+static GetentData getgrent_data = { PTHREAD_MUTEX_INITIALIZER, NULL, NULL };
+
+NSS_GETPW_PROTOTYPES(systemd);
+NSS_GETGR_PROTOTYPES(systemd);
+enum nss_status _nss_systemd_endpwent(void) _public_;
+enum nss_status _nss_systemd_setpwent(int stayopen) _public_;
+enum nss_status _nss_systemd_getpwent_r(struct passwd *result, char *buffer, size_t buflen, int *errnop) _public_;
+enum nss_status _nss_systemd_endgrent(void) _public_;
+enum nss_status _nss_systemd_setgrent(int stayopen) _public_;
+enum nss_status _nss_systemd_getgrent_r(struct group *result, char *buffer, size_t buflen, int *errnop) _public_;
+
+static int direct_lookup_name(const char *name, uid_t *ret) {
+ _cleanup_free_ char *s = NULL;
+ const char *path;
+ int r;
+
+ assert(name);
+
+ /* Normally, we go via the bus to resolve names. That has the benefit that it is available from any mount
+ * namespace and subject to proper authentication. However, there's one problem: if our module is called from
+ * dbus-daemon itself we really can't use D-Bus to communicate. In this case, resort to a client-side hack,
+ * and look for the dynamic names directly. This is pretty ugly, but breaks the cyclic dependency. */
+
+ path = strjoina("/run/systemd/dynamic-uid/direct:", name);
+ r = readlink_malloc(path, &s);
+ if (r < 0)
+ return r;
+
+ return parse_uid(s, ret);
+}
+
+static int direct_lookup_uid(uid_t uid, char **ret) {
+ char path[STRLEN("/run/systemd/dynamic-uid/direct:") + DECIMAL_STR_MAX(uid_t) + 1], *s;
+ int r;
+
+ xsprintf(path, "/run/systemd/dynamic-uid/direct:" UID_FMT, uid);
+
+ r = readlink_malloc(path, &s);
+ if (r < 0)
+ return r;
+ if (!valid_user_group_name(s)) { /* extra safety check */
+ free(s);
+ return -EINVAL;
+ }
+
+ *ret = s;
+ return 0;
+}
+
+enum nss_status _nss_systemd_getpwnam_r(
+ const char *name,
+ struct passwd *pwd,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ uint32_t translated;
+ size_t l;
+ int bypass, r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(pwd);
+
+ /* If the username is not valid, then we don't know it. Ideally libc would filter these for us anyway. We don't
+ * generate EINVAL here, because it isn't really out business to complain about invalid user names. */
+ if (!valid_user_group_name(name))
+ return NSS_STATUS_NOTFOUND;
+
+ /* Synthesize entries for the root and nobody users, in case they are missing in /etc/passwd */
+ if (getenv_bool_secure("SYSTEMD_NSS_BYPASS_SYNTHETIC") <= 0) {
+ if (streq(name, root_passwd.pw_name)) {
+ *pwd = root_passwd;
+ return NSS_STATUS_SUCCESS;
+ }
+ if (synthesize_nobody() &&
+ streq(name, nobody_passwd.pw_name)) {
+ *pwd = nobody_passwd;
+ return NSS_STATUS_SUCCESS;
+ }
+ }
+
+ /* Make sure that we don't go in circles when allocating a dynamic UID by checking our own database */
+ if (getenv_bool_secure("SYSTEMD_NSS_DYNAMIC_BYPASS") > 0)
+ return NSS_STATUS_NOTFOUND;
+
+ bypass = getenv_bool_secure("SYSTEMD_NSS_BYPASS_BUS");
+ if (bypass <= 0) {
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ bypass = 1;
+ }
+
+ if (bypass > 0) {
+ r = direct_lookup_name(name, (uid_t*) &translated);
+ if (r == -ENOENT)
+ return NSS_STATUS_NOTFOUND;
+ if (r < 0)
+ goto fail;
+ } else {
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "LookupDynamicUserByName",
+ &error,
+ &reply,
+ "s",
+ name);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER))
+ return NSS_STATUS_NOTFOUND;
+
+ goto fail;
+ }
+
+ r = sd_bus_message_read(reply, "u", &translated);
+ if (r < 0)
+ goto fail;
+ }
+
+ l = strlen(name);
+ if (buflen < l+1) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ memcpy(buffer, name, l+1);
+
+ pwd->pw_name = buffer;
+ pwd->pw_uid = (uid_t) translated;
+ pwd->pw_gid = (uid_t) translated;
+ pwd->pw_gecos = (char*) DYNAMIC_USER_GECOS;
+ pwd->pw_passwd = (char*) DYNAMIC_USER_PASSWD;
+ pwd->pw_dir = (char*) DYNAMIC_USER_DIR;
+ pwd->pw_shell = (char*) DYNAMIC_USER_SHELL;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+}
+
+enum nss_status _nss_systemd_getpwuid_r(
+ uid_t uid,
+ struct passwd *pwd,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *direct = NULL;
+ const char *translated;
+ size_t l;
+ int bypass, r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ if (!uid_is_valid(uid))
+ return NSS_STATUS_NOTFOUND;
+
+ /* Synthesize data for the root user and for nobody in case they are missing from /etc/passwd */
+ if (getenv_bool_secure("SYSTEMD_NSS_BYPASS_SYNTHETIC") <= 0) {
+ if (uid == root_passwd.pw_uid) {
+ *pwd = root_passwd;
+ return NSS_STATUS_SUCCESS;
+ }
+ if (synthesize_nobody() &&
+ uid == nobody_passwd.pw_uid) {
+ *pwd = nobody_passwd;
+ return NSS_STATUS_SUCCESS;
+ }
+ }
+
+ if (!uid_is_dynamic(uid))
+ return NSS_STATUS_NOTFOUND;
+
+ if (getenv_bool_secure("SYSTEMD_NSS_DYNAMIC_BYPASS") > 0)
+ return NSS_STATUS_NOTFOUND;
+
+ bypass = getenv_bool_secure("SYSTEMD_NSS_BYPASS_BUS");
+ if (bypass <= 0) {
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ bypass = 1;
+ }
+
+ if (bypass > 0) {
+ r = direct_lookup_uid(uid, &direct);
+ if (r == -ENOENT)
+ return NSS_STATUS_NOTFOUND;
+ if (r < 0)
+ goto fail;
+
+ translated = direct;
+
+ } else {
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "LookupDynamicUserByUID",
+ &error,
+ &reply,
+ "u",
+ (uint32_t) uid);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER))
+ return NSS_STATUS_NOTFOUND;
+
+ goto fail;
+ }
+
+ r = sd_bus_message_read(reply, "s", &translated);
+ if (r < 0)
+ goto fail;
+ }
+
+ l = strlen(translated) + 1;
+ if (buflen < l) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ memcpy(buffer, translated, l);
+
+ pwd->pw_name = buffer;
+ pwd->pw_uid = uid;
+ pwd->pw_gid = uid;
+ pwd->pw_gecos = (char*) DYNAMIC_USER_GECOS;
+ pwd->pw_passwd = (char*) DYNAMIC_USER_PASSWD;
+ pwd->pw_dir = (char*) DYNAMIC_USER_DIR;
+ pwd->pw_shell = (char*) DYNAMIC_USER_SHELL;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+}
+
+#pragma GCC diagnostic ignored "-Wsizeof-pointer-memaccess"
+
+enum nss_status _nss_systemd_getgrnam_r(
+ const char *name,
+ struct group *gr,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ uint32_t translated;
+ size_t l;
+ int bypass, r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(gr);
+
+ if (!valid_user_group_name(name))
+ return NSS_STATUS_NOTFOUND;
+
+ /* Synthesize records for root and nobody, in case they are missing form /etc/group */
+ if (getenv_bool_secure("SYSTEMD_NSS_BYPASS_SYNTHETIC") <= 0) {
+ if (streq(name, root_group.gr_name)) {
+ *gr = root_group;
+ return NSS_STATUS_SUCCESS;
+ }
+ if (synthesize_nobody() &&
+ streq(name, nobody_group.gr_name)) {
+ *gr = nobody_group;
+ return NSS_STATUS_SUCCESS;
+ }
+ }
+
+ if (getenv_bool_secure("SYSTEMD_NSS_DYNAMIC_BYPASS") > 0)
+ return NSS_STATUS_NOTFOUND;
+
+ bypass = getenv_bool_secure("SYSTEMD_NSS_BYPASS_BUS");
+ if (bypass <= 0) {
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ bypass = 1;
+ }
+
+ if (bypass > 0) {
+ r = direct_lookup_name(name, (uid_t*) &translated);
+ if (r == -ENOENT)
+ return NSS_STATUS_NOTFOUND;
+ if (r < 0)
+ goto fail;
+ } else {
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "LookupDynamicUserByName",
+ &error,
+ &reply,
+ "s",
+ name);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER))
+ return NSS_STATUS_NOTFOUND;
+
+ goto fail;
+ }
+
+ r = sd_bus_message_read(reply, "u", &translated);
+ if (r < 0)
+ goto fail;
+ }
+
+ l = sizeof(char*) + strlen(name) + 1;
+ if (buflen < l) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ memzero(buffer, sizeof(char*));
+ strcpy(buffer + sizeof(char*), name);
+
+ gr->gr_name = buffer + sizeof(char*);
+ gr->gr_gid = (gid_t) translated;
+ gr->gr_passwd = (char*) DYNAMIC_USER_PASSWD;
+ gr->gr_mem = (char**) buffer;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+}
+
+enum nss_status _nss_systemd_getgrgid_r(
+ gid_t gid,
+ struct group *gr,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *direct = NULL;
+ const char *translated;
+ size_t l;
+ int bypass, r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ if (!gid_is_valid(gid))
+ return NSS_STATUS_NOTFOUND;
+
+ /* Synthesize records for root and nobody, in case they are missing from /etc/group */
+ if (getenv_bool_secure("SYSTEMD_NSS_BYPASS_SYNTHETIC") <= 0) {
+ if (gid == root_group.gr_gid) {
+ *gr = root_group;
+ return NSS_STATUS_SUCCESS;
+ }
+ if (synthesize_nobody() &&
+ gid == nobody_group.gr_gid) {
+ *gr = nobody_group;
+ return NSS_STATUS_SUCCESS;
+ }
+ }
+
+ if (!gid_is_dynamic(gid))
+ return NSS_STATUS_NOTFOUND;
+
+ if (getenv_bool_secure("SYSTEMD_NSS_DYNAMIC_BYPASS") > 0)
+ return NSS_STATUS_NOTFOUND;
+
+ bypass = getenv_bool_secure("SYSTEMD_NSS_BYPASS_BUS");
+ if (bypass <= 0) {
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ bypass = 1;
+ }
+
+ if (bypass > 0) {
+ r = direct_lookup_uid(gid, &direct);
+ if (r == -ENOENT)
+ return NSS_STATUS_NOTFOUND;
+ if (r < 0)
+ goto fail;
+
+ translated = direct;
+
+ } else {
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "LookupDynamicUserByUID",
+ &error,
+ &reply,
+ "u",
+ (uint32_t) gid);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER))
+ return NSS_STATUS_NOTFOUND;
+
+ goto fail;
+ }
+
+ r = sd_bus_message_read(reply, "s", &translated);
+ if (r < 0)
+ goto fail;
+ }
+
+ l = sizeof(char*) + strlen(translated) + 1;
+ if (buflen < l) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ memzero(buffer, sizeof(char*));
+ strcpy(buffer + sizeof(char*), translated);
+
+ gr->gr_name = buffer + sizeof(char*);
+ gr->gr_gid = gid;
+ gr->gr_passwd = (char*) DYNAMIC_USER_PASSWD;
+ gr->gr_mem = (char**) buffer;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+}
+
+static void user_entry_free(UserEntry *p) {
+ if (!p)
+ return;
+
+ if (p->data)
+ LIST_REMOVE(entries, p->data->entries, p);
+
+ free(p->name);
+ free(p);
+}
+
+static int user_entry_add(GetentData *data, const char *name, uid_t id) {
+ UserEntry *p;
+
+ assert(data);
+
+ /* This happens when User= or Group= already exists statically. */
+ if (!uid_is_dynamic(id))
+ return -EINVAL;
+
+ p = new0(UserEntry, 1);
+ if (!p)
+ return -ENOMEM;
+
+ p->name = strdup(name);
+ if (!p->name) {
+ free(p);
+ return -ENOMEM;
+ }
+ p->id = id;
+ p->data = data;
+
+ LIST_PREPEND(entries, data->entries, p);
+
+ return 0;
+}
+
+static void systemd_endent(GetentData *data) {
+ UserEntry *p;
+
+ assert(data);
+
+ while ((p = data->entries))
+ user_entry_free(p);
+
+ data->position = NULL;
+}
+
+static enum nss_status nss_systemd_endent(GetentData *p) {
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert_se(pthread_mutex_lock(&p->mutex) == 0);
+ systemd_endent(p);
+ assert_se(pthread_mutex_unlock(&p->mutex) == 0);
+
+ return NSS_STATUS_SUCCESS;
+}
+
+enum nss_status _nss_systemd_endpwent(void) {
+ return nss_systemd_endent(&getpwent_data);
+}
+
+enum nss_status _nss_systemd_endgrent(void) {
+ return nss_systemd_endent(&getgrent_data);
+}
+
+static int direct_enumeration(GetentData *p) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r;
+
+ assert(p);
+
+ d = opendir("/run/systemd/dynamic-uid/");
+ if (!d)
+ return -errno;
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ _cleanup_free_ char *name = NULL;
+ uid_t uid, verified;
+
+ if (!dirent_is_file(de))
+ continue;
+
+ r = parse_uid(de->d_name, &uid);
+ if (r < 0)
+ continue;
+
+ r = direct_lookup_uid(uid, &name);
+ if (r == -ENOMEM)
+ return r;
+ if (r < 0)
+ continue;
+
+ r = direct_lookup_name(name, &verified);
+ if (r < 0)
+ continue;
+
+ if (uid != verified)
+ continue;
+
+ r = user_entry_add(p, name, uid);
+ if (r == -ENOMEM)
+ return r;
+ if (r < 0)
+ continue;
+ }
+
+ return 0;
+}
+
+static enum nss_status systemd_setent(GetentData *p) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ const char *name;
+ uid_t id;
+ int bypass, r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(p);
+
+ assert_se(pthread_mutex_lock(&p->mutex) == 0);
+
+ systemd_endent(p);
+
+ if (getenv_bool_secure("SYSTEMD_NSS_DYNAMIC_BYPASS") > 0)
+ goto finish;
+
+ bypass = getenv_bool_secure("SYSTEMD_NSS_BYPASS_BUS");
+
+ if (bypass <= 0) {
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ bypass = 1;
+ }
+
+ if (bypass > 0) {
+ r = direct_enumeration(p);
+ if (r < 0)
+ goto fail;
+
+ goto finish;
+ }
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "GetDynamicUsers",
+ &error,
+ &reply,
+ NULL);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(us)");
+ if (r < 0)
+ goto fail;
+
+ while ((r = sd_bus_message_read(reply, "(us)", &id, &name)) > 0) {
+ r = user_entry_add(p, name, id);
+ if (r == -ENOMEM)
+ goto fail;
+ if (r < 0)
+ continue;
+ }
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ goto fail;
+
+finish:
+ p->position = p->entries;
+ assert_se(pthread_mutex_unlock(&p->mutex) == 0);
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ systemd_endent(p);
+ assert_se(pthread_mutex_unlock(&p->mutex) == 0);
+
+ return NSS_STATUS_UNAVAIL;
+}
+
+enum nss_status _nss_systemd_setpwent(int stayopen) {
+ return systemd_setent(&getpwent_data);
+}
+
+enum nss_status _nss_systemd_setgrent(int stayopen) {
+ return systemd_setent(&getgrent_data);
+}
+
+enum nss_status _nss_systemd_getpwent_r(struct passwd *result, char *buffer, size_t buflen, int *errnop) {
+ enum nss_status ret;
+ UserEntry *p;
+ size_t len;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(result);
+ assert(buffer);
+ assert(errnop);
+
+ assert_se(pthread_mutex_lock(&getpwent_data.mutex) == 0);
+
+ LIST_FOREACH(entries, p, getpwent_data.position) {
+ len = strlen(p->name) + 1;
+ if (buflen < len) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ ret = NSS_STATUS_TRYAGAIN;
+ goto finalize;
+ }
+
+ memcpy(buffer, p->name, len);
+
+ result->pw_name = buffer;
+ result->pw_uid = p->id;
+ result->pw_gid = p->id;
+ result->pw_gecos = (char*) DYNAMIC_USER_GECOS;
+ result->pw_passwd = (char*) DYNAMIC_USER_PASSWD;
+ result->pw_dir = (char*) DYNAMIC_USER_DIR;
+ result->pw_shell = (char*) DYNAMIC_USER_SHELL;
+ break;
+ }
+ if (!p) {
+ ret = NSS_STATUS_NOTFOUND;
+ goto finalize;
+ }
+
+ /* On success, step to the next entry. */
+ p = p->entries_next;
+ ret = NSS_STATUS_SUCCESS;
+
+finalize:
+ /* Save position for the next call. */
+ getpwent_data.position = p;
+
+ assert_se(pthread_mutex_unlock(&getpwent_data.mutex) == 0);
+
+ return ret;
+}
+
+enum nss_status _nss_systemd_getgrent_r(struct group *result, char *buffer, size_t buflen, int *errnop) {
+ enum nss_status ret;
+ UserEntry *p;
+ size_t len;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(result);
+ assert(buffer);
+ assert(errnop);
+
+ assert_se(pthread_mutex_lock(&getgrent_data.mutex) == 0);
+
+ LIST_FOREACH(entries, p, getgrent_data.position) {
+ len = sizeof(char*) + strlen(p->name) + 1;
+ if (buflen < len) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ ret = NSS_STATUS_TRYAGAIN;
+ goto finalize;
+ }
+
+ memzero(buffer, sizeof(char*));
+ strcpy(buffer + sizeof(char*), p->name);
+
+ result->gr_name = buffer + sizeof(char*);
+ result->gr_gid = p->id;
+ result->gr_passwd = (char*) DYNAMIC_USER_PASSWD;
+ result->gr_mem = (char**) buffer;
+ break;
+ }
+ if (!p) {
+ ret = NSS_STATUS_NOTFOUND;
+ goto finalize;
+ }
+
+ /* On success, step to the next entry. */
+ p = p->entries_next;
+ ret = NSS_STATUS_SUCCESS;
+
+finalize:
+ /* Save position for the next call. */
+ getgrent_data.position = p;
+
+ assert_se(pthread_mutex_unlock(&getgrent_data.mutex) == 0);
+
+ return ret;
+}
diff --git a/src/nss-systemd/nss-systemd.sym b/src/nss-systemd/nss-systemd.sym
new file mode 100644
index 0000000..ff63382
--- /dev/null
+++ b/src/nss-systemd/nss-systemd.sym
@@ -0,0 +1,23 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1+
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+***/
+
+{
+global:
+ _nss_systemd_getpwnam_r;
+ _nss_systemd_getpwuid_r;
+ _nss_systemd_endpwent;
+ _nss_systemd_setpwent;
+ _nss_systemd_getpwent_r;
+ _nss_systemd_getgrnam_r;
+ _nss_systemd_getgrgid_r;
+ _nss_systemd_endgrent;
+ _nss_systemd_setgrent;
+ _nss_systemd_getgrent_r;
+local: *;
+};
diff --git a/src/partition/growfs.c b/src/partition/growfs.c
new file mode 100644
index 0000000..c9d1c27
--- /dev/null
+++ b/src/partition/growfs.c
@@ -0,0 +1,328 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/magic.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+
+#include "blockdev-util.h"
+#include "crypt-util.h"
+#include "device-nodes.h"
+#include "dissect-image.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "log.h"
+#include "missing.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "stat-util.h"
+#include "strv.h"
+
+static const char *arg_target = NULL;
+static bool arg_dry_run = false;
+
+static int resize_ext4(const char *path, int mountfd, int devfd, uint64_t numblocks, uint64_t blocksize) {
+ assert((uint64_t) (int) blocksize == blocksize);
+
+ if (arg_dry_run)
+ return 0;
+
+ if (ioctl(mountfd, EXT4_IOC_RESIZE_FS, &numblocks) != 0)
+ return log_error_errno(errno, "Failed to resize \"%s\" to %"PRIu64" blocks (ext4): %m",
+ path, numblocks);
+
+ return 0;
+}
+
+static int resize_btrfs(const char *path, int mountfd, int devfd, uint64_t numblocks, uint64_t blocksize) {
+ struct btrfs_ioctl_vol_args args = {};
+ int r;
+
+ assert((uint64_t) (int) blocksize == blocksize);
+
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=118111 */
+ if (numblocks * blocksize < 256*1024*1024) {
+ log_warning("%s: resizing of btrfs volumes smaller than 256M is not supported", path);
+ return -EOPNOTSUPP;
+ }
+
+ r = snprintf(args.name, sizeof(args.name), "%"PRIu64, numblocks * blocksize);
+ /* The buffer is large enough for any number to fit... */
+ assert((size_t) r < sizeof(args.name));
+
+ if (arg_dry_run)
+ return 0;
+
+ if (ioctl(mountfd, BTRFS_IOC_RESIZE, &args) != 0)
+ return log_error_errno(errno, "Failed to resize \"%s\" to %"PRIu64" blocks (btrfs): %m",
+ path, numblocks);
+
+ return 0;
+}
+
+#if HAVE_LIBCRYPTSETUP
+static int resize_crypt_luks_device(dev_t devno, const char *fstype, dev_t main_devno) {
+ _cleanup_free_ char *devpath = NULL, *main_devpath = NULL;
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ _cleanup_close_ int main_devfd = -1;
+ uint64_t size;
+ int r;
+
+ r = device_path_make_major_minor(S_IFBLK, main_devno, &main_devpath);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format device major/minor path: %m");
+
+ main_devfd = open(main_devpath, O_RDONLY|O_CLOEXEC);
+ if (main_devfd < 0)
+ return log_error_errno(errno, "Failed to open \"%s\": %m", main_devpath);
+
+ if (ioctl(main_devfd, BLKGETSIZE64, &size) != 0)
+ return log_error_errno(errno, "Failed to query size of \"%s\" (before resize): %m",
+ main_devpath);
+
+ log_debug("%s is %"PRIu64" bytes", main_devpath, size);
+ r = device_path_make_major_minor(S_IFBLK, devno, &devpath);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format major/minor path: %m");
+
+ r = crypt_init(&cd, devpath);
+ if (r < 0)
+ return log_error_errno(r, "crypt_init(\"%s\") failed: %m", devpath);
+
+ crypt_set_log_callback(cd, cryptsetup_log_glue, NULL);
+
+ r = crypt_load(cd, CRYPT_LUKS, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to load LUKS metadata for %s: %m", devpath);
+
+ if (arg_dry_run)
+ return 0;
+
+ r = crypt_resize(cd, main_devpath, 0);
+ if (r < 0)
+ return log_error_errno(r, "crypt_resize() of %s failed: %m", devpath);
+
+ if (ioctl(main_devfd, BLKGETSIZE64, &size) != 0)
+ log_warning_errno(errno, "Failed to query size of \"%s\" (after resize): %m",
+ devpath);
+ else
+ log_debug("%s is now %"PRIu64" bytes", main_devpath, size);
+
+ return 1;
+}
+#endif
+
+static int maybe_resize_slave_device(const char *mountpath, dev_t main_devno) {
+ _cleanup_free_ char *fstype = NULL, *devpath = NULL;
+ dev_t devno;
+ int r;
+
+#if HAVE_LIBCRYPTSETUP
+ crypt_set_log_callback(NULL, cryptsetup_log_glue, NULL);
+ crypt_set_debug_level(1);
+#endif
+
+ r = get_block_device_harder(mountpath, &devno);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine underlying block device of \"%s\": %m",
+ mountpath);
+
+ log_debug("Underlying device %d:%d, main dev %d:%d, %s",
+ major(devno), minor(devno),
+ major(main_devno), minor(main_devno),
+ devno == main_devno ? "same" : "different");
+ if (devno == main_devno)
+ return 0;
+
+ r = device_path_make_major_minor(S_IFBLK, devno, &devpath);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format device major/minor path: %m");
+
+ r = probe_filesystem(devpath, &fstype);
+ if (r == -EUCLEAN)
+ return log_warning_errno(r, "Cannot reliably determine probe \"%s\", refusing to proceed.", devpath);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to probe \"%s\": %m", devpath);
+
+#if HAVE_LIBCRYPTSETUP
+ if (streq_ptr(fstype, "crypto_LUKS"))
+ return resize_crypt_luks_device(devno, fstype, main_devno);
+#endif
+
+ log_debug("Don't know how to resize %s of type %s, ignoring", devpath, strnull(fstype));
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-growfs@.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] /path/to/mountpoint\n\n"
+ "Grow filesystem or encrypted payload to device size.\n\n"
+ "Options:\n"
+ " -h --help Show this help and exit\n"
+ " --version Print version string and exit\n"
+ " -n --dry-run Just print what would be done\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ };
+
+ int c;
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version" , no_argument, NULL, ARG_VERSION },
+ { "dry-run", no_argument, NULL, 'n' },
+ {}
+ };
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hn", options, NULL)) >= 0)
+ switch(c) {
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'n':
+ arg_dry_run = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind + 1 != argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s excepts exactly one argument (the mount point).",
+ program_invocation_short_name);
+
+ arg_target = argv[optind];
+
+ return 1;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_close_ int mountfd = -1, devfd = -1;
+ _cleanup_free_ char *devpath = NULL;
+ uint64_t size, numblocks;
+ char fb[FORMAT_BYTES_MAX];
+ struct statfs sfs;
+ dev_t devno;
+ int blocksize;
+ int r;
+
+ log_setup_service();
+
+ r = parse_argv(argc, argv);
+ if (r < 0)
+ return EXIT_FAILURE;
+ if (r == 0)
+ return EXIT_SUCCESS;
+
+ r = path_is_mount_point(arg_target, NULL, 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to check if \"%s\" is a mount point: %m", arg_target);
+ return EXIT_FAILURE;
+ }
+ if (r == 0) {
+ log_error_errno(r, "\"%s\" is not a mount point: %m", arg_target);
+ return EXIT_FAILURE;
+ }
+
+ r = get_block_device(arg_target, &devno);
+ if (r < 0) {
+ log_error_errno(r, "Failed to determine block device of \"%s\": %m", arg_target);
+ return EXIT_FAILURE;
+ }
+
+ r = maybe_resize_slave_device(arg_target, devno);
+ if (r < 0)
+ return EXIT_FAILURE;
+
+ mountfd = open(arg_target, O_RDONLY|O_CLOEXEC);
+ if (mountfd < 0) {
+ log_error_errno(errno, "Failed to open \"%s\": %m", arg_target);
+ return EXIT_FAILURE;
+ }
+
+ r = device_path_make_major_minor(S_IFBLK, devno, &devpath);
+ if (r < 0) {
+ log_error_errno(r, "Failed to format device major/minor path: %m");
+ return EXIT_FAILURE;
+ }
+
+ devfd = open(devpath, O_RDONLY|O_CLOEXEC);
+ if (devfd < 0) {
+ log_error_errno(errno, "Failed to open \"%s\": %m", devpath);
+ return EXIT_FAILURE;
+ }
+
+ if (ioctl(devfd, BLKBSZGET, &blocksize) != 0) {
+ log_error_errno(errno, "Failed to query block size of \"%s\": %m", devpath);
+ return EXIT_FAILURE;
+ }
+
+ if (ioctl(devfd, BLKGETSIZE64, &size) != 0) {
+ log_error_errno(errno, "Failed to query size of \"%s\": %m", devpath);
+ return EXIT_FAILURE;
+ }
+
+ if (size % blocksize != 0)
+ log_notice("Partition size %"PRIu64" is not a multiple of the blocksize %d,"
+ " ignoring %"PRIu64" bytes", size, blocksize, size % blocksize);
+
+ numblocks = size / blocksize;
+
+ if (fstatfs(mountfd, &sfs) < 0) {
+ log_error_errno(errno, "Failed to stat file system \"%s\": %m", arg_target);
+ return EXIT_FAILURE;
+ }
+
+ switch(sfs.f_type) {
+ case EXT4_SUPER_MAGIC:
+ r = resize_ext4(arg_target, mountfd, devfd, numblocks, blocksize);
+ break;
+ case BTRFS_SUPER_MAGIC:
+ r = resize_btrfs(arg_target, mountfd, devfd, numblocks, blocksize);
+ break;
+ default:
+ log_error("Don't know how to resize fs %llx on \"%s\"",
+ (long long unsigned) sfs.f_type, arg_target);
+ return EXIT_FAILURE;
+ }
+
+ if (r < 0)
+ return EXIT_FAILURE;
+
+ log_info("Successfully resized \"%s\" to %s bytes (%"PRIu64" blocks of %d bytes).",
+ arg_target, format_bytes(fb, sizeof fb, size), numblocks, blocksize);
+ return EXIT_SUCCESS;
+}
diff --git a/src/partition/makefs.c b/src/partition/makefs.c
new file mode 100644
index 0000000..0b9bae5
--- /dev/null
+++ b/src/partition/makefs.c
@@ -0,0 +1,83 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1+
+***/
+
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dissect-image.h"
+#include "main-func.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+
+static int makefs(const char *type, const char *device) {
+ const char *mkfs;
+ pid_t pid;
+ int r;
+
+ if (streq(type, "swap"))
+ mkfs = "/sbin/mkswap";
+ else
+ mkfs = strjoina("/sbin/mkfs.", type);
+ if (access(mkfs, X_OK) != 0)
+ return log_error_errno(errno, "%s is not executable: %m", mkfs);
+
+ r = safe_fork("(mkfs)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char *cmdline[3] = { mkfs, device, NULL };
+
+ /* Child */
+
+ execv(cmdline[0], (char**) cmdline);
+ _exit(EXIT_FAILURE);
+ }
+
+ return wait_for_terminate_and_check(mkfs, pid, WAIT_LOG);
+}
+
+static int run(int argc, char *argv[]) {
+ const char *device, *type;
+ _cleanup_free_ char *detected = NULL;
+ struct stat st;
+ int r;
+
+ log_setup_service();
+
+ if (argc != 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program expects two arguments.");
+
+ type = argv[1];
+ device = argv[2];
+
+ if (stat(device, &st) < 0)
+ return log_error_errno(errno, "Failed to stat \"%s\": %m", device);
+
+ if (!S_ISBLK(st.st_mode))
+ log_info("%s is not a block device.", device);
+
+ r = probe_filesystem(device, &detected);
+ if (r < 0)
+ return log_warning_errno(r,
+ r == -EUCLEAN ?
+ "Cannot reliably determine probe \"%s\", refusing to proceed." :
+ "Failed to probe \"%s\": %m",
+ device);
+
+ if (detected) {
+ log_info("%s is not empty (type %s), exiting", device, detected);
+ return 0;
+ }
+
+ return makefs(type, device);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/path/path.c b/src/path/path.c
new file mode 100644
index 0000000..7b630a5
--- /dev/null
+++ b/src/path/path.c
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sd-path.h"
+
+#include "alloc-util.h"
+#include "log.h"
+#include "macro.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "util.h"
+
+static const char *arg_suffix = NULL;
+
+static const char* const path_table[_SD_PATH_MAX] = {
+ [SD_PATH_TEMPORARY] = "temporary",
+ [SD_PATH_TEMPORARY_LARGE] = "temporary-large",
+ [SD_PATH_SYSTEM_BINARIES] = "system-binaries",
+ [SD_PATH_SYSTEM_INCLUDE] = "system-include",
+ [SD_PATH_SYSTEM_LIBRARY_PRIVATE] = "system-library-private",
+ [SD_PATH_SYSTEM_LIBRARY_ARCH] = "system-library-arch",
+ [SD_PATH_SYSTEM_SHARED] = "system-shared",
+ [SD_PATH_SYSTEM_CONFIGURATION_FACTORY] = "system-configuration-factory",
+ [SD_PATH_SYSTEM_STATE_FACTORY] = "system-state-factory",
+ [SD_PATH_SYSTEM_CONFIGURATION] = "system-configuration",
+ [SD_PATH_SYSTEM_RUNTIME] = "system-runtime",
+ [SD_PATH_SYSTEM_RUNTIME_LOGS] = "system-runtime-logs",
+ [SD_PATH_SYSTEM_STATE_PRIVATE] = "system-state-private",
+ [SD_PATH_SYSTEM_STATE_LOGS] = "system-state-logs",
+ [SD_PATH_SYSTEM_STATE_CACHE] = "system-state-cache",
+ [SD_PATH_SYSTEM_STATE_SPOOL] = "system-state-spool",
+ [SD_PATH_USER_BINARIES] = "user-binaries",
+ [SD_PATH_USER_LIBRARY_PRIVATE] = "user-library-private",
+ [SD_PATH_USER_LIBRARY_ARCH] = "user-library-arch",
+ [SD_PATH_USER_SHARED] = "user-shared",
+ [SD_PATH_USER_CONFIGURATION] = "user-configuration",
+ [SD_PATH_USER_RUNTIME] = "user-runtime",
+ [SD_PATH_USER_STATE_CACHE] = "user-state-cache",
+ [SD_PATH_USER] = "user",
+ [SD_PATH_USER_DOCUMENTS] = "user-documents",
+ [SD_PATH_USER_MUSIC] = "user-music",
+ [SD_PATH_USER_PICTURES] = "user-pictures",
+ [SD_PATH_USER_VIDEOS] = "user-videos",
+ [SD_PATH_USER_DOWNLOAD] = "user-download",
+ [SD_PATH_USER_PUBLIC] = "user-public",
+ [SD_PATH_USER_TEMPLATES] = "user-templates",
+ [SD_PATH_USER_DESKTOP] = "user-desktop",
+ [SD_PATH_SEARCH_BINARIES] = "search-binaries",
+ [SD_PATH_SEARCH_BINARIES_DEFAULT] = "search-binaries-default",
+ [SD_PATH_SEARCH_LIBRARY_PRIVATE] = "search-library-private",
+ [SD_PATH_SEARCH_LIBRARY_ARCH] = "search-library-arch",
+ [SD_PATH_SEARCH_SHARED] = "search-shared",
+ [SD_PATH_SEARCH_CONFIGURATION_FACTORY] = "search-configuration-factory",
+ [SD_PATH_SEARCH_STATE_FACTORY] = "search-state-factory",
+ [SD_PATH_SEARCH_CONFIGURATION] = "search-configuration",
+};
+
+static int list_homes(void) {
+ uint64_t i = 0;
+ int r = 0;
+
+ for (i = 0; i < ELEMENTSOF(path_table); i++) {
+ _cleanup_free_ char *p = NULL;
+ int q;
+
+ q = sd_path_home(i, arg_suffix, &p);
+ if (q == -ENXIO)
+ continue;
+ if (q < 0) {
+ log_error_errno(r, "Failed to query %s: %m", path_table[i]);
+ r = q;
+ continue;
+ }
+
+ printf("%s: %s\n", path_table[i], p);
+ }
+
+ return r;
+}
+
+static int print_home(const char *n) {
+ uint64_t i = 0;
+ int r;
+
+ for (i = 0; i < ELEMENTSOF(path_table); i++) {
+ if (streq(path_table[i], n)) {
+ _cleanup_free_ char *p = NULL;
+
+ r = sd_path_home(i, arg_suffix, &p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query %s: %m", n);
+
+ printf("%s\n", p);
+ return 0;
+ }
+ }
+
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Path %s not known.", n);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-path", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [NAME...]\n\n"
+ "Show system and user paths.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --suffix=SUFFIX Suffix to append to paths\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_SUFFIX,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "suffix", required_argument, NULL, ARG_SUFFIX },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_SUFFIX:
+ arg_suffix = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char* argv[]) {
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (argc > optind) {
+ int i, q;
+
+ for (i = optind; i < argc; i++) {
+ q = print_home(argv[i]);
+ if (q < 0)
+ r = q;
+ }
+
+ return r;
+ } else
+ return list_homes();
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/portable/meson.build b/src/portable/meson.build
new file mode 100644
index 0000000..dc57163
--- /dev/null
+++ b/src/portable/meson.build
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+systemd_portabled_sources = files('''
+ portable.c
+ portable.h
+ portabled-bus.c
+ portabled-image-bus.c
+ portabled-image-bus.h
+ portabled-image.c
+ portabled-image.h
+ portabled-operation.c
+ portabled-operation.h
+ portabled.c
+ portabled.h
+'''.split())
+
+if conf.get('ENABLE_PORTABLED') == 1
+ install_data('org.freedesktop.portable1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.portable1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.portable1.policy',
+ install_dir : polkitpolicydir)
+
+ install_data('profile/default/service.conf', install_dir : join_paths(profiledir, 'default'))
+ install_data('profile/nonetwork/service.conf', install_dir : join_paths(profiledir, 'nonetwork'))
+ install_data('profile/strict/service.conf', install_dir : join_paths(profiledir, 'strict'))
+ install_data('profile/trusted/service.conf', install_dir : join_paths(profiledir, 'trusted'))
+endif
diff --git a/src/portable/org.freedesktop.portable1.conf b/src/portable/org.freedesktop.portable1.conf
new file mode 100644
index 0000000..4c20a88
--- /dev/null
+++ b/src/portable/org.freedesktop.portable1.conf
@@ -0,0 +1,117 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!-- SPDX-License-Identifier: LGPL-2.1+ -->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.portable1"/>
+ <allow send_destination="org.freedesktop.portable1"/>
+ <allow receive_sender="org.freedesktop.portable1"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.portable1"/>
+
+ <!-- generic interfaces -->
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <!-- Manager object -->
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="GetImage"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="ListImages"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="GetImageOSRelease"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="GetImageUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="GetImageState"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="AttachImage"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="DetachImage"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="RemoveImage"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="MarkImageReadOnly"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="SetImageLimit"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="SetPoolLimit"/>
+
+ <!-- Image object -->
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="GetOSRelease"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="GetUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="GetImageState"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="Attach"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="Detach"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="Remove"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="MarkReadOnly"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="SetLimit"/>
+
+ <allow receive_sender="org.freedesktop.portable1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/portable/org.freedesktop.portable1.policy b/src/portable/org.freedesktop.portable1.policy
new file mode 100644
index 0000000..50bb44c
--- /dev/null
+++ b/src/portable/org.freedesktop.portable1.policy
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!-- SPDX-License-Identifier: LGPL-2.1+ -->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.portable1.inspect-images">
+ <description gettext-domain="systemd">Inspect a portable service image</description>
+ <message gettext-domain="systemd">Authentication is required to inspect a portable service image.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.portable1.attach-images">
+ <description gettext-domain="systemd">Attach or detach a portable service image</description>
+ <message gettext-domain="systemd">Authentication is required to attach or detach a portable service image.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.systemd1.reload-daemon</annotate>
+ </action>
+
+ <action id="org.freedesktop.portable1.manage-images">
+ <description gettext-domain="systemd">Delete or modify portable service image</description>
+ <message gettext-domain="systemd">Authentication is required to delete or modify a portable service image.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/portable/org.freedesktop.portable1.service b/src/portable/org.freedesktop.portable1.service
new file mode 100644
index 0000000..eff37b0
--- /dev/null
+++ b/src/portable/org.freedesktop.portable1.service
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+[D-BUS Service]
+Name=org.freedesktop.portable1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.portable1.service
diff --git a/src/portable/portable.c b/src/portable/portable.c
new file mode 100644
index 0000000..01fd1a9
--- /dev/null
+++ b/src/portable/portable.c
@@ -0,0 +1,1443 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio_ext.h>
+
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "conf-files.h"
+#include "copy.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "dissect-image.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "locale-util.h"
+#include "loop-util.h"
+#include "machine-image.h"
+#include "mkdir.h"
+#include "os-util.h"
+#include "path-lookup.h"
+#include "portable.h"
+#include "process-util.h"
+#include "set.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+
+static const char profile_dirs[] = CONF_PATHS_NULSTR("systemd/portable/profile");
+
+/* Markers used in the first line of our 20-portable.conf unit file drop-in to determine, that a) the unit file was
+ * dropped there by the portable service logic and b) for which image it was dropped there. */
+#define PORTABLE_DROPIN_MARKER_BEGIN "# Drop-in created for image '"
+#define PORTABLE_DROPIN_MARKER_END "', do not edit."
+
+static bool prefix_match(const char *unit, const char *prefix) {
+ const char *p;
+
+ p = startswith(unit, prefix);
+ if (!p)
+ return false;
+
+ /* Only respect prefixes followed by dash or dot or when there's a complete match */
+ return IN_SET(*p, '-', '.', '@', 0);
+}
+
+static bool unit_match(const char *unit, char **matches) {
+ const char *dot;
+ char **i;
+
+ dot = strrchr(unit, '.');
+ if (!dot)
+ return false;
+
+ if (!STR_IN_SET(dot, ".service", ".socket", ".target", ".timer", ".path"))
+ return false;
+
+ /* Empty match expression means: everything */
+ if (strv_isempty(matches))
+ return true;
+
+ /* Otherwise, at least one needs to match */
+ STRV_FOREACH(i, matches)
+ if (prefix_match(unit, *i))
+ return true;
+
+ return false;
+}
+
+static PortableMetadata *portable_metadata_new(const char *name, int fd) {
+ PortableMetadata *m;
+
+ m = malloc0(offsetof(PortableMetadata, name) + strlen(name) + 1);
+ if (!m)
+ return NULL;
+
+ strcpy(m->name, name);
+ m->fd = fd;
+
+ return m;
+}
+
+PortableMetadata *portable_metadata_unref(PortableMetadata *i) {
+ if (!i)
+ return NULL;
+
+ safe_close(i->fd);
+ free(i->source);
+
+ return mfree(i);
+}
+
+static int compare_metadata(PortableMetadata *const *x, PortableMetadata *const *y) {
+ return strcmp((*x)->name, (*y)->name);
+}
+
+int portable_metadata_hashmap_to_sorted_array(Hashmap *unit_files, PortableMetadata ***ret) {
+
+ _cleanup_free_ PortableMetadata **sorted = NULL;
+ Iterator iterator;
+ PortableMetadata *item;
+ size_t k = 0;
+
+ sorted = new(PortableMetadata*, hashmap_size(unit_files));
+ if (!sorted)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(item, unit_files, iterator)
+ sorted[k++] = item;
+
+ assert(k == hashmap_size(unit_files));
+
+ typesafe_qsort(sorted, k, compare_metadata);
+
+ *ret = TAKE_PTR(sorted);
+ return 0;
+}
+
+static int send_item(
+ int socket_fd,
+ const char *name,
+ int fd) {
+
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int))];
+ } control = {};
+ struct iovec iovec;
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+ _cleanup_close_ int data_fd = -1;
+
+ assert(socket_fd >= 0);
+ assert(name);
+ assert(fd >= 0);
+
+ data_fd = fd_duplicate_data_fd(fd);
+ if (data_fd < 0)
+ return data_fd;
+
+ cmsg = CMSG_FIRSTHDR(&mh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &data_fd, sizeof(int));
+
+ mh.msg_controllen = CMSG_SPACE(sizeof(int));
+ iovec = IOVEC_MAKE_STRING(name);
+
+ if (sendmsg(socket_fd, &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int recv_item(
+ int socket_fd,
+ char **ret_name,
+ int *ret_fd) {
+
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int))];
+ } control = {};
+ char buffer[PATH_MAX+2];
+ struct iovec iov = IOVEC_INIT(buffer, sizeof(buffer)-1);
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+ _cleanup_close_ int found_fd = -1;
+ char *copy;
+ ssize_t n;
+
+ assert(socket_fd >= 0);
+ assert(ret_name);
+ assert(ret_fd);
+
+ n = recvmsg(socket_fd, &mh, MSG_CMSG_CLOEXEC);
+ if (n < 0)
+ return -errno;
+
+ CMSG_FOREACH(cmsg, &mh) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+
+ if (cmsg->cmsg_len == CMSG_LEN(sizeof(int))) {
+ assert(found_fd < 0);
+ found_fd = *(int*) CMSG_DATA(cmsg);
+ break;
+ }
+
+ cmsg_close_all(&mh);
+ return -EIO;
+ }
+ }
+
+ buffer[n] = 0;
+
+ copy = strdup(buffer);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret_name = copy;
+ *ret_fd = TAKE_FD(found_fd);
+
+ return 0;
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(portable_metadata_hash_ops, char, string_hash_func, string_compare_func,
+ PortableMetadata, portable_metadata_unref);
+
+static int extract_now(
+ const char *where,
+ char **matches,
+ int socket_fd,
+ PortableMetadata **ret_os_release,
+ Hashmap **ret_unit_files) {
+
+ _cleanup_hashmap_free_ Hashmap *unit_files = NULL;
+ _cleanup_(portable_metadata_unrefp) PortableMetadata *os_release = NULL;
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_close_ int os_release_fd = -1;
+ _cleanup_free_ char *os_release_path = NULL;
+ char **i;
+ int r;
+
+ /* Extracts the metadata from a directory tree 'where'. Extracts two kinds of information: the /etc/os-release
+ * data, and all unit files matching the specified expression. Note that this function is called in two very
+ * different but also similar contexts. When the tool gets invoked on a directory tree, we'll process it
+ * directly, and in-process, and thus can return the requested data directly, via 'ret_os_release' and
+ * 'ret_unit_files'. However, if the tool is invoked on a raw disk image — which needs to be mounted first — we
+ * are invoked in a child process with private mounts and then need to send the collected data to our
+ * parent. To handle both cases in one call this function also gets a 'socket_fd' parameter, which when >= 0 is
+ * used to send the data to the parent. */
+
+ assert(where);
+
+ /* First, find /etc/os-release and send it upstream (or just save it). */
+ r = open_os_release(where, &os_release_path, &os_release_fd);
+ if (r < 0)
+ log_debug_errno(r, "Couldn't acquire os-release file, ignoring: %m");
+ else {
+ if (socket_fd >= 0) {
+ r = send_item(socket_fd, "/etc/os-release", os_release_fd);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to send os-release file: %m");
+ }
+
+ if (ret_os_release) {
+ os_release = portable_metadata_new("/etc/os-release", os_release_fd);
+ if (!os_release)
+ return -ENOMEM;
+
+ os_release_fd = -1;
+ os_release->source = TAKE_PTR(os_release_path);
+ }
+ }
+
+ /* Then, send unit file data to the parent (or/and add it to the hashmap). For that we use our usual unit
+ * discovery logic. Note that we force looking inside of /lib/systemd/system/ for units too, as we mightbe
+ * compiled for a split-usr system but the image might be a legacy-usr one. */
+ r = lookup_paths_init(&paths, UNIT_FILE_SYSTEM, LOOKUP_PATHS_SPLIT_USR, where);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to acquire lookup paths: %m");
+
+ unit_files = hashmap_new(&portable_metadata_hash_ops);
+ if (!unit_files)
+ return -ENOMEM;
+
+ STRV_FOREACH(i, paths.search_path) {
+ _cleanup_free_ char *resolved = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ r = chase_symlinks_and_opendir(*i, where, 0, &resolved, &d);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to open unit path '%s', ignoring: %m", *i);
+ continue;
+ }
+
+ FOREACH_DIRENT(de, d, return log_debug_errno(errno, "Failed to read directory: %m")) {
+ _cleanup_(portable_metadata_unrefp) PortableMetadata *m = NULL;
+ _cleanup_close_ int fd = -1;
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ if (!unit_match(de->d_name, matches))
+ continue;
+
+ /* Filter out duplicates */
+ if (hashmap_get(unit_files, de->d_name))
+ continue;
+
+ dirent_ensure_type(d, de);
+ if (!IN_SET(de->d_type, DT_LNK, DT_REG))
+ continue;
+
+ fd = openat(dirfd(d), de->d_name, O_CLOEXEC|O_RDONLY);
+ if (fd < 0) {
+ log_debug_errno(errno, "Failed to open unit file '%s', ignoring: %m", de->d_name);
+ continue;
+ }
+
+ if (socket_fd >= 0) {
+ r = send_item(socket_fd, de->d_name, fd);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to send unit metadata to parent: %m");
+ }
+
+ m = portable_metadata_new(de->d_name, fd);
+ if (!m)
+ return -ENOMEM;
+ fd = -1;
+
+ m->source = strjoin(resolved, "/", de->d_name);
+ if (!m->source)
+ return -ENOMEM;
+
+ r = hashmap_put(unit_files, m->name, m);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add unit to hashmap: %m");
+ m = NULL;
+ }
+ }
+
+ if (ret_os_release)
+ *ret_os_release = TAKE_PTR(os_release);
+ if (ret_unit_files)
+ *ret_unit_files = TAKE_PTR(unit_files);
+
+ return 0;
+}
+
+static int portable_extract_by_path(
+ const char *path,
+ char **matches,
+ PortableMetadata **ret_os_release,
+ Hashmap **ret_unit_files,
+ sd_bus_error *error) {
+
+ _cleanup_hashmap_free_ Hashmap *unit_files = NULL;
+ _cleanup_(portable_metadata_unrefp) PortableMetadata* os_release = NULL;
+ _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+ int r;
+
+ assert(path);
+
+ r = loop_device_make_by_path(path, O_RDONLY, &d);
+ if (r == -EISDIR) {
+ /* We can't turn this into a loop-back block device, and this returns EISDIR? Then this is a directory
+ * tree and not a raw device. It's easy then. */
+
+ r = extract_now(path, matches, -1, &os_release, &unit_files);
+ if (r < 0)
+ return r;
+
+ } else if (r < 0)
+ return log_debug_errno(r, "Failed to set up loopback device: %m");
+ else {
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+ _cleanup_(rmdir_and_freep) char *tmpdir = NULL;
+ _cleanup_(close_pairp) int seq[2] = { -1, -1 };
+ _cleanup_(sigkill_waitp) pid_t child = 0;
+
+ /* We now have a loopback block device, let's fork off a child in its own mount namespace, mount it
+ * there, and extract the metadata we need. The metadata is sent from the child back to us. */
+
+ BLOCK_SIGNALS(SIGCHLD);
+
+ r = mkdtemp_malloc("/tmp/inspect-XXXXXX", &tmpdir);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create temporary directory: %m");
+
+ r = dissect_image(d->fd, NULL, 0, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_REQUIRE_ROOT|DISSECT_IMAGE_DISCARD_ON_LOOP, &m);
+ if (r == -ENOPKG)
+ sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Couldn't identify a suitable partition table or file system in '%s'.", path);
+ else if (r == -EADDRNOTAVAIL)
+ sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "No root partition for specified root hash found in '%s'.", path);
+ else if (r == -ENOTUNIQ)
+ sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Multiple suitable root partitions found in image '%s'.", path);
+ else if (r == -ENXIO)
+ sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "No suitable root partition found in image '%s'.", path);
+ else if (r == -EPROTONOSUPPORT)
+ sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Device '%s' is loopback block device with partition scanning turned off, please turn it on.", path);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, seq) < 0)
+ return log_debug_errno(errno, "Failed to allocated SOCK_SEQPACKET socket: %m");
+
+ r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE|FORK_LOG, &child);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ seq[0] = safe_close(seq[0]);
+
+ r = dissected_image_mount(m, tmpdir, UID_INVALID, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_VALIDATE_OS);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to mount dissected image: %m");
+ goto child_finish;
+ }
+
+ r = extract_now(tmpdir, matches, seq[1], NULL, NULL);
+
+ child_finish:
+ _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+ }
+
+ seq[1] = safe_close(seq[1]);
+
+ unit_files = hashmap_new(&portable_metadata_hash_ops);
+ if (!unit_files)
+ return -ENOMEM;
+
+ for (;;) {
+ _cleanup_(portable_metadata_unrefp) PortableMetadata *add = NULL;
+ _cleanup_free_ char *name = NULL;
+ _cleanup_close_ int fd = -1;
+
+ r = recv_item(seq[0], &name, &fd);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to receive item: %m");
+
+ /* We can't really distuingish a zero-length datagram without any fds from EOF (both are signalled the
+ * same way by recvmsg()). Hence, accept either as end notification. */
+ if (isempty(name) && fd < 0)
+ break;
+
+ if (isempty(name) || fd < 0) {
+ log_debug("Invalid item sent from child.");
+ return -EINVAL;
+ }
+
+ add = portable_metadata_new(name, fd);
+ if (!add)
+ return -ENOMEM;
+ fd = -1;
+
+ /* Note that we do not initialize 'add->source' here, as the source path is not usable here as
+ * it refers to a path only valid in the short-living namespaced child process we forked
+ * here. */
+
+ if (PORTABLE_METADATA_IS_UNIT(add)) {
+ r = hashmap_put(unit_files, add->name, add);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add item to unit file list: %m");
+
+ add = NULL;
+
+ } else if (PORTABLE_METADATA_IS_OS_RELEASE(add)) {
+
+ assert(!os_release);
+ os_release = TAKE_PTR(add);
+ } else
+ assert_not_reached("Unexpected metadata item from child.");
+ }
+
+ r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
+ if (r < 0)
+ return r;
+ child = 0;
+ }
+
+ if (!os_release)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Image '%s' lacks os-release data, refusing.", path);
+
+ if (hashmap_isempty(unit_files))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Couldn't find any matching unit files in image '%s', refusing.", path);
+
+ if (ret_unit_files)
+ *ret_unit_files = TAKE_PTR(unit_files);
+
+ if (ret_os_release)
+ *ret_os_release = TAKE_PTR(os_release);
+
+ return 0;
+}
+
+int portable_extract(
+ const char *name_or_path,
+ char **matches,
+ PortableMetadata **ret_os_release,
+ Hashmap **ret_unit_files,
+ sd_bus_error *error) {
+
+ _cleanup_(image_unrefp) Image *image = NULL;
+ int r;
+
+ assert(name_or_path);
+
+ r = image_find_harder(IMAGE_PORTABLE, name_or_path, &image);
+ if (r < 0)
+ return r;
+
+ return portable_extract_by_path(image->path, matches, ret_os_release, ret_unit_files, error);
+}
+
+static int unit_file_is_active(
+ sd_bus *bus,
+ const char *name,
+ sd_bus_error *error) {
+
+ static const char *const active_states[] = {
+ "activating",
+ "active",
+ "reloading",
+ "deactivating",
+ NULL,
+ };
+ int r;
+
+ if (!bus)
+ return false;
+
+ /* If we are looking at a plain or instance things are easy, we can just query the state */
+ if (unit_name_is_valid(name, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) {
+ _cleanup_free_ char *path = NULL, *buf = NULL;
+
+ path = unit_dbus_path_from_name(name);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "ActiveState",
+ error,
+ &buf);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to retrieve unit state: %s", bus_error_message(error, r));
+
+ return strv_contains((char**) active_states, buf);
+ }
+
+ /* Otherwise we need to enumerate. But let's build the most restricted query we can */
+ if (unit_name_is_valid(name, UNIT_NAME_TEMPLATE)) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ const char *at, *prefix, *joined;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ListUnitsByPatterns");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(m, (char**) active_states);
+ if (r < 0)
+ return r;
+
+ at = strchr(name, '@');
+ assert(at);
+
+ prefix = strndupa(name, at + 1 - name);
+ joined = strjoina(prefix, "*", at + 1);
+
+ r = sd_bus_message_append_strv(m, STRV_MAKE(joined));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(bus, m, 0, error, &reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to list units: %s", bus_error_message(error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssssouso)");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_STRUCT, "ssssssouso");
+ if (r < 0)
+ return r;
+
+ return r > 0;
+ }
+
+ return -EINVAL;
+}
+
+static int portable_changes_add(
+ PortableChange **changes,
+ size_t *n_changes,
+ PortableChangeType type,
+ const char *path,
+ const char *source) {
+
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ PortableChange *c;
+
+ assert(path);
+ assert(!changes == !n_changes);
+
+ if (!changes)
+ return 0;
+
+ c = reallocarray(*changes, *n_changes + 1, sizeof(PortableChange));
+ if (!c)
+ return -ENOMEM;
+ *changes = c;
+
+ p = strdup(path);
+ if (!p)
+ return -ENOMEM;
+
+ path_simplify(p, false);
+
+ if (source) {
+ s = strdup(source);
+ if (!s)
+ return -ENOMEM;
+
+ path_simplify(s, false);
+ }
+
+ c[(*n_changes)++] = (PortableChange) {
+ .type = type,
+ .path = TAKE_PTR(p),
+ .source = TAKE_PTR(s),
+ };
+
+ return 0;
+}
+
+static int portable_changes_add_with_prefix(
+ PortableChange **changes,
+ size_t *n_changes,
+ PortableChangeType type,
+ const char *prefix,
+ const char *path,
+ const char *source) {
+
+ assert(path);
+ assert(!changes == !n_changes);
+
+ if (!changes)
+ return 0;
+
+ if (prefix) {
+ path = strjoina(prefix, "/", path);
+
+ if (source)
+ source = strjoina(prefix, "/", source);
+ }
+
+ return portable_changes_add(changes, n_changes, type, path, source);
+}
+
+void portable_changes_free(PortableChange *changes, size_t n_changes) {
+ size_t i;
+
+ assert(changes || n_changes == 0);
+
+ for (i = 0; i < n_changes; i++) {
+ free(changes[i].path);
+ free(changes[i].source);
+ }
+
+ free(changes);
+}
+
+static int install_chroot_dropin(
+ const char *image_path,
+ ImageType type,
+ const PortableMetadata *m,
+ const char *dropin_dir,
+ char **ret_dropin,
+ PortableChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *text = NULL, *dropin = NULL;
+ int r;
+
+ assert(image_path);
+ assert(m);
+ assert(dropin_dir);
+
+ dropin = strjoin(dropin_dir, "/20-portable.conf");
+ if (!dropin)
+ return -ENOMEM;
+
+ text = strjoin(PORTABLE_DROPIN_MARKER_BEGIN, image_path, PORTABLE_DROPIN_MARKER_END "\n");
+ if (!text)
+ return -ENOMEM;
+
+ if (endswith(m->name, ".service"))
+ if (!strextend(&text,
+ "\n"
+ "[Service]\n",
+ IN_SET(type, IMAGE_DIRECTORY, IMAGE_SUBVOLUME) ? "RootDirectory=" : "RootImage=", image_path, "\n"
+ "Environment=PORTABLE=", basename(image_path), "\n"
+ "LogExtraFields=PORTABLE=", basename(image_path), "\n",
+ NULL))
+
+ return -ENOMEM;
+
+ r = write_string_file(dropin, text, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to write '%s': %m", dropin);
+
+ (void) portable_changes_add(changes, n_changes, PORTABLE_WRITE, dropin, NULL);
+
+ if (ret_dropin)
+ *ret_dropin = TAKE_PTR(dropin);
+
+ return 0;
+}
+
+static int find_profile(const char *name, const char *unit, char **ret) {
+ const char *p, *dot;
+
+ assert(name);
+ assert(ret);
+
+ assert_se(dot = strrchr(unit, '.'));
+
+ NULSTR_FOREACH(p, profile_dirs) {
+ _cleanup_free_ char *joined;
+
+ joined = strjoin(p, "/", name, "/", dot + 1, ".conf");
+ if (!joined)
+ return -ENOMEM;
+
+ if (laccess(joined, F_OK) >= 0) {
+ *ret = TAKE_PTR(joined);
+ return 0;
+ }
+
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ return -ENOENT;
+}
+
+static int install_profile_dropin(
+ const char *image_path,
+ const PortableMetadata *m,
+ const char *dropin_dir,
+ const char *profile,
+ PortableFlags flags,
+ char **ret_dropin,
+ PortableChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *dropin = NULL, *from = NULL;
+ int r;
+
+ assert(image_path);
+ assert(m);
+ assert(dropin_dir);
+
+ if (!profile)
+ return 0;
+
+ r = find_profile(profile, m->name, &from);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_debug_errno(errno, "Profile '%s' is not accessible: %m", profile);
+
+ log_debug_errno(errno, "Skipping link to profile '%s', as it does not exist: %m", profile);
+ return 0;
+ }
+
+ dropin = strjoin(dropin_dir, "/10-profile.conf");
+ if (!dropin)
+ return -ENOMEM;
+
+ if (flags & PORTABLE_PREFER_COPY) {
+
+ r = copy_file_atomic(from, dropin, 0644, 0, COPY_REFLINK);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to copy %s %s %s: %m", from, special_glyph(SPECIAL_GLYPH_ARROW), dropin);
+
+ (void) portable_changes_add(changes, n_changes, PORTABLE_COPY, dropin, from);
+
+ } else {
+
+ if (symlink(from, dropin) < 0)
+ return log_debug_errno(errno, "Failed to link %s %s %s: %m", from, special_glyph(SPECIAL_GLYPH_ARROW), dropin);
+
+ (void) portable_changes_add(changes, n_changes, PORTABLE_SYMLINK, dropin, from);
+ }
+
+ if (ret_dropin)
+ *ret_dropin = TAKE_PTR(dropin);
+
+ return 0;
+}
+
+static const char *attached_path(const LookupPaths *paths, PortableFlags flags) {
+ const char *where;
+
+ assert(paths);
+
+ if (flags & PORTABLE_RUNTIME)
+ where = paths->runtime_attached;
+ else
+ where = paths->persistent_attached;
+
+ assert(where);
+ return where;
+}
+
+static int attach_unit_file(
+ const LookupPaths *paths,
+ const char *image_path,
+ ImageType type,
+ const PortableMetadata *m,
+ const char *profile,
+ PortableFlags flags,
+ PortableChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(unlink_and_freep) char *chroot_dropin = NULL, *profile_dropin = NULL;
+ _cleanup_(rmdir_and_freep) char *dropin_dir = NULL;
+ const char *where, *path;
+ int r;
+
+ assert(paths);
+ assert(image_path);
+ assert(m);
+ assert(PORTABLE_METADATA_IS_UNIT(m));
+
+ where = attached_path(paths, flags);
+
+ (void) mkdir_parents(where, 0755);
+ if (mkdir(where, 0755) < 0) {
+ if (errno != EEXIST)
+ return -errno;
+ } else
+ (void) portable_changes_add(changes, n_changes, PORTABLE_MKDIR, where, NULL);
+
+ path = strjoina(where, "/", m->name);
+ dropin_dir = strjoin(path, ".d");
+ if (!dropin_dir)
+ return -ENOMEM;
+
+ if (mkdir(dropin_dir, 0755) < 0) {
+ if (errno != EEXIST)
+ return -errno;
+ } else
+ (void) portable_changes_add(changes, n_changes, PORTABLE_MKDIR, dropin_dir, NULL);
+
+ /* We install the drop-ins first, and the actual unit file last to achieve somewhat atomic behaviour if PID 1
+ * is reloaded while we are creating things here: as long as only the drop-ins exist the unit doesn't exist at
+ * all for PID 1. */
+
+ r = install_chroot_dropin(image_path, type, m, dropin_dir, &chroot_dropin, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ r = install_profile_dropin(image_path, m, dropin_dir, profile, flags, &profile_dropin, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ if ((flags & PORTABLE_PREFER_SYMLINK) && m->source) {
+
+ if (symlink(m->source, path) < 0)
+ return log_debug_errno(errno, "Failed to symlink unit file '%s': %m", path);
+
+ (void) portable_changes_add(changes, n_changes, PORTABLE_SYMLINK, path, m->source);
+
+ } else {
+ _cleanup_(unlink_and_freep) char *tmp = NULL;
+ _cleanup_close_ int fd = -1;
+
+ fd = open_tmpfile_linkable(where, O_WRONLY|O_CLOEXEC, &tmp);
+ if (fd < 0)
+ return log_debug_errno(fd, "Failed to create unit file '%s': %m", path);
+
+ r = copy_bytes(m->fd, fd, UINT64_MAX, COPY_REFLINK);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to copy unit file '%s': %m", path);
+
+ if (fchmod(fd, 0644) < 0)
+ return log_debug_errno(errno, "Failed to change unit file access mode for '%s': %m", path);
+
+ r = link_tmpfile(fd, tmp, path);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to install unit file '%s': %m", path);
+
+ tmp = mfree(tmp);
+
+ (void) portable_changes_add(changes, n_changes, PORTABLE_COPY, path, m->source);
+ }
+
+ /* All is established now, now let's disable any rollbacks */
+ chroot_dropin = mfree(chroot_dropin);
+ profile_dropin = mfree(profile_dropin);
+ dropin_dir = mfree(dropin_dir);
+
+ return 0;
+}
+
+static int image_symlink(
+ const char *image_path,
+ PortableFlags flags,
+ char **ret) {
+
+ const char *fn, *where;
+ char *joined = NULL;
+
+ assert(image_path);
+ assert(ret);
+
+ fn = last_path_component(image_path);
+
+ if (flags & PORTABLE_RUNTIME)
+ where = "/run/portables/";
+ else
+ where = "/etc/portables/";
+
+ joined = strjoin(where, fn);
+ if (!joined)
+ return -ENOMEM;
+
+ *ret = joined;
+ return 0;
+}
+
+static int install_image_symlink(
+ const char *image_path,
+ PortableFlags flags,
+ PortableChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *sl = NULL;
+ int r;
+
+ assert(image_path);
+
+ /* If the image is outside of the image search also link it into it, so that it can be found with short image
+ * names and is listed among the images. */
+
+ if (image_in_search_path(IMAGE_PORTABLE, image_path))
+ return 0;
+
+ r = image_symlink(image_path, flags, &sl);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to generate image symlink path: %m");
+
+ (void) mkdir_parents(sl, 0755);
+
+ if (symlink(image_path, sl) < 0)
+ return log_debug_errno(errno, "Failed to link %s %s %s: %m", image_path, special_glyph(SPECIAL_GLYPH_ARROW), sl);
+
+ (void) portable_changes_add(changes, n_changes, PORTABLE_SYMLINK, sl, image_path);
+ return 0;
+}
+
+int portable_attach(
+ sd_bus *bus,
+ const char *name_or_path,
+ char **matches,
+ const char *profile,
+ PortableFlags flags,
+ PortableChange **changes,
+ size_t *n_changes,
+ sd_bus_error *error) {
+
+ _cleanup_hashmap_free_ Hashmap *unit_files = NULL;
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(image_unrefp) Image *image = NULL;
+ PortableMetadata *item;
+ Iterator iterator;
+ int r;
+
+ assert(name_or_path);
+
+ r = image_find_harder(IMAGE_PORTABLE, name_or_path, &image);
+ if (r < 0)
+ return r;
+
+ r = portable_extract_by_path(image->path, matches, NULL, &unit_files, error);
+ if (r < 0)
+ return r;
+
+ r = lookup_paths_init(&paths, UNIT_FILE_SYSTEM, LOOKUP_PATHS_SPLIT_USR, NULL);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(item, unit_files, iterator) {
+ r = unit_file_exists(UNIT_FILE_SYSTEM, &paths, item->name);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to determine whether unit '%s' exists on the host: %m", item->name);
+ if (r > 0)
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS, "Unit file '%s' exists on the host already, refusing.", item->name);
+
+ r = unit_file_is_active(bus, item->name, error);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS, "Unit file '%s' is active already, refusing.", item->name);
+ }
+
+ HASHMAP_FOREACH(item, unit_files, iterator) {
+ r = attach_unit_file(&paths, image->path, image->type, item, profile, flags, changes, n_changes);
+ if (r < 0)
+ return r;
+ }
+
+ /* We don't care too much for the image symlink, it's just a convenience thing, it's not necessary for proper
+ * operation otherwise. */
+ (void) install_image_symlink(image->path, flags, changes, n_changes);
+
+ return 0;
+}
+
+static bool marker_matches_image(const char *marker, const char *name_or_path) {
+ const char *a;
+
+ assert(marker);
+ assert(name_or_path);
+
+ a = last_path_component(marker);
+
+ if (image_name_is_valid(name_or_path)) {
+ const char *e;
+
+ /* We shall match against an image name. In that case let's compare the last component, and optionally
+ * allow either a suffix of ".raw" or a series of "/". */
+
+ e = startswith(a, name_or_path);
+ if (!e)
+ return false;
+
+ return
+ e[strspn(e, "/")] == 0 ||
+ streq(e, ".raw");
+ } else {
+ const char *b;
+ size_t l;
+
+ /* We shall match against a path. Let's ignore any prefix here though, as often there are many ways to
+ * reach the same file. However, in this mode, let's validate any file suffix. */
+
+ l = strcspn(a, "/");
+ b = last_path_component(name_or_path);
+
+ if (strcspn(b, "/") != l)
+ return false;
+
+ return memcmp(a, b, l) == 0;
+ }
+}
+
+static int test_chroot_dropin(
+ DIR *d,
+ const char *where,
+ const char *fname,
+ const char *name_or_path,
+ char **ret_marker) {
+
+ _cleanup_free_ char *line = NULL, *marker = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *p, *e, *k;
+ int r;
+
+ assert(d);
+ assert(where);
+ assert(fname);
+
+ /* We recognize unis created from portable images via the drop-in we created for them */
+
+ p = strjoina(fname, ".d/20-portable.conf");
+ fd = openat(dirfd(d), p, O_RDONLY|O_CLOEXEC);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_debug_errno(errno, "Failed to open %s/%s: %m", where, p);
+ }
+
+ f = fdopen(fd, "r");
+ if (!f)
+ return log_debug_errno(errno, "Failed to convert file handle: %m");
+ fd = -1;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read from %s/%s: %m", where, p);
+
+ e = startswith(line, PORTABLE_DROPIN_MARKER_BEGIN);
+ if (!e)
+ return 0;
+
+ k = endswith(e, PORTABLE_DROPIN_MARKER_END);
+ if (!k)
+ return 0;
+
+ marker = strndup(e, k - e);
+ if (!marker)
+ return -ENOMEM;
+
+ if (!name_or_path)
+ r = true;
+ else
+ r = marker_matches_image(marker, name_or_path);
+
+ if (ret_marker)
+ *ret_marker = TAKE_PTR(marker);
+
+ return r;
+}
+
+int portable_detach(
+ sd_bus *bus,
+ const char *name_or_path,
+ PortableFlags flags,
+ PortableChange **changes,
+ size_t *n_changes,
+ sd_bus_error *error) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_set_free_free_ Set *unit_files = NULL, *markers = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ const char *where, *item;
+ Iterator iterator;
+ struct dirent *de;
+ int ret = 0;
+ int r;
+
+ assert(name_or_path);
+
+ r = lookup_paths_init(&paths, UNIT_FILE_SYSTEM, LOOKUP_PATHS_SPLIT_USR, NULL);
+ if (r < 0)
+ return r;
+
+ where = attached_path(&paths, flags);
+
+ d = opendir(where);
+ if (!d) {
+ if (errno == ENOENT)
+ goto not_found;
+
+ return log_debug_errno(errno, "Failed to open '%s' directory: %m", where);
+ }
+
+ unit_files = set_new(&string_hash_ops);
+ if (!unit_files)
+ return -ENOMEM;
+
+ markers = set_new(&path_hash_ops);
+ if (!markers)
+ return -ENOMEM;
+
+ FOREACH_DIRENT(de, d, return log_debug_errno(errno, "Failed to enumerate '%s' directory: %m", where)) {
+ _cleanup_free_ char *marker = NULL;
+ UnitFileState state;
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ /* Filter out duplicates */
+ if (set_get(unit_files, de->d_name))
+ continue;
+
+ dirent_ensure_type(d, de);
+ if (!IN_SET(de->d_type, DT_LNK, DT_REG))
+ continue;
+
+ r = test_chroot_dropin(d, where, de->d_name, name_or_path, &marker);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = unit_file_lookup_state(UNIT_FILE_SYSTEM, &paths, de->d_name, &state);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine unit file state of '%s': %m", de->d_name);
+ if (!IN_SET(state, UNIT_FILE_STATIC, UNIT_FILE_DISABLED, UNIT_FILE_LINKED, UNIT_FILE_RUNTIME))
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS, "Unit file '%s' is in state '%s', can't detach.", de->d_name, unit_file_state_to_string(state));
+
+ r = unit_file_is_active(bus, de->d_name, error);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS, "Unit file '%s' is active, can't detach.", de->d_name);
+
+ r = set_put_strdup(unit_files, de->d_name);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add unit name '%s' to set: %m", de->d_name);
+
+ if (path_is_absolute(marker) &&
+ !image_in_search_path(IMAGE_PORTABLE, marker)) {
+
+ r = set_ensure_allocated(&markers, &path_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = set_put(markers, marker);
+ if (r >= 0)
+ marker = NULL;
+ else if (r != -EEXIST)
+ return r;
+ }
+ }
+
+ if (set_isempty(unit_files))
+ goto not_found;
+
+ SET_FOREACH(item, unit_files, iterator) {
+ _cleanup_free_ char *md = NULL;
+ const char *suffix;
+
+ if (unlinkat(dirfd(d), item, 0) < 0) {
+ log_debug_errno(errno, "Can't remove unit file %s/%s: %m", where, item);
+
+ if (errno != ENOENT && ret >= 0)
+ ret = -errno;
+ } else
+ portable_changes_add_with_prefix(changes, n_changes, PORTABLE_UNLINK, where, item, NULL);
+
+ FOREACH_STRING(suffix, ".d/10-profile.conf", ".d/20-portable.conf") {
+ _cleanup_free_ char *dropin = NULL;
+
+ dropin = strjoin(item, suffix);
+ if (!dropin)
+ return -ENOMEM;
+
+ if (unlinkat(dirfd(d), dropin, 0) < 0) {
+ log_debug_errno(errno, "Can't remove drop-in %s/%s: %m", where, dropin);
+
+ if (errno != ENOENT && ret >= 0)
+ ret = -errno;
+ } else
+ portable_changes_add_with_prefix(changes, n_changes, PORTABLE_UNLINK, where, dropin, NULL);
+ }
+
+ md = strjoin(item, ".d");
+ if (!md)
+ return -ENOMEM;
+
+ if (unlinkat(dirfd(d), md, AT_REMOVEDIR) < 0) {
+ log_debug_errno(errno, "Can't remove drop-in directory %s/%s: %m", where, md);
+
+ if (errno != ENOENT && ret >= 0)
+ ret = -errno;
+ } else
+ portable_changes_add_with_prefix(changes, n_changes, PORTABLE_UNLINK, where, md, NULL);
+ }
+
+ /* Now, also drop any image symlink, for images outside of the sarch path */
+ SET_FOREACH(item, markers, iterator) {
+ _cleanup_free_ char *sl = NULL;
+ struct stat st;
+
+ r = image_symlink(item, flags, &sl);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to determine image symlink for '%s', ignoring: %m", item);
+ continue;
+ }
+
+ if (lstat(sl, &st) < 0) {
+ log_debug_errno(errno, "Failed to stat '%s', ignoring: %m", sl);
+ continue;
+ }
+
+ if (!S_ISLNK(st.st_mode)) {
+ log_debug("Image '%s' is not a symlink, ignoring.", sl);
+ continue;
+ }
+
+ if (unlink(sl) < 0) {
+ log_debug_errno(errno, "Can't remove image symlink '%s': %m", sl);
+
+ if (errno != ENOENT && ret >= 0)
+ ret = -errno;
+ } else
+ portable_changes_add(changes, n_changes, PORTABLE_UNLINK, sl, NULL);
+ }
+
+ /* Try to remove the unit file directory, if we can */
+ if (rmdir(where) >= 0)
+ portable_changes_add(changes, n_changes, PORTABLE_UNLINK, where, NULL);
+
+ return ret;
+
+not_found:
+ log_debug("No unit files associated with '%s' found. Image not attached?", name_or_path);
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "No unit files associated with '%s' found. Image not attached?", name_or_path);
+}
+
+static int portable_get_state_internal(
+ sd_bus *bus,
+ const char *name_or_path,
+ PortableFlags flags,
+ PortableState *ret,
+ sd_bus_error *error) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ bool found_enabled = false, found_running = false;
+ _cleanup_set_free_free_ Set *unit_files = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ const char *where;
+ struct dirent *de;
+ int r;
+
+ assert(name_or_path);
+ assert(ret);
+
+ r = lookup_paths_init(&paths, UNIT_FILE_SYSTEM, LOOKUP_PATHS_SPLIT_USR, NULL);
+ if (r < 0)
+ return r;
+
+ where = attached_path(&paths, flags);
+
+ d = opendir(where);
+ if (!d) {
+ if (errno == ENOENT) {
+ /* If the 'attached' directory doesn't exist at all, then we know for sure this image isn't attached. */
+ *ret = PORTABLE_DETACHED;
+ return 0;
+ }
+
+ return log_debug_errno(errno, "Failed to open '%s' directory: %m", where);
+ }
+
+ unit_files = set_new(&string_hash_ops);
+ if (!unit_files)
+ return -ENOMEM;
+
+ FOREACH_DIRENT(de, d, return log_debug_errno(errno, "Failed to enumerate '%s' directory: %m", where)) {
+ UnitFileState state;
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ /* Filter out duplicates */
+ if (set_get(unit_files, de->d_name))
+ continue;
+
+ dirent_ensure_type(d, de);
+ if (!IN_SET(de->d_type, DT_LNK, DT_REG))
+ continue;
+
+ r = test_chroot_dropin(d, where, de->d_name, name_or_path, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = unit_file_lookup_state(UNIT_FILE_SYSTEM, &paths, de->d_name, &state);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine unit file state of '%s': %m", de->d_name);
+ if (!IN_SET(state, UNIT_FILE_STATIC, UNIT_FILE_DISABLED, UNIT_FILE_LINKED, UNIT_FILE_LINKED_RUNTIME))
+ found_enabled = true;
+
+ r = unit_file_is_active(bus, de->d_name, error);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ found_running = true;
+
+ r = set_put_strdup(unit_files, de->d_name);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add unit name '%s' to set: %m", de->d_name);
+ }
+
+ *ret = found_running ? (!set_isempty(unit_files) && (flags & PORTABLE_RUNTIME) ? PORTABLE_RUNNING_RUNTIME : PORTABLE_RUNNING) :
+ found_enabled ? (flags & PORTABLE_RUNTIME ? PORTABLE_ENABLED_RUNTIME : PORTABLE_ENABLED) :
+ !set_isempty(unit_files) ? (flags & PORTABLE_RUNTIME ? PORTABLE_ATTACHED_RUNTIME : PORTABLE_ATTACHED) : PORTABLE_DETACHED;
+
+ return 0;
+}
+
+int portable_get_state(
+ sd_bus *bus,
+ const char *name_or_path,
+ PortableFlags flags,
+ PortableState *ret,
+ sd_bus_error *error) {
+
+ PortableState state;
+ int r;
+
+ assert(name_or_path);
+ assert(ret);
+
+ /* We look for matching units twice: once in the regular directories, and once in the runtime directories — but
+ * the latter only if we didn't find anything in the former. */
+
+ r = portable_get_state_internal(bus, name_or_path, flags & ~PORTABLE_RUNTIME, &state, error);
+ if (r < 0)
+ return r;
+
+ if (state == PORTABLE_DETACHED) {
+ r = portable_get_state_internal(bus, name_or_path, flags | PORTABLE_RUNTIME, &state, error);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = state;
+ return 0;
+}
+
+int portable_get_profiles(char ***ret) {
+ assert(ret);
+
+ return conf_files_list_nulstr(ret, NULL, NULL, CONF_FILES_DIRECTORY|CONF_FILES_BASENAME|CONF_FILES_FILTER_MASKED, profile_dirs);
+}
+
+static const char* const portable_change_type_table[_PORTABLE_CHANGE_TYPE_MAX] = {
+ [PORTABLE_COPY] = "copy",
+ [PORTABLE_MKDIR] = "mkdir",
+ [PORTABLE_SYMLINK] = "symlink",
+ [PORTABLE_UNLINK] = "unlink",
+ [PORTABLE_WRITE] = "write",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(portable_change_type, PortableChangeType);
+
+static const char* const portable_state_table[_PORTABLE_STATE_MAX] = {
+ [PORTABLE_DETACHED] = "detached",
+ [PORTABLE_ATTACHED] = "attached",
+ [PORTABLE_ATTACHED_RUNTIME] = "attached-runtime",
+ [PORTABLE_ENABLED] = "enabled",
+ [PORTABLE_ENABLED_RUNTIME] = "enabled-runtime",
+ [PORTABLE_RUNNING] = "running",
+ [PORTABLE_RUNNING_RUNTIME] = "running-runtime",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(portable_state, PortableState);
diff --git a/src/portable/portable.h b/src/portable/portable.h
new file mode 100644
index 0000000..ad81b58
--- /dev/null
+++ b/src/portable/portable.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "hashmap.h"
+#include "macro.h"
+#include "set.h"
+#include "string-util.h"
+
+typedef struct PortableMetadata {
+ int fd;
+ char *source;
+ char name[];
+} PortableMetadata;
+
+#define PORTABLE_METADATA_IS_OS_RELEASE(m) (streq((m)->name, "/etc/os-release"))
+#define PORTABLE_METADATA_IS_UNIT(m) (!IN_SET((m)->name[0], 0, '/'))
+
+typedef enum PortableFlags {
+ PORTABLE_PREFER_COPY = 1 << 0,
+ PORTABLE_PREFER_SYMLINK = 1 << 1,
+ PORTABLE_RUNTIME = 1 << 2,
+} PortableFlags;
+
+typedef enum PortableChangeType {
+ PORTABLE_COPY,
+ PORTABLE_SYMLINK,
+ PORTABLE_UNLINK,
+ PORTABLE_WRITE,
+ PORTABLE_MKDIR,
+ _PORTABLE_CHANGE_TYPE_MAX,
+ _PORTABLE_CHANGE_TYPE_INVALID = INT_MIN,
+} PortableChangeType;
+
+typedef enum PortableState {
+ PORTABLE_DETACHED,
+ PORTABLE_ATTACHED,
+ PORTABLE_ATTACHED_RUNTIME,
+ PORTABLE_ENABLED,
+ PORTABLE_ENABLED_RUNTIME,
+ PORTABLE_RUNNING,
+ PORTABLE_RUNNING_RUNTIME,
+ _PORTABLE_STATE_MAX,
+ _PORTABLE_STATE_INVALID = -1
+} PortableState;
+
+typedef struct PortableChange {
+ int type; /* PortableFileChangeType or negative error number */
+ char *path;
+ char *source;
+} PortableChange;
+
+PortableMetadata *portable_metadata_unref(PortableMetadata *i);
+DEFINE_TRIVIAL_CLEANUP_FUNC(PortableMetadata*, portable_metadata_unref);
+
+int portable_metadata_hashmap_to_sorted_array(Hashmap *unit_files, PortableMetadata ***ret);
+
+int portable_extract(const char *image, char **matches, PortableMetadata **ret_os_release, Hashmap **ret_unit_files, sd_bus_error *error);
+
+int portable_attach(sd_bus *bus, const char *name_or_path, char **matches, const char *profile, PortableFlags flags, PortableChange **changes, size_t *n_changes, sd_bus_error *error);
+int portable_detach(sd_bus *bus, const char *name_or_path, PortableFlags flags, PortableChange **changes, size_t *n_changes, sd_bus_error *error);
+
+int portable_get_state(sd_bus *bus, const char *name_or_path, PortableFlags flags, PortableState *ret, sd_bus_error *error);
+
+int portable_get_profiles(char ***ret);
+
+void portable_changes_free(PortableChange *changes, size_t n_changes);
+
+const char *portable_change_type_to_string(PortableChangeType t) _const_;
+PortableChangeType portable_change_type_from_string(const char *t) _pure_;
+
+const char *portable_state_to_string(PortableState t) _const_;
+PortableState portable_state_from_string(const char *t) _pure_;
diff --git a/src/portable/portablectl.c b/src/portable/portablectl.c
new file mode 100644
index 0000000..bb6cebd
--- /dev/null
+++ b/src/portable/portablectl.c
@@ -0,0 +1,969 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "format-table.h"
+#include "fs-util.h"
+#include "locale-util.h"
+#include "machine-image.h"
+#include "main-func.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "spawn-polkit-agent.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "verbs.h"
+
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+static bool arg_ask_password = true;
+static bool arg_quiet = false;
+static const char *arg_profile = "default";
+static const char* arg_copy_mode = NULL;
+static bool arg_runtime = false;
+static bool arg_reload = true;
+static bool arg_cat = false;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static const char *arg_host = NULL;
+
+static int determine_image(const char *image, bool permit_non_existing, char **ret) {
+ int r;
+
+ /* If the specified name is a valid image name, we pass it as-is to portabled, which will search for it in the
+ * usual search directories. Otherwise we presume it's a path, and will normalize it on the client's side
+ * (among other things, to make the path independent of the client's working directory) before passing it
+ * over. */
+
+ if (image_name_is_valid(image)) {
+ char *c;
+
+ if (!arg_quiet && laccess(image, F_OK) >= 0)
+ log_warning("Ambiguous invocation: current working directory contains file matching non-path argument '%s', ignoring. "
+ "Prefix argument with './' to force reference to file in current working directory.", image);
+
+ c = strdup(image);
+ if (!c)
+ return log_oom();
+
+ *ret = c;
+ return 0;
+ }
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Operations on images by path not supported when connecting to remote systems.");
+
+ r = chase_symlinks(image, NULL, CHASE_TRAIL_SLASH | (permit_non_existing ? CHASE_NONEXISTENT : 0), ret);
+ if (r < 0)
+ return log_error_errno(r, "Cannot normalize specified image path '%s': %m", image);
+
+ return 0;
+}
+
+static int extract_prefix(const char *path, char **ret) {
+ _cleanup_free_ char *name = NULL;
+ const char *bn, *underscore;
+ size_t m;
+
+ bn = basename(path);
+
+ underscore = strchr(bn, '_');
+ if (underscore)
+ m = underscore - bn;
+ else {
+ const char *e;
+
+ e = endswith(bn, ".raw");
+ if (!e)
+ e = strchr(bn, 0);
+
+ m = e - bn;
+ }
+
+ name = strndup(bn, m);
+ if (!name)
+ return -ENOMEM;
+
+ /* A slightly reduced version of what's permitted in unit names. With ':' and '\' are removed, as well as '_'
+ * which we use as delimiter for the second part of the image string, which we ignore for now. */
+ if (!in_charset(name, DIGITS LETTERS "-."))
+ return -EINVAL;
+
+ if (!filename_is_valid(name))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(name);
+
+ return 0;
+}
+
+static int determine_matches(const char *image, char **l, bool allow_any, char ***ret) {
+ _cleanup_strv_free_ char **k = NULL;
+ int r;
+
+ /* Determine the matches to apply. If the list is empty we derive the match from the image name. If the list
+ * contains exactly the "-" we return a wildcard list (which is the empty list), but only if this is expressly
+ * permitted. */
+
+ if (strv_isempty(l)) {
+ char *prefix;
+
+ r = extract_prefix(image, &prefix);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract prefix of image name '%s': %m", image);
+
+ if (!arg_quiet)
+ log_info("(Matching unit files with prefix '%s'.)", prefix);
+
+ r = strv_consume(&k, prefix);
+ if (r < 0)
+ return log_oom();
+
+ } else if (strv_equal(l, STRV_MAKE("-"))) {
+
+ if (!allow_any)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Refusing all unit file match.");
+
+ if (!arg_quiet)
+ log_info("(Matching all unit files.)");
+ } else {
+
+ k = strv_copy(l);
+ if (!k)
+ return log_oom();
+
+ if (!arg_quiet) {
+ _cleanup_free_ char *joined = NULL;
+
+ joined = strv_join(k, "', '");
+ if (!joined)
+ return log_oom();
+
+ log_info("(Matching unit files with prefixes '%s'.)", joined);
+ }
+ }
+
+ *ret = TAKE_PTR(k);
+
+ return 0;
+}
+
+static int acquire_bus(sd_bus **bus) {
+ int r;
+
+ assert(bus);
+
+ if (*bus)
+ return 0;
+
+ r = bus_connect_transport(arg_transport, arg_host, false, bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to bus: %m");
+
+ (void) sd_bus_set_allow_interactive_authorization(*bus, arg_ask_password);
+
+ return 0;
+}
+
+static int maybe_reload(sd_bus **bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ if (!arg_reload)
+ return 0;
+
+ r = acquire_bus(bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_call(
+ *bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Reload");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Reloading the daemon may take long, hence set a longer timeout here */
+ r = sd_bus_call(*bus, m, DEFAULT_TIMEOUT_USEC * 2, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reload daemon: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int inspect_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_strv_free_ char **matches = NULL;
+ _cleanup_free_ char *image = NULL;
+ bool nl = false, header = false;
+ const void *data;
+ const char *path;
+ size_t sz;
+ int r;
+
+ r = determine_image(argv[1], false, &image);
+ if (r < 0)
+ return r;
+
+ r = determine_matches(argv[1], argv + 2, true, &matches);
+ if (r < 0)
+ return r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.portable1",
+ "/org/freedesktop/portable1",
+ "org.freedesktop.portable1.Manager",
+ "GetImageMetadata");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", image);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, matches);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to inspect image metadata: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "s", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(reply, 'y', &data, &sz);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ (void) pager_open(arg_pager_flags);
+
+ if (arg_cat) {
+ printf("%s-- OS Release: --%s\n", ansi_highlight(), ansi_normal());
+ fwrite(data, sz, 1, stdout);
+ fflush(stdout);
+ nl = true;
+ } else {
+ _cleanup_free_ char *pretty_portable = NULL, *pretty_os = NULL;
+
+ _cleanup_fclose_ FILE *f;
+
+ f = fmemopen((void*) data, sz, "re");
+ if (!f)
+ return log_error_errno(errno, "Failed to open /etc/os-release buffer: %m");
+
+ r = parse_env_file(f, "/etc/os-release",
+ "PORTABLE_PRETTY_NAME", &pretty_portable,
+ "PRETTY_NAME", &pretty_os);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse /etc/os-release: %m");
+
+ printf("Image:\n\t%s\n"
+ "Portable Service:\n\t%s\n"
+ "Operating System:\n\t%s\n",
+ path,
+ strna(pretty_portable),
+ strna(pretty_os));
+ }
+
+ r = sd_bus_message_enter_container(reply, 'a', "{say}");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ const char *name;
+
+ r = sd_bus_message_enter_container(reply, 'e', "say");
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(reply, "s", &name);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(reply, 'y', &data, &sz);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (arg_cat) {
+ if (nl)
+ fputc('\n', stdout);
+
+ printf("%s-- Unit file: %s --%s\n", ansi_highlight(), name, ansi_normal());
+ fwrite(data, sz, 1, stdout);
+ fflush(stdout);
+ nl = true;
+ } else {
+ if (!header) {
+ fputs("Unit files:\n", stdout);
+ header = true;
+ }
+
+ fputc('\t', stdout);
+ fputs(name, stdout);
+ fputc('\n', stdout);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
+static int print_changes(sd_bus_message *m) {
+ int r;
+
+ if (arg_quiet)
+ return 0;
+
+ r = sd_bus_message_enter_container(m, 'a', "(sss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ const char *type, *path, *source;
+
+ r = sd_bus_message_read(m, "(sss)", &type, &path, &source);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ if (streq(type, "symlink"))
+ log_info("Created symlink %s %s %s.", path, special_glyph(SPECIAL_GLYPH_ARROW), source);
+ else if (streq(type, "copy")) {
+ if (isempty(source))
+ log_info("Copied %s.", path);
+ else
+ log_info("Copied %s %s %s.", source, special_glyph(SPECIAL_GLYPH_ARROW), path);
+ } else if (streq(type, "unlink"))
+ log_info("Removed %s.", path);
+ else if (streq(type, "write"))
+ log_info("Written %s.", path);
+ else if (streq(type, "mkdir"))
+ log_info("Created directory %s.", path);
+ else
+ log_error("Unexpected change: %s/%s/%s", type, path, source);
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int attach_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_strv_free_ char **matches = NULL;
+ _cleanup_free_ char *image = NULL;
+ int r;
+
+ r = determine_image(argv[1], false, &image);
+ if (r < 0)
+ return r;
+
+ r = determine_matches(argv[1], argv + 2, false, &matches);
+ if (r < 0)
+ return r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.portable1",
+ "/org/freedesktop/portable1",
+ "org.freedesktop.portable1.Manager",
+ "AttachImage");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", image);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, matches);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "sbs", arg_profile, arg_runtime, arg_copy_mode);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach image: %s", bus_error_message(&error, r));
+
+ (void) maybe_reload(&bus);
+
+ print_changes(reply);
+ return 0;
+}
+
+static int detach_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *image = NULL;
+ int r;
+
+ r = determine_image(argv[1], true, &image);
+ if (r < 0)
+ return r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.portable1",
+ "/org/freedesktop/portable1",
+ "org.freedesktop.portable1.Manager",
+ "DetachImage",
+ &error,
+ &reply,
+ "sb", image, arg_runtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to detach image: %s", bus_error_message(&error, r));
+
+ (void) maybe_reload(&bus);
+
+ print_changes(reply);
+ return 0;
+}
+
+static int list_images(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ int r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.portable1",
+ "/org/freedesktop/portable1",
+ "org.freedesktop.portable1.Manager",
+ "ListImages",
+ &error,
+ &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list images: %s", bus_error_message(&error, r));
+
+ table = table_new("name", "type", "ro", "crtime", "mtime", "usage", "state");
+ if (!table)
+ return log_oom();
+
+ r = sd_bus_message_enter_container(reply, 'a', "(ssbtttso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ const char *name, *type, *state;
+ uint64_t crtime, mtime, usage;
+ TableCell *cell;
+ bool ro_bool;
+ int ro_int;
+
+ r = sd_bus_message_read(reply, "(ssbtttso)", &name, &type, &ro_int, &crtime, &mtime, &usage, &state, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ r = table_add_many(table,
+ TABLE_STRING, name,
+ TABLE_STRING, type);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add row to table: %m");
+
+ ro_bool = ro_int;
+ r = table_add_cell(table, &cell, TABLE_BOOLEAN, &ro_bool);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add row to table: %m");
+
+ if (ro_bool) {
+ r = table_set_color(table, cell, ansi_highlight_red());
+ if (r < 0)
+ return log_error_errno(r, "Failed to set table cell color: %m");
+ }
+
+ r = table_add_many(table,
+ TABLE_TIMESTAMP, crtime,
+ TABLE_TIMESTAMP, mtime,
+ TABLE_SIZE, usage);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add row to table: %m");
+
+ r = table_add_cell(table, &cell, TABLE_STRING, state);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add row to table: %m");
+
+ if (!streq(state, "detached")) {
+ r = table_set_color(table, cell, ansi_highlight_green());
+ if (r < 0)
+ return log_error_errno(r, "Failed to set table cell color: %m");
+ }
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (table_get_rows(table) > 1) {
+ r = table_set_sort(table, (size_t) 0, (size_t) -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to sort table: %m");
+
+ table_set_header(table, arg_legend);
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to show table: %m");
+ }
+
+ if (arg_legend) {
+ if (table_get_rows(table) > 1)
+ printf("\n%zu images listed.\n", table_get_rows(table) - 1);
+ else
+ printf("No images.\n");
+ }
+
+ return 0;
+}
+
+static int remove_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r, i;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ for (i = 1; i < argc; i++) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.portable1",
+ "/org/freedesktop/portable1",
+ "org.freedesktop.portable1.Manager",
+ "RemoveImage");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", argv[i]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* This is a slow operation, hence turn off any method call timeouts */
+ r = sd_bus_call(bus, m, USEC_INFINITY, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not remove image: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int read_only_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int b = true, r;
+
+ if (argc > 2) {
+ b = parse_boolean(argv[2]);
+ if (b < 0)
+ return log_error_errno(b, "Failed to parse boolean argument: %s", argv[2]);
+ }
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.portable1",
+ "/org/freedesktop/portable1",
+ "org.freedesktop.portable1.Manager",
+ "MarkImageReadOnly",
+ &error,
+ NULL,
+ "sb", argv[1], b);
+ if (r < 0)
+ return log_error_errno(r, "Could not mark image read-only: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int set_limit(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ uint64_t limit;
+ int r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ if (STR_IN_SET(argv[argc-1], "-", "none", "infinity"))
+ limit = (uint64_t) -1;
+ else {
+ r = parse_size(argv[argc-1], 1024, &limit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse size: %s", argv[argc-1]);
+ }
+
+ if (argc > 2)
+ /* With two arguments changes the quota limit of the specified image */
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.portable1",
+ "/org/freedesktop/portable1",
+ "org.freedesktop.portable1.Manager",
+ "SetImageLimit",
+ &error,
+ NULL,
+ "st", argv[1], limit);
+ else
+ /* With one argument changes the pool quota limit */
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.portable1",
+ "/org/freedesktop/portable1",
+ "org.freedesktop.portable1.Manager",
+ "SetPoolLimit",
+ &error,
+ NULL,
+ "t", limit);
+
+ if (r < 0)
+ return log_error_errno(r, "Could not set limit: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int is_image_attached(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *image = NULL;
+ const char *state;
+ int r;
+
+ r = determine_image(argv[1], true, &image);
+ if (r < 0)
+ return r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.portable1",
+ "/org/freedesktop/portable1",
+ "org.freedesktop.portable1.Manager",
+ "GetImageState",
+ &error,
+ &reply,
+ "s", image);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get image state: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "s", &state);
+ if (r < 0)
+ return r;
+
+ if (!arg_quiet)
+ puts(state);
+
+ return streq(state, "detached");
+}
+
+static int dump_profiles(void) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ char **i;
+ int r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_get_property_strv(
+ bus,
+ "org.freedesktop.portable1",
+ "/org/freedesktop/portable1",
+ "org.freedesktop.portable1.Manager",
+ "Profiles",
+ &error,
+ &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire list of profiles: %s", bus_error_message(&error, r));
+
+ if (arg_legend)
+ log_info("Available unit profiles:");
+
+ STRV_FOREACH(i, l) {
+ fputs(*i, stdout);
+ fputc('\n', stdout);
+ }
+
+ return 0;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("portablectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Attach or detach portable services from the local system.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers and footers\n"
+ " --no-ask-password Do not ask for system passwords\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " -q --quiet Suppress informational messages\n"
+ " -p --profile=PROFILE Pick security profile for portable service\n"
+ " --copy=copy|auto|symlink Prefer copying or symlinks if possible\n"
+ " --runtime Attach portable service until next reboot only\n"
+ " --no-reload Don't reload the system and service manager\n"
+ " --cat When inspecting include unit and os-release file\n"
+ " contents\n\n"
+ "Commands:\n"
+ " list List available portable service images\n"
+ " attach NAME|PATH [PREFIX...]\n"
+ " Attach the specified portable service image\n"
+ " detach NAME|PATH Detach the specified portable service image\n"
+ " inspect NAME|PATH [PREFIX...]\n"
+ " Show details of specified portable service image\n"
+ " is-attached NAME|PATH Query if portable service image is attached\n"
+ " read-only NAME|PATH [BOOL] Mark or unmark portable service image read-only\n"
+ " remove NAME|PATH... Remove a portable service image\n"
+ " set-limit [NAME|PATH] Set image or pool size limit (disk quota)\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ ARG_NO_ASK_PASSWORD,
+ ARG_COPY,
+ ARG_RUNTIME,
+ ARG_NO_RELOAD,
+ ARG_CAT,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "quiet", no_argument, NULL, 'q' },
+ { "profile", required_argument, NULL, 'p' },
+ { "copy", required_argument, NULL, ARG_COPY },
+ { "runtime", no_argument, NULL, ARG_RUNTIME },
+ { "no-reload", no_argument, NULL, ARG_NO_RELOAD },
+ { "cat", no_argument, NULL, ARG_CAT },
+ {}
+ };
+
+ assert(argc >= 0);
+ assert(argv);
+
+ for (;;) {
+ int c;
+
+ c = getopt_long(argc, argv, "hH:M:qp:", options, NULL);
+ if (c < 0)
+ break;
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case 'p':
+ if (streq(optarg, "help"))
+ return dump_profiles();
+
+ if (!filename_is_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unit profile name not valid: %s", optarg);
+
+ arg_profile = optarg;
+ break;
+
+ case ARG_COPY:
+ if (streq(optarg, "auto"))
+ arg_copy_mode = NULL;
+ else if (STR_IN_SET(optarg, "copy", "symlink"))
+ arg_copy_mode = optarg;
+ else if (streq(optarg, "help")) {
+ puts("auto\n"
+ "copy\n"
+ "symlink");
+ return 0;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --copy= argument: %s", optarg);
+
+ break;
+
+ case ARG_RUNTIME:
+ arg_runtime = true;
+ break;
+
+ case ARG_NO_RELOAD:
+ arg_reload = false;
+ break;
+
+ case ARG_CAT:
+ arg_cat = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "list", VERB_ANY, 1, VERB_DEFAULT, list_images },
+ { "attach", 2, VERB_ANY, 0, attach_image },
+ { "detach", 2, 2, 0, detach_image },
+ { "inspect", 2, VERB_ANY, 0, inspect_image },
+ { "is-attached", 2, 2, 0, is_image_attached },
+ { "read-only", 2, 3, 0, read_only_image },
+ { "remove", 2, VERB_ANY, 0, remove_image },
+ { "set-limit", 3, 3, 0, set_limit },
+ {}
+ };
+
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/portable/portabled-bus.c b/src/portable/portabled-bus.c
new file mode 100644
index 0000000..3cbdb0b
--- /dev/null
+++ b/src/portable/portabled-bus.c
@@ -0,0 +1,403 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "bus-common-errors.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "machine-image.h"
+#include "missing_capability.h"
+#include "portable.h"
+#include "portabled-bus.h"
+#include "portabled-image-bus.h"
+#include "portabled-image.h"
+#include "portabled.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int property_get_pool_path(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ assert(bus);
+ assert(reply);
+
+ return sd_bus_message_append(reply, "s", "/var/lib/portables");
+}
+
+static int property_get_pool_usage(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_close_ int fd = -1;
+ uint64_t usage = (uint64_t) -1;
+
+ assert(bus);
+ assert(reply);
+
+ fd = open("/var/lib/portables", O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd >= 0) {
+ BtrfsQuotaInfo q;
+
+ if (btrfs_subvol_get_subtree_quota_fd(fd, 0, &q) >= 0)
+ usage = q.referenced;
+ }
+
+ return sd_bus_message_append(reply, "t", usage);
+}
+
+static int property_get_pool_limit(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_close_ int fd = -1;
+ uint64_t size = (uint64_t) -1;
+
+ assert(bus);
+ assert(reply);
+
+ fd = open("/var/lib/portables", O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd >= 0) {
+ BtrfsQuotaInfo q;
+
+ if (btrfs_subvol_get_subtree_quota_fd(fd, 0, &q) >= 0)
+ size = q.referenced_max;
+ }
+
+ return sd_bus_message_append(reply, "t", size);
+}
+
+static int property_get_profiles(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = portable_get_profiles(&l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append_strv(reply, l);
+}
+
+static int method_get_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ const char *name;
+ Image *image;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_image_acquire(m, message, name, NULL, BUS_IMAGE_REFUSE_BY_PATH, NULL, &image, error);
+ if (r < 0)
+ return r;
+
+ r = bus_image_path(image, &p);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_list_images(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_hashmap_free_ Hashmap *images = NULL;
+ Manager *m = userdata;
+ Image *image;
+ Iterator i;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ images = hashmap_new(&image_hash_ops);
+ if (!images)
+ return -ENOMEM;
+
+ r = manager_image_cache_discover(m, images, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssbtttso)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(image, images, i) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_state = SD_BUS_ERROR_NULL;
+ PortableState state = _PORTABLE_STATE_INVALID;
+ _cleanup_free_ char *p = NULL;
+
+ r = bus_image_path(image, &p);
+ if (r < 0)
+ return r;
+
+ r = portable_get_state(
+ sd_bus_message_get_bus(message),
+ image->path,
+ 0,
+ &state,
+ &error_state);
+ if (r < 0)
+ log_debug_errno(r, "Failed to get state of image '%s', ignoring: %s",
+ image->path, bus_error_message(&error_state, r));
+
+ r = sd_bus_message_append(reply, "(ssbtttso)",
+ image->name,
+ image_type_to_string(image->type),
+ image->read_only,
+ image->crtime,
+ image->mtime,
+ image->usage,
+ portable_state_to_string(state),
+ p);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int redirect_method_to_image(
+ Manager *m,
+ sd_bus_message *message,
+ sd_bus_error *error,
+ int (*method)(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, sd_bus_error* error)) {
+
+ const char *name_or_path;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(method);
+
+ r = sd_bus_message_read(message, "s", &name_or_path);
+ if (r < 0)
+ return r;
+
+ return method(m, message, name_or_path, NULL, error);
+}
+
+static int method_get_image_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(userdata, message, error, bus_image_common_get_os_release);
+}
+
+static int method_get_image_metadata(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(userdata, message, error, bus_image_common_get_metadata);
+}
+
+static int method_get_image_state(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *name_or_path;
+ PortableState state;
+ int r;
+
+ assert(message);
+
+ r = sd_bus_message_read(message, "s", &name_or_path);
+ if (r < 0)
+ return r;
+
+ r = portable_get_state(
+ sd_bus_message_get_bus(message),
+ name_or_path,
+ 0,
+ &state,
+ error);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "s", portable_state_to_string(state));
+}
+
+static int method_attach_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(userdata, message, error, bus_image_common_attach);
+}
+
+static int method_detach_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ PortableChange *changes = NULL;
+ Manager *m = userdata;
+ size_t n_changes = 0;
+ const char *name_or_path;
+ int r, runtime;
+
+ assert(message);
+ assert(m);
+
+ /* Note that we do not redirect detaching to the image object here, because we want to allow that users can
+ * detach already deleted images too, in case the user already deleted an image before properly detaching
+ * it. */
+
+ r = sd_bus_message_read(message, "sb", &name_or_path, &runtime);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.portable1.attach-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = portable_detach(
+ sd_bus_message_get_bus(message),
+ name_or_path,
+ runtime ? PORTABLE_RUNTIME : 0,
+ &changes,
+ &n_changes,
+ error);
+ if (r < 0)
+ goto finish;
+
+ r = reply_portable_changes(message, changes, n_changes);
+
+finish:
+ portable_changes_free(changes, n_changes);
+ return r;
+}
+
+static int method_remove_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(userdata, message, error, bus_image_common_remove);
+}
+
+static int method_mark_image_read_only(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(userdata, message, error, bus_image_common_mark_read_only);
+}
+
+static int method_set_image_limit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(userdata, message, error, bus_image_common_set_limit);
+}
+
+static int method_set_pool_limit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ uint64_t limit;
+ int r;
+
+ assert(message);
+
+ r = sd_bus_message_read(message, "t", &limit);
+ if (r < 0)
+ return r;
+ if (!FILE_SIZE_VALID_OR_INFINITY(limit))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "New limit out of range");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.portable1.manage-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ (void) btrfs_qgroup_set_limit("/var/lib/portables", 0, limit);
+
+ r = btrfs_subvol_set_subtree_quota_limit("/var/lib/portables", 0, limit);
+ if (r == -ENOTTY)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Quota is only supported on btrfs.");
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to adjust quota limit: %m");
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+const sd_bus_vtable manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("PoolPath", "s", property_get_pool_path, 0, 0),
+ SD_BUS_PROPERTY("PoolUsage", "t", property_get_pool_usage, 0, 0),
+ SD_BUS_PROPERTY("PoolLimit", "t", property_get_pool_limit, 0, 0),
+ SD_BUS_PROPERTY("Profiles", "as", property_get_profiles, 0, 0),
+ SD_BUS_METHOD("GetImage", "s", "o", method_get_image, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListImages", NULL, "a(ssbtttso)", method_list_images, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetImageOSRelease", "s", "a{ss}", method_get_image_os_release, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetImageState", "s", "s", method_get_image_state, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetImageMetadata", "sas", "saya{say}", method_get_image_metadata, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("AttachImage", "sassbs", "a(sss)", method_attach_image, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("DetachImage", "sb", "a(sss)", method_detach_image, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("RemoveImage", "s", NULL, method_remove_image, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("MarkImageReadOnly", "sb", NULL, method_mark_image_read_only, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetImageLimit", "st", NULL, method_set_image_limit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetPoolLimit", "t", NULL, method_set_pool_limit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END
+};
+
+int reply_portable_changes(sd_bus_message *m, const PortableChange *changes, size_t n_changes) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ size_t i;
+ int r;
+
+ assert(m);
+ assert(changes || n_changes == 0);
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(sss)");
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < n_changes; i++) {
+ r = sd_bus_message_append(reply, "(sss)",
+ portable_change_type_to_string(changes[i].type),
+ changes[i].path,
+ changes[i].source);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
diff --git a/src/portable/portabled-bus.h b/src/portable/portabled-bus.h
new file mode 100644
index 0000000..5810517
--- /dev/null
+++ b/src/portable/portabled-bus.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "portable.h"
+
+extern const sd_bus_vtable manager_vtable[];
+
+int reply_portable_changes(sd_bus_message *m, const PortableChange *changes, size_t n_changes);
diff --git a/src/portable/portabled-image-bus.c b/src/portable/portabled-image-bus.c
new file mode 100644
index 0000000..3605598
--- /dev/null
+++ b/src/portable/portabled-image-bus.c
@@ -0,0 +1,736 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-label.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "io-util.h"
+#include "machine-image.h"
+#include "missing_capability.h"
+#include "portable.h"
+#include "portabled-bus.h"
+#include "portabled-image-bus.h"
+#include "portabled-image.h"
+#include "portabled.h"
+#include "process-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, image_type, ImageType);
+
+int bus_image_common_get_os_release(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name_or_path,
+ Image *image,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(name_or_path || image);
+ assert(message);
+
+ if (!m) {
+ assert(image);
+ m = image->userdata;
+ }
+
+ r = bus_image_acquire(m,
+ message,
+ name_or_path,
+ image,
+ BUS_IMAGE_AUTHENTICATE_BY_PATH,
+ "org.freedesktop.portable1.inspect-images",
+ &image,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0) /* Will call us back */
+ return 1;
+
+ if (!image->metadata_valid) {
+ r = image_read_metadata(image);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to read image metadata: %m");
+ }
+
+ return bus_reply_pair_array(message, image->os_release);
+}
+
+static int bus_image_method_get_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_image_common_get_os_release(NULL, message, NULL, userdata, error);
+}
+
+static int append_fd(sd_bus_message *m, PortableMetadata *d) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *buf = NULL;
+ size_t n;
+ int r;
+
+ assert(m);
+ assert(d);
+ assert(d->fd >= 0);
+
+ f = fdopen(d->fd, "r");
+ if (!f)
+ return -errno;
+
+ d->fd = -1;
+
+ r = read_full_stream(f, &buf, &n);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append_array(m, 'y', buf, n);
+}
+
+int bus_image_common_get_metadata(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name_or_path,
+ Image *image,
+ sd_bus_error *error) {
+
+ _cleanup_(portable_metadata_unrefp) PortableMetadata *os_release = NULL;
+ _cleanup_hashmap_free_ Hashmap *unit_files = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ PortableMetadata **sorted = NULL;
+ _cleanup_strv_free_ char **matches = NULL;
+ size_t i;
+ int r;
+
+ assert(name_or_path || image);
+ assert(message);
+
+ if (!m) {
+ assert(image);
+ m = image->userdata;
+ }
+
+ r = sd_bus_message_read_strv(message, &matches);
+ if (r < 0)
+ return r;
+
+ r = bus_image_acquire(m,
+ message,
+ name_or_path,
+ image,
+ BUS_IMAGE_AUTHENTICATE_BY_PATH,
+ "org.freedesktop.portable1.inspect-images",
+ &image,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0) /* Will call us back */
+ return 1;
+
+ r = portable_extract(
+ image->path,
+ matches,
+ &os_release,
+ &unit_files,
+ error);
+ if (r < 0)
+ return r;
+
+ r = portable_metadata_hashmap_to_sorted_array(unit_files, &sorted);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", image->path);
+ if (r < 0)
+ return r;
+
+ r = append_fd(reply, os_release);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{say}");
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < hashmap_size(unit_files); i++) {
+
+ r = sd_bus_message_open_container(reply, 'e', "say");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", sorted[i]->name);
+ if (r < 0)
+ return r;
+
+ r = append_fd(reply, sorted[i]);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int bus_image_method_get_metadata(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_image_common_get_metadata(NULL, message, NULL, userdata, error);
+}
+
+static int bus_image_method_get_state(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Image *image = userdata;
+ PortableState state;
+ int r;
+
+ assert(message);
+ assert(image);
+
+ r = portable_get_state(
+ sd_bus_message_get_bus(message),
+ image->path,
+ 0,
+ &state,
+ error);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "s", portable_state_to_string(state));
+}
+
+int bus_image_common_attach(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name_or_path,
+ Image *image,
+ sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **matches = NULL;
+ PortableChange *changes = NULL;
+ PortableFlags flags = 0;
+ const char *profile, *copy_mode;
+ size_t n_changes = 0;
+ int runtime, r;
+
+ assert(message);
+ assert(name_or_path || image);
+
+ if (!m) {
+ assert(image);
+ m = image->userdata;
+ }
+
+ r = sd_bus_message_read_strv(message, &matches);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "sbs", &profile, &runtime, &copy_mode);
+ if (r < 0)
+ return r;
+
+ if (streq(copy_mode, "symlink"))
+ flags |= PORTABLE_PREFER_SYMLINK;
+ else if (streq(copy_mode, "copy"))
+ flags |= PORTABLE_PREFER_COPY;
+ else if (!isempty(copy_mode))
+ return sd_bus_reply_method_errorf(message, SD_BUS_ERROR_INVALID_ARGS, "Unknown copy mode '%s'", copy_mode);
+
+ if (runtime)
+ flags |= PORTABLE_RUNTIME;
+
+ r = bus_image_acquire(m,
+ message,
+ name_or_path,
+ image,
+ BUS_IMAGE_AUTHENTICATE_ALL,
+ "org.freedesktop.portable1.attach-images",
+ &image,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0) /* Will call us back */
+ return 1;
+
+ r = portable_attach(
+ sd_bus_message_get_bus(message),
+ image->path,
+ matches,
+ profile,
+ flags,
+ &changes,
+ &n_changes,
+ error);
+ if (r < 0)
+ goto finish;
+
+ r = reply_portable_changes(message, changes, n_changes);
+
+finish:
+ portable_changes_free(changes, n_changes);
+ return r;
+}
+
+static int bus_image_method_attach(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_image_common_attach(NULL, message, NULL, userdata, error);
+}
+
+static int bus_image_method_detach(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ PortableChange *changes = NULL;
+ Image *image = userdata;
+ Manager *m = image->userdata;
+ size_t n_changes = 0;
+ int r, runtime;
+
+ assert(message);
+ assert(image);
+ assert(m);
+
+ r = sd_bus_message_read(message, "b", &runtime);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.portable1.attach-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = portable_detach(
+ sd_bus_message_get_bus(message),
+ image->path,
+ runtime ? PORTABLE_RUNTIME : 0,
+ &changes,
+ &n_changes,
+ error);
+ if (r < 0)
+ goto finish;
+
+ r = reply_portable_changes(message, changes, n_changes);
+
+finish:
+ portable_changes_free(changes, n_changes);
+ return r;
+}
+
+int bus_image_common_remove(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name_or_path,
+ Image *image,
+ sd_bus_error *error) {
+
+ _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
+ _cleanup_(sigkill_waitp) pid_t child = 0;
+ PortableState state;
+ int r;
+
+ assert(message);
+ assert(name_or_path || image);
+
+ if (!m) {
+ assert(image);
+ m = image->userdata;
+ }
+
+ if (m->n_operations >= OPERATIONS_MAX)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many ongoing operations.");
+
+ r = bus_image_acquire(m,
+ message,
+ name_or_path,
+ image,
+ BUS_IMAGE_AUTHENTICATE_ALL,
+ "org.freedesktop.portable1.manage-images",
+ &image,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = portable_get_state(
+ sd_bus_message_get_bus(message),
+ image->path,
+ 0,
+ &state,
+ error);
+ if (r < 0)
+ return r;
+
+ if (state != PORTABLE_DETACHED)
+ return sd_bus_error_set_errnof(error, EBUSY, "Image '%s' is not detached, refusing.", image->path);
+
+ if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m");
+
+ r = safe_fork("(sd-imgrm)", FORK_RESET_SIGNALS, &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
+
+ r = image_remove(image);
+ if (r < 0) {
+ (void) write(errno_pipe_fd[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+ r = operation_new(m, child, message, errno_pipe_fd[0], NULL);
+ if (r < 0)
+ return r;
+
+ child = 0;
+ errno_pipe_fd[0] = -1;
+
+ return 1;
+}
+
+static int bus_image_method_remove(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_image_common_remove(NULL, message, NULL, userdata, error);
+}
+
+int bus_image_common_mark_read_only(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name_or_path,
+ Image *image,
+ sd_bus_error *error) {
+
+ int r, read_only;
+
+ assert(message);
+ assert(name_or_path || image);
+
+ if (!m) {
+ assert(image);
+ m = image->userdata;
+ }
+
+ r = sd_bus_message_read(message, "b", &read_only);
+ if (r < 0)
+ return r;
+
+ r = bus_image_acquire(m,
+ message,
+ name_or_path,
+ image,
+ BUS_IMAGE_AUTHENTICATE_ALL,
+ "org.freedesktop.portable1.manage-images",
+ &image,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = image_read_only(image, read_only);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int bus_image_method_mark_read_only(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_image_common_mark_read_only(NULL, message, NULL, userdata, error);
+}
+
+int bus_image_common_set_limit(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name_or_path,
+ Image *image,
+ sd_bus_error *error) {
+
+ uint64_t limit;
+ int r;
+
+ assert(message);
+ assert(name_or_path || image);
+
+ if (!m) {
+ assert(image);
+ m = image->userdata;
+ }
+
+ r = sd_bus_message_read(message, "t", &limit);
+ if (r < 0)
+ return r;
+ if (!FILE_SIZE_VALID_OR_INFINITY(limit))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "New limit out of range");
+
+ r = bus_image_acquire(m,
+ message,
+ name_or_path,
+ image,
+ BUS_IMAGE_AUTHENTICATE_ALL,
+ "org.freedesktop.portable1.manage-images",
+ &image,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = image_set_limit(image, limit);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int bus_image_method_set_limit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_image_common_set_limit(NULL, message, NULL, userdata, error);
+}
+
+const sd_bus_vtable image_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Name", "s", NULL, offsetof(Image, name), 0),
+ SD_BUS_PROPERTY("Path", "s", NULL, offsetof(Image, path), 0),
+ SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Image, type), 0),
+ SD_BUS_PROPERTY("ReadOnly", "b", bus_property_get_bool, offsetof(Image, read_only), 0),
+ SD_BUS_PROPERTY("CreationTimestamp", "t", NULL, offsetof(Image, crtime), 0),
+ SD_BUS_PROPERTY("ModificationTimestamp", "t", NULL, offsetof(Image, mtime), 0),
+ SD_BUS_PROPERTY("Usage", "t", NULL, offsetof(Image, usage), 0),
+ SD_BUS_PROPERTY("Limit", "t", NULL, offsetof(Image, limit), 0),
+ SD_BUS_PROPERTY("UsageExclusive", "t", NULL, offsetof(Image, usage_exclusive), 0),
+ SD_BUS_PROPERTY("LimitExclusive", "t", NULL, offsetof(Image, limit_exclusive), 0),
+ SD_BUS_METHOD("GetOSRelease", NULL, "a{ss}", bus_image_method_get_os_release, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetMedatadata", "as", "saya{say}", bus_image_method_get_metadata, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetState", NULL, "s", bus_image_method_get_state, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Attach", "assbs", "a(sss)", bus_image_method_attach, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Detach", "b", "a(sss)", bus_image_method_detach, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Remove", NULL, NULL, bus_image_method_remove, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("MarkReadOnly", "b", NULL, bus_image_method_mark_read_only, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLimit", "t", NULL, bus_image_method_set_limit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END
+};
+
+int bus_image_path(Image *image, char **ret) {
+ assert(image);
+ assert(ret);
+
+ if (!image->discoverable)
+ return -EINVAL;
+
+ return sd_bus_path_encode("/org/freedesktop/portable1/image", image->name, ret);
+}
+
+int bus_image_acquire(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name_or_path,
+ Image *image,
+ ImageAcquireMode mode,
+ const char *polkit_action,
+ Image **ret,
+ sd_bus_error *error) {
+
+ _cleanup_(image_unrefp) Image *loaded = NULL;
+ Image *cached;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(name_or_path || image);
+ assert(mode >= 0);
+ assert(mode < _BUS_IMAGE_ACQUIRE_MODE_MAX);
+ assert(polkit_action || mode == BUS_IMAGE_REFUSE_BY_PATH);
+ assert(ret);
+
+ /* Acquires an 'Image' object if not acquired yet, and enforces necessary authentication while doing so. */
+
+ if (mode == BUS_IMAGE_AUTHENTICATE_ALL) {
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ polkit_action,
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0) { /* Will call us back */
+ *ret = NULL;
+ return 0;
+ }
+ }
+
+ /* Already passed in? */
+ if (image) {
+ *ret = image;
+ return 1;
+ }
+
+ /* Let's see if this image is already cached? */
+ cached = manager_image_cache_get(m, name_or_path);
+ if (cached) {
+ *ret = cached;
+ return 1;
+ }
+
+ if (image_name_is_valid(name_or_path)) {
+
+ /* If it's a short name, let's search for it */
+ r = image_find(IMAGE_PORTABLE, name_or_path, &loaded);
+ if (r == -ENOENT)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_PORTABLE_IMAGE, "No image '%s' found.", name_or_path);
+
+ /* other errors are handled below… */
+ } else {
+ /* Don't accept path if this is always forbidden */
+ if (mode == BUS_IMAGE_REFUSE_BY_PATH)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Expected image name, not path in place of '%s'.", name_or_path);
+
+ if (!path_is_absolute(name_or_path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Image name '%s' is not valid or not a valid path.", name_or_path);
+
+ if (!path_is_normalized(name_or_path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Image path '%s' is not normalized.", name_or_path);
+
+ if (mode == BUS_IMAGE_AUTHENTICATE_BY_PATH) {
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ polkit_action,
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0) { /* Will call us back */
+ *ret = NULL;
+ return 0;
+ }
+ }
+
+ r = image_from_path(name_or_path, &loaded);
+ }
+ if (r == -EMEDIUMTYPE) {
+ sd_bus_error_setf(error, BUS_ERROR_BAD_PORTABLE_IMAGE_TYPE, "Typ of image '%s' not recognized; supported image types are directories/btrfs subvolumes, block devices, and raw disk image files with suffix '.raw'.", name_or_path);
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ /* Add what we just loaded to the cache. This has as side-effect that the object stays in memory until the
+ * cache is purged again, i.e. at least for the current event loop iteration, which is all we need, and which
+ * means we don't actually need to ref the return object. */
+ r = manager_image_cache_add(m, loaded);
+ if (r < 0)
+ return r;
+
+ *ret = loaded;
+ return 1;
+}
+
+int bus_image_object_find(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ void *userdata,
+ void **found,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *e = NULL;
+ Manager *m = userdata;
+ Image *image = NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+
+ r = sd_bus_path_decode(path, "/org/freedesktop/portable1/image", &e);
+ if (r < 0)
+ return 0;
+ if (r == 0)
+ goto not_found;
+
+ r = bus_image_acquire(m, sd_bus_get_current_message(bus), e, NULL, BUS_IMAGE_REFUSE_BY_PATH, NULL, &image, error);
+ if (r == -ENOENT)
+ goto not_found;
+ if (r < 0)
+ return r;
+
+ *found = image;
+ return 1;
+
+not_found:
+ *found = NULL;
+ return 0;
+}
+
+int bus_image_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_hashmap_free_ Hashmap *images = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ size_t n_allocated = 0, n = 0;
+ Manager *m = userdata;
+ Image *image;
+ Iterator i;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(nodes);
+
+ images = hashmap_new(&image_hash_ops);
+ if (!images)
+ return -ENOMEM;
+
+ r = manager_image_cache_discover(m, images, error);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(image, images, i) {
+ char *p;
+
+ r = bus_image_path(image, &p);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC(l, n_allocated, n+2)) {
+ free(p);
+ return -ENOMEM;
+ }
+
+ l[n++] = p;
+ l[n] = NULL;
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
diff --git a/src/portable/portabled-image-bus.h b/src/portable/portabled-image-bus.h
new file mode 100644
index 0000000..825dea2
--- /dev/null
+++ b/src/portable/portabled-image-bus.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "machine-image.h"
+#include "portabled.h"
+
+int bus_image_common_get_os_release(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, sd_bus_error *error);
+int bus_image_common_get_metadata(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, sd_bus_error *error);
+int bus_image_common_attach(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, sd_bus_error *error);
+int bus_image_common_remove(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, sd_bus_error *error);
+int bus_image_common_mark_read_only(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, sd_bus_error *error);
+int bus_image_common_set_limit(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, sd_bus_error *error);
+
+extern const sd_bus_vtable image_vtable[];
+
+int bus_image_path(Image *image, char **ret);
+
+/* So here's some complexity: some of operations can either take an image name, or a fully qualified file system path
+ * to an image. We need to authenticate differently when processing these two: images referenced via simple image names
+ * mean the images are located in the image search path and thus safe for limited read access for unprivileged
+ * clients. For operations on images located anywhere else we need explicit authentication however, so that
+ * unprivileged clients can't make us open arbitrary files in the file system.
+ *
+ * The "Image" bus objects directly represent images in the image search path, but do not exist for path-referenced
+ * images. Hence, when requesting a bus object we need to refuse references by file system path, but still allow
+ * references by image name. Depending on the operation to execute potentially we need to authenticate in all cases. */
+
+typedef enum ImageAcquireMode {
+ BUS_IMAGE_REFUSE_BY_PATH, /* allow by name + prohibit by path */
+ BUS_IMAGE_AUTHENTICATE_BY_PATH, /* allow by name + polkit by path */
+ BUS_IMAGE_AUTHENTICATE_ALL, /* polkit by name + polkit by path */
+ _BUS_IMAGE_ACQUIRE_MODE_MAX,
+ _BUS_IMAGE_ACQUIRE_MODE_INVALID = -1
+} ImageAcquireMode;
+
+int bus_image_acquire(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, ImageAcquireMode mode, const char *polkit_action, Image **ret, sd_bus_error *error);
+
+int bus_image_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error);
+int bus_image_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error);
diff --git a/src/portable/portabled-image.c b/src/portable/portabled-image.c
new file mode 100644
index 0000000..d95845b
--- /dev/null
+++ b/src/portable/portabled-image.c
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "portable.h"
+#include "portabled-image.h"
+#include "portabled.h"
+
+Image *manager_image_cache_get(Manager *m, const char *name_or_path) {
+ assert(m);
+
+ return hashmap_get(m->image_cache, name_or_path);
+}
+
+static int image_cache_flush(sd_event_source *s, void *userdata) {
+ Manager *m = userdata;
+
+ assert(s);
+ assert(m);
+
+ hashmap_clear(m->image_cache);
+ return 0;
+}
+
+static int manager_image_cache_initialize(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = hashmap_ensure_allocated(&m->image_cache, &image_hash_ops);
+ if (r < 0)
+ return r;
+
+ /* We flush the cache as soon as we are idle again */
+ if (!m->image_cache_defer_event) {
+ r = sd_event_add_defer(m->event, &m->image_cache_defer_event, image_cache_flush, m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(m->image_cache_defer_event, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_source_set_enabled(m->image_cache_defer_event, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int manager_image_cache_add(Manager *m, Image *image) {
+ int r;
+
+ assert(m);
+
+ /* We add the specified image to the cache under two keys.
+ *
+ * 1. Always under its path
+ *
+ * 2. If the image was discovered in the search path (i.e. its discoverable boolean set) we'll also add it
+ * under its short name.
+ */
+
+ r = manager_image_cache_initialize(m);
+ if (r < 0)
+ return r;
+
+ image->userdata = m;
+
+ r = hashmap_put(m->image_cache, image->path, image);
+ if (r < 0)
+ return r;
+
+ image_ref(image);
+
+ if (image->discoverable) {
+ r = hashmap_put(m->image_cache, image->name, image);
+ if (r < 0)
+ return r;
+
+ image_ref(image);
+ }
+
+ return 0;
+}
+
+int manager_image_cache_discover(Manager *m, Hashmap *images, sd_bus_error *error) {
+ Image *image;
+ Iterator i;
+ int r;
+
+ assert(m);
+
+ /* A wrapper around image_discover() (for finding images in search path) and portable_discover_attached() (for
+ * finding attached images). */
+
+ r = image_discover(IMAGE_PORTABLE, images);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(image, images, i)
+ (void) manager_image_cache_add(m, image);
+
+ return 0;
+}
diff --git a/src/portable/portabled-image.h b/src/portable/portabled-image.h
new file mode 100644
index 0000000..26ce2f6
--- /dev/null
+++ b/src/portable/portabled-image.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "hashmap.h"
+#include "machine-image.h"
+#include "portabled.h"
+
+Image *manager_image_cache_get(Manager *m, const char *name_or_path);
+
+int manager_image_cache_add(Manager *m, Image *image);
+
+int manager_image_cache_discover(Manager *m, Hashmap *images, sd_bus_error *error);
diff --git a/src/portable/portabled-operation.c b/src/portable/portabled-operation.c
new file mode 100644
index 0000000..305c96c
--- /dev/null
+++ b/src/portable/portabled-operation.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "portabled-operation.h"
+#include "process-util.h"
+
+static int operation_done(sd_event_source *s, const siginfo_t *si, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Operation *o = userdata;
+ int r;
+
+ assert(o);
+ assert(si);
+
+ log_debug("Operating " PID_FMT " is now complete with code=%s status=%i",
+ o->pid,
+ sigchld_code_to_string(si->si_code), si->si_status);
+
+ o->pid = 0;
+
+ if (si->si_code != CLD_EXITED) {
+ r = sd_bus_error_setf(&error, SD_BUS_ERROR_FAILED, "Child died abnormally.");
+ goto fail;
+ }
+
+ if (si->si_status == EXIT_SUCCESS)
+ r = 0;
+ else if (read(o->errno_fd, &r, sizeof(r)) != sizeof(r)) { /* Try to acquire error code for failed operation */
+ r = sd_bus_error_setf(&error, SD_BUS_ERROR_FAILED, "Child failed.");
+ goto fail;
+ }
+
+ if (o->done) {
+ /* A completion routine is set for this operation, call it. */
+ r = o->done(o, r, &error);
+ if (r < 0) {
+ if (!sd_bus_error_is_set(&error))
+ sd_bus_error_set_errno(&error, r);
+
+ goto fail;
+ }
+
+ } else {
+ /* The default operation when done is to simply return an error on failure or an empty success
+ * message on success. */
+ if (r < 0) {
+ sd_bus_error_set_errno(&error, r);
+ goto fail;
+ }
+
+ r = sd_bus_reply_method_return(o->message, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to reply to message: %m");
+ }
+
+ operation_free(o);
+ return 0;
+
+fail:
+ r = sd_bus_reply_method_error(o->message, &error);
+ if (r < 0)
+ log_error_errno(r, "Failed to reply to message: %m");
+
+ operation_free(o);
+ return 0;
+}
+
+int operation_new(Manager *manager, pid_t child, sd_bus_message *message, int errno_fd, Operation **ret) {
+ Operation *o;
+ int r;
+
+ assert(manager);
+ assert(child > 1);
+ assert(message);
+ assert(errno_fd >= 0);
+
+ o = new0(Operation, 1);
+ if (!o)
+ return -ENOMEM;
+
+ o->extra_fd = -1;
+
+ r = sd_event_add_child(manager->event, &o->event_source, child, WEXITED, operation_done, o);
+ if (r < 0) {
+ free(o);
+ return r;
+ }
+
+ o->pid = child;
+ o->message = sd_bus_message_ref(message);
+ o->errno_fd = errno_fd;
+
+ LIST_PREPEND(operations, manager->operations, o);
+ manager->n_operations++;
+ o->manager = manager;
+
+ log_debug("Started new operation " PID_FMT ".", child);
+
+ /* At this point we took ownership of both the child and the errno file descriptor! */
+
+ if (ret)
+ *ret = o;
+
+ return 0;
+}
+
+Operation *operation_free(Operation *o) {
+ if (!o)
+ return NULL;
+
+ sd_event_source_unref(o->event_source);
+
+ safe_close(o->errno_fd);
+ safe_close(o->extra_fd);
+
+ if (o->pid > 1)
+ (void) sigkill_wait(o->pid);
+
+ sd_bus_message_unref(o->message);
+
+ if (o->manager) {
+ LIST_REMOVE(operations, o->manager->operations, o);
+ o->manager->n_operations--;
+ }
+
+ return mfree(o);
+}
diff --git a/src/portable/portabled-operation.h b/src/portable/portabled-operation.h
new file mode 100644
index 0000000..701f399
--- /dev/null
+++ b/src/portable/portabled-operation.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+#include "list.h"
+
+typedef struct Operation Operation;
+
+#include "portabled.h"
+
+#define OPERATIONS_MAX 64
+
+struct Operation {
+ Manager *manager;
+ pid_t pid;
+ sd_bus_message *message;
+ int errno_fd;
+ int extra_fd;
+ sd_event_source *event_source;
+ int (*done)(Operation *o, int ret, sd_bus_error *error);
+ LIST_FIELDS(Operation, operations);
+};
+
+int operation_new(Manager *manager, pid_t child, sd_bus_message *message, int errno_fd, Operation **ret);
+Operation *operation_free(Operation *o);
diff --git a/src/portable/portabled.c b/src/portable/portabled.c
new file mode 100644
index 0000000..49a359f
--- /dev/null
+++ b/src/portable/portabled.c
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "def.h"
+#include "main-func.h"
+#include "portabled-bus.h"
+#include "portabled-image-bus.h"
+#include "portabled.h"
+#include "process-util.h"
+#include "signal-util.h"
+
+static Manager* manager_unref(Manager *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_unref);
+
+static int manager_new(Manager **ret) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ int r;
+
+ assert(ret);
+
+ m = new0(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_set_watchdog(m->event, true);
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+static Manager* manager_unref(Manager *m) {
+ assert(m);
+
+ hashmap_free(m->image_cache);
+
+ sd_event_source_unref(m->image_cache_defer_event);
+
+ bus_verify_polkit_async_registry_free(m->polkit_registry);
+
+ sd_bus_flush_close_unref(m->bus);
+ sd_event_unref(m->event);
+
+ return mfree(m);
+}
+
+static int manager_connect_bus(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(!m->bus);
+
+ r = sd_bus_default_system(&m->bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = sd_bus_add_object_vtable(m->bus, NULL, "/org/freedesktop/portable1", "org.freedesktop.portable1.Manager", manager_vtable, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add manager object vtable: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/portable1/image", "org.freedesktop.portable1.Image", image_vtable, bus_image_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add image object vtable: %m");
+
+ r = sd_bus_add_node_enumerator(m->bus, NULL, "/org/freedesktop/portable1/image", bus_image_node_enumerator, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add image enumerator: %m");
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.portable1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ (void) sd_bus_set_exit_on_disconnect(m->bus, true);
+
+ return 0;
+}
+
+static int manager_startup(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = manager_connect_bus(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static bool check_idle(void *userdata) {
+ Manager *m = userdata;
+
+ return !m->operations;
+}
+
+static int manager_run(Manager *m) {
+ assert(m);
+
+ return bus_event_loop_with_idle(
+ m->event,
+ m->bus,
+ "org.freedesktop.portable1",
+ DEFAULT_EXIT_USEC,
+ check_idle, m);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (argc != 1) {
+ log_error("This program takes no arguments.");
+ return -EINVAL;
+ }
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGTERM, SIGINT, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate manager object: %m");
+
+ r = manager_startup(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fully start up daemon: %m");
+
+ log_debug("systemd-portabled running as pid " PID_FMT, getpid_cached());
+ sd_notify(false,
+ "READY=1\n"
+ "STATUS=Processing requests...");
+
+ r = manager_run(m);
+
+ (void) sd_notify(false,
+ "STOPPING=1\n"
+ "STATUS=Shutting down...");
+ log_debug("systemd-portabled stopped as pid " PID_FMT, getpid_cached());
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/portable/portabled.h b/src/portable/portabled.h
new file mode 100644
index 0000000..00461ed
--- /dev/null
+++ b/src/portable/portabled.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+#include "hashmap.h"
+#include "list.h"
+
+typedef struct Manager Manager;
+
+#include "portabled-operation.h"
+
+struct Manager {
+ sd_event *event;
+ sd_bus *bus;
+
+ Hashmap *polkit_registry;
+
+ Hashmap *image_cache;
+ sd_event_source *image_cache_defer_event;
+
+ LIST_HEAD(Operation, operations);
+ unsigned n_operations;
+};
diff --git a/src/portable/profile/default/service.conf b/src/portable/profile/default/service.conf
new file mode 100644
index 0000000..792be50
--- /dev/null
+++ b/src/portable/profile/default/service.conf
@@ -0,0 +1,32 @@
+# The "default" security profile for services, i.e. a number of useful restrictions
+
+[Service]
+MountAPIVFS=yes
+TemporaryFileSystem=/run
+BindReadOnlyPaths=/run/systemd/notify
+BindReadOnlyPaths=/dev/log /run/systemd/journal/socket /run/systemd/journal/stdout
+BindReadOnlyPaths=/etc/machine-id
+BindReadOnlyPaths=/etc/resolv.conf
+BindReadOnlyPaths=/run/dbus/system_bus_socket
+DynamicUser=yes
+RemoveIPC=yes
+CapabilityBoundingSet=CAP_CHOWN CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_FOWNER \
+ CAP_FSETID CAP_IPC_LOCK CAP_IPC_OWNER CAP_KILL CAP_MKNOD CAP_NET_ADMIN \
+ CAP_NET_BIND_SERVICE CAP_NET_BROADCAST CAP_SETGID CAP_SETPCAP \
+ CAP_SETUID CAP_SYS_ADMIN CAP_SYS_CHROOT CAP_SYS_NICE CAP_SYS_RESOURCE
+PrivateTmp=yes
+PrivateDevices=yes
+PrivateUsers=yes
+ProtectSystem=strict
+ProtectHome=yes
+ProtectKernelTunables=yes
+ProtectKernelModules=yes
+ProtectControlGroups=yes
+RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6
+LockPersonality=yes
+MemoryDenyWriteExecute=yes
+RestrictRealtime=yes
+RestrictNamespaces=yes
+SystemCallFilter=@system-service
+SystemCallErrorNumber=EPERM
+SystemCallArchitectures=native
diff --git a/src/portable/profile/nonetwork/service.conf b/src/portable/profile/nonetwork/service.conf
new file mode 100644
index 0000000..c81cebe
--- /dev/null
+++ b/src/portable/profile/nonetwork/service.conf
@@ -0,0 +1,32 @@
+# The "nonetwork" security profile for services, i.e. like "default" but without networking
+
+[Service]
+MountAPIVFS=yes
+TemporaryFileSystem=/run
+BindReadOnlyPaths=/run/systemd/notify
+BindReadOnlyPaths=/dev/log /run/systemd/journal/socket /run/systemd/journal/stdout
+BindReadOnlyPaths=/etc/machine-id
+BindReadOnlyPaths=/run/dbus/system_bus_socket
+DynamicUser=yes
+RemoveIPC=yes
+CapabilityBoundingSet=CAP_CHOWN CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_FOWNER \
+ CAP_FSETID CAP_IPC_LOCK CAP_IPC_OWNER CAP_KILL CAP_MKNOD CAP_SETGID CAP_SETPCAP \
+ CAP_SETUID CAP_SYS_ADMIN CAP_SYS_CHROOT CAP_SYS_NICE CAP_SYS_RESOURCE
+PrivateTmp=yes
+PrivateDevices=yes
+PrivateUsers=yes
+ProtectSystem=strict
+ProtectHome=yes
+ProtectKernelTunables=yes
+ProtectKernelModules=yes
+ProtectControlGroups=yes
+RestrictAddressFamilies=AF_UNIX AF_NETLINK
+LockPersonality=yes
+MemoryDenyWriteExecute=yes
+RestrictRealtime=yes
+RestrictNamespaces=yes
+SystemCallFilter=@system-service
+SystemCallErrorNumber=EPERM
+SystemCallArchitectures=native
+PrivateNetwork=yes
+IPAddressDeny=any
diff --git a/src/portable/profile/strict/service.conf b/src/portable/profile/strict/service.conf
new file mode 100644
index 0000000..d10fb5a
--- /dev/null
+++ b/src/portable/profile/strict/service.conf
@@ -0,0 +1,31 @@
+# The "strict" security profile for services, all options turned on
+
+[Service]
+MountAPIVFS=yes
+TemporaryFileSystem=/run
+BindReadOnlyPaths=/run/systemd/notify
+BindReadOnlyPaths=/dev/log /run/systemd/journal/socket /run/systemd/journal/stdout
+BindReadOnlyPaths=/etc/machine-id
+DynamicUser=yes
+RemoveIPC=yes
+CapabilityBoundingSet=
+PrivateTmp=yes
+PrivateDevices=yes
+PrivateUsers=yes
+ProtectSystem=strict
+ProtectHome=yes
+ProtectKernelTunables=yes
+ProtectKernelModules=yes
+ProtectControlGroups=yes
+RestrictAddressFamilies=AF_UNIX
+LockPersonality=yes
+NoNewPrivileges=yes
+MemoryDenyWriteExecute=yes
+RestrictRealtime=yes
+RestrictNamespaces=yes
+SystemCallFilter=@system-service
+SystemCallErrorNumber=EPERM
+SystemCallArchitectures=native
+PrivateNetwork=yes
+IPAddressDeny=any
+TasksMax=4
diff --git a/src/portable/profile/trusted/service.conf b/src/portable/profile/trusted/service.conf
new file mode 100644
index 0000000..9a6af70
--- /dev/null
+++ b/src/portable/profile/trusted/service.conf
@@ -0,0 +1,7 @@
+# The "trusted" profile for services, i.e. no restrictions are applied
+
+[Service]
+MountAPIVFS=yes
+BindPaths=/run
+BindReadOnlyPaths=/etc/machine-id
+BindReadOnlyPaths=/etc/resolv.conf
diff --git a/src/quotacheck/quotacheck.c b/src/quotacheck/quotacheck.c
new file mode 100644
index 0000000..90f542a
--- /dev/null
+++ b/src/quotacheck/quotacheck.c
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "main-func.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "util.h"
+
+static bool arg_skip = false;
+static bool arg_force = false;
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+
+ if (streq(key, "quotacheck.mode")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (streq(value, "auto"))
+ arg_force = arg_skip = false;
+ else if (streq(value, "force"))
+ arg_force = true;
+ else if (streq(value, "skip"))
+ arg_skip = true;
+ else
+ log_warning("Invalid quotacheck.mode= parameter '%s'. Ignoring.", value);
+ }
+
+#if HAVE_SYSV_COMPAT
+ else if (streq(key, "forcequotacheck") && !value) {
+ log_warning("Please use 'quotacheck.mode=force' rather than 'forcequotacheck' on the kernel command line.");
+ arg_force = true;
+ }
+#endif
+
+ return 0;
+}
+
+static void test_files(void) {
+
+#if HAVE_SYSV_COMPAT
+ if (access("/forcequotacheck", F_OK) >= 0) {
+ log_error("Please pass 'quotacheck.mode=force' on the kernel command line rather than creating /forcequotacheck on the root file system.");
+ arg_force = true;
+ }
+#endif
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_setup_service();
+
+ if (argc > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program takes no arguments.");
+
+ umask(0022);
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ test_files();
+
+ if (!arg_force) {
+ if (arg_skip)
+ return 0;
+
+ if (access("/run/systemd/quotacheck", F_OK) < 0)
+ return 0;
+ }
+
+ r = safe_fork("(quotacheck)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_WAIT|FORK_LOG, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ static const char * const cmdline[] = {
+ QUOTACHECK,
+ "-anug",
+ NULL
+ };
+
+ /* Child */
+
+ execv(cmdline[0], (char**) cmdline);
+ _exit(EXIT_FAILURE); /* Operational error */
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/random-seed/random-seed.c b/src/random-seed/random-seed.c
new file mode 100644
index 0000000..0c5f329
--- /dev/null
+++ b/src/random-seed/random-seed.c
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "string-util.h"
+#include "util.h"
+
+#define POOL_SIZE_MIN 512
+#define POOL_SIZE_MAX (10*1024*1024)
+
+static int run(int argc, char *argv[]) {
+ _cleanup_close_ int seed_fd = -1, random_fd = -1;
+ bool read_seed_file, write_seed_file;
+ _cleanup_free_ void* buf = NULL;
+ size_t buf_size = 0;
+ struct stat st;
+ ssize_t k;
+ FILE *f;
+ int r;
+
+ log_setup_service();
+
+ if (argc != 2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program requires one argument.");
+
+ umask(0022);
+
+ /* Read pool size, if possible */
+ f = fopen("/proc/sys/kernel/random/poolsize", "re");
+ if (f) {
+ if (fscanf(f, "%zu", &buf_size) > 0)
+ /* poolsize is in bits on 2.6, but we want bytes */
+ buf_size /= 8;
+
+ fclose(f);
+ }
+
+ if (buf_size < POOL_SIZE_MIN)
+ buf_size = POOL_SIZE_MIN;
+
+ r = mkdir_parents_label(RANDOM_SEED, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create directory " RANDOM_SEED_DIR ": %m");
+
+ /* When we load the seed we read it and write it to the device and then immediately update the saved seed with
+ * new data, to make sure the next boot gets seeded differently. */
+
+ if (streq(argv[1], "load")) {
+ int open_rw_error;
+
+ seed_fd = open(RANDOM_SEED, O_RDWR|O_CLOEXEC|O_NOCTTY|O_CREAT, 0600);
+ open_rw_error = -errno;
+ if (seed_fd < 0) {
+ write_seed_file = false;
+
+ seed_fd = open(RANDOM_SEED, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (seed_fd < 0) {
+ bool missing = errno == ENOENT;
+
+ log_full_errno(missing ? LOG_DEBUG : LOG_ERR,
+ open_rw_error, "Failed to open " RANDOM_SEED " for writing: %m");
+ r = log_full_errno(missing ? LOG_DEBUG : LOG_ERR,
+ errno, "Failed to open " RANDOM_SEED " for reading: %m");
+ return missing ? 0 : r;
+ }
+ } else
+ write_seed_file = true;
+
+ random_fd = open("/dev/urandom", O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
+ if (random_fd < 0) {
+ write_seed_file = false;
+
+ random_fd = open("/dev/urandom", O_WRONLY|O_CLOEXEC|O_NOCTTY, 0600);
+ if (random_fd < 0)
+ return log_error_errno(errno, "Failed to open /dev/urandom: %m");
+ }
+
+ read_seed_file = true;
+
+ } else if (streq(argv[1], "save")) {
+
+ random_fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (random_fd < 0)
+ return log_error_errno(errno, "Failed to open /dev/urandom: %m");
+
+ seed_fd = open(RANDOM_SEED, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_CREAT, 0600);
+ if (seed_fd < 0)
+ return log_error_errno(errno, "Failed to open " RANDOM_SEED ": %m");
+
+ read_seed_file = false;
+ write_seed_file = true;
+
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown verb '%s'.", argv[1]);
+
+ if (fstat(seed_fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat() seed file " RANDOM_SEED ": %m");
+
+ /* If the seed file is larger than what we expect, then honour the existing size and save/restore as much as it says */
+ if ((uint64_t) st.st_size > buf_size)
+ buf_size = MIN(st.st_size, POOL_SIZE_MAX);
+
+ buf = malloc(buf_size);
+ if (!buf)
+ return log_oom();
+
+ if (read_seed_file) {
+ sd_id128_t mid;
+ int z;
+
+ k = loop_read(seed_fd, buf, buf_size, false);
+ if (k < 0)
+ r = log_error_errno(k, "Failed to read seed from " RANDOM_SEED ": %m");
+ else if (k == 0) {
+ r = 0;
+ log_debug("Seed file " RANDOM_SEED " not yet initialized, proceeding.");
+ } else {
+ (void) lseek(seed_fd, 0, SEEK_SET);
+
+ r = loop_write(random_fd, buf, (size_t) k, false);
+ if (r < 0)
+ log_error_errno(r, "Failed to write seed to /dev/urandom: %m");
+ }
+
+ /* Let's also write the machine ID into the random seed. Why? As an extra protection against "golden
+ * images" that are put together sloppily, i.e. images which are duplicated on multiple systems but
+ * where the random seed file is not properly reset. Frequently the machine ID is properly reset on
+ * those systems however (simply because it's easier to notice, if it isn't due to address clashes and
+ * so on, while random seed equivalence is generally not noticed easily), hence let's simply write the
+ * machined ID into the random pool too. */
+ z = sd_id128_get_machine(&mid);
+ if (z < 0)
+ log_debug_errno(z, "Failed to get machine ID, ignoring: %m");
+ else {
+ z = loop_write(random_fd, &mid, sizeof(mid), false);
+ if (z < 0)
+ log_debug_errno(z, "Failed to write machine ID to /dev/urandom, ignoring: %m");
+ }
+ }
+
+ if (write_seed_file) {
+ /* This is just a safety measure. Given that we are root and
+ * most likely created the file ourselves the mode and owner
+ * should be correct anyway. */
+ (void) fchmod(seed_fd, 0600);
+ (void) fchown(seed_fd, 0, 0);
+
+ k = loop_read(random_fd, buf, buf_size, false);
+ if (k < 0)
+ return log_error_errno(k, "Failed to read new seed from /dev/urandom: %m");
+ if (k == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Got EOF while reading from /dev/urandom.");
+
+ r = loop_write(seed_fd, buf, (size_t) k, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write new random seed file: %m");
+ }
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/rc-local-generator/rc-local-generator.c b/src/rc-local-generator/rc-local-generator.c
new file mode 100644
index 0000000..7a3948e
--- /dev/null
+++ b/src/rc-local-generator/rc-local-generator.c
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "generator.h"
+#include "log.h"
+#include "mkdir.h"
+#include "string-util.h"
+#include "util.h"
+
+static const char *arg_dest = NULL;
+
+/* So you are reading this, and might wonder: why is this implemented as a generator rather than as a plain, statically
+ * enabled service that carries appropriate ConditionFileIsExecutable= lines? The answer is this: conditions bypass
+ * execution of a service's binary, but they have no influence on unit dependencies. Thus, a service that is
+ * conditioned out will still act as synchronization point in the dependency tree, and we'd rather not have that for
+ * these two legacy scripts. */
+
+static int add_symlink(const char *service, const char *where) {
+ const char *from, *to;
+
+ assert(service);
+ assert(where);
+
+ from = strjoina(SYSTEM_DATA_UNIT_PATH "/", service);
+ to = strjoina(arg_dest, "/", where, ".wants/", service);
+
+ (void) mkdir_parents_label(to, 0755);
+
+ if (symlink(from, to) < 0) {
+ if (errno == EEXIST)
+ return 0;
+
+ return log_error_errno(errno, "Failed to create symlink %s: %m", to);
+ }
+
+ return 1;
+}
+
+static int check_executable(const char *path) {
+ assert(path);
+
+ if (access(path, X_OK) < 0) {
+ if (errno == ENOENT)
+ return log_debug_errno(errno, "%s does not exist, skipping.", path);
+ if (errno == EACCES)
+ return log_info_errno(errno, "%s is not marked executable, skipping.", path);
+
+ return log_warning_errno(errno, "Couldn't determine if %s exists and is executable, skipping: %m", path);
+ }
+
+ return 0;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r = 0, k = 0;
+
+ assert_se(arg_dest = dest);
+
+ if (check_executable(RC_LOCAL_SCRIPT_PATH_START) >= 0) {
+ log_debug("Automatically adding rc-local.service.");
+
+ r = add_symlink("rc-local.service", "multi-user.target");
+ }
+
+ if (check_executable(RC_LOCAL_SCRIPT_PATH_STOP) >= 0) {
+ log_debug("Automatically adding halt-local.service.");
+
+ k = add_symlink("halt-local.service", "final.target");
+ }
+
+ return r < 0 ? r : k;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/remount-fs/remount-fs.c b/src/remount-fs/remount-fs.c
new file mode 100644
index 0000000..0bac355
--- /dev/null
+++ b/src/remount-fs/remount-fs.c
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <mntent.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "env-util.h"
+#include "exit-status.h"
+#include "log.h"
+#include "main-func.h"
+#include "mount-setup.h"
+#include "mount-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "strv.h"
+#include "util.h"
+
+/* Goes through /etc/fstab and remounts all API file systems, applying options that are in /etc/fstab that systemd
+ * might not have respected */
+
+static int track_pid(Hashmap **h, const char *path, pid_t pid) {
+ _cleanup_free_ char *c = NULL;
+ int r;
+
+ assert(h);
+ assert(path);
+ assert(pid_is_valid(pid));
+
+ r = hashmap_ensure_allocated(h, NULL);
+ if (r < 0)
+ return log_oom();
+
+ c = strdup(path);
+ if (!c)
+ return log_oom();
+
+ r = hashmap_put(*h, PID_TO_PTR(pid), c);
+ if (r < 0)
+ return log_oom();
+
+ TAKE_PTR(c);
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_hashmap_free_free_ Hashmap *pids = NULL;
+ _cleanup_endmntent_ FILE *f = NULL;
+ bool has_root = false;
+ struct mntent* me;
+ int r;
+
+ log_setup_service();
+
+ if (argc > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program takes no arguments.");
+
+ umask(0022);
+
+ f = setmntent("/etc/fstab", "re");
+ if (!f) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to open /etc/fstab: %m");
+ } else {
+ while ((me = getmntent(f))) {
+ pid_t pid;
+
+ /* Remount the root fs, /usr and all API VFS */
+ if (!mount_point_is_api(me->mnt_dir) &&
+ !PATH_IN_SET(me->mnt_dir, "/", "/usr"))
+ continue;
+
+ log_debug("Remounting %s...", me->mnt_dir);
+
+ if (path_equal(me->mnt_dir, "/"))
+ has_root = true;
+
+ r = safe_fork("(remount)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execv(MOUNT_PATH, STRV_MAKE(MOUNT_PATH, me->mnt_dir, "-o", "remount"));
+ log_error_errno(errno, "Failed to execute " MOUNT_PATH ": %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* Parent */
+ r = track_pid(&pids, me->mnt_dir, pid);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!has_root) {
+ /* The $SYSTEMD_REMOUNT_ROOT_RW environment variable is set by systemd-gpt-auto-generator to tell us
+ * whether to remount things. We honour it only if there's no explicit line in /etc/fstab configured
+ * which takes precedence. */
+
+ r = getenv_bool("SYSTEMD_REMOUNT_ROOT_RW");
+ if (r > 0) {
+ pid_t pid;
+
+ log_debug("Remounting / writable...");
+
+ r = safe_fork("(remount-rw)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execv(MOUNT_PATH, STRV_MAKE(MOUNT_PATH, "/", "-o", "remount,rw"));
+ log_error_errno(errno, "Failed to execute " MOUNT_PATH ": %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ r = track_pid(&pids, "/", pid);
+ if (r < 0)
+ return r;
+
+ } else if (r < 0 && r != -ENXIO)
+ log_warning_errno(r, "Failed to parse $SYSTEMD_REMOUNT_ROOT_RW, ignoring: %m");
+ }
+
+ r = 0;
+ while (!hashmap_isempty(pids)) {
+ _cleanup_free_ char *s = NULL;
+ siginfo_t si = {};
+
+ if (waitid(P_ALL, 0, &si, WEXITED) < 0) {
+ if (errno == EINTR)
+ continue;
+
+ return log_error_errno(errno, "waitid() failed: %m");
+ }
+
+ s = hashmap_remove(pids, PID_TO_PTR(si.si_pid));
+ if (s &&
+ !is_clean_exit(si.si_code, si.si_status, EXIT_CLEAN_COMMAND, NULL)) {
+ if (si.si_code == CLD_EXITED)
+ log_error(MOUNT_PATH " for %s exited with exit status %i.", s, si.si_status);
+ else
+ log_error(MOUNT_PATH " for %s terminated by signal %s.", s, signal_to_string(si.si_status));
+
+ r = -ENOEXEC;
+ }
+ }
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/reply-password/reply-password.c b/src/reply-password/reply-password.c
new file mode 100644
index 0000000..ee7a0ea
--- /dev/null
+++ b/src/reply-password/reply-password.c
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "util.h"
+
+static int send_on_socket(int fd, const char *socket_name, const void *packet, size_t size) {
+ union sockaddr_union sa = {};
+ int salen;
+
+ assert(fd >= 0);
+ assert(socket_name);
+ assert(packet);
+
+ salen = sockaddr_un_set_path(&sa.un, socket_name);
+ if (salen < 0)
+ return log_error_errno(salen, "Specified socket path for AF_UNIX socket invalid, refusing: %s", socket_name);
+
+ if (sendto(fd, packet, size, MSG_NOSIGNAL, &sa.sa, salen) < 0)
+ return log_error_errno(errno, "Failed to send: %m");
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_free_ char *packet = NULL;
+ _cleanup_close_ int fd = -1;
+ size_t length = 0;
+ int r;
+
+ log_setup_service();
+
+ if (argc != 3) {
+ log_error("Wrong number of arguments.");
+ return EXIT_FAILURE;
+ }
+
+ if (streq(argv[1], "1")) {
+ _cleanup_string_free_erase_ char *line = NULL;
+
+ r = read_line(stdin, LONG_LINE_MAX, &line);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read password: %m");
+ goto finish;
+ }
+ if (r == 0) {
+ log_error("Got EOF while reading password.");
+ r = -EIO;
+ goto finish;
+ }
+
+ packet = strjoin("+", line);
+ if (!packet) {
+ r = log_oom();
+ goto finish;
+ }
+
+ length = 1 + strlen(line) + 1;
+
+ } else if (streq(argv[1], "0")) {
+ packet = strdup("-");
+ if (!packet) {
+ r = log_oom();
+ goto finish;
+ }
+
+ length = 1;
+
+ } else {
+ log_error("Invalid first argument %s", argv[1]);
+ r = -EINVAL;
+ goto finish;
+ }
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0) {
+ r = log_error_errno(errno, "socket() failed: %m");
+ goto finish;
+ }
+
+ r = send_on_socket(fd, argv[2], packet, length);
+
+finish:
+ explicit_bzero_safe(packet, length);
+
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/resolve/RFCs b/src/resolve/RFCs
new file mode 100644
index 0000000..7190c16
--- /dev/null
+++ b/src/resolve/RFCs
@@ -0,0 +1,60 @@
+Y = Comprehensively Implemented, to the point appropriate for resolved
+D = Comprehensively Implemented, by a dependency of resolved
+! = Missing and something we might want to implement
+~ = Needs no explicit support or doesn't apply
+? = Is this relevant today?
+ = We are working on this
+
+Y https://tools.ietf.org/html/rfc1034 → DOMAIN NAMES - CONCEPTS AND FACILITIES
+Y https://tools.ietf.org/html/rfc1035 → DOMAIN NAMES - IMPLEMENTATION AND SPECIFICATION
+? https://tools.ietf.org/html/rfc1101 → DNS Encoding of Network Names and Other Types
+Y https://tools.ietf.org/html/rfc1123 → Requirements for Internet Hosts — Application and Support
+~ https://tools.ietf.org/html/rfc1464 → Using the Domain Name System To Store Arbitrary String Attributes
+Y https://tools.ietf.org/html/rfc1536 → Common DNS Implementation Errors and Suggested Fixes
+Y https://tools.ietf.org/html/rfc1876 → A Means for Expressing Location Information in the Domain Name System
+Y https://tools.ietf.org/html/rfc2181 → Clarifications to the DNS Specification
+Y https://tools.ietf.org/html/rfc2308 → Negative Caching of DNS Queries (DNS NCACHE)
+Y https://tools.ietf.org/html/rfc2782 → A DNS RR for specifying the location of services (DNS SRV)
+D https://tools.ietf.org/html/rfc3492 → Punycode: A Bootstring encoding of Unicode for Internationalized Domain Names in Applications (IDNA)
+Y https://tools.ietf.org/html/rfc3596 → DNS Extensions to Support IP Version 6
+Y https://tools.ietf.org/html/rfc3597 → Handling of Unknown DNS Resource Record (RR) Types
+Y https://tools.ietf.org/html/rfc4033 → DNS Security Introduction and Requirements
+Y https://tools.ietf.org/html/rfc4034 → Resource Records for the DNS Security Extensions
+Y https://tools.ietf.org/html/rfc4035 → Protocol Modifications for the DNS Security Extensions
+! https://tools.ietf.org/html/rfc4183 → A Suggested Scheme for DNS Resolution of Networks and Gateways
+Y https://tools.ietf.org/html/rfc4255 → Using DNS to Securely Publish Secure Shell (SSH) Key Fingerprints
+Y https://tools.ietf.org/html/rfc4343 → Domain Name System (DNS) Case Insensitivity Clarification
+~ https://tools.ietf.org/html/rfc4470 → Minimally Covering NSEC Records and DNSSEC On-line Signing
+Y https://tools.ietf.org/html/rfc4501 → Domain Name System Uniform Resource Identifiers
+Y https://tools.ietf.org/html/rfc4509 → Use of SHA-256 in DNSSEC Delegation Signer (DS) Resource Records (RRs)
+~ https://tools.ietf.org/html/rfc4592 → The Role of Wildcards in the Domain Name System
+~ https://tools.ietf.org/html/rfc4697 → Observed DNS Resolution Misbehavior
+Y https://tools.ietf.org/html/rfc4795 → Link-Local Multicast Name Resolution (LLMNR)
+Y https://tools.ietf.org/html/rfc5011 → Automated Updates of DNS Security (DNSSEC) Trust Anchors
+Y https://tools.ietf.org/html/rfc5155 → DNS Security (DNSSEC) Hashed Authenticated Denial of Existence
+Y https://tools.ietf.org/html/rfc5452 → Measures for Making DNS More Resilient against Forged Answers
+Y https://tools.ietf.org/html/rfc5702 → Use of SHA-2 Algorithms with RSA in DNSKEY and RRSIG Resource Records for DNSSEC
+Y https://tools.ietf.org/html/rfc5890 → Internationalized Domain Names for Applications (IDNA): Definitions and Document Framework
+Y https://tools.ietf.org/html/rfc5891 → Internationalized Domain Names in Applications (IDNA): Protocol
+Y https://tools.ietf.org/html/rfc5966 → DNS Transport over TCP - Implementation Requirements
+Y https://tools.ietf.org/html/rfc6303 → Locally Served DNS Zones
+Y https://tools.ietf.org/html/rfc6604 → xNAME RCODE and Status Bits Clarification
+Y https://tools.ietf.org/html/rfc6605 → Elliptic Curve Digital Signature Algorithm (DSA) for DNSSEC
+ https://tools.ietf.org/html/rfc6672 → DNAME Redirection in the DNS
+! https://tools.ietf.org/html/rfc6731 → Improved Recursive DNS Server Selection for Multi-Interfaced Nodes
+Y https://tools.ietf.org/html/rfc6761 → Special-Use Domain Names
+ https://tools.ietf.org/html/rfc6762 → Multicast DNS
+ https://tools.ietf.org/html/rfc6763 → DNS-Based Service Discovery
+~ https://tools.ietf.org/html/rfc6781 → DNSSEC Operational Practices, Version 2
+Y https://tools.ietf.org/html/rfc6840 → Clarifications and Implementation Notes for DNS Security (DNSSEC)
+Y https://tools.ietf.org/html/rfc6891 → Extension Mechanisms for DNS (EDNS(0))
+Y https://tools.ietf.org/html/rfc6944 → Applicability Statement: DNS Security (DNSSEC) DNSKEY Algorithm Implementation Status
+Y https://tools.ietf.org/html/rfc6975 → Signaling Cryptographic Algorithm Understanding in DNS Security Extensions (DNSSEC)
+Y https://tools.ietf.org/html/rfc7129 → Authenticated Denial of Existence in the DNS
+Y https://tools.ietf.org/html/rfc7646 → Definition and Use of DNSSEC Negative Trust Anchors
+~ https://tools.ietf.org/html/rfc7719 → DNS Terminology
+Y https://tools.ietf.org/html/rfc8080 → Edwards-Curve Digital Security Algorithm (EdDSA) for DNSSEC
+
+Also relevant:
+
+ https://www.iab.org/documents/correspondence-reports-documents/2013-2/iab-statement-dotless-domains-considered-harmful/
diff --git a/src/resolve/dns-type.c b/src/resolve/dns-type.c
new file mode 100644
index 0000000..c96f60a
--- /dev/null
+++ b/src/resolve/dns-type.c
@@ -0,0 +1,316 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/socket.h>
+#include <errno.h>
+
+#include "dns-type.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+typedef const struct {
+ uint16_t type;
+ const char *name;
+} dns_type;
+
+static const struct dns_type_name *
+lookup_dns_type (register const char *str, register GPERF_LEN_TYPE len);
+
+#include "dns_type-from-name.h"
+#include "dns_type-to-name.h"
+
+int dns_type_from_string(const char *s) {
+ const struct dns_type_name *sc;
+
+ assert(s);
+
+ sc = lookup_dns_type(s, strlen(s));
+ if (sc)
+ return sc->id;
+
+ s = startswith_no_case(s, "TYPE");
+ if (s) {
+ unsigned x;
+
+ if (safe_atou(s, &x) >= 0 &&
+ x <= UINT16_MAX)
+ return (int) x;
+ }
+
+ return _DNS_TYPE_INVALID;
+}
+
+bool dns_type_is_pseudo(uint16_t type) {
+
+ /* Checks whether the specified type is a "pseudo-type". What
+ * a "pseudo-type" precisely is, is defined only very weakly,
+ * but apparently entails all RR types that are not actually
+ * stored as RRs on the server and should hence also not be
+ * cached. We use this list primarily to validate NSEC type
+ * bitfields, and to verify what to cache. */
+
+ return IN_SET(type,
+ 0, /* A Pseudo RR type, according to RFC 2931 */
+ DNS_TYPE_ANY,
+ DNS_TYPE_AXFR,
+ DNS_TYPE_IXFR,
+ DNS_TYPE_OPT,
+ DNS_TYPE_TSIG,
+ DNS_TYPE_TKEY
+ );
+}
+
+bool dns_class_is_pseudo(uint16_t class) {
+ return class == DNS_TYPE_ANY;
+}
+
+bool dns_type_is_valid_query(uint16_t type) {
+
+ /* The types valid as questions in packets */
+
+ return !IN_SET(type,
+ 0,
+ DNS_TYPE_OPT,
+ DNS_TYPE_TSIG,
+ DNS_TYPE_TKEY,
+
+ /* RRSIG are technically valid as questions, but we refuse doing explicit queries for them, as
+ * they aren't really payload, but signatures for payload, and cannot be validated on their
+ * own. After all they are the signatures, and have no signatures of their own validating
+ * them. */
+ DNS_TYPE_RRSIG);
+}
+
+bool dns_type_is_zone_transer(uint16_t type) {
+
+ /* Zone transfers, either normal or incremental */
+
+ return IN_SET(type,
+ DNS_TYPE_AXFR,
+ DNS_TYPE_IXFR);
+}
+
+bool dns_type_is_valid_rr(uint16_t type) {
+
+ /* The types valid as RR in packets (but not necessarily
+ * stored on servers). */
+
+ return !IN_SET(type,
+ DNS_TYPE_ANY,
+ DNS_TYPE_AXFR,
+ DNS_TYPE_IXFR);
+}
+
+bool dns_class_is_valid_rr(uint16_t class) {
+ return class != DNS_CLASS_ANY;
+}
+
+bool dns_type_may_redirect(uint16_t type) {
+ /* The following record types should never be redirected using
+ * CNAME/DNAME RRs. See
+ * <https://tools.ietf.org/html/rfc4035#section-2.5>. */
+
+ if (dns_type_is_pseudo(type))
+ return false;
+
+ return !IN_SET(type,
+ DNS_TYPE_CNAME,
+ DNS_TYPE_DNAME,
+ DNS_TYPE_NSEC3,
+ DNS_TYPE_NSEC,
+ DNS_TYPE_RRSIG,
+ DNS_TYPE_NXT,
+ DNS_TYPE_SIG,
+ DNS_TYPE_KEY);
+}
+
+bool dns_type_may_wildcard(uint16_t type) {
+
+ /* The following records may not be expanded from wildcard RRsets */
+
+ if (dns_type_is_pseudo(type))
+ return false;
+
+ return !IN_SET(type,
+ DNS_TYPE_NSEC3,
+ DNS_TYPE_SOA,
+
+ /* Prohibited by https://tools.ietf.org/html/rfc4592#section-4.4 */
+ DNS_TYPE_DNAME);
+}
+
+bool dns_type_apex_only(uint16_t type) {
+
+ /* Returns true for all RR types that may only appear signed in a zone apex */
+
+ return IN_SET(type,
+ DNS_TYPE_SOA,
+ DNS_TYPE_NS, /* this one can appear elsewhere, too, but not signed */
+ DNS_TYPE_DNSKEY,
+ DNS_TYPE_NSEC3PARAM);
+}
+
+bool dns_type_is_dnssec(uint16_t type) {
+ return IN_SET(type,
+ DNS_TYPE_DS,
+ DNS_TYPE_DNSKEY,
+ DNS_TYPE_RRSIG,
+ DNS_TYPE_NSEC,
+ DNS_TYPE_NSEC3,
+ DNS_TYPE_NSEC3PARAM);
+}
+
+bool dns_type_is_obsolete(uint16_t type) {
+ return IN_SET(type,
+ /* Obsoleted by RFC 973 */
+ DNS_TYPE_MD,
+ DNS_TYPE_MF,
+ DNS_TYPE_MAILA,
+
+ /* Kinda obsoleted by RFC 2505 */
+ DNS_TYPE_MB,
+ DNS_TYPE_MG,
+ DNS_TYPE_MR,
+ DNS_TYPE_MINFO,
+ DNS_TYPE_MAILB,
+
+ /* RFC1127 kinda obsoleted this by recommending against its use */
+ DNS_TYPE_WKS,
+
+ /* Declared historical by RFC 6563 */
+ DNS_TYPE_A6,
+
+ /* Obsoleted by DNSSEC-bis */
+ DNS_TYPE_NXT,
+
+ /* RFC 1035 removed support for concepts that needed this from RFC 883 */
+ DNS_TYPE_NULL);
+}
+
+bool dns_type_needs_authentication(uint16_t type) {
+
+ /* Returns true for all (non-obsolete) RR types where records are not useful if they aren't
+ * authenticated. I.e. everything that contains crypto keys. */
+
+ return IN_SET(type,
+ DNS_TYPE_CERT,
+ DNS_TYPE_SSHFP,
+ DNS_TYPE_IPSECKEY,
+ DNS_TYPE_DS,
+ DNS_TYPE_DNSKEY,
+ DNS_TYPE_TLSA,
+ DNS_TYPE_CDNSKEY,
+ DNS_TYPE_OPENPGPKEY,
+ DNS_TYPE_CAA);
+}
+
+int dns_type_to_af(uint16_t t) {
+ switch (t) {
+
+ case DNS_TYPE_A:
+ return AF_INET;
+
+ case DNS_TYPE_AAAA:
+ return AF_INET6;
+
+ case DNS_TYPE_ANY:
+ return AF_UNSPEC;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+const char *dns_class_to_string(uint16_t class) {
+
+ switch (class) {
+
+ case DNS_CLASS_IN:
+ return "IN";
+
+ case DNS_CLASS_ANY:
+ return "ANY";
+ }
+
+ return NULL;
+}
+
+int dns_class_from_string(const char *s) {
+
+ if (!s)
+ return _DNS_CLASS_INVALID;
+
+ if (strcaseeq(s, "IN"))
+ return DNS_CLASS_IN;
+ else if (strcaseeq(s, "ANY"))
+ return DNS_CLASS_ANY;
+
+ return _DNS_CLASS_INVALID;
+}
+
+const char* tlsa_cert_usage_to_string(uint8_t cert_usage) {
+
+ switch (cert_usage) {
+
+ case 0:
+ return "CA constraint";
+
+ case 1:
+ return "Service certificate constraint";
+
+ case 2:
+ return "Trust anchor assertion";
+
+ case 3:
+ return "Domain-issued certificate";
+
+ case 4 ... 254:
+ return "Unassigned";
+
+ case 255:
+ return "Private use";
+ }
+
+ return NULL; /* clang cannot count that we covered everything */
+}
+
+const char* tlsa_selector_to_string(uint8_t selector) {
+ switch (selector) {
+
+ case 0:
+ return "Full Certificate";
+
+ case 1:
+ return "SubjectPublicKeyInfo";
+
+ case 2 ... 254:
+ return "Unassigned";
+
+ case 255:
+ return "Private use";
+ }
+
+ return NULL;
+}
+
+const char* tlsa_matching_type_to_string(uint8_t selector) {
+
+ switch (selector) {
+
+ case 0:
+ return "No hash used";
+
+ case 1:
+ return "SHA-256";
+
+ case 2:
+ return "SHA-512";
+
+ case 3 ... 254:
+ return "Unassigned";
+
+ case 255:
+ return "Private use";
+ }
+
+ return NULL;
+}
diff --git a/src/resolve/dns-type.h b/src/resolve/dns-type.h
new file mode 100644
index 0000000..8721536
--- /dev/null
+++ b/src/resolve/dns-type.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "macro.h"
+
+/* DNS record types, taken from
+ * http://www.iana.org/assignments/dns-parameters/dns-parameters.xhtml.
+ */
+enum {
+ /* Normal records */
+ DNS_TYPE_A = 0x01,
+ DNS_TYPE_NS,
+ DNS_TYPE_MD,
+ DNS_TYPE_MF,
+ DNS_TYPE_CNAME,
+ DNS_TYPE_SOA,
+ DNS_TYPE_MB,
+ DNS_TYPE_MG,
+ DNS_TYPE_MR,
+ DNS_TYPE_NULL,
+ DNS_TYPE_WKS,
+ DNS_TYPE_PTR,
+ DNS_TYPE_HINFO,
+ DNS_TYPE_MINFO,
+ DNS_TYPE_MX,
+ DNS_TYPE_TXT,
+ DNS_TYPE_RP,
+ DNS_TYPE_AFSDB,
+ DNS_TYPE_X25,
+ DNS_TYPE_ISDN,
+ DNS_TYPE_RT,
+ DNS_TYPE_NSAP,
+ DNS_TYPE_NSAP_PTR,
+ DNS_TYPE_SIG,
+ DNS_TYPE_KEY,
+ DNS_TYPE_PX,
+ DNS_TYPE_GPOS,
+ DNS_TYPE_AAAA,
+ DNS_TYPE_LOC,
+ DNS_TYPE_NXT,
+ DNS_TYPE_EID,
+ DNS_TYPE_NIMLOC,
+ DNS_TYPE_SRV,
+ DNS_TYPE_ATMA,
+ DNS_TYPE_NAPTR,
+ DNS_TYPE_KX,
+ DNS_TYPE_CERT,
+ DNS_TYPE_A6,
+ DNS_TYPE_DNAME,
+ DNS_TYPE_SINK,
+ DNS_TYPE_OPT, /* EDNS0 option */
+ DNS_TYPE_APL,
+ DNS_TYPE_DS,
+ DNS_TYPE_SSHFP,
+ DNS_TYPE_IPSECKEY,
+ DNS_TYPE_RRSIG,
+ DNS_TYPE_NSEC,
+ DNS_TYPE_DNSKEY,
+ DNS_TYPE_DHCID,
+ DNS_TYPE_NSEC3,
+ DNS_TYPE_NSEC3PARAM,
+ DNS_TYPE_TLSA,
+
+ DNS_TYPE_HIP = 0x37,
+ DNS_TYPE_NINFO,
+ DNS_TYPE_RKEY,
+ DNS_TYPE_TALINK,
+ DNS_TYPE_CDS,
+ DNS_TYPE_CDNSKEY,
+ DNS_TYPE_OPENPGPKEY,
+
+ DNS_TYPE_SPF = 0x63,
+ DNS_TYPE_NID,
+ DNS_TYPE_L32,
+ DNS_TYPE_L64,
+ DNS_TYPE_LP,
+ DNS_TYPE_EUI48,
+ DNS_TYPE_EUI64,
+
+ DNS_TYPE_TKEY = 0xF9,
+ DNS_TYPE_TSIG,
+ DNS_TYPE_IXFR,
+ DNS_TYPE_AXFR,
+ DNS_TYPE_MAILB,
+ DNS_TYPE_MAILA,
+ DNS_TYPE_ANY,
+ DNS_TYPE_URI,
+ DNS_TYPE_CAA,
+ DNS_TYPE_TA = 0x8000,
+ DNS_TYPE_DLV,
+
+ _DNS_TYPE_MAX,
+ _DNS_TYPE_INVALID = -1
+};
+
+assert_cc(DNS_TYPE_SSHFP == 44);
+assert_cc(DNS_TYPE_TLSA == 52);
+assert_cc(DNS_TYPE_ANY == 255);
+
+/* DNS record classes, see RFC 1035 */
+enum {
+ DNS_CLASS_IN = 0x01,
+ DNS_CLASS_ANY = 0xFF,
+
+ _DNS_CLASS_MAX,
+ _DNS_CLASS_INVALID = -1
+};
+
+#define _DNS_CLASS_STRING_MAX (sizeof "CLASS" + DECIMAL_STR_MAX(uint16_t))
+#define _DNS_TYPE_STRING_MAX (sizeof "CLASS" + DECIMAL_STR_MAX(uint16_t))
+
+bool dns_type_is_pseudo(uint16_t type);
+bool dns_type_is_valid_query(uint16_t type);
+bool dns_type_is_valid_rr(uint16_t type);
+bool dns_type_may_redirect(uint16_t type);
+bool dns_type_is_dnssec(uint16_t type);
+bool dns_type_is_obsolete(uint16_t type);
+bool dns_type_may_wildcard(uint16_t type);
+bool dns_type_apex_only(uint16_t type);
+bool dns_type_needs_authentication(uint16_t type);
+bool dns_type_is_zone_transer(uint16_t type);
+int dns_type_to_af(uint16_t type);
+
+bool dns_class_is_pseudo(uint16_t class);
+bool dns_class_is_valid_rr(uint16_t class);
+
+/* TYPE?? follows http://tools.ietf.org/html/rfc3597#section-5 */
+const char *dns_type_to_string(int type);
+int dns_type_from_string(const char *s);
+
+const char *dns_class_to_string(uint16_t class);
+int dns_class_from_string(const char *name);
+
+/* https://tools.ietf.org/html/draft-ietf-dane-protocol-23#section-7.2 */
+const char *tlsa_cert_usage_to_string(uint8_t cert_usage);
+
+/* https://tools.ietf.org/html/draft-ietf-dane-protocol-23#section-7.3 */
+const char *tlsa_selector_to_string(uint8_t selector);
+
+/* https://tools.ietf.org/html/draft-ietf-dane-protocol-23#section-7.4 */
+const char *tlsa_matching_type_to_string(uint8_t selector);
+
+/* https://tools.ietf.org/html/rfc6844#section-5.1 */
+#define CAA_FLAG_CRITICAL (1u << 7)
diff --git a/src/resolve/dns_type-to-name.awk b/src/resolve/dns_type-to-name.awk
new file mode 100644
index 0000000..badb182
--- /dev/null
+++ b/src/resolve/dns_type-to-name.awk
@@ -0,0 +1,11 @@
+BEGIN{
+ print "const char *dns_type_to_string(int type) {\n\tswitch(type) {"
+}
+{
+ printf " case DNS_TYPE_%s: return ", $1;
+ sub(/_/, "-");
+ printf "\"%s\";\n", $1
+}
+END{
+ print " default: return NULL;\n\t}\n}\n"
+}
diff --git a/src/resolve/generate-dns_type-gperf.py b/src/resolve/generate-dns_type-gperf.py
new file mode 100755
index 0000000..d4f7b94
--- /dev/null
+++ b/src/resolve/generate-dns_type-gperf.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+
+"""Generate %-from-name.gperf from %-list.txt
+"""
+
+import sys
+
+name, prefix, input = sys.argv[1:]
+
+print("""\
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \\"-Wimplicit-fallthrough\\"")
+#endif
+%}""")
+print("""\
+struct {}_name {{ const char* name; int id; }};
+%null-strings
+%%""".format(name))
+
+for line in open(input):
+ line = line.rstrip()
+ s = line.replace('_', '-')
+ print("{}, {}{}".format(s, prefix, line))
diff --git a/src/resolve/generate-dns_type-list.sed b/src/resolve/generate-dns_type-list.sed
new file mode 100644
index 0000000..b7bc30f
--- /dev/null
+++ b/src/resolve/generate-dns_type-list.sed
@@ -0,0 +1 @@
+s/.* DNS_TYPE_(\w+).*/\1/p
diff --git a/src/resolve/meson.build b/src/resolve/meson.build
new file mode 100644
index 0000000..92b67b6
--- /dev/null
+++ b/src/resolve/meson.build
@@ -0,0 +1,231 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+basic_dns_sources = files('''
+ resolved-dns-dnssec.c
+ resolved-dns-dnssec.h
+ resolved-dns-packet.c
+ resolved-dns-packet.h
+ resolved-dns-rr.c
+ resolved-dns-rr.h
+ resolved-dns-answer.c
+ resolved-dns-answer.h
+ resolved-dns-question.c
+ resolved-dns-question.h
+ dns-type.c
+'''.split())
+
+dns_type_h = files('dns-type.h')[0]
+
+systemd_resolved_sources = files('''
+ resolved.c
+ resolved-manager.c
+ resolved-manager.h
+ resolved-dnssd.c
+ resolved-dnssd.h
+ resolved-dnssd-bus.c
+ resolved-dnssd-bus.h
+ resolved-conf.c
+ resolved-conf.h
+ resolved-resolv-conf.c
+ resolved-resolv-conf.h
+ resolved-bus.c
+ resolved-bus.h
+ resolved-link.h
+ resolved-link.c
+ resolved-link-bus.c
+ resolved-link-bus.h
+ resolved-llmnr.h
+ resolved-llmnr.c
+ resolved-mdns.h
+ resolved-mdns.c
+ resolved-def.h
+ resolved-dns-query.h
+ resolved-dns-query.c
+ resolved-dns-synthesize.h
+ resolved-dns-synthesize.c
+ resolved-dns-transaction.h
+ resolved-dns-transaction.c
+ resolved-dns-scope.h
+ resolved-dns-scope.c
+ resolved-dns-server.h
+ resolved-dns-server.c
+ resolved-dns-search-domain.h
+ resolved-dns-search-domain.c
+ resolved-dns-cache.h
+ resolved-dns-cache.c
+ resolved-dns-zone.h
+ resolved-dns-zone.c
+ resolved-dns-stream.h
+ resolved-dns-stream.c
+ resolved-dns-trust-anchor.h
+ resolved-dns-trust-anchor.c
+ resolved-dns-stub.h
+ resolved-dns-stub.c
+ resolved-etc-hosts.h
+ resolved-etc-hosts.c
+ resolved-dnstls.h
+'''.split())
+
+resolvectl_sources = files('''
+ resolvconf-compat.c
+ resolvconf-compat.h
+ resolvectl.c
+ resolvectl.h
+'''.split())
+
+############################################################
+
+dns_type_list_txt = custom_target(
+ 'dns_type-list.txt',
+ input : ['generate-dns_type-list.sed', dns_type_h],
+ output : 'dns_type-list.txt',
+ command : [sed, '-n', '-r', '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+generate_dns_type_gperf = find_program('generate-dns_type-gperf.py')
+
+dns_type_headers = [dns_type_h]
+foreach item : [['dns_type', dns_type_list_txt, 'dns_type', 'DNS_TYPE_']]
+
+ fname = '@0@-from-name.gperf'.format(item[0])
+ gperf_file = custom_target(
+ fname,
+ input : item[1],
+ output : fname,
+ command : [generate_dns_type_gperf, item[2], item[3], '@INPUT@'],
+ capture : true)
+
+ fname = '@0@-from-name.h'.format(item[0])
+ target1 = custom_target(
+ fname,
+ input : gperf_file,
+ output : fname,
+ command : [gperf,
+ '-L', 'ANSI-C', '-t', '--ignore-case',
+ '-N', 'lookup_@0@'.format(item[2]),
+ '-H', 'hash_@0@_name'.format(item[2]),
+ '-p', '-C',
+ '@INPUT@'],
+ capture : true)
+
+ fname = '@0@-to-name.h'.format(item[0])
+ awkscript = '@0@-to-name.awk'.format(item[0])
+ target2 = custom_target(
+ fname,
+ input : [awkscript, item[1]],
+ output : fname,
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+ dns_type_headers += [target1, target2]
+endforeach
+
+resolved_gperf_c = custom_target(
+ 'resolved_gperf.c',
+ input : 'resolved-gperf.gperf',
+ output : 'resolved-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+resolved_dnssd_gperf_c = custom_target(
+ 'resolved_dnssd_gperf.c',
+ input : 'resolved-dnssd-gperf.gperf',
+ output : 'resolved-dnssd-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+libsystemd_resolve_core = static_library(
+ 'systemd-resolve-core',
+ basic_dns_sources,
+ dns_type_headers,
+ include_directories : includes)
+
+systemd_resolved_sources += [resolved_gperf_c, resolved_dnssd_gperf_c]
+
+systemd_resolved_dependencies = [threads, libgpg_error, libm, libidn]
+if conf.get('ENABLE_DNS_OVER_TLS') == 1
+ if conf.get('DNS_OVER_TLS_USE_GNUTLS') == 1
+ systemd_resolved_sources += files('resolved-dnstls-gnutls.c',
+ 'resolved-dnstls-gnutls.h')
+ systemd_resolved_dependencies += libgnutls
+ elif conf.get('DNS_OVER_TLS_USE_OPENSSL') == 1
+ systemd_resolved_sources += files('resolved-dnstls-openssl.c',
+ 'resolved-dnstls-openssl.h')
+ systemd_resolved_dependencies += libopenssl
+ else
+ error('unknown dependency for supporting DNS-over-TLS')
+ endif
+endif
+
+if conf.get('ENABLE_RESOLVE') == 1
+ install_data('org.freedesktop.resolve1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.resolve1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.resolve1.policy',
+ install_dir : polkitpolicydir)
+
+ resolved_conf = configure_file(
+ input : 'resolved.conf.in',
+ output : 'resolved.conf',
+ configuration : substs)
+ install_data(resolved_conf,
+ install_dir : pkgsysconfdir)
+
+ install_data('resolv.conf',
+ install_dir : rootlibexecdir)
+endif
+
+tests += [
+ [['src/resolve/test-resolve-tables.c',
+ dns_type_headers,
+ 'src/shared/test-tables.h'],
+ [libsystemd_resolve_core,
+ libshared],
+ [libgcrypt,
+ libgpg_error,
+ libm],
+ 'ENABLE_RESOLVE'],
+
+ [['src/resolve/test-dns-packet.c',
+ dns_type_headers],
+ [libsystemd_resolve_core,
+ libshared],
+ [libgcrypt,
+ libgpg_error,
+ libm],
+ 'ENABLE_RESOLVE'],
+
+ [['src/resolve/test-resolved-etc-hosts.c',
+ 'src/resolve/resolved-etc-hosts.c',
+ 'src/resolve/resolved-etc-hosts.h'],
+ [libsystemd_resolve_core,
+ libshared],
+ [libgcrypt,
+ libgpg_error,
+ libm],
+ 'ENABLE_RESOLVE'],
+
+ [['src/resolve/test-resolved-packet.c',
+ dns_type_headers],
+ [libsystemd_resolve_core,
+ libshared],
+ [libgcrypt,
+ libgpg_error,
+ libm],
+ 'ENABLE_RESOLVE'],
+
+ [['src/resolve/test-dnssec.c',
+ dns_type_headers],
+ [libsystemd_resolve_core,
+ libshared],
+ [libgcrypt,
+ libgpg_error,
+ libm],
+ 'ENABLE_RESOLVE'],
+
+ [['src/resolve/test-dnssec-complex.c',
+ 'src/resolve/dns-type.c',
+ dns_type_headers],
+ [],
+ [],
+ 'ENABLE_RESOLVE', 'manual'],
+]
diff --git a/src/resolve/org.freedesktop.resolve1.conf b/src/resolve/org.freedesktop.resolve1.conf
new file mode 100644
index 0000000..25b0977
--- /dev/null
+++ b/src/resolve/org.freedesktop.resolve1.conf
@@ -0,0 +1,27 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="systemd-resolve">
+ <allow own="org.freedesktop.resolve1"/>
+ <allow send_destination="org.freedesktop.resolve1"/>
+ <allow receive_sender="org.freedesktop.resolve1"/>
+ </policy>
+
+ <policy context="default">
+ <allow send_destination="org.freedesktop.resolve1"/>
+ <allow receive_sender="org.freedesktop.resolve1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/resolve/org.freedesktop.resolve1.policy b/src/resolve/org.freedesktop.resolve1.policy
new file mode 100644
index 0000000..b65ba3e
--- /dev/null
+++ b/src/resolve/org.freedesktop.resolve1.policy
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1+
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.resolve1.register-service">
+ <description gettext-domain="systemd">Register a DNS-SD service</description>
+ <message gettext-domain="systemd">Authentication is required to register a DNS-SD service</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-resolve</annotate>
+ </action>
+
+ <action id="org.freedesktop.resolve1.unregister-service">
+ <description gettext-domain="systemd">Unregister a DNS-SD service</description>
+ <message gettext-domain="systemd">Authentication is required to unregister a DNS-SD service</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-resolve</annotate>
+ </action>
+
+</policyconfig>
diff --git a/src/resolve/org.freedesktop.resolve1.service b/src/resolve/org.freedesktop.resolve1.service
new file mode 100644
index 0000000..2c25a61
--- /dev/null
+++ b/src/resolve/org.freedesktop.resolve1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.resolve1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.resolve1.service
diff --git a/src/resolve/resolv.conf b/src/resolve/resolv.conf
new file mode 100644
index 0000000..c3079ac
--- /dev/null
+++ b/src/resolve/resolv.conf
@@ -0,0 +1,18 @@
+# This file belongs to man:systemd-resolved(8). Do not edit.
+#
+# This is a static resolv.conf file for connecting local clients to the
+# internal DNS stub resolver of systemd-resolved. This file lists no search
+# domains.
+#
+# Run "resolvectl status" to see details about the uplink DNS servers
+# currently in use.
+#
+# Third party programs must not access this file directly, but only through the
+# symlink at /etc/resolv.conf. To manage man:resolv.conf(5) in a different way,
+# replace this symlink by a static file or a different symlink.
+#
+# See man:systemd-resolved.service(8) for details about the supported modes of
+# operation for /etc/resolv.conf.
+
+nameserver 127.0.0.53
+options edns0
diff --git a/src/resolve/resolvconf-compat.c b/src/resolve/resolvconf-compat.c
new file mode 100644
index 0000000..383d0b8
--- /dev/null
+++ b/src/resolve/resolvconf-compat.c
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+#include <net/if.h>
+
+#include "alloc-util.h"
+#include "def.h"
+#include "dns-domain.h"
+#include "extract-word.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "resolvconf-compat.h"
+#include "resolvectl.h"
+#include "resolved-def.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+
+static int resolvconf_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("resolvectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%1$s -a INTERFACE < FILE\n"
+ "%1$s -d INTERFACE\n"
+ "\n"
+ "Register DNS server and domain configuration with systemd-resolved.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -a Register per-interface DNS server and domain data\n"
+ " -d Unregister per-interface DNS server and domain data\n"
+ " -f Ignore if specified interface does not exist\n"
+ " -x Send DNS traffic preferably over this interface\n"
+ "\n"
+ "This is a compatibility alias for the resolvectl(1) tool, providing native\n"
+ "command line compatibility with the resolvconf(8) tool of various Linux\n"
+ "distributions and BSD systems. Some options supported by other implementations\n"
+ "are not supported and are ignored: -m, -p. Various options supported by other\n"
+ "implementations are not supported and will cause the invocation to fail: -u,\n"
+ "-I, -i, -l, -R, -r, -v, -V, --enable-updates, --disable-updates,\n"
+ "--updates-are-enabled.\n"
+ "\nSee the %2$s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_nameserver(const char *string) {
+ int r;
+
+ assert(string);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&string, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (strv_push(&arg_set_dns, word) < 0)
+ return log_oom();
+
+ word = NULL;
+ }
+
+ return 0;
+}
+
+static int parse_search_domain(const char *string) {
+ int r;
+
+ assert(string);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&string, &word, NULL, EXTRACT_QUOTES);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (strv_push(&arg_set_domain, word) < 0)
+ return log_oom();
+
+ word = NULL;
+ }
+
+ return 0;
+}
+
+int resolvconf_parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_ENABLE_UPDATES,
+ ARG_DISABLE_UPDATES,
+ ARG_UPDATES_ARE_ENABLED,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+
+ /* The following are specific to Debian's original resolvconf */
+ { "enable-updates", no_argument, NULL, ARG_ENABLE_UPDATES },
+ { "disable-updates", no_argument, NULL, ARG_DISABLE_UPDATES },
+ { "updates-are-enabled", no_argument, NULL, ARG_UPDATES_ARE_ENABLED },
+ {}
+ };
+
+ enum {
+ TYPE_REGULAR,
+ TYPE_PRIVATE, /* -p: Not supported, treated identically to TYPE_REGULAR */
+ TYPE_EXCLUSIVE, /* -x */
+ } type = TYPE_REGULAR;
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ /* openresolv checks these environment variables */
+ if (getenv("IF_EXCLUSIVE"))
+ type = TYPE_EXCLUSIVE;
+ if (getenv("IF_PRIVATE"))
+ type = TYPE_PRIVATE; /* not actually supported */
+
+ arg_mode = _MODE_INVALID;
+
+ while ((c = getopt_long(argc, argv, "hadxpfm:uIi:l:Rr:vV", options, NULL)) >= 0)
+ switch(c) {
+
+ case 'h':
+ return resolvconf_help();
+
+ case ARG_VERSION:
+ return version();
+
+ /* -a and -d is what everybody can agree on */
+ case 'a':
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case 'd':
+ arg_mode = MODE_REVERT_LINK;
+ break;
+
+ /* The exclusive/private/force stuff is an openresolv invention, we support in some skewed way */
+ case 'x':
+ type = TYPE_EXCLUSIVE;
+ break;
+
+ case 'p':
+ type = TYPE_PRIVATE; /* not actually supported */
+ break;
+
+ case 'f':
+ arg_ifindex_permissive = true;
+ break;
+
+ /* The metrics stuff is an openresolv invention we ignore (and don't really need) */
+ case 'm':
+ log_debug("Switch -%c ignored.", c);
+ break;
+
+ /* Everybody else can agree on the existance of -u but we don't support it. */
+ case 'u':
+
+ /* The following options are openresolv inventions we don't support. */
+ case 'I':
+ case 'i':
+ case 'l':
+ case 'R':
+ case 'r':
+ case 'v':
+ case 'V':
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Switch -%c not supported.", c);
+
+ /* The Debian resolvconf commands we don't support. */
+ case ARG_ENABLE_UPDATES:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Switch --enable-updates not supported.");
+ case ARG_DISABLE_UPDATES:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Switch --disable-updates not supported.");
+ case ARG_UPDATES_ARE_ENABLED:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Switch --updates-are-enabled not supported.");
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_mode == _MODE_INVALID)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected either -a or -d on the command line.");
+
+ if (optind+1 != argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected interface name as argument.");
+
+ r = ifname_mangle(argv[optind]);
+ if (r <= 0)
+ return r;
+
+ optind++;
+
+ if (arg_mode == MODE_SET_LINK) {
+ unsigned n = 0;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *a, *l;
+
+ r = read_line(stdin, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read from stdin: %m");
+ if (r == 0)
+ break;
+
+ n++;
+
+ l = strstrip(line);
+ if (IN_SET(*l, '#', ';', 0))
+ continue;
+
+ a = first_word(l, "nameserver");
+ if (a) {
+ (void) parse_nameserver(a);
+ continue;
+ }
+
+ a = first_word(l, "domain");
+ if (!a)
+ a = first_word(l, "search");
+ if (a) {
+ (void) parse_search_domain(a);
+ continue;
+ }
+
+ log_syntax(NULL, LOG_DEBUG, "stdin", n, 0, "Ignoring resolv.conf line: %s", l);
+ }
+
+ if (type == TYPE_EXCLUSIVE) {
+
+ /* If -x mode is selected, let's preferably route non-suffixed lookups to this interface. This
+ * somewhat matches the original -x behaviour */
+
+ r = strv_extend(&arg_set_domain, "~.");
+ if (r < 0)
+ return log_oom();
+
+ } else if (type == TYPE_PRIVATE)
+ log_debug("Private DNS server data not supported, ignoring.");
+
+ if (!arg_set_dns)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "No DNS servers specified, refusing operation.");
+ }
+
+ return 1; /* work to do */
+}
diff --git a/src/resolve/resolvconf-compat.h b/src/resolve/resolvconf-compat.h
new file mode 100644
index 0000000..872add1
--- /dev/null
+++ b/src/resolve/resolvconf-compat.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int resolvconf_parse_argv(int argc, char *argv[]);
diff --git a/src/resolve/resolvectl.c b/src/resolve/resolvectl.c
new file mode 100644
index 0000000..4d533f8
--- /dev/null
+++ b/src/resolve/resolvectl.c
@@ -0,0 +1,3163 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+#include <net/if.h>
+
+#include "sd-bus.h"
+#include "sd-netlink.h"
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "dns-domain.h"
+#include "escape.h"
+#include "gcrypt-util.h"
+#include "in-addr-util.h"
+#include "main-func.h"
+#include "missing_network.h"
+#include "netlink-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "resolvconf-compat.h"
+#include "resolvectl.h"
+#include "resolved-def.h"
+#include "resolved-dns-packet.h"
+#include "string-table.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "verbs.h"
+
+static int arg_family = AF_UNSPEC;
+static int arg_ifindex = 0;
+static char *arg_ifname = NULL;
+static uint16_t arg_type = 0;
+static uint16_t arg_class = 0;
+static bool arg_legend = true;
+static uint64_t arg_flags = 0;
+static PagerFlags arg_pager_flags = 0;
+bool arg_ifindex_permissive = false; /* If true, don't generate an error if the specified interface index doesn't exist */
+static const char *arg_service_family = NULL;
+
+typedef enum RawType {
+ RAW_NONE,
+ RAW_PAYLOAD,
+ RAW_PACKET,
+} RawType;
+static RawType arg_raw = RAW_NONE;
+
+ExecutionMode arg_mode = MODE_RESOLVE_HOST;
+
+char **arg_set_dns = NULL;
+char **arg_set_domain = NULL;
+static const char *arg_set_llmnr = NULL;
+static const char *arg_set_mdns = NULL;
+static const char *arg_set_dns_over_tls = NULL;
+static const char *arg_set_dnssec = NULL;
+static char **arg_set_nta = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_ifname, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_set_dns, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_set_domain, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_set_nta, strv_freep);
+
+typedef enum StatusMode {
+ STATUS_ALL,
+ STATUS_DNS,
+ STATUS_DOMAIN,
+ STATUS_DEFAULT_ROUTE,
+ STATUS_LLMNR,
+ STATUS_MDNS,
+ STATUS_PRIVATE,
+ STATUS_DNSSEC,
+ STATUS_NTA,
+} StatusMode;
+
+static int parse_ifindex_and_warn(const char *s) {
+ int ifi;
+
+ assert(s);
+
+ if (parse_ifindex(s, &ifi) < 0) {
+ ifi = if_nametoindex(s);
+ if (ifi <= 0)
+ return log_error_errno(errno, "Unknown interface '%s': %m", s);
+ }
+
+ return ifi;
+}
+
+int ifname_mangle(const char *s) {
+ _cleanup_free_ char *iface = NULL;
+ const char *dot;
+ int ifi;
+
+ assert(s);
+
+ dot = strchr(s, '.');
+ if (dot) {
+ log_debug("Ignoring protocol specifier '%s'.", dot + 1);
+ iface = strndup(s, dot - s);
+
+ } else
+ iface = strdup(s);
+ if (!iface)
+ return log_oom();
+
+ if (parse_ifindex(iface, &ifi) < 0) {
+ ifi = if_nametoindex(iface);
+ if (ifi <= 0) {
+ if (errno == ENODEV && arg_ifindex_permissive) {
+ log_debug("Interface '%s' not found, but -f specified, ignoring.", iface);
+ return 0; /* done */
+ }
+
+ return log_error_errno(errno, "Unknown interface '%s': %m", iface);
+ }
+ }
+
+ if (arg_ifindex > 0 && arg_ifindex != ifi) {
+ log_error("Specified multiple different interfaces. Refusing.");
+ return -EINVAL;
+ }
+
+ arg_ifindex = ifi;
+ free_and_replace(arg_ifname, iface);
+
+ return 1;
+}
+
+static void print_source(uint64_t flags, usec_t rtt) {
+ char rtt_str[FORMAT_TIMESTAMP_MAX];
+
+ if (!arg_legend)
+ return;
+
+ if (flags == 0)
+ return;
+
+ printf("\n%s-- Information acquired via", ansi_grey());
+
+ if (flags != 0)
+ printf(" protocol%s%s%s%s%s",
+ flags & SD_RESOLVED_DNS ? " DNS" :"",
+ flags & SD_RESOLVED_LLMNR_IPV4 ? " LLMNR/IPv4" : "",
+ flags & SD_RESOLVED_LLMNR_IPV6 ? " LLMNR/IPv6" : "",
+ flags & SD_RESOLVED_MDNS_IPV4 ? " mDNS/IPv4" : "",
+ flags & SD_RESOLVED_MDNS_IPV6 ? " mDNS/IPv6" : "");
+
+ assert_se(format_timespan(rtt_str, sizeof(rtt_str), rtt, 100));
+
+ printf(" in %s.%s\n"
+ "%s-- Data is authenticated: %s%s\n",
+ rtt_str, ansi_normal(),
+ ansi_grey(), yes_no(flags & SD_RESOLVED_AUTHENTICATED), ansi_normal());
+}
+
+static void print_ifindex_comment(int printed_so_far, int ifindex) {
+ char ifname[IF_NAMESIZE];
+
+ if (ifindex <= 0)
+ return;
+
+ if (!if_indextoname(ifindex, ifname))
+ log_warning_errno(errno, "Failed to resolve interface name for index %i, ignoring: %m", ifindex);
+ else
+ printf("%*s%s-- link: %s%s",
+ 60 > printed_so_far ? 60 - printed_so_far : 0, " ", /* Align comment to the 60th column */
+ ansi_grey(), ifname, ansi_normal());
+}
+
+static int resolve_host(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *canonical = NULL;
+ unsigned c = 0;
+ uint64_t flags;
+ usec_t ts;
+ int r;
+
+ assert(name);
+
+ log_debug("Resolving %s (family %s, interface %s).", name, af_to_name(arg_family) ?: "*", isempty(arg_ifname) ? "*" : arg_ifname);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &req,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "ResolveHostname");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "isit", arg_ifindex, name, arg_family, arg_flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ ts = now(CLOCK_MONOTONIC);
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "%s: resolve call failed: %s", name, bus_error_message(&error, r));
+
+ ts = now(CLOCK_MONOTONIC) - ts;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(iiay)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_enter_container(reply, 'r', "iiay")) > 0) {
+ _cleanup_free_ char *pretty = NULL;
+ int ifindex, family, k;
+ const void *a;
+ size_t sz;
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(reply, "ii", &ifindex, &family);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(reply, 'y', &a, &sz);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!IN_SET(family, AF_INET, AF_INET6)) {
+ log_debug("%s: skipping entry with family %d (%s)", name, family, af_to_name(family) ?: "unknown");
+ continue;
+ }
+
+ if (sz != FAMILY_ADDRESS_SIZE(family)) {
+ log_error("%s: systemd-resolved returned address of invalid size %zu for family %s", name, sz, af_to_name(family) ?: "unknown");
+ return -EINVAL;
+ }
+
+ r = in_addr_ifindex_to_string(family, a, ifindex, &pretty);
+ if (r < 0)
+ return log_error_errno(r, "Failed to print address for %s: %m", name);
+
+ k = printf("%*s%s %s%s%s",
+ (int) strlen(name), c == 0 ? name : "", c == 0 ? ":" : " ",
+ ansi_highlight(), pretty, ansi_normal());
+
+ print_ifindex_comment(k, ifindex);
+ fputc('\n', stdout);
+
+ c++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(reply, "st", &canonical, &flags);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!streq(name, canonical))
+ printf("%*s%s (%s)\n",
+ (int) strlen(name), c == 0 ? name : "", c == 0 ? ":" : " ",
+ canonical);
+
+ if (c == 0) {
+ log_error("%s: no addresses found", name);
+ return -ESRCH;
+ }
+
+ print_source(flags, ts);
+
+ return 0;
+}
+
+static int resolve_address(sd_bus *bus, int family, const union in_addr_union *address, int ifindex) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *pretty = NULL;
+ uint64_t flags;
+ unsigned c = 0;
+ usec_t ts;
+ int r;
+
+ assert(bus);
+ assert(IN_SET(family, AF_INET, AF_INET6));
+ assert(address);
+
+ if (ifindex <= 0)
+ ifindex = arg_ifindex;
+
+ r = in_addr_ifindex_to_string(family, address, ifindex, &pretty);
+ if (r < 0)
+ return log_oom();
+
+ log_debug("Resolving %s.", pretty);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &req,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "ResolveAddress");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "ii", ifindex, family);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(req, 'y', address, FAMILY_ADDRESS_SIZE(family));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "t", arg_flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ ts = now(CLOCK_MONOTONIC);
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "%s: resolve call failed: %s", pretty, bus_error_message(&error, r));
+
+ ts = now(CLOCK_MONOTONIC) - ts;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(is)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ while ((r = sd_bus_message_enter_container(reply, 'r', "is")) > 0) {
+ const char *n;
+ int k;
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(reply, "is", &ifindex, &n);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return r;
+
+ k = printf("%*s%s %s%s%s",
+ (int) strlen(pretty), c == 0 ? pretty : "",
+ c == 0 ? ":" : " ",
+ ansi_highlight(), n, ansi_normal());
+
+ print_ifindex_comment(k, ifindex);
+ fputc('\n', stdout);
+
+ c++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(reply, "t", &flags);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (c == 0) {
+ log_error("%s: no names found", pretty);
+ return -ESRCH;
+ }
+
+ print_source(flags, ts);
+
+ return 0;
+}
+
+static int output_rr_packet(const void *d, size_t l, int ifindex) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ int r;
+
+ r = dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, DNS_PACKET_SIZE_MAX);
+ if (r < 0)
+ return log_oom();
+
+ p->refuse_compression = true;
+
+ r = dns_packet_append_blob(p, d, l, NULL);
+ if (r < 0)
+ return log_oom();
+
+ r = dns_packet_read_rr(p, &rr, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse RR: %m");
+
+ if (arg_raw == RAW_PAYLOAD) {
+ void *data;
+ ssize_t k;
+
+ k = dns_resource_record_payload(rr, &data);
+ if (k < 0)
+ return log_error_errno(k, "Cannot dump RR: %m");
+ fwrite(data, 1, k, stdout);
+ } else {
+ const char *s;
+ int k;
+
+ s = dns_resource_record_to_string(rr);
+ if (!s)
+ return log_oom();
+
+ k = printf("%s", s);
+ print_ifindex_comment(k, ifindex);
+ fputc('\n', stdout);
+ }
+
+ return 0;
+}
+
+static int resolve_record(sd_bus *bus, const char *name, uint16_t class, uint16_t type, bool warn_missing) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ unsigned n = 0;
+ uint64_t flags;
+ int r;
+ usec_t ts;
+ bool needs_authentication = false;
+
+ assert(name);
+
+ log_debug("Resolving %s %s %s (interface %s).", name, dns_class_to_string(class), dns_type_to_string(type), isempty(arg_ifname) ? "*" : arg_ifname);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &req,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "ResolveRecord");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "isqqt", arg_ifindex, name, class, type, arg_flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ ts = now(CLOCK_MONOTONIC);
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+ if (r < 0) {
+ if (warn_missing || r != -ENXIO)
+ log_error("%s: resolve call failed: %s", name, bus_error_message(&error, r));
+ return r;
+ }
+
+ ts = now(CLOCK_MONOTONIC) - ts;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(iqqay)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_enter_container(reply, 'r', "iqqay")) > 0) {
+ uint16_t c, t;
+ int ifindex;
+ const void *d;
+ size_t l;
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(reply, "iqq", &ifindex, &c, &t);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(reply, 'y', &d, &l);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (arg_raw == RAW_PACKET) {
+ uint64_t u64 = htole64(l);
+
+ fwrite(&u64, sizeof(u64), 1, stdout);
+ fwrite(d, 1, l, stdout);
+ } else {
+ r = output_rr_packet(d, l, ifindex);
+ if (r < 0)
+ return r;
+ }
+
+ if (dns_type_needs_authentication(t))
+ needs_authentication = true;
+
+ n++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(reply, "t", &flags);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (n == 0) {
+ if (warn_missing)
+ log_error("%s: no records found", name);
+ return -ESRCH;
+ }
+
+ print_source(flags, ts);
+
+ if ((flags & SD_RESOLVED_AUTHENTICATED) == 0 && needs_authentication) {
+ fflush(stdout);
+
+ fprintf(stderr, "\n%s"
+ "WARNING: The resources shown contain cryptographic key data which could not be\n"
+ " authenticated. It is not suitable to authenticate any communication.\n"
+ " This is usually indication that DNSSEC authentication was not enabled\n"
+ " or is not available for the selected protocol or DNS servers.%s\n",
+ ansi_highlight_red(),
+ ansi_normal());
+ }
+
+ return 0;
+}
+
+static int resolve_rfc4501(sd_bus *bus, const char *name) {
+ uint16_t type = 0, class = 0;
+ const char *p, *q, *n;
+ int r;
+
+ assert(bus);
+ assert(name);
+ assert(startswith(name, "dns:"));
+
+ /* Parse RFC 4501 dns: URIs */
+
+ p = name + 4;
+
+ if (p[0] == '/') {
+ const char *e;
+
+ if (p[1] != '/')
+ goto invalid;
+
+ e = strchr(p + 2, '/');
+ if (!e)
+ goto invalid;
+
+ if (e != p + 2)
+ log_warning("DNS authority specification not supported; ignoring specified authority.");
+
+ p = e + 1;
+ }
+
+ q = strchr(p, '?');
+ if (q) {
+ n = strndupa(p, q - p);
+ q++;
+
+ for (;;) {
+ const char *f;
+
+ f = startswith_no_case(q, "class=");
+ if (f) {
+ _cleanup_free_ char *t = NULL;
+ const char *e;
+
+ if (class != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "DNS class specified twice.");
+
+ e = strchrnul(f, ';');
+ t = strndup(f, e - f);
+ if (!t)
+ return log_oom();
+
+ r = dns_class_from_string(t);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown DNS class %s.", t);
+
+ class = r;
+
+ if (*e == ';') {
+ q = e + 1;
+ continue;
+ }
+
+ break;
+ }
+
+ f = startswith_no_case(q, "type=");
+ if (f) {
+ _cleanup_free_ char *t = NULL;
+ const char *e;
+
+ if (type != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "DNS type specified twice.");
+
+ e = strchrnul(f, ';');
+ t = strndup(f, e - f);
+ if (!t)
+ return log_oom();
+
+ r = dns_type_from_string(t);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown DNS type %s.", t);
+
+ type = r;
+
+ if (*e == ';') {
+ q = e + 1;
+ continue;
+ }
+
+ break;
+ }
+
+ goto invalid;
+ }
+ } else
+ n = p;
+
+ if (class == 0)
+ class = arg_class ?: DNS_CLASS_IN;
+ if (type == 0)
+ type = arg_type ?: DNS_TYPE_A;
+
+ return resolve_record(bus, n, class, type, true);
+
+invalid:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid DNS URI: %s", name);
+}
+
+static int verb_query(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+ char **p;
+ int q, r = 0;
+
+ if (arg_type != 0)
+ STRV_FOREACH(p, argv + 1) {
+ q = resolve_record(bus, *p, arg_class, arg_type, true);
+ if (q < 0)
+ r = q;
+ }
+
+ else
+ STRV_FOREACH(p, argv + 1) {
+ if (startswith(*p, "dns:"))
+ q = resolve_rfc4501(bus, *p);
+ else {
+ int family, ifindex;
+ union in_addr_union a;
+
+ q = in_addr_ifindex_from_string_auto(*p, &family, &a, &ifindex);
+ if (q >= 0)
+ q = resolve_address(bus, family, &a, ifindex);
+ else
+ q = resolve_host(bus, *p);
+ }
+ if (q < 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int resolve_service(sd_bus *bus, const char *name, const char *type, const char *domain) {
+ const char *canonical_name, *canonical_type, *canonical_domain;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ size_t indent, sz;
+ uint64_t flags;
+ const char *p;
+ unsigned c;
+ usec_t ts;
+ int r;
+
+ assert(bus);
+ assert(domain);
+
+ name = empty_to_null(name);
+ type = empty_to_null(type);
+
+ if (name)
+ log_debug("Resolving service \"%s\" of type %s in %s (family %s, interface %s).", name, type, domain, af_to_name(arg_family) ?: "*", isempty(arg_ifname) ? "*" : arg_ifname);
+ else if (type)
+ log_debug("Resolving service type %s of %s (family %s, interface %s).", type, domain, af_to_name(arg_family) ?: "*", isempty(arg_ifname) ? "*" : arg_ifname);
+ else
+ log_debug("Resolving service type %s (family %s, interface %s).", domain, af_to_name(arg_family) ?: "*", isempty(arg_ifname) ? "*" : arg_ifname);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &req,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "ResolveService");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "isssit", arg_ifindex, name, type, domain, arg_family, arg_flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ ts = now(CLOCK_MONOTONIC);
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Resolve call failed: %s", bus_error_message(&error, r));
+
+ ts = now(CLOCK_MONOTONIC) - ts;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(qqqsa(iiay)s)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ indent =
+ (name ? strlen(name) + 1 : 0) +
+ (type ? strlen(type) + 1 : 0) +
+ strlen(domain) + 2;
+
+ c = 0;
+ while ((r = sd_bus_message_enter_container(reply, 'r', "qqqsa(iiay)s")) > 0) {
+ uint16_t priority, weight, port;
+ const char *hostname, *canonical;
+
+ r = sd_bus_message_read(reply, "qqqs", &priority, &weight, &port, &hostname);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (name)
+ printf("%*s%s", (int) strlen(name), c == 0 ? name : "", c == 0 ? "/" : " ");
+ if (type)
+ printf("%*s%s", (int) strlen(type), c == 0 ? type : "", c == 0 ? "/" : " ");
+
+ printf("%*s%s %s:%u [priority=%u, weight=%u]\n",
+ (int) strlen(domain), c == 0 ? domain : "",
+ c == 0 ? ":" : " ",
+ hostname, port,
+ priority, weight);
+
+ r = sd_bus_message_enter_container(reply, 'a', "(iiay)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_enter_container(reply, 'r', "iiay")) > 0) {
+ _cleanup_free_ char *pretty = NULL;
+ int ifindex, family, k;
+ const void *a;
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(reply, "ii", &ifindex, &family);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(reply, 'y', &a, &sz);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!IN_SET(family, AF_INET, AF_INET6)) {
+ log_debug("%s: skipping entry with family %d (%s)", name, family, af_to_name(family) ?: "unknown");
+ continue;
+ }
+
+ if (sz != FAMILY_ADDRESS_SIZE(family)) {
+ log_error("%s: systemd-resolved returned address of invalid size %zu for family %s", name, sz, af_to_name(family) ?: "unknown");
+ return -EINVAL;
+ }
+
+ r = in_addr_ifindex_to_string(family, a, ifindex, &pretty);
+ if (r < 0)
+ return log_error_errno(r, "Failed to print address for %s: %m", name);
+
+ k = printf("%*s%s", (int) indent, "", pretty);
+ print_ifindex_comment(k, ifindex);
+ fputc('\n', stdout);
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(reply, "s", &canonical);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!streq(hostname, canonical))
+ printf("%*s(%s)\n", (int) indent, "", canonical);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ c++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_enter_container(reply, 'a', "ay");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read_array(reply, 'y', (const void**) &p, &sz)) > 0) {
+ _cleanup_free_ char *escaped = NULL;
+
+ escaped = cescape_length(p, sz);
+ if (!escaped)
+ return log_oom();
+
+ printf("%*s%s\n", (int) indent, "", escaped);
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(reply, "ssst", &canonical_name, &canonical_type, &canonical_domain, &flags);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ canonical_name = empty_to_null(canonical_name);
+ canonical_type = empty_to_null(canonical_type);
+
+ if (!streq_ptr(name, canonical_name) ||
+ !streq_ptr(type, canonical_type) ||
+ !streq_ptr(domain, canonical_domain)) {
+
+ printf("%*s(", (int) indent, "");
+
+ if (canonical_name)
+ printf("%s/", canonical_name);
+ if (canonical_type)
+ printf("%s/", canonical_type);
+
+ printf("%s)\n", canonical_domain);
+ }
+
+ print_source(flags, ts);
+
+ return 0;
+}
+
+static int verb_service(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+
+ if (argc == 2)
+ return resolve_service(bus, NULL, NULL, argv[1]);
+ else if (argc == 3)
+ return resolve_service(bus, NULL, argv[1], argv[2]);
+ else
+ return resolve_service(bus, argv[1], argv[2], argv[3]);
+}
+
+static int resolve_openpgp(sd_bus *bus, const char *address) {
+ const char *domain, *full;
+ int r;
+ _cleanup_free_ char *hashed = NULL;
+
+ assert(bus);
+ assert(address);
+
+ domain = strrchr(address, '@');
+ if (!domain)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Address does not contain '@': \"%s\"", address);
+ if (domain == address || domain[1] == '\0')
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Address starts or ends with '@': \"%s\"", address);
+ domain++;
+
+ r = string_hashsum_sha256(address, domain - 1 - address, &hashed);
+ if (r < 0)
+ return log_error_errno(r, "Hashing failed: %m");
+
+ strshorten(hashed, 56);
+
+ full = strjoina(hashed, "._openpgpkey.", domain);
+ log_debug("Looking up \"%s\".", full);
+
+ r = resolve_record(bus, full,
+ arg_class ?: DNS_CLASS_IN,
+ arg_type ?: DNS_TYPE_OPENPGPKEY, false);
+
+ if (IN_SET(r, -ENXIO, -ESRCH)) { /* NXDOMAIN or NODATA? */
+ hashed = mfree(hashed);
+ r = string_hashsum_sha224(address, domain - 1 - address, &hashed);
+ if (r < 0)
+ return log_error_errno(r, "Hashing failed: %m");
+
+ full = strjoina(hashed, "._openpgpkey.", domain);
+ log_debug("Looking up \"%s\".", full);
+
+ return resolve_record(bus, full,
+ arg_class ?: DNS_CLASS_IN,
+ arg_type ?: DNS_TYPE_OPENPGPKEY, true);
+ }
+
+ return r;
+}
+
+static int verb_openpgp(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+ char **p;
+ int q, r = 0;
+
+ STRV_FOREACH(p, argv + 1) {
+ q = resolve_openpgp(bus, *p);
+ if (q < 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int resolve_tlsa(sd_bus *bus, const char *family, const char *address) {
+ const char *port;
+ uint16_t port_num = 443;
+ _cleanup_free_ char *full = NULL;
+ int r;
+
+ assert(bus);
+ assert(address);
+
+ port = strrchr(address, ':');
+ if (port) {
+ r = parse_ip_port(port + 1, &port_num);
+ if (r < 0)
+ return log_error_errno(r, "Invalid port \"%s\".", port + 1);
+
+ address = strndupa(address, port - address);
+ }
+
+ r = asprintf(&full, "_%u._%s.%s",
+ port_num,
+ family,
+ address);
+ if (r < 0)
+ return log_oom();
+
+ log_debug("Looking up \"%s\".", full);
+
+ return resolve_record(bus, full,
+ arg_class ?: DNS_CLASS_IN,
+ arg_type ?: DNS_TYPE_TLSA, true);
+}
+
+static bool service_family_is_valid(const char *s) {
+ return STR_IN_SET(s, "tcp", "udp", "sctp");
+}
+
+static int verb_tlsa(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+ char **p, **args = argv + 1;
+ const char *family = "tcp";
+ int q, r = 0;
+
+ if (service_family_is_valid(argv[1])) {
+ family = argv[1];
+ args++;
+ }
+
+ STRV_FOREACH(p, args) {
+ q = resolve_tlsa(bus, family, *p);
+ if (q < 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int show_statistics(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ sd_bus *bus = userdata;
+ uint64_t n_current_transactions, n_total_transactions,
+ cache_size, n_cache_hit, n_cache_miss,
+ n_dnssec_secure, n_dnssec_insecure, n_dnssec_bogus, n_dnssec_indeterminate;
+ int r, dnssec_supported;
+
+ assert(bus);
+
+ r = sd_bus_get_property_trivial(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "DNSSECSupported",
+ &error,
+ 'b',
+ &dnssec_supported);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get DNSSEC supported state: %s", bus_error_message(&error, r));
+
+ printf("DNSSEC supported by current servers: %s%s%s\n\n",
+ ansi_highlight(),
+ yes_no(dnssec_supported),
+ ansi_normal());
+
+ r = sd_bus_get_property(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "TransactionStatistics",
+ &error,
+ &reply,
+ "(tt)");
+ if (r < 0)
+ return log_error_errno(r, "Failed to get transaction statistics: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "(tt)",
+ &n_current_transactions,
+ &n_total_transactions);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ printf("%sTransactions%s\n"
+ "Current Transactions: %" PRIu64 "\n"
+ " Total Transactions: %" PRIu64 "\n",
+ ansi_highlight(),
+ ansi_normal(),
+ n_current_transactions,
+ n_total_transactions);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_get_property(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "CacheStatistics",
+ &error,
+ &reply,
+ "(ttt)");
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cache statistics: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "(ttt)",
+ &cache_size,
+ &n_cache_hit,
+ &n_cache_miss);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ printf("\n%sCache%s\n"
+ " Current Cache Size: %" PRIu64 "\n"
+ " Cache Hits: %" PRIu64 "\n"
+ " Cache Misses: %" PRIu64 "\n",
+ ansi_highlight(),
+ ansi_normal(),
+ cache_size,
+ n_cache_hit,
+ n_cache_miss);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_get_property(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "DNSSECStatistics",
+ &error,
+ &reply,
+ "(tttt)");
+ if (r < 0)
+ return log_error_errno(r, "Failed to get DNSSEC statistics: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "(tttt)",
+ &n_dnssec_secure,
+ &n_dnssec_insecure,
+ &n_dnssec_bogus,
+ &n_dnssec_indeterminate);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ printf("\n%sDNSSEC Verdicts%s\n"
+ " Secure: %" PRIu64 "\n"
+ " Insecure: %" PRIu64 "\n"
+ " Bogus: %" PRIu64 "\n"
+ " Indeterminate: %" PRIu64 "\n",
+ ansi_highlight(),
+ ansi_normal(),
+ n_dnssec_secure,
+ n_dnssec_insecure,
+ n_dnssec_bogus,
+ n_dnssec_indeterminate);
+
+ return 0;
+}
+
+static int reset_statistics(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "ResetStatistics",
+ &error,
+ NULL,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reset statistics: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int flush_caches(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "FlushCaches",
+ &error,
+ NULL,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to flush caches: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int reset_server_features(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "ResetServerFeatures",
+ &error,
+ NULL,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reset server features: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int read_dns_server_one(sd_bus_message *m, bool with_ifindex, char **ret) {
+ _cleanup_free_ char *pretty = NULL;
+ int ifindex, family, r;
+ const void *a;
+ size_t sz;
+
+ assert(m);
+ assert(ret);
+
+ r = sd_bus_message_enter_container(m, 'r', with_ifindex ? "iiay" : "iay");
+ if (r <= 0)
+ return r;
+
+ if (with_ifindex) {
+ r = sd_bus_message_read(m, "i", &ifindex);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_read(m, "i", &family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(m, 'y', &a, &sz);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ if (with_ifindex && ifindex != 0) {
+ /* only show the global ones here */
+ *ret = NULL;
+ return 1;
+ }
+
+ if (!IN_SET(family, AF_INET, AF_INET6)) {
+ log_debug("Unexpected family, ignoring: %i", family);
+
+ *ret = NULL;
+ return 1;
+ }
+
+ if (sz != FAMILY_ADDRESS_SIZE(family)) {
+ log_debug("Address size mismatch, ignoring.");
+
+ *ret = NULL;
+ return 1;
+ }
+
+ r = in_addr_to_string(family, a, &pretty);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(pretty);
+
+ return 1;
+}
+
+static int map_link_dns_servers(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ char ***l = userdata;
+ int r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+ assert(l);
+
+ r = sd_bus_message_enter_container(m, 'a', "(iay)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ char *pretty = NULL;
+
+ r = read_dns_server_one(m, false, &pretty);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (isempty(pretty))
+ continue;
+
+ r = strv_consume(l, pretty);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int map_link_current_dns_server(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ assert(m);
+ assert(userdata);
+
+ return read_dns_server_one(m, false, userdata);
+}
+
+static int read_domain_one(sd_bus_message *m, bool with_ifindex, char **ret) {
+ _cleanup_free_ char *str = NULL;
+ int ifindex, route_only, r;
+ const char *domain;
+
+ assert(m);
+ assert(ret);
+
+ if (with_ifindex)
+ r = sd_bus_message_read(m, "(isb)", &ifindex, &domain, &route_only);
+ else
+ r = sd_bus_message_read(m, "(sb)", &domain, &route_only);
+ if (r <= 0)
+ return r;
+
+ if (with_ifindex && ifindex != 0) {
+ /* only show the global ones here */
+ *ret = NULL;
+ return 1;
+ }
+
+ if (route_only)
+ str = strappend("~", domain);
+ else
+ str = strdup(domain);
+ if (!str)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(str);
+
+ return 1;
+}
+
+static int map_link_domains(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ char ***l = userdata;
+ int r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+ assert(l);
+
+ r = sd_bus_message_enter_container(m, 'a', "(sb)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ char *pretty = NULL;
+
+ r = read_domain_one(m, false, &pretty);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (isempty(pretty))
+ continue;
+
+ r = strv_consume(l, pretty);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int status_print_strv_ifindex(int ifindex, const char *ifname, char **p) {
+ char **i;
+
+ printf("%sLink %i (%s)%s:",
+ ansi_highlight(), ifindex, ifname, ansi_normal());
+
+ STRV_FOREACH(i, p)
+ printf(" %s", *i);
+
+ printf("\n");
+
+ return 0;
+}
+
+struct link_info {
+ uint64_t scopes_mask;
+ const char *llmnr;
+ const char *mdns;
+ const char *dns_over_tls;
+ const char *dnssec;
+ char *current_dns;
+ char **dns;
+ char **domains;
+ char **ntas;
+ bool dnssec_supported;
+ bool default_route;
+};
+
+static void link_info_clear(struct link_info *p) {
+ free(p->current_dns);
+ strv_free(p->dns);
+ strv_free(p->domains);
+ strv_free(p->ntas);
+}
+
+static int status_ifindex(sd_bus *bus, int ifindex, const char *name, StatusMode mode, bool *empty_line) {
+ static const struct bus_properties_map property_map[] = {
+ { "ScopesMask", "t", NULL, offsetof(struct link_info, scopes_mask) },
+ { "DNS", "a(iay)", map_link_dns_servers, offsetof(struct link_info, dns) },
+ { "CurrentDNSServer", "(iay)", map_link_current_dns_server, offsetof(struct link_info, current_dns) },
+ { "Domains", "a(sb)", map_link_domains, offsetof(struct link_info, domains) },
+ { "DefaultRoute", "b", NULL, offsetof(struct link_info, default_route) },
+ { "LLMNR", "s", NULL, offsetof(struct link_info, llmnr) },
+ { "MulticastDNS", "s", NULL, offsetof(struct link_info, mdns) },
+ { "DNSOverTLS", "s", NULL, offsetof(struct link_info, dns_over_tls) },
+ { "DNSSEC", "s", NULL, offsetof(struct link_info, dnssec) },
+ { "DNSSECNegativeTrustAnchors", "as", NULL, offsetof(struct link_info, ntas) },
+ { "DNSSECSupported", "b", NULL, offsetof(struct link_info, dnssec_supported) },
+ {}
+ };
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(link_info_clear) struct link_info link_info = {};
+ _cleanup_free_ char *ifi = NULL, *p = NULL;
+ char ifname[IF_NAMESIZE] = "";
+ char **i;
+ int r;
+
+ assert(bus);
+ assert(ifindex > 0);
+
+ if (!name) {
+ if (!if_indextoname(ifindex, ifname))
+ return log_error_errno(errno, "Failed to resolve interface name for %i: %m", ifindex);
+
+ name = ifname;
+ }
+
+ if (asprintf(&ifi, "%i", ifindex) < 0)
+ return log_oom();
+
+ r = sd_bus_path_encode("/org/freedesktop/resolve1/link", ifi, &p);
+ if (r < 0)
+ return log_oom();
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.resolve1",
+ p,
+ property_map,
+ BUS_MAP_BOOLEAN_AS_BOOL,
+ &error,
+ &m,
+ &link_info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get link data for %i: %s", ifindex, bus_error_message(&error, r));
+
+ (void) pager_open(arg_pager_flags);
+
+ if (mode == STATUS_DNS)
+ return status_print_strv_ifindex(ifindex, name, link_info.dns);
+
+ if (mode == STATUS_DOMAIN)
+ return status_print_strv_ifindex(ifindex, name, link_info.domains);
+
+ if (mode == STATUS_NTA)
+ return status_print_strv_ifindex(ifindex, name, link_info.ntas);
+
+ if (mode == STATUS_DEFAULT_ROUTE) {
+ printf("%sLink %i (%s)%s: %s\n",
+ ansi_highlight(), ifindex, name, ansi_normal(),
+ yes_no(link_info.default_route));
+
+ return 0;
+ }
+
+ if (mode == STATUS_LLMNR) {
+ printf("%sLink %i (%s)%s: %s\n",
+ ansi_highlight(), ifindex, name, ansi_normal(),
+ strna(link_info.llmnr));
+
+ return 0;
+ }
+
+ if (mode == STATUS_MDNS) {
+ printf("%sLink %i (%s)%s: %s\n",
+ ansi_highlight(), ifindex, name, ansi_normal(),
+ strna(link_info.mdns));
+
+ return 0;
+ }
+
+ if (mode == STATUS_PRIVATE) {
+ printf("%sLink %i (%s)%s: %s\n",
+ ansi_highlight(), ifindex, name, ansi_normal(),
+ strna(link_info.dns_over_tls));
+
+ return 0;
+ }
+
+ if (mode == STATUS_DNSSEC) {
+ printf("%sLink %i (%s)%s: %s\n",
+ ansi_highlight(), ifindex, name, ansi_normal(),
+ strna(link_info.dnssec));
+
+ return 0;
+ }
+
+ if (empty_line && *empty_line)
+ fputc('\n', stdout);
+
+ printf("%sLink %i (%s)%s\n",
+ ansi_highlight(), ifindex, name, ansi_normal());
+
+ if (link_info.scopes_mask == 0)
+ printf(" Current Scopes: none\n");
+ else
+ printf(" Current Scopes:%s%s%s%s%s\n",
+ link_info.scopes_mask & SD_RESOLVED_DNS ? " DNS" : "",
+ link_info.scopes_mask & SD_RESOLVED_LLMNR_IPV4 ? " LLMNR/IPv4" : "",
+ link_info.scopes_mask & SD_RESOLVED_LLMNR_IPV6 ? " LLMNR/IPv6" : "",
+ link_info.scopes_mask & SD_RESOLVED_MDNS_IPV4 ? " mDNS/IPv4" : "",
+ link_info.scopes_mask & SD_RESOLVED_MDNS_IPV6 ? " mDNS/IPv6" : "");
+
+ printf("DefaultRoute setting: %s\n"
+ " LLMNR setting: %s\n"
+ "MulticastDNS setting: %s\n"
+ " DNSOverTLS setting: %s\n"
+ " DNSSEC setting: %s\n"
+ " DNSSEC supported: %s\n",
+ yes_no(link_info.default_route),
+ strna(link_info.llmnr),
+ strna(link_info.mdns),
+ strna(link_info.dns_over_tls),
+ strna(link_info.dnssec),
+ yes_no(link_info.dnssec_supported));
+
+ if (link_info.current_dns)
+ printf(" Current DNS Server: %s\n", link_info.current_dns);
+
+ STRV_FOREACH(i, link_info.dns) {
+ printf(" %s %s\n",
+ i == link_info.dns ? "DNS Servers:" : " ",
+ *i);
+ }
+
+ STRV_FOREACH(i, link_info.domains) {
+ printf(" %s %s\n",
+ i == link_info.domains ? "DNS Domain:" : " ",
+ *i);
+ }
+
+ STRV_FOREACH(i, link_info.ntas) {
+ printf(" %s %s\n",
+ i == link_info.ntas ? "DNSSEC NTA:" : " ",
+ *i);
+ }
+
+ if (empty_line)
+ *empty_line = true;
+
+ return 0;
+}
+
+static int map_global_dns_servers(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ char ***l = userdata;
+ int r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+ assert(l);
+
+ r = sd_bus_message_enter_container(m, 'a', "(iiay)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ char *pretty = NULL;
+
+ r = read_dns_server_one(m, true, &pretty);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (isempty(pretty))
+ continue;
+
+ r = strv_consume(l, pretty);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int map_global_current_dns_server(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ assert(m);
+ assert(userdata);
+
+ return read_dns_server_one(m, true, userdata);
+}
+
+static int map_global_domains(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ char ***l = userdata;
+ int r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+ assert(l);
+
+ r = sd_bus_message_enter_container(m, 'a', "(isb)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ char *pretty = NULL;
+
+ r = read_domain_one(m, true, &pretty);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (isempty(pretty))
+ continue;
+
+ r = strv_consume(l, pretty);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int status_print_strv_global(char **p) {
+ char **i;
+
+ printf("%sGlobal%s:", ansi_highlight(), ansi_normal());
+
+ STRV_FOREACH(i, p)
+ printf(" %s", *i);
+
+ printf("\n");
+
+ return 0;
+}
+
+struct global_info {
+ char *current_dns;
+ char **dns;
+ char **fallback_dns;
+ char **domains;
+ char **ntas;
+ const char *llmnr;
+ const char *mdns;
+ const char *dns_over_tls;
+ const char *dnssec;
+ bool dnssec_supported;
+};
+
+static void global_info_clear(struct global_info *p) {
+ free(p->current_dns);
+ strv_free(p->dns);
+ strv_free(p->fallback_dns);
+ strv_free(p->domains);
+ strv_free(p->ntas);
+}
+
+static int status_global(sd_bus *bus, StatusMode mode, bool *empty_line) {
+ static const struct bus_properties_map property_map[] = {
+ { "DNS", "a(iiay)", map_global_dns_servers, offsetof(struct global_info, dns) },
+ { "FallbackDNS", "a(iiay)", map_global_dns_servers, offsetof(struct global_info, fallback_dns) },
+ { "CurrentDNSServer", "(iiay)", map_global_current_dns_server, offsetof(struct global_info, current_dns) },
+ { "Domains", "a(isb)", map_global_domains, offsetof(struct global_info, domains) },
+ { "DNSSECNegativeTrustAnchors", "as", NULL, offsetof(struct global_info, ntas) },
+ { "LLMNR", "s", NULL, offsetof(struct global_info, llmnr) },
+ { "MulticastDNS", "s", NULL, offsetof(struct global_info, mdns) },
+ { "DNSOverTLS", "s", NULL, offsetof(struct global_info, dns_over_tls) },
+ { "DNSSEC", "s", NULL, offsetof(struct global_info, dnssec) },
+ { "DNSSECSupported", "b", NULL, offsetof(struct global_info, dnssec_supported) },
+ {}
+ };
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(global_info_clear) struct global_info global_info = {};
+ char **i;
+ int r;
+
+ assert(bus);
+ assert(empty_line);
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ property_map,
+ BUS_MAP_BOOLEAN_AS_BOOL,
+ &error,
+ &m,
+ &global_info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get global data: %s", bus_error_message(&error, r));
+
+ (void) pager_open(arg_pager_flags);
+
+ if (mode == STATUS_DNS)
+ return status_print_strv_global(global_info.dns);
+
+ if (mode == STATUS_DOMAIN)
+ return status_print_strv_global(global_info.domains);
+
+ if (mode == STATUS_NTA)
+ return status_print_strv_global(global_info.ntas);
+
+ if (mode == STATUS_LLMNR) {
+ printf("%sGlobal%s: %s\n", ansi_highlight(), ansi_normal(),
+ strna(global_info.llmnr));
+
+ return 0;
+ }
+
+ if (mode == STATUS_MDNS) {
+ printf("%sGlobal%s: %s\n", ansi_highlight(), ansi_normal(),
+ strna(global_info.mdns));
+
+ return 0;
+ }
+
+ if (mode == STATUS_PRIVATE) {
+ printf("%sGlobal%s: %s\n", ansi_highlight(), ansi_normal(),
+ strna(global_info.dns_over_tls));
+
+ return 0;
+ }
+
+ if (mode == STATUS_DNSSEC) {
+ printf("%sGlobal%s: %s\n", ansi_highlight(), ansi_normal(),
+ strna(global_info.dnssec));
+
+ return 0;
+ }
+
+ printf("%sGlobal%s\n", ansi_highlight(), ansi_normal());
+
+ printf(" LLMNR setting: %s\n"
+ "MulticastDNS setting: %s\n"
+ " DNSOverTLS setting: %s\n"
+ " DNSSEC setting: %s\n"
+ " DNSSEC supported: %s\n",
+ strna(global_info.llmnr),
+ strna(global_info.mdns),
+ strna(global_info.dns_over_tls),
+ strna(global_info.dnssec),
+ yes_no(global_info.dnssec_supported));
+
+ if (global_info.current_dns)
+ printf(" Current DNS Server: %s\n", global_info.current_dns);
+
+ STRV_FOREACH(i, global_info.dns) {
+ printf(" %s %s\n",
+ i == global_info.dns ? "DNS Servers:" : " ",
+ *i);
+ }
+
+ STRV_FOREACH(i, global_info.fallback_dns) {
+ printf("%s %s\n",
+ i == global_info.fallback_dns ? "Fallback DNS Servers:" : " ",
+ *i);
+ }
+
+ STRV_FOREACH(i, global_info.domains) {
+ printf(" %s %s\n",
+ i == global_info.domains ? "DNS Domain:" : " ",
+ *i);
+ }
+
+ strv_sort(global_info.ntas);
+ STRV_FOREACH(i, global_info.ntas) {
+ printf(" %s %s\n",
+ i == global_info.ntas ? "DNSSEC NTA:" : " ",
+ *i);
+ }
+
+ *empty_line = true;
+
+ return 0;
+}
+
+static int status_all(sd_bus *bus, StatusMode mode) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ sd_netlink_message *i;
+ bool empty_line = false;
+ int r;
+
+ assert(bus);
+
+ r = status_global(bus, mode, &empty_line);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ r = sd_rtnl_message_new_link(rtnl, &req, RTM_GETLINK, 0);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate links: %m");
+
+ r = 0;
+ for (i = reply; i; i = sd_netlink_message_next(i)) {
+ const char *name;
+ int ifindex, q;
+ uint16_t type;
+
+ q = sd_netlink_message_get_type(i, &type);
+ if (q < 0)
+ return rtnl_log_parse_error(q);
+
+ if (type != RTM_NEWLINK)
+ continue;
+
+ q = sd_rtnl_message_link_get_ifindex(i, &ifindex);
+ if (q < 0)
+ return rtnl_log_parse_error(q);
+
+ if (ifindex == LOOPBACK_IFINDEX)
+ continue;
+
+ q = sd_netlink_message_read_string(i, IFLA_IFNAME, &name);
+ if (q < 0)
+ return rtnl_log_parse_error(q);
+
+ q = status_ifindex(bus, ifindex, name, mode, &empty_line);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int verb_status(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+ int q, r = 0;
+
+ if (argc > 1) {
+ char **ifname;
+ bool empty_line = false;
+
+ STRV_FOREACH(ifname, argv + 1) {
+ int ifindex;
+
+ ifindex = parse_ifindex_and_warn(*ifname);
+ if (ifindex < 0)
+ continue;
+
+ q = status_ifindex(bus, ifindex, NULL, STATUS_ALL, &empty_line);
+ if (q < 0)
+ r = q;
+ }
+ } else
+ r = status_all(bus, STATUS_ALL);
+
+ return r;
+}
+
+static int log_interface_is_managed(int r, int ifindex) {
+ char ifname[IFNAMSIZ];
+
+ return log_error_errno(r,
+ "The specified interface %s is managed by systemd-networkd. Operation refused.\n"
+ "Please configure DNS settings for systemd-networkd managed interfaces directly in their .network files.",
+ strna(if_indextoname(ifindex, ifname)));
+}
+
+static int verb_dns(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL;
+ sd_bus *bus = userdata;
+ char **p;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_DNS);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_DNS, NULL);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &req,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "SetLinkDNS");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "i", arg_ifindex);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(req, 'a', "(iay)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* If only argument is the empty string, then call SetLinkDNS() with an
+ * empty list, which will clear the list of domains for an interface. */
+ if (!strv_equal(argv + 2, STRV_MAKE(""))) {
+ STRV_FOREACH(p, argv + 2) {
+ struct in_addr_data data;
+
+ r = in_addr_from_string_auto(*p, &data.family, &data.address);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse DNS server address: %s", *p);
+
+ r = sd_bus_message_open_container(req, 'r', "iay");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "i", data.family);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(req, 'y', &data.address, FAMILY_ADDRESS_SIZE(data.family));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(req);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+ }
+
+ r = sd_bus_message_close_container(req);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, req, 0, &error, NULL);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY))
+ return log_interface_is_managed(r, arg_ifindex);
+
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set DNS configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_domain(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL;
+ sd_bus *bus = userdata;
+ char **p;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_DOMAIN);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_DOMAIN, NULL);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &req,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "SetLinkDomains");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "i", arg_ifindex);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(req, 'a', "(sb)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* If only argument is the empty string, then call SetLinkDomains() with an
+ * empty list, which will clear the list of domains for an interface. */
+ if (!strv_equal(argv + 2, STRV_MAKE(""))) {
+ STRV_FOREACH(p, argv + 2) {
+ const char *n;
+
+ n = **p == '~' ? *p + 1 : *p;
+
+ r = dns_name_is_valid(n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to validate specified domain %s: %m", n);
+ if (r == 0) {
+ log_error("Domain not valid: %s", n);
+ return -EINVAL;
+ }
+
+ r = sd_bus_message_append(req, "(sb)", n, **p == '~');
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+ }
+
+ r = sd_bus_message_close_container(req);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, req, 0, &error, NULL);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY))
+ return log_interface_is_managed(r, arg_ifindex);
+
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set domain configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_default_route(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r, b;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_DEFAULT_ROUTE);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_DEFAULT_ROUTE, NULL);
+
+ b = parse_boolean(argv[2]);
+ if (b < 0)
+ return log_error_errno(b, "Failed to parse boolean argument: %s", argv[2]);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "SetLinkDefaultRoute",
+ &error,
+ NULL,
+ "ib", arg_ifindex, b);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY))
+ return log_interface_is_managed(r, arg_ifindex);
+
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set default route configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_llmnr(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_LLMNR);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_LLMNR, NULL);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "SetLinkLLMNR",
+ &error,
+ NULL,
+ "is", arg_ifindex, argv[2]);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY))
+ return log_interface_is_managed(r, arg_ifindex);
+
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set LLMNR configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_mdns(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_MDNS);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_MDNS, NULL);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "SetLinkMulticastDNS",
+ &error,
+ NULL,
+ "is", arg_ifindex, argv[2]);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY))
+ return log_interface_is_managed(r, arg_ifindex);
+
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set MulticastDNS configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_dns_over_tls(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_PRIVATE);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_PRIVATE, NULL);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "SetLinkDNSOverTLS",
+ &error,
+ NULL,
+ "is", arg_ifindex, argv[2]);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY))
+ return log_interface_is_managed(r, arg_ifindex);
+
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set DNSOverTLS configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_dnssec(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_DNSSEC);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_DNSSEC, NULL);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "SetLinkDNSSEC",
+ &error,
+ NULL,
+ "is", arg_ifindex, argv[2]);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY))
+ return log_interface_is_managed(r, arg_ifindex);
+
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set DNSSEC configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_nta(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL;
+ sd_bus *bus = userdata;
+ char **p;
+ int r;
+ bool clear;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_NTA);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_NTA, NULL);
+
+ /* If only argument is the empty string, then call SetLinkDNSSECNegativeTrustAnchors()
+ * with an empty list, which will clear the list of domains for an interface. */
+ clear = strv_equal(argv + 2, STRV_MAKE(""));
+
+ if (!clear)
+ STRV_FOREACH(p, argv + 2) {
+ r = dns_name_is_valid(*p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to validate specified domain %s: %m", *p);
+ if (r == 0) {
+ log_error("Domain not valid: %s", *p);
+ return -EINVAL;
+ }
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &req,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "SetLinkDNSSECNegativeTrustAnchors");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "i", arg_ifindex);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(req, clear ? NULL : argv + 2);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, req, 0, &error, NULL);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY))
+ return log_interface_is_managed(r, arg_ifindex);
+
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set DNSSEC NTA configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_revert_link(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Interface argument required.");
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "RevertLink",
+ &error,
+ NULL,
+ "i", arg_ifindex);
+ if (r < 0) {
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to revert interface configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static void help_protocol_types(void) {
+ if (arg_legend)
+ puts("Known protocol types:");
+ puts("dns\nllmnr\nllmnr-ipv4\nllmnr-ipv6\nmdns\nmdns-ipv4\nmdns-ipv6");
+}
+
+static void help_dns_types(void) {
+ if (arg_legend)
+ puts("Known DNS RR types:");
+
+ DUMP_STRING_TABLE(dns_type, int, _DNS_TYPE_MAX);
+}
+
+static void help_dns_classes(void) {
+ if (arg_legend)
+ puts("Known DNS RR classes:");
+
+ DUMP_STRING_TABLE(dns_class, int, _DNS_CLASS_MAX);
+}
+
+static int compat_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("resolvectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%1$s [OPTIONS...] HOSTNAME|ADDRESS...\n"
+ "%1$s [OPTIONS...] --service [[NAME] TYPE] DOMAIN\n"
+ "%1$s [OPTIONS...] --openpgp EMAIL@DOMAIN...\n"
+ "%1$s [OPTIONS...] --statistics\n"
+ "%1$s [OPTIONS...] --reset-statistics\n"
+ "\n"
+ "Resolve domain names, IPv4 and IPv6 addresses, DNS records, and services.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " -4 Resolve IPv4 addresses\n"
+ " -6 Resolve IPv6 addresses\n"
+ " -i --interface=INTERFACE Look on interface\n"
+ " -p --protocol=PROTO|help Look via protocol\n"
+ " -t --type=TYPE|help Query RR with DNS type\n"
+ " -c --class=CLASS|help Query RR with DNS class\n"
+ " --service Resolve service (SRV)\n"
+ " --service-address=BOOL Resolve address for services (default: yes)\n"
+ " --service-txt=BOOL Resolve TXT records for services (default: yes)\n"
+ " --openpgp Query OpenPGP public key\n"
+ " --tlsa Query TLS public key\n"
+ " --cname=BOOL Follow CNAME redirects (default: yes)\n"
+ " --search=BOOL Use search domains for single-label names\n"
+ " (default: yes)\n"
+ " --raw[=payload|packet] Dump the answer as binary data\n"
+ " --legend=BOOL Print headers and additional info (default: yes)\n"
+ " --statistics Show resolver statistics\n"
+ " --reset-statistics Reset resolver statistics\n"
+ " --status Show link and server status\n"
+ " --flush-caches Flush all local DNS caches\n"
+ " --reset-server-features\n"
+ " Forget learnt DNS server feature levels\n"
+ " --set-dns=SERVER Set per-interface DNS server address\n"
+ " --set-domain=DOMAIN Set per-interface search domain\n"
+ " --set-llmnr=MODE Set per-interface LLMNR mode\n"
+ " --set-mdns=MODE Set per-interface MulticastDNS mode\n"
+ " --set-dnsovertls=MODE Set per-interface DNS-over-TLS mode\n"
+ " --set-dnssec=MODE Set per-interface DNSSEC mode\n"
+ " --set-nta=DOMAIN Set per-interface DNSSEC NTA\n"
+ " --revert Revert per-interface configuration\n"
+ "\nSee the %2$s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int native_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("resolvectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%1$s [OPTIONS...] {COMMAND} ...\n"
+ "\n"
+ "Send control commands to the network name resolution manager, or\n"
+ "resolve domain names, IPv4 and IPv6 addresses, DNS records, and services.\n"
+ "\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " -4 Resolve IPv4 addresses\n"
+ " -6 Resolve IPv6 addresses\n"
+ " -i --interface=INTERFACE Look on interface\n"
+ " -p --protocol=PROTO|help Look via protocol\n"
+ " -t --type=TYPE|help Query RR with DNS type\n"
+ " -c --class=CLASS|help Query RR with DNS class\n"
+ " --service-address=BOOL Resolve address for services (default: yes)\n"
+ " --service-txt=BOOL Resolve TXT records for services (default: yes)\n"
+ " --cname=BOOL Follow CNAME redirects (default: yes)\n"
+ " --search=BOOL Use search domains for single-label names\n"
+ " (default: yes)\n"
+ " --raw[=payload|packet] Dump the answer as binary data\n"
+ " --legend=BOOL Print headers and additional info (default: yes)\n"
+ "\n"
+ "Commands:\n"
+ " query HOSTNAME|ADDRESS... Resolve domain names, IPv4 and IPv6 addresses\n"
+ " service [[NAME] TYPE] DOMAIN Resolve service (SRV)\n"
+ " openpgp EMAIL@DOMAIN... Query OpenPGP public key\n"
+ " tlsa DOMAIN[:PORT]... Query TLS public key\n"
+ " status [LINK...] Show link and server status\n"
+ " statistics Show resolver statistics\n"
+ " reset-statistics Reset resolver statistics\n"
+ " flush-caches Flush all local DNS caches\n"
+ " reset-server-features Forget learnt DNS server feature levels\n"
+ " dns [LINK [SERVER...]] Get/set per-interface DNS server address\n"
+ " domain [LINK [DOMAIN...]] Get/set per-interface search domain\n"
+ " default-route [LINK [BOOL]] Get/set per-interface default route flag\n"
+ " llmnr [LINK [MODE]] Get/set per-interface LLMNR mode\n"
+ " mdns [LINK [MODE]] Get/set per-interface MulticastDNS mode\n"
+ " dnsovertls [LINK [MODE]] Get/set per-interface DNS-over-TLS mode\n"
+ " dnssec [LINK [MODE]] Get/set per-interface DNSSEC mode\n"
+ " nta [LINK [DOMAIN...]] Get/set per-interface DNSSEC NTA\n"
+ " revert LINK Revert per-interface configuration\n"
+ "\nSee the %2$s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int verb_help(int argc, char **argv, void *userdata) {
+ return native_help();
+}
+
+static int compat_parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_LEGEND,
+ ARG_SERVICE,
+ ARG_CNAME,
+ ARG_SERVICE_ADDRESS,
+ ARG_SERVICE_TXT,
+ ARG_OPENPGP,
+ ARG_TLSA,
+ ARG_RAW,
+ ARG_SEARCH,
+ ARG_STATISTICS,
+ ARG_RESET_STATISTICS,
+ ARG_STATUS,
+ ARG_FLUSH_CACHES,
+ ARG_RESET_SERVER_FEATURES,
+ ARG_NO_PAGER,
+ ARG_SET_DNS,
+ ARG_SET_DOMAIN,
+ ARG_SET_LLMNR,
+ ARG_SET_MDNS,
+ ARG_SET_PRIVATE,
+ ARG_SET_DNSSEC,
+ ARG_SET_NTA,
+ ARG_REVERT_LINK,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "type", required_argument, NULL, 't' },
+ { "class", required_argument, NULL, 'c' },
+ { "legend", required_argument, NULL, ARG_LEGEND },
+ { "interface", required_argument, NULL, 'i' },
+ { "protocol", required_argument, NULL, 'p' },
+ { "cname", required_argument, NULL, ARG_CNAME },
+ { "service", no_argument, NULL, ARG_SERVICE },
+ { "service-address", required_argument, NULL, ARG_SERVICE_ADDRESS },
+ { "service-txt", required_argument, NULL, ARG_SERVICE_TXT },
+ { "openpgp", no_argument, NULL, ARG_OPENPGP },
+ { "tlsa", optional_argument, NULL, ARG_TLSA },
+ { "raw", optional_argument, NULL, ARG_RAW },
+ { "search", required_argument, NULL, ARG_SEARCH },
+ { "statistics", no_argument, NULL, ARG_STATISTICS, },
+ { "reset-statistics", no_argument, NULL, ARG_RESET_STATISTICS },
+ { "status", no_argument, NULL, ARG_STATUS },
+ { "flush-caches", no_argument, NULL, ARG_FLUSH_CACHES },
+ { "reset-server-features", no_argument, NULL, ARG_RESET_SERVER_FEATURES },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "set-dns", required_argument, NULL, ARG_SET_DNS },
+ { "set-domain", required_argument, NULL, ARG_SET_DOMAIN },
+ { "set-llmnr", required_argument, NULL, ARG_SET_LLMNR },
+ { "set-mdns", required_argument, NULL, ARG_SET_MDNS },
+ { "set-dnsovertls", required_argument, NULL, ARG_SET_PRIVATE },
+ { "set-dnssec", required_argument, NULL, ARG_SET_DNSSEC },
+ { "set-nta", required_argument, NULL, ARG_SET_NTA },
+ { "revert", no_argument, NULL, ARG_REVERT_LINK },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h46i:t:c:p:", options, NULL)) >= 0)
+ switch(c) {
+
+ case 'h':
+ return compat_help();
+
+ case ARG_VERSION:
+ return version();
+
+ case '4':
+ arg_family = AF_INET;
+ break;
+
+ case '6':
+ arg_family = AF_INET6;
+ break;
+
+ case 'i':
+ r = ifname_mangle(optarg);
+ if (r < 0)
+ return r;
+ break;
+
+ case 't':
+ if (streq(optarg, "help")) {
+ help_dns_types();
+ return 0;
+ }
+
+ r = dns_type_from_string(optarg);
+ if (r < 0) {
+ log_error("Failed to parse RR record type %s", optarg);
+ return r;
+ }
+ arg_type = (uint16_t) r;
+ assert((int) arg_type == r);
+
+ arg_mode = MODE_RESOLVE_RECORD;
+ break;
+
+ case 'c':
+ if (streq(optarg, "help")) {
+ help_dns_classes();
+ return 0;
+ }
+
+ r = dns_class_from_string(optarg);
+ if (r < 0) {
+ log_error("Failed to parse RR record class %s", optarg);
+ return r;
+ }
+ arg_class = (uint16_t) r;
+ assert((int) arg_class == r);
+
+ break;
+
+ case ARG_LEGEND:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --legend= argument");
+
+ arg_legend = r;
+ break;
+
+ case 'p':
+ if (streq(optarg, "help")) {
+ help_protocol_types();
+ return 0;
+ } else if (streq(optarg, "dns"))
+ arg_flags |= SD_RESOLVED_DNS;
+ else if (streq(optarg, "llmnr"))
+ arg_flags |= SD_RESOLVED_LLMNR;
+ else if (streq(optarg, "llmnr-ipv4"))
+ arg_flags |= SD_RESOLVED_LLMNR_IPV4;
+ else if (streq(optarg, "llmnr-ipv6"))
+ arg_flags |= SD_RESOLVED_LLMNR_IPV6;
+ else if (streq(optarg, "mdns"))
+ arg_flags |= SD_RESOLVED_MDNS;
+ else if (streq(optarg, "mdns-ipv4"))
+ arg_flags |= SD_RESOLVED_MDNS_IPV4;
+ else if (streq(optarg, "mdns-ipv6"))
+ arg_flags |= SD_RESOLVED_MDNS_IPV6;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown protocol specifier: %s", optarg);
+
+ break;
+
+ case ARG_SERVICE:
+ arg_mode = MODE_RESOLVE_SERVICE;
+ break;
+
+ case ARG_OPENPGP:
+ arg_mode = MODE_RESOLVE_OPENPGP;
+ break;
+
+ case ARG_TLSA:
+ arg_mode = MODE_RESOLVE_TLSA;
+ if (!optarg || service_family_is_valid(optarg))
+ arg_service_family = optarg;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown service family \"%s\".", optarg);
+ break;
+
+ case ARG_RAW:
+ if (on_tty())
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY),
+ "Refusing to write binary data to tty.");
+
+ if (optarg == NULL || streq(optarg, "payload"))
+ arg_raw = RAW_PAYLOAD;
+ else if (streq(optarg, "packet"))
+ arg_raw = RAW_PACKET;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown --raw specifier \"%s\".",
+ optarg);
+
+ arg_legend = false;
+ break;
+
+ case ARG_CNAME:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --cname= argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_CNAME, r == 0);
+ break;
+
+ case ARG_SERVICE_ADDRESS:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --service-address= argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_ADDRESS, r == 0);
+ break;
+
+ case ARG_SERVICE_TXT:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --service-txt= argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_TXT, r == 0);
+ break;
+
+ case ARG_SEARCH:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --search argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_SEARCH, r == 0);
+ break;
+
+ case ARG_STATISTICS:
+ arg_mode = MODE_STATISTICS;
+ break;
+
+ case ARG_RESET_STATISTICS:
+ arg_mode = MODE_RESET_STATISTICS;
+ break;
+
+ case ARG_FLUSH_CACHES:
+ arg_mode = MODE_FLUSH_CACHES;
+ break;
+
+ case ARG_RESET_SERVER_FEATURES:
+ arg_mode = MODE_RESET_SERVER_FEATURES;
+ break;
+
+ case ARG_STATUS:
+ arg_mode = MODE_STATUS;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_SET_DNS:
+ r = strv_extend(&arg_set_dns, optarg);
+ if (r < 0)
+ return log_oom();
+
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case ARG_SET_DOMAIN:
+ r = strv_extend(&arg_set_domain, optarg);
+ if (r < 0)
+ return log_oom();
+
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case ARG_SET_LLMNR:
+ arg_set_llmnr = optarg;
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case ARG_SET_MDNS:
+ arg_set_mdns = optarg;
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case ARG_SET_PRIVATE:
+ arg_set_dns_over_tls = optarg;
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case ARG_SET_DNSSEC:
+ arg_set_dnssec = optarg;
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case ARG_SET_NTA:
+ r = strv_extend(&arg_set_nta, optarg);
+ if (r < 0)
+ return log_oom();
+
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case ARG_REVERT_LINK:
+ arg_mode = MODE_REVERT_LINK;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_type == 0 && arg_class != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--class= may only be used in conjunction with --type=.");
+
+ if (arg_type != 0 && arg_mode == MODE_RESOLVE_SERVICE)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--service and --type= may not be combined.");
+
+ if (arg_type != 0 && arg_class == 0)
+ arg_class = DNS_CLASS_IN;
+
+ if (arg_class != 0 && arg_type == 0)
+ arg_type = DNS_TYPE_A;
+
+ if (IN_SET(arg_mode, MODE_SET_LINK, MODE_REVERT_LINK)) {
+
+ if (arg_ifindex <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--set-dns=, --set-domain=, --set-llmnr=, --set-mdns=, --set-dnsovertls=, --set-dnssec=, --set-nta= and --revert require --interface=.");
+ }
+
+ return 1 /* work to do */;
+}
+
+static int native_parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_LEGEND,
+ ARG_CNAME,
+ ARG_SERVICE_ADDRESS,
+ ARG_SERVICE_TXT,
+ ARG_RAW,
+ ARG_SEARCH,
+ ARG_NO_PAGER,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "type", required_argument, NULL, 't' },
+ { "class", required_argument, NULL, 'c' },
+ { "legend", required_argument, NULL, ARG_LEGEND },
+ { "interface", required_argument, NULL, 'i' },
+ { "protocol", required_argument, NULL, 'p' },
+ { "cname", required_argument, NULL, ARG_CNAME },
+ { "service-address", required_argument, NULL, ARG_SERVICE_ADDRESS },
+ { "service-txt", required_argument, NULL, ARG_SERVICE_TXT },
+ { "raw", optional_argument, NULL, ARG_RAW },
+ { "search", required_argument, NULL, ARG_SEARCH },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h46i:t:c:p:", options, NULL)) >= 0)
+ switch(c) {
+
+ case 'h':
+ return native_help();
+
+ case ARG_VERSION:
+ return version();
+
+ case '4':
+ arg_family = AF_INET;
+ break;
+
+ case '6':
+ arg_family = AF_INET6;
+ break;
+
+ case 'i':
+ r = ifname_mangle(optarg);
+ if (r < 0)
+ return r;
+ break;
+
+ case 't':
+ if (streq(optarg, "help")) {
+ help_dns_types();
+ return 0;
+ }
+
+ r = dns_type_from_string(optarg);
+ if (r < 0) {
+ log_error("Failed to parse RR record type %s", optarg);
+ return r;
+ }
+ arg_type = (uint16_t) r;
+ assert((int) arg_type == r);
+
+ break;
+
+ case 'c':
+ if (streq(optarg, "help")) {
+ help_dns_classes();
+ return 0;
+ }
+
+ r = dns_class_from_string(optarg);
+ if (r < 0) {
+ log_error("Failed to parse RR record class %s", optarg);
+ return r;
+ }
+ arg_class = (uint16_t) r;
+ assert((int) arg_class == r);
+
+ break;
+
+ case ARG_LEGEND:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --legend= argument");
+
+ arg_legend = r;
+ break;
+
+ case 'p':
+ if (streq(optarg, "help")) {
+ help_protocol_types();
+ return 0;
+ } else if (streq(optarg, "dns"))
+ arg_flags |= SD_RESOLVED_DNS;
+ else if (streq(optarg, "llmnr"))
+ arg_flags |= SD_RESOLVED_LLMNR;
+ else if (streq(optarg, "llmnr-ipv4"))
+ arg_flags |= SD_RESOLVED_LLMNR_IPV4;
+ else if (streq(optarg, "llmnr-ipv6"))
+ arg_flags |= SD_RESOLVED_LLMNR_IPV6;
+ else if (streq(optarg, "mdns"))
+ arg_flags |= SD_RESOLVED_MDNS;
+ else if (streq(optarg, "mdns-ipv4"))
+ arg_flags |= SD_RESOLVED_MDNS_IPV4;
+ else if (streq(optarg, "mdns-ipv6"))
+ arg_flags |= SD_RESOLVED_MDNS_IPV6;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown protocol specifier: %s",
+ optarg);
+
+ break;
+
+ case ARG_RAW:
+ if (on_tty())
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY),
+ "Refusing to write binary data to tty.");
+
+ if (optarg == NULL || streq(optarg, "payload"))
+ arg_raw = RAW_PAYLOAD;
+ else if (streq(optarg, "packet"))
+ arg_raw = RAW_PACKET;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown --raw specifier \"%s\".",
+ optarg);
+
+ arg_legend = false;
+ break;
+
+ case ARG_CNAME:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --cname= argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_CNAME, r == 0);
+ break;
+
+ case ARG_SERVICE_ADDRESS:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --service-address= argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_ADDRESS, r == 0);
+ break;
+
+ case ARG_SERVICE_TXT:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --service-txt= argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_TXT, r == 0);
+ break;
+
+ case ARG_SEARCH:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --search argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_SEARCH, r == 0);
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_type == 0 && arg_class != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--class= may only be used in conjunction with --type=.");
+
+ if (arg_type != 0 && arg_class == 0)
+ arg_class = DNS_CLASS_IN;
+
+ if (arg_class != 0 && arg_type == 0)
+ arg_type = DNS_TYPE_A;
+
+ return 1 /* work to do */;
+}
+
+static int native_main(int argc, char *argv[], sd_bus *bus) {
+
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, verb_help },
+ { "status", VERB_ANY, VERB_ANY, VERB_DEFAULT, verb_status },
+ { "query", 2, VERB_ANY, 0, verb_query },
+ { "service", 2, 4, 0, verb_service },
+ { "openpgp", 2, VERB_ANY, 0, verb_openpgp },
+ { "tlsa", 2, VERB_ANY, 0, verb_tlsa },
+ { "statistics", VERB_ANY, 1, 0, show_statistics },
+ { "reset-statistics", VERB_ANY, 1, 0, reset_statistics },
+ { "flush-caches", VERB_ANY, 1, 0, flush_caches },
+ { "reset-server-features", VERB_ANY, 1, 0, reset_server_features },
+ { "dns", VERB_ANY, VERB_ANY, 0, verb_dns },
+ { "domain", VERB_ANY, VERB_ANY, 0, verb_domain },
+ { "default-route", VERB_ANY, 3, 0, verb_default_route },
+ { "llmnr", VERB_ANY, 3, 0, verb_llmnr },
+ { "mdns", VERB_ANY, 3, 0, verb_mdns },
+ { "dnsovertls", VERB_ANY, 3, 0, verb_dns_over_tls },
+ { "dnssec", VERB_ANY, 3, 0, verb_dnssec },
+ { "nta", VERB_ANY, VERB_ANY, 0, verb_nta },
+ { "revert", VERB_ANY, 2, 0, verb_revert_link },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, bus);
+}
+
+static int translate(const char *verb, const char *single_arg, size_t num_args, char **args, sd_bus *bus) {
+ char **fake, **p;
+ size_t num, i;
+
+ assert(verb);
+ assert(num_args == 0 || args);
+
+ num = !!single_arg + num_args + 1;
+
+ p = fake = newa0(char *, num + 1);
+ *p++ = (char *) verb;
+ if (single_arg)
+ *p++ = (char *) single_arg;
+ for (i = 0; i < num_args; i++)
+ *p++ = args[i];
+
+ optind = 0;
+ return native_main((int) num, fake, bus);
+}
+
+static int compat_main(int argc, char *argv[], sd_bus *bus) {
+ int r = 0;
+
+ switch (arg_mode) {
+ case MODE_RESOLVE_HOST:
+ case MODE_RESOLVE_RECORD:
+ return translate("query", NULL, argc - optind, argv + optind, bus);
+
+ case MODE_RESOLVE_SERVICE:
+ return translate("service", NULL, argc - optind, argv + optind, bus);
+
+ case MODE_RESOLVE_OPENPGP:
+ return translate("openpgp", NULL, argc - optind, argv + optind, bus);
+
+ case MODE_RESOLVE_TLSA:
+ return translate("tlsa", arg_service_family, argc - optind, argv + optind, bus);
+
+ case MODE_STATISTICS:
+ return translate("statistics", NULL, 0, NULL, bus);
+
+ case MODE_RESET_STATISTICS:
+ return translate("reset-statistics", NULL, 0, NULL, bus);
+
+ case MODE_FLUSH_CACHES:
+ return translate("flush-caches", NULL, 0, NULL, bus);
+
+ case MODE_RESET_SERVER_FEATURES:
+ return translate("reset-server-features", NULL, 0, NULL, bus);
+
+ case MODE_STATUS:
+ return translate("status", NULL, argc - optind, argv + optind, bus);
+
+ case MODE_SET_LINK:
+ assert(arg_ifname);
+
+ if (arg_set_dns) {
+ r = translate("dns", arg_ifname, strv_length(arg_set_dns), arg_set_dns, bus);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_set_domain) {
+ r = translate("domain", arg_ifname, strv_length(arg_set_domain), arg_set_domain, bus);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_set_nta) {
+ r = translate("nta", arg_ifname, strv_length(arg_set_nta), arg_set_nta, bus);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_set_llmnr) {
+ r = translate("llmnr", arg_ifname, 1, (char **) &arg_set_llmnr, bus);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_set_mdns) {
+ r = translate("mdns", arg_ifname, 1, (char **) &arg_set_mdns, bus);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_set_dns_over_tls) {
+ r = translate("dnsovertls", arg_ifname, 1, (char **) &arg_set_dns_over_tls, bus);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_set_dnssec) {
+ r = translate("dnssec", arg_ifname, 1, (char **) &arg_set_dnssec, bus);
+ if (r < 0)
+ return r;
+ }
+
+ return r;
+
+ case MODE_REVERT_LINK:
+ assert(arg_ifname);
+
+ return translate("revert", arg_ifname, 0, NULL, bus);
+
+ case _MODE_INVALID:
+ assert_not_reached("invalid mode");
+ }
+
+ return 0;
+}
+
+static int run(int argc, char **argv) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ if (streq(program_invocation_short_name, "resolvconf"))
+ r = resolvconf_parse_argv(argc, argv);
+ else if (streq(program_invocation_short_name, "systemd-resolve"))
+ r = compat_parse_argv(argc, argv);
+ else
+ r = native_parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "sd_bus_open_system: %m");
+
+ if (STR_IN_SET(program_invocation_short_name, "systemd-resolve", "resolvconf"))
+ return compat_main(argc, argv, bus);
+
+ return native_main(argc, argv, bus);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/resolve/resolvectl.h b/src/resolve/resolvectl.h
new file mode 100644
index 0000000..6b3a1f8
--- /dev/null
+++ b/src/resolve/resolvectl.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <in-addr-util.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+typedef enum ExecutionMode {
+ MODE_RESOLVE_HOST,
+ MODE_RESOLVE_RECORD,
+ MODE_RESOLVE_SERVICE,
+ MODE_RESOLVE_OPENPGP,
+ MODE_RESOLVE_TLSA,
+ MODE_STATISTICS,
+ MODE_RESET_STATISTICS,
+ MODE_FLUSH_CACHES,
+ MODE_RESET_SERVER_FEATURES,
+ MODE_STATUS,
+ MODE_SET_LINK,
+ MODE_REVERT_LINK,
+ _MODE_INVALID = -1,
+} ExecutionMode;
+
+extern ExecutionMode arg_mode;
+extern char **arg_set_dns;
+extern char **arg_set_domain;
+extern bool arg_ifindex_permissive;
+
+int ifname_mangle(const char *s);
diff --git a/src/resolve/resolved-bus.c b/src/resolve/resolved-bus.c
new file mode 100644
index 0000000..5b547ba
--- /dev/null
+++ b/src/resolve/resolved-bus.c
@@ -0,0 +1,1951 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-util.h"
+#include "dns-domain.h"
+#include "missing_capability.h"
+#include "resolved-bus.h"
+#include "resolved-def.h"
+#include "resolved-dns-synthesize.h"
+#include "resolved-dnssd.h"
+#include "resolved-dnssd-bus.h"
+#include "resolved-link-bus.h"
+#include "user-util.h"
+#include "utf8.h"
+
+BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_resolve_support, resolve_support, ResolveSupport);
+
+static int reply_query_state(DnsQuery *q) {
+
+ switch (q->state) {
+
+ case DNS_TRANSACTION_NO_SERVERS:
+ return sd_bus_reply_method_errorf(q->request, BUS_ERROR_NO_NAME_SERVERS, "No appropriate name servers or networks for name found");
+
+ case DNS_TRANSACTION_TIMEOUT:
+ return sd_bus_reply_method_errorf(q->request, SD_BUS_ERROR_TIMEOUT, "Query timed out");
+
+ case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED:
+ return sd_bus_reply_method_errorf(q->request, SD_BUS_ERROR_TIMEOUT, "All attempts to contact name servers or networks failed");
+
+ case DNS_TRANSACTION_INVALID_REPLY:
+ return sd_bus_reply_method_errorf(q->request, BUS_ERROR_INVALID_REPLY, "Received invalid reply");
+
+ case DNS_TRANSACTION_ERRNO:
+ return sd_bus_reply_method_errnof(q->request, q->answer_errno, "Lookup failed due to system error: %m");
+
+ case DNS_TRANSACTION_ABORTED:
+ return sd_bus_reply_method_errorf(q->request, BUS_ERROR_ABORTED, "Query aborted");
+
+ case DNS_TRANSACTION_DNSSEC_FAILED:
+ return sd_bus_reply_method_errorf(q->request, BUS_ERROR_DNSSEC_FAILED, "DNSSEC validation failed: %s",
+ dnssec_result_to_string(q->answer_dnssec_result));
+
+ case DNS_TRANSACTION_NO_TRUST_ANCHOR:
+ return sd_bus_reply_method_errorf(q->request, BUS_ERROR_NO_TRUST_ANCHOR, "No suitable trust anchor known");
+
+ case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED:
+ return sd_bus_reply_method_errorf(q->request, BUS_ERROR_RR_TYPE_UNSUPPORTED, "Server does not support requested resource record type");
+
+ case DNS_TRANSACTION_NETWORK_DOWN:
+ return sd_bus_reply_method_errorf(q->request, BUS_ERROR_NETWORK_DOWN, "Network is down");
+
+ case DNS_TRANSACTION_NOT_FOUND:
+ /* We return this as NXDOMAIN. This is only generated when a host doesn't implement LLMNR/TCP, and we
+ * thus quickly know that we cannot resolve an in-addr.arpa or ip6.arpa address. */
+ return sd_bus_reply_method_errorf(q->request, _BUS_ERROR_DNS "NXDOMAIN", "'%s' not found", dns_query_string(q));
+
+ case DNS_TRANSACTION_RCODE_FAILURE: {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ if (q->answer_rcode == DNS_RCODE_NXDOMAIN)
+ sd_bus_error_setf(&error, _BUS_ERROR_DNS "NXDOMAIN", "'%s' not found", dns_query_string(q));
+ else {
+ const char *rc, *n;
+ char p[DECIMAL_STR_MAX(q->answer_rcode)];
+
+ rc = dns_rcode_to_string(q->answer_rcode);
+ if (!rc) {
+ sprintf(p, "%i", q->answer_rcode);
+ rc = p;
+ }
+
+ n = strjoina(_BUS_ERROR_DNS, rc);
+ sd_bus_error_setf(&error, n, "Could not resolve '%s', server or network returned error %s", dns_query_string(q), rc);
+ }
+
+ return sd_bus_reply_method_error(q->request, &error);
+ }
+
+ case DNS_TRANSACTION_NULL:
+ case DNS_TRANSACTION_PENDING:
+ case DNS_TRANSACTION_VALIDATING:
+ case DNS_TRANSACTION_SUCCESS:
+ default:
+ assert_not_reached("Impossible state");
+ }
+}
+
+static int append_address(sd_bus_message *reply, DnsResourceRecord *rr, int ifindex) {
+ int r;
+
+ assert(reply);
+ assert(rr);
+
+ r = sd_bus_message_open_container(reply, 'r', "iiay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "i", ifindex);
+ if (r < 0)
+ return r;
+
+ if (rr->key->type == DNS_TYPE_A) {
+ r = sd_bus_message_append(reply, "i", AF_INET);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &rr->a.in_addr, sizeof(struct in_addr));
+
+ } else if (rr->key->type == DNS_TYPE_AAAA) {
+ r = sd_bus_message_append(reply, "i", AF_INET6);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &rr->aaaa.in6_addr, sizeof(struct in6_addr));
+ } else
+ return -EAFNOSUPPORT;
+
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void bus_method_resolve_hostname_complete(DnsQuery *q) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *canonical = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *normalized = NULL;
+ DnsResourceRecord *rr;
+ unsigned added = 0;
+ int ifindex, r;
+
+ assert(q);
+
+ if (q->state != DNS_TRANSACTION_SUCCESS) {
+ r = reply_query_state(q);
+ goto finish;
+ }
+
+ r = dns_query_process_cname(q);
+ if (r == -ELOOP) {
+ r = sd_bus_reply_method_errorf(q->request, BUS_ERROR_CNAME_LOOP, "CNAME loop detected, or CNAME resolving disabled on '%s'", dns_query_string(q));
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+ if (r == DNS_QUERY_RESTARTED) /* This was a cname, and the query was restarted. */
+ return;
+
+ r = sd_bus_message_new_method_return(q->request, &reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_open_container(reply, 'a', "(iiay)");
+ if (r < 0)
+ goto finish;
+
+ DNS_ANSWER_FOREACH_IFINDEX(rr, ifindex, q->answer) {
+ DnsQuestion *question;
+
+ question = dns_query_question_for_protocol(q, q->answer_protocol);
+
+ r = dns_question_matches_rr(question, rr, DNS_SEARCH_DOMAIN_NAME(q->answer_search_domain));
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ continue;
+
+ r = append_address(reply, rr, ifindex);
+ if (r < 0)
+ goto finish;
+
+ if (!canonical)
+ canonical = dns_resource_record_ref(rr);
+
+ added++;
+ }
+
+ if (added <= 0) {
+ r = sd_bus_reply_method_errorf(q->request, BUS_ERROR_NO_SUCH_RR, "'%s' does not have any RR of the requested type", dns_query_string(q));
+ goto finish;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ goto finish;
+
+ /* The key names are not necessarily normalized, make sure that they are when we return them to our bus
+ * clients. */
+ r = dns_name_normalize(dns_resource_key_name(canonical->key), 0, &normalized);
+ if (r < 0)
+ goto finish;
+
+ /* Return the precise spelling and uppercasing and CNAME target reported by the server */
+ assert(canonical);
+ r = sd_bus_message_append(
+ reply, "st",
+ normalized,
+ SD_RESOLVED_FLAGS_MAKE(q->answer_protocol, q->answer_family, dns_query_fully_authenticated(q)));
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_send(q->manager->bus, reply, NULL);
+
+finish:
+ if (r < 0) {
+ log_error_errno(r, "Failed to send hostname reply: %m");
+ sd_bus_reply_method_errno(q->request, r, NULL);
+ }
+
+ dns_query_free(q);
+}
+
+static int check_ifindex_flags(int ifindex, uint64_t *flags, uint64_t ok, sd_bus_error *error) {
+ assert(flags);
+
+ if (ifindex < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid interface index");
+
+ if (*flags & ~(SD_RESOLVED_PROTOCOLS_ALL|SD_RESOLVED_NO_CNAME|ok))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid flags parameter");
+
+ if ((*flags & SD_RESOLVED_PROTOCOLS_ALL) == 0) /* If no protocol is enabled, enable all */
+ *flags |= SD_RESOLVED_PROTOCOLS_ALL;
+
+ return 0;
+}
+
+static int parse_as_address(sd_bus_message *m, int ifindex, const char *hostname, int family, uint64_t flags) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *canonical = NULL;
+ union in_addr_union parsed;
+ int r, ff, parsed_ifindex = 0;
+
+ /* Check if the hostname is actually already an IP address formatted as string. In that case just parse it,
+ * let's not attempt to look it up. */
+
+ r = in_addr_ifindex_from_string_auto(hostname, &ff, &parsed, &parsed_ifindex);
+ if (r < 0) /* not an address */
+ return 0;
+
+ if (family != AF_UNSPEC && ff != family)
+ return sd_bus_reply_method_errorf(m, BUS_ERROR_NO_SUCH_RR, "The specified address is not of the requested family.");
+ if (ifindex > 0 && parsed_ifindex > 0 && parsed_ifindex != ifindex)
+ return sd_bus_reply_method_errorf(m, BUS_ERROR_NO_SUCH_RR, "The specified address interface index does not match requested interface.");
+
+ if (parsed_ifindex > 0)
+ ifindex = parsed_ifindex;
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(iiay)");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'r', "iiay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "ii", ifindex, ff);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &parsed, FAMILY_ADDRESS_SIZE(ff));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ /* When an IP address is specified we just return it as canonical name, in order to avoid a DNS
+ * look-up. However, we reformat it to make sure it's in a truly canonical form (i.e. on IPv6 the inner
+ * omissions are always done the same way). */
+ r = in_addr_ifindex_to_string(ff, &parsed, ifindex, &canonical);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "st", canonical,
+ SD_RESOLVED_FLAGS_MAKE(dns_synthesize_protocol(flags), ff, true));
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(sd_bus_message_get_bus(m), reply, NULL);
+}
+
+static int bus_method_resolve_hostname(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *question_idna = NULL, *question_utf8 = NULL;
+ Manager *m = userdata;
+ const char *hostname;
+ int family, ifindex;
+ uint64_t flags;
+ DnsQuery *q;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(message, "isit", &ifindex, &hostname, &family, &flags);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(family, AF_INET, AF_INET6, AF_UNSPEC))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown address family %i", family);
+
+ r = check_ifindex_flags(ifindex, &flags, SD_RESOLVED_NO_SEARCH, error);
+ if (r < 0)
+ return r;
+
+ r = parse_as_address(message, ifindex, hostname, family, flags);
+ if (r != 0)
+ return r;
+
+ r = dns_name_is_valid(hostname);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid hostname '%s'", hostname);
+
+ r = dns_question_new_address(&question_utf8, family, hostname, false);
+ if (r < 0)
+ return r;
+
+ r = dns_question_new_address(&question_idna, family, hostname, true);
+ if (r < 0 && r != -EALREADY)
+ return r;
+
+ r = dns_query_new(m, &q, question_utf8, question_idna ?: question_utf8, ifindex, flags);
+ if (r < 0)
+ return r;
+
+ q->request = sd_bus_message_ref(message);
+ q->request_family = family;
+ q->complete = bus_method_resolve_hostname_complete;
+ q->suppress_unroutable_family = family == AF_UNSPEC;
+
+ r = dns_query_bus_track(q, message);
+ if (r < 0)
+ goto fail;
+
+ r = dns_query_go(q);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ dns_query_free(q);
+ return r;
+}
+
+static void bus_method_resolve_address_complete(DnsQuery *q) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ DnsQuestion *question;
+ DnsResourceRecord *rr;
+ unsigned added = 0;
+ int ifindex, r;
+
+ assert(q);
+
+ if (q->state != DNS_TRANSACTION_SUCCESS) {
+ r = reply_query_state(q);
+ goto finish;
+ }
+
+ r = dns_query_process_cname(q);
+ if (r == -ELOOP) {
+ r = sd_bus_reply_method_errorf(q->request, BUS_ERROR_CNAME_LOOP, "CNAME loop detected, or CNAME resolving disabled on '%s'", dns_query_string(q));
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+ if (r == DNS_QUERY_RESTARTED) /* This was a cname, and the query was restarted. */
+ return;
+
+ r = sd_bus_message_new_method_return(q->request, &reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_open_container(reply, 'a', "(is)");
+ if (r < 0)
+ goto finish;
+
+ question = dns_query_question_for_protocol(q, q->answer_protocol);
+
+ DNS_ANSWER_FOREACH_IFINDEX(rr, ifindex, q->answer) {
+ _cleanup_free_ char *normalized = NULL;
+
+ r = dns_question_matches_rr(question, rr, NULL);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ continue;
+
+ r = dns_name_normalize(rr->ptr.name, 0, &normalized);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_append(reply, "(is)", ifindex, normalized);
+ if (r < 0)
+ goto finish;
+
+ added++;
+ }
+
+ if (added <= 0) {
+ _cleanup_free_ char *ip = NULL;
+
+ (void) in_addr_to_string(q->request_family, &q->request_address, &ip);
+ r = sd_bus_reply_method_errorf(q->request, BUS_ERROR_NO_SUCH_RR,
+ "Address '%s' does not have any RR of requested type", strnull(ip));
+ goto finish;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_append(reply, "t", SD_RESOLVED_FLAGS_MAKE(q->answer_protocol, q->answer_family, dns_query_fully_authenticated(q)));
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_send(q->manager->bus, reply, NULL);
+
+finish:
+ if (r < 0) {
+ log_error_errno(r, "Failed to send address reply: %m");
+ sd_bus_reply_method_errno(q->request, r, NULL);
+ }
+
+ dns_query_free(q);
+}
+
+static int bus_method_resolve_address(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *question = NULL;
+ Manager *m = userdata;
+ int family, ifindex;
+ uint64_t flags;
+ const void *d;
+ DnsQuery *q;
+ size_t sz;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(message, "ii", &ifindex, &family);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown address family %i", family);
+
+ r = sd_bus_message_read_array(message, 'y', &d, &sz);
+ if (r < 0)
+ return r;
+
+ if (sz != FAMILY_ADDRESS_SIZE(family))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid address size");
+
+ r = sd_bus_message_read(message, "t", &flags);
+ if (r < 0)
+ return r;
+
+ r = check_ifindex_flags(ifindex, &flags, 0, error);
+ if (r < 0)
+ return r;
+
+ r = dns_question_new_reverse(&question, family, d);
+ if (r < 0)
+ return r;
+
+ r = dns_query_new(m, &q, question, question, ifindex, flags|SD_RESOLVED_NO_SEARCH);
+ if (r < 0)
+ return r;
+
+ q->request = sd_bus_message_ref(message);
+ q->request_family = family;
+ memcpy(&q->request_address, d, sz);
+ q->complete = bus_method_resolve_address_complete;
+
+ r = dns_query_bus_track(q, message);
+ if (r < 0)
+ goto fail;
+
+ r = dns_query_go(q);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ dns_query_free(q);
+ return r;
+}
+
+static int bus_message_append_rr(sd_bus_message *m, DnsResourceRecord *rr, int ifindex) {
+ int r;
+
+ assert(m);
+ assert(rr);
+
+ r = sd_bus_message_open_container(m, 'r', "iqqay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "iqq",
+ ifindex,
+ rr->key->class,
+ rr->key->type);
+ if (r < 0)
+ return r;
+
+ r = dns_resource_record_to_wire_format(rr, false);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(m, 'y', rr->wire_format, rr->wire_format_size);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(m);
+}
+
+static void bus_method_resolve_record_complete(DnsQuery *q) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ DnsResourceRecord *rr;
+ DnsQuestion *question;
+ unsigned added = 0;
+ int ifindex;
+ int r;
+
+ assert(q);
+
+ if (q->state != DNS_TRANSACTION_SUCCESS) {
+ r = reply_query_state(q);
+ goto finish;
+ }
+
+ r = dns_query_process_cname(q);
+ if (r == -ELOOP) {
+ r = sd_bus_reply_method_errorf(q->request, BUS_ERROR_CNAME_LOOP, "CNAME loop detected, or CNAME resolving disabled on '%s'", dns_query_string(q));
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+ if (r == DNS_QUERY_RESTARTED) /* This was a cname, and the query was restarted. */
+ return;
+
+ r = sd_bus_message_new_method_return(q->request, &reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_open_container(reply, 'a', "(iqqay)");
+ if (r < 0)
+ goto finish;
+
+ question = dns_query_question_for_protocol(q, q->answer_protocol);
+
+ DNS_ANSWER_FOREACH_IFINDEX(rr, ifindex, q->answer) {
+ r = dns_question_matches_rr(question, rr, NULL);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ continue;
+
+ r = bus_message_append_rr(reply, rr, ifindex);
+ if (r < 0)
+ goto finish;
+
+ added++;
+ }
+
+ if (added <= 0) {
+ r = sd_bus_reply_method_errorf(q->request, BUS_ERROR_NO_SUCH_RR, "Name '%s' does not have any RR of the requested type", dns_query_string(q));
+ goto finish;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_append(reply, "t", SD_RESOLVED_FLAGS_MAKE(q->answer_protocol, q->answer_family, dns_query_fully_authenticated(q)));
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_send(q->manager->bus, reply, NULL);
+
+finish:
+ if (r < 0) {
+ log_error_errno(r, "Failed to send record reply: %m");
+ sd_bus_reply_method_errno(q->request, r, NULL);
+ }
+
+ dns_query_free(q);
+}
+
+static int bus_method_resolve_record(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ _cleanup_(dns_question_unrefp) DnsQuestion *question = NULL;
+ Manager *m = userdata;
+ uint16_t class, type;
+ const char *name;
+ int r, ifindex;
+ uint64_t flags;
+ DnsQuery *q;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(message, "isqqt", &ifindex, &name, &class, &type, &flags);
+ if (r < 0)
+ return r;
+
+ r = dns_name_is_valid(name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid name '%s'", name);
+
+ if (!dns_type_is_valid_query(type))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Specified resource record type %" PRIu16 " may not be used in a query.", type);
+ if (dns_type_is_zone_transer(type))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Zone transfers not permitted via this programming interface.");
+ if (dns_type_is_obsolete(type))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Specified DNS resource record type %" PRIu16 " is obsolete.", type);
+
+ r = check_ifindex_flags(ifindex, &flags, 0, error);
+ if (r < 0)
+ return r;
+
+ question = dns_question_new(1);
+ if (!question)
+ return -ENOMEM;
+
+ key = dns_resource_key_new(class, type, name);
+ if (!key)
+ return -ENOMEM;
+
+ r = dns_question_add(question, key);
+ if (r < 0)
+ return r;
+
+ r = dns_query_new(m, &q, question, question, ifindex, flags|SD_RESOLVED_NO_SEARCH);
+ if (r < 0)
+ return r;
+
+ /* Let's request that the TTL is fixed up for locally cached entries, after all we return it in the wire format
+ * blob */
+ q->clamp_ttl = true;
+
+ q->request = sd_bus_message_ref(message);
+ q->complete = bus_method_resolve_record_complete;
+
+ r = dns_query_bus_track(q, message);
+ if (r < 0)
+ goto fail;
+
+ r = dns_query_go(q);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ dns_query_free(q);
+ return r;
+}
+
+static int append_srv(DnsQuery *q, sd_bus_message *reply, DnsResourceRecord *rr) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *canonical = NULL;
+ _cleanup_free_ char *normalized = NULL;
+ DnsQuery *aux;
+ int r;
+
+ assert(q);
+ assert(reply);
+ assert(rr);
+ assert(rr->key);
+
+ if (rr->key->type != DNS_TYPE_SRV)
+ return 0;
+
+ if ((q->flags & SD_RESOLVED_NO_ADDRESS) == 0) {
+ /* First, let's see if we could find an appropriate A or AAAA
+ * record for the SRV record */
+ LIST_FOREACH(auxiliary_queries, aux, q->auxiliary_queries) {
+ DnsResourceRecord *zz;
+ DnsQuestion *question;
+
+ if (aux->state != DNS_TRANSACTION_SUCCESS)
+ continue;
+ if (aux->auxiliary_result != 0)
+ continue;
+
+ question = dns_query_question_for_protocol(aux, aux->answer_protocol);
+
+ r = dns_name_equal(dns_question_first_name(question), rr->srv.name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ DNS_ANSWER_FOREACH(zz, aux->answer) {
+
+ r = dns_question_matches_rr(question, zz, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ canonical = dns_resource_record_ref(zz);
+ break;
+ }
+
+ if (canonical)
+ break;
+ }
+
+ /* Is there are successful A/AAAA lookup for this SRV RR? If not, don't add it */
+ if (!canonical)
+ return 0;
+ }
+
+ r = sd_bus_message_open_container(reply, 'r', "qqqsa(iiay)s");
+ if (r < 0)
+ return r;
+
+ r = dns_name_normalize(rr->srv.name, 0, &normalized);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(
+ reply,
+ "qqqs",
+ rr->srv.priority, rr->srv.weight, rr->srv.port, normalized);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(iiay)");
+ if (r < 0)
+ return r;
+
+ if ((q->flags & SD_RESOLVED_NO_ADDRESS) == 0) {
+ LIST_FOREACH(auxiliary_queries, aux, q->auxiliary_queries) {
+ DnsResourceRecord *zz;
+ DnsQuestion *question;
+ int ifindex;
+
+ if (aux->state != DNS_TRANSACTION_SUCCESS)
+ continue;
+ if (aux->auxiliary_result != 0)
+ continue;
+
+ question = dns_query_question_for_protocol(aux, aux->answer_protocol);
+
+ r = dns_name_equal(dns_question_first_name(question), rr->srv.name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ DNS_ANSWER_FOREACH_IFINDEX(zz, ifindex, aux->answer) {
+
+ r = dns_question_matches_rr(question, zz, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = append_address(reply, zz, ifindex);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ if (canonical) {
+ normalized = mfree(normalized);
+
+ r = dns_name_normalize(dns_resource_key_name(canonical->key), 0, &normalized);
+ if (r < 0)
+ return r;
+ }
+
+ /* Note that above we appended the hostname as encoded in the
+ * SRV, and here the canonical hostname this maps to. */
+ r = sd_bus_message_append(reply, "s", normalized);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int append_txt(sd_bus_message *reply, DnsResourceRecord *rr) {
+ DnsTxtItem *i;
+ int r;
+
+ assert(reply);
+ assert(rr);
+ assert(rr->key);
+
+ if (rr->key->type != DNS_TYPE_TXT)
+ return 0;
+
+ LIST_FOREACH(items, i, rr->txt.items) {
+
+ if (i->length <= 0)
+ continue;
+
+ r = sd_bus_message_append_array(reply, 'y', i->data, i->length);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+static void resolve_service_all_complete(DnsQuery *q) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *canonical = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *name = NULL, *type = NULL, *domain = NULL;
+ DnsQuestion *question;
+ DnsResourceRecord *rr;
+ unsigned added = 0;
+ DnsQuery *aux;
+ int r;
+
+ assert(q);
+
+ if (q->block_all_complete > 0)
+ return;
+
+ if ((q->flags & SD_RESOLVED_NO_ADDRESS) == 0) {
+ DnsQuery *bad = NULL;
+ bool have_success = false;
+
+ LIST_FOREACH(auxiliary_queries, aux, q->auxiliary_queries) {
+
+ switch (aux->state) {
+
+ case DNS_TRANSACTION_PENDING:
+ /* If an auxiliary query is still pending, let's wait */
+ return;
+
+ case DNS_TRANSACTION_SUCCESS:
+ if (aux->auxiliary_result == 0)
+ have_success = true;
+ else
+ bad = aux;
+ break;
+
+ default:
+ bad = aux;
+ break;
+ }
+ }
+
+ if (!have_success) {
+ /* We can only return one error, hence pick the last error we encountered */
+
+ assert(bad);
+
+ if (bad->state == DNS_TRANSACTION_SUCCESS) {
+ assert(bad->auxiliary_result != 0);
+
+ if (bad->auxiliary_result == -ELOOP) {
+ r = sd_bus_reply_method_errorf(q->request, BUS_ERROR_CNAME_LOOP, "CNAME loop detected, or CNAME resolving disabled on '%s'", dns_query_string(bad));
+ goto finish;
+ }
+
+ r = bad->auxiliary_result;
+ goto finish;
+ }
+
+ r = reply_query_state(bad);
+ goto finish;
+ }
+ }
+
+ r = sd_bus_message_new_method_return(q->request, &reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_open_container(reply, 'a', "(qqqsa(iiay)s)");
+ if (r < 0)
+ goto finish;
+
+ question = dns_query_question_for_protocol(q, q->answer_protocol);
+ DNS_ANSWER_FOREACH(rr, q->answer) {
+ r = dns_question_matches_rr(question, rr, NULL);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ continue;
+
+ r = append_srv(q, reply, rr);
+ if (r < 0)
+ goto finish;
+ if (r == 0) /* not an SRV record */
+ continue;
+
+ if (!canonical)
+ canonical = dns_resource_record_ref(rr);
+
+ added++;
+ }
+
+ if (added <= 0) {
+ r = sd_bus_reply_method_errorf(q->request, BUS_ERROR_NO_SUCH_RR, "'%s' does not have any RR of the requested type", dns_query_string(q));
+ goto finish;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_open_container(reply, 'a', "ay");
+ if (r < 0)
+ goto finish;
+
+ DNS_ANSWER_FOREACH(rr, q->answer) {
+ r = dns_question_matches_rr(question, rr, NULL);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ continue;
+
+ r = append_txt(reply, rr);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ goto finish;
+
+ assert(canonical);
+ r = dns_service_split(dns_resource_key_name(canonical->key), &name, &type, &domain);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_append(
+ reply,
+ "ssst",
+ name, type, domain,
+ SD_RESOLVED_FLAGS_MAKE(q->answer_protocol, q->answer_family, dns_query_fully_authenticated(q)));
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_send(q->manager->bus, reply, NULL);
+
+finish:
+ if (r < 0) {
+ log_error_errno(r, "Failed to send service reply: %m");
+ sd_bus_reply_method_errno(q->request, r, NULL);
+ }
+
+ dns_query_free(q);
+}
+
+static void resolve_service_hostname_complete(DnsQuery *q) {
+ int r;
+
+ assert(q);
+ assert(q->auxiliary_for);
+
+ if (q->state != DNS_TRANSACTION_SUCCESS) {
+ resolve_service_all_complete(q->auxiliary_for);
+ return;
+ }
+
+ r = dns_query_process_cname(q);
+ if (r == DNS_QUERY_RESTARTED) /* This was a cname, and the query was restarted. */
+ return;
+
+ /* This auxiliary lookup is finished or failed, let's see if all are finished now. */
+ q->auxiliary_result = r;
+ resolve_service_all_complete(q->auxiliary_for);
+}
+
+static int resolve_service_hostname(DnsQuery *q, DnsResourceRecord *rr, int ifindex) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *question = NULL;
+ DnsQuery *aux;
+ int r;
+
+ assert(q);
+ assert(rr);
+ assert(rr->key);
+ assert(rr->key->type == DNS_TYPE_SRV);
+
+ /* OK, we found an SRV record for the service. Let's resolve
+ * the hostname included in it */
+
+ r = dns_question_new_address(&question, q->request_family, rr->srv.name, false);
+ if (r < 0)
+ return r;
+
+ r = dns_query_new(q->manager, &aux, question, question, ifindex, q->flags|SD_RESOLVED_NO_SEARCH);
+ if (r < 0)
+ return r;
+
+ aux->request_family = q->request_family;
+ aux->complete = resolve_service_hostname_complete;
+
+ r = dns_query_make_auxiliary(aux, q);
+ if (r == -EAGAIN) {
+ /* Too many auxiliary lookups? If so, don't complain,
+ * let's just not add this one, we already have more
+ * than enough */
+
+ dns_query_free(aux);
+ return 0;
+ }
+ if (r < 0)
+ goto fail;
+
+ /* Note that auxiliary queries do not track the original bus
+ * client, only the primary request does that. */
+
+ r = dns_query_go(aux);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ dns_query_free(aux);
+ return r;
+}
+
+static void bus_method_resolve_service_complete(DnsQuery *q) {
+ bool has_root_domain = false;
+ DnsResourceRecord *rr;
+ DnsQuestion *question;
+ unsigned found = 0;
+ int ifindex, r;
+
+ assert(q);
+
+ if (q->state != DNS_TRANSACTION_SUCCESS) {
+ r = reply_query_state(q);
+ goto finish;
+ }
+
+ r = dns_query_process_cname(q);
+ if (r == -ELOOP) {
+ r = sd_bus_reply_method_errorf(q->request, BUS_ERROR_CNAME_LOOP, "CNAME loop detected, or CNAME resolving disabled on '%s'", dns_query_string(q));
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+ if (r == DNS_QUERY_RESTARTED) /* This was a cname, and the query was restarted. */
+ return;
+
+ question = dns_query_question_for_protocol(q, q->answer_protocol);
+
+ DNS_ANSWER_FOREACH_IFINDEX(rr, ifindex, q->answer) {
+ r = dns_question_matches_rr(question, rr, NULL);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ continue;
+
+ if (rr->key->type != DNS_TYPE_SRV)
+ continue;
+
+ if (dns_name_is_root(rr->srv.name)) {
+ has_root_domain = true;
+ continue;
+ }
+
+ if ((q->flags & SD_RESOLVED_NO_ADDRESS) == 0) {
+ q->block_all_complete++;
+ r = resolve_service_hostname(q, rr, ifindex);
+ q->block_all_complete--;
+
+ if (r < 0)
+ goto finish;
+ }
+
+ found++;
+ }
+
+ if (has_root_domain && found <= 0) {
+ /* If there's exactly one SRV RR and it uses
+ * the root domain as host name, then the
+ * service is explicitly not offered on the
+ * domain. Report this as a recognizable
+ * error. See RFC 2782, Section "Usage
+ * Rules". */
+ r = sd_bus_reply_method_errorf(q->request, BUS_ERROR_NO_SUCH_SERVICE, "'%s' does not provide the requested service", dns_query_string(q));
+ goto finish;
+ }
+
+ if (found <= 0) {
+ r = sd_bus_reply_method_errorf(q->request, BUS_ERROR_NO_SUCH_RR, "'%s' does not have any RR of the requested type", dns_query_string(q));
+ goto finish;
+ }
+
+ /* Maybe we are already finished? check now... */
+ resolve_service_all_complete(q);
+ return;
+
+finish:
+ if (r < 0) {
+ log_error_errno(r, "Failed to send service reply: %m");
+ sd_bus_reply_method_errno(q->request, r, NULL);
+ }
+
+ dns_query_free(q);
+}
+
+static int bus_method_resolve_service(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *question_idna = NULL, *question_utf8 = NULL;
+ const char *name, *type, *domain;
+ Manager *m = userdata;
+ int family, ifindex;
+ uint64_t flags;
+ DnsQuery *q;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(message, "isssit", &ifindex, &name, &type, &domain, &family, &flags);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(family, AF_INET, AF_INET6, AF_UNSPEC))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown address family %i", family);
+
+ if (isempty(name))
+ name = NULL;
+ else if (!dns_service_name_is_valid(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid service name '%s'", name);
+
+ if (isempty(type))
+ type = NULL;
+ else if (!dns_srv_type_is_valid(type))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid SRV service type '%s'", type);
+
+ r = dns_name_is_valid(domain);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid domain '%s'", domain);
+
+ if (name && !type)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Service name cannot be specified without service type.");
+
+ r = check_ifindex_flags(ifindex, &flags, SD_RESOLVED_NO_TXT|SD_RESOLVED_NO_ADDRESS, error);
+ if (r < 0)
+ return r;
+
+ r = dns_question_new_service(&question_utf8, name, type, domain, !(flags & SD_RESOLVED_NO_TXT), false);
+ if (r < 0)
+ return r;
+
+ r = dns_question_new_service(&question_idna, name, type, domain, !(flags & SD_RESOLVED_NO_TXT), true);
+ if (r < 0)
+ return r;
+
+ r = dns_query_new(m, &q, question_utf8, question_idna, ifindex, flags|SD_RESOLVED_NO_SEARCH);
+ if (r < 0)
+ return r;
+
+ q->request = sd_bus_message_ref(message);
+ q->request_family = family;
+ q->complete = bus_method_resolve_service_complete;
+
+ r = dns_query_bus_track(q, message);
+ if (r < 0)
+ goto fail;
+
+ r = dns_query_go(q);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ dns_query_free(q);
+ return r;
+}
+
+int bus_dns_server_append(sd_bus_message *reply, DnsServer *s, bool with_ifindex) {
+ int r;
+
+ assert(reply);
+
+ if (!s) {
+ if (with_ifindex)
+ return sd_bus_message_append(reply, "(iiay)", 0, AF_UNSPEC, 0);
+ else
+ return sd_bus_message_append(reply, "(iay)", AF_UNSPEC, 0);
+ }
+
+ r = sd_bus_message_open_container(reply, 'r', with_ifindex ? "iiay" : "iay");
+ if (r < 0)
+ return r;
+
+ if (with_ifindex) {
+ r = sd_bus_message_append(reply, "i", dns_server_ifindex(s));
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_append(reply, "i", s->family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &s->address, FAMILY_ADDRESS_SIZE(s->family));
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int bus_property_get_dns_servers(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ DnsServer *s;
+ Iterator i;
+ Link *l;
+ int r;
+
+ assert(reply);
+ assert(m);
+
+ r = sd_bus_message_open_container(reply, 'a', "(iiay)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(servers, s, m->dns_servers) {
+ r = bus_dns_server_append(reply, s, true);
+ if (r < 0)
+ return r;
+ }
+
+ HASHMAP_FOREACH(l, m->links, i) {
+ LIST_FOREACH(servers, s, l->dns_servers) {
+ r = bus_dns_server_append(reply, s, true);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int bus_property_get_fallback_dns_servers(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ DnsServer *s, **f = userdata;
+ int r;
+
+ assert(reply);
+ assert(f);
+
+ r = sd_bus_message_open_container(reply, 'a', "(iiay)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(servers, s, *f) {
+ r = bus_dns_server_append(reply, s, true);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int bus_property_get_current_dns_server(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ DnsServer *s;
+
+ assert(reply);
+ assert(userdata);
+
+ s = *(DnsServer **) userdata;
+
+ return bus_dns_server_append(reply, s, true);
+}
+
+static int bus_property_get_domains(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ DnsSearchDomain *d;
+ Iterator i;
+ Link *l;
+ int r;
+
+ assert(reply);
+ assert(m);
+
+ r = sd_bus_message_open_container(reply, 'a', "(isb)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(domains, d, m->search_domains) {
+ r = sd_bus_message_append(reply, "(isb)", 0, d->name, d->route_only);
+ if (r < 0)
+ return r;
+ }
+
+ HASHMAP_FOREACH(l, m->links, i) {
+ LIST_FOREACH(domains, d, l->search_domains) {
+ r = sd_bus_message_append(reply, "(isb)", l->ifindex, d->name, d->route_only);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int bus_property_get_transaction_statistics(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+
+ assert(reply);
+ assert(m);
+
+ return sd_bus_message_append(reply, "(tt)",
+ (uint64_t) hashmap_size(m->dns_transactions),
+ (uint64_t) m->n_transactions_total);
+}
+
+static int bus_property_get_cache_statistics(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ uint64_t size = 0, hit = 0, miss = 0;
+ Manager *m = userdata;
+ DnsScope *s;
+
+ assert(reply);
+ assert(m);
+
+ LIST_FOREACH(scopes, s, m->dns_scopes) {
+ size += dns_cache_size(&s->cache);
+ hit += s->cache.n_hit;
+ miss += s->cache.n_miss;
+ }
+
+ return sd_bus_message_append(reply, "(ttt)", size, hit, miss);
+}
+
+static int bus_property_get_dnssec_statistics(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+
+ assert(reply);
+ assert(m);
+
+ return sd_bus_message_append(reply, "(tttt)",
+ (uint64_t) m->n_dnssec_verdict[DNSSEC_SECURE],
+ (uint64_t) m->n_dnssec_verdict[DNSSEC_INSECURE],
+ (uint64_t) m->n_dnssec_verdict[DNSSEC_BOGUS],
+ (uint64_t) m->n_dnssec_verdict[DNSSEC_INDETERMINATE]);
+}
+
+static int bus_property_get_ntas(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ const char *domain;
+ Iterator i;
+ int r;
+
+ assert(reply);
+ assert(m);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(domain, m->trust_anchor.negative_by_name, i) {
+ r = sd_bus_message_append(reply, "s", domain);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_dns_stub_listener_mode, dns_stub_listener_mode, DnsStubListenerMode);
+static BUS_DEFINE_PROPERTY_GET(bus_property_get_dnssec_supported, "b", Manager, manager_dnssec_supported);
+static BUS_DEFINE_PROPERTY_GET2(bus_property_get_dnssec_mode, "s", Manager, manager_get_dnssec_mode, dnssec_mode_to_string);
+static BUS_DEFINE_PROPERTY_GET2(bus_property_get_dns_over_tls_mode, "s", Manager, manager_get_dns_over_tls_mode, dns_over_tls_mode_to_string);
+
+static int bus_method_reset_statistics(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ DnsScope *s;
+
+ assert(message);
+ assert(m);
+
+ LIST_FOREACH(scopes, s, m->dns_scopes)
+ s->cache.n_hit = s->cache.n_miss = 0;
+
+ m->n_transactions_total = 0;
+ zero(m->n_dnssec_verdict);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int get_any_link(Manager *m, int ifindex, Link **ret, sd_bus_error *error) {
+ Link *l;
+
+ assert(m);
+ assert(ret);
+
+ if (ifindex <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid interface index");
+
+ l = hashmap_get(m->links, INT_TO_PTR(ifindex));
+ if (!l)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_LINK, "Link %i not known", ifindex);
+
+ *ret = l;
+ return 0;
+}
+
+static int call_link_method(Manager *m, sd_bus_message *message, sd_bus_message_handler_t handler, sd_bus_error *error) {
+ int ifindex, r;
+ Link *l;
+
+ assert(m);
+ assert(message);
+ assert(handler);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+ r = sd_bus_message_read(message, "i", &ifindex);
+ if (r < 0)
+ return r;
+
+ r = get_any_link(m, ifindex, &l, error);
+ if (r < 0)
+ return r;
+
+ return handler(message, l, error);
+}
+
+static int bus_method_set_link_dns_servers(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_dns_servers, error);
+}
+
+static int bus_method_set_link_domains(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_domains, error);
+}
+
+static int bus_method_set_link_default_route(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_default_route, error);
+}
+
+static int bus_method_set_link_llmnr(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_llmnr, error);
+}
+
+static int bus_method_set_link_mdns(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_mdns, error);
+}
+
+static int bus_method_set_link_dns_over_tls(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_dns_over_tls, error);
+}
+
+static int bus_method_set_link_dnssec(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_dnssec, error);
+}
+
+static int bus_method_set_link_dnssec_negative_trust_anchors(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_dnssec_negative_trust_anchors, error);
+}
+
+static int bus_method_revert_link(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_revert, error);
+}
+
+static int bus_method_get_link(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ int r, ifindex;
+ Link *l;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+ r = sd_bus_message_read(message, "i", &ifindex);
+ if (r < 0)
+ return r;
+
+ r = get_any_link(m, ifindex, &l, error);
+ if (r < 0)
+ return r;
+
+ p = link_bus_path(l);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int bus_method_flush_caches(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ assert(message);
+ assert(m);
+
+ manager_flush_caches(m);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int bus_method_reset_server_features(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ assert(message);
+ assert(m);
+
+ manager_reset_server_features(m);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int on_bus_track(sd_bus_track *t, void *userdata) {
+ DnssdService *s = userdata;
+
+ assert(t);
+ assert(s);
+
+ log_debug("Client of active request vanished, destroying DNS-SD service.");
+ dnssd_service_free(s);
+
+ return 0;
+}
+
+static int bus_method_register_service(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ _cleanup_(dnssd_service_freep) DnssdService *service = NULL;
+ _cleanup_(sd_bus_track_unrefp) sd_bus_track *bus_track = NULL;
+ _cleanup_free_ char *path = NULL;
+ _cleanup_free_ char *instance_name = NULL;
+ Manager *m = userdata;
+ DnssdService *s = NULL;
+ const char *name;
+ const char *name_template;
+ const char *type;
+ uid_t euid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ if (m->mdns_support != RESOLVE_SUPPORT_YES)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Support for MulticastDNS is disabled");
+
+ r = bus_verify_polkit_async(message, CAP_SYS_ADMIN,
+ "org.freedesktop.resolve1.register-service",
+ NULL, false, UID_INVALID,
+ &m->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ service = new0(DnssdService, 1);
+ if (!service)
+ return log_oom();
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &euid);
+ if (r < 0)
+ return r;
+ service->originator = euid;
+
+ r = sd_bus_message_read(message, "sssqqq", &name, &name_template, &type,
+ &service->port, &service->priority,
+ &service->weight);
+ if (r < 0)
+ return r;
+
+ s = hashmap_get(m->dnssd_services, name);
+ if (s)
+ return sd_bus_error_setf(error, BUS_ERROR_DNSSD_SERVICE_EXISTS, "DNS-SD service '%s' exists already", name);
+
+ if (!dnssd_srv_type_is_valid(type))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "DNS-SD service type '%s' is invalid", type);
+
+ service->name = strdup(name);
+ if (!service->name)
+ return log_oom();
+
+ service->name_template = strdup(name_template);
+ if (!service->name_template)
+ return log_oom();
+
+ service->type = strdup(type);
+ if (!service->type)
+ return log_oom();
+
+ r = dnssd_render_instance_name(service, &instance_name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(message, SD_BUS_TYPE_ARRAY, "a{say}");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_enter_container(message, SD_BUS_TYPE_ARRAY, "{say}")) > 0) {
+ _cleanup_(dnssd_txtdata_freep) DnssdTxtData *txt_data = NULL;
+ DnsTxtItem *last = NULL;
+
+ txt_data = new0(DnssdTxtData, 1);
+ if (!txt_data)
+ return log_oom();
+
+ while ((r = sd_bus_message_enter_container(message, SD_BUS_TYPE_DICT_ENTRY, "say")) > 0) {
+ const char *key;
+ const void *value;
+ size_t size;
+ DnsTxtItem *i;
+
+ r = sd_bus_message_read(message, "s", &key);
+ if (r < 0)
+ return r;
+
+ if (isempty(key))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Keys in DNS-SD TXT RRs can't be empty");
+
+ if (!ascii_is_valid(key))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "TXT key '%s' contains non-ASCII symbols", key);
+
+ r = sd_bus_message_read_array(message, 'y', &value, &size);
+ if (r < 0)
+ return r;
+
+ r = dnssd_txt_item_new_from_data(key, value, size, &i);
+ if (r < 0)
+ return r;
+
+ LIST_INSERT_AFTER(items, txt_data->txt, last, i);
+ last = i;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (txt_data->txt) {
+ LIST_PREPEND(items, service->txt_data_items, txt_data);
+ txt_data = NULL;
+ }
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!service->txt_data_items) {
+ _cleanup_(dnssd_txtdata_freep) DnssdTxtData *txt_data = NULL;
+
+ txt_data = new0(DnssdTxtData, 1);
+ if (!txt_data)
+ return log_oom();
+
+ r = dns_txt_item_new_empty(&txt_data->txt);
+ if (r < 0)
+ return r;
+
+ LIST_PREPEND(items, service->txt_data_items, txt_data);
+ txt_data = NULL;
+ }
+
+ r = sd_bus_path_encode("/org/freedesktop/resolve1/dnssd", service->name, &path);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&m->dnssd_services, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->dnssd_services, service->name, service);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_track_new(sd_bus_message_get_bus(message), &bus_track, on_bus_track, service);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_track_add_sender(bus_track, message);
+ if (r < 0)
+ return r;
+
+ service->manager = m;
+
+ service = NULL;
+
+ manager_refresh_rrs(m);
+
+ return sd_bus_reply_method_return(message, "o", path);
+}
+
+static int call_dnssd_method(Manager *m, sd_bus_message *message, sd_bus_message_handler_t handler, sd_bus_error *error) {
+ _cleanup_free_ char *name = NULL;
+ DnssdService *s = NULL;
+ const char *path;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(handler);
+
+ r = sd_bus_message_read(message, "o", &path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_path_decode(path, "/org/freedesktop/resolve1/dnssd", &name);
+ if (r == 0)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DNSSD_SERVICE, "DNS-SD service with object path '%s' does not exist", path);
+ if (r < 0)
+ return r;
+
+ s = hashmap_get(m->dnssd_services, name);
+ if (!s)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DNSSD_SERVICE, "DNS-SD service '%s' not known", name);
+
+ return handler(message, s, error);
+}
+
+static int bus_method_unregister_service(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ assert(message);
+ assert(m);
+
+ return call_dnssd_method(m, message, bus_dnssd_method_unregister, error);
+}
+
+static const sd_bus_vtable resolve_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("LLMNRHostname", "s", NULL, offsetof(Manager, llmnr_hostname), 0),
+ SD_BUS_PROPERTY("LLMNR", "s", bus_property_get_resolve_support, offsetof(Manager, llmnr_support), 0),
+ SD_BUS_PROPERTY("MulticastDNS", "s", bus_property_get_resolve_support, offsetof(Manager, mdns_support), 0),
+ SD_BUS_PROPERTY("DNSOverTLS", "s", bus_property_get_dns_over_tls_mode, 0, 0),
+ SD_BUS_PROPERTY("DNS", "a(iiay)", bus_property_get_dns_servers, 0, 0),
+ SD_BUS_PROPERTY("FallbackDNS", "a(iiay)", bus_property_get_fallback_dns_servers, offsetof(Manager, fallback_dns_servers), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CurrentDNSServer", "(iiay)", bus_property_get_current_dns_server, offsetof(Manager, current_dns_server), 0),
+ SD_BUS_PROPERTY("Domains", "a(isb)", bus_property_get_domains, 0, 0),
+ SD_BUS_PROPERTY("TransactionStatistics", "(tt)", bus_property_get_transaction_statistics, 0, 0),
+ SD_BUS_PROPERTY("CacheStatistics", "(ttt)", bus_property_get_cache_statistics, 0, 0),
+ SD_BUS_PROPERTY("DNSSEC", "s", bus_property_get_dnssec_mode, 0, 0),
+ SD_BUS_PROPERTY("DNSSECStatistics", "(tttt)", bus_property_get_dnssec_statistics, 0, 0),
+ SD_BUS_PROPERTY("DNSSECSupported", "b", bus_property_get_dnssec_supported, 0, 0),
+ SD_BUS_PROPERTY("DNSSECNegativeTrustAnchors", "as", bus_property_get_ntas, 0, 0),
+ SD_BUS_PROPERTY("DNSStubListener", "s", bus_property_get_dns_stub_listener_mode, offsetof(Manager, dns_stub_listener_mode), 0),
+
+ SD_BUS_METHOD("ResolveHostname", "isit", "a(iiay)st", bus_method_resolve_hostname, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ResolveAddress", "iiayt", "a(is)t", bus_method_resolve_address, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ResolveRecord", "isqqt", "a(iqqay)t", bus_method_resolve_record, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ResolveService", "isssit", "a(qqqsa(iiay)s)aayssst", bus_method_resolve_service, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ResetStatistics", NULL, NULL, bus_method_reset_statistics, 0),
+ SD_BUS_METHOD("FlushCaches", NULL, NULL, bus_method_flush_caches, 0),
+ SD_BUS_METHOD("ResetServerFeatures", NULL, NULL, bus_method_reset_server_features, 0),
+ SD_BUS_METHOD("GetLink", "i", "o", bus_method_get_link, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLinkDNS", "ia(iay)", NULL, bus_method_set_link_dns_servers, 0),
+ SD_BUS_METHOD("SetLinkDomains", "ia(sb)", NULL, bus_method_set_link_domains, 0),
+ SD_BUS_METHOD("SetLinkDefaultRoute", "ib", NULL, bus_method_set_link_default_route, 0),
+ SD_BUS_METHOD("SetLinkLLMNR", "is", NULL, bus_method_set_link_llmnr, 0),
+ SD_BUS_METHOD("SetLinkMulticastDNS", "is", NULL, bus_method_set_link_mdns, 0),
+ SD_BUS_METHOD("SetLinkDNSOverTLS", "is", NULL, bus_method_set_link_dns_over_tls, 0),
+ SD_BUS_METHOD("SetLinkDNSSEC", "is", NULL, bus_method_set_link_dnssec, 0),
+ SD_BUS_METHOD("SetLinkDNSSECNegativeTrustAnchors", "ias", NULL, bus_method_set_link_dnssec_negative_trust_anchors, 0),
+ SD_BUS_METHOD("RevertLink", "i", NULL, bus_method_revert_link, 0),
+
+ SD_BUS_METHOD("RegisterService", "sssqqqaa{say}", "o", bus_method_register_service, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("UnregisterService", "o", NULL, bus_method_unregister_service, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END,
+};
+
+static int match_prepare_for_sleep(sd_bus_message *message, void *userdata, sd_bus_error *ret_error) {
+ Manager *m = userdata;
+ int b, r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse PrepareForSleep signal: %m");
+ return 0;
+ }
+
+ if (b)
+ return 0;
+
+ log_debug("Coming back from suspend, verifying all RRs...");
+
+ manager_verify_all(m);
+ return 0;
+}
+
+int manager_connect_bus(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->bus)
+ return 0;
+
+ r = bus_open_system_watch_bind_with_description(&m->bus, "bus-api-resolve");
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = sd_bus_add_object_vtable(m->bus, NULL, "/org/freedesktop/resolve1", "org.freedesktop.resolve1.Manager", resolve_vtable, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register object: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/resolve1/link", "org.freedesktop.resolve1.Link", link_vtable, link_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register link objects: %m");
+
+ r = sd_bus_add_node_enumerator(m->bus, NULL, "/org/freedesktop/resolve1/link", link_node_enumerator, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register link enumerator: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/resolve1/dnssd", "org.freedesktop.resolve1.DnssdService", dnssd_vtable, dnssd_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register dnssd objects: %m");
+
+ r = sd_bus_add_node_enumerator(m->bus, NULL, "/org/freedesktop/resolve1/dnssd", dnssd_node_enumerator, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register dnssd enumerator: %m");
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.resolve1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "PrepareForSleep",
+ match_prepare_for_sleep,
+ NULL,
+ m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to request match for PrepareForSleep, ignoring: %m");
+
+ return 0;
+}
diff --git a/src/resolve/resolved-bus.h b/src/resolve/resolved-bus.h
new file mode 100644
index 0000000..a52bb0f
--- /dev/null
+++ b/src/resolve/resolved-bus.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "resolved-manager.h"
+
+int manager_connect_bus(Manager *m);
+int bus_dns_server_append(sd_bus_message *reply, DnsServer *s, bool with_ifindex);
+int bus_property_get_resolve_support(sd_bus *bus, const char *path, const char *interface,
+ const char *property, sd_bus_message *reply,
+ void *userdata, sd_bus_error *error);
diff --git a/src/resolve/resolved-conf.c b/src/resolve/resolved-conf.c
new file mode 100644
index 0000000..79e388f
--- /dev/null
+++ b/src/resolve/resolved-conf.c
@@ -0,0 +1,403 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "extract-word.h"
+#include "hexdecoct.h"
+#include "parse-util.h"
+#include "resolved-conf.h"
+#include "resolved-dnssd.h"
+#include "specifier.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "utf8.h"
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dns_stub_listener_mode, dns_stub_listener_mode, DnsStubListenerMode, "Failed to parse DNS stub listener mode setting");
+
+static const char* const dns_stub_listener_mode_table[_DNS_STUB_LISTENER_MODE_MAX] = {
+ [DNS_STUB_LISTENER_NO] = "no",
+ [DNS_STUB_LISTENER_UDP] = "udp",
+ [DNS_STUB_LISTENER_TCP] = "tcp",
+ [DNS_STUB_LISTENER_YES] = "yes",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_stub_listener_mode, DnsStubListenerMode, DNS_STUB_LISTENER_YES);
+
+int manager_add_dns_server_by_string(Manager *m, DnsServerType type, const char *word) {
+ union in_addr_union address;
+ int family, r, ifindex = 0;
+ DnsServer *s;
+
+ assert(m);
+ assert(word);
+
+ r = in_addr_ifindex_from_string_auto(word, &family, &address, &ifindex);
+ if (r < 0)
+ return r;
+
+ /* Silently filter out 0.0.0.0 and 127.0.0.53 (our own stub DNS listener) */
+ if (!dns_server_address_valid(family, &address))
+ return 0;
+
+ /* Filter out duplicates */
+ s = dns_server_find(manager_get_first_dns_server(m, type), family, &address, ifindex);
+ if (s) {
+ /*
+ * Drop the marker. This is used to find the servers
+ * that ceased to exist, see
+ * manager_mark_dns_servers() and
+ * manager_flush_marked_dns_servers().
+ */
+ dns_server_move_back_and_unmark(s);
+ return 0;
+ }
+
+ return dns_server_new(m, NULL, type, NULL, family, &address, ifindex);
+}
+
+int manager_parse_dns_server_string_and_warn(Manager *m, DnsServerType type, const char *string) {
+ int r;
+
+ assert(m);
+ assert(string);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&string, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = manager_add_dns_server_by_string(m, type, word);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add DNS server address '%s', ignoring: %m", word);
+ }
+
+ return 0;
+}
+
+int manager_add_search_domain_by_string(Manager *m, const char *domain) {
+ DnsSearchDomain *d;
+ bool route_only;
+ int r;
+
+ assert(m);
+ assert(domain);
+
+ route_only = *domain == '~';
+ if (route_only)
+ domain++;
+
+ if (dns_name_is_root(domain) || streq(domain, "*")) {
+ route_only = true;
+ domain = ".";
+ }
+
+ r = dns_search_domain_find(m->search_domains, domain, &d);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ dns_search_domain_move_back_and_unmark(d);
+ else {
+ r = dns_search_domain_new(m, &d, DNS_SEARCH_DOMAIN_SYSTEM, NULL, domain);
+ if (r < 0)
+ return r;
+ }
+
+ d->route_only = route_only;
+ return 0;
+}
+
+int manager_parse_search_domains_and_warn(Manager *m, const char *string) {
+ int r;
+
+ assert(m);
+ assert(string);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&string, &word, NULL, EXTRACT_QUOTES);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = manager_add_search_domain_by_string(m, word);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add search domain '%s', ignoring: %m", word);
+ }
+
+ return 0;
+}
+
+int config_parse_dns_servers(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Manager *m = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(m);
+
+ if (isempty(rvalue))
+ /* Empty assignment means clear the list */
+ dns_server_unlink_all(manager_get_first_dns_server(m, ltype));
+ else {
+ /* Otherwise, add to the list */
+ r = manager_parse_dns_server_string_and_warn(m, ltype, rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse DNS server string '%s'. Ignoring.", rvalue);
+ return 0;
+ }
+ }
+
+ /* If we have a manual setting, then we stop reading
+ * /etc/resolv.conf */
+ if (ltype == DNS_SERVER_SYSTEM)
+ m->read_resolv_conf = false;
+ if (ltype == DNS_SERVER_FALLBACK)
+ m->need_builtin_fallbacks = false;
+
+ return 0;
+}
+
+int config_parse_search_domains(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Manager *m = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(m);
+
+ if (isempty(rvalue))
+ /* Empty assignment means clear the list */
+ dns_search_domain_unlink_all(m->search_domains);
+ else {
+ /* Otherwise, add to the list */
+ r = manager_parse_search_domains_and_warn(m, rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse search domains string '%s'. Ignoring.", rvalue);
+ return 0;
+ }
+ }
+
+ /* If we have a manual setting, then we stop reading
+ * /etc/resolv.conf */
+ m->read_resolv_conf = false;
+
+ return 0;
+}
+
+int config_parse_dnssd_service_name(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata) {
+ static const Specifier specifier_table[] = {
+ { 'b', specifier_boot_id, NULL },
+ { 'H', specifier_host_name, NULL },
+ { 'm', specifier_machine_id, NULL },
+ { 'v', specifier_kernel_release, NULL },
+ {}
+ };
+ DnssdService *s = userdata;
+ _cleanup_free_ char *name = NULL;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(s);
+
+ if (isempty(rvalue)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Service instance name can't be empty. Ignoring.");
+ return -EINVAL;
+ }
+
+ r = free_and_strdup(&s->name_template, rvalue);
+ if (r < 0)
+ return log_oom();
+
+ r = specifier_printf(s->name_template, specifier_table, NULL, &name);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to replace specifiers: %m");
+
+ if (!dns_service_name_is_valid(name)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Service instance name template renders to invalid name '%s'. Ignoring.", name);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int config_parse_dnssd_service_type(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata) {
+ DnssdService *s = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(s);
+
+ if (isempty(rvalue)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Service type can't be empty. Ignoring.");
+ return -EINVAL;
+ }
+
+ if (!dnssd_srv_type_is_valid(rvalue)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Service type is invalid. Ignoring.");
+ return -EINVAL;
+ }
+
+ r = free_and_strdup(&s->type, rvalue);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_dnssd_txt(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata) {
+ _cleanup_(dnssd_txtdata_freep) DnssdTxtData *txt_data = NULL;
+ DnssdService *s = userdata;
+ DnsTxtItem *last = NULL;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(s);
+
+ if (isempty(rvalue)) {
+ /* Flush out collected items */
+ s->txt_data_items = dnssd_txtdata_free_all(s->txt_data_items);
+ return 0;
+ }
+
+ txt_data = new0(DnssdTxtData, 1);
+ if (!txt_data)
+ return log_oom();
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ _cleanup_free_ char *key = NULL;
+ _cleanup_free_ char *value = NULL;
+ _cleanup_free_ void *decoded = NULL;
+ size_t length = 0;
+ DnsTxtItem *i;
+ int r;
+
+ r = extract_first_word(&rvalue, &word, NULL,
+ EXTRACT_QUOTES|EXTRACT_CUNESCAPE|EXTRACT_CUNESCAPE_RELAX);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+
+ r = split_pair(word, "=", &key, &value);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r == -EINVAL)
+ key = TAKE_PTR(word);
+
+ if (!ascii_is_valid(key)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid syntax, ignoring: %s", key);
+ return -EINVAL;
+ }
+
+ switch (ltype) {
+
+ case DNS_TXT_ITEM_DATA:
+ if (value) {
+ r = unbase64mem(value, strlen(value), &decoded, &length);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r,
+ "Invalid base64 encoding, ignoring: %s", value);
+ }
+
+ r = dnssd_txt_item_new_from_data(key, decoded, length, &i);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case DNS_TXT_ITEM_TEXT:
+ r = dnssd_txt_item_new_from_string(key, value, &i);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ default:
+ assert_not_reached("Unknown type of Txt config");
+ }
+
+ LIST_INSERT_AFTER(items, txt_data->txt, last, i);
+ last = i;
+ }
+
+ if (!LIST_IS_EMPTY(txt_data->txt)) {
+ LIST_PREPEND(items, s->txt_data_items, txt_data);
+ txt_data = NULL;
+ }
+
+ return 0;
+}
+
+int manager_parse_config_file(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = config_parse_many_nulstr(PKGSYSCONFDIR "/resolved.conf",
+ CONF_PATHS_NULSTR("systemd/resolved.conf.d"),
+ "Resolve\0",
+ config_item_perf_lookup, resolved_gperf_lookup,
+ CONFIG_PARSE_WARN, m);
+ if (r < 0)
+ return r;
+
+ if (m->need_builtin_fallbacks) {
+ r = manager_parse_dns_server_string_and_warn(m, DNS_SERVER_FALLBACK, DNS_SERVERS);
+ if (r < 0)
+ return r;
+ }
+
+#if ! HAVE_GCRYPT
+ if (m->dnssec_mode != DNSSEC_NO) {
+ log_warning("DNSSEC option cannot be enabled or set to allow-downgrade when systemd-resolved is built without gcrypt support. Turning off DNSSEC support.");
+ m->dnssec_mode = DNSSEC_NO;
+ }
+#endif
+
+#if ! ENABLE_DNS_OVER_TLS
+ if (m->dns_over_tls_mode != DNS_OVER_TLS_NO) {
+ log_warning("DNS-over-TLS option cannot be set to opportunistic when systemd-resolved is built without DNS-over-TLS support. Turning off DNS-over-TLS support.");
+ m->dns_over_tls_mode = DNS_OVER_TLS_NO;
+ }
+#endif
+ return 0;
+
+}
diff --git a/src/resolve/resolved-conf.h b/src/resolve/resolved-conf.h
new file mode 100644
index 0000000..f6efa17
--- /dev/null
+++ b/src/resolve/resolved-conf.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "conf-parser.h"
+
+typedef enum DnsStubListenerMode DnsStubListenerMode;
+
+enum DnsStubListenerMode {
+ DNS_STUB_LISTENER_NO,
+ DNS_STUB_LISTENER_UDP,
+ DNS_STUB_LISTENER_TCP,
+ DNS_STUB_LISTENER_YES,
+ _DNS_STUB_LISTENER_MODE_MAX,
+ _DNS_STUB_LISTENER_MODE_INVALID = -1
+};
+
+#include "resolved-manager.h"
+#include "resolved-dns-server.h"
+
+int manager_parse_config_file(Manager *m);
+
+int manager_add_search_domain_by_string(Manager *m, const char *domain);
+int manager_parse_search_domains_and_warn(Manager *m, const char *string);
+
+int manager_add_dns_server_by_string(Manager *m, DnsServerType type, const char *word);
+int manager_parse_dns_server_string_and_warn(Manager *m, DnsServerType type, const char *string);
+
+const struct ConfigPerfItem* resolved_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+const struct ConfigPerfItem* resolved_dnssd_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_dns_servers);
+CONFIG_PARSER_PROTOTYPE(config_parse_search_domains);
+CONFIG_PARSER_PROTOTYPE(config_parse_dns_stub_listener_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_dnssd_service_name);
+CONFIG_PARSER_PROTOTYPE(config_parse_dnssd_service_type);
+CONFIG_PARSER_PROTOTYPE(config_parse_dnssd_txt);
+
+const char* dns_stub_listener_mode_to_string(DnsStubListenerMode p) _const_;
+DnsStubListenerMode dns_stub_listener_mode_from_string(const char *s) _pure_;
diff --git a/src/resolve/resolved-def.h b/src/resolve/resolved-def.h
new file mode 100644
index 0000000..ea2851b
--- /dev/null
+++ b/src/resolve/resolved-def.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <inttypes.h>
+
+#include "time-util.h"
+
+#define SD_RESOLVED_DNS (UINT64_C(1) << 0)
+#define SD_RESOLVED_LLMNR_IPV4 (UINT64_C(1) << 1)
+#define SD_RESOLVED_LLMNR_IPV6 (UINT64_C(1) << 2)
+#define SD_RESOLVED_MDNS_IPV4 (UINT64_C(1) << 3)
+#define SD_RESOLVED_MDNS_IPV6 (UINT64_C(1) << 4)
+#define SD_RESOLVED_NO_CNAME (UINT64_C(1) << 5)
+#define SD_RESOLVED_NO_TXT (UINT64_C(1) << 6)
+#define SD_RESOLVED_NO_ADDRESS (UINT64_C(1) << 7)
+#define SD_RESOLVED_NO_SEARCH (UINT64_C(1) << 8)
+#define SD_RESOLVED_AUTHENTICATED (UINT64_C(1) << 9)
+
+#define SD_RESOLVED_LLMNR (SD_RESOLVED_LLMNR_IPV4|SD_RESOLVED_LLMNR_IPV6)
+#define SD_RESOLVED_MDNS (SD_RESOLVED_MDNS_IPV4|SD_RESOLVED_MDNS_IPV6)
+
+#define SD_RESOLVED_PROTOCOLS_ALL (SD_RESOLVED_MDNS|SD_RESOLVED_LLMNR|SD_RESOLVED_DNS)
+
+#define SD_RESOLVED_QUERY_TIMEOUT_USEC (120 * USEC_PER_SEC)
+
+/* 127.0.0.53 in native endian */
+#define INADDR_DNS_STUB ((in_addr_t) 0x7f000035U)
diff --git a/src/resolve/resolved-dns-answer.c b/src/resolve/resolved-dns-answer.c
new file mode 100644
index 0000000..d7252d3
--- /dev/null
+++ b/src/resolve/resolved-dns-answer.c
@@ -0,0 +1,757 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "resolved-dns-answer.h"
+#include "resolved-dns-dnssec.h"
+#include "string-util.h"
+
+DnsAnswer *dns_answer_new(size_t n) {
+ DnsAnswer *a;
+
+ a = malloc0(offsetof(DnsAnswer, items) + sizeof(DnsAnswerItem) * n);
+ if (!a)
+ return NULL;
+
+ a->n_ref = 1;
+ a->n_allocated = n;
+
+ return a;
+}
+
+static void dns_answer_flush(DnsAnswer *a) {
+ DnsResourceRecord *rr;
+
+ if (!a)
+ return;
+
+ DNS_ANSWER_FOREACH(rr, a)
+ dns_resource_record_unref(rr);
+
+ a->n_rrs = 0;
+}
+
+static DnsAnswer *dns_answer_free(DnsAnswer *a) {
+ assert(a);
+
+ dns_answer_flush(a);
+ return mfree(a);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsAnswer, dns_answer, dns_answer_free);
+
+static int dns_answer_add_raw(DnsAnswer *a, DnsResourceRecord *rr, int ifindex, DnsAnswerFlags flags) {
+ assert(rr);
+
+ if (!a)
+ return -ENOSPC;
+
+ if (a->n_rrs >= a->n_allocated)
+ return -ENOSPC;
+
+ a->items[a->n_rrs++] = (DnsAnswerItem) {
+ .rr = dns_resource_record_ref(rr),
+ .ifindex = ifindex,
+ .flags = flags,
+ };
+
+ return 1;
+}
+
+static int dns_answer_add_raw_all(DnsAnswer *a, DnsAnswer *source) {
+ DnsResourceRecord *rr;
+ DnsAnswerFlags flags;
+ int ifindex, r;
+
+ DNS_ANSWER_FOREACH_FULL(rr, ifindex, flags, source) {
+ r = dns_answer_add_raw(a, rr, ifindex, flags);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_answer_add(DnsAnswer *a, DnsResourceRecord *rr, int ifindex, DnsAnswerFlags flags) {
+ size_t i;
+ int r;
+
+ assert(rr);
+
+ if (!a)
+ return -ENOSPC;
+ if (a->n_ref > 1)
+ return -EBUSY;
+
+ for (i = 0; i < a->n_rrs; i++) {
+ if (a->items[i].ifindex != ifindex)
+ continue;
+
+ r = dns_resource_record_equal(a->items[i].rr, rr);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* Don't mix contradicting TTLs (see below) */
+ if ((rr->ttl == 0) != (a->items[i].rr->ttl == 0))
+ return -EINVAL;
+
+ /* Entry already exists, keep the entry with
+ * the higher RR. */
+ if (rr->ttl > a->items[i].rr->ttl) {
+ dns_resource_record_ref(rr);
+ dns_resource_record_unref(a->items[i].rr);
+ a->items[i].rr = rr;
+ }
+
+ a->items[i].flags |= flags;
+ return 0;
+ }
+
+ r = dns_resource_key_equal(a->items[i].rr->key, rr->key);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* There's already an RR of the same RRset in
+ * place! Let's see if the TTLs more or less
+ * match. We don't really care if they match
+ * precisely, but we do care whether one is 0
+ * and the other is not. See RFC 2181, Section
+ * 5.2. */
+
+ if ((rr->ttl == 0) != (a->items[i].rr->ttl == 0))
+ return -EINVAL;
+ }
+ }
+
+ return dns_answer_add_raw(a, rr, ifindex, flags);
+}
+
+static int dns_answer_add_all(DnsAnswer *a, DnsAnswer *b) {
+ DnsResourceRecord *rr;
+ DnsAnswerFlags flags;
+ int ifindex, r;
+
+ DNS_ANSWER_FOREACH_FULL(rr, ifindex, flags, b) {
+ r = dns_answer_add(a, rr, ifindex, flags);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_answer_add_extend(DnsAnswer **a, DnsResourceRecord *rr, int ifindex, DnsAnswerFlags flags) {
+ int r;
+
+ assert(a);
+ assert(rr);
+
+ r = dns_answer_reserve_or_clone(a, 1);
+ if (r < 0)
+ return r;
+
+ return dns_answer_add(*a, rr, ifindex, flags);
+}
+
+int dns_answer_add_soa(DnsAnswer *a, const char *name, uint32_t ttl, int ifindex) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *soa = NULL;
+
+ soa = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_SOA, name);
+ if (!soa)
+ return -ENOMEM;
+
+ soa->ttl = ttl;
+
+ soa->soa.mname = strdup(name);
+ if (!soa->soa.mname)
+ return -ENOMEM;
+
+ soa->soa.rname = strappend("root.", name);
+ if (!soa->soa.rname)
+ return -ENOMEM;
+
+ soa->soa.serial = 1;
+ soa->soa.refresh = 1;
+ soa->soa.retry = 1;
+ soa->soa.expire = 1;
+ soa->soa.minimum = ttl;
+
+ return dns_answer_add(a, soa, ifindex, DNS_ANSWER_AUTHENTICATED);
+}
+
+int dns_answer_match_key(DnsAnswer *a, const DnsResourceKey *key, DnsAnswerFlags *ret_flags) {
+ DnsAnswerFlags flags = 0, i_flags;
+ DnsResourceRecord *i;
+ bool found = false;
+ int r;
+
+ assert(key);
+
+ DNS_ANSWER_FOREACH_FLAGS(i, i_flags, a) {
+ r = dns_resource_key_match_rr(key, i, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (!ret_flags)
+ return 1;
+
+ if (found)
+ flags &= i_flags;
+ else {
+ flags = i_flags;
+ found = true;
+ }
+ }
+
+ if (ret_flags)
+ *ret_flags = flags;
+
+ return found;
+}
+
+int dns_answer_contains_nsec_or_nsec3(DnsAnswer *a) {
+ DnsResourceRecord *i;
+
+ DNS_ANSWER_FOREACH(i, a) {
+ if (IN_SET(i->key->type, DNS_TYPE_NSEC, DNS_TYPE_NSEC3))
+ return true;
+ }
+
+ return false;
+}
+
+int dns_answer_contains_zone_nsec3(DnsAnswer *answer, const char *zone) {
+ DnsResourceRecord *rr;
+ int r;
+
+ /* Checks whether the specified answer contains at least one NSEC3 RR in the specified zone */
+
+ DNS_ANSWER_FOREACH(rr, answer) {
+ const char *p;
+
+ if (rr->key->type != DNS_TYPE_NSEC3)
+ continue;
+
+ p = dns_resource_key_name(rr->key);
+ r = dns_name_parent(&p);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dns_name_equal(p, zone);
+ if (r != 0)
+ return r;
+ }
+
+ return false;
+}
+
+int dns_answer_find_soa(DnsAnswer *a, const DnsResourceKey *key, DnsResourceRecord **ret, DnsAnswerFlags *flags) {
+ DnsResourceRecord *rr, *soa = NULL;
+ DnsAnswerFlags rr_flags, soa_flags = 0;
+ int r;
+
+ assert(key);
+
+ /* For a SOA record we can never find a matching SOA record */
+ if (key->type == DNS_TYPE_SOA)
+ return 0;
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, rr_flags, a) {
+ r = dns_resource_key_match_soa(key, rr->key);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+
+ if (soa) {
+ r = dns_name_endswith(dns_resource_key_name(rr->key), dns_resource_key_name(soa->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+ }
+
+ soa = rr;
+ soa_flags = rr_flags;
+ }
+ }
+
+ if (!soa)
+ return 0;
+
+ if (ret)
+ *ret = soa;
+ if (flags)
+ *flags = soa_flags;
+
+ return 1;
+}
+
+int dns_answer_find_cname_or_dname(DnsAnswer *a, const DnsResourceKey *key, DnsResourceRecord **ret, DnsAnswerFlags *flags) {
+ DnsResourceRecord *rr;
+ DnsAnswerFlags rr_flags;
+ int r;
+
+ assert(key);
+
+ /* For a {C,D}NAME record we can never find a matching {C,D}NAME record */
+ if (!dns_type_may_redirect(key->type))
+ return 0;
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, rr_flags, a) {
+ r = dns_resource_key_match_cname_or_dname(key, rr->key, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (ret)
+ *ret = rr;
+ if (flags)
+ *flags = rr_flags;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int dns_answer_merge(DnsAnswer *a, DnsAnswer *b, DnsAnswer **ret) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *k = NULL;
+ int r;
+
+ assert(ret);
+
+ if (dns_answer_size(a) <= 0) {
+ *ret = dns_answer_ref(b);
+ return 0;
+ }
+
+ if (dns_answer_size(b) <= 0) {
+ *ret = dns_answer_ref(a);
+ return 0;
+ }
+
+ k = dns_answer_new(a->n_rrs + b->n_rrs);
+ if (!k)
+ return -ENOMEM;
+
+ r = dns_answer_add_raw_all(k, a);
+ if (r < 0)
+ return r;
+
+ r = dns_answer_add_all(k, b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(k);
+
+ return 0;
+}
+
+int dns_answer_extend(DnsAnswer **a, DnsAnswer *b) {
+ DnsAnswer *merged;
+ int r;
+
+ assert(a);
+
+ r = dns_answer_merge(*a, b, &merged);
+ if (r < 0)
+ return r;
+
+ dns_answer_unref(*a);
+ *a = merged;
+
+ return 0;
+}
+
+int dns_answer_remove_by_key(DnsAnswer **a, const DnsResourceKey *key) {
+ bool found = false, other = false;
+ DnsResourceRecord *rr;
+ size_t i;
+ int r;
+
+ assert(a);
+ assert(key);
+
+ /* Remove all entries matching the specified key from *a */
+
+ DNS_ANSWER_FOREACH(rr, *a) {
+ r = dns_resource_key_equal(rr->key, key);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ found = true;
+ else
+ other = true;
+
+ if (found && other)
+ break;
+ }
+
+ if (!found)
+ return 0;
+
+ if (!other) {
+ *a = dns_answer_unref(*a); /* Return NULL for the empty answer */
+ return 1;
+ }
+
+ if ((*a)->n_ref > 1) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *copy = NULL;
+ DnsAnswerFlags flags;
+ int ifindex;
+
+ copy = dns_answer_new((*a)->n_rrs);
+ if (!copy)
+ return -ENOMEM;
+
+ DNS_ANSWER_FOREACH_FULL(rr, ifindex, flags, *a) {
+ r = dns_resource_key_equal(rr->key, key);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ r = dns_answer_add_raw(copy, rr, ifindex, flags);
+ if (r < 0)
+ return r;
+ }
+
+ dns_answer_unref(*a);
+ *a = TAKE_PTR(copy);
+
+ return 1;
+ }
+
+ /* Only a single reference, edit in-place */
+
+ i = 0;
+ for (;;) {
+ if (i >= (*a)->n_rrs)
+ break;
+
+ r = dns_resource_key_equal((*a)->items[i].rr->key, key);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* Kill this entry */
+
+ dns_resource_record_unref((*a)->items[i].rr);
+ memmove((*a)->items + i, (*a)->items + i + 1, sizeof(DnsAnswerItem) * ((*a)->n_rrs - i - 1));
+ (*a)->n_rrs--;
+ continue;
+
+ } else
+ /* Keep this entry */
+ i++;
+ }
+
+ return 1;
+}
+
+int dns_answer_remove_by_rr(DnsAnswer **a, DnsResourceRecord *rm) {
+ bool found = false, other = false;
+ DnsResourceRecord *rr;
+ size_t i;
+ int r;
+
+ assert(a);
+ assert(rm);
+
+ /* Remove all entries matching the specified RR from *a */
+
+ DNS_ANSWER_FOREACH(rr, *a) {
+ r = dns_resource_record_equal(rr, rm);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ found = true;
+ else
+ other = true;
+
+ if (found && other)
+ break;
+ }
+
+ if (!found)
+ return 0;
+
+ if (!other) {
+ *a = dns_answer_unref(*a); /* Return NULL for the empty answer */
+ return 1;
+ }
+
+ if ((*a)->n_ref > 1) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *copy = NULL;
+ DnsAnswerFlags flags;
+ int ifindex;
+
+ copy = dns_answer_new((*a)->n_rrs);
+ if (!copy)
+ return -ENOMEM;
+
+ DNS_ANSWER_FOREACH_FULL(rr, ifindex, flags, *a) {
+ r = dns_resource_record_equal(rr, rm);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ r = dns_answer_add_raw(copy, rr, ifindex, flags);
+ if (r < 0)
+ return r;
+ }
+
+ dns_answer_unref(*a);
+ *a = TAKE_PTR(copy);
+
+ return 1;
+ }
+
+ /* Only a single reference, edit in-place */
+
+ i = 0;
+ for (;;) {
+ if (i >= (*a)->n_rrs)
+ break;
+
+ r = dns_resource_record_equal((*a)->items[i].rr, rm);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* Kill this entry */
+
+ dns_resource_record_unref((*a)->items[i].rr);
+ memmove((*a)->items + i, (*a)->items + i + 1, sizeof(DnsAnswerItem) * ((*a)->n_rrs - i - 1));
+ (*a)->n_rrs--;
+ continue;
+
+ } else
+ /* Keep this entry */
+ i++;
+ }
+
+ return 1;
+}
+
+int dns_answer_copy_by_key(DnsAnswer **a, DnsAnswer *source, const DnsResourceKey *key, DnsAnswerFlags or_flags) {
+ DnsResourceRecord *rr_source;
+ int ifindex_source, r;
+ DnsAnswerFlags flags_source;
+
+ assert(a);
+ assert(key);
+
+ /* Copy all RRs matching the specified key from source into *a */
+
+ DNS_ANSWER_FOREACH_FULL(rr_source, ifindex_source, flags_source, source) {
+
+ r = dns_resource_key_equal(rr_source->key, key);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* Make space for at least one entry */
+ r = dns_answer_reserve_or_clone(a, 1);
+ if (r < 0)
+ return r;
+
+ r = dns_answer_add(*a, rr_source, ifindex_source, flags_source|or_flags);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_answer_move_by_key(DnsAnswer **to, DnsAnswer **from, const DnsResourceKey *key, DnsAnswerFlags or_flags) {
+ int r;
+
+ assert(to);
+ assert(from);
+ assert(key);
+
+ r = dns_answer_copy_by_key(to, *from, key, or_flags);
+ if (r < 0)
+ return r;
+
+ return dns_answer_remove_by_key(from, key);
+}
+
+void dns_answer_order_by_scope(DnsAnswer *a, bool prefer_link_local) {
+ DnsAnswerItem *items;
+ size_t i, start, end;
+
+ if (!a)
+ return;
+
+ if (a->n_rrs <= 1)
+ return;
+
+ start = 0;
+ end = a->n_rrs-1;
+
+ /* RFC 4795, Section 2.6 suggests we should order entries
+ * depending on whether the sender is a link-local address. */
+
+ items = newa(DnsAnswerItem, a->n_rrs);
+ for (i = 0; i < a->n_rrs; i++) {
+
+ if (a->items[i].rr->key->class == DNS_CLASS_IN &&
+ ((a->items[i].rr->key->type == DNS_TYPE_A && in_addr_is_link_local(AF_INET, (union in_addr_union*) &a->items[i].rr->a.in_addr) != prefer_link_local) ||
+ (a->items[i].rr->key->type == DNS_TYPE_AAAA && in_addr_is_link_local(AF_INET6, (union in_addr_union*) &a->items[i].rr->aaaa.in6_addr) != prefer_link_local)))
+ /* Order address records that are not preferred to the end of the array */
+ items[end--] = a->items[i];
+ else
+ /* Order all other records to the beginning of the array */
+ items[start++] = a->items[i];
+ }
+
+ assert(start == end+1);
+ memcpy(a->items, items, sizeof(DnsAnswerItem) * a->n_rrs);
+}
+
+int dns_answer_reserve(DnsAnswer **a, size_t n_free) {
+ DnsAnswer *n;
+
+ assert(a);
+
+ if (n_free <= 0)
+ return 0;
+
+ if (*a) {
+ size_t ns;
+
+ if ((*a)->n_ref > 1)
+ return -EBUSY;
+
+ ns = (*a)->n_rrs + n_free;
+
+ if ((*a)->n_allocated >= ns)
+ return 0;
+
+ /* Allocate more than we need */
+ ns *= 2;
+
+ n = realloc(*a, offsetof(DnsAnswer, items) + sizeof(DnsAnswerItem) * ns);
+ if (!n)
+ return -ENOMEM;
+
+ n->n_allocated = ns;
+ } else {
+ n = dns_answer_new(n_free);
+ if (!n)
+ return -ENOMEM;
+ }
+
+ *a = n;
+ return 0;
+}
+
+int dns_answer_reserve_or_clone(DnsAnswer **a, size_t n_free) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *n = NULL;
+ int r;
+
+ assert(a);
+
+ /* Tries to extend the DnsAnswer object. And if that's not
+ * possible, since we are not the sole owner, then allocate a
+ * new, appropriately sized one. Either way, after this call
+ * the object will only have a single reference, and has room
+ * for at least the specified number of RRs. */
+
+ r = dns_answer_reserve(a, n_free);
+ if (r != -EBUSY)
+ return r;
+
+ assert(*a);
+
+ n = dns_answer_new(((*a)->n_rrs + n_free) * 2);
+ if (!n)
+ return -ENOMEM;
+
+ r = dns_answer_add_raw_all(n, *a);
+ if (r < 0)
+ return r;
+
+ dns_answer_unref(*a);
+ *a = TAKE_PTR(n);
+
+ return 0;
+}
+
+void dns_answer_dump(DnsAnswer *answer, FILE *f) {
+ DnsResourceRecord *rr;
+ DnsAnswerFlags flags;
+ int ifindex;
+
+ if (!f)
+ f = stdout;
+
+ DNS_ANSWER_FOREACH_FULL(rr, ifindex, flags, answer) {
+ const char *t;
+
+ fputc('\t', f);
+
+ t = dns_resource_record_to_string(rr);
+ if (!t) {
+ log_oom();
+ continue;
+ }
+
+ fputs(t, f);
+
+ if (ifindex != 0 || flags & (DNS_ANSWER_AUTHENTICATED|DNS_ANSWER_CACHEABLE|DNS_ANSWER_SHARED_OWNER))
+ fputs("\t;", f);
+
+ if (ifindex != 0)
+ printf(" ifindex=%i", ifindex);
+ if (flags & DNS_ANSWER_AUTHENTICATED)
+ fputs(" authenticated", f);
+ if (flags & DNS_ANSWER_CACHEABLE)
+ fputs(" cachable", f);
+ if (flags & DNS_ANSWER_SHARED_OWNER)
+ fputs(" shared-owner", f);
+
+ fputc('\n', f);
+ }
+}
+
+int dns_answer_has_dname_for_cname(DnsAnswer *a, DnsResourceRecord *cname) {
+ DnsResourceRecord *rr;
+ int r;
+
+ assert(cname);
+
+ /* Checks whether the answer contains a DNAME record that indicates that the specified CNAME record is
+ * synthesized from it */
+
+ if (cname->key->type != DNS_TYPE_CNAME)
+ return 0;
+
+ DNS_ANSWER_FOREACH(rr, a) {
+ _cleanup_free_ char *n = NULL;
+
+ if (rr->key->type != DNS_TYPE_DNAME)
+ continue;
+ if (rr->key->class != cname->key->class)
+ continue;
+
+ r = dns_name_change_suffix(cname->cname.name, rr->dname.name, dns_resource_key_name(rr->key), &n);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dns_name_equal(n, dns_resource_key_name(cname->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/src/resolve/resolved-dns-answer.h b/src/resolve/resolved-dns-answer.h
new file mode 100644
index 0000000..47fc80e
--- /dev/null
+++ b/src/resolve/resolved-dns-answer.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct DnsAnswer DnsAnswer;
+typedef struct DnsAnswerItem DnsAnswerItem;
+
+#include "macro.h"
+#include "resolved-dns-rr.h"
+
+/* A simple array of resource records. We keep track of the
+ * originating ifindex for each RR where that makes sense, so that we
+ * can qualify A and AAAA RRs referring to a local link with the
+ * right ifindex.
+ *
+ * Note that we usually encode the empty DnsAnswer object as a simple NULL. */
+
+typedef enum DnsAnswerFlags {
+ DNS_ANSWER_AUTHENTICATED = 1 << 0, /* Item has been authenticated */
+ DNS_ANSWER_CACHEABLE = 1 << 1, /* Item is subject to caching */
+ DNS_ANSWER_SHARED_OWNER = 1 << 2, /* For mDNS: RRset may be owner by multiple peers */
+ DNS_ANSWER_CACHE_FLUSH = 1 << 3, /* For mDNS: sets cache-flush bit in the rrclass of response records */
+ DNS_ANSWER_GOODBYE = 1 << 4, /* For mDNS: item is subject to disappear */
+} DnsAnswerFlags;
+
+struct DnsAnswerItem {
+ DnsResourceRecord *rr;
+ int ifindex;
+ DnsAnswerFlags flags;
+};
+
+struct DnsAnswer {
+ unsigned n_ref;
+ size_t n_rrs, n_allocated;
+ DnsAnswerItem items[0];
+};
+
+DnsAnswer *dns_answer_new(size_t n);
+DnsAnswer *dns_answer_ref(DnsAnswer *a);
+DnsAnswer *dns_answer_unref(DnsAnswer *a);
+
+int dns_answer_add(DnsAnswer *a, DnsResourceRecord *rr, int ifindex, DnsAnswerFlags flags);
+int dns_answer_add_extend(DnsAnswer **a, DnsResourceRecord *rr, int ifindex, DnsAnswerFlags flags);
+int dns_answer_add_soa(DnsAnswer *a, const char *name, uint32_t ttl, int ifindex);
+
+int dns_answer_match_key(DnsAnswer *a, const DnsResourceKey *key, DnsAnswerFlags *combined_flags);
+int dns_answer_contains_nsec_or_nsec3(DnsAnswer *a);
+int dns_answer_contains_zone_nsec3(DnsAnswer *answer, const char *zone);
+
+int dns_answer_find_soa(DnsAnswer *a, const DnsResourceKey *key, DnsResourceRecord **ret, DnsAnswerFlags *flags);
+int dns_answer_find_cname_or_dname(DnsAnswer *a, const DnsResourceKey *key, DnsResourceRecord **ret, DnsAnswerFlags *flags);
+
+int dns_answer_merge(DnsAnswer *a, DnsAnswer *b, DnsAnswer **ret);
+int dns_answer_extend(DnsAnswer **a, DnsAnswer *b);
+
+void dns_answer_order_by_scope(DnsAnswer *a, bool prefer_link_local);
+
+int dns_answer_reserve(DnsAnswer **a, size_t n_free);
+int dns_answer_reserve_or_clone(DnsAnswer **a, size_t n_free);
+
+int dns_answer_remove_by_key(DnsAnswer **a, const DnsResourceKey *key);
+int dns_answer_remove_by_rr(DnsAnswer **a, DnsResourceRecord *rr);
+
+int dns_answer_copy_by_key(DnsAnswer **a, DnsAnswer *source, const DnsResourceKey *key, DnsAnswerFlags or_flags);
+int dns_answer_move_by_key(DnsAnswer **to, DnsAnswer **from, const DnsResourceKey *key, DnsAnswerFlags or_flags);
+
+int dns_answer_has_dname_for_cname(DnsAnswer *a, DnsResourceRecord *cname);
+
+static inline size_t dns_answer_size(DnsAnswer *a) {
+ return a ? a->n_rrs : 0;
+}
+
+static inline bool dns_answer_isempty(DnsAnswer *a) {
+ return dns_answer_size(a) <= 0;
+}
+
+void dns_answer_dump(DnsAnswer *answer, FILE *f);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsAnswer*, dns_answer_unref);
+
+#define _DNS_ANSWER_FOREACH(q, kk, a) \
+ for (size_t UNIQ_T(i, q) = ({ \
+ (kk) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].rr : NULL; \
+ 0; \
+ }); \
+ (a) && (UNIQ_T(i, q) < (a)->n_rrs); \
+ UNIQ_T(i, q)++, (kk) = (UNIQ_T(i, q) < (a)->n_rrs ? (a)->items[UNIQ_T(i, q)].rr : NULL))
+
+#define DNS_ANSWER_FOREACH(kk, a) _DNS_ANSWER_FOREACH(UNIQ, kk, a)
+
+#define _DNS_ANSWER_FOREACH_IFINDEX(q, kk, ifi, a) \
+ for (size_t UNIQ_T(i, q) = ({ \
+ (kk) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].rr : NULL; \
+ (ifi) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].ifindex : 0; \
+ 0; \
+ }); \
+ (a) && (UNIQ_T(i, q) < (a)->n_rrs); \
+ UNIQ_T(i, q)++, \
+ (kk) = ((UNIQ_T(i, q) < (a)->n_rrs) ? (a)->items[UNIQ_T(i, q)].rr : NULL), \
+ (ifi) = ((UNIQ_T(i, q) < (a)->n_rrs) ? (a)->items[UNIQ_T(i, q)].ifindex : 0))
+
+#define DNS_ANSWER_FOREACH_IFINDEX(kk, ifindex, a) _DNS_ANSWER_FOREACH_IFINDEX(UNIQ, kk, ifindex, a)
+
+#define _DNS_ANSWER_FOREACH_FLAGS(q, kk, fl, a) \
+ for (size_t UNIQ_T(i, q) = ({ \
+ (kk) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].rr : NULL; \
+ (fl) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].flags : 0; \
+ 0; \
+ }); \
+ (a) && (UNIQ_T(i, q) < (a)->n_rrs); \
+ UNIQ_T(i, q)++, \
+ (kk) = ((UNIQ_T(i, q) < (a)->n_rrs) ? (a)->items[UNIQ_T(i, q)].rr : NULL), \
+ (fl) = ((UNIQ_T(i, q) < (a)->n_rrs) ? (a)->items[UNIQ_T(i, q)].flags : 0))
+
+#define DNS_ANSWER_FOREACH_FLAGS(kk, flags, a) _DNS_ANSWER_FOREACH_FLAGS(UNIQ, kk, flags, a)
+
+#define _DNS_ANSWER_FOREACH_FULL(q, kk, ifi, fl, a) \
+ for (size_t UNIQ_T(i, q) = ({ \
+ (kk) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].rr : NULL; \
+ (ifi) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].ifindex : 0; \
+ (fl) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].flags : 0; \
+ 0; \
+ }); \
+ (a) && (UNIQ_T(i, q) < (a)->n_rrs); \
+ UNIQ_T(i, q)++, \
+ (kk) = ((UNIQ_T(i, q) < (a)->n_rrs) ? (a)->items[UNIQ_T(i, q)].rr : NULL), \
+ (ifi) = ((UNIQ_T(i, q) < (a)->n_rrs) ? (a)->items[UNIQ_T(i, q)].ifindex : 0), \
+ (fl) = ((UNIQ_T(i, q) < (a)->n_rrs) ? (a)->items[UNIQ_T(i, q)].flags : 0))
+
+#define DNS_ANSWER_FOREACH_FULL(kk, ifindex, flags, a) _DNS_ANSWER_FOREACH_FULL(UNIQ, kk, ifindex, flags, a)
diff --git a/src/resolve/resolved-dns-cache.c b/src/resolve/resolved-dns-cache.c
new file mode 100644
index 0000000..99dec28
--- /dev/null
+++ b/src/resolve/resolved-dns-cache.c
@@ -0,0 +1,1106 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "resolved-dns-answer.h"
+#include "resolved-dns-cache.h"
+#include "resolved-dns-packet.h"
+#include "string-util.h"
+
+/* Never cache more than 4K entries. RFC 1536, Section 5 suggests to
+ * leave DNS caches unbounded, but that's crazy. */
+#define CACHE_MAX 4096
+
+/* We never keep any item longer than 2h in our cache */
+#define CACHE_TTL_MAX_USEC (2 * USEC_PER_HOUR)
+
+/* How long to cache strange rcodes, i.e. rcodes != SUCCESS and != NXDOMAIN (specifically: that's only SERVFAIL for
+ * now) */
+#define CACHE_TTL_STRANGE_RCODE_USEC (30 * USEC_PER_SEC)
+
+typedef enum DnsCacheItemType DnsCacheItemType;
+typedef struct DnsCacheItem DnsCacheItem;
+
+enum DnsCacheItemType {
+ DNS_CACHE_POSITIVE,
+ DNS_CACHE_NODATA,
+ DNS_CACHE_NXDOMAIN,
+ DNS_CACHE_RCODE, /* "strange" RCODE (effective only SERVFAIL for now) */
+};
+
+struct DnsCacheItem {
+ DnsCacheItemType type;
+ DnsResourceKey *key;
+ DnsResourceRecord *rr;
+ int rcode;
+
+ usec_t until;
+ bool authenticated:1;
+ bool shared_owner:1;
+
+ int ifindex;
+ int owner_family;
+ union in_addr_union owner_address;
+
+ unsigned prioq_idx;
+ LIST_FIELDS(DnsCacheItem, by_key);
+};
+
+static const char *dns_cache_item_type_to_string(DnsCacheItem *item) {
+ assert(item);
+
+ switch (item->type) {
+
+ case DNS_CACHE_POSITIVE:
+ return "POSITIVE";
+
+ case DNS_CACHE_NODATA:
+ return "NODATA";
+
+ case DNS_CACHE_NXDOMAIN:
+ return "NXDOMAIN";
+
+ case DNS_CACHE_RCODE:
+ return dns_rcode_to_string(item->rcode);
+ }
+
+ return NULL;
+}
+
+static void dns_cache_item_free(DnsCacheItem *i) {
+ if (!i)
+ return;
+
+ dns_resource_record_unref(i->rr);
+ dns_resource_key_unref(i->key);
+ free(i);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsCacheItem*, dns_cache_item_free);
+
+static void dns_cache_item_unlink_and_free(DnsCache *c, DnsCacheItem *i) {
+ DnsCacheItem *first;
+
+ assert(c);
+
+ if (!i)
+ return;
+
+ first = hashmap_get(c->by_key, i->key);
+ LIST_REMOVE(by_key, first, i);
+
+ if (first)
+ assert_se(hashmap_replace(c->by_key, first->key, first) >= 0);
+ else
+ hashmap_remove(c->by_key, i->key);
+
+ prioq_remove(c->by_expiry, i, &i->prioq_idx);
+
+ dns_cache_item_free(i);
+}
+
+static bool dns_cache_remove_by_rr(DnsCache *c, DnsResourceRecord *rr) {
+ DnsCacheItem *first, *i;
+ int r;
+
+ first = hashmap_get(c->by_key, rr->key);
+ LIST_FOREACH(by_key, i, first) {
+ r = dns_resource_record_equal(i->rr, rr);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ dns_cache_item_unlink_and_free(c, i);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool dns_cache_remove_by_key(DnsCache *c, DnsResourceKey *key) {
+ DnsCacheItem *first, *i, *n;
+
+ assert(c);
+ assert(key);
+
+ first = hashmap_remove(c->by_key, key);
+ if (!first)
+ return false;
+
+ LIST_FOREACH_SAFE(by_key, i, n, first) {
+ prioq_remove(c->by_expiry, i, &i->prioq_idx);
+ dns_cache_item_free(i);
+ }
+
+ return true;
+}
+
+void dns_cache_flush(DnsCache *c) {
+ DnsResourceKey *key;
+
+ assert(c);
+
+ while ((key = hashmap_first_key(c->by_key)))
+ dns_cache_remove_by_key(c, key);
+
+ assert(hashmap_size(c->by_key) == 0);
+ assert(prioq_size(c->by_expiry) == 0);
+
+ c->by_key = hashmap_free(c->by_key);
+ c->by_expiry = prioq_free(c->by_expiry);
+}
+
+static void dns_cache_make_space(DnsCache *c, unsigned add) {
+ assert(c);
+
+ if (add <= 0)
+ return;
+
+ /* Makes space for n new entries. Note that we actually allow
+ * the cache to grow beyond CACHE_MAX, but only when we shall
+ * add more RRs to the cache than CACHE_MAX at once. In that
+ * case the cache will be emptied completely otherwise. */
+
+ for (;;) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ DnsCacheItem *i;
+
+ if (prioq_size(c->by_expiry) <= 0)
+ break;
+
+ if (prioq_size(c->by_expiry) + add < CACHE_MAX)
+ break;
+
+ i = prioq_peek(c->by_expiry);
+ assert(i);
+
+ /* Take an extra reference to the key so that it
+ * doesn't go away in the middle of the remove call */
+ key = dns_resource_key_ref(i->key);
+ dns_cache_remove_by_key(c, key);
+ }
+}
+
+void dns_cache_prune(DnsCache *c) {
+ usec_t t = 0;
+
+ assert(c);
+
+ /* Remove all entries that are past their TTL */
+
+ for (;;) {
+ DnsCacheItem *i;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ i = prioq_peek(c->by_expiry);
+ if (!i)
+ break;
+
+ if (t <= 0)
+ t = now(clock_boottime_or_monotonic());
+
+ if (i->until > t)
+ break;
+
+ /* Depending whether this is an mDNS shared entry
+ * either remove only this one RR or the whole RRset */
+ log_debug("Removing %scache entry for %s (expired "USEC_FMT"s ago)",
+ i->shared_owner ? "shared " : "",
+ dns_resource_key_to_string(i->key, key_str, sizeof key_str),
+ (t - i->until) / USEC_PER_SEC);
+
+ if (i->shared_owner)
+ dns_cache_item_unlink_and_free(c, i);
+ else {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+
+ /* Take an extra reference to the key so that it
+ * doesn't go away in the middle of the remove call */
+ key = dns_resource_key_ref(i->key);
+ dns_cache_remove_by_key(c, key);
+ }
+ }
+}
+
+static int dns_cache_item_prioq_compare_func(const void *a, const void *b) {
+ const DnsCacheItem *x = a, *y = b;
+
+ return CMP(x->until, y->until);
+}
+
+static int dns_cache_init(DnsCache *c) {
+ int r;
+
+ assert(c);
+
+ r = prioq_ensure_allocated(&c->by_expiry, dns_cache_item_prioq_compare_func);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&c->by_key, &dns_resource_key_hash_ops);
+ if (r < 0)
+ return r;
+
+ return r;
+}
+
+static int dns_cache_link_item(DnsCache *c, DnsCacheItem *i) {
+ DnsCacheItem *first;
+ int r;
+
+ assert(c);
+ assert(i);
+
+ r = prioq_put(c->by_expiry, i, &i->prioq_idx);
+ if (r < 0)
+ return r;
+
+ first = hashmap_get(c->by_key, i->key);
+ if (first) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *k = NULL;
+
+ /* Keep a reference to the original key, while we manipulate the list. */
+ k = dns_resource_key_ref(first->key);
+
+ /* Now, try to reduce the number of keys we keep */
+ dns_resource_key_reduce(&first->key, &i->key);
+
+ if (first->rr)
+ dns_resource_key_reduce(&first->rr->key, &i->key);
+ if (i->rr)
+ dns_resource_key_reduce(&i->rr->key, &i->key);
+
+ LIST_PREPEND(by_key, first, i);
+ assert_se(hashmap_replace(c->by_key, first->key, first) >= 0);
+ } else {
+ r = hashmap_put(c->by_key, i->key, i);
+ if (r < 0) {
+ prioq_remove(c->by_expiry, i, &i->prioq_idx);
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static DnsCacheItem* dns_cache_get(DnsCache *c, DnsResourceRecord *rr) {
+ DnsCacheItem *i;
+
+ assert(c);
+ assert(rr);
+
+ LIST_FOREACH(by_key, i, hashmap_get(c->by_key, rr->key))
+ if (i->rr && dns_resource_record_equal(i->rr, rr) > 0)
+ return i;
+
+ return NULL;
+}
+
+static usec_t calculate_until(DnsResourceRecord *rr, uint32_t nsec_ttl, usec_t timestamp, bool use_soa_minimum) {
+ uint32_t ttl;
+ usec_t u;
+
+ assert(rr);
+
+ ttl = MIN(rr->ttl, nsec_ttl);
+ if (rr->key->type == DNS_TYPE_SOA && use_soa_minimum) {
+ /* If this is a SOA RR, and it is requested, clamp to
+ * the SOA's minimum field. This is used when we do
+ * negative caching, to determine the TTL for the
+ * negative caching entry. See RFC 2308, Section
+ * 5. */
+
+ if (ttl > rr->soa.minimum)
+ ttl = rr->soa.minimum;
+ }
+
+ u = ttl * USEC_PER_SEC;
+ if (u > CACHE_TTL_MAX_USEC)
+ u = CACHE_TTL_MAX_USEC;
+
+ if (rr->expiry != USEC_INFINITY) {
+ usec_t left;
+
+ /* Make use of the DNSSEC RRSIG expiry info, if we
+ * have it */
+
+ left = LESS_BY(rr->expiry, now(CLOCK_REALTIME));
+ if (u > left)
+ u = left;
+ }
+
+ return timestamp + u;
+}
+
+static void dns_cache_item_update_positive(
+ DnsCache *c,
+ DnsCacheItem *i,
+ DnsResourceRecord *rr,
+ bool authenticated,
+ bool shared_owner,
+ usec_t timestamp,
+ int ifindex,
+ int owner_family,
+ const union in_addr_union *owner_address) {
+
+ assert(c);
+ assert(i);
+ assert(rr);
+ assert(owner_address);
+
+ i->type = DNS_CACHE_POSITIVE;
+
+ if (!i->by_key_prev)
+ /* We are the first item in the list, we need to
+ * update the key used in the hashmap */
+
+ assert_se(hashmap_replace(c->by_key, rr->key, i) >= 0);
+
+ dns_resource_record_ref(rr);
+ dns_resource_record_unref(i->rr);
+ i->rr = rr;
+
+ dns_resource_key_unref(i->key);
+ i->key = dns_resource_key_ref(rr->key);
+
+ i->until = calculate_until(rr, (uint32_t) -1, timestamp, false);
+ i->authenticated = authenticated;
+ i->shared_owner = shared_owner;
+
+ i->ifindex = ifindex;
+
+ i->owner_family = owner_family;
+ i->owner_address = *owner_address;
+
+ prioq_reshuffle(c->by_expiry, i, &i->prioq_idx);
+}
+
+static int dns_cache_put_positive(
+ DnsCache *c,
+ DnsResourceRecord *rr,
+ bool authenticated,
+ bool shared_owner,
+ usec_t timestamp,
+ int ifindex,
+ int owner_family,
+ const union in_addr_union *owner_address) {
+
+ _cleanup_(dns_cache_item_freep) DnsCacheItem *i = NULL;
+ DnsCacheItem *existing;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX], ifname[IF_NAMESIZE];
+ int r, k;
+
+ assert(c);
+ assert(rr);
+ assert(owner_address);
+
+ /* Never cache pseudo RRs */
+ if (dns_class_is_pseudo(rr->key->class))
+ return 0;
+ if (dns_type_is_pseudo(rr->key->type))
+ return 0;
+
+ /* New TTL is 0? Delete this specific entry... */
+ if (rr->ttl <= 0) {
+ k = dns_cache_remove_by_rr(c, rr);
+ log_debug("%s: %s",
+ k > 0 ? "Removed zero TTL entry from cache" : "Not caching zero TTL cache entry",
+ dns_resource_key_to_string(rr->key, key_str, sizeof key_str));
+ return 0;
+ }
+
+ /* Entry exists already? Update TTL, timestamp and owner */
+ existing = dns_cache_get(c, rr);
+ if (existing) {
+ dns_cache_item_update_positive(
+ c,
+ existing,
+ rr,
+ authenticated,
+ shared_owner,
+ timestamp,
+ ifindex,
+ owner_family,
+ owner_address);
+ return 0;
+ }
+
+ /* Otherwise, add the new RR */
+ r = dns_cache_init(c);
+ if (r < 0)
+ return r;
+
+ dns_cache_make_space(c, 1);
+
+ i = new0(DnsCacheItem, 1);
+ if (!i)
+ return -ENOMEM;
+
+ i->type = DNS_CACHE_POSITIVE;
+ i->key = dns_resource_key_ref(rr->key);
+ i->rr = dns_resource_record_ref(rr);
+ i->until = calculate_until(rr, (uint32_t) -1, timestamp, false);
+ i->authenticated = authenticated;
+ i->shared_owner = shared_owner;
+ i->ifindex = ifindex;
+ i->owner_family = owner_family;
+ i->owner_address = *owner_address;
+ i->prioq_idx = PRIOQ_IDX_NULL;
+
+ r = dns_cache_link_item(c, i);
+ if (r < 0)
+ return r;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *t = NULL;
+
+ (void) in_addr_to_string(i->owner_family, &i->owner_address, &t);
+
+ log_debug("Added positive %s%s cache entry for %s "USEC_FMT"s on %s/%s/%s",
+ i->authenticated ? "authenticated" : "unauthenticated",
+ i->shared_owner ? " shared" : "",
+ dns_resource_key_to_string(i->key, key_str, sizeof key_str),
+ (i->until - timestamp) / USEC_PER_SEC,
+ i->ifindex == 0 ? "*" : strna(if_indextoname(i->ifindex, ifname)),
+ af_to_name_short(i->owner_family),
+ strna(t));
+ }
+
+ i = NULL;
+ return 0;
+}
+
+static int dns_cache_put_negative(
+ DnsCache *c,
+ DnsResourceKey *key,
+ int rcode,
+ bool authenticated,
+ uint32_t nsec_ttl,
+ usec_t timestamp,
+ DnsResourceRecord *soa,
+ int owner_family,
+ const union in_addr_union *owner_address) {
+
+ _cleanup_(dns_cache_item_freep) DnsCacheItem *i = NULL;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+ int r;
+
+ assert(c);
+ assert(key);
+ assert(owner_address);
+
+ /* Never cache pseudo RR keys. DNS_TYPE_ANY is particularly
+ * important to filter out as we use this as a pseudo-type for
+ * NXDOMAIN entries */
+ if (dns_class_is_pseudo(key->class))
+ return 0;
+ if (dns_type_is_pseudo(key->type))
+ return 0;
+
+ if (IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) {
+ if (!soa)
+ return 0;
+
+ /* For negative replies, check if we have a TTL of a SOA */
+ if (nsec_ttl <= 0 || soa->soa.minimum <= 0 || soa->ttl <= 0) {
+ log_debug("Not caching negative entry with zero SOA/NSEC/NSEC3 TTL: %s",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+ return 0;
+ }
+ } else if (rcode != DNS_RCODE_SERVFAIL)
+ return 0;
+
+ r = dns_cache_init(c);
+ if (r < 0)
+ return r;
+
+ dns_cache_make_space(c, 1);
+
+ i = new0(DnsCacheItem, 1);
+ if (!i)
+ return -ENOMEM;
+
+ i->type =
+ rcode == DNS_RCODE_SUCCESS ? DNS_CACHE_NODATA :
+ rcode == DNS_RCODE_NXDOMAIN ? DNS_CACHE_NXDOMAIN : DNS_CACHE_RCODE;
+ i->until =
+ i->type == DNS_CACHE_RCODE ? timestamp + CACHE_TTL_STRANGE_RCODE_USEC :
+ calculate_until(soa, nsec_ttl, timestamp, true);
+ i->authenticated = authenticated;
+ i->owner_family = owner_family;
+ i->owner_address = *owner_address;
+ i->prioq_idx = PRIOQ_IDX_NULL;
+ i->rcode = rcode;
+
+ if (i->type == DNS_CACHE_NXDOMAIN) {
+ /* NXDOMAIN entries should apply equally to all types, so we use ANY as
+ * a pseudo type for this purpose here. */
+ i->key = dns_resource_key_new(key->class, DNS_TYPE_ANY, dns_resource_key_name(key));
+ if (!i->key)
+ return -ENOMEM;
+
+ /* Make sure to remove any previous entry for this
+ * specific ANY key. (For non-ANY keys the cache data
+ * is already cleared by the caller.) Note that we
+ * don't bother removing positive or NODATA cache
+ * items in this case, because it would either be slow
+ * or require explicit indexing by name */
+ dns_cache_remove_by_key(c, key);
+ } else
+ i->key = dns_resource_key_ref(key);
+
+ r = dns_cache_link_item(c, i);
+ if (r < 0)
+ return r;
+
+ log_debug("Added %s cache entry for %s "USEC_FMT"s",
+ dns_cache_item_type_to_string(i),
+ dns_resource_key_to_string(i->key, key_str, sizeof key_str),
+ (i->until - timestamp) / USEC_PER_SEC);
+
+ i = NULL;
+ return 0;
+}
+
+static void dns_cache_remove_previous(
+ DnsCache *c,
+ DnsResourceKey *key,
+ DnsAnswer *answer) {
+
+ DnsResourceRecord *rr;
+ DnsAnswerFlags flags;
+
+ assert(c);
+
+ /* First, if we were passed a key (i.e. on LLMNR/DNS, but
+ * not on mDNS), delete all matching old RRs, so that we only
+ * keep complete by_key in place. */
+ if (key)
+ dns_cache_remove_by_key(c, key);
+
+ /* Second, flush all entries matching the answer, unless this
+ * is an RR that is explicitly marked to be "shared" between
+ * peers (i.e. mDNS RRs without the flush-cache bit set). */
+ DNS_ANSWER_FOREACH_FLAGS(rr, flags, answer) {
+ if ((flags & DNS_ANSWER_CACHEABLE) == 0)
+ continue;
+
+ if (flags & DNS_ANSWER_SHARED_OWNER)
+ continue;
+
+ dns_cache_remove_by_key(c, rr->key);
+ }
+}
+
+static bool rr_eligible(DnsResourceRecord *rr) {
+ assert(rr);
+
+ /* When we see an NSEC/NSEC3 RR, we'll only cache it if it is from the lower zone, not the upper zone, since
+ * that's where the interesting bits are (with exception of DS RRs). Of course, this way we cannot derive DS
+ * existence from any cached NSEC/NSEC3, but that should be fine. */
+
+ switch (rr->key->type) {
+
+ case DNS_TYPE_NSEC:
+ return !bitmap_isset(rr->nsec.types, DNS_TYPE_NS) ||
+ bitmap_isset(rr->nsec.types, DNS_TYPE_SOA);
+
+ case DNS_TYPE_NSEC3:
+ return !bitmap_isset(rr->nsec3.types, DNS_TYPE_NS) ||
+ bitmap_isset(rr->nsec3.types, DNS_TYPE_SOA);
+
+ default:
+ return true;
+ }
+}
+
+int dns_cache_put(
+ DnsCache *c,
+ DnsResourceKey *key,
+ int rcode,
+ DnsAnswer *answer,
+ bool authenticated,
+ uint32_t nsec_ttl,
+ usec_t timestamp,
+ int owner_family,
+ const union in_addr_union *owner_address) {
+
+ DnsResourceRecord *soa = NULL, *rr;
+ bool weird_rcode = false;
+ DnsAnswerFlags flags;
+ unsigned cache_keys;
+ int r, ifindex;
+
+ assert(c);
+ assert(owner_address);
+
+ dns_cache_remove_previous(c, key, answer);
+
+ /* We only care for positive replies and NXDOMAINs, on all other replies we will simply flush the respective
+ * entries, and that's it. (Well, with one further exception: since some DNS zones (akamai!) return SERVFAIL
+ * consistently for some lookups, and forwarders tend to propagate that we'll cache that too, but only for a
+ * short time.) */
+
+ if (IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) {
+ if (dns_answer_size(answer) <= 0) {
+ if (key) {
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ log_debug("Not caching negative entry without a SOA record: %s",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+ }
+ return 0;
+ }
+
+ } else {
+ /* Only cache SERVFAIL as "weird" rcode for now. We can add more later, should that turn out to be
+ * beneficial. */
+ if (rcode != DNS_RCODE_SERVFAIL)
+ return 0;
+
+ weird_rcode = true;
+ }
+
+ cache_keys = dns_answer_size(answer);
+ if (key)
+ cache_keys++;
+
+ /* Make some space for our new entries */
+ dns_cache_make_space(c, cache_keys);
+
+ if (timestamp <= 0)
+ timestamp = now(clock_boottime_or_monotonic());
+
+ /* Second, add in positive entries for all contained RRs */
+ DNS_ANSWER_FOREACH_FULL(rr, ifindex, flags, answer) {
+ if ((flags & DNS_ANSWER_CACHEABLE) == 0 ||
+ !rr_eligible(rr))
+ continue;
+
+ r = dns_cache_put_positive(
+ c,
+ rr,
+ flags & DNS_ANSWER_AUTHENTICATED,
+ flags & DNS_ANSWER_SHARED_OWNER,
+ timestamp,
+ ifindex,
+ owner_family, owner_address);
+ if (r < 0)
+ goto fail;
+ }
+
+ if (!key) /* mDNS doesn't know negative caching, really */
+ return 0;
+
+ /* Third, add in negative entries if the key has no RR */
+ r = dns_answer_match_key(answer, key, NULL);
+ if (r < 0)
+ goto fail;
+ if (r > 0)
+ return 0;
+
+ /* But not if it has a matching CNAME/DNAME (the negative
+ * caching will be done on the canonical name, not on the
+ * alias) */
+ r = dns_answer_find_cname_or_dname(answer, key, NULL, NULL);
+ if (r < 0)
+ goto fail;
+ if (r > 0)
+ return 0;
+
+ /* See https://tools.ietf.org/html/rfc2308, which say that a matching SOA record in the packet is used to
+ * enable negative caching. We apply one exception though: if we are about to cache a weird rcode we do so
+ * regardless of a SOA. */
+ r = dns_answer_find_soa(answer, key, &soa, &flags);
+ if (r < 0)
+ goto fail;
+ if (r == 0 && !weird_rcode)
+ return 0;
+ if (r > 0) {
+ /* Refuse using the SOA data if it is unsigned, but the key is
+ * signed */
+ if (authenticated && (flags & DNS_ANSWER_AUTHENTICATED) == 0)
+ return 0;
+ }
+
+ r = dns_cache_put_negative(
+ c,
+ key,
+ rcode,
+ authenticated,
+ nsec_ttl,
+ timestamp,
+ soa,
+ owner_family, owner_address);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ /* Adding all RRs failed. Let's clean up what we already
+ * added, just in case */
+
+ if (key)
+ dns_cache_remove_by_key(c, key);
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, flags, answer) {
+ if ((flags & DNS_ANSWER_CACHEABLE) == 0)
+ continue;
+
+ dns_cache_remove_by_key(c, rr->key);
+ }
+
+ return r;
+}
+
+static DnsCacheItem *dns_cache_get_by_key_follow_cname_dname_nsec(DnsCache *c, DnsResourceKey *k) {
+ DnsCacheItem *i;
+ const char *n;
+ int r;
+
+ assert(c);
+ assert(k);
+
+ /* If we hit some OOM error, or suchlike, we don't care too
+ * much, after all this is just a cache */
+
+ i = hashmap_get(c->by_key, k);
+ if (i)
+ return i;
+
+ n = dns_resource_key_name(k);
+
+ /* Check if we have an NXDOMAIN cache item for the name, notice that we use
+ * the pseudo-type ANY for NXDOMAIN cache items. */
+ i = hashmap_get(c->by_key, &DNS_RESOURCE_KEY_CONST(k->class, DNS_TYPE_ANY, n));
+ if (i && i->type == DNS_CACHE_NXDOMAIN)
+ return i;
+
+ if (dns_type_may_redirect(k->type)) {
+ /* Check if we have a CNAME record instead */
+ i = hashmap_get(c->by_key, &DNS_RESOURCE_KEY_CONST(k->class, DNS_TYPE_CNAME, n));
+ if (i && i->type != DNS_CACHE_NODATA)
+ return i;
+
+ /* OK, let's look for cached DNAME records. */
+ for (;;) {
+ if (isempty(n))
+ return NULL;
+
+ i = hashmap_get(c->by_key, &DNS_RESOURCE_KEY_CONST(k->class, DNS_TYPE_DNAME, n));
+ if (i && i->type != DNS_CACHE_NODATA)
+ return i;
+
+ /* Jump one label ahead */
+ r = dns_name_parent(&n);
+ if (r <= 0)
+ return NULL;
+ }
+ }
+
+ if (k->type != DNS_TYPE_NSEC) {
+ /* Check if we have an NSEC record instead for the name. */
+ i = hashmap_get(c->by_key, &DNS_RESOURCE_KEY_CONST(k->class, DNS_TYPE_NSEC, n));
+ if (i)
+ return i;
+ }
+
+ return NULL;
+}
+
+int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcode, DnsAnswer **ret, bool *authenticated) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+ unsigned n = 0;
+ int r;
+ bool nxdomain = false;
+ DnsCacheItem *j, *first, *nsec = NULL;
+ bool have_authenticated = false, have_non_authenticated = false;
+ usec_t current;
+ int found_rcode = -1;
+
+ assert(c);
+ assert(key);
+ assert(rcode);
+ assert(ret);
+ assert(authenticated);
+
+ if (key->type == DNS_TYPE_ANY || key->class == DNS_CLASS_ANY) {
+ /* If we have ANY lookups we don't use the cache, so
+ * that the caller refreshes via the network. */
+
+ log_debug("Ignoring cache for ANY lookup: %s",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+
+ c->n_miss++;
+
+ *ret = NULL;
+ *rcode = DNS_RCODE_SUCCESS;
+ *authenticated = false;
+
+ return 0;
+ }
+
+ first = dns_cache_get_by_key_follow_cname_dname_nsec(c, key);
+ if (!first) {
+ /* If one question cannot be answered we need to refresh */
+
+ log_debug("Cache miss for %s",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+
+ c->n_miss++;
+
+ *ret = NULL;
+ *rcode = DNS_RCODE_SUCCESS;
+ *authenticated = false;
+
+ return 0;
+ }
+
+ LIST_FOREACH(by_key, j, first) {
+ if (j->rr) {
+ if (j->rr->key->type == DNS_TYPE_NSEC)
+ nsec = j;
+
+ n++;
+ } else if (j->type == DNS_CACHE_NXDOMAIN)
+ nxdomain = true;
+ else if (j->type == DNS_CACHE_RCODE)
+ found_rcode = j->rcode;
+
+ if (j->authenticated)
+ have_authenticated = true;
+ else
+ have_non_authenticated = true;
+ }
+
+ if (found_rcode >= 0) {
+ log_debug("RCODE %s cache hit for %s",
+ dns_rcode_to_string(found_rcode),
+ dns_resource_key_to_string(key, key_str, sizeof(key_str)));
+
+ *ret = NULL;
+ *rcode = found_rcode;
+ *authenticated = false;
+
+ c->n_hit++;
+ return 1;
+ }
+
+ if (nsec && !IN_SET(key->type, DNS_TYPE_NSEC, DNS_TYPE_DS)) {
+ /* Note that we won't derive information for DS RRs from an NSEC, because we only cache NSEC RRs from
+ * the lower-zone of a zone cut, but the DS RRs are on the upper zone. */
+
+ log_debug("NSEC NODATA cache hit for %s",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+
+ /* We only found an NSEC record that matches our name.
+ * If it says the type doesn't exist report
+ * NODATA. Otherwise report a cache miss. */
+
+ *ret = NULL;
+ *rcode = DNS_RCODE_SUCCESS;
+ *authenticated = nsec->authenticated;
+
+ if (!bitmap_isset(nsec->rr->nsec.types, key->type) &&
+ !bitmap_isset(nsec->rr->nsec.types, DNS_TYPE_CNAME) &&
+ !bitmap_isset(nsec->rr->nsec.types, DNS_TYPE_DNAME)) {
+ c->n_hit++;
+ return 1;
+ }
+
+ c->n_miss++;
+ return 0;
+ }
+
+ log_debug("%s cache hit for %s",
+ n > 0 ? "Positive" :
+ nxdomain ? "NXDOMAIN" : "NODATA",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+
+ if (n <= 0) {
+ c->n_hit++;
+
+ *ret = NULL;
+ *rcode = nxdomain ? DNS_RCODE_NXDOMAIN : DNS_RCODE_SUCCESS;
+ *authenticated = have_authenticated && !have_non_authenticated;
+ return 1;
+ }
+
+ answer = dns_answer_new(n);
+ if (!answer)
+ return -ENOMEM;
+
+ if (clamp_ttl)
+ current = now(clock_boottime_or_monotonic());
+
+ LIST_FOREACH(by_key, j, first) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ if (!j->rr)
+ continue;
+
+ if (clamp_ttl) {
+ rr = dns_resource_record_ref(j->rr);
+
+ r = dns_resource_record_clamp_ttl(&rr, LESS_BY(j->until, current) / USEC_PER_SEC);
+ if (r < 0)
+ return r;
+ }
+
+ r = dns_answer_add(answer, rr ?: j->rr, j->ifindex, j->authenticated ? DNS_ANSWER_AUTHENTICATED : 0);
+ if (r < 0)
+ return r;
+ }
+
+ c->n_hit++;
+
+ *ret = answer;
+ *rcode = DNS_RCODE_SUCCESS;
+ *authenticated = have_authenticated && !have_non_authenticated;
+ answer = NULL;
+
+ return n;
+}
+
+int dns_cache_check_conflicts(DnsCache *cache, DnsResourceRecord *rr, int owner_family, const union in_addr_union *owner_address) {
+ DnsCacheItem *i, *first;
+ bool same_owner = true;
+
+ assert(cache);
+ assert(rr);
+
+ dns_cache_prune(cache);
+
+ /* See if there's a cache entry for the same key. If there
+ * isn't there's no conflict */
+ first = hashmap_get(cache->by_key, rr->key);
+ if (!first)
+ return 0;
+
+ /* See if the RR key is owned by the same owner, if so, there
+ * isn't a conflict either */
+ LIST_FOREACH(by_key, i, first) {
+ if (i->owner_family != owner_family ||
+ !in_addr_equal(owner_family, &i->owner_address, owner_address)) {
+ same_owner = false;
+ break;
+ }
+ }
+ if (same_owner)
+ return 0;
+
+ /* See if there's the exact same RR in the cache. If yes, then
+ * there's no conflict. */
+ if (dns_cache_get(cache, rr))
+ return 0;
+
+ /* There's a conflict */
+ return 1;
+}
+
+int dns_cache_export_shared_to_packet(DnsCache *cache, DnsPacket *p) {
+ unsigned ancount = 0;
+ Iterator iterator;
+ DnsCacheItem *i;
+ int r;
+
+ assert(cache);
+ assert(p);
+
+ HASHMAP_FOREACH(i, cache->by_key, iterator) {
+ DnsCacheItem *j;
+
+ LIST_FOREACH(by_key, j, i) {
+ if (!j->rr)
+ continue;
+
+ if (!j->shared_owner)
+ continue;
+
+ r = dns_packet_append_rr(p, j->rr, 0, NULL, NULL);
+ if (r == -EMSGSIZE && p->protocol == DNS_PROTOCOL_MDNS) {
+ /* For mDNS, if we're unable to stuff all known answers into the given packet,
+ * allocate a new one, push the RR into that one and link it to the current one.
+ */
+
+ DNS_PACKET_HEADER(p)->ancount = htobe16(ancount);
+ ancount = 0;
+
+ r = dns_packet_new_query(&p->more, p->protocol, 0, true);
+ if (r < 0)
+ return r;
+
+ /* continue with new packet */
+ p = p->more;
+ r = dns_packet_append_rr(p, j->rr, 0, NULL, NULL);
+ }
+
+ if (r < 0)
+ return r;
+
+ ancount++;
+ }
+ }
+
+ DNS_PACKET_HEADER(p)->ancount = htobe16(ancount);
+
+ return 0;
+}
+
+void dns_cache_dump(DnsCache *cache, FILE *f) {
+ Iterator iterator;
+ DnsCacheItem *i;
+
+ if (!cache)
+ return;
+
+ if (!f)
+ f = stdout;
+
+ HASHMAP_FOREACH(i, cache->by_key, iterator) {
+ DnsCacheItem *j;
+
+ LIST_FOREACH(by_key, j, i) {
+
+ fputc('\t', f);
+
+ if (j->rr) {
+ const char *t;
+ t = dns_resource_record_to_string(j->rr);
+ if (!t) {
+ log_oom();
+ continue;
+ }
+
+ fputs(t, f);
+ fputc('\n', f);
+ } else {
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ fputs(dns_resource_key_to_string(j->key, key_str, sizeof key_str), f);
+ fputs(" -- ", f);
+ fputs(dns_cache_item_type_to_string(j), f);
+ fputc('\n', f);
+ }
+ }
+ }
+}
+
+bool dns_cache_is_empty(DnsCache *cache) {
+ if (!cache)
+ return true;
+
+ return hashmap_isempty(cache->by_key);
+}
+
+unsigned dns_cache_size(DnsCache *cache) {
+ if (!cache)
+ return 0;
+
+ return hashmap_size(cache->by_key);
+}
diff --git a/src/resolve/resolved-dns-cache.h b/src/resolve/resolved-dns-cache.h
new file mode 100644
index 0000000..48a3bde
--- /dev/null
+++ b/src/resolve/resolved-dns-cache.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "hashmap.h"
+#include "list.h"
+#include "prioq.h"
+#include "time-util.h"
+
+typedef struct DnsCache {
+ Hashmap *by_key;
+ Prioq *by_expiry;
+ unsigned n_hit;
+ unsigned n_miss;
+} DnsCache;
+
+#include "resolved-dns-answer.h"
+#include "resolved-dns-packet.h"
+#include "resolved-dns-question.h"
+#include "resolved-dns-rr.h"
+
+void dns_cache_flush(DnsCache *c);
+void dns_cache_prune(DnsCache *c);
+
+int dns_cache_put(DnsCache *c, DnsResourceKey *key, int rcode, DnsAnswer *answer, bool authenticated, uint32_t nsec_ttl, usec_t timestamp, int owner_family, const union in_addr_union *owner_address);
+int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcode, DnsAnswer **answer, bool *authenticated);
+
+int dns_cache_check_conflicts(DnsCache *cache, DnsResourceRecord *rr, int owner_family, const union in_addr_union *owner_address);
+
+void dns_cache_dump(DnsCache *cache, FILE *f);
+bool dns_cache_is_empty(DnsCache *cache);
+
+unsigned dns_cache_size(DnsCache *cache);
+
+int dns_cache_export_shared_to_packet(DnsCache *cache, DnsPacket *p);
diff --git a/src/resolve/resolved-dns-dnssec.c b/src/resolve/resolved-dns-dnssec.c
new file mode 100644
index 0000000..14acc4e
--- /dev/null
+++ b/src/resolve/resolved-dns-dnssec.c
@@ -0,0 +1,2299 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio_ext.h>
+
+#if HAVE_GCRYPT
+#include <gcrypt.h>
+#endif
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "gcrypt-util.h"
+#include "hexdecoct.h"
+#include "resolved-dns-dnssec.h"
+#include "resolved-dns-packet.h"
+#include "string-table.h"
+
+#define VERIFY_RRS_MAX 256
+#define MAX_KEY_SIZE (32*1024)
+
+/* Permit a maximum clock skew of 1h 10min. This should be enough to deal with DST confusion */
+#define SKEW_MAX (1*USEC_PER_HOUR + 10*USEC_PER_MINUTE)
+
+/* Maximum number of NSEC3 iterations we'll do. RFC5155 says 2500 shall be the maximum useful value */
+#define NSEC3_ITERATIONS_MAX 2500
+
+/*
+ * The DNSSEC Chain of trust:
+ *
+ * Normal RRs are protected via RRSIG RRs in combination with DNSKEY RRs, all in the same zone
+ * DNSKEY RRs are either protected like normal RRs, or via a DS from a zone "higher" up the tree
+ * DS RRs are protected like normal RRs
+ *
+ * Example chain:
+ * Normal RR → RRSIG/DNSKEY+ → DS → RRSIG/DNSKEY+ → DS → ... → DS → RRSIG/DNSKEY+ → DS
+ */
+
+uint16_t dnssec_keytag(DnsResourceRecord *dnskey, bool mask_revoke) {
+ const uint8_t *p;
+ uint32_t sum, f;
+ size_t i;
+
+ /* The algorithm from RFC 4034, Appendix B. */
+
+ assert(dnskey);
+ assert(dnskey->key->type == DNS_TYPE_DNSKEY);
+
+ f = (uint32_t) dnskey->dnskey.flags;
+
+ if (mask_revoke)
+ f &= ~DNSKEY_FLAG_REVOKE;
+
+ sum = f + ((((uint32_t) dnskey->dnskey.protocol) << 8) + (uint32_t) dnskey->dnskey.algorithm);
+
+ p = dnskey->dnskey.key;
+
+ for (i = 0; i < dnskey->dnskey.key_size; i++)
+ sum += (i & 1) == 0 ? (uint32_t) p[i] << 8 : (uint32_t) p[i];
+
+ sum += (sum >> 16) & UINT32_C(0xFFFF);
+
+ return sum & UINT32_C(0xFFFF);
+}
+
+int dnssec_canonicalize(const char *n, char *buffer, size_t buffer_max) {
+ size_t c = 0;
+ int r;
+
+ /* Converts the specified hostname into DNSSEC canonicalized
+ * form. */
+
+ if (buffer_max < 2)
+ return -ENOBUFS;
+
+ for (;;) {
+ r = dns_label_unescape(&n, buffer, buffer_max, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (buffer_max < (size_t) r + 2)
+ return -ENOBUFS;
+
+ /* The DNSSEC canonical form is not clear on what to
+ * do with dots appearing in labels, the way DNS-SD
+ * does it. Refuse it for now. */
+
+ if (memchr(buffer, '.', r))
+ return -EINVAL;
+
+ ascii_strlower_n(buffer, (size_t) r);
+ buffer[r] = '.';
+
+ buffer += r + 1;
+ c += r + 1;
+
+ buffer_max -= r + 1;
+ }
+
+ if (c <= 0) {
+ /* Not even a single label: this is the root domain name */
+
+ assert(buffer_max > 2);
+ buffer[0] = '.';
+ buffer[1] = 0;
+
+ return 1;
+ }
+
+ return (int) c;
+}
+
+#if HAVE_GCRYPT
+
+static int rr_compare(DnsResourceRecord * const *a, DnsResourceRecord * const *b) {
+ const DnsResourceRecord *x = *a, *y = *b;
+ size_t m;
+ int r;
+
+ /* Let's order the RRs according to RFC 4034, Section 6.3 */
+
+ assert(x);
+ assert(x->wire_format);
+ assert(y);
+ assert(y->wire_format);
+
+ m = MIN(DNS_RESOURCE_RECORD_RDATA_SIZE(x), DNS_RESOURCE_RECORD_RDATA_SIZE(y));
+
+ r = memcmp(DNS_RESOURCE_RECORD_RDATA(x), DNS_RESOURCE_RECORD_RDATA(y), m);
+ if (r != 0)
+ return r;
+
+ return CMP(DNS_RESOURCE_RECORD_RDATA_SIZE(x), DNS_RESOURCE_RECORD_RDATA_SIZE(y));
+}
+
+static int dnssec_rsa_verify_raw(
+ const char *hash_algorithm,
+ const void *signature, size_t signature_size,
+ const void *data, size_t data_size,
+ const void *exponent, size_t exponent_size,
+ const void *modulus, size_t modulus_size) {
+
+ gcry_sexp_t public_key_sexp = NULL, data_sexp = NULL, signature_sexp = NULL;
+ gcry_mpi_t n = NULL, e = NULL, s = NULL;
+ gcry_error_t ge;
+ int r;
+
+ assert(hash_algorithm);
+
+ ge = gcry_mpi_scan(&s, GCRYMPI_FMT_USG, signature, signature_size, NULL);
+ if (ge != 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_mpi_scan(&e, GCRYMPI_FMT_USG, exponent, exponent_size, NULL);
+ if (ge != 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_mpi_scan(&n, GCRYMPI_FMT_USG, modulus, modulus_size, NULL);
+ if (ge != 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&signature_sexp,
+ NULL,
+ "(sig-val (rsa (s %m)))",
+ s);
+
+ if (ge != 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&data_sexp,
+ NULL,
+ "(data (flags pkcs1) (hash %s %b))",
+ hash_algorithm,
+ (int) data_size,
+ data);
+ if (ge != 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&public_key_sexp,
+ NULL,
+ "(public-key (rsa (n %m) (e %m)))",
+ n,
+ e);
+ if (ge != 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_pk_verify(signature_sexp, data_sexp, public_key_sexp);
+ if (gpg_err_code(ge) == GPG_ERR_BAD_SIGNATURE)
+ r = 0;
+ else if (ge != 0) {
+ log_debug("RSA signature check failed: %s", gpg_strerror(ge));
+ r = -EIO;
+ } else
+ r = 1;
+
+finish:
+ if (e)
+ gcry_mpi_release(e);
+ if (n)
+ gcry_mpi_release(n);
+ if (s)
+ gcry_mpi_release(s);
+
+ if (public_key_sexp)
+ gcry_sexp_release(public_key_sexp);
+ if (signature_sexp)
+ gcry_sexp_release(signature_sexp);
+ if (data_sexp)
+ gcry_sexp_release(data_sexp);
+
+ return r;
+}
+
+static int dnssec_rsa_verify(
+ const char *hash_algorithm,
+ const void *hash, size_t hash_size,
+ DnsResourceRecord *rrsig,
+ DnsResourceRecord *dnskey) {
+
+ size_t exponent_size, modulus_size;
+ void *exponent, *modulus;
+
+ assert(hash_algorithm);
+ assert(hash);
+ assert(hash_size > 0);
+ assert(rrsig);
+ assert(dnskey);
+
+ if (*(uint8_t*) dnskey->dnskey.key == 0) {
+ /* exponent is > 255 bytes long */
+
+ exponent = (uint8_t*) dnskey->dnskey.key + 3;
+ exponent_size =
+ ((size_t) (((uint8_t*) dnskey->dnskey.key)[1]) << 8) |
+ ((size_t) ((uint8_t*) dnskey->dnskey.key)[2]);
+
+ if (exponent_size < 256)
+ return -EINVAL;
+
+ if (3 + exponent_size >= dnskey->dnskey.key_size)
+ return -EINVAL;
+
+ modulus = (uint8_t*) dnskey->dnskey.key + 3 + exponent_size;
+ modulus_size = dnskey->dnskey.key_size - 3 - exponent_size;
+
+ } else {
+ /* exponent is <= 255 bytes long */
+
+ exponent = (uint8_t*) dnskey->dnskey.key + 1;
+ exponent_size = (size_t) ((uint8_t*) dnskey->dnskey.key)[0];
+
+ if (exponent_size <= 0)
+ return -EINVAL;
+
+ if (1 + exponent_size >= dnskey->dnskey.key_size)
+ return -EINVAL;
+
+ modulus = (uint8_t*) dnskey->dnskey.key + 1 + exponent_size;
+ modulus_size = dnskey->dnskey.key_size - 1 - exponent_size;
+ }
+
+ return dnssec_rsa_verify_raw(
+ hash_algorithm,
+ rrsig->rrsig.signature, rrsig->rrsig.signature_size,
+ hash, hash_size,
+ exponent, exponent_size,
+ modulus, modulus_size);
+}
+
+static int dnssec_ecdsa_verify_raw(
+ const char *hash_algorithm,
+ const char *curve,
+ const void *signature_r, size_t signature_r_size,
+ const void *signature_s, size_t signature_s_size,
+ const void *data, size_t data_size,
+ const void *key, size_t key_size) {
+
+ gcry_sexp_t public_key_sexp = NULL, data_sexp = NULL, signature_sexp = NULL;
+ gcry_mpi_t q = NULL, r = NULL, s = NULL;
+ gcry_error_t ge;
+ int k;
+
+ assert(hash_algorithm);
+
+ ge = gcry_mpi_scan(&r, GCRYMPI_FMT_USG, signature_r, signature_r_size, NULL);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_mpi_scan(&s, GCRYMPI_FMT_USG, signature_s, signature_s_size, NULL);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_mpi_scan(&q, GCRYMPI_FMT_USG, key, key_size, NULL);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&signature_sexp,
+ NULL,
+ "(sig-val (ecdsa (r %m) (s %m)))",
+ r,
+ s);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&data_sexp,
+ NULL,
+ "(data (flags rfc6979) (hash %s %b))",
+ hash_algorithm,
+ (int) data_size,
+ data);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&public_key_sexp,
+ NULL,
+ "(public-key (ecc (curve %s) (q %m)))",
+ curve,
+ q);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_pk_verify(signature_sexp, data_sexp, public_key_sexp);
+ if (gpg_err_code(ge) == GPG_ERR_BAD_SIGNATURE)
+ k = 0;
+ else if (ge != 0) {
+ log_debug("ECDSA signature check failed: %s", gpg_strerror(ge));
+ k = -EIO;
+ } else
+ k = 1;
+finish:
+ if (r)
+ gcry_mpi_release(r);
+ if (s)
+ gcry_mpi_release(s);
+ if (q)
+ gcry_mpi_release(q);
+
+ if (public_key_sexp)
+ gcry_sexp_release(public_key_sexp);
+ if (signature_sexp)
+ gcry_sexp_release(signature_sexp);
+ if (data_sexp)
+ gcry_sexp_release(data_sexp);
+
+ return k;
+}
+
+static int dnssec_ecdsa_verify(
+ const char *hash_algorithm,
+ int algorithm,
+ const void *hash, size_t hash_size,
+ DnsResourceRecord *rrsig,
+ DnsResourceRecord *dnskey) {
+
+ const char *curve;
+ size_t key_size;
+ uint8_t *q;
+
+ assert(hash);
+ assert(hash_size);
+ assert(rrsig);
+ assert(dnskey);
+
+ if (algorithm == DNSSEC_ALGORITHM_ECDSAP256SHA256) {
+ key_size = 32;
+ curve = "NIST P-256";
+ } else if (algorithm == DNSSEC_ALGORITHM_ECDSAP384SHA384) {
+ key_size = 48;
+ curve = "NIST P-384";
+ } else
+ return -EOPNOTSUPP;
+
+ if (dnskey->dnskey.key_size != key_size * 2)
+ return -EINVAL;
+
+ if (rrsig->rrsig.signature_size != key_size * 2)
+ return -EINVAL;
+
+ q = newa(uint8_t, key_size*2 + 1);
+ q[0] = 0x04; /* Prepend 0x04 to indicate an uncompressed key */
+ memcpy(q+1, dnskey->dnskey.key, key_size*2);
+
+ return dnssec_ecdsa_verify_raw(
+ hash_algorithm,
+ curve,
+ rrsig->rrsig.signature, key_size,
+ (uint8_t*) rrsig->rrsig.signature + key_size, key_size,
+ hash, hash_size,
+ q, key_size*2+1);
+}
+
+#if GCRYPT_VERSION_NUMBER >= 0x010600
+static int dnssec_eddsa_verify_raw(
+ const char *curve,
+ const void *signature_r, size_t signature_r_size,
+ const void *signature_s, size_t signature_s_size,
+ const void *data, size_t data_size,
+ const void *key, size_t key_size) {
+
+ gcry_sexp_t public_key_sexp = NULL, data_sexp = NULL, signature_sexp = NULL;
+ gcry_error_t ge;
+ int k;
+
+ ge = gcry_sexp_build(&signature_sexp,
+ NULL,
+ "(sig-val (eddsa (r %b) (s %b)))",
+ (int) signature_r_size,
+ signature_r,
+ (int) signature_s_size,
+ signature_s);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&data_sexp,
+ NULL,
+ "(data (flags eddsa) (hash-algo sha512) (value %b))",
+ (int) data_size,
+ data);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&public_key_sexp,
+ NULL,
+ "(public-key (ecc (curve %s) (flags eddsa) (q %b)))",
+ curve,
+ (int) key_size,
+ key);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_pk_verify(signature_sexp, data_sexp, public_key_sexp);
+ if (gpg_err_code(ge) == GPG_ERR_BAD_SIGNATURE)
+ k = 0;
+ else if (ge != 0) {
+ log_debug("EdDSA signature check failed: %s", gpg_strerror(ge));
+ k = -EIO;
+ } else
+ k = 1;
+finish:
+ if (public_key_sexp)
+ gcry_sexp_release(public_key_sexp);
+ if (signature_sexp)
+ gcry_sexp_release(signature_sexp);
+ if (data_sexp)
+ gcry_sexp_release(data_sexp);
+
+ return k;
+}
+
+static int dnssec_eddsa_verify(
+ int algorithm,
+ const void *data, size_t data_size,
+ DnsResourceRecord *rrsig,
+ DnsResourceRecord *dnskey) {
+ const char *curve;
+ size_t key_size;
+
+ if (algorithm == DNSSEC_ALGORITHM_ED25519) {
+ curve = "Ed25519";
+ key_size = 32;
+ } else
+ return -EOPNOTSUPP;
+
+ if (dnskey->dnskey.key_size != key_size)
+ return -EINVAL;
+
+ if (rrsig->rrsig.signature_size != key_size * 2)
+ return -EINVAL;
+
+ return dnssec_eddsa_verify_raw(
+ curve,
+ rrsig->rrsig.signature, key_size,
+ (uint8_t*) rrsig->rrsig.signature + key_size, key_size,
+ data, data_size,
+ dnskey->dnskey.key, key_size);
+}
+#endif
+
+static void md_add_uint8(gcry_md_hd_t md, uint8_t v) {
+ gcry_md_write(md, &v, sizeof(v));
+}
+
+static void md_add_uint16(gcry_md_hd_t md, uint16_t v) {
+ v = htobe16(v);
+ gcry_md_write(md, &v, sizeof(v));
+}
+
+static void fwrite_uint8(FILE *fp, uint8_t v) {
+ fwrite(&v, sizeof(v), 1, fp);
+}
+
+static void fwrite_uint16(FILE *fp, uint16_t v) {
+ v = htobe16(v);
+ fwrite(&v, sizeof(v), 1, fp);
+}
+
+static void fwrite_uint32(FILE *fp, uint32_t v) {
+ v = htobe32(v);
+ fwrite(&v, sizeof(v), 1, fp);
+}
+
+static int dnssec_rrsig_prepare(DnsResourceRecord *rrsig) {
+ int n_key_labels, n_signer_labels;
+ const char *name;
+ int r;
+
+ /* Checks whether the specified RRSIG RR is somewhat valid, and initializes the .n_skip_labels_source and
+ * .n_skip_labels_signer fields so that we can use them later on. */
+
+ assert(rrsig);
+ assert(rrsig->key->type == DNS_TYPE_RRSIG);
+
+ /* Check if this RRSIG RR is already prepared */
+ if (rrsig->n_skip_labels_source != (unsigned) -1)
+ return 0;
+
+ if (rrsig->rrsig.inception > rrsig->rrsig.expiration)
+ return -EINVAL;
+
+ name = dns_resource_key_name(rrsig->key);
+
+ n_key_labels = dns_name_count_labels(name);
+ if (n_key_labels < 0)
+ return n_key_labels;
+ if (rrsig->rrsig.labels > n_key_labels)
+ return -EINVAL;
+
+ n_signer_labels = dns_name_count_labels(rrsig->rrsig.signer);
+ if (n_signer_labels < 0)
+ return n_signer_labels;
+ if (n_signer_labels > rrsig->rrsig.labels)
+ return -EINVAL;
+
+ r = dns_name_skip(name, n_key_labels - n_signer_labels, &name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ /* Check if the signer is really a suffix of us */
+ r = dns_name_equal(name, rrsig->rrsig.signer);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ rrsig->n_skip_labels_source = n_key_labels - rrsig->rrsig.labels;
+ rrsig->n_skip_labels_signer = n_key_labels - n_signer_labels;
+
+ return 0;
+}
+
+static int dnssec_rrsig_expired(DnsResourceRecord *rrsig, usec_t realtime) {
+ usec_t expiration, inception, skew;
+
+ assert(rrsig);
+ assert(rrsig->key->type == DNS_TYPE_RRSIG);
+
+ if (realtime == USEC_INFINITY)
+ realtime = now(CLOCK_REALTIME);
+
+ expiration = rrsig->rrsig.expiration * USEC_PER_SEC;
+ inception = rrsig->rrsig.inception * USEC_PER_SEC;
+
+ /* Consider inverted validity intervals as expired */
+ if (inception > expiration)
+ return true;
+
+ /* Permit a certain amount of clock skew of 10% of the valid
+ * time range. This takes inspiration from unbound's
+ * resolver. */
+ skew = (expiration - inception) / 10;
+ if (skew > SKEW_MAX)
+ skew = SKEW_MAX;
+
+ if (inception < skew)
+ inception = 0;
+ else
+ inception -= skew;
+
+ if (expiration + skew < expiration)
+ expiration = USEC_INFINITY;
+ else
+ expiration += skew;
+
+ return realtime < inception || realtime > expiration;
+}
+
+static int algorithm_to_gcrypt_md(uint8_t algorithm) {
+
+ /* Translates a DNSSEC signature algorithm into a gcrypt
+ * digest identifier.
+ *
+ * Note that we implement all algorithms listed as "Must
+ * implement" and "Recommended to Implement" in RFC6944. We
+ * don't implement any algorithms that are listed as
+ * "Optional" or "Must Not Implement". Specifically, we do not
+ * implement RSAMD5, DSASHA1, DH, DSA-NSEC3-SHA1, and
+ * GOST-ECC. */
+
+ switch (algorithm) {
+
+ case DNSSEC_ALGORITHM_RSASHA1:
+ case DNSSEC_ALGORITHM_RSASHA1_NSEC3_SHA1:
+ return GCRY_MD_SHA1;
+
+ case DNSSEC_ALGORITHM_RSASHA256:
+ case DNSSEC_ALGORITHM_ECDSAP256SHA256:
+ return GCRY_MD_SHA256;
+
+ case DNSSEC_ALGORITHM_ECDSAP384SHA384:
+ return GCRY_MD_SHA384;
+
+ case DNSSEC_ALGORITHM_RSASHA512:
+ return GCRY_MD_SHA512;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void dnssec_fix_rrset_ttl(
+ DnsResourceRecord *list[],
+ unsigned n,
+ DnsResourceRecord *rrsig,
+ usec_t realtime) {
+
+ unsigned k;
+
+ assert(list);
+ assert(n > 0);
+ assert(rrsig);
+
+ for (k = 0; k < n; k++) {
+ DnsResourceRecord *rr = list[k];
+
+ /* Pick the TTL as the minimum of the RR's TTL, the
+ * RR's original TTL according to the RRSIG and the
+ * RRSIG's own TTL, see RFC 4035, Section 5.3.3 */
+ rr->ttl = MIN3(rr->ttl, rrsig->rrsig.original_ttl, rrsig->ttl);
+ rr->expiry = rrsig->rrsig.expiration * USEC_PER_SEC;
+
+ /* Copy over information about the signer and wildcard source of synthesis */
+ rr->n_skip_labels_source = rrsig->n_skip_labels_source;
+ rr->n_skip_labels_signer = rrsig->n_skip_labels_signer;
+ }
+
+ rrsig->expiry = rrsig->rrsig.expiration * USEC_PER_SEC;
+}
+
+int dnssec_verify_rrset(
+ DnsAnswer *a,
+ const DnsResourceKey *key,
+ DnsResourceRecord *rrsig,
+ DnsResourceRecord *dnskey,
+ usec_t realtime,
+ DnssecResult *result) {
+
+ uint8_t wire_format_name[DNS_WIRE_FORMAT_HOSTNAME_MAX];
+ DnsResourceRecord **list, *rr;
+ const char *source, *name;
+ _cleanup_(gcry_md_closep) gcry_md_hd_t md = NULL;
+ int r, md_algorithm;
+ size_t k, n = 0;
+ size_t sig_size = 0;
+ _cleanup_free_ char *sig_data = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t hash_size;
+ void *hash;
+ bool wildcard;
+
+ assert(key);
+ assert(rrsig);
+ assert(dnskey);
+ assert(result);
+ assert(rrsig->key->type == DNS_TYPE_RRSIG);
+ assert(dnskey->key->type == DNS_TYPE_DNSKEY);
+
+ /* Verifies that the RRSet matches the specified "key" in "a",
+ * using the signature "rrsig" and the key "dnskey". It's
+ * assumed that RRSIG and DNSKEY match. */
+
+ r = dnssec_rrsig_prepare(rrsig);
+ if (r == -EINVAL) {
+ *result = DNSSEC_INVALID;
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ r = dnssec_rrsig_expired(rrsig, realtime);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ *result = DNSSEC_SIGNATURE_EXPIRED;
+ return 0;
+ }
+
+ name = dns_resource_key_name(key);
+
+ /* Some keys may only appear signed in the zone apex, and are invalid anywhere else. (SOA, NS...) */
+ if (dns_type_apex_only(rrsig->rrsig.type_covered)) {
+ r = dns_name_equal(rrsig->rrsig.signer, name);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ *result = DNSSEC_INVALID;
+ return 0;
+ }
+ }
+
+ /* OTOH DS RRs may not appear in the zone apex, but are valid everywhere else. */
+ if (rrsig->rrsig.type_covered == DNS_TYPE_DS) {
+ r = dns_name_equal(rrsig->rrsig.signer, name);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ *result = DNSSEC_INVALID;
+ return 0;
+ }
+ }
+
+ /* Determine the "Source of Synthesis" and whether this is a wildcard RRSIG */
+ r = dns_name_suffix(name, rrsig->rrsig.labels, &source);
+ if (r < 0)
+ return r;
+ if (r > 0 && !dns_type_may_wildcard(rrsig->rrsig.type_covered)) {
+ /* We refuse to validate NSEC3 or SOA RRs that are synthesized from wildcards */
+ *result = DNSSEC_INVALID;
+ return 0;
+ }
+ if (r == 1) {
+ /* If we stripped a single label, then let's see if that maybe was "*". If so, we are not really
+ * synthesized from a wildcard, we are the wildcard itself. Treat that like a normal name. */
+ r = dns_name_startswith(name, "*");
+ if (r < 0)
+ return r;
+ if (r > 0)
+ source = name;
+
+ wildcard = r == 0;
+ } else
+ wildcard = r > 0;
+
+ /* Collect all relevant RRs in a single array, so that we can look at the RRset */
+ list = newa(DnsResourceRecord *, dns_answer_size(a));
+
+ DNS_ANSWER_FOREACH(rr, a) {
+ r = dns_resource_key_equal(key, rr->key);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* We need the wire format for ordering, and digest calculation */
+ r = dns_resource_record_to_wire_format(rr, true);
+ if (r < 0)
+ return r;
+
+ list[n++] = rr;
+
+ if (n > VERIFY_RRS_MAX)
+ return -E2BIG;
+ }
+
+ if (n <= 0)
+ return -ENODATA;
+
+ /* Bring the RRs into canonical order */
+ typesafe_qsort(list, n, rr_compare);
+
+ f = open_memstream(&sig_data, &sig_size);
+ if (!f)
+ return -ENOMEM;
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ fwrite_uint16(f, rrsig->rrsig.type_covered);
+ fwrite_uint8(f, rrsig->rrsig.algorithm);
+ fwrite_uint8(f, rrsig->rrsig.labels);
+ fwrite_uint32(f, rrsig->rrsig.original_ttl);
+ fwrite_uint32(f, rrsig->rrsig.expiration);
+ fwrite_uint32(f, rrsig->rrsig.inception);
+ fwrite_uint16(f, rrsig->rrsig.key_tag);
+
+ r = dns_name_to_wire_format(rrsig->rrsig.signer, wire_format_name, sizeof(wire_format_name), true);
+ if (r < 0)
+ return r;
+ fwrite(wire_format_name, 1, r, f);
+
+ /* Convert the source of synthesis into wire format */
+ r = dns_name_to_wire_format(source, wire_format_name, sizeof(wire_format_name), true);
+ if (r < 0)
+ return r;
+
+ for (k = 0; k < n; k++) {
+ size_t l;
+
+ rr = list[k];
+
+ /* Hash the source of synthesis. If this is a wildcard, then prefix it with the *. label */
+ if (wildcard)
+ fwrite((uint8_t[]) { 1, '*'}, sizeof(uint8_t), 2, f);
+ fwrite(wire_format_name, 1, r, f);
+
+ fwrite_uint16(f, rr->key->type);
+ fwrite_uint16(f, rr->key->class);
+ fwrite_uint32(f, rrsig->rrsig.original_ttl);
+
+ l = DNS_RESOURCE_RECORD_RDATA_SIZE(rr);
+ assert(l <= 0xFFFF);
+
+ fwrite_uint16(f, (uint16_t) l);
+ fwrite(DNS_RESOURCE_RECORD_RDATA(rr), 1, l, f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ initialize_libgcrypt(false);
+
+ switch (rrsig->rrsig.algorithm) {
+#if GCRYPT_VERSION_NUMBER >= 0x010600
+ case DNSSEC_ALGORITHM_ED25519:
+ break;
+#else
+ case DNSSEC_ALGORITHM_ED25519:
+#endif
+ case DNSSEC_ALGORITHM_ED448:
+ *result = DNSSEC_UNSUPPORTED_ALGORITHM;
+ return 0;
+ default:
+ /* OK, the RRs are now in canonical order. Let's calculate the digest */
+ md_algorithm = algorithm_to_gcrypt_md(rrsig->rrsig.algorithm);
+ if (md_algorithm == -EOPNOTSUPP) {
+ *result = DNSSEC_UNSUPPORTED_ALGORITHM;
+ return 0;
+ }
+ if (md_algorithm < 0)
+ return md_algorithm;
+
+ gcry_md_open(&md, md_algorithm, 0);
+ if (!md)
+ return -EIO;
+
+ hash_size = gcry_md_get_algo_dlen(md_algorithm);
+ assert(hash_size > 0);
+
+ gcry_md_write(md, sig_data, sig_size);
+
+ hash = gcry_md_read(md, 0);
+ if (!hash)
+ return -EIO;
+ }
+
+ switch (rrsig->rrsig.algorithm) {
+
+ case DNSSEC_ALGORITHM_RSASHA1:
+ case DNSSEC_ALGORITHM_RSASHA1_NSEC3_SHA1:
+ case DNSSEC_ALGORITHM_RSASHA256:
+ case DNSSEC_ALGORITHM_RSASHA512:
+ r = dnssec_rsa_verify(
+ gcry_md_algo_name(md_algorithm),
+ hash, hash_size,
+ rrsig,
+ dnskey);
+ break;
+
+ case DNSSEC_ALGORITHM_ECDSAP256SHA256:
+ case DNSSEC_ALGORITHM_ECDSAP384SHA384:
+ r = dnssec_ecdsa_verify(
+ gcry_md_algo_name(md_algorithm),
+ rrsig->rrsig.algorithm,
+ hash, hash_size,
+ rrsig,
+ dnskey);
+ break;
+#if GCRYPT_VERSION_NUMBER >= 0x010600
+ case DNSSEC_ALGORITHM_ED25519:
+ r = dnssec_eddsa_verify(
+ rrsig->rrsig.algorithm,
+ sig_data, sig_size,
+ rrsig,
+ dnskey);
+ break;
+#endif
+ }
+ if (r < 0)
+ return r;
+
+ /* Now, fix the ttl, expiry, and remember the synthesizing source and the signer */
+ if (r > 0)
+ dnssec_fix_rrset_ttl(list, n, rrsig, realtime);
+
+ if (r == 0)
+ *result = DNSSEC_INVALID;
+ else if (wildcard)
+ *result = DNSSEC_VALIDATED_WILDCARD;
+ else
+ *result = DNSSEC_VALIDATED;
+
+ return 0;
+}
+
+int dnssec_rrsig_match_dnskey(DnsResourceRecord *rrsig, DnsResourceRecord *dnskey, bool revoked_ok) {
+
+ assert(rrsig);
+ assert(dnskey);
+
+ /* Checks if the specified DNSKEY RR matches the key used for
+ * the signature in the specified RRSIG RR */
+
+ if (rrsig->key->type != DNS_TYPE_RRSIG)
+ return -EINVAL;
+
+ if (dnskey->key->type != DNS_TYPE_DNSKEY)
+ return 0;
+ if (dnskey->key->class != rrsig->key->class)
+ return 0;
+ if ((dnskey->dnskey.flags & DNSKEY_FLAG_ZONE_KEY) == 0)
+ return 0;
+ if (!revoked_ok && (dnskey->dnskey.flags & DNSKEY_FLAG_REVOKE))
+ return 0;
+ if (dnskey->dnskey.protocol != 3)
+ return 0;
+ if (dnskey->dnskey.algorithm != rrsig->rrsig.algorithm)
+ return 0;
+
+ if (dnssec_keytag(dnskey, false) != rrsig->rrsig.key_tag)
+ return 0;
+
+ return dns_name_equal(dns_resource_key_name(dnskey->key), rrsig->rrsig.signer);
+}
+
+int dnssec_key_match_rrsig(const DnsResourceKey *key, DnsResourceRecord *rrsig) {
+ assert(key);
+ assert(rrsig);
+
+ /* Checks if the specified RRSIG RR protects the RRSet of the specified RR key. */
+
+ if (rrsig->key->type != DNS_TYPE_RRSIG)
+ return 0;
+ if (rrsig->key->class != key->class)
+ return 0;
+ if (rrsig->rrsig.type_covered != key->type)
+ return 0;
+
+ return dns_name_equal(dns_resource_key_name(rrsig->key), dns_resource_key_name(key));
+}
+
+int dnssec_verify_rrset_search(
+ DnsAnswer *a,
+ const DnsResourceKey *key,
+ DnsAnswer *validated_dnskeys,
+ usec_t realtime,
+ DnssecResult *result,
+ DnsResourceRecord **ret_rrsig) {
+
+ bool found_rrsig = false, found_invalid = false, found_expired_rrsig = false, found_unsupported_algorithm = false;
+ DnsResourceRecord *rrsig;
+ int r;
+
+ assert(key);
+ assert(result);
+
+ /* Verifies all RRs from "a" that match the key "key" against DNSKEYs in "validated_dnskeys" */
+
+ if (!a || a->n_rrs <= 0)
+ return -ENODATA;
+
+ /* Iterate through each RRSIG RR. */
+ DNS_ANSWER_FOREACH(rrsig, a) {
+ DnsResourceRecord *dnskey;
+ DnsAnswerFlags flags;
+
+ /* Is this an RRSIG RR that applies to RRs matching our key? */
+ r = dnssec_key_match_rrsig(key, rrsig);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ found_rrsig = true;
+
+ /* Look for a matching key */
+ DNS_ANSWER_FOREACH_FLAGS(dnskey, flags, validated_dnskeys) {
+ DnssecResult one_result;
+
+ if ((flags & DNS_ANSWER_AUTHENTICATED) == 0)
+ continue;
+
+ /* Is this a DNSKEY RR that matches they key of our RRSIG? */
+ r = dnssec_rrsig_match_dnskey(rrsig, dnskey, false);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* Take the time here, if it isn't set yet, so
+ * that we do all validations with the same
+ * time. */
+ if (realtime == USEC_INFINITY)
+ realtime = now(CLOCK_REALTIME);
+
+ /* Yay, we found a matching RRSIG with a matching
+ * DNSKEY, awesome. Now let's verify all entries of
+ * the RRSet against the RRSIG and DNSKEY
+ * combination. */
+
+ r = dnssec_verify_rrset(a, key, rrsig, dnskey, realtime, &one_result);
+ if (r < 0)
+ return r;
+
+ switch (one_result) {
+
+ case DNSSEC_VALIDATED:
+ case DNSSEC_VALIDATED_WILDCARD:
+ /* Yay, the RR has been validated,
+ * return immediately, but fix up the expiry */
+ if (ret_rrsig)
+ *ret_rrsig = rrsig;
+
+ *result = one_result;
+ return 0;
+
+ case DNSSEC_INVALID:
+ /* If the signature is invalid, let's try another
+ key and/or signature. After all they
+ key_tags and stuff are not unique, and
+ might be shared by multiple keys. */
+ found_invalid = true;
+ continue;
+
+ case DNSSEC_UNSUPPORTED_ALGORITHM:
+ /* If the key algorithm is
+ unsupported, try another
+ RRSIG/DNSKEY pair, but remember we
+ encountered this, so that we can
+ return a proper error when we
+ encounter nothing better. */
+ found_unsupported_algorithm = true;
+ continue;
+
+ case DNSSEC_SIGNATURE_EXPIRED:
+ /* If the signature is expired, try
+ another one, but remember it, so
+ that we can return this */
+ found_expired_rrsig = true;
+ continue;
+
+ default:
+ assert_not_reached("Unexpected DNSSEC validation result");
+ }
+ }
+ }
+
+ if (found_expired_rrsig)
+ *result = DNSSEC_SIGNATURE_EXPIRED;
+ else if (found_unsupported_algorithm)
+ *result = DNSSEC_UNSUPPORTED_ALGORITHM;
+ else if (found_invalid)
+ *result = DNSSEC_INVALID;
+ else if (found_rrsig)
+ *result = DNSSEC_MISSING_KEY;
+ else
+ *result = DNSSEC_NO_SIGNATURE;
+
+ if (ret_rrsig)
+ *ret_rrsig = NULL;
+
+ return 0;
+}
+
+int dnssec_has_rrsig(DnsAnswer *a, const DnsResourceKey *key) {
+ DnsResourceRecord *rr;
+ int r;
+
+ /* Checks whether there's at least one RRSIG in 'a' that proctects RRs of the specified key */
+
+ DNS_ANSWER_FOREACH(rr, a) {
+ r = dnssec_key_match_rrsig(key, rr);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int digest_to_gcrypt_md(uint8_t algorithm) {
+
+ /* Translates a DNSSEC digest algorithm into a gcrypt digest identifier */
+
+ switch (algorithm) {
+
+ case DNSSEC_DIGEST_SHA1:
+ return GCRY_MD_SHA1;
+
+ case DNSSEC_DIGEST_SHA256:
+ return GCRY_MD_SHA256;
+
+ case DNSSEC_DIGEST_SHA384:
+ return GCRY_MD_SHA384;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int dnssec_verify_dnskey_by_ds(DnsResourceRecord *dnskey, DnsResourceRecord *ds, bool mask_revoke) {
+ uint8_t wire_format[DNS_WIRE_FORMAT_HOSTNAME_MAX];
+ _cleanup_(gcry_md_closep) gcry_md_hd_t md = NULL;
+ size_t hash_size;
+ int md_algorithm, r;
+ void *result;
+
+ assert(dnskey);
+ assert(ds);
+
+ /* Implements DNSKEY verification by a DS, according to RFC 4035, section 5.2 */
+
+ if (dnskey->key->type != DNS_TYPE_DNSKEY)
+ return -EINVAL;
+ if (ds->key->type != DNS_TYPE_DS)
+ return -EINVAL;
+ if ((dnskey->dnskey.flags & DNSKEY_FLAG_ZONE_KEY) == 0)
+ return -EKEYREJECTED;
+ if (!mask_revoke && (dnskey->dnskey.flags & DNSKEY_FLAG_REVOKE))
+ return -EKEYREJECTED;
+ if (dnskey->dnskey.protocol != 3)
+ return -EKEYREJECTED;
+
+ if (dnskey->dnskey.algorithm != ds->ds.algorithm)
+ return 0;
+ if (dnssec_keytag(dnskey, mask_revoke) != ds->ds.key_tag)
+ return 0;
+
+ initialize_libgcrypt(false);
+
+ md_algorithm = digest_to_gcrypt_md(ds->ds.digest_type);
+ if (md_algorithm < 0)
+ return md_algorithm;
+
+ hash_size = gcry_md_get_algo_dlen(md_algorithm);
+ assert(hash_size > 0);
+
+ if (ds->ds.digest_size != hash_size)
+ return 0;
+
+ r = dns_name_to_wire_format(dns_resource_key_name(dnskey->key), wire_format, sizeof(wire_format), true);
+ if (r < 0)
+ return r;
+
+ gcry_md_open(&md, md_algorithm, 0);
+ if (!md)
+ return -EIO;
+
+ gcry_md_write(md, wire_format, r);
+ if (mask_revoke)
+ md_add_uint16(md, dnskey->dnskey.flags & ~DNSKEY_FLAG_REVOKE);
+ else
+ md_add_uint16(md, dnskey->dnskey.flags);
+ md_add_uint8(md, dnskey->dnskey.protocol);
+ md_add_uint8(md, dnskey->dnskey.algorithm);
+ gcry_md_write(md, dnskey->dnskey.key, dnskey->dnskey.key_size);
+
+ result = gcry_md_read(md, 0);
+ if (!result)
+ return -EIO;
+
+ return memcmp(result, ds->ds.digest, ds->ds.digest_size) == 0;
+}
+
+int dnssec_verify_dnskey_by_ds_search(DnsResourceRecord *dnskey, DnsAnswer *validated_ds) {
+ DnsResourceRecord *ds;
+ DnsAnswerFlags flags;
+ int r;
+
+ assert(dnskey);
+
+ if (dnskey->key->type != DNS_TYPE_DNSKEY)
+ return 0;
+
+ DNS_ANSWER_FOREACH_FLAGS(ds, flags, validated_ds) {
+
+ if ((flags & DNS_ANSWER_AUTHENTICATED) == 0)
+ continue;
+
+ if (ds->key->type != DNS_TYPE_DS)
+ continue;
+ if (ds->key->class != dnskey->key->class)
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(dnskey->key), dns_resource_key_name(ds->key));
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dnssec_verify_dnskey_by_ds(dnskey, ds, false);
+ if (IN_SET(r, -EKEYREJECTED, -EOPNOTSUPP))
+ return 0; /* The DNSKEY is revoked or otherwise invalid, or we don't support the digest algorithm */
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int nsec3_hash_to_gcrypt_md(uint8_t algorithm) {
+
+ /* Translates a DNSSEC NSEC3 hash algorithm into a gcrypt digest identifier */
+
+ switch (algorithm) {
+
+ case NSEC3_ALGORITHM_SHA1:
+ return GCRY_MD_SHA1;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int dnssec_nsec3_hash(DnsResourceRecord *nsec3, const char *name, void *ret) {
+ uint8_t wire_format[DNS_WIRE_FORMAT_HOSTNAME_MAX];
+ gcry_md_hd_t md = NULL;
+ size_t hash_size;
+ int algorithm;
+ void *result;
+ unsigned k;
+ int r;
+
+ assert(nsec3);
+ assert(name);
+ assert(ret);
+
+ if (nsec3->key->type != DNS_TYPE_NSEC3)
+ return -EINVAL;
+
+ if (nsec3->nsec3.iterations > NSEC3_ITERATIONS_MAX)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Ignoring NSEC3 RR %s with excessive number of iterations.",
+ dns_resource_record_to_string(nsec3));
+
+ algorithm = nsec3_hash_to_gcrypt_md(nsec3->nsec3.algorithm);
+ if (algorithm < 0)
+ return algorithm;
+
+ initialize_libgcrypt(false);
+
+ hash_size = gcry_md_get_algo_dlen(algorithm);
+ assert(hash_size > 0);
+
+ if (nsec3->nsec3.next_hashed_name_size != hash_size)
+ return -EINVAL;
+
+ r = dns_name_to_wire_format(name, wire_format, sizeof(wire_format), true);
+ if (r < 0)
+ return r;
+
+ gcry_md_open(&md, algorithm, 0);
+ if (!md)
+ return -EIO;
+
+ gcry_md_write(md, wire_format, r);
+ gcry_md_write(md, nsec3->nsec3.salt, nsec3->nsec3.salt_size);
+
+ result = gcry_md_read(md, 0);
+ if (!result) {
+ r = -EIO;
+ goto finish;
+ }
+
+ for (k = 0; k < nsec3->nsec3.iterations; k++) {
+ uint8_t tmp[hash_size];
+ memcpy(tmp, result, hash_size);
+
+ gcry_md_reset(md);
+ gcry_md_write(md, tmp, hash_size);
+ gcry_md_write(md, nsec3->nsec3.salt, nsec3->nsec3.salt_size);
+
+ result = gcry_md_read(md, 0);
+ if (!result) {
+ r = -EIO;
+ goto finish;
+ }
+ }
+
+ memcpy(ret, result, hash_size);
+ r = (int) hash_size;
+
+finish:
+ gcry_md_close(md);
+ return r;
+}
+
+static int nsec3_is_good(DnsResourceRecord *rr, DnsResourceRecord *nsec3) {
+ const char *a, *b;
+ int r;
+
+ assert(rr);
+
+ if (rr->key->type != DNS_TYPE_NSEC3)
+ return 0;
+
+ /* RFC 5155, Section 8.2 says we MUST ignore NSEC3 RRs with flags != 0 or 1 */
+ if (!IN_SET(rr->nsec3.flags, 0, 1))
+ return 0;
+
+ /* Ignore NSEC3 RRs whose algorithm we don't know */
+ if (nsec3_hash_to_gcrypt_md(rr->nsec3.algorithm) < 0)
+ return 0;
+ /* Ignore NSEC3 RRs with an excessive number of required iterations */
+ if (rr->nsec3.iterations > NSEC3_ITERATIONS_MAX)
+ return 0;
+
+ /* Ignore NSEC3 RRs generated from wildcards. If these NSEC3 RRs weren't correctly signed we can't make this
+ * check (since rr->n_skip_labels_source is -1), but that's OK, as we won't trust them anyway in that case. */
+ if (!IN_SET(rr->n_skip_labels_source, 0, (unsigned) -1))
+ return 0;
+ /* Ignore NSEC3 RRs that are located anywhere else than one label below the zone */
+ if (!IN_SET(rr->n_skip_labels_signer, 1, (unsigned) -1))
+ return 0;
+
+ if (!nsec3)
+ return 1;
+
+ /* If a second NSEC3 RR is specified, also check if they are from the same zone. */
+
+ if (nsec3 == rr) /* Shortcut */
+ return 1;
+
+ if (rr->key->class != nsec3->key->class)
+ return 0;
+ if (rr->nsec3.algorithm != nsec3->nsec3.algorithm)
+ return 0;
+ if (rr->nsec3.iterations != nsec3->nsec3.iterations)
+ return 0;
+ if (rr->nsec3.salt_size != nsec3->nsec3.salt_size)
+ return 0;
+ if (memcmp_safe(rr->nsec3.salt, nsec3->nsec3.salt, rr->nsec3.salt_size) != 0)
+ return 0;
+
+ a = dns_resource_key_name(rr->key);
+ r = dns_name_parent(&a); /* strip off hash */
+ if (r <= 0)
+ return r;
+
+ b = dns_resource_key_name(nsec3->key);
+ r = dns_name_parent(&b); /* strip off hash */
+ if (r <= 0)
+ return r;
+
+ /* Make sure both have the same parent */
+ return dns_name_equal(a, b);
+}
+
+static int nsec3_hashed_domain_format(const uint8_t *hashed, size_t hashed_size, const char *zone, char **ret) {
+ _cleanup_free_ char *l = NULL;
+ char *j;
+
+ assert(hashed);
+ assert(hashed_size > 0);
+ assert(zone);
+ assert(ret);
+
+ l = base32hexmem(hashed, hashed_size, false);
+ if (!l)
+ return -ENOMEM;
+
+ j = strjoin(l, ".", zone);
+ if (!j)
+ return -ENOMEM;
+
+ *ret = j;
+ return (int) hashed_size;
+}
+
+static int nsec3_hashed_domain_make(DnsResourceRecord *nsec3, const char *domain, const char *zone, char **ret) {
+ uint8_t hashed[DNSSEC_HASH_SIZE_MAX];
+ int hashed_size;
+
+ assert(nsec3);
+ assert(domain);
+ assert(zone);
+ assert(ret);
+
+ hashed_size = dnssec_nsec3_hash(nsec3, domain, hashed);
+ if (hashed_size < 0)
+ return hashed_size;
+
+ return nsec3_hashed_domain_format(hashed, (size_t) hashed_size, zone, ret);
+}
+
+/* See RFC 5155, Section 8
+ * First try to find a NSEC3 record that matches our query precisely, if that fails, find the closest
+ * enclosure. Secondly, find a proof that there is no closer enclosure and either a proof that there
+ * is no wildcard domain as a direct descendant of the closest enclosure, or find an NSEC3 record that
+ * matches the wildcard domain.
+ *
+ * Based on this we can prove either the existence of the record in @key, or NXDOMAIN or NODATA, or
+ * that there is no proof either way. The latter is the case if a the proof of non-existence of a given
+ * name uses an NSEC3 record with the opt-out bit set. Lastly, if we are given insufficient NSEC3 records
+ * to conclude anything we indicate this by returning NO_RR. */
+static int dnssec_test_nsec3(DnsAnswer *answer, DnsResourceKey *key, DnssecNsecResult *result, bool *authenticated, uint32_t *ttl) {
+ _cleanup_free_ char *next_closer_domain = NULL, *wildcard_domain = NULL;
+ const char *zone, *p, *pp = NULL, *wildcard;
+ DnsResourceRecord *rr, *enclosure_rr, *zone_rr, *wildcard_rr = NULL;
+ DnsAnswerFlags flags;
+ int hashed_size, r;
+ bool a, no_closer = false, no_wildcard = false, optout = false;
+
+ assert(key);
+ assert(result);
+
+ /* First step, find the zone name and the NSEC3 parameters of the zone.
+ * it is sufficient to look for the longest common suffix we find with
+ * any NSEC3 RR in the response. Any NSEC3 record will do as all NSEC3
+ * records from a given zone in a response must use the same
+ * parameters. */
+ zone = dns_resource_key_name(key);
+ for (;;) {
+ DNS_ANSWER_FOREACH_FLAGS(zone_rr, flags, answer) {
+ r = nsec3_is_good(zone_rr, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dns_name_equal_skip(dns_resource_key_name(zone_rr->key), 1, zone);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto found_zone;
+ }
+
+ /* Strip one label from the front */
+ r = dns_name_parent(&zone);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ *result = DNSSEC_NSEC_NO_RR;
+ return 0;
+
+found_zone:
+ /* Second step, find the closest encloser NSEC3 RR in 'answer' that matches 'key' */
+ p = dns_resource_key_name(key);
+ for (;;) {
+ _cleanup_free_ char *hashed_domain = NULL;
+
+ hashed_size = nsec3_hashed_domain_make(zone_rr, p, zone, &hashed_domain);
+ if (hashed_size == -EOPNOTSUPP) {
+ *result = DNSSEC_NSEC_UNSUPPORTED_ALGORITHM;
+ return 0;
+ }
+ if (hashed_size < 0)
+ return hashed_size;
+
+ DNS_ANSWER_FOREACH_FLAGS(enclosure_rr, flags, answer) {
+
+ r = nsec3_is_good(enclosure_rr, zone_rr);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (enclosure_rr->nsec3.next_hashed_name_size != (size_t) hashed_size)
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(enclosure_rr->key), hashed_domain);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ a = flags & DNS_ANSWER_AUTHENTICATED;
+ goto found_closest_encloser;
+ }
+ }
+
+ /* We didn't find the closest encloser with this name,
+ * but let's remember this domain name, it might be
+ * the next closer name */
+
+ pp = p;
+
+ /* Strip one label from the front */
+ r = dns_name_parent(&p);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ *result = DNSSEC_NSEC_NO_RR;
+ return 0;
+
+found_closest_encloser:
+ /* We found a closest encloser in 'p'; next closer is 'pp' */
+
+ if (!pp) {
+ /* We have an exact match! If we area looking for a DS RR, then we must insist that we got the NSEC3 RR
+ * from the parent. Otherwise the one from the child. Do so, by checking whether SOA and NS are
+ * appropriately set. */
+
+ if (key->type == DNS_TYPE_DS) {
+ if (bitmap_isset(enclosure_rr->nsec3.types, DNS_TYPE_SOA))
+ return -EBADMSG;
+ } else {
+ if (bitmap_isset(enclosure_rr->nsec3.types, DNS_TYPE_NS) &&
+ !bitmap_isset(enclosure_rr->nsec3.types, DNS_TYPE_SOA))
+ return -EBADMSG;
+ }
+
+ /* No next closer NSEC3 RR. That means there's a direct NSEC3 RR for our key. */
+ if (bitmap_isset(enclosure_rr->nsec3.types, key->type))
+ *result = DNSSEC_NSEC_FOUND;
+ else if (bitmap_isset(enclosure_rr->nsec3.types, DNS_TYPE_CNAME))
+ *result = DNSSEC_NSEC_CNAME;
+ else
+ *result = DNSSEC_NSEC_NODATA;
+
+ if (authenticated)
+ *authenticated = a;
+ if (ttl)
+ *ttl = enclosure_rr->ttl;
+
+ return 0;
+ }
+
+ /* Ensure this is not a DNAME domain, see RFC5155, section 8.3. */
+ if (bitmap_isset(enclosure_rr->nsec3.types, DNS_TYPE_DNAME))
+ return -EBADMSG;
+
+ /* Ensure that this data is from the delegated domain
+ * (i.e. originates from the "lower" DNS server), and isn't
+ * just glue records (i.e. doesn't originate from the "upper"
+ * DNS server). */
+ if (bitmap_isset(enclosure_rr->nsec3.types, DNS_TYPE_NS) &&
+ !bitmap_isset(enclosure_rr->nsec3.types, DNS_TYPE_SOA))
+ return -EBADMSG;
+
+ /* Prove that there is no next closer and whether or not there is a wildcard domain. */
+
+ wildcard = strjoina("*.", p);
+ r = nsec3_hashed_domain_make(enclosure_rr, wildcard, zone, &wildcard_domain);
+ if (r < 0)
+ return r;
+ if (r != hashed_size)
+ return -EBADMSG;
+
+ r = nsec3_hashed_domain_make(enclosure_rr, pp, zone, &next_closer_domain);
+ if (r < 0)
+ return r;
+ if (r != hashed_size)
+ return -EBADMSG;
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, flags, answer) {
+ _cleanup_free_ char *next_hashed_domain = NULL;
+
+ r = nsec3_is_good(rr, zone_rr);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = nsec3_hashed_domain_format(rr->nsec3.next_hashed_name, rr->nsec3.next_hashed_name_size, zone, &next_hashed_domain);
+ if (r < 0)
+ return r;
+
+ r = dns_name_between(dns_resource_key_name(rr->key), next_closer_domain, next_hashed_domain);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (rr->nsec3.flags & 1)
+ optout = true;
+
+ a = a && (flags & DNS_ANSWER_AUTHENTICATED);
+
+ no_closer = true;
+ }
+
+ r = dns_name_equal(dns_resource_key_name(rr->key), wildcard_domain);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ a = a && (flags & DNS_ANSWER_AUTHENTICATED);
+
+ wildcard_rr = rr;
+ }
+
+ r = dns_name_between(dns_resource_key_name(rr->key), wildcard_domain, next_hashed_domain);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (rr->nsec3.flags & 1)
+ /* This only makes sense if we have a wildcard delegation, which is
+ * very unlikely, see RFC 4592, Section 4.2, but we cannot rely on
+ * this not happening, so hence cannot simply conclude NXDOMAIN as
+ * we would wish */
+ optout = true;
+
+ a = a && (flags & DNS_ANSWER_AUTHENTICATED);
+
+ no_wildcard = true;
+ }
+ }
+
+ if (wildcard_rr && no_wildcard)
+ return -EBADMSG;
+
+ if (!no_closer) {
+ *result = DNSSEC_NSEC_NO_RR;
+ return 0;
+ }
+
+ if (wildcard_rr) {
+ /* A wildcard exists that matches our query. */
+ if (optout)
+ /* This is not specified in any RFC to the best of my knowledge, but
+ * if the next closer enclosure is covered by an opt-out NSEC3 RR
+ * it means that we cannot prove that the source of synthesis is
+ * correct, as there may be a closer match. */
+ *result = DNSSEC_NSEC_OPTOUT;
+ else if (bitmap_isset(wildcard_rr->nsec3.types, key->type))
+ *result = DNSSEC_NSEC_FOUND;
+ else if (bitmap_isset(wildcard_rr->nsec3.types, DNS_TYPE_CNAME))
+ *result = DNSSEC_NSEC_CNAME;
+ else
+ *result = DNSSEC_NSEC_NODATA;
+ } else {
+ if (optout)
+ /* The RFC only specifies that we have to care for optout for NODATA for
+ * DS records. However, children of an insecure opt-out delegation should
+ * also be considered opt-out, rather than verified NXDOMAIN.
+ * Note that we do not require a proof of wildcard non-existence if the
+ * next closer domain is covered by an opt-out, as that would not provide
+ * any additional information. */
+ *result = DNSSEC_NSEC_OPTOUT;
+ else if (no_wildcard)
+ *result = DNSSEC_NSEC_NXDOMAIN;
+ else {
+ *result = DNSSEC_NSEC_NO_RR;
+
+ return 0;
+ }
+ }
+
+ if (authenticated)
+ *authenticated = a;
+
+ if (ttl)
+ *ttl = enclosure_rr->ttl;
+
+ return 0;
+}
+
+static int dnssec_nsec_wildcard_equal(DnsResourceRecord *rr, const char *name) {
+ char label[DNS_LABEL_MAX];
+ const char *n;
+ int r;
+
+ assert(rr);
+ assert(rr->key->type == DNS_TYPE_NSEC);
+
+ /* Checks whether the specified RR has a name beginning in "*.", and if the rest is a suffix of our name */
+
+ if (rr->n_skip_labels_source != 1)
+ return 0;
+
+ n = dns_resource_key_name(rr->key);
+ r = dns_label_unescape(&n, label, sizeof label, 0);
+ if (r <= 0)
+ return r;
+ if (r != 1 || label[0] != '*')
+ return 0;
+
+ return dns_name_endswith(name, n);
+}
+
+static int dnssec_nsec_in_path(DnsResourceRecord *rr, const char *name) {
+ const char *nn, *common_suffix;
+ int r;
+
+ assert(rr);
+ assert(rr->key->type == DNS_TYPE_NSEC);
+
+ /* Checks whether the specified nsec RR indicates that name is an empty non-terminal (ENT)
+ *
+ * A couple of examples:
+ *
+ * NSEC bar → waldo.foo.bar: indicates that foo.bar exists and is an ENT
+ * NSEC waldo.foo.bar → yyy.zzz.xoo.bar: indicates that xoo.bar and zzz.xoo.bar exist and are ENTs
+ * NSEC yyy.zzz.xoo.bar → bar: indicates pretty much nothing about ENTs
+ */
+
+ /* First, determine parent of next domain. */
+ nn = rr->nsec.next_domain_name;
+ r = dns_name_parent(&nn);
+ if (r <= 0)
+ return r;
+
+ /* If the name we just determined is not equal or child of the name we are interested in, then we can't say
+ * anything at all. */
+ r = dns_name_endswith(nn, name);
+ if (r <= 0)
+ return r;
+
+ /* If the name we are interested in is not a prefix of the common suffix of the NSEC RR's owner and next domain names, then we can't say anything either. */
+ r = dns_name_common_suffix(dns_resource_key_name(rr->key), rr->nsec.next_domain_name, &common_suffix);
+ if (r < 0)
+ return r;
+
+ return dns_name_endswith(name, common_suffix);
+}
+
+static int dnssec_nsec_from_parent_zone(DnsResourceRecord *rr, const char *name) {
+ int r;
+
+ assert(rr);
+ assert(rr->key->type == DNS_TYPE_NSEC);
+
+ /* Checks whether this NSEC originates to the parent zone or the child zone. */
+
+ r = dns_name_parent(&name);
+ if (r <= 0)
+ return r;
+
+ r = dns_name_equal(name, dns_resource_key_name(rr->key));
+ if (r <= 0)
+ return r;
+
+ /* DNAME, and NS without SOA is an indication for a delegation. */
+ if (bitmap_isset(rr->nsec.types, DNS_TYPE_DNAME))
+ return 1;
+
+ if (bitmap_isset(rr->nsec.types, DNS_TYPE_NS) && !bitmap_isset(rr->nsec.types, DNS_TYPE_SOA))
+ return 1;
+
+ return 0;
+}
+
+static int dnssec_nsec_covers(DnsResourceRecord *rr, const char *name) {
+ const char *signer;
+ int r;
+
+ assert(rr);
+ assert(rr->key->type == DNS_TYPE_NSEC);
+
+ /* Checks whether the name is covered by this NSEC RR. This means, that the name is somewhere below the NSEC's
+ * signer name, and between the NSEC's two names. */
+
+ r = dns_resource_record_signer(rr, &signer);
+ if (r < 0)
+ return r;
+
+ r = dns_name_endswith(name, signer); /* this NSEC isn't suitable the name is not in the signer's domain */
+ if (r <= 0)
+ return r;
+
+ return dns_name_between(dns_resource_key_name(rr->key), name, rr->nsec.next_domain_name);
+}
+
+static int dnssec_nsec_covers_wildcard(DnsResourceRecord *rr, const char *name) {
+ _cleanup_free_ char *wc = NULL;
+ const char *common_suffix, *signer;
+ int r;
+
+ assert(rr);
+ assert(rr->key->type == DNS_TYPE_NSEC);
+
+ /* Checks whether the "Wildcard at the Closest Encloser" is within the space covered by the specified
+ * RR. Specifically, checks whether 'name' has the common suffix of the NSEC RR's owner and next names as
+ * suffix, and whether the NSEC covers the name generated by that suffix prepended with an asterisk label.
+ *
+ * NSEC bar → waldo.foo.bar: indicates that *.bar and *.foo.bar do not exist
+ * NSEC waldo.foo.bar → yyy.zzz.xoo.bar: indicates that *.xoo.bar and *.zzz.xoo.bar do not exist (and more ...)
+ * NSEC yyy.zzz.xoo.bar → bar: indicates that a number of wildcards don#t exist either...
+ */
+
+ r = dns_resource_record_signer(rr, &signer);
+ if (r < 0)
+ return r;
+
+ r = dns_name_endswith(name, signer); /* this NSEC isn't suitable the name is not in the signer's domain */
+ if (r <= 0)
+ return r;
+
+ r = dns_name_endswith(name, dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r > 0) /* If the name we are interested in is a child of the NSEC RR, then append the asterisk to the NSEC
+ * RR's name. */
+ r = dns_name_concat("*", dns_resource_key_name(rr->key), 0, &wc);
+ else {
+ r = dns_name_common_suffix(dns_resource_key_name(rr->key), rr->nsec.next_domain_name, &common_suffix);
+ if (r < 0)
+ return r;
+
+ r = dns_name_concat("*", common_suffix, 0, &wc);
+ }
+ if (r < 0)
+ return r;
+
+ return dns_name_between(dns_resource_key_name(rr->key), wc, rr->nsec.next_domain_name);
+}
+
+int dnssec_nsec_test(DnsAnswer *answer, DnsResourceKey *key, DnssecNsecResult *result, bool *authenticated, uint32_t *ttl) {
+ bool have_nsec3 = false, covering_rr_authenticated = false, wildcard_rr_authenticated = false;
+ DnsResourceRecord *rr, *covering_rr = NULL, *wildcard_rr = NULL;
+ DnsAnswerFlags flags;
+ const char *name;
+ int r;
+
+ assert(key);
+ assert(result);
+
+ /* Look for any NSEC/NSEC3 RRs that say something about the specified key. */
+
+ name = dns_resource_key_name(key);
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, flags, answer) {
+
+ if (rr->key->class != key->class)
+ continue;
+
+ have_nsec3 = have_nsec3 || (rr->key->type == DNS_TYPE_NSEC3);
+
+ if (rr->key->type != DNS_TYPE_NSEC)
+ continue;
+
+ /* The following checks only make sense for NSEC RRs that are not expanded from a wildcard */
+ r = dns_resource_record_is_synthetic(rr);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ /* Check if this is a direct match. If so, we have encountered a NODATA case */
+ r = dns_name_equal(dns_resource_key_name(rr->key), name);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* If it's not a direct match, maybe it's a wild card match? */
+ r = dnssec_nsec_wildcard_equal(rr, name);
+ if (r < 0)
+ return r;
+ }
+ if (r > 0) {
+ if (key->type == DNS_TYPE_DS) {
+ /* If we look for a DS RR and the server sent us the NSEC RR of the child zone
+ * we have a problem. For DS RRs we want the NSEC RR from the parent */
+ if (bitmap_isset(rr->nsec.types, DNS_TYPE_SOA))
+ continue;
+ } else {
+ /* For all RR types, ensure that if NS is set SOA is set too, so that we know
+ * we got the child's NSEC. */
+ if (bitmap_isset(rr->nsec.types, DNS_TYPE_NS) &&
+ !bitmap_isset(rr->nsec.types, DNS_TYPE_SOA))
+ continue;
+ }
+
+ if (bitmap_isset(rr->nsec.types, key->type))
+ *result = DNSSEC_NSEC_FOUND;
+ else if (bitmap_isset(rr->nsec.types, DNS_TYPE_CNAME))
+ *result = DNSSEC_NSEC_CNAME;
+ else
+ *result = DNSSEC_NSEC_NODATA;
+
+ if (authenticated)
+ *authenticated = flags & DNS_ANSWER_AUTHENTICATED;
+ if (ttl)
+ *ttl = rr->ttl;
+
+ return 0;
+ }
+
+ /* Check if the name we are looking for is an empty non-terminal within the owner or next name
+ * of the NSEC RR. */
+ r = dnssec_nsec_in_path(rr, name);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ *result = DNSSEC_NSEC_NODATA;
+
+ if (authenticated)
+ *authenticated = flags & DNS_ANSWER_AUTHENTICATED;
+ if (ttl)
+ *ttl = rr->ttl;
+
+ return 0;
+ }
+
+ /* The following two "covering" checks, are not useful if the NSEC is from the parent */
+ r = dnssec_nsec_from_parent_zone(rr, name);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ /* Check if this NSEC RR proves the absence of an explicit RR under this name */
+ r = dnssec_nsec_covers(rr, name);
+ if (r < 0)
+ return r;
+ if (r > 0 && (!covering_rr || !covering_rr_authenticated)) {
+ covering_rr = rr;
+ covering_rr_authenticated = flags & DNS_ANSWER_AUTHENTICATED;
+ }
+
+ /* Check if this NSEC RR proves the absence of a wildcard RR under this name */
+ r = dnssec_nsec_covers_wildcard(rr, name);
+ if (r < 0)
+ return r;
+ if (r > 0 && (!wildcard_rr || !wildcard_rr_authenticated)) {
+ wildcard_rr = rr;
+ wildcard_rr_authenticated = flags & DNS_ANSWER_AUTHENTICATED;
+ }
+ }
+
+ if (covering_rr && wildcard_rr) {
+ /* If we could prove that neither the name itself, nor the wildcard at the closest encloser exists, we
+ * proved the NXDOMAIN case. */
+ *result = DNSSEC_NSEC_NXDOMAIN;
+
+ if (authenticated)
+ *authenticated = covering_rr_authenticated && wildcard_rr_authenticated;
+ if (ttl)
+ *ttl = MIN(covering_rr->ttl, wildcard_rr->ttl);
+
+ return 0;
+ }
+
+ /* OK, this was not sufficient. Let's see if NSEC3 can help. */
+ if (have_nsec3)
+ return dnssec_test_nsec3(answer, key, result, authenticated, ttl);
+
+ /* No approproate NSEC RR found, report this. */
+ *result = DNSSEC_NSEC_NO_RR;
+ return 0;
+}
+
+static int dnssec_nsec_test_enclosed(DnsAnswer *answer, uint16_t type, const char *name, const char *zone, bool *authenticated) {
+ DnsResourceRecord *rr;
+ DnsAnswerFlags flags;
+ int r;
+
+ assert(name);
+ assert(zone);
+
+ /* Checks whether there's an NSEC/NSEC3 that proves that the specified 'name' is non-existing in the specified
+ * 'zone'. The 'zone' must be a suffix of the 'name'. */
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, flags, answer) {
+ bool found = false;
+
+ if (rr->key->type != type && type != DNS_TYPE_ANY)
+ continue;
+
+ switch (rr->key->type) {
+
+ case DNS_TYPE_NSEC:
+
+ /* We only care for NSEC RRs from the indicated zone */
+ r = dns_resource_record_is_signer(rr, zone);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dns_name_between(dns_resource_key_name(rr->key), name, rr->nsec.next_domain_name);
+ if (r < 0)
+ return r;
+
+ found = r > 0;
+ break;
+
+ case DNS_TYPE_NSEC3: {
+ _cleanup_free_ char *hashed_domain = NULL, *next_hashed_domain = NULL;
+
+ /* We only care for NSEC3 RRs from the indicated zone */
+ r = dns_resource_record_is_signer(rr, zone);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = nsec3_is_good(rr, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ /* Format the domain we are testing with the NSEC3 RR's hash function */
+ r = nsec3_hashed_domain_make(
+ rr,
+ name,
+ zone,
+ &hashed_domain);
+ if (r < 0)
+ return r;
+ if ((size_t) r != rr->nsec3.next_hashed_name_size)
+ break;
+
+ /* Format the NSEC3's next hashed name as proper domain name */
+ r = nsec3_hashed_domain_format(
+ rr->nsec3.next_hashed_name,
+ rr->nsec3.next_hashed_name_size,
+ zone,
+ &next_hashed_domain);
+ if (r < 0)
+ return r;
+
+ r = dns_name_between(dns_resource_key_name(rr->key), hashed_domain, next_hashed_domain);
+ if (r < 0)
+ return r;
+
+ found = r > 0;
+ break;
+ }
+
+ default:
+ continue;
+ }
+
+ if (found) {
+ if (authenticated)
+ *authenticated = flags & DNS_ANSWER_AUTHENTICATED;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int dnssec_test_positive_wildcard_nsec3(
+ DnsAnswer *answer,
+ const char *name,
+ const char *source,
+ const char *zone,
+ bool *authenticated) {
+
+ const char *next_closer = NULL;
+ int r;
+
+ /* Run a positive NSEC3 wildcard proof. Specifically:
+ *
+ * A proof that the "next closer" of the generating wildcard does not exist.
+ *
+ * Note a key difference between the NSEC3 and NSEC versions of the proof. NSEC RRs don't have to exist for
+ * empty non-transients. NSEC3 RRs however have to. This means it's sufficient to check if the next closer name
+ * exists for the NSEC3 RR and we are done.
+ *
+ * To prove that a.b.c.d.e.f is rightfully synthesized from a wildcard *.d.e.f all we have to check is that
+ * c.d.e.f does not exist. */
+
+ for (;;) {
+ next_closer = name;
+ r = dns_name_parent(&name);
+ if (r <= 0)
+ return r;
+
+ r = dns_name_equal(name, source);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ break;
+ }
+
+ return dnssec_nsec_test_enclosed(answer, DNS_TYPE_NSEC3, next_closer, zone, authenticated);
+}
+
+static int dnssec_test_positive_wildcard_nsec(
+ DnsAnswer *answer,
+ const char *name,
+ const char *source,
+ const char *zone,
+ bool *_authenticated) {
+
+ bool authenticated = true;
+ int r;
+
+ /* Run a positive NSEC wildcard proof. Specifically:
+ *
+ * A proof that there's neither a wildcard name nor a non-wildcard name that is a suffix of the name "name" and
+ * a prefix of the synthesizing source "source" in the zone "zone".
+ *
+ * See RFC 5155, Section 8.8 and RFC 4035, Section 5.3.4
+ *
+ * Note that if we want to prove that a.b.c.d.e.f is rightfully synthesized from a wildcard *.d.e.f, then we
+ * have to prove that none of the following exist:
+ *
+ * 1) a.b.c.d.e.f
+ * 2) *.b.c.d.e.f
+ * 3) b.c.d.e.f
+ * 4) *.c.d.e.f
+ * 5) c.d.e.f
+ */
+
+ for (;;) {
+ _cleanup_free_ char *wc = NULL;
+ bool a = false;
+
+ /* Check if there's an NSEC or NSEC3 RR that proves that the mame we determined is really non-existing,
+ * i.e between the owner name and the next name of an NSEC RR. */
+ r = dnssec_nsec_test_enclosed(answer, DNS_TYPE_NSEC, name, zone, &a);
+ if (r <= 0)
+ return r;
+
+ authenticated = authenticated && a;
+
+ /* Strip one label off */
+ r = dns_name_parent(&name);
+ if (r <= 0)
+ return r;
+
+ /* Did we reach the source of synthesis? */
+ r = dns_name_equal(name, source);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* Successful exit */
+ *_authenticated = authenticated;
+ return 1;
+ }
+
+ /* Safety check, that the source of synthesis is still our suffix */
+ r = dns_name_endswith(name, source);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EBADMSG;
+
+ /* Replace the label we stripped off with an asterisk */
+ wc = strappend("*.", name);
+ if (!wc)
+ return -ENOMEM;
+
+ /* And check if the proof holds for the asterisk name, too */
+ r = dnssec_nsec_test_enclosed(answer, DNS_TYPE_NSEC, wc, zone, &a);
+ if (r <= 0)
+ return r;
+
+ authenticated = authenticated && a;
+ /* In the next iteration we'll check the non-asterisk-prefixed version */
+ }
+}
+
+int dnssec_test_positive_wildcard(
+ DnsAnswer *answer,
+ const char *name,
+ const char *source,
+ const char *zone,
+ bool *authenticated) {
+
+ int r;
+
+ assert(name);
+ assert(source);
+ assert(zone);
+ assert(authenticated);
+
+ r = dns_answer_contains_zone_nsec3(answer, zone);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return dnssec_test_positive_wildcard_nsec3(answer, name, source, zone, authenticated);
+ else
+ return dnssec_test_positive_wildcard_nsec(answer, name, source, zone, authenticated);
+}
+
+#else
+
+int dnssec_verify_rrset(
+ DnsAnswer *a,
+ const DnsResourceKey *key,
+ DnsResourceRecord *rrsig,
+ DnsResourceRecord *dnskey,
+ usec_t realtime,
+ DnssecResult *result) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_rrsig_match_dnskey(DnsResourceRecord *rrsig, DnsResourceRecord *dnskey, bool revoked_ok) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_key_match_rrsig(const DnsResourceKey *key, DnsResourceRecord *rrsig) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_verify_rrset_search(
+ DnsAnswer *a,
+ const DnsResourceKey *key,
+ DnsAnswer *validated_dnskeys,
+ usec_t realtime,
+ DnssecResult *result,
+ DnsResourceRecord **ret_rrsig) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_has_rrsig(DnsAnswer *a, const DnsResourceKey *key) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_verify_dnskey_by_ds(DnsResourceRecord *dnskey, DnsResourceRecord *ds, bool mask_revoke) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_verify_dnskey_by_ds_search(DnsResourceRecord *dnskey, DnsAnswer *validated_ds) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_nsec3_hash(DnsResourceRecord *nsec3, const char *name, void *ret) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_nsec_test(DnsAnswer *answer, DnsResourceKey *key, DnssecNsecResult *result, bool *authenticated, uint32_t *ttl) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_test_positive_wildcard(
+ DnsAnswer *answer,
+ const char *name,
+ const char *source,
+ const char *zone,
+ bool *authenticated) {
+
+ return -EOPNOTSUPP;
+}
+
+#endif
+
+static const char* const dnssec_result_table[_DNSSEC_RESULT_MAX] = {
+ [DNSSEC_VALIDATED] = "validated",
+ [DNSSEC_VALIDATED_WILDCARD] = "validated-wildcard",
+ [DNSSEC_INVALID] = "invalid",
+ [DNSSEC_SIGNATURE_EXPIRED] = "signature-expired",
+ [DNSSEC_UNSUPPORTED_ALGORITHM] = "unsupported-algorithm",
+ [DNSSEC_NO_SIGNATURE] = "no-signature",
+ [DNSSEC_MISSING_KEY] = "missing-key",
+ [DNSSEC_UNSIGNED] = "unsigned",
+ [DNSSEC_FAILED_AUXILIARY] = "failed-auxiliary",
+ [DNSSEC_NSEC_MISMATCH] = "nsec-mismatch",
+ [DNSSEC_INCOMPATIBLE_SERVER] = "incompatible-server",
+};
+DEFINE_STRING_TABLE_LOOKUP(dnssec_result, DnssecResult);
+
+static const char* const dnssec_verdict_table[_DNSSEC_VERDICT_MAX] = {
+ [DNSSEC_SECURE] = "secure",
+ [DNSSEC_INSECURE] = "insecure",
+ [DNSSEC_BOGUS] = "bogus",
+ [DNSSEC_INDETERMINATE] = "indeterminate",
+};
+DEFINE_STRING_TABLE_LOOKUP(dnssec_verdict, DnssecVerdict);
diff --git a/src/resolve/resolved-dns-dnssec.h b/src/resolve/resolved-dns-dnssec.h
new file mode 100644
index 0000000..dfee723
--- /dev/null
+++ b/src/resolve/resolved-dns-dnssec.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef enum DnssecResult DnssecResult;
+typedef enum DnssecVerdict DnssecVerdict;
+
+#include "dns-domain.h"
+#include "resolved-dns-answer.h"
+#include "resolved-dns-rr.h"
+
+enum DnssecResult {
+ /* These five are returned by dnssec_verify_rrset() */
+ DNSSEC_VALIDATED,
+ DNSSEC_VALIDATED_WILDCARD, /* Validated via a wildcard RRSIG, further NSEC/NSEC3 checks necessary */
+ DNSSEC_INVALID,
+ DNSSEC_SIGNATURE_EXPIRED,
+ DNSSEC_UNSUPPORTED_ALGORITHM,
+
+ /* These two are added by dnssec_verify_rrset_search() */
+ DNSSEC_NO_SIGNATURE,
+ DNSSEC_MISSING_KEY,
+
+ /* These two are added by the DnsTransaction logic */
+ DNSSEC_UNSIGNED,
+ DNSSEC_FAILED_AUXILIARY,
+ DNSSEC_NSEC_MISMATCH,
+ DNSSEC_INCOMPATIBLE_SERVER,
+
+ _DNSSEC_RESULT_MAX,
+ _DNSSEC_RESULT_INVALID = -1
+};
+
+enum DnssecVerdict {
+ DNSSEC_SECURE,
+ DNSSEC_INSECURE,
+ DNSSEC_BOGUS,
+ DNSSEC_INDETERMINATE,
+
+ _DNSSEC_VERDICT_MAX,
+ _DNSSEC_VERDICT_INVALID = -1
+};
+
+#define DNSSEC_CANONICAL_HOSTNAME_MAX (DNS_HOSTNAME_MAX + 2)
+
+/* The longest digest we'll ever generate, of all digest algorithms we support */
+#define DNSSEC_HASH_SIZE_MAX (MAX(20, 32))
+
+int dnssec_rrsig_match_dnskey(DnsResourceRecord *rrsig, DnsResourceRecord *dnskey, bool revoked_ok);
+int dnssec_key_match_rrsig(const DnsResourceKey *key, DnsResourceRecord *rrsig);
+
+int dnssec_verify_rrset(DnsAnswer *answer, const DnsResourceKey *key, DnsResourceRecord *rrsig, DnsResourceRecord *dnskey, usec_t realtime, DnssecResult *result);
+int dnssec_verify_rrset_search(DnsAnswer *answer, const DnsResourceKey *key, DnsAnswer *validated_dnskeys, usec_t realtime, DnssecResult *result, DnsResourceRecord **rrsig);
+
+int dnssec_verify_dnskey_by_ds(DnsResourceRecord *dnskey, DnsResourceRecord *ds, bool mask_revoke);
+int dnssec_verify_dnskey_by_ds_search(DnsResourceRecord *dnskey, DnsAnswer *validated_ds);
+
+int dnssec_has_rrsig(DnsAnswer *a, const DnsResourceKey *key);
+
+uint16_t dnssec_keytag(DnsResourceRecord *dnskey, bool mask_revoke);
+
+int dnssec_canonicalize(const char *n, char *buffer, size_t buffer_max);
+
+int dnssec_nsec3_hash(DnsResourceRecord *nsec3, const char *name, void *ret);
+
+typedef enum DnssecNsecResult {
+ DNSSEC_NSEC_NO_RR, /* No suitable NSEC/NSEC3 RR found */
+ DNSSEC_NSEC_CNAME, /* Didn't find what was asked for, but did find CNAME */
+ DNSSEC_NSEC_UNSUPPORTED_ALGORITHM,
+ DNSSEC_NSEC_NXDOMAIN,
+ DNSSEC_NSEC_NODATA,
+ DNSSEC_NSEC_FOUND,
+ DNSSEC_NSEC_OPTOUT,
+} DnssecNsecResult;
+
+int dnssec_nsec_test(DnsAnswer *answer, DnsResourceKey *key, DnssecNsecResult *result, bool *authenticated, uint32_t *ttl);
+
+int dnssec_test_positive_wildcard(DnsAnswer *a, const char *name, const char *source, const char *zone, bool *authenticated);
+
+const char* dnssec_result_to_string(DnssecResult m) _const_;
+DnssecResult dnssec_result_from_string(const char *s) _pure_;
+
+const char* dnssec_verdict_to_string(DnssecVerdict m) _const_;
+DnssecVerdict dnssec_verdict_from_string(const char *s) _pure_;
diff --git a/src/resolve/resolved-dns-packet.c b/src/resolve/resolved-dns-packet.c
new file mode 100644
index 0000000..572271b
--- /dev/null
+++ b/src/resolve/resolved-dns-packet.c
@@ -0,0 +1,2380 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_GCRYPT
+#include <gcrypt.h>
+#endif
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "resolved-dns-packet.h"
+#include "string-table.h"
+#include "strv.h"
+#include "unaligned.h"
+#include "utf8.h"
+#include "util.h"
+
+#define EDNS0_OPT_DO (1<<15)
+
+assert_cc(DNS_PACKET_SIZE_START > DNS_PACKET_HEADER_SIZE)
+
+typedef struct DnsPacketRewinder {
+ DnsPacket *packet;
+ size_t saved_rindex;
+} DnsPacketRewinder;
+
+static void rewind_dns_packet(DnsPacketRewinder *rewinder) {
+ if (rewinder->packet)
+ dns_packet_rewind(rewinder->packet, rewinder->saved_rindex);
+}
+
+#define INIT_REWINDER(rewinder, p) do { rewinder.packet = p; rewinder.saved_rindex = p->rindex; } while (0)
+#define CANCEL_REWINDER(rewinder) do { rewinder.packet = NULL; } while (0)
+
+int dns_packet_new(
+ DnsPacket **ret,
+ DnsProtocol protocol,
+ size_t min_alloc_dsize,
+ size_t max_size) {
+
+ DnsPacket *p;
+ size_t a;
+
+ assert(ret);
+ assert(max_size >= DNS_PACKET_HEADER_SIZE);
+
+ if (max_size > DNS_PACKET_SIZE_MAX)
+ max_size = DNS_PACKET_SIZE_MAX;
+
+ /* The caller may not check what is going to be truly allocated, so do not allow to
+ * allocate a DNS packet bigger than DNS_PACKET_SIZE_MAX.
+ */
+ if (min_alloc_dsize > DNS_PACKET_SIZE_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EFBIG),
+ "Requested packet data size too big: %zu",
+ min_alloc_dsize);
+
+ /* When dns_packet_new() is called with min_alloc_dsize == 0, allocate more than the
+ * absolute minimum (which is the dns packet header size), to avoid
+ * resizing immediately again after appending the first data to the packet.
+ */
+ if (min_alloc_dsize < DNS_PACKET_HEADER_SIZE)
+ a = DNS_PACKET_SIZE_START;
+ else
+ a = min_alloc_dsize;
+
+ /* round up to next page size */
+ a = PAGE_ALIGN(ALIGN(sizeof(DnsPacket)) + a) - ALIGN(sizeof(DnsPacket));
+
+ /* make sure we never allocate more than useful */
+ if (a > max_size)
+ a = max_size;
+
+ p = malloc0(ALIGN(sizeof(DnsPacket)) + a);
+ if (!p)
+ return -ENOMEM;
+
+ p->size = p->rindex = DNS_PACKET_HEADER_SIZE;
+ p->allocated = a;
+ p->max_size = max_size;
+ p->protocol = protocol;
+ p->opt_start = p->opt_size = (size_t) -1;
+ p->n_ref = 1;
+
+ *ret = p;
+
+ return 0;
+}
+
+void dns_packet_set_flags(DnsPacket *p, bool dnssec_checking_disabled, bool truncated) {
+
+ DnsPacketHeader *h;
+
+ assert(p);
+
+ h = DNS_PACKET_HEADER(p);
+
+ switch(p->protocol) {
+ case DNS_PROTOCOL_LLMNR:
+ assert(!truncated);
+
+ h->flags = htobe16(DNS_PACKET_MAKE_FLAGS(0 /* qr */,
+ 0 /* opcode */,
+ 0 /* c */,
+ 0 /* tc */,
+ 0 /* t */,
+ 0 /* ra */,
+ 0 /* ad */,
+ 0 /* cd */,
+ 0 /* rcode */));
+ break;
+
+ case DNS_PROTOCOL_MDNS:
+ h->flags = htobe16(DNS_PACKET_MAKE_FLAGS(0 /* qr */,
+ 0 /* opcode */,
+ 0 /* aa */,
+ truncated /* tc */,
+ 0 /* rd (ask for recursion) */,
+ 0 /* ra */,
+ 0 /* ad */,
+ 0 /* cd */,
+ 0 /* rcode */));
+ break;
+
+ default:
+ assert(!truncated);
+
+ h->flags = htobe16(DNS_PACKET_MAKE_FLAGS(0 /* qr */,
+ 0 /* opcode */,
+ 0 /* aa */,
+ 0 /* tc */,
+ 1 /* rd (ask for recursion) */,
+ 0 /* ra */,
+ 0 /* ad */,
+ dnssec_checking_disabled /* cd */,
+ 0 /* rcode */));
+ }
+}
+
+int dns_packet_new_query(DnsPacket **ret, DnsProtocol protocol, size_t min_alloc_dsize, bool dnssec_checking_disabled) {
+ DnsPacket *p;
+ int r;
+
+ assert(ret);
+
+ r = dns_packet_new(&p, protocol, min_alloc_dsize, DNS_PACKET_SIZE_MAX);
+ if (r < 0)
+ return r;
+
+ /* Always set the TC bit to 0 initially.
+ * If there are multiple packets later, we'll update the bit shortly before sending.
+ */
+ dns_packet_set_flags(p, dnssec_checking_disabled, false);
+
+ *ret = p;
+ return 0;
+}
+
+DnsPacket *dns_packet_ref(DnsPacket *p) {
+
+ if (!p)
+ return NULL;
+
+ assert(!p->on_stack);
+
+ assert(p->n_ref > 0);
+ p->n_ref++;
+ return p;
+}
+
+static void dns_packet_free(DnsPacket *p) {
+ char *s;
+
+ assert(p);
+
+ dns_question_unref(p->question);
+ dns_answer_unref(p->answer);
+ dns_resource_record_unref(p->opt);
+
+ while ((s = hashmap_steal_first_key(p->names)))
+ free(s);
+ hashmap_free(p->names);
+
+ free(p->_data);
+
+ if (!p->on_stack)
+ free(p);
+}
+
+DnsPacket *dns_packet_unref(DnsPacket *p) {
+ if (!p)
+ return NULL;
+
+ assert(p->n_ref > 0);
+
+ dns_packet_unref(p->more);
+
+ if (p->n_ref == 1)
+ dns_packet_free(p);
+ else
+ p->n_ref--;
+
+ return NULL;
+}
+
+int dns_packet_validate(DnsPacket *p) {
+ assert(p);
+
+ if (p->size < DNS_PACKET_HEADER_SIZE)
+ return -EBADMSG;
+
+ if (p->size > DNS_PACKET_SIZE_MAX)
+ return -EBADMSG;
+
+ return 1;
+}
+
+int dns_packet_validate_reply(DnsPacket *p) {
+ int r;
+
+ assert(p);
+
+ r = dns_packet_validate(p);
+ if (r < 0)
+ return r;
+
+ if (DNS_PACKET_QR(p) != 1)
+ return 0;
+
+ if (DNS_PACKET_OPCODE(p) != 0)
+ return -EBADMSG;
+
+ switch (p->protocol) {
+
+ case DNS_PROTOCOL_LLMNR:
+ /* RFC 4795, Section 2.1.1. says to discard all replies with QDCOUNT != 1 */
+ if (DNS_PACKET_QDCOUNT(p) != 1)
+ return -EBADMSG;
+
+ break;
+
+ case DNS_PROTOCOL_MDNS:
+ /* RFC 6762, Section 18 */
+ if (DNS_PACKET_RCODE(p) != 0)
+ return -EBADMSG;
+
+ break;
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+int dns_packet_validate_query(DnsPacket *p) {
+ int r;
+
+ assert(p);
+
+ r = dns_packet_validate(p);
+ if (r < 0)
+ return r;
+
+ if (DNS_PACKET_QR(p) != 0)
+ return 0;
+
+ if (DNS_PACKET_OPCODE(p) != 0)
+ return -EBADMSG;
+
+ if (DNS_PACKET_TC(p))
+ return -EBADMSG;
+
+ switch (p->protocol) {
+
+ case DNS_PROTOCOL_LLMNR:
+ case DNS_PROTOCOL_DNS:
+ /* RFC 4795, Section 2.1.1. says to discard all queries with QDCOUNT != 1 */
+ if (DNS_PACKET_QDCOUNT(p) != 1)
+ return -EBADMSG;
+
+ /* RFC 4795, Section 2.1.1. says to discard all queries with ANCOUNT != 0 */
+ if (DNS_PACKET_ANCOUNT(p) > 0)
+ return -EBADMSG;
+
+ /* RFC 4795, Section 2.1.1. says to discard all queries with NSCOUNT != 0 */
+ if (DNS_PACKET_NSCOUNT(p) > 0)
+ return -EBADMSG;
+
+ break;
+
+ case DNS_PROTOCOL_MDNS:
+ /* RFC 6762, Section 18 */
+ if (DNS_PACKET_AA(p) != 0 ||
+ DNS_PACKET_RD(p) != 0 ||
+ DNS_PACKET_RA(p) != 0 ||
+ DNS_PACKET_AD(p) != 0 ||
+ DNS_PACKET_CD(p) != 0 ||
+ DNS_PACKET_RCODE(p) != 0)
+ return -EBADMSG;
+
+ break;
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+static int dns_packet_extend(DnsPacket *p, size_t add, void **ret, size_t *start) {
+ assert(p);
+
+ if (p->size + add > p->allocated) {
+ size_t a, ms;
+
+ a = PAGE_ALIGN((p->size + add) * 2);
+
+ ms = dns_packet_size_max(p);
+ if (a > ms)
+ a = ms;
+
+ if (p->size + add > a)
+ return -EMSGSIZE;
+
+ if (p->_data) {
+ void *d;
+
+ d = realloc(p->_data, a);
+ if (!d)
+ return -ENOMEM;
+
+ p->_data = d;
+ } else {
+ p->_data = malloc(a);
+ if (!p->_data)
+ return -ENOMEM;
+
+ memcpy(p->_data, (uint8_t*) p + ALIGN(sizeof(DnsPacket)), p->size);
+ memzero((uint8_t*) p->_data + p->size, a - p->size);
+ }
+
+ p->allocated = a;
+ }
+
+ if (start)
+ *start = p->size;
+
+ if (ret)
+ *ret = (uint8_t*) DNS_PACKET_DATA(p) + p->size;
+
+ p->size += add;
+ return 0;
+}
+
+void dns_packet_truncate(DnsPacket *p, size_t sz) {
+ Iterator i;
+ char *s;
+ void *n;
+
+ assert(p);
+
+ if (p->size <= sz)
+ return;
+
+ HASHMAP_FOREACH_KEY(n, s, p->names, i) {
+
+ if (PTR_TO_SIZE(n) < sz)
+ continue;
+
+ hashmap_remove(p->names, s);
+ free(s);
+ }
+
+ p->size = sz;
+}
+
+int dns_packet_append_blob(DnsPacket *p, const void *d, size_t l, size_t *start) {
+ void *q;
+ int r;
+
+ assert(p);
+
+ r = dns_packet_extend(p, l, &q, start);
+ if (r < 0)
+ return r;
+
+ memcpy_safe(q, d, l);
+ return 0;
+}
+
+int dns_packet_append_uint8(DnsPacket *p, uint8_t v, size_t *start) {
+ void *d;
+ int r;
+
+ assert(p);
+
+ r = dns_packet_extend(p, sizeof(uint8_t), &d, start);
+ if (r < 0)
+ return r;
+
+ ((uint8_t*) d)[0] = v;
+
+ return 0;
+}
+
+int dns_packet_append_uint16(DnsPacket *p, uint16_t v, size_t *start) {
+ void *d;
+ int r;
+
+ assert(p);
+
+ r = dns_packet_extend(p, sizeof(uint16_t), &d, start);
+ if (r < 0)
+ return r;
+
+ unaligned_write_be16(d, v);
+
+ return 0;
+}
+
+int dns_packet_append_uint32(DnsPacket *p, uint32_t v, size_t *start) {
+ void *d;
+ int r;
+
+ assert(p);
+
+ r = dns_packet_extend(p, sizeof(uint32_t), &d, start);
+ if (r < 0)
+ return r;
+
+ unaligned_write_be32(d, v);
+
+ return 0;
+}
+
+int dns_packet_append_string(DnsPacket *p, const char *s, size_t *start) {
+ assert(p);
+ assert(s);
+
+ return dns_packet_append_raw_string(p, s, strlen(s), start);
+}
+
+int dns_packet_append_raw_string(DnsPacket *p, const void *s, size_t size, size_t *start) {
+ void *d;
+ int r;
+
+ assert(p);
+ assert(s || size == 0);
+
+ if (size > 255)
+ return -E2BIG;
+
+ r = dns_packet_extend(p, 1 + size, &d, start);
+ if (r < 0)
+ return r;
+
+ ((uint8_t*) d)[0] = (uint8_t) size;
+
+ memcpy_safe(((uint8_t*) d) + 1, s, size);
+
+ return 0;
+}
+
+int dns_packet_append_label(DnsPacket *p, const char *d, size_t l, bool canonical_candidate, size_t *start) {
+ uint8_t *w;
+ int r;
+
+ /* Append a label to a packet. Optionally, does this in DNSSEC
+ * canonical form, if this label is marked as a candidate for
+ * it, and the canonical form logic is enabled for the
+ * packet */
+
+ assert(p);
+ assert(d);
+
+ if (l > DNS_LABEL_MAX)
+ return -E2BIG;
+
+ r = dns_packet_extend(p, 1 + l, (void**) &w, start);
+ if (r < 0)
+ return r;
+
+ *(w++) = (uint8_t) l;
+
+ if (p->canonical_form && canonical_candidate) {
+ size_t i;
+
+ /* Generate in canonical form, as defined by DNSSEC
+ * RFC 4034, Section 6.2, i.e. all lower-case. */
+
+ for (i = 0; i < l; i++)
+ w[i] = (uint8_t) ascii_tolower(d[i]);
+ } else
+ /* Otherwise, just copy the string unaltered. This is
+ * essential for DNS-SD, where the casing of labels
+ * matters and needs to be retained. */
+ memcpy(w, d, l);
+
+ return 0;
+}
+
+int dns_packet_append_name(
+ DnsPacket *p,
+ const char *name,
+ bool allow_compression,
+ bool canonical_candidate,
+ size_t *start) {
+
+ size_t saved_size;
+ int r;
+
+ assert(p);
+ assert(name);
+
+ if (p->refuse_compression)
+ allow_compression = false;
+
+ saved_size = p->size;
+
+ while (!dns_name_is_root(name)) {
+ const char *z = name;
+ char label[DNS_LABEL_MAX];
+ size_t n = 0;
+
+ if (allow_compression)
+ n = PTR_TO_SIZE(hashmap_get(p->names, name));
+ if (n > 0) {
+ assert(n < p->size);
+
+ if (n < 0x4000) {
+ r = dns_packet_append_uint16(p, 0xC000 | n, NULL);
+ if (r < 0)
+ goto fail;
+
+ goto done;
+ }
+ }
+
+ r = dns_label_unescape(&name, label, sizeof label, 0);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_label(p, label, r, canonical_candidate, &n);
+ if (r < 0)
+ goto fail;
+
+ if (allow_compression) {
+ _cleanup_free_ char *s = NULL;
+
+ s = strdup(z);
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ r = hashmap_ensure_allocated(&p->names, &dns_name_hash_ops);
+ if (r < 0)
+ goto fail;
+
+ r = hashmap_put(p->names, s, SIZE_TO_PTR(n));
+ if (r < 0)
+ goto fail;
+
+ s = NULL;
+ }
+ }
+
+ r = dns_packet_append_uint8(p, 0, NULL);
+ if (r < 0)
+ return r;
+
+done:
+ if (start)
+ *start = saved_size;
+
+ return 0;
+
+fail:
+ dns_packet_truncate(p, saved_size);
+ return r;
+}
+
+int dns_packet_append_key(DnsPacket *p, const DnsResourceKey *k, const DnsAnswerFlags flags, size_t *start) {
+ size_t saved_size;
+ uint16_t class;
+ int r;
+
+ assert(p);
+ assert(k);
+
+ saved_size = p->size;
+
+ r = dns_packet_append_name(p, dns_resource_key_name(k), true, true, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint16(p, k->type, NULL);
+ if (r < 0)
+ goto fail;
+
+ class = flags & DNS_ANSWER_CACHE_FLUSH ? k->class | MDNS_RR_CACHE_FLUSH : k->class;
+ r = dns_packet_append_uint16(p, class, NULL);
+ if (r < 0)
+ goto fail;
+
+ if (start)
+ *start = saved_size;
+
+ return 0;
+
+fail:
+ dns_packet_truncate(p, saved_size);
+ return r;
+}
+
+static int dns_packet_append_type_window(DnsPacket *p, uint8_t window, uint8_t length, const uint8_t *types, size_t *start) {
+ size_t saved_size;
+ int r;
+
+ assert(p);
+ assert(types);
+ assert(length > 0);
+
+ saved_size = p->size;
+
+ r = dns_packet_append_uint8(p, window, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, length, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, types, length, NULL);
+ if (r < 0)
+ goto fail;
+
+ if (start)
+ *start = saved_size;
+
+ return 0;
+fail:
+ dns_packet_truncate(p, saved_size);
+ return r;
+}
+
+static int dns_packet_append_types(DnsPacket *p, Bitmap *types, size_t *start) {
+ Iterator i;
+ uint8_t window = 0;
+ uint8_t entry = 0;
+ uint8_t bitmaps[32] = {};
+ unsigned n;
+ size_t saved_size;
+ int r;
+
+ assert(p);
+
+ saved_size = p->size;
+
+ BITMAP_FOREACH(n, types, i) {
+ assert(n <= 0xffff);
+
+ if ((n >> 8) != window && bitmaps[entry / 8] != 0) {
+ r = dns_packet_append_type_window(p, window, entry / 8 + 1, bitmaps, NULL);
+ if (r < 0)
+ goto fail;
+
+ zero(bitmaps);
+ }
+
+ window = n >> 8;
+ entry = n & 255;
+
+ bitmaps[entry / 8] |= 1 << (7 - (entry % 8));
+ }
+
+ if (bitmaps[entry / 8] != 0) {
+ r = dns_packet_append_type_window(p, window, entry / 8 + 1, bitmaps, NULL);
+ if (r < 0)
+ goto fail;
+ }
+
+ if (start)
+ *start = saved_size;
+
+ return 0;
+fail:
+ dns_packet_truncate(p, saved_size);
+ return r;
+}
+
+/* Append the OPT pseudo-RR described in RFC6891 */
+int dns_packet_append_opt(DnsPacket *p, uint16_t max_udp_size, bool edns0_do, int rcode, size_t *start) {
+ size_t saved_size;
+ int r;
+
+ assert(p);
+ /* we must never advertise supported packet size smaller than the legacy max */
+ assert(max_udp_size >= DNS_PACKET_UNICAST_SIZE_MAX);
+ assert(rcode >= 0);
+ assert(rcode <= _DNS_RCODE_MAX);
+
+ if (p->opt_start != (size_t) -1)
+ return -EBUSY;
+
+ assert(p->opt_size == (size_t) -1);
+
+ saved_size = p->size;
+
+ /* empty name */
+ r = dns_packet_append_uint8(p, 0, NULL);
+ if (r < 0)
+ return r;
+
+ /* type */
+ r = dns_packet_append_uint16(p, DNS_TYPE_OPT, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* class: maximum udp packet that can be received */
+ r = dns_packet_append_uint16(p, max_udp_size, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* extended RCODE and VERSION */
+ r = dns_packet_append_uint16(p, ((uint16_t) rcode & 0x0FF0) << 4, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* flags: DNSSEC OK (DO), see RFC3225 */
+ r = dns_packet_append_uint16(p, edns0_do ? EDNS0_OPT_DO : 0, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* RDLENGTH */
+ if (edns0_do && !DNS_PACKET_QR(p)) {
+ /* If DO is on and this is not a reply, also append RFC6975 Algorithm data */
+
+ static const uint8_t rfc6975[] = {
+
+ 0, 5, /* OPTION_CODE: DAU */
+#if HAVE_GCRYPT && GCRYPT_VERSION_NUMBER >= 0x010600
+ 0, 7, /* LIST_LENGTH */
+#else
+ 0, 6, /* LIST_LENGTH */
+#endif
+ DNSSEC_ALGORITHM_RSASHA1,
+ DNSSEC_ALGORITHM_RSASHA1_NSEC3_SHA1,
+ DNSSEC_ALGORITHM_RSASHA256,
+ DNSSEC_ALGORITHM_RSASHA512,
+ DNSSEC_ALGORITHM_ECDSAP256SHA256,
+ DNSSEC_ALGORITHM_ECDSAP384SHA384,
+#if HAVE_GCRYPT && GCRYPT_VERSION_NUMBER >= 0x010600
+ DNSSEC_ALGORITHM_ED25519,
+#endif
+
+ 0, 6, /* OPTION_CODE: DHU */
+ 0, 3, /* LIST_LENGTH */
+ DNSSEC_DIGEST_SHA1,
+ DNSSEC_DIGEST_SHA256,
+ DNSSEC_DIGEST_SHA384,
+
+ 0, 7, /* OPTION_CODE: N3U */
+ 0, 1, /* LIST_LENGTH */
+ NSEC3_ALGORITHM_SHA1,
+ };
+
+ r = dns_packet_append_uint16(p, sizeof(rfc6975), NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rfc6975, sizeof(rfc6975), NULL);
+ } else
+ r = dns_packet_append_uint16(p, 0, NULL);
+ if (r < 0)
+ goto fail;
+
+ DNS_PACKET_HEADER(p)->arcount = htobe16(DNS_PACKET_ARCOUNT(p) + 1);
+
+ p->opt_start = saved_size;
+ p->opt_size = p->size - saved_size;
+
+ if (start)
+ *start = saved_size;
+
+ return 0;
+
+fail:
+ dns_packet_truncate(p, saved_size);
+ return r;
+}
+
+int dns_packet_truncate_opt(DnsPacket *p) {
+ assert(p);
+
+ if (p->opt_start == (size_t) -1) {
+ assert(p->opt_size == (size_t) -1);
+ return 0;
+ }
+
+ assert(p->opt_size != (size_t) -1);
+ assert(DNS_PACKET_ARCOUNT(p) > 0);
+
+ if (p->opt_start + p->opt_size != p->size)
+ return -EBUSY;
+
+ dns_packet_truncate(p, p->opt_start);
+ DNS_PACKET_HEADER(p)->arcount = htobe16(DNS_PACKET_ARCOUNT(p) - 1);
+ p->opt_start = p->opt_size = (size_t) -1;
+
+ return 1;
+}
+
+int dns_packet_append_rr(DnsPacket *p, const DnsResourceRecord *rr, const DnsAnswerFlags flags, size_t *start, size_t *rdata_start) {
+
+ size_t saved_size, rdlength_offset, end, rdlength, rds;
+ uint32_t ttl;
+ int r;
+
+ assert(p);
+ assert(rr);
+
+ saved_size = p->size;
+
+ r = dns_packet_append_key(p, rr->key, flags, NULL);
+ if (r < 0)
+ goto fail;
+
+ ttl = flags & DNS_ANSWER_GOODBYE ? 0 : rr->ttl;
+ r = dns_packet_append_uint32(p, ttl, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* Initially we write 0 here */
+ r = dns_packet_append_uint16(p, 0, &rdlength_offset);
+ if (r < 0)
+ goto fail;
+
+ rds = p->size - saved_size;
+
+ switch (rr->unparseable ? _DNS_TYPE_INVALID : rr->key->type) {
+
+ case DNS_TYPE_SRV:
+ r = dns_packet_append_uint16(p, rr->srv.priority, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint16(p, rr->srv.weight, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint16(p, rr->srv.port, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* RFC 2782 states "Unless and until permitted by future standards
+ * action, name compression is not to be used for this field." */
+ r = dns_packet_append_name(p, rr->srv.name, false, false, NULL);
+ break;
+
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ r = dns_packet_append_name(p, rr->ptr.name, true, false, NULL);
+ break;
+
+ case DNS_TYPE_HINFO:
+ r = dns_packet_append_string(p, rr->hinfo.cpu, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_string(p, rr->hinfo.os, NULL);
+ break;
+
+ case DNS_TYPE_SPF: /* exactly the same as TXT */
+ case DNS_TYPE_TXT:
+
+ if (!rr->txt.items) {
+ /* RFC 6763, section 6.1 suggests to generate
+ * single empty string for an empty array. */
+
+ r = dns_packet_append_raw_string(p, NULL, 0, NULL);
+ if (r < 0)
+ goto fail;
+ } else {
+ DnsTxtItem *i;
+
+ LIST_FOREACH(items, i, rr->txt.items) {
+ r = dns_packet_append_raw_string(p, i->data, i->length, NULL);
+ if (r < 0)
+ goto fail;
+ }
+ }
+
+ r = 0;
+ break;
+
+ case DNS_TYPE_A:
+ r = dns_packet_append_blob(p, &rr->a.in_addr, sizeof(struct in_addr), NULL);
+ break;
+
+ case DNS_TYPE_AAAA:
+ r = dns_packet_append_blob(p, &rr->aaaa.in6_addr, sizeof(struct in6_addr), NULL);
+ break;
+
+ case DNS_TYPE_SOA:
+ r = dns_packet_append_name(p, rr->soa.mname, true, false, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_name(p, rr->soa.rname, true, false, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->soa.serial, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->soa.refresh, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->soa.retry, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->soa.expire, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->soa.minimum, NULL);
+ break;
+
+ case DNS_TYPE_MX:
+ r = dns_packet_append_uint16(p, rr->mx.priority, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_name(p, rr->mx.exchange, true, false, NULL);
+ break;
+
+ case DNS_TYPE_LOC:
+ r = dns_packet_append_uint8(p, rr->loc.version, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->loc.size, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->loc.horiz_pre, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->loc.vert_pre, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->loc.latitude, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->loc.longitude, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->loc.altitude, NULL);
+ break;
+
+ case DNS_TYPE_DS:
+ r = dns_packet_append_uint16(p, rr->ds.key_tag, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->ds.algorithm, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->ds.digest_type, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->ds.digest, rr->ds.digest_size, NULL);
+ break;
+
+ case DNS_TYPE_SSHFP:
+ r = dns_packet_append_uint8(p, rr->sshfp.algorithm, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->sshfp.fptype, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->sshfp.fingerprint, rr->sshfp.fingerprint_size, NULL);
+ break;
+
+ case DNS_TYPE_DNSKEY:
+ r = dns_packet_append_uint16(p, rr->dnskey.flags, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->dnskey.protocol, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->dnskey.algorithm, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->dnskey.key, rr->dnskey.key_size, NULL);
+ break;
+
+ case DNS_TYPE_RRSIG:
+ r = dns_packet_append_uint16(p, rr->rrsig.type_covered, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->rrsig.algorithm, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->rrsig.labels, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->rrsig.original_ttl, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->rrsig.expiration, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->rrsig.inception, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint16(p, rr->rrsig.key_tag, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_name(p, rr->rrsig.signer, false, true, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->rrsig.signature, rr->rrsig.signature_size, NULL);
+ break;
+
+ case DNS_TYPE_NSEC:
+ r = dns_packet_append_name(p, rr->nsec.next_domain_name, false, false, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_types(p, rr->nsec.types, NULL);
+ if (r < 0)
+ goto fail;
+
+ break;
+
+ case DNS_TYPE_NSEC3:
+ r = dns_packet_append_uint8(p, rr->nsec3.algorithm, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->nsec3.flags, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint16(p, rr->nsec3.iterations, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->nsec3.salt_size, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->nsec3.salt, rr->nsec3.salt_size, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->nsec3.next_hashed_name_size, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->nsec3.next_hashed_name, rr->nsec3.next_hashed_name_size, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_types(p, rr->nsec3.types, NULL);
+ if (r < 0)
+ goto fail;
+
+ break;
+
+ case DNS_TYPE_TLSA:
+ r = dns_packet_append_uint8(p, rr->tlsa.cert_usage, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->tlsa.selector, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->tlsa.matching_type, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->tlsa.data, rr->tlsa.data_size, NULL);
+ break;
+
+ case DNS_TYPE_CAA:
+ r = dns_packet_append_uint8(p, rr->caa.flags, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_string(p, rr->caa.tag, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->caa.value, rr->caa.value_size, NULL);
+ break;
+
+ case DNS_TYPE_OPT:
+ case DNS_TYPE_OPENPGPKEY:
+ case _DNS_TYPE_INVALID: /* unparseable */
+ default:
+
+ r = dns_packet_append_blob(p, rr->generic.data, rr->generic.data_size, NULL);
+ break;
+ }
+ if (r < 0)
+ goto fail;
+
+ /* Let's calculate the actual data size and update the field */
+ rdlength = p->size - rdlength_offset - sizeof(uint16_t);
+ if (rdlength > 0xFFFF) {
+ r = -ENOSPC;
+ goto fail;
+ }
+
+ end = p->size;
+ p->size = rdlength_offset;
+ r = dns_packet_append_uint16(p, rdlength, NULL);
+ if (r < 0)
+ goto fail;
+ p->size = end;
+
+ if (start)
+ *start = saved_size;
+
+ if (rdata_start)
+ *rdata_start = rds;
+
+ return 0;
+
+fail:
+ dns_packet_truncate(p, saved_size);
+ return r;
+}
+
+int dns_packet_append_question(DnsPacket *p, DnsQuestion *q) {
+ DnsResourceKey *key;
+ int r;
+
+ assert(p);
+
+ DNS_QUESTION_FOREACH(key, q) {
+ r = dns_packet_append_key(p, key, 0, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_packet_append_answer(DnsPacket *p, DnsAnswer *a) {
+ DnsResourceRecord *rr;
+ DnsAnswerFlags flags;
+ int r;
+
+ assert(p);
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, flags, a) {
+ r = dns_packet_append_rr(p, rr, flags, NULL, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_packet_read(DnsPacket *p, size_t sz, const void **ret, size_t *start) {
+ assert(p);
+
+ if (p->rindex + sz > p->size)
+ return -EMSGSIZE;
+
+ if (ret)
+ *ret = (uint8_t*) DNS_PACKET_DATA(p) + p->rindex;
+
+ if (start)
+ *start = p->rindex;
+
+ p->rindex += sz;
+ return 0;
+}
+
+void dns_packet_rewind(DnsPacket *p, size_t idx) {
+ assert(p);
+ assert(idx <= p->size);
+ assert(idx >= DNS_PACKET_HEADER_SIZE);
+
+ p->rindex = idx;
+}
+
+int dns_packet_read_blob(DnsPacket *p, void *d, size_t sz, size_t *start) {
+ const void *q;
+ int r;
+
+ assert(p);
+ assert(d);
+
+ r = dns_packet_read(p, sz, &q, start);
+ if (r < 0)
+ return r;
+
+ memcpy(d, q, sz);
+ return 0;
+}
+
+static int dns_packet_read_memdup(
+ DnsPacket *p, size_t size,
+ void **ret, size_t *ret_size,
+ size_t *ret_start) {
+
+ const void *src;
+ size_t start;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ r = dns_packet_read(p, size, &src, &start);
+ if (r < 0)
+ return r;
+
+ if (size <= 0)
+ *ret = NULL;
+ else {
+ void *copy;
+
+ copy = memdup(src, size);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret = copy;
+ }
+
+ if (ret_size)
+ *ret_size = size;
+ if (ret_start)
+ *ret_start = start;
+
+ return 0;
+}
+
+int dns_packet_read_uint8(DnsPacket *p, uint8_t *ret, size_t *start) {
+ const void *d;
+ int r;
+
+ assert(p);
+
+ r = dns_packet_read(p, sizeof(uint8_t), &d, start);
+ if (r < 0)
+ return r;
+
+ *ret = ((uint8_t*) d)[0];
+ return 0;
+}
+
+int dns_packet_read_uint16(DnsPacket *p, uint16_t *ret, size_t *start) {
+ const void *d;
+ int r;
+
+ assert(p);
+
+ r = dns_packet_read(p, sizeof(uint16_t), &d, start);
+ if (r < 0)
+ return r;
+
+ *ret = unaligned_read_be16(d);
+
+ return 0;
+}
+
+int dns_packet_read_uint32(DnsPacket *p, uint32_t *ret, size_t *start) {
+ const void *d;
+ int r;
+
+ assert(p);
+
+ r = dns_packet_read(p, sizeof(uint32_t), &d, start);
+ if (r < 0)
+ return r;
+
+ *ret = unaligned_read_be32(d);
+
+ return 0;
+}
+
+int dns_packet_read_string(DnsPacket *p, char **ret, size_t *start) {
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder;
+ const void *d;
+ char *t;
+ uint8_t c;
+ int r;
+
+ assert(p);
+ INIT_REWINDER(rewinder, p);
+
+ r = dns_packet_read_uint8(p, &c, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read(p, c, &d, NULL);
+ if (r < 0)
+ return r;
+
+ if (memchr(d, 0, c))
+ return -EBADMSG;
+
+ t = strndup(d, c);
+ if (!t)
+ return -ENOMEM;
+
+ if (!utf8_is_valid(t)) {
+ free(t);
+ return -EBADMSG;
+ }
+
+ *ret = t;
+
+ if (start)
+ *start = rewinder.saved_rindex;
+ CANCEL_REWINDER(rewinder);
+
+ return 0;
+}
+
+int dns_packet_read_raw_string(DnsPacket *p, const void **ret, size_t *size, size_t *start) {
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder;
+ uint8_t c;
+ int r;
+
+ assert(p);
+ INIT_REWINDER(rewinder, p);
+
+ r = dns_packet_read_uint8(p, &c, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read(p, c, ret, NULL);
+ if (r < 0)
+ return r;
+
+ if (size)
+ *size = c;
+ if (start)
+ *start = rewinder.saved_rindex;
+ CANCEL_REWINDER(rewinder);
+
+ return 0;
+}
+
+int dns_packet_read_name(
+ DnsPacket *p,
+ char **_ret,
+ bool allow_compression,
+ size_t *start) {
+
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder;
+ size_t after_rindex = 0, jump_barrier;
+ _cleanup_free_ char *ret = NULL;
+ size_t n = 0, allocated = 0;
+ bool first = true;
+ int r;
+
+ assert(p);
+ assert(_ret);
+ INIT_REWINDER(rewinder, p);
+ jump_barrier = p->rindex;
+
+ if (p->refuse_compression)
+ allow_compression = false;
+
+ for (;;) {
+ uint8_t c, d;
+
+ r = dns_packet_read_uint8(p, &c, NULL);
+ if (r < 0)
+ return r;
+
+ if (c == 0)
+ /* End of name */
+ break;
+ else if (c <= 63) {
+ const char *label;
+
+ /* Literal label */
+ r = dns_packet_read(p, c, (const void**) &label, NULL);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC(ret, allocated, n + !first + DNS_LABEL_ESCAPED_MAX))
+ return -ENOMEM;
+
+ if (first)
+ first = false;
+ else
+ ret[n++] = '.';
+
+ r = dns_label_escape(label, c, ret + n, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ n += r;
+ continue;
+ } else if (allow_compression && (c & 0xc0) == 0xc0) {
+ uint16_t ptr;
+
+ /* Pointer */
+ r = dns_packet_read_uint8(p, &d, NULL);
+ if (r < 0)
+ return r;
+
+ ptr = (uint16_t) (c & ~0xc0) << 8 | (uint16_t) d;
+ if (ptr < DNS_PACKET_HEADER_SIZE || ptr >= jump_barrier)
+ return -EBADMSG;
+
+ if (after_rindex == 0)
+ after_rindex = p->rindex;
+
+ /* Jumps are limited to a "prior occurrence" (RFC-1035 4.1.4) */
+ jump_barrier = ptr;
+ p->rindex = ptr;
+ } else
+ return -EBADMSG;
+ }
+
+ if (!GREEDY_REALLOC(ret, allocated, n + 1))
+ return -ENOMEM;
+
+ ret[n] = 0;
+
+ if (after_rindex != 0)
+ p->rindex= after_rindex;
+
+ *_ret = TAKE_PTR(ret);
+
+ if (start)
+ *start = rewinder.saved_rindex;
+ CANCEL_REWINDER(rewinder);
+
+ return 0;
+}
+
+static int dns_packet_read_type_window(DnsPacket *p, Bitmap **types, size_t *start) {
+ uint8_t window;
+ uint8_t length;
+ const uint8_t *bitmap;
+ uint8_t bit = 0;
+ unsigned i;
+ bool found = false;
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder;
+ int r;
+
+ assert(p);
+ assert(types);
+ INIT_REWINDER(rewinder, p);
+
+ r = bitmap_ensure_allocated(types);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &window, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &length, NULL);
+ if (r < 0)
+ return r;
+
+ if (length == 0 || length > 32)
+ return -EBADMSG;
+
+ r = dns_packet_read(p, length, (const void **)&bitmap, NULL);
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < length; i++) {
+ uint8_t bitmask = 1 << 7;
+
+ if (!bitmap[i]) {
+ found = false;
+ bit += 8;
+ continue;
+ }
+
+ found = true;
+
+ for (; bitmask; bit++, bitmask >>= 1)
+ if (bitmap[i] & bitmask) {
+ uint16_t n;
+
+ n = (uint16_t) window << 8 | (uint16_t) bit;
+
+ /* Ignore pseudo-types. see RFC4034 section 4.1.2 */
+ if (dns_type_is_pseudo(n))
+ continue;
+
+ r = bitmap_set(*types, n);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!found)
+ return -EBADMSG;
+
+ if (start)
+ *start = rewinder.saved_rindex;
+ CANCEL_REWINDER(rewinder);
+
+ return 0;
+}
+
+static int dns_packet_read_type_windows(DnsPacket *p, Bitmap **types, size_t size, size_t *start) {
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder;
+ int r;
+
+ INIT_REWINDER(rewinder, p);
+
+ while (p->rindex < rewinder.saved_rindex + size) {
+ r = dns_packet_read_type_window(p, types, NULL);
+ if (r < 0)
+ return r;
+
+ /* don't read past end of current RR */
+ if (p->rindex > rewinder.saved_rindex + size)
+ return -EBADMSG;
+ }
+
+ if (p->rindex != rewinder.saved_rindex + size)
+ return -EBADMSG;
+
+ if (start)
+ *start = rewinder.saved_rindex;
+ CANCEL_REWINDER(rewinder);
+
+ return 0;
+}
+
+int dns_packet_read_key(DnsPacket *p, DnsResourceKey **ret, bool *ret_cache_flush, size_t *start) {
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder;
+ _cleanup_free_ char *name = NULL;
+ bool cache_flush = false;
+ uint16_t class, type;
+ DnsResourceKey *key;
+ int r;
+
+ assert(p);
+ assert(ret);
+ INIT_REWINDER(rewinder, p);
+
+ r = dns_packet_read_name(p, &name, true, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint16(p, &type, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint16(p, &class, NULL);
+ if (r < 0)
+ return r;
+
+ if (p->protocol == DNS_PROTOCOL_MDNS) {
+ /* See RFC6762, Section 10.2 */
+
+ if (type != DNS_TYPE_OPT && (class & MDNS_RR_CACHE_FLUSH)) {
+ class &= ~MDNS_RR_CACHE_FLUSH;
+ cache_flush = true;
+ }
+ }
+
+ key = dns_resource_key_new_consume(class, type, name);
+ if (!key)
+ return -ENOMEM;
+
+ name = NULL;
+ *ret = key;
+
+ if (ret_cache_flush)
+ *ret_cache_flush = cache_flush;
+ if (start)
+ *start = rewinder.saved_rindex;
+ CANCEL_REWINDER(rewinder);
+
+ return 0;
+}
+
+static bool loc_size_ok(uint8_t size) {
+ uint8_t m = size >> 4, e = size & 0xF;
+
+ return m <= 9 && e <= 9 && (m > 0 || e == 0);
+}
+
+int dns_packet_read_rr(DnsPacket *p, DnsResourceRecord **ret, bool *ret_cache_flush, size_t *start) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder;
+ size_t offset;
+ uint16_t rdlength;
+ bool cache_flush;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ INIT_REWINDER(rewinder, p);
+
+ r = dns_packet_read_key(p, &key, &cache_flush, NULL);
+ if (r < 0)
+ return r;
+
+ if (!dns_class_is_valid_rr(key->class) || !dns_type_is_valid_rr(key->type))
+ return -EBADMSG;
+
+ rr = dns_resource_record_new(key);
+ if (!rr)
+ return -ENOMEM;
+
+ r = dns_packet_read_uint32(p, &rr->ttl, NULL);
+ if (r < 0)
+ return r;
+
+ /* RFC 2181, Section 8, suggests to
+ * treat a TTL with the MSB set as a zero TTL. */
+ if (rr->ttl & UINT32_C(0x80000000))
+ rr->ttl = 0;
+
+ r = dns_packet_read_uint16(p, &rdlength, NULL);
+ if (r < 0)
+ return r;
+
+ if (p->rindex + rdlength > p->size)
+ return -EBADMSG;
+
+ offset = p->rindex;
+
+ switch (rr->key->type) {
+
+ case DNS_TYPE_SRV:
+ r = dns_packet_read_uint16(p, &rr->srv.priority, NULL);
+ if (r < 0)
+ return r;
+ r = dns_packet_read_uint16(p, &rr->srv.weight, NULL);
+ if (r < 0)
+ return r;
+ r = dns_packet_read_uint16(p, &rr->srv.port, NULL);
+ if (r < 0)
+ return r;
+ r = dns_packet_read_name(p, &rr->srv.name, true, NULL);
+ break;
+
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ r = dns_packet_read_name(p, &rr->ptr.name, true, NULL);
+ break;
+
+ case DNS_TYPE_HINFO:
+ r = dns_packet_read_string(p, &rr->hinfo.cpu, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_string(p, &rr->hinfo.os, NULL);
+ break;
+
+ case DNS_TYPE_SPF: /* exactly the same as TXT */
+ case DNS_TYPE_TXT:
+ if (rdlength <= 0) {
+ r = dns_txt_item_new_empty(&rr->txt.items);
+ if (r < 0)
+ return r;
+ } else {
+ DnsTxtItem *last = NULL;
+
+ while (p->rindex < offset + rdlength) {
+ DnsTxtItem *i;
+ const void *data;
+ size_t sz;
+
+ r = dns_packet_read_raw_string(p, &data, &sz, NULL);
+ if (r < 0)
+ return r;
+
+ i = malloc0(offsetof(DnsTxtItem, data) + sz + 1); /* extra NUL byte at the end */
+ if (!i)
+ return -ENOMEM;
+
+ memcpy(i->data, data, sz);
+ i->length = sz;
+
+ LIST_INSERT_AFTER(items, rr->txt.items, last, i);
+ last = i;
+ }
+ }
+
+ r = 0;
+ break;
+
+ case DNS_TYPE_A:
+ r = dns_packet_read_blob(p, &rr->a.in_addr, sizeof(struct in_addr), NULL);
+ break;
+
+ case DNS_TYPE_AAAA:
+ r = dns_packet_read_blob(p, &rr->aaaa.in6_addr, sizeof(struct in6_addr), NULL);
+ break;
+
+ case DNS_TYPE_SOA:
+ r = dns_packet_read_name(p, &rr->soa.mname, true, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_name(p, &rr->soa.rname, true, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->soa.serial, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->soa.refresh, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->soa.retry, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->soa.expire, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->soa.minimum, NULL);
+ break;
+
+ case DNS_TYPE_MX:
+ r = dns_packet_read_uint16(p, &rr->mx.priority, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_name(p, &rr->mx.exchange, true, NULL);
+ break;
+
+ case DNS_TYPE_LOC: {
+ uint8_t t;
+ size_t pos;
+
+ r = dns_packet_read_uint8(p, &t, &pos);
+ if (r < 0)
+ return r;
+
+ if (t == 0) {
+ rr->loc.version = t;
+
+ r = dns_packet_read_uint8(p, &rr->loc.size, NULL);
+ if (r < 0)
+ return r;
+
+ if (!loc_size_ok(rr->loc.size))
+ return -EBADMSG;
+
+ r = dns_packet_read_uint8(p, &rr->loc.horiz_pre, NULL);
+ if (r < 0)
+ return r;
+
+ if (!loc_size_ok(rr->loc.horiz_pre))
+ return -EBADMSG;
+
+ r = dns_packet_read_uint8(p, &rr->loc.vert_pre, NULL);
+ if (r < 0)
+ return r;
+
+ if (!loc_size_ok(rr->loc.vert_pre))
+ return -EBADMSG;
+
+ r = dns_packet_read_uint32(p, &rr->loc.latitude, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->loc.longitude, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->loc.altitude, NULL);
+ if (r < 0)
+ return r;
+
+ break;
+ } else {
+ dns_packet_rewind(p, pos);
+ rr->unparseable = true;
+ goto unparseable;
+ }
+ }
+
+ case DNS_TYPE_DS:
+ r = dns_packet_read_uint16(p, &rr->ds.key_tag, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->ds.algorithm, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->ds.digest_type, NULL);
+ if (r < 0)
+ return r;
+
+ if (rdlength < 4)
+ return -EBADMSG;
+
+ r = dns_packet_read_memdup(p, rdlength - 4,
+ &rr->ds.digest, &rr->ds.digest_size,
+ NULL);
+ if (r < 0)
+ return r;
+
+ if (rr->ds.digest_size <= 0)
+ /* the accepted size depends on the algorithm, but for now
+ just ensure that the value is greater than zero */
+ return -EBADMSG;
+
+ break;
+
+ case DNS_TYPE_SSHFP:
+ r = dns_packet_read_uint8(p, &rr->sshfp.algorithm, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->sshfp.fptype, NULL);
+ if (r < 0)
+ return r;
+
+ if (rdlength < 2)
+ return -EBADMSG;
+
+ r = dns_packet_read_memdup(p, rdlength - 2,
+ &rr->sshfp.fingerprint, &rr->sshfp.fingerprint_size,
+ NULL);
+
+ if (rr->sshfp.fingerprint_size <= 0)
+ /* the accepted size depends on the algorithm, but for now
+ just ensure that the value is greater than zero */
+ return -EBADMSG;
+
+ break;
+
+ case DNS_TYPE_DNSKEY:
+ r = dns_packet_read_uint16(p, &rr->dnskey.flags, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->dnskey.protocol, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->dnskey.algorithm, NULL);
+ if (r < 0)
+ return r;
+
+ if (rdlength < 4)
+ return -EBADMSG;
+
+ r = dns_packet_read_memdup(p, rdlength - 4,
+ &rr->dnskey.key, &rr->dnskey.key_size,
+ NULL);
+
+ if (rr->dnskey.key_size <= 0)
+ /* the accepted size depends on the algorithm, but for now
+ just ensure that the value is greater than zero */
+ return -EBADMSG;
+
+ break;
+
+ case DNS_TYPE_RRSIG:
+ r = dns_packet_read_uint16(p, &rr->rrsig.type_covered, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->rrsig.algorithm, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->rrsig.labels, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->rrsig.original_ttl, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->rrsig.expiration, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->rrsig.inception, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint16(p, &rr->rrsig.key_tag, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_name(p, &rr->rrsig.signer, false, NULL);
+ if (r < 0)
+ return r;
+
+ if (rdlength + offset < p->rindex)
+ return -EBADMSG;
+
+ r = dns_packet_read_memdup(p, offset + rdlength - p->rindex,
+ &rr->rrsig.signature, &rr->rrsig.signature_size,
+ NULL);
+
+ if (rr->rrsig.signature_size <= 0)
+ /* the accepted size depends on the algorithm, but for now
+ just ensure that the value is greater than zero */
+ return -EBADMSG;
+
+ break;
+
+ case DNS_TYPE_NSEC: {
+
+ /*
+ * RFC6762, section 18.14 explictly states mDNS should use name compression.
+ * This contradicts RFC3845, section 2.1.1
+ */
+
+ bool allow_compressed = p->protocol == DNS_PROTOCOL_MDNS;
+
+ r = dns_packet_read_name(p, &rr->nsec.next_domain_name, allow_compressed, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_type_windows(p, &rr->nsec.types, offset + rdlength - p->rindex, NULL);
+
+ /* We accept empty NSEC bitmaps. The bit indicating the presence of the NSEC record itself
+ * is redundant and in e.g., RFC4956 this fact is used to define a use for NSEC records
+ * without the NSEC bit set. */
+
+ break;
+ }
+ case DNS_TYPE_NSEC3: {
+ uint8_t size;
+
+ r = dns_packet_read_uint8(p, &rr->nsec3.algorithm, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->nsec3.flags, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint16(p, &rr->nsec3.iterations, NULL);
+ if (r < 0)
+ return r;
+
+ /* this may be zero */
+ r = dns_packet_read_uint8(p, &size, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_memdup(p, size, &rr->nsec3.salt, &rr->nsec3.salt_size, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &size, NULL);
+ if (r < 0)
+ return r;
+
+ if (size <= 0)
+ return -EBADMSG;
+
+ r = dns_packet_read_memdup(p, size,
+ &rr->nsec3.next_hashed_name, &rr->nsec3.next_hashed_name_size,
+ NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_type_windows(p, &rr->nsec3.types, offset + rdlength - p->rindex, NULL);
+
+ /* empty non-terminals can have NSEC3 records, so empty bitmaps are allowed */
+
+ break;
+ }
+
+ case DNS_TYPE_TLSA:
+ r = dns_packet_read_uint8(p, &rr->tlsa.cert_usage, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->tlsa.selector, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->tlsa.matching_type, NULL);
+ if (r < 0)
+ return r;
+
+ if (rdlength < 3)
+ return -EBADMSG;
+
+ r = dns_packet_read_memdup(p, rdlength - 3,
+ &rr->tlsa.data, &rr->tlsa.data_size,
+ NULL);
+
+ if (rr->tlsa.data_size <= 0)
+ /* the accepted size depends on the algorithm, but for now
+ just ensure that the value is greater than zero */
+ return -EBADMSG;
+
+ break;
+
+ case DNS_TYPE_CAA:
+ r = dns_packet_read_uint8(p, &rr->caa.flags, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_string(p, &rr->caa.tag, NULL);
+ if (r < 0)
+ return r;
+
+ if (rdlength + offset < p->rindex)
+ return -EBADMSG;
+
+ r = dns_packet_read_memdup(p,
+ rdlength + offset - p->rindex,
+ &rr->caa.value, &rr->caa.value_size, NULL);
+
+ break;
+
+ case DNS_TYPE_OPT: /* we only care about the header of OPT for now. */
+ case DNS_TYPE_OPENPGPKEY:
+ default:
+ unparseable:
+ r = dns_packet_read_memdup(p, rdlength, &rr->generic.data, &rr->generic.data_size, NULL);
+
+ break;
+ }
+ if (r < 0)
+ return r;
+ if (p->rindex != offset + rdlength)
+ return -EBADMSG;
+
+ *ret = TAKE_PTR(rr);
+
+ if (ret_cache_flush)
+ *ret_cache_flush = cache_flush;
+ if (start)
+ *start = rewinder.saved_rindex;
+ CANCEL_REWINDER(rewinder);
+
+ return 0;
+}
+
+static bool opt_is_good(DnsResourceRecord *rr, bool *rfc6975) {
+ const uint8_t* p;
+ bool found_dau_dhu_n3u = false;
+ size_t l;
+
+ /* Checks whether the specified OPT RR is well-formed and whether it contains RFC6975 data (which is not OK in
+ * a reply). */
+
+ assert(rr);
+ assert(rr->key->type == DNS_TYPE_OPT);
+
+ /* Check that the version is 0 */
+ if (((rr->ttl >> 16) & UINT32_C(0xFF)) != 0) {
+ *rfc6975 = false;
+ return true; /* if it's not version 0, it's OK, but we will ignore the OPT field contents */
+ }
+
+ p = rr->opt.data;
+ l = rr->opt.data_size;
+ while (l > 0) {
+ uint16_t option_code, option_length;
+
+ /* At least four bytes for OPTION-CODE and OPTION-LENGTH are required */
+ if (l < 4U)
+ return false;
+
+ option_code = unaligned_read_be16(p);
+ option_length = unaligned_read_be16(p + 2);
+
+ if (l < option_length + 4U)
+ return false;
+
+ /* RFC 6975 DAU, DHU or N3U fields found. */
+ if (IN_SET(option_code, 5, 6, 7))
+ found_dau_dhu_n3u = true;
+
+ p += option_length + 4U;
+ l -= option_length + 4U;
+ }
+
+ *rfc6975 = found_dau_dhu_n3u;
+ return true;
+}
+
+static int dns_packet_extract_question(DnsPacket *p, DnsQuestion **ret_question) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *question = NULL;
+ unsigned n, i;
+ int r;
+
+ n = DNS_PACKET_QDCOUNT(p);
+ if (n > 0) {
+ question = dns_question_new(n);
+ if (!question)
+ return -ENOMEM;
+
+ for (i = 0; i < n; i++) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ bool cache_flush;
+
+ r = dns_packet_read_key(p, &key, &cache_flush, NULL);
+ if (r < 0)
+ return r;
+
+ if (cache_flush)
+ return -EBADMSG;
+
+ if (!dns_type_is_valid_query(key->type))
+ return -EBADMSG;
+
+ r = dns_question_add(question, key);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ *ret_question = TAKE_PTR(question);
+
+ return 0;
+}
+
+static int dns_packet_extract_answer(DnsPacket *p, DnsAnswer **ret_answer) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ unsigned n, i;
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *previous = NULL;
+ bool bad_opt = false;
+ int r;
+
+ n = DNS_PACKET_RRCOUNT(p);
+ if (n == 0)
+ return 0;
+
+ answer = dns_answer_new(n);
+ if (!answer)
+ return -ENOMEM;
+
+ for (i = 0; i < n; i++) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ bool cache_flush = false;
+
+ r = dns_packet_read_rr(p, &rr, &cache_flush, NULL);
+ if (r < 0)
+ return r;
+
+ /* Try to reduce memory usage a bit */
+ if (previous)
+ dns_resource_key_reduce(&rr->key, &previous->key);
+
+ if (rr->key->type == DNS_TYPE_OPT) {
+ bool has_rfc6975;
+
+ if (p->opt || bad_opt) {
+ /* Multiple OPT RRs? if so, let's ignore all, because there's
+ * something wrong with the server, and if one is valid we wouldn't
+ * know which one. */
+ log_debug("Multiple OPT RRs detected, ignoring all.");
+ bad_opt = true;
+ continue;
+ }
+
+ if (!dns_name_is_root(dns_resource_key_name(rr->key))) {
+ /* If the OPT RR is not owned by the root domain, then it is bad,
+ * let's ignore it. */
+ log_debug("OPT RR is not owned by root domain, ignoring.");
+ bad_opt = true;
+ continue;
+ }
+
+ if (i < DNS_PACKET_ANCOUNT(p) + DNS_PACKET_NSCOUNT(p)) {
+ /* OPT RR is in the wrong section? Some Belkin routers do this. This
+ * is a hint the EDNS implementation is borked, like the Belkin one
+ * is, hence ignore it. */
+ log_debug("OPT RR in wrong section, ignoring.");
+ bad_opt = true;
+ continue;
+ }
+
+ if (!opt_is_good(rr, &has_rfc6975)) {
+ log_debug("Malformed OPT RR, ignoring.");
+ bad_opt = true;
+ continue;
+ }
+
+ if (DNS_PACKET_QR(p)) {
+ /* Additional checks for responses */
+
+ if (!DNS_RESOURCE_RECORD_OPT_VERSION_SUPPORTED(rr)) {
+ /* If this is a reply and we don't know the EDNS version
+ * then something is weird... */
+ log_debug("EDNS version newer that our request, bad server.");
+ return -EBADMSG;
+ }
+
+ if (has_rfc6975) {
+ /* If the OPT RR contains RFC6975 algorithm data, then this
+ * is indication that the server just copied the OPT it got
+ * from us (which contained that data) back into the reply.
+ * If so, then it doesn't properly support EDNS, as RFC6975
+ * makes it very clear that the algorithm data should only
+ * be contained in questions, never in replies. Crappy
+ * Belkin routers copy the OPT data for example, hence let's
+ * detect this so that we downgrade early. */
+ log_debug("OPT RR contained RFC6975 data, ignoring.");
+ bad_opt = true;
+ continue;
+ }
+ }
+
+ p->opt = dns_resource_record_ref(rr);
+ } else {
+ /* According to RFC 4795, section 2.9. only the RRs from the Answer section
+ * shall be cached. Hence mark only those RRs as cacheable by default, but
+ * not the ones from the Additional or Authority sections. */
+ DnsAnswerFlags flags =
+ (i < DNS_PACKET_ANCOUNT(p) ? DNS_ANSWER_CACHEABLE : 0) |
+ (p->protocol == DNS_PROTOCOL_MDNS && !cache_flush ? DNS_ANSWER_SHARED_OWNER : 0);
+
+ r = dns_answer_add(answer, rr, p->ifindex, flags);
+ if (r < 0)
+ return r;
+ }
+
+ /* Remember this RR, so that we potentically can merge it's ->key object with the
+ * next RR. Note that we only do this if we actually decided to keep the RR around.
+ */
+ dns_resource_record_unref(previous);
+ previous = dns_resource_record_ref(rr);
+ }
+
+ if (bad_opt)
+ p->opt = dns_resource_record_unref(p->opt);
+
+ *ret_answer = TAKE_PTR(answer);
+
+ return 0;
+}
+
+int dns_packet_extract(DnsPacket *p) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *question = NULL;
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder = {};
+ int r;
+
+ if (p->extracted)
+ return 0;
+
+ INIT_REWINDER(rewinder, p);
+ dns_packet_rewind(p, DNS_PACKET_HEADER_SIZE);
+
+ r = dns_packet_extract_question(p, &question);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_extract_answer(p, &answer);
+ if (r < 0)
+ return r;
+
+ p->question = TAKE_PTR(question);
+ p->answer = TAKE_PTR(answer);
+
+ p->extracted = true;
+
+ /* no CANCEL, always rewind */
+ return 0;
+}
+
+int dns_packet_is_reply_for(DnsPacket *p, const DnsResourceKey *key) {
+ int r;
+
+ assert(p);
+ assert(key);
+
+ /* Checks if the specified packet is a reply for the specified
+ * key and the specified key is the only one in the question
+ * section. */
+
+ if (DNS_PACKET_QR(p) != 1)
+ return 0;
+
+ /* Let's unpack the packet, if that hasn't happened yet. */
+ r = dns_packet_extract(p);
+ if (r < 0)
+ return r;
+
+ if (!p->question)
+ return 0;
+
+ if (p->question->n_keys != 1)
+ return 0;
+
+ return dns_resource_key_equal(p->question->keys[0], key);
+}
+
+static void dns_packet_hash_func(const DnsPacket *s, struct siphash *state) {
+ assert(s);
+
+ siphash24_compress(&s->size, sizeof(s->size), state);
+ siphash24_compress(DNS_PACKET_DATA((DnsPacket*) s), s->size, state);
+}
+
+static int dns_packet_compare_func(const DnsPacket *x, const DnsPacket *y) {
+ int r;
+
+ r = CMP(x->size, y->size);
+ if (r != 0)
+ return r;
+
+ return memcmp(DNS_PACKET_DATA((DnsPacket*) x), DNS_PACKET_DATA((DnsPacket*) y), x->size);
+}
+
+DEFINE_HASH_OPS(dns_packet_hash_ops, DnsPacket, dns_packet_hash_func, dns_packet_compare_func);
+
+static const char* const dns_rcode_table[_DNS_RCODE_MAX_DEFINED] = {
+ [DNS_RCODE_SUCCESS] = "SUCCESS",
+ [DNS_RCODE_FORMERR] = "FORMERR",
+ [DNS_RCODE_SERVFAIL] = "SERVFAIL",
+ [DNS_RCODE_NXDOMAIN] = "NXDOMAIN",
+ [DNS_RCODE_NOTIMP] = "NOTIMP",
+ [DNS_RCODE_REFUSED] = "REFUSED",
+ [DNS_RCODE_YXDOMAIN] = "YXDOMAIN",
+ [DNS_RCODE_YXRRSET] = "YRRSET",
+ [DNS_RCODE_NXRRSET] = "NXRRSET",
+ [DNS_RCODE_NOTAUTH] = "NOTAUTH",
+ [DNS_RCODE_NOTZONE] = "NOTZONE",
+ [DNS_RCODE_BADVERS] = "BADVERS",
+ [DNS_RCODE_BADKEY] = "BADKEY",
+ [DNS_RCODE_BADTIME] = "BADTIME",
+ [DNS_RCODE_BADMODE] = "BADMODE",
+ [DNS_RCODE_BADNAME] = "BADNAME",
+ [DNS_RCODE_BADALG] = "BADALG",
+ [DNS_RCODE_BADTRUNC] = "BADTRUNC",
+ [DNS_RCODE_BADCOOKIE] = "BADCOOKIE",
+};
+DEFINE_STRING_TABLE_LOOKUP(dns_rcode, int);
+
+static const char* const dns_protocol_table[_DNS_PROTOCOL_MAX] = {
+ [DNS_PROTOCOL_DNS] = "dns",
+ [DNS_PROTOCOL_MDNS] = "mdns",
+ [DNS_PROTOCOL_LLMNR] = "llmnr",
+};
+DEFINE_STRING_TABLE_LOOKUP(dns_protocol, DnsProtocol);
diff --git a/src/resolve/resolved-dns-packet.h b/src/resolve/resolved-dns-packet.h
new file mode 100644
index 0000000..008860b
--- /dev/null
+++ b/src/resolve/resolved-dns-packet.h
@@ -0,0 +1,302 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+
+#include "hashmap.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "sparse-endian.h"
+
+typedef struct DnsPacketHeader DnsPacketHeader;
+typedef struct DnsPacket DnsPacket;
+
+#include "resolved-def.h"
+#include "resolved-dns-answer.h"
+#include "resolved-dns-question.h"
+#include "resolved-dns-rr.h"
+
+typedef enum DnsProtocol {
+ DNS_PROTOCOL_DNS,
+ DNS_PROTOCOL_MDNS,
+ DNS_PROTOCOL_LLMNR,
+ _DNS_PROTOCOL_MAX,
+ _DNS_PROTOCOL_INVALID = -1
+} DnsProtocol;
+
+struct DnsPacketHeader {
+ uint16_t id;
+ be16_t flags;
+ be16_t qdcount;
+ be16_t ancount;
+ be16_t nscount;
+ be16_t arcount;
+};
+
+#define DNS_PACKET_HEADER_SIZE sizeof(DnsPacketHeader)
+#define UDP_PACKET_HEADER_SIZE (sizeof(struct iphdr) + sizeof(struct udphdr))
+
+/* The various DNS protocols deviate in how large a packet can grow,
+ * but the TCP transport has a 16bit size field, hence that appears to
+ * be the absolute maximum. */
+#define DNS_PACKET_SIZE_MAX 0xFFFFu
+
+/* The default size to use for allocation when we don't know how large
+ * the packet will turn out to be. */
+#define DNS_PACKET_SIZE_START 512u
+
+/* RFC 1035 say 512 is the maximum, for classic unicast DNS */
+#define DNS_PACKET_UNICAST_SIZE_MAX 512u
+
+/* With EDNS0 we can use larger packets, default to 4096, which is what is commonly used */
+#define DNS_PACKET_UNICAST_SIZE_LARGE_MAX 4096u
+
+struct DnsPacket {
+ unsigned n_ref;
+ DnsProtocol protocol;
+ size_t size, allocated, rindex, max_size;
+ void *_data; /* don't access directly, use DNS_PACKET_DATA()! */
+ Hashmap *names; /* For name compression */
+ size_t opt_start, opt_size;
+
+ /* Parsed data */
+ DnsQuestion *question;
+ DnsAnswer *answer;
+ DnsResourceRecord *opt;
+
+ /* Packet reception metadata */
+ int ifindex;
+ int family, ipproto;
+ union in_addr_union sender, destination;
+ uint16_t sender_port, destination_port;
+ uint32_t ttl;
+
+ /* For support of truncated packets */
+ DnsPacket *more;
+
+ bool on_stack:1;
+ bool extracted:1;
+ bool refuse_compression:1;
+ bool canonical_form:1;
+};
+
+static inline uint8_t* DNS_PACKET_DATA(DnsPacket *p) {
+ if (_unlikely_(!p))
+ return NULL;
+
+ if (p->_data)
+ return p->_data;
+
+ return ((uint8_t*) p) + ALIGN(sizeof(DnsPacket));
+}
+
+#define DNS_PACKET_HEADER(p) ((DnsPacketHeader*) DNS_PACKET_DATA(p))
+#define DNS_PACKET_ID(p) DNS_PACKET_HEADER(p)->id
+#define DNS_PACKET_QR(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 15) & 1)
+#define DNS_PACKET_OPCODE(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 11) & 15)
+#define DNS_PACKET_AA(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 10) & 1)
+#define DNS_PACKET_TC(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 9) & 1)
+#define DNS_PACKET_RD(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 8) & 1)
+#define DNS_PACKET_RA(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 7) & 1)
+#define DNS_PACKET_AD(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 5) & 1)
+#define DNS_PACKET_CD(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 4) & 1)
+
+#define DNS_PACKET_FLAG_TC (UINT16_C(1) << 9)
+
+static inline uint16_t DNS_PACKET_RCODE(DnsPacket *p) {
+ uint16_t rcode;
+
+ if (p->opt)
+ rcode = (uint16_t) (p->opt->ttl >> 24);
+ else
+ rcode = 0;
+
+ return rcode | (be16toh(DNS_PACKET_HEADER(p)->flags) & 0xF);
+}
+
+static inline uint16_t DNS_PACKET_PAYLOAD_SIZE_MAX(DnsPacket *p) {
+
+ /* Returns the advertised maximum size for replies, or the DNS default if there's nothing defined. */
+
+ if (p->opt)
+ return MAX(DNS_PACKET_UNICAST_SIZE_MAX, p->opt->key->class);
+
+ if (p->ipproto == IPPROTO_TCP)
+ return DNS_PACKET_SIZE_MAX;
+
+ return DNS_PACKET_UNICAST_SIZE_MAX;
+}
+
+static inline bool DNS_PACKET_DO(DnsPacket *p) {
+ if (!p->opt)
+ return false;
+
+ return !!(p->opt->ttl & (1U << 15));
+}
+
+static inline bool DNS_PACKET_VERSION_SUPPORTED(DnsPacket *p) {
+ /* Returns true if this packet is in a version we support. Which means either non-EDNS or EDNS(0), but not EDNS
+ * of any newer versions */
+
+ if (!p->opt)
+ return true;
+
+ return DNS_RESOURCE_RECORD_OPT_VERSION_SUPPORTED(p->opt);
+}
+
+/* LLMNR defines some bits differently */
+#define DNS_PACKET_LLMNR_C(p) DNS_PACKET_AA(p)
+#define DNS_PACKET_LLMNR_T(p) DNS_PACKET_RD(p)
+
+#define DNS_PACKET_QDCOUNT(p) be16toh(DNS_PACKET_HEADER(p)->qdcount)
+#define DNS_PACKET_ANCOUNT(p) be16toh(DNS_PACKET_HEADER(p)->ancount)
+#define DNS_PACKET_NSCOUNT(p) be16toh(DNS_PACKET_HEADER(p)->nscount)
+#define DNS_PACKET_ARCOUNT(p) be16toh(DNS_PACKET_HEADER(p)->arcount)
+
+#define DNS_PACKET_MAKE_FLAGS(qr, opcode, aa, tc, rd, ra, ad, cd, rcode) \
+ (((uint16_t) !!(qr) << 15) | \
+ ((uint16_t) ((opcode) & 15) << 11) | \
+ ((uint16_t) !!(aa) << 10) | /* on LLMNR: c */ \
+ ((uint16_t) !!(tc) << 9) | \
+ ((uint16_t) !!(rd) << 8) | /* on LLMNR: t */ \
+ ((uint16_t) !!(ra) << 7) | \
+ ((uint16_t) !!(ad) << 5) | \
+ ((uint16_t) !!(cd) << 4) | \
+ ((uint16_t) ((rcode) & 15)))
+
+static inline unsigned DNS_PACKET_RRCOUNT(DnsPacket *p) {
+ return
+ (unsigned) DNS_PACKET_ANCOUNT(p) +
+ (unsigned) DNS_PACKET_NSCOUNT(p) +
+ (unsigned) DNS_PACKET_ARCOUNT(p);
+}
+
+int dns_packet_new(DnsPacket **p, DnsProtocol protocol, size_t min_alloc_dsize, size_t max_size);
+int dns_packet_new_query(DnsPacket **p, DnsProtocol protocol, size_t min_alloc_dsize, bool dnssec_checking_disabled);
+
+void dns_packet_set_flags(DnsPacket *p, bool dnssec_checking_disabled, bool truncated);
+
+DnsPacket *dns_packet_ref(DnsPacket *p);
+DnsPacket *dns_packet_unref(DnsPacket *p);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsPacket*, dns_packet_unref);
+
+int dns_packet_validate(DnsPacket *p);
+int dns_packet_validate_reply(DnsPacket *p);
+int dns_packet_validate_query(DnsPacket *p);
+
+int dns_packet_is_reply_for(DnsPacket *p, const DnsResourceKey *key);
+
+int dns_packet_append_blob(DnsPacket *p, const void *d, size_t sz, size_t *start);
+int dns_packet_append_uint8(DnsPacket *p, uint8_t v, size_t *start);
+int dns_packet_append_uint16(DnsPacket *p, uint16_t v, size_t *start);
+int dns_packet_append_uint32(DnsPacket *p, uint32_t v, size_t *start);
+int dns_packet_append_string(DnsPacket *p, const char *s, size_t *start);
+int dns_packet_append_raw_string(DnsPacket *p, const void *s, size_t size, size_t *start);
+int dns_packet_append_label(DnsPacket *p, const char *s, size_t l, bool canonical_candidate, size_t *start);
+int dns_packet_append_name(DnsPacket *p, const char *name, bool allow_compression, bool canonical_candidate, size_t *start);
+int dns_packet_append_key(DnsPacket *p, const DnsResourceKey *key, const DnsAnswerFlags flags, size_t *start);
+int dns_packet_append_rr(DnsPacket *p, const DnsResourceRecord *rr, const DnsAnswerFlags flags, size_t *start, size_t *rdata_start);
+int dns_packet_append_opt(DnsPacket *p, uint16_t max_udp_size, bool edns0_do, int rcode, size_t *start);
+int dns_packet_append_question(DnsPacket *p, DnsQuestion *q);
+int dns_packet_append_answer(DnsPacket *p, DnsAnswer *a);
+
+void dns_packet_truncate(DnsPacket *p, size_t sz);
+int dns_packet_truncate_opt(DnsPacket *p);
+
+int dns_packet_read(DnsPacket *p, size_t sz, const void **ret, size_t *start);
+int dns_packet_read_blob(DnsPacket *p, void *d, size_t sz, size_t *start);
+int dns_packet_read_uint8(DnsPacket *p, uint8_t *ret, size_t *start);
+int dns_packet_read_uint16(DnsPacket *p, uint16_t *ret, size_t *start);
+int dns_packet_read_uint32(DnsPacket *p, uint32_t *ret, size_t *start);
+int dns_packet_read_string(DnsPacket *p, char **ret, size_t *start);
+int dns_packet_read_raw_string(DnsPacket *p, const void **ret, size_t *size, size_t *start);
+int dns_packet_read_name(DnsPacket *p, char **ret, bool allow_compression, size_t *start);
+int dns_packet_read_key(DnsPacket *p, DnsResourceKey **ret, bool *ret_cache_flush, size_t *start);
+int dns_packet_read_rr(DnsPacket *p, DnsResourceRecord **ret, bool *ret_cache_flush, size_t *start);
+
+void dns_packet_rewind(DnsPacket *p, size_t idx);
+
+int dns_packet_skip_question(DnsPacket *p);
+int dns_packet_extract(DnsPacket *p);
+
+static inline bool DNS_PACKET_SHALL_CACHE(DnsPacket *p) {
+ /* Never cache data originating from localhost, under the
+ * assumption, that it's coming from a locally DNS forwarder
+ * or server, that is caching on its own. */
+
+ return in_addr_is_localhost(p->family, &p->sender) == 0;
+}
+
+/* https://www.iana.org/assignments/dns-parameters/dns-parameters.xhtml#dns-parameters-6 */
+enum {
+ DNS_RCODE_SUCCESS = 0,
+ DNS_RCODE_FORMERR = 1,
+ DNS_RCODE_SERVFAIL = 2,
+ DNS_RCODE_NXDOMAIN = 3,
+ DNS_RCODE_NOTIMP = 4,
+ DNS_RCODE_REFUSED = 5,
+ DNS_RCODE_YXDOMAIN = 6,
+ DNS_RCODE_YXRRSET = 7,
+ DNS_RCODE_NXRRSET = 8,
+ DNS_RCODE_NOTAUTH = 9,
+ DNS_RCODE_NOTZONE = 10,
+ DNS_RCODE_BADVERS = 16,
+ DNS_RCODE_BADSIG = 16, /* duplicate value! */
+ DNS_RCODE_BADKEY = 17,
+ DNS_RCODE_BADTIME = 18,
+ DNS_RCODE_BADMODE = 19,
+ DNS_RCODE_BADNAME = 20,
+ DNS_RCODE_BADALG = 21,
+ DNS_RCODE_BADTRUNC = 22,
+ DNS_RCODE_BADCOOKIE = 23,
+ _DNS_RCODE_MAX_DEFINED,
+ _DNS_RCODE_MAX = 4095 /* 4 bit rcode in the header plus 8 bit rcode in OPT, makes 12 bit */
+};
+
+const char* dns_rcode_to_string(int i) _const_;
+int dns_rcode_from_string(const char *s) _pure_;
+
+const char* dns_protocol_to_string(DnsProtocol p) _const_;
+DnsProtocol dns_protocol_from_string(const char *s) _pure_;
+
+#define LLMNR_MULTICAST_IPV4_ADDRESS ((struct in_addr) { .s_addr = htobe32(224U << 24 | 252U) })
+#define LLMNR_MULTICAST_IPV6_ADDRESS ((struct in6_addr) { .s6_addr = { 0xFF, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x03 } })
+
+#define MDNS_MULTICAST_IPV4_ADDRESS ((struct in_addr) { .s_addr = htobe32(224U << 24 | 251U) })
+#define MDNS_MULTICAST_IPV6_ADDRESS ((struct in6_addr) { .s6_addr = { 0xFF, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfb } })
+
+extern const struct hash_ops dns_packet_hash_ops;
+
+static inline uint64_t SD_RESOLVED_FLAGS_MAKE(DnsProtocol protocol, int family, bool authenticated) {
+ uint64_t f;
+
+ /* Converts a protocol + family into a flags field as used in queries and responses */
+
+ f = authenticated ? SD_RESOLVED_AUTHENTICATED : 0;
+
+ switch (protocol) {
+ case DNS_PROTOCOL_DNS:
+ return f|SD_RESOLVED_DNS;
+
+ case DNS_PROTOCOL_LLMNR:
+ return f|(family == AF_INET6 ? SD_RESOLVED_LLMNR_IPV6 : SD_RESOLVED_LLMNR_IPV4);
+
+ case DNS_PROTOCOL_MDNS:
+ return f|(family == AF_INET6 ? SD_RESOLVED_MDNS_IPV6 : SD_RESOLVED_MDNS_IPV4);
+
+ default:
+ return f;
+ }
+}
+
+static inline size_t dns_packet_size_max(DnsPacket *p) {
+ assert(p);
+
+ /* Why not insist on a fully initialized max_size during DnsPacket construction? Well, this way it's easy to
+ * allocate a transient, throw-away DnsPacket on the stack by simple zero initialization, without having to
+ * deal with explicit field initialization. */
+
+ return p->max_size != 0 ? p->max_size : DNS_PACKET_SIZE_MAX;
+}
diff --git a/src/resolve/resolved-dns-query.c b/src/resolve/resolved-dns-query.c
new file mode 100644
index 0000000..535ef4e
--- /dev/null
+++ b/src/resolve/resolved-dns-query.c
@@ -0,0 +1,1109 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "dns-type.h"
+#include "hostname-util.h"
+#include "local-addresses.h"
+#include "resolved-dns-query.h"
+#include "resolved-dns-synthesize.h"
+#include "resolved-etc-hosts.h"
+#include "string-util.h"
+
+#define CNAME_MAX 8
+#define QUERIES_MAX 2048
+#define AUXILIARY_QUERIES_MAX 64
+
+static int dns_query_candidate_new(DnsQueryCandidate **ret, DnsQuery *q, DnsScope *s) {
+ DnsQueryCandidate *c;
+
+ assert(ret);
+ assert(q);
+ assert(s);
+
+ c = new0(DnsQueryCandidate, 1);
+ if (!c)
+ return -ENOMEM;
+
+ c->query = q;
+ c->scope = s;
+
+ LIST_PREPEND(candidates_by_query, q->candidates, c);
+ LIST_PREPEND(candidates_by_scope, s->query_candidates, c);
+
+ *ret = c;
+ return 0;
+}
+
+static void dns_query_candidate_stop(DnsQueryCandidate *c) {
+ DnsTransaction *t;
+
+ assert(c);
+
+ while ((t = set_steal_first(c->transactions))) {
+ set_remove(t->notify_query_candidates, c);
+ set_remove(t->notify_query_candidates_done, c);
+ dns_transaction_gc(t);
+ }
+}
+
+DnsQueryCandidate* dns_query_candidate_free(DnsQueryCandidate *c) {
+
+ if (!c)
+ return NULL;
+
+ dns_query_candidate_stop(c);
+
+ set_free(c->transactions);
+ dns_search_domain_unref(c->search_domain);
+
+ if (c->query)
+ LIST_REMOVE(candidates_by_query, c->query->candidates, c);
+
+ if (c->scope)
+ LIST_REMOVE(candidates_by_scope, c->scope->query_candidates, c);
+
+ return mfree(c);
+}
+
+static int dns_query_candidate_next_search_domain(DnsQueryCandidate *c) {
+ DnsSearchDomain *next = NULL;
+
+ assert(c);
+
+ if (c->search_domain && c->search_domain->linked)
+ next = c->search_domain->domains_next;
+ else
+ next = dns_scope_get_search_domains(c->scope);
+
+ for (;;) {
+ if (!next) /* We hit the end of the list */
+ return 0;
+
+ if (!next->route_only)
+ break;
+
+ /* Skip over route-only domains */
+ next = next->domains_next;
+ }
+
+ dns_search_domain_unref(c->search_domain);
+ c->search_domain = dns_search_domain_ref(next);
+
+ return 1;
+}
+
+static int dns_query_candidate_add_transaction(DnsQueryCandidate *c, DnsResourceKey *key) {
+ DnsTransaction *t;
+ int r;
+
+ assert(c);
+ assert(key);
+
+ t = dns_scope_find_transaction(c->scope, key, true);
+ if (!t) {
+ r = dns_transaction_new(&t, c->scope, key);
+ if (r < 0)
+ return r;
+ } else {
+ if (set_contains(c->transactions, t))
+ return 0;
+ }
+
+ r = set_ensure_allocated(&c->transactions, NULL);
+ if (r < 0)
+ goto gc;
+
+ r = set_ensure_allocated(&t->notify_query_candidates, NULL);
+ if (r < 0)
+ goto gc;
+
+ r = set_ensure_allocated(&t->notify_query_candidates_done, NULL);
+ if (r < 0)
+ goto gc;
+
+ r = set_put(t->notify_query_candidates, c);
+ if (r < 0)
+ goto gc;
+
+ r = set_put(c->transactions, t);
+ if (r < 0) {
+ (void) set_remove(t->notify_query_candidates, c);
+ goto gc;
+ }
+
+ t->clamp_ttl = c->query->clamp_ttl;
+ return 1;
+
+gc:
+ dns_transaction_gc(t);
+ return r;
+}
+
+static int dns_query_candidate_go(DnsQueryCandidate *c) {
+ DnsTransaction *t;
+ Iterator i;
+ int r;
+ unsigned n = 0;
+
+ assert(c);
+
+ /* Start the transactions that are not started yet */
+ SET_FOREACH(t, c->transactions, i) {
+ if (t->state != DNS_TRANSACTION_NULL)
+ continue;
+
+ r = dns_transaction_go(t);
+ if (r < 0)
+ return r;
+
+ n++;
+ }
+
+ /* If there was nothing to start, then let's proceed immediately */
+ if (n == 0)
+ dns_query_candidate_notify(c);
+
+ return 0;
+}
+
+static DnsTransactionState dns_query_candidate_state(DnsQueryCandidate *c) {
+ DnsTransactionState state = DNS_TRANSACTION_NO_SERVERS;
+ DnsTransaction *t;
+ Iterator i;
+
+ assert(c);
+
+ if (c->error_code != 0)
+ return DNS_TRANSACTION_ERRNO;
+
+ SET_FOREACH(t, c->transactions, i) {
+
+ switch (t->state) {
+
+ case DNS_TRANSACTION_NULL:
+ /* If there's a NULL transaction pending, then
+ * this means not all transactions where
+ * started yet, and we were called from within
+ * the stackframe that is supposed to start
+ * remaining transactions. In this case,
+ * simply claim the candidate is pending. */
+
+ case DNS_TRANSACTION_PENDING:
+ case DNS_TRANSACTION_VALIDATING:
+ /* If there's one transaction currently in
+ * VALIDATING state, then this means there's
+ * also one in PENDING state, hence we can
+ * return PENDING immediately. */
+ return DNS_TRANSACTION_PENDING;
+
+ case DNS_TRANSACTION_SUCCESS:
+ state = t->state;
+ break;
+
+ default:
+ if (state != DNS_TRANSACTION_SUCCESS)
+ state = t->state;
+
+ break;
+ }
+ }
+
+ return state;
+}
+
+static bool dns_query_candidate_is_routable(DnsQueryCandidate *c, uint16_t type) {
+ int family;
+
+ assert(c);
+
+ /* Checks whether the specified RR type matches an address family that is routable on the link(s) the scope of
+ * this candidate belongs to. Specifically, whether there's a routable IPv4 address on it if we query an A RR,
+ * or a routable IPv6 address if we query an AAAA RR. */
+
+ if (!c->query->suppress_unroutable_family)
+ return true;
+
+ if (c->scope->protocol != DNS_PROTOCOL_DNS)
+ return true;
+
+ family = dns_type_to_af(type);
+ if (family < 0)
+ return true;
+
+ if (c->scope->link)
+ return link_relevant(c->scope->link, family, false);
+ else
+ return manager_routable(c->scope->manager, family);
+}
+
+static int dns_query_candidate_setup_transactions(DnsQueryCandidate *c) {
+ DnsQuestion *question;
+ DnsResourceKey *key;
+ int n = 0, r;
+
+ assert(c);
+
+ dns_query_candidate_stop(c);
+
+ question = dns_query_question_for_protocol(c->query, c->scope->protocol);
+
+ /* Create one transaction per question key */
+ DNS_QUESTION_FOREACH(key, question) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *new_key = NULL;
+ DnsResourceKey *qkey;
+
+ if (!dns_query_candidate_is_routable(c, key->type))
+ continue;
+
+ if (c->search_domain) {
+ r = dns_resource_key_new_append_suffix(&new_key, key, c->search_domain->name);
+ if (r < 0)
+ goto fail;
+
+ qkey = new_key;
+ } else
+ qkey = key;
+
+ if (!dns_scope_good_key(c->scope, qkey))
+ continue;
+
+ r = dns_query_candidate_add_transaction(c, qkey);
+ if (r < 0)
+ goto fail;
+
+ n++;
+ }
+
+ return n;
+
+fail:
+ dns_query_candidate_stop(c);
+ return r;
+}
+
+void dns_query_candidate_notify(DnsQueryCandidate *c) {
+ DnsTransactionState state;
+ int r;
+
+ assert(c);
+
+ state = dns_query_candidate_state(c);
+
+ if (DNS_TRANSACTION_IS_LIVE(state))
+ return;
+
+ if (state != DNS_TRANSACTION_SUCCESS && c->search_domain) {
+
+ r = dns_query_candidate_next_search_domain(c);
+ if (r < 0)
+ goto fail;
+
+ if (r > 0) {
+ /* OK, there's another search domain to try, let's do so. */
+
+ r = dns_query_candidate_setup_transactions(c);
+ if (r < 0)
+ goto fail;
+
+ if (r > 0) {
+ /* New transactions where queued. Start them and wait */
+
+ r = dns_query_candidate_go(c);
+ if (r < 0)
+ goto fail;
+
+ return;
+ }
+ }
+
+ }
+
+ dns_query_ready(c->query);
+ return;
+
+fail:
+ log_warning_errno(r, "Failed to follow search domains: %m");
+ c->error_code = r;
+ dns_query_ready(c->query);
+}
+
+static void dns_query_stop(DnsQuery *q) {
+ DnsQueryCandidate *c;
+
+ assert(q);
+
+ q->timeout_event_source = sd_event_source_unref(q->timeout_event_source);
+
+ LIST_FOREACH(candidates_by_query, c, q->candidates)
+ dns_query_candidate_stop(c);
+}
+
+static void dns_query_free_candidates(DnsQuery *q) {
+ assert(q);
+
+ while (q->candidates)
+ dns_query_candidate_free(q->candidates);
+}
+
+static void dns_query_reset_answer(DnsQuery *q) {
+ assert(q);
+
+ q->answer = dns_answer_unref(q->answer);
+ q->answer_rcode = 0;
+ q->answer_dnssec_result = _DNSSEC_RESULT_INVALID;
+ q->answer_errno = 0;
+ q->answer_authenticated = false;
+ q->answer_protocol = _DNS_PROTOCOL_INVALID;
+ q->answer_family = AF_UNSPEC;
+ q->answer_search_domain = dns_search_domain_unref(q->answer_search_domain);
+}
+
+DnsQuery *dns_query_free(DnsQuery *q) {
+ if (!q)
+ return NULL;
+
+ while (q->auxiliary_queries)
+ dns_query_free(q->auxiliary_queries);
+
+ if (q->auxiliary_for) {
+ assert(q->auxiliary_for->n_auxiliary_queries > 0);
+ q->auxiliary_for->n_auxiliary_queries--;
+ LIST_REMOVE(auxiliary_queries, q->auxiliary_for->auxiliary_queries, q);
+ }
+
+ dns_query_free_candidates(q);
+
+ dns_question_unref(q->question_idna);
+ dns_question_unref(q->question_utf8);
+
+ dns_query_reset_answer(q);
+
+ sd_bus_message_unref(q->request);
+ sd_bus_track_unref(q->bus_track);
+
+ dns_packet_unref(q->request_dns_packet);
+ dns_packet_unref(q->reply_dns_packet);
+
+ if (q->request_dns_stream) {
+ /* Detach the stream from our query, in case something else keeps a reference to it. */
+ q->request_dns_stream->complete = NULL;
+ q->request_dns_stream->on_packet = NULL;
+ q->request_dns_stream->query = NULL;
+ dns_stream_unref(q->request_dns_stream);
+ }
+
+ free(q->request_address_string);
+
+ if (q->manager) {
+ LIST_REMOVE(queries, q->manager->dns_queries, q);
+ q->manager->n_dns_queries--;
+ }
+
+ return mfree(q);
+}
+
+int dns_query_new(
+ Manager *m,
+ DnsQuery **ret,
+ DnsQuestion *question_utf8,
+ DnsQuestion *question_idna,
+ int ifindex,
+ uint64_t flags) {
+
+ _cleanup_(dns_query_freep) DnsQuery *q = NULL;
+ DnsResourceKey *key;
+ bool good = false;
+ int r;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ assert(m);
+
+ if (dns_question_size(question_utf8) > 0) {
+ r = dns_question_is_valid_for_query(question_utf8);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ good = true;
+ }
+
+ /* If the IDNA and UTF8 questions are the same, merge their references */
+ r = dns_question_is_equal(question_idna, question_utf8);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ question_idna = question_utf8;
+ else {
+ if (dns_question_size(question_idna) > 0) {
+ r = dns_question_is_valid_for_query(question_idna);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ good = true;
+ }
+ }
+
+ if (!good) /* don't allow empty queries */
+ return -EINVAL;
+
+ if (m->n_dns_queries >= QUERIES_MAX)
+ return -EBUSY;
+
+ q = new0(DnsQuery, 1);
+ if (!q)
+ return -ENOMEM;
+
+ q->question_utf8 = dns_question_ref(question_utf8);
+ q->question_idna = dns_question_ref(question_idna);
+ q->ifindex = ifindex;
+ q->flags = flags;
+ q->answer_dnssec_result = _DNSSEC_RESULT_INVALID;
+ q->answer_protocol = _DNS_PROTOCOL_INVALID;
+ q->answer_family = AF_UNSPEC;
+
+ /* First dump UTF8 question */
+ DNS_QUESTION_FOREACH(key, question_utf8)
+ log_debug("Looking up RR for %s.",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+
+ /* And then dump the IDNA question, but only what hasn't been dumped already through the UTF8 question. */
+ DNS_QUESTION_FOREACH(key, question_idna) {
+ r = dns_question_contains(question_utf8, key);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ log_debug("Looking up IDNA RR for %s.",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+ }
+
+ LIST_PREPEND(queries, m->dns_queries, q);
+ m->n_dns_queries++;
+ q->manager = m;
+
+ if (ret)
+ *ret = q;
+ q = NULL;
+
+ return 0;
+}
+
+int dns_query_make_auxiliary(DnsQuery *q, DnsQuery *auxiliary_for) {
+ assert(q);
+ assert(auxiliary_for);
+
+ /* Ensure that the query is not auxiliary yet, and
+ * nothing else is auxiliary to it either */
+ assert(!q->auxiliary_for);
+ assert(!q->auxiliary_queries);
+
+ /* Ensure that the unit we shall be made auxiliary for isn't
+ * auxiliary itself */
+ assert(!auxiliary_for->auxiliary_for);
+
+ if (auxiliary_for->n_auxiliary_queries >= AUXILIARY_QUERIES_MAX)
+ return -EAGAIN;
+
+ LIST_PREPEND(auxiliary_queries, auxiliary_for->auxiliary_queries, q);
+ q->auxiliary_for = auxiliary_for;
+
+ auxiliary_for->n_auxiliary_queries++;
+ return 0;
+}
+
+static void dns_query_complete(DnsQuery *q, DnsTransactionState state) {
+ assert(q);
+ assert(!DNS_TRANSACTION_IS_LIVE(state));
+ assert(DNS_TRANSACTION_IS_LIVE(q->state));
+
+ /* Note that this call might invalidate the query. Callers
+ * should hence not attempt to access the query or transaction
+ * after calling this function. */
+
+ q->state = state;
+
+ dns_query_stop(q);
+ if (q->complete)
+ q->complete(q);
+}
+
+static int on_query_timeout(sd_event_source *s, usec_t usec, void *userdata) {
+ DnsQuery *q = userdata;
+
+ assert(s);
+ assert(q);
+
+ dns_query_complete(q, DNS_TRANSACTION_TIMEOUT);
+ return 0;
+}
+
+static int dns_query_add_candidate(DnsQuery *q, DnsScope *s) {
+ DnsQueryCandidate *c;
+ int r;
+
+ assert(q);
+ assert(s);
+
+ r = dns_query_candidate_new(&c, q, s);
+ if (r < 0)
+ return r;
+
+ /* If this a single-label domain on DNS, we might append a suitable search domain first. */
+ if ((q->flags & SD_RESOLVED_NO_SEARCH) == 0 &&
+ dns_scope_name_needs_search_domain(s, dns_question_first_name(q->question_idna))) {
+ /* OK, we need a search domain now. Let's find one for this scope */
+
+ r = dns_query_candidate_next_search_domain(c);
+ if (r <= 0) /* if there's no search domain, then we won't add any transaction. */
+ goto fail;
+ }
+
+ r = dns_query_candidate_setup_transactions(c);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ dns_query_candidate_free(c);
+ return r;
+}
+
+static int dns_query_synthesize_reply(DnsQuery *q, DnsTransactionState *state) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ int r;
+
+ assert(q);
+ assert(state);
+
+ /* Tries to synthesize localhost RR replies (and others) where appropriate. Note that this is done *after* the
+ * the normal lookup finished. The data from the network hence takes precedence over the data we
+ * synthesize. (But note that many scopes refuse to resolve certain domain names) */
+
+ if (!IN_SET(*state,
+ DNS_TRANSACTION_RCODE_FAILURE,
+ DNS_TRANSACTION_NO_SERVERS,
+ DNS_TRANSACTION_TIMEOUT,
+ DNS_TRANSACTION_ATTEMPTS_MAX_REACHED,
+ DNS_TRANSACTION_NETWORK_DOWN,
+ DNS_TRANSACTION_NOT_FOUND))
+ return 0;
+
+ r = dns_synthesize_answer(
+ q->manager,
+ q->question_utf8,
+ q->ifindex,
+ &answer);
+ if (r == -ENXIO) {
+ /* If we get ENXIO this tells us to generate NXDOMAIN unconditionally. */
+
+ dns_query_reset_answer(q);
+ q->answer_rcode = DNS_RCODE_NXDOMAIN;
+ q->answer_protocol = dns_synthesize_protocol(q->flags);
+ q->answer_family = dns_synthesize_family(q->flags);
+ q->answer_authenticated = true;
+ *state = DNS_TRANSACTION_RCODE_FAILURE;
+
+ return 0;
+ }
+ if (r <= 0)
+ return r;
+
+ dns_query_reset_answer(q);
+
+ q->answer = TAKE_PTR(answer);
+ q->answer_rcode = DNS_RCODE_SUCCESS;
+ q->answer_protocol = dns_synthesize_protocol(q->flags);
+ q->answer_family = dns_synthesize_family(q->flags);
+ q->answer_authenticated = true;
+
+ *state = DNS_TRANSACTION_SUCCESS;
+
+ return 1;
+}
+
+static int dns_query_try_etc_hosts(DnsQuery *q) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ int r;
+
+ assert(q);
+
+ /* Looks in /etc/hosts for matching entries. Note that this is done *before* the normal lookup is done. The
+ * data from /etc/hosts hence takes precedence over the network. */
+
+ r = manager_etc_hosts_lookup(
+ q->manager,
+ q->question_utf8,
+ &answer);
+ if (r <= 0)
+ return r;
+
+ dns_query_reset_answer(q);
+
+ q->answer = TAKE_PTR(answer);
+ q->answer_rcode = DNS_RCODE_SUCCESS;
+ q->answer_protocol = dns_synthesize_protocol(q->flags);
+ q->answer_family = dns_synthesize_family(q->flags);
+ q->answer_authenticated = true;
+
+ return 1;
+}
+
+int dns_query_go(DnsQuery *q) {
+ DnsScopeMatch found = DNS_SCOPE_NO;
+ DnsScope *s, *first = NULL;
+ DnsQueryCandidate *c;
+ int r;
+
+ assert(q);
+
+ if (q->state != DNS_TRANSACTION_NULL)
+ return 0;
+
+ r = dns_query_try_etc_hosts(q);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ dns_query_complete(q, DNS_TRANSACTION_SUCCESS);
+ return 1;
+ }
+
+ LIST_FOREACH(scopes, s, q->manager->dns_scopes) {
+ DnsScopeMatch match;
+ const char *name;
+
+ name = dns_question_first_name(dns_query_question_for_protocol(q, s->protocol));
+ if (!name)
+ continue;
+
+ match = dns_scope_good_domain(s, q->ifindex, q->flags, name);
+ if (match < 0) {
+ log_debug("Couldn't check if '%s' matches against scope, ignoring.", name);
+ continue;
+ }
+
+ if (match > found) { /* Does this match better? If so, remember how well it matched, and the first one
+ * that matches this well */
+ found = match;
+ first = s;
+ }
+ }
+
+ if (found == DNS_SCOPE_NO) {
+ DnsTransactionState state = DNS_TRANSACTION_NO_SERVERS;
+
+ r = dns_query_synthesize_reply(q, &state);
+ if (r < 0)
+ return r;
+
+ dns_query_complete(q, state);
+ return 1;
+ }
+
+ r = dns_query_add_candidate(q, first);
+ if (r < 0)
+ goto fail;
+
+ LIST_FOREACH(scopes, s, first->scopes_next) {
+ DnsScopeMatch match;
+ const char *name;
+
+ name = dns_question_first_name(dns_query_question_for_protocol(q, s->protocol));
+ if (!name)
+ continue;
+
+ match = dns_scope_good_domain(s, q->ifindex, q->flags, name);
+ if (match < 0) {
+ log_debug("Couldn't check if '%s' matches against scope, ignoring.", name);
+ continue;
+ }
+
+ if (match < found)
+ continue;
+
+ r = dns_query_add_candidate(q, s);
+ if (r < 0)
+ goto fail;
+ }
+
+ dns_query_reset_answer(q);
+
+ r = sd_event_add_time(
+ q->manager->event,
+ &q->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ now(clock_boottime_or_monotonic()) + SD_RESOLVED_QUERY_TIMEOUT_USEC,
+ 0, on_query_timeout, q);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(q->timeout_event_source, "query-timeout");
+
+ q->state = DNS_TRANSACTION_PENDING;
+ q->block_ready++;
+
+ /* Start the transactions */
+ LIST_FOREACH(candidates_by_query, c, q->candidates) {
+ r = dns_query_candidate_go(c);
+ if (r < 0) {
+ q->block_ready--;
+ goto fail;
+ }
+ }
+
+ q->block_ready--;
+ dns_query_ready(q);
+
+ return 1;
+
+fail:
+ dns_query_stop(q);
+ return r;
+}
+
+static void dns_query_accept(DnsQuery *q, DnsQueryCandidate *c) {
+ DnsTransactionState state = DNS_TRANSACTION_NO_SERVERS;
+ bool has_authenticated = false, has_non_authenticated = false;
+ DnssecResult dnssec_result_authenticated = _DNSSEC_RESULT_INVALID, dnssec_result_non_authenticated = _DNSSEC_RESULT_INVALID;
+ DnsTransaction *t;
+ Iterator i;
+ int r;
+
+ assert(q);
+
+ if (!c) {
+ r = dns_query_synthesize_reply(q, &state);
+ if (r < 0)
+ goto fail;
+
+ dns_query_complete(q, state);
+ return;
+ }
+
+ if (c->error_code != 0) {
+ /* If the candidate had an error condition of its own, start with that. */
+ state = DNS_TRANSACTION_ERRNO;
+ q->answer = dns_answer_unref(q->answer);
+ q->answer_rcode = 0;
+ q->answer_dnssec_result = _DNSSEC_RESULT_INVALID;
+ q->answer_authenticated = false;
+ q->answer_errno = c->error_code;
+ }
+
+ SET_FOREACH(t, c->transactions, i) {
+
+ switch (t->state) {
+
+ case DNS_TRANSACTION_SUCCESS: {
+ /* We found a successfully reply, merge it into the answer */
+ r = dns_answer_extend(&q->answer, t->answer);
+ if (r < 0)
+ goto fail;
+
+ q->answer_rcode = t->answer_rcode;
+ q->answer_errno = 0;
+
+ if (t->answer_authenticated) {
+ has_authenticated = true;
+ dnssec_result_authenticated = t->answer_dnssec_result;
+ } else {
+ has_non_authenticated = true;
+ dnssec_result_non_authenticated = t->answer_dnssec_result;
+ }
+
+ state = DNS_TRANSACTION_SUCCESS;
+ break;
+ }
+
+ case DNS_TRANSACTION_NULL:
+ case DNS_TRANSACTION_PENDING:
+ case DNS_TRANSACTION_VALIDATING:
+ case DNS_TRANSACTION_ABORTED:
+ /* Ignore transactions that didn't complete */
+ continue;
+
+ default:
+ /* Any kind of failure? Store the data away, if there's nothing stored yet. */
+ if (state == DNS_TRANSACTION_SUCCESS)
+ continue;
+
+ /* If there's already an authenticated negative reply stored, then prefer that over any unauthenticated one */
+ if (q->answer_authenticated && !t->answer_authenticated)
+ continue;
+
+ q->answer = dns_answer_unref(q->answer);
+ q->answer_rcode = t->answer_rcode;
+ q->answer_dnssec_result = t->answer_dnssec_result;
+ q->answer_authenticated = t->answer_authenticated;
+ q->answer_errno = t->answer_errno;
+
+ state = t->state;
+ break;
+ }
+ }
+
+ if (state == DNS_TRANSACTION_SUCCESS) {
+ q->answer_authenticated = has_authenticated && !has_non_authenticated;
+ q->answer_dnssec_result = q->answer_authenticated ? dnssec_result_authenticated : dnssec_result_non_authenticated;
+ }
+
+ q->answer_protocol = c->scope->protocol;
+ q->answer_family = c->scope->family;
+
+ dns_search_domain_unref(q->answer_search_domain);
+ q->answer_search_domain = dns_search_domain_ref(c->search_domain);
+
+ r = dns_query_synthesize_reply(q, &state);
+ if (r < 0)
+ goto fail;
+
+ dns_query_complete(q, state);
+ return;
+
+fail:
+ q->answer_errno = -r;
+ dns_query_complete(q, DNS_TRANSACTION_ERRNO);
+}
+
+void dns_query_ready(DnsQuery *q) {
+
+ DnsQueryCandidate *bad = NULL, *c;
+ bool pending = false;
+
+ assert(q);
+ assert(DNS_TRANSACTION_IS_LIVE(q->state));
+
+ /* Note that this call might invalidate the query. Callers
+ * should hence not attempt to access the query or transaction
+ * after calling this function, unless the block_ready
+ * counter was explicitly bumped before doing so. */
+
+ if (q->block_ready > 0)
+ return;
+
+ LIST_FOREACH(candidates_by_query, c, q->candidates) {
+ DnsTransactionState state;
+
+ state = dns_query_candidate_state(c);
+ switch (state) {
+
+ case DNS_TRANSACTION_SUCCESS:
+ /* One of the candidates is successful,
+ * let's use it, and copy its data out */
+ dns_query_accept(q, c);
+ return;
+
+ case DNS_TRANSACTION_NULL:
+ case DNS_TRANSACTION_PENDING:
+ case DNS_TRANSACTION_VALIDATING:
+ /* One of the candidates is still going on,
+ * let's maybe wait for it */
+ pending = true;
+ break;
+
+ default:
+ /* Any kind of failure */
+ bad = c;
+ break;
+ }
+ }
+
+ if (pending)
+ return;
+
+ dns_query_accept(q, bad);
+}
+
+static int dns_query_cname_redirect(DnsQuery *q, const DnsResourceRecord *cname) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *nq_idna = NULL, *nq_utf8 = NULL;
+ int r, k;
+
+ assert(q);
+
+ q->n_cname_redirects++;
+ if (q->n_cname_redirects > CNAME_MAX)
+ return -ELOOP;
+
+ r = dns_question_cname_redirect(q->question_idna, cname, &nq_idna);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ log_debug("Following CNAME/DNAME %s → %s.", dns_question_first_name(q->question_idna), dns_question_first_name(nq_idna));
+
+ k = dns_question_is_equal(q->question_idna, q->question_utf8);
+ if (k < 0)
+ return r;
+ if (k > 0) {
+ /* Same question? Shortcut new question generation */
+ nq_utf8 = dns_question_ref(nq_idna);
+ k = r;
+ } else {
+ k = dns_question_cname_redirect(q->question_utf8, cname, &nq_utf8);
+ if (k < 0)
+ return k;
+ else if (k > 0)
+ log_debug("Following UTF8 CNAME/DNAME %s → %s.", dns_question_first_name(q->question_utf8), dns_question_first_name(nq_utf8));
+ }
+
+ if (r == 0 && k == 0) /* No actual cname happened? */
+ return -ELOOP;
+
+ if (q->answer_protocol == DNS_PROTOCOL_DNS) {
+ /* Don't permit CNAME redirects from unicast DNS to LLMNR or MulticastDNS, so that global resources
+ * cannot invade the local namespace. The opposite way we permit: local names may redirect to global
+ * ones. */
+
+ q->flags &= ~(SD_RESOLVED_LLMNR|SD_RESOLVED_MDNS); /* mask away the local protocols */
+ }
+
+ /* Turn off searching for the new name */
+ q->flags |= SD_RESOLVED_NO_SEARCH;
+
+ dns_question_unref(q->question_idna);
+ q->question_idna = TAKE_PTR(nq_idna);
+
+ dns_question_unref(q->question_utf8);
+ q->question_utf8 = TAKE_PTR(nq_utf8);
+
+ dns_query_free_candidates(q);
+ dns_query_reset_answer(q);
+
+ q->state = DNS_TRANSACTION_NULL;
+
+ return 0;
+}
+
+int dns_query_process_cname(DnsQuery *q) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *cname = NULL;
+ DnsQuestion *question;
+ DnsResourceRecord *rr;
+ int r;
+
+ assert(q);
+
+ if (!IN_SET(q->state, DNS_TRANSACTION_SUCCESS, DNS_TRANSACTION_NULL))
+ return DNS_QUERY_NOMATCH;
+
+ question = dns_query_question_for_protocol(q, q->answer_protocol);
+
+ DNS_ANSWER_FOREACH(rr, q->answer) {
+ r = dns_question_matches_rr(question, rr, DNS_SEARCH_DOMAIN_NAME(q->answer_search_domain));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return DNS_QUERY_MATCH; /* The answer matches directly, no need to follow cnames */
+
+ r = dns_question_matches_cname_or_dname(question, rr, DNS_SEARCH_DOMAIN_NAME(q->answer_search_domain));
+ if (r < 0)
+ return r;
+ if (r > 0 && !cname)
+ cname = dns_resource_record_ref(rr);
+ }
+
+ if (!cname)
+ return DNS_QUERY_NOMATCH; /* No match and no cname to follow */
+
+ if (q->flags & SD_RESOLVED_NO_CNAME)
+ return -ELOOP;
+
+ if (!q->answer_authenticated)
+ q->previous_redirect_unauthenticated = true;
+
+ /* OK, let's actually follow the CNAME */
+ r = dns_query_cname_redirect(q, cname);
+ if (r < 0)
+ return r;
+
+ /* Let's see if the answer can already answer the new
+ * redirected question */
+ r = dns_query_process_cname(q);
+ if (r != DNS_QUERY_NOMATCH)
+ return r;
+
+ /* OK, it cannot, let's begin with the new query */
+ r = dns_query_go(q);
+ if (r < 0)
+ return r;
+
+ return DNS_QUERY_RESTARTED; /* We restarted the query for a new cname */
+}
+
+static int on_bus_track(sd_bus_track *t, void *userdata) {
+ DnsQuery *q = userdata;
+
+ assert(t);
+ assert(q);
+
+ log_debug("Client of active query vanished, aborting query.");
+ dns_query_complete(q, DNS_TRANSACTION_ABORTED);
+ return 0;
+}
+
+int dns_query_bus_track(DnsQuery *q, sd_bus_message *m) {
+ int r;
+
+ assert(q);
+ assert(m);
+
+ if (!q->bus_track) {
+ r = sd_bus_track_new(sd_bus_message_get_bus(m), &q->bus_track, on_bus_track, q);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_track_add_sender(q->bus_track, m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+DnsQuestion* dns_query_question_for_protocol(DnsQuery *q, DnsProtocol protocol) {
+ assert(q);
+
+ switch (protocol) {
+
+ case DNS_PROTOCOL_DNS:
+ return q->question_idna;
+
+ case DNS_PROTOCOL_MDNS:
+ case DNS_PROTOCOL_LLMNR:
+ return q->question_utf8;
+
+ default:
+ return NULL;
+ }
+}
+
+const char *dns_query_string(DnsQuery *q) {
+ const char *name;
+ int r;
+
+ /* Returns a somewhat useful human-readable lookup key string for this query */
+
+ if (q->request_address_string)
+ return q->request_address_string;
+
+ if (q->request_address_valid) {
+ r = in_addr_to_string(q->request_family, &q->request_address, &q->request_address_string);
+ if (r >= 0)
+ return q->request_address_string;
+ }
+
+ name = dns_question_first_name(q->question_utf8);
+ if (name)
+ return name;
+
+ return dns_question_first_name(q->question_idna);
+}
+
+bool dns_query_fully_authenticated(DnsQuery *q) {
+ assert(q);
+
+ return q->answer_authenticated && !q->previous_redirect_unauthenticated;
+}
diff --git a/src/resolve/resolved-dns-query.h b/src/resolve/resolved-dns-query.h
new file mode 100644
index 0000000..5ee946b
--- /dev/null
+++ b/src/resolve/resolved-dns-query.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "set.h"
+
+typedef struct DnsQueryCandidate DnsQueryCandidate;
+typedef struct DnsQuery DnsQuery;
+
+#include "resolved-dns-answer.h"
+#include "resolved-dns-question.h"
+#include "resolved-dns-stream.h"
+#include "resolved-dns-search-domain.h"
+
+struct DnsQueryCandidate {
+ DnsQuery *query;
+ DnsScope *scope;
+
+ DnsSearchDomain *search_domain;
+
+ int error_code;
+ Set *transactions;
+
+ LIST_FIELDS(DnsQueryCandidate, candidates_by_query);
+ LIST_FIELDS(DnsQueryCandidate, candidates_by_scope);
+};
+
+struct DnsQuery {
+ Manager *manager;
+
+ /* When resolving a service, we first create a TXT+SRV query,
+ * and then for the hostnames we discover auxiliary A+AAAA
+ * queries. This pointer always points from the auxiliary
+ * queries back to the TXT+SRV query. */
+ DnsQuery *auxiliary_for;
+ LIST_HEAD(DnsQuery, auxiliary_queries);
+ unsigned n_auxiliary_queries;
+ int auxiliary_result;
+
+ /* The question, formatted in IDNA for use on classic DNS, and as UTF8 for use in LLMNR or mDNS. Note that even
+ * on classic DNS some labels might use UTF8 encoding. Specifically, DNS-SD service names (in contrast to their
+ * domain suffixes) use UTF-8 encoding even on DNS. Thus, the difference between these two fields is mostly
+ * relevant only for explicit *hostname* lookups as well as the domain suffixes of service lookups. */
+ DnsQuestion *question_idna;
+ DnsQuestion *question_utf8;
+
+ uint64_t flags;
+ int ifindex;
+
+ /* If true, A or AAAA RR lookups will be suppressed on links with no routable address of the matching address
+ * family */
+ bool suppress_unroutable_family;
+
+ /* If true, the RR TTLs of the answer will be clamped by their current left validity in the cache */
+ bool clamp_ttl;
+
+ DnsTransactionState state;
+ unsigned n_cname_redirects;
+
+ LIST_HEAD(DnsQueryCandidate, candidates);
+ sd_event_source *timeout_event_source;
+
+ /* Discovered data */
+ DnsAnswer *answer;
+ int answer_rcode;
+ DnssecResult answer_dnssec_result;
+ bool answer_authenticated;
+ DnsProtocol answer_protocol;
+ int answer_family;
+ DnsSearchDomain *answer_search_domain;
+ int answer_errno; /* if state is DNS_TRANSACTION_ERRNO */
+ bool previous_redirect_unauthenticated;
+
+ /* Bus client information */
+ sd_bus_message *request;
+ int request_family;
+ bool request_address_valid;
+ union in_addr_union request_address;
+ unsigned block_all_complete;
+ char *request_address_string;
+
+ /* DNS stub information */
+ DnsPacket *request_dns_packet;
+ DnsStream *request_dns_stream;
+ DnsPacket *reply_dns_packet;
+
+ /* Completion callback */
+ void (*complete)(DnsQuery* q);
+ unsigned block_ready;
+
+ sd_bus_track *bus_track;
+
+ LIST_FIELDS(DnsQuery, queries);
+ LIST_FIELDS(DnsQuery, auxiliary_queries);
+};
+
+enum {
+ DNS_QUERY_MATCH,
+ DNS_QUERY_NOMATCH,
+ DNS_QUERY_RESTARTED,
+};
+
+DnsQueryCandidate* dns_query_candidate_free(DnsQueryCandidate *c);
+void dns_query_candidate_notify(DnsQueryCandidate *c);
+
+int dns_query_new(Manager *m, DnsQuery **q, DnsQuestion *question_utf8, DnsQuestion *question_idna, int family, uint64_t flags);
+DnsQuery *dns_query_free(DnsQuery *q);
+
+int dns_query_make_auxiliary(DnsQuery *q, DnsQuery *auxiliary_for);
+
+int dns_query_go(DnsQuery *q);
+void dns_query_ready(DnsQuery *q);
+
+int dns_query_process_cname(DnsQuery *q);
+
+int dns_query_bus_track(DnsQuery *q, sd_bus_message *m);
+
+DnsQuestion* dns_query_question_for_protocol(DnsQuery *q, DnsProtocol protocol);
+
+const char *dns_query_string(DnsQuery *q);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsQuery*, dns_query_free);
+
+bool dns_query_fully_authenticated(DnsQuery *q);
diff --git a/src/resolve/resolved-dns-question.c b/src/resolve/resolved-dns-question.c
new file mode 100644
index 0000000..1ed9171
--- /dev/null
+++ b/src/resolve/resolved-dns-question.c
@@ -0,0 +1,438 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "dns-type.h"
+#include "resolved-dns-question.h"
+
+DnsQuestion *dns_question_new(size_t n) {
+ DnsQuestion *q;
+
+ assert(n > 0);
+
+ q = malloc0(offsetof(DnsQuestion, keys) + sizeof(DnsResourceKey*) * n);
+ if (!q)
+ return NULL;
+
+ q->n_ref = 1;
+ q->n_allocated = n;
+
+ return q;
+}
+
+static DnsQuestion *dns_question_free(DnsQuestion *q) {
+ size_t i;
+
+ assert(q);
+
+ for (i = 0; i < q->n_keys; i++)
+ dns_resource_key_unref(q->keys[i]);
+ return mfree(q);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsQuestion, dns_question, dns_question_free);
+
+int dns_question_add(DnsQuestion *q, DnsResourceKey *key) {
+ size_t i;
+ int r;
+
+ assert(key);
+
+ if (!q)
+ return -ENOSPC;
+
+ for (i = 0; i < q->n_keys; i++) {
+ r = dns_resource_key_equal(q->keys[i], key);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+ }
+
+ if (q->n_keys >= q->n_allocated)
+ return -ENOSPC;
+
+ q->keys[q->n_keys++] = dns_resource_key_ref(key);
+ return 0;
+}
+
+int dns_question_matches_rr(DnsQuestion *q, DnsResourceRecord *rr, const char *search_domain) {
+ size_t i;
+ int r;
+
+ assert(rr);
+
+ if (!q)
+ return 0;
+
+ for (i = 0; i < q->n_keys; i++) {
+ r = dns_resource_key_match_rr(q->keys[i], rr, search_domain);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_question_matches_cname_or_dname(DnsQuestion *q, DnsResourceRecord *rr, const char *search_domain) {
+ size_t i;
+ int r;
+
+ assert(rr);
+
+ if (!q)
+ return 0;
+
+ if (!IN_SET(rr->key->type, DNS_TYPE_CNAME, DNS_TYPE_DNAME))
+ return 0;
+
+ for (i = 0; i < q->n_keys; i++) {
+ /* For a {C,D}NAME record we can never find a matching {C,D}NAME record */
+ if (!dns_type_may_redirect(q->keys[i]->type))
+ return 0;
+
+ r = dns_resource_key_match_cname_or_dname(q->keys[i], rr->key, search_domain);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_question_is_valid_for_query(DnsQuestion *q) {
+ const char *name;
+ size_t i;
+ int r;
+
+ if (!q)
+ return 0;
+
+ if (q->n_keys <= 0)
+ return 0;
+
+ if (q->n_keys > 65535)
+ return 0;
+
+ name = dns_resource_key_name(q->keys[0]);
+ if (!name)
+ return 0;
+
+ /* Check that all keys in this question bear the same name */
+ for (i = 0; i < q->n_keys; i++) {
+ assert(q->keys[i]);
+
+ if (i > 0) {
+ r = dns_name_equal(dns_resource_key_name(q->keys[i]), name);
+ if (r <= 0)
+ return r;
+ }
+
+ if (!dns_type_is_valid_query(q->keys[i]->type))
+ return 0;
+ }
+
+ return 1;
+}
+
+int dns_question_contains(DnsQuestion *a, const DnsResourceKey *k) {
+ size_t j;
+ int r;
+
+ assert(k);
+
+ if (!a)
+ return 0;
+
+ for (j = 0; j < a->n_keys; j++) {
+ r = dns_resource_key_equal(a->keys[j], k);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_question_is_equal(DnsQuestion *a, DnsQuestion *b) {
+ size_t j;
+ int r;
+
+ if (a == b)
+ return 1;
+
+ if (!a)
+ return !b || b->n_keys == 0;
+ if (!b)
+ return a->n_keys == 0;
+
+ /* Checks if all keys in a are also contained b, and vice versa */
+
+ for (j = 0; j < a->n_keys; j++) {
+ r = dns_question_contains(b, a->keys[j]);
+ if (r <= 0)
+ return r;
+ }
+
+ for (j = 0; j < b->n_keys; j++) {
+ r = dns_question_contains(a, b->keys[j]);
+ if (r <= 0)
+ return r;
+ }
+
+ return 1;
+}
+
+int dns_question_cname_redirect(DnsQuestion *q, const DnsResourceRecord *cname, DnsQuestion **ret) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *n = NULL;
+ DnsResourceKey *key;
+ bool same = true;
+ int r;
+
+ assert(cname);
+ assert(ret);
+ assert(IN_SET(cname->key->type, DNS_TYPE_CNAME, DNS_TYPE_DNAME));
+
+ if (dns_question_size(q) <= 0) {
+ *ret = NULL;
+ return 0;
+ }
+
+ DNS_QUESTION_FOREACH(key, q) {
+ _cleanup_free_ char *destination = NULL;
+ const char *d;
+
+ if (cname->key->type == DNS_TYPE_CNAME)
+ d = cname->cname.name;
+ else {
+ r = dns_name_change_suffix(dns_resource_key_name(key), dns_resource_key_name(cname->key), cname->dname.name, &destination);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ d = destination;
+ }
+
+ r = dns_name_equal(dns_resource_key_name(key), d);
+ if (r < 0)
+ return r;
+
+ if (r == 0) {
+ same = false;
+ break;
+ }
+ }
+
+ /* Fully the same, indicate we didn't do a thing */
+ if (same) {
+ *ret = NULL;
+ return 0;
+ }
+
+ n = dns_question_new(q->n_keys);
+ if (!n)
+ return -ENOMEM;
+
+ /* Create a new question, and patch in the new name */
+ DNS_QUESTION_FOREACH(key, q) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *k = NULL;
+
+ k = dns_resource_key_new_redirect(key, cname);
+ if (!k)
+ return -ENOMEM;
+
+ r = dns_question_add(n, k);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(n);
+
+ return 1;
+}
+
+const char *dns_question_first_name(DnsQuestion *q) {
+
+ if (!q)
+ return NULL;
+
+ if (q->n_keys < 1)
+ return NULL;
+
+ return dns_resource_key_name(q->keys[0]);
+}
+
+int dns_question_new_address(DnsQuestion **ret, int family, const char *name, bool convert_idna) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *q = NULL;
+ _cleanup_free_ char *buf = NULL;
+ int r;
+
+ assert(ret);
+ assert(name);
+
+ if (!IN_SET(family, AF_INET, AF_INET6, AF_UNSPEC))
+ return -EAFNOSUPPORT;
+
+ if (convert_idna) {
+ r = dns_name_apply_idna(name, &buf);
+ if (r < 0)
+ return r;
+ if (r > 0 && !streq(name, buf))
+ name = buf;
+ else
+ /* We did not manage to create convert the idna name, or it's
+ * the same as the original name. We assume the caller already
+ * created an uncoverted question, so let's not repeat work
+ * unnecessarily. */
+ return -EALREADY;
+ }
+
+ q = dns_question_new(family == AF_UNSPEC ? 2 : 1);
+ if (!q)
+ return -ENOMEM;
+
+ if (family != AF_INET6) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+
+ key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_A, name);
+ if (!key)
+ return -ENOMEM;
+
+ r = dns_question_add(q, key);
+ if (r < 0)
+ return r;
+ }
+
+ if (family != AF_INET) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+
+ key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_AAAA, name);
+ if (!key)
+ return -ENOMEM;
+
+ r = dns_question_add(q, key);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(q);
+
+ return 0;
+}
+
+int dns_question_new_reverse(DnsQuestion **ret, int family, const union in_addr_union *a) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ _cleanup_(dns_question_unrefp) DnsQuestion *q = NULL;
+ _cleanup_free_ char *reverse = NULL;
+ int r;
+
+ assert(ret);
+ assert(a);
+
+ if (!IN_SET(family, AF_INET, AF_INET6, AF_UNSPEC))
+ return -EAFNOSUPPORT;
+
+ r = dns_name_reverse(family, a, &reverse);
+ if (r < 0)
+ return r;
+
+ q = dns_question_new(1);
+ if (!q)
+ return -ENOMEM;
+
+ key = dns_resource_key_new_consume(DNS_CLASS_IN, DNS_TYPE_PTR, reverse);
+ if (!key)
+ return -ENOMEM;
+
+ reverse = NULL;
+
+ r = dns_question_add(q, key);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(q);
+
+ return 0;
+}
+
+int dns_question_new_service(
+ DnsQuestion **ret,
+ const char *service,
+ const char *type,
+ const char *domain,
+ bool with_txt,
+ bool convert_idna) {
+
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ _cleanup_(dns_question_unrefp) DnsQuestion *q = NULL;
+ _cleanup_free_ char *buf = NULL, *joined = NULL;
+ const char *name;
+ int r;
+
+ assert(ret);
+
+ /* We support three modes of invocation:
+ *
+ * 1. Only a domain is specified, in which case we assume a properly encoded SRV RR name, including service
+ * type and possibly a service name. If specified in this way we assume it's already IDNA converted if
+ * that's necessary.
+ *
+ * 2. Both service type and a domain specified, in which case a normal SRV RR is assumed, without a DNS-SD
+ * style prefix. In this case we'll IDNA convert the domain, if that's requested.
+ *
+ * 3. All three of service name, type and domain are specified, in which case a DNS-SD service is put
+ * together. The service name is never IDNA converted, and the domain is if requested.
+ *
+ * It's not supported to specify a service name without a type, or no domain name.
+ */
+
+ if (!domain)
+ return -EINVAL;
+
+ if (type) {
+ if (convert_idna) {
+ r = dns_name_apply_idna(domain, &buf);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ domain = buf;
+ }
+
+ r = dns_service_join(service, type, domain, &joined);
+ if (r < 0)
+ return r;
+
+ name = joined;
+ } else {
+ if (service)
+ return -EINVAL;
+
+ name = domain;
+ }
+
+ q = dns_question_new(1 + with_txt);
+ if (!q)
+ return -ENOMEM;
+
+ key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_SRV, name);
+ if (!key)
+ return -ENOMEM;
+
+ r = dns_question_add(q, key);
+ if (r < 0)
+ return r;
+
+ if (with_txt) {
+ dns_resource_key_unref(key);
+ key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_TXT, name);
+ if (!key)
+ return -ENOMEM;
+
+ r = dns_question_add(q, key);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(q);
+
+ return 0;
+}
diff --git a/src/resolve/resolved-dns-question.h b/src/resolve/resolved-dns-question.h
new file mode 100644
index 0000000..f513bf0
--- /dev/null
+++ b/src/resolve/resolved-dns-question.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct DnsQuestion DnsQuestion;
+
+#include "macro.h"
+#include "resolved-dns-rr.h"
+
+/* A simple array of resource keys */
+
+struct DnsQuestion {
+ unsigned n_ref;
+ size_t n_keys, n_allocated;
+ DnsResourceKey* keys[0];
+};
+
+DnsQuestion *dns_question_new(size_t n);
+DnsQuestion *dns_question_ref(DnsQuestion *q);
+DnsQuestion *dns_question_unref(DnsQuestion *q);
+
+int dns_question_new_address(DnsQuestion **ret, int family, const char *name, bool convert_idna);
+int dns_question_new_reverse(DnsQuestion **ret, int family, const union in_addr_union *a);
+int dns_question_new_service(DnsQuestion **ret, const char *service, const char *type, const char *domain, bool with_txt, bool convert_idna);
+
+int dns_question_add(DnsQuestion *q, DnsResourceKey *key);
+
+int dns_question_matches_rr(DnsQuestion *q, DnsResourceRecord *rr, const char *search_domain);
+int dns_question_matches_cname_or_dname(DnsQuestion *q, DnsResourceRecord *rr, const char* search_domain);
+int dns_question_is_valid_for_query(DnsQuestion *q);
+int dns_question_contains(DnsQuestion *a, const DnsResourceKey *k);
+int dns_question_is_equal(DnsQuestion *a, DnsQuestion *b);
+
+int dns_question_cname_redirect(DnsQuestion *q, const DnsResourceRecord *cname, DnsQuestion **ret);
+
+const char *dns_question_first_name(DnsQuestion *q);
+
+static inline size_t dns_question_size(DnsQuestion *q) {
+ return q ? q->n_keys : 0;
+}
+
+static inline bool dns_question_isempty(DnsQuestion *q) {
+ return dns_question_size(q) <= 0;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsQuestion*, dns_question_unref);
+
+#define _DNS_QUESTION_FOREACH(u, key, q) \
+ for (size_t UNIQ_T(i, u) = ({ \
+ (key) = ((q) && (q)->n_keys > 0) ? (q)->keys[0] : NULL; \
+ 0; \
+ }); \
+ (q) && (UNIQ_T(i, u) < (q)->n_keys); \
+ UNIQ_T(i, u)++, (key) = (UNIQ_T(i, u) < (q)->n_keys ? (q)->keys[UNIQ_T(i, u)] : NULL))
+
+#define DNS_QUESTION_FOREACH(key, q) _DNS_QUESTION_FOREACH(UNIQ, key, q)
diff --git a/src/resolve/resolved-dns-rr.c b/src/resolve/resolved-dns-rr.c
new file mode 100644
index 0000000..a1dffb0
--- /dev/null
+++ b/src/resolve/resolved-dns-rr.c
@@ -0,0 +1,1819 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <math.h>
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "dns-type.h"
+#include "escape.h"
+#include "hexdecoct.h"
+#include "resolved-dns-dnssec.h"
+#include "resolved-dns-packet.h"
+#include "resolved-dns-rr.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+
+DnsResourceKey* dns_resource_key_new(uint16_t class, uint16_t type, const char *name) {
+ DnsResourceKey *k;
+ size_t l;
+
+ assert(name);
+
+ l = strlen(name);
+ k = malloc0(sizeof(DnsResourceKey) + l + 1);
+ if (!k)
+ return NULL;
+
+ k->n_ref = 1;
+ k->class = class;
+ k->type = type;
+
+ strcpy((char*) k + sizeof(DnsResourceKey), name);
+
+ return k;
+}
+
+DnsResourceKey* dns_resource_key_new_redirect(const DnsResourceKey *key, const DnsResourceRecord *cname) {
+ int r;
+
+ assert(key);
+ assert(cname);
+
+ assert(IN_SET(cname->key->type, DNS_TYPE_CNAME, DNS_TYPE_DNAME));
+
+ if (cname->key->type == DNS_TYPE_CNAME)
+ return dns_resource_key_new(key->class, key->type, cname->cname.name);
+ else {
+ DnsResourceKey *k;
+ char *destination = NULL;
+
+ r = dns_name_change_suffix(dns_resource_key_name(key), dns_resource_key_name(cname->key), cname->dname.name, &destination);
+ if (r < 0)
+ return NULL;
+ if (r == 0)
+ return dns_resource_key_ref((DnsResourceKey*) key);
+
+ k = dns_resource_key_new_consume(key->class, key->type, destination);
+ if (!k)
+ return mfree(destination);
+
+ return k;
+ }
+}
+
+int dns_resource_key_new_append_suffix(DnsResourceKey **ret, DnsResourceKey *key, char *name) {
+ DnsResourceKey *new_key;
+ char *joined;
+ int r;
+
+ assert(ret);
+ assert(key);
+ assert(name);
+
+ if (dns_name_is_root(name)) {
+ *ret = dns_resource_key_ref(key);
+ return 0;
+ }
+
+ r = dns_name_concat(dns_resource_key_name(key), name, 0, &joined);
+ if (r < 0)
+ return r;
+
+ new_key = dns_resource_key_new_consume(key->class, key->type, joined);
+ if (!new_key) {
+ free(joined);
+ return -ENOMEM;
+ }
+
+ *ret = new_key;
+ return 0;
+}
+
+DnsResourceKey* dns_resource_key_new_consume(uint16_t class, uint16_t type, char *name) {
+ DnsResourceKey *k;
+
+ assert(name);
+
+ k = new0(DnsResourceKey, 1);
+ if (!k)
+ return NULL;
+
+ k->n_ref = 1;
+ k->class = class;
+ k->type = type;
+ k->_name = name;
+
+ return k;
+}
+
+DnsResourceKey* dns_resource_key_ref(DnsResourceKey *k) {
+
+ if (!k)
+ return NULL;
+
+ /* Static/const keys created with DNS_RESOURCE_KEY_CONST will
+ * set this to -1, they should not be reffed/unreffed */
+ assert(k->n_ref != (unsigned) -1);
+
+ assert(k->n_ref > 0);
+ k->n_ref++;
+
+ return k;
+}
+
+DnsResourceKey* dns_resource_key_unref(DnsResourceKey *k) {
+ if (!k)
+ return NULL;
+
+ assert(k->n_ref != (unsigned) -1);
+ assert(k->n_ref > 0);
+
+ if (k->n_ref == 1) {
+ free(k->_name);
+ free(k);
+ } else
+ k->n_ref--;
+
+ return NULL;
+}
+
+const char* dns_resource_key_name(const DnsResourceKey *key) {
+ const char *name;
+
+ if (!key)
+ return NULL;
+
+ if (key->_name)
+ name = key->_name;
+ else
+ name = (char*) key + sizeof(DnsResourceKey);
+
+ if (dns_name_is_root(name))
+ return ".";
+ else
+ return name;
+}
+
+bool dns_resource_key_is_address(const DnsResourceKey *key) {
+ assert(key);
+
+ /* Check if this is an A or AAAA resource key */
+
+ return key->class == DNS_CLASS_IN && IN_SET(key->type, DNS_TYPE_A, DNS_TYPE_AAAA);
+}
+
+bool dns_resource_key_is_dnssd_ptr(const DnsResourceKey *key) {
+ assert(key);
+
+ /* Check if this is a PTR resource key used in
+ Service Instance Enumeration as described in RFC6763 p4.1. */
+
+ if (key->type != DNS_TYPE_PTR)
+ return false;
+
+ return dns_name_endswith(dns_resource_key_name(key), "_tcp.local") ||
+ dns_name_endswith(dns_resource_key_name(key), "_udp.local");
+}
+
+int dns_resource_key_equal(const DnsResourceKey *a, const DnsResourceKey *b) {
+ int r;
+
+ if (a == b)
+ return 1;
+
+ r = dns_name_equal(dns_resource_key_name(a), dns_resource_key_name(b));
+ if (r <= 0)
+ return r;
+
+ if (a->class != b->class)
+ return 0;
+
+ if (a->type != b->type)
+ return 0;
+
+ return 1;
+}
+
+int dns_resource_key_match_rr(const DnsResourceKey *key, DnsResourceRecord *rr, const char *search_domain) {
+ int r;
+
+ assert(key);
+ assert(rr);
+
+ if (key == rr->key)
+ return 1;
+
+ /* Checks if an rr matches the specified key. If a search
+ * domain is specified, it will also be checked if the key
+ * with the search domain suffixed might match the RR. */
+
+ if (rr->key->class != key->class && key->class != DNS_CLASS_ANY)
+ return 0;
+
+ if (rr->key->type != key->type && key->type != DNS_TYPE_ANY)
+ return 0;
+
+ r = dns_name_equal(dns_resource_key_name(rr->key), dns_resource_key_name(key));
+ if (r != 0)
+ return r;
+
+ if (search_domain) {
+ _cleanup_free_ char *joined = NULL;
+
+ r = dns_name_concat(dns_resource_key_name(key), search_domain, 0, &joined);
+ if (r < 0)
+ return r;
+
+ return dns_name_equal(dns_resource_key_name(rr->key), joined);
+ }
+
+ return 0;
+}
+
+int dns_resource_key_match_cname_or_dname(const DnsResourceKey *key, const DnsResourceKey *cname, const char *search_domain) {
+ int r;
+
+ assert(key);
+ assert(cname);
+
+ if (cname->class != key->class && key->class != DNS_CLASS_ANY)
+ return 0;
+
+ if (cname->type == DNS_TYPE_CNAME)
+ r = dns_name_equal(dns_resource_key_name(key), dns_resource_key_name(cname));
+ else if (cname->type == DNS_TYPE_DNAME)
+ r = dns_name_endswith(dns_resource_key_name(key), dns_resource_key_name(cname));
+ else
+ return 0;
+
+ if (r != 0)
+ return r;
+
+ if (search_domain) {
+ _cleanup_free_ char *joined = NULL;
+
+ r = dns_name_concat(dns_resource_key_name(key), search_domain, 0, &joined);
+ if (r < 0)
+ return r;
+
+ if (cname->type == DNS_TYPE_CNAME)
+ return dns_name_equal(joined, dns_resource_key_name(cname));
+ else if (cname->type == DNS_TYPE_DNAME)
+ return dns_name_endswith(joined, dns_resource_key_name(cname));
+ }
+
+ return 0;
+}
+
+int dns_resource_key_match_soa(const DnsResourceKey *key, const DnsResourceKey *soa) {
+ assert(soa);
+ assert(key);
+
+ /* Checks whether 'soa' is a SOA record for the specified key. */
+
+ if (soa->class != key->class)
+ return 0;
+
+ if (soa->type != DNS_TYPE_SOA)
+ return 0;
+
+ return dns_name_endswith(dns_resource_key_name(key), dns_resource_key_name(soa));
+}
+
+static void dns_resource_key_hash_func(const DnsResourceKey *k, struct siphash *state) {
+ assert(k);
+
+ dns_name_hash_func(dns_resource_key_name(k), state);
+ siphash24_compress(&k->class, sizeof(k->class), state);
+ siphash24_compress(&k->type, sizeof(k->type), state);
+}
+
+static int dns_resource_key_compare_func(const DnsResourceKey *x, const DnsResourceKey *y) {
+ int ret;
+
+ ret = dns_name_compare_func(dns_resource_key_name(x), dns_resource_key_name(y));
+ if (ret != 0)
+ return ret;
+
+ ret = CMP(x->type, y->type);
+ if (ret != 0)
+ return ret;
+
+ ret = CMP(x->class, y->class);
+ if (ret != 0)
+ return ret;
+
+ return 0;
+}
+
+DEFINE_HASH_OPS(dns_resource_key_hash_ops, DnsResourceKey, dns_resource_key_hash_func, dns_resource_key_compare_func);
+
+char* dns_resource_key_to_string(const DnsResourceKey *key, char *buf, size_t buf_size) {
+ const char *c, *t;
+ char *ans = buf;
+
+ /* If we cannot convert the CLASS/TYPE into a known string,
+ use the format recommended by RFC 3597, Section 5. */
+
+ c = dns_class_to_string(key->class);
+ t = dns_type_to_string(key->type);
+
+ snprintf(buf, buf_size, "%s %s%s%.0u %s%s%.0u",
+ dns_resource_key_name(key),
+ strempty(c), c ? "" : "CLASS", c ? 0 : key->class,
+ strempty(t), t ? "" : "TYPE", t ? 0 : key->type);
+
+ return ans;
+}
+
+bool dns_resource_key_reduce(DnsResourceKey **a, DnsResourceKey **b) {
+ assert(a);
+ assert(b);
+
+ /* Try to replace one RR key by another if they are identical, thus saving a bit of memory. Note that we do
+ * this only for RR keys, not for RRs themselves, as they carry a lot of additional metadata (where they come
+ * from, validity data, and suchlike), and cannot be replaced so easily by other RRs that have the same
+ * superficial data. */
+
+ if (!*a)
+ return false;
+ if (!*b)
+ return false;
+
+ /* We refuse merging const keys */
+ if ((*a)->n_ref == (unsigned) -1)
+ return false;
+ if ((*b)->n_ref == (unsigned) -1)
+ return false;
+
+ /* Already the same? */
+ if (*a == *b)
+ return true;
+
+ /* Are they really identical? */
+ if (dns_resource_key_equal(*a, *b) <= 0)
+ return false;
+
+ /* Keep the one which already has more references. */
+ if ((*a)->n_ref > (*b)->n_ref) {
+ dns_resource_key_unref(*b);
+ *b = dns_resource_key_ref(*a);
+ } else {
+ dns_resource_key_unref(*a);
+ *a = dns_resource_key_ref(*b);
+ }
+
+ return true;
+}
+
+DnsResourceRecord* dns_resource_record_new(DnsResourceKey *key) {
+ DnsResourceRecord *rr;
+
+ rr = new0(DnsResourceRecord, 1);
+ if (!rr)
+ return NULL;
+
+ rr->n_ref = 1;
+ rr->key = dns_resource_key_ref(key);
+ rr->expiry = USEC_INFINITY;
+ rr->n_skip_labels_signer = rr->n_skip_labels_source = (unsigned) -1;
+
+ return rr;
+}
+
+DnsResourceRecord* dns_resource_record_new_full(uint16_t class, uint16_t type, const char *name) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+
+ key = dns_resource_key_new(class, type, name);
+ if (!key)
+ return NULL;
+
+ return dns_resource_record_new(key);
+}
+
+static DnsResourceRecord* dns_resource_record_free(DnsResourceRecord *rr) {
+ assert(rr);
+
+ if (rr->key) {
+ switch(rr->key->type) {
+
+ case DNS_TYPE_SRV:
+ free(rr->srv.name);
+ break;
+
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ free(rr->ptr.name);
+ break;
+
+ case DNS_TYPE_HINFO:
+ free(rr->hinfo.cpu);
+ free(rr->hinfo.os);
+ break;
+
+ case DNS_TYPE_TXT:
+ case DNS_TYPE_SPF:
+ dns_txt_item_free_all(rr->txt.items);
+ break;
+
+ case DNS_TYPE_SOA:
+ free(rr->soa.mname);
+ free(rr->soa.rname);
+ break;
+
+ case DNS_TYPE_MX:
+ free(rr->mx.exchange);
+ break;
+
+ case DNS_TYPE_DS:
+ free(rr->ds.digest);
+ break;
+
+ case DNS_TYPE_SSHFP:
+ free(rr->sshfp.fingerprint);
+ break;
+
+ case DNS_TYPE_DNSKEY:
+ free(rr->dnskey.key);
+ break;
+
+ case DNS_TYPE_RRSIG:
+ free(rr->rrsig.signer);
+ free(rr->rrsig.signature);
+ break;
+
+ case DNS_TYPE_NSEC:
+ free(rr->nsec.next_domain_name);
+ bitmap_free(rr->nsec.types);
+ break;
+
+ case DNS_TYPE_NSEC3:
+ free(rr->nsec3.next_hashed_name);
+ free(rr->nsec3.salt);
+ bitmap_free(rr->nsec3.types);
+ break;
+
+ case DNS_TYPE_LOC:
+ case DNS_TYPE_A:
+ case DNS_TYPE_AAAA:
+ break;
+
+ case DNS_TYPE_TLSA:
+ free(rr->tlsa.data);
+ break;
+
+ case DNS_TYPE_CAA:
+ free(rr->caa.tag);
+ free(rr->caa.value);
+ break;
+
+ case DNS_TYPE_OPENPGPKEY:
+ default:
+ if (!rr->unparseable)
+ free(rr->generic.data);
+ }
+
+ if (rr->unparseable)
+ free(rr->generic.data);
+
+ free(rr->wire_format);
+ dns_resource_key_unref(rr->key);
+ }
+
+ free(rr->to_string);
+ return mfree(rr);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsResourceRecord, dns_resource_record, dns_resource_record_free);
+
+int dns_resource_record_new_reverse(DnsResourceRecord **ret, int family, const union in_addr_union *address, const char *hostname) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ _cleanup_free_ char *ptr = NULL;
+ int r;
+
+ assert(ret);
+ assert(address);
+ assert(hostname);
+
+ r = dns_name_reverse(family, address, &ptr);
+ if (r < 0)
+ return r;
+
+ key = dns_resource_key_new_consume(DNS_CLASS_IN, DNS_TYPE_PTR, ptr);
+ if (!key)
+ return -ENOMEM;
+
+ ptr = NULL;
+
+ rr = dns_resource_record_new(key);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->ptr.name = strdup(hostname);
+ if (!rr->ptr.name)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(rr);
+
+ return 0;
+}
+
+int dns_resource_record_new_address(DnsResourceRecord **ret, int family, const union in_addr_union *address, const char *name) {
+ DnsResourceRecord *rr;
+
+ assert(ret);
+ assert(address);
+ assert(family);
+
+ if (family == AF_INET) {
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_A, name);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->a.in_addr = address->in;
+
+ } else if (family == AF_INET6) {
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_AAAA, name);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->aaaa.in6_addr = address->in6;
+ } else
+ return -EAFNOSUPPORT;
+
+ *ret = rr;
+
+ return 0;
+}
+
+#define FIELD_EQUAL(a, b, field) \
+ ((a).field ## _size == (b).field ## _size && \
+ memcmp((a).field, (b).field, (a).field ## _size) == 0)
+
+int dns_resource_record_equal(const DnsResourceRecord *a, const DnsResourceRecord *b) {
+ int r;
+
+ assert(a);
+ assert(b);
+
+ if (a == b)
+ return 1;
+
+ r = dns_resource_key_equal(a->key, b->key);
+ if (r <= 0)
+ return r;
+
+ if (a->unparseable != b->unparseable)
+ return 0;
+
+ switch (a->unparseable ? _DNS_TYPE_INVALID : a->key->type) {
+
+ case DNS_TYPE_SRV:
+ r = dns_name_equal(a->srv.name, b->srv.name);
+ if (r <= 0)
+ return r;
+
+ return a->srv.priority == b->srv.priority &&
+ a->srv.weight == b->srv.weight &&
+ a->srv.port == b->srv.port;
+
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ return dns_name_equal(a->ptr.name, b->ptr.name);
+
+ case DNS_TYPE_HINFO:
+ return strcaseeq(a->hinfo.cpu, b->hinfo.cpu) &&
+ strcaseeq(a->hinfo.os, b->hinfo.os);
+
+ case DNS_TYPE_SPF: /* exactly the same as TXT */
+ case DNS_TYPE_TXT:
+ return dns_txt_item_equal(a->txt.items, b->txt.items);
+
+ case DNS_TYPE_A:
+ return memcmp(&a->a.in_addr, &b->a.in_addr, sizeof(struct in_addr)) == 0;
+
+ case DNS_TYPE_AAAA:
+ return memcmp(&a->aaaa.in6_addr, &b->aaaa.in6_addr, sizeof(struct in6_addr)) == 0;
+
+ case DNS_TYPE_SOA:
+ r = dns_name_equal(a->soa.mname, b->soa.mname);
+ if (r <= 0)
+ return r;
+ r = dns_name_equal(a->soa.rname, b->soa.rname);
+ if (r <= 0)
+ return r;
+
+ return a->soa.serial == b->soa.serial &&
+ a->soa.refresh == b->soa.refresh &&
+ a->soa.retry == b->soa.retry &&
+ a->soa.expire == b->soa.expire &&
+ a->soa.minimum == b->soa.minimum;
+
+ case DNS_TYPE_MX:
+ if (a->mx.priority != b->mx.priority)
+ return 0;
+
+ return dns_name_equal(a->mx.exchange, b->mx.exchange);
+
+ case DNS_TYPE_LOC:
+ assert(a->loc.version == b->loc.version);
+
+ return a->loc.size == b->loc.size &&
+ a->loc.horiz_pre == b->loc.horiz_pre &&
+ a->loc.vert_pre == b->loc.vert_pre &&
+ a->loc.latitude == b->loc.latitude &&
+ a->loc.longitude == b->loc.longitude &&
+ a->loc.altitude == b->loc.altitude;
+
+ case DNS_TYPE_DS:
+ return a->ds.key_tag == b->ds.key_tag &&
+ a->ds.algorithm == b->ds.algorithm &&
+ a->ds.digest_type == b->ds.digest_type &&
+ FIELD_EQUAL(a->ds, b->ds, digest);
+
+ case DNS_TYPE_SSHFP:
+ return a->sshfp.algorithm == b->sshfp.algorithm &&
+ a->sshfp.fptype == b->sshfp.fptype &&
+ FIELD_EQUAL(a->sshfp, b->sshfp, fingerprint);
+
+ case DNS_TYPE_DNSKEY:
+ return a->dnskey.flags == b->dnskey.flags &&
+ a->dnskey.protocol == b->dnskey.protocol &&
+ a->dnskey.algorithm == b->dnskey.algorithm &&
+ FIELD_EQUAL(a->dnskey, b->dnskey, key);
+
+ case DNS_TYPE_RRSIG:
+ /* do the fast comparisons first */
+ return a->rrsig.type_covered == b->rrsig.type_covered &&
+ a->rrsig.algorithm == b->rrsig.algorithm &&
+ a->rrsig.labels == b->rrsig.labels &&
+ a->rrsig.original_ttl == b->rrsig.original_ttl &&
+ a->rrsig.expiration == b->rrsig.expiration &&
+ a->rrsig.inception == b->rrsig.inception &&
+ a->rrsig.key_tag == b->rrsig.key_tag &&
+ FIELD_EQUAL(a->rrsig, b->rrsig, signature) &&
+ dns_name_equal(a->rrsig.signer, b->rrsig.signer);
+
+ case DNS_TYPE_NSEC:
+ return dns_name_equal(a->nsec.next_domain_name, b->nsec.next_domain_name) &&
+ bitmap_equal(a->nsec.types, b->nsec.types);
+
+ case DNS_TYPE_NSEC3:
+ return a->nsec3.algorithm == b->nsec3.algorithm &&
+ a->nsec3.flags == b->nsec3.flags &&
+ a->nsec3.iterations == b->nsec3.iterations &&
+ FIELD_EQUAL(a->nsec3, b->nsec3, salt) &&
+ FIELD_EQUAL(a->nsec3, b->nsec3, next_hashed_name) &&
+ bitmap_equal(a->nsec3.types, b->nsec3.types);
+
+ case DNS_TYPE_TLSA:
+ return a->tlsa.cert_usage == b->tlsa.cert_usage &&
+ a->tlsa.selector == b->tlsa.selector &&
+ a->tlsa.matching_type == b->tlsa.matching_type &&
+ FIELD_EQUAL(a->tlsa, b->tlsa, data);
+
+ case DNS_TYPE_CAA:
+ return a->caa.flags == b->caa.flags &&
+ streq(a->caa.tag, b->caa.tag) &&
+ FIELD_EQUAL(a->caa, b->caa, value);
+
+ case DNS_TYPE_OPENPGPKEY:
+ default:
+ return FIELD_EQUAL(a->generic, b->generic, data);
+ }
+}
+
+static char* format_location(uint32_t latitude, uint32_t longitude, uint32_t altitude,
+ uint8_t size, uint8_t horiz_pre, uint8_t vert_pre) {
+ char *s;
+ char NS = latitude >= 1U<<31 ? 'N' : 'S';
+ char EW = longitude >= 1U<<31 ? 'E' : 'W';
+
+ int lat = latitude >= 1U<<31 ? (int) (latitude - (1U<<31)) : (int) ((1U<<31) - latitude);
+ int lon = longitude >= 1U<<31 ? (int) (longitude - (1U<<31)) : (int) ((1U<<31) - longitude);
+ double alt = altitude >= 10000000u ? altitude - 10000000u : -(double)(10000000u - altitude);
+ double siz = (size >> 4) * exp10((double) (size & 0xF));
+ double hor = (horiz_pre >> 4) * exp10((double) (horiz_pre & 0xF));
+ double ver = (vert_pre >> 4) * exp10((double) (vert_pre & 0xF));
+
+ if (asprintf(&s, "%d %d %.3f %c %d %d %.3f %c %.2fm %.2fm %.2fm %.2fm",
+ (lat / 60000 / 60),
+ (lat / 60000) % 60,
+ (lat % 60000) / 1000.,
+ NS,
+ (lon / 60000 / 60),
+ (lon / 60000) % 60,
+ (lon % 60000) / 1000.,
+ EW,
+ alt / 100.,
+ siz / 100.,
+ hor / 100.,
+ ver / 100.) < 0)
+ return NULL;
+
+ return s;
+}
+
+static int format_timestamp_dns(char *buf, size_t l, time_t sec) {
+ struct tm tm;
+
+ assert(buf);
+ assert(l > STRLEN("YYYYMMDDHHmmSS"));
+
+ if (!gmtime_r(&sec, &tm))
+ return -EINVAL;
+
+ if (strftime(buf, l, "%Y%m%d%H%M%S", &tm) <= 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+static char *format_types(Bitmap *types) {
+ _cleanup_strv_free_ char **strv = NULL;
+ _cleanup_free_ char *str = NULL;
+ Iterator i;
+ unsigned type;
+ int r;
+
+ BITMAP_FOREACH(type, types, i) {
+ if (dns_type_to_string(type)) {
+ r = strv_extend(&strv, dns_type_to_string(type));
+ if (r < 0)
+ return NULL;
+ } else {
+ char *t;
+
+ r = asprintf(&t, "TYPE%u", type);
+ if (r < 0)
+ return NULL;
+
+ r = strv_consume(&strv, t);
+ if (r < 0)
+ return NULL;
+ }
+ }
+
+ str = strv_join(strv, " ");
+ if (!str)
+ return NULL;
+
+ return strjoin("( ", str, " )");
+}
+
+static char *format_txt(DnsTxtItem *first) {
+ DnsTxtItem *i;
+ size_t c = 1;
+ char *p, *s;
+
+ LIST_FOREACH(items, i, first)
+ c += i->length * 4 + 3;
+
+ p = s = new(char, c);
+ if (!s)
+ return NULL;
+
+ LIST_FOREACH(items, i, first) {
+ size_t j;
+
+ if (i != first)
+ *(p++) = ' ';
+
+ *(p++) = '"';
+
+ for (j = 0; j < i->length; j++) {
+ if (i->data[j] < ' ' || i->data[j] == '"' || i->data[j] >= 127) {
+ *(p++) = '\\';
+ *(p++) = '0' + (i->data[j] / 100);
+ *(p++) = '0' + ((i->data[j] / 10) % 10);
+ *(p++) = '0' + (i->data[j] % 10);
+ } else
+ *(p++) = i->data[j];
+ }
+
+ *(p++) = '"';
+ }
+
+ *p = 0;
+ return s;
+}
+
+const char *dns_resource_record_to_string(DnsResourceRecord *rr) {
+ _cleanup_free_ char *t = NULL;
+ char *s, k[DNS_RESOURCE_KEY_STRING_MAX];
+ int r;
+
+ assert(rr);
+
+ if (rr->to_string)
+ return rr->to_string;
+
+ dns_resource_key_to_string(rr->key, k, sizeof(k));
+
+ switch (rr->unparseable ? _DNS_TYPE_INVALID : rr->key->type) {
+
+ case DNS_TYPE_SRV:
+ r = asprintf(&s, "%s %u %u %u %s",
+ k,
+ rr->srv.priority,
+ rr->srv.weight,
+ rr->srv.port,
+ strna(rr->srv.name));
+ if (r < 0)
+ return NULL;
+ break;
+
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ s = strjoin(k, " ", rr->ptr.name);
+ if (!s)
+ return NULL;
+
+ break;
+
+ case DNS_TYPE_HINFO:
+ s = strjoin(k, " ", rr->hinfo.cpu, " ", rr->hinfo.os);
+ if (!s)
+ return NULL;
+ break;
+
+ case DNS_TYPE_SPF: /* exactly the same as TXT */
+ case DNS_TYPE_TXT:
+ t = format_txt(rr->txt.items);
+ if (!t)
+ return NULL;
+
+ s = strjoin(k, " ", t);
+ if (!s)
+ return NULL;
+ break;
+
+ case DNS_TYPE_A: {
+ _cleanup_free_ char *x = NULL;
+
+ r = in_addr_to_string(AF_INET, (const union in_addr_union*) &rr->a.in_addr, &x);
+ if (r < 0)
+ return NULL;
+
+ s = strjoin(k, " ", x);
+ if (!s)
+ return NULL;
+ break;
+ }
+
+ case DNS_TYPE_AAAA:
+ r = in_addr_to_string(AF_INET6, (const union in_addr_union*) &rr->aaaa.in6_addr, &t);
+ if (r < 0)
+ return NULL;
+
+ s = strjoin(k, " ", t);
+ if (!s)
+ return NULL;
+ break;
+
+ case DNS_TYPE_SOA:
+ r = asprintf(&s, "%s %s %s %u %u %u %u %u",
+ k,
+ strna(rr->soa.mname),
+ strna(rr->soa.rname),
+ rr->soa.serial,
+ rr->soa.refresh,
+ rr->soa.retry,
+ rr->soa.expire,
+ rr->soa.minimum);
+ if (r < 0)
+ return NULL;
+ break;
+
+ case DNS_TYPE_MX:
+ r = asprintf(&s, "%s %u %s",
+ k,
+ rr->mx.priority,
+ rr->mx.exchange);
+ if (r < 0)
+ return NULL;
+ break;
+
+ case DNS_TYPE_LOC:
+ assert(rr->loc.version == 0);
+
+ t = format_location(rr->loc.latitude,
+ rr->loc.longitude,
+ rr->loc.altitude,
+ rr->loc.size,
+ rr->loc.horiz_pre,
+ rr->loc.vert_pre);
+ if (!t)
+ return NULL;
+
+ s = strjoin(k, " ", t);
+ if (!s)
+ return NULL;
+ break;
+
+ case DNS_TYPE_DS:
+ t = hexmem(rr->ds.digest, rr->ds.digest_size);
+ if (!t)
+ return NULL;
+
+ r = asprintf(&s, "%s %u %u %u %s",
+ k,
+ rr->ds.key_tag,
+ rr->ds.algorithm,
+ rr->ds.digest_type,
+ t);
+ if (r < 0)
+ return NULL;
+ break;
+
+ case DNS_TYPE_SSHFP:
+ t = hexmem(rr->sshfp.fingerprint, rr->sshfp.fingerprint_size);
+ if (!t)
+ return NULL;
+
+ r = asprintf(&s, "%s %u %u %s",
+ k,
+ rr->sshfp.algorithm,
+ rr->sshfp.fptype,
+ t);
+ if (r < 0)
+ return NULL;
+ break;
+
+ case DNS_TYPE_DNSKEY: {
+ _cleanup_free_ char *alg = NULL;
+ char *ss;
+ int n;
+ uint16_t key_tag;
+
+ key_tag = dnssec_keytag(rr, true);
+
+ r = dnssec_algorithm_to_string_alloc(rr->dnskey.algorithm, &alg);
+ if (r < 0)
+ return NULL;
+
+ r = asprintf(&s, "%s %u %u %s %n",
+ k,
+ rr->dnskey.flags,
+ rr->dnskey.protocol,
+ alg,
+ &n);
+ if (r < 0)
+ return NULL;
+
+ r = base64_append(&s, n,
+ rr->dnskey.key, rr->dnskey.key_size,
+ 8, columns());
+ if (r < 0)
+ return NULL;
+
+ r = asprintf(&ss, "%s\n"
+ " -- Flags:%s%s%s\n"
+ " -- Key tag: %u",
+ s,
+ rr->dnskey.flags & DNSKEY_FLAG_SEP ? " SEP" : "",
+ rr->dnskey.flags & DNSKEY_FLAG_REVOKE ? " REVOKE" : "",
+ rr->dnskey.flags & DNSKEY_FLAG_ZONE_KEY ? " ZONE_KEY" : "",
+ key_tag);
+ if (r < 0)
+ return NULL;
+ free(s);
+ s = ss;
+
+ break;
+ }
+
+ case DNS_TYPE_RRSIG: {
+ _cleanup_free_ char *alg = NULL;
+ char expiration[STRLEN("YYYYMMDDHHmmSS") + 1], inception[STRLEN("YYYYMMDDHHmmSS") + 1];
+ const char *type;
+ int n;
+
+ type = dns_type_to_string(rr->rrsig.type_covered);
+
+ r = dnssec_algorithm_to_string_alloc(rr->rrsig.algorithm, &alg);
+ if (r < 0)
+ return NULL;
+
+ r = format_timestamp_dns(expiration, sizeof(expiration), rr->rrsig.expiration);
+ if (r < 0)
+ return NULL;
+
+ r = format_timestamp_dns(inception, sizeof(inception), rr->rrsig.inception);
+ if (r < 0)
+ return NULL;
+
+ /* TYPE?? follows
+ * http://tools.ietf.org/html/rfc3597#section-5 */
+
+ r = asprintf(&s, "%s %s%.*u %s %u %u %s %s %u %s %n",
+ k,
+ type ?: "TYPE",
+ type ? 0 : 1, type ? 0u : (unsigned) rr->rrsig.type_covered,
+ alg,
+ rr->rrsig.labels,
+ rr->rrsig.original_ttl,
+ expiration,
+ inception,
+ rr->rrsig.key_tag,
+ rr->rrsig.signer,
+ &n);
+ if (r < 0)
+ return NULL;
+
+ r = base64_append(&s, n,
+ rr->rrsig.signature, rr->rrsig.signature_size,
+ 8, columns());
+ if (r < 0)
+ return NULL;
+
+ break;
+ }
+
+ case DNS_TYPE_NSEC:
+ t = format_types(rr->nsec.types);
+ if (!t)
+ return NULL;
+
+ r = asprintf(&s, "%s %s %s",
+ k,
+ rr->nsec.next_domain_name,
+ t);
+ if (r < 0)
+ return NULL;
+ break;
+
+ case DNS_TYPE_NSEC3: {
+ _cleanup_free_ char *salt = NULL, *hash = NULL;
+
+ if (rr->nsec3.salt_size > 0) {
+ salt = hexmem(rr->nsec3.salt, rr->nsec3.salt_size);
+ if (!salt)
+ return NULL;
+ }
+
+ hash = base32hexmem(rr->nsec3.next_hashed_name, rr->nsec3.next_hashed_name_size, false);
+ if (!hash)
+ return NULL;
+
+ t = format_types(rr->nsec3.types);
+ if (!t)
+ return NULL;
+
+ r = asprintf(&s, "%s %"PRIu8" %"PRIu8" %"PRIu16" %s %s %s",
+ k,
+ rr->nsec3.algorithm,
+ rr->nsec3.flags,
+ rr->nsec3.iterations,
+ rr->nsec3.salt_size > 0 ? salt : "-",
+ hash,
+ t);
+ if (r < 0)
+ return NULL;
+
+ break;
+ }
+
+ case DNS_TYPE_TLSA: {
+ const char *cert_usage, *selector, *matching_type;
+
+ cert_usage = tlsa_cert_usage_to_string(rr->tlsa.cert_usage);
+ selector = tlsa_selector_to_string(rr->tlsa.selector);
+ matching_type = tlsa_matching_type_to_string(rr->tlsa.matching_type);
+
+ t = hexmem(rr->sshfp.fingerprint, rr->sshfp.fingerprint_size);
+ if (!t)
+ return NULL;
+
+ r = asprintf(&s,
+ "%s %u %u %u %s\n"
+ " -- Cert. usage: %s\n"
+ " -- Selector: %s\n"
+ " -- Matching type: %s",
+ k,
+ rr->tlsa.cert_usage,
+ rr->tlsa.selector,
+ rr->tlsa.matching_type,
+ t,
+ cert_usage,
+ selector,
+ matching_type);
+ if (r < 0)
+ return NULL;
+
+ break;
+ }
+
+ case DNS_TYPE_CAA: {
+ _cleanup_free_ char *value;
+
+ value = octescape(rr->caa.value, rr->caa.value_size);
+ if (!value)
+ return NULL;
+
+ r = asprintf(&s, "%s %u %s \"%s\"%s%s%s%.0u",
+ k,
+ rr->caa.flags,
+ rr->caa.tag,
+ value,
+ rr->caa.flags ? "\n -- Flags:" : "",
+ rr->caa.flags & CAA_FLAG_CRITICAL ? " critical" : "",
+ rr->caa.flags & ~CAA_FLAG_CRITICAL ? " " : "",
+ rr->caa.flags & ~CAA_FLAG_CRITICAL);
+ if (r < 0)
+ return NULL;
+
+ break;
+ }
+
+ case DNS_TYPE_OPENPGPKEY: {
+ int n;
+
+ r = asprintf(&s, "%s %n",
+ k,
+ &n);
+ if (r < 0)
+ return NULL;
+
+ r = base64_append(&s, n,
+ rr->generic.data, rr->generic.data_size,
+ 8, columns());
+ if (r < 0)
+ return NULL;
+ break;
+ }
+
+ default:
+ t = hexmem(rr->generic.data, rr->generic.data_size);
+ if (!t)
+ return NULL;
+
+ /* Format as documented in RFC 3597, Section 5 */
+ r = asprintf(&s, "%s \\# %zu %s", k, rr->generic.data_size, t);
+ if (r < 0)
+ return NULL;
+ break;
+ }
+
+ rr->to_string = s;
+ return s;
+}
+
+ssize_t dns_resource_record_payload(DnsResourceRecord *rr, void **out) {
+ assert(rr);
+ assert(out);
+
+ switch(rr->unparseable ? _DNS_TYPE_INVALID : rr->key->type) {
+ case DNS_TYPE_SRV:
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ case DNS_TYPE_HINFO:
+ case DNS_TYPE_SPF:
+ case DNS_TYPE_TXT:
+ case DNS_TYPE_A:
+ case DNS_TYPE_AAAA:
+ case DNS_TYPE_SOA:
+ case DNS_TYPE_MX:
+ case DNS_TYPE_LOC:
+ case DNS_TYPE_DS:
+ case DNS_TYPE_DNSKEY:
+ case DNS_TYPE_RRSIG:
+ case DNS_TYPE_NSEC:
+ case DNS_TYPE_NSEC3:
+ return -EINVAL;
+
+ case DNS_TYPE_SSHFP:
+ *out = rr->sshfp.fingerprint;
+ return rr->sshfp.fingerprint_size;
+
+ case DNS_TYPE_TLSA:
+ *out = rr->tlsa.data;
+ return rr->tlsa.data_size;
+
+ case DNS_TYPE_OPENPGPKEY:
+ default:
+ *out = rr->generic.data;
+ return rr->generic.data_size;
+ }
+}
+
+int dns_resource_record_to_wire_format(DnsResourceRecord *rr, bool canonical) {
+
+ DnsPacket packet = {
+ .n_ref = 1,
+ .protocol = DNS_PROTOCOL_DNS,
+ .on_stack = true,
+ .refuse_compression = true,
+ .canonical_form = canonical,
+ };
+
+ size_t start, rds;
+ int r;
+
+ assert(rr);
+
+ /* Generates the RR in wire-format, optionally in the
+ * canonical form as discussed in the DNSSEC RFC 4034, Section
+ * 6.2. We allocate a throw-away DnsPacket object on the stack
+ * here, because we need some book-keeping for memory
+ * management, and can reuse the DnsPacket serializer, that
+ * can generate the canonical form, too, but also knows label
+ * compression and suchlike. */
+
+ if (rr->wire_format && rr->wire_format_canonical == canonical)
+ return 0;
+
+ r = dns_packet_append_rr(&packet, rr, 0, &start, &rds);
+ if (r < 0)
+ return r;
+
+ assert(start == 0);
+ assert(packet._data);
+
+ free(rr->wire_format);
+ rr->wire_format = packet._data;
+ rr->wire_format_size = packet.size;
+ rr->wire_format_rdata_offset = rds;
+ rr->wire_format_canonical = canonical;
+
+ packet._data = NULL;
+ dns_packet_unref(&packet);
+
+ return 0;
+}
+
+int dns_resource_record_signer(DnsResourceRecord *rr, const char **ret) {
+ const char *n;
+ int r;
+
+ assert(rr);
+ assert(ret);
+
+ /* Returns the RRset's signer, if it is known. */
+
+ if (rr->n_skip_labels_signer == (unsigned) -1)
+ return -ENODATA;
+
+ n = dns_resource_key_name(rr->key);
+ r = dns_name_skip(n, rr->n_skip_labels_signer, &n);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ *ret = n;
+ return 0;
+}
+
+int dns_resource_record_source(DnsResourceRecord *rr, const char **ret) {
+ const char *n;
+ int r;
+
+ assert(rr);
+ assert(ret);
+
+ /* Returns the RRset's synthesizing source, if it is known. */
+
+ if (rr->n_skip_labels_source == (unsigned) -1)
+ return -ENODATA;
+
+ n = dns_resource_key_name(rr->key);
+ r = dns_name_skip(n, rr->n_skip_labels_source, &n);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ *ret = n;
+ return 0;
+}
+
+int dns_resource_record_is_signer(DnsResourceRecord *rr, const char *zone) {
+ const char *signer;
+ int r;
+
+ assert(rr);
+
+ r = dns_resource_record_signer(rr, &signer);
+ if (r < 0)
+ return r;
+
+ return dns_name_equal(zone, signer);
+}
+
+int dns_resource_record_is_synthetic(DnsResourceRecord *rr) {
+ int r;
+
+ assert(rr);
+
+ /* Returns > 0 if the RR is generated from a wildcard, and is not the asterisk name itself */
+
+ if (rr->n_skip_labels_source == (unsigned) -1)
+ return -ENODATA;
+
+ if (rr->n_skip_labels_source == 0)
+ return 0;
+
+ if (rr->n_skip_labels_source > 1)
+ return 1;
+
+ r = dns_name_startswith(dns_resource_key_name(rr->key), "*");
+ if (r < 0)
+ return r;
+
+ return !r;
+}
+
+void dns_resource_record_hash_func(const DnsResourceRecord *rr, struct siphash *state) {
+ assert(rr);
+
+ dns_resource_key_hash_func(rr->key, state);
+
+ switch (rr->unparseable ? _DNS_TYPE_INVALID : rr->key->type) {
+
+ case DNS_TYPE_SRV:
+ siphash24_compress(&rr->srv.priority, sizeof(rr->srv.priority), state);
+ siphash24_compress(&rr->srv.weight, sizeof(rr->srv.weight), state);
+ siphash24_compress(&rr->srv.port, sizeof(rr->srv.port), state);
+ dns_name_hash_func(rr->srv.name, state);
+ break;
+
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ dns_name_hash_func(rr->ptr.name, state);
+ break;
+
+ case DNS_TYPE_HINFO:
+ string_hash_func(rr->hinfo.cpu, state);
+ string_hash_func(rr->hinfo.os, state);
+ break;
+
+ case DNS_TYPE_TXT:
+ case DNS_TYPE_SPF: {
+ DnsTxtItem *j;
+
+ LIST_FOREACH(items, j, rr->txt.items) {
+ siphash24_compress(j->data, j->length, state);
+
+ /* Add an extra NUL byte, so that "a" followed by "b" doesn't result in the same hash as "ab"
+ * followed by "". */
+ siphash24_compress_byte(0, state);
+ }
+ break;
+ }
+
+ case DNS_TYPE_A:
+ siphash24_compress(&rr->a.in_addr, sizeof(rr->a.in_addr), state);
+ break;
+
+ case DNS_TYPE_AAAA:
+ siphash24_compress(&rr->aaaa.in6_addr, sizeof(rr->aaaa.in6_addr), state);
+ break;
+
+ case DNS_TYPE_SOA:
+ dns_name_hash_func(rr->soa.mname, state);
+ dns_name_hash_func(rr->soa.rname, state);
+ siphash24_compress(&rr->soa.serial, sizeof(rr->soa.serial), state);
+ siphash24_compress(&rr->soa.refresh, sizeof(rr->soa.refresh), state);
+ siphash24_compress(&rr->soa.retry, sizeof(rr->soa.retry), state);
+ siphash24_compress(&rr->soa.expire, sizeof(rr->soa.expire), state);
+ siphash24_compress(&rr->soa.minimum, sizeof(rr->soa.minimum), state);
+ break;
+
+ case DNS_TYPE_MX:
+ siphash24_compress(&rr->mx.priority, sizeof(rr->mx.priority), state);
+ dns_name_hash_func(rr->mx.exchange, state);
+ break;
+
+ case DNS_TYPE_LOC:
+ siphash24_compress(&rr->loc.version, sizeof(rr->loc.version), state);
+ siphash24_compress(&rr->loc.size, sizeof(rr->loc.size), state);
+ siphash24_compress(&rr->loc.horiz_pre, sizeof(rr->loc.horiz_pre), state);
+ siphash24_compress(&rr->loc.vert_pre, sizeof(rr->loc.vert_pre), state);
+ siphash24_compress(&rr->loc.latitude, sizeof(rr->loc.latitude), state);
+ siphash24_compress(&rr->loc.longitude, sizeof(rr->loc.longitude), state);
+ siphash24_compress(&rr->loc.altitude, sizeof(rr->loc.altitude), state);
+ break;
+
+ case DNS_TYPE_SSHFP:
+ siphash24_compress(&rr->sshfp.algorithm, sizeof(rr->sshfp.algorithm), state);
+ siphash24_compress(&rr->sshfp.fptype, sizeof(rr->sshfp.fptype), state);
+ siphash24_compress(rr->sshfp.fingerprint, rr->sshfp.fingerprint_size, state);
+ break;
+
+ case DNS_TYPE_DNSKEY:
+ siphash24_compress(&rr->dnskey.flags, sizeof(rr->dnskey.flags), state);
+ siphash24_compress(&rr->dnskey.protocol, sizeof(rr->dnskey.protocol), state);
+ siphash24_compress(&rr->dnskey.algorithm, sizeof(rr->dnskey.algorithm), state);
+ siphash24_compress(rr->dnskey.key, rr->dnskey.key_size, state);
+ break;
+
+ case DNS_TYPE_RRSIG:
+ siphash24_compress(&rr->rrsig.type_covered, sizeof(rr->rrsig.type_covered), state);
+ siphash24_compress(&rr->rrsig.algorithm, sizeof(rr->rrsig.algorithm), state);
+ siphash24_compress(&rr->rrsig.labels, sizeof(rr->rrsig.labels), state);
+ siphash24_compress(&rr->rrsig.original_ttl, sizeof(rr->rrsig.original_ttl), state);
+ siphash24_compress(&rr->rrsig.expiration, sizeof(rr->rrsig.expiration), state);
+ siphash24_compress(&rr->rrsig.inception, sizeof(rr->rrsig.inception), state);
+ siphash24_compress(&rr->rrsig.key_tag, sizeof(rr->rrsig.key_tag), state);
+ dns_name_hash_func(rr->rrsig.signer, state);
+ siphash24_compress(rr->rrsig.signature, rr->rrsig.signature_size, state);
+ break;
+
+ case DNS_TYPE_NSEC:
+ dns_name_hash_func(rr->nsec.next_domain_name, state);
+ /* FIXME: we leave out the type bitmap here. Hash
+ * would be better if we'd take it into account
+ * too. */
+ break;
+
+ case DNS_TYPE_DS:
+ siphash24_compress(&rr->ds.key_tag, sizeof(rr->ds.key_tag), state);
+ siphash24_compress(&rr->ds.algorithm, sizeof(rr->ds.algorithm), state);
+ siphash24_compress(&rr->ds.digest_type, sizeof(rr->ds.digest_type), state);
+ siphash24_compress(rr->ds.digest, rr->ds.digest_size, state);
+ break;
+
+ case DNS_TYPE_NSEC3:
+ siphash24_compress(&rr->nsec3.algorithm, sizeof(rr->nsec3.algorithm), state);
+ siphash24_compress(&rr->nsec3.flags, sizeof(rr->nsec3.flags), state);
+ siphash24_compress(&rr->nsec3.iterations, sizeof(rr->nsec3.iterations), state);
+ siphash24_compress(rr->nsec3.salt, rr->nsec3.salt_size, state);
+ siphash24_compress(rr->nsec3.next_hashed_name, rr->nsec3.next_hashed_name_size, state);
+ /* FIXME: We leave the bitmaps out */
+ break;
+
+ case DNS_TYPE_TLSA:
+ siphash24_compress(&rr->tlsa.cert_usage, sizeof(rr->tlsa.cert_usage), state);
+ siphash24_compress(&rr->tlsa.selector, sizeof(rr->tlsa.selector), state);
+ siphash24_compress(&rr->tlsa.matching_type, sizeof(rr->tlsa.matching_type), state);
+ siphash24_compress(rr->tlsa.data, rr->tlsa.data_size, state);
+ break;
+
+ case DNS_TYPE_CAA:
+ siphash24_compress(&rr->caa.flags, sizeof(rr->caa.flags), state);
+ string_hash_func(rr->caa.tag, state);
+ siphash24_compress(rr->caa.value, rr->caa.value_size, state);
+ break;
+
+ case DNS_TYPE_OPENPGPKEY:
+ default:
+ siphash24_compress(rr->generic.data, rr->generic.data_size, state);
+ break;
+ }
+}
+
+static int dns_resource_record_compare_func(const DnsResourceRecord *x, const DnsResourceRecord *y) {
+ int r;
+
+ r = dns_resource_key_compare_func(x->key, y->key);
+ if (r != 0)
+ return r;
+
+ if (dns_resource_record_equal(x, y))
+ return 0;
+
+ /* We still use CMP() here, even though don't implement proper
+ * ordering, since the hashtable doesn't need ordering anyway. */
+ return CMP(x, y);
+}
+
+DEFINE_HASH_OPS(dns_resource_record_hash_ops, DnsResourceRecord, dns_resource_record_hash_func, dns_resource_record_compare_func);
+
+DnsResourceRecord *dns_resource_record_copy(DnsResourceRecord *rr) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *copy = NULL;
+ DnsResourceRecord *t;
+
+ assert(rr);
+
+ copy = dns_resource_record_new(rr->key);
+ if (!copy)
+ return NULL;
+
+ copy->ttl = rr->ttl;
+ copy->expiry = rr->expiry;
+ copy->n_skip_labels_signer = rr->n_skip_labels_signer;
+ copy->n_skip_labels_source = rr->n_skip_labels_source;
+ copy->unparseable = rr->unparseable;
+
+ switch (rr->unparseable ? _DNS_TYPE_INVALID : rr->key->type) {
+
+ case DNS_TYPE_SRV:
+ copy->srv.priority = rr->srv.priority;
+ copy->srv.weight = rr->srv.weight;
+ copy->srv.port = rr->srv.port;
+ copy->srv.name = strdup(rr->srv.name);
+ if (!copy->srv.name)
+ return NULL;
+ break;
+
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ copy->ptr.name = strdup(rr->ptr.name);
+ if (!copy->ptr.name)
+ return NULL;
+ break;
+
+ case DNS_TYPE_HINFO:
+ copy->hinfo.cpu = strdup(rr->hinfo.cpu);
+ if (!copy->hinfo.cpu)
+ return NULL;
+
+ copy->hinfo.os = strdup(rr->hinfo.os);
+ if (!copy->hinfo.os)
+ return NULL;
+ break;
+
+ case DNS_TYPE_TXT:
+ case DNS_TYPE_SPF:
+ copy->txt.items = dns_txt_item_copy(rr->txt.items);
+ if (!copy->txt.items)
+ return NULL;
+ break;
+
+ case DNS_TYPE_A:
+ copy->a = rr->a;
+ break;
+
+ case DNS_TYPE_AAAA:
+ copy->aaaa = rr->aaaa;
+ break;
+
+ case DNS_TYPE_SOA:
+ copy->soa.mname = strdup(rr->soa.mname);
+ if (!copy->soa.mname)
+ return NULL;
+ copy->soa.rname = strdup(rr->soa.rname);
+ if (!copy->soa.rname)
+ return NULL;
+ copy->soa.serial = rr->soa.serial;
+ copy->soa.refresh = rr->soa.refresh;
+ copy->soa.retry = rr->soa.retry;
+ copy->soa.expire = rr->soa.expire;
+ copy->soa.minimum = rr->soa.minimum;
+ break;
+
+ case DNS_TYPE_MX:
+ copy->mx.priority = rr->mx.priority;
+ copy->mx.exchange = strdup(rr->mx.exchange);
+ if (!copy->mx.exchange)
+ return NULL;
+ break;
+
+ case DNS_TYPE_LOC:
+ copy->loc = rr->loc;
+ break;
+
+ case DNS_TYPE_SSHFP:
+ copy->sshfp.algorithm = rr->sshfp.algorithm;
+ copy->sshfp.fptype = rr->sshfp.fptype;
+ copy->sshfp.fingerprint = memdup(rr->sshfp.fingerprint, rr->sshfp.fingerprint_size);
+ if (!copy->sshfp.fingerprint)
+ return NULL;
+ copy->sshfp.fingerprint_size = rr->sshfp.fingerprint_size;
+ break;
+
+ case DNS_TYPE_DNSKEY:
+ copy->dnskey.flags = rr->dnskey.flags;
+ copy->dnskey.protocol = rr->dnskey.protocol;
+ copy->dnskey.algorithm = rr->dnskey.algorithm;
+ copy->dnskey.key = memdup(rr->dnskey.key, rr->dnskey.key_size);
+ if (!copy->dnskey.key)
+ return NULL;
+ copy->dnskey.key_size = rr->dnskey.key_size;
+ break;
+
+ case DNS_TYPE_RRSIG:
+ copy->rrsig.type_covered = rr->rrsig.type_covered;
+ copy->rrsig.algorithm = rr->rrsig.algorithm;
+ copy->rrsig.labels = rr->rrsig.labels;
+ copy->rrsig.original_ttl = rr->rrsig.original_ttl;
+ copy->rrsig.expiration = rr->rrsig.expiration;
+ copy->rrsig.inception = rr->rrsig.inception;
+ copy->rrsig.key_tag = rr->rrsig.key_tag;
+ copy->rrsig.signer = strdup(rr->rrsig.signer);
+ if (!copy->rrsig.signer)
+ return NULL;
+ copy->rrsig.signature = memdup(rr->rrsig.signature, rr->rrsig.signature_size);
+ if (!copy->rrsig.signature)
+ return NULL;
+ copy->rrsig.signature_size = rr->rrsig.signature_size;
+ break;
+
+ case DNS_TYPE_NSEC:
+ copy->nsec.next_domain_name = strdup(rr->nsec.next_domain_name);
+ if (!copy->nsec.next_domain_name)
+ return NULL;
+ copy->nsec.types = bitmap_copy(rr->nsec.types);
+ if (!copy->nsec.types)
+ return NULL;
+ break;
+
+ case DNS_TYPE_DS:
+ copy->ds.key_tag = rr->ds.key_tag;
+ copy->ds.algorithm = rr->ds.algorithm;
+ copy->ds.digest_type = rr->ds.digest_type;
+ copy->ds.digest = memdup(rr->ds.digest, rr->ds.digest_size);
+ if (!copy->ds.digest)
+ return NULL;
+ copy->ds.digest_size = rr->ds.digest_size;
+ break;
+
+ case DNS_TYPE_NSEC3:
+ copy->nsec3.algorithm = rr->nsec3.algorithm;
+ copy->nsec3.flags = rr->nsec3.flags;
+ copy->nsec3.iterations = rr->nsec3.iterations;
+ copy->nsec3.salt = memdup(rr->nsec3.salt, rr->nsec3.salt_size);
+ if (!copy->nsec3.salt)
+ return NULL;
+ copy->nsec3.salt_size = rr->nsec3.salt_size;
+ copy->nsec3.next_hashed_name = memdup(rr->nsec3.next_hashed_name, rr->nsec3.next_hashed_name_size);
+ if (!copy->nsec3.next_hashed_name_size)
+ return NULL;
+ copy->nsec3.next_hashed_name_size = rr->nsec3.next_hashed_name_size;
+ copy->nsec3.types = bitmap_copy(rr->nsec3.types);
+ if (!copy->nsec3.types)
+ return NULL;
+ break;
+
+ case DNS_TYPE_TLSA:
+ copy->tlsa.cert_usage = rr->tlsa.cert_usage;
+ copy->tlsa.selector = rr->tlsa.selector;
+ copy->tlsa.matching_type = rr->tlsa.matching_type;
+ copy->tlsa.data = memdup(rr->tlsa.data, rr->tlsa.data_size);
+ if (!copy->tlsa.data)
+ return NULL;
+ copy->tlsa.data_size = rr->tlsa.data_size;
+ break;
+
+ case DNS_TYPE_CAA:
+ copy->caa.flags = rr->caa.flags;
+ copy->caa.tag = strdup(rr->caa.tag);
+ if (!copy->caa.tag)
+ return NULL;
+ copy->caa.value = memdup(rr->caa.value, rr->caa.value_size);
+ if (!copy->caa.value)
+ return NULL;
+ copy->caa.value_size = rr->caa.value_size;
+ break;
+
+ case DNS_TYPE_OPT:
+ default:
+ copy->generic.data = memdup(rr->generic.data, rr->generic.data_size);
+ if (!copy->generic.data)
+ return NULL;
+ copy->generic.data_size = rr->generic.data_size;
+ break;
+ }
+
+ t = TAKE_PTR(copy);
+
+ return t;
+}
+
+int dns_resource_record_clamp_ttl(DnsResourceRecord **rr, uint32_t max_ttl) {
+ DnsResourceRecord *old_rr, *new_rr;
+ uint32_t new_ttl;
+
+ assert(rr);
+ old_rr = *rr;
+
+ if (old_rr->key->type == DNS_TYPE_OPT)
+ return -EINVAL;
+
+ new_ttl = MIN(old_rr->ttl, max_ttl);
+ if (new_ttl == old_rr->ttl)
+ return 0;
+
+ if (old_rr->n_ref == 1) {
+ /* Patch in place */
+ old_rr->ttl = new_ttl;
+ return 1;
+ }
+
+ new_rr = dns_resource_record_copy(old_rr);
+ if (!new_rr)
+ return -ENOMEM;
+
+ new_rr->ttl = new_ttl;
+
+ dns_resource_record_unref(*rr);
+ *rr = new_rr;
+
+ return 1;
+}
+
+DnsTxtItem *dns_txt_item_free_all(DnsTxtItem *i) {
+ DnsTxtItem *n;
+
+ if (!i)
+ return NULL;
+
+ n = i->items_next;
+
+ free(i);
+ return dns_txt_item_free_all(n);
+}
+
+bool dns_txt_item_equal(DnsTxtItem *a, DnsTxtItem *b) {
+
+ if (a == b)
+ return true;
+
+ if (!a != !b)
+ return false;
+
+ if (!a)
+ return true;
+
+ if (a->length != b->length)
+ return false;
+
+ if (memcmp(a->data, b->data, a->length) != 0)
+ return false;
+
+ return dns_txt_item_equal(a->items_next, b->items_next);
+}
+
+DnsTxtItem *dns_txt_item_copy(DnsTxtItem *first) {
+ DnsTxtItem *i, *copy = NULL, *end = NULL;
+
+ LIST_FOREACH(items, i, first) {
+ DnsTxtItem *j;
+
+ j = memdup(i, offsetof(DnsTxtItem, data) + i->length + 1);
+ if (!j) {
+ dns_txt_item_free_all(copy);
+ return NULL;
+ }
+
+ LIST_INSERT_AFTER(items, copy, end, j);
+ end = j;
+ }
+
+ return copy;
+}
+
+int dns_txt_item_new_empty(DnsTxtItem **ret) {
+ DnsTxtItem *i;
+
+ /* RFC 6763, section 6.1 suggests to treat
+ * empty TXT RRs as equivalent to a TXT record
+ * with a single empty string. */
+
+ i = malloc0(offsetof(DnsTxtItem, data) + 1); /* for safety reasons we add an extra NUL byte */
+ if (!i)
+ return -ENOMEM;
+
+ *ret = i;
+
+ return 0;
+}
+
+static const char* const dnssec_algorithm_table[_DNSSEC_ALGORITHM_MAX_DEFINED] = {
+ /* Mnemonics as listed on https://www.iana.org/assignments/dns-sec-alg-numbers/dns-sec-alg-numbers.xhtml */
+ [DNSSEC_ALGORITHM_RSAMD5] = "RSAMD5",
+ [DNSSEC_ALGORITHM_DH] = "DH",
+ [DNSSEC_ALGORITHM_DSA] = "DSA",
+ [DNSSEC_ALGORITHM_ECC] = "ECC",
+ [DNSSEC_ALGORITHM_RSASHA1] = "RSASHA1",
+ [DNSSEC_ALGORITHM_DSA_NSEC3_SHA1] = "DSA-NSEC3-SHA1",
+ [DNSSEC_ALGORITHM_RSASHA1_NSEC3_SHA1] = "RSASHA1-NSEC3-SHA1",
+ [DNSSEC_ALGORITHM_RSASHA256] = "RSASHA256",
+ [DNSSEC_ALGORITHM_RSASHA512] = "RSASHA512",
+ [DNSSEC_ALGORITHM_ECC_GOST] = "ECC-GOST",
+ [DNSSEC_ALGORITHM_ECDSAP256SHA256] = "ECDSAP256SHA256",
+ [DNSSEC_ALGORITHM_ECDSAP384SHA384] = "ECDSAP384SHA384",
+ [DNSSEC_ALGORITHM_ED25519] = "ED25519",
+ [DNSSEC_ALGORITHM_ED448] = "ED448",
+ [DNSSEC_ALGORITHM_INDIRECT] = "INDIRECT",
+ [DNSSEC_ALGORITHM_PRIVATEDNS] = "PRIVATEDNS",
+ [DNSSEC_ALGORITHM_PRIVATEOID] = "PRIVATEOID",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(dnssec_algorithm, int, 255);
+
+static const char* const dnssec_digest_table[_DNSSEC_DIGEST_MAX_DEFINED] = {
+ /* Names as listed on https://www.iana.org/assignments/ds-rr-types/ds-rr-types.xhtml */
+ [DNSSEC_DIGEST_SHA1] = "SHA-1",
+ [DNSSEC_DIGEST_SHA256] = "SHA-256",
+ [DNSSEC_DIGEST_GOST_R_34_11_94] = "GOST_R_34.11-94",
+ [DNSSEC_DIGEST_SHA384] = "SHA-384",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(dnssec_digest, int, 255);
diff --git a/src/resolve/resolved-dns-rr.h b/src/resolve/resolved-dns-rr.h
new file mode 100644
index 0000000..e02fa2d
--- /dev/null
+++ b/src/resolve/resolved-dns-rr.h
@@ -0,0 +1,338 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <netinet/in.h>
+
+#include "bitmap.h"
+#include "dns-type.h"
+#include "hashmap.h"
+#include "in-addr-util.h"
+#include "list.h"
+#include "string-util.h"
+
+typedef struct DnsResourceKey DnsResourceKey;
+typedef struct DnsResourceRecord DnsResourceRecord;
+typedef struct DnsTxtItem DnsTxtItem;
+
+/* DNSKEY RR flags */
+#define DNSKEY_FLAG_SEP (UINT16_C(1) << 0)
+#define DNSKEY_FLAG_REVOKE (UINT16_C(1) << 7)
+#define DNSKEY_FLAG_ZONE_KEY (UINT16_C(1) << 8)
+
+/* mDNS RR flags */
+#define MDNS_RR_CACHE_FLUSH (UINT16_C(1) << 15)
+
+/* DNSSEC algorithm identifiers, see
+ * http://tools.ietf.org/html/rfc4034#appendix-A.1 and
+ * https://www.iana.org/assignments/dns-sec-alg-numbers/dns-sec-alg-numbers.xhtml */
+enum {
+ DNSSEC_ALGORITHM_RSAMD5 = 1,
+ DNSSEC_ALGORITHM_DH,
+ DNSSEC_ALGORITHM_DSA,
+ DNSSEC_ALGORITHM_ECC,
+ DNSSEC_ALGORITHM_RSASHA1,
+ DNSSEC_ALGORITHM_DSA_NSEC3_SHA1,
+ DNSSEC_ALGORITHM_RSASHA1_NSEC3_SHA1,
+ DNSSEC_ALGORITHM_RSASHA256 = 8, /* RFC 5702 */
+ DNSSEC_ALGORITHM_RSASHA512 = 10, /* RFC 5702 */
+ DNSSEC_ALGORITHM_ECC_GOST = 12, /* RFC 5933 */
+ DNSSEC_ALGORITHM_ECDSAP256SHA256 = 13, /* RFC 6605 */
+ DNSSEC_ALGORITHM_ECDSAP384SHA384 = 14, /* RFC 6605 */
+ DNSSEC_ALGORITHM_ED25519 = 15, /* RFC 8080 */
+ DNSSEC_ALGORITHM_ED448 = 16, /* RFC 8080 */
+ DNSSEC_ALGORITHM_INDIRECT = 252,
+ DNSSEC_ALGORITHM_PRIVATEDNS,
+ DNSSEC_ALGORITHM_PRIVATEOID,
+ _DNSSEC_ALGORITHM_MAX_DEFINED
+};
+
+/* DNSSEC digest identifiers, see
+ * https://www.iana.org/assignments/ds-rr-types/ds-rr-types.xhtml */
+enum {
+ DNSSEC_DIGEST_SHA1 = 1,
+ DNSSEC_DIGEST_SHA256 = 2, /* RFC 4509 */
+ DNSSEC_DIGEST_GOST_R_34_11_94 = 3, /* RFC 5933 */
+ DNSSEC_DIGEST_SHA384 = 4, /* RFC 6605 */
+ _DNSSEC_DIGEST_MAX_DEFINED
+};
+
+/* DNSSEC NSEC3 hash algorithms, see
+ * https://www.iana.org/assignments/dnssec-nsec3-parameters/dnssec-nsec3-parameters.xhtml */
+enum {
+ NSEC3_ALGORITHM_SHA1 = 1,
+ _NSEC3_ALGORITHM_MAX_DEFINED
+};
+
+struct DnsResourceKey {
+ unsigned n_ref; /* (unsigned -1) for const keys, see below */
+ uint16_t class, type;
+ char *_name; /* don't access directly, use dns_resource_key_name()! */
+};
+
+/* Creates a temporary resource key. This is only useful to quickly
+ * look up something, without allocating a full DnsResourceKey object
+ * for it. Note that it is not OK to take references to this kind of
+ * resource key object. */
+#define DNS_RESOURCE_KEY_CONST(c, t, n) \
+ ((DnsResourceKey) { \
+ .n_ref = (unsigned) -1, \
+ .class = c, \
+ .type = t, \
+ ._name = (char*) n, \
+ })
+
+struct DnsTxtItem {
+ size_t length;
+ LIST_FIELDS(DnsTxtItem, items);
+ uint8_t data[];
+};
+
+struct DnsResourceRecord {
+ unsigned n_ref;
+ DnsResourceKey *key;
+
+ char *to_string;
+
+ uint32_t ttl;
+ usec_t expiry; /* RRSIG signature expiry */
+
+ /* How many labels to strip to determine "signer" of the RRSIG (aka, the zone). -1 if not signed. */
+ unsigned n_skip_labels_signer;
+ /* How many labels to strip to determine "synthesizing source" of this RR, i.e. the wildcard's immediate parent. -1 if not signed. */
+ unsigned n_skip_labels_source;
+
+ bool unparseable:1;
+
+ bool wire_format_canonical:1;
+ void *wire_format;
+ size_t wire_format_size;
+ size_t wire_format_rdata_offset;
+
+ union {
+ struct {
+ void *data;
+ size_t data_size;
+ } generic, opt;
+
+ struct {
+ uint16_t priority;
+ uint16_t weight;
+ uint16_t port;
+ char *name;
+ } srv;
+
+ struct {
+ char *name;
+ } ptr, ns, cname, dname;
+
+ struct {
+ char *cpu;
+ char *os;
+ } hinfo;
+
+ struct {
+ DnsTxtItem *items;
+ } txt, spf;
+
+ struct {
+ struct in_addr in_addr;
+ } a;
+
+ struct {
+ struct in6_addr in6_addr;
+ } aaaa;
+
+ struct {
+ char *mname;
+ char *rname;
+ uint32_t serial;
+ uint32_t refresh;
+ uint32_t retry;
+ uint32_t expire;
+ uint32_t minimum;
+ } soa;
+
+ struct {
+ uint16_t priority;
+ char *exchange;
+ } mx;
+
+ /* https://tools.ietf.org/html/rfc1876 */
+ struct {
+ uint8_t version;
+ uint8_t size;
+ uint8_t horiz_pre;
+ uint8_t vert_pre;
+ uint32_t latitude;
+ uint32_t longitude;
+ uint32_t altitude;
+ } loc;
+
+ /* https://tools.ietf.org/html/rfc4255#section-3.1 */
+ struct {
+ uint8_t algorithm;
+ uint8_t fptype;
+ void *fingerprint;
+ size_t fingerprint_size;
+ } sshfp;
+
+ /* http://tools.ietf.org/html/rfc4034#section-2.1 */
+ struct {
+ uint16_t flags;
+ uint8_t protocol;
+ uint8_t algorithm;
+ void* key;
+ size_t key_size;
+ } dnskey;
+
+ /* http://tools.ietf.org/html/rfc4034#section-3.1 */
+ struct {
+ uint16_t type_covered;
+ uint8_t algorithm;
+ uint8_t labels;
+ uint32_t original_ttl;
+ uint32_t expiration;
+ uint32_t inception;
+ uint16_t key_tag;
+ char *signer;
+ void *signature;
+ size_t signature_size;
+ } rrsig;
+
+ /* https://tools.ietf.org/html/rfc4034#section-4.1 */
+ struct {
+ char *next_domain_name;
+ Bitmap *types;
+ } nsec;
+
+ /* https://tools.ietf.org/html/rfc4034#section-5.1 */
+ struct {
+ uint16_t key_tag;
+ uint8_t algorithm;
+ uint8_t digest_type;
+ void *digest;
+ size_t digest_size;
+ } ds;
+
+ struct {
+ uint8_t algorithm;
+ uint8_t flags;
+ uint16_t iterations;
+ void *salt;
+ size_t salt_size;
+ void *next_hashed_name;
+ size_t next_hashed_name_size;
+ Bitmap *types;
+ } nsec3;
+
+ /* https://tools.ietf.org/html/draft-ietf-dane-protocol-23 */
+ struct {
+ uint8_t cert_usage;
+ uint8_t selector;
+ uint8_t matching_type;
+ void *data;
+ size_t data_size;
+ } tlsa;
+
+ /* https://tools.ietf.org/html/rfc6844 */
+ struct {
+ uint8_t flags;
+ char *tag;
+ void *value;
+ size_t value_size;
+ } caa;
+ };
+};
+
+static inline const void* DNS_RESOURCE_RECORD_RDATA(const DnsResourceRecord *rr) {
+ if (!rr)
+ return NULL;
+
+ if (!rr->wire_format)
+ return NULL;
+
+ assert(rr->wire_format_rdata_offset <= rr->wire_format_size);
+ return (uint8_t*) rr->wire_format + rr->wire_format_rdata_offset;
+}
+
+static inline size_t DNS_RESOURCE_RECORD_RDATA_SIZE(const DnsResourceRecord *rr) {
+ if (!rr)
+ return 0;
+ if (!rr->wire_format)
+ return 0;
+
+ assert(rr->wire_format_rdata_offset <= rr->wire_format_size);
+ return rr->wire_format_size - rr->wire_format_rdata_offset;
+}
+
+static inline uint8_t DNS_RESOURCE_RECORD_OPT_VERSION_SUPPORTED(const DnsResourceRecord *rr) {
+ assert(rr);
+ assert(rr->key->type == DNS_TYPE_OPT);
+
+ return ((rr->ttl >> 16) & 0xFF) == 0;
+}
+
+DnsResourceKey* dns_resource_key_new(uint16_t class, uint16_t type, const char *name);
+DnsResourceKey* dns_resource_key_new_redirect(const DnsResourceKey *key, const DnsResourceRecord *cname);
+int dns_resource_key_new_append_suffix(DnsResourceKey **ret, DnsResourceKey *key, char *name);
+DnsResourceKey* dns_resource_key_new_consume(uint16_t class, uint16_t type, char *name);
+DnsResourceKey* dns_resource_key_ref(DnsResourceKey *key);
+DnsResourceKey* dns_resource_key_unref(DnsResourceKey *key);
+const char* dns_resource_key_name(const DnsResourceKey *key);
+bool dns_resource_key_is_address(const DnsResourceKey *key);
+bool dns_resource_key_is_dnssd_ptr(const DnsResourceKey *key);
+int dns_resource_key_equal(const DnsResourceKey *a, const DnsResourceKey *b);
+int dns_resource_key_match_rr(const DnsResourceKey *key, DnsResourceRecord *rr, const char *search_domain);
+int dns_resource_key_match_cname_or_dname(const DnsResourceKey *key, const DnsResourceKey *cname, const char *search_domain);
+int dns_resource_key_match_soa(const DnsResourceKey *key, const DnsResourceKey *soa);
+
+/* _DNS_{CLASS,TYPE}_STRING_MAX include one byte for NUL, which we use for space instead below.
+ * DNS_HOSTNAME_MAX does not include the NUL byte, so we need to add 1. */
+#define DNS_RESOURCE_KEY_STRING_MAX (_DNS_CLASS_STRING_MAX + _DNS_TYPE_STRING_MAX + DNS_HOSTNAME_MAX + 1)
+
+char* dns_resource_key_to_string(const DnsResourceKey *key, char *buf, size_t buf_size);
+ssize_t dns_resource_record_payload(DnsResourceRecord *rr, void **out);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsResourceKey*, dns_resource_key_unref);
+
+static inline bool dns_key_is_shared(const DnsResourceKey *key) {
+ return IN_SET(key->type, DNS_TYPE_PTR);
+}
+
+bool dns_resource_key_reduce(DnsResourceKey **a, DnsResourceKey **b);
+
+DnsResourceRecord* dns_resource_record_new(DnsResourceKey *key);
+DnsResourceRecord* dns_resource_record_new_full(uint16_t class, uint16_t type, const char *name);
+DnsResourceRecord* dns_resource_record_ref(DnsResourceRecord *rr);
+DnsResourceRecord* dns_resource_record_unref(DnsResourceRecord *rr);
+int dns_resource_record_new_reverse(DnsResourceRecord **ret, int family, const union in_addr_union *address, const char *name);
+int dns_resource_record_new_address(DnsResourceRecord **ret, int family, const union in_addr_union *address, const char *name);
+int dns_resource_record_equal(const DnsResourceRecord *a, const DnsResourceRecord *b);
+const char* dns_resource_record_to_string(DnsResourceRecord *rr);
+DnsResourceRecord *dns_resource_record_copy(DnsResourceRecord *rr);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsResourceRecord*, dns_resource_record_unref);
+
+int dns_resource_record_to_wire_format(DnsResourceRecord *rr, bool canonical);
+
+int dns_resource_record_signer(DnsResourceRecord *rr, const char **ret);
+int dns_resource_record_source(DnsResourceRecord *rr, const char **ret);
+int dns_resource_record_is_signer(DnsResourceRecord *rr, const char *zone);
+int dns_resource_record_is_synthetic(DnsResourceRecord *rr);
+
+int dns_resource_record_clamp_ttl(DnsResourceRecord **rr, uint32_t max_ttl);
+
+DnsTxtItem *dns_txt_item_free_all(DnsTxtItem *i);
+bool dns_txt_item_equal(DnsTxtItem *a, DnsTxtItem *b);
+DnsTxtItem *dns_txt_item_copy(DnsTxtItem *i);
+int dns_txt_item_new_empty(DnsTxtItem **ret);
+
+void dns_resource_record_hash_func(const DnsResourceRecord *i, struct siphash *state);
+
+extern const struct hash_ops dns_resource_key_hash_ops;
+extern const struct hash_ops dns_resource_record_hash_ops;
+
+int dnssec_algorithm_to_string_alloc(int i, char **ret);
+int dnssec_algorithm_from_string(const char *s) _pure_;
+
+int dnssec_digest_to_string_alloc(int i, char **ret);
+int dnssec_digest_from_string(const char *s) _pure_;
diff --git a/src/resolve/resolved-dns-scope.c b/src/resolve/resolved-dns-scope.c
new file mode 100644
index 0000000..66dd2b1
--- /dev/null
+++ b/src/resolve/resolved-dns-scope.c
@@ -0,0 +1,1461 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/tcp.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "hostname-util.h"
+#include "missing.h"
+#include "random-util.h"
+#include "resolved-dnssd.h"
+#include "resolved-dns-scope.h"
+#include "resolved-dns-zone.h"
+#include "resolved-llmnr.h"
+#include "resolved-mdns.h"
+#include "socket-util.h"
+#include "strv.h"
+
+#define MULTICAST_RATELIMIT_INTERVAL_USEC (1*USEC_PER_SEC)
+#define MULTICAST_RATELIMIT_BURST 1000
+
+/* After how much time to repeat LLMNR requests, see RFC 4795 Section 7 */
+#define MULTICAST_RESEND_TIMEOUT_MIN_USEC (100 * USEC_PER_MSEC)
+#define MULTICAST_RESEND_TIMEOUT_MAX_USEC (1 * USEC_PER_SEC)
+
+int dns_scope_new(Manager *m, DnsScope **ret, Link *l, DnsProtocol protocol, int family) {
+ DnsScope *s;
+
+ assert(m);
+ assert(ret);
+
+ s = new(DnsScope, 1);
+ if (!s)
+ return -ENOMEM;
+
+ *s = (DnsScope) {
+ .manager = m,
+ .link = l,
+ .protocol = protocol,
+ .family = family,
+ .resend_timeout = MULTICAST_RESEND_TIMEOUT_MIN_USEC,
+ };
+
+ if (protocol == DNS_PROTOCOL_DNS) {
+ /* Copy DNSSEC mode from the link if it is set there,
+ * otherwise take the manager's DNSSEC mode. Note that
+ * we copy this only at scope creation time, and do
+ * not update it from the on, even if the setting
+ * changes. */
+
+ if (l) {
+ s->dnssec_mode = link_get_dnssec_mode(l);
+ s->dns_over_tls_mode = link_get_dns_over_tls_mode(l);
+ } else {
+ s->dnssec_mode = manager_get_dnssec_mode(m);
+ s->dns_over_tls_mode = manager_get_dns_over_tls_mode(m);
+ }
+
+ } else {
+ s->dnssec_mode = DNSSEC_NO;
+ s->dns_over_tls_mode = DNS_OVER_TLS_NO;
+ }
+
+ LIST_PREPEND(scopes, m->dns_scopes, s);
+
+ dns_scope_llmnr_membership(s, true);
+ dns_scope_mdns_membership(s, true);
+
+ log_debug("New scope on link %s, protocol %s, family %s", l ? l->name : "*", dns_protocol_to_string(protocol), family == AF_UNSPEC ? "*" : af_to_name(family));
+
+ /* Enforce ratelimiting for the multicast protocols */
+ RATELIMIT_INIT(s->ratelimit, MULTICAST_RATELIMIT_INTERVAL_USEC, MULTICAST_RATELIMIT_BURST);
+
+ *ret = s;
+ return 0;
+}
+
+static void dns_scope_abort_transactions(DnsScope *s) {
+ assert(s);
+
+ while (s->transactions) {
+ DnsTransaction *t = s->transactions;
+
+ /* Abort the transaction, but make sure it is not
+ * freed while we still look at it */
+
+ t->block_gc++;
+ if (DNS_TRANSACTION_IS_LIVE(t->state))
+ dns_transaction_complete(t, DNS_TRANSACTION_ABORTED);
+ t->block_gc--;
+
+ dns_transaction_free(t);
+ }
+}
+
+DnsScope* dns_scope_free(DnsScope *s) {
+ if (!s)
+ return NULL;
+
+ log_debug("Removing scope on link %s, protocol %s, family %s", s->link ? s->link->name : "*", dns_protocol_to_string(s->protocol), s->family == AF_UNSPEC ? "*" : af_to_name(s->family));
+
+ dns_scope_llmnr_membership(s, false);
+ dns_scope_mdns_membership(s, false);
+ dns_scope_abort_transactions(s);
+
+ while (s->query_candidates)
+ dns_query_candidate_free(s->query_candidates);
+
+ hashmap_free(s->transactions_by_key);
+
+ ordered_hashmap_free_with_destructor(s->conflict_queue, dns_resource_record_unref);
+ sd_event_source_unref(s->conflict_event_source);
+
+ sd_event_source_unref(s->announce_event_source);
+
+ dns_cache_flush(&s->cache);
+ dns_zone_flush(&s->zone);
+
+ LIST_REMOVE(scopes, s->manager->dns_scopes, s);
+ return mfree(s);
+}
+
+DnsServer *dns_scope_get_dns_server(DnsScope *s) {
+ assert(s);
+
+ if (s->protocol != DNS_PROTOCOL_DNS)
+ return NULL;
+
+ if (s->link)
+ return link_get_dns_server(s->link);
+ else
+ return manager_get_dns_server(s->manager);
+}
+
+unsigned dns_scope_get_n_dns_servers(DnsScope *s) {
+ unsigned n = 0;
+ DnsServer *i;
+
+ assert(s);
+
+ if (s->protocol != DNS_PROTOCOL_DNS)
+ return 0;
+
+ if (s->link)
+ i = s->link->dns_servers;
+ else
+ i = s->manager->dns_servers;
+
+ for (; i; i = i->servers_next)
+ n++;
+
+ return n;
+}
+
+void dns_scope_next_dns_server(DnsScope *s) {
+ assert(s);
+
+ if (s->protocol != DNS_PROTOCOL_DNS)
+ return;
+
+ if (s->link)
+ link_next_dns_server(s->link);
+ else
+ manager_next_dns_server(s->manager);
+}
+
+void dns_scope_packet_received(DnsScope *s, usec_t rtt) {
+ assert(s);
+
+ if (rtt <= s->max_rtt)
+ return;
+
+ s->max_rtt = rtt;
+ s->resend_timeout = MIN(MAX(MULTICAST_RESEND_TIMEOUT_MIN_USEC, s->max_rtt * 2), MULTICAST_RESEND_TIMEOUT_MAX_USEC);
+}
+
+void dns_scope_packet_lost(DnsScope *s, usec_t usec) {
+ assert(s);
+
+ if (s->resend_timeout <= usec)
+ s->resend_timeout = MIN(s->resend_timeout * 2, MULTICAST_RESEND_TIMEOUT_MAX_USEC);
+}
+
+static int dns_scope_emit_one(DnsScope *s, int fd, DnsPacket *p) {
+ union in_addr_union addr;
+ int ifindex = 0, r;
+ int family;
+ uint32_t mtu;
+
+ assert(s);
+ assert(p);
+ assert(p->protocol == s->protocol);
+
+ if (s->link) {
+ mtu = s->link->mtu;
+ ifindex = s->link->ifindex;
+ } else
+ mtu = manager_find_mtu(s->manager);
+
+ switch (s->protocol) {
+
+ case DNS_PROTOCOL_DNS:
+ assert(fd >= 0);
+
+ if (DNS_PACKET_QDCOUNT(p) > 1)
+ return -EOPNOTSUPP;
+
+ if (p->size > DNS_PACKET_UNICAST_SIZE_MAX)
+ return -EMSGSIZE;
+
+ if (p->size + UDP_PACKET_HEADER_SIZE > mtu)
+ return -EMSGSIZE;
+
+ r = manager_write(s->manager, fd, p);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case DNS_PROTOCOL_LLMNR:
+ assert(fd < 0);
+
+ if (DNS_PACKET_QDCOUNT(p) > 1)
+ return -EOPNOTSUPP;
+
+ if (!ratelimit_below(&s->ratelimit))
+ return -EBUSY;
+
+ family = s->family;
+
+ if (family == AF_INET) {
+ addr.in = LLMNR_MULTICAST_IPV4_ADDRESS;
+ fd = manager_llmnr_ipv4_udp_fd(s->manager);
+ } else if (family == AF_INET6) {
+ addr.in6 = LLMNR_MULTICAST_IPV6_ADDRESS;
+ fd = manager_llmnr_ipv6_udp_fd(s->manager);
+ } else
+ return -EAFNOSUPPORT;
+ if (fd < 0)
+ return fd;
+
+ r = manager_send(s->manager, fd, ifindex, family, &addr, LLMNR_PORT, NULL, p);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case DNS_PROTOCOL_MDNS:
+ assert(fd < 0);
+
+ if (!ratelimit_below(&s->ratelimit))
+ return -EBUSY;
+
+ family = s->family;
+
+ if (family == AF_INET) {
+ addr.in = MDNS_MULTICAST_IPV4_ADDRESS;
+ fd = manager_mdns_ipv4_fd(s->manager);
+ } else if (family == AF_INET6) {
+ addr.in6 = MDNS_MULTICAST_IPV6_ADDRESS;
+ fd = manager_mdns_ipv6_fd(s->manager);
+ } else
+ return -EAFNOSUPPORT;
+ if (fd < 0)
+ return fd;
+
+ r = manager_send(s->manager, fd, ifindex, family, &addr, MDNS_PORT, NULL, p);
+ if (r < 0)
+ return r;
+
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ return 1;
+}
+
+int dns_scope_emit_udp(DnsScope *s, int fd, DnsPacket *p) {
+ int r;
+
+ assert(s);
+ assert(p);
+ assert(p->protocol == s->protocol);
+ assert((s->protocol == DNS_PROTOCOL_DNS) == (fd >= 0));
+
+ do {
+ /* If there are multiple linked packets, set the TC bit in all but the last of them */
+ if (p->more) {
+ assert(p->protocol == DNS_PROTOCOL_MDNS);
+ dns_packet_set_flags(p, true, true);
+ }
+
+ r = dns_scope_emit_one(s, fd, p);
+ if (r < 0)
+ return r;
+
+ p = p->more;
+ } while (p);
+
+ return 0;
+}
+
+static int dns_scope_socket(
+ DnsScope *s,
+ int type,
+ int family,
+ const union in_addr_union *address,
+ DnsServer *server,
+ uint16_t port,
+ union sockaddr_union *ret_socket_address) {
+
+ _cleanup_close_ int fd = -1;
+ union sockaddr_union sa;
+ socklen_t salen;
+ int r, ifindex;
+
+ assert(s);
+
+ if (server) {
+ assert(family == AF_UNSPEC);
+ assert(!address);
+
+ ifindex = dns_server_ifindex(server);
+
+ switch (server->family) {
+ case AF_INET:
+ sa = (union sockaddr_union) {
+ .in.sin_family = server->family,
+ .in.sin_port = htobe16(port),
+ .in.sin_addr = server->address.in,
+ };
+ salen = sizeof(sa.in);
+ break;
+ case AF_INET6:
+ sa = (union sockaddr_union) {
+ .in6.sin6_family = server->family,
+ .in6.sin6_port = htobe16(port),
+ .in6.sin6_addr = server->address.in6,
+ .in6.sin6_scope_id = ifindex,
+ };
+ salen = sizeof(sa.in6);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+ } else {
+ assert(family != AF_UNSPEC);
+ assert(address);
+
+ ifindex = s->link ? s->link->ifindex : 0;
+
+ switch (family) {
+ case AF_INET:
+ sa = (union sockaddr_union) {
+ .in.sin_family = family,
+ .in.sin_port = htobe16(port),
+ .in.sin_addr = address->in,
+ };
+ salen = sizeof(sa.in);
+ break;
+ case AF_INET6:
+ sa = (union sockaddr_union) {
+ .in6.sin6_family = family,
+ .in6.sin6_port = htobe16(port),
+ .in6.sin6_addr = address->in6,
+ .in6.sin6_scope_id = ifindex,
+ };
+ salen = sizeof(sa.in6);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+ }
+
+ fd = socket(sa.sa.sa_family, type|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ if (type == SOCK_STREAM) {
+ r = setsockopt_int(fd, IPPROTO_TCP, TCP_NODELAY, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (s->link) {
+ be32_t ifindex_be = htobe32(ifindex);
+
+ if (sa.sa.sa_family == AF_INET) {
+ r = setsockopt(fd, IPPROTO_IP, IP_UNICAST_IF, &ifindex_be, sizeof(ifindex_be));
+ if (r < 0)
+ return -errno;
+ } else if (sa.sa.sa_family == AF_INET6) {
+ r = setsockopt(fd, IPPROTO_IPV6, IPV6_UNICAST_IF, &ifindex_be, sizeof(ifindex_be));
+ if (r < 0)
+ return -errno;
+ }
+ }
+
+ if (s->protocol == DNS_PROTOCOL_LLMNR) {
+ /* RFC 4795, section 2.5 requires the TTL to be set to 1 */
+
+ if (sa.sa.sa_family == AF_INET) {
+ r = setsockopt_int(fd, IPPROTO_IP, IP_TTL, true);
+ if (r < 0)
+ return r;
+ } else if (sa.sa.sa_family == AF_INET6) {
+ r = setsockopt_int(fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, true);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (type == SOCK_DGRAM) {
+ /* Set IP_RECVERR or IPV6_RECVERR to get ICMP error feedback. See discussion in #10345. */
+
+ if (sa.sa.sa_family == AF_INET) {
+ r = setsockopt_int(fd, IPPROTO_IP, IP_RECVERR, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(fd, IPPROTO_IP, IP_PKTINFO, true);
+ if (r < 0)
+ return r;
+
+ } else if (sa.sa.sa_family == AF_INET6) {
+ r = setsockopt_int(fd, IPPROTO_IPV6, IPV6_RECVERR, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, true);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (ret_socket_address)
+ *ret_socket_address = sa;
+ else {
+ r = connect(fd, &sa.sa, salen);
+ if (r < 0 && errno != EINPROGRESS)
+ return -errno;
+ }
+
+ return TAKE_FD(fd);
+}
+
+int dns_scope_socket_udp(DnsScope *s, DnsServer *server, uint16_t port) {
+ return dns_scope_socket(s, SOCK_DGRAM, AF_UNSPEC, NULL, server, port, NULL);
+}
+
+int dns_scope_socket_tcp(DnsScope *s, int family, const union in_addr_union *address, DnsServer *server, uint16_t port, union sockaddr_union *ret_socket_address) {
+ /* If ret_socket_address is not NULL, the caller is responisble
+ * for calling connect() or sendmsg(). This is required by TCP
+ * Fast Open, to be able to send the initial SYN packet along
+ * with the first data packet. */
+ return dns_scope_socket(s, SOCK_STREAM, family, address, server, port, ret_socket_address);
+}
+
+static DnsScopeMatch accept_link_local_reverse_lookups(const char *domain) {
+ assert(domain);
+
+ if (dns_name_endswith(domain, "254.169.in-addr.arpa") > 0)
+ return DNS_SCOPE_YES_BASE + 4; /* 4 labels match */
+
+ if (dns_name_endswith(domain, "8.e.f.ip6.arpa") > 0 ||
+ dns_name_endswith(domain, "9.e.f.ip6.arpa") > 0 ||
+ dns_name_endswith(domain, "a.e.f.ip6.arpa") > 0 ||
+ dns_name_endswith(domain, "b.e.f.ip6.arpa") > 0)
+ return DNS_SCOPE_YES_BASE + 5; /* 5 labels match */
+
+ return _DNS_SCOPE_MATCH_INVALID;
+}
+
+DnsScopeMatch dns_scope_good_domain(
+ DnsScope *s,
+ int ifindex,
+ uint64_t flags,
+ const char *domain) {
+
+ DnsSearchDomain *d;
+
+ /* This returns the following return values:
+ *
+ * DNS_SCOPE_NO → This scope is not suitable for lookups of this domain, at all
+ * DNS_SCOPE_MAYBE → This scope is suitable, but only if nothing else wants it
+ * DNS_SCOPE_YES_BASE+n → This scope is suitable, and 'n' suffix labels match
+ *
+ * (The idea is that the caller will only use the scopes with the longest 'n' returned. If no scopes return
+ * DNS_SCOPE_YES_BASE+n, then it should use those which returned DNS_SCOPE_MAYBE. It should never use those
+ * which returned DNS_SCOPE_NO.)
+ */
+
+ assert(s);
+ assert(domain);
+
+ /* Checks if the specified domain is something to look up on
+ * this scope. Note that this accepts non-qualified hostnames,
+ * i.e. those without any search path prefixed yet. */
+
+ if (ifindex != 0 && (!s->link || s->link->ifindex != ifindex))
+ return DNS_SCOPE_NO;
+
+ if ((SD_RESOLVED_FLAGS_MAKE(s->protocol, s->family, 0) & flags) == 0)
+ return DNS_SCOPE_NO;
+
+ /* Never resolve any loopback hostname or IP address via DNS,
+ * LLMNR or mDNS. Instead, always rely on synthesized RRs for
+ * these. */
+ if (is_localhost(domain) ||
+ dns_name_endswith(domain, "127.in-addr.arpa") > 0 ||
+ dns_name_equal(domain, "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa") > 0)
+ return DNS_SCOPE_NO;
+
+ /* Never respond to some of the domains listed in RFC6303 */
+ if (dns_name_endswith(domain, "0.in-addr.arpa") > 0 ||
+ dns_name_equal(domain, "255.255.255.255.in-addr.arpa") > 0 ||
+ dns_name_equal(domain, "0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa") > 0)
+ return DNS_SCOPE_NO;
+
+ /* Never respond to some of the domains listed in RFC6761 */
+ if (dns_name_endswith(domain, "invalid") > 0)
+ return DNS_SCOPE_NO;
+
+ switch (s->protocol) {
+
+ case DNS_PROTOCOL_DNS: {
+ bool has_search_domains = false;
+ int n_best = -1;
+
+ /* Never route things to scopes that lack DNS servers */
+ if (!dns_scope_get_dns_server(s))
+ return DNS_SCOPE_NO;
+
+ /* Always honour search domains for routing queries, except if this scope lacks DNS servers. Note that
+ * we return DNS_SCOPE_YES here, rather than just DNS_SCOPE_MAYBE, which means other wildcard scopes
+ * won't be considered anymore. */
+ LIST_FOREACH(domains, d, dns_scope_get_search_domains(s)) {
+
+ if (!d->route_only && !dns_name_is_root(d->name))
+ has_search_domains = true;
+
+ if (dns_name_endswith(domain, d->name) > 0) {
+ int c;
+
+ c = dns_name_count_labels(d->name);
+ if (c < 0)
+ continue;
+
+ if (c > n_best)
+ n_best = c;
+ }
+ }
+
+ /* If there's a true search domain defined for this scope, and the query is single-label,
+ * then let's resolve things here, prefereably. Note that LLMNR considers itself
+ * authoritative for single-label names too, at the same preference, see below. */
+ if (has_search_domains && dns_name_is_single_label(domain))
+ return DNS_SCOPE_YES_BASE + 1;
+
+ /* Let's return the number of labels in the best matching result */
+ if (n_best >= 0) {
+ assert(n_best <= DNS_SCOPE_YES_END - DNS_SCOPE_YES_BASE);
+ return DNS_SCOPE_YES_BASE + n_best;
+ }
+
+ /* See if this scope is suitable as default route. */
+ if (!dns_scope_is_default_route(s))
+ return DNS_SCOPE_NO;
+
+ /* Exclude link-local IP ranges */
+ if (dns_name_endswith(domain, "254.169.in-addr.arpa") == 0 &&
+ dns_name_endswith(domain, "8.e.f.ip6.arpa") == 0 &&
+ dns_name_endswith(domain, "9.e.f.ip6.arpa") == 0 &&
+ dns_name_endswith(domain, "a.e.f.ip6.arpa") == 0 &&
+ dns_name_endswith(domain, "b.e.f.ip6.arpa") == 0 &&
+ /* If networks use .local in their private setups, they are supposed to also add .local to their search
+ * domains, which we already checked above. Otherwise, we consider .local specific to mDNS and won't
+ * send such queries ordinary DNS servers. */
+ dns_name_endswith(domain, "local") == 0)
+ return DNS_SCOPE_MAYBE;
+
+ return DNS_SCOPE_NO;
+ }
+
+ case DNS_PROTOCOL_MDNS: {
+ DnsScopeMatch m;
+
+ m = accept_link_local_reverse_lookups(domain);
+ if (m >= 0)
+ return m;
+
+ if ((s->family == AF_INET && dns_name_endswith(domain, "in-addr.arpa") > 0) ||
+ (s->family == AF_INET6 && dns_name_endswith(domain, "ip6.arpa") > 0))
+ return DNS_SCOPE_MAYBE;
+
+ if ((dns_name_endswith(domain, "local") > 0 && /* only resolve names ending in .local via mDNS */
+ dns_name_equal(domain, "local") == 0 && /* but not the single-label "local" name itself */
+ manager_is_own_hostname(s->manager, domain) <= 0)) /* never resolve the local hostname via mDNS */
+ return DNS_SCOPE_YES_BASE + 1; /* Return +1, as the top-level .local domain matches, i.e. one label */
+
+ return DNS_SCOPE_NO;
+ }
+
+ case DNS_PROTOCOL_LLMNR: {
+ DnsScopeMatch m;
+
+ m = accept_link_local_reverse_lookups(domain);
+ if (m >= 0)
+ return m;
+
+ if ((s->family == AF_INET && dns_name_endswith(domain, "in-addr.arpa") > 0) ||
+ (s->family == AF_INET6 && dns_name_endswith(domain, "ip6.arpa") > 0))
+ return DNS_SCOPE_MAYBE;
+
+ if ((dns_name_is_single_label(domain) && /* only resolve single label names via LLMNR */
+ !is_gateway_hostname(domain) && /* don't resolve "gateway" with LLMNR, let nss-myhostname handle this */
+ manager_is_own_hostname(s->manager, domain) <= 0)) /* never resolve the local hostname via LLMNR */
+ return DNS_SCOPE_YES_BASE + 1; /* Return +1, as we consider ourselves authoritative
+ * for single-label names, i.e. one label. This is
+ * particular relevant as it means a "." route on some
+ * other scope won't pull all traffic away from
+ * us. (If people actually want to pull traffic away
+ * from us they should turn off LLMNR on the
+ * link). Note that unicast DNS scopes with search
+ * domains also consider themselves authoritative for
+ * single-label domains, at the same preference (see
+ * above). */
+
+ return DNS_SCOPE_NO;
+ }
+
+ default:
+ assert_not_reached("Unknown scope protocol");
+ }
+}
+
+bool dns_scope_good_key(DnsScope *s, const DnsResourceKey *key) {
+ int key_family;
+
+ assert(s);
+ assert(key);
+
+ /* Check if it makes sense to resolve the specified key on
+ * this scope. Note that this call assumes as fully qualified
+ * name, i.e. the search suffixes already appended. */
+
+ if (key->class != DNS_CLASS_IN)
+ return false;
+
+ if (s->protocol == DNS_PROTOCOL_DNS) {
+
+ /* On classic DNS, looking up non-address RRs is always
+ * fine. (Specifically, we want to permit looking up
+ * DNSKEY and DS records on the root and top-level
+ * domains.) */
+ if (!dns_resource_key_is_address(key))
+ return true;
+
+ /* However, we refuse to look up A and AAAA RRs on the
+ * root and single-label domains, under the assumption
+ * that those should be resolved via LLMNR or search
+ * path only, and should not be leaked onto the
+ * internet. */
+ return !(dns_name_is_single_label(dns_resource_key_name(key)) ||
+ dns_name_is_root(dns_resource_key_name(key)));
+ }
+
+ /* On mDNS and LLMNR, send A and AAAA queries only on the
+ * respective scopes */
+
+ key_family = dns_type_to_af(key->type);
+ if (key_family < 0)
+ return true;
+
+ return key_family == s->family;
+}
+
+static int dns_scope_multicast_membership(DnsScope *s, bool b, struct in_addr in, struct in6_addr in6) {
+ int fd;
+
+ assert(s);
+ assert(s->link);
+
+ if (s->family == AF_INET) {
+ struct ip_mreqn mreqn = {
+ .imr_multiaddr = in,
+ .imr_ifindex = s->link->ifindex,
+ };
+
+ if (s->protocol == DNS_PROTOCOL_LLMNR)
+ fd = manager_llmnr_ipv4_udp_fd(s->manager);
+ else
+ fd = manager_mdns_ipv4_fd(s->manager);
+
+ if (fd < 0)
+ return fd;
+
+ /* Always first try to drop membership before we add
+ * one. This is necessary on some devices, such as
+ * veth. */
+ if (b)
+ (void) setsockopt(fd, IPPROTO_IP, IP_DROP_MEMBERSHIP, &mreqn, sizeof(mreqn));
+
+ if (setsockopt(fd, IPPROTO_IP, b ? IP_ADD_MEMBERSHIP : IP_DROP_MEMBERSHIP, &mreqn, sizeof(mreqn)) < 0)
+ return -errno;
+
+ } else if (s->family == AF_INET6) {
+ struct ipv6_mreq mreq = {
+ .ipv6mr_multiaddr = in6,
+ .ipv6mr_interface = s->link->ifindex,
+ };
+
+ if (s->protocol == DNS_PROTOCOL_LLMNR)
+ fd = manager_llmnr_ipv6_udp_fd(s->manager);
+ else
+ fd = manager_mdns_ipv6_fd(s->manager);
+
+ if (fd < 0)
+ return fd;
+
+ if (b)
+ (void) setsockopt(fd, IPPROTO_IPV6, IPV6_DROP_MEMBERSHIP, &mreq, sizeof(mreq));
+
+ if (setsockopt(fd, IPPROTO_IPV6, b ? IPV6_ADD_MEMBERSHIP : IPV6_DROP_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
+ return -errno;
+ } else
+ return -EAFNOSUPPORT;
+
+ return 0;
+}
+
+int dns_scope_llmnr_membership(DnsScope *s, bool b) {
+ assert(s);
+
+ if (s->protocol != DNS_PROTOCOL_LLMNR)
+ return 0;
+
+ return dns_scope_multicast_membership(s, b, LLMNR_MULTICAST_IPV4_ADDRESS, LLMNR_MULTICAST_IPV6_ADDRESS);
+}
+
+int dns_scope_mdns_membership(DnsScope *s, bool b) {
+ assert(s);
+
+ if (s->protocol != DNS_PROTOCOL_MDNS)
+ return 0;
+
+ return dns_scope_multicast_membership(s, b, MDNS_MULTICAST_IPV4_ADDRESS, MDNS_MULTICAST_IPV6_ADDRESS);
+}
+
+int dns_scope_make_reply_packet(
+ DnsScope *s,
+ uint16_t id,
+ int rcode,
+ DnsQuestion *q,
+ DnsAnswer *answer,
+ DnsAnswer *soa,
+ bool tentative,
+ DnsPacket **ret) {
+
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ int r;
+
+ assert(s);
+ assert(ret);
+
+ if (dns_question_isempty(q) &&
+ dns_answer_isempty(answer) &&
+ dns_answer_isempty(soa))
+ return -EINVAL;
+
+ r = dns_packet_new(&p, s->protocol, 0, DNS_PACKET_SIZE_MAX);
+ if (r < 0)
+ return r;
+
+ DNS_PACKET_HEADER(p)->id = id;
+ DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS(
+ 1 /* qr */,
+ 0 /* opcode */,
+ 0 /* c */,
+ 0 /* tc */,
+ tentative,
+ 0 /* (ra) */,
+ 0 /* (ad) */,
+ 0 /* (cd) */,
+ rcode));
+
+ r = dns_packet_append_question(p, q);
+ if (r < 0)
+ return r;
+ DNS_PACKET_HEADER(p)->qdcount = htobe16(dns_question_size(q));
+
+ r = dns_packet_append_answer(p, answer);
+ if (r < 0)
+ return r;
+ DNS_PACKET_HEADER(p)->ancount = htobe16(dns_answer_size(answer));
+
+ r = dns_packet_append_answer(p, soa);
+ if (r < 0)
+ return r;
+ DNS_PACKET_HEADER(p)->arcount = htobe16(dns_answer_size(soa));
+
+ *ret = TAKE_PTR(p);
+
+ return 0;
+}
+
+static void dns_scope_verify_conflicts(DnsScope *s, DnsPacket *p) {
+ DnsResourceRecord *rr;
+ DnsResourceKey *key;
+
+ assert(s);
+ assert(p);
+
+ DNS_QUESTION_FOREACH(key, p->question)
+ dns_zone_verify_conflicts(&s->zone, key);
+
+ DNS_ANSWER_FOREACH(rr, p->answer)
+ dns_zone_verify_conflicts(&s->zone, rr->key);
+}
+
+void dns_scope_process_query(DnsScope *s, DnsStream *stream, DnsPacket *p) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL, *soa = NULL;
+ _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
+ DnsResourceKey *key = NULL;
+ bool tentative = false;
+ int r;
+
+ assert(s);
+ assert(p);
+
+ if (p->protocol != DNS_PROTOCOL_LLMNR)
+ return;
+
+ if (p->ipproto == IPPROTO_UDP) {
+ /* Don't accept UDP queries directed to anything but
+ * the LLMNR multicast addresses. See RFC 4795,
+ * section 2.5. */
+
+ if (p->family == AF_INET && !in_addr_equal(AF_INET, &p->destination, (union in_addr_union*) &LLMNR_MULTICAST_IPV4_ADDRESS))
+ return;
+
+ if (p->family == AF_INET6 && !in_addr_equal(AF_INET6, &p->destination, (union in_addr_union*) &LLMNR_MULTICAST_IPV6_ADDRESS))
+ return;
+ }
+
+ r = dns_packet_extract(p);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to extract resource records from incoming packet: %m");
+ return;
+ }
+
+ if (DNS_PACKET_LLMNR_C(p)) {
+ /* Somebody notified us about a possible conflict */
+ dns_scope_verify_conflicts(s, p);
+ return;
+ }
+
+ assert(dns_question_size(p->question) == 1);
+ key = p->question->keys[0];
+
+ r = dns_zone_lookup(&s->zone, key, 0, &answer, &soa, &tentative);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to lookup key: %m");
+ return;
+ }
+ if (r == 0)
+ return;
+
+ if (answer)
+ dns_answer_order_by_scope(answer, in_addr_is_link_local(p->family, &p->sender) > 0);
+
+ r = dns_scope_make_reply_packet(s, DNS_PACKET_ID(p), DNS_RCODE_SUCCESS, p->question, answer, soa, tentative, &reply);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to build reply packet: %m");
+ return;
+ }
+
+ if (stream) {
+ r = dns_stream_write_packet(stream, reply);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to enqueue reply packet: %m");
+ return;
+ }
+
+ /* Let's take an extra reference on this stream, so that it stays around after returning. The reference
+ * will be dangling until the stream is disconnected, and the default completion handler of the stream
+ * will then unref the stream and destroy it */
+ if (DNS_STREAM_QUEUED(stream))
+ dns_stream_ref(stream);
+ } else {
+ int fd;
+
+ if (!ratelimit_below(&s->ratelimit))
+ return;
+
+ if (p->family == AF_INET)
+ fd = manager_llmnr_ipv4_udp_fd(s->manager);
+ else if (p->family == AF_INET6)
+ fd = manager_llmnr_ipv6_udp_fd(s->manager);
+ else {
+ log_debug("Unknown protocol");
+ return;
+ }
+ if (fd < 0) {
+ log_debug_errno(fd, "Failed to get reply socket: %m");
+ return;
+ }
+
+ /* Note that we always immediately reply to all LLMNR
+ * requests, and do not wait any time, since we
+ * verified uniqueness for all records. Also see RFC
+ * 4795, Section 2.7 */
+
+ r = manager_send(s->manager, fd, p->ifindex, p->family, &p->sender, p->sender_port, NULL, reply);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to send reply packet: %m");
+ return;
+ }
+ }
+}
+
+DnsTransaction *dns_scope_find_transaction(DnsScope *scope, DnsResourceKey *key, bool cache_ok) {
+ DnsTransaction *t;
+
+ assert(scope);
+ assert(key);
+
+ /* Try to find an ongoing transaction that is a equal to the
+ * specified question */
+ t = hashmap_get(scope->transactions_by_key, key);
+ if (!t)
+ return NULL;
+
+ /* Refuse reusing transactions that completed based on cached
+ * data instead of a real packet, if that's requested. */
+ if (!cache_ok &&
+ IN_SET(t->state, DNS_TRANSACTION_SUCCESS, DNS_TRANSACTION_RCODE_FAILURE) &&
+ t->answer_source != DNS_TRANSACTION_NETWORK)
+ return NULL;
+
+ return t;
+}
+
+static int dns_scope_make_conflict_packet(
+ DnsScope *s,
+ DnsResourceRecord *rr,
+ DnsPacket **ret) {
+
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ int r;
+
+ assert(s);
+ assert(rr);
+ assert(ret);
+
+ r = dns_packet_new(&p, s->protocol, 0, DNS_PACKET_SIZE_MAX);
+ if (r < 0)
+ return r;
+
+ DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS(
+ 0 /* qr */,
+ 0 /* opcode */,
+ 1 /* conflict */,
+ 0 /* tc */,
+ 0 /* t */,
+ 0 /* (ra) */,
+ 0 /* (ad) */,
+ 0 /* (cd) */,
+ 0));
+
+ /* For mDNS, the transaction ID should always be 0 */
+ if (s->protocol != DNS_PROTOCOL_MDNS)
+ random_bytes(&DNS_PACKET_HEADER(p)->id, sizeof(uint16_t));
+
+ DNS_PACKET_HEADER(p)->qdcount = htobe16(1);
+ DNS_PACKET_HEADER(p)->arcount = htobe16(1);
+
+ r = dns_packet_append_key(p, rr->key, 0, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_append_rr(p, rr, 0, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(p);
+
+ return 0;
+}
+
+static int on_conflict_dispatch(sd_event_source *es, usec_t usec, void *userdata) {
+ DnsScope *scope = userdata;
+ int r;
+
+ assert(es);
+ assert(scope);
+
+ scope->conflict_event_source = sd_event_source_unref(scope->conflict_event_source);
+
+ for (;;) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+
+ key = ordered_hashmap_first_key(scope->conflict_queue);
+ if (!key)
+ break;
+
+ rr = ordered_hashmap_remove(scope->conflict_queue, key);
+ assert(rr);
+
+ r = dns_scope_make_conflict_packet(scope, rr, &p);
+ if (r < 0) {
+ log_error_errno(r, "Failed to make conflict packet: %m");
+ return 0;
+ }
+
+ r = dns_scope_emit_udp(scope, -1, p);
+ if (r < 0)
+ log_debug_errno(r, "Failed to send conflict packet: %m");
+ }
+
+ return 0;
+}
+
+int dns_scope_notify_conflict(DnsScope *scope, DnsResourceRecord *rr) {
+ usec_t jitter;
+ int r;
+
+ assert(scope);
+ assert(rr);
+
+ /* We don't send these queries immediately. Instead, we queue
+ * them, and send them after some jitter delay. */
+ r = ordered_hashmap_ensure_allocated(&scope->conflict_queue, &dns_resource_key_hash_ops);
+ if (r < 0) {
+ log_oom();
+ return r;
+ }
+
+ /* We only place one RR per key in the conflict
+ * messages, not all of them. That should be enough to
+ * indicate where there might be a conflict */
+ r = ordered_hashmap_put(scope->conflict_queue, rr->key, rr);
+ if (IN_SET(r, 0, -EEXIST))
+ return 0;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to queue conflicting RR: %m");
+
+ dns_resource_key_ref(rr->key);
+ dns_resource_record_ref(rr);
+
+ if (scope->conflict_event_source)
+ return 0;
+
+ random_bytes(&jitter, sizeof(jitter));
+ jitter %= LLMNR_JITTER_INTERVAL_USEC;
+
+ r = sd_event_add_time(scope->manager->event,
+ &scope->conflict_event_source,
+ clock_boottime_or_monotonic(),
+ now(clock_boottime_or_monotonic()) + jitter,
+ LLMNR_JITTER_INTERVAL_USEC,
+ on_conflict_dispatch, scope);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add conflict dispatch event: %m");
+
+ (void) sd_event_source_set_description(scope->conflict_event_source, "scope-conflict");
+
+ return 0;
+}
+
+void dns_scope_check_conflicts(DnsScope *scope, DnsPacket *p) {
+ DnsResourceRecord *rr;
+ int r;
+
+ assert(scope);
+ assert(p);
+
+ if (!IN_SET(p->protocol, DNS_PROTOCOL_LLMNR, DNS_PROTOCOL_MDNS))
+ return;
+
+ if (DNS_PACKET_RRCOUNT(p) <= 0)
+ return;
+
+ if (p->protocol == DNS_PROTOCOL_LLMNR) {
+ if (DNS_PACKET_LLMNR_C(p) != 0)
+ return;
+
+ if (DNS_PACKET_LLMNR_T(p) != 0)
+ return;
+ }
+
+ if (manager_our_packet(scope->manager, p))
+ return;
+
+ r = dns_packet_extract(p);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to extract packet: %m");
+ return;
+ }
+
+ log_debug("Checking for conflicts...");
+
+ DNS_ANSWER_FOREACH(rr, p->answer) {
+ /* No conflict if it is DNS-SD RR used for service enumeration. */
+ if (dns_resource_key_is_dnssd_ptr(rr->key))
+ continue;
+
+ /* Check for conflicts against the local zone. If we
+ * found one, we won't check any further */
+ r = dns_zone_check_conflicts(&scope->zone, rr);
+ if (r != 0)
+ continue;
+
+ /* Check for conflicts against the local cache. If so,
+ * send out an advisory query, to inform everybody */
+ r = dns_cache_check_conflicts(&scope->cache, rr, p->family, &p->sender);
+ if (r <= 0)
+ continue;
+
+ dns_scope_notify_conflict(scope, rr);
+ }
+}
+
+void dns_scope_dump(DnsScope *s, FILE *f) {
+ assert(s);
+
+ if (!f)
+ f = stdout;
+
+ fputs("[Scope protocol=", f);
+ fputs(dns_protocol_to_string(s->protocol), f);
+
+ if (s->link) {
+ fputs(" interface=", f);
+ fputs(s->link->name, f);
+ }
+
+ if (s->family != AF_UNSPEC) {
+ fputs(" family=", f);
+ fputs(af_to_name(s->family), f);
+ }
+
+ fputs("]\n", f);
+
+ if (!dns_zone_is_empty(&s->zone)) {
+ fputs("ZONE:\n", f);
+ dns_zone_dump(&s->zone, f);
+ }
+
+ if (!dns_cache_is_empty(&s->cache)) {
+ fputs("CACHE:\n", f);
+ dns_cache_dump(&s->cache, f);
+ }
+}
+
+DnsSearchDomain *dns_scope_get_search_domains(DnsScope *s) {
+ assert(s);
+
+ if (s->protocol != DNS_PROTOCOL_DNS)
+ return NULL;
+
+ if (s->link)
+ return s->link->search_domains;
+
+ return s->manager->search_domains;
+}
+
+bool dns_scope_name_needs_search_domain(DnsScope *s, const char *name) {
+ assert(s);
+
+ if (s->protocol != DNS_PROTOCOL_DNS)
+ return false;
+
+ return dns_name_is_single_label(name);
+}
+
+bool dns_scope_network_good(DnsScope *s) {
+ /* Checks whether the network is in good state for lookups on this scope. For mDNS/LLMNR/Classic DNS scopes
+ * bound to links this is easy, as they don't even exist if the link isn't in a suitable state. For the global
+ * DNS scope we check whether there are any links that are up and have an address. */
+
+ if (s->link)
+ return true;
+
+ return manager_routable(s->manager, AF_UNSPEC);
+}
+
+int dns_scope_ifindex(DnsScope *s) {
+ assert(s);
+
+ if (s->link)
+ return s->link->ifindex;
+
+ return 0;
+}
+
+static int on_announcement_timeout(sd_event_source *s, usec_t usec, void *userdata) {
+ DnsScope *scope = userdata;
+
+ assert(s);
+
+ scope->announce_event_source = sd_event_source_unref(scope->announce_event_source);
+
+ (void) dns_scope_announce(scope, false);
+ return 0;
+}
+
+int dns_scope_announce(DnsScope *scope, bool goodbye) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ _cleanup_set_free_ Set *types = NULL;
+ DnsTransaction *t;
+ DnsZoneItem *z, *i;
+ unsigned size = 0;
+ Iterator iterator;
+ char *service_type;
+ int r;
+
+ if (!scope)
+ return 0;
+
+ if (scope->protocol != DNS_PROTOCOL_MDNS)
+ return 0;
+
+ /* Check if we're done with probing. */
+ LIST_FOREACH(transactions_by_scope, t, scope->transactions)
+ if (DNS_TRANSACTION_IS_LIVE(t->state))
+ return 0;
+
+ /* Check if there're services pending conflict resolution. */
+ if (manager_next_dnssd_names(scope->manager))
+ return 0; /* we reach this point only if changing hostname didn't help */
+
+ /* Calculate answer's size. */
+ HASHMAP_FOREACH(z, scope->zone.by_key, iterator) {
+ if (z->state != DNS_ZONE_ITEM_ESTABLISHED)
+ continue;
+
+ if (z->rr->key->type == DNS_TYPE_PTR &&
+ !dns_zone_contains_name(&scope->zone, z->rr->ptr.name)) {
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ log_debug("Skip PTR RR <%s> since its counterparts seem to be withdrawn", dns_resource_key_to_string(z->rr->key, key_str, sizeof key_str));
+ z->state = DNS_ZONE_ITEM_WITHDRAWN;
+ continue;
+ }
+
+ /* Collect service types for _services._dns-sd._udp.local RRs in a set */
+ if (!scope->announced &&
+ dns_resource_key_is_dnssd_ptr(z->rr->key)) {
+ if (!set_contains(types, dns_resource_key_name(z->rr->key))) {
+ r = set_ensure_allocated(&types, &dns_name_hash_ops);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to allocate set: %m");
+
+ r = set_put(types, dns_resource_key_name(z->rr->key));
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add item to set: %m");
+ }
+ }
+
+ LIST_FOREACH(by_key, i, z)
+ size++;
+ }
+
+ answer = dns_answer_new(size + set_size(types));
+ if (!answer)
+ return log_oom();
+
+ /* Second iteration, actually add RRs to the answer. */
+ HASHMAP_FOREACH(z, scope->zone.by_key, iterator)
+ LIST_FOREACH (by_key, i, z) {
+ DnsAnswerFlags flags;
+
+ if (i->state != DNS_ZONE_ITEM_ESTABLISHED)
+ continue;
+
+ if (dns_resource_key_is_dnssd_ptr(i->rr->key))
+ flags = goodbye ? DNS_ANSWER_GOODBYE : 0;
+ else
+ flags = goodbye ? (DNS_ANSWER_GOODBYE|DNS_ANSWER_CACHE_FLUSH) : DNS_ANSWER_CACHE_FLUSH;
+
+ r = dns_answer_add(answer, i->rr, 0 , flags);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add RR to announce: %m");
+ }
+
+ /* Since all the active services are in the zone make them discoverable now. */
+ SET_FOREACH(service_type, types, iterator) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr;
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_PTR,
+ "_services._dns-sd._udp.local");
+ rr->ptr.name = strdup(service_type);
+ rr->ttl = MDNS_DEFAULT_TTL;
+
+ r = dns_zone_put(&scope->zone, scope, rr, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add DNS-SD PTR record to MDNS zone: %m");
+
+ r = dns_answer_add(answer, rr, 0 , 0);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add RR to announce: %m");
+ }
+
+ if (dns_answer_isempty(answer))
+ return 0;
+
+ r = dns_scope_make_reply_packet(scope, 0, DNS_RCODE_SUCCESS, NULL, answer, NULL, false, &p);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to build reply packet: %m");
+
+ r = dns_scope_emit_udp(scope, -1, p);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to send reply packet: %m");
+
+ /* In section 8.3 of RFC6762: "The Multicast DNS responder MUST send at least two unsolicited
+ * responses, one second apart." */
+ if (!scope->announced) {
+ usec_t ts;
+
+ scope->announced = true;
+
+ assert_se(sd_event_now(scope->manager->event, clock_boottime_or_monotonic(), &ts) >= 0);
+ ts += MDNS_ANNOUNCE_DELAY;
+
+ r = sd_event_add_time(
+ scope->manager->event,
+ &scope->announce_event_source,
+ clock_boottime_or_monotonic(),
+ ts,
+ MDNS_JITTER_RANGE_USEC,
+ on_announcement_timeout, scope);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to schedule second announcement: %m");
+
+ (void) sd_event_source_set_description(scope->announce_event_source, "mdns-announce");
+ }
+
+ return 0;
+}
+
+int dns_scope_add_dnssd_services(DnsScope *scope) {
+ Iterator i;
+ DnssdService *service;
+ DnssdTxtData *txt_data;
+ int r;
+
+ assert(scope);
+
+ if (hashmap_size(scope->manager->dnssd_services) == 0)
+ return 0;
+
+ scope->announced = false;
+
+ HASHMAP_FOREACH(service, scope->manager->dnssd_services, i) {
+ service->withdrawn = false;
+
+ r = dns_zone_put(&scope->zone, scope, service->ptr_rr, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add PTR record to MDNS zone: %m");
+
+ r = dns_zone_put(&scope->zone, scope, service->srv_rr, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add SRV record to MDNS zone: %m");
+
+ LIST_FOREACH(items, txt_data, service->txt_data_items) {
+ r = dns_zone_put(&scope->zone, scope, txt_data->rr, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add TXT record to MDNS zone: %m");
+ }
+ }
+
+ return 0;
+}
+
+int dns_scope_remove_dnssd_services(DnsScope *scope) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ Iterator i;
+ DnssdService *service;
+ DnssdTxtData *txt_data;
+ int r;
+
+ assert(scope);
+
+ key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_PTR,
+ "_services._dns-sd._udp.local");
+ if (!key)
+ return log_oom();
+
+ r = dns_zone_remove_rrs_by_key(&scope->zone, key);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(service, scope->manager->dnssd_services, i) {
+ dns_zone_remove_rr(&scope->zone, service->ptr_rr);
+ dns_zone_remove_rr(&scope->zone, service->srv_rr);
+ LIST_FOREACH(items, txt_data, service->txt_data_items)
+ dns_zone_remove_rr(&scope->zone, txt_data->rr);
+ }
+
+ return 0;
+}
+
+static bool dns_scope_has_route_only_domains(DnsScope *scope) {
+ DnsSearchDomain *domain, *first;
+ bool route_only = false;
+
+ assert(scope);
+ assert(scope->protocol == DNS_PROTOCOL_DNS);
+
+ /* Returns 'true' if this scope is suitable for queries to specific domains only. For that we check
+ * if there are any route-only domains on this interface, as a heuristic to discern VPN-style links
+ * from non-VPN-style links. Returns 'false' for all other cases, i.e. if the scope is intended to
+ * take queries to arbitrary domains, i.e. has no routing domains set. */
+
+ if (scope->link)
+ first = scope->link->search_domains;
+ else
+ first = scope->manager->search_domains;
+
+ LIST_FOREACH(domains, domain, first) {
+ /* "." means "any domain", thus the interface takes any kind of traffic. Thus, we exit early
+ * here, as it doesn't really matter whether this link has any route-only domains or not,
+ * "~." really trumps everything and clearly indicates that this interface shall receive all
+ * traffic it can get. */
+ if (dns_name_is_root(DNS_SEARCH_DOMAIN_NAME(domain)))
+ return false;
+
+ if (domain->route_only)
+ route_only = true;
+ }
+
+ return route_only;
+}
+
+bool dns_scope_is_default_route(DnsScope *scope) {
+ assert(scope);
+
+ /* Only use DNS scopes as default routes */
+ if (scope->protocol != DNS_PROTOCOL_DNS)
+ return false;
+
+ /* The global DNS scope is always suitable as default route */
+ if (!scope->link)
+ return true;
+
+ /* Honour whatever is explicitly configured. This is really the best approach, and trumps any
+ * automatic logic. */
+ if (scope->link->default_route >= 0)
+ return scope->link->default_route;
+
+ /* Otherwise check if we have any route-only domains, as a sensible heuristic: if so, let's not
+ * volunteer as default route. */
+ return !dns_scope_has_route_only_domains(scope);
+}
diff --git a/src/resolve/resolved-dns-scope.h b/src/resolve/resolved-dns-scope.h
new file mode 100644
index 0000000..f4b45c4
--- /dev/null
+++ b/src/resolve/resolved-dns-scope.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "list.h"
+
+typedef struct DnsScope DnsScope;
+
+#include "resolved-dns-cache.h"
+#include "resolved-dns-dnssec.h"
+#include "resolved-dns-packet.h"
+#include "resolved-dns-query.h"
+#include "resolved-dns-search-domain.h"
+#include "resolved-dns-server.h"
+#include "resolved-dns-stream.h"
+#include "resolved-dns-zone.h"
+#include "resolved-link.h"
+
+typedef enum DnsScopeMatch {
+ DNS_SCOPE_NO,
+ DNS_SCOPE_MAYBE,
+ DNS_SCOPE_YES_BASE, /* Add the number of matching labels to this */
+ DNS_SCOPE_YES_END = DNS_SCOPE_YES_BASE + DNS_N_LABELS_MAX,
+ _DNS_SCOPE_MATCH_MAX,
+ _DNS_SCOPE_MATCH_INVALID = -1
+} DnsScopeMatch;
+
+struct DnsScope {
+ Manager *manager;
+
+ DnsProtocol protocol;
+ int family;
+
+ /* Copied at scope creation time from the link/manager */
+ DnssecMode dnssec_mode;
+ DnsOverTlsMode dns_over_tls_mode;
+
+ Link *link;
+
+ DnsCache cache;
+ DnsZone zone;
+
+ OrderedHashmap *conflict_queue;
+ sd_event_source *conflict_event_source;
+
+ bool announced:1;
+ sd_event_source *announce_event_source;
+
+ RateLimit ratelimit;
+
+ usec_t resend_timeout;
+ usec_t max_rtt;
+
+ LIST_HEAD(DnsQueryCandidate, query_candidates);
+
+ /* Note that we keep track of ongoing transactions in two
+ * ways: once in a hashmap, indexed by the rr key, and once in
+ * a linked list. We use the hashmap to quickly find
+ * transactions we can reuse for a key. But note that there
+ * might be multiple transactions for the same key (because
+ * the zone probing can't reuse a transaction answered from
+ * the zone or the cache), and the hashmap only tracks the
+ * most recent entry. */
+ Hashmap *transactions_by_key;
+ LIST_HEAD(DnsTransaction, transactions);
+
+ LIST_FIELDS(DnsScope, scopes);
+};
+
+int dns_scope_new(Manager *m, DnsScope **ret, Link *l, DnsProtocol p, int family);
+DnsScope* dns_scope_free(DnsScope *s);
+
+void dns_scope_packet_received(DnsScope *s, usec_t rtt);
+void dns_scope_packet_lost(DnsScope *s, usec_t usec);
+
+int dns_scope_emit_udp(DnsScope *s, int fd, DnsPacket *p);
+int dns_scope_socket_tcp(DnsScope *s, int family, const union in_addr_union *address, DnsServer *server, uint16_t port, union sockaddr_union *ret_socket_address);
+int dns_scope_socket_udp(DnsScope *s, DnsServer *server, uint16_t port);
+
+DnsScopeMatch dns_scope_good_domain(DnsScope *s, int ifindex, uint64_t flags, const char *domain);
+bool dns_scope_good_key(DnsScope *s, const DnsResourceKey *key);
+
+DnsServer *dns_scope_get_dns_server(DnsScope *s);
+unsigned dns_scope_get_n_dns_servers(DnsScope *s);
+void dns_scope_next_dns_server(DnsScope *s);
+
+int dns_scope_llmnr_membership(DnsScope *s, bool b);
+int dns_scope_mdns_membership(DnsScope *s, bool b);
+
+int dns_scope_make_reply_packet(DnsScope *s, uint16_t id, int rcode, DnsQuestion *q, DnsAnswer *answer, DnsAnswer *soa, bool tentative, DnsPacket **ret);
+void dns_scope_process_query(DnsScope *s, DnsStream *stream, DnsPacket *p);
+
+DnsTransaction *dns_scope_find_transaction(DnsScope *scope, DnsResourceKey *key, bool cache_ok);
+
+int dns_scope_notify_conflict(DnsScope *scope, DnsResourceRecord *rr);
+void dns_scope_check_conflicts(DnsScope *scope, DnsPacket *p);
+
+void dns_scope_dump(DnsScope *s, FILE *f);
+
+DnsSearchDomain *dns_scope_get_search_domains(DnsScope *s);
+
+bool dns_scope_name_needs_search_domain(DnsScope *s, const char *name);
+
+bool dns_scope_network_good(DnsScope *s);
+
+int dns_scope_ifindex(DnsScope *s);
+
+int dns_scope_announce(DnsScope *scope, bool goodbye);
+
+int dns_scope_add_dnssd_services(DnsScope *scope);
+int dns_scope_remove_dnssd_services(DnsScope *scope);
+
+bool dns_scope_is_default_route(DnsScope *scope);
diff --git a/src/resolve/resolved-dns-search-domain.c b/src/resolve/resolved-dns-search-domain.c
new file mode 100644
index 0000000..21c2442
--- /dev/null
+++ b/src/resolve/resolved-dns-search-domain.c
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "resolved-dns-search-domain.h"
+
+int dns_search_domain_new(
+ Manager *m,
+ DnsSearchDomain **ret,
+ DnsSearchDomainType type,
+ Link *l,
+ const char *name) {
+
+ _cleanup_free_ char *normalized = NULL;
+ DnsSearchDomain *d;
+ int r;
+
+ assert(m);
+ assert((type == DNS_SEARCH_DOMAIN_LINK) == !!l);
+ assert(name);
+
+ r = dns_name_normalize(name, 0, &normalized);
+ if (r < 0)
+ return r;
+
+ if (l) {
+ if (l->n_search_domains >= LINK_SEARCH_DOMAINS_MAX)
+ return -E2BIG;
+ } else {
+ if (m->n_search_domains >= MANAGER_SEARCH_DOMAINS_MAX)
+ return -E2BIG;
+ }
+
+ d = new0(DnsSearchDomain, 1);
+ if (!d)
+ return -ENOMEM;
+
+ d->n_ref = 1;
+ d->manager = m;
+ d->type = type;
+ d->name = TAKE_PTR(normalized);
+
+ switch (type) {
+
+ case DNS_SEARCH_DOMAIN_LINK:
+ d->link = l;
+ LIST_APPEND(domains, l->search_domains, d);
+ l->n_search_domains++;
+ break;
+
+ case DNS_SERVER_SYSTEM:
+ LIST_APPEND(domains, m->search_domains, d);
+ m->n_search_domains++;
+ break;
+
+ default:
+ assert_not_reached("Unknown search domain type");
+ }
+
+ d->linked = true;
+
+ if (ret)
+ *ret = d;
+
+ return 0;
+}
+
+static DnsSearchDomain* dns_search_domain_free(DnsSearchDomain *d) {
+ assert(d);
+
+ free(d->name);
+ return mfree(d);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsSearchDomain, dns_search_domain, dns_search_domain_free);
+
+void dns_search_domain_unlink(DnsSearchDomain *d) {
+ assert(d);
+ assert(d->manager);
+
+ if (!d->linked)
+ return;
+
+ switch (d->type) {
+
+ case DNS_SEARCH_DOMAIN_LINK:
+ assert(d->link);
+ assert(d->link->n_search_domains > 0);
+ LIST_REMOVE(domains, d->link->search_domains, d);
+ d->link->n_search_domains--;
+ break;
+
+ case DNS_SEARCH_DOMAIN_SYSTEM:
+ assert(d->manager->n_search_domains > 0);
+ LIST_REMOVE(domains, d->manager->search_domains, d);
+ d->manager->n_search_domains--;
+ break;
+ }
+
+ d->linked = false;
+
+ dns_search_domain_unref(d);
+}
+
+void dns_search_domain_move_back_and_unmark(DnsSearchDomain *d) {
+ DnsSearchDomain *tail;
+
+ assert(d);
+
+ if (!d->marked)
+ return;
+
+ d->marked = false;
+
+ if (!d->linked || !d->domains_next)
+ return;
+
+ switch (d->type) {
+
+ case DNS_SEARCH_DOMAIN_LINK:
+ assert(d->link);
+ LIST_FIND_TAIL(domains, d, tail);
+ LIST_REMOVE(domains, d->link->search_domains, d);
+ LIST_INSERT_AFTER(domains, d->link->search_domains, tail, d);
+ break;
+
+ case DNS_SEARCH_DOMAIN_SYSTEM:
+ LIST_FIND_TAIL(domains, d, tail);
+ LIST_REMOVE(domains, d->manager->search_domains, d);
+ LIST_INSERT_AFTER(domains, d->manager->search_domains, tail, d);
+ break;
+
+ default:
+ assert_not_reached("Unknown search domain type");
+ }
+}
+
+void dns_search_domain_unlink_all(DnsSearchDomain *first) {
+ DnsSearchDomain *next;
+
+ if (!first)
+ return;
+
+ next = first->domains_next;
+ dns_search_domain_unlink(first);
+
+ dns_search_domain_unlink_all(next);
+}
+
+void dns_search_domain_unlink_marked(DnsSearchDomain *first) {
+ DnsSearchDomain *next;
+
+ if (!first)
+ return;
+
+ next = first->domains_next;
+
+ if (first->marked)
+ dns_search_domain_unlink(first);
+
+ dns_search_domain_unlink_marked(next);
+}
+
+void dns_search_domain_mark_all(DnsSearchDomain *first) {
+ if (!first)
+ return;
+
+ first->marked = true;
+ dns_search_domain_mark_all(first->domains_next);
+}
+
+int dns_search_domain_find(DnsSearchDomain *first, const char *name, DnsSearchDomain **ret) {
+ DnsSearchDomain *d;
+ int r;
+
+ assert(name);
+ assert(ret);
+
+ LIST_FOREACH(domains, d, first) {
+
+ r = dns_name_equal(name, d->name);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ *ret = d;
+ return 1;
+ }
+ }
+
+ *ret = NULL;
+ return 0;
+}
diff --git a/src/resolve/resolved-dns-search-domain.h b/src/resolve/resolved-dns-search-domain.h
new file mode 100644
index 0000000..d0c2914
--- /dev/null
+++ b/src/resolve/resolved-dns-search-domain.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "macro.h"
+
+typedef struct DnsSearchDomain DnsSearchDomain;
+
+typedef enum DnsSearchDomainType {
+ DNS_SEARCH_DOMAIN_SYSTEM,
+ DNS_SEARCH_DOMAIN_LINK,
+} DnsSearchDomainType;
+
+#include "resolved-link.h"
+#include "resolved-manager.h"
+
+struct DnsSearchDomain {
+ Manager *manager;
+
+ unsigned n_ref;
+
+ DnsSearchDomainType type;
+ Link *link;
+
+ char *name;
+
+ bool marked:1;
+ bool route_only:1;
+
+ bool linked:1;
+ LIST_FIELDS(DnsSearchDomain, domains);
+};
+
+int dns_search_domain_new(
+ Manager *m,
+ DnsSearchDomain **ret,
+ DnsSearchDomainType type,
+ Link *link,
+ const char *name);
+
+DnsSearchDomain* dns_search_domain_ref(DnsSearchDomain *d);
+DnsSearchDomain* dns_search_domain_unref(DnsSearchDomain *d);
+
+void dns_search_domain_unlink(DnsSearchDomain *d);
+void dns_search_domain_move_back_and_unmark(DnsSearchDomain *d);
+
+void dns_search_domain_unlink_all(DnsSearchDomain *first);
+void dns_search_domain_unlink_marked(DnsSearchDomain *first);
+void dns_search_domain_mark_all(DnsSearchDomain *first);
+
+int dns_search_domain_find(DnsSearchDomain *first, const char *name, DnsSearchDomain **ret);
+
+static inline const char* DNS_SEARCH_DOMAIN_NAME(DnsSearchDomain *d) {
+ return d ? d->name : NULL;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsSearchDomain*, dns_search_domain_unref);
diff --git a/src/resolve/resolved-dns-server.c b/src/resolve/resolved-dns-server.c
new file mode 100644
index 0000000..b85eb75
--- /dev/null
+++ b/src/resolve/resolved-dns-server.c
@@ -0,0 +1,915 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "resolved-dns-server.h"
+#include "resolved-dns-stub.h"
+#include "resolved-resolv-conf.h"
+#include "siphash24.h"
+#include "string-table.h"
+#include "string-util.h"
+
+/* The amount of time to wait before retrying with a full feature set */
+#define DNS_SERVER_FEATURE_GRACE_PERIOD_MAX_USEC (6 * USEC_PER_HOUR)
+#define DNS_SERVER_FEATURE_GRACE_PERIOD_MIN_USEC (5 * USEC_PER_MINUTE)
+
+/* The number of times we will attempt a certain feature set before degrading */
+#define DNS_SERVER_FEATURE_RETRY_ATTEMPTS 3
+
+int dns_server_new(
+ Manager *m,
+ DnsServer **ret,
+ DnsServerType type,
+ Link *l,
+ int family,
+ const union in_addr_union *in_addr,
+ int ifindex) {
+
+ DnsServer *s;
+
+ assert(m);
+ assert((type == DNS_SERVER_LINK) == !!l);
+ assert(in_addr);
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return -EAFNOSUPPORT;
+
+ if (l) {
+ if (l->n_dns_servers >= LINK_DNS_SERVERS_MAX)
+ return -E2BIG;
+ } else {
+ if (m->n_dns_servers >= MANAGER_DNS_SERVERS_MAX)
+ return -E2BIG;
+ }
+
+ s = new(DnsServer, 1);
+ if (!s)
+ return -ENOMEM;
+
+ *s = (DnsServer) {
+ .n_ref = 1,
+ .manager = m,
+ .type = type,
+ .family = family,
+ .address = *in_addr,
+ .ifindex = ifindex,
+ };
+
+ dns_server_reset_features(s);
+
+ switch (type) {
+
+ case DNS_SERVER_LINK:
+ s->link = l;
+ LIST_APPEND(servers, l->dns_servers, s);
+ l->n_dns_servers++;
+ break;
+
+ case DNS_SERVER_SYSTEM:
+ LIST_APPEND(servers, m->dns_servers, s);
+ m->n_dns_servers++;
+ break;
+
+ case DNS_SERVER_FALLBACK:
+ LIST_APPEND(servers, m->fallback_dns_servers, s);
+ m->n_dns_servers++;
+ break;
+
+ default:
+ assert_not_reached("Unknown server type");
+ }
+
+ s->linked = true;
+
+#if ENABLE_DNS_OVER_TLS
+ dnstls_server_init(s);
+#endif
+
+ /* A new DNS server that isn't fallback is added and the one
+ * we used so far was a fallback one? Then let's try to pick
+ * the new one */
+ if (type != DNS_SERVER_FALLBACK &&
+ m->current_dns_server &&
+ m->current_dns_server->type == DNS_SERVER_FALLBACK)
+ manager_set_dns_server(m, NULL);
+
+ if (ret)
+ *ret = s;
+
+ return 0;
+}
+
+static DnsServer* dns_server_free(DnsServer *s) {
+ assert(s);
+
+ dns_server_unref_stream(s);
+
+#if ENABLE_DNS_OVER_TLS
+ dnstls_server_free(s);
+#endif
+
+ free(s->server_string);
+ return mfree(s);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsServer, dns_server, dns_server_free);
+
+void dns_server_unlink(DnsServer *s) {
+ assert(s);
+ assert(s->manager);
+
+ /* This removes the specified server from the linked list of
+ * servers, but any server might still stay around if it has
+ * refs, for example from an ongoing transaction. */
+
+ if (!s->linked)
+ return;
+
+ switch (s->type) {
+
+ case DNS_SERVER_LINK:
+ assert(s->link);
+ assert(s->link->n_dns_servers > 0);
+ LIST_REMOVE(servers, s->link->dns_servers, s);
+ s->link->n_dns_servers--;
+ break;
+
+ case DNS_SERVER_SYSTEM:
+ assert(s->manager->n_dns_servers > 0);
+ LIST_REMOVE(servers, s->manager->dns_servers, s);
+ s->manager->n_dns_servers--;
+ break;
+
+ case DNS_SERVER_FALLBACK:
+ assert(s->manager->n_dns_servers > 0);
+ LIST_REMOVE(servers, s->manager->fallback_dns_servers, s);
+ s->manager->n_dns_servers--;
+ break;
+ default:
+ assert_not_reached("Unknown server type");
+ }
+
+ s->linked = false;
+
+ if (s->link && s->link->current_dns_server == s)
+ link_set_dns_server(s->link, NULL);
+
+ if (s->manager->current_dns_server == s)
+ manager_set_dns_server(s->manager, NULL);
+
+ /* No need to keep a default stream around anymore */
+ dns_server_unref_stream(s);
+
+ dns_server_unref(s);
+}
+
+void dns_server_move_back_and_unmark(DnsServer *s) {
+ DnsServer *tail;
+
+ assert(s);
+
+ if (!s->marked)
+ return;
+
+ s->marked = false;
+
+ if (!s->linked || !s->servers_next)
+ return;
+
+ /* Move us to the end of the list, so that the order is
+ * strictly kept, if we are not at the end anyway. */
+
+ switch (s->type) {
+
+ case DNS_SERVER_LINK:
+ assert(s->link);
+ LIST_FIND_TAIL(servers, s, tail);
+ LIST_REMOVE(servers, s->link->dns_servers, s);
+ LIST_INSERT_AFTER(servers, s->link->dns_servers, tail, s);
+ break;
+
+ case DNS_SERVER_SYSTEM:
+ LIST_FIND_TAIL(servers, s, tail);
+ LIST_REMOVE(servers, s->manager->dns_servers, s);
+ LIST_INSERT_AFTER(servers, s->manager->dns_servers, tail, s);
+ break;
+
+ case DNS_SERVER_FALLBACK:
+ LIST_FIND_TAIL(servers, s, tail);
+ LIST_REMOVE(servers, s->manager->fallback_dns_servers, s);
+ LIST_INSERT_AFTER(servers, s->manager->fallback_dns_servers, tail, s);
+ break;
+
+ default:
+ assert_not_reached("Unknown server type");
+ }
+}
+
+static void dns_server_verified(DnsServer *s, DnsServerFeatureLevel level) {
+ assert(s);
+
+ if (s->verified_feature_level > level)
+ return;
+
+ if (s->verified_feature_level != level) {
+ log_debug("Verified we get a response at feature level %s from DNS server %s.",
+ dns_server_feature_level_to_string(level),
+ dns_server_string(s));
+ s->verified_feature_level = level;
+ }
+
+ assert_se(sd_event_now(s->manager->event, clock_boottime_or_monotonic(), &s->verified_usec) >= 0);
+}
+
+static void dns_server_reset_counters(DnsServer *s) {
+ assert(s);
+
+ s->n_failed_udp = 0;
+ s->n_failed_tcp = 0;
+ s->n_failed_tls = 0;
+ s->packet_truncated = false;
+ s->verified_usec = 0;
+
+ /* Note that we do not reset s->packet_bad_opt and s->packet_rrsig_missing here. We reset them only when the
+ * grace period ends, but not when lowering the possible feature level, as a lower level feature level should
+ * not make RRSIGs appear or OPT appear, but rather make them disappear. If the reappear anyway, then that's
+ * indication for a differently broken OPT/RRSIG implementation, and we really don't want to support that
+ * either.
+ *
+ * This is particularly important to deal with certain Belkin routers which break OPT for certain lookups (A),
+ * but pass traffic through for others (AAAA). If we detect the broken behaviour on one lookup we should not
+ * reenable it for another, because we cannot validate things anyway, given that the RRSIG/OPT data will be
+ * incomplete. */
+}
+
+void dns_server_packet_received(DnsServer *s, int protocol, DnsServerFeatureLevel level, size_t size) {
+ assert(s);
+
+ if (protocol == IPPROTO_UDP) {
+ if (s->possible_feature_level == level)
+ s->n_failed_udp = 0;
+ } else if (protocol == IPPROTO_TCP) {
+ if (DNS_SERVER_FEATURE_LEVEL_IS_TLS(level)) {
+ if (s->possible_feature_level == level)
+ s->n_failed_tls = 0;
+ } else {
+ if (s->possible_feature_level == level)
+ s->n_failed_tcp = 0;
+
+ /* Successful TCP connections are only useful to verify the TCP feature level. */
+ level = DNS_SERVER_FEATURE_LEVEL_TCP;
+ }
+ }
+
+ /* If the RRSIG data is missing, then we can only validate EDNS0 at max */
+ if (s->packet_rrsig_missing && level >= DNS_SERVER_FEATURE_LEVEL_DO)
+ level = DNS_SERVER_FEATURE_LEVEL_IS_TLS(level) ? DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN : DNS_SERVER_FEATURE_LEVEL_EDNS0;
+
+ /* If the OPT RR got lost, then we can only validate UDP at max */
+ if (s->packet_bad_opt && level >= DNS_SERVER_FEATURE_LEVEL_EDNS0)
+ level = DNS_SERVER_FEATURE_LEVEL_EDNS0 - 1;
+
+ /* Even if we successfully receive a reply to a request announcing support for large packets,
+ that does not mean we can necessarily receive large packets. */
+ if (level == DNS_SERVER_FEATURE_LEVEL_LARGE)
+ level = DNS_SERVER_FEATURE_LEVEL_LARGE - 1;
+
+ dns_server_verified(s, level);
+
+ /* Remember the size of the largest UDP packet we received from a server,
+ we know that we can always announce support for packets with at least
+ this size. */
+ if (protocol == IPPROTO_UDP && s->received_udp_packet_max < size)
+ s->received_udp_packet_max = size;
+}
+
+void dns_server_packet_lost(DnsServer *s, int protocol, DnsServerFeatureLevel level) {
+ assert(s);
+ assert(s->manager);
+
+ if (s->possible_feature_level == level) {
+ if (protocol == IPPROTO_UDP)
+ s->n_failed_udp++;
+ else if (protocol == IPPROTO_TCP) {
+ if (DNS_SERVER_FEATURE_LEVEL_IS_TLS(level))
+ s->n_failed_tls++;
+ else
+ s->n_failed_tcp++;
+ }
+ }
+}
+
+void dns_server_packet_truncated(DnsServer *s, DnsServerFeatureLevel level) {
+ assert(s);
+
+ /* Invoked whenever we get a packet with TC bit set. */
+
+ if (s->possible_feature_level != level)
+ return;
+
+ s->packet_truncated = true;
+}
+
+void dns_server_packet_rrsig_missing(DnsServer *s, DnsServerFeatureLevel level) {
+ assert(s);
+
+ if (level < DNS_SERVER_FEATURE_LEVEL_DO)
+ return;
+
+ /* If the RRSIG RRs are missing, we have to downgrade what we previously verified */
+ if (s->verified_feature_level >= DNS_SERVER_FEATURE_LEVEL_DO)
+ s->verified_feature_level = DNS_SERVER_FEATURE_LEVEL_IS_TLS(level) ? DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN : DNS_SERVER_FEATURE_LEVEL_EDNS0;
+
+ s->packet_rrsig_missing = true;
+}
+
+void dns_server_packet_bad_opt(DnsServer *s, DnsServerFeatureLevel level) {
+ assert(s);
+
+ if (level < DNS_SERVER_FEATURE_LEVEL_EDNS0)
+ return;
+
+ /* If the OPT RR got lost, we have to downgrade what we previously verified */
+ if (s->verified_feature_level >= DNS_SERVER_FEATURE_LEVEL_EDNS0)
+ s->verified_feature_level = DNS_SERVER_FEATURE_LEVEL_EDNS0-1;
+
+ s->packet_bad_opt = true;
+}
+
+void dns_server_packet_rcode_downgrade(DnsServer *s, DnsServerFeatureLevel level) {
+ assert(s);
+
+ /* Invoked whenever we got a FORMERR, SERVFAIL or NOTIMP rcode from a server and downgrading the feature level
+ * for the transaction made it go away. In this case we immediately downgrade to the feature level that made
+ * things work. */
+
+ if (s->verified_feature_level > level)
+ s->verified_feature_level = level;
+
+ if (s->possible_feature_level > level) {
+ s->possible_feature_level = level;
+ dns_server_reset_counters(s);
+ }
+
+ log_debug("Downgrading transaction feature level fixed an RCODE error, downgrading server %s too.", dns_server_string(s));
+}
+
+static bool dns_server_grace_period_expired(DnsServer *s) {
+ usec_t ts;
+
+ assert(s);
+ assert(s->manager);
+
+ if (s->verified_usec == 0)
+ return false;
+
+ assert_se(sd_event_now(s->manager->event, clock_boottime_or_monotonic(), &ts) >= 0);
+
+ if (s->verified_usec + s->features_grace_period_usec > ts)
+ return false;
+
+ s->features_grace_period_usec = MIN(s->features_grace_period_usec * 2, DNS_SERVER_FEATURE_GRACE_PERIOD_MAX_USEC);
+
+ return true;
+}
+
+DnsServerFeatureLevel dns_server_possible_feature_level(DnsServer *s) {
+ DnsServerFeatureLevel best;
+
+ assert(s);
+
+ /* Determine the best feature level we care about. If DNSSEC mode is off there's no point in using anything
+ * better than EDNS0, hence don't even try. */
+ if (dns_server_get_dnssec_mode(s) != DNSSEC_NO)
+ best = dns_server_get_dns_over_tls_mode(s) == DNS_OVER_TLS_NO ?
+ DNS_SERVER_FEATURE_LEVEL_LARGE :
+ DNS_SERVER_FEATURE_LEVEL_TLS_DO;
+ else
+ best = dns_server_get_dns_over_tls_mode(s) == DNS_OVER_TLS_NO ?
+ DNS_SERVER_FEATURE_LEVEL_EDNS0 :
+ DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN;
+
+ /* Clamp the feature level the highest level we care about. The DNSSEC mode might have changed since the last
+ * time, hence let's downgrade if we are still at a higher level. */
+ if (s->possible_feature_level > best)
+ s->possible_feature_level = best;
+
+ if (s->possible_feature_level < best && dns_server_grace_period_expired(s)) {
+
+ s->possible_feature_level = best;
+
+ dns_server_reset_counters(s);
+
+ s->packet_bad_opt = false;
+ s->packet_rrsig_missing = false;
+
+ log_info("Grace period over, resuming full feature set (%s) for DNS server %s.",
+ dns_server_feature_level_to_string(s->possible_feature_level),
+ dns_server_string(s));
+
+ dns_server_flush_cache(s);
+
+ } else if (s->possible_feature_level <= s->verified_feature_level)
+ s->possible_feature_level = s->verified_feature_level;
+ else {
+ DnsServerFeatureLevel p = s->possible_feature_level;
+
+ if (s->n_failed_tcp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS &&
+ s->possible_feature_level == DNS_SERVER_FEATURE_LEVEL_TCP) {
+
+ /* We are at the TCP (lowest) level, and we tried a couple of TCP connections, and it didn't
+ * work. Upgrade back to UDP again. */
+ log_debug("Reached maximum number of failed TCP connection attempts, trying UDP again...");
+ s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_UDP;
+ } else if (s->n_failed_tls > 0 &&
+ DNS_SERVER_FEATURE_LEVEL_IS_TLS(s->possible_feature_level)) {
+
+ /* We tried to connect using DNS-over-TLS, and it didn't work. Downgrade to plaintext UDP
+ * if we don't require DNS-over-TLS */
+
+ log_debug("Server doesn't support DNS-over-TLS, downgrading protocol...");
+ s->possible_feature_level--;
+ } else if (s->packet_bad_opt &&
+ s->possible_feature_level >= DNS_SERVER_FEATURE_LEVEL_EDNS0) {
+
+ /* A reply to one of our EDNS0 queries didn't carry a valid OPT RR, then downgrade to below
+ * EDNS0 levels. After all, some records generate different responses with and without OPT RR
+ * in the request. Example:
+ * https://open.nlnetlabs.nl/pipermail/dnssec-trigger/2014-November/000376.html */
+
+ log_debug("Server doesn't support EDNS(0) properly, downgrading feature level...");
+ s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_UDP;
+
+ } else if (s->packet_rrsig_missing &&
+ s->possible_feature_level >= DNS_SERVER_FEATURE_LEVEL_DO) {
+
+ /* RRSIG data was missing on a EDNS0 packet with DO bit set. This means the server doesn't
+ * augment responses with DNSSEC RRs. If so, let's better not ask the server for it anymore,
+ * after all some servers generate different replies depending if an OPT RR is in the query or
+ * not. */
+
+ log_debug("Detected server responses lack RRSIG records, downgrading feature level...");
+ s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_IS_TLS(s->possible_feature_level) ? DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN : DNS_SERVER_FEATURE_LEVEL_EDNS0;
+
+ } else if (s->n_failed_udp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS &&
+ s->possible_feature_level >= (dns_server_get_dnssec_mode(s) == DNSSEC_YES ? DNS_SERVER_FEATURE_LEVEL_LARGE : DNS_SERVER_FEATURE_LEVEL_UDP)) {
+
+ /* We lost too many UDP packets in a row, and are on a feature level of UDP or higher. If the
+ * packets are lost, maybe the server cannot parse them, hence downgrading sounds like a good
+ * idea. We might downgrade all the way down to TCP this way.
+ *
+ * If strict DNSSEC mode is used we won't downgrade below DO level however, as packet loss
+ * might have many reasons, a broken DNSSEC implementation being only one reason. And if the
+ * user is strict on DNSSEC, then let's assume that DNSSEC is not the fault here. */
+
+ log_debug("Lost too many UDP packets, downgrading feature level...");
+ s->possible_feature_level--;
+
+ } else if (s->n_failed_tcp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS &&
+ s->packet_truncated &&
+ s->possible_feature_level > (dns_server_get_dnssec_mode(s) == DNSSEC_YES ? DNS_SERVER_FEATURE_LEVEL_LARGE : DNS_SERVER_FEATURE_LEVEL_UDP)) {
+
+ /* We got too many TCP connection failures in a row, we had at least one truncated packet, and
+ * are on a feature level above UDP. By downgrading things and getting rid of DNSSEC or EDNS0
+ * data we hope to make the packet smaller, so that it still works via UDP given that TCP
+ * appears not to be a fallback. Note that if we are already at the lowest UDP level, we don't
+ * go further down, since that's TCP, and TCP failed too often after all. */
+
+ log_debug("Got too many failed TCP connection failures and truncated UDP packets, downgrading feature level...");
+ s->possible_feature_level--;
+ }
+
+ if (p != s->possible_feature_level) {
+
+ /* We changed the feature level, reset the counting */
+ dns_server_reset_counters(s);
+
+ log_warning("Using degraded feature set (%s) for DNS server %s.",
+ dns_server_feature_level_to_string(s->possible_feature_level),
+ dns_server_string(s));
+ }
+ }
+
+ return s->possible_feature_level;
+}
+
+int dns_server_adjust_opt(DnsServer *server, DnsPacket *packet, DnsServerFeatureLevel level) {
+ size_t packet_size;
+ bool edns_do;
+ int r;
+
+ assert(server);
+ assert(packet);
+ assert(packet->protocol == DNS_PROTOCOL_DNS);
+
+ /* Fix the OPT field in the packet to match our current feature level. */
+
+ r = dns_packet_truncate_opt(packet);
+ if (r < 0)
+ return r;
+
+ if (level < DNS_SERVER_FEATURE_LEVEL_EDNS0)
+ return 0;
+
+ edns_do = level >= DNS_SERVER_FEATURE_LEVEL_DO;
+
+ if (level >= DNS_SERVER_FEATURE_LEVEL_LARGE)
+ packet_size = DNS_PACKET_UNICAST_SIZE_LARGE_MAX;
+ else
+ packet_size = server->received_udp_packet_max;
+
+ return dns_packet_append_opt(packet, packet_size, edns_do, 0, NULL);
+}
+
+int dns_server_ifindex(const DnsServer *s) {
+ assert(s);
+
+ /* The link ifindex always takes precedence */
+ if (s->link)
+ return s->link->ifindex;
+
+ if (s->ifindex > 0)
+ return s->ifindex;
+
+ return 0;
+}
+
+const char *dns_server_string(DnsServer *server) {
+ assert(server);
+
+ if (!server->server_string)
+ (void) in_addr_ifindex_to_string(server->family, &server->address, dns_server_ifindex(server), &server->server_string);
+
+ return strna(server->server_string);
+}
+
+bool dns_server_dnssec_supported(DnsServer *server) {
+ assert(server);
+
+ /* Returns whether the server supports DNSSEC according to what we know about it */
+
+ if (server->possible_feature_level < DNS_SERVER_FEATURE_LEVEL_DO)
+ return false;
+
+ if (server->packet_bad_opt)
+ return false;
+
+ if (server->packet_rrsig_missing)
+ return false;
+
+ /* DNSSEC servers need to support TCP properly (see RFC5966), if they don't, we assume DNSSEC is borked too */
+ if (server->n_failed_tcp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS)
+ return false;
+
+ return true;
+}
+
+void dns_server_warn_downgrade(DnsServer *server) {
+ assert(server);
+
+ if (server->warned_downgrade)
+ return;
+
+ log_struct(LOG_NOTICE,
+ "MESSAGE_ID=" SD_MESSAGE_DNSSEC_DOWNGRADE_STR,
+ LOG_MESSAGE("Server %s does not support DNSSEC, downgrading to non-DNSSEC mode.", dns_server_string(server)),
+ "DNS_SERVER=%s", dns_server_string(server),
+ "DNS_SERVER_FEATURE_LEVEL=%s", dns_server_feature_level_to_string(server->possible_feature_level));
+
+ server->warned_downgrade = true;
+}
+
+static void dns_server_hash_func(const DnsServer *s, struct siphash *state) {
+ assert(s);
+
+ siphash24_compress(&s->family, sizeof(s->family), state);
+ siphash24_compress(&s->address, FAMILY_ADDRESS_SIZE(s->family), state);
+ siphash24_compress(&s->ifindex, sizeof(s->ifindex), state);
+}
+
+static int dns_server_compare_func(const DnsServer *x, const DnsServer *y) {
+ int r;
+
+ r = CMP(x->family, y->family);
+ if (r != 0)
+ return r;
+
+ r = memcmp(&x->address, &y->address, FAMILY_ADDRESS_SIZE(x->family));
+ if (r != 0)
+ return r;
+
+ r = CMP(x->ifindex, y->ifindex);
+ if (r != 0)
+ return r;
+
+ return 0;
+}
+
+DEFINE_HASH_OPS(dns_server_hash_ops, DnsServer, dns_server_hash_func, dns_server_compare_func);
+
+void dns_server_unlink_all(DnsServer *first) {
+ DnsServer *next;
+
+ if (!first)
+ return;
+
+ next = first->servers_next;
+ dns_server_unlink(first);
+
+ dns_server_unlink_all(next);
+}
+
+void dns_server_unlink_marked(DnsServer *first) {
+ DnsServer *next;
+
+ if (!first)
+ return;
+
+ next = first->servers_next;
+
+ if (first->marked)
+ dns_server_unlink(first);
+
+ dns_server_unlink_marked(next);
+}
+
+void dns_server_mark_all(DnsServer *first) {
+ if (!first)
+ return;
+
+ first->marked = true;
+ dns_server_mark_all(first->servers_next);
+}
+
+DnsServer *dns_server_find(DnsServer *first, int family, const union in_addr_union *in_addr, int ifindex) {
+ DnsServer *s;
+
+ LIST_FOREACH(servers, s, first)
+ if (s->family == family && in_addr_equal(family, &s->address, in_addr) > 0 && s->ifindex == ifindex)
+ return s;
+
+ return NULL;
+}
+
+DnsServer *manager_get_first_dns_server(Manager *m, DnsServerType t) {
+ assert(m);
+
+ switch (t) {
+
+ case DNS_SERVER_SYSTEM:
+ return m->dns_servers;
+
+ case DNS_SERVER_FALLBACK:
+ return m->fallback_dns_servers;
+
+ default:
+ return NULL;
+ }
+}
+
+DnsServer *manager_set_dns_server(Manager *m, DnsServer *s) {
+ assert(m);
+
+ if (m->current_dns_server == s)
+ return s;
+
+ if (s)
+ log_debug("Switching to %s DNS server %s.",
+ dns_server_type_to_string(s->type),
+ dns_server_string(s));
+
+ dns_server_unref(m->current_dns_server);
+ m->current_dns_server = dns_server_ref(s);
+
+ if (m->unicast_scope)
+ dns_cache_flush(&m->unicast_scope->cache);
+
+ return s;
+}
+
+DnsServer *manager_get_dns_server(Manager *m) {
+ Link *l;
+ assert(m);
+
+ /* Try to read updates resolv.conf */
+ manager_read_resolv_conf(m);
+
+ /* If no DNS server was chosen so far, pick the first one */
+ if (!m->current_dns_server)
+ manager_set_dns_server(m, m->dns_servers);
+
+ if (!m->current_dns_server) {
+ bool found = false;
+ Iterator i;
+
+ /* No DNS servers configured, let's see if there are
+ * any on any links. If not, we use the fallback
+ * servers */
+
+ HASHMAP_FOREACH(l, m->links, i)
+ if (l->dns_servers) {
+ found = true;
+ break;
+ }
+
+ if (!found)
+ manager_set_dns_server(m, m->fallback_dns_servers);
+ }
+
+ return m->current_dns_server;
+}
+
+void manager_next_dns_server(Manager *m) {
+ assert(m);
+
+ /* If there's currently no DNS server set, then the next
+ * manager_get_dns_server() will find one */
+ if (!m->current_dns_server)
+ return;
+
+ /* Change to the next one, but make sure to follow the linked
+ * list only if the server is still linked. */
+ if (m->current_dns_server->linked && m->current_dns_server->servers_next) {
+ manager_set_dns_server(m, m->current_dns_server->servers_next);
+ return;
+ }
+
+ /* If there was no next one, then start from the beginning of
+ * the list */
+ if (m->current_dns_server->type == DNS_SERVER_FALLBACK)
+ manager_set_dns_server(m, m->fallback_dns_servers);
+ else
+ manager_set_dns_server(m, m->dns_servers);
+}
+
+bool dns_server_address_valid(int family, const union in_addr_union *sa) {
+
+ /* Refuses the 0 IP addresses as well as 127.0.0.53 (which is our own DNS stub) */
+
+ if (in_addr_is_null(family, sa))
+ return false;
+
+ if (family == AF_INET && sa->in.s_addr == htobe32(INADDR_DNS_STUB))
+ return false;
+
+ return true;
+}
+
+DnssecMode dns_server_get_dnssec_mode(DnsServer *s) {
+ assert(s);
+
+ if (s->link)
+ return link_get_dnssec_mode(s->link);
+
+ return manager_get_dnssec_mode(s->manager);
+}
+
+DnsOverTlsMode dns_server_get_dns_over_tls_mode(DnsServer *s) {
+ assert(s);
+
+ if (s->link)
+ return link_get_dns_over_tls_mode(s->link);
+
+ return manager_get_dns_over_tls_mode(s->manager);
+}
+
+void dns_server_flush_cache(DnsServer *s) {
+ DnsServer *current;
+ DnsScope *scope;
+
+ assert(s);
+
+ /* Flush the cache of the scope this server belongs to */
+
+ current = s->link ? s->link->current_dns_server : s->manager->current_dns_server;
+ if (current != s)
+ return;
+
+ scope = s->link ? s->link->unicast_scope : s->manager->unicast_scope;
+ if (!scope)
+ return;
+
+ dns_cache_flush(&scope->cache);
+}
+
+void dns_server_reset_features(DnsServer *s) {
+ assert(s);
+
+ s->verified_feature_level = _DNS_SERVER_FEATURE_LEVEL_INVALID;
+ s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_BEST;
+
+ s->received_udp_packet_max = DNS_PACKET_UNICAST_SIZE_MAX;
+
+ s->packet_bad_opt = false;
+ s->packet_rrsig_missing = false;
+
+ s->features_grace_period_usec = DNS_SERVER_FEATURE_GRACE_PERIOD_MIN_USEC;
+
+ s->warned_downgrade = false;
+
+ dns_server_reset_counters(s);
+
+ /* Let's close the default stream, so that we reprobe with the new features */
+ dns_server_unref_stream(s);
+}
+
+void dns_server_reset_features_all(DnsServer *s) {
+ DnsServer *i;
+
+ LIST_FOREACH(servers, i, s)
+ dns_server_reset_features(i);
+}
+
+void dns_server_dump(DnsServer *s, FILE *f) {
+ assert(s);
+
+ if (!f)
+ f = stdout;
+
+ fputs("[Server ", f);
+ fputs(dns_server_string(s), f);
+ fputs(" type=", f);
+ fputs(dns_server_type_to_string(s->type), f);
+
+ if (s->type == DNS_SERVER_LINK) {
+ assert(s->link);
+
+ fputs(" interface=", f);
+ fputs(s->link->name, f);
+ }
+
+ fputs("]\n", f);
+
+ fputs("\tVerified feature level: ", f);
+ fputs(strna(dns_server_feature_level_to_string(s->verified_feature_level)), f);
+ fputc('\n', f);
+
+ fputs("\tPossible feature level: ", f);
+ fputs(strna(dns_server_feature_level_to_string(s->possible_feature_level)), f);
+ fputc('\n', f);
+
+ fputs("\tDNSSEC Mode: ", f);
+ fputs(strna(dnssec_mode_to_string(dns_server_get_dnssec_mode(s))), f);
+ fputc('\n', f);
+
+ fputs("\tCan do DNSSEC: ", f);
+ fputs(yes_no(dns_server_dnssec_supported(s)), f);
+ fputc('\n', f);
+
+ fprintf(f,
+ "\tMaximum UDP packet size received: %zu\n"
+ "\tFailed UDP attempts: %u\n"
+ "\tFailed TCP attempts: %u\n"
+ "\tSeen truncated packet: %s\n"
+ "\tSeen OPT RR getting lost: %s\n"
+ "\tSeen RRSIG RR missing: %s\n",
+ s->received_udp_packet_max,
+ s->n_failed_udp,
+ s->n_failed_tcp,
+ yes_no(s->packet_truncated),
+ yes_no(s->packet_bad_opt),
+ yes_no(s->packet_rrsig_missing));
+}
+
+void dns_server_unref_stream(DnsServer *s) {
+ DnsStream *ref;
+
+ assert(s);
+
+ /* Detaches the default stream of this server. Some special care needs to be taken here, as that stream and
+ * this server reference each other. First, take the stream out of the server. It's destructor will check if it
+ * is registered with us, hence let's invalidate this separatly, so that it is already unregistered. */
+ ref = TAKE_PTR(s->stream);
+
+ /* And then, unref it */
+ dns_stream_unref(ref);
+}
+
+DnsScope *dns_server_scope(DnsServer *s) {
+ assert(s);
+ assert((s->type == DNS_SERVER_LINK) == !!s->link);
+
+ if (s->link)
+ return s->link->unicast_scope;
+
+ return s->manager->unicast_scope;
+}
+
+static const char* const dns_server_type_table[_DNS_SERVER_TYPE_MAX] = {
+ [DNS_SERVER_SYSTEM] = "system",
+ [DNS_SERVER_FALLBACK] = "fallback",
+ [DNS_SERVER_LINK] = "link",
+};
+DEFINE_STRING_TABLE_LOOKUP(dns_server_type, DnsServerType);
+
+static const char* const dns_server_feature_level_table[_DNS_SERVER_FEATURE_LEVEL_MAX] = {
+ [DNS_SERVER_FEATURE_LEVEL_TCP] = "TCP",
+ [DNS_SERVER_FEATURE_LEVEL_UDP] = "UDP",
+ [DNS_SERVER_FEATURE_LEVEL_EDNS0] = "UDP+EDNS0",
+ [DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN] = "TLS+EDNS0",
+ [DNS_SERVER_FEATURE_LEVEL_DO] = "UDP+EDNS0+DO",
+ [DNS_SERVER_FEATURE_LEVEL_LARGE] = "UDP+EDNS0+DO+LARGE",
+ [DNS_SERVER_FEATURE_LEVEL_TLS_DO] = "TLS+EDNS0+D0",
+};
+DEFINE_STRING_TABLE_LOOKUP(dns_server_feature_level, DnsServerFeatureLevel);
diff --git a/src/resolve/resolved-dns-server.h b/src/resolve/resolved-dns-server.h
new file mode 100644
index 0000000..3c4627b
--- /dev/null
+++ b/src/resolve/resolved-dns-server.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "in-addr-util.h"
+
+typedef struct DnsServer DnsServer;
+
+typedef enum DnsServerType {
+ DNS_SERVER_SYSTEM,
+ DNS_SERVER_FALLBACK,
+ DNS_SERVER_LINK,
+ _DNS_SERVER_TYPE_MAX,
+ _DNS_SERVER_TYPE_INVALID = -1
+} DnsServerType;
+
+const char* dns_server_type_to_string(DnsServerType i) _const_;
+DnsServerType dns_server_type_from_string(const char *s) _pure_;
+
+typedef enum DnsServerFeatureLevel {
+ DNS_SERVER_FEATURE_LEVEL_TCP,
+ DNS_SERVER_FEATURE_LEVEL_UDP,
+ DNS_SERVER_FEATURE_LEVEL_EDNS0,
+ DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN,
+ DNS_SERVER_FEATURE_LEVEL_DO,
+ DNS_SERVER_FEATURE_LEVEL_LARGE,
+ DNS_SERVER_FEATURE_LEVEL_TLS_DO,
+ _DNS_SERVER_FEATURE_LEVEL_MAX,
+ _DNS_SERVER_FEATURE_LEVEL_INVALID = -1
+} DnsServerFeatureLevel;
+
+#define DNS_SERVER_FEATURE_LEVEL_WORST 0
+#define DNS_SERVER_FEATURE_LEVEL_BEST (_DNS_SERVER_FEATURE_LEVEL_MAX - 1)
+#define DNS_SERVER_FEATURE_LEVEL_IS_TLS(x) IN_SET(x, DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN, DNS_SERVER_FEATURE_LEVEL_TLS_DO)
+
+const char* dns_server_feature_level_to_string(int i) _const_;
+int dns_server_feature_level_from_string(const char *s) _pure_;
+
+#include "resolved-link.h"
+#include "resolved-manager.h"
+#if ENABLE_DNS_OVER_TLS
+#include "resolved-dnstls.h"
+#endif
+
+struct DnsServer {
+ Manager *manager;
+
+ unsigned n_ref;
+
+ DnsServerType type;
+ Link *link;
+
+ int family;
+ union in_addr_union address;
+ int ifindex; /* for IPv6 link-local DNS servers */
+
+ char *server_string;
+
+ /* The long-lived stream towards this server. */
+ DnsStream *stream;
+
+#if ENABLE_DNS_OVER_TLS
+ DnsTlsServerData dnstls_data;
+#endif
+
+ DnsServerFeatureLevel verified_feature_level;
+ DnsServerFeatureLevel possible_feature_level;
+
+ size_t received_udp_packet_max;
+
+ unsigned n_failed_udp;
+ unsigned n_failed_tcp;
+ unsigned n_failed_tls;
+
+ bool packet_truncated:1;
+ bool packet_bad_opt:1;
+ bool packet_rrsig_missing:1;
+
+ usec_t verified_usec;
+ usec_t features_grace_period_usec;
+
+ /* Whether we already warned about downgrading to non-DNSSEC mode for this server */
+ bool warned_downgrade:1;
+
+ /* Used when GC'ing old DNS servers when configuration changes. */
+ bool marked:1;
+
+ /* If linked is set, then this server appears in the servers linked list */
+ bool linked:1;
+ LIST_FIELDS(DnsServer, servers);
+};
+
+int dns_server_new(
+ Manager *m,
+ DnsServer **ret,
+ DnsServerType type,
+ Link *link,
+ int family,
+ const union in_addr_union *address,
+ int ifindex);
+
+DnsServer* dns_server_ref(DnsServer *s);
+DnsServer* dns_server_unref(DnsServer *s);
+
+void dns_server_unlink(DnsServer *s);
+void dns_server_move_back_and_unmark(DnsServer *s);
+
+void dns_server_packet_received(DnsServer *s, int protocol, DnsServerFeatureLevel level, size_t size);
+void dns_server_packet_lost(DnsServer *s, int protocol, DnsServerFeatureLevel level);
+void dns_server_packet_truncated(DnsServer *s, DnsServerFeatureLevel level);
+void dns_server_packet_rrsig_missing(DnsServer *s, DnsServerFeatureLevel level);
+void dns_server_packet_bad_opt(DnsServer *s, DnsServerFeatureLevel level);
+void dns_server_packet_rcode_downgrade(DnsServer *s, DnsServerFeatureLevel level);
+
+DnsServerFeatureLevel dns_server_possible_feature_level(DnsServer *s);
+
+int dns_server_adjust_opt(DnsServer *server, DnsPacket *packet, DnsServerFeatureLevel level);
+
+const char *dns_server_string(DnsServer *server);
+int dns_server_ifindex(const DnsServer *s);
+
+bool dns_server_dnssec_supported(DnsServer *server);
+
+void dns_server_warn_downgrade(DnsServer *server);
+
+DnsServer *dns_server_find(DnsServer *first, int family, const union in_addr_union *in_addr, int ifindex);
+
+void dns_server_unlink_all(DnsServer *first);
+void dns_server_unlink_marked(DnsServer *first);
+void dns_server_mark_all(DnsServer *first);
+
+DnsServer *manager_get_first_dns_server(Manager *m, DnsServerType t);
+
+DnsServer *manager_set_dns_server(Manager *m, DnsServer *s);
+DnsServer *manager_get_dns_server(Manager *m);
+void manager_next_dns_server(Manager *m);
+
+bool dns_server_address_valid(int family, const union in_addr_union *sa);
+
+DnssecMode dns_server_get_dnssec_mode(DnsServer *s);
+DnsOverTlsMode dns_server_get_dns_over_tls_mode(DnsServer *s);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsServer*, dns_server_unref);
+
+extern const struct hash_ops dns_server_hash_ops;
+
+void dns_server_flush_cache(DnsServer *s);
+
+void dns_server_reset_features(DnsServer *s);
+void dns_server_reset_features_all(DnsServer *s);
+
+void dns_server_dump(DnsServer *s, FILE *f);
+
+void dns_server_unref_stream(DnsServer *s);
+
+DnsScope *dns_server_scope(DnsServer *s);
diff --git a/src/resolve/resolved-dns-stream.c b/src/resolve/resolved-dns-stream.c
new file mode 100644
index 0000000..aee339a
--- /dev/null
+++ b/src/resolve/resolved-dns-stream.c
@@ -0,0 +1,567 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/tcp.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "missing.h"
+#include "resolved-dns-stream.h"
+
+#define DNS_STREAM_TIMEOUT_USEC (10 * USEC_PER_SEC)
+#define DNS_STREAMS_MAX 128
+
+static void dns_stream_stop(DnsStream *s) {
+ assert(s);
+
+ s->io_event_source = sd_event_source_unref(s->io_event_source);
+ s->timeout_event_source = sd_event_source_unref(s->timeout_event_source);
+ s->fd = safe_close(s->fd);
+
+ /* Disconnect us from the server object if we are now not usable anymore */
+ dns_stream_detach(s);
+}
+
+static int dns_stream_update_io(DnsStream *s) {
+ int f = 0;
+
+ assert(s);
+
+ if (s->write_packet && s->n_written < sizeof(s->write_size) + s->write_packet->size)
+ f |= EPOLLOUT;
+ else if (!ordered_set_isempty(s->write_queue)) {
+ dns_packet_unref(s->write_packet);
+ s->write_packet = ordered_set_steal_first(s->write_queue);
+ s->write_size = htobe16(s->write_packet->size);
+ s->n_written = 0;
+ f |= EPOLLOUT;
+ }
+ if (!s->read_packet || s->n_read < sizeof(s->read_size) + s->read_packet->size)
+ f |= EPOLLIN;
+
+#if ENABLE_DNS_OVER_TLS
+ /* For handshake and clean closing purposes, TLS can override requested events */
+ if (s->dnstls_events)
+ f = s->dnstls_events;
+#endif
+
+ return sd_event_source_set_io_events(s->io_event_source, f);
+}
+
+static int dns_stream_complete(DnsStream *s, int error) {
+ _cleanup_(dns_stream_unrefp) _unused_ DnsStream *ref = dns_stream_ref(s); /* Protect stream while we process it */
+
+ assert(s);
+
+#if ENABLE_DNS_OVER_TLS
+ if (s->encrypted) {
+ int r;
+
+ r = dnstls_stream_shutdown(s, error);
+ if (r != -EAGAIN)
+ dns_stream_stop(s);
+ } else
+#endif
+ dns_stream_stop(s);
+
+ dns_stream_detach(s);
+
+ if (s->complete)
+ s->complete(s, error);
+ else /* the default action if no completion function is set is to close the stream */
+ dns_stream_unref(s);
+
+ return 0;
+}
+
+static int dns_stream_identify(DnsStream *s) {
+ union {
+ struct cmsghdr header; /* For alignment */
+ uint8_t buffer[CMSG_SPACE(MAXSIZE(struct in_pktinfo, struct in6_pktinfo))
+ + EXTRA_CMSG_SPACE /* kernel appears to require extra space */];
+ } control;
+ struct msghdr mh = {};
+ struct cmsghdr *cmsg;
+ socklen_t sl;
+ int r;
+
+ assert(s);
+
+ if (s->identified)
+ return 0;
+
+ /* Query the local side */
+ s->local_salen = sizeof(s->local);
+ r = getsockname(s->fd, &s->local.sa, &s->local_salen);
+ if (r < 0)
+ return -errno;
+ if (s->local.sa.sa_family == AF_INET6 && s->ifindex <= 0)
+ s->ifindex = s->local.in6.sin6_scope_id;
+
+ /* Query the remote side */
+ s->peer_salen = sizeof(s->peer);
+ r = getpeername(s->fd, &s->peer.sa, &s->peer_salen);
+ if (r < 0)
+ return -errno;
+ if (s->peer.sa.sa_family == AF_INET6 && s->ifindex <= 0)
+ s->ifindex = s->peer.in6.sin6_scope_id;
+
+ /* Check consistency */
+ assert(s->peer.sa.sa_family == s->local.sa.sa_family);
+ assert(IN_SET(s->peer.sa.sa_family, AF_INET, AF_INET6));
+
+ /* Query connection meta information */
+ sl = sizeof(control);
+ if (s->peer.sa.sa_family == AF_INET) {
+ r = getsockopt(s->fd, IPPROTO_IP, IP_PKTOPTIONS, &control, &sl);
+ if (r < 0)
+ return -errno;
+ } else if (s->peer.sa.sa_family == AF_INET6) {
+
+ r = getsockopt(s->fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, &control, &sl);
+ if (r < 0)
+ return -errno;
+ } else
+ return -EAFNOSUPPORT;
+
+ mh.msg_control = &control;
+ mh.msg_controllen = sl;
+
+ CMSG_FOREACH(cmsg, &mh) {
+
+ if (cmsg->cmsg_level == IPPROTO_IPV6) {
+ assert(s->peer.sa.sa_family == AF_INET6);
+
+ switch (cmsg->cmsg_type) {
+
+ case IPV6_PKTINFO: {
+ struct in6_pktinfo *i = (struct in6_pktinfo*) CMSG_DATA(cmsg);
+
+ if (s->ifindex <= 0)
+ s->ifindex = i->ipi6_ifindex;
+ break;
+ }
+
+ case IPV6_HOPLIMIT:
+ s->ttl = *(int *) CMSG_DATA(cmsg);
+ break;
+ }
+
+ } else if (cmsg->cmsg_level == IPPROTO_IP) {
+ assert(s->peer.sa.sa_family == AF_INET);
+
+ switch (cmsg->cmsg_type) {
+
+ case IP_PKTINFO: {
+ struct in_pktinfo *i = (struct in_pktinfo*) CMSG_DATA(cmsg);
+
+ if (s->ifindex <= 0)
+ s->ifindex = i->ipi_ifindex;
+ break;
+ }
+
+ case IP_TTL:
+ s->ttl = *(int *) CMSG_DATA(cmsg);
+ break;
+ }
+ }
+ }
+
+ /* The Linux kernel sets the interface index to the loopback
+ * device if the connection came from the local host since it
+ * avoids the routing table in such a case. Let's unset the
+ * interface index in such a case. */
+ if (s->ifindex == LOOPBACK_IFINDEX)
+ s->ifindex = 0;
+
+ /* If we don't know the interface index still, we look for the
+ * first local interface with a matching address. Yuck! */
+ if (s->ifindex <= 0)
+ s->ifindex = manager_find_ifindex(s->manager, s->local.sa.sa_family, s->local.sa.sa_family == AF_INET ? (union in_addr_union*) &s->local.in.sin_addr : (union in_addr_union*) &s->local.in6.sin6_addr);
+
+ if (s->protocol == DNS_PROTOCOL_LLMNR && s->ifindex > 0) {
+ uint32_t ifindex = htobe32(s->ifindex);
+
+ /* Make sure all packets for this connection are sent on the same interface */
+ if (s->local.sa.sa_family == AF_INET) {
+ r = setsockopt(s->fd, IPPROTO_IP, IP_UNICAST_IF, &ifindex, sizeof(ifindex));
+ if (r < 0)
+ log_debug_errno(errno, "Failed to invoke IP_UNICAST_IF: %m");
+ } else if (s->local.sa.sa_family == AF_INET6) {
+ r = setsockopt(s->fd, IPPROTO_IPV6, IPV6_UNICAST_IF, &ifindex, sizeof(ifindex));
+ if (r < 0)
+ log_debug_errno(errno, "Failed to invoke IPV6_UNICAST_IF: %m");
+ }
+ }
+
+ s->identified = true;
+
+ return 0;
+}
+
+ssize_t dns_stream_writev(DnsStream *s, const struct iovec *iov, size_t iovcnt, int flags) {
+ ssize_t m;
+
+ assert(s);
+ assert(iov);
+
+#if ENABLE_DNS_OVER_TLS
+ if (s->encrypted && !(flags & DNS_STREAM_WRITE_TLS_DATA)) {
+ ssize_t ss;
+ size_t i;
+
+ m = 0;
+ for (i = 0; i < iovcnt; i++) {
+ ss = dnstls_stream_write(s, iov[i].iov_base, iov[i].iov_len);
+ if (ss < 0)
+ return ss;
+
+ m += ss;
+ if (ss != (ssize_t) iov[i].iov_len)
+ continue;
+ }
+ } else
+#endif
+ if (s->tfo_salen > 0) {
+ struct msghdr hdr = {
+ .msg_iov = (struct iovec*) iov,
+ .msg_iovlen = iovcnt,
+ .msg_name = &s->tfo_address.sa,
+ .msg_namelen = s->tfo_salen
+ };
+
+ m = sendmsg(s->fd, &hdr, MSG_FASTOPEN);
+ if (m < 0) {
+ if (errno == EOPNOTSUPP) {
+ s->tfo_salen = 0;
+ if (connect(s->fd, &s->tfo_address.sa, s->tfo_salen) < 0)
+ return -errno;
+
+ return -EAGAIN;
+ }
+ if (errno == EINPROGRESS)
+ return -EAGAIN;
+
+ return -errno;
+ } else
+ s->tfo_salen = 0; /* connection is made */
+ } else {
+ m = writev(s->fd, iov, iovcnt);
+ if (m < 0)
+ return -errno;
+ }
+
+ return m;
+}
+
+static ssize_t dns_stream_read(DnsStream *s, void *buf, size_t count) {
+ ssize_t ss;
+
+#if ENABLE_DNS_OVER_TLS
+ if (s->encrypted)
+ ss = dnstls_stream_read(s, buf, count);
+ else
+#endif
+ {
+ ss = read(s->fd, buf, count);
+ if (ss < 0)
+ return -errno;
+ }
+
+ return ss;
+}
+
+static int on_stream_timeout(sd_event_source *es, usec_t usec, void *userdata) {
+ DnsStream *s = userdata;
+
+ assert(s);
+
+ return dns_stream_complete(s, ETIMEDOUT);
+}
+
+static int on_stream_io(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(dns_stream_unrefp) DnsStream *s = dns_stream_ref(userdata); /* Protect stream while we process it */
+ int r;
+
+ assert(s);
+
+#if ENABLE_DNS_OVER_TLS
+ if (s->encrypted) {
+ r = dnstls_stream_on_io(s, revents);
+ if (r == DNSTLS_STREAM_CLOSED)
+ return 0;
+ if (r == -EAGAIN)
+ return dns_stream_update_io(s);
+ if (r < 0)
+ return dns_stream_complete(s, -r);
+
+ r = dns_stream_update_io(s);
+ if (r < 0)
+ return r;
+ }
+#endif
+
+ /* only identify after connecting */
+ if (s->tfo_salen == 0) {
+ r = dns_stream_identify(s);
+ if (r < 0)
+ return dns_stream_complete(s, -r);
+ }
+
+ if ((revents & EPOLLOUT) &&
+ s->write_packet &&
+ s->n_written < sizeof(s->write_size) + s->write_packet->size) {
+
+ struct iovec iov[2];
+ ssize_t ss;
+
+ iov[0] = IOVEC_MAKE(&s->write_size, sizeof(s->write_size));
+ iov[1] = IOVEC_MAKE(DNS_PACKET_DATA(s->write_packet), s->write_packet->size);
+
+ IOVEC_INCREMENT(iov, 2, s->n_written);
+
+ ss = dns_stream_writev(s, iov, 2, 0);
+ if (ss < 0) {
+ if (!IN_SET(-ss, EINTR, EAGAIN))
+ return dns_stream_complete(s, -ss);
+ } else
+ s->n_written += ss;
+
+ /* Are we done? If so, disable the event source for EPOLLOUT */
+ if (s->n_written >= sizeof(s->write_size) + s->write_packet->size) {
+ r = dns_stream_update_io(s);
+ if (r < 0)
+ return dns_stream_complete(s, -r);
+ }
+ }
+
+ if ((revents & (EPOLLIN|EPOLLHUP|EPOLLRDHUP)) &&
+ (!s->read_packet ||
+ s->n_read < sizeof(s->read_size) + s->read_packet->size)) {
+
+ if (s->n_read < sizeof(s->read_size)) {
+ ssize_t ss;
+
+ ss = dns_stream_read(s, (uint8_t*) &s->read_size + s->n_read, sizeof(s->read_size) - s->n_read);
+ if (ss < 0) {
+ if (!IN_SET(-ss, EINTR, EAGAIN))
+ return dns_stream_complete(s, -ss);
+ } else if (ss == 0)
+ return dns_stream_complete(s, ECONNRESET);
+ else
+ s->n_read += ss;
+ }
+
+ if (s->n_read >= sizeof(s->read_size)) {
+
+ if (be16toh(s->read_size) < DNS_PACKET_HEADER_SIZE)
+ return dns_stream_complete(s, EBADMSG);
+
+ if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size)) {
+ ssize_t ss;
+
+ if (!s->read_packet) {
+ r = dns_packet_new(&s->read_packet, s->protocol, be16toh(s->read_size), DNS_PACKET_SIZE_MAX);
+ if (r < 0)
+ return dns_stream_complete(s, -r);
+
+ s->read_packet->size = be16toh(s->read_size);
+ s->read_packet->ipproto = IPPROTO_TCP;
+ s->read_packet->family = s->peer.sa.sa_family;
+ s->read_packet->ttl = s->ttl;
+ s->read_packet->ifindex = s->ifindex;
+
+ if (s->read_packet->family == AF_INET) {
+ s->read_packet->sender.in = s->peer.in.sin_addr;
+ s->read_packet->sender_port = be16toh(s->peer.in.sin_port);
+ s->read_packet->destination.in = s->local.in.sin_addr;
+ s->read_packet->destination_port = be16toh(s->local.in.sin_port);
+ } else {
+ assert(s->read_packet->family == AF_INET6);
+ s->read_packet->sender.in6 = s->peer.in6.sin6_addr;
+ s->read_packet->sender_port = be16toh(s->peer.in6.sin6_port);
+ s->read_packet->destination.in6 = s->local.in6.sin6_addr;
+ s->read_packet->destination_port = be16toh(s->local.in6.sin6_port);
+
+ if (s->read_packet->ifindex == 0)
+ s->read_packet->ifindex = s->peer.in6.sin6_scope_id;
+ if (s->read_packet->ifindex == 0)
+ s->read_packet->ifindex = s->local.in6.sin6_scope_id;
+ }
+ }
+
+ ss = dns_stream_read(s,
+ (uint8_t*) DNS_PACKET_DATA(s->read_packet) + s->n_read - sizeof(s->read_size),
+ sizeof(s->read_size) + be16toh(s->read_size) - s->n_read);
+ if (ss < 0) {
+ if (!IN_SET(-ss, EINTR, EAGAIN))
+ return dns_stream_complete(s, -ss);
+ } else if (ss == 0)
+ return dns_stream_complete(s, ECONNRESET);
+ else
+ s->n_read += ss;
+ }
+
+ /* Are we done? If so, disable the event source for EPOLLIN */
+ if (s->n_read >= sizeof(s->read_size) + be16toh(s->read_size)) {
+ /* If there's a packet handler
+ * installed, call that. Note that
+ * this is optional... */
+ if (s->on_packet) {
+ r = s->on_packet(s);
+ if (r < 0)
+ return r;
+ }
+
+ r = dns_stream_update_io(s);
+ if (r < 0)
+ return dns_stream_complete(s, -r);
+ }
+ }
+ }
+
+ if ((s->write_packet && s->n_written >= sizeof(s->write_size) + s->write_packet->size) &&
+ (s->read_packet && s->n_read >= sizeof(s->read_size) + s->read_packet->size))
+ return dns_stream_complete(s, 0);
+
+ return 0;
+}
+
+static DnsStream *dns_stream_free(DnsStream *s) {
+ DnsPacket *p;
+ Iterator i;
+
+ assert(s);
+
+ dns_stream_stop(s);
+
+ if (s->manager) {
+ LIST_REMOVE(streams, s->manager->dns_streams, s);
+ s->manager->n_dns_streams--;
+ }
+
+#if ENABLE_DNS_OVER_TLS
+ if (s->encrypted)
+ dnstls_stream_free(s);
+#endif
+
+ ORDERED_SET_FOREACH(p, s->write_queue, i)
+ dns_packet_unref(ordered_set_remove(s->write_queue, p));
+
+ dns_packet_unref(s->write_packet);
+ dns_packet_unref(s->read_packet);
+ dns_server_unref(s->server);
+
+ ordered_set_free(s->write_queue);
+
+ return mfree(s);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsStream, dns_stream, dns_stream_free);
+
+int dns_stream_new(
+ Manager *m,
+ DnsStream **ret,
+ DnsProtocol protocol,
+ int fd,
+ const union sockaddr_union *tfo_address) {
+
+ _cleanup_(dns_stream_unrefp) DnsStream *s = NULL;
+ int r;
+
+ assert(m);
+ assert(ret);
+ assert(fd >= 0);
+
+ if (m->n_dns_streams > DNS_STREAMS_MAX)
+ return -EBUSY;
+
+ s = new(DnsStream, 1);
+ if (!s)
+ return -ENOMEM;
+
+ *s = (DnsStream) {
+ .n_ref = 1,
+ .fd = -1,
+ .protocol = protocol,
+ };
+
+ r = ordered_set_ensure_allocated(&s->write_queue, &dns_packet_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_io(m->event, &s->io_event_source, fd, EPOLLIN, on_stream_io, s);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s->io_event_source, "dns-stream-io");
+
+ r = sd_event_add_time(
+ m->event,
+ &s->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ now(clock_boottime_or_monotonic()) + DNS_STREAM_TIMEOUT_USEC, 0,
+ on_stream_timeout, s);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s->timeout_event_source, "dns-stream-timeout");
+
+ LIST_PREPEND(streams, m->dns_streams, s);
+ m->n_dns_streams++;
+ s->manager = m;
+
+ s->fd = fd;
+
+ if (tfo_address) {
+ s->tfo_address = *tfo_address;
+ s->tfo_salen = tfo_address->sa.sa_family == AF_INET6 ? sizeof(tfo_address->in6) : sizeof(tfo_address->in);
+ }
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+int dns_stream_write_packet(DnsStream *s, DnsPacket *p) {
+ int r;
+
+ assert(s);
+ assert(p);
+
+ r = ordered_set_put(s->write_queue, p);
+ if (r < 0)
+ return r;
+
+ dns_packet_ref(p);
+
+ return dns_stream_update_io(s);
+}
+
+DnsPacket *dns_stream_take_read_packet(DnsStream *s) {
+ assert(s);
+
+ if (!s->read_packet)
+ return NULL;
+
+ if (s->n_read < sizeof(s->read_size))
+ return NULL;
+
+ if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size))
+ return NULL;
+
+ s->n_read = 0;
+ return TAKE_PTR(s->read_packet);
+}
+
+void dns_stream_detach(DnsStream *s) {
+ assert(s);
+
+ if (!s->server)
+ return;
+
+ if (s->server->stream != s)
+ return;
+
+ dns_server_unref_stream(s->server);
+}
diff --git a/src/resolve/resolved-dns-stream.h b/src/resolve/resolved-dns-stream.h
new file mode 100644
index 0000000..f18fc91
--- /dev/null
+++ b/src/resolve/resolved-dns-stream.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "socket-util.h"
+
+typedef struct DnsStream DnsStream;
+
+#include "resolved-dns-packet.h"
+#include "resolved-dns-transaction.h"
+#include "resolved-manager.h"
+#if ENABLE_DNS_OVER_TLS
+#include "resolved-dnstls.h"
+#endif
+
+#define DNS_STREAM_WRITE_TLS_DATA 1
+
+/* Streams are used by three subsystems:
+ *
+ * 1. The normal transaction logic when doing a DNS or LLMNR lookup via TCP
+ * 2. The LLMNR logic when accepting a TCP-based lookup
+ * 3. The DNS stub logic when accepting a TCP-based lookup
+ */
+
+struct DnsStream {
+ Manager *manager;
+ unsigned n_ref;
+
+ DnsProtocol protocol;
+
+ int fd;
+ union sockaddr_union peer;
+ socklen_t peer_salen;
+ union sockaddr_union local;
+ socklen_t local_salen;
+ int ifindex;
+ uint32_t ttl;
+ bool identified;
+
+ /* only when using TCP fast open */
+ union sockaddr_union tfo_address;
+ socklen_t tfo_salen;
+
+#if ENABLE_DNS_OVER_TLS
+ DnsTlsStreamData dnstls_data;
+ int dnstls_events;
+#endif
+
+ sd_event_source *io_event_source;
+ sd_event_source *timeout_event_source;
+
+ be16_t write_size, read_size;
+ DnsPacket *write_packet, *read_packet;
+ size_t n_written, n_read;
+ OrderedSet *write_queue;
+
+ int (*on_packet)(DnsStream *s);
+ int (*complete)(DnsStream *s, int error);
+
+ LIST_HEAD(DnsTransaction, transactions); /* when used by the transaction logic */
+ DnsServer *server; /* when used by the transaction logic */
+ DnsQuery *query; /* when used by the DNS stub logic */
+
+ /* used when DNS-over-TLS is enabled */
+ bool encrypted:1;
+
+ LIST_FIELDS(DnsStream, streams);
+};
+
+int dns_stream_new(Manager *m, DnsStream **s, DnsProtocol protocol, int fd, const union sockaddr_union *tfo_address);
+#if ENABLE_DNS_OVER_TLS
+int dns_stream_connect_tls(DnsStream *s, void *tls_session);
+#endif
+DnsStream *dns_stream_unref(DnsStream *s);
+DnsStream *dns_stream_ref(DnsStream *s);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsStream*, dns_stream_unref);
+
+int dns_stream_write_packet(DnsStream *s, DnsPacket *p);
+ssize_t dns_stream_writev(DnsStream *s, const struct iovec *iov, size_t iovcnt, int flags);
+
+static inline bool DNS_STREAM_QUEUED(DnsStream *s) {
+ assert(s);
+
+ if (s->fd < 0) /* already stopped? */
+ return false;
+
+ return !!s->write_packet;
+}
+
+DnsPacket *dns_stream_take_read_packet(DnsStream *s);
+
+void dns_stream_detach(DnsStream *s);
diff --git a/src/resolve/resolved-dns-stub.c b/src/resolve/resolved-dns-stub.c
new file mode 100644
index 0000000..a00716c
--- /dev/null
+++ b/src/resolve/resolved-dns-stub.c
@@ -0,0 +1,586 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "fd-util.h"
+#include "missing_network.h"
+#include "resolved-dns-stub.h"
+#include "socket-util.h"
+
+/* The MTU of the loopback device is 64K on Linux, advertise that as maximum datagram size, but subtract the Ethernet,
+ * IP and UDP header sizes */
+#define ADVERTISE_DATAGRAM_SIZE_MAX (65536U-14U-20U-8U)
+
+static int manager_dns_stub_udp_fd(Manager *m);
+static int manager_dns_stub_tcp_fd(Manager *m);
+
+static int dns_stub_make_reply_packet(
+ DnsPacket **p,
+ size_t max_size,
+ DnsQuestion *q,
+ DnsAnswer *answer,
+ bool *ret_truncated) {
+
+ bool truncated = false;
+ DnsResourceRecord *rr;
+ unsigned c = 0;
+ int r;
+
+ assert(p);
+
+ /* Note that we don't bother with any additional RRs, as this is stub is for local lookups only, and hence
+ * roundtrips aren't expensive. */
+
+ if (!*p) {
+ r = dns_packet_new(p, DNS_PROTOCOL_DNS, 0, max_size);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_append_question(*p, q);
+ if (r < 0)
+ return r;
+
+ DNS_PACKET_HEADER(*p)->qdcount = htobe16(dns_question_size(q));
+ }
+
+ DNS_ANSWER_FOREACH(rr, answer) {
+
+ r = dns_question_matches_rr(q, rr, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto add;
+
+ r = dns_question_matches_cname_or_dname(q, rr, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto add;
+
+ continue;
+ add:
+ r = dns_packet_append_rr(*p, rr, 0, NULL, NULL);
+ if (r == -EMSGSIZE) {
+ truncated = true;
+ break;
+ }
+ if (r < 0)
+ return r;
+
+ c++;
+ }
+
+ if (ret_truncated)
+ *ret_truncated = truncated;
+ else if (truncated)
+ return -EMSGSIZE;
+
+ DNS_PACKET_HEADER(*p)->ancount = htobe16(be16toh(DNS_PACKET_HEADER(*p)->ancount) + c);
+
+ return 0;
+}
+
+static int dns_stub_finish_reply_packet(
+ DnsPacket *p,
+ uint16_t id,
+ int rcode,
+ bool tc, /* set the Truncated bit? */
+ bool add_opt, /* add an OPT RR to this packet? */
+ bool edns0_do, /* set the EDNS0 DNSSEC OK bit? */
+ bool ad) { /* set the DNSSEC authenticated data bit? */
+
+ int r;
+
+ assert(p);
+
+ if (!add_opt) {
+ /* If the client can't to EDNS0, don't do DO either */
+ edns0_do = false;
+
+ /* If the client didn't do EDNS, clamp the rcode to 4 bit */
+ if (rcode > 0xF)
+ rcode = DNS_RCODE_SERVFAIL;
+ }
+
+ /* Don't set the AD bit unless DO is on, too */
+ if (!edns0_do)
+ ad = false;
+
+ DNS_PACKET_HEADER(p)->id = id;
+
+ DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS(
+ 1 /* qr */,
+ 0 /* opcode */,
+ 0 /* aa */,
+ tc /* tc */,
+ 1 /* rd */,
+ 1 /* ra */,
+ ad /* ad */,
+ 0 /* cd */,
+ rcode));
+
+ if (add_opt) {
+ r = dns_packet_append_opt(p, ADVERTISE_DATAGRAM_SIZE_MAX, edns0_do, rcode, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static void dns_stub_detach_stream(DnsStream *s) {
+ assert(s);
+
+ s->complete = NULL;
+ s->on_packet = NULL;
+ s->query = NULL;
+}
+
+static int dns_stub_send(Manager *m, DnsStream *s, DnsPacket *p, DnsPacket *reply) {
+ int r;
+
+ assert(m);
+ assert(p);
+ assert(reply);
+
+ if (s)
+ r = dns_stream_write_packet(s, reply);
+ else {
+ int fd;
+
+ fd = manager_dns_stub_udp_fd(m);
+ if (fd < 0)
+ return log_debug_errno(fd, "Failed to get reply socket: %m");
+
+ /* Note that it is essential here that we explicitly choose the source IP address for this packet. This
+ * is because otherwise the kernel will choose it automatically based on the routing table and will
+ * thus pick 127.0.0.1 rather than 127.0.0.53. */
+
+ r = manager_send(m, fd, LOOPBACK_IFINDEX, p->family, &p->sender, p->sender_port, &p->destination, reply);
+ }
+ if (r < 0)
+ return log_debug_errno(r, "Failed to send reply packet: %m");
+
+ return 0;
+}
+
+static int dns_stub_send_failure(Manager *m, DnsStream *s, DnsPacket *p, int rcode, bool authenticated) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
+ int r;
+
+ assert(m);
+ assert(p);
+
+ r = dns_stub_make_reply_packet(&reply, DNS_PACKET_PAYLOAD_SIZE_MAX(p), p->question, NULL, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to make failure packet: %m");
+
+ r = dns_stub_finish_reply_packet(reply, DNS_PACKET_ID(p), rcode, false, !!p->opt, DNS_PACKET_DO(p), authenticated);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to build failure packet: %m");
+
+ return dns_stub_send(m, s, p, reply);
+}
+
+static void dns_stub_query_complete(DnsQuery *q) {
+ int r;
+
+ assert(q);
+ assert(q->request_dns_packet);
+
+ switch (q->state) {
+
+ case DNS_TRANSACTION_SUCCESS: {
+ bool truncated;
+
+ r = dns_stub_make_reply_packet(&q->reply_dns_packet, DNS_PACKET_PAYLOAD_SIZE_MAX(q->request_dns_packet), q->question_idna, q->answer, &truncated);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to build reply packet: %m");
+ break;
+ }
+
+ r = dns_query_process_cname(q);
+ if (r == -ELOOP) {
+ (void) dns_stub_send_failure(q->manager, q->request_dns_stream, q->request_dns_packet, DNS_RCODE_SERVFAIL, false);
+ break;
+ }
+ if (r < 0) {
+ log_debug_errno(r, "Failed to process CNAME: %m");
+ break;
+ }
+ if (r == DNS_QUERY_RESTARTED)
+ return;
+
+ r = dns_stub_finish_reply_packet(
+ q->reply_dns_packet,
+ DNS_PACKET_ID(q->request_dns_packet),
+ q->answer_rcode,
+ truncated,
+ !!q->request_dns_packet->opt,
+ DNS_PACKET_DO(q->request_dns_packet),
+ dns_query_fully_authenticated(q));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to finish reply packet: %m");
+ break;
+ }
+
+ (void) dns_stub_send(q->manager, q->request_dns_stream, q->request_dns_packet, q->reply_dns_packet);
+ break;
+ }
+
+ case DNS_TRANSACTION_RCODE_FAILURE:
+ (void) dns_stub_send_failure(q->manager, q->request_dns_stream, q->request_dns_packet, q->answer_rcode, dns_query_fully_authenticated(q));
+ break;
+
+ case DNS_TRANSACTION_NOT_FOUND:
+ (void) dns_stub_send_failure(q->manager, q->request_dns_stream, q->request_dns_packet, DNS_RCODE_NXDOMAIN, dns_query_fully_authenticated(q));
+ break;
+
+ case DNS_TRANSACTION_TIMEOUT:
+ case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED:
+ /* Propagate a timeout as a no packet, i.e. that the client also gets a timeout */
+ break;
+
+ case DNS_TRANSACTION_NO_SERVERS:
+ case DNS_TRANSACTION_INVALID_REPLY:
+ case DNS_TRANSACTION_ERRNO:
+ case DNS_TRANSACTION_ABORTED:
+ case DNS_TRANSACTION_DNSSEC_FAILED:
+ case DNS_TRANSACTION_NO_TRUST_ANCHOR:
+ case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED:
+ case DNS_TRANSACTION_NETWORK_DOWN:
+ (void) dns_stub_send_failure(q->manager, q->request_dns_stream, q->request_dns_packet, DNS_RCODE_SERVFAIL, false);
+ break;
+
+ case DNS_TRANSACTION_NULL:
+ case DNS_TRANSACTION_PENDING:
+ case DNS_TRANSACTION_VALIDATING:
+ default:
+ assert_not_reached("Impossible state");
+ }
+
+ /* If there's a packet to write set, let's leave the stream around */
+ if (q->request_dns_stream && DNS_STREAM_QUEUED(q->request_dns_stream)) {
+
+ /* Detach the stream from our query (make it an orphan), but do not drop the reference to it. The
+ * default completion action of the stream will drop the reference. */
+
+ dns_stub_detach_stream(q->request_dns_stream);
+ q->request_dns_stream = NULL;
+ }
+
+ dns_query_free(q);
+}
+
+static int dns_stub_stream_complete(DnsStream *s, int error) {
+ assert(s);
+
+ log_debug_errno(error, "DNS TCP connection terminated, destroying query: %m");
+
+ assert(s->query);
+ dns_query_free(s->query);
+
+ return 0;
+}
+
+static void dns_stub_process_query(Manager *m, DnsStream *s, DnsPacket *p) {
+ DnsQuery *q = NULL;
+ int r;
+
+ assert(m);
+ assert(p);
+ assert(p->protocol == DNS_PROTOCOL_DNS);
+
+ /* Takes ownership of the *s stream object */
+
+ if (in_addr_is_localhost(p->family, &p->sender) <= 0 ||
+ in_addr_is_localhost(p->family, &p->destination) <= 0) {
+ log_error("Got packet on unexpected IP range, refusing.");
+ dns_stub_send_failure(m, s, p, DNS_RCODE_SERVFAIL, false);
+ goto fail;
+ }
+
+ r = dns_packet_extract(p);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to extract resources from incoming packet, ignoring packet: %m");
+ dns_stub_send_failure(m, s, p, DNS_RCODE_FORMERR, false);
+ goto fail;
+ }
+
+ if (!DNS_PACKET_VERSION_SUPPORTED(p)) {
+ log_debug("Got EDNS OPT field with unsupported version number.");
+ dns_stub_send_failure(m, s, p, DNS_RCODE_BADVERS, false);
+ goto fail;
+ }
+
+ if (dns_type_is_obsolete(p->question->keys[0]->type)) {
+ log_debug("Got message with obsolete key type, refusing.");
+ dns_stub_send_failure(m, s, p, DNS_RCODE_NOTIMP, false);
+ goto fail;
+ }
+
+ if (dns_type_is_zone_transer(p->question->keys[0]->type)) {
+ log_debug("Got request for zone transfer, refusing.");
+ dns_stub_send_failure(m, s, p, DNS_RCODE_NOTIMP, false);
+ goto fail;
+ }
+
+ if (!DNS_PACKET_RD(p)) {
+ /* If the "rd" bit is off (i.e. recursion was not requested), then refuse operation */
+ log_debug("Got request with recursion disabled, refusing.");
+ dns_stub_send_failure(m, s, p, DNS_RCODE_REFUSED, false);
+ goto fail;
+ }
+
+ if (DNS_PACKET_DO(p) && DNS_PACKET_CD(p)) {
+ log_debug("Got request with DNSSEC CD bit set, refusing.");
+ dns_stub_send_failure(m, s, p, DNS_RCODE_NOTIMP, false);
+ goto fail;
+ }
+
+ r = dns_query_new(m, &q, p->question, p->question, 0, SD_RESOLVED_PROTOCOLS_ALL|SD_RESOLVED_NO_SEARCH);
+ if (r < 0) {
+ log_error_errno(r, "Failed to generate query object: %m");
+ dns_stub_send_failure(m, s, p, DNS_RCODE_SERVFAIL, false);
+ goto fail;
+ }
+
+ /* Request that the TTL is corrected by the cached time for this lookup, so that we return vaguely useful TTLs */
+ q->clamp_ttl = true;
+
+ q->request_dns_packet = dns_packet_ref(p);
+ q->request_dns_stream = dns_stream_ref(s); /* make sure the stream stays around until we can send a reply through it */
+ q->complete = dns_stub_query_complete;
+
+ if (s) {
+ s->on_packet = NULL;
+ s->complete = dns_stub_stream_complete;
+ s->query = q;
+ }
+
+ r = dns_query_go(q);
+ if (r < 0) {
+ log_error_errno(r, "Failed to start query: %m");
+ dns_stub_send_failure(m, s, p, DNS_RCODE_SERVFAIL, false);
+ goto fail;
+ }
+
+ log_debug("Processing query...");
+ return;
+
+fail:
+ if (s && DNS_STREAM_QUEUED(s))
+ dns_stub_detach_stream(s);
+
+ dns_query_free(q);
+}
+
+static int on_dns_stub_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ Manager *m = userdata;
+ int r;
+
+ r = manager_recv(m, fd, DNS_PROTOCOL_DNS, &p);
+ if (r <= 0)
+ return r;
+
+ if (dns_packet_validate_query(p) > 0) {
+ log_debug("Got DNS stub UDP query packet for id %u", DNS_PACKET_ID(p));
+
+ dns_stub_process_query(m, NULL, p);
+ } else
+ log_debug("Invalid DNS stub UDP packet, ignoring.");
+
+ return 0;
+}
+
+static int manager_dns_stub_udp_fd(Manager *m) {
+ union sockaddr_union sa = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = htobe16(53),
+ .in.sin_addr.s_addr = htobe32(INADDR_DNS_STUB),
+ };
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ if (m->dns_stub_udp_fd >= 0)
+ return m->dns_stub_udp_fd;
+
+ fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(fd, IPPROTO_IP, IP_PKTINFO, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(fd, IPPROTO_IP, IP_RECVTTL, true);
+ if (r < 0)
+ return r;
+
+ /* Make sure no traffic from outside the local host can leak to onto this socket */
+ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "lo", 3) < 0)
+ return -errno;
+
+ if (bind(fd, &sa.sa, sizeof(sa.in)) < 0)
+ return -errno;
+
+ r = sd_event_add_io(m->event, &m->dns_stub_udp_event_source, fd, EPOLLIN, on_dns_stub_packet, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->dns_stub_udp_event_source, "dns-stub-udp");
+
+ return m->dns_stub_udp_fd = TAKE_FD(fd);
+}
+
+static int on_dns_stub_stream_packet(DnsStream *s) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+
+ assert(s);
+
+ p = dns_stream_take_read_packet(s);
+ assert(p);
+
+ if (dns_packet_validate_query(p) > 0) {
+ log_debug("Got DNS stub TCP query packet for id %u", DNS_PACKET_ID(p));
+
+ dns_stub_process_query(s->manager, s, p);
+ } else
+ log_debug("Invalid DNS stub TCP packet, ignoring.");
+
+ /* Drop the reference to the stream. Either a query was created and added its own reference to the stream now,
+ * or that didn't happen in which case we want to free the stream */
+ dns_stream_unref(s);
+
+ return 0;
+}
+
+static int on_dns_stub_stream(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ DnsStream *stream;
+ Manager *m = userdata;
+ int cfd, r;
+
+ cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (cfd < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return -errno;
+ }
+
+ r = dns_stream_new(m, &stream, DNS_PROTOCOL_DNS, cfd, NULL);
+ if (r < 0) {
+ safe_close(cfd);
+ return r;
+ }
+
+ stream->on_packet = on_dns_stub_stream_packet;
+
+ /* We let the reference to the stream dangling here, it will either be dropped by the default "complete" action
+ * of the stream, or by our packet callback, or when the manager is shut down. */
+
+ return 0;
+}
+
+static int manager_dns_stub_tcp_fd(Manager *m) {
+ union sockaddr_union sa = {
+ .in.sin_family = AF_INET,
+ .in.sin_addr.s_addr = htobe32(INADDR_DNS_STUB),
+ .in.sin_port = htobe16(53),
+ };
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ if (m->dns_stub_tcp_fd >= 0)
+ return m->dns_stub_tcp_fd;
+
+ fd = socket(AF_INET, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ r = setsockopt_int(fd, IPPROTO_IP, IP_TTL, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(fd, IPPROTO_IP, IP_PKTINFO, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(fd, IPPROTO_IP, IP_RECVTTL, true);
+ if (r < 0)
+ return r;
+
+ /* Make sure no traffic from outside the local host can leak to onto this socket */
+ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "lo", 3) < 0)
+ return -errno;
+
+ if (bind(fd, &sa.sa, sizeof(sa.in)) < 0)
+ return -errno;
+
+ if (listen(fd, SOMAXCONN) < 0)
+ return -errno;
+
+ r = sd_event_add_io(m->event, &m->dns_stub_tcp_event_source, fd, EPOLLIN, on_dns_stub_stream, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->dns_stub_tcp_event_source, "dns-stub-tcp");
+
+ return m->dns_stub_tcp_fd = TAKE_FD(fd);
+}
+
+int manager_dns_stub_start(Manager *m) {
+ const char *t = "UDP";
+ int r = 0;
+
+ assert(m);
+
+ if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_NO)
+ log_debug("Not creating stub listener.");
+ else
+ log_debug("Creating stub listener using %s.",
+ m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP ? "UDP" :
+ m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP ? "TCP" :
+ "UDP/TCP");
+
+ if (IN_SET(m->dns_stub_listener_mode, DNS_STUB_LISTENER_YES, DNS_STUB_LISTENER_UDP))
+ r = manager_dns_stub_udp_fd(m);
+
+ if (r >= 0 &&
+ IN_SET(m->dns_stub_listener_mode, DNS_STUB_LISTENER_YES, DNS_STUB_LISTENER_TCP)) {
+ t = "TCP";
+ r = manager_dns_stub_tcp_fd(m);
+ }
+
+ if (IN_SET(r, -EADDRINUSE, -EPERM)) {
+ if (r == -EADDRINUSE)
+ log_warning_errno(r,
+ "Another process is already listening on %s socket 127.0.0.53:53.\n"
+ "Turning off local DNS stub support.", t);
+ else
+ log_warning_errno(r,
+ "Failed to listen on %s socket 127.0.0.53:53: %m.\n"
+ "Turning off local DNS stub support.", t);
+ manager_dns_stub_stop(m);
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to listen on %s socket 127.0.0.53:53: %m", t);
+
+ return 0;
+}
+
+void manager_dns_stub_stop(Manager *m) {
+ assert(m);
+
+ m->dns_stub_udp_event_source = sd_event_source_unref(m->dns_stub_udp_event_source);
+ m->dns_stub_tcp_event_source = sd_event_source_unref(m->dns_stub_tcp_event_source);
+
+ m->dns_stub_udp_fd = safe_close(m->dns_stub_udp_fd);
+ m->dns_stub_tcp_fd = safe_close(m->dns_stub_tcp_fd);
+}
diff --git a/src/resolve/resolved-dns-stub.h b/src/resolve/resolved-dns-stub.h
new file mode 100644
index 0000000..f34e9db
--- /dev/null
+++ b/src/resolve/resolved-dns-stub.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "resolved-manager.h"
+
+void manager_dns_stub_stop(Manager *m);
+int manager_dns_stub_start(Manager *m);
diff --git a/src/resolve/resolved-dns-synthesize.c b/src/resolve/resolved-dns-synthesize.c
new file mode 100644
index 0000000..f65116c
--- /dev/null
+++ b/src/resolve/resolved-dns-synthesize.c
@@ -0,0 +1,434 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "hostname-util.h"
+#include "local-addresses.h"
+#include "missing_network.h"
+#include "resolved-dns-synthesize.h"
+
+int dns_synthesize_ifindex(int ifindex) {
+
+ /* When the caller asked for resolving on a specific
+ * interface, we synthesize the answer for that
+ * interface. However, if nothing specific was claimed and we
+ * only return localhost RRs, we synthesize the answer for
+ * localhost. */
+
+ if (ifindex > 0)
+ return ifindex;
+
+ return LOOPBACK_IFINDEX;
+}
+
+int dns_synthesize_family(uint64_t flags) {
+
+ /* Picks an address family depending on set flags. This is
+ * purely for synthesized answers, where the family we return
+ * for the reply should match what was requested in the
+ * question, even though we are synthesizing the answer
+ * here. */
+
+ if (!(flags & SD_RESOLVED_DNS)) {
+ if (flags & (SD_RESOLVED_LLMNR_IPV4|SD_RESOLVED_MDNS_IPV4))
+ return AF_INET;
+ if (flags & (SD_RESOLVED_LLMNR_IPV6|SD_RESOLVED_MDNS_IPV6))
+ return AF_INET6;
+ }
+
+ return AF_UNSPEC;
+}
+
+DnsProtocol dns_synthesize_protocol(uint64_t flags) {
+
+ /* Similar as dns_synthesize_family() but does this for the
+ * protocol. If resolving via DNS was requested, we claim it
+ * was DNS. Similar, if nothing specific was
+ * requested. However, if only resolving via LLMNR was
+ * requested we return that. */
+
+ if (flags & SD_RESOLVED_DNS)
+ return DNS_PROTOCOL_DNS;
+ if (flags & SD_RESOLVED_LLMNR)
+ return DNS_PROTOCOL_LLMNR;
+ if (flags & SD_RESOLVED_MDNS)
+ return DNS_PROTOCOL_MDNS;
+
+ return DNS_PROTOCOL_DNS;
+}
+
+static int synthesize_localhost_rr(Manager *m, const DnsResourceKey *key, int ifindex, DnsAnswer **answer) {
+ int r;
+
+ assert(m);
+ assert(key);
+ assert(answer);
+
+ r = dns_answer_reserve(answer, 2);
+ if (r < 0)
+ return r;
+
+ if (IN_SET(key->type, DNS_TYPE_A, DNS_TYPE_ANY)) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_A, dns_resource_key_name(key));
+ if (!rr)
+ return -ENOMEM;
+
+ rr->a.in_addr.s_addr = htobe32(INADDR_LOOPBACK);
+
+ r = dns_answer_add(*answer, rr, dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+ }
+
+ if (IN_SET(key->type, DNS_TYPE_AAAA, DNS_TYPE_ANY)) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_AAAA, dns_resource_key_name(key));
+ if (!rr)
+ return -ENOMEM;
+
+ rr->aaaa.in6_addr = in6addr_loopback;
+
+ r = dns_answer_add(*answer, rr, dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int answer_add_ptr(DnsAnswer **answer, const char *from, const char *to, int ifindex, DnsAnswerFlags flags) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_PTR, from);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->ptr.name = strdup(to);
+ if (!rr->ptr.name)
+ return -ENOMEM;
+
+ return dns_answer_add(*answer, rr, ifindex, flags);
+}
+
+static int synthesize_localhost_ptr(Manager *m, const DnsResourceKey *key, int ifindex, DnsAnswer **answer) {
+ int r;
+
+ assert(m);
+ assert(key);
+ assert(answer);
+
+ if (IN_SET(key->type, DNS_TYPE_PTR, DNS_TYPE_ANY)) {
+ r = dns_answer_reserve(answer, 1);
+ if (r < 0)
+ return r;
+
+ r = answer_add_ptr(answer, dns_resource_key_name(key), "localhost", dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int answer_add_addresses_rr(
+ DnsAnswer **answer,
+ const char *name,
+ struct local_address *addresses,
+ unsigned n_addresses) {
+
+ unsigned j;
+ int r;
+
+ assert(answer);
+ assert(name);
+
+ r = dns_answer_reserve(answer, n_addresses);
+ if (r < 0)
+ return r;
+
+ for (j = 0; j < n_addresses; j++) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ r = dns_resource_record_new_address(&rr, addresses[j].family, &addresses[j].address, name);
+ if (r < 0)
+ return r;
+
+ r = dns_answer_add(*answer, rr, addresses[j].ifindex, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int answer_add_addresses_ptr(
+ DnsAnswer **answer,
+ const char *name,
+ struct local_address *addresses,
+ unsigned n_addresses,
+ int af, const union in_addr_union *match) {
+
+ bool added = false;
+ unsigned j;
+ int r;
+
+ assert(answer);
+ assert(name);
+
+ for (j = 0; j < n_addresses; j++) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ if (af != AF_UNSPEC) {
+
+ if (addresses[j].family != af)
+ continue;
+
+ if (match && !in_addr_equal(af, match, &addresses[j].address))
+ continue;
+ }
+
+ r = dns_answer_reserve(answer, 1);
+ if (r < 0)
+ return r;
+
+ r = dns_resource_record_new_reverse(&rr, addresses[j].family, &addresses[j].address, name);
+ if (r < 0)
+ return r;
+
+ r = dns_answer_add(*answer, rr, addresses[j].ifindex, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+
+ added = true;
+ }
+
+ return added;
+}
+
+static int synthesize_system_hostname_rr(Manager *m, const DnsResourceKey *key, int ifindex, DnsAnswer **answer) {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ int n = 0, af;
+
+ assert(m);
+ assert(key);
+ assert(answer);
+
+ af = dns_type_to_af(key->type);
+ if (af >= 0) {
+ n = local_addresses(m->rtnl, ifindex, af, &addresses);
+ if (n < 0)
+ return n;
+
+ if (n == 0) {
+ struct local_address buffer[2];
+
+ /* If we have no local addresses then use ::1
+ * and 127.0.0.2 as local ones. */
+
+ if (IN_SET(af, AF_INET, AF_UNSPEC))
+ buffer[n++] = (struct local_address) {
+ .family = AF_INET,
+ .ifindex = dns_synthesize_ifindex(ifindex),
+ .address.in.s_addr = htobe32(0x7F000002),
+ };
+
+ if (IN_SET(af, AF_INET6, AF_UNSPEC))
+ buffer[n++] = (struct local_address) {
+ .family = AF_INET6,
+ .ifindex = dns_synthesize_ifindex(ifindex),
+ .address.in6 = in6addr_loopback,
+ };
+
+ return answer_add_addresses_rr(answer,
+ dns_resource_key_name(key),
+ buffer, n);
+ }
+ }
+
+ return answer_add_addresses_rr(answer, dns_resource_key_name(key), addresses, n);
+}
+
+static int synthesize_system_hostname_ptr(Manager *m, int af, const union in_addr_union *address, int ifindex, DnsAnswer **answer) {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ bool added = false;
+ int n, r;
+
+ assert(m);
+ assert(address);
+ assert(answer);
+
+ if (af == AF_INET && address->in.s_addr == htobe32(0x7F000002)) {
+
+ /* Always map the IPv4 address 127.0.0.2 to the local hostname, in addition to "localhost": */
+
+ r = dns_answer_reserve(answer, 4);
+ if (r < 0)
+ return r;
+
+ r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", m->full_hostname, dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+
+ r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", m->llmnr_hostname, dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+
+ r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", m->mdns_hostname, dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+
+ r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", "localhost", dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ n = local_addresses(m->rtnl, ifindex, af, &addresses);
+ if (n <= 0)
+ return n;
+
+ r = answer_add_addresses_ptr(answer, m->full_hostname, addresses, n, af, address);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ added = true;
+
+ r = answer_add_addresses_ptr(answer, m->llmnr_hostname, addresses, n, af, address);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ added = true;
+
+ r = answer_add_addresses_ptr(answer, m->mdns_hostname, addresses, n, af, address);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ added = true;
+
+ return added;
+}
+
+static int synthesize_gateway_rr(Manager *m, const DnsResourceKey *key, int ifindex, DnsAnswer **answer) {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ int n = 0, af, r;
+
+ assert(m);
+ assert(key);
+ assert(answer);
+
+ af = dns_type_to_af(key->type);
+ if (af >= 0) {
+ n = local_gateways(m->rtnl, ifindex, af, &addresses);
+ if (n <= 0)
+ return n; /* < 0 means: error; == 0 means we have no gateway */
+ }
+
+ r = answer_add_addresses_rr(answer, dns_resource_key_name(key), addresses, n);
+ if (r < 0)
+ return r;
+
+ return 1; /* > 0 means: we have some gateway */
+}
+
+static int synthesize_gateway_ptr(Manager *m, int af, const union in_addr_union *address, int ifindex, DnsAnswer **answer) {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ int n;
+
+ assert(m);
+ assert(address);
+ assert(answer);
+
+ n = local_gateways(m->rtnl, ifindex, af, &addresses);
+ if (n <= 0)
+ return n;
+
+ return answer_add_addresses_ptr(answer, "_gateway", addresses, n, af, address);
+}
+
+int dns_synthesize_answer(
+ Manager *m,
+ DnsQuestion *q,
+ int ifindex,
+ DnsAnswer **ret) {
+
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ DnsResourceKey *key;
+ bool found = false, nxdomain = false;
+ int r;
+
+ assert(m);
+ assert(q);
+
+ DNS_QUESTION_FOREACH(key, q) {
+ union in_addr_union address;
+ const char *name;
+ int af;
+
+ if (!IN_SET(key->class, DNS_CLASS_IN, DNS_CLASS_ANY))
+ continue;
+
+ name = dns_resource_key_name(key);
+
+ if (is_localhost(name)) {
+
+ r = synthesize_localhost_rr(m, key, ifindex, &answer);
+ if (r < 0)
+ return log_error_errno(r, "Failed to synthesize localhost RRs: %m");
+
+ } else if (manager_is_own_hostname(m, name)) {
+
+ r = synthesize_system_hostname_rr(m, key, ifindex, &answer);
+ if (r < 0)
+ return log_error_errno(r, "Failed to synthesize system hostname RRs: %m");
+
+ } else if (is_gateway_hostname(name)) {
+
+ r = synthesize_gateway_rr(m, key, ifindex, &answer);
+ if (r < 0)
+ return log_error_errno(r, "Failed to synthesize gateway RRs: %m");
+ if (r == 0) { /* if we have no gateway return NXDOMAIN */
+ nxdomain = true;
+ continue;
+ }
+
+ } else if ((dns_name_endswith(name, "127.in-addr.arpa") > 0 && dns_name_equal(name, "2.0.0.127.in-addr.arpa") == 0) ||
+ dns_name_equal(name, "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa") > 0) {
+
+ r = synthesize_localhost_ptr(m, key, ifindex, &answer);
+ if (r < 0)
+ return log_error_errno(r, "Failed to synthesize localhost PTR RRs: %m");
+
+ } else if (dns_name_address(name, &af, &address) > 0) {
+ int v, w;
+
+ v = synthesize_system_hostname_ptr(m, af, &address, ifindex, &answer);
+ if (v < 0)
+ return log_error_errno(v, "Failed to synthesize system hostname PTR RR: %m");
+
+ w = synthesize_gateway_ptr(m, af, &address, ifindex, &answer);
+ if (w < 0)
+ return log_error_errno(w, "Failed to synthesize gateway hostname PTR RR: %m");
+
+ if (v == 0 && w == 0) /* This IP address is neither a local one nor a gateway */
+ continue;
+
+ } else
+ continue;
+
+ found = true;
+ }
+
+ if (found) {
+
+ if (ret)
+ *ret = TAKE_PTR(answer);
+
+ return 1;
+ } else if (nxdomain)
+ return -ENXIO;
+
+ return 0;
+}
diff --git a/src/resolve/resolved-dns-synthesize.h b/src/resolve/resolved-dns-synthesize.h
new file mode 100644
index 0000000..0d9d17b
--- /dev/null
+++ b/src/resolve/resolved-dns-synthesize.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "resolved-dns-answer.h"
+#include "resolved-dns-question.h"
+#include "resolved-manager.h"
+
+int dns_synthesize_ifindex(int ifindex);
+int dns_synthesize_family(uint64_t flags);
+DnsProtocol dns_synthesize_protocol(uint64_t flags);
+
+int dns_synthesize_answer(Manager *m, DnsQuestion *q, int ifindex, DnsAnswer **ret);
diff --git a/src/resolve/resolved-dns-transaction.c b/src/resolve/resolved-dns-transaction.c
new file mode 100644
index 0000000..4a2d2cc
--- /dev/null
+++ b/src/resolve/resolved-dns-transaction.c
@@ -0,0 +1,3277 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-messages.h"
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "errno-list.h"
+#include "fd-util.h"
+#include "random-util.h"
+#include "resolved-dns-cache.h"
+#include "resolved-dns-transaction.h"
+#include "resolved-llmnr.h"
+#if ENABLE_DNS_OVER_TLS
+#include "resolved-dnstls.h"
+#endif
+#include "string-table.h"
+
+#define TRANSACTIONS_MAX 4096
+#define TRANSACTION_TCP_TIMEOUT_USEC (10U*USEC_PER_SEC)
+
+/* After how much time to repeat classic DNS requests */
+#define DNS_TIMEOUT_USEC (SD_RESOLVED_QUERY_TIMEOUT_USEC / DNS_TRANSACTION_ATTEMPTS_MAX)
+
+static void dns_transaction_reset_answer(DnsTransaction *t) {
+ assert(t);
+
+ t->received = dns_packet_unref(t->received);
+ t->answer = dns_answer_unref(t->answer);
+ t->answer_rcode = 0;
+ t->answer_dnssec_result = _DNSSEC_RESULT_INVALID;
+ t->answer_source = _DNS_TRANSACTION_SOURCE_INVALID;
+ t->answer_authenticated = false;
+ t->answer_nsec_ttl = (uint32_t) -1;
+ t->answer_errno = 0;
+}
+
+static void dns_transaction_flush_dnssec_transactions(DnsTransaction *t) {
+ DnsTransaction *z;
+
+ assert(t);
+
+ while ((z = set_steal_first(t->dnssec_transactions))) {
+ set_remove(z->notify_transactions, t);
+ set_remove(z->notify_transactions_done, t);
+ dns_transaction_gc(z);
+ }
+}
+
+static void dns_transaction_close_connection(DnsTransaction *t) {
+ assert(t);
+
+ if (t->stream) {
+ /* Let's detach the stream from our transaction, in case something else keeps a reference to it. */
+ LIST_REMOVE(transactions_by_stream, t->stream->transactions, t);
+
+ /* Remove packet in case it's still in the queue */
+ dns_packet_unref(ordered_set_remove(t->stream->write_queue, t->sent));
+
+ t->stream = dns_stream_unref(t->stream);
+ }
+
+ t->dns_udp_event_source = sd_event_source_unref(t->dns_udp_event_source);
+ t->dns_udp_fd = safe_close(t->dns_udp_fd);
+}
+
+static void dns_transaction_stop_timeout(DnsTransaction *t) {
+ assert(t);
+
+ t->timeout_event_source = sd_event_source_unref(t->timeout_event_source);
+}
+
+DnsTransaction* dns_transaction_free(DnsTransaction *t) {
+ DnsQueryCandidate *c;
+ DnsZoneItem *i;
+ DnsTransaction *z;
+
+ if (!t)
+ return NULL;
+
+ log_debug("Freeing transaction %" PRIu16 ".", t->id);
+
+ dns_transaction_close_connection(t);
+ dns_transaction_stop_timeout(t);
+
+ dns_packet_unref(t->sent);
+ dns_transaction_reset_answer(t);
+
+ dns_server_unref(t->server);
+
+ if (t->scope) {
+ hashmap_remove_value(t->scope->transactions_by_key, t->key, t);
+ LIST_REMOVE(transactions_by_scope, t->scope->transactions, t);
+
+ if (t->id != 0)
+ hashmap_remove(t->scope->manager->dns_transactions, UINT_TO_PTR(t->id));
+ }
+
+ while ((c = set_steal_first(t->notify_query_candidates)))
+ set_remove(c->transactions, t);
+ set_free(t->notify_query_candidates);
+
+ while ((c = set_steal_first(t->notify_query_candidates_done)))
+ set_remove(c->transactions, t);
+ set_free(t->notify_query_candidates_done);
+
+ while ((i = set_steal_first(t->notify_zone_items)))
+ i->probe_transaction = NULL;
+ set_free(t->notify_zone_items);
+
+ while ((i = set_steal_first(t->notify_zone_items_done)))
+ i->probe_transaction = NULL;
+ set_free(t->notify_zone_items_done);
+
+ while ((z = set_steal_first(t->notify_transactions)))
+ set_remove(z->dnssec_transactions, t);
+ set_free(t->notify_transactions);
+
+ while ((z = set_steal_first(t->notify_transactions_done)))
+ set_remove(z->dnssec_transactions, t);
+ set_free(t->notify_transactions_done);
+
+ dns_transaction_flush_dnssec_transactions(t);
+ set_free(t->dnssec_transactions);
+
+ dns_answer_unref(t->validated_keys);
+ dns_resource_key_unref(t->key);
+
+ return mfree(t);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsTransaction*, dns_transaction_free);
+
+bool dns_transaction_gc(DnsTransaction *t) {
+ assert(t);
+
+ if (t->block_gc > 0)
+ return true;
+
+ if (set_isempty(t->notify_query_candidates) &&
+ set_isempty(t->notify_query_candidates_done) &&
+ set_isempty(t->notify_zone_items) &&
+ set_isempty(t->notify_zone_items_done) &&
+ set_isempty(t->notify_transactions) &&
+ set_isempty(t->notify_transactions_done)) {
+ dns_transaction_free(t);
+ return false;
+ }
+
+ return true;
+}
+
+static uint16_t pick_new_id(Manager *m) {
+ uint16_t new_id;
+
+ /* Find a fresh, unused transaction id. Note that this loop is bounded because there's a limit on the number of
+ * transactions, and it's much lower than the space of IDs. */
+
+ assert_cc(TRANSACTIONS_MAX < 0xFFFF);
+
+ do
+ random_bytes(&new_id, sizeof(new_id));
+ while (new_id == 0 ||
+ hashmap_get(m->dns_transactions, UINT_TO_PTR(new_id)));
+
+ return new_id;
+}
+
+int dns_transaction_new(DnsTransaction **ret, DnsScope *s, DnsResourceKey *key) {
+ _cleanup_(dns_transaction_freep) DnsTransaction *t = NULL;
+ int r;
+
+ assert(ret);
+ assert(s);
+ assert(key);
+
+ /* Don't allow looking up invalid or pseudo RRs */
+ if (!dns_type_is_valid_query(key->type))
+ return -EINVAL;
+ if (dns_type_is_obsolete(key->type))
+ return -EOPNOTSUPP;
+
+ /* We only support the IN class */
+ if (!IN_SET(key->class, DNS_CLASS_IN, DNS_CLASS_ANY))
+ return -EOPNOTSUPP;
+
+ if (hashmap_size(s->manager->dns_transactions) >= TRANSACTIONS_MAX)
+ return -EBUSY;
+
+ r = hashmap_ensure_allocated(&s->manager->dns_transactions, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&s->transactions_by_key, &dns_resource_key_hash_ops);
+ if (r < 0)
+ return r;
+
+ t = new0(DnsTransaction, 1);
+ if (!t)
+ return -ENOMEM;
+
+ t->dns_udp_fd = -1;
+ t->answer_source = _DNS_TRANSACTION_SOURCE_INVALID;
+ t->answer_dnssec_result = _DNSSEC_RESULT_INVALID;
+ t->answer_nsec_ttl = (uint32_t) -1;
+ t->key = dns_resource_key_ref(key);
+ t->current_feature_level = _DNS_SERVER_FEATURE_LEVEL_INVALID;
+ t->clamp_feature_level = _DNS_SERVER_FEATURE_LEVEL_INVALID;
+
+ t->id = pick_new_id(s->manager);
+
+ r = hashmap_put(s->manager->dns_transactions, UINT_TO_PTR(t->id), t);
+ if (r < 0) {
+ t->id = 0;
+ return r;
+ }
+
+ r = hashmap_replace(s->transactions_by_key, t->key, t);
+ if (r < 0) {
+ hashmap_remove(s->manager->dns_transactions, UINT_TO_PTR(t->id));
+ return r;
+ }
+
+ LIST_PREPEND(transactions_by_scope, s->transactions, t);
+ t->scope = s;
+
+ s->manager->n_transactions_total++;
+
+ if (ret)
+ *ret = t;
+
+ t = NULL;
+
+ return 0;
+}
+
+static void dns_transaction_shuffle_id(DnsTransaction *t) {
+ uint16_t new_id;
+ assert(t);
+
+ /* Pick a new ID for this transaction. */
+
+ new_id = pick_new_id(t->scope->manager);
+ assert_se(hashmap_remove_and_put(t->scope->manager->dns_transactions, UINT_TO_PTR(t->id), UINT_TO_PTR(new_id), t) >= 0);
+
+ log_debug("Transaction %" PRIu16 " is now %" PRIu16 ".", t->id, new_id);
+ t->id = new_id;
+
+ /* Make sure we generate a new packet with the new ID */
+ t->sent = dns_packet_unref(t->sent);
+}
+
+static void dns_transaction_tentative(DnsTransaction *t, DnsPacket *p) {
+ _cleanup_free_ char *pretty = NULL;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+ DnsZoneItem *z;
+
+ assert(t);
+ assert(p);
+
+ if (manager_our_packet(t->scope->manager, p) != 0)
+ return;
+
+ (void) in_addr_to_string(p->family, &p->sender, &pretty);
+
+ log_debug("Transaction %" PRIu16 " for <%s> on scope %s on %s/%s got tentative packet from %s.",
+ t->id,
+ dns_resource_key_to_string(t->key, key_str, sizeof key_str),
+ dns_protocol_to_string(t->scope->protocol),
+ t->scope->link ? t->scope->link->name : "*",
+ af_to_name_short(t->scope->family),
+ strnull(pretty));
+
+ /* RFC 4795, Section 4.1 says that the peer with the
+ * lexicographically smaller IP address loses */
+ if (memcmp(&p->sender, &p->destination, FAMILY_ADDRESS_SIZE(p->family)) >= 0) {
+ log_debug("Peer has lexicographically larger IP address and thus lost in the conflict.");
+ return;
+ }
+
+ log_debug("We have the lexicographically larger IP address and thus lost in the conflict.");
+
+ t->block_gc++;
+
+ while ((z = set_first(t->notify_zone_items))) {
+ /* First, make sure the zone item drops the reference
+ * to us */
+ dns_zone_item_probe_stop(z);
+
+ /* Secondly, report this as conflict, so that we might
+ * look for a different hostname */
+ dns_zone_item_conflict(z);
+ }
+ t->block_gc--;
+
+ dns_transaction_gc(t);
+}
+
+void dns_transaction_complete(DnsTransaction *t, DnsTransactionState state) {
+ DnsQueryCandidate *c;
+ DnsZoneItem *z;
+ DnsTransaction *d;
+ const char *st;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ assert(t);
+ assert(!DNS_TRANSACTION_IS_LIVE(state));
+
+ if (state == DNS_TRANSACTION_DNSSEC_FAILED) {
+ dns_resource_key_to_string(t->key, key_str, sizeof key_str);
+
+ log_struct(LOG_NOTICE,
+ "MESSAGE_ID=" SD_MESSAGE_DNSSEC_FAILURE_STR,
+ LOG_MESSAGE("DNSSEC validation failed for question %s: %s", key_str, dnssec_result_to_string(t->answer_dnssec_result)),
+ "DNS_TRANSACTION=%" PRIu16, t->id,
+ "DNS_QUESTION=%s", key_str,
+ "DNSSEC_RESULT=%s", dnssec_result_to_string(t->answer_dnssec_result),
+ "DNS_SERVER=%s", dns_server_string(t->server),
+ "DNS_SERVER_FEATURE_LEVEL=%s", dns_server_feature_level_to_string(t->server->possible_feature_level));
+ }
+
+ /* Note that this call might invalidate the query. Callers
+ * should hence not attempt to access the query or transaction
+ * after calling this function. */
+
+ if (state == DNS_TRANSACTION_ERRNO)
+ st = errno_to_name(t->answer_errno);
+ else
+ st = dns_transaction_state_to_string(state);
+
+ log_debug("Transaction %" PRIu16 " for <%s> on scope %s on %s/%s now complete with <%s> from %s (%s).",
+ t->id,
+ dns_resource_key_to_string(t->key, key_str, sizeof key_str),
+ dns_protocol_to_string(t->scope->protocol),
+ t->scope->link ? t->scope->link->name : "*",
+ af_to_name_short(t->scope->family),
+ st,
+ t->answer_source < 0 ? "none" : dns_transaction_source_to_string(t->answer_source),
+ t->answer_authenticated ? "authenticated" : "unsigned");
+
+ t->state = state;
+
+ dns_transaction_close_connection(t);
+ dns_transaction_stop_timeout(t);
+
+ /* Notify all queries that are interested, but make sure the
+ * transaction isn't freed while we are still looking at it */
+ t->block_gc++;
+
+ SET_FOREACH_MOVE(c, t->notify_query_candidates_done, t->notify_query_candidates)
+ dns_query_candidate_notify(c);
+ SWAP_TWO(t->notify_query_candidates, t->notify_query_candidates_done);
+
+ SET_FOREACH_MOVE(z, t->notify_zone_items_done, t->notify_zone_items)
+ dns_zone_item_notify(z);
+ SWAP_TWO(t->notify_zone_items, t->notify_zone_items_done);
+ if (t->probing && t->state == DNS_TRANSACTION_ATTEMPTS_MAX_REACHED)
+ (void) dns_scope_announce(t->scope, false);
+
+ SET_FOREACH_MOVE(d, t->notify_transactions_done, t->notify_transactions)
+ dns_transaction_notify(d, t);
+ SWAP_TWO(t->notify_transactions, t->notify_transactions_done);
+
+ t->block_gc--;
+ dns_transaction_gc(t);
+}
+
+static int dns_transaction_pick_server(DnsTransaction *t) {
+ DnsServer *server;
+
+ assert(t);
+ assert(t->scope->protocol == DNS_PROTOCOL_DNS);
+
+ /* Pick a DNS server and a feature level for it. */
+
+ server = dns_scope_get_dns_server(t->scope);
+ if (!server)
+ return -ESRCH;
+
+ /* If we changed the server invalidate the feature level clamping, as the new server might have completely
+ * different properties. */
+ if (server != t->server)
+ t->clamp_feature_level = _DNS_SERVER_FEATURE_LEVEL_INVALID;
+
+ t->current_feature_level = dns_server_possible_feature_level(server);
+
+ /* Clamp the feature level if that is requested. */
+ if (t->clamp_feature_level != _DNS_SERVER_FEATURE_LEVEL_INVALID &&
+ t->current_feature_level > t->clamp_feature_level)
+ t->current_feature_level = t->clamp_feature_level;
+
+ log_debug("Using feature level %s for transaction %u.", dns_server_feature_level_to_string(t->current_feature_level), t->id);
+
+ if (server == t->server)
+ return 0;
+
+ dns_server_unref(t->server);
+ t->server = dns_server_ref(server);
+
+ t->n_picked_servers ++;
+
+ log_debug("Using DNS server %s for transaction %u.", dns_server_string(t->server), t->id);
+
+ return 1;
+}
+
+static void dns_transaction_retry(DnsTransaction *t, bool next_server) {
+ int r;
+
+ assert(t);
+
+ log_debug("Retrying transaction %" PRIu16 ".", t->id);
+
+ /* Before we try again, switch to a new server. */
+ if (next_server)
+ dns_scope_next_dns_server(t->scope);
+
+ r = dns_transaction_go(t);
+ if (r < 0) {
+ t->answer_errno = -r;
+ dns_transaction_complete(t, DNS_TRANSACTION_ERRNO);
+ }
+}
+
+static int dns_transaction_maybe_restart(DnsTransaction *t) {
+ int r;
+
+ assert(t);
+
+ /* Returns > 0 if the transaction was restarted, 0 if not */
+
+ if (!t->server)
+ return 0;
+
+ if (t->current_feature_level <= dns_server_possible_feature_level(t->server))
+ return 0;
+
+ /* The server's current feature level is lower than when we sent the original query. We learnt something from
+ the response or possibly an auxiliary DNSSEC response that we didn't know before. We take that as reason to
+ restart the whole transaction. This is a good idea to deal with servers that respond rubbish if we include
+ OPT RR or DO bit. One of these cases is documented here, for example:
+ https://open.nlnetlabs.nl/pipermail/dnssec-trigger/2014-November/000376.html */
+
+ log_debug("Server feature level is now lower than when we began our transaction. Restarting with new ID.");
+ dns_transaction_shuffle_id(t);
+
+ r = dns_transaction_go(t);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static void on_transaction_stream_error(DnsTransaction *t, int error) {
+ assert(t);
+
+ dns_transaction_close_connection(t);
+
+ if (ERRNO_IS_DISCONNECT(error)) {
+ if (t->scope->protocol == DNS_PROTOCOL_LLMNR) {
+ /* If the LLMNR/TCP connection failed, the host doesn't support LLMNR, and we cannot answer the
+ * question on this scope. */
+ dns_transaction_complete(t, DNS_TRANSACTION_NOT_FOUND);
+ return;
+ }
+
+ dns_transaction_retry(t, true);
+ return;
+ }
+ if (error != 0) {
+ t->answer_errno = error;
+ dns_transaction_complete(t, DNS_TRANSACTION_ERRNO);
+ }
+}
+
+static int dns_transaction_on_stream_packet(DnsTransaction *t, DnsPacket *p) {
+ assert(t);
+ assert(p);
+
+ dns_transaction_close_connection(t);
+
+ if (dns_packet_validate_reply(p) <= 0) {
+ log_debug("Invalid TCP reply packet.");
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ return 0;
+ }
+
+ dns_scope_check_conflicts(t->scope, p);
+
+ t->block_gc++;
+ dns_transaction_process_reply(t, p);
+ t->block_gc--;
+
+ /* If the response wasn't useful, then complete the transition
+ * now. After all, we are the worst feature set now with TCP
+ * sockets, and there's really no point in retrying. */
+ if (t->state == DNS_TRANSACTION_PENDING)
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ else
+ dns_transaction_gc(t);
+
+ return 0;
+}
+
+static int on_stream_complete(DnsStream *s, int error) {
+ assert(s);
+
+ if (ERRNO_IS_DISCONNECT(error) && s->protocol != DNS_PROTOCOL_LLMNR) {
+ log_debug_errno(error, "Connection failure for DNS TCP stream: %m");
+
+ if (s->transactions) {
+ DnsTransaction *t;
+
+ t = s->transactions;
+ dns_server_packet_lost(t->server, IPPROTO_TCP, t->current_feature_level);
+ }
+ }
+
+ if (error != 0) {
+ DnsTransaction *t, *n;
+
+ LIST_FOREACH_SAFE(transactions_by_stream, t, n, s->transactions)
+ on_transaction_stream_error(t, error);
+ }
+
+ return 0;
+}
+
+static int on_stream_packet(DnsStream *s) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ DnsTransaction *t;
+
+ assert(s);
+
+ /* Take ownership of packet to be able to receive new packets */
+ p = dns_stream_take_read_packet(s);
+ assert(p);
+
+ t = hashmap_get(s->manager->dns_transactions, UINT_TO_PTR(DNS_PACKET_ID(p)));
+ if (t)
+ return dns_transaction_on_stream_packet(t, p);
+
+ /* Ignore incorrect transaction id as transaction can have been canceled */
+ if (dns_packet_validate_reply(p) <= 0) {
+ log_debug("Invalid TCP reply packet.");
+ on_stream_complete(s, 0);
+ }
+
+ return 0;
+}
+
+static uint16_t dns_port_for_feature_level(DnsServerFeatureLevel level) {
+ return DNS_SERVER_FEATURE_LEVEL_IS_TLS(level) ? 853 : 53;
+}
+
+static int dns_transaction_emit_tcp(DnsTransaction *t) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_(dns_stream_unrefp) DnsStream *s = NULL;
+ union sockaddr_union sa;
+ int r;
+
+ assert(t);
+
+ dns_transaction_close_connection(t);
+
+ switch (t->scope->protocol) {
+
+ case DNS_PROTOCOL_DNS:
+ r = dns_transaction_pick_server(t);
+ if (r < 0)
+ return r;
+
+ if (!dns_server_dnssec_supported(t->server) && dns_type_is_dnssec(t->key->type))
+ return -EOPNOTSUPP;
+
+ r = dns_server_adjust_opt(t->server, t->sent, t->current_feature_level);
+ if (r < 0)
+ return r;
+
+ if (t->server->stream && (DNS_SERVER_FEATURE_LEVEL_IS_TLS(t->current_feature_level) == t->server->stream->encrypted))
+ s = dns_stream_ref(t->server->stream);
+ else
+ fd = dns_scope_socket_tcp(t->scope, AF_UNSPEC, NULL, t->server, dns_port_for_feature_level(t->current_feature_level), &sa);
+
+ break;
+
+ case DNS_PROTOCOL_LLMNR:
+ /* When we already received a reply to this (but it was truncated), send to its sender address */
+ if (t->received)
+ fd = dns_scope_socket_tcp(t->scope, t->received->family, &t->received->sender, NULL, t->received->sender_port, &sa);
+ else {
+ union in_addr_union address;
+ int family = AF_UNSPEC;
+
+ /* Otherwise, try to talk to the owner of a
+ * the IP address, in case this is a reverse
+ * PTR lookup */
+
+ r = dns_name_address(dns_resource_key_name(t->key), &family, &address);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+ if (family != t->scope->family)
+ return -ESRCH;
+
+ fd = dns_scope_socket_tcp(t->scope, family, &address, NULL, LLMNR_PORT, &sa);
+ }
+
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ if (!s) {
+ if (fd < 0)
+ return fd;
+
+ r = dns_stream_new(t->scope->manager, &s, t->scope->protocol, fd, &sa);
+ if (r < 0)
+ return r;
+
+ fd = -1;
+
+#if ENABLE_DNS_OVER_TLS
+ if (t->scope->protocol == DNS_PROTOCOL_DNS &&
+ DNS_SERVER_FEATURE_LEVEL_IS_TLS(t->current_feature_level)) {
+
+ assert(t->server);
+ r = dnstls_stream_connect_tls(s, t->server);
+ if (r < 0)
+ return r;
+ }
+#endif
+
+ if (t->server) {
+ dns_server_unref_stream(t->server);
+ t->server->stream = dns_stream_ref(s);
+ s->server = dns_server_ref(t->server);
+ }
+
+ s->complete = on_stream_complete;
+ s->on_packet = on_stream_packet;
+
+ /* The interface index is difficult to determine if we are
+ * connecting to the local host, hence fill this in right away
+ * instead of determining it from the socket */
+ s->ifindex = dns_scope_ifindex(t->scope);
+ }
+
+ t->stream = TAKE_PTR(s);
+ LIST_PREPEND(transactions_by_stream, t->stream->transactions, t);
+
+ r = dns_stream_write_packet(t->stream, t->sent);
+ if (r < 0) {
+ dns_transaction_close_connection(t);
+ return r;
+ }
+
+ dns_transaction_reset_answer(t);
+
+ t->tried_stream = true;
+
+ return 0;
+}
+
+static void dns_transaction_cache_answer(DnsTransaction *t) {
+ assert(t);
+
+ /* For mDNS we cache whenever we get the packet, rather than
+ * in each transaction. */
+ if (!IN_SET(t->scope->protocol, DNS_PROTOCOL_DNS, DNS_PROTOCOL_LLMNR))
+ return;
+
+ /* Caching disabled? */
+ if (!t->scope->manager->enable_cache)
+ return;
+
+ /* We never cache if this packet is from the local host, under
+ * the assumption that a locally running DNS server would
+ * cache this anyway, and probably knows better when to flush
+ * the cache then we could. */
+ if (!DNS_PACKET_SHALL_CACHE(t->received))
+ return;
+
+ dns_cache_put(&t->scope->cache,
+ t->key,
+ t->answer_rcode,
+ t->answer,
+ t->answer_authenticated,
+ t->answer_nsec_ttl,
+ 0,
+ t->received->family,
+ &t->received->sender);
+}
+
+static bool dns_transaction_dnssec_is_live(DnsTransaction *t) {
+ DnsTransaction *dt;
+ Iterator i;
+
+ assert(t);
+
+ SET_FOREACH(dt, t->dnssec_transactions, i)
+ if (DNS_TRANSACTION_IS_LIVE(dt->state))
+ return true;
+
+ return false;
+}
+
+static int dns_transaction_dnssec_ready(DnsTransaction *t) {
+ DnsTransaction *dt;
+ Iterator i;
+
+ assert(t);
+
+ /* Checks whether the auxiliary DNSSEC transactions of our transaction have completed, or are still
+ * ongoing. Returns 0, if we aren't ready for the DNSSEC validation, positive if we are. */
+
+ SET_FOREACH(dt, t->dnssec_transactions, i) {
+
+ switch (dt->state) {
+
+ case DNS_TRANSACTION_NULL:
+ case DNS_TRANSACTION_PENDING:
+ case DNS_TRANSACTION_VALIDATING:
+ /* Still ongoing */
+ return 0;
+
+ case DNS_TRANSACTION_RCODE_FAILURE:
+ if (!IN_SET(dt->answer_rcode, DNS_RCODE_NXDOMAIN, DNS_RCODE_SERVFAIL)) {
+ log_debug("Auxiliary DNSSEC RR query failed with rcode=%s.", dns_rcode_to_string(dt->answer_rcode));
+ goto fail;
+ }
+
+ /* Fall-through: NXDOMAIN/SERVFAIL is good enough for us. This is because some DNS servers
+ * erronously return NXDOMAIN/SERVFAIL for empty non-terminals (Akamai...) or missing DS
+ * records (Facebook), and we need to handle that nicely, when asking for parent SOA or similar
+ * RRs to make unsigned proofs. */
+
+ case DNS_TRANSACTION_SUCCESS:
+ /* All good. */
+ break;
+
+ case DNS_TRANSACTION_DNSSEC_FAILED:
+ /* We handle DNSSEC failures different from other errors, as we care about the DNSSEC
+ * validationr result */
+
+ log_debug("Auxiliary DNSSEC RR query failed validation: %s", dnssec_result_to_string(dt->answer_dnssec_result));
+ t->answer_dnssec_result = dt->answer_dnssec_result; /* Copy error code over */
+ dns_transaction_complete(t, DNS_TRANSACTION_DNSSEC_FAILED);
+ return 0;
+
+ default:
+ log_debug("Auxiliary DNSSEC RR query failed with %s", dns_transaction_state_to_string(dt->state));
+ goto fail;
+ }
+ }
+
+ /* All is ready, we can go and validate */
+ return 1;
+
+fail:
+ t->answer_dnssec_result = DNSSEC_FAILED_AUXILIARY;
+ dns_transaction_complete(t, DNS_TRANSACTION_DNSSEC_FAILED);
+ return 0;
+}
+
+static void dns_transaction_process_dnssec(DnsTransaction *t) {
+ int r;
+
+ assert(t);
+
+ /* Are there ongoing DNSSEC transactions? If so, let's wait for them. */
+ r = dns_transaction_dnssec_ready(t);
+ if (r < 0)
+ goto fail;
+ if (r == 0) /* We aren't ready yet (or one of our auxiliary transactions failed, and we shouldn't validate now */
+ return;
+
+ /* See if we learnt things from the additional DNSSEC transactions, that we didn't know before, and better
+ * restart the lookup immediately. */
+ r = dns_transaction_maybe_restart(t);
+ if (r < 0)
+ goto fail;
+ if (r > 0) /* Transaction got restarted... */
+ return;
+
+ /* All our auxiliary DNSSEC transactions are complete now. Try
+ * to validate our RRset now. */
+ r = dns_transaction_validate_dnssec(t);
+ if (r == -EBADMSG) {
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ return;
+ }
+ if (r < 0)
+ goto fail;
+
+ if (t->answer_dnssec_result == DNSSEC_INCOMPATIBLE_SERVER &&
+ t->scope->dnssec_mode == DNSSEC_YES) {
+
+ /* We are not in automatic downgrade mode, and the server is bad. Let's try a different server, maybe
+ * that works. */
+
+ if (t->n_picked_servers < dns_scope_get_n_dns_servers(t->scope)) {
+ /* We tried fewer servers on this transaction than we know, let's try another one then */
+ dns_transaction_retry(t, true);
+ return;
+ }
+
+ /* OK, let's give up, apparently all servers we tried didn't work. */
+ dns_transaction_complete(t, DNS_TRANSACTION_DNSSEC_FAILED);
+ return;
+ }
+
+ if (!IN_SET(t->answer_dnssec_result,
+ _DNSSEC_RESULT_INVALID, /* No DNSSEC validation enabled */
+ DNSSEC_VALIDATED, /* Answer is signed and validated successfully */
+ DNSSEC_UNSIGNED, /* Answer is right-fully unsigned */
+ DNSSEC_INCOMPATIBLE_SERVER)) { /* Server does not do DNSSEC (Yay, we are downgrade attack vulnerable!) */
+ dns_transaction_complete(t, DNS_TRANSACTION_DNSSEC_FAILED);
+ return;
+ }
+
+ if (t->answer_dnssec_result == DNSSEC_INCOMPATIBLE_SERVER)
+ dns_server_warn_downgrade(t->server);
+
+ dns_transaction_cache_answer(t);
+
+ if (t->answer_rcode == DNS_RCODE_SUCCESS)
+ dns_transaction_complete(t, DNS_TRANSACTION_SUCCESS);
+ else
+ dns_transaction_complete(t, DNS_TRANSACTION_RCODE_FAILURE);
+
+ return;
+
+fail:
+ t->answer_errno = -r;
+ dns_transaction_complete(t, DNS_TRANSACTION_ERRNO);
+}
+
+static int dns_transaction_has_positive_answer(DnsTransaction *t, DnsAnswerFlags *flags) {
+ int r;
+
+ assert(t);
+
+ /* Checks whether the answer is positive, i.e. either a direct
+ * answer to the question, or a CNAME/DNAME for it */
+
+ r = dns_answer_match_key(t->answer, t->key, flags);
+ if (r != 0)
+ return r;
+
+ r = dns_answer_find_cname_or_dname(t->answer, t->key, NULL, flags);
+ if (r != 0)
+ return r;
+
+ return false;
+}
+
+static int dns_transaction_fix_rcode(DnsTransaction *t) {
+ int r;
+
+ assert(t);
+
+ /* Fix up the RCODE to SUCCESS if we get at least one matching RR in a response. Note that this contradicts the
+ * DNS RFCs a bit. Specifically, RFC 6604 Section 3 clarifies that the RCODE shall say something about a
+ * CNAME/DNAME chain element coming after the last chain element contained in the message, and not the first
+ * one included. However, it also indicates that not all DNS servers implement this correctly. Moreover, when
+ * using DNSSEC we usually only can prove the first element of a CNAME/DNAME chain anyway, hence let's settle
+ * on always processing the RCODE as referring to the immediate look-up we do, i.e. the first element of a
+ * CNAME/DNAME chain. This way, we uniformly handle CNAME/DNAME chains, regardless if the DNS server
+ * incorrectly implements RCODE, whether DNSSEC is in use, or whether the DNS server only supplied us with an
+ * incomplete CNAME/DNAME chain.
+ *
+ * Or in other words: if we get at least one positive reply in a message we patch NXDOMAIN to become SUCCESS,
+ * and then rely on the CNAME chasing logic to figure out that there's actually a CNAME error with a new
+ * lookup. */
+
+ if (t->answer_rcode != DNS_RCODE_NXDOMAIN)
+ return 0;
+
+ r = dns_transaction_has_positive_answer(t, NULL);
+ if (r <= 0)
+ return r;
+
+ t->answer_rcode = DNS_RCODE_SUCCESS;
+ return 0;
+}
+
+void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p) {
+ usec_t ts;
+ int r;
+
+ assert(t);
+ assert(p);
+ assert(t->scope);
+ assert(t->scope->manager);
+
+ if (t->state != DNS_TRANSACTION_PENDING)
+ return;
+
+ /* Note that this call might invalidate the query. Callers
+ * should hence not attempt to access the query or transaction
+ * after calling this function. */
+
+ log_debug("Processing incoming packet on transaction %" PRIu16" (rcode=%s).",
+ t->id, dns_rcode_to_string(DNS_PACKET_RCODE(p)));
+
+ switch (t->scope->protocol) {
+
+ case DNS_PROTOCOL_LLMNR:
+ /* For LLMNR we will not accept any packets from other interfaces */
+
+ if (p->ifindex != dns_scope_ifindex(t->scope))
+ return;
+
+ if (p->family != t->scope->family)
+ return;
+
+ /* Tentative packets are not full responses but still
+ * useful for identifying uniqueness conflicts during
+ * probing. */
+ if (DNS_PACKET_LLMNR_T(p)) {
+ dns_transaction_tentative(t, p);
+ return;
+ }
+
+ break;
+
+ case DNS_PROTOCOL_MDNS:
+ /* For mDNS we will not accept any packets from other interfaces */
+
+ if (p->ifindex != dns_scope_ifindex(t->scope))
+ return;
+
+ if (p->family != t->scope->family)
+ return;
+
+ break;
+
+ case DNS_PROTOCOL_DNS:
+ /* Note that we do not need to verify the
+ * addresses/port numbers of incoming traffic, as we
+ * invoked connect() on our UDP socket in which case
+ * the kernel already does the needed verification for
+ * us. */
+ break;
+
+ default:
+ assert_not_reached("Invalid DNS protocol.");
+ }
+
+ if (t->received != p) {
+ dns_packet_unref(t->received);
+ t->received = dns_packet_ref(p);
+ }
+
+ t->answer_source = DNS_TRANSACTION_NETWORK;
+
+ if (p->ipproto == IPPROTO_TCP) {
+ if (DNS_PACKET_TC(p)) {
+ /* Truncated via TCP? Somebody must be fucking with us */
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ return;
+ }
+
+ if (DNS_PACKET_ID(p) != t->id) {
+ /* Not the reply to our query? Somebody must be fucking with us */
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ return;
+ }
+ }
+
+ assert_se(sd_event_now(t->scope->manager->event, clock_boottime_or_monotonic(), &ts) >= 0);
+
+ switch (t->scope->protocol) {
+
+ case DNS_PROTOCOL_DNS:
+ assert(t->server);
+
+ if (IN_SET(DNS_PACKET_RCODE(p), DNS_RCODE_FORMERR, DNS_RCODE_SERVFAIL, DNS_RCODE_NOTIMP)) {
+
+ /* Request failed, immediately try again with reduced features */
+
+ if (t->current_feature_level <= DNS_SERVER_FEATURE_LEVEL_UDP) {
+
+ /* This was already at UDP feature level? If so, it doesn't make sense to downgrade
+ * this transaction anymore, but let's see if it might make sense to send the request
+ * to a different DNS server instead. If not let's process the response, and accept the
+ * rcode. Note that we don't retry on TCP, since that's a suitable way to mitigate
+ * packet loss, but is not going to give us better rcodes should we actually have
+ * managed to get them already at UDP level. */
+
+ if (t->n_picked_servers < dns_scope_get_n_dns_servers(t->scope)) {
+ /* We tried fewer servers on this transaction than we know, let's try another one then */
+ dns_transaction_retry(t, true);
+ return;
+ }
+
+ /* Give up, accept the rcode */
+ log_debug("Server returned error: %s", dns_rcode_to_string(DNS_PACKET_RCODE(p)));
+ break;
+ }
+
+ /* Reduce this feature level by one and try again. */
+ switch (t->current_feature_level) {
+ case DNS_SERVER_FEATURE_LEVEL_TLS_DO:
+ t->clamp_feature_level = DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN;
+ break;
+ case DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN + 1:
+ /* Skip plain TLS when TLS is not supported */
+ t->clamp_feature_level = DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN - 1;
+ break;
+ default:
+ t->clamp_feature_level = t->current_feature_level - 1;
+ }
+
+ log_debug("Server returned error %s, retrying transaction with reduced feature level %s.",
+ dns_rcode_to_string(DNS_PACKET_RCODE(p)),
+ dns_server_feature_level_to_string(t->clamp_feature_level));
+
+ dns_transaction_retry(t, false /* use the same server */);
+ return;
+ }
+
+ if (DNS_PACKET_RCODE(p) == DNS_RCODE_REFUSED) {
+ /* This server refused our request? If so, try again, use a different server */
+ log_debug("Server returned REFUSED, switching servers, and retrying.");
+ dns_transaction_retry(t, true /* pick a new server */);
+ return;
+ }
+
+ if (DNS_PACKET_TC(p))
+ dns_server_packet_truncated(t->server, t->current_feature_level);
+
+ break;
+
+ case DNS_PROTOCOL_LLMNR:
+ case DNS_PROTOCOL_MDNS:
+ dns_scope_packet_received(t->scope, ts - t->start_usec);
+ break;
+
+ default:
+ assert_not_reached("Invalid DNS protocol.");
+ }
+
+ if (DNS_PACKET_TC(p)) {
+
+ /* Truncated packets for mDNS are not allowed. Give up immediately. */
+ if (t->scope->protocol == DNS_PROTOCOL_MDNS) {
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ return;
+ }
+
+ log_debug("Reply truncated, retrying via TCP.");
+
+ /* Response was truncated, let's try again with good old TCP */
+ r = dns_transaction_emit_tcp(t);
+ if (r == -ESRCH) {
+ /* No servers found? Damn! */
+ dns_transaction_complete(t, DNS_TRANSACTION_NO_SERVERS);
+ return;
+ }
+ if (r == -EOPNOTSUPP) {
+ /* Tried to ask for DNSSEC RRs, on a server that doesn't do DNSSEC */
+ dns_transaction_complete(t, DNS_TRANSACTION_RR_TYPE_UNSUPPORTED);
+ return;
+ }
+ if (r < 0) {
+ /* On LLMNR, if we cannot connect to the host,
+ * we immediately give up */
+ if (t->scope->protocol != DNS_PROTOCOL_DNS)
+ goto fail;
+
+ /* On DNS, couldn't send? Try immediately again, with a new server */
+ dns_transaction_retry(t, true);
+ }
+
+ return;
+ }
+
+ /* After the superficial checks, actually parse the message. */
+ r = dns_packet_extract(p);
+ if (r < 0) {
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ return;
+ }
+
+ if (t->server) {
+ /* Report that we successfully received a valid packet with a good rcode after we initially got a bad
+ * rcode and subsequently downgraded the protocol */
+
+ if (IN_SET(DNS_PACKET_RCODE(p), DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN) &&
+ t->clamp_feature_level != _DNS_SERVER_FEATURE_LEVEL_INVALID)
+ dns_server_packet_rcode_downgrade(t->server, t->clamp_feature_level);
+
+ /* Report that the OPT RR was missing */
+ if (!p->opt)
+ dns_server_packet_bad_opt(t->server, t->current_feature_level);
+
+ /* Report that we successfully received a packet */
+ dns_server_packet_received(t->server, p->ipproto, t->current_feature_level, p->size);
+ }
+
+ /* See if we know things we didn't know before that indicate we better restart the lookup immediately. */
+ r = dns_transaction_maybe_restart(t);
+ if (r < 0)
+ goto fail;
+ if (r > 0) /* Transaction got restarted... */
+ return;
+
+ if (IN_SET(t->scope->protocol, DNS_PROTOCOL_DNS, DNS_PROTOCOL_LLMNR, DNS_PROTOCOL_MDNS)) {
+
+ /* When dealing with protocols other than mDNS only consider responses with
+ * equivalent query section to the request. For mDNS this check doesn't make
+ * sense, because the section 6 of RFC6762 states that "Multicast DNS responses MUST NOT
+ * contain any questions in the Question Section". */
+ if (t->scope->protocol != DNS_PROTOCOL_MDNS) {
+ r = dns_packet_is_reply_for(p, t->key);
+ if (r < 0)
+ goto fail;
+ if (r == 0) {
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ return;
+ }
+ }
+
+ /* Install the answer as answer to the transaction */
+ dns_answer_unref(t->answer);
+ t->answer = dns_answer_ref(p->answer);
+ t->answer_rcode = DNS_PACKET_RCODE(p);
+ t->answer_dnssec_result = _DNSSEC_RESULT_INVALID;
+ t->answer_authenticated = false;
+
+ r = dns_transaction_fix_rcode(t);
+ if (r < 0)
+ goto fail;
+
+ /* Block GC while starting requests for additional DNSSEC RRs */
+ t->block_gc++;
+ r = dns_transaction_request_dnssec_keys(t);
+ t->block_gc--;
+
+ /* Maybe the transaction is ready for GC'ing now? If so, free it and return. */
+ if (!dns_transaction_gc(t))
+ return;
+
+ /* Requesting additional keys might have resulted in
+ * this transaction to fail, since the auxiliary
+ * request failed for some reason. If so, we are not
+ * in pending state anymore, and we should exit
+ * quickly. */
+ if (t->state != DNS_TRANSACTION_PENDING)
+ return;
+ if (r < 0)
+ goto fail;
+ if (r > 0) {
+ /* There are DNSSEC transactions pending now. Update the state accordingly. */
+ t->state = DNS_TRANSACTION_VALIDATING;
+ dns_transaction_close_connection(t);
+ dns_transaction_stop_timeout(t);
+ return;
+ }
+ }
+
+ dns_transaction_process_dnssec(t);
+ return;
+
+fail:
+ t->answer_errno = -r;
+ dns_transaction_complete(t, DNS_TRANSACTION_ERRNO);
+}
+
+static int on_dns_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ DnsTransaction *t = userdata;
+ int r;
+
+ assert(t);
+ assert(t->scope);
+
+ r = manager_recv(t->scope->manager, fd, DNS_PROTOCOL_DNS, &p);
+ if (ERRNO_IS_DISCONNECT(-r)) {
+ usec_t usec;
+
+ /* UDP connection failure get reported via ICMP and then are possible delivered to us on the next
+ * recvmsg(). Treat this like a lost packet. */
+
+ log_debug_errno(r, "Connection failure for DNS UDP packet: %m");
+ assert_se(sd_event_now(t->scope->manager->event, clock_boottime_or_monotonic(), &usec) >= 0);
+ dns_server_packet_lost(t->server, IPPROTO_UDP, t->current_feature_level);
+
+ dns_transaction_retry(t, true);
+ return 0;
+ }
+ if (r < 0) {
+ dns_transaction_complete(t, DNS_TRANSACTION_ERRNO);
+ t->answer_errno = -r;
+ return 0;
+ }
+
+ r = dns_packet_validate_reply(p);
+ if (r < 0) {
+ log_debug_errno(r, "Received invalid DNS packet as response, ignoring: %m");
+ return 0;
+ }
+ if (r == 0) {
+ log_debug("Received inappropriate DNS packet as response, ignoring.");
+ return 0;
+ }
+
+ if (DNS_PACKET_ID(p) != t->id) {
+ log_debug("Received packet with incorrect transaction ID, ignoring.");
+ return 0;
+ }
+
+ dns_transaction_process_reply(t, p);
+ return 0;
+}
+
+static int dns_transaction_emit_udp(DnsTransaction *t) {
+ int r;
+
+ assert(t);
+
+ if (t->scope->protocol == DNS_PROTOCOL_DNS) {
+
+ r = dns_transaction_pick_server(t);
+ if (r < 0)
+ return r;
+
+ if (t->current_feature_level < DNS_SERVER_FEATURE_LEVEL_UDP || DNS_SERVER_FEATURE_LEVEL_IS_TLS(t->current_feature_level))
+ return -EAGAIN; /* Sorry, can't do UDP, try TCP! */
+
+ if (!dns_server_dnssec_supported(t->server) && dns_type_is_dnssec(t->key->type))
+ return -EOPNOTSUPP;
+
+ if (r > 0 || t->dns_udp_fd < 0) { /* Server changed, or no connection yet. */
+ int fd;
+
+ dns_transaction_close_connection(t);
+
+ fd = dns_scope_socket_udp(t->scope, t->server, 53);
+ if (fd < 0)
+ return fd;
+
+ r = sd_event_add_io(t->scope->manager->event, &t->dns_udp_event_source, fd, EPOLLIN, on_dns_packet, t);
+ if (r < 0) {
+ safe_close(fd);
+ return r;
+ }
+
+ (void) sd_event_source_set_description(t->dns_udp_event_source, "dns-transaction-udp");
+ t->dns_udp_fd = fd;
+ }
+
+ r = dns_server_adjust_opt(t->server, t->sent, t->current_feature_level);
+ if (r < 0)
+ return r;
+ } else
+ dns_transaction_close_connection(t);
+
+ r = dns_scope_emit_udp(t->scope, t->dns_udp_fd, t->sent);
+ if (r < 0)
+ return r;
+
+ dns_transaction_reset_answer(t);
+
+ return 0;
+}
+
+static int on_transaction_timeout(sd_event_source *s, usec_t usec, void *userdata) {
+ DnsTransaction *t = userdata;
+
+ assert(s);
+ assert(t);
+
+ if (!t->initial_jitter_scheduled || t->initial_jitter_elapsed) {
+ /* Timeout reached? Increase the timeout for the server used */
+ switch (t->scope->protocol) {
+
+ case DNS_PROTOCOL_DNS:
+ assert(t->server);
+ dns_server_packet_lost(t->server, t->stream ? IPPROTO_TCP : IPPROTO_UDP, t->current_feature_level);
+ break;
+
+ case DNS_PROTOCOL_LLMNR:
+ case DNS_PROTOCOL_MDNS:
+ dns_scope_packet_lost(t->scope, usec - t->start_usec);
+ break;
+
+ default:
+ assert_not_reached("Invalid DNS protocol.");
+ }
+
+ if (t->initial_jitter_scheduled)
+ t->initial_jitter_elapsed = true;
+ }
+
+ log_debug("Timeout reached on transaction %" PRIu16 ".", t->id);
+
+ dns_transaction_retry(t, true);
+ return 0;
+}
+
+static usec_t transaction_get_resend_timeout(DnsTransaction *t) {
+ assert(t);
+ assert(t->scope);
+
+ switch (t->scope->protocol) {
+
+ case DNS_PROTOCOL_DNS:
+
+ /* When we do TCP, grant a much longer timeout, as in this case there's no need for us to quickly
+ * resend, as the kernel does that anyway for us, and we really don't want to interrupt it in that
+ * needlessly. */
+ if (t->stream)
+ return TRANSACTION_TCP_TIMEOUT_USEC;
+
+ return DNS_TIMEOUT_USEC;
+
+ case DNS_PROTOCOL_MDNS:
+ assert(t->n_attempts > 0);
+ if (t->probing)
+ return MDNS_PROBING_INTERVAL_USEC;
+ else
+ return (1 << (t->n_attempts - 1)) * USEC_PER_SEC;
+
+ case DNS_PROTOCOL_LLMNR:
+ return t->scope->resend_timeout;
+
+ default:
+ assert_not_reached("Invalid DNS protocol.");
+ }
+}
+
+static int dns_transaction_prepare(DnsTransaction *t, usec_t ts) {
+ int r;
+
+ assert(t);
+
+ dns_transaction_stop_timeout(t);
+
+ if (!dns_scope_network_good(t->scope)) {
+ dns_transaction_complete(t, DNS_TRANSACTION_NETWORK_DOWN);
+ return 0;
+ }
+
+ if (t->n_attempts >= TRANSACTION_ATTEMPTS_MAX(t->scope->protocol)) {
+ dns_transaction_complete(t, DNS_TRANSACTION_ATTEMPTS_MAX_REACHED);
+ return 0;
+ }
+
+ if (t->scope->protocol == DNS_PROTOCOL_LLMNR && t->tried_stream) {
+ /* If we already tried via a stream, then we don't
+ * retry on LLMNR. See RFC 4795, Section 2.7. */
+ dns_transaction_complete(t, DNS_TRANSACTION_ATTEMPTS_MAX_REACHED);
+ return 0;
+ }
+
+ t->n_attempts++;
+ t->start_usec = ts;
+
+ dns_transaction_reset_answer(t);
+ dns_transaction_flush_dnssec_transactions(t);
+
+ /* Check the trust anchor. Do so only on classic DNS, since DNSSEC does not apply otherwise. */
+ if (t->scope->protocol == DNS_PROTOCOL_DNS) {
+ r = dns_trust_anchor_lookup_positive(&t->scope->manager->trust_anchor, t->key, &t->answer);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ t->answer_rcode = DNS_RCODE_SUCCESS;
+ t->answer_source = DNS_TRANSACTION_TRUST_ANCHOR;
+ t->answer_authenticated = true;
+ dns_transaction_complete(t, DNS_TRANSACTION_SUCCESS);
+ return 0;
+ }
+
+ if (dns_name_is_root(dns_resource_key_name(t->key)) &&
+ t->key->type == DNS_TYPE_DS) {
+
+ /* Hmm, this is a request for the root DS? A
+ * DS RR doesn't exist in the root zone, and
+ * if our trust anchor didn't know it either,
+ * this means we cannot do any DNSSEC logic
+ * anymore. */
+
+ if (t->scope->dnssec_mode == DNSSEC_ALLOW_DOWNGRADE) {
+ /* We are in downgrade mode. In this
+ * case, synthesize an unsigned empty
+ * response, so that the any lookup
+ * depending on this one can continue
+ * assuming there was no DS, and hence
+ * the root zone was unsigned. */
+
+ t->answer_rcode = DNS_RCODE_SUCCESS;
+ t->answer_source = DNS_TRANSACTION_TRUST_ANCHOR;
+ t->answer_authenticated = false;
+ dns_transaction_complete(t, DNS_TRANSACTION_SUCCESS);
+ } else
+ /* If we are not in downgrade mode,
+ * then fail the lookup, because we
+ * cannot reasonably answer it. There
+ * might be DS RRs, but we don't know
+ * them, and the DNS server won't tell
+ * them to us (and even if it would,
+ * we couldn't validate and trust them. */
+ dns_transaction_complete(t, DNS_TRANSACTION_NO_TRUST_ANCHOR);
+
+ return 0;
+ }
+ }
+
+ /* Check the zone, but only if this transaction is not used
+ * for probing or verifying a zone item. */
+ if (set_isempty(t->notify_zone_items)) {
+
+ r = dns_zone_lookup(&t->scope->zone, t->key, dns_scope_ifindex(t->scope), &t->answer, NULL, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ t->answer_rcode = DNS_RCODE_SUCCESS;
+ t->answer_source = DNS_TRANSACTION_ZONE;
+ t->answer_authenticated = true;
+ dns_transaction_complete(t, DNS_TRANSACTION_SUCCESS);
+ return 0;
+ }
+ }
+
+ /* Check the cache, but only if this transaction is not used
+ * for probing or verifying a zone item. */
+ if (set_isempty(t->notify_zone_items)) {
+
+ /* Before trying the cache, let's make sure we figured out a
+ * server to use. Should this cause a change of server this
+ * might flush the cache. */
+ (void) dns_scope_get_dns_server(t->scope);
+
+ /* Let's then prune all outdated entries */
+ dns_cache_prune(&t->scope->cache);
+
+ r = dns_cache_lookup(&t->scope->cache, t->key, t->clamp_ttl, &t->answer_rcode, &t->answer, &t->answer_authenticated);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ t->answer_source = DNS_TRANSACTION_CACHE;
+ if (t->answer_rcode == DNS_RCODE_SUCCESS)
+ dns_transaction_complete(t, DNS_TRANSACTION_SUCCESS);
+ else
+ dns_transaction_complete(t, DNS_TRANSACTION_RCODE_FAILURE);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static int dns_transaction_make_packet_mdns(DnsTransaction *t) {
+
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ bool add_known_answers = false;
+ DnsTransaction *other;
+ Iterator i;
+ DnsResourceKey *tkey;
+ _cleanup_set_free_ Set *keys = NULL;
+ unsigned qdcount;
+ unsigned nscount = 0;
+ usec_t ts;
+ int r;
+
+ assert(t);
+ assert(t->scope->protocol == DNS_PROTOCOL_MDNS);
+
+ /* Discard any previously prepared packet, so we can start over and coalesce again */
+ t->sent = dns_packet_unref(t->sent);
+
+ r = dns_packet_new_query(&p, t->scope->protocol, 0, false);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_append_key(p, t->key, 0, NULL);
+ if (r < 0)
+ return r;
+
+ qdcount = 1;
+
+ if (dns_key_is_shared(t->key))
+ add_known_answers = true;
+
+ if (t->key->type == DNS_TYPE_ANY) {
+ r = set_ensure_allocated(&keys, &dns_resource_key_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = set_put(keys, t->key);
+ if (r < 0)
+ return r;
+ }
+
+ /*
+ * For mDNS, we want to coalesce as many open queries in pending transactions into one single
+ * query packet on the wire as possible. To achieve that, we iterate through all pending transactions
+ * in our current scope, and see whether their timing contraints allow them to be sent.
+ */
+
+ assert_se(sd_event_now(t->scope->manager->event, clock_boottime_or_monotonic(), &ts) >= 0);
+
+ LIST_FOREACH(transactions_by_scope, other, t->scope->transactions) {
+
+ /* Skip ourselves */
+ if (other == t)
+ continue;
+
+ if (other->state != DNS_TRANSACTION_PENDING)
+ continue;
+
+ if (other->next_attempt_after > ts)
+ continue;
+
+ if (qdcount >= UINT16_MAX)
+ break;
+
+ r = dns_packet_append_key(p, other->key, 0, NULL);
+
+ /*
+ * If we can't stuff more questions into the packet, just give up.
+ * One of the 'other' transactions will fire later and take care of the rest.
+ */
+ if (r == -EMSGSIZE)
+ break;
+
+ if (r < 0)
+ return r;
+
+ r = dns_transaction_prepare(other, ts);
+ if (r <= 0)
+ continue;
+
+ ts += transaction_get_resend_timeout(other);
+
+ r = sd_event_add_time(
+ other->scope->manager->event,
+ &other->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ ts, 0,
+ on_transaction_timeout, other);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(other->timeout_event_source, "dns-transaction-timeout");
+
+ other->state = DNS_TRANSACTION_PENDING;
+ other->next_attempt_after = ts;
+
+ qdcount++;
+
+ if (dns_key_is_shared(other->key))
+ add_known_answers = true;
+
+ if (other->key->type == DNS_TYPE_ANY) {
+ r = set_ensure_allocated(&keys, &dns_resource_key_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = set_put(keys, other->key);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ DNS_PACKET_HEADER(p)->qdcount = htobe16(qdcount);
+
+ /* Append known answer section if we're asking for any shared record */
+ if (add_known_answers) {
+ r = dns_cache_export_shared_to_packet(&t->scope->cache, p);
+ if (r < 0)
+ return r;
+ }
+
+ SET_FOREACH(tkey, keys, i) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ bool tentative;
+
+ r = dns_zone_lookup(&t->scope->zone, tkey, t->scope->link->ifindex, &answer, NULL, &tentative);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_append_answer(p, answer);
+ if (r < 0)
+ return r;
+
+ nscount += dns_answer_size(answer);
+ }
+ DNS_PACKET_HEADER(p)->nscount = htobe16(nscount);
+
+ t->sent = TAKE_PTR(p);
+
+ return 0;
+}
+
+static int dns_transaction_make_packet(DnsTransaction *t) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ int r;
+
+ assert(t);
+
+ if (t->scope->protocol == DNS_PROTOCOL_MDNS)
+ return dns_transaction_make_packet_mdns(t);
+
+ if (t->sent)
+ return 0;
+
+ r = dns_packet_new_query(&p, t->scope->protocol, 0, t->scope->dnssec_mode != DNSSEC_NO);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_append_key(p, t->key, 0, NULL);
+ if (r < 0)
+ return r;
+
+ DNS_PACKET_HEADER(p)->qdcount = htobe16(1);
+ DNS_PACKET_HEADER(p)->id = t->id;
+
+ t->sent = TAKE_PTR(p);
+
+ return 0;
+}
+
+int dns_transaction_go(DnsTransaction *t) {
+ usec_t ts;
+ int r;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ assert(t);
+
+ /* Returns > 0 if the transaction is now pending, returns 0 if could be processed immediately and has finished
+ * now. */
+
+ assert_se(sd_event_now(t->scope->manager->event, clock_boottime_or_monotonic(), &ts) >= 0);
+
+ r = dns_transaction_prepare(t, ts);
+ if (r <= 0)
+ return r;
+
+ log_debug("Transaction %" PRIu16 " for <%s> scope %s on %s/%s.",
+ t->id,
+ dns_resource_key_to_string(t->key, key_str, sizeof key_str),
+ dns_protocol_to_string(t->scope->protocol),
+ t->scope->link ? t->scope->link->name : "*",
+ af_to_name_short(t->scope->family));
+
+ if (!t->initial_jitter_scheduled &&
+ IN_SET(t->scope->protocol, DNS_PROTOCOL_LLMNR, DNS_PROTOCOL_MDNS)) {
+ usec_t jitter, accuracy;
+
+ /* RFC 4795 Section 2.7 suggests all queries should be
+ * delayed by a random time from 0 to JITTER_INTERVAL. */
+
+ t->initial_jitter_scheduled = true;
+
+ random_bytes(&jitter, sizeof(jitter));
+
+ switch (t->scope->protocol) {
+
+ case DNS_PROTOCOL_LLMNR:
+ jitter %= LLMNR_JITTER_INTERVAL_USEC;
+ accuracy = LLMNR_JITTER_INTERVAL_USEC;
+ break;
+
+ case DNS_PROTOCOL_MDNS:
+ jitter %= MDNS_JITTER_RANGE_USEC;
+ jitter += MDNS_JITTER_MIN_USEC;
+ accuracy = MDNS_JITTER_RANGE_USEC;
+ break;
+ default:
+ assert_not_reached("bad protocol");
+ }
+
+ r = sd_event_add_time(
+ t->scope->manager->event,
+ &t->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ ts + jitter, accuracy,
+ on_transaction_timeout, t);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(t->timeout_event_source, "dns-transaction-timeout");
+
+ t->n_attempts = 0;
+ t->next_attempt_after = ts;
+ t->state = DNS_TRANSACTION_PENDING;
+
+ log_debug("Delaying %s transaction for " USEC_FMT "us.", dns_protocol_to_string(t->scope->protocol), jitter);
+ return 0;
+ }
+
+ /* Otherwise, we need to ask the network */
+ r = dns_transaction_make_packet(t);
+ if (r < 0)
+ return r;
+
+ if (t->scope->protocol == DNS_PROTOCOL_LLMNR &&
+ (dns_name_endswith(dns_resource_key_name(t->key), "in-addr.arpa") > 0 ||
+ dns_name_endswith(dns_resource_key_name(t->key), "ip6.arpa") > 0)) {
+
+ /* RFC 4795, Section 2.4. says reverse lookups shall
+ * always be made via TCP on LLMNR */
+ r = dns_transaction_emit_tcp(t);
+ } else {
+ /* Try via UDP, and if that fails due to large size or lack of
+ * support try via TCP */
+ r = dns_transaction_emit_udp(t);
+ if (r == -EMSGSIZE)
+ log_debug("Sending query via TCP since it is too large.");
+ else if (r == -EAGAIN)
+ log_debug("Sending query via TCP since UDP isn't supported.");
+ if (IN_SET(r, -EMSGSIZE, -EAGAIN))
+ r = dns_transaction_emit_tcp(t);
+ }
+
+ if (r == -ESRCH) {
+ /* No servers to send this to? */
+ dns_transaction_complete(t, DNS_TRANSACTION_NO_SERVERS);
+ return 0;
+ }
+ if (r == -EOPNOTSUPP) {
+ /* Tried to ask for DNSSEC RRs, on a server that doesn't do DNSSEC */
+ dns_transaction_complete(t, DNS_TRANSACTION_RR_TYPE_UNSUPPORTED);
+ return 0;
+ }
+ if (t->scope->protocol == DNS_PROTOCOL_LLMNR && ERRNO_IS_DISCONNECT(-r)) {
+ /* On LLMNR, if we cannot connect to a host via TCP when doing reverse lookups. This means we cannot
+ * answer this request with this protocol. */
+ dns_transaction_complete(t, DNS_TRANSACTION_NOT_FOUND);
+ return 0;
+ }
+ if (r < 0) {
+ if (t->scope->protocol != DNS_PROTOCOL_DNS)
+ return r;
+
+ /* Couldn't send? Try immediately again, with a new server */
+ dns_scope_next_dns_server(t->scope);
+
+ return dns_transaction_go(t);
+ }
+
+ ts += transaction_get_resend_timeout(t);
+
+ r = sd_event_add_time(
+ t->scope->manager->event,
+ &t->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ ts, 0,
+ on_transaction_timeout, t);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(t->timeout_event_source, "dns-transaction-timeout");
+
+ t->state = DNS_TRANSACTION_PENDING;
+ t->next_attempt_after = ts;
+
+ return 1;
+}
+
+static int dns_transaction_find_cyclic(DnsTransaction *t, DnsTransaction *aux) {
+ DnsTransaction *n;
+ Iterator i;
+ int r;
+
+ assert(t);
+ assert(aux);
+
+ /* Try to find cyclic dependencies between transaction objects */
+
+ if (t == aux)
+ return 1;
+
+ SET_FOREACH(n, aux->dnssec_transactions, i) {
+ r = dns_transaction_find_cyclic(t, n);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dns_transaction_add_dnssec_transaction(DnsTransaction *t, DnsResourceKey *key, DnsTransaction **ret) {
+ DnsTransaction *aux;
+ int r;
+
+ assert(t);
+ assert(ret);
+ assert(key);
+
+ aux = dns_scope_find_transaction(t->scope, key, true);
+ if (!aux) {
+ r = dns_transaction_new(&aux, t->scope, key);
+ if (r < 0)
+ return r;
+ } else {
+ if (set_contains(t->dnssec_transactions, aux)) {
+ *ret = aux;
+ return 0;
+ }
+
+ r = dns_transaction_find_cyclic(t, aux);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ char s[DNS_RESOURCE_KEY_STRING_MAX], saux[DNS_RESOURCE_KEY_STRING_MAX];
+
+ return log_debug_errno(SYNTHETIC_ERRNO(ELOOP),
+ "Potential cyclic dependency, refusing to add transaction %" PRIu16 " (%s) as dependency for %" PRIu16 " (%s).",
+ aux->id,
+ dns_resource_key_to_string(t->key, s, sizeof s),
+ t->id,
+ dns_resource_key_to_string(aux->key, saux, sizeof saux));
+ }
+ }
+
+ r = set_ensure_allocated(&t->dnssec_transactions, NULL);
+ if (r < 0)
+ goto gc;
+
+ r = set_ensure_allocated(&aux->notify_transactions, NULL);
+ if (r < 0)
+ goto gc;
+
+ r = set_ensure_allocated(&aux->notify_transactions_done, NULL);
+ if (r < 0)
+ goto gc;
+
+ r = set_put(t->dnssec_transactions, aux);
+ if (r < 0)
+ goto gc;
+
+ r = set_put(aux->notify_transactions, t);
+ if (r < 0) {
+ (void) set_remove(t->dnssec_transactions, aux);
+ goto gc;
+ }
+
+ *ret = aux;
+ return 1;
+
+gc:
+ dns_transaction_gc(aux);
+ return r;
+}
+
+static int dns_transaction_request_dnssec_rr(DnsTransaction *t, DnsResourceKey *key) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *a = NULL;
+ DnsTransaction *aux;
+ int r;
+
+ assert(t);
+ assert(key);
+
+ /* Try to get the data from the trust anchor */
+ r = dns_trust_anchor_lookup_positive(&t->scope->manager->trust_anchor, key, &a);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ r = dns_answer_extend(&t->validated_keys, a);
+ if (r < 0)
+ return r;
+
+ return 0;
+ }
+
+ /* This didn't work, ask for it via the network/cache then. */
+ r = dns_transaction_add_dnssec_transaction(t, key, &aux);
+ if (r == -ELOOP) /* This would result in a cyclic dependency */
+ return 0;
+ if (r < 0)
+ return r;
+
+ if (aux->state == DNS_TRANSACTION_NULL) {
+ r = dns_transaction_go(aux);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+static int dns_transaction_negative_trust_anchor_lookup(DnsTransaction *t, const char *name) {
+ int r;
+
+ assert(t);
+
+ /* Check whether the specified name is in the NTA
+ * database, either in the global one, or the link-local
+ * one. */
+
+ r = dns_trust_anchor_lookup_negative(&t->scope->manager->trust_anchor, name);
+ if (r != 0)
+ return r;
+
+ if (!t->scope->link)
+ return 0;
+
+ return set_contains(t->scope->link->dnssec_negative_trust_anchors, name);
+}
+
+static int dns_transaction_has_unsigned_negative_answer(DnsTransaction *t) {
+ int r;
+
+ assert(t);
+
+ /* Checks whether the answer is negative, and lacks NSEC/NSEC3
+ * RRs to prove it */
+
+ r = dns_transaction_has_positive_answer(t, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return false;
+
+ /* Is this key explicitly listed as a negative trust anchor?
+ * If so, it's nothing we need to care about */
+ r = dns_transaction_negative_trust_anchor_lookup(t, dns_resource_key_name(t->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return false;
+
+ /* The answer does not contain any RRs that match to the
+ * question. If so, let's see if there are any NSEC/NSEC3 RRs
+ * included. If not, the answer is unsigned. */
+
+ r = dns_answer_contains_nsec_or_nsec3(t->answer);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return false;
+
+ return true;
+}
+
+static int dns_transaction_is_primary_response(DnsTransaction *t, DnsResourceRecord *rr) {
+ int r;
+
+ assert(t);
+ assert(rr);
+
+ /* Check if the specified RR is the "primary" response,
+ * i.e. either matches the question precisely or is a
+ * CNAME/DNAME for it. */
+
+ r = dns_resource_key_match_rr(t->key, rr, NULL);
+ if (r != 0)
+ return r;
+
+ return dns_resource_key_match_cname_or_dname(t->key, rr->key, NULL);
+}
+
+static bool dns_transaction_dnssec_supported(DnsTransaction *t) {
+ assert(t);
+
+ /* Checks whether our transaction's DNS server is assumed to be compatible with DNSSEC. Returns false as soon
+ * as we changed our mind about a server, and now believe it is incompatible with DNSSEC. */
+
+ if (t->scope->protocol != DNS_PROTOCOL_DNS)
+ return false;
+
+ /* If we have picked no server, then we are working from the cache or some other source, and DNSSEC might well
+ * be supported, hence return true. */
+ if (!t->server)
+ return true;
+
+ /* Note that we do not check the feature level actually used for the transaction but instead the feature level
+ * the server is known to support currently, as the transaction feature level might be lower than what the
+ * server actually supports, since we might have downgraded this transaction's feature level because we got a
+ * SERVFAIL earlier and wanted to check whether downgrading fixes it. */
+
+ return dns_server_dnssec_supported(t->server);
+}
+
+static bool dns_transaction_dnssec_supported_full(DnsTransaction *t) {
+ DnsTransaction *dt;
+ Iterator i;
+
+ assert(t);
+
+ /* Checks whether our transaction our any of the auxiliary transactions couldn't do DNSSEC. */
+
+ if (!dns_transaction_dnssec_supported(t))
+ return false;
+
+ SET_FOREACH(dt, t->dnssec_transactions, i)
+ if (!dns_transaction_dnssec_supported(dt))
+ return false;
+
+ return true;
+}
+
+int dns_transaction_request_dnssec_keys(DnsTransaction *t) {
+ DnsResourceRecord *rr;
+
+ int r;
+
+ assert(t);
+
+ /*
+ * Retrieve all auxiliary RRs for the answer we got, so that
+ * we can verify signatures or prove that RRs are rightfully
+ * unsigned. Specifically:
+ *
+ * - For RRSIG we get the matching DNSKEY
+ * - For DNSKEY we get the matching DS
+ * - For unsigned SOA/NS we get the matching DS
+ * - For unsigned CNAME/DNAME/DS we get the parent SOA RR
+ * - For other unsigned RRs we get the matching SOA RR
+ * - For SOA/NS queries with no matching response RR, and no NSEC/NSEC3, the DS RR
+ * - For DS queries with no matching response RRs, and no NSEC/NSEC3, the parent's SOA RR
+ * - For other queries with no matching response RRs, and no NSEC/NSEC3, the SOA RR
+ */
+
+ if (t->scope->dnssec_mode == DNSSEC_NO)
+ return 0;
+ if (t->answer_source != DNS_TRANSACTION_NETWORK)
+ return 0; /* We only need to validate stuff from the network */
+ if (!dns_transaction_dnssec_supported(t))
+ return 0; /* If we can't do DNSSEC anyway there's no point in geting the auxiliary RRs */
+
+ DNS_ANSWER_FOREACH(rr, t->answer) {
+
+ if (dns_type_is_pseudo(rr->key->type))
+ continue;
+
+ /* If this RR is in the negative trust anchor, we don't need to validate it. */
+ r = dns_transaction_negative_trust_anchor_lookup(t, dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ switch (rr->key->type) {
+
+ case DNS_TYPE_RRSIG: {
+ /* For each RRSIG we request the matching DNSKEY */
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *dnskey = NULL;
+
+ /* If this RRSIG is about a DNSKEY RR and the
+ * signer is the same as the owner, then we
+ * already have the DNSKEY, and we don't have
+ * to look for more. */
+ if (rr->rrsig.type_covered == DNS_TYPE_DNSKEY) {
+ r = dns_name_equal(rr->rrsig.signer, dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+ }
+
+ /* If the signer is not a parent of our
+ * original query, then this is about an
+ * auxiliary RRset, but not anything we asked
+ * for. In this case we aren't interested,
+ * because we don't want to request additional
+ * RRs for stuff we didn't really ask for, and
+ * also to avoid request loops, where
+ * additional RRs from one transaction result
+ * in another transaction whose additonal RRs
+ * point back to the original transaction, and
+ * we deadlock. */
+ r = dns_name_endswith(dns_resource_key_name(t->key), rr->rrsig.signer);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ dnskey = dns_resource_key_new(rr->key->class, DNS_TYPE_DNSKEY, rr->rrsig.signer);
+ if (!dnskey)
+ return -ENOMEM;
+
+ log_debug("Requesting DNSKEY to validate transaction %" PRIu16" (%s, RRSIG with key tag: %" PRIu16 ").",
+ t->id, dns_resource_key_name(rr->key), rr->rrsig.key_tag);
+ r = dns_transaction_request_dnssec_rr(t, dnskey);
+ if (r < 0)
+ return r;
+ break;
+ }
+
+ case DNS_TYPE_DNSKEY: {
+ /* For each DNSKEY we request the matching DS */
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *ds = NULL;
+
+ /* If the DNSKEY we are looking at is not for
+ * zone we are interested in, nor any of its
+ * parents, we aren't interested, and don't
+ * request it. After all, we don't want to end
+ * up in request loops, and want to keep
+ * additional traffic down. */
+
+ r = dns_name_endswith(dns_resource_key_name(t->key), dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ ds = dns_resource_key_new(rr->key->class, DNS_TYPE_DS, dns_resource_key_name(rr->key));
+ if (!ds)
+ return -ENOMEM;
+
+ log_debug("Requesting DS to validate transaction %" PRIu16" (%s, DNSKEY with key tag: %" PRIu16 ").",
+ t->id, dns_resource_key_name(rr->key), dnssec_keytag(rr, false));
+ r = dns_transaction_request_dnssec_rr(t, ds);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ case DNS_TYPE_SOA:
+ case DNS_TYPE_NS: {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *ds = NULL;
+
+ /* For an unsigned SOA or NS, try to acquire
+ * the matching DS RR, as we are at a zone cut
+ * then, and whether a DS exists tells us
+ * whether the zone is signed. Do so only if
+ * this RR matches our original question,
+ * however. */
+
+ r = dns_resource_key_match_rr(t->key, rr, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Hmm, so this SOA RR doesn't match our original question. In this case, maybe this is
+ * a negative reply, and we need the a SOA RR's TTL in order to cache a negative entry?
+ * If so, we need to validate it, too. */
+
+ r = dns_answer_match_key(t->answer, t->key, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0) /* positive reply, we won't need the SOA and hence don't need to validate
+ * it. */
+ continue;
+
+ /* Only bother with this if the SOA/NS RR we are looking at is actually a parent of
+ * what we are looking for, otherwise there's no value in it for us. */
+ r = dns_name_endswith(dns_resource_key_name(t->key), dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+ }
+
+ r = dnssec_has_rrsig(t->answer, rr->key);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ ds = dns_resource_key_new(rr->key->class, DNS_TYPE_DS, dns_resource_key_name(rr->key));
+ if (!ds)
+ return -ENOMEM;
+
+ log_debug("Requesting DS to validate transaction %" PRIu16 " (%s, unsigned SOA/NS RRset).",
+ t->id, dns_resource_key_name(rr->key));
+ r = dns_transaction_request_dnssec_rr(t, ds);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ case DNS_TYPE_DS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME: {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *soa = NULL;
+ const char *name;
+
+ /* CNAMEs and DNAMEs cannot be located at a
+ * zone apex, hence ask for the parent SOA for
+ * unsigned CNAME/DNAME RRs, maybe that's the
+ * apex. But do all that only if this is
+ * actually a response to our original
+ * question.
+ *
+ * Similar for DS RRs, which are signed when
+ * the parent SOA is signed. */
+
+ r = dns_transaction_is_primary_response(t, rr);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dnssec_has_rrsig(t->answer, rr->key);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ r = dns_answer_has_dname_for_cname(t->answer, rr);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ name = dns_resource_key_name(rr->key);
+ r = dns_name_parent(&name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ soa = dns_resource_key_new(rr->key->class, DNS_TYPE_SOA, name);
+ if (!soa)
+ return -ENOMEM;
+
+ log_debug("Requesting parent SOA to validate transaction %" PRIu16 " (%s, unsigned CNAME/DNAME/DS RRset).",
+ t->id, dns_resource_key_name(rr->key));
+ r = dns_transaction_request_dnssec_rr(t, soa);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ default: {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *soa = NULL;
+
+ /* For other unsigned RRsets (including
+ * NSEC/NSEC3!), look for proof the zone is
+ * unsigned, by requesting the SOA RR of the
+ * zone. However, do so only if they are
+ * directly relevant to our original
+ * question. */
+
+ r = dns_transaction_is_primary_response(t, rr);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dnssec_has_rrsig(t->answer, rr->key);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ soa = dns_resource_key_new(rr->key->class, DNS_TYPE_SOA, dns_resource_key_name(rr->key));
+ if (!soa)
+ return -ENOMEM;
+
+ log_debug("Requesting SOA to validate transaction %" PRIu16 " (%s, unsigned non-SOA/NS RRset <%s>).",
+ t->id, dns_resource_key_name(rr->key), dns_resource_record_to_string(rr));
+ r = dns_transaction_request_dnssec_rr(t, soa);
+ if (r < 0)
+ return r;
+ break;
+ }}
+ }
+
+ /* Above, we requested everything necessary to validate what
+ * we got. Now, let's request what we need to validate what we
+ * didn't get... */
+
+ r = dns_transaction_has_unsigned_negative_answer(t);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ const char *name;
+ uint16_t type = 0;
+
+ name = dns_resource_key_name(t->key);
+
+ /* If this was a SOA or NS request, then check if there's a DS RR for the same domain. Note that this
+ * could also be used as indication that we are not at a zone apex, but in real world setups there are
+ * too many broken DNS servers (Hello, incapdns.net!) where non-terminal zones return NXDOMAIN even
+ * though they have further children. If this was a DS request, then it's signed when the parent zone
+ * is signed, hence ask the parent SOA in that case. If this was any other RR then ask for the SOA RR,
+ * to see if that is signed. */
+
+ if (t->key->type == DNS_TYPE_DS) {
+ r = dns_name_parent(&name);
+ if (r > 0) {
+ type = DNS_TYPE_SOA;
+ log_debug("Requesting parent SOA (→ %s) to validate transaction %" PRIu16 " (%s, unsigned empty DS response).",
+ name, t->id, dns_resource_key_name(t->key));
+ } else
+ name = NULL;
+
+ } else if (IN_SET(t->key->type, DNS_TYPE_SOA, DNS_TYPE_NS)) {
+
+ type = DNS_TYPE_DS;
+ log_debug("Requesting DS (→ %s) to validate transaction %" PRIu16 " (%s, unsigned empty SOA/NS response).",
+ name, t->id, name);
+
+ } else {
+ type = DNS_TYPE_SOA;
+ log_debug("Requesting SOA (→ %s) to validate transaction %" PRIu16 " (%s, unsigned empty non-SOA/NS/DS response).",
+ name, t->id, name);
+ }
+
+ if (name) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *soa = NULL;
+
+ soa = dns_resource_key_new(t->key->class, type, name);
+ if (!soa)
+ return -ENOMEM;
+
+ r = dns_transaction_request_dnssec_rr(t, soa);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return dns_transaction_dnssec_is_live(t);
+}
+
+void dns_transaction_notify(DnsTransaction *t, DnsTransaction *source) {
+ assert(t);
+ assert(source);
+
+ /* Invoked whenever any of our auxiliary DNSSEC transactions completed its work. If the state is still PENDING,
+ we are still in the loop that adds further DNSSEC transactions, hence don't check if we are ready yet. If
+ the state is VALIDATING however, we should check if we are complete now. */
+
+ if (t->state == DNS_TRANSACTION_VALIDATING)
+ dns_transaction_process_dnssec(t);
+}
+
+static int dns_transaction_validate_dnskey_by_ds(DnsTransaction *t) {
+ DnsResourceRecord *rr;
+ int ifindex, r;
+
+ assert(t);
+
+ /* Add all DNSKEY RRs from the answer that are validated by DS
+ * RRs from the list of validated keys to the list of
+ * validated keys. */
+
+ DNS_ANSWER_FOREACH_IFINDEX(rr, ifindex, t->answer) {
+
+ r = dnssec_verify_dnskey_by_ds_search(rr, t->validated_keys);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* If so, the DNSKEY is validated too. */
+ r = dns_answer_add_extend(&t->validated_keys, rr, ifindex, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dns_transaction_requires_rrsig(DnsTransaction *t, DnsResourceRecord *rr) {
+ int r;
+
+ assert(t);
+ assert(rr);
+
+ /* Checks if the RR we are looking for must be signed with an
+ * RRSIG. This is used for positive responses. */
+
+ if (t->scope->dnssec_mode == DNSSEC_NO)
+ return false;
+
+ if (dns_type_is_pseudo(rr->key->type))
+ return -EINVAL;
+
+ r = dns_transaction_negative_trust_anchor_lookup(t, dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return false;
+
+ switch (rr->key->type) {
+
+ case DNS_TYPE_RRSIG:
+ /* RRSIGs are the signatures themselves, they need no signing. */
+ return false;
+
+ case DNS_TYPE_SOA:
+ case DNS_TYPE_NS: {
+ DnsTransaction *dt;
+ Iterator i;
+
+ /* For SOA or NS RRs we look for a matching DS transaction */
+
+ SET_FOREACH(dt, t->dnssec_transactions, i) {
+
+ if (dt->key->class != rr->key->class)
+ continue;
+ if (dt->key->type != DNS_TYPE_DS)
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(dt->key), dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* We found a DS transactions for the SOA/NS
+ * RRs we are looking at. If it discovered signed DS
+ * RRs, then we need to be signed, too. */
+
+ if (!dt->answer_authenticated)
+ return false;
+
+ return dns_answer_match_key(dt->answer, dt->key, NULL);
+ }
+
+ /* We found nothing that proves this is safe to leave
+ * this unauthenticated, hence ask inist on
+ * authentication. */
+ return true;
+ }
+
+ case DNS_TYPE_DS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME: {
+ const char *parent = NULL;
+ DnsTransaction *dt;
+ Iterator i;
+
+ /*
+ * CNAME/DNAME RRs cannot be located at a zone apex, hence look directly for the parent SOA.
+ *
+ * DS RRs are signed if the parent is signed, hence also look at the parent SOA
+ */
+
+ SET_FOREACH(dt, t->dnssec_transactions, i) {
+
+ if (dt->key->class != rr->key->class)
+ continue;
+ if (dt->key->type != DNS_TYPE_SOA)
+ continue;
+
+ if (!parent) {
+ parent = dns_resource_key_name(rr->key);
+ r = dns_name_parent(&parent);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (rr->key->type == DNS_TYPE_DS)
+ return true;
+
+ /* A CNAME/DNAME without a parent? That's sooo weird. */
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Transaction %" PRIu16 " claims CNAME/DNAME at root. Refusing.", t->id);
+ }
+ }
+
+ r = dns_name_equal(dns_resource_key_name(dt->key), parent);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ return t->answer_authenticated;
+ }
+
+ return true;
+ }
+
+ default: {
+ DnsTransaction *dt;
+ Iterator i;
+
+ /* Any other kind of RR (including DNSKEY/NSEC/NSEC3). Let's see if our SOA lookup was authenticated */
+
+ SET_FOREACH(dt, t->dnssec_transactions, i) {
+
+ if (dt->key->class != rr->key->class)
+ continue;
+ if (dt->key->type != DNS_TYPE_SOA)
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(dt->key), dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* We found the transaction that was supposed to find
+ * the SOA RR for us. It was successful, but found no
+ * RR for us. This means we are not at a zone cut. In
+ * this case, we require authentication if the SOA
+ * lookup was authenticated too. */
+ return t->answer_authenticated;
+ }
+
+ return true;
+ }}
+}
+
+static int dns_transaction_in_private_tld(DnsTransaction *t, const DnsResourceKey *key) {
+ DnsTransaction *dt;
+ const char *tld;
+ Iterator i;
+ int r;
+
+ /* If DNSSEC downgrade mode is on, checks whether the
+ * specified RR is one level below a TLD we have proven not to
+ * exist. In such a case we assume that this is a private
+ * domain, and permit it.
+ *
+ * This detects cases like the Fritz!Box router networks. Each
+ * Fritz!Box router serves a private "fritz.box" zone, in the
+ * non-existing TLD "box". Requests for the "fritz.box" domain
+ * are served by the router itself, while requests for the
+ * "box" domain will result in NXDOMAIN.
+ *
+ * Note that this logic is unable to detect cases where a
+ * router serves a private DNS zone directly under
+ * non-existing TLD. In such a case we cannot detect whether
+ * the TLD is supposed to exist or not, as all requests we
+ * make for it will be answered by the router's zone, and not
+ * by the root zone. */
+
+ assert(t);
+
+ if (t->scope->dnssec_mode != DNSSEC_ALLOW_DOWNGRADE)
+ return false; /* In strict DNSSEC mode what doesn't exist, doesn't exist */
+
+ tld = dns_resource_key_name(key);
+ r = dns_name_parent(&tld);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return false; /* Already the root domain */
+
+ if (!dns_name_is_single_label(tld))
+ return false;
+
+ SET_FOREACH(dt, t->dnssec_transactions, i) {
+
+ if (dt->key->class != key->class)
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(dt->key), tld);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* We found an auxiliary lookup we did for the TLD. If
+ * that returned with NXDOMAIN, we know the TLD didn't
+ * exist, and hence this might be a private zone. */
+
+ return dt->answer_rcode == DNS_RCODE_NXDOMAIN;
+ }
+
+ return false;
+}
+
+static int dns_transaction_requires_nsec(DnsTransaction *t) {
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+ DnsTransaction *dt;
+ const char *name;
+ uint16_t type = 0;
+ Iterator i;
+ int r;
+
+ assert(t);
+
+ /* Checks if we need to insist on NSEC/NSEC3 RRs for proving
+ * this negative reply */
+
+ if (t->scope->dnssec_mode == DNSSEC_NO)
+ return false;
+
+ if (dns_type_is_pseudo(t->key->type))
+ return -EINVAL;
+
+ r = dns_transaction_negative_trust_anchor_lookup(t, dns_resource_key_name(t->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return false;
+
+ r = dns_transaction_in_private_tld(t, t->key);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* The lookup is from a TLD that is proven not to
+ * exist, and we are in downgrade mode, hence ignore
+ * that fact that we didn't get any NSEC RRs. */
+
+ log_info("Detected a negative query %s in a private DNS zone, permitting unsigned response.",
+ dns_resource_key_to_string(t->key, key_str, sizeof key_str));
+ return false;
+ }
+
+ name = dns_resource_key_name(t->key);
+
+ if (t->key->type == DNS_TYPE_DS) {
+
+ /* We got a negative reply for this DS lookup? DS RRs are signed when their parent zone is signed,
+ * hence check the parent SOA in this case. */
+
+ r = dns_name_parent(&name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return true;
+
+ type = DNS_TYPE_SOA;
+
+ } else if (IN_SET(t->key->type, DNS_TYPE_SOA, DNS_TYPE_NS))
+ /* We got a negative reply for this SOA/NS lookup? If so, check if there's a DS RR for this */
+ type = DNS_TYPE_DS;
+ else
+ /* For all other negative replies, check for the SOA lookup */
+ type = DNS_TYPE_SOA;
+
+ /* For all other RRs we check the SOA on the same level to see
+ * if it's signed. */
+
+ SET_FOREACH(dt, t->dnssec_transactions, i) {
+
+ if (dt->key->class != t->key->class)
+ continue;
+ if (dt->key->type != type)
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(dt->key), name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ return dt->answer_authenticated;
+ }
+
+ /* If in doubt, require NSEC/NSEC3 */
+ return true;
+}
+
+static int dns_transaction_dnskey_authenticated(DnsTransaction *t, DnsResourceRecord *rr) {
+ DnsResourceRecord *rrsig;
+ bool found = false;
+ int r;
+
+ /* Checks whether any of the DNSKEYs used for the RRSIGs for
+ * the specified RRset is authenticated (i.e. has a matching
+ * DS RR). */
+
+ r = dns_transaction_negative_trust_anchor_lookup(t, dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return false;
+
+ DNS_ANSWER_FOREACH(rrsig, t->answer) {
+ DnsTransaction *dt;
+ Iterator i;
+
+ r = dnssec_key_match_rrsig(rr->key, rrsig);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ SET_FOREACH(dt, t->dnssec_transactions, i) {
+
+ if (dt->key->class != rr->key->class)
+ continue;
+
+ if (dt->key->type == DNS_TYPE_DNSKEY) {
+
+ r = dns_name_equal(dns_resource_key_name(dt->key), rrsig->rrsig.signer);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* OK, we found an auxiliary DNSKEY
+ * lookup. If that lookup is
+ * authenticated, report this. */
+
+ if (dt->answer_authenticated)
+ return true;
+
+ found = true;
+
+ } else if (dt->key->type == DNS_TYPE_DS) {
+
+ r = dns_name_equal(dns_resource_key_name(dt->key), rrsig->rrsig.signer);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* OK, we found an auxiliary DS
+ * lookup. If that lookup is
+ * authenticated and non-zero, we
+ * won! */
+
+ if (!dt->answer_authenticated)
+ return false;
+
+ return dns_answer_match_key(dt->answer, dt->key, NULL);
+ }
+ }
+ }
+
+ return found ? false : -ENXIO;
+}
+
+static int dns_transaction_known_signed(DnsTransaction *t, DnsResourceRecord *rr) {
+ assert(t);
+ assert(rr);
+
+ /* We know that the root domain is signed, hence if it appears
+ * not to be signed, there's a problem with the DNS server */
+
+ return rr->key->class == DNS_CLASS_IN &&
+ dns_name_is_root(dns_resource_key_name(rr->key));
+}
+
+static int dns_transaction_check_revoked_trust_anchors(DnsTransaction *t) {
+ DnsResourceRecord *rr;
+ int r;
+
+ assert(t);
+
+ /* Maybe warn the user that we encountered a revoked DNSKEY
+ * for a key from our trust anchor. Note that we don't care
+ * whether the DNSKEY can be authenticated or not. It's
+ * sufficient if it is self-signed. */
+
+ DNS_ANSWER_FOREACH(rr, t->answer) {
+ r = dns_trust_anchor_check_revoked(&t->scope->manager->trust_anchor, rr, t->answer);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dns_transaction_invalidate_revoked_keys(DnsTransaction *t) {
+ bool changed;
+ int r;
+
+ assert(t);
+
+ /* Removes all DNSKEY/DS objects from t->validated_keys that
+ * our trust anchors database considers revoked. */
+
+ do {
+ DnsResourceRecord *rr;
+
+ changed = false;
+
+ DNS_ANSWER_FOREACH(rr, t->validated_keys) {
+ r = dns_trust_anchor_is_revoked(&t->scope->manager->trust_anchor, rr);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ r = dns_answer_remove_by_rr(&t->validated_keys, rr);
+ if (r < 0)
+ return r;
+
+ assert(r > 0);
+ changed = true;
+ break;
+ }
+ }
+ } while (changed);
+
+ return 0;
+}
+
+static int dns_transaction_copy_validated(DnsTransaction *t) {
+ DnsTransaction *dt;
+ Iterator i;
+ int r;
+
+ assert(t);
+
+ /* Copy all validated RRs from the auxiliary DNSSEC transactions into our set of validated RRs */
+
+ SET_FOREACH(dt, t->dnssec_transactions, i) {
+
+ if (DNS_TRANSACTION_IS_LIVE(dt->state))
+ continue;
+
+ if (!dt->answer_authenticated)
+ continue;
+
+ r = dns_answer_extend(&t->validated_keys, dt->answer);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+typedef enum {
+ DNSSEC_PHASE_DNSKEY, /* Phase #1, only validate DNSKEYs */
+ DNSSEC_PHASE_NSEC, /* Phase #2, only validate NSEC+NSEC3 */
+ DNSSEC_PHASE_ALL, /* Phase #3, validate everything else */
+} Phase;
+
+static int dnssec_validate_records(
+ DnsTransaction *t,
+ Phase phase,
+ bool *have_nsec,
+ DnsAnswer **validated) {
+
+ DnsResourceRecord *rr;
+ int r;
+
+ /* Returns negative on error, 0 if validation failed, 1 to restart validation, 2 when finished. */
+
+ DNS_ANSWER_FOREACH(rr, t->answer) {
+ DnsResourceRecord *rrsig = NULL;
+ DnssecResult result;
+
+ switch (rr->key->type) {
+ case DNS_TYPE_RRSIG:
+ continue;
+
+ case DNS_TYPE_DNSKEY:
+ /* We validate DNSKEYs only in the DNSKEY and ALL phases */
+ if (phase == DNSSEC_PHASE_NSEC)
+ continue;
+ break;
+
+ case DNS_TYPE_NSEC:
+ case DNS_TYPE_NSEC3:
+ *have_nsec = true;
+
+ /* We validate NSEC/NSEC3 only in the NSEC and ALL phases */
+ if (phase == DNSSEC_PHASE_DNSKEY)
+ continue;
+ break;
+
+ default:
+ /* We validate all other RRs only in the ALL phases */
+ if (phase != DNSSEC_PHASE_ALL)
+ continue;
+ }
+
+ r = dnssec_verify_rrset_search(t->answer, rr->key, t->validated_keys, USEC_INFINITY, &result, &rrsig);
+ if (r < 0)
+ return r;
+
+ log_debug("Looking at %s: %s", strna(dns_resource_record_to_string(rr)), dnssec_result_to_string(result));
+
+ if (result == DNSSEC_VALIDATED) {
+
+ if (rr->key->type == DNS_TYPE_DNSKEY) {
+ /* If we just validated a DNSKEY RRset, then let's add these keys to
+ * the set of validated keys for this transaction. */
+
+ r = dns_answer_copy_by_key(&t->validated_keys, t->answer, rr->key, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+
+ /* Some of the DNSKEYs we just added might already have been revoked,
+ * remove them again in that case. */
+ r = dns_transaction_invalidate_revoked_keys(t);
+ if (r < 0)
+ return r;
+ }
+
+ /* Add the validated RRset to the new list of validated
+ * RRsets, and remove it from the unvalidated RRsets.
+ * We mark the RRset as authenticated and cacheable. */
+ r = dns_answer_move_by_key(validated, &t->answer, rr->key, DNS_ANSWER_AUTHENTICATED|DNS_ANSWER_CACHEABLE);
+ if (r < 0)
+ return r;
+
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_SECURE, rr->key);
+
+ /* Exit the loop, we dropped something from the answer, start from the beginning */
+ return 1;
+ }
+
+ /* If we haven't read all DNSKEYs yet a negative result of the validation is irrelevant, as
+ * there might be more DNSKEYs coming. Similar, if we haven't read all NSEC/NSEC3 RRs yet,
+ * we cannot do positive wildcard proofs yet, as those require the NSEC/NSEC3 RRs. */
+ if (phase != DNSSEC_PHASE_ALL)
+ continue;
+
+ if (result == DNSSEC_VALIDATED_WILDCARD) {
+ bool authenticated = false;
+ const char *source;
+
+ /* This RRset validated, but as a wildcard. This means we need
+ * to prove via NSEC/NSEC3 that no matching non-wildcard RR exists. */
+
+ /* First step, determine the source of synthesis */
+ r = dns_resource_record_source(rrsig, &source);
+ if (r < 0)
+ return r;
+
+ r = dnssec_test_positive_wildcard(*validated,
+ dns_resource_key_name(rr->key),
+ source,
+ rrsig->rrsig.signer,
+ &authenticated);
+
+ /* Unless the NSEC proof showed that the key really doesn't exist something is off. */
+ if (r == 0)
+ result = DNSSEC_INVALID;
+ else {
+ r = dns_answer_move_by_key(validated, &t->answer, rr->key,
+ authenticated ? (DNS_ANSWER_AUTHENTICATED|DNS_ANSWER_CACHEABLE) : 0);
+ if (r < 0)
+ return r;
+
+ manager_dnssec_verdict(t->scope->manager, authenticated ? DNSSEC_SECURE : DNSSEC_INSECURE, rr->key);
+
+ /* Exit the loop, we dropped something from the answer, start from the beginning */
+ return 1;
+ }
+ }
+
+ if (result == DNSSEC_NO_SIGNATURE) {
+ r = dns_transaction_requires_rrsig(t, rr);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Data does not require signing. In that case, just copy it over,
+ * but remember that this is by no means authenticated. */
+ r = dns_answer_move_by_key(validated, &t->answer, rr->key, 0);
+ if (r < 0)
+ return r;
+
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INSECURE, rr->key);
+ return 1;
+ }
+
+ r = dns_transaction_known_signed(t, rr);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* This is an RR we know has to be signed. If it isn't this means
+ * the server is not attaching RRSIGs, hence complain. */
+
+ dns_server_packet_rrsig_missing(t->server, t->current_feature_level);
+
+ if (t->scope->dnssec_mode == DNSSEC_ALLOW_DOWNGRADE) {
+
+ /* Downgrading is OK? If so, just consider the information unsigned */
+
+ r = dns_answer_move_by_key(validated, &t->answer, rr->key, 0);
+ if (r < 0)
+ return r;
+
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INSECURE, rr->key);
+ return 1;
+ }
+
+ /* Otherwise, fail */
+ t->answer_dnssec_result = DNSSEC_INCOMPATIBLE_SERVER;
+ return 0;
+ }
+
+ r = dns_transaction_in_private_tld(t, rr->key);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ char s[DNS_RESOURCE_KEY_STRING_MAX];
+
+ /* The data is from a TLD that is proven not to exist, and we are in downgrade
+ * mode, hence ignore the fact that this was not signed. */
+
+ log_info("Detected RRset %s is in a private DNS zone, permitting unsigned RRs.",
+ dns_resource_key_to_string(rr->key, s, sizeof s));
+
+ r = dns_answer_move_by_key(validated, &t->answer, rr->key, 0);
+ if (r < 0)
+ return r;
+
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INSECURE, rr->key);
+ return 1;
+ }
+ }
+
+ if (IN_SET(result,
+ DNSSEC_MISSING_KEY,
+ DNSSEC_SIGNATURE_EXPIRED,
+ DNSSEC_UNSUPPORTED_ALGORITHM)) {
+
+ r = dns_transaction_dnskey_authenticated(t, rr);
+ if (r < 0 && r != -ENXIO)
+ return r;
+ if (r == 0) {
+ /* The DNSKEY transaction was not authenticated, this means there's
+ * no DS for this, which means it's OK if no keys are found for this signature. */
+
+ r = dns_answer_move_by_key(validated, &t->answer, rr->key, 0);
+ if (r < 0)
+ return r;
+
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INSECURE, rr->key);
+ return 1;
+ }
+ }
+
+ r = dns_transaction_is_primary_response(t, rr);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* Look for a matching DNAME for this CNAME */
+ r = dns_answer_has_dname_for_cname(t->answer, rr);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Also look among the stuff we already validated */
+ r = dns_answer_has_dname_for_cname(*validated, rr);
+ if (r < 0)
+ return r;
+ }
+
+ if (r == 0) {
+ if (IN_SET(result,
+ DNSSEC_INVALID,
+ DNSSEC_SIGNATURE_EXPIRED,
+ DNSSEC_NO_SIGNATURE))
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_BOGUS, rr->key);
+ else /* DNSSEC_MISSING_KEY or DNSSEC_UNSUPPORTED_ALGORITHM */
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INDETERMINATE, rr->key);
+
+ /* This is a primary response to our question, and it failed validation.
+ * That's fatal. */
+ t->answer_dnssec_result = result;
+ return 0;
+ }
+
+ /* This is a primary response, but we do have a DNAME RR
+ * in the RR that can replay this CNAME, hence rely on
+ * that, and we can remove the CNAME in favour of it. */
+ }
+
+ /* This is just some auxiliary data. Just remove the RRset and continue. */
+ r = dns_answer_remove_by_key(&t->answer, rr->key);
+ if (r < 0)
+ return r;
+
+ /* We dropped something from the answer, start from the beginning. */
+ return 1;
+ }
+
+ return 2; /* Finito. */
+}
+
+int dns_transaction_validate_dnssec(DnsTransaction *t) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *validated = NULL;
+ Phase phase;
+ DnsAnswerFlags flags;
+ int r;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ assert(t);
+
+ /* We have now collected all DS and DNSKEY RRs in
+ * t->validated_keys, let's see which RRs we can now
+ * authenticate with that. */
+
+ if (t->scope->dnssec_mode == DNSSEC_NO)
+ return 0;
+
+ /* Already validated */
+ if (t->answer_dnssec_result != _DNSSEC_RESULT_INVALID)
+ return 0;
+
+ /* Our own stuff needs no validation */
+ if (IN_SET(t->answer_source, DNS_TRANSACTION_ZONE, DNS_TRANSACTION_TRUST_ANCHOR)) {
+ t->answer_dnssec_result = DNSSEC_VALIDATED;
+ t->answer_authenticated = true;
+ return 0;
+ }
+
+ /* Cached stuff is not affected by validation. */
+ if (t->answer_source != DNS_TRANSACTION_NETWORK)
+ return 0;
+
+ if (!dns_transaction_dnssec_supported_full(t)) {
+ /* The server does not support DNSSEC, or doesn't augment responses with RRSIGs. */
+ t->answer_dnssec_result = DNSSEC_INCOMPATIBLE_SERVER;
+ log_debug("Not validating response for %" PRIu16 ", used server feature level does not support DNSSEC.", t->id);
+ return 0;
+ }
+
+ log_debug("Validating response from transaction %" PRIu16 " (%s).",
+ t->id,
+ dns_resource_key_to_string(t->key, key_str, sizeof key_str));
+
+ /* First, see if this response contains any revoked trust
+ * anchors we care about */
+ r = dns_transaction_check_revoked_trust_anchors(t);
+ if (r < 0)
+ return r;
+
+ /* Third, copy all RRs we acquired successfully from auxiliary RRs over. */
+ r = dns_transaction_copy_validated(t);
+ if (r < 0)
+ return r;
+
+ /* Second, see if there are DNSKEYs we already know a
+ * validated DS for. */
+ r = dns_transaction_validate_dnskey_by_ds(t);
+ if (r < 0)
+ return r;
+
+ /* Fourth, remove all DNSKEY and DS RRs again that our trust
+ * anchor says are revoked. After all we might have marked
+ * some keys revoked above, but they might still be lingering
+ * in our validated_keys list. */
+ r = dns_transaction_invalidate_revoked_keys(t);
+ if (r < 0)
+ return r;
+
+ phase = DNSSEC_PHASE_DNSKEY;
+ for (;;) {
+ bool have_nsec = false;
+
+ r = dnssec_validate_records(t, phase, &have_nsec, &validated);
+ if (r <= 0)
+ return r;
+
+ /* Try again as long as we managed to achieve something */
+ if (r == 1)
+ continue;
+
+ if (phase == DNSSEC_PHASE_DNSKEY && have_nsec) {
+ /* OK, we processed all DNSKEYs, and there are NSEC/NSEC3 RRs, look at those now. */
+ phase = DNSSEC_PHASE_NSEC;
+ continue;
+ }
+
+ if (phase != DNSSEC_PHASE_ALL) {
+ /* OK, we processed all DNSKEYs and NSEC/NSEC3 RRs, look at all the rest now.
+ * Note that in this third phase we start to remove RRs we couldn't validate. */
+ phase = DNSSEC_PHASE_ALL;
+ continue;
+ }
+
+ /* We're done */
+ break;
+ }
+
+ dns_answer_unref(t->answer);
+ t->answer = TAKE_PTR(validated);
+
+ /* At this point the answer only contains validated
+ * RRsets. Now, let's see if it actually answers the question
+ * we asked. If so, great! If it doesn't, then see if
+ * NSEC/NSEC3 can prove this. */
+ r = dns_transaction_has_positive_answer(t, &flags);
+ if (r > 0) {
+ /* Yes, it answers the question! */
+
+ if (flags & DNS_ANSWER_AUTHENTICATED) {
+ /* The answer is fully authenticated, yay. */
+ t->answer_dnssec_result = DNSSEC_VALIDATED;
+ t->answer_rcode = DNS_RCODE_SUCCESS;
+ t->answer_authenticated = true;
+ } else {
+ /* The answer is not fully authenticated. */
+ t->answer_dnssec_result = DNSSEC_UNSIGNED;
+ t->answer_authenticated = false;
+ }
+
+ } else if (r == 0) {
+ DnssecNsecResult nr;
+ bool authenticated = false;
+
+ /* Bummer! Let's check NSEC/NSEC3 */
+ r = dnssec_nsec_test(t->answer, t->key, &nr, &authenticated, &t->answer_nsec_ttl);
+ if (r < 0)
+ return r;
+
+ switch (nr) {
+
+ case DNSSEC_NSEC_NXDOMAIN:
+ /* NSEC proves the domain doesn't exist. Very good. */
+ log_debug("Proved NXDOMAIN via NSEC/NSEC3 for transaction %u (%s)", t->id, key_str);
+ t->answer_dnssec_result = DNSSEC_VALIDATED;
+ t->answer_rcode = DNS_RCODE_NXDOMAIN;
+ t->answer_authenticated = authenticated;
+
+ manager_dnssec_verdict(t->scope->manager, authenticated ? DNSSEC_SECURE : DNSSEC_INSECURE, t->key);
+ break;
+
+ case DNSSEC_NSEC_NODATA:
+ /* NSEC proves that there's no data here, very good. */
+ log_debug("Proved NODATA via NSEC/NSEC3 for transaction %u (%s)", t->id, key_str);
+ t->answer_dnssec_result = DNSSEC_VALIDATED;
+ t->answer_rcode = DNS_RCODE_SUCCESS;
+ t->answer_authenticated = authenticated;
+
+ manager_dnssec_verdict(t->scope->manager, authenticated ? DNSSEC_SECURE : DNSSEC_INSECURE, t->key);
+ break;
+
+ case DNSSEC_NSEC_OPTOUT:
+ /* NSEC3 says the data might not be signed */
+ log_debug("Data is NSEC3 opt-out via NSEC/NSEC3 for transaction %u (%s)", t->id, key_str);
+ t->answer_dnssec_result = DNSSEC_UNSIGNED;
+ t->answer_authenticated = false;
+
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INSECURE, t->key);
+ break;
+
+ case DNSSEC_NSEC_NO_RR:
+ /* No NSEC data? Bummer! */
+
+ r = dns_transaction_requires_nsec(t);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ t->answer_dnssec_result = DNSSEC_NO_SIGNATURE;
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_BOGUS, t->key);
+ } else {
+ t->answer_dnssec_result = DNSSEC_UNSIGNED;
+ t->answer_authenticated = false;
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INSECURE, t->key);
+ }
+
+ break;
+
+ case DNSSEC_NSEC_UNSUPPORTED_ALGORITHM:
+ /* We don't know the NSEC3 algorithm used? */
+ t->answer_dnssec_result = DNSSEC_UNSUPPORTED_ALGORITHM;
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INDETERMINATE, t->key);
+ break;
+
+ case DNSSEC_NSEC_FOUND:
+ case DNSSEC_NSEC_CNAME:
+ /* NSEC says it needs to be there, but we couldn't find it? Bummer! */
+ t->answer_dnssec_result = DNSSEC_NSEC_MISMATCH;
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_BOGUS, t->key);
+ break;
+
+ default:
+ assert_not_reached("Unexpected NSEC result.");
+ }
+ }
+
+ return 1;
+}
+
+static const char* const dns_transaction_state_table[_DNS_TRANSACTION_STATE_MAX] = {
+ [DNS_TRANSACTION_NULL] = "null",
+ [DNS_TRANSACTION_PENDING] = "pending",
+ [DNS_TRANSACTION_VALIDATING] = "validating",
+ [DNS_TRANSACTION_RCODE_FAILURE] = "rcode-failure",
+ [DNS_TRANSACTION_SUCCESS] = "success",
+ [DNS_TRANSACTION_NO_SERVERS] = "no-servers",
+ [DNS_TRANSACTION_TIMEOUT] = "timeout",
+ [DNS_TRANSACTION_ATTEMPTS_MAX_REACHED] = "attempts-max-reached",
+ [DNS_TRANSACTION_INVALID_REPLY] = "invalid-reply",
+ [DNS_TRANSACTION_ERRNO] = "errno",
+ [DNS_TRANSACTION_ABORTED] = "aborted",
+ [DNS_TRANSACTION_DNSSEC_FAILED] = "dnssec-failed",
+ [DNS_TRANSACTION_NO_TRUST_ANCHOR] = "no-trust-anchor",
+ [DNS_TRANSACTION_RR_TYPE_UNSUPPORTED] = "rr-type-unsupported",
+ [DNS_TRANSACTION_NETWORK_DOWN] = "network-down",
+ [DNS_TRANSACTION_NOT_FOUND] = "not-found",
+};
+DEFINE_STRING_TABLE_LOOKUP(dns_transaction_state, DnsTransactionState);
+
+static const char* const dns_transaction_source_table[_DNS_TRANSACTION_SOURCE_MAX] = {
+ [DNS_TRANSACTION_NETWORK] = "network",
+ [DNS_TRANSACTION_CACHE] = "cache",
+ [DNS_TRANSACTION_ZONE] = "zone",
+ [DNS_TRANSACTION_TRUST_ANCHOR] = "trust-anchor",
+};
+DEFINE_STRING_TABLE_LOOKUP(dns_transaction_source, DnsTransactionSource);
diff --git a/src/resolve/resolved-dns-transaction.h b/src/resolve/resolved-dns-transaction.h
new file mode 100644
index 0000000..bdfcbc1
--- /dev/null
+++ b/src/resolve/resolved-dns-transaction.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct DnsTransaction DnsTransaction;
+typedef enum DnsTransactionState DnsTransactionState;
+typedef enum DnsTransactionSource DnsTransactionSource;
+
+enum DnsTransactionState {
+ DNS_TRANSACTION_NULL,
+ DNS_TRANSACTION_PENDING,
+ DNS_TRANSACTION_VALIDATING,
+ DNS_TRANSACTION_RCODE_FAILURE,
+ DNS_TRANSACTION_SUCCESS,
+ DNS_TRANSACTION_NO_SERVERS,
+ DNS_TRANSACTION_TIMEOUT,
+ DNS_TRANSACTION_ATTEMPTS_MAX_REACHED,
+ DNS_TRANSACTION_INVALID_REPLY,
+ DNS_TRANSACTION_ERRNO,
+ DNS_TRANSACTION_ABORTED,
+ DNS_TRANSACTION_DNSSEC_FAILED,
+ DNS_TRANSACTION_NO_TRUST_ANCHOR,
+ DNS_TRANSACTION_RR_TYPE_UNSUPPORTED,
+ DNS_TRANSACTION_NETWORK_DOWN,
+ DNS_TRANSACTION_NOT_FOUND, /* like NXDOMAIN, but when LLMNR/TCP connections fail */
+ _DNS_TRANSACTION_STATE_MAX,
+ _DNS_TRANSACTION_STATE_INVALID = -1
+};
+
+#define DNS_TRANSACTION_IS_LIVE(state) IN_SET((state), DNS_TRANSACTION_NULL, DNS_TRANSACTION_PENDING, DNS_TRANSACTION_VALIDATING)
+
+enum DnsTransactionSource {
+ DNS_TRANSACTION_NETWORK,
+ DNS_TRANSACTION_CACHE,
+ DNS_TRANSACTION_ZONE,
+ DNS_TRANSACTION_TRUST_ANCHOR,
+ _DNS_TRANSACTION_SOURCE_MAX,
+ _DNS_TRANSACTION_SOURCE_INVALID = -1
+};
+
+#include "resolved-dns-answer.h"
+#include "resolved-dns-packet.h"
+#include "resolved-dns-question.h"
+#include "resolved-dns-scope.h"
+#include "resolved-dns-server.h"
+#include "resolved-dns-stream.h"
+
+struct DnsTransaction {
+ DnsScope *scope;
+
+ DnsResourceKey *key;
+
+ DnsTransactionState state;
+
+ uint16_t id;
+
+ bool tried_stream:1;
+
+ bool initial_jitter_scheduled:1;
+ bool initial_jitter_elapsed:1;
+
+ bool clamp_ttl:1;
+
+ bool probing:1;
+
+ DnsPacket *sent, *received;
+
+ DnsAnswer *answer;
+ int answer_rcode;
+ DnssecResult answer_dnssec_result;
+ DnsTransactionSource answer_source;
+ uint32_t answer_nsec_ttl;
+ int answer_errno; /* if state is DNS_TRANSACTION_ERRNO */
+
+ /* Indicates whether the primary answer is authenticated,
+ * i.e. whether the RRs from answer which directly match the
+ * question are authenticated, or, if there are none, whether
+ * the NODATA or NXDOMAIN case is. It says nothing about
+ * additional RRs listed in the answer, however they have
+ * their own DNS_ANSWER_AUTHORIZED FLAGS. Note that this bit
+ * is defined different than the AD bit in DNS packets, as
+ * that covers more than just the actual primary answer. */
+ bool answer_authenticated;
+
+ /* Contains DNSKEY, DS, SOA RRs we already verified and need
+ * to authenticate this reply */
+ DnsAnswer *validated_keys;
+
+ usec_t start_usec;
+ usec_t next_attempt_after;
+ sd_event_source *timeout_event_source;
+ unsigned n_attempts;
+
+ unsigned n_picked_servers;
+
+ /* UDP connection logic, if we need it */
+ int dns_udp_fd;
+ sd_event_source *dns_udp_event_source;
+
+ /* TCP connection logic, if we need it */
+ DnsStream *stream;
+
+ /* The active server */
+ DnsServer *server;
+
+ /* The features of the DNS server at time of transaction start */
+ DnsServerFeatureLevel current_feature_level;
+
+ /* If we got SERVFAIL back, we retry the lookup, using a lower feature level than we used before. */
+ DnsServerFeatureLevel clamp_feature_level;
+
+ /* Query candidates this transaction is referenced by and that
+ * shall be notified about this specific transaction
+ * completing. */
+ Set *notify_query_candidates, *notify_query_candidates_done;
+
+ /* Zone items this transaction is referenced by and that shall
+ * be notified about completion. */
+ Set *notify_zone_items, *notify_zone_items_done;
+
+ /* Other transactions that this transactions is referenced by
+ * and that shall be notified about completion. This is used
+ * when transactions want to validate their RRsets, but need
+ * another DNSKEY or DS RR to do so. */
+ Set *notify_transactions, *notify_transactions_done;
+
+ /* The opposite direction: the transactions this transaction
+ * created in order to request DNSKEY or DS RRs. */
+ Set *dnssec_transactions;
+
+ unsigned block_gc;
+
+ LIST_FIELDS(DnsTransaction, transactions_by_scope);
+ LIST_FIELDS(DnsTransaction, transactions_by_stream);
+};
+
+int dns_transaction_new(DnsTransaction **ret, DnsScope *s, DnsResourceKey *key);
+DnsTransaction* dns_transaction_free(DnsTransaction *t);
+
+bool dns_transaction_gc(DnsTransaction *t);
+int dns_transaction_go(DnsTransaction *t);
+
+void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p);
+void dns_transaction_complete(DnsTransaction *t, DnsTransactionState state);
+
+void dns_transaction_notify(DnsTransaction *t, DnsTransaction *source);
+int dns_transaction_validate_dnssec(DnsTransaction *t);
+int dns_transaction_request_dnssec_keys(DnsTransaction *t);
+
+const char* dns_transaction_state_to_string(DnsTransactionState p) _const_;
+DnsTransactionState dns_transaction_state_from_string(const char *s) _pure_;
+
+const char* dns_transaction_source_to_string(DnsTransactionSource p) _const_;
+DnsTransactionSource dns_transaction_source_from_string(const char *s) _pure_;
+
+/* LLMNR Jitter interval, see RFC 4795 Section 7 */
+#define LLMNR_JITTER_INTERVAL_USEC (100 * USEC_PER_MSEC)
+
+/* mDNS Jitter interval, see RFC 6762 Section 5.2 */
+#define MDNS_JITTER_MIN_USEC (20 * USEC_PER_MSEC)
+#define MDNS_JITTER_RANGE_USEC (100 * USEC_PER_MSEC)
+
+/* mDNS probing interval, see RFC 6762 Section 8.1 */
+#define MDNS_PROBING_INTERVAL_USEC (250 * USEC_PER_MSEC)
+
+/* Maximum attempts to send DNS requests, across all DNS servers */
+#define DNS_TRANSACTION_ATTEMPTS_MAX 24
+
+/* Maximum attempts to send LLMNR requests, see RFC 4795 Section 2.7 */
+#define LLMNR_TRANSACTION_ATTEMPTS_MAX 3
+
+/* Maximum attempts to send MDNS requests, see RFC 6762 Section 8.1 */
+#define MDNS_TRANSACTION_ATTEMPTS_MAX 3
+
+#define TRANSACTION_ATTEMPTS_MAX(p) (((p) == DNS_PROTOCOL_LLMNR) ? \
+ LLMNR_TRANSACTION_ATTEMPTS_MAX : \
+ (((p) == DNS_PROTOCOL_MDNS) ? \
+ MDNS_TRANSACTION_ATTEMPTS_MAX : \
+ DNS_TRANSACTION_ATTEMPTS_MAX))
diff --git a/src/resolve/resolved-dns-trust-anchor.c b/src/resolve/resolved-dns-trust-anchor.c
new file mode 100644
index 0000000..c5ec93b
--- /dev/null
+++ b/src/resolve/resolved-dns-trust-anchor.c
@@ -0,0 +1,789 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "def.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hexdecoct.h"
+#include "parse-util.h"
+#include "resolved-dns-trust-anchor.h"
+#include "resolved-dns-dnssec.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+
+static const char trust_anchor_dirs[] = CONF_PATHS_NULSTR("dnssec-trust-anchors.d");
+
+/* The first DS RR from https://data.iana.org/root-anchors/root-anchors.xml, retrieved December 2015 */
+static const uint8_t root_digest1[] =
+ { 0x49, 0xAA, 0xC1, 0x1D, 0x7B, 0x6F, 0x64, 0x46, 0x70, 0x2E, 0x54, 0xA1, 0x60, 0x73, 0x71, 0x60,
+ 0x7A, 0x1A, 0x41, 0x85, 0x52, 0x00, 0xFD, 0x2C, 0xE1, 0xCD, 0xDE, 0x32, 0xF2, 0x4E, 0x8F, 0xB5 };
+
+/* The second DS RR from https://data.iana.org/root-anchors/root-anchors.xml, retrieved February 2017 */
+static const uint8_t root_digest2[] =
+ { 0xE0, 0x6D, 0x44, 0xB8, 0x0B, 0x8F, 0x1D, 0x39, 0xA9, 0x5C, 0x0B, 0x0D, 0x7C, 0x65, 0xD0, 0x84,
+ 0x58, 0xE8, 0x80, 0x40, 0x9B, 0xBC, 0x68, 0x34, 0x57, 0x10, 0x42, 0x37, 0xC7, 0xF8, 0xEC, 0x8D };
+
+static bool dns_trust_anchor_knows_domain_positive(DnsTrustAnchor *d, const char *name) {
+ assert(d);
+
+ /* Returns true if there's an entry for the specified domain
+ * name in our trust anchor */
+
+ return
+ hashmap_contains(d->positive_by_key, &DNS_RESOURCE_KEY_CONST(DNS_CLASS_IN, DNS_TYPE_DNSKEY, name)) ||
+ hashmap_contains(d->positive_by_key, &DNS_RESOURCE_KEY_CONST(DNS_CLASS_IN, DNS_TYPE_DS, name));
+}
+
+static int add_root_ksk(
+ DnsAnswer *answer,
+ DnsResourceKey *key,
+ uint16_t key_tag,
+ uint8_t algorithm,
+ uint8_t digest_type,
+ const void *digest,
+ size_t digest_size) {
+
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ int r;
+
+ rr = dns_resource_record_new(key);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->ds.key_tag = key_tag;
+ rr->ds.algorithm = algorithm;
+ rr->ds.digest_type = digest_type;
+ rr->ds.digest_size = digest_size;
+ rr->ds.digest = memdup(digest, rr->ds.digest_size);
+ if (!rr->ds.digest)
+ return -ENOMEM;
+
+ r = dns_answer_add(answer, rr, 0, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int dns_trust_anchor_add_builtin_positive(DnsTrustAnchor *d) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ int r;
+
+ assert(d);
+
+ r = hashmap_ensure_allocated(&d->positive_by_key, &dns_resource_key_hash_ops);
+ if (r < 0)
+ return r;
+
+ /* Only add the built-in trust anchor if there's neither a DS nor a DNSKEY defined for the root domain. That
+ * way users have an easy way to override the root domain DS/DNSKEY data. */
+ if (dns_trust_anchor_knows_domain_positive(d, "."))
+ return 0;
+
+ key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_DS, "");
+ if (!key)
+ return -ENOMEM;
+
+ answer = dns_answer_new(2);
+ if (!answer)
+ return -ENOMEM;
+
+ /* Add the two RRs from https://data.iana.org/root-anchors/root-anchors.xml */
+ r = add_root_ksk(answer, key, 19036, DNSSEC_ALGORITHM_RSASHA256, DNSSEC_DIGEST_SHA256, root_digest1, sizeof(root_digest1));
+ if (r < 0)
+ return r;
+
+ r = add_root_ksk(answer, key, 20326, DNSSEC_ALGORITHM_RSASHA256, DNSSEC_DIGEST_SHA256, root_digest2, sizeof(root_digest2));
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(d->positive_by_key, key, answer);
+ if (r < 0)
+ return r;
+
+ answer = NULL;
+ return 0;
+}
+
+static int dns_trust_anchor_add_builtin_negative(DnsTrustAnchor *d) {
+
+ static const char private_domains[] =
+ /* RFC 6761 says that .test is a special domain for
+ * testing and not to be installed in the root zone */
+ "test\0"
+
+ /* RFC 6761 says that these reverse IP lookup ranges
+ * are for private addresses, and hence should not
+ * show up in the root zone */
+ "10.in-addr.arpa\0"
+ "16.172.in-addr.arpa\0"
+ "17.172.in-addr.arpa\0"
+ "18.172.in-addr.arpa\0"
+ "19.172.in-addr.arpa\0"
+ "20.172.in-addr.arpa\0"
+ "21.172.in-addr.arpa\0"
+ "22.172.in-addr.arpa\0"
+ "23.172.in-addr.arpa\0"
+ "24.172.in-addr.arpa\0"
+ "25.172.in-addr.arpa\0"
+ "26.172.in-addr.arpa\0"
+ "27.172.in-addr.arpa\0"
+ "28.172.in-addr.arpa\0"
+ "29.172.in-addr.arpa\0"
+ "30.172.in-addr.arpa\0"
+ "31.172.in-addr.arpa\0"
+ "168.192.in-addr.arpa\0"
+
+ /* The same, but for IPv6. */
+ "d.f.ip6.arpa\0"
+
+ /* RFC 6762 reserves the .local domain for Multicast
+ * DNS, it hence cannot appear in the root zone. (Note
+ * that we by default do not route .local traffic to
+ * DNS anyway, except when a configured search domain
+ * suggests so.) */
+ "local\0"
+
+ /* These two are well known, popular private zone
+ * TLDs, that are blocked from delegation, according
+ * to:
+ * http://icannwiki.com/Name_Collision#NGPC_Resolution
+ *
+ * There's also ongoing work on making this official
+ * in an RRC:
+ * https://www.ietf.org/archive/id/draft-chapin-additional-reserved-tlds-02.txt */
+ "home\0"
+ "corp\0"
+
+ /* The following four TLDs are suggested for private
+ * zones in RFC 6762, Appendix G, and are hence very
+ * unlikely to be made official TLDs any day soon */
+ "lan\0"
+ "intranet\0"
+ "internal\0"
+ "private\0";
+
+ const char *name;
+ int r;
+
+ assert(d);
+
+ /* Only add the built-in trust anchor if there's no negative
+ * trust anchor defined at all. This enables easy overriding
+ * of negative trust anchors. */
+
+ if (set_size(d->negative_by_name) > 0)
+ return 0;
+
+ r = set_ensure_allocated(&d->negative_by_name, &dns_name_hash_ops);
+ if (r < 0)
+ return r;
+
+ /* We add a couple of domains as default negative trust
+ * anchors, where it's very unlikely they will be installed in
+ * the root zone. If they exist they must be private, and thus
+ * unsigned. */
+
+ NULSTR_FOREACH(name, private_domains) {
+
+ if (dns_trust_anchor_knows_domain_positive(d, name))
+ continue;
+
+ r = set_put_strdup(d->negative_by_name, name);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dns_trust_anchor_load_positive(DnsTrustAnchor *d, const char *path, unsigned line, const char *s) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ _cleanup_free_ char *domain = NULL, *class = NULL, *type = NULL;
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ DnsAnswer *old_answer = NULL;
+ const char *p = s;
+ int r;
+
+ assert(d);
+ assert(line);
+
+ r = extract_first_word(&p, &domain, NULL, EXTRACT_QUOTES);
+ if (r < 0)
+ return log_warning_errno(r, "Unable to parse domain in line %s:%u: %m", path, line);
+
+ r = dns_name_is_valid(domain);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to chack validity of domain name '%s', at line %s:%u, ignoring line: %m", domain, path, line);
+ if (r == 0) {
+ log_warning("Domain name %s is invalid, at line %s:%u, ignoring line.", domain, path, line);
+ return -EINVAL;
+ }
+
+ r = extract_many_words(&p, NULL, 0, &class, &type, NULL);
+ if (r < 0)
+ return log_warning_errno(r, "Unable to parse class and type in line %s:%u: %m", path, line);
+ if (r != 2) {
+ log_warning("Missing class or type in line %s:%u", path, line);
+ return -EINVAL;
+ }
+
+ if (!strcaseeq(class, "IN")) {
+ log_warning("RR class %s is not supported, ignoring line %s:%u.", class, path, line);
+ return -EINVAL;
+ }
+
+ if (strcaseeq(type, "DS")) {
+ _cleanup_free_ char *key_tag = NULL, *algorithm = NULL, *digest_type = NULL;
+ _cleanup_free_ void *dd = NULL;
+ uint16_t kt;
+ int a, dt;
+ size_t l;
+
+ r = extract_many_words(&p, NULL, 0, &key_tag, &algorithm, &digest_type, NULL);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse DS parameters on line %s:%u: %m", path, line);
+ return -EINVAL;
+ }
+ if (r != 3) {
+ log_warning("Missing DS parameters on line %s:%u", path, line);
+ return -EINVAL;
+ }
+
+ r = safe_atou16(key_tag, &kt);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse DS key tag %s on line %s:%u: %m", key_tag, path, line);
+
+ a = dnssec_algorithm_from_string(algorithm);
+ if (a < 0) {
+ log_warning("Failed to parse DS algorithm %s on line %s:%u", algorithm, path, line);
+ return -EINVAL;
+ }
+
+ dt = dnssec_digest_from_string(digest_type);
+ if (dt < 0) {
+ log_warning("Failed to parse DS digest type %s on line %s:%u", digest_type, path, line);
+ return -EINVAL;
+ }
+
+ if (isempty(p)) {
+ log_warning("Missing DS digest on line %s:%u", path, line);
+ return -EINVAL;
+ }
+
+ r = unhexmem(p, strlen(p), &dd, &l);
+ if (r < 0) {
+ log_warning("Failed to parse DS digest %s on line %s:%u", p, path, line);
+ return -EINVAL;
+ }
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DS, domain);
+ if (!rr)
+ return log_oom();
+
+ rr->ds.key_tag = kt;
+ rr->ds.algorithm = a;
+ rr->ds.digest_type = dt;
+ rr->ds.digest_size = l;
+ rr->ds.digest = TAKE_PTR(dd);
+
+ } else if (strcaseeq(type, "DNSKEY")) {
+ _cleanup_free_ char *flags = NULL, *protocol = NULL, *algorithm = NULL;
+ _cleanup_free_ void *k = NULL;
+ uint16_t f;
+ size_t l;
+ int a;
+
+ r = extract_many_words(&p, NULL, 0, &flags, &protocol, &algorithm, NULL);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse DNSKEY parameters on line %s:%u: %m", path, line);
+ if (r != 3) {
+ log_warning("Missing DNSKEY parameters on line %s:%u", path, line);
+ return -EINVAL;
+ }
+
+ if (!streq(protocol, "3")) {
+ log_warning("DNSKEY Protocol is not 3 on line %s:%u", path, line);
+ return -EINVAL;
+ }
+
+ r = safe_atou16(flags, &f);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse DNSKEY flags field %s on line %s:%u", flags, path, line);
+ if ((f & DNSKEY_FLAG_ZONE_KEY) == 0) {
+ log_warning("DNSKEY lacks zone key bit set on line %s:%u", path, line);
+ return -EINVAL;
+ }
+ if ((f & DNSKEY_FLAG_REVOKE)) {
+ log_warning("DNSKEY is already revoked on line %s:%u", path, line);
+ return -EINVAL;
+ }
+
+ a = dnssec_algorithm_from_string(algorithm);
+ if (a < 0) {
+ log_warning("Failed to parse DNSKEY algorithm %s on line %s:%u", algorithm, path, line);
+ return -EINVAL;
+ }
+
+ if (isempty(p)) {
+ log_warning("Missing DNSKEY key on line %s:%u", path, line);
+ return -EINVAL;
+ }
+
+ r = unbase64mem(p, strlen(p), &k, &l);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse DNSKEY key data %s on line %s:%u", p, path, line);
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DNSKEY, domain);
+ if (!rr)
+ return log_oom();
+
+ rr->dnskey.flags = f;
+ rr->dnskey.protocol = 3;
+ rr->dnskey.algorithm = a;
+ rr->dnskey.key_size = l;
+ rr->dnskey.key = TAKE_PTR(k);
+
+ } else {
+ log_warning("RR type %s is not supported, ignoring line %s:%u.", type, path, line);
+ return -EINVAL;
+ }
+
+ r = hashmap_ensure_allocated(&d->positive_by_key, &dns_resource_key_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ old_answer = hashmap_get(d->positive_by_key, rr->key);
+ answer = dns_answer_ref(old_answer);
+
+ r = dns_answer_add_extend(&answer, rr, 0, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add trust anchor RR: %m");
+
+ r = hashmap_replace(d->positive_by_key, rr->key, answer);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add answer to trust anchor: %m");
+
+ old_answer = dns_answer_unref(old_answer);
+ answer = NULL;
+
+ return 0;
+}
+
+static int dns_trust_anchor_load_negative(DnsTrustAnchor *d, const char *path, unsigned line, const char *s) {
+ _cleanup_free_ char *domain = NULL;
+ const char *p = s;
+ int r;
+
+ assert(d);
+ assert(line);
+
+ r = extract_first_word(&p, &domain, NULL, EXTRACT_QUOTES);
+ if (r < 0)
+ return log_warning_errno(r, "Unable to parse line %s:%u: %m", path, line);
+
+ r = dns_name_is_valid(domain);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to chack validity of domain name '%s', at line %s:%u, ignoring line: %m", domain, path, line);
+ if (r == 0) {
+ log_warning("Domain name %s is invalid, at line %s:%u, ignoring line.", domain, path, line);
+ return -EINVAL;
+ }
+
+ if (!isempty(p)) {
+ log_warning("Trailing garbage at line %s:%u, ignoring line.", path, line);
+ return -EINVAL;
+ }
+
+ r = set_ensure_allocated(&d->negative_by_name, &dns_name_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(d->negative_by_name, domain);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ domain = NULL;
+
+ return 0;
+}
+
+static int dns_trust_anchor_load_files(
+ DnsTrustAnchor *d,
+ const char *suffix,
+ int (*loader)(DnsTrustAnchor *d, const char *path, unsigned n, const char *line)) {
+
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ int r;
+
+ assert(d);
+ assert(suffix);
+ assert(loader);
+
+ r = conf_files_list_nulstr(&files, suffix, NULL, 0, trust_anchor_dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate %s trust anchor files: %m", suffix);
+
+ STRV_FOREACH(f, files) {
+ _cleanup_fclose_ FILE *g = NULL;
+ unsigned n = 0;
+
+ g = fopen(*f, "r");
+ if (!g) {
+ if (errno == ENOENT)
+ continue;
+
+ log_warning_errno(errno, "Failed to open '%s', ignoring: %m", *f);
+ continue;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l;
+
+ r = read_line(g, LONG_LINE_MAX, &line);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read '%s', ignoring: %m", *f);
+ break;
+ }
+ if (r == 0)
+ break;
+
+ n++;
+
+ l = strstrip(line);
+ if (isempty(l))
+ continue;
+
+ if (*l == ';')
+ continue;
+
+ (void) loader(d, *f, n, l);
+ }
+ }
+
+ return 0;
+}
+
+static int domain_name_cmp(char * const *a, char * const *b) {
+ return dns_name_compare_func(*a, *b);
+}
+
+static int dns_trust_anchor_dump(DnsTrustAnchor *d) {
+ DnsAnswer *a;
+ Iterator i;
+
+ assert(d);
+
+ if (hashmap_isempty(d->positive_by_key))
+ log_info("No positive trust anchors defined.");
+ else {
+ log_info("Positive Trust Anchors:");
+ HASHMAP_FOREACH(a, d->positive_by_key, i) {
+ DnsResourceRecord *rr;
+
+ DNS_ANSWER_FOREACH(rr, a)
+ log_info("%s", dns_resource_record_to_string(rr));
+ }
+ }
+
+ if (set_isempty(d->negative_by_name))
+ log_info("No negative trust anchors defined.");
+ else {
+ _cleanup_free_ char **l = NULL, *j = NULL;
+
+ l = set_get_strv(d->negative_by_name);
+ if (!l)
+ return log_oom();
+
+ typesafe_qsort(l, set_size(d->negative_by_name), domain_name_cmp);
+
+ j = strv_join(l, " ");
+ if (!j)
+ return log_oom();
+
+ log_info("Negative trust anchors: %s", j);
+ }
+
+ return 0;
+}
+
+int dns_trust_anchor_load(DnsTrustAnchor *d) {
+ int r;
+
+ assert(d);
+
+ /* If loading things from disk fails, we don't consider this fatal */
+ (void) dns_trust_anchor_load_files(d, ".positive", dns_trust_anchor_load_positive);
+ (void) dns_trust_anchor_load_files(d, ".negative", dns_trust_anchor_load_negative);
+
+ /* However, if the built-in DS fails, then we have a problem. */
+ r = dns_trust_anchor_add_builtin_positive(d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add built-in positive trust anchor: %m");
+
+ r = dns_trust_anchor_add_builtin_negative(d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add built-in negative trust anchor: %m");
+
+ dns_trust_anchor_dump(d);
+
+ return 0;
+}
+
+void dns_trust_anchor_flush(DnsTrustAnchor *d) {
+ assert(d);
+
+ d->positive_by_key = hashmap_free_with_destructor(d->positive_by_key, dns_answer_unref);
+ d->revoked_by_rr = set_free_with_destructor(d->revoked_by_rr, dns_resource_record_unref);
+ d->negative_by_name = set_free_free(d->negative_by_name);
+}
+
+int dns_trust_anchor_lookup_positive(DnsTrustAnchor *d, const DnsResourceKey *key, DnsAnswer **ret) {
+ DnsAnswer *a;
+
+ assert(d);
+ assert(key);
+ assert(ret);
+
+ /* We only serve DS and DNSKEY RRs. */
+ if (!IN_SET(key->type, DNS_TYPE_DS, DNS_TYPE_DNSKEY))
+ return 0;
+
+ a = hashmap_get(d->positive_by_key, key);
+ if (!a)
+ return 0;
+
+ *ret = dns_answer_ref(a);
+ return 1;
+}
+
+int dns_trust_anchor_lookup_negative(DnsTrustAnchor *d, const char *name) {
+ int r;
+
+ assert(d);
+ assert(name);
+
+ for (;;) {
+ /* If the domain is listed as-is in the NTA database, then that counts */
+ if (set_contains(d->negative_by_name, name))
+ return true;
+
+ /* If the domain isn't listed as NTA, but is listed as positive trust anchor, then that counts. See RFC
+ * 7646, section 1.1 */
+ if (hashmap_contains(d->positive_by_key, &DNS_RESOURCE_KEY_CONST(DNS_CLASS_IN, DNS_TYPE_DS, name)))
+ return false;
+
+ if (hashmap_contains(d->positive_by_key, &DNS_RESOURCE_KEY_CONST(DNS_CLASS_IN, DNS_TYPE_KEY, name)))
+ return false;
+
+ /* And now, let's look at the parent, and check that too */
+ r = dns_name_parent(&name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ return false;
+}
+
+static int dns_trust_anchor_revoked_put(DnsTrustAnchor *d, DnsResourceRecord *rr) {
+ int r;
+
+ assert(d);
+
+ r = set_ensure_allocated(&d->revoked_by_rr, &dns_resource_record_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = set_put(d->revoked_by_rr, rr);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ dns_resource_record_ref(rr);
+
+ return r;
+}
+
+static int dns_trust_anchor_remove_revoked(DnsTrustAnchor *d, DnsResourceRecord *rr) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *new_answer = NULL;
+ DnsAnswer *old_answer;
+ int r;
+
+ /* Remember that this is a revoked trust anchor RR */
+ r = dns_trust_anchor_revoked_put(d, rr);
+ if (r < 0)
+ return r;
+
+ /* Remove this from the positive trust anchor */
+ old_answer = hashmap_get(d->positive_by_key, rr->key);
+ if (!old_answer)
+ return 0;
+
+ new_answer = dns_answer_ref(old_answer);
+
+ r = dns_answer_remove_by_rr(&new_answer, rr);
+ if (r <= 0)
+ return r;
+
+ /* We found the key! Warn the user */
+ log_struct(LOG_WARNING,
+ "MESSAGE_ID=" SD_MESSAGE_DNSSEC_TRUST_ANCHOR_REVOKED_STR,
+ LOG_MESSAGE("DNSSEC trust anchor %s has been revoked.\n"
+ "Please update the trust anchor, or upgrade your operating system.",
+ strna(dns_resource_record_to_string(rr))),
+ "TRUST_ANCHOR=%s", dns_resource_record_to_string(rr));
+
+ if (dns_answer_size(new_answer) <= 0) {
+ assert_se(hashmap_remove(d->positive_by_key, rr->key) == old_answer);
+ dns_answer_unref(old_answer);
+ return 1;
+ }
+
+ r = hashmap_replace(d->positive_by_key, new_answer->items[0].rr->key, new_answer);
+ if (r < 0)
+ return r;
+
+ new_answer = NULL;
+ dns_answer_unref(old_answer);
+ return 1;
+}
+
+static int dns_trust_anchor_check_revoked_one(DnsTrustAnchor *d, DnsResourceRecord *revoked_dnskey) {
+ DnsAnswer *a;
+ int r;
+
+ assert(d);
+ assert(revoked_dnskey);
+ assert(revoked_dnskey->key->type == DNS_TYPE_DNSKEY);
+ assert(revoked_dnskey->dnskey.flags & DNSKEY_FLAG_REVOKE);
+
+ a = hashmap_get(d->positive_by_key, revoked_dnskey->key);
+ if (a) {
+ DnsResourceRecord *anchor;
+
+ /* First, look for the precise DNSKEY in our trust anchor database */
+
+ DNS_ANSWER_FOREACH(anchor, a) {
+
+ if (anchor->dnskey.protocol != revoked_dnskey->dnskey.protocol)
+ continue;
+
+ if (anchor->dnskey.algorithm != revoked_dnskey->dnskey.algorithm)
+ continue;
+
+ if (anchor->dnskey.key_size != revoked_dnskey->dnskey.key_size)
+ continue;
+
+ /* Note that we allow the REVOKE bit to be
+ * different! It will be set in the revoked
+ * key, but unset in our version of it */
+ if (((anchor->dnskey.flags ^ revoked_dnskey->dnskey.flags) | DNSKEY_FLAG_REVOKE) != DNSKEY_FLAG_REVOKE)
+ continue;
+
+ if (memcmp(anchor->dnskey.key, revoked_dnskey->dnskey.key, anchor->dnskey.key_size) != 0)
+ continue;
+
+ dns_trust_anchor_remove_revoked(d, anchor);
+ break;
+ }
+ }
+
+ a = hashmap_get(d->positive_by_key, &DNS_RESOURCE_KEY_CONST(revoked_dnskey->key->class, DNS_TYPE_DS, dns_resource_key_name(revoked_dnskey->key)));
+ if (a) {
+ DnsResourceRecord *anchor;
+
+ /* Second, look for DS RRs matching this DNSKEY in our trust anchor database */
+
+ DNS_ANSWER_FOREACH(anchor, a) {
+
+ /* We set mask_revoke to true here, since our
+ * DS fingerprint will be the one of the
+ * unrevoked DNSKEY, but the one we got passed
+ * here has the bit set. */
+ r = dnssec_verify_dnskey_by_ds(revoked_dnskey, anchor, true);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ dns_trust_anchor_remove_revoked(d, anchor);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+int dns_trust_anchor_check_revoked(DnsTrustAnchor *d, DnsResourceRecord *dnskey, DnsAnswer *rrs) {
+ DnsResourceRecord *rrsig;
+ int r;
+
+ assert(d);
+ assert(dnskey);
+
+ /* Looks if "dnskey" is a self-signed RR that has been revoked
+ * and matches one of our trust anchor entries. If so, removes
+ * it from the trust anchor and returns > 0. */
+
+ if (dnskey->key->type != DNS_TYPE_DNSKEY)
+ return 0;
+
+ /* Is this DNSKEY revoked? */
+ if ((dnskey->dnskey.flags & DNSKEY_FLAG_REVOKE) == 0)
+ return 0;
+
+ /* Could this be interesting to us at all? If not,
+ * there's no point in looking for and verifying a
+ * self-signed RRSIG. */
+ if (!dns_trust_anchor_knows_domain_positive(d, dns_resource_key_name(dnskey->key)))
+ return 0;
+
+ /* Look for a self-signed RRSIG in the other rrs belonging to this DNSKEY */
+ DNS_ANSWER_FOREACH(rrsig, rrs) {
+ DnssecResult result;
+
+ if (rrsig->key->type != DNS_TYPE_RRSIG)
+ continue;
+
+ r = dnssec_rrsig_match_dnskey(rrsig, dnskey, true);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dnssec_verify_rrset(rrs, dnskey->key, rrsig, dnskey, USEC_INFINITY, &result);
+ if (r < 0)
+ return r;
+ if (result != DNSSEC_VALIDATED)
+ continue;
+
+ /* Bingo! This is a revoked self-signed DNSKEY. Let's
+ * see if this precise one exists in our trust anchor
+ * database, too. */
+ r = dns_trust_anchor_check_revoked_one(d, dnskey);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int dns_trust_anchor_is_revoked(DnsTrustAnchor *d, DnsResourceRecord *rr) {
+ assert(d);
+
+ if (!IN_SET(rr->key->type, DNS_TYPE_DS, DNS_TYPE_DNSKEY))
+ return 0;
+
+ return set_contains(d->revoked_by_rr, rr);
+}
diff --git a/src/resolve/resolved-dns-trust-anchor.h b/src/resolve/resolved-dns-trust-anchor.h
new file mode 100644
index 0000000..df49777
--- /dev/null
+++ b/src/resolve/resolved-dns-trust-anchor.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct DnsTrustAnchor DnsTrustAnchor;
+
+#include "hashmap.h"
+#include "resolved-dns-answer.h"
+#include "resolved-dns-rr.h"
+
+/* This contains a fixed database mapping domain names to DS or DNSKEY records. */
+
+struct DnsTrustAnchor {
+ Hashmap *positive_by_key;
+ Set *negative_by_name;
+ Set *revoked_by_rr;
+};
+
+int dns_trust_anchor_load(DnsTrustAnchor *d);
+void dns_trust_anchor_flush(DnsTrustAnchor *d);
+
+int dns_trust_anchor_lookup_positive(DnsTrustAnchor *d, const DnsResourceKey* key, DnsAnswer **answer);
+int dns_trust_anchor_lookup_negative(DnsTrustAnchor *d, const char *name);
+
+int dns_trust_anchor_check_revoked(DnsTrustAnchor *d, DnsResourceRecord *dnskey, DnsAnswer *rrs);
+int dns_trust_anchor_is_revoked(DnsTrustAnchor *d, DnsResourceRecord *rr);
diff --git a/src/resolve/resolved-dns-zone.c b/src/resolve/resolved-dns-zone.c
new file mode 100644
index 0000000..c2d9f3d
--- /dev/null
+++ b/src/resolve/resolved-dns-zone.c
@@ -0,0 +1,704 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "list.h"
+#include "resolved-dns-packet.h"
+#include "resolved-dns-zone.h"
+#include "resolved-dnssd.h"
+#include "string-util.h"
+
+/* Never allow more than 1K entries */
+#define ZONE_MAX 1024
+
+void dns_zone_item_probe_stop(DnsZoneItem *i) {
+ DnsTransaction *t;
+ assert(i);
+
+ if (!i->probe_transaction)
+ return;
+
+ t = TAKE_PTR(i->probe_transaction);
+
+ set_remove(t->notify_zone_items, i);
+ set_remove(t->notify_zone_items_done, i);
+ dns_transaction_gc(t);
+}
+
+static void dns_zone_item_free(DnsZoneItem *i) {
+ if (!i)
+ return;
+
+ dns_zone_item_probe_stop(i);
+ dns_resource_record_unref(i->rr);
+
+ free(i);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsZoneItem*, dns_zone_item_free);
+
+static void dns_zone_item_remove_and_free(DnsZone *z, DnsZoneItem *i) {
+ DnsZoneItem *first;
+
+ assert(z);
+
+ if (!i)
+ return;
+
+ first = hashmap_get(z->by_key, i->rr->key);
+ LIST_REMOVE(by_key, first, i);
+ if (first)
+ assert_se(hashmap_replace(z->by_key, first->rr->key, first) >= 0);
+ else
+ hashmap_remove(z->by_key, i->rr->key);
+
+ first = hashmap_get(z->by_name, dns_resource_key_name(i->rr->key));
+ LIST_REMOVE(by_name, first, i);
+ if (first)
+ assert_se(hashmap_replace(z->by_name, dns_resource_key_name(first->rr->key), first) >= 0);
+ else
+ hashmap_remove(z->by_name, dns_resource_key_name(i->rr->key));
+
+ dns_zone_item_free(i);
+}
+
+void dns_zone_flush(DnsZone *z) {
+ DnsZoneItem *i;
+
+ assert(z);
+
+ while ((i = hashmap_first(z->by_key)))
+ dns_zone_item_remove_and_free(z, i);
+
+ assert(hashmap_size(z->by_key) == 0);
+ assert(hashmap_size(z->by_name) == 0);
+
+ z->by_key = hashmap_free(z->by_key);
+ z->by_name = hashmap_free(z->by_name);
+}
+
+DnsZoneItem* dns_zone_get(DnsZone *z, DnsResourceRecord *rr) {
+ DnsZoneItem *i;
+
+ assert(z);
+ assert(rr);
+
+ LIST_FOREACH(by_key, i, hashmap_get(z->by_key, rr->key))
+ if (dns_resource_record_equal(i->rr, rr) > 0)
+ return i;
+
+ return NULL;
+}
+
+void dns_zone_remove_rr(DnsZone *z, DnsResourceRecord *rr) {
+ DnsZoneItem *i;
+
+ assert(z);
+ assert(rr);
+
+ i = dns_zone_get(z, rr);
+ if (i)
+ dns_zone_item_remove_and_free(z, i);
+}
+
+int dns_zone_remove_rrs_by_key(DnsZone *z, DnsResourceKey *key) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL, *soa = NULL;
+ DnsResourceRecord *rr;
+ bool tentative;
+ int r;
+
+ r = dns_zone_lookup(z, key, 0, &answer, &soa, &tentative);
+ if (r < 0)
+ return r;
+
+ DNS_ANSWER_FOREACH(rr, answer)
+ dns_zone_remove_rr(z, rr);
+
+ return 0;
+}
+
+static int dns_zone_init(DnsZone *z) {
+ int r;
+
+ assert(z);
+
+ r = hashmap_ensure_allocated(&z->by_key, &dns_resource_key_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&z->by_name, &dns_name_hash_ops);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int dns_zone_link_item(DnsZone *z, DnsZoneItem *i) {
+ DnsZoneItem *first;
+ int r;
+
+ first = hashmap_get(z->by_key, i->rr->key);
+ if (first) {
+ LIST_PREPEND(by_key, first, i);
+ assert_se(hashmap_replace(z->by_key, first->rr->key, first) >= 0);
+ } else {
+ r = hashmap_put(z->by_key, i->rr->key, i);
+ if (r < 0)
+ return r;
+ }
+
+ first = hashmap_get(z->by_name, dns_resource_key_name(i->rr->key));
+ if (first) {
+ LIST_PREPEND(by_name, first, i);
+ assert_se(hashmap_replace(z->by_name, dns_resource_key_name(first->rr->key), first) >= 0);
+ } else {
+ r = hashmap_put(z->by_name, dns_resource_key_name(i->rr->key), i);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dns_zone_item_probe_start(DnsZoneItem *i) {
+ DnsTransaction *t;
+ int r;
+
+ assert(i);
+
+ if (i->probe_transaction)
+ return 0;
+
+ t = dns_scope_find_transaction(i->scope, &DNS_RESOURCE_KEY_CONST(i->rr->key->class, DNS_TYPE_ANY, dns_resource_key_name(i->rr->key)), false);
+ if (!t) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+
+ key = dns_resource_key_new(i->rr->key->class, DNS_TYPE_ANY, dns_resource_key_name(i->rr->key));
+ if (!key)
+ return -ENOMEM;
+
+ r = dns_transaction_new(&t, i->scope, key);
+ if (r < 0)
+ return r;
+ }
+
+ r = set_ensure_allocated(&t->notify_zone_items, NULL);
+ if (r < 0)
+ goto gc;
+
+ r = set_ensure_allocated(&t->notify_zone_items_done, NULL);
+ if (r < 0)
+ goto gc;
+
+ r = set_put(t->notify_zone_items, i);
+ if (r < 0)
+ goto gc;
+
+ i->probe_transaction = t;
+ t->probing = true;
+
+ if (t->state == DNS_TRANSACTION_NULL) {
+
+ i->block_ready++;
+ r = dns_transaction_go(t);
+ i->block_ready--;
+
+ if (r < 0) {
+ dns_zone_item_probe_stop(i);
+ return r;
+ }
+ }
+
+ dns_zone_item_notify(i);
+ return 0;
+
+gc:
+ dns_transaction_gc(t);
+ return r;
+}
+
+int dns_zone_put(DnsZone *z, DnsScope *s, DnsResourceRecord *rr, bool probe) {
+ _cleanup_(dns_zone_item_freep) DnsZoneItem *i = NULL;
+ DnsZoneItem *existing;
+ int r;
+
+ assert(z);
+ assert(s);
+ assert(rr);
+
+ if (dns_class_is_pseudo(rr->key->class))
+ return -EINVAL;
+ if (dns_type_is_pseudo(rr->key->type))
+ return -EINVAL;
+
+ existing = dns_zone_get(z, rr);
+ if (existing)
+ return 0;
+
+ r = dns_zone_init(z);
+ if (r < 0)
+ return r;
+
+ i = new0(DnsZoneItem, 1);
+ if (!i)
+ return -ENOMEM;
+
+ i->scope = s;
+ i->rr = dns_resource_record_ref(rr);
+ i->probing_enabled = probe;
+
+ r = dns_zone_link_item(z, i);
+ if (r < 0)
+ return r;
+
+ if (probe) {
+ DnsZoneItem *first, *j;
+ bool established = false;
+
+ /* Check if there's already an RR with the same name
+ * established. If so, it has been probed already, and
+ * we don't ned to probe again. */
+
+ LIST_FIND_HEAD(by_name, i, first);
+ LIST_FOREACH(by_name, j, first) {
+ if (i == j)
+ continue;
+
+ if (j->state == DNS_ZONE_ITEM_ESTABLISHED)
+ established = true;
+ }
+
+ if (established)
+ i->state = DNS_ZONE_ITEM_ESTABLISHED;
+ else {
+ i->state = DNS_ZONE_ITEM_PROBING;
+
+ r = dns_zone_item_probe_start(i);
+ if (r < 0) {
+ dns_zone_item_remove_and_free(z, i);
+ i = NULL;
+ return r;
+ }
+ }
+ } else
+ i->state = DNS_ZONE_ITEM_ESTABLISHED;
+
+ i = NULL;
+ return 0;
+}
+
+static int dns_zone_add_authenticated_answer(DnsAnswer *a, DnsZoneItem *i, int ifindex) {
+ DnsAnswerFlags flags;
+
+ /* From RFC 6762, Section 10.2
+ * "They (the rules about when to set the cache-flush bit) apply to
+ * startup announcements as described in Section 8.3, "Announcing",
+ * and to responses generated as a result of receiving query messages."
+ * So, set the cache-flush bit for mDNS answers except for DNS-SD
+ * service enumeration PTRs described in RFC 6763, Section 4.1. */
+ if (i->scope->protocol == DNS_PROTOCOL_MDNS &&
+ !dns_resource_key_is_dnssd_ptr(i->rr->key))
+ flags = DNS_ANSWER_AUTHENTICATED|DNS_ANSWER_CACHE_FLUSH;
+ else
+ flags = DNS_ANSWER_AUTHENTICATED;
+
+ return dns_answer_add(a, i->rr, ifindex, flags);
+}
+
+int dns_zone_lookup(DnsZone *z, DnsResourceKey *key, int ifindex, DnsAnswer **ret_answer, DnsAnswer **ret_soa, bool *ret_tentative) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL, *soa = NULL;
+ unsigned n_answer = 0;
+ DnsZoneItem *j, *first;
+ bool tentative = true, need_soa = false;
+ int r;
+
+ /* Note that we don't actually need the ifindex for anything. However when it is passed we'll initialize the
+ * ifindex field in the answer with it */
+
+ assert(z);
+ assert(key);
+ assert(ret_answer);
+
+ /* First iteration, count what we have */
+
+ if (key->type == DNS_TYPE_ANY || key->class == DNS_CLASS_ANY) {
+ bool found = false, added = false;
+ int k;
+
+ /* If this is a generic match, then we have to
+ * go through the list by the name and look
+ * for everything manually */
+
+ first = hashmap_get(z->by_name, dns_resource_key_name(key));
+ LIST_FOREACH(by_name, j, first) {
+ if (!IN_SET(j->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING))
+ continue;
+
+ found = true;
+
+ k = dns_resource_key_match_rr(key, j->rr, NULL);
+ if (k < 0)
+ return k;
+ if (k > 0) {
+ n_answer++;
+ added = true;
+ }
+
+ }
+
+ if (found && !added)
+ need_soa = true;
+
+ } else {
+ bool found = false;
+
+ /* If this is a specific match, then look for
+ * the right key immediately */
+
+ first = hashmap_get(z->by_key, key);
+ LIST_FOREACH(by_key, j, first) {
+ if (!IN_SET(j->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING))
+ continue;
+
+ found = true;
+ n_answer++;
+ }
+
+ if (!found) {
+ first = hashmap_get(z->by_name, dns_resource_key_name(key));
+ LIST_FOREACH(by_name, j, first) {
+ if (!IN_SET(j->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING))
+ continue;
+
+ need_soa = true;
+ break;
+ }
+ }
+ }
+
+ if (n_answer <= 0 && !need_soa)
+ goto return_empty;
+
+ if (n_answer > 0) {
+ answer = dns_answer_new(n_answer);
+ if (!answer)
+ return -ENOMEM;
+ }
+
+ if (need_soa) {
+ soa = dns_answer_new(1);
+ if (!soa)
+ return -ENOMEM;
+ }
+
+ /* Second iteration, actually add the RRs to the answers */
+ if (key->type == DNS_TYPE_ANY || key->class == DNS_CLASS_ANY) {
+ bool found = false, added = false;
+ int k;
+
+ first = hashmap_get(z->by_name, dns_resource_key_name(key));
+ LIST_FOREACH(by_name, j, first) {
+ if (!IN_SET(j->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING))
+ continue;
+
+ found = true;
+
+ if (j->state != DNS_ZONE_ITEM_PROBING)
+ tentative = false;
+
+ k = dns_resource_key_match_rr(key, j->rr, NULL);
+ if (k < 0)
+ return k;
+ if (k > 0) {
+ r = dns_zone_add_authenticated_answer(answer, j, ifindex);
+ if (r < 0)
+ return r;
+
+ added = true;
+ }
+ }
+
+ if (found && !added) {
+ r = dns_answer_add_soa(soa, dns_resource_key_name(key), LLMNR_DEFAULT_TTL, ifindex);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ bool found = false;
+
+ first = hashmap_get(z->by_key, key);
+ LIST_FOREACH(by_key, j, first) {
+ if (!IN_SET(j->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING))
+ continue;
+
+ found = true;
+
+ if (j->state != DNS_ZONE_ITEM_PROBING)
+ tentative = false;
+
+ r = dns_zone_add_authenticated_answer(answer, j, ifindex);
+ if (r < 0)
+ return r;
+ }
+
+ if (!found) {
+ bool add_soa = false;
+
+ first = hashmap_get(z->by_name, dns_resource_key_name(key));
+ LIST_FOREACH(by_name, j, first) {
+ if (!IN_SET(j->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING))
+ continue;
+
+ if (j->state != DNS_ZONE_ITEM_PROBING)
+ tentative = false;
+
+ add_soa = true;
+ }
+
+ if (add_soa) {
+ r = dns_answer_add_soa(soa, dns_resource_key_name(key), LLMNR_DEFAULT_TTL, ifindex);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ /* If the caller sets ret_tentative to NULL, then use this as
+ * indication to not return tentative entries */
+
+ if (!ret_tentative && tentative)
+ goto return_empty;
+
+ *ret_answer = TAKE_PTR(answer);
+
+ if (ret_soa)
+ *ret_soa = TAKE_PTR(soa);
+
+ if (ret_tentative)
+ *ret_tentative = tentative;
+
+ return 1;
+
+return_empty:
+ *ret_answer = NULL;
+
+ if (ret_soa)
+ *ret_soa = NULL;
+
+ if (ret_tentative)
+ *ret_tentative = false;
+
+ return 0;
+}
+
+void dns_zone_item_conflict(DnsZoneItem *i) {
+ assert(i);
+
+ if (!IN_SET(i->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_VERIFYING, DNS_ZONE_ITEM_ESTABLISHED))
+ return;
+
+ log_info("Detected conflict on %s", strna(dns_resource_record_to_string(i->rr)));
+
+ dns_zone_item_probe_stop(i);
+
+ /* Withdraw the conflict item */
+ i->state = DNS_ZONE_ITEM_WITHDRAWN;
+
+ dnssd_signal_conflict(i->scope->manager, dns_resource_key_name(i->rr->key));
+
+ /* Maybe change the hostname */
+ if (manager_is_own_hostname(i->scope->manager, dns_resource_key_name(i->rr->key)) > 0)
+ manager_next_hostname(i->scope->manager);
+}
+
+void dns_zone_item_notify(DnsZoneItem *i) {
+ assert(i);
+ assert(i->probe_transaction);
+
+ if (i->block_ready > 0)
+ return;
+
+ if (IN_SET(i->probe_transaction->state, DNS_TRANSACTION_NULL, DNS_TRANSACTION_PENDING, DNS_TRANSACTION_VALIDATING))
+ return;
+
+ if (i->probe_transaction->state == DNS_TRANSACTION_SUCCESS) {
+ bool we_lost = false;
+
+ /* The probe got a successful reply. If we so far
+ * weren't established we just give up.
+ *
+ * In LLMNR case if we already
+ * were established, and the peer has the
+ * lexicographically larger IP address we continue
+ * and defend it. */
+
+ if (!IN_SET(i->state, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING)) {
+ log_debug("Got a successful probe for not yet established RR, we lost.");
+ we_lost = true;
+ } else if (i->probe_transaction->scope->protocol == DNS_PROTOCOL_LLMNR) {
+ assert(i->probe_transaction->received);
+ we_lost = memcmp(&i->probe_transaction->received->sender, &i->probe_transaction->received->destination, FAMILY_ADDRESS_SIZE(i->probe_transaction->received->family)) < 0;
+ if (we_lost)
+ log_debug("Got a successful probe reply for an established RR, and we have a lexicographically larger IP address and thus lost.");
+ }
+
+ if (we_lost) {
+ dns_zone_item_conflict(i);
+ return;
+ }
+
+ log_debug("Got a successful probe reply, but peer has lexicographically lower IP address and thus lost.");
+ }
+
+ log_debug("Record %s successfully probed.", strna(dns_resource_record_to_string(i->rr)));
+
+ dns_zone_item_probe_stop(i);
+ i->state = DNS_ZONE_ITEM_ESTABLISHED;
+}
+
+static int dns_zone_item_verify(DnsZoneItem *i) {
+ int r;
+
+ assert(i);
+
+ if (i->state != DNS_ZONE_ITEM_ESTABLISHED)
+ return 0;
+
+ log_debug("Verifying RR %s", strna(dns_resource_record_to_string(i->rr)));
+
+ i->state = DNS_ZONE_ITEM_VERIFYING;
+ r = dns_zone_item_probe_start(i);
+ if (r < 0) {
+ log_error_errno(r, "Failed to start probing for verifying RR: %m");
+ i->state = DNS_ZONE_ITEM_ESTABLISHED;
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_zone_check_conflicts(DnsZone *zone, DnsResourceRecord *rr) {
+ DnsZoneItem *i, *first;
+ int c = 0;
+
+ assert(zone);
+ assert(rr);
+
+ /* This checks whether a response RR we received from somebody
+ * else is one that we actually thought was uniquely ours. If
+ * so, we'll verify our RRs. */
+
+ /* No conflict if we don't have the name at all. */
+ first = hashmap_get(zone->by_name, dns_resource_key_name(rr->key));
+ if (!first)
+ return 0;
+
+ /* No conflict if we have the exact same RR */
+ if (dns_zone_get(zone, rr))
+ return 0;
+
+ /* No conflict if it is DNS-SD RR used for service enumeration. */
+ if (dns_resource_key_is_dnssd_ptr(rr->key))
+ return 0;
+
+ /* OK, somebody else has RRs for the same name. Yuck! Let's
+ * start probing again */
+
+ LIST_FOREACH(by_name, i, first) {
+ if (dns_resource_record_equal(i->rr, rr))
+ continue;
+
+ dns_zone_item_verify(i);
+ c++;
+ }
+
+ return c;
+}
+
+int dns_zone_verify_conflicts(DnsZone *zone, DnsResourceKey *key) {
+ DnsZoneItem *i, *first;
+ int c = 0;
+
+ assert(zone);
+
+ /* Somebody else notified us about a possible conflict. Let's
+ * verify if that's true. */
+
+ first = hashmap_get(zone->by_name, dns_resource_key_name(key));
+ if (!first)
+ return 0;
+
+ LIST_FOREACH(by_name, i, first) {
+ dns_zone_item_verify(i);
+ c++;
+ }
+
+ return c;
+}
+
+void dns_zone_verify_all(DnsZone *zone) {
+ DnsZoneItem *i;
+ Iterator iterator;
+
+ assert(zone);
+
+ HASHMAP_FOREACH(i, zone->by_key, iterator) {
+ DnsZoneItem *j;
+
+ LIST_FOREACH(by_key, j, i)
+ dns_zone_item_verify(j);
+ }
+}
+
+void dns_zone_dump(DnsZone *zone, FILE *f) {
+ Iterator iterator;
+ DnsZoneItem *i;
+
+ if (!zone)
+ return;
+
+ if (!f)
+ f = stdout;
+
+ HASHMAP_FOREACH(i, zone->by_key, iterator) {
+ DnsZoneItem *j;
+
+ LIST_FOREACH(by_key, j, i) {
+ const char *t;
+
+ t = dns_resource_record_to_string(j->rr);
+ if (!t) {
+ log_oom();
+ continue;
+ }
+
+ fputc('\t', f);
+ fputs(t, f);
+ fputc('\n', f);
+ }
+ }
+}
+
+bool dns_zone_is_empty(DnsZone *zone) {
+ if (!zone)
+ return true;
+
+ return hashmap_isempty(zone->by_key);
+}
+
+bool dns_zone_contains_name(DnsZone *z, const char *name) {
+ DnsZoneItem *i, *first;
+
+ first = hashmap_get(z->by_name, name);
+ if (!first)
+ return false;
+
+ LIST_FOREACH(by_name, i, first) {
+ if (!IN_SET(i->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING))
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
diff --git a/src/resolve/resolved-dns-zone.h b/src/resolve/resolved-dns-zone.h
new file mode 100644
index 0000000..2719b21
--- /dev/null
+++ b/src/resolve/resolved-dns-zone.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "hashmap.h"
+
+typedef struct DnsZone {
+ Hashmap *by_key;
+ Hashmap *by_name;
+} DnsZone;
+
+typedef struct DnsZoneItem DnsZoneItem;
+typedef enum DnsZoneItemState DnsZoneItemState;
+
+#include "resolved-dns-answer.h"
+#include "resolved-dns-question.h"
+#include "resolved-dns-rr.h"
+#include "resolved-dns-transaction.h"
+
+/* RFC 4795 Section 2.8. suggests a TTL of 30s by default */
+#define LLMNR_DEFAULT_TTL (30)
+
+/* RFC 6762 Section 10. suggests a TTL of 120s by default */
+#define MDNS_DEFAULT_TTL (120)
+
+enum DnsZoneItemState {
+ DNS_ZONE_ITEM_PROBING,
+ DNS_ZONE_ITEM_ESTABLISHED,
+ DNS_ZONE_ITEM_VERIFYING,
+ DNS_ZONE_ITEM_WITHDRAWN,
+};
+
+struct DnsZoneItem {
+ DnsScope *scope;
+ DnsResourceRecord *rr;
+
+ DnsZoneItemState state;
+
+ unsigned block_ready;
+
+ bool probing_enabled;
+
+ LIST_FIELDS(DnsZoneItem, by_key);
+ LIST_FIELDS(DnsZoneItem, by_name);
+
+ DnsTransaction *probe_transaction;
+};
+
+void dns_zone_flush(DnsZone *z);
+
+int dns_zone_put(DnsZone *z, DnsScope *s, DnsResourceRecord *rr, bool probe);
+DnsZoneItem* dns_zone_get(DnsZone *z, DnsResourceRecord *rr);
+void dns_zone_remove_rr(DnsZone *z, DnsResourceRecord *rr);
+int dns_zone_remove_rrs_by_key(DnsZone *z, DnsResourceKey *key);
+
+int dns_zone_lookup(DnsZone *z, DnsResourceKey *key, int ifindex, DnsAnswer **answer, DnsAnswer **soa, bool *tentative);
+
+void dns_zone_item_conflict(DnsZoneItem *i);
+void dns_zone_item_notify(DnsZoneItem *i);
+
+int dns_zone_check_conflicts(DnsZone *zone, DnsResourceRecord *rr);
+int dns_zone_verify_conflicts(DnsZone *zone, DnsResourceKey *key);
+
+void dns_zone_verify_all(DnsZone *zone);
+
+void dns_zone_item_probe_stop(DnsZoneItem *i);
+
+void dns_zone_dump(DnsZone *zone, FILE *f);
+bool dns_zone_is_empty(DnsZone *zone);
+bool dns_zone_contains_name(DnsZone *z, const char *name);
diff --git a/src/resolve/resolved-dnssd-bus.c b/src/resolve/resolved-dnssd-bus.c
new file mode 100644
index 0000000..24bb37b
--- /dev/null
+++ b/src/resolve/resolved-dnssd-bus.c
@@ -0,0 +1,128 @@
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "missing_capability.h"
+#include "resolved-dnssd.h"
+#include "resolved-dnssd-bus.h"
+#include "resolved-link.h"
+#include "strv.h"
+#include "user-util.h"
+
+int bus_dnssd_method_unregister(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ DnssdService *s = userdata;
+ DnssdTxtData *txt_data;
+ Manager *m;
+ Iterator i;
+ Link *l;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ m = s->manager;
+
+ r = bus_verify_polkit_async(message, CAP_SYS_ADMIN,
+ "org.freedesktop.resolve1.unregister-service",
+ NULL, false, s->originator,
+ &m->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ HASHMAP_FOREACH(l, m->links, i) {
+ if (l->mdns_ipv4_scope) {
+ r = dns_scope_announce(l->mdns_ipv4_scope, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to send goodbye messages in IPv4 scope: %m");
+
+ dns_zone_remove_rr(&l->mdns_ipv4_scope->zone, s->ptr_rr);
+ dns_zone_remove_rr(&l->mdns_ipv4_scope->zone, s->srv_rr);
+ LIST_FOREACH(items, txt_data, s->txt_data_items)
+ dns_zone_remove_rr(&l->mdns_ipv4_scope->zone, txt_data->rr);
+ }
+
+ if (l->mdns_ipv6_scope) {
+ r = dns_scope_announce(l->mdns_ipv6_scope, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to send goodbye messages in IPv6 scope: %m");
+
+ dns_zone_remove_rr(&l->mdns_ipv6_scope->zone, s->ptr_rr);
+ dns_zone_remove_rr(&l->mdns_ipv6_scope->zone, s->srv_rr);
+ LIST_FOREACH(items, txt_data, s->txt_data_items)
+ dns_zone_remove_rr(&l->mdns_ipv6_scope->zone, txt_data->rr);
+ }
+ }
+
+ dnssd_service_free(s);
+
+ manager_refresh_rrs(m);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+const sd_bus_vtable dnssd_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_METHOD("Unregister", NULL, NULL, bus_dnssd_method_unregister, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_SIGNAL("Conflicted", NULL, 0),
+
+ SD_BUS_VTABLE_END
+};
+
+int dnssd_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ _cleanup_free_ char *name = NULL;
+ Manager *m = userdata;
+ DnssdService *service;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = sd_bus_path_decode(path, "/org/freedesktop/resolve1/dnssd", &name);
+ if (r <= 0)
+ return 0;
+
+ service = hashmap_get(m->dnssd_services, name);
+ if (!service)
+ return 0;
+
+ *found = service;
+ return 1;
+}
+
+int dnssd_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ DnssdService *service;
+ Iterator i;
+ unsigned c = 0;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(m);
+ assert(nodes);
+
+ l = new0(char*, hashmap_size(m->dnssd_services) + 1);
+ if (!l)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(service, m->dnssd_services, i) {
+ char *p;
+
+ r = sd_bus_path_encode("/org/freedesktop/resolve1/dnssd", service->name, &p);
+ if (r < 0)
+ return r;
+
+ l[c++] = p;
+ }
+
+ l[c] = NULL;
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
diff --git a/src/resolve/resolved-dnssd-bus.h b/src/resolve/resolved-dnssd-bus.h
new file mode 100644
index 0000000..9ee2ce1
--- /dev/null
+++ b/src/resolve/resolved-dnssd-bus.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include "sd-bus.h"
+
+extern const sd_bus_vtable dnssd_vtable[];
+
+int dnssd_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error);
+int dnssd_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error);
+
+int bus_dnssd_method_unregister(sd_bus_message *message, void *userdata, sd_bus_error *error);
diff --git a/src/resolve/resolved-dnssd-gperf.gperf b/src/resolve/resolved-dnssd-gperf.gperf
new file mode 100644
index 0000000..2780b85
--- /dev/null
+++ b/src/resolve/resolved-dnssd-gperf.gperf
@@ -0,0 +1,24 @@
+%{
+#include <stddef.h>
+#include "conf-parser.h"
+#include "resolved-conf.h"
+#include "resolved-dnssd.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name resolved_dnssd_gperf_hash
+%define lookup-function-name resolved_dnssd_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Service.Name, config_parse_dnssd_service_name, 0, 0
+Service.Type, config_parse_dnssd_service_type, 0, 0
+Service.Port, config_parse_ip_port, 0, offsetof(DnssdService, port)
+Service.Priority, config_parse_uint16, 0, offsetof(DnssdService, priority)
+Service.Weight, config_parse_uint16, 0, offsetof(DnssdService, weight)
+Service.TxtText, config_parse_dnssd_txt, DNS_TXT_ITEM_TEXT, 0
+Service.TxtData, config_parse_dnssd_txt, DNS_TXT_ITEM_DATA, 0
diff --git a/src/resolve/resolved-dnssd.c b/src/resolve/resolved-dnssd.c
new file mode 100644
index 0000000..b8e6a7a
--- /dev/null
+++ b/src/resolve/resolved-dnssd.c
@@ -0,0 +1,366 @@
+
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "resolved-dnssd.h"
+#include "resolved-dns-rr.h"
+#include "resolved-manager.h"
+#include "specifier.h"
+#include "strv.h"
+
+const char* const dnssd_service_dirs[] = {
+ "/etc/systemd/dnssd",
+ "/run/systemd/dnssd",
+ "/usr/lib/systemd/dnssd",
+#if HAVE_SPLIT_USR
+ "/lib/systemd/dnssd",
+#endif
+ NULL
+};
+
+DnssdTxtData *dnssd_txtdata_free(DnssdTxtData *txt_data) {
+ if (!txt_data)
+ return NULL;
+
+ dns_resource_record_unref(txt_data->rr);
+ dns_txt_item_free_all(txt_data->txt);
+
+ return mfree(txt_data);
+}
+
+DnssdTxtData *dnssd_txtdata_free_all(DnssdTxtData *txt_data) {
+ DnssdTxtData *next;
+
+ if (!txt_data)
+ return NULL;
+
+ next = txt_data->items_next;
+
+ dnssd_txtdata_free(txt_data);
+
+ return dnssd_txtdata_free_all(next);
+}
+
+DnssdService *dnssd_service_free(DnssdService *service) {
+ if (!service)
+ return NULL;
+
+ if (service->manager)
+ hashmap_remove(service->manager->dnssd_services, service->name);
+
+ dns_resource_record_unref(service->ptr_rr);
+ dns_resource_record_unref(service->srv_rr);
+
+ dnssd_txtdata_free_all(service->txt_data_items);
+
+ free(service->filename);
+ free(service->name);
+ free(service->type);
+ free(service->name_template);
+
+ return mfree(service);
+}
+
+static int dnssd_service_load(Manager *manager, const char *filename) {
+ _cleanup_(dnssd_service_freep) DnssdService *service = NULL;
+ _cleanup_(dnssd_txtdata_freep) DnssdTxtData *txt_data = NULL;
+ char *d;
+ const char *dropin_dirname;
+ int r;
+
+ assert(manager);
+ assert(filename);
+
+ service = new0(DnssdService, 1);
+ if (!service)
+ return log_oom();
+
+ service->filename = strdup(filename);
+ if (!service->filename)
+ return log_oom();
+
+ service->name = strdup(basename(filename));
+ if (!service->name)
+ return log_oom();
+
+ d = endswith(service->name, ".dnssd");
+ if (!d)
+ return -EINVAL;
+
+ assert(streq(d, ".dnssd"));
+
+ *d = '\0';
+
+ dropin_dirname = strjoina(service->name, ".dnssd.d");
+
+ r = config_parse_many(filename, dnssd_service_dirs, dropin_dirname,
+ "Service\0",
+ config_item_perf_lookup, resolved_dnssd_gperf_lookup,
+ false, service);
+ if (r < 0)
+ return r;
+
+ if (!service->name_template) {
+ log_error("%s doesn't define service instance name", service->name);
+ return -EINVAL;
+ }
+
+ if (!service->type) {
+ log_error("%s doesn't define service type", service->name);
+ return -EINVAL;
+ }
+
+ if (LIST_IS_EMPTY(service->txt_data_items)) {
+ txt_data = new0(DnssdTxtData, 1);
+ if (!txt_data)
+ return log_oom();
+
+ r = dns_txt_item_new_empty(&txt_data->txt);
+ if (r < 0)
+ return r;
+
+ LIST_PREPEND(items, service->txt_data_items, txt_data);
+ txt_data = NULL;
+ }
+
+ r = hashmap_ensure_allocated(&manager->dnssd_services, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(manager->dnssd_services, service->name, service);
+ if (r < 0)
+ return r;
+
+ service->manager = manager;
+
+ r = dnssd_update_rrs(service);
+ if (r < 0)
+ return r;
+
+ service = NULL;
+
+ return 0;
+}
+
+static int specifier_dnssd_host_name(char specifier, const void *data, const void *userdata, char **ret) {
+ DnssdService *s = (DnssdService *) userdata;
+ char *n;
+
+ assert(s);
+ assert(s->manager);
+ assert(s->manager->llmnr_hostname);
+
+ n = strdup(s->manager->llmnr_hostname);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int dnssd_render_instance_name(DnssdService *s, char **ret_name) {
+ static const Specifier specifier_table[] = {
+ { 'b', specifier_boot_id, NULL },
+ { 'H', specifier_dnssd_host_name, NULL },
+ { 'm', specifier_machine_id, NULL },
+ { 'v', specifier_kernel_release, NULL },
+ {}
+ };
+ _cleanup_free_ char *name = NULL;
+ int r;
+
+ assert(s);
+ assert(s->name_template);
+
+ r = specifier_printf(s->name_template, specifier_table, s, &name);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to replace specifiers: %m");
+
+ if (!dns_service_name_is_valid(name))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Service instance name '%s' is invalid.",
+ name);
+
+ *ret_name = TAKE_PTR(name);
+
+ return 0;
+}
+
+int dnssd_load(Manager *manager) {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ int r;
+
+ assert(manager);
+
+ if (manager->mdns_support != RESOLVE_SUPPORT_YES)
+ return 0;
+
+ r = conf_files_list_strv(&files, ".dnssd", NULL, 0, dnssd_service_dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate .dnssd files: %m");
+
+ STRV_FOREACH_BACKWARDS(f, files) {
+ r = dnssd_service_load(manager, *f);
+ if (r < 0)
+ log_warning_errno(r, "Failed to load '%s': %m", *f);;
+ }
+
+ return 0;
+}
+
+int dnssd_update_rrs(DnssdService *s) {
+ _cleanup_free_ char *n = NULL;
+ _cleanup_free_ char *service_name = NULL;
+ _cleanup_free_ char *full_name = NULL;
+ DnssdTxtData *txt_data;
+ int r;
+
+ assert(s);
+ assert(s->txt_data_items);
+ assert(s->manager);
+
+ s->ptr_rr = dns_resource_record_unref(s->ptr_rr);
+ s->srv_rr = dns_resource_record_unref(s->srv_rr);
+ LIST_FOREACH(items, txt_data, s->txt_data_items)
+ txt_data->rr = dns_resource_record_unref(txt_data->rr);
+
+ r = dnssd_render_instance_name(s, &n);
+ if (r < 0)
+ return r;
+
+ r = dns_name_concat(s->type, "local", 0, &service_name);
+ if (r < 0)
+ return r;
+ r = dns_name_concat(n, service_name, 0, &full_name);
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(items, txt_data, s->txt_data_items) {
+ txt_data->rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_TXT,
+ full_name);
+ if (!txt_data->rr)
+ goto oom;
+
+ txt_data->rr->ttl = MDNS_DEFAULT_TTL;
+ txt_data->rr->txt.items = dns_txt_item_copy(txt_data->txt);
+ if (!txt_data->rr->txt.items)
+ goto oom;
+ }
+
+ s->ptr_rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_PTR,
+ service_name);
+ if (!s->ptr_rr)
+ goto oom;
+
+ s->ptr_rr->ttl = MDNS_DEFAULT_TTL;
+ s->ptr_rr->ptr.name = strdup(full_name);
+ if (!s->ptr_rr->ptr.name)
+ goto oom;
+
+ s->srv_rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_SRV,
+ full_name);
+ if (!s->srv_rr)
+ goto oom;
+
+ s->srv_rr->ttl = MDNS_DEFAULT_TTL;
+ s->srv_rr->srv.priority = s->priority;
+ s->srv_rr->srv.weight = s->weight;
+ s->srv_rr->srv.port = s->port;
+ s->srv_rr->srv.name = strdup(s->manager->mdns_hostname);
+ if (!s->srv_rr->srv.name)
+ goto oom;
+
+ return 0;
+
+oom:
+ LIST_FOREACH(items, txt_data, s->txt_data_items)
+ txt_data->rr = dns_resource_record_unref(txt_data->rr);
+ s->ptr_rr = dns_resource_record_unref(s->ptr_rr);
+ s->srv_rr = dns_resource_record_unref(s->srv_rr);
+ return -ENOMEM;
+}
+
+int dnssd_txt_item_new_from_string(const char *key, const char *value, DnsTxtItem **ret_item) {
+ size_t length;
+ DnsTxtItem *i;
+
+ length = strlen(key);
+
+ if (!isempty(value))
+ length += strlen(value) + 1; /* length of value plus '=' */
+
+ i = malloc0(offsetof(DnsTxtItem, data) + length + 1); /* for safety reasons we add an extra NUL byte */
+ if (!i)
+ return -ENOMEM;
+
+ memcpy(i->data, key, strlen(key));
+ if (!isempty(value)) {
+ memcpy(i->data + strlen(key), "=", 1);
+ memcpy(i->data + strlen(key) + 1, value, strlen(value));
+ }
+ i->length = length;
+
+ *ret_item = TAKE_PTR(i);
+
+ return 0;
+}
+
+int dnssd_txt_item_new_from_data(const char *key, const void *data, const size_t size, DnsTxtItem **ret_item) {
+ size_t length;
+ DnsTxtItem *i;
+
+ length = strlen(key);
+
+ if (size > 0)
+ length += size + 1; /* size of date plus '=' */
+
+ i = malloc0(offsetof(DnsTxtItem, data) + length + 1); /* for safety reasons we add an extra NUL byte */
+ if (!i)
+ return -ENOMEM;
+
+ memcpy(i->data, key, strlen(key));
+ if (size > 0) {
+ memcpy(i->data + strlen(key), "=", 1);
+ memcpy(i->data + strlen(key) + 1, data, size);
+ }
+ i->length = length;
+
+ *ret_item = TAKE_PTR(i);
+
+ return 0;
+}
+
+void dnssd_signal_conflict(Manager *manager, const char *name) {
+ Iterator i;
+ DnssdService *s;
+ int r;
+
+ HASHMAP_FOREACH(s, manager->dnssd_services, i) {
+ if (s->withdrawn)
+ continue;
+
+ if (dns_name_equal(dns_resource_key_name(s->srv_rr->key), name)) {
+ _cleanup_free_ char *path = NULL;
+
+ s->withdrawn = true;
+
+ r = sd_bus_path_encode("/org/freedesktop/resolve1/dnssd", s->name, &path);
+ if (r < 0) {
+ log_error_errno(r, "Can't get D-BUS object path: %m");
+ return;
+ }
+
+ r = sd_bus_emit_signal(manager->bus,
+ path,
+ "org.freedesktop.resolve1.DnssdService",
+ "Conflicted",
+ NULL);
+ if (r < 0) {
+ log_error_errno(r, "Cannot emit signal: %m");
+ return;
+ }
+
+ break;
+ }
+ }
+}
diff --git a/src/resolve/resolved-dnssd.h b/src/resolve/resolved-dnssd.h
new file mode 100644
index 0000000..f2e1014
--- /dev/null
+++ b/src/resolve/resolved-dnssd.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include "list.h"
+
+typedef struct DnssdService DnssdService;
+typedef struct DnssdTxtData DnssdTxtData;
+
+typedef struct Manager Manager;
+typedef struct DnsResourceRecord DnsResourceRecord;
+typedef struct DnsTxtItem DnsTxtItem;
+
+enum {
+ DNS_TXT_ITEM_TEXT,
+ DNS_TXT_ITEM_DATA
+};
+
+struct DnssdTxtData {
+ DnsResourceRecord *rr;
+
+ LIST_HEAD(DnsTxtItem, txt);
+
+ LIST_FIELDS(DnssdTxtData, items);
+};
+
+struct DnssdService {
+ char *filename;
+ char *name;
+ char *name_template;
+ char *type;
+ uint16_t port;
+ uint16_t priority;
+ uint16_t weight;
+
+ DnsResourceRecord *ptr_rr;
+ DnsResourceRecord *srv_rr;
+
+ /* Section 6.8 of RFC 6763 allows having service
+ * instances with multiple TXT resource records. */
+ LIST_HEAD(DnssdTxtData, txt_data_items);
+
+ Manager *manager;
+
+ bool withdrawn:1;
+ uid_t originator;
+};
+
+DnssdService *dnssd_service_free(DnssdService *service);
+DnssdTxtData *dnssd_txtdata_free(DnssdTxtData *txt_data);
+DnssdTxtData *dnssd_txtdata_free_all(DnssdTxtData *txt_data);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnssdService*, dnssd_service_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnssdTxtData*, dnssd_txtdata_free);
+
+int dnssd_render_instance_name(DnssdService *s, char **ret_name);
+int dnssd_load(Manager *manager);
+int dnssd_txt_item_new_from_string(const char *key, const char *value, DnsTxtItem **ret_item);
+int dnssd_txt_item_new_from_data(const char *key, const void *value, const size_t size, DnsTxtItem **ret_item);
+int dnssd_update_rrs(DnssdService *s);
+void dnssd_signal_conflict(Manager *manager, const char *name);
diff --git a/src/resolve/resolved-dnstls-gnutls.c b/src/resolve/resolved-dnstls-gnutls.c
new file mode 100644
index 0000000..ddf9758
--- /dev/null
+++ b/src/resolve/resolved-dnstls-gnutls.c
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if !ENABLE_DNS_OVER_TLS || !DNS_OVER_TLS_USE_GNUTLS
+#error This source file requires DNS-over-TLS to be enabled and GnuTLS to be available.
+#endif
+
+#include <gnutls/socket.h>
+
+#include "resolved-dns-stream.h"
+#include "resolved-dnstls.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(gnutls_session_t, gnutls_deinit);
+
+static ssize_t dnstls_stream_writev(gnutls_transport_ptr_t p, const giovec_t *iov, int iovcnt) {
+ int r;
+
+ assert(p);
+
+ r = dns_stream_writev((DnsStream*) p, (const struct iovec*) iov, iovcnt, DNS_STREAM_WRITE_TLS_DATA);
+ if (r < 0) {
+ errno = -r;
+ return -1;
+ }
+
+ return r;
+}
+
+int dnstls_stream_connect_tls(DnsStream *stream, DnsServer *server) {
+ _cleanup_(gnutls_deinitp) gnutls_session_t gs;
+ int r;
+
+ assert(stream);
+ assert(server);
+
+ r = gnutls_init(&gs, GNUTLS_CLIENT | GNUTLS_ENABLE_FALSE_START | GNUTLS_NONBLOCK);
+ if (r < 0)
+ return r;
+
+ /* As DNS-over-TLS is a recent protocol, older TLS versions can be disabled */
+ r = gnutls_priority_set_direct(gs, "NORMAL:-VERS-ALL:+VERS-TLS1.2", NULL);
+ if (r < 0)
+ return r;
+
+ r = gnutls_credentials_set(gs, GNUTLS_CRD_CERTIFICATE, server->dnstls_data.cert_cred);
+ if (r < 0)
+ return r;
+
+ if (server->dnstls_data.session_data.size > 0) {
+ gnutls_session_set_data(gs, server->dnstls_data.session_data.data, server->dnstls_data.session_data.size);
+
+ // Clear old session ticket
+ gnutls_free(server->dnstls_data.session_data.data);
+ server->dnstls_data.session_data.data = NULL;
+ server->dnstls_data.session_data.size = 0;
+ }
+
+ gnutls_handshake_set_timeout(gs, GNUTLS_DEFAULT_HANDSHAKE_TIMEOUT);
+
+ gnutls_transport_set_ptr2(gs, (gnutls_transport_ptr_t) (long) stream->fd, stream);
+ gnutls_transport_set_vec_push_function(gs, &dnstls_stream_writev);
+
+ stream->encrypted = true;
+ stream->dnstls_data.handshake = gnutls_handshake(gs);
+ if (stream->dnstls_data.handshake < 0 && gnutls_error_is_fatal(stream->dnstls_data.handshake))
+ return -ECONNREFUSED;
+
+ stream->dnstls_data.session = TAKE_PTR(gs);
+
+ return 0;
+}
+
+void dnstls_stream_free(DnsStream *stream) {
+ assert(stream);
+ assert(stream->encrypted);
+
+ if (stream->dnstls_data.session)
+ gnutls_deinit(stream->dnstls_data.session);
+}
+
+int dnstls_stream_on_io(DnsStream *stream, uint32_t revents) {
+ int r;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.session);
+
+ if (stream->dnstls_data.shutdown) {
+ r = gnutls_bye(stream->dnstls_data.session, GNUTLS_SHUT_RDWR);
+ if (r == GNUTLS_E_AGAIN) {
+ stream->dnstls_events = gnutls_record_get_direction(stream->dnstls_data.session) == 1 ? EPOLLOUT : EPOLLIN;
+ return -EAGAIN;
+ } else if (r < 0)
+ log_debug("Failed to invoke gnutls_bye: %s", gnutls_strerror(r));
+
+ stream->dnstls_events = 0;
+ stream->dnstls_data.shutdown = false;
+ dns_stream_unref(stream);
+ return DNSTLS_STREAM_CLOSED;
+ } else if (stream->dnstls_data.handshake < 0) {
+ stream->dnstls_data.handshake = gnutls_handshake(stream->dnstls_data.session);
+ if (stream->dnstls_data.handshake == GNUTLS_E_AGAIN) {
+ stream->dnstls_events = gnutls_record_get_direction(stream->dnstls_data.session) == 1 ? EPOLLOUT : EPOLLIN;
+ return -EAGAIN;
+ } else if (stream->dnstls_data.handshake < 0) {
+ log_debug("Failed to invoke gnutls_handshake: %s", gnutls_strerror(stream->dnstls_data.handshake));
+ if (gnutls_error_is_fatal(stream->dnstls_data.handshake))
+ return -ECONNREFUSED;
+ }
+
+ stream->dnstls_events = 0;
+ }
+
+ return 0;
+}
+
+int dnstls_stream_shutdown(DnsStream *stream, int error) {
+ int r;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.session);
+
+ /* Store TLS Ticket for faster succesive TLS handshakes */
+ if (stream->server && stream->server->dnstls_data.session_data.size == 0 && stream->dnstls_data.handshake == GNUTLS_E_SUCCESS)
+ gnutls_session_get_data2(stream->dnstls_data.session, &stream->server->dnstls_data.session_data);
+
+ if (IN_SET(error, ETIMEDOUT, 0)) {
+ r = gnutls_bye(stream->dnstls_data.session, GNUTLS_SHUT_RDWR);
+ if (r == GNUTLS_E_AGAIN) {
+ if (!stream->dnstls_data.shutdown) {
+ stream->dnstls_data.shutdown = true;
+ dns_stream_ref(stream);
+ return -EAGAIN;
+ }
+ } else if (r < 0)
+ log_debug("Failed to invoke gnutls_bye: %s", gnutls_strerror(r));
+ }
+
+ return 0;
+}
+
+ssize_t dnstls_stream_write(DnsStream *stream, const char *buf, size_t count) {
+ ssize_t ss;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.session);
+ assert(buf);
+
+ ss = gnutls_record_send(stream->dnstls_data.session, buf, count);
+ if (ss < 0)
+ switch(ss) {
+ case GNUTLS_E_INTERRUPTED:
+ return -EINTR;
+ case GNUTLS_E_AGAIN:
+ return -EAGAIN;
+ default:
+ return log_debug_errno(SYNTHETIC_ERRNO(EPIPE),
+ "Failed to invoke gnutls_record_send: %s",
+ gnutls_strerror(ss));
+ }
+
+ return ss;
+}
+
+ssize_t dnstls_stream_read(DnsStream *stream, void *buf, size_t count) {
+ ssize_t ss;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.session);
+ assert(buf);
+
+ ss = gnutls_record_recv(stream->dnstls_data.session, buf, count);
+ if (ss < 0)
+ switch(ss) {
+ case GNUTLS_E_INTERRUPTED:
+ return -EINTR;
+ case GNUTLS_E_AGAIN:
+ return -EAGAIN;
+ default:
+ return log_debug_errno(SYNTHETIC_ERRNO(EPIPE),
+ "Failed to invoke gnutls_record_recv: %s",
+ gnutls_strerror(ss));
+ }
+
+ return ss;
+}
+
+void dnstls_server_init(DnsServer *server) {
+ assert(server);
+
+ /* Do not verify cerificate */
+ gnutls_certificate_allocate_credentials(&server->dnstls_data.cert_cred);
+}
+
+void dnstls_server_free(DnsServer *server) {
+ assert(server);
+
+ if (server->dnstls_data.cert_cred)
+ gnutls_certificate_free_credentials(server->dnstls_data.cert_cred);
+
+ if (server->dnstls_data.session_data.data)
+ gnutls_free(server->dnstls_data.session_data.data);
+}
diff --git a/src/resolve/resolved-dnstls-gnutls.h b/src/resolve/resolved-dnstls-gnutls.h
new file mode 100644
index 0000000..41c89f2
--- /dev/null
+++ b/src/resolve/resolved-dnstls-gnutls.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if !ENABLE_DNS_OVER_TLS || !DNS_OVER_TLS_USE_GNUTLS
+#error This source file requires DNS-over-TLS to be enabled and GnuTLS to be available.
+#endif
+
+#include <gnutls/gnutls.h>
+#include <stdbool.h>
+
+struct DnsTlsServerData {
+ gnutls_certificate_credentials_t cert_cred;
+ gnutls_datum_t session_data;
+};
+
+struct DnsTlsStreamData {
+ gnutls_session_t session;
+ int handshake;
+ bool shutdown;
+};
diff --git a/src/resolve/resolved-dnstls-openssl.c b/src/resolve/resolved-dnstls-openssl.c
new file mode 100644
index 0000000..f269e4d
--- /dev/null
+++ b/src/resolve/resolved-dnstls-openssl.c
@@ -0,0 +1,353 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if !ENABLE_DNS_OVER_TLS || !DNS_OVER_TLS_USE_OPENSSL
+#error This source file requires DNS-over-TLS to be enabled and OpenSSL to be available.
+#endif
+
+#include <openssl/bio.h>
+#include <openssl/err.h>
+
+#include "io-util.h"
+#include "resolved-dns-stream.h"
+#include "resolved-dnstls.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(SSL*, SSL_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(BIO*, BIO_free);
+
+static int dnstls_flush_write_buffer(DnsStream *stream) {
+ ssize_t ss;
+
+ assert(stream);
+ assert(stream->encrypted);
+
+ if (stream->dnstls_data.write_buffer->length > 0) {
+ assert(stream->dnstls_data.write_buffer->data);
+
+ struct iovec iov[1];
+ iov[0] = IOVEC_MAKE(stream->dnstls_data.write_buffer->data,
+ stream->dnstls_data.write_buffer->length);
+ ss = dns_stream_writev(stream, iov, 1, DNS_STREAM_WRITE_TLS_DATA);
+ if (ss < 0) {
+ if (ss == -EAGAIN)
+ stream->dnstls_events |= EPOLLOUT;
+
+ return ss;
+ } else {
+ stream->dnstls_data.write_buffer->length -= ss;
+ stream->dnstls_data.write_buffer->data += ss;
+
+ if (stream->dnstls_data.write_buffer->length > 0) {
+ stream->dnstls_events |= EPOLLOUT;
+ return -EAGAIN;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int dnstls_stream_connect_tls(DnsStream *stream, DnsServer *server) {
+ _cleanup_(BIO_freep) BIO *rb = NULL, *wb = NULL;
+ _cleanup_(SSL_freep) SSL *s = NULL;
+ int error, r;
+
+ assert(stream);
+ assert(server);
+
+ rb = BIO_new_socket(stream->fd, 0);
+ if (!rb)
+ return -ENOMEM;
+
+ wb = BIO_new(BIO_s_mem());
+ if (!wb)
+ return -ENOMEM;
+
+ BIO_get_mem_ptr(wb, &stream->dnstls_data.write_buffer);
+
+ s = SSL_new(server->dnstls_data.ctx);
+ if (!s)
+ return -ENOMEM;
+
+ SSL_set_connect_state(s);
+ SSL_set_session(s, server->dnstls_data.session);
+ SSL_set_bio(s, TAKE_PTR(rb), TAKE_PTR(wb));
+
+ ERR_clear_error();
+ stream->dnstls_data.handshake = SSL_do_handshake(s);
+ if (stream->dnstls_data.handshake <= 0) {
+ error = SSL_get_error(s, stream->dnstls_data.handshake);
+ if (!IN_SET(error, SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE)) {
+ char errbuf[256];
+
+ ERR_error_string_n(error, errbuf, sizeof(errbuf));
+ log_debug("Failed to invoke SSL_do_handshake: %s", errbuf);
+ return -ECONNREFUSED;
+ }
+ }
+
+ stream->encrypted = true;
+
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0 && r != -EAGAIN)
+ return r;
+
+ stream->dnstls_data.ssl = TAKE_PTR(s);
+
+ return 0;
+}
+
+void dnstls_stream_free(DnsStream *stream) {
+ assert(stream);
+ assert(stream->encrypted);
+
+ if (stream->dnstls_data.ssl)
+ SSL_free(stream->dnstls_data.ssl);
+}
+
+int dnstls_stream_on_io(DnsStream *stream, uint32_t revents) {
+ int error, r;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.ssl);
+
+ /* Flush write buffer when requested by OpenSSL */
+ if ((revents & EPOLLOUT) && (stream->dnstls_events & EPOLLOUT)) {
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+ }
+
+ if (stream->dnstls_data.shutdown) {
+ ERR_clear_error();
+ r = SSL_shutdown(stream->dnstls_data.ssl);
+ if (r == 0) {
+ stream->dnstls_events = 0;
+
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+
+ return -EAGAIN;
+ } else if (r < 0) {
+ error = SSL_get_error(stream->dnstls_data.ssl, r);
+ if (IN_SET(error, SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE)) {
+ stream->dnstls_events = error == SSL_ERROR_WANT_READ ? EPOLLIN : EPOLLOUT;
+
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+
+ return -EAGAIN;
+ } else if (error == SSL_ERROR_SYSCALL) {
+ if (errno > 0)
+ log_debug_errno(errno, "Failed to invoke SSL_shutdown, ignoring: %m");
+ } else {
+ char errbuf[256];
+
+ ERR_error_string_n(error, errbuf, sizeof(errbuf));
+ log_debug("Failed to invoke SSL_shutdown, ignoring: %s", errbuf);
+ }
+ }
+
+ stream->dnstls_events = 0;
+ stream->dnstls_data.shutdown = false;
+
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+
+ dns_stream_unref(stream);
+ return DNSTLS_STREAM_CLOSED;
+ } else if (stream->dnstls_data.handshake <= 0) {
+ ERR_clear_error();
+ stream->dnstls_data.handshake = SSL_do_handshake(stream->dnstls_data.ssl);
+ if (stream->dnstls_data.handshake <= 0) {
+ error = SSL_get_error(stream->dnstls_data.ssl, stream->dnstls_data.handshake);
+ if (IN_SET(error, SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE)) {
+ stream->dnstls_events = error == SSL_ERROR_WANT_READ ? EPOLLIN : EPOLLOUT;
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+
+ return -EAGAIN;
+ } else {
+ char errbuf[256];
+
+ ERR_error_string_n(error, errbuf, sizeof(errbuf));
+ return log_debug_errno(SYNTHETIC_ERRNO(ECONNREFUSED),
+ "Failed to invoke SSL_do_handshake: %s",
+ errbuf);
+ }
+ }
+
+ stream->dnstls_events = 0;
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dnstls_stream_shutdown(DnsStream *stream, int error) {
+ int ssl_error, r;
+ SSL_SESSION *s;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.ssl);
+
+ if (stream->server) {
+ s = SSL_get1_session(stream->dnstls_data.ssl);
+ if (s) {
+ if (stream->server->dnstls_data.session)
+ SSL_SESSION_free(stream->server->dnstls_data.session);
+
+ stream->server->dnstls_data.session = s;
+ }
+ }
+
+ if (error == ETIMEDOUT) {
+ ERR_clear_error();
+ r = SSL_shutdown(stream->dnstls_data.ssl);
+ if (r == 0) {
+ if (!stream->dnstls_data.shutdown) {
+ stream->dnstls_data.shutdown = true;
+ dns_stream_ref(stream);
+ }
+
+ stream->dnstls_events = 0;
+
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+
+ return -EAGAIN;
+ } else if (r < 0) {
+ ssl_error = SSL_get_error(stream->dnstls_data.ssl, r);
+ if (IN_SET(ssl_error, SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE)) {
+ stream->dnstls_events = ssl_error == SSL_ERROR_WANT_READ ? EPOLLIN : EPOLLOUT;
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0 && r != -EAGAIN)
+ return r;
+
+ if (!stream->dnstls_data.shutdown) {
+ stream->dnstls_data.shutdown = true;
+ dns_stream_ref(stream);
+ }
+ return -EAGAIN;
+ } else if (ssl_error == SSL_ERROR_SYSCALL) {
+ if (errno > 0)
+ log_debug_errno(errno, "Failed to invoke SSL_shutdown, ignoring: %m");
+ } else {
+ char errbuf[256];
+
+ ERR_error_string_n(ssl_error, errbuf, sizeof(errbuf));
+ log_debug("Failed to invoke SSL_shutdown, ignoring: %s", errbuf);
+ }
+ }
+
+ stream->dnstls_events = 0;
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+ssize_t dnstls_stream_write(DnsStream *stream, const char *buf, size_t count) {
+ int error, r;
+ ssize_t ss;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.ssl);
+ assert(buf);
+
+ ERR_clear_error();
+ ss = r = SSL_write(stream->dnstls_data.ssl, buf, count);
+ if (r <= 0) {
+ error = SSL_get_error(stream->dnstls_data.ssl, r);
+ if (IN_SET(error, SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE)) {
+ stream->dnstls_events = error == SSL_ERROR_WANT_READ ? EPOLLIN : EPOLLOUT;
+ ss = -EAGAIN;
+ } else if (error == SSL_ERROR_ZERO_RETURN) {
+ stream->dnstls_events = 0;
+ ss = 0;
+ } else {
+ char errbuf[256];
+
+ ERR_error_string_n(error, errbuf, sizeof(errbuf));
+ log_debug("Failed to invoke SSL_write: %s", errbuf);
+ stream->dnstls_events = 0;
+ ss = -EPIPE;
+ }
+ } else
+ stream->dnstls_events = 0;
+
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+
+ return ss;
+}
+
+ssize_t dnstls_stream_read(DnsStream *stream, void *buf, size_t count) {
+ int error, r;
+ ssize_t ss;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.ssl);
+ assert(buf);
+
+ ERR_clear_error();
+ ss = r = SSL_read(stream->dnstls_data.ssl, buf, count);
+ if (r <= 0) {
+ error = SSL_get_error(stream->dnstls_data.ssl, r);
+ if (IN_SET(error, SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE)) {
+ stream->dnstls_events = error == SSL_ERROR_WANT_READ ? EPOLLIN : EPOLLOUT;
+ ss = -EAGAIN;
+ } else if (error == SSL_ERROR_ZERO_RETURN) {
+ stream->dnstls_events = 0;
+ ss = 0;
+ } else {
+ char errbuf[256];
+
+ ERR_error_string_n(error, errbuf, sizeof(errbuf));
+ log_debug("Failed to invoke SSL_read: %s", errbuf);
+ stream->dnstls_events = 0;
+ ss = -EPIPE;
+ }
+ } else
+ stream->dnstls_events = 0;
+
+ /* flush write buffer in cache of renegotiation */
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+
+ return ss;
+}
+
+void dnstls_server_init(DnsServer *server) {
+ assert(server);
+
+ server->dnstls_data.ctx = SSL_CTX_new(TLS_client_method());
+ if (server->dnstls_data.ctx) {
+ SSL_CTX_set_min_proto_version(server->dnstls_data.ctx, TLS1_2_VERSION);
+ SSL_CTX_set_options(server->dnstls_data.ctx, SSL_OP_NO_COMPRESSION);
+ }
+}
+
+void dnstls_server_free(DnsServer *server) {
+ assert(server);
+
+ if (server->dnstls_data.ctx)
+ SSL_CTX_free(server->dnstls_data.ctx);
+
+ if (server->dnstls_data.session)
+ SSL_SESSION_free(server->dnstls_data.session);
+}
diff --git a/src/resolve/resolved-dnstls-openssl.h b/src/resolve/resolved-dnstls-openssl.h
new file mode 100644
index 0000000..f0dccf3
--- /dev/null
+++ b/src/resolve/resolved-dnstls-openssl.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if !ENABLE_DNS_OVER_TLS || !DNS_OVER_TLS_USE_OPENSSL
+#error This source file requires DNS-over-TLS to be enabled and OpenSSL to be available.
+#endif
+
+#include <openssl/ssl.h>
+#include <stdbool.h>
+
+struct DnsTlsServerData {
+ SSL_CTX *ctx;
+ SSL_SESSION *session;
+};
+
+struct DnsTlsStreamData {
+ int handshake;
+ bool shutdown;
+ SSL *ssl;
+ BUF_MEM *write_buffer;
+};
diff --git a/src/resolve/resolved-dnstls.h b/src/resolve/resolved-dnstls.h
new file mode 100644
index 0000000..fdd85ee
--- /dev/null
+++ b/src/resolve/resolved-dnstls.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if !ENABLE_DNS_OVER_TLS
+#error This source file requires DNS-over-TLS to be enabled
+#endif
+
+typedef struct DnsTlsServerData DnsTlsServerData;
+typedef struct DnsTlsStreamData DnsTlsStreamData;
+
+#if DNS_OVER_TLS_USE_GNUTLS
+#include "resolved-dnstls-gnutls.h"
+#elif DNS_OVER_TLS_USE_OPENSSL
+#include "resolved-dnstls-openssl.h"
+#else
+#error Unknown dependency for supporting DNS-over-TLS
+#endif
+
+#include "resolved-dns-stream.h"
+#include "resolved-dns-transaction.h"
+
+#define DNSTLS_STREAM_CLOSED 1
+
+int dnstls_stream_connect_tls(DnsStream *stream, DnsServer *server);
+void dnstls_stream_free(DnsStream *stream);
+int dnstls_stream_on_io(DnsStream *stream, uint32_t revents);
+int dnstls_stream_shutdown(DnsStream *stream, int error);
+ssize_t dnstls_stream_write(DnsStream *stream, const char *buf, size_t count);
+ssize_t dnstls_stream_read(DnsStream *stream, void *buf, size_t count);
+
+void dnstls_server_init(DnsServer *server);
+void dnstls_server_free(DnsServer *server);
diff --git a/src/resolve/resolved-etc-hosts.c b/src/resolve/resolved-etc-hosts.c
new file mode 100644
index 0000000..ee21222
--- /dev/null
+++ b/src/resolve/resolved-etc-hosts.c
@@ -0,0 +1,377 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "fd-util.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "resolved-dns-synthesize.h"
+#include "resolved-etc-hosts.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+/* Recheck /etc/hosts at most once every 2s */
+#define ETC_HOSTS_RECHECK_USEC (2*USEC_PER_SEC)
+
+static void etc_hosts_item_free(EtcHostsItem *item) {
+ strv_free(item->names);
+ free(item);
+}
+
+static void etc_hosts_item_by_name_free(EtcHostsItemByName *item) {
+ free(item->name);
+ free(item->addresses);
+ free(item);
+}
+
+void etc_hosts_free(EtcHosts *hosts) {
+ hosts->by_address = hashmap_free_with_destructor(hosts->by_address, etc_hosts_item_free);
+ hosts->by_name = hashmap_free_with_destructor(hosts->by_name, etc_hosts_item_by_name_free);
+ hosts->no_address = set_free_free(hosts->no_address);
+}
+
+void manager_etc_hosts_flush(Manager *m) {
+ etc_hosts_free(&m->etc_hosts);
+ m->etc_hosts_mtime = USEC_INFINITY;
+}
+
+static int parse_line(EtcHosts *hosts, unsigned nr, const char *line) {
+ _cleanup_free_ char *address_str = NULL;
+ struct in_addr_data address = {};
+ bool found = false;
+ EtcHostsItem *item;
+ int r;
+
+ assert(hosts);
+ assert(line);
+
+ r = extract_first_word(&line, &address_str, NULL, EXTRACT_RELAX);
+ if (r < 0)
+ return log_error_errno(r, "/etc/hosts:%u: failed to extract address: %m", nr);
+ assert(r > 0); /* We already checked that the line is not empty, so it should contain *something* */
+
+ r = in_addr_ifindex_from_string_auto(address_str, &address.family, &address.address, NULL);
+ if (r < 0) {
+ log_warning_errno(r, "/etc/hosts:%u: address '%s' is invalid, ignoring: %m", nr, address_str);
+ return 0;
+ }
+
+ r = in_addr_is_null(address.family, &address.address);
+ if (r < 0) {
+ log_warning_errno(r, "/etc/hosts:%u: address '%s' is invalid, ignoring: %m", nr, address_str);
+ return 0;
+ }
+ if (r > 0)
+ /* This is an 0.0.0.0 or :: item, which we assume means that we shall map the specified hostname to
+ * nothing. */
+ item = NULL;
+ else {
+ /* If this is a normal address, then simply add entry mapping it to the specified names */
+
+ item = hashmap_get(hosts->by_address, &address);
+ if (!item) {
+ r = hashmap_ensure_allocated(&hosts->by_address, &in_addr_data_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ item = new0(EtcHostsItem, 1);
+ if (!item)
+ return log_oom();
+
+ item->address = address;
+
+ r = hashmap_put(hosts->by_address, &item->address, item);
+ if (r < 0) {
+ free(item);
+ return log_oom();
+ }
+ }
+ }
+
+ for (;;) {
+ _cleanup_free_ char *name = NULL;
+ EtcHostsItemByName *bn;
+
+ r = extract_first_word(&line, &name, NULL, EXTRACT_RELAX);
+ if (r < 0)
+ return log_error_errno(r, "/etc/hosts:%u: couldn't extract host name: %m", nr);
+ if (r == 0)
+ break;
+
+ found = true;
+
+ r = dns_name_is_valid_ldh(name);
+ if (r <= 0) {
+ log_warning_errno(r, "/etc/hosts:%u: hostname \"%s\" is not valid, ignoring.", nr, name);
+ continue;
+ }
+
+ if (is_localhost(name))
+ /* Suppress the "localhost" line that is often seen */
+ continue;
+
+ if (!item) {
+ /* Optimize the case where we don't need to store any addresses, by storing
+ * only the name in a dedicated Set instead of the hashmap */
+
+ r = set_ensure_allocated(&hosts->no_address, &dns_name_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(hosts->no_address, name);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(name);
+ continue;
+ }
+
+ r = strv_extend(&item->names, name);
+ if (r < 0)
+ return log_oom();
+
+ bn = hashmap_get(hosts->by_name, name);
+ if (!bn) {
+ r = hashmap_ensure_allocated(&hosts->by_name, &dns_name_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ bn = new0(EtcHostsItemByName, 1);
+ if (!bn)
+ return log_oom();
+
+ r = hashmap_put(hosts->by_name, name, bn);
+ if (r < 0) {
+ free(bn);
+ return log_oom();
+ }
+
+ bn->name = TAKE_PTR(name);
+ }
+
+ if (!GREEDY_REALLOC(bn->addresses, bn->n_allocated, bn->n_addresses + 1))
+ return log_oom();
+
+ bn->addresses[bn->n_addresses++] = &item->address;
+ }
+
+ if (!found)
+ log_warning("/etc/hosts:%u: line is missing any host names", nr);
+
+ return 0;
+}
+
+int etc_hosts_parse(EtcHosts *hosts, FILE *f) {
+ _cleanup_(etc_hosts_free) EtcHosts t = {};
+ unsigned nr = 0;
+ int r;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read /etc/hosts: %m");
+ if (r == 0)
+ break;
+
+ nr++;
+
+ l = strchr(line, '#');
+ if (l)
+ *l = '\0';
+
+ l = strstrip(line);
+ if (isempty(l))
+ continue;
+
+ r = parse_line(&t, nr, l);
+ if (r < 0)
+ return r;
+ }
+
+ etc_hosts_free(hosts);
+ *hosts = t;
+ t = (EtcHosts) {}; /* prevent cleanup */
+ return 0;
+}
+
+static int manager_etc_hosts_read(Manager *m) {
+ _cleanup_fclose_ FILE *f = NULL;
+ struct stat st;
+ usec_t ts;
+ int r;
+
+ assert_se(sd_event_now(m->event, clock_boottime_or_monotonic(), &ts) >= 0);
+
+ /* See if we checked /etc/hosts recently already */
+ if (m->etc_hosts_last != USEC_INFINITY && m->etc_hosts_last + ETC_HOSTS_RECHECK_USEC > ts)
+ return 0;
+
+ m->etc_hosts_last = ts;
+
+ if (m->etc_hosts_mtime != USEC_INFINITY) {
+ if (stat("/etc/hosts", &st) < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to stat /etc/hosts: %m");
+
+ manager_etc_hosts_flush(m);
+ return 0;
+ }
+
+ /* Did the mtime change? If not, there's no point in re-reading the file. */
+ if (timespec_load(&st.st_mtim) == m->etc_hosts_mtime)
+ return 0;
+ }
+
+ f = fopen("/etc/hosts", "re");
+ if (!f) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to open /etc/hosts: %m");
+
+ manager_etc_hosts_flush(m);
+ return 0;
+ }
+
+ /* Take the timestamp at the beginning of processing, so that any changes made later are read on the next
+ * invocation */
+ r = fstat(fileno(f), &st);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to fstat() /etc/hosts: %m");
+
+ r = etc_hosts_parse(&m->etc_hosts, f);
+ if (r < 0)
+ return r;
+
+ m->etc_hosts_mtime = timespec_load(&st.st_mtim);
+ m->etc_hosts_last = ts;
+
+ return 1;
+}
+
+int manager_etc_hosts_lookup(Manager *m, DnsQuestion* q, DnsAnswer **answer) {
+ bool found_a = false, found_aaaa = false;
+ struct in_addr_data k = {};
+ EtcHostsItemByName *bn;
+ DnsResourceKey *t;
+ const char *name;
+ unsigned i;
+ int r;
+
+ assert(m);
+ assert(q);
+ assert(answer);
+
+ if (!m->read_etc_hosts)
+ return 0;
+
+ (void) manager_etc_hosts_read(m);
+
+ name = dns_question_first_name(q);
+ if (!name)
+ return 0;
+
+ r = dns_name_address(name, &k.family, &k.address);
+ if (r > 0) {
+ EtcHostsItem *item;
+ DnsResourceKey *found_ptr = NULL;
+
+ item = hashmap_get(m->etc_hosts.by_address, &k);
+ if (!item)
+ return 0;
+
+ /* We have an address in /etc/hosts that matches the queried name. Let's return successful. Actual data
+ * we'll only return if the request was for PTR. */
+
+ DNS_QUESTION_FOREACH(t, q) {
+ if (!IN_SET(t->type, DNS_TYPE_PTR, DNS_TYPE_ANY))
+ continue;
+ if (!IN_SET(t->class, DNS_CLASS_IN, DNS_CLASS_ANY))
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(t), name);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ found_ptr = t;
+ break;
+ }
+ }
+
+ if (found_ptr) {
+ char **n;
+
+ r = dns_answer_reserve(answer, strv_length(item->names));
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(n, item->names) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ rr = dns_resource_record_new(found_ptr);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->ptr.name = strdup(*n);
+ if (!rr->ptr.name)
+ return -ENOMEM;
+
+ r = dns_answer_add(*answer, rr, 0, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 1;
+ }
+
+ bn = hashmap_get(m->etc_hosts.by_name, name);
+ if (bn) {
+ r = dns_answer_reserve(answer, bn->n_addresses);
+ if (r < 0)
+ return r;
+ } else {
+ /* Check if name was listed with no address. If yes, continue to return an answer. */
+ if (!set_contains(m->etc_hosts.no_address, name))
+ return 0;
+ }
+
+ DNS_QUESTION_FOREACH(t, q) {
+ if (!IN_SET(t->type, DNS_TYPE_A, DNS_TYPE_AAAA, DNS_TYPE_ANY))
+ continue;
+ if (!IN_SET(t->class, DNS_CLASS_IN, DNS_CLASS_ANY))
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(t), name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (IN_SET(t->type, DNS_TYPE_A, DNS_TYPE_ANY))
+ found_a = true;
+ if (IN_SET(t->type, DNS_TYPE_AAAA, DNS_TYPE_ANY))
+ found_aaaa = true;
+
+ if (found_a && found_aaaa)
+ break;
+ }
+
+ for (i = 0; bn && i < bn->n_addresses; i++) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ if ((!found_a && bn->addresses[i]->family == AF_INET) ||
+ (!found_aaaa && bn->addresses[i]->family == AF_INET6))
+ continue;
+
+ r = dns_resource_record_new_address(&rr, bn->addresses[i]->family, &bn->addresses[i]->address, bn->name);
+ if (r < 0)
+ return r;
+
+ r = dns_answer_add(*answer, rr, 0, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+ }
+
+ return found_a || found_aaaa;
+}
diff --git a/src/resolve/resolved-etc-hosts.h b/src/resolve/resolved-etc-hosts.h
new file mode 100644
index 0000000..caf32a5
--- /dev/null
+++ b/src/resolve/resolved-etc-hosts.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "resolved-manager.h"
+#include "resolved-dns-question.h"
+#include "resolved-dns-answer.h"
+
+typedef struct EtcHostsItem {
+ struct in_addr_data address;
+
+ char **names;
+} EtcHostsItem;
+
+typedef struct EtcHostsItemByName {
+ char *name;
+
+ struct in_addr_data **addresses;
+ size_t n_addresses, n_allocated;
+} EtcHostsItemByName;
+
+int etc_hosts_parse(EtcHosts *hosts, FILE *f);
+void etc_hosts_free(EtcHosts *hosts);
+
+void manager_etc_hosts_flush(Manager *m);
+int manager_etc_hosts_lookup(Manager *m, DnsQuestion* q, DnsAnswer **answer);
diff --git a/src/resolve/resolved-gperf.gperf b/src/resolve/resolved-gperf.gperf
new file mode 100644
index 0000000..9b9290b
--- /dev/null
+++ b/src/resolve/resolved-gperf.gperf
@@ -0,0 +1,29 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "resolved-conf.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name resolved_gperf_hash
+%define lookup-function-name resolved_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Resolve.DNS, config_parse_dns_servers, DNS_SERVER_SYSTEM, 0
+Resolve.FallbackDNS, config_parse_dns_servers, DNS_SERVER_FALLBACK, 0
+Resolve.Domains, config_parse_search_domains, 0, 0
+Resolve.LLMNR, config_parse_resolve_support, 0, offsetof(Manager, llmnr_support)
+Resolve.MulticastDNS, config_parse_resolve_support, 0, offsetof(Manager, mdns_support)
+Resolve.DNSSEC, config_parse_dnssec_mode, 0, offsetof(Manager, dnssec_mode)
+Resolve.DNSOverTLS, config_parse_dns_over_tls_mode, 0, offsetof(Manager, dns_over_tls_mode)
+Resolve.Cache, config_parse_bool, 0, offsetof(Manager, enable_cache)
+Resolve.DNSStubListener, config_parse_dns_stub_listener_mode, 0, offsetof(Manager, dns_stub_listener_mode)
+Resolve.ReadEtcHosts, config_parse_bool, 0, offsetof(Manager, read_etc_hosts)
diff --git a/src/resolve/resolved-link-bus.c b/src/resolve/resolved-link-bus.c
new file mode 100644
index 0000000..96093ff
--- /dev/null
+++ b/src/resolve/resolved-link-bus.c
@@ -0,0 +1,700 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-util.h"
+#include "parse-util.h"
+#include "resolve-util.h"
+#include "resolved-bus.h"
+#include "resolved-link-bus.h"
+#include "resolved-resolv-conf.h"
+#include "strv.h"
+
+static BUS_DEFINE_PROPERTY_GET(property_get_dnssec_supported, "b", Link, link_dnssec_supported);
+static BUS_DEFINE_PROPERTY_GET2(property_get_dnssec_mode, "s", Link, link_get_dnssec_mode, dnssec_mode_to_string);
+
+static int property_get_dns_over_tls_mode(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Link *l = userdata;
+
+ assert(reply);
+ assert(l);
+
+ return sd_bus_message_append(reply, "s", dns_over_tls_mode_to_string(link_get_dns_over_tls_mode(l)));
+}
+
+static int property_get_dns(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Link *l = userdata;
+ DnsServer *s;
+ int r;
+
+ assert(reply);
+ assert(l);
+
+ r = sd_bus_message_open_container(reply, 'a', "(iay)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(servers, s, l->dns_servers) {
+ r = bus_dns_server_append(reply, s, false);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_current_dns_server(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ DnsServer *s;
+
+ assert(reply);
+ assert(userdata);
+
+ s = *(DnsServer **) userdata;
+
+ return bus_dns_server_append(reply, s, false);
+}
+
+static int property_get_domains(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Link *l = userdata;
+ DnsSearchDomain *d;
+ int r;
+
+ assert(reply);
+ assert(l);
+
+ r = sd_bus_message_open_container(reply, 'a', "(sb)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(domains, d, l->search_domains) {
+ r = sd_bus_message_append(reply, "(sb)", d->name, d->route_only);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_default_route(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Link *l = userdata;
+
+ assert(reply);
+ assert(l);
+
+ /* Return what is configured, if there's something configured */
+ if (l->default_route >= 0)
+ return sd_bus_message_append(reply, "b", l->default_route);
+
+ /* Otherwise report what is in effect */
+ if (l->unicast_scope)
+ return sd_bus_message_append(reply, "b", dns_scope_is_default_route(l->unicast_scope));
+
+ return sd_bus_message_append(reply, "b", false);
+}
+
+static int property_get_scopes_mask(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Link *l = userdata;
+ uint64_t mask;
+
+ assert(reply);
+ assert(l);
+
+ mask = (l->unicast_scope ? SD_RESOLVED_DNS : 0) |
+ (l->llmnr_ipv4_scope ? SD_RESOLVED_LLMNR_IPV4 : 0) |
+ (l->llmnr_ipv6_scope ? SD_RESOLVED_LLMNR_IPV6 : 0) |
+ (l->mdns_ipv4_scope ? SD_RESOLVED_MDNS_IPV4 : 0) |
+ (l->mdns_ipv6_scope ? SD_RESOLVED_MDNS_IPV6 : 0);
+
+ return sd_bus_message_append(reply, "t", mask);
+}
+
+static int property_get_ntas(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Link *l = userdata;
+ const char *name;
+ Iterator i;
+ int r;
+
+ assert(reply);
+ assert(l);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(name, l->dnssec_negative_trust_anchors, i) {
+ r = sd_bus_message_append(reply, "s", name);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int verify_unmanaged_link(Link *l, sd_bus_error *error) {
+ assert(l);
+
+ if (l->flags & IFF_LOOPBACK)
+ return sd_bus_error_setf(error, BUS_ERROR_LINK_BUSY, "Link %s is loopback device.", l->name);
+ if (l->is_managed)
+ return sd_bus_error_setf(error, BUS_ERROR_LINK_BUSY, "Link %s is managed.", l->name);
+
+ return 0;
+}
+
+int bus_link_method_set_dns_servers(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ struct in_addr_data *dns = NULL;
+ size_t allocated = 0, n = 0;
+ Link *l = userdata;
+ unsigned i;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(message, 'a', "(iay)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ int family;
+ size_t sz;
+ const void *d;
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_enter_container(message, 'r', "iay");
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(message, "i", &family);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown address family %i", family);
+
+ r = sd_bus_message_read_array(message, 'y', &d, &sz);
+ if (r < 0)
+ return r;
+ if (sz != FAMILY_ADDRESS_SIZE(family))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid address size");
+
+ if (!dns_server_address_valid(family, d))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid DNS server address");
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC(dns, allocated, n+1))
+ return -ENOMEM;
+
+ dns[n].family = family;
+ memcpy(&dns[n].address, d, sz);
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ dns_server_mark_all(l->dns_servers);
+
+ for (i = 0; i < n; i++) {
+ DnsServer *s;
+
+ s = dns_server_find(l->dns_servers, dns[i].family, &dns[i].address, 0);
+ if (s)
+ dns_server_move_back_and_unmark(s);
+ else {
+ r = dns_server_new(l->manager, NULL, DNS_SERVER_LINK, l, dns[i].family, &dns[i].address, 0);
+ if (r < 0)
+ goto clear;
+ }
+
+ }
+
+ dns_server_unlink_marked(l->dns_servers);
+ link_allocate_scopes(l);
+
+ (void) link_save_user(l);
+ (void) manager_write_resolv_conf(l->manager);
+
+ return sd_bus_reply_method_return(message, NULL);
+
+clear:
+ dns_server_unlink_all(l->dns_servers);
+ return r;
+}
+
+int bus_link_method_set_domains(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(message, 'a', "(sb)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *name;
+ int route_only;
+
+ r = sd_bus_message_read(message, "(sb)", &name, &route_only);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = dns_name_is_valid(name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid search domain %s", name);
+ if (!route_only && dns_name_is_root(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Root domain is not suitable as search domain");
+ }
+
+ dns_search_domain_mark_all(l->search_domains);
+
+ r = sd_bus_message_rewind(message, false);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ DnsSearchDomain *d;
+ const char *name;
+ int route_only;
+
+ r = sd_bus_message_read(message, "(sb)", &name, &route_only);
+ if (r < 0)
+ goto clear;
+ if (r == 0)
+ break;
+
+ r = dns_search_domain_find(l->search_domains, name, &d);
+ if (r < 0)
+ goto clear;
+
+ if (r > 0)
+ dns_search_domain_move_back_and_unmark(d);
+ else {
+ r = dns_search_domain_new(l->manager, &d, DNS_SEARCH_DOMAIN_LINK, l, name);
+ if (r < 0)
+ goto clear;
+ }
+
+ d->route_only = route_only;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ goto clear;
+
+ dns_search_domain_unlink_marked(l->search_domains);
+
+ (void) link_save_user(l);
+ (void) manager_write_resolv_conf(l->manager);
+
+ return sd_bus_reply_method_return(message, NULL);
+
+clear:
+ dns_search_domain_unlink_all(l->search_domains);
+ return r;
+}
+
+int bus_link_method_set_default_route(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ int r, b;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ if (l->default_route != b) {
+ l->default_route = b;
+
+ (void) link_save_user(l);
+ (void) manager_write_resolv_conf(l->manager);
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_llmnr(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ ResolveSupport mode;
+ const char *llmnr;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &llmnr);
+ if (r < 0)
+ return r;
+
+ if (isempty(llmnr))
+ mode = RESOLVE_SUPPORT_YES;
+ else {
+ mode = resolve_support_from_string(llmnr);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid LLMNR setting: %s", llmnr);
+ }
+
+ l->llmnr_support = mode;
+ link_allocate_scopes(l);
+ link_add_rrs(l, false);
+
+ (void) link_save_user(l);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_mdns(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ ResolveSupport mode;
+ const char *mdns;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &mdns);
+ if (r < 0)
+ return r;
+
+ if (isempty(mdns))
+ mode = RESOLVE_SUPPORT_NO;
+ else {
+ mode = resolve_support_from_string(mdns);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid MulticastDNS setting: %s", mdns);
+ }
+
+ l->mdns_support = mode;
+ link_allocate_scopes(l);
+ link_add_rrs(l, false);
+
+ (void) link_save_user(l);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_dns_over_tls(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ const char *dns_over_tls;
+ DnsOverTlsMode mode;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &dns_over_tls);
+ if (r < 0)
+ return r;
+
+ if (isempty(dns_over_tls))
+ mode = _DNS_OVER_TLS_MODE_INVALID;
+ else {
+ mode = dns_over_tls_mode_from_string(dns_over_tls);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid DNSOverTLS setting: %s", dns_over_tls);
+ }
+
+ link_set_dns_over_tls_mode(l, mode);
+
+ (void) link_save_user(l);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_dnssec(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ const char *dnssec;
+ DnssecMode mode;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &dnssec);
+ if (r < 0)
+ return r;
+
+ if (isempty(dnssec))
+ mode = _DNSSEC_MODE_INVALID;
+ else {
+ mode = dnssec_mode_from_string(dnssec);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid DNSSEC setting: %s", dnssec);
+ }
+
+ link_set_dnssec_mode(l, mode);
+
+ (void) link_save_user(l);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_dnssec_negative_trust_anchors(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_set_free_free_ Set *ns = NULL;
+ _cleanup_strv_free_ char **ntas = NULL;
+ Link *l = userdata;
+ int r;
+ char **i;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &ntas);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, ntas) {
+ r = dns_name_is_valid(*i);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid negative trust anchor domain: %s", *i);
+ }
+
+ ns = set_new(&dns_name_hash_ops);
+ if (!ns)
+ return -ENOMEM;
+
+ STRV_FOREACH(i, ntas) {
+ r = set_put_strdup(ns, *i);
+ if (r < 0)
+ return r;
+ }
+
+ set_free_free(l->dnssec_negative_trust_anchors);
+ l->dnssec_negative_trust_anchors = TAKE_PTR(ns);
+
+ (void) link_save_user(l);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_revert(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ link_flush_settings(l);
+ link_allocate_scopes(l);
+ link_add_rrs(l, false);
+
+ (void) link_save_user(l);
+ (void) manager_write_resolv_conf(l->manager);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+const sd_bus_vtable link_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("ScopesMask", "t", property_get_scopes_mask, 0, 0),
+ SD_BUS_PROPERTY("DNS", "a(iay)", property_get_dns, 0, 0),
+ SD_BUS_PROPERTY("CurrentDNSServer", "(iay)", property_get_current_dns_server, offsetof(Link, current_dns_server), 0),
+ SD_BUS_PROPERTY("Domains", "a(sb)", property_get_domains, 0, 0),
+ SD_BUS_PROPERTY("DefaultRoute", "b", property_get_default_route, 0, 0),
+ SD_BUS_PROPERTY("LLMNR", "s", bus_property_get_resolve_support, offsetof(Link, llmnr_support), 0),
+ SD_BUS_PROPERTY("MulticastDNS", "s", bus_property_get_resolve_support, offsetof(Link, mdns_support), 0),
+ SD_BUS_PROPERTY("DNSOverTLS", "s", property_get_dns_over_tls_mode, 0, 0),
+ SD_BUS_PROPERTY("DNSSEC", "s", property_get_dnssec_mode, 0, 0),
+ SD_BUS_PROPERTY("DNSSECNegativeTrustAnchors", "as", property_get_ntas, 0, 0),
+ SD_BUS_PROPERTY("DNSSECSupported", "b", property_get_dnssec_supported, 0, 0),
+
+ SD_BUS_METHOD("SetDNS", "a(iay)", NULL, bus_link_method_set_dns_servers, 0),
+ SD_BUS_METHOD("SetDomains", "a(sb)", NULL, bus_link_method_set_domains, 0),
+ SD_BUS_METHOD("SetDefaultRoute", "b", NULL, bus_link_method_set_default_route, 0),
+ SD_BUS_METHOD("SetLLMNR", "s", NULL, bus_link_method_set_llmnr, 0),
+ SD_BUS_METHOD("SetMulticastDNS", "s", NULL, bus_link_method_set_mdns, 0),
+ SD_BUS_METHOD("SetDNSOverTLS", "s", NULL, bus_link_method_set_dns_over_tls, 0),
+ SD_BUS_METHOD("SetDNSSEC", "s", NULL, bus_link_method_set_dnssec, 0),
+ SD_BUS_METHOD("SetDNSSECNegativeTrustAnchors", "as", NULL, bus_link_method_set_dnssec_negative_trust_anchors, 0),
+ SD_BUS_METHOD("Revert", NULL, NULL, bus_link_method_revert, 0),
+
+ SD_BUS_VTABLE_END
+};
+
+int link_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ _cleanup_free_ char *e = NULL;
+ Manager *m = userdata;
+ int ifindex;
+ Link *link;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = sd_bus_path_decode(path, "/org/freedesktop/resolve1/link", &e);
+ if (r <= 0)
+ return 0;
+
+ r = parse_ifindex(e, &ifindex);
+ if (r < 0)
+ return 0;
+
+ link = hashmap_get(m->links, INT_TO_PTR(ifindex));
+ if (!link)
+ return 0;
+
+ *found = link;
+ return 1;
+}
+
+char *link_bus_path(Link *link) {
+ _cleanup_free_ char *ifindex = NULL;
+ char *p;
+ int r;
+
+ assert(link);
+
+ if (asprintf(&ifindex, "%i", link->ifindex) < 0)
+ return NULL;
+
+ r = sd_bus_path_encode("/org/freedesktop/resolve1/link", ifindex, &p);
+ if (r < 0)
+ return NULL;
+
+ return p;
+}
+
+int link_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ Link *link;
+ Iterator i;
+ unsigned c = 0;
+
+ assert(bus);
+ assert(path);
+ assert(m);
+ assert(nodes);
+
+ l = new0(char*, hashmap_size(m->links) + 1);
+ if (!l)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(link, m->links, i) {
+ char *p;
+
+ p = link_bus_path(link);
+ if (!p)
+ return -ENOMEM;
+
+ l[c++] = p;
+ }
+
+ l[c] = NULL;
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
diff --git a/src/resolve/resolved-link-bus.h b/src/resolve/resolved-link-bus.h
new file mode 100644
index 0000000..6717251
--- /dev/null
+++ b/src/resolve/resolved-link-bus.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "resolved-link.h"
+
+extern const sd_bus_vtable link_vtable[];
+
+int link_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error);
+char *link_bus_path(Link *link);
+int link_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error);
+
+int bus_link_method_set_dns_servers(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_domains(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_default_route(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_llmnr(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_mdns(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_dns_over_tls(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_dnssec(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_dnssec_negative_trust_anchors(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_revert(sd_bus_message *message, void *userdata, sd_bus_error *error);
diff --git a/src/resolve/resolved-link.c b/src/resolve/resolved-link.c
new file mode 100644
index 0000000..44f70ac
--- /dev/null
+++ b/src/resolve/resolved-link.c
@@ -0,0 +1,1394 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+#include <stdio_ext.h>
+
+#include "sd-network.h"
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "resolved-link.h"
+#include "resolved-llmnr.h"
+#include "resolved-mdns.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+int link_new(Manager *m, Link **ret, int ifindex) {
+ _cleanup_(link_freep) Link *l = NULL;
+ int r;
+
+ assert(m);
+ assert(ifindex > 0);
+
+ r = hashmap_ensure_allocated(&m->links, NULL);
+ if (r < 0)
+ return r;
+
+ l = new(Link, 1);
+ if (!l)
+ return -ENOMEM;
+
+ *l = (Link) {
+ .ifindex = ifindex,
+ .default_route = -1,
+ .llmnr_support = RESOLVE_SUPPORT_YES,
+ .mdns_support = RESOLVE_SUPPORT_NO,
+ .dnssec_mode = _DNSSEC_MODE_INVALID,
+ .dns_over_tls_mode = _DNS_OVER_TLS_MODE_INVALID,
+ .operstate = IF_OPER_UNKNOWN,
+ };
+
+ if (asprintf(&l->state_file, "/run/systemd/resolve/netif/%i", ifindex) < 0)
+ return -ENOMEM;
+
+ r = hashmap_put(m->links, INT_TO_PTR(ifindex), l);
+ if (r < 0)
+ return r;
+
+ l->manager = m;
+
+ if (ret)
+ *ret = l;
+ l = NULL;
+
+ return 0;
+}
+
+void link_flush_settings(Link *l) {
+ assert(l);
+
+ l->default_route = -1;
+ l->llmnr_support = RESOLVE_SUPPORT_YES;
+ l->mdns_support = RESOLVE_SUPPORT_NO;
+ l->dnssec_mode = _DNSSEC_MODE_INVALID;
+ l->dns_over_tls_mode = _DNS_OVER_TLS_MODE_INVALID;
+
+ dns_server_unlink_all(l->dns_servers);
+ dns_search_domain_unlink_all(l->search_domains);
+
+ l->dnssec_negative_trust_anchors = set_free_free(l->dnssec_negative_trust_anchors);
+}
+
+Link *link_free(Link *l) {
+ if (!l)
+ return NULL;
+
+ /* Send goodbye messages. */
+ dns_scope_announce(l->mdns_ipv4_scope, true);
+ dns_scope_announce(l->mdns_ipv6_scope, true);
+
+ link_flush_settings(l);
+
+ while (l->addresses)
+ (void) link_address_free(l->addresses);
+
+ if (l->manager)
+ hashmap_remove(l->manager->links, INT_TO_PTR(l->ifindex));
+
+ dns_scope_free(l->unicast_scope);
+ dns_scope_free(l->llmnr_ipv4_scope);
+ dns_scope_free(l->llmnr_ipv6_scope);
+ dns_scope_free(l->mdns_ipv4_scope);
+ dns_scope_free(l->mdns_ipv6_scope);
+
+ free(l->state_file);
+
+ return mfree(l);
+}
+
+void link_allocate_scopes(Link *l) {
+ bool unicast_relevant;
+ int r;
+
+ assert(l);
+
+ /* If a link that used to be relevant is no longer, or a link that did not use to be relevant now becomes
+ * relevant, let's reinit the learnt global DNS server information, since we might talk to different servers
+ * now, even if they have the same addresses as before. */
+
+ unicast_relevant = link_relevant(l, AF_UNSPEC, false);
+ if (unicast_relevant != l->unicast_relevant) {
+ l->unicast_relevant = unicast_relevant;
+
+ dns_server_reset_features_all(l->manager->fallback_dns_servers);
+ dns_server_reset_features_all(l->manager->dns_servers);
+
+ /* Also, flush the global unicast scope, to deal with split horizon setups, where talking through one
+ * interface reveals different DNS zones than through others. */
+ if (l->manager->unicast_scope)
+ dns_cache_flush(&l->manager->unicast_scope->cache);
+ }
+
+ /* And now, allocate all scopes that makes sense now if we didn't have them yet, and drop those which we don't
+ * need anymore */
+
+ if (unicast_relevant && l->dns_servers) {
+ if (!l->unicast_scope) {
+ dns_server_reset_features_all(l->dns_servers);
+
+ r = dns_scope_new(l->manager, &l->unicast_scope, l, DNS_PROTOCOL_DNS, AF_UNSPEC);
+ if (r < 0)
+ log_warning_errno(r, "Failed to allocate DNS scope: %m");
+ }
+ } else
+ l->unicast_scope = dns_scope_free(l->unicast_scope);
+
+ if (link_relevant(l, AF_INET, true) &&
+ l->llmnr_support != RESOLVE_SUPPORT_NO &&
+ l->manager->llmnr_support != RESOLVE_SUPPORT_NO) {
+ if (!l->llmnr_ipv4_scope) {
+ r = dns_scope_new(l->manager, &l->llmnr_ipv4_scope, l, DNS_PROTOCOL_LLMNR, AF_INET);
+ if (r < 0)
+ log_warning_errno(r, "Failed to allocate LLMNR IPv4 scope: %m");
+ }
+ } else
+ l->llmnr_ipv4_scope = dns_scope_free(l->llmnr_ipv4_scope);
+
+ if (link_relevant(l, AF_INET6, true) &&
+ l->llmnr_support != RESOLVE_SUPPORT_NO &&
+ l->manager->llmnr_support != RESOLVE_SUPPORT_NO &&
+ socket_ipv6_is_supported()) {
+ if (!l->llmnr_ipv6_scope) {
+ r = dns_scope_new(l->manager, &l->llmnr_ipv6_scope, l, DNS_PROTOCOL_LLMNR, AF_INET6);
+ if (r < 0)
+ log_warning_errno(r, "Failed to allocate LLMNR IPv6 scope: %m");
+ }
+ } else
+ l->llmnr_ipv6_scope = dns_scope_free(l->llmnr_ipv6_scope);
+
+ if (link_relevant(l, AF_INET, true) &&
+ l->mdns_support != RESOLVE_SUPPORT_NO &&
+ l->manager->mdns_support != RESOLVE_SUPPORT_NO) {
+ if (!l->mdns_ipv4_scope) {
+ r = dns_scope_new(l->manager, &l->mdns_ipv4_scope, l, DNS_PROTOCOL_MDNS, AF_INET);
+ if (r < 0)
+ log_warning_errno(r, "Failed to allocate mDNS IPv4 scope: %m");
+ }
+ } else
+ l->mdns_ipv4_scope = dns_scope_free(l->mdns_ipv4_scope);
+
+ if (link_relevant(l, AF_INET6, true) &&
+ l->mdns_support != RESOLVE_SUPPORT_NO &&
+ l->manager->mdns_support != RESOLVE_SUPPORT_NO) {
+ if (!l->mdns_ipv6_scope) {
+ r = dns_scope_new(l->manager, &l->mdns_ipv6_scope, l, DNS_PROTOCOL_MDNS, AF_INET6);
+ if (r < 0)
+ log_warning_errno(r, "Failed to allocate mDNS IPv6 scope: %m");
+ }
+ } else
+ l->mdns_ipv6_scope = dns_scope_free(l->mdns_ipv6_scope);
+}
+
+void link_add_rrs(Link *l, bool force_remove) {
+ LinkAddress *a;
+ int r;
+
+ LIST_FOREACH(addresses, a, l->addresses)
+ link_address_add_rrs(a, force_remove);
+
+ if (!force_remove &&
+ l->mdns_support == RESOLVE_SUPPORT_YES &&
+ l->manager->mdns_support == RESOLVE_SUPPORT_YES) {
+
+ if (l->mdns_ipv4_scope) {
+ r = dns_scope_add_dnssd_services(l->mdns_ipv4_scope);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add IPv4 DNS-SD services: %m");
+ }
+
+ if (l->mdns_ipv6_scope) {
+ r = dns_scope_add_dnssd_services(l->mdns_ipv6_scope);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add IPv6 DNS-SD services: %m");
+ }
+
+ } else {
+
+ if (l->mdns_ipv4_scope) {
+ r = dns_scope_remove_dnssd_services(l->mdns_ipv4_scope);
+ if (r < 0)
+ log_warning_errno(r, "Failed to remove IPv4 DNS-SD services: %m");
+ }
+
+ if (l->mdns_ipv6_scope) {
+ r = dns_scope_remove_dnssd_services(l->mdns_ipv6_scope);
+ if (r < 0)
+ log_warning_errno(r, "Failed to remove IPv6 DNS-SD services: %m");
+ }
+ }
+}
+
+int link_process_rtnl(Link *l, sd_netlink_message *m) {
+ const char *n = NULL;
+ int r;
+
+ assert(l);
+ assert(m);
+
+ r = sd_rtnl_message_link_get_flags(m, &l->flags);
+ if (r < 0)
+ return r;
+
+ (void) sd_netlink_message_read_u32(m, IFLA_MTU, &l->mtu);
+ (void) sd_netlink_message_read_u8(m, IFLA_OPERSTATE, &l->operstate);
+
+ if (sd_netlink_message_read_string(m, IFLA_IFNAME, &n) >= 0) {
+ strncpy(l->name, n, sizeof(l->name)-1);
+ char_array_0(l->name);
+ }
+
+ link_allocate_scopes(l);
+ link_add_rrs(l, false);
+
+ return 0;
+}
+
+static int link_update_dns_server_one(Link *l, const char *name) {
+ union in_addr_union a;
+ DnsServer *s;
+ int family, r;
+
+ assert(l);
+ assert(name);
+
+ r = in_addr_from_string_auto(name, &family, &a);
+ if (r < 0)
+ return r;
+
+ s = dns_server_find(l->dns_servers, family, &a, 0);
+ if (s) {
+ dns_server_move_back_and_unmark(s);
+ return 0;
+ }
+
+ return dns_server_new(l->manager, NULL, DNS_SERVER_LINK, l, family, &a, 0);
+}
+
+static int link_update_dns_servers(Link *l) {
+ _cleanup_strv_free_ char **nameservers = NULL;
+ char **nameserver;
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_dns(l->ifindex, &nameservers);
+ if (r == -ENODATA) {
+ r = 0;
+ goto clear;
+ }
+ if (r < 0)
+ goto clear;
+
+ dns_server_mark_all(l->dns_servers);
+
+ STRV_FOREACH(nameserver, nameservers) {
+ r = link_update_dns_server_one(l, *nameserver);
+ if (r < 0)
+ goto clear;
+ }
+
+ dns_server_unlink_marked(l->dns_servers);
+ return 0;
+
+clear:
+ dns_server_unlink_all(l->dns_servers);
+ return r;
+}
+
+static int link_update_default_route(Link *l) {
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_dns_default_route(l->ifindex);
+ if (r == -ENODATA) {
+ r = 0;
+ goto clear;
+ }
+ if (r < 0)
+ goto clear;
+
+ l->default_route = r > 0;
+ return 0;
+
+clear:
+ l->default_route = -1;
+ return r;
+}
+
+static int link_update_llmnr_support(Link *l) {
+ _cleanup_free_ char *b = NULL;
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_llmnr(l->ifindex, &b);
+ if (r == -ENODATA) {
+ r = 0;
+ goto clear;
+ }
+ if (r < 0)
+ goto clear;
+
+ l->llmnr_support = resolve_support_from_string(b);
+ if (l->llmnr_support < 0) {
+ r = -EINVAL;
+ goto clear;
+ }
+
+ return 0;
+
+clear:
+ l->llmnr_support = RESOLVE_SUPPORT_YES;
+ return r;
+}
+
+static int link_update_mdns_support(Link *l) {
+ _cleanup_free_ char *b = NULL;
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_mdns(l->ifindex, &b);
+ if (r == -ENODATA) {
+ r = 0;
+ goto clear;
+ }
+ if (r < 0)
+ goto clear;
+
+ l->mdns_support = resolve_support_from_string(b);
+ if (l->mdns_support < 0) {
+ r = -EINVAL;
+ goto clear;
+ }
+
+ return 0;
+
+clear:
+ l->mdns_support = RESOLVE_SUPPORT_NO;
+ return r;
+}
+
+void link_set_dns_over_tls_mode(Link *l, DnsOverTlsMode mode) {
+
+ assert(l);
+
+#if ! ENABLE_DNS_OVER_TLS
+ if (mode != DNS_OVER_TLS_NO)
+ log_warning("DNS-over-TLS option for the link cannot be set to opportunistic when systemd-resolved is built without DNS-over-TLS support. Turning off DNS-over-TLS support.");
+ return;
+#endif
+
+ l->dns_over_tls_mode = mode;
+}
+
+static int link_update_dns_over_tls_mode(Link *l) {
+ _cleanup_free_ char *b = NULL;
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_dns_over_tls(l->ifindex, &b);
+ if (r == -ENODATA) {
+ r = 0;
+ goto clear;
+ }
+ if (r < 0)
+ goto clear;
+
+ l->dns_over_tls_mode = dns_over_tls_mode_from_string(b);
+ if (l->dns_over_tls_mode < 0) {
+ r = -EINVAL;
+ goto clear;
+ }
+
+ return 0;
+
+clear:
+ l->dns_over_tls_mode = _DNS_OVER_TLS_MODE_INVALID;
+ return r;
+}
+
+void link_set_dnssec_mode(Link *l, DnssecMode mode) {
+
+ assert(l);
+
+#if ! HAVE_GCRYPT
+ if (IN_SET(mode, DNSSEC_YES, DNSSEC_ALLOW_DOWNGRADE))
+ log_warning("DNSSEC option for the link cannot be enabled or set to allow-downgrade when systemd-resolved is built without gcrypt support. Turning off DNSSEC support.");
+ return;
+#endif
+
+ if (l->dnssec_mode == mode)
+ return;
+
+ if ((l->dnssec_mode == _DNSSEC_MODE_INVALID) ||
+ (l->dnssec_mode == DNSSEC_NO && mode != DNSSEC_NO) ||
+ (l->dnssec_mode == DNSSEC_ALLOW_DOWNGRADE && mode == DNSSEC_YES)) {
+
+ /* When switching from non-DNSSEC mode to DNSSEC mode, flush the cache. Also when switching from the
+ * allow-downgrade mode to full DNSSEC mode, flush it too. */
+ if (l->unicast_scope)
+ dns_cache_flush(&l->unicast_scope->cache);
+ }
+
+ l->dnssec_mode = mode;
+}
+
+static int link_update_dnssec_mode(Link *l) {
+ _cleanup_free_ char *m = NULL;
+ DnssecMode mode;
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_dnssec(l->ifindex, &m);
+ if (r == -ENODATA) {
+ r = 0;
+ goto clear;
+ }
+ if (r < 0)
+ goto clear;
+
+ mode = dnssec_mode_from_string(m);
+ if (mode < 0) {
+ r = -EINVAL;
+ goto clear;
+ }
+
+ link_set_dnssec_mode(l, mode);
+
+ return 0;
+
+clear:
+ l->dnssec_mode = _DNSSEC_MODE_INVALID;
+ return r;
+}
+
+static int link_update_dnssec_negative_trust_anchors(Link *l) {
+ _cleanup_strv_free_ char **ntas = NULL;
+ _cleanup_set_free_free_ Set *ns = NULL;
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_dnssec_negative_trust_anchors(l->ifindex, &ntas);
+ if (r == -ENODATA) {
+ r = 0;
+ goto clear;
+ }
+ if (r < 0)
+ goto clear;
+
+ ns = set_new(&dns_name_hash_ops);
+ if (!ns)
+ return -ENOMEM;
+
+ r = set_put_strdupv(ns, ntas);
+ if (r < 0)
+ return r;
+
+ set_free_free(l->dnssec_negative_trust_anchors);
+ l->dnssec_negative_trust_anchors = TAKE_PTR(ns);
+
+ return 0;
+
+clear:
+ l->dnssec_negative_trust_anchors = set_free_free(l->dnssec_negative_trust_anchors);
+ return r;
+}
+
+static int link_update_search_domain_one(Link *l, const char *name, bool route_only) {
+ DnsSearchDomain *d;
+ int r;
+
+ assert(l);
+ assert(name);
+
+ r = dns_search_domain_find(l->search_domains, name, &d);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ dns_search_domain_move_back_and_unmark(d);
+ else {
+ r = dns_search_domain_new(l->manager, &d, DNS_SEARCH_DOMAIN_LINK, l, name);
+ if (r < 0)
+ return r;
+ }
+
+ d->route_only = route_only;
+ return 0;
+}
+
+static int link_update_search_domains(Link *l) {
+ _cleanup_strv_free_ char **sdomains = NULL, **rdomains = NULL;
+ char **i;
+ int r, q;
+
+ assert(l);
+
+ r = sd_network_link_get_search_domains(l->ifindex, &sdomains);
+ if (r < 0 && r != -ENODATA)
+ goto clear;
+
+ q = sd_network_link_get_route_domains(l->ifindex, &rdomains);
+ if (q < 0 && q != -ENODATA) {
+ r = q;
+ goto clear;
+ }
+
+ if (r == -ENODATA && q == -ENODATA) {
+ /* networkd knows nothing about this interface, and that's fine. */
+ r = 0;
+ goto clear;
+ }
+
+ dns_search_domain_mark_all(l->search_domains);
+
+ STRV_FOREACH(i, sdomains) {
+ r = link_update_search_domain_one(l, *i, false);
+ if (r < 0)
+ goto clear;
+ }
+
+ STRV_FOREACH(i, rdomains) {
+ r = link_update_search_domain_one(l, *i, true);
+ if (r < 0)
+ goto clear;
+ }
+
+ dns_search_domain_unlink_marked(l->search_domains);
+ return 0;
+
+clear:
+ dns_search_domain_unlink_all(l->search_domains);
+ return r;
+}
+
+static int link_is_managed(Link *l) {
+ _cleanup_free_ char *state = NULL;
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_setup_state(l->ifindex, &state);
+ if (r == -ENODATA)
+ return 0;
+ if (r < 0)
+ return r;
+
+ return !STR_IN_SET(state, "pending", "unmanaged");
+}
+
+static void link_read_settings(Link *l) {
+ int r;
+
+ assert(l);
+
+ /* Read settings from networkd, except when networkd is not managing this interface. */
+
+ r = link_is_managed(l);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to determine whether interface %s is managed: %m", l->name);
+ return;
+ }
+ if (r == 0) {
+
+ /* If this link used to be managed, but is now unmanaged, flush all our settings — but only once. */
+ if (l->is_managed)
+ link_flush_settings(l);
+
+ l->is_managed = false;
+ return;
+ }
+
+ l->is_managed = true;
+
+ r = link_update_dns_servers(l);
+ if (r < 0)
+ log_warning_errno(r, "Failed to read DNS servers for interface %s, ignoring: %m", l->name);
+
+ r = link_update_llmnr_support(l);
+ if (r < 0)
+ log_warning_errno(r, "Failed to read LLMNR support for interface %s, ignoring: %m", l->name);
+
+ r = link_update_mdns_support(l);
+ if (r < 0)
+ log_warning_errno(r, "Failed to read mDNS support for interface %s, ignoring: %m", l->name);
+
+ r = link_update_dns_over_tls_mode(l);
+ if (r < 0)
+ log_warning_errno(r, "Failed to read DNS-over-TLS mode for interface %s, ignoring: %m", l->name);
+
+ r = link_update_dnssec_mode(l);
+ if (r < 0)
+ log_warning_errno(r, "Failed to read DNSSEC mode for interface %s, ignoring: %m", l->name);
+
+ r = link_update_dnssec_negative_trust_anchors(l);
+ if (r < 0)
+ log_warning_errno(r, "Failed to read DNSSEC negative trust anchors for interface %s, ignoring: %m", l->name);
+
+ r = link_update_search_domains(l);
+ if (r < 0)
+ log_warning_errno(r, "Failed to read search domains for interface %s, ignoring: %m", l->name);
+
+ r = link_update_default_route(l);
+ if (r < 0)
+ log_warning_errno(r, "Failed to read default route setting for interface %s, proceeding anyway: %m", l->name);
+}
+
+int link_update(Link *l) {
+ int r;
+
+ assert(l);
+
+ link_read_settings(l);
+ link_load_user(l);
+
+ if (l->llmnr_support != RESOLVE_SUPPORT_NO) {
+ r = manager_llmnr_start(l->manager);
+ if (r < 0)
+ return r;
+ }
+
+ if (l->mdns_support != RESOLVE_SUPPORT_NO) {
+ r = manager_mdns_start(l->manager);
+ if (r < 0)
+ return r;
+ }
+
+ link_allocate_scopes(l);
+ link_add_rrs(l, false);
+
+ return 0;
+}
+
+bool link_relevant(Link *l, int family, bool local_multicast) {
+ _cleanup_free_ char *state = NULL;
+ LinkAddress *a;
+
+ assert(l);
+
+ /* A link is relevant for local multicast traffic if it isn't a loopback device, has a link
+ * beat, can do multicast and has at least one link-local (or better) IP address.
+ *
+ * A link is relevant for non-multicast traffic if it isn't a loopback device, has a link beat, and has at
+ * least one routable address. */
+
+ if (l->flags & (IFF_LOOPBACK|IFF_DORMANT))
+ return false;
+
+ if ((l->flags & (IFF_UP|IFF_LOWER_UP)) != (IFF_UP|IFF_LOWER_UP))
+ return false;
+
+ if (local_multicast) {
+ if ((l->flags & IFF_MULTICAST) != IFF_MULTICAST)
+ return false;
+ }
+
+ /* Check kernel operstate
+ * https://www.kernel.org/doc/Documentation/networking/operstates.txt */
+ if (!IN_SET(l->operstate, IF_OPER_UNKNOWN, IF_OPER_UP))
+ return false;
+
+ (void) sd_network_link_get_operational_state(l->ifindex, &state);
+ if (state && !STR_IN_SET(state, "unknown", "degraded", "routable"))
+ return false;
+
+ LIST_FOREACH(addresses, a, l->addresses)
+ if ((family == AF_UNSPEC || a->family == family) && link_address_relevant(a, local_multicast))
+ return true;
+
+ return false;
+}
+
+LinkAddress *link_find_address(Link *l, int family, const union in_addr_union *in_addr) {
+ LinkAddress *a;
+
+ assert(l);
+
+ LIST_FOREACH(addresses, a, l->addresses)
+ if (a->family == family && in_addr_equal(family, &a->in_addr, in_addr))
+ return a;
+
+ return NULL;
+}
+
+DnsServer* link_set_dns_server(Link *l, DnsServer *s) {
+ assert(l);
+
+ if (l->current_dns_server == s)
+ return s;
+
+ if (s)
+ log_debug("Switching to DNS server %s for interface %s.", dns_server_string(s), l->name);
+
+ dns_server_unref(l->current_dns_server);
+ l->current_dns_server = dns_server_ref(s);
+
+ if (l->unicast_scope)
+ dns_cache_flush(&l->unicast_scope->cache);
+
+ return s;
+}
+
+DnsServer *link_get_dns_server(Link *l) {
+ assert(l);
+
+ if (!l->current_dns_server)
+ link_set_dns_server(l, l->dns_servers);
+
+ return l->current_dns_server;
+}
+
+void link_next_dns_server(Link *l) {
+ assert(l);
+
+ if (!l->current_dns_server)
+ return;
+
+ /* Change to the next one, but make sure to follow the linked
+ * list only if this server is actually still linked. */
+ if (l->current_dns_server->linked && l->current_dns_server->servers_next) {
+ link_set_dns_server(l, l->current_dns_server->servers_next);
+ return;
+ }
+
+ link_set_dns_server(l, l->dns_servers);
+}
+
+DnsOverTlsMode link_get_dns_over_tls_mode(Link *l) {
+ assert(l);
+
+ if (l->dns_over_tls_mode != _DNS_OVER_TLS_MODE_INVALID)
+ return l->dns_over_tls_mode;
+
+ return manager_get_dns_over_tls_mode(l->manager);
+}
+
+DnssecMode link_get_dnssec_mode(Link *l) {
+ assert(l);
+
+ if (l->dnssec_mode != _DNSSEC_MODE_INVALID)
+ return l->dnssec_mode;
+
+ return manager_get_dnssec_mode(l->manager);
+}
+
+bool link_dnssec_supported(Link *l) {
+ DnsServer *server;
+
+ assert(l);
+
+ if (link_get_dnssec_mode(l) == DNSSEC_NO)
+ return false;
+
+ server = link_get_dns_server(l);
+ if (server)
+ return dns_server_dnssec_supported(server);
+
+ return true;
+}
+
+int link_address_new(Link *l, LinkAddress **ret, int family, const union in_addr_union *in_addr) {
+ LinkAddress *a;
+
+ assert(l);
+ assert(in_addr);
+
+ a = new0(LinkAddress, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->family = family;
+ a->in_addr = *in_addr;
+
+ a->link = l;
+ LIST_PREPEND(addresses, l->addresses, a);
+ l->n_addresses++;
+
+ if (ret)
+ *ret = a;
+
+ return 0;
+}
+
+LinkAddress *link_address_free(LinkAddress *a) {
+ if (!a)
+ return NULL;
+
+ if (a->link) {
+ LIST_REMOVE(addresses, a->link->addresses, a);
+
+ assert(a->link->n_addresses > 0);
+ a->link->n_addresses--;
+
+ if (a->llmnr_address_rr) {
+ if (a->family == AF_INET && a->link->llmnr_ipv4_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv4_scope->zone, a->llmnr_address_rr);
+ else if (a->family == AF_INET6 && a->link->llmnr_ipv6_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv6_scope->zone, a->llmnr_address_rr);
+ }
+
+ if (a->llmnr_ptr_rr) {
+ if (a->family == AF_INET && a->link->llmnr_ipv4_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv4_scope->zone, a->llmnr_ptr_rr);
+ else if (a->family == AF_INET6 && a->link->llmnr_ipv6_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv6_scope->zone, a->llmnr_ptr_rr);
+ }
+
+ if (a->mdns_address_rr) {
+ if (a->family == AF_INET && a->link->mdns_ipv4_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv4_scope->zone, a->mdns_address_rr);
+ else if (a->family == AF_INET6 && a->link->mdns_ipv6_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv6_scope->zone, a->mdns_address_rr);
+ }
+
+ if (a->mdns_ptr_rr) {
+ if (a->family == AF_INET && a->link->mdns_ipv4_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv4_scope->zone, a->mdns_ptr_rr);
+ else if (a->family == AF_INET6 && a->link->mdns_ipv6_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv6_scope->zone, a->mdns_ptr_rr);
+ }
+ }
+
+ dns_resource_record_unref(a->llmnr_address_rr);
+ dns_resource_record_unref(a->llmnr_ptr_rr);
+ dns_resource_record_unref(a->mdns_address_rr);
+ dns_resource_record_unref(a->mdns_ptr_rr);
+
+ return mfree(a);
+}
+
+void link_address_add_rrs(LinkAddress *a, bool force_remove) {
+ int r;
+
+ assert(a);
+
+ if (a->family == AF_INET) {
+
+ if (!force_remove &&
+ link_address_relevant(a, true) &&
+ a->link->llmnr_ipv4_scope &&
+ a->link->llmnr_support == RESOLVE_SUPPORT_YES &&
+ a->link->manager->llmnr_support == RESOLVE_SUPPORT_YES) {
+
+ if (!a->link->manager->llmnr_host_ipv4_key) {
+ a->link->manager->llmnr_host_ipv4_key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_A, a->link->manager->llmnr_hostname);
+ if (!a->link->manager->llmnr_host_ipv4_key) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!a->llmnr_address_rr) {
+ a->llmnr_address_rr = dns_resource_record_new(a->link->manager->llmnr_host_ipv4_key);
+ if (!a->llmnr_address_rr) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ a->llmnr_address_rr->a.in_addr = a->in_addr.in;
+ a->llmnr_address_rr->ttl = LLMNR_DEFAULT_TTL;
+ }
+
+ if (!a->llmnr_ptr_rr) {
+ r = dns_resource_record_new_reverse(&a->llmnr_ptr_rr, a->family, &a->in_addr, a->link->manager->llmnr_hostname);
+ if (r < 0)
+ goto fail;
+
+ a->llmnr_ptr_rr->ttl = LLMNR_DEFAULT_TTL;
+ }
+
+ r = dns_zone_put(&a->link->llmnr_ipv4_scope->zone, a->link->llmnr_ipv4_scope, a->llmnr_address_rr, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add A record to LLMNR zone: %m");
+
+ r = dns_zone_put(&a->link->llmnr_ipv4_scope->zone, a->link->llmnr_ipv4_scope, a->llmnr_ptr_rr, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add IPv4 PTR record to LLMNR zone: %m");
+ } else {
+ if (a->llmnr_address_rr) {
+ if (a->link->llmnr_ipv4_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv4_scope->zone, a->llmnr_address_rr);
+ a->llmnr_address_rr = dns_resource_record_unref(a->llmnr_address_rr);
+ }
+
+ if (a->llmnr_ptr_rr) {
+ if (a->link->llmnr_ipv4_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv4_scope->zone, a->llmnr_ptr_rr);
+ a->llmnr_ptr_rr = dns_resource_record_unref(a->llmnr_ptr_rr);
+ }
+ }
+
+ if (!force_remove &&
+ link_address_relevant(a, true) &&
+ a->link->mdns_ipv4_scope &&
+ a->link->mdns_support == RESOLVE_SUPPORT_YES &&
+ a->link->manager->mdns_support == RESOLVE_SUPPORT_YES) {
+ if (!a->link->manager->mdns_host_ipv4_key) {
+ a->link->manager->mdns_host_ipv4_key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_A, a->link->manager->mdns_hostname);
+ if (!a->link->manager->mdns_host_ipv4_key) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!a->mdns_address_rr) {
+ a->mdns_address_rr = dns_resource_record_new(a->link->manager->mdns_host_ipv4_key);
+ if (!a->mdns_address_rr) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ a->mdns_address_rr->a.in_addr = a->in_addr.in;
+ a->mdns_address_rr->ttl = MDNS_DEFAULT_TTL;
+ }
+
+ if (!a->mdns_ptr_rr) {
+ r = dns_resource_record_new_reverse(&a->mdns_ptr_rr, a->family, &a->in_addr, a->link->manager->mdns_hostname);
+ if (r < 0)
+ goto fail;
+
+ a->mdns_ptr_rr->ttl = MDNS_DEFAULT_TTL;
+ }
+
+ r = dns_zone_put(&a->link->mdns_ipv4_scope->zone, a->link->mdns_ipv4_scope, a->mdns_address_rr, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add A record to MDNS zone: %m");
+
+ r = dns_zone_put(&a->link->mdns_ipv4_scope->zone, a->link->mdns_ipv4_scope, a->mdns_ptr_rr, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add IPv4 PTR record to MDNS zone: %m");
+ } else {
+ if (a->mdns_address_rr) {
+ if (a->link->mdns_ipv4_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv4_scope->zone, a->mdns_address_rr);
+ a->mdns_address_rr = dns_resource_record_unref(a->mdns_address_rr);
+ }
+
+ if (a->mdns_ptr_rr) {
+ if (a->link->mdns_ipv4_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv4_scope->zone, a->mdns_ptr_rr);
+ a->mdns_ptr_rr = dns_resource_record_unref(a->mdns_ptr_rr);
+ }
+ }
+ }
+
+ if (a->family == AF_INET6) {
+
+ if (!force_remove &&
+ link_address_relevant(a, true) &&
+ a->link->llmnr_ipv6_scope &&
+ a->link->llmnr_support == RESOLVE_SUPPORT_YES &&
+ a->link->manager->llmnr_support == RESOLVE_SUPPORT_YES) {
+
+ if (!a->link->manager->llmnr_host_ipv6_key) {
+ a->link->manager->llmnr_host_ipv6_key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_AAAA, a->link->manager->llmnr_hostname);
+ if (!a->link->manager->llmnr_host_ipv6_key) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!a->llmnr_address_rr) {
+ a->llmnr_address_rr = dns_resource_record_new(a->link->manager->llmnr_host_ipv6_key);
+ if (!a->llmnr_address_rr) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ a->llmnr_address_rr->aaaa.in6_addr = a->in_addr.in6;
+ a->llmnr_address_rr->ttl = LLMNR_DEFAULT_TTL;
+ }
+
+ if (!a->llmnr_ptr_rr) {
+ r = dns_resource_record_new_reverse(&a->llmnr_ptr_rr, a->family, &a->in_addr, a->link->manager->llmnr_hostname);
+ if (r < 0)
+ goto fail;
+
+ a->llmnr_ptr_rr->ttl = LLMNR_DEFAULT_TTL;
+ }
+
+ r = dns_zone_put(&a->link->llmnr_ipv6_scope->zone, a->link->llmnr_ipv6_scope, a->llmnr_address_rr, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add AAAA record to LLMNR zone: %m");
+
+ r = dns_zone_put(&a->link->llmnr_ipv6_scope->zone, a->link->llmnr_ipv6_scope, a->llmnr_ptr_rr, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add IPv6 PTR record to LLMNR zone: %m");
+ } else {
+ if (a->llmnr_address_rr) {
+ if (a->link->llmnr_ipv6_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv6_scope->zone, a->llmnr_address_rr);
+ a->llmnr_address_rr = dns_resource_record_unref(a->llmnr_address_rr);
+ }
+
+ if (a->llmnr_ptr_rr) {
+ if (a->link->llmnr_ipv6_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv6_scope->zone, a->llmnr_ptr_rr);
+ a->llmnr_ptr_rr = dns_resource_record_unref(a->llmnr_ptr_rr);
+ }
+ }
+
+ if (!force_remove &&
+ link_address_relevant(a, true) &&
+ a->link->mdns_ipv6_scope &&
+ a->link->mdns_support == RESOLVE_SUPPORT_YES &&
+ a->link->manager->mdns_support == RESOLVE_SUPPORT_YES) {
+
+ if (!a->link->manager->mdns_host_ipv6_key) {
+ a->link->manager->mdns_host_ipv6_key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_AAAA, a->link->manager->mdns_hostname);
+ if (!a->link->manager->mdns_host_ipv6_key) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!a->mdns_address_rr) {
+ a->mdns_address_rr = dns_resource_record_new(a->link->manager->mdns_host_ipv6_key);
+ if (!a->mdns_address_rr) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ a->mdns_address_rr->aaaa.in6_addr = a->in_addr.in6;
+ a->mdns_address_rr->ttl = MDNS_DEFAULT_TTL;
+ }
+
+ if (!a->mdns_ptr_rr) {
+ r = dns_resource_record_new_reverse(&a->mdns_ptr_rr, a->family, &a->in_addr, a->link->manager->mdns_hostname);
+ if (r < 0)
+ goto fail;
+
+ a->mdns_ptr_rr->ttl = MDNS_DEFAULT_TTL;
+ }
+
+ r = dns_zone_put(&a->link->mdns_ipv6_scope->zone, a->link->mdns_ipv6_scope, a->mdns_address_rr, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add AAAA record to MDNS zone: %m");
+
+ r = dns_zone_put(&a->link->mdns_ipv6_scope->zone, a->link->mdns_ipv6_scope, a->mdns_ptr_rr, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add IPv6 PTR record to MDNS zone: %m");
+ } else {
+ if (a->mdns_address_rr) {
+ if (a->link->mdns_ipv6_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv6_scope->zone, a->mdns_address_rr);
+ a->mdns_address_rr = dns_resource_record_unref(a->mdns_address_rr);
+ }
+
+ if (a->mdns_ptr_rr) {
+ if (a->link->mdns_ipv6_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv6_scope->zone, a->mdns_ptr_rr);
+ a->mdns_ptr_rr = dns_resource_record_unref(a->mdns_ptr_rr);
+ }
+ }
+ }
+
+ return;
+
+fail:
+ log_debug_errno(r, "Failed to update address RRs: %m");
+}
+
+int link_address_update_rtnl(LinkAddress *a, sd_netlink_message *m) {
+ int r;
+ assert(a);
+ assert(m);
+
+ r = sd_rtnl_message_addr_get_flags(m, &a->flags);
+ if (r < 0)
+ return r;
+
+ sd_rtnl_message_addr_get_scope(m, &a->scope);
+
+ link_allocate_scopes(a->link);
+ link_add_rrs(a->link, false);
+
+ return 0;
+}
+
+bool link_address_relevant(LinkAddress *a, bool local_multicast) {
+ assert(a);
+
+ if (a->flags & (IFA_F_DEPRECATED|IFA_F_TENTATIVE))
+ return false;
+
+ if (a->scope >= (local_multicast ? RT_SCOPE_HOST : RT_SCOPE_LINK))
+ return false;
+
+ return true;
+}
+
+static bool link_needs_save(Link *l) {
+ assert(l);
+
+ /* Returns true if any of the settings where set different from the default */
+
+ if (l->is_managed)
+ return false;
+
+ if (l->llmnr_support != RESOLVE_SUPPORT_YES ||
+ l->mdns_support != RESOLVE_SUPPORT_NO ||
+ l->dnssec_mode != _DNSSEC_MODE_INVALID ||
+ l->dns_over_tls_mode != _DNS_OVER_TLS_MODE_INVALID)
+ return true;
+
+ if (l->dns_servers ||
+ l->search_domains)
+ return true;
+
+ if (!set_isempty(l->dnssec_negative_trust_anchors))
+ return true;
+
+ if (l->default_route >= 0)
+ return true;
+
+ return false;
+}
+
+int link_save_user(Link *l) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *v;
+ int r;
+
+ assert(l);
+ assert(l->state_file);
+
+ if (!link_needs_save(l)) {
+ (void) unlink(l->state_file);
+ return 0;
+ }
+
+ r = mkdir_parents(l->state_file, 0700);
+ if (r < 0)
+ goto fail;
+
+ r = fopen_temporary(l->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+ (void) fchmod(fileno(f), 0644);
+
+ fputs("# This is private data. Do not parse.\n", f);
+
+ v = resolve_support_to_string(l->llmnr_support);
+ if (v)
+ fprintf(f, "LLMNR=%s\n", v);
+
+ v = resolve_support_to_string(l->mdns_support);
+ if (v)
+ fprintf(f, "MDNS=%s\n", v);
+
+ v = dnssec_mode_to_string(l->dnssec_mode);
+ if (v)
+ fprintf(f, "DNSSEC=%s\n", v);
+
+ if (l->default_route >= 0)
+ fprintf(f, "DEFAULT_ROUTE=%s\n", yes_no(l->default_route));
+
+ if (l->dns_servers) {
+ DnsServer *server;
+
+ fputs("SERVERS=", f);
+ LIST_FOREACH(servers, server, l->dns_servers) {
+
+ if (server != l->dns_servers)
+ fputc(' ', f);
+
+ v = dns_server_string(server);
+ if (!v) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fputs(v, f);
+ }
+ fputc('\n', f);
+ }
+
+ if (l->search_domains) {
+ DnsSearchDomain *domain;
+
+ fputs("DOMAINS=", f);
+ LIST_FOREACH(domains, domain, l->search_domains) {
+
+ if (domain != l->search_domains)
+ fputc(' ', f);
+
+ if (domain->route_only)
+ fputc('~', f);
+
+ fputs(DNS_SEARCH_DOMAIN_NAME(domain), f);
+ }
+ fputc('\n', f);
+ }
+
+ if (!set_isempty(l->dnssec_negative_trust_anchors)) {
+ bool space = false;
+ Iterator i;
+ char *nta;
+
+ fputs("NTAS=", f);
+ SET_FOREACH(nta, l->dnssec_negative_trust_anchors, i) {
+
+ if (space)
+ fputc(' ', f);
+
+ fputs(nta, f);
+ space = true;
+ }
+ fputc('\n', f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, l->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(l->state_file);
+
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save link data %s: %m", l->state_file);
+}
+
+int link_load_user(Link *l) {
+ _cleanup_free_ char
+ *llmnr = NULL,
+ *mdns = NULL,
+ *dnssec = NULL,
+ *servers = NULL,
+ *domains = NULL,
+ *ntas = NULL,
+ *default_route = NULL;
+
+ ResolveSupport s;
+ const char *p;
+ int r;
+
+ assert(l);
+ assert(l->state_file);
+
+ /* Try to load only a single time */
+ if (l->loaded)
+ return 0;
+ l->loaded = true;
+
+ if (l->is_managed)
+ return 0; /* if the device is managed, then networkd is our configuration source, not the bus API */
+
+ r = parse_env_file(NULL, l->state_file,
+ "LLMNR", &llmnr,
+ "MDNS", &mdns,
+ "DNSSEC", &dnssec,
+ "SERVERS", &servers,
+ "DOMAINS", &domains,
+ "NTAS", &ntas,
+ "DEFAULT_ROUTE", &default_route);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ goto fail;
+
+ link_flush_settings(l);
+
+ /* If we can't recognize the LLMNR or MDNS setting we don't override the default */
+ s = resolve_support_from_string(llmnr);
+ if (s >= 0)
+ l->llmnr_support = s;
+
+ s = resolve_support_from_string(mdns);
+ if (s >= 0)
+ l->mdns_support = s;
+
+ r = parse_boolean(default_route);
+ if (r >= 0)
+ l->default_route = r;
+
+ /* If we can't recognize the DNSSEC setting, then set it to invalid, so that the daemon default is used. */
+ l->dnssec_mode = dnssec_mode_from_string(dnssec);
+
+ for (p = servers;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r < 0)
+ goto fail;
+ if (r == 0)
+ break;
+
+ r = link_update_dns_server_one(l, word);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to load DNS server '%s', ignoring: %m", word);
+ continue;
+ }
+ }
+
+ for (p = domains;;) {
+ _cleanup_free_ char *word = NULL;
+ const char *n;
+ bool is_route;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r < 0)
+ goto fail;
+ if (r == 0)
+ break;
+
+ is_route = word[0] == '~';
+ n = is_route ? word + 1 : word;
+
+ r = link_update_search_domain_one(l, n, is_route);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to load search domain '%s', ignoring: %m", word);
+ continue;
+ }
+ }
+
+ if (ntas) {
+ _cleanup_set_free_free_ Set *ns = NULL;
+
+ ns = set_new(&dns_name_hash_ops);
+ if (!ns) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ r = set_put_strsplit(ns, ntas, NULL, 0);
+ if (r < 0)
+ goto fail;
+
+ l->dnssec_negative_trust_anchors = TAKE_PTR(ns);
+ }
+
+ return 0;
+
+fail:
+ return log_error_errno(r, "Failed to load link data %s: %m", l->state_file);
+}
+
+void link_remove_user(Link *l) {
+ assert(l);
+ assert(l->state_file);
+
+ (void) unlink(l->state_file);
+}
diff --git a/src/resolve/resolved-link.h b/src/resolve/resolved-link.h
new file mode 100644
index 0000000..f95ea37
--- /dev/null
+++ b/src/resolve/resolved-link.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <net/if.h>
+
+#include "in-addr-util.h"
+#include "ratelimit.h"
+#include "resolve-util.h"
+
+typedef struct Link Link;
+typedef struct LinkAddress LinkAddress;
+
+#include "resolved-dns-rr.h"
+#include "resolved-dns-scope.h"
+#include "resolved-dns-search-domain.h"
+#include "resolved-dns-server.h"
+#include "resolved-manager.h"
+
+#define LINK_SEARCH_DOMAINS_MAX 256
+#define LINK_DNS_SERVERS_MAX 256
+
+struct LinkAddress {
+ Link *link;
+
+ int family;
+ union in_addr_union in_addr;
+
+ unsigned char flags, scope;
+
+ DnsResourceRecord *llmnr_address_rr;
+ DnsResourceRecord *llmnr_ptr_rr;
+ DnsResourceRecord *mdns_address_rr;
+ DnsResourceRecord *mdns_ptr_rr;
+
+ LIST_FIELDS(LinkAddress, addresses);
+};
+
+struct Link {
+ Manager *manager;
+
+ int ifindex;
+ unsigned flags;
+
+ LIST_HEAD(LinkAddress, addresses);
+ unsigned n_addresses;
+
+ LIST_HEAD(DnsServer, dns_servers);
+ DnsServer *current_dns_server;
+ unsigned n_dns_servers;
+
+ LIST_HEAD(DnsSearchDomain, search_domains);
+ unsigned n_search_domains;
+
+ int default_route;
+
+ ResolveSupport llmnr_support;
+ ResolveSupport mdns_support;
+ DnsOverTlsMode dns_over_tls_mode;
+ DnssecMode dnssec_mode;
+ Set *dnssec_negative_trust_anchors;
+
+ DnsScope *unicast_scope;
+ DnsScope *llmnr_ipv4_scope;
+ DnsScope *llmnr_ipv6_scope;
+ DnsScope *mdns_ipv4_scope;
+ DnsScope *mdns_ipv6_scope;
+
+ bool is_managed;
+
+ char name[IF_NAMESIZE];
+ uint32_t mtu;
+ uint8_t operstate;
+
+ bool loaded;
+ char *state_file;
+
+ bool unicast_relevant;
+};
+
+int link_new(Manager *m, Link **ret, int ifindex);
+Link *link_free(Link *l);
+int link_process_rtnl(Link *l, sd_netlink_message *m);
+int link_update(Link *l);
+bool link_relevant(Link *l, int family, bool local_multicast);
+LinkAddress* link_find_address(Link *l, int family, const union in_addr_union *in_addr);
+void link_add_rrs(Link *l, bool force_remove);
+
+void link_flush_settings(Link *l);
+void link_set_dnssec_mode(Link *l, DnssecMode mode);
+void link_set_dns_over_tls_mode(Link *l, DnsOverTlsMode mode);
+void link_allocate_scopes(Link *l);
+
+DnsServer* link_set_dns_server(Link *l, DnsServer *s);
+DnsServer* link_get_dns_server(Link *l);
+void link_next_dns_server(Link *l);
+
+DnssecMode link_get_dnssec_mode(Link *l);
+bool link_dnssec_supported(Link *l);
+
+DnsOverTlsMode link_get_dns_over_tls_mode(Link *l);
+
+int link_save_user(Link *l);
+int link_load_user(Link *l);
+void link_remove_user(Link *l);
+
+int link_address_new(Link *l, LinkAddress **ret, int family, const union in_addr_union *in_addr);
+LinkAddress *link_address_free(LinkAddress *a);
+int link_address_update_rtnl(LinkAddress *a, sd_netlink_message *m);
+bool link_address_relevant(LinkAddress *l, bool local_multicast);
+void link_address_add_rrs(LinkAddress *a, bool force_remove);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Link*, link_free);
diff --git a/src/resolve/resolved-llmnr.c b/src/resolve/resolved-llmnr.c
new file mode 100644
index 0000000..dfa55c5
--- /dev/null
+++ b/src/resolve/resolved-llmnr.c
@@ -0,0 +1,447 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/in.h>
+#include <resolv.h>
+
+#include "fd-util.h"
+#include "resolved-llmnr.h"
+#include "resolved-manager.h"
+
+void manager_llmnr_stop(Manager *m) {
+ assert(m);
+
+ m->llmnr_ipv4_udp_event_source = sd_event_source_unref(m->llmnr_ipv4_udp_event_source);
+ m->llmnr_ipv4_udp_fd = safe_close(m->llmnr_ipv4_udp_fd);
+
+ m->llmnr_ipv6_udp_event_source = sd_event_source_unref(m->llmnr_ipv6_udp_event_source);
+ m->llmnr_ipv6_udp_fd = safe_close(m->llmnr_ipv6_udp_fd);
+
+ m->llmnr_ipv4_tcp_event_source = sd_event_source_unref(m->llmnr_ipv4_tcp_event_source);
+ m->llmnr_ipv4_tcp_fd = safe_close(m->llmnr_ipv4_tcp_fd);
+
+ m->llmnr_ipv6_tcp_event_source = sd_event_source_unref(m->llmnr_ipv6_tcp_event_source);
+ m->llmnr_ipv6_tcp_fd = safe_close(m->llmnr_ipv6_tcp_fd);
+}
+
+int manager_llmnr_start(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->llmnr_support == RESOLVE_SUPPORT_NO)
+ return 0;
+
+ r = manager_llmnr_ipv4_udp_fd(m);
+ if (r == -EADDRINUSE)
+ goto eaddrinuse;
+ if (r < 0)
+ return r;
+
+ r = manager_llmnr_ipv4_tcp_fd(m);
+ if (r == -EADDRINUSE)
+ goto eaddrinuse;
+ if (r < 0)
+ return r;
+
+ if (socket_ipv6_is_supported()) {
+ r = manager_llmnr_ipv6_udp_fd(m);
+ if (r == -EADDRINUSE)
+ goto eaddrinuse;
+ if (r < 0)
+ return r;
+
+ r = manager_llmnr_ipv6_tcp_fd(m);
+ if (r == -EADDRINUSE)
+ goto eaddrinuse;
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+
+eaddrinuse:
+ log_warning("Another LLMNR responder prohibits binding the socket to the same port. Turning off LLMNR support.");
+ m->llmnr_support = RESOLVE_SUPPORT_NO;
+ manager_llmnr_stop(m);
+
+ return 0;
+}
+
+static int on_llmnr_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ DnsTransaction *t = NULL;
+ Manager *m = userdata;
+ DnsScope *scope;
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+ assert(m);
+
+ r = manager_recv(m, fd, DNS_PROTOCOL_LLMNR, &p);
+ if (r <= 0)
+ return r;
+
+ if (manager_our_packet(m, p))
+ return 0;
+
+ scope = manager_find_scope(m, p);
+ if (!scope) {
+ log_debug("Got LLMNR UDP packet on unknown scope. Ignoring.");
+ return 0;
+ }
+
+ if (dns_packet_validate_reply(p) > 0) {
+ log_debug("Got LLMNR UDP reply packet for id %u", DNS_PACKET_ID(p));
+
+ dns_scope_check_conflicts(scope, p);
+
+ t = hashmap_get(m->dns_transactions, UINT_TO_PTR(DNS_PACKET_ID(p)));
+ if (t)
+ dns_transaction_process_reply(t, p);
+
+ } else if (dns_packet_validate_query(p) > 0) {
+ log_debug("Got LLMNR UDP query packet for id %u", DNS_PACKET_ID(p));
+
+ dns_scope_process_query(scope, NULL, p);
+ } else
+ log_debug("Invalid LLMNR UDP packet, ignoring.");
+
+ return 0;
+}
+
+int manager_llmnr_ipv4_udp_fd(Manager *m) {
+ union sockaddr_union sa = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = htobe16(LLMNR_PORT),
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(m);
+
+ if (m->llmnr_ipv4_udp_fd >= 0)
+ return m->llmnr_ipv4_udp_fd;
+
+ s = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return log_error_errno(errno, "LLMNR-IPv4(UDP): Failed to create socket: %m");
+
+ /* RFC 4795, section 2.5 recommends setting the TTL of UDP packets to 255. */
+ r = setsockopt_int(s, IPPROTO_IP, IP_TTL, 255);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set IP_TTL: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_MULTICAST_TTL, 255);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set IP_MULTICAST_TTL: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_MULTICAST_LOOP, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set IP_MULTICAST_LOOP: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_PKTINFO, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set IP_PKTINFO: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_RECVTTL, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set IP_RECVTTL: %m");
+
+ /* Disable Don't-Fragment bit in the IP header */
+ r = setsockopt_int(s, IPPROTO_IP, IP_MTU_DISCOVER, IP_PMTUDISC_DONT);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set IP_MTU_DISCOVER: %m");
+
+ /* first try to bind without SO_REUSEADDR to detect another LLMNR responder */
+ r = bind(s, &sa.sa, sizeof(sa.in));
+ if (r < 0) {
+ if (errno != EADDRINUSE)
+ return log_error_errno(errno, "LLMNR-IPv4(UDP): Failed to bind socket: %m");
+
+ log_warning("LLMNR-IPv4(UDP): There appears to be another LLMNR responder running, or previously systemd-resolved crashed with some outstanding transfers.");
+
+ /* try again with SO_REUSEADDR */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set SO_REUSEADDR: %m");
+
+ r = bind(s, &sa.sa, sizeof(sa.in));
+ if (r < 0)
+ return log_error_errno(errno, "LLMNR-IPv4(UDP): Failed to bind socket: %m");
+ } else {
+ /* enable SO_REUSEADDR for the case that the user really wants multiple LLMNR responders */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set SO_REUSEADDR: %m");
+ }
+
+ r = sd_event_add_io(m->event, &m->llmnr_ipv4_udp_event_source, s, EPOLLIN, on_llmnr_packet, m);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to create event source: %m");
+
+ (void) sd_event_source_set_description(m->llmnr_ipv4_udp_event_source, "llmnr-ipv4-udp");
+
+ return m->llmnr_ipv4_udp_fd = TAKE_FD(s);
+}
+
+int manager_llmnr_ipv6_udp_fd(Manager *m) {
+ union sockaddr_union sa = {
+ .in6.sin6_family = AF_INET6,
+ .in6.sin6_port = htobe16(LLMNR_PORT),
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(m);
+
+ if (m->llmnr_ipv6_udp_fd >= 0)
+ return m->llmnr_ipv6_udp_fd;
+
+ s = socket(AF_INET6, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return log_error_errno(errno, "LLMNR-IPv6(UDP): Failed to create socket: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 255);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set IPV6_UNICAST_HOPS: %m");
+
+ /* RFC 4795, section 2.5 recommends setting the TTL of UDP packets to 255. */
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, 255);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set IPV6_MULTICAST_HOPS: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set IPV6_MULTICAST_LOOP: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_V6ONLY, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set IPV6_V6ONLY: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set IPV6_RECVPKTINFO: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set IPV6_RECVHOPLIMIT: %m");
+
+ /* first try to bind without SO_REUSEADDR to detect another LLMNR responder */
+ r = bind(s, &sa.sa, sizeof(sa.in6));
+ if (r < 0) {
+ if (errno != EADDRINUSE)
+ return log_error_errno(errno, "LLMNR-IPv6(UDP): Failed to bind socket: %m");
+
+ log_warning("LLMNR-IPv6(UDP): There appears to be another LLMNR responder running, or previously systemd-resolved crashed with some outstanding transfers.");
+
+ /* try again with SO_REUSEADDR */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set SO_REUSEADDR: %m");
+
+ r = bind(s, &sa.sa, sizeof(sa.in6));
+ if (r < 0)
+ return log_error_errno(errno, "LLMNR-IPv6(UDP): Failed to bind socket: %m");
+ } else {
+ /* enable SO_REUSEADDR for the case that the user really wants multiple LLMNR responders */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set SO_REUSEADDR: %m");
+ }
+
+ r = sd_event_add_io(m->event, &m->llmnr_ipv6_udp_event_source, s, EPOLLIN, on_llmnr_packet, m);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to create event source: %m");
+
+ (void) sd_event_source_set_description(m->llmnr_ipv6_udp_event_source, "llmnr-ipv6-udp");
+
+ return m->llmnr_ipv6_udp_fd = TAKE_FD(s);
+}
+
+static int on_llmnr_stream_packet(DnsStream *s) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ DnsScope *scope;
+
+ assert(s);
+
+ p = dns_stream_take_read_packet(s);
+ assert(p);
+
+ scope = manager_find_scope(s->manager, p);
+ if (!scope)
+ log_debug("Got LLMNR TCP packet on unknown scope. Ignoring.");
+ else if (dns_packet_validate_query(p) > 0) {
+ log_debug("Got LLMNR TCP query packet for id %u", DNS_PACKET_ID(p));
+
+ dns_scope_process_query(scope, s, p);
+ } else
+ log_debug("Invalid LLMNR TCP packet, ignoring.");
+
+ dns_stream_unref(s);
+ return 0;
+}
+
+static int on_llmnr_stream(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ DnsStream *stream;
+ Manager *m = userdata;
+ int cfd, r;
+
+ cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (cfd < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return -errno;
+ }
+
+ r = dns_stream_new(m, &stream, DNS_PROTOCOL_LLMNR, cfd, NULL);
+ if (r < 0) {
+ safe_close(cfd);
+ return r;
+ }
+
+ stream->on_packet = on_llmnr_stream_packet;
+ return 0;
+}
+
+int manager_llmnr_ipv4_tcp_fd(Manager *m) {
+ union sockaddr_union sa = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = htobe16(LLMNR_PORT),
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(m);
+
+ if (m->llmnr_ipv4_tcp_fd >= 0)
+ return m->llmnr_ipv4_tcp_fd;
+
+ s = socket(AF_INET, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return log_error_errno(errno, "LLMNR-IPv4(TCP): Failed to create socket: %m");
+
+ /* RFC 4795, section 2.5. requires setting the TTL of TCP streams to 1 */
+ r = setsockopt_int(s, IPPROTO_IP, IP_TTL, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(TCP): Failed to set IP_TTL: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_PKTINFO, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(TCP): Failed to set IP_PKTINFO: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_RECVTTL, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(TCP): Failed to set IP_RECVTTL: %m");
+
+ /* Disable Don't-Fragment bit in the IP header */
+ r = setsockopt_int(s, IPPROTO_IP, IP_MTU_DISCOVER, IP_PMTUDISC_DONT);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(TCP): Failed to set IP_MTU_DISCOVER: %m");
+
+ /* first try to bind without SO_REUSEADDR to detect another LLMNR responder */
+ r = bind(s, &sa.sa, sizeof(sa.in));
+ if (r < 0) {
+ if (errno != EADDRINUSE)
+ return log_error_errno(errno, "LLMNR-IPv4(TCP): Failed to bind socket: %m");
+
+ log_warning("LLMNR-IPv4(TCP): There appears to be another LLMNR responder running, or previously systemd-resolved crashed with some outstanding transfers.");
+
+ /* try again with SO_REUSEADDR */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(TCP): Failed to set SO_REUSEADDR: %m");
+
+ r = bind(s, &sa.sa, sizeof(sa.in));
+ if (r < 0)
+ return log_error_errno(errno, "LLMNR-IPv4(TCP): Failed to bind socket: %m");
+ } else {
+ /* enable SO_REUSEADDR for the case that the user really wants multiple LLMNR responders */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(TCP): Failed to set SO_REUSEADDR: %m");
+ }
+
+ r = listen(s, SOMAXCONN);
+ if (r < 0)
+ return log_error_errno(errno, "LLMNR-IPv4(TCP): Failed to listen the stream: %m");
+
+ r = sd_event_add_io(m->event, &m->llmnr_ipv4_tcp_event_source, s, EPOLLIN, on_llmnr_stream, m);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(TCP): Failed to create event source: %m");
+
+ (void) sd_event_source_set_description(m->llmnr_ipv4_tcp_event_source, "llmnr-ipv4-tcp");
+
+ return m->llmnr_ipv4_tcp_fd = TAKE_FD(s);
+}
+
+int manager_llmnr_ipv6_tcp_fd(Manager *m) {
+ union sockaddr_union sa = {
+ .in6.sin6_family = AF_INET6,
+ .in6.sin6_port = htobe16(LLMNR_PORT),
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(m);
+
+ if (m->llmnr_ipv6_tcp_fd >= 0)
+ return m->llmnr_ipv6_tcp_fd;
+
+ s = socket(AF_INET6, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return log_error_errno(errno, "LLMNR-IPv6(TCP): Failed to create socket: %m");
+
+ /* RFC 4795, section 2.5. requires setting the TTL of TCP streams to 1 */
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(TCP): Failed to set IPV6_UNICAST_HOPS: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_V6ONLY, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(TCP): Failed to set IPV6_V6ONLY: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(TCP): Failed to set IPV6_RECVPKTINFO: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(TCP): Failed to set IPV6_RECVHOPLIMIT: %m");
+
+ /* first try to bind without SO_REUSEADDR to detect another LLMNR responder */
+ r = bind(s, &sa.sa, sizeof(sa.in6));
+ if (r < 0) {
+ if (errno != EADDRINUSE)
+ return log_error_errno(errno, "LLMNR-IPv6(TCP): Failed to bind socket: %m");
+
+ log_warning("LLMNR-IPv6(TCP): There appears to be another LLMNR responder running, or previously systemd-resolved crashed with some outstanding transfers.");
+
+ /* try again with SO_REUSEADDR */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(TCP): Failed to set SO_REUSEADDR: %m");
+
+ r = bind(s, &sa.sa, sizeof(sa.in6));
+ if (r < 0)
+ return log_error_errno(errno, "LLMNR-IPv6(TCP): Failed to bind socket: %m");
+ } else {
+ /* enable SO_REUSEADDR for the case that the user really wants multiple LLMNR responders */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(TCP): Failed to set SO_REUSEADDR: %m");
+ }
+
+ r = listen(s, SOMAXCONN);
+ if (r < 0)
+ return log_error_errno(errno, "LLMNR-IPv6(TCP): Failed to listen the stream: %m");
+
+ r = sd_event_add_io(m->event, &m->llmnr_ipv6_tcp_event_source, s, EPOLLIN, on_llmnr_stream, m);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(TCP): Failed to create event source: %m");
+
+ (void) sd_event_source_set_description(m->llmnr_ipv6_tcp_event_source, "llmnr-ipv6-tcp");
+
+ return m->llmnr_ipv6_tcp_fd = TAKE_FD(s);
+}
diff --git a/src/resolve/resolved-llmnr.h b/src/resolve/resolved-llmnr.h
new file mode 100644
index 0000000..a3b12cf
--- /dev/null
+++ b/src/resolve/resolved-llmnr.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "resolved-manager.h"
+
+#define LLMNR_PORT 5355
+
+int manager_llmnr_ipv4_udp_fd(Manager *m);
+int manager_llmnr_ipv6_udp_fd(Manager *m);
+int manager_llmnr_ipv4_tcp_fd(Manager *m);
+int manager_llmnr_ipv6_tcp_fd(Manager *m);
+
+void manager_llmnr_stop(Manager *m);
+int manager_llmnr_start(Manager *m);
diff --git a/src/resolve/resolved-manager.c b/src/resolve/resolved-manager.c
new file mode 100644
index 0000000..b3d35c8
--- /dev/null
+++ b/src/resolve/resolved-manager.c
@@ -0,0 +1,1533 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/in.h>
+#include <poll.h>
+#include <stdio_ext.h>
+#include <sys/ioctl.h>
+
+#if HAVE_LIBIDN2
+#include <idn2.h>
+#endif
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "io-util.h"
+#include "missing_network.h"
+#include "netlink-util.h"
+#include "network-internal.h"
+#include "ordered-set.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "resolved-bus.h"
+#include "resolved-conf.h"
+#include "resolved-dns-stub.h"
+#include "resolved-dnssd.h"
+#include "resolved-etc-hosts.h"
+#include "resolved-llmnr.h"
+#include "resolved-manager.h"
+#include "resolved-mdns.h"
+#include "resolved-resolv-conf.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "utf8.h"
+
+#define SEND_TIMEOUT_USEC (200 * USEC_PER_MSEC)
+
+static int manager_process_link(sd_netlink *rtnl, sd_netlink_message *mm, void *userdata) {
+ Manager *m = userdata;
+ uint16_t type;
+ Link *l;
+ int ifindex, r;
+
+ assert(rtnl);
+ assert(m);
+ assert(mm);
+
+ r = sd_netlink_message_get_type(mm, &type);
+ if (r < 0)
+ goto fail;
+
+ r = sd_rtnl_message_link_get_ifindex(mm, &ifindex);
+ if (r < 0)
+ goto fail;
+
+ l = hashmap_get(m->links, INT_TO_PTR(ifindex));
+
+ switch (type) {
+
+ case RTM_NEWLINK:{
+ bool is_new = !l;
+
+ if (!l) {
+ r = link_new(m, &l, ifindex);
+ if (r < 0)
+ goto fail;
+ }
+
+ r = link_process_rtnl(l, mm);
+ if (r < 0)
+ goto fail;
+
+ r = link_update(l);
+ if (r < 0)
+ goto fail;
+
+ if (is_new)
+ log_debug("Found new link %i/%s", ifindex, l->name);
+
+ break;
+ }
+
+ case RTM_DELLINK:
+ if (l) {
+ log_debug("Removing link %i/%s", l->ifindex, l->name);
+ link_remove_user(l);
+ link_free(l);
+ }
+
+ break;
+ }
+
+ return 0;
+
+fail:
+ log_warning_errno(r, "Failed to process RTNL link message: %m");
+ return 0;
+}
+
+static int manager_process_address(sd_netlink *rtnl, sd_netlink_message *mm, void *userdata) {
+ Manager *m = userdata;
+ union in_addr_union address;
+ uint16_t type;
+ int r, ifindex, family;
+ LinkAddress *a;
+ Link *l;
+
+ assert(rtnl);
+ assert(mm);
+ assert(m);
+
+ r = sd_netlink_message_get_type(mm, &type);
+ if (r < 0)
+ goto fail;
+
+ r = sd_rtnl_message_addr_get_ifindex(mm, &ifindex);
+ if (r < 0)
+ goto fail;
+
+ l = hashmap_get(m->links, INT_TO_PTR(ifindex));
+ if (!l)
+ return 0;
+
+ r = sd_rtnl_message_addr_get_family(mm, &family);
+ if (r < 0)
+ goto fail;
+
+ switch (family) {
+
+ case AF_INET:
+ r = sd_netlink_message_read_in_addr(mm, IFA_LOCAL, &address.in);
+ if (r < 0) {
+ r = sd_netlink_message_read_in_addr(mm, IFA_ADDRESS, &address.in);
+ if (r < 0)
+ goto fail;
+ }
+
+ break;
+
+ case AF_INET6:
+ r = sd_netlink_message_read_in6_addr(mm, IFA_LOCAL, &address.in6);
+ if (r < 0) {
+ r = sd_netlink_message_read_in6_addr(mm, IFA_ADDRESS, &address.in6);
+ if (r < 0)
+ goto fail;
+ }
+
+ break;
+
+ default:
+ return 0;
+ }
+
+ a = link_find_address(l, family, &address);
+
+ switch (type) {
+
+ case RTM_NEWADDR:
+
+ if (!a) {
+ r = link_address_new(l, &a, family, &address);
+ if (r < 0)
+ return r;
+ }
+
+ r = link_address_update_rtnl(a, mm);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case RTM_DELADDR:
+ link_address_free(a);
+ break;
+ }
+
+ return 0;
+
+fail:
+ log_warning_errno(r, "Failed to process RTNL address message: %m");
+ return 0;
+}
+
+static int manager_rtnl_listen(Manager *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ sd_netlink_message *i;
+ int r;
+
+ assert(m);
+
+ /* First, subscribe to interfaces coming and going */
+ r = sd_netlink_open(&m->rtnl);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_attach_event(m->rtnl, m->event, SD_EVENT_PRIORITY_IMPORTANT);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_NEWLINK, manager_process_link, NULL, m, "resolve-NEWLINK");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_DELLINK, manager_process_link, NULL, m, "resolve-DELLINK");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_NEWADDR, manager_process_address, NULL, m, "resolve-NEWADDR");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_DELADDR, manager_process_address, NULL, m, "resolve-DELADDR");
+ if (r < 0)
+ return r;
+
+ /* Then, enumerate all links */
+ r = sd_rtnl_message_new_link(m->rtnl, &req, RTM_GETLINK, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(m->rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (i = reply; i; i = sd_netlink_message_next(i)) {
+ r = manager_process_link(m->rtnl, i, m);
+ if (r < 0)
+ return r;
+ }
+
+ req = sd_netlink_message_unref(req);
+ reply = sd_netlink_message_unref(reply);
+
+ /* Finally, enumerate all addresses, too */
+ r = sd_rtnl_message_new_addr(m->rtnl, &req, RTM_GETADDR, 0, AF_UNSPEC);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(m->rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (i = reply; i; i = sd_netlink_message_next(i)) {
+ r = manager_process_address(m->rtnl, i, m);
+ if (r < 0)
+ return r;
+ }
+
+ return r;
+}
+
+static int on_network_event(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ Iterator i;
+ Link *l;
+ int r;
+
+ assert(m);
+
+ sd_network_monitor_flush(m->network_monitor);
+
+ HASHMAP_FOREACH(l, m->links, i) {
+ r = link_update(l);
+ if (r < 0)
+ log_warning_errno(r, "Failed to update monitor information for %i: %m", l->ifindex);
+ }
+
+ (void) manager_write_resolv_conf(m);
+
+ return 0;
+}
+
+static int manager_network_monitor_listen(Manager *m) {
+ int r, fd, events;
+
+ assert(m);
+
+ r = sd_network_monitor_new(&m->network_monitor, NULL);
+ if (r < 0)
+ return r;
+
+ fd = sd_network_monitor_get_fd(m->network_monitor);
+ if (fd < 0)
+ return fd;
+
+ events = sd_network_monitor_get_events(m->network_monitor);
+ if (events < 0)
+ return events;
+
+ r = sd_event_add_io(m->event, &m->network_event_source, fd, events, &on_network_event, m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(m->network_event_source, SD_EVENT_PRIORITY_IMPORTANT+5);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->network_event_source, "network-monitor");
+
+ return 0;
+}
+
+static int determine_hostname(char **full_hostname, char **llmnr_hostname, char **mdns_hostname) {
+ _cleanup_free_ char *h = NULL, *n = NULL;
+#if HAVE_LIBIDN2
+ _cleanup_free_ char *utf8 = NULL;
+#elif HAVE_LIBIDN
+ int k;
+#endif
+ char label[DNS_LABEL_MAX];
+ const char *p, *decoded;
+ int r;
+
+ assert(full_hostname);
+ assert(llmnr_hostname);
+ assert(mdns_hostname);
+
+ /* Extract and normalize the first label of the locally configured hostname, and check it's not "localhost". */
+
+ r = gethostname_strict(&h);
+ if (r < 0)
+ return log_debug_errno(r, "Can't determine system hostname: %m");
+
+ p = h;
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unescape host name: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Couldn't find a single label in hostname.");
+
+#if HAVE_LIBIDN2
+ r = idn2_to_unicode_8z8z(label, &utf8, 0);
+ if (r != IDN2_OK)
+ return log_error("Failed to undo IDNA: %s", idn2_strerror(r));
+ assert(utf8_is_valid(utf8));
+
+ r = strlen(utf8);
+ decoded = utf8;
+#elif HAVE_LIBIDN
+ k = dns_label_undo_idna(label, r, label, sizeof label);
+ if (k < 0)
+ return log_error_errno(k, "Failed to undo IDNA: %m");
+ if (k > 0)
+ r = k;
+
+ if (!utf8_is_valid(label))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "System hostname is not UTF-8 clean.");
+ decoded = label;
+#else
+ decoded = label; /* no decoding */
+#endif
+
+ r = dns_label_escape_new(decoded, r, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to escape host name: %m");
+
+ if (is_localhost(n))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "System hostname is 'localhost', ignoring.");
+
+ r = dns_name_concat(n, "local", 0, mdns_hostname);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine mDNS hostname: %m");
+
+ *llmnr_hostname = TAKE_PTR(n);
+ *full_hostname = TAKE_PTR(h);
+
+ return 0;
+}
+
+static const char *fallback_hostname(void) {
+
+ /* Determine the fall back hostname. For exposing this system to the outside world, we cannot have it to be
+ * "localhost" even if that's the compiled in hostname. In this case, let's revert to "linux" instead. */
+
+ if (is_localhost(FALLBACK_HOSTNAME))
+ return "linux";
+
+ return FALLBACK_HOSTNAME;
+}
+
+static int make_fallback_hostnames(char **full_hostname, char **llmnr_hostname, char **mdns_hostname) {
+ _cleanup_free_ char *n = NULL, *m = NULL;
+ char label[DNS_LABEL_MAX], *h;
+ const char *p;
+ int r;
+
+ assert(full_hostname);
+ assert(llmnr_hostname);
+ assert(mdns_hostname);
+
+ p = fallback_hostname();
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unescape fallback host name: %m");
+
+ assert(r > 0); /* The fallback hostname must have at least one label */
+
+ r = dns_label_escape_new(label, r, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to escape fallback hostname: %m");
+
+ r = dns_name_concat(n, "local", 0, &m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to concatenate mDNS hostname: %m");
+
+ h = strdup(fallback_hostname());
+ if (!h)
+ return log_oom();
+
+ *llmnr_hostname = TAKE_PTR(n);
+ *mdns_hostname = TAKE_PTR(m);
+
+ *full_hostname = h;
+
+ return 0;
+}
+
+static int on_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ _cleanup_free_ char *full_hostname = NULL, *llmnr_hostname = NULL, *mdns_hostname = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(m);
+
+ r = determine_hostname(&full_hostname, &llmnr_hostname, &mdns_hostname);
+ if (r < 0)
+ return 0; /* ignore invalid hostnames */
+
+ if (streq(full_hostname, m->full_hostname) &&
+ streq(llmnr_hostname, m->llmnr_hostname) &&
+ streq(mdns_hostname, m->mdns_hostname))
+ return 0;
+
+ log_info("System hostname changed to '%s'.", full_hostname);
+
+ free_and_replace(m->full_hostname, full_hostname);
+ free_and_replace(m->llmnr_hostname, llmnr_hostname);
+ free_and_replace(m->mdns_hostname, mdns_hostname);
+
+ manager_refresh_rrs(m);
+
+ return 0;
+}
+
+static int manager_watch_hostname(Manager *m) {
+ int r;
+
+ assert(m);
+
+ m->hostname_fd = open("/proc/sys/kernel/hostname",
+ O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (m->hostname_fd < 0) {
+ log_warning_errno(errno, "Failed to watch hostname: %m");
+ return 0;
+ }
+
+ r = sd_event_add_io(m->event, &m->hostname_event_source, m->hostname_fd, 0, on_hostname_change, m);
+ if (r < 0) {
+ if (r == -EPERM)
+ /* kernels prior to 3.2 don't support polling this file. Ignore the failure. */
+ m->hostname_fd = safe_close(m->hostname_fd);
+ else
+ return log_error_errno(r, "Failed to add hostname event source: %m");
+ }
+
+ (void) sd_event_source_set_description(m->hostname_event_source, "hostname");
+
+ r = determine_hostname(&m->full_hostname, &m->llmnr_hostname, &m->mdns_hostname);
+ if (r < 0) {
+ log_info("Defaulting to hostname '%s'.", fallback_hostname());
+
+ r = make_fallback_hostnames(&m->full_hostname, &m->llmnr_hostname, &m->mdns_hostname);
+ if (r < 0)
+ return r;
+ } else
+ log_info("Using system hostname '%s'.", m->full_hostname);
+
+ return 0;
+}
+
+static int manager_sigusr1(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ _cleanup_free_ char *buffer = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ Manager *m = userdata;
+ DnsServer *server;
+ size_t size = 0;
+ DnsScope *scope;
+ Iterator i;
+ Link *l;
+
+ assert(s);
+ assert(si);
+ assert(m);
+
+ f = open_memstream(&buffer, &size);
+ if (!f)
+ return log_oom();
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ LIST_FOREACH(scopes, scope, m->dns_scopes)
+ dns_scope_dump(scope, f);
+
+ LIST_FOREACH(servers, server, m->dns_servers)
+ dns_server_dump(server, f);
+ LIST_FOREACH(servers, server, m->fallback_dns_servers)
+ dns_server_dump(server, f);
+ HASHMAP_FOREACH(l, m->links, i)
+ LIST_FOREACH(servers, server, l->dns_servers)
+ dns_server_dump(server, f);
+
+ if (fflush_and_check(f) < 0)
+ return log_oom();
+
+ log_dump(LOG_INFO, buffer);
+ return 0;
+}
+
+static int manager_sigusr2(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *m = userdata;
+
+ assert(s);
+ assert(si);
+ assert(m);
+
+ manager_flush_caches(m);
+
+ return 0;
+}
+
+static int manager_sigrtmin1(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *m = userdata;
+
+ assert(s);
+ assert(si);
+ assert(m);
+
+ manager_reset_server_features(m);
+ return 0;
+}
+
+int manager_new(Manager **ret) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ assert(ret);
+
+ m = new(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (Manager) {
+ .llmnr_ipv4_udp_fd = -1,
+ .llmnr_ipv6_udp_fd = -1,
+ .llmnr_ipv4_tcp_fd = -1,
+ .llmnr_ipv6_tcp_fd = -1,
+ .mdns_ipv4_fd = -1,
+ .mdns_ipv6_fd = -1,
+ .dns_stub_udp_fd = -1,
+ .dns_stub_tcp_fd = -1,
+ .hostname_fd = -1,
+
+ .llmnr_support = RESOLVE_SUPPORT_YES,
+ .mdns_support = RESOLVE_SUPPORT_YES,
+ .dnssec_mode = DEFAULT_DNSSEC_MODE,
+ .dns_over_tls_mode = DEFAULT_DNS_OVER_TLS_MODE,
+ .enable_cache = true,
+ .dns_stub_listener_mode = DNS_STUB_LISTENER_YES,
+ .read_resolv_conf = true,
+ .need_builtin_fallbacks = true,
+ .etc_hosts_last = USEC_INFINITY,
+ .etc_hosts_mtime = USEC_INFINITY,
+ .read_etc_hosts = true,
+ };
+
+ r = dns_trust_anchor_load(&m->trust_anchor);
+ if (r < 0)
+ return r;
+
+ r = manager_parse_config_file(m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse configuration file: %m");
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ (void) sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+
+ (void) sd_event_set_watchdog(m->event, true);
+
+ r = manager_watch_hostname(m);
+ if (r < 0)
+ return r;
+
+ r = dnssd_load(m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to load DNS-SD configuration files: %m");
+
+ r = dns_scope_new(m, &m->unicast_scope, NULL, DNS_PROTOCOL_DNS, AF_UNSPEC);
+ if (r < 0)
+ return r;
+
+ r = manager_network_monitor_listen(m);
+ if (r < 0)
+ return r;
+
+ r = manager_rtnl_listen(m);
+ if (r < 0)
+ return r;
+
+ r = manager_connect_bus(m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_add_signal(m->event, &m->sigusr1_event_source, SIGUSR1, manager_sigusr1, m);
+ (void) sd_event_add_signal(m->event, &m->sigusr2_event_source, SIGUSR2, manager_sigusr2, m);
+ (void) sd_event_add_signal(m->event, &m->sigrtmin1_event_source, SIGRTMIN+1, manager_sigrtmin1, m);
+
+ manager_cleanup_saved_user(m);
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+int manager_start(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = manager_dns_stub_start(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+Manager *manager_free(Manager *m) {
+ Link *l;
+ DnssdService *s;
+
+ if (!m)
+ return NULL;
+
+ dns_server_unlink_all(m->dns_servers);
+ dns_server_unlink_all(m->fallback_dns_servers);
+ dns_search_domain_unlink_all(m->search_domains);
+
+ while ((l = hashmap_first(m->links)))
+ link_free(l);
+
+ while (m->dns_queries)
+ dns_query_free(m->dns_queries);
+
+ dns_scope_free(m->unicast_scope);
+
+ /* At this point only orphaned streams should remain. All others should have been freed already by their
+ * owners */
+ while (m->dns_streams)
+ dns_stream_unref(m->dns_streams);
+
+ hashmap_free(m->links);
+ hashmap_free(m->dns_transactions);
+
+ sd_event_source_unref(m->network_event_source);
+ sd_network_monitor_unref(m->network_monitor);
+
+ sd_netlink_unref(m->rtnl);
+ sd_event_source_unref(m->rtnl_event_source);
+
+ manager_llmnr_stop(m);
+ manager_mdns_stop(m);
+ manager_dns_stub_stop(m);
+
+ sd_bus_flush_close_unref(m->bus);
+
+ sd_event_source_unref(m->sigusr1_event_source);
+ sd_event_source_unref(m->sigusr2_event_source);
+ sd_event_source_unref(m->sigrtmin1_event_source);
+
+ sd_event_unref(m->event);
+
+ dns_resource_key_unref(m->llmnr_host_ipv4_key);
+ dns_resource_key_unref(m->llmnr_host_ipv6_key);
+ dns_resource_key_unref(m->mdns_host_ipv4_key);
+ dns_resource_key_unref(m->mdns_host_ipv6_key);
+
+ sd_event_source_unref(m->hostname_event_source);
+ safe_close(m->hostname_fd);
+
+ free(m->full_hostname);
+ free(m->llmnr_hostname);
+ free(m->mdns_hostname);
+
+ while ((s = hashmap_first(m->dnssd_services)))
+ dnssd_service_free(s);
+ hashmap_free(m->dnssd_services);
+
+ dns_trust_anchor_flush(&m->trust_anchor);
+ manager_etc_hosts_flush(m);
+
+ return mfree(m);
+}
+
+int manager_recv(Manager *m, int fd, DnsProtocol protocol, DnsPacket **ret) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ union {
+ struct cmsghdr header; /* For alignment */
+ uint8_t buffer[CMSG_SPACE(MAXSIZE(struct in_pktinfo, struct in6_pktinfo))
+ + CMSG_SPACE(int) /* ttl/hoplimit */
+ + EXTRA_CMSG_SPACE /* kernel appears to require extra buffer space */];
+ } control;
+ union sockaddr_union sa;
+ struct iovec iov;
+ struct msghdr mh = {
+ .msg_name = &sa.sa,
+ .msg_namelen = sizeof(sa),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct cmsghdr *cmsg;
+ ssize_t ms, l;
+ int r;
+
+ assert(m);
+ assert(fd >= 0);
+ assert(ret);
+
+ ms = next_datagram_size_fd(fd);
+ if (ms < 0)
+ return ms;
+
+ r = dns_packet_new(&p, protocol, ms, DNS_PACKET_SIZE_MAX);
+ if (r < 0)
+ return r;
+
+ iov = IOVEC_MAKE(DNS_PACKET_DATA(p), p->allocated);
+
+ l = recvmsg(fd, &mh, 0);
+ if (l < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return -errno;
+ }
+ if (l == 0)
+ return 0;
+
+ assert(!(mh.msg_flags & MSG_CTRUNC));
+ assert(!(mh.msg_flags & MSG_TRUNC));
+
+ p->size = (size_t) l;
+
+ p->family = sa.sa.sa_family;
+ p->ipproto = IPPROTO_UDP;
+ if (p->family == AF_INET) {
+ p->sender.in = sa.in.sin_addr;
+ p->sender_port = be16toh(sa.in.sin_port);
+ } else if (p->family == AF_INET6) {
+ p->sender.in6 = sa.in6.sin6_addr;
+ p->sender_port = be16toh(sa.in6.sin6_port);
+ p->ifindex = sa.in6.sin6_scope_id;
+ } else
+ return -EAFNOSUPPORT;
+
+ CMSG_FOREACH(cmsg, &mh) {
+
+ if (cmsg->cmsg_level == IPPROTO_IPV6) {
+ assert(p->family == AF_INET6);
+
+ switch (cmsg->cmsg_type) {
+
+ case IPV6_PKTINFO: {
+ struct in6_pktinfo *i = (struct in6_pktinfo*) CMSG_DATA(cmsg);
+
+ if (p->ifindex <= 0)
+ p->ifindex = i->ipi6_ifindex;
+
+ p->destination.in6 = i->ipi6_addr;
+ break;
+ }
+
+ case IPV6_HOPLIMIT:
+ p->ttl = *(int *) CMSG_DATA(cmsg);
+ break;
+
+ }
+ } else if (cmsg->cmsg_level == IPPROTO_IP) {
+ assert(p->family == AF_INET);
+
+ switch (cmsg->cmsg_type) {
+
+ case IP_PKTINFO: {
+ struct in_pktinfo *i = (struct in_pktinfo*) CMSG_DATA(cmsg);
+
+ if (p->ifindex <= 0)
+ p->ifindex = i->ipi_ifindex;
+
+ p->destination.in = i->ipi_addr;
+ break;
+ }
+
+ case IP_TTL:
+ p->ttl = *(int *) CMSG_DATA(cmsg);
+ break;
+ }
+ }
+ }
+
+ /* The Linux kernel sets the interface index to the loopback
+ * device if the packet came from the local host since it
+ * avoids the routing table in such a case. Let's unset the
+ * interface index in such a case. */
+ if (p->ifindex == LOOPBACK_IFINDEX)
+ p->ifindex = 0;
+
+ if (protocol != DNS_PROTOCOL_DNS) {
+ /* If we don't know the interface index still, we look for the
+ * first local interface with a matching address. Yuck! */
+ if (p->ifindex <= 0)
+ p->ifindex = manager_find_ifindex(m, p->family, &p->destination);
+ }
+
+ *ret = TAKE_PTR(p);
+
+ return 1;
+}
+
+static int sendmsg_loop(int fd, struct msghdr *mh, int flags) {
+ int r;
+
+ assert(fd >= 0);
+ assert(mh);
+
+ for (;;) {
+ if (sendmsg(fd, mh, flags) >= 0)
+ return 0;
+
+ if (errno == EINTR)
+ continue;
+
+ if (errno != EAGAIN)
+ return -errno;
+
+ r = fd_wait_for_event(fd, POLLOUT, SEND_TIMEOUT_USEC);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ETIMEDOUT;
+ }
+}
+
+static int write_loop(int fd, void *message, size_t length) {
+ int r;
+
+ assert(fd >= 0);
+ assert(message);
+
+ for (;;) {
+ if (write(fd, message, length) >= 0)
+ return 0;
+
+ if (errno == EINTR)
+ continue;
+
+ if (errno != EAGAIN)
+ return -errno;
+
+ r = fd_wait_for_event(fd, POLLOUT, SEND_TIMEOUT_USEC);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ETIMEDOUT;
+ }
+}
+
+int manager_write(Manager *m, int fd, DnsPacket *p) {
+ int r;
+
+ log_debug("Sending %s packet with id %" PRIu16 ".", DNS_PACKET_QR(p) ? "response" : "query", DNS_PACKET_ID(p));
+
+ r = write_loop(fd, DNS_PACKET_DATA(p), p->size);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int manager_ipv4_send(
+ Manager *m,
+ int fd,
+ int ifindex,
+ const struct in_addr *destination,
+ uint16_t port,
+ const struct in_addr *source,
+ DnsPacket *p) {
+ union {
+ struct cmsghdr header; /* For alignment */
+ uint8_t buffer[CMSG_SPACE(sizeof(struct in_pktinfo))];
+ } control = {};
+ union sockaddr_union sa;
+ struct iovec iov;
+ struct msghdr mh = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_name = &sa.sa,
+ .msg_namelen = sizeof(sa.in),
+ };
+
+ assert(m);
+ assert(fd >= 0);
+ assert(destination);
+ assert(port > 0);
+ assert(p);
+
+ iov = IOVEC_MAKE(DNS_PACKET_DATA(p), p->size);
+
+ sa = (union sockaddr_union) {
+ .in.sin_family = AF_INET,
+ .in.sin_addr = *destination,
+ .in.sin_port = htobe16(port),
+ };
+
+ if (ifindex > 0) {
+ struct cmsghdr *cmsg;
+ struct in_pktinfo *pi;
+
+ mh.msg_control = &control;
+ mh.msg_controllen = CMSG_LEN(sizeof(struct in_pktinfo));
+
+ cmsg = CMSG_FIRSTHDR(&mh);
+ cmsg->cmsg_len = mh.msg_controllen;
+ cmsg->cmsg_level = IPPROTO_IP;
+ cmsg->cmsg_type = IP_PKTINFO;
+
+ pi = (struct in_pktinfo*) CMSG_DATA(cmsg);
+ pi->ipi_ifindex = ifindex;
+
+ if (source)
+ pi->ipi_spec_dst = *source;
+ }
+
+ return sendmsg_loop(fd, &mh, 0);
+}
+
+static int manager_ipv6_send(
+ Manager *m,
+ int fd,
+ int ifindex,
+ const struct in6_addr *destination,
+ uint16_t port,
+ const struct in6_addr *source,
+ DnsPacket *p) {
+
+ union {
+ struct cmsghdr header; /* For alignment */
+ uint8_t buffer[CMSG_SPACE(sizeof(struct in6_pktinfo))];
+ } control = {};
+ union sockaddr_union sa;
+ struct iovec iov;
+ struct msghdr mh = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_name = &sa.sa,
+ .msg_namelen = sizeof(sa.in6),
+ };
+
+ assert(m);
+ assert(fd >= 0);
+ assert(destination);
+ assert(port > 0);
+ assert(p);
+
+ iov = IOVEC_MAKE(DNS_PACKET_DATA(p), p->size);
+
+ sa = (union sockaddr_union) {
+ .in6.sin6_family = AF_INET6,
+ .in6.sin6_addr = *destination,
+ .in6.sin6_port = htobe16(port),
+ .in6.sin6_scope_id = ifindex,
+ };
+
+ if (ifindex > 0) {
+ struct cmsghdr *cmsg;
+ struct in6_pktinfo *pi;
+
+ mh.msg_control = &control;
+ mh.msg_controllen = CMSG_LEN(sizeof(struct in6_pktinfo));
+
+ cmsg = CMSG_FIRSTHDR(&mh);
+ cmsg->cmsg_len = mh.msg_controllen;
+ cmsg->cmsg_level = IPPROTO_IPV6;
+ cmsg->cmsg_type = IPV6_PKTINFO;
+
+ pi = (struct in6_pktinfo*) CMSG_DATA(cmsg);
+ pi->ipi6_ifindex = ifindex;
+
+ if (source)
+ pi->ipi6_addr = *source;
+ }
+
+ return sendmsg_loop(fd, &mh, 0);
+}
+
+int manager_send(
+ Manager *m,
+ int fd,
+ int ifindex,
+ int family,
+ const union in_addr_union *destination,
+ uint16_t port,
+ const union in_addr_union *source,
+ DnsPacket *p) {
+
+ assert(m);
+ assert(fd >= 0);
+ assert(destination);
+ assert(port > 0);
+ assert(p);
+
+ log_debug("Sending %s packet with id %" PRIu16 " on interface %i/%s.", DNS_PACKET_QR(p) ? "response" : "query", DNS_PACKET_ID(p), ifindex, af_to_name(family));
+
+ if (family == AF_INET)
+ return manager_ipv4_send(m, fd, ifindex, &destination->in, port, source ? &source->in : NULL, p);
+ if (family == AF_INET6)
+ return manager_ipv6_send(m, fd, ifindex, &destination->in6, port, source ? &source->in6 : NULL, p);
+
+ return -EAFNOSUPPORT;
+}
+
+uint32_t manager_find_mtu(Manager *m) {
+ uint32_t mtu = 0;
+ Link *l;
+ Iterator i;
+
+ /* If we don't know on which link a DNS packet would be
+ * delivered, let's find the largest MTU that works on all
+ * interfaces we know of */
+
+ HASHMAP_FOREACH(l, m->links, i) {
+ if (l->mtu <= 0)
+ continue;
+
+ if (mtu <= 0 || l->mtu < mtu)
+ mtu = l->mtu;
+ }
+
+ return mtu;
+}
+
+int manager_find_ifindex(Manager *m, int family, const union in_addr_union *in_addr) {
+ LinkAddress *a;
+
+ assert(m);
+
+ a = manager_find_link_address(m, family, in_addr);
+ if (a)
+ return a->link->ifindex;
+
+ return 0;
+}
+
+void manager_refresh_rrs(Manager *m) {
+ Iterator i;
+ Link *l;
+ DnssdService *s;
+
+ assert(m);
+
+ m->llmnr_host_ipv4_key = dns_resource_key_unref(m->llmnr_host_ipv4_key);
+ m->llmnr_host_ipv6_key = dns_resource_key_unref(m->llmnr_host_ipv6_key);
+ m->mdns_host_ipv4_key = dns_resource_key_unref(m->mdns_host_ipv4_key);
+ m->mdns_host_ipv6_key = dns_resource_key_unref(m->mdns_host_ipv6_key);
+
+ if (m->mdns_support == RESOLVE_SUPPORT_YES)
+ HASHMAP_FOREACH(s, m->dnssd_services, i)
+ if (dnssd_update_rrs(s) < 0)
+ log_warning("Failed to refresh DNS-SD service '%s'", s->name);
+
+ HASHMAP_FOREACH(l, m->links, i) {
+ link_add_rrs(l, true);
+ link_add_rrs(l, false);
+ }
+}
+
+static int manager_next_random_name(const char *old, char **ret_new) {
+ const char *p;
+ uint64_t u, a;
+ char *n;
+
+ p = strchr(old, 0);
+ assert(p);
+
+ while (p > old) {
+ if (!strchr(DIGITS, p[-1]))
+ break;
+
+ p--;
+ }
+
+ if (*p == 0 || safe_atou64(p, &u) < 0 || u <= 0)
+ u = 1;
+
+ /* Add a random number to the old value. This way we can avoid
+ * that two hosts pick the same hostname, win on IPv4 and lose
+ * on IPv6 (or vice versa), and pick the same hostname
+ * replacement hostname, ad infinitum. We still want the
+ * numbers to go up monotonically, hence we just add a random
+ * value 1..10 */
+
+ random_bytes(&a, sizeof(a));
+ u += 1 + a % 10;
+
+ if (asprintf(&n, "%.*s%" PRIu64, (int) (p - old), old, u) < 0)
+ return -ENOMEM;
+
+ *ret_new = n;
+
+ return 0;
+}
+
+int manager_next_hostname(Manager *m) {
+ _cleanup_free_ char *h = NULL, *k = NULL;
+ int r;
+
+ assert(m);
+
+ r = manager_next_random_name(m->llmnr_hostname, &h);
+ if (r < 0)
+ return r;
+
+ r = dns_name_concat(h, "local", 0, &k);
+ if (r < 0)
+ return r;
+
+ log_info("Hostname conflict, changing published hostname from '%s' to '%s'.", m->llmnr_hostname, h);
+
+ free_and_replace(m->llmnr_hostname, h);
+ free_and_replace(m->mdns_hostname, k);
+
+ manager_refresh_rrs(m);
+
+ return 0;
+}
+
+LinkAddress* manager_find_link_address(Manager *m, int family, const union in_addr_union *in_addr) {
+ Iterator i;
+ Link *l;
+
+ assert(m);
+
+ HASHMAP_FOREACH(l, m->links, i) {
+ LinkAddress *a;
+
+ a = link_find_address(l, family, in_addr);
+ if (a)
+ return a;
+ }
+
+ return NULL;
+}
+
+bool manager_our_packet(Manager *m, DnsPacket *p) {
+ assert(m);
+ assert(p);
+
+ return !!manager_find_link_address(m, p->family, &p->sender);
+}
+
+DnsScope* manager_find_scope(Manager *m, DnsPacket *p) {
+ Link *l;
+
+ assert(m);
+ assert(p);
+
+ l = hashmap_get(m->links, INT_TO_PTR(p->ifindex));
+ if (!l)
+ return NULL;
+
+ switch (p->protocol) {
+ case DNS_PROTOCOL_LLMNR:
+ if (p->family == AF_INET)
+ return l->llmnr_ipv4_scope;
+ else if (p->family == AF_INET6)
+ return l->llmnr_ipv6_scope;
+
+ break;
+
+ case DNS_PROTOCOL_MDNS:
+ if (p->family == AF_INET)
+ return l->mdns_ipv4_scope;
+ else if (p->family == AF_INET6)
+ return l->mdns_ipv6_scope;
+
+ break;
+
+ default:
+ break;
+ }
+
+ return NULL;
+}
+
+void manager_verify_all(Manager *m) {
+ DnsScope *s;
+
+ assert(m);
+
+ LIST_FOREACH(scopes, s, m->dns_scopes)
+ dns_zone_verify_all(&s->zone);
+}
+
+int manager_is_own_hostname(Manager *m, const char *name) {
+ int r;
+
+ assert(m);
+ assert(name);
+
+ if (m->llmnr_hostname) {
+ r = dns_name_equal(name, m->llmnr_hostname);
+ if (r != 0)
+ return r;
+ }
+
+ if (m->mdns_hostname) {
+ r = dns_name_equal(name, m->mdns_hostname);
+ if (r != 0)
+ return r;
+ }
+
+ if (m->full_hostname)
+ return dns_name_equal(name, m->full_hostname);
+
+ return 0;
+}
+
+int manager_compile_dns_servers(Manager *m, OrderedSet **dns) {
+ DnsServer *s;
+ Iterator i;
+ Link *l;
+ int r;
+
+ assert(m);
+ assert(dns);
+
+ r = ordered_set_ensure_allocated(dns, &dns_server_hash_ops);
+ if (r < 0)
+ return r;
+
+ /* First add the system-wide servers and domains */
+ LIST_FOREACH(servers, s, m->dns_servers) {
+ r = ordered_set_put(*dns, s);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+
+ /* Then, add the per-link servers */
+ HASHMAP_FOREACH(l, m->links, i) {
+ LIST_FOREACH(servers, s, l->dns_servers) {
+ r = ordered_set_put(*dns, s);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* If we found nothing, add the fallback servers */
+ if (ordered_set_isempty(*dns)) {
+ LIST_FOREACH(servers, s, m->fallback_dns_servers) {
+ r = ordered_set_put(*dns, s);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/* filter_route is a tri-state:
+ * < 0: no filtering
+ * = 0 or false: return only domains which should be used for searching
+ * > 0 or true: return only domains which are for routing only
+ */
+int manager_compile_search_domains(Manager *m, OrderedSet **domains, int filter_route) {
+ DnsSearchDomain *d;
+ Iterator i;
+ Link *l;
+ int r;
+
+ assert(m);
+ assert(domains);
+
+ r = ordered_set_ensure_allocated(domains, &dns_name_hash_ops);
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(domains, d, m->search_domains) {
+
+ if (filter_route >= 0 &&
+ d->route_only != !!filter_route)
+ continue;
+
+ r = ordered_set_put(*domains, d->name);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+
+ HASHMAP_FOREACH(l, m->links, i) {
+
+ LIST_FOREACH(domains, d, l->search_domains) {
+
+ if (filter_route >= 0 &&
+ d->route_only != !!filter_route)
+ continue;
+
+ r = ordered_set_put(*domains, d->name);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+DnssecMode manager_get_dnssec_mode(Manager *m) {
+ assert(m);
+
+ if (m->dnssec_mode != _DNSSEC_MODE_INVALID)
+ return m->dnssec_mode;
+
+ return DNSSEC_NO;
+}
+
+bool manager_dnssec_supported(Manager *m) {
+ DnsServer *server;
+ Iterator i;
+ Link *l;
+
+ assert(m);
+
+ if (manager_get_dnssec_mode(m) == DNSSEC_NO)
+ return false;
+
+ server = manager_get_dns_server(m);
+ if (server && !dns_server_dnssec_supported(server))
+ return false;
+
+ HASHMAP_FOREACH(l, m->links, i)
+ if (!link_dnssec_supported(l))
+ return false;
+
+ return true;
+}
+
+DnsOverTlsMode manager_get_dns_over_tls_mode(Manager *m) {
+ assert(m);
+
+ if (m->dns_over_tls_mode != _DNS_OVER_TLS_MODE_INVALID)
+ return m->dns_over_tls_mode;
+
+ return DNS_OVER_TLS_NO;
+}
+
+void manager_dnssec_verdict(Manager *m, DnssecVerdict verdict, const DnsResourceKey *key) {
+
+ assert(verdict >= 0);
+ assert(verdict < _DNSSEC_VERDICT_MAX);
+
+ if (DEBUG_LOGGING) {
+ char s[DNS_RESOURCE_KEY_STRING_MAX];
+
+ log_debug("Found verdict for lookup %s: %s",
+ dns_resource_key_to_string(key, s, sizeof s),
+ dnssec_verdict_to_string(verdict));
+ }
+
+ m->n_dnssec_verdict[verdict]++;
+}
+
+bool manager_routable(Manager *m, int family) {
+ Iterator i;
+ Link *l;
+
+ assert(m);
+
+ /* Returns true if the host has at least one interface with a routable address of the specified type */
+
+ HASHMAP_FOREACH(l, m->links, i)
+ if (link_relevant(l, family, false))
+ return true;
+
+ return false;
+}
+
+void manager_flush_caches(Manager *m) {
+ DnsScope *scope;
+
+ assert(m);
+
+ LIST_FOREACH(scopes, scope, m->dns_scopes)
+ dns_cache_flush(&scope->cache);
+
+ log_info("Flushed all caches.");
+}
+
+void manager_reset_server_features(Manager *m) {
+ Iterator i;
+ Link *l;
+
+ dns_server_reset_features_all(m->dns_servers);
+ dns_server_reset_features_all(m->fallback_dns_servers);
+
+ HASHMAP_FOREACH(l, m->links, i)
+ dns_server_reset_features_all(l->dns_servers);
+
+ log_info("Resetting learnt feature levels on all servers.");
+}
+
+void manager_cleanup_saved_user(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r;
+
+ assert(m);
+
+ /* Clean up all saved per-link files in /run/systemd/resolve/netif/ that don't have a matching interface
+ * anymore. These files are created to persist settings pushed in by the user via the bus, so that resolved can
+ * be restarted without losing this data. */
+
+ d = opendir("/run/systemd/resolve/netif/");
+ if (!d) {
+ if (errno == ENOENT)
+ return;
+
+ log_warning_errno(errno, "Failed to open interface directory: %m");
+ return;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, log_error_errno(errno, "Failed to read interface directory: %m")) {
+ _cleanup_free_ char *p = NULL;
+ int ifindex;
+ Link *l;
+
+ if (!IN_SET(de->d_type, DT_UNKNOWN, DT_REG))
+ continue;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ r = parse_ifindex(de->d_name, &ifindex);
+ if (r < 0) /* Probably some temporary file from a previous run. Delete it */
+ goto rm;
+
+ l = hashmap_get(m->links, INT_TO_PTR(ifindex));
+ if (!l) /* link vanished */
+ goto rm;
+
+ if (l->is_managed) /* now managed by networkd, hence the bus settings are useless */
+ goto rm;
+
+ continue;
+
+ rm:
+ p = strappend("/run/systemd/resolve/netif/", de->d_name);
+ if (!p) {
+ log_oom();
+ return;
+ }
+
+ (void) unlink(p);
+ }
+}
+
+bool manager_next_dnssd_names(Manager *m) {
+ Iterator i;
+ DnssdService *s;
+ bool tried = false;
+ int r;
+
+ assert(m);
+
+ HASHMAP_FOREACH(s, m->dnssd_services, i) {
+ _cleanup_free_ char * new_name = NULL;
+
+ if (!s->withdrawn)
+ continue;
+
+ r = manager_next_random_name(s->name_template, &new_name);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to get new name for service '%s': %m", s->name);
+ continue;
+ }
+
+ free_and_replace(s->name_template, new_name);
+
+ s->withdrawn = false;
+
+ tried = true;
+ }
+
+ if (tried)
+ manager_refresh_rrs(m);
+
+ return tried;
+}
diff --git a/src/resolve/resolved-manager.h b/src/resolve/resolved-manager.h
new file mode 100644
index 0000000..06c76f6
--- /dev/null
+++ b/src/resolve/resolved-manager.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-event.h"
+#include "sd-netlink.h"
+#include "sd-network.h"
+
+#include "hashmap.h"
+#include "list.h"
+#include "ordered-set.h"
+#include "resolve-util.h"
+
+typedef struct Manager Manager;
+
+#include "resolved-conf.h"
+#include "resolved-dns-query.h"
+#include "resolved-dns-search-domain.h"
+#include "resolved-dns-server.h"
+#include "resolved-dns-stream.h"
+#include "resolved-dns-trust-anchor.h"
+#include "resolved-link.h"
+
+#define MANAGER_SEARCH_DOMAINS_MAX 256
+#define MANAGER_DNS_SERVERS_MAX 256
+
+typedef struct EtcHosts {
+ Hashmap *by_address;
+ Hashmap *by_name;
+ Set *no_address;
+} EtcHosts;
+
+struct Manager {
+ sd_event *event;
+
+ ResolveSupport llmnr_support;
+ ResolveSupport mdns_support;
+ DnssecMode dnssec_mode;
+ DnsOverTlsMode dns_over_tls_mode;
+ bool enable_cache;
+ DnsStubListenerMode dns_stub_listener_mode;
+
+ /* Network */
+ Hashmap *links;
+
+ sd_netlink *rtnl;
+ sd_event_source *rtnl_event_source;
+
+ sd_network_monitor *network_monitor;
+ sd_event_source *network_event_source;
+
+ /* DNS query management */
+ Hashmap *dns_transactions;
+ LIST_HEAD(DnsQuery, dns_queries);
+ unsigned n_dns_queries;
+
+ LIST_HEAD(DnsStream, dns_streams);
+ unsigned n_dns_streams;
+
+ /* Unicast dns */
+ LIST_HEAD(DnsServer, dns_servers);
+ LIST_HEAD(DnsServer, fallback_dns_servers);
+ unsigned n_dns_servers; /* counts both main and fallback */
+ DnsServer *current_dns_server;
+
+ LIST_HEAD(DnsSearchDomain, search_domains);
+ unsigned n_search_domains;
+
+ bool need_builtin_fallbacks:1;
+
+ bool read_resolv_conf:1;
+ usec_t resolv_conf_mtime;
+
+ DnsTrustAnchor trust_anchor;
+
+ LIST_HEAD(DnsScope, dns_scopes);
+ DnsScope *unicast_scope;
+
+ /* LLMNR */
+ int llmnr_ipv4_udp_fd;
+ int llmnr_ipv6_udp_fd;
+ int llmnr_ipv4_tcp_fd;
+ int llmnr_ipv6_tcp_fd;
+
+ sd_event_source *llmnr_ipv4_udp_event_source;
+ sd_event_source *llmnr_ipv6_udp_event_source;
+ sd_event_source *llmnr_ipv4_tcp_event_source;
+ sd_event_source *llmnr_ipv6_tcp_event_source;
+
+ /* mDNS */
+ int mdns_ipv4_fd;
+ int mdns_ipv6_fd;
+
+ /* DNS-SD */
+ Hashmap *dnssd_services;
+
+ sd_event_source *mdns_ipv4_event_source;
+ sd_event_source *mdns_ipv6_event_source;
+
+ /* dbus */
+ sd_bus *bus;
+
+ /* The hostname we publish on LLMNR and mDNS */
+ char *full_hostname;
+ char *llmnr_hostname;
+ char *mdns_hostname;
+ DnsResourceKey *llmnr_host_ipv4_key;
+ DnsResourceKey *llmnr_host_ipv6_key;
+ DnsResourceKey *mdns_host_ipv4_key;
+ DnsResourceKey *mdns_host_ipv6_key;
+
+ /* Watch the system hostname */
+ int hostname_fd;
+ sd_event_source *hostname_event_source;
+
+ sd_event_source *sigusr1_event_source;
+ sd_event_source *sigusr2_event_source;
+ sd_event_source *sigrtmin1_event_source;
+
+ unsigned n_transactions_total;
+ unsigned n_dnssec_verdict[_DNSSEC_VERDICT_MAX];
+
+ /* Data from /etc/hosts */
+ EtcHosts etc_hosts;
+ usec_t etc_hosts_last, etc_hosts_mtime;
+ bool read_etc_hosts;
+
+ /* Local DNS stub on 127.0.0.53:53 */
+ int dns_stub_udp_fd;
+ int dns_stub_tcp_fd;
+
+ sd_event_source *dns_stub_udp_event_source;
+ sd_event_source *dns_stub_tcp_event_source;
+
+ Hashmap *polkit_registry;
+};
+
+/* Manager */
+
+int manager_new(Manager **ret);
+Manager* manager_free(Manager *m);
+
+int manager_start(Manager *m);
+
+uint32_t manager_find_mtu(Manager *m);
+
+int manager_write(Manager *m, int fd, DnsPacket *p);
+int manager_send(Manager *m, int fd, int ifindex, int family, const union in_addr_union *destination, uint16_t port, const union in_addr_union *source, DnsPacket *p);
+int manager_recv(Manager *m, int fd, DnsProtocol protocol, DnsPacket **ret);
+
+int manager_find_ifindex(Manager *m, int family, const union in_addr_union *in_addr);
+LinkAddress* manager_find_link_address(Manager *m, int family, const union in_addr_union *in_addr);
+
+void manager_refresh_rrs(Manager *m);
+int manager_next_hostname(Manager *m);
+
+bool manager_our_packet(Manager *m, DnsPacket *p);
+DnsScope* manager_find_scope(Manager *m, DnsPacket *p);
+
+void manager_verify_all(Manager *m);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
+
+#define EXTRA_CMSG_SPACE 1024
+
+int manager_is_own_hostname(Manager *m, const char *name);
+
+int manager_compile_dns_servers(Manager *m, OrderedSet **servers);
+int manager_compile_search_domains(Manager *m, OrderedSet **domains, int filter_route);
+
+DnssecMode manager_get_dnssec_mode(Manager *m);
+bool manager_dnssec_supported(Manager *m);
+
+DnsOverTlsMode manager_get_dns_over_tls_mode(Manager *m);
+
+void manager_dnssec_verdict(Manager *m, DnssecVerdict verdict, const DnsResourceKey *key);
+
+bool manager_routable(Manager *m, int family);
+
+void manager_flush_caches(Manager *m);
+void manager_reset_server_features(Manager *m);
+
+void manager_cleanup_saved_user(Manager *m);
+
+bool manager_next_dnssd_names(Manager *m);
diff --git a/src/resolve/resolved-mdns.c b/src/resolve/resolved-mdns.c
new file mode 100644
index 0000000..89c2497
--- /dev/null
+++ b/src/resolve/resolved-mdns.c
@@ -0,0 +1,481 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <resolv.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "resolved-manager.h"
+#include "resolved-mdns.h"
+
+#define CLEAR_CACHE_FLUSH(x) (~MDNS_RR_CACHE_FLUSH & (x))
+
+void manager_mdns_stop(Manager *m) {
+ assert(m);
+
+ m->mdns_ipv4_event_source = sd_event_source_unref(m->mdns_ipv4_event_source);
+ m->mdns_ipv4_fd = safe_close(m->mdns_ipv4_fd);
+
+ m->mdns_ipv6_event_source = sd_event_source_unref(m->mdns_ipv6_event_source);
+ m->mdns_ipv6_fd = safe_close(m->mdns_ipv6_fd);
+}
+
+int manager_mdns_start(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->mdns_support == RESOLVE_SUPPORT_NO)
+ return 0;
+
+ r = manager_mdns_ipv4_fd(m);
+ if (r == -EADDRINUSE)
+ goto eaddrinuse;
+ if (r < 0)
+ return r;
+
+ if (socket_ipv6_is_supported()) {
+ r = manager_mdns_ipv6_fd(m);
+ if (r == -EADDRINUSE)
+ goto eaddrinuse;
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+
+eaddrinuse:
+ log_warning("Another mDNS responder prohibits binding the socket to the same port. Turning off mDNS support.");
+ m->mdns_support = RESOLVE_SUPPORT_NO;
+ manager_mdns_stop(m);
+
+ return 0;
+}
+
+static int mdns_rr_compare(DnsResourceRecord * const *a, DnsResourceRecord * const *b) {
+ DnsResourceRecord *x = *(DnsResourceRecord **) a, *y = *(DnsResourceRecord **) b;
+ size_t m;
+ int r;
+
+ assert(x);
+ assert(y);
+
+ r = CMP(CLEAR_CACHE_FLUSH(x->key->class), CLEAR_CACHE_FLUSH(y->key->class));
+ if (r != 0)
+ return r;
+
+ r = CMP(x->key->type, y->key->type);
+ if (r != 0)
+ return r;
+
+ r = dns_resource_record_to_wire_format(x, false);
+ if (r < 0) {
+ log_warning_errno(r, "Can't wire-format RR: %m");
+ return 0;
+ }
+
+ r = dns_resource_record_to_wire_format(y, false);
+ if (r < 0) {
+ log_warning_errno(r, "Can't wire-format RR: %m");
+ return 0;
+ }
+
+ m = MIN(DNS_RESOURCE_RECORD_RDATA_SIZE(x), DNS_RESOURCE_RECORD_RDATA_SIZE(y));
+
+ r = memcmp(DNS_RESOURCE_RECORD_RDATA(x), DNS_RESOURCE_RECORD_RDATA(y), m);
+ if (r != 0)
+ return r;
+
+ return CMP(DNS_RESOURCE_RECORD_RDATA_SIZE(x), DNS_RESOURCE_RECORD_RDATA_SIZE(y));
+}
+
+static int proposed_rrs_cmp(DnsResourceRecord **x, unsigned x_size, DnsResourceRecord **y, unsigned y_size) {
+ unsigned m;
+ int r;
+
+ m = MIN(x_size, y_size);
+ for (unsigned i = 0; i < m; i++) {
+ r = mdns_rr_compare(&x[i], &y[i]);
+ if (r != 0)
+ return r;
+ }
+
+ return CMP(x_size, y_size);
+}
+
+static int mdns_packet_extract_matching_rrs(DnsPacket *p, DnsResourceKey *key, DnsResourceRecord ***ret_rrs) {
+ _cleanup_free_ DnsResourceRecord **list = NULL;
+ unsigned n = 0, size = 0;
+ int r;
+
+ assert(p);
+ assert(key);
+ assert(ret_rrs);
+ assert_return(DNS_PACKET_NSCOUNT(p) > 0, -EINVAL);
+
+ for (size_t i = DNS_PACKET_ANCOUNT(p); i < (DNS_PACKET_ANCOUNT(p) + DNS_PACKET_NSCOUNT(p)); i++) {
+ r = dns_resource_key_match_rr(key, p->answer->items[i].rr, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ size++;
+ }
+
+ if (size == 0)
+ return 0;
+
+ list = new(DnsResourceRecord *, size);
+ if (!list)
+ return -ENOMEM;
+
+ for (size_t i = DNS_PACKET_ANCOUNT(p); i < (DNS_PACKET_ANCOUNT(p) + DNS_PACKET_NSCOUNT(p)); i++) {
+ r = dns_resource_key_match_rr(key, p->answer->items[i].rr, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ list[n++] = p->answer->items[i].rr;
+ }
+ assert(n == size);
+ typesafe_qsort(list, size, mdns_rr_compare);
+
+ *ret_rrs = TAKE_PTR(list);
+
+ return size;
+}
+
+static int mdns_do_tiebreak(DnsResourceKey *key, DnsAnswer *answer, DnsPacket *p) {
+ _cleanup_free_ DnsResourceRecord **our = NULL, **remote = NULL;
+ DnsResourceRecord *rr;
+ size_t i = 0, size;
+ int r;
+
+ size = dns_answer_size(answer);
+ our = new(DnsResourceRecord *, size);
+ if (!our)
+ return -ENOMEM;
+
+ DNS_ANSWER_FOREACH(rr, answer)
+ our[i++] = rr;
+
+ typesafe_qsort(our, size, mdns_rr_compare);
+
+ r = mdns_packet_extract_matching_rrs(p, key, &remote);
+ if (r < 0)
+ return r;
+
+ assert(r > 0);
+
+ if (proposed_rrs_cmp(remote, r, our, size) > 0)
+ return 1;
+
+ return 0;
+}
+
+static int mdns_scope_process_query(DnsScope *s, DnsPacket *p) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *full_answer = NULL;
+ _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
+ DnsResourceKey *key = NULL;
+ DnsResourceRecord *rr;
+ bool tentative = false;
+ int r;
+
+ assert(s);
+ assert(p);
+
+ r = dns_packet_extract(p);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to extract resource records from incoming packet: %m");
+
+ assert_return((dns_question_size(p->question) > 0), -EINVAL);
+
+ DNS_QUESTION_FOREACH(key, p->question) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL, *soa = NULL;
+
+ r = dns_zone_lookup(&s->zone, key, 0, &answer, &soa, &tentative);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to lookup key: %m");
+
+ if (tentative && DNS_PACKET_NSCOUNT(p) > 0) {
+ /*
+ * A race condition detected with the probe packet from
+ * a remote host.
+ * Do simultaneous probe tiebreaking as described in
+ * RFC 6762, Section 8.2. In case we lost don't reply
+ * the question and withdraw conflicting RRs.
+ */
+ r = mdns_do_tiebreak(key, answer, p);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to do tiebreaking");
+
+ if (r > 0) { /* we lost */
+ DNS_ANSWER_FOREACH(rr, answer) {
+ DnsZoneItem *i;
+
+ i = dns_zone_get(&s->zone, rr);
+ if (i)
+ dns_zone_item_conflict(i);
+ }
+
+ continue;
+ }
+ }
+
+ r = dns_answer_extend(&full_answer, answer);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to extend answer: %m");
+ }
+
+ if (dns_answer_isempty(full_answer))
+ return 0;
+
+ r = dns_scope_make_reply_packet(s, DNS_PACKET_ID(p), DNS_RCODE_SUCCESS, NULL, full_answer, NULL, false, &reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to build reply packet: %m");
+
+ if (!ratelimit_below(&s->ratelimit))
+ return 0;
+
+ r = dns_scope_emit_udp(s, -1, reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to send reply packet: %m");
+
+ return 0;
+}
+
+static int on_mdns_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ Manager *m = userdata;
+ DnsScope *scope;
+ int r;
+
+ r = manager_recv(m, fd, DNS_PROTOCOL_MDNS, &p);
+ if (r <= 0)
+ return r;
+
+ if (manager_our_packet(m, p))
+ return 0;
+
+ scope = manager_find_scope(m, p);
+ if (!scope) {
+ log_debug("Got mDNS UDP packet on unknown scope. Ignoring.");
+ return 0;
+ }
+
+ if (dns_packet_validate_reply(p) > 0) {
+ DnsResourceRecord *rr;
+
+ log_debug("Got mDNS reply packet");
+
+ /*
+ * mDNS is different from regular DNS and LLMNR with regard to handling responses.
+ * While on other protocols, we can ignore every answer that doesn't match a question
+ * we broadcast earlier, RFC6762, section 18.1 recommends looking at and caching all
+ * incoming information, regardless of the DNS packet ID.
+ *
+ * Hence, extract the packet here, and try to find a transaction for answer the we got
+ * and complete it. Also store the new information in scope's cache.
+ */
+ r = dns_packet_extract(p);
+ if (r < 0) {
+ log_debug("mDNS packet extraction failed.");
+ return 0;
+ }
+
+ dns_scope_check_conflicts(scope, p);
+
+ DNS_ANSWER_FOREACH(rr, p->answer) {
+ const char *name = dns_resource_key_name(rr->key);
+ DnsTransaction *t;
+
+ /* If the received reply packet contains ANY record that is not .local or .in-addr.arpa,
+ * we assume someone's playing tricks on us and discard the packet completely. */
+ if (!(dns_name_endswith(name, "in-addr.arpa") > 0 ||
+ dns_name_endswith(name, "local") > 0))
+ return 0;
+
+ if (rr->ttl == 0) {
+ log_debug("Got a goodbye packet");
+ /* See the section 10.1 of RFC6762 */
+ rr->ttl = 1;
+ }
+
+ t = dns_scope_find_transaction(scope, rr->key, false);
+ if (t)
+ dns_transaction_process_reply(t, p);
+
+ /* Also look for the various types of ANY transactions */
+ t = dns_scope_find_transaction(scope, &DNS_RESOURCE_KEY_CONST(rr->key->class, DNS_TYPE_ANY, dns_resource_key_name(rr->key)), false);
+ if (t)
+ dns_transaction_process_reply(t, p);
+
+ t = dns_scope_find_transaction(scope, &DNS_RESOURCE_KEY_CONST(DNS_CLASS_ANY, rr->key->type, dns_resource_key_name(rr->key)), false);
+ if (t)
+ dns_transaction_process_reply(t, p);
+
+ t = dns_scope_find_transaction(scope, &DNS_RESOURCE_KEY_CONST(DNS_CLASS_ANY, DNS_TYPE_ANY, dns_resource_key_name(rr->key)), false);
+ if (t)
+ dns_transaction_process_reply(t, p);
+ }
+
+ dns_cache_put(&scope->cache, NULL, DNS_PACKET_RCODE(p), p->answer, false, (uint32_t) -1, 0, p->family, &p->sender);
+
+ } else if (dns_packet_validate_query(p) > 0) {
+ log_debug("Got mDNS query packet for id %u", DNS_PACKET_ID(p));
+
+ r = mdns_scope_process_query(scope, p);
+ if (r < 0) {
+ log_debug_errno(r, "mDNS query processing failed: %m");
+ return 0;
+ }
+ } else
+ log_debug("Invalid mDNS UDP packet.");
+
+ return 0;
+}
+
+int manager_mdns_ipv4_fd(Manager *m) {
+ union sockaddr_union sa = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = htobe16(MDNS_PORT),
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(m);
+
+ if (m->mdns_ipv4_fd >= 0)
+ return m->mdns_ipv4_fd;
+
+ s = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return log_error_errno(errno, "mDNS-IPv4: Failed to create socket: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_TTL, 255);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set IP_TTL: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_MULTICAST_TTL, 255);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set IP_MULTICAST_TTL: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_MULTICAST_LOOP, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set IP_MULTICAST_LOOP: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_PKTINFO, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set IP_PKTINFO: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_RECVTTL, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set IP_RECVTTL: %m");
+
+ /* Disable Don't-Fragment bit in the IP header */
+ r = setsockopt_int(s, IPPROTO_IP, IP_MTU_DISCOVER, IP_PMTUDISC_DONT);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set IP_MTU_DISCOVER: %m");
+
+ /* See the section 15.1 of RFC6762 */
+ /* first try to bind without SO_REUSEADDR to detect another mDNS responder */
+ r = bind(s, &sa.sa, sizeof(sa.in));
+ if (r < 0) {
+ if (errno != EADDRINUSE)
+ return log_error_errno(errno, "mDNS-IPv4: Failed to bind socket: %m");
+
+ log_warning("mDNS-IPv4: There appears to be another mDNS responder running, or previously systemd-resolved crashed with some outstanding transfers.");
+
+ /* try again with SO_REUSEADDR */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set SO_REUSEADDR: %m");
+
+ r = bind(s, &sa.sa, sizeof(sa.in));
+ if (r < 0)
+ return log_error_errno(errno, "mDNS-IPv4: Failed to bind socket: %m");
+ } else {
+ /* enable SO_REUSEADDR for the case that the user really wants multiple mDNS responders */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set SO_REUSEADDR: %m");
+ }
+
+ r = sd_event_add_io(m->event, &m->mdns_ipv4_event_source, s, EPOLLIN, on_mdns_packet, m);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to create event source: %m");
+
+ return m->mdns_ipv4_fd = TAKE_FD(s);
+}
+
+int manager_mdns_ipv6_fd(Manager *m) {
+ union sockaddr_union sa = {
+ .in6.sin6_family = AF_INET6,
+ .in6.sin6_port = htobe16(MDNS_PORT),
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(m);
+
+ if (m->mdns_ipv6_fd >= 0)
+ return m->mdns_ipv6_fd;
+
+ s = socket(AF_INET6, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return log_error_errno(errno, "mDNS-IPv6: Failed to create socket: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 255);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set IPV6_UNICAST_HOPS: %m");
+
+ /* RFC 4795, section 2.5 recommends setting the TTL of UDP packets to 255. */
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, 255);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set IPV6_MULTICAST_HOPS: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set IPV6_MULTICAST_LOOP: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_V6ONLY, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set IPV6_V6ONLY: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set IPV6_RECVPKTINFO: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set IPV6_RECVHOPLIMIT: %m");
+
+ /* See the section 15.1 of RFC6762 */
+ /* first try to bind without SO_REUSEADDR to detect another mDNS responder */
+ r = bind(s, &sa.sa, sizeof(sa.in6));
+ if (r < 0) {
+ if (errno != EADDRINUSE)
+ return log_error_errno(errno, "mDNS-IPv6: Failed to bind socket: %m");
+
+ log_warning("mDNS-IPv6: There appears to be another mDNS responder running, or previously systemd-resolved crashed with some outstanding transfers.");
+
+ /* try again with SO_REUSEADDR */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set SO_REUSEADDR: %m");
+
+ r = bind(s, &sa.sa, sizeof(sa.in6));
+ if (r < 0)
+ return log_error_errno(errno, "mDNS-IPv6: Failed to bind socket: %m");
+ } else {
+ /* enable SO_REUSEADDR for the case that the user really wants multiple mDNS responders */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set SO_REUSEADDR: %m");
+ }
+
+ r = sd_event_add_io(m->event, &m->mdns_ipv6_event_source, s, EPOLLIN, on_mdns_packet, m);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to create event source: %m");
+
+ return m->mdns_ipv6_fd = TAKE_FD(s);
+}
diff --git a/src/resolve/resolved-mdns.h b/src/resolve/resolved-mdns.h
new file mode 100644
index 0000000..2f69478
--- /dev/null
+++ b/src/resolve/resolved-mdns.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "resolved-manager.h"
+
+#define MDNS_PORT 5353
+#define MDNS_ANNOUNCE_DELAY (1 * USEC_PER_SEC)
+
+int manager_mdns_ipv4_fd(Manager *m);
+int manager_mdns_ipv6_fd(Manager *m);
+
+void manager_mdns_stop(Manager *m);
+int manager_mdns_start(Manager *m);
diff --git a/src/resolve/resolved-resolv-conf.c b/src/resolve/resolved-resolv-conf.c
new file mode 100644
index 0000000..5205071
--- /dev/null
+++ b/src/resolve/resolved-resolv-conf.c
@@ -0,0 +1,400 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <resolv.h>
+#include <stdio_ext.h>
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "ordered-set.h"
+#include "resolved-conf.h"
+#include "resolved-dns-server.h"
+#include "resolved-resolv-conf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util-label.h"
+
+/* A resolv.conf file containing the DNS server and domain data we learnt from uplink, i.e. the full uplink data */
+#define PRIVATE_UPLINK_RESOLV_CONF "/run/systemd/resolve/resolv.conf"
+
+/* A resolv.conf file containing the domain data we learnt from uplink, but our own DNS server address. */
+#define PRIVATE_STUB_RESOLV_CONF "/run/systemd/resolve/stub-resolv.conf"
+
+/* A static resolv.conf file containing no domains, but only our own DNS server address */
+#define PRIVATE_STATIC_RESOLV_CONF ROOTLIBEXECDIR "/resolv.conf"
+
+int manager_check_resolv_conf(const Manager *m) {
+ const char *path;
+ struct stat st;
+ int r;
+
+ assert(m);
+
+ /* This warns only when our stub listener is disabled and /etc/resolv.conf is a symlink to
+ * PRIVATE_STATIC_RESOLV_CONF or PRIVATE_STUB_RESOLV_CONF. */
+
+ if (m->dns_stub_listener_mode != DNS_STUB_LISTENER_NO)
+ return 0;
+
+ r = stat("/etc/resolv.conf", &st);
+ if (r < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to stat /etc/resolv.conf: %m");
+ }
+
+ FOREACH_STRING(path,
+ PRIVATE_STUB_RESOLV_CONF,
+ PRIVATE_STATIC_RESOLV_CONF) {
+
+ struct stat own;
+
+ /* Is it symlinked to our own uplink file? */
+ if (stat(path, &own) >= 0 &&
+ st.st_dev == own.st_dev &&
+ st.st_ino == own.st_ino) {
+ log_warning("DNSStubListener= is disabled, but /etc/resolv.conf is a symlink to %s "
+ "which expects DNSStubListener= to be enabled.", path);
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
+static bool file_is_our_own(const struct stat *st) {
+ const char *path;
+
+ assert(st);
+
+ FOREACH_STRING(path,
+ PRIVATE_UPLINK_RESOLV_CONF,
+ PRIVATE_STUB_RESOLV_CONF,
+ PRIVATE_STATIC_RESOLV_CONF) {
+
+ struct stat own;
+
+ /* Is it symlinked to our own uplink file? */
+ if (stat(path, &own) >= 0 &&
+ st->st_dev == own.st_dev &&
+ st->st_ino == own.st_ino)
+ return true;
+ }
+
+ return false;
+}
+
+int manager_read_resolv_conf(Manager *m) {
+ _cleanup_fclose_ FILE *f = NULL;
+ struct stat st;
+ unsigned n = 0;
+ int r;
+
+ assert(m);
+
+ /* Reads the system /etc/resolv.conf, if it exists and is not
+ * symlinked to our own resolv.conf instance */
+
+ if (!m->read_resolv_conf)
+ return 0;
+
+ r = stat("/etc/resolv.conf", &st);
+ if (r < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ r = log_warning_errno(errno, "Failed to stat /etc/resolv.conf: %m");
+ goto clear;
+ }
+
+ /* Have we already seen the file? */
+ if (timespec_load(&st.st_mtim) == m->resolv_conf_mtime)
+ return 0;
+
+ if (file_is_our_own(&st))
+ return 0;
+
+ f = fopen("/etc/resolv.conf", "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ r = log_warning_errno(errno, "Failed to open /etc/resolv.conf: %m");
+ goto clear;
+ }
+
+ if (fstat(fileno(f), &st) < 0) {
+ r = log_error_errno(errno, "Failed to stat open file: %m");
+ goto clear;
+ }
+
+ if (file_is_our_own(&st))
+ return 0;
+
+ dns_server_mark_all(m->dns_servers);
+ dns_search_domain_mark_all(m->search_domains);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *a;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read /etc/resolv.conf: %m");
+ goto clear;
+ }
+ if (r == 0)
+ break;
+
+ n++;
+
+ l = strstrip(line);
+ if (IN_SET(*l, '#', ';', 0))
+ continue;
+
+ a = first_word(l, "nameserver");
+ if (a) {
+ r = manager_parse_dns_server_string_and_warn(m, DNS_SERVER_SYSTEM, a);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse DNS server address '%s', ignoring.", a);
+
+ continue;
+ }
+
+ a = first_word(l, "domain");
+ if (!a) /* We treat "domain" lines, and "search" lines as equivalent, and add both to our list. */
+ a = first_word(l, "search");
+ if (a) {
+ r = manager_parse_search_domains_and_warn(m, a);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse search domain string '%s', ignoring.", a);
+ }
+
+ log_syntax(NULL, LOG_DEBUG, "/etc/resolv.conf", n, 0, "Ignoring resolv.conf line: %s", l);
+ }
+
+ m->resolv_conf_mtime = timespec_load(&st.st_mtim);
+
+ /* Flush out all servers and search domains that are still
+ * marked. Those are then ones that didn't appear in the new
+ * /etc/resolv.conf */
+ dns_server_unlink_marked(m->dns_servers);
+ dns_search_domain_unlink_marked(m->search_domains);
+
+ /* Whenever /etc/resolv.conf changes, start using the first
+ * DNS server of it. This is useful to deal with broken
+ * network managing implementations (like NetworkManager),
+ * that when connecting to a VPN place both the VPN DNS
+ * servers and the local ones in /etc/resolv.conf. Without
+ * resetting the DNS server to use back to the first entry we
+ * will continue to use the local one thus being unable to
+ * resolve VPN domains. */
+ manager_set_dns_server(m, m->dns_servers);
+
+ /* Unconditionally flush the cache when /etc/resolv.conf is
+ * modified, even if the data it contained was completely
+ * identical to the previous version we used. We do this
+ * because altering /etc/resolv.conf is typically done when
+ * the network configuration changes, and that should be
+ * enough to flush the global unicast DNS cache. */
+ if (m->unicast_scope)
+ dns_cache_flush(&m->unicast_scope->cache);
+
+ /* If /etc/resolv.conf changed, make sure to forget everything we learned about the DNS servers. After all we
+ * might now talk to a very different DNS server that just happens to have the same IP address as an old one
+ * (think 192.168.1.1). */
+ dns_server_reset_features_all(m->dns_servers);
+
+ return 0;
+
+clear:
+ dns_server_unlink_all(m->dns_servers);
+ dns_search_domain_unlink_all(m->search_domains);
+ return r;
+}
+
+static void write_resolv_conf_server(DnsServer *s, FILE *f, unsigned *count) {
+ DnsScope *scope;
+
+ assert(s);
+ assert(f);
+ assert(count);
+
+ if (!dns_server_string(s)) {
+ log_warning("Out of memory, or invalid DNS address. Ignoring server.");
+ return;
+ }
+
+ /* Check if the scope this DNS server belongs to is suitable as 'default' route for lookups; resolv.conf does
+ * not have a syntax to express that, so it must not appear as a global name server to avoid routing unrelated
+ * domains to it (which is a privacy violation, will most probably fail anyway, and adds unnecessary load) */
+ scope = dns_server_scope(s);
+ if (scope && !dns_scope_is_default_route(scope)) {
+ log_debug("Scope of DNS server %s has only route-only domains, not using as global name server", dns_server_string(s));
+ return;
+ }
+
+ if (*count == MAXNS)
+ fputs("# Too many DNS servers configured, the following entries may be ignored.\n", f);
+ (*count)++;
+
+ fprintf(f, "nameserver %s\n", dns_server_string(s));
+}
+
+static void write_resolv_conf_search(
+ OrderedSet *domains,
+ FILE *f) {
+ unsigned length = 0, count = 0;
+ Iterator i;
+ char *domain;
+
+ assert(domains);
+ assert(f);
+
+ fputs("search", f);
+
+ ORDERED_SET_FOREACH(domain, domains, i) {
+ if (++count > MAXDNSRCH) {
+ fputs("\n# Too many search domains configured, remaining ones ignored.", f);
+ break;
+ }
+ length += strlen(domain) + 1;
+ if (length > 256) {
+ fputs("\n# Total length of all search domains is too long, remaining ones ignored.", f);
+ break;
+ }
+ fputc(' ', f);
+ fputs(domain, f);
+ }
+
+ fputs("\n", f);
+}
+
+static int write_uplink_resolv_conf_contents(FILE *f, OrderedSet *dns, OrderedSet *domains) {
+ Iterator i;
+
+ fputs("# This file is managed by man:systemd-resolved(8). Do not edit.\n"
+ "#\n"
+ "# This is a dynamic resolv.conf file for connecting local clients directly to\n"
+ "# all known uplink DNS servers. This file lists all configured search domains.\n"
+ "#\n"
+ "# Third party programs must not access this file directly, but only through the\n"
+ "# symlink at /etc/resolv.conf. To manage man:resolv.conf(5) in a different way,\n"
+ "# replace this symlink by a static file or a different symlink.\n"
+ "#\n"
+ "# See man:systemd-resolved.service(8) for details about the supported modes of\n"
+ "# operation for /etc/resolv.conf.\n"
+ "\n", f);
+
+ if (ordered_set_isempty(dns))
+ fputs("# No DNS servers known.\n", f);
+ else {
+ unsigned count = 0;
+ DnsServer *s;
+
+ ORDERED_SET_FOREACH(s, dns, i)
+ write_resolv_conf_server(s, f, &count);
+ }
+
+ if (!ordered_set_isempty(domains))
+ write_resolv_conf_search(domains, f);
+
+ return fflush_and_check(f);
+}
+
+static int write_stub_resolv_conf_contents(FILE *f, OrderedSet *dns, OrderedSet *domains) {
+ fputs_unlocked("# This file is managed by man:systemd-resolved(8). Do not edit.\n"
+ "#\n"
+ "# This is a dynamic resolv.conf file for connecting local clients to the\n"
+ "# internal DNS stub resolver of systemd-resolved. This file lists all\n"
+ "# configured search domains.\n"
+ "#\n"
+ "# Run \"resolvectl status\" to see details about the uplink DNS servers\n"
+ "# currently in use.\n"
+ "#\n"
+ "# Third party programs must not access this file directly, but only through the\n"
+ "# symlink at /etc/resolv.conf. To manage man:resolv.conf(5) in a different way,\n"
+ "# replace this symlink by a static file or a different symlink.\n"
+ "#\n"
+ "# See man:systemd-resolved.service(8) for details about the supported modes of\n"
+ "# operation for /etc/resolv.conf.\n"
+ "\n"
+ "nameserver 127.0.0.53\n"
+ "options edns0\n", f);
+
+ if (!ordered_set_isempty(domains))
+ write_resolv_conf_search(domains, f);
+
+ return fflush_and_check(f);
+}
+
+int manager_write_resolv_conf(Manager *m) {
+
+ _cleanup_ordered_set_free_ OrderedSet *dns = NULL, *domains = NULL;
+ _cleanup_free_ char *temp_path_uplink = NULL, *temp_path_stub = NULL;
+ _cleanup_fclose_ FILE *f_uplink = NULL, *f_stub = NULL;
+ int r;
+
+ assert(m);
+
+ /* Read the system /etc/resolv.conf first */
+ (void) manager_read_resolv_conf(m);
+
+ /* Add the full list to a set, to filter out duplicates */
+ r = manager_compile_dns_servers(m, &dns);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to compile list of DNS servers: %m");
+
+ r = manager_compile_search_domains(m, &domains, false);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to compile list of search domains: %m");
+
+ r = fopen_temporary_label(PRIVATE_UPLINK_RESOLV_CONF, PRIVATE_UPLINK_RESOLV_CONF, &f_uplink, &temp_path_uplink);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to open private resolv.conf file for writing: %m");
+
+ (void) __fsetlocking(f_uplink, FSETLOCKING_BYCALLER);
+ (void) fchmod(fileno(f_uplink), 0644);
+
+ r = fopen_temporary_label(PRIVATE_STUB_RESOLV_CONF, PRIVATE_STUB_RESOLV_CONF, &f_stub, &temp_path_stub);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to open private stub-resolv.conf file for writing: %m");
+
+ (void) __fsetlocking(f_stub, FSETLOCKING_BYCALLER);
+ (void) fchmod(fileno(f_stub), 0644);
+
+ r = write_uplink_resolv_conf_contents(f_uplink, dns, domains);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write private resolv.conf contents: %m");
+ goto fail;
+ }
+
+ if (rename(temp_path_uplink, PRIVATE_UPLINK_RESOLV_CONF) < 0) {
+ r = log_error_errno(errno, "Failed to move private resolv.conf file into place: %m");
+ goto fail;
+ }
+
+ r = write_stub_resolv_conf_contents(f_stub, dns, domains);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write private stub-resolv.conf contents: %m");
+ goto fail;
+ }
+
+ if (rename(temp_path_stub, PRIVATE_STUB_RESOLV_CONF) < 0) {
+ r = log_error_errno(errno, "Failed to move private stub-resolv.conf file into place: %m");
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(PRIVATE_UPLINK_RESOLV_CONF);
+ (void) unlink(temp_path_uplink);
+ (void) unlink(PRIVATE_STUB_RESOLV_CONF);
+ (void) unlink(temp_path_stub);
+
+ return r;
+}
diff --git a/src/resolve/resolved-resolv-conf.h b/src/resolve/resolved-resolv-conf.h
new file mode 100644
index 0000000..f69cf2a
--- /dev/null
+++ b/src/resolve/resolved-resolv-conf.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "resolved-manager.h"
+
+int manager_check_resolv_conf(const Manager *m);
+int manager_read_resolv_conf(Manager *m);
+int manager_write_resolv_conf(Manager *m);
diff --git a/src/resolve/resolved.c b/src/resolve/resolved.c
new file mode 100644
index 0000000..0845b2c
--- /dev/null
+++ b/src/resolve/resolved.c
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+
+#include "capability-util.h"
+#include "daemon-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "resolved-conf.h"
+#include "resolved-manager.h"
+#include "resolved-resolv-conf.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "user-util.h"
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(notify_on_cleanup) const char *notify_stop = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ const char *user = "systemd-resolve";
+ uid_t uid;
+ gid_t gid;
+ int r;
+
+ log_setup_service();
+
+ if (argc != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments.");
+
+ umask(0022);
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return log_error_errno(r, "SELinux setup failed: %m");
+
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Cannot resolve user name %s: %m", user);
+
+ /* Always create the directory where resolv.conf will live */
+ r = mkdir_safe_label("/run/systemd/resolve", 0755, uid, gid, MKDIR_WARN_MODE);
+ if (r < 0)
+ return log_error_errno(r, "Could not create runtime directory: %m");
+
+ /* Drop privileges, but only if we have been started as root. If we are not running as root we assume most
+ * privileges are already dropped. */
+ if (getuid() == 0) {
+
+ /* Drop privileges, but keep three caps. Note that we drop those too, later on (see below) */
+ r = drop_privileges(uid, gid,
+ (UINT64_C(1) << CAP_NET_RAW)| /* needed for SO_BINDTODEVICE */
+ (UINT64_C(1) << CAP_NET_BIND_SERVICE)| /* needed to bind on port 53 */
+ (UINT64_C(1) << CAP_SETPCAP) /* needed in order to drop the caps later */);
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop privileges: %m");
+ }
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Could not create manager: %m");
+
+ r = manager_start(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start manager: %m");
+
+ /* Write finish default resolv.conf to avoid a dangling symlink */
+ (void) manager_write_resolv_conf(m);
+
+ (void) manager_check_resolv_conf(m);
+
+ /* Let's drop the remaining caps now */
+ r = capability_bounding_set_drop(0, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop remaining caps: %m");
+
+ notify_stop = notify_start(NOTIFY_READY, NOTIFY_STOPPING);
+
+ r = sd_event_loop(m->event);
+ if (r < 0)
+ return log_error_errno(r, "Event loop failed: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/resolve/resolved.conf.in b/src/resolve/resolved.conf.in
new file mode 100644
index 0000000..6898c78
--- /dev/null
+++ b/src/resolve/resolved.conf.in
@@ -0,0 +1,24 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See resolved.conf(5) for details
+
+[Resolve]
+#DNS=
+#FallbackDNS=@DNS_SERVERS@
+#Domains=
+#LLMNR=yes
+#MulticastDNS=yes
+#DNSSEC=@DEFAULT_DNSSEC_MODE@
+#DNSOverTLS=@DEFAULT_DNS_OVER_TLS_MODE@
+#Cache=yes
+#DNSStubListener=yes
+#ReadEtcHosts=yes
diff --git a/src/resolve/test-dns-packet.c b/src/resolve/test-dns-packet.c
new file mode 100644
index 0000000..f6df913
--- /dev/null
+++ b/src/resolve/test-dns-packet.c
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+#include <glob.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "glob-util.h"
+#include "log.h"
+#include "macro.h"
+#include "resolved-dns-packet.h"
+#include "resolved-dns-rr.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "unaligned.h"
+
+#define HASH_KEY SD_ID128_MAKE(d3,1e,48,90,4b,fa,4c,fe,af,9d,d5,a1,d7,2e,8a,b1)
+
+static void verify_rr_copy(DnsResourceRecord *rr) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *copy = NULL;
+ const char *a, *b;
+
+ assert_se(copy = dns_resource_record_copy(rr));
+ assert_se(dns_resource_record_equal(copy, rr) > 0);
+
+ assert_se(a = dns_resource_record_to_string(rr));
+ assert_se(b = dns_resource_record_to_string(copy));
+
+ assert_se(streq(a, b));
+}
+
+static uint64_t hash(DnsResourceRecord *rr) {
+ struct siphash state;
+
+ siphash24_init(&state, HASH_KEY.bytes);
+ dns_resource_record_hash_func(rr, &state);
+ return siphash24_finalize(&state);
+}
+
+static void test_packet_from_file(const char* filename, bool canonical) {
+ _cleanup_free_ char *data = NULL;
+ size_t data_size, packet_size, offset;
+
+ assert_se(read_full_file(filename, &data, &data_size) >= 0);
+ assert_se(data);
+ assert_se(data_size > 8);
+
+ log_info("============== %s %s==============", filename, canonical ? "canonical " : "");
+
+ for (offset = 0; offset < data_size; offset += 8 + packet_size) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL, *p2 = NULL;
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL, *rr2 = NULL;
+ const char *s, *s2;
+ uint64_t hash1, hash2;
+
+ packet_size = unaligned_read_le64(data + offset);
+ assert_se(packet_size > 0);
+ assert_se(offset + 8 + packet_size <= data_size);
+
+ assert_se(dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, DNS_PACKET_SIZE_MAX) >= 0);
+
+ assert_se(dns_packet_append_blob(p, data + offset + 8, packet_size, NULL) >= 0);
+ assert_se(dns_packet_read_rr(p, &rr, NULL, NULL) >= 0);
+
+ verify_rr_copy(rr);
+
+ s = dns_resource_record_to_string(rr);
+ assert_se(s);
+ puts(s);
+
+ hash1 = hash(rr);
+
+ assert_se(dns_resource_record_to_wire_format(rr, canonical) >= 0);
+
+ assert_se(dns_packet_new(&p2, DNS_PROTOCOL_DNS, 0, DNS_PACKET_SIZE_MAX) >= 0);
+ assert_se(dns_packet_append_blob(p2, rr->wire_format, rr->wire_format_size, NULL) >= 0);
+ assert_se(dns_packet_read_rr(p2, &rr2, NULL, NULL) >= 0);
+
+ verify_rr_copy(rr);
+
+ s2 = dns_resource_record_to_string(rr);
+ assert_se(s2);
+ assert_se(streq(s, s2));
+
+ hash2 = hash(rr);
+ assert_se(hash1 == hash2);
+ }
+}
+
+int main(int argc, char **argv) {
+ int i, N;
+ _cleanup_free_ char *pkts_glob = NULL;
+ _cleanup_globfree_ glob_t g = {};
+ char **fnames;
+
+ log_parse_environment();
+
+ if (argc >= 2) {
+ N = argc - 1;
+ fnames = argv + 1;
+ } else {
+ pkts_glob = path_join(get_testdata_dir(), "test-resolve/*.pkts");
+ assert_se(glob(pkts_glob, GLOB_NOSORT, NULL, &g) == 0);
+ N = g.gl_pathc;
+ fnames = g.gl_pathv;
+ }
+
+ for (i = 0; i < N; i++) {
+ test_packet_from_file(fnames[i], false);
+ puts("");
+ test_packet_from_file(fnames[i], true);
+ if (i + 1 < N)
+ puts("");
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/resolve/test-dnssec-complex.c b/src/resolve/test-dnssec-complex.c
new file mode 100644
index 0000000..ccd62a1
--- /dev/null
+++ b/src/resolve/test-dnssec-complex.c
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/ip.h>
+
+#include "sd-bus.h"
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "dns-type.h"
+#include "random-util.h"
+#include "resolved-def.h"
+#include "string-util.h"
+#include "time-util.h"
+
+static void prefix_random(const char *name, char **ret) {
+ uint64_t i, u;
+ char *m = NULL;
+
+ u = 1 + (random_u64() & 3);
+
+ for (i = 0; i < u; i++) {
+ _cleanup_free_ char *b = NULL;
+ char *x;
+
+ assert_se(asprintf(&b, "x%" PRIu64 "x", random_u64()));
+ x = strjoin(b, ".", name);
+ assert_se(x);
+
+ free(m);
+ m = x;
+ }
+
+ *ret = m;
+ }
+
+static void test_rr_lookup(sd_bus *bus, const char *name, uint16_t type, const char *result) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *m = NULL;
+ int r;
+
+ /* If the name starts with a dot, we prefix one to three random labels */
+ if (startswith(name, ".")) {
+ prefix_random(name + 1, &m);
+ name = m;
+ }
+
+ assert_se(sd_bus_message_new_method_call(
+ bus,
+ &req,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "ResolveRecord") >= 0);
+
+ assert_se(sd_bus_message_append(req, "isqqt", 0, name, DNS_CLASS_IN, type, UINT64_C(0)) >= 0);
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+
+ if (r < 0) {
+ assert_se(result);
+ assert_se(sd_bus_error_has_name(&error, result));
+ log_info("[OK] %s/%s resulted in <%s>.", name, dns_type_to_string(type), error.name);
+ } else {
+ assert_se(!result);
+ log_info("[OK] %s/%s succeeded.", name, dns_type_to_string(type));
+ }
+}
+
+static void test_hostname_lookup(sd_bus *bus, const char *name, int family, const char *result) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *m = NULL;
+ const char *af;
+ int r;
+
+ af = family == AF_UNSPEC ? "AF_UNSPEC" : af_to_name(family);
+
+ /* If the name starts with a dot, we prefix one to three random labels */
+ if (startswith(name, ".")) {
+ prefix_random(name + 1, &m);
+ name = m;
+ }
+
+ assert_se(sd_bus_message_new_method_call(
+ bus,
+ &req,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "ResolveHostname") >= 0);
+
+ assert_se(sd_bus_message_append(req, "isit", 0, name, family, UINT64_C(0)) >= 0);
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+
+ if (r < 0) {
+ assert_se(result);
+ assert_se(sd_bus_error_has_name(&error, result));
+ log_info("[OK] %s/%s resulted in <%s>.", name, af, error.name);
+ } else {
+ assert_se(!result);
+ log_info("[OK] %s/%s succeeded.", name, af);
+ }
+
+}
+
+int main(int argc, char* argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+
+ /* Note that this is a manual test as it requires:
+ *
+ * Full network access
+ * A DNSSEC capable DNS server
+ * That zones contacted are still set up as they were when I wrote this.
+ */
+
+ assert_se(sd_bus_open_system(&bus) >= 0);
+
+ /* Normally signed */
+ test_rr_lookup(bus, "www.eurid.eu", DNS_TYPE_A, NULL);
+ test_hostname_lookup(bus, "www.eurid.eu", AF_UNSPEC, NULL);
+
+ test_rr_lookup(bus, "sigok.verteiltesysteme.net", DNS_TYPE_A, NULL);
+ test_hostname_lookup(bus, "sigok.verteiltesysteme.net", AF_UNSPEC, NULL);
+
+ /* Normally signed, NODATA */
+ test_rr_lookup(bus, "www.eurid.eu", DNS_TYPE_RP, BUS_ERROR_NO_SUCH_RR);
+ test_rr_lookup(bus, "sigok.verteiltesysteme.net", DNS_TYPE_RP, BUS_ERROR_NO_SUCH_RR);
+
+ /* Invalid signature */
+ test_rr_lookup(bus, "sigfail.verteiltesysteme.net", DNS_TYPE_A, BUS_ERROR_DNSSEC_FAILED);
+ test_hostname_lookup(bus, "sigfail.verteiltesysteme.net", AF_INET, BUS_ERROR_DNSSEC_FAILED);
+
+ /* Invalid signature, RSA, wildcard */
+ test_rr_lookup(bus, ".wilda.rhybar.0skar.cz", DNS_TYPE_A, BUS_ERROR_DNSSEC_FAILED);
+ test_hostname_lookup(bus, ".wilda.rhybar.0skar.cz", AF_INET, BUS_ERROR_DNSSEC_FAILED);
+
+ /* Invalid signature, ECDSA, wildcard */
+ test_rr_lookup(bus, ".wilda.rhybar.ecdsa.0skar.cz", DNS_TYPE_A, BUS_ERROR_DNSSEC_FAILED);
+ test_hostname_lookup(bus, ".wilda.rhybar.ecdsa.0skar.cz", AF_INET, BUS_ERROR_DNSSEC_FAILED);
+
+ /* Missing DS for DNSKEY */
+ test_rr_lookup(bus, "www.dnssec-bogus.sg", DNS_TYPE_A, BUS_ERROR_DNSSEC_FAILED);
+ test_hostname_lookup(bus, "www.dnssec-bogus.sg", AF_INET, BUS_ERROR_DNSSEC_FAILED);
+
+ /* NXDOMAIN in NSEC domain */
+ test_rr_lookup(bus, "hhh.nasa.gov", DNS_TYPE_A, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, "hhh.nasa.gov", AF_UNSPEC, _BUS_ERROR_DNS "NXDOMAIN");
+ test_rr_lookup(bus, "_pgpkey-https._tcp.hkps.pool.sks-keyservers.net", DNS_TYPE_SRV, _BUS_ERROR_DNS "NXDOMAIN");
+
+ /* wildcard, NSEC zone */
+ test_rr_lookup(bus, ".wilda.nsec.0skar.cz", DNS_TYPE_A, NULL);
+ test_hostname_lookup(bus, ".wilda.nsec.0skar.cz", AF_INET, NULL);
+
+ /* wildcard, NSEC zone, NODATA */
+ test_rr_lookup(bus, ".wilda.nsec.0skar.cz", DNS_TYPE_RP, BUS_ERROR_NO_SUCH_RR);
+
+ /* wildcard, NSEC3 zone */
+ test_rr_lookup(bus, ".wilda.0skar.cz", DNS_TYPE_A, NULL);
+ test_hostname_lookup(bus, ".wilda.0skar.cz", AF_INET, NULL);
+
+ /* wildcard, NSEC3 zone, NODATA */
+ test_rr_lookup(bus, ".wilda.0skar.cz", DNS_TYPE_RP, BUS_ERROR_NO_SUCH_RR);
+
+ /* wildcard, NSEC zone, CNAME */
+ test_rr_lookup(bus, ".wild.nsec.0skar.cz", DNS_TYPE_A, NULL);
+ test_hostname_lookup(bus, ".wild.nsec.0skar.cz", AF_UNSPEC, NULL);
+ test_hostname_lookup(bus, ".wild.nsec.0skar.cz", AF_INET, NULL);
+
+ /* wildcard, NSEC zone, NODATA, CNAME */
+ test_rr_lookup(bus, ".wild.nsec.0skar.cz", DNS_TYPE_RP, BUS_ERROR_NO_SUCH_RR);
+
+ /* wildcard, NSEC3 zone, CNAME */
+ test_rr_lookup(bus, ".wild.0skar.cz", DNS_TYPE_A, NULL);
+ test_hostname_lookup(bus, ".wild.0skar.cz", AF_UNSPEC, NULL);
+ test_hostname_lookup(bus, ".wild.0skar.cz", AF_INET, NULL);
+
+ /* wildcard, NSEC3 zone, NODATA, CNAME */
+ test_rr_lookup(bus, ".wild.0skar.cz", DNS_TYPE_RP, BUS_ERROR_NO_SUCH_RR);
+
+ /* NODATA due to empty non-terminal in NSEC domain */
+ test_rr_lookup(bus, "herndon.nasa.gov", DNS_TYPE_A, BUS_ERROR_NO_SUCH_RR);
+ test_hostname_lookup(bus, "herndon.nasa.gov", AF_UNSPEC, BUS_ERROR_NO_SUCH_RR);
+ test_hostname_lookup(bus, "herndon.nasa.gov", AF_INET, BUS_ERROR_NO_SUCH_RR);
+ test_hostname_lookup(bus, "herndon.nasa.gov", AF_INET6, BUS_ERROR_NO_SUCH_RR);
+
+ /* NXDOMAIN in NSEC root zone: */
+ test_rr_lookup(bus, "jasdhjas.kjkfgjhfjg", DNS_TYPE_A, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, "jasdhjas.kjkfgjhfjg", AF_UNSPEC, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, "jasdhjas.kjkfgjhfjg", AF_INET, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, "jasdhjas.kjkfgjhfjg", AF_INET6, _BUS_ERROR_DNS "NXDOMAIN");
+
+ /* NXDOMAIN in NSEC3 .com zone: */
+ test_rr_lookup(bus, "kjkfgjhfjgsdfdsfd.com", DNS_TYPE_A, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, "kjkfgjhfjgsdfdsfd.com", AF_INET, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, "kjkfgjhfjgsdfdsfd.com", AF_INET6, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, "kjkfgjhfjgsdfdsfd.com", AF_UNSPEC, _BUS_ERROR_DNS "NXDOMAIN");
+
+ /* Unsigned A */
+ test_rr_lookup(bus, "poettering.de", DNS_TYPE_A, NULL);
+ test_rr_lookup(bus, "poettering.de", DNS_TYPE_AAAA, NULL);
+ test_hostname_lookup(bus, "poettering.de", AF_UNSPEC, NULL);
+ test_hostname_lookup(bus, "poettering.de", AF_INET, NULL);
+ test_hostname_lookup(bus, "poettering.de", AF_INET6, NULL);
+
+#if HAVE_LIBIDN2 || HAVE_LIBIDN
+ /* Unsigned A with IDNA conversion necessary */
+ test_hostname_lookup(bus, "pöttering.de", AF_UNSPEC, NULL);
+ test_hostname_lookup(bus, "pöttering.de", AF_INET, NULL);
+ test_hostname_lookup(bus, "pöttering.de", AF_INET6, NULL);
+#endif
+
+ /* DNAME, pointing to NXDOMAIN */
+ test_rr_lookup(bus, ".ireallyhpoethisdoesnexist.xn--kprw13d.", DNS_TYPE_A, _BUS_ERROR_DNS "NXDOMAIN");
+ test_rr_lookup(bus, ".ireallyhpoethisdoesnexist.xn--kprw13d.", DNS_TYPE_RP, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, ".ireallyhpoethisdoesntexist.xn--kprw13d.", AF_UNSPEC, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, ".ireallyhpoethisdoesntexist.xn--kprw13d.", AF_INET, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, ".ireallyhpoethisdoesntexist.xn--kprw13d.", AF_INET6, _BUS_ERROR_DNS "NXDOMAIN");
+
+ return 0;
+}
diff --git a/src/resolve/test-dnssec.c b/src/resolve/test-dnssec.c
new file mode 100644
index 0000000..840c4fa
--- /dev/null
+++ b/src/resolve/test-dnssec.c
@@ -0,0 +1,514 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#if HAVE_GCRYPT
+#include <gcrypt.h>
+#endif
+#include <netinet/in.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "resolved-dns-dnssec.h"
+#include "resolved-dns-rr.h"
+#include "string-util.h"
+#include "hexdecoct.h"
+
+static void test_dnssec_canonicalize_one(const char *original, const char *canonical, int r) {
+ char canonicalized[DNSSEC_CANONICAL_HOSTNAME_MAX];
+
+ assert_se(dnssec_canonicalize(original, canonicalized, sizeof(canonicalized)) == r);
+ if (r < 0)
+ return;
+
+ assert_se(streq(canonicalized, canonical));
+}
+
+static void test_dnssec_canonicalize(void) {
+ test_dnssec_canonicalize_one("", ".", 1);
+ test_dnssec_canonicalize_one(".", ".", 1);
+ test_dnssec_canonicalize_one("foo", "foo.", 4);
+ test_dnssec_canonicalize_one("foo.", "foo.", 4);
+ test_dnssec_canonicalize_one("FOO.", "foo.", 4);
+ test_dnssec_canonicalize_one("FOO.bar.", "foo.bar.", 8);
+ test_dnssec_canonicalize_one("FOO..bar.", NULL, -EINVAL);
+}
+
+#if HAVE_GCRYPT
+
+static void test_dnssec_verify_dns_key(void) {
+
+ static const uint8_t ds1_fprint[] = {
+ 0x46, 0x8B, 0xC8, 0xDD, 0xC7, 0xE8, 0x27, 0x03, 0x40, 0xBB, 0x8A, 0x1F, 0x3B, 0x2E, 0x45, 0x9D,
+ 0x80, 0x67, 0x14, 0x01,
+ };
+ static const uint8_t ds2_fprint[] = {
+ 0x8A, 0xEE, 0x80, 0x47, 0x05, 0x5F, 0x83, 0xD1, 0x48, 0xBA, 0x8F, 0xF6, 0xDD, 0xA7, 0x60, 0xCE,
+ 0x94, 0xF7, 0xC7, 0x5E, 0x52, 0x4C, 0xF2, 0xE9, 0x50, 0xB9, 0x2E, 0xCB, 0xEF, 0x96, 0xB9, 0x98,
+ };
+ static const uint8_t dnskey_blob[] = {
+ 0x03, 0x01, 0x00, 0x01, 0xa8, 0x12, 0xda, 0x4f, 0xd2, 0x7d, 0x54, 0x14, 0x0e, 0xcc, 0x5b, 0x5e,
+ 0x45, 0x9c, 0x96, 0x98, 0xc0, 0xc0, 0x85, 0x81, 0xb1, 0x47, 0x8c, 0x7d, 0xe8, 0x39, 0x50, 0xcc,
+ 0xc5, 0xd0, 0xf2, 0x00, 0x81, 0x67, 0x79, 0xf6, 0xcc, 0x9d, 0xad, 0x6c, 0xbb, 0x7b, 0x6f, 0x48,
+ 0x97, 0x15, 0x1c, 0xfd, 0x0b, 0xfe, 0xd3, 0xd7, 0x7d, 0x9f, 0x81, 0x26, 0xd3, 0xc5, 0x65, 0x49,
+ 0xcf, 0x46, 0x62, 0xb0, 0x55, 0x6e, 0x47, 0xc7, 0x30, 0xef, 0x51, 0xfb, 0x3e, 0xc6, 0xef, 0xde,
+ 0x27, 0x3f, 0xfa, 0x57, 0x2d, 0xa7, 0x1d, 0x80, 0x46, 0x9a, 0x5f, 0x14, 0xb3, 0xb0, 0x2c, 0xbe,
+ 0x72, 0xca, 0xdf, 0xb2, 0xff, 0x36, 0x5b, 0x4f, 0xec, 0x58, 0x8e, 0x8d, 0x01, 0xe9, 0xa9, 0xdf,
+ 0xb5, 0x60, 0xad, 0x52, 0x4d, 0xfc, 0xa9, 0x3e, 0x8d, 0x35, 0x95, 0xb3, 0x4e, 0x0f, 0xca, 0x45,
+ 0x1b, 0xf7, 0xef, 0x3a, 0x88, 0x25, 0x08, 0xc7, 0x4e, 0x06, 0xc1, 0x62, 0x1a, 0xce, 0xd8, 0x77,
+ 0xbd, 0x02, 0x65, 0xf8, 0x49, 0xfb, 0xce, 0xf6, 0xa8, 0x09, 0xfc, 0xde, 0xb2, 0x09, 0x9d, 0x39,
+ 0xf8, 0x63, 0x9c, 0x32, 0x42, 0x7c, 0xa0, 0x30, 0x86, 0x72, 0x7a, 0x4a, 0xc6, 0xd4, 0xb3, 0x2d,
+ 0x24, 0xef, 0x96, 0x3f, 0xc2, 0xda, 0xd3, 0xf2, 0x15, 0x6f, 0xda, 0x65, 0x4b, 0x81, 0x28, 0x68,
+ 0xf4, 0xfe, 0x3e, 0x71, 0x4f, 0x50, 0x96, 0x72, 0x58, 0xa1, 0x89, 0xdd, 0x01, 0x61, 0x39, 0x39,
+ 0xc6, 0x76, 0xa4, 0xda, 0x02, 0x70, 0x3d, 0xc0, 0xdc, 0x8d, 0x70, 0x72, 0x04, 0x90, 0x79, 0xd4,
+ 0xec, 0x65, 0xcf, 0x49, 0x35, 0x25, 0x3a, 0x14, 0x1a, 0x45, 0x20, 0xeb, 0x31, 0xaf, 0x92, 0xba,
+ 0x20, 0xd3, 0xcd, 0xa7, 0x13, 0x44, 0xdc, 0xcf, 0xf0, 0x27, 0x34, 0xb9, 0xe7, 0x24, 0x6f, 0x73,
+ 0xe7, 0xea, 0x77, 0x03,
+ };
+
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *dnskey = NULL, *ds1 = NULL, *ds2 = NULL;
+
+ /* The two DS RRs in effect for nasa.gov on 2015-12-01. */
+ ds1 = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DS, "nasa.gov");
+ assert_se(ds1);
+
+ ds1->ds.key_tag = 47857;
+ ds1->ds.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ ds1->ds.digest_type = DNSSEC_DIGEST_SHA1;
+ ds1->ds.digest_size = sizeof(ds1_fprint);
+ ds1->ds.digest = memdup(ds1_fprint, ds1->ds.digest_size);
+ assert_se(ds1->ds.digest);
+
+ log_info("DS1: %s", strna(dns_resource_record_to_string(ds1)));
+
+ ds2 = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DS, "NASA.GOV");
+ assert_se(ds2);
+
+ ds2->ds.key_tag = 47857;
+ ds2->ds.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ ds2->ds.digest_type = DNSSEC_DIGEST_SHA256;
+ ds2->ds.digest_size = sizeof(ds2_fprint);
+ ds2->ds.digest = memdup(ds2_fprint, ds2->ds.digest_size);
+ assert_se(ds2->ds.digest);
+
+ log_info("DS2: %s", strna(dns_resource_record_to_string(ds2)));
+
+ dnskey = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DNSKEY, "nasa.GOV");
+ assert_se(dnskey);
+
+ dnskey->dnskey.flags = 257;
+ dnskey->dnskey.protocol = 3;
+ dnskey->dnskey.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ dnskey->dnskey.key_size = sizeof(dnskey_blob);
+ dnskey->dnskey.key = memdup(dnskey_blob, sizeof(dnskey_blob));
+ assert_se(dnskey->dnskey.key);
+
+ log_info("DNSKEY: %s", strna(dns_resource_record_to_string(dnskey)));
+ log_info("DNSKEY keytag: %u", dnssec_keytag(dnskey, false));
+
+ assert_se(dnssec_verify_dnskey_by_ds(dnskey, ds1, false) > 0);
+ assert_se(dnssec_verify_dnskey_by_ds(dnskey, ds2, false) > 0);
+}
+
+static void test_dnssec_verify_rfc8080_ed25519_example1(void) {
+ static const uint8_t dnskey_blob[] = {
+ 0x97, 0x4d, 0x96, 0xa2, 0x2d, 0x22, 0x4b, 0xc0, 0x1a, 0xdb, 0x91, 0x50, 0x91, 0x47, 0x7d,
+ 0x44, 0xcc, 0xd9, 0x1c, 0x9a, 0x41, 0xa1, 0x14, 0x30, 0x01, 0x01, 0x17, 0xd5, 0x2c, 0x59,
+ 0x24, 0xe
+ };
+ static const uint8_t ds_fprint[] = {
+ 0xdd, 0xa6, 0xb9, 0x69, 0xbd, 0xfb, 0x79, 0xf7, 0x1e, 0xe7, 0xb7, 0xfb, 0xdf, 0xb7, 0xdc,
+ 0xd7, 0xad, 0xbb, 0xd3, 0x5d, 0xdf, 0x79, 0xed, 0x3b, 0x6d, 0xd7, 0xf6, 0xe3, 0x56, 0xdd,
+ 0xd7, 0x47, 0xf7, 0x6f, 0x5f, 0x7a, 0xe1, 0xa6, 0xf9, 0xe5, 0xce, 0xfc, 0x7b, 0xbf, 0x5a,
+ 0xdf, 0x4e, 0x1b
+ };
+ static const uint8_t signature_blob[] = {
+ 0xa0, 0xbf, 0x64, 0xac, 0x9b, 0xa7, 0xef, 0x17, 0xc1, 0x38, 0x85, 0x9c, 0x18, 0x78, 0xbb,
+ 0x99, 0xa8, 0x39, 0xfe, 0x17, 0x59, 0xac, 0xa5, 0xb0, 0xd7, 0x98, 0xcf, 0x1a, 0xb1, 0xe9,
+ 0x8d, 0x07, 0x91, 0x02, 0xf4, 0xdd, 0xb3, 0x36, 0x8f, 0x0f, 0xe4, 0x0b, 0xb3, 0x77, 0xf1,
+ 0xf0, 0x0e, 0x0c, 0xdd, 0xed, 0xb7, 0x99, 0x16, 0x7d, 0x56, 0xb6, 0xe9, 0x32, 0x78, 0x30,
+ 0x72, 0xba, 0x8d, 0x02
+ };
+
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *dnskey = NULL, *ds = NULL, *mx = NULL,
+ *rrsig = NULL;
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ DnssecResult result;
+
+ dnskey = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DNSKEY, "example.com.");
+ assert_se(dnskey);
+
+ dnskey->dnskey.flags = 257;
+ dnskey->dnskey.protocol = 3;
+ dnskey->dnskey.algorithm = DNSSEC_ALGORITHM_ED25519;
+ dnskey->dnskey.key_size = sizeof(dnskey_blob);
+ dnskey->dnskey.key = memdup(dnskey_blob, sizeof(dnskey_blob));
+ assert_se(dnskey->dnskey.key);
+
+ log_info("DNSKEY: %s", strna(dns_resource_record_to_string(dnskey)));
+
+ ds = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DS, "example.com.");
+ assert_se(ds);
+
+ ds->ds.key_tag = 3613;
+ ds->ds.algorithm = DNSSEC_ALGORITHM_ED25519;
+ ds->ds.digest_type = DNSSEC_DIGEST_SHA256;
+ ds->ds.digest_size = sizeof(ds_fprint);
+ ds->ds.digest = memdup(ds_fprint, ds->ds.digest_size);
+ assert_se(ds->ds.digest);
+
+ log_info("DS: %s", strna(dns_resource_record_to_string(ds)));
+
+ mx = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_MX, "example.com.");
+ assert_se(mx);
+
+ mx->mx.priority = 10;
+ mx->mx.exchange = strdup("mail.example.com.");
+ assert_se(mx->mx.exchange);
+
+ log_info("MX: %s", strna(dns_resource_record_to_string(mx)));
+
+ rrsig = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_RRSIG, "example.com.");
+ assert_se(rrsig);
+
+ rrsig->rrsig.type_covered = DNS_TYPE_MX;
+ rrsig->rrsig.algorithm = DNSSEC_ALGORITHM_ED25519;
+ rrsig->rrsig.labels = 2;
+ rrsig->rrsig.original_ttl = 3600;
+ rrsig->rrsig.expiration = 1440021600;
+ rrsig->rrsig.inception = 1438207200;
+ rrsig->rrsig.key_tag = 3613;
+ rrsig->rrsig.signer = strdup("example.com.");
+ assert_se(rrsig->rrsig.signer);
+ rrsig->rrsig.signature_size = sizeof(signature_blob);
+ rrsig->rrsig.signature = memdup(signature_blob, rrsig->rrsig.signature_size);
+ assert_se(rrsig->rrsig.signature);
+
+ log_info("RRSIG: %s", strna(dns_resource_record_to_string(rrsig)));
+
+ assert_se(dnssec_key_match_rrsig(mx->key, rrsig) > 0);
+ assert_se(dnssec_rrsig_match_dnskey(rrsig, dnskey, false) > 0);
+
+ answer = dns_answer_new(1);
+ assert_se(answer);
+ assert_se(dns_answer_add(answer, mx, 0, DNS_ANSWER_AUTHENTICATED) >= 0);
+
+ assert_se(dnssec_verify_rrset(answer, mx->key, rrsig, dnskey,
+ rrsig->rrsig.inception * USEC_PER_SEC, &result) >= 0);
+#if GCRYPT_VERSION_NUMBER >= 0x010600
+ assert_se(result == DNSSEC_VALIDATED);
+#else
+ assert_se(result == DNSSEC_UNSUPPORTED_ALGORITHM);
+#endif
+}
+
+static void test_dnssec_verify_rfc8080_ed25519_example2(void) {
+ static const uint8_t dnskey_blob[] = {
+ 0xcc, 0xf9, 0xd9, 0xfd, 0x0c, 0x04, 0x7b, 0xb4, 0xbc, 0x0b, 0x94, 0x8f, 0xcf, 0x63, 0x9f,
+ 0x4b, 0x94, 0x51, 0xe3, 0x40, 0x13, 0x93, 0x6f, 0xeb, 0x62, 0x71, 0x3d, 0xc4, 0x72, 0x4,
+ 0x8a, 0x3b
+ };
+ static const uint8_t ds_fprint[] = {
+ 0xe3, 0x4d, 0x7b, 0xf3, 0x56, 0xfd, 0xdf, 0x87, 0xb7, 0xf7, 0x67, 0x5e, 0xe3, 0xdd, 0x9e,
+ 0x73, 0xbe, 0xda, 0x7b, 0x67, 0xb5, 0xe5, 0xde, 0xf4, 0x7f, 0xae, 0x7b, 0xe5, 0xad, 0x5c,
+ 0xd1, 0xb7, 0x39, 0xf5, 0xce, 0x76, 0xef, 0x97, 0x34, 0xe1, 0xe6, 0xde, 0xf3, 0x47, 0x3a,
+ 0xeb, 0x5e, 0x1c
+ };
+ static const uint8_t signature_blob[] = {
+ 0xcd, 0x74, 0x34, 0x6e, 0x46, 0x20, 0x41, 0x31, 0x05, 0xc9, 0xf2, 0xf2, 0x8b, 0xd4, 0x28,
+ 0x89, 0x8e, 0x83, 0xf1, 0x97, 0x58, 0xa3, 0x8c, 0x32, 0x52, 0x15, 0x62, 0xa1, 0x86, 0x57,
+ 0x15, 0xd4, 0xf8, 0xd7, 0x44, 0x0f, 0x44, 0x84, 0xd0, 0x4a, 0xa2, 0x52, 0x9f, 0x34, 0x28,
+ 0x4a, 0x6e, 0x69, 0xa0, 0x9e, 0xe0, 0x0f, 0xb0, 0x10, 0x47, 0x43, 0xbb, 0x2a, 0xe2, 0x39,
+ 0x93, 0x6a, 0x5c, 0x06
+ };
+
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *dnskey = NULL, *ds = NULL, *mx = NULL,
+ *rrsig = NULL;
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ DnssecResult result;
+
+ dnskey = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DNSKEY, "example.com.");
+ assert_se(dnskey);
+
+ dnskey->dnskey.flags = 257;
+ dnskey->dnskey.protocol = 3;
+ dnskey->dnskey.algorithm = DNSSEC_ALGORITHM_ED25519;
+ dnskey->dnskey.key_size = sizeof(dnskey_blob);
+ dnskey->dnskey.key = memdup(dnskey_blob, sizeof(dnskey_blob));
+ assert_se(dnskey->dnskey.key);
+
+ log_info("DNSKEY: %s", strna(dns_resource_record_to_string(dnskey)));
+
+ ds = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DS, "example.com.");
+ assert_se(ds);
+
+ ds->ds.key_tag = 35217;
+ ds->ds.algorithm = DNSSEC_ALGORITHM_ED25519;
+ ds->ds.digest_type = DNSSEC_DIGEST_SHA256;
+ ds->ds.digest_size = sizeof(ds_fprint);
+ ds->ds.digest = memdup(ds_fprint, ds->ds.digest_size);
+ assert_se(ds->ds.digest);
+
+ log_info("DS: %s", strna(dns_resource_record_to_string(ds)));
+
+ mx = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_MX, "example.com.");
+ assert_se(mx);
+
+ mx->mx.priority = 10;
+ mx->mx.exchange = strdup("mail.example.com.");
+ assert_se(mx->mx.exchange);
+
+ log_info("MX: %s", strna(dns_resource_record_to_string(mx)));
+
+ rrsig = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_RRSIG, "example.com.");
+ assert_se(rrsig);
+
+ rrsig->rrsig.type_covered = DNS_TYPE_MX;
+ rrsig->rrsig.algorithm = DNSSEC_ALGORITHM_ED25519;
+ rrsig->rrsig.labels = 2;
+ rrsig->rrsig.original_ttl = 3600;
+ rrsig->rrsig.expiration = 1440021600;
+ rrsig->rrsig.inception = 1438207200;
+ rrsig->rrsig.key_tag = 35217;
+ rrsig->rrsig.signer = strdup("example.com.");
+ assert_se(rrsig->rrsig.signer);
+ rrsig->rrsig.signature_size = sizeof(signature_blob);
+ rrsig->rrsig.signature = memdup(signature_blob, rrsig->rrsig.signature_size);
+ assert_se(rrsig->rrsig.signature);
+
+ log_info("RRSIG: %s", strna(dns_resource_record_to_string(rrsig)));
+
+ assert_se(dnssec_key_match_rrsig(mx->key, rrsig) > 0);
+ assert_se(dnssec_rrsig_match_dnskey(rrsig, dnskey, false) > 0);
+
+ answer = dns_answer_new(1);
+ assert_se(answer);
+ assert_se(dns_answer_add(answer, mx, 0, DNS_ANSWER_AUTHENTICATED) >= 0);
+
+ assert_se(dnssec_verify_rrset(answer, mx->key, rrsig, dnskey,
+ rrsig->rrsig.inception * USEC_PER_SEC, &result) >= 0);
+#if GCRYPT_VERSION_NUMBER >= 0x010600
+ assert_se(result == DNSSEC_VALIDATED);
+#else
+ assert_se(result == DNSSEC_UNSUPPORTED_ALGORITHM);
+#endif
+}
+static void test_dnssec_verify_rrset(void) {
+
+ static const uint8_t signature_blob[] = {
+ 0x7f, 0x79, 0xdd, 0x5e, 0x89, 0x79, 0x18, 0xd0, 0x34, 0x86, 0x8c, 0x72, 0x77, 0x75, 0x48, 0x4d,
+ 0xc3, 0x7d, 0x38, 0x04, 0xab, 0xcd, 0x9e, 0x4c, 0x82, 0xb0, 0x92, 0xca, 0xe9, 0x66, 0xe9, 0x6e,
+ 0x47, 0xc7, 0x68, 0x8c, 0x94, 0xf6, 0x69, 0xcb, 0x75, 0x94, 0xe6, 0x30, 0xa6, 0xfb, 0x68, 0x64,
+ 0x96, 0x1a, 0x84, 0xe1, 0xdc, 0x16, 0x4c, 0x83, 0x6c, 0x44, 0xf2, 0x74, 0x4d, 0x74, 0x79, 0x8f,
+ 0xf3, 0xf4, 0x63, 0x0d, 0xef, 0x5a, 0xe7, 0xe2, 0xfd, 0xf2, 0x2b, 0x38, 0x7c, 0x28, 0x96, 0x9d,
+ 0xb6, 0xcd, 0x5c, 0x3b, 0x57, 0xe2, 0x24, 0x78, 0x65, 0xd0, 0x9e, 0x77, 0x83, 0x09, 0x6c, 0xff,
+ 0x3d, 0x52, 0x3f, 0x6e, 0xd1, 0xed, 0x2e, 0xf9, 0xee, 0x8e, 0xa6, 0xbe, 0x9a, 0xa8, 0x87, 0x76,
+ 0xd8, 0x77, 0xcc, 0x96, 0xa0, 0x98, 0xa1, 0xd1, 0x68, 0x09, 0x43, 0xcf, 0x56, 0xd9, 0xd1, 0x66,
+ };
+
+ static const uint8_t dnskey_blob[] = {
+ 0x03, 0x01, 0x00, 0x01, 0x9b, 0x49, 0x9b, 0xc1, 0xf9, 0x9a, 0xe0, 0x4e, 0xcf, 0xcb, 0x14, 0x45,
+ 0x2e, 0xc9, 0xf9, 0x74, 0xa7, 0x18, 0xb5, 0xf3, 0xde, 0x39, 0x49, 0xdf, 0x63, 0x33, 0x97, 0x52,
+ 0xe0, 0x8e, 0xac, 0x50, 0x30, 0x8e, 0x09, 0xd5, 0x24, 0x3d, 0x26, 0xa4, 0x49, 0x37, 0x2b, 0xb0,
+ 0x6b, 0x1b, 0xdf, 0xde, 0x85, 0x83, 0xcb, 0x22, 0x4e, 0x60, 0x0a, 0x91, 0x1a, 0x1f, 0xc5, 0x40,
+ 0xb1, 0xc3, 0x15, 0xc1, 0x54, 0x77, 0x86, 0x65, 0x53, 0xec, 0x10, 0x90, 0x0c, 0x91, 0x00, 0x5e,
+ 0x15, 0xdc, 0x08, 0x02, 0x4c, 0x8c, 0x0d, 0xc0, 0xac, 0x6e, 0xc4, 0x3e, 0x1b, 0x80, 0x19, 0xe4,
+ 0xf7, 0x5f, 0x77, 0x51, 0x06, 0x87, 0x61, 0xde, 0xa2, 0x18, 0x0f, 0x40, 0x8b, 0x79, 0x72, 0xfa,
+ 0x8d, 0x1a, 0x44, 0x47, 0x0d, 0x8e, 0x3a, 0x2d, 0xc7, 0x39, 0xbf, 0x56, 0x28, 0x97, 0xd9, 0x20,
+ 0x4f, 0x00, 0x51, 0x3b,
+ };
+
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *a = NULL, *rrsig = NULL, *dnskey = NULL;
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ DnssecResult result;
+
+ a = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_A, "nAsA.gov");
+ assert_se(a);
+
+ a->a.in_addr.s_addr = inet_addr("52.0.14.116");
+
+ log_info("A: %s", strna(dns_resource_record_to_string(a)));
+
+ rrsig = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_RRSIG, "NaSa.GOV.");
+ assert_se(rrsig);
+
+ rrsig->rrsig.type_covered = DNS_TYPE_A;
+ rrsig->rrsig.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ rrsig->rrsig.labels = 2;
+ rrsig->rrsig.original_ttl = 600;
+ rrsig->rrsig.expiration = 0x5683135c;
+ rrsig->rrsig.inception = 0x565b7da8;
+ rrsig->rrsig.key_tag = 63876;
+ rrsig->rrsig.signer = strdup("Nasa.Gov.");
+ assert_se(rrsig->rrsig.signer);
+ rrsig->rrsig.signature_size = sizeof(signature_blob);
+ rrsig->rrsig.signature = memdup(signature_blob, rrsig->rrsig.signature_size);
+ assert_se(rrsig->rrsig.signature);
+
+ log_info("RRSIG: %s", strna(dns_resource_record_to_string(rrsig)));
+
+ dnskey = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DNSKEY, "nASA.gOV");
+ assert_se(dnskey);
+
+ dnskey->dnskey.flags = 256;
+ dnskey->dnskey.protocol = 3;
+ dnskey->dnskey.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ dnskey->dnskey.key_size = sizeof(dnskey_blob);
+ dnskey->dnskey.key = memdup(dnskey_blob, sizeof(dnskey_blob));
+ assert_se(dnskey->dnskey.key);
+
+ log_info("DNSKEY: %s", strna(dns_resource_record_to_string(dnskey)));
+ log_info("DNSKEY keytag: %u", dnssec_keytag(dnskey, false));
+
+ assert_se(dnssec_key_match_rrsig(a->key, rrsig) > 0);
+ assert_se(dnssec_rrsig_match_dnskey(rrsig, dnskey, false) > 0);
+
+ answer = dns_answer_new(1);
+ assert_se(answer);
+ assert_se(dns_answer_add(answer, a, 0, DNS_ANSWER_AUTHENTICATED) >= 0);
+
+ /* Validate the RR as it if was 2015-12-2 today */
+ assert_se(dnssec_verify_rrset(answer, a->key, rrsig, dnskey, 1449092754*USEC_PER_SEC, &result) >= 0);
+ assert_se(result == DNSSEC_VALIDATED);
+}
+
+static void test_dnssec_verify_rrset2(void) {
+
+ static const uint8_t signature_blob[] = {
+ 0x48, 0x45, 0xc8, 0x8b, 0xc0, 0x14, 0x92, 0xf5, 0x15, 0xc6, 0x84, 0x9d, 0x2f, 0xe3, 0x32, 0x11,
+ 0x7d, 0xf1, 0xe6, 0x87, 0xb9, 0x42, 0xd3, 0x8b, 0x9e, 0xaf, 0x92, 0x31, 0x0a, 0x53, 0xad, 0x8b,
+ 0xa7, 0x5c, 0x83, 0x39, 0x8c, 0x28, 0xac, 0xce, 0x6e, 0x9c, 0x18, 0xe3, 0x31, 0x16, 0x6e, 0xca,
+ 0x38, 0x31, 0xaf, 0xd9, 0x94, 0xf1, 0x84, 0xb1, 0xdf, 0x5a, 0xc2, 0x73, 0x22, 0xf6, 0xcb, 0xa2,
+ 0xe7, 0x8c, 0x77, 0x0c, 0x74, 0x2f, 0xc2, 0x13, 0xb0, 0x93, 0x51, 0xa9, 0x4f, 0xae, 0x0a, 0xda,
+ 0x45, 0xcc, 0xfd, 0x43, 0x99, 0x36, 0x9a, 0x0d, 0x21, 0xe0, 0xeb, 0x30, 0x65, 0xd4, 0xa0, 0x27,
+ 0x37, 0x3b, 0xe4, 0xc1, 0xc5, 0xa1, 0x2a, 0xd1, 0x76, 0xc4, 0x7e, 0x64, 0x0e, 0x5a, 0xa6, 0x50,
+ 0x24, 0xd5, 0x2c, 0xcc, 0x6d, 0xe5, 0x37, 0xea, 0xbd, 0x09, 0x34, 0xed, 0x24, 0x06, 0xa1, 0x22,
+ };
+
+ static const uint8_t dnskey_blob[] = {
+ 0x03, 0x01, 0x00, 0x01, 0xc3, 0x7f, 0x1d, 0xd1, 0x1c, 0x97, 0xb1, 0x13, 0x34, 0x3a, 0x9a, 0xea,
+ 0xee, 0xd9, 0x5a, 0x11, 0x1b, 0x17, 0xc7, 0xe3, 0xd4, 0xda, 0x20, 0xbc, 0x5d, 0xba, 0x74, 0xe3,
+ 0x37, 0x99, 0xec, 0x25, 0xce, 0x93, 0x7f, 0xbd, 0x22, 0x73, 0x7e, 0x14, 0x71, 0xe0, 0x60, 0x07,
+ 0xd4, 0x39, 0x8b, 0x5e, 0xe9, 0xba, 0x25, 0xe8, 0x49, 0xe9, 0x34, 0xef, 0xfe, 0x04, 0x5c, 0xa5,
+ 0x27, 0xcd, 0xa9, 0xda, 0x70, 0x05, 0x21, 0xab, 0x15, 0x82, 0x24, 0xc3, 0x94, 0xf5, 0xd7, 0xb7,
+ 0xc4, 0x66, 0xcb, 0x32, 0x6e, 0x60, 0x2b, 0x55, 0x59, 0x28, 0x89, 0x8a, 0x72, 0xde, 0x88, 0x56,
+ 0x27, 0x95, 0xd9, 0xac, 0x88, 0x4f, 0x65, 0x2b, 0x68, 0xfc, 0xe6, 0x41, 0xc1, 0x1b, 0xef, 0x4e,
+ 0xd6, 0xc2, 0x0f, 0x64, 0x88, 0x95, 0x5e, 0xdd, 0x3a, 0x02, 0x07, 0x50, 0xa9, 0xda, 0xa4, 0x49,
+ 0x74, 0x62, 0xfe, 0xd7,
+ };
+
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *nsec = NULL, *rrsig = NULL, *dnskey = NULL;
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ DnssecResult result;
+
+ nsec = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_NSEC, "nasa.gov");
+ assert_se(nsec);
+
+ nsec->nsec.next_domain_name = strdup("3D-Printing.nasa.gov");
+ assert_se(nsec->nsec.next_domain_name);
+
+ nsec->nsec.types = bitmap_new();
+ assert_se(nsec->nsec.types);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_A) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_NS) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_SOA) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_MX) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_TXT) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_RRSIG) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_NSEC) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_DNSKEY) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, 65534) >= 0);
+
+ log_info("NSEC: %s", strna(dns_resource_record_to_string(nsec)));
+
+ rrsig = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_RRSIG, "NaSa.GOV.");
+ assert_se(rrsig);
+
+ rrsig->rrsig.type_covered = DNS_TYPE_NSEC;
+ rrsig->rrsig.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ rrsig->rrsig.labels = 2;
+ rrsig->rrsig.original_ttl = 300;
+ rrsig->rrsig.expiration = 0x5689002f;
+ rrsig->rrsig.inception = 0x56617230;
+ rrsig->rrsig.key_tag = 30390;
+ rrsig->rrsig.signer = strdup("Nasa.Gov.");
+ assert_se(rrsig->rrsig.signer);
+ rrsig->rrsig.signature_size = sizeof(signature_blob);
+ rrsig->rrsig.signature = memdup(signature_blob, rrsig->rrsig.signature_size);
+ assert_se(rrsig->rrsig.signature);
+
+ log_info("RRSIG: %s", strna(dns_resource_record_to_string(rrsig)));
+
+ dnskey = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DNSKEY, "nASA.gOV");
+ assert_se(dnskey);
+
+ dnskey->dnskey.flags = 256;
+ dnskey->dnskey.protocol = 3;
+ dnskey->dnskey.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ dnskey->dnskey.key_size = sizeof(dnskey_blob);
+ dnskey->dnskey.key = memdup(dnskey_blob, sizeof(dnskey_blob));
+ assert_se(dnskey->dnskey.key);
+
+ log_info("DNSKEY: %s", strna(dns_resource_record_to_string(dnskey)));
+ log_info("DNSKEY keytag: %u", dnssec_keytag(dnskey, false));
+
+ assert_se(dnssec_key_match_rrsig(nsec->key, rrsig) > 0);
+ assert_se(dnssec_rrsig_match_dnskey(rrsig, dnskey, false) > 0);
+
+ answer = dns_answer_new(1);
+ assert_se(answer);
+ assert_se(dns_answer_add(answer, nsec, 0, DNS_ANSWER_AUTHENTICATED) >= 0);
+
+ /* Validate the RR as it if was 2015-12-11 today */
+ assert_se(dnssec_verify_rrset(answer, nsec->key, rrsig, dnskey, 1449849318*USEC_PER_SEC, &result) >= 0);
+ assert_se(result == DNSSEC_VALIDATED);
+}
+
+static void test_dnssec_nsec3_hash(void) {
+ static const uint8_t salt[] = { 0xB0, 0x1D, 0xFA, 0xCE };
+ static const uint8_t next_hashed_name[] = { 0x84, 0x10, 0x26, 0x53, 0xc9, 0xfa, 0x4d, 0x85, 0x6c, 0x97, 0x82, 0xe2, 0x8f, 0xdf, 0x2d, 0x5e, 0x87, 0x69, 0xc4, 0x52 };
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ uint8_t h[DNSSEC_HASH_SIZE_MAX];
+ _cleanup_free_ char *b = NULL;
+ int k;
+
+ /* The NSEC3 RR for eurid.eu on 2015-12-14. */
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_NSEC3, "PJ8S08RR45VIQDAQGE7EN3VHKNROTBMM.eurid.eu.");
+ assert_se(rr);
+
+ rr->nsec3.algorithm = DNSSEC_DIGEST_SHA1;
+ rr->nsec3.flags = 1;
+ rr->nsec3.iterations = 1;
+ rr->nsec3.salt = memdup(salt, sizeof(salt));
+ assert_se(rr->nsec3.salt);
+ rr->nsec3.salt_size = sizeof(salt);
+ rr->nsec3.next_hashed_name = memdup(next_hashed_name, sizeof(next_hashed_name));
+ assert_se(rr->nsec3.next_hashed_name);
+ rr->nsec3.next_hashed_name_size = sizeof(next_hashed_name);
+
+ log_info("NSEC3: %s", strna(dns_resource_record_to_string(rr)));
+
+ k = dnssec_nsec3_hash(rr, "eurid.eu", &h);
+ assert_se(k >= 0);
+
+ b = base32hexmem(h, k, false);
+ assert_se(b);
+ assert_se(strcasecmp(b, "PJ8S08RR45VIQDAQGE7EN3VHKNROTBMM") == 0);
+}
+
+#endif
+
+int main(int argc, char *argv[]) {
+
+ test_dnssec_canonicalize();
+
+#if HAVE_GCRYPT
+ test_dnssec_verify_dns_key();
+ test_dnssec_verify_rfc8080_ed25519_example1();
+ test_dnssec_verify_rfc8080_ed25519_example2();
+ test_dnssec_verify_rrset();
+ test_dnssec_verify_rrset2();
+ test_dnssec_nsec3_hash();
+#endif
+
+ return 0;
+}
diff --git a/src/resolve/test-resolve-tables.c b/src/resolve/test-resolve-tables.c
new file mode 100644
index 0000000..2230a66
--- /dev/null
+++ b/src/resolve/test-resolve-tables.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "dns-type.h"
+#include "resolved-dns-dnssec.h"
+#include "resolved-dns-packet.h"
+#include "test-tables.h"
+
+int main(int argc, char **argv) {
+ uint16_t i;
+
+ test_table(dns_protocol, DNS_PROTOCOL);
+ test_table(dnssec_result, DNSSEC_RESULT);
+ test_table(dnssec_verdict, DNSSEC_VERDICT);
+
+ test_table_sparse(dns_rcode, DNS_RCODE);
+ test_table_sparse(dns_type, DNS_TYPE);
+
+ log_info("/* DNS_TYPE */");
+ for (i = 0; i < _DNS_TYPE_MAX; i++) {
+ const char *s;
+
+ s = dns_type_to_string(i);
+ assert_se(s == NULL || strlen(s) < _DNS_TYPE_STRING_MAX);
+
+ if (s)
+ log_info("%-*s %s%s%s%s%s%s%s%s%s",
+ (int) _DNS_TYPE_STRING_MAX - 1, s,
+ dns_type_is_pseudo(i) ? "pseudo " : "",
+ dns_type_is_valid_query(i) ? "valid_query " : "",
+ dns_type_is_valid_rr(i) ? "is_valid_rr " : "",
+ dns_type_may_redirect(i) ? "may_redirect " : "",
+ dns_type_is_dnssec(i) ? "dnssec " : "",
+ dns_type_is_obsolete(i) ? "obsolete " : "",
+ dns_type_may_wildcard(i) ? "wildcard " : "",
+ dns_type_apex_only(i) ? "apex_only " : "",
+ dns_type_needs_authentication(i) ? "needs_authentication" : "");
+ }
+
+ log_info("/* DNS_CLASS */");
+ for (i = 0; i < _DNS_CLASS_MAX; i++) {
+ const char *s;
+
+ s = dns_class_to_string(i);
+ assert_se(s == NULL || strlen(s) < _DNS_CLASS_STRING_MAX);
+
+ if (s)
+ log_info("%-*s %s%s",
+ (int) _DNS_CLASS_STRING_MAX - 1, s,
+ dns_class_is_pseudo(i) ? "is_pseudo " : "",
+ dns_class_is_valid_rr(i) ? "is_valid_rr " : "");
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/resolve/test-resolved-etc-hosts.c b/src/resolve/test-resolved-etc-hosts.c
new file mode 100644
index 0000000..dbff889
--- /dev/null
+++ b/src/resolve/test-resolved-etc-hosts.c
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "resolved-etc-hosts.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+static void test_parse_etc_hosts_system(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ log_info("/* %s */", __func__);
+
+ f = fopen("/etc/hosts", "re");
+ if (!f) {
+ assert_se(errno == ENOENT);
+ return;
+ }
+
+ _cleanup_(etc_hosts_free) EtcHosts hosts = {};
+ assert_se(etc_hosts_parse(&hosts, f) == 0);
+}
+
+#define address_equal_4(_addr, _address) \
+ ((_addr)->family == AF_INET && \
+ !memcmp(&(_addr)->address.in, &(struct in_addr) { .s_addr = (_address) }, 4))
+
+#define address_equal_6(_addr, ...) \
+ ((_addr)->family == AF_INET6 && \
+ !memcmp(&(_addr)->address.in6, &(struct in6_addr) { .s6_addr = __VA_ARGS__}, 16) )
+
+static void test_parse_etc_hosts(void) {
+ _cleanup_(unlink_tempfilep) char
+ t[] = "/tmp/test-resolved-etc-hosts.XXXXXX";
+
+ log_info("/* %s */", __func__);
+
+ int fd;
+ _cleanup_fclose_ FILE *f;
+ const char *s;
+
+ fd = mkostemp_safe(t);
+ assert_se(fd >= 0);
+
+ f = fdopen(fd, "r+");
+ assert_se(f);
+ fputs("1.2.3.4 some.where\n"
+ "1.2.3.5 some.where\n"
+ "1.2.3.6 dash dash-dash.where-dash\n"
+ "1.2.3.7 bad-dash- -bad-dash -bad-dash.bad-\n"
+ "1.2.3.8\n"
+ "1.2.3.9 before.comment # within.comment\n"
+ "1.2.3.10 before.comment#within.comment2\n"
+ "1.2.3.11 before.comment# within.comment3\n"
+ "1.2.3.12 before.comment#\n"
+ "1.2.3 short.address\n"
+ "1.2.3.4.5 long.address\n"
+ "1::2::3 multi.colon\n"
+
+ "::0 some.where some.other\n"
+ "0.0.0.0 black.listed\n"
+ "::5\t\t\t \tsome.where\tsome.other foobar.foo.foo\t\t\t\n"
+ " \n", f);
+ assert_se(fflush_and_check(f) >= 0);
+ rewind(f);
+
+ _cleanup_(etc_hosts_free) EtcHosts hosts = {};
+ assert_se(etc_hosts_parse(&hosts, f) == 0);
+
+ EtcHostsItemByName *bn;
+ assert_se(bn = hashmap_get(hosts.by_name, "some.where"));
+ assert_se(bn->n_addresses == 3);
+ assert_se(bn->n_allocated >= 3);
+ assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.4")));
+ assert_se(address_equal_4(bn->addresses[1], inet_addr("1.2.3.5")));
+ assert_se(address_equal_6(bn->addresses[2], {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5}));
+
+ assert_se(bn = hashmap_get(hosts.by_name, "dash"));
+ assert_se(bn->n_addresses == 1);
+ assert_se(bn->n_allocated >= 1);
+ assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.6")));
+
+ assert_se(bn = hashmap_get(hosts.by_name, "dash-dash.where-dash"));
+ assert_se(bn->n_addresses == 1);
+ assert_se(bn->n_allocated >= 1);
+ assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.6")));
+
+ /* See https://tools.ietf.org/html/rfc1035#section-2.3.1 */
+ FOREACH_STRING(s, "bad-dash-", "-bad-dash", "-bad-dash.bad-")
+ assert_se(!hashmap_get(hosts.by_name, s));
+
+ assert_se(bn = hashmap_get(hosts.by_name, "before.comment"));
+ assert_se(bn->n_addresses == 4);
+ assert_se(bn->n_allocated >= 4);
+ assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.9")));
+ assert_se(address_equal_4(bn->addresses[1], inet_addr("1.2.3.10")));
+ assert_se(address_equal_4(bn->addresses[2], inet_addr("1.2.3.11")));
+ assert_se(address_equal_4(bn->addresses[3], inet_addr("1.2.3.12")));
+
+ assert(!hashmap_get(hosts.by_name, "within.comment"));
+ assert(!hashmap_get(hosts.by_name, "within.comment2"));
+ assert(!hashmap_get(hosts.by_name, "within.comment3"));
+ assert(!hashmap_get(hosts.by_name, "#"));
+
+ assert(!hashmap_get(hosts.by_name, "short.address"));
+ assert(!hashmap_get(hosts.by_name, "long.address"));
+ assert(!hashmap_get(hosts.by_name, "multi.colon"));
+ assert_se(!set_contains(hosts.no_address, "short.address"));
+ assert_se(!set_contains(hosts.no_address, "long.address"));
+ assert_se(!set_contains(hosts.no_address, "multi.colon"));
+
+ assert_se(bn = hashmap_get(hosts.by_name, "some.other"));
+ assert_se(bn->n_addresses == 1);
+ assert_se(bn->n_allocated >= 1);
+ assert_se(address_equal_6(bn->addresses[0], {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5}));
+
+ assert_se( set_contains(hosts.no_address, "some.where"));
+ assert_se( set_contains(hosts.no_address, "some.other"));
+ assert_se( set_contains(hosts.no_address, "black.listed"));
+ assert_se(!set_contains(hosts.no_address, "foobar.foo.foo"));
+}
+
+static void test_parse_file(const char *fname) {
+ _cleanup_(etc_hosts_free) EtcHosts hosts = {};
+ _cleanup_fclose_ FILE *f;
+
+ log_info("/* %s(\"%s\") */", __func__, fname);
+
+ assert_se(f = fopen(fname, "re"));
+ assert_se(etc_hosts_parse(&hosts, f) == 0);
+}
+
+int main(int argc, char **argv) {
+ log_set_max_level(LOG_DEBUG);
+ log_parse_environment();
+ log_open();
+
+ if (argc == 1) {
+ test_parse_etc_hosts_system();
+ test_parse_etc_hosts();
+ } else
+ test_parse_file(argv[1]);
+
+ return 0;
+}
diff --git a/src/resolve/test-resolved-packet.c b/src/resolve/test-resolved-packet.c
new file mode 100644
index 0000000..22ae26c
--- /dev/null
+++ b/src/resolve/test-resolved-packet.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "log.h"
+#include "resolved-dns-packet.h"
+
+static void test_dns_packet_new(void) {
+ size_t i;
+ _cleanup_(dns_packet_unrefp) DnsPacket *p2 = NULL;
+
+ for (i = 0; i <= DNS_PACKET_SIZE_MAX; i++) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+
+ assert_se(dns_packet_new(&p, DNS_PROTOCOL_DNS, i, DNS_PACKET_SIZE_MAX) == 0);
+
+ log_debug("dns_packet_new: %zu → %zu", i, p->allocated);
+ assert_se(p->allocated >= MIN(DNS_PACKET_SIZE_MAX, i));
+
+ if (i > DNS_PACKET_SIZE_START + 10 && i < DNS_PACKET_SIZE_MAX - 10)
+ i = MIN(i * 2, DNS_PACKET_SIZE_MAX - 10);
+ }
+
+ assert_se(dns_packet_new(&p2, DNS_PROTOCOL_DNS, DNS_PACKET_SIZE_MAX + 1, DNS_PACKET_SIZE_MAX) == -EFBIG);
+}
+
+int main(int argc, char **argv) {
+
+ log_set_max_level(LOG_DEBUG);
+ log_parse_environment();
+ log_open();
+
+ test_dns_packet_new();
+
+ return 0;
+}
diff --git a/src/rfkill/rfkill.c b/src/rfkill/rfkill.c
new file mode 100644
index 0000000..ac21dc0
--- /dev/null
+++ b/src/rfkill/rfkill.c
@@ -0,0 +1,377 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <linux/rfkill.h>
+#include <poll.h>
+
+#include "sd-daemon.h"
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "io-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "udev-util.h"
+#include "util.h"
+#include "list.h"
+
+/* Note that any write is delayed until exit and the rfkill state will not be
+ * stored for rfkill indices that disappear after a change. */
+#define EXIT_USEC (5 * USEC_PER_SEC)
+
+typedef struct write_queue_item {
+ LIST_FIELDS(struct write_queue_item, queue);
+ int rfkill_idx;
+ char *file;
+ int state;
+} write_queue_item;
+
+typedef struct Context {
+ LIST_HEAD(write_queue_item, write_queue);
+ int rfkill_fd;
+} Context;
+
+static struct write_queue_item* write_queue_item_free(struct write_queue_item *item) {
+ if (!item)
+ return NULL;
+
+ free(item->file);
+ return mfree(item);
+}
+
+static const char* const rfkill_type_table[NUM_RFKILL_TYPES] = {
+ [RFKILL_TYPE_ALL] = "all",
+ [RFKILL_TYPE_WLAN] = "wlan",
+ [RFKILL_TYPE_BLUETOOTH] = "bluetooth",
+ [RFKILL_TYPE_UWB] = "uwb",
+ [RFKILL_TYPE_WIMAX] = "wimax",
+ [RFKILL_TYPE_WWAN] = "wwan",
+ [RFKILL_TYPE_GPS] = "gps",
+ [RFKILL_TYPE_FM] = "fm",
+ [RFKILL_TYPE_NFC] = "nfc",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(rfkill_type, int);
+
+static int find_device(
+ const struct rfkill_event *event,
+ sd_device **ret) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ _cleanup_free_ char *sysname = NULL;
+ const char *name;
+ int r;
+
+ assert(event);
+ assert(ret);
+
+ if (asprintf(&sysname, "rfkill%i", event->idx) < 0)
+ return log_oom();
+
+ r = sd_device_new_from_subsystem_sysname(&device, "rfkill", sysname);
+ if (r < 0)
+ return log_full_errno(IN_SET(r, -ENOENT, -ENXIO, -ENODEV) ? LOG_DEBUG : LOG_ERR, r,
+ "Failed to open device '%s': %m", sysname);
+
+ r = sd_device_get_sysattr_value(device, "name", &name);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "Device has no name, ignoring: %m");
+
+ log_device_debug(device, "Operating on rfkill device '%s'.", name);
+
+ *ret = TAKE_PTR(device);
+ return 0;
+}
+
+static int determine_state_file(
+ const struct rfkill_event *event,
+ char **ret) {
+
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL, *device = NULL;
+ const char *path_id, *type;
+ char *state_file;
+ int r;
+
+ assert(event);
+ assert(ret);
+
+ r = find_device(event, &d);
+ if (r < 0)
+ return r;
+
+ r = device_wait_for_initialization(d, "rfkill", &device);
+ if (r < 0)
+ return r;
+
+ assert_se(type = rfkill_type_to_string(event->type));
+
+ if (sd_device_get_property_value(device, "ID_PATH", &path_id) >= 0) {
+ _cleanup_free_ char *escaped_path_id = NULL;
+
+ escaped_path_id = cescape(path_id);
+ if (!escaped_path_id)
+ return log_oom();
+
+ state_file = strjoin("/var/lib/systemd/rfkill/", escaped_path_id, ":", type);
+ } else
+ state_file = strjoin("/var/lib/systemd/rfkill/", type);
+
+ if (!state_file)
+ return log_oom();
+
+ *ret = state_file;
+ return 0;
+}
+
+static int load_state(Context *c, const struct rfkill_event *event) {
+ _cleanup_free_ char *state_file = NULL, *value = NULL;
+ struct rfkill_event we;
+ ssize_t l;
+ int b, r;
+
+ assert(c);
+ assert(c->rfkill_fd >= 0);
+ assert(event);
+
+ if (shall_restore_state() == 0)
+ return 0;
+
+ r = determine_state_file(event, &state_file);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(state_file, &value);
+ if (r == -ENOENT) {
+ /* No state file? Then save the current state */
+
+ r = write_string_file(state_file, one_zero(event->soft), WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write state file %s: %m", state_file);
+
+ log_debug("Saved state '%s' to %s.", one_zero(event->soft), state_file);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to read state file %s: %m", state_file);
+
+ b = parse_boolean(value);
+ if (b < 0)
+ return log_error_errno(b, "Failed to parse state file %s: %m", state_file);
+
+ we = (struct rfkill_event) {
+ .op = RFKILL_OP_CHANGE,
+ .idx = event->idx,
+ .soft = b,
+ };
+
+ l = write(c->rfkill_fd, &we, sizeof(we));
+ if (l < 0)
+ return log_error_errno(errno, "Failed to restore rfkill state for %i: %m", event->idx);
+ if (l != sizeof(we))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Couldn't write rfkill event structure, too short.");
+
+ log_debug("Loaded state '%s' from %s.", one_zero(b), state_file);
+ return 0;
+}
+
+static void save_state_queue_remove(Context *c, int idx, const char *state_file) {
+ struct write_queue_item *item, *tmp;
+
+ assert(c);
+
+ LIST_FOREACH_SAFE(queue, item, tmp, c->write_queue) {
+ if ((state_file && streq(item->file, state_file)) || idx == item->rfkill_idx) {
+ log_debug("Canceled previous save state of '%s' to %s.", one_zero(item->state), item->file);
+ LIST_REMOVE(queue, c->write_queue, item);
+ write_queue_item_free(item);
+ }
+ }
+}
+
+static int save_state_queue(Context *c, const struct rfkill_event *event) {
+ _cleanup_free_ char *state_file = NULL;
+ struct write_queue_item *item;
+ int r;
+
+ assert(c);
+ assert(c->rfkill_fd >= 0);
+ assert(event);
+
+ r = determine_state_file(event, &state_file);
+ if (r < 0)
+ return r;
+
+ save_state_queue_remove(c, event->idx, state_file);
+
+ item = new0(struct write_queue_item, 1);
+ if (!item)
+ return -ENOMEM;
+
+ item->file = TAKE_PTR(state_file);
+ item->rfkill_idx = event->idx;
+ item->state = event->soft;
+
+ LIST_APPEND(queue, c->write_queue, item);
+
+ return 0;
+}
+
+static int save_state_cancel(Context *c, const struct rfkill_event *event) {
+ _cleanup_free_ char *state_file = NULL;
+ int r;
+
+ assert(c);
+ assert(c->rfkill_fd >= 0);
+ assert(event);
+
+ r = determine_state_file(event, &state_file);
+ save_state_queue_remove(c, event->idx, state_file);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int save_state_write_one(struct write_queue_item *item) {
+ int r;
+
+ r = write_string_file(item->file, one_zero(item->state), WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write state file %s: %m", item->file);
+
+ log_debug("Saved state '%s' to %s.", one_zero(item->state), item->file);
+ return 0;
+}
+
+static void context_save_and_clear(Context *c) {
+ struct write_queue_item *i;
+
+ assert(c);
+
+ while ((i = c->write_queue)) {
+ LIST_REMOVE(queue, c->write_queue, i);
+ (void) save_state_write_one(i);
+ write_queue_item_free(i);
+ }
+
+ safe_close(c->rfkill_fd);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(context_save_and_clear) Context c = { .rfkill_fd = -1 };
+ bool ready = false;
+ int r, n;
+
+ if (argc > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program requires no arguments.");
+
+ log_setup_service();
+
+ umask(0022);
+
+ r = mkdir_p("/var/lib/systemd/rfkill", 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create rfkill directory: %m");
+
+ n = sd_listen_fds(false);
+ if (n < 0)
+ return log_error_errno(n, "Failed to determine whether we got any file descriptors passed: %m");
+ if (n > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Got too many file descriptors.");
+
+ if (n == 0) {
+ c.rfkill_fd = open("/dev/rfkill", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (c.rfkill_fd < 0) {
+ if (errno == ENOENT) {
+ log_debug_errno(errno, "Missing rfkill subsystem, or no device present, exiting.");
+ return 0;
+ }
+
+ return log_error_errno(errno, "Failed to open /dev/rfkill: %m");
+ }
+ } else {
+ c.rfkill_fd = SD_LISTEN_FDS_START;
+
+ r = fd_nonblock(c.rfkill_fd, 1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make /dev/rfkill socket non-blocking: %m");
+ }
+
+ for (;;) {
+ struct rfkill_event event;
+ const char *type;
+ ssize_t l;
+
+ l = read(c.rfkill_fd, &event, sizeof(event));
+ if (l < 0) {
+ if (errno == EAGAIN) {
+
+ if (!ready) {
+ /* Notify manager that we are
+ * now finished with
+ * processing whatever was
+ * queued */
+ (void) sd_notify(false, "READY=1");
+ ready = true;
+ }
+
+ /* Hang around for a bit, maybe there's more coming */
+
+ r = fd_wait_for_event(c.rfkill_fd, POLLIN, EXIT_USEC);
+ if (r == -EINTR)
+ continue;
+ if (r < 0)
+ return log_error_errno(r, "Failed to poll() on device: %m");
+ if (r > 0)
+ continue;
+
+ log_debug("All events read and idle, exiting.");
+ break;
+ }
+
+ log_error_errno(errno, "Failed to read from /dev/rfkill: %m");
+ }
+
+ if (l != RFKILL_EVENT_SIZE_V1)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Read event structure of invalid size.");
+
+ type = rfkill_type_to_string(event.type);
+ if (!type) {
+ log_debug("An rfkill device of unknown type %i discovered, ignoring.", event.type);
+ continue;
+ }
+
+ switch (event.op) {
+
+ case RFKILL_OP_ADD:
+ log_debug("A new rfkill device has been added with index %i and type %s.", event.idx, type);
+ (void) load_state(&c, &event);
+ break;
+
+ case RFKILL_OP_DEL:
+ log_debug("An rfkill device has been removed with index %i and type %s", event.idx, type);
+ (void) save_state_cancel(&c, &event);
+ break;
+
+ case RFKILL_OP_CHANGE:
+ log_debug("An rfkill device has changed state with index %i and type %s", event.idx, type);
+ (void) save_state_queue(&c, &event);
+ break;
+
+ default:
+ log_debug("Unknown event %i from /dev/rfkill for index %i and type %s, ignoring.", event.op, event.idx, type);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/run-generator/run-generator.c b/src/run-generator/run-generator.c
new file mode 100644
index 0000000..a5dfac0
--- /dev/null
+++ b/src/run-generator/run-generator.c
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "generator.h"
+#include "mkdir.h"
+#include "proc-cmdline.h"
+#include "specifier.h"
+#include "strv.h"
+
+static const char *arg_dest = NULL;
+static char **arg_commands = NULL;
+static char *arg_success_action = NULL;
+static char *arg_failure_action = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_commands, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_success_action, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_failure_action, freep);
+
+static int parse(const char *key, const char *value, void *data) {
+ int r;
+
+ if (proc_cmdline_key_streq(key, "systemd.run")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = strv_extend(&arg_commands, value);
+ if (r < 0)
+ return log_oom();
+
+ } else if (proc_cmdline_key_streq(key, "systemd.run_success_action")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (free_and_strdup(&arg_success_action, value) < 0)
+ return log_oom();
+
+ } else if (proc_cmdline_key_streq(key, "systemd.run_failure_action")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (free_and_strdup(&arg_failure_action, value) < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+static int generate(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *p;
+ char **c;
+ int r;
+
+ if (strv_isempty(arg_commands) && !arg_success_action)
+ return 0;
+
+ p = strjoina(arg_dest, "/kernel-command-line.service");
+ f = fopen(p, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", p);
+
+ fputs("# Automatically generated by systemd-run-generator\n\n"
+ "[Unit]\n"
+ "Description=Command from Kernel Command Line\n"
+ "Documentation=man:systemd-run-generator(8)\n"
+ "SourcePath=/proc/cmdline\n", f);
+
+ if (!streq_ptr(arg_success_action, "none"))
+ fprintf(f, "SuccessAction=%s\n",
+ arg_success_action ?: "exit");
+
+ if (!streq_ptr(arg_failure_action, "none"))
+ fprintf(f, "FailureAction=%s\n",
+ arg_failure_action ?: "exit");
+
+ fputs("\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "StandardOutput=journal+console\n", f);
+
+ STRV_FOREACH(c, arg_commands) {
+ _cleanup_free_ char *a = NULL;
+
+ a = specifier_escape(*c);
+ if (!a)
+ return log_oom();
+
+ fprintf(f, "ExecStart=%s\n", a);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", p);
+
+ /* Let's create a a target we can link "default.target" to */
+ p = strjoina(arg_dest, "/kernel-command-line.target");
+ r = write_string_file(
+ p,
+ "# Automatically generated by systemd-run-generator\n\n"
+ "[Unit]\n"
+ "Description=Command from Kernel Command Line\n"
+ "Documentation=man:systemd-run-generator(8)\n"
+ "SourcePath=/proc/cmdline\n"
+ "Requires=kernel-command-line.service\n"
+ "After=kernel-command-line.service\n",
+ WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_NOFOLLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create unit file %s: %m", p);
+
+ /* And now redirect default.target to our new target */
+ p = strjoina(arg_dest, "/default.target");
+ if (symlink("kernel-command-line.target", p) < 0)
+ return log_error_errno(errno, "Failed to link unit file kernel-command-line.target → %s: %m", p);
+
+ return 0;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r;
+
+ assert_se(arg_dest = dest);
+
+ r = proc_cmdline_parse(parse, NULL, PROC_CMDLINE_RD_STRICT|PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ return generate();
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/run/run.c b/src/run/run.c
new file mode 100644
index 0000000..e9e3128
--- /dev/null
+++ b/src/run/run.c
@@ -0,0 +1,1655 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+#include <stdio.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "calendarspec.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "ptyfwd.h"
+#include "signal-util.h"
+#include "spawn-polkit-agent.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "unit-def.h"
+#include "unit-name.h"
+#include "user-util.h"
+
+static bool arg_ask_password = true;
+static bool arg_scope = false;
+static bool arg_remain_after_exit = false;
+static bool arg_no_block = false;
+static bool arg_wait = false;
+static const char *arg_unit = NULL;
+static const char *arg_description = NULL;
+static const char *arg_slice = NULL;
+static bool arg_send_sighup = false;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static const char *arg_host = NULL;
+static bool arg_user = false;
+static const char *arg_service_type = NULL;
+static const char *arg_exec_user = NULL;
+static const char *arg_exec_group = NULL;
+static int arg_nice = 0;
+static bool arg_nice_set = false;
+static char **arg_environment = NULL;
+static char **arg_property = NULL;
+static enum {
+ ARG_STDIO_NONE, /* The default, as it is for normal services, stdin connected to /dev/null, and stdout+stderr to the journal */
+ ARG_STDIO_PTY, /* Interactive behaviour, requested by --pty: we allocate a pty and connect it to the TTY we are invoked from */
+ ARG_STDIO_DIRECT, /* Directly pass our stdin/stdout/stderr to the activated service, useful for usage in shell pipelines, requested by --pipe */
+ ARG_STDIO_AUTO, /* If --pipe and --pty are used together we use --pty when invoked on a TTY, and --pipe otherwise */
+} arg_stdio = ARG_STDIO_NONE;
+static char **arg_path_property = NULL;
+static char **arg_socket_property = NULL;
+static char **arg_timer_property = NULL;
+static bool with_timer = false;
+static bool arg_quiet = false;
+static bool arg_aggressive_gc = false;
+static char *arg_working_directory = NULL;
+static bool arg_shell = false;
+static char **arg_cmdline = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_environment, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_property, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_path_property, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_socket_property, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_timer_property, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_working_directory, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_cmdline, strv_freep);
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-run", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] {COMMAND} [ARGS...]\n\n"
+ "Run the specified command in a transient scope or service.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-ask-password Do not prompt for password\n"
+ " --user Run as user unit\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --scope Run this as scope rather than service\n"
+ " --unit=UNIT Run under the specified unit name\n"
+ " -p --property=NAME=VALUE Set service or scope unit property\n"
+ " --description=TEXT Description for unit\n"
+ " --slice=SLICE Run in the specified slice\n"
+ " --no-block Do not wait until operation finished\n"
+ " -r --remain-after-exit Leave service around until explicitly stopped\n"
+ " --wait Wait until service stopped again\n"
+ " --send-sighup Send SIGHUP when terminating\n"
+ " --service-type=TYPE Service type\n"
+ " --uid=USER Run as system user\n"
+ " --gid=GROUP Run as system group\n"
+ " --nice=NICE Nice level\n"
+ " --working-directory=PATH Set working directory\n"
+ " -d --same-dir Inherit working directory from caller\n"
+ " -E --setenv=NAME=VALUE Set environment\n"
+ " -t --pty Run service on pseudo TTY as STDIN/STDOUT/\n"
+ " STDERR\n"
+ " -P --pipe Pass STDIN/STDOUT/STDERR directly to service\n"
+ " -q --quiet Suppress information messages during runtime\n"
+ " -G --collect Unload unit after it ran, even when failed\n"
+ " -S --shell Invoke a $SHELL interactively\n\n"
+ "Path options:\n"
+ " --path-property=NAME=VALUE Set path unit property\n\n"
+ "Socket options:\n"
+ " --socket-property=NAME=VALUE Set socket unit property\n\n"
+ "Timer options:\n"
+ " --on-active=SECONDS Run after SECONDS delay\n"
+ " --on-boot=SECONDS Run SECONDS after machine was booted up\n"
+ " --on-startup=SECONDS Run SECONDS after systemd activation\n"
+ " --on-unit-active=SECONDS Run SECONDS after the last activation\n"
+ " --on-unit-inactive=SECONDS Run SECONDS after the last deactivation\n"
+ " --on-calendar=SPEC Realtime timer\n"
+ " --timer-property=NAME=VALUE Set timer unit property\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int add_timer_property(const char *name, const char *val) {
+ char *p;
+
+ assert(name);
+ assert(val);
+
+ p = strjoin(name, "=", val);
+ if (!p)
+ return log_oom();
+
+ if (strv_consume(&arg_timer_property, p) < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_USER,
+ ARG_SYSTEM,
+ ARG_SCOPE,
+ ARG_UNIT,
+ ARG_DESCRIPTION,
+ ARG_SLICE,
+ ARG_SEND_SIGHUP,
+ ARG_SERVICE_TYPE,
+ ARG_EXEC_USER,
+ ARG_EXEC_GROUP,
+ ARG_NICE,
+ ARG_ON_ACTIVE,
+ ARG_ON_BOOT,
+ ARG_ON_STARTUP,
+ ARG_ON_UNIT_ACTIVE,
+ ARG_ON_UNIT_INACTIVE,
+ ARG_ON_CALENDAR,
+ ARG_TIMER_PROPERTY,
+ ARG_PATH_PROPERTY,
+ ARG_SOCKET_PROPERTY,
+ ARG_NO_BLOCK,
+ ARG_NO_ASK_PASSWORD,
+ ARG_WAIT,
+ ARG_WORKING_DIRECTORY,
+ ARG_SHELL,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "user", no_argument, NULL, ARG_USER },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "scope", no_argument, NULL, ARG_SCOPE },
+ { "unit", required_argument, NULL, ARG_UNIT },
+ { "description", required_argument, NULL, ARG_DESCRIPTION },
+ { "slice", required_argument, NULL, ARG_SLICE },
+ { "remain-after-exit", no_argument, NULL, 'r' },
+ { "send-sighup", no_argument, NULL, ARG_SEND_SIGHUP },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "service-type", required_argument, NULL, ARG_SERVICE_TYPE },
+ { "wait", no_argument, NULL, ARG_WAIT },
+ { "uid", required_argument, NULL, ARG_EXEC_USER },
+ { "gid", required_argument, NULL, ARG_EXEC_GROUP },
+ { "nice", required_argument, NULL, ARG_NICE },
+ { "setenv", required_argument, NULL, 'E' },
+ { "property", required_argument, NULL, 'p' },
+ { "tty", no_argument, NULL, 't' }, /* deprecated alias */
+ { "pty", no_argument, NULL, 't' },
+ { "pipe", no_argument, NULL, 'P' },
+ { "quiet", no_argument, NULL, 'q' },
+ { "on-active", required_argument, NULL, ARG_ON_ACTIVE },
+ { "on-boot", required_argument, NULL, ARG_ON_BOOT },
+ { "on-startup", required_argument, NULL, ARG_ON_STARTUP },
+ { "on-unit-active", required_argument, NULL, ARG_ON_UNIT_ACTIVE },
+ { "on-unit-inactive", required_argument, NULL, ARG_ON_UNIT_INACTIVE },
+ { "on-calendar", required_argument, NULL, ARG_ON_CALENDAR },
+ { "timer-property", required_argument, NULL, ARG_TIMER_PROPERTY },
+ { "path-property", required_argument, NULL, ARG_PATH_PROPERTY },
+ { "socket-property", required_argument, NULL, ARG_SOCKET_PROPERTY },
+ { "no-block", no_argument, NULL, ARG_NO_BLOCK },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "collect", no_argument, NULL, 'G' },
+ { "working-directory", required_argument, NULL, ARG_WORKING_DIRECTORY },
+ { "same-dir", no_argument, NULL, 'd' },
+ { "shell", no_argument, NULL, 'S' },
+ {},
+ };
+
+ bool with_trigger = false;
+ int r, c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "+hrH:M:E:p:tPqGdS", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case ARG_USER:
+ arg_user = true;
+ break;
+
+ case ARG_SYSTEM:
+ arg_user = false;
+ break;
+
+ case ARG_SCOPE:
+ arg_scope = true;
+ break;
+
+ case ARG_UNIT:
+ arg_unit = optarg;
+ break;
+
+ case ARG_DESCRIPTION:
+ arg_description = optarg;
+ break;
+
+ case ARG_SLICE:
+ arg_slice = optarg;
+ break;
+
+ case ARG_SEND_SIGHUP:
+ arg_send_sighup = true;
+ break;
+
+ case 'r':
+ arg_remain_after_exit = true;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case ARG_SERVICE_TYPE:
+ arg_service_type = optarg;
+ break;
+
+ case ARG_EXEC_USER:
+ arg_exec_user = optarg;
+ break;
+
+ case ARG_EXEC_GROUP:
+ arg_exec_group = optarg;
+ break;
+
+ case ARG_NICE:
+ r = parse_nice(optarg, &arg_nice);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse nice value: %s", optarg);
+
+ arg_nice_set = true;
+ break;
+
+ case 'E':
+ if (strv_extend(&arg_environment, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case 'p':
+ if (strv_extend(&arg_property, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case 't': /* --pty */
+ if (IN_SET(arg_stdio, ARG_STDIO_DIRECT, ARG_STDIO_AUTO)) /* if --pipe is already used, upgrade to auto mode */
+ arg_stdio = ARG_STDIO_AUTO;
+ else
+ arg_stdio = ARG_STDIO_PTY;
+ break;
+
+ case 'P': /* --pipe */
+ if (IN_SET(arg_stdio, ARG_STDIO_PTY, ARG_STDIO_AUTO)) /* If --pty is already used, upgrade to auto mode */
+ arg_stdio = ARG_STDIO_AUTO;
+ else
+ arg_stdio = ARG_STDIO_DIRECT;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case ARG_ON_ACTIVE:
+ r = add_timer_property("OnActiveSec", optarg);
+ if (r < 0)
+ return r;
+
+ with_timer = true;
+ break;
+
+ case ARG_ON_BOOT:
+ r = add_timer_property("OnBootSec", optarg);
+ if (r < 0)
+ return r;
+
+ with_timer = true;
+ break;
+
+ case ARG_ON_STARTUP:
+ r = add_timer_property("OnStartupSec", optarg);
+ if (r < 0)
+ return r;
+
+ with_timer = true;
+ break;
+
+ case ARG_ON_UNIT_ACTIVE:
+ r = add_timer_property("OnUnitActiveSec", optarg);
+ if (r < 0)
+ return r;
+
+ with_timer = true;
+ break;
+
+ case ARG_ON_UNIT_INACTIVE:
+ r = add_timer_property("OnUnitInactiveSec", optarg);
+ if (r < 0)
+ return r;
+
+ with_timer = true;
+ break;
+
+ case ARG_ON_CALENDAR:
+ r = add_timer_property("OnCalendar", optarg);
+ if (r < 0)
+ return r;
+
+ with_timer = true;
+ break;
+
+ case ARG_TIMER_PROPERTY:
+
+ if (strv_extend(&arg_timer_property, optarg) < 0)
+ return log_oom();
+
+ with_timer = with_timer ||
+ STARTSWITH_SET(optarg,
+ "OnActiveSec=",
+ "OnBootSec=",
+ "OnStartupSec=",
+ "OnUnitActiveSec=",
+ "OnUnitInactiveSec=",
+ "OnCalendar=");
+ break;
+
+ case ARG_PATH_PROPERTY:
+
+ if (strv_extend(&arg_path_property, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_SOCKET_PROPERTY:
+
+ if (strv_extend(&arg_socket_property, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_NO_BLOCK:
+ arg_no_block = true;
+ break;
+
+ case ARG_WAIT:
+ arg_wait = true;
+ break;
+
+ case ARG_WORKING_DIRECTORY:
+ r = parse_path_argument_and_warn(optarg, true, &arg_working_directory);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case 'd': {
+ _cleanup_free_ char *p = NULL;
+
+ r = safe_getcwd(&p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get current working directory: %m");
+
+ if (empty_or_root(p))
+ arg_working_directory = mfree(arg_working_directory);
+ else
+ free_and_replace(arg_working_directory, p);
+ break;
+ }
+
+ case 'G':
+ arg_aggressive_gc = true;
+ break;
+
+ case 'S':
+ arg_shell = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ with_trigger = !!arg_path_property || !!arg_socket_property || with_timer;
+
+ /* currently, only single trigger (path, socket, timer) unit can be created simultaneously */
+ if ((int) !!arg_path_property + (int) !!arg_socket_property + (int) with_timer > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Only single trigger (path, socket, timer) unit can be created.");
+
+ if (arg_shell) {
+ /* If --shell is imply --pty --pipe --same-dir --service-type=exec --wait --collect, unless otherwise
+ * specified. */
+
+ if (!arg_scope) {
+ if (arg_stdio == ARG_STDIO_NONE)
+ arg_stdio = ARG_STDIO_AUTO;
+
+ if (!arg_working_directory) {
+ r = safe_getcwd(&arg_working_directory);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get current working directory: %m");
+ }
+
+ if (!arg_service_type) {
+ arg_service_type = strdup("exec");
+ if (!arg_service_type)
+ return log_oom();
+ }
+
+ arg_wait = true;
+ }
+
+ arg_aggressive_gc = true;
+ }
+
+ if (arg_stdio == ARG_STDIO_AUTO) {
+ /* If we both --pty and --pipe are specified we'll automatically pick --pty if we are connected fully
+ * to a TTY and pick direct fd passing otherwise. This way, we automatically adapt to usage in a shell
+ * pipeline, but we are neatly interactive with tty-level isolation otherwise. */
+ arg_stdio = isatty(STDIN_FILENO) && isatty(STDOUT_FILENO) && isatty(STDERR_FILENO) ?
+ ARG_STDIO_PTY :
+ ARG_STDIO_DIRECT;
+ }
+
+ if (argc > optind) {
+ char **l;
+
+ if (arg_shell)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "If --shell is used, no command line is expected.");
+
+ l = strv_copy(argv + optind);
+ if (!l)
+ return log_oom();
+
+ strv_free_and_replace(arg_cmdline, l);
+
+ } else if (arg_shell) {
+ _cleanup_free_ char *s = NULL;
+ char **l;
+
+ r = get_shell(&s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine shell: %m");
+
+ l = strv_new(s);
+ if (!l)
+ return log_oom();
+
+ strv_free_and_replace(arg_cmdline, l);
+
+ } else if (!arg_unit || !with_trigger)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Command line to execute required.");
+
+ if (arg_user && arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Execution in user context is not supported on non-local systems.");
+
+ if (arg_scope && arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Scope execution is not supported on non-local systems.");
+
+ if (arg_scope && (arg_remain_after_exit || arg_service_type))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--remain-after-exit and --service-type= are not supported in --scope mode.");
+
+ if (arg_stdio != ARG_STDIO_NONE && (with_trigger || arg_scope))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--pty/--pipe is not compatible in timer or --scope mode.");
+
+ if (arg_stdio != ARG_STDIO_NONE && arg_transport == BUS_TRANSPORT_REMOTE)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--pty/--pipe is only supported when connecting to the local system or containers.");
+
+ if (arg_stdio != ARG_STDIO_NONE && arg_no_block)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--pty/--pipe is not compatible with --no-block.");
+
+ if (arg_scope && with_trigger)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path, socket or timer options are not supported in --scope mode.");
+
+ if (arg_timer_property && !with_timer)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--timer-property= has no effect without any other timer options.");
+
+ if (arg_wait) {
+ if (arg_no_block)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--wait may not be combined with --no-block.");
+
+ if (with_trigger)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--wait may not be combined with path, socket or timer operations.");
+
+ if (arg_scope)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--wait may not be combined with --scope.");
+ }
+
+ return 1;
+}
+
+static int transient_unit_set_properties(sd_bus_message *m, UnitType t, char **properties) {
+ int r;
+
+ r = sd_bus_message_append(m, "(sv)", "Description", "s", arg_description);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (arg_aggressive_gc) {
+ r = sd_bus_message_append(m, "(sv)", "CollectMode", "s", "inactive-or-failed");
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = bus_append_unit_property_assignment_many(m, t, properties);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int transient_cgroup_set_properties(sd_bus_message *m) {
+ int r;
+ assert(m);
+
+ if (!isempty(arg_slice)) {
+ _cleanup_free_ char *slice = NULL;
+
+ r = unit_name_mangle_with_suffix(arg_slice, arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN, ".slice", &slice);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle name '%s': %m", arg_slice);
+
+ r = sd_bus_message_append(m, "(sv)", "Slice", "s", slice);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ return 0;
+}
+
+static int transient_kill_set_properties(sd_bus_message *m) {
+ int r;
+
+ assert(m);
+
+ if (arg_send_sighup) {
+ r = sd_bus_message_append(m, "(sv)", "SendSIGHUP", "b", arg_send_sighup);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ return 0;
+}
+
+static int transient_service_set_properties(sd_bus_message *m, const char *pty_path) {
+ bool send_term = false;
+ int r;
+
+ assert(m);
+
+ r = transient_unit_set_properties(m, UNIT_SERVICE, arg_property);
+ if (r < 0)
+ return r;
+
+ r = transient_kill_set_properties(m);
+ if (r < 0)
+ return r;
+
+ r = transient_cgroup_set_properties(m);
+ if (r < 0)
+ return r;
+
+ if (arg_wait || arg_stdio != ARG_STDIO_NONE) {
+ r = sd_bus_message_append(m, "(sv)", "AddRef", "b", 1);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (arg_remain_after_exit) {
+ r = sd_bus_message_append(m, "(sv)", "RemainAfterExit", "b", arg_remain_after_exit);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (arg_service_type) {
+ r = sd_bus_message_append(m, "(sv)", "Type", "s", arg_service_type);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (arg_exec_user) {
+ r = sd_bus_message_append(m, "(sv)", "User", "s", arg_exec_user);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (arg_exec_group) {
+ r = sd_bus_message_append(m, "(sv)", "Group", "s", arg_exec_group);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (arg_nice_set) {
+ r = sd_bus_message_append(m, "(sv)", "Nice", "i", arg_nice);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (arg_working_directory) {
+ r = sd_bus_message_append(m, "(sv)", "WorkingDirectory", "s", arg_working_directory);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (pty_path) {
+ r = sd_bus_message_append(m,
+ "(sv)(sv)(sv)(sv)",
+ "StandardInput", "s", "tty",
+ "StandardOutput", "s", "tty",
+ "StandardError", "s", "tty",
+ "TTYPath", "s", pty_path);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ send_term = true;
+
+ } else if (arg_stdio == ARG_STDIO_DIRECT) {
+ r = sd_bus_message_append(m,
+ "(sv)(sv)(sv)",
+ "StandardInputFileDescriptor", "h", STDIN_FILENO,
+ "StandardOutputFileDescriptor", "h", STDOUT_FILENO,
+ "StandardErrorFileDescriptor", "h", STDERR_FILENO);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ send_term = isatty(STDIN_FILENO) || isatty(STDOUT_FILENO) || isatty(STDERR_FILENO);
+ }
+
+ if (send_term) {
+ const char *e;
+
+ e = getenv("TERM");
+ if (e) {
+ char *n;
+
+ n = strjoina("TERM=", e);
+ r = sd_bus_message_append(m,
+ "(sv)",
+ "Environment", "as", 1, n);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+ }
+
+ if (!strv_isempty(arg_environment)) {
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", "Environment");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "as");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, arg_environment);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ /* Exec container */
+ if (!strv_isempty(arg_cmdline)) {
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", "ExecStart");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(sasb)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sasb)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'r', "sasb");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", arg_cmdline[0]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, arg_cmdline);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "b", false);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ return 0;
+}
+
+static int transient_scope_set_properties(sd_bus_message *m) {
+ int r;
+
+ assert(m);
+
+ r = transient_unit_set_properties(m, UNIT_SCOPE, arg_property);
+ if (r < 0)
+ return r;
+
+ r = transient_kill_set_properties(m);
+ if (r < 0)
+ return r;
+
+ r = transient_cgroup_set_properties(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "(sv)", "PIDs", "au", 1, (uint32_t) getpid_cached());
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 0;
+}
+
+static int transient_timer_set_properties(sd_bus_message *m) {
+ int r;
+
+ assert(m);
+
+ r = transient_unit_set_properties(m, UNIT_TIMER, arg_timer_property);
+ if (r < 0)
+ return r;
+
+ /* Automatically clean up our transient timers */
+ r = sd_bus_message_append(m, "(sv)", "RemainAfterElapse", "b", false);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 0;
+}
+
+static int make_unit_name(sd_bus *bus, UnitType t, char **ret) {
+ const char *unique, *id;
+ char *p;
+ int r;
+
+ assert(bus);
+ assert(t >= 0);
+ assert(t < _UNIT_TYPE_MAX);
+
+ r = sd_bus_get_unique_name(bus, &unique);
+ if (r < 0) {
+ sd_id128_t rnd;
+
+ /* We couldn't get the unique name, which is a pretty
+ * common case if we are connected to systemd
+ * directly. In that case, just pick a random uuid as
+ * name */
+
+ r = sd_id128_randomize(&rnd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate random run unit name: %m");
+
+ if (asprintf(ret, "run-r" SD_ID128_FORMAT_STR ".%s", SD_ID128_FORMAT_VAL(rnd), unit_type_to_string(t)) < 0)
+ return log_oom();
+
+ return 0;
+ }
+
+ /* We managed to get the unique name, then let's use that to
+ * name our transient units. */
+
+ id = startswith(unique, ":1.");
+ if (!id)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unique name %s has unexpected format.",
+ unique);
+
+ p = strjoin("run-u", id, ".", unit_type_to_string(t));
+ if (!p)
+ return log_oom();
+
+ *ret = p;
+ return 0;
+}
+
+typedef struct RunContext {
+ sd_bus *bus;
+ sd_event *event;
+ PTYForward *forward;
+ sd_bus_slot *match;
+
+ /* The exit data of the unit */
+ char *active_state;
+ uint64_t inactive_exit_usec;
+ uint64_t inactive_enter_usec;
+ char *result;
+ uint64_t cpu_usage_nsec;
+ uint64_t ip_ingress_bytes;
+ uint64_t ip_egress_bytes;
+ uint32_t exit_code;
+ uint32_t exit_status;
+} RunContext;
+
+static void run_context_free(RunContext *c) {
+ assert(c);
+
+ c->forward = pty_forward_free(c->forward);
+ c->match = sd_bus_slot_unref(c->match);
+ c->bus = sd_bus_unref(c->bus);
+ c->event = sd_event_unref(c->event);
+
+ free(c->active_state);
+ free(c->result);
+}
+
+static void run_context_check_done(RunContext *c) {
+ bool done;
+
+ assert(c);
+
+ if (c->match)
+ done = STRPTR_IN_SET(c->active_state, "inactive", "failed");
+ else
+ done = true;
+
+ if (c->forward && done) /* If the service is gone, it's time to drain the output */
+ done = pty_forward_drain(c->forward);
+
+ if (done)
+ sd_event_exit(c->event, EXIT_SUCCESS);
+}
+
+static int run_context_update(RunContext *c, const char *path) {
+
+ static const struct bus_properties_map map[] = {
+ { "ActiveState", "s", NULL, offsetof(RunContext, active_state) },
+ { "InactiveExitTimestampMonotonic", "t", NULL, offsetof(RunContext, inactive_exit_usec) },
+ { "InactiveEnterTimestampMonotonic", "t", NULL, offsetof(RunContext, inactive_enter_usec) },
+ { "Result", "s", NULL, offsetof(RunContext, result) },
+ { "ExecMainCode", "i", NULL, offsetof(RunContext, exit_code) },
+ { "ExecMainStatus", "i", NULL, offsetof(RunContext, exit_status) },
+ { "CPUUsageNSec", "t", NULL, offsetof(RunContext, cpu_usage_nsec) },
+ { "IPIngressBytes", "t", NULL, offsetof(RunContext, ip_ingress_bytes) },
+ { "IPEgressBytes", "t", NULL, offsetof(RunContext, ip_egress_bytes) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ r = bus_map_all_properties(c->bus,
+ "org.freedesktop.systemd1",
+ path,
+ map,
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ c);
+ if (r < 0) {
+ sd_event_exit(c->event, EXIT_FAILURE);
+ return log_error_errno(r, "Failed to query unit state: %s", bus_error_message(&error, r));
+ }
+
+ run_context_check_done(c);
+ return 0;
+}
+
+static int on_properties_changed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ RunContext *c = userdata;
+
+ assert(m);
+ assert(c);
+
+ return run_context_update(c, sd_bus_message_get_path(m));
+}
+
+static int pty_forward_handler(PTYForward *f, int rcode, void *userdata) {
+ RunContext *c = userdata;
+
+ assert(f);
+
+ if (rcode < 0) {
+ sd_event_exit(c->event, EXIT_FAILURE);
+ return log_error_errno(rcode, "Error on PTY forwarding logic: %m");
+ }
+
+ run_context_check_done(c);
+ return 0;
+}
+
+static int start_transient_service(
+ sd_bus *bus,
+ int *retval) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_free_ char *service = NULL, *pty_path = NULL;
+ _cleanup_close_ int master = -1;
+ int r;
+
+ assert(bus);
+ assert(retval);
+
+ if (arg_stdio == ARG_STDIO_PTY) {
+
+ if (arg_transport == BUS_TRANSPORT_LOCAL) {
+ master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (master < 0)
+ return log_error_errno(errno, "Failed to acquire pseudo tty: %m");
+
+ r = ptsname_malloc(master, &pty_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine tty name: %m");
+
+ if (unlockpt(master) < 0)
+ return log_error_errno(errno, "Failed to unlock tty: %m");
+
+ } else if (arg_transport == BUS_TRANSPORT_MACHINE) {
+ _cleanup_(sd_bus_unrefp) sd_bus *system_bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *pty_reply = NULL;
+ const char *s;
+
+ r = sd_bus_default_system(&system_bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = sd_bus_call_method(system_bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "OpenMachinePTY",
+ &error,
+ &pty_reply,
+ "s", arg_host);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get machine PTY: %s", bus_error_message(&error, -r));
+
+ r = sd_bus_message_read(pty_reply, "hs", &master, &s);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ master = fcntl(master, F_DUPFD_CLOEXEC, 3);
+ if (master < 0)
+ return log_error_errno(errno, "Failed to duplicate master fd: %m");
+
+ pty_path = strdup(s);
+ if (!pty_path)
+ return log_oom();
+ } else
+ assert_not_reached("Can't allocate tty via ssh");
+ }
+
+ if (!arg_no_block) {
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch jobs: %m");
+ }
+
+ if (arg_unit) {
+ r = unit_name_mangle_with_suffix(arg_unit, arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN, ".service", &service);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+ } else {
+ r = make_unit_name(bus, UNIT_SERVICE, &service);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartTransientUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Name and mode */
+ r = sd_bus_message_append(m, "ss", service, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Properties */
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = transient_service_set_properties(m, pty_path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Auxiliary units */
+ r = sd_bus_message_append(m, "a(sa(sv))", 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start transient service unit: %s", bus_error_message(&error, r));
+
+ if (w) {
+ const char *object;
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, arg_quiet);
+ if (r < 0)
+ return r;
+ }
+
+ if (!arg_quiet)
+ log_info("Running as unit: %s", service);
+
+ if (arg_wait || arg_stdio != ARG_STDIO_NONE) {
+ _cleanup_(run_context_free) RunContext c = {
+ .cpu_usage_nsec = NSEC_INFINITY,
+ .ip_ingress_bytes = UINT64_MAX,
+ .ip_egress_bytes = UINT64_MAX,
+ .inactive_exit_usec = USEC_INFINITY,
+ .inactive_enter_usec = USEC_INFINITY,
+ };
+ _cleanup_free_ char *path = NULL;
+
+ c.bus = sd_bus_ref(bus);
+
+ r = sd_event_default(&c.event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get event loop: %m");
+
+ if (master >= 0) {
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGWINCH, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(c.event, NULL, SIGINT, NULL, NULL);
+ (void) sd_event_add_signal(c.event, NULL, SIGTERM, NULL, NULL);
+
+ if (!arg_quiet)
+ log_info("Press ^] three times within 1s to disconnect TTY.");
+
+ r = pty_forward_new(c.event, master, PTY_FORWARD_IGNORE_INITIAL_VHANGUP, &c.forward);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create PTY forwarder: %m");
+
+ pty_forward_set_handler(c.forward, pty_forward_handler, &c);
+
+ /* Make sure to process any TTY events before we process bus events */
+ (void) pty_forward_set_priority(c.forward, SD_EVENT_PRIORITY_IMPORTANT);
+ }
+
+ path = unit_dbus_path_from_name(service);
+ if (!path)
+ return log_oom();
+
+ r = sd_bus_match_signal_async(
+ bus,
+ &c.match,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.DBus.Properties",
+ "PropertiesChanged",
+ on_properties_changed, NULL, &c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request properties changed signal match: %m");
+
+ r = sd_bus_attach_event(bus, c.event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ r = run_context_update(&c, path);
+ if (r < 0)
+ return r;
+
+ r = sd_event_loop(c.event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ if (c.forward) {
+ char last_char = 0;
+
+ r = pty_forward_get_last_char(c.forward, &last_char);
+ if (r >= 0 && !arg_quiet && last_char != '\n')
+ fputc('\n', stdout);
+ }
+
+ if (arg_wait && !arg_quiet) {
+
+ /* Explicitly destroy the PTY forwarder, so that the PTY device is usable again, in its
+ * original settings (i.e. proper line breaks), so that we can show the summary in a pretty
+ * way. */
+ c.forward = pty_forward_free(c.forward);
+
+ if (!isempty(c.result))
+ log_info("Finished with result: %s", strna(c.result));
+
+ if (c.exit_code == CLD_EXITED)
+ log_info("Main processes terminated with: code=%s/status=%i", sigchld_code_to_string(c.exit_code), c.exit_status);
+ else if (c.exit_code > 0)
+ log_info("Main processes terminated with: code=%s/status=%s", sigchld_code_to_string(c.exit_code), signal_to_string(c.exit_status));
+
+ if (c.inactive_enter_usec > 0 && c.inactive_enter_usec != USEC_INFINITY &&
+ c.inactive_exit_usec > 0 && c.inactive_exit_usec != USEC_INFINITY &&
+ c.inactive_enter_usec > c.inactive_exit_usec) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ log_info("Service runtime: %s", format_timespan(ts, sizeof(ts), c.inactive_enter_usec - c.inactive_exit_usec, USEC_PER_MSEC));
+ }
+
+ if (c.cpu_usage_nsec != NSEC_INFINITY) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ log_info("CPU time consumed: %s", format_timespan(ts, sizeof(ts), (c.cpu_usage_nsec + NSEC_PER_USEC - 1) / NSEC_PER_USEC, USEC_PER_MSEC));
+ }
+
+ if (c.ip_ingress_bytes != UINT64_MAX) {
+ char bytes[FORMAT_BYTES_MAX];
+ log_info("IP traffic received: %s", format_bytes(bytes, sizeof(bytes), c.ip_ingress_bytes));
+ }
+ if (c.ip_egress_bytes != UINT64_MAX) {
+ char bytes[FORMAT_BYTES_MAX];
+ log_info("IP traffic sent: %s", format_bytes(bytes, sizeof(bytes), c.ip_egress_bytes));
+ }
+ }
+
+ /* Try to propagate the service's return value */
+ if (c.result && STR_IN_SET(c.result, "success", "exit-code") && c.exit_code == CLD_EXITED)
+ *retval = c.exit_status;
+ else
+ *retval = EXIT_FAILURE;
+ }
+
+ return 0;
+}
+
+static int acquire_invocation_id(sd_bus *bus, sd_id128_t *ret) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const void *p;
+ size_t l;
+ int r;
+
+ assert(bus);
+ assert(ret);
+
+ r = sd_bus_get_property(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1/unit/self",
+ "org.freedesktop.systemd1.Unit",
+ "InvocationID",
+ &error,
+ &reply,
+ "ay");
+ if (r < 0)
+ return log_error_errno(r, "Failed to request invocation ID for scope: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read_array(reply, 'y', &p, &l);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (l != sizeof(sd_id128_t))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid UUID size, %zu != %zu.", l, sizeof(sd_id128_t));
+
+ memcpy(ret, p, l);
+ return 0;
+}
+
+static int start_transient_scope(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_strv_free_ char **env = NULL, **user_env = NULL;
+ _cleanup_free_ char *scope = NULL;
+ const char *object = NULL;
+ sd_id128_t invocation_id;
+ int r;
+
+ assert(bus);
+ assert(!strv_isempty(arg_cmdline));
+
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_oom();
+
+ if (arg_unit) {
+ r = unit_name_mangle_with_suffix(arg_unit, arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN, ".scope", &scope);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle scope name: %m");
+ } else {
+ r = make_unit_name(bus, UNIT_SCOPE, &scope);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartTransientUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Name and Mode */
+ r = sd_bus_message_append(m, "ss", scope, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Properties */
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = transient_scope_set_properties(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Auxiliary units */
+ r = sd_bus_message_append(m, "a(sa(sv))", 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start transient scope unit: %s", bus_error_message(&error, -r));
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, arg_quiet);
+ if (r < 0)
+ return r;
+
+ r = acquire_invocation_id(bus, &invocation_id);
+ if (r < 0)
+ return r;
+
+ r = strv_extendf(&user_env, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(invocation_id));
+ if (r < 0)
+ return log_oom();
+
+ if (arg_nice_set) {
+ if (setpriority(PRIO_PROCESS, 0, arg_nice) < 0)
+ return log_error_errno(errno, "Failed to set nice level: %m");
+ }
+
+ if (arg_exec_group) {
+ gid_t gid;
+
+ r = get_group_creds(&arg_exec_group, &gid, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve group %s: %m", arg_exec_group);
+
+ if (setresgid(gid, gid, gid) < 0)
+ return log_error_errno(errno, "Failed to change GID to " GID_FMT ": %m", gid);
+ }
+
+ if (arg_exec_user) {
+ const char *home, *shell;
+ uid_t uid;
+ gid_t gid;
+
+ r = get_user_creds(&arg_exec_user, &uid, &gid, &home, &shell, USER_CREDS_CLEAN|USER_CREDS_PREFER_NSS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve user %s: %m", arg_exec_user);
+
+ if (home) {
+ r = strv_extendf(&user_env, "HOME=%s", home);
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (shell) {
+ r = strv_extendf(&user_env, "SHELL=%s", shell);
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = strv_extendf(&user_env, "USER=%s", arg_exec_user);
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extendf(&user_env, "LOGNAME=%s", arg_exec_user);
+ if (r < 0)
+ return log_oom();
+
+ if (!arg_exec_group) {
+ if (setresgid(gid, gid, gid) < 0)
+ return log_error_errno(errno, "Failed to change GID to " GID_FMT ": %m", gid);
+ }
+
+ if (setresuid(uid, uid, uid) < 0)
+ return log_error_errno(errno, "Failed to change UID to " UID_FMT ": %m", uid);
+ }
+
+ env = strv_env_merge(3, environ, user_env, arg_environment);
+ if (!env)
+ return log_oom();
+
+ if (!arg_quiet)
+ log_info("Running scope as unit: %s", scope);
+
+ execvpe(arg_cmdline[0], arg_cmdline, env);
+
+ return log_error_errno(errno, "Failed to execute: %m");
+}
+
+static int start_transient_trigger(
+ sd_bus *bus,
+ const char *suffix) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_free_ char *trigger = NULL, *service = NULL;
+ const char *object = NULL;
+ int r;
+
+ assert(bus);
+
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_oom();
+
+ if (arg_unit) {
+ switch (unit_name_to_type(arg_unit)) {
+
+ case UNIT_SERVICE:
+ service = strdup(arg_unit);
+ if (!service)
+ return log_oom();
+
+ r = unit_name_change_suffix(service, suffix, &trigger);
+ if (r < 0)
+ return log_error_errno(r, "Failed to change unit suffix: %m");
+ break;
+
+ case UNIT_TIMER:
+ trigger = strdup(arg_unit);
+ if (!trigger)
+ return log_oom();
+
+ r = unit_name_change_suffix(trigger, ".service", &service);
+ if (r < 0)
+ return log_error_errno(r, "Failed to change unit suffix: %m");
+ break;
+
+ default:
+ r = unit_name_mangle_with_suffix(arg_unit, arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN, ".service", &service);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+
+ r = unit_name_mangle_with_suffix(arg_unit, arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN, suffix, &trigger);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+
+ break;
+ }
+ } else {
+ r = make_unit_name(bus, UNIT_SERVICE, &service);
+ if (r < 0)
+ return r;
+
+ r = unit_name_change_suffix(service, suffix, &trigger);
+ if (r < 0)
+ return log_error_errno(r, "Failed to change unit suffix: %m");
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartTransientUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Name and Mode */
+ r = sd_bus_message_append(m, "ss", trigger, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Properties */
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (streq(suffix, ".path"))
+ r = transient_unit_set_properties(m, UNIT_PATH, arg_path_property);
+ else if (streq(suffix, ".socket"))
+ r = transient_unit_set_properties(m, UNIT_SOCKET, arg_socket_property);
+ else if (streq(suffix, ".timer"))
+ r = transient_timer_set_properties(m);
+ else
+ assert_not_reached("Invalid suffix");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sa(sv))");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (!strv_isempty(arg_cmdline)) {
+ r = sd_bus_message_open_container(m, 'r', "sa(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", service);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = transient_service_set_properties(m, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start transient %s unit: %s", suffix + 1, bus_error_message(&error, -r));
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, arg_quiet);
+ if (r < 0)
+ return r;
+
+ if (!arg_quiet) {
+ log_info("Running %s as unit: %s", suffix + 1, trigger);
+ if (!strv_isempty(arg_cmdline))
+ log_info("Will run service as unit: %s", service);
+ }
+
+ return 0;
+}
+
+static int run(int argc, char* argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *description = NULL;
+ int r, retval = EXIT_SUCCESS;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (!strv_isempty(arg_cmdline) && arg_transport == BUS_TRANSPORT_LOCAL) {
+ _cleanup_free_ char *command = NULL;
+
+ /* Patch in an absolute path */
+
+ r = find_binary(arg_cmdline[0], &command);
+ if (r < 0)
+ return log_error_errno(r, "Failed to find executable %s: %m", arg_cmdline[0]);
+
+ free_and_replace(arg_cmdline[0], command);
+ }
+
+ if (!arg_description) {
+ description = strv_join(arg_cmdline, " ");
+ if (!description)
+ return log_oom();
+
+ if (arg_unit && isempty(description)) {
+ r = free_and_strdup(&description, arg_unit);
+ if (r < 0)
+ return log_oom();
+ }
+
+ arg_description = description;
+ }
+
+ /* If --wait is used connect via the bus, unconditionally, as ref/unref is not supported via the limited direct
+ * connection */
+ if (arg_wait || arg_stdio != ARG_STDIO_NONE)
+ r = bus_connect_transport(arg_transport, arg_host, arg_user, &bus);
+ else
+ r = bus_connect_transport_systemd(arg_transport, arg_host, arg_user, &bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ if (arg_scope)
+ r = start_transient_scope(bus);
+ else if (arg_path_property)
+ r = start_transient_trigger(bus, ".path");
+ else if (arg_socket_property)
+ r = start_transient_trigger(bus, ".socket");
+ else if (with_timer)
+ r = start_transient_trigger(bus, ".timer");
+ else
+ r = start_transient_service(bus, &retval);
+ if (r < 0)
+ return r;
+
+ return retval;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/shared/acl-util.c b/src/shared/acl-util.c
new file mode 100644
index 0000000..9633514
--- /dev/null
+++ b/src/shared/acl-util.c
@@ -0,0 +1,406 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdbool.h>
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *entry) {
+ acl_entry_t i;
+ int r;
+
+ assert(acl);
+ assert(entry);
+
+ for (r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) {
+
+ acl_tag_t tag;
+ uid_t *u;
+ bool b;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if (tag != ACL_USER)
+ continue;
+
+ u = acl_get_qualifier(i);
+ if (!u)
+ return -errno;
+
+ b = *u == uid;
+ acl_free(u);
+
+ if (b) {
+ *entry = i;
+ return 1;
+ }
+ }
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int calc_acl_mask_if_needed(acl_t *acl_p) {
+ acl_entry_t i;
+ int r;
+ bool need = false;
+
+ assert(acl_p);
+
+ for (r = acl_get_entry(*acl_p, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(*acl_p, ACL_NEXT_ENTRY, &i)) {
+ acl_tag_t tag;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if (tag == ACL_MASK)
+ return 0;
+
+ if (IN_SET(tag, ACL_USER, ACL_GROUP))
+ need = true;
+ }
+ if (r < 0)
+ return -errno;
+
+ if (need && acl_calc_mask(acl_p) < 0)
+ return -errno;
+
+ return need;
+}
+
+int add_base_acls_if_needed(acl_t *acl_p, const char *path) {
+ acl_entry_t i;
+ int r;
+ bool have_user_obj = false, have_group_obj = false, have_other = false;
+ struct stat st;
+ _cleanup_(acl_freep) acl_t basic = NULL;
+
+ assert(acl_p);
+
+ for (r = acl_get_entry(*acl_p, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(*acl_p, ACL_NEXT_ENTRY, &i)) {
+ acl_tag_t tag;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if (tag == ACL_USER_OBJ)
+ have_user_obj = true;
+ else if (tag == ACL_GROUP_OBJ)
+ have_group_obj = true;
+ else if (tag == ACL_OTHER)
+ have_other = true;
+ if (have_user_obj && have_group_obj && have_other)
+ return 0;
+ }
+ if (r < 0)
+ return -errno;
+
+ r = stat(path, &st);
+ if (r < 0)
+ return -errno;
+
+ basic = acl_from_mode(st.st_mode);
+ if (!basic)
+ return -errno;
+
+ for (r = acl_get_entry(basic, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(basic, ACL_NEXT_ENTRY, &i)) {
+ acl_tag_t tag;
+ acl_entry_t dst;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if ((tag == ACL_USER_OBJ && have_user_obj) ||
+ (tag == ACL_GROUP_OBJ && have_group_obj) ||
+ (tag == ACL_OTHER && have_other))
+ continue;
+
+ r = acl_create_entry(acl_p, &dst);
+ if (r < 0)
+ return -errno;
+
+ r = acl_copy_entry(dst, i);
+ if (r < 0)
+ return -errno;
+ }
+ if (r < 0)
+ return -errno;
+ return 0;
+}
+
+int acl_search_groups(const char *path, char ***ret_groups) {
+ _cleanup_strv_free_ char **g = NULL;
+ _cleanup_(acl_freep) acl_t acl = NULL;
+ bool ret = false;
+ acl_entry_t entry;
+ int r;
+
+ assert(path);
+
+ acl = acl_get_file(path, ACL_TYPE_DEFAULT);
+ if (!acl)
+ return -errno;
+
+ r = acl_get_entry(acl, ACL_FIRST_ENTRY, &entry);
+ for (;;) {
+ _cleanup_(acl_free_gid_tpp) gid_t *gid = NULL;
+ acl_tag_t tag;
+
+ if (r < 0)
+ return -errno;
+ if (r == 0)
+ break;
+
+ if (acl_get_tag_type(entry, &tag) < 0)
+ return -errno;
+
+ if (tag != ACL_GROUP)
+ goto next;
+
+ gid = acl_get_qualifier(entry);
+ if (!gid)
+ return -errno;
+
+ if (in_gid(*gid) > 0) {
+ if (!ret_groups)
+ return true;
+
+ ret = true;
+ }
+
+ if (ret_groups) {
+ char *name;
+
+ name = gid_to_name(*gid);
+ if (!name)
+ return -ENOMEM;
+
+ r = strv_consume(&g, name);
+ if (r < 0)
+ return r;
+ }
+
+ next:
+ r = acl_get_entry(acl, ACL_NEXT_ENTRY, &entry);
+ }
+
+ if (ret_groups)
+ *ret_groups = TAKE_PTR(g);
+
+ return ret;
+}
+
+int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want_mask) {
+ _cleanup_free_ char **a = NULL, **d = NULL; /* strings are not freed */
+ _cleanup_strv_free_ char **split;
+ char **entry;
+ int r = -EINVAL;
+ _cleanup_(acl_freep) acl_t a_acl = NULL, d_acl = NULL;
+
+ split = strv_split(text, ",");
+ if (!split)
+ return -ENOMEM;
+
+ STRV_FOREACH(entry, split) {
+ char *p;
+
+ p = STARTSWITH_SET(*entry, "default:", "d:");
+ if (p)
+ r = strv_push(&d, p);
+ else
+ r = strv_push(&a, *entry);
+ if (r < 0)
+ return r;
+ }
+
+ if (!strv_isempty(a)) {
+ _cleanup_free_ char *join;
+
+ join = strv_join(a, ",");
+ if (!join)
+ return -ENOMEM;
+
+ a_acl = acl_from_text(join);
+ if (!a_acl)
+ return -errno;
+
+ if (want_mask) {
+ r = calc_acl_mask_if_needed(&a_acl);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!strv_isempty(d)) {
+ _cleanup_free_ char *join;
+
+ join = strv_join(d, ",");
+ if (!join)
+ return -ENOMEM;
+
+ d_acl = acl_from_text(join);
+ if (!d_acl)
+ return -errno;
+
+ if (want_mask) {
+ r = calc_acl_mask_if_needed(&d_acl);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ *acl_access = TAKE_PTR(a_acl);
+ *acl_default = TAKE_PTR(d_acl);
+
+ return 0;
+}
+
+static int acl_entry_equal(acl_entry_t a, acl_entry_t b) {
+ acl_tag_t tag_a, tag_b;
+
+ if (acl_get_tag_type(a, &tag_a) < 0)
+ return -errno;
+
+ if (acl_get_tag_type(b, &tag_b) < 0)
+ return -errno;
+
+ if (tag_a != tag_b)
+ return false;
+
+ switch (tag_a) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ /* can have only one of those */
+ return true;
+ case ACL_USER: {
+ _cleanup_(acl_free_uid_tpp) uid_t *uid_a = NULL, *uid_b = NULL;
+
+ uid_a = acl_get_qualifier(a);
+ if (!uid_a)
+ return -errno;
+
+ uid_b = acl_get_qualifier(b);
+ if (!uid_b)
+ return -errno;
+
+ return *uid_a == *uid_b;
+ }
+ case ACL_GROUP: {
+ _cleanup_(acl_free_gid_tpp) gid_t *gid_a = NULL, *gid_b = NULL;
+
+ gid_a = acl_get_qualifier(a);
+ if (!gid_a)
+ return -errno;
+
+ gid_b = acl_get_qualifier(b);
+ if (!gid_b)
+ return -errno;
+
+ return *gid_a == *gid_b;
+ }
+ default:
+ assert_not_reached("Unknown acl tag type");
+ }
+}
+
+static int find_acl_entry(acl_t acl, acl_entry_t entry, acl_entry_t *out) {
+ acl_entry_t i;
+ int r;
+
+ for (r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) {
+
+ r = acl_entry_equal(i, entry);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ *out = i;
+ return 1;
+ }
+ }
+ if (r < 0)
+ return -errno;
+ return 0;
+}
+
+int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl) {
+ _cleanup_(acl_freep) acl_t old;
+ acl_entry_t i;
+ int r;
+
+ old = acl_get_file(path, type);
+ if (!old)
+ return -errno;
+
+ for (r = acl_get_entry(new, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(new, ACL_NEXT_ENTRY, &i)) {
+
+ acl_entry_t j;
+
+ r = find_acl_entry(old, i, &j);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ if (acl_create_entry(&old, &j) < 0)
+ return -errno;
+
+ if (acl_copy_entry(j, i) < 0)
+ return -errno;
+ }
+ if (r < 0)
+ return -errno;
+
+ *acl = TAKE_PTR(old);
+
+ return 0;
+}
+
+int add_acls_for_user(int fd, uid_t uid) {
+ _cleanup_(acl_freep) acl_t acl = NULL;
+ acl_entry_t entry;
+ acl_permset_t permset;
+ int r;
+
+ acl = acl_get_fd(fd);
+ if (!acl)
+ return -errno;
+
+ r = acl_find_uid(acl, uid, &entry);
+ if (r <= 0) {
+ if (acl_create_entry(&acl, &entry) < 0 ||
+ acl_set_tag_type(entry, ACL_USER) < 0 ||
+ acl_set_qualifier(entry, &uid) < 0)
+ return -errno;
+ }
+
+ /* We do not recalculate the mask unconditionally here,
+ * so that the fchmod() mask above stays intact. */
+ if (acl_get_permset(entry, &permset) < 0 ||
+ acl_add_perm(permset, ACL_READ) < 0)
+ return -errno;
+
+ r = calc_acl_mask_if_needed(&acl);
+ if (r < 0)
+ return r;
+
+ return acl_set_fd(fd, acl);
+}
diff --git a/src/shared/acl-util.h b/src/shared/acl-util.h
new file mode 100644
index 0000000..10b2a3d
--- /dev/null
+++ b/src/shared/acl-util.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if HAVE_ACL
+
+#include <acl/libacl.h>
+#include <stdbool.h>
+#include <sys/acl.h>
+
+#include "macro.h"
+
+int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *entry);
+int calc_acl_mask_if_needed(acl_t *acl_p);
+int add_base_acls_if_needed(acl_t *acl_p, const char *path);
+int acl_search_groups(const char* path, char ***ret_groups);
+int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want_mask);
+int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl);
+int add_acls_for_user(int fd, uid_t uid);
+
+/* acl_free takes multiple argument types.
+ * Multiple cleanup functions are necessary. */
+DEFINE_TRIVIAL_CLEANUP_FUNC(acl_t, acl_free);
+#define acl_free_charp acl_free
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, acl_free_charp);
+#define acl_free_uid_tp acl_free
+DEFINE_TRIVIAL_CLEANUP_FUNC(uid_t*, acl_free_uid_tp);
+#define acl_free_gid_tp acl_free
+DEFINE_TRIVIAL_CLEANUP_FUNC(gid_t*, acl_free_gid_tp);
+
+#endif
diff --git a/src/shared/acpi-fpdt.c b/src/shared/acpi-fpdt.c
new file mode 100644
index 0000000..d565ebd
--- /dev/null
+++ b/src/shared/acpi-fpdt.c
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "acpi-fpdt.h"
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "time-util.h"
+
+struct acpi_table_header {
+ char signature[4];
+ uint32_t length;
+ uint8_t revision;
+ uint8_t checksum;
+ char oem_id[6];
+ char oem_table_id[8];
+ uint32_t oem_revision;
+ char asl_compiler_id[4];
+ uint32_t asl_compiler_revision;
+};
+
+enum {
+ ACPI_FPDT_TYPE_BOOT = 0,
+ ACPI_FPDT_TYPE_S3PERF = 1,
+};
+
+struct acpi_fpdt_header {
+ uint16_t type;
+ uint8_t length;
+ uint8_t revision;
+ uint8_t reserved[4];
+ uint64_t ptr;
+};
+
+struct acpi_fpdt_boot_header {
+ char signature[4];
+ uint32_t length;
+};
+
+enum {
+ ACPI_FPDT_S3PERF_RESUME_REC = 0,
+ ACPI_FPDT_S3PERF_SUSPEND_REC = 1,
+ ACPI_FPDT_BOOT_REC = 2,
+};
+
+struct acpi_fpdt_boot {
+ uint16_t type;
+ uint8_t length;
+ uint8_t revision;
+ uint8_t reserved[4];
+ uint64_t reset_end;
+ uint64_t load_start;
+ uint64_t startup_start;
+ uint64_t exit_services_entry;
+ uint64_t exit_services_exit;
+};
+
+int acpi_get_boot_usec(usec_t *loader_start, usec_t *loader_exit) {
+ _cleanup_free_ char *buf = NULL;
+ struct acpi_table_header *tbl;
+ size_t l = 0;
+ struct acpi_fpdt_header *rec;
+ int r;
+ uint64_t ptr = 0;
+ _cleanup_close_ int fd = -1;
+ struct acpi_fpdt_boot_header hbrec;
+ struct acpi_fpdt_boot brec;
+
+ r = read_full_file("/sys/firmware/acpi/tables/FPDT", &buf, &l);
+ if (r < 0)
+ return r;
+
+ if (l < sizeof(struct acpi_table_header) + sizeof(struct acpi_fpdt_header))
+ return -EINVAL;
+
+ tbl = (struct acpi_table_header *)buf;
+ if (l != tbl->length)
+ return -EINVAL;
+
+ if (memcmp(tbl->signature, "FPDT", 4) != 0)
+ return -EINVAL;
+
+ /* find Firmware Basic Boot Performance Pointer Record */
+ for (rec = (struct acpi_fpdt_header *)(buf + sizeof(struct acpi_table_header));
+ (char *)rec < buf + l;
+ rec = (struct acpi_fpdt_header *)((char *)rec + rec->length)) {
+ if (rec->length <= 0)
+ break;
+ if (rec->type != ACPI_FPDT_TYPE_BOOT)
+ continue;
+ if (rec->length != sizeof(struct acpi_fpdt_header))
+ continue;
+
+ ptr = rec->ptr;
+ break;
+ }
+
+ if (ptr == 0)
+ return -ENODATA;
+
+ /* read Firmware Basic Boot Performance Data Record */
+ fd = open("/dev/mem", O_CLOEXEC|O_RDONLY);
+ if (fd < 0)
+ return -errno;
+
+ l = pread(fd, &hbrec, sizeof(struct acpi_fpdt_boot_header), ptr);
+ if (l != sizeof(struct acpi_fpdt_boot_header))
+ return -EINVAL;
+
+ if (memcmp(hbrec.signature, "FBPT", 4) != 0)
+ return -EINVAL;
+
+ if (hbrec.length < sizeof(struct acpi_fpdt_boot_header) + sizeof(struct acpi_fpdt_boot))
+ return -EINVAL;
+
+ l = pread(fd, &brec, sizeof(struct acpi_fpdt_boot), ptr + sizeof(struct acpi_fpdt_boot_header));
+ if (l != sizeof(struct acpi_fpdt_boot))
+ return -EINVAL;
+
+ if (brec.length != sizeof(struct acpi_fpdt_boot))
+ return -EINVAL;
+
+ if (brec.type != ACPI_FPDT_BOOT_REC)
+ return -EINVAL;
+
+ if (brec.exit_services_exit == 0)
+ /* Non-UEFI compatible boot. */
+ return -ENODATA;
+
+ if (brec.startup_start == 0 || brec.exit_services_exit < brec.startup_start)
+ return -EINVAL;
+ if (brec.exit_services_exit > NSEC_PER_HOUR)
+ return -EINVAL;
+
+ if (loader_start)
+ *loader_start = brec.startup_start / 1000;
+ if (loader_exit)
+ *loader_exit = brec.exit_services_exit / 1000;
+
+ return 0;
+}
diff --git a/src/shared/acpi-fpdt.h b/src/shared/acpi-fpdt.h
new file mode 100644
index 0000000..8d28893
--- /dev/null
+++ b/src/shared/acpi-fpdt.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <time-util.h>
+
+int acpi_get_boot_usec(usec_t *loader_start, usec_t *loader_exit);
diff --git a/src/shared/apparmor-util.c b/src/shared/apparmor-util.c
new file mode 100644
index 0000000..c4a4b04
--- /dev/null
+++ b/src/shared/apparmor-util.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stddef.h>
+
+#include "alloc-util.h"
+#include "apparmor-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+
+bool mac_apparmor_use(void) {
+ static int cached_use = -1;
+
+ if (cached_use < 0) {
+ _cleanup_free_ char *p = NULL;
+
+ cached_use =
+ read_one_line_file("/sys/module/apparmor/parameters/enabled", &p) >= 0 &&
+ parse_boolean(p) > 0;
+ }
+
+ return cached_use;
+}
diff --git a/src/shared/apparmor-util.h b/src/shared/apparmor-util.h
new file mode 100644
index 0000000..7fbaf90
--- /dev/null
+++ b/src/shared/apparmor-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+bool mac_apparmor_use(void);
diff --git a/src/shared/ask-password-api.c b/src/shared/ask-password-api.c
new file mode 100644
index 0000000..072bf72
--- /dev/null
+++ b/src/shared/ask-password-api.c
@@ -0,0 +1,818 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/inotify.h>
+#include <sys/signalfd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "ask-password-api.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "utf8.h"
+#include "util.h"
+
+#define KEYRING_TIMEOUT_USEC ((5 * USEC_PER_MINUTE) / 2)
+
+static int lookup_key(const char *keyname, key_serial_t *ret) {
+ key_serial_t serial;
+
+ assert(keyname);
+ assert(ret);
+
+ serial = request_key("user", keyname, NULL, 0);
+ if (serial == -1)
+ return negative_errno();
+
+ *ret = serial;
+ return 0;
+}
+
+static int retrieve_key(key_serial_t serial, char ***ret) {
+ _cleanup_free_ char *p = NULL;
+ long m = 100, n;
+ char **l;
+
+ assert(ret);
+
+ for (;;) {
+ p = new(char, m);
+ if (!p)
+ return -ENOMEM;
+
+ n = keyctl(KEYCTL_READ, (unsigned long) serial, (unsigned long) p, (unsigned long) m, 0);
+ if (n < 0)
+ return -errno;
+
+ if (n < m)
+ break;
+
+ explicit_bzero_safe(p, n);
+ free(p);
+ m *= 2;
+ }
+
+ l = strv_parse_nulstr(p, n);
+ if (!l)
+ return -ENOMEM;
+
+ explicit_bzero_safe(p, n);
+
+ *ret = l;
+ return 0;
+}
+
+static int add_to_keyring(const char *keyname, AskPasswordFlags flags, char **passwords) {
+ _cleanup_strv_free_erase_ char **l = NULL;
+ _cleanup_free_ char *p = NULL;
+ key_serial_t serial;
+ size_t n;
+ int r;
+
+ assert(keyname);
+ assert(passwords);
+
+ if (!(flags & ASK_PASSWORD_PUSH_CACHE))
+ return 0;
+
+ r = lookup_key(keyname, &serial);
+ if (r >= 0) {
+ r = retrieve_key(serial, &l);
+ if (r < 0)
+ return r;
+ } else if (r != -ENOKEY)
+ return r;
+
+ r = strv_extend_strv(&l, passwords, true);
+ if (r <= 0)
+ return r;
+
+ r = strv_make_nulstr(l, &p, &n);
+ if (r < 0)
+ return r;
+
+ serial = add_key("user", keyname, p, n, KEY_SPEC_USER_KEYRING);
+ explicit_bzero_safe(p, n);
+ if (serial == -1)
+ return -errno;
+
+ if (keyctl(KEYCTL_SET_TIMEOUT,
+ (unsigned long) serial,
+ (unsigned long) DIV_ROUND_UP(KEYRING_TIMEOUT_USEC, USEC_PER_SEC), 0, 0) < 0)
+ log_debug_errno(errno, "Failed to adjust timeout: %m");
+
+ /* Tell everyone to check the keyring */
+ (void) touch("/run/systemd/ask-password");
+
+ log_debug("Added key to keyring as %" PRIi32 ".", serial);
+
+ return 1;
+}
+
+static int add_to_keyring_and_log(const char *keyname, AskPasswordFlags flags, char **passwords) {
+ int r;
+
+ assert(keyname);
+ assert(passwords);
+
+ r = add_to_keyring(keyname, flags, passwords);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add password to keyring: %m");
+
+ return 0;
+}
+
+static int ask_password_keyring(const char *keyname, AskPasswordFlags flags, char ***ret) {
+
+ key_serial_t serial;
+ int r;
+
+ assert(keyname);
+ assert(ret);
+
+ if (!(flags & ASK_PASSWORD_ACCEPT_CACHED))
+ return -EUNATCH;
+
+ r = lookup_key(keyname, &serial);
+ if (r == -ENOSYS) /* when retrieving the distinction doesn't matter */
+ return -ENOKEY;
+ if (r < 0)
+ return r;
+
+ return retrieve_key(serial, ret);
+}
+
+static void backspace_chars(int ttyfd, size_t p) {
+
+ if (ttyfd < 0)
+ return;
+
+ while (p > 0) {
+ p--;
+
+ loop_write(ttyfd, "\b \b", 3, false);
+ }
+}
+
+static void backspace_string(int ttyfd, const char *str) {
+ size_t m;
+
+ assert(str);
+
+ if (ttyfd < 0)
+ return;
+
+ /* Backspaces through enough characters to entirely undo printing of the specified string. */
+
+ m = utf8_n_codepoints(str);
+ if (m == (size_t) -1)
+ m = strlen(str); /* Not a valid UTF-8 string? If so, let's backspace the number of bytes output. Most
+ * likely this happened because we are not in an UTF-8 locale, and in that case that
+ * is the correct thing to do. And even if it's not, terminals tend to stop
+ * backspacing at the leftmost column, hence backspacing too much should be mostly
+ * OK. */
+
+ backspace_chars(ttyfd, m);
+}
+
+int ask_password_tty(
+ int ttyfd,
+ const char *message,
+ const char *keyname,
+ usec_t until,
+ AskPasswordFlags flags,
+ const char *flag_file,
+ char ***ret) {
+
+ enum {
+ POLL_TTY,
+ POLL_INOTIFY,
+ _POLL_MAX,
+ };
+
+ bool reset_tty = false, dirty = false, use_color = false;
+ _cleanup_close_ int cttyfd = -1, notify = -1;
+ struct termios old_termios, new_termios;
+ char passphrase[LINE_MAX + 1] = {}, *x;
+ _cleanup_strv_free_erase_ char **l = NULL;
+ struct pollfd pollfd[_POLL_MAX];
+ size_t p = 0, codepoint = 0;
+ int r;
+
+ assert(ret);
+
+ if (flags & ASK_PASSWORD_NO_TTY)
+ return -EUNATCH;
+
+ if (!message)
+ message = "Password:";
+
+ if (flag_file || ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname)) {
+ notify = inotify_init1(IN_CLOEXEC|IN_NONBLOCK);
+ if (notify < 0)
+ return -errno;
+ }
+ if (flag_file) {
+ if (inotify_add_watch(notify, flag_file, IN_ATTRIB /* for the link count */) < 0)
+ return -errno;
+ }
+ if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname) {
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r >= 0)
+ return 0;
+ else if (r != -ENOKEY)
+ return r;
+
+ if (inotify_add_watch(notify, "/run/systemd/ask-password", IN_ATTRIB /* for mtime */) < 0)
+ return -errno;
+ }
+
+ /* If the caller didn't specify a TTY, then use the controlling tty, if we can. */
+ if (ttyfd < 0)
+ ttyfd = cttyfd = open("/dev/tty", O_RDWR|O_NOCTTY|O_CLOEXEC);
+
+ if (ttyfd >= 0) {
+ if (tcgetattr(ttyfd, &old_termios) < 0)
+ return -errno;
+
+ if (flags & ASK_PASSWORD_CONSOLE_COLOR)
+ use_color = dev_console_colors_enabled();
+ else
+ use_color = colors_enabled();
+
+ if (use_color)
+ (void) loop_write(ttyfd, ANSI_HIGHLIGHT, STRLEN(ANSI_HIGHLIGHT), false);
+
+ (void) loop_write(ttyfd, message, strlen(message), false);
+ (void) loop_write(ttyfd, " ", 1, false);
+
+ if (use_color)
+ (void) loop_write(ttyfd, ANSI_NORMAL, STRLEN(ANSI_NORMAL), false);
+
+ new_termios = old_termios;
+ new_termios.c_lflag &= ~(ICANON|ECHO);
+ new_termios.c_cc[VMIN] = 1;
+ new_termios.c_cc[VTIME] = 0;
+
+ if (tcsetattr(ttyfd, TCSADRAIN, &new_termios) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ reset_tty = true;
+ }
+
+ pollfd[POLL_TTY] = (struct pollfd) {
+ .fd = ttyfd >= 0 ? ttyfd : STDIN_FILENO,
+ .events = POLLIN,
+ };
+ pollfd[POLL_INOTIFY] = (struct pollfd) {
+ .fd = notify,
+ .events = POLLIN,
+ };
+
+ for (;;) {
+ int sleep_for = -1, k;
+ ssize_t n;
+ char c;
+
+ if (until > 0) {
+ usec_t y;
+
+ y = now(CLOCK_MONOTONIC);
+
+ if (y > until) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ sleep_for = (int) DIV_ROUND_UP(until - y, USEC_PER_MSEC);
+ }
+
+ if (flag_file)
+ if (access(flag_file, F_OK) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ k = poll(pollfd, notify >= 0 ? 2 : 1, sleep_for);
+ if (k < 0) {
+ if (errno == EINTR)
+ continue;
+
+ r = -errno;
+ goto finish;
+ } else if (k == 0) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ if (notify >= 0 && pollfd[POLL_INOTIFY].revents != 0 && keyname) {
+ (void) flush_fd(notify);
+
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r >= 0) {
+ r = 0;
+ goto finish;
+ } else if (r != -ENOKEY)
+ goto finish;
+ }
+
+ if (pollfd[POLL_TTY].revents == 0)
+ continue;
+
+ n = read(ttyfd >= 0 ? ttyfd : STDIN_FILENO, &c, 1);
+ if (n < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ continue;
+
+ r = -errno;
+ goto finish;
+
+ }
+
+ /* We treat EOF, newline and NUL byte all as valid end markers */
+ if (n == 0 || c == '\n' || c == 0)
+ break;
+
+ if (c == 21) { /* C-u */
+
+ if (!(flags & ASK_PASSWORD_SILENT))
+ backspace_string(ttyfd, passphrase);
+
+ explicit_bzero_safe(passphrase, sizeof(passphrase));
+ p = codepoint = 0;
+
+ } else if (IN_SET(c, '\b', 127)) {
+
+ if (p > 0) {
+ size_t q;
+
+ if (!(flags & ASK_PASSWORD_SILENT))
+ backspace_chars(ttyfd, 1);
+
+ /* Remove a full UTF-8 codepoint from the end. For that, figure out where the last one
+ * begins */
+ q = 0;
+ for (;;) {
+ size_t z;
+
+ z = utf8_encoded_valid_unichar(passphrase + q);
+ if (z == 0) {
+ q = (size_t) -1; /* Invalid UTF8! */
+ break;
+ }
+
+ if (q + z >= p) /* This one brings us over the edge */
+ break;
+
+ q += z;
+ }
+
+ p = codepoint = q == (size_t) -1 ? p - 1 : q;
+ explicit_bzero_safe(passphrase + p, sizeof(passphrase) - p);
+
+ } else if (!dirty && !(flags & ASK_PASSWORD_SILENT)) {
+
+ flags |= ASK_PASSWORD_SILENT;
+
+ /* There are two ways to enter silent mode. Either by pressing backspace as first key
+ * (and only as first key), or ... */
+
+ if (ttyfd >= 0)
+ (void) loop_write(ttyfd, "(no echo) ", 10, false);
+
+ } else if (ttyfd >= 0)
+ (void) loop_write(ttyfd, "\a", 1, false);
+
+ } else if (c == '\t' && !(flags & ASK_PASSWORD_SILENT)) {
+
+ backspace_string(ttyfd, passphrase);
+ flags |= ASK_PASSWORD_SILENT;
+
+ /* ... or by pressing TAB at any time. */
+
+ if (ttyfd >= 0)
+ (void) loop_write(ttyfd, "(no echo) ", 10, false);
+
+ } else if (p >= sizeof(passphrase)-1) {
+
+ /* Reached the size limit */
+ if (ttyfd >= 0)
+ (void) loop_write(ttyfd, "\a", 1, false);
+
+ } else {
+ passphrase[p++] = c;
+
+ if (!(flags & ASK_PASSWORD_SILENT) && ttyfd >= 0) {
+ /* Check if we got a complete UTF-8 character now. If so, let's output one '*'. */
+ n = utf8_encoded_valid_unichar(passphrase + codepoint);
+ if (n >= 0) {
+ codepoint = p;
+ (void) loop_write(ttyfd, (flags & ASK_PASSWORD_ECHO) ? &c : "*", 1, false);
+ }
+ }
+
+ dirty = true;
+ }
+
+ /* Let's forget this char, just to not keep needlessly copies of key material around */
+ c = 'x';
+ }
+
+ x = strndup(passphrase, p);
+ explicit_bzero_safe(passphrase, sizeof(passphrase));
+ if (!x) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = strv_consume(&l, x);
+ if (r < 0)
+ goto finish;
+
+ if (keyname)
+ (void) add_to_keyring_and_log(keyname, flags, l);
+
+ *ret = TAKE_PTR(l);
+ r = 0;
+
+finish:
+ if (ttyfd >= 0 && reset_tty) {
+ (void) loop_write(ttyfd, "\n", 1, false);
+ (void) tcsetattr(ttyfd, TCSADRAIN, &old_termios);
+ }
+
+ return r;
+}
+
+static int create_socket(char **ret) {
+ _cleanup_free_ char *path = NULL;
+ union sockaddr_union sa = {};
+ _cleanup_close_ int fd = -1;
+ int salen, r;
+
+ assert(ret);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ if (asprintf(&path, "/run/systemd/ask-password/sck.%" PRIx64, random_u64()) < 0)
+ return -ENOMEM;
+
+ salen = sockaddr_un_set_path(&sa.un, path);
+ if (salen < 0)
+ return salen;
+
+ RUN_WITH_UMASK(0177) {
+ if (bind(fd, &sa.sa, salen) < 0)
+ return -errno;
+ }
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(path);
+ return TAKE_FD(fd);
+}
+
+int ask_password_agent(
+ const char *message,
+ const char *icon,
+ const char *id,
+ const char *keyname,
+ usec_t until,
+ AskPasswordFlags flags,
+ char ***ret) {
+
+ enum {
+ FD_SOCKET,
+ FD_SIGNAL,
+ FD_INOTIFY,
+ _FD_MAX
+ };
+
+ _cleanup_close_ int socket_fd = -1, signal_fd = -1, notify = -1, fd = -1;
+ char temp[] = "/run/systemd/ask-password/tmp.XXXXXX";
+ char final[sizeof(temp)] = "";
+ _cleanup_free_ char *socket_name = NULL;
+ _cleanup_strv_free_erase_ char **l = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ struct pollfd pollfd[_FD_MAX];
+ sigset_t mask, oldmask;
+ int r;
+
+ assert(ret);
+
+ if (flags & ASK_PASSWORD_NO_AGENT)
+ return -EUNATCH;
+
+ assert_se(sigemptyset(&mask) >= 0);
+ assert_se(sigset_add_many(&mask, SIGINT, SIGTERM, -1) >= 0);
+ assert_se(sigprocmask(SIG_BLOCK, &mask, &oldmask) >= 0);
+
+ (void) mkdir_p_label("/run/systemd/ask-password", 0755);
+
+ if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname) {
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r >= 0) {
+ r = 0;
+ goto finish;
+ } else if (r != -ENOKEY)
+ goto finish;
+
+ notify = inotify_init1(IN_CLOEXEC | IN_NONBLOCK);
+ if (notify < 0) {
+ r = -errno;
+ goto finish;
+ }
+ if (inotify_add_watch(notify, "/run/systemd/ask-password", IN_ATTRIB /* for mtime */) < 0) {
+ r = -errno;
+ goto finish;
+ }
+ }
+
+ fd = mkostemp_safe(temp);
+ if (fd < 0) {
+ r = fd;
+ goto finish;
+ }
+
+ (void) fchmod(fd, 0644);
+
+ f = fdopen(fd, "w");
+ if (!f) {
+ r = -errno;
+ goto finish;
+ }
+
+ fd = -1;
+
+ signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
+ if (signal_fd < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ socket_fd = create_socket(&socket_name);
+ if (socket_fd < 0) {
+ r = socket_fd;
+ goto finish;
+ }
+
+ fprintf(f,
+ "[Ask]\n"
+ "PID="PID_FMT"\n"
+ "Socket=%s\n"
+ "AcceptCached=%i\n"
+ "Echo=%i\n"
+ "NotAfter="USEC_FMT"\n",
+ getpid_cached(),
+ socket_name,
+ (flags & ASK_PASSWORD_ACCEPT_CACHED) ? 1 : 0,
+ (flags & ASK_PASSWORD_ECHO) ? 1 : 0,
+ until);
+
+ if (message)
+ fprintf(f, "Message=%s\n", message);
+
+ if (icon)
+ fprintf(f, "Icon=%s\n", icon);
+
+ if (id)
+ fprintf(f, "Id=%s\n", id);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto finish;
+
+ memcpy(final, temp, sizeof(temp));
+
+ final[sizeof(final)-11] = 'a';
+ final[sizeof(final)-10] = 's';
+ final[sizeof(final)-9] = 'k';
+
+ if (rename(temp, final) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ zero(pollfd);
+ pollfd[FD_SOCKET].fd = socket_fd;
+ pollfd[FD_SOCKET].events = POLLIN;
+ pollfd[FD_SIGNAL].fd = signal_fd;
+ pollfd[FD_SIGNAL].events = POLLIN;
+ pollfd[FD_INOTIFY].fd = notify;
+ pollfd[FD_INOTIFY].events = POLLIN;
+
+ for (;;) {
+ char passphrase[LINE_MAX+1];
+ struct msghdr msghdr;
+ struct iovec iovec;
+ struct ucred *ucred;
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
+ } control;
+ ssize_t n;
+ int k;
+ usec_t t;
+
+ t = now(CLOCK_MONOTONIC);
+
+ if (until > 0 && until <= t) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ k = poll(pollfd, notify >= 0 ? _FD_MAX : _FD_MAX - 1, until > 0 ? (int) ((until-t)/USEC_PER_MSEC) : -1);
+ if (k < 0) {
+ if (errno == EINTR)
+ continue;
+
+ r = -errno;
+ goto finish;
+ }
+
+ if (k <= 0) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ if (pollfd[FD_SIGNAL].revents & POLLIN) {
+ r = -EINTR;
+ goto finish;
+ }
+
+ if (notify >= 0 && pollfd[FD_INOTIFY].revents != 0) {
+ (void) flush_fd(notify);
+
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r >= 0) {
+ r = 0;
+ goto finish;
+ } else if (r != -ENOKEY)
+ goto finish;
+ }
+
+ if (pollfd[FD_SOCKET].revents == 0)
+ continue;
+
+ if (pollfd[FD_SOCKET].revents != POLLIN) {
+ r = -EIO;
+ goto finish;
+ }
+
+ iovec = IOVEC_MAKE(passphrase, sizeof(passphrase));
+
+ zero(control);
+ zero(msghdr);
+ msghdr.msg_iov = &iovec;
+ msghdr.msg_iovlen = 1;
+ msghdr.msg_control = &control;
+ msghdr.msg_controllen = sizeof(control);
+
+ n = recvmsg(socket_fd, &msghdr, 0);
+ if (n < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ continue;
+
+ r = -errno;
+ goto finish;
+ }
+
+ cmsg_close_all(&msghdr);
+
+ if (n <= 0) {
+ log_debug("Message too short");
+ continue;
+ }
+
+ if (msghdr.msg_controllen < CMSG_LEN(sizeof(struct ucred)) ||
+ control.cmsghdr.cmsg_level != SOL_SOCKET ||
+ control.cmsghdr.cmsg_type != SCM_CREDENTIALS ||
+ control.cmsghdr.cmsg_len != CMSG_LEN(sizeof(struct ucred))) {
+ log_debug("Received message without credentials. Ignoring.");
+ continue;
+ }
+
+ ucred = (struct ucred*) CMSG_DATA(&control.cmsghdr);
+ if (ucred->uid != 0) {
+ log_debug("Got request from unprivileged user. Ignoring.");
+ continue;
+ }
+
+ if (passphrase[0] == '+') {
+ /* An empty message refers to the empty password */
+ if (n == 1)
+ l = strv_new("");
+ else
+ l = strv_parse_nulstr(passphrase+1, n-1);
+ explicit_bzero_safe(passphrase, n);
+ if (!l) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ if (strv_isempty(l)) {
+ l = strv_free(l);
+ log_debug("Invalid packet");
+ continue;
+ }
+
+ break;
+ }
+
+ if (passphrase[0] == '-') {
+ r = -ECANCELED;
+ goto finish;
+ }
+
+ log_debug("Invalid packet");
+ }
+
+ if (keyname)
+ (void) add_to_keyring_and_log(keyname, flags, l);
+
+ *ret = TAKE_PTR(l);
+ r = 0;
+
+finish:
+ if (socket_name)
+ (void) unlink(socket_name);
+
+ (void) unlink(temp);
+
+ if (final[0])
+ (void) unlink(final);
+
+ assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) == 0);
+ return r;
+}
+
+int ask_password_auto(
+ const char *message,
+ const char *icon,
+ const char *id,
+ const char *keyname,
+ usec_t until,
+ AskPasswordFlags flags,
+ char ***ret) {
+
+ int r;
+
+ assert(ret);
+
+ if ((flags & ASK_PASSWORD_ACCEPT_CACHED) &&
+ keyname &&
+ ((flags & ASK_PASSWORD_NO_TTY) || !isatty(STDIN_FILENO)) &&
+ (flags & ASK_PASSWORD_NO_AGENT)) {
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r != -ENOKEY)
+ return r;
+ }
+
+ if (!(flags & ASK_PASSWORD_NO_TTY) && isatty(STDIN_FILENO))
+ return ask_password_tty(-1, message, keyname, until, flags, NULL, ret);
+
+ if (!(flags & ASK_PASSWORD_NO_AGENT))
+ return ask_password_agent(message, icon, id, keyname, until, flags, ret);
+
+ return -EUNATCH;
+}
diff --git a/src/shared/ask-password-api.h b/src/shared/ask-password-api.h
new file mode 100644
index 0000000..15762b9
--- /dev/null
+++ b/src/shared/ask-password-api.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+
+typedef enum AskPasswordFlags {
+ ASK_PASSWORD_ACCEPT_CACHED = 1 << 0,
+ ASK_PASSWORD_PUSH_CACHE = 1 << 1,
+ ASK_PASSWORD_ECHO = 1 << 2, /* show the password literally while reading, instead of "*" */
+ ASK_PASSWORD_SILENT = 1 << 3, /* do no show any password at all while reading */
+ ASK_PASSWORD_NO_TTY = 1 << 4,
+ ASK_PASSWORD_NO_AGENT = 1 << 5,
+ ASK_PASSWORD_CONSOLE_COLOR = 1 << 6, /* Use color if /dev/console points to a console that supports color */
+} AskPasswordFlags;
+
+int ask_password_tty(int tty_fd, const char *message, const char *keyname, usec_t until, AskPasswordFlags flags, const char *flag_file, char ***ret);
+int ask_password_agent(const char *message, const char *icon, const char *id, const char *keyname, usec_t until, AskPasswordFlags flag, char ***ret);
+int ask_password_auto(const char *message, const char *icon, const char *id, const char *keyname, usec_t until, AskPasswordFlags flag, char ***ret);
diff --git a/src/shared/barrier.c b/src/shared/barrier.c
new file mode 100644
index 0000000..bb5869d
--- /dev/null
+++ b/src/shared/barrier.c
@@ -0,0 +1,394 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "barrier.h"
+#include "fd-util.h"
+#include "macro.h"
+
+/**
+ * Barriers
+ * This barrier implementation provides a simple synchronization method based
+ * on file-descriptors that can safely be used between threads and processes. A
+ * barrier object contains 2 shared counters based on eventfd. Both processes
+ * can now place barriers and wait for the other end to reach a random or
+ * specific barrier.
+ * Barriers are numbered, so you can either wait for the other end to reach any
+ * barrier or the last barrier that you placed. This way, you can use barriers
+ * for one-way *and* full synchronization. Note that even-though barriers are
+ * numbered, these numbers are internal and recycled once both sides reached the
+ * same barrier (implemented as a simple signed counter). It is thus not
+ * possible to address barriers by their ID.
+ *
+ * Barrier-API: Both ends can place as many barriers via barrier_place() as
+ * they want and each pair of barriers on both sides will be implicitly linked.
+ * Each side can use the barrier_wait/sync_*() family of calls to wait for the
+ * other side to place a specific barrier. barrier_wait_next() waits until the
+ * other side calls barrier_place(). No links between the barriers are
+ * considered and this simply serves as most basic asynchronous barrier.
+ * barrier_sync_next() is like barrier_wait_next() and waits for the other side
+ * to place their next barrier via barrier_place(). However, it only waits for
+ * barriers that are linked to a barrier we already placed. If the other side
+ * already placed more barriers than we did, barrier_sync_next() returns
+ * immediately.
+ * barrier_sync() extends barrier_sync_next() and waits until the other end
+ * placed as many barriers via barrier_place() as we did. If they already placed
+ * as many as we did (or more), it returns immediately.
+ *
+ * Additionally to basic barriers, an abortion event is available.
+ * barrier_abort() places an abortion event that cannot be undone. An abortion
+ * immediately cancels all placed barriers and replaces them. Any running and
+ * following wait/sync call besides barrier_wait_abortion() will immediately
+ * return false on both sides (otherwise, they always return true).
+ * barrier_abort() can be called multiple times on both ends and will be a
+ * no-op if already called on this side.
+ * barrier_wait_abortion() can be used to wait for the other side to call
+ * barrier_abort() and is the only wait/sync call that does not return
+ * immediately if we aborted outself. It only returns once the other side
+ * called barrier_abort().
+ *
+ * Barriers can be used for in-process and inter-process synchronization.
+ * However, for in-process synchronization you could just use mutexes.
+ * Therefore, main target is IPC and we require both sides to *not* share the FD
+ * table. If that's given, barriers provide target tracking: If the remote side
+ * exit()s, an abortion event is implicitly queued on the other side. This way,
+ * a sync/wait call will be woken up if the remote side crashed or exited
+ * unexpectedly. However, note that these abortion events are only queued if the
+ * barrier-queue has been drained. Therefore, it is safe to place a barrier and
+ * exit. The other side can safely wait on the barrier even though the exit
+ * queued an abortion event. Usually, the abortion event would overwrite the
+ * barrier, however, that's not true for exit-abortion events. Those are only
+ * queued if the barrier-queue is drained (thus, the receiving side has placed
+ * more barriers than the remote side).
+ */
+
+/**
+ * barrier_create() - Initialize a barrier object
+ * @obj: barrier to initialize
+ *
+ * This initializes a barrier object. The caller is responsible of allocating
+ * the memory and keeping it valid. The memory does not have to be zeroed
+ * beforehand.
+ * Two eventfd objects are allocated for each barrier. If allocation fails, an
+ * error is returned.
+ *
+ * If this function fails, the barrier is reset to an invalid state so it is
+ * safe to call barrier_destroy() on the object regardless whether the
+ * initialization succeeded or not.
+ *
+ * The caller is responsible to destroy the object via barrier_destroy() before
+ * releasing the underlying memory.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int barrier_create(Barrier *b) {
+ _cleanup_(barrier_destroyp) Barrier *staging = b;
+ int r;
+
+ assert(b);
+
+ b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+ if (b->me < 0)
+ return -errno;
+
+ b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+ if (b->them < 0)
+ return -errno;
+
+ r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
+ if (r < 0)
+ return -errno;
+
+ staging = NULL;
+ return 0;
+}
+
+/**
+ * barrier_destroy() - Destroy a barrier object
+ * @b: barrier to destroy or NULL
+ *
+ * This destroys a barrier object that has previously been passed to
+ * barrier_create(). The object is released and reset to invalid
+ * state. Therefore, it is safe to call barrier_destroy() multiple
+ * times or even if barrier_create() failed. However, barrier must be
+ * always initialized with BARRIER_NULL.
+ *
+ * If @b is NULL, this is a no-op.
+ */
+void barrier_destroy(Barrier *b) {
+ if (!b)
+ return;
+
+ b->me = safe_close(b->me);
+ b->them = safe_close(b->them);
+ safe_close_pair(b->pipe);
+ b->barriers = 0;
+}
+
+/**
+ * barrier_set_role() - Set the local role of the barrier
+ * @b: barrier to operate on
+ * @role: role to set on the barrier
+ *
+ * This sets the roles on a barrier object. This is needed to know
+ * which side of the barrier you're on. Usually, the parent creates
+ * the barrier via barrier_create() and then calls fork() or clone().
+ * Therefore, the FDs are duplicated and the child retains the same
+ * barrier object.
+ *
+ * Both sides need to call barrier_set_role() after fork() or clone()
+ * are done. If this is not done, barriers will not work correctly.
+ *
+ * Note that barriers could be supported without fork() or clone(). However,
+ * this is currently not needed so it hasn't been implemented.
+ */
+void barrier_set_role(Barrier *b, unsigned role) {
+ assert(b);
+ assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD));
+ /* make sure this is only called once */
+ assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
+
+ if (role == BARRIER_PARENT)
+ b->pipe[1] = safe_close(b->pipe[1]);
+ else {
+ b->pipe[0] = safe_close(b->pipe[0]);
+
+ /* swap me/them for children */
+ SWAP_TWO(b->me, b->them);
+ }
+}
+
+/* places barrier; returns false if we aborted, otherwise true */
+static bool barrier_write(Barrier *b, uint64_t buf) {
+ ssize_t len;
+
+ /* prevent new sync-points if we already aborted */
+ if (barrier_i_aborted(b))
+ return false;
+
+ assert(b->me >= 0);
+ do {
+ len = write(b->me, &buf, sizeof(buf));
+ } while (len < 0 && IN_SET(errno, EAGAIN, EINTR));
+
+ if (len != sizeof(buf))
+ goto error;
+
+ /* lock if we aborted */
+ if (buf >= (uint64_t)BARRIER_ABORTION) {
+ if (barrier_they_aborted(b))
+ b->barriers = BARRIER_WE_ABORTED;
+ else
+ b->barriers = BARRIER_I_ABORTED;
+ } else if (!barrier_is_aborted(b))
+ b->barriers += buf;
+
+ return !barrier_i_aborted(b);
+
+error:
+ /* If there is an unexpected error, we have to make this fatal. There
+ * is no way we can recover from sync-errors. Therefore, we close the
+ * pipe-ends and treat this as abortion. The other end will notice the
+ * pipe-close and treat it as abortion, too. */
+
+ safe_close_pair(b->pipe);
+ b->barriers = BARRIER_WE_ABORTED;
+ return false;
+}
+
+/* waits for barriers; returns false if they aborted, otherwise true */
+static bool barrier_read(Barrier *b, int64_t comp) {
+ if (barrier_they_aborted(b))
+ return false;
+
+ while (b->barriers > comp) {
+ struct pollfd pfd[2] = {
+ { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
+ .events = POLLHUP },
+ { .fd = b->them,
+ .events = POLLIN }};
+ uint64_t buf;
+ int r;
+
+ r = poll(pfd, 2, -1);
+ if (r < 0 && IN_SET(errno, EAGAIN, EINTR))
+ continue;
+ else if (r < 0)
+ goto error;
+
+ if (pfd[1].revents) {
+ ssize_t len;
+
+ /* events on @them signal new data for us */
+ len = read(b->them, &buf, sizeof(buf));
+ if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
+ continue;
+
+ if (len != sizeof(buf))
+ goto error;
+ } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
+ /* POLLHUP on the pipe tells us the other side exited.
+ * We treat this as implicit abortion. But we only
+ * handle it if there's no event on the eventfd. This
+ * guarantees that exit-abortions do not overwrite real
+ * barriers. */
+ buf = BARRIER_ABORTION;
+ else
+ continue;
+
+ /* lock if they aborted */
+ if (buf >= (uint64_t)BARRIER_ABORTION) {
+ if (barrier_i_aborted(b))
+ b->barriers = BARRIER_WE_ABORTED;
+ else
+ b->barriers = BARRIER_THEY_ABORTED;
+ } else if (!barrier_is_aborted(b))
+ b->barriers -= buf;
+ }
+
+ return !barrier_they_aborted(b);
+
+error:
+ /* If there is an unexpected error, we have to make this fatal. There
+ * is no way we can recover from sync-errors. Therefore, we close the
+ * pipe-ends and treat this as abortion. The other end will notice the
+ * pipe-close and treat it as abortion, too. */
+
+ safe_close_pair(b->pipe);
+ b->barriers = BARRIER_WE_ABORTED;
+ return false;
+}
+
+/**
+ * barrier_place() - Place a new barrier
+ * @b: barrier object
+ *
+ * This places a new barrier on the barrier object. If either side already
+ * aborted, this is a no-op and returns "false". Otherwise, the barrier is
+ * placed and this returns "true".
+ *
+ * Returns: true if barrier was placed, false if either side aborted.
+ */
+bool barrier_place(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_write(b, BARRIER_SINGLE);
+ return true;
+}
+
+/**
+ * barrier_abort() - Abort the synchronization
+ * @b: barrier object to abort
+ *
+ * This aborts the barrier-synchronization. If barrier_abort() was already
+ * called on this side, this is a no-op. Otherwise, the barrier is put into the
+ * ABORT-state and will stay there. The other side is notified about the
+ * abortion. Any following attempt to place normal barriers or to wait on normal
+ * barriers will return immediately as "false".
+ *
+ * You can wait for the other side to call barrier_abort(), too. Use
+ * barrier_wait_abortion() for that.
+ *
+ * Returns: false if the other side already aborted, true otherwise.
+ */
+bool barrier_abort(Barrier *b) {
+ assert(b);
+
+ barrier_write(b, BARRIER_ABORTION);
+ return !barrier_they_aborted(b);
+}
+
+/**
+ * barrier_wait_next() - Wait for the next barrier of the other side
+ * @b: barrier to operate on
+ *
+ * This waits until the other side places its next barrier. This is independent
+ * of any barrier-links and just waits for any next barrier of the other side.
+ *
+ * If either side aborted, this returns false.
+ *
+ * Returns: false if either side aborted, true otherwise.
+ */
+bool barrier_wait_next(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_read(b, b->barriers - 1);
+ return !barrier_is_aborted(b);
+}
+
+/**
+ * barrier_wait_abortion() - Wait for the other side to abort
+ * @b: barrier to operate on
+ *
+ * This waits until the other side called barrier_abort(). This can be called
+ * regardless whether the local side already called barrier_abort() or not.
+ *
+ * If the other side has already aborted, this returns immediately.
+ *
+ * Returns: false if the local side aborted, true otherwise.
+ */
+bool barrier_wait_abortion(Barrier *b) {
+ assert(b);
+
+ barrier_read(b, BARRIER_THEY_ABORTED);
+ return !barrier_i_aborted(b);
+}
+
+/**
+ * barrier_sync_next() - Wait for the other side to place a next linked barrier
+ * @b: barrier to operate on
+ *
+ * This is like barrier_wait_next() and waits for the other side to call
+ * barrier_place(). However, this only waits for linked barriers. That means, if
+ * the other side already placed more barriers than (or as much as) we did, this
+ * returns immediately instead of waiting.
+ *
+ * If either side aborted, this returns false.
+ *
+ * Returns: false if either side aborted, true otherwise.
+ */
+bool barrier_sync_next(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_read(b, MAX((int64_t)0, b->barriers - 1));
+ return !barrier_is_aborted(b);
+}
+
+/**
+ * barrier_sync() - Wait for the other side to place as many barriers as we did
+ * @b: barrier to operate on
+ *
+ * This is like barrier_sync_next() but waits for the other side to call
+ * barrier_place() as often as we did (in total). If they already placed as much
+ * as we did (or more), this returns immediately instead of waiting.
+ *
+ * If either side aborted, this returns false.
+ *
+ * Returns: false if either side aborted, true otherwise.
+ */
+bool barrier_sync(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_read(b, 0);
+ return !barrier_is_aborted(b);
+}
diff --git a/src/shared/barrier.h b/src/shared/barrier.h
new file mode 100644
index 0000000..0eb3d27
--- /dev/null
+++ b/src/shared/barrier.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+/* See source file for an API description. */
+
+typedef struct Barrier Barrier;
+
+enum {
+ BARRIER_SINGLE = 1LL,
+ BARRIER_ABORTION = INT64_MAX,
+
+ /* bias values to store state; keep @WE < @THEY < @I */
+ BARRIER_BIAS = INT64_MIN,
+ BARRIER_WE_ABORTED = BARRIER_BIAS + 1LL,
+ BARRIER_THEY_ABORTED = BARRIER_BIAS + 2LL,
+ BARRIER_I_ABORTED = BARRIER_BIAS + 3LL,
+};
+
+enum {
+ BARRIER_PARENT,
+ BARRIER_CHILD,
+};
+
+struct Barrier {
+ int me;
+ int them;
+ int pipe[2];
+ int64_t barriers;
+};
+
+#define BARRIER_NULL {-1, -1, {-1, -1}, 0}
+
+int barrier_create(Barrier *obj);
+void barrier_destroy(Barrier *b);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Barrier*, barrier_destroy);
+
+void barrier_set_role(Barrier *b, unsigned role);
+
+bool barrier_place(Barrier *b);
+bool barrier_abort(Barrier *b);
+
+bool barrier_wait_next(Barrier *b);
+bool barrier_wait_abortion(Barrier *b);
+bool barrier_sync_next(Barrier *b);
+bool barrier_sync(Barrier *b);
+
+static inline bool barrier_i_aborted(Barrier *b) {
+ return IN_SET(b->barriers, BARRIER_I_ABORTED, BARRIER_WE_ABORTED);
+}
+
+static inline bool barrier_they_aborted(Barrier *b) {
+ return IN_SET(b->barriers, BARRIER_THEY_ABORTED, BARRIER_WE_ABORTED);
+}
+
+static inline bool barrier_we_aborted(Barrier *b) {
+ return b->barriers == BARRIER_WE_ABORTED;
+}
+
+static inline bool barrier_is_aborted(Barrier *b) {
+ return IN_SET(b->barriers,
+ BARRIER_I_ABORTED, BARRIER_THEY_ABORTED, BARRIER_WE_ABORTED);
+}
+
+static inline bool barrier_place_and_sync(Barrier *b) {
+ (void) barrier_place(b);
+ return barrier_sync(b);
+}
diff --git a/src/shared/base-filesystem.c b/src/shared/base-filesystem.c
new file mode 100644
index 0000000..89d7a7d
--- /dev/null
+++ b/src/shared/base-filesystem.c
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "base-filesystem.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "util.h"
+
+typedef struct BaseFilesystem {
+ const char *dir;
+ mode_t mode;
+ const char *target;
+ const char *exists;
+ bool ignore_failure;
+} BaseFilesystem;
+
+static const BaseFilesystem table[] = {
+ { "bin", 0, "usr/bin\0", NULL },
+ { "lib", 0, "usr/lib\0", NULL },
+ { "root", 0755, NULL, NULL, true },
+ { "sbin", 0, "usr/sbin\0", NULL },
+ { "usr", 0755, NULL, NULL },
+ { "var", 0755, NULL, NULL },
+ { "etc", 0755, NULL, NULL },
+ { "proc", 0755, NULL, NULL, true },
+ { "sys", 0755, NULL, NULL, true },
+ { "dev", 0755, NULL, NULL, true },
+#if defined(__i386__) || defined(__x86_64__)
+ { "lib64", 0, "usr/lib/x86_64-linux-gnu\0"
+ "usr/lib64\0", "ld-linux-x86-64.so.2" },
+#endif
+};
+
+int base_filesystem_create(const char *root, uid_t uid, gid_t gid) {
+ _cleanup_close_ int fd = -1;
+ int r = 0;
+ size_t i;
+
+ fd = open(root, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open root file system: %m");
+
+ for (i = 0; i < ELEMENTSOF(table); i ++) {
+ if (faccessat(fd, table[i].dir, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
+ continue;
+
+ if (table[i].target) {
+ const char *target = NULL, *s;
+
+ /* check if one of the targets exists */
+ NULSTR_FOREACH(s, table[i].target) {
+ if (faccessat(fd, s, F_OK, AT_SYMLINK_NOFOLLOW) < 0)
+ continue;
+
+ /* check if a specific file exists at the target path */
+ if (table[i].exists) {
+ _cleanup_free_ char *p = NULL;
+
+ p = strjoin(s, "/", table[i].exists);
+ if (!p)
+ return log_oom();
+
+ if (faccessat(fd, p, F_OK, AT_SYMLINK_NOFOLLOW) < 0)
+ continue;
+ }
+
+ target = s;
+ break;
+ }
+
+ if (!target)
+ continue;
+
+ r = symlinkat(target, fd, table[i].dir);
+ if (r < 0 && errno != EEXIST)
+ return log_error_errno(errno, "Failed to create symlink at %s/%s: %m", root, table[i].dir);
+
+ if (uid_is_valid(uid) || gid_is_valid(gid)) {
+ if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0)
+ return log_error_errno(errno, "Failed to chown symlink at %s/%s: %m", root, table[i].dir);
+ }
+
+ continue;
+ }
+
+ RUN_WITH_UMASK(0000)
+ r = mkdirat(fd, table[i].dir, table[i].mode);
+ if (r < 0 && errno != EEXIST) {
+ log_full_errno(table[i].ignore_failure ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to create directory at %s/%s: %m", root, table[i].dir);
+
+ if (!table[i].ignore_failure)
+ return -errno;
+
+ continue;
+ }
+
+ if (uid != UID_INVALID || gid != UID_INVALID) {
+ if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0)
+ return log_error_errno(errno, "Failed to chown directory at %s/%s: %m", root, table[i].dir);
+ }
+ }
+
+ return 0;
+}
diff --git a/src/shared/base-filesystem.h b/src/shared/base-filesystem.h
new file mode 100644
index 0000000..39d9708
--- /dev/null
+++ b/src/shared/base-filesystem.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+int base_filesystem_create(const char *root, uid_t uid, gid_t gid);
diff --git a/src/shared/bitmap.c b/src/shared/bitmap.c
new file mode 100644
index 0000000..a4cd645
--- /dev/null
+++ b/src/shared/bitmap.c
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "bitmap.h"
+#include "hashmap.h"
+#include "macro.h"
+
+struct Bitmap {
+ uint64_t *bitmaps;
+ size_t n_bitmaps;
+ size_t bitmaps_allocated;
+};
+
+/* Bitmaps are only meant to store relatively small numbers
+ * (corresponding to, say, an enum), so it is ok to limit
+ * the max entry. 64k should be plenty. */
+#define BITMAPS_MAX_ENTRY 0xffff
+
+/* This indicates that we reached the end of the bitmap */
+#define BITMAP_END ((unsigned) -1)
+
+#define BITMAP_NUM_TO_OFFSET(n) ((n) / (sizeof(uint64_t) * 8))
+#define BITMAP_NUM_TO_REM(n) ((n) % (sizeof(uint64_t) * 8))
+#define BITMAP_OFFSET_TO_NUM(offset, rem) ((offset) * sizeof(uint64_t) * 8 + (rem))
+
+Bitmap *bitmap_new(void) {
+ return new0(Bitmap, 1);
+}
+
+Bitmap *bitmap_copy(Bitmap *b) {
+ Bitmap *ret;
+
+ ret = bitmap_new();
+ if (!ret)
+ return NULL;
+
+ ret->bitmaps = newdup(uint64_t, b->bitmaps, b->n_bitmaps);
+ if (!ret->bitmaps)
+ return mfree(ret);
+
+ ret->n_bitmaps = ret->bitmaps_allocated = b->n_bitmaps;
+ return ret;
+}
+
+void bitmap_free(Bitmap *b) {
+ if (!b)
+ return;
+
+ free(b->bitmaps);
+ free(b);
+}
+
+int bitmap_ensure_allocated(Bitmap **b) {
+ Bitmap *a;
+
+ assert(b);
+
+ if (*b)
+ return 0;
+
+ a = bitmap_new();
+ if (!a)
+ return -ENOMEM;
+
+ *b = a;
+
+ return 0;
+}
+
+int bitmap_set(Bitmap *b, unsigned n) {
+ uint64_t bitmask;
+ unsigned offset;
+
+ assert(b);
+
+ /* we refuse to allocate huge bitmaps */
+ if (n > BITMAPS_MAX_ENTRY)
+ return -ERANGE;
+
+ offset = BITMAP_NUM_TO_OFFSET(n);
+
+ if (offset >= b->n_bitmaps) {
+ if (!GREEDY_REALLOC0(b->bitmaps, b->bitmaps_allocated, offset + 1))
+ return -ENOMEM;
+
+ b->n_bitmaps = offset + 1;
+ }
+
+ bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n);
+
+ b->bitmaps[offset] |= bitmask;
+
+ return 0;
+}
+
+void bitmap_unset(Bitmap *b, unsigned n) {
+ uint64_t bitmask;
+ unsigned offset;
+
+ if (!b)
+ return;
+
+ offset = BITMAP_NUM_TO_OFFSET(n);
+
+ if (offset >= b->n_bitmaps)
+ return;
+
+ bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n);
+
+ b->bitmaps[offset] &= ~bitmask;
+}
+
+bool bitmap_isset(Bitmap *b, unsigned n) {
+ uint64_t bitmask;
+ unsigned offset;
+
+ if (!b)
+ return false;
+
+ offset = BITMAP_NUM_TO_OFFSET(n);
+
+ if (offset >= b->n_bitmaps)
+ return false;
+
+ bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n);
+
+ return !!(b->bitmaps[offset] & bitmask);
+}
+
+bool bitmap_isclear(Bitmap *b) {
+ unsigned i;
+
+ if (!b)
+ return true;
+
+ for (i = 0; i < b->n_bitmaps; i++)
+ if (b->bitmaps[i] != 0)
+ return false;
+
+ return true;
+}
+
+void bitmap_clear(Bitmap *b) {
+
+ if (!b)
+ return;
+
+ b->bitmaps = mfree(b->bitmaps);
+ b->n_bitmaps = 0;
+ b->bitmaps_allocated = 0;
+}
+
+bool bitmap_iterate(Bitmap *b, Iterator *i, unsigned *n) {
+ uint64_t bitmask;
+ unsigned offset, rem;
+
+ assert(i);
+ assert(n);
+
+ if (!b || i->idx == BITMAP_END)
+ return false;
+
+ offset = BITMAP_NUM_TO_OFFSET(i->idx);
+ rem = BITMAP_NUM_TO_REM(i->idx);
+ bitmask = UINT64_C(1) << rem;
+
+ for (; offset < b->n_bitmaps; offset ++) {
+ if (b->bitmaps[offset]) {
+ for (; bitmask; bitmask <<= 1, rem ++) {
+ if (b->bitmaps[offset] & bitmask) {
+ *n = BITMAP_OFFSET_TO_NUM(offset, rem);
+ i->idx = *n + 1;
+
+ return true;
+ }
+ }
+ }
+
+ rem = 0;
+ bitmask = 1;
+ }
+
+ i->idx = BITMAP_END;
+
+ return false;
+}
+
+bool bitmap_equal(Bitmap *a, Bitmap *b) {
+ size_t common_n_bitmaps;
+ Bitmap *c;
+ unsigned i;
+
+ if (a == b)
+ return true;
+
+ if (!a != !b)
+ return false;
+
+ if (!a)
+ return true;
+
+ common_n_bitmaps = MIN(a->n_bitmaps, b->n_bitmaps);
+ if (memcmp_safe(a->bitmaps, b->bitmaps, sizeof(uint64_t) * common_n_bitmaps) != 0)
+ return false;
+
+ c = a->n_bitmaps > b->n_bitmaps ? a : b;
+ for (i = common_n_bitmaps; i < c->n_bitmaps; i++)
+ if (c->bitmaps[i] != 0)
+ return false;
+
+ return true;
+}
diff --git a/src/shared/bitmap.h b/src/shared/bitmap.h
new file mode 100644
index 0000000..843d27d
--- /dev/null
+++ b/src/shared/bitmap.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+
+typedef struct Bitmap Bitmap;
+
+Bitmap *bitmap_new(void);
+Bitmap *bitmap_copy(Bitmap *b);
+int bitmap_ensure_allocated(Bitmap **b);
+void bitmap_free(Bitmap *b);
+
+int bitmap_set(Bitmap *b, unsigned n);
+void bitmap_unset(Bitmap *b, unsigned n);
+bool bitmap_isset(Bitmap *b, unsigned n);
+bool bitmap_isclear(Bitmap *b);
+void bitmap_clear(Bitmap *b);
+
+bool bitmap_iterate(Bitmap *b, Iterator *i, unsigned *n);
+
+bool bitmap_equal(Bitmap *a, Bitmap *b);
+
+#define BITMAP_FOREACH(n, b, i) \
+ for ((i).idx = 0; bitmap_iterate((b), &(i), (unsigned*)&(n)); )
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Bitmap*, bitmap_free);
+
+#define _cleanup_bitmap_free_ _cleanup_(bitmap_freep)
diff --git a/src/shared/blkid-util.h b/src/shared/blkid-util.h
new file mode 100644
index 0000000..eb07a88
--- /dev/null
+++ b/src/shared/blkid-util.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if HAVE_BLKID
+# include <blkid.h>
+
+# include "macro.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(blkid_probe, blkid_free_probe);
+#endif
diff --git a/src/shared/boot-timestamps.c b/src/shared/boot-timestamps.c
new file mode 100644
index 0000000..bcbb86d
--- /dev/null
+++ b/src/shared/boot-timestamps.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "acpi-fpdt.h"
+#include "boot-timestamps.h"
+#include "efivars.h"
+#include "macro.h"
+#include "time-util.h"
+
+int boot_timestamps(const dual_timestamp *n, dual_timestamp *firmware, dual_timestamp *loader) {
+ usec_t x = 0, y = 0, a;
+ int r;
+ dual_timestamp _n;
+
+ assert(firmware);
+ assert(loader);
+
+ if (!n) {
+ dual_timestamp_get(&_n);
+ n = &_n;
+ }
+
+ r = acpi_get_boot_usec(&x, &y);
+ if (r < 0) {
+ r = efi_loader_get_boot_usec(&x, &y);
+ if (r < 0)
+ return r;
+ }
+
+ /* Let's convert this to timestamps where the firmware
+ * began/loader began working. To make this more confusing:
+ * since usec_t is unsigned and the kernel's monotonic clock
+ * begins at kernel initialization we'll actually initialize
+ * the monotonic timestamps here as negative of the actual
+ * value. */
+
+ firmware->monotonic = y;
+ loader->monotonic = y - x;
+
+ a = n->monotonic + firmware->monotonic;
+ firmware->realtime = n->realtime > a ? n->realtime - a : 0;
+
+ a = n->monotonic + loader->monotonic;
+ loader->realtime = n->realtime > a ? n->realtime - a : 0;
+
+ return 0;
+}
diff --git a/src/shared/boot-timestamps.h b/src/shared/boot-timestamps.h
new file mode 100644
index 0000000..4e648f1
--- /dev/null
+++ b/src/shared/boot-timestamps.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <time-util.h>
+
+int boot_timestamps(const dual_timestamp *n, dual_timestamp *firmware, dual_timestamp *loader);
diff --git a/src/shared/bootspec.c b/src/shared/bootspec.c
new file mode 100644
index 0000000..7e276f1
--- /dev/null
+++ b/src/shared/bootspec.c
@@ -0,0 +1,665 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+#include <linux/magic.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "blkid-util.h"
+#include "bootspec.h"
+#include "conf-files.h"
+#include "def.h"
+#include "device-nodes.h"
+#include "efivars.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "virt.h"
+
+static void boot_entry_free(BootEntry *entry) {
+ assert(entry);
+
+ free(entry->id);
+ free(entry->path);
+ free(entry->title);
+ free(entry->show_title);
+ free(entry->version);
+ free(entry->machine_id);
+ free(entry->architecture);
+ strv_free(entry->options);
+ free(entry->kernel);
+ free(entry->efi);
+ strv_free(entry->initrd);
+ free(entry->device_tree);
+}
+
+static int boot_entry_load(const char *path, BootEntry *entry) {
+ _cleanup_(boot_entry_free) BootEntry tmp = {};
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned line = 1;
+ char *b, *c;
+ int r;
+
+ assert(path);
+ assert(entry);
+
+ c = endswith_no_case(path, ".conf");
+ if (!c) {
+ log_error("Invalid loader entry filename: %s", path);
+ return -EINVAL;
+ }
+
+ b = basename(path);
+ tmp.id = strndup(b, c - b);
+ if (!tmp.id)
+ return log_oom();
+
+ tmp.path = strdup(path);
+ if (!tmp.path)
+ return log_oom();
+
+ f = fopen(path, "re");
+ if (!f)
+ return log_error_errno(errno, "Failed to open \"%s\": %m", path);
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL, *field = NULL;
+ const char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &buf);
+ if (r == 0)
+ break;
+ if (r == -ENOBUFS)
+ return log_error_errno(r, "%s:%u: Line too long", path, line);
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+
+ line++;
+
+ if (IN_SET(*strstrip(buf), '#', '\0'))
+ continue;
+
+ p = buf;
+ r = extract_first_word(&p, &field, " \t", 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse config file %s line %u: %m", path, line);
+ continue;
+ }
+ if (r == 0) {
+ log_warning("%s:%u: Bad syntax", path, line);
+ continue;
+ }
+
+ if (streq(field, "title"))
+ r = free_and_strdup(&tmp.title, p);
+ else if (streq(field, "version"))
+ r = free_and_strdup(&tmp.version, p);
+ else if (streq(field, "machine-id"))
+ r = free_and_strdup(&tmp.machine_id, p);
+ else if (streq(field, "architecture"))
+ r = free_and_strdup(&tmp.architecture, p);
+ else if (streq(field, "options"))
+ r = strv_extend(&tmp.options, p);
+ else if (streq(field, "linux"))
+ r = free_and_strdup(&tmp.kernel, p);
+ else if (streq(field, "efi"))
+ r = free_and_strdup(&tmp.efi, p);
+ else if (streq(field, "initrd"))
+ r = strv_extend(&tmp.initrd, p);
+ else if (streq(field, "devicetree"))
+ r = free_and_strdup(&tmp.device_tree, p);
+ else {
+ log_notice("%s:%u: Unknown line \"%s\"", path, line, field);
+ continue;
+ }
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+ }
+
+ *entry = tmp;
+ tmp = (BootEntry) {};
+ return 0;
+}
+
+void boot_config_free(BootConfig *config) {
+ size_t i;
+
+ assert(config);
+
+ free(config->default_pattern);
+ free(config->timeout);
+ free(config->editor);
+ free(config->auto_entries);
+ free(config->auto_firmware);
+
+ free(config->entry_oneshot);
+ free(config->entry_default);
+
+ for (i = 0; i < config->n_entries; i++)
+ boot_entry_free(config->entries + i);
+ free(config->entries);
+}
+
+static int boot_loader_read_conf(const char *path, BootConfig *config) {
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned line = 1;
+ int r;
+
+ assert(path);
+ assert(config);
+
+ f = fopen(path, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open \"%s\": %m", path);
+ }
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL, *field = NULL;
+ const char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &buf);
+ if (r == 0)
+ break;
+ if (r == -ENOBUFS)
+ return log_error_errno(r, "%s:%u: Line too long", path, line);
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+
+ line++;
+
+ if (IN_SET(*strstrip(buf), '#', '\0'))
+ continue;
+
+ p = buf;
+ r = extract_first_word(&p, &field, " \t", 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse config file %s line %u: %m", path, line);
+ continue;
+ }
+ if (r == 0) {
+ log_warning("%s:%u: Bad syntax", path, line);
+ continue;
+ }
+
+ if (streq(field, "default"))
+ r = free_and_strdup(&config->default_pattern, p);
+ else if (streq(field, "timeout"))
+ r = free_and_strdup(&config->timeout, p);
+ else if (streq(field, "editor"))
+ r = free_and_strdup(&config->editor, p);
+ else if (streq(field, "auto-entries"))
+ r = free_and_strdup(&config->auto_entries, p);
+ else if (streq(field, "auto-firmware"))
+ r = free_and_strdup(&config->auto_firmware, p);
+ else if (streq(field, "console-mode"))
+ r = free_and_strdup(&config->console_mode, p);
+ else {
+ log_notice("%s:%u: Unknown line \"%s\"", path, line, field);
+ continue;
+ }
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+ }
+
+ return 1;
+}
+
+static int boot_entry_compare(const BootEntry *a, const BootEntry *b) {
+ return str_verscmp(a->id, b->id);
+}
+
+static int boot_entries_find(const char *dir, BootEntry **ret_entries, size_t *ret_n_entries) {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ int r;
+ BootEntry *array = NULL;
+ size_t n_allocated = 0, n = 0;
+
+ assert(dir);
+ assert(ret_entries);
+ assert(ret_n_entries);
+
+ r = conf_files_list(&files, ".conf", NULL, 0, dir, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list files in \"%s\": %m", dir);
+
+ STRV_FOREACH(f, files) {
+ if (!GREEDY_REALLOC0(array, n_allocated, n + 1))
+ return log_oom();
+
+ r = boot_entry_load(*f, array + n);
+ if (r < 0)
+ continue;
+
+ n++;
+ }
+
+ typesafe_qsort(array, n, boot_entry_compare);
+
+ *ret_entries = array;
+ *ret_n_entries = n;
+
+ return 0;
+}
+
+static bool find_nonunique(BootEntry *entries, size_t n_entries, bool *arr) {
+ size_t i, j;
+ bool non_unique = false;
+
+ assert(entries || n_entries == 0);
+ assert(arr || n_entries == 0);
+
+ for (i = 0; i < n_entries; i++)
+ arr[i] = false;
+
+ for (i = 0; i < n_entries; i++)
+ for (j = 0; j < n_entries; j++)
+ if (i != j && streq(boot_entry_title(entries + i),
+ boot_entry_title(entries + j)))
+ non_unique = arr[i] = arr[j] = true;
+
+ return non_unique;
+}
+
+static int boot_entries_uniquify(BootEntry *entries, size_t n_entries) {
+ char *s;
+ size_t i;
+ int r;
+ bool arr[n_entries];
+
+ assert(entries || n_entries == 0);
+
+ /* Find _all_ non-unique titles */
+ if (!find_nonunique(entries, n_entries, arr))
+ return 0;
+
+ /* Add version to non-unique titles */
+ for (i = 0; i < n_entries; i++)
+ if (arr[i] && entries[i].version) {
+ r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].version);
+ if (r < 0)
+ return -ENOMEM;
+
+ free_and_replace(entries[i].show_title, s);
+ }
+
+ if (!find_nonunique(entries, n_entries, arr))
+ return 0;
+
+ /* Add machine-id to non-unique titles */
+ for (i = 0; i < n_entries; i++)
+ if (arr[i] && entries[i].machine_id) {
+ r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].machine_id);
+ if (r < 0)
+ return -ENOMEM;
+
+ free_and_replace(entries[i].show_title, s);
+ }
+
+ if (!find_nonunique(entries, n_entries, arr))
+ return 0;
+
+ /* Add file name to non-unique titles */
+ for (i = 0; i < n_entries; i++)
+ if (arr[i]) {
+ r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].id);
+ if (r < 0)
+ return -ENOMEM;
+
+ free_and_replace(entries[i].show_title, s);
+ }
+
+ return 0;
+}
+
+static int boot_entries_select_default(const BootConfig *config) {
+ int i;
+
+ assert(config);
+
+ if (config->entry_oneshot)
+ for (i = config->n_entries - 1; i >= 0; i--)
+ if (streq(config->entry_oneshot, config->entries[i].id)) {
+ log_debug("Found default: id \"%s\" is matched by LoaderEntryOneShot",
+ config->entries[i].id);
+ return i;
+ }
+
+ if (config->entry_default)
+ for (i = config->n_entries - 1; i >= 0; i--)
+ if (streq(config->entry_default, config->entries[i].id)) {
+ log_debug("Found default: id \"%s\" is matched by LoaderEntryDefault",
+ config->entries[i].id);
+ return i;
+ }
+
+ if (config->default_pattern)
+ for (i = config->n_entries - 1; i >= 0; i--)
+ if (fnmatch(config->default_pattern, config->entries[i].id, FNM_CASEFOLD) == 0) {
+ log_debug("Found default: id \"%s\" is matched by pattern \"%s\"",
+ config->entries[i].id, config->default_pattern);
+ return i;
+ }
+
+ if (config->n_entries > 0)
+ log_debug("Found default: last entry \"%s\"", config->entries[config->n_entries - 1].id);
+ else
+ log_debug("Found no default boot entry :(");
+
+ return config->n_entries - 1; /* -1 means "no default" */
+}
+
+int boot_entries_load_config(const char *esp_path, BootConfig *config) {
+ const char *p;
+ int r;
+
+ assert(esp_path);
+ assert(config);
+
+ p = strjoina(esp_path, "/loader/loader.conf");
+ r = boot_loader_read_conf(p, config);
+ if (r < 0)
+ return r;
+
+ p = strjoina(esp_path, "/loader/entries");
+ r = boot_entries_find(p, &config->entries, &config->n_entries);
+ if (r < 0)
+ return r;
+
+ r = boot_entries_uniquify(config->entries, config->n_entries);
+ if (r < 0)
+ return log_error_errno(r, "Failed to uniquify boot entries: %m");
+
+ if (is_efi_boot()) {
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryOneShot", &config->entry_oneshot);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to read EFI var \"LoaderEntryOneShot\": %m");
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryDefault", &config->entry_default);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to read EFI var \"LoaderEntryDefault\": %m");
+ }
+
+ config->default_entry = boot_entries_select_default(config);
+ return 0;
+}
+
+/********************************************************************************/
+
+static int verify_esp(
+ const char *p,
+ bool searching,
+ bool unprivileged_mode,
+ uint32_t *ret_part,
+ uint64_t *ret_pstart,
+ uint64_t *ret_psize,
+ sd_id128_t *ret_uuid) {
+#if HAVE_BLKID
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ _cleanup_free_ char *node = NULL;
+ const char *v;
+#endif
+ uint64_t pstart = 0, psize = 0;
+ struct stat st, st2;
+ const char *t2;
+ struct statfs sfs;
+ sd_id128_t uuid = SD_ID128_NULL;
+ uint32_t part = 0;
+ bool relax_checks;
+ int r;
+
+ assert(p);
+
+ relax_checks = getenv_bool("SYSTEMD_RELAX_ESP_CHECKS") > 0;
+
+ /* Non-root user can only check the status, so if an error occured in the following, it does not cause any
+ * issues. Let's also, silence the error messages. */
+
+ if (!relax_checks) {
+ if (statfs(p, &sfs) < 0) {
+ /* If we are searching for the mount point, don't generate a log message if we can't find the path */
+ if (errno == ENOENT && searching)
+ return -ENOENT;
+
+ return log_full_errno(unprivileged_mode && errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to check file system type of \"%s\": %m", p);
+ }
+
+ if (!F_TYPE_EQUAL(sfs.f_type, MSDOS_SUPER_MAGIC)) {
+ if (searching)
+ return -EADDRNOTAVAIL;
+
+ log_error("File system \"%s\" is not a FAT EFI System Partition (ESP) file system.", p);
+ return -ENODEV;
+ }
+ }
+
+ if (stat(p, &st) < 0)
+ return log_full_errno(unprivileged_mode && errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to determine block device node of \"%s\": %m", p);
+
+ if (major(st.st_dev) == 0) {
+ log_error("Block device node of %p is invalid.", p);
+ return -ENODEV;
+ }
+
+ t2 = strjoina(p, "/..");
+ r = stat(t2, &st2);
+ if (r < 0)
+ return log_full_errno(unprivileged_mode && errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to determine block device node of parent of \"%s\": %m", p);
+
+ if (st.st_dev == st2.st_dev) {
+ log_error("Directory \"%s\" is not the root of the EFI System Partition (ESP) file system.", p);
+ return -ENODEV;
+ }
+
+ /* In a container we don't have access to block devices, skip this part of the verification, we trust the
+ * container manager set everything up correctly on its own. Also skip the following verification for non-root user. */
+ if (detect_container() > 0 || unprivileged_mode || relax_checks)
+ goto finish;
+
+#if HAVE_BLKID
+ r = device_path_make_major_minor(S_IFBLK, st.st_dev, &node);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format major/minor device path: %m");
+ errno = 0;
+ b = blkid_new_probe_from_filename(node);
+ if (!b)
+ return log_error_errno(errno ?: ENOMEM, "Failed to open file system \"%s\": %m", p);
+
+ blkid_probe_enable_superblocks(b, 1);
+ blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
+ blkid_probe_enable_partitions(b, 1);
+ blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (r == -2) {
+ log_error("File system \"%s\" is ambiguous.", p);
+ return -ENODEV;
+ } else if (r == 1) {
+ log_error("File system \"%s\" does not contain a label.", p);
+ return -ENODEV;
+ } else if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe file system \"%s\": %m", p);
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "TYPE", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe file system type \"%s\": %m", p);
+ if (!streq(v, "vfat")) {
+ log_error("File system \"%s\" is not FAT.", p);
+ return -ENODEV;
+ }
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_SCHEME", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe partition scheme \"%s\": %m", p);
+ if (!streq(v, "gpt")) {
+ log_error("File system \"%s\" is not on a GPT partition table.", p);
+ return -ENODEV;
+ }
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_TYPE", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe partition type UUID \"%s\": %m", p);
+ if (!streq(v, "c12a7328-f81f-11d2-ba4b-00a0c93ec93b")) {
+ log_error("File system \"%s\" has wrong type for an EFI System Partition (ESP).", p);
+ return -ENODEV;
+ }
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_UUID", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe partition entry UUID \"%s\": %m", p);
+ r = sd_id128_from_string(v, &uuid);
+ if (r < 0) {
+ log_error("Partition \"%s\" has invalid UUID \"%s\".", p, v);
+ return -EIO;
+ }
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_NUMBER", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe partition number \"%s\": m", p);
+ r = safe_atou32(v, &part);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_NUMBER field.");
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_OFFSET", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe partition offset \"%s\": %m", p);
+ r = safe_atou64(v, &pstart);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_OFFSET field.");
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_SIZE", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe partition size \"%s\": %m", p);
+ r = safe_atou64(v, &psize);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_SIZE field.");
+#endif
+
+finish:
+ if (ret_part)
+ *ret_part = part;
+ if (ret_pstart)
+ *ret_pstart = pstart;
+ if (ret_psize)
+ *ret_psize = psize;
+ if (ret_uuid)
+ *ret_uuid = uuid;
+
+ return 0;
+}
+
+int find_esp_and_warn(
+ const char *path,
+ bool unprivileged_mode,
+ char **ret_path,
+ uint32_t *ret_part,
+ uint64_t *ret_pstart,
+ uint64_t *ret_psize,
+ sd_id128_t *ret_uuid) {
+
+ int r;
+
+ /* This logs about all errors except:
+ *
+ * -ENOKEY → when we can't find the partition
+ * -EACCESS → when unprivileged_mode is true, and we can't access something
+ */
+
+ if (path) {
+ r = verify_esp(path, false, unprivileged_mode, ret_part, ret_pstart, ret_psize, ret_uuid);
+ if (r < 0)
+ return r;
+
+ goto found;
+ }
+
+ path = getenv("SYSTEMD_ESP_PATH");
+ if (path) {
+ if (!path_is_valid(path) || !path_is_absolute(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "$SYSTEMD_ESP_PATH does not refer to absolute path, refusing to use it: %s",
+ path);
+
+ /* Note: when the user explicitly configured things with an env var we won't validate the mount
+ * point. After all we want this to be useful for testing. */
+ goto found;
+ }
+
+ FOREACH_STRING(path, "/efi", "/boot", "/boot/efi") {
+
+ r = verify_esp(path, true, unprivileged_mode, ret_part, ret_pstart, ret_psize, ret_uuid);
+ if (r >= 0)
+ goto found;
+ if (!IN_SET(r, -ENOENT, -EADDRNOTAVAIL)) /* This one is not it */
+ return r;
+ }
+
+ /* No logging here */
+ return -ENOKEY;
+
+found:
+ if (ret_path) {
+ char *c;
+
+ c = strdup(path);
+ if (!c)
+ return log_oom();
+
+ *ret_path = c;
+ }
+
+ return 0;
+}
+
+int find_default_boot_entry(
+ const char *esp_path,
+ char **esp_where,
+ BootConfig *config,
+ const BootEntry **e) {
+
+ _cleanup_free_ char *where = NULL;
+ int r;
+
+ assert(config);
+ assert(e);
+
+ r = find_esp_and_warn(esp_path, false, &where, NULL, NULL, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ r = boot_entries_load_config(where, config);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load bootspec config from \"%s/loader\": %m", where);
+
+ if (config->default_entry < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "No entry suitable as default, refusing to guess.");
+
+ *e = &config->entries[config->default_entry];
+ log_debug("Found default boot entry in file \"%s\"", (*e)->path);
+
+ if (esp_where)
+ *esp_where = TAKE_PTR(where);
+
+ return 0;
+}
diff --git a/src/shared/bootspec.h b/src/shared/bootspec.h
new file mode 100644
index 0000000..ed57621
--- /dev/null
+++ b/src/shared/bootspec.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-id128.h"
+
+typedef struct BootEntry {
+ char *id; /* This is the file basename without extension */
+ char *path; /* This is the full path to the file */
+ char *title;
+ char *show_title;
+ char *version;
+ char *machine_id;
+ char *architecture;
+ char **options;
+ char *kernel; /* linux is #defined to 1, yikes! */
+ char *efi;
+ char **initrd;
+ char *device_tree;
+} BootEntry;
+
+typedef struct BootConfig {
+ char *default_pattern;
+ char *timeout;
+ char *editor;
+ char *auto_entries;
+ char *auto_firmware;
+ char *console_mode;
+
+ char *entry_oneshot;
+ char *entry_default;
+
+ BootEntry *entries;
+ size_t n_entries;
+ ssize_t default_entry;
+} BootConfig;
+
+void boot_config_free(BootConfig *config);
+int boot_entries_load_config(const char *esp_path, BootConfig *config);
+
+static inline const char* boot_entry_title(const BootEntry *entry) {
+ return entry->show_title ?: entry->title ?: entry->id;
+}
+
+int find_esp_and_warn(const char *path, bool unprivileged_mode, char **ret_path, uint32_t *ret_part, uint64_t *ret_pstart, uint64_t *ret_psize, sd_id128_t *ret_uuid);
+
+int find_default_boot_entry(const char *esp_path, char **esp_where, BootConfig *config, const BootEntry **e);
diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c
new file mode 100644
index 0000000..2c61e04
--- /dev/null
+++ b/src/shared/bpf-program.c
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-program.h"
+#include "fd-util.h"
+#include "log.h"
+#include "missing.h"
+#include "path-util.h"
+#include "util.h"
+
+int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+
+ p = new0(BPFProgram, 1);
+ if (!p)
+ return log_oom();
+
+ p->n_ref = 1;
+ p->prog_type = prog_type;
+ p->kernel_fd = -1;
+
+ *ret = TAKE_PTR(p);
+
+ return 0;
+}
+
+static BPFProgram *bpf_program_free(BPFProgram *p) {
+ assert(p);
+
+ /* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last
+ * fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated
+ * programs that attached one of their BPF programs to a cgroup will leave this programs pinned for good with
+ * zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in
+ * question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during
+ * operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To
+ * counter this, we track closely to which cgroup a program was attached to and will detach it on our own
+ * whenever we close the BPF fd. */
+ (void) bpf_program_cgroup_detach(p);
+
+ safe_close(p->kernel_fd);
+ free(p->instructions);
+ free(p->attached_path);
+
+ return mfree(p);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(BPFProgram, bpf_program, bpf_program_free);
+
+int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {
+
+ assert(p);
+
+ if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */
+ return -EBUSY;
+
+ if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count))
+ return -ENOMEM;
+
+ memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
+ p->n_instructions += count;
+
+ return 0;
+}
+
+int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
+ union bpf_attr attr;
+
+ assert(p);
+
+ if (p->kernel_fd >= 0) { /* make this idempotent */
+ memzero(log_buf, log_size);
+ return 0;
+ }
+
+ attr = (union bpf_attr) {
+ .prog_type = p->prog_type,
+ .insns = PTR_TO_UINT64(p->instructions),
+ .insn_cnt = p->n_instructions,
+ .license = PTR_TO_UINT64("GPL"),
+ .log_buf = PTR_TO_UINT64(log_buf),
+ .log_level = !!log_buf,
+ .log_size = log_size,
+ };
+
+ p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (p->kernel_fd < 0)
+ return -errno;
+
+ return 0;
+}
+
+int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
+ _cleanup_free_ char *copy = NULL;
+ _cleanup_close_ int fd = -1;
+ union bpf_attr attr;
+ int r;
+
+ assert(p);
+ assert(type >= 0);
+ assert(path);
+
+ if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
+ return -EINVAL;
+
+ /* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's
+ * refuse this early. */
+ if (p->attached_path) {
+ if (!path_equal(p->attached_path, path))
+ return -EBUSY;
+ if (p->attached_type != type)
+ return -EBUSY;
+ if (p->attached_flags != flags)
+ return -EBUSY;
+
+ /* Here's a shortcut: if we previously attached this program already, then we don't have to do so
+ * again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have
+ * replaced our program since the last time, hence let's reattach it again, just to be safe. In flags
+ * == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags
+ * == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours
+ * would remain in effect. */
+ if (flags != BPF_F_ALLOW_OVERRIDE)
+ return 0;
+ }
+
+ /* Ensure we have a kernel object for this. */
+ r = bpf_program_load_kernel(p, NULL, 0);
+ if (r < 0)
+ return r;
+
+ copy = strdup(path);
+ if (!copy)
+ return -ENOMEM;
+
+ fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ attr = (union bpf_attr) {
+ .attach_type = type,
+ .target_fd = fd,
+ .attach_bpf_fd = p->kernel_fd,
+ .attach_flags = flags,
+ };
+
+ if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
+ return -errno;
+
+ free_and_replace(p->attached_path, copy);
+ p->attached_type = type;
+ p->attached_flags = flags;
+
+ return 0;
+}
+
+int bpf_program_cgroup_detach(BPFProgram *p) {
+ _cleanup_close_ int fd = -1;
+
+ assert(p);
+
+ if (!p->attached_path)
+ return -EUNATCH;
+
+ fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (fd < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached
+ * implicitly by the removal, hence don't complain */
+
+ } else {
+ union bpf_attr attr;
+
+ attr = (union bpf_attr) {
+ .attach_type = p->attached_type,
+ .target_fd = fd,
+ .attach_bpf_fd = p->kernel_fd,
+ };
+
+ if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
+ return -errno;
+ }
+
+ p->attached_path = mfree(p->attached_path);
+
+ return 0;
+}
+
+int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
+ union bpf_attr attr = {
+ .map_type = type,
+ .key_size = key_size,
+ .value_size = value_size,
+ .max_entries = max_entries,
+ .map_flags = flags,
+ };
+ int fd;
+
+ fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
+int bpf_map_update_element(int fd, const void *key, void *value) {
+
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = PTR_TO_UINT64(key),
+ .value = PTR_TO_UINT64(value),
+ };
+
+ if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int bpf_map_lookup_element(int fd, const void *key, void *value) {
+
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = PTR_TO_UINT64(key),
+ .value = PTR_TO_UINT64(value),
+ };
+
+ if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h
new file mode 100644
index 0000000..c21eb2f
--- /dev/null
+++ b/src/shared/bpf-program.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <sys/syscall.h>
+
+#include "list.h"
+#include "macro.h"
+
+typedef struct BPFProgram BPFProgram;
+
+struct BPFProgram {
+ unsigned n_ref;
+
+ int kernel_fd;
+ uint32_t prog_type;
+
+ size_t n_instructions;
+ size_t allocated;
+ struct bpf_insn *instructions;
+
+ char *attached_path;
+ int attached_type;
+ uint32_t attached_flags;
+};
+
+int bpf_program_new(uint32_t prog_type, BPFProgram **ret);
+BPFProgram *bpf_program_unref(BPFProgram *p);
+BPFProgram *bpf_program_ref(BPFProgram *p);
+
+int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count);
+int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size);
+
+int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
+int bpf_program_cgroup_detach(BPFProgram *p);
+
+int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags);
+int bpf_map_update_element(int fd, const void *key, void *value);
+int bpf_map_lookup_element(int fd, const void *key, void *value);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BPFProgram*, bpf_program_unref);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
new file mode 100644
index 0000000..9a8051d
--- /dev/null
+++ b/src/shared/bus-unit-util.c
@@ -0,0 +1,2547 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "cap-list.h"
+#include "cgroup-util.h"
+#include "condition.h"
+#include "cpu-set-util.h"
+#include "env-util.h"
+#include "errno-list.h"
+#include "escape.h"
+#include "hashmap.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "ip-protocol-list.h"
+#include "list.h"
+#include "locale-util.h"
+#include "missing_fs.h"
+#include "mountpoint-util.h"
+#include "nsflags.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "securebits-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "terminal-util.h"
+#include "unit-def.h"
+#include "user-util.h"
+#include "utf8.h"
+#include "util.h"
+
+int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u) {
+ assert(message);
+ assert(u);
+
+ u->machine = NULL;
+
+ return sd_bus_message_read(
+ message,
+ "(ssssssouso)",
+ &u->id,
+ &u->description,
+ &u->load_state,
+ &u->active_state,
+ &u->sub_state,
+ &u->following,
+ &u->unit_path,
+ &u->job_id,
+ &u->job_type,
+ &u->job_path);
+}
+
+#define DEFINE_BUS_APPEND_PARSE_PTR(bus_type, cast_type, type, parse_func) \
+ static int bus_append_##parse_func( \
+ sd_bus_message *m, \
+ const char *field, \
+ const char *eq) { \
+ type val; \
+ int r; \
+ \
+ r = parse_func(eq, &val); \
+ if (r < 0) \
+ return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq); \
+ \
+ r = sd_bus_message_append(m, "(sv)", field, \
+ bus_type, (cast_type) val); \
+ if (r < 0) \
+ return bus_log_create_error(r); \
+ \
+ return 1; \
+ }
+
+#define DEFINE_BUS_APPEND_PARSE(bus_type, parse_func) \
+ static int bus_append_##parse_func( \
+ sd_bus_message *m, \
+ const char *field, \
+ const char *eq) { \
+ int r; \
+ \
+ r = parse_func(eq); \
+ if (r < 0) \
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse %s: %s", field, eq); \
+ \
+ r = sd_bus_message_append(m, "(sv)", field, \
+ bus_type, (int32_t) r); \
+ if (r < 0) \
+ return bus_log_create_error(r); \
+ \
+ return 1; \
+ }
+
+DEFINE_BUS_APPEND_PARSE("b", parse_boolean);
+DEFINE_BUS_APPEND_PARSE("i", ioprio_class_from_string);
+DEFINE_BUS_APPEND_PARSE("i", ip_tos_from_string);
+DEFINE_BUS_APPEND_PARSE("i", log_facility_unshifted_from_string);
+DEFINE_BUS_APPEND_PARSE("i", log_level_from_string);
+DEFINE_BUS_APPEND_PARSE("i", parse_errno);
+DEFINE_BUS_APPEND_PARSE("i", sched_policy_from_string);
+DEFINE_BUS_APPEND_PARSE("i", secure_bits_from_string);
+DEFINE_BUS_APPEND_PARSE("i", signal_from_string);
+DEFINE_BUS_APPEND_PARSE("i", parse_ip_protocol);
+DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, ioprio_parse_priority);
+DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, parse_nice);
+DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, safe_atoi);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, nsec_t, parse_nsec);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_blkio_weight_parse);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_cpu_shares_parse);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_weight_parse);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, unsigned long, mount_propagation_flags_from_string);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, safe_atou64);
+DEFINE_BUS_APPEND_PARSE_PTR("u", uint32_t, mode_t, parse_mode);
+DEFINE_BUS_APPEND_PARSE_PTR("u", uint32_t, unsigned, safe_atou);
+DEFINE_BUS_APPEND_PARSE_PTR("x", int64_t, int64_t, safe_atoi64);
+
+static int bus_append_string(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ r = sd_bus_message_append(m, "(sv)", field, "s", eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_strv(sd_bus_message *m, const char *field, const char *eq, ExtractFlags flags) {
+ const char *p;
+ int r;
+
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, 's', field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "as");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (p = eq;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, flags);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Invalid syntax: %s", eq);
+
+ r = sd_bus_message_append_basic(m, 's', word);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_byte_array(sd_bus_message *m, const char *field, const void *buf, size_t n) {
+ int r;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "ay");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(m, 'y', buf, n);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_parse_sec_rename(sd_bus_message *m, const char *field, const char *eq) {
+ char *n;
+ usec_t t;
+ size_t l;
+ int r;
+
+ r = parse_sec(eq, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq);
+
+ l = strlen(field);
+ n = newa(char, l + 2);
+ /* Change suffix Sec → USec */
+ strcpy(mempcpy(n, field, l - 3), "USec");
+
+ r = sd_bus_message_append(m, "(sv)", n, "t", t);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_parse_size(sd_bus_message *m, const char *field, const char *eq, uint64_t base) {
+ uint64_t v;
+ int r;
+
+ r = parse_size(eq, base, &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq);
+
+ r = sd_bus_message_append(m, "(sv)", field, "t", v);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_exec_command(sd_bus_message *m, const char *field, const char *eq) {
+ bool ignore_failure = false, explicit_path = false, done = false;
+ _cleanup_strv_free_ char **l = NULL;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ do {
+ switch (*eq) {
+
+ case '-':
+ if (ignore_failure)
+ done = true;
+ else {
+ ignore_failure = true;
+ eq++;
+ }
+ break;
+
+ case '@':
+ if (explicit_path)
+ done = true;
+ else {
+ explicit_path = true;
+ eq++;
+ }
+ break;
+
+ case '+':
+ case '!':
+ /* The bus API doesn't support +, ! and !! currently, unfortunately. :-( */
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Sorry, but +, ! and !! are currently not supported for transient services.");
+
+ default:
+ done = true;
+ break;
+ }
+ } while (!done);
+
+ if (explicit_path) {
+ r = extract_first_word(&eq, &path, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse path: %m");
+ }
+
+ r = strv_split_extract(&l, eq, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse command line: %m");
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(sasb)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sasb)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (!strv_isempty(l)) {
+
+ r = sd_bus_message_open_container(m, 'r', "sasb");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", path ?: l[0]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, l);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "b", ignore_failure);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_ip_address_access(sd_bus_message *m, int family, const union in_addr_union *prefix, unsigned char prefixlen) {
+ int r;
+
+ assert(m);
+ assert(prefix);
+
+ r = sd_bus_message_open_container(m, 'r', "iayu");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "i", family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(m, 'y', prefix, FAMILY_ADDRESS_SIZE(family));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "u", prefixlen);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(m);
+}
+
+static int bus_append_cgroup_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (STR_IN_SET(field, "DevicePolicy", "Slice"))
+
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field,
+ "CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting",
+ "TasksAccounting", "IPAccounting"))
+
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "CPUWeight", "StartupCPUWeight", "IOWeight", "StartupIOWeight"))
+
+ return bus_append_cg_weight_parse(m, field, eq);
+
+ if (STR_IN_SET(field, "CPUShares", "StartupCPUShares"))
+
+ return bus_append_cg_cpu_shares_parse(m, field, eq);
+
+ if (STR_IN_SET(field, "BlockIOWeight", "StartupBlockIOWeight"))
+
+ return bus_append_cg_blkio_weight_parse(m, field, eq);
+
+ if (streq(field, "Delegate")) {
+
+ r = parse_boolean(eq);
+ if (r < 0)
+ return bus_append_strv(m, "DelegateControllers", eq, EXTRACT_QUOTES);
+
+ r = sd_bus_message_append(m, "(sv)", "Delegate", "b", r);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "MemoryMin", "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax", "MemoryLimit", "TasksMax")) {
+
+ if (isempty(eq) || streq(eq, "infinity")) {
+ r = sd_bus_message_append(m, "(sv)", field, "t", CGROUP_LIMIT_MAX);
+ if (r < 0)
+ return bus_log_create_error(r);
+ return 1;
+ }
+
+ r = parse_permille(eq);
+ if (r >= 0) {
+ char *n;
+
+ /* When this is a percentage we'll convert this into a relative value in the range 0…UINT32_MAX
+ * and pass it in the MemoryLowScale property (and related ones). This way the physical memory
+ * size can be determined server-side. */
+
+ n = strjoina(field, "Scale");
+ r = sd_bus_message_append(m, "(sv)", n, "u", (uint32_t) (((uint64_t) r * UINT32_MAX) / 1000U));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "TasksMax"))
+ return bus_append_safe_atou64(m, field, eq);
+
+ return bus_append_parse_size(m, field, eq, 1024);
+ }
+
+ if (streq(field, "CPUQuota")) {
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "CPUQuotaPerSecUSec", "t", USEC_INFINITY);
+ else {
+ r = parse_permille_unbounded(eq);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ERANGE),
+ "CPU quota too small.");
+ if (r < 0)
+ return log_error_errno(r, "CPU quota '%s' invalid.", eq);
+
+ r = sd_bus_message_append(m, "(sv)", "CPUQuotaPerSecUSec", "t", (((uint64_t) r * USEC_PER_SEC) / 1000U));
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "DeviceAllow")) {
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 0);
+ else {
+ const char *path = eq, *rwm = NULL, *e;
+
+ e = strchr(eq, ' ');
+ if (e) {
+ path = strndupa(eq, e - eq);
+ rwm = e+1;
+ }
+
+ r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 1, path, strempty(rwm));
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (cgroup_io_limit_type_from_string(field) >= 0 || STR_IN_SET(field, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) {
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "a(st)", 0);
+ else {
+ const char *path, *bandwidth, *e;
+ uint64_t bytes;
+
+ e = strchr(eq, ' ');
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse %s value %s.",
+ field, eq);
+
+ path = strndupa(eq, e - eq);
+ bandwidth = e+1;
+
+ if (streq(bandwidth, "infinity"))
+ bytes = CGROUP_LIMIT_MAX;
+ else {
+ r = parse_size(bandwidth, 1000, &bytes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse byte value %s: %m", bandwidth);
+ }
+
+ r = sd_bus_message_append(m, "(sv)", field, "a(st)", 1, path, bytes);
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "IODeviceWeight", "BlockIODeviceWeight")) {
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "a(st)", 0);
+ else {
+ const char *path, *weight, *e;
+ uint64_t u;
+
+ e = strchr(eq, ' ');
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse %s value %s.",
+ field, eq);
+
+ path = strndupa(eq, e - eq);
+ weight = e+1;
+
+ r = safe_atou64(weight, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value %s: %m", field, weight);
+
+ r = sd_bus_message_append(m, "(sv)", field, "a(st)", 1, path, u);
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "IODeviceLatencyTargetSec")) {
+ const char *field_usec = "IODeviceLatencyTargetUSec";
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", USEC_INFINITY);
+ else {
+ const char *path, *target, *e;
+ usec_t usec;
+
+ e = strchr(eq, ' ');
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse %s value %s.",
+ field, eq);
+
+ path = strndupa(eq, e - eq);
+ target = e+1;
+
+ r = parse_sec(target, &usec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value %s: %m", field, target);
+
+ r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", 1, path, usec);
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "IPAddressAllow", "IPAddressDeny")) {
+ unsigned char prefixlen;
+ union in_addr_union prefix = {};
+ int family;
+
+ if (isempty(eq)) {
+ r = sd_bus_message_append(m, "(sv)", field, "a(iayu)", 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(iayu)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(iayu)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (streq(eq, "any")) {
+ /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
+
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ } else if (is_localhost(eq)) {
+ /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
+
+ prefix.in.s_addr = htobe32(0x7f000000);
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 8);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ prefix.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 128);
+ if (r < 0)
+ return r;
+
+ } else if (streq(eq, "link-local")) {
+ /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
+
+ prefix.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 16);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ prefix.in6 = (struct in6_addr) {
+ .s6_addr32[0] = htobe32(0xfe800000)
+ };
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 64);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ } else if (streq(eq, "multicast")) {
+ /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
+
+ prefix.in.s_addr = htobe32((UINT32_C(224) << 24));
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 4);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ prefix.in6 = (struct in6_addr) {
+ .s6_addr32[0] = htobe32(0xff000000)
+ };
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 8);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ } else {
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&eq, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s: %s", field, eq);
+
+ r = in_addr_prefix_from_string_auto(word, &family, &prefix, &prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse IP address prefix: %s", word);
+
+ r = bus_append_ip_address_access(m, family, &prefix, prefixlen);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_automount_property(sd_bus_message *m, const char *field, const char *eq) {
+
+ if (streq(field, "Where"))
+
+ return bus_append_string(m, field, eq);
+
+ if (streq(field, "DirectoryMode"))
+
+ return bus_append_parse_mode(m, field, eq);
+
+ if (streq(field, "TimeoutIdleSec"))
+
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ return 0;
+}
+
+static int bus_append_execute_property(sd_bus_message *m, const char *field, const char *eq) {
+ const char *suffix;
+ int r;
+
+ if (STR_IN_SET(field,
+ "User", "Group",
+ "UtmpIdentifier", "UtmpMode", "PAMName", "TTYPath",
+ "WorkingDirectory", "RootDirectory", "SyslogIdentifier",
+ "ProtectSystem", "ProtectHome", "SELinuxContext", "RootImage",
+ "RuntimeDirectoryPreserve", "Personality", "KeyringMode"))
+
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field,
+ "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate",
+ "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
+ "PrivateMounts", "NoNewPrivileges", "SyslogLevelPrefix",
+ "MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC",
+ "ProtectKernelTunables", "ProtectKernelModules", "ProtectControlGroups",
+ "MountAPIVFS", "CPUSchedulingResetOnFork", "LockPersonality"))
+
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field,
+ "ReadWriteDirectories", "ReadOnlyDirectories", "InaccessibleDirectories",
+ "ReadWritePaths", "ReadOnlyPaths", "InaccessiblePaths",
+ "RuntimeDirectory", "StateDirectory", "CacheDirectory", "LogsDirectory", "ConfigurationDirectory",
+ "SupplementaryGroups", "SystemCallArchitectures"))
+
+ return bus_append_strv(m, field, eq, EXTRACT_QUOTES);
+
+ if (STR_IN_SET(field, "SyslogLevel", "LogLevelMax"))
+
+ return bus_append_log_level_from_string(m, field, eq);
+
+ if (streq(field, "SyslogFacility"))
+
+ return bus_append_log_facility_unshifted_from_string(m, field, eq);
+
+ if (streq(field, "SecureBits"))
+
+ return bus_append_secure_bits_from_string(m, field, eq);
+
+ if (streq(field, "CPUSchedulingPolicy"))
+
+ return bus_append_sched_policy_from_string(m, field, eq);
+
+ if (STR_IN_SET(field, "CPUSchedulingPriority", "OOMScoreAdjust"))
+
+ return bus_append_safe_atoi(m, field, eq);
+
+ if (streq(field, "Nice"))
+
+ return bus_append_parse_nice(m, field, eq);
+
+ if (streq(field, "SystemCallErrorNumber"))
+
+ return bus_append_parse_errno(m, field, eq);
+
+ if (streq(field, "IOSchedulingClass"))
+
+ return bus_append_ioprio_class_from_string(m, field, eq);
+
+ if (streq(field, "IOSchedulingPriority"))
+
+ return bus_append_ioprio_parse_priority(m, field, eq);
+
+ if (STR_IN_SET(field,
+ "RuntimeDirectoryMode", "StateDirectoryMode", "CacheDirectoryMode",
+ "LogsDirectoryMode", "ConfigurationDirectoryMode", "UMask"))
+
+ return bus_append_parse_mode(m, field, eq);
+
+ if (streq(field, "TimerSlackNSec"))
+
+ return bus_append_parse_nsec(m, field, eq);
+
+ if (streq(field, "LogRateLimitIntervalSec"))
+
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (streq(field, "LogRateLimitBurst"))
+
+ return bus_append_safe_atou(m, field, eq);
+
+ if (streq(field, "MountFlags"))
+
+ return bus_append_mount_propagation_flags_from_string(m, field, eq);
+
+ if (STR_IN_SET(field, "Environment", "UnsetEnvironment", "PassEnvironment"))
+
+ return bus_append_strv(m, field, eq, EXTRACT_QUOTES|EXTRACT_CUNESCAPE);
+
+ if (streq(field, "EnvironmentFile")) {
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "EnvironmentFiles", "a(sb)", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", "EnvironmentFiles", "a(sb)", 1,
+ eq[0] == '-' ? eq + 1 : eq,
+ eq[0] == '-');
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "LogExtraFields")) {
+
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, 's', "LogExtraFields");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "aay");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "ay");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(m, 'y', eq, strlen(eq));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "StandardInput", "StandardOutput", "StandardError")) {
+ const char *n, *appended;
+
+ if ((n = startswith(eq, "fd:"))) {
+ appended = strjoina(field, "FileDescriptorName");
+ r = sd_bus_message_append(m, "(sv)", appended, "s", n);
+ } else if ((n = startswith(eq, "file:"))) {
+ appended = strjoina(field, "File");
+ r = sd_bus_message_append(m, "(sv)", appended, "s", n);
+ } else if ((n = startswith(eq, "append:"))) {
+ appended = strjoina(field, "FileToAppend");
+ r = sd_bus_message_append(m, "(sv)", appended, "s", n);
+ } else
+ r = sd_bus_message_append(m, "(sv)", field, "s", eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "StandardInputText")) {
+ _cleanup_free_ char *unescaped = NULL;
+
+ r = cunescape(eq, 0, &unescaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unescape text '%s': %m", eq);
+
+ if (!strextend(&unescaped, "\n", NULL))
+ return log_oom();
+
+ /* Note that we don't expand specifiers here, but that should be OK, as this is a programmatic
+ * interface anyway */
+
+ return bus_append_byte_array(m, field, unescaped, strlen(unescaped));
+ }
+
+ if (streq(field, "StandardInputData")) {
+ _cleanup_free_ void *decoded = NULL;
+ size_t sz;
+
+ r = unbase64mem(eq, (size_t) -1, &decoded, &sz);
+ if (r < 0)
+ return log_error_errno(r, "Failed to decode base64 data '%s': %m", eq);
+
+ return bus_append_byte_array(m, field, decoded, sz);
+ }
+
+ if ((suffix = startswith(field, "Limit"))) {
+ int rl;
+
+ rl = rlimit_from_string(suffix);
+ if (rl >= 0) {
+ const char *sn;
+ struct rlimit l;
+
+ r = rlimit_parse(rl, eq, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse resource limit: %s", eq);
+
+ r = sd_bus_message_append(m, "(sv)", field, "t", l.rlim_max);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ sn = strjoina(field, "Soft");
+ r = sd_bus_message_append(m, "(sv)", sn, "t", l.rlim_cur);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+ }
+
+ if (STR_IN_SET(field, "AppArmorProfile", "SmackProcessLabel")) {
+ int ignore = 0;
+ const char *s = eq;
+
+ if (eq[0] == '-') {
+ ignore = 1;
+ s = eq + 1;
+ }
+
+ r = sd_bus_message_append(m, "(sv)", field, "(bs)", ignore, s);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "CapabilityBoundingSet", "AmbientCapabilities")) {
+ uint64_t sum = 0;
+ bool invert = false;
+ const char *p = eq;
+
+ if (*p == '~') {
+ invert = true;
+ p++;
+ }
+
+ r = capability_set_from_string(p, &sum);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value %s: %m", field, eq);
+
+ sum = invert ? ~sum : sum;
+
+ r = sd_bus_message_append(m, "(sv)", field, "t", sum);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "CPUAffinity")) {
+ _cleanup_cpu_free_ cpu_set_t *cpuset = NULL;
+
+ r = parse_cpu_set(eq, &cpuset);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
+
+ return bus_append_byte_array(m, field, cpuset, CPU_ALLOC_SIZE(r));
+ }
+
+ if (STR_IN_SET(field, "RestrictAddressFamilies", "SystemCallFilter")) {
+ int whitelist = 1;
+ const char *p = eq;
+
+ if (*p == '~') {
+ whitelist = 0;
+ p++;
+ }
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "(bas)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'r', "bas");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, 'b', &whitelist);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Invalid syntax: %s", eq);
+
+ r = sd_bus_message_append_basic(m, 's', word);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "RestrictNamespaces")) {
+ bool invert = false;
+ unsigned long flags;
+
+ r = parse_boolean(eq);
+ if (r > 0)
+ flags = 0;
+ else if (r == 0)
+ flags = NAMESPACE_FLAGS_ALL;
+ else {
+ if (eq[0] == '~') {
+ invert = true;
+ eq++;
+ }
+
+ r = namespace_flags_from_string(eq, &flags);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value %s.", field, eq);
+ }
+
+ if (invert)
+ flags = (~flags) & NAMESPACE_FLAGS_ALL;
+
+ r = sd_bus_message_append(m, "(sv)", field, "t", (uint64_t) flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "BindPaths", "BindReadOnlyPaths")) {
+ const char *p = eq;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(ssbt)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(ssbt)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *source = NULL, *destination = NULL;
+ char *s = NULL, *d = NULL;
+ bool ignore_enoent = false;
+ uint64_t flags = MS_REC;
+
+ r = extract_first_word(&p, &source, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+ if (r == 0)
+ break;
+
+ s = source;
+ if (s[0] == '-') {
+ ignore_enoent = true;
+ s++;
+ }
+
+ if (p && p[-1] == ':') {
+ r = extract_first_word(&p, &destination, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Missing argument after ':': %s",
+ eq);
+
+ d = destination;
+
+ if (p && p[-1] == ':') {
+ _cleanup_free_ char *options = NULL;
+
+ r = extract_first_word(&p, &options, NULL, EXTRACT_QUOTES);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+
+ if (isempty(options) || streq(options, "rbind"))
+ flags = MS_REC;
+ else if (streq(options, "norbind"))
+ flags = 0;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown options: %s",
+ eq);
+ }
+ } else
+ d = s;
+
+ r = sd_bus_message_append(m, "(ssbt)", s, d, ignore_enoent, flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "TemporaryFileSystem")) {
+ const char *p = eq;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *path = NULL;
+ const char *w;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+ if (r == 0)
+ break;
+
+ w = word;
+ r = extract_first_word(&w, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse argument: %s",
+ p);
+
+ r = sd_bus_message_append(m, "(ss)", path, w);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_kill_property(sd_bus_message *m, const char *field, const char *eq) {
+
+ if (streq(field, "KillMode"))
+
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field, "SendSIGHUP", "SendSIGKILL"))
+
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "KillSignal", "FinalKillSignal", "WatchdogSignal"))
+
+ return bus_append_signal_from_string(m, field, eq);
+
+ return 0;
+}
+
+static int bus_append_mount_property(sd_bus_message *m, const char *field, const char *eq) {
+
+ if (STR_IN_SET(field, "What", "Where", "Options", "Type"))
+
+ return bus_append_string(m, field, eq);
+
+ if (streq(field, "TimeoutSec"))
+
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (streq(field, "DirectoryMode"))
+
+ return bus_append_parse_mode(m, field, eq);
+
+ if (STR_IN_SET(field, "SloppyOptions", "LazyUnmount", "ForceUnmount"))
+
+ return bus_append_parse_boolean(m, field, eq);
+
+ return 0;
+}
+
+static int bus_append_path_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (streq(field, "MakeDirectory"))
+
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (streq(field, "DirectoryMode"))
+
+ return bus_append_parse_mode(m, field, eq);
+
+ if (STR_IN_SET(field,
+ "PathExists", "PathExistsGlob", "PathChanged",
+ "PathModified", "DirectoryNotEmpty")) {
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "Paths", "a(ss)", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", "Paths", "a(ss)", 1, field, eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_service_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (STR_IN_SET(field,
+ "PIDFile", "Type", "Restart", "BusName", "NotifyAccess",
+ "USBFunctionDescriptors", "USBFunctionStrings"))
+
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field, "PermissionsStartOnly", "RootDirectoryStartOnly", "RemainAfterExit", "GuessMainPID"))
+
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "RestartSec", "TimeoutStartSec", "TimeoutStopSec", "RuntimeMaxSec", "WatchdogSec"))
+
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (streq(field, "TimeoutSec")) {
+
+ r = bus_append_parse_sec_rename(m, "TimeoutStartSec", eq);
+ if (r < 0)
+ return r;
+
+ return bus_append_parse_sec_rename(m, "TimeoutStopSec", eq);
+ }
+
+ if (streq(field, "FileDescriptorStoreMax"))
+
+ return bus_append_safe_atou(m, field, eq);
+
+ if (STR_IN_SET(field,
+ "ExecStartPre", "ExecStart", "ExecStartPost",
+ "ExecReload", "ExecStop", "ExecStopPost"))
+
+ return bus_append_exec_command(m, field, eq);
+
+ if (STR_IN_SET(field, "RestartPreventExitStatus", "RestartForceExitStatus", "SuccessExitStatus")) {
+ _cleanup_free_ int *status = NULL, *signal = NULL;
+ size_t sz_status = 0, sz_signal = 0;
+ const char *p;
+
+ for (p = eq;;) {
+ _cleanup_free_ char *word = NULL;
+ int val;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Invalid syntax in %s: %s", field, eq);
+
+ r = safe_atoi(word, &val);
+ if (r < 0) {
+ val = signal_from_string(word);
+ if (val < 0)
+ return log_error_errno(r, "Invalid status or signal %s in %s: %m", word, field);
+
+ signal = reallocarray(signal, sz_signal + 1, sizeof(int));
+ if (!signal)
+ return log_oom();
+
+ signal[sz_signal++] = val;
+ } else {
+ status = reallocarray(status, sz_status + 1, sizeof(int));
+ if (!status)
+ return log_oom();
+
+ status[sz_status++] = val;
+ }
+ }
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "(aiai)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'r', "aiai");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(m, 'i', status, sz_status);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(m, 'i', signal, sz_signal);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_socket_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (STR_IN_SET(field,
+ "Accept", "Writable", "KeepAlive", "NoDelay", "FreeBind", "Transparent", "Broadcast",
+ "PassCredentials", "PassSecurity", "ReusePort", "RemoveOnStop", "SELinuxContextFromNet"))
+
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "Priority", "IPTTL", "Mark"))
+
+ return bus_append_safe_atoi(m, field, eq);
+
+ if (streq(field, "IPTOS"))
+
+ return bus_append_ip_tos_from_string(m, field, eq);
+
+ if (STR_IN_SET(field, "Backlog", "MaxConnections", "MaxConnectionsPerSource", "KeepAliveProbes", "TriggerLimitBurst"))
+
+ return bus_append_safe_atou(m, field, eq);
+
+ if (STR_IN_SET(field, "SocketMode", "DirectoryMode"))
+
+ return bus_append_parse_mode(m, field, eq);
+
+ if (STR_IN_SET(field, "MessageQueueMaxMessages", "MessageQueueMessageSize"))
+
+ return bus_append_safe_atoi64(m, field, eq);
+
+ if (STR_IN_SET(field, "TimeoutSec", "KeepAliveTimeSec", "KeepAliveIntervalSec", "DeferAcceptSec", "TriggerLimitIntervalSec"))
+
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (STR_IN_SET(field, "ReceiveBuffer", "SendBuffer", "PipeSize"))
+
+ return bus_append_parse_size(m, field, eq, 1024);
+
+ if (STR_IN_SET(field, "ExecStartPre", "ExecStartPost", "ExecReload", "ExecStopPost"))
+
+ return bus_append_exec_command(m, field, eq);
+
+ if (STR_IN_SET(field,
+ "SmackLabel", "SmackLabelIPIn", "SmackLabelIPOut", "TCPCongestion",
+ "BindToDevice", "BindIPv6Only", "FileDescriptorName",
+ "SocketUser", "SocketGroup"))
+
+ return bus_append_string(m, field, eq);
+
+ if (streq(field, "Symlinks"))
+
+ return bus_append_strv(m, field, eq, EXTRACT_QUOTES);
+
+ if (streq(field, "SocketProtocol"))
+
+ return bus_append_parse_ip_protocol(m, field, eq);
+
+ if (STR_IN_SET(field,
+ "ListenStream", "ListenDatagram", "ListenSequentialPacket", "ListenNetlink",
+ "ListenSpecial", "ListenMessageQueue", "ListenFIFO", "ListenUSBFunction")) {
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "Listen", "a(ss)", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", "Listen", "a(ss)", 1, field + STRLEN("Listen"), eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+static int bus_append_timer_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (STR_IN_SET(field, "WakeSystem", "RemainAfterElapse", "Persistent"))
+
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "AccuracySec", "RandomizedDelaySec"))
+
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (STR_IN_SET(field,
+ "OnActiveSec", "OnBootSec", "OnStartupSec",
+ "OnUnitActiveSec","OnUnitInactiveSec")) {
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "TimersMonotonic", "a(st)", 0);
+ else {
+ usec_t t;
+ r = parse_sec(eq, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq);
+
+ r = sd_bus_message_append(m, "(sv)", "TimersMonotonic", "a(st)", 1, field, t);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "OnCalendar")) {
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "TimersCalendar", "a(ss)", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", "TimersCalendar", "a(ss)", 1, field, eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_unit_property(sd_bus_message *m, const char *field, const char *eq) {
+ ConditionType t = _CONDITION_TYPE_INVALID;
+ bool is_condition = false;
+ int r;
+
+ if (STR_IN_SET(field,
+ "Description", "SourcePath", "OnFailureJobMode",
+ "JobTimeoutAction", "JobTimeoutRebootArgument",
+ "StartLimitAction", "FailureAction", "SuccessAction",
+ "RebootArgument", "CollectMode"))
+
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field,
+ "StopWhenUnneeded", "RefuseManualStart", "RefuseManualStop",
+ "AllowIsolate", "IgnoreOnIsolate", "DefaultDependencies"))
+
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "JobTimeoutSec", "JobRunningTimeoutSec", "StartLimitIntervalSec"))
+
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (streq(field, "StartLimitBurst"))
+
+ return bus_append_safe_atou(m, field, eq);
+
+ if (STR_IN_SET(field, "SuccessActionExitStatus", "FailureActionExitStatus")) {
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "i", -1);
+ else {
+ uint8_t u;
+
+ r = safe_atou8(eq, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s=%s", field, eq);
+
+ r = sd_bus_message_append(m, "(sv)", field, "i", (int) u);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (unit_dependency_from_string(field) >= 0 ||
+ STR_IN_SET(field, "Documentation", "RequiresMountsFor"))
+
+ return bus_append_strv(m, field, eq, EXTRACT_QUOTES);
+
+ t = condition_type_from_string(field);
+ if (t >= 0)
+ is_condition = true;
+ else
+ t = assert_type_from_string(field);
+ if (t >= 0) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", is_condition ? "Conditions" : "Asserts", "a(sbbs)", 0);
+ else {
+ const char *p = eq;
+ int trigger, negate;
+
+ trigger = *p == '|';
+ if (trigger)
+ p++;
+
+ negate = *p == '!';
+ if (negate)
+ p++;
+
+ r = sd_bus_message_append(m, "(sv)", is_condition ? "Conditions" : "Asserts", "a(sbbs)", 1,
+ field, trigger, negate, p);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int bus_append_unit_property_assignment(sd_bus_message *m, UnitType t, const char *assignment) {
+ const char *eq, *field;
+ int r;
+
+ assert(m);
+ assert(assignment);
+
+ eq = strchr(assignment, '=');
+ if (!eq)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not an assignment: %s", assignment);
+
+ field = strndupa(assignment, eq - assignment);
+ eq++;
+
+ switch (t) {
+ case UNIT_SERVICE:
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_execute_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_kill_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_service_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_SOCKET:
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_execute_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_kill_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_socket_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_TIMER:
+ r = bus_append_timer_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_PATH:
+ r = bus_append_path_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_SLICE:
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_SCOPE:
+
+ if (streq(field, "TimeoutStopSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_kill_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_MOUNT:
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_execute_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_kill_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_mount_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ break;
+
+ case UNIT_AUTOMOUNT:
+ r = bus_append_automount_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ break;
+
+ case UNIT_TARGET:
+ case UNIT_DEVICE:
+ case UNIT_SWAP:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not supported unit type");
+
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid unit type");
+ }
+
+ r = bus_append_unit_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown assignment: %s", assignment);
+}
+
+int bus_append_unit_property_assignment_many(sd_bus_message *m, UnitType t, char **l) {
+ char **i;
+ int r;
+
+ assert(m);
+
+ STRV_FOREACH(i, l) {
+ r = bus_append_unit_property_assignment(m, t, *i);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+typedef struct BusWaitForJobs {
+ sd_bus *bus;
+ Set *jobs;
+
+ char *name;
+ char *result;
+
+ sd_bus_slot *slot_job_removed;
+ sd_bus_slot *slot_disconnected;
+} BusWaitForJobs;
+
+static int match_disconnected(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ assert(m);
+
+ log_error("Warning! D-Bus connection terminated.");
+ sd_bus_close(sd_bus_message_get_bus(m));
+
+ return 0;
+}
+
+static int match_job_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ const char *path, *unit, *result;
+ BusWaitForJobs *d = userdata;
+ uint32_t id;
+ char *found;
+ int r;
+
+ assert(m);
+ assert(d);
+
+ r = sd_bus_message_read(m, "uoss", &id, &path, &unit, &result);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ found = set_remove(d->jobs, (char*) path);
+ if (!found)
+ return 0;
+
+ free(found);
+
+ if (!isempty(result))
+ d->result = strdup(result);
+
+ if (!isempty(unit))
+ d->name = strdup(unit);
+
+ return 0;
+}
+
+void bus_wait_for_jobs_free(BusWaitForJobs *d) {
+ if (!d)
+ return;
+
+ set_free_free(d->jobs);
+
+ sd_bus_slot_unref(d->slot_disconnected);
+ sd_bus_slot_unref(d->slot_job_removed);
+
+ sd_bus_unref(d->bus);
+
+ free(d->name);
+ free(d->result);
+
+ free(d);
+}
+
+int bus_wait_for_jobs_new(sd_bus *bus, BusWaitForJobs **ret) {
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *d = NULL;
+ int r;
+
+ assert(bus);
+ assert(ret);
+
+ d = new0(BusWaitForJobs, 1);
+ if (!d)
+ return -ENOMEM;
+
+ d->bus = sd_bus_ref(bus);
+
+ /* When we are a bus client we match by sender. Direct
+ * connections OTOH have no initialized sender field, and
+ * hence we ignore the sender then */
+ r = sd_bus_match_signal_async(
+ bus,
+ &d->slot_job_removed,
+ bus->bus_client ? "org.freedesktop.systemd1" : NULL,
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "JobRemoved",
+ match_job_removed, NULL, d);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_match_signal_async(
+ bus,
+ &d->slot_disconnected,
+ "org.freedesktop.DBus.Local",
+ NULL,
+ "org.freedesktop.DBus.Local",
+ "Disconnected",
+ match_disconnected, NULL, d);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(d);
+
+ return 0;
+}
+
+static int bus_process_wait(sd_bus *bus) {
+ int r;
+
+ for (;;) {
+ r = sd_bus_process(bus, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0)
+ return r;
+ }
+}
+
+static int bus_job_get_service_result(BusWaitForJobs *d, char **result) {
+ _cleanup_free_ char *dbus_path = NULL;
+
+ assert(d);
+ assert(d->name);
+ assert(result);
+
+ if (!endswith(d->name, ".service"))
+ return -EINVAL;
+
+ dbus_path = unit_dbus_path_from_name(d->name);
+ if (!dbus_path)
+ return -ENOMEM;
+
+ return sd_bus_get_property_string(d->bus,
+ "org.freedesktop.systemd1",
+ dbus_path,
+ "org.freedesktop.systemd1.Service",
+ "Result",
+ NULL,
+ result);
+}
+
+static const struct {
+ const char *result, *explanation;
+} explanations [] = {
+ { "resources", "of unavailable resources or another system error" },
+ { "protocol", "the service did not take the steps required by its unit configuration" },
+ { "timeout", "a timeout was exceeded" },
+ { "exit-code", "the control process exited with error code" },
+ { "signal", "a fatal signal was delivered to the control process" },
+ { "core-dump", "a fatal signal was delivered causing the control process to dump core" },
+ { "watchdog", "the service failed to send watchdog ping" },
+ { "start-limit", "start of the service was attempted too often" }
+};
+
+static void log_job_error_with_service_result(const char* service, const char *result, const char* const* extra_args) {
+ _cleanup_free_ char *service_shell_quoted = NULL;
+ const char *systemctl = "systemctl", *journalctl = "journalctl";
+
+ assert(service);
+
+ service_shell_quoted = shell_maybe_quote(service, ESCAPE_BACKSLASH);
+
+ if (!strv_isempty((char**) extra_args)) {
+ _cleanup_free_ char *t;
+
+ t = strv_join((char**) extra_args, " ");
+ systemctl = strjoina("systemctl ", t ? : "<args>");
+ journalctl = strjoina("journalctl ", t ? : "<args>");
+ }
+
+ if (!isempty(result)) {
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(explanations); ++i)
+ if (streq(result, explanations[i].result))
+ break;
+
+ if (i < ELEMENTSOF(explanations)) {
+ log_error("Job for %s failed because %s.\n"
+ "See \"%s status %s\" and \"%s -xe\" for details.\n",
+ service,
+ explanations[i].explanation,
+ systemctl,
+ service_shell_quoted ?: "<service>",
+ journalctl);
+ goto finish;
+ }
+ }
+
+ log_error("Job for %s failed.\n"
+ "See \"%s status %s\" and \"%s -xe\" for details.\n",
+ service,
+ systemctl,
+ service_shell_quoted ?: "<service>",
+ journalctl);
+
+finish:
+ /* For some results maybe additional explanation is required */
+ if (streq_ptr(result, "start-limit"))
+ log_info("To force a start use \"%1$s reset-failed %2$s\"\n"
+ "followed by \"%1$s start %2$s\" again.",
+ systemctl,
+ service_shell_quoted ?: "<service>");
+}
+
+static int check_wait_response(BusWaitForJobs *d, bool quiet, const char* const* extra_args) {
+ assert(d->result);
+
+ if (!quiet) {
+ if (streq(d->result, "canceled"))
+ log_error("Job for %s canceled.", strna(d->name));
+ else if (streq(d->result, "timeout"))
+ log_error("Job for %s timed out.", strna(d->name));
+ else if (streq(d->result, "dependency"))
+ log_error("A dependency job for %s failed. See 'journalctl -xe' for details.", strna(d->name));
+ else if (streq(d->result, "invalid"))
+ log_error("%s is not active, cannot reload.", strna(d->name));
+ else if (streq(d->result, "assert"))
+ log_error("Assertion failed on job for %s.", strna(d->name));
+ else if (streq(d->result, "unsupported"))
+ log_error("Operation on or unit type of %s not supported on this system.", strna(d->name));
+ else if (streq(d->result, "collected"))
+ log_error("Queued job for %s was garbage collected.", strna(d->name));
+ else if (streq(d->result, "once"))
+ log_error("Unit %s was started already once and can't be started again.", strna(d->name));
+ else if (!STR_IN_SET(d->result, "done", "skipped")) {
+ if (d->name) {
+ _cleanup_free_ char *result = NULL;
+ int q;
+
+ q = bus_job_get_service_result(d, &result);
+ if (q < 0)
+ log_debug_errno(q, "Failed to get Result property of unit %s: %m", d->name);
+
+ log_job_error_with_service_result(d->name, result, extra_args);
+ } else
+ log_error("Job failed. See \"journalctl -xe\" for details.");
+ }
+ }
+
+ if (STR_IN_SET(d->result, "canceled", "collected"))
+ return -ECANCELED;
+ else if (streq(d->result, "timeout"))
+ return -ETIME;
+ else if (streq(d->result, "dependency"))
+ return -EIO;
+ else if (streq(d->result, "invalid"))
+ return -ENOEXEC;
+ else if (streq(d->result, "assert"))
+ return -EPROTO;
+ else if (streq(d->result, "unsupported"))
+ return -EOPNOTSUPP;
+ else if (streq(d->result, "once"))
+ return -ESTALE;
+ else if (STR_IN_SET(d->result, "done", "skipped"))
+ return 0;
+
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+ "Unexpected job result, assuming server side newer than us: %s", d->result);
+}
+
+int bus_wait_for_jobs(BusWaitForJobs *d, bool quiet, const char* const* extra_args) {
+ int r = 0;
+
+ assert(d);
+
+ while (!set_isempty(d->jobs)) {
+ int q;
+
+ q = bus_process_wait(d->bus);
+ if (q < 0)
+ return log_error_errno(q, "Failed to wait for response: %m");
+
+ if (d->result) {
+ q = check_wait_response(d, quiet, extra_args);
+ /* Return the first error as it is most likely to be
+ * meaningful. */
+ if (q < 0 && r == 0)
+ r = q;
+
+ log_debug_errno(q, "Got result %s/%m for job %s", strna(d->result), strna(d->name));
+ }
+
+ d->name = mfree(d->name);
+ d->result = mfree(d->result);
+ }
+
+ return r;
+}
+
+int bus_wait_for_jobs_add(BusWaitForJobs *d, const char *path) {
+ int r;
+
+ assert(d);
+
+ r = set_ensure_allocated(&d->jobs, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ return set_put_strdup(d->jobs, path);
+}
+
+int bus_wait_for_jobs_one(BusWaitForJobs *d, const char *path, bool quiet) {
+ int r;
+
+ r = bus_wait_for_jobs_add(d, path);
+ if (r < 0)
+ return log_oom();
+
+ return bus_wait_for_jobs(d, quiet, NULL);
+}
+
+int bus_deserialize_and_dump_unit_file_changes(sd_bus_message *m, bool quiet, UnitFileChange **changes, size_t *n_changes) {
+ const char *type, *path, *source;
+ int r;
+
+ /* changes is dereferenced when calling unit_file_dump_changes() later,
+ * so we have to make sure this is not NULL. */
+ assert(changes);
+ assert(n_changes);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(sss)", &type, &path, &source)) > 0) {
+ /* We expect only "success" changes to be sent over the bus.
+ Hence, reject anything negative. */
+ UnitFileChangeType ch = unit_file_change_type_from_string(type);
+
+ if (ch < 0) {
+ log_notice("Manager reported unknown change type \"%s\" for path \"%s\", ignoring.", type, path);
+ continue;
+ }
+
+ r = unit_file_changes_add(changes, n_changes, ch, path, source);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ unit_file_dump_changes(0, NULL, *changes, *n_changes, quiet);
+ return 0;
+}
+
+struct CGroupInfo {
+ char *cgroup_path;
+ bool is_const; /* If false, cgroup_path should be free()'d */
+
+ Hashmap *pids; /* PID → process name */
+ bool done;
+
+ struct CGroupInfo *parent;
+ LIST_FIELDS(struct CGroupInfo, siblings);
+ LIST_HEAD(struct CGroupInfo, children);
+ size_t n_children;
+};
+
+static bool IS_ROOT(const char *p) {
+ return isempty(p) || streq(p, "/");
+}
+
+static int add_cgroup(Hashmap *cgroups, const char *path, bool is_const, struct CGroupInfo **ret) {
+ struct CGroupInfo *parent = NULL, *cg;
+ int r;
+
+ assert(cgroups);
+ assert(ret);
+
+ if (IS_ROOT(path))
+ path = "/";
+
+ cg = hashmap_get(cgroups, path);
+ if (cg) {
+ *ret = cg;
+ return 0;
+ }
+
+ if (!IS_ROOT(path)) {
+ const char *e, *pp;
+
+ e = strrchr(path, '/');
+ if (!e)
+ return -EINVAL;
+
+ pp = strndupa(path, e - path);
+ if (!pp)
+ return -ENOMEM;
+
+ r = add_cgroup(cgroups, pp, false, &parent);
+ if (r < 0)
+ return r;
+ }
+
+ cg = new0(struct CGroupInfo, 1);
+ if (!cg)
+ return -ENOMEM;
+
+ if (is_const)
+ cg->cgroup_path = (char*) path;
+ else {
+ cg->cgroup_path = strdup(path);
+ if (!cg->cgroup_path) {
+ free(cg);
+ return -ENOMEM;
+ }
+ }
+
+ cg->is_const = is_const;
+ cg->parent = parent;
+
+ r = hashmap_put(cgroups, cg->cgroup_path, cg);
+ if (r < 0) {
+ if (!is_const)
+ free(cg->cgroup_path);
+ free(cg);
+ return r;
+ }
+
+ if (parent) {
+ LIST_PREPEND(siblings, parent->children, cg);
+ parent->n_children++;
+ }
+
+ *ret = cg;
+ return 1;
+}
+
+static int add_process(
+ Hashmap *cgroups,
+ const char *path,
+ pid_t pid,
+ const char *name) {
+
+ struct CGroupInfo *cg;
+ int r;
+
+ assert(cgroups);
+ assert(name);
+ assert(pid > 0);
+
+ r = add_cgroup(cgroups, path, true, &cg);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&cg->pids, &trivial_hash_ops);
+ if (r < 0)
+ return r;
+
+ return hashmap_put(cg->pids, PID_TO_PTR(pid), (void*) name);
+}
+
+static void remove_cgroup(Hashmap *cgroups, struct CGroupInfo *cg) {
+ assert(cgroups);
+ assert(cg);
+
+ while (cg->children)
+ remove_cgroup(cgroups, cg->children);
+
+ hashmap_remove(cgroups, cg->cgroup_path);
+
+ if (!cg->is_const)
+ free(cg->cgroup_path);
+
+ hashmap_free(cg->pids);
+
+ if (cg->parent)
+ LIST_REMOVE(siblings, cg->parent->children, cg);
+
+ free(cg);
+}
+
+static int cgroup_info_compare_func(struct CGroupInfo * const *a, struct CGroupInfo * const *b) {
+ return strcmp((*a)->cgroup_path, (*b)->cgroup_path);
+}
+
+static int dump_processes(
+ Hashmap *cgroups,
+ const char *cgroup_path,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags) {
+
+ struct CGroupInfo *cg;
+ int r;
+
+ assert(prefix);
+
+ if (IS_ROOT(cgroup_path))
+ cgroup_path = "/";
+
+ cg = hashmap_get(cgroups, cgroup_path);
+ if (!cg)
+ return 0;
+
+ if (!hashmap_isempty(cg->pids)) {
+ const char *name;
+ size_t n = 0, i;
+ pid_t *pids;
+ void *pidp;
+ Iterator j;
+ int width;
+
+ /* Order processes by their PID */
+ pids = newa(pid_t, hashmap_size(cg->pids));
+
+ HASHMAP_FOREACH_KEY(name, pidp, cg->pids, j)
+ pids[n++] = PTR_TO_PID(pidp);
+
+ assert(n == hashmap_size(cg->pids));
+ typesafe_qsort(pids, n, pid_compare_func);
+
+ width = DECIMAL_STR_WIDTH(pids[n-1]);
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *e = NULL;
+ const char *special;
+ bool more;
+
+ name = hashmap_get(cg->pids, PID_TO_PTR(pids[i]));
+ assert(name);
+
+ if (n_columns != 0) {
+ unsigned k;
+
+ k = MAX(LESS_BY(n_columns, 2U + width + 1U), 20U);
+
+ e = ellipsize(name, k, 100);
+ if (e)
+ name = e;
+ }
+
+ more = i+1 < n || cg->children;
+ special = special_glyph(more ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT);
+
+ fprintf(stdout, "%s%s%*"PID_PRI" %s\n",
+ prefix,
+ special,
+ width, pids[i],
+ name);
+ }
+ }
+
+ if (cg->children) {
+ struct CGroupInfo **children, *child;
+ size_t n = 0, i;
+
+ /* Order subcgroups by their name */
+ children = newa(struct CGroupInfo*, cg->n_children);
+ LIST_FOREACH(siblings, child, cg->children)
+ children[n++] = child;
+ assert(n == cg->n_children);
+ typesafe_qsort(children, n, cgroup_info_compare_func);
+
+ if (n_columns != 0)
+ n_columns = MAX(LESS_BY(n_columns, 2U), 20U);
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *pp = NULL;
+ const char *name, *special;
+ bool more;
+
+ child = children[i];
+
+ name = strrchr(child->cgroup_path, '/');
+ if (!name)
+ return -EINVAL;
+ name++;
+
+ more = i+1 < n;
+ special = special_glyph(more ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT);
+
+ fputs(prefix, stdout);
+ fputs(special, stdout);
+ fputs(name, stdout);
+ fputc('\n', stdout);
+
+ special = special_glyph(more ? SPECIAL_GLYPH_TREE_VERTICAL : SPECIAL_GLYPH_TREE_SPACE);
+
+ pp = strappend(prefix, special);
+ if (!pp)
+ return -ENOMEM;
+
+ r = dump_processes(cgroups, child->cgroup_path, pp, n_columns, flags);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ cg->done = true;
+ return 0;
+}
+
+static int dump_extra_processes(
+ Hashmap *cgroups,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags) {
+
+ _cleanup_free_ pid_t *pids = NULL;
+ _cleanup_hashmap_free_ Hashmap *names = NULL;
+ struct CGroupInfo *cg;
+ size_t n_allocated = 0, n = 0, k;
+ Iterator i;
+ int width, r;
+
+ /* Prints the extra processes, i.e. those that are in cgroups we haven't displayed yet. We show them as
+ * combined, sorted, linear list. */
+
+ HASHMAP_FOREACH(cg, cgroups, i) {
+ const char *name;
+ void *pidp;
+ Iterator j;
+
+ if (cg->done)
+ continue;
+
+ if (hashmap_isempty(cg->pids))
+ continue;
+
+ r = hashmap_ensure_allocated(&names, &trivial_hash_ops);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC(pids, n_allocated, n + hashmap_size(cg->pids)))
+ return -ENOMEM;
+
+ HASHMAP_FOREACH_KEY(name, pidp, cg->pids, j) {
+ pids[n++] = PTR_TO_PID(pidp);
+
+ r = hashmap_put(names, pidp, (void*) name);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (n == 0)
+ return 0;
+
+ typesafe_qsort(pids, n, pid_compare_func);
+ width = DECIMAL_STR_WIDTH(pids[n-1]);
+
+ for (k = 0; k < n; k++) {
+ _cleanup_free_ char *e = NULL;
+ const char *name;
+
+ name = hashmap_get(names, PID_TO_PTR(pids[k]));
+ assert(name);
+
+ if (n_columns != 0) {
+ unsigned z;
+
+ z = MAX(LESS_BY(n_columns, 2U + width + 1U), 20U);
+
+ e = ellipsize(name, z, 100);
+ if (e)
+ name = e;
+ }
+
+ fprintf(stdout, "%s%s %*" PID_PRI " %s\n",
+ prefix,
+ special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET),
+ width, pids[k],
+ name);
+ }
+
+ return 0;
+}
+
+int unit_show_processes(
+ sd_bus *bus,
+ const char *unit,
+ const char *cgroup_path,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Hashmap *cgroups = NULL;
+ struct CGroupInfo *cg;
+ int r;
+
+ assert(bus);
+ assert(unit);
+
+ if (flags & OUTPUT_FULL_WIDTH)
+ n_columns = 0;
+ else if (n_columns <= 0)
+ n_columns = columns();
+
+ prefix = strempty(prefix);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "GetUnitProcesses",
+ error,
+ &reply,
+ "s",
+ unit);
+ if (r < 0)
+ return r;
+
+ cgroups = hashmap_new(&path_hash_ops);
+ if (!cgroups)
+ return -ENOMEM;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(sus)");
+ if (r < 0)
+ goto finish;
+
+ for (;;) {
+ const char *path = NULL, *name = NULL;
+ uint32_t pid;
+
+ r = sd_bus_message_read(reply, "(sus)", &path, &pid, &name);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ break;
+
+ r = add_process(cgroups, path, pid, name);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ goto finish;
+
+ r = dump_processes(cgroups, cgroup_path, prefix, n_columns, flags);
+ if (r < 0)
+ goto finish;
+
+ r = dump_extra_processes(cgroups, prefix, n_columns, flags);
+
+finish:
+ while ((cg = hashmap_first(cgroups)))
+ remove_cgroup(cgroups, cg);
+
+ hashmap_free(cgroups);
+
+ return r;
+}
+
+int unit_load_state(sd_bus *bus, const char *name, char **load_state) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ path = unit_dbus_path_from_name(name);
+ if (!path)
+ return log_oom();
+
+ /* This function warns on it's own, because otherwise it'd be awkward to pass
+ * the dbus error message around. */
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "LoadState",
+ &error,
+ load_state);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get load state of %s: %s", name, bus_error_message(&error, r));
+
+ return 0;
+}
diff --git a/src/shared/bus-unit-util.h b/src/shared/bus-unit-util.h
new file mode 100644
index 0000000..4fc94b0
--- /dev/null
+++ b/src/shared/bus-unit-util.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "install.h"
+#include "output-mode.h"
+#include "sd-bus.h"
+#include "unit-def.h"
+
+typedef struct UnitInfo {
+ const char *machine;
+ const char *id;
+ const char *description;
+ const char *load_state;
+ const char *active_state;
+ const char *sub_state;
+ const char *following;
+ const char *unit_path;
+ uint32_t job_id;
+ const char *job_type;
+ const char *job_path;
+} UnitInfo;
+
+int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u);
+
+int bus_append_unit_property_assignment(sd_bus_message *m, UnitType t, const char *assignment);
+int bus_append_unit_property_assignment_many(sd_bus_message *m, UnitType t, char **l);
+
+typedef struct BusWaitForJobs BusWaitForJobs;
+
+int bus_wait_for_jobs_new(sd_bus *bus, BusWaitForJobs **ret);
+void bus_wait_for_jobs_free(BusWaitForJobs *d);
+int bus_wait_for_jobs_add(BusWaitForJobs *d, const char *path);
+int bus_wait_for_jobs(BusWaitForJobs *d, bool quiet, const char* const* extra_args);
+int bus_wait_for_jobs_one(BusWaitForJobs *d, const char *path, bool quiet);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BusWaitForJobs*, bus_wait_for_jobs_free);
+
+int bus_deserialize_and_dump_unit_file_changes(sd_bus_message *m, bool quiet, UnitFileChange **changes, size_t *n_changes);
+
+int unit_show_processes(sd_bus *bus, const char *unit, const char *cgroup_path, const char *prefix, unsigned n_columns, OutputFlags flags, sd_bus_error *error);
+
+int unit_load_state(sd_bus *bus, const char *name, char **load_state);
diff --git a/src/shared/bus-util.c b/src/shared/bus-util.c
new file mode 100644
index 0000000..cbcf698
--- /dev/null
+++ b/src/shared/bus-util.c
@@ -0,0 +1,1753 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "sd-bus-protocol.h"
+#include "sd-bus.h"
+#include "sd-daemon.h"
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-label.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "cap-list.h"
+#include "cgroup-util.h"
+#include "def.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "missing.h"
+#include "mountpoint-util.h"
+#include "nsflags.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "rlimit-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int name_owner_change_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ sd_event *e = userdata;
+
+ assert(m);
+ assert(e);
+
+ sd_bus_close(sd_bus_message_get_bus(m));
+ sd_event_exit(e, 0);
+
+ return 1;
+}
+
+int bus_async_unregister_and_exit(sd_event *e, sd_bus *bus, const char *name) {
+ const char *match;
+ const char *unique;
+ int r;
+
+ assert(e);
+ assert(bus);
+ assert(name);
+
+ /* We unregister the name here and then wait for the
+ * NameOwnerChanged signal for this event to arrive before we
+ * quit. We do this in order to make sure that any queued
+ * requests are still processed before we really exit. */
+
+ r = sd_bus_get_unique_name(bus, &unique);
+ if (r < 0)
+ return r;
+
+ match = strjoina(
+ "sender='org.freedesktop.DBus',"
+ "type='signal',"
+ "interface='org.freedesktop.DBus',"
+ "member='NameOwnerChanged',"
+ "path='/org/freedesktop/DBus',"
+ "arg0='", name, "',",
+ "arg1='", unique, "',",
+ "arg2=''");
+
+ r = sd_bus_add_match_async(bus, NULL, match, name_owner_change_callback, NULL, e);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_release_name_async(bus, NULL, name, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int bus_event_loop_with_idle(
+ sd_event *e,
+ sd_bus *bus,
+ const char *name,
+ usec_t timeout,
+ check_idle_t check_idle,
+ void *userdata) {
+ bool exiting = false;
+ int r, code;
+
+ assert(e);
+ assert(bus);
+ assert(name);
+
+ for (;;) {
+ bool idle;
+
+ r = sd_event_get_state(e);
+ if (r < 0)
+ return r;
+ if (r == SD_EVENT_FINISHED)
+ break;
+
+ if (check_idle)
+ idle = check_idle(userdata);
+ else
+ idle = true;
+
+ r = sd_event_run(e, exiting || !idle ? (uint64_t) -1 : timeout);
+ if (r < 0)
+ return r;
+
+ if (r == 0 && !exiting && idle) {
+
+ r = sd_bus_try_close(bus);
+ if (r == -EBUSY)
+ continue;
+
+ /* Fallback for dbus1 connections: we
+ * unregister the name and wait for the
+ * response to come through for it */
+ if (r == -EOPNOTSUPP) {
+
+ /* Inform the service manager that we
+ * are going down, so that it will
+ * queue all further start requests,
+ * instead of assuming we are already
+ * running. */
+ sd_notify(false, "STOPPING=1");
+
+ r = bus_async_unregister_and_exit(e, bus, name);
+ if (r < 0)
+ return r;
+
+ exiting = true;
+ continue;
+ }
+
+ if (r < 0)
+ return r;
+
+ sd_event_exit(e, 0);
+ break;
+ }
+ }
+
+ r = sd_event_get_exit_code(e, &code);
+ if (r < 0)
+ return r;
+
+ return code;
+}
+
+int bus_name_has_owner(sd_bus *c, const char *name, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *rep = NULL;
+ int r, has_owner = 0;
+
+ assert(c);
+ assert(name);
+
+ r = sd_bus_call_method(c,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/dbus",
+ "org.freedesktop.DBus",
+ "NameHasOwner",
+ error,
+ &rep,
+ "s",
+ name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_basic(rep, 'b', &has_owner);
+ if (r < 0)
+ return sd_bus_error_set_errno(error, r);
+
+ return has_owner;
+}
+
+static int check_good_user(sd_bus_message *m, uid_t good_user) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ uid_t sender_uid;
+ int r;
+
+ assert(m);
+
+ if (good_user == UID_INVALID)
+ return 0;
+
+ r = sd_bus_query_sender_creds(m, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ /* Don't trust augmented credentials for authorization */
+ assert_return((sd_bus_creds_get_augmented_mask(creds) & SD_BUS_CREDS_EUID) == 0, -EPERM);
+
+ r = sd_bus_creds_get_euid(creds, &sender_uid);
+ if (r < 0)
+ return r;
+
+ return sender_uid == good_user;
+}
+
+int bus_test_polkit(
+ sd_bus_message *call,
+ int capability,
+ const char *action,
+ const char **details,
+ uid_t good_user,
+ bool *_challenge,
+ sd_bus_error *e) {
+
+ int r;
+
+ assert(call);
+ assert(action);
+
+ /* Tests non-interactively! */
+
+ r = check_good_user(call, good_user);
+ if (r != 0)
+ return r;
+
+ r = sd_bus_query_sender_privilege(call, capability);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ return 1;
+#if ENABLE_POLKIT
+ else {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *request = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int authorized = false, challenge = false;
+ const char *sender, **k, **v;
+
+ sender = sd_bus_message_get_sender(call);
+ if (!sender)
+ return -EBADMSG;
+
+ r = sd_bus_message_new_method_call(
+ call->bus,
+ &request,
+ "org.freedesktop.PolicyKit1",
+ "/org/freedesktop/PolicyKit1/Authority",
+ "org.freedesktop.PolicyKit1.Authority",
+ "CheckAuthorization");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(
+ request,
+ "(sa{sv})s",
+ "system-bus-name", 1, "name", "s", sender,
+ action);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(request, 'a', "{ss}");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH_PAIR(k, v, details) {
+ r = sd_bus_message_append(request, "{ss}", *k, *v);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(request);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(request, "us", 0, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(call->bus, request, 0, e, &reply);
+ if (r < 0) {
+ /* Treat no PK available as access denied */
+ if (sd_bus_error_has_name(e, SD_BUS_ERROR_SERVICE_UNKNOWN)) {
+ sd_bus_error_free(e);
+ return -EACCES;
+ }
+
+ return r;
+ }
+
+ r = sd_bus_message_enter_container(reply, 'r', "bba{ss}");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "bb", &authorized, &challenge);
+ if (r < 0)
+ return r;
+
+ if (authorized)
+ return 1;
+
+ if (_challenge) {
+ *_challenge = challenge;
+ return 0;
+ }
+ }
+#endif
+
+ return -EACCES;
+}
+
+#if ENABLE_POLKIT
+
+typedef struct AsyncPolkitQuery {
+ sd_bus_message *request, *reply;
+ sd_bus_message_handler_t callback;
+ void *userdata;
+ sd_bus_slot *slot;
+ Hashmap *registry;
+} AsyncPolkitQuery;
+
+static void async_polkit_query_free(AsyncPolkitQuery *q) {
+
+ if (!q)
+ return;
+
+ sd_bus_slot_unref(q->slot);
+
+ if (q->registry && q->request)
+ hashmap_remove(q->registry, q->request);
+
+ sd_bus_message_unref(q->request);
+ sd_bus_message_unref(q->reply);
+
+ free(q);
+}
+
+static int async_polkit_callback(sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ AsyncPolkitQuery *q = userdata;
+ int r;
+
+ assert(reply);
+ assert(q);
+
+ q->slot = sd_bus_slot_unref(q->slot);
+ q->reply = sd_bus_message_ref(reply);
+
+ r = sd_bus_message_rewind(q->request, true);
+ if (r < 0) {
+ r = sd_bus_reply_method_errno(q->request, r, NULL);
+ goto finish;
+ }
+
+ r = q->callback(q->request, q->userdata, &error_buffer);
+ r = bus_maybe_reply_error(q->request, r, &error_buffer);
+
+finish:
+ async_polkit_query_free(q);
+
+ return r;
+}
+
+#endif
+
+int bus_verify_polkit_async(
+ sd_bus_message *call,
+ int capability,
+ const char *action,
+ const char **details,
+ bool interactive,
+ uid_t good_user,
+ Hashmap **registry,
+ sd_bus_error *error) {
+
+#if ENABLE_POLKIT
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *pk = NULL;
+ AsyncPolkitQuery *q;
+ const char *sender, **k, **v;
+ sd_bus_message_handler_t callback;
+ void *userdata;
+ int c;
+#endif
+ int r;
+
+ assert(call);
+ assert(action);
+ assert(registry);
+
+ r = check_good_user(call, good_user);
+ if (r != 0)
+ return r;
+
+#if ENABLE_POLKIT
+ q = hashmap_get(*registry, call);
+ if (q) {
+ int authorized, challenge;
+
+ /* This is the second invocation of this function, and
+ * there's already a response from polkit, let's
+ * process it */
+ assert(q->reply);
+
+ if (sd_bus_message_is_method_error(q->reply, NULL)) {
+ const sd_bus_error *e;
+
+ /* Copy error from polkit reply */
+ e = sd_bus_message_get_error(q->reply);
+ sd_bus_error_copy(error, e);
+
+ /* Treat no PK available as access denied */
+ if (sd_bus_error_has_name(e, SD_BUS_ERROR_SERVICE_UNKNOWN))
+ return -EACCES;
+
+ return -sd_bus_error_get_errno(e);
+ }
+
+ r = sd_bus_message_enter_container(q->reply, 'r', "bba{ss}");
+ if (r >= 0)
+ r = sd_bus_message_read(q->reply, "bb", &authorized, &challenge);
+
+ if (r < 0)
+ return r;
+
+ if (authorized)
+ return 1;
+
+ if (challenge)
+ return sd_bus_error_set(error, SD_BUS_ERROR_INTERACTIVE_AUTHORIZATION_REQUIRED, "Interactive authentication required.");
+
+ return -EACCES;
+ }
+#endif
+
+ r = sd_bus_query_sender_privilege(call, capability);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ return 1;
+
+#if ENABLE_POLKIT
+ if (sd_bus_get_current_message(call->bus) != call)
+ return -EINVAL;
+
+ callback = sd_bus_get_current_handler(call->bus);
+ if (!callback)
+ return -EINVAL;
+
+ userdata = sd_bus_get_current_userdata(call->bus);
+
+ sender = sd_bus_message_get_sender(call);
+ if (!sender)
+ return -EBADMSG;
+
+ c = sd_bus_message_get_allow_interactive_authorization(call);
+ if (c < 0)
+ return c;
+ if (c > 0)
+ interactive = true;
+
+ r = hashmap_ensure_allocated(registry, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_call(
+ call->bus,
+ &pk,
+ "org.freedesktop.PolicyKit1",
+ "/org/freedesktop/PolicyKit1/Authority",
+ "org.freedesktop.PolicyKit1.Authority",
+ "CheckAuthorization");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(
+ pk,
+ "(sa{sv})s",
+ "system-bus-name", 1, "name", "s", sender,
+ action);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(pk, 'a', "{ss}");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH_PAIR(k, v, details) {
+ r = sd_bus_message_append(pk, "{ss}", *k, *v);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(pk);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(pk, "us", interactive, NULL);
+ if (r < 0)
+ return r;
+
+ q = new0(AsyncPolkitQuery, 1);
+ if (!q)
+ return -ENOMEM;
+
+ q->request = sd_bus_message_ref(call);
+ q->callback = callback;
+ q->userdata = userdata;
+
+ r = hashmap_put(*registry, call, q);
+ if (r < 0) {
+ async_polkit_query_free(q);
+ return r;
+ }
+
+ q->registry = *registry;
+
+ r = sd_bus_call_async(call->bus, &q->slot, pk, async_polkit_callback, q, 0);
+ if (r < 0) {
+ async_polkit_query_free(q);
+ return r;
+ }
+
+ return 0;
+#endif
+
+ return -EACCES;
+}
+
+void bus_verify_polkit_async_registry_free(Hashmap *registry) {
+#if ENABLE_POLKIT
+ hashmap_free_with_destructor(registry, async_polkit_query_free);
+#endif
+}
+
+int bus_check_peercred(sd_bus *c) {
+ struct ucred ucred;
+ int fd, r;
+
+ assert(c);
+
+ fd = sd_bus_get_fd(c);
+ if (fd < 0)
+ return fd;
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ if (ucred.uid != 0 && ucred.uid != geteuid())
+ return -EPERM;
+
+ return 1;
+}
+
+int bus_connect_system_systemd(sd_bus **_bus) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(_bus);
+
+ if (geteuid() != 0)
+ return sd_bus_default_system(_bus);
+
+ /* If we are root then let's talk directly to the system
+ * instance, instead of going via the bus */
+
+ r = sd_bus_new(&bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_address(bus, "unix:path=/run/systemd/private");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_start(bus);
+ if (r < 0)
+ return sd_bus_default_system(_bus);
+
+ r = bus_check_peercred(bus);
+ if (r < 0)
+ return r;
+
+ *_bus = TAKE_PTR(bus);
+
+ return 0;
+}
+
+int bus_connect_user_systemd(sd_bus **_bus) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *ee = NULL;
+ const char *e;
+ int r;
+
+ assert(_bus);
+
+ e = secure_getenv("XDG_RUNTIME_DIR");
+ if (!e)
+ return sd_bus_default_user(_bus);
+
+ ee = bus_address_escape(e);
+ if (!ee)
+ return -ENOMEM;
+
+ r = sd_bus_new(&bus);
+ if (r < 0)
+ return r;
+
+ bus->address = strjoin("unix:path=", ee, "/systemd/private");
+ if (!bus->address)
+ return -ENOMEM;
+
+ r = sd_bus_start(bus);
+ if (r < 0)
+ return sd_bus_default_user(_bus);
+
+ r = bus_check_peercred(bus);
+ if (r < 0)
+ return r;
+
+ *_bus = TAKE_PTR(bus);
+
+ return 0;
+}
+
+int bus_print_property_value(const char *name, const char *expected_value, bool only_value, const char *fmt, ...) {
+ va_list ap;
+ int r;
+
+ assert(name);
+ assert(fmt);
+
+ if (expected_value) {
+ _cleanup_free_ char *s = NULL;
+
+ va_start(ap, fmt);
+ r = vasprintf(&s, fmt, ap);
+ va_end(ap);
+ if (r < 0)
+ return -ENOMEM;
+
+ if (streq_ptr(expected_value, s)) {
+ if (only_value)
+ puts(s);
+ else
+ printf("%s=%s\n", name, s);
+ }
+
+ return 0;
+ }
+
+ if (!only_value)
+ printf("%s=", name);
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+ puts("");
+
+ return 0;
+}
+
+static int bus_print_property(const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all) {
+ char type;
+ const char *contents;
+ int r;
+
+ assert(name);
+ assert(m);
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_STRING: {
+ const char *s;
+
+ r = sd_bus_message_read_basic(m, type, &s);
+ if (r < 0)
+ return r;
+
+ if (all || !isempty(s)) {
+ bool good;
+
+ /* This property has a single value, so we need to take
+ * care not to print a new line, everything else is OK. */
+ good = !strchr(s, '\n');
+ bus_print_property_value(name, expected_value, value, "%s", good ? s : "[unprintable]");
+ }
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_BOOLEAN: {
+ int b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return r;
+
+ if (expected_value && parse_boolean(expected_value) != b)
+ return 1;
+
+ bus_print_property_value(name, NULL, value, "%s", yes_no(b));
+ return 1;
+ }
+
+ case SD_BUS_TYPE_UINT64: {
+ uint64_t u;
+
+ r = sd_bus_message_read_basic(m, type, &u);
+ if (r < 0)
+ return r;
+
+ /* Yes, heuristics! But we can change this check
+ * should it turn out to not be sufficient */
+
+ if (endswith(name, "Timestamp") ||
+ STR_IN_SET(name, "NextElapseUSecRealtime", "LastTriggerUSec", "TimeUSec", "RTCTimeUSec")) {
+ char timestamp[FORMAT_TIMESTAMP_MAX];
+ const char *t;
+
+ t = format_timestamp(timestamp, sizeof(timestamp), u);
+ if (t || all)
+ bus_print_property_value(name, expected_value, value, "%s", strempty(t));
+
+ } else if (strstr(name, "USec")) {
+ char timespan[FORMAT_TIMESPAN_MAX];
+
+ (void) format_timespan(timespan, sizeof(timespan), u, 0);
+ bus_print_property_value(name, expected_value, value, "%s", timespan);
+
+ } else if (streq(name, "RestrictNamespaces")) {
+ _cleanup_free_ char *s = NULL;
+ const char *result;
+
+ if ((u & NAMESPACE_FLAGS_ALL) == 0)
+ result = "yes";
+ else if ((u & NAMESPACE_FLAGS_ALL) == NAMESPACE_FLAGS_ALL)
+ result = "no";
+ else {
+ r = namespace_flags_to_string(u, &s);
+ if (r < 0)
+ return r;
+
+ result = s;
+ }
+
+ bus_print_property_value(name, expected_value, value, "%s", result);
+
+ } else if (streq(name, "MountFlags")) {
+ const char *result;
+
+ result = mount_propagation_flags_to_string(u);
+ if (!result)
+ return -EINVAL;
+
+ bus_print_property_value(name, expected_value, value, "%s", result);
+
+ } else if (STR_IN_SET(name, "CapabilityBoundingSet", "AmbientCapabilities")) {
+ _cleanup_free_ char *s = NULL;
+
+ r = capability_set_to_string_alloc(u, &s);
+ if (r < 0)
+ return r;
+
+ bus_print_property_value(name, expected_value, value, "%s", s);
+
+ } else if ((STR_IN_SET(name, "CPUWeight", "StartupCPUWeight", "IOWeight", "StartupIOWeight") && u == CGROUP_WEIGHT_INVALID) ||
+ (STR_IN_SET(name, "CPUShares", "StartupCPUShares") && u == CGROUP_CPU_SHARES_INVALID) ||
+ (STR_IN_SET(name, "BlockIOWeight", "StartupBlockIOWeight") && u == CGROUP_BLKIO_WEIGHT_INVALID) ||
+ (STR_IN_SET(name, "MemoryCurrent", "TasksCurrent") && u == (uint64_t) -1) ||
+ (endswith(name, "NSec") && u == (uint64_t) -1))
+
+ bus_print_property_value(name, expected_value, value, "%s", "[not set]");
+
+ else if ((STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax", "MemoryLimit") && u == CGROUP_LIMIT_MAX) ||
+ (STR_IN_SET(name, "TasksMax", "DefaultTasksMax") && u == (uint64_t) -1) ||
+ (startswith(name, "Limit") && u == (uint64_t) -1) ||
+ (startswith(name, "DefaultLimit") && u == (uint64_t) -1))
+
+ bus_print_property_value(name, expected_value, value, "%s", "infinity");
+ else
+ bus_print_property_value(name, expected_value, value, "%"PRIu64, u);
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_INT64: {
+ int64_t i;
+
+ r = sd_bus_message_read_basic(m, type, &i);
+ if (r < 0)
+ return r;
+
+ bus_print_property_value(name, expected_value, value, "%"PRIi64, i);
+ return 1;
+ }
+
+ case SD_BUS_TYPE_UINT32: {
+ uint32_t u;
+
+ r = sd_bus_message_read_basic(m, type, &u);
+ if (r < 0)
+ return r;
+
+ if (strstr(name, "UMask") || strstr(name, "Mode"))
+ bus_print_property_value(name, expected_value, value, "%04o", u);
+
+ else if (streq(name, "UID")) {
+ if (u == UID_INVALID)
+ bus_print_property_value(name, expected_value, value, "%s", "[not set]");
+ else
+ bus_print_property_value(name, expected_value, value, "%"PRIu32, u);
+ } else if (streq(name, "GID")) {
+ if (u == GID_INVALID)
+ bus_print_property_value(name, expected_value, value, "%s", "[not set]");
+ else
+ bus_print_property_value(name, expected_value, value, "%"PRIu32, u);
+ } else
+ bus_print_property_value(name, expected_value, value, "%"PRIu32, u);
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_INT32: {
+ int32_t i;
+
+ r = sd_bus_message_read_basic(m, type, &i);
+ if (r < 0)
+ return r;
+
+ bus_print_property_value(name, expected_value, value, "%"PRIi32, i);
+ return 1;
+ }
+
+ case SD_BUS_TYPE_DOUBLE: {
+ double d;
+
+ r = sd_bus_message_read_basic(m, type, &d);
+ if (r < 0)
+ return r;
+
+ bus_print_property_value(name, expected_value, value, "%g", d);
+ return 1;
+ }
+
+ case SD_BUS_TYPE_ARRAY:
+ if (streq(contents, "s")) {
+ bool first = true;
+ const char *str;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, contents);
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &str)) > 0) {
+ bool good;
+
+ if (first && !value)
+ printf("%s=", name);
+
+ /* This property has multiple space-separated values, so
+ * neither spaces nor newlines can be allowed in a value. */
+ good = str[strcspn(str, " \n")] == '\0';
+
+ printf("%s%s", first ? "" : " ", good ? str : "[unprintable]");
+
+ first = false;
+ }
+ if (r < 0)
+ return r;
+
+ if (first && all && !value)
+ printf("%s=", name);
+ if (!first || all)
+ puts("");
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 1;
+
+ } else if (streq(contents, "y")) {
+ const uint8_t *u;
+ size_t n;
+
+ r = sd_bus_message_read_array(m, SD_BUS_TYPE_BYTE, (const void**) &u, &n);
+ if (r < 0)
+ return r;
+
+ if (all || n > 0) {
+ unsigned i;
+
+ if (!value)
+ printf("%s=", name);
+
+ for (i = 0; i < n; i++)
+ printf("%02x", u[i]);
+
+ puts("");
+ }
+
+ return 1;
+
+ } else if (streq(contents, "u")) {
+ uint32_t *u;
+ size_t n;
+
+ r = sd_bus_message_read_array(m, SD_BUS_TYPE_UINT32, (const void**) &u, &n);
+ if (r < 0)
+ return r;
+
+ if (all || n > 0) {
+ unsigned i;
+
+ if (!value)
+ printf("%s=", name);
+
+ for (i = 0; i < n; i++)
+ printf("%08x", u[i]);
+
+ puts("");
+ }
+
+ return 1;
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+int bus_message_print_all_properties(
+ sd_bus_message *m,
+ bus_message_print_t func,
+ char **filter,
+ bool value,
+ bool all,
+ Set **found_properties) {
+
+ int r;
+
+ assert(m);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) {
+ _cleanup_free_ char *name_with_equal = NULL;
+ const char *name, *contents, *expected_value = NULL;
+
+ r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &name);
+ if (r < 0)
+ return r;
+
+ if (found_properties) {
+ r = set_ensure_allocated(found_properties, &string_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(*found_properties, name);
+ if (r < 0 && r != -EEXIST)
+ return log_oom();
+ }
+
+ name_with_equal = strappend(name, "=");
+ if (!name_with_equal)
+ return log_oom();
+
+ if (!filter || strv_find(filter, name) ||
+ (expected_value = strv_find_startswith(filter, name_with_equal))) {
+ r = sd_bus_message_peek_type(m, NULL, &contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents);
+ if (r < 0)
+ return r;
+
+ if (func)
+ r = func(name, expected_value, m, value, all);
+ if (!func || r == 0)
+ r = bus_print_property(name, expected_value, m, value, all);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (all && !expected_value)
+ printf("%s=[unprintable]\n", name);
+ /* skip what we didn't read */
+ r = sd_bus_message_skip(m, contents);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ } else {
+ r = sd_bus_message_skip(m, "v");
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int bus_print_all_properties(
+ sd_bus *bus,
+ const char *dest,
+ const char *path,
+ bus_message_print_t func,
+ char **filter,
+ bool value,
+ bool all,
+ Set **found_properties) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+
+ r = sd_bus_call_method(bus,
+ dest,
+ path,
+ "org.freedesktop.DBus.Properties",
+ "GetAll",
+ &error,
+ &reply,
+ "s", "");
+ if (r < 0)
+ return r;
+
+ return bus_message_print_all_properties(reply, func, filter, value, all, found_properties);
+}
+
+int bus_map_id128(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ sd_id128_t *p = userdata;
+ const void *v;
+ size_t n;
+ int r;
+
+ r = sd_bus_message_read_array(m, SD_BUS_TYPE_BYTE, &v, &n);
+ if (r < 0)
+ return r;
+
+ if (n == 0)
+ *p = SD_ID128_NULL;
+ else if (n == 16)
+ memcpy((*p).bytes, v, n);
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+static int map_basic(sd_bus *bus, const char *member, sd_bus_message *m, unsigned flags, sd_bus_error *error, void *userdata) {
+ char type;
+ int r;
+
+ r = sd_bus_message_peek_type(m, &type, NULL);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_STRING: {
+ const char **p = userdata;
+ const char *s;
+
+ r = sd_bus_message_read_basic(m, type, &s);
+ if (r < 0)
+ return r;
+
+ if (isempty(s))
+ s = NULL;
+
+ if (flags & BUS_MAP_STRDUP)
+ return free_and_strdup((char **) userdata, s);
+
+ *p = s;
+ return 0;
+ }
+
+ case SD_BUS_TYPE_ARRAY: {
+ _cleanup_strv_free_ char **l = NULL;
+ char ***p = userdata;
+
+ r = bus_message_read_strv_extend(m, &l);
+ if (r < 0)
+ return r;
+
+ return strv_free_and_replace(*p, l);
+ }
+
+ case SD_BUS_TYPE_BOOLEAN: {
+ int b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return r;
+
+ if (flags & BUS_MAP_BOOLEAN_AS_BOOL)
+ *(bool*) userdata = b;
+ else
+ *(int*) userdata = b;
+
+ return 0;
+ }
+
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32: {
+ uint32_t u, *p = userdata;
+
+ r = sd_bus_message_read_basic(m, type, &u);
+ if (r < 0)
+ return r;
+
+ *p = u;
+ return 0;
+ }
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64: {
+ uint64_t t, *p = userdata;
+
+ r = sd_bus_message_read_basic(m, type, &t);
+ if (r < 0)
+ return r;
+
+ *p = t;
+ return 0;
+ }
+
+ case SD_BUS_TYPE_DOUBLE: {
+ double d, *p = userdata;
+
+ r = sd_bus_message_read_basic(m, type, &d);
+ if (r < 0)
+ return r;
+
+ *p = d;
+ return 0;
+ }}
+
+ return -EOPNOTSUPP;
+}
+
+int bus_message_map_all_properties(
+ sd_bus_message *m,
+ const struct bus_properties_map *map,
+ unsigned flags,
+ sd_bus_error *error,
+ void *userdata) {
+
+ int r;
+
+ assert(m);
+ assert(map);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) {
+ const struct bus_properties_map *prop;
+ const char *member;
+ const char *contents;
+ void *v;
+ unsigned i;
+
+ r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &member);
+ if (r < 0)
+ return r;
+
+ for (i = 0, prop = NULL; map[i].member; i++)
+ if (streq(map[i].member, member)) {
+ prop = &map[i];
+ break;
+ }
+
+ if (prop) {
+ r = sd_bus_message_peek_type(m, NULL, &contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents);
+ if (r < 0)
+ return r;
+
+ v = (uint8_t *)userdata + prop->offset;
+ if (map[i].set)
+ r = prop->set(sd_bus_message_get_bus(m), member, m, error, v);
+ else
+ r = map_basic(sd_bus_message_get_bus(m), member, m, flags, error, v);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ } else {
+ r = sd_bus_message_skip(m, "v");
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_exit_container(m);
+}
+
+int bus_map_all_properties(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const struct bus_properties_map *map,
+ unsigned flags,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ void *userdata) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+ assert(destination);
+ assert(path);
+ assert(map);
+ assert(reply || (flags & BUS_MAP_STRDUP));
+
+ r = sd_bus_call_method(
+ bus,
+ destination,
+ path,
+ "org.freedesktop.DBus.Properties",
+ "GetAll",
+ error,
+ &m,
+ "s", "");
+ if (r < 0)
+ return r;
+
+ r = bus_message_map_all_properties(m, map, flags, error, userdata);
+ if (r < 0)
+ return r;
+
+ if (reply)
+ *reply = sd_bus_message_ref(m);
+
+ return r;
+}
+
+int bus_connect_transport(BusTransport transport, const char *host, bool user, sd_bus **ret) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(transport >= 0);
+ assert(transport < _BUS_TRANSPORT_MAX);
+ assert(ret);
+
+ assert_return((transport == BUS_TRANSPORT_LOCAL) == !host, -EINVAL);
+ assert_return(transport == BUS_TRANSPORT_LOCAL || !user, -EOPNOTSUPP);
+
+ switch (transport) {
+
+ case BUS_TRANSPORT_LOCAL:
+ if (user)
+ r = sd_bus_default_user(&bus);
+ else {
+ if (sd_booted() <= 0) {
+ /* Print a friendly message when the local system is actually not running systemd as PID 1. */
+ log_error("System has not been booted with systemd as init system (PID 1). Can't operate.");
+
+ return -EHOSTDOWN;
+ }
+ r = sd_bus_default_system(&bus);
+ }
+ break;
+
+ case BUS_TRANSPORT_REMOTE:
+ r = sd_bus_open_system_remote(&bus, host);
+ break;
+
+ case BUS_TRANSPORT_MACHINE:
+ r = sd_bus_open_system_machine(&bus, host);
+ break;
+
+ default:
+ assert_not_reached("Hmm, unknown transport type.");
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_exit_on_disconnect(bus, true);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(bus);
+
+ return 0;
+}
+
+int bus_connect_transport_systemd(BusTransport transport, const char *host, bool user, sd_bus **bus) {
+ int r;
+
+ assert(transport >= 0);
+ assert(transport < _BUS_TRANSPORT_MAX);
+ assert(bus);
+
+ assert_return((transport == BUS_TRANSPORT_LOCAL) == !host, -EINVAL);
+ assert_return(transport == BUS_TRANSPORT_LOCAL || !user, -EOPNOTSUPP);
+
+ switch (transport) {
+
+ case BUS_TRANSPORT_LOCAL:
+ if (user)
+ r = bus_connect_user_systemd(bus);
+ else {
+ if (sd_booted() <= 0)
+ /* Print a friendly message when the local system is actually not running systemd as PID 1. */
+ return log_error_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
+ "System has not been booted with systemd as init system (PID 1). Can't operate.");
+ r = bus_connect_system_systemd(bus);
+ }
+ break;
+
+ case BUS_TRANSPORT_REMOTE:
+ r = sd_bus_open_system_remote(bus, host);
+ break;
+
+ case BUS_TRANSPORT_MACHINE:
+ r = sd_bus_open_system_machine(bus, host);
+ break;
+
+ default:
+ assert_not_reached("Hmm, unknown transport type.");
+ }
+
+ return r;
+}
+
+int bus_property_get_bool(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int b = *(bool*) userdata;
+
+ return sd_bus_message_append_basic(reply, 'b', &b);
+}
+
+int bus_property_set_bool(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int b, r;
+
+ r = sd_bus_message_read(value, "b", &b);
+ if (r < 0)
+ return r;
+
+ *(bool*) userdata = b;
+ return 0;
+}
+
+int bus_property_get_id128(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ sd_id128_t *id = userdata;
+
+ if (sd_id128_is_null(*id)) /* Add an empty array if the ID is zero */
+ return sd_bus_message_append(reply, "ay", 0);
+ else
+ return sd_bus_message_append_array(reply, 'y', id->bytes, 16);
+}
+
+#if __SIZEOF_SIZE_T__ != 8
+int bus_property_get_size(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ uint64_t sz = *(size_t*) userdata;
+
+ return sd_bus_message_append_basic(reply, 't', &sz);
+}
+#endif
+
+#if __SIZEOF_LONG__ != 8
+int bus_property_get_long(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int64_t l = *(long*) userdata;
+
+ return sd_bus_message_append_basic(reply, 'x', &l);
+}
+
+int bus_property_get_ulong(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ uint64_t ul = *(unsigned long*) userdata;
+
+ return sd_bus_message_append_basic(reply, 't', &ul);
+}
+#endif
+
+int bus_log_parse_error(int r) {
+ return log_error_errno(r, "Failed to parse bus message: %m");
+}
+
+int bus_log_create_error(int r) {
+ return log_error_errno(r, "Failed to create bus message: %m");
+}
+
+/**
+ * bus_path_encode_unique() - encode unique object path
+ * @b: bus connection or NULL
+ * @prefix: object path prefix
+ * @sender_id: unique-name of client, or NULL
+ * @external_id: external ID to be chosen by client, or NULL
+ * @ret_path: storage for encoded object path pointer
+ *
+ * Whenever we provide a bus API that allows clients to create and manage
+ * server-side objects, we need to provide a unique name for these objects. If
+ * we let the server choose the name, we suffer from a race condition: If a
+ * client creates an object asynchronously, it cannot destroy that object until
+ * it received the method reply. It cannot know the name of the new object,
+ * thus, it cannot destroy it. Furthermore, it enforces a round-trip.
+ *
+ * Therefore, many APIs allow the client to choose the unique name for newly
+ * created objects. There're two problems to solve, though:
+ * 1) Object names are usually defined via dbus object paths, which are
+ * usually globally namespaced. Therefore, multiple clients must be able
+ * to choose unique object names without interference.
+ * 2) If multiple libraries share the same bus connection, they must be
+ * able to choose unique object names without interference.
+ * The first problem is solved easily by prefixing a name with the
+ * unique-bus-name of a connection. The server side must enforce this and
+ * reject any other name. The second problem is solved by providing unique
+ * suffixes from within sd-bus.
+ *
+ * This helper allows clients to create unique object-paths. It uses the
+ * template '/prefix/sender_id/external_id' and returns the new path in
+ * @ret_path (must be freed by the caller).
+ * If @sender_id is NULL, the unique-name of @b is used. If @external_id is
+ * NULL, this function allocates a unique suffix via @b (by requesting a new
+ * cookie). If both @sender_id and @external_id are given, @b can be passed as
+ * NULL.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int bus_path_encode_unique(sd_bus *b, const char *prefix, const char *sender_id, const char *external_id, char **ret_path) {
+ _cleanup_free_ char *sender_label = NULL, *external_label = NULL;
+ char external_buf[DECIMAL_STR_MAX(uint64_t)], *p;
+ int r;
+
+ assert_return(b || (sender_id && external_id), -EINVAL);
+ assert_return(object_path_is_valid(prefix), -EINVAL);
+ assert_return(ret_path, -EINVAL);
+
+ if (!sender_id) {
+ r = sd_bus_get_unique_name(b, &sender_id);
+ if (r < 0)
+ return r;
+ }
+
+ if (!external_id) {
+ xsprintf(external_buf, "%"PRIu64, ++b->cookie);
+ external_id = external_buf;
+ }
+
+ sender_label = bus_label_escape(sender_id);
+ if (!sender_label)
+ return -ENOMEM;
+
+ external_label = bus_label_escape(external_id);
+ if (!external_label)
+ return -ENOMEM;
+
+ p = strjoin(prefix, "/", sender_label, "/", external_label);
+ if (!p)
+ return -ENOMEM;
+
+ *ret_path = p;
+ return 0;
+}
+
+/**
+ * bus_path_decode_unique() - decode unique object path
+ * @path: object path to decode
+ * @prefix: object path prefix
+ * @ret_sender: output parameter for sender-id label
+ * @ret_external: output parameter for external-id label
+ *
+ * This does the reverse of bus_path_encode_unique() (see its description for
+ * details). Both trailing labels, sender-id and external-id, are unescaped and
+ * returned in the given output parameters (the caller must free them).
+ *
+ * Note that this function returns 0 if the path does not match the template
+ * (see bus_path_encode_unique()), 1 if it matched.
+ *
+ * Returns: Negative error code on failure, 0 if the given object path does not
+ * match the template (return parameters are set to NULL), 1 if it was
+ * parsed successfully (return parameters contain allocated labels).
+ */
+int bus_path_decode_unique(const char *path, const char *prefix, char **ret_sender, char **ret_external) {
+ const char *p, *q;
+ char *sender, *external;
+
+ assert(object_path_is_valid(path));
+ assert(object_path_is_valid(prefix));
+ assert(ret_sender);
+ assert(ret_external);
+
+ p = object_path_startswith(path, prefix);
+ if (!p) {
+ *ret_sender = NULL;
+ *ret_external = NULL;
+ return 0;
+ }
+
+ q = strchr(p, '/');
+ if (!q) {
+ *ret_sender = NULL;
+ *ret_external = NULL;
+ return 0;
+ }
+
+ sender = bus_label_unescape_n(p, q - p);
+ external = bus_label_unescape(q + 1);
+ if (!sender || !external) {
+ free(sender);
+ free(external);
+ return -ENOMEM;
+ }
+
+ *ret_sender = sender;
+ *ret_external = external;
+ return 1;
+}
+
+int bus_property_get_rlimit(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ const char *is_soft;
+ struct rlimit *rl;
+ uint64_t u;
+ rlim_t x;
+
+ assert(bus);
+ assert(reply);
+ assert(userdata);
+
+ is_soft = endswith(property, "Soft");
+
+ rl = *(struct rlimit**) userdata;
+ if (rl)
+ x = is_soft ? rl->rlim_cur : rl->rlim_max;
+ else {
+ struct rlimit buf = {};
+ const char *s, *p;
+ int z;
+
+ /* Chop off "Soft" suffix */
+ s = is_soft ? strndupa(property, is_soft - property) : property;
+
+ /* Skip over any prefix, such as "Default" */
+ assert_se(p = strstr(s, "Limit"));
+
+ z = rlimit_from_string(p + 5);
+ assert(z >= 0);
+
+ (void) getrlimit(z, &buf);
+ x = is_soft ? buf.rlim_cur : buf.rlim_max;
+ }
+
+ /* rlim_t might have different sizes, let's map RLIMIT_INFINITY to (uint64_t) -1, so that it is the same on all
+ * archs */
+ u = x == RLIM_INFINITY ? (uint64_t) -1 : (uint64_t) x;
+
+ return sd_bus_message_append(reply, "t", u);
+}
+
+int bus_track_add_name_many(sd_bus_track *t, char **l) {
+ int r = 0;
+ char **i;
+
+ assert(t);
+
+ /* Continues adding after failure, and returns the first failure. */
+
+ STRV_FOREACH(i, l) {
+ int k;
+
+ k = sd_bus_track_add_name(t, *i);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+int bus_open_system_watch_bind_with_description(sd_bus **ret, const char *description) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ const char *e;
+ int r;
+
+ assert(ret);
+
+ /* Match like sd_bus_open_system(), but with the "watch_bind" feature and the Connected() signal turned on. */
+
+ r = sd_bus_new(&bus);
+ if (r < 0)
+ return r;
+
+ if (description) {
+ r = sd_bus_set_description(bus, description);
+ if (r < 0)
+ return r;
+ }
+
+ e = secure_getenv("DBUS_SYSTEM_BUS_ADDRESS");
+ if (!e)
+ e = DEFAULT_SYSTEM_BUS_ADDRESS;
+
+ r = sd_bus_set_address(bus, e);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_bus_client(bus, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_trusted(bus, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_negotiate_creds(bus, true, SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_EFFECTIVE_CAPS);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_watch_bind(bus, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_connected_signal(bus, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_start(bus);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(bus);
+
+ return 0;
+}
+
+int bus_reply_pair_array(sd_bus_message *m, char **l) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ char **k, **v;
+ int r;
+
+ assert(m);
+
+ /* Reply to the specified message with a message containing a dictionary put together from the specified
+ * strv */
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{ss}");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH_PAIR(k, v, l) {
+ r = sd_bus_message_append(reply, "{ss}", *k, *v);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
diff --git a/src/shared/bus-util.h b/src/shared/bus-util.h
new file mode 100644
index 0000000..71c248f
--- /dev/null
+++ b/src/shared/bus-util.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+#include "hashmap.h"
+#include "macro.h"
+#include "string-util.h"
+
+typedef enum BusTransport {
+ BUS_TRANSPORT_LOCAL,
+ BUS_TRANSPORT_REMOTE,
+ BUS_TRANSPORT_MACHINE,
+ _BUS_TRANSPORT_MAX,
+ _BUS_TRANSPORT_INVALID = -1
+} BusTransport;
+
+typedef int (*bus_property_set_t) (sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata);
+
+struct bus_properties_map {
+ const char *member;
+ const char *signature;
+ bus_property_set_t set;
+ size_t offset;
+};
+
+enum {
+ BUS_MAP_STRDUP = 1 << 0, /* If set, each "s" message is duplicated. Thus, each pointer needs to be freed. */
+ BUS_MAP_BOOLEAN_AS_BOOL = 1 << 1, /* If set, each "b" message is written to a bool pointer. If not set, "b" is written to a int pointer. */
+};
+
+int bus_map_id128(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata);
+
+int bus_message_map_all_properties(sd_bus_message *m, const struct bus_properties_map *map, unsigned flags, sd_bus_error *error, void *userdata);
+int bus_map_all_properties(sd_bus *bus, const char *destination, const char *path, const struct bus_properties_map *map,
+ unsigned flags, sd_bus_error *error, sd_bus_message **reply, void *userdata);
+
+int bus_async_unregister_and_exit(sd_event *e, sd_bus *bus, const char *name);
+
+typedef bool (*check_idle_t)(void *userdata);
+
+int bus_event_loop_with_idle(sd_event *e, sd_bus *bus, const char *name, usec_t timeout, check_idle_t check_idle, void *userdata);
+
+int bus_name_has_owner(sd_bus *c, const char *name, sd_bus_error *error);
+
+int bus_check_peercred(sd_bus *c);
+
+int bus_test_polkit(sd_bus_message *call, int capability, const char *action, const char **details, uid_t good_user, bool *_challenge, sd_bus_error *e);
+
+int bus_verify_polkit_async(sd_bus_message *call, int capability, const char *action, const char **details, bool interactive, uid_t good_user, Hashmap **registry, sd_bus_error *error);
+void bus_verify_polkit_async_registry_free(Hashmap *registry);
+
+int bus_connect_system_systemd(sd_bus **_bus);
+int bus_connect_user_systemd(sd_bus **_bus);
+
+int bus_connect_transport(BusTransport transport, const char *host, bool user, sd_bus **bus);
+int bus_connect_transport_systemd(BusTransport transport, const char *host, bool user, sd_bus **bus);
+
+typedef int (*bus_message_print_t) (const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all);
+
+int bus_print_property_value(const char *name, const char *expected_value, bool only_value, const char *fmt, ...) _printf_(4,5);
+int bus_message_print_all_properties(sd_bus_message *m, bus_message_print_t func, char **filter, bool value, bool all, Set **found_properties);
+int bus_print_all_properties(sd_bus *bus, const char *dest, const char *path, bus_message_print_t func, char **filter, bool value, bool all, Set **found_properties);
+
+int bus_property_get_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_set_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error);
+int bus_property_get_id128(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+
+#define bus_property_get_usec ((sd_bus_property_get_t) NULL)
+#define bus_property_set_usec ((sd_bus_property_set_t) NULL)
+
+assert_cc(sizeof(int) == sizeof(int32_t));
+#define bus_property_get_int ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(unsigned) == sizeof(uint32_t));
+#define bus_property_get_unsigned ((sd_bus_property_get_t) NULL)
+
+/* On 64bit machines we can use the default serializer for size_t and
+ * friends, otherwise we need to cast this manually */
+#if __SIZEOF_SIZE_T__ == 8
+#define bus_property_get_size ((sd_bus_property_get_t) NULL)
+#else
+int bus_property_get_size(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+#endif
+
+#if __SIZEOF_LONG__ == 8
+#define bus_property_get_long ((sd_bus_property_get_t) NULL)
+#define bus_property_get_ulong ((sd_bus_property_get_t) NULL)
+#else
+int bus_property_get_long(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_get_ulong(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+#endif
+
+/* uid_t and friends on Linux 32 bit. This means we can just use the
+ * default serializer for 32bit unsigned, for serializing it, and map
+ * it to NULL here */
+assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+#define bus_property_get_uid ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(gid_t) == sizeof(uint32_t));
+#define bus_property_get_gid ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(pid_t) == sizeof(uint32_t));
+#define bus_property_get_pid ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(mode_t) == sizeof(uint32_t));
+#define bus_property_get_mode ((sd_bus_property_get_t) NULL)
+
+int bus_log_parse_error(int r);
+int bus_log_create_error(int r);
+
+#define BUS_DEFINE_PROPERTY_GET_GLOBAL(function, bus_type, val) \
+ int function(sd_bus *bus, \
+ const char *path, \
+ const char *interface, \
+ const char *property, \
+ sd_bus_message *reply, \
+ void *userdata, \
+ sd_bus_error *error) { \
+ \
+ assert(bus); \
+ assert(reply); \
+ \
+ return sd_bus_message_append(reply, bus_type, val); \
+ }
+
+#define BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, get1, get2) \
+ int function(sd_bus *bus, \
+ const char *path, \
+ const char *interface, \
+ const char *property, \
+ sd_bus_message *reply, \
+ void *userdata, \
+ sd_bus_error *error) { \
+ \
+ data_type *data = userdata; \
+ \
+ assert(bus); \
+ assert(reply); \
+ assert(data); \
+ \
+ return sd_bus_message_append(reply, bus_type, \
+ get2(get1(data))); \
+ }
+
+#define ident(x) (x)
+#define BUS_DEFINE_PROPERTY_GET(function, bus_type, data_type, get1) \
+ BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, get1, ident)
+
+#define ref(x) (*(x))
+#define BUS_DEFINE_PROPERTY_GET_REF(function, bus_type, data_type, get) \
+ BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, ref, get)
+
+#define BUS_DEFINE_PROPERTY_GET_ENUM(function, name, type) \
+ BUS_DEFINE_PROPERTY_GET_REF(function, "s", type, name##_to_string)
+
+#define BUS_PROPERTY_DUAL_TIMESTAMP(name, offset, flags) \
+ SD_BUS_PROPERTY(name, "t", bus_property_get_usec, (offset) + offsetof(struct dual_timestamp, realtime), (flags)), \
+ SD_BUS_PROPERTY(name "Monotonic", "t", bus_property_get_usec, (offset) + offsetof(struct dual_timestamp, monotonic), (flags))
+
+int bus_path_encode_unique(sd_bus *b, const char *prefix, const char *sender_id, const char *external_id, char **ret_path);
+int bus_path_decode_unique(const char *path, const char *prefix, char **ret_sender, char **ret_external);
+
+int bus_property_get_rlimit(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+
+int bus_track_add_name_many(sd_bus_track *t, char **l);
+
+int bus_open_system_watch_bind_with_description(sd_bus **ret, const char *description);
+static inline int bus_open_system_watch_bind(sd_bus **ret) {
+ return bus_open_system_watch_bind_with_description(ret, NULL);
+}
+
+int bus_reply_pair_array(sd_bus_message *m, char **l);
diff --git a/src/shared/calendarspec.c b/src/shared/calendarspec.c
new file mode 100644
index 0000000..dafc09e
--- /dev/null
+++ b/src/shared/calendarspec.c
@@ -0,0 +1,1370 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <alloca.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <time.h>
+
+#include "alloc-util.h"
+#include "calendarspec.h"
+#include "fileio.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+#define BITS_WEEKDAYS 127
+#define MIN_YEAR 1970
+#define MAX_YEAR 2199
+
+/* An arbitrary limit on the length of the chains of components. We don't want to
+ * build a very long linked list, which would be slow to iterate over and might cause
+ * our stack to overflow. It's unlikely that legitimate uses require more than a few
+ * linked compenents anyway. */
+#define CALENDARSPEC_COMPONENTS_MAX 240
+
+static void free_chain(CalendarComponent *c) {
+ CalendarComponent *n;
+
+ while (c) {
+ n = c->next;
+ free(c);
+ c = n;
+ }
+}
+
+CalendarSpec* calendar_spec_free(CalendarSpec *c) {
+
+ if (!c)
+ return NULL;
+
+ free_chain(c->year);
+ free_chain(c->month);
+ free_chain(c->day);
+ free_chain(c->hour);
+ free_chain(c->minute);
+ free_chain(c->microsecond);
+ free(c->timezone);
+
+ return mfree(c);
+}
+
+static int component_compare(CalendarComponent * const *a, CalendarComponent * const *b) {
+ int r;
+
+ r = CMP((*a)->start, (*b)->start);
+ if (r != 0)
+ return r;
+
+ r = CMP((*a)->stop, (*b)->stop);
+ if (r != 0)
+ return r;
+
+ return CMP((*a)->repeat, (*b)->repeat);
+}
+
+static void normalize_chain(CalendarComponent **c) {
+ CalendarComponent **b, *i, **j, *next;
+ size_t n = 0, k;
+
+ assert(c);
+
+ for (i = *c; i; i = i->next) {
+ n++;
+
+ /*
+ * While we're counting the chain, also normalize `stop`
+ * so the length of the range is a multiple of `repeat`
+ */
+ if (i->stop > i->start && i->repeat > 0)
+ i->stop -= (i->stop - i->start) % i->repeat;
+
+ }
+
+ if (n <= 1)
+ return;
+
+ j = b = newa(CalendarComponent*, n);
+ for (i = *c; i; i = i->next)
+ *(j++) = i;
+
+ typesafe_qsort(b, n, component_compare);
+
+ b[n-1]->next = NULL;
+ next = b[n-1];
+
+ /* Drop non-unique entries */
+ for (k = n-1; k > 0; k--) {
+ if (component_compare(&b[k-1], &next) == 0) {
+ free(b[k-1]);
+ continue;
+ }
+
+ b[k-1]->next = next;
+ next = b[k-1];
+ }
+
+ *c = next;
+}
+
+static void fix_year(CalendarComponent *c) {
+ /* Turns 12 → 2012, 89 → 1989 */
+
+ while (c) {
+ if (c->start >= 0 && c->start < 70)
+ c->start += 2000;
+
+ if (c->stop >= 0 && c->stop < 70)
+ c->stop += 2000;
+
+ if (c->start >= 70 && c->start < 100)
+ c->start += 1900;
+
+ if (c->stop >= 70 && c->stop < 100)
+ c->stop += 1900;
+
+ c = c->next;
+ }
+}
+
+int calendar_spec_normalize(CalendarSpec *c) {
+ assert(c);
+
+ if (streq_ptr(c->timezone, "UTC")) {
+ c->utc = true;
+ c->timezone = mfree(c->timezone);
+ }
+
+ if (c->weekdays_bits <= 0 || c->weekdays_bits >= BITS_WEEKDAYS)
+ c->weekdays_bits = -1;
+
+ if (c->end_of_month && !c->day)
+ c->end_of_month = false;
+
+ fix_year(c->year);
+
+ normalize_chain(&c->year);
+ normalize_chain(&c->month);
+ normalize_chain(&c->day);
+ normalize_chain(&c->hour);
+ normalize_chain(&c->minute);
+ normalize_chain(&c->microsecond);
+
+ return 0;
+}
+
+_pure_ static bool chain_valid(CalendarComponent *c, int from, int to, bool end_of_month) {
+ assert(to >= from);
+
+ if (!c)
+ return true;
+
+ /* Forbid dates more than 28 days from the end of the month */
+ if (end_of_month)
+ to -= 3;
+
+ if (c->start < from || c->start > to)
+ return false;
+
+ /* Avoid overly large values that could cause overflow */
+ if (c->repeat > to - from)
+ return false;
+
+ /*
+ * c->repeat must be short enough so at least one repetition may
+ * occur before the end of the interval. For dates scheduled
+ * relative to the end of the month, c->start and c->stop
+ * correspond to the Nth last day of the month.
+ */
+ if (c->stop >= 0) {
+ if (c->stop < from || c ->stop > to)
+ return false;
+
+ if (c->start + c->repeat > c->stop)
+ return false;
+ } else {
+ if (end_of_month && c->start - c->repeat < from)
+ return false;
+
+ if (!end_of_month && c->start + c->repeat > to)
+ return false;
+ }
+
+ if (c->next)
+ return chain_valid(c->next, from, to, end_of_month);
+
+ return true;
+}
+
+_pure_ bool calendar_spec_valid(CalendarSpec *c) {
+ assert(c);
+
+ if (c->weekdays_bits > BITS_WEEKDAYS)
+ return false;
+
+ if (!chain_valid(c->year, MIN_YEAR, MAX_YEAR, false))
+ return false;
+
+ if (!chain_valid(c->month, 1, 12, false))
+ return false;
+
+ if (!chain_valid(c->day, 1, 31, c->end_of_month))
+ return false;
+
+ if (!chain_valid(c->hour, 0, 23, false))
+ return false;
+
+ if (!chain_valid(c->minute, 0, 59, false))
+ return false;
+
+ if (!chain_valid(c->microsecond, 0, 60*USEC_PER_SEC-1, false))
+ return false;
+
+ return true;
+}
+
+static void format_weekdays(FILE *f, const CalendarSpec *c) {
+ static const char *const days[] = {
+ "Mon",
+ "Tue",
+ "Wed",
+ "Thu",
+ "Fri",
+ "Sat",
+ "Sun"
+ };
+
+ int l, x;
+ bool need_comma = false;
+
+ assert(f);
+ assert(c);
+ assert(c->weekdays_bits > 0 && c->weekdays_bits <= BITS_WEEKDAYS);
+
+ for (x = 0, l = -1; x < (int) ELEMENTSOF(days); x++) {
+
+ if (c->weekdays_bits & (1 << x)) {
+
+ if (l < 0) {
+ if (need_comma)
+ fputc(',', f);
+ else
+ need_comma = true;
+
+ fputs(days[x], f);
+ l = x;
+ }
+
+ } else if (l >= 0) {
+
+ if (x > l + 1) {
+ fputs(x > l + 2 ? ".." : ",", f);
+ fputs(days[x-1], f);
+ }
+
+ l = -1;
+ }
+ }
+
+ if (l >= 0 && x > l + 1) {
+ fputs(x > l + 2 ? ".." : ",", f);
+ fputs(days[x-1], f);
+ }
+}
+
+static void format_chain(FILE *f, int space, const CalendarComponent *c, bool usec) {
+ int d = usec ? (int) USEC_PER_SEC : 1;
+
+ assert(f);
+
+ if (!c) {
+ fputc('*', f);
+ return;
+ }
+
+ if (usec && c->start == 0 && c->repeat == USEC_PER_SEC && !c->next) {
+ fputc('*', f);
+ return;
+ }
+
+ assert(c->start >= 0);
+
+ fprintf(f, "%0*i", space, c->start / d);
+ if (c->start % d > 0)
+ fprintf(f, ".%06i", c->start % d);
+
+ if (c->stop > 0)
+ fprintf(f, "..%0*i", space, c->stop / d);
+ if (c->stop % d > 0)
+ fprintf(f, ".%06i", c->stop % d);
+
+ if (c->repeat > 0 && !(c->stop > 0 && c->repeat == d))
+ fprintf(f, "/%i", c->repeat / d);
+ if (c->repeat % d > 0)
+ fprintf(f, ".%06i", c->repeat % d);
+
+ if (c->next) {
+ fputc(',', f);
+ format_chain(f, space, c->next, usec);
+ }
+}
+
+int calendar_spec_to_string(const CalendarSpec *c, char **p) {
+ char *buf = NULL;
+ size_t sz = 0;
+ FILE *f;
+ int r;
+
+ assert(c);
+ assert(p);
+
+ f = open_memstream(&buf, &sz);
+ if (!f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ if (c->weekdays_bits > 0 && c->weekdays_bits <= BITS_WEEKDAYS) {
+ format_weekdays(f, c);
+ fputc(' ', f);
+ }
+
+ format_chain(f, 4, c->year, false);
+ fputc('-', f);
+ format_chain(f, 2, c->month, false);
+ fputc(c->end_of_month ? '~' : '-', f);
+ format_chain(f, 2, c->day, false);
+ fputc(' ', f);
+ format_chain(f, 2, c->hour, false);
+ fputc(':', f);
+ format_chain(f, 2, c->minute, false);
+ fputc(':', f);
+ format_chain(f, 2, c->microsecond, true);
+
+ if (c->utc)
+ fputs(" UTC", f);
+ else if (c->timezone != NULL) {
+ fputc(' ', f);
+ fputs(c->timezone, f);
+ } else if (IN_SET(c->dst, 0, 1)) {
+
+ /* If daylight saving is explicitly on or off, let's show the used timezone. */
+
+ tzset();
+
+ if (!isempty(tzname[c->dst])) {
+ fputc(' ', f);
+ fputs(tzname[c->dst], f);
+ }
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0) {
+ free(buf);
+ fclose(f);
+ return r;
+ }
+
+ fclose(f);
+
+ *p = buf;
+ return 0;
+}
+
+static int parse_weekdays(const char **p, CalendarSpec *c) {
+ static const struct {
+ const char *name;
+ const int nr;
+ } day_nr[] = {
+ { "Monday", 0 },
+ { "Mon", 0 },
+ { "Tuesday", 1 },
+ { "Tue", 1 },
+ { "Wednesday", 2 },
+ { "Wed", 2 },
+ { "Thursday", 3 },
+ { "Thu", 3 },
+ { "Friday", 4 },
+ { "Fri", 4 },
+ { "Saturday", 5 },
+ { "Sat", 5 },
+ { "Sunday", 6 },
+ { "Sun", 6 }
+ };
+
+ int l = -1;
+ bool first = true;
+
+ assert(p);
+ assert(*p);
+ assert(c);
+
+ for (;;) {
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(day_nr); i++) {
+ size_t skip;
+
+ if (!startswith_no_case(*p, day_nr[i].name))
+ continue;
+
+ skip = strlen(day_nr[i].name);
+
+ if (!IN_SET((*p)[skip], 0, '-', '.', ',', ' '))
+ return -EINVAL;
+
+ c->weekdays_bits |= 1 << day_nr[i].nr;
+
+ if (l >= 0) {
+ int j;
+
+ if (l > day_nr[i].nr)
+ return -EINVAL;
+
+ for (j = l + 1; j < day_nr[i].nr; j++)
+ c->weekdays_bits |= 1 << j;
+ }
+
+ *p += skip;
+ break;
+ }
+
+ /* Couldn't find this prefix, so let's assume the
+ weekday was not specified and let's continue with
+ the date */
+ if (i >= ELEMENTSOF(day_nr))
+ return first ? 0 : -EINVAL;
+
+ /* We reached the end of the string */
+ if (**p == 0)
+ return 0;
+
+ /* We reached the end of the weekday spec part */
+ if (**p == ' ') {
+ *p += strspn(*p, " ");
+ return 0;
+ }
+
+ if (**p == '.') {
+ if (l >= 0)
+ return -EINVAL;
+
+ if ((*p)[1] != '.')
+ return -EINVAL;
+
+ l = day_nr[i].nr;
+ *p += 2;
+
+ /* Support ranges with "-" for backwards compatibility */
+ } else if (**p == '-') {
+ if (l >= 0)
+ return -EINVAL;
+
+ l = day_nr[i].nr;
+ *p += 1;
+
+ } else if (**p == ',') {
+ l = -1;
+ *p += 1;
+ }
+
+ /* Allow a trailing comma but not an open range */
+ if (IN_SET(**p, 0, ' ')) {
+ *p += strspn(*p, " ");
+ return l < 0 ? 0 : -EINVAL;
+ }
+
+ first = false;
+ }
+}
+
+static int parse_one_number(const char *p, const char **e, unsigned long *ret) {
+ char *ee = NULL;
+ unsigned long value;
+
+ errno = 0;
+ value = strtoul(p, &ee, 10);
+ if (errno > 0)
+ return -errno;
+ if (ee == p)
+ return -EINVAL;
+
+ *ret = value;
+ *e = ee;
+ return 0;
+}
+
+static int parse_component_decimal(const char **p, bool usec, int *res) {
+ unsigned long value;
+ const char *e = NULL;
+ int r;
+
+ if (!isdigit(**p))
+ return -EINVAL;
+
+ r = parse_one_number(*p, &e, &value);
+ if (r < 0)
+ return r;
+
+ if (usec) {
+ if (value * USEC_PER_SEC / USEC_PER_SEC != value)
+ return -ERANGE;
+
+ value *= USEC_PER_SEC;
+
+ /* One "." is a decimal point, but ".." is a range separator */
+ if (e[0] == '.' && e[1] != '.') {
+ unsigned add;
+
+ e++;
+ r = parse_fractional_part_u(&e, 6, &add);
+ if (r < 0)
+ return r;
+
+ if (add + value < value)
+ return -ERANGE;
+ value += add;
+ }
+ }
+
+ if (value > INT_MAX)
+ return -ERANGE;
+
+ *p = e;
+ *res = value;
+
+ return 0;
+}
+
+static int const_chain(int value, CalendarComponent **c) {
+ CalendarComponent *cc = NULL;
+
+ assert(c);
+
+ cc = new0(CalendarComponent, 1);
+ if (!cc)
+ return -ENOMEM;
+
+ cc->start = value;
+ cc->stop = -1;
+ cc->repeat = 0;
+ cc->next = *c;
+
+ *c = cc;
+
+ return 0;
+}
+
+static int calendarspec_from_time_t(CalendarSpec *c, time_t time) {
+ struct tm tm;
+ CalendarComponent *year = NULL, *month = NULL, *day = NULL, *hour = NULL, *minute = NULL, *us = NULL;
+ int r;
+
+ if (!gmtime_r(&time, &tm))
+ return -ERANGE;
+
+ r = const_chain(tm.tm_year + 1900, &year);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_mon + 1, &month);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_mday, &day);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_hour, &hour);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_min, &minute);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_sec * USEC_PER_SEC, &us);
+ if (r < 0)
+ return r;
+
+ c->utc = true;
+ c->year = year;
+ c->month = month;
+ c->day = day;
+ c->hour = hour;
+ c->minute = minute;
+ c->microsecond = us;
+ return 0;
+}
+
+static int prepend_component(const char **p, bool usec, unsigned nesting, CalendarComponent **c) {
+ int r, start, stop = -1, repeat = 0;
+ CalendarComponent *cc;
+ const char *e = *p;
+
+ assert(p);
+ assert(c);
+
+ if (nesting > CALENDARSPEC_COMPONENTS_MAX)
+ return -ENOBUFS;
+
+ r = parse_component_decimal(&e, usec, &start);
+ if (r < 0)
+ return r;
+
+ if (e[0] == '.' && e[1] == '.') {
+ e += 2;
+ r = parse_component_decimal(&e, usec, &stop);
+ if (r < 0)
+ return r;
+
+ repeat = usec ? USEC_PER_SEC : 1;
+ }
+
+ if (*e == '/') {
+ e++;
+ r = parse_component_decimal(&e, usec, &repeat);
+ if (r < 0)
+ return r;
+
+ if (repeat == 0)
+ return -ERANGE;
+ }
+
+ if (!IN_SET(*e, 0, ' ', ',', '-', '~', ':'))
+ return -EINVAL;
+
+ cc = new0(CalendarComponent, 1);
+ if (!cc)
+ return -ENOMEM;
+
+ cc->start = start;
+ cc->stop = stop;
+ cc->repeat = repeat;
+ cc->next = *c;
+
+ *p = e;
+ *c = cc;
+
+ if (*e ==',') {
+ *p += 1;
+ return prepend_component(p, usec, nesting + 1, c);
+ }
+
+ return 0;
+}
+
+static int parse_chain(const char **p, bool usec, CalendarComponent **c) {
+ const char *t;
+ CalendarComponent *cc = NULL;
+ int r;
+
+ assert(p);
+ assert(c);
+
+ t = *p;
+
+ if (t[0] == '*') {
+ if (usec) {
+ r = const_chain(0, c);
+ if (r < 0)
+ return r;
+ (*c)->repeat = USEC_PER_SEC;
+ } else
+ *c = NULL;
+
+ *p = t + 1;
+ return 0;
+ }
+
+ r = prepend_component(&t, usec, 0, &cc);
+ if (r < 0) {
+ free_chain(cc);
+ return r;
+ }
+
+ *p = t;
+ *c = cc;
+ return 0;
+}
+
+static int parse_date(const char **p, CalendarSpec *c) {
+ const char *t;
+ int r;
+ CalendarComponent *first, *second, *third;
+
+ assert(p);
+ assert(*p);
+ assert(c);
+
+ t = *p;
+
+ if (*t == 0)
+ return 0;
+
+ /* @TIMESTAMP — UNIX time in seconds since the epoch */
+ if (*t == '@') {
+ unsigned long value;
+ time_t time;
+
+ r = parse_one_number(t + 1, &t, &value);
+ if (r < 0)
+ return r;
+
+ time = value;
+ if ((unsigned long) time != value)
+ return -ERANGE;
+
+ r = calendarspec_from_time_t(c, time);
+ if (r < 0)
+ return r;
+
+ *p = t;
+ return 1; /* finito, don't parse H:M:S after that */
+ }
+
+ r = parse_chain(&t, false, &first);
+ if (r < 0)
+ return r;
+
+ /* Already the end? A ':' as separator? In that case this was a time, not a date */
+ if (IN_SET(*t, 0, ':')) {
+ free_chain(first);
+ return 0;
+ }
+
+ if (*t == '~')
+ c->end_of_month = true;
+ else if (*t != '-') {
+ free_chain(first);
+ return -EINVAL;
+ }
+
+ t++;
+ r = parse_chain(&t, false, &second);
+ if (r < 0) {
+ free_chain(first);
+ return r;
+ }
+
+ /* Got two parts, hence it's month and day */
+ if (IN_SET(*t, 0, ' ')) {
+ *p = t + strspn(t, " ");
+ c->month = first;
+ c->day = second;
+ return 0;
+ } else if (c->end_of_month) {
+ free_chain(first);
+ free_chain(second);
+ return -EINVAL;
+ }
+
+ if (*t == '~')
+ c->end_of_month = true;
+ else if (*t != '-') {
+ free_chain(first);
+ free_chain(second);
+ return -EINVAL;
+ }
+
+ t++;
+ r = parse_chain(&t, false, &third);
+ if (r < 0) {
+ free_chain(first);
+ free_chain(second);
+ return r;
+ }
+
+ /* Got three parts, hence it is year, month and day */
+ if (IN_SET(*t, 0, ' ')) {
+ *p = t + strspn(t, " ");
+ c->year = first;
+ c->month = second;
+ c->day = third;
+ return 0;
+ }
+
+ free_chain(first);
+ free_chain(second);
+ free_chain(third);
+ return -EINVAL;
+}
+
+static int parse_calendar_time(const char **p, CalendarSpec *c) {
+ CalendarComponent *h = NULL, *m = NULL, *s = NULL;
+ const char *t;
+ int r;
+
+ assert(p);
+ assert(*p);
+ assert(c);
+
+ t = *p;
+
+ /* If no time is specified at all, then this means 00:00:00 */
+ if (*t == 0)
+ goto null_hour;
+
+ r = parse_chain(&t, false, &h);
+ if (r < 0)
+ goto fail;
+
+ if (*t != ':') {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ t++;
+ r = parse_chain(&t, false, &m);
+ if (r < 0)
+ goto fail;
+
+ /* Already at the end? Then it's hours and minutes, and seconds are 0 */
+ if (*t == 0)
+ goto null_second;
+
+ if (*t != ':') {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ t++;
+ r = parse_chain(&t, true, &s);
+ if (r < 0)
+ goto fail;
+
+ /* At the end? Then it's hours, minutes and seconds */
+ if (*t == 0)
+ goto finish;
+
+ r = -EINVAL;
+ goto fail;
+
+null_hour:
+ r = const_chain(0, &h);
+ if (r < 0)
+ goto fail;
+
+ r = const_chain(0, &m);
+ if (r < 0)
+ goto fail;
+
+null_second:
+ r = const_chain(0, &s);
+ if (r < 0)
+ goto fail;
+
+finish:
+ *p = t;
+ c->hour = h;
+ c->minute = m;
+ c->microsecond = s;
+
+ return 0;
+
+fail:
+ free_chain(h);
+ free_chain(m);
+ free_chain(s);
+ return r;
+}
+
+int calendar_spec_from_string(const char *p, CalendarSpec **spec) {
+ const char *utc;
+ _cleanup_(calendar_spec_freep) CalendarSpec *c = NULL;
+ int r;
+
+ assert(p);
+ assert(spec);
+
+ c = new0(CalendarSpec, 1);
+ if (!c)
+ return -ENOMEM;
+ c->dst = -1;
+ c->timezone = NULL;
+
+ utc = endswith_no_case(p, " UTC");
+ if (utc) {
+ c->utc = true;
+ p = strndupa(p, utc - p);
+ } else {
+ const char *e = NULL;
+ int j;
+
+ tzset();
+
+ /* Check if the local timezone was specified? */
+ for (j = 0; j <= 1; j++) {
+ if (isempty(tzname[j]))
+ continue;
+
+ e = endswith_no_case(p, tzname[j]);
+ if (!e)
+ continue;
+ if (e == p)
+ continue;
+ if (e[-1] != ' ')
+ continue;
+
+ break;
+ }
+
+ /* Found one of the two timezones specified? */
+ if (IN_SET(j, 0, 1)) {
+ p = strndupa(p, e - p - 1);
+ c->dst = j;
+ } else {
+ const char *last_space;
+
+ last_space = strrchr(p, ' ');
+ if (last_space != NULL && timezone_is_valid(last_space + 1, LOG_DEBUG)) {
+ c->timezone = strdup(last_space + 1);
+ if (!c->timezone)
+ return -ENOMEM;
+
+ p = strndupa(p, last_space - p);
+ }
+ }
+ }
+
+ if (isempty(p))
+ return -EINVAL;
+
+ if (strcaseeq(p, "minutely")) {
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "hourly")) {
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "daily")) {
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "monthly")) {
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "annually") ||
+ strcaseeq(p, "yearly") ||
+ strcaseeq(p, "anually") /* backwards compatibility */ ) {
+
+ r = const_chain(1, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "weekly")) {
+
+ c->weekdays_bits = 1;
+
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "quarterly")) {
+
+ r = const_chain(1, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(4, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(7, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(10, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "biannually") ||
+ strcaseeq(p, "bi-annually") ||
+ strcaseeq(p, "semiannually") ||
+ strcaseeq(p, "semi-annually")) {
+
+ r = const_chain(1, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(7, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else {
+ r = parse_weekdays(&p, c);
+ if (r < 0)
+ return r;
+
+ r = parse_date(&p, c);
+ if (r < 0)
+ return r;
+
+ if (r == 0) {
+ r = parse_calendar_time(&p, c);
+ if (r < 0)
+ return r;
+ }
+
+ if (*p != 0)
+ return -EINVAL;
+ }
+
+ r = calendar_spec_normalize(c);
+ if (r < 0)
+ return r;
+
+ if (!calendar_spec_valid(c))
+ return -EINVAL;
+
+ *spec = TAKE_PTR(c);
+ return 0;
+}
+
+static int find_end_of_month(struct tm *tm, bool utc, int day) {
+ struct tm t = *tm;
+
+ t.tm_mon++;
+ t.tm_mday = 1 - day;
+
+ if (mktime_or_timegm(&t, utc) < 0 ||
+ t.tm_mon != tm->tm_mon)
+ return -1;
+
+ return t.tm_mday;
+}
+
+static int find_matching_component(const CalendarSpec *spec, const CalendarComponent *c,
+ struct tm *tm, int *val) {
+ const CalendarComponent *p = c;
+ int start, stop, d = -1;
+ bool d_set = false;
+ int r;
+
+ assert(val);
+
+ if (!c)
+ return 0;
+
+ while (c) {
+ start = c->start;
+ stop = c->stop;
+
+ if (spec->end_of_month && p == spec->day) {
+ start = find_end_of_month(tm, spec->utc, start);
+ stop = find_end_of_month(tm, spec->utc, stop);
+
+ if (stop > 0)
+ SWAP_TWO(start, stop);
+ }
+
+ if (start >= *val) {
+
+ if (!d_set || start < d) {
+ d = start;
+ d_set = true;
+ }
+
+ } else if (c->repeat > 0) {
+ int k;
+
+ k = start + c->repeat * DIV_ROUND_UP(*val - start, c->repeat);
+
+ if ((!d_set || k < d) && (stop < 0 || k <= stop)) {
+ d = k;
+ d_set = true;
+ }
+ }
+
+ c = c->next;
+ }
+
+ if (!d_set)
+ return -ENOENT;
+
+ r = *val != d;
+ *val = d;
+ return r;
+}
+
+static bool tm_out_of_bounds(const struct tm *tm, bool utc) {
+ struct tm t;
+ assert(tm);
+
+ t = *tm;
+
+ if (mktime_or_timegm(&t, utc) < 0)
+ return true;
+
+ /*
+ * Set an upper bound on the year so impossible dates like "*-02-31"
+ * don't cause find_next() to loop forever. tm_year contains years
+ * since 1900, so adjust it accordingly.
+ */
+ if (tm->tm_year + 1900 > MAX_YEAR)
+ return true;
+
+ /* Did any normalization take place? If so, it was out of bounds before */
+ return
+ t.tm_year != tm->tm_year ||
+ t.tm_mon != tm->tm_mon ||
+ t.tm_mday != tm->tm_mday ||
+ t.tm_hour != tm->tm_hour ||
+ t.tm_min != tm->tm_min ||
+ t.tm_sec != tm->tm_sec;
+}
+
+static bool matches_weekday(int weekdays_bits, const struct tm *tm, bool utc) {
+ struct tm t;
+ int k;
+
+ if (weekdays_bits < 0 || weekdays_bits >= BITS_WEEKDAYS)
+ return true;
+
+ t = *tm;
+ if (mktime_or_timegm(&t, utc) < 0)
+ return false;
+
+ k = t.tm_wday == 0 ? 6 : t.tm_wday - 1;
+ return (weekdays_bits & (1 << k));
+}
+
+static int find_next(const CalendarSpec *spec, struct tm *tm, usec_t *usec) {
+ struct tm c;
+ int tm_usec;
+ int r;
+
+ assert(spec);
+ assert(tm);
+
+ c = *tm;
+ tm_usec = *usec;
+
+ for (;;) {
+ /* Normalize the current date */
+ (void) mktime_or_timegm(&c, spec->utc);
+ c.tm_isdst = spec->dst;
+
+ c.tm_year += 1900;
+ r = find_matching_component(spec, spec->year, &c, &c.tm_year);
+ c.tm_year -= 1900;
+
+ if (r > 0) {
+ c.tm_mon = 0;
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ }
+ if (r < 0)
+ return r;
+ if (tm_out_of_bounds(&c, spec->utc))
+ return -ENOENT;
+
+ c.tm_mon += 1;
+ r = find_matching_component(spec, spec->month, &c, &c.tm_mon);
+ c.tm_mon -= 1;
+
+ if (r > 0) {
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ }
+ if (r < 0 || tm_out_of_bounds(&c, spec->utc)) {
+ c.tm_year++;
+ c.tm_mon = 0;
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+
+ r = find_matching_component(spec, spec->day, &c, &c.tm_mday);
+ if (r > 0)
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ if (r < 0 || tm_out_of_bounds(&c, spec->utc)) {
+ c.tm_mon++;
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+
+ if (!matches_weekday(spec->weekdays_bits, &c, spec->utc)) {
+ c.tm_mday++;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+
+ r = find_matching_component(spec, spec->hour, &c, &c.tm_hour);
+ if (r > 0)
+ c.tm_min = c.tm_sec = tm_usec = 0;
+ if (r < 0 || tm_out_of_bounds(&c, spec->utc)) {
+ c.tm_mday++;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+
+ r = find_matching_component(spec, spec->minute, &c, &c.tm_min);
+ if (r > 0)
+ c.tm_sec = tm_usec = 0;
+ if (r < 0 || tm_out_of_bounds(&c, spec->utc)) {
+ c.tm_hour++;
+ c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+
+ c.tm_sec = c.tm_sec * USEC_PER_SEC + tm_usec;
+ r = find_matching_component(spec, spec->microsecond, &c, &c.tm_sec);
+ tm_usec = c.tm_sec % USEC_PER_SEC;
+ c.tm_sec /= USEC_PER_SEC;
+
+ if (r < 0 || tm_out_of_bounds(&c, spec->utc)) {
+ c.tm_min++;
+ c.tm_sec = tm_usec = 0;
+ continue;
+ }
+
+ *tm = c;
+ *usec = tm_usec;
+ return 0;
+ }
+}
+
+static int calendar_spec_next_usec_impl(const CalendarSpec *spec, usec_t usec, usec_t *next) {
+ struct tm tm;
+ time_t t;
+ int r;
+ usec_t tm_usec;
+
+ assert(spec);
+ assert(next);
+
+ if (usec > USEC_TIMESTAMP_FORMATTABLE_MAX)
+ return -EINVAL;
+
+ usec++;
+ t = (time_t) (usec / USEC_PER_SEC);
+ assert_se(localtime_or_gmtime_r(&t, &tm, spec->utc));
+ tm_usec = usec % USEC_PER_SEC;
+
+ r = find_next(spec, &tm, &tm_usec);
+ if (r < 0)
+ return r;
+
+ t = mktime_or_timegm(&tm, spec->utc);
+ if (t < 0)
+ return -EINVAL;
+
+ *next = (usec_t) t * USEC_PER_SEC + tm_usec;
+ return 0;
+}
+
+typedef struct SpecNextResult {
+ usec_t next;
+ int return_value;
+} SpecNextResult;
+
+int calendar_spec_next_usec(const CalendarSpec *spec, usec_t usec, usec_t *next) {
+ SpecNextResult *shared, tmp;
+ int r;
+
+ if (isempty(spec->timezone))
+ return calendar_spec_next_usec_impl(spec, usec, next);
+
+ shared = mmap(NULL, sizeof *shared, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+ if (shared == MAP_FAILED)
+ return negative_errno();
+
+ r = safe_fork("(sd-calendar)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_WAIT, NULL);
+ if (r < 0) {
+ (void) munmap(shared, sizeof *shared);
+ return r;
+ }
+ if (r == 0) {
+ if (setenv("TZ", spec->timezone, 1) != 0) {
+ shared->return_value = negative_errno();
+ _exit(EXIT_FAILURE);
+ }
+
+ tzset();
+
+ shared->return_value = calendar_spec_next_usec_impl(spec, usec, &shared->next);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ tmp = *shared;
+ if (munmap(shared, sizeof *shared) < 0)
+ return negative_errno();
+
+ if (tmp.return_value == 0)
+ *next = tmp.next;
+
+ return tmp.return_value;
+}
diff --git a/src/shared/calendarspec.h b/src/shared/calendarspec.h
new file mode 100644
index 0000000..3bf8a39
--- /dev/null
+++ b/src/shared/calendarspec.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* A structure for specifying (possibly repetitive) points in calendar
+ * time, a la cron */
+
+#include <stdbool.h>
+
+#include "time-util.h"
+#include "util.h"
+
+typedef struct CalendarComponent {
+ int start;
+ int stop;
+ int repeat;
+
+ struct CalendarComponent *next;
+} CalendarComponent;
+
+typedef struct CalendarSpec {
+ int weekdays_bits;
+ bool end_of_month;
+ bool utc;
+ int dst;
+ char *timezone;
+
+ CalendarComponent *year;
+ CalendarComponent *month;
+ CalendarComponent *day;
+
+ CalendarComponent *hour;
+ CalendarComponent *minute;
+ CalendarComponent *microsecond;
+} CalendarSpec;
+
+CalendarSpec* calendar_spec_free(CalendarSpec *c);
+
+int calendar_spec_normalize(CalendarSpec *spec);
+bool calendar_spec_valid(CalendarSpec *spec);
+
+int calendar_spec_to_string(const CalendarSpec *spec, char **p);
+int calendar_spec_from_string(const char *p, CalendarSpec **spec);
+
+int calendar_spec_next_usec(const CalendarSpec *spec, usec_t usec, usec_t *next);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(CalendarSpec*, calendar_spec_free);
diff --git a/src/shared/cgroup-show.c b/src/shared/cgroup-show.c
new file mode 100644
index 0000000..61df751
--- /dev/null
+++ b/src/shared/cgroup-show.c
@@ -0,0 +1,354 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cgroup-show.h"
+#include "cgroup-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "output-mode.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "unit-name.h"
+
+static void show_pid_array(
+ pid_t pids[],
+ unsigned n_pids,
+ const char *prefix,
+ unsigned n_columns,
+ bool extra,
+ bool more,
+ OutputFlags flags) {
+
+ unsigned i, j, pid_width;
+
+ if (n_pids == 0)
+ return;
+
+ typesafe_qsort(pids, n_pids, pid_compare_func);
+
+ /* Filter duplicates */
+ for (j = 0, i = 1; i < n_pids; i++) {
+ if (pids[i] == pids[j])
+ continue;
+ pids[++j] = pids[i];
+ }
+ n_pids = j + 1;
+ pid_width = DECIMAL_STR_WIDTH(pids[j]);
+
+ if (flags & OUTPUT_FULL_WIDTH)
+ n_columns = 0;
+ else {
+ if (n_columns > pid_width+2)
+ n_columns -= pid_width+2;
+ else
+ n_columns = 20;
+ }
+ for (i = 0; i < n_pids; i++) {
+ _cleanup_free_ char *t = NULL;
+
+ (void) get_process_cmdline(pids[i], n_columns, true, &t);
+
+ if (extra)
+ printf("%s%s ", prefix, special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET));
+ else
+ printf("%s%s", prefix, special_glyph(((more || i < n_pids-1) ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT)));
+
+ printf("%*"PID_PRI" %s\n", pid_width, pids[i], strna(t));
+ }
+}
+
+static int show_cgroup_one_by_path(
+ const char *path,
+ const char *prefix,
+ unsigned n_columns,
+ bool more,
+ OutputFlags flags) {
+
+ char *fn;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t n = 0, n_allocated = 0;
+ _cleanup_free_ pid_t *pids = NULL;
+ _cleanup_free_ char *p = NULL;
+ pid_t pid;
+ int r;
+
+ r = cg_mangle_path(path, &p);
+ if (r < 0)
+ return r;
+
+ fn = strjoina(p, "/cgroup.procs");
+ f = fopen(fn, "re");
+ if (!f)
+ return -errno;
+
+ while ((r = cg_read_pid(f, &pid)) > 0) {
+
+ if (!(flags & OUTPUT_KERNEL_THREADS) && is_kernel_thread(pid) > 0)
+ continue;
+
+ if (!GREEDY_REALLOC(pids, n_allocated, n + 1))
+ return -ENOMEM;
+
+ assert(n < n_allocated);
+ pids[n++] = pid;
+ }
+
+ if (r < 0)
+ return r;
+
+ show_pid_array(pids, n, prefix, n_columns, false, more, flags);
+
+ return 0;
+}
+
+int show_cgroup_by_path(
+ const char *path,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags) {
+
+ _cleanup_free_ char *fn = NULL, *p1 = NULL, *last = NULL, *p2 = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ char *gn = NULL;
+ bool shown_pids = false;
+ int r;
+
+ assert(path);
+
+ if (n_columns <= 0)
+ n_columns = columns();
+
+ prefix = strempty(prefix);
+
+ r = cg_mangle_path(path, &fn);
+ if (r < 0)
+ return r;
+
+ d = opendir(fn);
+ if (!d)
+ return -errno;
+
+ while ((r = cg_read_subgroup(d, &gn)) > 0) {
+ _cleanup_free_ char *k = NULL;
+
+ k = strjoin(fn, "/", gn);
+ free(gn);
+ if (!k)
+ return -ENOMEM;
+
+ if (!(flags & OUTPUT_SHOW_ALL) && cg_is_empty_recursive(NULL, k) > 0)
+ continue;
+
+ if (!shown_pids) {
+ show_cgroup_one_by_path(path, prefix, n_columns, true, flags);
+ shown_pids = true;
+ }
+
+ if (last) {
+ printf("%s%s%s\n", prefix, special_glyph(SPECIAL_GLYPH_TREE_BRANCH), cg_unescape(basename(last)));
+
+ if (!p1) {
+ p1 = strappend(prefix, special_glyph(SPECIAL_GLYPH_TREE_VERTICAL));
+ if (!p1)
+ return -ENOMEM;
+ }
+
+ show_cgroup_by_path(last, p1, n_columns-2, flags);
+ free(last);
+ }
+
+ last = TAKE_PTR(k);
+ }
+
+ if (r < 0)
+ return r;
+
+ if (!shown_pids)
+ show_cgroup_one_by_path(path, prefix, n_columns, !!last, flags);
+
+ if (last) {
+ printf("%s%s%s\n", prefix, special_glyph(SPECIAL_GLYPH_TREE_RIGHT), cg_unescape(basename(last)));
+
+ if (!p2) {
+ p2 = strappend(prefix, " ");
+ if (!p2)
+ return -ENOMEM;
+ }
+
+ show_cgroup_by_path(last, p2, n_columns-2, flags);
+ }
+
+ return 0;
+}
+
+int show_cgroup(const char *controller,
+ const char *path,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(path);
+
+ r = cg_get_path(controller, path, NULL, &p);
+ if (r < 0)
+ return r;
+
+ return show_cgroup_by_path(p, prefix, n_columns, flags);
+}
+
+static int show_extra_pids(
+ const char *controller,
+ const char *path,
+ const char *prefix,
+ unsigned n_columns,
+ const pid_t pids[],
+ unsigned n_pids,
+ OutputFlags flags) {
+
+ _cleanup_free_ pid_t *copy = NULL;
+ unsigned i, j;
+ int r;
+
+ assert(path);
+
+ if (n_pids <= 0)
+ return 0;
+
+ if (n_columns <= 0)
+ n_columns = columns();
+
+ prefix = strempty(prefix);
+
+ copy = new(pid_t, n_pids);
+ if (!copy)
+ return -ENOMEM;
+
+ for (i = 0, j = 0; i < n_pids; i++) {
+ _cleanup_free_ char *k = NULL;
+
+ r = cg_pid_get_path(controller, pids[i], &k);
+ if (r < 0)
+ return r;
+
+ if (path_startswith(k, path))
+ continue;
+
+ copy[j++] = pids[i];
+ }
+
+ show_pid_array(copy, j, prefix, n_columns, true, false, flags);
+
+ return 0;
+}
+
+int show_cgroup_and_extra(
+ const char *controller,
+ const char *path,
+ const char *prefix,
+ unsigned n_columns,
+ const pid_t extra_pids[],
+ unsigned n_extra_pids,
+ OutputFlags flags) {
+
+ int r;
+
+ assert(path);
+
+ r = show_cgroup(controller, path, prefix, n_columns, flags);
+ if (r < 0)
+ return r;
+
+ return show_extra_pids(controller, path, prefix, n_columns, extra_pids, n_extra_pids, flags);
+}
+
+int show_cgroup_get_unit_path_and_warn(
+ sd_bus *bus,
+ const char *unit,
+ char **ret) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ path = unit_dbus_path_from_name(unit);
+ if (!path)
+ return log_oom();
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ unit_dbus_interface_from_name(unit),
+ "ControlGroup",
+ &error,
+ ret);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query unit control group path: %s",
+ bus_error_message(&error, r));
+
+ return 0;
+}
+
+int show_cgroup_get_path_and_warn(
+ const char *machine,
+ const char *prefix,
+ char **ret) {
+
+ int r;
+ _cleanup_free_ char *root = NULL;
+
+ if (machine) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *unit = NULL;
+ const char *m;
+
+ m = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, m, "SCOPE", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load machine data: %m");
+
+ r = bus_connect_transport_systemd(BUS_TRANSPORT_LOCAL, NULL, false, &bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ r = show_cgroup_get_unit_path_and_warn(bus, unit, &root);
+ if (r < 0)
+ return r;
+ } else {
+ r = cg_get_root_path(&root);
+ if (r == -ENOMEDIUM)
+ return log_error_errno(r, "Failed to get root control group path.\n"
+ "No cgroup filesystem mounted on /sys/fs/cgroup");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to get root control group path: %m");
+ }
+
+ if (prefix) {
+ char *t;
+
+ t = strjoin(root, prefix);
+ if (!t)
+ return log_oom();
+
+ *ret = t;
+ } else
+ *ret = TAKE_PTR(root);
+
+ return 0;
+}
diff --git a/src/shared/cgroup-show.h b/src/shared/cgroup-show.h
new file mode 100644
index 0000000..3593e9d
--- /dev/null
+++ b/src/shared/cgroup-show.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-bus.h"
+
+#include "logs-show.h"
+#include "output-mode.h"
+
+int show_cgroup_by_path(const char *path, const char *prefix, unsigned columns, OutputFlags flags);
+int show_cgroup(const char *controller, const char *path, const char *prefix, unsigned columns, OutputFlags flags);
+
+int show_cgroup_and_extra(const char *controller, const char *path, const char *prefix, unsigned n_columns, const pid_t extra_pids[], unsigned n_extra_pids, OutputFlags flags);
+
+int show_cgroup_get_unit_path_and_warn(
+ sd_bus *bus,
+ const char *unit,
+ char **ret);
+int show_cgroup_get_path_and_warn(
+ const char *machine,
+ const char *prefix,
+ char **ret);
diff --git a/src/shared/clean-ipc.c b/src/shared/clean-ipc.c
new file mode 100644
index 0000000..46fa680
--- /dev/null
+++ b/src/shared/clean-ipc.c
@@ -0,0 +1,453 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <mqueue.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ipc.h>
+#include <sys/msg.h>
+#include <sys/sem.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "clean-ipc.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static bool match_uid_gid(uid_t subject_uid, gid_t subject_gid, uid_t delete_uid, gid_t delete_gid) {
+
+ if (uid_is_valid(delete_uid) && subject_uid == delete_uid)
+ return true;
+
+ if (gid_is_valid(delete_gid) && subject_gid == delete_gid)
+ return true;
+
+ return false;
+}
+
+static int clean_sysvipc_shm(uid_t delete_uid, gid_t delete_gid, bool rm) {
+ _cleanup_fclose_ FILE *f = NULL;
+ bool first = true;
+ int ret = 0, r;
+
+ f = fopen("/proc/sysvipc/shm", "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /proc/sysvipc/shm: %m");
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ unsigned n_attached;
+ pid_t cpid, lpid;
+ uid_t uid, cuid;
+ gid_t gid, cgid;
+ int shmid;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to read /proc/sysvipc/shm: %m");
+ if (r == 0)
+ break;
+
+ if (first) {
+ first = false;
+ continue;
+ }
+
+ if (sscanf(line, "%*i %i %*o %*u " PID_FMT " " PID_FMT " %u " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT,
+ &shmid, &cpid, &lpid, &n_attached, &uid, &gid, &cuid, &cgid) != 8)
+ continue;
+
+ if (n_attached > 0)
+ continue;
+
+ if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (shmctl(shmid, IPC_RMID, NULL) < 0) {
+
+ /* Ignore entries that are already deleted */
+ if (IN_SET(errno, EIDRM, EINVAL))
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to remove SysV shared memory segment %i: %m",
+ shmid);
+ } else {
+ log_debug("Removed SysV shared memory segment %i.", shmid);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+
+ return ret;
+}
+
+static int clean_sysvipc_sem(uid_t delete_uid, gid_t delete_gid, bool rm) {
+ _cleanup_fclose_ FILE *f = NULL;
+ bool first = true;
+ int ret = 0, r;
+
+ f = fopen("/proc/sysvipc/sem", "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /proc/sysvipc/sem: %m");
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ uid_t uid, cuid;
+ gid_t gid, cgid;
+ int semid;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read /proc/sysvipc/sem: %m");
+ if (r == 0)
+ break;
+
+ if (first) {
+ first = false;
+ continue;
+ }
+
+ if (sscanf(line, "%*i %i %*o %*u " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT,
+ &semid, &uid, &gid, &cuid, &cgid) != 5)
+ continue;
+
+ if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (semctl(semid, 0, IPC_RMID) < 0) {
+
+ /* Ignore entries that are already deleted */
+ if (IN_SET(errno, EIDRM, EINVAL))
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to remove SysV semaphores object %i: %m",
+ semid);
+ } else {
+ log_debug("Removed SysV semaphore %i.", semid);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+
+ return ret;
+}
+
+static int clean_sysvipc_msg(uid_t delete_uid, gid_t delete_gid, bool rm) {
+ _cleanup_fclose_ FILE *f = NULL;
+ bool first = true;
+ int ret = 0, r;
+
+ f = fopen("/proc/sysvipc/msg", "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /proc/sysvipc/msg: %m");
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ uid_t uid, cuid;
+ gid_t gid, cgid;
+ pid_t cpid, lpid;
+ int msgid;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read /proc/sysvipc/msg: %m");
+ if (r == 0)
+ break;
+
+ if (first) {
+ first = false;
+ continue;
+ }
+
+ if (sscanf(line, "%*i %i %*o %*u %*u " PID_FMT " " PID_FMT " " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT,
+ &msgid, &cpid, &lpid, &uid, &gid, &cuid, &cgid) != 7)
+ continue;
+
+ if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (msgctl(msgid, IPC_RMID, NULL) < 0) {
+
+ /* Ignore entries that are already deleted */
+ if (IN_SET(errno, EIDRM, EINVAL))
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to remove SysV message queue %i: %m",
+ msgid);
+ } else {
+ log_debug("Removed SysV message queue %i.", msgid);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+
+ return ret;
+}
+
+static int clean_posix_shm_internal(DIR *dir, uid_t uid, gid_t gid, bool rm) {
+ struct dirent *de;
+ int ret = 0, r;
+
+ assert(dir);
+
+ FOREACH_DIRENT_ALL(de, dir, goto fail) {
+ struct stat st;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (fstatat(dirfd(dir), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno, "Failed to stat() POSIX shared memory segment %s: %m", de->d_name);
+ continue;
+ }
+
+ if (S_ISDIR(st.st_mode)) {
+ _cleanup_closedir_ DIR *kid;
+
+ kid = xopendirat(dirfd(dir), de->d_name, O_NOFOLLOW|O_NOATIME);
+ if (!kid) {
+ if (errno != ENOENT)
+ ret = log_warning_errno(errno, "Failed to enter shared memory directory %s: %m", de->d_name);
+ } else {
+ r = clean_posix_shm_internal(kid, uid, gid, rm);
+ if (r < 0)
+ ret = r;
+ }
+
+ if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (unlinkat(dirfd(dir), de->d_name, AT_REMOVEDIR) < 0) {
+
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno, "Failed to remove POSIX shared memory directory %s: %m", de->d_name);
+ } else {
+ log_debug("Removed POSIX shared memory directory %s", de->d_name);
+ if (ret == 0)
+ ret = 1;
+ }
+ } else {
+
+ if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (unlinkat(dirfd(dir), de->d_name, 0) < 0) {
+
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno, "Failed to remove POSIX shared memory segment %s: %m", de->d_name);
+ } else {
+ log_debug("Removed POSIX shared memory segment %s", de->d_name);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+ }
+
+ return ret;
+
+fail:
+ return log_warning_errno(errno, "Failed to read /dev/shm: %m");
+}
+
+static int clean_posix_shm(uid_t uid, gid_t gid, bool rm) {
+ _cleanup_closedir_ DIR *dir = NULL;
+
+ dir = opendir("/dev/shm");
+ if (!dir) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /dev/shm: %m");
+ }
+
+ return clean_posix_shm_internal(dir, uid, gid, rm);
+}
+
+static int clean_posix_mq(uid_t uid, gid_t gid, bool rm) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *de;
+ int ret = 0;
+
+ dir = opendir("/dev/mqueue");
+ if (!dir) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /dev/mqueue: %m");
+ }
+
+ FOREACH_DIRENT_ALL(de, dir, goto fail) {
+ struct stat st;
+ char fn[1+strlen(de->d_name)+1];
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (fstatat(dirfd(dir), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to stat() MQ segment %s: %m",
+ de->d_name);
+ continue;
+ }
+
+ if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ fn[0] = '/';
+ strcpy(fn+1, de->d_name);
+
+ if (mq_unlink(fn) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to unlink POSIX message queue %s: %m",
+ fn);
+ } else {
+ log_debug("Removed POSIX message queue %s", fn);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+
+ return ret;
+
+fail:
+ return log_warning_errno(errno, "Failed to read /dev/mqueue: %m");
+}
+
+int clean_ipc_internal(uid_t uid, gid_t gid, bool rm) {
+ int ret = 0, r;
+
+ /* If 'rm' is true, clean all IPC objects owned by either the specified UID or the specified GID. Return the
+ * last error encountered or == 0 if no matching IPC objects have been found or > 0 if matching IPC objects
+ * have been found and have been removed.
+ *
+ * If 'rm' is false, just search for IPC objects owned by either the specified UID or the specified GID. In
+ * this case we return < 0 on error, > 0 if we found a matching object, == 0 if we didn't.
+ *
+ * As special rule: if UID/GID is specified as root we'll silently not clean up things, and always claim that
+ * there are IPC objects for it. */
+
+ if (uid == 0) {
+ if (!rm)
+ return 1;
+
+ uid = UID_INVALID;
+ }
+ if (gid == 0) {
+ if (!rm)
+ return 1;
+
+ gid = GID_INVALID;
+ }
+
+ /* Anything to do? */
+ if (!uid_is_valid(uid) && !gid_is_valid(gid))
+ return 0;
+
+ r = clean_sysvipc_shm(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ r = clean_sysvipc_sem(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ r = clean_sysvipc_msg(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ r = clean_posix_shm(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ r = clean_posix_mq(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ return ret;
+}
+
+int clean_ipc_by_uid(uid_t uid) {
+ return clean_ipc_internal(uid, GID_INVALID, true);
+}
+
+int clean_ipc_by_gid(gid_t gid) {
+ return clean_ipc_internal(UID_INVALID, gid, true);
+}
diff --git a/src/shared/clean-ipc.h b/src/shared/clean-ipc.h
new file mode 100644
index 0000000..eaff47d
--- /dev/null
+++ b/src/shared/clean-ipc.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+#include "user-util.h"
+
+int clean_ipc_internal(uid_t uid, gid_t gid, bool rm);
+
+/* Remove all IPC objects owned by the specified UID or GID */
+int clean_ipc_by_uid(uid_t uid);
+int clean_ipc_by_gid(gid_t gid);
+
+/* Check if any IPC object owned by the specified UID or GID exists, returns > 0 if so, == 0 if not */
+static inline int search_ipc(uid_t uid, gid_t gid) {
+ return clean_ipc_internal(uid, gid, false);
+}
diff --git a/src/shared/clock-util.c b/src/shared/clock-util.c
new file mode 100644
index 0000000..1877a81
--- /dev/null
+++ b/src/shared/clock-util.c
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <time.h>
+#include <linux/rtc.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include "alloc-util.h"
+#include "clock-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "string-util.h"
+#include "util.h"
+
+int clock_get_hwclock(struct tm *tm) {
+ _cleanup_close_ int fd = -1;
+
+ assert(tm);
+
+ fd = open("/dev/rtc", O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ /* This leaves the timezone fields of struct tm
+ * uninitialized! */
+ if (ioctl(fd, RTC_RD_TIME, tm) < 0)
+ return -errno;
+
+ /* We don't know daylight saving, so we reset this in order not
+ * to confuse mktime(). */
+ tm->tm_isdst = -1;
+
+ return 0;
+}
+
+int clock_set_hwclock(const struct tm *tm) {
+ _cleanup_close_ int fd = -1;
+
+ assert(tm);
+
+ fd = open("/dev/rtc", O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (ioctl(fd, RTC_SET_TIME, tm) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int clock_is_localtime(const char* adjtime_path) {
+ _cleanup_fclose_ FILE *f;
+ int r;
+
+ if (!adjtime_path)
+ adjtime_path = "/etc/adjtime";
+
+ /*
+ * The third line of adjtime is "UTC" or "LOCAL" or nothing.
+ * # /etc/adjtime
+ * 0.0 0 0
+ * 0
+ * UTC
+ */
+ f = fopen(adjtime_path, "re");
+ if (f) {
+ _cleanup_free_ char *line = NULL;
+ unsigned i;
+
+ for (i = 0; i < 2; i++) { /* skip the first two lines */
+ r = read_line(f, LONG_LINE_MAX, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return false; /* less than three lines → default to UTC */
+ }
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return false; /* less than three lines → default to UTC */
+
+ return streq(line, "LOCAL");
+
+ } else if (errno != ENOENT)
+ return -errno;
+
+ /* adjtime not present → default to UTC */
+ return false;
+}
+
+int clock_set_timezone(int *min) {
+ const struct timeval *tv_null = NULL;
+ struct timespec ts;
+ struct tm tm;
+ int minutesdelta;
+ struct timezone tz;
+
+ assert_se(clock_gettime(CLOCK_REALTIME, &ts) == 0);
+ assert_se(localtime_r(&ts.tv_sec, &tm));
+ minutesdelta = tm.tm_gmtoff / 60;
+
+ tz.tz_minuteswest = -minutesdelta;
+ tz.tz_dsttime = 0; /* DST_NONE */
+
+ /*
+ * If the RTC does not run in UTC but in local time, the very first
+ * call to settimeofday() will set the kernel's timezone and will warp the
+ * system clock, so that it runs in UTC instead of the local time we
+ * have read from the RTC.
+ */
+ if (settimeofday(tv_null, &tz) < 0)
+ return negative_errno();
+
+ if (min)
+ *min = minutesdelta;
+ return 0;
+}
+
+int clock_reset_timewarp(void) {
+ const struct timeval *tv_null = NULL;
+ struct timezone tz;
+
+ tz.tz_minuteswest = 0;
+ tz.tz_dsttime = 0; /* DST_NONE */
+
+ /*
+ * The very first call to settimeofday() does time warp magic. Do a
+ * dummy call here, so the time warping is sealed and all later calls
+ * behave as expected.
+ */
+ if (settimeofday(tv_null, &tz) < 0)
+ return -errno;
+
+ return 0;
+}
+
+#define TIME_EPOCH_USEC ((usec_t) TIME_EPOCH * USEC_PER_SEC)
+
+int clock_apply_epoch(void) {
+ struct timespec ts;
+
+ if (now(CLOCK_REALTIME) >= TIME_EPOCH_USEC)
+ return 0;
+
+ if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, TIME_EPOCH_USEC)) < 0)
+ return -errno;
+
+ return 1;
+}
diff --git a/src/shared/clock-util.h b/src/shared/clock-util.h
new file mode 100644
index 0000000..b9db54e
--- /dev/null
+++ b/src/shared/clock-util.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <time.h>
+
+int clock_is_localtime(const char* adjtime_path);
+int clock_set_timezone(int *min);
+int clock_reset_timewarp(void);
+int clock_get_hwclock(struct tm *tm);
+int clock_set_hwclock(const struct tm *tm);
+int clock_apply_epoch(void);
diff --git a/src/shared/condition.c b/src/shared/condition.c
new file mode 100644
index 0000000..fb77966
--- /dev/null
+++ b/src/shared/condition.c
@@ -0,0 +1,733 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "apparmor-util.h"
+#include "architecture.h"
+#include "audit-util.h"
+#include "cap-list.h"
+#include "cgroup-util.h"
+#include "condition.h"
+#include "efivars.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "glob-util.h"
+#include "hostname-util.h"
+#include "ima-util.h"
+#include "list.h"
+#include "macro.h"
+#include "mountpoint-util.h"
+#include "env-file.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "tomoyo-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+Condition* condition_new(ConditionType type, const char *parameter, bool trigger, bool negate) {
+ Condition *c;
+ int r;
+
+ assert(type >= 0);
+ assert(type < _CONDITION_TYPE_MAX);
+ assert((!parameter) == (type == CONDITION_NULL));
+
+ c = new0(Condition, 1);
+ if (!c)
+ return NULL;
+
+ c->type = type;
+ c->trigger = trigger;
+ c->negate = negate;
+
+ r = free_and_strdup(&c->parameter, parameter);
+ if (r < 0) {
+ return mfree(c);
+ }
+
+ return c;
+}
+
+void condition_free(Condition *c) {
+ assert(c);
+
+ free(c->parameter);
+ free(c);
+}
+
+Condition* condition_free_list(Condition *first) {
+ Condition *c, *n;
+
+ LIST_FOREACH_SAFE(conditions, c, n, first)
+ condition_free(c);
+
+ return NULL;
+}
+
+static int condition_test_kernel_command_line(Condition *c) {
+ _cleanup_free_ char *line = NULL;
+ const char *p;
+ bool equal;
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_KERNEL_COMMAND_LINE);
+
+ r = proc_cmdline(&line);
+ if (r < 0)
+ return r;
+
+ equal = strchr(c->parameter, '=');
+
+ for (p = line;;) {
+ _cleanup_free_ char *word = NULL;
+ bool found;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES|EXTRACT_RELAX);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (equal)
+ found = streq(word, c->parameter);
+ else {
+ const char *f;
+
+ f = startswith(word, c->parameter);
+ found = f && IN_SET(*f, 0, '=');
+ }
+
+ if (found)
+ return true;
+ }
+
+ return false;
+}
+
+static int condition_test_kernel_version(Condition *c) {
+ enum {
+ /* Listed in order of checking. Note that some comparators are prefixes of others, hence the longest
+ * should be listed first. */
+ LOWER_OR_EQUAL,
+ GREATER_OR_EQUAL,
+ LOWER,
+ GREATER,
+ EQUAL,
+ _ORDER_MAX,
+ };
+
+ static const char *const prefix[_ORDER_MAX] = {
+ [LOWER_OR_EQUAL] = "<=",
+ [GREATER_OR_EQUAL] = ">=",
+ [LOWER] = "<",
+ [GREATER] = ">",
+ [EQUAL] = "=",
+ };
+ const char *p = NULL;
+ struct utsname u;
+ size_t i;
+ int k;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_KERNEL_VERSION);
+
+ assert_se(uname(&u) >= 0);
+
+ for (i = 0; i < _ORDER_MAX; i++) {
+ p = startswith(c->parameter, prefix[i]);
+ if (p)
+ break;
+ }
+
+ /* No prefix? Then treat as glob string */
+ if (!p)
+ return fnmatch(skip_leading_chars(c->parameter, NULL), u.release, 0) == 0;
+
+ k = str_verscmp(u.release, skip_leading_chars(p, NULL));
+
+ switch (i) {
+
+ case LOWER:
+ return k < 0;
+
+ case LOWER_OR_EQUAL:
+ return k <= 0;
+
+ case EQUAL:
+ return k == 0;
+
+ case GREATER_OR_EQUAL:
+ return k >= 0;
+
+ case GREATER:
+ return k > 0;
+
+ default:
+ assert_not_reached("Can't compare");
+ }
+}
+
+static int condition_test_user(Condition *c) {
+ uid_t id;
+ int r;
+ _cleanup_free_ char *username = NULL;
+ const char *u;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_USER);
+
+ r = parse_uid(c->parameter, &id);
+ if (r >= 0)
+ return id == getuid() || id == geteuid();
+
+ if (streq("@system", c->parameter))
+ return uid_is_system(getuid()) || uid_is_system(geteuid());
+
+ username = getusername_malloc();
+ if (!username)
+ return -ENOMEM;
+
+ if (streq(username, c->parameter))
+ return 1;
+
+ if (getpid_cached() == 1)
+ return streq(c->parameter, "root");
+
+ u = c->parameter;
+ r = get_user_creds(&u, &id, NULL, NULL, NULL, USER_CREDS_ALLOW_MISSING);
+ if (r < 0)
+ return 0;
+
+ return id == getuid() || id == geteuid();
+}
+
+static int condition_test_control_group_controller(Condition *c) {
+ int r;
+ CGroupMask system_mask, wanted_mask = 0;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_CONTROL_GROUP_CONTROLLER);
+
+ r = cg_mask_supported(&system_mask);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine supported controllers: %m");
+
+ r = cg_mask_from_string(c->parameter, &wanted_mask);
+ if (r < 0 || wanted_mask <= 0) {
+ /* This won't catch the case that we have an unknown controller
+ * mixed in with valid ones -- these are only assessed on the
+ * validity of the valid controllers found. */
+ log_debug("Failed to parse cgroup string: %s", c->parameter);
+ return 1;
+ }
+
+ return FLAGS_SET(system_mask, wanted_mask);
+}
+
+static int condition_test_group(Condition *c) {
+ gid_t id;
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_GROUP);
+
+ r = parse_gid(c->parameter, &id);
+ if (r >= 0)
+ return in_gid(id);
+
+ /* Avoid any NSS lookups if we are PID1 */
+ if (getpid_cached() == 1)
+ return streq(c->parameter, "root");
+
+ return in_group(c->parameter) > 0;
+}
+
+static int condition_test_virtualization(Condition *c) {
+ int b, v;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_VIRTUALIZATION);
+
+ if (streq(c->parameter, "private-users"))
+ return running_in_userns();
+
+ v = detect_virtualization();
+ if (v < 0)
+ return v;
+
+ /* First, compare with yes/no */
+ b = parse_boolean(c->parameter);
+ if (b >= 0)
+ return b == !!v;
+
+ /* Then, compare categorization */
+ if (streq(c->parameter, "vm"))
+ return VIRTUALIZATION_IS_VM(v);
+
+ if (streq(c->parameter, "container"))
+ return VIRTUALIZATION_IS_CONTAINER(v);
+
+ /* Finally compare id */
+ return v != VIRTUALIZATION_NONE && streq(c->parameter, virtualization_to_string(v));
+}
+
+static int condition_test_architecture(Condition *c) {
+ int a, b;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_ARCHITECTURE);
+
+ a = uname_architecture();
+ if (a < 0)
+ return a;
+
+ if (streq(c->parameter, "native"))
+ b = native_architecture();
+ else {
+ b = architecture_from_string(c->parameter);
+ if (b < 0) /* unknown architecture? Then it's definitely not ours */
+ return false;
+ }
+
+ return a == b;
+}
+
+static int condition_test_host(Condition *c) {
+ _cleanup_free_ char *h = NULL;
+ sd_id128_t x, y;
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_HOST);
+
+ if (sd_id128_from_string(c->parameter, &x) >= 0) {
+
+ r = sd_id128_get_machine(&y);
+ if (r < 0)
+ return r;
+
+ return sd_id128_equal(x, y);
+ }
+
+ h = gethostname_malloc();
+ if (!h)
+ return -ENOMEM;
+
+ return fnmatch(c->parameter, h, FNM_CASEFOLD) == 0;
+}
+
+static int condition_test_ac_power(Condition *c) {
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_AC_POWER);
+
+ r = parse_boolean(c->parameter);
+ if (r < 0)
+ return r;
+
+ return (on_ac_power() != 0) == !!r;
+}
+
+static int condition_test_security(Condition *c) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_SECURITY);
+
+ if (streq(c->parameter, "selinux"))
+ return mac_selinux_use();
+ if (streq(c->parameter, "smack"))
+ return mac_smack_use();
+ if (streq(c->parameter, "apparmor"))
+ return mac_apparmor_use();
+ if (streq(c->parameter, "audit"))
+ return use_audit();
+ if (streq(c->parameter, "ima"))
+ return use_ima();
+ if (streq(c->parameter, "tomoyo"))
+ return mac_tomoyo_use();
+ if (streq(c->parameter, "uefi-secureboot"))
+ return is_efi_secure_boot();
+
+ return false;
+}
+
+static int condition_test_capability(Condition *c) {
+ unsigned long long capabilities = (unsigned long long) -1;
+ _cleanup_fclose_ FILE *f = NULL;
+ int value, r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_CAPABILITY);
+
+ /* If it's an invalid capability, we don't have it */
+ value = capability_from_name(c->parameter);
+ if (value < 0)
+ return -EINVAL;
+
+ /* If it's a valid capability we default to assume
+ * that we have it */
+
+ f = fopen("/proc/self/status", "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ p = startswith(line, "CapBnd:");
+ if (p) {
+ if (sscanf(line+7, "%llx", &capabilities) != 1)
+ return -EIO;
+
+ break;
+ }
+ }
+
+ return !!(capabilities & (1ULL << value));
+}
+
+static int condition_test_needs_update(Condition *c) {
+ const char *p;
+ struct stat usr, other;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_NEEDS_UPDATE);
+
+ /* If the file system is read-only we shouldn't suggest an update */
+ if (path_is_read_only_fs(c->parameter) > 0)
+ return false;
+
+ /* Any other failure means we should allow the condition to be true,
+ * so that we rather invoke too many update tools than too
+ * few. */
+
+ if (!path_is_absolute(c->parameter))
+ return true;
+
+ p = strjoina(c->parameter, "/.updated");
+ if (lstat(p, &other) < 0)
+ return true;
+
+ if (lstat("/usr/", &usr) < 0)
+ return true;
+
+ /*
+ * First, compare seconds as they are always accurate...
+ */
+ if (usr.st_mtim.tv_sec != other.st_mtim.tv_sec)
+ return usr.st_mtim.tv_sec > other.st_mtim.tv_sec;
+
+ /*
+ * ...then compare nanoseconds.
+ *
+ * A false positive is only possible when /usr's nanoseconds > 0
+ * (otherwise /usr cannot be strictly newer than the target file)
+ * AND the target file's nanoseconds == 0
+ * (otherwise the filesystem supports nsec timestamps, see stat(2)).
+ */
+ if (usr.st_mtim.tv_nsec > 0 && other.st_mtim.tv_nsec == 0) {
+ _cleanup_free_ char *timestamp_str = NULL;
+ uint64_t timestamp;
+ int r;
+
+ r = parse_env_file(NULL, p, "TIMESTAMP_NSEC", &timestamp_str);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse timestamp file '%s', using mtime: %m", p);
+ return true;
+ } else if (r == 0) {
+ log_debug("No data in timestamp file '%s', using mtime", p);
+ return true;
+ }
+
+ r = safe_atou64(timestamp_str, &timestamp);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse timestamp value '%s' in file '%s', using mtime: %m", timestamp_str, p);
+ return true;
+ }
+
+ timespec_store(&other.st_mtim, timestamp);
+ }
+
+ return usr.st_mtim.tv_nsec > other.st_mtim.tv_nsec;
+}
+
+static int condition_test_first_boot(Condition *c) {
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_FIRST_BOOT);
+
+ r = parse_boolean(c->parameter);
+ if (r < 0)
+ return r;
+
+ return (access("/run/systemd/first-boot", F_OK) >= 0) == !!r;
+}
+
+static int condition_test_path_exists(Condition *c) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_EXISTS);
+
+ return access(c->parameter, F_OK) >= 0;
+}
+
+static int condition_test_path_exists_glob(Condition *c) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_EXISTS_GLOB);
+
+ return glob_exists(c->parameter) > 0;
+}
+
+static int condition_test_path_is_directory(Condition *c) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_IS_DIRECTORY);
+
+ return is_dir(c->parameter, true) > 0;
+}
+
+static int condition_test_path_is_symbolic_link(Condition *c) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_IS_SYMBOLIC_LINK);
+
+ return is_symlink(c->parameter) > 0;
+}
+
+static int condition_test_path_is_mount_point(Condition *c) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_IS_MOUNT_POINT);
+
+ return path_is_mount_point(c->parameter, NULL, AT_SYMLINK_FOLLOW) > 0;
+}
+
+static int condition_test_path_is_read_write(Condition *c) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_IS_READ_WRITE);
+
+ return path_is_read_only_fs(c->parameter) <= 0;
+}
+
+static int condition_test_directory_not_empty(Condition *c) {
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_DIRECTORY_NOT_EMPTY);
+
+ r = dir_is_empty(c->parameter);
+ return r <= 0 && r != -ENOENT;
+}
+
+static int condition_test_file_not_empty(Condition *c) {
+ struct stat st;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_FILE_NOT_EMPTY);
+
+ return (stat(c->parameter, &st) >= 0 &&
+ S_ISREG(st.st_mode) &&
+ st.st_size > 0);
+}
+
+static int condition_test_file_is_executable(Condition *c) {
+ struct stat st;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_FILE_IS_EXECUTABLE);
+
+ return (stat(c->parameter, &st) >= 0 &&
+ S_ISREG(st.st_mode) &&
+ (st.st_mode & 0111));
+}
+
+static int condition_test_null(Condition *c) {
+ assert(c);
+ assert(c->type == CONDITION_NULL);
+
+ /* Note that during parsing we already evaluate the string and
+ * store it in c->negate */
+ return true;
+}
+
+int condition_test(Condition *c) {
+
+ static int (*const condition_tests[_CONDITION_TYPE_MAX])(Condition *c) = {
+ [CONDITION_PATH_EXISTS] = condition_test_path_exists,
+ [CONDITION_PATH_EXISTS_GLOB] = condition_test_path_exists_glob,
+ [CONDITION_PATH_IS_DIRECTORY] = condition_test_path_is_directory,
+ [CONDITION_PATH_IS_SYMBOLIC_LINK] = condition_test_path_is_symbolic_link,
+ [CONDITION_PATH_IS_MOUNT_POINT] = condition_test_path_is_mount_point,
+ [CONDITION_PATH_IS_READ_WRITE] = condition_test_path_is_read_write,
+ [CONDITION_DIRECTORY_NOT_EMPTY] = condition_test_directory_not_empty,
+ [CONDITION_FILE_NOT_EMPTY] = condition_test_file_not_empty,
+ [CONDITION_FILE_IS_EXECUTABLE] = condition_test_file_is_executable,
+ [CONDITION_KERNEL_COMMAND_LINE] = condition_test_kernel_command_line,
+ [CONDITION_KERNEL_VERSION] = condition_test_kernel_version,
+ [CONDITION_VIRTUALIZATION] = condition_test_virtualization,
+ [CONDITION_SECURITY] = condition_test_security,
+ [CONDITION_CAPABILITY] = condition_test_capability,
+ [CONDITION_HOST] = condition_test_host,
+ [CONDITION_AC_POWER] = condition_test_ac_power,
+ [CONDITION_ARCHITECTURE] = condition_test_architecture,
+ [CONDITION_NEEDS_UPDATE] = condition_test_needs_update,
+ [CONDITION_FIRST_BOOT] = condition_test_first_boot,
+ [CONDITION_USER] = condition_test_user,
+ [CONDITION_GROUP] = condition_test_group,
+ [CONDITION_CONTROL_GROUP_CONTROLLER] = condition_test_control_group_controller,
+ [CONDITION_NULL] = condition_test_null,
+ };
+
+ int r, b;
+
+ assert(c);
+ assert(c->type >= 0);
+ assert(c->type < _CONDITION_TYPE_MAX);
+
+ r = condition_tests[c->type](c);
+ if (r < 0) {
+ c->result = CONDITION_ERROR;
+ return r;
+ }
+
+ b = (r > 0) == !c->negate;
+ c->result = b ? CONDITION_SUCCEEDED : CONDITION_FAILED;
+ return b;
+}
+
+void condition_dump(Condition *c, FILE *f, const char *prefix, const char *(*to_string)(ConditionType t)) {
+ assert(c);
+ assert(f);
+
+ prefix = strempty(prefix);
+
+ fprintf(f,
+ "%s\t%s: %s%s%s %s\n",
+ prefix,
+ to_string(c->type),
+ c->trigger ? "|" : "",
+ c->negate ? "!" : "",
+ c->parameter,
+ condition_result_to_string(c->result));
+}
+
+void condition_dump_list(Condition *first, FILE *f, const char *prefix, const char *(*to_string)(ConditionType t)) {
+ Condition *c;
+
+ LIST_FOREACH(conditions, c, first)
+ condition_dump(c, f, prefix, to_string);
+}
+
+static const char* const condition_type_table[_CONDITION_TYPE_MAX] = {
+ [CONDITION_ARCHITECTURE] = "ConditionArchitecture",
+ [CONDITION_VIRTUALIZATION] = "ConditionVirtualization",
+ [CONDITION_HOST] = "ConditionHost",
+ [CONDITION_KERNEL_COMMAND_LINE] = "ConditionKernelCommandLine",
+ [CONDITION_KERNEL_VERSION] = "ConditionKernelVersion",
+ [CONDITION_SECURITY] = "ConditionSecurity",
+ [CONDITION_CAPABILITY] = "ConditionCapability",
+ [CONDITION_AC_POWER] = "ConditionACPower",
+ [CONDITION_NEEDS_UPDATE] = "ConditionNeedsUpdate",
+ [CONDITION_FIRST_BOOT] = "ConditionFirstBoot",
+ [CONDITION_PATH_EXISTS] = "ConditionPathExists",
+ [CONDITION_PATH_EXISTS_GLOB] = "ConditionPathExistsGlob",
+ [CONDITION_PATH_IS_DIRECTORY] = "ConditionPathIsDirectory",
+ [CONDITION_PATH_IS_SYMBOLIC_LINK] = "ConditionPathIsSymbolicLink",
+ [CONDITION_PATH_IS_MOUNT_POINT] = "ConditionPathIsMountPoint",
+ [CONDITION_PATH_IS_READ_WRITE] = "ConditionPathIsReadWrite",
+ [CONDITION_DIRECTORY_NOT_EMPTY] = "ConditionDirectoryNotEmpty",
+ [CONDITION_FILE_NOT_EMPTY] = "ConditionFileNotEmpty",
+ [CONDITION_FILE_IS_EXECUTABLE] = "ConditionFileIsExecutable",
+ [CONDITION_USER] = "ConditionUser",
+ [CONDITION_GROUP] = "ConditionGroup",
+ [CONDITION_CONTROL_GROUP_CONTROLLER] = "ConditionControlGroupController",
+ [CONDITION_NULL] = "ConditionNull"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(condition_type, ConditionType);
+
+static const char* const assert_type_table[_CONDITION_TYPE_MAX] = {
+ [CONDITION_ARCHITECTURE] = "AssertArchitecture",
+ [CONDITION_VIRTUALIZATION] = "AssertVirtualization",
+ [CONDITION_HOST] = "AssertHost",
+ [CONDITION_KERNEL_COMMAND_LINE] = "AssertKernelCommandLine",
+ [CONDITION_KERNEL_VERSION] = "AssertKernelVersion",
+ [CONDITION_SECURITY] = "AssertSecurity",
+ [CONDITION_CAPABILITY] = "AssertCapability",
+ [CONDITION_AC_POWER] = "AssertACPower",
+ [CONDITION_NEEDS_UPDATE] = "AssertNeedsUpdate",
+ [CONDITION_FIRST_BOOT] = "AssertFirstBoot",
+ [CONDITION_PATH_EXISTS] = "AssertPathExists",
+ [CONDITION_PATH_EXISTS_GLOB] = "AssertPathExistsGlob",
+ [CONDITION_PATH_IS_DIRECTORY] = "AssertPathIsDirectory",
+ [CONDITION_PATH_IS_SYMBOLIC_LINK] = "AssertPathIsSymbolicLink",
+ [CONDITION_PATH_IS_MOUNT_POINT] = "AssertPathIsMountPoint",
+ [CONDITION_PATH_IS_READ_WRITE] = "AssertPathIsReadWrite",
+ [CONDITION_DIRECTORY_NOT_EMPTY] = "AssertDirectoryNotEmpty",
+ [CONDITION_FILE_NOT_EMPTY] = "AssertFileNotEmpty",
+ [CONDITION_FILE_IS_EXECUTABLE] = "AssertFileIsExecutable",
+ [CONDITION_USER] = "AssertUser",
+ [CONDITION_GROUP] = "AssertGroup",
+ [CONDITION_CONTROL_GROUP_CONTROLLER] = "AssertControlGroupController",
+ [CONDITION_NULL] = "AssertNull"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(assert_type, ConditionType);
+
+static const char* const condition_result_table[_CONDITION_RESULT_MAX] = {
+ [CONDITION_UNTESTED] = "untested",
+ [CONDITION_SUCCEEDED] = "succeeded",
+ [CONDITION_FAILED] = "failed",
+ [CONDITION_ERROR] = "error",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(condition_result, ConditionResult);
diff --git a/src/shared/condition.h b/src/shared/condition.h
new file mode 100644
index 0000000..e69fc36
--- /dev/null
+++ b/src/shared/condition.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "list.h"
+#include "macro.h"
+
+typedef enum ConditionType {
+ CONDITION_ARCHITECTURE,
+ CONDITION_VIRTUALIZATION,
+ CONDITION_HOST,
+ CONDITION_KERNEL_COMMAND_LINE,
+ CONDITION_KERNEL_VERSION,
+ CONDITION_SECURITY,
+ CONDITION_CAPABILITY,
+ CONDITION_AC_POWER,
+
+ CONDITION_NEEDS_UPDATE,
+ CONDITION_FIRST_BOOT,
+
+ CONDITION_PATH_EXISTS,
+ CONDITION_PATH_EXISTS_GLOB,
+ CONDITION_PATH_IS_DIRECTORY,
+ CONDITION_PATH_IS_SYMBOLIC_LINK,
+ CONDITION_PATH_IS_MOUNT_POINT,
+ CONDITION_PATH_IS_READ_WRITE,
+ CONDITION_DIRECTORY_NOT_EMPTY,
+ CONDITION_FILE_NOT_EMPTY,
+ CONDITION_FILE_IS_EXECUTABLE,
+
+ CONDITION_NULL,
+
+ CONDITION_USER,
+ CONDITION_GROUP,
+
+ CONDITION_CONTROL_GROUP_CONTROLLER,
+
+ _CONDITION_TYPE_MAX,
+ _CONDITION_TYPE_INVALID = -1
+} ConditionType;
+
+typedef enum ConditionResult {
+ CONDITION_UNTESTED,
+ CONDITION_SUCCEEDED,
+ CONDITION_FAILED,
+ CONDITION_ERROR,
+ _CONDITION_RESULT_MAX,
+ _CONDITION_RESULT_INVALID = -1
+} ConditionResult;
+
+typedef struct Condition {
+ ConditionType type:8;
+
+ bool trigger:1;
+ bool negate:1;
+
+ ConditionResult result:6;
+
+ char *parameter;
+
+ LIST_FIELDS(struct Condition, conditions);
+} Condition;
+
+Condition* condition_new(ConditionType type, const char *parameter, bool trigger, bool negate);
+void condition_free(Condition *c);
+Condition* condition_free_list(Condition *c);
+
+int condition_test(Condition *c);
+
+void condition_dump(Condition *c, FILE *f, const char *prefix, const char *(*to_string)(ConditionType t));
+void condition_dump_list(Condition *c, FILE *f, const char *prefix, const char *(*to_string)(ConditionType t));
+
+const char* condition_type_to_string(ConditionType t) _const_;
+ConditionType condition_type_from_string(const char *s) _pure_;
+
+const char* assert_type_to_string(ConditionType t) _const_;
+ConditionType assert_type_from_string(const char *s) _pure_;
+
+const char* condition_result_to_string(ConditionResult r) _const_;
+ConditionResult condition_result_from_string(const char *s) _pure_;
+
+static inline bool condition_takes_path(ConditionType t) {
+ return IN_SET(t,
+ CONDITION_PATH_EXISTS,
+ CONDITION_PATH_EXISTS_GLOB,
+ CONDITION_PATH_IS_DIRECTORY,
+ CONDITION_PATH_IS_SYMBOLIC_LINK,
+ CONDITION_PATH_IS_MOUNT_POINT,
+ CONDITION_PATH_IS_READ_WRITE,
+ CONDITION_DIRECTORY_NOT_EMPTY,
+ CONDITION_FILE_NOT_EMPTY,
+ CONDITION_FILE_IS_EXECUTABLE,
+ CONDITION_NEEDS_UPDATE);
+}
diff --git a/src/shared/conf-parser.c b/src/shared/conf-parser.c
new file mode 100644
index 0000000..b80c147
--- /dev/null
+++ b/src/shared/conf-parser.c
@@ -0,0 +1,1113 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "time-util.h"
+#include "utf8.h"
+
+int config_item_table_lookup(
+ const void *table,
+ const char *section,
+ const char *lvalue,
+ ConfigParserCallback *func,
+ int *ltype,
+ void **data,
+ void *userdata) {
+
+ const ConfigTableItem *t;
+
+ assert(table);
+ assert(lvalue);
+ assert(func);
+ assert(ltype);
+ assert(data);
+
+ for (t = table; t->lvalue; t++) {
+
+ if (!streq(lvalue, t->lvalue))
+ continue;
+
+ if (!streq_ptr(section, t->section))
+ continue;
+
+ *func = t->parse;
+ *ltype = t->ltype;
+ *data = t->data;
+ return 1;
+ }
+
+ return 0;
+}
+
+int config_item_perf_lookup(
+ const void *table,
+ const char *section,
+ const char *lvalue,
+ ConfigParserCallback *func,
+ int *ltype,
+ void **data,
+ void *userdata) {
+
+ ConfigPerfItemLookup lookup = (ConfigPerfItemLookup) table;
+ const ConfigPerfItem *p;
+
+ assert(table);
+ assert(lvalue);
+ assert(func);
+ assert(ltype);
+ assert(data);
+
+ if (section) {
+ const char *key;
+
+ key = strjoina(section, ".", lvalue);
+ p = lookup(key, strlen(key));
+ } else
+ p = lookup(lvalue, strlen(lvalue));
+ if (!p)
+ return 0;
+
+ *func = p->parse;
+ *ltype = p->ltype;
+ *data = (uint8_t*) userdata + p->offset;
+ return 1;
+}
+
+/* Run the user supplied parser for an assignment */
+static int next_assignment(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ ConfigItemLookup lookup,
+ const void *table,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ const char *rvalue,
+ ConfigParseFlags flags,
+ void *userdata) {
+
+ ConfigParserCallback func = NULL;
+ int ltype = 0;
+ void *data = NULL;
+ int r;
+
+ assert(filename);
+ assert(line > 0);
+ assert(lookup);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = lookup(table, section, lvalue, &func, &ltype, &data, userdata);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (func)
+ return func(unit, filename, line, section, section_line,
+ lvalue, ltype, rvalue, data, userdata);
+
+ return 0;
+ }
+
+ /* Warn about unknown non-extension fields. */
+ if (!(flags & CONFIG_PARSE_RELAXED) && !startswith(lvalue, "X-"))
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Unknown lvalue '%s' in section '%s', ignoring", lvalue, section);
+
+ return 0;
+}
+
+/* Parse a single logical line */
+static int parse_line(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ char **section,
+ unsigned *section_line,
+ bool *section_ignored,
+ char *l,
+ void *userdata) {
+
+ char *e, *include;
+
+ assert(filename);
+ assert(line > 0);
+ assert(lookup);
+ assert(l);
+
+ l = strstrip(l);
+ if (!*l)
+ return 0;
+
+ if (*l == '\n')
+ return 0;
+
+ include = first_word(l, ".include");
+ if (include) {
+ _cleanup_free_ char *fn = NULL;
+
+ /* .includes are a bad idea, we only support them here
+ * for historical reasons. They create cyclic include
+ * problems and make it difficult to detect
+ * configuration file changes with an easy
+ * stat(). Better approaches, such as .d/ drop-in
+ * snippets exist.
+ *
+ * Support for them should be eventually removed. */
+
+ if (!(flags & CONFIG_PARSE_ALLOW_INCLUDE)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, ".include not allowed here. Ignoring.");
+ return 0;
+ }
+
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ ".include directives are deprecated, and support for them will be removed in a future version of systemd. "
+ "Please use drop-in files instead.");
+
+ fn = file_in_same_dir(filename, strstrip(include));
+ if (!fn)
+ return -ENOMEM;
+
+ return config_parse(unit, fn, NULL, sections, lookup, table, flags, userdata);
+ }
+
+ if (!utf8_is_valid(l))
+ return log_syntax_invalid_utf8(unit, LOG_WARNING, filename, line, l);
+
+ if (*l == '[') {
+ size_t k;
+ char *n;
+
+ k = strlen(l);
+ assert(k > 0);
+
+ if (l[k-1] != ']') {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid section header '%s'", l);
+ return -EBADMSG;
+ }
+
+ n = strndup(l+1, k-2);
+ if (!n)
+ return -ENOMEM;
+
+ if (sections && !nulstr_contains(sections, n)) {
+
+ if (!(flags & CONFIG_PARSE_RELAXED) && !startswith(n, "X-"))
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Unknown section '%s'. Ignoring.", n);
+
+ free(n);
+ *section = mfree(*section);
+ *section_line = 0;
+ *section_ignored = true;
+ } else {
+ free_and_replace(*section, n);
+ *section_line = line;
+ *section_ignored = false;
+ }
+
+ return 0;
+ }
+
+ if (sections && !*section) {
+
+ if (!(flags & CONFIG_PARSE_RELAXED) && !*section_ignored)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Assignment outside of section. Ignoring.");
+
+ return 0;
+ }
+
+ e = strchr(l, '=');
+ if (!e) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Missing '='.");
+ return -EINVAL;
+ }
+
+ *e = 0;
+ e++;
+
+ return next_assignment(unit,
+ filename,
+ line,
+ lookup,
+ table,
+ *section,
+ *section_line,
+ strstrip(l),
+ strstrip(e),
+ flags,
+ userdata);
+}
+
+/* Go through the file and parse each line */
+int config_parse(const char *unit,
+ const char *filename,
+ FILE *f,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata) {
+
+ _cleanup_free_ char *section = NULL, *continuation = NULL;
+ _cleanup_fclose_ FILE *ours = NULL;
+ unsigned line = 0, section_line = 0;
+ bool section_ignored = false;
+ int r;
+
+ assert(filename);
+ assert(lookup);
+
+ if (!f) {
+ f = ours = fopen(filename, "re");
+ if (!f) {
+ /* Only log on request, except for ENOENT,
+ * since we return 0 to the caller. */
+ if ((flags & CONFIG_PARSE_WARN) || errno == ENOENT)
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to open configuration file '%s': %m", filename);
+ return errno == ENOENT ? 0 : -errno;
+ }
+ }
+
+ fd_warn_permissions(filename, fileno(f));
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ bool escaped = false;
+ char *l, *p, *e;
+
+ r = read_line(f, LONG_LINE_MAX, &buf);
+ if (r == 0)
+ break;
+ if (r == -ENOBUFS) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_error_errno(r, "%s:%u: Line too long", filename, line);
+
+ return r;
+ }
+ if (r < 0) {
+ if (CONFIG_PARSE_WARN)
+ log_error_errno(r, "%s:%u: Error while reading configuration file: %m", filename, line);
+
+ return r;
+ }
+
+ if (strchr(COMMENTS, *skip_leading_chars(buf, WHITESPACE)))
+ continue;
+
+ l = buf;
+ if (!(flags & CONFIG_PARSE_REFUSE_BOM)) {
+ char *q;
+
+ q = startswith(buf, UTF8_BYTE_ORDER_MARK);
+ if (q) {
+ l = q;
+ flags |= CONFIG_PARSE_REFUSE_BOM;
+ }
+ }
+
+ if (continuation) {
+ if (strlen(continuation) + strlen(l) > LONG_LINE_MAX) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_error("%s:%u: Continuation line too long", filename, line);
+ return -ENOBUFS;
+ }
+
+ if (!strextend(&continuation, l, NULL)) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_oom();
+ return -ENOMEM;
+ }
+
+ p = continuation;
+ } else
+ p = l;
+
+ for (e = p; *e; e++) {
+ if (escaped)
+ escaped = false;
+ else if (*e == '\\')
+ escaped = true;
+ }
+
+ if (escaped) {
+ *(e-1) = ' ';
+
+ if (!continuation) {
+ continuation = strdup(l);
+ if (!continuation) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_oom();
+ return -ENOMEM;
+ }
+ }
+
+ continue;
+ }
+
+ r = parse_line(unit,
+ filename,
+ ++line,
+ sections,
+ lookup,
+ table,
+ flags,
+ &section,
+ &section_line,
+ &section_ignored,
+ p,
+ userdata);
+ if (r < 0) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_warning_errno(r, "%s:%u: Failed to parse file: %m", filename, line);
+ return r;
+ }
+
+ continuation = mfree(continuation);
+ }
+
+ if (continuation) {
+ r = parse_line(unit,
+ filename,
+ ++line,
+ sections,
+ lookup,
+ table,
+ flags,
+ &section,
+ &section_line,
+ &section_ignored,
+ continuation,
+ userdata);
+ if (r < 0) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_warning_errno(r, "%s:%u: Failed to parse file: %m", filename, line);
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int config_parse_many_files(
+ const char *conf_file,
+ char **files,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata) {
+
+ char **fn;
+ int r;
+
+ if (conf_file) {
+ r = config_parse(NULL, conf_file, NULL, sections, lookup, table, flags, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(fn, files) {
+ r = config_parse(NULL, *fn, NULL, sections, lookup, table, flags, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+/* Parse each config file in the directories specified as nulstr. */
+int config_parse_many_nulstr(
+ const char *conf_file,
+ const char *conf_file_dirs,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata) {
+
+ _cleanup_strv_free_ char **files = NULL;
+ int r;
+
+ r = conf_files_list_nulstr(&files, ".conf", NULL, 0, conf_file_dirs);
+ if (r < 0)
+ return r;
+
+ return config_parse_many_files(conf_file, files, sections, lookup, table, flags, userdata);
+}
+
+/* Parse each config file in the directories specified as strv. */
+int config_parse_many(
+ const char *conf_file,
+ const char* const* conf_file_dirs,
+ const char *dropin_dirname,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata) {
+
+ _cleanup_strv_free_ char **dropin_dirs = NULL;
+ _cleanup_strv_free_ char **files = NULL;
+ const char *suffix;
+ int r;
+
+ suffix = strjoina("/", dropin_dirname);
+ r = strv_extend_strv_concat(&dropin_dirs, (char**) conf_file_dirs, suffix);
+ if (r < 0)
+ return r;
+
+ r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char* const*) dropin_dirs);
+ if (r < 0)
+ return r;
+
+ return config_parse_many_files(conf_file, files, sections, lookup, table, flags, userdata);
+}
+
+#define DEFINE_PARSER(type, vartype, conv_func) \
+ DEFINE_CONFIG_PARSE_PTR(config_parse_##type, conv_func, vartype, "Failed to parse " #type " value")
+
+DEFINE_PARSER(int, int, safe_atoi);
+DEFINE_PARSER(long, long, safe_atoli);
+DEFINE_PARSER(uint8, uint8_t, safe_atou8);
+DEFINE_PARSER(uint16, uint16_t, safe_atou16);
+DEFINE_PARSER(uint32, uint32_t, safe_atou32);
+DEFINE_PARSER(uint64, uint64_t, safe_atou64);
+DEFINE_PARSER(unsigned, unsigned, safe_atou);
+DEFINE_PARSER(double, double, safe_atod);
+DEFINE_PARSER(nsec, nsec_t, parse_nsec);
+DEFINE_PARSER(sec, usec_t, parse_sec);
+DEFINE_PARSER(mode, mode_t, parse_mode);
+
+int config_parse_iec_size(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ size_t *sz = data;
+ uint64_t v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_size(rvalue, 1024, &v);
+ if (r >= 0 && (uint64_t) (size_t) v != v)
+ r = -ERANGE;
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse size value '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ *sz = (size_t) v;
+ return 0;
+}
+
+int config_parse_si_size(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ size_t *sz = data;
+ uint64_t v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_size(rvalue, 1000, &v);
+ if (r >= 0 && (uint64_t) (size_t) v != v)
+ r = -ERANGE;
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse size value '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ *sz = (size_t) v;
+ return 0;
+}
+
+int config_parse_iec_uint64(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *bytes = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_size(rvalue, 1024, bytes);
+ if (r < 0)
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse size value, ignoring: %s", rvalue);
+
+ return 0;
+}
+
+int config_parse_bool(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int k;
+ bool *b = data;
+ bool fatal = ltype;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ k = parse_boolean(rvalue);
+ if (k < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, k,
+ "Failed to parse boolean value%s: %s",
+ fatal ? "" : ", ignoring", rvalue);
+ return fatal ? -ENOEXEC : 0;
+ }
+
+ *b = k;
+ return 0;
+}
+
+int config_parse_tristate(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int k, *t = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* A tristate is pretty much a boolean, except that it can
+ * also take the special value -1, indicating "uninitialized",
+ * much like NULL is for a pointer type. */
+
+ k = parse_boolean(rvalue);
+ if (k < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, k, "Failed to parse boolean value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *t = !!k;
+ return 0;
+}
+
+int config_parse_string(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **s = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (free_and_strdup(s, empty_to_null(rvalue)) < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_path(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *n = NULL;
+ bool fatal = ltype;
+ char **s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue))
+ goto finalize;
+
+ n = strdup(rvalue);
+ if (!n)
+ return log_oom();
+
+ r = path_simplify_and_warn(n, PATH_CHECK_ABSOLUTE | (fatal ? PATH_CHECK_FATAL : 0), unit, filename, line, lvalue);
+ if (r < 0)
+ return fatal ? -ENOEXEC : 0;
+
+finalize:
+ return free_and_replace(*s, n);
+}
+
+int config_parse_strv(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***sv = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *sv = strv_free(*sv);
+ return 0;
+ }
+
+ for (;;) {
+ char *word = NULL;
+
+ r = extract_first_word(&rvalue, &word, NULL, EXTRACT_QUOTES|EXTRACT_RETAIN_ESCAPE);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ break;
+ }
+
+ r = strv_consume(sv, word);
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+int config_parse_warn_compat(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Disabled reason = ltype;
+
+ switch(reason) {
+
+ case DISABLED_CONFIGURATION:
+ log_syntax(unit, LOG_DEBUG, filename, line, 0,
+ "Support for option %s= has been disabled at compile time and it is ignored", lvalue);
+ break;
+
+ case DISABLED_LEGACY:
+ log_syntax(unit, LOG_INFO, filename, line, 0,
+ "Support for option %s= has been removed and it is ignored", lvalue);
+ break;
+
+ case DISABLED_EXPERIMENTAL:
+ log_syntax(unit, LOG_INFO, filename, line, 0,
+ "Support for option %s= has not yet been enabled and it is ignored", lvalue);
+ break;
+ }
+
+ return 0;
+}
+
+int config_parse_log_facility(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int *o = data, x;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ x = log_facility_unshifted_from_string(rvalue);
+ if (x < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse log facility, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *o = (x << 3) | LOG_PRI(*o);
+
+ return 0;
+}
+
+int config_parse_log_level(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int *o = data, x;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ x = log_level_from_string(rvalue);
+ if (x < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse log level, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (*o < 0) /* if it wasn't initialized so far, assume zero facility */
+ *o = x;
+ else
+ *o = (*o & LOG_FACMASK) | x;
+
+ return 0;
+}
+
+int config_parse_signal(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int *sig = data, r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(sig);
+
+ r = signal_from_string(rvalue);
+ if (r <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse signal name, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *sig = r;
+ return 0;
+}
+
+int config_parse_personality(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ unsigned long *personality = data, p;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(personality);
+
+ if (isempty(rvalue))
+ p = PERSONALITY_INVALID;
+ else {
+ p = personality_from_string(rvalue);
+ if (p == PERSONALITY_INVALID) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse personality, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ *personality = p;
+ return 0;
+}
+
+int config_parse_ifname(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!ifname_valid(rvalue)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Interface name is not valid or too long, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = free_and_strdup(s, rvalue);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_ip_port(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint16_t *s = data;
+ uint16_t port;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *s = 0;
+ return 0;
+ }
+
+ r = parse_ip_port(rvalue, &port);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse port '%s'.", rvalue);
+ return 0;
+ }
+
+ *s = port;
+
+ return 0;
+}
+
+int config_parse_mtu(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint32_t *mtu = data;
+ int r;
+
+ assert(rvalue);
+ assert(mtu);
+
+ r = parse_mtu(ltype, rvalue, mtu);
+ if (r == -ERANGE) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Maximum transfer unit (MTU) value out of range. Permitted range is %" PRIu32 "…%" PRIu32 ", ignoring: %s",
+ (uint32_t) (ltype == AF_INET6 ? IPV6_MIN_MTU : IPV4_MIN_MTU), (uint32_t) UINT32_MAX,
+ rvalue);
+ return 0;
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to parse MTU value '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_rlimit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ struct rlimit **rl = data, d = {};
+ int r;
+
+ assert(rvalue);
+ assert(rl);
+
+ r = rlimit_parse(ltype, rvalue, &d);
+ if (r == -EILSEQ) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Soft resource limit chosen higher than hard limit, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse resource value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (rl[ltype])
+ *rl[ltype] = d;
+ else {
+ rl[ltype] = newdup(struct rlimit, &d, 1);
+ if (!rl[ltype])
+ return log_oom();
+ }
+
+ return 0;
+}
+
+int config_parse_permille(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ unsigned *permille = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(permille);
+
+ r = parse_permille(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to parse permille value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *permille = (unsigned) r;
+
+ return 0;
+}
diff --git a/src/shared/conf-parser.h b/src/shared/conf-parser.h
new file mode 100644
index 0000000..865db42
--- /dev/null
+++ b/src/shared/conf-parser.h
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <syslog.h>
+
+#include "alloc-util.h"
+#include "log.h"
+#include "macro.h"
+
+/* An abstract parser for simple, line based, shallow configuration files consisting of variable assignments only. */
+
+typedef enum ConfigParseFlags {
+ CONFIG_PARSE_RELAXED = 1 << 0,
+ CONFIG_PARSE_ALLOW_INCLUDE = 1 << 1,
+ CONFIG_PARSE_WARN = 1 << 2,
+ CONFIG_PARSE_REFUSE_BOM = 1 << 3,
+} ConfigParseFlags;
+
+/* Argument list for parsers of specific configuration settings. */
+#define CONFIG_PARSER_ARGUMENTS \
+ const char *unit, \
+ const char *filename, \
+ unsigned line, \
+ const char *section, \
+ unsigned section_line, \
+ const char *lvalue, \
+ int ltype, \
+ const char *rvalue, \
+ void *data, \
+ void *userdata
+
+/* Prototype for a parser for a specific configuration setting */
+typedef int (*ConfigParserCallback)(CONFIG_PARSER_ARGUMENTS);
+
+/* A macro declaring the a function prototype, following the typedef above, simply because it's so cumbersomely long
+ * otherwise. (And current emacs gets irritatingly slow when editing files that contain lots of very long function
+ * prototypes on the same screen…) */
+#define CONFIG_PARSER_PROTOTYPE(name) int name(CONFIG_PARSER_ARGUMENTS)
+
+/* Wraps information for parsing a specific configuration variable, to
+ * be stored in a simple array */
+typedef struct ConfigTableItem {
+ const char *section; /* Section */
+ const char *lvalue; /* Name of the variable */
+ ConfigParserCallback parse; /* Function that is called to parse the variable's value */
+ int ltype; /* Distinguish different variables passed to the same callback */
+ void *data; /* Where to store the variable's data */
+} ConfigTableItem;
+
+/* Wraps information for parsing a specific configuration variable, to
+ * be stored in a gperf perfect hashtable */
+typedef struct ConfigPerfItem {
+ const char *section_and_lvalue; /* Section + "." + name of the variable */
+ ConfigParserCallback parse; /* Function that is called to parse the variable's value */
+ int ltype; /* Distinguish different variables passed to the same callback */
+ size_t offset; /* Offset where to store data, from the beginning of userdata */
+} ConfigPerfItem;
+
+/* Prototype for a low-level gperf lookup function */
+typedef const ConfigPerfItem* (*ConfigPerfItemLookup)(const char *section_and_lvalue, unsigned length);
+
+/* Prototype for a generic high-level lookup function */
+typedef int (*ConfigItemLookup)(
+ const void *table,
+ const char *section,
+ const char *lvalue,
+ ConfigParserCallback *func,
+ int *ltype,
+ void **data,
+ void *userdata);
+
+/* Linear table search implementation of ConfigItemLookup, based on
+ * ConfigTableItem arrays */
+int config_item_table_lookup(const void *table, const char *section, const char *lvalue, ConfigParserCallback *func, int *ltype, void **data, void *userdata);
+
+/* gperf implementation of ConfigItemLookup, based on gperf
+ * ConfigPerfItem tables */
+int config_item_perf_lookup(const void *table, const char *section, const char *lvalue, ConfigParserCallback *func, int *ltype, void **data, void *userdata);
+
+int config_parse(
+ const char *unit,
+ const char *filename,
+ FILE *f,
+ const char *sections, /* nulstr */
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata);
+
+int config_parse_many_nulstr(
+ const char *conf_file, /* possibly NULL */
+ const char *conf_file_dirs, /* nulstr */
+ const char *sections, /* nulstr */
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata);
+
+int config_parse_many(
+ const char *conf_file, /* possibly NULL */
+ const char* const* conf_file_dirs,
+ const char *dropin_dirname,
+ const char *sections, /* nulstr */
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_int);
+CONFIG_PARSER_PROTOTYPE(config_parse_unsigned);
+CONFIG_PARSER_PROTOTYPE(config_parse_long);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint8);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint16);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint32);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint64);
+CONFIG_PARSER_PROTOTYPE(config_parse_double);
+CONFIG_PARSER_PROTOTYPE(config_parse_iec_size);
+CONFIG_PARSER_PROTOTYPE(config_parse_si_size);
+CONFIG_PARSER_PROTOTYPE(config_parse_iec_uint64);
+CONFIG_PARSER_PROTOTYPE(config_parse_bool);
+CONFIG_PARSER_PROTOTYPE(config_parse_tristate);
+CONFIG_PARSER_PROTOTYPE(config_parse_string);
+CONFIG_PARSER_PROTOTYPE(config_parse_path);
+CONFIG_PARSER_PROTOTYPE(config_parse_strv);
+CONFIG_PARSER_PROTOTYPE(config_parse_sec);
+CONFIG_PARSER_PROTOTYPE(config_parse_nsec);
+CONFIG_PARSER_PROTOTYPE(config_parse_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_warn_compat);
+CONFIG_PARSER_PROTOTYPE(config_parse_log_facility);
+CONFIG_PARSER_PROTOTYPE(config_parse_log_level);
+CONFIG_PARSER_PROTOTYPE(config_parse_signal);
+CONFIG_PARSER_PROTOTYPE(config_parse_personality);
+CONFIG_PARSER_PROTOTYPE(config_parse_permille);
+CONFIG_PARSER_PROTOTYPE(config_parse_ifname);
+CONFIG_PARSER_PROTOTYPE(config_parse_ip_port);
+CONFIG_PARSER_PROTOTYPE(config_parse_mtu);
+CONFIG_PARSER_PROTOTYPE(config_parse_rlimit);
+
+typedef enum Disabled {
+ DISABLED_CONFIGURATION,
+ DISABLED_LEGACY,
+ DISABLED_EXPERIMENTAL,
+} Disabled;
+
+#define DEFINE_CONFIG_PARSE(function, parser, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ int *i = data, r; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ r = parser(rvalue); \
+ if (r < 0) { \
+ log_syntax(unit, LOG_ERR, filename, line, r, \
+ msg ", ignoring: %s", rvalue); \
+ return 0; \
+ } \
+ \
+ *i = r; \
+ return 0; \
+ }
+
+#define DEFINE_CONFIG_PARSE_PTR(function, parser, type, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type *i = data; \
+ int r; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ r = parser(rvalue, i); \
+ if (r < 0) \
+ log_syntax(unit, LOG_ERR, filename, line, r, \
+ msg ", ignoring: %s", rvalue); \
+ \
+ return 0; \
+ }
+
+#define DEFINE_CONFIG_PARSE_ENUM(function, name, type, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type *i = data, x; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ x = name##_from_string(rvalue); \
+ if (x < 0) { \
+ log_syntax(unit, LOG_ERR, filename, line, 0, \
+ msg ", ignoring: %s", rvalue); \
+ return 0; \
+ } \
+ \
+ *i = x; \
+ return 0; \
+ }
+
+#define DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(function, name, type, default_value, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type *i = data, x; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ if (isempty(rvalue)) { \
+ *i = default_value; \
+ return 0; \
+ } \
+ \
+ x = name##_from_string(rvalue); \
+ if (x < 0) { \
+ log_syntax(unit, LOG_ERR, filename, line, 0, \
+ msg ", ignoring: %s", rvalue); \
+ return 0; \
+ } \
+ \
+ *i = x; \
+ return 0; \
+ }
+
+#define DEFINE_CONFIG_PARSE_ENUMV(function, name, type, invalid, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type **enums = data, x, *ys; \
+ _cleanup_free_ type *xs = NULL; \
+ const char *word, *state; \
+ size_t l, i = 0; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ xs = new0(type, 1); \
+ if (!xs) \
+ return -ENOMEM; \
+ \
+ *xs = invalid; \
+ \
+ FOREACH_WORD(word, l, rvalue, state) { \
+ _cleanup_free_ char *en = NULL; \
+ type *new_xs; \
+ \
+ en = strndup(word, l); \
+ if (!en) \
+ return -ENOMEM; \
+ \
+ if ((x = name##_from_string(en)) < 0) { \
+ log_syntax(unit, LOG_ERR, filename, line, 0, \
+ msg ", ignoring: %s", en); \
+ continue; \
+ } \
+ \
+ for (ys = xs; x != invalid && *ys != invalid; ys++) { \
+ if (*ys == x) { \
+ log_syntax(unit, LOG_NOTICE, filename, \
+ line, 0, \
+ "Duplicate entry, ignoring: %s", \
+ en); \
+ x = invalid; \
+ } \
+ } \
+ \
+ if (x == invalid) \
+ continue; \
+ \
+ *(xs + i) = x; \
+ new_xs = realloc(xs, (++i + 1) * sizeof(type)); \
+ if (new_xs) \
+ xs = new_xs; \
+ else \
+ return -ENOMEM; \
+ \
+ *(xs + i) = invalid; \
+ } \
+ \
+ free_and_replace(*enums, xs); \
+ return 0; \
+ }
diff --git a/src/shared/cpu-set-util.c b/src/shared/cpu-set-util.c
new file mode 100644
index 0000000..9a789ae
--- /dev/null
+++ b/src/shared/cpu-set-util.c
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <syslog.h>
+
+#include "alloc-util.h"
+#include "cpu-set-util.h"
+#include "extract-word.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+cpu_set_t* cpu_set_malloc(unsigned *ncpus) {
+ cpu_set_t *c;
+ unsigned n = 1024;
+
+ /* Allocates the cpuset in the right size */
+
+ for (;;) {
+ c = CPU_ALLOC(n);
+ if (!c)
+ return NULL;
+
+ if (sched_getaffinity(0, CPU_ALLOC_SIZE(n), c) >= 0) {
+ CPU_ZERO_S(CPU_ALLOC_SIZE(n), c);
+
+ if (ncpus)
+ *ncpus = n;
+
+ return c;
+ }
+
+ CPU_FREE(c);
+
+ if (errno != EINVAL)
+ return NULL;
+
+ n *= 2;
+ }
+}
+
+int parse_cpu_set_internal(
+ const char *rvalue,
+ cpu_set_t **cpu_set,
+ bool warn,
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *lvalue) {
+
+ _cleanup_cpu_free_ cpu_set_t *c = NULL;
+ const char *p = rvalue;
+ unsigned ncpus = 0;
+
+ assert(rvalue);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ unsigned cpu, cpu_lower, cpu_upper;
+ int r;
+
+ r = extract_first_word(&p, &word, WHITESPACE ",", EXTRACT_QUOTES);
+ if (r == -ENOMEM)
+ return warn ? log_oom() : -ENOMEM;
+ if (r < 0)
+ return warn ? log_syntax(unit, LOG_ERR, filename, line, r, "Invalid value for %s: %s", lvalue, rvalue) : r;
+ if (r == 0)
+ break;
+
+ if (!c) {
+ c = cpu_set_malloc(&ncpus);
+ if (!c)
+ return warn ? log_oom() : -ENOMEM;
+ }
+
+ r = parse_range(word, &cpu_lower, &cpu_upper);
+ if (r < 0)
+ return warn ? log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse CPU affinity '%s'", word) : r;
+ if (cpu_lower >= ncpus || cpu_upper >= ncpus)
+ return warn ? log_syntax(unit, LOG_ERR, filename, line, EINVAL, "CPU out of range '%s' ncpus is %u", word, ncpus) : -EINVAL;
+
+ if (cpu_lower > cpu_upper) {
+ if (warn)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Range '%s' is invalid, %u > %u, ignoring", word, cpu_lower, cpu_upper);
+ continue;
+ }
+
+ for (cpu = cpu_lower; cpu <= cpu_upper; cpu++)
+ CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
+ }
+
+ /* On success, sets *cpu_set and returns ncpus for the system. */
+ if (c)
+ *cpu_set = TAKE_PTR(c);
+
+ return (int) ncpus;
+}
diff --git a/src/shared/cpu-set-util.h b/src/shared/cpu-set-util.h
new file mode 100644
index 0000000..1b6bd35
--- /dev/null
+++ b/src/shared/cpu-set-util.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sched.h>
+
+#include "macro.h"
+
+#ifdef __NCPUBITS
+#define CPU_SIZE_TO_NUM(n) ((n) * __NCPUBITS)
+#else
+#define CPU_SIZE_TO_NUM(n) ((n) * sizeof(cpu_set_t) * 8)
+#endif
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(cpu_set_t*, CPU_FREE);
+#define _cleanup_cpu_free_ _cleanup_(CPU_FREEp)
+
+static inline cpu_set_t* cpu_set_mfree(cpu_set_t *p) {
+ if (p)
+ CPU_FREE(p);
+ return NULL;
+}
+
+cpu_set_t* cpu_set_malloc(unsigned *ncpus);
+
+int parse_cpu_set_internal(const char *rvalue, cpu_set_t **cpu_set, bool warn, const char *unit, const char *filename, unsigned line, const char *lvalue);
+
+static inline int parse_cpu_set_and_warn(const char *rvalue, cpu_set_t **cpu_set, const char *unit, const char *filename, unsigned line, const char *lvalue) {
+ assert(lvalue);
+
+ return parse_cpu_set_internal(rvalue, cpu_set, true, unit, filename, line, lvalue);
+}
+
+static inline int parse_cpu_set(const char *rvalue, cpu_set_t **cpu_set){
+ return parse_cpu_set_internal(rvalue, cpu_set, false, NULL, NULL, 0, NULL);
+}
diff --git a/src/shared/crypt-util.c b/src/shared/crypt-util.c
new file mode 100644
index 0000000..20bdc54
--- /dev/null
+++ b/src/shared/crypt-util.c
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_LIBCRYPTSETUP
+#include "crypt-util.h"
+#include "log.h"
+
+void cryptsetup_log_glue(int level, const char *msg, void *usrptr) {
+ switch (level) {
+ case CRYPT_LOG_NORMAL:
+ level = LOG_NOTICE;
+ break;
+ case CRYPT_LOG_ERROR:
+ level = LOG_ERR;
+ break;
+ case CRYPT_LOG_VERBOSE:
+ level = LOG_INFO;
+ break;
+ case CRYPT_LOG_DEBUG:
+ level = LOG_DEBUG;
+ break;
+ default:
+ log_error("Unknown libcryptsetup log level: %d", level);
+ level = LOG_ERR;
+ }
+
+ log_full(level, "%s", msg);
+}
+#endif
diff --git a/src/shared/crypt-util.h b/src/shared/crypt-util.h
new file mode 100644
index 0000000..8c86714
--- /dev/null
+++ b/src/shared/crypt-util.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if HAVE_LIBCRYPTSETUP
+#include <libcryptsetup.h>
+
+#include "macro.h"
+
+/* libcryptsetup define for any LUKS version, compatible with libcryptsetup 1.x */
+#ifndef CRYPT_LUKS
+#define CRYPT_LUKS NULL
+#endif
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct crypt_device *, crypt_free);
+
+void cryptsetup_log_glue(int level, const char *msg, void *usrptr);
+#endif
diff --git a/src/shared/daemon-util.h b/src/shared/daemon-util.h
new file mode 100644
index 0000000..5e9eca1
--- /dev/null
+++ b/src/shared/daemon-util.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-daemon.h"
+
+#define NOTIFY_READY "READY=1\n" "STATUS=Processing requests..."
+#define NOTIFY_STOPPING "STOPPING=1\n" "STATUS=Shutting down..."
+
+static inline const char *notify_start(const char *start, const char *stop) {
+ if (start)
+ (void) sd_notify(false, start);
+
+ return stop;
+}
+
+/* This is intended to be used with _cleanup_ attribute. */
+static inline void notify_on_cleanup(const char **p) {
+ if (p)
+ (void) sd_notify(false, *p);
+}
diff --git a/src/shared/dev-setup.c b/src/shared/dev-setup.c
new file mode 100644
index 0000000..b545c2a
--- /dev/null
+++ b/src/shared/dev-setup.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dev-setup.h"
+#include "label.h"
+#include "log.h"
+#include "path-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "util.h"
+
+int dev_setup(const char *prefix, uid_t uid, gid_t gid) {
+ static const char symlinks[] =
+ "-/proc/kcore\0" "/dev/core\0"
+ "/proc/self/fd\0" "/dev/fd\0"
+ "/proc/self/fd/0\0" "/dev/stdin\0"
+ "/proc/self/fd/1\0" "/dev/stdout\0"
+ "/proc/self/fd/2\0" "/dev/stderr\0";
+
+ const char *j, *k;
+ int r;
+
+ NULSTR_FOREACH_PAIR(j, k, symlinks) {
+ _cleanup_free_ char *link_name = NULL;
+ const char *n;
+
+ if (j[0] == '-') {
+ j++;
+
+ if (access(j, F_OK) < 0)
+ continue;
+ }
+
+ if (prefix) {
+ link_name = prefix_root(prefix, k);
+ if (!link_name)
+ return -ENOMEM;
+
+ n = link_name;
+ } else
+ n = k;
+
+ r = symlink_label(j, n);
+ if (r < 0)
+ log_debug_errno(r, "Failed to symlink %s to %s: %m", j, n);
+
+ if (uid != UID_INVALID || gid != GID_INVALID)
+ if (lchown(n, uid, gid) < 0)
+ log_debug_errno(errno, "Failed to chown %s: %m", n);
+ }
+
+ return 0;
+}
+
+int make_inaccessible_nodes(const char *root, uid_t uid, gid_t gid) {
+ static const struct {
+ const char *name;
+ mode_t mode;
+ } table[] = {
+ { "/run/systemd", S_IFDIR | 0755 },
+ { "/run/systemd/inaccessible", S_IFDIR | 0000 },
+ { "/run/systemd/inaccessible/reg", S_IFREG | 0000 },
+ { "/run/systemd/inaccessible/dir", S_IFDIR | 0000 },
+ { "/run/systemd/inaccessible/fifo", S_IFIFO | 0000 },
+ { "/run/systemd/inaccessible/sock", S_IFSOCK | 0000 },
+
+ /* The following two are likely to fail if we lack the privs for it (for example in an userns
+ * environment, if CAP_SYS_MKNOD is missing, or if a device node policy prohibit major/minor of 0
+ * device nodes to be created). But that's entirely fine. Consumers of these files should carry
+ * fallback to use a different node then, for example /run/systemd/inaccessible/sock, which is close
+ * enough in behaviour and semantics for most uses. */
+ { "/run/systemd/inaccessible/chr", S_IFCHR | 0000 },
+ { "/run/systemd/inaccessible/blk", S_IFBLK | 0000 },
+ };
+
+ _cleanup_umask_ mode_t u;
+ size_t i;
+ int r;
+
+ u = umask(0000);
+
+ /* Set up inaccessible (and empty) file nodes of all types. This are used to as mount sources for over-mounting
+ * ("masking") file nodes that shall become inaccessible and empty for specific containers or services. We try
+ * to lock down these nodes as much as we can, but otherwise try to match them as closely as possible with the
+ * underlying file, i.e. in the best case we offer the same node type as the underlying node. */
+
+ for (i = 0; i < ELEMENTSOF(table); i++) {
+ _cleanup_free_ char *path = NULL;
+
+ path = prefix_root(root, table[i].name);
+ if (!path)
+ return log_oom();
+
+ if (S_ISDIR(table[i].mode))
+ r = mkdir(path, table[i].mode & 07777);
+ else
+ r = mknod(path, table[i].mode, makedev(0, 0));
+ if (r < 0) {
+ if (errno != EEXIST)
+ log_debug_errno(errno, "Failed to create '%s', ignoring: %m", path);
+ continue;
+ }
+
+ if (uid != UID_INVALID || gid != GID_INVALID) {
+ if (lchown(path, uid, gid) < 0)
+ log_debug_errno(errno, "Failed to chown '%s': %m", path);
+ }
+ }
+
+ return 0;
+}
diff --git a/src/shared/dev-setup.h b/src/shared/dev-setup.h
new file mode 100644
index 0000000..72b90ec
--- /dev/null
+++ b/src/shared/dev-setup.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+int dev_setup(const char *prefix, uid_t uid, gid_t gid);
+
+int make_inaccessible_nodes(const char *root, uid_t uid, gid_t gid);
diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c
new file mode 100644
index 0000000..d340487
--- /dev/null
+++ b/src/shared/dissect-image.c
@@ -0,0 +1,1507 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#include "sd-device.h"
+#include "sd-id128.h"
+
+#include "architecture.h"
+#include "ask-password-api.h"
+#include "blkid-util.h"
+#include "blockdev-util.h"
+#include "copy.h"
+#include "crypt-util.h"
+#include "def.h"
+#include "device-nodes.h"
+#include "device-util.h"
+#include "dissect-image.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "gpt.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "linux-3.13/dm-ioctl.h"
+#include "missing.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "os-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "raw-clone.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "udev-util.h"
+#include "user-util.h"
+#include "xattr-util.h"
+
+int probe_filesystem(const char *node, char **ret_fstype) {
+ /* Try to find device content type and return it in *ret_fstype. If nothing is found,
+ * 0/NULL will be returned. -EUCLEAN will be returned for ambigous results, and an
+ * different error otherwise. */
+
+#if HAVE_BLKID
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ const char *fstype;
+ int r;
+
+ errno = 0;
+ b = blkid_new_probe_from_filename(node);
+ if (!b)
+ return -errno ?: -ENOMEM;
+
+ blkid_probe_enable_superblocks(b, 1);
+ blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (r == 1) {
+ log_debug("No type detected on partition %s", node);
+ goto not_found;
+ }
+ if (r == -2) {
+ log_debug("Results ambiguous for partition %s", node);
+ return -EUCLEAN;
+ }
+ if (r != 0)
+ return -errno ?: -EIO;
+
+ (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
+
+ if (fstype) {
+ char *t;
+
+ t = strdup(fstype);
+ if (!t)
+ return -ENOMEM;
+
+ *ret_fstype = t;
+ return 1;
+ }
+
+not_found:
+ *ret_fstype = NULL;
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+#if HAVE_BLKID
+/* Detect RPMB and Boot partitions, which are not listed by blkid.
+ * See https://github.com/systemd/systemd/issues/5806. */
+static bool device_is_mmc_special_partition(sd_device *d) {
+ const char *sysname;
+
+ assert(d);
+
+ if (sd_device_get_sysname(d, &sysname) < 0)
+ return false;
+
+ return startswith(sysname, "mmcblk") &&
+ (endswith(sysname, "rpmb") || endswith(sysname, "boot0") || endswith(sysname, "boot1"));
+}
+
+static bool device_is_block(sd_device *d) {
+ const char *ss;
+
+ assert(d);
+
+ if (sd_device_get_subsystem(d, &ss) < 0)
+ return false;
+
+ return streq(ss, "block");
+}
+
+static int enumerator_for_parent(sd_device *d, sd_device_enumerator **ret) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ int r;
+
+ assert(d);
+ assert(ret);
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_parent(e, d);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(e);
+ return 0;
+}
+
+/* how many times to wait for the device nodes to appear */
+#define N_DEVICE_NODE_LIST_ATTEMPTS 10
+
+static int wait_for_partitions_to_appear(
+ int fd,
+ sd_device *d,
+ unsigned num_partitions,
+ DissectImageFlags flags,
+ sd_device_enumerator **ret_enumerator) {
+
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *q;
+ unsigned n;
+ int r;
+
+ assert(fd >= 0);
+ assert(d);
+ assert(ret_enumerator);
+
+ r = enumerator_for_parent(d, &e);
+ if (r < 0)
+ return r;
+
+ /* Count the partitions enumerated by the kernel */
+ n = 0;
+ FOREACH_DEVICE(e, q) {
+ if (sd_device_get_devnum(q, NULL) < 0)
+ continue;
+ if (!device_is_block(q))
+ continue;
+ if (device_is_mmc_special_partition(q))
+ continue;
+
+ if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
+ r = device_wait_for_initialization(q, "block", NULL);
+ if (r < 0)
+ return r;
+ }
+
+ n++;
+ }
+
+ if (n == num_partitions + 1) {
+ *ret_enumerator = TAKE_PTR(e);
+ return 0; /* success! */
+ }
+ if (n > num_partitions + 1)
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+ "blkid and kernel partition lists do not match.");
+
+ /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running or it
+ * got EBUSY because udev already opened the device. Let's reprobe the device, which is a synchronous
+ * call that waits until probing is complete. */
+
+ for (unsigned j = 0; ; j++) {
+ if (j++ > 20)
+ return -EBUSY;
+
+ if (ioctl(fd, BLKRRPART, 0) >= 0)
+ break;
+ r = -errno;
+ if (r == -EINVAL) {
+ struct loop_info64 info;
+
+ /* If we are running on a loop device that has partition scanning off, return
+ * an explicit recognizable error about this, so that callers can generate a
+ * proper message explaining the situation. */
+
+ if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
+ log_debug("Device is a loop device and partition scanning is off!");
+ return -EPROTONOSUPPORT;
+ }
+ }
+ if (r != -EBUSY)
+ return r;
+
+ /* If something else has the device open, such as an udev rule, the ioctl will return
+ * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a bit,
+ * and try again.
+ *
+ * This is really something they should fix in the kernel! */
+ (void) usleep(50 * USEC_PER_MSEC);
+
+ }
+
+ return -EAGAIN; /* no success yet, try again */
+}
+
+static int loop_wait_for_partitions_to_appear(
+ int fd,
+ sd_device *d,
+ unsigned num_partitions,
+ DissectImageFlags flags,
+ sd_device_enumerator **ret_enumerator) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ assert(fd >= 0);
+ assert(d);
+ assert(ret_enumerator);
+
+ log_debug("Waiting for device (parent + %d partitions) to appear...", num_partitions);
+
+ if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
+ r = device_wait_for_initialization(d, "block", &device);
+ if (r < 0)
+ return r;
+ } else
+ device = sd_device_ref(d);
+
+ for (unsigned i = 0; i < N_DEVICE_NODE_LIST_ATTEMPTS; i++) {
+ r = wait_for_partitions_to_appear(fd, device, num_partitions, flags, ret_enumerator);
+ if (r != -EAGAIN)
+ return r;
+ }
+
+ return log_debug_errno(SYNTHETIC_ERRNO(ENXIO),
+ "Kernel partitions dit not appear within %d attempts",
+ N_DEVICE_NODE_LIST_ATTEMPTS);
+}
+
+#endif
+
+int dissect_image(
+ int fd,
+ const void *root_hash,
+ size_t root_hash_size,
+ DissectImageFlags flags,
+ DissectedImage **ret) {
+
+#if HAVE_BLKID
+ sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL;
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ _cleanup_free_ char *generic_node = NULL;
+ sd_id128_t generic_uuid = SD_ID128_NULL;
+ const char *pttype = NULL;
+ blkid_partlist pl;
+ int r, generic_nr;
+ struct stat st;
+ sd_device *q;
+ unsigned i;
+
+ assert(fd >= 0);
+ assert(ret);
+ assert(root_hash || root_hash_size == 0);
+
+ /* Probes a disk image, and returns information about what it found in *ret.
+ *
+ * Returns -ENOPKG if no suitable partition table or file system could be found.
+ * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */
+
+ if (root_hash) {
+ /* If a root hash is supplied, then we use the root partition that has a UUID that match the first
+ * 128bit of the root hash. And we use the verity partition that has a UUID that match the final
+ * 128bit. */
+
+ if (root_hash_size < sizeof(sd_id128_t))
+ return -EINVAL;
+
+ memcpy(&root_uuid, root_hash, sizeof(sd_id128_t));
+ memcpy(&verity_uuid, (const uint8_t*) root_hash + root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
+
+ if (sd_id128_is_null(root_uuid))
+ return -EINVAL;
+ if (sd_id128_is_null(verity_uuid))
+ return -EINVAL;
+ }
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTBLK;
+
+ b = blkid_new_probe();
+ if (!b)
+ return -ENOMEM;
+
+ errno = 0;
+ r = blkid_probe_set_device(b, fd, 0, 0);
+ if (r != 0)
+ return -errno ?: -ENOMEM;
+
+ if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
+ /* Look for file system superblocks, unless we only shall look for GPT partition tables */
+ blkid_probe_enable_superblocks(b, 1);
+ blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
+ }
+
+ blkid_probe_enable_partitions(b, 1);
+ blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (IN_SET(r, -2, 1)) {
+ log_debug("Failed to identify any partition table.");
+ return -ENOPKG;
+ }
+ if (r != 0)
+ return -errno ?: -EIO;
+
+ m = new0(DissectedImage, 1);
+ if (!m)
+ return -ENOMEM;
+
+ r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+ if (r < 0)
+ return r;
+
+ if (!(flags & DISSECT_IMAGE_GPT_ONLY) &&
+ (flags & DISSECT_IMAGE_REQUIRE_ROOT)) {
+ const char *usage = NULL;
+
+ (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
+ if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
+ _cleanup_free_ char *t = NULL, *n = NULL;
+ const char *fstype = NULL;
+
+ /* OK, we have found a file system, that's our root partition then. */
+ (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
+
+ if (fstype) {
+ t = strdup(fstype);
+ if (!t)
+ return -ENOMEM;
+ }
+
+ r = device_path_make_major_minor(st.st_mode, st.st_rdev, &n);
+ if (r < 0)
+ return r;
+
+ m->partitions[PARTITION_ROOT] = (DissectedPartition) {
+ .found = true,
+ .rw = true,
+ .partno = -1,
+ .architecture = _ARCHITECTURE_INVALID,
+ .fstype = TAKE_PTR(t),
+ .node = TAKE_PTR(n),
+ };
+
+ m->encrypted = streq_ptr(fstype, "crypto_LUKS");
+
+ r = loop_wait_for_partitions_to_appear(fd, d, 0, flags, &e);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+ }
+ }
+
+ (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
+ if (!pttype)
+ return -ENOPKG;
+
+ is_gpt = streq_ptr(pttype, "gpt");
+ is_mbr = streq_ptr(pttype, "dos");
+
+ if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
+ return -ENOPKG;
+
+ errno = 0;
+ pl = blkid_probe_get_partitions(b);
+ if (!pl)
+ return -errno ?: -ENOMEM;
+
+ r = loop_wait_for_partitions_to_appear(fd, d, blkid_partlist_numof_partitions(pl), flags, &e);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, q) {
+ unsigned long long pflags;
+ blkid_partition pp;
+ const char *node;
+ dev_t qn;
+ int nr;
+
+ r = sd_device_get_devnum(q, &qn);
+ if (r < 0)
+ continue;
+
+ if (st.st_rdev == qn)
+ continue;
+
+ if (!device_is_block(q))
+ continue;
+
+ if (device_is_mmc_special_partition(q))
+ continue;
+
+ r = sd_device_get_devname(q, &node);
+ if (r < 0)
+ continue;
+
+ pp = blkid_partlist_devno_to_partition(pl, qn);
+ if (!pp)
+ continue;
+
+ pflags = blkid_partition_get_flags(pp);
+
+ nr = blkid_partition_get_partno(pp);
+ if (nr < 0)
+ continue;
+
+ if (is_gpt) {
+ int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID;
+ const char *stype, *sid, *fstype = NULL;
+ sd_id128_t type_id, id;
+ bool rw = true;
+
+ sid = blkid_partition_get_uuid(pp);
+ if (!sid)
+ continue;
+ if (sd_id128_from_string(sid, &id) < 0)
+ continue;
+
+ stype = blkid_partition_get_type_string(pp);
+ if (!stype)
+ continue;
+ if (sd_id128_from_string(stype, &type_id) < 0)
+ continue;
+
+ if (sd_id128_equal(type_id, GPT_HOME)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_HOME;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+ } else if (sd_id128_equal(type_id, GPT_SRV)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_SRV;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+ } else if (sd_id128_equal(type_id, GPT_ESP)) {
+
+ /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is not defined
+ * there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as recommended by the
+ * UEFI spec (See "12.3.3 Number and Location of System Partitions"). */
+
+ if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
+ continue;
+
+ designator = PARTITION_ESP;
+ fstype = "vfat";
+ }
+#ifdef GPT_ROOT_NATIVE
+ else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* If a root ID is specified, ignore everything but the root id */
+ if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT;
+ architecture = native_architecture();
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+ } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ m->can_verity = true;
+
+ /* Ignore verity unless a root hash is specified */
+ if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT_VERITY;
+ fstype = "DM_verity_hash";
+ architecture = native_architecture();
+ rw = false;
+ }
+#endif
+#ifdef GPT_ROOT_SECONDARY
+ else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* If a root ID is specified, ignore everything but the root id */
+ if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT_SECONDARY;
+ architecture = SECONDARY_ARCHITECTURE;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+ } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ m->can_verity = true;
+
+ /* Ignore verity unless root has is specified */
+ if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT_SECONDARY_VERITY;
+ fstype = "DM_verity_hash";
+ architecture = SECONDARY_ARCHITECTURE;
+ rw = false;
+ }
+#endif
+ else if (sd_id128_equal(type_id, GPT_SWAP)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_SWAP;
+ fstype = "swap";
+ } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ if (generic_node)
+ multiple_generic = true;
+ else {
+ generic_nr = nr;
+ generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
+ generic_uuid = id;
+ generic_node = strdup(node);
+ if (!generic_node)
+ return -ENOMEM;
+ }
+ }
+
+ if (designator != _PARTITION_DESIGNATOR_INVALID) {
+ _cleanup_free_ char *t = NULL, *n = NULL;
+
+ /* First one wins */
+ if (m->partitions[designator].found)
+ continue;
+
+ if (fstype) {
+ t = strdup(fstype);
+ if (!t)
+ return -ENOMEM;
+ }
+
+ n = strdup(node);
+ if (!n)
+ return -ENOMEM;
+
+ m->partitions[designator] = (DissectedPartition) {
+ .found = true,
+ .partno = nr,
+ .rw = rw,
+ .architecture = architecture,
+ .node = TAKE_PTR(n),
+ .fstype = TAKE_PTR(t),
+ .uuid = id,
+ };
+ }
+
+ } else if (is_mbr) {
+
+ if (pflags != 0x80) /* Bootable flag */
+ continue;
+
+ if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
+ continue;
+
+ if (generic_node)
+ multiple_generic = true;
+ else {
+ generic_nr = nr;
+ generic_rw = true;
+ generic_node = strdup(node);
+ if (!generic_node)
+ return -ENOMEM;
+ }
+ }
+ }
+
+ if (!m->partitions[PARTITION_ROOT].found) {
+ /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not
+ * either, then check if there's a single generic one, and use that. */
+
+ if (m->partitions[PARTITION_ROOT_VERITY].found)
+ return -EADDRNOTAVAIL;
+
+ if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
+ m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
+ zero(m->partitions[PARTITION_ROOT_SECONDARY]);
+
+ m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
+ zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
+
+ } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) {
+
+ /* If the root has was set, then we won't fallback to a generic node, because the root hash
+ * decides */
+ if (root_hash)
+ return -EADDRNOTAVAIL;
+
+ /* If we didn't find a generic node, then we can't fix this up either */
+ if (!generic_node)
+ return -ENXIO;
+
+ /* If we didn't find a properly marked root partition, but we did find a single suitable
+ * generic Linux partition, then use this as root partition, if the caller asked for it. */
+ if (multiple_generic)
+ return -ENOTUNIQ;
+
+ m->partitions[PARTITION_ROOT] = (DissectedPartition) {
+ .found = true,
+ .rw = generic_rw,
+ .partno = generic_nr,
+ .architecture = _ARCHITECTURE_INVALID,
+ .node = TAKE_PTR(generic_node),
+ .uuid = generic_uuid,
+ };
+ }
+ }
+
+ if (root_hash) {
+ if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
+ return -EADDRNOTAVAIL;
+
+ /* If we found the primary root with the hash, then we definitely want to suppress any secondary root
+ * (which would be weird, after all the root hash should only be assigned to one pair of
+ * partitions... */
+ m->partitions[PARTITION_ROOT_SECONDARY].found = false;
+ m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
+
+ /* If we found a verity setup, then the root partition is necessarily read-only. */
+ m->partitions[PARTITION_ROOT].rw = false;
+
+ m->verity = true;
+ }
+
+ blkid_free_probe(b);
+ b = NULL;
+
+ /* Fill in file system types if we don't know them yet. */
+ for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+ DissectedPartition *p = m->partitions + i;
+
+ if (!p->found)
+ continue;
+
+ if (!p->fstype && p->node) {
+ r = probe_filesystem(p->node, &p->fstype);
+ if (r < 0 && r != -EUCLEAN)
+ return r;
+ }
+
+ if (streq_ptr(p->fstype, "crypto_LUKS"))
+ m->encrypted = true;
+
+ if (p->fstype && fstype_is_ro(p->fstype))
+ p->rw = false;
+ }
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+DissectedImage* dissected_image_unref(DissectedImage *m) {
+ unsigned i;
+
+ if (!m)
+ return NULL;
+
+ for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+ free(m->partitions[i].fstype);
+ free(m->partitions[i].node);
+ free(m->partitions[i].decrypted_fstype);
+ free(m->partitions[i].decrypted_node);
+ }
+
+ free(m->hostname);
+ strv_free(m->machine_info);
+ strv_free(m->os_release);
+
+ return mfree(m);
+}
+
+static int is_loop_device(const char *path) {
+ char s[SYS_BLOCK_PATH_MAX("/../loop/")];
+ struct stat st;
+
+ assert(path);
+
+ if (stat(path, &st) < 0)
+ return -errno;
+
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTBLK;
+
+ xsprintf_sys_block_path(s, "/loop/", st.st_dev);
+ if (access(s, F_OK) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
+ xsprintf_sys_block_path(s, "/../loop/", st.st_dev);
+ if (access(s, F_OK) < 0)
+ return errno == ENOENT ? false : -errno;
+ }
+
+ return true;
+}
+
+static int mount_partition(
+ DissectedPartition *m,
+ const char *where,
+ const char *directory,
+ uid_t uid_shift,
+ DissectImageFlags flags) {
+
+ _cleanup_free_ char *chased = NULL, *options = NULL;
+ const char *p, *node, *fstype;
+ bool rw;
+ int r;
+
+ assert(m);
+ assert(where);
+
+ node = m->decrypted_node ?: m->node;
+ fstype = m->decrypted_fstype ?: m->fstype;
+
+ if (!m->found || !node || !fstype)
+ return 0;
+
+ /* Stacked encryption? Yuck */
+ if (streq_ptr(fstype, "crypto_LUKS"))
+ return -ELOOP;
+
+ rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);
+
+ if (directory) {
+ r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased);
+ if (r < 0)
+ return r;
+
+ p = chased;
+ } else
+ p = where;
+
+ /* If requested, turn on discard support. */
+ if (fstype_can_discard(fstype) &&
+ ((flags & DISSECT_IMAGE_DISCARD) ||
+ ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node)))) {
+ options = strdup("discard");
+ if (!options)
+ return -ENOMEM;
+ }
+
+ if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
+ _cleanup_free_ char *uid_option = NULL;
+
+ if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
+ return -ENOMEM;
+
+ if (!strextend_with_separator(&options, ",", uid_option, NULL))
+ return -ENOMEM;
+ }
+
+ return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
+}
+
+int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
+ int r;
+
+ assert(m);
+ assert(where);
+
+ if (!m->partitions[PARTITION_ROOT].found)
+ return -ENXIO;
+
+ if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
+ r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ if (flags & DISSECT_IMAGE_VALIDATE_OS) {
+ r = path_is_os_tree(where);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EMEDIUMTYPE;
+ }
+ }
+
+ if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY)
+ return 0;
+
+ r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ if (m->partitions[PARTITION_ESP].found) {
+ const char *mp;
+
+ /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
+
+ FOREACH_STRING(mp, "/efi", "/boot") {
+ _cleanup_free_ char *p = NULL;
+
+ r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p);
+ if (r < 0)
+ continue;
+
+ r = dir_is_empty(p);
+ if (r > 0) {
+ r = mount_partition(m->partitions + PARTITION_ESP, where, mp, uid_shift, flags);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+#if HAVE_LIBCRYPTSETUP
+typedef struct DecryptedPartition {
+ struct crypt_device *device;
+ char *name;
+ bool relinquished;
+} DecryptedPartition;
+
+struct DecryptedImage {
+ DecryptedPartition *decrypted;
+ size_t n_decrypted;
+ size_t n_allocated;
+};
+#endif
+
+DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
+#if HAVE_LIBCRYPTSETUP
+ size_t i;
+ int r;
+
+ if (!d)
+ return NULL;
+
+ for (i = 0; i < d->n_decrypted; i++) {
+ DecryptedPartition *p = d->decrypted + i;
+
+ if (p->device && p->name && !p->relinquished) {
+ r = crypt_deactivate(p->device, p->name);
+ if (r < 0)
+ log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
+ }
+
+ if (p->device)
+ crypt_free(p->device);
+ free(p->name);
+ }
+
+ free(d);
+#endif
+ return NULL;
+}
+
+#if HAVE_LIBCRYPTSETUP
+
+static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
+ _cleanup_free_ char *name = NULL, *node = NULL;
+ const char *base;
+
+ assert(original_node);
+ assert(suffix);
+ assert(ret_name);
+ assert(ret_node);
+
+ base = strrchr(original_node, '/');
+ if (!base)
+ return -EINVAL;
+ base++;
+ if (isempty(base))
+ return -EINVAL;
+
+ name = strjoin(base, suffix);
+ if (!name)
+ return -ENOMEM;
+ if (!filename_is_valid(name))
+ return -EINVAL;
+
+ node = strjoin(crypt_get_dir(), "/", name);
+ if (!node)
+ return -ENOMEM;
+
+ *ret_name = TAKE_PTR(name);
+ *ret_node = TAKE_PTR(node);
+
+ return 0;
+}
+
+static int decrypt_partition(
+ DissectedPartition *m,
+ const char *passphrase,
+ DissectImageFlags flags,
+ DecryptedImage *d) {
+
+ _cleanup_free_ char *node = NULL, *name = NULL;
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ int r;
+
+ assert(m);
+ assert(d);
+
+ if (!m->found || !m->node || !m->fstype)
+ return 0;
+
+ if (!streq(m->fstype, "crypto_LUKS"))
+ return 0;
+
+ if (!passphrase)
+ return -ENOKEY;
+
+ r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
+ return -ENOMEM;
+
+ r = crypt_init(&cd, m->node);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
+
+ r = crypt_load(cd, CRYPT_LUKS, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to load LUKS metadata: %m");
+
+ r = crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
+ ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
+ ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to activate LUKS device: %m");
+ return r == -EPERM ? -EKEYREJECTED : r;
+ }
+
+ d->decrypted[d->n_decrypted].name = TAKE_PTR(name);
+ d->decrypted[d->n_decrypted].device = TAKE_PTR(cd);
+ d->n_decrypted++;
+
+ m->decrypted_node = TAKE_PTR(node);
+
+ return 0;
+}
+
+static int verity_partition(
+ DissectedPartition *m,
+ DissectedPartition *v,
+ const void *root_hash,
+ size_t root_hash_size,
+ DissectImageFlags flags,
+ DecryptedImage *d) {
+
+ _cleanup_free_ char *node = NULL, *name = NULL;
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ int r;
+
+ assert(m);
+ assert(v);
+
+ if (!root_hash)
+ return 0;
+
+ if (!m->found || !m->node || !m->fstype)
+ return 0;
+ if (!v->found || !v->node || !v->fstype)
+ return 0;
+
+ if (!streq(v->fstype, "DM_verity_hash"))
+ return 0;
+
+ r = make_dm_name_and_node(m->node, "-verity", &name, &node);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
+ return -ENOMEM;
+
+ r = crypt_init(&cd, v->node);
+ if (r < 0)
+ return r;
+
+ r = crypt_load(cd, CRYPT_VERITY, NULL);
+ if (r < 0)
+ return r;
+
+ r = crypt_set_data_device(cd, m->node);
+ if (r < 0)
+ return r;
+
+ r = crypt_activate_by_volume_key(cd, name, root_hash, root_hash_size, CRYPT_ACTIVATE_READONLY);
+ if (r < 0)
+ return r;
+
+ d->decrypted[d->n_decrypted].name = TAKE_PTR(name);
+ d->decrypted[d->n_decrypted].device = TAKE_PTR(cd);
+ d->n_decrypted++;
+
+ m->decrypted_node = TAKE_PTR(node);
+
+ return 0;
+}
+#endif
+
+int dissected_image_decrypt(
+ DissectedImage *m,
+ const char *passphrase,
+ const void *root_hash,
+ size_t root_hash_size,
+ DissectImageFlags flags,
+ DecryptedImage **ret) {
+
+#if HAVE_LIBCRYPTSETUP
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
+ unsigned i;
+ int r;
+#endif
+
+ assert(m);
+ assert(root_hash || root_hash_size == 0);
+
+ /* Returns:
+ *
+ * = 0 → There was nothing to decrypt
+ * > 0 → Decrypted successfully
+ * -ENOKEY → There's something to decrypt but no key was supplied
+ * -EKEYREJECTED → Passed key was not correct
+ */
+
+ if (root_hash && root_hash_size < sizeof(sd_id128_t))
+ return -EINVAL;
+
+ if (!m->encrypted && !m->verity) {
+ *ret = NULL;
+ return 0;
+ }
+
+#if HAVE_LIBCRYPTSETUP
+ d = new0(DecryptedImage, 1);
+ if (!d)
+ return -ENOMEM;
+
+ for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+ DissectedPartition *p = m->partitions + i;
+ int k;
+
+ if (!p->found)
+ continue;
+
+ r = decrypt_partition(p, passphrase, flags, d);
+ if (r < 0)
+ return r;
+
+ k = PARTITION_VERITY_OF(i);
+ if (k >= 0) {
+ r = verity_partition(p, m->partitions + k, root_hash, root_hash_size, flags, d);
+ if (r < 0)
+ return r;
+ }
+
+ if (!p->decrypted_fstype && p->decrypted_node) {
+ r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
+ if (r < 0 && r != -EUCLEAN)
+ return r;
+ }
+ }
+
+ *ret = TAKE_PTR(d);
+
+ return 1;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+int dissected_image_decrypt_interactively(
+ DissectedImage *m,
+ const char *passphrase,
+ const void *root_hash,
+ size_t root_hash_size,
+ DissectImageFlags flags,
+ DecryptedImage **ret) {
+
+ _cleanup_strv_free_erase_ char **z = NULL;
+ int n = 3, r;
+
+ if (passphrase)
+ n--;
+
+ for (;;) {
+ r = dissected_image_decrypt(m, passphrase, root_hash, root_hash_size, flags, ret);
+ if (r >= 0)
+ return r;
+ if (r == -EKEYREJECTED)
+ log_error_errno(r, "Incorrect passphrase, try again!");
+ else if (r != -ENOKEY)
+ return log_error_errno(r, "Failed to decrypt image: %m");
+
+ if (--n < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EKEYREJECTED),
+ "Too many retries.");
+
+ z = strv_free(z);
+
+ r = ask_password_auto("Please enter image passphrase:", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query for passphrase: %m");
+
+ passphrase = z[0];
+ }
+}
+
+#if HAVE_LIBCRYPTSETUP
+static int deferred_remove(DecryptedPartition *p) {
+ struct dm_ioctl dm = {
+ .version = {
+ DM_VERSION_MAJOR,
+ DM_VERSION_MINOR,
+ DM_VERSION_PATCHLEVEL
+ },
+ .data_size = sizeof(dm),
+ .flags = DM_DEFERRED_REMOVE,
+ };
+
+ _cleanup_close_ int fd = -1;
+
+ assert(p);
+
+ /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. */
+
+ fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (strlen(p->name) > sizeof(dm.name))
+ return -ENAMETOOLONG;
+
+ strncpy(dm.name, p->name, sizeof(dm.name));
+
+ if (ioctl(fd, DM_DEV_REMOVE, &dm))
+ return -errno;
+
+ return 0;
+}
+#endif
+
+int decrypted_image_relinquish(DecryptedImage *d) {
+
+#if HAVE_LIBCRYPTSETUP
+ size_t i;
+ int r;
+#endif
+
+ assert(d);
+
+ /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so
+ * that we don't clean it up ourselves either anymore */
+
+#if HAVE_LIBCRYPTSETUP
+ for (i = 0; i < d->n_decrypted; i++) {
+ DecryptedPartition *p = d->decrypted + i;
+
+ if (p->relinquished)
+ continue;
+
+ r = deferred_remove(p);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
+
+ p->relinquished = true;
+ }
+#endif
+
+ return 0;
+}
+
+int root_hash_load(const char *image, void **ret, size_t *ret_size) {
+ _cleanup_free_ char *text = NULL;
+ _cleanup_free_ void *k = NULL;
+ size_t l;
+ int r;
+
+ assert(image);
+ assert(ret);
+ assert(ret_size);
+
+ if (is_device_path(image)) {
+ /* If we are asked to load the root hash for a device node, exit early */
+ *ret = NULL;
+ *ret_size = 0;
+ return 0;
+ }
+
+ r = getxattr_malloc(image, "user.verity.roothash", &text, true);
+ if (r < 0) {
+ char *fn, *e, *n;
+
+ if (!IN_SET(r, -ENODATA, -EOPNOTSUPP, -ENOENT))
+ return r;
+
+ fn = newa(char, strlen(image) + STRLEN(".roothash") + 1);
+ n = stpcpy(fn, image);
+ e = endswith(fn, ".raw");
+ if (e)
+ n = e;
+
+ strcpy(n, ".roothash");
+
+ r = read_one_line_file(fn, &text);
+ if (r == -ENOENT) {
+ *ret = NULL;
+ *ret_size = 0;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+ }
+
+ r = unhexmem(text, strlen(text), &k, &l);
+ if (r < 0)
+ return r;
+ if (l < sizeof(sd_id128_t))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(k);
+ *ret_size = l;
+
+ return 1;
+}
+
+int dissected_image_acquire_metadata(DissectedImage *m) {
+
+ enum {
+ META_HOSTNAME,
+ META_MACHINE_ID,
+ META_MACHINE_INFO,
+ META_OS_RELEASE,
+ _META_MAX,
+ };
+
+ static const char *const paths[_META_MAX] = {
+ [META_HOSTNAME] = "/etc/hostname\0",
+ [META_MACHINE_ID] = "/etc/machine-id\0",
+ [META_MACHINE_INFO] = "/etc/machine-info\0",
+ [META_OS_RELEASE] = "/etc/os-release\0/usr/lib/os-release\0",
+ };
+
+ _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
+ _cleanup_(rmdir_and_freep) char *t = NULL;
+ _cleanup_(sigkill_waitp) pid_t child = 0;
+ sd_id128_t machine_id = SD_ID128_NULL;
+ _cleanup_free_ char *hostname = NULL;
+ unsigned n_meta_initialized = 0, k;
+ int fds[2 * _META_MAX], r;
+
+ BLOCK_SIGNALS(SIGCHLD);
+
+ assert(m);
+
+ for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
+ if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
+ if (r < 0)
+ goto finish;
+
+ r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, &child);
+ if (r < 0)
+ goto finish;
+ if (r == 0) {
+ r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_MOUNT_ROOT_ONLY|DISSECT_IMAGE_VALIDATE_OS);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to mount dissected image: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ for (k = 0; k < _META_MAX; k++) {
+ _cleanup_close_ int fd = -1;
+ const char *p;
+
+ fds[2*k] = safe_close(fds[2*k]);
+
+ NULSTR_FOREACH(p, paths[k]) {
+ fd = chase_symlinks_and_open(p, t, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
+ if (fd >= 0)
+ break;
+ }
+ if (fd < 0) {
+ log_debug_errno(fd, "Failed to read %s file of image, ignoring: %m", paths[k]);
+ continue;
+ }
+
+ r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ fds[2*k+1] = safe_close(fds[2*k+1]);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ for (k = 0; k < _META_MAX; k++) {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ fds[2*k+1] = safe_close(fds[2*k+1]);
+
+ f = fdopen(fds[2*k], "r");
+ if (!f) {
+ r = -errno;
+ goto finish;
+ }
+
+ fds[2*k] = -1;
+
+ switch (k) {
+
+ case META_HOSTNAME:
+ r = read_etc_hostname_stream(f, &hostname);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /etc/hostname: %m");
+
+ break;
+
+ case META_MACHINE_ID: {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /etc/machine-id: %m");
+ else if (r == 33) {
+ r = sd_id128_from_string(line, &machine_id);
+ if (r < 0)
+ log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
+ } else if (r == 0)
+ log_debug("/etc/machine-id file is empty.");
+ else
+ log_debug("/etc/machine-id has unexpected length %i.", r);
+
+ break;
+ }
+
+ case META_MACHINE_INFO:
+ r = load_env_file_pairs(f, "machine-info", &machine_info);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /etc/machine-info: %m");
+
+ break;
+
+ case META_OS_RELEASE:
+ r = load_env_file_pairs(f, "os-release", &os_release);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read OS release file: %m");
+
+ break;
+ }
+ }
+
+ r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
+ child = 0;
+ if (r < 0)
+ goto finish;
+ if (r != EXIT_SUCCESS)
+ return -EPROTO;
+
+ free_and_replace(m->hostname, hostname);
+ m->machine_id = machine_id;
+ strv_free_and_replace(m->machine_info, machine_info);
+ strv_free_and_replace(m->os_release, os_release);
+
+finish:
+ for (k = 0; k < n_meta_initialized; k++)
+ safe_close_pair(fds + 2*k);
+
+ return r;
+}
+
+int dissect_image_and_warn(
+ int fd,
+ const char *name,
+ const void *root_hash,
+ size_t root_hash_size,
+ DissectImageFlags flags,
+ DissectedImage **ret) {
+
+ _cleanup_free_ char *buffer = NULL;
+ int r;
+
+ if (!name) {
+ r = fd_get_path(fd, &buffer);
+ if (r < 0)
+ return r;
+
+ name = buffer;
+ }
+
+ r = dissect_image(fd, root_hash, root_hash_size, flags, ret);
+
+ switch (r) {
+
+ case -EOPNOTSUPP:
+ return log_error_errno(r, "Dissecting images is not supported, compiled without blkid support.");
+
+ case -ENOPKG:
+ return log_error_errno(r, "Couldn't identify a suitable partition table or file system in '%s'.", name);
+
+ case -EADDRNOTAVAIL:
+ return log_error_errno(r, "No root partition for specified root hash found in '%s'.", name);
+
+ case -ENOTUNIQ:
+ return log_error_errno(r, "Multiple suitable root partitions found in image '%s'.", name);
+
+ case -ENXIO:
+ return log_error_errno(r, "No suitable root partition found in image '%s'.", name);
+
+ case -EPROTONOSUPPORT:
+ return log_error_errno(r, "Device '%s' is loopback block device with partition scanning turned off, please turn it on.", name);
+
+ default:
+ if (r < 0)
+ return log_error_errno(r, "Failed to dissect image '%s': %m", name);
+
+ return r;
+ }
+}
+
+static const char *const partition_designator_table[] = {
+ [PARTITION_ROOT] = "root",
+ [PARTITION_ROOT_SECONDARY] = "root-secondary",
+ [PARTITION_HOME] = "home",
+ [PARTITION_SRV] = "srv",
+ [PARTITION_ESP] = "esp",
+ [PARTITION_SWAP] = "swap",
+ [PARTITION_ROOT_VERITY] = "root-verity",
+ [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(partition_designator, int);
diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h
new file mode 100644
index 0000000..f50b40e
--- /dev/null
+++ b/src/shared/dissect-image.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-id128.h"
+
+#include "macro.h"
+
+typedef struct DissectedImage DissectedImage;
+typedef struct DissectedPartition DissectedPartition;
+typedef struct DecryptedImage DecryptedImage;
+
+struct DissectedPartition {
+ bool found:1;
+ bool rw:1;
+ int partno; /* -1 if there was no partition and the images contains a file system directly */
+ int architecture; /* Intended architecture: either native, secondary or unset (-1). */
+ sd_id128_t uuid; /* Partition entry UUID as reported by the GPT */
+ char *fstype;
+ char *node;
+ char *decrypted_node;
+ char *decrypted_fstype;
+};
+
+enum {
+ PARTITION_ROOT,
+ PARTITION_ROOT_SECONDARY, /* Secondary architecture */
+ PARTITION_HOME,
+ PARTITION_SRV,
+ PARTITION_ESP,
+ PARTITION_SWAP,
+ PARTITION_ROOT_VERITY, /* verity data for the PARTITION_ROOT partition */
+ PARTITION_ROOT_SECONDARY_VERITY, /* verity data for the PARTITION_ROOT_SECONDARY partition */
+ _PARTITION_DESIGNATOR_MAX,
+ _PARTITION_DESIGNATOR_INVALID = -1
+};
+
+static inline int PARTITION_VERITY_OF(int p) {
+ if (p == PARTITION_ROOT)
+ return PARTITION_ROOT_VERITY;
+ if (p == PARTITION_ROOT_SECONDARY)
+ return PARTITION_ROOT_SECONDARY_VERITY;
+ return _PARTITION_DESIGNATOR_INVALID;
+}
+
+typedef enum DissectImageFlags {
+ DISSECT_IMAGE_READ_ONLY = 1 << 0,
+ DISSECT_IMAGE_DISCARD_ON_LOOP = 1 << 1, /* Turn on "discard" if on a loop device and file system supports it */
+ DISSECT_IMAGE_DISCARD = 1 << 2, /* Turn on "discard" if file system supports it, on all block devices */
+ DISSECT_IMAGE_DISCARD_ON_CRYPTO = 1 << 3, /* Turn on "discard" also on crypto devices */
+ DISSECT_IMAGE_DISCARD_ANY = DISSECT_IMAGE_DISCARD_ON_LOOP |
+ DISSECT_IMAGE_DISCARD |
+ DISSECT_IMAGE_DISCARD_ON_CRYPTO,
+ DISSECT_IMAGE_GPT_ONLY = 1 << 4, /* Only recognize images with GPT partition tables */
+ DISSECT_IMAGE_REQUIRE_ROOT = 1 << 5, /* Don't accept disks without root partition */
+ DISSECT_IMAGE_MOUNT_ROOT_ONLY = 1 << 6, /* Mount only the root partition */
+ DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY = 1 << 7, /* Mount only non-root partitions */
+ DISSECT_IMAGE_VALIDATE_OS = 1 << 8, /* Refuse mounting images that aren't identifyable as OS images */
+ DISSECT_IMAGE_NO_UDEV = 1 << 9, /* Don't wait for udev initializing things */
+} DissectImageFlags;
+
+struct DissectedImage {
+ bool encrypted:1;
+ bool verity:1; /* verity available and usable */
+ bool can_verity:1; /* verity available, but not necessarily used */
+
+ DissectedPartition partitions[_PARTITION_DESIGNATOR_MAX];
+
+ char *hostname;
+ sd_id128_t machine_id;
+ char **machine_info;
+ char **os_release;
+};
+
+int probe_filesystem(const char *node, char **ret_fstype);
+int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret);
+int dissect_image_and_warn(int fd, const char *name, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret);
+
+DissectedImage* dissected_image_unref(DissectedImage *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref);
+
+int dissected_image_decrypt(DissectedImage *m, const char *passphrase, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DecryptedImage **ret);
+int dissected_image_decrypt_interactively(DissectedImage *m, const char *passphrase, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DecryptedImage **ret);
+int dissected_image_mount(DissectedImage *m, const char *dest, uid_t uid_shift, DissectImageFlags flags);
+
+int dissected_image_acquire_metadata(DissectedImage *m);
+
+DecryptedImage* decrypted_image_unref(DecryptedImage *p);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DecryptedImage*, decrypted_image_unref);
+int decrypted_image_relinquish(DecryptedImage *d);
+
+const char* partition_designator_to_string(int i) _const_;
+int partition_designator_from_string(const char *name) _pure_;
+
+int root_hash_load(const char *image, void **ret, size_t *ret_size);
diff --git a/src/shared/dns-domain.c b/src/shared/dns-domain.c
new file mode 100644
index 0000000..4b31cb3
--- /dev/null
+++ b/src/shared/dns-domain.c
@@ -0,0 +1,1375 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_LIBIDN2
+# include <idn2.h>
+#elif HAVE_LIBIDN
+# include <idna.h>
+# include <stringprep.h>
+#endif
+
+#include <endian.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "hashmap.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+
+int dns_label_unescape(const char **name, char *dest, size_t sz, DNSLabelFlags flags) {
+ const char *n;
+ char *d, last_char = 0;
+ int r = 0;
+
+ assert(name);
+ assert(*name);
+
+ n = *name;
+ d = dest;
+
+ for (;;) {
+ if (*n == 0 || *n == '.') {
+ if (FLAGS_SET(flags, DNS_LABEL_LDH) && last_char == '-')
+ /* Trailing dash */
+ return -EINVAL;
+
+ if (*n == '.')
+ n++;
+ break;
+ }
+
+ if (r >= DNS_LABEL_MAX)
+ return -EINVAL;
+
+ if (sz <= 0)
+ return -ENOBUFS;
+
+ if (*n == '\\') {
+ /* Escaped character */
+ if (FLAGS_SET(flags, DNS_LABEL_NO_ESCAPES))
+ return -EINVAL;
+
+ n++;
+
+ if (*n == 0)
+ /* Ending NUL */
+ return -EINVAL;
+
+ else if (IN_SET(*n, '\\', '.')) {
+ /* Escaped backslash or dot */
+
+ if (FLAGS_SET(flags, DNS_LABEL_LDH))
+ return -EINVAL;
+
+ last_char = *n;
+ if (d)
+ *(d++) = *n;
+ sz--;
+ r++;
+ n++;
+
+ } else if (n[0] >= '0' && n[0] <= '9') {
+ unsigned k;
+
+ /* Escaped literal ASCII character */
+
+ if (!(n[1] >= '0' && n[1] <= '9') ||
+ !(n[2] >= '0' && n[2] <= '9'))
+ return -EINVAL;
+
+ k = ((unsigned) (n[0] - '0') * 100) +
+ ((unsigned) (n[1] - '0') * 10) +
+ ((unsigned) (n[2] - '0'));
+
+ /* Don't allow anything that doesn't
+ * fit in 8bit. Note that we do allow
+ * control characters, as some servers
+ * (e.g. cloudflare) are happy to
+ * generate labels with them
+ * inside. */
+ if (k > 255)
+ return -EINVAL;
+
+ if (FLAGS_SET(flags, DNS_LABEL_LDH) &&
+ !valid_ldh_char((char) k))
+ return -EINVAL;
+
+ last_char = (char) k;
+ if (d)
+ *(d++) = (char) k;
+ sz--;
+ r++;
+
+ n += 3;
+ } else
+ return -EINVAL;
+
+ } else if ((uint8_t) *n >= (uint8_t) ' ' && *n != 127) {
+
+ /* Normal character */
+
+ if (FLAGS_SET(flags, DNS_LABEL_LDH)) {
+ if (!valid_ldh_char(*n))
+ return -EINVAL;
+ if (r == 0 && *n == '-')
+ /* Leading dash */
+ return -EINVAL;
+ }
+
+ last_char = *n;
+ if (d)
+ *(d++) = *n;
+ sz--;
+ r++;
+ n++;
+ } else
+ return -EINVAL;
+ }
+
+ /* Empty label that is not at the end? */
+ if (r == 0 && *n)
+ return -EINVAL;
+
+ /* More than one trailing dot? */
+ if (*n == '.')
+ return -EINVAL;
+
+ if (sz >= 1 && d)
+ *d = 0;
+
+ *name = n;
+ return r;
+}
+
+/* @label_terminal: terminal character of a label, updated to point to the terminal character of
+ * the previous label (always skipping one dot) or to NULL if there are no more
+ * labels. */
+int dns_label_unescape_suffix(const char *name, const char **label_terminal, char *dest, size_t sz) {
+ const char *terminal;
+ int r;
+
+ assert(name);
+ assert(label_terminal);
+ assert(dest);
+
+ /* no more labels */
+ if (!*label_terminal) {
+ if (sz >= 1)
+ *dest = 0;
+
+ return 0;
+ }
+
+ terminal = *label_terminal;
+ assert(IN_SET(*terminal, 0, '.'));
+
+ /* Skip current terminal character (and accept domain names ending it ".") */
+ if (*terminal == 0)
+ terminal--;
+ if (terminal >= name && *terminal == '.')
+ terminal--;
+
+ /* Point name to the last label, and terminal to the preceding terminal symbol (or make it a NULL pointer) */
+ for (;;) {
+ if (terminal < name) {
+ /* Reached the first label, so indicate that there are no more */
+ terminal = NULL;
+ break;
+ }
+
+ /* Find the start of the last label */
+ if (*terminal == '.') {
+ const char *y;
+ unsigned slashes = 0;
+
+ for (y = terminal - 1; y >= name && *y == '\\'; y--)
+ slashes++;
+
+ if (slashes % 2 == 0) {
+ /* The '.' was not escaped */
+ name = terminal + 1;
+ break;
+ } else {
+ terminal = y;
+ continue;
+ }
+ }
+
+ terminal--;
+ }
+
+ r = dns_label_unescape(&name, dest, sz, 0);
+ if (r < 0)
+ return r;
+
+ *label_terminal = terminal;
+
+ return r;
+}
+
+int dns_label_escape(const char *p, size_t l, char *dest, size_t sz) {
+ char *q;
+
+ /* DNS labels must be between 1 and 63 characters long. A
+ * zero-length label does not exist. See RFC 2182, Section
+ * 11. */
+
+ if (l <= 0 || l > DNS_LABEL_MAX)
+ return -EINVAL;
+ if (sz < 1)
+ return -ENOBUFS;
+
+ assert(p);
+ assert(dest);
+
+ q = dest;
+ while (l > 0) {
+
+ if (IN_SET(*p, '.', '\\')) {
+
+ /* Dot or backslash */
+
+ if (sz < 3)
+ return -ENOBUFS;
+
+ *(q++) = '\\';
+ *(q++) = *p;
+
+ sz -= 2;
+
+ } else if (IN_SET(*p, '_', '-') ||
+ (*p >= '0' && *p <= '9') ||
+ (*p >= 'a' && *p <= 'z') ||
+ (*p >= 'A' && *p <= 'Z')) {
+
+ /* Proper character */
+
+ if (sz < 2)
+ return -ENOBUFS;
+
+ *(q++) = *p;
+ sz -= 1;
+
+ } else {
+
+ /* Everything else */
+
+ if (sz < 5)
+ return -ENOBUFS;
+
+ *(q++) = '\\';
+ *(q++) = '0' + (char) ((uint8_t) *p / 100);
+ *(q++) = '0' + (char) (((uint8_t) *p / 10) % 10);
+ *(q++) = '0' + (char) ((uint8_t) *p % 10);
+
+ sz -= 4;
+ }
+
+ p++;
+ l--;
+ }
+
+ *q = 0;
+ return (int) (q - dest);
+}
+
+int dns_label_escape_new(const char *p, size_t l, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ if (l <= 0 || l > DNS_LABEL_MAX)
+ return -EINVAL;
+
+ s = new(char, DNS_LABEL_ESCAPED_MAX);
+ if (!s)
+ return -ENOMEM;
+
+ r = dns_label_escape(p, l, s, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(s);
+
+ return r;
+}
+
+#if HAVE_LIBIDN
+int dns_label_apply_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max) {
+ _cleanup_free_ uint32_t *input = NULL;
+ size_t input_size, l;
+ const char *p;
+ bool contains_8bit = false;
+ char buffer[DNS_LABEL_MAX+1];
+
+ assert(encoded);
+ assert(decoded);
+
+ /* Converts an U-label into an A-label */
+
+ if (encoded_size <= 0)
+ return -EINVAL;
+
+ for (p = encoded; p < encoded + encoded_size; p++)
+ if ((uint8_t) *p > 127)
+ contains_8bit = true;
+
+ if (!contains_8bit) {
+ if (encoded_size > DNS_LABEL_MAX)
+ return -EINVAL;
+
+ return 0;
+ }
+
+ input = stringprep_utf8_to_ucs4(encoded, encoded_size, &input_size);
+ if (!input)
+ return -ENOMEM;
+
+ if (idna_to_ascii_4i(input, input_size, buffer, 0) != 0)
+ return -EINVAL;
+
+ l = strlen(buffer);
+
+ /* Verify that the result is not longer than one DNS label. */
+ if (l <= 0 || l > DNS_LABEL_MAX)
+ return -EINVAL;
+ if (l > decoded_max)
+ return -ENOBUFS;
+
+ memcpy(decoded, buffer, l);
+
+ /* If there's room, append a trailing NUL byte, but only then */
+ if (decoded_max > l)
+ decoded[l] = 0;
+
+ return (int) l;
+}
+
+int dns_label_undo_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max) {
+ size_t input_size, output_size;
+ _cleanup_free_ uint32_t *input = NULL;
+ _cleanup_free_ char *result = NULL;
+ uint32_t *output = NULL;
+ size_t w;
+
+ /* To be invoked after unescaping. Converts an A-label into an U-label. */
+
+ assert(encoded);
+ assert(decoded);
+
+ if (encoded_size <= 0 || encoded_size > DNS_LABEL_MAX)
+ return -EINVAL;
+
+ if (!memory_startswith(encoded, encoded_size, IDNA_ACE_PREFIX))
+ return 0;
+
+ input = stringprep_utf8_to_ucs4(encoded, encoded_size, &input_size);
+ if (!input)
+ return -ENOMEM;
+
+ output_size = input_size;
+ output = newa(uint32_t, output_size);
+
+ idna_to_unicode_44i(input, input_size, output, &output_size, 0);
+
+ result = stringprep_ucs4_to_utf8(output, output_size, NULL, &w);
+ if (!result)
+ return -ENOMEM;
+ if (w <= 0)
+ return -EINVAL;
+ if (w > decoded_max)
+ return -ENOBUFS;
+
+ memcpy(decoded, result, w);
+
+ /* Append trailing NUL byte if there's space, but only then. */
+ if (decoded_max > w)
+ decoded[w] = 0;
+
+ return w;
+}
+#endif
+
+int dns_name_concat(const char *a, const char *b, DNSLabelFlags flags, char **_ret) {
+ _cleanup_free_ char *ret = NULL;
+ size_t n = 0, allocated = 0;
+ const char *p;
+ bool first = true;
+ int r;
+
+ if (a)
+ p = a;
+ else if (b)
+ p = TAKE_PTR(b);
+ else
+ goto finish;
+
+ for (;;) {
+ char label[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&p, label, sizeof label, flags);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (*p != 0)
+ return -EINVAL;
+
+ if (b) {
+ /* Now continue with the second string, if there is one */
+ p = TAKE_PTR(b);
+ continue;
+ }
+
+ break;
+ }
+
+ if (_ret) {
+ if (!GREEDY_REALLOC(ret, allocated, n + !first + DNS_LABEL_ESCAPED_MAX))
+ return -ENOMEM;
+
+ r = dns_label_escape(label, r, ret + n + !first, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ if (!first)
+ ret[n] = '.';
+ } else {
+ char escaped[DNS_LABEL_ESCAPED_MAX];
+
+ r = dns_label_escape(label, r, escaped, sizeof(escaped));
+ if (r < 0)
+ return r;
+ }
+
+ if (!first)
+ n++;
+ else
+ first = false;
+
+ n += r;
+ }
+
+finish:
+ if (n > DNS_HOSTNAME_MAX)
+ return -EINVAL;
+
+ if (_ret) {
+ if (n == 0) {
+ /* Nothing appended? If so, generate at least a single dot, to indicate the DNS root domain */
+ if (!GREEDY_REALLOC(ret, allocated, 2))
+ return -ENOMEM;
+
+ ret[n++] = '.';
+ } else {
+ if (!GREEDY_REALLOC(ret, allocated, n + 1))
+ return -ENOMEM;
+ }
+
+ ret[n] = 0;
+ *_ret = TAKE_PTR(ret);
+ }
+
+ return 0;
+}
+
+void dns_name_hash_func(const char *p, struct siphash *state) {
+ int r;
+
+ assert(p);
+
+ for (;;) {
+ char label[DNS_LABEL_MAX+1];
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r < 0)
+ break;
+ if (r == 0)
+ break;
+
+ ascii_strlower_n(label, r);
+ siphash24_compress(label, r, state);
+ siphash24_compress_byte(0, state); /* make sure foobar and foo.bar result in different hashes */
+ }
+
+ /* enforce that all names are terminated by the empty label */
+ string_hash_func("", state);
+}
+
+int dns_name_compare_func(const char *a, const char *b) {
+ const char *x, *y;
+ int r, q;
+
+ assert(a);
+ assert(b);
+
+ x = a + strlen(a);
+ y = b + strlen(b);
+
+ for (;;) {
+ char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX];
+
+ if (x == NULL && y == NULL)
+ return 0;
+
+ r = dns_label_unescape_suffix(a, &x, la, sizeof(la));
+ q = dns_label_unescape_suffix(b, &y, lb, sizeof(lb));
+ if (r < 0 || q < 0)
+ return CMP(r, q);
+
+ r = ascii_strcasecmp_nn(la, r, lb, q);
+ if (r != 0)
+ return r;
+ }
+}
+
+DEFINE_HASH_OPS(dns_name_hash_ops, char, dns_name_hash_func, dns_name_compare_func);
+
+int dns_name_equal(const char *x, const char *y) {
+ int r, q;
+
+ assert(x);
+ assert(y);
+
+ for (;;) {
+ char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&x, la, sizeof la, 0);
+ if (r < 0)
+ return r;
+
+ q = dns_label_unescape(&y, lb, sizeof lb, 0);
+ if (q < 0)
+ return q;
+
+ if (r != q)
+ return false;
+ if (r == 0)
+ return true;
+
+ if (ascii_strcasecmp_n(la, lb, r) != 0)
+ return false;
+ }
+}
+
+int dns_name_endswith(const char *name, const char *suffix) {
+ const char *n, *s, *saved_n = NULL;
+ int r, q;
+
+ assert(name);
+ assert(suffix);
+
+ n = name;
+ s = suffix;
+
+ for (;;) {
+ char ln[DNS_LABEL_MAX], ls[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&n, ln, sizeof ln, 0);
+ if (r < 0)
+ return r;
+
+ if (!saved_n)
+ saved_n = n;
+
+ q = dns_label_unescape(&s, ls, sizeof ls, 0);
+ if (q < 0)
+ return q;
+
+ if (r == 0 && q == 0)
+ return true;
+ if (r == 0 && saved_n == n)
+ return false;
+
+ if (r != q || ascii_strcasecmp_n(ln, ls, r) != 0) {
+
+ /* Not the same, let's jump back, and try with the next label again */
+ s = suffix;
+ n = TAKE_PTR(saved_n);
+ }
+ }
+}
+
+int dns_name_startswith(const char *name, const char *prefix) {
+ const char *n, *p;
+ int r, q;
+
+ assert(name);
+ assert(prefix);
+
+ n = name;
+ p = prefix;
+
+ for (;;) {
+ char ln[DNS_LABEL_MAX], lp[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&p, lp, sizeof lp, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return true;
+
+ q = dns_label_unescape(&n, ln, sizeof ln, 0);
+ if (q < 0)
+ return q;
+
+ if (r != q)
+ return false;
+ if (ascii_strcasecmp_n(ln, lp, r) != 0)
+ return false;
+ }
+}
+
+int dns_name_change_suffix(const char *name, const char *old_suffix, const char *new_suffix, char **ret) {
+ const char *n, *s, *saved_before = NULL, *saved_after = NULL, *prefix;
+ int r, q;
+
+ assert(name);
+ assert(old_suffix);
+ assert(new_suffix);
+ assert(ret);
+
+ n = name;
+ s = old_suffix;
+
+ for (;;) {
+ char ln[DNS_LABEL_MAX], ls[DNS_LABEL_MAX];
+
+ if (!saved_before)
+ saved_before = n;
+
+ r = dns_label_unescape(&n, ln, sizeof ln, 0);
+ if (r < 0)
+ return r;
+
+ if (!saved_after)
+ saved_after = n;
+
+ q = dns_label_unescape(&s, ls, sizeof ls, 0);
+ if (q < 0)
+ return q;
+
+ if (r == 0 && q == 0)
+ break;
+ if (r == 0 && saved_after == n) {
+ *ret = NULL; /* doesn't match */
+ return 0;
+ }
+
+ if (r != q || ascii_strcasecmp_n(ln, ls, r) != 0) {
+
+ /* Not the same, let's jump back, and try with the next label again */
+ s = old_suffix;
+ n = TAKE_PTR(saved_after);
+ saved_before = NULL;
+ }
+ }
+
+ /* Found it! Now generate the new name */
+ prefix = strndupa(name, saved_before - name);
+
+ r = dns_name_concat(prefix, new_suffix, 0, ret);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int dns_name_between(const char *a, const char *b, const char *c) {
+ /* Determine if b is strictly greater than a and strictly smaller than c.
+ We consider the order of names to be circular, so that if a is
+ strictly greater than c, we consider b to be between them if it is
+ either greater than a or smaller than c. This is how the canonical
+ DNS name order used in NSEC records work. */
+
+ if (dns_name_compare_func(a, c) < 0)
+ /*
+ a and c are properly ordered:
+ a<---b--->c
+ */
+ return dns_name_compare_func(a, b) < 0 &&
+ dns_name_compare_func(b, c) < 0;
+ else
+ /*
+ a and c are equal or 'reversed':
+ <--b--c a----->
+ or:
+ <-----c a--b-->
+ */
+ return dns_name_compare_func(b, c) < 0 ||
+ dns_name_compare_func(a, b) < 0;
+}
+
+int dns_name_reverse(int family, const union in_addr_union *a, char **ret) {
+ const uint8_t *p;
+ int r;
+
+ assert(a);
+ assert(ret);
+
+ p = (const uint8_t*) a;
+
+ if (family == AF_INET)
+ r = asprintf(ret, "%u.%u.%u.%u.in-addr.arpa", p[3], p[2], p[1], p[0]);
+ else if (family == AF_INET6)
+ r = asprintf(ret, "%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.ip6.arpa",
+ hexchar(p[15] & 0xF), hexchar(p[15] >> 4), hexchar(p[14] & 0xF), hexchar(p[14] >> 4),
+ hexchar(p[13] & 0xF), hexchar(p[13] >> 4), hexchar(p[12] & 0xF), hexchar(p[12] >> 4),
+ hexchar(p[11] & 0xF), hexchar(p[11] >> 4), hexchar(p[10] & 0xF), hexchar(p[10] >> 4),
+ hexchar(p[ 9] & 0xF), hexchar(p[ 9] >> 4), hexchar(p[ 8] & 0xF), hexchar(p[ 8] >> 4),
+ hexchar(p[ 7] & 0xF), hexchar(p[ 7] >> 4), hexchar(p[ 6] & 0xF), hexchar(p[ 6] >> 4),
+ hexchar(p[ 5] & 0xF), hexchar(p[ 5] >> 4), hexchar(p[ 4] & 0xF), hexchar(p[ 4] >> 4),
+ hexchar(p[ 3] & 0xF), hexchar(p[ 3] >> 4), hexchar(p[ 2] & 0xF), hexchar(p[ 2] >> 4),
+ hexchar(p[ 1] & 0xF), hexchar(p[ 1] >> 4), hexchar(p[ 0] & 0xF), hexchar(p[ 0] >> 4));
+ else
+ return -EAFNOSUPPORT;
+ if (r < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int dns_name_address(const char *p, int *family, union in_addr_union *address) {
+ int r;
+
+ assert(p);
+ assert(family);
+ assert(address);
+
+ r = dns_name_endswith(p, "in-addr.arpa");
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ uint8_t a[4];
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(a); i++) {
+ char label[DNS_LABEL_MAX+1];
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+ if (r > 3)
+ return -EINVAL;
+
+ r = safe_atou8(label, &a[i]);
+ if (r < 0)
+ return r;
+ }
+
+ r = dns_name_equal(p, "in-addr.arpa");
+ if (r <= 0)
+ return r;
+
+ *family = AF_INET;
+ address->in.s_addr = htobe32(((uint32_t) a[3] << 24) |
+ ((uint32_t) a[2] << 16) |
+ ((uint32_t) a[1] << 8) |
+ (uint32_t) a[0]);
+
+ return 1;
+ }
+
+ r = dns_name_endswith(p, "ip6.arpa");
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ struct in6_addr a;
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(a.s6_addr); i++) {
+ char label[DNS_LABEL_MAX+1];
+ int x, y;
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r <= 0)
+ return r;
+ if (r != 1)
+ return -EINVAL;
+ x = unhexchar(label[0]);
+ if (x < 0)
+ return -EINVAL;
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r <= 0)
+ return r;
+ if (r != 1)
+ return -EINVAL;
+ y = unhexchar(label[0]);
+ if (y < 0)
+ return -EINVAL;
+
+ a.s6_addr[ELEMENTSOF(a.s6_addr) - i - 1] = (uint8_t) y << 4 | (uint8_t) x;
+ }
+
+ r = dns_name_equal(p, "ip6.arpa");
+ if (r <= 0)
+ return r;
+
+ *family = AF_INET6;
+ address->in6 = a;
+ return 1;
+ }
+
+ return 0;
+}
+
+bool dns_name_is_root(const char *name) {
+
+ assert(name);
+
+ /* There are exactly two ways to encode the root domain name:
+ * as empty string, or with a single dot. */
+
+ return STR_IN_SET(name, "", ".");
+}
+
+bool dns_name_is_single_label(const char *name) {
+ int r;
+
+ assert(name);
+
+ r = dns_name_parent(&name);
+ if (r <= 0)
+ return false;
+
+ return dns_name_is_root(name);
+}
+
+/* Encode a domain name according to RFC 1035 Section 3.1, without compression */
+int dns_name_to_wire_format(const char *domain, uint8_t *buffer, size_t len, bool canonical) {
+ uint8_t *label_length, *out;
+ int r;
+
+ assert(domain);
+ assert(buffer);
+
+ out = buffer;
+
+ do {
+ /* Reserve a byte for label length */
+ if (len <= 0)
+ return -ENOBUFS;
+ len--;
+ label_length = out;
+ out++;
+
+ /* Convert and copy a single label. Note that
+ * dns_label_unescape() returns 0 when it hits the end
+ * of the domain name, which we rely on here to encode
+ * the trailing NUL byte. */
+ r = dns_label_unescape(&domain, (char *) out, len, 0);
+ if (r < 0)
+ return r;
+
+ /* Optionally, output the name in DNSSEC canonical
+ * format, as described in RFC 4034, section 6.2. Or
+ * in other words: in lower-case. */
+ if (canonical)
+ ascii_strlower_n((char*) out, (size_t) r);
+
+ /* Fill label length, move forward */
+ *label_length = r;
+ out += r;
+ len -= r;
+
+ } while (r != 0);
+
+ /* Verify the maximum size of the encoded name. The trailing
+ * dot + NUL byte account are included this time, hence
+ * compare against DNS_HOSTNAME_MAX + 2 (which is 255) this
+ * time. */
+ if (out - buffer > DNS_HOSTNAME_MAX + 2)
+ return -EINVAL;
+
+ return out - buffer;
+}
+
+static bool srv_type_label_is_valid(const char *label, size_t n) {
+ size_t k;
+
+ assert(label);
+
+ if (n < 2) /* Label needs to be at least 2 chars long */
+ return false;
+
+ if (label[0] != '_') /* First label char needs to be underscore */
+ return false;
+
+ /* Second char must be a letter */
+ if (!(label[1] >= 'A' && label[1] <= 'Z') &&
+ !(label[1] >= 'a' && label[1] <= 'z'))
+ return false;
+
+ /* Third and further chars must be alphanumeric or a hyphen */
+ for (k = 2; k < n; k++) {
+ if (!(label[k] >= 'A' && label[k] <= 'Z') &&
+ !(label[k] >= 'a' && label[k] <= 'z') &&
+ !(label[k] >= '0' && label[k] <= '9') &&
+ label[k] != '-')
+ return false;
+ }
+
+ return true;
+}
+
+bool dns_srv_type_is_valid(const char *name) {
+ unsigned c = 0;
+ int r;
+
+ if (!name)
+ return false;
+
+ for (;;) {
+ char label[DNS_LABEL_MAX];
+
+ /* This more or less implements RFC 6335, Section 5.1 */
+
+ r = dns_label_unescape(&name, label, sizeof label, 0);
+ if (r < 0)
+ return false;
+ if (r == 0)
+ break;
+
+ if (c >= 2)
+ return false;
+
+ if (!srv_type_label_is_valid(label, r))
+ return false;
+
+ c++;
+ }
+
+ return c == 2; /* exactly two labels */
+}
+
+bool dnssd_srv_type_is_valid(const char *name) {
+ return dns_srv_type_is_valid(name) &&
+ ((dns_name_endswith(name, "_tcp") > 0) ||
+ (dns_name_endswith(name, "_udp") > 0)); /* Specific to DNS-SD. RFC 6763, Section 7 */
+}
+
+bool dns_service_name_is_valid(const char *name) {
+ size_t l;
+
+ /* This more or less implements RFC 6763, Section 4.1.1 */
+
+ if (!name)
+ return false;
+
+ if (!utf8_is_valid(name))
+ return false;
+
+ if (string_has_cc(name, NULL))
+ return false;
+
+ l = strlen(name);
+ if (l <= 0)
+ return false;
+ if (l > 63)
+ return false;
+
+ return true;
+}
+
+int dns_service_join(const char *name, const char *type, const char *domain, char **ret) {
+ char escaped[DNS_LABEL_ESCAPED_MAX];
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ assert(type);
+ assert(domain);
+ assert(ret);
+
+ if (!dns_srv_type_is_valid(type))
+ return -EINVAL;
+
+ if (!name)
+ return dns_name_concat(type, domain, 0, ret);
+
+ if (!dns_service_name_is_valid(name))
+ return -EINVAL;
+
+ r = dns_label_escape(name, strlen(name), escaped, sizeof(escaped));
+ if (r < 0)
+ return r;
+
+ r = dns_name_concat(type, domain, 0, &n);
+ if (r < 0)
+ return r;
+
+ return dns_name_concat(escaped, n, 0, ret);
+}
+
+static bool dns_service_name_label_is_valid(const char *label, size_t n) {
+ char *s;
+
+ assert(label);
+
+ if (memchr(label, 0, n))
+ return false;
+
+ s = strndupa(label, n);
+ return dns_service_name_is_valid(s);
+}
+
+int dns_service_split(const char *joined, char **_name, char **_type, char **_domain) {
+ _cleanup_free_ char *name = NULL, *type = NULL, *domain = NULL;
+ const char *p = joined, *q = NULL, *d = NULL;
+ char a[DNS_LABEL_MAX], b[DNS_LABEL_MAX], c[DNS_LABEL_MAX];
+ int an, bn, cn, r;
+ unsigned x = 0;
+
+ assert(joined);
+
+ /* Get first label from the full name */
+ an = dns_label_unescape(&p, a, sizeof(a), 0);
+ if (an < 0)
+ return an;
+
+ if (an > 0) {
+ x++;
+
+ /* If there was a first label, try to get the second one */
+ bn = dns_label_unescape(&p, b, sizeof(b), 0);
+ if (bn < 0)
+ return bn;
+
+ if (bn > 0) {
+ x++;
+
+ /* If there was a second label, try to get the third one */
+ q = p;
+ cn = dns_label_unescape(&p, c, sizeof(c), 0);
+ if (cn < 0)
+ return cn;
+
+ if (cn > 0)
+ x++;
+ } else
+ cn = 0;
+ } else
+ an = 0;
+
+ if (x >= 2 && srv_type_label_is_valid(b, bn)) {
+
+ if (x >= 3 && srv_type_label_is_valid(c, cn)) {
+
+ if (dns_service_name_label_is_valid(a, an)) {
+ /* OK, got <name> . <type> . <type2> . <domain> */
+
+ name = strndup(a, an);
+ if (!name)
+ return -ENOMEM;
+
+ type = strjoin(b, ".", c);
+ if (!type)
+ return -ENOMEM;
+
+ d = p;
+ goto finish;
+ }
+
+ } else if (srv_type_label_is_valid(a, an)) {
+
+ /* OK, got <type> . <type2> . <domain> */
+
+ name = NULL;
+
+ type = strjoin(a, ".", b);
+ if (!type)
+ return -ENOMEM;
+
+ d = q;
+ goto finish;
+ }
+ }
+
+ name = NULL;
+ type = NULL;
+ d = joined;
+
+finish:
+ r = dns_name_normalize(d, 0, &domain);
+ if (r < 0)
+ return r;
+
+ if (_domain)
+ *_domain = TAKE_PTR(domain);
+
+ if (_type)
+ *_type = TAKE_PTR(type);
+
+ if (_name)
+ *_name = TAKE_PTR(name);
+
+ return 0;
+}
+
+static int dns_name_build_suffix_table(const char *name, const char *table[]) {
+ const char *p;
+ unsigned n = 0;
+ int r;
+
+ assert(name);
+ assert(table);
+
+ p = name;
+ for (;;) {
+ if (n > DNS_N_LABELS_MAX)
+ return -EINVAL;
+
+ table[n] = p;
+ r = dns_name_parent(&p);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ n++;
+ }
+
+ return (int) n;
+}
+
+int dns_name_suffix(const char *name, unsigned n_labels, const char **ret) {
+ const char* labels[DNS_N_LABELS_MAX+1];
+ int n;
+
+ assert(name);
+ assert(ret);
+
+ n = dns_name_build_suffix_table(name, labels);
+ if (n < 0)
+ return n;
+
+ if ((unsigned) n < n_labels)
+ return -EINVAL;
+
+ *ret = labels[n - n_labels];
+ return (int) (n - n_labels);
+}
+
+int dns_name_skip(const char *a, unsigned n_labels, const char **ret) {
+ int r;
+
+ assert(a);
+ assert(ret);
+
+ for (; n_labels > 0; n_labels--) {
+ r = dns_name_parent(&a);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ *ret = "";
+ return 0;
+ }
+ }
+
+ *ret = a;
+ return 1;
+}
+
+int dns_name_count_labels(const char *name) {
+ unsigned n = 0;
+ const char *p;
+ int r;
+
+ assert(name);
+
+ p = name;
+ for (;;) {
+ r = dns_name_parent(&p);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (n >= DNS_N_LABELS_MAX)
+ return -EINVAL;
+
+ n++;
+ }
+
+ return (int) n;
+}
+
+int dns_name_equal_skip(const char *a, unsigned n_labels, const char *b) {
+ int r;
+
+ assert(a);
+ assert(b);
+
+ r = dns_name_skip(a, n_labels, &a);
+ if (r <= 0)
+ return r;
+
+ return dns_name_equal(a, b);
+}
+
+int dns_name_common_suffix(const char *a, const char *b, const char **ret) {
+ const char *a_labels[DNS_N_LABELS_MAX+1], *b_labels[DNS_N_LABELS_MAX+1];
+ int n = 0, m = 0, k = 0, r, q;
+
+ assert(a);
+ assert(b);
+ assert(ret);
+
+ /* Determines the common suffix of domain names a and b */
+
+ n = dns_name_build_suffix_table(a, a_labels);
+ if (n < 0)
+ return n;
+
+ m = dns_name_build_suffix_table(b, b_labels);
+ if (m < 0)
+ return m;
+
+ for (;;) {
+ char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX];
+ const char *x, *y;
+
+ if (k >= n || k >= m) {
+ *ret = a_labels[n - k];
+ return 0;
+ }
+
+ x = a_labels[n - 1 - k];
+ r = dns_label_unescape(&x, la, sizeof la, 0);
+ if (r < 0)
+ return r;
+
+ y = b_labels[m - 1 - k];
+ q = dns_label_unescape(&y, lb, sizeof lb, 0);
+ if (q < 0)
+ return q;
+
+ if (r != q || ascii_strcasecmp_n(la, lb, r) != 0) {
+ *ret = a_labels[n - k];
+ return 0;
+ }
+
+ k++;
+ }
+}
+
+int dns_name_apply_idna(const char *name, char **ret) {
+ /* Return negative on error, 0 if not implemented, positive on success. */
+
+#if HAVE_LIBIDN2
+ int r;
+ _cleanup_free_ char *t = NULL;
+
+ assert(name);
+ assert(ret);
+
+ r = idn2_lookup_u8((uint8_t*) name, (uint8_t**) &t,
+ IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL);
+ log_debug("idn2_lookup_u8: %s → %s", name, t);
+ if (r == IDN2_OK) {
+ if (!startswith(name, "xn--")) {
+ _cleanup_free_ char *s = NULL;
+
+ r = idn2_to_unicode_8z8z(t, &s, 0);
+ if (r != IDN2_OK) {
+ log_debug("idn2_to_unicode_8z8z(\"%s\") failed: %d/%s",
+ t, r, idn2_strerror(r));
+ return 0;
+ }
+
+ if (!streq_ptr(name, s)) {
+ log_debug("idn2 roundtrip failed: \"%s\" → \"%s\" → \"%s\", ignoring.",
+ name, t, s);
+ return 0;
+ }
+ }
+
+ *ret = TAKE_PTR(t);
+
+ return 1; /* *ret has been written */
+ }
+
+ log_debug("idn2_lookup_u8(\"%s\") failed: %d/%s", name, r, idn2_strerror(r));
+ if (r == IDN2_2HYPHEN)
+ /* The name has two hyphens — forbidden by IDNA2008 in some cases */
+ return 0;
+ if (IN_SET(r, IDN2_TOO_BIG_DOMAIN, IDN2_TOO_BIG_LABEL))
+ return -ENOSPC;
+ return -EINVAL;
+#elif HAVE_LIBIDN
+ _cleanup_free_ char *buf = NULL;
+ size_t n = 0, allocated = 0;
+ bool first = true;
+ int r, q;
+
+ assert(name);
+ assert(ret);
+
+ for (;;) {
+ char label[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&name, label, sizeof label, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ q = dns_label_apply_idna(label, r, label, sizeof label);
+ if (q < 0)
+ return q;
+ if (q > 0)
+ r = q;
+
+ if (!GREEDY_REALLOC(buf, allocated, n + !first + DNS_LABEL_ESCAPED_MAX))
+ return -ENOMEM;
+
+ r = dns_label_escape(label, r, buf + n + !first, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ if (first)
+ first = false;
+ else
+ buf[n++] = '.';
+
+ n += r;
+ }
+
+ if (n > DNS_HOSTNAME_MAX)
+ return -EINVAL;
+
+ if (!GREEDY_REALLOC(buf, allocated, n + 1))
+ return -ENOMEM;
+
+ buf[n] = 0;
+ *ret = TAKE_PTR(buf);
+
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+int dns_name_is_valid_or_address(const char *name) {
+ /* Returns > 0 if the specified name is either a valid IP address formatted as string or a valid DNS name */
+
+ if (isempty(name))
+ return 0;
+
+ if (in_addr_from_string_auto(name, NULL, NULL) >= 0)
+ return 1;
+
+ return dns_name_is_valid(name);
+}
diff --git a/src/shared/dns-domain.h b/src/shared/dns-domain.h
new file mode 100644
index 0000000..6ed512c
--- /dev/null
+++ b/src/shared/dns-domain.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "hashmap.h"
+#include "in-addr-util.h"
+
+/* Length of a single label, with all escaping removed, excluding any trailing dot or NUL byte */
+#define DNS_LABEL_MAX 63
+
+/* Worst case length of a single label, with all escaping applied and room for a trailing NUL byte. */
+#define DNS_LABEL_ESCAPED_MAX (DNS_LABEL_MAX*4+1)
+
+/* Maximum length of a full hostname, consisting of a series of unescaped labels, and no trailing dot or NUL byte */
+#define DNS_HOSTNAME_MAX 253
+
+/* Maximum length of a full hostname, on the wire, including the final NUL byte */
+#define DNS_WIRE_FORMAT_HOSTNAME_MAX 255
+
+/* Maximum number of labels per valid hostname */
+#define DNS_N_LABELS_MAX 127
+
+typedef enum DNSLabelFlags {
+ DNS_LABEL_LDH = 1 << 0, /* Follow the "LDH" rule — only letters, digits, and internal hyphens. */
+ DNS_LABEL_NO_ESCAPES = 1 << 1, /* Do not treat backslashes specially */
+} DNSLabelFlags;
+
+int dns_label_unescape(const char **name, char *dest, size_t sz, DNSLabelFlags flags);
+int dns_label_unescape_suffix(const char *name, const char **label_end, char *dest, size_t sz);
+int dns_label_escape(const char *p, size_t l, char *dest, size_t sz);
+int dns_label_escape_new(const char *p, size_t l, char **ret);
+
+static inline int dns_name_parent(const char **name) {
+ return dns_label_unescape(name, NULL, DNS_LABEL_MAX, 0);
+}
+
+#if HAVE_LIBIDN
+int dns_label_apply_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max);
+int dns_label_undo_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max);
+#endif
+
+int dns_name_concat(const char *a, const char *b, DNSLabelFlags flags, char **ret);
+
+static inline int dns_name_normalize(const char *s, DNSLabelFlags flags, char **ret) {
+ /* dns_name_concat() normalizes as a side-effect */
+ return dns_name_concat(s, NULL, flags, ret);
+}
+
+static inline int dns_name_is_valid(const char *s) {
+ int r;
+
+ /* dns_name_normalize() verifies as a side effect */
+ r = dns_name_normalize(s, 0, NULL);
+ if (r == -EINVAL)
+ return 0;
+ if (r < 0)
+ return r;
+ return 1;
+}
+
+static inline int dns_name_is_valid_ldh(const char *s) {
+ int r;
+
+ r = dns_name_concat(s, NULL, DNS_LABEL_LDH|DNS_LABEL_NO_ESCAPES, NULL);
+ if (r == -EINVAL)
+ return 0;
+ if (r < 0)
+ return r;
+ return 1;
+}
+
+void dns_name_hash_func(const char *s, struct siphash *state);
+int dns_name_compare_func(const char *a, const char *b);
+extern const struct hash_ops dns_name_hash_ops;
+
+int dns_name_between(const char *a, const char *b, const char *c);
+int dns_name_equal(const char *x, const char *y);
+int dns_name_endswith(const char *name, const char *suffix);
+int dns_name_startswith(const char *name, const char *prefix);
+
+int dns_name_change_suffix(const char *name, const char *old_suffix, const char *new_suffix, char **ret);
+
+int dns_name_reverse(int family, const union in_addr_union *a, char **ret);
+int dns_name_address(const char *p, int *family, union in_addr_union *a);
+
+bool dns_name_is_root(const char *name);
+bool dns_name_is_single_label(const char *name);
+
+int dns_name_to_wire_format(const char *domain, uint8_t *buffer, size_t len, bool canonical);
+
+bool dns_srv_type_is_valid(const char *name);
+bool dnssd_srv_type_is_valid(const char *name);
+bool dns_service_name_is_valid(const char *name);
+
+int dns_service_join(const char *name, const char *type, const char *domain, char **ret);
+int dns_service_split(const char *joined, char **name, char **type, char **domain);
+
+int dns_name_suffix(const char *name, unsigned n_labels, const char **ret);
+int dns_name_count_labels(const char *name);
+
+int dns_name_skip(const char *a, unsigned n_labels, const char **ret);
+int dns_name_equal_skip(const char *a, unsigned n_labels, const char *b);
+
+int dns_name_common_suffix(const char *a, const char *b, const char **ret);
+
+int dns_name_apply_idna(const char *name, char **ret);
+
+int dns_name_is_valid_or_address(const char *name);
diff --git a/src/shared/dropin.c b/src/shared/dropin.c
new file mode 100644
index 0000000..409eef2
--- /dev/null
+++ b/src/shared/dropin.c
@@ -0,0 +1,255 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "dirent-util.h"
+#include "dropin.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio-label.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+
+int drop_in_file(const char *dir, const char *unit, unsigned level,
+ const char *name, char **_p, char **_q) {
+
+ char prefix[DECIMAL_STR_MAX(unsigned)];
+ _cleanup_free_ char *b = NULL;
+ char *p, *q;
+
+ assert(unit);
+ assert(name);
+ assert(_p);
+ assert(_q);
+
+ sprintf(prefix, "%u", level);
+
+ b = xescape(name, "/.");
+ if (!b)
+ return -ENOMEM;
+
+ if (!filename_is_valid(b))
+ return -EINVAL;
+
+ p = strjoin(dir, "/", unit, ".d");
+ if (!p)
+ return -ENOMEM;
+
+ q = strjoin(p, "/", prefix, "-", b, ".conf");
+ if (!q) {
+ free(p);
+ return -ENOMEM;
+ }
+
+ *_p = p;
+ *_q = q;
+ return 0;
+}
+
+int write_drop_in(const char *dir, const char *unit, unsigned level,
+ const char *name, const char *data) {
+
+ _cleanup_free_ char *p = NULL, *q = NULL;
+ int r;
+
+ assert(dir);
+ assert(unit);
+ assert(name);
+ assert(data);
+
+ r = drop_in_file(dir, unit, level, name, &p, &q);
+ if (r < 0)
+ return r;
+
+ (void) mkdir_p(p, 0755);
+ return write_string_file_atomic_label(q, data);
+}
+
+int write_drop_in_format(const char *dir, const char *unit, unsigned level,
+ const char *name, const char *format, ...) {
+ _cleanup_free_ char *p = NULL;
+ va_list ap;
+ int r;
+
+ assert(dir);
+ assert(unit);
+ assert(name);
+ assert(format);
+
+ va_start(ap, format);
+ r = vasprintf(&p, format, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ return write_drop_in(dir, unit, level, name, p);
+}
+
+static int unit_file_find_dir(
+ const char *original_root,
+ const char *path,
+ char ***dirs) {
+
+ _cleanup_free_ char *chased = NULL;
+ int r;
+
+ assert(path);
+
+ r = chase_symlinks(path, original_root, 0, &chased);
+ if (r == -ENOENT) /* Ignore -ENOENT, after all most units won't have a drop-in dir. */
+ return 0;
+ if (r == -ENAMETOOLONG) {
+ /* Also, ignore -ENAMETOOLONG but log about it. After all, users are not even able to create the
+ * drop-in dir in such case. This mostly happens for device units with an overly long /sys path. */
+ log_debug_errno(r, "Path '%s' too long, couldn't canonicalize, ignoring.", path);
+ return 0;
+ }
+ if (r < 0)
+ return log_warning_errno(r, "Failed to canonicalize path '%s': %m", path);
+
+ r = strv_push(dirs, chased);
+ if (r < 0)
+ return log_oom();
+
+ chased = NULL;
+ return 0;
+}
+
+static int unit_file_find_dirs(
+ const char *original_root,
+ Set *unit_path_cache,
+ const char *unit_path,
+ const char *name,
+ const char *suffix,
+ char ***dirs) {
+
+ _cleanup_free_ char *prefix = NULL, *instance = NULL, *built = NULL;
+ bool is_instance, chopped;
+ const char *dash;
+ UnitType type;
+ char *path;
+ size_t n;
+ int r;
+
+ assert(unit_path);
+ assert(name);
+ assert(suffix);
+
+ path = strjoina(unit_path, "/", name, suffix);
+ if (!unit_path_cache || set_get(unit_path_cache, path)) {
+ r = unit_file_find_dir(original_root, path, dirs);
+ if (r < 0)
+ return r;
+ }
+
+ is_instance = unit_name_is_valid(name, UNIT_NAME_INSTANCE);
+ if (is_instance) { /* Also try the template dir */
+ _cleanup_free_ char *template = NULL;
+
+ r = unit_name_template(name, &template);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate template from unit name: %m");
+
+ r = unit_file_find_dirs(original_root, unit_path_cache, unit_path, template, suffix, dirs);
+ if (r < 0)
+ return r;
+ }
+
+ /* Let's see if there's a "-" prefix for this unit name. If so, let's invoke ourselves for it. This will then
+ * recursively do the same for all our prefixes. i.e. this means given "foo-bar-waldo.service" we'll also
+ * search "foo-bar-.service" and "foo-.service".
+ *
+ * Note the order in which we do it: we traverse up adding drop-ins on each step. This means the more specific
+ * drop-ins may override the more generic drop-ins, which is the intended behaviour. */
+
+ r = unit_name_to_prefix(name, &prefix);
+ if (r < 0)
+ return log_error_errno(r, "Failed to derive unit name prefix from unit name: %m");
+
+ chopped = false;
+ for (;;) {
+ dash = strrchr(prefix, '-');
+ if (!dash) /* No dash? if so we are done */
+ return 0;
+
+ n = (size_t) (dash - prefix);
+ if (n == 0) /* Leading dash? If so, we are done */
+ return 0;
+
+ if (prefix[n+1] != 0 || chopped) {
+ prefix[n+1] = 0;
+ break;
+ }
+
+ /* Trailing dash? If so, chop it off and try again, but not more than once. */
+ prefix[n] = 0;
+ chopped = true;
+ }
+
+ if (!unit_prefix_is_valid(prefix))
+ return 0;
+
+ type = unit_name_to_type(name);
+ if (type < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to to derive unit type from unit name: %s",
+ name);
+
+ if (is_instance) {
+ r = unit_name_to_instance(name, &instance);
+ if (r < 0)
+ return log_error_errno(r, "Failed to derive unit name instance from unit name: %m");
+ }
+
+ r = unit_name_build_from_type(prefix, instance, type, &built);
+ if (r < 0)
+ return log_error_errno(r, "Failed to build prefix unit name: %m");
+
+ return unit_file_find_dirs(original_root, unit_path_cache, unit_path, built, suffix, dirs);
+}
+
+int unit_file_find_dropin_paths(
+ const char *original_root,
+ char **lookup_path,
+ Set *unit_path_cache,
+ const char *dir_suffix,
+ const char *file_suffix,
+ Set *names,
+ char ***ret) {
+
+ _cleanup_strv_free_ char **dirs = NULL;
+ char *t, **p;
+ Iterator i;
+ int r;
+
+ assert(ret);
+
+ SET_FOREACH(t, names, i)
+ STRV_FOREACH(p, lookup_path)
+ (void) unit_file_find_dirs(original_root, unit_path_cache, *p, t, dir_suffix, &dirs);
+
+ if (strv_isempty(dirs)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ r = conf_files_list_strv(ret, file_suffix, NULL, 0, (const char**) dirs);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to create the list of configuration files: %m");
+
+ return 1;
+}
diff --git a/src/shared/dropin.h b/src/shared/dropin.h
new file mode 100644
index 0000000..ae7379b
--- /dev/null
+++ b/src/shared/dropin.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "hashmap.h"
+#include "macro.h"
+#include "set.h"
+#include "unit-name.h"
+
+int drop_in_file(const char *dir, const char *unit, unsigned level,
+ const char *name, char **_p, char **_q);
+
+int write_drop_in(const char *dir, const char *unit, unsigned level,
+ const char *name, const char *data);
+
+int write_drop_in_format(const char *dir, const char *unit, unsigned level,
+ const char *name, const char *format, ...) _printf_(5, 6);
+
+int unit_file_find_dropin_paths(
+ const char *original_root,
+ char **lookup_path,
+ Set *unit_path_cache,
+ const char *dir_suffix,
+ const char *file_suffix,
+ Set *names,
+ char ***paths);
+
+static inline int unit_file_find_dropin_conf_paths(
+ const char *original_root,
+ char **lookup_path,
+ Set *unit_path_cache,
+ Set *names,
+ char ***paths) {
+
+ return unit_file_find_dropin_paths(original_root,
+ lookup_path,
+ unit_path_cache,
+ ".d", ".conf",
+ names, paths);
+}
diff --git a/src/shared/efivars.c b/src/shared/efivars.c
new file mode 100644
index 0000000..26f905b
--- /dev/null
+++ b/src/shared/efivars.c
@@ -0,0 +1,914 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/fs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "chattr-util.h"
+#include "dirent-util.h"
+#include "efivars.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+#include "virt.h"
+
+#if ENABLE_EFI
+
+#define LOAD_OPTION_ACTIVE 0x00000001
+#define MEDIA_DEVICE_PATH 0x04
+#define MEDIA_HARDDRIVE_DP 0x01
+#define MEDIA_FILEPATH_DP 0x04
+#define SIGNATURE_TYPE_GUID 0x02
+#define MBR_TYPE_EFI_PARTITION_TABLE_HEADER 0x02
+#define END_DEVICE_PATH_TYPE 0x7f
+#define END_ENTIRE_DEVICE_PATH_SUBTYPE 0xff
+#define EFI_OS_INDICATIONS_BOOT_TO_FW_UI 0x0000000000000001
+
+#define boot_option__contents { \
+ uint32_t attr; \
+ uint16_t path_len; \
+ uint16_t title[]; \
+ }
+
+struct boot_option boot_option__contents;
+struct boot_option__packed boot_option__contents _packed_;
+assert_cc(offsetof(struct boot_option, title) == offsetof(struct boot_option__packed, title));
+/* sizeof(struct boot_option) != sizeof(struct boot_option__packed), so
+ * the *size* of the structure should not be used anywhere below. */
+
+struct drive_path {
+ uint32_t part_nr;
+ uint64_t part_start;
+ uint64_t part_size;
+ char signature[16];
+ uint8_t mbr_type;
+ uint8_t signature_type;
+} _packed_;
+
+#define device_path__contents { \
+ uint8_t type; \
+ uint8_t sub_type; \
+ uint16_t length; \
+ union { \
+ uint16_t path[0]; \
+ struct drive_path drive; \
+ }; \
+ }
+
+struct device_path device_path__contents;
+struct device_path__packed device_path__contents _packed_;
+assert_cc(sizeof(struct device_path) == sizeof(struct device_path__packed));
+
+bool is_efi_boot(void) {
+ if (detect_container() > 0)
+ return false;
+
+ return access("/sys/firmware/efi/", F_OK) >= 0;
+}
+
+static int read_flag(const char *varname) {
+ _cleanup_free_ void *v = NULL;
+ uint8_t b;
+ size_t s;
+ int r;
+
+ if (!is_efi_boot()) /* If this is not an EFI boot, assume the queried flags are zero */
+ return 0;
+
+ r = efi_get_variable(EFI_VENDOR_GLOBAL, varname, NULL, &v, &s);
+ if (r < 0)
+ return r;
+
+ if (s != 1)
+ return -EINVAL;
+
+ b = *(uint8_t *)v;
+ return !!b;
+}
+
+bool is_efi_secure_boot(void) {
+ return read_flag("SecureBoot") > 0;
+}
+
+bool is_efi_secure_boot_setup_mode(void) {
+ return read_flag("SetupMode") > 0;
+}
+
+int efi_reboot_to_firmware_supported(void) {
+ _cleanup_free_ void *v = NULL;
+ uint64_t b;
+ size_t s;
+ int r;
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ r = efi_get_variable(EFI_VENDOR_GLOBAL, "OsIndicationsSupported", NULL, &v, &s);
+ if (r == -ENOENT) /* variable doesn't exist? it's not supported then */
+ return -EOPNOTSUPP;
+ if (r < 0)
+ return r;
+ if (s != sizeof(uint64_t))
+ return -EINVAL;
+
+ b = *(uint64_t*) v;
+ if (!(b & EFI_OS_INDICATIONS_BOOT_TO_FW_UI))
+ return -EOPNOTSUPP; /* bit unset? it's not supported then */
+
+ return 0;
+}
+
+static int get_os_indications(uint64_t *os_indication) {
+ _cleanup_free_ void *v = NULL;
+ size_t s;
+ int r;
+
+ /* Let's verify general support first */
+ r = efi_reboot_to_firmware_supported();
+ if (r < 0)
+ return r;
+
+ r = efi_get_variable(EFI_VENDOR_GLOBAL, "OsIndications", NULL, &v, &s);
+ if (r == -ENOENT) {
+ /* Some firmware implementations that do support OsIndications and report that with
+ * OsIndicationsSupported will remove the OsIndications variable when it is unset. Let's pretend it's 0
+ * then, to hide this implementation detail. Note that this call will return -ENOENT then only if the
+ * support for OsIndications is missing entirely, as determined by efi_reboot_to_firmware_supported()
+ * above. */
+ *os_indication = 0;
+ return 0;
+ } else if (r < 0)
+ return r;
+ else if (s != sizeof(uint64_t))
+ return -EINVAL;
+
+ *os_indication = *(uint64_t *)v;
+ return 0;
+}
+
+int efi_get_reboot_to_firmware(void) {
+ int r;
+ uint64_t b;
+
+ r = get_os_indications(&b);
+ if (r < 0)
+ return r;
+
+ return !!(b & EFI_OS_INDICATIONS_BOOT_TO_FW_UI);
+}
+
+int efi_set_reboot_to_firmware(bool value) {
+ int r;
+ uint64_t b, b_new;
+
+ r = get_os_indications(&b);
+ if (r < 0)
+ return r;
+
+ if (value)
+ b_new = b | EFI_OS_INDICATIONS_BOOT_TO_FW_UI;
+ else
+ b_new = b & ~EFI_OS_INDICATIONS_BOOT_TO_FW_UI;
+
+ /* Avoid writing to efi vars store if we can due to firmware bugs. */
+ if (b != b_new)
+ return efi_set_variable(EFI_VENDOR_GLOBAL, "OsIndications", &b_new, sizeof(uint64_t));
+
+ return 0;
+}
+
+int efi_get_variable(
+ sd_id128_t vendor,
+ const char *name,
+ uint32_t *attribute,
+ void **value,
+ size_t *size) {
+
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *p = NULL;
+ uint32_t a;
+ ssize_t n;
+ struct stat st;
+ _cleanup_free_ void *buf = NULL;
+
+ assert(name);
+ assert(value);
+ assert(size);
+
+ if (asprintf(&p,
+ "/sys/firmware/efi/efivars/%s-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ name, SD_ID128_FORMAT_VAL(vendor)) < 0)
+ return -ENOMEM;
+
+ fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+ if (st.st_size < 4)
+ return -EIO;
+ if (st.st_size > 4*1024*1024 + 4)
+ return -E2BIG;
+
+ n = read(fd, &a, sizeof(a));
+ if (n < 0)
+ return -errno;
+ if (n != sizeof(a))
+ return -EIO;
+
+ buf = malloc(st.st_size - 4 + 2);
+ if (!buf)
+ return -ENOMEM;
+
+ n = read(fd, buf, (size_t) st.st_size - 4);
+ if (n < 0)
+ return -errno;
+ if (n != (ssize_t) st.st_size - 4)
+ return -EIO;
+
+ /* Always NUL terminate (2 bytes, to protect UTF-16) */
+ ((char*) buf)[st.st_size - 4] = 0;
+ ((char*) buf)[st.st_size - 4 + 1] = 0;
+
+ *value = TAKE_PTR(buf);
+ *size = (size_t) st.st_size - 4;
+
+ if (attribute)
+ *attribute = a;
+
+ return 0;
+}
+
+int efi_get_variable_string(sd_id128_t vendor, const char *name, char **p) {
+ _cleanup_free_ void *s = NULL;
+ size_t ss = 0;
+ int r;
+ char *x;
+
+ r = efi_get_variable(vendor, name, NULL, &s, &ss);
+ if (r < 0)
+ return r;
+
+ x = utf16_to_utf8(s, ss);
+ if (!x)
+ return -ENOMEM;
+
+ *p = x;
+ return 0;
+}
+
+int efi_set_variable(
+ sd_id128_t vendor,
+ const char *name,
+ const void *value,
+ size_t size) {
+
+ struct var {
+ uint32_t attr;
+ char buf[];
+ } _packed_ * _cleanup_free_ buf = NULL;
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ bool saved_flags_valid = false;
+ unsigned saved_flags;
+ int r;
+
+ assert(name);
+ assert(value || size == 0);
+
+ if (asprintf(&p,
+ "/sys/firmware/efi/efivars/%s-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ name, SD_ID128_FORMAT_VAL(vendor)) < 0)
+ return -ENOMEM;
+
+ /* Newer efivarfs protects variables that are not in a whitelist with FS_IMMUTABLE_FL by default, to protect
+ * them for accidental removal and modification. We are not changing these variables accidentally however,
+ * hence let's unset the bit first. */
+
+ r = chattr_path(p, 0, FS_IMMUTABLE_FL, &saved_flags);
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to drop FS_IMMUTABLE_FL flag from '%s', ignoring: %m", p);
+
+ saved_flags_valid = r >= 0;
+
+ if (size == 0) {
+ if (unlink(p) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ return 0;
+ }
+
+ fd = open(p, O_WRONLY|O_CREAT|O_NOCTTY|O_CLOEXEC, 0644);
+ if (fd < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ buf = malloc(sizeof(uint32_t) + size);
+ if (!buf) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ buf->attr = EFI_VARIABLE_NON_VOLATILE|EFI_VARIABLE_BOOTSERVICE_ACCESS|EFI_VARIABLE_RUNTIME_ACCESS;
+ memcpy(buf->buf, value, size);
+
+ r = loop_write(fd, buf, sizeof(uint32_t) + size, false);
+ if (r < 0)
+ goto finish;
+
+ r = 0;
+
+finish:
+ if (saved_flags_valid) {
+ int q;
+
+ /* Restore the original flags field, just in case */
+ if (fd < 0)
+ q = chattr_path(p, saved_flags, FS_IMMUTABLE_FL, NULL);
+ else
+ q = chattr_fd(fd, saved_flags, FS_IMMUTABLE_FL, NULL);
+ if (q < 0)
+ log_debug_errno(q, "Failed to restore FS_IMMUTABLE_FL on '%s', ignoring: %m", p);
+ }
+
+ return r;
+}
+
+int efi_set_variable_string(sd_id128_t vendor, const char *name, const char *v) {
+ _cleanup_free_ char16_t *u16 = NULL;
+
+ u16 = utf8_to_utf16(v, strlen(v));
+ if (!u16)
+ return -ENOMEM;
+
+ return efi_set_variable(vendor, name, u16, (char16_strlen(u16) + 1) * sizeof(char16_t));
+}
+
+static ssize_t utf16_size(const uint16_t *s, size_t buf_len_bytes) {
+ size_t l = 0;
+
+ /* Returns the size of the string in bytes without the terminating two zero bytes */
+
+ if (buf_len_bytes % sizeof(uint16_t) != 0)
+ return -EINVAL;
+
+ while (l < buf_len_bytes / sizeof(uint16_t)) {
+ if (s[l] == 0)
+ return (l + 1) * sizeof(uint16_t);
+ l++;
+ }
+
+ return -EINVAL; /* The terminator was not found */
+}
+
+struct guid {
+ uint32_t u1;
+ uint16_t u2;
+ uint16_t u3;
+ uint8_t u4[8];
+} _packed_;
+
+static void efi_guid_to_id128(const void *guid, sd_id128_t *id128) {
+ uint32_t u1;
+ uint16_t u2, u3;
+ const struct guid *uuid = guid;
+
+ memcpy(&u1, &uuid->u1, sizeof(uint32_t));
+ id128->bytes[0] = (u1 >> 24) & 0xff;
+ id128->bytes[1] = (u1 >> 16) & 0xff;
+ id128->bytes[2] = (u1 >> 8) & 0xff;
+ id128->bytes[3] = u1 & 0xff;
+ memcpy(&u2, &uuid->u2, sizeof(uint16_t));
+ id128->bytes[4] = (u2 >> 8) & 0xff;
+ id128->bytes[5] = u2 & 0xff;
+ memcpy(&u3, &uuid->u3, sizeof(uint16_t));
+ id128->bytes[6] = (u3 >> 8) & 0xff;
+ id128->bytes[7] = u3 & 0xff;
+ memcpy(&id128->bytes[8], uuid->u4, sizeof(uuid->u4));
+}
+
+int efi_get_boot_option(
+ uint16_t id,
+ char **title,
+ sd_id128_t *part_uuid,
+ char **path,
+ bool *active) {
+
+ char boot_id[9];
+ _cleanup_free_ uint8_t *buf = NULL;
+ size_t l;
+ struct boot_option *header;
+ ssize_t title_size;
+ _cleanup_free_ char *s = NULL, *p = NULL;
+ sd_id128_t p_uuid = SD_ID128_NULL;
+ int r;
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ xsprintf(boot_id, "Boot%04X", id);
+ r = efi_get_variable(EFI_VENDOR_GLOBAL, boot_id, NULL, (void **)&buf, &l);
+ if (r < 0)
+ return r;
+ if (l < offsetof(struct boot_option, title))
+ return -ENOENT;
+
+ header = (struct boot_option *)buf;
+ title_size = utf16_size(header->title, l - offsetof(struct boot_option, title));
+ if (title_size < 0)
+ return title_size;
+
+ if (title) {
+ s = utf16_to_utf8(header->title, title_size);
+ if (!s)
+ return -ENOMEM;
+ }
+
+ if (header->path_len > 0) {
+ uint8_t *dbuf;
+ size_t dnext, doff;
+
+ doff = offsetof(struct boot_option, title) + title_size;
+ dbuf = buf + doff;
+ if (header->path_len > l - doff)
+ return -EINVAL;
+
+ dnext = 0;
+ while (dnext < header->path_len) {
+ struct device_path *dpath;
+
+ dpath = (struct device_path *)(dbuf + dnext);
+ if (dpath->length < 4)
+ break;
+
+ /* Type 0x7F – End of Hardware Device Path, Sub-Type 0xFF – End Entire Device Path */
+ if (dpath->type == END_DEVICE_PATH_TYPE && dpath->sub_type == END_ENTIRE_DEVICE_PATH_SUBTYPE)
+ break;
+
+ dnext += dpath->length;
+
+ /* Type 0x04 – Media Device Path */
+ if (dpath->type != MEDIA_DEVICE_PATH)
+ continue;
+
+ /* Sub-Type 1 – Hard Drive */
+ if (dpath->sub_type == MEDIA_HARDDRIVE_DP) {
+ /* 0x02 – GUID Partition Table */
+ if (dpath->drive.mbr_type != MBR_TYPE_EFI_PARTITION_TABLE_HEADER)
+ continue;
+
+ /* 0x02 – GUID signature */
+ if (dpath->drive.signature_type != SIGNATURE_TYPE_GUID)
+ continue;
+
+ if (part_uuid)
+ efi_guid_to_id128(dpath->drive.signature, &p_uuid);
+ continue;
+ }
+
+ /* Sub-Type 4 – File Path */
+ if (dpath->sub_type == MEDIA_FILEPATH_DP && !p && path) {
+ p = utf16_to_utf8(dpath->path, dpath->length-4);
+ if (!p)
+ return -ENOMEM;
+
+ efi_tilt_backslashes(p);
+ continue;
+ }
+ }
+ }
+
+ if (title)
+ *title = TAKE_PTR(s);
+ if (part_uuid)
+ *part_uuid = p_uuid;
+ if (path)
+ *path = TAKE_PTR(p);
+ if (active)
+ *active = !!(header->attr & LOAD_OPTION_ACTIVE);
+
+ return 0;
+}
+
+static void to_utf16(uint16_t *dest, const char *src) {
+ int i;
+
+ for (i = 0; src[i] != '\0'; i++)
+ dest[i] = src[i];
+ dest[i] = '\0';
+}
+
+static void id128_to_efi_guid(sd_id128_t id, void *guid) {
+ struct guid uuid = {
+ .u1 = id.bytes[0] << 24 | id.bytes[1] << 16 | id.bytes[2] << 8 | id.bytes[3],
+ .u2 = id.bytes[4] << 8 | id.bytes[5],
+ .u3 = id.bytes[6] << 8 | id.bytes[7],
+ };
+ memcpy(uuid.u4, id.bytes+8, sizeof(uuid.u4));
+ memcpy(guid, &uuid, sizeof(uuid));
+}
+
+static uint16_t *tilt_slashes(uint16_t *s) {
+ uint16_t *p;
+
+ for (p = s; *p; p++)
+ if (*p == '/')
+ *p = '\\';
+
+ return s;
+}
+
+int efi_add_boot_option(
+ uint16_t id,
+ const char *title,
+ uint32_t part,
+ uint64_t pstart,
+ uint64_t psize,
+ sd_id128_t part_uuid,
+ const char *path) {
+
+ size_t size, title_len, path_len;
+ _cleanup_free_ char *buf = NULL;
+ struct boot_option *option;
+ struct device_path *devicep;
+ char boot_id[9];
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ title_len = (strlen(title)+1) * 2;
+ path_len = (strlen(path)+1) * 2;
+
+ buf = malloc0(offsetof(struct boot_option, title) + title_len +
+ sizeof(struct drive_path) +
+ sizeof(struct device_path) + path_len);
+ if (!buf)
+ return -ENOMEM;
+
+ /* header */
+ option = (struct boot_option *)buf;
+ option->attr = LOAD_OPTION_ACTIVE;
+ option->path_len = offsetof(struct device_path, drive) + sizeof(struct drive_path) +
+ offsetof(struct device_path, path) + path_len +
+ offsetof(struct device_path, path);
+ to_utf16(option->title, title);
+ size = offsetof(struct boot_option, title) + title_len;
+
+ /* partition info */
+ devicep = (struct device_path *)(buf + size);
+ devicep->type = MEDIA_DEVICE_PATH;
+ devicep->sub_type = MEDIA_HARDDRIVE_DP;
+ devicep->length = offsetof(struct device_path, drive) + sizeof(struct drive_path);
+ memcpy(&devicep->drive.part_nr, &part, sizeof(uint32_t));
+ memcpy(&devicep->drive.part_start, &pstart, sizeof(uint64_t));
+ memcpy(&devicep->drive.part_size, &psize, sizeof(uint64_t));
+ id128_to_efi_guid(part_uuid, devicep->drive.signature);
+ devicep->drive.mbr_type = MBR_TYPE_EFI_PARTITION_TABLE_HEADER;
+ devicep->drive.signature_type = SIGNATURE_TYPE_GUID;
+ size += devicep->length;
+
+ /* path to loader */
+ devicep = (struct device_path *)(buf + size);
+ devicep->type = MEDIA_DEVICE_PATH;
+ devicep->sub_type = MEDIA_FILEPATH_DP;
+ devicep->length = offsetof(struct device_path, path) + path_len;
+ to_utf16(devicep->path, path);
+ tilt_slashes(devicep->path);
+ size += devicep->length;
+
+ /* end of path */
+ devicep = (struct device_path *)(buf + size);
+ devicep->type = END_DEVICE_PATH_TYPE;
+ devicep->sub_type = END_ENTIRE_DEVICE_PATH_SUBTYPE;
+ devicep->length = offsetof(struct device_path, path);
+ size += devicep->length;
+
+ xsprintf(boot_id, "Boot%04X", id);
+ return efi_set_variable(EFI_VENDOR_GLOBAL, boot_id, buf, size);
+}
+
+int efi_remove_boot_option(uint16_t id) {
+ char boot_id[9];
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ xsprintf(boot_id, "Boot%04X", id);
+ return efi_set_variable(EFI_VENDOR_GLOBAL, boot_id, NULL, 0);
+}
+
+int efi_get_boot_order(uint16_t **order) {
+ _cleanup_free_ void *buf = NULL;
+ size_t l;
+ int r;
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ r = efi_get_variable(EFI_VENDOR_GLOBAL, "BootOrder", NULL, &buf, &l);
+ if (r < 0)
+ return r;
+
+ if (l <= 0)
+ return -ENOENT;
+
+ if (l % sizeof(uint16_t) > 0 ||
+ l / sizeof(uint16_t) > INT_MAX)
+ return -EINVAL;
+
+ *order = TAKE_PTR(buf);
+ return (int) (l / sizeof(uint16_t));
+}
+
+int efi_set_boot_order(uint16_t *order, size_t n) {
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ return efi_set_variable(EFI_VENDOR_GLOBAL, "BootOrder", order, n * sizeof(uint16_t));
+}
+
+static int boot_id_hex(const char s[static 4]) {
+ int id = 0, i;
+
+ for (i = 0; i < 4; i++)
+ if (s[i] >= '0' && s[i] <= '9')
+ id |= (s[i] - '0') << (3 - i) * 4;
+ else if (s[i] >= 'A' && s[i] <= 'F')
+ id |= (s[i] - 'A' + 10) << (3 - i) * 4;
+ else
+ return -EINVAL;
+
+ return id;
+}
+
+static int cmp_uint16(const uint16_t *a, const uint16_t *b) {
+ return CMP(*a, *b);
+}
+
+int efi_get_boot_options(uint16_t **options) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ _cleanup_free_ uint16_t *list = NULL;
+ struct dirent *de;
+ size_t alloc = 0;
+ int count = 0;
+
+ assert(options);
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ dir = opendir("/sys/firmware/efi/efivars/");
+ if (!dir)
+ return -errno;
+
+ FOREACH_DIRENT(de, dir, return -errno) {
+ int id;
+
+ if (strncmp(de->d_name, "Boot", 4) != 0)
+ continue;
+
+ if (strlen(de->d_name) != 45)
+ continue;
+
+ if (strcmp(de->d_name + 8, "-8be4df61-93ca-11d2-aa0d-00e098032b8c") != 0)
+ continue;
+
+ id = boot_id_hex(de->d_name + 4);
+ if (id < 0)
+ continue;
+
+ if (!GREEDY_REALLOC(list, alloc, count + 1))
+ return -ENOMEM;
+
+ list[count++] = id;
+ }
+
+ typesafe_qsort(list, count, cmp_uint16);
+
+ *options = TAKE_PTR(list);
+
+ return count;
+}
+
+static int read_usec(sd_id128_t vendor, const char *name, usec_t *u) {
+ _cleanup_free_ char *j = NULL;
+ int r;
+ uint64_t x = 0;
+
+ assert(name);
+ assert(u);
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, name, &j);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(j, &x);
+ if (r < 0)
+ return r;
+
+ *u = x;
+ return 0;
+}
+
+int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader) {
+ uint64_t x, y;
+ int r;
+
+ assert(firmware);
+ assert(loader);
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ r = read_usec(EFI_VENDOR_LOADER, "LoaderTimeInitUSec", &x);
+ if (r < 0)
+ return r;
+
+ r = read_usec(EFI_VENDOR_LOADER, "LoaderTimeExecUSec", &y);
+ if (r < 0)
+ return r;
+
+ if (y == 0 || y < x)
+ return -EIO;
+
+ if (y > USEC_PER_HOUR)
+ return -EIO;
+
+ *firmware = x;
+ *loader = y;
+
+ return 0;
+}
+
+int efi_loader_get_device_part_uuid(sd_id128_t *u) {
+ _cleanup_free_ char *p = NULL;
+ int r, parsed[16];
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderDevicePartUUID", &p);
+ if (r < 0)
+ return r;
+
+ if (sscanf(p, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ &parsed[0], &parsed[1], &parsed[2], &parsed[3],
+ &parsed[4], &parsed[5], &parsed[6], &parsed[7],
+ &parsed[8], &parsed[9], &parsed[10], &parsed[11],
+ &parsed[12], &parsed[13], &parsed[14], &parsed[15]) != 16)
+ return -EIO;
+
+ if (u) {
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(parsed); i++)
+ u->bytes[i] = parsed[i];
+ }
+
+ return 0;
+}
+
+bool efi_loader_entry_name_valid(const char *s) {
+ if (isempty(s))
+ return false;
+
+ if (strlen(s) > FILENAME_MAX) /* Make sure entry names fit in filenames */
+ return false;
+
+ return in_charset(s, ALPHANUMERICAL "-");
+}
+
+int efi_loader_get_entries(char ***ret) {
+ _cleanup_free_ char16_t *entries = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ size_t size, i, start;
+ int r;
+
+ assert(ret);
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderEntries", NULL, (void**) &entries, &size);
+ if (r < 0)
+ return r;
+
+ /* The variable contains a series of individually NUL terminated UTF-16 strings. */
+
+ for (i = 0, start = 0;; i++) {
+ _cleanup_free_ char *decoded = NULL;
+ bool end;
+
+ /* Is this the end of the variable's data? */
+ end = i * sizeof(char16_t) >= size;
+
+ /* Are we in the middle of a string? (i.e. not at the end of the variable, nor at a NUL terminator?) If
+ * so, let's go to the next entry. */
+ if (!end && entries[i] != 0)
+ continue;
+
+ /* We reached the end of a string, let's decode it into UTF-8 */
+ decoded = utf16_to_utf8(entries + start, (i - start) * sizeof(char16_t));
+ if (!decoded)
+ return -ENOMEM;
+
+ if (efi_loader_entry_name_valid(decoded)) {
+ r = strv_consume(&l, TAKE_PTR(decoded));
+ if (r < 0)
+ return r;
+ } else
+ log_debug("Ignoring invalid loader entry '%s'.", decoded);
+
+ /* We reached the end of the variable */
+ if (end)
+ break;
+
+ /* Continue after the NUL byte */
+ start = i + 1;
+ }
+
+ *ret = TAKE_PTR(l);
+ return 0;
+}
+
+int efi_loader_get_features(uint64_t *ret) {
+ _cleanup_free_ void *v = NULL;
+ size_t s;
+ int r;
+
+ if (!is_efi_boot()) {
+ *ret = 0;
+ return 0;
+ }
+
+ r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderFeatures", NULL, &v, &s);
+ if (r == -ENOENT) {
+ _cleanup_free_ char *info = NULL;
+
+ /* The new (v240+) LoaderFeatures variable is not supported, let's see if it's systemd-boot at all */
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderInfo", &info);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return r;
+
+ /* Variable not set, definitely means not systemd-boot */
+
+ } else if (first_word(info, "systemd-boot")) {
+
+ /* An older systemd-boot version. Let's hardcode the feature set, since it was pretty
+ * static in all its versions. */
+
+ *ret = EFI_LOADER_FEATURE_CONFIG_TIMEOUT |
+ EFI_LOADER_FEATURE_ENTRY_DEFAULT |
+ EFI_LOADER_FEATURE_ENTRY_ONESHOT;
+
+ return 0;
+ }
+
+ /* No features supported */
+ *ret = 0;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ if (s != sizeof(uint64_t))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "LoaderFeatures EFI variable doesn't have the right size.");
+
+ memcpy(ret, v, sizeof(uint64_t));
+ return 0;
+}
+
+#endif
+
+char *efi_tilt_backslashes(char *s) {
+ char *p;
+
+ for (p = s; *p; p++)
+ if (*p == '\\')
+ *p = '/';
+
+ return s;
+}
diff --git a/src/shared/efivars.h b/src/shared/efivars.h
new file mode 100644
index 0000000..92670c8
--- /dev/null
+++ b/src/shared/efivars.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if ! ENABLE_EFI
+#include <errno.h>
+#endif
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "sd-id128.h"
+
+#include "time-util.h"
+
+#define EFI_VENDOR_LOADER SD_ID128_MAKE(4a,67,b0,82,0a,4c,41,cf,b6,c7,44,0b,29,bb,8c,4f)
+#define EFI_VENDOR_GLOBAL SD_ID128_MAKE(8b,e4,df,61,93,ca,11,d2,aa,0d,00,e0,98,03,2b,8c)
+#define EFI_VARIABLE_NON_VOLATILE 0x0000000000000001
+#define EFI_VARIABLE_BOOTSERVICE_ACCESS 0x0000000000000002
+#define EFI_VARIABLE_RUNTIME_ACCESS 0x0000000000000004
+
+#define EFI_LOADER_FEATURE_CONFIG_TIMEOUT (UINT64_C(1) << 0)
+#define EFI_LOADER_FEATURE_CONFIG_TIMEOUT_ONE_SHOT (UINT64_C(1) << 1)
+#define EFI_LOADER_FEATURE_ENTRY_DEFAULT (UINT64_C(1) << 2)
+#define EFI_LOADER_FEATURE_ENTRY_ONESHOT (UINT64_C(1) << 3)
+#define EFI_LOADER_FEATURE_BOOT_COUNTING (UINT64_C(1) << 4)
+
+#if ENABLE_EFI
+
+bool is_efi_boot(void);
+bool is_efi_secure_boot(void);
+bool is_efi_secure_boot_setup_mode(void);
+int efi_reboot_to_firmware_supported(void);
+int efi_get_reboot_to_firmware(void);
+int efi_set_reboot_to_firmware(bool value);
+
+int efi_get_variable(sd_id128_t vendor, const char *name, uint32_t *attribute, void **value, size_t *size);
+int efi_get_variable_string(sd_id128_t vendor, const char *name, char **p);
+int efi_set_variable(sd_id128_t vendor, const char *name, const void *value, size_t size);
+int efi_set_variable_string(sd_id128_t vendor, const char *name, const char *p);
+
+int efi_get_boot_option(uint16_t nr, char **title, sd_id128_t *part_uuid, char **path, bool *active);
+int efi_add_boot_option(uint16_t id, const char *title, uint32_t part, uint64_t pstart, uint64_t psize, sd_id128_t part_uuid, const char *path);
+int efi_remove_boot_option(uint16_t id);
+int efi_get_boot_order(uint16_t **order);
+int efi_set_boot_order(uint16_t *order, size_t n);
+int efi_get_boot_options(uint16_t **options);
+
+int efi_loader_get_device_part_uuid(sd_id128_t *u);
+int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader);
+
+int efi_loader_get_entries(char ***ret);
+
+bool efi_loader_entry_name_valid(const char *s);
+
+int efi_loader_get_features(uint64_t *ret);
+
+#else
+
+static inline bool is_efi_boot(void) {
+ return false;
+}
+
+static inline bool is_efi_secure_boot(void) {
+ return false;
+}
+
+static inline bool is_efi_secure_boot_setup_mode(void) {
+ return false;
+}
+
+static inline int efi_reboot_to_firmware_supported(void) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_reboot_to_firmware(void) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_set_reboot_to_firmware(bool value) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_variable(sd_id128_t vendor, const char *name, uint32_t *attribute, void **value, size_t *size) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_variable_string(sd_id128_t vendor, const char *name, char **p) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_set_variable(sd_id128_t vendor, const char *name, const void *value, size_t size) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_set_variable_string(sd_id128_t vendor, const char *name, const char *p) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_boot_option(uint16_t nr, char **title, sd_id128_t *part_uuid, char **path, bool *active) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_add_boot_option(uint16_t id, const char *title, uint32_t part, uint64_t pstart, uint64_t psize, sd_id128_t part_uuid, const char *path) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_remove_boot_option(uint16_t id) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_boot_order(uint16_t **order) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_set_boot_order(uint16_t *order, size_t n) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_boot_options(uint16_t **options) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_device_part_uuid(sd_id128_t *u) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_entries(char ***ret) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_features(uint64_t *ret) {
+ return -EOPNOTSUPP;
+}
+
+#endif
+
+char *efi_tilt_backslashes(char *s);
diff --git a/src/shared/enable-mempool.c b/src/shared/enable-mempool.c
new file mode 100644
index 0000000..a571b43
--- /dev/null
+++ b/src/shared/enable-mempool.c
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "mempool.h"
+
+const bool mempool_use_allowed = true;
diff --git a/src/shared/env-file-label.c b/src/shared/env-file-label.c
new file mode 100644
index 0000000..add68d2
--- /dev/null
+++ b/src/shared/env-file-label.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/stat.h>
+
+#include "env-file-label.h"
+#include "env-file.h"
+#include "selinux-util.h"
+
+int write_env_file_label(const char *fname, char **l) {
+ int r;
+
+ r = mac_selinux_create_file_prepare(fname, S_IFREG);
+ if (r < 0)
+ return r;
+
+ r = write_env_file(fname, l);
+
+ mac_selinux_create_file_clear();
+
+ return r;
+}
diff --git a/src/shared/env-file-label.h b/src/shared/env-file-label.h
new file mode 100644
index 0000000..158fc4e
--- /dev/null
+++ b/src/shared/env-file-label.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* These functions are split out of fileio.h (and not for example just flags to the functions they wrap) in order to
+ * optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but not
+ * for all */
+
+int write_env_file_label(const char *fname, char **l);
diff --git a/src/shared/exec-util.c b/src/shared/exec-util.c
new file mode 100644
index 0000000..17a278a
--- /dev/null
+++ b/src/shared/exec-util.c
@@ -0,0 +1,353 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "exec-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "serialize.h"
+#include "set.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+/* Put this test here for a lack of better place */
+assert_cc(EAGAIN == EWOULDBLOCK);
+
+static int do_spawn(const char *path, char *argv[], int stdout_fd, pid_t *pid) {
+
+ pid_t _pid;
+ int r;
+
+ if (null_or_empty_path(path)) {
+ log_debug("%s is empty (a mask).", path);
+ return 0;
+ }
+
+ r = safe_fork("(direxec)", FORK_DEATHSIG|FORK_LOG, &_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ char *_argv[2];
+
+ if (stdout_fd >= 0) {
+ r = rearrange_stdio(STDIN_FILENO, stdout_fd, STDERR_FILENO);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+ }
+
+ (void) rlimit_nofile_safe();
+
+ if (!argv) {
+ _argv[0] = (char*) path;
+ _argv[1] = NULL;
+ argv = _argv;
+ } else
+ argv[0] = (char*) path;
+
+ execv(path, argv);
+ log_error_errno(errno, "Failed to execute %s: %m", path);
+ _exit(EXIT_FAILURE);
+ }
+
+ *pid = _pid;
+ return 1;
+}
+
+static int do_execute(
+ char **directories,
+ usec_t timeout,
+ gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX],
+ void* const callback_args[_STDOUT_CONSUME_MAX],
+ int output_fd,
+ char *argv[],
+ char *envp[]) {
+
+ _cleanup_hashmap_free_free_ Hashmap *pids = NULL;
+ _cleanup_strv_free_ char **paths = NULL;
+ char **path, **e;
+ int r;
+
+ /* We fork this all off from a child process so that we can somewhat cleanly make
+ * use of SIGALRM to set a time limit.
+ *
+ * If callbacks is nonnull, execution is serial. Otherwise, we default to parallel.
+ */
+
+ r = conf_files_list_strv(&paths, NULL, NULL, CONF_FILES_EXECUTABLE|CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, (const char* const*) directories);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate executables: %m");
+
+ if (!callbacks) {
+ pids = hashmap_new(NULL);
+ if (!pids)
+ return log_oom();
+ }
+
+ /* Abort execution of this process after the timout. We simply rely on SIGALRM as
+ * default action terminating the process, and turn on alarm(). */
+
+ if (timeout != USEC_INFINITY)
+ alarm(DIV_ROUND_UP(timeout, USEC_PER_SEC));
+
+ STRV_FOREACH(e, envp)
+ if (putenv(*e) != 0)
+ return log_error_errno(errno, "Failed to set environment variable: %m");
+
+ STRV_FOREACH(path, paths) {
+ _cleanup_free_ char *t = NULL;
+ _cleanup_close_ int fd = -1;
+ pid_t pid;
+
+ t = strdup(*path);
+ if (!t)
+ return log_oom();
+
+ if (callbacks) {
+ fd = open_serialization_fd(basename(*path));
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to open serialization file: %m");
+ }
+
+ r = do_spawn(t, argv, fd, &pid);
+ if (r <= 0)
+ continue;
+
+ if (pids) {
+ r = hashmap_put(pids, PID_TO_PTR(pid), t);
+ if (r < 0)
+ return log_oom();
+ t = NULL;
+ } else {
+ r = wait_for_terminate_and_check(t, pid, WAIT_LOG);
+ if (r < 0)
+ continue;
+
+ if (lseek(fd, 0, SEEK_SET) < 0)
+ return log_error_errno(errno, "Failed to seek on serialization fd: %m");
+
+ r = callbacks[STDOUT_GENERATE](fd, callback_args[STDOUT_GENERATE]);
+ fd = -1;
+ if (r < 0)
+ return log_error_errno(r, "Failed to process output from %s: %m", *path);
+ }
+ }
+
+ if (callbacks) {
+ r = callbacks[STDOUT_COLLECT](output_fd, callback_args[STDOUT_COLLECT]);
+ if (r < 0)
+ return log_error_errno(r, "Callback two failed: %m");
+ }
+
+ while (!hashmap_isempty(pids)) {
+ _cleanup_free_ char *t = NULL;
+ pid_t pid;
+
+ pid = PTR_TO_PID(hashmap_first_key(pids));
+ assert(pid > 0);
+
+ t = hashmap_remove(pids, PID_TO_PTR(pid));
+ assert(t);
+
+ (void) wait_for_terminate_and_check(t, pid, WAIT_LOG);
+ }
+
+ return 0;
+}
+
+int execute_directories(
+ const char* const* directories,
+ usec_t timeout,
+ gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX],
+ void* const callback_args[_STDOUT_CONSUME_MAX],
+ char *argv[],
+ char *envp[]) {
+
+ char **dirs = (char**) directories;
+ _cleanup_close_ int fd = -1;
+ char *name;
+ int r;
+
+ assert(!strv_isempty(dirs));
+
+ name = basename(dirs[0]);
+ assert(!isempty(name));
+
+ if (callbacks) {
+ assert(callback_args);
+ assert(callbacks[STDOUT_GENERATE]);
+ assert(callbacks[STDOUT_COLLECT]);
+ assert(callbacks[STDOUT_CONSUME]);
+
+ fd = open_serialization_fd(name);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to open serialization file: %m");
+ }
+
+ /* Executes all binaries in the directories serially or in parallel and waits for
+ * them to finish. Optionally a timeout is applied. If a file with the same name
+ * exists in more than one directory, the earliest one wins. */
+
+ r = safe_fork("(sd-executor)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ r = do_execute(dirs, timeout, callbacks, callback_args, fd, argv, envp);
+ _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+ }
+
+ if (!callbacks)
+ return 0;
+
+ if (lseek(fd, 0, SEEK_SET) < 0)
+ return log_error_errno(errno, "Failed to rewind serialization fd: %m");
+
+ r = callbacks[STDOUT_CONSUME](fd, callback_args[STDOUT_CONSUME]);
+ fd = -1;
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse returned data: %m");
+ return 0;
+}
+
+static int gather_environment_generate(int fd, void *arg) {
+ char ***env = arg, **x, **y;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **new = NULL;
+ int r;
+
+ /* Read a series of VAR=value assignments from fd, use them to update the list of
+ * variables in env. Also update the exported environment.
+ *
+ * fd is always consumed, even on error.
+ */
+
+ assert(env);
+
+ f = fdopen(fd, "r");
+ if (!f) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ r = load_env_file_pairs(f, NULL, &new);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH_PAIR(x, y, new) {
+ char *p;
+
+ if (!env_name_is_valid(*x)) {
+ log_warning("Invalid variable assignment \"%s=...\", ignoring.", *x);
+ continue;
+ }
+
+ p = strjoin(*x, "=", *y);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_env_replace(env, p);
+ if (r < 0)
+ return r;
+
+ if (setenv(*x, *y, true) < 0)
+ return -errno;
+ }
+
+ return r;
+}
+
+static int gather_environment_collect(int fd, void *arg) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char ***env = arg;
+ int r;
+
+ /* Write out a series of env=cescape(VAR=value) assignments to fd. */
+
+ assert(env);
+
+ f = fdopen(fd, "w");
+ if (!f) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ r = serialize_strv(f, "env", *env);
+ if (r < 0)
+ return r;
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int gather_environment_consume(int fd, void *arg) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char ***env = arg;
+ int r = 0;
+
+ /* Read a series of env=cescape(VAR=value) assignments from fd into env. */
+
+ assert(env);
+
+ f = fdopen(fd, "r");
+ if (!f) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *v;
+ int k;
+
+ k = read_line(f, LONG_LINE_MAX, &line);
+ if (k < 0)
+ return k;
+ if (k == 0)
+ break;
+
+ v = startswith(line, "env=");
+ if (!v) {
+ log_debug("Serialization line \"%s\" unexpectedly didn't start with \"env=\".", line);
+ if (r == 0)
+ r = -EINVAL;
+
+ continue;
+ }
+
+ k = deserialize_environment(v, env);
+ if (k < 0) {
+ log_debug_errno(k, "Invalid serialization line \"%s\": %m", line);
+
+ if (r == 0)
+ r = k;
+ }
+ }
+
+ return r;
+}
+
+const gather_stdout_callback_t gather_environment[] = {
+ gather_environment_generate,
+ gather_environment_collect,
+ gather_environment_consume,
+};
diff --git a/src/shared/exec-util.h b/src/shared/exec-util.h
new file mode 100644
index 0000000..6ac3c90
--- /dev/null
+++ b/src/shared/exec-util.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+
+typedef int (*gather_stdout_callback_t) (int fd, void *arg);
+
+enum {
+ STDOUT_GENERATE, /* from generators to helper process */
+ STDOUT_COLLECT, /* from helper process to main process */
+ STDOUT_CONSUME, /* process data in main process */
+ _STDOUT_CONSUME_MAX,
+};
+
+int execute_directories(
+ const char* const* directories,
+ usec_t timeout,
+ gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX],
+ void* const callback_args[_STDOUT_CONSUME_MAX],
+ char *argv[],
+ char *envp[]);
+
+extern const gather_stdout_callback_t gather_environment[_STDOUT_CONSUME_MAX];
diff --git a/src/shared/exit-status.c b/src/shared/exit-status.c
new file mode 100644
index 0000000..26b3060
--- /dev/null
+++ b/src/shared/exit-status.c
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <signal.h>
+#include <stdlib.h>
+#include <sysexits.h>
+
+#include "exit-status.h"
+#include "macro.h"
+#include "set.h"
+
+const char* exit_status_to_string(int status, ExitStatusLevel level) {
+
+ /* Exit status ranges:
+ *
+ * 0…1 │ ISO C, EXIT_SUCCESS + EXIT_FAILURE
+ * 2…7 │ LSB exit codes for init scripts
+ * 8…63 │ (Currently unmapped)
+ * 64…78 │ BSD defined exit codes
+ * 79…199 │ (Currently unmapped)
+ * 200…241 │ systemd's private error codes (might be extended to 254 in future development)
+ * 242…254 │ (Currently unmapped, but see above)
+ *
+ * 255 │ EXIT_EXCEPTION (We use this to propagate exit-by-signal events. It's frequently used by others apps (like bash)
+ * │ to indicate exit reason that cannot really be expressed in a single exit status value — such as a propagated
+ * │ signal or such, and we follow that logic here.)
+ */
+
+ switch (status) { /* We always cover the ISO C ones */
+
+ case EXIT_SUCCESS:
+ return "SUCCESS";
+
+ case EXIT_FAILURE:
+ return "FAILURE";
+ }
+
+ if (IN_SET(level, EXIT_STATUS_SYSTEMD, EXIT_STATUS_LSB, EXIT_STATUS_FULL)) {
+ switch (status) { /* Optionally we cover our own ones */
+
+ case EXIT_CHDIR:
+ return "CHDIR";
+
+ case EXIT_NICE:
+ return "NICE";
+
+ case EXIT_FDS:
+ return "FDS";
+
+ case EXIT_EXEC:
+ return "EXEC";
+
+ case EXIT_MEMORY:
+ return "MEMORY";
+
+ case EXIT_LIMITS:
+ return "LIMITS";
+
+ case EXIT_OOM_ADJUST:
+ return "OOM_ADJUST";
+
+ case EXIT_SIGNAL_MASK:
+ return "SIGNAL_MASK";
+
+ case EXIT_STDIN:
+ return "STDIN";
+
+ case EXIT_STDOUT:
+ return "STDOUT";
+
+ case EXIT_CHROOT:
+ return "CHROOT";
+
+ case EXIT_IOPRIO:
+ return "IOPRIO";
+
+ case EXIT_TIMERSLACK:
+ return "TIMERSLACK";
+
+ case EXIT_SECUREBITS:
+ return "SECUREBITS";
+
+ case EXIT_SETSCHEDULER:
+ return "SETSCHEDULER";
+
+ case EXIT_CPUAFFINITY:
+ return "CPUAFFINITY";
+
+ case EXIT_GROUP:
+ return "GROUP";
+
+ case EXIT_USER:
+ return "USER";
+
+ case EXIT_CAPABILITIES:
+ return "CAPABILITIES";
+
+ case EXIT_CGROUP:
+ return "CGROUP";
+
+ case EXIT_SETSID:
+ return "SETSID";
+
+ case EXIT_CONFIRM:
+ return "CONFIRM";
+
+ case EXIT_STDERR:
+ return "STDERR";
+
+ case EXIT_PAM:
+ return "PAM";
+
+ case EXIT_NETWORK:
+ return "NETWORK";
+
+ case EXIT_NAMESPACE:
+ return "NAMESPACE";
+
+ case EXIT_NO_NEW_PRIVILEGES:
+ return "NO_NEW_PRIVILEGES";
+
+ case EXIT_SECCOMP:
+ return "SECCOMP";
+
+ case EXIT_SELINUX_CONTEXT:
+ return "SELINUX_CONTEXT";
+
+ case EXIT_PERSONALITY:
+ return "PERSONALITY";
+
+ case EXIT_APPARMOR_PROFILE:
+ return "APPARMOR";
+
+ case EXIT_ADDRESS_FAMILIES:
+ return "ADDRESS_FAMILIES";
+
+ case EXIT_RUNTIME_DIRECTORY:
+ return "RUNTIME_DIRECTORY";
+
+ case EXIT_CHOWN:
+ return "CHOWN";
+
+ case EXIT_SMACK_PROCESS_LABEL:
+ return "SMACK_PROCESS_LABEL";
+
+ case EXIT_KEYRING:
+ return "KEYRING";
+
+ case EXIT_STATE_DIRECTORY:
+ return "STATE_DIRECTORY";
+
+ case EXIT_CACHE_DIRECTORY:
+ return "CACHE_DIRECTORY";
+
+ case EXIT_LOGS_DIRECTORY:
+ return "LOGS_DIRECTORY";
+
+ case EXIT_CONFIGURATION_DIRECTORY:
+ return "CONFIGURATION_DIRECTORY";
+
+ case EXIT_EXCEPTION:
+ return "EXCEPTION";
+ }
+ }
+
+ if (IN_SET(level, EXIT_STATUS_LSB, EXIT_STATUS_FULL)) {
+ switch (status) { /* Optionally we support LSB ones */
+
+ case EXIT_INVALIDARGUMENT:
+ return "INVALIDARGUMENT";
+
+ case EXIT_NOTIMPLEMENTED:
+ return "NOTIMPLEMENTED";
+
+ case EXIT_NOPERMISSION:
+ return "NOPERMISSION";
+
+ case EXIT_NOTINSTALLED:
+ return "NOTINSTALLED";
+
+ case EXIT_NOTCONFIGURED:
+ return "NOTCONFIGURED";
+
+ case EXIT_NOTRUNNING:
+ return "NOTRUNNING";
+ }
+ }
+
+ if (level == EXIT_STATUS_FULL) {
+ switch (status) { /* Optionally, we support BSD exit statusses */
+
+ case EX_USAGE:
+ return "USAGE";
+
+ case EX_DATAERR:
+ return "DATAERR";
+
+ case EX_NOINPUT:
+ return "NOINPUT";
+
+ case EX_NOUSER:
+ return "NOUSER";
+
+ case EX_NOHOST:
+ return "NOHOST";
+
+ case EX_UNAVAILABLE:
+ return "UNAVAILABLE";
+
+ case EX_SOFTWARE:
+ return "SOFTWARE";
+
+ case EX_OSERR:
+ return "OSERR";
+
+ case EX_OSFILE:
+ return "OSFILE";
+
+ case EX_CANTCREAT:
+ return "CANTCREAT";
+
+ case EX_IOERR:
+ return "IOERR";
+
+ case EX_TEMPFAIL:
+ return "TEMPFAIL";
+
+ case EX_PROTOCOL:
+ return "PROTOCOL";
+
+ case EX_NOPERM:
+ return "NOPERM";
+
+ case EX_CONFIG:
+ return "CONFIG";
+ }
+ }
+
+ return NULL;
+}
+
+bool is_clean_exit(int code, int status, ExitClean clean, ExitStatusSet *success_status) {
+
+ if (code == CLD_EXITED)
+ return status == 0 ||
+ (success_status &&
+ set_contains(success_status->status, INT_TO_PTR(status)));
+
+ /* If a daemon does not implement handlers for some of the signals that's not considered an unclean shutdown */
+ if (code == CLD_KILLED)
+ return
+ (clean == EXIT_CLEAN_DAEMON && IN_SET(status, SIGHUP, SIGINT, SIGTERM, SIGPIPE)) ||
+ (success_status &&
+ set_contains(success_status->signal, INT_TO_PTR(status)));
+
+ return false;
+}
+
+void exit_status_set_free(ExitStatusSet *x) {
+ assert(x);
+
+ x->status = set_free(x->status);
+ x->signal = set_free(x->signal);
+}
+
+bool exit_status_set_is_empty(ExitStatusSet *x) {
+ if (!x)
+ return true;
+
+ return set_isempty(x->status) && set_isempty(x->signal);
+}
+
+bool exit_status_set_test(ExitStatusSet *x, int code, int status) {
+
+ if (exit_status_set_is_empty(x))
+ return false;
+
+ if (code == CLD_EXITED && set_contains(x->status, INT_TO_PTR(status)))
+ return true;
+
+ if (IN_SET(code, CLD_KILLED, CLD_DUMPED) && set_contains(x->signal, INT_TO_PTR(status)))
+ return true;
+
+ return false;
+}
diff --git a/src/shared/exit-status.h b/src/shared/exit-status.h
new file mode 100644
index 0000000..510eb31
--- /dev/null
+++ b/src/shared/exit-status.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+#include "set.h"
+
+/* This defines pretty names for the LSB 'start' verb exit codes. Note that they shouldn't be confused with the LSB
+ * 'status' verb exit codes which are defined very differently. For details see:
+ *
+ * https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html
+ */
+
+enum {
+ /* EXIT_SUCCESS defined by libc */
+ /* EXIT_FAILURE defined by libc */
+ EXIT_INVALIDARGUMENT = 2,
+ EXIT_NOTIMPLEMENTED = 3,
+ EXIT_NOPERMISSION = 4,
+ EXIT_NOTINSTALLED = 5,
+ EXIT_NOTCONFIGURED = 6,
+ EXIT_NOTRUNNING = 7,
+
+ /* BSD's sysexits.h defines a couple EX_xyz exit codes in the range 64 … 78 */
+
+ /* The LSB suggests that error codes >= 200 are "reserved". We use them here under the assumption that they
+ * hence are unused by init scripts. */
+ EXIT_CHDIR = 200,
+ EXIT_NICE,
+ EXIT_FDS,
+ EXIT_EXEC,
+ EXIT_MEMORY,
+ EXIT_LIMITS,
+ EXIT_OOM_ADJUST,
+ EXIT_SIGNAL_MASK,
+ EXIT_STDIN,
+ EXIT_STDOUT,
+ EXIT_CHROOT, /* 210 */
+ EXIT_IOPRIO,
+ EXIT_TIMERSLACK,
+ EXIT_SECUREBITS,
+ EXIT_SETSCHEDULER,
+ EXIT_CPUAFFINITY,
+ EXIT_GROUP,
+ EXIT_USER,
+ EXIT_CAPABILITIES,
+ EXIT_CGROUP,
+ EXIT_SETSID, /* 220 */
+ EXIT_CONFIRM,
+ EXIT_STDERR,
+ _EXIT_RESERVED, /* used to be tcpwrap, don't reuse! */
+ EXIT_PAM,
+ EXIT_NETWORK,
+ EXIT_NAMESPACE,
+ EXIT_NO_NEW_PRIVILEGES,
+ EXIT_SECCOMP,
+ EXIT_SELINUX_CONTEXT,
+ EXIT_PERSONALITY, /* 230 */
+ EXIT_APPARMOR_PROFILE,
+ EXIT_ADDRESS_FAMILIES,
+ EXIT_RUNTIME_DIRECTORY,
+ _EXIT_RESERVED2, /* used to be used by kdbus, don't reuse */
+ EXIT_CHOWN,
+ EXIT_SMACK_PROCESS_LABEL,
+ EXIT_KEYRING,
+ EXIT_STATE_DIRECTORY,
+ EXIT_CACHE_DIRECTORY,
+ EXIT_LOGS_DIRECTORY, /* 240 */
+ EXIT_CONFIGURATION_DIRECTORY,
+
+ EXIT_EXCEPTION = 255, /* Whenever we want to propagate an abnormal/signal exit, in line with bash */
+};
+
+typedef enum ExitStatusLevel {
+ EXIT_STATUS_MINIMAL, /* only cover libc EXIT_STATUS/EXIT_FAILURE */
+ EXIT_STATUS_SYSTEMD, /* cover libc and systemd's own exit codes */
+ EXIT_STATUS_LSB, /* cover libc, systemd's own and LSB exit codes */
+ EXIT_STATUS_FULL, /* cover libc, systemd's own, LSB and BSD (EX_xyz) exit codes */
+} ExitStatusLevel;
+
+typedef struct ExitStatusSet {
+ Set *status;
+ Set *signal;
+} ExitStatusSet;
+
+const char* exit_status_to_string(int status, ExitStatusLevel level) _const_;
+
+typedef enum ExitClean {
+ EXIT_CLEAN_DAEMON,
+ EXIT_CLEAN_COMMAND,
+} ExitClean;
+
+bool is_clean_exit(int code, int status, ExitClean clean, ExitStatusSet *success_status);
+
+void exit_status_set_free(ExitStatusSet *x);
+bool exit_status_set_is_empty(ExitStatusSet *x);
+bool exit_status_set_test(ExitStatusSet *x, int code, int status);
diff --git a/src/shared/fdset.c b/src/shared/fdset.c
new file mode 100644
index 0000000..5d27732
--- /dev/null
+++ b/src/shared/fdset.c
@@ -0,0 +1,255 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <alloca.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fdset.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "set.h"
+
+#define MAKE_SET(s) ((Set*) s)
+#define MAKE_FDSET(s) ((FDSet*) s)
+
+FDSet *fdset_new(void) {
+ return MAKE_FDSET(set_new(NULL));
+}
+
+int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds) {
+ size_t i;
+ FDSet *s;
+ int r;
+
+ assert(ret);
+
+ s = fdset_new();
+ if (!s)
+ return -ENOMEM;
+
+ for (i = 0; i < n_fds; i++) {
+
+ r = fdset_put(s, fds[i]);
+ if (r < 0) {
+ set_free(MAKE_SET(s));
+ return r;
+ }
+ }
+
+ *ret = s;
+ return 0;
+}
+
+FDSet* fdset_free(FDSet *s) {
+ void *p;
+
+ while ((p = set_steal_first(MAKE_SET(s)))) {
+ /* Valgrind's fd might have ended up in this set here,
+ * due to fdset_new_fill(). We'll ignore all failures
+ * here, so that the EBADFD that valgrind will return
+ * us on close() doesn't influence us */
+
+ /* When reloading duplicates of the private bus
+ * connection fds and suchlike are closed here, which
+ * has no effect at all, since they are only
+ * duplicates. So don't be surprised about these log
+ * messages. */
+
+ log_debug("Closing left-over fd %i", PTR_TO_FD(p));
+ close_nointr(PTR_TO_FD(p));
+ }
+
+ set_free(MAKE_SET(s));
+ return NULL;
+}
+
+int fdset_put(FDSet *s, int fd) {
+ assert(s);
+ assert(fd >= 0);
+
+ return set_put(MAKE_SET(s), FD_TO_PTR(fd));
+}
+
+int fdset_put_dup(FDSet *s, int fd) {
+ int copy, r;
+
+ assert(s);
+ assert(fd >= 0);
+
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0)
+ return -errno;
+
+ r = fdset_put(s, copy);
+ if (r < 0) {
+ safe_close(copy);
+ return r;
+ }
+
+ return copy;
+}
+
+bool fdset_contains(FDSet *s, int fd) {
+ assert(s);
+ assert(fd >= 0);
+
+ return !!set_get(MAKE_SET(s), FD_TO_PTR(fd));
+}
+
+int fdset_remove(FDSet *s, int fd) {
+ assert(s);
+ assert(fd >= 0);
+
+ return set_remove(MAKE_SET(s), FD_TO_PTR(fd)) ? fd : -ENOENT;
+}
+
+int fdset_new_fill(FDSet **_s) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+ FDSet *s;
+
+ assert(_s);
+
+ /* Creates an fdset and fills in all currently open file
+ * descriptors. */
+
+ d = opendir("/proc/self/fd");
+ if (!d)
+ return -errno;
+
+ s = fdset_new();
+ if (!s) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ int fd = -1;
+
+ r = safe_atoi(de->d_name, &fd);
+ if (r < 0)
+ goto finish;
+
+ if (fd < 3)
+ continue;
+
+ if (fd == dirfd(d))
+ continue;
+
+ r = fdset_put(s, fd);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = 0;
+ *_s = TAKE_PTR(s);
+
+finish:
+ /* We won't close the fds here! */
+ if (s)
+ set_free(MAKE_SET(s));
+
+ return r;
+}
+
+int fdset_cloexec(FDSet *fds, bool b) {
+ Iterator i;
+ void *p;
+ int r;
+
+ assert(fds);
+
+ SET_FOREACH(p, MAKE_SET(fds), i) {
+ r = fd_cloexec(PTR_TO_FD(p), b);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int fdset_new_listen_fds(FDSet **_s, bool unset) {
+ int n, fd, r;
+ FDSet *s;
+
+ assert(_s);
+
+ /* Creates an fdset and fills in all passed file descriptors */
+
+ s = fdset_new();
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ n = sd_listen_fds(unset);
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd ++) {
+ r = fdset_put(s, fd);
+ if (r < 0)
+ goto fail;
+ }
+
+ *_s = s;
+ return 0;
+
+fail:
+ if (s)
+ set_free(MAKE_SET(s));
+
+ return r;
+}
+
+int fdset_close_others(FDSet *fds) {
+ void *e;
+ Iterator i;
+ int *a = NULL;
+ size_t j = 0, m;
+
+ m = fdset_size(fds);
+
+ if (m > 0) {
+ a = newa(int, m);
+ SET_FOREACH(e, MAKE_SET(fds), i)
+ a[j++] = PTR_TO_FD(e);
+ }
+
+ assert(j == m);
+
+ return close_all_fds(a, j);
+}
+
+unsigned fdset_size(FDSet *fds) {
+ return set_size(MAKE_SET(fds));
+}
+
+bool fdset_isempty(FDSet *fds) {
+ return set_isempty(MAKE_SET(fds));
+}
+
+int fdset_iterate(FDSet *s, Iterator *i) {
+ void *p;
+
+ if (!set_iterate(MAKE_SET(s), i, &p))
+ return -ENOENT;
+
+ return PTR_TO_FD(p);
+}
+
+int fdset_steal_first(FDSet *fds) {
+ void *p;
+
+ p = set_steal_first(MAKE_SET(fds));
+ if (!p)
+ return -ENOENT;
+
+ return PTR_TO_FD(p);
+}
diff --git a/src/shared/fdset.h b/src/shared/fdset.h
new file mode 100644
index 0000000..d31062b
--- /dev/null
+++ b/src/shared/fdset.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+#include "set.h"
+
+typedef struct FDSet FDSet;
+
+FDSet* fdset_new(void);
+FDSet* fdset_free(FDSet *s);
+
+int fdset_put(FDSet *s, int fd);
+int fdset_put_dup(FDSet *s, int fd);
+
+bool fdset_contains(FDSet *s, int fd);
+int fdset_remove(FDSet *s, int fd);
+
+int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds);
+int fdset_new_fill(FDSet **ret);
+int fdset_new_listen_fds(FDSet **ret, bool unset);
+
+int fdset_cloexec(FDSet *fds, bool b);
+
+int fdset_close_others(FDSet *fds);
+
+unsigned fdset_size(FDSet *fds);
+bool fdset_isempty(FDSet *fds);
+
+int fdset_iterate(FDSet *s, Iterator *i);
+
+int fdset_steal_first(FDSet *fds);
+
+#define FDSET_FOREACH(fd, fds, i) \
+ for ((i) = ITERATOR_FIRST, (fd) = fdset_iterate((fds), &(i)); (fd) >= 0; (fd) = fdset_iterate((fds), &(i)))
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(FDSet*, fdset_free);
+#define _cleanup_fdset_free_ _cleanup_(fdset_freep)
diff --git a/src/shared/fileio-label.c b/src/shared/fileio-label.c
new file mode 100644
index 0000000..49ab297
--- /dev/null
+++ b/src/shared/fileio-label.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/stat.h>
+
+#include "fileio-label.h"
+#include "fileio.h"
+#include "selinux-util.h"
+
+int write_string_file_atomic_label_ts(const char *fn, const char *line, struct timespec *ts) {
+ int r;
+
+ r = mac_selinux_create_file_prepare(fn, S_IFREG);
+ if (r < 0)
+ return r;
+
+ r = write_string_file_ts(fn, line, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC, ts);
+
+ mac_selinux_create_file_clear();
+
+ return r;
+}
+
+int create_shutdown_run_nologin_or_warn(void) {
+ int r;
+
+ /* This is used twice: once in systemd-user-sessions.service, in order to block logins when we actually go
+ * down, and once in systemd-logind.service when shutdowns are scheduled, and logins are to be turned off a bit
+ * in advance. We use the same wording of the message in both cases. */
+
+ r = write_string_file_atomic_label("/run/nologin",
+ "System is going down. Unprivileged users are not permitted to log in anymore. "
+ "For technical details, see pam_nologin(8).");
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /run/nologin: %m");
+
+ return 0;
+}
diff --git a/src/shared/fileio-label.h b/src/shared/fileio-label.h
new file mode 100644
index 0000000..8f88955
--- /dev/null
+++ b/src/shared/fileio-label.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdio.h>
+
+/* These functions are split out of fileio.h (and not for example just flags to the functions they wrap) in order to
+ * optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but not
+ * for all */
+
+int write_string_file_atomic_label_ts(const char *fn, const char *line, struct timespec *ts);
+static inline int write_string_file_atomic_label(const char *fn, const char *line) {
+ return write_string_file_atomic_label_ts(fn, line, NULL);
+}
+
+int create_shutdown_run_nologin_or_warn(void);
diff --git a/src/shared/firewall-util.c b/src/shared/firewall-util.c
new file mode 100644
index 0000000..cba52fb
--- /dev/null
+++ b/src/shared/firewall-util.c
@@ -0,0 +1,350 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+/* Temporary work-around for broken glibc vs. linux kernel header definitions
+ * This is already fixed upstream, remove this when distributions have updated.
+ */
+#define _NET_IF_H 1
+
+#include <alloca.h>
+#include <arpa/inet.h>
+#include <endian.h>
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+#include <linux/if.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/nf_nat.h>
+#include <linux/netfilter/xt_addrtype.h>
+#include <libiptc/libiptc.h>
+
+#include "alloc-util.h"
+#include "firewall-util.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "socket-util.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct xtc_handle*, iptc_free);
+
+static int entry_fill_basics(
+ struct ipt_entry *entry,
+ int protocol,
+ const char *in_interface,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const char *out_interface,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen) {
+
+ assert(entry);
+
+ if (out_interface && !ifname_valid(out_interface))
+ return -EINVAL;
+ if (in_interface && !ifname_valid(in_interface))
+ return -EINVAL;
+
+ entry->ip.proto = protocol;
+
+ if (in_interface) {
+ size_t l;
+
+ l = strlen(in_interface);
+ assert(l < sizeof entry->ip.iniface);
+ assert(l < sizeof entry->ip.iniface_mask);
+
+ strcpy(entry->ip.iniface, in_interface);
+ memset(entry->ip.iniface_mask, 0xFF, l + 1);
+ }
+ if (source) {
+ entry->ip.src = source->in;
+ in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
+ }
+
+ if (out_interface) {
+ size_t l = strlen(out_interface);
+ assert(l < sizeof entry->ip.outiface);
+ assert(l < sizeof entry->ip.outiface_mask);
+
+ strcpy(entry->ip.outiface, out_interface);
+ memset(entry->ip.outiface_mask, 0xFF, l + 1);
+ }
+ if (destination) {
+ entry->ip.dst = destination->in;
+ in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
+ }
+
+ return 0;
+}
+
+int fw_add_masquerade(
+ bool add,
+ int af,
+ int protocol,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const char *out_interface,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen) {
+
+ _cleanup_(iptc_freep) struct xtc_handle *h = NULL;
+ struct ipt_entry *entry, *mask;
+ struct ipt_entry_target *t;
+ size_t sz;
+ struct nf_nat_ipv4_multi_range_compat *mr;
+ int r;
+
+ if (af != AF_INET)
+ return -EOPNOTSUPP;
+
+ if (!IN_SET(protocol, 0, IPPROTO_TCP, IPPROTO_UDP))
+ return -EOPNOTSUPP;
+
+ h = iptc_init("nat");
+ if (!h)
+ return -errno;
+
+ sz = XT_ALIGN(sizeof(struct ipt_entry)) +
+ XT_ALIGN(sizeof(struct ipt_entry_target)) +
+ XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+
+ /* Put together the entry we want to add or remove */
+ entry = alloca0(sz);
+ entry->next_offset = sz;
+ entry->target_offset = XT_ALIGN(sizeof(struct ipt_entry));
+ r = entry_fill_basics(entry, protocol, NULL, source, source_prefixlen, out_interface, destination, destination_prefixlen);
+ if (r < 0)
+ return r;
+
+ /* Fill in target part */
+ t = ipt_get_target(entry);
+ t->u.target_size =
+ XT_ALIGN(sizeof(struct ipt_entry_target)) +
+ XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+ strncpy(t->u.user.name, "MASQUERADE", sizeof(t->u.user.name));
+ mr = (struct nf_nat_ipv4_multi_range_compat*) t->data;
+ mr->rangesize = 1;
+
+ /* Create a search mask entry */
+ mask = alloca(sz);
+ memset(mask, 0xFF, sz);
+
+ if (add) {
+ if (iptc_check_entry("POSTROUTING", entry, (unsigned char*) mask, h))
+ return 0;
+ if (errno != ENOENT) /* if other error than not existing yet, fail */
+ return -errno;
+
+ if (!iptc_insert_entry("POSTROUTING", entry, 0, h))
+ return -errno;
+ } else {
+ if (!iptc_delete_entry("POSTROUTING", entry, (unsigned char*) mask, h)) {
+ if (errno == ENOENT) /* if it's already gone, all is good! */
+ return 0;
+
+ return -errno;
+ }
+ }
+
+ if (!iptc_commit(h))
+ return -errno;
+
+ return 0;
+}
+
+int fw_add_local_dnat(
+ bool add,
+ int af,
+ int protocol,
+ const char *in_interface,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen,
+ uint16_t local_port,
+ const union in_addr_union *remote,
+ uint16_t remote_port,
+ const union in_addr_union *previous_remote) {
+
+ _cleanup_(iptc_freep) struct xtc_handle *h = NULL;
+ struct ipt_entry *entry, *mask;
+ struct ipt_entry_target *t;
+ struct ipt_entry_match *m;
+ struct xt_addrtype_info_v1 *at;
+ struct nf_nat_ipv4_multi_range_compat *mr;
+ size_t sz, msz;
+ int r;
+
+ assert(add || !previous_remote);
+
+ if (af != AF_INET)
+ return -EOPNOTSUPP;
+
+ if (!IN_SET(protocol, IPPROTO_TCP, IPPROTO_UDP))
+ return -EOPNOTSUPP;
+
+ if (local_port <= 0)
+ return -EINVAL;
+
+ if (remote_port <= 0)
+ return -EINVAL;
+
+ h = iptc_init("nat");
+ if (!h)
+ return -errno;
+
+ sz = XT_ALIGN(sizeof(struct ipt_entry)) +
+ XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) +
+ XT_ALIGN(sizeof(struct ipt_entry_target)) +
+ XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+
+ if (protocol == IPPROTO_TCP)
+ msz = XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_tcp));
+ else
+ msz = XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_udp));
+
+ sz += msz;
+
+ /* Fill in basic part */
+ entry = alloca0(sz);
+ entry->next_offset = sz;
+ entry->target_offset =
+ XT_ALIGN(sizeof(struct ipt_entry)) +
+ XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) +
+ msz;
+ r = entry_fill_basics(entry, protocol, in_interface, source, source_prefixlen, NULL, destination, destination_prefixlen);
+ if (r < 0)
+ return r;
+
+ /* Fill in first match */
+ m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)));
+ m->u.match_size = msz;
+ if (protocol == IPPROTO_TCP) {
+ struct xt_tcp *tcp;
+
+ strncpy(m->u.user.name, "tcp", sizeof(m->u.user.name));
+ tcp = (struct xt_tcp*) m->data;
+ tcp->dpts[0] = tcp->dpts[1] = local_port;
+ tcp->spts[0] = 0;
+ tcp->spts[1] = 0xFFFF;
+
+ } else {
+ struct xt_udp *udp;
+
+ strncpy(m->u.user.name, "udp", sizeof(m->u.user.name));
+ udp = (struct xt_udp*) m->data;
+ udp->dpts[0] = udp->dpts[1] = local_port;
+ udp->spts[0] = 0;
+ udp->spts[1] = 0xFFFF;
+ }
+
+ /* Fill in second match */
+ m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)) + msz);
+ m->u.match_size =
+ XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_addrtype_info_v1));
+ strncpy(m->u.user.name, "addrtype", sizeof(m->u.user.name));
+ m->u.user.revision = 1;
+ at = (struct xt_addrtype_info_v1*) m->data;
+ at->dest = XT_ADDRTYPE_LOCAL;
+
+ /* Fill in target part */
+ t = ipt_get_target(entry);
+ t->u.target_size =
+ XT_ALIGN(sizeof(struct ipt_entry_target)) +
+ XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+ strncpy(t->u.user.name, "DNAT", sizeof(t->u.user.name));
+ mr = (struct nf_nat_ipv4_multi_range_compat*) t->data;
+ mr->rangesize = 1;
+ mr->range[0].flags = NF_NAT_RANGE_PROTO_SPECIFIED|NF_NAT_RANGE_MAP_IPS;
+ mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr;
+ if (protocol == IPPROTO_TCP)
+ mr->range[0].min.tcp.port = mr->range[0].max.tcp.port = htons(remote_port);
+ else
+ mr->range[0].min.udp.port = mr->range[0].max.udp.port = htons(remote_port);
+
+ mask = alloca0(sz);
+ memset(mask, 0xFF, sz);
+
+ if (add) {
+ /* Add the PREROUTING rule, if it is missing so far */
+ if (!iptc_check_entry("PREROUTING", entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -EINVAL;
+
+ if (!iptc_insert_entry("PREROUTING", entry, 0, h))
+ return -errno;
+ }
+
+ /* If a previous remote is set, remove its entry */
+ if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) {
+ mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr;
+
+ if (!iptc_delete_entry("PREROUTING", entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr;
+ }
+
+ /* Add the OUTPUT rule, if it is missing so far */
+ if (!in_interface) {
+
+ /* Don't apply onto loopback addresses */
+ if (!destination) {
+ entry->ip.dst.s_addr = htobe32(0x7F000000);
+ entry->ip.dmsk.s_addr = htobe32(0xFF000000);
+ entry->ip.invflags = IPT_INV_DSTIP;
+ }
+
+ if (!iptc_check_entry("OUTPUT", entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+
+ if (!iptc_insert_entry("OUTPUT", entry, 0, h))
+ return -errno;
+ }
+
+ /* If a previous remote is set, remove its entry */
+ if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) {
+ mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr;
+
+ if (!iptc_delete_entry("OUTPUT", entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+ }
+ }
+ }
+ } else {
+ if (!iptc_delete_entry("PREROUTING", entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ if (!in_interface) {
+ if (!destination) {
+ entry->ip.dst.s_addr = htobe32(0x7F000000);
+ entry->ip.dmsk.s_addr = htobe32(0xFF000000);
+ entry->ip.invflags = IPT_INV_DSTIP;
+ }
+
+ if (!iptc_delete_entry("OUTPUT", entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+ }
+ }
+ }
+
+ if (!iptc_commit(h))
+ return -errno;
+
+ return 0;
+}
diff --git a/src/shared/firewall-util.h b/src/shared/firewall-util.h
new file mode 100644
index 0000000..4fc71da
--- /dev/null
+++ b/src/shared/firewall-util.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "in-addr-util.h"
+
+#if HAVE_LIBIPTC
+
+int fw_add_masquerade(
+ bool add,
+ int af,
+ int protocol,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const char *out_interface,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen);
+
+int fw_add_local_dnat(
+ bool add,
+ int af,
+ int protocol,
+ const char *in_interface,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen,
+ uint16_t local_port,
+ const union in_addr_union *remote,
+ uint16_t remote_port,
+ const union in_addr_union *previous_remote);
+
+#else
+
+static inline int fw_add_masquerade(
+ bool add,
+ int af,
+ int protocol,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const char *out_interface,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen) {
+ return -EOPNOTSUPP;
+}
+
+static inline int fw_add_local_dnat(
+ bool add,
+ int af,
+ int protocol,
+ const char *in_interface,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen,
+ uint16_t local_port,
+ const union in_addr_union *remote,
+ uint16_t remote_port,
+ const union in_addr_union *previous_remote) {
+ return -EOPNOTSUPP;
+}
+
+#endif
diff --git a/src/shared/format-table.c b/src/shared/format-table.c
new file mode 100644
index 0000000..7d52980
--- /dev/null
+++ b/src/shared/format-table.c
@@ -0,0 +1,1625 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <stdio_ext.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-table.h"
+#include "gunicode.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+
+#define DEFAULT_WEIGHT 100
+
+/*
+ A few notes on implementation details:
+
+ - TableCell is a 'fake' structure, it's just used as data type to pass references to specific cell positions in the
+ table. It can be easily converted to an index number and back.
+
+ - TableData is where the actual data is stored: it encapsulates the data and formatting for a specific cell. It's
+ 'pseudo-immutable' and ref-counted. When a cell's data's formatting is to be changed, we duplicate the object if the
+ ref-counting is larger than 1. Note that TableData and its ref-counting is mostly not visible to the outside. The
+ outside only sees Table and TableCell.
+
+ - The Table object stores a simple one-dimensional array of references to TableData objects, one row after the
+ previous one.
+
+ - There's no special concept of a "row" or "column" in the table, and no special concept of the "header" row. It's all
+ derived from the cell index: we know how many cells are to be stored in a row, and can determine the rest from
+ that. The first row is always the header row. If header display is turned off we simply skip outputting the first
+ row. Also, when sorting rows we always leave the first row where it is, as the header shouldn't move.
+
+ - Note because there's no row and no column object some properties that might be appropriate as row/column properties
+ are exposed as cell properties instead. For example, the "weight" of a column (which is used to determine where to
+ add/remove space preferable when expanding/compressing tables horizontally) is actually made the "weight" of a
+ cell. Given that we usually need it per-column though we will calculate the average across every cell of the column
+ instead.
+
+ - To make things easy, when cells are added without any explicit configured formatting, then we'll copy the formatting
+ from the same cell in the previous cell. This is particularly useful for the "weight" of the cell (see above), as
+ this means setting the weight of the cells of the header row will nicely propagate to all cells in the other rows.
+*/
+
+typedef struct TableData {
+ unsigned n_ref;
+ TableDataType type;
+
+ size_t minimum_width; /* minimum width for the column */
+ size_t maximum_width; /* maximum width for the column */
+ unsigned weight; /* the horizontal weight for this column, in case the table is expanded/compressed */
+ unsigned ellipsize_percent; /* 0 … 100, where to place the ellipsis when compression is needed */
+ unsigned align_percent; /* 0 … 100, where to pad with spaces when expanding is needed. 0: left-aligned, 100: right-aligned */
+
+ bool uppercase; /* Uppercase string on display */
+
+ const char *color; /* ANSI color string to use for this cell. When written to terminal should not move cursor. Will automatically be reset after the cell */
+ char *url; /* A URL to use for a clickable hyperlink */
+ char *formatted; /* A cached textual representation of the cell data, before ellipsation/alignment */
+
+ union {
+ uint8_t data[0]; /* data is generic array */
+ bool boolean;
+ usec_t timestamp;
+ usec_t timespan;
+ uint64_t size;
+ char string[0];
+ uint32_t uint32;
+ uint64_t uint64;
+ int percent; /* we use 'int' as datatype for percent values in order to match the result of parse_percent() */
+ /* … add more here as we start supporting more cell data types … */
+ };
+} TableData;
+
+static size_t TABLE_CELL_TO_INDEX(TableCell *cell) {
+ size_t i;
+
+ assert(cell);
+
+ i = PTR_TO_SIZE(cell);
+ assert(i > 0);
+
+ return i-1;
+}
+
+static TableCell* TABLE_INDEX_TO_CELL(size_t index) {
+ assert(index != (size_t) -1);
+ return SIZE_TO_PTR(index + 1);
+}
+
+struct Table {
+ size_t n_columns;
+ size_t n_cells;
+
+ bool header; /* Whether to show the header row? */
+ size_t width; /* If != (size_t) -1 the width to format this table in */
+
+ TableData **data;
+ size_t n_allocated;
+
+ size_t *display_map; /* List of columns to show (by their index). It's fine if columns are listed multiple times or not at all */
+ size_t n_display_map;
+
+ size_t *sort_map; /* The columns to order rows by, in order of preference. */
+ size_t n_sort_map;
+
+ bool *reverse_map;
+};
+
+Table *table_new_raw(size_t n_columns) {
+ _cleanup_(table_unrefp) Table *t = NULL;
+
+ assert(n_columns > 0);
+
+ t = new(Table, 1);
+ if (!t)
+ return NULL;
+
+ *t = (struct Table) {
+ .n_columns = n_columns,
+ .header = true,
+ .width = (size_t) -1,
+ };
+
+ return TAKE_PTR(t);
+}
+
+Table *table_new_internal(const char *first_header, ...) {
+ _cleanup_(table_unrefp) Table *t = NULL;
+ size_t n_columns = 1;
+ const char *h;
+ va_list ap;
+ int r;
+
+ assert(first_header);
+
+ va_start(ap, first_header);
+ for (;;) {
+ h = va_arg(ap, const char*);
+ if (!h)
+ break;
+
+ n_columns++;
+ }
+ va_end(ap);
+
+ t = table_new_raw(n_columns);
+ if (!t)
+ return NULL;
+
+ va_start(ap, first_header);
+ for (h = first_header; h; h = va_arg(ap, const char*)) {
+ TableCell *cell;
+
+ r = table_add_cell(t, &cell, TABLE_STRING, h);
+ if (r < 0) {
+ va_end(ap);
+ return NULL;
+ }
+
+ /* Make the table header uppercase */
+ r = table_set_uppercase(t, cell, true);
+ if (r < 0) {
+ va_end(ap);
+ return NULL;
+ }
+ }
+ va_end(ap);
+
+ assert(t->n_columns == t->n_cells);
+ return TAKE_PTR(t);
+}
+
+static TableData *table_data_free(TableData *d) {
+ assert(d);
+
+ free(d->formatted);
+ free(d->url);
+
+ return mfree(d);
+}
+
+DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(TableData, table_data, table_data_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(TableData*, table_data_unref);
+
+Table *table_unref(Table *t) {
+ size_t i;
+
+ if (!t)
+ return NULL;
+
+ for (i = 0; i < t->n_cells; i++)
+ table_data_unref(t->data[i]);
+
+ free(t->data);
+ free(t->display_map);
+ free(t->sort_map);
+ free(t->reverse_map);
+
+ return mfree(t);
+}
+
+static size_t table_data_size(TableDataType type, const void *data) {
+
+ switch (type) {
+
+ case TABLE_EMPTY:
+ return 0;
+
+ case TABLE_STRING:
+ return strlen(data) + 1;
+
+ case TABLE_BOOLEAN:
+ return sizeof(bool);
+
+ case TABLE_TIMESTAMP:
+ case TABLE_TIMESPAN:
+ return sizeof(usec_t);
+
+ case TABLE_SIZE:
+ case TABLE_UINT64:
+ return sizeof(uint64_t);
+
+ case TABLE_UINT32:
+ return sizeof(uint32_t);
+
+ case TABLE_PERCENT:
+ return sizeof(int);
+
+ default:
+ assert_not_reached("Uh? Unexpected cell type");
+ }
+}
+
+static bool table_data_matches(
+ TableData *d,
+ TableDataType type,
+ const void *data,
+ size_t minimum_width,
+ size_t maximum_width,
+ unsigned weight,
+ unsigned align_percent,
+ unsigned ellipsize_percent) {
+
+ size_t k, l;
+ assert(d);
+
+ if (d->type != type)
+ return false;
+
+ if (d->minimum_width != minimum_width)
+ return false;
+
+ if (d->maximum_width != maximum_width)
+ return false;
+
+ if (d->weight != weight)
+ return false;
+
+ if (d->align_percent != align_percent)
+ return false;
+
+ if (d->ellipsize_percent != ellipsize_percent)
+ return false;
+
+ /* If a color/url/uppercase flag is set, refuse to merge */
+ if (d->color)
+ return false;
+ if (d->url)
+ return false;
+ if (d->uppercase)
+ return false;
+
+ k = table_data_size(type, data);
+ l = table_data_size(d->type, d->data);
+
+ if (k != l)
+ return false;
+
+ return memcmp_safe(data, d->data, l) == 0;
+}
+
+static TableData *table_data_new(
+ TableDataType type,
+ const void *data,
+ size_t minimum_width,
+ size_t maximum_width,
+ unsigned weight,
+ unsigned align_percent,
+ unsigned ellipsize_percent) {
+
+ size_t data_size;
+ TableData *d;
+
+ data_size = table_data_size(type, data);
+
+ d = malloc0(offsetof(TableData, data) + data_size);
+ if (!d)
+ return NULL;
+
+ d->n_ref = 1;
+ d->type = type;
+ d->minimum_width = minimum_width;
+ d->maximum_width = maximum_width;
+ d->weight = weight;
+ d->align_percent = align_percent;
+ d->ellipsize_percent = ellipsize_percent;
+ memcpy_safe(d->data, data, data_size);
+
+ return d;
+}
+
+int table_add_cell_full(
+ Table *t,
+ TableCell **ret_cell,
+ TableDataType type,
+ const void *data,
+ size_t minimum_width,
+ size_t maximum_width,
+ unsigned weight,
+ unsigned align_percent,
+ unsigned ellipsize_percent) {
+
+ _cleanup_(table_data_unrefp) TableData *d = NULL;
+ TableData *p;
+
+ assert(t);
+ assert(type >= 0);
+ assert(type < _TABLE_DATA_TYPE_MAX);
+
+ /* Determine the cell adjacent to the current one, but one row up */
+ if (t->n_cells >= t->n_columns)
+ assert_se(p = t->data[t->n_cells - t->n_columns]);
+ else
+ p = NULL;
+
+ /* If formatting parameters are left unspecified, copy from the previous row */
+ if (minimum_width == (size_t) -1)
+ minimum_width = p ? p->minimum_width : 1;
+
+ if (weight == (unsigned) -1)
+ weight = p ? p->weight : DEFAULT_WEIGHT;
+
+ if (align_percent == (unsigned) -1)
+ align_percent = p ? p->align_percent : 0;
+
+ if (ellipsize_percent == (unsigned) -1)
+ ellipsize_percent = p ? p->ellipsize_percent : 100;
+
+ assert(align_percent <= 100);
+ assert(ellipsize_percent <= 100);
+
+ /* Small optimization: Pretty often adjacent cells in two subsequent lines have the same data and
+ * formatting. Let's see if we can reuse the cell data and ref it once more. */
+
+ if (p && table_data_matches(p, type, data, minimum_width, maximum_width, weight, align_percent, ellipsize_percent))
+ d = table_data_ref(p);
+ else {
+ d = table_data_new(type, data, minimum_width, maximum_width, weight, align_percent, ellipsize_percent);
+ if (!d)
+ return -ENOMEM;
+ }
+
+ if (!GREEDY_REALLOC(t->data, t->n_allocated, MAX(t->n_cells + 1, t->n_columns)))
+ return -ENOMEM;
+
+ if (ret_cell)
+ *ret_cell = TABLE_INDEX_TO_CELL(t->n_cells);
+
+ t->data[t->n_cells++] = TAKE_PTR(d);
+
+ return 0;
+}
+
+int table_dup_cell(Table *t, TableCell *cell) {
+ size_t i;
+
+ assert(t);
+
+ /* Add the data of the specified cell a second time as a new cell to the end. */
+
+ i = TABLE_CELL_TO_INDEX(cell);
+ if (i >= t->n_cells)
+ return -ENXIO;
+
+ if (!GREEDY_REALLOC(t->data, t->n_allocated, MAX(t->n_cells + 1, t->n_columns)))
+ return -ENOMEM;
+
+ t->data[t->n_cells++] = table_data_ref(t->data[i]);
+ return 0;
+}
+
+static int table_dedup_cell(Table *t, TableCell *cell) {
+ _cleanup_free_ char *curl = NULL;
+ TableData *nd, *od;
+ size_t i;
+
+ assert(t);
+
+ /* Helper call that ensures the specified cell's data object has a ref count of 1, which we can use before
+ * changing a cell's formatting without effecting every other cell's formatting that shares the same data */
+
+ i = TABLE_CELL_TO_INDEX(cell);
+ if (i >= t->n_cells)
+ return -ENXIO;
+
+ assert_se(od = t->data[i]);
+ if (od->n_ref == 1)
+ return 0;
+
+ assert(od->n_ref > 1);
+
+ if (od->url) {
+ curl = strdup(od->url);
+ if (!curl)
+ return -ENOMEM;
+ }
+
+ nd = table_data_new(
+ od->type,
+ od->data,
+ od->minimum_width,
+ od->maximum_width,
+ od->weight,
+ od->align_percent,
+ od->ellipsize_percent);
+ if (!nd)
+ return -ENOMEM;
+
+ nd->color = od->color;
+ nd->url = TAKE_PTR(curl);
+ nd->uppercase = od->uppercase;
+
+ table_data_unref(od);
+ t->data[i] = nd;
+
+ assert(nd->n_ref == 1);
+
+ return 1;
+}
+
+static TableData *table_get_data(Table *t, TableCell *cell) {
+ size_t i;
+
+ assert(t);
+ assert(cell);
+
+ /* Get the data object of the specified cell, or NULL if it doesn't exist */
+
+ i = TABLE_CELL_TO_INDEX(cell);
+ if (i >= t->n_cells)
+ return NULL;
+
+ assert(t->data[i]);
+ assert(t->data[i]->n_ref > 0);
+
+ return t->data[i];
+}
+
+int table_set_minimum_width(Table *t, TableCell *cell, size_t minimum_width) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (minimum_width == (size_t) -1)
+ minimum_width = 1;
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->minimum_width = minimum_width;
+ return 0;
+}
+
+int table_set_maximum_width(Table *t, TableCell *cell, size_t maximum_width) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->maximum_width = maximum_width;
+ return 0;
+}
+
+int table_set_weight(Table *t, TableCell *cell, unsigned weight) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (weight == (unsigned) -1)
+ weight = DEFAULT_WEIGHT;
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->weight = weight;
+ return 0;
+}
+
+int table_set_align_percent(Table *t, TableCell *cell, unsigned percent) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (percent == (unsigned) -1)
+ percent = 0;
+
+ assert(percent <= 100);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->align_percent = percent;
+ return 0;
+}
+
+int table_set_ellipsize_percent(Table *t, TableCell *cell, unsigned percent) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (percent == (unsigned) -1)
+ percent = 100;
+
+ assert(percent <= 100);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->ellipsize_percent = percent;
+ return 0;
+}
+
+int table_set_color(Table *t, TableCell *cell, const char *color) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->color = empty_to_null(color);
+ return 0;
+}
+
+int table_set_url(Table *t, TableCell *cell, const char *url) {
+ _cleanup_free_ char *copy = NULL;
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (url) {
+ copy = strdup(url);
+ if (!copy)
+ return -ENOMEM;
+ }
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ return free_and_replace(table_get_data(t, cell)->url, copy);
+}
+
+int table_set_uppercase(Table *t, TableCell *cell, bool b) {
+ TableData *d;
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ assert_se(d = table_get_data(t, cell));
+
+ if (d->uppercase == b)
+ return 0;
+
+ d->formatted = mfree(d->formatted);
+ d->uppercase = b;
+ return 1;
+}
+
+int table_update(Table *t, TableCell *cell, TableDataType type, const void *data) {
+ _cleanup_free_ char *curl = NULL;
+ TableData *nd, *od;
+ size_t i;
+
+ assert(t);
+ assert(cell);
+
+ i = TABLE_CELL_TO_INDEX(cell);
+ if (i >= t->n_cells)
+ return -ENXIO;
+
+ assert_se(od = t->data[i]);
+
+ if (od->url) {
+ curl = strdup(od->url);
+ if (!curl)
+ return -ENOMEM;
+ }
+
+ nd = table_data_new(
+ type,
+ data,
+ od->minimum_width,
+ od->maximum_width,
+ od->weight,
+ od->align_percent,
+ od->ellipsize_percent);
+ if (!nd)
+ return -ENOMEM;
+
+ nd->color = od->color;
+ nd->url = TAKE_PTR(curl);
+ nd->uppercase = od->uppercase;
+
+ table_data_unref(od);
+ t->data[i] = nd;
+
+ return 0;
+}
+
+int table_add_many_internal(Table *t, TableDataType first_type, ...) {
+ TableDataType type;
+ va_list ap;
+ int r;
+
+ assert(t);
+ assert(first_type >= 0);
+ assert(first_type < _TABLE_DATA_TYPE_MAX);
+
+ type = first_type;
+
+ va_start(ap, first_type);
+ for (;;) {
+ const void *data;
+ union {
+ uint64_t size;
+ usec_t usec;
+ uint32_t uint32;
+ uint64_t uint64;
+ int percent;
+ bool b;
+ } buffer;
+
+ switch (type) {
+
+ case TABLE_EMPTY:
+ data = NULL;
+ break;
+
+ case TABLE_STRING:
+ data = va_arg(ap, const char *);
+ break;
+
+ case TABLE_BOOLEAN:
+ buffer.b = va_arg(ap, int);
+ data = &buffer.b;
+ break;
+
+ case TABLE_TIMESTAMP:
+ case TABLE_TIMESPAN:
+ buffer.usec = va_arg(ap, usec_t);
+ data = &buffer.usec;
+ break;
+
+ case TABLE_SIZE:
+ buffer.size = va_arg(ap, uint64_t);
+ data = &buffer.size;
+ break;
+
+ case TABLE_UINT32:
+ buffer.uint32 = va_arg(ap, uint32_t);
+ data = &buffer.uint32;
+ break;
+
+ case TABLE_UINT64:
+ buffer.uint64 = va_arg(ap, uint64_t);
+ data = &buffer.uint64;
+ break;
+
+ case TABLE_PERCENT:
+ buffer.percent = va_arg(ap, int);
+ data = &buffer.percent;
+ break;
+
+ case _TABLE_DATA_TYPE_MAX:
+ /* Used as end marker */
+ va_end(ap);
+ return 0;
+
+ default:
+ assert_not_reached("Uh? Unexpected data type.");
+ }
+
+ r = table_add_cell(t, NULL, type, data);
+ if (r < 0) {
+ va_end(ap);
+ return r;
+ }
+
+ type = va_arg(ap, TableDataType);
+ }
+}
+
+void table_set_header(Table *t, bool b) {
+ assert(t);
+
+ t->header = b;
+}
+
+void table_set_width(Table *t, size_t width) {
+ assert(t);
+
+ t->width = width;
+}
+
+int table_set_display(Table *t, size_t first_column, ...) {
+ size_t allocated, column;
+ va_list ap;
+
+ assert(t);
+
+ allocated = t->n_display_map;
+ column = first_column;
+
+ va_start(ap, first_column);
+ for (;;) {
+ assert(column < t->n_columns);
+
+ if (!GREEDY_REALLOC(t->display_map, allocated, MAX(t->n_columns, t->n_display_map+1))) {
+ va_end(ap);
+ return -ENOMEM;
+ }
+
+ t->display_map[t->n_display_map++] = column;
+
+ column = va_arg(ap, size_t);
+ if (column == (size_t) -1)
+ break;
+
+ }
+ va_end(ap);
+
+ return 0;
+}
+
+int table_set_sort(Table *t, size_t first_column, ...) {
+ size_t allocated, column;
+ va_list ap;
+
+ assert(t);
+
+ allocated = t->n_sort_map;
+ column = first_column;
+
+ va_start(ap, first_column);
+ for (;;) {
+ assert(column < t->n_columns);
+
+ if (!GREEDY_REALLOC(t->sort_map, allocated, MAX(t->n_columns, t->n_sort_map+1))) {
+ va_end(ap);
+ return -ENOMEM;
+ }
+
+ t->sort_map[t->n_sort_map++] = column;
+
+ column = va_arg(ap, size_t);
+ if (column == (size_t) -1)
+ break;
+ }
+ va_end(ap);
+
+ return 0;
+}
+
+static int cell_data_compare(TableData *a, size_t index_a, TableData *b, size_t index_b) {
+ assert(a);
+ assert(b);
+
+ if (a->type == b->type) {
+
+ /* We only define ordering for cells of the same data type. If cells with different data types are
+ * compared we follow the order the cells were originally added in */
+
+ switch (a->type) {
+
+ case TABLE_STRING:
+ return strcmp(a->string, b->string);
+
+ case TABLE_BOOLEAN:
+ if (!a->boolean && b->boolean)
+ return -1;
+ if (a->boolean && !b->boolean)
+ return 1;
+ return 0;
+
+ case TABLE_TIMESTAMP:
+ return CMP(a->timestamp, b->timestamp);
+
+ case TABLE_TIMESPAN:
+ return CMP(a->timespan, b->timespan);
+
+ case TABLE_SIZE:
+ return CMP(a->size, b->size);
+
+ case TABLE_UINT32:
+ return CMP(a->uint32, b->uint32);
+
+ case TABLE_UINT64:
+ return CMP(a->uint64, b->uint64);
+
+ case TABLE_PERCENT:
+ return CMP(a->percent, b->percent);
+
+ default:
+ ;
+ }
+ }
+
+ /* Generic fallback using the orginal order in which the cells where added. */
+ return CMP(index_a, index_b);
+}
+
+static int table_data_compare(const size_t *a, const size_t *b, Table *t) {
+ size_t i;
+ int r;
+
+ assert(t);
+ assert(t->sort_map);
+
+ /* Make sure the header stays at the beginning */
+ if (*a < t->n_columns && *b < t->n_columns)
+ return 0;
+ if (*a < t->n_columns)
+ return -1;
+ if (*b < t->n_columns)
+ return 1;
+
+ /* Order other lines by the sorting map */
+ for (i = 0; i < t->n_sort_map; i++) {
+ TableData *d, *dd;
+
+ d = t->data[*a + t->sort_map[i]];
+ dd = t->data[*b + t->sort_map[i]];
+
+ r = cell_data_compare(d, *a, dd, *b);
+ if (r != 0)
+ return t->reverse_map && t->reverse_map[t->sort_map[i]] ? -r : r;
+ }
+
+ /* Order identical lines by the order there were originally added in */
+ return CMP(*a, *b);
+}
+
+static const char *table_data_format(TableData *d) {
+ assert(d);
+
+ if (d->formatted)
+ return d->formatted;
+
+ switch (d->type) {
+ case TABLE_EMPTY:
+ return "";
+
+ case TABLE_STRING:
+ if (d->uppercase) {
+ char *p, *q;
+
+ d->formatted = new(char, strlen(d->string) + 1);
+ if (!d->formatted)
+ return NULL;
+
+ for (p = d->string, q = d->formatted; *p; p++, q++)
+ *q = (char) toupper((unsigned char) *p);
+ *q = 0;
+
+ return d->formatted;
+ }
+
+ return d->string;
+
+ case TABLE_BOOLEAN:
+ return yes_no(d->boolean);
+
+ case TABLE_TIMESTAMP: {
+ _cleanup_free_ char *p;
+
+ p = new(char, FORMAT_TIMESTAMP_MAX);
+ if (!p)
+ return NULL;
+
+ if (!format_timestamp(p, FORMAT_TIMESTAMP_MAX, d->timestamp))
+ return "n/a";
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_TIMESPAN: {
+ _cleanup_free_ char *p;
+
+ p = new(char, FORMAT_TIMESPAN_MAX);
+ if (!p)
+ return NULL;
+
+ if (!format_timespan(p, FORMAT_TIMESPAN_MAX, d->timespan, 0))
+ return "n/a";
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_SIZE: {
+ _cleanup_free_ char *p;
+
+ p = new(char, FORMAT_BYTES_MAX);
+ if (!p)
+ return NULL;
+
+ if (!format_bytes(p, FORMAT_BYTES_MAX, d->size))
+ return "n/a";
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_UINT32: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->uint32) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIu32, d->uint32);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_UINT64: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->uint64) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIu64, d->uint64);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_PERCENT: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->percent) + 2);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%i%%" , d->percent);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ default:
+ assert_not_reached("Unexpected type?");
+ }
+
+ return d->formatted;
+}
+
+static int table_data_requested_width(TableData *d, size_t *ret) {
+ const char *t;
+ size_t l;
+
+ t = table_data_format(d);
+ if (!t)
+ return -ENOMEM;
+
+ l = utf8_console_width(t);
+ if (l == (size_t) -1)
+ return -EINVAL;
+
+ if (d->maximum_width != (size_t) -1 && l > d->maximum_width)
+ l = d->maximum_width;
+
+ if (l < d->minimum_width)
+ l = d->minimum_width;
+
+ *ret = l;
+ return 0;
+}
+
+static char *align_string_mem(const char *str, const char *url, size_t new_length, unsigned percent) {
+ size_t w = 0, space, lspace, old_length, clickable_length;
+ _cleanup_free_ char *clickable = NULL;
+ const char *p;
+ char *ret;
+ size_t i;
+ int r;
+
+ /* As with ellipsize_mem(), 'old_length' is a byte size while 'new_length' is a width in character cells */
+
+ assert(str);
+ assert(percent <= 100);
+
+ old_length = strlen(str);
+
+ if (url) {
+ r = terminal_urlify(url, str, &clickable);
+ if (r < 0)
+ return NULL;
+
+ clickable_length = strlen(clickable);
+ } else
+ clickable_length = old_length;
+
+ /* Determine current width on screen */
+ p = str;
+ while (p < str + old_length) {
+ char32_t c;
+
+ if (utf8_encoded_to_unichar(p, &c) < 0) {
+ p++, w++; /* count invalid chars as 1 */
+ continue;
+ }
+
+ p = utf8_next_char(p);
+ w += unichar_iswide(c) ? 2 : 1;
+ }
+
+ /* Already wider than the target, if so, don't do anything */
+ if (w >= new_length)
+ return clickable ? TAKE_PTR(clickable) : strdup(str);
+
+ /* How much spaces shall we add? An how much on the left side? */
+ space = new_length - w;
+ lspace = space * percent / 100U;
+
+ ret = new(char, space + clickable_length + 1);
+ if (!ret)
+ return NULL;
+
+ for (i = 0; i < lspace; i++)
+ ret[i] = ' ';
+ memcpy(ret + lspace, clickable ?: str, clickable_length);
+ for (i = lspace + clickable_length; i < space + clickable_length; i++)
+ ret[i] = ' ';
+
+ ret[space + clickable_length] = 0;
+ return ret;
+}
+
+int table_print(Table *t, FILE *f) {
+ size_t n_rows, *minimum_width, *maximum_width, display_columns, *requested_width,
+ i, j, table_minimum_width, table_maximum_width, table_requested_width, table_effective_width,
+ *width;
+ _cleanup_free_ size_t *sorted = NULL;
+ uint64_t *column_weight, weight_sum;
+ int r;
+
+ assert(t);
+
+ if (!f)
+ f = stdout;
+
+ /* Ensure we have no incomplete rows */
+ assert(t->n_cells % t->n_columns == 0);
+
+ n_rows = t->n_cells / t->n_columns;
+ assert(n_rows > 0); /* at least the header row must be complete */
+
+ if (t->sort_map) {
+ /* If sorting is requested, let's calculate an index table we use to lookup the actual index to display with. */
+
+ sorted = new(size_t, n_rows);
+ if (!sorted)
+ return -ENOMEM;
+
+ for (i = 0; i < n_rows; i++)
+ sorted[i] = i * t->n_columns;
+
+ typesafe_qsort_r(sorted, n_rows, table_data_compare, t);
+ }
+
+ if (t->display_map)
+ display_columns = t->n_display_map;
+ else
+ display_columns = t->n_columns;
+
+ assert(display_columns > 0);
+
+ minimum_width = newa(size_t, display_columns);
+ maximum_width = newa(size_t, display_columns);
+ requested_width = newa(size_t, display_columns);
+ width = newa(size_t, display_columns);
+ column_weight = newa0(uint64_t, display_columns);
+
+ for (j = 0; j < display_columns; j++) {
+ minimum_width[j] = 1;
+ maximum_width[j] = (size_t) -1;
+ requested_width[j] = (size_t) -1;
+ }
+
+ /* First pass: determine column sizes */
+ for (i = t->header ? 0 : 1; i < n_rows; i++) {
+ TableData **row;
+
+ /* Note that we don't care about ordering at this time, as we just want to determine column sizes,
+ * hence we don't care for sorted[] during the first pass. */
+ row = t->data + i * t->n_columns;
+
+ for (j = 0; j < display_columns; j++) {
+ TableData *d;
+ size_t req;
+
+ assert_se(d = row[t->display_map ? t->display_map[j] : j]);
+
+ r = table_data_requested_width(d, &req);
+ if (r < 0)
+ return r;
+
+ /* Determine the biggest width that any cell in this column would like to have */
+ if (requested_width[j] == (size_t) -1 ||
+ requested_width[j] < req)
+ requested_width[j] = req;
+
+ /* Determine the minimum width any cell in this column needs */
+ if (minimum_width[j] < d->minimum_width)
+ minimum_width[j] = d->minimum_width;
+
+ /* Determine the maximum width any cell in this column needs */
+ if (d->maximum_width != (size_t) -1 &&
+ (maximum_width[j] == (size_t) -1 ||
+ maximum_width[j] > d->maximum_width))
+ maximum_width[j] = d->maximum_width;
+
+ /* Determine the full columns weight */
+ column_weight[j] += d->weight;
+ }
+ }
+
+ /* One space between each column */
+ table_requested_width = table_minimum_width = table_maximum_width = display_columns - 1;
+
+ /* Calculate the total weight for all columns, plus the minimum, maximum and requested width for the table. */
+ weight_sum = 0;
+ for (j = 0; j < display_columns; j++) {
+ weight_sum += column_weight[j];
+
+ table_minimum_width += minimum_width[j];
+
+ if (maximum_width[j] == (size_t) -1)
+ table_maximum_width = (size_t) -1;
+ else
+ table_maximum_width += maximum_width[j];
+
+ table_requested_width += requested_width[j];
+ }
+
+ /* Calculate effective table width */
+ if (t->width == (size_t) -1)
+ table_effective_width = pager_have() ? table_requested_width : MIN(table_requested_width, columns());
+ else
+ table_effective_width = t->width;
+
+ if (table_maximum_width != (size_t) -1 && table_effective_width > table_maximum_width)
+ table_effective_width = table_maximum_width;
+
+ if (table_effective_width < table_minimum_width)
+ table_effective_width = table_minimum_width;
+
+ if (table_effective_width >= table_requested_width) {
+ size_t extra;
+
+ /* We have extra room, let's distribute it among columns according to their weights. We first provide
+ * each column with what it asked for and the distribute the rest. */
+
+ extra = table_effective_width - table_requested_width;
+
+ for (j = 0; j < display_columns; j++) {
+ size_t delta;
+
+ if (weight_sum == 0)
+ width[j] = requested_width[j] + extra / (display_columns - j); /* Avoid division by zero */
+ else
+ width[j] = requested_width[j] + (extra * column_weight[j]) / weight_sum;
+
+ if (maximum_width[j] != (size_t) -1 && width[j] > maximum_width[j])
+ width[j] = maximum_width[j];
+
+ if (width[j] < minimum_width[j])
+ width[j] = minimum_width[j];
+
+ assert(width[j] >= requested_width[j]);
+ delta = width[j] - requested_width[j];
+
+ /* Subtract what we just added from the rest */
+ if (extra > delta)
+ extra -= delta;
+ else
+ extra = 0;
+
+ assert(weight_sum >= column_weight[j]);
+ weight_sum -= column_weight[j];
+ }
+
+ } else {
+ /* We need to compress the table, columns can't get what they asked for. We first provide each column
+ * with the minimum they need, and then distribute anything left. */
+ bool finalize = false;
+ size_t extra;
+
+ extra = table_effective_width - table_minimum_width;
+
+ for (j = 0; j < display_columns; j++)
+ width[j] = (size_t) -1;
+
+ for (;;) {
+ bool restart = false;
+
+ for (j = 0; j < display_columns; j++) {
+ size_t delta, w;
+
+ /* Did this column already get something assigned? If so, let's skip to the next */
+ if (width[j] != (size_t) -1)
+ continue;
+
+ if (weight_sum == 0)
+ w = minimum_width[j] + extra / (display_columns - j); /* avoid division by zero */
+ else
+ w = minimum_width[j] + (extra * column_weight[j]) / weight_sum;
+
+ if (w >= requested_width[j]) {
+ /* Never give more than requested. If we hit a column like this, there's more
+ * space to allocate to other columns which means we need to restart the
+ * iteration. However, if we hit a column like this, let's assign it the space
+ * it wanted for good early.*/
+
+ w = requested_width[j];
+ restart = true;
+
+ } else if (!finalize)
+ continue;
+
+ width[j] = w;
+
+ assert(w >= minimum_width[j]);
+ delta = w - minimum_width[j];
+
+ assert(delta <= extra);
+ extra -= delta;
+
+ assert(weight_sum >= column_weight[j]);
+ weight_sum -= column_weight[j];
+
+ if (restart && !finalize)
+ break;
+ }
+
+ if (finalize)
+ break;
+
+ if (!restart)
+ finalize = true;
+ }
+ }
+
+ /* Second pass: show output */
+ for (i = t->header ? 0 : 1; i < n_rows; i++) {
+ TableData **row;
+
+ if (sorted)
+ row = t->data + sorted[i];
+ else
+ row = t->data + i * t->n_columns;
+
+ for (j = 0; j < display_columns; j++) {
+ _cleanup_free_ char *buffer = NULL;
+ const char *field;
+ TableData *d;
+ size_t l;
+
+ assert_se(d = row[t->display_map ? t->display_map[j] : j]);
+
+ field = table_data_format(d);
+ if (!field)
+ return -ENOMEM;
+
+ l = utf8_console_width(field);
+ if (l > width[j]) {
+ /* Field is wider than allocated space. Let's ellipsize */
+
+ buffer = ellipsize(field, width[j], d->ellipsize_percent);
+ if (!buffer)
+ return -ENOMEM;
+
+ field = buffer;
+
+ } else if (l < width[j]) {
+ /* Field is shorter than allocated space. Let's align with spaces */
+
+ buffer = align_string_mem(field, d->url, width[j], d->align_percent);
+ if (!buffer)
+ return -ENOMEM;
+
+ field = buffer;
+ }
+
+ if (l >= width[j] && d->url) {
+ _cleanup_free_ char *clickable = NULL;
+
+ r = terminal_urlify(d->url, field, &clickable);
+ if (r < 0)
+ return r;
+
+ free_and_replace(buffer, clickable);
+ field = buffer;
+ }
+
+ if (row == t->data) /* underline header line fully, including the column separator */
+ fputs(ansi_underline(), f);
+
+ if (j > 0)
+ fputc(' ', f); /* column separator */
+
+ if (d->color && colors_enabled()) {
+ if (row == t->data) /* first undo header underliner */
+ fputs(ANSI_NORMAL, f);
+
+ fputs(d->color, f);
+ }
+
+ fputs(field, f);
+
+ if (colors_enabled() && (d->color || row == t->data))
+ fputs(ANSI_NORMAL, f);
+ }
+
+ fputc('\n', f);
+ }
+
+ return fflush_and_check(f);
+}
+
+int table_format(Table *t, char **ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char *buf = NULL;
+ size_t sz = 0;
+ int r;
+
+ f = open_memstream(&buf, &sz);
+ if (!f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ r = table_print(t, f);
+ if (r < 0)
+ return r;
+
+ f = safe_fclose(f);
+
+ *ret = buf;
+
+ return 0;
+}
+
+size_t table_get_rows(Table *t) {
+ if (!t)
+ return 0;
+
+ assert(t->n_columns > 0);
+ return t->n_cells / t->n_columns;
+}
+
+size_t table_get_columns(Table *t) {
+ if (!t)
+ return 0;
+
+ assert(t->n_columns > 0);
+ return t->n_columns;
+}
+
+int table_set_reverse(Table *t, size_t column, bool b) {
+ assert(t);
+ assert(column < t->n_columns);
+
+ if (!t->reverse_map) {
+ if (!b)
+ return 0;
+
+ t->reverse_map = new0(bool, t->n_columns);
+ if (!t->reverse_map)
+ return -ENOMEM;
+ }
+
+ t->reverse_map[column] = b;
+ return 0;
+}
+
+TableCell *table_get_cell(Table *t, size_t row, size_t column) {
+ size_t i;
+
+ assert(t);
+
+ if (column >= t->n_columns)
+ return NULL;
+
+ i = row * t->n_columns + column;
+ if (i >= t->n_cells)
+ return NULL;
+
+ return TABLE_INDEX_TO_CELL(i);
+}
+
+const void *table_get(Table *t, TableCell *cell) {
+ TableData *d;
+
+ assert(t);
+
+ d = table_get_data(t, cell);
+ if (!d)
+ return NULL;
+
+ return d->data;
+}
+
+const void* table_get_at(Table *t, size_t row, size_t column) {
+ TableCell *cell;
+
+ cell = table_get_cell(t, row, column);
+ if (!cell)
+ return NULL;
+
+ return table_get(t, cell);
+}
+
+static int table_data_to_json(TableData *d, JsonVariant **ret) {
+
+ switch (d->type) {
+
+ case TABLE_EMPTY:
+ return json_variant_new_null(ret);
+
+ case TABLE_STRING:
+ return json_variant_new_string(ret, d->string);
+
+ case TABLE_BOOLEAN:
+ return json_variant_new_boolean(ret, d->boolean);
+
+ case TABLE_TIMESTAMP:
+ if (d->timestamp == USEC_INFINITY)
+ return json_variant_new_null(ret);
+
+ return json_variant_new_unsigned(ret, d->timestamp);
+
+ case TABLE_TIMESPAN:
+ if (d->timespan == USEC_INFINITY)
+ return json_variant_new_null(ret);
+
+ return json_variant_new_unsigned(ret, d->timespan);
+
+ case TABLE_SIZE:
+ if (d->size == (size_t) -1)
+ return json_variant_new_null(ret);
+
+ return json_variant_new_unsigned(ret, d->size);
+
+ case TABLE_UINT32:
+ return json_variant_new_unsigned(ret, d->uint32);
+
+ case TABLE_UINT64:
+ return json_variant_new_unsigned(ret, d->uint64);
+
+ case TABLE_PERCENT:
+ return json_variant_new_integer(ret, d->percent);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+int table_to_json(Table *t, JsonVariant **ret) {
+ JsonVariant **rows = NULL, **elements = NULL;
+ _cleanup_free_ size_t *sorted = NULL;
+ size_t n_rows, i, j, display_columns;
+ int r;
+
+ assert(t);
+
+ /* Ensure we have no incomplete rows */
+ assert(t->n_cells % t->n_columns == 0);
+
+ n_rows = t->n_cells / t->n_columns;
+ assert(n_rows > 0); /* at least the header row must be complete */
+
+ if (t->sort_map) {
+ /* If sorting is requested, let's calculate an index table we use to lookup the actual index to display with. */
+
+ sorted = new(size_t, n_rows);
+ if (!sorted) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ for (i = 0; i < n_rows; i++)
+ sorted[i] = i * t->n_columns;
+
+ typesafe_qsort_r(sorted, n_rows, table_data_compare, t);
+ }
+
+ if (t->display_map)
+ display_columns = t->n_display_map;
+ else
+ display_columns = t->n_columns;
+ assert(display_columns > 0);
+
+ elements = new0(JsonVariant*, display_columns * 2);
+ if (!elements) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ for (j = 0; j < display_columns; j++) {
+ TableData *d;
+
+ assert_se(d = t->data[t->display_map ? t->display_map[j] : j]);
+
+ r = table_data_to_json(d, elements + j*2);
+ if (r < 0)
+ goto finish;
+ }
+
+ rows = new0(JsonVariant*, n_rows-1);
+ if (!rows) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ for (i = 1; i < n_rows; i++) {
+ TableData **row;
+
+ if (sorted)
+ row = t->data + sorted[i];
+ else
+ row = t->data + i * t->n_columns;
+
+ for (j = 0; j < display_columns; j++) {
+ TableData *d;
+ size_t k;
+
+ assert_se(d = row[t->display_map ? t->display_map[j] : j]);
+
+ k = j*2+1;
+ elements[k] = json_variant_unref(elements[k]);
+
+ r = table_data_to_json(d, elements + k);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = json_variant_new_object(rows + i - 1, elements, display_columns * 2);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = json_variant_new_array(ret, rows, n_rows - 1);
+
+finish:
+ if (rows) {
+ json_variant_unref_many(rows, n_rows-1);
+ free(rows);
+ }
+
+ if (elements) {
+ json_variant_unref_many(elements, display_columns*2);
+ free(elements);
+ }
+
+ return r;
+}
+
+int table_print_json(Table *t, FILE *f, JsonFormatFlags flags) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ int r;
+
+ assert(t);
+
+ if (!f)
+ f = stdout;
+
+ r = table_to_json(t, &v);
+ if (r < 0)
+ return r;
+
+ json_variant_dump(v, flags, f, NULL);
+
+ return fflush_and_check(f);
+}
diff --git a/src/shared/format-table.h b/src/shared/format-table.h
new file mode 100644
index 0000000..5ff2479
--- /dev/null
+++ b/src/shared/format-table.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "json.h"
+#include "macro.h"
+
+typedef enum TableDataType {
+ TABLE_EMPTY,
+ TABLE_STRING,
+ TABLE_BOOLEAN,
+ TABLE_TIMESTAMP,
+ TABLE_TIMESPAN,
+ TABLE_SIZE,
+ TABLE_UINT32,
+ TABLE_UINT64,
+ TABLE_PERCENT,
+ _TABLE_DATA_TYPE_MAX,
+ _TABLE_DATA_TYPE_INVALID = -1,
+} TableDataType;
+
+typedef struct Table Table;
+typedef struct TableCell TableCell;
+
+Table *table_new_internal(const char *first_header, ...) _sentinel_;
+#define table_new(...) table_new_internal(__VA_ARGS__, NULL)
+Table *table_new_raw(size_t n_columns);
+Table *table_unref(Table *t);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Table*, table_unref);
+
+int table_add_cell_full(Table *t, TableCell **ret_cell, TableDataType type, const void *data, size_t minimum_width, size_t maximum_width, unsigned weight, unsigned align_percent, unsigned ellipsize_percent);
+static inline int table_add_cell(Table *t, TableCell **ret_cell, TableDataType type, const void *data) {
+ return table_add_cell_full(t, ret_cell, type, data, (size_t) -1, (size_t) -1, (unsigned) -1, (unsigned) -1, (unsigned) -1);
+}
+
+int table_dup_cell(Table *t, TableCell *cell);
+
+int table_set_minimum_width(Table *t, TableCell *cell, size_t minimum_width);
+int table_set_maximum_width(Table *t, TableCell *cell, size_t maximum_width);
+int table_set_weight(Table *t, TableCell *cell, unsigned weight);
+int table_set_align_percent(Table *t, TableCell *cell, unsigned percent);
+int table_set_ellipsize_percent(Table *t, TableCell *cell, unsigned percent);
+int table_set_color(Table *t, TableCell *cell, const char *color);
+int table_set_url(Table *t, TableCell *cell, const char *color);
+int table_set_uppercase(Table *t, TableCell *cell, bool b);
+
+int table_update(Table *t, TableCell *cell, TableDataType type, const void *data);
+
+int table_add_many_internal(Table *t, TableDataType first_type, ...);
+#define table_add_many(t, ...) table_add_many_internal(t, __VA_ARGS__, _TABLE_DATA_TYPE_MAX)
+
+void table_set_header(Table *table, bool b);
+void table_set_width(Table *t, size_t width);
+int table_set_display(Table *t, size_t first_column, ...);
+int table_set_sort(Table *t, size_t first_column, ...);
+int table_set_reverse(Table *t, size_t column, bool b);
+
+int table_print(Table *t, FILE *f);
+int table_format(Table *t, char **ret);
+
+static inline TableCell* TABLE_HEADER_CELL(size_t i) {
+ return SIZE_TO_PTR(i + 1);
+}
+
+size_t table_get_rows(Table *t);
+size_t table_get_columns(Table *t);
+
+TableCell *table_get_cell(Table *t, size_t row, size_t column);
+
+const void *table_get(Table *t, TableCell *cell);
+const void *table_get_at(Table *t, size_t row, size_t column);
+
+int table_to_json(Table *t, JsonVariant **ret);
+int table_print_json(Table *t, FILE *f, unsigned json_flags);
diff --git a/src/shared/fstab-util.c b/src/shared/fstab-util.c
new file mode 100644
index 0000000..6fd9866
--- /dev/null
+++ b/src/shared/fstab-util.c
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <mntent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "device-nodes.h"
+#include "fstab-util.h"
+#include "macro.h"
+#include "mount-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+int fstab_has_fstype(const char *fstype) {
+ _cleanup_endmntent_ FILE *f = NULL;
+ struct mntent *m;
+
+ f = setmntent("/etc/fstab", "re");
+ if (!f)
+ return errno == ENOENT ? false : -errno;
+
+ for (;;) {
+ errno = 0;
+ m = getmntent(f);
+ if (!m)
+ return errno != 0 ? -errno : false;
+
+ if (streq(m->mnt_type, fstype))
+ return true;
+ }
+ return false;
+}
+
+int fstab_is_mount_point(const char *mount) {
+ _cleanup_endmntent_ FILE *f = NULL;
+ struct mntent *m;
+
+ f = setmntent("/etc/fstab", "re");
+ if (!f)
+ return errno == ENOENT ? false : -errno;
+
+ for (;;) {
+ errno = 0;
+ m = getmntent(f);
+ if (!m)
+ return errno != 0 ? -errno : false;
+
+ if (path_equal(m->mnt_dir, mount))
+ return true;
+ }
+ return false;
+}
+
+int fstab_filter_options(const char *opts, const char *names,
+ const char **namefound, char **value, char **filtered) {
+ const char *name, *n = NULL, *x;
+ _cleanup_strv_free_ char **stor = NULL;
+ _cleanup_free_ char *v = NULL, **strv = NULL;
+
+ assert(names && *names);
+
+ if (!opts)
+ goto answer;
+
+ /* If !value and !filtered, this function is not allowed to fail. */
+
+ if (!filtered) {
+ const char *word, *state;
+ size_t l;
+
+ FOREACH_WORD_SEPARATOR(word, l, opts, ",", state)
+ NULSTR_FOREACH(name, names) {
+ if (l < strlen(name))
+ continue;
+ if (!strneq(word, name, strlen(name)))
+ continue;
+
+ /* we know that the string is NUL
+ * terminated, so *x is valid */
+ x = word + strlen(name);
+ if (IN_SET(*x, '\0', '=', ',')) {
+ n = name;
+ if (value) {
+ free(v);
+ if (IN_SET(*x, '\0', ','))
+ v = NULL;
+ else {
+ assert(*x == '=');
+ x++;
+ v = strndup(x, l - strlen(name) - 1);
+ if (!v)
+ return -ENOMEM;
+ }
+ }
+ }
+ }
+ } else {
+ char **t, **s;
+
+ stor = strv_split(opts, ",");
+ if (!stor)
+ return -ENOMEM;
+ strv = memdup(stor, sizeof(char*) * (strv_length(stor) + 1));
+ if (!strv)
+ return -ENOMEM;
+
+ for (s = t = strv; *s; s++) {
+ NULSTR_FOREACH(name, names) {
+ x = startswith(*s, name);
+ if (x && IN_SET(*x, '\0', '='))
+ goto found;
+ }
+
+ *t = *s;
+ t++;
+ continue;
+ found:
+ /* Keep the last occurence found */
+ n = name;
+ if (value) {
+ free(v);
+ if (*x == '\0')
+ v = NULL;
+ else {
+ assert(*x == '=');
+ x++;
+ v = strdup(x);
+ if (!v)
+ return -ENOMEM;
+ }
+ }
+ }
+ *t = NULL;
+ }
+
+answer:
+ if (namefound)
+ *namefound = n;
+ if (filtered) {
+ char *f;
+
+ f = strv_join(strv, ",");
+ if (!f)
+ return -ENOMEM;
+
+ *filtered = f;
+ }
+ if (value)
+ *value = TAKE_PTR(v);
+
+ return !!n;
+}
+
+int fstab_extract_values(const char *opts, const char *name, char ***values) {
+ _cleanup_strv_free_ char **optsv = NULL, **res = NULL;
+ char **s;
+
+ assert(opts);
+ assert(name);
+ assert(values);
+
+ optsv = strv_split(opts, ",");
+ if (!optsv)
+ return -ENOMEM;
+
+ STRV_FOREACH(s, optsv) {
+ char *arg;
+ int r;
+
+ arg = startswith(*s, name);
+ if (!arg || *arg != '=')
+ continue;
+ r = strv_extend(&res, arg + 1);
+ if (r < 0)
+ return r;
+ }
+
+ *values = TAKE_PTR(res);
+
+ return !!*values;
+}
+
+int fstab_find_pri(const char *options, int *ret) {
+ _cleanup_free_ char *opt = NULL;
+ int r;
+ unsigned pri;
+
+ assert(ret);
+
+ r = fstab_filter_options(options, "pri\0", NULL, &opt, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0 || !opt)
+ return 0;
+
+ r = safe_atou(opt, &pri);
+ if (r < 0)
+ return r;
+
+ if ((int) pri < 0)
+ return -ERANGE;
+
+ *ret = (int) pri;
+ return 1;
+}
+
+static char *unquote(const char *s, const char* quotes) {
+ size_t l;
+ assert(s);
+
+ /* This is rather stupid, simply removes the heading and
+ * trailing quotes if there is one. Doesn't care about
+ * escaping or anything.
+ *
+ * DON'T USE THIS FOR NEW CODE ANYMORE! */
+
+ l = strlen(s);
+ if (l < 2)
+ return strdup(s);
+
+ if (strchr(quotes, s[0]) && s[l-1] == s[0])
+ return strndup(s+1, l-2);
+
+ return strdup(s);
+}
+
+static char *tag_to_udev_node(const char *tagvalue, const char *by) {
+ _cleanup_free_ char *t = NULL, *u = NULL;
+ size_t enc_len;
+
+ u = unquote(tagvalue, QUOTES);
+ if (!u)
+ return NULL;
+
+ enc_len = strlen(u) * 4 + 1;
+ t = new(char, enc_len);
+ if (!t)
+ return NULL;
+
+ if (encode_devnode_name(u, t, enc_len) < 0)
+ return NULL;
+
+ return strjoin("/dev/disk/by-", by, "/", t);
+}
+
+char *fstab_node_to_udev_node(const char *p) {
+ assert(p);
+
+ if (startswith(p, "LABEL="))
+ return tag_to_udev_node(p+6, "label");
+
+ if (startswith(p, "UUID="))
+ return tag_to_udev_node(p+5, "uuid");
+
+ if (startswith(p, "PARTUUID="))
+ return tag_to_udev_node(p+9, "partuuid");
+
+ if (startswith(p, "PARTLABEL="))
+ return tag_to_udev_node(p+10, "partlabel");
+
+ return strdup(p);
+}
diff --git a/src/shared/fstab-util.h b/src/shared/fstab-util.h
new file mode 100644
index 0000000..9820f78
--- /dev/null
+++ b/src/shared/fstab-util.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "macro.h"
+
+int fstab_is_mount_point(const char *mount);
+int fstab_has_fstype(const char *fstype);
+
+int fstab_filter_options(const char *opts, const char *names, const char **namefound, char **value, char **filtered);
+
+int fstab_extract_values(const char *opts, const char *name, char ***values);
+
+static inline bool fstab_test_option(const char *opts, const char *names) {
+ return !!fstab_filter_options(opts, names, NULL, NULL, NULL);
+}
+
+int fstab_find_pri(const char *options, int *ret);
+
+static inline bool fstab_test_yes_no_option(const char *opts, const char *yes_no) {
+ int r;
+ const char *opt;
+
+ /* If first name given is last, return 1.
+ * If second name given is last or neither is found, return 0. */
+
+ r = fstab_filter_options(opts, yes_no, &opt, NULL, NULL);
+ assert(r >= 0);
+
+ return opt == yes_no;
+}
+
+char *fstab_node_to_udev_node(const char *p);
diff --git a/src/shared/generate-ip-protocol-list.sh b/src/shared/generate-ip-protocol-list.sh
new file mode 100755
index 0000000..a9b1e0f
--- /dev/null
+++ b/src/shared/generate-ip-protocol-list.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -eu
+
+$1 -dM -include netinet/in.h - </dev/null | \
+ awk '/^#define[ \t]+IPPROTO_[^ \t]+[ \t]+[^ \t]/ { print $2; }' | \
+ sed -e 's/IPPROTO_//'
diff --git a/src/shared/generator.c b/src/shared/generator.c
new file mode 100644
index 0000000..0adaaf2
--- /dev/null
+++ b/src/shared/generator.c
@@ -0,0 +1,504 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio_ext.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dropin.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fstab-util.h"
+#include "generator.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "special.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "unit-name.h"
+#include "util.h"
+
+int generator_open_unit_file(
+ const char *dest,
+ const char *source,
+ const char *name,
+ FILE **file) {
+
+ const char *unit;
+ FILE *f;
+
+ unit = strjoina(dest, "/", name);
+
+ f = fopen(unit, "wxe");
+ if (!f) {
+ if (source && errno == EEXIST)
+ return log_error_errno(errno,
+ "Failed to create unit file %s, as it already exists. Duplicate entry in %s?",
+ unit, source);
+ else
+ return log_error_errno(errno,
+ "Failed to create unit file %s: %m",
+ unit);
+ }
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ fprintf(f,
+ "# Automatically generated by %s\n\n",
+ program_invocation_short_name);
+
+ *file = f;
+ return 0;
+}
+
+int generator_add_symlink(const char *root, const char *dst, const char *dep_type, const char *src) {
+ /* Adds a symlink from <dst>.<dep_type>.d/ to ../<src> */
+
+ const char *from, *to;
+
+ from = strjoina("../", src);
+ to = strjoina(root, "/", dst, ".", dep_type, "/", src);
+
+ mkdir_parents_label(to, 0755);
+ if (symlink(from, to) < 0)
+ if (errno != EEXIST)
+ return log_error_errno(errno, "Failed to create symlink \"%s\": %m", to);
+
+ return 0;
+}
+
+static int write_fsck_sysroot_service(const char *dir, const char *what) {
+ _cleanup_free_ char *device = NULL, *escaped = NULL, *escaped2 = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *unit;
+ int r;
+
+ escaped = specifier_escape(what);
+ if (!escaped)
+ return log_oom();
+
+ escaped2 = cescape(escaped);
+ if (!escaped2)
+ return log_oom();
+
+ unit = strjoina(dir, "/systemd-fsck-root.service");
+ log_debug("Creating %s", unit);
+
+ r = unit_name_from_path(what, ".device", &device);
+ if (r < 0)
+ return log_error_errno(r, "Failed to convert device \"%s\" to unit name: %m", what);
+
+ f = fopen(unit, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", unit);
+
+ fprintf(f,
+ "# Automatically generated by %1$s\n\n"
+ "[Unit]\n"
+ "Description=File System Check on %2$s\n"
+ "Documentation=man:systemd-fsck-root.service(8)\n"
+ "DefaultDependencies=no\n"
+ "BindsTo=%3$s\n"
+ "Conflicts=shutdown.target\n"
+ "After=initrd-root-device.target local-fs-pre.target %3$s\n"
+ "Before=shutdown.target\n"
+ "\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "ExecStart=" SYSTEMD_FSCK_PATH " %4$s\n"
+ "TimeoutSec=0\n",
+ program_invocation_short_name,
+ escaped,
+ device,
+ escaped2);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", unit);
+
+ return 0;
+}
+
+int generator_write_fsck_deps(
+ FILE *f,
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *fstype) {
+
+ int r;
+
+ assert(f);
+ assert(dir);
+ assert(what);
+ assert(where);
+
+ if (!is_device_path(what)) {
+ log_warning("Checking was requested for \"%s\", but it is not a device.", what);
+ return 0;
+ }
+
+ if (!isempty(fstype) && !streq(fstype, "auto")) {
+ r = fsck_exists(fstype);
+ if (r < 0)
+ log_warning_errno(r, "Checking was requested for %s, but couldn't detect if fsck.%s may be used, proceeding: %m", what, fstype);
+ else if (r == 0) {
+ /* treat missing check as essentially OK */
+ log_debug("Checking was requested for %s, but fsck.%s does not exist.", what, fstype);
+ return 0;
+ }
+ }
+
+ if (path_equal(where, "/")) {
+ const char *lnk;
+
+ lnk = strjoina(dir, "/" SPECIAL_LOCAL_FS_TARGET ".wants/systemd-fsck-root.service");
+
+ mkdir_parents(lnk, 0755);
+ if (symlink(SYSTEM_DATA_UNIT_PATH "/systemd-fsck-root.service", lnk) < 0)
+ return log_error_errno(errno, "Failed to create symlink %s: %m", lnk);
+
+ } else {
+ _cleanup_free_ char *_fsck = NULL;
+ const char *fsck;
+
+ if (in_initrd() && path_equal(where, "/sysroot")) {
+ r = write_fsck_sysroot_service(dir, what);
+ if (r < 0)
+ return r;
+
+ fsck = "systemd-fsck-root.service";
+ } else {
+ r = unit_name_from_path_instance("systemd-fsck", what, ".service", &_fsck);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create fsck service name: %m");
+
+ fsck = _fsck;
+ }
+
+ fprintf(f,
+ "Requires=%1$s\n"
+ "After=%1$s\n",
+ fsck);
+ }
+
+ return 0;
+}
+
+int generator_write_timeouts(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *opts,
+ char **filtered) {
+
+ /* Configure how long we wait for a device that backs a mount point or a
+ * swap partition to show up. This is useful to support endless device timeouts
+ * for devices that show up only after user input, like crypto devices. */
+
+ _cleanup_free_ char *node = NULL, *unit = NULL, *timeout = NULL;
+ usec_t u;
+ int r;
+
+ r = fstab_filter_options(opts, "comment=systemd.device-timeout\0"
+ "x-systemd.device-timeout\0",
+ NULL, &timeout, filtered);
+ if (r <= 0)
+ return r;
+
+ r = parse_sec_fix_0(timeout, &u);
+ if (r < 0) {
+ log_warning("Failed to parse timeout for %s, ignoring: %s", where, timeout);
+ return 0;
+ }
+
+ node = fstab_node_to_udev_node(what);
+ if (!node)
+ return log_oom();
+ if (!is_device_path(node)) {
+ log_warning("x-systemd.device-timeout ignored for %s", what);
+ return 0;
+ }
+
+ r = unit_name_from_path(node, ".device", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path: %m");
+
+ return write_drop_in_format(dir, unit, 50, "device-timeout",
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "JobRunningTimeoutSec=%s",
+ program_invocation_short_name,
+ timeout);
+}
+
+int generator_write_device_deps(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *opts) {
+
+ /* fstab records that specify _netdev option should apply the network
+ * ordering on the actual device depending on network connection. If we
+ * are not mounting real device (NFS, CIFS), we rely on _netdev effect
+ * on the mount unit itself. */
+
+ _cleanup_free_ char *node = NULL, *unit = NULL;
+ int r;
+
+ if (!fstab_test_option(opts, "_netdev\0"))
+ return 0;
+
+ node = fstab_node_to_udev_node(what);
+ if (!node)
+ return log_oom();
+
+ /* Nothing to apply dependencies to. */
+ if (!is_device_path(node))
+ return 0;
+
+ r = unit_name_from_path(node, ".device", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ node);
+
+ /* See mount_add_default_dependencies for explanation why we create such
+ * dependencies. */
+ return write_drop_in_format(dir, unit, 50, "netdev-dependencies",
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "After=" SPECIAL_NETWORK_ONLINE_TARGET " " SPECIAL_NETWORK_TARGET "\n"
+ "Wants=" SPECIAL_NETWORK_ONLINE_TARGET "\n",
+ program_invocation_short_name);
+}
+
+int generator_write_initrd_root_device_deps(const char *dir, const char *what) {
+ _cleanup_free_ char *unit = NULL;
+ int r;
+
+ r = unit_name_from_path(what, ".device", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ what);
+
+ return write_drop_in_format(dir, SPECIAL_INITRD_ROOT_DEVICE_TARGET, 50, "root-device",
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "Requires=%s\n"
+ "After=%s",
+ program_invocation_short_name,
+ unit,
+ unit);
+}
+
+int generator_hook_up_mkswap(
+ const char *dir,
+ const char *what) {
+
+ _cleanup_free_ char *node = NULL, *unit = NULL, *escaped = NULL, *where_unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *unit_file;
+ int r;
+
+ node = fstab_node_to_udev_node(what);
+ if (!node)
+ return log_oom();
+
+ /* Nothing to work on. */
+ if (!is_device_path(node))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot format something that is not a device node: %s",
+ node);
+
+ r = unit_name_from_path_instance("systemd-mkswap", node, ".service", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m",
+ node);
+
+ unit_file = strjoina(dir, "/", unit);
+ log_debug("Creating %s", unit_file);
+
+ escaped = cescape(node);
+ if (!escaped)
+ return log_oom();
+
+ r = unit_name_from_path(what, ".swap", &where_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ what);
+
+ f = fopen(unit_file, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m",
+ unit_file);
+
+ fprintf(f,
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "Description=Make Swap on %%f\n"
+ "Documentation=man:systemd-mkswap@.service(8)\n"
+ "DefaultDependencies=no\n"
+ "BindsTo=%%i.device\n"
+ "Conflicts=shutdown.target\n"
+ "After=%%i.device\n"
+ "Before=shutdown.target %s\n"
+ "\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "ExecStart="SYSTEMD_MAKEFS_PATH " swap %s\n"
+ "TimeoutSec=0\n",
+ program_invocation_short_name,
+ where_unit,
+ escaped);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", unit_file);
+
+ return generator_add_symlink(dir, where_unit, "requires", unit);
+}
+
+int generator_hook_up_mkfs(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *type) {
+
+ _cleanup_free_ char *node = NULL, *unit = NULL, *escaped = NULL, *where_unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *unit_file;
+ int r;
+
+ node = fstab_node_to_udev_node(what);
+ if (!node)
+ return log_oom();
+
+ /* Nothing to work on. */
+ if (!is_device_path(node))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot format something that is not a device node: %s",
+ node);
+
+ if (!type || streq(type, "auto"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot format partition %s, filesystem type is not specified",
+ node);
+
+ r = unit_name_from_path_instance("systemd-mkfs", node, ".service", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m",
+ node);
+
+ unit_file = strjoina(dir, "/", unit);
+ log_debug("Creating %s", unit_file);
+
+ escaped = cescape(node);
+ if (!escaped)
+ return log_oom();
+
+ r = unit_name_from_path(where, ".mount", &where_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ where);
+
+ f = fopen(unit_file, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m",
+ unit_file);
+
+ fprintf(f,
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "Description=Make File System on %%f\n"
+ "Documentation=man:systemd-mkfs@.service(8)\n"
+ "DefaultDependencies=no\n"
+ "BindsTo=%%i.device\n"
+ "Conflicts=shutdown.target\n"
+ "After=%%i.device\n"
+ /* fsck might or might not be used, so let's be safe and order
+ * ourselves before both systemd-fsck@.service and the mount unit. */
+ "Before=shutdown.target systemd-fsck@%%i.service %s\n"
+ "\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "ExecStart="SYSTEMD_MAKEFS_PATH " %s %s\n"
+ "TimeoutSec=0\n",
+ program_invocation_short_name,
+ where_unit,
+ type,
+ escaped);
+ // XXX: what about local-fs-pre.target?
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", unit_file);
+
+ return generator_add_symlink(dir, where_unit, "requires", unit);
+}
+
+int generator_hook_up_growfs(
+ const char *dir,
+ const char *where,
+ const char *target) {
+
+ _cleanup_free_ char *unit = NULL, *escaped = NULL, *where_unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *unit_file;
+ int r;
+
+ escaped = cescape(where);
+ if (!escaped)
+ return log_oom();
+
+ r = unit_name_from_path_instance("systemd-growfs", where, ".service", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m",
+ where);
+
+ r = unit_name_from_path(where, ".mount", &where_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ where);
+
+ unit_file = strjoina(dir, "/", unit);
+ log_debug("Creating %s", unit_file);
+
+ f = fopen(unit_file, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m",
+ unit_file);
+
+ fprintf(f,
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "Description=Grow File System on %%f\n"
+ "Documentation=man:systemd-growfs@.service(8)\n"
+ "DefaultDependencies=no\n"
+ "BindsTo=%%i.mount\n"
+ "Conflicts=shutdown.target\n"
+ "After=%%i.mount\n"
+ "Before=shutdown.target %s\n"
+ "\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "ExecStart="SYSTEMD_GROWFS_PATH " %s\n"
+ "TimeoutSec=0\n",
+ program_invocation_short_name,
+ target,
+ escaped);
+
+ return generator_add_symlink(dir, where_unit, "wants", unit);
+}
+
+void log_setup_generator(void) {
+ log_set_prohibit_ipc(true);
+ log_setup_service();
+}
diff --git a/src/shared/generator.h b/src/shared/generator.h
new file mode 100644
index 0000000..5a1c1e3
--- /dev/null
+++ b/src/shared/generator.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdio.h>
+
+#include "main-func.h"
+
+int generator_open_unit_file(
+ const char *dest,
+ const char *source,
+ const char *name,
+ FILE **file);
+
+int generator_add_symlink(const char *root, const char *dst, const char *dep_type, const char *src);
+
+int generator_write_fsck_deps(
+ FILE *f,
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *type);
+
+int generator_write_timeouts(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *opts,
+ char **filtered);
+
+int generator_write_device_deps(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *opts);
+
+int generator_write_initrd_root_device_deps(
+ const char *dir,
+ const char *what);
+
+int generator_hook_up_mkswap(
+ const char *dir,
+ const char *what);
+int generator_hook_up_mkfs(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *type);
+int generator_hook_up_growfs(
+ const char *dir,
+ const char *where,
+ const char *target);
+
+void log_setup_generator(void);
+
+/* Similar to DEFINE_MAIN_FUNCTION, but initializes logging and assigns positional arguments. */
+#define DEFINE_MAIN_GENERATOR_FUNCTION(impl) \
+ _DEFINE_MAIN_FUNCTION( \
+ ({ \
+ log_setup_generator(); \
+ if (argc > 1 && argc != 4) \
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), \
+ "This program takes zero or three arguments."); \
+ }), \
+ impl(argc > 1 ? argv[1] : "/tmp", \
+ argc > 1 ? argv[2] : "/tmp", \
+ argc > 1 ? argv[3] : "/tmp"), \
+ r < 0 ? EXIT_FAILURE : EXIT_SUCCESS)
diff --git a/src/shared/gpt.h b/src/shared/gpt.h
new file mode 100644
index 0000000..fd953fa
--- /dev/null
+++ b/src/shared/gpt.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <endian.h>
+
+#include "sd-id128.h"
+
+/* We only support root disk discovery for x86, x86-64, Itanium and ARM for
+ * now, since EFI for anything else doesn't really exist, and we only
+ * care for root partitions on the same disk as the EFI ESP. */
+
+#define GPT_ROOT_X86 SD_ID128_MAKE(44,47,95,40,f2,97,41,b2,9a,f7,d1,31,d5,f0,45,8a)
+#define GPT_ROOT_X86_64 SD_ID128_MAKE(4f,68,bc,e3,e8,cd,4d,b1,96,e7,fb,ca,f9,84,b7,09)
+#define GPT_ROOT_ARM SD_ID128_MAKE(69,da,d7,10,2c,e4,4e,3c,b1,6c,21,a1,d4,9a,be,d3)
+#define GPT_ROOT_ARM_64 SD_ID128_MAKE(b9,21,b0,45,1d,f0,41,c3,af,44,4c,6f,28,0d,3f,ae)
+#define GPT_ROOT_IA64 SD_ID128_MAKE(99,3d,8d,3d,f8,0e,42,25,85,5a,9d,af,8e,d7,ea,97)
+#define GPT_ESP SD_ID128_MAKE(c1,2a,73,28,f8,1f,11,d2,ba,4b,00,a0,c9,3e,c9,3b)
+#define GPT_SWAP SD_ID128_MAKE(06,57,fd,6d,a4,ab,43,c4,84,e5,09,33,c8,4b,4f,4f)
+#define GPT_HOME SD_ID128_MAKE(93,3a,c7,e1,2e,b4,4f,13,b8,44,0e,14,e2,ae,f9,15)
+#define GPT_SRV SD_ID128_MAKE(3b,8f,84,25,20,e0,4f,3b,90,7f,1a,25,a7,6f,98,e8)
+
+/* Verity partitions for the root partitions above (we only define them for the root partitions, because only they are
+ * are commonly read-only and hence suitable for verity). */
+#define GPT_ROOT_X86_VERITY SD_ID128_MAKE(d1,3c,5d,3b,b5,d1,42,2a,b2,9f,94,54,fd,c8,9d,76)
+#define GPT_ROOT_X86_64_VERITY SD_ID128_MAKE(2c,73,57,ed,eb,d2,46,d9,ae,c1,23,d4,37,ec,2b,f5)
+#define GPT_ROOT_ARM_VERITY SD_ID128_MAKE(73,86,cd,f2,20,3c,47,a9,a4,98,f2,ec,ce,45,a2,d6)
+#define GPT_ROOT_ARM_64_VERITY SD_ID128_MAKE(df,33,00,ce,d6,9f,4c,92,97,8c,9b,fb,0f,38,d8,20)
+#define GPT_ROOT_IA64_VERITY SD_ID128_MAKE(86,ed,10,d5,b6,07,45,bb,89,57,d3,50,f2,3d,05,71)
+
+#if defined(__x86_64__)
+# define GPT_ROOT_NATIVE GPT_ROOT_X86_64
+# define GPT_ROOT_SECONDARY GPT_ROOT_X86
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_X86_64_VERITY
+# define GPT_ROOT_SECONDARY_VERITY GPT_ROOT_X86_VERITY
+#elif defined(__i386__)
+# define GPT_ROOT_NATIVE GPT_ROOT_X86
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_X86_VERITY
+#endif
+
+#if defined(__ia64__)
+# define GPT_ROOT_NATIVE GPT_ROOT_IA64
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_IA64_VERITY
+#endif
+
+#if defined(__aarch64__) && (__BYTE_ORDER != __BIG_ENDIAN)
+# define GPT_ROOT_NATIVE GPT_ROOT_ARM_64
+# define GPT_ROOT_SECONDARY GPT_ROOT_ARM
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_64_VERITY
+# define GPT_ROOT_SECONDARY_VERITY GPT_ROOT_ARM_VERITY
+#elif defined(__arm__) && (__BYTE_ORDER != __BIG_ENDIAN)
+# define GPT_ROOT_NATIVE GPT_ROOT_ARM
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_VERITY
+#endif
+
+#define GPT_FLAG_NO_BLOCK_IO_PROTOCOL (1ULL << 1)
+
+/* Flags we recognize on the root, swap, home and srv partitions when
+ * doing auto-discovery. These happen to be identical to what
+ * Microsoft defines for its own Basic Data Partitions, but that's
+ * just because we saw no point in defining any other values here. */
+#define GPT_FLAG_READ_ONLY (1ULL << 60)
+#define GPT_FLAG_NO_AUTO (1ULL << 63)
+
+#define GPT_LINUX_GENERIC SD_ID128_MAKE(0f,c6,3d,af,84,83,47,72,8e,79,3d,69,d8,47,7d,e4)
diff --git a/src/shared/id128-print.c b/src/shared/id128-print.c
new file mode 100644
index 0000000..1b20b8f
--- /dev/null
+++ b/src/shared/id128-print.c
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "id128-print.h"
+#include "log.h"
+#include "pretty-print.h"
+#include "terminal-util.h"
+
+int id128_pretty_print(sd_id128_t id, bool pretty) {
+ unsigned i;
+ _cleanup_free_ char *man_link = NULL, *mod_link = NULL;
+ const char *on, *off;
+
+ if (!pretty) {
+ printf(SD_ID128_FORMAT_STR "\n",
+ SD_ID128_FORMAT_VAL(id));
+ return 0;
+ }
+
+ on = ansi_highlight();
+ off = ansi_normal();
+
+ if (terminal_urlify("man:systemd-id128(1)", "systemd-id128(1)", &man_link) < 0)
+ return log_oom();
+
+ if (terminal_urlify("https://docs.python.org/3/library/uuid.html", "uuid", &mod_link) < 0)
+ return log_oom();
+
+ printf("As string:\n"
+ "%s" SD_ID128_FORMAT_STR "%s\n\n"
+ "As UUID:\n"
+ "%s%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x%s\n\n"
+ "As %s macro:\n"
+ "%s#define MESSAGE_XYZ SD_ID128_MAKE(",
+ on, SD_ID128_FORMAT_VAL(id), off,
+ on, SD_ID128_FORMAT_VAL(id), off,
+ man_link,
+ on);
+ for (i = 0; i < 16; i++)
+ printf("%02x%s", id.bytes[i], i != 15 ? "," : "");
+ printf(")%s\n\n", off);
+
+ printf("As Python constant:\n"
+ ">>> import %s\n"
+ ">>> %sMESSAGE_XYZ = uuid.UUID('" SD_ID128_FORMAT_STR "')%s\n",
+ mod_link,
+ on, SD_ID128_FORMAT_VAL(id), off);
+
+ return 0;
+}
+
+int id128_print_new(bool pretty) {
+ sd_id128_t id;
+ int r;
+
+ r = sd_id128_randomize(&id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate ID: %m");
+
+ return id128_pretty_print(id, pretty);
+}
diff --git a/src/shared/id128-print.h b/src/shared/id128-print.h
new file mode 100644
index 0000000..5d50de0
--- /dev/null
+++ b/src/shared/id128-print.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-id128.h"
+
+int id128_pretty_print(sd_id128_t id, bool pretty);
+int id128_print_new(bool pretty);
diff --git a/src/shared/ima-util.c b/src/shared/ima-util.c
new file mode 100644
index 0000000..0d4ce3c
--- /dev/null
+++ b/src/shared/ima-util.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <unistd.h>
+
+#include "ima-util.h"
+
+static int use_ima_cached = -1;
+
+bool use_ima(void) {
+
+ if (use_ima_cached < 0)
+ use_ima_cached = access("/sys/kernel/security/ima/", F_OK) >= 0;
+
+ return use_ima_cached;
+}
diff --git a/src/shared/ima-util.h b/src/shared/ima-util.h
new file mode 100644
index 0000000..8f20741
--- /dev/null
+++ b/src/shared/ima-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+bool use_ima(void);
diff --git a/src/shared/import-util.c b/src/shared/import-util.c
new file mode 100644
index 0000000..bcd6c0c
--- /dev/null
+++ b/src/shared/import-util.c
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "import-util.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "util.h"
+
+int import_url_last_component(const char *url, char **ret) {
+ const char *e, *p;
+ char *s;
+
+ e = strchrnul(url, '?');
+
+ while (e > url && e[-1] == '/')
+ e--;
+
+ p = e;
+ while (p > url && p[-1] != '/')
+ p--;
+
+ if (e <= p)
+ return -EINVAL;
+
+ s = strndup(p, e - p);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int import_url_change_last_component(const char *url, const char *suffix, char **ret) {
+ const char *e;
+ char *s;
+
+ assert(url);
+ assert(ret);
+
+ e = strchrnul(url, '?');
+
+ while (e > url && e[-1] == '/')
+ e--;
+
+ while (e > url && e[-1] != '/')
+ e--;
+
+ if (e <= url)
+ return -EINVAL;
+
+ s = new(char, (e - url) + strlen(suffix) + 1);
+ if (!s)
+ return -ENOMEM;
+
+ strcpy(mempcpy(s, url, e - url), suffix);
+ *ret = s;
+ return 0;
+}
+
+static const char* const import_verify_table[_IMPORT_VERIFY_MAX] = {
+ [IMPORT_VERIFY_NO] = "no",
+ [IMPORT_VERIFY_CHECKSUM] = "checksum",
+ [IMPORT_VERIFY_SIGNATURE] = "signature",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(import_verify, ImportVerify);
+
+int tar_strip_suffixes(const char *name, char **ret) {
+ const char *e;
+ char *s;
+
+ e = endswith(name, ".tar");
+ if (!e)
+ e = endswith(name, ".tar.xz");
+ if (!e)
+ e = endswith(name, ".tar.gz");
+ if (!e)
+ e = endswith(name, ".tar.bz2");
+ if (!e)
+ e = endswith(name, ".tgz");
+ if (!e)
+ e = strchr(name, 0);
+
+ if (e <= name)
+ return -EINVAL;
+
+ s = strndup(name, e - name);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int raw_strip_suffixes(const char *p, char **ret) {
+
+ static const char suffixes[] =
+ ".xz\0"
+ ".gz\0"
+ ".bz2\0"
+ ".raw\0"
+ ".qcow2\0"
+ ".img\0"
+ ".bin\0";
+
+ _cleanup_free_ char *q = NULL;
+
+ q = strdup(p);
+ if (!q)
+ return -ENOMEM;
+
+ for (;;) {
+ const char *sfx;
+ bool changed = false;
+
+ NULSTR_FOREACH(sfx, suffixes) {
+ char *e;
+
+ e = endswith(q, sfx);
+ if (e) {
+ *e = 0;
+ changed = true;
+ }
+ }
+
+ if (!changed)
+ break;
+ }
+
+ *ret = TAKE_PTR(q);
+
+ return 0;
+}
+
+int import_assign_pool_quota_and_warn(const char *path) {
+ int r;
+
+ r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true);
+ if (r == -ENOTTY) {
+ log_debug_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, as directory is not on btrfs or not a subvolume. Ignoring.");
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines: %m");
+ if (r > 0)
+ log_info("Set up default quota hierarchy for /var/lib/machines.");
+
+ r = btrfs_subvol_auto_qgroup(path, 0, true);
+ if (r == -ENOTTY) {
+ log_debug_errno(r, "Failed to set up quota hierarchy for %s, as directory is not on btrfs or not a subvolume. Ignoring.", path);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up default quota hierarchy for %s: %m", path);
+ if (r > 0)
+ log_debug("Set up default quota hierarchy for %s.", path);
+
+ return 0;
+}
diff --git a/src/shared/import-util.h b/src/shared/import-util.h
new file mode 100644
index 0000000..0f2a517
--- /dev/null
+++ b/src/shared/import-util.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+typedef enum ImportVerify {
+ IMPORT_VERIFY_NO,
+ IMPORT_VERIFY_CHECKSUM,
+ IMPORT_VERIFY_SIGNATURE,
+ _IMPORT_VERIFY_MAX,
+ _IMPORT_VERIFY_INVALID = -1,
+} ImportVerify;
+
+int import_url_last_component(const char *url, char **ret);
+int import_url_change_last_component(const char *url, const char *suffix, char **ret);
+
+const char* import_verify_to_string(ImportVerify v) _const_;
+ImportVerify import_verify_from_string(const char *s) _pure_;
+
+int tar_strip_suffixes(const char *name, char **ret);
+int raw_strip_suffixes(const char *name, char **ret);
+
+int import_assign_pool_quota_and_warn(const char *path);
diff --git a/src/shared/initreq.h b/src/shared/initreq.h
new file mode 100644
index 0000000..1d7ff81
--- /dev/null
+++ b/src/shared/initreq.h
@@ -0,0 +1,73 @@
+/*
+ * initreq.h Interface to talk to init through /dev/initctl.
+ *
+ * Copyright (C) 1995-2004 Miquel van Smoorenburg
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * Version: @(#)initreq.h 1.28 31-Mar-2004 MvS
+ */
+
+#pragma once
+
+#include <sys/param.h>
+
+#if defined(__FreeBSD_kernel__)
+# define INIT_FIFO "/etc/.initctl"
+#else
+# define INIT_FIFO "/dev/initctl"
+#endif
+
+#define INIT_MAGIC 0x03091969
+#define INIT_CMD_START 0
+#define INIT_CMD_RUNLVL 1
+#define INIT_CMD_POWERFAIL 2
+#define INIT_CMD_POWERFAILNOW 3
+#define INIT_CMD_POWEROK 4
+#define INIT_CMD_BSD 5
+#define INIT_CMD_SETENV 6
+#define INIT_CMD_UNSETENV 7
+
+#define INIT_CMD_CHANGECONS 12345
+
+#ifdef MAXHOSTNAMELEN
+# define INITRQ_HLEN MAXHOSTNAMELEN
+#else
+# define INITRQ_HLEN 64
+#endif
+
+/*
+ * This is what BSD 4.4 uses when talking to init.
+ * Linux doesn't use this right now.
+ */
+struct init_request_bsd {
+ char gen_id[8]; /* Beats me.. telnetd uses "fe" */
+ char tty_id[16]; /* Tty name minus /dev/tty */
+ char host[INITRQ_HLEN]; /* Hostname */
+ char term_type[16]; /* Terminal type */
+ int signal; /* Signal to send */
+ int pid; /* Process to send to */
+ char exec_name[128]; /* Program to execute */
+ char reserved[128]; /* For future expansion. */
+};
+
+/*
+ * Because of legacy interfaces, "runlevel" and "sleeptime"
+ * aren't in a separate struct in the union.
+ *
+ * The weird sizes are because init expects the whole
+ * struct to be 384 bytes.
+ */
+struct init_request {
+ int magic; /* Magic number */
+ int cmd; /* What kind of request */
+ int runlevel; /* Runlevel to change to */
+ int sleeptime; /* Time between TERM and KILL */
+ union {
+ struct init_request_bsd bsd;
+ char data[368];
+ } i;
+};
diff --git a/src/shared/install-printf.c b/src/shared/install-printf.c
new file mode 100644
index 0000000..d2143be
--- /dev/null
+++ b/src/shared/install-printf.c
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "format-util.h"
+#include "install-printf.h"
+#include "install.h"
+#include "macro.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+
+static int specifier_prefix_and_instance(char specifier, const void *data, const void *userdata, char **ret) {
+ const UnitFileInstallInfo *i = userdata;
+ _cleanup_free_ char *prefix = NULL;
+ int r;
+
+ assert(i);
+
+ r = unit_name_to_prefix_and_instance(i->name, &prefix);
+ if (r < 0)
+ return r;
+
+ if (endswith(prefix, "@") && i->default_instance) {
+ char *ans;
+
+ ans = strjoin(prefix, i->default_instance);
+ if (!ans)
+ return -ENOMEM;
+ *ret = ans;
+ } else
+ *ret = TAKE_PTR(prefix);
+
+ return 0;
+}
+
+static int specifier_name(char specifier, const void *data, const void *userdata, char **ret) {
+ const UnitFileInstallInfo *i = userdata;
+ char *ans;
+
+ assert(i);
+
+ if (unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE) && i->default_instance)
+ return unit_name_replace_instance(i->name, i->default_instance, ret);
+
+ ans = strdup(i->name);
+ if (!ans)
+ return -ENOMEM;
+ *ret = ans;
+ return 0;
+}
+
+static int specifier_prefix(char specifier, const void *data, const void *userdata, char **ret) {
+ const UnitFileInstallInfo *i = userdata;
+
+ assert(i);
+
+ return unit_name_to_prefix(i->name, ret);
+}
+
+static int specifier_instance(char specifier, const void *data, const void *userdata, char **ret) {
+ const UnitFileInstallInfo *i = userdata;
+ char *instance;
+ int r;
+
+ assert(i);
+
+ r = unit_name_to_instance(i->name, &instance);
+ if (r < 0)
+ return r;
+
+ if (isempty(instance)) {
+ r = free_and_strdup(&instance, strempty(i->default_instance));
+ if (r < 0)
+ return r;
+ }
+
+ *ret = instance;
+ return 0;
+}
+
+static int specifier_last_component(char specifier, const void *data, const void *userdata, char **ret) {
+ _cleanup_free_ char *prefix = NULL;
+ char *dash;
+ int r;
+
+ r = specifier_prefix(specifier, data, userdata, &prefix);
+ if (r < 0)
+ return r;
+
+ dash = strrchr(prefix, '-');
+ if (dash) {
+ dash = strdup(dash + 1);
+ if (!dash)
+ return -ENOMEM;
+ *ret = dash;
+ } else
+ *ret = TAKE_PTR(prefix);
+
+ return 0;
+}
+
+int install_full_printf(const UnitFileInstallInfo *i, const char *format, char **ret) {
+ /* This is similar to unit_full_printf() but does not support
+ * anything path-related.
+ *
+ * %n: the full id of the unit (foo@bar.waldo)
+ * %N: the id of the unit without the suffix (foo@bar)
+ * %p: the prefix (foo)
+ * %i: the instance (bar)
+
+ * %U the UID of the running user
+ * %u the username of running user
+ * %m the machine ID of the running system
+ * %H the host name of the running system
+ * %b the boot ID of the running system
+ * %v `uname -r` of the running system
+ */
+
+ const Specifier table[] = {
+ { 'n', specifier_name, NULL },
+ { 'N', specifier_prefix_and_instance, NULL },
+ { 'p', specifier_prefix, NULL },
+ { 'i', specifier_instance, NULL },
+ { 'j', specifier_last_component, NULL },
+
+ { 'g', specifier_group_name, NULL },
+ { 'G', specifier_group_id, NULL },
+ { 'U', specifier_user_id, NULL },
+ { 'u', specifier_user_name, NULL },
+
+ { 'm', specifier_machine_id, NULL },
+ { 'H', specifier_host_name, NULL },
+ { 'b', specifier_boot_id, NULL },
+ { 'v', specifier_kernel_release, NULL },
+ {}
+ };
+
+ assert(i);
+ assert(format);
+ assert(ret);
+
+ return specifier_printf(format, table, i, ret);
+}
diff --git a/src/shared/install-printf.h b/src/shared/install-printf.h
new file mode 100644
index 0000000..fa8ea7e
--- /dev/null
+++ b/src/shared/install-printf.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "install.h"
+
+int install_full_printf(const UnitFileInstallInfo *i, const char *format, char **ret);
diff --git a/src/shared/install.c b/src/shared/install.c
new file mode 100644
index 0000000..8629304
--- /dev/null
+++ b/src/shared/install.c
@@ -0,0 +1,3383 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "dirent-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "install-printf.h"
+#include "install.h"
+#include "locale-util.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "set.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+
+#define UNIT_FILE_FOLLOW_SYMLINK_MAX 64
+
+typedef enum SearchFlags {
+ SEARCH_LOAD = 1 << 0,
+ SEARCH_FOLLOW_CONFIG_SYMLINKS = 1 << 1,
+ SEARCH_DROPIN = 1 << 2,
+} SearchFlags;
+
+typedef struct {
+ OrderedHashmap *will_process;
+ OrderedHashmap *have_processed;
+} InstallContext;
+
+typedef enum {
+ PRESET_UNKNOWN,
+ PRESET_ENABLE,
+ PRESET_DISABLE,
+} PresetAction;
+
+typedef struct {
+ char *pattern;
+ PresetAction action;
+ char **instances;
+} PresetRule;
+
+typedef struct {
+ PresetRule *rules;
+ size_t n_rules;
+} Presets;
+
+static bool unit_file_install_info_has_rules(const UnitFileInstallInfo *i) {
+ assert(i);
+
+ return !strv_isempty(i->aliases) ||
+ !strv_isempty(i->wanted_by) ||
+ !strv_isempty(i->required_by);
+}
+
+static bool unit_file_install_info_has_also(const UnitFileInstallInfo *i) {
+ assert(i);
+
+ return !strv_isempty(i->also);
+}
+
+static void presets_freep(Presets *p) {
+ size_t i;
+
+ if (!p)
+ return;
+
+ for (i = 0; i < p->n_rules; i++) {
+ free(p->rules[i].pattern);
+ strv_free(p->rules[i].instances);
+ }
+
+ free(p->rules);
+ p->n_rules = 0;
+}
+
+bool unit_type_may_alias(UnitType type) {
+ return IN_SET(type,
+ UNIT_SERVICE,
+ UNIT_SOCKET,
+ UNIT_TARGET,
+ UNIT_DEVICE,
+ UNIT_TIMER,
+ UNIT_PATH);
+}
+
+bool unit_type_may_template(UnitType type) {
+ return IN_SET(type,
+ UNIT_SERVICE,
+ UNIT_SOCKET,
+ UNIT_TARGET,
+ UNIT_TIMER,
+ UNIT_PATH);
+}
+
+static const char *unit_file_type_table[_UNIT_FILE_TYPE_MAX] = {
+ [UNIT_FILE_TYPE_REGULAR] = "regular",
+ [UNIT_FILE_TYPE_SYMLINK] = "symlink",
+ [UNIT_FILE_TYPE_MASKED] = "masked",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(unit_file_type, UnitFileType);
+
+static int in_search_path(const LookupPaths *p, const char *path) {
+ _cleanup_free_ char *parent = NULL;
+ char **i;
+
+ assert(path);
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ STRV_FOREACH(i, p->search_path)
+ if (path_equal(parent, *i))
+ return true;
+
+ return false;
+}
+
+static const char* skip_root(const LookupPaths *p, const char *path) {
+ char *e;
+
+ assert(p);
+ assert(path);
+
+ if (!p->root_dir)
+ return path;
+
+ e = path_startswith(path, p->root_dir);
+ if (!e)
+ return NULL;
+
+ /* Make sure the returned path starts with a slash */
+ if (e[0] != '/') {
+ if (e == path || e[-1] != '/')
+ return NULL;
+
+ e--;
+ }
+
+ return e;
+}
+
+static int path_is_generator(const LookupPaths *p, const char *path) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(p);
+ assert(path);
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ return path_equal_ptr(parent, p->generator) ||
+ path_equal_ptr(parent, p->generator_early) ||
+ path_equal_ptr(parent, p->generator_late);
+}
+
+static int path_is_transient(const LookupPaths *p, const char *path) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(p);
+ assert(path);
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ return path_equal_ptr(parent, p->transient);
+}
+
+static int path_is_control(const LookupPaths *p, const char *path) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(p);
+ assert(path);
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ return path_equal_ptr(parent, p->persistent_control) ||
+ path_equal_ptr(parent, p->runtime_control);
+}
+
+static int path_is_config(const LookupPaths *p, const char *path, bool check_parent) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(p);
+ assert(path);
+
+ /* Note that we do *not* have generic checks for /etc or /run in place, since with
+ * them we couldn't discern configuration from transient or generated units */
+
+ if (check_parent) {
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ path = parent;
+ }
+
+ return path_equal_ptr(path, p->persistent_config) ||
+ path_equal_ptr(path, p->runtime_config);
+}
+
+static int path_is_runtime(const LookupPaths *p, const char *path, bool check_parent) {
+ _cleanup_free_ char *parent = NULL;
+ const char *rpath;
+
+ assert(p);
+ assert(path);
+
+ /* Everything in /run is considered runtime. On top of that we also add
+ * explicit checks for the various runtime directories, as safety net. */
+
+ rpath = skip_root(p, path);
+ if (rpath && path_startswith(rpath, "/run"))
+ return true;
+
+ if (check_parent) {
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ path = parent;
+ }
+
+ return path_equal_ptr(path, p->runtime_config) ||
+ path_equal_ptr(path, p->generator) ||
+ path_equal_ptr(path, p->generator_early) ||
+ path_equal_ptr(path, p->generator_late) ||
+ path_equal_ptr(path, p->transient) ||
+ path_equal_ptr(path, p->runtime_control);
+}
+
+static int path_is_vendor(const LookupPaths *p, const char *path) {
+ const char *rpath;
+
+ assert(p);
+ assert(path);
+
+ rpath = skip_root(p, path);
+ if (!rpath)
+ return 0;
+
+ if (path_startswith(rpath, "/usr"))
+ return true;
+
+#if HAVE_SPLIT_USR
+ if (path_startswith(rpath, "/lib"))
+ return true;
+#endif
+
+ return path_equal(rpath, SYSTEM_DATA_UNIT_PATH);
+}
+
+int unit_file_changes_add(
+ UnitFileChange **changes,
+ size_t *n_changes,
+ UnitFileChangeType type,
+ const char *path,
+ const char *source) {
+
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ UnitFileChange *c;
+
+ assert(path);
+ assert(!changes == !n_changes);
+
+ if (!changes)
+ return 0;
+
+ c = reallocarray(*changes, *n_changes + 1, sizeof(UnitFileChange));
+ if (!c)
+ return -ENOMEM;
+ *changes = c;
+
+ p = strdup(path);
+ if (source)
+ s = strdup(source);
+
+ if (!p || (source && !s))
+ return -ENOMEM;
+
+ path_simplify(p, false);
+ if (s)
+ path_simplify(s, false);
+
+ c[*n_changes] = (UnitFileChange) { type, p, s };
+ p = s = NULL;
+ (*n_changes) ++;
+ return 0;
+}
+
+void unit_file_changes_free(UnitFileChange *changes, size_t n_changes) {
+ size_t i;
+
+ assert(changes || n_changes == 0);
+
+ for (i = 0; i < n_changes; i++) {
+ free(changes[i].path);
+ free(changes[i].source);
+ }
+
+ free(changes);
+}
+
+void unit_file_dump_changes(int r, const char *verb, const UnitFileChange *changes, size_t n_changes, bool quiet) {
+ size_t i;
+ bool logged = false;
+
+ assert(changes || n_changes == 0);
+ /* If verb is not specified, errors are not allowed! */
+ assert(verb || r >= 0);
+
+ for (i = 0; i < n_changes; i++) {
+ assert(verb || changes[i].type >= 0);
+
+ switch(changes[i].type) {
+ case UNIT_FILE_SYMLINK:
+ if (!quiet)
+ log_info("Created symlink %s %s %s.",
+ changes[i].path,
+ special_glyph(SPECIAL_GLYPH_ARROW),
+ changes[i].source);
+ break;
+ case UNIT_FILE_UNLINK:
+ if (!quiet)
+ log_info("Removed %s.", changes[i].path);
+ break;
+ case UNIT_FILE_IS_MASKED:
+ if (!quiet)
+ log_info("Unit %s is masked, ignoring.", changes[i].path);
+ break;
+ case UNIT_FILE_IS_DANGLING:
+ if (!quiet)
+ log_info("Unit %s is an alias to a unit that is not present, ignoring.",
+ changes[i].path);
+ break;
+ case -EEXIST:
+ if (changes[i].source)
+ log_error_errno(changes[i].type,
+ "Failed to %s unit, file %s already exists and is a symlink to %s.",
+ verb, changes[i].path, changes[i].source);
+ else
+ log_error_errno(changes[i].type,
+ "Failed to %s unit, file %s already exists.",
+ verb, changes[i].path);
+ logged = true;
+ break;
+ case -ERFKILL:
+ log_error_errno(changes[i].type, "Failed to %s unit, unit %s is masked.",
+ verb, changes[i].path);
+ logged = true;
+ break;
+ case -EADDRNOTAVAIL:
+ log_error_errno(changes[i].type, "Failed to %s unit, unit %s is transient or generated.",
+ verb, changes[i].path);
+ logged = true;
+ break;
+ case -ELOOP:
+ log_error_errno(changes[i].type, "Failed to %s unit, refusing to operate on linked unit file %s",
+ verb, changes[i].path);
+ logged = true;
+ break;
+
+ case -ENOENT:
+ log_error_errno(changes[i].type, "Failed to %s unit, unit %s does not exist.", verb, changes[i].path);
+ logged = true;
+ break;
+
+ default:
+ assert(changes[i].type < 0);
+ log_error_errno(changes[i].type, "Failed to %s unit, file %s: %m.",
+ verb, changes[i].path);
+ logged = true;
+ }
+ }
+
+ if (r < 0 && !logged)
+ log_error_errno(r, "Failed to %s: %m.", verb);
+}
+
+/**
+ * Checks if two paths or symlinks from wd are the same, when root is the root of the filesystem.
+ * wc should be the full path in the host file system.
+ */
+static bool chroot_symlinks_same(const char *root, const char *wd, const char *a, const char *b) {
+ assert(path_is_absolute(wd));
+
+ /* This will give incorrect results if the paths are relative and go outside
+ * of the chroot. False negatives are possible. */
+
+ if (!root)
+ root = "/";
+
+ a = strjoina(path_is_absolute(a) ? root : wd, "/", a);
+ b = strjoina(path_is_absolute(b) ? root : wd, "/", b);
+ return path_equal_or_files_same(a, b, 0);
+}
+
+static int create_symlink(
+ const LookupPaths *paths,
+ const char *old_path,
+ const char *new_path,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *dest = NULL, *dirname = NULL;
+ const char *rp;
+ int r;
+
+ assert(old_path);
+ assert(new_path);
+
+ rp = skip_root(paths, old_path);
+ if (rp)
+ old_path = rp;
+
+ /* Actually create a symlink, and remember that we did. Is
+ * smart enough to check if there's already a valid symlink in
+ * place.
+ *
+ * Returns 1 if a symlink was created or already exists and points to
+ * the right place, or negative on error.
+ */
+
+ mkdir_parents_label(new_path, 0755);
+
+ if (symlink(old_path, new_path) >= 0) {
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_SYMLINK, new_path, old_path);
+ return 1;
+ }
+
+ if (errno != EEXIST) {
+ unit_file_changes_add(changes, n_changes, -errno, new_path, NULL);
+ return -errno;
+ }
+
+ r = readlink_malloc(new_path, &dest);
+ if (r < 0) {
+ /* translate EINVAL (non-symlink exists) to EEXIST */
+ if (r == -EINVAL)
+ r = -EEXIST;
+
+ unit_file_changes_add(changes, n_changes, r, new_path, NULL);
+ return r;
+ }
+
+ dirname = dirname_malloc(new_path);
+ if (!dirname)
+ return -ENOMEM;
+
+ if (chroot_symlinks_same(paths->root_dir, dirname, dest, old_path)) {
+ log_debug("Symlink %s → %s already exists", new_path, dest);
+ return 1;
+ }
+
+ if (!force) {
+ unit_file_changes_add(changes, n_changes, -EEXIST, new_path, dest);
+ return -EEXIST;
+ }
+
+ r = symlink_atomic(old_path, new_path);
+ if (r < 0) {
+ unit_file_changes_add(changes, n_changes, r, new_path, NULL);
+ return r;
+ }
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, new_path, NULL);
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_SYMLINK, new_path, old_path);
+
+ return 1;
+}
+
+static int mark_symlink_for_removal(
+ Set **remove_symlinks_to,
+ const char *p) {
+
+ char *n;
+ int r;
+
+ assert(p);
+
+ r = set_ensure_allocated(remove_symlinks_to, &path_hash_ops);
+ if (r < 0)
+ return r;
+
+ n = strdup(p);
+ if (!n)
+ return -ENOMEM;
+
+ path_simplify(n, false);
+
+ r = set_consume(*remove_symlinks_to, n);
+ if (r == -EEXIST)
+ return 0;
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int remove_marked_symlinks_fd(
+ Set *remove_symlinks_to,
+ int fd,
+ const char *path,
+ const char *config_path,
+ const LookupPaths *lp,
+ bool dry_run,
+ bool *restart,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(remove_symlinks_to);
+ assert(fd >= 0);
+ assert(path);
+ assert(config_path);
+ assert(lp);
+ assert(restart);
+
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ rewinddir(d);
+
+ FOREACH_DIRENT(de, d, return -errno) {
+
+ dirent_ensure_type(d, de);
+
+ if (de->d_type == DT_DIR) {
+ _cleanup_free_ char *p = NULL;
+ int nfd, q;
+
+ nfd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ if (nfd < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ if (r == 0)
+ r = -errno;
+ continue;
+ }
+
+ p = path_make_absolute(de->d_name, path);
+ if (!p) {
+ safe_close(nfd);
+ return -ENOMEM;
+ }
+
+ /* This will close nfd, regardless whether it succeeds or not */
+ q = remove_marked_symlinks_fd(remove_symlinks_to, nfd, p, config_path, lp, dry_run, restart, changes, n_changes);
+ if (q < 0 && r == 0)
+ r = q;
+
+ } else if (de->d_type == DT_LNK) {
+ _cleanup_free_ char *p = NULL, *dest = NULL;
+ const char *rp;
+ bool found;
+ int q;
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ p = path_make_absolute(de->d_name, path);
+ if (!p)
+ return -ENOMEM;
+ path_simplify(p, false);
+
+ q = readlink_malloc(p, &dest);
+ if (q == -ENOENT)
+ continue;
+ if (q < 0) {
+ if (r == 0)
+ r = q;
+ continue;
+ }
+
+ /* We remove all links pointing to a file or path that is marked, as well as all files sharing
+ * the same name as a file that is marked. */
+
+ found = set_contains(remove_symlinks_to, dest) ||
+ set_contains(remove_symlinks_to, basename(dest)) ||
+ set_contains(remove_symlinks_to, de->d_name);
+
+ if (!found)
+ continue;
+
+ if (!dry_run) {
+ if (unlinkat(fd, de->d_name, 0) < 0 && errno != ENOENT) {
+ if (r == 0)
+ r = -errno;
+ unit_file_changes_add(changes, n_changes, -errno, p, NULL);
+ continue;
+ }
+
+ (void) rmdir_parents(p, config_path);
+ }
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, p, NULL);
+
+ /* Now, remember the full path (but with the root prefix removed) of
+ * the symlink we just removed, and remove any symlinks to it, too. */
+
+ rp = skip_root(lp, p);
+ q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: p);
+ if (q < 0)
+ return q;
+ if (q > 0 && !dry_run)
+ *restart = true;
+ }
+ }
+
+ return r;
+}
+
+static int remove_marked_symlinks(
+ Set *remove_symlinks_to,
+ const char *config_path,
+ const LookupPaths *lp,
+ bool dry_run,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_close_ int fd = -1;
+ bool restart;
+ int r = 0;
+
+ assert(config_path);
+ assert(lp);
+
+ if (set_size(remove_symlinks_to) <= 0)
+ return 0;
+
+ fd = open(config_path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC);
+ if (fd < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ do {
+ int q, cfd;
+ restart = false;
+
+ cfd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (cfd < 0)
+ return -errno;
+
+ /* This takes possession of cfd and closes it */
+ q = remove_marked_symlinks_fd(remove_symlinks_to, cfd, config_path, config_path, lp, dry_run, &restart, changes, n_changes);
+ if (r == 0)
+ r = q;
+ } while (restart);
+
+ return r;
+}
+
+static int is_symlink_with_known_name(const UnitFileInstallInfo *i, const char *name) {
+ int r;
+
+ if (streq(name, i->name))
+ return true;
+
+ if (strv_contains(i->aliases, name))
+ return true;
+
+ /* Look for template symlink matching DefaultInstance */
+ if (i->default_instance && unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE)) {
+ _cleanup_free_ char *s = NULL;
+
+ r = unit_name_replace_instance(i->name, i->default_instance, &s);
+ if (r < 0) {
+ if (r != -EINVAL)
+ return r;
+
+ } else if (streq(name, s))
+ return true;
+ }
+
+ return false;
+}
+
+static int find_symlinks_fd(
+ const char *root_dir,
+ const UnitFileInstallInfo *i,
+ bool match_aliases,
+ bool ignore_same_name,
+ int fd,
+ const char *path,
+ const char *config_path,
+ bool *same_name_link) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(i);
+ assert(fd >= 0);
+ assert(path);
+ assert(config_path);
+ assert(same_name_link);
+
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+
+ dirent_ensure_type(d, de);
+
+ if (de->d_type == DT_DIR) {
+ _cleanup_free_ char *p = NULL;
+ int nfd, q;
+
+ nfd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ if (nfd < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ if (r == 0)
+ r = -errno;
+ continue;
+ }
+
+ p = path_make_absolute(de->d_name, path);
+ if (!p) {
+ safe_close(nfd);
+ return -ENOMEM;
+ }
+
+ /* This will close nfd, regardless whether it succeeds or not */
+ q = find_symlinks_fd(root_dir, i, match_aliases, ignore_same_name, nfd,
+ p, config_path, same_name_link);
+ if (q > 0)
+ return 1;
+ if (r == 0)
+ r = q;
+
+ } else if (de->d_type == DT_LNK) {
+ _cleanup_free_ char *p = NULL, *dest = NULL;
+ bool found_path = false, found_dest, b = false;
+ int q;
+
+ /* Acquire symlink name */
+ p = path_make_absolute(de->d_name, path);
+ if (!p)
+ return -ENOMEM;
+
+ /* Acquire symlink destination */
+ q = readlink_malloc(p, &dest);
+ if (q == -ENOENT)
+ continue;
+ if (q < 0) {
+ if (r == 0)
+ r = q;
+ continue;
+ }
+
+ /* Make absolute */
+ if (!path_is_absolute(dest)) {
+ char *x;
+
+ x = prefix_root(root_dir, dest);
+ if (!x)
+ return -ENOMEM;
+
+ free_and_replace(dest, x);
+ }
+
+ assert(unit_name_is_valid(i->name, UNIT_NAME_ANY));
+ if (!ignore_same_name)
+ /* Check if the symlink itself matches what we are looking for.
+ *
+ * If ignore_same_name is specified, we are in one of the directories which
+ * have lower priority than the unit file, and even if a file or symlink with
+ * this name was found, we should ignore it. */
+ found_path = streq(de->d_name, i->name);
+
+ /* Check if what the symlink points to matches what we are looking for */
+ found_dest = streq(basename(dest), i->name);
+
+ if (found_path && found_dest) {
+ _cleanup_free_ char *t = NULL;
+
+ /* Filter out same name links in the main
+ * config path */
+ t = path_make_absolute(i->name, config_path);
+ if (!t)
+ return -ENOMEM;
+
+ b = path_equal(t, p);
+ }
+
+ if (b)
+ *same_name_link = true;
+ else if (found_path || found_dest) {
+ if (!match_aliases)
+ return 1;
+
+ /* Check if symlink name is in the set of names used by [Install] */
+ q = is_symlink_with_known_name(i, de->d_name);
+ if (q < 0)
+ return q;
+ if (q > 0)
+ return 1;
+ }
+ }
+ }
+
+ return r;
+}
+
+static int find_symlinks(
+ const char *root_dir,
+ const UnitFileInstallInfo *i,
+ bool match_name,
+ bool ignore_same_name,
+ const char *config_path,
+ bool *same_name_link) {
+
+ int fd;
+
+ assert(i);
+ assert(config_path);
+ assert(same_name_link);
+
+ fd = open(config_path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC);
+ if (fd < 0) {
+ if (IN_SET(errno, ENOENT, ENOTDIR, EACCES))
+ return 0;
+ return -errno;
+ }
+
+ /* This takes possession of fd and closes it */
+ return find_symlinks_fd(root_dir, i, match_name, ignore_same_name, fd,
+ config_path, config_path, same_name_link);
+}
+
+static int find_symlinks_in_scope(
+ UnitFileScope scope,
+ const LookupPaths *paths,
+ const UnitFileInstallInfo *i,
+ bool match_name,
+ UnitFileState *state) {
+
+ bool same_name_link_runtime = false, same_name_link_config = false;
+ bool enabled_in_runtime = false, enabled_at_all = false;
+ bool ignore_same_name = false;
+ char **p;
+ int r;
+
+ assert(paths);
+ assert(i);
+
+ /* As we iterate over the list of search paths in paths->search_path, we may encounter "same name"
+ * symlinks. The ones which are "below" (i.e. have lower priority) than the unit file itself are
+ * efectively masked, so we should ignore them. */
+
+ STRV_FOREACH(p, paths->search_path) {
+ bool same_name_link = false;
+
+ r = find_symlinks(paths->root_dir, i, match_name, ignore_same_name, *p, &same_name_link);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* We found symlinks in this dir? Yay! Let's see where precisely it is enabled. */
+
+ if (path_equal_ptr(*p, paths->persistent_config)) {
+ /* This is the best outcome, let's return it immediately. */
+ *state = UNIT_FILE_ENABLED;
+ return 1;
+ }
+
+ /* look for global enablement of user units */
+ if (scope == UNIT_FILE_USER && path_is_user_config_dir(*p)) {
+ *state = UNIT_FILE_ENABLED;
+ return 1;
+ }
+
+ r = path_is_runtime(paths, *p, false);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ enabled_in_runtime = true;
+ else
+ enabled_at_all = true;
+
+ } else if (same_name_link) {
+ if (path_equal_ptr(*p, paths->persistent_config))
+ same_name_link_config = true;
+ else {
+ r = path_is_runtime(paths, *p, false);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ same_name_link_runtime = true;
+ }
+ }
+
+ /* Check if next iteration will be "below" the unit file (either a regular file
+ * or a symlink), and hence should be ignored */
+ if (!ignore_same_name && path_startswith(i->path, *p))
+ ignore_same_name = true;
+ }
+
+ if (enabled_in_runtime) {
+ *state = UNIT_FILE_ENABLED_RUNTIME;
+ return 1;
+ }
+
+ /* Here's a special rule: if the unit we are looking for is an instance, and it symlinked in the search path
+ * outside of runtime and configuration directory, then we consider it statically enabled. Note we do that only
+ * for instance, not for regular names, as those are merely aliases, while instances explicitly instantiate
+ * something, and hence are a much stronger concept. */
+ if (enabled_at_all && unit_name_is_valid(i->name, UNIT_NAME_INSTANCE)) {
+ *state = UNIT_FILE_STATIC;
+ return 1;
+ }
+
+ /* Hmm, we didn't find it, but maybe we found the same name
+ * link? */
+ if (same_name_link_config) {
+ *state = UNIT_FILE_LINKED;
+ return 1;
+ }
+ if (same_name_link_runtime) {
+ *state = UNIT_FILE_LINKED_RUNTIME;
+ return 1;
+ }
+
+ return 0;
+}
+
+static void install_info_free(UnitFileInstallInfo *i) {
+
+ if (!i)
+ return;
+
+ free(i->name);
+ free(i->path);
+ strv_free(i->aliases);
+ strv_free(i->wanted_by);
+ strv_free(i->required_by);
+ strv_free(i->also);
+ free(i->default_instance);
+ free(i->symlink_target);
+ free(i);
+}
+
+static void install_context_done(InstallContext *c) {
+ assert(c);
+
+ c->will_process = ordered_hashmap_free_with_destructor(c->will_process, install_info_free);
+ c->have_processed = ordered_hashmap_free_with_destructor(c->have_processed, install_info_free);
+}
+
+static UnitFileInstallInfo *install_info_find(InstallContext *c, const char *name) {
+ UnitFileInstallInfo *i;
+
+ i = ordered_hashmap_get(c->have_processed, name);
+ if (i)
+ return i;
+
+ return ordered_hashmap_get(c->will_process, name);
+}
+
+static int install_info_may_process(
+ const UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+ assert(i);
+ assert(paths);
+
+ /* Checks whether the loaded unit file is one we should process, or is masked,
+ * transient or generated and thus not subject to enable/disable operations. */
+
+ if (i->type == UNIT_FILE_TYPE_MASKED) {
+ unit_file_changes_add(changes, n_changes, -ERFKILL, i->path, NULL);
+ return -ERFKILL;
+ }
+ if (path_is_generator(paths, i->path) ||
+ path_is_transient(paths, i->path)) {
+ unit_file_changes_add(changes, n_changes, -EADDRNOTAVAIL, i->path, NULL);
+ return -EADDRNOTAVAIL;
+ }
+
+ return 0;
+}
+
+/**
+ * Adds a new UnitFileInstallInfo entry under name in the InstallContext.will_process
+ * hashmap, or retrieves the existing one if already present.
+ *
+ * Returns negative on error, 0 if the unit was already known, 1 otherwise.
+ */
+static int install_info_add(
+ InstallContext *c,
+ const char *name,
+ const char *path,
+ bool auxiliary,
+ UnitFileInstallInfo **ret) {
+
+ UnitFileInstallInfo *i = NULL;
+ int r;
+
+ assert(c);
+ assert(name || path);
+
+ if (!name)
+ name = basename(path);
+
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ i = install_info_find(c, name);
+ if (i) {
+ i->auxiliary = i->auxiliary && auxiliary;
+
+ if (ret)
+ *ret = i;
+ return 0;
+ }
+
+ r = ordered_hashmap_ensure_allocated(&c->will_process, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ i = new(UnitFileInstallInfo, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (UnitFileInstallInfo) {
+ .type = _UNIT_FILE_TYPE_INVALID,
+ .auxiliary = auxiliary,
+ };
+
+ i->name = strdup(name);
+ if (!i->name) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (path) {
+ i->path = strdup(path);
+ if (!i->path) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ r = ordered_hashmap_put(c->will_process, i->name, i);
+ if (r < 0)
+ goto fail;
+
+ if (ret)
+ *ret = i;
+
+ return 1;
+
+fail:
+ install_info_free(i);
+ return r;
+}
+
+static int config_parse_alias(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ UnitType type;
+
+ assert(unit);
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ type = unit_name_to_type(unit);
+ if (!unit_type_may_alias(type))
+ return log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Alias= is not allowed for %s units, ignoring.",
+ unit_type_to_string(type));
+
+ return config_parse_strv(unit, filename, line, section, section_line,
+ lvalue, ltype, rvalue, data, userdata);
+}
+
+static int config_parse_also(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ UnitFileInstallInfo *info = userdata, *alsoinfo = NULL;
+ InstallContext *c = data;
+ int r;
+
+ assert(unit);
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *printed = NULL;
+
+ r = extract_first_word(&rvalue, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = install_full_printf(info, word, &printed);
+ if (r < 0)
+ return r;
+
+ if (!unit_name_is_valid(printed, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ r = install_info_add(c, printed, NULL, true, &alsoinfo);
+ if (r < 0)
+ return r;
+
+ r = strv_push(&info->also, printed);
+ if (r < 0)
+ return r;
+
+ printed = NULL;
+ }
+
+ return 0;
+}
+
+static int config_parse_default_instance(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ UnitFileInstallInfo *i = data;
+ _cleanup_free_ char *printed = NULL;
+ int r;
+
+ assert(unit);
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (unit_name_is_valid(unit, UNIT_NAME_INSTANCE))
+ /* When enabling an instance, we might be using a template unit file,
+ * but we should ignore DefaultInstance silently. */
+ return 0;
+ if (!unit_name_is_valid(unit, UNIT_NAME_TEMPLATE))
+ return log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "DefaultInstance= only makes sense for template units, ignoring.");
+
+ r = install_full_printf(i, rvalue, &printed);
+ if (r < 0)
+ return r;
+
+ if (!unit_instance_is_valid(printed))
+ return -EINVAL;
+
+ return free_and_replace(i->default_instance, printed);
+}
+
+static int unit_file_load(
+ InstallContext *c,
+ UnitFileInstallInfo *info,
+ const char *path,
+ const char *root_dir,
+ SearchFlags flags) {
+
+ const ConfigTableItem items[] = {
+ { "Install", "Alias", config_parse_alias, 0, &info->aliases },
+ { "Install", "WantedBy", config_parse_strv, 0, &info->wanted_by },
+ { "Install", "RequiredBy", config_parse_strv, 0, &info->required_by },
+ { "Install", "DefaultInstance", config_parse_default_instance, 0, info },
+ { "Install", "Also", config_parse_also, 0, c },
+ {}
+ };
+
+ UnitType type;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ int r;
+
+ assert(info);
+ assert(path);
+
+ if (!(flags & SEARCH_DROPIN)) {
+ /* Loading or checking for the main unit file… */
+
+ type = unit_name_to_type(info->name);
+ if (type < 0)
+ return -EINVAL;
+ if (unit_name_is_valid(info->name, UNIT_NAME_TEMPLATE|UNIT_NAME_INSTANCE) && !unit_type_may_template(type))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unit type %s cannot be templated.", unit_type_to_string(type));
+
+ if (!(flags & SEARCH_LOAD)) {
+ r = lstat(path, &st);
+ if (r < 0)
+ return -errno;
+
+ if (null_or_empty(&st))
+ info->type = UNIT_FILE_TYPE_MASKED;
+ else if (S_ISREG(st.st_mode))
+ info->type = UNIT_FILE_TYPE_REGULAR;
+ else if (S_ISLNK(st.st_mode))
+ return -ELOOP;
+ else if (S_ISDIR(st.st_mode))
+ return -EISDIR;
+ else
+ return -ENOTTY;
+
+ return 0;
+ }
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+ } else {
+ /* Operating on a drop-in file. If we aren't supposed to load the unit file drop-ins don't matter, let's hence shortcut this. */
+
+ if (!(flags & SEARCH_LOAD))
+ return 0;
+
+ fd = chase_symlinks_and_open(path, root_dir, 0, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
+ if (fd < 0)
+ return fd;
+ }
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (null_or_empty(&st)) {
+ if ((flags & SEARCH_DROPIN) == 0)
+ info->type = UNIT_FILE_TYPE_MASKED;
+
+ return 0;
+ }
+
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return r;
+
+ f = fdopen(fd, "r");
+ if (!f)
+ return -errno;
+ fd = -1;
+
+ /* c is only needed if we actually load the file (it's referenced from items[] btw, in case you wonder.) */
+ assert(c);
+
+ r = config_parse(info->name, path, f,
+ NULL,
+ config_item_table_lookup, items,
+ CONFIG_PARSE_RELAXED|CONFIG_PARSE_ALLOW_INCLUDE, info);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse %s: %m", info->name);
+
+ if ((flags & SEARCH_DROPIN) == 0)
+ info->type = UNIT_FILE_TYPE_REGULAR;
+
+ return
+ (int) strv_length(info->aliases) +
+ (int) strv_length(info->wanted_by) +
+ (int) strv_length(info->required_by);
+}
+
+static int unit_file_load_or_readlink(
+ InstallContext *c,
+ UnitFileInstallInfo *info,
+ const char *path,
+ const char *root_dir,
+ SearchFlags flags) {
+
+ _cleanup_free_ char *target = NULL;
+ int r;
+
+ r = unit_file_load(c, info, path, root_dir, flags);
+ if (r != -ELOOP || (flags & SEARCH_DROPIN))
+ return r;
+
+ /* This is a symlink, let's read it. */
+
+ r = readlink_malloc(path, &target);
+ if (r < 0)
+ return r;
+
+ if (path_equal(target, "/dev/null"))
+ info->type = UNIT_FILE_TYPE_MASKED;
+ else {
+ const char *bn;
+ UnitType a, b;
+
+ bn = basename(target);
+
+ if (unit_name_is_valid(info->name, UNIT_NAME_PLAIN)) {
+
+ if (!unit_name_is_valid(bn, UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ } else if (unit_name_is_valid(info->name, UNIT_NAME_INSTANCE)) {
+
+ if (!unit_name_is_valid(bn, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+
+ } else if (unit_name_is_valid(info->name, UNIT_NAME_TEMPLATE)) {
+
+ if (!unit_name_is_valid(bn, UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ /* Enforce that the symlink destination does not
+ * change the unit file type. */
+
+ a = unit_name_to_type(info->name);
+ b = unit_name_to_type(bn);
+ if (a < 0 || b < 0 || a != b)
+ return -EINVAL;
+
+ if (path_is_absolute(target))
+ /* This is an absolute path, prefix the root so that we always deal with fully qualified paths */
+ info->symlink_target = prefix_root(root_dir, target);
+ else
+ /* This is a relative path, take it relative to the dir the symlink is located in. */
+ info->symlink_target = file_in_same_dir(path, target);
+ if (!info->symlink_target)
+ return -ENOMEM;
+
+ info->type = UNIT_FILE_TYPE_SYMLINK;
+ }
+
+ return 0;
+}
+
+static int unit_file_search(
+ InstallContext *c,
+ UnitFileInstallInfo *info,
+ const LookupPaths *paths,
+ SearchFlags flags) {
+
+ const char *dropin_dir_name = NULL, *dropin_template_dir_name = NULL;
+ _cleanup_strv_free_ char **dirs = NULL, **files = NULL;
+ _cleanup_free_ char *template = NULL;
+ bool found_unit = false;
+ int r, result;
+ char **p;
+
+ assert(info);
+ assert(paths);
+
+ /* Was this unit already loaded? */
+ if (info->type != _UNIT_FILE_TYPE_INVALID)
+ return 0;
+
+ if (info->path)
+ return unit_file_load_or_readlink(c, info, info->path, paths->root_dir, flags);
+
+ assert(info->name);
+
+ if (unit_name_is_valid(info->name, UNIT_NAME_INSTANCE)) {
+ r = unit_name_template(info->name, &template);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(p, paths->search_path) {
+ _cleanup_free_ char *path = NULL;
+
+ path = strjoin(*p, "/", info->name);
+ if (!path)
+ return -ENOMEM;
+
+ r = unit_file_load_or_readlink(c, info, path, paths->root_dir, flags);
+ if (r >= 0) {
+ info->path = TAKE_PTR(path);
+ result = r;
+ found_unit = true;
+ break;
+ } else if (!IN_SET(r, -ENOENT, -ENOTDIR, -EACCES))
+ return r;
+ }
+
+ if (!found_unit && template) {
+
+ /* Unit file doesn't exist, however instance
+ * enablement was requested. We will check if it is
+ * possible to load template unit file. */
+
+ STRV_FOREACH(p, paths->search_path) {
+ _cleanup_free_ char *path = NULL;
+
+ path = strjoin(*p, "/", template);
+ if (!path)
+ return -ENOMEM;
+
+ r = unit_file_load_or_readlink(c, info, path, paths->root_dir, flags);
+ if (r >= 0) {
+ info->path = TAKE_PTR(path);
+ result = r;
+ found_unit = true;
+ break;
+ } else if (!IN_SET(r, -ENOENT, -ENOTDIR, -EACCES))
+ return r;
+ }
+ }
+
+ if (!found_unit)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Cannot find unit %s%s%s.",
+ info->name, template ? " or " : "", strempty(template));
+
+ if (info->type == UNIT_FILE_TYPE_MASKED)
+ return result;
+
+ /* Search for drop-in directories */
+
+ dropin_dir_name = strjoina(info->name, ".d");
+ STRV_FOREACH(p, paths->search_path) {
+ char *path;
+
+ path = path_join(*p, dropin_dir_name);
+ if (!path)
+ return -ENOMEM;
+
+ r = strv_consume(&dirs, path);
+ if (r < 0)
+ return r;
+ }
+
+ if (template) {
+ dropin_template_dir_name = strjoina(template, ".d");
+ STRV_FOREACH(p, paths->search_path) {
+ char *path;
+
+ path = path_join(*p, dropin_template_dir_name);
+ if (!path)
+ return -ENOMEM;
+
+ r = strv_consume(&dirs, path);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* Load drop-in conf files */
+
+ r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char**) dirs);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get list of conf files: %m");
+
+ STRV_FOREACH(p, files) {
+ r = unit_file_load_or_readlink(c, info, *p, paths->root_dir, flags | SEARCH_DROPIN);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to load conf file %s: %m", *p);
+ }
+
+ return result;
+}
+
+static int install_info_follow(
+ InstallContext *c,
+ UnitFileInstallInfo *i,
+ const char *root_dir,
+ SearchFlags flags,
+ bool ignore_different_name) {
+
+ assert(c);
+ assert(i);
+
+ if (i->type != UNIT_FILE_TYPE_SYMLINK)
+ return -EINVAL;
+ if (!i->symlink_target)
+ return -EINVAL;
+
+ /* If the basename doesn't match, the caller should add a
+ * complete new entry for this. */
+
+ if (!ignore_different_name && !streq(basename(i->symlink_target), i->name))
+ return -EXDEV;
+
+ free_and_replace(i->path, i->symlink_target);
+ i->type = _UNIT_FILE_TYPE_INVALID;
+
+ return unit_file_load_or_readlink(c, i, i->path, root_dir, flags);
+}
+
+/**
+ * Search for the unit file. If the unit name is a symlink, follow the symlink to the
+ * target, maybe more than once. Propagate the instance name if present.
+ */
+static int install_info_traverse(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ UnitFileInstallInfo *start,
+ SearchFlags flags,
+ UnitFileInstallInfo **ret) {
+
+ UnitFileInstallInfo *i;
+ unsigned k = 0;
+ int r;
+
+ assert(paths);
+ assert(start);
+ assert(c);
+
+ r = unit_file_search(c, start, paths, flags);
+ if (r < 0)
+ return r;
+
+ i = start;
+ while (i->type == UNIT_FILE_TYPE_SYMLINK) {
+ /* Follow the symlink */
+
+ if (++k > UNIT_FILE_FOLLOW_SYMLINK_MAX)
+ return -ELOOP;
+
+ if (!(flags & SEARCH_FOLLOW_CONFIG_SYMLINKS)) {
+ r = path_is_config(paths, i->path, true);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return -ELOOP;
+ }
+
+ r = install_info_follow(c, i, paths->root_dir, flags, false);
+ if (r == -EXDEV) {
+ _cleanup_free_ char *buffer = NULL;
+ const char *bn;
+
+ /* Target has a different name, create a new
+ * install info object for that, and continue
+ * with that. */
+
+ bn = basename(i->symlink_target);
+
+ if (unit_name_is_valid(i->name, UNIT_NAME_INSTANCE) &&
+ unit_name_is_valid(bn, UNIT_NAME_TEMPLATE)) {
+
+ _cleanup_free_ char *instance = NULL;
+
+ r = unit_name_to_instance(i->name, &instance);
+ if (r < 0)
+ return r;
+
+ r = unit_name_replace_instance(bn, instance, &buffer);
+ if (r < 0)
+ return r;
+
+ if (streq(buffer, i->name)) {
+
+ /* We filled in the instance, and the target stayed the same? If so, then let's
+ * honour the link as it is. */
+
+ r = install_info_follow(c, i, paths->root_dir, flags, true);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ bn = buffer;
+ }
+
+ r = install_info_add(c, bn, NULL, false, &i);
+ if (r < 0)
+ return r;
+
+ /* Try again, with the new target we found. */
+ r = unit_file_search(c, i, paths, flags);
+ if (r == -ENOENT)
+ /* Translate error code to highlight this specific case */
+ return -ENOLINK;
+ }
+
+ if (r < 0)
+ return r;
+ }
+
+ if (ret)
+ *ret = i;
+
+ return 0;
+}
+
+/**
+ * Call install_info_add() with name_or_path as the path (if name_or_path starts with "/")
+ * or the name (otherwise). root_dir is prepended to the path.
+ */
+static int install_info_add_auto(
+ InstallContext *c,
+ const LookupPaths *paths,
+ const char *name_or_path,
+ UnitFileInstallInfo **ret) {
+
+ assert(c);
+ assert(name_or_path);
+
+ if (path_is_absolute(name_or_path)) {
+ const char *pp;
+
+ pp = prefix_roota(paths->root_dir, name_or_path);
+
+ return install_info_add(c, NULL, pp, false, ret);
+ } else
+ return install_info_add(c, name_or_path, NULL, false, ret);
+}
+
+static int install_info_discover(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ const char *name,
+ SearchFlags flags,
+ UnitFileInstallInfo **ret,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ UnitFileInstallInfo *i;
+ int r;
+
+ assert(c);
+ assert(paths);
+ assert(name);
+
+ r = install_info_add_auto(c, paths, name, &i);
+ if (r >= 0)
+ r = install_info_traverse(scope, c, paths, i, flags, ret);
+
+ if (r < 0)
+ unit_file_changes_add(changes, n_changes, r, name, NULL);
+ return r;
+}
+
+static int install_info_discover_and_check(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ const char *name,
+ SearchFlags flags,
+ UnitFileInstallInfo **ret,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ int r;
+
+ r = install_info_discover(scope, c, paths, name, flags, ret, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ return install_info_may_process(ret ? *ret : NULL, paths, changes, n_changes);
+}
+
+static int install_info_symlink_alias(
+ UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ const char *config_path,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ char **s;
+ int r = 0, q;
+
+ assert(i);
+ assert(paths);
+ assert(config_path);
+
+ STRV_FOREACH(s, i->aliases) {
+ _cleanup_free_ char *alias_path = NULL, *dst = NULL;
+
+ q = install_full_printf(i, *s, &dst);
+ if (q < 0)
+ return q;
+
+ alias_path = path_make_absolute(dst, config_path);
+ if (!alias_path)
+ return -ENOMEM;
+
+ q = create_symlink(paths, i->path, alias_path, force, changes, n_changes);
+ if (r == 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int install_info_symlink_wants(
+ UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ const char *config_path,
+ char **list,
+ const char *suffix,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *buf = NULL;
+ const char *n;
+ char **s;
+ int r = 0, q;
+
+ assert(i);
+ assert(paths);
+ assert(config_path);
+
+ if (strv_isempty(list))
+ return 0;
+
+ if (unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE)) {
+ UnitFileInstallInfo instance = {
+ .type = _UNIT_FILE_TYPE_INVALID,
+ };
+ _cleanup_free_ char *path = NULL;
+
+ /* If this is a template, and we have no instance, don't do anything */
+ if (!i->default_instance)
+ return 1;
+
+ r = unit_name_replace_instance(i->name, i->default_instance, &buf);
+ if (r < 0)
+ return r;
+
+ instance.name = buf;
+ r = unit_file_search(NULL, &instance, paths, SEARCH_FOLLOW_CONFIG_SYMLINKS);
+ if (r < 0)
+ return r;
+
+ path = TAKE_PTR(instance.path);
+
+ if (instance.type == UNIT_FILE_TYPE_MASKED) {
+ unit_file_changes_add(changes, n_changes, -ERFKILL, path, NULL);
+ return -ERFKILL;
+ }
+
+ n = buf;
+ } else
+ n = i->name;
+
+ STRV_FOREACH(s, list) {
+ _cleanup_free_ char *path = NULL, *dst = NULL;
+
+ q = install_full_printf(i, *s, &dst);
+ if (q < 0)
+ return q;
+
+ if (!unit_name_is_valid(dst, UNIT_NAME_ANY)) {
+ r = -EINVAL;
+ continue;
+ }
+
+ path = strjoin(config_path, "/", dst, suffix, n);
+ if (!path)
+ return -ENOMEM;
+
+ q = create_symlink(paths, i->path, path, true, changes, n_changes);
+ if (r == 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int install_info_symlink_link(
+ UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ const char *config_path,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ assert(i);
+ assert(paths);
+ assert(config_path);
+ assert(i->path);
+
+ r = in_search_path(paths, i->path);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ path = strjoin(config_path, "/", i->name);
+ if (!path)
+ return -ENOMEM;
+
+ return create_symlink(paths, i->path, path, force, changes, n_changes);
+}
+
+static int install_info_apply(
+ UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ const char *config_path,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ int r, q;
+
+ assert(i);
+ assert(paths);
+ assert(config_path);
+
+ if (i->type != UNIT_FILE_TYPE_REGULAR)
+ return 0;
+
+ r = install_info_symlink_alias(i, paths, config_path, force, changes, n_changes);
+
+ q = install_info_symlink_wants(i, paths, config_path, i->wanted_by, ".wants/", changes, n_changes);
+ if (r == 0)
+ r = q;
+
+ q = install_info_symlink_wants(i, paths, config_path, i->required_by, ".requires/", changes, n_changes);
+ if (r == 0)
+ r = q;
+
+ q = install_info_symlink_link(i, paths, config_path, force, changes, n_changes);
+ /* Do not count links to the unit file towards the "carries_install_info" count */
+ if (r == 0 && q < 0)
+ r = q;
+
+ return r;
+}
+
+static int install_context_apply(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ const char *config_path,
+ bool force,
+ SearchFlags flags,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ UnitFileInstallInfo *i;
+ int r;
+
+ assert(c);
+ assert(paths);
+ assert(config_path);
+
+ if (ordered_hashmap_isempty(c->will_process))
+ return 0;
+
+ r = ordered_hashmap_ensure_allocated(&c->have_processed, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = 0;
+ while ((i = ordered_hashmap_first(c->will_process))) {
+ int q;
+
+ q = ordered_hashmap_move_one(c->have_processed, c->will_process, i->name);
+ if (q < 0)
+ return q;
+
+ q = install_info_traverse(scope, c, paths, i, flags, NULL);
+ if (q < 0) {
+ unit_file_changes_add(changes, n_changes, r, i->name, NULL);
+ return q;
+ }
+
+ /* We can attempt to process a masked unit when a different unit
+ * that we were processing specifies it in Also=. */
+ if (i->type == UNIT_FILE_TYPE_MASKED) {
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_MASKED, i->path, NULL);
+ if (r >= 0)
+ /* Assume that something *could* have been enabled here,
+ * avoid "empty [Install] section" warning. */
+ r += 1;
+ continue;
+ }
+
+ if (i->type != UNIT_FILE_TYPE_REGULAR)
+ continue;
+
+ q = install_info_apply(i, paths, config_path, force, changes, n_changes);
+ if (r >= 0) {
+ if (q < 0)
+ r = q;
+ else
+ r += q;
+ }
+ }
+
+ return r;
+}
+
+static int install_context_mark_for_removal(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ Set **remove_symlinks_to,
+ const char *config_path,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ UnitFileInstallInfo *i;
+ int r;
+
+ assert(c);
+ assert(paths);
+ assert(config_path);
+
+ /* Marks all items for removal */
+
+ if (ordered_hashmap_isempty(c->will_process))
+ return 0;
+
+ r = ordered_hashmap_ensure_allocated(&c->have_processed, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ while ((i = ordered_hashmap_first(c->will_process))) {
+
+ r = ordered_hashmap_move_one(c->have_processed, c->will_process, i->name);
+ if (r < 0)
+ return r;
+
+ r = install_info_traverse(scope, c, paths, i, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, NULL);
+ if (r == -ENOLINK) {
+ log_debug_errno(r, "Name %s leads to a dangling symlink, removing name.", i->name);
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_DANGLING, i->path ?: i->name, NULL);
+ } else if (r == -ENOENT) {
+
+ if (i->auxiliary) /* some unit specified in Also= or similar is missing */
+ log_debug_errno(r, "Auxiliary unit of %s not found, removing name.", i->name);
+ else {
+ log_debug_errno(r, "Unit %s not found, removing name.", i->name);
+ unit_file_changes_add(changes, n_changes, r, i->path ?: i->name, NULL);
+ }
+
+ } else if (r < 0) {
+ log_debug_errno(r, "Failed to find unit %s, removing name: %m", i->name);
+ unit_file_changes_add(changes, n_changes, r, i->path ?: i->name, NULL);
+ } else if (i->type == UNIT_FILE_TYPE_MASKED) {
+ log_debug("Unit file %s is masked, ignoring.", i->name);
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_MASKED, i->path ?: i->name, NULL);
+ continue;
+ } else if (i->type != UNIT_FILE_TYPE_REGULAR) {
+ log_debug("Unit %s has type %s, ignoring.", i->name, unit_file_type_to_string(i->type) ?: "invalid");
+ continue;
+ }
+
+ r = mark_symlink_for_removal(remove_symlinks_to, i->name);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int unit_file_mask(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ const char *config_path;
+ char **i;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ STRV_FOREACH(i, files) {
+ _cleanup_free_ char *path = NULL;
+ int q;
+
+ if (!unit_name_is_valid(*i, UNIT_NAME_ANY)) {
+ if (r == 0)
+ r = -EINVAL;
+ continue;
+ }
+
+ path = path_make_absolute(*i, config_path);
+ if (!path)
+ return -ENOMEM;
+
+ q = create_symlink(&paths, "/dev/null", path, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ return r;
+}
+
+int unit_file_unmask(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+ _cleanup_strv_free_ char **todo = NULL;
+ size_t n_todo = 0, n_allocated = 0;
+ const char *config_path;
+ char **i;
+ bool dry_run;
+ int r, q;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ dry_run = !!(flags & UNIT_FILE_DRY_RUN);
+
+ STRV_FOREACH(i, files) {
+ _cleanup_free_ char *path = NULL;
+
+ if (!unit_name_is_valid(*i, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ path = path_make_absolute(*i, config_path);
+ if (!path)
+ return -ENOMEM;
+
+ r = null_or_empty_path(path);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+ return -ENOMEM;
+
+ todo[n_todo] = strdup(*i);
+ if (!todo[n_todo])
+ return -ENOMEM;
+
+ n_todo++;
+ }
+
+ strv_uniq(todo);
+
+ r = 0;
+ STRV_FOREACH(i, todo) {
+ _cleanup_free_ char *path = NULL;
+ const char *rp;
+
+ path = path_make_absolute(*i, config_path);
+ if (!path)
+ return -ENOMEM;
+
+ if (!dry_run && unlink(path) < 0) {
+ if (errno != ENOENT) {
+ if (r >= 0)
+ r = -errno;
+ unit_file_changes_add(changes, n_changes, -errno, path, NULL);
+ }
+
+ continue;
+ }
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, path, NULL);
+
+ rp = skip_root(&paths, path);
+ q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: path);
+ if (q < 0)
+ return q;
+ }
+
+ q = remove_marked_symlinks(remove_symlinks_to, config_path, &paths, dry_run, changes, n_changes);
+ if (r >= 0)
+ r = q;
+
+ return r;
+}
+
+int unit_file_link(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_strv_free_ char **todo = NULL;
+ size_t n_todo = 0, n_allocated = 0;
+ const char *config_path;
+ char **i;
+ int r, q;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ STRV_FOREACH(i, files) {
+ _cleanup_free_ char *full = NULL;
+ struct stat st;
+ char *fn;
+
+ if (!path_is_absolute(*i))
+ return -EINVAL;
+
+ fn = basename(*i);
+ if (!unit_name_is_valid(fn, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ full = prefix_root(paths.root_dir, *i);
+ if (!full)
+ return -ENOMEM;
+
+ if (lstat(full, &st) < 0)
+ return -errno;
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return r;
+
+ q = in_search_path(&paths, *i);
+ if (q < 0)
+ return q;
+ if (q > 0)
+ continue;
+
+ if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+ return -ENOMEM;
+
+ todo[n_todo] = strdup(*i);
+ if (!todo[n_todo])
+ return -ENOMEM;
+
+ n_todo++;
+ }
+
+ strv_uniq(todo);
+
+ r = 0;
+ STRV_FOREACH(i, todo) {
+ _cleanup_free_ char *new_path = NULL;
+
+ new_path = path_make_absolute(basename(*i), config_path);
+ if (!new_path)
+ return -ENOMEM;
+
+ q = create_symlink(&paths, *i, new_path, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int path_shall_revert(const LookupPaths *paths, const char *path) {
+ int r;
+
+ assert(paths);
+ assert(path);
+
+ /* Checks whether the path is one where the drop-in directories shall be removed. */
+
+ r = path_is_config(paths, path, true);
+ if (r != 0)
+ return r;
+
+ r = path_is_control(paths, path);
+ if (r != 0)
+ return r;
+
+ return path_is_transient(paths, path);
+}
+
+int unit_file_revert(
+ UnitFileScope scope,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_strv_free_ char **todo = NULL;
+ size_t n_todo = 0, n_allocated = 0;
+ char **i;
+ int r, q;
+
+ /* Puts a unit file back into vendor state. This means:
+ *
+ * a) we remove all drop-in snippets added by the user ("config"), add to transient units ("transient"), and
+ * added via "systemctl set-property" ("control"), but not if the drop-in is generated ("generated").
+ *
+ * c) if there's a vendor unit file (i.e. one in /usr) we remove any configured overriding unit files (i.e. in
+ * "config", but not in "transient" or "control" or even "generated").
+ *
+ * We remove all that in both the runtime and the persistent directories, if that applies.
+ */
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, files) {
+ bool has_vendor = false;
+ char **p;
+
+ if (!unit_name_is_valid(*i, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ STRV_FOREACH(p, paths.search_path) {
+ _cleanup_free_ char *path = NULL, *dropin = NULL;
+ struct stat st;
+
+ path = path_make_absolute(*i, *p);
+ if (!path)
+ return -ENOMEM;
+
+ r = lstat(path, &st);
+ if (r < 0) {
+ if (errno != ENOENT)
+ return -errno;
+ } else if (S_ISREG(st.st_mode)) {
+ /* Check if there's a vendor version */
+ r = path_is_vendor(&paths, path);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ has_vendor = true;
+ }
+
+ dropin = strappend(path, ".d");
+ if (!dropin)
+ return -ENOMEM;
+
+ r = lstat(dropin, &st);
+ if (r < 0) {
+ if (errno != ENOENT)
+ return -errno;
+ } else if (S_ISDIR(st.st_mode)) {
+ /* Remove the drop-ins */
+ r = path_shall_revert(&paths, dropin);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+ return -ENOMEM;
+
+ todo[n_todo++] = TAKE_PTR(dropin);
+ }
+ }
+ }
+
+ if (!has_vendor)
+ continue;
+
+ /* OK, there's a vendor version, hence drop all configuration versions */
+ STRV_FOREACH(p, paths.search_path) {
+ _cleanup_free_ char *path = NULL;
+ struct stat st;
+
+ path = path_make_absolute(*i, *p);
+ if (!path)
+ return -ENOMEM;
+
+ r = lstat(path, &st);
+ if (r < 0) {
+ if (errno != ENOENT)
+ return -errno;
+ } else if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
+ r = path_is_config(&paths, path, true);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+ return -ENOMEM;
+
+ todo[n_todo++] = TAKE_PTR(path);
+ }
+ }
+ }
+ }
+
+ strv_uniq(todo);
+
+ r = 0;
+ STRV_FOREACH(i, todo) {
+ _cleanup_strv_free_ char **fs = NULL;
+ const char *rp;
+ char **j;
+
+ (void) get_files_in_directory(*i, &fs);
+
+ q = rm_rf(*i, REMOVE_ROOT|REMOVE_PHYSICAL);
+ if (q < 0 && q != -ENOENT && r >= 0) {
+ r = q;
+ continue;
+ }
+
+ STRV_FOREACH(j, fs) {
+ _cleanup_free_ char *t = NULL;
+
+ t = strjoin(*i, "/", *j);
+ if (!t)
+ return -ENOMEM;
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, t, NULL);
+ }
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, *i, NULL);
+
+ rp = skip_root(&paths, *i);
+ q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: *i);
+ if (q < 0)
+ return q;
+ }
+
+ q = remove_marked_symlinks(remove_symlinks_to, paths.runtime_config, &paths, false, changes, n_changes);
+ if (r >= 0)
+ r = q;
+
+ q = remove_marked_symlinks(remove_symlinks_to, paths.persistent_config, &paths, false, changes, n_changes);
+ if (r >= 0)
+ r = q;
+
+ return r;
+}
+
+int unit_file_add_dependency(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ const char *target,
+ UnitDependency dep,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ UnitFileInstallInfo *i, *target_info;
+ const char *config_path;
+ char **f;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(target);
+
+ if (!IN_SET(dep, UNIT_WANTS, UNIT_REQUIRES))
+ return -EINVAL;
+
+ if (!unit_name_is_valid(target, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ r = install_info_discover_and_check(scope, &c, &paths, target, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &target_info, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ assert(target_info->type == UNIT_FILE_TYPE_REGULAR);
+
+ STRV_FOREACH(f, files) {
+ char ***l;
+
+ r = install_info_discover_and_check(scope, &c, &paths, *f, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ assert(i->type == UNIT_FILE_TYPE_REGULAR);
+
+ /* We didn't actually load anything from the unit
+ * file, but instead just add in our new symlink to
+ * create. */
+
+ if (dep == UNIT_WANTS)
+ l = &i->wanted_by;
+ else
+ l = &i->required_by;
+
+ strv_free(*l);
+ *l = strv_new(target_info->name);
+ if (!*l)
+ return -ENOMEM;
+ }
+
+ return install_context_apply(scope, &c, &paths, config_path, !!(flags & UNIT_FILE_FORCE), SEARCH_FOLLOW_CONFIG_SYMLINKS, changes, n_changes);
+}
+
+int unit_file_enable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ const char *config_path;
+ UnitFileInstallInfo *i;
+ char **f;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ STRV_FOREACH(f, files) {
+ r = install_info_discover_and_check(scope, &c, &paths, *f, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ assert(i->type == UNIT_FILE_TYPE_REGULAR);
+ }
+
+ /* This will return the number of symlink rules that were
+ supposed to be created, not the ones actually created. This
+ is useful to determine whether the passed files had any
+ installation data at all. */
+
+ return install_context_apply(scope, &c, &paths, config_path, !!(flags & UNIT_FILE_FORCE), SEARCH_LOAD, changes, n_changes);
+}
+
+int unit_file_disable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+ const char *config_path;
+ char **i;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ STRV_FOREACH(i, files) {
+ if (!unit_name_is_valid(*i, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ r = install_info_add(&c, *i, NULL, false, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = install_context_mark_for_removal(scope, &c, &paths, &remove_symlinks_to, config_path, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ return remove_marked_symlinks(remove_symlinks_to, config_path, &paths, !!(flags & UNIT_FILE_DRY_RUN), changes, n_changes);
+}
+
+int unit_file_reenable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ char **n;
+ int r;
+ size_t l, i;
+
+ /* First, we invoke the disable command with only the basename... */
+ l = strv_length(files);
+ n = newa(char*, l+1);
+ for (i = 0; i < l; i++)
+ n[i] = basename(files[i]);
+ n[i] = NULL;
+
+ r = unit_file_disable(scope, flags, root_dir, n, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ /* But the enable command with the full name */
+ return unit_file_enable(scope, flags, root_dir, files, changes, n_changes);
+}
+
+int unit_file_set_default(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ const char *name,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ UnitFileInstallInfo *i;
+ const char *new_path;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(name);
+
+ if (unit_name_to_type(name) != UNIT_TARGET) /* this also validates the name */
+ return -EINVAL;
+ if (streq(name, SPECIAL_DEFAULT_TARGET))
+ return -EINVAL;
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ r = install_info_discover_and_check(scope, &c, &paths, name, 0, &i, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ new_path = strjoina(paths.persistent_config, "/" SPECIAL_DEFAULT_TARGET);
+ return create_symlink(&paths, i->path, new_path, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+}
+
+int unit_file_get_default(
+ UnitFileScope scope,
+ const char *root_dir,
+ char **name) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ UnitFileInstallInfo *i;
+ char *n;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(name);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ r = install_info_discover(scope, &c, &paths, SPECIAL_DEFAULT_TARGET, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, NULL, NULL);
+ if (r < 0)
+ return r;
+ r = install_info_may_process(i, &paths, NULL, 0);
+ if (r < 0)
+ return r;
+
+ n = strdup(i->name);
+ if (!n)
+ return -ENOMEM;
+
+ *name = n;
+ return 0;
+}
+
+int unit_file_lookup_state(
+ UnitFileScope scope,
+ const LookupPaths *paths,
+ const char *name,
+ UnitFileState *ret) {
+
+ _cleanup_(install_context_done) InstallContext c = {};
+ UnitFileInstallInfo *i;
+ UnitFileState state;
+ int r;
+
+ assert(paths);
+ assert(name);
+
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ r = install_info_discover(scope, &c, paths, name, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, NULL, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to discover unit %s: %m", name);
+
+ assert(IN_SET(i->type, UNIT_FILE_TYPE_REGULAR, UNIT_FILE_TYPE_MASKED));
+ log_debug("Found unit %s at %s (%s)", name, strna(i->path),
+ i->type == UNIT_FILE_TYPE_REGULAR ? "regular file" : "mask");
+
+ /* Shortcut things, if the caller just wants to know if this unit exists. */
+ if (!ret)
+ return 0;
+
+ switch (i->type) {
+
+ case UNIT_FILE_TYPE_MASKED:
+ r = path_is_runtime(paths, i->path, true);
+ if (r < 0)
+ return r;
+
+ state = r > 0 ? UNIT_FILE_MASKED_RUNTIME : UNIT_FILE_MASKED;
+ break;
+
+ case UNIT_FILE_TYPE_REGULAR:
+ r = path_is_generator(paths, i->path);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ state = UNIT_FILE_GENERATED;
+ break;
+ }
+
+ r = path_is_transient(paths, i->path);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ state = UNIT_FILE_TRANSIENT;
+ break;
+ }
+
+ /* Check if any of the Alias= symlinks have been created.
+ * We ignore other aliases, and only check those that would
+ * be created by systemctl enable for this unit. */
+ r = find_symlinks_in_scope(scope, paths, i, true, &state);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ break;
+
+ /* Check if the file is known under other names. If it is,
+ * it might be in use. Report that as UNIT_FILE_INDIRECT. */
+ r = find_symlinks_in_scope(scope, paths, i, false, &state);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ state = UNIT_FILE_INDIRECT;
+ else {
+ if (unit_file_install_info_has_rules(i))
+ state = UNIT_FILE_DISABLED;
+ else if (unit_file_install_info_has_also(i))
+ state = UNIT_FILE_INDIRECT;
+ else
+ state = UNIT_FILE_STATIC;
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Unexpect unit file type.");
+ }
+
+ *ret = state;
+ return 0;
+}
+
+int unit_file_get_state(
+ UnitFileScope scope,
+ const char *root_dir,
+ const char *name,
+ UnitFileState *ret) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(name);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ return unit_file_lookup_state(scope, &paths, name, ret);
+}
+
+int unit_file_exists(UnitFileScope scope, const LookupPaths *paths, const char *name) {
+ _cleanup_(install_context_done) InstallContext c = {};
+ int r;
+
+ assert(paths);
+ assert(name);
+
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ r = install_info_discover(scope, &c, paths, name, 0, NULL, NULL, NULL);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int split_pattern_into_name_and_instances(const char *pattern, char **out_unit_name, char ***out_instances) {
+ _cleanup_strv_free_ char **instances = NULL;
+ _cleanup_free_ char *unit_name = NULL;
+ int r;
+
+ assert(pattern);
+ assert(out_instances);
+ assert(out_unit_name);
+
+ r = extract_first_word(&pattern, &unit_name, NULL, 0);
+ if (r < 0)
+ return r;
+
+ /* We handle the instances logic when unit name is extracted */
+ if (pattern) {
+ /* We only create instances when a rule of templated unit
+ * is seen. A rule like enable foo@.service a b c will
+ * result in an array of (a, b, c) as instance names */
+ if (!unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+
+ instances = strv_split(pattern, WHITESPACE);
+ if (!instances)
+ return -ENOMEM;
+
+ *out_instances = TAKE_PTR(instances);
+ }
+
+ *out_unit_name = TAKE_PTR(unit_name);
+
+ return 0;
+}
+
+static int read_presets(UnitFileScope scope, const char *root_dir, Presets *presets) {
+ _cleanup_(presets_freep) Presets ps = {};
+ size_t n_allocated = 0;
+ _cleanup_strv_free_ char **files = NULL;
+ char **p;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(presets);
+
+ switch (scope) {
+ case UNIT_FILE_SYSTEM:
+ r = conf_files_list(&files, ".preset", root_dir, 0,
+ "/etc/systemd/system-preset",
+ "/run/systemd/system-preset",
+ "/usr/local/lib/systemd/system-preset",
+ "/usr/lib/systemd/system-preset",
+#if HAVE_SPLIT_USR
+ "/lib/systemd/system-preset",
+#endif
+ NULL);
+ break;
+
+ case UNIT_FILE_GLOBAL:
+ case UNIT_FILE_USER:
+ r = conf_files_list(&files, ".preset", root_dir, 0,
+ "/etc/systemd/user-preset",
+ "/run/systemd/user-preset",
+ "/usr/local/lib/systemd/user-preset",
+ "/usr/lib/systemd/user-preset",
+ NULL);
+ break;
+
+ default:
+ assert_not_reached("Invalid unit file scope");
+ }
+
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, files) {
+ _cleanup_fclose_ FILE *f;
+ int n = 0;
+
+ f = fopen(*p, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ PresetRule rule = {};
+ const char *parameter;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+ n++;
+
+ if (isempty(l))
+ continue;
+ if (strchr(COMMENTS, *l))
+ continue;
+
+ parameter = first_word(l, "enable");
+ if (parameter) {
+ char *unit_name;
+ char **instances = NULL;
+
+ /* Unit_name will remain the same as parameter when no instances are specified */
+ r = split_pattern_into_name_and_instances(parameter, &unit_name, &instances);
+ if (r < 0) {
+ log_syntax(NULL, LOG_WARNING, *p, n, r, "Couldn't parse line '%s'. Ignoring.", line);
+ continue;
+ }
+
+ rule = (PresetRule) {
+ .pattern = unit_name,
+ .action = PRESET_ENABLE,
+ .instances = instances,
+ };
+ }
+
+ parameter = first_word(l, "disable");
+ if (parameter) {
+ char *pattern;
+
+ pattern = strdup(parameter);
+ if (!pattern)
+ return -ENOMEM;
+
+ rule = (PresetRule) {
+ .pattern = pattern,
+ .action = PRESET_DISABLE,
+ };
+ }
+
+ if (rule.action) {
+ if (!GREEDY_REALLOC(ps.rules, n_allocated, ps.n_rules + 1))
+ return -ENOMEM;
+
+ ps.rules[ps.n_rules++] = rule;
+ continue;
+ }
+
+ log_syntax(NULL, LOG_WARNING, *p, n, 0, "Couldn't parse line '%s'. Ignoring.", line);
+ }
+ }
+
+ *presets = ps;
+ ps = (Presets){};
+
+ return 0;
+}
+
+static int pattern_match_multiple_instances(
+ const PresetRule rule,
+ const char *unit_name,
+ char ***ret) {
+
+ _cleanup_free_ char *templated_name = NULL;
+ int r;
+
+ /* If no ret is needed or the rule itself does not have instances
+ * initalized, we return not matching */
+ if (!ret || !rule.instances)
+ return 0;
+
+ r = unit_name_template(unit_name, &templated_name);
+ if (r < 0)
+ return r;
+ if (!streq(rule.pattern, templated_name))
+ return 0;
+
+ /* Compose a list of specified instances when unit name is a template */
+ if (unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE)) {
+ _cleanup_free_ char *prefix = NULL;
+ _cleanup_strv_free_ char **out_strv = NULL;
+ char **iter;
+
+ r = unit_name_to_prefix(unit_name, &prefix);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(iter, rule.instances) {
+ _cleanup_free_ char *name = NULL;
+ r = unit_name_build(prefix, *iter, ".service", &name);
+ if (r < 0)
+ return r;
+ r = strv_extend(&out_strv, name);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(out_strv);
+ return 1;
+ } else {
+ /* We now know the input unit name is an instance name */
+ _cleanup_free_ char *instance_name = NULL;
+
+ r = unit_name_to_instance(unit_name, &instance_name);
+ if (r < 0)
+ return r;
+
+ if (strv_find(rule.instances, instance_name))
+ return 1;
+ }
+ return 0;
+}
+
+static int query_presets(const char *name, const Presets presets, char ***instance_name_list) {
+ PresetAction action = PRESET_UNKNOWN;
+ size_t i;
+ char **s;
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ for (i = 0; i < presets.n_rules; i++)
+ if (pattern_match_multiple_instances(presets.rules[i], name, instance_name_list) > 0 ||
+ fnmatch(presets.rules[i].pattern, name, FNM_NOESCAPE) == 0) {
+ action = presets.rules[i].action;
+ break;
+ }
+
+ switch (action) {
+ case PRESET_UNKNOWN:
+ log_debug("Preset files don't specify rule for %s. Enabling.", name);
+ return 1;
+ case PRESET_ENABLE:
+ if (instance_name_list && *instance_name_list)
+ STRV_FOREACH(s, *instance_name_list)
+ log_debug("Preset files say enable %s.", *s);
+ else
+ log_debug("Preset files say enable %s.", name);
+ return 1;
+ case PRESET_DISABLE:
+ log_debug("Preset files say disable %s.", name);
+ return 0;
+ default:
+ assert_not_reached("invalid preset action");
+ }
+}
+
+int unit_file_query_preset(UnitFileScope scope, const char *root_dir, const char *name) {
+ _cleanup_(presets_freep) Presets presets = {};
+ int r;
+
+ r = read_presets(scope, root_dir, &presets);
+ if (r < 0)
+ return r;
+
+ return query_presets(name, presets, NULL);
+}
+
+static int execute_preset(
+ UnitFileScope scope,
+ InstallContext *plus,
+ InstallContext *minus,
+ const LookupPaths *paths,
+ const char *config_path,
+ char **files,
+ UnitFilePresetMode mode,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ int r;
+
+ assert(plus);
+ assert(minus);
+ assert(paths);
+ assert(config_path);
+
+ if (mode != UNIT_FILE_PRESET_ENABLE_ONLY) {
+ _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+
+ r = install_context_mark_for_removal(scope, minus, paths, &remove_symlinks_to, config_path, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ r = remove_marked_symlinks(remove_symlinks_to, config_path, paths, false, changes, n_changes);
+ } else
+ r = 0;
+
+ if (mode != UNIT_FILE_PRESET_DISABLE_ONLY) {
+ int q;
+
+ /* Returns number of symlinks that where supposed to be installed. */
+ q = install_context_apply(scope, plus, paths, config_path, force, SEARCH_LOAD, changes, n_changes);
+ if (r >= 0) {
+ if (q < 0)
+ r = q;
+ else
+ r += q;
+ }
+ }
+
+ return r;
+}
+
+static int preset_prepare_one(
+ UnitFileScope scope,
+ InstallContext *plus,
+ InstallContext *minus,
+ LookupPaths *paths,
+ const char *name,
+ Presets presets,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(install_context_done) InstallContext tmp = {};
+ _cleanup_strv_free_ char **instance_name_list = NULL;
+ UnitFileInstallInfo *i;
+ int r;
+
+ if (install_info_find(plus, name) || install_info_find(minus, name))
+ return 0;
+
+ r = install_info_discover(scope, &tmp, paths, name, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+ if (!streq(name, i->name)) {
+ log_debug("Skipping %s because it is an alias for %s.", name, i->name);
+ return 0;
+ }
+
+ r = query_presets(name, presets, &instance_name_list);
+ if (r < 0)
+ return r;
+
+ if (r > 0) {
+ if (instance_name_list) {
+ char **s;
+ STRV_FOREACH(s, instance_name_list) {
+ r = install_info_discover_and_check(scope, plus, paths, *s, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ r = install_info_discover_and_check(scope, plus, paths, name, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+ }
+
+ } else
+ r = install_info_discover(scope, minus, paths, name, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+
+ return r;
+}
+
+int unit_file_preset(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFilePresetMode mode,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(install_context_done) InstallContext plus = {}, minus = {};
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(presets_freep) Presets presets = {};
+ const char *config_path;
+ char **i;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(mode < _UNIT_FILE_PRESET_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ r = read_presets(scope, root_dir, &presets);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, files) {
+ r = preset_prepare_one(scope, &plus, &minus, &paths, *i, presets, changes, n_changes);
+ if (r < 0)
+ return r;
+ }
+
+ return execute_preset(scope, &plus, &minus, &paths, config_path, files, mode, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+}
+
+int unit_file_preset_all(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ UnitFilePresetMode mode,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(install_context_done) InstallContext plus = {}, minus = {};
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(presets_freep) Presets presets = {};
+ const char *config_path = NULL;
+ char **i;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(mode < _UNIT_FILE_PRESET_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ r = read_presets(scope, root_dir, &presets);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, paths.search_path) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(*i);
+ if (!d) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ dirent_ensure_type(d, de);
+
+ if (!IN_SET(de->d_type, DT_LNK, DT_REG))
+ continue;
+
+ /* we don't pass changes[] in, because we want to handle errors on our own */
+ r = preset_prepare_one(scope, &plus, &minus, &paths, de->d_name, presets, NULL, 0);
+ if (r == -ERFKILL)
+ r = unit_file_changes_add(changes, n_changes,
+ UNIT_FILE_IS_MASKED, de->d_name, NULL);
+ else if (r == -ENOLINK)
+ r = unit_file_changes_add(changes, n_changes,
+ UNIT_FILE_IS_DANGLING, de->d_name, NULL);
+ else if (r == -EADDRNOTAVAIL) /* Ignore generated/transient units when applying preset */
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return execute_preset(scope, &plus, &minus, &paths, config_path, NULL, mode, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+}
+
+static void unit_file_list_free_one(UnitFileList *f) {
+ if (!f)
+ return;
+
+ free(f->path);
+ free(f);
+}
+
+Hashmap* unit_file_list_free(Hashmap *h) {
+ return hashmap_free_with_destructor(h, unit_file_list_free_one);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(UnitFileList*, unit_file_list_free_one);
+
+int unit_file_get_list(
+ UnitFileScope scope,
+ const char *root_dir,
+ Hashmap *h,
+ char **states,
+ char **patterns) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ char **i;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(h);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, paths.search_path) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(*i);
+ if (!d) {
+ if (errno == ENOENT)
+ continue;
+ if (IN_SET(errno, ENOTDIR, EACCES)) {
+ log_debug_errno(errno, "Failed to open \"%s\": %m", *i);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ _cleanup_(unit_file_list_free_onep) UnitFileList *f = NULL;
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ if (!strv_fnmatch_or_empty(patterns, de->d_name, FNM_NOESCAPE))
+ continue;
+
+ if (hashmap_get(h, de->d_name))
+ continue;
+
+ dirent_ensure_type(d, de);
+
+ if (!IN_SET(de->d_type, DT_LNK, DT_REG))
+ continue;
+
+ f = new0(UnitFileList, 1);
+ if (!f)
+ return -ENOMEM;
+
+ f->path = path_make_absolute(de->d_name, *i);
+ if (!f->path)
+ return -ENOMEM;
+
+ r = unit_file_lookup_state(scope, &paths, de->d_name, &f->state);
+ if (r < 0)
+ f->state = UNIT_FILE_BAD;
+
+ if (!strv_isempty(states) &&
+ !strv_contains(states, unit_file_state_to_string(f->state)))
+ continue;
+
+ r = hashmap_put(h, basename(f->path), f);
+ if (r < 0)
+ return r;
+
+ f = NULL; /* prevent cleanup */
+ }
+ }
+
+ return 0;
+}
+
+static const char* const unit_file_state_table[_UNIT_FILE_STATE_MAX] = {
+ [UNIT_FILE_ENABLED] = "enabled",
+ [UNIT_FILE_ENABLED_RUNTIME] = "enabled-runtime",
+ [UNIT_FILE_LINKED] = "linked",
+ [UNIT_FILE_LINKED_RUNTIME] = "linked-runtime",
+ [UNIT_FILE_MASKED] = "masked",
+ [UNIT_FILE_MASKED_RUNTIME] = "masked-runtime",
+ [UNIT_FILE_STATIC] = "static",
+ [UNIT_FILE_DISABLED] = "disabled",
+ [UNIT_FILE_INDIRECT] = "indirect",
+ [UNIT_FILE_GENERATED] = "generated",
+ [UNIT_FILE_TRANSIENT] = "transient",
+ [UNIT_FILE_BAD] = "bad",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_file_state, UnitFileState);
+
+static const char* const unit_file_change_type_table[_UNIT_FILE_CHANGE_TYPE_MAX] = {
+ [UNIT_FILE_SYMLINK] = "symlink",
+ [UNIT_FILE_UNLINK] = "unlink",
+ [UNIT_FILE_IS_MASKED] = "masked",
+ [UNIT_FILE_IS_DANGLING] = "dangling",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_file_change_type, UnitFileChangeType);
+
+static const char* const unit_file_preset_mode_table[_UNIT_FILE_PRESET_MAX] = {
+ [UNIT_FILE_PRESET_FULL] = "full",
+ [UNIT_FILE_PRESET_ENABLE_ONLY] = "enable-only",
+ [UNIT_FILE_PRESET_DISABLE_ONLY] = "disable-only",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_file_preset_mode, UnitFilePresetMode);
diff --git a/src/shared/install.h b/src/shared/install.h
new file mode 100644
index 0000000..e452940
--- /dev/null
+++ b/src/shared/install.h
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef enum UnitFileScope UnitFileScope;
+typedef enum UnitFileState UnitFileState;
+typedef enum UnitFilePresetMode UnitFilePresetMode;
+typedef enum UnitFileChangeType UnitFileChangeType;
+typedef enum UnitFileFlags UnitFileFlags;
+typedef enum UnitFileType UnitFileType;
+typedef struct UnitFileChange UnitFileChange;
+typedef struct UnitFileList UnitFileList;
+typedef struct UnitFileInstallInfo UnitFileInstallInfo;
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+#include "path-lookup.h"
+#include "strv.h"
+#include "unit-name.h"
+
+enum UnitFileScope {
+ UNIT_FILE_SYSTEM,
+ UNIT_FILE_GLOBAL,
+ UNIT_FILE_USER,
+ _UNIT_FILE_SCOPE_MAX,
+ _UNIT_FILE_SCOPE_INVALID = -1
+};
+
+enum UnitFileState {
+ UNIT_FILE_ENABLED,
+ UNIT_FILE_ENABLED_RUNTIME,
+ UNIT_FILE_LINKED,
+ UNIT_FILE_LINKED_RUNTIME,
+ UNIT_FILE_MASKED,
+ UNIT_FILE_MASKED_RUNTIME,
+ UNIT_FILE_STATIC,
+ UNIT_FILE_DISABLED,
+ UNIT_FILE_INDIRECT,
+ UNIT_FILE_GENERATED,
+ UNIT_FILE_TRANSIENT,
+ UNIT_FILE_BAD,
+ _UNIT_FILE_STATE_MAX,
+ _UNIT_FILE_STATE_INVALID = -1
+};
+
+enum UnitFilePresetMode {
+ UNIT_FILE_PRESET_FULL,
+ UNIT_FILE_PRESET_ENABLE_ONLY,
+ UNIT_FILE_PRESET_DISABLE_ONLY,
+ _UNIT_FILE_PRESET_MAX,
+ _UNIT_FILE_PRESET_INVALID = -1
+};
+
+enum UnitFileChangeType {
+ UNIT_FILE_SYMLINK,
+ UNIT_FILE_UNLINK,
+ UNIT_FILE_IS_MASKED,
+ UNIT_FILE_IS_DANGLING,
+ _UNIT_FILE_CHANGE_TYPE_MAX,
+ _UNIT_FILE_CHANGE_TYPE_INVALID = INT_MIN
+};
+
+enum UnitFileFlags {
+ UNIT_FILE_RUNTIME = 1 << 0,
+ UNIT_FILE_FORCE = 1 << 1,
+ UNIT_FILE_DRY_RUN = 1 << 2,
+};
+
+/* type can either one of the UnitFileChangeTypes listed above, or a negative error.
+ * If source is specified, it should be the contents of the path symlink.
+ * In case of an error, source should be the existing symlink contents or NULL
+ */
+struct UnitFileChange {
+ int type; /* UnitFileChangeType or bust */
+ char *path;
+ char *source;
+};
+
+static inline bool unit_file_changes_have_modification(const UnitFileChange* changes, size_t n_changes) {
+ size_t i;
+ for (i = 0; i < n_changes; i++)
+ if (IN_SET(changes[i].type, UNIT_FILE_SYMLINK, UNIT_FILE_UNLINK))
+ return true;
+ return false;
+}
+
+struct UnitFileList {
+ char *path;
+ UnitFileState state;
+};
+
+enum UnitFileType {
+ UNIT_FILE_TYPE_REGULAR,
+ UNIT_FILE_TYPE_SYMLINK,
+ UNIT_FILE_TYPE_MASKED,
+ _UNIT_FILE_TYPE_MAX,
+ _UNIT_FILE_TYPE_INVALID = -1,
+};
+
+struct UnitFileInstallInfo {
+ char *name;
+ char *path;
+
+ char **aliases;
+ char **wanted_by;
+ char **required_by;
+ char **also;
+
+ char *default_instance;
+ char *symlink_target;
+
+ UnitFileType type;
+ bool auxiliary;
+};
+
+bool unit_type_may_alias(UnitType type) _const_;
+bool unit_type_may_template(UnitType type) _const_;
+
+int unit_file_enable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_disable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_reenable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_preset(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFilePresetMode mode,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_preset_all(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ UnitFilePresetMode mode,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_mask(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_unmask(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_link(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_revert(
+ UnitFileScope scope,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_set_default(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ const char *file,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_get_default(
+ UnitFileScope scope,
+ const char *root_dir,
+ char **name);
+int unit_file_add_dependency(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ const char *target,
+ UnitDependency dep,
+ UnitFileChange **changes,
+ size_t *n_changes);
+
+int unit_file_lookup_state(
+ UnitFileScope scope,
+ const LookupPaths *paths,
+ const char *name,
+ UnitFileState *ret);
+
+int unit_file_get_state(UnitFileScope scope, const char *root_dir, const char *filename, UnitFileState *ret);
+int unit_file_exists(UnitFileScope scope, const LookupPaths *paths, const char *name);
+
+int unit_file_get_list(UnitFileScope scope, const char *root_dir, Hashmap *h, char **states, char **patterns);
+Hashmap* unit_file_list_free(Hashmap *h);
+
+int unit_file_changes_add(UnitFileChange **changes, size_t *n_changes, UnitFileChangeType type, const char *path, const char *source);
+void unit_file_changes_free(UnitFileChange *changes, size_t n_changes);
+void unit_file_dump_changes(int r, const char *verb, const UnitFileChange *changes, size_t n_changes, bool quiet);
+
+int unit_file_query_preset(UnitFileScope scope, const char *root_dir, const char *name);
+
+const char *unit_file_state_to_string(UnitFileState s) _const_;
+UnitFileState unit_file_state_from_string(const char *s) _pure_;
+/* from_string conversion is unreliable because of the overlap between -EPERM and -1 for error. */
+
+const char *unit_file_change_type_to_string(UnitFileChangeType s) _const_;
+UnitFileChangeType unit_file_change_type_from_string(const char *s) _pure_;
+
+const char *unit_file_preset_mode_to_string(UnitFilePresetMode m) _const_;
+UnitFilePresetMode unit_file_preset_mode_from_string(const char *s) _pure_;
diff --git a/src/shared/ip-protocol-list.c b/src/shared/ip-protocol-list.c
new file mode 100644
index 0000000..aa675ea
--- /dev/null
+++ b/src/shared/ip-protocol-list.c
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <netinet/in.h>
+
+#include "alloc-util.h"
+#include "ip-protocol-list.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+static const struct ip_protocol_name* lookup_ip_protocol(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "ip-protocol-from-name.h"
+#include "ip-protocol-to-name.h"
+
+const char *ip_protocol_to_name(int id) {
+
+ if (id < 0)
+ return NULL;
+
+ if ((size_t) id >= ELEMENTSOF(ip_protocol_names))
+ return NULL;
+
+ return ip_protocol_names[id];
+}
+
+int ip_protocol_from_name(const char *name) {
+ const struct ip_protocol_name *sc;
+
+ assert(name);
+
+ sc = lookup_ip_protocol(name, strlen(name));
+ if (!sc)
+ return -EINVAL;
+
+ return sc->id;
+}
+
+int parse_ip_protocol(const char *s) {
+ _cleanup_free_ char *str = NULL;
+ int i, r;
+
+ assert(s);
+
+ if (isempty(s))
+ return IPPROTO_IP;
+
+ /* Do not use strdupa() here, as the input string may come from *
+ * command line or config files. */
+ str = strdup(s);
+ if (!str)
+ return -ENOMEM;
+
+ i = ip_protocol_from_name(ascii_strlower(str));
+ if (i >= 0)
+ return i;
+
+ r = safe_atoi(str, &i);
+ if (r < 0)
+ return r;
+
+ if (!ip_protocol_to_name(i))
+ return -EINVAL;
+
+ return i;
+}
diff --git a/src/shared/ip-protocol-list.h b/src/shared/ip-protocol-list.h
new file mode 100644
index 0000000..5c94969
--- /dev/null
+++ b/src/shared/ip-protocol-list.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+const char *ip_protocol_to_name(int id);
+int ip_protocol_from_name(const char *name);
+int parse_ip_protocol(const char *s);
diff --git a/src/shared/ip-protocol-to-name.awk b/src/shared/ip-protocol-to-name.awk
new file mode 100644
index 0000000..824f811
--- /dev/null
+++ b/src/shared/ip-protocol-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{
+ print "static const char* const ip_protocol_names[] = { "
+}
+!/HOPOPTS/ {
+ printf " [IPPROTO_%s] = \"%s\",\n", $1, tolower($1)
+}
+END{
+ print "};"
+}
diff --git a/src/shared/journal-importer.c b/src/shared/journal-importer.c
new file mode 100644
index 0000000..8638cd3
--- /dev/null
+++ b/src/shared/journal-importer.c
@@ -0,0 +1,504 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "journal-file.h"
+#include "journal-importer.h"
+#include "journal-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "unaligned.h"
+
+enum {
+ IMPORTER_STATE_LINE = 0, /* waiting to read, or reading line */
+ IMPORTER_STATE_DATA_START, /* reading binary data header */
+ IMPORTER_STATE_DATA, /* reading binary data */
+ IMPORTER_STATE_DATA_FINISH, /* expecting newline */
+ IMPORTER_STATE_EOF, /* done */
+};
+
+static int iovw_put(struct iovec_wrapper *iovw, void* data, size_t len) {
+ if (iovw->count >= ENTRY_FIELD_COUNT_MAX)
+ return -E2BIG;
+
+ if (!GREEDY_REALLOC(iovw->iovec, iovw->size_bytes, iovw->count + 1))
+ return log_oom();
+
+ iovw->iovec[iovw->count++] = IOVEC_MAKE(data, len);
+ return 0;
+}
+
+static void iovw_free_contents(struct iovec_wrapper *iovw) {
+ iovw->iovec = mfree(iovw->iovec);
+ iovw->size_bytes = iovw->count = 0;
+}
+
+static void iovw_rebase(struct iovec_wrapper *iovw, char *old, char *new) {
+ size_t i;
+
+ for (i = 0; i < iovw->count; i++)
+ iovw->iovec[i].iov_base = (char*) iovw->iovec[i].iov_base - old + new;
+}
+
+size_t iovw_size(struct iovec_wrapper *iovw) {
+ size_t n = 0, i;
+
+ for (i = 0; i < iovw->count; i++)
+ n += iovw->iovec[i].iov_len;
+
+ return n;
+}
+
+void journal_importer_cleanup(JournalImporter *imp) {
+ if (imp->fd >= 0 && !imp->passive_fd) {
+ log_debug("Closing %s (fd=%d)", imp->name ?: "importer", imp->fd);
+ safe_close(imp->fd);
+ }
+
+ free(imp->name);
+ free(imp->buf);
+ iovw_free_contents(&imp->iovw);
+}
+
+static char* realloc_buffer(JournalImporter *imp, size_t size) {
+ char *b, *old = imp->buf;
+
+ b = GREEDY_REALLOC(imp->buf, imp->size, size);
+ if (!b)
+ return NULL;
+
+ iovw_rebase(&imp->iovw, old, imp->buf);
+
+ return b;
+}
+
+static int get_line(JournalImporter *imp, char **line, size_t *size) {
+ ssize_t n;
+ char *c = NULL;
+
+ assert(imp);
+ assert(imp->state == IMPORTER_STATE_LINE);
+ assert(imp->offset <= imp->filled);
+ assert(imp->filled <= imp->size);
+ assert(!imp->buf || imp->size > 0);
+ assert(imp->fd >= 0);
+
+ for (;;) {
+ if (imp->buf) {
+ size_t start = MAX(imp->scanned, imp->offset);
+
+ c = memchr(imp->buf + start, '\n',
+ imp->filled - start);
+ if (c != NULL)
+ break;
+ }
+
+ imp->scanned = imp->filled;
+ if (imp->scanned >= DATA_SIZE_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOBUFS),
+ "Entry is bigger than %u bytes.",
+ DATA_SIZE_MAX);
+
+ if (imp->passive_fd)
+ /* we have to wait for some data to come to us */
+ return -EAGAIN;
+
+ /* We know that imp->filled is at most DATA_SIZE_MAX, so if
+ we reallocate it, we'll increase the size at least a bit. */
+ assert_cc(DATA_SIZE_MAX < ENTRY_SIZE_MAX);
+ if (imp->size - imp->filled < LINE_CHUNK &&
+ !realloc_buffer(imp, MIN(imp->filled + LINE_CHUNK, ENTRY_SIZE_MAX)))
+ return log_oom();
+
+ assert(imp->buf);
+ assert(imp->size - imp->filled >= LINE_CHUNK ||
+ imp->size == ENTRY_SIZE_MAX);
+
+ n = read(imp->fd,
+ imp->buf + imp->filled,
+ imp->size - imp->filled);
+ if (n < 0) {
+ if (errno != EAGAIN)
+ log_error_errno(errno, "read(%d, ..., %zu): %m",
+ imp->fd,
+ imp->size - imp->filled);
+ return -errno;
+ } else if (n == 0)
+ return 0;
+
+ imp->filled += n;
+ }
+
+ *line = imp->buf + imp->offset;
+ *size = c + 1 - imp->buf - imp->offset;
+ imp->offset += *size;
+
+ return 1;
+}
+
+static int fill_fixed_size(JournalImporter *imp, void **data, size_t size) {
+
+ assert(imp);
+ assert(IN_SET(imp->state, IMPORTER_STATE_DATA_START, IMPORTER_STATE_DATA, IMPORTER_STATE_DATA_FINISH));
+ assert(size <= DATA_SIZE_MAX);
+ assert(imp->offset <= imp->filled);
+ assert(imp->filled <= imp->size);
+ assert(imp->buf || imp->size == 0);
+ assert(!imp->buf || imp->size > 0);
+ assert(imp->fd >= 0);
+ assert(data);
+
+ while (imp->filled - imp->offset < size) {
+ int n;
+
+ if (imp->passive_fd)
+ /* we have to wait for some data to come to us */
+ return -EAGAIN;
+
+ if (!realloc_buffer(imp, imp->offset + size))
+ return log_oom();
+
+ n = read(imp->fd, imp->buf + imp->filled,
+ imp->size - imp->filled);
+ if (n < 0) {
+ if (errno != EAGAIN)
+ log_error_errno(errno, "read(%d, ..., %zu): %m", imp->fd,
+ imp->size - imp->filled);
+ return -errno;
+ } else if (n == 0)
+ return 0;
+
+ imp->filled += n;
+ }
+
+ *data = imp->buf + imp->offset;
+ imp->offset += size;
+
+ return 1;
+}
+
+static int get_data_size(JournalImporter *imp) {
+ int r;
+ void *data;
+
+ assert(imp);
+ assert(imp->state == IMPORTER_STATE_DATA_START);
+ assert(imp->data_size == 0);
+
+ r = fill_fixed_size(imp, &data, sizeof(uint64_t));
+ if (r <= 0)
+ return r;
+
+ imp->data_size = unaligned_read_le64(data);
+ if (imp->data_size > DATA_SIZE_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Stream declares field with size %zu > DATA_SIZE_MAX = %u",
+ imp->data_size, DATA_SIZE_MAX);
+ if (imp->data_size == 0)
+ log_warning("Binary field with zero length");
+
+ return 1;
+}
+
+static int get_data_data(JournalImporter *imp, void **data) {
+ int r;
+
+ assert(imp);
+ assert(data);
+ assert(imp->state == IMPORTER_STATE_DATA);
+
+ r = fill_fixed_size(imp, data, imp->data_size);
+ if (r <= 0)
+ return r;
+
+ return 1;
+}
+
+static int get_data_newline(JournalImporter *imp) {
+ int r;
+ char *data;
+
+ assert(imp);
+ assert(imp->state == IMPORTER_STATE_DATA_FINISH);
+
+ r = fill_fixed_size(imp, (void**) &data, 1);
+ if (r <= 0)
+ return r;
+
+ assert(data);
+ if (*data != '\n') {
+ char buf[4];
+ int l;
+
+ l = cescape_char(*data, buf);
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected newline, got '%.*s'", l, buf);
+ }
+
+ return 1;
+}
+
+static int process_special_field(JournalImporter *imp, char *line) {
+ const char *value;
+ char buf[CELLESCAPE_DEFAULT_LENGTH];
+ int r;
+
+ assert(line);
+
+ value = startswith(line, "__CURSOR=");
+ if (value)
+ /* ignore __CURSOR */
+ return 1;
+
+ value = startswith(line, "__REALTIME_TIMESTAMP=");
+ if (value) {
+ uint64_t x;
+
+ r = safe_atou64(value, &x);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse __REALTIME_TIMESTAMP '%s': %m",
+ cellescape(buf, sizeof buf, value));
+ else if (!VALID_REALTIME(x)) {
+ log_warning("__REALTIME_TIMESTAMP out of range, ignoring: %"PRIu64, x);
+ return -ERANGE;
+ }
+
+ imp->ts.realtime = x;
+ return 1;
+ }
+
+ value = startswith(line, "__MONOTONIC_TIMESTAMP=");
+ if (value) {
+ uint64_t x;
+
+ r = safe_atou64(value, &x);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse __MONOTONIC_TIMESTAMP '%s': %m",
+ cellescape(buf, sizeof buf, value));
+ else if (!VALID_MONOTONIC(x)) {
+ log_warning("__MONOTONIC_TIMESTAMP out of range, ignoring: %"PRIu64, x);
+ return -ERANGE;
+ }
+
+ imp->ts.monotonic = x;
+ return 1;
+ }
+
+ /* Just a single underline, but it needs special treatment too. */
+ value = startswith(line, "_BOOT_ID=");
+ if (value) {
+ r = sd_id128_from_string(value, &imp->boot_id);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse _BOOT_ID '%s': %m",
+ cellescape(buf, sizeof buf, value));
+
+ /* store the field in the usual fashion too */
+ return 0;
+ }
+
+ value = startswith(line, "__");
+ if (value) {
+ log_notice("Unknown dunder line __%s, ignoring.", cellescape(buf, sizeof buf, value));
+ return 1;
+ }
+
+ /* no dunder */
+ return 0;
+}
+
+int journal_importer_process_data(JournalImporter *imp) {
+ int r;
+
+ switch(imp->state) {
+ case IMPORTER_STATE_LINE: {
+ char *line, *sep;
+ size_t n = 0;
+
+ assert(imp->data_size == 0);
+
+ r = get_line(imp, &line, &n);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+ assert(n > 0);
+ assert(line[n-1] == '\n');
+
+ if (n == 1) {
+ log_trace("Received empty line, event is ready");
+ return 1;
+ }
+
+ /* MESSAGE=xxx\n
+ or
+ COREDUMP\n
+ LLLLLLLL0011223344...\n
+ */
+ sep = memchr(line, '=', n);
+ if (sep) {
+ /* chomp newline */
+ n--;
+
+ if (!journal_field_valid(line, sep - line, true)) {
+ char buf[64], *t;
+
+ t = strndupa(line, sep - line);
+ log_debug("Ignoring invalid field: \"%s\"",
+ cellescape(buf, sizeof buf, t));
+
+ return 0;
+ }
+
+ line[n] = '\0';
+ r = process_special_field(imp, line);
+ if (r != 0)
+ return r < 0 ? r : 0;
+
+ r = iovw_put(&imp->iovw, line, n);
+ if (r < 0)
+ return r;
+ } else {
+ /* replace \n with = */
+ line[n-1] = '=';
+
+ imp->field_len = n;
+ imp->state = IMPORTER_STATE_DATA_START;
+
+ /* we cannot put the field in iovec until we have all data */
+ }
+
+ log_trace("Received: %.*s (%s)", (int) n, line, sep ? "text" : "binary");
+
+ return 0; /* continue */
+ }
+
+ case IMPORTER_STATE_DATA_START:
+ assert(imp->data_size == 0);
+
+ r = get_data_size(imp);
+ // log_debug("get_data_size() -> %d", r);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+
+ imp->state = imp->data_size > 0 ?
+ IMPORTER_STATE_DATA : IMPORTER_STATE_DATA_FINISH;
+
+ return 0; /* continue */
+
+ case IMPORTER_STATE_DATA: {
+ void *data;
+ char *field;
+
+ assert(imp->data_size > 0);
+
+ r = get_data_data(imp, &data);
+ // log_debug("get_data_data() -> %d", r);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+
+ assert(data);
+
+ field = (char*) data - sizeof(uint64_t) - imp->field_len;
+ memmove(field + sizeof(uint64_t), field, imp->field_len);
+
+ r = iovw_put(&imp->iovw, field + sizeof(uint64_t), imp->field_len + imp->data_size);
+ if (r < 0)
+ return r;
+
+ imp->state = IMPORTER_STATE_DATA_FINISH;
+
+ return 0; /* continue */
+ }
+
+ case IMPORTER_STATE_DATA_FINISH:
+ r = get_data_newline(imp);
+ // log_debug("get_data_newline() -> %d", r);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+
+ imp->data_size = 0;
+ imp->state = IMPORTER_STATE_LINE;
+
+ return 0; /* continue */
+ default:
+ assert_not_reached("wtf?");
+ }
+}
+
+int journal_importer_push_data(JournalImporter *imp, const char *data, size_t size) {
+ assert(imp);
+ assert(imp->state != IMPORTER_STATE_EOF);
+
+ if (!realloc_buffer(imp, imp->filled + size))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOMEM),
+ "Failed to store received data of size %zu "
+ "(in addition to existing %zu bytes with %zu filled): %s",
+ size, imp->size, imp->filled,
+ strerror(ENOMEM));
+
+ memcpy(imp->buf + imp->filled, data, size);
+ imp->filled += size;
+
+ return 0;
+}
+
+void journal_importer_drop_iovw(JournalImporter *imp) {
+ size_t remain, target;
+
+ /* This function drops processed data that along with the iovw that points at it */
+
+ iovw_free_contents(&imp->iovw);
+
+ /* possibly reset buffer position */
+ remain = imp->filled - imp->offset;
+
+ if (remain == 0) /* no brainer */
+ imp->offset = imp->scanned = imp->filled = 0;
+ else if (imp->offset > imp->size - imp->filled &&
+ imp->offset > remain) {
+ memcpy(imp->buf, imp->buf + imp->offset, remain);
+ imp->offset = imp->scanned = 0;
+ imp->filled = remain;
+ }
+
+ target = imp->size;
+ while (target > 16 * LINE_CHUNK && imp->filled < target / 2)
+ target /= 2;
+ if (target < imp->size) {
+ char *tmp;
+
+ tmp = realloc(imp->buf, target);
+ if (!tmp)
+ log_warning("Failed to reallocate buffer to (smaller) size %zu",
+ target);
+ else {
+ log_debug("Reallocated buffer from %zu to %zu bytes",
+ imp->size, target);
+ imp->buf = tmp;
+ imp->size = target;
+ }
+ }
+}
+
+bool journal_importer_eof(const JournalImporter *imp) {
+ return imp->state == IMPORTER_STATE_EOF;
+}
diff --git a/src/shared/journal-importer.h b/src/shared/journal-importer.h
new file mode 100644
index 0000000..7914c0c
--- /dev/null
+++ b/src/shared/journal-importer.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#pragma once
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/uio.h>
+
+#include "sd-id128.h"
+
+#include "time-util.h"
+
+/* Make sure not to make this smaller than the maximum coredump size.
+ * See JOURNAL_SIZE_MAX in coredump.c */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+#define ENTRY_SIZE_MAX (1024*1024*770u)
+#define DATA_SIZE_MAX (1024*1024*768u)
+#else
+#define ENTRY_SIZE_MAX (1024*1024*13u)
+#define DATA_SIZE_MAX (1024*1024*11u)
+#endif
+#define LINE_CHUNK 8*1024u
+
+/* The maximum number of fields in an entry */
+#define ENTRY_FIELD_COUNT_MAX 1024
+
+struct iovec_wrapper {
+ struct iovec *iovec;
+ size_t size_bytes;
+ size_t count;
+};
+
+size_t iovw_size(struct iovec_wrapper *iovw);
+
+typedef struct JournalImporter {
+ int fd;
+ bool passive_fd;
+ char *name;
+
+ char *buf;
+ size_t size; /* total size of the buffer */
+ size_t offset; /* offset to the beginning of live data in the buffer */
+ size_t scanned; /* number of bytes since the beginning of data without a newline */
+ size_t filled; /* total number of bytes in the buffer */
+
+ size_t field_len; /* used for binary fields: the field name length */
+ size_t data_size; /* and the size of the binary data chunk being processed */
+
+ struct iovec_wrapper iovw;
+
+ int state;
+ dual_timestamp ts;
+ sd_id128_t boot_id;
+} JournalImporter;
+
+void journal_importer_cleanup(JournalImporter *);
+int journal_importer_process_data(JournalImporter *);
+int journal_importer_push_data(JournalImporter *, const char *data, size_t size);
+void journal_importer_drop_iovw(JournalImporter *);
+bool journal_importer_eof(const JournalImporter *);
+
+static inline size_t journal_importer_bytes_remaining(const JournalImporter *imp) {
+ return imp->filled;
+}
diff --git a/src/shared/journal-util.c b/src/shared/journal-util.c
new file mode 100644
index 0000000..89b76af
--- /dev/null
+++ b/src/shared/journal-util.c
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "acl-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "journal-internal.h"
+#include "journal-util.h"
+#include "log.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int access_check_var_log_journal(sd_journal *j, bool want_other_users) {
+#if HAVE_ACL
+ _cleanup_strv_free_ char **g = NULL;
+ const char* dir;
+#endif
+ int r;
+
+ assert(j);
+
+ /* If we are root, we should have access, don't warn. */
+ if (getuid() == 0)
+ return 0;
+
+ /* If we are in the 'systemd-journal' group, we should have
+ * access too. */
+ r = in_group("systemd-journal");
+ if (r < 0)
+ return log_error_errno(r, "Failed to check if we are in the 'systemd-journal' group: %m");
+ if (r > 0)
+ return 0;
+
+#if HAVE_ACL
+ if (laccess("/run/log/journal", F_OK) >= 0)
+ dir = "/run/log/journal";
+ else
+ dir = "/var/log/journal";
+
+ /* If we are in any of the groups listed in the journal ACLs,
+ * then all is good, too. Let's enumerate all groups from the
+ * default ACL of the directory, which generally should allow
+ * access to most journal files too. */
+ r = acl_search_groups(dir, &g);
+ if (r < 0)
+ return log_error_errno(r, "Failed to search journal ACL: %m");
+ if (r > 0)
+ return 0;
+
+ /* Print a pretty list, if there were ACLs set. */
+ if (!strv_isempty(g)) {
+ _cleanup_free_ char *s = NULL;
+
+ /* Thre are groups in the ACL, let's list them */
+ r = strv_extend(&g, "systemd-journal");
+ if (r < 0)
+ return log_oom();
+
+ strv_sort(g);
+ strv_uniq(g);
+
+ s = strv_join(g, "', '");
+ if (!s)
+ return log_oom();
+
+ log_notice("Hint: You are currently not seeing messages from %s.\n"
+ " Users in groups '%s' can see all messages.\n"
+ " Pass -q to turn off this notice.",
+ want_other_users ? "other users and the system" : "the system",
+ s);
+ return 1;
+ }
+#endif
+
+ /* If no ACLs were found, print a short version of the message. */
+ log_notice("Hint: You are currently not seeing messages from %s.\n"
+ " Users in the 'systemd-journal' group can see all messages. Pass -q to\n"
+ " turn off this notice.",
+ want_other_users ? "other users and the system" : "the system");
+
+ return 1;
+}
+
+int journal_access_check_and_warn(sd_journal *j, bool quiet, bool want_other_users) {
+ Iterator it;
+ void *code;
+ char *path;
+ int r = 0;
+
+ assert(j);
+
+ if (hashmap_isempty(j->errors)) {
+ if (ordered_hashmap_isempty(j->files) && !quiet)
+ log_notice("No journal files were found.");
+
+ return 0;
+ }
+
+ if (hashmap_contains(j->errors, INT_TO_PTR(-EACCES))) {
+ if (!quiet)
+ (void) access_check_var_log_journal(j, want_other_users);
+
+ if (ordered_hashmap_isempty(j->files))
+ r = log_error_errno(EACCES, "No journal files were opened due to insufficient permissions.");
+ }
+
+ HASHMAP_FOREACH_KEY(path, code, j->errors, it) {
+ int err;
+
+ err = abs(PTR_TO_INT(code));
+
+ switch (err) {
+ case EACCES:
+ continue;
+
+ case ENODATA:
+ log_warning_errno(err, "Journal file %s is truncated, ignoring file.", path);
+ break;
+
+ case EPROTONOSUPPORT:
+ log_warning_errno(err, "Journal file %1$s uses an unsupported feature, ignoring file.\n"
+ "Use SYSTEMD_LOG_LEVEL=debug journalctl --file=%1$s to see the details.",
+ path);
+ break;
+
+ case EBADMSG:
+ log_warning_errno(err, "Journal file %s corrupted, ignoring file.", path);
+ break;
+
+ default:
+ log_warning_errno(err, "An error was encountered while opening journal file or directory %s, ignoring file: %m", path);
+ break;
+ }
+ }
+
+ return r;
+}
+
+bool journal_field_valid(const char *p, size_t l, bool allow_protected) {
+ const char *a;
+
+ /* We kinda enforce POSIX syntax recommendations for
+ environment variables here, but make a couple of additional
+ requirements.
+
+ http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */
+
+ if (l == (size_t) -1)
+ l = strlen(p);
+
+ /* No empty field names */
+ if (l <= 0)
+ return false;
+
+ /* Don't allow names longer than 64 chars */
+ if (l > 64)
+ return false;
+
+ /* Variables starting with an underscore are protected */
+ if (!allow_protected && p[0] == '_')
+ return false;
+
+ /* Don't allow digits as first character */
+ if (p[0] >= '0' && p[0] <= '9')
+ return false;
+
+ /* Only allow A-Z0-9 and '_' */
+ for (a = p; a < p + l; a++)
+ if ((*a < 'A' || *a > 'Z') &&
+ (*a < '0' || *a > '9') &&
+ *a != '_')
+ return false;
+
+ return true;
+}
diff --git a/src/shared/journal-util.h b/src/shared/journal-util.h
new file mode 100644
index 0000000..da86434
--- /dev/null
+++ b/src/shared/journal-util.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-journal.h"
+
+bool journal_field_valid(const char *p, size_t l, bool allow_protected);
+
+int journal_access_check_and_warn(sd_journal *j, bool quiet, bool want_other_users);
diff --git a/src/shared/json-internal.h b/src/shared/json-internal.h
new file mode 100644
index 0000000..bf158bf
--- /dev/null
+++ b/src/shared/json-internal.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#pragma once
+
+#include "json.h"
+
+/* This header should include all prototypes only the JSON parser itself and
+ * its tests need access to. Normal code consuming the JSON parser should not
+ * interface with this. */
+
+typedef union JsonValue {
+ /* Encodes a simple value. On x86-64 this structure is 16 bytes wide (as long double is 128bit). */
+ bool boolean;
+ long double real;
+ intmax_t integer;
+ uintmax_t unsig;
+} JsonValue;
+
+/* Let's protect us against accidental structure size changes on our most relevant arch */
+#ifdef __x86_64__
+assert_cc(sizeof(JsonValue) == 16U);
+#endif
+
+#define JSON_VALUE_NULL ((JsonValue) {})
+
+/* We use fake JsonVariant objects for some special values, in order to avoid memory allocations for them. Note that
+ * effectively this means that there are multiple ways to encode the same objects: via these magic values or as
+ * properly allocated JsonVariant. We convert between both on-the-fly as necessary. */
+#define JSON_VARIANT_MAGIC_TRUE ((JsonVariant*) 1)
+#define JSON_VARIANT_MAGIC_FALSE ((JsonVariant*) 2)
+#define JSON_VARIANT_MAGIC_NULL ((JsonVariant*) 3)
+#define JSON_VARIANT_MAGIC_ZERO_INTEGER ((JsonVariant*) 4)
+#define JSON_VARIANT_MAGIC_ZERO_UNSIGNED ((JsonVariant*) 5)
+#define JSON_VARIANT_MAGIC_ZERO_REAL ((JsonVariant*) 6)
+#define JSON_VARIANT_MAGIC_EMPTY_STRING ((JsonVariant*) 7)
+#define JSON_VARIANT_MAGIC_EMPTY_ARRAY ((JsonVariant*) 8)
+#define JSON_VARIANT_MAGIC_EMPTY_OBJECT ((JsonVariant*) 9)
+#define _JSON_VARIANT_MAGIC_MAX ((JsonVariant*) 10)
+
+/* This is only safe as long as we don't define more than 4K magic pointers, i.e. the page size of the simplest
+ * architectures we support. That's because we rely on the fact that malloc() will never allocate from the first memory
+ * page, as it is a faulting page for catching NULL pointer dereferences. */
+assert_cc((uintptr_t) _JSON_VARIANT_MAGIC_MAX < 4096U);
+
+enum { /* JSON tokens */
+ JSON_TOKEN_END,
+ JSON_TOKEN_COLON,
+ JSON_TOKEN_COMMA,
+ JSON_TOKEN_OBJECT_OPEN,
+ JSON_TOKEN_OBJECT_CLOSE,
+ JSON_TOKEN_ARRAY_OPEN,
+ JSON_TOKEN_ARRAY_CLOSE,
+ JSON_TOKEN_STRING,
+ JSON_TOKEN_REAL,
+ JSON_TOKEN_INTEGER,
+ JSON_TOKEN_UNSIGNED,
+ JSON_TOKEN_BOOLEAN,
+ JSON_TOKEN_NULL,
+ _JSON_TOKEN_MAX,
+ _JSON_TOKEN_INVALID = -1,
+};
+
+int json_tokenize(const char **p, char **ret_string, JsonValue *ret_value, unsigned *ret_line, unsigned *ret_column, void **state, unsigned *line, unsigned *column);
diff --git a/src/shared/json.c b/src/shared/json.c
new file mode 100644
index 0000000..3786ff1
--- /dev/null
+++ b/src/shared/json.c
@@ -0,0 +1,3480 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "float.h"
+#include "hexdecoct.h"
+#include "json-internal.h"
+#include "json.h"
+#include "macro.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "utf8.h"
+
+/* Refuse putting together variants with a larger depth than 4K by default (as a protection against overflowing stacks
+ * if code processes JSON objects recursively. Note that we store the depth in an uint16_t, hence make sure this
+ * remains under 2^16.
+ * The value was 16k, but it was discovered to be too high on llvm/x86-64. See also the issue #10738. */
+#define DEPTH_MAX (4U*1024U)
+assert_cc(DEPTH_MAX <= UINT16_MAX);
+
+typedef struct JsonSource {
+ /* When we parse from a file or similar, encodes the filename, to indicate the source of a json variant */
+ size_t n_ref;
+ unsigned max_line;
+ unsigned max_column;
+ char name[];
+} JsonSource;
+
+/* On x86-64 this whole structure should have a size of 6 * 64 bit = 48 bytes */
+struct JsonVariant {
+ union {
+ /* We either maintain a reference counter for this variant itself, or we are embedded into an
+ * array/object, in which case only that surrounding object is ref-counted. (If 'embedded' is false,
+ * see below.) */
+ size_t n_ref;
+
+ /* If this JsonVariant is part of an array/object, then this field points to the surrounding
+ * JSON_VARIANT_ARRAY/JSON_VARIANT_OBJECT object. (If 'embedded' is true, see below.) */
+ JsonVariant *parent;
+ };
+
+ /* If this was parsed from some file or buffer, this stores where from, as well as the source line/column */
+ JsonSource *source;
+ unsigned line, column;
+
+ JsonVariantType type:5;
+
+ /* A marker whether this variant is embedded into in array/object or not. If true, the 'parent' pointer above
+ * is valid. If false, the 'n_ref' field above is valid instead. */
+ bool is_embedded:1;
+
+ /* In some conditions (for example, if this object is part of an array of strings or objects), we don't store
+ * any data inline, but instead simply reference an external object and act as surrogate of it. In that case
+ * this bool is set, and the external object is referenced through the .reference field below. */
+ bool is_reference:1;
+
+ /* While comparing two arrays, we use this for marking what we already have seen */
+ bool is_marked:1;
+
+ /* The current 'depth' of the JsonVariant, i.e. how many levels of member variants this has */
+ uint16_t depth;
+
+ union {
+ /* For simple types we store the value in-line. */
+ JsonValue value;
+
+ /* For objects and arrays we store the number of elements immediately following */
+ size_t n_elements;
+
+ /* If is_reference as indicated above is set, this is where the reference object is actually stored. */
+ JsonVariant *reference;
+
+ /* Strings are placed immediately after the structure. Note that when this is a JsonVariant embedded
+ * into an array we might encode strings up to INLINE_STRING_LENGTH characters directly inside the
+ * element, while longer strings are stored as references. When this object is not embedded into an
+ * array, but stand-alone we allocate the right size for the whole structure, i.e. the array might be
+ * much larger than INLINE_STRING_LENGTH.
+ *
+ * Note that because we want to allocate arrays of the JsonVariant structure we specify [0] here,
+ * rather than the prettier []. If we wouldn't, then this char array would have undefined size, and so
+ * would the union and then the struct this is included in. And of structures with undefined size we
+ * can't allocate arrays (at least not easily). */
+ char string[0];
+ };
+};
+
+/* Inside string arrays we have a series of JasonVariant structures one after the other. In this case, strings longer
+ * than INLINE_STRING_MAX are stored as references, and all shorter ones inline. (This means — on x86-64 — strings up
+ * to 15 chars are stored within the array elements, and all others in separate allocations) */
+#define INLINE_STRING_MAX (sizeof(JsonVariant) - offsetof(JsonVariant, string) - 1U)
+
+/* Let's make sure this structure isn't increased in size accidentally. This check is only for our most relevant arch
+ * (x86-64). */
+#ifdef __x86_64__
+assert_cc(sizeof(JsonVariant) == 48U);
+assert_cc(INLINE_STRING_MAX == 15U);
+#endif
+
+static JsonSource* json_source_new(const char *name) {
+ JsonSource *s;
+
+ assert(name);
+
+ s = malloc(offsetof(JsonSource, name) + strlen(name) + 1);
+ if (!s)
+ return NULL;
+
+ *s = (JsonSource) {
+ .n_ref = 1,
+ };
+ strcpy(s->name, name);
+
+ return s;
+}
+
+DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(JsonSource, json_source, mfree);
+
+static bool json_source_equal(JsonSource *a, JsonSource *b) {
+ if (a == b)
+ return true;
+
+ if (!a || !b)
+ return false;
+
+ return streq(a->name, b->name);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(JsonSource*, json_source_unref);
+
+/* There are four kind of JsonVariant* pointers:
+ *
+ * 1. NULL
+ * 2. A 'regular' one, i.e. pointing to malloc() memory
+ * 3. A 'magic' one, i.e. one of the special JSON_VARIANT_MAGIC_XYZ values, that encode a few very basic values directly in the pointer.
+ * 4. A 'const string' one, i.e. a pointer to a const string.
+ *
+ * The four kinds of pointers can be discerned like this:
+ *
+ * Detecting #1 is easy, just compare with NULL. Detecting #3 is similarly easy: all magic pointers are below
+ * _JSON_VARIANT_MAGIC_MAX (which is pretty low, within the first memory page, which is special on Linux and other
+ * OSes, as it is a faulting page). In order to discern #2 and #4 we check the lowest bit. If it's off it's #2,
+ * otherwise #4. This makes use of the fact that malloc() will return "maximum aligned" memory, which definitely
+ * means the pointer is even. This means we can use the uneven pointers to reference static strings, as long as we
+ * make sure that all static strings used like this are aligned to 2 (or higher), and that we mask the bit on
+ * access. The JSON_VARIANT_STRING_CONST() macro encodes strings as JsonVariant* pointers, with the bit set. */
+
+static bool json_variant_is_magic(const JsonVariant *v) {
+ if (!v)
+ return false;
+
+ return v < _JSON_VARIANT_MAGIC_MAX;
+}
+
+static bool json_variant_is_const_string(const JsonVariant *v) {
+
+ if (v < _JSON_VARIANT_MAGIC_MAX)
+ return false;
+
+ /* A proper JsonVariant is aligned to whatever malloc() aligns things too, which is definitely not uneven. We
+ * hence use all uneven pointers as indicators for const strings. */
+
+ return (((uintptr_t) v) & 1) != 0;
+}
+
+static bool json_variant_is_regular(const JsonVariant *v) {
+
+ if (v < _JSON_VARIANT_MAGIC_MAX)
+ return false;
+
+ return (((uintptr_t) v) & 1) == 0;
+}
+
+static JsonVariant *json_variant_dereference(JsonVariant *v) {
+
+ /* Recursively dereference variants that are references to other variants */
+
+ if (!v)
+ return NULL;
+
+ if (!json_variant_is_regular(v))
+ return v;
+
+ if (!v->is_reference)
+ return v;
+
+ return json_variant_dereference(v->reference);
+}
+
+static uint16_t json_variant_depth(JsonVariant *v) {
+
+ v = json_variant_dereference(v);
+ if (!v)
+ return 0;
+
+ if (!json_variant_is_regular(v))
+ return 0;
+
+ return v->depth;
+}
+
+static JsonVariant *json_variant_normalize(JsonVariant *v) {
+
+ /* Converts json variants to their normalized form, i.e. fully dereferenced and wherever possible converted to
+ * the "magic" version if there is one */
+
+ if (!v)
+ return NULL;
+
+ v = json_variant_dereference(v);
+
+ switch (json_variant_type(v)) {
+
+ case JSON_VARIANT_BOOLEAN:
+ return json_variant_boolean(v) ? JSON_VARIANT_MAGIC_TRUE : JSON_VARIANT_MAGIC_FALSE;
+
+ case JSON_VARIANT_NULL:
+ return JSON_VARIANT_MAGIC_NULL;
+
+ case JSON_VARIANT_INTEGER:
+ return json_variant_integer(v) == 0 ? JSON_VARIANT_MAGIC_ZERO_INTEGER : v;
+
+ case JSON_VARIANT_UNSIGNED:
+ return json_variant_unsigned(v) == 0 ? JSON_VARIANT_MAGIC_ZERO_UNSIGNED : v;
+
+ case JSON_VARIANT_REAL:
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+ return json_variant_real(v) == 0.0 ? JSON_VARIANT_MAGIC_ZERO_REAL : v;
+#pragma GCC diagnostic pop
+
+ case JSON_VARIANT_STRING:
+ return isempty(json_variant_string(v)) ? JSON_VARIANT_MAGIC_EMPTY_STRING : v;
+
+ case JSON_VARIANT_ARRAY:
+ return json_variant_elements(v) == 0 ? JSON_VARIANT_MAGIC_EMPTY_ARRAY : v;
+
+ case JSON_VARIANT_OBJECT:
+ return json_variant_elements(v) == 0 ? JSON_VARIANT_MAGIC_EMPTY_OBJECT : v;
+
+ default:
+ return v;
+ }
+}
+
+static JsonVariant *json_variant_conservative_normalize(JsonVariant *v) {
+
+ /* Much like json_variant_normalize(), but won't simplify if the variant has a source/line location attached to
+ * it, in order not to lose context */
+
+ if (!v)
+ return NULL;
+
+ if (!json_variant_is_regular(v))
+ return v;
+
+ if (v->source || v->line > 0 || v->column > 0)
+ return v;
+
+ return json_variant_normalize(v);
+}
+
+static int json_variant_new(JsonVariant **ret, JsonVariantType type, size_t space) {
+ JsonVariant *v;
+
+ assert_return(ret, -EINVAL);
+
+ v = malloc0(offsetof(JsonVariant, value) + space);
+ if (!v)
+ return -ENOMEM;
+
+ v->n_ref = 1;
+ v->type = type;
+
+ *ret = v;
+ return 0;
+}
+
+int json_variant_new_integer(JsonVariant **ret, intmax_t i) {
+ JsonVariant *v;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ if (i == 0) {
+ *ret = JSON_VARIANT_MAGIC_ZERO_INTEGER;
+ return 0;
+ }
+
+ r = json_variant_new(&v, JSON_VARIANT_INTEGER, sizeof(i));
+ if (r < 0)
+ return r;
+
+ v->value.integer = i;
+ *ret = v;
+
+ return 0;
+}
+
+int json_variant_new_unsigned(JsonVariant **ret, uintmax_t u) {
+ JsonVariant *v;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ if (u == 0) {
+ *ret = JSON_VARIANT_MAGIC_ZERO_UNSIGNED;
+ return 0;
+ }
+
+ r = json_variant_new(&v, JSON_VARIANT_UNSIGNED, sizeof(u));
+ if (r < 0)
+ return r;
+
+ v->value.unsig = u;
+ *ret = v;
+
+ return 0;
+}
+
+int json_variant_new_real(JsonVariant **ret, long double d) {
+ JsonVariant *v;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+ if (d == 0.0) {
+#pragma GCC diagnostic pop
+ *ret = JSON_VARIANT_MAGIC_ZERO_REAL;
+ return 0;
+ }
+
+ r = json_variant_new(&v, JSON_VARIANT_REAL, sizeof(d));
+ if (r < 0)
+ return r;
+
+ v->value.real = d;
+ *ret = v;
+
+ return 0;
+}
+
+int json_variant_new_boolean(JsonVariant **ret, bool b) {
+ assert_return(ret, -EINVAL);
+
+ if (b)
+ *ret = JSON_VARIANT_MAGIC_TRUE;
+ else
+ *ret = JSON_VARIANT_MAGIC_FALSE;
+
+ return 0;
+}
+
+int json_variant_new_null(JsonVariant **ret) {
+ assert_return(ret, -EINVAL);
+
+ *ret = JSON_VARIANT_MAGIC_NULL;
+ return 0;
+}
+
+int json_variant_new_stringn(JsonVariant **ret, const char *s, size_t n) {
+ JsonVariant *v;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ if (!s) {
+ assert_return(n == 0, -EINVAL);
+ return json_variant_new_null(ret);
+ }
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_STRING;
+ return 0;
+ }
+
+ r = json_variant_new(&v, JSON_VARIANT_STRING, n + 1);
+ if (r < 0)
+ return r;
+
+ memcpy(v->string, s, n);
+ v->string[n] = 0;
+
+ *ret = v;
+ return 0;
+}
+
+static void json_variant_set(JsonVariant *a, JsonVariant *b) {
+ assert(a);
+
+ b = json_variant_dereference(b);
+ if (!b) {
+ a->type = JSON_VARIANT_NULL;
+ return;
+ }
+
+ a->type = json_variant_type(b);
+ switch (a->type) {
+
+ case JSON_VARIANT_INTEGER:
+ a->value.integer = json_variant_integer(b);
+ break;
+
+ case JSON_VARIANT_UNSIGNED:
+ a->value.unsig = json_variant_unsigned(b);
+ break;
+
+ case JSON_VARIANT_REAL:
+ a->value.real = json_variant_real(b);
+ break;
+
+ case JSON_VARIANT_BOOLEAN:
+ a->value.boolean = json_variant_boolean(b);
+ break;
+
+ case JSON_VARIANT_STRING: {
+ const char *s;
+
+ assert_se(s = json_variant_string(b));
+
+ /* Short strings we can store inline */
+ if (strnlen(s, INLINE_STRING_MAX+1) <= INLINE_STRING_MAX) {
+ strcpy(a->string, s);
+ break;
+ }
+
+ /* For longer strings, use a reference… */
+ _fallthrough_;
+ }
+
+ case JSON_VARIANT_ARRAY:
+ case JSON_VARIANT_OBJECT:
+ a->is_reference = true;
+ a->reference = json_variant_ref(json_variant_conservative_normalize(b));
+ break;
+
+ case JSON_VARIANT_NULL:
+ break;
+
+ default:
+ assert_not_reached("Unexpected variant type");
+ }
+}
+
+static void json_variant_copy_source(JsonVariant *v, JsonVariant *from) {
+ assert(v);
+ assert(from);
+
+ if (!json_variant_is_regular(from))
+ return;
+
+ v->line = from->line;
+ v->column = from->column;
+ v->source = json_source_ref(from->source);
+}
+
+int json_variant_new_array(JsonVariant **ret, JsonVariant **array, size_t n) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ assert_return(ret, -EINVAL);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY;
+ return 0;
+ }
+ assert_return(array, -EINVAL);
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_ARRAY,
+ };
+
+ for (v->n_elements = 0; v->n_elements < n; v->n_elements++) {
+ JsonVariant *w = v + 1 + v->n_elements,
+ *c = array[v->n_elements];
+ uint16_t d;
+
+ d = json_variant_depth(c);
+ if (d >= DEPTH_MAX) /* Refuse too deep nesting */
+ return -ELNRNG;
+ if (d >= v->depth)
+ v->depth = d + 1;
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ };
+
+ json_variant_set(w, c);
+ json_variant_copy_source(w, c);
+ }
+
+ *ret = TAKE_PTR(v);
+ return 0;
+}
+
+int json_variant_new_array_bytes(JsonVariant **ret, const void *p, size_t n) {
+ JsonVariant *v;
+ size_t i;
+
+ assert_return(ret, -EINVAL);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY;
+ return 0;
+ }
+ assert_return(p, -EINVAL);
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_ARRAY,
+ .n_elements = n,
+ .depth = 1,
+ };
+
+ for (i = 0; i < n; i++) {
+ JsonVariant *w = v + 1 + i;
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ .type = JSON_VARIANT_UNSIGNED,
+ .value.unsig = ((const uint8_t*) p)[i],
+ };
+ }
+
+ *ret = v;
+ return 0;
+}
+
+int json_variant_new_array_strv(JsonVariant **ret, char **l) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ size_t n;
+ int r;
+
+ assert(ret);
+
+ n = strv_length(l);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY;
+ return 0;
+ }
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_ARRAY,
+ .depth = 1,
+ };
+
+ for (v->n_elements = 0; v->n_elements < n; v->n_elements++) {
+ JsonVariant *w = v + 1 + v->n_elements;
+ size_t k;
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ .type = JSON_VARIANT_STRING,
+ };
+
+ k = strlen(l[v->n_elements]);
+
+ if (k > INLINE_STRING_MAX) {
+ /* If string is too long, store it as reference. */
+
+ r = json_variant_new_stringn(&w->reference, l[v->n_elements], k);
+ if (r < 0)
+ return r;
+
+ w->is_reference = true;
+ } else
+ memcpy(w->string, l[v->n_elements], k+1);
+ }
+
+ *ret = TAKE_PTR(v);
+ return 0;
+}
+
+int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ assert_return(ret, -EINVAL);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_OBJECT;
+ return 0;
+ }
+ assert_return(array, -EINVAL);
+ assert_return(n % 2 == 0, -EINVAL);
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_OBJECT,
+ };
+
+ for (v->n_elements = 0; v->n_elements < n; v->n_elements++) {
+ JsonVariant *w = v + 1 + v->n_elements,
+ *c = array[v->n_elements];
+ uint16_t d;
+
+ if ((v->n_elements & 1) == 0 &&
+ !json_variant_is_string(c))
+ return -EINVAL; /* Every second one needs to be a string, as it is the key name */
+
+ d = json_variant_depth(c);
+ if (d >= DEPTH_MAX) /* Refuse too deep nesting */
+ return -ELNRNG;
+ if (d >= v->depth)
+ v->depth = d + 1;
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ };
+
+ json_variant_set(w, c);
+ json_variant_copy_source(w, c);
+ }
+
+ *ret = TAKE_PTR(v);
+ return 0;
+}
+
+static void json_variant_free_inner(JsonVariant *v) {
+ assert(v);
+
+ if (!json_variant_is_regular(v))
+ return;
+
+ json_source_unref(v->source);
+
+ if (v->is_reference) {
+ json_variant_unref(v->reference);
+ return;
+ }
+
+ if (IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT)) {
+ size_t i;
+
+ for (i = 0; i < v->n_elements; i++)
+ json_variant_free_inner(v + 1 + i);
+ }
+}
+
+JsonVariant *json_variant_ref(JsonVariant *v) {
+ if (!v)
+ return NULL;
+ if (!json_variant_is_regular(v))
+ return v;
+
+ if (v->is_embedded)
+ json_variant_ref(v->parent); /* ref the compounding variant instead */
+ else {
+ assert(v->n_ref > 0);
+ v->n_ref++;
+ }
+
+ return v;
+}
+
+JsonVariant *json_variant_unref(JsonVariant *v) {
+ if (!v)
+ return NULL;
+ if (!json_variant_is_regular(v))
+ return NULL;
+
+ if (v->is_embedded)
+ json_variant_unref(v->parent);
+ else {
+ assert(v->n_ref > 0);
+ v->n_ref--;
+
+ if (v->n_ref == 0) {
+ json_variant_free_inner(v);
+ free(v);
+ }
+ }
+
+ return NULL;
+}
+
+void json_variant_unref_many(JsonVariant **array, size_t n) {
+ size_t i;
+
+ assert(array || n == 0);
+
+ for (i = 0; i < n; i++)
+ json_variant_unref(array[i]);
+}
+
+const char *json_variant_string(JsonVariant *v) {
+ if (!v)
+ return NULL;
+ if (v == JSON_VARIANT_MAGIC_EMPTY_STRING)
+ return "";
+ if (json_variant_is_magic(v))
+ goto mismatch;
+ if (json_variant_is_const_string(v)) {
+ uintptr_t p = (uintptr_t) v;
+
+ assert((p & 1) != 0);
+ return (const char*) (p ^ 1U);
+ }
+
+ if (v->is_reference)
+ return json_variant_string(v->reference);
+ if (v->type != JSON_VARIANT_STRING)
+ goto mismatch;
+
+ return v->string;
+
+mismatch:
+ log_debug("Non-string JSON variant requested as string, returning NULL.");
+ return NULL;
+}
+
+bool json_variant_boolean(JsonVariant *v) {
+ if (!v)
+ goto mismatch;
+ if (v == JSON_VARIANT_MAGIC_TRUE)
+ return true;
+ if (v == JSON_VARIANT_MAGIC_FALSE)
+ return false;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->type != JSON_VARIANT_BOOLEAN)
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_boolean(v->reference);
+
+ return v->value.boolean;
+
+mismatch:
+ log_debug("Non-boolean JSON variant requested as boolean, returning false.");
+ return false;
+}
+
+intmax_t json_variant_integer(JsonVariant *v) {
+ if (!v)
+ goto mismatch;
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+ v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+ v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return 0;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_integer(v->reference);
+
+ switch (v->type) {
+
+ case JSON_VARIANT_INTEGER:
+ return v->value.integer;
+
+ case JSON_VARIANT_UNSIGNED:
+ if (v->value.unsig <= INTMAX_MAX)
+ return (intmax_t) v->value.unsig;
+
+ log_debug("Unsigned integer %ju requested as signed integer and out of range, returning 0.", v->value.unsig);
+ return 0;
+
+ case JSON_VARIANT_REAL: {
+ intmax_t converted;
+
+ converted = (intmax_t) v->value.real;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+ if ((long double) converted == v->value.real)
+#pragma GCC diagnostic pop
+ return converted;
+
+ log_debug("Real %Lg requested as integer, and cannot be converted losslessly, returning 0.", v->value.real);
+ return 0;
+ }
+
+ default:
+ break;
+ }
+
+mismatch:
+ log_debug("Non-integer JSON variant requested as integer, returning 0.");
+ return 0;
+}
+
+uintmax_t json_variant_unsigned(JsonVariant *v) {
+ if (!v)
+ goto mismatch;
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+ v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+ v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return 0;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_integer(v->reference);
+
+ switch (v->type) {
+
+ case JSON_VARIANT_INTEGER:
+ if (v->value.integer >= 0)
+ return (uintmax_t) v->value.integer;
+
+ log_debug("Signed integer %ju requested as unsigned integer and out of range, returning 0.", v->value.integer);
+ return 0;
+
+ case JSON_VARIANT_UNSIGNED:
+ return v->value.unsig;
+
+ case JSON_VARIANT_REAL: {
+ uintmax_t converted;
+
+ converted = (uintmax_t) v->value.real;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+ if ((long double) converted == v->value.real)
+#pragma GCC diagnostic pop
+ return converted;
+
+ log_debug("Real %Lg requested as unsigned integer, and cannot be converted losslessly, returning 0.", v->value.real);
+ return 0;
+ }
+
+ default:
+ break;
+ }
+
+mismatch:
+ log_debug("Non-integer JSON variant requested as unsigned, returning 0.");
+ return 0;
+}
+
+long double json_variant_real(JsonVariant *v) {
+ if (!v)
+ return 0.0;
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+ v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+ v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return 0.0;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_real(v->reference);
+
+ switch (v->type) {
+
+ case JSON_VARIANT_REAL:
+ return v->value.real;
+
+ case JSON_VARIANT_INTEGER: {
+ long double converted;
+
+ converted = (long double) v->value.integer;
+
+ if ((intmax_t) converted == v->value.integer)
+ return converted;
+
+ log_debug("Signed integer %ji requested as real, and cannot be converted losslessly, returning 0.", v->value.integer);
+ return 0.0;
+ }
+
+ case JSON_VARIANT_UNSIGNED: {
+ long double converted;
+
+ converted = (long double) v->value.unsig;
+
+ if ((uintmax_t) converted == v->value.unsig)
+ return converted;
+
+ log_debug("Unsigned integer %ju requested as real, and cannot be converted losslessly, returning 0.", v->value.unsig);
+ return 0.0;
+ }
+
+ default:
+ break;
+ }
+
+mismatch:
+ log_debug("Non-integer JSON variant requested as integer, returning 0.");
+ return 0.0;
+}
+
+bool json_variant_is_negative(JsonVariant *v) {
+ if (!v)
+ goto mismatch;
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+ v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+ v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return false;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_is_negative(v->reference);
+
+ /* This function is useful as checking whether numbers are negative is pretty complex since we have three types
+ * of numbers. And some JSON code (OCI for example) uses negative numbers to mark "not defined" numeric
+ * values. */
+
+ switch (v->type) {
+
+ case JSON_VARIANT_REAL:
+ return v->value.real < 0;
+
+ case JSON_VARIANT_INTEGER:
+ return v->value.integer < 0;
+
+ case JSON_VARIANT_UNSIGNED:
+ return false;
+
+ default:
+ break;
+ }
+
+mismatch:
+ log_debug("Non-integer JSON variant tested for negativity, returning false.");
+ return false;
+}
+
+JsonVariantType json_variant_type(JsonVariant *v) {
+
+ if (!v)
+ return _JSON_VARIANT_TYPE_INVALID;
+
+ if (json_variant_is_const_string(v))
+ return JSON_VARIANT_STRING;
+
+ if (v == JSON_VARIANT_MAGIC_TRUE || v == JSON_VARIANT_MAGIC_FALSE)
+ return JSON_VARIANT_BOOLEAN;
+
+ if (v == JSON_VARIANT_MAGIC_NULL)
+ return JSON_VARIANT_NULL;
+
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER)
+ return JSON_VARIANT_INTEGER;
+
+ if (v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED)
+ return JSON_VARIANT_UNSIGNED;
+
+ if (v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return JSON_VARIANT_REAL;
+
+ if (v == JSON_VARIANT_MAGIC_EMPTY_STRING)
+ return JSON_VARIANT_STRING;
+
+ if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY)
+ return JSON_VARIANT_ARRAY;
+
+ if (v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+ return JSON_VARIANT_OBJECT;
+
+ return v->type;
+}
+
+bool json_variant_has_type(JsonVariant *v, JsonVariantType type) {
+ JsonVariantType rt;
+
+ v = json_variant_dereference(v);
+ if (!v)
+ return false;
+
+ rt = json_variant_type(v);
+ if (rt == type)
+ return true;
+
+ /* If it's a const string, then it only can be a string, and if it is not, it's not */
+ if (json_variant_is_const_string(v))
+ return false;
+
+ /* All three magic zeroes qualify as integer, unsigned and as real */
+ if ((v == JSON_VARIANT_MAGIC_ZERO_INTEGER || v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED || v == JSON_VARIANT_MAGIC_ZERO_REAL) &&
+ IN_SET(type, JSON_VARIANT_INTEGER, JSON_VARIANT_UNSIGNED, JSON_VARIANT_REAL, JSON_VARIANT_NUMBER))
+ return true;
+
+ /* All other magic variant types are only equal to themselves */
+ if (json_variant_is_magic(v))
+ return false;
+
+ /* Handle the "number" pseudo type */
+ if (type == JSON_VARIANT_NUMBER)
+ return IN_SET(rt, JSON_VARIANT_INTEGER, JSON_VARIANT_UNSIGNED, JSON_VARIANT_REAL);
+
+ /* Integer conversions are OK in many cases */
+ if (rt == JSON_VARIANT_INTEGER && type == JSON_VARIANT_UNSIGNED)
+ return v->value.integer >= 0;
+ if (rt == JSON_VARIANT_UNSIGNED && type == JSON_VARIANT_INTEGER)
+ return v->value.unsig <= INTMAX_MAX;
+
+ /* Any integer that can be converted lossley to a real and back may also be considered a real */
+ if (rt == JSON_VARIANT_INTEGER && type == JSON_VARIANT_REAL)
+ return (intmax_t) (long double) v->value.integer == v->value.integer;
+ if (rt == JSON_VARIANT_UNSIGNED && type == JSON_VARIANT_REAL)
+ return (uintmax_t) (long double) v->value.unsig == v->value.unsig;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+ /* Any real that can be converted losslessly to an integer and back may also be considered an integer */
+ if (rt == JSON_VARIANT_REAL && type == JSON_VARIANT_INTEGER)
+ return (long double) (intmax_t) v->value.real == v->value.real;
+ if (rt == JSON_VARIANT_REAL && type == JSON_VARIANT_UNSIGNED)
+ return (long double) (uintmax_t) v->value.real == v->value.real;
+#pragma GCC diagnostic pop
+
+ return false;
+}
+
+size_t json_variant_elements(JsonVariant *v) {
+ if (!v)
+ return 0;
+ if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY ||
+ v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+ return 0;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (!IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_elements(v->reference);
+
+ return v->n_elements;
+
+mismatch:
+ log_debug("Number of elements in non-array/non-object JSON variant requested, returning 0.");
+ return 0;
+}
+
+JsonVariant *json_variant_by_index(JsonVariant *v, size_t idx) {
+ if (!v)
+ return NULL;
+ if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY ||
+ v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+ return NULL;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (!IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_by_index(v->reference, idx);
+ if (idx >= v->n_elements)
+ return NULL;
+
+ return json_variant_conservative_normalize(v + 1 + idx);
+
+mismatch:
+ log_debug("Element in non-array/non-object JSON variant requested by index, returning NULL.");
+ return NULL;
+}
+
+JsonVariant *json_variant_by_key_full(JsonVariant *v, const char *key, JsonVariant **ret_key) {
+ size_t i;
+
+ if (!v)
+ goto not_found;
+ if (!key)
+ goto not_found;
+ if (v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+ goto not_found;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->type != JSON_VARIANT_OBJECT)
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_by_key(v->reference, key);
+
+ for (i = 0; i < v->n_elements; i += 2) {
+ JsonVariant *p;
+
+ p = json_variant_dereference(v + 1 + i);
+
+ if (!json_variant_has_type(p, JSON_VARIANT_STRING))
+ continue;
+
+ if (streq(json_variant_string(p), key)) {
+
+ if (ret_key)
+ *ret_key = json_variant_conservative_normalize(v + 1 + i);
+
+ return json_variant_conservative_normalize(v + 1 + i + 1);
+ }
+ }
+
+not_found:
+ if (ret_key)
+ *ret_key = NULL;
+
+ return NULL;
+
+mismatch:
+ log_debug("Element in non-object JSON variant requested by key, returning NULL.");
+ if (ret_key)
+ *ret_key = NULL;
+
+ return NULL;
+}
+
+JsonVariant *json_variant_by_key(JsonVariant *v, const char *key) {
+ return json_variant_by_key_full(v, key, NULL);
+}
+
+bool json_variant_equal(JsonVariant *a, JsonVariant *b) {
+ JsonVariantType t;
+
+ a = json_variant_normalize(a);
+ b = json_variant_normalize(b);
+
+ if (a == b)
+ return true;
+
+ t = json_variant_type(a);
+ if (!json_variant_has_type(b, t))
+ return false;
+
+ switch (t) {
+
+ case JSON_VARIANT_STRING:
+ return streq(json_variant_string(a), json_variant_string(b));
+
+ case JSON_VARIANT_INTEGER:
+ return json_variant_integer(a) == json_variant_integer(b);
+
+ case JSON_VARIANT_UNSIGNED:
+ return json_variant_unsigned(a) == json_variant_unsigned(b);
+
+ case JSON_VARIANT_REAL:
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+ return json_variant_real(a) == json_variant_real(b);
+#pragma GCC diagnostic pop
+
+ case JSON_VARIANT_BOOLEAN:
+ return json_variant_boolean(a) == json_variant_boolean(b);
+
+ case JSON_VARIANT_NULL:
+ return true;
+
+ case JSON_VARIANT_ARRAY: {
+ size_t i, n;
+
+ n = json_variant_elements(a);
+ if (n != json_variant_elements(b))
+ return false;
+
+ for (i = 0; i < n; i++) {
+ if (!json_variant_equal(json_variant_by_index(a, i), json_variant_by_index(b, i)))
+ return false;
+ }
+
+ return true;
+ }
+
+ case JSON_VARIANT_OBJECT: {
+ size_t i, n;
+
+ n = json_variant_elements(a);
+ if (n != json_variant_elements(b))
+ return false;
+
+ /* Iterate through all keys in 'a' */
+ for (i = 0; i < n; i += 2) {
+ bool found = false;
+ size_t j;
+
+ /* Match them against all keys in 'b' */
+ for (j = 0; j < n; j += 2) {
+ JsonVariant *key_b;
+
+ key_b = json_variant_by_index(b, j);
+
+ /* During the first iteration unmark everything */
+ if (i == 0)
+ key_b->is_marked = false;
+ else if (key_b->is_marked) /* In later iterations if we already marked something, don't bother with it again */
+ continue;
+
+ if (found)
+ continue;
+
+ if (json_variant_equal(json_variant_by_index(a, i), key_b) &&
+ json_variant_equal(json_variant_by_index(a, i+1), json_variant_by_index(b, j+1))) {
+ /* Key and values match! */
+ key_b->is_marked = found = true;
+
+ /* In the first iteration we continue the inner loop since we want to mark
+ * everything, otherwise exit the loop quickly after we found what we were
+ * looking for. */
+ if (i != 0)
+ break;
+ }
+ }
+
+ if (!found)
+ return false;
+ }
+
+ return true;
+ }
+
+ default:
+ assert_not_reached("Unknown variant type.");
+ }
+}
+
+int json_variant_get_source(JsonVariant *v, const char **ret_source, unsigned *ret_line, unsigned *ret_column) {
+ assert_return(v, -EINVAL);
+
+ if (ret_source)
+ *ret_source = json_variant_is_regular(v) && v->source ? v->source->name : NULL;
+
+ if (ret_line)
+ *ret_line = json_variant_is_regular(v) ? v->line : 0;
+
+ if (ret_column)
+ *ret_column = json_variant_is_regular(v) ? v->column : 0;
+
+ return 0;
+}
+
+static int print_source(FILE *f, JsonVariant *v, JsonFormatFlags flags, bool whitespace) {
+ size_t w, k;
+
+ if (!FLAGS_SET(flags, JSON_FORMAT_SOURCE|JSON_FORMAT_PRETTY))
+ return 0;
+
+ if (!json_variant_is_regular(v))
+ return 0;
+
+ if (!v->source && v->line == 0 && v->column == 0)
+ return 0;
+
+ /* The max width we need to format the line numbers for this source file */
+ w = (v->source && v->source->max_line > 0) ?
+ DECIMAL_STR_WIDTH(v->source->max_line) :
+ DECIMAL_STR_MAX(unsigned)-1;
+ k = (v->source && v->source->max_column > 0) ?
+ DECIMAL_STR_WIDTH(v->source->max_column) :
+ DECIMAL_STR_MAX(unsigned) -1;
+
+ if (whitespace) {
+ size_t i, n;
+
+ n = 1 + (v->source ? strlen(v->source->name) : 0) +
+ ((v->source && (v->line > 0 || v->column > 0)) ? 1 : 0) +
+ (v->line > 0 ? w : 0) +
+ (((v->source || v->line > 0) && v->column > 0) ? 1 : 0) +
+ (v->column > 0 ? k : 0) +
+ 2;
+
+ for (i = 0; i < n; i++)
+ fputc(' ', f);
+ } else {
+ fputc('[', f);
+
+ if (v->source)
+ fputs(v->source->name, f);
+ if (v->source && (v->line > 0 || v->column > 0))
+ fputc(':', f);
+ if (v->line > 0)
+ fprintf(f, "%*u", (int) w, v->line);
+ if ((v->source || v->line > 0) || v->column > 0)
+ fputc(':', f);
+ if (v->column > 0)
+ fprintf(f, "%*u", (int) k, v->column);
+
+ fputc(']', f);
+ fputc(' ', f);
+ }
+
+ return 0;
+}
+
+static int json_format(FILE *f, JsonVariant *v, JsonFormatFlags flags, const char *prefix) {
+ int r;
+
+ assert(f);
+ assert(v);
+
+ switch (json_variant_type(v)) {
+
+ case JSON_VARIANT_REAL: {
+ locale_t loc;
+
+ loc = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0);
+ if (loc == (locale_t) 0)
+ return -errno;
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT_BLUE, f);
+
+ fprintf(f, "%.*Le", DECIMAL_DIG, json_variant_real(v));
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+
+ freelocale(loc);
+ break;
+ }
+
+ case JSON_VARIANT_INTEGER:
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT_BLUE, f);
+
+ fprintf(f, "%" PRIdMAX, json_variant_integer(v));
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+ break;
+
+ case JSON_VARIANT_UNSIGNED:
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT_BLUE, f);
+
+ fprintf(f, "%" PRIuMAX, json_variant_unsigned(v));
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+ break;
+
+ case JSON_VARIANT_BOOLEAN:
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT, f);
+
+ if (json_variant_boolean(v))
+ fputs("true", f);
+ else
+ fputs("false", f);
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+
+ break;
+
+ case JSON_VARIANT_NULL:
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT, f);
+
+ fputs("null", f);
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+ break;
+
+ case JSON_VARIANT_STRING: {
+ const char *q;
+
+ fputc('"', f);
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_GREEN, f);
+
+ for (q = json_variant_string(v); *q; q++) {
+
+ switch (*q) {
+
+ case '"':
+ fputs("\\\"", f);
+ break;
+
+ case '\\':
+ fputs("\\\\", f);
+ break;
+
+ case '\b':
+ fputs("\\b", f);
+ break;
+
+ case '\f':
+ fputs("\\f", f);
+ break;
+
+ case '\n':
+ fputs("\\n", f);
+ break;
+
+ case '\r':
+ fputs("\\r", f);
+ break;
+
+ case '\t':
+ fputs("\\t", f);
+ break;
+
+ default:
+ if ((signed char) *q >= 0 && *q < ' ')
+ fprintf(f, "\\u%04x", *q);
+ else
+ fputc(*q, f);
+ break;
+ }
+ }
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+
+ fputc('"', f);
+ break;
+ }
+
+ case JSON_VARIANT_ARRAY: {
+ size_t i, n;
+
+ n = json_variant_elements(v);
+
+ if (n == 0)
+ fputs("[]", f);
+ else {
+ _cleanup_free_ char *joined = NULL;
+ const char *prefix2;
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ joined = strjoin(strempty(prefix), "\t");
+ if (!joined)
+ return -ENOMEM;
+
+ prefix2 = joined;
+ fputs("[\n", f);
+ } else {
+ prefix2 = strempty(prefix);
+ fputc('[', f);
+ }
+
+ for (i = 0; i < n; i++) {
+ JsonVariant *e;
+
+ assert_se(e = json_variant_by_index(v, i));
+
+ if (i > 0) {
+ if (flags & JSON_FORMAT_PRETTY)
+ fputs(",\n", f);
+ else
+ fputc(',', f);
+ }
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ print_source(f, e, flags, false);
+ fputs(prefix2, f);
+ }
+
+ r = json_format(f, e, flags, prefix2);
+ if (r < 0)
+ return r;
+ }
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ fputc('\n', f);
+ print_source(f, v, flags, true);
+ fputs(strempty(prefix), f);
+ }
+
+ fputc(']', f);
+ }
+ break;
+ }
+
+ case JSON_VARIANT_OBJECT: {
+ size_t i, n;
+
+ n = json_variant_elements(v);
+
+ if (n == 0)
+ fputs("{}", f);
+ else {
+ _cleanup_free_ char *joined = NULL;
+ const char *prefix2;
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ joined = strjoin(strempty(prefix), "\t");
+ if (!joined)
+ return -ENOMEM;
+
+ prefix2 = joined;
+ fputs("{\n", f);
+ } else {
+ prefix2 = strempty(prefix);
+ fputc('{', f);
+ }
+
+ for (i = 0; i < n; i += 2) {
+ JsonVariant *e;
+
+ e = json_variant_by_index(v, i);
+
+ if (i > 0) {
+ if (flags & JSON_FORMAT_PRETTY)
+ fputs(",\n", f);
+ else
+ fputc(',', f);
+ }
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ print_source(f, e, flags, false);
+ fputs(prefix2, f);
+ }
+
+ r = json_format(f, e, flags, prefix2);
+ if (r < 0)
+ return r;
+
+ fputs(flags & JSON_FORMAT_PRETTY ? " : " : ":", f);
+
+ r = json_format(f, json_variant_by_index(v, i+1), flags, prefix2);
+ if (r < 0)
+ return r;
+ }
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ fputc('\n', f);
+ print_source(f, v, flags, true);
+ fputs(strempty(prefix), f);
+ }
+
+ fputc('}', f);
+ }
+ break;
+ }
+
+ default:
+ assert_not_reached("Unexpected variant type.");
+ }
+
+ return 0;
+}
+
+int json_variant_format(JsonVariant *v, JsonFormatFlags flags, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ size_t sz = 0;
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ f = open_memstream(&s, &sz);
+ if (!f)
+ return -ENOMEM;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ json_variant_dump(v, flags, f, NULL);
+
+ r = fflush_and_check(f);
+ }
+ if (r < 0)
+ return r;
+
+ assert(s);
+ *ret = TAKE_PTR(s);
+
+ return (int) sz;
+}
+
+void json_variant_dump(JsonVariant *v, JsonFormatFlags flags, FILE *f, const char *prefix) {
+ if (!v)
+ return;
+
+ if (!f)
+ f = stdout;
+
+ print_source(f, v, flags, false);
+
+ if (((flags & (JSON_FORMAT_COLOR_AUTO|JSON_FORMAT_COLOR)) == JSON_FORMAT_COLOR_AUTO) && colors_enabled())
+ flags |= JSON_FORMAT_COLOR;
+
+ if (flags & JSON_FORMAT_SSE)
+ fputs("data: ", f);
+ if (flags & JSON_FORMAT_SEQ)
+ fputc('\x1e', f); /* ASCII Record Separator */
+
+ json_format(f, v, flags, prefix);
+
+ if (flags & (JSON_FORMAT_PRETTY|JSON_FORMAT_SEQ|JSON_FORMAT_SSE|JSON_FORMAT_NEWLINE))
+ fputc('\n', f);
+ if (flags & JSON_FORMAT_SSE)
+ fputc('\n', f); /* In case of SSE add a second newline */
+}
+
+static int json_variant_copy(JsonVariant **nv, JsonVariant *v) {
+ JsonVariantType t;
+ JsonVariant *c;
+ JsonValue value;
+ const void *source;
+ size_t k;
+
+ assert(nv);
+ assert(v);
+
+ /* Let's copy the simple types literally, and the larger types by references */
+ t = json_variant_type(v);
+ switch (t) {
+ case JSON_VARIANT_INTEGER:
+ k = sizeof(intmax_t);
+ value.integer = json_variant_integer(v);
+ source = &value;
+ break;
+
+ case JSON_VARIANT_UNSIGNED:
+ k = sizeof(uintmax_t);
+ value.unsig = json_variant_unsigned(v);
+ source = &value;
+ break;
+
+ case JSON_VARIANT_REAL:
+ k = sizeof(long double);
+ value.real = json_variant_real(v);
+ source = &value;
+ break;
+
+ case JSON_VARIANT_BOOLEAN:
+ k = sizeof(bool);
+ value.boolean = json_variant_boolean(v);
+ source = &value;
+ break;
+
+ case JSON_VARIANT_NULL:
+ k = 0;
+ source = NULL;
+ break;
+
+ case JSON_VARIANT_STRING:
+ source = json_variant_string(v);
+ k = strnlen(source, INLINE_STRING_MAX + 1);
+ if (k <= INLINE_STRING_MAX) {
+ k ++;
+ break;
+ }
+
+ _fallthrough_;
+
+ default:
+ /* Everything else copy by reference */
+
+ c = malloc0(offsetof(JsonVariant, reference) + sizeof(JsonVariant*));
+ if (!c)
+ return -ENOMEM;
+
+ c->n_ref = 1;
+ c->type = t;
+ c->is_reference = true;
+ c->reference = json_variant_ref(json_variant_normalize(v));
+
+ *nv = c;
+ return 0;
+ }
+
+ c = malloc0(offsetof(JsonVariant, value) + k);
+ if (!c)
+ return -ENOMEM;
+
+ c->n_ref = 1;
+ c->type = t;
+
+ memcpy_safe(&c->value, source, k);
+
+ *nv = c;
+ return 0;
+}
+
+static bool json_single_ref(JsonVariant *v) {
+
+ /* Checks whether the caller is the single owner of the object, i.e. can get away with changing it */
+
+ if (!json_variant_is_regular(v))
+ return false;
+
+ if (v->is_embedded)
+ return json_single_ref(v->parent);
+
+ assert(v->n_ref > 0);
+ return v->n_ref == 1;
+}
+
+static int json_variant_set_source(JsonVariant **v, JsonSource *source, unsigned line, unsigned column) {
+ JsonVariant *w;
+ int r;
+
+ assert(v);
+
+ /* Patch in source and line/column number. Tries to do this in-place if the caller is the sole referencer of
+ * the object. If not, allocates a new object, possibly a surrogate for the original one */
+
+ if (!*v)
+ return 0;
+
+ if (source && line > source->max_line)
+ source->max_line = line;
+ if (source && column > source->max_column)
+ source->max_column = column;
+
+ if (!json_variant_is_regular(*v)) {
+
+ if (!source && line == 0 && column == 0)
+ return 0;
+
+ } else {
+ if (json_source_equal((*v)->source, source) &&
+ (*v)->line == line &&
+ (*v)->column == column)
+ return 0;
+
+ if (json_single_ref(*v)) { /* Sole reference? */
+ json_source_unref((*v)->source);
+ (*v)->source = json_source_ref(source);
+ (*v)->line = line;
+ (*v)->column = column;
+ return 1;
+ }
+ }
+
+ r = json_variant_copy(&w, *v);
+ if (r < 0)
+ return r;
+
+ assert(json_variant_is_regular(w));
+ assert(!w->is_embedded);
+ assert(w->n_ref == 1);
+ assert(!w->source);
+
+ w->source = json_source_ref(source);
+ w->line = line;
+ w->column = column;
+
+ json_variant_unref(*v);
+ *v = w;
+
+ return 1;
+}
+
+static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, size_t n) {
+ assert(line);
+ assert(column);
+ assert(s || n == 0);
+
+ while (n > 0) {
+
+ if (*s == '\n') {
+ (*line)++;
+ *column = 1;
+ } else if ((signed char) *s >= 0 && *s < 127) /* Process ASCII chars quickly */
+ (*column)++;
+ else {
+ int w;
+
+ w = utf8_encoded_valid_unichar(s);
+ if (w < 0) /* count invalid unichars as normal characters */
+ w = 1;
+ else if ((size_t) w > n) /* never read more than the specified number of characters */
+ w = (int) n;
+
+ (*column)++;
+
+ s += w;
+ n -= w;
+ continue;
+ }
+
+ s++;
+ n--;
+ }
+}
+
+static int unhex_ucs2(const char *c, uint16_t *ret) {
+ int aa, bb, cc, dd;
+ uint16_t x;
+
+ assert(c);
+ assert(ret);
+
+ aa = unhexchar(c[0]);
+ if (aa < 0)
+ return -EINVAL;
+
+ bb = unhexchar(c[1]);
+ if (bb < 0)
+ return -EINVAL;
+
+ cc = unhexchar(c[2]);
+ if (cc < 0)
+ return -EINVAL;
+
+ dd = unhexchar(c[3]);
+ if (dd < 0)
+ return -EINVAL;
+
+ x = ((uint16_t) aa << 12) |
+ ((uint16_t) bb << 8) |
+ ((uint16_t) cc << 4) |
+ ((uint16_t) dd);
+
+ if (x <= 0)
+ return -EINVAL;
+
+ *ret = x;
+
+ return 0;
+}
+
+static int json_parse_string(const char **p, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ size_t n = 0, allocated = 0;
+ const char *c;
+
+ assert(p);
+ assert(*p);
+ assert(ret);
+
+ c = *p;
+
+ if (*c != '"')
+ return -EINVAL;
+
+ c++;
+
+ for (;;) {
+ int len;
+
+ /* Check for EOF */
+ if (*c == 0)
+ return -EINVAL;
+
+ /* Check for control characters 0x00..0x1f */
+ if (*c > 0 && *c < ' ')
+ return -EINVAL;
+
+ /* Check for control character 0x7f */
+ if (*c == 0x7f)
+ return -EINVAL;
+
+ if (*c == '"') {
+ if (!s) {
+ s = strdup("");
+ if (!s)
+ return -ENOMEM;
+ } else
+ s[n] = 0;
+
+ *p = c + 1;
+
+ *ret = TAKE_PTR(s);
+ return JSON_TOKEN_STRING;
+ }
+
+ if (*c == '\\') {
+ char ch = 0;
+ c++;
+
+ if (*c == 0)
+ return -EINVAL;
+
+ if (IN_SET(*c, '"', '\\', '/'))
+ ch = *c;
+ else if (*c == 'b')
+ ch = '\b';
+ else if (*c == 'f')
+ ch = '\f';
+ else if (*c == 'n')
+ ch = '\n';
+ else if (*c == 'r')
+ ch = '\r';
+ else if (*c == 't')
+ ch = '\t';
+ else if (*c == 'u') {
+ char16_t x;
+ int r;
+
+ r = unhex_ucs2(c + 1, &x);
+ if (r < 0)
+ return r;
+
+ c += 5;
+
+ if (!GREEDY_REALLOC(s, allocated, n + 5))
+ return -ENOMEM;
+
+ if (!utf16_is_surrogate(x))
+ n += utf8_encode_unichar(s + n, (char32_t) x);
+ else if (utf16_is_trailing_surrogate(x))
+ return -EINVAL;
+ else {
+ char16_t y;
+
+ if (c[0] != '\\' || c[1] != 'u')
+ return -EINVAL;
+
+ r = unhex_ucs2(c + 2, &y);
+ if (r < 0)
+ return r;
+
+ c += 6;
+
+ if (!utf16_is_trailing_surrogate(y))
+ return -EINVAL;
+
+ n += utf8_encode_unichar(s + n, utf16_surrogate_pair_to_unichar(x, y));
+ }
+
+ continue;
+ } else
+ return -EINVAL;
+
+ if (!GREEDY_REALLOC(s, allocated, n + 2))
+ return -ENOMEM;
+
+ s[n++] = ch;
+ c ++;
+ continue;
+ }
+
+ len = utf8_encoded_valid_unichar(c);
+ if (len < 0)
+ return len;
+
+ if (!GREEDY_REALLOC(s, allocated, n + len + 1))
+ return -ENOMEM;
+
+ memcpy(s + n, c, len);
+ n += len;
+ c += len;
+ }
+}
+
+static int json_parse_number(const char **p, JsonValue *ret) {
+ bool negative = false, exponent_negative = false, is_real = false;
+ long double x = 0.0, y = 0.0, exponent = 0.0, shift = 1.0;
+ intmax_t i = 0;
+ uintmax_t u = 0;
+ const char *c;
+
+ assert(p);
+ assert(*p);
+ assert(ret);
+
+ c = *p;
+
+ if (*c == '-') {
+ negative = true;
+ c++;
+ }
+
+ if (*c == '0')
+ c++;
+ else {
+ if (!strchr("123456789", *c) || *c == 0)
+ return -EINVAL;
+
+ do {
+ if (!is_real) {
+ if (negative) {
+
+ if (i < INTMAX_MIN / 10) /* overflow */
+ is_real = true;
+ else {
+ intmax_t t = 10 * i;
+
+ if (t < INTMAX_MIN + (*c - '0')) /* overflow */
+ is_real = true;
+ else
+ i = t - (*c - '0');
+ }
+ } else {
+ if (u > UINTMAX_MAX / 10) /* overflow */
+ is_real = true;
+ else {
+ uintmax_t t = 10 * u;
+
+ if (t > UINTMAX_MAX - (*c - '0')) /* overflow */
+ is_real = true;
+ else
+ u = t + (*c - '0');
+ }
+ }
+ }
+
+ x = 10.0 * x + (*c - '0');
+
+ c++;
+ } while (strchr("0123456789", *c) && *c != 0);
+ }
+
+ if (*c == '.') {
+ is_real = true;
+ c++;
+
+ if (!strchr("0123456789", *c) || *c == 0)
+ return -EINVAL;
+
+ do {
+ y = 10.0 * y + (*c - '0');
+ shift = 10.0 * shift;
+ c++;
+ } while (strchr("0123456789", *c) && *c != 0);
+ }
+
+ if (IN_SET(*c, 'e', 'E')) {
+ is_real = true;
+ c++;
+
+ if (*c == '-') {
+ exponent_negative = true;
+ c++;
+ } else if (*c == '+')
+ c++;
+
+ if (!strchr("0123456789", *c) || *c == 0)
+ return -EINVAL;
+
+ do {
+ exponent = 10.0 * exponent + (*c - '0');
+ c++;
+ } while (strchr("0123456789", *c) && *c != 0);
+ }
+
+ *p = c;
+
+ if (is_real) {
+ ret->real = ((negative ? -1.0 : 1.0) * (x + (y / shift))) * exp10l((exponent_negative ? -1.0 : 1.0) * exponent);
+ return JSON_TOKEN_REAL;
+ } else if (negative) {
+ ret->integer = i;
+ return JSON_TOKEN_INTEGER;
+ } else {
+ ret->unsig = u;
+ return JSON_TOKEN_UNSIGNED;
+ }
+}
+
+int json_tokenize(
+ const char **p,
+ char **ret_string,
+ JsonValue *ret_value,
+ unsigned *ret_line, /* 'ret_line' returns the line at the beginning of this token */
+ unsigned *ret_column,
+ void **state,
+ unsigned *line, /* 'line' is used as a line state, it always reflect the line we are at after the token was read */
+ unsigned *column) {
+
+ unsigned start_line, start_column;
+ const char *start, *c;
+ size_t n;
+ int t, r;
+
+ enum {
+ STATE_NULL,
+ STATE_VALUE,
+ STATE_VALUE_POST,
+ };
+
+ assert(p);
+ assert(*p);
+ assert(ret_string);
+ assert(ret_value);
+ assert(ret_line);
+ assert(ret_column);
+ assert(line);
+ assert(column);
+ assert(state);
+
+ t = PTR_TO_INT(*state);
+ if (t == STATE_NULL) {
+ *line = 1;
+ *column = 1;
+ t = STATE_VALUE;
+ }
+
+ /* Skip over the whitespace */
+ n = strspn(*p, WHITESPACE);
+ inc_lines_columns(line, column, *p, n);
+ c = *p + n;
+
+ /* Remember where we started processing this token */
+ start = c;
+ start_line = *line;
+ start_column = *column;
+
+ if (*c == 0) {
+ *ret_string = NULL;
+ *ret_value = JSON_VALUE_NULL;
+ r = JSON_TOKEN_END;
+ goto finish;
+ }
+
+ switch (t) {
+
+ case STATE_VALUE:
+
+ if (*c == '{') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE);
+ r = JSON_TOKEN_OBJECT_OPEN;
+ goto null_return;
+
+ } else if (*c == '}') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_OBJECT_CLOSE;
+ goto null_return;
+
+ } else if (*c == '[') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE);
+ r = JSON_TOKEN_ARRAY_OPEN;
+ goto null_return;
+
+ } else if (*c == ']') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_ARRAY_CLOSE;
+ goto null_return;
+
+ } else if (*c == '"') {
+
+ r = json_parse_string(&c, ret_string);
+ if (r < 0)
+ return r;
+
+ *ret_value = JSON_VALUE_NULL;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ goto finish;
+
+ } else if (strchr("-0123456789", *c)) {
+
+ r = json_parse_number(&c, ret_value);
+ if (r < 0)
+ return r;
+
+ *ret_string = NULL;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ goto finish;
+
+ } else if (startswith(c, "true")) {
+ *ret_string = NULL;
+ ret_value->boolean = true;
+ c += 4;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_BOOLEAN;
+ goto finish;
+
+ } else if (startswith(c, "false")) {
+ *ret_string = NULL;
+ ret_value->boolean = false;
+ c += 5;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_BOOLEAN;
+ goto finish;
+
+ } else if (startswith(c, "null")) {
+ *ret_string = NULL;
+ *ret_value = JSON_VALUE_NULL;
+ c += 4;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_NULL;
+ goto finish;
+
+ }
+
+ return -EINVAL;
+
+ case STATE_VALUE_POST:
+
+ if (*c == ':') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE);
+ r = JSON_TOKEN_COLON;
+ goto null_return;
+
+ } else if (*c == ',') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE);
+ r = JSON_TOKEN_COMMA;
+ goto null_return;
+
+ } else if (*c == '}') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_OBJECT_CLOSE;
+ goto null_return;
+
+ } else if (*c == ']') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_ARRAY_CLOSE;
+ goto null_return;
+ }
+
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unexpected tokenizer state");
+ }
+
+null_return:
+ *ret_string = NULL;
+ *ret_value = JSON_VALUE_NULL;
+
+finish:
+ inc_lines_columns(line, column, start, c - start);
+ *p = c;
+
+ *ret_line = start_line;
+ *ret_column = start_column;
+
+ return r;
+}
+
+typedef enum JsonExpect {
+ /* The following values are used by json_parse() */
+ EXPECT_TOPLEVEL,
+ EXPECT_END,
+ EXPECT_OBJECT_FIRST_KEY,
+ EXPECT_OBJECT_NEXT_KEY,
+ EXPECT_OBJECT_COLON,
+ EXPECT_OBJECT_VALUE,
+ EXPECT_OBJECT_COMMA,
+ EXPECT_ARRAY_FIRST_ELEMENT,
+ EXPECT_ARRAY_NEXT_ELEMENT,
+ EXPECT_ARRAY_COMMA,
+
+ /* And these are used by json_build() */
+ EXPECT_ARRAY_ELEMENT,
+ EXPECT_OBJECT_KEY,
+} JsonExpect;
+
+typedef struct JsonStack {
+ JsonExpect expect;
+ JsonVariant **elements;
+ size_t n_elements, n_elements_allocated;
+ unsigned line_before;
+ unsigned column_before;
+ size_t n_suppress; /* When building: if > 0, suppress this many subsequent elements. If == (size_t) -1, suppress all subsequent elements */
+} JsonStack;
+
+static void json_stack_release(JsonStack *s) {
+ assert(s);
+
+ json_variant_unref_many(s->elements, s->n_elements);
+ s->elements = mfree(s->elements);
+}
+
+static int json_parse_internal(
+ const char **input,
+ JsonSource *source,
+ JsonVariant **ret,
+ unsigned *line,
+ unsigned *column,
+ bool continue_end) {
+
+ size_t n_stack = 1, n_stack_allocated = 0, i;
+ unsigned line_buffer = 0, column_buffer = 0;
+ void *tokenizer_state = NULL;
+ JsonStack *stack = NULL;
+ const char *p;
+ int r;
+
+ assert_return(input, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ p = *input;
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack))
+ return -ENOMEM;
+
+ stack[0] = (JsonStack) {
+ .expect = EXPECT_TOPLEVEL,
+ };
+
+ if (!line)
+ line = &line_buffer;
+ if (!column)
+ column = &column_buffer;
+
+ for (;;) {
+ _cleanup_free_ char *string = NULL;
+ unsigned line_token, column_token;
+ JsonVariant *add = NULL;
+ JsonStack *current;
+ JsonValue value;
+ int token;
+
+ assert(n_stack > 0);
+ current = stack + n_stack - 1;
+
+ if (continue_end && current->expect == EXPECT_END)
+ goto done;
+
+ token = json_tokenize(&p, &string, &value, &line_token, &column_token, &tokenizer_state, line, column);
+ if (token < 0) {
+ r = token;
+ goto finish;
+ }
+
+ switch (token) {
+
+ case JSON_TOKEN_END:
+ if (current->expect != EXPECT_END) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(current->n_elements == 1);
+ assert(n_stack == 1);
+ goto done;
+
+ case JSON_TOKEN_COLON:
+
+ if (current->expect != EXPECT_OBJECT_COLON) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ current->expect = EXPECT_OBJECT_VALUE;
+ break;
+
+ case JSON_TOKEN_COMMA:
+
+ if (current->expect == EXPECT_OBJECT_COMMA)
+ current->expect = EXPECT_OBJECT_NEXT_KEY;
+ else if (current->expect == EXPECT_ARRAY_COMMA)
+ current->expect = EXPECT_ARRAY_NEXT_ELEMENT;
+ else {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ break;
+
+ case JSON_TOKEN_OBJECT_OPEN:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+ current = stack + n_stack - 1;
+
+ /* Prepare the expect for when we return from the child */
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ stack[n_stack++] = (JsonStack) {
+ .expect = EXPECT_OBJECT_FIRST_KEY,
+ .line_before = line_token,
+ .column_before = column_token,
+ };
+
+ current = stack + n_stack - 1;
+ break;
+
+ case JSON_TOKEN_OBJECT_CLOSE:
+ if (!IN_SET(current->expect, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_COMMA)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(n_stack > 1);
+
+ r = json_variant_new_object(&add, current->elements, current->n_elements);
+ if (r < 0)
+ goto finish;
+
+ line_token = current->line_before;
+ column_token = current->column_before;
+
+ json_stack_release(current);
+ n_stack--, current--;
+
+ break;
+
+ case JSON_TOKEN_ARRAY_OPEN:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+ current = stack + n_stack - 1;
+
+ /* Prepare the expect for when we return from the child */
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ stack[n_stack++] = (JsonStack) {
+ .expect = EXPECT_ARRAY_FIRST_ELEMENT,
+ .line_before = line_token,
+ .column_before = column_token,
+ };
+
+ break;
+
+ case JSON_TOKEN_ARRAY_CLOSE:
+ if (!IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_COMMA)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(n_stack > 1);
+
+ r = json_variant_new_array(&add, current->elements, current->n_elements);
+ if (r < 0)
+ goto finish;
+
+ line_token = current->line_before;
+ column_token = current->column_before;
+
+ json_stack_release(current);
+ n_stack--, current--;
+ break;
+
+ case JSON_TOKEN_STRING:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_NEXT_KEY, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_string(&add, string);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (IN_SET(current->expect, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_NEXT_KEY))
+ current->expect = EXPECT_OBJECT_COLON;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_REAL:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_real(&add, value.real);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_INTEGER:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_integer(&add, value.integer);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_UNSIGNED:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_unsigned(&add, value.unsig);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_BOOLEAN:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_boolean(&add, value.boolean);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_NULL:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_null(&add);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Unexpected token");
+ }
+
+ if (add) {
+ (void) json_variant_set_source(&add, source, line_token, column_token);
+
+ if (!GREEDY_REALLOC(current->elements, current->n_elements_allocated, current->n_elements + 1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ current->elements[current->n_elements++] = add;
+ }
+ }
+
+done:
+ assert(n_stack == 1);
+ assert(stack[0].n_elements == 1);
+
+ *ret = json_variant_ref(stack[0].elements[0]);
+ *input = p;
+ r = 0;
+
+finish:
+ for (i = 0; i < n_stack; i++)
+ json_stack_release(stack + i);
+
+ free(stack);
+
+ return r;
+}
+
+int json_parse(const char *input, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) {
+ return json_parse_internal(&input, NULL, ret, ret_line, ret_column, false);
+}
+
+int json_parse_continue(const char **p, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) {
+ return json_parse_internal(p, NULL, ret, ret_line, ret_column, true);
+}
+
+int json_parse_file(FILE *f, const char *path, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) {
+ _cleanup_(json_source_unrefp) JsonSource *source = NULL;
+ _cleanup_free_ char *text = NULL;
+ const char *p;
+ int r;
+
+ if (f)
+ r = read_full_stream(f, &text, NULL);
+ else if (path)
+ r = read_full_file(path, &text, NULL);
+ else
+ return -EINVAL;
+ if (r < 0)
+ return r;
+
+ if (path) {
+ source = json_source_new(path);
+ if (!source)
+ return -ENOMEM;
+ }
+
+ p = text;
+ return json_parse_internal(&p, source, ret, ret_line, ret_column, false);
+}
+
+int json_buildv(JsonVariant **ret, va_list ap) {
+ JsonStack *stack = NULL;
+ size_t n_stack = 1, n_stack_allocated = 0, i;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack))
+ return -ENOMEM;
+
+ stack[0] = (JsonStack) {
+ .expect = EXPECT_TOPLEVEL,
+ };
+
+ for (;;) {
+ _cleanup_(json_variant_unrefp) JsonVariant *add = NULL;
+ size_t n_subtract = 0; /* how much to subtract from current->n_suppress, i.e. how many elements would
+ * have been added to the current variant */
+ JsonStack *current;
+ int command;
+
+ assert(n_stack > 0);
+ current = stack + n_stack - 1;
+
+ if (current->expect == EXPECT_END)
+ goto done;
+
+ command = va_arg(ap, int);
+
+ switch (command) {
+
+ case _JSON_BUILD_STRING: {
+ const char *p;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ p = va_arg(ap, const char *);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_string(&add, p);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_INTEGER: {
+ intmax_t j;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ j = va_arg(ap, intmax_t);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_integer(&add, j);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_UNSIGNED: {
+ uintmax_t j;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ j = va_arg(ap, uintmax_t);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_unsigned(&add, j);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_REAL: {
+ long double d;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ d = va_arg(ap, long double);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_real(&add, d);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_BOOLEAN: {
+ bool b;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ b = va_arg(ap, int);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_boolean(&add, b);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_NULL:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_null(&add);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+
+ case _JSON_BUILD_VARIANT:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ /* Note that we don't care for current->n_suppress here, after all the variant is already
+ * allocated anyway... */
+ add = va_arg(ap, JsonVariant*);
+ if (!add)
+ add = JSON_VARIANT_MAGIC_NULL;
+ else
+ json_variant_ref(add);
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+
+ case _JSON_BUILD_LITERAL: {
+ const char *l;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ l = va_arg(ap, const char *);
+
+ if (l) {
+ /* Note that we don't care for current->n_suppress here, we should generate parsing
+ * errors even in suppressed object properties */
+
+ r = json_parse(l, &add, NULL, NULL);
+ if (r < 0)
+ goto finish;
+ } else
+ add = JSON_VARIANT_MAGIC_NULL;
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_ARRAY_BEGIN:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+ current = stack + n_stack - 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ stack[n_stack++] = (JsonStack) {
+ .expect = EXPECT_ARRAY_ELEMENT,
+ .n_suppress = current->n_suppress != 0 ? (size_t) -1 : 0, /* if we shall suppress the
+ * new array, then we should
+ * also suppress all array
+ * members */
+ };
+
+ break;
+
+ case _JSON_BUILD_ARRAY_END:
+ if (current->expect != EXPECT_ARRAY_ELEMENT) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(n_stack > 1);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_array(&add, current->elements, current->n_elements);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ json_stack_release(current);
+ n_stack--, current--;
+
+ break;
+
+ case _JSON_BUILD_STRV: {
+ char **l;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ l = va_arg(ap, char **);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_array_strv(&add, l);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_OBJECT_BEGIN:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+ current = stack + n_stack - 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ stack[n_stack++] = (JsonStack) {
+ .expect = EXPECT_OBJECT_KEY,
+ .n_suppress = current->n_suppress != 0 ? (size_t) -1 : 0, /* if we shall suppress the
+ * new object, then we should
+ * also suppress all object
+ * members */
+ };
+
+ break;
+
+ case _JSON_BUILD_OBJECT_END:
+
+ if (current->expect != EXPECT_OBJECT_KEY) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(n_stack > 1);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_object(&add, current->elements, current->n_elements);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ json_stack_release(current);
+ n_stack--, current--;
+
+ break;
+
+ case _JSON_BUILD_PAIR: {
+ const char *n;
+
+ if (current->expect != EXPECT_OBJECT_KEY) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ n = va_arg(ap, const char *);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_string(&add, n);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ current->expect = EXPECT_OBJECT_VALUE;
+ break;
+ }
+
+ case _JSON_BUILD_PAIR_CONDITION: {
+ const char *n;
+ bool b;
+
+ if (current->expect != EXPECT_OBJECT_KEY) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ b = va_arg(ap, int);
+ n = va_arg(ap, const char *);
+
+ if (b && current->n_suppress == 0) {
+ r = json_variant_new_string(&add, n);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1; /* we generated one item */
+
+ if (!b && current->n_suppress != (size_t) -1)
+ current->n_suppress += 2; /* Suppress this one and the next item */
+
+ current->expect = EXPECT_OBJECT_VALUE;
+ break;
+ }}
+
+ /* If a variant was generated, add it to our current variant, but only if we are not supposed to suppress additions */
+ if (add && current->n_suppress == 0) {
+ if (!GREEDY_REALLOC(current->elements, current->n_elements_allocated, current->n_elements + 1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ current->elements[current->n_elements++] = TAKE_PTR(add);
+ }
+
+ /* If we are supposed to suppress items, let's subtract how many items where generated from that
+ * counter. Except if the counter is (size_t) -1, i.e. we shall suppress an infinite number of elements
+ * on this stack level */
+ if (current->n_suppress != (size_t) -1) {
+ if (current->n_suppress <= n_subtract) /* Saturated */
+ current->n_suppress = 0;
+ else
+ current->n_suppress -= n_subtract;
+ }
+ }
+
+done:
+ assert(n_stack == 1);
+ assert(stack[0].n_elements == 1);
+
+ *ret = json_variant_ref(stack[0].elements[0]);
+ r = 0;
+
+finish:
+ for (i = 0; i < n_stack; i++)
+ json_stack_release(stack + i);
+
+ free(stack);
+
+ va_end(ap);
+
+ return r;
+}
+
+int json_build(JsonVariant **ret, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, ret);
+ r = json_buildv(ret, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int json_log_internal(
+ JsonVariant *variant,
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) {
+
+ PROTECT_ERRNO;
+
+ unsigned source_line, source_column;
+ char buffer[LINE_MAX];
+ const char *source;
+ va_list ap;
+ int r;
+
+ errno = ERRNO_VALUE(error);
+
+ va_start(ap, format);
+ (void) vsnprintf(buffer, sizeof buffer, format, ap);
+ va_end(ap);
+
+ if (variant) {
+ r = json_variant_get_source(variant, &source, &source_line, &source_column);
+ if (r < 0)
+ return r;
+ } else {
+ source = NULL;
+ source_line = 0;
+ source_column = 0;
+ }
+
+ if (source && source_line > 0 && source_column > 0)
+ return log_struct_internal(
+ LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level),
+ error,
+ file, line, func,
+ "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR,
+ "CONFIG_FILE=%s", source,
+ "CONFIG_LINE=%u", source_line,
+ "CONFIG_COLUMN=%u", source_column,
+ LOG_MESSAGE("%s:%u: %s", source, line, buffer),
+ NULL);
+ else
+ return log_struct_internal(
+ LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level),
+ error,
+ file, line, func,
+ "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR,
+ LOG_MESSAGE("%s", buffer),
+ NULL);
+}
+
+int json_dispatch(JsonVariant *v, const JsonDispatch table[], JsonDispatchCallback bad, JsonDispatchFlags flags, void *userdata) {
+ const JsonDispatch *p;
+ size_t i, n, m;
+ int r, done = 0;
+ bool *found;
+
+ if (!json_variant_is_object(v)) {
+ json_log(v, flags, 0, "JSON variant is not an object.");
+
+ if (flags & JSON_PERMISSIVE)
+ return 0;
+
+ return -EINVAL;
+ }
+
+ for (p = table, m = 0; p->name; p++)
+ m++;
+
+ found = newa0(bool, m);
+
+ n = json_variant_elements(v);
+ for (i = 0; i < n; i += 2) {
+ JsonVariant *key, *value;
+
+ assert_se(key = json_variant_by_index(v, i));
+ assert_se(value = json_variant_by_index(v, i+1));
+
+ for (p = table; p->name; p++)
+ if (p->name == (const char*) -1 ||
+ streq_ptr(json_variant_string(key), p->name))
+ break;
+
+ if (p->name) { /* Found a matching entry! :-) */
+ JsonDispatchFlags merged_flags;
+
+ merged_flags = flags | p->flags;
+
+ if (p->type != _JSON_VARIANT_TYPE_INVALID &&
+ !json_variant_has_type(value, p->type)) {
+
+ json_log(value, merged_flags, 0,
+ "Object field '%s' has wrong type %s, expected %s.", json_variant_string(key),
+ json_variant_type_to_string(json_variant_type(value)), json_variant_type_to_string(p->type));
+
+ if (merged_flags & JSON_PERMISSIVE)
+ continue;
+
+ return -EINVAL;
+ }
+
+ if (found[p-table]) {
+ json_log(value, merged_flags, 0, "Duplicate object field '%s'.", json_variant_string(key));
+
+ if (merged_flags & JSON_PERMISSIVE)
+ continue;
+
+ return -ENOTUNIQ;
+ }
+
+ found[p-table] = true;
+
+ if (p->callback) {
+ r = p->callback(json_variant_string(key), value, merged_flags, (uint8_t*) userdata + p->offset);
+ if (r < 0) {
+ if (merged_flags & JSON_PERMISSIVE)
+ continue;
+
+ return r;
+ }
+ }
+
+ done ++;
+
+ } else { /* Didn't find a matching entry! :-( */
+
+ if (bad) {
+ r = bad(json_variant_string(key), value, flags, userdata);
+ if (r < 0) {
+ if (flags & JSON_PERMISSIVE)
+ continue;
+
+ return r;
+ } else
+ done ++;
+
+ } else {
+ json_log(value, flags, 0, "Unexpected object field '%s'.", json_variant_string(key));
+
+ if (flags & JSON_PERMISSIVE)
+ continue;
+
+ return -EADDRNOTAVAIL;
+ }
+ }
+ }
+
+ for (p = table; p->name; p++) {
+ JsonDispatchFlags merged_flags = p->flags | flags;
+
+ if ((merged_flags & JSON_MANDATORY) && !found[p-table]) {
+ json_log(v, merged_flags, 0, "Missing object field '%s'.", p->name);
+
+ if ((merged_flags & JSON_PERMISSIVE))
+ continue;
+
+ return -ENXIO;
+ }
+ }
+
+ return done;
+}
+
+int json_dispatch_boolean(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ bool *b = userdata;
+
+ assert(variant);
+ assert(b);
+
+ if (!json_variant_is_boolean(variant)) {
+ json_log(variant, flags, 0, "JSON field '%s' is not a boolean.", strna(name));
+ return -EINVAL;
+ }
+
+ *b = json_variant_boolean(variant);
+ return 0;
+}
+
+int json_dispatch_tristate(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ int *b = userdata;
+
+ assert(variant);
+ assert(b);
+
+ if (!json_variant_is_boolean(variant)) {
+ json_log(variant, flags, 0, "JSON field '%s' is not a boolean.", strna(name));
+ return -EINVAL;
+ }
+
+ *b = json_variant_boolean(variant);
+ return 0;
+}
+
+int json_dispatch_integer(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ intmax_t *i = userdata;
+
+ assert(variant);
+ assert(i);
+
+ if (!json_variant_is_integer(variant)) {
+ json_log(variant, flags, 0, "JSON field '%s' is not an integer.", strna(name));
+ return -EINVAL;
+ }
+
+ *i = json_variant_integer(variant);
+ return 0;
+}
+
+int json_dispatch_unsigned(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ uintmax_t *u = userdata;
+
+ assert(variant);
+ assert(u);
+
+ if (!json_variant_is_unsigned(variant)) {
+ json_log(variant, flags, 0, "JSON field '%s' is not an unsigned integer.", strna(name));
+ return -EINVAL;
+ }
+
+ *u = json_variant_unsigned(variant);
+ return 0;
+}
+
+int json_dispatch_uint32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ uint32_t *u = userdata;
+
+ assert(variant);
+ assert(u);
+
+ if (!json_variant_is_unsigned(variant)) {
+ json_log(variant, flags, 0, "JSON field '%s' is not an unsigned integer.", strna(name));
+ return -EINVAL;
+ }
+
+ if (json_variant_unsigned(variant) > UINT32_MAX) {
+ json_log(variant, flags, 0, "JSON field '%s' out of bounds.", strna(name));
+ return -ERANGE;
+ }
+
+ *u = (uint32_t) json_variant_unsigned(variant);
+ return 0;
+}
+
+int json_dispatch_int32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ int32_t *i = userdata;
+
+ assert(variant);
+ assert(i);
+
+ if (!json_variant_is_integer(variant)) {
+ json_log(variant, flags, 0, "JSON field '%s' is not an integer.", strna(name));
+ return -EINVAL;
+ }
+
+ if (json_variant_integer(variant) < INT32_MIN || json_variant_integer(variant) > INT32_MAX) {
+ json_log(variant, flags, 0, "JSON field '%s' out of bounds.", strna(name));
+ return -ERANGE;
+ }
+
+ *i = (int32_t) json_variant_integer(variant);
+ return 0;
+}
+
+int json_dispatch_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ int r;
+
+ assert(variant);
+ assert(s);
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant)) {
+ json_log(variant, flags, 0, "JSON field '%s' is not a string.", strna(name));
+ return -EINVAL;
+ }
+
+ r = free_and_strdup(s, json_variant_string(variant));
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+int json_dispatch_strv(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ _cleanup_strv_free_ char **l = NULL;
+ char ***s = userdata;
+ size_t i;
+ int r;
+
+ assert(variant);
+ assert(s);
+
+ if (json_variant_is_null(variant)) {
+ *s = strv_free(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_array(variant)) {
+ json_log(variant, 0, flags, "JSON field '%s' is not an array.", strna(name));
+ return -EINVAL;
+ }
+
+ for (i = 0; i < json_variant_elements(variant); i++) {
+ JsonVariant *e;
+
+ assert_se(e = json_variant_by_index(variant, i));
+
+ if (!json_variant_is_string(e)) {
+ json_log(e, 0, flags, "JSON array element is not a string.");
+ return -EINVAL;
+ }
+
+ r = strv_extend(&l, json_variant_string(e));
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to append array element: %m");
+ }
+
+ strv_free_and_replace(*s, l);
+ return 0;
+}
+
+int json_dispatch_variant(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ JsonVariant **p = userdata;
+
+ assert(variant);
+ assert(p);
+
+ json_variant_unref(*p);
+ *p = json_variant_ref(variant);
+
+ return 0;
+}
+
+static const char* const json_variant_type_table[_JSON_VARIANT_TYPE_MAX] = {
+ [JSON_VARIANT_STRING] = "string",
+ [JSON_VARIANT_INTEGER] = "integer",
+ [JSON_VARIANT_UNSIGNED] = "unsigned",
+ [JSON_VARIANT_REAL] = "real",
+ [JSON_VARIANT_NUMBER] = "number",
+ [JSON_VARIANT_BOOLEAN] = "boolean",
+ [JSON_VARIANT_ARRAY] = "array",
+ [JSON_VARIANT_OBJECT] = "object",
+ [JSON_VARIANT_NULL] = "null",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(json_variant_type, JsonVariantType);
diff --git a/src/shared/json.h b/src/shared/json.h
new file mode 100644
index 0000000..f8e035c
--- /dev/null
+++ b/src/shared/json.h
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "macro.h"
+#include "string-util.h"
+#include "util.h"
+
+/*
+ In case you wonder why we have our own JSON implementation, here are a couple of reasons why this implementation has
+ benefits over various other implementatins:
+
+ - We need support for 64bit signed and unsigned integers, i.e. the full 64,5bit range of -9223372036854775808…18446744073709551615
+ - All our variants are immutable after creation
+ - Special values such as true, false, zero, null, empty strings, empty array, empty objects require zero dynamic memory
+ - Progressive parsing
+ - Our integer/real type implicitly converts, but only if that's safe and loss-lessly possible
+ - There's a "builder" for putting together objects easily in varargs function calls
+ - There's a "dispatcher" for mapping objects to C data structures
+ - Every variant optionally carries parsing location information, which simplifies debugging and parse log error generation
+ - Formatter has color, line, column support
+
+ Limitations:
+ - Doesn't allow embedded NUL in strings
+ - Can't store integers outside of the -9223372036854775808…18446744073709551615 range (it will use 'long double' for
+ values outside this range, which is lossy)
+ - Can't store negative zero (will be treated identical to positive zero, and not retained across serialization)
+ - Can't store non-integer numbers that can't be stored in "long double" losslessly
+ - Allows creation and parsing of objects with duplicate keys. The "dispatcher" will refuse them however. This means
+ we can parse and pass around such objects, but will carefully refuse them when we convert them into our own data.
+
+ (These limitations should be pretty much in line with those of other JSON implementations, in fact might be less
+ limiting in most cases even.)
+*/
+
+typedef struct JsonVariant JsonVariant;
+
+typedef enum JsonVariantType {
+ JSON_VARIANT_STRING,
+ JSON_VARIANT_INTEGER,
+ JSON_VARIANT_UNSIGNED,
+ JSON_VARIANT_REAL,
+ JSON_VARIANT_NUMBER, /* This a pseudo-type: we can never create variants of this type, but we use it as wildcard check for the above three types */
+ JSON_VARIANT_BOOLEAN,
+ JSON_VARIANT_ARRAY,
+ JSON_VARIANT_OBJECT,
+ JSON_VARIANT_NULL,
+ _JSON_VARIANT_TYPE_MAX,
+ _JSON_VARIANT_TYPE_INVALID = -1
+} JsonVariantType;
+
+int json_variant_new_stringn(JsonVariant **ret, const char *s, size_t n);
+int json_variant_new_integer(JsonVariant **ret, intmax_t i);
+int json_variant_new_unsigned(JsonVariant **ret, uintmax_t u);
+int json_variant_new_real(JsonVariant **ret, long double d);
+int json_variant_new_boolean(JsonVariant **ret, bool b);
+int json_variant_new_array(JsonVariant **ret, JsonVariant **array, size_t n);
+int json_variant_new_array_bytes(JsonVariant **ret, const void *p, size_t n);
+int json_variant_new_array_strv(JsonVariant **ret, char **l);
+int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n);
+int json_variant_new_null(JsonVariant **ret);
+
+static inline int json_variant_new_string(JsonVariant **ret, const char *s) {
+ return json_variant_new_stringn(ret, s, strlen_ptr(s));
+}
+
+JsonVariant *json_variant_ref(JsonVariant *v);
+JsonVariant *json_variant_unref(JsonVariant *v);
+void json_variant_unref_many(JsonVariant **array, size_t n);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(JsonVariant *, json_variant_unref);
+
+const char *json_variant_string(JsonVariant *v);
+intmax_t json_variant_integer(JsonVariant *v);
+uintmax_t json_variant_unsigned(JsonVariant *v);
+long double json_variant_real(JsonVariant *v);
+bool json_variant_boolean(JsonVariant *v);
+
+JsonVariantType json_variant_type(JsonVariant *v);
+bool json_variant_has_type(JsonVariant *v, JsonVariantType type);
+
+static inline bool json_variant_is_string(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_STRING);
+}
+
+static inline bool json_variant_is_integer(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_INTEGER);
+}
+
+static inline bool json_variant_is_unsigned(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_UNSIGNED);
+}
+
+static inline bool json_variant_is_real(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_REAL);
+}
+
+static inline bool json_variant_is_number(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_NUMBER);
+}
+
+static inline bool json_variant_is_boolean(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_BOOLEAN);
+}
+
+static inline bool json_variant_is_array(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_ARRAY);
+}
+
+static inline bool json_variant_is_object(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_OBJECT);
+}
+
+static inline bool json_variant_is_null(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_NULL);
+}
+
+bool json_variant_is_negative(JsonVariant *v);
+
+size_t json_variant_elements(JsonVariant *v);
+JsonVariant *json_variant_by_index(JsonVariant *v, size_t index);
+JsonVariant *json_variant_by_key(JsonVariant *v, const char *key);
+JsonVariant *json_variant_by_key_full(JsonVariant *v, const char *key, JsonVariant **ret_key);
+
+bool json_variant_equal(JsonVariant *a, JsonVariant *b);
+
+struct json_variant_foreach_state {
+ JsonVariant *variant;
+ size_t idx;
+};
+
+#define JSON_VARIANT_ARRAY_FOREACH(i, v) \
+ for (struct json_variant_foreach_state _state = { (v), 0 }; \
+ _state.idx < json_variant_elements(_state.variant) && \
+ ({ i = json_variant_by_index(_state.variant, _state.idx); \
+ true; }); \
+ _state.idx++)
+
+#define JSON_VARIANT_OBJECT_FOREACH(k, e, v) \
+ for (struct json_variant_foreach_state _state = { (v), 0 }; \
+ _state.idx < json_variant_elements(_state.variant) && \
+ ({ k = json_variant_by_index(_state.variant, _state.idx); \
+ e = json_variant_by_index(_state.variant, _state.idx + 1); \
+ true; }); \
+ _state.idx += 2)
+
+int json_variant_get_source(JsonVariant *v, const char **ret_source, unsigned *ret_line, unsigned *ret_column);
+
+typedef enum JsonFormatFlags {
+ JSON_FORMAT_NEWLINE = 1 << 0, /* suffix with newline */
+ JSON_FORMAT_PRETTY = 1 << 1, /* add internal whitespace to appeal to human readers */
+ JSON_FORMAT_COLOR = 1 << 2, /* insert ANSI color sequences */
+ JSON_FORMAT_COLOR_AUTO = 1 << 3, /* insert ANSI color sequences if colors_enabled() says so */
+ JSON_FORMAT_SOURCE = 1 << 4, /* prefix with source filename/line/column */
+ JSON_FORMAT_SSE = 1 << 5, /* prefix/suffix with W3C server-sent events */
+ JSON_FORMAT_SEQ = 1 << 6, /* prefix/suffix with RFC 7464 application/json-seq */
+} JsonFormatFlags;
+
+int json_variant_format(JsonVariant *v, JsonFormatFlags flags, char **ret);
+void json_variant_dump(JsonVariant *v, JsonFormatFlags flags, FILE *f, const char *prefix);
+
+int json_parse(const char *string, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column);
+int json_parse_continue(const char **p, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column);
+int json_parse_file(FILE *f, const char *path, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column);
+
+enum {
+ _JSON_BUILD_STRING,
+ _JSON_BUILD_INTEGER,
+ _JSON_BUILD_UNSIGNED,
+ _JSON_BUILD_REAL,
+ _JSON_BUILD_BOOLEAN,
+ _JSON_BUILD_ARRAY_BEGIN,
+ _JSON_BUILD_ARRAY_END,
+ _JSON_BUILD_OBJECT_BEGIN,
+ _JSON_BUILD_OBJECT_END,
+ _JSON_BUILD_PAIR,
+ _JSON_BUILD_PAIR_CONDITION,
+ _JSON_BUILD_NULL,
+ _JSON_BUILD_VARIANT,
+ _JSON_BUILD_LITERAL,
+ _JSON_BUILD_STRV,
+ _JSON_BUILD_MAX,
+};
+
+#define JSON_BUILD_STRING(s) _JSON_BUILD_STRING, ({ const char *_x = s; _x; })
+#define JSON_BUILD_INTEGER(i) _JSON_BUILD_INTEGER, ({ intmax_t _x = i; _x; })
+#define JSON_BUILD_UNSIGNED(u) _JSON_BUILD_UNSIGNED, ({ uintmax_t _x = u; _x; })
+#define JSON_BUILD_REAL(d) _JSON_BUILD_REAL, ({ long double _x = d; _x; })
+#define JSON_BUILD_BOOLEAN(b) _JSON_BUILD_BOOLEAN, ({ bool _x = b; _x; })
+#define JSON_BUILD_ARRAY(...) _JSON_BUILD_ARRAY_BEGIN, __VA_ARGS__, _JSON_BUILD_ARRAY_END
+#define JSON_BUILD_OBJECT(...) _JSON_BUILD_OBJECT_BEGIN, __VA_ARGS__, _JSON_BUILD_OBJECT_END
+#define JSON_BUILD_PAIR(n, ...) _JSON_BUILD_PAIR, ({ const char *_x = n; _x; }), __VA_ARGS__
+#define JSON_BUILD_PAIR_CONDITION(c, n, ...) _JSON_BUILD_PAIR_CONDITION, ({ bool _x = c; _x; }), ({ const char *_x = n; _x; }), __VA_ARGS__
+#define JSON_BUILD_NULL _JSON_BUILD_NULL
+#define JSON_BUILD_VARIANT(v) _JSON_BUILD_VARIANT, ({ JsonVariant *_x = v; _x; })
+#define JSON_BUILD_LITERAL(l) _JSON_BUILD_LITERAL, ({ const char *_x = l; _x; })
+#define JSON_BUILD_STRV(l) _JSON_BUILD_STRV, ({ char **_x = l; _x; })
+
+int json_build(JsonVariant **ret, ...);
+int json_buildv(JsonVariant **ret, va_list ap);
+
+/* A bitmask of flags used by the dispatch logic. Note that this is a combined bit mask, that is generated from the bit
+ * mask originally passed into json_dispatch(), the individual bitmask associated with the static JsonDispatch callout
+ * entry, as well the bitmask specified for json_log() calls */
+typedef enum JsonDispatchFlags {
+ /* The following three may be set in JsonDispatch's .flags field or the json_dispatch() flags parameter */
+ JSON_PERMISSIVE = 1 << 0, /* Shall parsing errors be considered fatal for this property? */
+ JSON_MANDATORY = 1 << 1, /* Should existance of this property be mandatory? */
+ JSON_LOG = 1 << 2, /* Should the parser log about errors? */
+
+ /* The following two may be passed into log_json() in addition to the three above */
+ JSON_DEBUG = 1 << 3, /* Indicates that this log message is a debug message */
+ JSON_WARNING = 1 << 4, /* Indicates that this log message is a warning message */
+} JsonDispatchFlags;
+
+typedef int (*JsonDispatchCallback)(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+
+typedef struct JsonDispatch {
+ const char *name;
+ JsonVariantType type;
+ JsonDispatchCallback callback;
+ size_t offset;
+ JsonDispatchFlags flags;
+} JsonDispatch;
+
+int json_dispatch(JsonVariant *v, const JsonDispatch table[], JsonDispatchCallback bad, JsonDispatchFlags flags, void *userdata);
+
+int json_dispatch_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_strv(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_boolean(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_tristate(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_variant(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_integer(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_unsigned(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_uint32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_int32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+
+assert_cc(sizeof(uintmax_t) == sizeof(uint64_t))
+#define json_dispatch_uint64 json_dispatch_unsigned
+
+assert_cc(sizeof(intmax_t) == sizeof(int64_t))
+#define json_dispatch_int64 json_dispatch_integer
+
+static inline int json_dispatch_level(JsonDispatchFlags flags) {
+
+ /* Did the user request no logging? If so, then never log higher than LOG_DEBUG. Also, if this is marked as
+ * debug message, then also log at debug level. */
+
+ if (!(flags & JSON_LOG) ||
+ (flags & JSON_DEBUG))
+ return LOG_DEBUG;
+
+ /* Are we invoked in permissive mode, or is this explicitly marked as warning message? Then this should be
+ * printed at LOG_WARNING */
+ if (flags & (JSON_PERMISSIVE|JSON_WARNING))
+ return LOG_WARNING;
+
+ /* Otherwise it's an error. */
+ return LOG_ERR;
+}
+
+int json_log_internal(JsonVariant *variant, int level, int error, const char *file, int line, const char *func, const char *format, ...) _printf_(7, 8);
+
+#define json_log(variant, flags, error, ...) \
+ ({ \
+ int _level = json_dispatch_level(flags), _e = (error); \
+ (log_get_max_level() >= LOG_PRI(_level)) \
+ ? json_log_internal(variant, _level, _e, __FILE__, __LINE__, __func__, __VA_ARGS__) \
+ : -abs(_e); \
+ })
+
+#define JSON_VARIANT_STRING_CONST(x) _JSON_VARIANT_STRING_CONST(UNIQ, (x))
+
+#define _JSON_VARIANT_STRING_CONST(xq, x) \
+ ({ \
+ _align_(2) static const char UNIQ_T(json_string_const, xq)[] = (x); \
+ assert((((uintptr_t) UNIQ_T(json_string_const, xq)) & 1) == 0); \
+ (JsonVariant*) ((uintptr_t) UNIQ_T(json_string_const, xq) + 1); \
+ })
+
+const char *json_variant_type_to_string(JsonVariantType t);
+JsonVariantType json_variant_type_from_string(const char *s);
diff --git a/src/shared/libshared.sym b/src/shared/libshared.sym
new file mode 100644
index 0000000..6a7495a
--- /dev/null
+++ b/src/shared/libshared.sym
@@ -0,0 +1,3 @@
+SD_SHARED {
+ global: *;
+};
diff --git a/src/shared/linux-3.13/dm-ioctl.h b/src/shared/linux-3.13/dm-ioctl.h
new file mode 100644
index 0000000..c8a4302
--- /dev/null
+++ b/src/shared/linux-3.13/dm-ioctl.h
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
+ * Copyright (C) 2004 - 2009 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef _LINUX_DM_IOCTL_V4_H
+#define _LINUX_DM_IOCTL_V4_H
+
+#include <linux/types.h>
+
+#define DM_DIR "mapper" /* Slashes not supported */
+#define DM_CONTROL_NODE "control"
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
+#define DM_UUID_LEN 129
+
+/*
+ * A traditional ioctl interface for the device mapper.
+ *
+ * Each device can have two tables associated with it, an
+ * 'active' table which is the one currently used by io passing
+ * through the device, and an 'inactive' one which is a table
+ * that is being prepared as a replacement for the 'active' one.
+ *
+ * DM_VERSION:
+ * Just get the version information for the ioctl interface.
+ *
+ * DM_REMOVE_ALL:
+ * Remove all dm devices, destroy all tables. Only really used
+ * for debug.
+ *
+ * DM_LIST_DEVICES:
+ * Get a list of all the dm device names.
+ *
+ * DM_DEV_CREATE:
+ * Create a new device, neither the 'active' or 'inactive' table
+ * slots will be filled. The device will be in suspended state
+ * after creation, however any io to the device will get errored
+ * since it will be out-of-bounds.
+ *
+ * DM_DEV_REMOVE:
+ * Remove a device, destroy any tables.
+ *
+ * DM_DEV_RENAME:
+ * Rename a device or set its uuid if none was previously supplied.
+ *
+ * DM_SUSPEND:
+ * This performs both suspend and resume, depending which flag is
+ * passed in.
+ * Suspend: This command will not return until all pending io to
+ * the device has completed. Further io will be deferred until
+ * the device is resumed.
+ * Resume: It is no longer an error to issue this command on an
+ * unsuspended device. If a table is present in the 'inactive'
+ * slot, it will be moved to the active slot, then the old table
+ * from the active slot will be _destroyed_. Finally the device
+ * is resumed.
+ *
+ * DM_DEV_STATUS:
+ * Retrieves the status for the table in the 'active' slot.
+ *
+ * DM_DEV_WAIT:
+ * Wait for a significant event to occur to the device. This
+ * could either be caused by an event triggered by one of the
+ * targets of the table in the 'active' slot, or a table change.
+ *
+ * DM_TABLE_LOAD:
+ * Load a table into the 'inactive' slot for the device. The
+ * device does _not_ need to be suspended prior to this command.
+ *
+ * DM_TABLE_CLEAR:
+ * Destroy any table in the 'inactive' slot (ie. abort).
+ *
+ * DM_TABLE_DEPS:
+ * Return a set of device dependencies for the 'active' table.
+ *
+ * DM_TABLE_STATUS:
+ * Return the targets status for the 'active' table.
+ *
+ * DM_TARGET_MSG:
+ * Pass a message string to the target at a specific offset of a device.
+ *
+ * DM_DEV_SET_GEOMETRY:
+ * Set the geometry of a device by passing in a string in this format:
+ *
+ * "cylinders heads sectors_per_track start_sector"
+ *
+ * Beware that CHS geometry is nearly obsolete and only provided
+ * for compatibility with dm devices that can be booted by a PC
+ * BIOS. See struct hd_geometry for range limits. Also note that
+ * the geometry is erased if the device size changes.
+ */
+
+/*
+ * All ioctl arguments consist of a single chunk of memory, with
+ * this structure at the start. If a uuid is specified any
+ * lookup (eg. for a DM_INFO) will be done on that, *not* the
+ * name.
+ */
+struct dm_ioctl {
+ /*
+ * The version number is made up of three parts:
+ * major - no backward or forward compatibility,
+ * minor - only backwards compatible,
+ * patch - both backwards and forwards compatible.
+ *
+ * All clients of the ioctl interface should fill in the
+ * version number of the interface that they were
+ * compiled with.
+ *
+ * All recognised ioctl commands (ie. those that don't
+ * return -ENOTTY) fill out this field, even if the
+ * command failed.
+ */
+ __u32 version[3]; /* in/out */
+ __u32 data_size; /* total size of data passed in
+ * including this struct */
+
+ __u32 data_start; /* offset to start of data
+ * relative to start of this struct */
+
+ __u32 target_count; /* in/out */
+ __s32 open_count; /* out */
+ __u32 flags; /* in/out */
+
+ /*
+ * event_nr holds either the event number (input and output) or the
+ * udev cookie value (input only).
+ * The DM_DEV_WAIT ioctl takes an event number as input.
+ * The DM_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls
+ * use the field as a cookie to return in the DM_COOKIE
+ * variable with the uevents they issue.
+ * For output, the ioctls return the event number, not the cookie.
+ */
+ __u32 event_nr; /* in/out */
+ __u32 padding;
+
+ __u64 dev; /* in/out */
+
+ char name[DM_NAME_LEN]; /* device name */
+ char uuid[DM_UUID_LEN]; /* unique identifier for
+ * the block device */
+ char data[7]; /* padding or data */
+};
+
+/*
+ * Used to specify tables. These structures appear after the
+ * dm_ioctl.
+ */
+struct dm_target_spec {
+ __u64 sector_start;
+ __u64 length;
+ __s32 status; /* used when reading from kernel only */
+
+ /*
+ * Location of the next dm_target_spec.
+ * - When specifying targets on a DM_TABLE_LOAD command, this value is
+ * the number of bytes from the start of the "current" dm_target_spec
+ * to the start of the "next" dm_target_spec.
+ * - When retrieving targets on a DM_TABLE_STATUS command, this value
+ * is the number of bytes from the start of the first dm_target_spec
+ * (that follows the dm_ioctl struct) to the start of the "next"
+ * dm_target_spec.
+ */
+ __u32 next;
+
+ char target_type[DM_MAX_TYPE_NAME];
+
+ /*
+ * Parameter string starts immediately after this object.
+ * Be careful to add padding after string to ensure correct
+ * alignment of subsequent dm_target_spec.
+ */
+};
+
+/*
+ * Used to retrieve the target dependencies.
+ */
+struct dm_target_deps {
+ __u32 count; /* Array size */
+ __u32 padding; /* unused */
+ __u64 dev[0]; /* out */
+};
+
+/*
+ * Used to get a list of all dm devices.
+ */
+struct dm_name_list {
+ __u64 dev;
+ __u32 next; /* offset to the next record from
+ the _start_ of this */
+ char name[0];
+};
+
+/*
+ * Used to retrieve the target versions
+ */
+struct dm_target_versions {
+ __u32 next;
+ __u32 version[3];
+
+ char name[0];
+};
+
+/*
+ * Used to pass message to a target
+ */
+struct dm_target_msg {
+ __u64 sector; /* Device sector */
+
+ char message[0];
+};
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to dm-ioctl.c:lookup_ioctl()
+ */
+enum {
+ /* Top level cmds */
+ DM_VERSION_CMD = 0,
+ DM_REMOVE_ALL_CMD,
+ DM_LIST_DEVICES_CMD,
+
+ /* device level cmds */
+ DM_DEV_CREATE_CMD,
+ DM_DEV_REMOVE_CMD,
+ DM_DEV_RENAME_CMD,
+ DM_DEV_SUSPEND_CMD,
+ DM_DEV_STATUS_CMD,
+ DM_DEV_WAIT_CMD,
+
+ /* Table level cmds */
+ DM_TABLE_LOAD_CMD,
+ DM_TABLE_CLEAR_CMD,
+ DM_TABLE_DEPS_CMD,
+ DM_TABLE_STATUS_CMD,
+
+ /* Added later */
+ DM_LIST_VERSIONS_CMD,
+ DM_TARGET_MSG_CMD,
+ DM_DEV_SET_GEOMETRY_CMD
+};
+
+#define DM_IOCTL 0xfd
+
+#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
+#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
+#define DM_LIST_DEVICES _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl)
+
+#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
+#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl)
+#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl)
+#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
+#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
+#define DM_DEV_WAIT _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl)
+
+#define DM_TABLE_LOAD _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl)
+#define DM_TABLE_CLEAR _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl)
+#define DM_TABLE_DEPS _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl)
+#define DM_TABLE_STATUS _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
+
+#define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl)
+
+#define DM_TARGET_MSG _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl)
+#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
+
+#define DM_VERSION_MAJOR 4
+#define DM_VERSION_MINOR 27
+#define DM_VERSION_PATCHLEVEL 0
+#define DM_VERSION_EXTRA "-ioctl (2013-10-30)"
+
+/* Status bits */
+#define DM_READONLY_FLAG (1 << 0) /* In/Out */
+#define DM_SUSPEND_FLAG (1 << 1) /* In/Out */
+#define DM_PERSISTENT_DEV_FLAG (1 << 3) /* In */
+
+/*
+ * Flag passed into ioctl STATUS command to get table information
+ * rather than current status.
+ */
+#define DM_STATUS_TABLE_FLAG (1 << 4) /* In */
+
+/*
+ * Flags that indicate whether a table is present in either of
+ * the two table slots that a device has.
+ */
+#define DM_ACTIVE_PRESENT_FLAG (1 << 5) /* Out */
+#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */
+
+/*
+ * Indicates that the buffer passed in wasn't big enough for the
+ * results.
+ */
+#define DM_BUFFER_FULL_FLAG (1 << 8) /* Out */
+
+/*
+ * This flag is now ignored.
+ */
+#define DM_SKIP_BDGET_FLAG (1 << 9) /* In */
+
+/*
+ * Set this to avoid attempting to freeze any filesystem when suspending.
+ */
+#define DM_SKIP_LOCKFS_FLAG (1 << 10) /* In */
+
+/*
+ * Set this to suspend without flushing queued ios.
+ * Also disables flushing uncommitted changes in the thin target before
+ * generating statistics for DM_TABLE_STATUS and DM_DEV_WAIT.
+ */
+#define DM_NOFLUSH_FLAG (1 << 11) /* In */
+
+/*
+ * If set, any table information returned will relate to the inactive
+ * table instead of the live one. Always check DM_INACTIVE_PRESENT_FLAG
+ * is set before using the data returned.
+ */
+#define DM_QUERY_INACTIVE_TABLE_FLAG (1 << 12) /* In */
+
+/*
+ * If set, a uevent was generated for which the caller may need to wait.
+ */
+#define DM_UEVENT_GENERATED_FLAG (1 << 13) /* Out */
+
+/*
+ * If set, rename changes the uuid not the name. Only permitted
+ * if no uuid was previously supplied: an existing uuid cannot be changed.
+ */
+#define DM_UUID_FLAG (1 << 14) /* In */
+
+/*
+ * If set, all buffers are wiped after use. Use when sending
+ * or requesting sensitive data such as an encryption key.
+ */
+#define DM_SECURE_DATA_FLAG (1 << 15) /* In */
+
+/*
+ * If set, a message generated output data.
+ */
+#define DM_DATA_OUT_FLAG (1 << 16) /* Out */
+
+/*
+ * If set with DM_DEV_REMOVE or DM_REMOVE_ALL this indicates that if
+ * the device cannot be removed immediately because it is still in use
+ * it should instead be scheduled for removal when it gets closed.
+ *
+ * On return from DM_DEV_REMOVE, DM_DEV_STATUS or other ioctls, this
+ * flag indicates that the device is scheduled to be removed when it
+ * gets closed.
+ */
+#define DM_DEFERRED_REMOVE (1 << 17) /* In/Out */
+
+#endif /* _LINUX_DM_IOCTL_H */
diff --git a/src/shared/linux/auto_dev-ioctl.h b/src/shared/linux/auto_dev-ioctl.h
new file mode 100644
index 0000000..d9838eb
--- /dev/null
+++ b/src/shared/linux/auto_dev-ioctl.h
@@ -0,0 +1,229 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright © 2008 Red Hat, Inc. All rights reserved.
+ * Copyright © 2008 Ian Kent <raven@themaw.net>
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ */
+
+#ifndef _LINUX_AUTO_DEV_IOCTL_H
+#define _LINUX_AUTO_DEV_IOCTL_H
+
+#include <linux/auto_fs.h>
+
+#ifdef __KERNEL__
+#include <linux/string.h>
+#else
+#include <string.h>
+#endif /* __KERNEL__ */
+
+#define AUTOFS_DEVICE_NAME "autofs"
+
+#define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1
+#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0
+
+#define AUTOFS_DEVID_LEN 16
+
+#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl)
+
+/*
+ * An ioctl interface for autofs mount point control.
+ */
+
+struct args_protover {
+ __u32 version;
+};
+
+struct args_protosubver {
+ __u32 sub_version;
+};
+
+struct args_openmount {
+ __u32 devid;
+};
+
+struct args_ready {
+ __u32 token;
+};
+
+struct args_fail {
+ __u32 token;
+ __s32 status;
+};
+
+struct args_setpipefd {
+ __s32 pipefd;
+};
+
+struct args_timeout {
+ __u64 timeout;
+};
+
+struct args_requester {
+ __u32 uid;
+ __u32 gid;
+};
+
+struct args_expire {
+ __u32 how;
+};
+
+struct args_askumount {
+ __u32 may_umount;
+};
+
+struct args_ismountpoint {
+ union {
+ struct args_in {
+ __u32 type;
+ } in;
+ struct args_out {
+ __u32 devid;
+ __u32 magic;
+ } out;
+ };
+};
+
+/*
+ * All the ioctls use this structure.
+ * When sending a path size must account for the total length
+ * of the chunk of memory otherwise is is the size of the
+ * structure.
+ */
+
+struct autofs_dev_ioctl {
+ __u32 ver_major;
+ __u32 ver_minor;
+ __u32 size; /* total size of data passed in
+ * including this struct */
+ __s32 ioctlfd; /* automount command fd */
+
+ /* Command parameters */
+
+ union {
+ struct args_protover protover;
+ struct args_protosubver protosubver;
+ struct args_openmount openmount;
+ struct args_ready ready;
+ struct args_fail fail;
+ struct args_setpipefd setpipefd;
+ struct args_timeout timeout;
+ struct args_requester requester;
+ struct args_expire expire;
+ struct args_askumount askumount;
+ struct args_ismountpoint ismountpoint;
+ };
+
+ char path[0];
+};
+
+static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) {
+ memset(in, 0, sizeof(struct autofs_dev_ioctl));
+ in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
+ in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
+ in->size = sizeof(struct autofs_dev_ioctl);
+ in->ioctlfd = -1;
+ return;
+}
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to autofs-dev-ioctl.c:lookup_ioctl()
+ */
+enum {
+ /* Get various version info */
+ AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71,
+ AUTOFS_DEV_IOCTL_PROTOVER_CMD,
+ AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD,
+
+ /* Open mount ioctl fd */
+ AUTOFS_DEV_IOCTL_OPENMOUNT_CMD,
+
+ /* Close mount ioctl fd */
+ AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD,
+
+ /* Mount/expire status returns */
+ AUTOFS_DEV_IOCTL_READY_CMD,
+ AUTOFS_DEV_IOCTL_FAIL_CMD,
+
+ /* Activate/deactivate autofs mount */
+ AUTOFS_DEV_IOCTL_SETPIPEFD_CMD,
+ AUTOFS_DEV_IOCTL_CATATONIC_CMD,
+
+ /* Expiry timeout */
+ AUTOFS_DEV_IOCTL_TIMEOUT_CMD,
+
+ /* Get mount last requesting uid and gid */
+ AUTOFS_DEV_IOCTL_REQUESTER_CMD,
+
+ /* Check for eligible expire candidates */
+ AUTOFS_DEV_IOCTL_EXPIRE_CMD,
+
+ /* Request busy status */
+ AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD,
+
+ /* Check if path is a mountpoint */
+ AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD,
+};
+
+#define AUTOFS_IOCTL 0x93
+
+#define AUTOFS_DEV_IOCTL_VERSION \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOVER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_PROTOVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOSUBVER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_OPENMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CLOSEMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_READY \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_READY_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_FAIL \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_FAIL_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_SETPIPEFD \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CATATONIC \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_CATATONIC_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_TIMEOUT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_TIMEOUT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_REQUESTER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_REQUESTER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_EXPIRE \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_EXPIRE_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ASKUMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ISMOUNTPOINT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, struct autofs_dev_ioctl)
+
+#endif /* _LINUX_AUTO_DEV_IOCTL_H */
diff --git a/src/shared/linux/bpf.h b/src/shared/linux/bpf.h
new file mode 100644
index 0000000..1df9e7e
--- /dev/null
+++ b/src/shared/linux/bpf.h
@@ -0,0 +1,1109 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_H__
+#define _UAPI__LINUX_BPF_H__
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64 0x07 /* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW 0x18 /* double word (64-bit) */
+#define BPF_XADD 0xc0 /* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV 0xb0 /* mov reg to reg */
+#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END 0xd0 /* flags for endianness conversion: */
+#define BPF_TO_LE 0x00 /* convert to little-endian */
+#define BPF_TO_BE 0x08 /* convert to big-endian */
+#define BPF_FROM_LE BPF_TO_LE
+#define BPF_FROM_BE BPF_TO_BE
+
+/* jmp encodings */
+#define BPF_JNE 0x50 /* jump != */
+#define BPF_JLT 0xa0 /* LT is unsigned, '<' */
+#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */
+#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
+#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
+#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
+#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
+#define BPF_CALL 0x80 /* function call */
+#define BPF_EXIT 0x90 /* function return */
+
+/* Register numbers */
+enum {
+ BPF_REG_0 = 0,
+ BPF_REG_1,
+ BPF_REG_2,
+ BPF_REG_3,
+ BPF_REG_4,
+ BPF_REG_5,
+ BPF_REG_6,
+ BPF_REG_7,
+ BPF_REG_8,
+ BPF_REG_9,
+ BPF_REG_10,
+ __MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG __MAX_BPF_REG
+
+struct bpf_insn {
+ __u8 code; /* opcode */
+ __u8 dst_reg:4; /* dest register */
+ __u8 src_reg:4; /* source register */
+ __s16 off; /* signed offset */
+ __s32 imm; /* signed immediate constant */
+};
+
+/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
+struct bpf_lpm_trie_key {
+ __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
+ __u8 data[0]; /* Arbitrary size */
+};
+
+/* BPF syscall commands, see bpf(2) man-page for details. */
+enum bpf_cmd {
+ BPF_MAP_CREATE,
+ BPF_MAP_LOOKUP_ELEM,
+ BPF_MAP_UPDATE_ELEM,
+ BPF_MAP_DELETE_ELEM,
+ BPF_MAP_GET_NEXT_KEY,
+ BPF_PROG_LOAD,
+ BPF_OBJ_PIN,
+ BPF_OBJ_GET,
+ BPF_PROG_ATTACH,
+ BPF_PROG_DETACH,
+ BPF_PROG_TEST_RUN,
+ BPF_PROG_GET_NEXT_ID,
+ BPF_MAP_GET_NEXT_ID,
+ BPF_PROG_GET_FD_BY_ID,
+ BPF_MAP_GET_FD_BY_ID,
+ BPF_OBJ_GET_INFO_BY_FD,
+ BPF_PROG_QUERY,
+};
+
+enum bpf_map_type {
+ BPF_MAP_TYPE_UNSPEC,
+ BPF_MAP_TYPE_HASH,
+ BPF_MAP_TYPE_ARRAY,
+ BPF_MAP_TYPE_PROG_ARRAY,
+ BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ BPF_MAP_TYPE_PERCPU_HASH,
+ BPF_MAP_TYPE_PERCPU_ARRAY,
+ BPF_MAP_TYPE_STACK_TRACE,
+ BPF_MAP_TYPE_CGROUP_ARRAY,
+ BPF_MAP_TYPE_LRU_HASH,
+ BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ BPF_MAP_TYPE_LPM_TRIE,
+ BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ BPF_MAP_TYPE_HASH_OF_MAPS,
+ BPF_MAP_TYPE_DEVMAP,
+ BPF_MAP_TYPE_SOCKMAP,
+ BPF_MAP_TYPE_CPUMAP,
+};
+
+enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ BPF_PROG_TYPE_SCHED_CLS,
+ BPF_PROG_TYPE_SCHED_ACT,
+ BPF_PROG_TYPE_TRACEPOINT,
+ BPF_PROG_TYPE_XDP,
+ BPF_PROG_TYPE_PERF_EVENT,
+ BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_PROG_TYPE_LWT_IN,
+ BPF_PROG_TYPE_LWT_OUT,
+ BPF_PROG_TYPE_LWT_XMIT,
+ BPF_PROG_TYPE_SOCK_OPS,
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_CGROUP_DEVICE,
+};
+
+enum bpf_attach_type {
+ BPF_CGROUP_INET_INGRESS,
+ BPF_CGROUP_INET_EGRESS,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_SOCK_OPS,
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
+ BPF_CGROUP_DEVICE,
+ __MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
+/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
+ *
+ * NONE(default): No further bpf programs allowed in the subtree.
+ *
+ * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
+ * the program in this cgroup yields to sub-cgroup program.
+ *
+ * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
+ * that cgroup program gets run in addition to the program in this cgroup.
+ *
+ * Only one program is allowed to be attached to a cgroup with
+ * NONE or BPF_F_ALLOW_OVERRIDE flag.
+ * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
+ * release old program and attach the new one. Attach flags has to match.
+ *
+ * Multiple programs are allowed to be attached to a cgroup with
+ * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
+ * (those that were attached first, run first)
+ * The programs of sub-cgroup are executed first, then programs of
+ * this cgroup and then programs of parent cgroup.
+ * When children program makes decision (like picking TCP CA or sock bind)
+ * parent program has a chance to override it.
+ *
+ * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
+ * A cgroup with NONE doesn't allow any programs in sub-cgroups.
+ * Ex1:
+ * cgrp1 (MULTI progs A, B) ->
+ * cgrp2 (OVERRIDE prog C) ->
+ * cgrp3 (MULTI prog D) ->
+ * cgrp4 (OVERRIDE prog E) ->
+ * cgrp5 (NONE prog F)
+ * the event in cgrp5 triggers execution of F,D,A,B in that order.
+ * if prog F is detached, the execution is E,D,A,B
+ * if prog F and D are detached, the execution is E,A,B
+ * if prog F, E and D are detached, the execution is C,A,B
+ *
+ * All eligible programs are executed regardless of return code from
+ * earlier programs.
+ */
+#define BPF_F_ALLOW_OVERRIDE (1U << 0)
+#define BPF_F_ALLOW_MULTI (1U << 1)
+
+/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will perform strict alignment checking as if the kernel
+ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
+ * and NET_IP_ALIGN defined to 2.
+ */
+#define BPF_F_STRICT_ALIGNMENT (1U << 0)
+
+/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
+#define BPF_PSEUDO_MAP_FD 1
+
+/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
+ * offset to another bpf function
+ */
+#define BPF_PSEUDO_CALL 1
+
+/* flags for BPF_MAP_UPDATE_ELEM command */
+#define BPF_ANY 0 /* create new element or update existing */
+#define BPF_NOEXIST 1 /* create new element if it didn't exist */
+#define BPF_EXIST 2 /* update existing element */
+
+/* flags for BPF_MAP_CREATE command */
+#define BPF_F_NO_PREALLOC (1U << 0)
+/* Instead of having one common LRU list in the
+ * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
+ * which can scale and perform better.
+ * Note, the LRU nodes (including free nodes) cannot be moved
+ * across different LRU lists.
+ */
+#define BPF_F_NO_COMMON_LRU (1U << 1)
+/* Specify numa node during map creation */
+#define BPF_F_NUMA_NODE (1U << 2)
+
+/* flags for BPF_PROG_QUERY */
+#define BPF_F_QUERY_EFFECTIVE (1U << 0)
+
+#define BPF_OBJ_NAME_LEN 16U
+
+/* Flags for accessing BPF object */
+#define BPF_F_RDONLY (1U << 3)
+#define BPF_F_WRONLY (1U << 4)
+
+union bpf_attr {
+ struct { /* anonymous struct used by BPF_MAP_CREATE command */
+ __u32 map_type; /* one of enum bpf_map_type */
+ __u32 key_size; /* size of key in bytes */
+ __u32 value_size; /* size of value in bytes */
+ __u32 max_entries; /* max number of entries in a map */
+ __u32 map_flags; /* BPF_MAP_CREATE related
+ * flags defined above.
+ */
+ __u32 inner_map_fd; /* fd pointing to the inner map */
+ __u32 numa_node; /* numa node (effective only if
+ * BPF_F_NUMA_NODE is set).
+ */
+ char map_name[BPF_OBJ_NAME_LEN];
+ __u32 map_ifindex; /* ifindex of netdev to create on */
+ };
+
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ __u32 map_fd;
+ __aligned_u64 key;
+ union {
+ __aligned_u64 value;
+ __aligned_u64 next_key;
+ };
+ __u64 flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_LOAD command */
+ __u32 prog_type; /* one of enum bpf_prog_type */
+ __u32 insn_cnt;
+ __aligned_u64 insns;
+ __aligned_u64 license;
+ __u32 log_level; /* verbosity level of verifier */
+ __u32 log_size; /* size of user buffer */
+ __aligned_u64 log_buf; /* user supplied buffer */
+ __u32 kern_version; /* checked when prog_type=kprobe */
+ __u32 prog_flags;
+ char prog_name[BPF_OBJ_NAME_LEN];
+ __u32 prog_ifindex; /* ifindex of netdev to prep for */
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_* commands */
+ __aligned_u64 pathname;
+ __u32 bpf_fd;
+ __u32 file_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+ __u32 target_fd; /* container object to attach to */
+ __u32 attach_bpf_fd; /* eBPF program to attach */
+ __u32 attach_type;
+ __u32 attach_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+ __u32 prog_fd;
+ __u32 retval;
+ __u32 data_size_in;
+ __u32 data_size_out;
+ __aligned_u64 data_in;
+ __aligned_u64 data_out;
+ __u32 repeat;
+ __u32 duration;
+ } test;
+
+ struct { /* anonymous struct used by BPF_*_GET_*_ID */
+ union {
+ __u32 start_id;
+ __u32 prog_id;
+ __u32 map_id;
+ };
+ __u32 next_id;
+ __u32 open_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
+ __u32 bpf_fd;
+ __u32 info_len;
+ __aligned_u64 info;
+ } info;
+
+ struct { /* anonymous struct used by BPF_PROG_QUERY command */
+ __u32 target_fd; /* container object to query */
+ __u32 attach_type;
+ __u32 query_flags;
+ __u32 attach_flags;
+ __aligned_u64 prog_ids;
+ __u32 prog_cnt;
+ } query;
+} __attribute__((aligned(8)));
+
+/* BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(&map, &key)
+ * Return: Map value or NULL
+ *
+ * int bpf_map_update_elem(&map, &key, &value, flags)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_map_delete_elem(&map, &key)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_probe_read(void *dst, int size, void *src)
+ * Return: 0 on success or negative error
+ *
+ * u64 bpf_ktime_get_ns(void)
+ * Return: current ktime
+ *
+ * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
+ * Return: length of buffer written or negative error
+ *
+ * u32 bpf_prandom_u32(void)
+ * Return: random value
+ *
+ * u32 bpf_raw_smp_processor_id(void)
+ * Return: SMP processor ID
+ *
+ * int bpf_skb_store_bytes(skb, offset, from, len, flags)
+ * store bytes into packet
+ * @skb: pointer to skb
+ * @offset: offset within packet from skb->mac_header
+ * @from: pointer where to copy bytes from
+ * @len: number of bytes to store into packet
+ * @flags: bit 0 - if true, recompute skb->csum
+ * other bits - reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_l3_csum_replace(skb, offset, from, to, flags)
+ * recompute IP checksum
+ * @skb: pointer to skb
+ * @offset: offset within packet where IP checksum is located
+ * @from: old value of header field
+ * @to: new value of header field
+ * @flags: bits 0-3 - size of header field
+ * other bits - reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_l4_csum_replace(skb, offset, from, to, flags)
+ * recompute TCP/UDP checksum
+ * @skb: pointer to skb
+ * @offset: offset within packet where TCP/UDP checksum is located
+ * @from: old value of header field
+ * @to: new value of header field
+ * @flags: bits 0-3 - size of header field
+ * bit 4 - is pseudo header
+ * other bits - reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_tail_call(ctx, prog_array_map, index)
+ * jump into another BPF program
+ * @ctx: context pointer passed to next program
+ * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
+ * @index: 32-bit index inside array that selects specific program to run
+ * Return: 0 on success or negative error
+ *
+ * int bpf_clone_redirect(skb, ifindex, flags)
+ * redirect to another netdev
+ * @skb: pointer to skb
+ * @ifindex: ifindex of the net device
+ * @flags: bit 0 - if set, redirect to ingress instead of egress
+ * other bits - reserved
+ * Return: 0 on success or negative error
+ *
+ * u64 bpf_get_current_pid_tgid(void)
+ * Return: current->tgid << 32 | current->pid
+ *
+ * u64 bpf_get_current_uid_gid(void)
+ * Return: current_gid << 32 | current_uid
+ *
+ * int bpf_get_current_comm(char *buf, int size_of_buf)
+ * stores current->comm into buf
+ * Return: 0 on success or negative error
+ *
+ * u32 bpf_get_cgroup_classid(skb)
+ * retrieve a proc's classid
+ * @skb: pointer to skb
+ * Return: classid if != 0
+ *
+ * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_vlan_pop(skb)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_get_tunnel_key(skb, key, size, flags)
+ * int bpf_skb_set_tunnel_key(skb, key, size, flags)
+ * retrieve or populate tunnel metadata
+ * @skb: pointer to skb
+ * @key: pointer to 'struct bpf_tunnel_key'
+ * @size: size of 'struct bpf_tunnel_key'
+ * @flags: room for future extensions
+ * Return: 0 on success or negative error
+ *
+ * u64 bpf_perf_event_read(map, flags)
+ * read perf event counter value
+ * @map: pointer to perf_event_array map
+ * @flags: index of event in the map or bitmask flags
+ * Return: value of perf event counter read or error code
+ *
+ * int bpf_redirect(ifindex, flags)
+ * redirect to another netdev
+ * @ifindex: ifindex of the net device
+ * @flags:
+ * cls_bpf:
+ * bit 0 - if set, redirect to ingress instead of egress
+ * other bits - reserved
+ * xdp_bpf:
+ * all bits - reserved
+ * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
+ * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
+ * int bpf_redirect_map(map, key, flags)
+ * redirect to endpoint in map
+ * @map: pointer to dev map
+ * @key: index in map to lookup
+ * @flags: --
+ * Return: XDP_REDIRECT on success or XDP_ABORT on error
+ *
+ * u32 bpf_get_route_realm(skb)
+ * retrieve a dst's tclassid
+ * @skb: pointer to skb
+ * Return: realm if != 0
+ *
+ * int bpf_perf_event_output(ctx, map, flags, data, size)
+ * output perf raw sample
+ * @ctx: struct pt_regs*
+ * @map: pointer to perf_event_array map
+ * @flags: index of event in the map or bitmask flags
+ * @data: data on stack to be output as raw data
+ * @size: size of data
+ * Return: 0 on success or negative error
+ *
+ * int bpf_get_stackid(ctx, map, flags)
+ * walk user or kernel stack and return id
+ * @ctx: struct pt_regs*
+ * @map: pointer to stack_trace map
+ * @flags: bits 0-7 - numer of stack frames to skip
+ * bit 8 - collect user stack instead of kernel
+ * bit 9 - compare stacks by hash only
+ * bit 10 - if two different stacks hash into the same stackid
+ * discard old
+ * other bits - reserved
+ * Return: >= 0 stackid on success or negative error
+ *
+ * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
+ * calculate csum diff
+ * @from: raw from buffer
+ * @from_size: length of from buffer
+ * @to: raw to buffer
+ * @to_size: length of to buffer
+ * @seed: optional seed
+ * Return: csum result or negative error code
+ *
+ * int bpf_skb_get_tunnel_opt(skb, opt, size)
+ * retrieve tunnel options metadata
+ * @skb: pointer to skb
+ * @opt: pointer to raw tunnel option data
+ * @size: size of @opt
+ * Return: option size
+ *
+ * int bpf_skb_set_tunnel_opt(skb, opt, size)
+ * populate tunnel options metadata
+ * @skb: pointer to skb
+ * @opt: pointer to raw tunnel option data
+ * @size: size of @opt
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_change_proto(skb, proto, flags)
+ * Change protocol of the skb. Currently supported is v4 -> v6,
+ * v6 -> v4 transitions. The helper will also resize the skb. eBPF
+ * program is expected to fill the new headers via skb_store_bytes
+ * and lX_csum_replace.
+ * @skb: pointer to skb
+ * @proto: new skb->protocol type
+ * @flags: reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_change_type(skb, type)
+ * Change packet type of skb.
+ * @skb: pointer to skb
+ * @type: new skb->pkt_type type
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_under_cgroup(skb, map, index)
+ * Check cgroup2 membership of skb
+ * @skb: pointer to skb
+ * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ * @index: index of the cgroup in the bpf_map
+ * Return:
+ * == 0 skb failed the cgroup2 descendant test
+ * == 1 skb succeeded the cgroup2 descendant test
+ * < 0 error
+ *
+ * u32 bpf_get_hash_recalc(skb)
+ * Retrieve and possibly recalculate skb->hash.
+ * @skb: pointer to skb
+ * Return: hash
+ *
+ * u64 bpf_get_current_task(void)
+ * Returns current task_struct
+ * Return: current
+ *
+ * int bpf_probe_write_user(void *dst, void *src, int len)
+ * safely attempt to write to a location
+ * @dst: destination address in userspace
+ * @src: source address on stack
+ * @len: number of bytes to copy
+ * Return: 0 on success or negative error
+ *
+ * int bpf_current_task_under_cgroup(map, index)
+ * Check cgroup2 membership of current task
+ * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ * @index: index of the cgroup in the bpf_map
+ * Return:
+ * == 0 current failed the cgroup2 descendant test
+ * == 1 current succeeded the cgroup2 descendant test
+ * < 0 error
+ *
+ * int bpf_skb_change_tail(skb, len, flags)
+ * The helper will resize the skb to the given new size, to be used f.e.
+ * with control messages.
+ * @skb: pointer to skb
+ * @len: new skb length
+ * @flags: reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_pull_data(skb, len)
+ * The helper will pull in non-linear data in case the skb is non-linear
+ * and not all of len are part of the linear section. Only needed for
+ * read/write with direct packet access.
+ * @skb: pointer to skb
+ * @len: len to make read/writeable
+ * Return: 0 on success or negative error
+ *
+ * s64 bpf_csum_update(skb, csum)
+ * Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
+ * @skb: pointer to skb
+ * @csum: csum to add
+ * Return: csum on success or negative error
+ *
+ * void bpf_set_hash_invalid(skb)
+ * Invalidate current skb->hash.
+ * @skb: pointer to skb
+ *
+ * int bpf_get_numa_node_id()
+ * Return: Id of current NUMA node.
+ *
+ * int bpf_skb_change_head()
+ * Grows headroom of skb and adjusts MAC header offset accordingly.
+ * Will extends/reallocae as required automatically.
+ * May change skb data pointer and will thus invalidate any check
+ * performed for direct packet access.
+ * @skb: pointer to skb
+ * @len: length of header to be pushed in front
+ * @flags: Flags (unused for now)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_xdp_adjust_head(xdp_md, delta)
+ * Adjust the xdp_md.data by delta
+ * @xdp_md: pointer to xdp_md
+ * @delta: An positive/negative integer to be added to xdp_md.data
+ * Return: 0 on success or negative on error
+ *
+ * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * Copy a NUL terminated string from unsafe address. In case the string
+ * length is smaller than size, the target is not padded with further NUL
+ * bytes. In case the string length is larger than size, just count-1
+ * bytes are copied and the last byte is set to NUL.
+ * @dst: destination address
+ * @size: maximum number of bytes to copy, including the trailing NUL
+ * @unsafe_ptr: unsafe address
+ * Return:
+ * > 0 length of the string including the trailing NUL on success
+ * < 0 error
+ *
+ * u64 bpf_get_socket_cookie(skb)
+ * Get the cookie for the socket stored inside sk_buff.
+ * @skb: pointer to skb
+ * Return: 8 Bytes non-decreasing number on success or 0 if the socket
+ * field is missing inside sk_buff
+ *
+ * u32 bpf_get_socket_uid(skb)
+ * Get the owner uid of the socket stored inside sk_buff.
+ * @skb: pointer to skb
+ * Return: uid of the socket owner on success or overflowuid if failed.
+ *
+ * u32 bpf_set_hash(skb, hash)
+ * Set full skb->hash.
+ * @skb: pointer to skb
+ * @hash: hash to set
+ *
+ * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
+ * Calls setsockopt. Not all opts are available, only those with
+ * integer optvals plus TCP_CONGESTION.
+ * Supported levels: SOL_SOCKET and IPPROTO_TCP
+ * @bpf_socket: pointer to bpf_socket
+ * @level: SOL_SOCKET or IPPROTO_TCP
+ * @optname: option name
+ * @optval: pointer to option value
+ * @optlen: length of optval in bytes
+ * Return: 0 or negative error
+ *
+ * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen)
+ * Calls getsockopt. Not all opts are available.
+ * Supported levels: IPPROTO_TCP
+ * @bpf_socket: pointer to bpf_socket
+ * @level: IPPROTO_TCP
+ * @optname: option name
+ * @optval: pointer to option value
+ * @optlen: length of optval in bytes
+ * Return: 0 or negative error
+ *
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ * Set callback flags for sock_ops
+ * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ * @flags: flags value
+ * Return: 0 for no error
+ * -EINVAL if there is no full tcp socket
+ * bits in flags that are not supported by current kernel
+ *
+ * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
+ * Grow or shrink room in sk_buff.
+ * @skb: pointer to skb
+ * @len_diff: (signed) amount of room to grow/shrink
+ * @mode: operation mode (enum bpf_adj_room_mode)
+ * @flags: reserved for future use
+ * Return: 0 on success or negative error code
+ *
+ * int bpf_sk_redirect_map(map, key, flags)
+ * Redirect skb to a sock in map using key as a lookup key for the
+ * sock in map.
+ * @map: pointer to sockmap
+ * @key: key to lookup sock in map
+ * @flags: reserved for future use
+ * Return: SK_PASS
+ *
+ * int bpf_sock_map_update(skops, map, key, flags)
+ * @skops: pointer to bpf_sock_ops
+ * @map: pointer to sockmap to update
+ * @key: key to insert/update sock in map
+ * @flags: same flags as map update elem
+ *
+ * int bpf_xdp_adjust_meta(xdp_md, delta)
+ * Adjust the xdp_md.data_meta by delta
+ * @xdp_md: pointer to xdp_md
+ * @delta: An positive/negative integer to be added to xdp_md.data_meta
+ * Return: 0 on success or negative on error
+ *
+ * int bpf_perf_event_read_value(map, flags, buf, buf_size)
+ * read perf event counter value and perf event enabled/running time
+ * @map: pointer to perf_event_array map
+ * @flags: index of event in the map or bitmask flags
+ * @buf: buf to fill
+ * @buf_size: size of the buf
+ * Return: 0 on success or negative error code
+ *
+ * int bpf_perf_prog_read_value(ctx, buf, buf_size)
+ * read perf prog attached perf event counter and enabled/running time
+ * @ctx: pointer to ctx
+ * @buf: buf to fill
+ * @buf_size: size of the buf
+ * Return : 0 on success or negative error code
+ *
+ * int bpf_override_return(pt_regs, rc)
+ * @pt_regs: pointer to struct pt_regs
+ * @rc: the return value to set
+ */
+#define __BPF_FUNC_MAPPER(FN) \
+ FN(unspec), \
+ FN(map_lookup_elem), \
+ FN(map_update_elem), \
+ FN(map_delete_elem), \
+ FN(probe_read), \
+ FN(ktime_get_ns), \
+ FN(trace_printk), \
+ FN(get_prandom_u32), \
+ FN(get_smp_processor_id), \
+ FN(skb_store_bytes), \
+ FN(l3_csum_replace), \
+ FN(l4_csum_replace), \
+ FN(tail_call), \
+ FN(clone_redirect), \
+ FN(get_current_pid_tgid), \
+ FN(get_current_uid_gid), \
+ FN(get_current_comm), \
+ FN(get_cgroup_classid), \
+ FN(skb_vlan_push), \
+ FN(skb_vlan_pop), \
+ FN(skb_get_tunnel_key), \
+ FN(skb_set_tunnel_key), \
+ FN(perf_event_read), \
+ FN(redirect), \
+ FN(get_route_realm), \
+ FN(perf_event_output), \
+ FN(skb_load_bytes), \
+ FN(get_stackid), \
+ FN(csum_diff), \
+ FN(skb_get_tunnel_opt), \
+ FN(skb_set_tunnel_opt), \
+ FN(skb_change_proto), \
+ FN(skb_change_type), \
+ FN(skb_under_cgroup), \
+ FN(get_hash_recalc), \
+ FN(get_current_task), \
+ FN(probe_write_user), \
+ FN(current_task_under_cgroup), \
+ FN(skb_change_tail), \
+ FN(skb_pull_data), \
+ FN(csum_update), \
+ FN(set_hash_invalid), \
+ FN(get_numa_node_id), \
+ FN(skb_change_head), \
+ FN(xdp_adjust_head), \
+ FN(probe_read_str), \
+ FN(get_socket_cookie), \
+ FN(get_socket_uid), \
+ FN(set_hash), \
+ FN(setsockopt), \
+ FN(skb_adjust_room), \
+ FN(redirect_map), \
+ FN(sk_redirect_map), \
+ FN(sock_map_update), \
+ FN(xdp_adjust_meta), \
+ FN(perf_event_read_value), \
+ FN(perf_prog_read_value), \
+ FN(getsockopt), \
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set),
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
+enum bpf_func_id {
+ __BPF_FUNC_MAPPER(__BPF_ENUM_FN)
+ __BPF_FUNC_MAX_ID,
+};
+#undef __BPF_ENUM_FN
+
+/* All flags used by eBPF helper functions, placed here. */
+
+/* BPF_FUNC_skb_store_bytes flags. */
+#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
+#define BPF_F_INVALIDATE_HASH (1ULL << 1)
+
+/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
+ * First 4 bits are for passing the header field size.
+ */
+#define BPF_F_HDR_FIELD_MASK 0xfULL
+
+/* BPF_FUNC_l4_csum_replace flags. */
+#define BPF_F_PSEUDO_HDR (1ULL << 4)
+#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
+#define BPF_F_MARK_ENFORCE (1ULL << 6)
+
+/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
+#define BPF_F_INGRESS (1ULL << 0)
+
+/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
+#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
+
+/* BPF_FUNC_get_stackid flags. */
+#define BPF_F_SKIP_FIELD_MASK 0xffULL
+#define BPF_F_USER_STACK (1ULL << 8)
+#define BPF_F_FAST_STACK_CMP (1ULL << 9)
+#define BPF_F_REUSE_STACKID (1ULL << 10)
+
+/* BPF_FUNC_skb_set_tunnel_key flags. */
+#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
+#define BPF_F_DONT_FRAGMENT (1ULL << 2)
+
+/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
+ * BPF_FUNC_perf_event_read_value flags.
+ */
+#define BPF_F_INDEX_MASK 0xffffffffULL
+#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
+
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+ BPF_ADJ_ROOM_NET,
+};
+
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+ __u32 len;
+ __u32 pkt_type;
+ __u32 mark;
+ __u32 queue_mapping;
+ __u32 protocol;
+ __u32 vlan_present;
+ __u32 vlan_tci;
+ __u32 vlan_proto;
+ __u32 priority;
+ __u32 ingress_ifindex;
+ __u32 ifindex;
+ __u32 tc_index;
+ __u32 cb[5];
+ __u32 hash;
+ __u32 tc_classid;
+ __u32 data;
+ __u32 data_end;
+ __u32 napi_id;
+
+ /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ /* ... here. */
+
+ __u32 data_meta;
+};
+
+struct bpf_tunnel_key {
+ __u32 tunnel_id;
+ union {
+ __u32 remote_ipv4;
+ __u32 remote_ipv6[4];
+ };
+ __u8 tunnel_tos;
+ __u8 tunnel_ttl;
+ __u16 tunnel_ext;
+ __u32 tunnel_label;
+};
+
+/* Generic BPF return codes which all BPF program types may support.
+ * The values are binary compatible with their TC_ACT_* counter-part to
+ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
+ * programs.
+ *
+ * XDP is handled seprately, see XDP_*.
+ */
+enum bpf_ret_code {
+ BPF_OK = 0,
+ /* 1 reserved */
+ BPF_DROP = 2,
+ /* 3-6 reserved */
+ BPF_REDIRECT = 7,
+ /* >127 are reserved for prog type specific return codes */
+};
+
+struct bpf_sock {
+ __u32 bound_dev_if;
+ __u32 family;
+ __u32 type;
+ __u32 protocol;
+ __u32 mark;
+ __u32 priority;
+};
+
+#define XDP_PACKET_HEADROOM 256
+
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will
+ * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
+ */
+enum xdp_action {
+ XDP_ABORTED = 0,
+ XDP_DROP,
+ XDP_PASS,
+ XDP_TX,
+ XDP_REDIRECT,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+ __u32 data;
+ __u32 data_end;
+ __u32 data_meta;
+ /* Below access go through struct xdp_rxq_info */
+ __u32 ingress_ifindex; /* rxq->dev->ifindex */
+ __u32 rx_queue_index; /* rxq->queue_index */
+};
+
+enum sk_action {
+ SK_DROP = 0,
+ SK_PASS,
+};
+
+#define BPF_TAG_SIZE 8
+
+struct bpf_prog_info {
+ __u32 type;
+ __u32 id;
+ __u8 tag[BPF_TAG_SIZE];
+ __u32 jited_prog_len;
+ __u32 xlated_prog_len;
+ __aligned_u64 jited_prog_insns;
+ __aligned_u64 xlated_prog_insns;
+ __u64 load_time; /* ns since boottime */
+ __u32 created_by_uid;
+ __u32 nr_map_ids;
+ __aligned_u64 map_ids;
+ char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u64 netns_dev;
+ __u64 netns_ino;
+} __attribute__((aligned(8)));
+
+struct bpf_map_info {
+ __u32 type;
+ __u32 id;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 map_flags;
+ char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u64 netns_dev;
+ __u64 netns_ino;
+} __attribute__((aligned(8)));
+
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * Some of this fields are in network (bigendian) byte order and may need
+ * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h).
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+ __u32 op;
+ union {
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
+ };
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ __u32 is_fullsock; /* Some TCP fields are only valid if
+ * there is a full socket. If not, the
+ * fields read as zero.
+ */
+ __u32 snd_cwnd;
+ __u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
+};
+
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+ * supported cb flags
+ */
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+ BPF_SOCK_OPS_VOID,
+ BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or
+ * -1 if default value should be used
+ */
+ BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized
+ * window (in packets) or -1 if default
+ * value should be used
+ */
+ BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an
+ * active connection is initialized
+ */
+ BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an
+ * active connection is
+ * established
+ */
+ BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a
+ * passive connection is
+ * established
+ */
+ BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
+ * needs ECN
+ */
+ BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is
+ * based on the path and may be
+ * dependent on the congestion control
+ * algorithm. In general it indicates
+ * a congestion threshold. RTTs above
+ * this indicate congestion
+ */
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
+};
+
+#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
+#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
+
+struct bpf_perf_event_value {
+ __u64 counter;
+ __u64 enabled;
+ __u64 running;
+};
+
+#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
+#define BPF_DEVCG_ACC_READ (1ULL << 1)
+#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
+
+#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
+#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
+
+struct bpf_cgroup_dev_ctx {
+ /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
+ __u32 access_type;
+ __u32 major;
+ __u32 minor;
+};
+
+#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/src/shared/linux/bpf_common.h b/src/shared/linux/bpf_common.h
new file mode 100644
index 0000000..afe7433
--- /dev/null
+++ b/src/shared/linux/bpf_common.h
@@ -0,0 +1,55 @@
+#ifndef __LINUX_BPF_COMMON_H__
+#define __LINUX_BPF_COMMON_H__
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define BPF_LD 0x00
+#define BPF_LDX 0x01
+#define BPF_ST 0x02
+#define BPF_STX 0x03
+#define BPF_ALU 0x04
+#define BPF_JMP 0x05
+#define BPF_RET 0x06
+#define BPF_MISC 0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define BPF_W 0x00
+#define BPF_H 0x08
+#define BPF_B 0x10
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_MOD 0x90
+#define BPF_XOR 0xa0
+
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+#ifndef BPF_MAXINSNS
+#define BPF_MAXINSNS 4096
+#endif
+
+#endif /* __LINUX_BPF_COMMON_H__ */
diff --git a/src/shared/linux/libbpf.h b/src/shared/linux/libbpf.h
new file mode 100644
index 0000000..391eee5
--- /dev/null
+++ b/src/shared/linux/libbpf.h
@@ -0,0 +1,207 @@
+/* eBPF mini library */
+#ifndef __LIBBPF_H
+#define __LIBBPF_H
+
+#include <linux/bpf.h>
+
+struct bpf_insn;
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_MOV32_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_MOV32_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM) \
+ BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = (__u32) (IMM) }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((__u64) (IMM)) >> 32 })
+
+#ifndef BPF_PSEUDO_MAP_FD
+# define BPF_PSEUDO_MAP_FD 1
+#endif
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Unconditional jumps */
+
+#define BPF_JMP_A(OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_JA, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = CODE, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
+#endif
diff --git a/src/shared/lockfile-util.c b/src/shared/lockfile-util.c
new file mode 100644
index 0000000..260c208
--- /dev/null
+++ b/src/shared/lockfile-util.c
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "lockfile-util.h"
+#include "macro.h"
+#include "missing_fcntl.h"
+#include "path-util.h"
+
+int make_lock_file(const char *p, int operation, LockFile *ret) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ /*
+ * We use UNPOSIX locks if they are available. They have nice
+ * semantics, and are mostly compatible with NFS. However,
+ * they are only available on new kernels. When we detect we
+ * are running on an older kernel, then we fall back to good
+ * old BSD locks. They also have nice semantics, but are
+ * slightly problematic on NFS, where they are upgraded to
+ * POSIX locks, even though locally they are orthogonal to
+ * POSIX locks.
+ */
+
+ t = strdup(p);
+ if (!t)
+ return -ENOMEM;
+
+ for (;;) {
+ struct flock fl = {
+ .l_type = (operation & ~LOCK_NB) == LOCK_EX ? F_WRLCK : F_RDLCK,
+ .l_whence = SEEK_SET,
+ };
+ struct stat st;
+
+ fd = open(p, O_CREAT|O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NOCTTY, 0600);
+ if (fd < 0)
+ return -errno;
+
+ r = fcntl(fd, (operation & LOCK_NB) ? F_OFD_SETLK : F_OFD_SETLKW, &fl);
+ if (r < 0) {
+
+ /* If the kernel is too old, use good old BSD locks */
+ if (errno == EINVAL)
+ r = flock(fd, operation);
+
+ if (r < 0)
+ return errno == EAGAIN ? -EBUSY : -errno;
+ }
+
+ /* If we acquired the lock, let's check if the file
+ * still exists in the file system. If not, then the
+ * previous exclusive owner removed it and then closed
+ * it. In such a case our acquired lock is worthless,
+ * hence try again. */
+
+ r = fstat(fd, &st);
+ if (r < 0)
+ return -errno;
+ if (st.st_nlink > 0)
+ break;
+
+ fd = safe_close(fd);
+ }
+
+ ret->path = t;
+ ret->fd = fd;
+ ret->operation = operation;
+
+ fd = -1;
+ t = NULL;
+
+ return r;
+}
+
+int make_lock_file_for(const char *p, int operation, LockFile *ret) {
+ const char *fn;
+ char *t;
+
+ assert(p);
+ assert(ret);
+
+ fn = basename(p);
+ if (!filename_is_valid(fn))
+ return -EINVAL;
+
+ t = newa(char, strlen(p) + 2 + 4 + 1);
+ stpcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), ".#"), fn), ".lck");
+
+ return make_lock_file(t, operation, ret);
+}
+
+void release_lock_file(LockFile *f) {
+ int r;
+
+ if (!f)
+ return;
+
+ if (f->path) {
+
+ /* If we are the exclusive owner we can safely delete
+ * the lock file itself. If we are not the exclusive
+ * owner, we can try becoming it. */
+
+ if (f->fd >= 0 &&
+ (f->operation & ~LOCK_NB) == LOCK_SH) {
+ static const struct flock fl = {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ };
+
+ r = fcntl(f->fd, F_OFD_SETLK, &fl);
+ if (r < 0 && errno == EINVAL)
+ r = flock(f->fd, LOCK_EX|LOCK_NB);
+
+ if (r >= 0)
+ f->operation = LOCK_EX|LOCK_NB;
+ }
+
+ if ((f->operation & ~LOCK_NB) == LOCK_EX)
+ unlink_noerrno(f->path);
+
+ f->path = mfree(f->path);
+ }
+
+ f->fd = safe_close(f->fd);
+ f->operation = 0;
+}
diff --git a/src/shared/lockfile-util.h b/src/shared/lockfile-util.h
new file mode 100644
index 0000000..e0eef34
--- /dev/null
+++ b/src/shared/lockfile-util.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct LockFile {
+ char *path;
+ int fd;
+ int operation;
+} LockFile;
+
+int make_lock_file(const char *p, int operation, LockFile *ret);
+int make_lock_file_for(const char *p, int operation, LockFile *ret);
+void release_lock_file(LockFile *f);
+
+#define LOCK_FILE_INIT { .fd = -1, .path = NULL }
diff --git a/src/shared/logs-show.c b/src/shared/logs-show.c
new file mode 100644
index 0000000..15ef0f1
--- /dev/null
+++ b/src/shared/logs-show.c
@@ -0,0 +1,1462 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <syslog.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "io-util.h"
+#include "journal-internal.h"
+#include "json.h"
+#include "log.h"
+#include "logs-show.h"
+#include "macro.h"
+#include "output-mode.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "sparse-endian.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+
+/* up to three lines (each up to 100 characters) or 300 characters, whichever is less */
+#define PRINT_LINE_THRESHOLD 3
+#define PRINT_CHAR_THRESHOLD 300
+
+#define JSON_THRESHOLD 4096U
+
+static int print_catalog(FILE *f, sd_journal *j) {
+ int r;
+ _cleanup_free_ char *t = NULL, *z = NULL;
+
+ r = sd_journal_get_catalog(j, &t);
+ if (r < 0)
+ return r;
+
+ z = strreplace(strstrip(t), "\n", "\n-- ");
+ if (!z)
+ return log_oom();
+
+ fputs("-- ", f);
+ fputs(z, f);
+ fputc('\n', f);
+
+ return 0;
+}
+
+static int parse_field(const void *data, size_t length, const char *field, size_t field_len, char **target, size_t *target_len) {
+ size_t nl;
+ char *buf;
+
+ assert(data);
+ assert(field);
+ assert(target);
+
+ if (length < field_len)
+ return 0;
+
+ if (memcmp(data, field, field_len))
+ return 0;
+
+ nl = length - field_len;
+
+ buf = newdup_suffix0(char, (const char*) data + field_len, nl);
+ if (!buf)
+ return log_oom();
+
+ free(*target);
+ *target = buf;
+
+ if (target_len)
+ *target_len = nl;
+
+ return 1;
+}
+
+typedef struct ParseFieldVec {
+ const char *field;
+ size_t field_len;
+ char **target;
+ size_t *target_len;
+} ParseFieldVec;
+
+#define PARSE_FIELD_VEC_ENTRY(_field, _target, _target_len) \
+ { .field = _field, .field_len = strlen(_field), .target = _target, .target_len = _target_len }
+
+static int parse_fieldv(const void *data, size_t length, const ParseFieldVec *fields, unsigned n_fields) {
+ unsigned i;
+
+ for (i = 0; i < n_fields; i++) {
+ const ParseFieldVec *f = &fields[i];
+ int r;
+
+ r = parse_field(data, length, f->field, f->field_len, f->target, f->target_len);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ break;
+ }
+
+ return 0;
+}
+
+static int field_set_test(Set *fields, const char *name, size_t n) {
+ char *s = NULL;
+
+ if (!fields)
+ return 1;
+
+ s = strndupa(name, n);
+ if (!s)
+ return log_oom();
+
+ return set_get(fields, s) ? 1 : 0;
+}
+
+static bool shall_print(const char *p, size_t l, OutputFlags flags) {
+ assert(p);
+
+ if (flags & OUTPUT_SHOW_ALL)
+ return true;
+
+ if (l >= PRINT_CHAR_THRESHOLD)
+ return false;
+
+ if (!utf8_is_printable(p, l))
+ return false;
+
+ return true;
+}
+
+static bool print_multiline(
+ FILE *f,
+ unsigned prefix,
+ unsigned n_columns,
+ OutputFlags flags,
+ int priority,
+ const char* message,
+ size_t message_len,
+ size_t highlight[2]) {
+
+ const char *color_on = "", *color_off = "", *highlight_on = "";
+ const char *pos, *end;
+ bool ellipsized = false;
+ int line = 0;
+
+ if (flags & OUTPUT_COLOR) {
+ if (priority <= LOG_ERR) {
+ color_on = ANSI_HIGHLIGHT_RED;
+ color_off = ANSI_NORMAL;
+ highlight_on = ANSI_HIGHLIGHT;
+ } else if (priority <= LOG_NOTICE) {
+ color_on = ANSI_HIGHLIGHT;
+ color_off = ANSI_NORMAL;
+ highlight_on = ANSI_HIGHLIGHT_RED;
+ } else if (priority >= LOG_DEBUG) {
+ color_on = ANSI_GREY;
+ color_off = ANSI_NORMAL;
+ highlight_on = ANSI_HIGHLIGHT_RED;
+ }
+ }
+
+ /* A special case: make sure that we print a newline when
+ the message is empty. */
+ if (message_len == 0)
+ fputs("\n", f);
+
+ for (pos = message;
+ pos < message + message_len;
+ pos = end + 1, line++) {
+ bool continuation = line > 0;
+ bool tail_line;
+ int len;
+ for (end = pos; end < message + message_len && *end != '\n'; end++)
+ ;
+ len = end - pos;
+ assert(len >= 0);
+
+ /* We need to figure out when we are showing not-last line, *and*
+ * will skip subsequent lines. In that case, we will put the dots
+ * at the end of the line, instead of putting dots in the middle
+ * or not at all.
+ */
+ tail_line =
+ line + 1 == PRINT_LINE_THRESHOLD ||
+ end + 1 >= message + PRINT_CHAR_THRESHOLD;
+
+ if (flags & (OUTPUT_FULL_WIDTH | OUTPUT_SHOW_ALL) ||
+ (prefix + len + 1 < n_columns && !tail_line)) {
+ if (highlight &&
+ (size_t) (pos - message) <= highlight[0] &&
+ highlight[0] < (size_t) len) {
+
+ fprintf(f, "%*s%s%.*s",
+ continuation * prefix, "",
+ color_on, (int) highlight[0], pos);
+ fprintf(f, "%s%.*s",
+ highlight_on,
+ (int) (MIN((size_t) len, highlight[1]) - highlight[0]),
+ pos + highlight[0]);
+ if ((size_t) len > highlight[1])
+ fprintf(f, "%s%.*s",
+ color_on,
+ (int) (len - highlight[1]),
+ pos + highlight[1]);
+ fprintf(f, "%s\n", color_off);
+
+ } else
+ fprintf(f, "%*s%s%.*s%s\n",
+ continuation * prefix, "",
+ color_on, len, pos, color_off);
+ continue;
+ }
+
+ /* Beyond this point, ellipsization will happen. */
+ ellipsized = true;
+
+ if (prefix < n_columns && n_columns - prefix >= 3) {
+ if (n_columns - prefix > (unsigned) len + 3)
+ fprintf(f, "%*s%s%.*s...%s\n",
+ continuation * prefix, "",
+ color_on, len, pos, color_off);
+ else {
+ _cleanup_free_ char *e;
+
+ e = ellipsize_mem(pos, len, n_columns - prefix,
+ tail_line ? 100 : 90);
+ if (!e)
+ fprintf(f, "%*s%s%.*s%s\n",
+ continuation * prefix, "",
+ color_on, len, pos, color_off);
+ else
+ fprintf(f, "%*s%s%s%s\n",
+ continuation * prefix, "",
+ color_on, e, color_off);
+ }
+ } else
+ fputs("...\n", f);
+
+ if (tail_line)
+ break;
+ }
+
+ return ellipsized;
+}
+
+static int output_timestamp_monotonic(FILE *f, sd_journal *j, const char *monotonic) {
+ sd_id128_t boot_id;
+ uint64_t t;
+ int r;
+
+ assert(f);
+ assert(j);
+
+ r = -ENXIO;
+ if (monotonic)
+ r = safe_atou64(monotonic, &t);
+ if (r < 0)
+ r = sd_journal_get_monotonic_usec(j, &t, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ fprintf(f, "[%5"PRI_USEC".%06"PRI_USEC"]", t / USEC_PER_SEC, t % USEC_PER_SEC);
+ return 1 + 5 + 1 + 6 + 1;
+}
+
+static int output_timestamp_realtime(FILE *f, sd_journal *j, OutputMode mode, OutputFlags flags, const char *realtime) {
+ char buf[MAX(FORMAT_TIMESTAMP_MAX, 64)];
+ struct tm *(*gettime_r)(const time_t *, struct tm *);
+ struct tm tm;
+ uint64_t x;
+ time_t t;
+ int r;
+
+ assert(f);
+ assert(j);
+
+ if (realtime)
+ r = safe_atou64(realtime, &x);
+ if (!realtime || r < 0 || !VALID_REALTIME(x))
+ r = sd_journal_get_realtime_usec(j, &x);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ if (IN_SET(mode, OUTPUT_SHORT_FULL, OUTPUT_WITH_UNIT)) {
+ const char *k;
+
+ if (flags & OUTPUT_UTC)
+ k = format_timestamp_utc(buf, sizeof(buf), x);
+ else
+ k = format_timestamp(buf, sizeof(buf), x);
+ if (!k)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format timestamp: %" PRIu64, x);
+
+ } else {
+ char usec[7];
+
+ gettime_r = (flags & OUTPUT_UTC) ? gmtime_r : localtime_r;
+ t = (time_t) (x / USEC_PER_SEC);
+
+ switch (mode) {
+
+ case OUTPUT_SHORT_UNIX:
+ xsprintf(buf, "%10"PRI_TIME".%06"PRIu64, t, x % USEC_PER_SEC);
+ break;
+
+ case OUTPUT_SHORT_ISO:
+ if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S%z", gettime_r(&t, &tm)) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format ISO time");
+ break;
+
+ case OUTPUT_SHORT_ISO_PRECISE:
+ /* No usec in strftime, so we leave space and copy over */
+ if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S.xxxxxx%z", gettime_r(&t, &tm)) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format ISO-precise time");
+ xsprintf(usec, "%06"PRI_USEC, x % USEC_PER_SEC);
+ memcpy(buf + 20, usec, 6);
+ break;
+
+ case OUTPUT_SHORT:
+ case OUTPUT_SHORT_PRECISE:
+
+ if (strftime(buf, sizeof(buf), "%b %d %H:%M:%S", gettime_r(&t, &tm)) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format syslog time");
+
+ if (mode == OUTPUT_SHORT_PRECISE) {
+ size_t k;
+
+ assert(sizeof(buf) > strlen(buf));
+ k = sizeof(buf) - strlen(buf);
+
+ r = snprintf(buf + strlen(buf), k, ".%06"PRIu64, x % USEC_PER_SEC);
+ if (r <= 0 || (size_t) r >= k) /* too long? */
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format precise time");
+ }
+ break;
+
+ default:
+ assert_not_reached("Unknown time format");
+ }
+ }
+
+ fputs(buf, f);
+ return (int) strlen(buf);
+}
+
+static int output_short(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ Set *output_fields,
+ const size_t highlight[2]) {
+
+ int r;
+ const void *data;
+ size_t length;
+ size_t n = 0;
+ _cleanup_free_ char *hostname = NULL, *identifier = NULL, *comm = NULL, *pid = NULL, *fake_pid = NULL, *message = NULL, *realtime = NULL, *monotonic = NULL, *priority = NULL, *unit = NULL, *user_unit = NULL;
+ size_t hostname_len = 0, identifier_len = 0, comm_len = 0, pid_len = 0, fake_pid_len = 0, message_len = 0, realtime_len = 0, monotonic_len = 0, priority_len = 0, unit_len = 0, user_unit_len = 0;
+ int p = LOG_INFO;
+ bool ellipsized = false;
+ const ParseFieldVec fields[] = {
+ PARSE_FIELD_VEC_ENTRY("_PID=", &pid, &pid_len),
+ PARSE_FIELD_VEC_ENTRY("_COMM=", &comm, &comm_len),
+ PARSE_FIELD_VEC_ENTRY("MESSAGE=", &message, &message_len),
+ PARSE_FIELD_VEC_ENTRY("PRIORITY=", &priority, &priority_len),
+ PARSE_FIELD_VEC_ENTRY("_HOSTNAME=", &hostname, &hostname_len),
+ PARSE_FIELD_VEC_ENTRY("SYSLOG_PID=", &fake_pid, &fake_pid_len),
+ PARSE_FIELD_VEC_ENTRY("SYSLOG_IDENTIFIER=", &identifier, &identifier_len),
+ PARSE_FIELD_VEC_ENTRY("_SOURCE_REALTIME_TIMESTAMP=", &realtime, &realtime_len),
+ PARSE_FIELD_VEC_ENTRY("_SOURCE_MONOTONIC_TIMESTAMP=", &monotonic, &monotonic_len),
+ PARSE_FIELD_VEC_ENTRY("_SYSTEMD_UNIT=", &unit, &unit_len),
+ PARSE_FIELD_VEC_ENTRY("_SYSTEMD_USER_UNIT=", &user_unit, &user_unit_len),
+ };
+ size_t highlight_shifted[] = {highlight ? highlight[0] : 0, highlight ? highlight[1] : 0};
+
+ assert(f);
+ assert(j);
+
+ /* Set the threshold to one bigger than the actual print
+ * threshold, so that if the line is actually longer than what
+ * we're willing to print, ellipsization will occur. This way
+ * we won't output a misleading line without any indication of
+ * truncation.
+ */
+ sd_journal_set_data_threshold(j, flags & (OUTPUT_SHOW_ALL|OUTPUT_FULL_WIDTH) ? 0 : PRINT_CHAR_THRESHOLD + 1);
+
+ JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) {
+ r = parse_fieldv(data, length, fields, ELEMENTSOF(fields));
+ if (r < 0)
+ return r;
+ }
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to get journal fields: %m");
+
+ if (!message) {
+ log_debug("Skipping message without MESSAGE= field.");
+ return 0;
+ }
+
+ if (!(flags & OUTPUT_SHOW_ALL))
+ strip_tab_ansi(&message, &message_len, highlight_shifted);
+
+ if (priority_len == 1 && *priority >= '0' && *priority <= '7')
+ p = *priority - '0';
+
+ if (mode == OUTPUT_SHORT_MONOTONIC)
+ r = output_timestamp_monotonic(f, j, monotonic);
+ else
+ r = output_timestamp_realtime(f, j, mode, flags, realtime);
+ if (r < 0)
+ return r;
+ n += r;
+
+ if (flags & OUTPUT_NO_HOSTNAME) {
+ /* Suppress display of the hostname if this is requested. */
+ hostname = mfree(hostname);
+ hostname_len = 0;
+ }
+
+ if (hostname && shall_print(hostname, hostname_len, flags)) {
+ fprintf(f, " %.*s", (int) hostname_len, hostname);
+ n += hostname_len + 1;
+ }
+
+ if (mode == OUTPUT_WITH_UNIT && ((unit && shall_print(unit, unit_len, flags)) || (user_unit && shall_print(user_unit, user_unit_len, flags)))) {
+ if (unit) {
+ fprintf(f, " %.*s", (int) unit_len, unit);
+ n += unit_len + 1;
+ }
+ if (user_unit) {
+ if (unit)
+ fprintf(f, "/%.*s", (int) user_unit_len, user_unit);
+ else
+ fprintf(f, " %.*s", (int) user_unit_len, user_unit);
+ n += unit_len + 1;
+ }
+ } else if (identifier && shall_print(identifier, identifier_len, flags)) {
+ fprintf(f, " %.*s", (int) identifier_len, identifier);
+ n += identifier_len + 1;
+ } else if (comm && shall_print(comm, comm_len, flags)) {
+ fprintf(f, " %.*s", (int) comm_len, comm);
+ n += comm_len + 1;
+ } else
+ fputs(" unknown", f);
+
+ if (pid && shall_print(pid, pid_len, flags)) {
+ fprintf(f, "[%.*s]", (int) pid_len, pid);
+ n += pid_len + 2;
+ } else if (fake_pid && shall_print(fake_pid, fake_pid_len, flags)) {
+ fprintf(f, "[%.*s]", (int) fake_pid_len, fake_pid);
+ n += fake_pid_len + 2;
+ }
+
+ if (!(flags & OUTPUT_SHOW_ALL) && !utf8_is_printable(message, message_len)) {
+ char bytes[FORMAT_BYTES_MAX];
+ fprintf(f, ": [%s blob data]\n", format_bytes(bytes, sizeof(bytes), message_len));
+ } else {
+ fputs(": ", f);
+ ellipsized |=
+ print_multiline(f, n + 2, n_columns, flags, p,
+ message, message_len,
+ highlight_shifted);
+ }
+
+ if (flags & OUTPUT_CATALOG)
+ print_catalog(f, j);
+
+ return ellipsized;
+}
+
+static int output_verbose(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ Set *output_fields,
+ const size_t highlight[2]) {
+
+ const void *data;
+ size_t length;
+ _cleanup_free_ char *cursor = NULL;
+ uint64_t realtime = 0;
+ char ts[FORMAT_TIMESTAMP_MAX + 7];
+ const char *timestamp;
+ int r;
+
+ assert(f);
+ assert(j);
+
+ sd_journal_set_data_threshold(j, 0);
+
+ r = sd_journal_get_data(j, "_SOURCE_REALTIME_TIMESTAMP", &data, &length);
+ if (r == -ENOENT)
+ log_debug("Source realtime timestamp not found");
+ else if (r < 0)
+ return log_full_errno(r == -EADDRNOTAVAIL ? LOG_DEBUG : LOG_ERR, r, "Failed to get source realtime timestamp: %m");
+ else {
+ _cleanup_free_ char *value = NULL;
+
+ r = parse_field(data, length, "_SOURCE_REALTIME_TIMESTAMP=",
+ STRLEN("_SOURCE_REALTIME_TIMESTAMP="), &value,
+ NULL);
+ if (r < 0)
+ return r;
+ assert(r > 0);
+
+ r = safe_atou64(value, &realtime);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse realtime timestamp: %m");
+ }
+
+ if (r < 0) {
+ r = sd_journal_get_realtime_usec(j, &realtime);
+ if (r < 0)
+ return log_full_errno(r == -EADDRNOTAVAIL ? LOG_DEBUG : LOG_ERR, r, "Failed to get realtime timestamp: %m");
+ }
+
+ r = sd_journal_get_cursor(j, &cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cursor: %m");
+
+ timestamp = flags & OUTPUT_UTC ? format_timestamp_us_utc(ts, sizeof ts, realtime)
+ : format_timestamp_us(ts, sizeof ts, realtime);
+ fprintf(f, "%s [%s]\n",
+ timestamp ?: "(no timestamp)",
+ cursor);
+
+ JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) {
+ const char *c;
+ int fieldlen;
+ const char *on = "", *off = "";
+
+ c = memchr(data, '=', length);
+ if (!c)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid field.");
+ fieldlen = c - (const char*) data;
+
+ r = field_set_test(output_fields, data, fieldlen);
+ if (r < 0)
+ return r;
+ if (!r)
+ continue;
+
+ if (flags & OUTPUT_COLOR && startswith(data, "MESSAGE=")) {
+ on = ANSI_HIGHLIGHT;
+ off = ANSI_NORMAL;
+ }
+
+ if ((flags & OUTPUT_SHOW_ALL) ||
+ (((length < PRINT_CHAR_THRESHOLD) || flags & OUTPUT_FULL_WIDTH)
+ && utf8_is_printable(data, length))) {
+ fprintf(f, " %s%.*s=", on, fieldlen, (const char*)data);
+ print_multiline(f, 4 + fieldlen + 1, 0, OUTPUT_FULL_WIDTH, 0, c + 1, length - fieldlen - 1, NULL);
+ fputs(off, f);
+ } else {
+ char bytes[FORMAT_BYTES_MAX];
+
+ fprintf(f, " %s%.*s=[%s blob data]%s\n",
+ on,
+ (int) (c - (const char*) data),
+ (const char*) data,
+ format_bytes(bytes, sizeof(bytes), length - (c - (const char *) data) - 1),
+ off);
+ }
+ }
+
+ if (r < 0)
+ return r;
+
+ if (flags & OUTPUT_CATALOG)
+ print_catalog(f, j);
+
+ return 0;
+}
+
+static int output_export(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ Set *output_fields,
+ const size_t highlight[2]) {
+
+ sd_id128_t boot_id;
+ char sid[33];
+ int r;
+ usec_t realtime, monotonic;
+ _cleanup_free_ char *cursor = NULL;
+ const void *data;
+ size_t length;
+
+ assert(j);
+
+ sd_journal_set_data_threshold(j, 0);
+
+ r = sd_journal_get_realtime_usec(j, &realtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ r = sd_journal_get_monotonic_usec(j, &monotonic, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ r = sd_journal_get_cursor(j, &cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cursor: %m");
+
+ fprintf(f,
+ "__CURSOR=%s\n"
+ "__REALTIME_TIMESTAMP="USEC_FMT"\n"
+ "__MONOTONIC_TIMESTAMP="USEC_FMT"\n"
+ "_BOOT_ID=%s\n",
+ cursor,
+ realtime,
+ monotonic,
+ sd_id128_to_string(boot_id, sid));
+
+ JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) {
+ const char *c;
+
+ /* We already printed the boot id from the data in the header, hence let's suppress it here */
+ if (memory_startswith(data, length, "_BOOT_ID="))
+ continue;
+
+ c = memchr(data, '=', length);
+ if (!c)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid field.");
+
+ r = field_set_test(output_fields, data, c - (const char *) data);
+ if (r < 0)
+ return r;
+ if (!r)
+ continue;
+
+ if (utf8_is_printable_newline(data, length, false))
+ fwrite(data, length, 1, f);
+ else {
+ uint64_t le64;
+
+ fwrite(data, c - (const char*) data, 1, f);
+ fputc('\n', f);
+ le64 = htole64(length - (c - (const char*) data) - 1);
+ fwrite(&le64, sizeof(le64), 1, f);
+ fwrite(c + 1, length - (c - (const char*) data) - 1, 1, f);
+ }
+
+ fputc('\n', f);
+ }
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ return 0;
+ }
+
+ if (r < 0)
+ return r;
+
+ fputc('\n', f);
+
+ return 0;
+}
+
+void json_escape(
+ FILE *f,
+ const char* p,
+ size_t l,
+ OutputFlags flags) {
+
+ assert(f);
+ assert(p);
+
+ if (!(flags & OUTPUT_SHOW_ALL) && l >= JSON_THRESHOLD)
+ fputs("null", f);
+
+ else if (!(flags & OUTPUT_SHOW_ALL) && !utf8_is_printable(p, l)) {
+ bool not_first = false;
+
+ fputs("[ ", f);
+
+ while (l > 0) {
+ if (not_first)
+ fprintf(f, ", %u", (uint8_t) *p);
+ else {
+ not_first = true;
+ fprintf(f, "%u", (uint8_t) *p);
+ }
+
+ p++;
+ l--;
+ }
+
+ fputs(" ]", f);
+ } else {
+ fputc('"', f);
+
+ while (l > 0) {
+ if (IN_SET(*p, '"', '\\')) {
+ fputc('\\', f);
+ fputc(*p, f);
+ } else if (*p == '\n')
+ fputs("\\n", f);
+ else if ((uint8_t) *p < ' ')
+ fprintf(f, "\\u%04x", (uint8_t) *p);
+ else
+ fputc(*p, f);
+
+ p++;
+ l--;
+ }
+
+ fputc('"', f);
+ }
+}
+
+struct json_data {
+ JsonVariant* name;
+ size_t n_values;
+ JsonVariant* values[];
+};
+
+static int update_json_data(
+ Hashmap *h,
+ OutputFlags flags,
+ const char *name,
+ const void *value,
+ size_t size) {
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ struct json_data *d;
+ int r;
+
+ if (!(flags & OUTPUT_SHOW_ALL) && strlen(name) + 1 + size >= JSON_THRESHOLD)
+ r = json_variant_new_null(&v);
+ else if (utf8_is_printable(value, size))
+ r = json_variant_new_stringn(&v, value, size);
+ else
+ r = json_variant_new_array_bytes(&v, value, size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate JSON data: %m");
+
+ d = hashmap_get(h, name);
+ if (d) {
+ struct json_data *w;
+
+ w = realloc(d, offsetof(struct json_data, values) + sizeof(JsonVariant*) * (d->n_values + 1));
+ if (!w)
+ return log_oom();
+
+ d = w;
+ assert_se(hashmap_update(h, json_variant_string(d->name), d) >= 0);
+ } else {
+ _cleanup_(json_variant_unrefp) JsonVariant *n = NULL;
+
+ r = json_variant_new_string(&n, name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate JSON name variant: %m");
+
+ d = malloc0(offsetof(struct json_data, values) + sizeof(JsonVariant*));
+ if (!d)
+ return log_oom();
+
+ r = hashmap_put(h, json_variant_string(n), d);
+ if (r < 0) {
+ free(d);
+ return log_error_errno(r, "Failed to insert JSON name into hashmap: %m");
+ }
+
+ d->name = TAKE_PTR(n);
+ }
+
+ d->values[d->n_values++] = TAKE_PTR(v);
+ return 0;
+}
+
+static int update_json_data_split(
+ Hashmap *h,
+ OutputFlags flags,
+ Set *output_fields,
+ const void *data,
+ size_t size) {
+
+ const char *eq;
+ char *name;
+
+ assert(h);
+ assert(data || size == 0);
+
+ if (memory_startswith(data, size, "_BOOT_ID="))
+ return 0;
+
+ eq = memchr(data, '=', MIN(size, JSON_THRESHOLD));
+ if (!eq)
+ return 0;
+
+ if (eq == data)
+ return 0;
+
+ name = strndupa(data, eq - (const char*) data);
+ if (output_fields && !set_get(output_fields, name))
+ return 0;
+
+ return update_json_data(h, flags, name, eq + 1, size - (eq - (const char*) data) - 1);
+}
+
+static int output_json(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ Set *output_fields,
+ const size_t highlight[2]) {
+
+ char sid[SD_ID128_STRING_MAX], usecbuf[DECIMAL_STR_MAX(usec_t)];
+ _cleanup_(json_variant_unrefp) JsonVariant *object = NULL;
+ _cleanup_free_ char *cursor = NULL;
+ uint64_t realtime, monotonic;
+ JsonVariant **array = NULL;
+ struct json_data *d;
+ sd_id128_t boot_id;
+ Hashmap *h = NULL;
+ size_t n = 0;
+ Iterator i;
+ int r;
+
+ assert(j);
+
+ (void) sd_journal_set_data_threshold(j, flags & OUTPUT_SHOW_ALL ? 0 : JSON_THRESHOLD);
+
+ r = sd_journal_get_realtime_usec(j, &realtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ r = sd_journal_get_monotonic_usec(j, &monotonic, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ r = sd_journal_get_cursor(j, &cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cursor: %m");
+
+ h = hashmap_new(&string_hash_ops);
+ if (!h)
+ return log_oom();
+
+ r = update_json_data(h, flags, "__CURSOR", cursor, strlen(cursor));
+ if (r < 0)
+ goto finish;
+
+ xsprintf(usecbuf, USEC_FMT, realtime);
+ r = update_json_data(h, flags, "__REALTIME_TIMESTAMP", usecbuf, strlen(usecbuf));
+ if (r < 0)
+ goto finish;
+
+ xsprintf(usecbuf, USEC_FMT, monotonic);
+ r = update_json_data(h, flags, "__MONOTONIC_TIMESTAMP", usecbuf, strlen(usecbuf));
+ if (r < 0)
+ goto finish;
+
+ sd_id128_to_string(boot_id, sid);
+ r = update_json_data(h, flags, "_BOOT_ID", sid, strlen(sid));
+ if (r < 0)
+ goto finish;
+
+ for (;;) {
+ const void *data;
+ size_t size;
+
+ r = sd_journal_enumerate_data(j, &data, &size);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ r = 0;
+ goto finish;
+ }
+ if (r < 0) {
+ log_error_errno(r, "Failed to read journal: %m");
+ goto finish;
+ }
+ if (r == 0)
+ break;
+
+ r = update_json_data_split(h, flags, output_fields, data, size);
+ if (r < 0)
+ goto finish;
+ }
+
+ array = new(JsonVariant*, hashmap_size(h)*2);
+ if (!array) {
+ r = log_oom();
+ goto finish;
+ }
+
+ HASHMAP_FOREACH(d, h, i) {
+ assert(d->n_values > 0);
+
+ array[n++] = json_variant_ref(d->name);
+
+ if (d->n_values == 1)
+ array[n++] = json_variant_ref(d->values[0]);
+ else {
+ _cleanup_(json_variant_unrefp) JsonVariant *q = NULL;
+
+ r = json_variant_new_array(&q, d->values, d->n_values);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create JSON array: %m");
+ goto finish;
+ }
+
+ array[n++] = TAKE_PTR(q);
+ }
+ }
+
+ r = json_variant_new_object(&object, array, n);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate JSON object: %m");
+ goto finish;
+ }
+
+ json_variant_dump(object,
+ output_mode_to_json_format_flags(mode) |
+ (FLAGS_SET(flags, OUTPUT_COLOR) ? JSON_FORMAT_COLOR : 0),
+ f, NULL);
+
+ r = 0;
+
+finish:
+ while ((d = hashmap_steal_first(h))) {
+ size_t k;
+
+ json_variant_unref(d->name);
+ for (k = 0; k < d->n_values; k++)
+ json_variant_unref(d->values[k]);
+
+ free(d);
+ }
+
+ hashmap_free(h);
+
+ json_variant_unref_many(array, n);
+ free(array);
+
+ return r;
+}
+
+static int output_cat(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ Set *output_fields,
+ const size_t highlight[2]) {
+
+ const void *data;
+ size_t l;
+ int r;
+ const char *highlight_on = "", *highlight_off = "";
+
+ assert(j);
+ assert(f);
+
+ if (flags & OUTPUT_COLOR) {
+ highlight_on = ANSI_HIGHLIGHT_RED;
+ highlight_off = ANSI_NORMAL;
+ }
+
+ sd_journal_set_data_threshold(j, 0);
+
+ r = sd_journal_get_data(j, "MESSAGE", &data, &l);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ return 0;
+ }
+ if (r < 0) {
+ /* An entry without MESSAGE=? */
+ if (r == -ENOENT)
+ return 0;
+
+ return log_error_errno(r, "Failed to get data: %m");
+ }
+
+ assert(l >= 8);
+
+ if (highlight && (flags & OUTPUT_COLOR)) {
+ assert(highlight[0] <= highlight[1]);
+ assert(highlight[1] <= l - 8);
+
+ fwrite((const char*) data + 8, 1, highlight[0], f);
+ fwrite(highlight_on, 1, strlen(highlight_on), f);
+ fwrite((const char*) data + 8 + highlight[0], 1, highlight[1] - highlight[0], f);
+ fwrite(highlight_off, 1, strlen(highlight_off), f);
+ fwrite((const char*) data + 8 + highlight[1], 1, l - 8 - highlight[1], f);
+ } else
+ fwrite((const char*) data + 8, 1, l - 8, f);
+ fputc('\n', f);
+
+ return 0;
+}
+
+static int (*output_funcs[_OUTPUT_MODE_MAX])(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ Set *output_fields,
+ const size_t highlight[2]) = {
+
+ [OUTPUT_SHORT] = output_short,
+ [OUTPUT_SHORT_ISO] = output_short,
+ [OUTPUT_SHORT_ISO_PRECISE] = output_short,
+ [OUTPUT_SHORT_PRECISE] = output_short,
+ [OUTPUT_SHORT_MONOTONIC] = output_short,
+ [OUTPUT_SHORT_UNIX] = output_short,
+ [OUTPUT_SHORT_FULL] = output_short,
+ [OUTPUT_VERBOSE] = output_verbose,
+ [OUTPUT_EXPORT] = output_export,
+ [OUTPUT_JSON] = output_json,
+ [OUTPUT_JSON_PRETTY] = output_json,
+ [OUTPUT_JSON_SSE] = output_json,
+ [OUTPUT_JSON_SEQ] = output_json,
+ [OUTPUT_CAT] = output_cat,
+ [OUTPUT_WITH_UNIT] = output_short,
+};
+
+int show_journal_entry(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ char **output_fields,
+ const size_t highlight[2],
+ bool *ellipsized) {
+
+ int ret;
+ _cleanup_set_free_free_ Set *fields = NULL;
+ assert(mode >= 0);
+ assert(mode < _OUTPUT_MODE_MAX);
+
+ if (n_columns <= 0)
+ n_columns = columns();
+
+ if (output_fields) {
+ fields = set_new(&string_hash_ops);
+ if (!fields)
+ return log_oom();
+
+ ret = set_put_strdupv(fields, output_fields);
+ if (ret < 0)
+ return ret;
+ }
+
+ ret = output_funcs[mode](f, j, mode, n_columns, flags, fields, highlight);
+
+ if (ellipsized && ret > 0)
+ *ellipsized = true;
+
+ return ret;
+}
+
+static int maybe_print_begin_newline(FILE *f, OutputFlags *flags) {
+ assert(f);
+ assert(flags);
+
+ if (!(*flags & OUTPUT_BEGIN_NEWLINE))
+ return 0;
+
+ /* Print a beginning new line if that's request, but only once
+ * on the first line we print. */
+
+ fputc('\n', f);
+ *flags &= ~OUTPUT_BEGIN_NEWLINE;
+ return 0;
+}
+
+int show_journal(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ OutputFlags flags,
+ bool *ellipsized) {
+
+ int r;
+ unsigned line = 0;
+ bool need_seek = false;
+ int warn_cutoff = flags & OUTPUT_WARN_CUTOFF;
+
+ assert(j);
+ assert(mode >= 0);
+ assert(mode < _OUTPUT_MODE_MAX);
+
+ if (how_many == (unsigned) -1)
+ need_seek = true;
+ else {
+ /* Seek to end */
+ r = sd_journal_seek_tail(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to seek to tail: %m");
+
+ r = sd_journal_previous_skip(j, how_many);
+ if (r < 0)
+ return log_error_errno(r, "Failed to skip previous: %m");
+ }
+
+ for (;;) {
+ for (;;) {
+ usec_t usec;
+
+ if (need_seek) {
+ r = sd_journal_next(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to iterate through journal: %m");
+ }
+
+ if (r == 0)
+ break;
+
+ need_seek = true;
+
+ if (not_before > 0) {
+ r = sd_journal_get_monotonic_usec(j, &usec, NULL);
+
+ /* -ESTALE is returned if the
+ timestamp is not from this boot */
+ if (r == -ESTALE)
+ continue;
+ else if (r < 0)
+ return log_error_errno(r, "Failed to get journal time: %m");
+
+ if (usec < not_before)
+ continue;
+ }
+
+ line++;
+ maybe_print_begin_newline(f, &flags);
+
+ r = show_journal_entry(f, j, mode, n_columns, flags, NULL, NULL, ellipsized);
+ if (r < 0)
+ return r;
+ }
+
+ if (warn_cutoff && line < how_many && not_before > 0) {
+ sd_id128_t boot_id;
+ usec_t cutoff = 0;
+
+ /* Check whether the cutoff line is too early */
+
+ r = sd_id128_get_boot(&boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get boot id: %m");
+
+ r = sd_journal_get_cutoff_monotonic_usec(j, boot_id, &cutoff, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get journal cutoff time: %m");
+
+ if (r > 0 && not_before < cutoff) {
+ maybe_print_begin_newline(f, &flags);
+ fprintf(f, "Warning: Journal has been rotated since unit was started. Log output is incomplete or unavailable.\n");
+ }
+
+ warn_cutoff = false;
+ }
+
+ if (!(flags & OUTPUT_FOLLOW))
+ break;
+
+ r = sd_journal_wait(j, USEC_INFINITY);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for journal: %m");
+
+ }
+
+ return 0;
+}
+
+int add_matches_for_unit(sd_journal *j, const char *unit) {
+ const char *m1, *m2, *m3, *m4;
+ int r;
+
+ assert(j);
+ assert(unit);
+
+ m1 = strjoina("_SYSTEMD_UNIT=", unit);
+ m2 = strjoina("COREDUMP_UNIT=", unit);
+ m3 = strjoina("UNIT=", unit);
+ m4 = strjoina("OBJECT_SYSTEMD_UNIT=", unit);
+
+ (void)(
+ /* Look for messages from the service itself */
+ (r = sd_journal_add_match(j, m1, 0)) ||
+
+ /* Look for coredumps of the service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, "MESSAGE_ID=fc2e22bc6ee647b6b90729ab34a250b1", 0)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0)) ||
+ (r = sd_journal_add_match(j, m2, 0)) ||
+
+ /* Look for messages from PID 1 about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, "_PID=1", 0)) ||
+ (r = sd_journal_add_match(j, m3, 0)) ||
+
+ /* Look for messages from authorized daemons about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0)) ||
+ (r = sd_journal_add_match(j, m4, 0))
+ );
+
+ if (r == 0 && endswith(unit, ".slice")) {
+ const char *m5;
+
+ m5 = strjoina("_SYSTEMD_SLICE=", unit);
+
+ /* Show all messages belonging to a slice */
+ (void)(
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m5, 0))
+ );
+ }
+
+ return r;
+}
+
+int add_matches_for_user_unit(sd_journal *j, const char *unit, uid_t uid) {
+ int r;
+ char *m1, *m2, *m3, *m4;
+ char muid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)];
+
+ assert(j);
+ assert(unit);
+
+ m1 = strjoina("_SYSTEMD_USER_UNIT=", unit);
+ m2 = strjoina("USER_UNIT=", unit);
+ m3 = strjoina("COREDUMP_USER_UNIT=", unit);
+ m4 = strjoina("OBJECT_SYSTEMD_USER_UNIT=", unit);
+ sprintf(muid, "_UID="UID_FMT, uid);
+
+ (void) (
+ /* Look for messages from the user service itself */
+ (r = sd_journal_add_match(j, m1, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+
+ /* Look for messages from systemd about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m2, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+
+ /* Look for coredumps of the service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m3, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0)) ||
+
+ /* Look for messages from authorized daemons about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m4, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0))
+ );
+
+ if (r == 0 && endswith(unit, ".slice")) {
+ const char *m5;
+
+ m5 = strjoina("_SYSTEMD_SLICE=", unit);
+
+ /* Show all messages belonging to a slice */
+ (void)(
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m5, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0))
+ );
+ }
+
+ return r;
+}
+
+static int get_boot_id_for_machine(const char *machine, sd_id128_t *boot_id) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, rootfd = -1;
+ pid_t pid, child;
+ char buf[37];
+ ssize_t k;
+ int r;
+
+ assert(machine);
+ assert(boot_id);
+
+ if (!machine_name_is_valid(machine))
+ return -EINVAL;
+
+ r = container_get_leader(machine, &pid);
+ if (r < 0)
+ return r;
+
+ r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, NULL, &rootfd);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-bootidns)", "(sd-bootid)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ pidnsfd, mntnsfd, -1, -1, rootfd, &child);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ int fd;
+
+ pair[0] = safe_close(pair[0]);
+
+ fd = open("/proc/sys/kernel/random/boot_id", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ _exit(EXIT_FAILURE);
+
+ r = loop_read_exact(fd, buf, 36, false);
+ safe_close(fd);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ k = send(pair[1], buf, 36, MSG_NOSIGNAL);
+ if (k != 36)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ r = wait_for_terminate_and_check("(sd-bootidns)", child, 0);
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EIO;
+
+ k = recv(pair[0], buf, 36, 0);
+ if (k != 36)
+ return -EIO;
+
+ buf[36] = 0;
+ r = sd_id128_from_string(buf, boot_id);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int add_match_this_boot(sd_journal *j, const char *machine) {
+ char match[9+32+1] = "_BOOT_ID=";
+ sd_id128_t boot_id;
+ int r;
+
+ assert(j);
+
+ if (machine) {
+ r = get_boot_id_for_machine(machine, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get boot id of container %s: %m", machine);
+ } else {
+ r = sd_id128_get_boot(&boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get boot id: %m");
+ }
+
+ sd_id128_to_string(boot_id, match + 9);
+ r = sd_journal_add_match(j, match, strlen(match));
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+
+ r = sd_journal_add_conjunction(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add conjunction: %m");
+
+ return 0;
+}
+
+int show_journal_by_unit(
+ FILE *f,
+ const char *unit,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ uid_t uid,
+ OutputFlags flags,
+ int journal_open_flags,
+ bool system_unit,
+ bool *ellipsized) {
+
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ int r;
+
+ assert(mode >= 0);
+ assert(mode < _OUTPUT_MODE_MAX);
+ assert(unit);
+
+ if (how_many <= 0)
+ return 0;
+
+ r = sd_journal_open(&j, journal_open_flags);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open journal: %m");
+
+ r = add_match_this_boot(j, NULL);
+ if (r < 0)
+ return r;
+
+ if (system_unit)
+ r = add_matches_for_unit(j, unit);
+ else
+ r = add_matches_for_user_unit(j, unit, uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add unit matches: %m");
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *filter;
+
+ filter = journal_make_match_string(j);
+ if (!filter)
+ return log_oom();
+
+ log_debug("Journal filter: %s", filter);
+ }
+
+ return show_journal(f, j, mode, n_columns, not_before, how_many, flags, ellipsized);
+}
diff --git a/src/shared/logs-show.h b/src/shared/logs-show.h
new file mode 100644
index 0000000..1e0c4ea
--- /dev/null
+++ b/src/shared/logs-show.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "sd-journal.h"
+
+#include "macro.h"
+#include "output-mode.h"
+#include "time-util.h"
+#include "util.h"
+
+int show_journal_entry(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ char **output_fields,
+ const size_t highlight[2],
+ bool *ellipsized);
+int show_journal(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ OutputFlags flags,
+ bool *ellipsized);
+
+int add_match_this_boot(sd_journal *j, const char *machine);
+
+int add_matches_for_unit(
+ sd_journal *j,
+ const char *unit);
+
+int add_matches_for_user_unit(
+ sd_journal *j,
+ const char *unit,
+ uid_t uid);
+
+int show_journal_by_unit(
+ FILE *f,
+ const char *unit,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ uid_t uid,
+ OutputFlags flags,
+ int journal_open_flags,
+ bool system_unit,
+ bool *ellipsized);
+
+void json_escape(
+ FILE *f,
+ const char* p,
+ size_t l,
+ OutputFlags flags);
diff --git a/src/shared/loop-util.c b/src/shared/loop-util.c
new file mode 100644
index 0000000..bf426eb
--- /dev/null
+++ b/src/shared/loop-util.c
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/loop.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "loop-util.h"
+#include "stat-util.h"
+
+int loop_device_make(int fd, int open_flags, LoopDevice **ret) {
+ const struct loop_info64 info = {
+ .lo_flags = LO_FLAGS_AUTOCLEAR|LO_FLAGS_PARTSCAN|(open_flags == O_RDONLY ? LO_FLAGS_READ_ONLY : 0),
+ };
+
+ _cleanup_close_ int control = -1, loop = -1;
+ _cleanup_free_ char *loopdev = NULL;
+ struct stat st;
+ LoopDevice *d;
+ int nr, r;
+
+ assert(fd >= 0);
+ assert(ret);
+ assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (S_ISBLK(st.st_mode)) {
+ int copy;
+
+ /* If this is already a block device, store a copy of the fd as it is */
+
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0)
+ return -errno;
+
+ d = new0(LoopDevice, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (LoopDevice) {
+ .fd = copy,
+ .nr = -1,
+ .relinquished = true, /* It's not allocated by us, don't destroy it when this object is freed */
+ };
+
+ *ret = d;
+ return d->fd;
+ }
+
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return r;
+
+ control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (control < 0)
+ return -errno;
+
+ nr = ioctl(control, LOOP_CTL_GET_FREE);
+ if (nr < 0)
+ return -errno;
+
+ if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
+ return -ENOMEM;
+
+ loop = open(loopdev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
+ if (loop < 0)
+ return -errno;
+
+ if (ioctl(loop, LOOP_SET_FD, fd) < 0)
+ return -errno;
+
+ if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0)
+ return -errno;
+
+ d = new(LoopDevice, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (LoopDevice) {
+ .fd = TAKE_FD(loop),
+ .node = TAKE_PTR(loopdev),
+ .nr = nr,
+ };
+
+ *ret = d;
+ return d->fd;
+}
+
+int loop_device_make_by_path(const char *path, int open_flags, LoopDevice **ret) {
+ _cleanup_close_ int fd = -1;
+
+ assert(path);
+ assert(ret);
+ assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
+
+ fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
+ if (fd < 0)
+ return -errno;
+
+ return loop_device_make(fd, open_flags, ret);
+}
+
+LoopDevice* loop_device_unref(LoopDevice *d) {
+ if (!d)
+ return NULL;
+
+ if (d->fd >= 0) {
+
+ if (d->nr >= 0 && !d->relinquished) {
+ if (ioctl(d->fd, LOOP_CLR_FD) < 0)
+ log_debug_errno(errno, "Failed to clear loop device: %m");
+
+ }
+
+ safe_close(d->fd);
+ }
+
+ if (d->nr >= 0 && !d->relinquished) {
+ _cleanup_close_ int control = -1;
+
+ control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (control < 0)
+ log_debug_errno(errno, "Failed to open loop control device: %m");
+ else {
+ if (ioctl(control, LOOP_CTL_REMOVE, d->nr) < 0)
+ log_debug_errno(errno, "Failed to remove loop device: %m");
+ }
+ }
+
+ free(d->node);
+ return mfree(d);
+}
+
+void loop_device_relinquish(LoopDevice *d) {
+ assert(d);
+
+ /* Don't attempt to clean up the loop device anymore from this point on. Leave the clean-ing up to the kernel
+ * itself, using the loop device "auto-clear" logic we already turned on when creating the device. */
+
+ d->relinquished = true;
+}
diff --git a/src/shared/loop-util.h b/src/shared/loop-util.h
new file mode 100644
index 0000000..d78466c
--- /dev/null
+++ b/src/shared/loop-util.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "macro.h"
+
+typedef struct LoopDevice LoopDevice;
+
+/* Some helpers for setting up loopback block devices */
+
+struct LoopDevice {
+ int fd;
+ int nr;
+ char *node;
+ bool relinquished;
+};
+
+int loop_device_make(int fd, int open_flags, LoopDevice **ret);
+int loop_device_make_by_path(const char *path, int open_flags, LoopDevice **ret);
+
+LoopDevice* loop_device_unref(LoopDevice *d);
+DEFINE_TRIVIAL_CLEANUP_FUNC(LoopDevice*, loop_device_unref);
+
+void loop_device_relinquish(LoopDevice *d);
diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c
new file mode 100644
index 0000000..af06ab2
--- /dev/null
+++ b/src/shared/machine-image.c
@@ -0,0 +1,1249 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/fs.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "dissect-image.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "lockfile-util.h"
+#include "log.h"
+#include "loop-util.h"
+#include "machine-image.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "os-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+#include "xattr-util.h"
+
+static const char* const image_search_path[_IMAGE_CLASS_MAX] = {
+ [IMAGE_MACHINE] = "/etc/machines\0" /* only place symlinks here */
+ "/run/machines\0" /* and here too */
+ "/var/lib/machines\0" /* the main place for images */
+ "/var/lib/container\0" /* legacy */
+ "/usr/local/lib/machines\0"
+ "/usr/lib/machines\0",
+
+ [IMAGE_PORTABLE] = "/etc/portables\0" /* only place symlinks here */
+ "/run/portables\0" /* and here too */
+ "/var/lib/portables\0" /* the main place for images */
+ "/usr/local/lib/portables\0"
+ "/usr/lib/portables\0",
+};
+
+static Image *image_free(Image *i) {
+ assert(i);
+
+ free(i->name);
+ free(i->path);
+
+ free(i->hostname);
+ strv_free(i->machine_info);
+ strv_free(i->os_release);
+
+ return mfree(i);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(Image, image, image_free);
+DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(image_hash_ops, char, string_hash_func, string_compare_func,
+ Image, image_unref);
+
+static char **image_settings_path(Image *image) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char *fn, *s;
+ unsigned i = 0;
+
+ assert(image);
+
+ l = new0(char*, 4);
+ if (!l)
+ return NULL;
+
+ fn = strjoina(image->name, ".nspawn");
+
+ FOREACH_STRING(s, "/etc/systemd/nspawn/", "/run/systemd/nspawn/") {
+ l[i] = strappend(s, fn);
+ if (!l[i])
+ return NULL;
+
+ i++;
+ }
+
+ l[i] = file_in_same_dir(image->path, fn);
+ if (!l[i])
+ return NULL;
+
+ return TAKE_PTR(l);
+}
+
+static char *image_roothash_path(Image *image) {
+ const char *fn;
+
+ assert(image);
+
+ fn = strjoina(image->name, ".roothash");
+
+ return file_in_same_dir(image->path, fn);
+}
+
+static int image_new(
+ ImageType t,
+ const char *pretty,
+ const char *path,
+ const char *filename,
+ bool read_only,
+ usec_t crtime,
+ usec_t mtime,
+ Image **ret) {
+
+ _cleanup_(image_unrefp) Image *i = NULL;
+
+ assert(t >= 0);
+ assert(t < _IMAGE_TYPE_MAX);
+ assert(pretty);
+ assert(filename);
+ assert(ret);
+
+ i = new0(Image, 1);
+ if (!i)
+ return -ENOMEM;
+
+ i->n_ref = 1;
+ i->type = t;
+ i->read_only = read_only;
+ i->crtime = crtime;
+ i->mtime = mtime;
+ i->usage = i->usage_exclusive = (uint64_t) -1;
+ i->limit = i->limit_exclusive = (uint64_t) -1;
+
+ i->name = strdup(pretty);
+ if (!i->name)
+ return -ENOMEM;
+
+ if (path)
+ i->path = strjoin(path, "/", filename);
+ else
+ i->path = strdup(filename);
+ if (!i->path)
+ return -ENOMEM;
+
+ path_simplify(i->path, false);
+
+ *ret = TAKE_PTR(i);
+
+ return 0;
+}
+
+static int extract_pretty(const char *path, const char *suffix, char **ret) {
+ _cleanup_free_ char *name = NULL;
+ const char *p;
+ size_t n;
+
+ assert(path);
+ assert(ret);
+
+ p = last_path_component(path);
+ n = strcspn(p, "/");
+
+ name = strndup(p, n);
+ if (!name)
+ return -ENOMEM;
+
+ if (suffix) {
+ char *e;
+
+ e = endswith(name, suffix);
+ if (!e)
+ return -EINVAL;
+
+ *e = 0;
+ }
+
+ if (!image_name_is_valid(name))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(name);
+ return 0;
+}
+
+static int image_make(
+ const char *pretty,
+ int dfd,
+ const char *path,
+ const char *filename,
+ const struct stat *st,
+ Image **ret) {
+
+ _cleanup_free_ char *pretty_buffer = NULL;
+ struct stat stbuf;
+ bool read_only;
+ int r;
+
+ assert(dfd >= 0 || dfd == AT_FDCWD);
+ assert(filename);
+
+ /* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block
+ * devices into /var/lib/machines/, and treat them normally.
+ *
+ * This function returns -ENOENT if we can't find the image after all, and -EMEDIUMTYPE if it's not a file we
+ * recognize. */
+
+ if (!st) {
+ if (fstatat(dfd, filename, &stbuf, 0) < 0)
+ return -errno;
+
+ st = &stbuf;
+ }
+
+ read_only =
+ (path && path_startswith(path, "/usr")) ||
+ (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
+
+ if (S_ISDIR(st->st_mode)) {
+ _cleanup_close_ int fd = -1;
+ unsigned file_attr = 0;
+
+ if (!ret)
+ return 0;
+
+ if (!pretty) {
+ r = extract_pretty(filename, NULL, &pretty_buffer);
+ if (r < 0)
+ return r;
+
+ pretty = pretty_buffer;
+ }
+
+ fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ /* btrfs subvolumes have inode 256 */
+ if (st->st_ino == 256) {
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (r) {
+ BtrfsSubvolInfo info;
+
+ /* It's a btrfs subvolume */
+
+ r = btrfs_subvol_get_info_fd(fd, 0, &info);
+ if (r < 0)
+ return r;
+
+ r = image_new(IMAGE_SUBVOLUME,
+ pretty,
+ path,
+ filename,
+ info.read_only || read_only,
+ info.otime,
+ 0,
+ ret);
+ if (r < 0)
+ return r;
+
+ if (btrfs_quota_scan_ongoing(fd) == 0) {
+ BtrfsQuotaInfo quota;
+
+ r = btrfs_subvol_get_subtree_quota_fd(fd, 0, &quota);
+ if (r >= 0) {
+ (*ret)->usage = quota.referenced;
+ (*ret)->usage_exclusive = quota.exclusive;
+
+ (*ret)->limit = quota.referenced_max;
+ (*ret)->limit_exclusive = quota.exclusive_max;
+ }
+ }
+
+ return 0;
+ }
+ }
+
+ /* If the IMMUTABLE bit is set, we consider the
+ * directory read-only. Since the ioctl is not
+ * supported everywhere we ignore failures. */
+ (void) read_attr_fd(fd, &file_attr);
+
+ /* It's just a normal directory. */
+ r = image_new(IMAGE_DIRECTORY,
+ pretty,
+ path,
+ filename,
+ read_only || (file_attr & FS_IMMUTABLE_FL),
+ 0,
+ 0,
+ ret);
+ if (r < 0)
+ return r;
+
+ return 0;
+
+ } else if (S_ISREG(st->st_mode) && endswith(filename, ".raw")) {
+ usec_t crtime = 0;
+
+ /* It's a RAW disk image */
+
+ if (!ret)
+ return 0;
+
+ (void) fd_getcrtime_at(dfd, filename, &crtime, 0);
+
+ if (!pretty) {
+ r = extract_pretty(filename, ".raw", &pretty_buffer);
+ if (r < 0)
+ return r;
+
+ pretty = pretty_buffer;
+ }
+
+ r = image_new(IMAGE_RAW,
+ pretty,
+ path,
+ filename,
+ !(st->st_mode & 0222) || read_only,
+ crtime,
+ timespec_load(&st->st_mtim),
+ ret);
+ if (r < 0)
+ return r;
+
+ (*ret)->usage = (*ret)->usage_exclusive = st->st_blocks * 512;
+ (*ret)->limit = (*ret)->limit_exclusive = st->st_size;
+
+ return 0;
+
+ } else if (S_ISBLK(st->st_mode)) {
+ _cleanup_close_ int block_fd = -1;
+ uint64_t size = UINT64_MAX;
+
+ /* A block device */
+
+ if (!ret)
+ return 0;
+
+ if (!pretty) {
+ r = extract_pretty(filename, NULL, &pretty_buffer);
+ if (r < 0)
+ return r;
+
+ pretty = pretty_buffer;
+ }
+
+ block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
+ if (block_fd < 0)
+ log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path, filename);
+ else {
+ /* Refresh stat data after opening the node */
+ if (fstat(block_fd, &stbuf) < 0)
+ return -errno;
+ st = &stbuf;
+
+ if (!S_ISBLK(st->st_mode)) /* Verify that what we opened is actually what we think it is */
+ return -ENOTTY;
+
+ if (!read_only) {
+ int state = 0;
+
+ if (ioctl(block_fd, BLKROGET, &state) < 0)
+ log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path, filename);
+ else if (state)
+ read_only = true;
+ }
+
+ if (ioctl(block_fd, BLKGETSIZE64, &size) < 0)
+ log_debug_errno(errno, "Failed to issue BLKGETSIZE64 on device %s/%s, ignoring: %m", path, filename);
+
+ block_fd = safe_close(block_fd);
+ }
+
+ r = image_new(IMAGE_BLOCK,
+ pretty,
+ path,
+ filename,
+ !(st->st_mode & 0222) || read_only,
+ 0,
+ 0,
+ ret);
+ if (r < 0)
+ return r;
+
+ if (size != 0 && size != UINT64_MAX)
+ (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size;
+
+ return 0;
+ }
+
+ return -EMEDIUMTYPE;
+}
+
+int image_find(ImageClass class, const char *name, Image **ret) {
+ const char *path;
+ int r;
+
+ assert(class >= 0);
+ assert(class < _IMAGE_CLASS_MAX);
+ assert(name);
+
+ /* There are no images with invalid names */
+ if (!image_name_is_valid(name))
+ return -ENOENT;
+
+ NULSTR_FOREACH(path, image_search_path[class]) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct stat st;
+
+ d = opendir(path);
+ if (!d) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people to
+ * symlink block devices into the search path */
+ if (fstatat(dirfd(d), name, &st, 0) < 0) {
+ _cleanup_free_ char *raw = NULL;
+
+ if (errno != ENOENT)
+ return -errno;
+
+ raw = strappend(name, ".raw");
+ if (!raw)
+ return -ENOMEM;
+
+ if (fstatat(dirfd(d), raw, &st, 0) < 0) {
+
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ if (!S_ISREG(st.st_mode))
+ continue;
+
+ r = image_make(name, dirfd(d), path, raw, &st, ret);
+
+ } else {
+ if (!S_ISDIR(st.st_mode) && !S_ISBLK(st.st_mode))
+ continue;
+
+ r = image_make(name, dirfd(d), path, name, &st, ret);
+ }
+ if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
+ continue;
+ if (r < 0)
+ return r;
+
+ if (ret)
+ (*ret)->discoverable = true;
+
+ return 1;
+ }
+
+ if (class == IMAGE_MACHINE && streq(name, ".host")) {
+ r = image_make(".host", AT_FDCWD, NULL, "/", NULL, ret);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ (*ret)->discoverable = true;
+
+ return r;
+ }
+
+ return -ENOENT;
+};
+
+int image_from_path(const char *path, Image **ret) {
+
+ /* Note that we don't set the 'discoverable' field of the returned object, because we don't check here whether
+ * the image is in the image search path. And if it is we don't know if the path we used is actually not
+ * overridden by another, different image earlier in the search path */
+
+ if (path_equal(path, "/"))
+ return image_make(".host", AT_FDCWD, NULL, "/", NULL, ret);
+
+ return image_make(NULL, AT_FDCWD, NULL, path, NULL, ret);
+}
+
+int image_find_harder(ImageClass class, const char *name_or_path, Image **ret) {
+ if (image_name_is_valid(name_or_path))
+ return image_find(class, name_or_path, ret);
+
+ return image_from_path(name_or_path, ret);
+}
+
+int image_discover(ImageClass class, Hashmap *h) {
+ const char *path;
+ int r;
+
+ assert(class >= 0);
+ assert(class < _IMAGE_CLASS_MAX);
+ assert(h);
+
+ NULSTR_FOREACH(path, image_search_path[class]) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(path);
+ if (!d) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ _cleanup_(image_unrefp) Image *image = NULL;
+ _cleanup_free_ char *truncated = NULL;
+ const char *pretty;
+ struct stat st;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people
+ * to symlink block devices into the search path */
+ if (fstatat(dirfd(d), de->d_name, &st, 0) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ if (S_ISREG(st.st_mode)) {
+ const char *e;
+
+ e = endswith(de->d_name, ".raw");
+ if (!e)
+ continue;
+
+ truncated = strndup(de->d_name, e - de->d_name);
+ if (!truncated)
+ return -ENOMEM;
+
+ pretty = truncated;
+ } else if (S_ISDIR(st.st_mode) || S_ISBLK(st.st_mode))
+ pretty = de->d_name;
+ else
+ continue;
+
+ if (!image_name_is_valid(pretty))
+ continue;
+
+ if (hashmap_contains(h, pretty))
+ continue;
+
+ r = image_make(pretty, dirfd(d), path, de->d_name, &st, &image);
+ if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
+ continue;
+ if (r < 0)
+ return r;
+
+ image->discoverable = true;
+
+ r = hashmap_put(h, image->name, image);
+ if (r < 0)
+ return r;
+
+ image = NULL;
+ }
+ }
+
+ if (class == IMAGE_MACHINE && !hashmap_contains(h, ".host")) {
+ _cleanup_(image_unrefp) Image *image = NULL;
+
+ r = image_make(".host", AT_FDCWD, NULL, "/", NULL, &image);
+ if (r < 0)
+ return r;
+
+ image->discoverable = true;
+
+ r = hashmap_put(h, image->name, image);
+ if (r < 0)
+ return r;
+
+ image = NULL;
+ }
+
+ return 0;
+}
+
+int image_remove(Image *i) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
+ _cleanup_strv_free_ char **settings = NULL;
+ _cleanup_free_ char *roothash = NULL;
+ char **j;
+ int r;
+
+ assert(i);
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ settings = image_settings_path(i);
+ if (!settings)
+ return -ENOMEM;
+
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
+ /* Make sure we don't interfere with a running nspawn */
+ r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+
+ /* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the
+ * big guns */
+ if (unlink(i->path) < 0) {
+ r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+
+ case IMAGE_DIRECTORY:
+ /* Allow deletion of read-only directories */
+ (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);
+ r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case IMAGE_BLOCK:
+
+ /* If this is inside of /dev, then it's a real block device, hence let's not touch the device node
+ * itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink
+ * the thing (it's most likely a symlink after all). */
+
+ if (path_startswith(i->path, "/dev"))
+ break;
+
+ _fallthrough_;
+ case IMAGE_RAW:
+ if (unlink(i->path) < 0)
+ return -errno;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ STRV_FOREACH(j, settings) {
+ if (unlink(*j) < 0 && errno != ENOENT)
+ log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
+ }
+
+ if (unlink(roothash) < 0 && errno != ENOENT)
+ log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash);
+
+ return 0;
+}
+
+static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
+ _cleanup_free_ char *rs = NULL;
+ const char *fn;
+
+ fn = strjoina(new_name, suffix);
+
+ rs = file_in_same_dir(path, fn);
+ if (!rs)
+ return -ENOMEM;
+
+ return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
+}
+
+int image_rename(Image *i, const char *new_name) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
+ _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL;
+ _cleanup_strv_free_ char **settings = NULL;
+ unsigned file_attr = 0;
+ char **j;
+ int r;
+
+ assert(i);
+
+ if (!image_name_is_valid(new_name))
+ return -EINVAL;
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ settings = image_settings_path(i);
+ if (!settings)
+ return -ENOMEM;
+
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
+ /* Make sure we don't interfere with a running nspawn */
+ r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ /* Make sure nobody takes the new name, between the time we
+ * checked it is currently unused in all search paths, and the
+ * time we take possession of it */
+ r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
+ if (r < 0)
+ return r;
+
+ r = image_find(IMAGE_MACHINE, new_name, NULL);
+ if (r >= 0)
+ return -EEXIST;
+ if (r != -ENOENT)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_DIRECTORY:
+ /* Turn of the immutable bit while we rename the image, so that we can rename it */
+ (void) read_attr_path(i->path, &file_attr);
+
+ if (file_attr & FS_IMMUTABLE_FL)
+ (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);
+
+ _fallthrough_;
+ case IMAGE_SUBVOLUME:
+ new_path = file_in_same_dir(i->path, new_name);
+ break;
+
+ case IMAGE_BLOCK:
+
+ /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */
+ if (path_startswith(i->path, "/dev"))
+ return -EROFS;
+
+ new_path = file_in_same_dir(i->path, new_name);
+ break;
+
+ case IMAGE_RAW: {
+ const char *fn;
+
+ fn = strjoina(new_name, ".raw");
+ new_path = file_in_same_dir(i->path, fn);
+ break;
+ }
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (!new_path)
+ return -ENOMEM;
+
+ nn = strdup(new_name);
+ if (!nn)
+ return -ENOMEM;
+
+ r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
+ if (r < 0)
+ return r;
+
+ /* Restore the immutable bit, if it was set before */
+ if (file_attr & FS_IMMUTABLE_FL)
+ (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);
+
+ free_and_replace(i->path, new_path);
+ free_and_replace(i->name, nn);
+
+ STRV_FOREACH(j, settings) {
+ r = rename_auxiliary_file(*j, new_name, ".nspawn");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
+ }
+
+ r = rename_auxiliary_file(roothash, new_name, ".roothash");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash);
+
+ return 0;
+}
+
+static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
+ _cleanup_free_ char *rs = NULL;
+ const char *fn;
+
+ fn = strjoina(new_name, suffix);
+
+ rs = file_in_same_dir(path, fn);
+ if (!rs)
+ return -ENOMEM;
+
+ return copy_file_atomic(path, rs, 0664, 0, COPY_REFLINK);
+}
+
+int image_clone(Image *i, const char *new_name, bool read_only) {
+ _cleanup_(release_lock_file) LockFile name_lock = LOCK_FILE_INIT;
+ _cleanup_strv_free_ char **settings = NULL;
+ _cleanup_free_ char *roothash = NULL;
+ const char *new_path;
+ char **j;
+ int r;
+
+ assert(i);
+
+ if (!image_name_is_valid(new_name))
+ return -EINVAL;
+
+ settings = image_settings_path(i);
+ if (!settings)
+ return -ENOMEM;
+
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
+ /* Make sure nobody takes the new name, between the time we
+ * checked it is currently unused in all search paths, and the
+ * time we take possession of it */
+ r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
+ if (r < 0)
+ return r;
+
+ r = image_find(IMAGE_MACHINE, new_name, NULL);
+ if (r >= 0)
+ return -EEXIST;
+ if (r != -ENOENT)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+ case IMAGE_DIRECTORY:
+ /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain
+ * directory. */
+
+ new_path = strjoina("/var/lib/machines/", new_name);
+
+ r = btrfs_subvol_snapshot(i->path, new_path,
+ (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
+ BTRFS_SNAPSHOT_FALLBACK_COPY |
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
+ BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
+ BTRFS_SNAPSHOT_RECURSIVE |
+ BTRFS_SNAPSHOT_QUOTA);
+ if (r >= 0)
+ /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
+ (void) btrfs_subvol_auto_qgroup(new_path, 0, true);
+
+ break;
+
+ case IMAGE_RAW:
+ new_path = strjoina("/var/lib/machines/", new_name, ".raw");
+
+ r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, COPY_REFLINK);
+ break;
+
+ case IMAGE_BLOCK:
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(j, settings) {
+ r = clone_auxiliary_file(*j, new_name, ".nspawn");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
+ }
+
+ r = clone_auxiliary_file(roothash, new_name, ".roothash");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash);
+
+ return 0;
+}
+
+int image_read_only(Image *i, bool b) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
+ int r;
+
+ assert(i);
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ /* Make sure we don't interfere with a running nspawn */
+ r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+
+ /* Note that we set the flag only on the top-level
+ * subvolume of the image. */
+
+ r = btrfs_subvol_set_read_only(i->path, b);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case IMAGE_DIRECTORY:
+ /* For simple directory trees we cannot use the access
+ mode of the top-level directory, since it has an
+ effect on the container itself. However, we can
+ use the "immutable" flag, to at least make the
+ top-level directory read-only. It's not as good as
+ a read-only subvolume, but at least something, and
+ we can read the value back. */
+
+ r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL, NULL);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case IMAGE_RAW: {
+ struct stat st;
+
+ if (stat(i->path, &st) < 0)
+ return -errno;
+
+ if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
+ return -errno;
+
+ /* If the images is now read-only, it's a good time to
+ * defrag it, given that no write patterns will
+ * fragment it again. */
+ if (b)
+ (void) btrfs_defrag(i->path);
+ break;
+ }
+
+ case IMAGE_BLOCK: {
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ int state = b;
+
+ fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTTY;
+
+ if (ioctl(fd, BLKROSET, &state) < 0)
+ return -errno;
+
+ break;
+ }
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
+ _cleanup_free_ char *p = NULL;
+ LockFile t = LOCK_FILE_INIT;
+ struct stat st;
+ int r;
+
+ assert(path);
+ assert(global);
+ assert(local);
+
+ /* Locks an image path. This actually creates two locks: one
+ * "local" one, next to the image path itself, which might be
+ * shared via NFS. And another "global" one, in /run, that
+ * uses the device/inode number. This has the benefit that we
+ * can even lock a tree that is a mount point, correctly. */
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
+ *local = *global = (LockFile) LOCK_FILE_INIT;
+ return 0;
+ }
+
+ if (path_equal(path, "/"))
+ return -EBUSY;
+
+ if (stat(path, &st) >= 0) {
+ if (S_ISBLK(st.st_mode))
+ r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev));
+ else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))
+ r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino);
+ else
+ return -ENOTTY;
+
+ if (r < 0)
+ return -ENOMEM;
+ }
+
+ /* For block devices we don't need the "local" lock, as the major/minor lock above should be sufficient, since
+ * block devices are device local anyway. */
+ if (!path_startswith(path, "/dev")) {
+ r = make_lock_file_for(path, operation, &t);
+ if (r < 0) {
+ if ((operation & LOCK_SH) && r == -EROFS)
+ log_debug_errno(r, "Failed to create shared lock for '%s', ignoring: %m", path);
+ else
+ return r;
+ }
+ }
+
+ if (p) {
+ mkdir_p("/run/systemd/nspawn/locks", 0700);
+
+ r = make_lock_file(p, operation, global);
+ if (r < 0) {
+ release_lock_file(&t);
+ return r;
+ }
+ } else
+ *global = (LockFile) LOCK_FILE_INIT;
+
+ *local = t;
+ return 0;
+}
+
+int image_set_limit(Image *i, uint64_t referenced_max) {
+ assert(i);
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ if (i->type != IMAGE_SUBVOLUME)
+ return -EOPNOTSUPP;
+
+ /* We set the quota both for the subvolume as well as for the
+ * subtree. The latter is mostly for historical reasons, since
+ * we didn't use to have a concept of subtree quota, and hence
+ * only modified the subvolume quota. */
+
+ (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max);
+ (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
+ return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max);
+}
+
+int image_read_metadata(Image *i) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
+ int r;
+
+ assert(i);
+
+ r = image_path_lock(i->path, LOCK_SH|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+ case IMAGE_DIRECTORY: {
+ _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
+ sd_id128_t machine_id = SD_ID128_NULL;
+ _cleanup_free_ char *hostname = NULL;
+ _cleanup_free_ char *path = NULL;
+
+ r = chase_symlinks("/etc/hostname", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path);
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to chase /etc/hostname in image %s: %m", i->name);
+ else if (r >= 0) {
+ r = read_etc_hostname(path, &hostname);
+ if (r < 0)
+ log_debug_errno(errno, "Failed to read /etc/hostname of image %s: %m", i->name);
+ }
+
+ path = mfree(path);
+
+ r = chase_symlinks("/etc/machine-id", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path);
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to chase /etc/machine-id in image %s: %m", i->name);
+ else if (r >= 0) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ log_debug_errno(errno, "Failed to open %s: %m", path);
+ else {
+ r = id128_read_fd(fd, ID128_PLAIN, &machine_id);
+ if (r < 0)
+ log_debug_errno(r, "Image %s contains invalid machine ID.", i->name);
+ }
+ }
+
+ path = mfree(path);
+
+ r = chase_symlinks("/etc/machine-info", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path);
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to chase /etc/machine-info in image %s: %m", i->name);
+ else if (r >= 0) {
+ r = load_env_file_pairs(NULL, path, &machine_info);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse machine-info data of %s: %m", i->name);
+ }
+
+ r = load_os_release_pairs(i->path, &os_release);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read os-release in image, ignoring: %m");
+
+ free_and_replace(i->hostname, hostname);
+ i->machine_id = machine_id;
+ strv_free_and_replace(i->machine_info, machine_info);
+ strv_free_and_replace(i->os_release, os_release);
+
+ break;
+ }
+
+ case IMAGE_RAW:
+ case IMAGE_BLOCK: {
+ _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+
+ r = loop_device_make_by_path(i->path, O_RDONLY, &d);
+ if (r < 0)
+ return r;
+
+ r = dissect_image(d->fd, NULL, 0, DISSECT_IMAGE_REQUIRE_ROOT, &m);
+ if (r < 0)
+ return r;
+
+ r = dissected_image_acquire_metadata(m);
+ if (r < 0)
+ return r;
+
+ free_and_replace(i->hostname, m->hostname);
+ i->machine_id = m->machine_id;
+ strv_free_and_replace(i->machine_info, m->machine_info);
+ strv_free_and_replace(i->os_release, m->os_release);
+
+ break;
+ }
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ i->metadata_valid = true;
+
+ return 0;
+}
+
+int image_name_lock(const char *name, int operation, LockFile *ret) {
+ const char *p;
+
+ assert(name);
+ assert(ret);
+
+ /* Locks an image name, regardless of the precise path used. */
+
+ if (!image_name_is_valid(name))
+ return -EINVAL;
+
+ if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
+ *ret = (LockFile) LOCK_FILE_INIT;
+ return 0;
+ }
+
+ if (streq(name, ".host"))
+ return -EBUSY;
+
+ mkdir_p("/run/systemd/nspawn/locks", 0700);
+ p = strjoina("/run/systemd/nspawn/locks/name-", name);
+
+ return make_lock_file(p, operation, ret);
+}
+
+bool image_name_is_valid(const char *s) {
+ if (!filename_is_valid(s))
+ return false;
+
+ if (string_has_cc(s, NULL))
+ return false;
+
+ if (!utf8_is_valid(s))
+ return false;
+
+ /* Temporary files for atomically creating new files */
+ if (startswith(s, ".#"))
+ return false;
+
+ return true;
+}
+
+bool image_in_search_path(ImageClass class, const char *image) {
+ const char *path;
+
+ assert(image);
+
+ NULSTR_FOREACH(path, image_search_path[class]) {
+ const char *p;
+ size_t k;
+
+ p = path_startswith(image, path);
+ if (!p)
+ continue;
+
+ /* Make sure there's a filename following */
+ k = strcspn(p, "/");
+ if (k == 0)
+ continue;
+
+ p += k;
+
+ /* Accept trailing slashes */
+ if (p[strspn(p, "/")] == 0)
+ return true;
+
+ }
+
+ return false;
+}
+
+static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
+ [IMAGE_DIRECTORY] = "directory",
+ [IMAGE_SUBVOLUME] = "subvolume",
+ [IMAGE_RAW] = "raw",
+ [IMAGE_BLOCK] = "block",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);
diff --git a/src/shared/machine-image.h b/src/shared/machine-image.h
new file mode 100644
index 0000000..9fd4589
--- /dev/null
+++ b/src/shared/machine-image.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "sd-id128.h"
+
+#include "hashmap.h"
+#include "lockfile-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+typedef enum ImageClass {
+ IMAGE_MACHINE,
+ IMAGE_PORTABLE,
+ _IMAGE_CLASS_MAX,
+ _IMAGE_CLASS_INVALID = -1
+} ImageClass;
+
+typedef enum ImageType {
+ IMAGE_DIRECTORY,
+ IMAGE_SUBVOLUME,
+ IMAGE_RAW,
+ IMAGE_BLOCK,
+ _IMAGE_TYPE_MAX,
+ _IMAGE_TYPE_INVALID = -1
+} ImageType;
+
+typedef struct Image {
+ unsigned n_ref;
+
+ ImageType type;
+ char *name;
+ char *path;
+ bool read_only;
+
+ usec_t crtime;
+ usec_t mtime;
+
+ uint64_t usage;
+ uint64_t usage_exclusive;
+ uint64_t limit;
+ uint64_t limit_exclusive;
+
+ char *hostname;
+ sd_id128_t machine_id;
+ char **machine_info;
+ char **os_release;
+
+ bool metadata_valid:1;
+ bool discoverable:1; /* true if we know for sure that image_find() would find the image given just the short name */
+
+ void *userdata;
+} Image;
+
+Image *image_unref(Image *i);
+Image *image_ref(Image *i);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Image*, image_unref);
+
+int image_find(ImageClass class, const char *name, Image **ret);
+int image_from_path(const char *path, Image **ret);
+int image_find_harder(ImageClass class, const char *name_or_path, Image **ret);
+int image_discover(ImageClass class, Hashmap *map);
+
+int image_remove(Image *i);
+int image_rename(Image *i, const char *new_name);
+int image_clone(Image *i, const char *new_name, bool read_only);
+int image_read_only(Image *i, bool b);
+
+const char* image_type_to_string(ImageType t) _const_;
+ImageType image_type_from_string(const char *s) _pure_;
+
+bool image_name_is_valid(const char *s) _pure_;
+
+int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local);
+int image_name_lock(const char *name, int operation, LockFile *ret);
+
+int image_set_limit(Image *i, uint64_t referenced_max);
+
+int image_read_metadata(Image *i);
+
+bool image_in_search_path(ImageClass class, const char *image);
+
+static inline bool IMAGE_IS_HIDDEN(const struct Image *i) {
+ assert(i);
+
+ return i->name && i->name[0] == '.';
+}
+
+static inline bool IMAGE_IS_VENDOR(const struct Image *i) {
+ assert(i);
+
+ return i->path && path_startswith(i->path, "/usr");
+}
+
+static inline bool IMAGE_IS_HOST(const struct Image *i) {
+ assert(i);
+
+ if (i->name && streq(i->name, ".host"))
+ return true;
+
+ if (i->path && path_equal(i->path, "/"))
+ return true;
+
+ return false;
+}
+
+extern const struct hash_ops image_hash_ops;
diff --git a/src/shared/machine-pool.c b/src/shared/machine-pool.c
new file mode 100644
index 0000000..de4f704
--- /dev/null
+++ b/src/shared/machine-pool.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sys/statfs.h>
+
+#include "btrfs-util.h"
+#include "label.h"
+#include "machine-pool.h"
+#include "missing.h"
+#include "stat-util.h"
+
+static int check_btrfs(void) {
+ struct statfs sfs;
+
+ if (statfs("/var/lib/machines", &sfs) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ if (statfs("/var/lib", &sfs) < 0)
+ return -errno;
+ }
+
+ return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
+}
+
+int setup_machine_directory(sd_bus_error *error) {
+ int r;
+
+ r = check_btrfs();
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to determine whether /var/lib/machines is located on btrfs: %m");
+ if (r == 0)
+ return 0;
+
+ (void) btrfs_subvol_make_label("/var/lib/machines");
+
+ r = btrfs_quota_enable("/var/lib/machines", true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enable quota for /var/lib/machines, ignoring: %m");
+
+ r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, ignoring: %m");
+
+ return 1;
+}
diff --git a/src/shared/machine-pool.h b/src/shared/machine-pool.h
new file mode 100644
index 0000000..6f59a18
--- /dev/null
+++ b/src/shared/machine-pool.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdint.h>
+
+#include "sd-bus.h"
+
+int setup_machine_directory(sd_bus_error *error);
diff --git a/src/shared/main-func.h b/src/shared/main-func.h
new file mode 100644
index 0000000..3c182e8
--- /dev/null
+++ b/src/shared/main-func.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdlib.h>
+
+#include "pager.h"
+#include "selinux-util.h"
+#include "spawn-ask-password-agent.h"
+#include "spawn-polkit-agent.h"
+#include "static-destruct.h"
+
+#define _DEFINE_MAIN_FUNCTION(intro, impl, ret) \
+ int main(int argc, char *argv[]) { \
+ int r; \
+ intro; \
+ r = impl; \
+ static_destruct(); \
+ ask_password_agent_close(); \
+ polkit_agent_close(); \
+ mac_selinux_finish(); \
+ pager_close(); \
+ return ret; \
+ }
+
+/* Negative return values from impl are mapped to EXIT_FAILURE, and
+ * everything else means success! */
+#define DEFINE_MAIN_FUNCTION(impl) \
+ _DEFINE_MAIN_FUNCTION(,impl(argc, argv), r < 0 ? EXIT_FAILURE : EXIT_SUCCESS)
+
+/* Zero is mapped to EXIT_SUCCESS, negative values are mapped to EXIT_FAILURE,
+ * and postive values are propagated.
+ * Note: "true" means failure! */
+#define DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(impl) \
+ _DEFINE_MAIN_FUNCTION(,impl(argc, argv), r < 0 ? EXIT_FAILURE : r)
diff --git a/src/shared/meson.build b/src/shared/meson.build
new file mode 100644
index 0000000..99d6ba1
--- /dev/null
+++ b/src/shared/meson.build
@@ -0,0 +1,277 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+shared_sources = files('''
+ acl-util.h
+ acpi-fpdt.c
+ acpi-fpdt.h
+ apparmor-util.c
+ apparmor-util.h
+ ask-password-api.c
+ ask-password-api.h
+ barrier.c
+ barrier.h
+ base-filesystem.c
+ base-filesystem.h
+ bitmap.c
+ bitmap.h
+ blkid-util.h
+ boot-timestamps.c
+ boot-timestamps.h
+ bootspec.c
+ bootspec.h
+ bpf-program.c
+ bpf-program.h
+ bus-unit-util.c
+ bus-unit-util.h
+ bus-util.c
+ bus-util.h
+ calendarspec.c
+ calendarspec.h
+ cgroup-show.c
+ cgroup-show.h
+ clean-ipc.c
+ clean-ipc.h
+ clock-util.c
+ clock-util.h
+ condition.c
+ condition.h
+ conf-parser.c
+ conf-parser.h
+ cpu-set-util.c
+ cpu-set-util.h
+ crypt-util.c
+ crypt-util.h
+ daemon-util.h
+ dev-setup.c
+ dev-setup.h
+ dissect-image.c
+ dissect-image.h
+ dns-domain.c
+ dns-domain.h
+ dropin.c
+ dropin.h
+ efivars.c
+ efivars.h
+ enable-mempool.c
+ env-file-label.c
+ env-file-label.h
+ exec-util.c
+ exec-util.h
+ exit-status.c
+ exit-status.h
+ fdset.c
+ fdset.h
+ fileio-label.c
+ fileio-label.h
+ firewall-util.h
+ format-table.c
+ format-table.h
+ fstab-util.c
+ fstab-util.h
+ generator.c
+ generator.h
+ gpt.h
+ id128-print.c
+ id128-print.h
+ ima-util.c
+ ima-util.h
+ import-util.c
+ import-util.h
+ initreq.h
+ install-printf.c
+ install-printf.h
+ install.c
+ install.h
+ ip-protocol-list.c
+ ip-protocol-list.h
+ journal-importer.c
+ journal-importer.h
+ journal-util.c
+ journal-util.h
+ json-internal.h
+ json.c
+ json.h
+ lockfile-util.c
+ lockfile-util.h
+ logs-show.c
+ logs-show.h
+ loop-util.c
+ loop-util.h
+ machine-image.c
+ machine-image.h
+ machine-pool.c
+ machine-pool.h
+ main-func.h
+ module-util.h
+ mount-util.c
+ mount-util.h
+ nscd-flush.c
+ nscd-flush.h
+ nsflags.c
+ nsflags.h
+ os-util.c
+ os-util.h
+ output-mode.c
+ output-mode.h
+ pager.c
+ pager.h
+ path-lookup.c
+ path-lookup.h
+ pretty-print.c
+ pretty-print.h
+ ptyfwd.c
+ ptyfwd.h
+ reboot-util.c
+ reboot-util.h
+ resolve-util.c
+ resolve-util.h
+ seccomp-util.h
+ securebits-util.c
+ securebits-util.h
+ serialize.c
+ serialize.h
+ sleep-config.c
+ sleep-config.h
+ spawn-ask-password-agent.c
+ spawn-ask-password-agent.h
+ spawn-polkit-agent.c
+ spawn-polkit-agent.h
+ specifier.c
+ specifier.h
+ switch-root.c
+ switch-root.h
+ sysctl-util.c
+ sysctl-util.h
+ tmpfile-util-label.c
+ tmpfile-util-label.h
+ tomoyo-util.c
+ tomoyo-util.h
+ udev-util.c
+ udev-util.h
+ uid-range.c
+ uid-range.h
+ utmp-wtmp.h
+ verbs.c
+ verbs.h
+ vlan-util.c
+ vlan-util.h
+ volatile-util.c
+ volatile-util.h
+ watchdog.c
+ watchdog.h
+ web-util.c
+ web-util.h
+ wireguard-netlink.h
+ xml.c
+ xml.h
+'''.split())
+
+if get_option('tests') != 'false'
+ shared_sources += files('tests.c', 'tests.h')
+endif
+
+test_tables_h = files('test-tables.h')
+shared_sources += test_tables_h
+
+if conf.get('HAVE_ACL') == 1
+ shared_sources += files('acl-util.c')
+endif
+
+if conf.get('ENABLE_UTMP') == 1
+ shared_sources += files('utmp-wtmp.c')
+endif
+
+if conf.get('HAVE_SECCOMP') == 1
+ shared_sources += files('seccomp-util.c')
+endif
+
+if conf.get('HAVE_LIBIPTC') == 1
+ shared_sources += files('firewall-util.c')
+endif
+
+if conf.get('HAVE_KMOD') == 1
+ shared_sources += files('module-util.c')
+endif
+
+generate_ip_protocol_list = find_program('generate-ip-protocol-list.sh')
+ip_protocol_list_txt = custom_target(
+ 'ip-protocol-list.txt',
+ output : 'ip-protocol-list.txt',
+ command : [generate_ip_protocol_list, cpp],
+ capture : true)
+
+fname = 'ip-protocol-from-name.gperf'
+gperf_file = custom_target(
+ fname,
+ input : ip_protocol_list_txt,
+ output : fname,
+ command : [generate_gperfs, 'ip_protocol', 'IPPROTO_', '@INPUT@'],
+ capture : true)
+
+fname = 'ip-protocol-from-name.h'
+target1 = custom_target(
+ fname,
+ input : gperf_file,
+ output : fname,
+ command : [gperf,
+ '-L', 'ANSI-C', '-t', '--ignore-case',
+ '-N', 'lookup_ip_protocol',
+ '-H', 'hash_ip_protocol_name',
+ '-p', '-C',
+ '@INPUT@'],
+ capture : true)
+
+fname = 'ip-protocol-to-name.h'
+awkscript = 'ip-protocol-to-name.awk'
+target2 = custom_target(
+ fname,
+ input : [awkscript, ip_protocol_list_txt],
+ output : fname,
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+shared_generated_gperf_headers = [target1, target2]
+shared_sources += shared_generated_gperf_headers
+
+libshared_name = 'systemd-shared-@0@'.format(meson.project_version())
+
+libshared_deps = [threads,
+ librt,
+ libcap,
+ libacl,
+ libcryptsetup,
+ libgcrypt,
+ libiptc,
+ libkmod,
+ libmount,
+ libseccomp,
+ libselinux,
+ libidn,
+ libxz,
+ liblz4,
+ libblkid]
+
+libshared_sym_path = '@0@/libshared.sym'.format(meson.current_source_dir())
+
+libshared_static = static_library(
+ libshared_name,
+ shared_sources,
+ include_directories : includes,
+ dependencies : libshared_deps,
+ c_args : ['-fvisibility=default'])
+
+libshared = shared_library(
+ libshared_name,
+ libudev_sources,
+ include_directories : includes,
+ link_args : ['-shared',
+ '-Wl,--version-script=' + libshared_sym_path],
+ link_whole : [libshared_static,
+ libbasic,
+ libbasic_gcrypt,
+ libsystemd_static,
+ libjournal_client],
+ c_args : ['-fvisibility=default'],
+ dependencies : libshared_deps,
+ install : true,
+ install_dir : rootlibexecdir)
diff --git a/src/shared/module-util.c b/src/shared/module-util.c
new file mode 100644
index 0000000..a34fe8f
--- /dev/null
+++ b/src/shared/module-util.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "module-util.h"
+
+int module_load_and_warn(struct kmod_ctx *ctx, const char *module, bool verbose) {
+ const int probe_flags = KMOD_PROBE_APPLY_BLACKLIST;
+ struct kmod_list *itr;
+ _cleanup_(kmod_module_unref_listp) struct kmod_list *modlist = NULL;
+ int r = 0;
+
+ /* verbose==true means we should log at non-debug level if we
+ * fail to find or load the module. */
+
+ log_debug("Loading module: %s", module);
+
+ r = kmod_module_new_from_lookup(ctx, module, &modlist);
+ if (r < 0)
+ return log_full_errno(verbose ? LOG_ERR : LOG_DEBUG, r,
+ "Failed to lookup module alias '%s': %m", module);
+
+ if (!modlist) {
+ log_full_errno(verbose ? LOG_ERR : LOG_DEBUG, r,
+ "Failed to find module '%s'", module);
+ return -ENOENT;
+ }
+
+ kmod_list_foreach(itr, modlist) {
+ _cleanup_(kmod_module_unrefp) struct kmod_module *mod = NULL;
+ int state, err;
+
+ mod = kmod_module_get_module(itr);
+ state = kmod_module_get_initstate(mod);
+
+ switch (state) {
+ case KMOD_MODULE_BUILTIN:
+ log_full(verbose ? LOG_INFO : LOG_DEBUG,
+ "Module '%s' is builtin", kmod_module_get_name(mod));
+ break;
+
+ case KMOD_MODULE_LIVE:
+ log_debug("Module '%s' is already loaded", kmod_module_get_name(mod));
+ break;
+
+ default:
+ err = kmod_module_probe_insert_module(mod, probe_flags,
+ NULL, NULL, NULL, NULL);
+ if (err == 0)
+ log_full(verbose ? LOG_INFO : LOG_DEBUG,
+ "Inserted module '%s'", kmod_module_get_name(mod));
+ else if (err == KMOD_PROBE_APPLY_BLACKLIST)
+ log_full(verbose ? LOG_INFO : LOG_DEBUG,
+ "Module '%s' is blacklisted", kmod_module_get_name(mod));
+ else {
+ assert(err < 0);
+
+ log_full_errno(!verbose ? LOG_DEBUG :
+ err == -ENODEV ? LOG_NOTICE :
+ err == -ENOENT ? LOG_WARNING :
+ LOG_ERR,
+ err,
+ "Failed to insert module '%s': %m",
+ kmod_module_get_name(mod));
+ if (!IN_SET(err, -ENODEV, -ENOENT))
+ r = err;
+ }
+ }
+ }
+
+ return r;
+}
diff --git a/src/shared/module-util.h b/src/shared/module-util.h
new file mode 100644
index 0000000..c386c5b
--- /dev/null
+++ b/src/shared/module-util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <libkmod.h>
+
+#include "macro.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_ctx*, kmod_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_module*, kmod_module_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_list*, kmod_module_unref_list);
+
+int module_load_and_warn(struct kmod_ctx *ctx, const char *module, bool verbose);
diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c
new file mode 100644
index 0000000..9fa995f
--- /dev/null
+++ b/src/shared/mount-util.c
@@ -0,0 +1,570 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <unistd.h>
+
+/* Include later */
+#include <libmount.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+int umount_recursive(const char *prefix, int flags) {
+ bool again;
+ int n = 0, r;
+
+ /* Try to umount everything recursively below a
+ * directory. Also, take care of stacked mounts, and keep
+ * unmounting them until they are gone. */
+
+ do {
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+
+ again = false;
+ r = 0;
+
+ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!proc_self_mountinfo)
+ return -errno;
+
+ (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
+
+ for (;;) {
+ _cleanup_free_ char *path = NULL, *p = NULL;
+ int k;
+
+ k = fscanf(proc_self_mountinfo,
+ "%*s " /* (1) mount id */
+ "%*s " /* (2) parent id */
+ "%*s " /* (3) major:minor */
+ "%*s " /* (4) root */
+ "%ms " /* (5) mount point */
+ "%*s" /* (6) mount options */
+ "%*[^-]" /* (7) optional fields */
+ "- " /* (8) separator */
+ "%*s " /* (9) file system type */
+ "%*s" /* (10) mount source */
+ "%*s" /* (11) mount options 2 */
+ "%*[^\n]", /* some rubbish at the end */
+ &path);
+ if (k != 1) {
+ if (k == EOF)
+ break;
+
+ continue;
+ }
+
+ r = cunescape(path, UNESCAPE_RELAX, &p);
+ if (r < 0)
+ return r;
+
+ if (!path_startswith(p, prefix))
+ continue;
+
+ if (umount2(p, flags) < 0) {
+ r = log_debug_errno(errno, "Failed to umount %s: %m", p);
+ continue;
+ }
+
+ log_debug("Successfully unmounted %s", p);
+
+ again = true;
+ n++;
+
+ break;
+ }
+
+ } while (again);
+
+ return r ? r : n;
+}
+
+static int get_mount_flags(const char *path, unsigned long *flags) {
+ struct statvfs buf;
+
+ if (statvfs(path, &buf) < 0)
+ return -errno;
+ *flags = buf.f_flag;
+ return 0;
+}
+
+/* Use this function only if do you have direct access to /proc/self/mountinfo
+ * and need the caller to open it for you. This is the case when /proc is
+ * masked or not mounted. Otherwise, use bind_remount_recursive. */
+int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) {
+ _cleanup_set_free_free_ Set *done = NULL;
+ _cleanup_free_ char *cleaned = NULL;
+ int r;
+
+ assert(proc_self_mountinfo);
+
+ /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
+ * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
+ * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
+ * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
+ * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
+ * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
+ * future submounts that have been triggered via autofs.
+ *
+ * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
+ * remount operation. Note that we'll ignore the blacklist for the top-level path. */
+
+ cleaned = strdup(prefix);
+ if (!cleaned)
+ return -ENOMEM;
+
+ path_simplify(cleaned, false);
+
+ done = set_new(&path_hash_ops);
+ if (!done)
+ return -ENOMEM;
+
+ for (;;) {
+ _cleanup_set_free_free_ Set *todo = NULL;
+ bool top_autofs = false;
+ char *x;
+ unsigned long orig_flags;
+
+ todo = set_new(&path_hash_ops);
+ if (!todo)
+ return -ENOMEM;
+
+ rewind(proc_self_mountinfo);
+
+ for (;;) {
+ _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
+ int k;
+
+ k = fscanf(proc_self_mountinfo,
+ "%*s " /* (1) mount id */
+ "%*s " /* (2) parent id */
+ "%*s " /* (3) major:minor */
+ "%*s " /* (4) root */
+ "%ms " /* (5) mount point */
+ "%*s" /* (6) mount options (superblock) */
+ "%*[^-]" /* (7) optional fields */
+ "- " /* (8) separator */
+ "%ms " /* (9) file system type */
+ "%*s" /* (10) mount source */
+ "%*s" /* (11) mount options (bind mount) */
+ "%*[^\n]", /* some rubbish at the end */
+ &path,
+ &type);
+ if (k != 2) {
+ if (k == EOF)
+ break;
+
+ continue;
+ }
+
+ r = cunescape(path, UNESCAPE_RELAX, &p);
+ if (r < 0)
+ return r;
+
+ if (!path_startswith(p, cleaned))
+ continue;
+
+ /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
+ * operate on. */
+ if (!path_equal(cleaned, p)) {
+ bool blacklisted = false;
+ char **i;
+
+ STRV_FOREACH(i, blacklist) {
+
+ if (path_equal(*i, cleaned))
+ continue;
+
+ if (!path_startswith(*i, cleaned))
+ continue;
+
+ if (path_startswith(p, *i)) {
+ blacklisted = true;
+ log_debug("Not remounting %s blacklisted by %s, called for %s", p, *i, cleaned);
+ break;
+ }
+ }
+ if (blacklisted)
+ continue;
+ }
+
+ /* Let's ignore autofs mounts. If they aren't
+ * triggered yet, we want to avoid triggering
+ * them, as we don't make any guarantees for
+ * future submounts anyway. If they are
+ * already triggered, then we will find
+ * another entry for this. */
+ if (streq(type, "autofs")) {
+ top_autofs = top_autofs || path_equal(cleaned, p);
+ continue;
+ }
+
+ if (!set_contains(done, p)) {
+ r = set_consume(todo, p);
+ p = NULL;
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* If we have no submounts to process anymore and if
+ * the root is either already done, or an autofs, we
+ * are done */
+ if (set_isempty(todo) &&
+ (top_autofs || set_contains(done, cleaned)))
+ return 0;
+
+ if (!set_contains(done, cleaned) &&
+ !set_contains(todo, cleaned)) {
+ /* The prefix directory itself is not yet a mount, make it one. */
+ if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
+ return -errno;
+
+ orig_flags = 0;
+ (void) get_mount_flags(cleaned, &orig_flags);
+ orig_flags &= ~MS_RDONLY;
+
+ if (mount(NULL, cleaned, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
+ return -errno;
+
+ log_debug("Made top-level directory %s a mount point.", prefix);
+
+ x = strdup(cleaned);
+ if (!x)
+ return -ENOMEM;
+
+ r = set_consume(done, x);
+ if (r < 0)
+ return r;
+ }
+
+ while ((x = set_steal_first(todo))) {
+
+ r = set_consume(done, x);
+ if (IN_SET(r, 0, -EEXIST))
+ continue;
+ if (r < 0)
+ return r;
+
+ /* Deal with mount points that are obstructed by a later mount */
+ r = path_is_mount_point(x, NULL, 0);
+ if (IN_SET(r, 0, -ENOENT))
+ continue;
+ if (IN_SET(r, -EACCES, -EPERM)) {
+ /* Even if root user invoke this, submounts under private FUSE or NFS mount points
+ * may not be acceessed. E.g.,
+ *
+ * $ bindfs --no-allow-other ~/mnt/mnt ~/mnt/mnt
+ * $ bindfs --no-allow-other ~/mnt ~/mnt
+ *
+ * Then, root user cannot access the mount point ~/mnt/mnt.
+ * In such cases, the submounts are ignored, as we have no way to manage them. */
+ log_debug_errno(r, "Failed to determine '%s' is mount point or not, ignoring: %m", x);
+ continue;
+ }
+ if (r < 0)
+ return r;
+
+ /* Try to reuse the original flag set */
+ orig_flags = 0;
+ (void) get_mount_flags(x, &orig_flags);
+ orig_flags &= ~MS_RDONLY;
+
+ if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
+ return -errno;
+
+ log_debug("Remounted %s read-only.", x);
+ }
+ }
+}
+
+int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+
+ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!proc_self_mountinfo)
+ return -errno;
+
+ (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
+
+ return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, proc_self_mountinfo);
+}
+
+int mount_move_root(const char *path) {
+ assert(path);
+
+ if (chdir(path) < 0)
+ return -errno;
+
+ if (mount(path, "/", NULL, MS_MOVE, NULL) < 0)
+ return -errno;
+
+ if (chroot(".") < 0)
+ return -errno;
+
+ if (chdir("/") < 0)
+ return -errno;
+
+ return 0;
+}
+
+int repeat_unmount(const char *path, int flags) {
+ bool done = false;
+
+ assert(path);
+
+ /* If there are multiple mounts on a mount point, this
+ * removes them all */
+
+ for (;;) {
+ if (umount2(path, flags) < 0) {
+
+ if (errno == EINVAL)
+ return done;
+
+ return -errno;
+ }
+
+ done = true;
+ }
+}
+
+const char* mode_to_inaccessible_node(mode_t mode) {
+ /* This function maps a node type to a corresponding inaccessible file node. These nodes are created during
+ * early boot by PID 1. In some cases we lacked the privs to create the character and block devices (maybe
+ * because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy that excludes
+ * device nodes with major and minor of 0), but that's fine, in that case we use an AF_UNIX file node instead,
+ * which is not the same, but close enough for most uses. And most importantly, the kernel allows bind mounts
+ * from socket nodes to any non-directory file nodes, and that's the most important thing that matters. */
+
+ switch(mode & S_IFMT) {
+ case S_IFREG:
+ return "/run/systemd/inaccessible/reg";
+
+ case S_IFDIR:
+ return "/run/systemd/inaccessible/dir";
+
+ case S_IFCHR:
+ if (access("/run/systemd/inaccessible/chr", F_OK) == 0)
+ return "/run/systemd/inaccessible/chr";
+ return "/run/systemd/inaccessible/sock";
+
+ case S_IFBLK:
+ if (access("/run/systemd/inaccessible/blk", F_OK) == 0)
+ return "/run/systemd/inaccessible/blk";
+ return "/run/systemd/inaccessible/sock";
+
+ case S_IFIFO:
+ return "/run/systemd/inaccessible/fifo";
+
+ case S_IFSOCK:
+ return "/run/systemd/inaccessible/sock";
+ }
+ return NULL;
+}
+
+#define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
+static char* mount_flags_to_string(long unsigned flags) {
+ char *x;
+ _cleanup_free_ char *y = NULL;
+ long unsigned overflow;
+
+ overflow = flags & ~(MS_RDONLY |
+ MS_NOSUID |
+ MS_NODEV |
+ MS_NOEXEC |
+ MS_SYNCHRONOUS |
+ MS_REMOUNT |
+ MS_MANDLOCK |
+ MS_DIRSYNC |
+ MS_NOATIME |
+ MS_NODIRATIME |
+ MS_BIND |
+ MS_MOVE |
+ MS_REC |
+ MS_SILENT |
+ MS_POSIXACL |
+ MS_UNBINDABLE |
+ MS_PRIVATE |
+ MS_SLAVE |
+ MS_SHARED |
+ MS_RELATIME |
+ MS_KERNMOUNT |
+ MS_I_VERSION |
+ MS_STRICTATIME |
+ MS_LAZYTIME);
+
+ if (flags == 0 || overflow != 0)
+ if (asprintf(&y, "%lx", overflow) < 0)
+ return NULL;
+
+ x = strjoin(FLAG(MS_RDONLY),
+ FLAG(MS_NOSUID),
+ FLAG(MS_NODEV),
+ FLAG(MS_NOEXEC),
+ FLAG(MS_SYNCHRONOUS),
+ FLAG(MS_REMOUNT),
+ FLAG(MS_MANDLOCK),
+ FLAG(MS_DIRSYNC),
+ FLAG(MS_NOATIME),
+ FLAG(MS_NODIRATIME),
+ FLAG(MS_BIND),
+ FLAG(MS_MOVE),
+ FLAG(MS_REC),
+ FLAG(MS_SILENT),
+ FLAG(MS_POSIXACL),
+ FLAG(MS_UNBINDABLE),
+ FLAG(MS_PRIVATE),
+ FLAG(MS_SLAVE),
+ FLAG(MS_SHARED),
+ FLAG(MS_RELATIME),
+ FLAG(MS_KERNMOUNT),
+ FLAG(MS_I_VERSION),
+ FLAG(MS_STRICTATIME),
+ FLAG(MS_LAZYTIME),
+ y);
+ if (!x)
+ return NULL;
+ if (!y)
+ x[strlen(x) - 1] = '\0'; /* truncate the last | */
+ return x;
+}
+
+int mount_verbose(
+ int error_log_level,
+ const char *what,
+ const char *where,
+ const char *type,
+ unsigned long flags,
+ const char *options) {
+
+ _cleanup_free_ char *fl = NULL, *o = NULL;
+ unsigned long f;
+ int r;
+
+ r = mount_option_mangle(options, flags, &f, &o);
+ if (r < 0)
+ return log_full_errno(error_log_level, r,
+ "Failed to mangle mount options %s: %m",
+ strempty(options));
+
+ fl = mount_flags_to_string(f);
+
+ if ((f & MS_REMOUNT) && !what && !type)
+ log_debug("Remounting %s (%s \"%s\")...",
+ where, strnull(fl), strempty(o));
+ else if (!what && !type)
+ log_debug("Mounting %s (%s \"%s\")...",
+ where, strnull(fl), strempty(o));
+ else if ((f & MS_BIND) && !type)
+ log_debug("Bind-mounting %s on %s (%s \"%s\")...",
+ what, where, strnull(fl), strempty(o));
+ else if (f & MS_MOVE)
+ log_debug("Moving mount %s → %s (%s \"%s\")...",
+ what, where, strnull(fl), strempty(o));
+ else
+ log_debug("Mounting %s on %s (%s \"%s\")...",
+ strna(type), where, strnull(fl), strempty(o));
+ if (mount(what, where, type, f, o) < 0)
+ return log_full_errno(error_log_level, errno,
+ "Failed to mount %s (type %s) on %s (%s \"%s\"): %m",
+ strna(what), strna(type), where, strnull(fl), strempty(o));
+ return 0;
+}
+
+int umount_verbose(const char *what) {
+ log_debug("Umounting %s...", what);
+ if (umount(what) < 0)
+ return log_error_errno(errno, "Failed to unmount %s: %m", what);
+ return 0;
+}
+
+int mount_option_mangle(
+ const char *options,
+ unsigned long mount_flags,
+ unsigned long *ret_mount_flags,
+ char **ret_remaining_options) {
+
+ const struct libmnt_optmap *map;
+ _cleanup_free_ char *ret = NULL;
+ const char *p;
+ int r;
+
+ /* This extracts mount flags from the mount options, and store
+ * non-mount-flag options to '*ret_remaining_options'.
+ * E.g.,
+ * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
+ * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
+ * "size=1630748k,mode=700,uid=1000,gid=1000".
+ * See more examples in test-mount-utils.c.
+ *
+ * Note that if 'options' does not contain any non-mount-flag options,
+ * then '*ret_remaining_options' is set to NULL instread of empty string.
+ * Note that this does not check validity of options stored in
+ * '*ret_remaining_options'.
+ * Note that if 'options' is NULL, then this just copies 'mount_flags'
+ * to '*ret_mount_flags'. */
+
+ assert(ret_mount_flags);
+ assert(ret_remaining_options);
+
+ map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
+ if (!map)
+ return -EINVAL;
+
+ p = options;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ const struct libmnt_optmap *ent;
+
+ r = extract_first_word(&p, &word, ",", EXTRACT_QUOTES);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ for (ent = map; ent->name; ent++) {
+ /* All entries in MNT_LINUX_MAP do not take any argument.
+ * Thus, ent->name does not contain "=" or "[=]". */
+ if (!streq(word, ent->name))
+ continue;
+
+ if (!(ent->mask & MNT_INVERT))
+ mount_flags |= ent->id;
+ else if (mount_flags & ent->id)
+ mount_flags ^= ent->id;
+
+ break;
+ }
+
+ /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
+ if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
+ return -ENOMEM;
+ }
+
+ *ret_mount_flags = mount_flags;
+ *ret_remaining_options = TAKE_PTR(ret);
+
+ return 0;
+}
diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h
new file mode 100644
index 0000000..00df1b0
--- /dev/null
+++ b/src/shared/mount-util.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <mntent.h>
+#include <stdio.h>
+
+#include "macro.h"
+
+int repeat_unmount(const char *path, int flags);
+int umount_recursive(const char *target, int flags);
+int bind_remount_recursive(const char *prefix, bool ro, char **blacklist);
+int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo);
+
+int mount_move_root(const char *path);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, endmntent);
+#define _cleanup_endmntent_ _cleanup_(endmntentp)
+
+int mount_verbose(
+ int error_log_level,
+ const char *what,
+ const char *where,
+ const char *type,
+ unsigned long flags,
+ const char *options);
+int umount_verbose(const char *where);
+
+int mount_option_mangle(
+ const char *options,
+ unsigned long mount_flags,
+ unsigned long *ret_mount_flags,
+ char **ret_remaining_options);
+
+const char* mode_to_inaccessible_node(mode_t mode);
diff --git a/src/shared/nscd-flush.c b/src/shared/nscd-flush.c
new file mode 100644
index 0000000..5a04468
--- /dev/null
+++ b/src/shared/nscd-flush.c
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#include <sys/poll.h>
+
+#include "fd-util.h"
+#include "io-util.h"
+#include "nscd-flush.h"
+#include "socket-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+#define NSCD_FLUSH_CACHE_TIMEOUT_USEC (5*USEC_PER_SEC)
+
+struct nscdInvalidateRequest {
+ int32_t version;
+ int32_t type; /* in glibc this is an enum. We don't replicate this here 1:1. Also, wtf, how unportable is that
+ * even? */
+ int32_t key_len;
+ char dbname[];
+};
+
+static const union sockaddr_union nscd_sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/nscd/socket",
+};
+
+static int nscd_flush_cache_one(const char *database, usec_t end) {
+ size_t req_size, has_written = 0, has_read = 0, l;
+ struct nscdInvalidateRequest *req;
+ _cleanup_close_ int fd = -1;
+ int32_t resp;
+ int events;
+
+ assert(database);
+
+ l = strlen(database);
+ req_size = offsetof(struct nscdInvalidateRequest, dbname) + l + 1;
+
+ req = alloca(req_size);
+ *req = (struct nscdInvalidateRequest) {
+ .version = 2,
+ .type = 10,
+ .key_len = l + 1,
+ };
+
+ strcpy(req->dbname, database);
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_debug_errno(errno, "Failed to allocate nscd socket: %m");
+
+ /* Note: connect() returns EINPROGRESS if O_NONBLOCK is set and establishing a connection takes time. The
+ * kernel lets us know this way that the connection is now being established, and we should watch with poll()
+ * to learn when it is fully established. That said, AF_UNIX on Linux never triggers this IRL (connect() is
+ * always instant on AF_UNIX), hence handling this is mostly just an excercise in defensive, protocol-agnostic
+ * programming.
+ *
+ * connect() returns EAGAIN if the socket's backlog limit has been reached. When we see this we give up right
+ * away, after all this entire function here is written in a defensive style so that a non-responding nscd
+ * doesn't stall us for good. (Even if we wanted to handle this better: the Linux kernel doesn't really have a
+ * nice way to connect() to a server synchronously with a time limit that would also cover dealing with the
+ * backlog limit. After all SO_RCVTIMEO and SR_SNDTIMEO don't apply to connect(), and alarm() is frickin' ugly
+ * and not really reasonably usable from threads-aware code.) */
+ if (connect(fd, &nscd_sa.sa, SOCKADDR_UN_LEN(nscd_sa.un)) < 0) {
+ if (errno == EAGAIN)
+ return log_debug_errno(errno, "nscd is overloaded (backlog limit reached) and refuses to take further connections: %m");
+ if (errno != EINPROGRESS)
+ return log_debug_errno(errno, "Failed to connect to nscd socket: %m");
+
+ /* Continue in case of EINPROGRESS, but don't bother with send() or recv() until being notified that
+ * establishing the connection is complete. */
+ events = 0;
+ } else
+ events = POLLIN|POLLOUT; /* Let's assume initially that we can write and read to the fd, to suppress
+ * one poll() invocation */
+ for (;;) {
+ usec_t p;
+
+ if (events & POLLOUT) {
+ ssize_t m;
+
+ assert(has_written < req_size);
+
+ m = send(fd, (uint8_t*) req + has_written, req_size - has_written, MSG_NOSIGNAL);
+ if (m < 0) {
+ if (errno != EAGAIN) /* Note that EAGAIN is returned by the kernel whenever it can't
+ * take the data right now, and that includes if the connect() is
+ * asynchronous and we saw EINPROGRESS on it, and it hasn't
+ * completed yet. */
+ return log_debug_errno(errno, "Failed to write to nscd socket: %m");
+ } else
+ has_written += m;
+ }
+
+ if (events & (POLLIN|POLLERR|POLLHUP)) {
+ ssize_t m;
+
+ if (has_read >= sizeof(resp))
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Response from nscd longer than expected: %m");
+
+ m = recv(fd, (uint8_t*) &resp + has_read, sizeof(resp) - has_read, 0);
+ if (m < 0) {
+ if (errno != EAGAIN)
+ return log_debug_errno(errno, "Failed to read from nscd socket: %m");
+ } else if (m == 0) { /* EOF */
+ if (has_read == 0 && has_written >= req_size) /* Older nscd immediately terminated the
+ * connection, accept that as OK */
+ return 1;
+
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "nscd prematurely ended connection.");
+ } else
+ has_read += m;
+ }
+
+ if (has_written >= req_size && has_read >= sizeof(resp)) { /* done? */
+ if (resp < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "nscd sent us a negative error numer: %i", resp);
+ if (resp > 0)
+ return log_debug_errno(resp, "nscd return failure code on invalidating '%s'.", database);
+ return 1;
+ }
+
+ p = now(CLOCK_MONOTONIC);
+ if (p >= end)
+ return -ETIMEDOUT;
+
+ events = fd_wait_for_event(fd, POLLIN | (has_written < req_size ? POLLOUT : 0), end - p);
+ if (events < 0)
+ return events;
+ }
+}
+
+int nscd_flush_cache(char **databases) {
+ usec_t end;
+ int r = 0;
+ char **i;
+
+ /* Tries to invalidate the specified database in nscd. We do this carefully, with a 5s time-out, so that we
+ * don't block indefinitely on another service. */
+
+ end = usec_add(now(CLOCK_MONOTONIC), NSCD_FLUSH_CACHE_TIMEOUT_USEC);
+
+ STRV_FOREACH(i, databases) {
+ int k;
+
+ k = nscd_flush_cache_one(*i, end);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
diff --git a/src/shared/nscd-flush.h b/src/shared/nscd-flush.h
new file mode 100644
index 0000000..22774bf
--- /dev/null
+++ b/src/shared/nscd-flush.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int nscd_flush_cache(char **databases);
diff --git a/src/shared/nsflags.c b/src/shared/nsflags.c
new file mode 100644
index 0000000..8cc2d08
--- /dev/null
+++ b/src/shared/nsflags.c
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "nsflags.h"
+#include "string-util.h"
+
+const struct namespace_flag_map namespace_flag_map[] = {
+ { CLONE_NEWCGROUP, "cgroup" },
+ { CLONE_NEWIPC, "ipc" },
+ { CLONE_NEWNET, "net" },
+ /* So, the mount namespace flag is called CLONE_NEWNS for historical reasons. Let's expose it here under a more
+ * explanatory name: "mnt". This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
+ { CLONE_NEWNS, "mnt" },
+ { CLONE_NEWPID, "pid" },
+ { CLONE_NEWUSER, "user" },
+ { CLONE_NEWUTS, "uts" },
+ {}
+};
+
+int namespace_flags_from_string(const char *name, unsigned long *ret) {
+ unsigned long flags = 0;
+ int r;
+
+ assert_se(ret);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ unsigned long f = 0;
+ unsigned i;
+
+ r = extract_first_word(&name, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ for (i = 0; namespace_flag_map[i].name; i++)
+ if (streq(word, namespace_flag_map[i].name)) {
+ f = namespace_flag_map[i].flag;
+ break;
+ }
+
+ if (f == 0)
+ return -EINVAL;
+
+ flags |= f;
+ }
+
+ *ret = flags;
+ return 0;
+}
+
+int namespace_flags_to_string(unsigned long flags, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ unsigned i;
+
+ for (i = 0; namespace_flag_map[i].name; i++) {
+ if ((flags & namespace_flag_map[i].flag) != namespace_flag_map[i].flag)
+ continue;
+
+ if (!strextend_with_separator(&s, " ", namespace_flag_map[i].name, NULL))
+ return -ENOMEM;
+ }
+
+ if (!s) {
+ s = strdup("");
+ if (!s)
+ return -ENOMEM;
+ }
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
diff --git a/src/shared/nsflags.h b/src/shared/nsflags.h
new file mode 100644
index 0000000..0aeb0bc
--- /dev/null
+++ b/src/shared/nsflags.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "missing_sched.h"
+
+/* The combination of all namespace flags defined by the kernel. The right type for this isn't clear. setns() and
+ * unshare() expect these flags to be passed as (signed) "int", while clone() wants them as "unsigned long". The latter
+ * is definitely more appropriate for a flags parameter, and also the larger type of the two, hence let's stick to that
+ * here. */
+#define NAMESPACE_FLAGS_ALL \
+ ((unsigned long) (CLONE_NEWCGROUP| \
+ CLONE_NEWIPC| \
+ CLONE_NEWNET| \
+ CLONE_NEWNS| \
+ CLONE_NEWPID| \
+ CLONE_NEWUSER| \
+ CLONE_NEWUTS))
+
+#define NAMESPACE_FLAGS_INITIAL ((unsigned long) -1)
+
+int namespace_flags_from_string(const char *name, unsigned long *ret);
+int namespace_flags_to_string(unsigned long flags, char **ret);
+
+struct namespace_flag_map {
+ unsigned long flag;
+ const char *name;
+};
+
+extern const struct namespace_flag_map namespace_flag_map[];
diff --git a/src/shared/os-util.c b/src/shared/os-util.c
new file mode 100644
index 0000000..b2d5ce3
--- /dev/null
+++ b/src/shared/os-util.c
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "os-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+int path_is_os_tree(const char *path) {
+ int r;
+
+ assert(path);
+
+ /* Does the path exist at all? If not, generate an error immediately. This is useful so that a missing root dir
+ * always results in -ENOENT, and we can properly distuingish the case where the whole root doesn't exist from
+ * the case where just the os-release file is missing. */
+ if (laccess(path, F_OK) < 0)
+ return -errno;
+
+ /* We use {/etc|/usr/lib}/os-release as flag file if something is an OS */
+ r = open_os_release(path, NULL, NULL);
+ if (r == -ENOENT) /* We got nothing */
+ return 0;
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int open_os_release(const char *root, char **ret_path, int *ret_fd) {
+ _cleanup_free_ char *q = NULL;
+ const char *p;
+ int k;
+
+ FOREACH_STRING(p, "/etc/os-release", "/usr/lib/os-release") {
+ k = chase_symlinks(p, root, CHASE_PREFIX_ROOT|(ret_fd ? CHASE_OPEN : 0), (ret_path ? &q : NULL));
+ if (k != -ENOENT)
+ break;
+ }
+ if (k < 0)
+ return k;
+
+ if (ret_fd) {
+ int real_fd;
+
+ /* Convert the O_PATH fd into a proper, readable one */
+ real_fd = fd_reopen(k, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ safe_close(k);
+ if (real_fd < 0)
+ return real_fd;
+
+ *ret_fd = real_fd;
+ }
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(q);
+
+ return 0;
+}
+
+int fopen_os_release(const char *root, char **ret_path, FILE **ret_file) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ FILE *f;
+ int r;
+
+ if (!ret_file)
+ return open_os_release(root, ret_path, NULL);
+
+ r = open_os_release(root, ret_path ? &p : NULL, &fd);
+ if (r < 0)
+ return r;
+
+ f = fdopen(fd, "r");
+ if (!f)
+ return -errno;
+ fd = -1;
+
+ *ret_file = f;
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(p);
+
+ return 0;
+}
+
+int parse_os_release(const char *root, ...) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ va_list ap;
+ int r;
+
+ r = fopen_os_release(root, &p, &f);
+ if (r < 0)
+ return r;
+
+ va_start(ap, root);
+ r = parse_env_filev(f, p, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int load_os_release_pairs(const char *root, char ***ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ r = fopen_os_release(root, &p, &f);
+ if (r < 0)
+ return r;
+
+ return load_env_file_pairs(f, p, ret);
+}
diff --git a/src/shared/os-util.h b/src/shared/os-util.h
new file mode 100644
index 0000000..27ec7ac
--- /dev/null
+++ b/src/shared/os-util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdio.h>
+
+int path_is_os_tree(const char *path);
+
+int open_os_release(const char *root, char **ret_path, int *ret_fd);
+int fopen_os_release(const char *root, char **ret_path, FILE **ret_file);
+
+int parse_os_release(const char *root, ...) _sentinel_;
+int load_os_release_pairs(const char *root, char ***ret);
diff --git a/src/shared/output-mode.c b/src/shared/output-mode.c
new file mode 100644
index 0000000..107b345
--- /dev/null
+++ b/src/shared/output-mode.c
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "output-mode.h"
+#include "string-table.h"
+
+JsonFormatFlags output_mode_to_json_format_flags(OutputMode m) {
+
+ switch (m) {
+
+ case OUTPUT_JSON_SSE:
+ return JSON_FORMAT_SSE;
+
+ case OUTPUT_JSON_SEQ:
+ return JSON_FORMAT_SEQ;
+
+ case OUTPUT_JSON_PRETTY:
+ return JSON_FORMAT_PRETTY;
+
+ default:
+ return JSON_FORMAT_NEWLINE;
+ }
+}
+
+static const char *const output_mode_table[_OUTPUT_MODE_MAX] = {
+ [OUTPUT_SHORT] = "short",
+ [OUTPUT_SHORT_FULL] = "short-full",
+ [OUTPUT_SHORT_ISO] = "short-iso",
+ [OUTPUT_SHORT_ISO_PRECISE] = "short-iso-precise",
+ [OUTPUT_SHORT_PRECISE] = "short-precise",
+ [OUTPUT_SHORT_MONOTONIC] = "short-monotonic",
+ [OUTPUT_SHORT_UNIX] = "short-unix",
+ [OUTPUT_VERBOSE] = "verbose",
+ [OUTPUT_EXPORT] = "export",
+ [OUTPUT_JSON] = "json",
+ [OUTPUT_JSON_PRETTY] = "json-pretty",
+ [OUTPUT_JSON_SSE] = "json-sse",
+ [OUTPUT_JSON_SEQ] = "json-seq",
+ [OUTPUT_CAT] = "cat",
+ [OUTPUT_WITH_UNIT] = "with-unit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(output_mode, OutputMode);
diff --git a/src/shared/output-mode.h b/src/shared/output-mode.h
new file mode 100644
index 0000000..00b6032
--- /dev/null
+++ b/src/shared/output-mode.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "json.h"
+#include "macro.h"
+
+typedef enum OutputMode {
+ OUTPUT_SHORT,
+ OUTPUT_SHORT_FULL,
+ OUTPUT_SHORT_ISO,
+ OUTPUT_SHORT_ISO_PRECISE,
+ OUTPUT_SHORT_PRECISE,
+ OUTPUT_SHORT_MONOTONIC,
+ OUTPUT_SHORT_UNIX,
+ OUTPUT_VERBOSE,
+ OUTPUT_EXPORT,
+ OUTPUT_JSON,
+ OUTPUT_JSON_PRETTY,
+ OUTPUT_JSON_SSE,
+ OUTPUT_JSON_SEQ,
+ OUTPUT_CAT,
+ OUTPUT_WITH_UNIT,
+ _OUTPUT_MODE_MAX,
+ _OUTPUT_MODE_INVALID = -1
+} OutputMode;
+
+static inline bool OUTPUT_MODE_IS_JSON(OutputMode m) {
+ return IN_SET(m, OUTPUT_JSON, OUTPUT_JSON_PRETTY, OUTPUT_JSON_SSE, OUTPUT_JSON_SEQ);
+}
+
+/* The output flags definitions are shared by the logs and process tree output. Some apply to both, some only to the
+ * logs output, others only to the process tree output. */
+
+typedef enum OutputFlags {
+ OUTPUT_SHOW_ALL = 1 << 0,
+ OUTPUT_FOLLOW = 1 << 1,
+ OUTPUT_WARN_CUTOFF = 1 << 2,
+ OUTPUT_FULL_WIDTH = 1 << 3,
+ OUTPUT_COLOR = 1 << 4,
+ OUTPUT_CATALOG = 1 << 5,
+ OUTPUT_BEGIN_NEWLINE = 1 << 6,
+ OUTPUT_UTC = 1 << 7,
+ OUTPUT_KERNEL_THREADS = 1 << 8,
+ OUTPUT_NO_HOSTNAME = 1 << 9,
+} OutputFlags;
+
+JsonFormatFlags output_mode_to_json_format_flags(OutputMode m);
+
+const char* output_mode_to_string(OutputMode m) _const_;
+OutputMode output_mode_from_string(const char *s) _pure_;
diff --git a/src/shared/pager.c b/src/shared/pager.c
new file mode 100644
index 0000000..bf2597e
--- /dev/null
+++ b/src/shared/pager.c
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "copy.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "io-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "macro.h"
+#include "pager.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+
+static pid_t pager_pid = 0;
+
+static int stored_stdout = -1;
+static int stored_stderr = -1;
+static bool stdout_redirected = false;
+static bool stderr_redirected = false;
+
+_noreturn_ static void pager_fallback(void) {
+ int r;
+
+ r = copy_bytes(STDIN_FILENO, STDOUT_FILENO, (uint64_t) -1, 0);
+ if (r < 0) {
+ log_error_errno(r, "Internal pager failed: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+}
+
+static int no_quit_on_interrupt(int exe_name_fd, const char *less_opts) {
+ _cleanup_fclose_ FILE *file = NULL;
+ _cleanup_free_ char *line = NULL;
+ int r;
+
+ assert(exe_name_fd >= 0);
+ assert(less_opts);
+
+ /* This takes ownership of exe_name_fd */
+ file = fdopen(exe_name_fd, "r");
+ if (!file) {
+ safe_close(exe_name_fd);
+ return log_error_errno(errno, "Failed to create FILE object: %m");
+ }
+
+ /* Find the last line */
+ for (;;) {
+ _cleanup_free_ char *t = NULL;
+
+ r = read_line(file, LONG_LINE_MAX, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read from socket: %m");
+ if (r == 0)
+ break;
+
+ free_and_replace(line, t);
+ }
+
+ /* We only treat "less" specially.
+ * Return true whenever option K is *not* set. */
+ r = streq_ptr(line, "less") && !strchr(less_opts, 'K');
+
+ log_debug("Pager executable is \"%s\", options \"%s\", quit_on_interrupt: %s",
+ strnull(line), less_opts, yes_no(!r));
+ return r;
+}
+
+int pager_open(PagerFlags flags) {
+ _cleanup_close_pair_ int fd[2] = { -1, -1 }, exe_name_pipe[2] = { -1, -1 };
+ _cleanup_strv_free_ char **pager_args = NULL;
+ const char *pager, *less_opts;
+ int r;
+
+ if (flags & PAGER_DISABLE)
+ return 0;
+
+ if (pager_pid > 0)
+ return 1;
+
+ if (terminal_is_dumb())
+ return 0;
+
+ if (!is_main_thread())
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Pager invoked from wrong thread.");
+
+ pager = getenv("SYSTEMD_PAGER");
+ if (!pager)
+ pager = getenv("PAGER");
+
+ if (pager) {
+ pager_args = strv_split(pager, WHITESPACE);
+ if (!pager_args)
+ return log_oom();
+
+ /* If the pager is explicitly turned off, honour it */
+ if (strv_isempty(pager_args) || strv_equal(pager_args, STRV_MAKE("cat")))
+ return 0;
+ }
+
+ /* Determine and cache number of columns/lines before we spawn the pager so that we get the value from the
+ * actual tty */
+ (void) columns();
+ (void) lines();
+
+ if (pipe2(fd, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to create pager pipe: %m");
+
+ /* This is a pipe to feed the name of the executed pager binary into the parent */
+ if (pipe2(exe_name_pipe, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to create exe_name pipe: %m");
+
+ /* Initialize a good set of less options */
+ less_opts = getenv("SYSTEMD_LESS");
+ if (!less_opts)
+ less_opts = "FRSXMK";
+ if (flags & PAGER_JUMP_TO_END)
+ less_opts = strjoina(less_opts, " +G");
+
+ r = safe_fork("(pager)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pager_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char *less_charset, *exe;
+
+ /* In the child start the pager */
+
+ if (dup2(fd[0], STDIN_FILENO) < 0) {
+ log_error_errno(errno, "Failed to duplicate file descriptor to STDIN: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ safe_close_pair(fd);
+
+ if (setenv("LESS", less_opts, 1) < 0) {
+ log_error_errno(errno, "Failed to set environment variable LESS: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* Initialize a good charset for less. This is
+ * particularly important if we output UTF-8
+ * characters. */
+ less_charset = getenv("SYSTEMD_LESSCHARSET");
+ if (!less_charset && is_locale_utf8())
+ less_charset = "utf-8";
+ if (less_charset &&
+ setenv("LESSCHARSET", less_charset, 1) < 0) {
+ log_error_errno(errno, "Failed to set environment variable LESSCHARSET: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (pager_args) {
+ r = loop_write(exe_name_pipe[1], pager_args[0], strlen(pager_args[0]) + 1, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write pager name to socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ execvp(pager_args[0], pager_args);
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to execute '%s', using fallback pagers: %m", pager_args[0]);
+ }
+
+ /* Debian's alternatives command for pagers is
+ * called 'pager'. Note that we do not call
+ * sensible-pagers here, since that is just a
+ * shell script that implements a logic that
+ * is similar to this one anyway, but is
+ * Debian-specific. */
+ FOREACH_STRING(exe, "pager", "less", "more") {
+ r = loop_write(exe_name_pipe[1], exe, strlen(exe) + 1, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write pager name to socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+ execlp(exe, exe, NULL);
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to execute '%s', using next fallback pager: %m", exe);
+ }
+
+ r = loop_write(exe_name_pipe[1], "(built-in)", strlen("(built-in") + 1, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write pager name to socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+ pager_fallback();
+ /* not reached */
+ }
+
+ /* Return in the parent */
+ stored_stdout = fcntl(STDOUT_FILENO, F_DUPFD_CLOEXEC, 3);
+ if (dup2(fd[1], STDOUT_FILENO) < 0) {
+ stored_stdout = safe_close(stored_stdout);
+ return log_error_errno(errno, "Failed to duplicate pager pipe: %m");
+ }
+ stdout_redirected = true;
+
+ stored_stderr = fcntl(STDERR_FILENO, F_DUPFD_CLOEXEC, 3);
+ if (dup2(fd[1], STDERR_FILENO) < 0) {
+ stored_stderr = safe_close(stored_stderr);
+ return log_error_errno(errno, "Failed to duplicate pager pipe: %m");
+ }
+ stderr_redirected = true;
+
+ exe_name_pipe[1] = safe_close(exe_name_pipe[1]);
+
+ r = no_quit_on_interrupt(TAKE_FD(exe_name_pipe[0]), less_opts);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ (void) ignore_signals(SIGINT, -1);
+
+ return 1;
+}
+
+void pager_close(void) {
+
+ if (pager_pid <= 0)
+ return;
+
+ /* Inform pager that we are done */
+ (void) fflush(stdout);
+ if (stdout_redirected)
+ if (stored_stdout < 0 || dup2(stored_stdout, STDOUT_FILENO) < 0)
+ (void) close(STDOUT_FILENO);
+ stored_stdout = safe_close(stored_stdout);
+ (void) fflush(stderr);
+ if (stderr_redirected)
+ if (stored_stderr < 0 || dup2(stored_stderr, STDERR_FILENO) < 0)
+ (void) close(STDERR_FILENO);
+ stored_stderr = safe_close(stored_stderr);
+ stdout_redirected = stderr_redirected = false;
+
+ (void) kill(pager_pid, SIGCONT);
+ (void) wait_for_terminate(pager_pid, NULL);
+ pager_pid = 0;
+}
+
+bool pager_have(void) {
+ return pager_pid > 0;
+}
+
+int show_man_page(const char *desc, bool null_stdio) {
+ const char *args[4] = { "man", NULL, NULL, NULL };
+ char *e = NULL;
+ pid_t pid;
+ size_t k;
+ int r;
+
+ k = strlen(desc);
+
+ if (desc[k-1] == ')')
+ e = strrchr(desc, '(');
+
+ if (e) {
+ char *page = NULL, *section = NULL;
+
+ page = strndupa(desc, e - desc);
+ section = strndupa(e + 1, desc + k - e - 2);
+
+ args[1] = section;
+ args[2] = page;
+ } else
+ args[1] = desc;
+
+ r = safe_fork("(man)", FORK_RESET_SIGNALS|FORK_DEATHSIG|(null_stdio ? FORK_NULL_STDIO : 0)|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execvp(args[0], (char**) args);
+ log_error_errno(errno, "Failed to execute man: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ return wait_for_terminate_and_check(NULL, pid, 0);
+}
diff --git a/src/shared/pager.h b/src/shared/pager.h
new file mode 100644
index 0000000..8299e23
--- /dev/null
+++ b/src/shared/pager.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+typedef enum PagerFlags {
+ PAGER_DISABLE = 1 << 0,
+ PAGER_JUMP_TO_END = 1 << 1,
+} PagerFlags;
+
+int pager_open(PagerFlags flags);
+void pager_close(void);
+bool pager_have(void) _pure_;
+
+int show_man_page(const char *page, bool null_stdio);
diff --git a/src/shared/path-lookup.c b/src/shared/path-lookup.c
new file mode 100644
index 0000000..442fde7
--- /dev/null
+++ b/src/shared/path-lookup.c
@@ -0,0 +1,903 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "fs-util.h"
+#include "install.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+int xdg_user_runtime_dir(char **ret, const char *suffix) {
+ const char *e;
+ char *j;
+
+ assert(ret);
+ assert(suffix);
+
+ e = getenv("XDG_RUNTIME_DIR");
+ if (!e)
+ return -ENXIO;
+
+ j = strappend(e, suffix);
+ if (!j)
+ return -ENOMEM;
+
+ *ret = j;
+ return 0;
+}
+
+int xdg_user_config_dir(char **ret, const char *suffix) {
+ const char *e;
+ char *j;
+ int r;
+
+ assert(ret);
+
+ e = getenv("XDG_CONFIG_HOME");
+ if (e)
+ j = strappend(e, suffix);
+ else {
+ _cleanup_free_ char *home = NULL;
+
+ r = get_home_dir(&home);
+ if (r < 0)
+ return r;
+
+ j = strjoin(home, "/.config", suffix);
+ }
+
+ if (!j)
+ return -ENOMEM;
+
+ *ret = j;
+ return 0;
+}
+
+int xdg_user_data_dir(char **ret, const char *suffix) {
+ const char *e;
+ char *j;
+ int r;
+
+ assert(ret);
+ assert(suffix);
+
+ /* We don't treat /etc/xdg/systemd here as the spec
+ * suggests because we assume that is a link to
+ * /etc/systemd/ anyway. */
+
+ e = getenv("XDG_DATA_HOME");
+ if (e)
+ j = strappend(e, suffix);
+ else {
+ _cleanup_free_ char *home = NULL;
+
+ r = get_home_dir(&home);
+ if (r < 0)
+ return r;
+
+ j = strjoin(home, "/.local/share", suffix);
+ }
+ if (!j)
+ return -ENOMEM;
+
+ *ret = j;
+ return 1;
+}
+
+static const char* const user_data_unit_paths[] = {
+ "/usr/local/lib/systemd/user",
+ "/usr/local/share/systemd/user",
+ USER_DATA_UNIT_PATH,
+ "/usr/lib/systemd/user",
+ "/usr/share/systemd/user",
+ NULL
+};
+
+static const char* const user_config_unit_paths[] = {
+ USER_CONFIG_UNIT_PATH,
+ "/etc/systemd/user",
+ NULL
+};
+
+int xdg_user_dirs(char ***ret_config_dirs, char ***ret_data_dirs) {
+ /* Implement the mechanisms defined in
+ *
+ * http://standards.freedesktop.org/basedir-spec/basedir-spec-0.6.html
+ *
+ * We look in both the config and the data dirs because we
+ * want to encourage that distributors ship their unit files
+ * as data, and allow overriding as configuration.
+ */
+ const char *e;
+ _cleanup_strv_free_ char **config_dirs = NULL, **data_dirs = NULL;
+
+ e = getenv("XDG_CONFIG_DIRS");
+ if (e) {
+ config_dirs = strv_split(e, ":");
+ if (!config_dirs)
+ return -ENOMEM;
+ }
+
+ e = getenv("XDG_DATA_DIRS");
+ if (e)
+ data_dirs = strv_split(e, ":");
+ else
+ data_dirs = strv_new("/usr/local/share",
+ "/usr/share");
+ if (!data_dirs)
+ return -ENOMEM;
+
+ *ret_config_dirs = TAKE_PTR(config_dirs);
+ *ret_data_dirs = TAKE_PTR(data_dirs);
+
+ return 0;
+}
+
+static char** user_dirs(
+ const char *persistent_config,
+ const char *runtime_config,
+ const char *global_persistent_config,
+ const char *global_runtime_config,
+ const char *generator,
+ const char *generator_early,
+ const char *generator_late,
+ const char *transient,
+ const char *persistent_control,
+ const char *runtime_control) {
+
+ _cleanup_strv_free_ char **config_dirs = NULL, **data_dirs = NULL;
+ _cleanup_free_ char *data_home = NULL;
+ _cleanup_strv_free_ char **res = NULL;
+ int r;
+
+ r = xdg_user_dirs(&config_dirs, &data_dirs);
+ if (r < 0)
+ return NULL;
+
+ r = xdg_user_data_dir(&data_home, "/systemd/user");
+ if (r < 0 && r != -ENXIO)
+ return NULL;
+
+ /* Now merge everything we found. */
+ if (strv_extend(&res, persistent_control) < 0)
+ return NULL;
+
+ if (strv_extend(&res, runtime_control) < 0)
+ return NULL;
+
+ if (strv_extend(&res, transient) < 0)
+ return NULL;
+
+ if (strv_extend(&res, generator_early) < 0)
+ return NULL;
+
+ if (strv_extend_strv_concat(&res, config_dirs, "/systemd/user") < 0)
+ return NULL;
+
+ if (strv_extend(&res, persistent_config) < 0)
+ return NULL;
+
+ /* global config has lower priority than the user config of the same type */
+ if (strv_extend(&res, global_persistent_config) < 0)
+ return NULL;
+
+ if (strv_extend_strv(&res, (char**) user_config_unit_paths, false) < 0)
+ return NULL;
+
+ if (strv_extend(&res, runtime_config) < 0)
+ return NULL;
+
+ if (strv_extend(&res, global_runtime_config) < 0)
+ return NULL;
+
+ if (strv_extend(&res, generator) < 0)
+ return NULL;
+
+ if (strv_extend(&res, data_home) < 0)
+ return NULL;
+
+ if (strv_extend_strv_concat(&res, data_dirs, "/systemd/user") < 0)
+ return NULL;
+
+ if (strv_extend_strv(&res, (char**) user_data_unit_paths, false) < 0)
+ return NULL;
+
+ if (strv_extend(&res, generator_late) < 0)
+ return NULL;
+
+ if (path_strv_make_absolute_cwd(res) < 0)
+ return NULL;
+
+ return TAKE_PTR(res);
+}
+
+bool path_is_user_data_dir(const char *path) {
+ assert(path);
+
+ return strv_contains((char**) user_data_unit_paths, path);
+}
+
+bool path_is_user_config_dir(const char *path) {
+ assert(path);
+
+ return strv_contains((char**) user_config_unit_paths, path);
+}
+
+static int acquire_generator_dirs(
+ UnitFileScope scope,
+ const char *tempdir,
+ char **generator,
+ char **generator_early,
+ char **generator_late) {
+
+ _cleanup_free_ char *x = NULL, *y = NULL, *z = NULL;
+ const char *prefix;
+
+ assert(generator);
+ assert(generator_early);
+ assert(generator_late);
+ assert(IN_SET(scope, UNIT_FILE_SYSTEM, UNIT_FILE_USER, UNIT_FILE_GLOBAL));
+
+ if (scope == UNIT_FILE_GLOBAL)
+ return -EOPNOTSUPP;
+
+ if (tempdir)
+ prefix = tempdir;
+ else if (scope == UNIT_FILE_SYSTEM)
+ prefix = "/run/systemd";
+ else {
+ /* UNIT_FILE_USER */
+ const char *e;
+
+ e = getenv("XDG_RUNTIME_DIR");
+ if (!e)
+ return -ENXIO;
+
+ prefix = strjoina(e, "/systemd");
+ }
+
+ x = strappend(prefix, "/generator");
+ if (!x)
+ return -ENOMEM;
+
+ y = strappend(prefix, "/generator.early");
+ if (!y)
+ return -ENOMEM;
+
+ z = strappend(prefix, "/generator.late");
+ if (!z)
+ return -ENOMEM;
+
+ *generator = TAKE_PTR(x);
+ *generator_early = TAKE_PTR(y);
+ *generator_late = TAKE_PTR(z);
+
+ return 0;
+}
+
+static int acquire_transient_dir(
+ UnitFileScope scope,
+ const char *tempdir,
+ char **ret) {
+
+ char *transient;
+
+ assert(ret);
+ assert(IN_SET(scope, UNIT_FILE_SYSTEM, UNIT_FILE_USER, UNIT_FILE_GLOBAL));
+
+ if (scope == UNIT_FILE_GLOBAL)
+ return -EOPNOTSUPP;
+
+ if (tempdir)
+ transient = strjoin(tempdir, "/transient");
+ else if (scope == UNIT_FILE_SYSTEM)
+ transient = strdup("/run/systemd/transient");
+ else
+ return xdg_user_runtime_dir(ret, "/systemd/transient");
+
+ if (!transient)
+ return -ENOMEM;
+ *ret = transient;
+ return 0;
+}
+
+static int acquire_config_dirs(UnitFileScope scope, char **persistent, char **runtime) {
+ _cleanup_free_ char *a = NULL, *b = NULL;
+ int r;
+
+ assert(persistent);
+ assert(runtime);
+
+ switch (scope) {
+
+ case UNIT_FILE_SYSTEM:
+ a = strdup(SYSTEM_CONFIG_UNIT_PATH);
+ b = strdup("/run/systemd/system");
+ break;
+
+ case UNIT_FILE_GLOBAL:
+ a = strdup(USER_CONFIG_UNIT_PATH);
+ b = strdup("/run/systemd/user");
+ break;
+
+ case UNIT_FILE_USER:
+ r = xdg_user_config_dir(&a, "/systemd/user");
+ if (r < 0 && r != -ENXIO)
+ return r;
+
+ r = xdg_user_runtime_dir(runtime, "/systemd/user");
+ if (r < 0) {
+ if (r != -ENXIO)
+ return r;
+
+ /* If XDG_RUNTIME_DIR is not set, don't consider that fatal, simply initialize the runtime
+ * directory to NULL */
+ *runtime = NULL;
+ }
+
+ *persistent = TAKE_PTR(a);
+
+ return 0;
+
+ default:
+ assert_not_reached("Hmm, unexpected scope value.");
+ }
+
+ if (!a || !b)
+ return -ENOMEM;
+
+ *persistent = TAKE_PTR(a);
+ *runtime = TAKE_PTR(b);
+
+ return 0;
+}
+
+static int acquire_control_dirs(UnitFileScope scope, char **persistent, char **runtime) {
+ _cleanup_free_ char *a = NULL;
+ int r;
+
+ assert(persistent);
+ assert(runtime);
+
+ switch (scope) {
+
+ case UNIT_FILE_SYSTEM: {
+ _cleanup_free_ char *b = NULL;
+
+ a = strdup("/etc/systemd/system.control");
+ if (!a)
+ return -ENOMEM;
+
+ b = strdup("/run/systemd/system.control");
+ if (!b)
+ return -ENOMEM;
+
+ *runtime = TAKE_PTR(b);
+
+ break;
+ }
+
+ case UNIT_FILE_USER:
+ r = xdg_user_config_dir(&a, "/systemd/user.control");
+ if (r < 0 && r != -ENXIO)
+ return r;
+
+ r = xdg_user_runtime_dir(runtime, "/systemd/user.control");
+ if (r < 0) {
+ if (r != -ENXIO)
+ return r;
+
+ /* If XDG_RUNTIME_DIR is not set, don't consider this fatal, simply initialize the directory to
+ * NULL */
+ *runtime = NULL;
+ }
+
+ break;
+
+ case UNIT_FILE_GLOBAL:
+ return -EOPNOTSUPP;
+
+ default:
+ assert_not_reached("Hmm, unexpected scope value.");
+ }
+
+ *persistent = TAKE_PTR(a);
+
+ return 0;
+}
+
+static int acquire_attached_dirs(
+ UnitFileScope scope,
+ char **ret_persistent,
+ char **ret_runtime) {
+
+ _cleanup_free_ char *a = NULL, *b = NULL;
+
+ assert(ret_persistent);
+ assert(ret_runtime);
+
+ /* Portable services are not available to regular users for now. */
+ if (scope != UNIT_FILE_SYSTEM)
+ return -EOPNOTSUPP;
+
+ a = strdup("/etc/systemd/system.attached");
+ if (!a)
+ return -ENOMEM;
+
+ b = strdup("/run/systemd/system.attached");
+ if (!b)
+ return -ENOMEM;
+
+ *ret_persistent = TAKE_PTR(a);
+ *ret_runtime = TAKE_PTR(b);
+
+ return 0;
+}
+
+static int patch_root_prefix(char **p, const char *root_dir) {
+ char *c;
+
+ assert(p);
+
+ if (!*p)
+ return 0;
+
+ c = prefix_root(root_dir, *p);
+ if (!c)
+ return -ENOMEM;
+
+ free(*p);
+ *p = c;
+
+ return 0;
+}
+
+static int patch_root_prefix_strv(char **l, const char *root_dir) {
+ char **i;
+ int r;
+
+ if (!root_dir)
+ return 0;
+
+ STRV_FOREACH(i, l) {
+ r = patch_root_prefix(i, root_dir);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int lookup_paths_init(
+ LookupPaths *p,
+ UnitFileScope scope,
+ LookupPathsFlags flags,
+ const char *root_dir) {
+
+ _cleanup_(rmdir_and_freep) char *tempdir = NULL;
+ _cleanup_free_ char
+ *root = NULL,
+ *persistent_config = NULL, *runtime_config = NULL,
+ *global_persistent_config = NULL, *global_runtime_config = NULL,
+ *generator = NULL, *generator_early = NULL, *generator_late = NULL,
+ *transient = NULL,
+ *persistent_control = NULL, *runtime_control = NULL,
+ *persistent_attached = NULL, *runtime_attached = NULL;
+ bool append = false; /* Add items from SYSTEMD_UNIT_PATH before normal directories */
+ _cleanup_strv_free_ char **paths = NULL;
+ const char *e;
+ int r;
+
+ assert(p);
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+#if HAVE_SPLIT_USR
+ flags |= LOOKUP_PATHS_SPLIT_USR;
+#endif
+
+ if (!empty_or_root(root_dir)) {
+ if (scope == UNIT_FILE_USER)
+ return -EINVAL;
+
+ r = is_dir(root_dir, true);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOTDIR;
+
+ root = strdup(root_dir);
+ if (!root)
+ return -ENOMEM;
+ }
+
+ if (flags & LOOKUP_PATHS_TEMPORARY_GENERATED) {
+ r = mkdtemp_malloc("/tmp/systemd-temporary-XXXXXX", &tempdir);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create temporary directory: %m");
+ }
+
+ /* Note: when XDG_RUNTIME_DIR is not set this will not return -ENXIO, but simply set runtime_config to NULL */
+ r = acquire_config_dirs(scope, &persistent_config, &runtime_config);
+ if (r < 0)
+ return r;
+
+ if (scope == UNIT_FILE_USER) {
+ r = acquire_config_dirs(UNIT_FILE_GLOBAL, &global_persistent_config, &global_runtime_config);
+ if (r < 0)
+ return r;
+ }
+
+ if ((flags & LOOKUP_PATHS_EXCLUDE_GENERATED) == 0) {
+ /* Note: if XDG_RUNTIME_DIR is not set, this will fail completely with ENXIO */
+ r = acquire_generator_dirs(scope, tempdir,
+ &generator, &generator_early, &generator_late);
+ if (r < 0 && !IN_SET(r, -EOPNOTSUPP, -ENXIO))
+ return r;
+ }
+
+ /* Note: if XDG_RUNTIME_DIR is not set, this will fail completely with ENXIO */
+ r = acquire_transient_dir(scope, tempdir, &transient);
+ if (r < 0 && !IN_SET(r, -EOPNOTSUPP, -ENXIO))
+ return r;
+
+ /* Note: when XDG_RUNTIME_DIR is not set this will not return -ENXIO, but simply set runtime_control to NULL */
+ r = acquire_control_dirs(scope, &persistent_control, &runtime_control);
+ if (r < 0 && r != -EOPNOTSUPP)
+ return r;
+
+ r = acquire_attached_dirs(scope, &persistent_attached, &runtime_attached);
+ if (r < 0 && r != -EOPNOTSUPP)
+ return r;
+
+ /* First priority is whatever has been passed to us via env vars */
+ e = getenv("SYSTEMD_UNIT_PATH");
+ if (e) {
+ const char *k;
+
+ k = endswith(e, ":");
+ if (k) {
+ e = strndupa(e, k - e);
+ append = true;
+ }
+
+ /* FIXME: empty components in other places should be rejected. */
+
+ r = path_split_and_make_absolute(e, &paths);
+ if (r < 0)
+ return r;
+ }
+
+ if (!paths || append) {
+ /* Let's figure something out. */
+
+ _cleanup_strv_free_ char **add = NULL;
+
+ /* For the user units we include share/ in the search
+ * path in order to comply with the XDG basedir spec.
+ * For the system stuff we avoid such nonsense. OTOH
+ * we include /lib in the search path for the system
+ * stuff but avoid it for user stuff. */
+
+ switch (scope) {
+
+ case UNIT_FILE_SYSTEM:
+ add = strv_new(
+ /* If you modify this you also want to modify
+ * systemdsystemunitpath= in systemd.pc.in! */
+ STRV_IFNOTNULL(persistent_control),
+ STRV_IFNOTNULL(runtime_control),
+ STRV_IFNOTNULL(transient),
+ STRV_IFNOTNULL(generator_early),
+ persistent_config,
+ SYSTEM_CONFIG_UNIT_PATH,
+ "/etc/systemd/system",
+ STRV_IFNOTNULL(persistent_attached),
+ runtime_config,
+ "/run/systemd/system",
+ STRV_IFNOTNULL(runtime_attached),
+ STRV_IFNOTNULL(generator),
+ "/usr/local/lib/systemd/system",
+ SYSTEM_DATA_UNIT_PATH,
+ "/usr/lib/systemd/system",
+ STRV_IFNOTNULL(flags & LOOKUP_PATHS_SPLIT_USR ? "/lib/systemd/system" : NULL),
+ STRV_IFNOTNULL(generator_late));
+ break;
+
+ case UNIT_FILE_GLOBAL:
+ add = strv_new(
+ /* If you modify this you also want to modify
+ * systemduserunitpath= in systemd.pc.in, and
+ * the arrays in user_dirs() above! */
+ STRV_IFNOTNULL(persistent_control),
+ STRV_IFNOTNULL(runtime_control),
+ STRV_IFNOTNULL(transient),
+ STRV_IFNOTNULL(generator_early),
+ persistent_config,
+ USER_CONFIG_UNIT_PATH,
+ "/etc/systemd/user",
+ runtime_config,
+ "/run/systemd/user",
+ STRV_IFNOTNULL(generator),
+ "/usr/local/share/systemd/user",
+ "/usr/share/systemd/user",
+ "/usr/local/lib/systemd/user",
+ USER_DATA_UNIT_PATH,
+ "/usr/lib/systemd/user",
+ STRV_IFNOTNULL(generator_late));
+ break;
+
+ case UNIT_FILE_USER:
+ add = user_dirs(persistent_config, runtime_config,
+ global_persistent_config, global_runtime_config,
+ generator, generator_early, generator_late,
+ transient,
+ persistent_control, runtime_control);
+ break;
+
+ default:
+ assert_not_reached("Hmm, unexpected scope?");
+ }
+
+ if (!add)
+ return -ENOMEM;
+
+ if (paths) {
+ r = strv_extend_strv(&paths, add, true);
+ if (r < 0)
+ return r;
+ } else
+ /* Small optimization: if paths is NULL (and it usually is), we can simply assign 'add' to it,
+ * and don't have to copy anything */
+ paths = TAKE_PTR(add);
+ }
+
+ r = patch_root_prefix(&persistent_config, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&runtime_config, root);
+ if (r < 0)
+ return r;
+
+ r = patch_root_prefix(&generator, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&generator_early, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&generator_late, root);
+ if (r < 0)
+ return r;
+
+ r = patch_root_prefix(&transient, root);
+ if (r < 0)
+ return r;
+
+ r = patch_root_prefix(&persistent_control, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&runtime_control, root);
+ if (r < 0)
+ return r;
+
+ r = patch_root_prefix(&persistent_attached, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&runtime_attached, root);
+ if (r < 0)
+ return r;
+
+ r = patch_root_prefix_strv(paths, root);
+ if (r < 0)
+ return -ENOMEM;
+
+ *p = (LookupPaths) {
+ .search_path = strv_uniq(paths),
+
+ .persistent_config = TAKE_PTR(persistent_config),
+ .runtime_config = TAKE_PTR(runtime_config),
+
+ .generator = TAKE_PTR(generator),
+ .generator_early = TAKE_PTR(generator_early),
+ .generator_late = TAKE_PTR(generator_late),
+
+ .transient = TAKE_PTR(transient),
+
+ .persistent_control = TAKE_PTR(persistent_control),
+ .runtime_control = TAKE_PTR(runtime_control),
+
+ .persistent_attached = TAKE_PTR(persistent_attached),
+ .runtime_attached = TAKE_PTR(runtime_attached),
+
+ .root_dir = TAKE_PTR(root),
+ .temporary_dir = TAKE_PTR(tempdir),
+ };
+
+ paths = NULL;
+ return 0;
+}
+
+void lookup_paths_free(LookupPaths *p) {
+ if (!p)
+ return;
+
+ p->search_path = strv_free(p->search_path);
+
+ p->persistent_config = mfree(p->persistent_config);
+ p->runtime_config = mfree(p->runtime_config);
+
+ p->persistent_attached = mfree(p->persistent_attached);
+ p->runtime_attached = mfree(p->runtime_attached);
+
+ p->generator = mfree(p->generator);
+ p->generator_early = mfree(p->generator_early);
+ p->generator_late = mfree(p->generator_late);
+
+ p->transient = mfree(p->transient);
+
+ p->persistent_control = mfree(p->persistent_control);
+ p->runtime_control = mfree(p->runtime_control);
+
+ p->root_dir = mfree(p->root_dir);
+ p->temporary_dir = mfree(p->temporary_dir);
+}
+
+int lookup_paths_reduce(LookupPaths *p) {
+ _cleanup_free_ struct stat *stats = NULL;
+ size_t n_stats = 0, allocated = 0;
+ size_t c = 0;
+ int r;
+
+ assert(p);
+
+ /* Drop duplicates and non-existing directories from the search path. We figure out whether two directories are
+ * the same by comparing their device and inode numbers. */
+
+ if (!p->search_path)
+ return 0;
+
+ while (p->search_path[c]) {
+ struct stat st;
+ size_t k;
+
+ /* Never strip the transient and control directories from the path */
+ if (path_equal_ptr(p->search_path[c], p->transient) ||
+ path_equal_ptr(p->search_path[c], p->persistent_control) ||
+ path_equal_ptr(p->search_path[c], p->runtime_control)) {
+ c++;
+ continue;
+ }
+
+ r = chase_symlinks_and_stat(p->search_path[c], p->root_dir, 0, NULL, &st);
+ if (r == -ENOENT)
+ goto remove_item;
+ if (r < 0) {
+ /* If something we don't grok happened, let's better leave it in. */
+ log_debug_errno(r, "Failed to chase and stat %s: %m", p->search_path[c]);
+ c++;
+ continue;
+ }
+
+ for (k = 0; k < n_stats; k++)
+ if (stats[k].st_dev == st.st_dev &&
+ stats[k].st_ino == st.st_ino)
+ break;
+
+ if (k < n_stats) /* Is there already an entry with the same device/inode? */
+ goto remove_item;
+
+ if (!GREEDY_REALLOC(stats, allocated, n_stats+1))
+ return -ENOMEM;
+
+ stats[n_stats++] = st;
+ c++;
+ continue;
+
+ remove_item:
+ free(p->search_path[c]);
+ memmove(p->search_path + c,
+ p->search_path + c + 1,
+ (strv_length(p->search_path + c + 1) + 1) * sizeof(char*));
+ }
+
+ if (strv_isempty(p->search_path)) {
+ log_debug("Ignoring unit files.");
+ p->search_path = strv_free(p->search_path);
+ } else {
+ _cleanup_free_ char *t;
+
+ t = strv_join(p->search_path, "\n\t");
+ if (!t)
+ return -ENOMEM;
+
+ log_debug("Looking for unit files in (higher priority first):\n\t%s", t);
+ }
+
+ return 0;
+}
+
+int lookup_paths_mkdir_generator(LookupPaths *p) {
+ int r, q;
+
+ assert(p);
+
+ if (!p->generator || !p->generator_early || !p->generator_late)
+ return -EINVAL;
+
+ r = mkdir_p_label(p->generator, 0755);
+
+ q = mkdir_p_label(p->generator_early, 0755);
+ if (q < 0 && r >= 0)
+ r = q;
+
+ q = mkdir_p_label(p->generator_late, 0755);
+ if (q < 0 && r >= 0)
+ r = q;
+
+ return r;
+}
+
+void lookup_paths_trim_generator(LookupPaths *p) {
+ assert(p);
+
+ /* Trim empty dirs */
+
+ if (p->generator)
+ (void) rmdir(p->generator);
+ if (p->generator_early)
+ (void) rmdir(p->generator_early);
+ if (p->generator_late)
+ (void) rmdir(p->generator_late);
+}
+
+void lookup_paths_flush_generator(LookupPaths *p) {
+ assert(p);
+
+ /* Flush the generated unit files in full */
+
+ if (p->generator)
+ (void) rm_rf(p->generator, REMOVE_ROOT|REMOVE_PHYSICAL);
+ if (p->generator_early)
+ (void) rm_rf(p->generator_early, REMOVE_ROOT|REMOVE_PHYSICAL);
+ if (p->generator_late)
+ (void) rm_rf(p->generator_late, REMOVE_ROOT|REMOVE_PHYSICAL);
+
+ if (p->temporary_dir)
+ (void) rm_rf(p->temporary_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+char **generator_binary_paths(UnitFileScope scope) {
+
+ switch (scope) {
+
+ case UNIT_FILE_SYSTEM:
+ return strv_new("/run/systemd/system-generators",
+ "/etc/systemd/system-generators",
+ "/usr/local/lib/systemd/system-generators",
+ SYSTEM_GENERATOR_PATH);
+
+ case UNIT_FILE_GLOBAL:
+ case UNIT_FILE_USER:
+ return strv_new("/run/systemd/user-generators",
+ "/etc/systemd/user-generators",
+ "/usr/local/lib/systemd/user-generators",
+ USER_GENERATOR_PATH);
+
+ default:
+ assert_not_reached("Hmm, unexpected scope.");
+ }
+}
diff --git a/src/shared/path-lookup.h b/src/shared/path-lookup.h
new file mode 100644
index 0000000..cb7d4d5
--- /dev/null
+++ b/src/shared/path-lookup.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+typedef struct LookupPaths LookupPaths;
+
+#include "install.h"
+#include "macro.h"
+
+typedef enum LookupPathsFlags {
+ LOOKUP_PATHS_EXCLUDE_GENERATED = 1 << 0,
+ LOOKUP_PATHS_TEMPORARY_GENERATED = 1 << 1,
+ LOOKUP_PATHS_SPLIT_USR = 1 << 2,
+} LookupPathsFlags;
+
+struct LookupPaths {
+ /* Where we look for unit files. This includes the individual special paths below, but also any vendor
+ * supplied, static unit file paths. */
+ char **search_path;
+
+ /* Where we shall create or remove our installation symlinks, aka "configuration", and where the user/admin
+ * shall place his own unit files. */
+ char *persistent_config;
+ char *runtime_config;
+
+ /* Where units from a portable service image shall be placed. */
+ char *persistent_attached;
+ char *runtime_attached;
+
+ /* Where to place generated unit files (i.e. those a "generator" tool generated). Note the special semantics of
+ * this directory: the generators are flushed each time a "systemctl daemon-reload" is issued. The user should
+ * not alter these directories directly. */
+ char *generator;
+ char *generator_early;
+ char *generator_late;
+
+ /* Where to place transient unit files (i.e. those created dynamically via the bus API). Note the special
+ * semantics of this directory: all units created transiently have their unit files removed as the transient
+ * unit is unloaded. The user should not alter this directory directly. */
+ char *transient;
+
+ /* Where the snippets created by "systemctl set-property" are placed. Note that for transient units, the
+ * snippets are placed in the transient directory though (see above). The user should not alter this directory
+ * directly. */
+ char *persistent_control;
+ char *runtime_control;
+
+ /* The root directory prepended to all items above, or NULL */
+ char *root_dir;
+
+ /* A temporary directory when running in test mode, to be nuked */
+ char *temporary_dir;
+};
+
+int lookup_paths_init(LookupPaths *p, UnitFileScope scope, LookupPathsFlags flags, const char *root_dir);
+
+int xdg_user_dirs(char ***ret_config_dirs, char ***ret_data_dirs);
+int xdg_user_runtime_dir(char **ret, const char *suffix);
+int xdg_user_config_dir(char **ret, const char *suffix);
+int xdg_user_data_dir(char **ret, const char *suffix);
+
+bool path_is_user_data_dir(const char *path);
+bool path_is_user_config_dir(const char *path);
+
+int lookup_paths_reduce(LookupPaths *p);
+
+int lookup_paths_mkdir_generator(LookupPaths *p);
+void lookup_paths_trim_generator(LookupPaths *p);
+void lookup_paths_flush_generator(LookupPaths *p);
+
+void lookup_paths_free(LookupPaths *p);
+
+char **generator_binary_paths(UnitFileScope scope);
diff --git a/src/shared/pretty-print.c b/src/shared/pretty-print.c
new file mode 100644
index 0000000..de6274a
--- /dev/null
+++ b/src/shared/pretty-print.c
@@ -0,0 +1,247 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/utsname.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "def.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "pager.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static bool urlify_enabled(void) {
+ static int cached_urlify_enabled = -1;
+
+ /* Unfortunately 'less' doesn't support links like this yet 😭, hence let's disable this as long as there's a
+ * pager in effect. Let's drop this check as soon as less got fixed a and enough time passed so that it's safe
+ * to assume that a link-enabled 'less' version has hit most installations. */
+
+ if (cached_urlify_enabled < 0) {
+ int val;
+
+ val = getenv_bool("SYSTEMD_URLIFY");
+ if (val >= 0)
+ cached_urlify_enabled = val;
+ else
+ cached_urlify_enabled = colors_enabled() && !pager_have();
+ }
+
+ return cached_urlify_enabled;
+}
+
+int terminal_urlify(const char *url, const char *text, char **ret) {
+ char *n;
+
+ assert(url);
+
+ /* Takes an URL and a pretty string and formats it as clickable link for the terminal. See
+ * https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda for details. */
+
+ if (isempty(text))
+ text = url;
+
+ if (urlify_enabled())
+ n = strjoin("\x1B]8;;", url, "\a", text, "\x1B]8;;\a");
+ else
+ n = strdup(text);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int file_url_from_path(const char *path, char **ret) {
+ _cleanup_free_ char *absolute = NULL;
+ struct utsname u;
+ char *url = NULL;
+ int r;
+
+ if (uname(&u) < 0)
+ return -errno;
+
+ if (!path_is_absolute(path)) {
+ r = path_make_absolute_cwd(path, &absolute);
+ if (r < 0)
+ return r;
+
+ path = absolute;
+ }
+
+ /* As suggested by https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda, let's include the local
+ * hostname here. Note that we don't use gethostname_malloc() or gethostname_strict() since we are interested
+ * in the raw string the kernel has set, whatever it may be, under the assumption that terminals are not overly
+ * careful with validating the strings either. */
+
+ url = strjoin("file://", u.nodename, path);
+ if (!url)
+ return -ENOMEM;
+
+ *ret = url;
+ return 0;
+}
+
+int terminal_urlify_path(const char *path, const char *text, char **ret) {
+ _cleanup_free_ char *url = NULL;
+ int r;
+
+ assert(path);
+
+ /* Much like terminal_urlify() above, but takes a file system path as input
+ * and turns it into a proper file:// URL first. */
+
+ if (isempty(path))
+ return -EINVAL;
+
+ if (isempty(text))
+ text = path;
+
+ if (!urlify_enabled()) {
+ char *n;
+
+ n = strdup(text);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+ }
+
+ r = file_url_from_path(path, &url);
+ if (r < 0)
+ return r;
+
+ return terminal_urlify(url, text, ret);
+}
+
+int terminal_urlify_man(const char *page, const char *section, char **ret) {
+ const char *url, *text;
+
+ url = strjoina("man:", page, "(", section, ")");
+ text = strjoina(page, "(", section, ") man page");
+
+ return terminal_urlify(url, text, ret);
+}
+
+static int cat_file(const char *filename, bool newline) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *urlified = NULL;
+ int r;
+
+ f = fopen(filename, "re");
+ if (!f)
+ return -errno;
+
+ r = terminal_urlify_path(filename, NULL, &urlified);
+ if (r < 0)
+ return r;
+
+ printf("%s%s# %s%s\n",
+ newline ? "\n" : "",
+ ansi_highlight_blue(),
+ urlified,
+ ansi_normal());
+ fflush(stdout);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read \"%s\": %m", filename);
+ if (r == 0)
+ break;
+
+ puts(line);
+ }
+
+ return 0;
+}
+
+int cat_files(const char *file, char **dropins, CatFlags flags) {
+ char **path;
+ int r;
+
+ if (file) {
+ r = cat_file(file, false);
+ if (r == -ENOENT && (flags & CAT_FLAGS_MAIN_FILE_OPTIONAL))
+ printf("%s# config file %s not found%s\n",
+ ansi_highlight_magenta(),
+ file,
+ ansi_normal());
+ else if (r < 0)
+ return log_warning_errno(r, "Failed to cat %s: %m", file);
+ }
+
+ STRV_FOREACH(path, dropins) {
+ r = cat_file(*path, file || path != dropins);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to cat %s: %m", *path);
+ }
+
+ return 0;
+}
+
+void print_separator(void) {
+
+ /* Outputs a separator line that resolves to whitespace when copied from the terminal. We do that by outputting
+ * one line filled with spaces with ANSI underline set, followed by a second (empty) line. */
+
+ if (underline_enabled()) {
+ size_t i, c;
+
+ c = columns();
+
+ flockfile(stdout);
+ fputs_unlocked(ANSI_UNDERLINE, stdout);
+
+ for (i = 0; i < c; i++)
+ fputc_unlocked(' ', stdout);
+
+ fputs_unlocked(ANSI_NORMAL "\n\n", stdout);
+ funlockfile(stdout);
+ } else
+ fputs("\n\n", stdout);
+}
+
+int conf_files_cat(const char *root, const char *name) {
+ _cleanup_strv_free_ char **dirs = NULL, **files = NULL;
+ _cleanup_free_ char *path = NULL;
+ const char *dir;
+ char **t;
+ int r;
+
+ NULSTR_FOREACH(dir, CONF_PATHS_NULSTR("")) {
+ assert(endswith(dir, "/"));
+ r = strv_extendf(&dirs, "%s%s.d", dir, name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to build directory list: %m");
+ }
+
+ r = conf_files_list_strv(&files, ".conf", root, 0, (const char* const*) dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query file list: %m");
+
+ path = path_join(root, "/etc", name);
+ if (!path)
+ return log_oom();
+
+ if (DEBUG_LOGGING) {
+ log_debug("Looking for configuration in:");
+ log_debug(" %s", path);
+ STRV_FOREACH(t, dirs)
+ log_debug(" %s/*.conf", *t);
+ }
+
+ /* show */
+ return cat_files(path, files, CAT_FLAGS_MAIN_FILE_OPTIONAL);
+}
diff --git a/src/shared/pretty-print.h b/src/shared/pretty-print.h
new file mode 100644
index 0000000..12ab9ac
--- /dev/null
+++ b/src/shared/pretty-print.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+void print_separator(void);
+
+int file_url_from_path(const char *path, char **ret);
+
+int terminal_urlify(const char *url, const char *text, char **ret);
+int terminal_urlify_path(const char *path, const char *text, char **ret);
+int terminal_urlify_man(const char *page, const char *section, char **ret);
+
+typedef enum CatFlags {
+ CAT_FLAGS_MAIN_FILE_OPTIONAL = 1 << 0,
+} CatFlags;
+
+int cat_files(const char *file, char **dropins, CatFlags flags);
+int conf_files_cat(const char *root, const char *name);
diff --git a/src/shared/ptyfwd.c b/src/shared/ptyfwd.c
new file mode 100644
index 0000000..fe17b37
--- /dev/null
+++ b/src/shared/ptyfwd.c
@@ -0,0 +1,631 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "ptyfwd.h"
+#include "terminal-util.h"
+#include "time-util.h"
+
+struct PTYForward {
+ sd_event *event;
+
+ int master;
+
+ PTYForwardFlags flags;
+
+ sd_event_source *stdin_event_source;
+ sd_event_source *stdout_event_source;
+ sd_event_source *master_event_source;
+
+ sd_event_source *sigwinch_event_source;
+
+ struct termios saved_stdin_attr;
+ struct termios saved_stdout_attr;
+
+ bool saved_stdin:1;
+ bool saved_stdout:1;
+
+ bool stdin_readable:1;
+ bool stdin_hangup:1;
+ bool stdout_writable:1;
+ bool stdout_hangup:1;
+ bool master_readable:1;
+ bool master_writable:1;
+ bool master_hangup:1;
+
+ bool read_from_master:1;
+
+ bool done:1;
+ bool drain:1;
+
+ bool last_char_set:1;
+ char last_char;
+
+ char in_buffer[LINE_MAX], out_buffer[LINE_MAX];
+ size_t in_buffer_full, out_buffer_full;
+
+ usec_t escape_timestamp;
+ unsigned escape_counter;
+
+ PTYForwardHandler handler;
+ void *userdata;
+};
+
+#define ESCAPE_USEC (1*USEC_PER_SEC)
+
+static void pty_forward_disconnect(PTYForward *f) {
+
+ if (f) {
+ f->stdin_event_source = sd_event_source_unref(f->stdin_event_source);
+ f->stdout_event_source = sd_event_source_unref(f->stdout_event_source);
+
+ f->master_event_source = sd_event_source_unref(f->master_event_source);
+ f->sigwinch_event_source = sd_event_source_unref(f->sigwinch_event_source);
+ f->event = sd_event_unref(f->event);
+
+ if (f->saved_stdout)
+ tcsetattr(STDOUT_FILENO, TCSANOW, &f->saved_stdout_attr);
+ if (f->saved_stdin)
+ tcsetattr(STDIN_FILENO, TCSANOW, &f->saved_stdin_attr);
+
+ f->saved_stdout = f->saved_stdin = false;
+ }
+
+ /* STDIN/STDOUT should not be nonblocking normally, so let's unconditionally reset it */
+ (void) fd_nonblock(STDIN_FILENO, false);
+ (void) fd_nonblock(STDOUT_FILENO, false);
+}
+
+static int pty_forward_done(PTYForward *f, int rcode) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ assert(f);
+
+ if (f->done)
+ return 0;
+
+ e = sd_event_ref(f->event);
+
+ f->done = true;
+ pty_forward_disconnect(f);
+
+ if (f->handler)
+ return f->handler(f, rcode, f->userdata);
+ else
+ return sd_event_exit(e, rcode < 0 ? EXIT_FAILURE : rcode);
+}
+
+static bool look_for_escape(PTYForward *f, const char *buffer, size_t n) {
+ const char *p;
+
+ assert(f);
+ assert(buffer);
+ assert(n > 0);
+
+ for (p = buffer; p < buffer + n; p++) {
+
+ /* Check for ^] */
+ if (*p == 0x1D) {
+ usec_t nw = now(CLOCK_MONOTONIC);
+
+ if (f->escape_counter == 0 || nw > f->escape_timestamp + ESCAPE_USEC) {
+ f->escape_timestamp = nw;
+ f->escape_counter = 1;
+ } else {
+ (f->escape_counter)++;
+
+ if (f->escape_counter >= 3)
+ return true;
+ }
+ } else {
+ f->escape_timestamp = 0;
+ f->escape_counter = 0;
+ }
+ }
+
+ return false;
+}
+
+static bool ignore_vhangup(PTYForward *f) {
+ assert(f);
+
+ if (f->flags & PTY_FORWARD_IGNORE_VHANGUP)
+ return true;
+
+ if ((f->flags & PTY_FORWARD_IGNORE_INITIAL_VHANGUP) && !f->read_from_master)
+ return true;
+
+ return false;
+}
+
+static bool drained(PTYForward *f) {
+ int q = 0;
+
+ assert(f);
+
+ if (f->out_buffer_full > 0)
+ return false;
+
+ if (f->master_readable)
+ return false;
+
+ if (ioctl(f->master, TIOCINQ, &q) < 0)
+ log_debug_errno(errno, "TIOCINQ failed on master: %m");
+ else if (q > 0)
+ return false;
+
+ if (ioctl(f->master, TIOCOUTQ, &q) < 0)
+ log_debug_errno(errno, "TIOCOUTQ failed on master: %m");
+ else if (q > 0)
+ return false;
+
+ return true;
+}
+
+static int shovel(PTYForward *f) {
+ ssize_t k;
+
+ assert(f);
+
+ while ((f->stdin_readable && f->in_buffer_full <= 0) ||
+ (f->master_writable && f->in_buffer_full > 0) ||
+ (f->master_readable && f->out_buffer_full <= 0) ||
+ (f->stdout_writable && f->out_buffer_full > 0)) {
+
+ if (f->stdin_readable && f->in_buffer_full < LINE_MAX) {
+
+ k = read(STDIN_FILENO, f->in_buffer + f->in_buffer_full, LINE_MAX - f->in_buffer_full);
+ if (k < 0) {
+
+ if (errno == EAGAIN)
+ f->stdin_readable = false;
+ else if (IN_SET(errno, EIO, EPIPE, ECONNRESET)) {
+ f->stdin_readable = false;
+ f->stdin_hangup = true;
+
+ f->stdin_event_source = sd_event_source_unref(f->stdin_event_source);
+ } else {
+ log_error_errno(errno, "read(): %m");
+ return pty_forward_done(f, -errno);
+ }
+ } else if (k == 0) {
+ /* EOF on stdin */
+ f->stdin_readable = false;
+ f->stdin_hangup = true;
+
+ f->stdin_event_source = sd_event_source_unref(f->stdin_event_source);
+ } else {
+ /* Check if ^] has been pressed three times within one second. If we get this we quite
+ * immediately. */
+ if (look_for_escape(f, f->in_buffer + f->in_buffer_full, k))
+ return pty_forward_done(f, -ECANCELED);
+
+ f->in_buffer_full += (size_t) k;
+ }
+ }
+
+ if (f->master_writable && f->in_buffer_full > 0) {
+
+ k = write(f->master, f->in_buffer, f->in_buffer_full);
+ if (k < 0) {
+
+ if (IN_SET(errno, EAGAIN, EIO))
+ f->master_writable = false;
+ else if (IN_SET(errno, EPIPE, ECONNRESET)) {
+ f->master_writable = f->master_readable = false;
+ f->master_hangup = true;
+
+ f->master_event_source = sd_event_source_unref(f->master_event_source);
+ } else {
+ log_error_errno(errno, "write(): %m");
+ return pty_forward_done(f, -errno);
+ }
+ } else {
+ assert(f->in_buffer_full >= (size_t) k);
+ memmove(f->in_buffer, f->in_buffer + k, f->in_buffer_full - k);
+ f->in_buffer_full -= k;
+ }
+ }
+
+ if (f->master_readable && f->out_buffer_full < LINE_MAX) {
+
+ k = read(f->master, f->out_buffer + f->out_buffer_full, LINE_MAX - f->out_buffer_full);
+ if (k < 0) {
+
+ /* Note that EIO on the master device
+ * might be caused by vhangup() or
+ * temporary closing of everything on
+ * the other side, we treat it like
+ * EAGAIN here and try again, unless
+ * ignore_vhangup is off. */
+
+ if (errno == EAGAIN || (errno == EIO && ignore_vhangup(f)))
+ f->master_readable = false;
+ else if (IN_SET(errno, EPIPE, ECONNRESET, EIO)) {
+ f->master_readable = f->master_writable = false;
+ f->master_hangup = true;
+
+ f->master_event_source = sd_event_source_unref(f->master_event_source);
+ } else {
+ log_error_errno(errno, "read(): %m");
+ return pty_forward_done(f, -errno);
+ }
+ } else {
+ f->read_from_master = true;
+ f->out_buffer_full += (size_t) k;
+ }
+ }
+
+ if (f->stdout_writable && f->out_buffer_full > 0) {
+
+ k = write(STDOUT_FILENO, f->out_buffer, f->out_buffer_full);
+ if (k < 0) {
+
+ if (errno == EAGAIN)
+ f->stdout_writable = false;
+ else if (IN_SET(errno, EIO, EPIPE, ECONNRESET)) {
+ f->stdout_writable = false;
+ f->stdout_hangup = true;
+ f->stdout_event_source = sd_event_source_unref(f->stdout_event_source);
+ } else {
+ log_error_errno(errno, "write(): %m");
+ return pty_forward_done(f, -errno);
+ }
+
+ } else {
+
+ if (k > 0) {
+ f->last_char = f->out_buffer[k-1];
+ f->last_char_set = true;
+ }
+
+ assert(f->out_buffer_full >= (size_t) k);
+ memmove(f->out_buffer, f->out_buffer + k, f->out_buffer_full - k);
+ f->out_buffer_full -= k;
+ }
+ }
+ }
+
+ if (f->stdin_hangup || f->stdout_hangup || f->master_hangup) {
+ /* Exit the loop if any side hung up and if there's
+ * nothing more to write or nothing we could write. */
+
+ if ((f->out_buffer_full <= 0 || f->stdout_hangup) &&
+ (f->in_buffer_full <= 0 || f->master_hangup))
+ return pty_forward_done(f, 0);
+ }
+
+ /* If we were asked to drain, and there's nothing more to handle from the master, then call the callback
+ * too. */
+ if (f->drain && drained(f))
+ return pty_forward_done(f, 0);
+
+ return 0;
+}
+
+static int on_master_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+ PTYForward *f = userdata;
+
+ assert(f);
+ assert(e);
+ assert(e == f->master_event_source);
+ assert(fd >= 0);
+ assert(fd == f->master);
+
+ if (revents & (EPOLLIN|EPOLLHUP))
+ f->master_readable = true;
+
+ if (revents & (EPOLLOUT|EPOLLHUP))
+ f->master_writable = true;
+
+ return shovel(f);
+}
+
+static int on_stdin_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+ PTYForward *f = userdata;
+
+ assert(f);
+ assert(e);
+ assert(e == f->stdin_event_source);
+ assert(fd >= 0);
+ assert(fd == STDIN_FILENO);
+
+ if (revents & (EPOLLIN|EPOLLHUP))
+ f->stdin_readable = true;
+
+ return shovel(f);
+}
+
+static int on_stdout_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+ PTYForward *f = userdata;
+
+ assert(f);
+ assert(e);
+ assert(e == f->stdout_event_source);
+ assert(fd >= 0);
+ assert(fd == STDOUT_FILENO);
+
+ if (revents & (EPOLLOUT|EPOLLHUP))
+ f->stdout_writable = true;
+
+ return shovel(f);
+}
+
+static int on_sigwinch_event(sd_event_source *e, const struct signalfd_siginfo *si, void *userdata) {
+ PTYForward *f = userdata;
+ struct winsize ws;
+
+ assert(f);
+ assert(e);
+ assert(e == f->sigwinch_event_source);
+
+ /* The window size changed, let's forward that. */
+ if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws) >= 0)
+ (void) ioctl(f->master, TIOCSWINSZ, &ws);
+
+ return 0;
+}
+
+int pty_forward_new(
+ sd_event *event,
+ int master,
+ PTYForwardFlags flags,
+ PTYForward **ret) {
+
+ _cleanup_(pty_forward_freep) PTYForward *f = NULL;
+ struct winsize ws;
+ int r;
+
+ f = new(PTYForward, 1);
+ if (!f)
+ return -ENOMEM;
+
+ *f = (struct PTYForward) {
+ .flags = flags,
+ .master = -1,
+ };
+
+ if (event)
+ f->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&f->event);
+ if (r < 0)
+ return r;
+ }
+
+ if (!(flags & PTY_FORWARD_READ_ONLY)) {
+ r = fd_nonblock(STDIN_FILENO, true);
+ if (r < 0)
+ return r;
+
+ r = fd_nonblock(STDOUT_FILENO, true);
+ if (r < 0)
+ return r;
+ }
+
+ r = fd_nonblock(master, true);
+ if (r < 0)
+ return r;
+
+ f->master = master;
+
+ if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws) < 0) {
+ /* If we can't get the resolution from the output fd, then use our internal, regular width/height,
+ * i.e. something derived from $COLUMNS and $LINES if set. */
+
+ ws = (struct winsize) {
+ .ws_row = lines(),
+ .ws_col = columns(),
+ };
+ }
+
+ (void) ioctl(master, TIOCSWINSZ, &ws);
+
+ if (!(flags & PTY_FORWARD_READ_ONLY)) {
+ if (tcgetattr(STDIN_FILENO, &f->saved_stdin_attr) >= 0) {
+ struct termios raw_stdin_attr;
+
+ f->saved_stdin = true;
+
+ raw_stdin_attr = f->saved_stdin_attr;
+ cfmakeraw(&raw_stdin_attr);
+ raw_stdin_attr.c_oflag = f->saved_stdin_attr.c_oflag;
+ tcsetattr(STDIN_FILENO, TCSANOW, &raw_stdin_attr);
+ }
+
+ if (tcgetattr(STDOUT_FILENO, &f->saved_stdout_attr) >= 0) {
+ struct termios raw_stdout_attr;
+
+ f->saved_stdout = true;
+
+ raw_stdout_attr = f->saved_stdout_attr;
+ cfmakeraw(&raw_stdout_attr);
+ raw_stdout_attr.c_iflag = f->saved_stdout_attr.c_iflag;
+ raw_stdout_attr.c_lflag = f->saved_stdout_attr.c_lflag;
+ tcsetattr(STDOUT_FILENO, TCSANOW, &raw_stdout_attr);
+ }
+
+ r = sd_event_add_io(f->event, &f->stdin_event_source, STDIN_FILENO, EPOLLIN|EPOLLET, on_stdin_event, f);
+ if (r < 0 && r != -EPERM)
+ return r;
+
+ if (r >= 0)
+ (void) sd_event_source_set_description(f->stdin_event_source, "ptyfwd-stdin");
+ }
+
+ r = sd_event_add_io(f->event, &f->stdout_event_source, STDOUT_FILENO, EPOLLOUT|EPOLLET, on_stdout_event, f);
+ if (r == -EPERM)
+ /* stdout without epoll support. Likely redirected to regular file. */
+ f->stdout_writable = true;
+ else if (r < 0)
+ return r;
+ else
+ (void) sd_event_source_set_description(f->stdout_event_source, "ptyfwd-stdout");
+
+ r = sd_event_add_io(f->event, &f->master_event_source, master, EPOLLIN|EPOLLOUT|EPOLLET, on_master_event, f);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(f->master_event_source, "ptyfwd-master");
+
+ r = sd_event_add_signal(f->event, &f->sigwinch_event_source, SIGWINCH, on_sigwinch_event, f);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(f->sigwinch_event_source, "ptyfwd-sigwinch");
+
+ *ret = TAKE_PTR(f);
+
+ return 0;
+}
+
+PTYForward *pty_forward_free(PTYForward *f) {
+ pty_forward_disconnect(f);
+ return mfree(f);
+}
+
+int pty_forward_get_last_char(PTYForward *f, char *ch) {
+ assert(f);
+ assert(ch);
+
+ if (!f->last_char_set)
+ return -ENXIO;
+
+ *ch = f->last_char;
+ return 0;
+}
+
+int pty_forward_set_ignore_vhangup(PTYForward *f, bool b) {
+ int r;
+
+ assert(f);
+
+ if (!!(f->flags & PTY_FORWARD_IGNORE_VHANGUP) == b)
+ return 0;
+
+ SET_FLAG(f->flags, PTY_FORWARD_IGNORE_VHANGUP, b);
+
+ if (!ignore_vhangup(f)) {
+
+ /* We shall now react to vhangup()s? Let's check
+ * immediately if we might be in one */
+
+ f->master_readable = true;
+ r = shovel(f);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+bool pty_forward_get_ignore_vhangup(PTYForward *f) {
+ assert(f);
+
+ return !!(f->flags & PTY_FORWARD_IGNORE_VHANGUP);
+}
+
+bool pty_forward_is_done(PTYForward *f) {
+ assert(f);
+
+ return f->done;
+}
+
+void pty_forward_set_handler(PTYForward *f, PTYForwardHandler cb, void *userdata) {
+ assert(f);
+
+ f->handler = cb;
+ f->userdata = userdata;
+}
+
+bool pty_forward_drain(PTYForward *f) {
+ assert(f);
+
+ /* Starts draining the forwarder. Specifically:
+ *
+ * - Returns true if there are no unprocessed bytes from the pty, false otherwise
+ *
+ * - Makes sure the handler function is called the next time the number of unprocessed bytes hits zero
+ */
+
+ f->drain = true;
+ return drained(f);
+}
+
+int pty_forward_set_priority(PTYForward *f, int64_t priority) {
+ int r;
+ assert(f);
+
+ r = sd_event_source_set_priority(f->stdin_event_source, priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(f->stdout_event_source, priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(f->master_event_source, priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(f->sigwinch_event_source, priority);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int pty_forward_set_width_height(PTYForward *f, unsigned width, unsigned height) {
+ struct winsize ws;
+
+ assert(f);
+
+ if (width == (unsigned) -1 && height == (unsigned) -1)
+ return 0; /* noop */
+
+ if (width != (unsigned) -1 &&
+ (width == 0 || width > USHRT_MAX))
+ return -ERANGE;
+
+ if (height != (unsigned) -1 &&
+ (height == 0 || height > USHRT_MAX))
+ return -ERANGE;
+
+ if (width == (unsigned) -1 || height == (unsigned) -1) {
+ if (ioctl(f->master, TIOCGWINSZ, &ws) < 0)
+ return -errno;
+
+ if (width != (unsigned) -1)
+ ws.ws_col = width;
+ if (height != (unsigned) -1)
+ ws.ws_row = height;
+ } else
+ ws = (struct winsize) {
+ .ws_row = height,
+ .ws_col = width,
+ };
+
+ if (ioctl(f->master, TIOCSWINSZ, &ws) < 0)
+ return -errno;
+
+ /* Make sure we ignore SIGWINCH window size events from now on */
+ f->sigwinch_event_source = sd_event_source_unref(f->sigwinch_event_source);
+
+ return 0;
+}
diff --git a/src/shared/ptyfwd.h b/src/shared/ptyfwd.h
new file mode 100644
index 0000000..887d3cb
--- /dev/null
+++ b/src/shared/ptyfwd.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-event.h"
+
+#include "macro.h"
+
+typedef struct PTYForward PTYForward;
+
+typedef enum PTYForwardFlags {
+ PTY_FORWARD_READ_ONLY = 1,
+
+ /* Continue reading after hangup? */
+ PTY_FORWARD_IGNORE_VHANGUP = 2,
+
+ /* Continue reading after hangup but only if we never read anything else? */
+ PTY_FORWARD_IGNORE_INITIAL_VHANGUP = 4,
+} PTYForwardFlags;
+
+typedef int (*PTYForwardHandler)(PTYForward *f, int rcode, void *userdata);
+
+int pty_forward_new(sd_event *event, int master, PTYForwardFlags flags, PTYForward **f);
+PTYForward *pty_forward_free(PTYForward *f);
+
+int pty_forward_get_last_char(PTYForward *f, char *ch);
+
+int pty_forward_set_ignore_vhangup(PTYForward *f, bool ignore_vhangup);
+bool pty_forward_get_ignore_vhangup(PTYForward *f);
+
+bool pty_forward_is_done(PTYForward *f);
+
+void pty_forward_set_handler(PTYForward *f, PTYForwardHandler handler, void *userdata);
+
+bool pty_forward_drain(PTYForward *f);
+
+int pty_forward_set_priority(PTYForward *f, int64_t priority);
+
+int pty_forward_set_width_height(PTYForward *f, unsigned width, unsigned height);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(PTYForward*, pty_forward_free);
diff --git a/src/shared/reboot-util.c b/src/shared/reboot-util.c
new file mode 100644
index 0000000..ca40159
--- /dev/null
+++ b/src/shared/reboot-util.c
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "raw-reboot.h"
+#include "reboot-util.h"
+#include "string-util.h"
+#include "umask-util.h"
+#include "virt.h"
+
+int update_reboot_parameter_and_warn(const char *parameter) {
+ int r;
+
+ if (isempty(parameter)) {
+ if (unlink("/run/systemd/reboot-param") < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m");
+ }
+
+ return 0;
+ }
+
+ RUN_WITH_UMASK(0022) {
+ r = write_string_file("/run/systemd/reboot-param", parameter,
+ WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to write reboot parameter file: %m");
+ }
+
+ return 0;
+}
+
+int reboot_with_parameter(RebootFlags flags) {
+ int r;
+
+ /* Reboots the system with a parameter that is read from /run/systemd/reboot-param. Returns 0 if REBOOT_DRY_RUN
+ * was set and the actual reboot operation was hence skipped. If REBOOT_FALLBACK is set and the reboot with
+ * parameter doesn't work out a fallback to classic reboot() is attempted. If REBOOT_FALLBACK is not set, 0 is
+ * returned instead, which should be considered indication for the caller to fall back to reboot() on its own,
+ * or somehow else deal with this. If REBOOT_LOG is specified will log about what it is going to do, as well as
+ * all errors. */
+
+ if (detect_container() == 0) {
+ _cleanup_free_ char *parameter = NULL;
+
+ r = read_one_line_file("/run/systemd/reboot-param", &parameter);
+ if (r < 0 && r != -ENOENT)
+ log_full_errno(flags & REBOOT_LOG ? LOG_WARNING : LOG_DEBUG, r,
+ "Failed to read reboot parameter file, ignoring: %m");
+
+ if (!isempty(parameter)) {
+
+ log_full(flags & REBOOT_LOG ? LOG_INFO : LOG_DEBUG,
+ "Rebooting with argument '%s'.", parameter);
+
+ if (flags & REBOOT_DRY_RUN)
+ return 0;
+
+ (void) raw_reboot(LINUX_REBOOT_CMD_RESTART2, parameter);
+
+ log_full_errno(flags & REBOOT_LOG ? LOG_WARNING : LOG_DEBUG, errno,
+ "Failed to reboot with parameter, retrying without: %m");
+ }
+ }
+
+ if (!(flags & REBOOT_FALLBACK))
+ return 0;
+
+ log_full(flags & REBOOT_LOG ? LOG_INFO : LOG_DEBUG, "Rebooting.");
+
+ if (flags & REBOOT_DRY_RUN)
+ return 0;
+
+ (void) reboot(RB_AUTOBOOT);
+
+ return log_full_errno(flags & REBOOT_LOG ? LOG_ERR : LOG_DEBUG, errno, "Failed to reboot: %m");
+}
diff --git a/src/shared/reboot-util.h b/src/shared/reboot-util.h
new file mode 100644
index 0000000..d459333
--- /dev/null
+++ b/src/shared/reboot-util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int update_reboot_parameter_and_warn(const char *parameter);
+
+typedef enum RebootFlags {
+ REBOOT_LOG = 1 << 0, /* log about what we are going to do and all errors */
+ REBOOT_DRY_RUN = 1 << 1, /* return 0 right before actually doing the reboot */
+ REBOOT_FALLBACK = 1 << 2, /* fallback to plain reboot() if argument-based reboot doesn't work, isn't configured or doesn't apply otherwise */
+} RebootFlags;
+
+int reboot_with_parameter(RebootFlags flags);
diff --git a/src/shared/resolve-util.c b/src/shared/resolve-util.c
new file mode 100644
index 0000000..a5d4a14
--- /dev/null
+++ b/src/shared/resolve-util.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "conf-parser.h"
+#include "resolve-util.h"
+#include "string-table.h"
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_resolve_support, resolve_support, ResolveSupport, "Failed to parse resolve support setting");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dnssec_mode, dnssec_mode, DnssecMode, "Failed to parse DNSSEC mode setting");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dns_over_tls_mode, dns_over_tls_mode, DnsOverTlsMode, "Failed to parse DNS-over-TLS mode setting");
+
+static const char* const resolve_support_table[_RESOLVE_SUPPORT_MAX] = {
+ [RESOLVE_SUPPORT_NO] = "no",
+ [RESOLVE_SUPPORT_YES] = "yes",
+ [RESOLVE_SUPPORT_RESOLVE] = "resolve",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(resolve_support, ResolveSupport, RESOLVE_SUPPORT_YES);
+
+static const char* const dnssec_mode_table[_DNSSEC_MODE_MAX] = {
+ [DNSSEC_NO] = "no",
+ [DNSSEC_ALLOW_DOWNGRADE] = "allow-downgrade",
+ [DNSSEC_YES] = "yes",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dnssec_mode, DnssecMode, DNSSEC_YES);
+
+static const char* const dns_over_tls_mode_table[_DNS_OVER_TLS_MODE_MAX] = {
+ [DNS_OVER_TLS_NO] = "no",
+ [DNS_OVER_TLS_OPPORTUNISTIC] = "opportunistic",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_over_tls_mode, DnsOverTlsMode, _DNS_OVER_TLS_MODE_INVALID);
diff --git a/src/shared/resolve-util.h b/src/shared/resolve-util.h
new file mode 100644
index 0000000..5883342
--- /dev/null
+++ b/src/shared/resolve-util.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "conf-parser.h"
+#include "macro.h"
+
+typedef enum ResolveSupport ResolveSupport;
+typedef enum DnssecMode DnssecMode;
+typedef enum DnsOverTlsMode DnsOverTlsMode;
+
+enum ResolveSupport {
+ RESOLVE_SUPPORT_NO,
+ RESOLVE_SUPPORT_YES,
+ RESOLVE_SUPPORT_RESOLVE,
+ _RESOLVE_SUPPORT_MAX,
+ _RESOLVE_SUPPORT_INVALID = -1
+};
+
+enum DnssecMode {
+ /* No DNSSEC validation is done */
+ DNSSEC_NO,
+
+ /* Validate locally, if the server knows DO, but if not,
+ * don't. Don't trust the AD bit. If the server doesn't do
+ * DNSSEC properly, downgrade to non-DNSSEC operation. Of
+ * course, we then are vulnerable to a downgrade attack, but
+ * that's life and what is configured. */
+ DNSSEC_ALLOW_DOWNGRADE,
+
+ /* Insist on DNSSEC server support, and rather fail than downgrading. */
+ DNSSEC_YES,
+
+ _DNSSEC_MODE_MAX,
+ _DNSSEC_MODE_INVALID = -1
+};
+
+enum DnsOverTlsMode {
+ /* No connection is made for DNS-over-TLS */
+ DNS_OVER_TLS_NO,
+
+ /* Try to connect using DNS-over-TLS, but if connection fails,
+ * fallback to using an unencrypted connection */
+ DNS_OVER_TLS_OPPORTUNISTIC,
+
+ _DNS_OVER_TLS_MODE_MAX,
+ _DNS_OVER_TLS_MODE_INVALID = -1
+};
+
+CONFIG_PARSER_PROTOTYPE(config_parse_resolve_support);
+CONFIG_PARSER_PROTOTYPE(config_parse_dnssec_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_dns_over_tls_mode);
+
+const char* resolve_support_to_string(ResolveSupport p) _const_;
+ResolveSupport resolve_support_from_string(const char *s) _pure_;
+
+const char* dnssec_mode_to_string(DnssecMode p) _const_;
+DnssecMode dnssec_mode_from_string(const char *s) _pure_;
+
+const char* dns_over_tls_mode_to_string(DnsOverTlsMode p) _const_;
+DnsOverTlsMode dns_over_tls_mode_from_string(const char *s) _pure_;
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
new file mode 100644
index 0000000..cc58b3c
--- /dev/null
+++ b/src/shared/seccomp-util.c
@@ -0,0 +1,1764 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <linux/seccomp.h>
+#include <seccomp.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/shm.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "macro.h"
+#include "nsflags.h"
+#include "process-util.h"
+#include "seccomp-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+#include "errno-list.h"
+
+const uint32_t seccomp_local_archs[] = {
+
+ /* Note: always list the native arch we are compiled as last, so that users can blacklist seccomp(), but our own calls to it still succeed */
+
+#if defined(__x86_64__) && defined(__ILP32__)
+ SCMP_ARCH_X86,
+ SCMP_ARCH_X86_64,
+ SCMP_ARCH_X32, /* native */
+#elif defined(__x86_64__) && !defined(__ILP32__)
+ SCMP_ARCH_X86,
+ SCMP_ARCH_X32,
+ SCMP_ARCH_X86_64, /* native */
+#elif defined(__i386__)
+ SCMP_ARCH_X86,
+#elif defined(__aarch64__)
+ SCMP_ARCH_ARM,
+ SCMP_ARCH_AARCH64, /* native */
+#elif defined(__arm__)
+ SCMP_ARCH_ARM,
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL64N32,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64N32,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL64, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL64N32,
+ SCMP_ARCH_MIPS64N32, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64N32, /* native */
+#elif defined(__powerpc64__) && __BYTE_ORDER == __BIG_ENDIAN
+ SCMP_ARCH_PPC,
+ SCMP_ARCH_PPC64LE,
+ SCMP_ARCH_PPC64, /* native */
+#elif defined(__powerpc64__) && __BYTE_ORDER == __LITTLE_ENDIAN
+ SCMP_ARCH_PPC,
+ SCMP_ARCH_PPC64,
+ SCMP_ARCH_PPC64LE, /* native */
+#elif defined(__powerpc__)
+ SCMP_ARCH_PPC,
+#elif defined(__s390x__)
+ SCMP_ARCH_S390,
+ SCMP_ARCH_S390X, /* native */
+#elif defined(__s390__)
+ SCMP_ARCH_S390,
+#endif
+ (uint32_t) -1
+ };
+
+const char* seccomp_arch_to_string(uint32_t c) {
+ /* Maintain order used in <seccomp.h>.
+ *
+ * Names used here should be the same as those used for ConditionArchitecture=,
+ * except for "subarchitectures" like x32. */
+
+ switch(c) {
+ case SCMP_ARCH_NATIVE:
+ return "native";
+ case SCMP_ARCH_X86:
+ return "x86";
+ case SCMP_ARCH_X86_64:
+ return "x86-64";
+ case SCMP_ARCH_X32:
+ return "x32";
+ case SCMP_ARCH_ARM:
+ return "arm";
+ case SCMP_ARCH_AARCH64:
+ return "arm64";
+ case SCMP_ARCH_MIPS:
+ return "mips";
+ case SCMP_ARCH_MIPS64:
+ return "mips64";
+ case SCMP_ARCH_MIPS64N32:
+ return "mips64-n32";
+ case SCMP_ARCH_MIPSEL:
+ return "mips-le";
+ case SCMP_ARCH_MIPSEL64:
+ return "mips64-le";
+ case SCMP_ARCH_MIPSEL64N32:
+ return "mips64-le-n32";
+ case SCMP_ARCH_PPC:
+ return "ppc";
+ case SCMP_ARCH_PPC64:
+ return "ppc64";
+ case SCMP_ARCH_PPC64LE:
+ return "ppc64-le";
+ case SCMP_ARCH_S390:
+ return "s390";
+ case SCMP_ARCH_S390X:
+ return "s390x";
+ default:
+ return NULL;
+ }
+}
+
+int seccomp_arch_from_string(const char *n, uint32_t *ret) {
+ if (!n)
+ return -EINVAL;
+
+ assert(ret);
+
+ if (streq(n, "native"))
+ *ret = SCMP_ARCH_NATIVE;
+ else if (streq(n, "x86"))
+ *ret = SCMP_ARCH_X86;
+ else if (streq(n, "x86-64"))
+ *ret = SCMP_ARCH_X86_64;
+ else if (streq(n, "x32"))
+ *ret = SCMP_ARCH_X32;
+ else if (streq(n, "arm"))
+ *ret = SCMP_ARCH_ARM;
+ else if (streq(n, "arm64"))
+ *ret = SCMP_ARCH_AARCH64;
+ else if (streq(n, "mips"))
+ *ret = SCMP_ARCH_MIPS;
+ else if (streq(n, "mips64"))
+ *ret = SCMP_ARCH_MIPS64;
+ else if (streq(n, "mips64-n32"))
+ *ret = SCMP_ARCH_MIPS64N32;
+ else if (streq(n, "mips-le"))
+ *ret = SCMP_ARCH_MIPSEL;
+ else if (streq(n, "mips64-le"))
+ *ret = SCMP_ARCH_MIPSEL64;
+ else if (streq(n, "mips64-le-n32"))
+ *ret = SCMP_ARCH_MIPSEL64N32;
+ else if (streq(n, "ppc"))
+ *ret = SCMP_ARCH_PPC;
+ else if (streq(n, "ppc64"))
+ *ret = SCMP_ARCH_PPC64;
+ else if (streq(n, "ppc64-le"))
+ *ret = SCMP_ARCH_PPC64LE;
+ else if (streq(n, "s390"))
+ *ret = SCMP_ARCH_S390;
+ else if (streq(n, "s390x"))
+ *ret = SCMP_ARCH_S390X;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action) {
+ scmp_filter_ctx seccomp;
+ int r;
+
+ /* Much like seccomp_init(), but initializes the filter for one specific architecture only, without affecting
+ * any others. Also, turns off the NNP fiddling. */
+
+ seccomp = seccomp_init(default_action);
+ if (!seccomp)
+ return -ENOMEM;
+
+ if (arch != SCMP_ARCH_NATIVE &&
+ arch != seccomp_arch_native()) {
+
+ r = seccomp_arch_remove(seccomp, seccomp_arch_native());
+ if (r < 0)
+ goto finish;
+
+ r = seccomp_arch_add(seccomp, arch);
+ if (r < 0)
+ goto finish;
+
+ assert(seccomp_arch_exist(seccomp, arch) >= 0);
+ assert(seccomp_arch_exist(seccomp, SCMP_ARCH_NATIVE) == -EEXIST);
+ assert(seccomp_arch_exist(seccomp, seccomp_arch_native()) == -EEXIST);
+ } else {
+ assert(seccomp_arch_exist(seccomp, SCMP_ARCH_NATIVE) >= 0);
+ assert(seccomp_arch_exist(seccomp, seccomp_arch_native()) >= 0);
+ }
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_ACT_BADARCH, SCMP_ACT_ALLOW);
+ if (r < 0)
+ goto finish;
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ if (r < 0)
+ goto finish;
+
+ *ret = seccomp;
+ return 0;
+
+finish:
+ seccomp_release(seccomp);
+ return r;
+}
+
+static bool is_basic_seccomp_available(void) {
+ return prctl(PR_GET_SECCOMP, 0, 0, 0, 0) >= 0;
+}
+
+static bool is_seccomp_filter_available(void) {
+ return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, 0, 0) < 0 &&
+ errno == EFAULT;
+}
+
+bool is_seccomp_available(void) {
+ static int cached_enabled = -1;
+
+ if (cached_enabled < 0)
+ cached_enabled =
+ is_basic_seccomp_available() &&
+ is_seccomp_filter_available();
+
+ return cached_enabled;
+}
+
+const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
+ [SYSCALL_FILTER_SET_DEFAULT] = {
+ .name = "@default",
+ .help = "System calls that are always permitted",
+ .value =
+ "clock_getres\0"
+ "clock_gettime\0"
+ "clock_nanosleep\0"
+ "execve\0"
+ "exit\0"
+ "exit_group\0"
+ "futex\0"
+ "get_robust_list\0"
+ "get_thread_area\0"
+ "getegid\0"
+ "getegid32\0"
+ "geteuid\0"
+ "geteuid32\0"
+ "getgid\0"
+ "getgid32\0"
+ "getgroups\0"
+ "getgroups32\0"
+ "getpgid\0"
+ "getpgrp\0"
+ "getpid\0"
+ "getppid\0"
+ "getresgid\0"
+ "getresgid32\0"
+ "getresuid\0"
+ "getresuid32\0"
+ "getrlimit\0" /* make sure processes can query stack size and such */
+ "getsid\0"
+ "gettid\0"
+ "gettimeofday\0"
+ "getuid\0"
+ "getuid32\0"
+ "membarrier\0"
+ "nanosleep\0"
+ "pause\0"
+ "prlimit64\0"
+ "restart_syscall\0"
+ "rt_sigreturn\0"
+ "sched_yield\0"
+ "set_robust_list\0"
+ "set_thread_area\0"
+ "set_tid_address\0"
+ "set_tls\0"
+ "sigreturn\0"
+ "time\0"
+ "ugetrlimit\0"
+ },
+ [SYSCALL_FILTER_SET_AIO] = {
+ .name = "@aio",
+ .help = "Asynchronous IO",
+ .value =
+ "io_cancel\0"
+ "io_destroy\0"
+ "io_getevents\0"
+ "io_pgetevents\0"
+ "io_setup\0"
+ "io_submit\0"
+ },
+ [SYSCALL_FILTER_SET_BASIC_IO] = {
+ .name = "@basic-io",
+ .help = "Basic IO",
+ .value =
+ "_llseek\0"
+ "close\0"
+ "dup\0"
+ "dup2\0"
+ "dup3\0"
+ "lseek\0"
+ "pread64\0"
+ "preadv\0"
+ "preadv2\0"
+ "pwrite64\0"
+ "pwritev\0"
+ "pwritev2\0"
+ "read\0"
+ "readv\0"
+ "write\0"
+ "writev\0"
+ },
+ [SYSCALL_FILTER_SET_CHOWN] = {
+ .name = "@chown",
+ .help = "Change ownership of files and directories",
+ .value =
+ "chown\0"
+ "chown32\0"
+ "fchown\0"
+ "fchown32\0"
+ "fchownat\0"
+ "lchown\0"
+ "lchown32\0"
+ },
+ [SYSCALL_FILTER_SET_CLOCK] = {
+ .name = "@clock",
+ .help = "Change the system time",
+ .value =
+ "adjtimex\0"
+ "clock_adjtime\0"
+ "clock_settime\0"
+ "settimeofday\0"
+ "stime\0"
+ },
+ [SYSCALL_FILTER_SET_CPU_EMULATION] = {
+ .name = "@cpu-emulation",
+ .help = "System calls for CPU emulation functionality",
+ .value =
+ "modify_ldt\0"
+ "subpage_prot\0"
+ "switch_endian\0"
+ "vm86\0"
+ "vm86old\0"
+ },
+ [SYSCALL_FILTER_SET_DEBUG] = {
+ .name = "@debug",
+ .help = "Debugging, performance monitoring and tracing functionality",
+ .value =
+ "lookup_dcookie\0"
+ "perf_event_open\0"
+ "ptrace\0"
+ "rtas\0"
+#ifdef __NR_s390_runtime_instr
+ "s390_runtime_instr\0"
+#endif
+ "sys_debug_setcontext\0"
+ },
+ [SYSCALL_FILTER_SET_FILE_SYSTEM] = {
+ .name = "@file-system",
+ .help = "File system operations",
+ .value =
+ "access\0"
+ "chdir\0"
+ "chmod\0"
+ "close\0"
+ "creat\0"
+ "faccessat\0"
+ "fallocate\0"
+ "fchdir\0"
+ "fchmod\0"
+ "fchmodat\0"
+ "fcntl\0"
+ "fcntl64\0"
+ "fgetxattr\0"
+ "flistxattr\0"
+ "fremovexattr\0"
+ "fsetxattr\0"
+ "fstat\0"
+ "fstat64\0"
+ "fstatat64\0"
+ "fstatfs\0"
+ "fstatfs64\0"
+ "ftruncate\0"
+ "ftruncate64\0"
+ "futimesat\0"
+ "getcwd\0"
+ "getdents\0"
+ "getdents64\0"
+ "getxattr\0"
+ "inotify_add_watch\0"
+ "inotify_init\0"
+ "inotify_init1\0"
+ "inotify_rm_watch\0"
+ "lgetxattr\0"
+ "link\0"
+ "linkat\0"
+ "listxattr\0"
+ "llistxattr\0"
+ "lremovexattr\0"
+ "lsetxattr\0"
+ "lstat\0"
+ "lstat64\0"
+ "mkdir\0"
+ "mkdirat\0"
+ "mknod\0"
+ "mknodat\0"
+ "mmap\0"
+ "mmap2\0"
+ "munmap\0"
+ "newfstatat\0"
+ "oldfstat\0"
+ "oldlstat\0"
+ "oldstat\0"
+ "open\0"
+ "openat\0"
+ "readlink\0"
+ "readlinkat\0"
+ "removexattr\0"
+ "rename\0"
+ "renameat\0"
+ "renameat2\0"
+ "rmdir\0"
+ "setxattr\0"
+ "stat\0"
+ "stat64\0"
+ "statfs\0"
+ "statfs64\0"
+#ifdef __NR_statx
+ "statx\0"
+#endif
+ "symlink\0"
+ "symlinkat\0"
+ "truncate\0"
+ "truncate64\0"
+ "unlink\0"
+ "unlinkat\0"
+ "utime\0"
+ "utimensat\0"
+ "utimes\0"
+ },
+ [SYSCALL_FILTER_SET_IO_EVENT] = {
+ .name = "@io-event",
+ .help = "Event loop system calls",
+ .value =
+ "_newselect\0"
+ "epoll_create\0"
+ "epoll_create1\0"
+ "epoll_ctl\0"
+ "epoll_ctl_old\0"
+ "epoll_pwait\0"
+ "epoll_wait\0"
+ "epoll_wait_old\0"
+ "eventfd\0"
+ "eventfd2\0"
+ "poll\0"
+ "ppoll\0"
+ "pselect6\0"
+ "select\0"
+ },
+ [SYSCALL_FILTER_SET_IPC] = {
+ .name = "@ipc",
+ .help = "SysV IPC, POSIX Message Queues or other IPC",
+ .value =
+ "ipc\0"
+ "memfd_create\0"
+ "mq_getsetattr\0"
+ "mq_notify\0"
+ "mq_open\0"
+ "mq_timedreceive\0"
+ "mq_timedsend\0"
+ "mq_unlink\0"
+ "msgctl\0"
+ "msgget\0"
+ "msgrcv\0"
+ "msgsnd\0"
+ "pipe\0"
+ "pipe2\0"
+ "process_vm_readv\0"
+ "process_vm_writev\0"
+ "semctl\0"
+ "semget\0"
+ "semop\0"
+ "semtimedop\0"
+ "shmat\0"
+ "shmctl\0"
+ "shmdt\0"
+ "shmget\0"
+ },
+ [SYSCALL_FILTER_SET_KEYRING] = {
+ .name = "@keyring",
+ .help = "Kernel keyring access",
+ .value =
+ "add_key\0"
+ "keyctl\0"
+ "request_key\0"
+ },
+ [SYSCALL_FILTER_SET_MEMLOCK] = {
+ .name = "@memlock",
+ .help = "Memory locking control",
+ .value =
+ "mlock\0"
+ "mlock2\0"
+ "mlockall\0"
+ "munlock\0"
+ "munlockall\0"
+ },
+ [SYSCALL_FILTER_SET_MODULE] = {
+ .name = "@module",
+ .help = "Loading and unloading of kernel modules",
+ .value =
+ "delete_module\0"
+ "finit_module\0"
+ "init_module\0"
+ },
+ [SYSCALL_FILTER_SET_MOUNT] = {
+ .name = "@mount",
+ .help = "Mounting and unmounting of file systems",
+ .value =
+ "chroot\0"
+ "mount\0"
+ "pivot_root\0"
+ "umount\0"
+ "umount2\0"
+ },
+ [SYSCALL_FILTER_SET_NETWORK_IO] = {
+ .name = "@network-io",
+ .help = "Network or Unix socket IO, should not be needed if not network facing",
+ .value =
+ "accept\0"
+ "accept4\0"
+ "bind\0"
+ "connect\0"
+ "getpeername\0"
+ "getsockname\0"
+ "getsockopt\0"
+ "listen\0"
+ "recv\0"
+ "recvfrom\0"
+ "recvmmsg\0"
+ "recvmsg\0"
+ "send\0"
+ "sendmmsg\0"
+ "sendmsg\0"
+ "sendto\0"
+ "setsockopt\0"
+ "shutdown\0"
+ "socket\0"
+ "socketcall\0"
+ "socketpair\0"
+ },
+ [SYSCALL_FILTER_SET_OBSOLETE] = {
+ /* some unknown even to libseccomp */
+ .name = "@obsolete",
+ .help = "Unusual, obsolete or unimplemented system calls",
+ .value =
+ "_sysctl\0"
+ "afs_syscall\0"
+ "bdflush\0"
+ "break\0"
+ "create_module\0"
+ "ftime\0"
+ "get_kernel_syms\0"
+ "getpmsg\0"
+ "gtty\0"
+ "idle\0"
+ "lock\0"
+ "mpx\0"
+ "prof\0"
+ "profil\0"
+ "putpmsg\0"
+ "query_module\0"
+ "security\0"
+ "sgetmask\0"
+ "ssetmask\0"
+ "stty\0"
+ "sysfs\0"
+ "tuxcall\0"
+ "ulimit\0"
+ "uselib\0"
+ "ustat\0"
+ "vserver\0"
+ },
+ [SYSCALL_FILTER_SET_PRIVILEGED] = {
+ .name = "@privileged",
+ .help = "All system calls which need super-user capabilities",
+ .value =
+ "@chown\0"
+ "@clock\0"
+ "@module\0"
+ "@raw-io\0"
+ "@reboot\0"
+ "@swap\0"
+ "_sysctl\0"
+ "acct\0"
+ "bpf\0"
+ "capset\0"
+ "chroot\0"
+ "fanotify_init\0"
+ "nfsservctl\0"
+ "open_by_handle_at\0"
+ "pivot_root\0"
+ "quotactl\0"
+ "setdomainname\0"
+ "setfsuid\0"
+ "setfsuid32\0"
+ "setgroups\0"
+ "setgroups32\0"
+ "sethostname\0"
+ "setresuid\0"
+ "setresuid32\0"
+ "setreuid\0"
+ "setreuid32\0"
+ "setuid\0" /* We list the explicit system calls here, as @setuid also includes setgid() which is not necessarily privileged */
+ "setuid32\0"
+ "vhangup\0"
+ },
+ [SYSCALL_FILTER_SET_PROCESS] = {
+ .name = "@process",
+ .help = "Process control, execution, namespaceing operations",
+ .value =
+ "arch_prctl\0"
+ "capget\0" /* Able to query arbitrary processes */
+ "clone\0"
+ "execveat\0"
+ "fork\0"
+ "getrusage\0"
+ "kill\0"
+ "prctl\0"
+ "rt_sigqueueinfo\0"
+ "rt_tgsigqueueinfo\0"
+ "setns\0"
+ "swapcontext\0" /* Some archs e.g. powerpc32 are using it to do userspace context switches */
+ "tgkill\0"
+ "times\0"
+ "tkill\0"
+ "unshare\0"
+ "vfork\0"
+ "wait4\0"
+ "waitid\0"
+ "waitpid\0"
+ },
+ [SYSCALL_FILTER_SET_RAW_IO] = {
+ .name = "@raw-io",
+ .help = "Raw I/O port access",
+ .value =
+ "ioperm\0"
+ "iopl\0"
+ "pciconfig_iobase\0"
+ "pciconfig_read\0"
+ "pciconfig_write\0"
+#ifdef __NR_s390_pci_mmio_read
+ "s390_pci_mmio_read\0"
+#endif
+#ifdef __NR_s390_pci_mmio_write
+ "s390_pci_mmio_write\0"
+#endif
+ },
+ [SYSCALL_FILTER_SET_REBOOT] = {
+ .name = "@reboot",
+ .help = "Reboot and reboot preparation/kexec",
+ .value =
+ "kexec_file_load\0"
+ "kexec_load\0"
+ "reboot\0"
+ },
+ [SYSCALL_FILTER_SET_RESOURCES] = {
+ .name = "@resources",
+ .help = "Alter resource settings",
+ .value =
+ "ioprio_set\0"
+ "mbind\0"
+ "migrate_pages\0"
+ "move_pages\0"
+ "nice\0"
+ "sched_setaffinity\0"
+ "sched_setattr\0"
+ "sched_setparam\0"
+ "sched_setscheduler\0"
+ "set_mempolicy\0"
+ "setpriority\0"
+ "setrlimit\0"
+ },
+ [SYSCALL_FILTER_SET_SETUID] = {
+ .name = "@setuid",
+ .help = "Operations for changing user/group credentials",
+ .value =
+ "setgid\0"
+ "setgid32\0"
+ "setgroups\0"
+ "setgroups32\0"
+ "setregid\0"
+ "setregid32\0"
+ "setresgid\0"
+ "setresgid32\0"
+ "setresuid\0"
+ "setresuid32\0"
+ "setreuid\0"
+ "setreuid32\0"
+ "setuid\0"
+ "setuid32\0"
+ },
+ [SYSCALL_FILTER_SET_SIGNAL] = {
+ .name = "@signal",
+ .help = "Process signal handling",
+ .value =
+ "rt_sigaction\0"
+ "rt_sigpending\0"
+ "rt_sigprocmask\0"
+ "rt_sigsuspend\0"
+ "rt_sigtimedwait\0"
+ "sigaction\0"
+ "sigaltstack\0"
+ "signal\0"
+ "signalfd\0"
+ "signalfd4\0"
+ "sigpending\0"
+ "sigprocmask\0"
+ "sigsuspend\0"
+ },
+ [SYSCALL_FILTER_SET_SWAP] = {
+ .name = "@swap",
+ .help = "Enable/disable swap devices",
+ .value =
+ "swapoff\0"
+ "swapon\0"
+ },
+ [SYSCALL_FILTER_SET_SYNC] = {
+ .name = "@sync",
+ .help = "Synchronize files and memory to storage",
+ .value =
+ "fdatasync\0"
+ "fsync\0"
+ "msync\0"
+ "sync\0"
+ "sync_file_range\0"
+ "syncfs\0"
+ },
+ [SYSCALL_FILTER_SET_SYSTEM_SERVICE] = {
+ .name = "@system-service",
+ .help = "General system service operations",
+ .value =
+ "@aio\0"
+ "@basic-io\0"
+ "@chown\0"
+ "@default\0"
+ "@file-system\0"
+ "@io-event\0"
+ "@ipc\0"
+ "@keyring\0"
+ "@memlock\0"
+ "@network-io\0"
+ "@process\0"
+ "@resources\0"
+ "@setuid\0"
+ "@signal\0"
+ "@sync\0"
+ "@timer\0"
+ "brk\0"
+ "capget\0"
+ "capset\0"
+ "copy_file_range\0"
+ "fadvise64\0"
+ "fadvise64_64\0"
+ "flock\0"
+ "get_mempolicy\0"
+ "getcpu\0"
+ "getpriority\0"
+ "getrandom\0"
+ "ioctl\0"
+ "ioprio_get\0"
+ "kcmp\0"
+ "madvise\0"
+ "mprotect\0"
+ "mremap\0"
+ "name_to_handle_at\0"
+ "oldolduname\0"
+ "olduname\0"
+ "personality\0"
+ "readahead\0"
+ "readdir\0"
+ "remap_file_pages\0"
+ "sched_get_priority_max\0"
+ "sched_get_priority_min\0"
+ "sched_getaffinity\0"
+ "sched_getattr\0"
+ "sched_getparam\0"
+ "sched_getscheduler\0"
+ "sched_rr_get_interval\0"
+ "sched_yield\0"
+ "sendfile\0"
+ "sendfile64\0"
+ "setfsgid\0"
+ "setfsgid32\0"
+ "setfsuid\0"
+ "setfsuid32\0"
+ "setpgid\0"
+ "setsid\0"
+ "splice\0"
+ "sysinfo\0"
+ "tee\0"
+ "umask\0"
+ "uname\0"
+ "userfaultfd\0"
+ "vmsplice\0"
+ },
+ [SYSCALL_FILTER_SET_TIMER] = {
+ .name = "@timer",
+ .help = "Schedule operations by time",
+ .value =
+ "alarm\0"
+ "getitimer\0"
+ "setitimer\0"
+ "timer_create\0"
+ "timer_delete\0"
+ "timer_getoverrun\0"
+ "timer_gettime\0"
+ "timer_settime\0"
+ "timerfd_create\0"
+ "timerfd_gettime\0"
+ "timerfd_settime\0"
+ "times\0"
+ },
+};
+
+const SyscallFilterSet *syscall_filter_set_find(const char *name) {
+ unsigned i;
+
+ if (isempty(name) || name[0] != '@')
+ return NULL;
+
+ for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++)
+ if (streq(syscall_filter_sets[i].name, name))
+ return syscall_filter_sets + i;
+
+ return NULL;
+}
+
+static int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action, char **exclude, bool log_missing);
+
+int seccomp_add_syscall_filter_item(scmp_filter_ctx *seccomp, const char *name, uint32_t action, char **exclude, bool log_missing) {
+ assert(seccomp);
+ assert(name);
+
+ if (strv_contains(exclude, name))
+ return 0;
+
+ if (name[0] == '@') {
+ const SyscallFilterSet *other;
+
+ other = syscall_filter_set_find(name);
+ if (!other)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Filter set %s is not known!",
+ name);
+
+ return seccomp_add_syscall_filter_set(seccomp, other, action, exclude, log_missing);
+
+ } else {
+ int id, r;
+
+ id = seccomp_syscall_resolve_name(name);
+ if (id == __NR_SCMP_ERROR) {
+ if (log_missing)
+ log_debug("System call %s is not known, ignoring.", name);
+ return 0;
+ }
+
+ r = seccomp_rule_add_exact(seccomp, action, id, 0);
+ if (r < 0) {
+ /* If the system call is not known on this architecture, then that's fine, let's ignore it */
+ bool ignore = r == -EDOM;
+
+ if (!ignore || log_missing)
+ log_debug_errno(r, "Failed to add rule for system call %s() / %d%s: %m",
+ name, id, ignore ? ", ignoring" : "");
+ if (!ignore)
+ return r;
+ }
+
+ return 0;
+ }
+}
+
+static int seccomp_add_syscall_filter_set(
+ scmp_filter_ctx seccomp,
+ const SyscallFilterSet *set,
+ uint32_t action,
+ char **exclude,
+ bool log_missing) {
+
+ const char *sys;
+ int r;
+
+ assert(seccomp);
+ assert(set);
+
+ NULSTR_FOREACH(sys, set->value) {
+ r = seccomp_add_syscall_filter_item(seccomp, sys, action, exclude, log_missing);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action, bool log_missing) {
+ uint32_t arch;
+ int r;
+
+ assert(set);
+
+ /* The one-stop solution: allocate a seccomp object, add the specified filter to it, and apply it. Once for
+ * each local arch. */
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, default_action);
+ if (r < 0)
+ return r;
+
+ r = seccomp_add_syscall_filter_set(seccomp, set, action, NULL, log_missing);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add filter set: %m");
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action, bool log_missing) {
+ uint32_t arch;
+ int r;
+
+ /* Similar to seccomp_load_syscall_filter_set(), but takes a raw Set* of syscalls, instead of a
+ * SyscallFilterSet* table. */
+
+ if (hashmap_isempty(set) && default_action == SCMP_ACT_ALLOW)
+ return 0;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ Iterator i;
+ void *syscall_id, *val;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, default_action);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_KEY(val, syscall_id, set, i) {
+ uint32_t a = action;
+ int id = PTR_TO_INT(syscall_id) - 1;
+ int error = PTR_TO_INT(val);
+
+ if (action != SCMP_ACT_ALLOW && error >= 0)
+ a = SCMP_ACT_ERRNO(error);
+
+ r = seccomp_rule_add_exact(seccomp, a, id, 0);
+ if (r < 0) {
+ /* If the system call is not known on this architecture, then that's fine, let's ignore it */
+ _cleanup_free_ char *n = NULL;
+ bool ignore;
+
+ n = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, id);
+ ignore = r == -EDOM;
+ if (!ignore || log_missing)
+ log_debug_errno(r, "Failed to add rule for system call %s() / %d%s: %m",
+ strna(n), id, ignore ? ", ignoring" : "");
+ if (!ignore)
+ return r;
+ }
+ }
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_parse_syscall_filter_full(
+ const char *name,
+ int errno_num,
+ Hashmap *filter,
+ SeccompParseFlags flags,
+ const char *unit,
+ const char *filename,
+ unsigned line) {
+
+ int r;
+
+ assert(name);
+ assert(filter);
+
+ if (name[0] == '@') {
+ const SyscallFilterSet *set;
+ const char *i;
+
+ set = syscall_filter_set_find(name);
+ if (!set) {
+ if (!(flags & SECCOMP_PARSE_PERMISSIVE))
+ return -EINVAL;
+
+ log_syntax(unit, flags & SECCOMP_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0,
+ "Unknown system call group, ignoring: %s", name);
+ return 0;
+ }
+
+ NULSTR_FOREACH(i, set->value) {
+ /* Call ourselves again, for the group to parse. Note that we downgrade logging here (i.e. take
+ * away the SECCOMP_PARSE_LOG flag) since any issues in the group table are our own problem,
+ * not a problem in user configuration data and we shouldn't pretend otherwise by complaining
+ * about them. */
+ r = seccomp_parse_syscall_filter_full(i, errno_num, filter, flags &~ SECCOMP_PARSE_LOG, unit, filename, line);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ int id;
+
+ id = seccomp_syscall_resolve_name(name);
+ if (id == __NR_SCMP_ERROR) {
+ if (!(flags & SECCOMP_PARSE_PERMISSIVE))
+ return -EINVAL;
+
+ log_syntax(unit, flags & SECCOMP_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0,
+ "Failed to parse system call, ignoring: %s", name);
+ return 0;
+ }
+
+ /* If we previously wanted to forbid a syscall and now
+ * we want to allow it, then remove it from the list. */
+ if (!(flags & SECCOMP_PARSE_INVERT) == !!(flags & SECCOMP_PARSE_WHITELIST)) {
+ r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num));
+ if (r < 0)
+ switch (r) {
+ case -ENOMEM:
+ return flags & SECCOMP_PARSE_LOG ? log_oom() : -ENOMEM;
+ case -EEXIST:
+ assert_se(hashmap_update(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num)) == 0);
+ break;
+ default:
+ return r;
+ }
+ } else
+ (void) hashmap_remove(filter, INT_TO_PTR(id + 1));
+ }
+
+ return 0;
+}
+
+int seccomp_restrict_namespaces(unsigned long retain) {
+ uint32_t arch;
+ int r;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *s = NULL;
+
+ (void) namespace_flags_to_string(retain, &s);
+ log_debug("Restricting namespace to: %s.", strna(s));
+ }
+
+ /* NOOP? */
+ if ((retain & NAMESPACE_FLAGS_ALL) == NAMESPACE_FLAGS_ALL)
+ return 0;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ unsigned i;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ if ((retain & NAMESPACE_FLAGS_ALL) == 0)
+ /* If every single kind of namespace shall be prohibited, then let's block the whole setns() syscall
+ * altogether. */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setns),
+ 0);
+ else
+ /* Otherwise, block only the invocations with the appropriate flags in the loop below, but also the
+ * special invocation with a zero flags argument, right here. */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setns),
+ 1,
+ SCMP_A1(SCMP_CMP_EQ, 0));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ for (i = 0; namespace_flag_map[i].name; i++) {
+ unsigned long f;
+
+ f = namespace_flag_map[i].flag;
+ if ((retain & f) == f) {
+ log_debug("Permitting %s.", namespace_flag_map[i].name);
+ continue;
+ }
+
+ log_debug("Blocking %s.", namespace_flag_map[i].name);
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(unshare),
+ 1,
+ SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add unshare() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ break;
+ }
+
+ /* On s390/s390x the first two parameters to clone are switched */
+ if (!IN_SET(arch, SCMP_ARCH_S390, SCMP_ARCH_S390X))
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(clone),
+ 1,
+ SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
+ else
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(clone),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add clone() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ break;
+ }
+
+ if ((retain & NAMESPACE_FLAGS_ALL) != 0) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setns),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ break;
+ }
+ }
+ }
+ if (r < 0)
+ continue;
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install namespace restriction rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_protect_sysctl(void) {
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ if (IN_SET(arch, SCMP_ARCH_X32, SCMP_ARCH_AARCH64))
+ /* No _sysctl syscall */
+ continue;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(_sysctl),
+ 0);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add _sysctl() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install sysctl protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_restrict_address_families(Set *address_families, bool whitelist) {
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ bool supported;
+ Iterator i;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ switch (arch) {
+
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_X32:
+ case SCMP_ARCH_ARM:
+ case SCMP_ARCH_AARCH64:
+ case SCMP_ARCH_PPC:
+ case SCMP_ARCH_PPC64:
+ case SCMP_ARCH_PPC64LE:
+ case SCMP_ARCH_MIPSEL64N32:
+ case SCMP_ARCH_MIPS64N32:
+ case SCMP_ARCH_MIPSEL64:
+ case SCMP_ARCH_MIPS64:
+ /* These we know we support (i.e. are the ones that do not use socketcall()) */
+ supported = true;
+ break;
+
+ case SCMP_ARCH_S390:
+ case SCMP_ARCH_S390X:
+ case SCMP_ARCH_X86:
+ case SCMP_ARCH_MIPSEL:
+ case SCMP_ARCH_MIPS:
+ default:
+ /* These we either know we don't support (i.e. are the ones that do use socketcall()), or we
+ * don't know */
+ supported = false;
+ break;
+ }
+
+ if (!supported)
+ continue;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ if (whitelist) {
+ int af, first = 0, last = 0;
+ void *afp;
+
+ /* If this is a whitelist, we first block the address families that are out of range and then
+ * everything that is not in the set. First, we find the lowest and highest address family in
+ * the set. */
+
+ SET_FOREACH(afp, address_families, i) {
+ af = PTR_TO_INT(afp);
+
+ if (af <= 0 || af >= af_max())
+ continue;
+
+ if (first == 0 || af < first)
+ first = af;
+
+ if (last == 0 || af > last)
+ last = af;
+ }
+
+ assert((first == 0) == (last == 0));
+
+ if (first == 0) {
+
+ /* No entries in the valid range, block everything */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 0);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ } else {
+
+ /* Block everything below the first entry */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_LT, first));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ /* Block everything above the last entry */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_GT, last));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ /* Block everything between the first and last entry */
+ for (af = 1; af < af_max(); af++) {
+
+ if (set_contains(address_families, INT_TO_PTR(af)))
+ continue;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_EQ, af));
+ if (r < 0)
+ break;
+ }
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+ }
+
+ } else {
+ void *af;
+
+ /* If this is a blacklist, then generate one rule for
+ * each address family that are then combined in OR
+ * checks. */
+
+ SET_FOREACH(af, address_families, i) {
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
+ if (r < 0)
+ break;
+ }
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+ }
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install socket family rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_restrict_realtime(void) {
+ static const int permitted_policies[] = {
+ SCHED_OTHER,
+ SCHED_BATCH,
+ SCHED_IDLE,
+ };
+
+ int r, max_policy = 0;
+ uint32_t arch;
+ unsigned i;
+
+ /* Determine the highest policy constant we want to allow */
+ for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
+ if (permitted_policies[i] > max_policy)
+ max_policy = permitted_policies[i];
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ int p;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ /* Go through all policies with lower values than that, and block them -- unless they appear in the
+ * whitelist. */
+ for (p = 0; p < max_policy; p++) {
+ bool good = false;
+
+ /* Check if this is in the whitelist. */
+ for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
+ if (permitted_policies[i] == p) {
+ good = true;
+ break;
+ }
+
+ if (good)
+ continue;
+
+ /* Deny this policy */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(sched_setscheduler),
+ 1,
+ SCMP_A1(SCMP_CMP_EQ, p));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+ }
+
+ /* Blacklist all other policies, i.e. the ones with higher values. Note that all comparisons are
+ * unsigned here, hence no need no check for < 0 values. */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(sched_setscheduler),
+ 1,
+ SCMP_A1(SCMP_CMP_GT, max_policy));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install realtime protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+static int add_seccomp_syscall_filter(scmp_filter_ctx seccomp,
+ uint32_t arch,
+ int nr,
+ unsigned arg_cnt,
+ const struct scmp_arg_cmp arg) {
+ int r;
+
+ r = seccomp_rule_add_exact(seccomp, SCMP_ACT_ERRNO(EPERM), nr, arg_cnt, arg);
+ if (r < 0) {
+ _cleanup_free_ char *n = NULL;
+
+ n = seccomp_syscall_resolve_num_arch(arch, nr);
+ log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
+ strna(n),
+ seccomp_arch_to_string(arch));
+ }
+
+ return r;
+}
+
+/* For known architectures, check that syscalls are indeed defined or not. */
+#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
+assert_cc(SCMP_SYS(shmget) > 0);
+assert_cc(SCMP_SYS(shmat) > 0);
+assert_cc(SCMP_SYS(shmdt) > 0);
+#elif defined(__i386__) || defined(__powerpc64__)
+assert_cc(SCMP_SYS(shmget) < 0);
+assert_cc(SCMP_SYS(shmat) < 0);
+assert_cc(SCMP_SYS(shmdt) < 0);
+#endif
+
+int seccomp_memory_deny_write_execute(void) {
+
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ switch (arch) {
+
+ case SCMP_ARCH_X86:
+ filter_syscall = SCMP_SYS(mmap2);
+ block_syscall = SCMP_SYS(mmap);
+ break;
+
+ case SCMP_ARCH_PPC:
+ case SCMP_ARCH_PPC64:
+ case SCMP_ARCH_PPC64LE:
+ filter_syscall = SCMP_SYS(mmap);
+
+ /* Note that shmat() isn't available, and the call is multiplexed through ipc().
+ * We ignore that here, which means there's still a way to get writable/executable
+ * memory, if an IPC key is mapped like this. That's a pity, but no total loss. */
+
+ break;
+
+ case SCMP_ARCH_ARM:
+ filter_syscall = SCMP_SYS(mmap2); /* arm has only mmap2 */
+ shmat_syscall = SCMP_SYS(shmat);
+ break;
+
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_X32:
+ case SCMP_ARCH_AARCH64:
+ filter_syscall = SCMP_SYS(mmap); /* amd64, x32, and arm64 have only mmap */
+ shmat_syscall = SCMP_SYS(shmat);
+ break;
+
+ /* Please add more definitions here, if you port systemd to other architectures! */
+
+#if !defined(__i386__) && !defined(__x86_64__) && !defined(__powerpc__) && !defined(__powerpc64__) && !defined(__arm__) && !defined(__aarch64__)
+#warning "Consider adding the right mmap() syscall definitions here!"
+#endif
+ }
+
+ /* Can't filter mmap() on this arch, then skip it */
+ if (filter_syscall == 0)
+ continue;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = add_seccomp_syscall_filter(seccomp, arch, filter_syscall,
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
+ if (r < 0)
+ continue;
+
+ if (block_syscall != 0) {
+ r = add_seccomp_syscall_filter(seccomp, arch, block_syscall, 0, (const struct scmp_arg_cmp){} );
+ if (r < 0)
+ continue;
+ }
+
+ r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(mprotect),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
+ if (r < 0)
+ continue;
+
+#ifdef __NR_pkey_mprotect
+ r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(pkey_mprotect),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
+ if (r < 0)
+ continue;
+#endif
+
+ if (shmat_syscall != 0) {
+ r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(shmat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
+ if (r < 0)
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install MemoryDenyWriteExecute= rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_restrict_archs(Set *archs) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ Iterator i;
+ void *id;
+ int r;
+
+ /* This installs a filter with no rules, but that restricts the system call architectures to the specified
+ * list.
+ *
+ * There are some qualifications. However the most important use is to stop processes from bypassing
+ * system call restrictions, in case they used a broader (multiplexing) syscall which is only available
+ * in a non-native architecture. There are no holes in this use case, at least so far. */
+
+ /* Note libseccomp includes our "native" (current) architecture in the filter by default.
+ * We do not remove it. For example, our callers expect to be able to call execve() afterwards
+ * to run a program with the restrictions applied. */
+ seccomp = seccomp_init(SCMP_ACT_ALLOW);
+ if (!seccomp)
+ return -ENOMEM;
+
+ SET_FOREACH(id, archs, i) {
+ r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ /* The vdso for x32 assumes that x86-64 syscalls are available. Let's allow them, since x32
+ * x32 syscalls should basically match x86-64 for everything except the pointer type.
+ * The important thing is that you can block the old 32-bit x86 syscalls.
+ * https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=850047 */
+
+ if (seccomp_arch_native() == SCMP_ARCH_X32 ||
+ set_contains(archs, UINT32_TO_PTR(SCMP_ARCH_X32 + 1))) {
+
+ r = seccomp_arch_add(seccomp, SCMP_ARCH_X86_64);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ if (r < 0)
+ return r;
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to restrict system call architectures, skipping: %m");
+
+ return 0;
+}
+
+int parse_syscall_archs(char **l, Set **archs) {
+ _cleanup_set_free_ Set *_archs;
+ char **s;
+ int r;
+
+ assert(l);
+ assert(archs);
+
+ r = set_ensure_allocated(&_archs, NULL);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(s, l) {
+ uint32_t a;
+
+ r = seccomp_arch_from_string(*s, &a);
+ if (r < 0)
+ return -EINVAL;
+
+ r = set_put(_archs, UINT32_TO_PTR(a + 1));
+ if (r < 0)
+ return -ENOMEM;
+ }
+
+ *archs = TAKE_PTR(_archs);
+
+ return 0;
+}
+
+int seccomp_filter_set_add(Hashmap *filter, bool add, const SyscallFilterSet *set) {
+ const char *i;
+ int r;
+
+ assert(set);
+
+ NULSTR_FOREACH(i, set->value) {
+
+ if (i[0] == '@') {
+ const SyscallFilterSet *more;
+
+ more = syscall_filter_set_find(i);
+ if (!more)
+ return -ENXIO;
+
+ r = seccomp_filter_set_add(filter, add, more);
+ if (r < 0)
+ return r;
+ } else {
+ int id;
+
+ id = seccomp_syscall_resolve_name(i);
+ if (id == __NR_SCMP_ERROR) {
+ log_debug("Couldn't resolve system call, ignoring: %s", i);
+ continue;
+ }
+
+ if (add) {
+ r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(-1));
+ if (r < 0)
+ return r;
+ } else
+ (void) hashmap_remove(filter, INT_TO_PTR(id + 1));
+ }
+ }
+
+ return 0;
+}
+
+int seccomp_lock_personality(unsigned long personality) {
+ uint32_t arch;
+ int r;
+
+ if (personality >= PERSONALITY_INVALID)
+ return -EINVAL;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(personality),
+ 1,
+ SCMP_A0(SCMP_CMP_NE, personality));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to enable personality lock for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
new file mode 100644
index 0000000..d8a36c4
--- /dev/null
+++ b/src/shared/seccomp-util.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <seccomp.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "set.h"
+
+const char* seccomp_arch_to_string(uint32_t c);
+int seccomp_arch_from_string(const char *n, uint32_t *ret);
+
+int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action);
+
+bool is_seccomp_available(void);
+
+typedef struct SyscallFilterSet {
+ const char *name;
+ const char *help;
+ const char *value;
+} SyscallFilterSet;
+
+enum {
+ /* Please leave DEFAULT first, but sort the rest alphabetically */
+ SYSCALL_FILTER_SET_DEFAULT,
+ SYSCALL_FILTER_SET_AIO,
+ SYSCALL_FILTER_SET_BASIC_IO,
+ SYSCALL_FILTER_SET_CHOWN,
+ SYSCALL_FILTER_SET_CLOCK,
+ SYSCALL_FILTER_SET_CPU_EMULATION,
+ SYSCALL_FILTER_SET_DEBUG,
+ SYSCALL_FILTER_SET_FILE_SYSTEM,
+ SYSCALL_FILTER_SET_IO_EVENT,
+ SYSCALL_FILTER_SET_IPC,
+ SYSCALL_FILTER_SET_KEYRING,
+ SYSCALL_FILTER_SET_MEMLOCK,
+ SYSCALL_FILTER_SET_MODULE,
+ SYSCALL_FILTER_SET_MOUNT,
+ SYSCALL_FILTER_SET_NETWORK_IO,
+ SYSCALL_FILTER_SET_OBSOLETE,
+ SYSCALL_FILTER_SET_PRIVILEGED,
+ SYSCALL_FILTER_SET_PROCESS,
+ SYSCALL_FILTER_SET_RAW_IO,
+ SYSCALL_FILTER_SET_REBOOT,
+ SYSCALL_FILTER_SET_RESOURCES,
+ SYSCALL_FILTER_SET_SETUID,
+ SYSCALL_FILTER_SET_SIGNAL,
+ SYSCALL_FILTER_SET_SWAP,
+ SYSCALL_FILTER_SET_SYNC,
+ SYSCALL_FILTER_SET_SYSTEM_SERVICE,
+ SYSCALL_FILTER_SET_TIMER,
+ _SYSCALL_FILTER_SET_MAX
+};
+
+extern const SyscallFilterSet syscall_filter_sets[];
+
+const SyscallFilterSet *syscall_filter_set_find(const char *name);
+
+int seccomp_filter_set_add(Hashmap *s, bool b, const SyscallFilterSet *set);
+
+int seccomp_add_syscall_filter_item(scmp_filter_ctx *ctx, const char *name, uint32_t action, char **exclude, bool log_missing);
+
+int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action, bool log_missing);
+int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action, bool log_missing);
+
+typedef enum SeccompParseFlags {
+ SECCOMP_PARSE_INVERT = 1 << 0,
+ SECCOMP_PARSE_WHITELIST = 1 << 1,
+ SECCOMP_PARSE_LOG = 1 << 2,
+ SECCOMP_PARSE_PERMISSIVE = 1 << 3,
+} SeccompParseFlags;
+
+int seccomp_parse_syscall_filter_full(
+ const char *name, int errno_num, Hashmap *filter, SeccompParseFlags flags,
+ const char *unit, const char *filename, unsigned line);
+
+static inline int seccomp_parse_syscall_filter(const char *name, int errno_num, Hashmap *filter, SeccompParseFlags flags) {
+ return seccomp_parse_syscall_filter_full(name, errno_num, filter, flags, NULL, NULL, 0);
+}
+
+int seccomp_restrict_archs(Set *archs);
+int seccomp_restrict_namespaces(unsigned long retain);
+int seccomp_protect_sysctl(void);
+int seccomp_restrict_address_families(Set *address_families, bool whitelist);
+int seccomp_restrict_realtime(void);
+int seccomp_memory_deny_write_execute(void);
+int seccomp_lock_personality(unsigned long personality);
+
+extern const uint32_t seccomp_local_archs[];
+
+#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \
+ for (unsigned _i = ({ (arch) = seccomp_local_archs[0]; 0; }); \
+ seccomp_local_archs[_i] != (uint32_t) -1; \
+ (arch) = seccomp_local_archs[++_i])
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(scmp_filter_ctx, seccomp_release);
+
+int parse_syscall_archs(char **l, Set **archs);
diff --git a/src/shared/securebits-util.c b/src/shared/securebits-util.c
new file mode 100644
index 0000000..6d31dfe
--- /dev/null
+++ b/src/shared/securebits-util.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "securebits-util.h"
+#include "string-util.h"
+
+int secure_bits_to_string_alloc(int i, char **s) {
+ _cleanup_free_ char *str = NULL;
+ size_t len;
+ int r;
+
+ assert(s);
+
+ r = asprintf(&str, "%s%s%s%s%s%s",
+ (i & (1 << SECURE_KEEP_CAPS)) ? "keep-caps " : "",
+ (i & (1 << SECURE_KEEP_CAPS_LOCKED)) ? "keep-caps-locked " : "",
+ (i & (1 << SECURE_NO_SETUID_FIXUP)) ? "no-setuid-fixup " : "",
+ (i & (1 << SECURE_NO_SETUID_FIXUP_LOCKED)) ? "no-setuid-fixup-locked " : "",
+ (i & (1 << SECURE_NOROOT)) ? "noroot " : "",
+ (i & (1 << SECURE_NOROOT_LOCKED)) ? "noroot-locked " : "");
+ if (r < 0)
+ return -ENOMEM;
+
+ len = strlen(str);
+ if (len != 0)
+ str[len - 1] = '\0';
+
+ *s = TAKE_PTR(str);
+
+ return 0;
+}
+
+int secure_bits_from_string(const char *s) {
+ int secure_bits = 0;
+ const char *p;
+ int r;
+
+ for (p = s;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+ if (r == -ENOMEM)
+ return r;
+ if (r <= 0)
+ break;
+
+ if (streq(word, "keep-caps"))
+ secure_bits |= 1 << SECURE_KEEP_CAPS;
+ else if (streq(word, "keep-caps-locked"))
+ secure_bits |= 1 << SECURE_KEEP_CAPS_LOCKED;
+ else if (streq(word, "no-setuid-fixup"))
+ secure_bits |= 1 << SECURE_NO_SETUID_FIXUP;
+ else if (streq(word, "no-setuid-fixup-locked"))
+ secure_bits |= 1 << SECURE_NO_SETUID_FIXUP_LOCKED;
+ else if (streq(word, "noroot"))
+ secure_bits |= 1 << SECURE_NOROOT;
+ else if (streq(word, "noroot-locked"))
+ secure_bits |= 1 << SECURE_NOROOT_LOCKED;
+ }
+
+ return secure_bits;
+}
diff --git a/src/shared/securebits-util.h b/src/shared/securebits-util.h
new file mode 100644
index 0000000..b5ec6ee
--- /dev/null
+++ b/src/shared/securebits-util.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "missing_securebits.h"
+
+int secure_bits_to_string_alloc(int i, char **s);
+int secure_bits_from_string(const char *s);
+
+static inline bool secure_bits_is_valid(int i) {
+ return ((SECURE_ALL_BITS | SECURE_ALL_LOCKS) & i) == i;
+}
+
+static inline int secure_bits_to_string_alloc_with_check(int n, char **s) {
+ if (!secure_bits_is_valid(n))
+ return -EINVAL;
+
+ return secure_bits_to_string_alloc(n, s);
+}
diff --git a/src/shared/serialize.c b/src/shared/serialize.c
new file mode 100644
index 0000000..0333f87
--- /dev/null
+++ b/src/shared/serialize.c
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/mman.h>
+
+#include "alloc-util.h"
+#include "env-util.h"
+#include "escape.h"
+#include "fileio.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+int serialize_item(FILE *f, const char *key, const char *value) {
+ assert(f);
+ assert(key);
+
+ if (!value)
+ return 0;
+
+ /* Make sure that anything we serialize we can also read back again with read_line() with a maximum line size
+ * of LONG_LINE_MAX. This is a safety net only. All code calling us should filter this out earlier anyway. */
+ if (strlen(key) + 1 + strlen(value) + 1 > LONG_LINE_MAX) {
+ log_warning("Attempted to serialize overly long item '%s', refusing.", key);
+ return -EINVAL;
+ }
+
+ fputs(key, f);
+ fputc('=', f);
+ fputs(value, f);
+ fputc('\n', f);
+
+ return 1;
+}
+
+int serialize_item_escaped(FILE *f, const char *key, const char *value) {
+ _cleanup_free_ char *c = NULL;
+
+ assert(f);
+ assert(key);
+
+ if (!value)
+ return 0;
+
+ c = cescape(value);
+ if (!c)
+ return log_oom();
+
+ return serialize_item(f, key, c);
+}
+
+int serialize_item_format(FILE *f, const char *key, const char *format, ...) {
+ char buf[LONG_LINE_MAX];
+ va_list ap;
+ int k;
+
+ assert(f);
+ assert(key);
+ assert(format);
+
+ va_start(ap, format);
+ k = vsnprintf(buf, sizeof(buf), format, ap);
+ va_end(ap);
+
+ if (k < 0 || (size_t) k >= sizeof(buf) || strlen(key) + 1 + k + 1 > LONG_LINE_MAX) {
+ log_warning("Attempted to serialize overly long item '%s', refusing.", key);
+ return -EINVAL;
+ }
+
+ fputs(key, f);
+ fputc('=', f);
+ fputs(buf, f);
+ fputc('\n', f);
+
+ return 1;
+}
+
+int serialize_fd(FILE *f, FDSet *fds, const char *key, int fd) {
+ int copy;
+
+ assert(f);
+ assert(key);
+
+ if (fd < 0)
+ return 0;
+
+ copy = fdset_put_dup(fds, fd);
+ if (copy < 0)
+ return log_error_errno(copy, "Failed to add file descriptor to serialization set: %m");
+
+ return serialize_item_format(f, key, "%i", copy);
+}
+
+int serialize_usec(FILE *f, const char *key, usec_t usec) {
+ assert(f);
+ assert(key);
+
+ if (usec == USEC_INFINITY)
+ return 0;
+
+ return serialize_item_format(f, key, USEC_FMT, usec);
+}
+
+int serialize_dual_timestamp(FILE *f, const char *name, const dual_timestamp *t) {
+ assert(f);
+ assert(name);
+ assert(t);
+
+ if (!dual_timestamp_is_set(t))
+ return 0;
+
+ return serialize_item_format(f, name, USEC_FMT " " USEC_FMT, t->realtime, t->monotonic);
+}
+
+int serialize_strv(FILE *f, const char *key, char **l) {
+ int ret = 0, r;
+ char **i;
+
+ /* Returns the first error, or positive if anything was serialized, 0 otherwise. */
+
+ STRV_FOREACH(i, l) {
+ r = serialize_item_escaped(f, key, *i);
+ if ((ret >= 0 && r < 0) ||
+ (ret == 0 && r > 0))
+ ret = r;
+ }
+
+ return ret;
+}
+
+int deserialize_usec(const char *value, usec_t *ret) {
+ int r;
+
+ assert(value);
+
+ r = safe_atou64(value, ret);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse usec value \"%s\": %m", value);
+
+ return 0;
+}
+
+int deserialize_dual_timestamp(const char *value, dual_timestamp *t) {
+ uint64_t a, b;
+ int r, pos;
+
+ assert(value);
+ assert(t);
+
+ pos = strspn(value, WHITESPACE);
+ if (value[pos] == '-')
+ return -EINVAL;
+ pos += strspn(value + pos, DIGITS);
+ pos += strspn(value + pos, WHITESPACE);
+ if (value[pos] == '-')
+ return -EINVAL;
+
+ r = sscanf(value, "%" PRIu64 "%" PRIu64 "%n", &a, &b, &pos);
+ if (r != 2)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse dual timestamp value \"%s\".",
+ value);
+
+ if (value[pos] != '\0')
+ /* trailing garbage */
+ return -EINVAL;
+
+ t->realtime = a;
+ t->monotonic = b;
+
+ return 0;
+}
+
+int deserialize_environment(const char *value, char ***list) {
+ _cleanup_free_ char *unescaped = NULL;
+ int r;
+
+ assert(value);
+ assert(list);
+
+ /* Changes the *environment strv inline. */
+
+ r = cunescape(value, 0, &unescaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unescape: %m");
+
+ r = strv_env_replace(list, unescaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to append environment variable: %m");
+
+ unescaped = NULL; /* now part of 'list' */
+ return 0;
+}
+
+int open_serialization_fd(const char *ident) {
+ int fd;
+
+ fd = memfd_create(ident, MFD_CLOEXEC);
+ if (fd < 0) {
+ const char *path;
+
+ path = getpid_cached() == 1 ? "/run/systemd" : "/tmp";
+ fd = open_tmpfile_unlinkable(path, O_RDWR|O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ log_debug("Serializing %s to %s.", ident, path);
+ } else
+ log_debug("Serializing %s to memfd.", ident);
+
+ return fd;
+}
diff --git a/src/shared/serialize.h b/src/shared/serialize.h
new file mode 100644
index 0000000..4cbd98b
--- /dev/null
+++ b/src/shared/serialize.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdio.h>
+
+#include "fdset.h"
+#include "macro.h"
+
+int serialize_item(FILE *f, const char *key, const char *value);
+int serialize_item_escaped(FILE *f, const char *key, const char *value);
+int serialize_item_format(FILE *f, const char *key, const char *value, ...) _printf_(3,4);
+int serialize_fd(FILE *f, FDSet *fds, const char *key, int fd);
+int serialize_usec(FILE *f, const char *key, usec_t usec);
+int serialize_dual_timestamp(FILE *f, const char *key, const dual_timestamp *t);
+int serialize_strv(FILE *f, const char *key, char **l);
+
+static inline int serialize_bool(FILE *f, const char *key, bool b) {
+ return serialize_item(f, key, yes_no(b));
+}
+
+int deserialize_usec(const char *value, usec_t *timestamp);
+int deserialize_dual_timestamp(const char *value, dual_timestamp *t);
+int deserialize_environment(const char *value, char ***environment);
+
+int open_serialization_fd(const char *ident);
diff --git a/src/shared/sleep-config.c b/src/shared/sleep-config.c
new file mode 100644
index 0000000..2e22bd0
--- /dev/null
+++ b/src/shared/sleep-config.c
@@ -0,0 +1,436 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2018 Dell Inc.
+***/
+
+#include <errno.h>
+#include <linux/fs.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "sleep-config.h"
+#include "string-util.h"
+#include "strv.h"
+
+int parse_sleep_config(const char *verb, bool *ret_allow, char ***ret_modes, char ***ret_states, usec_t *ret_delay) {
+ int allow_suspend = -1, allow_hibernate = -1,
+ allow_s2h = -1, allow_hybrid_sleep = -1;
+ bool allow;
+ _cleanup_strv_free_ char
+ **suspend_mode = NULL, **suspend_state = NULL,
+ **hibernate_mode = NULL, **hibernate_state = NULL,
+ **hybrid_mode = NULL, **hybrid_state = NULL;
+ _cleanup_strv_free_ char **modes, **states; /* always initialized below */
+ usec_t delay = 180 * USEC_PER_MINUTE;
+
+ const ConfigTableItem items[] = {
+ { "Sleep", "AllowSuspend", config_parse_tristate, 0, &allow_suspend },
+ { "Sleep", "AllowHibernation", config_parse_tristate, 0, &allow_hibernate },
+ { "Sleep", "AllowSuspendThenHibernate", config_parse_tristate, 0, &allow_s2h },
+ { "Sleep", "AllowHybridSleep", config_parse_tristate, 0, &allow_hybrid_sleep },
+
+ { "Sleep", "SuspendMode", config_parse_strv, 0, &suspend_mode },
+ { "Sleep", "SuspendState", config_parse_strv, 0, &suspend_state },
+ { "Sleep", "HibernateMode", config_parse_strv, 0, &hibernate_mode },
+ { "Sleep", "HibernateState", config_parse_strv, 0, &hibernate_state },
+ { "Sleep", "HybridSleepMode", config_parse_strv, 0, &hybrid_mode },
+ { "Sleep", "HybridSleepState", config_parse_strv, 0, &hybrid_state },
+
+ { "Sleep", "HibernateDelaySec", config_parse_sec, 0, &delay},
+ {}
+ };
+
+ (void) config_parse_many_nulstr(PKGSYSCONFDIR "/sleep.conf",
+ CONF_PATHS_NULSTR("systemd/sleep.conf.d"),
+ "Sleep\0", config_item_table_lookup, items,
+ CONFIG_PARSE_WARN, NULL);
+
+ if (streq(verb, "suspend")) {
+ allow = allow_suspend != 0;
+
+ /* empty by default */
+ modes = TAKE_PTR(suspend_mode);
+
+ if (suspend_state)
+ states = TAKE_PTR(suspend_state);
+ else
+ states = strv_new("mem", "standby", "freeze");
+
+ } else if (streq(verb, "hibernate")) {
+ allow = allow_hibernate != 0;
+
+ if (hibernate_mode)
+ modes = TAKE_PTR(hibernate_mode);
+ else
+ modes = strv_new("platform", "shutdown");
+
+ if (hibernate_state)
+ states = TAKE_PTR(hibernate_state);
+ else
+ states = strv_new("disk");
+
+ } else if (streq(verb, "hybrid-sleep")) {
+ allow = allow_hybrid_sleep > 0 ||
+ (allow_suspend != 0 && allow_hibernate != 0);
+
+ if (hybrid_mode)
+ modes = TAKE_PTR(hybrid_mode);
+ else
+ modes = strv_new("suspend", "platform", "shutdown");
+
+ if (hybrid_state)
+ states = TAKE_PTR(hybrid_state);
+ else
+ states = strv_new("disk");
+
+ } else if (streq(verb, "suspend-then-hibernate")) {
+ allow = allow_s2h > 0 ||
+ (allow_suspend != 0 && allow_hibernate != 0);
+
+ modes = states = NULL;
+ } else
+ assert_not_reached("what verb");
+
+ if ((!modes && STR_IN_SET(verb, "hibernate", "hybrid-sleep")) ||
+ (!states && !streq(verb, "suspend-then-hibernate")))
+ return log_oom();
+
+ if (ret_allow)
+ *ret_allow = allow;
+ if (ret_modes)
+ *ret_modes = TAKE_PTR(modes);
+ if (ret_states)
+ *ret_states = TAKE_PTR(states);
+ if (ret_delay)
+ *ret_delay = delay;
+
+ return 0;
+}
+
+int can_sleep_state(char **types) {
+ char **type;
+ int r;
+ _cleanup_free_ char *p = NULL;
+
+ if (strv_isempty(types))
+ return true;
+
+ /* If /sys is read-only we cannot sleep */
+ if (access("/sys/power/state", W_OK) < 0)
+ return false;
+
+ r = read_one_line_file("/sys/power/state", &p);
+ if (r < 0)
+ return false;
+
+ STRV_FOREACH(type, types) {
+ const char *word, *state;
+ size_t l, k;
+
+ k = strlen(*type);
+ FOREACH_WORD_SEPARATOR(word, l, p, WHITESPACE, state)
+ if (l == k && memcmp(word, *type, l) == 0)
+ return true;
+ }
+
+ return false;
+}
+
+int can_sleep_disk(char **types) {
+ char **type;
+ int r;
+ _cleanup_free_ char *p = NULL;
+
+ if (strv_isempty(types))
+ return true;
+
+ /* If /sys is read-only we cannot sleep */
+ if (access("/sys/power/disk", W_OK) < 0) {
+ log_debug_errno(errno, "/sys/power/disk is not writable: %m");
+ return false;
+ }
+
+ r = read_one_line_file("/sys/power/disk", &p);
+ if (r < 0) {
+ log_debug_errno(r, "Couldn't read /sys/power/disk: %m");
+ return false;
+ }
+
+ STRV_FOREACH(type, types) {
+ const char *word, *state;
+ size_t l, k;
+
+ k = strlen(*type);
+ FOREACH_WORD_SEPARATOR(word, l, p, WHITESPACE, state) {
+ if (l == k && memcmp(word, *type, l) == 0)
+ return true;
+
+ if (l == k + 2 &&
+ word[0] == '[' &&
+ memcmp(word + 1, *type, l - 2) == 0 &&
+ word[l-1] == ']')
+ return true;
+ }
+ }
+
+ return false;
+}
+
+#define HIBERNATION_SWAP_THRESHOLD 0.98
+
+int find_hibernate_location(char **device, char **type, size_t *size, size_t *used) {
+ _cleanup_fclose_ FILE *f;
+ unsigned i;
+
+ f = fopen("/proc/swaps", "re");
+ if (!f) {
+ log_full(errno == ENOENT ? LOG_DEBUG : LOG_WARNING,
+ "Failed to retrieve open /proc/swaps: %m");
+ assert(errno > 0);
+ return -errno;
+ }
+
+ (void) fscanf(f, "%*s %*s %*s %*s %*s\n");
+
+ for (i = 1;; i++) {
+ _cleanup_free_ char *dev_field = NULL, *type_field = NULL;
+ size_t size_field, used_field;
+ int k;
+
+ k = fscanf(f,
+ "%ms " /* device/file */
+ "%ms " /* type of swap */
+ "%zu " /* swap size */
+ "%zu " /* used */
+ "%*i\n", /* priority */
+ &dev_field, &type_field, &size_field, &used_field);
+ if (k == EOF)
+ break;
+ if (k != 4) {
+ log_warning("Failed to parse /proc/swaps:%u", i);
+ continue;
+ }
+
+ if (streq(type_field, "file")) {
+
+ if (endswith(dev_field, "\\040(deleted)")) {
+ log_warning("Ignoring deleted swap file '%s'.", dev_field);
+ continue;
+ }
+
+ } else if (streq(type_field, "partition")) {
+ const char *fn;
+
+ fn = path_startswith(dev_field, "/dev/");
+ if (fn && startswith(fn, "zram")) {
+ log_debug("Ignoring compressed RAM swap device '%s'.", dev_field);
+ continue;
+ }
+ }
+
+ if (device)
+ *device = TAKE_PTR(dev_field);
+ if (type)
+ *type = TAKE_PTR(type_field);
+ if (size)
+ *size = size_field;
+ if (used)
+ *used = used_field;
+ return 0;
+ }
+
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOSYS),
+ "No swap partitions were found.");
+}
+
+static bool enough_swap_for_hibernation(void) {
+ _cleanup_free_ char *active = NULL;
+ unsigned long long act = 0;
+ size_t size = 0, used = 0;
+ int r;
+
+ if (getenv_bool("SYSTEMD_BYPASS_HIBERNATION_MEMORY_CHECK") > 0)
+ return true;
+
+ r = find_hibernate_location(NULL, NULL, &size, &used);
+ if (r < 0)
+ return false;
+
+ r = get_proc_field("/proc/meminfo", "Active(anon)", WHITESPACE, &active);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to retrieve Active(anon) from /proc/meminfo: %m");
+ return false;
+ }
+
+ r = safe_atollu(active, &act);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse Active(anon) from /proc/meminfo: %s: %m", active);
+ return false;
+ }
+
+ r = act <= (size - used) * HIBERNATION_SWAP_THRESHOLD;
+ log_debug("%s swap for hibernation, Active(anon)=%llu kB, size=%zu kB, used=%zu kB, threshold=%.2g%%",
+ r ? "Enough" : "Not enough", act, size, used, 100*HIBERNATION_SWAP_THRESHOLD);
+
+ return r;
+}
+
+int read_fiemap(int fd, struct fiemap **ret) {
+ _cleanup_free_ struct fiemap *fiemap = NULL, *result_fiemap = NULL;
+ struct stat statinfo;
+ uint32_t result_extents = 0;
+ uint64_t fiemap_start = 0, fiemap_length;
+ const size_t n_extra = DIV_ROUND_UP(sizeof(struct fiemap), sizeof(struct fiemap_extent));
+ size_t fiemap_allocated = n_extra, result_fiemap_allocated = n_extra;
+
+ if (fstat(fd, &statinfo) < 0)
+ return log_debug_errno(errno, "Cannot determine file size: %m");
+ if (!S_ISREG(statinfo.st_mode))
+ return -ENOTTY;
+ fiemap_length = statinfo.st_size;
+
+ /* Zero this out in case we run on a file with no extents */
+ fiemap = calloc(n_extra, sizeof(struct fiemap_extent));
+ if (!fiemap)
+ return -ENOMEM;
+
+ result_fiemap = malloc_multiply(n_extra, sizeof(struct fiemap_extent));
+ if (!result_fiemap)
+ return -ENOMEM;
+
+ /* XFS filesystem has incorrect implementation of fiemap ioctl and
+ * returns extents for only one block-group at a time, so we need
+ * to handle it manually, starting the next fiemap call from the end
+ * of the last extent
+ */
+ while (fiemap_start < fiemap_length) {
+ *fiemap = (struct fiemap) {
+ .fm_start = fiemap_start,
+ .fm_length = fiemap_length,
+ .fm_flags = FIEMAP_FLAG_SYNC,
+ };
+
+ /* Find out how many extents there are */
+ if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0)
+ return log_debug_errno(errno, "Failed to read extents: %m");
+
+ /* Nothing to process */
+ if (fiemap->fm_mapped_extents == 0)
+ break;
+
+ /* Resize fiemap to allow us to read in the extents, result fiemap has to hold all
+ * the extents for the whole file. Add space for the initial struct fiemap. */
+ if (!greedy_realloc0((void**) &fiemap, &fiemap_allocated,
+ n_extra + fiemap->fm_mapped_extents, sizeof(struct fiemap_extent)))
+ return -ENOMEM;
+
+ fiemap->fm_extent_count = fiemap->fm_mapped_extents;
+ fiemap->fm_mapped_extents = 0;
+
+ if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0)
+ return log_debug_errno(errno, "Failed to read extents: %m");
+
+ /* Resize result_fiemap to allow us to copy in the extents */
+ if (!greedy_realloc((void**) &result_fiemap, &result_fiemap_allocated,
+ n_extra + result_extents + fiemap->fm_mapped_extents, sizeof(struct fiemap_extent)))
+ return -ENOMEM;
+
+ memcpy(result_fiemap->fm_extents + result_extents,
+ fiemap->fm_extents,
+ sizeof(struct fiemap_extent) * fiemap->fm_mapped_extents);
+
+ result_extents += fiemap->fm_mapped_extents;
+
+ /* Highly unlikely that it is zero */
+ if (_likely_(fiemap->fm_mapped_extents > 0)) {
+ uint32_t i = fiemap->fm_mapped_extents - 1;
+
+ fiemap_start = fiemap->fm_extents[i].fe_logical +
+ fiemap->fm_extents[i].fe_length;
+
+ if (fiemap->fm_extents[i].fe_flags & FIEMAP_EXTENT_LAST)
+ break;
+ }
+ }
+
+ memcpy(result_fiemap, fiemap, sizeof(struct fiemap));
+ result_fiemap->fm_mapped_extents = result_extents;
+ *ret = TAKE_PTR(result_fiemap);
+ return 0;
+}
+
+static int can_sleep_internal(const char *verb, bool check_allowed);
+
+static bool can_s2h(void) {
+ const char *p;
+ int r;
+
+ r = access("/sys/class/rtc/rtc0/wakealarm", W_OK);
+ if (r < 0) {
+ log_full(errno == ENOENT ? LOG_DEBUG : LOG_WARNING,
+ "/sys/class/rct/rct0/wakealarm is not writable %m");
+ return false;
+ }
+
+ FOREACH_STRING(p, "suspend", "hibernate") {
+ r = can_sleep_internal(p, false);
+ if (IN_SET(r, 0, -ENOSPC, -EADV)) {
+ log_debug("Unable to %s system.", p);
+ return false;
+ }
+ if (r < 0)
+ return log_debug_errno(r, "Failed to check if %s is possible: %m", p);
+ }
+
+ return true;
+}
+
+static int can_sleep_internal(const char *verb, bool check_allowed) {
+ bool allow;
+ _cleanup_strv_free_ char **modes = NULL, **states = NULL;
+ int r;
+
+ assert(STR_IN_SET(verb, "suspend", "hibernate", "hybrid-sleep", "suspend-then-hibernate"));
+
+ r = parse_sleep_config(verb, &allow, &modes, &states, NULL);
+ if (r < 0)
+ return false;
+
+ if (check_allowed && !allow) {
+ log_debug("Sleep mode \"%s\" is disabled by configuration.", verb);
+ return false;
+ }
+
+ if (streq(verb, "suspend-then-hibernate"))
+ return can_s2h();
+
+ if (!can_sleep_state(states) || !can_sleep_disk(modes))
+ return false;
+
+ if (streq(verb, "suspend"))
+ return true;
+
+ if (!enough_swap_for_hibernation())
+ return -ENOSPC;
+
+ return true;
+}
+
+int can_sleep(const char *verb) {
+ return can_sleep_internal(verb, true);
+}
diff --git a/src/shared/sleep-config.h b/src/shared/sleep-config.h
new file mode 100644
index 0000000..c584f44
--- /dev/null
+++ b/src/shared/sleep-config.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/fiemap.h>
+#include "time-util.h"
+
+int read_fiemap(int fd, struct fiemap **ret);
+int parse_sleep_config(const char *verb, bool *ret_allow, char ***ret_modes, char ***ret_states, usec_t *ret_delay);
+int find_hibernate_location(char **device, char **type, size_t *size, size_t *used);
+
+int can_sleep(const char *verb);
+int can_sleep_disk(char **types);
+int can_sleep_state(char **types);
diff --git a/src/shared/spawn-ask-password-agent.c b/src/shared/spawn-ask-password-agent.c
new file mode 100644
index 0000000..309071c
--- /dev/null
+++ b/src/shared/spawn-ask-password-agent.c
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "log.h"
+#include "process-util.h"
+#include "spawn-ask-password-agent.h"
+#include "util.h"
+
+static pid_t agent_pid = 0;
+
+int ask_password_agent_open(void) {
+ int r;
+
+ if (agent_pid > 0)
+ return 0;
+
+ /* We check STDIN here, not STDOUT, since this is about input,
+ * not output */
+ if (!isatty(STDIN_FILENO))
+ return 0;
+
+ if (!is_main_thread())
+ return -EPERM;
+
+ r = fork_agent("(sd-askpwagent)",
+ NULL, 0,
+ &agent_pid,
+ SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH,
+ SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH, "--watch", NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fork TTY ask password agent: %m");
+
+ return 1;
+}
+
+void ask_password_agent_close(void) {
+
+ if (agent_pid <= 0)
+ return;
+
+ /* Inform agent that we are done */
+ (void) kill_and_sigcont(agent_pid, SIGTERM);
+ (void) wait_for_terminate(agent_pid, NULL);
+ agent_pid = 0;
+}
diff --git a/src/shared/spawn-ask-password-agent.h b/src/shared/spawn-ask-password-agent.h
new file mode 100644
index 0000000..97e73bd
--- /dev/null
+++ b/src/shared/spawn-ask-password-agent.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int ask_password_agent_open(void);
+void ask_password_agent_close(void);
diff --git a/src/shared/spawn-polkit-agent.c b/src/shared/spawn-polkit-agent.c
new file mode 100644
index 0000000..180cb79
--- /dev/null
+++ b/src/shared/spawn-polkit-agent.c
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "process-util.h"
+#include "spawn-polkit-agent.h"
+#include "stdio-util.h"
+#include "time-util.h"
+#include "util.h"
+
+#if ENABLE_POLKIT
+static pid_t agent_pid = 0;
+
+int polkit_agent_open(void) {
+ char notify_fd[DECIMAL_STR_MAX(int) + 1];
+ int pipe_fd[2], r;
+
+ if (agent_pid > 0)
+ return 0;
+
+ /* Clients that run as root don't need to activate/query polkit */
+ if (geteuid() == 0)
+ return 0;
+
+ /* We check STDIN here, not STDOUT, since this is about input, not output */
+ if (!isatty(STDIN_FILENO))
+ return 0;
+
+ if (!is_main_thread())
+ return -EPERM;
+
+ if (pipe2(pipe_fd, 0) < 0)
+ return -errno;
+
+ xsprintf(notify_fd, "%i", pipe_fd[1]);
+
+ r = fork_agent("(polkit-agent)",
+ &pipe_fd[1], 1,
+ &agent_pid,
+ POLKIT_AGENT_BINARY_PATH,
+ POLKIT_AGENT_BINARY_PATH, "--notify-fd", notify_fd, "--fallback", NULL);
+
+ /* Close the writing side, because that's the one for the agent */
+ safe_close(pipe_fd[1]);
+
+ if (r < 0)
+ log_error_errno(r, "Failed to fork TTY ask password agent: %m");
+ else
+ /* Wait until the agent closes the fd */
+ fd_wait_for_event(pipe_fd[0], POLLHUP, USEC_INFINITY);
+
+ safe_close(pipe_fd[0]);
+
+ return r;
+}
+
+void polkit_agent_close(void) {
+
+ if (agent_pid <= 0)
+ return;
+
+ /* Inform agent that we are done */
+ (void) kill_and_sigcont(agent_pid, SIGTERM);
+ (void) wait_for_terminate(agent_pid, NULL);
+ agent_pid = 0;
+}
+
+#else
+
+int polkit_agent_open(void) {
+ return 0;
+}
+
+void polkit_agent_close(void) {
+}
+
+#endif
diff --git a/src/shared/spawn-polkit-agent.h b/src/shared/spawn-polkit-agent.h
new file mode 100644
index 0000000..190b970
--- /dev/null
+++ b/src/shared/spawn-polkit-agent.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "bus-util.h"
+
+int polkit_agent_open(void);
+void polkit_agent_close(void);
+
+static inline int polkit_agent_open_if_enabled(
+ BusTransport transport,
+ bool ask_password) {
+
+ /* Open the polkit agent as a child process if necessary */
+
+ if (transport != BUS_TRANSPORT_LOCAL)
+ return 0;
+
+ if (!ask_password)
+ return 0;
+
+ return polkit_agent_open();
+}
diff --git a/src/shared/specifier.c b/src/shared/specifier.c
new file mode 100644
index 0000000..b8f7537
--- /dev/null
+++ b/src/shared/specifier.c
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/utsname.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "macro.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+/*
+ * Generic infrastructure for replacing %x style specifiers in
+ * strings. Will call a callback for each replacement.
+ */
+
+/* Any ASCII character or digit: our pool of potential specifiers,
+ * and "%" used for escaping. */
+#define POSSIBLE_SPECIFIERS ALPHANUMERICAL "%"
+
+int specifier_printf(const char *text, const Specifier table[], const void *userdata, char **_ret) {
+ size_t l, allocated = 0;
+ _cleanup_free_ char *ret = NULL;
+ char *t;
+ const char *f;
+ bool percent = false;
+ int r;
+
+ assert(text);
+ assert(table);
+
+ l = strlen(text);
+ if (!GREEDY_REALLOC(ret, allocated, l + 1))
+ return -ENOMEM;
+ t = ret;
+
+ for (f = text; *f; f++, l--)
+ if (percent) {
+ if (*f == '%')
+ *(t++) = '%';
+ else {
+ const Specifier *i;
+
+ for (i = table; i->specifier; i++)
+ if (i->specifier == *f)
+ break;
+
+ if (i->lookup) {
+ _cleanup_free_ char *w = NULL;
+ size_t k, j;
+
+ r = i->lookup(i->specifier, i->data, userdata, &w);
+ if (r < 0)
+ return r;
+
+ j = t - ret;
+ k = strlen(w);
+
+ if (!GREEDY_REALLOC(ret, allocated, j + k + l + 1))
+ return -ENOMEM;
+ memcpy(ret + j, w, k);
+ t = ret + j + k;
+ } else if (strchr(POSSIBLE_SPECIFIERS, *f))
+ /* Oops, an unknown specifier. */
+ return -EBADSLT;
+ else {
+ *(t++) = '%';
+ *(t++) = *f;
+ }
+ }
+
+ percent = false;
+ } else if (*f == '%')
+ percent = true;
+ else
+ *(t++) = *f;
+
+ /* If string ended with a stray %, also end with % */
+ if (percent)
+ *(t++) = '%';
+ *(t++) = 0;
+
+ /* Try to deallocate unused bytes, but don't sweat it too much */
+ if ((size_t)(t - ret) < allocated) {
+ t = realloc(ret, t - ret);
+ if (t)
+ ret = t;
+ }
+
+ *_ret = TAKE_PTR(ret);
+ return 0;
+}
+
+/* Generic handler for simple string replacements */
+
+int specifier_string(char specifier, const void *data, const void *userdata, char **ret) {
+ char *n;
+
+ n = strdup(strempty(data));
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int specifier_machine_id(char specifier, const void *data, const void *userdata, char **ret) {
+ sd_id128_t id;
+ char *n;
+ int r;
+
+ r = sd_id128_get_machine(&id);
+ if (r < 0)
+ return r;
+
+ n = new(char, 33);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = sd_id128_to_string(id, n);
+ return 0;
+}
+
+int specifier_boot_id(char specifier, const void *data, const void *userdata, char **ret) {
+ sd_id128_t id;
+ char *n;
+ int r;
+
+ r = sd_id128_get_boot(&id);
+ if (r < 0)
+ return r;
+
+ n = new(char, 33);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = sd_id128_to_string(id, n);
+ return 0;
+}
+
+int specifier_host_name(char specifier, const void *data, const void *userdata, char **ret) {
+ char *n;
+
+ n = gethostname_malloc();
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int specifier_kernel_release(char specifier, const void *data, const void *userdata, char **ret) {
+ struct utsname uts;
+ char *n;
+ int r;
+
+ r = uname(&uts);
+ if (r < 0)
+ return -errno;
+
+ n = strdup(uts.release);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int specifier_group_name(char specifier, const void *data, const void *userdata, char **ret) {
+ char *t;
+
+ t = gid_to_name(getgid());
+ if (!t)
+ return -ENOMEM;
+
+ *ret = t;
+ return 0;
+}
+
+int specifier_group_id(char specifier, const void *data, const void *userdata, char **ret) {
+ if (asprintf(ret, UID_FMT, getgid()) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int specifier_user_name(char specifier, const void *data, const void *userdata, char **ret) {
+ char *t;
+
+ /* If we are UID 0 (root), this will not result in NSS, otherwise it might. This is good, as we want to be able
+ * to run this in PID 1, where our user ID is 0, but where NSS lookups are not allowed.
+
+ * We don't use getusername_malloc() here, because we don't want to look at $USER, to remain consistent with
+ * specifer_user_id() below.
+ */
+
+ t = uid_to_name(getuid());
+ if (!t)
+ return -ENOMEM;
+
+ *ret = t;
+ return 0;
+}
+
+int specifier_user_id(char specifier, const void *data, const void *userdata, char **ret) {
+
+ if (asprintf(ret, UID_FMT, getuid()) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int specifier_user_home(char specifier, const void *data, const void *userdata, char **ret) {
+
+ /* On PID 1 (which runs as root) this will not result in NSS,
+ * which is good. See above */
+
+ return get_home_dir(ret);
+}
+
+int specifier_user_shell(char specifier, const void *data, const void *userdata, char **ret) {
+
+ /* On PID 1 (which runs as root) this will not result in NSS,
+ * which is good. See above */
+
+ return get_shell(ret);
+}
+
+int specifier_tmp_dir(char specifier, const void *data, const void *userdata, char **ret) {
+ const char *p;
+ char *copy;
+ int r;
+
+ r = tmp_dir(&p);
+ if (r < 0)
+ return r;
+
+ copy = strdup(p);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret = copy;
+ return 0;
+}
+
+int specifier_var_tmp_dir(char specifier, const void *data, const void *userdata, char **ret) {
+ const char *p;
+ char *copy;
+ int r;
+
+ r = var_tmp_dir(&p);
+ if (r < 0)
+ return r;
+
+ copy = strdup(p);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret = copy;
+ return 0;
+}
+
+int specifier_escape_strv(char **l, char ***ret) {
+ char **z, **p, **q;
+
+ assert(ret);
+
+ if (strv_isempty(l)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ z = new(char*, strv_length(l)+1);
+ if (!z)
+ return -ENOMEM;
+
+ for (p = l, q = z; *p; p++, q++) {
+
+ *q = specifier_escape(*p);
+ if (!*q) {
+ strv_free(z);
+ return -ENOMEM;
+ }
+ }
+
+ *q = NULL;
+ *ret = z;
+
+ return 0;
+}
diff --git a/src/shared/specifier.h b/src/shared/specifier.h
new file mode 100644
index 0000000..d0221ef
--- /dev/null
+++ b/src/shared/specifier.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "string-util.h"
+
+typedef int (*SpecifierCallback)(char specifier, const void *data, const void *userdata, char **ret);
+
+typedef struct Specifier {
+ const char specifier;
+ const SpecifierCallback lookup;
+ const void *data;
+} Specifier;
+
+int specifier_printf(const char *text, const Specifier table[], const void *userdata, char **ret);
+
+int specifier_string(char specifier, const void *data, const void *userdata, char **ret);
+
+int specifier_machine_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_boot_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_host_name(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_kernel_release(char specifier, const void *data, const void *userdata, char **ret);
+
+int specifier_group_name(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_group_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_name(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_home(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_shell(char specifier, const void *data, const void *userdata, char **ret);
+
+int specifier_tmp_dir(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_var_tmp_dir(char specifier, const void *data, const void *userdata, char **ret);
+
+static inline char* specifier_escape(const char *string) {
+ return strreplace(string, "%", "%%");
+}
+
+int specifier_escape_strv(char **l, char ***ret);
diff --git a/src/shared/switch-root.c b/src/shared/switch-root.c
new file mode 100644
index 0000000..dbb4622
--- /dev/null
+++ b/src/shared/switch-root.c
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "base-filesystem.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "switch-root.h"
+#include "user-util.h"
+#include "util.h"
+
+int switch_root(const char *new_root,
+ const char *old_root_after, /* path below the new root, where to place the old root after the transition */
+ bool unmount_old_root,
+ unsigned long mount_flags) { /* MS_MOVE or MS_BIND */
+
+ _cleanup_free_ char *resolved_old_root_after = NULL;
+ _cleanup_close_ int old_root_fd = -1;
+ bool old_root_remove;
+ const char *i;
+ int r;
+
+ assert(new_root);
+ assert(old_root_after);
+
+ if (path_equal(new_root, "/"))
+ return 0;
+
+ /* Check if we shall remove the contents of the old root */
+ old_root_remove = in_initrd();
+ if (old_root_remove) {
+ old_root_fd = open("/", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
+ if (old_root_fd < 0)
+ return log_error_errno(errno, "Failed to open root directory: %m");
+ }
+
+ /* Determine where we shall place the old root after the transition */
+ r = chase_symlinks(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, old_root_after);
+ if (r == 0) /* Doesn't exist yet. Let's create it */
+ (void) mkdir_p_label(resolved_old_root_after, 0755);
+
+ /* Work-around for kernel design: the kernel refuses MS_MOVE if any file systems are mounted MS_SHARED. Hence
+ * remount them MS_PRIVATE here as a work-around.
+ *
+ * https://bugzilla.redhat.com/show_bug.cgi?id=847418 */
+ if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0)
+ return log_error_errno(errno, "Failed to set \"/\" mount propagation to private: %m");
+
+ FOREACH_STRING(i, "/sys", "/dev", "/run", "/proc") {
+ _cleanup_free_ char *chased = NULL;
+
+ r = chase_symlinks(i, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &chased);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, i);
+ if (r > 0) {
+ /* Already exists. Let's see if it is a mount point already. */
+ r = path_is_mount_point(chased, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", chased);
+ if (r > 0) /* If it is already mounted, then do nothing */
+ continue;
+ } else
+ /* Doesn't exist yet? */
+ (void) mkdir_p_label(chased, 0755);
+
+ if (mount(i, chased, NULL, mount_flags, NULL) < 0)
+ return log_error_errno(errno, "Failed to mount %s to %s: %m", i, chased);
+ }
+
+ /* Do not fail if base_filesystem_create() fails. Not all switch roots are like base_filesystem_create() wants
+ * them to look like. They might even boot, if they are RO and don't have the FS layout. Just ignore the error
+ * and switch_root() nevertheless. */
+ (void) base_filesystem_create(new_root, UID_INVALID, GID_INVALID);
+
+ if (chdir(new_root) < 0)
+ return log_error_errno(errno, "Failed to change directory to %s: %m", new_root);
+
+ /* We first try a pivot_root() so that we can umount the old root dir. In many cases (i.e. where rootfs is /),
+ * that's not possible however, and hence we simply overmount root */
+ if (pivot_root(new_root, resolved_old_root_after) >= 0) {
+
+ /* Immediately get rid of the old root, if detach_oldroot is set.
+ * Since we are running off it we need to do this lazily. */
+ if (unmount_old_root) {
+ r = umount_recursive(old_root_after, MNT_DETACH);
+ if (r < 0)
+ log_warning_errno(r, "Failed to unmount old root directory tree, ignoring: %m");
+ }
+
+ } else if (mount(new_root, "/", NULL, MS_MOVE, NULL) < 0)
+ return log_error_errno(errno, "Failed to move %s to /: %m", new_root);
+
+ if (chroot(".") < 0)
+ return log_error_errno(errno, "Failed to change root: %m");
+
+ if (chdir("/") < 0)
+ return log_error_errno(errno, "Failed to change directory: %m");
+
+ if (old_root_fd >= 0) {
+ struct stat rb;
+
+ if (fstat(old_root_fd, &rb) < 0)
+ log_warning_errno(errno, "Failed to stat old root directory, leaving: %m");
+ else {
+ (void) rm_rf_children(old_root_fd, 0, &rb);
+ old_root_fd = -1;
+ }
+ }
+
+ return 0;
+}
diff --git a/src/shared/switch-root.h b/src/shared/switch-root.h
new file mode 100644
index 0000000..f4d48cb
--- /dev/null
+++ b/src/shared/switch-root.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+int switch_root(const char *new_root, const char *oldroot, bool detach_oldroot, unsigned long mountflags);
diff --git a/src/shared/sysctl-util.c b/src/shared/sysctl-util.c
new file mode 100644
index 0000000..480e6c3
--- /dev/null
+++ b/src/shared/sysctl-util.c
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "sysctl-util.h"
+
+char *sysctl_normalize(char *s) {
+ char *n;
+
+ n = strpbrk(s, "/.");
+ /* If the first separator is a slash, the path is
+ * assumed to be normalized and slashes remain slashes
+ * and dots remains dots. */
+ if (!n || *n == '/')
+ return s;
+
+ /* Otherwise, dots become slashes and slashes become
+ * dots. Fun. */
+ while (n) {
+ if (*n == '.')
+ *n = '/';
+ else
+ *n = '.';
+
+ n = strpbrk(n + 1, "/.");
+ }
+
+ return s;
+}
+
+int sysctl_write(const char *property, const char *value) {
+ char *p;
+ _cleanup_close_ int fd = -1;
+
+ assert(property);
+ assert(value);
+
+ log_debug("Setting '%s' to '%.*s'.", property, (int) strcspn(value, NEWLINE), value);
+
+ p = strjoina("/proc/sys/", property);
+ fd = open(p, O_WRONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (!endswith(value, "\n"))
+ value = strjoina(value, "\n");
+
+ if (write(fd, value, strlen(value)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int sysctl_read(const char *property, char **content) {
+ char *p;
+
+ assert(property);
+ assert(content);
+
+ p = strjoina("/proc/sys/", property);
+ return read_full_file(p, content, NULL);
+}
diff --git a/src/shared/sysctl-util.h b/src/shared/sysctl-util.h
new file mode 100644
index 0000000..fd7c78b
--- /dev/null
+++ b/src/shared/sysctl-util.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+char *sysctl_normalize(char *s);
+int sysctl_read(const char *property, char **value);
+int sysctl_write(const char *property, const char *value);
+
diff --git a/src/shared/test-tables.h b/src/shared/test-tables.h
new file mode 100644
index 0000000..4eeda3b
--- /dev/null
+++ b/src/shared/test-tables.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef const char* (*lookup_t)(int);
+typedef int (*reverse_t)(const char*);
+
+static inline void _test_table(const char *name,
+ lookup_t lookup,
+ reverse_t reverse,
+ int size,
+ bool sparse) {
+ int i, boring = 0;
+
+ for (i = -1; i < size + 1; i++) {
+ const char* val = lookup(i);
+ int rev;
+
+ if (val) {
+ rev = reverse(val);
+ boring = 0;
+ } else {
+ rev = reverse("--no-such--value----");
+ boring += i >= 0;
+ }
+
+ if (boring < 1 || i == size)
+ printf("%s: %d → %s → %d\n", name, i, val, rev);
+ else if (boring == 1)
+ printf("%*s ...\n", (int) strlen(name), "");
+
+ assert_se(!(i >= 0 && i < size ?
+ sparse ? rev != i && rev != -1 : val == NULL || rev != i :
+ val != NULL || rev != -1));
+ }
+}
+
+#define test_table(lower, upper) \
+ _test_table(STRINGIFY(lower), lower##_to_string, lower##_from_string, _##upper##_MAX, false)
+
+#define test_table_sparse(lower, upper) \
+ _test_table(STRINGIFY(lower), lower##_to_string, lower##_from_string, _##upper##_MAX, true)
diff --git a/src/shared/tests.c b/src/shared/tests.c
new file mode 100644
index 0000000..11ea12e
--- /dev/null
+++ b/src/shared/tests.c
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sched.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <util.h>
+
+/* When we include libgen.h because we need dirname() we immediately
+ * undefine basename() since libgen.h defines it as a macro to the POSIX
+ * version which is really broken. We prefer GNU basename(). */
+#include <libgen.h>
+#undef basename
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "path-util.h"
+#include "strv.h"
+#include "tests.h"
+
+char* setup_fake_runtime_dir(void) {
+ char t[] = "/tmp/fake-xdg-runtime-XXXXXX", *p;
+
+ assert_se(mkdtemp(t));
+ assert_se(setenv("XDG_RUNTIME_DIR", t, 1) >= 0);
+ assert_se(p = strdup(t));
+
+ return p;
+}
+
+static void load_testdata_env(void) {
+ static bool called = false;
+ _cleanup_free_ char *s = NULL;
+ _cleanup_free_ char *envpath = NULL;
+ _cleanup_strv_free_ char **pairs = NULL;
+ char **k, **v;
+
+ if (called)
+ return;
+ called = true;
+
+ assert_se(readlink_and_make_absolute("/proc/self/exe", &s) >= 0);
+ dirname(s);
+
+ envpath = path_join(s, "systemd-runtest.env");
+ if (load_env_file_pairs(NULL, envpath, &pairs) < 0)
+ return;
+
+ STRV_FOREACH_PAIR(k, v, pairs)
+ setenv(*k, *v, 0);
+}
+
+const char* get_testdata_dir(void) {
+ const char *env;
+
+ load_testdata_env();
+
+ /* if the env var is set, use that */
+ env = getenv("SYSTEMD_TEST_DATA");
+ if (!env)
+ env = SYSTEMD_TEST_DATA;
+ if (access(env, F_OK) < 0) {
+ fprintf(stderr, "ERROR: $SYSTEMD_TEST_DATA directory [%s] does not exist\n", env);
+ exit(EXIT_FAILURE);
+ }
+
+ return env;
+}
+
+const char* get_catalog_dir(void) {
+ const char *env;
+
+ load_testdata_env();
+
+ /* if the env var is set, use that */
+ env = getenv("SYSTEMD_CATALOG_DIR");
+ if (!env)
+ env = SYSTEMD_CATALOG_DIR;
+ if (access(env, F_OK) < 0) {
+ fprintf(stderr, "ERROR: $SYSTEMD_CATALOG_DIR directory [%s] does not exist\n", env);
+ exit(EXIT_FAILURE);
+ }
+ return env;
+}
+
+bool slow_tests_enabled(void) {
+ int r;
+
+ r = getenv_bool("SYSTEMD_SLOW_TESTS");
+ if (r >= 0)
+ return r;
+
+ if (r != -ENXIO)
+ log_warning_errno(r, "Cannot parse $SYSTEMD_SLOW_TESTS, ignoring.");
+ return SYSTEMD_SLOW_TESTS_DEFAULT;
+}
+
+void test_setup_logging(int level) {
+ log_set_max_level(level);
+ log_parse_environment();
+ log_open();
+}
+
+int log_tests_skipped(const char *message) {
+ log_notice("%s: %s, skipping tests.",
+ program_invocation_short_name, message);
+ return EXIT_TEST_SKIP;
+}
+
+int log_tests_skipped_errno(int r, const char *message) {
+ log_notice_errno(r, "%s: %s, skipping tests: %m",
+ program_invocation_short_name, message);
+ return EXIT_TEST_SKIP;
+}
+
+bool have_namespaces(void) {
+ siginfo_t si = {};
+ pid_t pid;
+
+ /* Checks whether namespaces are available. In some cases they aren't. We do this by calling unshare(), and we
+ * do so in a child process in order not to affect our own process. */
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ /* child */
+ if (unshare(CLONE_NEWNS) < 0)
+ _exit(EXIT_FAILURE);
+
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(waitid(P_PID, pid, &si, WEXITED) >= 0);
+ assert_se(si.si_code == CLD_EXITED);
+
+ if (si.si_status == EXIT_SUCCESS)
+ return true;
+
+ if (si.si_status == EXIT_FAILURE)
+ return false;
+
+ assert_not_reached("unexpected exit code");
+}
diff --git a/src/shared/tests.h b/src/shared/tests.h
new file mode 100644
index 0000000..718196f
--- /dev/null
+++ b/src/shared/tests.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+char* setup_fake_runtime_dir(void);
+const char* get_testdata_dir(void);
+const char* get_catalog_dir(void);
+bool slow_tests_enabled(void);
+void test_setup_logging(int level);
+int log_tests_skipped(const char *message);
+int log_tests_skipped_errno(int r, const char *message);
+
+bool have_namespaces(void);
diff --git a/src/shared/tmpfile-util-label.c b/src/shared/tmpfile-util-label.c
new file mode 100644
index 0000000..c12d7c1
--- /dev/null
+++ b/src/shared/tmpfile-util-label.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/stat.h>
+
+#include "selinux-util.h"
+#include "tmpfile-util-label.h"
+#include "tmpfile-util.h"
+
+int fopen_temporary_label(
+ const char *target,
+ const char *path,
+ FILE **f,
+ char **temp_path) {
+
+ int r;
+
+ r = mac_selinux_create_file_prepare(target, S_IFREG);
+ if (r < 0)
+ return r;
+
+ r = fopen_temporary(path, f, temp_path);
+
+ mac_selinux_create_file_clear();
+
+ return r;
+}
diff --git a/src/shared/tmpfile-util-label.h b/src/shared/tmpfile-util-label.h
new file mode 100644
index 0000000..97a8751
--- /dev/null
+++ b/src/shared/tmpfile-util-label.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdio.h>
+
+/* These functions are split out of tmpfile-util.h (and not for example just flags to the functions they wrap) in order
+ * to optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but
+ * not for all */
+
+int fopen_temporary_label(const char *target, const char *path, FILE **f, char **temp_path);
diff --git a/src/shared/tomoyo-util.c b/src/shared/tomoyo-util.c
new file mode 100644
index 0000000..75c24d8
--- /dev/null
+++ b/src/shared/tomoyo-util.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <unistd.h>
+
+#include "tomoyo-util.h"
+
+bool mac_tomoyo_use(void) {
+ static int cached_use = -1;
+
+ if (cached_use < 0)
+ cached_use = (access("/sys/kernel/security/tomoyo/version",
+ F_OK) == 0);
+
+ return cached_use;
+}
diff --git a/src/shared/tomoyo-util.h b/src/shared/tomoyo-util.h
new file mode 100644
index 0000000..06e8227
--- /dev/null
+++ b/src/shared/tomoyo-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+bool mac_tomoyo_use(void);
diff --git a/src/shared/udev-util.c b/src/shared/udev-util.c
new file mode 100644
index 0000000..4200032
--- /dev/null
+++ b/src/shared/udev-util.c
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "log.h"
+#include "parse-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "udev-util.h"
+#include "udev.h"
+
+static const char* const resolve_name_timing_table[_RESOLVE_NAME_TIMING_MAX] = {
+ [RESOLVE_NAME_NEVER] = "never",
+ [RESOLVE_NAME_LATE] = "late",
+ [RESOLVE_NAME_EARLY] = "early",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(resolve_name_timing, ResolveNameTiming);
+
+int udev_parse_config_full(
+ unsigned *ret_children_max,
+ usec_t *ret_exec_delay_usec,
+ usec_t *ret_event_timeout_usec,
+ ResolveNameTiming *ret_resolve_name_timing) {
+
+ _cleanup_free_ char *log_val = NULL, *children_max = NULL, *exec_delay = NULL, *event_timeout = NULL, *resolve_names = NULL;
+ int r;
+
+ r = parse_env_file(NULL, "/etc/udev/udev.conf",
+ "udev_log", &log_val,
+ "children_max", &children_max,
+ "exec_delay", &exec_delay,
+ "event_timeout", &event_timeout,
+ "resolve_names", &resolve_names);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ if (log_val) {
+ const char *log;
+ size_t n;
+
+ /* unquote */
+ n = strlen(log_val);
+ if (n >= 2 &&
+ ((log_val[0] == '"' && log_val[n-1] == '"') ||
+ (log_val[0] == '\'' && log_val[n-1] == '\''))) {
+ log_val[n - 1] = '\0';
+ log = log_val + 1;
+ } else
+ log = log_val;
+
+ /* we set the udev log level here explicitly, this is supposed
+ * to regulate the code in libudev/ and udev/. */
+ r = log_set_max_level_from_string_realm(LOG_REALM_UDEV, log);
+ if (r < 0)
+ log_debug_errno(r, "/etc/udev/udev.conf: failed to set udev log level '%s', ignoring: %m", log);
+ }
+
+ if (ret_children_max && children_max) {
+ r = safe_atou(children_max, ret_children_max);
+ if (r < 0)
+ log_notice_errno(r, "/etc/udev/udev.conf: failed to set parse children_max=%s, ignoring: %m", children_max);
+ }
+
+ if (ret_exec_delay_usec && exec_delay) {
+ r = parse_sec(exec_delay, ret_exec_delay_usec);
+ if (r < 0)
+ log_notice_errno(r, "/etc/udev/udev.conf: failed to set parse exec_delay=%s, ignoring: %m", exec_delay);
+ }
+
+ if (ret_event_timeout_usec && event_timeout) {
+ r = parse_sec(event_timeout, ret_event_timeout_usec);
+ if (r < 0)
+ log_notice_errno(r, "/etc/udev/udev.conf: failed to set parse event_timeout=%s, ignoring: %m", event_timeout);
+ }
+
+ if (ret_resolve_name_timing && resolve_names) {
+ ResolveNameTiming t;
+
+ t = resolve_name_timing_from_string(resolve_names);
+ if (t < 0)
+ log_notice("/etc/udev/udev.conf: failed to set parse resolve_names=%s, ignoring.", resolve_names);
+ else
+ *ret_resolve_name_timing = t;
+ }
+
+ return 0;
+}
+
+struct DeviceMonitorData {
+ const char *sysname;
+ sd_device *device;
+};
+
+static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ struct DeviceMonitorData *data = userdata;
+ const char *sysname;
+
+ assert(device);
+ assert(data);
+ assert(data->sysname);
+ assert(!data->device);
+
+ if (sd_device_get_sysname(device, &sysname) >= 0 && streq(sysname, data->sysname)) {
+ data->device = sd_device_ref(device);
+ return sd_event_exit(sd_device_monitor_get_event(monitor), 0);
+ }
+
+ return 0;
+}
+
+int device_wait_for_initialization(sd_device *device, const char *subsystem, sd_device **ret) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ struct DeviceMonitorData data = {};
+ int r;
+
+ assert(device);
+ assert(subsystem);
+
+ if (sd_device_get_is_initialized(device) > 0) {
+ if (ret)
+ *ret = sd_device_ref(device);
+ return 0;
+ }
+
+ assert_se(sd_device_get_sysname(device, &data.sysname) >= 0);
+
+ /* Wait until the device is initialized, so that we can get access to the ID_PATH property */
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get default event: %m");
+
+ r = sd_device_monitor_new(&monitor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire monitor: %m");
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, subsystem, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add %s subsystem match to monitor: %m", subsystem);
+
+ r = sd_device_monitor_attach_event(monitor, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach event to device monitor: %m");
+
+ r = sd_device_monitor_start(monitor, device_monitor_handler, &data);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+
+ /* Check again, maybe things changed. Udev will re-read the db if the device wasn't initialized
+ * yet. */
+ if (sd_device_get_is_initialized(device) > 0) {
+ if (ret)
+ *ret = sd_device_ref(device);
+ return 0;
+ }
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Event loop failed: %m");
+
+ if (ret)
+ *ret = TAKE_PTR(data.device);
+ return 0;
+}
diff --git a/src/shared/udev-util.h b/src/shared/udev-util.h
new file mode 100644
index 0000000..932c4a9
--- /dev/null
+++ b/src/shared/udev-util.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-device.h"
+
+#include "time-util.h"
+
+typedef enum ResolveNameTiming {
+ RESOLVE_NAME_NEVER,
+ RESOLVE_NAME_LATE,
+ RESOLVE_NAME_EARLY,
+ _RESOLVE_NAME_TIMING_MAX,
+ _RESOLVE_NAME_TIMING_INVALID = -1,
+} ResolveNameTiming;
+
+ResolveNameTiming resolve_name_timing_from_string(const char *s) _pure_;
+const char *resolve_name_timing_to_string(ResolveNameTiming i) _const_;
+
+int udev_parse_config_full(
+ unsigned *ret_children_max,
+ usec_t *ret_exec_delay_usec,
+ usec_t *ret_event_timeout_usec,
+ ResolveNameTiming *ret_resolve_name_timing);
+
+static inline int udev_parse_config(void) {
+ return udev_parse_config_full(NULL, NULL, NULL, NULL);
+}
+
+int device_wait_for_initialization(sd_device *device, const char *subsystem, sd_device **ret);
diff --git a/src/shared/uid-range.c b/src/shared/uid-range.c
new file mode 100644
index 0000000..5fa7bd2
--- /dev/null
+++ b/src/shared/uid-range.c
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "uid-range.h"
+#include "user-util.h"
+#include "util.h"
+
+static bool uid_range_intersect(UidRange *range, uid_t start, uid_t nr) {
+ assert(range);
+
+ return range->start <= start + nr &&
+ range->start + range->nr >= start;
+}
+
+static void uid_range_coalesce(UidRange **p, unsigned *n) {
+ unsigned i, j;
+
+ assert(p);
+ assert(n);
+
+ for (i = 0; i < *n; i++) {
+ for (j = i + 1; j < *n; j++) {
+ UidRange *x = (*p)+i, *y = (*p)+j;
+
+ if (uid_range_intersect(x, y->start, y->nr)) {
+ uid_t begin, end;
+
+ begin = MIN(x->start, y->start);
+ end = MAX(x->start + x->nr, y->start + y->nr);
+
+ x->start = begin;
+ x->nr = end - begin;
+
+ if (*n > j+1)
+ memmove(y, y+1, sizeof(UidRange) * (*n - j -1));
+
+ (*n)--;
+ j--;
+ }
+ }
+ }
+}
+
+static int uid_range_compare(const UidRange *a, const UidRange *b) {
+ int r;
+
+ r = CMP(a->start, b->start);
+ if (r != 0)
+ return r;
+
+ return CMP(a->nr, b->nr);
+}
+
+int uid_range_add(UidRange **p, unsigned *n, uid_t start, uid_t nr) {
+ bool found = false;
+ UidRange *x;
+ unsigned i;
+
+ assert(p);
+ assert(n);
+
+ if (nr <= 0)
+ return 0;
+
+ for (i = 0; i < *n; i++) {
+ x = (*p) + i;
+ if (uid_range_intersect(x, start, nr)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) {
+ uid_t begin, end;
+
+ begin = MIN(x->start, start);
+ end = MAX(x->start + x->nr, start + nr);
+
+ x->start = begin;
+ x->nr = end - begin;
+ } else {
+ UidRange *t;
+
+ t = reallocarray(*p, *n + 1, sizeof(UidRange));
+ if (!t)
+ return -ENOMEM;
+
+ *p = t;
+ x = t + ((*n) ++);
+
+ x->start = start;
+ x->nr = nr;
+ }
+
+ typesafe_qsort(*p, *n, uid_range_compare);
+ uid_range_coalesce(p, n);
+
+ return *n;
+}
+
+int uid_range_add_str(UidRange **p, unsigned *n, const char *s) {
+ uid_t start, nr;
+ const char *t;
+ int r;
+
+ assert(p);
+ assert(n);
+ assert(s);
+
+ t = strchr(s, '-');
+ if (t) {
+ char *b;
+ uid_t end;
+
+ b = strndupa(s, t - s);
+ r = parse_uid(b, &start);
+ if (r < 0)
+ return r;
+
+ r = parse_uid(t+1, &end);
+ if (r < 0)
+ return r;
+
+ if (end < start)
+ return -EINVAL;
+
+ nr = end - start + 1;
+ } else {
+ r = parse_uid(s, &start);
+ if (r < 0)
+ return r;
+
+ nr = 1;
+ }
+
+ return uid_range_add(p, n, start, nr);
+}
+
+int uid_range_next_lower(const UidRange *p, unsigned n, uid_t *uid) {
+ uid_t closest = UID_INVALID, candidate;
+ unsigned i;
+
+ assert(p);
+ assert(uid);
+
+ candidate = *uid - 1;
+
+ for (i = 0; i < n; i++) {
+ uid_t begin, end;
+
+ begin = p[i].start;
+ end = p[i].start + p[i].nr - 1;
+
+ if (candidate >= begin && candidate <= end) {
+ *uid = candidate;
+ return 1;
+ }
+
+ if (end < candidate)
+ closest = end;
+ }
+
+ if (closest == UID_INVALID)
+ return -EBUSY;
+
+ *uid = closest;
+ return 1;
+}
+
+bool uid_range_contains(const UidRange *p, unsigned n, uid_t uid) {
+ unsigned i;
+
+ assert(p);
+ assert(uid);
+
+ for (i = 0; i < n; i++)
+ if (uid >= p[i].start && uid < p[i].start + p[i].nr)
+ return true;
+
+ return false;
+}
diff --git a/src/shared/uid-range.h b/src/shared/uid-range.h
new file mode 100644
index 0000000..49ba382
--- /dev/null
+++ b/src/shared/uid-range.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+typedef struct UidRange {
+ uid_t start, nr;
+} UidRange;
+
+int uid_range_add(UidRange **p, unsigned *n, uid_t start, uid_t nr);
+int uid_range_add_str(UidRange **p, unsigned *n, const char *s);
+
+int uid_range_next_lower(const UidRange *p, unsigned n, uid_t *uid);
+bool uid_range_contains(const UidRange *p, unsigned n, uid_t uid);
diff --git a/src/shared/utmp-wtmp.c b/src/shared/utmp-wtmp.c
new file mode 100644
index 0000000..ef9427f
--- /dev/null
+++ b/src/shared/utmp-wtmp.c
@@ -0,0 +1,427 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+#include <utmpx.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hostname-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "utmp-wtmp.h"
+
+int utmp_get_runlevel(int *runlevel, int *previous) {
+ struct utmpx *found, lookup = { .ut_type = RUN_LVL };
+ int r;
+ const char *e;
+
+ assert(runlevel);
+
+ /* If these values are set in the environment this takes
+ * precedence. Presumably, sysvinit does this to work around a
+ * race condition that would otherwise exist where we'd always
+ * go to disk and hence might read runlevel data that might be
+ * very new and does not apply to the current script being
+ * executed. */
+
+ e = getenv("RUNLEVEL");
+ if (e && e[0] > 0) {
+ *runlevel = e[0];
+
+ if (previous) {
+ /* $PREVLEVEL seems to be an Upstart thing */
+
+ e = getenv("PREVLEVEL");
+ if (e && e[0] > 0)
+ *previous = e[0];
+ else
+ *previous = 0;
+ }
+
+ return 0;
+ }
+
+ if (utmpxname(_PATH_UTMPX) < 0)
+ return -errno;
+
+ setutxent();
+
+ found = getutxid(&lookup);
+ if (!found)
+ r = -errno;
+ else {
+ int a, b;
+
+ a = found->ut_pid & 0xFF;
+ b = (found->ut_pid >> 8) & 0xFF;
+
+ *runlevel = a;
+ if (previous)
+ *previous = b;
+
+ r = 0;
+ }
+
+ endutxent();
+
+ return r;
+}
+
+static void init_timestamp(struct utmpx *store, usec_t t) {
+ assert(store);
+
+ if (t <= 0)
+ t = now(CLOCK_REALTIME);
+
+ store->ut_tv.tv_sec = t / USEC_PER_SEC;
+ store->ut_tv.tv_usec = t % USEC_PER_SEC;
+}
+
+static void init_entry(struct utmpx *store, usec_t t) {
+ struct utsname uts = {};
+
+ assert(store);
+
+ init_timestamp(store, t);
+
+ if (uname(&uts) >= 0)
+ strncpy(store->ut_host, uts.release, sizeof(store->ut_host));
+
+ strncpy(store->ut_line, "~", sizeof(store->ut_line)); /* or ~~ ? */
+ strncpy(store->ut_id, "~~", sizeof(store->ut_id));
+}
+
+static int write_entry_utmp(const struct utmpx *store) {
+ int r;
+
+ assert(store);
+
+ /* utmp is similar to wtmp, but there is only one entry for
+ * each entry type resp. user; i.e. basically a key/value
+ * table. */
+
+ if (utmpxname(_PATH_UTMPX) < 0)
+ return -errno;
+
+ setutxent();
+
+ if (!pututxline(store))
+ r = -errno;
+ else
+ r = 0;
+
+ endutxent();
+
+ return r;
+}
+
+static int write_entry_wtmp(const struct utmpx *store) {
+ assert(store);
+
+ /* wtmp is a simple append-only file where each entry is
+ simply appended to the end; i.e. basically a log. */
+
+ errno = 0;
+ updwtmpx(_PATH_WTMPX, store);
+ return -errno;
+}
+
+static int write_utmp_wtmp(const struct utmpx *store_utmp, const struct utmpx *store_wtmp) {
+ int r, s;
+
+ r = write_entry_utmp(store_utmp);
+ s = write_entry_wtmp(store_wtmp);
+
+ if (r >= 0)
+ r = s;
+
+ /* If utmp/wtmp have been disabled, that's a good thing, hence
+ * ignore the errors */
+ if (r == -ENOENT)
+ r = 0;
+
+ return r;
+}
+
+static int write_entry_both(const struct utmpx *store) {
+ return write_utmp_wtmp(store, store);
+}
+
+int utmp_put_shutdown(void) {
+ struct utmpx store = {};
+
+ init_entry(&store, 0);
+
+ store.ut_type = RUN_LVL;
+ strncpy(store.ut_user, "shutdown", sizeof(store.ut_user));
+
+ return write_entry_both(&store);
+}
+
+int utmp_put_reboot(usec_t t) {
+ struct utmpx store = {};
+
+ init_entry(&store, t);
+
+ store.ut_type = BOOT_TIME;
+ strncpy(store.ut_user, "reboot", sizeof(store.ut_user));
+
+ return write_entry_both(&store);
+}
+
+_pure_ static const char *sanitize_id(const char *id) {
+ size_t l;
+
+ assert(id);
+ l = strlen(id);
+
+ if (l <= sizeof(((struct utmpx*) NULL)->ut_id))
+ return id;
+
+ return id + l - sizeof(((struct utmpx*) NULL)->ut_id);
+}
+
+int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user) {
+ struct utmpx store = {
+ .ut_type = INIT_PROCESS,
+ .ut_pid = pid,
+ .ut_session = sid,
+ };
+ int r;
+
+ assert(id);
+
+ init_timestamp(&store, 0);
+
+ /* ut_id needs only be nul-terminated if it is shorter than sizeof(ut_id) */
+ strncpy(store.ut_id, sanitize_id(id), sizeof(store.ut_id));
+
+ if (line)
+ strncpy(store.ut_line, basename(line), sizeof(store.ut_line));
+
+ r = write_entry_both(&store);
+ if (r < 0)
+ return r;
+
+ if (IN_SET(ut_type, LOGIN_PROCESS, USER_PROCESS)) {
+ store.ut_type = LOGIN_PROCESS;
+ r = write_entry_both(&store);
+ if (r < 0)
+ return r;
+ }
+
+ if (ut_type == USER_PROCESS) {
+ store.ut_type = USER_PROCESS;
+ strncpy(store.ut_user, user, sizeof(store.ut_user)-1);
+ r = write_entry_both(&store);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int utmp_put_dead_process(const char *id, pid_t pid, int code, int status) {
+ struct utmpx lookup = {
+ .ut_type = INIT_PROCESS /* looks for DEAD_PROCESS, LOGIN_PROCESS, USER_PROCESS, too */
+ }, store, store_wtmp, *found;
+
+ assert(id);
+
+ setutxent();
+
+ /* ut_id needs only be nul-terminated if it is shorter than sizeof(ut_id) */
+ strncpy(lookup.ut_id, sanitize_id(id), sizeof(lookup.ut_id));
+
+ found = getutxid(&lookup);
+ if (!found)
+ return 0;
+
+ if (found->ut_pid != pid)
+ return 0;
+
+ memcpy(&store, found, sizeof(store));
+ store.ut_type = DEAD_PROCESS;
+ store.ut_exit.e_termination = code;
+ store.ut_exit.e_exit = status;
+
+ zero(store.ut_user);
+ zero(store.ut_host);
+ zero(store.ut_tv);
+
+ memcpy(&store_wtmp, &store, sizeof(store_wtmp));
+ /* wtmp wants the current time */
+ init_timestamp(&store_wtmp, 0);
+
+ return write_utmp_wtmp(&store, &store_wtmp);
+}
+
+int utmp_put_runlevel(int runlevel, int previous) {
+ struct utmpx store = {};
+ int r;
+
+ assert(runlevel > 0);
+
+ if (previous <= 0) {
+ /* Find the old runlevel automatically */
+
+ r = utmp_get_runlevel(&previous, NULL);
+ if (r < 0) {
+ if (r != -ESRCH)
+ return r;
+
+ previous = 0;
+ }
+ }
+
+ if (previous == runlevel)
+ return 0;
+
+ init_entry(&store, 0);
+
+ store.ut_type = RUN_LVL;
+ store.ut_pid = (runlevel & 0xFF) | ((previous & 0xFF) << 8);
+ strncpy(store.ut_user, "runlevel", sizeof(store.ut_user));
+
+ return write_entry_both(&store);
+}
+
+#define TIMEOUT_MSEC 50
+
+static int write_to_terminal(const char *tty, const char *message) {
+ _cleanup_close_ int fd = -1;
+ const char *p;
+ size_t left;
+ usec_t end;
+
+ assert(tty);
+ assert(message);
+
+ fd = open(tty, O_WRONLY|O_NONBLOCK|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0 || !isatty(fd))
+ return -errno;
+
+ p = message;
+ left = strlen(message);
+
+ end = now(CLOCK_MONOTONIC) + TIMEOUT_MSEC*USEC_PER_MSEC;
+
+ while (left > 0) {
+ ssize_t n;
+ struct pollfd pollfd = {
+ .fd = fd,
+ .events = POLLOUT,
+ };
+ usec_t t;
+ int k;
+
+ t = now(CLOCK_MONOTONIC);
+
+ if (t >= end)
+ return -ETIME;
+
+ k = poll(&pollfd, 1, (end - t) / USEC_PER_MSEC);
+ if (k < 0)
+ return -errno;
+
+ if (k == 0)
+ return -ETIME;
+
+ n = write(fd, p, left);
+ if (n < 0) {
+ if (errno == EAGAIN)
+ continue;
+
+ return -errno;
+ }
+
+ assert((size_t) n <= left);
+
+ p += n;
+ left -= n;
+ }
+
+ return 0;
+}
+
+int utmp_wall(
+ const char *message,
+ const char *username,
+ const char *origin_tty,
+ bool (*match_tty)(const char *tty, void *userdata),
+ void *userdata) {
+
+ _cleanup_free_ char *text = NULL, *hn = NULL, *un = NULL, *stdin_tty = NULL;
+ char date[FORMAT_TIMESTAMP_MAX];
+ struct utmpx *u;
+ int r;
+
+ hn = gethostname_malloc();
+ if (!hn)
+ return -ENOMEM;
+ if (!username) {
+ un = getlogname_malloc();
+ if (!un)
+ return -ENOMEM;
+ }
+
+ if (!origin_tty) {
+ getttyname_harder(STDIN_FILENO, &stdin_tty);
+ origin_tty = stdin_tty;
+ }
+
+ if (asprintf(&text,
+ "\a\r\n"
+ "Broadcast message from %s@%s%s%s (%s):\r\n\r\n"
+ "%s\r\n\r\n",
+ un ?: username, hn,
+ origin_tty ? " on " : "", strempty(origin_tty),
+ format_timestamp(date, sizeof(date), now(CLOCK_REALTIME)),
+ message) < 0)
+ return -ENOMEM;
+
+ setutxent();
+
+ r = 0;
+
+ while ((u = getutxent())) {
+ _cleanup_free_ char *buf = NULL;
+ const char *path;
+ int q;
+
+ if (u->ut_type != USER_PROCESS || u->ut_user[0] == 0)
+ continue;
+
+ /* this access is fine, because STRLEN("/dev/") << 32 (UT_LINESIZE) */
+ if (path_startswith(u->ut_line, "/dev/"))
+ path = u->ut_line;
+ else {
+ if (asprintf(&buf, "/dev/%.*s", (int) sizeof(u->ut_line), u->ut_line) < 0)
+ return -ENOMEM;
+
+ path = buf;
+ }
+
+ if (!match_tty || match_tty(path, userdata)) {
+ q = write_to_terminal(path, text);
+ if (q < 0)
+ r = q;
+ }
+ }
+
+ return r;
+}
diff --git a/src/shared/utmp-wtmp.h b/src/shared/utmp-wtmp.h
new file mode 100644
index 0000000..9e433cf
--- /dev/null
+++ b/src/shared/utmp-wtmp.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "time-util.h"
+#include "util.h"
+
+#if ENABLE_UTMP
+int utmp_get_runlevel(int *runlevel, int *previous);
+
+int utmp_put_shutdown(void);
+int utmp_put_reboot(usec_t timestamp);
+int utmp_put_runlevel(int runlevel, int previous);
+
+int utmp_put_dead_process(const char *id, pid_t pid, int code, int status);
+int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user);
+
+int utmp_wall(
+ const char *message,
+ const char *username,
+ const char *origin_tty,
+ bool (*match_tty)(const char *tty, void *userdata),
+ void *userdata);
+
+#else /* ENABLE_UTMP */
+
+static inline int utmp_get_runlevel(int *runlevel, int *previous) {
+ return -ESRCH;
+}
+static inline int utmp_put_shutdown(void) {
+ return 0;
+}
+static inline int utmp_put_reboot(usec_t timestamp) {
+ return 0;
+}
+static inline int utmp_put_runlevel(int runlevel, int previous) {
+ return 0;
+}
+static inline int utmp_put_dead_process(const char *id, pid_t pid, int code, int status) {
+ return 0;
+}
+static inline int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user) {
+ return 0;
+}
+static inline int utmp_wall(
+ const char *message,
+ const char *username,
+ const char *origin_tty,
+ bool (*match_tty)(const char *tty, void *userdata),
+ void *userdata) {
+ return 0;
+}
+
+#endif /* ENABLE_UTMP */
diff --git a/src/shared/verbs.c b/src/shared/verbs.c
new file mode 100644
index 0000000..7c5dcb0
--- /dev/null
+++ b/src/shared/verbs.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "env-util.h"
+#include "log.h"
+#include "macro.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "verbs.h"
+#include "virt.h"
+
+/* Wraps running_in_chroot() which is used in various places, but also adds an environment variable check so external
+ * processes can reliably force this on.
+ */
+bool running_in_chroot_or_offline(void) {
+ int r;
+
+ /* Added to support use cases like rpm-ostree, where from %post scripts we only want to execute "preset", but
+ * not "start"/"restart" for example.
+ *
+ * See docs/ENVIRONMENT.md for docs.
+ */
+ r = getenv_bool("SYSTEMD_OFFLINE");
+ if (r < 0 && r != -ENXIO)
+ log_debug_errno(r, "Failed to parse $SYSTEMD_OFFLINE: %m");
+ else if (r >= 0)
+ return r > 0;
+
+ /* We've had this condition check for a long time which basically checks for legacy chroot case like Fedora's
+ * "mock", which is used for package builds. We don't want to try to start systemd services there, since
+ * without --new-chroot we don't even have systemd running, and even if we did, adding a concept of background
+ * daemons to builds would be an enormous change, requiring considering things like how the journal output is
+ * handled, etc. And there's really not a use case today for a build talking to a service.
+ *
+ * Note this call itself also looks for a different variable SYSTEMD_IGNORE_CHROOT=1.
+ */
+ r = running_in_chroot();
+ if (r < 0)
+ log_debug_errno(r, "running_in_chroot(): %m");
+
+ return r > 0;
+}
+
+int dispatch_verb(int argc, char *argv[], const Verb verbs[], void *userdata) {
+ const Verb *verb;
+ const char *name;
+ unsigned i;
+ int left, r;
+
+ assert(verbs);
+ assert(verbs[0].dispatch);
+ assert(argc >= 0);
+ assert(argv);
+ assert(argc >= optind);
+
+ left = argc - optind;
+ argv += optind;
+ optind = 0;
+ name = argv[0];
+
+ for (i = 0;; i++) {
+ bool found;
+
+ /* At the end of the list? */
+ if (!verbs[i].dispatch) {
+ if (name)
+ log_error("Unknown operation %s.", name);
+ else
+ log_error("Requires operation parameter.");
+ return -EINVAL;
+ }
+
+ if (name)
+ found = streq(name, verbs[i].verb);
+ else
+ found = verbs[i].flags & VERB_DEFAULT;
+
+ if (found) {
+ verb = &verbs[i];
+ break;
+ }
+ }
+
+ assert(verb);
+
+ if (!name)
+ left = 1;
+
+ if (verb->min_args != VERB_ANY &&
+ (unsigned) left < verb->min_args)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too few arguments.");
+
+ if (verb->max_args != VERB_ANY &&
+ (unsigned) left > verb->max_args)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments.");
+
+ if ((verb->flags & VERB_ONLINE_ONLY) && running_in_chroot_or_offline()) {
+ if (name)
+ log_info("Running in chroot, ignoring request: %s", name);
+ else
+ log_info("Running in chroot, ignoring request.");
+ return 0;
+ }
+
+ if (verb->flags & VERB_MUST_BE_ROOT) {
+ r = must_be_root();
+ if (r < 0)
+ return r;
+ }
+
+ if (name)
+ return verb->dispatch(left, argv, userdata);
+ else {
+ char* fake[2] = {
+ (char*) verb->verb,
+ NULL
+ };
+
+ return verb->dispatch(1, fake, userdata);
+ }
+}
diff --git a/src/shared/verbs.h b/src/shared/verbs.h
new file mode 100644
index 0000000..010c0df
--- /dev/null
+++ b/src/shared/verbs.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#define VERB_ANY ((unsigned) -1)
+
+typedef enum VerbFlags {
+ VERB_DEFAULT = 1 << 0,
+ VERB_ONLINE_ONLY = 1 << 1,
+ VERB_MUST_BE_ROOT = 1 << 2,
+} VerbFlags;
+
+typedef struct {
+ const char *verb;
+ unsigned min_args, max_args;
+ VerbFlags flags;
+ int (* const dispatch)(int argc, char *argv[], void *userdata);
+} Verb;
+
+bool running_in_chroot_or_offline(void);
+
+int dispatch_verb(int argc, char *argv[], const Verb verbs[], void *userdata);
diff --git a/src/shared/vlan-util.c b/src/shared/vlan-util.c
new file mode 100644
index 0000000..2f9df7d
--- /dev/null
+++ b/src/shared/vlan-util.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "conf-parser.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "vlan-util.h"
+
+int parse_vlanid(const char *p, uint16_t *ret) {
+ uint16_t id;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ r = safe_atou16(p, &id);
+ if (r < 0)
+ return r;
+ if (!vlanid_is_valid(id))
+ return -ERANGE;
+
+ *ret = id;
+ return 0;
+}
+
+int config_parse_default_port_vlanid(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ uint16_t *id = data;
+
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(rvalue, "none")) {
+ *id = 0;
+ return 0;
+ }
+
+ return config_parse_vlanid(unit, filename, line, section, section_line,
+ lvalue, ltype, rvalue, data, userdata);
+}
+
+int config_parse_vlanid(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint16_t *id = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_vlanid(rvalue, id);
+ if (r == -ERANGE) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "VLAN identifier outside of valid range 0…4094, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse VLAN identifier value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
diff --git a/src/shared/vlan-util.h b/src/shared/vlan-util.h
new file mode 100644
index 0000000..ebe4331
--- /dev/null
+++ b/src/shared/vlan-util.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+#include "conf-parser.h"
+
+#define VLANID_MAX 4094
+#define VLANID_INVALID UINT16_MAX
+
+/* Note that we permit VLAN Id 0 here, as that is apparently OK by the Linux kernel */
+static inline bool vlanid_is_valid(uint16_t id) {
+ return id <= VLANID_MAX;
+}
+
+int parse_vlanid(const char *p, uint16_t *ret);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_default_port_vlanid);
+CONFIG_PARSER_PROTOTYPE(config_parse_vlanid);
diff --git a/src/shared/volatile-util.c b/src/shared/volatile-util.c
new file mode 100644
index 0000000..4d75bc0
--- /dev/null
+++ b/src/shared/volatile-util.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "volatile-util.h"
+
+int query_volatile_mode(VolatileMode *ret) {
+ _cleanup_free_ char *mode = NULL;
+ VolatileMode m = VOLATILE_NO;
+ int r;
+
+ r = proc_cmdline_get_key("systemd.volatile", PROC_CMDLINE_VALUE_OPTIONAL, &mode);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ goto finish;
+
+ if (mode) {
+ m = volatile_mode_from_string(mode);
+ if (m < 0)
+ return -EINVAL;
+ } else
+ m = VOLATILE_YES;
+
+ r = 1;
+
+finish:
+ *ret = m;
+ return r;
+}
+
+static const char* const volatile_mode_table[_VOLATILE_MODE_MAX] = {
+ [VOLATILE_NO] = "no",
+ [VOLATILE_YES] = "yes",
+ [VOLATILE_STATE] = "state",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(volatile_mode, VolatileMode, VOLATILE_YES);
diff --git a/src/shared/volatile-util.h b/src/shared/volatile-util.h
new file mode 100644
index 0000000..8761c44
--- /dev/null
+++ b/src/shared/volatile-util.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef enum VolatileMode {
+ VOLATILE_NO,
+ VOLATILE_YES,
+ VOLATILE_STATE,
+ _VOLATILE_MODE_MAX,
+ _VOLATILE_MODE_INVALID = -1
+} VolatileMode;
+
+VolatileMode volatile_mode_from_string(const char *s);
+const char* volatile_mode_to_string(VolatileMode m);
+
+int query_volatile_mode(VolatileMode *ret);
diff --git a/src/shared/watchdog.c b/src/shared/watchdog.c
new file mode 100644
index 0000000..c423af6
--- /dev/null
+++ b/src/shared/watchdog.c
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <syslog.h>
+#include <unistd.h>
+#include <linux/watchdog.h>
+
+#include "fd-util.h"
+#include "log.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "watchdog.h"
+
+static int watchdog_fd = -1;
+static char *watchdog_device = NULL;
+static usec_t watchdog_timeout = USEC_INFINITY;
+
+static int update_timeout(void) {
+ int r;
+
+ if (watchdog_fd < 0)
+ return 0;
+
+ if (watchdog_timeout == USEC_INFINITY)
+ return 0;
+ else if (watchdog_timeout == 0) {
+ int flags;
+
+ flags = WDIOS_DISABLECARD;
+ r = ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags);
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to disable hardware watchdog: %m");
+ } else {
+ int sec, flags;
+ char buf[FORMAT_TIMESPAN_MAX];
+
+ sec = (int) DIV_ROUND_UP(watchdog_timeout, USEC_PER_SEC);
+ r = ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &sec);
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to set timeout to %is: %m", sec);
+
+ watchdog_timeout = (usec_t) sec * USEC_PER_SEC;
+ log_info("Set hardware watchdog to %s.", format_timespan(buf, sizeof(buf), watchdog_timeout, 0));
+
+ flags = WDIOS_ENABLECARD;
+ r = ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags);
+ if (r < 0) {
+ /* ENOTTY means the watchdog is always enabled so we're fine */
+ log_full(errno == ENOTTY ? LOG_DEBUG : LOG_WARNING,
+ "Failed to enable hardware watchdog: %m");
+ if (errno != ENOTTY)
+ return -errno;
+ }
+
+ r = ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0);
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to ping hardware watchdog: %m");
+ }
+
+ return 0;
+}
+
+static int open_watchdog(void) {
+ struct watchdog_info ident;
+
+ if (watchdog_fd >= 0)
+ return 0;
+
+ watchdog_fd = open(watchdog_device ?: "/dev/watchdog",
+ O_WRONLY|O_CLOEXEC);
+ if (watchdog_fd < 0)
+ return -errno;
+
+ if (ioctl(watchdog_fd, WDIOC_GETSUPPORT, &ident) >= 0)
+ log_info("Hardware watchdog '%s', version %x",
+ ident.identity,
+ ident.firmware_version);
+
+ return update_timeout();
+}
+
+int watchdog_set_device(char *path) {
+ return free_and_strdup(&watchdog_device, path);
+}
+
+int watchdog_set_timeout(usec_t *usec) {
+ int r;
+
+ watchdog_timeout = *usec;
+
+ /* If we didn't open the watchdog yet and didn't get any
+ * explicit timeout value set, don't do anything */
+ if (watchdog_fd < 0 && watchdog_timeout == USEC_INFINITY)
+ return 0;
+
+ if (watchdog_fd < 0)
+ r = open_watchdog();
+ else
+ r = update_timeout();
+
+ *usec = watchdog_timeout;
+
+ return r;
+}
+
+int watchdog_ping(void) {
+ int r;
+
+ if (watchdog_fd < 0) {
+ r = open_watchdog();
+ if (r < 0)
+ return r;
+ }
+
+ r = ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0);
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to ping hardware watchdog: %m");
+
+ return 0;
+}
+
+void watchdog_close(bool disarm) {
+ int r;
+
+ if (watchdog_fd < 0)
+ return;
+
+ if (disarm) {
+ int flags;
+
+ /* Explicitly disarm it */
+ flags = WDIOS_DISABLECARD;
+ r = ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags);
+ if (r < 0)
+ log_warning_errno(errno, "Failed to disable hardware watchdog: %m");
+
+ /* To be sure, use magic close logic, too */
+ for (;;) {
+ static const char v = 'V';
+
+ if (write(watchdog_fd, &v, 1) > 0)
+ break;
+
+ if (errno != EINTR) {
+ log_error_errno(errno, "Failed to disarm watchdog timer: %m");
+ break;
+ }
+ }
+ }
+
+ watchdog_fd = safe_close(watchdog_fd);
+}
diff --git a/src/shared/watchdog.h b/src/shared/watchdog.h
new file mode 100644
index 0000000..a345e4b
--- /dev/null
+++ b/src/shared/watchdog.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+#include "util.h"
+
+int watchdog_set_device(char *path);
+int watchdog_set_timeout(usec_t *usec);
+int watchdog_ping(void);
+void watchdog_close(bool disarm);
+
+static inline void watchdog_free_device(void) {
+ (void) watchdog_set_device(NULL);
+}
diff --git a/src/shared/web-util.c b/src/shared/web-util.c
new file mode 100644
index 0000000..edf650d
--- /dev/null
+++ b/src/shared/web-util.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdbool.h>
+
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+#include "web-util.h"
+
+bool http_etag_is_valid(const char *etag) {
+ if (isempty(etag))
+ return false;
+
+ if (!endswith(etag, "\""))
+ return false;
+
+ if (!STARTSWITH_SET(etag, "\"", "W/\""))
+ return false;
+
+ return true;
+}
+
+bool http_url_is_valid(const char *url) {
+ const char *p;
+
+ if (isempty(url))
+ return false;
+
+ p = STARTSWITH_SET(url, "http://", "https://");
+ if (!p)
+ return false;
+
+ if (isempty(p))
+ return false;
+
+ return ascii_is_valid(p);
+}
+
+bool documentation_url_is_valid(const char *url) {
+ const char *p;
+
+ if (isempty(url))
+ return false;
+
+ if (http_url_is_valid(url))
+ return true;
+
+ p = STARTSWITH_SET(url, "file:/", "info:", "man:");
+ if (isempty(p))
+ return false;
+
+ return ascii_is_valid(p);
+}
diff --git a/src/shared/web-util.h b/src/shared/web-util.h
new file mode 100644
index 0000000..c9e67e5
--- /dev/null
+++ b/src/shared/web-util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+bool http_url_is_valid(const char *url) _pure_;
+
+bool documentation_url_is_valid(const char *url) _pure_;
+
+bool http_etag_is_valid(const char *etag);
diff --git a/src/shared/wireguard-netlink.h b/src/shared/wireguard-netlink.h
new file mode 100644
index 0000000..eb17091
--- /dev/null
+++ b/src/shared/wireguard-netlink.h
@@ -0,0 +1,179 @@
+#pragma once
+
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR MIT)
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Documentation
+ * =============
+ *
+ * The below enums and macros are for interfacing with WireGuard, using generic
+ * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two
+ * methods: get and set. Note that while they share many common attributes, these
+ * two functions actually accept a slightly different set of inputs and outputs.
+ *
+ * WG_CMD_GET_DEVICE
+ * -----------------
+ *
+ * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain
+ * one but not both of:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ *
+ * The kernel will then return several messages (NLM_F_MULTI) containing the following
+ * tree of nested items:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN
+ * WGDEVICE_A_PUBLIC_KEY: len WG_KEY_LEN
+ * WGDEVICE_A_LISTEN_PORT: NLA_U16
+ * WGDEVICE_A_FWMARK: NLA_U32
+ * WGDEVICE_A_PEERS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN
+ * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN
+ * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6
+ * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16
+ * WGPEER_A_LAST_HANDSHAKE_TIME: struct timespec
+ * WGPEER_A_RX_BYTES: NLA_U64
+ * WGPEER_A_TX_BYTES: NLA_U64
+ * WGPEER_A_ALLOWEDIPS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGALLOWEDIP_A_FAMILY: NLA_U16
+ * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr
+ * WGALLOWEDIP_A_CIDR_MASK: NLA_U8
+ * 1: NLA_NESTED
+ * ...
+ * 2: NLA_NESTED
+ * ...
+ * ...
+ * 1: NLA_NESTED
+ * ...
+ * ...
+ *
+ * It is possible that all of the allowed IPs of a single peer will not
+ * fit within a single netlink message. In that case, the same peer will
+ * be written in the following message, except it will only contain
+ * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several
+ * times in a row for the same peer. It is then up to the receiver to
+ * coalesce adjacent peers. Likewise, it is possible that all peers will
+ * not fit within a single message. So, subsequent peers will be sent
+ * in following messages, except those will only contain WGDEVICE_A_IFNAME
+ * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these
+ * messages to form the complete list of peers.
+ *
+ * Since this is an NLA_F_DUMP command, the final message will always be
+ * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message
+ * contains an integer error code. It is either zero or a negative error
+ * code corresponding to the errno.
+ *
+ * WG_CMD_SET_DEVICE
+ * -----------------
+ *
+ * May only be called via NLM_F_REQUEST. The command should contain the following
+ * tree of nested items, containing one but not both of WGDEVICE_A_IFINDEX
+ * and WGDEVICE_A_IFNAME:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current
+ * peers should be removed prior to adding the list below.
+ * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove
+ * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly
+ * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable
+ * WGDEVICE_A_PEERS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN
+ * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the specified peer
+ * should be removed rather than added/updated and/or
+ * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed IPs of
+ * this peer should be removed prior to adding the list below.
+ * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove
+ * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6
+ * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable
+ * WGPEER_A_ALLOWEDIPS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGALLOWEDIP_A_FAMILY: NLA_U16
+ * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr
+ * WGALLOWEDIP_A_CIDR_MASK: NLA_U8
+ * 1: NLA_NESTED
+ * ...
+ * 2: NLA_NESTED
+ * ...
+ * ...
+ * 1: NLA_NESTED
+ * ...
+ * ...
+ *
+ * It is possible that the amount of configuration data exceeds that of
+ * the maximum message length accepted by the kernel. In that case,
+ * several messages should be sent one after another, with each
+ * successive one filling in information not contained in the prior. Note
+ * that if WGDEVICE_F_REPLACE_PEERS is specified in the first message, it
+ * probably should not be specified in fragments that come after, so that
+ * the list of peers is only cleared the first time but appened after.
+ * Likewise for peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the
+ * first message of a peer, it likely should not be specified in subsequent
+ * fragments.
+ *
+ * If an error occurs, NLMSG_ERROR will reply containing an errno.
+ */
+
+#define WG_GENL_NAME "wireguard"
+#define WG_GENL_VERSION 1
+
+#define WG_KEY_LEN 32
+
+enum wg_cmd {
+ WG_CMD_GET_DEVICE,
+ WG_CMD_SET_DEVICE,
+ __WG_CMD_MAX
+};
+#define WG_CMD_MAX (__WG_CMD_MAX - 1)
+
+enum wgdevice_flag {
+ WGDEVICE_F_REPLACE_PEERS = 1U << 0
+};
+enum wgdevice_attribute {
+ WGDEVICE_A_UNSPEC,
+ WGDEVICE_A_IFINDEX,
+ WGDEVICE_A_IFNAME,
+ WGDEVICE_A_PRIVATE_KEY,
+ WGDEVICE_A_PUBLIC_KEY,
+ WGDEVICE_A_FLAGS,
+ WGDEVICE_A_LISTEN_PORT,
+ WGDEVICE_A_FWMARK,
+ WGDEVICE_A_PEERS,
+ __WGDEVICE_A_LAST
+};
+#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1)
+
+enum wgpeer_flag {
+ WGPEER_F_REMOVE_ME = 1U << 0,
+ WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1
+};
+enum wgpeer_attribute {
+ WGPEER_A_UNSPEC,
+ WGPEER_A_PUBLIC_KEY,
+ WGPEER_A_PRESHARED_KEY,
+ WGPEER_A_FLAGS,
+ WGPEER_A_ENDPOINT,
+ WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
+ WGPEER_A_LAST_HANDSHAKE_TIME,
+ WGPEER_A_RX_BYTES,
+ WGPEER_A_TX_BYTES,
+ WGPEER_A_ALLOWEDIPS,
+ __WGPEER_A_LAST
+};
+#define WGPEER_A_MAX (__WGPEER_A_LAST - 1)
+
+enum wgallowedip_attribute {
+ WGALLOWEDIP_A_UNSPEC,
+ WGALLOWEDIP_A_FAMILY,
+ WGALLOWEDIP_A_IPADDR,
+ WGALLOWEDIP_A_CIDR_MASK,
+ __WGALLOWEDIP_A_LAST
+};
+#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1)
diff --git a/src/shared/xml.c b/src/shared/xml.c
new file mode 100644
index 0000000..2709076
--- /dev/null
+++ b/src/shared/xml.c
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "macro.h"
+#include "string-util.h"
+#include "xml.h"
+
+enum {
+ STATE_NULL,
+ STATE_TEXT,
+ STATE_TAG,
+ STATE_ATTRIBUTE,
+};
+
+static void inc_lines(unsigned *line, const char *s, size_t n) {
+ const char *p = s;
+
+ if (!line)
+ return;
+
+ for (;;) {
+ const char *f;
+
+ f = memchr(p, '\n', n);
+ if (!f)
+ return;
+
+ n -= (f - p) + 1;
+ p = f + 1;
+ (*line)++;
+ }
+}
+
+/* We don't actually do real XML here. We only read a simplistic
+ * subset, that is a bit less strict that XML and lacks all the more
+ * complex features, like entities, or namespaces. However, we do
+ * support some HTML5-like simplifications */
+
+int xml_tokenize(const char **p, char **name, void **state, unsigned *line) {
+ const char *c, *e, *b;
+ char *ret;
+ int t;
+
+ assert(p);
+ assert(*p);
+ assert(name);
+ assert(state);
+
+ t = PTR_TO_INT(*state);
+ c = *p;
+
+ if (t == STATE_NULL) {
+ if (line)
+ *line = 1;
+ t = STATE_TEXT;
+ }
+
+ for (;;) {
+ if (*c == 0)
+ return XML_END;
+
+ switch (t) {
+
+ case STATE_TEXT: {
+ int x;
+
+ e = strchrnul(c, '<');
+ if (e > c) {
+ /* More text... */
+ ret = strndup(c, e - c);
+ if (!ret)
+ return -ENOMEM;
+
+ inc_lines(line, c, e - c);
+
+ *name = ret;
+ *p = e;
+ *state = INT_TO_PTR(STATE_TEXT);
+
+ return XML_TEXT;
+ }
+
+ assert(*e == '<');
+ b = c + 1;
+
+ if (startswith(b, "!--")) {
+ /* A comment */
+ e = strstr(b + 3, "-->");
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, b, e + 3 - b);
+
+ c = e + 3;
+ continue;
+ }
+
+ if (*b == '?') {
+ /* Processing instruction */
+
+ e = strstr(b + 1, "?>");
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, b, e + 2 - b);
+
+ c = e + 2;
+ continue;
+ }
+
+ if (*b == '!') {
+ /* DTD */
+
+ e = strchr(b + 1, '>');
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, b, e + 1 - b);
+
+ c = e + 1;
+ continue;
+ }
+
+ if (*b == '/') {
+ /* A closing tag */
+ x = XML_TAG_CLOSE;
+ b++;
+ } else
+ x = XML_TAG_OPEN;
+
+ e = strpbrk(b, WHITESPACE "/>");
+ if (!e)
+ return -EINVAL;
+
+ ret = strndup(b, e - b);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = e;
+ *state = INT_TO_PTR(STATE_TAG);
+
+ return x;
+ }
+
+ case STATE_TAG:
+
+ b = c + strspn(c, WHITESPACE);
+ if (*b == 0)
+ return -EINVAL;
+
+ inc_lines(line, c, b - c);
+
+ e = b + strcspn(b, WHITESPACE "=/>");
+ if (e > b) {
+ /* An attribute */
+
+ ret = strndup(b, e - b);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = e;
+ *state = INT_TO_PTR(STATE_ATTRIBUTE);
+
+ return XML_ATTRIBUTE_NAME;
+ }
+
+ if (startswith(b, "/>")) {
+ /* An empty tag */
+
+ *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
+ *p = b + 2;
+ *state = INT_TO_PTR(STATE_TEXT);
+
+ return XML_TAG_CLOSE_EMPTY;
+ }
+
+ if (*b != '>')
+ return -EINVAL;
+
+ c = b + 1;
+ t = STATE_TEXT;
+ continue;
+
+ case STATE_ATTRIBUTE:
+
+ if (*c == '=') {
+ c++;
+
+ if (IN_SET(*c, '\'', '"')) {
+ /* Tag with a quoted value */
+
+ e = strchr(c+1, *c);
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, c, e - c);
+
+ ret = strndup(c+1, e - c - 1);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = e + 1;
+ *state = INT_TO_PTR(STATE_TAG);
+
+ return XML_ATTRIBUTE_VALUE;
+
+ }
+
+ /* Tag with a value without quotes */
+
+ b = strpbrk(c, WHITESPACE ">");
+ if (!b)
+ b = c;
+
+ ret = strndup(c, b - c);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = b;
+ *state = INT_TO_PTR(STATE_TAG);
+ return XML_ATTRIBUTE_VALUE;
+ }
+
+ t = STATE_TAG;
+ continue;
+ }
+
+ }
+
+ assert_not_reached("Bad state");
+}
diff --git a/src/shared/xml.h b/src/shared/xml.h
new file mode 100644
index 0000000..8da2ff5
--- /dev/null
+++ b/src/shared/xml.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+enum {
+ XML_END,
+ XML_TEXT,
+ XML_TAG_OPEN,
+ XML_TAG_CLOSE,
+ XML_TAG_CLOSE_EMPTY,
+ XML_ATTRIBUTE_NAME,
+ XML_ATTRIBUTE_VALUE,
+};
+
+int xml_tokenize(const char **p, char **name, void **state, unsigned *line);
diff --git a/src/sleep/sleep.c b/src/sleep/sleep.c
new file mode 100644
index 0000000..5b7984a
--- /dev/null
+++ b/src/sleep/sleep.c
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2010-2017 Canonical
+ Copyright © 2018 Dell Inc.
+***/
+
+#include <errno.h>
+#include <getopt.h>
+#include <linux/fiemap.h>
+#include <stdio.h>
+
+#include "sd-messages.h"
+
+#include "def.h"
+#include "exec-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "sleep-config.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static char* arg_verb = NULL;
+
+static int write_hibernate_location_info(void) {
+ _cleanup_free_ char *device = NULL, *type = NULL;
+ _cleanup_free_ struct fiemap *fiemap = NULL;
+ char offset_str[DECIMAL_STR_MAX(uint64_t)];
+ char device_str[DECIMAL_STR_MAX(uint64_t)];
+ _cleanup_close_ int fd = -1;
+ struct stat stb;
+ uint64_t offset;
+ int r;
+
+ r = find_hibernate_location(&device, &type, NULL, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to find hibernation location: %m");
+
+ /* if it's a swap partition, we just write the disk to /sys/power/resume */
+ if (streq(type, "partition")) {
+ r = write_string_file("/sys/power/resume", device, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_debug_errno(r, "Faileed to write partitoin device to /sys/power/resume: %m");
+
+ return r;
+ }
+ if (!streq(type, "file"))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid hibernate type: %s", type);
+
+ /* Only available in 4.17+ */
+ if (access("/sys/power/resume_offset", W_OK) < 0) {
+ if (errno == ENOENT) {
+ log_debug("Kernel too old, can't configure resume offset, ignoring.");
+ return 0;
+ }
+
+ return log_debug_errno(errno, "/sys/power/resume_offset not writeable: %m");
+ }
+
+ fd = open(device, O_RDONLY | O_CLOEXEC | O_NONBLOCK);
+ if (fd < 0)
+ return log_debug_errno(errno, "Unable to open '%s': %m", device);
+ r = fstat(fd, &stb);
+ if (r < 0)
+ return log_debug_errno(errno, "Unable to stat %s: %m", device);
+
+ r = read_fiemap(fd, &fiemap);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to read extent map for '%s': %m", device);
+ if (fiemap->fm_mapped_extents == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "No extents found in '%s'", device);
+
+ offset = fiemap->fm_extents[0].fe_physical / page_size();
+ xsprintf(offset_str, "%" PRIu64, offset);
+ r = write_string_file("/sys/power/resume_offset", offset_str, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to write offset '%s': %m", offset_str);
+
+ xsprintf(device_str, "%lx", (unsigned long)stb.st_dev);
+ r = write_string_file("/sys/power/resume", device_str, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to write device '%s': %m", device_str);
+
+ return 0;
+}
+
+static int write_mode(char **modes) {
+ int r = 0;
+ char **mode;
+
+ STRV_FOREACH(mode, modes) {
+ int k;
+
+ k = write_string_file("/sys/power/disk", *mode, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (k >= 0)
+ return 0;
+
+ log_debug_errno(k, "Failed to write '%s' to /sys/power/disk: %m", *mode);
+ if (r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int write_state(FILE **f, char **states) {
+ char **state;
+ int r = 0;
+
+ STRV_FOREACH(state, states) {
+ int k;
+
+ k = write_string_stream(*f, *state, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (k >= 0)
+ return 0;
+ log_debug_errno(k, "Failed to write '%s' to /sys/power/state: %m", *state);
+ if (r >= 0)
+ r = k;
+
+ fclose(*f);
+ *f = fopen("/sys/power/state", "we");
+ if (!*f)
+ return -errno;
+ }
+
+ return r;
+}
+
+static int execute(char **modes, char **states) {
+ char *arguments[] = {
+ NULL,
+ (char*) "pre",
+ arg_verb,
+ NULL
+ };
+ static const char* const dirs[] = {
+ SYSTEM_SLEEP_PATH,
+ NULL
+ };
+
+ int r;
+ _cleanup_fclose_ FILE *f = NULL;
+
+ /* This file is opened first, so that if we hit an error,
+ * we can abort before modifying any state. */
+ f = fopen("/sys/power/state", "we");
+ if (!f)
+ return log_error_errno(errno, "Failed to open /sys/power/state: %m");
+
+ setvbuf(f, NULL, _IONBF, 0);
+
+ /* Configure the hibernation mode */
+ if (!strv_isempty(modes)) {
+ r = write_hibernate_location_info();
+ if (r < 0)
+ return log_error_errno(r, "Failed to write hibernation disk offset: %m");
+ r = write_mode(modes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write mode to /sys/power/disk: %m");;
+ }
+
+ execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments, NULL);
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_SLEEP_START_STR,
+ LOG_MESSAGE("Suspending system..."),
+ "SLEEP=%s", arg_verb);
+
+ r = write_state(&f, states);
+ if (r < 0)
+ log_struct_errno(LOG_ERR, r,
+ "MESSAGE_ID=" SD_MESSAGE_SLEEP_STOP_STR,
+ LOG_MESSAGE("Failed to suspend system. System resumed again: %m"),
+ "SLEEP=%s", arg_verb);
+ else
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_SLEEP_STOP_STR,
+ LOG_MESSAGE("System resumed."),
+ "SLEEP=%s", arg_verb);
+
+ arguments[1] = (char*) "post";
+ execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments, NULL);
+
+ return r;
+}
+
+static int rtc_read_time(uint64_t *ret_sec) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ r = read_one_line_file("/sys/class/rtc/rtc0/since_epoch", &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read RTC time: %m");
+
+ r = safe_atou64(t, ret_sec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse RTC time '%s': %m", t);
+
+ return 0;
+}
+
+static int rtc_write_wake_alarm(uint64_t sec) {
+ char buf[DECIMAL_STR_MAX(uint64_t)];
+ int r;
+
+ xsprintf(buf, "%" PRIu64, sec);
+
+ r = write_string_file("/sys/class/rtc/rtc0/wakealarm", buf, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write '%s' to /sys/class/rtc/rtc0/wakealarm: %m", buf);
+
+ return 0;
+}
+
+static int execute_s2h(usec_t hibernate_delay_sec) {
+
+ _cleanup_strv_free_ char **hibernate_modes = NULL, **hibernate_states = NULL,
+ **suspend_modes = NULL, **suspend_states = NULL;
+ usec_t original_time, wake_time, cmp_time;
+ int r;
+
+ r = parse_sleep_config("suspend", NULL, &suspend_modes, &suspend_states, NULL);
+ if (r < 0)
+ return r;
+
+ r = parse_sleep_config("hibernate", NULL, &hibernate_modes, &hibernate_states, NULL);
+ if (r < 0)
+ return r;
+
+ r = rtc_read_time(&original_time);
+ if (r < 0)
+ return r;
+
+ wake_time = original_time + DIV_ROUND_UP(hibernate_delay_sec, USEC_PER_SEC);
+ r = rtc_write_wake_alarm(wake_time);
+ if (r < 0)
+ return r;
+
+ log_debug("Set RTC wake alarm for %" PRIu64, wake_time);
+
+ r = execute(suspend_modes, suspend_states);
+ if (r < 0)
+ return r;
+
+ /* Reset RTC right-away */
+ r = rtc_write_wake_alarm(0);
+ if (r < 0)
+ return r;
+
+ r = rtc_read_time(&cmp_time);
+ if (r < 0)
+ return r;
+
+ log_debug("Woke up at %"PRIu64, cmp_time);
+
+ if (cmp_time < wake_time) /* We woke up before the alarm time, we are done. */
+ return 0;
+
+ /* If woken up after alarm time, hibernate */
+ r = execute(hibernate_modes, hibernate_states);
+ if (r < 0) {
+ log_notice("Couldn't hibernate, will try to suspend again.");
+ r = execute(suspend_modes, suspend_states);
+ if (r < 0) {
+ log_notice("Could neither hibernate nor suspend again, giving up.");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-suspend.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s COMMAND\n\n"
+ "Suspend the system, hibernate the system, or both.\n\n"
+ " -h --help Show this help and exit\n"
+ " --version Print version string and exit\n"
+ "\nCommands:\n"
+ " suspend Suspend the system\n"
+ " hibernate Hibernate the system\n"
+ " hybrid-sleep Both hibernate and suspend the system\n"
+ " suspend-then-hibernate Initially suspend and then hibernate\n"
+ " the system after a fixed period of time\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+ switch(c) {
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (argc - optind != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Usage: %s COMMAND",
+ program_invocation_short_name);
+
+ arg_verb = argv[optind];
+
+ if (!STR_IN_SET(arg_verb, "suspend", "hibernate", "hybrid-sleep", "suspend-then-hibernate"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown command '%s'.", arg_verb);
+
+ return 1 /* work to do */;
+}
+
+static int run(int argc, char *argv[]) {
+ bool allow;
+ _cleanup_strv_free_ char **modes = NULL, **states = NULL;
+ usec_t delay = 0;
+ int r;
+
+ log_setup_service();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = parse_sleep_config(arg_verb, &allow, &modes, &states, &delay);
+ if (r < 0)
+ return r;
+
+ if (!allow)
+ return log_error_errno(SYNTHETIC_ERRNO(EACCES),
+ "Sleep mode \"%s\" is disabled by configuration, refusing.",
+ arg_verb);
+
+ if (streq(arg_verb, "suspend-then-hibernate"))
+ return execute_s2h(delay);
+ else
+ return execute(modes, states);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/sleep/sleep.conf b/src/sleep/sleep.conf
new file mode 100644
index 0000000..dc2ed37
--- /dev/null
+++ b/src/sleep/sleep.conf
@@ -0,0 +1,25 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See systemd-sleep.conf(5) for details
+
+[Sleep]
+#AllowSuspend=yes
+#AllowHibernation=yes
+#AllowSuspendThenHibernate=yes
+#AllowHybridSleep=yes
+#SuspendMode=
+#SuspendState=mem standby freeze
+#HibernateMode=platform shutdown
+#HibernateState=disk
+#HybridSleepMode=suspend platform shutdown
+#HybridSleepState=disk
+#HibernateDelaySec=180min
diff --git a/src/socket-proxy/socket-proxyd.c b/src/socket-proxy/socket-proxyd.c
new file mode 100644
index 0000000..bac5c16
--- /dev/null
+++ b/src/socket-proxy/socket-proxyd.c
@@ -0,0 +1,665 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+#include "sd-resolve.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "resolve-private.h"
+#include "set.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "util.h"
+
+#define BUFFER_SIZE (256 * 1024)
+static unsigned arg_connections_max = 256;
+
+static const char *arg_remote_host = NULL;
+
+typedef struct Context {
+ sd_event *event;
+ sd_resolve *resolve;
+
+ Set *listen;
+ Set *connections;
+} Context;
+
+typedef struct Connection {
+ Context *context;
+
+ int server_fd, client_fd;
+ int server_to_client_buffer[2]; /* a pipe */
+ int client_to_server_buffer[2]; /* a pipe */
+
+ size_t server_to_client_buffer_full, client_to_server_buffer_full;
+ size_t server_to_client_buffer_size, client_to_server_buffer_size;
+
+ sd_event_source *server_event_source, *client_event_source;
+
+ sd_resolve_query *resolve_query;
+} Connection;
+
+static void connection_free(Connection *c) {
+ assert(c);
+
+ if (c->context)
+ set_remove(c->context->connections, c);
+
+ sd_event_source_unref(c->server_event_source);
+ sd_event_source_unref(c->client_event_source);
+
+ safe_close(c->server_fd);
+ safe_close(c->client_fd);
+
+ safe_close_pair(c->server_to_client_buffer);
+ safe_close_pair(c->client_to_server_buffer);
+
+ sd_resolve_query_unref(c->resolve_query);
+
+ free(c);
+}
+
+static void context_clear(Context *context) {
+ assert(context);
+
+ set_free_with_destructor(context->listen, sd_event_source_unref);
+ set_free_with_destructor(context->connections, connection_free);
+
+ sd_event_unref(context->event);
+ sd_resolve_unref(context->resolve);
+}
+
+static int connection_create_pipes(Connection *c, int buffer[static 2], size_t *sz) {
+ int r;
+
+ assert(c);
+ assert(buffer);
+ assert(sz);
+
+ if (buffer[0] >= 0)
+ return 0;
+
+ r = pipe2(buffer, O_CLOEXEC|O_NONBLOCK);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to allocate pipe buffer: %m");
+
+ (void) fcntl(buffer[0], F_SETPIPE_SZ, BUFFER_SIZE);
+
+ r = fcntl(buffer[0], F_GETPIPE_SZ);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to get pipe buffer size: %m");
+
+ assert(r > 0);
+ *sz = r;
+
+ return 0;
+}
+
+static int connection_shovel(
+ Connection *c,
+ int *from, int buffer[2], int *to,
+ size_t *full, size_t *sz,
+ sd_event_source **from_source, sd_event_source **to_source) {
+
+ bool shoveled;
+
+ assert(c);
+ assert(from);
+ assert(buffer);
+ assert(buffer[0] >= 0);
+ assert(buffer[1] >= 0);
+ assert(to);
+ assert(full);
+ assert(sz);
+ assert(from_source);
+ assert(to_source);
+
+ do {
+ ssize_t z;
+
+ shoveled = false;
+
+ if (*full < *sz && *from >= 0 && *to >= 0) {
+ z = splice(*from, NULL, buffer[1], NULL, *sz - *full, SPLICE_F_MOVE|SPLICE_F_NONBLOCK);
+ if (z > 0) {
+ *full += z;
+ shoveled = true;
+ } else if (z == 0 || IN_SET(errno, EPIPE, ECONNRESET)) {
+ *from_source = sd_event_source_unref(*from_source);
+ *from = safe_close(*from);
+ } else if (!IN_SET(errno, EAGAIN, EINTR))
+ return log_error_errno(errno, "Failed to splice: %m");
+ }
+
+ if (*full > 0 && *to >= 0) {
+ z = splice(buffer[0], NULL, *to, NULL, *full, SPLICE_F_MOVE|SPLICE_F_NONBLOCK);
+ if (z > 0) {
+ *full -= z;
+ shoveled = true;
+ } else if (z == 0 || IN_SET(errno, EPIPE, ECONNRESET)) {
+ *to_source = sd_event_source_unref(*to_source);
+ *to = safe_close(*to);
+ } else if (!IN_SET(errno, EAGAIN, EINTR))
+ return log_error_errno(errno, "Failed to splice: %m");
+ }
+ } while (shoveled);
+
+ return 0;
+}
+
+static int connection_enable_event_sources(Connection *c);
+
+static int traffic_cb(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Connection *c = userdata;
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+ assert(c);
+
+ r = connection_shovel(c,
+ &c->server_fd, c->server_to_client_buffer, &c->client_fd,
+ &c->server_to_client_buffer_full, &c->server_to_client_buffer_size,
+ &c->server_event_source, &c->client_event_source);
+ if (r < 0)
+ goto quit;
+
+ r = connection_shovel(c,
+ &c->client_fd, c->client_to_server_buffer, &c->server_fd,
+ &c->client_to_server_buffer_full, &c->client_to_server_buffer_size,
+ &c->client_event_source, &c->server_event_source);
+ if (r < 0)
+ goto quit;
+
+ /* EOF on both sides? */
+ if (c->server_fd == -1 && c->client_fd == -1)
+ goto quit;
+
+ /* Server closed, and all data written to client? */
+ if (c->server_fd == -1 && c->server_to_client_buffer_full <= 0)
+ goto quit;
+
+ /* Client closed, and all data written to server? */
+ if (c->client_fd == -1 && c->client_to_server_buffer_full <= 0)
+ goto quit;
+
+ r = connection_enable_event_sources(c);
+ if (r < 0)
+ goto quit;
+
+ return 1;
+
+quit:
+ connection_free(c);
+ return 0; /* ignore errors, continue serving */
+}
+
+static int connection_enable_event_sources(Connection *c) {
+ uint32_t a = 0, b = 0;
+ int r;
+
+ assert(c);
+
+ if (c->server_to_client_buffer_full > 0)
+ b |= EPOLLOUT;
+ if (c->server_to_client_buffer_full < c->server_to_client_buffer_size)
+ a |= EPOLLIN;
+
+ if (c->client_to_server_buffer_full > 0)
+ a |= EPOLLOUT;
+ if (c->client_to_server_buffer_full < c->client_to_server_buffer_size)
+ b |= EPOLLIN;
+
+ if (c->server_event_source)
+ r = sd_event_source_set_io_events(c->server_event_source, a);
+ else if (c->server_fd >= 0)
+ r = sd_event_add_io(c->context->event, &c->server_event_source, c->server_fd, a, traffic_cb, c);
+ else
+ r = 0;
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up server event source: %m");
+
+ if (c->client_event_source)
+ r = sd_event_source_set_io_events(c->client_event_source, b);
+ else if (c->client_fd >= 0)
+ r = sd_event_add_io(c->context->event, &c->client_event_source, c->client_fd, b, traffic_cb, c);
+ else
+ r = 0;
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up client event source: %m");
+
+ return 0;
+}
+
+static int connection_complete(Connection *c) {
+ int r;
+
+ assert(c);
+
+ r = connection_create_pipes(c, c->server_to_client_buffer, &c->server_to_client_buffer_size);
+ if (r < 0)
+ goto fail;
+
+ r = connection_create_pipes(c, c->client_to_server_buffer, &c->client_to_server_buffer_size);
+ if (r < 0)
+ goto fail;
+
+ r = connection_enable_event_sources(c);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ connection_free(c);
+ return 0; /* ignore errors, continue serving */
+}
+
+static int connect_cb(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Connection *c = userdata;
+ socklen_t solen;
+ int error, r;
+
+ assert(s);
+ assert(fd >= 0);
+ assert(c);
+
+ solen = sizeof(error);
+ r = getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &solen);
+ if (r < 0) {
+ log_error_errno(errno, "Failed to issue SO_ERROR: %m");
+ goto fail;
+ }
+
+ if (error != 0) {
+ log_error_errno(error, "Failed to connect to remote host: %m");
+ goto fail;
+ }
+
+ c->client_event_source = sd_event_source_unref(c->client_event_source);
+
+ return connection_complete(c);
+
+fail:
+ connection_free(c);
+ return 0; /* ignore errors, continue serving */
+}
+
+static int connection_start(Connection *c, struct sockaddr *sa, socklen_t salen) {
+ int r;
+
+ assert(c);
+ assert(sa);
+ assert(salen);
+
+ c->client_fd = socket(sa->sa_family, SOCK_STREAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0);
+ if (c->client_fd < 0) {
+ log_error_errno(errno, "Failed to get remote socket: %m");
+ goto fail;
+ }
+
+ r = connect(c->client_fd, sa, salen);
+ if (r < 0) {
+ if (errno == EINPROGRESS) {
+ r = sd_event_add_io(c->context->event, &c->client_event_source, c->client_fd, EPOLLOUT, connect_cb, c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add connection socket: %m");
+ goto fail;
+ }
+
+ r = sd_event_source_set_enabled(c->client_event_source, SD_EVENT_ONESHOT);
+ if (r < 0) {
+ log_error_errno(r, "Failed to enable oneshot event source: %m");
+ goto fail;
+ }
+ } else {
+ log_error_errno(errno, "Failed to connect to remote host: %m");
+ goto fail;
+ }
+ } else {
+ r = connection_complete(c);
+ if (r < 0)
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ connection_free(c);
+ return 0; /* ignore errors, continue serving */
+}
+
+static int resolve_handler(sd_resolve_query *q, int ret, const struct addrinfo *ai, Connection *c) {
+ assert(q);
+ assert(c);
+
+ if (ret != 0) {
+ log_error("Failed to resolve host: %s", gai_strerror(ret));
+ goto fail;
+ }
+
+ c->resolve_query = sd_resolve_query_unref(c->resolve_query);
+
+ return connection_start(c, ai->ai_addr, ai->ai_addrlen);
+
+fail:
+ connection_free(c);
+ return 0; /* ignore errors, continue serving */
+}
+
+static int resolve_remote(Connection *c) {
+
+ static const struct addrinfo hints = {
+ .ai_family = AF_UNSPEC,
+ .ai_socktype = SOCK_STREAM,
+ .ai_flags = AI_ADDRCONFIG
+ };
+
+ union sockaddr_union sa = {};
+ const char *node, *service;
+ int r;
+
+ if (IN_SET(arg_remote_host[0], '/', '@')) {
+ int salen;
+
+ salen = sockaddr_un_set_path(&sa.un, arg_remote_host);
+ if (salen < 0) {
+ log_error_errno(salen, "Specified address doesn't fit in an AF_UNIX address, refusing: %m");
+ goto fail;
+ }
+
+ return connection_start(c, &sa.sa, salen);
+ }
+
+ service = strrchr(arg_remote_host, ':');
+ if (service) {
+ node = strndupa(arg_remote_host, service - arg_remote_host);
+ service++;
+ } else {
+ node = arg_remote_host;
+ service = "80";
+ }
+
+ log_debug("Looking up address info for %s:%s", node, service);
+ r = resolve_getaddrinfo(c->context->resolve, &c->resolve_query, node, service, &hints, resolve_handler, NULL, c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to resolve remote host: %m");
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ connection_free(c);
+ return 0; /* ignore errors, continue serving */
+}
+
+static int add_connection_socket(Context *context, int fd) {
+ Connection *c;
+ int r;
+
+ assert(context);
+ assert(fd >= 0);
+
+ if (set_size(context->connections) > arg_connections_max) {
+ log_warning("Hit connection limit, refusing connection.");
+ safe_close(fd);
+ return 0;
+ }
+
+ r = set_ensure_allocated(&context->connections, NULL);
+ if (r < 0) {
+ log_oom();
+ return 0;
+ }
+
+ c = new0(Connection, 1);
+ if (!c) {
+ log_oom();
+ return 0;
+ }
+
+ c->context = context;
+ c->server_fd = fd;
+ c->client_fd = -1;
+ c->server_to_client_buffer[0] = c->server_to_client_buffer[1] = -1;
+ c->client_to_server_buffer[0] = c->client_to_server_buffer[1] = -1;
+
+ r = set_put(context->connections, c);
+ if (r < 0) {
+ free(c);
+ log_oom();
+ return 0;
+ }
+
+ return resolve_remote(c);
+}
+
+static int accept_cb(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_free_ char *peer = NULL;
+ Context *context = userdata;
+ int nfd = -1, r;
+
+ assert(s);
+ assert(fd >= 0);
+ assert(revents & EPOLLIN);
+ assert(context);
+
+ nfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (nfd < 0) {
+ if (errno != -EAGAIN)
+ log_warning_errno(errno, "Failed to accept() socket: %m");
+ } else {
+ getpeername_pretty(nfd, true, &peer);
+ log_debug("New connection from %s", strna(peer));
+
+ r = add_connection_socket(context, nfd);
+ if (r < 0) {
+ log_error_errno(r, "Failed to accept connection, ignoring: %m");
+ safe_close(fd);
+ }
+ }
+
+ r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
+ if (r < 0) {
+ log_error_errno(r, "Error while re-enabling listener with ONESHOT: %m");
+ sd_event_exit(context->event, r);
+ return r;
+ }
+
+ return 1;
+}
+
+static int add_listen_socket(Context *context, int fd) {
+ sd_event_source *source;
+ int r;
+
+ assert(context);
+ assert(fd >= 0);
+
+ r = set_ensure_allocated(&context->listen, NULL);
+ if (r < 0) {
+ log_oom();
+ return r;
+ }
+
+ r = sd_is_socket(fd, 0, SOCK_STREAM, 1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine socket type: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Passed in socket is not a stream socket.");
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mark file descriptor non-blocking: %m");
+
+ r = sd_event_add_io(context->event, &source, fd, EPOLLIN, accept_cb, context);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add event source: %m");
+
+ r = set_put(context->listen, source);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add source to set: %m");
+ sd_event_source_unref(source);
+ return r;
+ }
+
+ /* Set the watcher to oneshot in case other processes are also
+ * watching to accept(). */
+ r = sd_event_source_set_enabled(source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable oneshot mode: %m");
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-socket-proxyd", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%1$s [HOST:PORT]\n"
+ "%1$s [SOCKET]\n\n"
+ "Bidirectionally proxy local sockets to another (possibly remote) socket.\n\n"
+ " -c --connections-max= Set the maximum number of connections to be accepted\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ "\nSee the %2$s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_IGNORE_ENV
+ };
+
+ static const struct option options[] = {
+ { "connections-max", required_argument, NULL, 'c' },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "c:h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'c':
+ r = safe_atou(optarg, &arg_connections_max);
+ if (r < 0) {
+ log_error("Failed to parse --connections-max= argument: %s", optarg);
+ return r;
+ }
+
+ if (arg_connections_max < 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Connection limit is too low.");
+
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not enough parameters.");
+
+ if (argc != optind+1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many parameters.");
+
+ arg_remote_host = argv[optind];
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(context_clear) Context context = {};
+ int r, n, fd;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = sd_event_default(&context.event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ r = sd_resolve_default(&context.resolve);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate resolver: %m");
+
+ r = sd_resolve_attach_event(context.resolve, context.event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach resolver: %m");
+
+ sd_event_set_watchdog(context.event, true);
+
+ r = sd_listen_fds(1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to receive sockets from parent.");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Didn't get any sockets passed in.");
+
+ n = r;
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
+ r = add_listen_socket(&context, fd);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_loop(context.event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/stdio-bridge/stdio-bridge.c b/src/stdio-bridge/stdio-bridge.c
new file mode 100644
index 0000000..7060897
--- /dev/null
+++ b/src/stdio-bridge/stdio-bridge.c
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <poll.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "build.h"
+#include "bus-internal.h"
+#include "bus-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "util.h"
+
+#define DEFAULT_BUS_PATH "unix:path=/run/dbus/system_bus_socket"
+
+static const char *arg_bus_path = DEFAULT_BUS_PATH;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+
+static int help(void) {
+
+ printf("%s [OPTIONS...]\n\n"
+ "STDIO or socket-activatable proxy to a given DBus endpoint.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -p --bus-path=PATH Path to the kernel bus (default: %s)\n"
+ " -M --machine=MACHINE Name of machine to connect to\n",
+ program_invocation_short_name, DEFAULT_BUS_PATH);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_MACHINE,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "bus-path", required_argument, NULL, 'p' },
+ { "machine", required_argument, NULL, 'M' },
+ {},
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hsup:", options, NULL)) >= 0) {
+
+ switch (c) {
+
+ case 'h':
+ help();
+ return 0;
+
+ case ARG_VERSION:
+ return version();
+
+ case '?':
+ return -EINVAL;
+
+ case 'p':
+ arg_bus_path = optarg;
+
+ break;
+
+ case 'M':
+ arg_bus_path = optarg;
+
+ arg_transport = BUS_TRANSPORT_MACHINE;
+
+ break;
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown option code %c", c);
+ }
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *a = NULL, *b = NULL;
+ sd_id128_t server_id;
+ bool is_unix;
+ int r, in_fd, out_fd;
+
+ log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = sd_listen_fds(0);
+ if (r == 0) {
+ in_fd = STDIN_FILENO;
+ out_fd = STDOUT_FILENO;
+ } else if (r == 1) {
+ in_fd = SD_LISTEN_FDS_START;
+ out_fd = SD_LISTEN_FDS_START;
+ } else {
+ log_error("Illegal number of file descriptors passed.");
+ return -EINVAL;
+ }
+
+ is_unix =
+ sd_is_socket(in_fd, AF_UNIX, 0, 0) > 0 &&
+ sd_is_socket(out_fd, AF_UNIX, 0, 0) > 0;
+
+ r = sd_bus_new(&a);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate bus: %m");
+
+ if (arg_transport == BUS_TRANSPORT_MACHINE)
+ r = bus_set_address_system_machine(a, arg_bus_path);
+ else
+ r = sd_bus_set_address(a, arg_bus_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set address to connect to: %m");
+
+ r = sd_bus_negotiate_fds(a, is_unix);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set FD negotiation: %m");
+
+ r = sd_bus_start(a);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start bus client: %m");
+
+ r = sd_bus_get_bus_id(a, &server_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get server ID: %m");
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate bus: %m");
+
+ r = sd_bus_set_fd(b, in_fd, out_fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set fds: %m");
+
+ r = sd_bus_set_server(b, 1, server_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set server mode: %m");
+
+ r = sd_bus_negotiate_fds(b, is_unix);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set FD negotiation: %m");
+
+ r = sd_bus_set_anonymous(b, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set anonymous authentication: %m");
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start bus client: %m");
+
+ for (;;) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int events_a, events_b, fd;
+ uint64_t timeout_a, timeout_b, t;
+ struct timespec _ts, *ts;
+
+ r = sd_bus_process(a, &m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to process bus a: %m");
+
+ if (m) {
+ r = sd_bus_send(b, m, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send message: %m");
+ }
+
+ if (r > 0)
+ continue;
+
+ r = sd_bus_process(b, &m);
+ if (r < 0)
+ /* treat 'connection reset by peer' as clean exit condition */
+ return r == -ECONNRESET ? 0 : r;
+
+ if (m) {
+ r = sd_bus_send(a, m, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send message: %m");
+ }
+
+ if (r > 0)
+ continue;
+
+ fd = sd_bus_get_fd(a);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to get fd: %m");
+
+ events_a = sd_bus_get_events(a);
+ if (events_a < 0)
+ return log_error_errno(events_a, "Failed to get events mask: %m");
+
+ r = sd_bus_get_timeout(a, &timeout_a);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get timeout: %m");
+
+ events_b = sd_bus_get_events(b);
+ if (events_b < 0)
+ return log_error_errno(events_b, "Failed to get events mask: %m");
+
+ r = sd_bus_get_timeout(b, &timeout_b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get timeout: %m");
+
+ t = timeout_a;
+ if (t == (uint64_t) -1 || (timeout_b != (uint64_t) -1 && timeout_b < timeout_a))
+ t = timeout_b;
+
+ if (t == (uint64_t) -1)
+ ts = NULL;
+ else {
+ usec_t nw;
+
+ nw = now(CLOCK_MONOTONIC);
+ if (t > nw)
+ t -= nw;
+ else
+ t = 0;
+
+ ts = timespec_store(&_ts, t);
+ }
+
+ {
+ struct pollfd p[3] = {
+ {.fd = fd, .events = events_a, },
+ {.fd = STDIN_FILENO, .events = events_b & POLLIN, },
+ {.fd = STDOUT_FILENO, .events = events_b & POLLOUT, }};
+
+ r = ppoll(p, ELEMENTSOF(p), ts, NULL);
+ }
+ if (r < 0)
+ return log_error_errno(errno, "ppoll() failed: %m");
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/sulogin-shell/sulogin-shell.c b/src/sulogin-shell/sulogin-shell.c
new file mode 100644
index 0000000..6d65efb
--- /dev/null
+++ b/src/sulogin-shell/sulogin-shell.c
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2017 Felipe Sateler
+***/
+
+#include <errno.h>
+#include <sys/prctl.h>
+
+#include "bus-util.h"
+#include "bus-error.h"
+#include "def.h"
+#include "env-util.h"
+#include "log.h"
+#include "process-util.h"
+#include "sd-bus.h"
+#include "signal-util.h"
+
+static int reload_manager(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ log_info("Reloading system manager configuration");
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Reload");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Note we use an extra-long timeout here. This is because a reload or reexec means generators are rerun which
+ * are timed out after DEFAULT_TIMEOUT_USEC. Let's use twice that time here, so that the generators can have
+ * their timeout, and for everything else there's the same time budget in place. */
+
+ r = sd_bus_call(bus, m, DEFAULT_TIMEOUT_USEC * 2, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reload daemon: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int start_default_target(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ log_info("Starting default target");
+
+ /* Start these units only if we can replace base.target with it */
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartUnit",
+ &error,
+ NULL,
+ "ss", "default.target", "isolate");
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int fork_wait(const char* const cmdline[]) {
+ pid_t pid;
+ int r;
+
+ r = safe_fork("(sulogin)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execv(cmdline[0], (char**) cmdline);
+ log_error_errno(errno, "Failed to execute %s: %m", cmdline[0]);
+ _exit(EXIT_FAILURE); /* Operational error */
+ }
+
+ return wait_for_terminate_and_check(cmdline[0], pid, WAIT_LOG_ABNORMAL);
+}
+
+static void print_mode(const char* mode) {
+ printf("You are in %s mode. After logging in, type \"journalctl -xb\" to view\n"
+ "system logs, \"systemctl reboot\" to reboot, \"systemctl default\" or \"exit\"\n"
+ "to boot into default mode.\n", mode);
+ fflush(stdout);
+}
+
+int main(int argc, char *argv[]) {
+ const char* sulogin_cmdline[] = {
+ SULOGIN,
+ NULL, /* --force */
+ NULL
+ };
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ log_setup_service();
+
+ print_mode(argc > 1 ? argv[1] : "");
+
+ if (getenv_bool("SYSTEMD_SULOGIN_FORCE") > 0)
+ /* allows passwordless logins if root account is locked. */
+ sulogin_cmdline[1] = "--force";
+
+ (void) fork_wait(sulogin_cmdline);
+
+ r = bus_connect_system_systemd(&bus);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to get D-Bus connection: %m");
+ r = 0;
+ } else {
+ (void) reload_manager(bus);
+
+ r = start_default_target(bus);
+ }
+
+ return r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/src/sysctl/sysctl.c b/src/sysctl/sysctl.c
new file mode 100644
index 0000000..c67d790
--- /dev/null
+++ b/src/sysctl/sysctl.c
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "conf-files.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "log.h"
+#include "main-func.h"
+#include "pager.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "strv.h"
+#include "sysctl-util.h"
+#include "util.h"
+
+static char **arg_prefixes = NULL;
+static bool arg_cat_config = false;
+static PagerFlags arg_pager_flags = 0;
+
+STATIC_DESTRUCTOR_REGISTER(arg_prefixes, strv_freep);
+
+static int apply_all(OrderedHashmap *sysctl_options) {
+ char *property, *value;
+ Iterator i;
+ int r = 0;
+
+ ORDERED_HASHMAP_FOREACH_KEY(value, property, sysctl_options, i) {
+ int k;
+
+ k = sysctl_write(property, value);
+ if (k < 0) {
+ /* If the sysctl is not available in the kernel or we are running with reduced privileges and
+ * cannot write it, then log about the issue at LOG_NOTICE level, and proceed without
+ * failing. (EROFS is treated as a permission problem here, since that's how container managers
+ * usually protected their sysctls.) In all other cases log an error and make the tool fail. */
+
+ if (IN_SET(k, -EPERM, -EACCES, -EROFS, -ENOENT))
+ log_notice_errno(k, "Couldn't write '%s' to '%s', ignoring: %m", value, property);
+ else {
+ log_error_errno(k, "Couldn't write '%s' to '%s': %m", value, property);
+ if (r == 0)
+ r = k;
+ }
+ }
+ }
+
+ return r;
+}
+
+static bool test_prefix(const char *p) {
+ char **i;
+
+ if (strv_isempty(arg_prefixes))
+ return true;
+
+ STRV_FOREACH(i, arg_prefixes) {
+ const char *t;
+
+ t = path_startswith(*i, "/proc/sys/");
+ if (!t)
+ t = *i;
+ if (path_startswith(p, t))
+ return true;
+ }
+
+ return false;
+}
+
+static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ignore_enoent) {
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned c = 0;
+ int r;
+
+ assert(path);
+
+ r = search_and_fopen(path, "re", NULL, (const char**) CONF_PATHS_STRV("sysctl.d"), &f);
+ if (r < 0) {
+ if (ignore_enoent && r == -ENOENT)
+ return 0;
+
+ return log_error_errno(r, "Failed to open file '%s', ignoring: %m", path);
+ }
+
+ log_debug("Parsing %s", path);
+ for (;;) {
+ char *p, *value, *new_value, *property, *existing;
+ _cleanup_free_ char *l = NULL;
+ void *v;
+ int k;
+
+ k = read_line(f, LONG_LINE_MAX, &l);
+ if (k == 0)
+ break;
+ if (k < 0)
+ return log_error_errno(k, "Failed to read file '%s', ignoring: %m", path);
+
+ c++;
+
+ p = strstrip(l);
+
+ if (isempty(p))
+ continue;
+ if (strchr(COMMENTS "\n", *p))
+ continue;
+
+ value = strchr(p, '=');
+ if (!value) {
+ log_error("Line is not an assignment at '%s:%u': %s", path, c, p);
+
+ if (r == 0)
+ r = -EINVAL;
+ continue;
+ }
+
+ *value = 0;
+ value++;
+
+ p = sysctl_normalize(strstrip(p));
+ value = strstrip(value);
+
+ if (!test_prefix(p))
+ continue;
+
+ existing = ordered_hashmap_get2(sysctl_options, p, &v);
+ if (existing) {
+ if (streq(value, existing))
+ continue;
+
+ log_debug("Overwriting earlier assignment of %s at '%s:%u'.", p, path, c);
+ free(ordered_hashmap_remove(sysctl_options, p));
+ free(v);
+ }
+
+ property = strdup(p);
+ if (!property)
+ return log_oom();
+
+ new_value = strdup(value);
+ if (!new_value) {
+ free(property);
+ return log_oom();
+ }
+
+ k = ordered_hashmap_put(sysctl_options, property, new_value);
+ if (k < 0) {
+ log_error_errno(k, "Failed to add sysctl variable %s to hashmap: %m", property);
+ free(property);
+ free(new_value);
+ return k;
+ }
+ }
+
+ return r;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-sysctl.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [CONFIGURATION FILE...]\n\n"
+ "Applies kernel sysctl settings.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --cat-config Show configuration files\n"
+ " --prefix=PATH Only apply rules with the specified prefix\n"
+ " --no-pager Do not pipe output into a pager\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_CAT_CONFIG,
+ ARG_PREFIX,
+ ARG_NO_PAGER,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "cat-config", no_argument, NULL, ARG_CAT_CONFIG },
+ { "prefix", required_argument, NULL, ARG_PREFIX },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_CAT_CONFIG:
+ arg_cat_config = true;
+ break;
+
+ case ARG_PREFIX: {
+ char *p;
+
+ /* We used to require people to specify absolute paths
+ * in /proc/sys in the past. This is kinda useless, but
+ * we need to keep compatibility. We now support any
+ * sysctl name available. */
+ sysctl_normalize(optarg);
+
+ if (path_startswith(optarg, "/proc/sys"))
+ p = strdup(optarg);
+ else
+ p = strappend("/proc/sys/", optarg);
+ if (!p)
+ return log_oom();
+
+ if (strv_consume(&arg_prefixes, p) < 0)
+ return log_oom();
+
+ break;
+ }
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_cat_config && argc > optind)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Positional arguments are not allowed with --cat-config");
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(ordered_hashmap_free_free_freep) OrderedHashmap *sysctl_options = NULL;
+ int r, k;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ sysctl_options = ordered_hashmap_new(&path_hash_ops);
+ if (!sysctl_options)
+ return log_oom();
+
+ r = 0;
+
+ if (argc > optind) {
+ int i;
+
+ for (i = optind; i < argc; i++) {
+ k = parse_file(sysctl_options, argv[i], false);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+ } else {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+
+ r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char**) CONF_PATHS_STRV("sysctl.d"));
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate sysctl.d files: %m");
+
+ if (arg_cat_config) {
+ (void) pager_open(arg_pager_flags);
+
+ return cat_files(NULL, files, 0);
+ }
+
+ STRV_FOREACH(f, files) {
+ k = parse_file(sysctl_options, *f, true);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+ }
+
+ k = apply_all(sysctl_options);
+ if (k < 0 && r == 0)
+ r = k;
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/system-update-generator/system-update-generator.c b/src/system-update-generator/system-update-generator.c
new file mode 100644
index 0000000..77e265d
--- /dev/null
+++ b/src/system-update-generator/system-update-generator.c
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "fs-util.h"
+#include "generator.h"
+#include "log.h"
+#include "proc-cmdline.h"
+#include "special.h"
+#include "string-util.h"
+#include "util.h"
+
+/*
+ * Implements the logic described in systemd.offline-updates(7).
+ */
+
+static const char *arg_dest = NULL;
+
+static int generate_symlink(void) {
+ const char *p = NULL;
+
+ if (laccess("/system-update", F_OK) < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ log_error_errno(errno, "Failed to check for system update: %m");
+ return -EINVAL;
+ }
+
+ p = strjoina(arg_dest, "/" SPECIAL_DEFAULT_TARGET);
+ if (symlink(SYSTEM_DATA_UNIT_PATH "/system-update.target", p) < 0)
+ return log_error_errno(errno, "Failed to create symlink %s: %m", p);
+
+ return 1;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ assert(key);
+
+ /* Check if a run level is specified on the kernel command line. The
+ * command line has higher priority than any on-disk configuration, so
+ * it'll make any symlink we create moot.
+ */
+
+ if (streq(key, "systemd.unit") && !proc_cmdline_value_missing(key, value))
+ log_warning("Offline system update overridden by kernel command line systemd.unit= setting");
+ else if (!value && runlevel_to_target(key))
+ log_warning("Offline system update overridden by runlevel \"%s\" on the kernel command line", key);
+
+ return 0;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r;
+
+ assert_se(arg_dest = dest_early);
+
+ r = generate_symlink();
+ if (r < 0)
+ return r;
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
new file mode 100644
index 0000000..63dae2c
--- /dev/null
+++ b/src/systemctl/systemctl.c
@@ -0,0 +1,8823 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/reboot.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+#include "sd-event.h"
+#include "sd-login.h"
+
+#include "alloc-util.h"
+#include "bootspec.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-message.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "cgroup-show.h"
+#include "cgroup-util.h"
+#include "copy.h"
+#include "dropin.h"
+#include "efivars.h"
+#include "env-util.h"
+#include "escape.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "initreq.h"
+#include "install.h"
+#include "io-util.h"
+#include "list.h"
+#include "locale-util.h"
+#include "log.h"
+#include "logs-show.h"
+#include "macro.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "reboot-util.h"
+#include "rlimit-util.h"
+#include "set.h"
+#include "sigbus.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "spawn-ask-password-agent.h"
+#include "spawn-polkit-agent.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "unit-def.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "util.h"
+#include "utmp-wtmp.h"
+#include "verbs.h"
+#include "virt.h"
+
+/* The init script exit status codes
+ 0 program is running or service is OK
+ 1 program is dead and /var/run pid file exists
+ 2 program is dead and /var/lock lock file exists
+ 3 program is not running
+ 4 program or service status is unknown
+ 5-99 reserved for future LSB use
+ 100-149 reserved for distribution use
+ 150-199 reserved for application use
+ 200-254 reserved
+*/
+enum {
+ EXIT_PROGRAM_RUNNING_OR_SERVICE_OK = 0,
+ EXIT_PROGRAM_DEAD_AND_PID_EXISTS = 1,
+ EXIT_PROGRAM_DEAD_AND_LOCK_FILE_EXISTS = 2,
+ EXIT_PROGRAM_NOT_RUNNING = 3,
+ EXIT_PROGRAM_OR_SERVICES_STATUS_UNKNOWN = 4,
+};
+
+static char **arg_types = NULL;
+static char **arg_states = NULL;
+static char **arg_properties = NULL;
+static bool arg_all = false;
+static enum dependency {
+ DEPENDENCY_FORWARD,
+ DEPENDENCY_REVERSE,
+ DEPENDENCY_AFTER,
+ DEPENDENCY_BEFORE,
+ _DEPENDENCY_MAX
+} arg_dependency = DEPENDENCY_FORWARD;
+static const char *arg_job_mode = "replace";
+static UnitFileScope arg_scope = UNIT_FILE_SYSTEM;
+static bool arg_wait = false;
+static bool arg_no_block = false;
+static bool arg_no_legend = false;
+static PagerFlags arg_pager_flags = 0;
+static bool arg_no_wtmp = false;
+static bool arg_no_sync = false;
+static bool arg_no_wall = false;
+static bool arg_no_reload = false;
+static bool arg_value = false;
+static bool arg_show_types = false;
+static bool arg_ignore_inhibitors = false;
+static bool arg_dry_run = false;
+static bool arg_quiet = false;
+static bool arg_full = false;
+static bool arg_recursive = false;
+static int arg_force = 0;
+static bool arg_ask_password = false;
+static bool arg_runtime = false;
+static UnitFilePresetMode arg_preset_mode = UNIT_FILE_PRESET_FULL;
+static char **arg_wall = NULL;
+static const char *arg_kill_who = NULL;
+static int arg_signal = SIGTERM;
+static char *arg_root = NULL;
+static usec_t arg_when = 0;
+static char *arg_esp_path = NULL;
+static char *argv_cmdline = NULL;
+static enum action {
+ ACTION_SYSTEMCTL,
+ ACTION_HALT,
+ ACTION_POWEROFF,
+ ACTION_REBOOT,
+ ACTION_KEXEC,
+ ACTION_EXIT,
+ ACTION_SUSPEND,
+ ACTION_HIBERNATE,
+ ACTION_HYBRID_SLEEP,
+ ACTION_SUSPEND_THEN_HIBERNATE,
+ ACTION_RUNLEVEL2,
+ ACTION_RUNLEVEL3,
+ ACTION_RUNLEVEL4,
+ ACTION_RUNLEVEL5,
+ ACTION_RESCUE,
+ ACTION_EMERGENCY,
+ ACTION_DEFAULT,
+ ACTION_RELOAD,
+ ACTION_REEXEC,
+ ACTION_RUNLEVEL,
+ ACTION_CANCEL_SHUTDOWN,
+ _ACTION_MAX,
+ _ACTION_INVALID = -1
+} arg_action = ACTION_SYSTEMCTL;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static const char *arg_host = NULL;
+static unsigned arg_lines = 10;
+static OutputMode arg_output = OUTPUT_SHORT;
+static bool arg_plain = false;
+static bool arg_firmware_setup = false;
+static bool arg_now = false;
+static bool arg_jobs_before = false;
+static bool arg_jobs_after = false;
+
+static int daemon_reload(int argc, char *argv[], void* userdata);
+static int trivial_method(int argc, char *argv[], void *userdata);
+static int halt_now(enum action a);
+static int get_state_one_unit(sd_bus *bus, const char *name, UnitActiveState *active_state);
+
+static bool original_stdout_is_tty;
+
+typedef enum BusFocus {
+ BUS_FULL, /* The full bus indicated via --system or --user */
+ BUS_MANAGER, /* The manager itself, possibly directly, possibly via the bus */
+ _BUS_FOCUS_MAX
+} BusFocus;
+
+static sd_bus *busses[_BUS_FOCUS_MAX] = {};
+
+static UnitFileFlags args_to_flags(void) {
+ return (arg_runtime ? UNIT_FILE_RUNTIME : 0) |
+ (arg_force ? UNIT_FILE_FORCE : 0);
+}
+
+static int acquire_bus(BusFocus focus, sd_bus **ret) {
+ int r;
+
+ assert(focus < _BUS_FOCUS_MAX);
+ assert(ret);
+
+ /* We only go directly to the manager, if we are using a local transport */
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ focus = BUS_FULL;
+
+ if (getenv_bool("SYSTEMCTL_FORCE_BUS") > 0)
+ focus = BUS_FULL;
+
+ if (!busses[focus]) {
+ bool user;
+
+ user = arg_scope != UNIT_FILE_SYSTEM;
+
+ if (focus == BUS_MANAGER)
+ r = bus_connect_transport_systemd(arg_transport, arg_host, user, &busses[focus]);
+ else
+ r = bus_connect_transport(arg_transport, arg_host, user, &busses[focus]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to bus: %m");
+
+ (void) sd_bus_set_allow_interactive_authorization(busses[focus], arg_ask_password);
+ }
+
+ *ret = busses[focus];
+ return 0;
+}
+
+static void release_busses(void) {
+ BusFocus w;
+
+ for (w = 0; w < _BUS_FOCUS_MAX; w++)
+ busses[w] = sd_bus_flush_close_unref(busses[w]);
+}
+
+static void ask_password_agent_open_if_enabled(void) {
+ /* Open the password agent as a child process if necessary */
+
+ if (arg_dry_run)
+ return;
+
+ if (!arg_ask_password)
+ return;
+
+ if (arg_scope != UNIT_FILE_SYSTEM)
+ return;
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ return;
+
+ ask_password_agent_open();
+}
+
+static void polkit_agent_open_maybe(void) {
+ /* Open the polkit agent as a child process if necessary */
+
+ if (arg_scope != UNIT_FILE_SYSTEM)
+ return;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+}
+
+static OutputFlags get_output_flags(void) {
+ return
+ arg_all * OUTPUT_SHOW_ALL |
+ (arg_full || !on_tty() || pager_have()) * OUTPUT_FULL_WIDTH |
+ colors_enabled() * OUTPUT_COLOR |
+ !arg_quiet * OUTPUT_WARN_CUTOFF;
+}
+
+static int translate_bus_error_to_exit_status(int r, const sd_bus_error *error) {
+ assert(error);
+
+ if (!sd_bus_error_is_set(error))
+ return r;
+
+ if (sd_bus_error_has_name(error, SD_BUS_ERROR_ACCESS_DENIED) ||
+ sd_bus_error_has_name(error, BUS_ERROR_ONLY_BY_DEPENDENCY) ||
+ sd_bus_error_has_name(error, BUS_ERROR_NO_ISOLATION) ||
+ sd_bus_error_has_name(error, BUS_ERROR_TRANSACTION_IS_DESTRUCTIVE))
+ return EXIT_NOPERMISSION;
+
+ if (sd_bus_error_has_name(error, BUS_ERROR_NO_SUCH_UNIT))
+ return EXIT_NOTINSTALLED;
+
+ if (sd_bus_error_has_name(error, BUS_ERROR_JOB_TYPE_NOT_APPLICABLE) ||
+ sd_bus_error_has_name(error, SD_BUS_ERROR_NOT_SUPPORTED))
+ return EXIT_NOTIMPLEMENTED;
+
+ if (sd_bus_error_has_name(error, BUS_ERROR_LOAD_FAILED))
+ return EXIT_NOTCONFIGURED;
+
+ if (r != 0)
+ return r;
+
+ return EXIT_FAILURE;
+}
+
+static bool install_client_side(void) {
+ /* Decides when to execute enable/disable/... operations
+ * client-side rather than server-side. */
+
+ if (running_in_chroot_or_offline())
+ return true;
+
+ if (sd_booted() <= 0)
+ return true;
+
+ if (!isempty(arg_root))
+ return true;
+
+ if (arg_scope == UNIT_FILE_GLOBAL)
+ return true;
+
+ /* Unsupported environment variable, mostly for debugging purposes */
+ if (getenv_bool("SYSTEMCTL_INSTALL_CLIENT_SIDE") > 0)
+ return true;
+
+ return false;
+}
+
+static int compare_unit_info(const UnitInfo *a, const UnitInfo *b) {
+ const char *d1, *d2;
+ int r;
+
+ /* First, order by machine */
+ if (!a->machine && b->machine)
+ return -1;
+ if (a->machine && !b->machine)
+ return 1;
+ if (a->machine && b->machine) {
+ r = strcasecmp(a->machine, b->machine);
+ if (r != 0)
+ return r;
+ }
+
+ /* Second, order by unit type */
+ d1 = strrchr(a->id, '.');
+ d2 = strrchr(b->id, '.');
+ if (d1 && d2) {
+ r = strcasecmp(d1, d2);
+ if (r != 0)
+ return r;
+ }
+
+ /* Third, order by name */
+ return strcasecmp(a->id, b->id);
+}
+
+static const char* unit_type_suffix(const char *name) {
+ const char *dot;
+
+ dot = strrchr(name, '.');
+ if (!dot)
+ return "";
+
+ return dot + 1;
+}
+
+static bool output_show_unit(const UnitInfo *u, char **patterns) {
+ assert(u);
+
+ if (!strv_fnmatch_or_empty(patterns, u->id, FNM_NOESCAPE))
+ return false;
+
+ if (arg_types && !strv_find(arg_types, unit_type_suffix(u->id)))
+ return false;
+
+ if (arg_all)
+ return true;
+
+ /* Note that '--all' is not purely a state filter, but also a
+ * filter that hides units that "follow" other units (which is
+ * used for device units that appear under different names). */
+ if (!isempty(u->following))
+ return false;
+
+ if (!strv_isempty(arg_states))
+ return true;
+
+ /* By default show all units except the ones in inactive
+ * state and with no pending job */
+ if (u->job_id > 0)
+ return true;
+
+ if (streq(u->active_state, "inactive"))
+ return false;
+
+ return true;
+}
+
+static int output_units_list(const UnitInfo *unit_infos, unsigned c) {
+ unsigned circle_len = 0, id_len, max_id_len, load_len, active_len, sub_len, job_len, desc_len, max_desc_len;
+ const UnitInfo *u;
+ unsigned n_shown = 0;
+ int job_count = 0;
+ bool full = arg_full || FLAGS_SET(arg_pager_flags, PAGER_DISABLE);
+
+ max_id_len = STRLEN("UNIT");
+ load_len = STRLEN("LOAD");
+ active_len = STRLEN("ACTIVE");
+ sub_len = STRLEN("SUB");
+ job_len = STRLEN("JOB");
+ max_desc_len = STRLEN("DESCRIPTION");
+
+ for (u = unit_infos; u < unit_infos + c; u++) {
+ max_id_len = MAX(max_id_len, strlen(u->id) + (u->machine ? strlen(u->machine)+1 : 0));
+ load_len = MAX(load_len, strlen(u->load_state));
+ active_len = MAX(active_len, strlen(u->active_state));
+ sub_len = MAX(sub_len, strlen(u->sub_state));
+ max_desc_len = MAX(max_desc_len, strlen(u->description));
+
+ if (u->job_id != 0) {
+ job_len = MAX(job_len, strlen(u->job_type));
+ job_count++;
+ }
+
+ if (!arg_no_legend &&
+ (streq(u->active_state, "failed") ||
+ STR_IN_SET(u->load_state, "error", "not-found", "bad-setting", "masked")))
+ circle_len = 2;
+ }
+
+ if (!arg_full && original_stdout_is_tty) {
+ unsigned basic_len;
+
+ id_len = MIN(max_id_len, 25u); /* as much as it needs, but at most 25 for now */
+ basic_len = circle_len + 1 + id_len + 1 + load_len + 1 + active_len + 1 + sub_len + 1;
+
+ if (job_count)
+ basic_len += job_len + 1;
+
+ if (basic_len < (unsigned) columns()) {
+ unsigned extra_len, incr;
+ extra_len = columns() - basic_len;
+
+ /* Either UNIT already got 25, or is fully satisfied.
+ * Grant up to 25 to DESC now. */
+ incr = MIN(extra_len, 25u);
+ desc_len = incr;
+ extra_len -= incr;
+
+ /* Of the remainder give as much as the ID needs to the ID, and give the rest to the
+ * description but not more than it needs. */
+ if (extra_len > 0) {
+ incr = MIN(max_id_len - id_len, extra_len);
+ id_len += incr;
+ desc_len += MIN(extra_len - incr, max_desc_len - desc_len);
+ }
+ } else
+ desc_len = 0;
+ } else {
+ id_len = max_id_len;
+ desc_len = max_desc_len;
+ }
+
+ for (u = unit_infos; u < unit_infos + c; u++) {
+ _cleanup_free_ char *e = NULL, *j = NULL;
+ const char *on_underline = "", *off_underline = "";
+ const char *on_loaded = "", *off_loaded = "";
+ const char *on_active = "", *off_active = "";
+ const char *on_circle = "", *off_circle = "";
+ const char *id;
+ bool circle = false, underline = false;
+
+ if (!n_shown && !arg_no_legend) {
+
+ if (circle_len > 0)
+ fputs(" ", stdout);
+
+ printf("%s%-*s %-*s %-*s %-*s ",
+ ansi_underline(),
+ id_len, "UNIT",
+ load_len, "LOAD",
+ active_len, "ACTIVE",
+ sub_len, "SUB");
+
+ if (job_count)
+ printf("%-*s ", job_len, "JOB");
+
+ printf("%-*.*s%s\n",
+ desc_len,
+ full ? -1 : (int) desc_len,
+ "DESCRIPTION",
+ ansi_normal());
+ }
+
+ n_shown++;
+
+ if (u + 1 < unit_infos + c &&
+ !streq(unit_type_suffix(u->id), unit_type_suffix((u + 1)->id))) {
+ on_underline = ansi_underline();
+ off_underline = ansi_normal();
+ underline = true;
+ }
+
+ if (STR_IN_SET(u->load_state, "error", "not-found", "bad-setting", "masked") && !arg_plain) {
+ on_circle = ansi_highlight_yellow();
+ off_circle = ansi_normal();
+ circle = true;
+ on_loaded = underline ? ansi_highlight_red_underline() : ansi_highlight_red();
+ off_loaded = underline ? on_underline : ansi_normal();
+ } else if (streq(u->active_state, "failed") && !arg_plain) {
+ on_circle = ansi_highlight_red();
+ off_circle = ansi_normal();
+ circle = true;
+ on_active = underline ? ansi_highlight_red_underline() : ansi_highlight_red();
+ off_active = underline ? on_underline : ansi_normal();
+ }
+
+ if (u->machine) {
+ j = strjoin(u->machine, ":", u->id);
+ if (!j)
+ return log_oom();
+
+ id = j;
+ } else
+ id = u->id;
+
+ if (arg_full) {
+ e = ellipsize(id, id_len, 33);
+ if (!e)
+ return log_oom();
+
+ id = e;
+ }
+
+ if (circle_len > 0)
+ printf("%s%s%s ", on_circle, circle ? special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE) : " ", off_circle);
+
+ printf("%s%s%-*s%s %s%-*s%s %s%-*s %-*s%s %-*s",
+ on_underline,
+ on_active, id_len, id, off_active,
+ on_loaded, load_len, u->load_state, off_loaded,
+ on_active, active_len, u->active_state,
+ sub_len, u->sub_state, off_active,
+ job_count ? job_len + 1 : 0, u->job_id ? u->job_type : "");
+
+ printf("%-*.*s%s\n",
+ desc_len,
+ full ? -1 : (int) desc_len,
+ u->description,
+ off_underline);
+ }
+
+ if (!arg_no_legend) {
+ const char *on, *off;
+
+ if (n_shown) {
+ puts("\n"
+ "LOAD = Reflects whether the unit definition was properly loaded.\n"
+ "ACTIVE = The high-level unit activation state, i.e. generalization of SUB.\n"
+ "SUB = The low-level unit activation state, values depend on unit type.");
+ puts(job_count ? "JOB = Pending job for the unit.\n" : "");
+ on = ansi_highlight();
+ off = ansi_normal();
+ } else {
+ on = ansi_highlight_red();
+ off = ansi_normal();
+ }
+
+ if (arg_all)
+ printf("%s%u loaded units listed.%s\n"
+ "To show all installed unit files use 'systemctl list-unit-files'.\n",
+ on, n_shown, off);
+ else
+ printf("%s%u loaded units listed.%s Pass --all to see loaded but inactive units, too.\n"
+ "To show all installed unit files use 'systemctl list-unit-files'.\n",
+ on, n_shown, off);
+ }
+
+ return 0;
+}
+
+static int get_unit_list(
+ sd_bus *bus,
+ const char *machine,
+ char **patterns,
+ UnitInfo **unit_infos,
+ int c,
+ sd_bus_message **_reply) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ size_t size = c;
+ int r;
+ UnitInfo u;
+ bool fallback = false;
+
+ assert(bus);
+ assert(unit_infos);
+ assert(_reply);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ListUnitsByPatterns");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, arg_states);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, patterns);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0 && (sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD) ||
+ sd_bus_error_has_name(&error, SD_BUS_ERROR_ACCESS_DENIED))) {
+ /* Fallback to legacy ListUnitsFiltered method */
+ fallback = true;
+ log_debug_errno(r, "Failed to list units: %s Falling back to ListUnitsFiltered method.", bus_error_message(&error, r));
+ m = sd_bus_message_unref(m);
+ sd_bus_error_free(&error);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ListUnitsFiltered");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, arg_states);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to list units: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssssouso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = bus_parse_unit_info(reply, &u)) > 0) {
+ u.machine = machine;
+
+ if (!output_show_unit(&u, fallback ? patterns : NULL))
+ continue;
+
+ if (!GREEDY_REALLOC(*unit_infos, size, c+1))
+ return log_oom();
+
+ (*unit_infos)[c++] = u;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ *_reply = TAKE_PTR(reply);
+
+ return c;
+}
+
+static void message_set_freep(Set **set) {
+ set_free_with_destructor(*set, sd_bus_message_unref);
+}
+
+static int get_unit_list_recursive(
+ sd_bus *bus,
+ char **patterns,
+ UnitInfo **_unit_infos,
+ Set **_replies,
+ char ***_machines) {
+
+ _cleanup_free_ UnitInfo *unit_infos = NULL;
+ _cleanup_(message_set_freep) Set *replies;
+ sd_bus_message *reply;
+ int c, r;
+
+ assert(bus);
+ assert(_replies);
+ assert(_unit_infos);
+ assert(_machines);
+
+ replies = set_new(NULL);
+ if (!replies)
+ return log_oom();
+
+ c = get_unit_list(bus, NULL, patterns, &unit_infos, 0, &reply);
+ if (c < 0)
+ return c;
+
+ r = set_put(replies, reply);
+ if (r < 0) {
+ sd_bus_message_unref(reply);
+ return log_oom();
+ }
+
+ if (arg_recursive) {
+ _cleanup_strv_free_ char **machines = NULL;
+ char **i;
+
+ r = sd_get_machine_names(&machines);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get machine names: %m");
+
+ STRV_FOREACH(i, machines) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *container = NULL;
+ int k;
+
+ r = sd_bus_open_system_machine(&container, *i);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to connect to container %s, ignoring: %m", *i);
+ continue;
+ }
+
+ k = get_unit_list(container, *i, patterns, &unit_infos, c, &reply);
+ if (k < 0)
+ return k;
+
+ c = k;
+
+ r = set_put(replies, reply);
+ if (r < 0) {
+ sd_bus_message_unref(reply);
+ return log_oom();
+ }
+ }
+
+ *_machines = TAKE_PTR(machines);
+ } else
+ *_machines = NULL;
+
+ *_unit_infos = TAKE_PTR(unit_infos);
+
+ *_replies = TAKE_PTR(replies);
+
+ return c;
+}
+
+static int list_units(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ UnitInfo *unit_infos = NULL;
+ _cleanup_(message_set_freep) Set *replies = NULL;
+ _cleanup_strv_free_ char **machines = NULL;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = get_unit_list_recursive(bus, strv_skip(argv, 1), &unit_infos, &replies, &machines);
+ if (r < 0)
+ return r;
+
+ typesafe_qsort(unit_infos, r, compare_unit_info);
+ return output_units_list(unit_infos, r);
+}
+
+static int get_triggered_units(
+ sd_bus *bus,
+ const char* path,
+ char*** ret) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(ret);
+
+ r = sd_bus_get_property_strv(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "Triggers",
+ &error,
+ ret);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine triggers: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int get_listening(
+ sd_bus *bus,
+ const char* unit_path,
+ char*** listening) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *type, *path;
+ int r, n = 0;
+
+ r = sd_bus_get_property(
+ bus,
+ "org.freedesktop.systemd1",
+ unit_path,
+ "org.freedesktop.systemd1.Socket",
+ "Listen",
+ &error,
+ &reply,
+ "a(ss)");
+ if (r < 0)
+ return log_error_errno(r, "Failed to get list of listening sockets: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "(ss)", &type, &path)) > 0) {
+
+ r = strv_extend(listening, type);
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extend(listening, path);
+ if (r < 0)
+ return log_oom();
+
+ n++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return n;
+}
+
+struct socket_info {
+ const char *machine;
+ const char* id;
+
+ char* type;
+ char* path;
+
+ /* Note: triggered is a list here, although it almost certainly
+ * will always be one unit. Nevertheless, dbus API allows for multiple
+ * values, so let's follow that. */
+ char** triggered;
+
+ /* The strv above is shared. free is set only in the first one. */
+ bool own_triggered;
+};
+
+static int socket_info_compare(const struct socket_info *a, const struct socket_info *b) {
+ int r;
+
+ assert(a);
+ assert(b);
+
+ if (!a->machine && b->machine)
+ return -1;
+ if (a->machine && !b->machine)
+ return 1;
+ if (a->machine && b->machine) {
+ r = strcasecmp(a->machine, b->machine);
+ if (r != 0)
+ return r;
+ }
+
+ r = strcmp(a->path, b->path);
+ if (r == 0)
+ r = strcmp(a->type, b->type);
+
+ return r;
+}
+
+static int output_sockets_list(struct socket_info *socket_infos, unsigned cs) {
+ struct socket_info *s;
+ unsigned pathlen = STRLEN("LISTEN"),
+ typelen = STRLEN("TYPE") * arg_show_types,
+ socklen = STRLEN("UNIT"),
+ servlen = STRLEN("ACTIVATES");
+ const char *on, *off;
+
+ for (s = socket_infos; s < socket_infos + cs; s++) {
+ unsigned tmp = 0;
+ char **a;
+
+ socklen = MAX(socklen, strlen(s->id));
+ if (arg_show_types)
+ typelen = MAX(typelen, strlen(s->type));
+ pathlen = MAX(pathlen, strlen(s->path) + (s->machine ? strlen(s->machine)+1 : 0));
+
+ STRV_FOREACH(a, s->triggered)
+ tmp += strlen(*a) + 2*(a != s->triggered);
+ servlen = MAX(servlen, tmp);
+ }
+
+ if (cs) {
+ if (!arg_no_legend)
+ printf("%-*s %-*.*s%-*s %s\n",
+ pathlen, "LISTEN",
+ typelen + arg_show_types, typelen + arg_show_types, "TYPE ",
+ socklen, "UNIT",
+ "ACTIVATES");
+
+ for (s = socket_infos; s < socket_infos + cs; s++) {
+ _cleanup_free_ char *j = NULL;
+ const char *path;
+ char **a;
+
+ if (s->machine) {
+ j = strjoin(s->machine, ":", s->path);
+ if (!j)
+ return log_oom();
+ path = j;
+ } else
+ path = s->path;
+
+ if (arg_show_types)
+ printf("%-*s %-*s %-*s",
+ pathlen, path, typelen, s->type, socklen, s->id);
+ else
+ printf("%-*s %-*s",
+ pathlen, path, socklen, s->id);
+ STRV_FOREACH(a, s->triggered)
+ printf("%s %s",
+ a == s->triggered ? "" : ",", *a);
+ printf("\n");
+ }
+
+ on = ansi_highlight();
+ off = ansi_normal();
+ if (!arg_no_legend)
+ printf("\n");
+ } else {
+ on = ansi_highlight_red();
+ off = ansi_normal();
+ }
+
+ if (!arg_no_legend) {
+ printf("%s%u sockets listed.%s\n", on, cs, off);
+ if (!arg_all)
+ printf("Pass --all to see loaded but inactive sockets, too.\n");
+ }
+
+ return 0;
+}
+
+static int list_sockets(int argc, char *argv[], void *userdata) {
+ _cleanup_(message_set_freep) Set *replies = NULL;
+ _cleanup_strv_free_ char **machines = NULL;
+ _cleanup_free_ UnitInfo *unit_infos = NULL;
+ _cleanup_free_ struct socket_info *socket_infos = NULL;
+ const UnitInfo *u;
+ struct socket_info *s;
+ unsigned cs = 0;
+ size_t size = 0;
+ int r = 0, n;
+ sd_bus *bus;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ n = get_unit_list_recursive(bus, strv_skip(argv, 1), &unit_infos, &replies, &machines);
+ if (n < 0)
+ return n;
+
+ for (u = unit_infos; u < unit_infos + n; u++) {
+ _cleanup_strv_free_ char **listening = NULL, **triggered = NULL;
+ int i, c;
+
+ if (!endswith(u->id, ".socket"))
+ continue;
+
+ r = get_triggered_units(bus, u->unit_path, &triggered);
+ if (r < 0)
+ goto cleanup;
+
+ c = get_listening(bus, u->unit_path, &listening);
+ if (c < 0) {
+ r = c;
+ goto cleanup;
+ }
+
+ if (!GREEDY_REALLOC(socket_infos, size, cs + c)) {
+ r = log_oom();
+ goto cleanup;
+ }
+
+ for (i = 0; i < c; i++)
+ socket_infos[cs + i] = (struct socket_info) {
+ .machine = u->machine,
+ .id = u->id,
+ .type = listening[i*2],
+ .path = listening[i*2 + 1],
+ .triggered = triggered,
+ .own_triggered = i==0,
+ };
+
+ /* from this point on we will cleanup those socket_infos */
+ cs += c;
+ free(listening);
+ listening = triggered = NULL; /* avoid cleanup */
+ }
+
+ typesafe_qsort(socket_infos, cs, socket_info_compare);
+
+ output_sockets_list(socket_infos, cs);
+
+ cleanup:
+ assert(cs == 0 || socket_infos);
+ for (s = socket_infos; s < socket_infos + cs; s++) {
+ free(s->type);
+ free(s->path);
+ if (s->own_triggered)
+ strv_free(s->triggered);
+ }
+
+ return r;
+}
+
+static int get_next_elapse(
+ sd_bus *bus,
+ const char *path,
+ dual_timestamp *next) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ dual_timestamp t;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(next);
+
+ r = sd_bus_get_property_trivial(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Timer",
+ "NextElapseUSecMonotonic",
+ &error,
+ 't',
+ &t.monotonic);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get next elapse time: %s", bus_error_message(&error, r));
+
+ r = sd_bus_get_property_trivial(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Timer",
+ "NextElapseUSecRealtime",
+ &error,
+ 't',
+ &t.realtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get next elapse time: %s", bus_error_message(&error, r));
+
+ *next = t;
+ return 0;
+}
+
+static int get_last_trigger(
+ sd_bus *bus,
+ const char *path,
+ usec_t *last) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(last);
+
+ r = sd_bus_get_property_trivial(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Timer",
+ "LastTriggerUSec",
+ &error,
+ 't',
+ last);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get last trigger time: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+struct timer_info {
+ const char* machine;
+ const char* id;
+ usec_t next_elapse;
+ usec_t last_trigger;
+ char** triggered;
+};
+
+static int timer_info_compare(const struct timer_info *a, const struct timer_info *b) {
+ int r;
+
+ assert(a);
+ assert(b);
+
+ if (!a->machine && b->machine)
+ return -1;
+ if (a->machine && !b->machine)
+ return 1;
+ if (a->machine && b->machine) {
+ r = strcasecmp(a->machine, b->machine);
+ if (r != 0)
+ return r;
+ }
+
+ r = CMP(a->next_elapse, b->next_elapse);
+ if (r != 0)
+ return r;
+
+ return strcmp(a->id, b->id);
+}
+
+static int output_timers_list(struct timer_info *timer_infos, unsigned n) {
+ struct timer_info *t;
+ unsigned
+ nextlen = STRLEN("NEXT"),
+ leftlen = STRLEN("LEFT"),
+ lastlen = STRLEN("LAST"),
+ passedlen = STRLEN("PASSED"),
+ unitlen = STRLEN("UNIT"),
+ activatelen = STRLEN("ACTIVATES");
+
+ const char *on, *off;
+
+ assert(timer_infos || n == 0);
+
+ for (t = timer_infos; t < timer_infos + n; t++) {
+ unsigned ul = 0;
+ char **a;
+
+ if (t->next_elapse > 0) {
+ char tstamp[FORMAT_TIMESTAMP_MAX] = "", trel[FORMAT_TIMESTAMP_RELATIVE_MAX] = "";
+
+ format_timestamp(tstamp, sizeof(tstamp), t->next_elapse);
+ nextlen = MAX(nextlen, strlen(tstamp) + 1);
+
+ format_timestamp_relative(trel, sizeof(trel), t->next_elapse);
+ leftlen = MAX(leftlen, strlen(trel));
+ }
+
+ if (t->last_trigger > 0) {
+ char tstamp[FORMAT_TIMESTAMP_MAX] = "", trel[FORMAT_TIMESTAMP_RELATIVE_MAX] = "";
+
+ format_timestamp(tstamp, sizeof(tstamp), t->last_trigger);
+ lastlen = MAX(lastlen, strlen(tstamp) + 1);
+
+ format_timestamp_relative(trel, sizeof(trel), t->last_trigger);
+ passedlen = MAX(passedlen, strlen(trel));
+ }
+
+ unitlen = MAX(unitlen, strlen(t->id) + (t->machine ? strlen(t->machine)+1 : 0));
+
+ STRV_FOREACH(a, t->triggered)
+ ul += strlen(*a) + 2*(a != t->triggered);
+
+ activatelen = MAX(activatelen, ul);
+ }
+
+ if (n > 0) {
+ if (!arg_no_legend)
+ printf("%-*s %-*s %-*s %-*s %-*s %s\n",
+ nextlen, "NEXT",
+ leftlen, "LEFT",
+ lastlen, "LAST",
+ passedlen, "PASSED",
+ unitlen, "UNIT",
+ "ACTIVATES");
+
+ for (t = timer_infos; t < timer_infos + n; t++) {
+ _cleanup_free_ char *j = NULL;
+ const char *unit;
+ char tstamp1[FORMAT_TIMESTAMP_MAX] = "n/a", trel1[FORMAT_TIMESTAMP_RELATIVE_MAX] = "n/a";
+ char tstamp2[FORMAT_TIMESTAMP_MAX] = "n/a", trel2[FORMAT_TIMESTAMP_RELATIVE_MAX] = "n/a";
+ char **a;
+
+ format_timestamp(tstamp1, sizeof(tstamp1), t->next_elapse);
+ format_timestamp_relative(trel1, sizeof(trel1), t->next_elapse);
+
+ format_timestamp(tstamp2, sizeof(tstamp2), t->last_trigger);
+ format_timestamp_relative(trel2, sizeof(trel2), t->last_trigger);
+
+ if (t->machine) {
+ j = strjoin(t->machine, ":", t->id);
+ if (!j)
+ return log_oom();
+ unit = j;
+ } else
+ unit = t->id;
+
+ printf("%-*s %-*s %-*s %-*s %-*s",
+ nextlen, tstamp1, leftlen, trel1, lastlen, tstamp2, passedlen, trel2, unitlen, unit);
+
+ STRV_FOREACH(a, t->triggered)
+ printf("%s %s",
+ a == t->triggered ? "" : ",", *a);
+ printf("\n");
+ }
+
+ on = ansi_highlight();
+ off = ansi_normal();
+ if (!arg_no_legend)
+ printf("\n");
+ } else {
+ on = ansi_highlight_red();
+ off = ansi_normal();
+ }
+
+ if (!arg_no_legend) {
+ printf("%s%u timers listed.%s\n", on, n, off);
+ if (!arg_all)
+ printf("Pass --all to see loaded but inactive timers, too.\n");
+ }
+
+ return 0;
+}
+
+static usec_t calc_next_elapse(dual_timestamp *nw, dual_timestamp *next) {
+ usec_t next_elapse;
+
+ assert(nw);
+ assert(next);
+
+ if (next->monotonic != USEC_INFINITY && next->monotonic > 0) {
+ usec_t converted;
+
+ if (next->monotonic > nw->monotonic)
+ converted = nw->realtime + (next->monotonic - nw->monotonic);
+ else
+ converted = nw->realtime - (nw->monotonic - next->monotonic);
+
+ if (next->realtime != USEC_INFINITY && next->realtime > 0)
+ next_elapse = MIN(converted, next->realtime);
+ else
+ next_elapse = converted;
+
+ } else
+ next_elapse = next->realtime;
+
+ return next_elapse;
+}
+
+static int list_timers(int argc, char *argv[], void *userdata) {
+ _cleanup_(message_set_freep) Set *replies = NULL;
+ _cleanup_strv_free_ char **machines = NULL;
+ _cleanup_free_ struct timer_info *timer_infos = NULL;
+ _cleanup_free_ UnitInfo *unit_infos = NULL;
+ struct timer_info *t;
+ const UnitInfo *u;
+ size_t size = 0;
+ int n, c = 0;
+ dual_timestamp nw;
+ sd_bus *bus;
+ int r = 0;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ n = get_unit_list_recursive(bus, strv_skip(argv, 1), &unit_infos, &replies, &machines);
+ if (n < 0)
+ return n;
+
+ dual_timestamp_get(&nw);
+
+ for (u = unit_infos; u < unit_infos + n; u++) {
+ _cleanup_strv_free_ char **triggered = NULL;
+ dual_timestamp next = DUAL_TIMESTAMP_NULL;
+ usec_t m, last = 0;
+
+ if (!endswith(u->id, ".timer"))
+ continue;
+
+ r = get_triggered_units(bus, u->unit_path, &triggered);
+ if (r < 0)
+ goto cleanup;
+
+ r = get_next_elapse(bus, u->unit_path, &next);
+ if (r < 0)
+ goto cleanup;
+
+ get_last_trigger(bus, u->unit_path, &last);
+
+ if (!GREEDY_REALLOC(timer_infos, size, c+1)) {
+ r = log_oom();
+ goto cleanup;
+ }
+
+ m = calc_next_elapse(&nw, &next);
+
+ timer_infos[c++] = (struct timer_info) {
+ .machine = u->machine,
+ .id = u->id,
+ .next_elapse = m,
+ .last_trigger = last,
+ .triggered = TAKE_PTR(triggered),
+ };
+ }
+
+ typesafe_qsort(timer_infos, c, timer_info_compare);
+
+ output_timers_list(timer_infos, c);
+
+ cleanup:
+ for (t = timer_infos; t < timer_infos + c; t++)
+ strv_free(t->triggered);
+
+ return r;
+}
+
+static int compare_unit_file_list(const UnitFileList *a, const UnitFileList *b) {
+ const char *d1, *d2;
+
+ d1 = strrchr(a->path, '.');
+ d2 = strrchr(b->path, '.');
+
+ if (d1 && d2) {
+ int r;
+
+ r = strcasecmp(d1, d2);
+ if (r != 0)
+ return r;
+ }
+
+ return strcasecmp(basename(a->path), basename(b->path));
+}
+
+static bool output_show_unit_file(const UnitFileList *u, char **states, char **patterns) {
+ assert(u);
+
+ if (!strv_fnmatch_or_empty(patterns, basename(u->path), FNM_NOESCAPE))
+ return false;
+
+ if (!strv_isempty(arg_types)) {
+ const char *dot;
+
+ dot = strrchr(u->path, '.');
+ if (!dot)
+ return false;
+
+ if (!strv_find(arg_types, dot+1))
+ return false;
+ }
+
+ if (!strv_isempty(states) &&
+ !strv_find(states, unit_file_state_to_string(u->state)))
+ return false;
+
+ return true;
+}
+
+static void output_unit_file_list(const UnitFileList *units, unsigned c) {
+ unsigned max_id_len, id_cols, state_cols;
+ const UnitFileList *u;
+
+ max_id_len = STRLEN("UNIT FILE");
+ state_cols = STRLEN("STATE");
+
+ for (u = units; u < units + c; u++) {
+ max_id_len = MAX(max_id_len, strlen(basename(u->path)));
+ state_cols = MAX(state_cols, strlen(unit_file_state_to_string(u->state)));
+ }
+
+ if (!arg_full) {
+ unsigned basic_cols;
+
+ id_cols = MIN(max_id_len, 25u);
+ basic_cols = 1 + id_cols + state_cols;
+ if (basic_cols < (unsigned) columns())
+ id_cols += MIN(columns() - basic_cols, max_id_len - id_cols);
+ } else
+ id_cols = max_id_len;
+
+ if (!arg_no_legend && c > 0)
+ printf("%s%-*s %-*s%s\n",
+ ansi_underline(),
+ id_cols, "UNIT FILE",
+ state_cols, "STATE",
+ ansi_normal());
+
+ for (u = units; u < units + c; u++) {
+ const char *on_underline = NULL, *on_color = NULL, *off = NULL, *id;
+ _cleanup_free_ char *e = NULL;
+ bool underline;
+
+ underline = u + 1 < units + c &&
+ !streq(unit_type_suffix(u->path), unit_type_suffix((u + 1)->path));
+
+ if (underline)
+ on_underline = ansi_underline();
+
+ if (IN_SET(u->state,
+ UNIT_FILE_MASKED,
+ UNIT_FILE_MASKED_RUNTIME,
+ UNIT_FILE_DISABLED,
+ UNIT_FILE_BAD))
+ on_color = underline ? ansi_highlight_red_underline() : ansi_highlight_red();
+ else if (u->state == UNIT_FILE_ENABLED)
+ on_color = underline ? ansi_highlight_green_underline() : ansi_highlight_green();
+
+ if (on_underline || on_color)
+ off = ansi_normal();
+
+ id = basename(u->path);
+
+ e = arg_full ? NULL : ellipsize(id, id_cols, 33);
+
+ printf("%s%-*s %s%-*s%s\n",
+ strempty(on_underline),
+ id_cols, e ? e : id,
+ strempty(on_color), state_cols, unit_file_state_to_string(u->state), strempty(off));
+ }
+
+ if (!arg_no_legend)
+ printf("\n%u unit files listed.\n", c);
+}
+
+static int list_unit_files(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ UnitFileList *units = NULL;
+ UnitFileList *unit;
+ size_t size = 0;
+ unsigned c = 0;
+ const char *state;
+ char *path;
+ int r;
+ bool fallback = false;
+
+ if (install_client_side()) {
+ Hashmap *h;
+ UnitFileList *u;
+ Iterator i;
+ unsigned n_units;
+
+ h = hashmap_new(&string_hash_ops);
+ if (!h)
+ return log_oom();
+
+ r = unit_file_get_list(arg_scope, arg_root, h, arg_states, strv_skip(argv, 1));
+ if (r < 0) {
+ unit_file_list_free(h);
+ return log_error_errno(r, "Failed to get unit file list: %m");
+ }
+
+ n_units = hashmap_size(h);
+
+ units = new(UnitFileList, n_units ?: 1); /* avoid malloc(0) */
+ if (!units) {
+ unit_file_list_free(h);
+ return log_oom();
+ }
+
+ HASHMAP_FOREACH(u, h, i) {
+ if (!output_show_unit_file(u, NULL, NULL))
+ continue;
+
+ units[c++] = *u;
+ free(u);
+ }
+
+ assert(c <= n_units);
+ hashmap_free(h);
+
+ r = 0;
+ } else {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ListUnitFilesByPatterns");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, arg_states);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, strv_skip(argv, 1));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0 && sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD)) {
+ /* Fallback to legacy ListUnitFiles method */
+ fallback = true;
+ log_debug_errno(r, "Failed to list unit files: %s Falling back to ListUnitsFiles method.", bus_error_message(&error, r));
+ m = sd_bus_message_unref(m);
+ sd_bus_error_free(&error);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ListUnitFiles");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to list unit files: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "(ss)", &path, &state)) > 0) {
+
+ if (!GREEDY_REALLOC(units, size, c + 1))
+ return log_oom();
+
+ units[c] = (struct UnitFileList) {
+ path,
+ unit_file_state_from_string(state)
+ };
+
+ if (output_show_unit_file(&units[c],
+ fallback ? arg_states : NULL,
+ fallback ? strv_skip(argv, 1) : NULL))
+ c++;
+
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ (void) pager_open(arg_pager_flags);
+
+ typesafe_qsort(units, c, compare_unit_file_list);
+ output_unit_file_list(units, c);
+
+ if (install_client_side())
+ for (unit = units; unit < units + c; unit++)
+ free(unit->path);
+
+ return 0;
+}
+
+static int list_dependencies_print(const char *name, int level, unsigned branches, bool last) {
+ _cleanup_free_ char *n = NULL;
+ size_t max_len = MAX(columns(),20u);
+ size_t len = 0;
+ int i;
+
+ if (!arg_plain) {
+
+ for (i = level - 1; i >= 0; i--) {
+ len += 2;
+ if (len > max_len - 3 && !arg_full) {
+ printf("%s...\n",max_len % 2 ? "" : " ");
+ return 0;
+ }
+ printf("%s", special_glyph(branches & (1 << i) ? SPECIAL_GLYPH_TREE_VERTICAL : SPECIAL_GLYPH_TREE_SPACE));
+ }
+ len += 2;
+
+ if (len > max_len - 3 && !arg_full) {
+ printf("%s...\n",max_len % 2 ? "" : " ");
+ return 0;
+ }
+
+ printf("%s", special_glyph(last ? SPECIAL_GLYPH_TREE_RIGHT : SPECIAL_GLYPH_TREE_BRANCH));
+ }
+
+ if (arg_full) {
+ printf("%s\n", name);
+ return 0;
+ }
+
+ n = ellipsize(name, max_len-len, 100);
+ if (!n)
+ return log_oom();
+
+ printf("%s\n", n);
+ return 0;
+}
+
+static int list_dependencies_get_dependencies(sd_bus *bus, const char *name, char ***deps) {
+ struct DependencyStatusInfo {
+ char **dep[5];
+ } info = {};
+
+ static const struct bus_properties_map map[_DEPENDENCY_MAX][6] = {
+ [DEPENDENCY_FORWARD] = {
+ { "Requires", "as", NULL, offsetof(struct DependencyStatusInfo, dep[0]) },
+ { "Requisite", "as", NULL, offsetof(struct DependencyStatusInfo, dep[1]) },
+ { "Wants", "as", NULL, offsetof(struct DependencyStatusInfo, dep[2]) },
+ { "ConsistsOf", "as", NULL, offsetof(struct DependencyStatusInfo, dep[3]) },
+ { "BindsTo", "as", NULL, offsetof(struct DependencyStatusInfo, dep[4]) },
+ {}
+ },
+ [DEPENDENCY_REVERSE] = {
+ { "RequiredBy", "as", NULL, offsetof(struct DependencyStatusInfo, dep[0]) },
+ { "RequisiteOf", "as", NULL, offsetof(struct DependencyStatusInfo, dep[1]) },
+ { "WantedBy", "as", NULL, offsetof(struct DependencyStatusInfo, dep[2]) },
+ { "PartOf", "as", NULL, offsetof(struct DependencyStatusInfo, dep[3]) },
+ { "BoundBy", "as", NULL, offsetof(struct DependencyStatusInfo, dep[4]) },
+ {}
+ },
+ [DEPENDENCY_AFTER] = {
+ { "After", "as", NULL, offsetof(struct DependencyStatusInfo, dep[0]) },
+ {}
+ },
+ [DEPENDENCY_BEFORE] = {
+ { "Before", "as", NULL, offsetof(struct DependencyStatusInfo, dep[0]) },
+ {}
+ },
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_strv_free_ char **ret = NULL;
+ _cleanup_free_ char *path = NULL;
+ int i, r;
+
+ assert(bus);
+ assert(name);
+ assert(deps);
+
+ path = unit_dbus_path_from_name(name);
+ if (!path)
+ return log_oom();
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.systemd1",
+ path,
+ map[arg_dependency],
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get properties of %s: %s", name, bus_error_message(&error, r));
+
+ if (IN_SET(arg_dependency, DEPENDENCY_AFTER, DEPENDENCY_BEFORE)) {
+ *deps = info.dep[0];
+ return 0;
+ }
+
+ for (i = 0; i < 5; i++) {
+ r = strv_extend_strv(&ret, info.dep[i], true);
+ if (r < 0)
+ return log_oom();
+ info.dep[i] = strv_free(info.dep[i]);
+ }
+
+ *deps = TAKE_PTR(ret);
+
+ return 0;
+}
+
+static int list_dependencies_compare(char * const *a, char * const *b) {
+ if (unit_name_to_type(*a) == UNIT_TARGET && unit_name_to_type(*b) != UNIT_TARGET)
+ return 1;
+ if (unit_name_to_type(*a) != UNIT_TARGET && unit_name_to_type(*b) == UNIT_TARGET)
+ return -1;
+
+ return strcasecmp(*a, *b);
+}
+
+static int list_dependencies_one(
+ sd_bus *bus,
+ const char *name,
+ int level,
+ char ***units,
+ unsigned branches) {
+
+ _cleanup_strv_free_ char **deps = NULL;
+ char **c;
+ int r = 0;
+
+ assert(bus);
+ assert(name);
+ assert(units);
+
+ r = strv_extend(units, name);
+ if (r < 0)
+ return log_oom();
+
+ r = list_dependencies_get_dependencies(bus, name, &deps);
+ if (r < 0)
+ return r;
+
+ typesafe_qsort(deps, strv_length(deps), list_dependencies_compare);
+
+ STRV_FOREACH(c, deps) {
+ if (strv_contains(*units, *c)) {
+ if (!arg_plain) {
+ printf(" ");
+ r = list_dependencies_print("...", level + 1, (branches << 1) | (c[1] == NULL ? 0 : 1), 1);
+ if (r < 0)
+ return r;
+ }
+ continue;
+ }
+
+ if (arg_plain)
+ printf(" ");
+ else {
+ UnitActiveState active_state = _UNIT_ACTIVE_STATE_INVALID;
+ const char *on;
+
+ (void) get_state_one_unit(bus, *c, &active_state);
+
+ switch (active_state) {
+ case UNIT_ACTIVE:
+ case UNIT_RELOADING:
+ case UNIT_ACTIVATING:
+ on = ansi_highlight_green();
+ break;
+
+ case UNIT_INACTIVE:
+ case UNIT_DEACTIVATING:
+ on = ansi_normal();
+ break;
+
+ default:
+ on = ansi_highlight_red();
+ break;
+ }
+
+ printf("%s%s%s ", on, special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE), ansi_normal());
+ }
+
+ r = list_dependencies_print(*c, level, branches, c[1] == NULL);
+ if (r < 0)
+ return r;
+
+ if (arg_all || unit_name_to_type(*c) == UNIT_TARGET) {
+ r = list_dependencies_one(bus, *c, level + 1, units, (branches << 1) | (c[1] == NULL ? 0 : 1));
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!arg_plain)
+ strv_remove(*units, name);
+
+ return 0;
+}
+
+static int list_dependencies(int argc, char *argv[], void *userdata) {
+ _cleanup_strv_free_ char **units = NULL;
+ _cleanup_free_ char *unit = NULL;
+ const char *u;
+ sd_bus *bus;
+ int r;
+
+ if (argv[1]) {
+ r = unit_name_mangle(argv[1], arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN, &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+
+ u = unit;
+ } else
+ u = SPECIAL_DEFAULT_TARGET;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ puts(u);
+
+ return list_dependencies_one(bus, u, 0, &units, 0);
+}
+
+struct machine_info {
+ bool is_host;
+ char *name;
+ char *state;
+ char *control_group;
+ uint32_t n_failed_units;
+ uint32_t n_jobs;
+ usec_t timestamp;
+};
+
+static const struct bus_properties_map machine_info_property_map[] = {
+ { "SystemState", "s", NULL, offsetof(struct machine_info, state) },
+ { "NJobs", "u", NULL, offsetof(struct machine_info, n_jobs) },
+ { "NFailedUnits", "u", NULL, offsetof(struct machine_info, n_failed_units) },
+ { "ControlGroup", "s", NULL, offsetof(struct machine_info, control_group) },
+ { "UserspaceTimestamp", "t", NULL, offsetof(struct machine_info, timestamp) },
+ {}
+};
+
+static void machine_info_clear(struct machine_info *info) {
+ assert(info);
+
+ free(info->name);
+ free(info->state);
+ free(info->control_group);
+ zero(*info);
+}
+
+static void free_machines_list(struct machine_info *machine_infos, int n) {
+ int i;
+
+ if (!machine_infos)
+ return;
+
+ for (i = 0; i < n; i++)
+ machine_info_clear(&machine_infos[i]);
+
+ free(machine_infos);
+}
+
+static int compare_machine_info(const struct machine_info *a, const struct machine_info *b) {
+ int r;
+
+ r = CMP(b->is_host, a->is_host);
+ if (r != 0)
+ return r;
+
+ return strcasecmp(a->name, b->name);
+}
+
+static int get_machine_properties(sd_bus *bus, struct machine_info *mi) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *container = NULL;
+ int r;
+
+ assert(mi);
+
+ if (!bus) {
+ r = sd_bus_open_system_machine(&container, mi->name);
+ if (r < 0)
+ return r;
+
+ bus = container;
+ }
+
+ r = bus_map_all_properties(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ machine_info_property_map,
+ BUS_MAP_STRDUP,
+ NULL,
+ NULL,
+ mi);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static bool output_show_machine(const char *name, char **patterns) {
+ return strv_fnmatch_or_empty(patterns, name, FNM_NOESCAPE);
+}
+
+static int get_machine_list(
+ sd_bus *bus,
+ struct machine_info **_machine_infos,
+ char **patterns) {
+
+ struct machine_info *machine_infos = NULL;
+ _cleanup_strv_free_ char **m = NULL;
+ _cleanup_free_ char *hn = NULL;
+ size_t sz = 0;
+ char **i;
+ int c = 0, r;
+
+ hn = gethostname_malloc();
+ if (!hn)
+ return log_oom();
+
+ if (output_show_machine(hn, patterns)) {
+ if (!GREEDY_REALLOC0(machine_infos, sz, c+1))
+ return log_oom();
+
+ machine_infos[c].is_host = true;
+ machine_infos[c].name = TAKE_PTR(hn);
+
+ (void) get_machine_properties(bus, &machine_infos[c]);
+ c++;
+ }
+
+ r = sd_get_machine_names(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get machine list: %m");
+
+ STRV_FOREACH(i, m) {
+ _cleanup_free_ char *class = NULL;
+
+ if (!output_show_machine(*i, patterns))
+ continue;
+
+ sd_machine_get_class(*i, &class);
+ if (!streq_ptr(class, "container"))
+ continue;
+
+ if (!GREEDY_REALLOC0(machine_infos, sz, c+1)) {
+ free_machines_list(machine_infos, c);
+ return log_oom();
+ }
+
+ machine_infos[c].is_host = false;
+ machine_infos[c].name = strdup(*i);
+ if (!machine_infos[c].name) {
+ free_machines_list(machine_infos, c);
+ return log_oom();
+ }
+
+ (void) get_machine_properties(NULL, &machine_infos[c]);
+ c++;
+ }
+
+ *_machine_infos = machine_infos;
+ return c;
+}
+
+static void output_machines_list(struct machine_info *machine_infos, unsigned n) {
+ struct machine_info *m;
+ unsigned
+ circle_len = 0,
+ namelen = STRLEN("NAME"),
+ statelen = STRLEN("STATE"),
+ failedlen = STRLEN("FAILED"),
+ jobslen = STRLEN("JOBS");
+
+ assert(machine_infos || n == 0);
+
+ for (m = machine_infos; m < machine_infos + n; m++) {
+ namelen = MAX(namelen,
+ strlen(m->name) + (m->is_host ? STRLEN(" (host)") : 0));
+ statelen = MAX(statelen, strlen_ptr(m->state));
+ failedlen = MAX(failedlen, DECIMAL_STR_WIDTH(m->n_failed_units));
+ jobslen = MAX(jobslen, DECIMAL_STR_WIDTH(m->n_jobs));
+
+ if (!arg_plain && !streq_ptr(m->state, "running"))
+ circle_len = 2;
+ }
+
+ if (!arg_no_legend) {
+ if (circle_len > 0)
+ fputs(" ", stdout);
+
+ printf("%-*s %-*s %-*s %-*s\n",
+ namelen, "NAME",
+ statelen, "STATE",
+ failedlen, "FAILED",
+ jobslen, "JOBS");
+ }
+
+ for (m = machine_infos; m < machine_infos + n; m++) {
+ const char *on_state = "", *off_state = "";
+ const char *on_failed = "", *off_failed = "";
+ bool circle = false;
+
+ if (streq_ptr(m->state, "degraded")) {
+ on_state = ansi_highlight_red();
+ off_state = ansi_normal();
+ circle = true;
+ } else if (!streq_ptr(m->state, "running")) {
+ on_state = ansi_highlight_yellow();
+ off_state = ansi_normal();
+ circle = true;
+ }
+
+ if (m->n_failed_units > 0) {
+ on_failed = ansi_highlight_red();
+ off_failed = ansi_normal();
+ } else
+ on_failed = off_failed = "";
+
+ if (circle_len > 0)
+ printf("%s%s%s ", on_state, circle ? special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE) : " ", off_state);
+
+ if (m->is_host)
+ printf("%-*s (host) %s%-*s%s %s%*" PRIu32 "%s %*" PRIu32 "\n",
+ (int) (namelen - (STRLEN(" (host)"))),
+ strna(m->name),
+ on_state, statelen, strna(m->state), off_state,
+ on_failed, failedlen, m->n_failed_units, off_failed,
+ jobslen, m->n_jobs);
+ else
+ printf("%-*s %s%-*s%s %s%*" PRIu32 "%s %*" PRIu32 "\n",
+ namelen, strna(m->name),
+ on_state, statelen, strna(m->state), off_state,
+ on_failed, failedlen, m->n_failed_units, off_failed,
+ jobslen, m->n_jobs);
+ }
+
+ if (!arg_no_legend)
+ printf("\n%u machines listed.\n", n);
+}
+
+static int list_machines(int argc, char *argv[], void *userdata) {
+ struct machine_info *machine_infos = NULL;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = get_machine_list(bus, &machine_infos, strv_skip(argv, 1));
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ typesafe_qsort(machine_infos, r, compare_machine_info);
+ output_machines_list(machine_infos, r);
+ free_machines_list(machine_infos, r);
+
+ return 0;
+}
+
+static int get_default(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *_path = NULL;
+ const char *path;
+ int r;
+
+ if (install_client_side()) {
+ r = unit_file_get_default(arg_scope, arg_root, &_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get default target: %m");
+ path = _path;
+
+ r = 0;
+ } else {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "GetDefaultTarget",
+ &error,
+ &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get default target: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "s", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ if (path)
+ printf("%s\n", path);
+
+ return 0;
+}
+
+static int set_default(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *unit = NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ int r;
+
+ assert(argc >= 2);
+ assert(argv);
+
+ r = unit_name_mangle_with_suffix(argv[1], arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN, ".target", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+
+ if (install_client_side()) {
+ r = unit_file_set_default(arg_scope, UNIT_FILE_FORCE, arg_root, unit, &changes, &n_changes);
+ unit_file_dump_changes(r, "set default", changes, n_changes, arg_quiet);
+
+ if (r > 0)
+ r = 0;
+ } else {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ sd_bus *bus;
+
+ polkit_agent_open_maybe();
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "SetDefaultTarget",
+ &error,
+ &reply,
+ "sb", unit, 1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set default target: %s", bus_error_message(&error, r));
+
+ r = bus_deserialize_and_dump_unit_file_changes(reply, arg_quiet, &changes, &n_changes);
+ if (r < 0)
+ goto finish;
+
+ /* Try to reload if enabled */
+ if (!arg_no_reload)
+ r = daemon_reload(argc, argv, userdata);
+ else
+ r = 0;
+ }
+
+finish:
+ unit_file_changes_free(changes, n_changes);
+
+ return r;
+}
+
+static int output_waiting_jobs(sd_bus *bus, uint32_t id, const char *method, const char *prefix) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *name, *type;
+ uint32_t other_id;
+ int r;
+
+ assert(bus);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ method,
+ &error,
+ &reply,
+ "u", id);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get waiting jobs for job %" PRIu32, id);
+
+ r = sd_bus_message_enter_container(reply, 'a', "(usssoo)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "(usssoo)", &other_id, &name, &type, NULL, NULL, NULL)) > 0)
+ printf("%s %u (%s/%s)\n", prefix, other_id, name, type);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
+struct job_info {
+ uint32_t id;
+ const char *name, *type, *state;
+};
+
+static void output_jobs_list(sd_bus *bus, const struct job_info* jobs, unsigned n, bool skipped) {
+ unsigned id_len, unit_len, type_len, state_len;
+ const struct job_info *j;
+ const char *on, *off;
+ bool shorten = false;
+
+ assert(n == 0 || jobs);
+
+ if (n == 0) {
+ if (!arg_no_legend) {
+ on = ansi_highlight_green();
+ off = ansi_normal();
+
+ printf("%sNo jobs %s.%s\n", on, skipped ? "listed" : "running", off);
+ }
+ return;
+ }
+
+ (void) pager_open(arg_pager_flags);
+
+ id_len = STRLEN("JOB");
+ unit_len = STRLEN("UNIT");
+ type_len = STRLEN("TYPE");
+ state_len = STRLEN("STATE");
+
+ for (j = jobs; j < jobs + n; j++) {
+ uint32_t id = j->id;
+ assert(j->name && j->type && j->state);
+
+ id_len = MAX(id_len, DECIMAL_STR_WIDTH(id));
+ unit_len = MAX(unit_len, strlen(j->name));
+ type_len = MAX(type_len, strlen(j->type));
+ state_len = MAX(state_len, strlen(j->state));
+ }
+
+ if (!arg_full && id_len + 1 + unit_len + type_len + 1 + state_len > columns()) {
+ unit_len = MAX(33u, columns() - id_len - type_len - state_len - 3);
+ shorten = true;
+ }
+
+ if (!arg_no_legend)
+ printf("%*s %-*s %-*s %-*s\n",
+ id_len, "JOB",
+ unit_len, "UNIT",
+ type_len, "TYPE",
+ state_len, "STATE");
+
+ for (j = jobs; j < jobs + n; j++) {
+ _cleanup_free_ char *e = NULL;
+
+ if (streq(j->state, "running")) {
+ on = ansi_highlight();
+ off = ansi_normal();
+ } else
+ on = off = "";
+
+ e = shorten ? ellipsize(j->name, unit_len, 33) : NULL;
+ printf("%*u %s%-*s%s %-*s %s%-*s%s\n",
+ id_len, j->id,
+ on, unit_len, e ? e : j->name, off,
+ type_len, j->type,
+ on, state_len, j->state, off);
+
+ if (arg_jobs_after)
+ output_waiting_jobs(bus, j->id, "GetJobAfter", "\twaiting for job");
+ if (arg_jobs_before)
+ output_waiting_jobs(bus, j->id, "GetJobBefore", "\tblocking job");
+ }
+
+ if (!arg_no_legend) {
+ on = ansi_highlight();
+ off = ansi_normal();
+
+ printf("\n%s%u jobs listed%s.\n", on, n, off);
+ }
+}
+
+static bool output_show_job(struct job_info *job, char **patterns) {
+ return strv_fnmatch_or_empty(patterns, job->name, FNM_NOESCAPE);
+}
+
+static int list_jobs(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ struct job_info *jobs = NULL;
+ const char *name, *type, *state;
+ bool skipped = false;
+ size_t size = 0;
+ unsigned c = 0;
+ sd_bus *bus;
+ uint32_t id;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ListJobs",
+ &error,
+ &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list jobs: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "(usssoo)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "(usssoo)", &id, &name, &type, &state, NULL, NULL)) > 0) {
+ struct job_info job = { id, name, type, state };
+
+ if (!output_show_job(&job, strv_skip(argv, 1))) {
+ skipped = true;
+ continue;
+ }
+
+ if (!GREEDY_REALLOC(jobs, size, c + 1))
+ return log_oom();
+
+ jobs[c++] = job;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ (void) pager_open(arg_pager_flags);
+
+ output_jobs_list(bus, jobs, c, skipped);
+ return 0;
+}
+
+static int cancel_job(int argc, char *argv[], void *userdata) {
+ sd_bus *bus;
+ char **name;
+ int r = 0;
+
+ if (argc <= 1)
+ return trivial_method(argc, argv, userdata);
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ STRV_FOREACH(name, strv_skip(argv, 1)) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ uint32_t id;
+ int q;
+
+ q = safe_atou32(*name, &id);
+ if (q < 0)
+ return log_error_errno(q, "Failed to parse job id \"%s\": %m", *name);
+
+ q = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "CancelJob",
+ &error,
+ NULL,
+ "u", id);
+ if (q < 0) {
+ log_error_errno(q, "Failed to cancel job %"PRIu32": %s", id, bus_error_message(&error, q));
+ if (r == 0)
+ r = q;
+ }
+ }
+
+ return r;
+}
+
+static int need_daemon_reload(sd_bus *bus, const char *unit) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *path;
+ int b, r;
+
+ /* We ignore all errors here, since this is used to show a
+ * warning only */
+
+ /* We don't use unit_dbus_path_from_name() directly since we
+ * don't want to load the unit if it isn't loaded. */
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "GetUnit",
+ NULL,
+ &reply,
+ "s", unit);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_get_property_trivial(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "NeedDaemonReload",
+ NULL,
+ 'b', &b);
+ if (r < 0)
+ return r;
+
+ return b;
+}
+
+static void warn_unit_file_changed(const char *name) {
+ assert(name);
+
+ log_warning("%sWarning:%s The unit file, source configuration file or drop-ins of %s changed on disk. Run 'systemctl%s daemon-reload' to reload units.",
+ ansi_highlight_red(),
+ ansi_normal(),
+ name,
+ arg_scope == UNIT_FILE_SYSTEM ? "" : " --user");
+}
+
+static int unit_file_find_path(LookupPaths *lp, const char *unit_name, char **ret_unit_path) {
+ char **p;
+
+ assert(lp);
+ assert(unit_name);
+
+ STRV_FOREACH(p, lp->search_path) {
+ _cleanup_free_ char *path = NULL, *lpath = NULL;
+ int r;
+
+ path = path_join(*p, unit_name);
+ if (!path)
+ return log_oom();
+
+ r = chase_symlinks(path, arg_root, 0, &lpath);
+ if (r == -ENOENT)
+ continue;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Failed to access path \"%s\": %m", path);
+
+ if (ret_unit_path)
+ *ret_unit_path = TAKE_PTR(lpath);
+
+ return 1;
+ }
+
+ if (ret_unit_path)
+ *ret_unit_path = NULL;
+
+ return 0;
+}
+
+static int unit_find_template_path(
+ const char *unit_name,
+ LookupPaths *lp,
+ char **ret_fragment_path,
+ char **ret_template) {
+
+ _cleanup_free_ char *t = NULL, *f = NULL;
+ int r;
+
+ /* Returns 1 if a fragment was found, 0 if not found, negative on error. */
+
+ r = unit_file_find_path(lp, unit_name, &f);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (ret_fragment_path)
+ *ret_fragment_path = TAKE_PTR(f);
+ if (ret_template)
+ *ret_template = NULL;
+ return r; /* found a real unit */
+ }
+
+ r = unit_name_template(unit_name, &t);
+ if (r == -EINVAL) {
+ if (ret_fragment_path)
+ *ret_fragment_path = NULL;
+ if (ret_template)
+ *ret_template = NULL;
+
+ return 0; /* not a template, does not exist */
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine template name: %m");
+
+ r = unit_file_find_path(lp, t, ret_fragment_path);
+ if (r < 0)
+ return r;
+
+ if (ret_template)
+ *ret_template = r > 0 ? TAKE_PTR(t) : NULL;
+
+ return r;
+}
+
+static int unit_find_paths(
+ sd_bus *bus,
+ const char *unit_name,
+ LookupPaths *lp,
+ bool force_client_side,
+ char **ret_fragment_path,
+ char ***ret_dropin_paths) {
+
+ _cleanup_strv_free_ char **dropins = NULL;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ /**
+ * Finds where the unit is defined on disk. Returns 0 if the unit is not found. Returns 1 if it is found, and
+ * sets:
+ * - the path to the unit in *ret_frament_path, if it exists on disk,
+ * - and a strv of existing drop-ins in *ret_dropin_paths, if the arg is not NULL and any dropins were found.
+ *
+ * Returns -ERFKILL if the unit is masked, and -EKEYREJECTED if the unit file could not be loaded for some
+ * reason (the latter only applies if we are going through the service manager)
+ */
+
+ assert(unit_name);
+ assert(ret_fragment_path);
+ assert(lp);
+
+ /* Go via the bus to acquire the path, unless we are explicitly told not to, or when the unit name is a template */
+ if (!force_client_side &&
+ !install_client_side() &&
+ !unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE)) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *load_state = NULL, *unit = NULL;
+
+ unit = unit_dbus_path_from_name(unit_name);
+ if (!unit)
+ return log_oom();
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ unit,
+ "org.freedesktop.systemd1.Unit",
+ "LoadState",
+ &error,
+ &load_state);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get LoadState: %s", bus_error_message(&error, r));
+
+ if (streq(load_state, "masked"))
+ return -ERFKILL;
+ if (streq(load_state, "not-found")) {
+ r = 0;
+ goto not_found;
+ }
+ if (!streq(load_state, "loaded"))
+ return -EKEYREJECTED;
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ unit,
+ "org.freedesktop.systemd1.Unit",
+ "FragmentPath",
+ &error,
+ &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get FragmentPath: %s", bus_error_message(&error, r));
+
+ if (ret_dropin_paths) {
+ r = sd_bus_get_property_strv(
+ bus,
+ "org.freedesktop.systemd1",
+ unit,
+ "org.freedesktop.systemd1.Unit",
+ "DropInPaths",
+ &error,
+ &dropins);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get DropInPaths: %s", bus_error_message(&error, r));
+ }
+ } else {
+ _cleanup_set_free_ Set *names = NULL;
+ _cleanup_free_ char *template = NULL;
+
+ names = set_new(NULL);
+ if (!names)
+ return log_oom();
+
+ r = unit_find_template_path(unit_name, lp, &path, &template);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (null_or_empty_path(path))
+ /* The template is masked. Let's cut the process short. */
+ return -ERFKILL;
+
+ /* We found the unit file. If we followed symlinks, this name might be
+ * different then the unit_name with started with. Look for dropins matching
+ * that "final" name. */
+ r = set_put(names, basename(path));
+ } else if (!template)
+ /* No unit file, let's look for dropins matching the original name.
+ * systemd has fairly complicated rules (based on unit type and provenience),
+ * which units are allowed not to have the main unit file. We err on the
+ * side of including too many files, and always try to load dropins. */
+ r = set_put(names, unit_name);
+ else
+ /* The cases where we allow a unit to exist without the main file are
+ * never valid for templates. Don't try to load dropins in this case. */
+ goto not_found;
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to add unit name: %m");
+
+ if (ret_dropin_paths) {
+ r = unit_file_find_dropin_conf_paths(arg_root, lp->search_path, NULL, names, &dropins);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ r = 0;
+
+ if (!isempty(path)) {
+ *ret_fragment_path = TAKE_PTR(path);
+ r = 1;
+ } else
+ *ret_fragment_path = NULL;
+
+ if (ret_dropin_paths) {
+ if (!strv_isempty(dropins)) {
+ *ret_dropin_paths = TAKE_PTR(dropins);
+ r = 1;
+ } else
+ *ret_dropin_paths = NULL;
+ }
+
+ not_found:
+ if (r == 0 && !arg_force)
+ log_error("No files found for %s.", unit_name);
+
+ return r;
+}
+
+static int get_state_one_unit(sd_bus *bus, const char *name, UnitActiveState *active_state) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *buf = NULL, *path = NULL;
+ UnitActiveState state;
+ int r;
+
+ assert(name);
+ assert(active_state);
+
+ path = unit_dbus_path_from_name(name);
+ if (!path)
+ return log_oom();
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "ActiveState",
+ &error,
+ &buf);
+ if (r < 0)
+ return log_error_errno(r, "Failed to retrieve unit state: %s", bus_error_message(&error, r));
+
+ state = unit_active_state_from_string(buf);
+ if (state == _UNIT_ACTIVE_STATE_INVALID) {
+ log_error("Invalid unit state '%s' for: %s", buf, name);
+ return -EINVAL;
+ }
+
+ *active_state = state;
+ return 0;
+}
+
+static int unit_is_masked(sd_bus *bus, LookupPaths *lp, const char *name) {
+ _cleanup_free_ char *load_state = NULL;
+ int r;
+
+ if (unit_name_is_valid(name, UNIT_NAME_TEMPLATE)) {
+ _cleanup_free_ char *path = NULL;
+
+ /* A template cannot be loaded, but it can be still masked, so
+ * we need to use a different method. */
+
+ r = unit_file_find_path(lp, name, &path);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return false;
+ return null_or_empty_path(path);
+ }
+
+ r = unit_load_state(bus, name, &load_state);
+ if (r < 0)
+ return r;
+
+ return streq(load_state, "masked");
+}
+
+static int check_triggering_units(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *n = NULL, *path = NULL, *load_state = NULL;
+ _cleanup_strv_free_ char **triggered_by = NULL;
+ bool print_warning_label = true;
+ UnitActiveState active_state;
+ char **i;
+ int r;
+
+ r = unit_name_mangle(name, 0, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+
+ r = unit_load_state(bus, name, &load_state);
+ if (r < 0)
+ return r;
+
+ if (streq(load_state, "masked"))
+ return 0;
+
+ path = unit_dbus_path_from_name(n);
+ if (!path)
+ return log_oom();
+
+ r = sd_bus_get_property_strv(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "TriggeredBy",
+ &error,
+ &triggered_by);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get triggered by array of %s: %s", n, bus_error_message(&error, r));
+
+ STRV_FOREACH(i, triggered_by) {
+ r = get_state_one_unit(bus, *i, &active_state);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(active_state, UNIT_ACTIVE, UNIT_RELOADING))
+ continue;
+
+ if (print_warning_label) {
+ log_warning("Warning: Stopping %s, but it can still be activated by:", n);
+ print_warning_label = false;
+ }
+
+ log_warning(" %s", *i);
+ }
+
+ return 0;
+}
+
+static const struct {
+ const char *verb;
+ const char *method;
+} unit_actions[] = {
+ { "start", "StartUnit" },
+ { "stop", "StopUnit" },
+ { "condstop", "StopUnit" },
+ { "reload", "ReloadUnit" },
+ { "restart", "RestartUnit" },
+ { "try-restart", "TryRestartUnit" },
+ { "condrestart", "TryRestartUnit" },
+ { "reload-or-restart", "ReloadOrRestartUnit" },
+ { "try-reload-or-restart", "ReloadOrTryRestartUnit" },
+ { "reload-or-try-restart", "ReloadOrTryRestartUnit" },
+ { "condreload", "ReloadOrTryRestartUnit" },
+ { "force-reload", "ReloadOrTryRestartUnit" }
+};
+
+static const char *verb_to_method(const char *verb) {
+ uint i;
+
+ for (i = 0; i < ELEMENTSOF(unit_actions); i++)
+ if (streq_ptr(unit_actions[i].verb, verb))
+ return unit_actions[i].method;
+
+ return "StartUnit";
+}
+
+static const char *method_to_verb(const char *method) {
+ uint i;
+
+ for (i = 0; i < ELEMENTSOF(unit_actions); i++)
+ if (streq_ptr(unit_actions[i].method, method))
+ return unit_actions[i].verb;
+
+ return "n/a";
+}
+
+typedef struct {
+ sd_bus_slot *match;
+ sd_event *event;
+ Set *unit_paths;
+ bool any_failed;
+} WaitContext;
+
+static void wait_context_free(WaitContext *c) {
+ c->match = sd_bus_slot_unref(c->match);
+ c->event = sd_event_unref(c->event);
+ c->unit_paths = set_free_free(c->unit_paths);
+}
+
+static int on_properties_changed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ const char *path, *interface, *active_state = NULL, *job_path = NULL;
+ WaitContext *c = userdata;
+ bool is_failed;
+ int r;
+
+ /* Called whenever we get a PropertiesChanged signal. Checks if ActiveState changed to inactive/failed.
+ *
+ * Signal parameters: (s interface, a{sv} changed_properties, as invalidated_properties) */
+
+ path = sd_bus_message_get_path(m);
+ if (!set_contains(c->unit_paths, path))
+ return 0;
+
+ r = sd_bus_message_read(m, "s", &interface);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!streq(interface, "org.freedesktop.systemd1.Unit")) /* ActiveState is on the Unit interface */
+ return 0;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ const char *s;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv");
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0) /* end of array */
+ break;
+
+ r = sd_bus_message_read(m, "s", &s); /* Property name */
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (streq(s, "ActiveState")) {
+ r = sd_bus_message_read(m, "v", "s", &active_state);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (job_path) /* Found everything we need */
+ break;
+
+ } else if (streq(s, "Job")) {
+ uint32_t job_id;
+
+ r = sd_bus_message_read(m, "v", "(uo)", &job_id, &job_path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ /* There's still a job pending for this unit, let's ignore this for now, and return right-away. */
+ if (job_id != 0)
+ return 0;
+
+ if (active_state) /* Found everything we need */
+ break;
+
+ } else {
+ r = sd_bus_message_skip(m, "v"); /* Other property */
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ /* If this didn't contain the ActiveState property we can't do anything */
+ if (!active_state)
+ return 0;
+
+ is_failed = streq(active_state, "failed");
+ if (streq(active_state, "inactive") || is_failed) {
+ log_debug("%s became %s, dropping from --wait tracking", path, active_state);
+ free(set_remove(c->unit_paths, path));
+ c->any_failed = c->any_failed || is_failed;
+ } else
+ log_debug("ActiveState on %s changed to %s", path, active_state);
+
+ if (set_isempty(c->unit_paths))
+ sd_event_exit(c->event, EXIT_SUCCESS);
+
+ return 0;
+}
+
+static int start_unit_one(
+ sd_bus *bus,
+ const char *method,
+ const char *name,
+ const char *mode,
+ sd_bus_error *error,
+ BusWaitForJobs *w,
+ WaitContext *wait_context) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *path;
+ int r;
+
+ assert(method);
+ assert(name);
+ assert(mode);
+ assert(error);
+
+ if (wait_context) {
+ _cleanup_free_ char *unit_path = NULL;
+
+ log_debug("Watching for property changes of %s", name);
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "RefUnit",
+ error,
+ NULL,
+ "s", name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to RefUnit %s: %s", name, bus_error_message(error, r));
+
+ unit_path = unit_dbus_path_from_name(name);
+ if (!unit_path)
+ return log_oom();
+
+ r = set_put_strdup(wait_context->unit_paths, unit_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add unit path %s to set: %m", unit_path);
+
+ r = sd_bus_match_signal_async(bus,
+ &wait_context->match,
+ NULL,
+ unit_path,
+ "org.freedesktop.DBus.Properties",
+ "PropertiesChanged",
+ on_properties_changed, NULL, wait_context);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for PropertiesChanged signal: %m");
+ }
+
+ log_debug("%s dbus call org.freedesktop.systemd1.Manager %s(%s, %s)",
+ arg_dry_run ? "Would execute" : "Executing",
+ method, name, mode);
+ if (arg_dry_run)
+ return 0;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ method,
+ error,
+ &reply,
+ "ss", name, mode);
+ if (r < 0) {
+ const char *verb;
+
+ /* There's always a fallback possible for legacy actions. */
+ if (arg_action != ACTION_SYSTEMCTL)
+ return r;
+
+ verb = method_to_verb(method);
+
+ log_error("Failed to %s %s: %s", verb, name, bus_error_message(error, r));
+
+ if (!sd_bus_error_has_name(error, BUS_ERROR_NO_SUCH_UNIT) &&
+ !sd_bus_error_has_name(error, BUS_ERROR_UNIT_MASKED) &&
+ !sd_bus_error_has_name(error, BUS_ERROR_JOB_TYPE_NOT_APPLICABLE))
+ log_error("See %s logs and 'systemctl%s status%s %s' for details.",
+ arg_scope == UNIT_FILE_SYSTEM ? "system" : "user",
+ arg_scope == UNIT_FILE_SYSTEM ? "" : " --user",
+ name[0] == '-' ? " --" : "",
+ name);
+
+ return r;
+ }
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (need_daemon_reload(bus, name) > 0)
+ warn_unit_file_changed(name);
+
+ if (w) {
+ log_debug("Adding %s to the set", path);
+ r = bus_wait_for_jobs_add(w, path);
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+static int expand_names(sd_bus *bus, char **names, const char* suffix, char ***ret) {
+ _cleanup_strv_free_ char **mangled = NULL, **globs = NULL;
+ char **name;
+ int r, i;
+
+ assert(bus);
+ assert(ret);
+
+ STRV_FOREACH(name, names) {
+ char *t;
+ UnitNameMangle options = UNIT_NAME_MANGLE_GLOB | (arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN);
+
+ if (suffix)
+ r = unit_name_mangle_with_suffix(*name, options, suffix, &t);
+ else
+ r = unit_name_mangle(*name, options, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle name: %m");
+
+ if (string_is_glob(t))
+ r = strv_consume(&globs, t);
+ else
+ r = strv_consume(&mangled, t);
+ if (r < 0)
+ return log_oom();
+ }
+
+ /* Query the manager only if any of the names are a glob, since
+ * this is fairly expensive */
+ if (!strv_isempty(globs)) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ UnitInfo *unit_infos = NULL;
+ size_t allocated, n;
+
+ r = get_unit_list(bus, NULL, globs, &unit_infos, 0, &reply);
+ if (r < 0)
+ return r;
+
+ n = strv_length(mangled);
+ allocated = n + 1;
+
+ for (i = 0; i < r; i++) {
+ if (!GREEDY_REALLOC(mangled, allocated, n+2))
+ return log_oom();
+
+ mangled[n] = strdup(unit_infos[i].id);
+ if (!mangled[n])
+ return log_oom();
+
+ mangled[++n] = NULL;
+ }
+ }
+
+ *ret = TAKE_PTR(mangled);
+
+ return 0;
+}
+
+static const struct {
+ const char *target;
+ const char *verb;
+ const char *mode;
+} action_table[_ACTION_MAX] = {
+ [ACTION_HALT] = { SPECIAL_HALT_TARGET, "halt", "replace-irreversibly" },
+ [ACTION_POWEROFF] = { SPECIAL_POWEROFF_TARGET, "poweroff", "replace-irreversibly" },
+ [ACTION_REBOOT] = { SPECIAL_REBOOT_TARGET, "reboot", "replace-irreversibly" },
+ [ACTION_KEXEC] = { SPECIAL_KEXEC_TARGET, "kexec", "replace-irreversibly" },
+ [ACTION_RUNLEVEL2] = { SPECIAL_MULTI_USER_TARGET, NULL, "isolate" },
+ [ACTION_RUNLEVEL3] = { SPECIAL_MULTI_USER_TARGET, NULL, "isolate" },
+ [ACTION_RUNLEVEL4] = { SPECIAL_MULTI_USER_TARGET, NULL, "isolate" },
+ [ACTION_RUNLEVEL5] = { SPECIAL_GRAPHICAL_TARGET, NULL, "isolate" },
+ [ACTION_RESCUE] = { SPECIAL_RESCUE_TARGET, "rescue", "isolate" },
+ [ACTION_EMERGENCY] = { SPECIAL_EMERGENCY_TARGET, "emergency", "isolate" },
+ [ACTION_DEFAULT] = { SPECIAL_DEFAULT_TARGET, "default", "isolate" },
+ [ACTION_EXIT] = { SPECIAL_EXIT_TARGET, "exit", "replace-irreversibly" },
+ [ACTION_SUSPEND] = { SPECIAL_SUSPEND_TARGET, "suspend", "replace-irreversibly" },
+ [ACTION_HIBERNATE] = { SPECIAL_HIBERNATE_TARGET, "hibernate", "replace-irreversibly" },
+ [ACTION_HYBRID_SLEEP] = { SPECIAL_HYBRID_SLEEP_TARGET, "hybrid-sleep", "replace-irreversibly" },
+ [ACTION_SUSPEND_THEN_HIBERNATE] = { SPECIAL_SUSPEND_THEN_HIBERNATE_TARGET, "suspend-then-hibernate", "replace-irreversibly" },
+};
+
+static enum action verb_to_action(const char *verb) {
+ enum action i;
+
+ for (i = 0; i < _ACTION_MAX; i++)
+ if (streq_ptr(action_table[i].verb, verb))
+ return i;
+
+ return _ACTION_INVALID;
+}
+
+static int start_unit(int argc, char *argv[], void *userdata) {
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_(wait_context_free) WaitContext wait_context = {};
+ const char *method, *mode, *one_name, *suffix = NULL;
+ _cleanup_free_ char **stopped_units = NULL; /* Do not use _cleanup_strv_free_ */
+ _cleanup_strv_free_ char **names = NULL;
+ int r, ret = EXIT_SUCCESS;
+ sd_bus *bus;
+ char **name;
+
+ if (arg_wait && !STR_IN_SET(argv[0], "start", "restart")) {
+ log_error("--wait may only be used with the 'start' or 'restart' commands.");
+ return -EINVAL;
+ }
+
+ /* we cannot do sender tracking on the private bus, so we need the full
+ * one for RefUnit to implement --wait */
+ r = acquire_bus(arg_wait ? BUS_FULL : BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ ask_password_agent_open_if_enabled();
+ polkit_agent_open_maybe();
+
+ if (arg_action == ACTION_SYSTEMCTL) {
+ enum action action;
+
+ action = verb_to_action(argv[0]);
+
+ if (action != _ACTION_INVALID) {
+ method = "StartUnit";
+ mode = action_table[action].mode;
+ one_name = action_table[action].target;
+ } else {
+ if (streq(argv[0], "isolate")) {
+ method = "StartUnit";
+ mode = "isolate";
+
+ suffix = ".target";
+ } else {
+ method = verb_to_method(argv[0]);
+ mode = arg_job_mode;
+ }
+ one_name = NULL;
+ }
+ } else {
+ assert(arg_action >= 0 && arg_action < _ACTION_MAX);
+ assert(action_table[arg_action].target);
+ assert(action_table[arg_action].mode);
+
+ method = "StartUnit";
+ mode = action_table[arg_action].mode;
+ one_name = action_table[arg_action].target;
+ }
+
+ if (one_name) {
+ names = strv_new(one_name);
+ if (!names)
+ return log_oom();
+ } else {
+ r = expand_names(bus, strv_skip(argv, 1), suffix, &names);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+ }
+
+ if (!arg_no_block) {
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch jobs: %m");
+ }
+
+ if (arg_wait) {
+ wait_context.unit_paths = set_new(&string_hash_ops);
+ if (!wait_context.unit_paths)
+ return log_oom();
+
+ r = sd_bus_call_method_async(
+ bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Subscribe",
+ NULL, NULL,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable subscription: %m");
+ r = sd_event_default(&wait_context.event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+ r = sd_bus_attach_event(bus, wait_context.event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+ }
+
+ STRV_FOREACH(name, names) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = start_unit_one(bus, method, *name, mode, &error, w, arg_wait ? &wait_context : NULL);
+ if (ret == EXIT_SUCCESS && r < 0)
+ ret = translate_bus_error_to_exit_status(r, &error);
+
+ if (r >= 0 && streq(method, "StopUnit")) {
+ r = strv_push(&stopped_units, *name);
+ if (r < 0)
+ return log_oom();
+ }
+ }
+
+ if (!arg_no_block) {
+ const char* extra_args[4] = {};
+ int arg_count = 0;
+
+ if (arg_scope != UNIT_FILE_SYSTEM)
+ extra_args[arg_count++] = "--user";
+
+ assert(IN_SET(arg_transport, BUS_TRANSPORT_LOCAL, BUS_TRANSPORT_REMOTE, BUS_TRANSPORT_MACHINE));
+ if (arg_transport == BUS_TRANSPORT_REMOTE) {
+ extra_args[arg_count++] = "-H";
+ extra_args[arg_count++] = arg_host;
+ } else if (arg_transport == BUS_TRANSPORT_MACHINE) {
+ extra_args[arg_count++] = "-M";
+ extra_args[arg_count++] = arg_host;
+ }
+
+ r = bus_wait_for_jobs(w, arg_quiet, extra_args);
+ if (r < 0)
+ return r;
+
+ /* When stopping units, warn if they can still be triggered by
+ * another active unit (socket, path, timer) */
+ if (!arg_quiet)
+ STRV_FOREACH(name, stopped_units)
+ (void) check_triggering_units(bus, *name);
+ }
+
+ if (ret == EXIT_SUCCESS && arg_wait && !set_isempty(wait_context.unit_paths)) {
+ r = sd_event_loop(wait_context.event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+ if (wait_context.any_failed)
+ ret = EXIT_FAILURE;
+ }
+
+ return ret;
+}
+
+#if ENABLE_LOGIND
+static int logind_set_wall_message(void) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+ _cleanup_free_ char *m = NULL;
+ int r;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ m = strv_join(arg_wall, " ");
+ if (!m)
+ return log_oom();
+
+ log_debug("%s wall message \"%s\".", arg_dry_run ? "Would set" : "Setting", m);
+ if (arg_dry_run)
+ return 0;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "SetWallMessage",
+ &error,
+ NULL,
+ "sb",
+ m,
+ !arg_no_wall);
+
+ if (r < 0)
+ return log_warning_errno(r, "Failed to set wall message, ignoring: %s", bus_error_message(&error, r));
+ return 0;
+}
+#endif
+
+/* Ask systemd-logind, which might grant access to unprivileged users
+ * through polkit */
+static int logind_reboot(enum action a) {
+#if ENABLE_LOGIND
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *method, *description;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ switch (a) {
+
+ case ACTION_POWEROFF:
+ method = "PowerOff";
+ description = "power off system";
+ break;
+
+ case ACTION_REBOOT:
+ method = "Reboot";
+ description = "reboot system";
+ break;
+
+ case ACTION_HALT:
+ method = "Halt";
+ description = "halt system";
+ break;
+
+ case ACTION_SUSPEND:
+ method = "Suspend";
+ description = "suspend system";
+ break;
+
+ case ACTION_HIBERNATE:
+ method = "Hibernate";
+ description = "hibernate system";
+ break;
+
+ case ACTION_HYBRID_SLEEP:
+ method = "HybridSleep";
+ description = "put system into hybrid sleep";
+ break;
+
+ case ACTION_SUSPEND_THEN_HIBERNATE:
+ method = "SuspendThenHibernate";
+ description = "put system into suspend followed by hibernate";
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ polkit_agent_open_maybe();
+ (void) logind_set_wall_message();
+
+ log_debug("%s org.freedesktop.login1.Manager %s dbus call.", arg_dry_run ? "Would execute" : "Executing", method);
+ if (arg_dry_run)
+ return 0;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ method,
+ &error,
+ NULL,
+ "b", arg_ask_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to %s via logind: %s", description, bus_error_message(&error, r));
+
+ return 0;
+#else
+ return -ENOSYS;
+#endif
+}
+
+static int logind_check_inhibitors(enum action a) {
+#if ENABLE_LOGIND
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_strv_free_ char **sessions = NULL;
+ const char *what, *who, *why, *mode;
+ uint32_t uid, pid;
+ sd_bus *bus;
+ unsigned c = 0;
+ char **s;
+ int r;
+
+ if (arg_ignore_inhibitors || arg_force > 0)
+ return 0;
+
+ if (arg_when > 0)
+ return 0;
+
+ if (geteuid() == 0)
+ return 0;
+
+ if (!on_tty())
+ return 0;
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ return 0;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "ListInhibitors",
+ NULL,
+ &reply,
+ NULL);
+ if (r < 0)
+ /* If logind is not around, then there are no inhibitors... */
+ return 0;
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssuu)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "(ssssuu)", &what, &who, &why, &mode, &uid, &pid)) > 0) {
+ _cleanup_free_ char *comm = NULL, *user = NULL;
+ _cleanup_strv_free_ char **sv = NULL;
+
+ if (!streq(mode, "block"))
+ continue;
+
+ sv = strv_split(what, ":");
+ if (!sv)
+ return log_oom();
+
+ if (!pid_is_valid((pid_t) pid)) {
+ log_error("Invalid PID "PID_FMT".", (pid_t) pid);
+ return -ERANGE;
+ }
+
+ if (!strv_contains(sv,
+ IN_SET(a,
+ ACTION_HALT,
+ ACTION_POWEROFF,
+ ACTION_REBOOT,
+ ACTION_KEXEC) ? "shutdown" : "sleep"))
+ continue;
+
+ get_process_comm(pid, &comm);
+ user = uid_to_name(uid);
+
+ log_warning("Operation inhibited by \"%s\" (PID "PID_FMT" \"%s\", user %s), reason is \"%s\".",
+ who, (pid_t) pid, strna(comm), strna(user), why);
+
+ c++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ /* Check for current sessions */
+ sd_get_sessions(&sessions);
+ STRV_FOREACH(s, sessions) {
+ _cleanup_free_ char *type = NULL, *tty = NULL, *seat = NULL, *user = NULL, *service = NULL, *class = NULL;
+
+ if (sd_session_get_uid(*s, &uid) < 0 || uid == getuid())
+ continue;
+
+ if (sd_session_get_class(*s, &class) < 0 || !streq(class, "user"))
+ continue;
+
+ if (sd_session_get_type(*s, &type) < 0 || !STR_IN_SET(type, "x11", "wayland", "tty", "mir"))
+ continue;
+
+ sd_session_get_tty(*s, &tty);
+ sd_session_get_seat(*s, &seat);
+ sd_session_get_service(*s, &service);
+ user = uid_to_name(uid);
+
+ log_warning("User %s is logged in on %s.", strna(user), isempty(tty) ? (isempty(seat) ? strna(service) : seat) : tty);
+ c++;
+ }
+
+ if (c <= 0)
+ return 0;
+
+ log_error("Please retry operation after closing inhibitors and logging out other users.\nAlternatively, ignore inhibitors and users with 'systemctl %s -i'.",
+ action_table[a].verb);
+
+ return -EPERM;
+#else
+ return 0;
+#endif
+}
+
+static int logind_prepare_firmware_setup(void) {
+#if ENABLE_LOGIND
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "SetRebootToFirmwareSetup",
+ &error,
+ NULL,
+ "b", true);
+ if (r < 0)
+ return log_error_errno(r, "Cannot indicate to EFI to boot into setup mode: %s", bus_error_message(&error, r));
+
+ return 0;
+#else
+ log_error("Cannot remotely indicate to EFI to boot into setup mode.");
+ return -ENOSYS;
+#endif
+}
+
+static int prepare_firmware_setup(void) {
+ int r;
+
+ if (!arg_firmware_setup)
+ return 0;
+
+ if (arg_transport == BUS_TRANSPORT_LOCAL) {
+
+ r = efi_set_reboot_to_firmware(true);
+ if (r < 0)
+ log_debug_errno(r, "Cannot indicate to EFI to boot into setup mode, will retry via logind: %m");
+ else
+ return r;
+ }
+
+ return logind_prepare_firmware_setup();
+}
+
+static int load_kexec_kernel(void) {
+ _cleanup_(boot_config_free) BootConfig config = {};
+ _cleanup_free_ char *where = NULL, *kernel = NULL, *initrd = NULL, *options = NULL;
+ const BootEntry *e;
+ pid_t pid;
+ int r;
+
+ if (kexec_loaded()) {
+ log_debug("Kexec kernel already loaded.");
+ return 0;
+ }
+
+ if (access(KEXEC, X_OK) < 0)
+ return log_error_errno(errno, KEXEC" is not available: %m");
+
+ r = find_default_boot_entry(arg_esp_path, &where, &config, &e);
+ if (r == -ENOKEY) /* find_default_boot_entry() doesn't warn about this case */
+ return log_error_errno(r, "Cannot find the ESP partition mount point.");
+ if (r < 0)
+ /* But it logs about all these cases, hence don't log here again */
+ return r;
+
+ if (strv_length(e->initrd) > 1) {
+ log_error("Boot entry specifies multiple initrds, which is not supported currently.");
+ return -EINVAL;
+ }
+
+ kernel = path_join(where, e->kernel);
+ if (!strv_isempty(e->initrd))
+ initrd = path_join(where, *e->initrd);
+ options = strv_join(e->options, " ");
+ if (!options)
+ return log_oom();
+
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO,
+ "%s "KEXEC" --load \"%s\" --append \"%s\"%s%s%s",
+ arg_dry_run ? "Would run" : "Running",
+ kernel,
+ options,
+ initrd ? " --initrd \"" : NULL, strempty(initrd), initrd ? "\"" : "");
+ if (arg_dry_run)
+ return 0;
+
+ r = safe_fork("(kexec)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char* const args[] = {
+ KEXEC,
+ "--load", kernel,
+ "--append", options,
+ initrd ? "--initrd" : NULL, initrd,
+ NULL };
+
+ /* Child */
+ execv(args[0], (char * const *) args);
+ _exit(EXIT_FAILURE);
+ }
+
+ r = wait_for_terminate_and_check("kexec", pid, WAIT_LOG);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ /* Command failed */
+ return -EPROTO;
+ return 0;
+}
+
+static int set_exit_code(uint8_t code) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "SetExitCode",
+ &error,
+ NULL,
+ "y", code);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set exit code: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int start_special(int argc, char *argv[], void *userdata) {
+ enum action a;
+ int r;
+ bool termination_action; /* an action that terminates the manager,
+ * can be performed also by signal. */
+
+ assert(argv);
+
+ a = verb_to_action(argv[0]);
+
+ r = logind_check_inhibitors(a);
+ if (r < 0)
+ return r;
+
+ if (arg_force >= 2) {
+ r = must_be_root();
+ if (r < 0)
+ return r;
+ }
+
+ r = prepare_firmware_setup();
+ if (r < 0)
+ return r;
+
+ if (a == ACTION_REBOOT && argc > 1) {
+ r = update_reboot_parameter_and_warn(argv[1]);
+ if (r < 0)
+ return r;
+
+ } else if (a == ACTION_KEXEC) {
+ r = load_kexec_kernel();
+ if (r < 0 && arg_force >= 1)
+ log_notice("Failed to load kexec kernel, continuing without.");
+ else if (r < 0)
+ return r;
+
+ } else if (a == ACTION_EXIT && argc > 1) {
+ uint8_t code;
+
+ /* If the exit code is not given on the command line,
+ * don't reset it to zero: just keep it as it might
+ * have been set previously. */
+
+ r = safe_atou8(argv[1], &code);
+ if (r < 0)
+ return log_error_errno(r, "Invalid exit code.");
+
+ r = set_exit_code(code);
+ if (r < 0)
+ return r;
+ }
+
+ termination_action = IN_SET(a,
+ ACTION_HALT,
+ ACTION_POWEROFF,
+ ACTION_REBOOT);
+ if (termination_action && arg_force >= 2)
+ return halt_now(a);
+
+ if (arg_force >= 1 &&
+ (termination_action || IN_SET(a, ACTION_KEXEC, ACTION_EXIT)))
+ r = trivial_method(argc, argv, userdata);
+ else {
+ /* First try logind, to allow authentication with polkit */
+ if (IN_SET(a,
+ ACTION_POWEROFF,
+ ACTION_REBOOT,
+ ACTION_HALT,
+ ACTION_SUSPEND,
+ ACTION_HIBERNATE,
+ ACTION_HYBRID_SLEEP,
+ ACTION_SUSPEND_THEN_HIBERNATE)) {
+
+ r = logind_reboot(a);
+ if (r >= 0)
+ return r;
+ if (IN_SET(r, -EOPNOTSUPP, -EINPROGRESS))
+ /* requested operation is not supported or already in progress */
+ return r;
+
+ /* On all other errors, try low-level operation. In order to minimize the difference between
+ * operation with and without logind, we explicitly enable non-blocking mode for this, as
+ * logind's shutdown operations are always non-blocking. */
+
+ arg_no_block = true;
+
+ } else if (IN_SET(a, ACTION_EXIT, ACTION_KEXEC))
+ /* Since exit/kexec are so close in behaviour to power-off/reboot, let's also make them
+ * asynchronous, in order to not confuse the user needlessly with unexpected behaviour. */
+ arg_no_block = true;
+
+ r = start_unit(argc, argv, userdata);
+ }
+
+ if (termination_action && arg_force < 2 &&
+ IN_SET(r, -ENOENT, -ETIMEDOUT))
+ log_notice("It is possible to perform action directly, see discussion of --force --force in man:systemctl(1).");
+
+ return r;
+}
+
+static int start_system_special(int argc, char *argv[], void *userdata) {
+ /* Like start_special above, but raises an error when running in user mode */
+
+ if (arg_scope != UNIT_FILE_SYSTEM)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Bad action for %s mode.",
+ arg_scope == UNIT_FILE_GLOBAL ? "--global" : "--user");
+
+ return start_special(argc, argv, userdata);
+}
+
+static int check_unit_generic(int code, const UnitActiveState good_states[], int nb_states, char **args) {
+ _cleanup_strv_free_ char **names = NULL;
+ UnitActiveState active_state;
+ sd_bus *bus;
+ char **name;
+ int r, i;
+ bool found = false;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = expand_names(bus, args, NULL, &names);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+
+ STRV_FOREACH(name, names) {
+ r = get_state_one_unit(bus, *name, &active_state);
+ if (r < 0)
+ return r;
+
+ if (!arg_quiet)
+ puts(unit_active_state_to_string(active_state));
+
+ for (i = 0; i < nb_states; ++i)
+ if (good_states[i] == active_state)
+ found = true;
+ }
+
+ /* use the given return code for the case that we won't find
+ * any unit which matches the list */
+ return found ? 0 : code;
+}
+
+static int check_unit_active(int argc, char *argv[], void *userdata) {
+ static const UnitActiveState states[] = {
+ UNIT_ACTIVE,
+ UNIT_RELOADING,
+ };
+
+ /* According to LSB: 3, "program is not running" */
+ return check_unit_generic(EXIT_PROGRAM_NOT_RUNNING, states, ELEMENTSOF(states), strv_skip(argv, 1));
+}
+
+static int check_unit_failed(int argc, char *argv[], void *userdata) {
+ static const UnitActiveState states[] = {
+ UNIT_FAILED,
+ };
+
+ return check_unit_generic(EXIT_PROGRAM_DEAD_AND_PID_EXISTS, states, ELEMENTSOF(states), strv_skip(argv, 1));
+}
+
+static int kill_unit(int argc, char *argv[], void *userdata) {
+ _cleanup_strv_free_ char **names = NULL;
+ char *kill_who = NULL, **name;
+ sd_bus *bus;
+ int r, q;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ if (!arg_kill_who)
+ arg_kill_who = "all";
+
+ /* --fail was specified */
+ if (streq(arg_job_mode, "fail"))
+ kill_who = strjoina(arg_kill_who, "-fail");
+
+ r = expand_names(bus, strv_skip(argv, 1), NULL, &names);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+
+ STRV_FOREACH(name, names) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ q = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "KillUnit",
+ &error,
+ NULL,
+ "ssi", *name, kill_who ? kill_who : arg_kill_who, arg_signal);
+ if (q < 0) {
+ log_error_errno(q, "Failed to kill unit %s: %s", *name, bus_error_message(&error, q));
+ if (r == 0)
+ r = q;
+ }
+ }
+
+ return r;
+}
+
+typedef struct ExecStatusInfo {
+ char *name;
+
+ char *path;
+ char **argv;
+
+ bool ignore;
+
+ usec_t start_timestamp;
+ usec_t exit_timestamp;
+ pid_t pid;
+ int code;
+ int status;
+
+ LIST_FIELDS(struct ExecStatusInfo, exec);
+} ExecStatusInfo;
+
+static void exec_status_info_free(ExecStatusInfo *i) {
+ assert(i);
+
+ free(i->name);
+ free(i->path);
+ strv_free(i->argv);
+ free(i);
+}
+
+static int exec_status_info_deserialize(sd_bus_message *m, ExecStatusInfo *i) {
+ uint64_t start_timestamp, exit_timestamp, start_timestamp_monotonic, exit_timestamp_monotonic;
+ const char *path;
+ uint32_t pid;
+ int32_t code, status;
+ int ignore, r;
+
+ assert(m);
+ assert(i);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_STRUCT, "sasbttttuii");
+ if (r < 0)
+ return bus_log_parse_error(r);
+ else if (r == 0)
+ return 0;
+
+ r = sd_bus_message_read(m, "s", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ i->path = strdup(path);
+ if (!i->path)
+ return log_oom();
+
+ r = sd_bus_message_read_strv(m, &i->argv);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(m,
+ "bttttuii",
+ &ignore,
+ &start_timestamp, &start_timestamp_monotonic,
+ &exit_timestamp, &exit_timestamp_monotonic,
+ &pid,
+ &code, &status);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ i->ignore = ignore;
+ i->start_timestamp = (usec_t) start_timestamp;
+ i->exit_timestamp = (usec_t) exit_timestamp;
+ i->pid = (pid_t) pid;
+ i->code = code;
+ i->status = status;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+}
+
+typedef struct UnitCondition {
+ char *name;
+ char *param;
+ bool trigger;
+ bool negate;
+ int tristate;
+
+ LIST_FIELDS(struct UnitCondition, conditions);
+} UnitCondition;
+
+static void unit_condition_free(UnitCondition *c) {
+ if (!c)
+ return;
+
+ free(c->name);
+ free(c->param);
+ free(c);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(UnitCondition*, unit_condition_free);
+
+typedef struct UnitStatusInfo {
+ const char *id;
+ const char *load_state;
+ const char *active_state;
+ const char *sub_state;
+ const char *unit_file_state;
+ const char *unit_file_preset;
+
+ const char *description;
+ const char *following;
+
+ char **documentation;
+
+ const char *fragment_path;
+ const char *source_path;
+ const char *control_group;
+
+ char **dropin_paths;
+
+ const char *load_error;
+ const char *result;
+
+ usec_t inactive_exit_timestamp;
+ usec_t inactive_exit_timestamp_monotonic;
+ usec_t active_enter_timestamp;
+ usec_t active_exit_timestamp;
+ usec_t inactive_enter_timestamp;
+
+ bool need_daemon_reload;
+ bool transient;
+
+ /* Service */
+ pid_t main_pid;
+ pid_t control_pid;
+ const char *status_text;
+ const char *pid_file;
+ bool running:1;
+ int status_errno;
+
+ usec_t start_timestamp;
+ usec_t exit_timestamp;
+
+ int exit_code, exit_status;
+
+ usec_t condition_timestamp;
+ bool condition_result;
+ LIST_HEAD(UnitCondition, conditions);
+
+ usec_t assert_timestamp;
+ bool assert_result;
+ bool failed_assert_trigger;
+ bool failed_assert_negate;
+ const char *failed_assert;
+ const char *failed_assert_parameter;
+ usec_t next_elapse_real;
+ usec_t next_elapse_monotonic;
+
+ /* Socket */
+ unsigned n_accepted;
+ unsigned n_connections;
+ unsigned n_refused;
+ bool accept;
+
+ /* Pairs of type, path */
+ char **listen;
+
+ /* Device */
+ const char *sysfs_path;
+
+ /* Mount, Automount */
+ const char *where;
+
+ /* Swap */
+ const char *what;
+
+ /* CGroup */
+ uint64_t memory_current;
+ uint64_t memory_min;
+ uint64_t memory_low;
+ uint64_t memory_high;
+ uint64_t memory_max;
+ uint64_t memory_swap_max;
+ uint64_t memory_limit;
+ uint64_t cpu_usage_nsec;
+ uint64_t tasks_current;
+ uint64_t tasks_max;
+
+ uint64_t ip_ingress_bytes;
+ uint64_t ip_egress_bytes;
+
+ LIST_HEAD(ExecStatusInfo, exec);
+} UnitStatusInfo;
+
+static void unit_status_info_free(UnitStatusInfo *info) {
+ ExecStatusInfo *p;
+ UnitCondition *c;
+
+ strv_free(info->documentation);
+ strv_free(info->dropin_paths);
+ strv_free(info->listen);
+
+ while ((c = info->conditions)) {
+ LIST_REMOVE(conditions, info->conditions, c);
+ unit_condition_free(c);
+ }
+
+ while ((p = info->exec)) {
+ LIST_REMOVE(exec, info->exec, p);
+ exec_status_info_free(p);
+ }
+}
+
+static void print_status_info(
+ sd_bus *bus,
+ UnitStatusInfo *i,
+ bool *ellipsized) {
+
+ char since1[FORMAT_TIMESTAMP_RELATIVE_MAX], since2[FORMAT_TIMESTAMP_MAX];
+ const char *s1, *s2, *active_on, *active_off, *on, *off, *ss;
+ _cleanup_free_ char *formatted_path = NULL;
+ ExecStatusInfo *p;
+ usec_t timestamp;
+ const char *path;
+ char **t, **t2;
+ int r;
+
+ assert(i);
+
+ /* This shows pretty information about a unit. See
+ * print_property() for a low-level property printer */
+
+ if (streq_ptr(i->active_state, "failed")) {
+ active_on = ansi_highlight_red();
+ active_off = ansi_normal();
+ } else if (STRPTR_IN_SET(i->active_state, "active", "reloading")) {
+ active_on = ansi_highlight_green();
+ active_off = ansi_normal();
+ } else
+ active_on = active_off = "";
+
+ printf("%s%s%s %s", active_on, special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE), active_off, strna(i->id));
+
+ if (i->description && !streq_ptr(i->id, i->description))
+ printf(" - %s", i->description);
+
+ printf("\n");
+
+ if (i->following)
+ printf(" Follow: unit currently follows state of %s\n", i->following);
+
+ if (STRPTR_IN_SET(i->load_state, "error", "not-found", "bad-setting")) {
+ on = ansi_highlight_red();
+ off = ansi_normal();
+ } else
+ on = off = "";
+
+ path = i->source_path ?: i->fragment_path;
+ if (path && terminal_urlify_path(path, NULL, &formatted_path) >= 0)
+ path = formatted_path;
+
+ if (!isempty(i->load_error))
+ printf(" Loaded: %s%s%s (Reason: %s)\n",
+ on, strna(i->load_state), off, i->load_error);
+ else if (path && !isempty(i->unit_file_state) && !isempty(i->unit_file_preset) &&
+ !STR_IN_SET(i->unit_file_state, "generated", "transient"))
+ printf(" Loaded: %s%s%s (%s; %s; vendor preset: %s)\n",
+ on, strna(i->load_state), off, path, i->unit_file_state, i->unit_file_preset);
+ else if (path && !isempty(i->unit_file_state))
+ printf(" Loaded: %s%s%s (%s; %s)\n",
+ on, strna(i->load_state), off, path, i->unit_file_state);
+ else if (path)
+ printf(" Loaded: %s%s%s (%s)\n",
+ on, strna(i->load_state), off, path);
+ else
+ printf(" Loaded: %s%s%s\n",
+ on, strna(i->load_state), off);
+
+ if (i->transient)
+ printf("Transient: yes\n");
+
+ if (!strv_isempty(i->dropin_paths)) {
+ _cleanup_free_ char *dir = NULL;
+ bool last = false;
+ char ** dropin;
+
+ STRV_FOREACH(dropin, i->dropin_paths) {
+ _cleanup_free_ char *dropin_formatted = NULL;
+ const char *df;
+
+ if (!dir || last) {
+ printf(dir ? " " :
+ " Drop-In: ");
+
+ dir = mfree(dir);
+
+ dir = dirname_malloc(*dropin);
+ if (!dir) {
+ log_oom();
+ return;
+ }
+
+ printf("%s\n"
+ " %s", dir,
+ special_glyph(SPECIAL_GLYPH_TREE_RIGHT));
+ }
+
+ last = ! (*(dropin + 1) && startswith(*(dropin + 1), dir));
+
+ if (terminal_urlify_path(*dropin, basename(*dropin), &dropin_formatted) >= 0)
+ df = dropin_formatted;
+ else
+ df = *dropin;
+
+ printf("%s%s", df, last ? "\n" : ", ");
+ }
+ }
+
+ ss = streq_ptr(i->active_state, i->sub_state) ? NULL : i->sub_state;
+ if (ss)
+ printf(" Active: %s%s (%s)%s",
+ active_on, strna(i->active_state), ss, active_off);
+ else
+ printf(" Active: %s%s%s",
+ active_on, strna(i->active_state), active_off);
+
+ if (!isempty(i->result) && !streq(i->result, "success"))
+ printf(" (Result: %s)", i->result);
+
+ timestamp = STRPTR_IN_SET(i->active_state, "active", "reloading") ? i->active_enter_timestamp :
+ STRPTR_IN_SET(i->active_state, "inactive", "failed") ? i->inactive_enter_timestamp :
+ STRPTR_IN_SET(i->active_state, "activating") ? i->inactive_exit_timestamp :
+ i->active_exit_timestamp;
+
+ s1 = format_timestamp_relative(since1, sizeof(since1), timestamp);
+ s2 = format_timestamp(since2, sizeof(since2), timestamp);
+
+ if (s1)
+ printf(" since %s; %s\n", s2, s1);
+ else if (s2)
+ printf(" since %s\n", s2);
+ else
+ printf("\n");
+
+ if (endswith(i->id, ".timer")) {
+ char tstamp1[FORMAT_TIMESTAMP_RELATIVE_MAX],
+ tstamp2[FORMAT_TIMESTAMP_MAX];
+ const char *next_rel_time, *next_time;
+ dual_timestamp nw, next = {i->next_elapse_real,
+ i->next_elapse_monotonic};
+ usec_t next_elapse;
+
+ printf(" Trigger: ");
+
+ dual_timestamp_get(&nw);
+ next_elapse = calc_next_elapse(&nw, &next);
+ next_rel_time = format_timestamp_relative(tstamp1, sizeof tstamp1, next_elapse);
+ next_time = format_timestamp(tstamp2, sizeof tstamp2, next_elapse);
+
+ if (next_time && next_rel_time)
+ printf("%s; %s\n", next_time, next_rel_time);
+ else
+ printf("n/a\n");
+ }
+
+ if (!i->condition_result && i->condition_timestamp > 0) {
+ UnitCondition *c;
+ int n = 0;
+
+ s1 = format_timestamp_relative(since1, sizeof(since1), i->condition_timestamp);
+ s2 = format_timestamp(since2, sizeof(since2), i->condition_timestamp);
+
+ printf("Condition: start %scondition failed%s at %s%s%s\n",
+ ansi_highlight_yellow(), ansi_normal(),
+ s2, s1 ? "; " : "", strempty(s1));
+
+ LIST_FOREACH(conditions, c, i->conditions)
+ if (c->tristate < 0)
+ n++;
+
+ LIST_FOREACH(conditions, c, i->conditions)
+ if (c->tristate < 0)
+ printf(" %s %s=%s%s%s was not met\n",
+ --n ? special_glyph(SPECIAL_GLYPH_TREE_BRANCH) : special_glyph(SPECIAL_GLYPH_TREE_RIGHT),
+ c->name,
+ c->trigger ? "|" : "",
+ c->negate ? "!" : "",
+ c->param);
+ }
+
+ if (!i->assert_result && i->assert_timestamp > 0) {
+ s1 = format_timestamp_relative(since1, sizeof(since1), i->assert_timestamp);
+ s2 = format_timestamp(since2, sizeof(since2), i->assert_timestamp);
+
+ printf(" Assert: start %sassertion failed%s at %s%s%s\n",
+ ansi_highlight_red(), ansi_normal(),
+ s2, s1 ? "; " : "", strempty(s1));
+ if (i->failed_assert_trigger)
+ printf(" none of the trigger assertions were met\n");
+ else if (i->failed_assert)
+ printf(" %s=%s%s was not met\n",
+ i->failed_assert,
+ i->failed_assert_negate ? "!" : "",
+ i->failed_assert_parameter);
+ }
+
+ if (i->sysfs_path)
+ printf(" Device: %s\n", i->sysfs_path);
+ if (i->where)
+ printf(" Where: %s\n", i->where);
+ if (i->what)
+ printf(" What: %s\n", i->what);
+
+ STRV_FOREACH(t, i->documentation) {
+ _cleanup_free_ char *formatted = NULL;
+ const char *q;
+
+ if (terminal_urlify(*t, NULL, &formatted) >= 0)
+ q = formatted;
+ else
+ q = *t;
+
+ printf(" %*s %s\n", 9, t == i->documentation ? "Docs:" : "", q);
+ }
+
+ STRV_FOREACH_PAIR(t, t2, i->listen)
+ printf(" %*s %s (%s)\n", 9, t == i->listen ? "Listen:" : "", *t2, *t);
+
+ if (i->accept) {
+ printf(" Accepted: %u; Connected: %u;", i->n_accepted, i->n_connections);
+ if (i->n_refused)
+ printf(" Refused: %u", i->n_refused);
+ printf("\n");
+ }
+
+ LIST_FOREACH(exec, p, i->exec) {
+ _cleanup_free_ char *argv = NULL;
+ bool good;
+
+ /* Only show exited processes here */
+ if (p->code == 0)
+ continue;
+
+ argv = strv_join(p->argv, " ");
+ printf(" Process: "PID_FMT" %s=%s ", p->pid, p->name, strna(argv));
+
+ good = is_clean_exit(p->code, p->status, EXIT_CLEAN_DAEMON, NULL);
+ if (!good) {
+ on = ansi_highlight_red();
+ off = ansi_normal();
+ } else
+ on = off = "";
+
+ printf("%s(code=%s, ", on, sigchld_code_to_string(p->code));
+
+ if (p->code == CLD_EXITED) {
+ const char *c;
+
+ printf("status=%i", p->status);
+
+ c = exit_status_to_string(p->status, EXIT_STATUS_SYSTEMD);
+ if (c)
+ printf("/%s", c);
+
+ } else
+ printf("signal=%s", signal_to_string(p->status));
+
+ printf(")%s\n", off);
+
+ if (i->main_pid == p->pid &&
+ i->start_timestamp == p->start_timestamp &&
+ i->exit_timestamp == p->start_timestamp)
+ /* Let's not show this twice */
+ i->main_pid = 0;
+
+ if (p->pid == i->control_pid)
+ i->control_pid = 0;
+ }
+
+ if (i->main_pid > 0 || i->control_pid > 0) {
+ if (i->main_pid > 0) {
+ printf(" Main PID: "PID_FMT, i->main_pid);
+
+ if (i->running) {
+
+ if (arg_transport == BUS_TRANSPORT_LOCAL) {
+ _cleanup_free_ char *comm = NULL;
+
+ (void) get_process_comm(i->main_pid, &comm);
+ if (comm)
+ printf(" (%s)", comm);
+ }
+
+ } else if (i->exit_code > 0) {
+ printf(" (code=%s, ", sigchld_code_to_string(i->exit_code));
+
+ if (i->exit_code == CLD_EXITED) {
+ const char *c;
+
+ printf("status=%i", i->exit_status);
+
+ c = exit_status_to_string(i->exit_status, EXIT_STATUS_SYSTEMD);
+ if (c)
+ printf("/%s", c);
+
+ } else
+ printf("signal=%s", signal_to_string(i->exit_status));
+ printf(")");
+ }
+ }
+
+ if (i->control_pid > 0) {
+ _cleanup_free_ char *c = NULL;
+
+ if (i->main_pid > 0)
+ fputs("; Control PID: ", stdout);
+ else
+ fputs("Cntrl PID: ", stdout); /* if first in column, abbreviated so it fits alignment */
+
+ printf(PID_FMT, i->control_pid);
+
+ if (arg_transport == BUS_TRANSPORT_LOCAL) {
+ (void) get_process_comm(i->control_pid, &c);
+ if (c)
+ printf(" (%s)", c);
+ }
+ }
+
+ printf("\n");
+ }
+
+ if (i->status_text)
+ printf(" Status: \"%s\"\n", i->status_text);
+ if (i->status_errno > 0)
+ printf(" Error: %i (%s)\n", i->status_errno, strerror(i->status_errno));
+
+ if (i->ip_ingress_bytes != (uint64_t) -1 && i->ip_egress_bytes != (uint64_t) -1) {
+ char buf_in[FORMAT_BYTES_MAX], buf_out[FORMAT_BYTES_MAX];
+
+ printf(" IP: %s in, %s out\n",
+ format_bytes(buf_in, sizeof(buf_in), i->ip_ingress_bytes),
+ format_bytes(buf_out, sizeof(buf_out), i->ip_egress_bytes));
+ }
+
+ if (i->tasks_current != (uint64_t) -1) {
+ printf(" Tasks: %" PRIu64, i->tasks_current);
+
+ if (i->tasks_max != (uint64_t) -1)
+ printf(" (limit: %" PRIu64 ")\n", i->tasks_max);
+ else
+ printf("\n");
+ }
+
+ if (i->memory_current != (uint64_t) -1) {
+ char buf[FORMAT_BYTES_MAX];
+
+ printf(" Memory: %s", format_bytes(buf, sizeof(buf), i->memory_current));
+
+ if (i->memory_min > 0 || i->memory_low > 0 ||
+ i->memory_high != CGROUP_LIMIT_MAX || i->memory_max != CGROUP_LIMIT_MAX ||
+ i->memory_swap_max != CGROUP_LIMIT_MAX ||
+ i->memory_limit != CGROUP_LIMIT_MAX) {
+ const char *prefix = "";
+
+ printf(" (");
+ if (i->memory_min > 0) {
+ printf("%smin: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_min));
+ prefix = " ";
+ }
+ if (i->memory_low > 0) {
+ printf("%slow: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_low));
+ prefix = " ";
+ }
+ if (i->memory_high != CGROUP_LIMIT_MAX) {
+ printf("%shigh: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_high));
+ prefix = " ";
+ }
+ if (i->memory_max != CGROUP_LIMIT_MAX) {
+ printf("%smax: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_max));
+ prefix = " ";
+ }
+ if (i->memory_swap_max != CGROUP_LIMIT_MAX) {
+ printf("%sswap max: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_swap_max));
+ prefix = " ";
+ }
+ if (i->memory_limit != CGROUP_LIMIT_MAX) {
+ printf("%slimit: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_limit));
+ prefix = " ";
+ }
+ printf(")");
+ }
+ printf("\n");
+ }
+
+ if (i->cpu_usage_nsec != (uint64_t) -1) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ printf(" CPU: %s\n", format_timespan(buf, sizeof(buf), i->cpu_usage_nsec / NSEC_PER_USEC, USEC_PER_MSEC));
+ }
+
+ if (i->control_group) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ static const char prefix[] = " ";
+ unsigned c;
+
+ printf(" CGroup: %s\n", i->control_group);
+
+ c = columns();
+ if (c > sizeof(prefix) - 1)
+ c -= sizeof(prefix) - 1;
+ else
+ c = 0;
+
+ r = unit_show_processes(bus, i->id, i->control_group, prefix, c, get_output_flags(), &error);
+ if (r == -EBADR) {
+ unsigned k = 0;
+ pid_t extra[2];
+
+ /* Fallback for older systemd versions where the GetUnitProcesses() call is not yet available */
+
+ if (i->main_pid > 0)
+ extra[k++] = i->main_pid;
+
+ if (i->control_pid > 0)
+ extra[k++] = i->control_pid;
+
+ show_cgroup_and_extra(SYSTEMD_CGROUP_CONTROLLER, i->control_group, prefix, c, extra, k, get_output_flags());
+ } else if (r < 0)
+ log_warning_errno(r, "Failed to dump process list for '%s', ignoring: %s", i->id, bus_error_message(&error, r));
+ }
+
+ if (i->id && arg_transport == BUS_TRANSPORT_LOCAL)
+ show_journal_by_unit(
+ stdout,
+ i->id,
+ arg_output,
+ 0,
+ i->inactive_exit_timestamp_monotonic,
+ arg_lines,
+ getuid(),
+ get_output_flags() | OUTPUT_BEGIN_NEWLINE,
+ SD_JOURNAL_LOCAL_ONLY,
+ arg_scope == UNIT_FILE_SYSTEM,
+ ellipsized);
+
+ if (i->need_daemon_reload)
+ warn_unit_file_changed(i->id);
+}
+
+static void show_unit_help(UnitStatusInfo *i) {
+ char **p;
+
+ assert(i);
+
+ if (!i->documentation) {
+ log_info("Documentation for %s not known.", i->id);
+ return;
+ }
+
+ STRV_FOREACH(p, i->documentation)
+ if (startswith(*p, "man:"))
+ show_man_page(*p + 4, false);
+ else
+ log_info("Can't show: %s", *p);
+}
+
+static int map_main_pid(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ UnitStatusInfo *i = userdata;
+ uint32_t u;
+ int r;
+
+ r = sd_bus_message_read(m, "u", &u);
+ if (r < 0)
+ return r;
+
+ i->main_pid = (pid_t) u;
+ i->running = u > 0;
+
+ return 0;
+}
+
+static int map_load_error(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ const char *message, **p = userdata;
+ int r;
+
+ r = sd_bus_message_read(m, "(ss)", NULL, &message);
+ if (r < 0)
+ return r;
+
+ if (!isempty(message))
+ *p = message;
+
+ return 0;
+}
+
+static int map_listen(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ const char *type, *path;
+ char ***p = userdata;
+ int r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(ss)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(m, "(ss)", &type, &path)) > 0) {
+
+ r = strv_extend(p, type);
+ if (r < 0)
+ return r;
+
+ r = strv_extend(p, path);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int map_conditions(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ UnitStatusInfo *i = userdata;
+ const char *cond, *param;
+ int trigger, negate;
+ int32_t state;
+ int r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sbbsi)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(m, "(sbbsi)", &cond, &trigger, &negate, &param, &state)) > 0) {
+ _cleanup_(unit_condition_freep) UnitCondition *c = NULL;
+
+ c = new0(UnitCondition, 1);
+ if (!c)
+ return -ENOMEM;
+
+ c->name = strdup(cond);
+ c->param = strdup(param);
+ if (!c->name || !c->param)
+ return -ENOMEM;
+
+ c->trigger = trigger;
+ c->negate = negate;
+ c->tristate = state;
+
+ LIST_PREPEND(conditions, i->conditions, c);
+ c = NULL;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int map_asserts(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ UnitStatusInfo *i = userdata;
+ const char *cond, *param;
+ int trigger, negate;
+ int32_t state;
+ int r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sbbsi)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(m, "(sbbsi)", &cond, &trigger, &negate, &param, &state)) > 0) {
+ if (state < 0 && (!trigger || !i->failed_assert)) {
+ i->failed_assert = cond;
+ i->failed_assert_trigger = trigger;
+ i->failed_assert_negate = negate;
+ i->failed_assert_parameter = param;
+ }
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int map_exec(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ _cleanup_free_ ExecStatusInfo *info = NULL;
+ ExecStatusInfo *last;
+ UnitStatusInfo *i = userdata;
+ int r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sasbttttuii)");
+ if (r < 0)
+ return r;
+
+ info = new0(ExecStatusInfo, 1);
+ if (!info)
+ return -ENOMEM;
+
+ LIST_FIND_TAIL(exec, i->exec, last);
+
+ while ((r = exec_status_info_deserialize(m, info)) > 0) {
+
+ info->name = strdup(member);
+ if (!info->name)
+ return -ENOMEM;
+
+ LIST_INSERT_AFTER(exec, i->exec, last, info);
+ last = info;
+
+ info = new0(ExecStatusInfo, 1);
+ if (!info)
+ return -ENOMEM;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int print_property(const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all) {
+ char bus_type;
+ const char *contents;
+ int r;
+
+ assert(name);
+ assert(m);
+
+ /* This is a low-level property printer, see
+ * print_status_info() for the nicer output */
+
+ r = sd_bus_message_peek_type(m, &bus_type, &contents);
+ if (r < 0)
+ return r;
+
+ switch (bus_type) {
+
+ case SD_BUS_TYPE_STRUCT:
+
+ if (contents[0] == SD_BUS_TYPE_UINT32 && streq(name, "Job")) {
+ uint32_t u;
+
+ r = sd_bus_message_read(m, "(uo)", &u, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (u > 0)
+ bus_print_property_value(name, expected_value, value, "%"PRIu32, u);
+ else if (all)
+ bus_print_property_value(name, expected_value, value, "%s", "");
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRING && streq(name, "Unit")) {
+ const char *s;
+
+ r = sd_bus_message_read(m, "(so)", &s, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (all || !isempty(s))
+ bus_print_property_value(name, expected_value, value, "%s", s);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRING && streq(name, "LoadError")) {
+ const char *a = NULL, *b = NULL;
+
+ r = sd_bus_message_read(m, "(ss)", &a, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (all || !isempty(a) || !isempty(b))
+ bus_print_property_value(name, expected_value, value, "%s \"%s\"", strempty(a), strempty(b));
+
+ return 1;
+ } else if (streq_ptr(name, "SystemCallFilter")) {
+ _cleanup_strv_free_ char **l = NULL;
+ int whitelist;
+
+ r = sd_bus_message_enter_container(m, 'r', "bas");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(m, "b", &whitelist);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_strv(m, &l);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (all || whitelist || !strv_isempty(l)) {
+ bool first = true;
+ char **i;
+
+ if (!value) {
+ fputs(name, stdout);
+ fputc('=', stdout);
+ }
+
+ if (!whitelist)
+ fputc('~', stdout);
+
+ STRV_FOREACH(i, l) {
+ if (first)
+ first = false;
+ else
+ fputc(' ', stdout);
+
+ fputs(*i, stdout);
+ }
+ fputc('\n', stdout);
+ }
+
+ return 1;
+ }
+
+ break;
+
+ case SD_BUS_TYPE_ARRAY:
+
+ if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "EnvironmentFiles")) {
+ const char *path;
+ int ignore;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sb)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(sb)", &path, &ignore)) > 0)
+ bus_print_property_value(name, expected_value, value, "%s (ignore_errors=%s)", path, yes_no(ignore));
+
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "Paths")) {
+ const char *type, *path;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(ss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(ss)", &type, &path)) > 0)
+ bus_print_property_value(name, expected_value, value, "%s (%s)", path, type);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "Listen")) {
+ const char *type, *path;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(ss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(ss)", &type, &path)) > 0)
+ bus_print_property_value(name, expected_value, value, "%s (%s)", path, type);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "TimersMonotonic")) {
+ const char *base;
+ uint64_t v, next_elapse;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(stt)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(stt)", &base, &v, &next_elapse)) > 0) {
+ char timespan1[FORMAT_TIMESPAN_MAX], timespan2[FORMAT_TIMESPAN_MAX];
+
+ bus_print_property_value(name, expected_value, value, "{ %s=%s ; next_elapse=%s }", base,
+ format_timespan(timespan1, sizeof(timespan1), v, 0),
+ format_timespan(timespan2, sizeof(timespan2), next_elapse, 0));
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "TimersCalendar")) {
+ const char *base, *spec;
+ uint64_t next_elapse;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sst)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(sst)", &base, &spec, &next_elapse)) > 0) {
+ char timestamp[FORMAT_TIMESTAMP_MAX];
+
+ bus_print_property_value(name, expected_value, value, "{ %s=%s ; next_elapse=%s }", base, spec,
+ format_timestamp(timestamp, sizeof(timestamp), next_elapse));
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && startswith(name, "Exec")) {
+ ExecStatusInfo info = {};
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sasbttttuii)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = exec_status_info_deserialize(m, &info)) > 0) {
+ char timestamp1[FORMAT_TIMESTAMP_MAX], timestamp2[FORMAT_TIMESTAMP_MAX];
+ _cleanup_free_ char *tt;
+
+ tt = strv_join(info.argv, " ");
+
+ bus_print_property_value(name, expected_value, value,
+ "{ path=%s ; argv[]=%s ; ignore_errors=%s ; start_time=[%s] ; stop_time=[%s] ; pid="PID_FMT" ; code=%s ; status=%i%s%s }",
+ strna(info.path),
+ strna(tt),
+ yes_no(info.ignore),
+ strna(format_timestamp(timestamp1, sizeof(timestamp1), info.start_timestamp)),
+ strna(format_timestamp(timestamp2, sizeof(timestamp2), info.exit_timestamp)),
+ info.pid,
+ sigchld_code_to_string(info.code),
+ info.status,
+ info.code == CLD_EXITED ? "" : "/",
+ strempty(info.code == CLD_EXITED ? NULL : signal_to_string(info.status)));
+
+ free(info.path);
+ strv_free(info.argv);
+ zero(info);
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "DeviceAllow")) {
+ const char *path, *rwm;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(ss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(ss)", &path, &rwm)) > 0)
+ bus_print_property_value(name, expected_value, value, "%s %s", strna(path), strna(rwm));
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN &&
+ STR_IN_SET(name, "IODeviceWeight", "BlockIODeviceWeight")) {
+ const char *path;
+ uint64_t weight;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(st)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(st)", &path, &weight)) > 0)
+ bus_print_property_value(name, expected_value, value, "%s %"PRIu64, strna(path), weight);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN &&
+ (cgroup_io_limit_type_from_string(name) >= 0 ||
+ STR_IN_SET(name, "BlockIOReadBandwidth", "BlockIOWriteBandwidth"))) {
+ const char *path;
+ uint64_t bandwidth;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(st)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(st)", &path, &bandwidth)) > 0)
+ bus_print_property_value(name, expected_value, value, "%s %"PRIu64, strna(path), bandwidth);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN &&
+ streq(name, "IODeviceLatencyTargetUSec")) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ const char *path;
+ uint64_t target;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(st)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(st)", &path, &target)) > 0)
+ bus_print_property_value(name, expected_value, value, "%s %s", strna(path),
+ format_timespan(ts, sizeof(ts), target, 1));
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_BYTE && streq(name, "StandardInputData")) {
+ _cleanup_free_ char *h = NULL;
+ const void *p;
+ size_t sz;
+ ssize_t n;
+
+ r = sd_bus_message_read_array(m, 'y', &p, &sz);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ n = base64mem(p, sz, &h);
+ if (n < 0)
+ return log_oom();
+
+ bus_print_property_value(name, expected_value, value, "%s", h);
+
+ return 1;
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+typedef enum SystemctlShowMode{
+ SYSTEMCTL_SHOW_PROPERTIES,
+ SYSTEMCTL_SHOW_STATUS,
+ SYSTEMCTL_SHOW_HELP,
+ _SYSTEMCTL_SHOW_MODE_MAX,
+ _SYSTEMCTL_SHOW_MODE_INVALID = -1,
+} SystemctlShowMode;
+
+static const char* const systemctl_show_mode_table[_SYSTEMCTL_SHOW_MODE_MAX] = {
+ [SYSTEMCTL_SHOW_PROPERTIES] = "show",
+ [SYSTEMCTL_SHOW_STATUS] = "status",
+ [SYSTEMCTL_SHOW_HELP] = "help",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(systemctl_show_mode, SystemctlShowMode);
+
+static int show_one(
+ sd_bus *bus,
+ const char *path,
+ const char *unit,
+ SystemctlShowMode show_mode,
+ bool *new_line,
+ bool *ellipsized) {
+
+ static const struct bus_properties_map property_map[] = {
+ { "LoadState", "s", NULL, offsetof(UnitStatusInfo, load_state) },
+ { "ActiveState", "s", NULL, offsetof(UnitStatusInfo, active_state) },
+ { "Documentation", "as", NULL, offsetof(UnitStatusInfo, documentation) },
+ {}
+ }, status_map[] = {
+ { "Id", "s", NULL, offsetof(UnitStatusInfo, id) },
+ { "LoadState", "s", NULL, offsetof(UnitStatusInfo, load_state) },
+ { "ActiveState", "s", NULL, offsetof(UnitStatusInfo, active_state) },
+ { "SubState", "s", NULL, offsetof(UnitStatusInfo, sub_state) },
+ { "UnitFileState", "s", NULL, offsetof(UnitStatusInfo, unit_file_state) },
+ { "UnitFilePreset", "s", NULL, offsetof(UnitStatusInfo, unit_file_preset) },
+ { "Description", "s", NULL, offsetof(UnitStatusInfo, description) },
+ { "Following", "s", NULL, offsetof(UnitStatusInfo, following) },
+ { "Documentation", "as", NULL, offsetof(UnitStatusInfo, documentation) },
+ { "FragmentPath", "s", NULL, offsetof(UnitStatusInfo, fragment_path) },
+ { "SourcePath", "s", NULL, offsetof(UnitStatusInfo, source_path) },
+ { "ControlGroup", "s", NULL, offsetof(UnitStatusInfo, control_group) },
+ { "DropInPaths", "as", NULL, offsetof(UnitStatusInfo, dropin_paths) },
+ { "LoadError", "(ss)", map_load_error, offsetof(UnitStatusInfo, load_error) },
+ { "Result", "s", NULL, offsetof(UnitStatusInfo, result) },
+ { "InactiveExitTimestamp", "t", NULL, offsetof(UnitStatusInfo, inactive_exit_timestamp) },
+ { "InactiveExitTimestampMonotonic", "t", NULL, offsetof(UnitStatusInfo, inactive_exit_timestamp_monotonic) },
+ { "ActiveEnterTimestamp", "t", NULL, offsetof(UnitStatusInfo, active_enter_timestamp) },
+ { "ActiveExitTimestamp", "t", NULL, offsetof(UnitStatusInfo, active_exit_timestamp) },
+ { "InactiveEnterTimestamp", "t", NULL, offsetof(UnitStatusInfo, inactive_enter_timestamp) },
+ { "NeedDaemonReload", "b", NULL, offsetof(UnitStatusInfo, need_daemon_reload) },
+ { "Transient", "b", NULL, offsetof(UnitStatusInfo, transient) },
+ { "ExecMainPID", "u", NULL, offsetof(UnitStatusInfo, main_pid) },
+ { "MainPID", "u", map_main_pid, 0 },
+ { "ControlPID", "u", NULL, offsetof(UnitStatusInfo, control_pid) },
+ { "StatusText", "s", NULL, offsetof(UnitStatusInfo, status_text) },
+ { "PIDFile", "s", NULL, offsetof(UnitStatusInfo, pid_file) },
+ { "StatusErrno", "i", NULL, offsetof(UnitStatusInfo, status_errno) },
+ { "ExecMainStartTimestamp", "t", NULL, offsetof(UnitStatusInfo, start_timestamp) },
+ { "ExecMainExitTimestamp", "t", NULL, offsetof(UnitStatusInfo, exit_timestamp) },
+ { "ExecMainCode", "i", NULL, offsetof(UnitStatusInfo, exit_code) },
+ { "ExecMainStatus", "i", NULL, offsetof(UnitStatusInfo, exit_status) },
+ { "ConditionTimestamp", "t", NULL, offsetof(UnitStatusInfo, condition_timestamp) },
+ { "ConditionResult", "b", NULL, offsetof(UnitStatusInfo, condition_result) },
+ { "Conditions", "a(sbbsi)", map_conditions, 0 },
+ { "AssertTimestamp", "t", NULL, offsetof(UnitStatusInfo, assert_timestamp) },
+ { "AssertResult", "b", NULL, offsetof(UnitStatusInfo, assert_result) },
+ { "Asserts", "a(sbbsi)", map_asserts, 0 },
+ { "NextElapseUSecRealtime", "t", NULL, offsetof(UnitStatusInfo, next_elapse_real) },
+ { "NextElapseUSecMonotonic", "t", NULL, offsetof(UnitStatusInfo, next_elapse_monotonic) },
+ { "NAccepted", "u", NULL, offsetof(UnitStatusInfo, n_accepted) },
+ { "NConnections", "u", NULL, offsetof(UnitStatusInfo, n_connections) },
+ { "NRefused", "u", NULL, offsetof(UnitStatusInfo, n_refused) },
+ { "Accept", "b", NULL, offsetof(UnitStatusInfo, accept) },
+ { "Listen", "a(ss)", map_listen, offsetof(UnitStatusInfo, listen) },
+ { "SysFSPath", "s", NULL, offsetof(UnitStatusInfo, sysfs_path) },
+ { "Where", "s", NULL, offsetof(UnitStatusInfo, where) },
+ { "What", "s", NULL, offsetof(UnitStatusInfo, what) },
+ { "MemoryCurrent", "t", NULL, offsetof(UnitStatusInfo, memory_current) },
+ { "MemoryMin", "t", NULL, offsetof(UnitStatusInfo, memory_min) },
+ { "MemoryLow", "t", NULL, offsetof(UnitStatusInfo, memory_low) },
+ { "MemoryHigh", "t", NULL, offsetof(UnitStatusInfo, memory_high) },
+ { "MemoryMax", "t", NULL, offsetof(UnitStatusInfo, memory_max) },
+ { "MemorySwapMax", "t", NULL, offsetof(UnitStatusInfo, memory_swap_max) },
+ { "MemoryLimit", "t", NULL, offsetof(UnitStatusInfo, memory_limit) },
+ { "CPUUsageNSec", "t", NULL, offsetof(UnitStatusInfo, cpu_usage_nsec) },
+ { "TasksCurrent", "t", NULL, offsetof(UnitStatusInfo, tasks_current) },
+ { "TasksMax", "t", NULL, offsetof(UnitStatusInfo, tasks_max) },
+ { "IPIngressBytes", "t", NULL, offsetof(UnitStatusInfo, ip_ingress_bytes) },
+ { "IPEgressBytes", "t", NULL, offsetof(UnitStatusInfo, ip_egress_bytes) },
+ { "ExecStartPre", "a(sasbttttuii)", map_exec, 0 },
+ { "ExecStart", "a(sasbttttuii)", map_exec, 0 },
+ { "ExecStartPost", "a(sasbttttuii)", map_exec, 0 },
+ { "ExecReload", "a(sasbttttuii)", map_exec, 0 },
+ { "ExecStopPre", "a(sasbttttuii)", map_exec, 0 },
+ { "ExecStop", "a(sasbttttuii)", map_exec, 0 },
+ { "ExecStopPost", "a(sasbttttuii)", map_exec, 0 },
+ {}
+ };
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_set_free_ Set *found_properties = NULL;
+ _cleanup_(unit_status_info_free) UnitStatusInfo info = {
+ .memory_current = (uint64_t) -1,
+ .memory_high = CGROUP_LIMIT_MAX,
+ .memory_max = CGROUP_LIMIT_MAX,
+ .memory_swap_max = CGROUP_LIMIT_MAX,
+ .memory_limit = (uint64_t) -1,
+ .cpu_usage_nsec = (uint64_t) -1,
+ .tasks_current = (uint64_t) -1,
+ .tasks_max = (uint64_t) -1,
+ .ip_ingress_bytes = (uint64_t) -1,
+ .ip_egress_bytes = (uint64_t) -1,
+ };
+ char **pp;
+ int r;
+
+ assert(path);
+ assert(new_line);
+
+ log_debug("Showing one %s", path);
+
+ r = bus_map_all_properties(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ show_mode == SYSTEMCTL_SHOW_STATUS ? status_map : property_map,
+ BUS_MAP_BOOLEAN_AS_BOOL,
+ &error,
+ &reply,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get properties: %s", bus_error_message(&error, r));
+
+ if (unit && streq_ptr(info.load_state, "not-found") && streq_ptr(info.active_state, "inactive")) {
+ log_full(show_mode == SYSTEMCTL_SHOW_STATUS ? LOG_ERR : LOG_DEBUG,
+ "Unit %s could not be found.", unit);
+
+ if (show_mode == SYSTEMCTL_SHOW_STATUS)
+ return EXIT_PROGRAM_OR_SERVICES_STATUS_UNKNOWN;
+ else if (show_mode == SYSTEMCTL_SHOW_HELP)
+ return -ENOENT;
+ }
+
+ if (*new_line)
+ printf("\n");
+
+ *new_line = true;
+
+ if (show_mode == SYSTEMCTL_SHOW_STATUS) {
+ print_status_info(bus, &info, ellipsized);
+
+ if (info.active_state && !STR_IN_SET(info.active_state, "active", "reloading"))
+ return EXIT_PROGRAM_NOT_RUNNING;
+
+ return EXIT_PROGRAM_RUNNING_OR_SERVICE_OK;
+
+ } else if (show_mode == SYSTEMCTL_SHOW_HELP) {
+ show_unit_help(&info);
+ return 0;
+ }
+
+ r = sd_bus_message_rewind(reply, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to rewind: %s", bus_error_message(&error, r));
+
+ r = bus_message_print_all_properties(reply, print_property, arg_properties, arg_value, arg_all, &found_properties);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ STRV_FOREACH(pp, arg_properties)
+ if (!set_contains(found_properties, *pp))
+ log_debug("Property %s does not exist.", *pp);
+
+ return 0;
+}
+
+static int get_unit_dbus_path_by_pid(
+ sd_bus *bus,
+ uint32_t pid,
+ char **unit) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ char *u;
+ int r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "GetUnitByPID",
+ &error,
+ &reply,
+ "u", pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unit for PID %"PRIu32": %s", pid, bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &u);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ u = strdup(u);
+ if (!u)
+ return log_oom();
+
+ *unit = u;
+ return 0;
+}
+
+static int show_all(
+ sd_bus *bus,
+ bool *new_line,
+ bool *ellipsized) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ UnitInfo *unit_infos = NULL;
+ const UnitInfo *u;
+ unsigned c;
+ int r, ret = 0;
+
+ r = get_unit_list(bus, NULL, NULL, &unit_infos, 0, &reply);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ c = (unsigned) r;
+
+ typesafe_qsort(unit_infos, c, compare_unit_info);
+
+ for (u = unit_infos; u < unit_infos + c; u++) {
+ _cleanup_free_ char *p = NULL;
+
+ p = unit_dbus_path_from_name(u->id);
+ if (!p)
+ return log_oom();
+
+ r = show_one(bus, p, u->id, SYSTEMCTL_SHOW_STATUS, new_line, ellipsized);
+ if (r < 0)
+ return r;
+ else if (r > 0 && ret == 0)
+ ret = r;
+ }
+
+ return ret;
+}
+
+static int show_system_status(sd_bus *bus) {
+ char since1[FORMAT_TIMESTAMP_RELATIVE_MAX], since2[FORMAT_TIMESTAMP_MAX];
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(machine_info_clear) struct machine_info mi = {};
+ _cleanup_free_ char *hn = NULL;
+ const char *on, *off;
+ int r;
+
+ hn = gethostname_malloc();
+ if (!hn)
+ return log_oom();
+
+ r = bus_map_all_properties(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ machine_info_property_map,
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ &mi);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read server status: %s", bus_error_message(&error, r));
+
+ if (streq_ptr(mi.state, "degraded")) {
+ on = ansi_highlight_red();
+ off = ansi_normal();
+ } else if (streq_ptr(mi.state, "running")) {
+ on = ansi_highlight_green();
+ off = ansi_normal();
+ } else {
+ on = ansi_highlight_yellow();
+ off = ansi_normal();
+ }
+
+ printf("%s%s%s %s\n", on, special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE), off, arg_host ? arg_host : hn);
+
+ printf(" State: %s%s%s\n",
+ on, strna(mi.state), off);
+
+ printf(" Jobs: %" PRIu32 " queued\n", mi.n_jobs);
+ printf(" Failed: %" PRIu32 " units\n", mi.n_failed_units);
+
+ printf(" Since: %s; %s\n",
+ format_timestamp(since2, sizeof(since2), mi.timestamp),
+ format_timestamp_relative(since1, sizeof(since1), mi.timestamp));
+
+ printf(" CGroup: %s\n", mi.control_group ?: "/");
+ if (IN_SET(arg_transport,
+ BUS_TRANSPORT_LOCAL,
+ BUS_TRANSPORT_MACHINE)) {
+ static const char prefix[] = " ";
+ unsigned c;
+
+ c = columns();
+ if (c > sizeof(prefix) - 1)
+ c -= sizeof(prefix) - 1;
+ else
+ c = 0;
+
+ show_cgroup(SYSTEMD_CGROUP_CONTROLLER, strempty(mi.control_group), prefix, c, get_output_flags());
+ }
+
+ return 0;
+}
+
+static int show(int argc, char *argv[], void *userdata) {
+ bool new_line = false, ellipsized = false;
+ SystemctlShowMode show_mode;
+ int r, ret = 0;
+ sd_bus *bus;
+
+ assert(argv);
+
+ show_mode = systemctl_show_mode_from_string(argv[0]);
+ if (show_mode < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid argument.");
+
+ if (show_mode == SYSTEMCTL_SHOW_HELP && argc <= 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This command expects one or more unit names. Did you mean --help?");
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ /* If no argument is specified inspect the manager itself */
+ if (show_mode == SYSTEMCTL_SHOW_PROPERTIES && argc <= 1)
+ return show_one(bus, "/org/freedesktop/systemd1", NULL, show_mode, &new_line, &ellipsized);
+
+ if (show_mode == SYSTEMCTL_SHOW_STATUS && argc <= 1) {
+
+ show_system_status(bus);
+ new_line = true;
+
+ if (arg_all)
+ ret = show_all(bus, &new_line, &ellipsized);
+ } else {
+ _cleanup_free_ char **patterns = NULL;
+ char **name;
+
+ STRV_FOREACH(name, strv_skip(argv, 1)) {
+ _cleanup_free_ char *path = NULL, *unit = NULL;
+ uint32_t id;
+
+ if (safe_atou32(*name, &id) < 0) {
+ if (strv_push(&patterns, *name) < 0)
+ return log_oom();
+
+ continue;
+ } else if (show_mode == SYSTEMCTL_SHOW_PROPERTIES) {
+ /* Interpret as job id */
+ if (asprintf(&path, "/org/freedesktop/systemd1/job/%u", id) < 0)
+ return log_oom();
+
+ } else {
+ /* Interpret as PID */
+ r = get_unit_dbus_path_by_pid(bus, id, &path);
+ if (r < 0) {
+ ret = r;
+ continue;
+ }
+
+ r = unit_name_from_dbus_path(path, &unit);
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = show_one(bus, path, unit, show_mode, &new_line, &ellipsized);
+ if (r < 0)
+ return r;
+ else if (r > 0 && ret == 0)
+ ret = r;
+ }
+
+ if (!strv_isempty(patterns)) {
+ _cleanup_strv_free_ char **names = NULL;
+
+ r = expand_names(bus, patterns, NULL, &names);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+
+ STRV_FOREACH(name, names) {
+ _cleanup_free_ char *path;
+
+ path = unit_dbus_path_from_name(*name);
+ if (!path)
+ return log_oom();
+
+ r = show_one(bus, path, *name, show_mode, &new_line, &ellipsized);
+ if (r < 0)
+ return r;
+ if (r > 0 && ret == 0)
+ ret = r;
+ }
+ }
+ }
+
+ if (ellipsized && !arg_quiet)
+ printf("Hint: Some lines were ellipsized, use -l to show in full.\n");
+
+ return ret;
+}
+
+static int cat(int argc, char *argv[], void *userdata) {
+ _cleanup_(lookup_paths_free) LookupPaths lp = {};
+ _cleanup_strv_free_ char **names = NULL;
+ char **name;
+ sd_bus *bus;
+ bool first = true;
+ int r;
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL) {
+ log_error("Cannot remotely cat units.");
+ return -EINVAL;
+ }
+
+ r = lookup_paths_init(&lp, arg_scope, 0, arg_root);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine unit paths: %m");
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = expand_names(bus, strv_skip(argv, 1), NULL, &names);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+
+ (void) pager_open(arg_pager_flags);
+
+ STRV_FOREACH(name, names) {
+ _cleanup_free_ char *fragment_path = NULL;
+ _cleanup_strv_free_ char **dropin_paths = NULL;
+
+ r = unit_find_paths(bus, *name, &lp, false, &fragment_path, &dropin_paths);
+ if (r == -ERFKILL) {
+ printf("%s# Unit %s is masked%s.\n",
+ ansi_highlight_magenta(),
+ *name,
+ ansi_normal());
+ continue;
+ }
+ if (r == -EKEYREJECTED) {
+ printf("%s# Unit %s could not be loaded.%s\n",
+ ansi_highlight_magenta(),
+ *name,
+ ansi_normal());
+ continue;
+ }
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOENT;
+
+ if (first)
+ first = false;
+ else
+ puts("");
+
+ if (need_daemon_reload(bus, *name) > 0) /* ignore errors (<0), this is informational output */
+ fprintf(stderr,
+ "%s# Warning: %s changed on disk, the version systemd has loaded is outdated.\n"
+ "%s# This output shows the current version of the unit's original fragment and drop-in files.\n"
+ "%s# If fragments or drop-ins were added or removed, they are not properly reflected in this output.\n"
+ "%s# Run 'systemctl%s daemon-reload' to reload units.%s\n",
+ ansi_highlight_red(),
+ *name,
+ ansi_highlight_red(),
+ ansi_highlight_red(),
+ ansi_highlight_red(),
+ arg_scope == UNIT_FILE_SYSTEM ? "" : " --user",
+ ansi_normal());
+
+ r = cat_files(fragment_path, dropin_paths, 0);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int set_property(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *n = NULL;
+ UnitType t;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "SetUnitProperties");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = unit_name_mangle(argv[1], arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+
+ t = unit_name_to_type(n);
+ if (t < 0) {
+ log_error("Invalid unit type: %s", n);
+ return -EINVAL;
+ }
+
+ r = sd_bus_message_append(m, "sb", n, arg_runtime);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_append_unit_property_assignment_many(m, t, strv_skip(argv, 2));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set unit properties on %s: %s", n, bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int daemon_reload(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ const char *method;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ switch (arg_action) {
+
+ case ACTION_RELOAD:
+ method = "Reload";
+ break;
+
+ case ACTION_REEXEC:
+ method = "Reexecute";
+ break;
+
+ case ACTION_SYSTEMCTL:
+ method = streq(argv[0], "daemon-reexec") ? "Reexecute" :
+ /* "daemon-reload" */ "Reload";
+ break;
+
+ default:
+ assert_not_reached("Unexpected action");
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ method);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Note we use an extra-long timeout here. This is because a reload or reexec means generators are rerun which
+ * are timed out after DEFAULT_TIMEOUT_USEC. Let's use twice that time here, so that the generators can have
+ * their timeout, and for everything else there's the same time budget in place. */
+
+ r = sd_bus_call(bus, m, DEFAULT_TIMEOUT_USEC * 2, &error, NULL);
+
+ /* On reexecution, we expect a disconnect, not a reply */
+ if (IN_SET(r, -ETIMEDOUT, -ECONNRESET) && streq(method, "Reexecute"))
+ r = 0;
+
+ if (r < 0 && arg_action == ACTION_SYSTEMCTL)
+ return log_error_errno(r, "Failed to reload daemon: %s", bus_error_message(&error, r));
+
+ /* Note that for the legacy commands (i.e. those with action != ACTION_SYSTEMCTL) we support fallbacks to the
+ * old ways of doing things, hence don't log any error in that case here. */
+
+ return r < 0 ? r : 0;
+}
+
+static int trivial_method(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *method;
+ sd_bus *bus;
+ int r;
+
+ if (arg_dry_run)
+ return 0;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ method =
+ streq(argv[0], "clear-jobs") ||
+ streq(argv[0], "cancel") ? "ClearJobs" :
+ streq(argv[0], "reset-failed") ? "ResetFailed" :
+ streq(argv[0], "halt") ? "Halt" :
+ streq(argv[0], "reboot") ? "Reboot" :
+ streq(argv[0], "kexec") ? "KExec" :
+ streq(argv[0], "exit") ? "Exit" :
+ /* poweroff */ "PowerOff";
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ method,
+ &error,
+ NULL,
+ NULL);
+ if (r < 0 && arg_action == ACTION_SYSTEMCTL)
+ return log_error_errno(r, "Failed to execute operation: %s", bus_error_message(&error, r));
+
+ /* Note that for the legacy commands (i.e. those with action != ACTION_SYSTEMCTL) we support fallbacks to the
+ * old ways of doing things, hence don't log any error in that case here. */
+
+ return r < 0 ? r : 0;
+}
+
+static int reset_failed(int argc, char *argv[], void *userdata) {
+ _cleanup_strv_free_ char **names = NULL;
+ sd_bus *bus;
+ char **name;
+ int r, q;
+
+ if (argc <= 1)
+ return trivial_method(argc, argv, userdata);
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ r = expand_names(bus, strv_skip(argv, 1), NULL, &names);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+
+ STRV_FOREACH(name, names) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ q = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ResetFailedUnit",
+ &error,
+ NULL,
+ "s", *name);
+ if (q < 0) {
+ log_error_errno(q, "Failed to reset failed state of unit %s: %s", *name, bus_error_message(&error, q));
+ if (r == 0)
+ r = q;
+ }
+ }
+
+ return r;
+}
+
+static int print_variable(const char *s) {
+ const char *sep;
+ _cleanup_free_ char *esc = NULL;
+
+ sep = strchr(s, '=');
+ if (!sep)
+ return log_error_errno(SYNTHETIC_ERRNO(EUCLEAN),
+ "Invalid environment block");
+
+ esc = shell_maybe_quote(sep + 1, ESCAPE_POSIX);
+ if (!esc)
+ return log_oom();
+
+ printf("%.*s=%s\n", (int)(sep-s), s, esc);
+ return 0;
+}
+
+static int show_environment(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *text;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = sd_bus_get_property(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Environment",
+ &error,
+ &reply,
+ "as");
+ if (r < 0)
+ return log_error_errno(r, "Failed to get environment: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "s");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read_basic(reply, SD_BUS_TYPE_STRING, &text)) > 0) {
+ r = print_variable(text);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
+static int switch_root(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *cmdline_init = NULL;
+ const char *root, *init;
+ sd_bus *bus;
+ int r;
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL) {
+ log_error("Cannot switch root remotely.");
+ return -EINVAL;
+ }
+
+ if (argc < 2 || argc > 3) {
+ log_error("Wrong number of arguments.");
+ return -EINVAL;
+ }
+
+ root = argv[1];
+
+ if (argc >= 3)
+ init = argv[2];
+ else {
+ r = proc_cmdline_get_key("init", 0, &cmdline_init);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse /proc/cmdline: %m");
+
+ init = cmdline_init;
+ }
+
+ init = empty_to_null(init);
+ if (init) {
+ const char *root_systemd_path = NULL, *root_init_path = NULL;
+
+ root_systemd_path = strjoina(root, "/" SYSTEMD_BINARY_PATH);
+ root_init_path = strjoina(root, "/", init);
+
+ /* If the passed init is actually the same as the
+ * systemd binary, then let's suppress it. */
+ if (files_same(root_init_path, root_systemd_path, 0) > 0)
+ init = NULL;
+ }
+
+ /* Instruct PID1 to exclude us from its killing spree applied during
+ * the transition. Otherwise we would exit with a failure status even
+ * though the switch to the new root has succeed. */
+ argv_cmdline[0] = '@';
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ /* If we are slow to exit after the root switch, the new systemd instance
+ * will send us a signal to terminate. Just ignore it and exit normally.
+ * This way the unit does not end up as failed.
+ */
+ r = ignore_signals(SIGTERM, -1);
+ if (r < 0)
+ log_warning_errno(r, "Failed to change disposition of SIGTERM to ignore: %m");
+
+ log_debug("Switching root - root: %s; init: %s", root, strna(init));
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "SwitchRoot",
+ &error,
+ NULL,
+ "ss", root, init);
+ if (r < 0) {
+ (void) default_signals(SIGTERM, -1);
+
+ return log_error_errno(r, "Failed to switch root: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int set_environment(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ const char *method;
+ sd_bus *bus;
+ int r;
+
+ assert(argc > 1);
+ assert(argv);
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ method = streq(argv[0], "set-environment")
+ ? "SetEnvironment"
+ : "UnsetEnvironment";
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ method);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, strv_skip(argv, 1));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set environment: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int import_environment(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "SetEnvironment");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (argc < 2)
+ r = sd_bus_message_append_strv(m, environ);
+ else {
+ char **a, **b;
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ STRV_FOREACH(a, strv_skip(argv, 1)) {
+
+ if (!env_name_is_valid(*a)) {
+ log_error("Not a valid environment variable name: %s", *a);
+ return -EINVAL;
+ }
+
+ STRV_FOREACH(b, environ) {
+ const char *eq;
+
+ eq = startswith(*b, *a);
+ if (eq && *eq == '=') {
+
+ r = sd_bus_message_append(m, "s", *b);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ break;
+ }
+ }
+ }
+
+ r = sd_bus_message_close_container(m);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to import environment: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int enable_sysv_units(const char *verb, char **args) {
+ int r = 0;
+
+#if HAVE_SYSV_COMPAT
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ unsigned f = 0;
+
+ /* Processes all SysV units, and reshuffles the array so that afterwards only the native units remain */
+
+ if (arg_scope != UNIT_FILE_SYSTEM)
+ return 0;
+
+ if (getenv_bool("SYSTEMCTL_SKIP_SYSV") > 0)
+ return 0;
+
+ if (!STR_IN_SET(verb,
+ "enable",
+ "disable",
+ "is-enabled"))
+ return 0;
+
+ r = lookup_paths_init(&paths, arg_scope, LOOKUP_PATHS_EXCLUDE_GENERATED, arg_root);
+ if (r < 0)
+ return r;
+
+ r = 0;
+ while (args[f]) {
+
+ const char *argv[] = {
+ ROOTLIBEXECDIR "/systemd-sysv-install",
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ };
+
+ _cleanup_free_ char *p = NULL, *q = NULL, *l = NULL;
+ bool found_native = false, found_sysv;
+ const char *name;
+ unsigned c = 1;
+ pid_t pid;
+ int j;
+
+ name = args[f++];
+
+ if (!endswith(name, ".service"))
+ continue;
+
+ if (path_is_absolute(name))
+ continue;
+
+ j = unit_file_exists(arg_scope, &paths, name);
+ if (j < 0 && !IN_SET(j, -ELOOP, -ERFKILL, -EADDRNOTAVAIL))
+ return log_error_errno(j, "Failed to lookup unit file state: %m");
+ found_native = j != 0;
+
+ /* If we have both a native unit and a SysV script, enable/disable them both (below); for is-enabled,
+ * prefer the native unit */
+ if (found_native && streq(verb, "is-enabled"))
+ continue;
+
+ p = path_join(arg_root, SYSTEM_SYSVINIT_PATH, name);
+ if (!p)
+ return log_oom();
+
+ p[strlen(p) - STRLEN(".service")] = 0;
+ found_sysv = access(p, F_OK) >= 0;
+ if (!found_sysv)
+ continue;
+
+ if (!arg_quiet) {
+ if (found_native)
+ log_info("Synchronizing state of %s with SysV service script with %s.", name, argv[0]);
+ else
+ log_info("%s is not a native service, redirecting to systemd-sysv-install.", name);
+ }
+
+ if (!isempty(arg_root))
+ argv[c++] = q = strappend("--root=", arg_root);
+
+ argv[c++] = verb;
+ argv[c++] = basename(p);
+ argv[c] = NULL;
+
+ l = strv_join((char**)argv, " ");
+ if (!l)
+ return log_oom();
+
+ if (!arg_quiet)
+ log_info("Executing: %s", l);
+
+ j = safe_fork("(sysv-install)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (j < 0)
+ return j;
+ if (j == 0) {
+ /* Child */
+ execv(argv[0], (char**) argv);
+ log_error_errno(errno, "Failed to execute %s: %m", argv[0]);
+ _exit(EXIT_FAILURE);
+ }
+
+ j = wait_for_terminate_and_check("sysv-install", pid, WAIT_LOG_ABNORMAL);
+ if (j < 0)
+ return j;
+ if (streq(verb, "is-enabled")) {
+ if (j == EXIT_SUCCESS) {
+ if (!arg_quiet)
+ puts("enabled");
+ r = 1;
+ } else {
+ if (!arg_quiet)
+ puts("disabled");
+ }
+
+ } else if (j != EXIT_SUCCESS)
+ return -EBADE; /* We don't warn here, under the assumption the script already showed an explanation */
+
+ if (found_native)
+ continue;
+
+ /* Remove this entry, so that we don't try enabling it as native unit */
+ assert(f > 0);
+ f--;
+ assert(args[f] == name);
+ strv_remove(args, name);
+ }
+
+#endif
+ return r;
+}
+
+static int mangle_names(char **original_names, char ***mangled_names) {
+ char **i, **l, **name;
+ int r;
+
+ l = i = new(char*, strv_length(original_names) + 1);
+ if (!l)
+ return log_oom();
+
+ STRV_FOREACH(name, original_names) {
+
+ /* When enabling units qualified path names are OK,
+ * too, hence allow them explicitly. */
+
+ if (is_path(*name)) {
+ *i = strdup(*name);
+ if (!*i) {
+ strv_free(l);
+ return log_oom();
+ }
+ } else {
+ r = unit_name_mangle(*name, arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN, i);
+ if (r < 0) {
+ *i = NULL;
+ strv_free(l);
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+ }
+ }
+
+ i++;
+ }
+
+ *i = NULL;
+ *mangled_names = l;
+
+ return 0;
+}
+
+static int normalize_filenames(char **names) {
+ char **u;
+ int r;
+
+ STRV_FOREACH(u, names)
+ if (!path_is_absolute(*u)) {
+ char* normalized_path;
+
+ if (!isempty(arg_root))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Non-absolute paths are not allowed when --root is used: %s",
+ *u);
+
+ if (!strchr(*u,'/'))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Link argument does contain at least one directory separator: %s",
+ *u);
+
+ r = path_make_absolute_cwd(*u, &normalized_path);
+ if (r < 0)
+ return r;
+
+ free_and_replace(*u, normalized_path);
+ }
+
+ return 0;
+}
+
+static int normalize_names(char **names, bool warn_if_path) {
+ char **u;
+ bool was_path = false;
+
+ STRV_FOREACH(u, names) {
+ int r;
+
+ if (!is_path(*u))
+ continue;
+
+ r = free_and_strdup(u, basename(*u));
+ if (r < 0)
+ return log_error_errno(r, "Failed to normalize unit file path: %m");
+
+ was_path = true;
+ }
+
+ if (warn_if_path && was_path)
+ log_warning("Warning: Can't execute disable on the unit file path. Proceeding with the unit name.");
+
+ return 0;
+}
+
+static int unit_exists(LookupPaths *lp, const char *unit) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *path = NULL;
+ static const struct bus_properties_map property_map[] = {
+ { "LoadState", "s", NULL, offsetof(UnitStatusInfo, load_state) },
+ { "ActiveState", "s", NULL, offsetof(UnitStatusInfo, active_state)},
+ {},
+ };
+ UnitStatusInfo info = {};
+ sd_bus *bus;
+ int r;
+
+ if (unit_name_is_valid(unit, UNIT_NAME_TEMPLATE))
+ return unit_find_template_path(unit, lp, NULL, NULL);
+
+ path = unit_dbus_path_from_name(unit);
+ if (!path)
+ return log_oom();
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_map_all_properties(bus, "org.freedesktop.systemd1", path, property_map, 0, &error, &m, &info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get properties: %s", bus_error_message(&error, r));
+
+ return !streq_ptr(info.load_state, "not-found") || !streq_ptr(info.active_state, "inactive");
+}
+
+static int enable_unit(int argc, char *argv[], void *userdata) {
+ _cleanup_strv_free_ char **names = NULL;
+ const char *verb = argv[0];
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ int carries_install_info = -1;
+ bool ignore_carries_install_info = arg_quiet;
+ int r;
+
+ if (!argv[1])
+ return 0;
+
+ r = mangle_names(strv_skip(argv, 1), &names);
+ if (r < 0)
+ return r;
+
+ r = enable_sysv_units(verb, names);
+ if (r < 0)
+ return r;
+
+ /* If the operation was fully executed by the SysV compat, let's finish early */
+ if (strv_isempty(names)) {
+ if (arg_no_reload || install_client_side())
+ return 0;
+ return daemon_reload(argc, argv, userdata);
+ }
+
+ if (streq(verb, "disable")) {
+ r = normalize_names(names, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (streq(verb, "link")) {
+ r = normalize_filenames(names);
+ if (r < 0)
+ return r;
+ }
+
+ if (install_client_side()) {
+ UnitFileFlags flags;
+
+ flags = args_to_flags();
+ if (streq(verb, "enable")) {
+ r = unit_file_enable(arg_scope, flags, arg_root, names, &changes, &n_changes);
+ carries_install_info = r;
+ } else if (streq(verb, "disable"))
+ r = unit_file_disable(arg_scope, flags, arg_root, names, &changes, &n_changes);
+ else if (streq(verb, "reenable")) {
+ r = unit_file_reenable(arg_scope, flags, arg_root, names, &changes, &n_changes);
+ carries_install_info = r;
+ } else if (streq(verb, "link"))
+ r = unit_file_link(arg_scope, flags, arg_root, names, &changes, &n_changes);
+ else if (streq(verb, "preset")) {
+ r = unit_file_preset(arg_scope, flags, arg_root, names, arg_preset_mode, &changes, &n_changes);
+ } else if (streq(verb, "mask"))
+ r = unit_file_mask(arg_scope, flags, arg_root, names, &changes, &n_changes);
+ else if (streq(verb, "unmask"))
+ r = unit_file_unmask(arg_scope, flags, arg_root, names, &changes, &n_changes);
+ else if (streq(verb, "revert"))
+ r = unit_file_revert(arg_scope, arg_root, names, &changes, &n_changes);
+ else
+ assert_not_reached("Unknown verb");
+
+ unit_file_dump_changes(r, verb, changes, n_changes, arg_quiet);
+ if (r < 0)
+ goto finish;
+ r = 0;
+ } else {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool expect_carries_install_info = false;
+ bool send_runtime = true, send_force = true, send_preset_mode = false;
+ const char *method;
+ sd_bus *bus;
+
+ if (STR_IN_SET(verb, "mask", "unmask")) {
+ char **name;
+ _cleanup_(lookup_paths_free) LookupPaths lp = {};
+
+ r = lookup_paths_init(&lp, arg_scope, 0, arg_root);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(name, names) {
+ r = unit_exists(&lp, *name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ log_notice("Unit %s does not exist, proceeding anyway.", *name);
+ }
+ }
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ if (streq(verb, "enable")) {
+ method = "EnableUnitFiles";
+ expect_carries_install_info = true;
+ } else if (streq(verb, "disable")) {
+ method = "DisableUnitFiles";
+ send_force = false;
+ } else if (streq(verb, "reenable")) {
+ method = "ReenableUnitFiles";
+ expect_carries_install_info = true;
+ } else if (streq(verb, "link"))
+ method = "LinkUnitFiles";
+ else if (streq(verb, "preset")) {
+
+ if (arg_preset_mode != UNIT_FILE_PRESET_FULL) {
+ method = "PresetUnitFilesWithMode";
+ send_preset_mode = true;
+ } else
+ method = "PresetUnitFiles";
+
+ expect_carries_install_info = true;
+ ignore_carries_install_info = true;
+ } else if (streq(verb, "mask"))
+ method = "MaskUnitFiles";
+ else if (streq(verb, "unmask")) {
+ method = "UnmaskUnitFiles";
+ send_force = false;
+ } else if (streq(verb, "revert")) {
+ method = "RevertUnitFiles";
+ send_runtime = send_force = false;
+ } else
+ assert_not_reached("Unknown verb");
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ method);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, names);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (send_preset_mode) {
+ r = sd_bus_message_append(m, "s", unit_file_preset_mode_to_string(arg_preset_mode));
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (send_runtime) {
+ r = sd_bus_message_append(m, "b", arg_runtime);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (send_force) {
+ r = sd_bus_message_append(m, "b", arg_force);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to %s unit: %s", verb, bus_error_message(&error, r));
+
+ if (expect_carries_install_info) {
+ r = sd_bus_message_read(reply, "b", &carries_install_info);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ r = bus_deserialize_and_dump_unit_file_changes(reply, arg_quiet, &changes, &n_changes);
+ if (r < 0)
+ goto finish;
+
+ /* Try to reload if enabled */
+ if (!arg_no_reload)
+ r = daemon_reload(argc, argv, userdata);
+ else
+ r = 0;
+ }
+
+ if (carries_install_info == 0 && !ignore_carries_install_info)
+ log_notice("The unit files have no installation config (WantedBy=, RequiredBy=, Also=,\n"
+ "Alias= settings in the [Install] section, and DefaultInstance= for template\n"
+ "units). This means they are not meant to be enabled using systemctl.\n"
+ " \n" /* trick: the space is needed so that the line does not get stripped from output */
+ "Possible reasons for having this kind of units are:\n"
+ "%1$s A unit may be statically enabled by being symlinked from another unit's\n"
+ " .wants/ or .requires/ directory.\n"
+ "%1$s A unit's purpose may be to act as a helper for some other unit which has\n"
+ " a requirement dependency on it.\n"
+ "%1$s A unit may be started when needed via activation (socket, path, timer,\n"
+ " D-Bus, udev, scripted systemctl call, ...).\n"
+ "%1$s In case of template units, the unit is meant to be enabled with some\n"
+ " instance name specified.",
+ special_glyph(SPECIAL_GLYPH_BULLET));
+
+ if (arg_now && STR_IN_SET(argv[0], "enable", "disable", "mask")) {
+ sd_bus *bus;
+ size_t len, i;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ goto finish;
+
+ len = strv_length(names);
+ {
+ char *new_args[len + 2];
+
+ new_args[0] = (char*) (streq(argv[0], "enable") ? "start" : "stop");
+ for (i = 0; i < len; i++)
+ new_args[i + 1] = basename(names[i]);
+ new_args[i + 1] = NULL;
+
+ r = start_unit(len + 1, new_args, userdata);
+ }
+ }
+
+finish:
+ unit_file_changes_free(changes, n_changes);
+
+ return r;
+}
+
+static int add_dependency(int argc, char *argv[], void *userdata) {
+ _cleanup_strv_free_ char **names = NULL;
+ _cleanup_free_ char *target = NULL;
+ const char *verb = argv[0];
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ UnitDependency dep;
+ int r = 0;
+
+ if (!argv[1])
+ return 0;
+
+ r = unit_name_mangle_with_suffix(argv[1], arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN, ".target", &target);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+
+ r = mangle_names(strv_skip(argv, 2), &names);
+ if (r < 0)
+ return r;
+
+ if (streq(verb, "add-wants"))
+ dep = UNIT_WANTS;
+ else if (streq(verb, "add-requires"))
+ dep = UNIT_REQUIRES;
+ else
+ assert_not_reached("Unknown verb");
+
+ if (install_client_side()) {
+ r = unit_file_add_dependency(arg_scope, args_to_flags(), arg_root, names, target, dep, &changes, &n_changes);
+ unit_file_dump_changes(r, "add dependency on", changes, n_changes, arg_quiet);
+
+ if (r > 0)
+ r = 0;
+ } else {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "AddDependencyUnitFiles");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, names);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "ssbb", target, unit_dependency_to_string(dep), arg_runtime, arg_force);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add dependency: %s", bus_error_message(&error, r));
+
+ r = bus_deserialize_and_dump_unit_file_changes(reply, arg_quiet, &changes, &n_changes);
+ if (r < 0)
+ goto finish;
+
+ if (arg_no_reload) {
+ r = 0;
+ goto finish;
+ }
+
+ r = daemon_reload(argc, argv, userdata);
+ }
+
+finish:
+ unit_file_changes_free(changes, n_changes);
+
+ return r;
+}
+
+static int preset_all(int argc, char *argv[], void *userdata) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ int r;
+
+ if (install_client_side()) {
+ r = unit_file_preset_all(arg_scope, args_to_flags(), arg_root, arg_preset_mode, &changes, &n_changes);
+ unit_file_dump_changes(r, "preset", changes, n_changes, arg_quiet);
+
+ if (r > 0)
+ r = 0;
+ } else {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ sd_bus *bus;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "PresetAllUnitFiles",
+ &error,
+ &reply,
+ "sbb",
+ unit_file_preset_mode_to_string(arg_preset_mode),
+ arg_runtime,
+ arg_force);
+ if (r < 0)
+ return log_error_errno(r, "Failed to preset all units: %s", bus_error_message(&error, r));
+
+ r = bus_deserialize_and_dump_unit_file_changes(reply, arg_quiet, &changes, &n_changes);
+ if (r < 0)
+ goto finish;
+
+ if (arg_no_reload) {
+ r = 0;
+ goto finish;
+ }
+
+ r = daemon_reload(argc, argv, userdata);
+ }
+
+finish:
+ unit_file_changes_free(changes, n_changes);
+
+ return r;
+}
+
+static int show_installation_targets_client_side(const char *name) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0, i;
+ UnitFileFlags flags;
+ char **p;
+ int r;
+
+ p = STRV_MAKE(name);
+ flags = UNIT_FILE_DRY_RUN |
+ (arg_runtime ? UNIT_FILE_RUNTIME : 0);
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, flags, NULL, p, &changes, &n_changes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get file links for %s: %m", name);
+
+ for (i = 0; i < n_changes; i++)
+ if (changes[i].type == UNIT_FILE_UNLINK)
+ printf(" %s\n", changes[i].path);
+
+ return 0;
+}
+
+static int show_installation_targets(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *link;
+ int r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "GetUnitFileLinks",
+ &error,
+ &reply,
+ "sb", name, arg_runtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unit file links for %s: %s", name, bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "s");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "s", &link)) > 0)
+ printf(" %s\n", link);
+
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
+static int unit_is_enabled(int argc, char *argv[], void *userdata) {
+ _cleanup_strv_free_ char **names = NULL;
+ bool enabled;
+ char **name;
+ int r;
+
+ r = mangle_names(strv_skip(argv, 1), &names);
+ if (r < 0)
+ return r;
+
+ r = enable_sysv_units(argv[0], names);
+ if (r < 0)
+ return r;
+
+ enabled = r > 0;
+
+ if (install_client_side()) {
+ STRV_FOREACH(name, names) {
+ UnitFileState state;
+
+ r = unit_file_get_state(arg_scope, arg_root, *name, &state);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unit file state for %s: %m", *name);
+
+ if (IN_SET(state,
+ UNIT_FILE_ENABLED,
+ UNIT_FILE_ENABLED_RUNTIME,
+ UNIT_FILE_STATIC,
+ UNIT_FILE_INDIRECT,
+ UNIT_FILE_GENERATED))
+ enabled = true;
+
+ if (!arg_quiet) {
+ puts(unit_file_state_to_string(state));
+ if (arg_full) {
+ r = show_installation_targets_client_side(*name);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ r = 0;
+ } else {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(name, names) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *s;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "GetUnitFileState",
+ &error,
+ &reply,
+ "s", *name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unit file state for %s: %s", *name, bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "s", &s);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (STR_IN_SET(s, "enabled", "enabled-runtime", "static", "indirect", "generated"))
+ enabled = true;
+
+ if (!arg_quiet) {
+ puts(s);
+ if (arg_full) {
+ r = show_installation_targets(bus, *name);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+ }
+
+ return enabled ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+static int match_startup_finished(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ char **state = userdata;
+ int r;
+
+ assert(state);
+
+ r = sd_bus_get_property_string(
+ sd_bus_message_get_bus(m),
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "SystemState",
+ NULL,
+ state);
+
+ sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), r);
+ return 0;
+}
+
+static int is_system_running(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *slot_startup_finished = NULL;
+ _cleanup_(sd_event_unrefp) sd_event* event = NULL;
+ _cleanup_free_ char *state = NULL;
+ sd_bus *bus;
+ int r;
+
+ if (running_in_chroot() > 0 || (arg_transport == BUS_TRANSPORT_LOCAL && !sd_booted())) {
+ if (!arg_quiet)
+ puts("offline");
+ return EXIT_FAILURE;
+ }
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ if (arg_wait) {
+ r = sd_event_default(&event);
+ if (r >= 0)
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r >= 0)
+ r = sd_bus_match_signal_async(
+ bus,
+ &slot_startup_finished,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartupFinished",
+ match_startup_finished, NULL, &state);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to request match for StartupFinished: %m");
+ arg_wait = false;
+ }
+ }
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "SystemState",
+ &error,
+ &state);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to query system state: %s", bus_error_message(&error, r));
+
+ if (!arg_quiet)
+ puts("unknown");
+ return EXIT_FAILURE;
+ }
+
+ if (arg_wait && STR_IN_SET(state, "initializing", "starting")) {
+ r = sd_event_loop(event);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to get property from event loop: %m");
+ if (!arg_quiet)
+ puts("unknown");
+ return EXIT_FAILURE;
+ }
+ }
+
+ if (!arg_quiet)
+ puts(state);
+
+ return streq(state, "running") ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+static int create_edit_temp_file(const char *new_path, const char *original_path, char **ret_tmp_fn) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(new_path);
+ assert(original_path);
+ assert(ret_tmp_fn);
+
+ r = tempfn_random(new_path, NULL, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine temporary filename for \"%s\": %m", new_path);
+
+ r = mkdir_parents(new_path, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create directories for \"%s\": %m", new_path);
+
+ r = copy_file(original_path, t, 0, 0644, 0, COPY_REFLINK);
+ if (r == -ENOENT) {
+
+ r = touch(t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create temporary file \"%s\": %m", t);
+
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to create temporary file for \"%s\": %m", new_path);
+
+ *ret_tmp_fn = TAKE_PTR(t);
+
+ return 0;
+}
+
+static int get_file_to_edit(
+ const LookupPaths *paths,
+ const char *name,
+ char **ret_path) {
+
+ _cleanup_free_ char *path = NULL, *run = NULL;
+
+ assert(name);
+ assert(ret_path);
+
+ path = strjoin(paths->persistent_config, "/", name);
+ if (!path)
+ return log_oom();
+
+ if (arg_runtime) {
+ run = strjoin(paths->runtime_config, "/", name);
+ if (!run)
+ return log_oom();
+ }
+
+ if (arg_runtime) {
+ if (access(path, F_OK) >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "Refusing to create \"%s\" because it would be overridden by \"%s\" anyway.",
+ run, path);
+
+ *ret_path = TAKE_PTR(run);
+ } else
+ *ret_path = TAKE_PTR(path);
+
+ return 0;
+}
+
+static int unit_file_create_new(
+ const LookupPaths *paths,
+ const char *unit_name,
+ const char *suffix,
+ char **ret_new_path,
+ char **ret_tmp_path) {
+
+ _cleanup_free_ char *new_path = NULL, *tmp_path = NULL;
+ const char *ending;
+ int r;
+
+ assert(unit_name);
+ assert(ret_new_path);
+ assert(ret_tmp_path);
+
+ ending = strjoina(unit_name, suffix);
+ r = get_file_to_edit(paths, ending, &new_path);
+ if (r < 0)
+ return r;
+
+ r = create_edit_temp_file(new_path, new_path, &tmp_path);
+ if (r < 0)
+ return r;
+
+ *ret_new_path = TAKE_PTR(new_path);
+ *ret_tmp_path = TAKE_PTR(tmp_path);
+
+ return 0;
+}
+
+static int unit_file_create_copy(
+ const LookupPaths *paths,
+ const char *unit_name,
+ const char *fragment_path,
+ char **ret_new_path,
+ char **ret_tmp_path) {
+
+ _cleanup_free_ char *new_path = NULL, *tmp_path = NULL;
+ int r;
+
+ assert(fragment_path);
+ assert(unit_name);
+ assert(ret_new_path);
+ assert(ret_tmp_path);
+
+ r = get_file_to_edit(paths, unit_name, &new_path);
+ if (r < 0)
+ return r;
+
+ if (!path_equal(fragment_path, new_path) && access(new_path, F_OK) >= 0) {
+ char response;
+
+ r = ask_char(&response, "yn", "\"%s\" already exists. Overwrite with \"%s\"? [(y)es, (n)o] ", new_path, fragment_path);
+ if (r < 0)
+ return r;
+ if (response != 'y')
+ return log_warning_errno(SYNTHETIC_ERRNO(EKEYREJECTED), "%s skipped.", unit_name);
+ }
+
+ r = create_edit_temp_file(new_path, fragment_path, &tmp_path);
+ if (r < 0)
+ return r;
+
+ *ret_new_path = TAKE_PTR(new_path);
+ *ret_tmp_path = TAKE_PTR(tmp_path);
+
+ return 0;
+}
+
+static int run_editor(char **paths) {
+ int r;
+
+ assert(paths);
+
+ r = safe_fork("(editor)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG|FORK_WAIT, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ char **editor_args = NULL, **tmp_path, **original_path;
+ size_t n_editor_args = 0, i = 1, argc;
+ const char **args, *editor, *p;
+
+ argc = strv_length(paths)/2 + 1;
+
+ /* SYSTEMD_EDITOR takes precedence over EDITOR which takes precedence over VISUAL
+ * If neither SYSTEMD_EDITOR nor EDITOR nor VISUAL are present,
+ * we try to execute well known editors
+ */
+ editor = getenv("SYSTEMD_EDITOR");
+ if (!editor)
+ editor = getenv("EDITOR");
+ if (!editor)
+ editor = getenv("VISUAL");
+
+ if (!isempty(editor)) {
+ editor_args = strv_split(editor, WHITESPACE);
+ if (!editor_args) {
+ (void) log_oom();
+ _exit(EXIT_FAILURE);
+ }
+ n_editor_args = strv_length(editor_args);
+ argc += n_editor_args - 1;
+ }
+
+ args = newa(const char*, argc + 1);
+
+ if (n_editor_args > 0) {
+ args[0] = editor_args[0];
+ for (; i < n_editor_args; i++)
+ args[i] = editor_args[i];
+ }
+
+ STRV_FOREACH_PAIR(original_path, tmp_path, paths)
+ args[i++] = *tmp_path;
+ args[i] = NULL;
+
+ if (n_editor_args > 0)
+ execvp(args[0], (char* const*) args);
+
+ FOREACH_STRING(p, "editor", "nano", "vim", "vi") {
+ args[0] = p;
+ execvp(p, (char* const*) args);
+ /* We do not fail if the editor doesn't exist
+ * because we want to try each one of them before
+ * failing.
+ */
+ if (errno != ENOENT) {
+ log_error_errno(errno, "Failed to execute %s: %m", editor);
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ log_error("Cannot edit unit(s), no editor available. Please set either $SYSTEMD_EDITOR, $EDITOR or $VISUAL.");
+ _exit(EXIT_FAILURE);
+ }
+
+ return 0;
+}
+
+static int find_paths_to_edit(sd_bus *bus, char **names, char ***paths) {
+ _cleanup_(lookup_paths_free) LookupPaths lp = {};
+ char **name;
+ int r;
+
+ assert(names);
+ assert(paths);
+
+ r = lookup_paths_init(&lp, arg_scope, 0, arg_root);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(name, names) {
+ _cleanup_free_ char *path = NULL, *new_path = NULL, *tmp_path = NULL, *tmp_name = NULL;
+ const char *unit_name;
+
+ r = unit_find_paths(bus, *name, &lp, false, &path, NULL);
+ if (r == -EKEYREJECTED) {
+ /* If loading of the unit failed server side complete, then the server won't tell us the unit
+ * file path. In that case, find the file client side. */
+ log_debug_errno(r, "Unit '%s' was not loaded correctly, retrying client-side.", *name);
+ r = unit_find_paths(bus, *name, &lp, true, &path, NULL);
+ }
+ if (r == -ERFKILL)
+ return log_error_errno(r, "Unit '%s' masked, cannot edit.", *name);
+ if (r < 0)
+ return r;
+
+ if (r == 0) {
+ assert(!path);
+
+ if (!arg_force) {
+ log_info("Run 'systemctl edit%s --force --full %s' to create a new unit.",
+ arg_scope == UNIT_FILE_GLOBAL ? " --global" :
+ arg_scope == UNIT_FILE_USER ? " --user" : "",
+ *name);
+ return -ENOENT;
+ }
+
+ /* Create a new unit from scratch */
+ unit_name = *name;
+ r = unit_file_create_new(&lp, unit_name,
+ arg_full ? NULL : ".d/override.conf",
+ &new_path, &tmp_path);
+ } else {
+ assert(path);
+
+ unit_name = basename(path);
+ /* We follow unit aliases, but we need to propagate the instance */
+ if (unit_name_is_valid(*name, UNIT_NAME_INSTANCE) &&
+ unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE)) {
+ _cleanup_free_ char *instance = NULL;
+
+ r = unit_name_to_instance(*name, &instance);
+ if (r < 0)
+ return r;
+
+ r = unit_name_replace_instance(unit_name, instance, &tmp_name);
+ if (r < 0)
+ return r;
+
+ unit_name = tmp_name;
+ }
+
+ if (arg_full)
+ r = unit_file_create_copy(&lp, unit_name, path, &new_path, &tmp_path);
+ else
+ r = unit_file_create_new(&lp, unit_name, ".d/override.conf", &new_path, &tmp_path);
+ }
+ if (r < 0)
+ return r;
+
+ r = strv_push_pair(paths, new_path, tmp_path);
+ if (r < 0)
+ return log_oom();
+
+ new_path = tmp_path = NULL;
+ }
+
+ return 0;
+}
+
+static int edit(int argc, char *argv[], void *userdata) {
+ _cleanup_(lookup_paths_free) LookupPaths lp = {};
+ _cleanup_strv_free_ char **names = NULL;
+ _cleanup_strv_free_ char **paths = NULL;
+ char **original, **tmp;
+ sd_bus *bus;
+ int r;
+
+ if (!on_tty()) {
+ log_error("Cannot edit units if not on a tty.");
+ return -EINVAL;
+ }
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL) {
+ log_error("Cannot edit units remotely.");
+ return -EINVAL;
+ }
+
+ r = lookup_paths_init(&lp, arg_scope, 0, arg_root);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine unit paths: %m");
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = expand_names(bus, strv_skip(argv, 1), NULL, &names);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+
+ STRV_FOREACH(tmp, names) {
+ r = unit_is_masked(bus, &lp, *tmp);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ log_error("Cannot edit %s: unit is masked.", *tmp);
+ return -EINVAL;
+ }
+ }
+
+ r = find_paths_to_edit(bus, names, &paths);
+ if (r < 0)
+ return r;
+
+ if (strv_isempty(paths))
+ return -ENOENT;
+
+ r = run_editor(paths);
+ if (r < 0)
+ goto end;
+
+ STRV_FOREACH_PAIR(original, tmp, paths) {
+ /* If the temporary file is empty we ignore it.
+ * This allows the user to cancel the modification.
+ */
+ if (null_or_empty_path(*tmp)) {
+ log_warning("Editing \"%s\" canceled: temporary file is empty.", *original);
+ continue;
+ }
+
+ r = rename(*tmp, *original);
+ if (r < 0) {
+ r = log_error_errno(errno, "Failed to rename \"%s\" to \"%s\": %m", *tmp, *original);
+ goto end;
+ }
+ }
+
+ r = 0;
+
+ if (!arg_no_reload && !install_client_side())
+ r = daemon_reload(argc, argv, userdata);
+
+end:
+ STRV_FOREACH_PAIR(original, tmp, paths) {
+ (void) unlink(*tmp);
+
+ /* Removing empty dropin dirs */
+ if (!arg_full) {
+ _cleanup_free_ char *dir;
+
+ dir = dirname_malloc(*original);
+ if (!dir)
+ return log_oom();
+
+ /* no need to check if the dir is empty, rmdir
+ * does nothing if it is not the case.
+ */
+ (void) rmdir(dir);
+ }
+ }
+
+ return r;
+}
+
+static int systemctl_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("systemctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Query or send control commands to the systemd manager.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --system Connect to system manager\n"
+ " --user Connect to user service manager\n"
+ " -H --host=[USER@]HOST\n"
+ " Operate on remote host\n"
+ " -M --machine=CONTAINER\n"
+ " Operate on local container\n"
+ " -t --type=TYPE List units of a particular type\n"
+ " --state=STATE List units with particular LOAD or SUB or ACTIVE state\n"
+ " -p --property=NAME Show only properties by this name\n"
+ " -a --all Show all properties/all units currently in memory,\n"
+ " including dead/empty ones. To list all units installed on\n"
+ " the system, use the 'list-unit-files' command instead.\n"
+ " --failed Same as --state=failed\n"
+ " -l --full Don't ellipsize unit names on output\n"
+ " -r --recursive Show unit list of host and local containers\n"
+ " --reverse Show reverse dependencies with 'list-dependencies'\n"
+ " --job-mode=MODE Specify how to deal with already queued jobs, when\n"
+ " queueing a new job\n"
+ " --show-types When showing sockets, explicitly show their type\n"
+ " --value When showing properties, only print the value\n"
+ " -i --ignore-inhibitors\n"
+ " When shutting down or sleeping, ignore inhibitors\n"
+ " --kill-who=WHO Who to send signal to\n"
+ " -s --signal=SIGNAL Which signal to send\n"
+ " --now Start or stop unit in addition to enabling or disabling it\n"
+ " --dry-run Only print what would be done\n"
+ " -q --quiet Suppress output\n"
+ " --wait For (re)start, wait until service stopped again\n"
+ " For is-system-running, wait until startup is completed\n"
+ " --no-block Do not wait until operation finished\n"
+ " --no-wall Don't send wall message before halt/power-off/reboot\n"
+ " --no-reload Don't reload daemon after en-/dis-abling unit files\n"
+ " --no-legend Do not print a legend (column headers and hints)\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-ask-password\n"
+ " Do not ask for system passwords\n"
+ " --global Enable/disable/mask unit files globally\n"
+ " --runtime Enable/disable/mask unit files temporarily until next\n"
+ " reboot\n"
+ " -f --force When enabling unit files, override existing symlinks\n"
+ " When shutting down, execute action immediately\n"
+ " --preset-mode= Apply only enable, only disable, or all presets\n"
+ " --root=PATH Enable/disable/mask unit files in the specified root\n"
+ " directory\n"
+ " -n --lines=INTEGER Number of journal entries to show\n"
+ " -o --output=STRING Change journal output mode (short, short-precise,\n"
+ " short-iso, short-iso-precise, short-full,\n"
+ " short-monotonic, short-unix,\n"
+ " verbose, export, json, json-pretty, json-sse, cat)\n"
+ " --firmware-setup Tell the firmware to show the setup menu on next boot\n"
+ " --plain Print unit dependencies as a list instead of a tree\n\n"
+ "Unit Commands:\n"
+ " list-units [PATTERN...] List units currently in memory\n"
+ " list-sockets [PATTERN...] List socket units currently in memory,\n"
+ " ordered by address\n"
+ " list-timers [PATTERN...] List timer units currently in memory,\n"
+ " ordered by next elapse\n"
+ " start UNIT... Start (activate) one or more units\n"
+ " stop UNIT... Stop (deactivate) one or more units\n"
+ " reload UNIT... Reload one or more units\n"
+ " restart UNIT... Start or restart one or more units\n"
+ " try-restart UNIT... Restart one or more units if active\n"
+ " reload-or-restart UNIT... Reload one or more units if possible,\n"
+ " otherwise start or restart\n"
+ " try-reload-or-restart UNIT... If active, reload one or more units,\n"
+ " if supported, otherwise restart\n"
+ " isolate UNIT Start one unit and stop all others\n"
+ " kill UNIT... Send signal to processes of a unit\n"
+ " is-active PATTERN... Check whether units are active\n"
+ " is-failed PATTERN... Check whether units are failed\n"
+ " status [PATTERN...|PID...] Show runtime status of one or more units\n"
+ " show [PATTERN...|JOB...] Show properties of one or more\n"
+ " units/jobs or the manager\n"
+ " cat PATTERN... Show files and drop-ins of specified units\n"
+ " set-property UNIT PROPERTY=VALUE... Sets one or more properties of a unit\n"
+ " help PATTERN...|PID... Show manual for one or more units\n"
+ " reset-failed [PATTERN...] Reset failed state for all, one, or more\n"
+ " units\n"
+ " list-dependencies [UNIT] Recursively show units which are required\n"
+ " or wanted by this unit or by which this\n"
+ " unit is required or wanted\n\n"
+ "Unit File Commands:\n"
+ " list-unit-files [PATTERN...] List installed unit files\n"
+ " enable [UNIT...|PATH...] Enable one or more unit files\n"
+ " disable UNIT... Disable one or more unit files\n"
+ " reenable UNIT... Reenable one or more unit files\n"
+ " preset UNIT... Enable/disable one or more unit files\n"
+ " based on preset configuration\n"
+ " preset-all Enable/disable all unit files based on\n"
+ " preset configuration\n"
+ " is-enabled UNIT... Check whether unit files are enabled\n"
+ " mask UNIT... Mask one or more units\n"
+ " unmask UNIT... Unmask one or more units\n"
+ " link PATH... Link one or more units files into\n"
+ " the search path\n"
+ " revert UNIT... Revert one or more unit files to vendor\n"
+ " version\n"
+ " add-wants TARGET UNIT... Add 'Wants' dependency for the target\n"
+ " on specified one or more units\n"
+ " add-requires TARGET UNIT... Add 'Requires' dependency for the target\n"
+ " on specified one or more units\n"
+ " edit UNIT... Edit one or more unit files\n"
+ " get-default Get the name of the default target\n"
+ " set-default TARGET Set the default target\n\n"
+ "Machine Commands:\n"
+ " list-machines [PATTERN...] List local containers and host\n\n"
+ "Job Commands:\n"
+ " list-jobs [PATTERN...] List jobs\n"
+ " cancel [JOB...] Cancel all, one, or more jobs\n\n"
+ "Environment Commands:\n"
+ " show-environment Dump environment\n"
+ " set-environment VARIABLE=VALUE... Set one or more environment variables\n"
+ " unset-environment VARIABLE... Unset one or more environment variables\n"
+ " import-environment [VARIABLE...] Import all or some environment variables\n\n"
+ "Manager Lifecycle Commands:\n"
+ " daemon-reload Reload systemd manager configuration\n"
+ " daemon-reexec Reexecute systemd manager\n\n"
+ "System Commands:\n"
+ " is-system-running Check whether system is fully running\n"
+ " default Enter system default mode\n"
+ " rescue Enter system rescue mode\n"
+ " emergency Enter system emergency mode\n"
+ " halt Shut down and halt the system\n"
+ " poweroff Shut down and power-off the system\n"
+ " reboot [ARG] Shut down and reboot the system\n"
+ " kexec Shut down and reboot the system with kexec\n"
+ " exit [EXIT_CODE] Request user instance or container exit\n"
+ " switch-root ROOT [INIT] Change to a different root file system\n"
+ " suspend Suspend the system\n"
+ " hibernate Hibernate the system\n"
+ " hybrid-sleep Hibernate and suspend the system\n"
+ " suspend-then-hibernate Suspend the system, wake after a period of\n"
+ " time and put it into hibernate\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int halt_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("halt", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]%s\n\n"
+ "%s the system.\n\n"
+ " --help Show this help\n"
+ " --halt Halt the machine\n"
+ " -p --poweroff Switch off the machine\n"
+ " --reboot Reboot the machine\n"
+ " -f --force Force immediate halt/power-off/reboot\n"
+ " -w --wtmp-only Don't halt/power-off/reboot, just write wtmp record\n"
+ " -d --no-wtmp Don't write wtmp record\n"
+ " --no-wall Don't send wall message before halt/power-off/reboot\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , arg_action == ACTION_REBOOT ? " [ARG]" : "",
+ arg_action == ACTION_REBOOT ? "Reboot" :
+ arg_action == ACTION_POWEROFF ? "Power off" :
+ "Halt"
+ , link
+ );
+
+ return 0;
+}
+
+static int shutdown_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("shutdown", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [TIME] [WALL...]\n\n"
+ "Shut down the system.\n\n"
+ " --help Show this help\n"
+ " -H --halt Halt the machine\n"
+ " -P --poweroff Power-off the machine\n"
+ " -r --reboot Reboot the machine\n"
+ " -h Equivalent to --poweroff, overridden by --halt\n"
+ " -k Don't halt/power-off/reboot, just send warnings\n"
+ " --no-wall Don't send wall message before halt/power-off/reboot\n"
+ " -c Cancel a pending shutdown\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int telinit_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("telinit", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] {COMMAND}\n\n"
+ "Send control commands to the init daemon.\n\n"
+ " --help Show this help\n"
+ " --no-wall Don't send wall message before halt/power-off/reboot\n\n"
+ "Commands:\n"
+ " 0 Power-off the machine\n"
+ " 6 Reboot the machine\n"
+ " 2, 3, 4, 5 Start runlevelX.target unit\n"
+ " 1, s, S Enter rescue mode\n"
+ " q, Q Reload init daemon configuration\n"
+ " u, U Reexecute init daemon\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int runlevel_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("runlevel", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Prints the previous and current runlevel of the init system.\n\n"
+ " --help Show this help\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static void help_types(void) {
+ if (!arg_no_legend)
+ puts("Available unit types:");
+
+ DUMP_STRING_TABLE(unit_type, UnitType, _UNIT_TYPE_MAX);
+}
+
+static void help_states(void) {
+ if (!arg_no_legend)
+ puts("Available unit load states:");
+ DUMP_STRING_TABLE(unit_load_state, UnitLoadState, _UNIT_LOAD_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable unit active states:");
+ DUMP_STRING_TABLE(unit_active_state, UnitActiveState, _UNIT_ACTIVE_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable automount unit substates:");
+ DUMP_STRING_TABLE(automount_state, AutomountState, _AUTOMOUNT_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable device unit substates:");
+ DUMP_STRING_TABLE(device_state, DeviceState, _DEVICE_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable mount unit substates:");
+ DUMP_STRING_TABLE(mount_state, MountState, _MOUNT_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable path unit substates:");
+ DUMP_STRING_TABLE(path_state, PathState, _PATH_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable scope unit substates:");
+ DUMP_STRING_TABLE(scope_state, ScopeState, _SCOPE_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable service unit substates:");
+ DUMP_STRING_TABLE(service_state, ServiceState, _SERVICE_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable slice unit substates:");
+ DUMP_STRING_TABLE(slice_state, SliceState, _SLICE_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable socket unit substates:");
+ DUMP_STRING_TABLE(socket_state, SocketState, _SOCKET_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable swap unit substates:");
+ DUMP_STRING_TABLE(swap_state, SwapState, _SWAP_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable target unit substates:");
+ DUMP_STRING_TABLE(target_state, TargetState, _TARGET_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable timer unit substates:");
+ DUMP_STRING_TABLE(timer_state, TimerState, _TIMER_STATE_MAX);
+}
+
+static int systemctl_parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_FAIL = 0x100,
+ ARG_REVERSE,
+ ARG_AFTER,
+ ARG_BEFORE,
+ ARG_DRY_RUN,
+ ARG_SHOW_TYPES,
+ ARG_IRREVERSIBLE,
+ ARG_IGNORE_DEPENDENCIES,
+ ARG_VALUE,
+ ARG_VERSION,
+ ARG_USER,
+ ARG_SYSTEM,
+ ARG_GLOBAL,
+ ARG_NO_BLOCK,
+ ARG_NO_LEGEND,
+ ARG_NO_PAGER,
+ ARG_NO_WALL,
+ ARG_ROOT,
+ ARG_NO_RELOAD,
+ ARG_KILL_WHO,
+ ARG_NO_ASK_PASSWORD,
+ ARG_FAILED,
+ ARG_RUNTIME,
+ ARG_PLAIN,
+ ARG_STATE,
+ ARG_JOB_MODE,
+ ARG_PRESET_MODE,
+ ARG_FIRMWARE_SETUP,
+ ARG_NOW,
+ ARG_MESSAGE,
+ ARG_WAIT,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "type", required_argument, NULL, 't' },
+ { "property", required_argument, NULL, 'p' },
+ { "all", no_argument, NULL, 'a' },
+ { "reverse", no_argument, NULL, ARG_REVERSE },
+ { "after", no_argument, NULL, ARG_AFTER },
+ { "before", no_argument, NULL, ARG_BEFORE },
+ { "show-types", no_argument, NULL, ARG_SHOW_TYPES },
+ { "failed", no_argument, NULL, ARG_FAILED }, /* compatibility only */
+ { "full", no_argument, NULL, 'l' },
+ { "job-mode", required_argument, NULL, ARG_JOB_MODE },
+ { "fail", no_argument, NULL, ARG_FAIL }, /* compatibility only */
+ { "irreversible", no_argument, NULL, ARG_IRREVERSIBLE }, /* compatibility only */
+ { "ignore-dependencies", no_argument, NULL, ARG_IGNORE_DEPENDENCIES }, /* compatibility only */
+ { "ignore-inhibitors", no_argument, NULL, 'i' },
+ { "value", no_argument, NULL, ARG_VALUE },
+ { "user", no_argument, NULL, ARG_USER },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "global", no_argument, NULL, ARG_GLOBAL },
+ { "wait", no_argument, NULL, ARG_WAIT },
+ { "no-block", no_argument, NULL, ARG_NO_BLOCK },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-wall", no_argument, NULL, ARG_NO_WALL },
+ { "dry-run", no_argument, NULL, ARG_DRY_RUN },
+ { "quiet", no_argument, NULL, 'q' },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "force", no_argument, NULL, 'f' },
+ { "no-reload", no_argument, NULL, ARG_NO_RELOAD },
+ { "kill-who", required_argument, NULL, ARG_KILL_WHO },
+ { "signal", required_argument, NULL, 's' },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "runtime", no_argument, NULL, ARG_RUNTIME },
+ { "lines", required_argument, NULL, 'n' },
+ { "output", required_argument, NULL, 'o' },
+ { "plain", no_argument, NULL, ARG_PLAIN },
+ { "state", required_argument, NULL, ARG_STATE },
+ { "recursive", no_argument, NULL, 'r' },
+ { "preset-mode", required_argument, NULL, ARG_PRESET_MODE },
+ { "firmware-setup", no_argument, NULL, ARG_FIRMWARE_SETUP },
+ { "now", no_argument, NULL, ARG_NOW },
+ { "message", required_argument, NULL, ARG_MESSAGE },
+ {}
+ };
+
+ const char *p;
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ /* we default to allowing interactive authorization only in systemctl (not in the legacy commands) */
+ arg_ask_password = true;
+
+ while ((c = getopt_long(argc, argv, "ht:p:alqfs:H:M:n:o:ir.::", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return systemctl_help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 't': {
+ if (isempty(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--type= requires arguments.");
+
+ for (p = optarg;;) {
+ _cleanup_free_ char *type = NULL;
+
+ r = extract_first_word(&p, &type, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse type: %s", optarg);
+ if (r == 0)
+ break;
+
+ if (streq(type, "help")) {
+ help_types();
+ return 0;
+ }
+
+ if (unit_type_from_string(type) >= 0) {
+ if (strv_push(&arg_types, type) < 0)
+ return log_oom();
+ type = NULL;
+ continue;
+ }
+
+ /* It's much nicer to use --state= for
+ * load states, but let's support this
+ * in --types= too for compatibility
+ * with old versions */
+ if (unit_load_state_from_string(type) >= 0) {
+ if (strv_push(&arg_states, type) < 0)
+ return log_oom();
+ type = NULL;
+ continue;
+ }
+
+ log_error("Unknown unit type or load state '%s'.", type);
+ return log_info_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Use -t help to see a list of allowed values.");
+ }
+
+ break;
+ }
+
+ case 'p': {
+ /* Make sure that if the empty property list
+ was specified, we won't show any properties. */
+ if (isempty(optarg) && !arg_properties) {
+ arg_properties = new0(char*, 1);
+ if (!arg_properties)
+ return log_oom();
+ } else
+ for (p = optarg;;) {
+ _cleanup_free_ char *prop = NULL;
+
+ r = extract_first_word(&p, &prop, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse property: %s", optarg);
+ if (r == 0)
+ break;
+
+ if (strv_push(&arg_properties, prop) < 0)
+ return log_oom();
+
+ prop = NULL;
+ }
+
+ /* If the user asked for a particular
+ * property, show it to him, even if it is
+ * empty. */
+ arg_all = true;
+
+ break;
+ }
+
+ case 'a':
+ arg_all = true;
+ break;
+
+ case ARG_REVERSE:
+ arg_dependency = DEPENDENCY_REVERSE;
+ break;
+
+ case ARG_AFTER:
+ arg_dependency = DEPENDENCY_AFTER;
+ arg_jobs_after = true;
+ break;
+
+ case ARG_BEFORE:
+ arg_dependency = DEPENDENCY_BEFORE;
+ arg_jobs_before = true;
+ break;
+
+ case ARG_SHOW_TYPES:
+ arg_show_types = true;
+ break;
+
+ case ARG_VALUE:
+ arg_value = true;
+ break;
+
+ case ARG_JOB_MODE:
+ arg_job_mode = optarg;
+ break;
+
+ case ARG_FAIL:
+ arg_job_mode = "fail";
+ break;
+
+ case ARG_IRREVERSIBLE:
+ arg_job_mode = "replace-irreversibly";
+ break;
+
+ case ARG_IGNORE_DEPENDENCIES:
+ arg_job_mode = "ignore-dependencies";
+ break;
+
+ case ARG_USER:
+ arg_scope = UNIT_FILE_USER;
+ break;
+
+ case ARG_SYSTEM:
+ arg_scope = UNIT_FILE_SYSTEM;
+ break;
+
+ case ARG_GLOBAL:
+ arg_scope = UNIT_FILE_GLOBAL;
+ break;
+
+ case ARG_WAIT:
+ arg_wait = true;
+ break;
+
+ case ARG_NO_BLOCK:
+ arg_no_block = true;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_no_legend = true;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_WALL:
+ arg_no_wall = true;
+ break;
+
+ case ARG_ROOT:
+ r = parse_path_argument_and_warn(optarg, false, &arg_root);
+ if (r < 0)
+ return r;
+ break;
+
+ case 'l':
+ arg_full = true;
+ break;
+
+ case ARG_FAILED:
+ if (strv_extend(&arg_states, "failed") < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_DRY_RUN:
+ arg_dry_run = true;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case 'f':
+ arg_force++;
+ break;
+
+ case ARG_NO_RELOAD:
+ arg_no_reload = true;
+ break;
+
+ case ARG_KILL_WHO:
+ arg_kill_who = optarg;
+ break;
+
+ case 's':
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(signal, int, _NSIG);
+ return 0;
+ }
+
+ arg_signal = signal_from_string(optarg);
+ if (arg_signal < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse signal string %s.",
+ optarg);
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case ARG_RUNTIME:
+ arg_runtime = true;
+ break;
+
+ case 'n':
+ if (safe_atou(optarg, &arg_lines) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse lines '%s'",
+ optarg);
+ break;
+
+ case 'o':
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(output_mode, OutputMode, _OUTPUT_MODE_MAX);
+ return 0;
+ }
+
+ arg_output = output_mode_from_string(optarg);
+ if (arg_output < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown output '%s'.",
+ optarg);
+ break;
+
+ case 'i':
+ arg_ignore_inhibitors = true;
+ break;
+
+ case ARG_PLAIN:
+ arg_plain = true;
+ break;
+
+ case ARG_FIRMWARE_SETUP:
+ arg_firmware_setup = true;
+ break;
+
+ case ARG_STATE: {
+ if (isempty(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--state= requires arguments.");
+
+ for (p = optarg;;) {
+ _cleanup_free_ char *s = NULL;
+
+ r = extract_first_word(&p, &s, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse state: %s", optarg);
+ if (r == 0)
+ break;
+
+ if (streq(s, "help")) {
+ help_states();
+ return 0;
+ }
+
+ if (strv_push(&arg_states, s) < 0)
+ return log_oom();
+
+ s = NULL;
+ }
+ break;
+ }
+
+ case 'r':
+ if (geteuid() != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "--recursive requires root privileges.");
+
+ arg_recursive = true;
+ break;
+
+ case ARG_PRESET_MODE:
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(unit_file_preset_mode, UnitFilePresetMode, _UNIT_FILE_PRESET_MAX);
+ return 0;
+ }
+
+ arg_preset_mode = unit_file_preset_mode_from_string(optarg);
+ if (arg_preset_mode < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse preset mode: %s.", optarg);
+
+ break;
+
+ case ARG_NOW:
+ arg_now = true;
+ break;
+
+ case ARG_MESSAGE:
+ if (strv_extend(&arg_wall, optarg) < 0)
+ return log_oom();
+ break;
+
+ case '.':
+ /* Output an error mimicking getopt, and print a hint afterwards */
+ log_error("%s: invalid option -- '.'", program_invocation_name);
+ log_notice("Hint: to specify units starting with a dash, use \"--\":\n"
+ " %s [OPTIONS...] {COMMAND} -- -.%s ...",
+ program_invocation_name, optarg ?: "mount");
+ _fallthrough_;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL && arg_scope != UNIT_FILE_SYSTEM)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot access user instance remotely.");
+
+ if (arg_wait && arg_no_block)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--wait may not be combined with --no-block.");
+
+ return 1;
+}
+
+static int halt_parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_HELP = 0x100,
+ ARG_HALT,
+ ARG_REBOOT,
+ ARG_NO_WALL
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, ARG_HELP },
+ { "halt", no_argument, NULL, ARG_HALT },
+ { "poweroff", no_argument, NULL, 'p' },
+ { "reboot", no_argument, NULL, ARG_REBOOT },
+ { "force", no_argument, NULL, 'f' },
+ { "wtmp-only", no_argument, NULL, 'w' },
+ { "no-wtmp", no_argument, NULL, 'd' },
+ { "no-sync", no_argument, NULL, 'n' },
+ { "no-wall", no_argument, NULL, ARG_NO_WALL },
+ {}
+ };
+
+ int c, r, runlevel;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ if (utmp_get_runlevel(&runlevel, NULL) >= 0)
+ if (IN_SET(runlevel, '0', '6'))
+ arg_force = 2;
+
+ while ((c = getopt_long(argc, argv, "pfwdnih", options, NULL)) >= 0)
+ switch (c) {
+
+ case ARG_HELP:
+ return halt_help();
+
+ case ARG_HALT:
+ arg_action = ACTION_HALT;
+ break;
+
+ case 'p':
+ if (arg_action != ACTION_REBOOT)
+ arg_action = ACTION_POWEROFF;
+ break;
+
+ case ARG_REBOOT:
+ arg_action = ACTION_REBOOT;
+ break;
+
+ case 'f':
+ arg_force = 2;
+ break;
+
+ case 'w':
+ arg_dry_run = true;
+ break;
+
+ case 'd':
+ arg_no_wtmp = true;
+ break;
+
+ case 'n':
+ arg_no_sync = true;
+ break;
+
+ case ARG_NO_WALL:
+ arg_no_wall = true;
+ break;
+
+ case 'i':
+ case 'h':
+ /* Compatibility nops */
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_action == ACTION_REBOOT && (argc == optind || argc == optind + 1)) {
+ r = update_reboot_parameter_and_warn(argc == optind + 1 ? argv[optind] : NULL);
+ if (r < 0)
+ return r;
+ } else if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments.");
+
+ return 1;
+}
+
+static int parse_shutdown_time_spec(const char *t, usec_t *_u) {
+ assert(t);
+ assert(_u);
+
+ if (streq(t, "now"))
+ *_u = 0;
+ else if (!strchr(t, ':')) {
+ uint64_t u;
+
+ if (safe_atou64(t, &u) < 0)
+ return -EINVAL;
+
+ *_u = now(CLOCK_REALTIME) + USEC_PER_MINUTE * u;
+ } else {
+ char *e = NULL;
+ long hour, minute;
+ struct tm tm = {};
+ time_t s;
+ usec_t n;
+
+ errno = 0;
+ hour = strtol(t, &e, 10);
+ if (errno > 0 || *e != ':' || hour < 0 || hour > 23)
+ return -EINVAL;
+
+ minute = strtol(e+1, &e, 10);
+ if (errno > 0 || *e != 0 || minute < 0 || minute > 59)
+ return -EINVAL;
+
+ n = now(CLOCK_REALTIME);
+ s = (time_t) (n / USEC_PER_SEC);
+
+ assert_se(localtime_r(&s, &tm));
+
+ tm.tm_hour = (int) hour;
+ tm.tm_min = (int) minute;
+ tm.tm_sec = 0;
+
+ s = mktime(&tm);
+ assert(s >= 0);
+
+ *_u = (usec_t) s * USEC_PER_SEC;
+
+ while (*_u <= n)
+ *_u += USEC_PER_DAY;
+ }
+
+ return 0;
+}
+
+static int shutdown_parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_HELP = 0x100,
+ ARG_NO_WALL
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, ARG_HELP },
+ { "halt", no_argument, NULL, 'H' },
+ { "poweroff", no_argument, NULL, 'P' },
+ { "reboot", no_argument, NULL, 'r' },
+ { "kexec", no_argument, NULL, 'K' }, /* not documented extension */
+ { "no-wall", no_argument, NULL, ARG_NO_WALL },
+ {}
+ };
+
+ char **wall = NULL;
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "HPrhkKat:fFc", options, NULL)) >= 0)
+ switch (c) {
+
+ case ARG_HELP:
+ return shutdown_help();
+
+ case 'H':
+ arg_action = ACTION_HALT;
+ break;
+
+ case 'P':
+ arg_action = ACTION_POWEROFF;
+ break;
+
+ case 'r':
+ if (kexec_loaded())
+ arg_action = ACTION_KEXEC;
+ else
+ arg_action = ACTION_REBOOT;
+ break;
+
+ case 'K':
+ arg_action = ACTION_KEXEC;
+ break;
+
+ case 'h':
+ if (arg_action != ACTION_HALT)
+ arg_action = ACTION_POWEROFF;
+ break;
+
+ case 'k':
+ arg_dry_run = true;
+ break;
+
+ case ARG_NO_WALL:
+ arg_no_wall = true;
+ break;
+
+ case 'a':
+ case 't': /* Note that we also ignore any passed argument to -t, not just the -t itself */
+ case 'f':
+ case 'F':
+ /* Compatibility nops */
+ break;
+
+ case 'c':
+ arg_action = ACTION_CANCEL_SHUTDOWN;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (argc > optind && arg_action != ACTION_CANCEL_SHUTDOWN) {
+ r = parse_shutdown_time_spec(argv[optind], &arg_when);
+ if (r < 0) {
+ log_error("Failed to parse time specification: %s", argv[optind]);
+ return r;
+ }
+ } else
+ arg_when = now(CLOCK_REALTIME) + USEC_PER_MINUTE;
+
+ if (argc > optind && arg_action == ACTION_CANCEL_SHUTDOWN)
+ /* No time argument for shutdown cancel */
+ wall = argv + optind;
+ else if (argc > optind + 1)
+ /* We skip the time argument */
+ wall = argv + optind + 1;
+
+ if (wall) {
+ arg_wall = strv_copy(wall);
+ if (!arg_wall)
+ return log_oom();
+ }
+
+ optind = argc;
+
+ return 1;
+}
+
+static int telinit_parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_HELP = 0x100,
+ ARG_NO_WALL
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, ARG_HELP },
+ { "no-wall", no_argument, NULL, ARG_NO_WALL },
+ {}
+ };
+
+ static const struct {
+ char from;
+ enum action to;
+ } table[] = {
+ { '0', ACTION_POWEROFF },
+ { '6', ACTION_REBOOT },
+ { '1', ACTION_RESCUE },
+ { '2', ACTION_RUNLEVEL2 },
+ { '3', ACTION_RUNLEVEL3 },
+ { '4', ACTION_RUNLEVEL4 },
+ { '5', ACTION_RUNLEVEL5 },
+ { 's', ACTION_RESCUE },
+ { 'S', ACTION_RESCUE },
+ { 'q', ACTION_RELOAD },
+ { 'Q', ACTION_RELOAD },
+ { 'u', ACTION_REEXEC },
+ { 'U', ACTION_REEXEC }
+ };
+
+ unsigned i;
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "", options, NULL)) >= 0)
+ switch (c) {
+
+ case ARG_HELP:
+ return telinit_help();
+
+ case ARG_NO_WALL:
+ arg_no_wall = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: required argument missing.",
+ program_invocation_short_name);
+
+ if (optind + 1 < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments.");
+
+ if (strlen(argv[optind]) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected single character argument.");
+
+ for (i = 0; i < ELEMENTSOF(table); i++)
+ if (table[i].from == argv[optind][0])
+ break;
+
+ if (i >= ELEMENTSOF(table))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown command '%s'.", argv[optind]);
+
+ arg_action = table[i].to;
+
+ optind++;
+
+ return 1;
+}
+
+static int runlevel_parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_HELP = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, ARG_HELP },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "", options, NULL)) >= 0)
+ switch (c) {
+
+ case ARG_HELP:
+ return runlevel_help();
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments.");
+
+ return 1;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ assert(argc >= 0);
+ assert(argv);
+
+ if (program_invocation_short_name) {
+
+ if (strstr(program_invocation_short_name, "halt")) {
+ arg_action = ACTION_HALT;
+ return halt_parse_argv(argc, argv);
+
+ } else if (strstr(program_invocation_short_name, "poweroff")) {
+ arg_action = ACTION_POWEROFF;
+ return halt_parse_argv(argc, argv);
+
+ } else if (strstr(program_invocation_short_name, "reboot")) {
+ if (kexec_loaded())
+ arg_action = ACTION_KEXEC;
+ else
+ arg_action = ACTION_REBOOT;
+ return halt_parse_argv(argc, argv);
+
+ } else if (strstr(program_invocation_short_name, "shutdown")) {
+ arg_action = ACTION_POWEROFF;
+ return shutdown_parse_argv(argc, argv);
+
+ } else if (strstr(program_invocation_short_name, "init")) {
+
+ /* Matches invocations as "init" as well as "telinit", which are synonymous when run as PID !=
+ * 1 on SysV.
+ *
+ * On SysV "telinit" was the official command to communicate with PID 1, but "init" would
+ * redirect itself to "telinit" if called with PID != 1. We follow the same logic here still,
+ * though we add one level of indirection, as we implement "telinit" in "systemctl". Hence, for
+ * us if you invoke "init" you get "systemd", but it will execve() "systemctl" immediately with
+ * argv[] unmodified if PID is != 1. If you invoke "telinit" you directly get "systemctl". In
+ * both cases we shall do the same thing, which is why we do strstr(p_i_s_n, "init") here, as a
+ * quick way to match both.
+ *
+ * Also see redirect_telinit() in src/core/main.c. */
+
+ if (sd_booted() > 0) {
+ arg_action = _ACTION_INVALID;
+ return telinit_parse_argv(argc, argv);
+ } else {
+ /* Hmm, so some other init system is running, we need to forward this request to
+ * it. For now we simply guess that it is Upstart. */
+
+ (void) rlimit_nofile_safe();
+ execv(TELINIT, argv);
+
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Couldn't find an alternative telinit implementation to spawn.");
+ }
+
+ } else if (strstr(program_invocation_short_name, "runlevel")) {
+ arg_action = ACTION_RUNLEVEL;
+ return runlevel_parse_argv(argc, argv);
+ }
+ }
+
+ arg_action = ACTION_SYSTEMCTL;
+ return systemctl_parse_argv(argc, argv);
+}
+
+#if HAVE_SYSV_COMPAT
+_pure_ static int action_to_runlevel(void) {
+ static const char table[_ACTION_MAX] = {
+ [ACTION_HALT] = '0',
+ [ACTION_POWEROFF] = '0',
+ [ACTION_REBOOT] = '6',
+ [ACTION_RUNLEVEL2] = '2',
+ [ACTION_RUNLEVEL3] = '3',
+ [ACTION_RUNLEVEL4] = '4',
+ [ACTION_RUNLEVEL5] = '5',
+ [ACTION_RESCUE] = '1'
+ };
+
+ assert(arg_action >= 0 && arg_action < _ACTION_MAX);
+
+ return table[arg_action];
+}
+#endif
+
+static int talk_initctl(void) {
+#if HAVE_SYSV_COMPAT
+ struct init_request request = {
+ .magic = INIT_MAGIC,
+ .sleeptime = 0,
+ .cmd = INIT_CMD_RUNLVL
+ };
+
+ _cleanup_close_ int fd = -1;
+ char rl;
+ int r;
+ const char *p;
+
+ rl = action_to_runlevel();
+ if (!rl)
+ return 0;
+
+ request.runlevel = rl;
+
+ FOREACH_STRING(p, "/run/initctl", "/dev/initctl") {
+ fd = open(p, O_WRONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
+ if (fd >= 0 || errno != ENOENT)
+ break;
+ }
+ if (fd < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open initctl fifo: %m");
+ }
+
+ r = loop_write(fd, &request, sizeof(request), false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write to %s: %m", p);
+
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+static int systemctl_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "list-units", VERB_ANY, VERB_ANY, VERB_DEFAULT|VERB_ONLINE_ONLY, list_units },
+ { "list-unit-files", VERB_ANY, VERB_ANY, 0, list_unit_files },
+ { "list-sockets", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, list_sockets },
+ { "list-timers", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, list_timers },
+ { "list-jobs", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, list_jobs },
+ { "list-machines", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY|VERB_MUST_BE_ROOT, list_machines },
+ { "clear-jobs", VERB_ANY, 1, VERB_ONLINE_ONLY, trivial_method },
+ { "cancel", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, cancel_job },
+ { "start", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit },
+ { "stop", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit },
+ { "condstop", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit }, /* For compatibility with ALTLinux */
+ { "reload", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit },
+ { "restart", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit },
+ { "try-restart", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit },
+ { "reload-or-restart", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit },
+ { "reload-or-try-restart", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit }, /* For compatbility with old systemctl <= 228 */
+ { "try-reload-or-restart", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit },
+ { "force-reload", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit }, /* For compatibility with SysV */
+ { "condreload", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit }, /* For compatibility with ALTLinux */
+ { "condrestart", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit }, /* For compatibility with RH */
+ { "isolate", 2, 2, VERB_ONLINE_ONLY, start_unit },
+ { "kill", 2, VERB_ANY, VERB_ONLINE_ONLY, kill_unit },
+ { "is-active", 2, VERB_ANY, VERB_ONLINE_ONLY, check_unit_active },
+ { "check", 2, VERB_ANY, VERB_ONLINE_ONLY, check_unit_active }, /* deprecated alias of is-active */
+ { "is-failed", 2, VERB_ANY, VERB_ONLINE_ONLY, check_unit_failed },
+ { "show", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, show },
+ { "cat", 2, VERB_ANY, VERB_ONLINE_ONLY, cat },
+ { "status", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, show },
+ { "help", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, show },
+ { "daemon-reload", VERB_ANY, 1, VERB_ONLINE_ONLY, daemon_reload },
+ { "daemon-reexec", VERB_ANY, 1, VERB_ONLINE_ONLY, daemon_reload },
+ { "show-environment", VERB_ANY, 1, VERB_ONLINE_ONLY, show_environment },
+ { "set-environment", 2, VERB_ANY, VERB_ONLINE_ONLY, set_environment },
+ { "unset-environment", 2, VERB_ANY, VERB_ONLINE_ONLY, set_environment },
+ { "import-environment", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, import_environment },
+ { "halt", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "poweroff", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "reboot", VERB_ANY, 2, VERB_ONLINE_ONLY, start_system_special },
+ { "kexec", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "suspend", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "hibernate", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "hybrid-sleep", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "suspend-then-hibernate",VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "default", VERB_ANY, 1, VERB_ONLINE_ONLY, start_special },
+ { "rescue", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "emergency", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "exit", VERB_ANY, 2, VERB_ONLINE_ONLY, start_special },
+ { "reset-failed", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, reset_failed },
+ { "enable", 2, VERB_ANY, 0, enable_unit },
+ { "disable", 2, VERB_ANY, 0, enable_unit },
+ { "is-enabled", 2, VERB_ANY, 0, unit_is_enabled },
+ { "reenable", 2, VERB_ANY, 0, enable_unit },
+ { "preset", 2, VERB_ANY, 0, enable_unit },
+ { "preset-all", VERB_ANY, 1, 0, preset_all },
+ { "mask", 2, VERB_ANY, 0, enable_unit },
+ { "unmask", 2, VERB_ANY, 0, enable_unit },
+ { "link", 2, VERB_ANY, 0, enable_unit },
+ { "revert", 2, VERB_ANY, 0, enable_unit },
+ { "switch-root", 2, VERB_ANY, VERB_ONLINE_ONLY, switch_root },
+ { "list-dependencies", VERB_ANY, 2, VERB_ONLINE_ONLY, list_dependencies },
+ { "set-default", 2, 2, 0, set_default },
+ { "get-default", VERB_ANY, 1, 0, get_default },
+ { "set-property", 3, VERB_ANY, VERB_ONLINE_ONLY, set_property },
+ { "is-system-running", VERB_ANY, 1, 0, is_system_running },
+ { "add-wants", 3, VERB_ANY, 0, add_dependency },
+ { "add-requires", 3, VERB_ANY, 0, add_dependency },
+ { "edit", 2, VERB_ANY, VERB_ONLINE_ONLY, edit },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int reload_with_fallback(void) {
+ /* First, try systemd via D-Bus. */
+ if (daemon_reload(0, NULL, NULL) >= 0)
+ return 0;
+
+ /* Nothing else worked, so let's try signals */
+ assert(IN_SET(arg_action, ACTION_RELOAD, ACTION_REEXEC));
+
+ if (kill(1, arg_action == ACTION_RELOAD ? SIGHUP : SIGTERM) < 0)
+ return log_error_errno(errno, "kill() failed: %m");
+
+ return 0;
+}
+
+static int start_with_fallback(void) {
+ /* First, try systemd via D-Bus. */
+ if (start_unit(0, NULL, NULL) >= 0)
+ return 0;
+
+ /* Nothing else worked, so let's try /dev/initctl */
+ if (talk_initctl() > 0)
+ return 0;
+
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to talk to init daemon.");
+}
+
+static int halt_now(enum action a) {
+ /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
+ * synced explicitly in advance. */
+ if (!arg_no_sync && !arg_dry_run)
+ (void) sync();
+
+ /* Make sure C-A-D is handled by the kernel from this point on... */
+ if (!arg_dry_run)
+ (void) reboot(RB_ENABLE_CAD);
+
+ switch (a) {
+
+ case ACTION_HALT:
+ if (!arg_quiet)
+ log_info("Halting.");
+ if (arg_dry_run)
+ return 0;
+ (void) reboot(RB_HALT_SYSTEM);
+ return -errno;
+
+ case ACTION_POWEROFF:
+ if (!arg_quiet)
+ log_info("Powering off.");
+ if (arg_dry_run)
+ return 0;
+ (void) reboot(RB_POWER_OFF);
+ return -errno;
+
+ case ACTION_KEXEC:
+ case ACTION_REBOOT:
+ return reboot_with_parameter(REBOOT_FALLBACK |
+ (arg_quiet ? 0 : REBOOT_LOG) |
+ (arg_dry_run ? REBOOT_DRY_RUN : 0));
+
+ default:
+ assert_not_reached("Unknown action.");
+ }
+}
+
+static int logind_schedule_shutdown(void) {
+
+#if ENABLE_LOGIND
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char date[FORMAT_TIMESTAMP_MAX];
+ const char *action;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ switch (arg_action) {
+ case ACTION_HALT:
+ action = "halt";
+ break;
+ case ACTION_POWEROFF:
+ action = "poweroff";
+ break;
+ case ACTION_KEXEC:
+ action = "kexec";
+ break;
+ case ACTION_EXIT:
+ action = "exit";
+ break;
+ case ACTION_REBOOT:
+ default:
+ action = "reboot";
+ break;
+ }
+
+ if (arg_dry_run)
+ action = strjoina("dry-", action);
+
+ (void) logind_set_wall_message();
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "ScheduleShutdown",
+ &error,
+ NULL,
+ "st",
+ action,
+ arg_when);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to call ScheduleShutdown in logind, proceeding with immediate shutdown: %s", bus_error_message(&error, r));
+
+ if (!arg_quiet)
+ log_info("Shutdown scheduled for %s, use 'shutdown -c' to cancel.", format_timestamp(date, sizeof(date), arg_when));
+ return 0;
+#else
+ log_error("Cannot schedule shutdown without logind support, proceeding with immediate shutdown.");
+ return -ENOSYS;
+#endif
+}
+
+static int halt_main(void) {
+ int r;
+
+ r = logind_check_inhibitors(arg_action);
+ if (r < 0)
+ return r;
+
+ /* Delayed shutdown requested, and was successful */
+ if (arg_when > 0 && logind_schedule_shutdown() == 0)
+ return 0;
+ /* no delay, or logind failed or is not at all available */
+
+ if (geteuid() != 0) {
+ if (arg_dry_run || arg_force > 0) {
+ (void) must_be_root();
+ return -EPERM;
+ }
+
+ /* Try logind if we are a normal user and no special
+ * mode applies. Maybe polkit allows us to shutdown
+ * the machine. */
+ if (IN_SET(arg_action, ACTION_POWEROFF, ACTION_REBOOT, ACTION_HALT)) {
+ r = logind_reboot(arg_action);
+ if (r >= 0)
+ return r;
+ if (IN_SET(r, -EOPNOTSUPP, -EINPROGRESS))
+ /* requested operation is not
+ * supported on the local system or
+ * already in progress */
+ return r;
+ /* on all other errors, try low-level operation */
+ }
+ }
+
+ /* In order to minimize the difference between operation with and
+ * without logind, we explicitly enable non-blocking mode for this,
+ * as logind's shutdown operations are always non-blocking. */
+ arg_no_block = true;
+
+ if (!arg_dry_run && !arg_force)
+ return start_with_fallback();
+
+ assert(geteuid() == 0);
+
+ if (!arg_no_wtmp) {
+ if (sd_booted() > 0)
+ log_debug("Not writing utmp record, assuming that systemd-update-utmp is used.");
+ else {
+ r = utmp_put_shutdown();
+ if (r < 0)
+ log_warning_errno(r, "Failed to write utmp record: %m");
+ }
+ }
+
+ if (arg_dry_run)
+ return 0;
+
+ r = halt_now(arg_action);
+ return log_error_errno(r, "Failed to reboot: %m");
+}
+
+static int runlevel_main(void) {
+ int r, runlevel, previous;
+
+ r = utmp_get_runlevel(&runlevel, &previous);
+ if (r < 0) {
+ puts("unknown");
+ return r;
+ }
+
+ printf("%c %c\n",
+ previous <= 0 ? 'N' : previous,
+ runlevel <= 0 ? 'N' : runlevel);
+
+ return 0;
+}
+
+static int logind_cancel_shutdown(void) {
+#if ENABLE_LOGIND
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ (void) logind_set_wall_message();
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "CancelScheduledShutdown",
+ &error,
+ NULL, NULL);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to talk to logind, shutdown hasn't been cancelled: %s", bus_error_message(&error, r));
+
+ return 0;
+#else
+ log_error("Not compiled with logind support, cannot cancel scheduled shutdowns.");
+ return -ENOSYS;
+#endif
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ argv_cmdline = argv[0];
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ /* The journal merging logic potentially needs a lot of fds. */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ sigbus_install();
+
+ /* Explicitly not on_tty() to avoid setting cached value.
+ * This becomes relevant for piping output which might be
+ * ellipsized. */
+ original_stdout_is_tty = isatty(STDOUT_FILENO);
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ goto finish;
+
+ if (arg_action != ACTION_SYSTEMCTL && running_in_chroot() > 0) {
+ if (!arg_quiet)
+ log_info("Running in chroot, ignoring request.");
+ r = 0;
+ goto finish;
+ }
+
+ /* systemctl_main() will print an error message for the bus
+ * connection, but only if it needs to */
+
+ switch (arg_action) {
+
+ case ACTION_SYSTEMCTL:
+ r = systemctl_main(argc, argv);
+ break;
+
+ /* Legacy command aliases set arg_action. They provide some fallbacks,
+ * e.g. to tell sysvinit to reboot after you have installed systemd
+ * binaries. */
+
+ case ACTION_HALT:
+ case ACTION_POWEROFF:
+ case ACTION_REBOOT:
+ case ACTION_KEXEC:
+ r = halt_main();
+ break;
+
+ case ACTION_RUNLEVEL2:
+ case ACTION_RUNLEVEL3:
+ case ACTION_RUNLEVEL4:
+ case ACTION_RUNLEVEL5:
+ case ACTION_RESCUE:
+ r = start_with_fallback();
+ break;
+
+ case ACTION_RELOAD:
+ case ACTION_REEXEC:
+ r = reload_with_fallback();
+ break;
+
+ case ACTION_CANCEL_SHUTDOWN:
+ r = logind_cancel_shutdown();
+ break;
+
+ case ACTION_RUNLEVEL:
+ r = runlevel_main();
+ break;
+
+ case ACTION_EXIT:
+ case ACTION_SUSPEND:
+ case ACTION_HIBERNATE:
+ case ACTION_HYBRID_SLEEP:
+ case ACTION_SUSPEND_THEN_HIBERNATE:
+ case ACTION_EMERGENCY:
+ case ACTION_DEFAULT:
+ /* systemctl verbs with no equivalent in the legacy commands.
+ * These cannot appear in arg_action. Fall through. */
+
+ case _ACTION_INVALID:
+ default:
+ assert_not_reached("Unknown action");
+ }
+
+finish:
+ release_busses();
+
+ strv_free(arg_types);
+ strv_free(arg_states);
+ strv_free(arg_properties);
+
+ strv_free(arg_wall);
+ free(arg_root);
+ free(arg_esp_path);
+
+ /* Note that we return r here, not 0, so that we can implement the LSB-like return codes */
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/systemctl/systemd-sysv-install.SKELETON b/src/systemctl/systemd-sysv-install.SKELETON
new file mode 100755
index 0000000..8c16cf9
--- /dev/null
+++ b/src/systemctl/systemd-sysv-install.SKELETON
@@ -0,0 +1,49 @@
+#!/bin/sh
+# This script is called by "systemctl enable/disable" when the given unit is a
+# SysV init.d script. It needs to call the distribution's mechanism for
+# enabling/disabling those, such as chkconfig, update-rc.d, or similar. This
+# can optionally take a --root argument for enabling a SysV init script
+# in a chroot or similar.
+set -e
+
+usage() {
+ echo "Usage: $0 [--root=path] enable|disable|is-enabled <sysv script name>" >&2
+ exit 1
+}
+
+unset ROOT
+
+# parse options
+eval set -- "$(getopt -o r: --long root: -- "$@")"
+while true; do
+ case "$1" in
+ -r|--root)
+ ROOT="$2"
+ shift 2 ;;
+ --) shift ; break ;;
+ *) usage ;;
+ esac
+done
+
+NAME="$2"
+[ -n "$NAME" ] || usage
+
+case "$1" in
+ enable)
+ # call the command to enable SysV init script $NAME here
+ # (consider optional $ROOT)
+ echo "IMPLEMENT ME: enabling SysV init.d script $NAME"
+ ;;
+ disable)
+ # call the command to disable SysV init script $NAME here
+ # (consider optional $ROOT)
+ echo "IMPLEMENT ME: disabling SysV init.d script $NAME"
+ ;;
+ is-enabled)
+ # exit with 0 if $NAME is enabled, non-zero if it is disabled
+ # (consider optional $ROOT)
+ echo "IMPLEMENT ME: checking SysV init.d script $NAME"
+ ;;
+ *)
+ usage ;;
+esac
diff --git a/src/systemd/_sd-common.h b/src/systemd/_sd-common.h
new file mode 100644
index 0000000..b3ee7bb
--- /dev/null
+++ b/src/systemd/_sd-common.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdcommonhfoo
+#define foosdcommonhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+/* This is a private header; never even think of including this directly! */
+
+#if defined(__INCLUDE_LEVEL__) && __INCLUDE_LEVEL__ <= 1
+# error "Do not include _sd-common.h directly; it is a private header."
+#endif
+
+typedef void (*_sd_destroy_t)(void *userdata);
+
+#ifndef _sd_printf_
+# if __GNUC__ >= 4
+# define _sd_printf_(a,b) __attribute__((__format__(printf, a, b)))
+# else
+# define _sd_printf_(a,b)
+# endif
+#endif
+
+#ifndef _sd_sentinel_
+# define _sd_sentinel_ __attribute__((__sentinel__))
+#endif
+
+#ifndef _sd_packed_
+# define _sd_packed_ __attribute__((__packed__))
+#endif
+
+#ifndef _sd_pure_
+# define _sd_pure_ __attribute__((__pure__))
+#endif
+
+#ifndef _SD_STRINGIFY
+# define _SD_XSTRINGIFY(x) #x
+# define _SD_STRINGIFY(x) _SD_XSTRINGIFY(x)
+#endif
+
+#ifndef _SD_BEGIN_DECLARATIONS
+# ifdef __cplusplus
+# define _SD_BEGIN_DECLARATIONS \
+ extern "C" { \
+ struct _sd_useless_struct_to_allow_trailing_semicolon_
+# else
+# define _SD_BEGIN_DECLARATIONS \
+ struct _sd_useless_struct_to_allow_trailing_semicolon_
+# endif
+#endif
+
+#ifndef _SD_END_DECLARATIONS
+# ifdef __cplusplus
+# define _SD_END_DECLARATIONS \
+ } \
+ struct _sd_useless_cpp_struct_to_allow_trailing_semicolon_
+# else
+# define _SD_END_DECLARATIONS \
+ struct _sd_useless_struct_to_allow_trailing_semicolon_
+# endif
+#endif
+
+#ifndef _SD_ARRAY_STATIC
+# if __STDC_VERSION__ >= 199901L
+# define _SD_ARRAY_STATIC static
+# else
+# define _SD_ARRAY_STATIC
+# endif
+#endif
+
+#define _SD_DEFINE_POINTER_CLEANUP_FUNC(type, func) \
+ static __inline__ void func##p(type **p) { \
+ if (*p) \
+ func(*p); \
+ } \
+ struct _sd_useless_struct_to_allow_trailing_semicolon_
+
+#endif
diff --git a/src/systemd/meson.build b/src/systemd/meson.build
new file mode 100644
index 0000000..75c48b0
--- /dev/null
+++ b/src/systemd/meson.build
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+_systemd_headers = '''
+ sd-bus.h
+ sd-bus-protocol.h
+ sd-bus-vtable.h
+ sd-daemon.h
+ sd-device.h
+ sd-event.h
+ sd-hwdb.h
+ sd-id128.h
+ sd-journal.h
+ sd-login.h
+ sd-messages.h
+'''.split()
+
+# https://github.com/mesonbuild/meson/issues/1633
+systemd_headers = files(_systemd_headers)
+
+_not_installed_headers = '''
+ sd-dhcp6-client.h
+ sd-dhcp6-lease.h
+ sd-dhcp-client.h
+ sd-dhcp-lease.h
+ sd-dhcp-server.h
+ sd-ipv4acd.h
+ sd-ipv4ll.h
+ sd-lldp.h
+ sd-ndisc.h
+ sd-netlink.h
+ sd-network.h
+ sd-path.h
+ sd-radv.h
+ sd-resolve.h
+ sd-utf8.h
+'''.split()
+
+install_headers(
+ systemd_headers,
+ '_sd-common.h',
+ subdir : 'systemd')
+
+
+############################################################
+
+opts = [['c'],
+ ['c', '-ansi'],
+ ['c', '-std=iso9899:1990'],
+ ['c', '-std=iso9899:2011']]
+
+if cc.has_argument('-std=iso9899:2017')
+ opts += [['c', '-std=iso9899:2017']]
+endif
+
+if add_languages('cpp', required : false)
+ opts += [['c++'],
+ ['c++', '-std=c++98'],
+ ['c++', '-std=c++11']]
+ if cc.has_argument('-std=c++14')
+ opts += [['c++', '-std=c++14']]
+ endif
+ if cc.has_argument('-std=c++17')
+ opts += [['c++', '-std=c++17']]
+ endif
+endif
+
+foreach header : _systemd_headers + _not_installed_headers + ['../libudev/libudev.h']
+ foreach opt : opts
+ name = ''.join(['cc-', header.split('/')[-1], ':'] + opt)
+ if want_tests != 'false'
+ test(name,
+ check_compilation_sh,
+ args : cc.cmd_array() + ['-c', '-x'] + opt +
+ ['-Werror', '-include',
+ join_paths(meson.current_source_dir(), header)])
+ endif
+ endforeach
+endforeach
diff --git a/src/systemd/sd-bus-protocol.h b/src/systemd/sd-bus-protocol.h
new file mode 100644
index 0000000..a3f4e74
--- /dev/null
+++ b/src/systemd/sd-bus-protocol.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdbusprotocolhfoo
+#define foosdbusprotocolhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+/* Types of message */
+
+enum {
+ _SD_BUS_MESSAGE_TYPE_INVALID = 0,
+ SD_BUS_MESSAGE_METHOD_CALL,
+ SD_BUS_MESSAGE_METHOD_RETURN,
+ SD_BUS_MESSAGE_METHOD_ERROR,
+ SD_BUS_MESSAGE_SIGNAL,
+ _SD_BUS_MESSAGE_TYPE_MAX
+};
+
+/* Primitive types */
+
+enum {
+ _SD_BUS_TYPE_INVALID = 0,
+ SD_BUS_TYPE_BYTE = 'y',
+ SD_BUS_TYPE_BOOLEAN = 'b',
+ SD_BUS_TYPE_INT16 = 'n',
+ SD_BUS_TYPE_UINT16 = 'q',
+ SD_BUS_TYPE_INT32 = 'i',
+ SD_BUS_TYPE_UINT32 = 'u',
+ SD_BUS_TYPE_INT64 = 'x',
+ SD_BUS_TYPE_UINT64 = 't',
+ SD_BUS_TYPE_DOUBLE = 'd',
+ SD_BUS_TYPE_STRING = 's',
+ SD_BUS_TYPE_OBJECT_PATH = 'o',
+ SD_BUS_TYPE_SIGNATURE = 'g',
+ SD_BUS_TYPE_UNIX_FD = 'h',
+ SD_BUS_TYPE_ARRAY = 'a',
+ SD_BUS_TYPE_VARIANT = 'v',
+ SD_BUS_TYPE_STRUCT = 'r', /* not actually used in signatures */
+ SD_BUS_TYPE_STRUCT_BEGIN = '(',
+ SD_BUS_TYPE_STRUCT_END = ')',
+ SD_BUS_TYPE_DICT_ENTRY = 'e', /* not actually used in signatures */
+ SD_BUS_TYPE_DICT_ENTRY_BEGIN = '{',
+ SD_BUS_TYPE_DICT_ENTRY_END = '}'
+};
+
+/* Well-known errors. Note that this is only a sanitized subset of the
+ * errors that the reference implementation generates. */
+
+#define SD_BUS_ERROR_FAILED "org.freedesktop.DBus.Error.Failed"
+#define SD_BUS_ERROR_NO_MEMORY "org.freedesktop.DBus.Error.NoMemory"
+#define SD_BUS_ERROR_SERVICE_UNKNOWN "org.freedesktop.DBus.Error.ServiceUnknown"
+#define SD_BUS_ERROR_NAME_HAS_NO_OWNER "org.freedesktop.DBus.Error.NameHasNoOwner"
+#define SD_BUS_ERROR_NO_REPLY "org.freedesktop.DBus.Error.NoReply"
+#define SD_BUS_ERROR_IO_ERROR "org.freedesktop.DBus.Error.IOError"
+#define SD_BUS_ERROR_BAD_ADDRESS "org.freedesktop.DBus.Error.BadAddress"
+#define SD_BUS_ERROR_NOT_SUPPORTED "org.freedesktop.DBus.Error.NotSupported"
+#define SD_BUS_ERROR_LIMITS_EXCEEDED "org.freedesktop.DBus.Error.LimitsExceeded"
+#define SD_BUS_ERROR_ACCESS_DENIED "org.freedesktop.DBus.Error.AccessDenied"
+#define SD_BUS_ERROR_AUTH_FAILED "org.freedesktop.DBus.Error.AuthFailed"
+#define SD_BUS_ERROR_NO_SERVER "org.freedesktop.DBus.Error.NoServer"
+#define SD_BUS_ERROR_TIMEOUT "org.freedesktop.DBus.Error.Timeout"
+#define SD_BUS_ERROR_NO_NETWORK "org.freedesktop.DBus.Error.NoNetwork"
+#define SD_BUS_ERROR_ADDRESS_IN_USE "org.freedesktop.DBus.Error.AddressInUse"
+#define SD_BUS_ERROR_DISCONNECTED "org.freedesktop.DBus.Error.Disconnected"
+#define SD_BUS_ERROR_INVALID_ARGS "org.freedesktop.DBus.Error.InvalidArgs"
+#define SD_BUS_ERROR_FILE_NOT_FOUND "org.freedesktop.DBus.Error.FileNotFound"
+#define SD_BUS_ERROR_FILE_EXISTS "org.freedesktop.DBus.Error.FileExists"
+#define SD_BUS_ERROR_UNKNOWN_METHOD "org.freedesktop.DBus.Error.UnknownMethod"
+#define SD_BUS_ERROR_UNKNOWN_OBJECT "org.freedesktop.DBus.Error.UnknownObject"
+#define SD_BUS_ERROR_UNKNOWN_INTERFACE "org.freedesktop.DBus.Error.UnknownInterface"
+#define SD_BUS_ERROR_UNKNOWN_PROPERTY "org.freedesktop.DBus.Error.UnknownProperty"
+#define SD_BUS_ERROR_PROPERTY_READ_ONLY "org.freedesktop.DBus.Error.PropertyReadOnly"
+#define SD_BUS_ERROR_UNIX_PROCESS_ID_UNKNOWN "org.freedesktop.DBus.Error.UnixProcessIdUnknown"
+#define SD_BUS_ERROR_INVALID_SIGNATURE "org.freedesktop.DBus.Error.InvalidSignature"
+#define SD_BUS_ERROR_INCONSISTENT_MESSAGE "org.freedesktop.DBus.Error.InconsistentMessage"
+#define SD_BUS_ERROR_MATCH_RULE_NOT_FOUND "org.freedesktop.DBus.Error.MatchRuleNotFound"
+#define SD_BUS_ERROR_MATCH_RULE_INVALID "org.freedesktop.DBus.Error.MatchRuleInvalid"
+#define SD_BUS_ERROR_INTERACTIVE_AUTHORIZATION_REQUIRED \
+ "org.freedesktop.DBus.Error.InteractiveAuthorizationRequired"
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-bus-vtable.h b/src/systemd/sd-bus-vtable.h
new file mode 100644
index 0000000..4350a8c
--- /dev/null
+++ b/src/systemd/sd-bus-vtable.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdbusvtablehfoo
+#define foosdbusvtablehfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_bus_vtable sd_bus_vtable;
+
+#include "sd-bus.h"
+
+enum {
+ _SD_BUS_VTABLE_START = '<',
+ _SD_BUS_VTABLE_END = '>',
+ _SD_BUS_VTABLE_METHOD = 'M',
+ _SD_BUS_VTABLE_SIGNAL = 'S',
+ _SD_BUS_VTABLE_PROPERTY = 'P',
+ _SD_BUS_VTABLE_WRITABLE_PROPERTY = 'W'
+};
+
+enum {
+ SD_BUS_VTABLE_DEPRECATED = 1ULL << 0,
+ SD_BUS_VTABLE_HIDDEN = 1ULL << 1,
+ SD_BUS_VTABLE_UNPRIVILEGED = 1ULL << 2,
+ SD_BUS_VTABLE_METHOD_NO_REPLY = 1ULL << 3,
+ SD_BUS_VTABLE_PROPERTY_CONST = 1ULL << 4,
+ SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE = 1ULL << 5,
+ SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION = 1ULL << 6,
+ SD_BUS_VTABLE_PROPERTY_EXPLICIT = 1ULL << 7,
+ _SD_BUS_VTABLE_CAPABILITY_MASK = 0xFFFFULL << 40
+};
+
+#define SD_BUS_VTABLE_CAPABILITY(x) ((uint64_t) (((x)+1) & 0xFFFF) << 40)
+
+struct sd_bus_vtable {
+ /* Please do not initialize this structure directly, use the
+ * macros below instead */
+
+ uint8_t type:8;
+ uint64_t flags:56;
+ union {
+ struct {
+ size_t element_size;
+ } start;
+ struct {
+ const char *member;
+ const char *signature;
+ const char *result;
+ sd_bus_message_handler_t handler;
+ size_t offset;
+ } method;
+ struct {
+ const char *member;
+ const char *signature;
+ } signal;
+ struct {
+ const char *member;
+ const char *signature;
+ sd_bus_property_get_t get;
+ sd_bus_property_set_t set;
+ size_t offset;
+ } property;
+ } x;
+};
+
+#define SD_BUS_VTABLE_START(_flags) \
+ { \
+ .type = _SD_BUS_VTABLE_START, \
+ .flags = _flags, \
+ .x = { \
+ .start = { \
+ .element_size = sizeof(sd_bus_vtable) \
+ }, \
+ }, \
+ }
+
+#define SD_BUS_METHOD_WITH_OFFSET(_member, _signature, _result, _handler, _offset, _flags) \
+ { \
+ .type = _SD_BUS_VTABLE_METHOD, \
+ .flags = _flags, \
+ .x = { \
+ .method = { \
+ .member = _member, \
+ .signature = _signature, \
+ .result = _result, \
+ .handler = _handler, \
+ .offset = _offset, \
+ }, \
+ }, \
+ }
+#define SD_BUS_METHOD(_member, _signature, _result, _handler, _flags) \
+ SD_BUS_METHOD_WITH_OFFSET(_member, _signature, _result, _handler, 0, _flags)
+
+#define SD_BUS_SIGNAL(_member, _signature, _flags) \
+ { \
+ .type = _SD_BUS_VTABLE_SIGNAL, \
+ .flags = _flags, \
+ .x = { \
+ .signal = { \
+ .member = _member, \
+ .signature = _signature, \
+ }, \
+ }, \
+ }
+
+#define SD_BUS_PROPERTY(_member, _signature, _get, _offset, _flags) \
+ { \
+ .type = _SD_BUS_VTABLE_PROPERTY, \
+ .flags = _flags, \
+ .x = { \
+ .property = { \
+ .member = _member, \
+ .signature = _signature, \
+ .get = _get, \
+ .set = NULL, \
+ .offset = _offset, \
+ }, \
+ }, \
+ }
+
+#define SD_BUS_WRITABLE_PROPERTY(_member, _signature, _get, _set, _offset, _flags) \
+ { \
+ .type = _SD_BUS_VTABLE_WRITABLE_PROPERTY, \
+ .flags = _flags, \
+ .x = { \
+ .property = { \
+ .member = _member, \
+ .signature = _signature, \
+ .get = _get, \
+ .set = _set, \
+ .offset = _offset, \
+ }, \
+ }, \
+ }
+
+#define SD_BUS_VTABLE_END \
+ { \
+ .type = _SD_BUS_VTABLE_END, \
+ .flags = 0, \
+ .x = { { 0 } }, \
+ }
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-bus.h b/src/systemd/sd-bus.h
new file mode 100644
index 0000000..129cc93
--- /dev/null
+++ b/src/systemd/sd-bus.h
@@ -0,0 +1,506 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdbushfoo
+#define foosdbushfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+#define SD_BUS_DEFAULT ((sd_bus *) 1)
+#define SD_BUS_DEFAULT_USER ((sd_bus *) 2)
+#define SD_BUS_DEFAULT_SYSTEM ((sd_bus *) 3)
+
+/* Types */
+
+typedef struct sd_bus sd_bus;
+typedef struct sd_bus_message sd_bus_message;
+typedef struct sd_bus_slot sd_bus_slot;
+typedef struct sd_bus_creds sd_bus_creds;
+typedef struct sd_bus_track sd_bus_track;
+
+typedef struct {
+ const char *name;
+ const char *message;
+ int _need_free;
+} sd_bus_error;
+
+typedef struct {
+ const char* name;
+ int code;
+} sd_bus_error_map;
+
+/* Flags */
+
+enum {
+ SD_BUS_CREDS_PID = 1ULL << 0,
+ SD_BUS_CREDS_TID = 1ULL << 1,
+ SD_BUS_CREDS_PPID = 1ULL << 2,
+ SD_BUS_CREDS_UID = 1ULL << 3,
+ SD_BUS_CREDS_EUID = 1ULL << 4,
+ SD_BUS_CREDS_SUID = 1ULL << 5,
+ SD_BUS_CREDS_FSUID = 1ULL << 6,
+ SD_BUS_CREDS_GID = 1ULL << 7,
+ SD_BUS_CREDS_EGID = 1ULL << 8,
+ SD_BUS_CREDS_SGID = 1ULL << 9,
+ SD_BUS_CREDS_FSGID = 1ULL << 10,
+ SD_BUS_CREDS_SUPPLEMENTARY_GIDS = 1ULL << 11,
+ SD_BUS_CREDS_COMM = 1ULL << 12,
+ SD_BUS_CREDS_TID_COMM = 1ULL << 13,
+ SD_BUS_CREDS_EXE = 1ULL << 14,
+ SD_BUS_CREDS_CMDLINE = 1ULL << 15,
+ SD_BUS_CREDS_CGROUP = 1ULL << 16,
+ SD_BUS_CREDS_UNIT = 1ULL << 17,
+ SD_BUS_CREDS_SLICE = 1ULL << 18,
+ SD_BUS_CREDS_USER_UNIT = 1ULL << 19,
+ SD_BUS_CREDS_USER_SLICE = 1ULL << 20,
+ SD_BUS_CREDS_SESSION = 1ULL << 21,
+ SD_BUS_CREDS_OWNER_UID = 1ULL << 22,
+ SD_BUS_CREDS_EFFECTIVE_CAPS = 1ULL << 23,
+ SD_BUS_CREDS_PERMITTED_CAPS = 1ULL << 24,
+ SD_BUS_CREDS_INHERITABLE_CAPS = 1ULL << 25,
+ SD_BUS_CREDS_BOUNDING_CAPS = 1ULL << 26,
+ SD_BUS_CREDS_SELINUX_CONTEXT = 1ULL << 27,
+ SD_BUS_CREDS_AUDIT_SESSION_ID = 1ULL << 28,
+ SD_BUS_CREDS_AUDIT_LOGIN_UID = 1ULL << 29,
+ SD_BUS_CREDS_TTY = 1ULL << 30,
+ SD_BUS_CREDS_UNIQUE_NAME = 1ULL << 31,
+ SD_BUS_CREDS_WELL_KNOWN_NAMES = 1ULL << 32,
+ SD_BUS_CREDS_DESCRIPTION = 1ULL << 33,
+ SD_BUS_CREDS_AUGMENT = 1ULL << 63, /* special flag, if on sd-bus will augment creds struct, in a potentially race-full way. */
+ _SD_BUS_CREDS_ALL = (1ULL << 34) -1
+};
+
+enum {
+ SD_BUS_NAME_REPLACE_EXISTING = 1ULL << 0,
+ SD_BUS_NAME_ALLOW_REPLACEMENT = 1ULL << 1,
+ SD_BUS_NAME_QUEUE = 1ULL << 2
+};
+
+/* Callbacks */
+
+typedef int (*sd_bus_message_handler_t)(sd_bus_message *m, void *userdata, sd_bus_error *ret_error);
+typedef int (*sd_bus_property_get_t) (sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
+typedef int (*sd_bus_property_set_t) (sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *ret_error);
+typedef int (*sd_bus_object_find_t) (sd_bus *bus, const char *path, const char *interface, void *userdata, void **ret_found, sd_bus_error *ret_error);
+typedef int (*sd_bus_node_enumerator_t) (sd_bus *bus, const char *prefix, void *userdata, char ***ret_nodes, sd_bus_error *ret_error);
+typedef int (*sd_bus_track_handler_t) (sd_bus_track *track, void *userdata);
+typedef _sd_destroy_t sd_bus_destroy_t;
+
+#include "sd-bus-protocol.h"
+#include "sd-bus-vtable.h"
+
+/* Connections */
+
+int sd_bus_default(sd_bus **ret);
+int sd_bus_default_user(sd_bus **ret);
+int sd_bus_default_system(sd_bus **ret);
+
+int sd_bus_open(sd_bus **ret);
+int sd_bus_open_with_description(sd_bus **ret, const char *description);
+int sd_bus_open_user(sd_bus **ret);
+int sd_bus_open_user_with_description(sd_bus **ret, const char *description);
+int sd_bus_open_system(sd_bus **ret);
+int sd_bus_open_system_with_description(sd_bus **ret, const char *description);
+int sd_bus_open_system_remote(sd_bus **ret, const char *host);
+int sd_bus_open_system_machine(sd_bus **ret, const char *machine);
+
+int sd_bus_new(sd_bus **ret);
+
+int sd_bus_set_address(sd_bus *bus, const char *address);
+int sd_bus_set_fd(sd_bus *bus, int input_fd, int output_fd);
+int sd_bus_set_exec(sd_bus *bus, const char *path, char *const argv[]);
+int sd_bus_get_address(sd_bus *bus, const char **address);
+int sd_bus_set_bus_client(sd_bus *bus, int b);
+int sd_bus_is_bus_client(sd_bus *bus);
+int sd_bus_set_server(sd_bus *bus, int b, sd_id128_t bus_id);
+int sd_bus_is_server(sd_bus *bus);
+int sd_bus_set_anonymous(sd_bus *bus, int b);
+int sd_bus_is_anonymous(sd_bus *bus);
+int sd_bus_set_trusted(sd_bus *bus, int b);
+int sd_bus_is_trusted(sd_bus *bus);
+int sd_bus_set_monitor(sd_bus *bus, int b);
+int sd_bus_is_monitor(sd_bus *bus);
+int sd_bus_set_description(sd_bus *bus, const char *description);
+int sd_bus_get_description(sd_bus *bus, const char **description);
+int sd_bus_negotiate_creds(sd_bus *bus, int b, uint64_t creds_mask);
+int sd_bus_negotiate_timestamp(sd_bus *bus, int b);
+int sd_bus_negotiate_fds(sd_bus *bus, int b);
+int sd_bus_can_send(sd_bus *bus, char type);
+int sd_bus_get_creds_mask(sd_bus *bus, uint64_t *creds_mask);
+int sd_bus_set_allow_interactive_authorization(sd_bus *bus, int b);
+int sd_bus_get_allow_interactive_authorization(sd_bus *bus);
+int sd_bus_set_exit_on_disconnect(sd_bus *bus, int b);
+int sd_bus_get_exit_on_disconnect(sd_bus *bus);
+int sd_bus_set_close_on_exit(sd_bus *bus, int b);
+int sd_bus_get_close_on_exit(sd_bus *bus);
+int sd_bus_set_watch_bind(sd_bus *bus, int b);
+int sd_bus_get_watch_bind(sd_bus *bus);
+int sd_bus_set_connected_signal(sd_bus *bus, int b);
+int sd_bus_get_connected_signal(sd_bus *bus);
+int sd_bus_set_sender(sd_bus *bus, const char *sender);
+int sd_bus_get_sender(sd_bus *bus, const char **ret);
+
+int sd_bus_start(sd_bus *bus);
+
+int sd_bus_try_close(sd_bus *bus);
+void sd_bus_close(sd_bus *bus);
+
+sd_bus *sd_bus_ref(sd_bus *bus);
+sd_bus *sd_bus_unref(sd_bus *bus);
+sd_bus *sd_bus_close_unref(sd_bus *bus);
+sd_bus *sd_bus_flush_close_unref(sd_bus *bus);
+
+void sd_bus_default_flush_close(void);
+
+int sd_bus_is_open(sd_bus *bus);
+int sd_bus_is_ready(sd_bus *bus);
+
+int sd_bus_get_bus_id(sd_bus *bus, sd_id128_t *id);
+int sd_bus_get_scope(sd_bus *bus, const char **scope);
+int sd_bus_get_tid(sd_bus *bus, pid_t *tid);
+int sd_bus_get_owner_creds(sd_bus *bus, uint64_t creds_mask, sd_bus_creds **ret);
+
+int sd_bus_send(sd_bus *bus, sd_bus_message *m, uint64_t *cookie);
+int sd_bus_send_to(sd_bus *bus, sd_bus_message *m, const char *destination, uint64_t *cookie);
+int sd_bus_call(sd_bus *bus, sd_bus_message *m, uint64_t usec, sd_bus_error *ret_error, sd_bus_message **reply);
+int sd_bus_call_async(sd_bus *bus, sd_bus_slot **slot, sd_bus_message *m, sd_bus_message_handler_t callback, void *userdata, uint64_t usec);
+
+int sd_bus_get_fd(sd_bus *bus);
+int sd_bus_get_events(sd_bus *bus);
+int sd_bus_get_timeout(sd_bus *bus, uint64_t *timeout_usec);
+int sd_bus_process(sd_bus *bus, sd_bus_message **r);
+int sd_bus_process_priority(sd_bus *bus, int64_t max_priority, sd_bus_message **r);
+int sd_bus_wait(sd_bus *bus, uint64_t timeout_usec);
+int sd_bus_flush(sd_bus *bus);
+
+sd_bus_slot* sd_bus_get_current_slot(sd_bus *bus);
+sd_bus_message* sd_bus_get_current_message(sd_bus *bus);
+sd_bus_message_handler_t sd_bus_get_current_handler(sd_bus *bus);
+void* sd_bus_get_current_userdata(sd_bus *bus);
+
+int sd_bus_attach_event(sd_bus *bus, sd_event *e, int priority);
+int sd_bus_detach_event(sd_bus *bus);
+sd_event *sd_bus_get_event(sd_bus *bus);
+
+int sd_bus_get_n_queued_read(sd_bus *bus, uint64_t *ret);
+int sd_bus_get_n_queued_write(sd_bus *bus, uint64_t *ret);
+
+int sd_bus_set_method_call_timeout(sd_bus *bus, uint64_t usec);
+int sd_bus_get_method_call_timeout(sd_bus *bus, uint64_t *ret);
+
+int sd_bus_add_filter(sd_bus *bus, sd_bus_slot **slot, sd_bus_message_handler_t callback, void *userdata);
+int sd_bus_add_match(sd_bus *bus, sd_bus_slot **slot, const char *match, sd_bus_message_handler_t callback, void *userdata);
+int sd_bus_add_match_async(sd_bus *bus, sd_bus_slot **slot, const char *match, sd_bus_message_handler_t callback, sd_bus_message_handler_t install_callback, void *userdata);
+int sd_bus_add_object(sd_bus *bus, sd_bus_slot **slot, const char *path, sd_bus_message_handler_t callback, void *userdata);
+int sd_bus_add_fallback(sd_bus *bus, sd_bus_slot **slot, const char *prefix, sd_bus_message_handler_t callback, void *userdata);
+int sd_bus_add_object_vtable(sd_bus *bus, sd_bus_slot **slot, const char *path, const char *interface, const sd_bus_vtable *vtable, void *userdata);
+int sd_bus_add_fallback_vtable(sd_bus *bus, sd_bus_slot **slot, const char *prefix, const char *interface, const sd_bus_vtable *vtable, sd_bus_object_find_t find, void *userdata);
+int sd_bus_add_node_enumerator(sd_bus *bus, sd_bus_slot **slot, const char *path, sd_bus_node_enumerator_t callback, void *userdata);
+int sd_bus_add_object_manager(sd_bus *bus, sd_bus_slot **slot, const char *path);
+
+/* Slot object */
+
+sd_bus_slot* sd_bus_slot_ref(sd_bus_slot *slot);
+sd_bus_slot* sd_bus_slot_unref(sd_bus_slot *slot);
+
+sd_bus* sd_bus_slot_get_bus(sd_bus_slot *slot);
+void *sd_bus_slot_get_userdata(sd_bus_slot *slot);
+void *sd_bus_slot_set_userdata(sd_bus_slot *slot, void *userdata);
+int sd_bus_slot_set_description(sd_bus_slot *slot, const char *description);
+int sd_bus_slot_get_description(sd_bus_slot *slot, const char **description);
+int sd_bus_slot_get_floating(sd_bus_slot *slot);
+int sd_bus_slot_set_floating(sd_bus_slot *slot, int b);
+int sd_bus_slot_set_destroy_callback(sd_bus_slot *s, sd_bus_destroy_t callback);
+int sd_bus_slot_get_destroy_callback(sd_bus_slot *s, sd_bus_destroy_t *callback);
+
+sd_bus_message* sd_bus_slot_get_current_message(sd_bus_slot *slot);
+sd_bus_message_handler_t sd_bus_slot_get_current_handler(sd_bus_slot *slot);
+void *sd_bus_slot_get_current_userdata(sd_bus_slot *slot);
+
+/* Message object */
+
+int sd_bus_message_new(sd_bus *bus, sd_bus_message **m, uint8_t type);
+int sd_bus_message_new_signal(sd_bus *bus, sd_bus_message **m, const char *path, const char *interface, const char *member);
+int sd_bus_message_new_method_call(sd_bus *bus, sd_bus_message **m, const char *destination, const char *path, const char *interface, const char *member);
+int sd_bus_message_new_method_return(sd_bus_message *call, sd_bus_message **m);
+int sd_bus_message_new_method_error(sd_bus_message *call, sd_bus_message **m, const sd_bus_error *e);
+int sd_bus_message_new_method_errorf(sd_bus_message *call, sd_bus_message **m, const char *name, const char *format, ...) _sd_printf_(4, 5);
+int sd_bus_message_new_method_errno(sd_bus_message *call, sd_bus_message **m, int error, const sd_bus_error *e);
+int sd_bus_message_new_method_errnof(sd_bus_message *call, sd_bus_message **m, int error, const char *format, ...) _sd_printf_(4, 5);
+
+sd_bus_message* sd_bus_message_ref(sd_bus_message *m);
+sd_bus_message* sd_bus_message_unref(sd_bus_message *m);
+
+int sd_bus_message_seal(sd_bus_message *m, uint64_t cookie, uint64_t timeout_usec);
+
+int sd_bus_message_get_type(sd_bus_message *m, uint8_t *type);
+int sd_bus_message_get_cookie(sd_bus_message *m, uint64_t *cookie);
+int sd_bus_message_get_reply_cookie(sd_bus_message *m, uint64_t *cookie);
+int sd_bus_message_get_priority(sd_bus_message *m, int64_t *priority);
+
+int sd_bus_message_get_expect_reply(sd_bus_message *m);
+int sd_bus_message_get_auto_start(sd_bus_message *m);
+int sd_bus_message_get_allow_interactive_authorization(sd_bus_message *m);
+
+const char *sd_bus_message_get_signature(sd_bus_message *m, int complete);
+const char *sd_bus_message_get_path(sd_bus_message *m);
+const char *sd_bus_message_get_interface(sd_bus_message *m);
+const char *sd_bus_message_get_member(sd_bus_message *m);
+const char *sd_bus_message_get_destination(sd_bus_message *m);
+const char *sd_bus_message_get_sender(sd_bus_message *m);
+const sd_bus_error *sd_bus_message_get_error(sd_bus_message *m);
+int sd_bus_message_get_errno(sd_bus_message *m);
+
+int sd_bus_message_get_monotonic_usec(sd_bus_message *m, uint64_t *usec);
+int sd_bus_message_get_realtime_usec(sd_bus_message *m, uint64_t *usec);
+int sd_bus_message_get_seqnum(sd_bus_message *m, uint64_t* seqnum);
+
+sd_bus* sd_bus_message_get_bus(sd_bus_message *m);
+sd_bus_creds *sd_bus_message_get_creds(sd_bus_message *m); /* do not unref the result */
+
+int sd_bus_message_is_signal(sd_bus_message *m, const char *interface, const char *member);
+int sd_bus_message_is_method_call(sd_bus_message *m, const char *interface, const char *member);
+int sd_bus_message_is_method_error(sd_bus_message *m, const char *name);
+int sd_bus_message_is_empty(sd_bus_message *m);
+int sd_bus_message_has_signature(sd_bus_message *m, const char *signature);
+
+int sd_bus_message_set_expect_reply(sd_bus_message *m, int b);
+int sd_bus_message_set_auto_start(sd_bus_message *m, int b);
+int sd_bus_message_set_allow_interactive_authorization(sd_bus_message *m, int b);
+
+int sd_bus_message_set_destination(sd_bus_message *m, const char *destination);
+int sd_bus_message_set_sender(sd_bus_message *m, const char *sender);
+int sd_bus_message_set_priority(sd_bus_message *m, int64_t priority);
+
+int sd_bus_message_append(sd_bus_message *m, const char *types, ...);
+int sd_bus_message_appendv(sd_bus_message *m, const char *types, va_list ap);
+int sd_bus_message_append_basic(sd_bus_message *m, char type, const void *p);
+int sd_bus_message_append_array(sd_bus_message *m, char type, const void *ptr, size_t size);
+int sd_bus_message_append_array_space(sd_bus_message *m, char type, size_t size, void **ptr);
+int sd_bus_message_append_array_iovec(sd_bus_message *m, char type, const struct iovec *iov, unsigned n);
+int sd_bus_message_append_array_memfd(sd_bus_message *m, char type, int memfd, uint64_t offset, uint64_t size);
+int sd_bus_message_append_string_space(sd_bus_message *m, size_t size, char **s);
+int sd_bus_message_append_string_iovec(sd_bus_message *m, const struct iovec *iov, unsigned n);
+int sd_bus_message_append_string_memfd(sd_bus_message *m, int memfd, uint64_t offset, uint64_t size);
+int sd_bus_message_append_strv(sd_bus_message *m, char **l);
+int sd_bus_message_open_container(sd_bus_message *m, char type, const char *contents);
+int sd_bus_message_close_container(sd_bus_message *m);
+int sd_bus_message_copy(sd_bus_message *m, sd_bus_message *source, int all);
+
+int sd_bus_message_read(sd_bus_message *m, const char *types, ...);
+int sd_bus_message_readv(sd_bus_message *m, const char *types, va_list ap);
+int sd_bus_message_read_basic(sd_bus_message *m, char type, void *p);
+int sd_bus_message_read_array(sd_bus_message *m, char type, const void **ptr, size_t *size);
+int sd_bus_message_read_strv(sd_bus_message *m, char ***l); /* free the result! */
+int sd_bus_message_skip(sd_bus_message *m, const char *types);
+int sd_bus_message_enter_container(sd_bus_message *m, char type, const char *contents);
+int sd_bus_message_exit_container(sd_bus_message *m);
+int sd_bus_message_peek_type(sd_bus_message *m, char *type, const char **contents);
+int sd_bus_message_verify_type(sd_bus_message *m, char type, const char *contents);
+int sd_bus_message_at_end(sd_bus_message *m, int complete);
+int sd_bus_message_rewind(sd_bus_message *m, int complete);
+
+/* Bus management */
+
+int sd_bus_get_unique_name(sd_bus *bus, const char **unique);
+int sd_bus_request_name(sd_bus *bus, const char *name, uint64_t flags);
+int sd_bus_request_name_async(sd_bus *bus, sd_bus_slot **ret_slot, const char *name, uint64_t flags, sd_bus_message_handler_t callback, void *userdata);
+int sd_bus_release_name(sd_bus *bus, const char *name);
+int sd_bus_release_name_async(sd_bus *bus, sd_bus_slot **ret_slot, const char *name, sd_bus_message_handler_t callback, void *userdata);
+int sd_bus_list_names(sd_bus *bus, char ***acquired, char ***activatable); /* free the results */
+int sd_bus_get_name_creds(sd_bus *bus, const char *name, uint64_t mask, sd_bus_creds **creds); /* unref the result! */
+int sd_bus_get_name_machine_id(sd_bus *bus, const char *name, sd_id128_t *machine);
+
+/* Convenience calls */
+
+int sd_bus_call_method(sd_bus *bus, const char *destination, const char *path, const char *interface, const char *member, sd_bus_error *ret_error, sd_bus_message **reply, const char *types, ...);
+int sd_bus_call_method_async(sd_bus *bus, sd_bus_slot **slot, const char *destination, const char *path, const char *interface, const char *member, sd_bus_message_handler_t callback, void *userdata, const char *types, ...);
+int sd_bus_get_property(sd_bus *bus, const char *destination, const char *path, const char *interface, const char *member, sd_bus_error *ret_error, sd_bus_message **reply, const char *type);
+int sd_bus_get_property_trivial(sd_bus *bus, const char *destination, const char *path, const char *interface, const char *member, sd_bus_error *ret_error, char type, void *ret_ptr);
+int sd_bus_get_property_string(sd_bus *bus, const char *destination, const char *path, const char *interface, const char *member, sd_bus_error *ret_error, char **ret); /* free the result! */
+int sd_bus_get_property_strv(sd_bus *bus, const char *destination, const char *path, const char *interface, const char *member, sd_bus_error *ret_error, char ***ret); /* free the result! */
+int sd_bus_set_property(sd_bus *bus, const char *destination, const char *path, const char *interface, const char *member, sd_bus_error *ret_error, const char *type, ...);
+
+int sd_bus_reply_method_return(sd_bus_message *call, const char *types, ...);
+int sd_bus_reply_method_error(sd_bus_message *call, const sd_bus_error *e);
+int sd_bus_reply_method_errorf(sd_bus_message *call, const char *name, const char *format, ...) _sd_printf_(3, 4);
+int sd_bus_reply_method_errno(sd_bus_message *call, int error, const sd_bus_error *e);
+int sd_bus_reply_method_errnof(sd_bus_message *call, int error, const char *format, ...) _sd_printf_(3, 4);
+
+int sd_bus_emit_signal(sd_bus *bus, const char *path, const char *interface, const char *member, const char *types, ...);
+
+int sd_bus_emit_properties_changed_strv(sd_bus *bus, const char *path, const char *interface, char **names);
+int sd_bus_emit_properties_changed(sd_bus *bus, const char *path, const char *interface, const char *name, ...) _sd_sentinel_;
+
+int sd_bus_emit_object_added(sd_bus *bus, const char *path);
+int sd_bus_emit_object_removed(sd_bus *bus, const char *path);
+int sd_bus_emit_interfaces_added_strv(sd_bus *bus, const char *path, char **interfaces);
+int sd_bus_emit_interfaces_added(sd_bus *bus, const char *path, const char *interface, ...) _sd_sentinel_;
+int sd_bus_emit_interfaces_removed_strv(sd_bus *bus, const char *path, char **interfaces);
+int sd_bus_emit_interfaces_removed(sd_bus *bus, const char *path, const char *interface, ...) _sd_sentinel_;
+
+int sd_bus_query_sender_creds(sd_bus_message *call, uint64_t mask, sd_bus_creds **creds);
+int sd_bus_query_sender_privilege(sd_bus_message *call, int capability);
+
+int sd_bus_match_signal(sd_bus *bus, sd_bus_slot **ret, const char *sender, const char *path, const char *interface, const char *member, sd_bus_message_handler_t callback, void *userdata);
+int sd_bus_match_signal_async(sd_bus *bus, sd_bus_slot **ret, const char *sender, const char *path, const char *interface, const char *member, sd_bus_message_handler_t match_callback, sd_bus_message_handler_t add_callback, void *userdata);
+
+/* Credential handling */
+
+int sd_bus_creds_new_from_pid(sd_bus_creds **ret, pid_t pid, uint64_t creds_mask);
+sd_bus_creds *sd_bus_creds_ref(sd_bus_creds *c);
+sd_bus_creds *sd_bus_creds_unref(sd_bus_creds *c);
+uint64_t sd_bus_creds_get_mask(const sd_bus_creds *c);
+uint64_t sd_bus_creds_get_augmented_mask(const sd_bus_creds *c);
+
+int sd_bus_creds_get_pid(sd_bus_creds *c, pid_t *pid);
+int sd_bus_creds_get_ppid(sd_bus_creds *c, pid_t *ppid);
+int sd_bus_creds_get_tid(sd_bus_creds *c, pid_t *tid);
+int sd_bus_creds_get_uid(sd_bus_creds *c, uid_t *uid);
+int sd_bus_creds_get_euid(sd_bus_creds *c, uid_t *euid);
+int sd_bus_creds_get_suid(sd_bus_creds *c, uid_t *suid);
+int sd_bus_creds_get_fsuid(sd_bus_creds *c, uid_t *fsuid);
+int sd_bus_creds_get_gid(sd_bus_creds *c, gid_t *gid);
+int sd_bus_creds_get_egid(sd_bus_creds *c, gid_t *egid);
+int sd_bus_creds_get_sgid(sd_bus_creds *c, gid_t *sgid);
+int sd_bus_creds_get_fsgid(sd_bus_creds *c, gid_t *fsgid);
+int sd_bus_creds_get_supplementary_gids(sd_bus_creds *c, const gid_t **gids);
+int sd_bus_creds_get_comm(sd_bus_creds *c, const char **comm);
+int sd_bus_creds_get_tid_comm(sd_bus_creds *c, const char **comm);
+int sd_bus_creds_get_exe(sd_bus_creds *c, const char **exe);
+int sd_bus_creds_get_cmdline(sd_bus_creds *c, char ***cmdline);
+int sd_bus_creds_get_cgroup(sd_bus_creds *c, const char **cgroup);
+int sd_bus_creds_get_unit(sd_bus_creds *c, const char **unit);
+int sd_bus_creds_get_slice(sd_bus_creds *c, const char **slice);
+int sd_bus_creds_get_user_unit(sd_bus_creds *c, const char **unit);
+int sd_bus_creds_get_user_slice(sd_bus_creds *c, const char **slice);
+int sd_bus_creds_get_session(sd_bus_creds *c, const char **session);
+int sd_bus_creds_get_owner_uid(sd_bus_creds *c, uid_t *uid);
+int sd_bus_creds_has_effective_cap(sd_bus_creds *c, int capability);
+int sd_bus_creds_has_permitted_cap(sd_bus_creds *c, int capability);
+int sd_bus_creds_has_inheritable_cap(sd_bus_creds *c, int capability);
+int sd_bus_creds_has_bounding_cap(sd_bus_creds *c, int capability);
+int sd_bus_creds_get_selinux_context(sd_bus_creds *c, const char **context);
+int sd_bus_creds_get_audit_session_id(sd_bus_creds *c, uint32_t *sessionid);
+int sd_bus_creds_get_audit_login_uid(sd_bus_creds *c, uid_t *loginuid);
+int sd_bus_creds_get_tty(sd_bus_creds *c, const char **tty);
+int sd_bus_creds_get_unique_name(sd_bus_creds *c, const char **name);
+int sd_bus_creds_get_well_known_names(sd_bus_creds *c, char ***names);
+int sd_bus_creds_get_description(sd_bus_creds *c, const char **name);
+
+/* Error structures */
+
+#define SD_BUS_ERROR_MAKE_CONST(name, message) ((const sd_bus_error) {(name), (message), 0})
+#define SD_BUS_ERROR_NULL SD_BUS_ERROR_MAKE_CONST(NULL, NULL)
+
+void sd_bus_error_free(sd_bus_error *e);
+int sd_bus_error_set(sd_bus_error *e, const char *name, const char *message);
+int sd_bus_error_setf(sd_bus_error *e, const char *name, const char *format, ...) _sd_printf_(3, 4);
+int sd_bus_error_set_const(sd_bus_error *e, const char *name, const char *message);
+int sd_bus_error_set_errno(sd_bus_error *e, int error);
+int sd_bus_error_set_errnof(sd_bus_error *e, int error, const char *format, ...) _sd_printf_(3, 4);
+int sd_bus_error_set_errnofv(sd_bus_error *e, int error, const char *format, va_list ap) _sd_printf_(3,0);
+int sd_bus_error_get_errno(const sd_bus_error *e);
+int sd_bus_error_copy(sd_bus_error *dest, const sd_bus_error *e);
+int sd_bus_error_move(sd_bus_error *dest, sd_bus_error *e);
+int sd_bus_error_is_set(const sd_bus_error *e);
+int sd_bus_error_has_name(const sd_bus_error *e, const char *name);
+
+#define SD_BUS_ERROR_MAP(_name, _code) \
+ { \
+ .name = _name, \
+ .code = _code, \
+ }
+#define SD_BUS_ERROR_MAP_END \
+ { \
+ .name = NULL, \
+ .code = - 'x', \
+ }
+
+int sd_bus_error_add_map(const sd_bus_error_map *map);
+
+/* Auxiliary macros */
+
+#define SD_BUS_MESSAGE_APPEND_ID128(x) 16, \
+ (x).bytes[0], (x).bytes[1], (x).bytes[2], (x).bytes[3], \
+ (x).bytes[4], (x).bytes[5], (x).bytes[6], (x).bytes[7], \
+ (x).bytes[8], (x).bytes[9], (x).bytes[10], (x).bytes[11], \
+ (x).bytes[12], (x).bytes[13], (x).bytes[14], (x).bytes[15]
+
+#define SD_BUS_MESSAGE_READ_ID128(x) 16, \
+ &(x).bytes[0], &(x).bytes[1], &(x).bytes[2], &(x).bytes[3], \
+ &(x).bytes[4], &(x).bytes[5], &(x).bytes[6], &(x).bytes[7], \
+ &(x).bytes[8], &(x).bytes[9], &(x).bytes[10], &(x).bytes[11], \
+ &(x).bytes[12], &(x).bytes[13], &(x).bytes[14], &(x).bytes[15]
+
+/* Label escaping */
+
+int sd_bus_path_encode(const char *prefix, const char *external_id, char **ret_path);
+int sd_bus_path_encode_many(char **out, const char *path_template, ...);
+int sd_bus_path_decode(const char *path, const char *prefix, char **ret_external_id);
+int sd_bus_path_decode_many(const char *path, const char *path_template, ...);
+
+/* Tracking peers */
+
+int sd_bus_track_new(sd_bus *bus, sd_bus_track **track, sd_bus_track_handler_t handler, void *userdata);
+sd_bus_track* sd_bus_track_ref(sd_bus_track *track);
+sd_bus_track* sd_bus_track_unref(sd_bus_track *track);
+
+sd_bus* sd_bus_track_get_bus(sd_bus_track *track);
+void *sd_bus_track_get_userdata(sd_bus_track *track);
+void *sd_bus_track_set_userdata(sd_bus_track *track, void *userdata);
+
+int sd_bus_track_add_sender(sd_bus_track *track, sd_bus_message *m);
+int sd_bus_track_remove_sender(sd_bus_track *track, sd_bus_message *m);
+int sd_bus_track_add_name(sd_bus_track *track, const char *name);
+int sd_bus_track_remove_name(sd_bus_track *track, const char *name);
+
+int sd_bus_track_set_recursive(sd_bus_track *track, int b);
+int sd_bus_track_get_recursive(sd_bus_track *track);
+
+unsigned sd_bus_track_count(sd_bus_track *track);
+int sd_bus_track_count_sender(sd_bus_track *track, sd_bus_message *m);
+int sd_bus_track_count_name(sd_bus_track *track, const char *name);
+
+const char* sd_bus_track_contains(sd_bus_track *track, const char *name);
+const char* sd_bus_track_first(sd_bus_track *track);
+const char* sd_bus_track_next(sd_bus_track *track);
+
+int sd_bus_track_set_destroy_callback(sd_bus_track *s, sd_bus_destroy_t callback);
+int sd_bus_track_get_destroy_callback(sd_bus_track *s, sd_bus_destroy_t *ret);
+
+/* Define helpers so that __attribute__((cleanup(sd_bus_unrefp))) and similar may be used. */
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_bus, sd_bus_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_bus, sd_bus_close_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_bus, sd_bus_flush_close_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_bus_slot, sd_bus_slot_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_bus_message, sd_bus_message_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_bus_creds, sd_bus_creds_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_bus_track, sd_bus_track_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-daemon.h b/src/systemd/sd-daemon.h
new file mode 100644
index 0000000..62b0f72
--- /dev/null
+++ b/src/systemd/sd-daemon.h
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosddaemonhfoo
+#define foosddaemonhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+/*
+ The following functionality is provided:
+
+ - Support for logging with log levels on stderr
+ - File descriptor passing for socket-based activation
+ - Daemon startup and status notification
+ - Detection of systemd boots
+
+ See sd-daemon(3) for more information.
+*/
+
+/*
+ Log levels for usage on stderr:
+
+ fprintf(stderr, SD_NOTICE "Hello World!\n");
+
+ This is similar to printk() usage in the kernel.
+*/
+#define SD_EMERG "<0>" /* system is unusable */
+#define SD_ALERT "<1>" /* action must be taken immediately */
+#define SD_CRIT "<2>" /* critical conditions */
+#define SD_ERR "<3>" /* error conditions */
+#define SD_WARNING "<4>" /* warning conditions */
+#define SD_NOTICE "<5>" /* normal but significant condition */
+#define SD_INFO "<6>" /* informational */
+#define SD_DEBUG "<7>" /* debug-level messages */
+
+/* The first passed file descriptor is fd 3 */
+#define SD_LISTEN_FDS_START 3
+
+/*
+ Returns how many file descriptors have been passed, or a negative
+ errno code on failure. Optionally, removes the $LISTEN_FDS and
+ $LISTEN_PID file descriptors from the environment (recommended, but
+ problematic in threaded environments). If r is the return value of
+ this function you'll find the file descriptors passed as fds
+ SD_LISTEN_FDS_START to SD_LISTEN_FDS_START+r-1. Returns a negative
+ errno style error code on failure. This function call ensures that
+ the FD_CLOEXEC flag is set for the passed file descriptors, to make
+ sure they are not passed on to child processes. If FD_CLOEXEC shall
+ not be set, the caller needs to unset it after this call for all file
+ descriptors that are used.
+
+ See sd_listen_fds(3) for more information.
+*/
+int sd_listen_fds(int unset_environment);
+
+int sd_listen_fds_with_names(int unset_environment, char ***names);
+
+/*
+ Helper call for identifying a passed file descriptor. Returns 1 if
+ the file descriptor is a FIFO in the file system stored under the
+ specified path, 0 otherwise. If path is NULL a path name check will
+ not be done and the call only verifies if the file descriptor
+ refers to a FIFO. Returns a negative errno style error code on
+ failure.
+
+ See sd_is_fifo(3) for more information.
+*/
+int sd_is_fifo(int fd, const char *path);
+
+/*
+ Helper call for identifying a passed file descriptor. Returns 1 if
+ the file descriptor is a special character device on the file
+ system stored under the specified path, 0 otherwise.
+ If path is NULL a path name check will not be done and the call
+ only verifies if the file descriptor refers to a special character.
+ Returns a negative errno style error code on failure.
+
+ See sd_is_special(3) for more information.
+*/
+int sd_is_special(int fd, const char *path);
+
+/*
+ Helper call for identifying a passed file descriptor. Returns 1 if
+ the file descriptor is a socket of the specified family (AF_INET,
+ ...) and type (SOCK_DGRAM, SOCK_STREAM, ...), 0 otherwise. If
+ family is 0 a socket family check will not be done. If type is 0 a
+ socket type check will not be done and the call only verifies if
+ the file descriptor refers to a socket. If listening is > 0 it is
+ verified that the socket is in listening mode. (i.e. listen() has
+ been called) If listening is == 0 it is verified that the socket is
+ not in listening mode. If listening is < 0 no listening mode check
+ is done. Returns a negative errno style error code on failure.
+
+ See sd_is_socket(3) for more information.
+*/
+int sd_is_socket(int fd, int family, int type, int listening);
+
+/*
+ Helper call for identifying a passed file descriptor. Returns 1 if
+ the file descriptor is an Internet socket, of the specified family
+ (either AF_INET or AF_INET6) and the specified type (SOCK_DGRAM,
+ SOCK_STREAM, ...), 0 otherwise. If version is 0 a protocol version
+ check is not done. If type is 0 a socket type check will not be
+ done. If port is 0 a socket port check will not be done. The
+ listening flag is used the same way as in sd_is_socket(). Returns a
+ negative errno style error code on failure.
+
+ See sd_is_socket_inet(3) for more information.
+*/
+int sd_is_socket_inet(int fd, int family, int type, int listening, uint16_t port);
+
+/*
+ Helper call for identifying a passed file descriptor. Returns 1 if the
+ file descriptor is an Internet socket of the specified type
+ (SOCK_DGRAM, SOCK_STREAM, ...), and if the address of the socket is
+ the same as the address specified by addr. The listening flag is used
+ the same way as in sd_is_socket(). Returns a negative errno style
+ error code on failure.
+
+ See sd_is_socket_sockaddr(3) for more information.
+*/
+int sd_is_socket_sockaddr(int fd, int type, const struct sockaddr* addr, unsigned addr_len, int listening);
+
+/*
+ Helper call for identifying a passed file descriptor. Returns 1 if
+ the file descriptor is an AF_UNIX socket of the specified type
+ (SOCK_DGRAM, SOCK_STREAM, ...) and path, 0 otherwise. If type is 0
+ a socket type check will not be done. If path is NULL a socket path
+ check will not be done. For normal AF_UNIX sockets set length to
+ 0. For abstract namespace sockets set length to the length of the
+ socket name (including the initial 0 byte), and pass the full
+ socket path in path (including the initial 0 byte). The listening
+ flag is used the same way as in sd_is_socket(). Returns a negative
+ errno style error code on failure.
+
+ See sd_is_socket_unix(3) for more information.
+*/
+int sd_is_socket_unix(int fd, int type, int listening, const char *path, size_t length);
+
+/*
+ Helper call for identifying a passed file descriptor. Returns 1 if
+ the file descriptor is a POSIX Message Queue of the specified name,
+ 0 otherwise. If path is NULL a message queue name check is not
+ done. Returns a negative errno style error code on failure.
+
+ See sd_is_mq(3) for more information.
+*/
+int sd_is_mq(int fd, const char *path);
+
+/*
+ Informs systemd about changed daemon state. This takes a number of
+ newline separated environment-style variable assignments in a
+ string. The following variables are known:
+
+ MAINPID=... The main PID of a daemon, in case systemd did not
+ fork off the process itself. Example: "MAINPID=4711"
+
+ READY=1 Tells systemd that daemon startup or daemon reload
+ is finished (only relevant for services of Type=notify).
+ The passed argument is a boolean "1" or "0". Since there
+ is little value in signaling non-readiness the only
+ value daemons should send is "READY=1".
+
+ RELOADING=1 Tell systemd that the daemon began reloading its
+ configuration. When the configuration has been
+ reloaded completely, READY=1 should be sent to inform
+ systemd about this.
+
+ STOPPING=1 Tells systemd that the daemon is about to go down.
+
+ STATUS=... Passes a single-line status string back to systemd
+ that describes the daemon state. This is free-form
+ and can be used for various purposes: general state
+ feedback, fsck-like programs could pass completion
+ percentages and failing programs could pass a human
+ readable error message. Example: "STATUS=Completed
+ 66% of file system check..."
+
+ ERRNO=... If a daemon fails, the errno-style error code,
+ formatted as string. Example: "ERRNO=2" for ENOENT.
+
+ BUSERROR=... If a daemon fails, the D-Bus error-style error
+ code. Example: "BUSERROR=org.freedesktop.DBus.Error.TimedOut"
+
+ WATCHDOG=1 Tells systemd to update the watchdog timestamp.
+ Services using this feature should do this in
+ regular intervals. A watchdog framework can use the
+ timestamps to detect failed services. Also see
+ sd_watchdog_enabled() below.
+
+ WATCHDOG_USEC=...
+ Reset watchdog_usec value during runtime.
+ To reset watchdog_usec value, start the service again.
+ Example: "WATCHDOG_USEC=20000000"
+
+ FDSTORE=1 Store the file descriptors passed along with the
+ message in the per-service file descriptor store,
+ and pass them to the main process again on next
+ invocation. This variable is only supported with
+ sd_pid_notify_with_fds().
+
+ FDSTOREREMOVE=1
+ Remove one or more file descriptors from the file
+ descriptor store, identified by the name specified
+ in FDNAME=, see below.
+
+ FDNAME= A name to assign to new file descriptors stored in the
+ file descriptor store, or the name of the file descriptors
+ to remove in case of FDSTOREREMOVE=1.
+
+ Daemons can choose to send additional variables. However, it is
+ recommended to prefix variable names not listed above with X_.
+
+ Returns a negative errno-style error code on failure. Returns > 0
+ if systemd could be notified, 0 if it couldn't possibly because
+ systemd is not running.
+
+ Example: When a daemon finished starting up, it could issue this
+ call to notify systemd about it:
+
+ sd_notify(0, "READY=1");
+
+ See sd_notifyf() for more complete examples.
+
+ See sd_notify(3) for more information.
+*/
+int sd_notify(int unset_environment, const char *state);
+
+/*
+ Similar to sd_notify() but takes a format string.
+
+ Example 1: A daemon could send the following after initialization:
+
+ sd_notifyf(0, "READY=1\n"
+ "STATUS=Processing requests...\n"
+ "MAINPID=%lu",
+ (unsigned long) getpid());
+
+ Example 2: A daemon could send the following shortly before
+ exiting, on failure:
+
+ sd_notifyf(0, "STATUS=Failed to start up: %s\n"
+ "ERRNO=%i",
+ strerror(errno),
+ errno);
+
+ See sd_notifyf(3) for more information.
+*/
+int sd_notifyf(int unset_environment, const char *format, ...) _sd_printf_(2,3);
+
+/*
+ Similar to sd_notify(), but send the message on behalf of another
+ process, if the appropriate permissions are available.
+*/
+int sd_pid_notify(pid_t pid, int unset_environment, const char *state);
+
+/*
+ Similar to sd_notifyf(), but send the message on behalf of another
+ process, if the appropriate permissions are available.
+*/
+int sd_pid_notifyf(pid_t pid, int unset_environment, const char *format, ...) _sd_printf_(3,4);
+
+/*
+ Similar to sd_pid_notify(), but also passes the specified fd array
+ to the service manager for storage. This is particularly useful for
+ FDSTORE=1 messages.
+*/
+int sd_pid_notify_with_fds(pid_t pid, int unset_environment, const char *state, const int *fds, unsigned n_fds);
+
+/*
+ Returns > 0 if the system was booted with systemd. Returns < 0 on
+ error. Returns 0 if the system was not booted with systemd. Note
+ that all of the functions above handle non-systemd boots just
+ fine. You should NOT protect them with a call to this function. Also
+ note that this function checks whether the system, not the user
+ session is controlled by systemd. However the functions above work
+ for both user and system services.
+
+ See sd_booted(3) for more information.
+*/
+int sd_booted(void);
+
+/*
+ Returns > 0 if the service manager expects watchdog keep-alive
+ events to be sent regularly via sd_notify(0, "WATCHDOG=1"). Returns
+ 0 if it does not expect this. If the usec argument is non-NULL
+ returns the watchdog timeout in µs after which the service manager
+ will act on a process that has not sent a watchdog keep alive
+ message. This function is useful to implement services that
+ recognize automatically if they are being run under supervision of
+ systemd with WatchdogSec= set. It is recommended for clients to
+ generate keep-alive pings via sd_notify(0, "WATCHDOG=1") every half
+ of the returned time.
+
+ See sd_watchdog_enabled(3) for more information.
+*/
+int sd_watchdog_enabled(int unset_environment, uint64_t *usec);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-device.h b/src/systemd/sd-device.h
new file mode 100644
index 0000000..3c5c88c
--- /dev/null
+++ b/src/systemd/sd-device.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosddevicehfoo
+#define foosddevicehfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_device sd_device;
+typedef struct sd_device_enumerator sd_device_enumerator;
+typedef struct sd_device_monitor sd_device_monitor;
+
+/* callback */
+
+typedef int (*sd_device_monitor_handler_t)(sd_device_monitor *m, sd_device *device, void *userdata);
+
+/* device */
+
+sd_device *sd_device_ref(sd_device *device);
+sd_device *sd_device_unref(sd_device *device);
+
+int sd_device_new_from_syspath(sd_device **ret, const char *syspath);
+int sd_device_new_from_devnum(sd_device **ret, char type, dev_t devnum);
+int sd_device_new_from_subsystem_sysname(sd_device **ret, const char *subsystem, const char *sysname);
+int sd_device_new_from_device_id(sd_device **ret, const char *id);
+
+int sd_device_get_parent(sd_device *child, sd_device **ret);
+int sd_device_get_parent_with_subsystem_devtype(sd_device *child, const char *subsystem, const char *devtype, sd_device **ret);
+
+int sd_device_get_syspath(sd_device *device, const char **ret);
+int sd_device_get_subsystem(sd_device *device, const char **ret);
+int sd_device_get_devtype(sd_device *device, const char **ret);
+int sd_device_get_devnum(sd_device *device, dev_t *devnum);
+int sd_device_get_ifindex(sd_device *device, int *ifindex);
+int sd_device_get_driver(sd_device *device, const char **ret);
+int sd_device_get_devpath(sd_device *device, const char **ret);
+int sd_device_get_devname(sd_device *device, const char **ret);
+int sd_device_get_sysname(sd_device *device, const char **ret);
+int sd_device_get_sysnum(sd_device *device, const char **ret);
+
+int sd_device_get_is_initialized(sd_device *device);
+int sd_device_get_usec_since_initialized(sd_device *device, uint64_t *usec);
+
+const char *sd_device_get_tag_first(sd_device *device);
+const char *sd_device_get_tag_next(sd_device *device);
+const char *sd_device_get_devlink_first(sd_device *device);
+const char *sd_device_get_devlink_next(sd_device *device);
+const char *sd_device_get_property_first(sd_device *device, const char **value);
+const char *sd_device_get_property_next(sd_device *device, const char **value);
+const char *sd_device_get_sysattr_first(sd_device *device);
+const char *sd_device_get_sysattr_next(sd_device *device);
+
+int sd_device_has_tag(sd_device *device, const char *tag);
+int sd_device_get_property_value(sd_device *device, const char *key, const char **value);
+int sd_device_get_sysattr_value(sd_device *device, const char *sysattr, const char **_value);
+
+int sd_device_set_sysattr_value(sd_device *device, const char *sysattr, const char *value);
+
+/* device enumerator */
+
+int sd_device_enumerator_new(sd_device_enumerator **ret);
+sd_device_enumerator *sd_device_enumerator_ref(sd_device_enumerator *enumerator);
+sd_device_enumerator *sd_device_enumerator_unref(sd_device_enumerator *enumerator);
+
+sd_device *sd_device_enumerator_get_device_first(sd_device_enumerator *enumerator);
+sd_device *sd_device_enumerator_get_device_next(sd_device_enumerator *enumerator);
+sd_device *sd_device_enumerator_get_subsystem_first(sd_device_enumerator *enumerator);
+sd_device *sd_device_enumerator_get_subsystem_next(sd_device_enumerator *enumerator);
+
+int sd_device_enumerator_add_match_subsystem(sd_device_enumerator *enumerator, const char *subsystem, int match);
+int sd_device_enumerator_add_match_sysattr(sd_device_enumerator *enumerator, const char *sysattr, const char *value, int match);
+int sd_device_enumerator_add_match_property(sd_device_enumerator *enumerator, const char *property, const char *value);
+int sd_device_enumerator_add_match_sysname(sd_device_enumerator *enumerator, const char *sysname);
+int sd_device_enumerator_add_match_tag(sd_device_enumerator *enumerator, const char *tag);
+int sd_device_enumerator_add_match_parent(sd_device_enumerator *enumerator, sd_device *parent);
+int sd_device_enumerator_allow_uninitialized(sd_device_enumerator *enumerator);
+
+/* device monitor */
+
+int sd_device_monitor_new(sd_device_monitor **ret);
+sd_device_monitor *sd_device_monitor_ref(sd_device_monitor *m);
+sd_device_monitor *sd_device_monitor_unref(sd_device_monitor *m);
+
+int sd_device_monitor_set_receive_buffer_size(sd_device_monitor *m, size_t size);
+int sd_device_monitor_attach_event(sd_device_monitor *m, sd_event *event);
+int sd_device_monitor_detach_event(sd_device_monitor *m);
+sd_event *sd_device_monitor_get_event(sd_device_monitor *m);
+sd_event_source *sd_device_monitor_get_event_source(sd_device_monitor *m);
+int sd_device_monitor_start(sd_device_monitor *m, sd_device_monitor_handler_t callback, void *userdata);
+int sd_device_monitor_stop(sd_device_monitor *m);
+
+int sd_device_monitor_filter_add_match_subsystem_devtype(sd_device_monitor *m, const char *subsystem, const char *devtype);
+int sd_device_monitor_filter_add_match_tag(sd_device_monitor *m, const char *tag);
+int sd_device_monitor_filter_update(sd_device_monitor *m);
+int sd_device_monitor_filter_remove(sd_device_monitor *m);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_device, sd_device_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_device_enumerator, sd_device_enumerator_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_device_monitor, sd_device_monitor_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-dhcp-client.h b/src/systemd/sd-dhcp-client.h
new file mode 100644
index 0000000..bd0d429
--- /dev/null
+++ b/src/systemd/sd-dhcp-client.h
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosddhcpclienthfoo
+#define foosddhcpclienthfoo
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <stdbool.h>
+
+#include "sd-dhcp-lease.h"
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+enum {
+ SD_DHCP_CLIENT_EVENT_STOP = 0,
+ SD_DHCP_CLIENT_EVENT_IP_ACQUIRE = 1,
+ SD_DHCP_CLIENT_EVENT_IP_CHANGE = 2,
+ SD_DHCP_CLIENT_EVENT_EXPIRED = 3,
+ SD_DHCP_CLIENT_EVENT_RENEW = 4,
+};
+
+enum {
+ SD_DHCP_OPTION_PAD = 0,
+ SD_DHCP_OPTION_SUBNET_MASK = 1,
+ SD_DHCP_OPTION_TIME_OFFSET = 2,
+ SD_DHCP_OPTION_ROUTER = 3,
+ SD_DHCP_OPTION_DOMAIN_NAME_SERVER = 6,
+ SD_DHCP_OPTION_HOST_NAME = 12,
+ SD_DHCP_OPTION_BOOT_FILE_SIZE = 13,
+ SD_DHCP_OPTION_DOMAIN_NAME = 15,
+ SD_DHCP_OPTION_ROOT_PATH = 17,
+ SD_DHCP_OPTION_ENABLE_IP_FORWARDING = 19,
+ SD_DHCP_OPTION_ENABLE_IP_FORWARDING_NL = 20,
+ SD_DHCP_OPTION_POLICY_FILTER = 21,
+ SD_DHCP_OPTION_INTERFACE_MDR = 22,
+ SD_DHCP_OPTION_INTERFACE_TTL = 23,
+ SD_DHCP_OPTION_INTERFACE_MTU_AGING_TIMEOUT = 24,
+ SD_DHCP_OPTION_INTERFACE_MTU = 26,
+ SD_DHCP_OPTION_BROADCAST = 28,
+ /* Windows 10 option to send when Anonymize=true */
+ SD_DHCP_OPTION_ROUTER_DISCOVER = 31,
+ SD_DHCP_OPTION_STATIC_ROUTE = 33,
+ SD_DHCP_OPTION_NTP_SERVER = 42,
+ SD_DHCP_OPTION_VENDOR_SPECIFIC = 43,
+ /* Windows 10 option to send when Anonymize=true */
+ SD_DHCP_OPTION_NETBIOS_NAMESERVER = 44,
+ /* Windows 10 option to send when Anonymize=true */
+ SD_DHCP_OPTION_NETBIOS_NODETYPE = 46,
+ /* Windows 10 option to send when Anonymize=true */
+ SD_DHCP_OPTION_NETBIOS_SCOPE = 47,
+ SD_DHCP_OPTION_REQUESTED_IP_ADDRESS = 50,
+ SD_DHCP_OPTION_IP_ADDRESS_LEASE_TIME = 51,
+ SD_DHCP_OPTION_OVERLOAD = 52,
+ SD_DHCP_OPTION_MESSAGE_TYPE = 53,
+ SD_DHCP_OPTION_SERVER_IDENTIFIER = 54,
+ SD_DHCP_OPTION_PARAMETER_REQUEST_LIST = 55,
+ SD_DHCP_OPTION_ERROR_MESSAGE = 56,
+ SD_DHCP_OPTION_MAXIMUM_MESSAGE_SIZE = 57,
+ SD_DHCP_OPTION_RENEWAL_T1_TIME = 58,
+ SD_DHCP_OPTION_REBINDING_T2_TIME = 59,
+ SD_DHCP_OPTION_VENDOR_CLASS_IDENTIFIER = 60,
+ SD_DHCP_OPTION_CLIENT_IDENTIFIER = 61,
+ SD_DHCP_OPTION_USER_CLASS = 77,
+ SD_DHCP_OPTION_FQDN = 81,
+ SD_DHCP_OPTION_NEW_POSIX_TIMEZONE = 100,
+ SD_DHCP_OPTION_NEW_TZDB_TIMEZONE = 101,
+ SD_DHCP_OPTION_DOMAIN_SEARCH_LIST = 119,
+ SD_DHCP_OPTION_CLASSLESS_STATIC_ROUTE = 121,
+ SD_DHCP_OPTION_PRIVATE_BASE = 224,
+ /* Windows 10 option to send when Anonymize=true */
+ SD_DHCP_OPTION_PRIVATE_CLASSLESS_STATIC_ROUTE = 249,
+ /* Windows 10 option to send when Anonymize=true */
+ SD_DHCP_OPTION_PRIVATE_PROXY_AUTODISCOVERY = 252,
+ SD_DHCP_OPTION_PRIVATE_LAST = 254,
+ SD_DHCP_OPTION_END = 255,
+};
+
+typedef struct sd_dhcp_client sd_dhcp_client;
+
+typedef void (*sd_dhcp_client_callback_t)(sd_dhcp_client *client, int event, void *userdata);
+int sd_dhcp_client_set_callback(
+ sd_dhcp_client *client,
+ sd_dhcp_client_callback_t cb,
+ void *userdata);
+
+int sd_dhcp_client_set_request_option(
+ sd_dhcp_client *client,
+ uint8_t option);
+int sd_dhcp_client_set_request_address(
+ sd_dhcp_client *client,
+ const struct in_addr *last_address);
+int sd_dhcp_client_set_request_broadcast(
+ sd_dhcp_client *client,
+ int broadcast);
+int sd_dhcp_client_set_ifindex(
+ sd_dhcp_client *client,
+ int interface_index);
+int sd_dhcp_client_set_mac(
+ sd_dhcp_client *client,
+ const uint8_t *addr,
+ size_t addr_len,
+ uint16_t arp_type);
+int sd_dhcp_client_set_client_id(
+ sd_dhcp_client *client,
+ uint8_t type,
+ const uint8_t *data,
+ size_t data_len);
+int sd_dhcp_client_set_iaid_duid(
+ sd_dhcp_client *client,
+ bool iaid_set,
+ uint32_t iaid,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len);
+int sd_dhcp_client_set_iaid_duid_llt(
+ sd_dhcp_client *client,
+ bool iaid_set,
+ uint32_t iaid,
+ uint64_t llt_time);
+int sd_dhcp_client_set_duid(
+ sd_dhcp_client *client,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len);
+int sd_dhcp_client_set_duid_llt(
+ sd_dhcp_client *client,
+ uint64_t llt_time);
+int sd_dhcp_client_get_client_id(
+ sd_dhcp_client *client,
+ uint8_t *type,
+ const uint8_t **data,
+ size_t *data_len);
+int sd_dhcp_client_set_mtu(
+ sd_dhcp_client *client,
+ uint32_t mtu);
+int sd_dhcp_client_set_client_port(
+ sd_dhcp_client *client,
+ uint16_t port);
+int sd_dhcp_client_set_hostname(
+ sd_dhcp_client *client,
+ const char *hostname);
+int sd_dhcp_client_set_vendor_class_identifier(
+ sd_dhcp_client *client,
+ const char *vci);
+int sd_dhcp_client_set_user_class(
+ sd_dhcp_client *client,
+ const char* const *user_class);
+int sd_dhcp_client_get_lease(
+ sd_dhcp_client *client,
+ sd_dhcp_lease **ret);
+
+int sd_dhcp_client_stop(sd_dhcp_client *client);
+int sd_dhcp_client_start(sd_dhcp_client *client);
+
+sd_dhcp_client *sd_dhcp_client_ref(sd_dhcp_client *client);
+sd_dhcp_client *sd_dhcp_client_unref(sd_dhcp_client *client);
+
+/* NOTE: anonymize parameter is used to initialize PRL memory with different
+ * options when using RFC7844 Anonymity Profiles */
+int sd_dhcp_client_new(sd_dhcp_client **ret, int anonymize);
+
+int sd_dhcp_client_attach_event(
+ sd_dhcp_client *client,
+ sd_event *event,
+ int64_t priority);
+int sd_dhcp_client_detach_event(sd_dhcp_client *client);
+sd_event *sd_dhcp_client_get_event(sd_dhcp_client *client);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_dhcp_client, sd_dhcp_client_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-dhcp-lease.h b/src/systemd/sd-dhcp-lease.h
new file mode 100644
index 0000000..4875f10
--- /dev/null
+++ b/src/systemd/sd-dhcp-lease.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosddhcpleasehfoo
+#define foosddhcpleasehfoo
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_dhcp_lease sd_dhcp_lease;
+typedef struct sd_dhcp_route sd_dhcp_route;
+
+sd_dhcp_lease *sd_dhcp_lease_ref(sd_dhcp_lease *lease);
+sd_dhcp_lease *sd_dhcp_lease_unref(sd_dhcp_lease *lease);
+
+int sd_dhcp_lease_get_address(sd_dhcp_lease *lease, struct in_addr *addr);
+int sd_dhcp_lease_get_lifetime(sd_dhcp_lease *lease, uint32_t *lifetime);
+int sd_dhcp_lease_get_t1(sd_dhcp_lease *lease, uint32_t *t1);
+int sd_dhcp_lease_get_t2(sd_dhcp_lease *lease, uint32_t *t2);
+int sd_dhcp_lease_get_broadcast(sd_dhcp_lease *lease, struct in_addr *addr);
+int sd_dhcp_lease_get_netmask(sd_dhcp_lease *lease, struct in_addr *addr);
+int sd_dhcp_lease_get_router(sd_dhcp_lease *lease, struct in_addr *addr);
+int sd_dhcp_lease_get_next_server(sd_dhcp_lease *lease, struct in_addr *addr);
+int sd_dhcp_lease_get_server_identifier(sd_dhcp_lease *lease, struct in_addr *addr);
+int sd_dhcp_lease_get_dns(sd_dhcp_lease *lease, const struct in_addr **addr);
+int sd_dhcp_lease_get_ntp(sd_dhcp_lease *lease, const struct in_addr **addr);
+int sd_dhcp_lease_get_mtu(sd_dhcp_lease *lease, uint16_t *mtu);
+int sd_dhcp_lease_get_domainname(sd_dhcp_lease *lease, const char **domainname);
+int sd_dhcp_lease_get_search_domains(sd_dhcp_lease *lease, char ***domains);
+int sd_dhcp_lease_get_hostname(sd_dhcp_lease *lease, const char **hostname);
+int sd_dhcp_lease_get_root_path(sd_dhcp_lease *lease, const char **root_path);
+int sd_dhcp_lease_get_routes(sd_dhcp_lease *lease, sd_dhcp_route ***routes);
+int sd_dhcp_lease_get_vendor_specific(sd_dhcp_lease *lease, const void **data, size_t *data_len);
+int sd_dhcp_lease_get_client_id(sd_dhcp_lease *lease, const void **client_id, size_t *client_id_len);
+int sd_dhcp_lease_get_timezone(sd_dhcp_lease *lease, const char **timezone);
+
+int sd_dhcp_route_get_destination(sd_dhcp_route *route, struct in_addr *destination);
+int sd_dhcp_route_get_destination_prefix_length(sd_dhcp_route *route, uint8_t *length);
+int sd_dhcp_route_get_gateway(sd_dhcp_route *route, struct in_addr *gateway);
+int sd_dhcp_route_get_option(sd_dhcp_route *route);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_dhcp_lease, sd_dhcp_lease_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-dhcp-server.h b/src/systemd/sd-dhcp-server.h
new file mode 100644
index 0000000..2d5125d
--- /dev/null
+++ b/src/systemd/sd-dhcp-server.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosddhcpserverhfoo
+#define foosddhcpserverhfoo
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <netinet/in.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_dhcp_server sd_dhcp_server;
+
+int sd_dhcp_server_new(sd_dhcp_server **ret, int ifindex);
+
+sd_dhcp_server *sd_dhcp_server_ref(sd_dhcp_server *server);
+sd_dhcp_server *sd_dhcp_server_unref(sd_dhcp_server *server);
+
+int sd_dhcp_server_attach_event(sd_dhcp_server *client, sd_event *event, int64_t priority);
+int sd_dhcp_server_detach_event(sd_dhcp_server *client);
+sd_event *sd_dhcp_server_get_event(sd_dhcp_server *client);
+
+int sd_dhcp_server_is_running(sd_dhcp_server *server);
+
+int sd_dhcp_server_start(sd_dhcp_server *server);
+int sd_dhcp_server_stop(sd_dhcp_server *server);
+
+int sd_dhcp_server_configure_pool(sd_dhcp_server *server, struct in_addr *address, unsigned char prefixlen, uint32_t offset, uint32_t size);
+
+int sd_dhcp_server_set_timezone(sd_dhcp_server *server, const char *timezone);
+int sd_dhcp_server_set_dns(sd_dhcp_server *server, const struct in_addr ntp[], unsigned n);
+int sd_dhcp_server_set_ntp(sd_dhcp_server *server, const struct in_addr dns[], unsigned n);
+int sd_dhcp_server_set_emit_router(sd_dhcp_server *server, int enabled);
+
+int sd_dhcp_server_set_max_lease_time(sd_dhcp_server *server, uint32_t t);
+int sd_dhcp_server_set_default_lease_time(sd_dhcp_server *server, uint32_t t);
+
+int sd_dhcp_server_forcerenew(sd_dhcp_server *server);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_dhcp_server, sd_dhcp_server_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-dhcp6-client.h b/src/systemd/sd-dhcp6-client.h
new file mode 100644
index 0000000..43d38f5
--- /dev/null
+++ b/src/systemd/sd-dhcp6-client.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosddhcp6clienthfoo
+#define foosddhcp6clienthfoo
+
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <net/ethernet.h>
+#include <sys/types.h>
+
+#include "sd-dhcp6-lease.h"
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+enum {
+ SD_DHCP6_CLIENT_EVENT_STOP = 0,
+ SD_DHCP6_CLIENT_EVENT_RESEND_EXPIRE = 10,
+ SD_DHCP6_CLIENT_EVENT_RETRANS_MAX = 11,
+ SD_DHCP6_CLIENT_EVENT_IP_ACQUIRE = 12,
+ SD_DHCP6_CLIENT_EVENT_INFORMATION_REQUEST = 13,
+};
+
+enum {
+ SD_DHCP6_OPTION_CLIENTID = 1,
+ SD_DHCP6_OPTION_SERVERID = 2,
+ SD_DHCP6_OPTION_IA_NA = 3,
+ SD_DHCP6_OPTION_IA_TA = 4,
+ SD_DHCP6_OPTION_IAADDR = 5,
+ SD_DHCP6_OPTION_ORO = 6,
+ SD_DHCP6_OPTION_PREFERENCE = 7,
+ SD_DHCP6_OPTION_ELAPSED_TIME = 8,
+ SD_DHCP6_OPTION_RELAY_MSG = 9,
+ /* option code 10 is unassigned */
+ SD_DHCP6_OPTION_AUTH = 11,
+ SD_DHCP6_OPTION_UNICAST = 12,
+ SD_DHCP6_OPTION_STATUS_CODE = 13,
+ SD_DHCP6_OPTION_RAPID_COMMIT = 14,
+ SD_DHCP6_OPTION_USER_CLASS = 15,
+ SD_DHCP6_OPTION_VENDOR_CLASS = 16,
+ SD_DHCP6_OPTION_VENDOR_OPTS = 17,
+ SD_DHCP6_OPTION_INTERFACE_ID = 18,
+ SD_DHCP6_OPTION_RECONF_MSG = 19,
+ SD_DHCP6_OPTION_RECONF_ACCEPT = 20,
+
+ SD_DHCP6_OPTION_DNS_SERVERS = 23, /* RFC 3646 */
+ SD_DHCP6_OPTION_DOMAIN_LIST = 24, /* RFC 3646 */
+ SD_DHCP6_OPTION_IA_PD = 25, /* RFC 3633, prefix delegation */
+ SD_DHCP6_OPTION_IA_PD_PREFIX = 26, /* RFC 3633, prefix delegation */
+
+ SD_DHCP6_OPTION_SNTP_SERVERS = 31, /* RFC 4075, deprecated */
+
+ /* option code 35 is unassigned */
+
+ SD_DHCP6_OPTION_FQDN = 39, /* RFC 4704 */
+
+ SD_DHCP6_OPTION_NTP_SERVER = 56, /* RFC 5908 */
+
+ /* option codes 89-142 are unassigned */
+ /* option codes 144-65535 are unassigned */
+};
+
+typedef struct sd_dhcp6_client sd_dhcp6_client;
+
+typedef void (*sd_dhcp6_client_callback_t)(sd_dhcp6_client *client, int event, void *userdata);
+int sd_dhcp6_client_set_callback(
+ sd_dhcp6_client *client,
+ sd_dhcp6_client_callback_t cb,
+ void *userdata);
+
+int sd_dhcp6_client_set_ifindex(
+ sd_dhcp6_client *client,
+ int interface_index);
+int sd_dhcp6_client_set_local_address(
+ sd_dhcp6_client *client,
+ const struct in6_addr *local_address);
+int sd_dhcp6_client_set_mac(
+ sd_dhcp6_client *client,
+ const uint8_t *addr,
+ size_t addr_len,
+ uint16_t arp_type);
+int sd_dhcp6_client_set_duid(
+ sd_dhcp6_client *client,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len);
+int sd_dhcp6_client_set_duid_llt(
+ sd_dhcp6_client *client,
+ uint64_t llt_time);
+int sd_dhcp6_client_set_iaid(
+ sd_dhcp6_client *client,
+ uint32_t iaid);
+int sd_dhcp6_client_set_fqdn(
+ sd_dhcp6_client *client,
+ const char *fqdn);
+int sd_dhcp6_client_set_information_request(
+ sd_dhcp6_client *client,
+ int enabled);
+int sd_dhcp6_client_get_information_request(
+ sd_dhcp6_client *client,
+ int *enabled);
+int sd_dhcp6_client_set_request_option(
+ sd_dhcp6_client *client,
+ uint16_t option);
+int sd_dhcp6_client_get_prefix_delegation(sd_dhcp6_client *client,
+ int *delegation);
+int sd_dhcp6_client_set_prefix_delegation(sd_dhcp6_client *client,
+ int delegation);
+int sd_dhcp6_client_get_address_request(sd_dhcp6_client *client,
+ int *request);
+int sd_dhcp6_client_set_address_request(sd_dhcp6_client *client,
+ int request);
+int sd_dhcp6_client_set_transaction_id(sd_dhcp6_client *client, uint32_t transaction_id);
+
+int sd_dhcp6_client_get_lease(
+ sd_dhcp6_client *client,
+ sd_dhcp6_lease **ret);
+
+int sd_dhcp6_client_stop(sd_dhcp6_client *client);
+int sd_dhcp6_client_start(sd_dhcp6_client *client);
+int sd_dhcp6_client_is_running(sd_dhcp6_client *client);
+int sd_dhcp6_client_attach_event(
+ sd_dhcp6_client *client,
+ sd_event *event,
+ int64_t priority);
+int sd_dhcp6_client_detach_event(sd_dhcp6_client *client);
+sd_event *sd_dhcp6_client_get_event(sd_dhcp6_client *client);
+sd_dhcp6_client *sd_dhcp6_client_ref(sd_dhcp6_client *client);
+sd_dhcp6_client *sd_dhcp6_client_unref(sd_dhcp6_client *client);
+int sd_dhcp6_client_new(sd_dhcp6_client **ret);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_dhcp6_client, sd_dhcp6_client_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-dhcp6-lease.h b/src/systemd/sd-dhcp6-lease.h
new file mode 100644
index 0000000..33a32a6
--- /dev/null
+++ b/src/systemd/sd-dhcp6-lease.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosddhcp6leasehfoo
+#define foosddhcp6leasehfoo
+
+/***
+ Copyright © 2014-2015 Intel Corporation. All rights reserved.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <netinet/in.h>
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_dhcp6_lease sd_dhcp6_lease;
+
+void sd_dhcp6_lease_reset_address_iter(sd_dhcp6_lease *lease);
+int sd_dhcp6_lease_get_address(sd_dhcp6_lease *lease,
+ struct in6_addr *addr,
+ uint32_t *lifetime_preferred,
+ uint32_t *lifetime_valid);
+void sd_dhcp6_lease_reset_pd_prefix_iter(sd_dhcp6_lease *lease);
+int sd_dhcp6_lease_get_pd(sd_dhcp6_lease *lease, struct in6_addr *prefix,
+ uint8_t *prefix_len,
+ uint32_t *lifetime_preferred,
+ uint32_t *lifetime_valid);
+
+int sd_dhcp6_lease_get_dns(sd_dhcp6_lease *lease, struct in6_addr **addrs);
+int sd_dhcp6_lease_get_domains(sd_dhcp6_lease *lease, char ***domains);
+int sd_dhcp6_lease_get_ntp_addrs(sd_dhcp6_lease *lease,
+ struct in6_addr **addrs);
+int sd_dhcp6_lease_get_ntp_fqdn(sd_dhcp6_lease *lease, char ***ntp_fqdn);
+
+sd_dhcp6_lease *sd_dhcp6_lease_ref(sd_dhcp6_lease *lease);
+sd_dhcp6_lease *sd_dhcp6_lease_unref(sd_dhcp6_lease *lease);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_dhcp6_lease, sd_dhcp6_lease_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-event.h b/src/systemd/sd-event.h
new file mode 100644
index 0000000..787a12f
--- /dev/null
+++ b/src/systemd/sd-event.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdeventhfoo
+#define foosdeventhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/epoll.h>
+#include <sys/inotify.h>
+#include <sys/signalfd.h>
+#include <sys/types.h>
+#include <time.h>
+
+#include "_sd-common.h"
+
+/*
+ Why is this better than pure epoll?
+
+ - Supports event source prioritization
+ - Scales better with a large number of time events because it does not require one timerfd each
+ - Automatically tries to coalesce timer events system-wide
+ - Handles signals, child PIDs, inotify events
+ - Supports systemd-style automatic watchdog event generation
+*/
+
+_SD_BEGIN_DECLARATIONS;
+
+#define SD_EVENT_DEFAULT ((sd_event *) 1)
+
+typedef struct sd_event sd_event;
+typedef struct sd_event_source sd_event_source;
+
+enum {
+ SD_EVENT_OFF = 0,
+ SD_EVENT_ON = 1,
+ SD_EVENT_ONESHOT = -1
+};
+
+enum {
+ SD_EVENT_INITIAL,
+ SD_EVENT_ARMED,
+ SD_EVENT_PENDING,
+ SD_EVENT_RUNNING,
+ SD_EVENT_EXITING,
+ SD_EVENT_FINISHED,
+ SD_EVENT_PREPARING
+};
+
+enum {
+ /* And everything in-between and outside is good too */
+ SD_EVENT_PRIORITY_IMPORTANT = -100,
+ SD_EVENT_PRIORITY_NORMAL = 0,
+ SD_EVENT_PRIORITY_IDLE = 100
+};
+
+typedef int (*sd_event_handler_t)(sd_event_source *s, void *userdata);
+typedef int (*sd_event_io_handler_t)(sd_event_source *s, int fd, uint32_t revents, void *userdata);
+typedef int (*sd_event_time_handler_t)(sd_event_source *s, uint64_t usec, void *userdata);
+typedef int (*sd_event_signal_handler_t)(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata);
+#if defined _GNU_SOURCE || _POSIX_C_SOURCE >= 199309L
+typedef int (*sd_event_child_handler_t)(sd_event_source *s, const siginfo_t *si, void *userdata);
+#else
+typedef void* sd_event_child_handler_t;
+#endif
+typedef int (*sd_event_inotify_handler_t)(sd_event_source *s, const struct inotify_event *event, void *userdata);
+typedef _sd_destroy_t sd_event_destroy_t;
+
+int sd_event_default(sd_event **e);
+
+int sd_event_new(sd_event **e);
+sd_event* sd_event_ref(sd_event *e);
+sd_event* sd_event_unref(sd_event *e);
+
+int sd_event_add_io(sd_event *e, sd_event_source **s, int fd, uint32_t events, sd_event_io_handler_t callback, void *userdata);
+int sd_event_add_time(sd_event *e, sd_event_source **s, clockid_t clock, uint64_t usec, uint64_t accuracy, sd_event_time_handler_t callback, void *userdata);
+int sd_event_add_signal(sd_event *e, sd_event_source **s, int sig, sd_event_signal_handler_t callback, void *userdata);
+int sd_event_add_child(sd_event *e, sd_event_source **s, pid_t pid, int options, sd_event_child_handler_t callback, void *userdata);
+int sd_event_add_inotify(sd_event *e, sd_event_source **s, const char *path, uint32_t mask, sd_event_inotify_handler_t callback, void *userdata);
+int sd_event_add_defer(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
+int sd_event_add_post(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
+int sd_event_add_exit(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
+
+int sd_event_prepare(sd_event *e);
+int sd_event_wait(sd_event *e, uint64_t usec);
+int sd_event_dispatch(sd_event *e);
+int sd_event_run(sd_event *e, uint64_t usec);
+int sd_event_loop(sd_event *e);
+int sd_event_exit(sd_event *e, int code);
+
+int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec);
+
+int sd_event_get_fd(sd_event *e);
+int sd_event_get_state(sd_event *e);
+int sd_event_get_tid(sd_event *e, pid_t *tid);
+int sd_event_get_exit_code(sd_event *e, int *code);
+int sd_event_set_watchdog(sd_event *e, int b);
+int sd_event_get_watchdog(sd_event *e);
+int sd_event_get_iteration(sd_event *e, uint64_t *ret);
+
+sd_event_source* sd_event_source_ref(sd_event_source *s);
+sd_event_source* sd_event_source_unref(sd_event_source *s);
+
+sd_event *sd_event_source_get_event(sd_event_source *s);
+void* sd_event_source_get_userdata(sd_event_source *s);
+void* sd_event_source_set_userdata(sd_event_source *s, void *userdata);
+
+int sd_event_source_set_description(sd_event_source *s, const char *description);
+int sd_event_source_get_description(sd_event_source *s, const char **description);
+int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback);
+int sd_event_source_get_pending(sd_event_source *s);
+int sd_event_source_get_priority(sd_event_source *s, int64_t *priority);
+int sd_event_source_set_priority(sd_event_source *s, int64_t priority);
+int sd_event_source_get_enabled(sd_event_source *s, int *enabled);
+int sd_event_source_set_enabled(sd_event_source *s, int enabled);
+int sd_event_source_get_io_fd(sd_event_source *s);
+int sd_event_source_set_io_fd(sd_event_source *s, int fd);
+int sd_event_source_get_io_fd_own(sd_event_source *s);
+int sd_event_source_set_io_fd_own(sd_event_source *s, int own);
+int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events);
+int sd_event_source_set_io_events(sd_event_source *s, uint32_t events);
+int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents);
+int sd_event_source_get_time(sd_event_source *s, uint64_t *usec);
+int sd_event_source_set_time(sd_event_source *s, uint64_t usec);
+int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec);
+int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec);
+int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock);
+int sd_event_source_get_signal(sd_event_source *s);
+int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid);
+int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *ret);
+int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback);
+int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret);
+int sd_event_source_get_floating(sd_event_source *s);
+int sd_event_source_set_floating(sd_event_source *s, int b);
+
+/* Define helpers so that __attribute__((cleanup(sd_event_unrefp))) and similar may be used. */
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event, sd_event_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event_source, sd_event_source_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-hwdb.h b/src/systemd/sd-hwdb.h
new file mode 100644
index 0000000..2b0cd7f
--- /dev/null
+++ b/src/systemd/sd-hwdb.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdhwdbhfoo
+#define foosdhwdbhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_hwdb sd_hwdb;
+
+sd_hwdb *sd_hwdb_ref(sd_hwdb *hwdb);
+sd_hwdb *sd_hwdb_unref(sd_hwdb *hwdb);
+
+int sd_hwdb_new(sd_hwdb **ret);
+
+int sd_hwdb_get(sd_hwdb *hwdb, const char *modalias, const char *key, const char **value);
+
+int sd_hwdb_seek(sd_hwdb *hwdb, const char *modalias);
+int sd_hwdb_enumerate(sd_hwdb *hwdb, const char **key, const char **value);
+
+/* the inverse condition avoids ambiguity of dangling 'else' after the macro */
+#define SD_HWDB_FOREACH_PROPERTY(hwdb, modalias, key, value) \
+ if (sd_hwdb_seek(hwdb, modalias) < 0) { } \
+ else while (sd_hwdb_enumerate(hwdb, &(key), &(value)) > 0)
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_hwdb, sd_hwdb_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-id128.h b/src/systemd/sd-id128.h
new file mode 100644
index 0000000..bdf88ed
--- /dev/null
+++ b/src/systemd/sd-id128.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdid128hfoo
+#define foosdid128hfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+/* 128-bit ID APIs. See sd-id128(3) for more information. */
+
+typedef union sd_id128 sd_id128_t;
+
+union sd_id128 {
+ uint8_t bytes[16];
+ uint64_t qwords[2];
+};
+
+#define SD_ID128_STRING_MAX 33
+
+char *sd_id128_to_string(sd_id128_t id, char s[_SD_ARRAY_STATIC SD_ID128_STRING_MAX]);
+int sd_id128_from_string(const char *s, sd_id128_t *ret);
+
+int sd_id128_randomize(sd_id128_t *ret);
+
+int sd_id128_get_machine(sd_id128_t *ret);
+int sd_id128_get_boot(sd_id128_t *ret);
+int sd_id128_get_invocation(sd_id128_t *ret);
+
+int sd_id128_get_machine_app_specific(sd_id128_t app_id, sd_id128_t *ret);
+int sd_id128_get_boot_app_specific(sd_id128_t app_id, sd_id128_t *ret);
+
+#define SD_ID128_ARRAY(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) \
+ { .bytes = { 0x##v0, 0x##v1, 0x##v2, 0x##v3, 0x##v4, 0x##v5, 0x##v6, 0x##v7, \
+ 0x##v8, 0x##v9, 0x##v10, 0x##v11, 0x##v12, 0x##v13, 0x##v14, 0x##v15 }}
+
+#define SD_ID128_MAKE(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) \
+ ((const sd_id128_t) SD_ID128_ARRAY(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
+
+
+/* Note that SD_ID128_FORMAT_VAL will evaluate the passed argument 16
+ * times. It is hence not a good idea to call this macro with an
+ * expensive function as parameter or an expression with side
+ * effects */
+
+#define SD_ID128_FORMAT_STR "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x"
+#define SD_ID128_FORMAT_VAL(x) (x).bytes[0], (x).bytes[1], (x).bytes[2], (x).bytes[3], (x).bytes[4], (x).bytes[5], (x).bytes[6], (x).bytes[7], (x).bytes[8], (x).bytes[9], (x).bytes[10], (x).bytes[11], (x).bytes[12], (x).bytes[13], (x).bytes[14], (x).bytes[15]
+
+#define SD_ID128_CONST_STR(x) \
+ ((const char[SD_ID128_STRING_MAX]) { \
+ ((x).bytes[0] >> 4) >= 10 ? 'a' + ((x).bytes[0] >> 4) - 10 : '0' + ((x).bytes[0] >> 4), \
+ ((x).bytes[0] & 15) >= 10 ? 'a' + ((x).bytes[0] & 15) - 10 : '0' + ((x).bytes[0] & 15), \
+ ((x).bytes[1] >> 4) >= 10 ? 'a' + ((x).bytes[1] >> 4) - 10 : '0' + ((x).bytes[1] >> 4), \
+ ((x).bytes[1] & 15) >= 10 ? 'a' + ((x).bytes[1] & 15) - 10 : '0' + ((x).bytes[1] & 15), \
+ ((x).bytes[2] >> 4) >= 10 ? 'a' + ((x).bytes[2] >> 4) - 10 : '0' + ((x).bytes[2] >> 4), \
+ ((x).bytes[2] & 15) >= 10 ? 'a' + ((x).bytes[2] & 15) - 10 : '0' + ((x).bytes[2] & 15), \
+ ((x).bytes[3] >> 4) >= 10 ? 'a' + ((x).bytes[3] >> 4) - 10 : '0' + ((x).bytes[3] >> 4), \
+ ((x).bytes[3] & 15) >= 10 ? 'a' + ((x).bytes[3] & 15) - 10 : '0' + ((x).bytes[3] & 15), \
+ ((x).bytes[4] >> 4) >= 10 ? 'a' + ((x).bytes[4] >> 4) - 10 : '0' + ((x).bytes[4] >> 4), \
+ ((x).bytes[4] & 15) >= 10 ? 'a' + ((x).bytes[4] & 15) - 10 : '0' + ((x).bytes[4] & 15), \
+ ((x).bytes[5] >> 4) >= 10 ? 'a' + ((x).bytes[5] >> 4) - 10 : '0' + ((x).bytes[5] >> 4), \
+ ((x).bytes[5] & 15) >= 10 ? 'a' + ((x).bytes[5] & 15) - 10 : '0' + ((x).bytes[5] & 15), \
+ ((x).bytes[6] >> 4) >= 10 ? 'a' + ((x).bytes[6] >> 4) - 10 : '0' + ((x).bytes[6] >> 4), \
+ ((x).bytes[6] & 15) >= 10 ? 'a' + ((x).bytes[6] & 15) - 10 : '0' + ((x).bytes[6] & 15), \
+ ((x).bytes[7] >> 4) >= 10 ? 'a' + ((x).bytes[7] >> 4) - 10 : '0' + ((x).bytes[7] >> 4), \
+ ((x).bytes[7] & 15) >= 10 ? 'a' + ((x).bytes[7] & 15) - 10 : '0' + ((x).bytes[7] & 15), \
+ ((x).bytes[8] >> 4) >= 10 ? 'a' + ((x).bytes[8] >> 4) - 10 : '0' + ((x).bytes[8] >> 4), \
+ ((x).bytes[8] & 15) >= 10 ? 'a' + ((x).bytes[8] & 15) - 10 : '0' + ((x).bytes[8] & 15), \
+ ((x).bytes[9] >> 4) >= 10 ? 'a' + ((x).bytes[9] >> 4) - 10 : '0' + ((x).bytes[9] >> 4), \
+ ((x).bytes[9] & 15) >= 10 ? 'a' + ((x).bytes[9] & 15) - 10 : '0' + ((x).bytes[9] & 15), \
+ ((x).bytes[10] >> 4) >= 10 ? 'a' + ((x).bytes[10] >> 4) - 10 : '0' + ((x).bytes[10] >> 4), \
+ ((x).bytes[10] & 15) >= 10 ? 'a' + ((x).bytes[10] & 15) - 10 : '0' + ((x).bytes[10] & 15), \
+ ((x).bytes[11] >> 4) >= 10 ? 'a' + ((x).bytes[11] >> 4) - 10 : '0' + ((x).bytes[11] >> 4), \
+ ((x).bytes[11] & 15) >= 10 ? 'a' + ((x).bytes[11] & 15) - 10 : '0' + ((x).bytes[11] & 15), \
+ ((x).bytes[12] >> 4) >= 10 ? 'a' + ((x).bytes[12] >> 4) - 10 : '0' + ((x).bytes[12] >> 4), \
+ ((x).bytes[12] & 15) >= 10 ? 'a' + ((x).bytes[12] & 15) - 10 : '0' + ((x).bytes[12] & 15), \
+ ((x).bytes[13] >> 4) >= 10 ? 'a' + ((x).bytes[13] >> 4) - 10 : '0' + ((x).bytes[13] >> 4), \
+ ((x).bytes[13] & 15) >= 10 ? 'a' + ((x).bytes[13] & 15) - 10 : '0' + ((x).bytes[13] & 15), \
+ ((x).bytes[14] >> 4) >= 10 ? 'a' + ((x).bytes[14] >> 4) - 10 : '0' + ((x).bytes[14] >> 4), \
+ ((x).bytes[14] & 15) >= 10 ? 'a' + ((x).bytes[14] & 15) - 10 : '0' + ((x).bytes[14] & 15), \
+ ((x).bytes[15] >> 4) >= 10 ? 'a' + ((x).bytes[15] >> 4) - 10 : '0' + ((x).bytes[15] >> 4), \
+ ((x).bytes[15] & 15) >= 10 ? 'a' + ((x).bytes[15] & 15) - 10 : '0' + ((x).bytes[15] & 15), \
+ 0 })
+
+#define SD_ID128_MAKE_STR(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
+ #a #b #c #d #e #f #g #h #i #j #k #l #m #n #o #p
+
+_sd_pure_ static __inline__ int sd_id128_equal(sd_id128_t a, sd_id128_t b) {
+ return memcmp(&a, &b, 16) == 0;
+}
+
+_sd_pure_ static __inline__ int sd_id128_is_null(sd_id128_t a) {
+ return a.qwords[0] == 0 && a.qwords[1] == 0;
+}
+
+_sd_pure_ static __inline__ int sd_id128_is_allf(sd_id128_t a) {
+ return a.qwords[0] == UINT64_C(0xFFFFFFFFFFFFFFFF) && a.qwords[1] == UINT64_C(0xFFFFFFFFFFFFFFFF);
+}
+
+#define SD_ID128_NULL ((const sd_id128_t) { .qwords = { 0, 0 }})
+#define SD_ID128_ALLF ((const sd_id128_t) { .qwords = { UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF) }})
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-ipv4acd.h b/src/systemd/sd-ipv4acd.h
new file mode 100644
index 0000000..039ed3c
--- /dev/null
+++ b/src/systemd/sd-ipv4acd.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdipv4acdfoo
+#define foosdipv4acdfoo
+
+/***
+ Copyright © 2014 Axis Communications AB. All rights reserved.
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <net/ethernet.h>
+#include <netinet/in.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+enum {
+ SD_IPV4ACD_EVENT_STOP = 0,
+ SD_IPV4ACD_EVENT_BIND = 1,
+ SD_IPV4ACD_EVENT_CONFLICT = 2,
+};
+
+typedef struct sd_ipv4acd sd_ipv4acd;
+typedef void (*sd_ipv4acd_callback_t)(sd_ipv4acd *acd, int event, void *userdata);
+
+int sd_ipv4acd_detach_event(sd_ipv4acd *acd);
+int sd_ipv4acd_attach_event(sd_ipv4acd *acd, sd_event *event, int64_t priority);
+int sd_ipv4acd_get_address(sd_ipv4acd *acd, struct in_addr *address);
+int sd_ipv4acd_set_callback(sd_ipv4acd *acd, sd_ipv4acd_callback_t cb, void *userdata);
+int sd_ipv4acd_set_mac(sd_ipv4acd *acd, const struct ether_addr *addr);
+int sd_ipv4acd_set_ifindex(sd_ipv4acd *acd, int interface_index);
+int sd_ipv4acd_set_address(sd_ipv4acd *acd, const struct in_addr *address);
+int sd_ipv4acd_is_running(sd_ipv4acd *acd);
+int sd_ipv4acd_start(sd_ipv4acd *acd);
+int sd_ipv4acd_stop(sd_ipv4acd *acd);
+sd_ipv4acd *sd_ipv4acd_ref(sd_ipv4acd *acd);
+sd_ipv4acd *sd_ipv4acd_unref(sd_ipv4acd *acd);
+int sd_ipv4acd_new(sd_ipv4acd **ret);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_ipv4acd, sd_ipv4acd_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-ipv4ll.h b/src/systemd/sd-ipv4ll.h
new file mode 100644
index 0000000..71bd4cf
--- /dev/null
+++ b/src/systemd/sd-ipv4ll.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdipv4llfoo
+#define foosdipv4llfoo
+
+/***
+ Copyright © 2014 Axis Communications AB. All rights reserved.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <net/ethernet.h>
+#include <netinet/in.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+enum {
+ SD_IPV4LL_EVENT_STOP = 0,
+ SD_IPV4LL_EVENT_BIND = 1,
+ SD_IPV4LL_EVENT_CONFLICT = 2,
+};
+
+typedef struct sd_ipv4ll sd_ipv4ll;
+typedef void (*sd_ipv4ll_callback_t)(sd_ipv4ll *ll, int event, void *userdata);
+
+int sd_ipv4ll_detach_event(sd_ipv4ll *ll);
+int sd_ipv4ll_attach_event(sd_ipv4ll *ll, sd_event *event, int64_t priority);
+int sd_ipv4ll_get_address(sd_ipv4ll *ll, struct in_addr *address);
+int sd_ipv4ll_set_callback(sd_ipv4ll *ll, sd_ipv4ll_callback_t cb, void *userdata);
+int sd_ipv4ll_set_mac(sd_ipv4ll *ll, const struct ether_addr *addr);
+int sd_ipv4ll_set_ifindex(sd_ipv4ll *ll, int interface_index);
+int sd_ipv4ll_set_address(sd_ipv4ll *ll, const struct in_addr *address);
+int sd_ipv4ll_set_address_seed(sd_ipv4ll *ll, uint64_t seed);
+int sd_ipv4ll_is_running(sd_ipv4ll *ll);
+int sd_ipv4ll_restart(sd_ipv4ll *ll);
+int sd_ipv4ll_start(sd_ipv4ll *ll);
+int sd_ipv4ll_stop(sd_ipv4ll *ll);
+sd_ipv4ll *sd_ipv4ll_ref(sd_ipv4ll *ll);
+sd_ipv4ll *sd_ipv4ll_unref(sd_ipv4ll *ll);
+int sd_ipv4ll_new(sd_ipv4ll **ret);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_ipv4ll, sd_ipv4ll_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-journal.h b/src/systemd/sd-journal.h
new file mode 100644
index 0000000..b4bf3b9
--- /dev/null
+++ b/src/systemd/sd-journal.h
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdjournalhfoo
+#define foosdjournalhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <syslog.h>
+
+#include "sd-id128.h"
+
+#include "_sd-common.h"
+
+/* Journal APIs. See sd-journal(3) for more information. */
+
+_SD_BEGIN_DECLARATIONS;
+
+/* Write to daemon */
+int sd_journal_print(int priority, const char *format, ...) _sd_printf_(2, 3);
+int sd_journal_printv(int priority, const char *format, va_list ap) _sd_printf_(2, 0);
+int sd_journal_send(const char *format, ...) _sd_printf_(1, 0) _sd_sentinel_;
+int sd_journal_sendv(const struct iovec *iov, int n);
+int sd_journal_perror(const char *message);
+
+/* Used by the macros below. You probably don't want to call this directly. */
+int sd_journal_print_with_location(int priority, const char *file, const char *line, const char *func, const char *format, ...) _sd_printf_(5, 6);
+int sd_journal_printv_with_location(int priority, const char *file, const char *line, const char *func, const char *format, va_list ap) _sd_printf_(5, 0);
+int sd_journal_send_with_location(const char *file, const char *line, const char *func, const char *format, ...) _sd_printf_(4, 0) _sd_sentinel_;
+int sd_journal_sendv_with_location(const char *file, const char *line, const char *func, const struct iovec *iov, int n);
+int sd_journal_perror_with_location(const char *file, const char *line, const char *func, const char *message);
+
+/* implicitly add code location to messages sent, if this is enabled */
+#ifndef SD_JOURNAL_SUPPRESS_LOCATION
+
+#define sd_journal_print(priority, ...) sd_journal_print_with_location(priority, "CODE_FILE=" __FILE__, "CODE_LINE=" _SD_STRINGIFY(__LINE__), __func__, __VA_ARGS__)
+#define sd_journal_printv(priority, format, ap) sd_journal_printv_with_location(priority, "CODE_FILE=" __FILE__, "CODE_LINE=" _SD_STRINGIFY(__LINE__), __func__, format, ap)
+#define sd_journal_send(...) sd_journal_send_with_location("CODE_FILE=" __FILE__, "CODE_LINE=" _SD_STRINGIFY(__LINE__), __func__, __VA_ARGS__)
+#define sd_journal_sendv(iovec, n) sd_journal_sendv_with_location("CODE_FILE=" __FILE__, "CODE_LINE=" _SD_STRINGIFY(__LINE__), __func__, iovec, n)
+#define sd_journal_perror(message) sd_journal_perror_with_location("CODE_FILE=" __FILE__, "CODE_LINE=" _SD_STRINGIFY(__LINE__), __func__, message)
+
+#endif
+
+int sd_journal_stream_fd(const char *identifier, int priority, int level_prefix);
+
+/* Browse journal stream */
+
+typedef struct sd_journal sd_journal;
+
+/* Open flags */
+enum {
+ SD_JOURNAL_LOCAL_ONLY = 1 << 0,
+ SD_JOURNAL_RUNTIME_ONLY = 1 << 1,
+ SD_JOURNAL_SYSTEM = 1 << 2,
+ SD_JOURNAL_CURRENT_USER = 1 << 3,
+ SD_JOURNAL_OS_ROOT = 1 << 4,
+
+ SD_JOURNAL_SYSTEM_ONLY = SD_JOURNAL_SYSTEM /* deprecated name */
+};
+
+/* Wakeup event types */
+enum {
+ SD_JOURNAL_NOP,
+ SD_JOURNAL_APPEND,
+ SD_JOURNAL_INVALIDATE
+};
+
+int sd_journal_open(sd_journal **ret, int flags);
+int sd_journal_open_directory(sd_journal **ret, const char *path, int flags);
+int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags);
+int sd_journal_open_files(sd_journal **ret, const char **paths, int flags);
+int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fds, int flags);
+int sd_journal_open_container(sd_journal **ret, const char *machine, int flags); /* deprecated */
+void sd_journal_close(sd_journal *j);
+
+int sd_journal_previous(sd_journal *j);
+int sd_journal_next(sd_journal *j);
+
+int sd_journal_previous_skip(sd_journal *j, uint64_t skip);
+int sd_journal_next_skip(sd_journal *j, uint64_t skip);
+
+int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret);
+int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id);
+
+int sd_journal_set_data_threshold(sd_journal *j, size_t sz);
+int sd_journal_get_data_threshold(sd_journal *j, size_t *sz);
+
+int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *l);
+int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *l);
+void sd_journal_restart_data(sd_journal *j);
+
+int sd_journal_add_match(sd_journal *j, const void *data, size_t size);
+int sd_journal_add_disjunction(sd_journal *j);
+int sd_journal_add_conjunction(sd_journal *j);
+void sd_journal_flush_matches(sd_journal *j);
+
+int sd_journal_seek_head(sd_journal *j);
+int sd_journal_seek_tail(sd_journal *j);
+int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec);
+int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec);
+int sd_journal_seek_cursor(sd_journal *j, const char *cursor);
+
+int sd_journal_get_cursor(sd_journal *j, char **cursor);
+int sd_journal_test_cursor(sd_journal *j, const char *cursor);
+
+int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to);
+int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, const sd_id128_t boot_id, uint64_t *from, uint64_t *to);
+
+int sd_journal_get_usage(sd_journal *j, uint64_t *bytes);
+
+int sd_journal_query_unique(sd_journal *j, const char *field);
+int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l);
+void sd_journal_restart_unique(sd_journal *j);
+
+int sd_journal_enumerate_fields(sd_journal *j, const char **field);
+void sd_journal_restart_fields(sd_journal *j);
+
+int sd_journal_get_fd(sd_journal *j);
+int sd_journal_get_events(sd_journal *j);
+int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec);
+int sd_journal_process(sd_journal *j);
+int sd_journal_wait(sd_journal *j, uint64_t timeout_usec);
+int sd_journal_reliable_fd(sd_journal *j);
+
+int sd_journal_get_catalog(sd_journal *j, char **text);
+int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **text);
+
+int sd_journal_has_runtime_files(sd_journal *j);
+int sd_journal_has_persistent_files(sd_journal *j);
+
+/* The inverse condition avoids ambiguity of dangling 'else' after the macro */
+#define SD_JOURNAL_FOREACH(j) \
+ if (sd_journal_seek_head(j) < 0) { } \
+ else while (sd_journal_next(j) > 0)
+
+/* The inverse condition avoids ambiguity of dangling 'else' after the macro */
+#define SD_JOURNAL_FOREACH_BACKWARDS(j) \
+ if (sd_journal_seek_tail(j) < 0) { } \
+ else while (sd_journal_previous(j) > 0)
+
+/* Iterate through the data fields of the current journal entry */
+#define SD_JOURNAL_FOREACH_DATA(j, data, l) \
+ for (sd_journal_restart_data(j); sd_journal_enumerate_data((j), &(data), &(l)) > 0; )
+
+/* Iterate through the all known values of a specific field */
+#define SD_JOURNAL_FOREACH_UNIQUE(j, data, l) \
+ for (sd_journal_restart_unique(j); sd_journal_enumerate_unique((j), &(data), &(l)) > 0; )
+
+/* Iterate through all known field names */
+#define SD_JOURNAL_FOREACH_FIELD(j, field) \
+ for (sd_journal_restart_fields(j); sd_journal_enumerate_fields((j), &(field)) > 0; )
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_journal, sd_journal_close);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-lldp.h b/src/systemd/sd-lldp.h
new file mode 100644
index 0000000..bf3afad
--- /dev/null
+++ b/src/systemd/sd-lldp.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdlldphfoo
+#define foosdlldphfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <net/ethernet.h>
+#include <sys/types.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+/* IEEE 802.1AB-2009 Clause 8: TLV Types */
+enum {
+ SD_LLDP_TYPE_END = 0,
+ SD_LLDP_TYPE_CHASSIS_ID = 1,
+ SD_LLDP_TYPE_PORT_ID = 2,
+ SD_LLDP_TYPE_TTL = 3,
+ SD_LLDP_TYPE_PORT_DESCRIPTION = 4,
+ SD_LLDP_TYPE_SYSTEM_NAME = 5,
+ SD_LLDP_TYPE_SYSTEM_DESCRIPTION = 6,
+ SD_LLDP_TYPE_SYSTEM_CAPABILITIES = 7,
+ SD_LLDP_TYPE_MGMT_ADDRESS = 8,
+ SD_LLDP_TYPE_PRIVATE = 127,
+};
+
+/* IEEE 802.1AB-2009 Clause 8.5.2: Chassis subtypes */
+enum {
+ SD_LLDP_CHASSIS_SUBTYPE_RESERVED = 0,
+ SD_LLDP_CHASSIS_SUBTYPE_CHASSIS_COMPONENT = 1,
+ SD_LLDP_CHASSIS_SUBTYPE_INTERFACE_ALIAS = 2,
+ SD_LLDP_CHASSIS_SUBTYPE_PORT_COMPONENT = 3,
+ SD_LLDP_CHASSIS_SUBTYPE_MAC_ADDRESS = 4,
+ SD_LLDP_CHASSIS_SUBTYPE_NETWORK_ADDRESS = 5,
+ SD_LLDP_CHASSIS_SUBTYPE_INTERFACE_NAME = 6,
+ SD_LLDP_CHASSIS_SUBTYPE_LOCALLY_ASSIGNED = 7,
+};
+
+/* IEEE 802.1AB-2009 Clause 8.5.3: Port subtype */
+enum {
+ SD_LLDP_PORT_SUBTYPE_RESERVED = 0,
+ SD_LLDP_PORT_SUBTYPE_INTERFACE_ALIAS = 1,
+ SD_LLDP_PORT_SUBTYPE_PORT_COMPONENT = 2,
+ SD_LLDP_PORT_SUBTYPE_MAC_ADDRESS = 3,
+ SD_LLDP_PORT_SUBTYPE_NETWORK_ADDRESS = 4,
+ SD_LLDP_PORT_SUBTYPE_INTERFACE_NAME = 5,
+ SD_LLDP_PORT_SUBTYPE_AGENT_CIRCUIT_ID = 6,
+ SD_LLDP_PORT_SUBTYPE_LOCALLY_ASSIGNED = 7,
+};
+
+/* IEEE 802.1AB-2009 Clause 8.5.8: System capabilities */
+enum {
+ SD_LLDP_SYSTEM_CAPABILITIES_OTHER = 1 << 0,
+ SD_LLDP_SYSTEM_CAPABILITIES_REPEATER = 1 << 1,
+ SD_LLDP_SYSTEM_CAPABILITIES_BRIDGE = 1 << 2,
+ SD_LLDP_SYSTEM_CAPABILITIES_WLAN_AP = 1 << 3,
+ SD_LLDP_SYSTEM_CAPABILITIES_ROUTER = 1 << 4,
+ SD_LLDP_SYSTEM_CAPABILITIES_PHONE = 1 << 5,
+ SD_LLDP_SYSTEM_CAPABILITIES_DOCSIS = 1 << 6,
+ SD_LLDP_SYSTEM_CAPABILITIES_STATION = 1 << 7,
+ SD_LLDP_SYSTEM_CAPABILITIES_CVLAN = 1 << 8,
+ SD_LLDP_SYSTEM_CAPABILITIES_SVLAN = 1 << 9,
+ SD_LLDP_SYSTEM_CAPABILITIES_TPMR = 1 << 10,
+};
+
+#define SD_LLDP_SYSTEM_CAPABILITIES_ALL ((uint16_t) -1)
+
+#define SD_LLDP_SYSTEM_CAPABILITIES_ALL_ROUTERS \
+ ((uint16_t) \
+ (SD_LLDP_SYSTEM_CAPABILITIES_REPEATER| \
+ SD_LLDP_SYSTEM_CAPABILITIES_BRIDGE| \
+ SD_LLDP_SYSTEM_CAPABILITIES_WLAN_AP| \
+ SD_LLDP_SYSTEM_CAPABILITIES_ROUTER| \
+ SD_LLDP_SYSTEM_CAPABILITIES_DOCSIS| \
+ SD_LLDP_SYSTEM_CAPABILITIES_CVLAN| \
+ SD_LLDP_SYSTEM_CAPABILITIES_SVLAN| \
+ SD_LLDP_SYSTEM_CAPABILITIES_TPMR))
+
+#define SD_LLDP_OUI_802_1 (uint8_t[]) { 0x00, 0x80, 0xc2 }
+#define SD_LLDP_OUI_802_3 (uint8_t[]) { 0x00, 0x12, 0x0f }
+
+/* IEEE 802.1AB-2009 Annex E */
+enum {
+ SD_LLDP_OUI_802_1_SUBTYPE_PORT_VLAN_ID = 1,
+ SD_LLDP_OUI_802_1_SUBTYPE_PORT_PROTOCOL_VLAN_ID = 2,
+ SD_LLDP_OUI_802_1_SUBTYPE_VLAN_NAME = 3,
+ SD_LLDP_OUI_802_1_SUBTYPE_PROTOCOL_IDENTITY = 4,
+ SD_LLDP_OUI_802_1_SUBTYPE_VID_USAGE_DIGEST = 5,
+ SD_LLDP_OUI_802_1_SUBTYPE_MANAGEMENT_VID = 6,
+ SD_LLDP_OUI_802_1_SUBTYPE_LINK_AGGREGATION = 7,
+};
+
+/* IEEE 802.1AB-2009 Annex F */
+enum {
+ SD_LLDP_OUI_802_3_SUBTYPE_MAC_PHY_CONFIG_STATUS = 1,
+ SD_LLDP_OUI_802_3_SUBTYPE_POWER_VIA_MDI = 2,
+ SD_LLDP_OUI_802_3_SUBTYPE_LINK_AGGREGATION = 3,
+ SD_LLDP_OUI_802_3_SUBTYPE_MAXIMUM_FRAME_SIZE = 4,
+};
+
+typedef struct sd_lldp sd_lldp;
+typedef struct sd_lldp_neighbor sd_lldp_neighbor;
+
+typedef enum sd_lldp_event {
+ SD_LLDP_EVENT_ADDED,
+ SD_LLDP_EVENT_REMOVED,
+ SD_LLDP_EVENT_UPDATED,
+ SD_LLDP_EVENT_REFRESHED,
+ _SD_LLDP_EVENT_MAX,
+ _SD_LLDP_EVENT_INVALID = -1,
+} sd_lldp_event;
+
+typedef void (*sd_lldp_callback_t)(sd_lldp *lldp, sd_lldp_event event, sd_lldp_neighbor *n, void *userdata);
+
+int sd_lldp_new(sd_lldp **ret);
+sd_lldp* sd_lldp_ref(sd_lldp *lldp);
+sd_lldp* sd_lldp_unref(sd_lldp *lldp);
+
+int sd_lldp_start(sd_lldp *lldp);
+int sd_lldp_stop(sd_lldp *lldp);
+
+int sd_lldp_attach_event(sd_lldp *lldp, sd_event *event, int64_t priority);
+int sd_lldp_detach_event(sd_lldp *lldp);
+sd_event *sd_lldp_get_event(sd_lldp *lldp);
+
+int sd_lldp_set_callback(sd_lldp *lldp, sd_lldp_callback_t cb, void *userdata);
+int sd_lldp_set_ifindex(sd_lldp *lldp, int ifindex);
+
+/* Controls how much and what to store in the neighbors database */
+int sd_lldp_set_neighbors_max(sd_lldp *lldp, uint64_t n);
+int sd_lldp_match_capabilities(sd_lldp *lldp, uint16_t mask);
+int sd_lldp_set_filter_address(sd_lldp *lldp, const struct ether_addr *address);
+
+int sd_lldp_get_neighbors(sd_lldp *lldp, sd_lldp_neighbor ***neighbors);
+
+int sd_lldp_neighbor_from_raw(sd_lldp_neighbor **ret, const void *raw, size_t raw_size);
+sd_lldp_neighbor *sd_lldp_neighbor_ref(sd_lldp_neighbor *n);
+sd_lldp_neighbor *sd_lldp_neighbor_unref(sd_lldp_neighbor *n);
+
+/* Access to LLDP frame metadata */
+int sd_lldp_neighbor_get_source_address(sd_lldp_neighbor *n, struct ether_addr* address);
+int sd_lldp_neighbor_get_destination_address(sd_lldp_neighbor *n, struct ether_addr* address);
+int sd_lldp_neighbor_get_timestamp(sd_lldp_neighbor *n, clockid_t clock, uint64_t *ret);
+int sd_lldp_neighbor_get_raw(sd_lldp_neighbor *n, const void **ret, size_t *size);
+
+/* High-level, direct, parsed out field access. These fields exist at most once, hence may be queried directly. */
+int sd_lldp_neighbor_get_chassis_id(sd_lldp_neighbor *n, uint8_t *type, const void **ret, size_t *size);
+int sd_lldp_neighbor_get_chassis_id_as_string(sd_lldp_neighbor *n, const char **ret);
+int sd_lldp_neighbor_get_port_id(sd_lldp_neighbor *n, uint8_t *type, const void **ret, size_t *size);
+int sd_lldp_neighbor_get_port_id_as_string(sd_lldp_neighbor *n, const char **ret);
+int sd_lldp_neighbor_get_ttl(sd_lldp_neighbor *n, uint16_t *ret_sec);
+int sd_lldp_neighbor_get_system_name(sd_lldp_neighbor *n, const char **ret);
+int sd_lldp_neighbor_get_system_description(sd_lldp_neighbor *n, const char **ret);
+int sd_lldp_neighbor_get_port_description(sd_lldp_neighbor *n, const char **ret);
+int sd_lldp_neighbor_get_system_capabilities(sd_lldp_neighbor *n, uint16_t *ret);
+int sd_lldp_neighbor_get_enabled_capabilities(sd_lldp_neighbor *n, uint16_t *ret);
+
+/* Low-level, iterative TLV access. This is for evertyhing else, it iteratively goes through all available TLVs
+ * (including the ones covered with the calls above), and allows multiple TLVs for the same fields. */
+int sd_lldp_neighbor_tlv_rewind(sd_lldp_neighbor *n);
+int sd_lldp_neighbor_tlv_next(sd_lldp_neighbor *n);
+int sd_lldp_neighbor_tlv_get_type(sd_lldp_neighbor *n, uint8_t *type);
+int sd_lldp_neighbor_tlv_is_type(sd_lldp_neighbor *n, uint8_t type);
+int sd_lldp_neighbor_tlv_get_oui(sd_lldp_neighbor *n, uint8_t oui[_SD_ARRAY_STATIC 3], uint8_t *subtype);
+int sd_lldp_neighbor_tlv_is_oui(sd_lldp_neighbor *n, const uint8_t oui[_SD_ARRAY_STATIC 3], uint8_t subtype);
+int sd_lldp_neighbor_tlv_get_raw(sd_lldp_neighbor *n, const void **ret, size_t *size);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_lldp, sd_lldp_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_lldp_neighbor, sd_lldp_neighbor_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-login.h b/src/systemd/sd-login.h
new file mode 100644
index 0000000..50be543
--- /dev/null
+++ b/src/systemd/sd-login.h
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdloginhfoo
+#define foosdloginhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "_sd-common.h"
+
+/*
+ * A few points:
+ *
+ * Instead of returning an empty string array or empty uid array, we
+ * may return NULL.
+ *
+ * Free the data the library returns with libc free(). String arrays
+ * are NULL terminated, and you need to free the array itself, in
+ * addition to the strings contained.
+ *
+ * We return error codes as negative errno, kernel-style. On success, we
+ * return 0 or positive.
+ *
+ * These functions access data in /proc, /sys/fs/cgroup, and /run. All
+ * of these are virtual file systems; therefore, accesses are
+ * relatively cheap.
+ *
+ * See sd-login(3) for more information.
+ */
+
+_SD_BEGIN_DECLARATIONS;
+
+/* Get session from PID. Note that 'shared' processes of a user are
+ * not attached to a session, but only attached to a user. This will
+ * return an error for system processes and 'shared' processes of a
+ * user. */
+int sd_pid_get_session(pid_t pid, char **session);
+
+/* Get UID of the owner of the session of the PID (or in case the
+ * process is a 'shared' user process, the UID of that user is
+ * returned). This will not return the UID of the process, but rather
+ * the UID of the owner of the cgroup that the process is in. This will
+ * return an error for system processes. */
+int sd_pid_get_owner_uid(pid_t pid, uid_t *uid);
+
+/* Get systemd non-slice unit (i.e. service) name from PID, for system
+ * services. This will return an error for non-service processes. */
+int sd_pid_get_unit(pid_t pid, char **unit);
+
+/* Get systemd non-slice unit (i.e. service) name from PID, for user
+ * services. This will return an error for non-user-service
+ * processes. */
+int sd_pid_get_user_unit(pid_t pid, char **unit);
+
+/* Get slice name from PID. */
+int sd_pid_get_slice(pid_t pid, char **slice);
+
+/* Get user slice name from PID. */
+int sd_pid_get_user_slice(pid_t pid, char **slice);
+
+/* Get machine name from PID, for processes assigned to a VM or
+ * container. This will return an error for non-machine processes. */
+int sd_pid_get_machine_name(pid_t pid, char **machine);
+
+/* Get the control group from a PID, relative to the root of the
+ * hierarchy. */
+int sd_pid_get_cgroup(pid_t pid, char **cgroup);
+
+/* Similar to sd_pid_get_session(), but retrieves data about the peer
+ * of a connected AF_UNIX socket */
+int sd_peer_get_session(int fd, char **session);
+
+/* Similar to sd_pid_get_owner_uid(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
+int sd_peer_get_owner_uid(int fd, uid_t *uid);
+
+/* Similar to sd_pid_get_unit(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
+int sd_peer_get_unit(int fd, char **unit);
+
+/* Similar to sd_pid_get_user_unit(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
+int sd_peer_get_user_unit(int fd, char **unit);
+
+/* Similar to sd_pid_get_slice(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
+int sd_peer_get_slice(int fd, char **slice);
+
+/* Similar to sd_pid_get_user_slice(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
+int sd_peer_get_user_slice(int fd, char **slice);
+
+/* Similar to sd_pid_get_machine_name(), but retrieves data about the
+ * peer of a connected AF_UNIX socket */
+int sd_peer_get_machine_name(int fd, char **machine);
+
+/* Similar to sd_pid_get_cgroup(), but retrieves data about the peer
+ * of a connected AF_UNIX socket. */
+int sd_peer_get_cgroup(pid_t pid, char **cgroup);
+
+/* Get state from UID. Possible states: offline, lingering, online, active, closing */
+int sd_uid_get_state(uid_t uid, char **state);
+
+/* Return primary session of user, if there is any */
+int sd_uid_get_display(uid_t uid, char **session);
+
+/* Return 1 if UID has session on seat. If require_active is true, this will
+ * look for active sessions only. */
+int sd_uid_is_on_seat(uid_t uid, int require_active, const char *seat);
+
+/* Return sessions of user. If require_active is true, this will look for
+ * active sessions only. Returns the number of sessions.
+ * If sessions is NULL, this will just return the number of sessions. */
+int sd_uid_get_sessions(uid_t uid, int require_active, char ***sessions);
+
+/* Return seats of user is on. If require_active is true, this will look for
+ * active seats only. Returns the number of seats.
+ * If seats is NULL, this will just return the number of seats. */
+int sd_uid_get_seats(uid_t uid, int require_active, char ***seats);
+
+/* Return 1 if the session is active. */
+int sd_session_is_active(const char *session);
+
+/* Return 1 if the session is remote. */
+int sd_session_is_remote(const char *session);
+
+/* Get state from session. Possible states: online, active, closing.
+ * This function is a more generic version of sd_session_is_active(). */
+int sd_session_get_state(const char *session, char **state);
+
+/* Determine user ID of session */
+int sd_session_get_uid(const char *session, uid_t *uid);
+
+/* Determine seat of session */
+int sd_session_get_seat(const char *session, char **seat);
+
+/* Determine the (PAM) service name this session was registered by. */
+int sd_session_get_service(const char *session, char **service);
+
+/* Determine the type of this session, i.e. one of "tty", "x11", "wayland", "mir" or "unspecified". */
+int sd_session_get_type(const char *session, char **type);
+
+/* Determine the class of this session, i.e. one of "user", "greeter" or "lock-screen". */
+int sd_session_get_class(const char *session, char **clazz);
+
+/* Determine the desktop brand of this session, i.e. something like "GNOME", "KDE" or "systemd-console". */
+int sd_session_get_desktop(const char *session, char **desktop);
+
+/* Determine the X11 display of this session. */
+int sd_session_get_display(const char *session, char **display);
+
+/* Determine the remote host of this session. */
+int sd_session_get_remote_host(const char *session, char **remote_host);
+
+/* Determine the remote user of this session (if provided by PAM). */
+int sd_session_get_remote_user(const char *session, char **remote_user);
+
+/* Determine the TTY of this session. */
+int sd_session_get_tty(const char *session, char **display);
+
+/* Determine the VT number of this session. */
+int sd_session_get_vt(const char *session, unsigned *vtnr);
+
+/* Return active session and user of seat */
+int sd_seat_get_active(const char *seat, char **session, uid_t *uid);
+
+/* Return sessions and users on seat. Returns number of sessions.
+ * If sessions is NULL, this returns only the number of sessions. */
+int sd_seat_get_sessions(const char *seat, char ***sessions, uid_t **uid, unsigned *n_uids);
+
+/* Return whether the seat is multi-session capable */
+int sd_seat_can_multi_session(const char *seat);
+
+/* Return whether the seat is TTY capable, i.e. suitable for showing console UIs */
+int sd_seat_can_tty(const char *seat);
+
+/* Return whether the seat is graphics capable, i.e. suitable for showing graphical UIs */
+int sd_seat_can_graphical(const char *seat);
+
+/* Return the class of machine */
+int sd_machine_get_class(const char *machine, char **clazz);
+
+/* Return the list if host-side network interface indices of a machine */
+int sd_machine_get_ifindices(const char *machine, int **ifindices);
+
+/* Get all seats, store in *seats. Returns the number of seats. If
+ * seats is NULL, this only returns the number of seats. */
+int sd_get_seats(char ***seats);
+
+/* Get all sessions, store in *sessions. Returns the number of
+ * sessions. If sessions is NULL, this only returns the number of sessions. */
+int sd_get_sessions(char ***sessions);
+
+/* Get all logged in users, store in *users. Returns the number of
+ * users. If users is NULL, this only returns the number of users. */
+int sd_get_uids(uid_t **users);
+
+/* Get all running virtual machines/containers */
+int sd_get_machine_names(char ***machines);
+
+/* Monitor object */
+typedef struct sd_login_monitor sd_login_monitor;
+
+/* Create a new monitor. Category must be NULL, "seat", "session",
+ * "uid", or "machine" to get monitor events for the specific category
+ * (or all). */
+int sd_login_monitor_new(const char *category, sd_login_monitor** ret);
+
+/* Destroys the passed monitor. Returns NULL. */
+sd_login_monitor* sd_login_monitor_unref(sd_login_monitor *m);
+
+/* Flushes the monitor */
+int sd_login_monitor_flush(sd_login_monitor *m);
+
+/* Get FD from monitor */
+int sd_login_monitor_get_fd(sd_login_monitor *m);
+
+/* Get poll() mask to monitor */
+int sd_login_monitor_get_events(sd_login_monitor *m);
+
+/* Get timeout for poll(), as usec value relative to CLOCK_MONOTONIC's epoch */
+int sd_login_monitor_get_timeout(sd_login_monitor *m, uint64_t *timeout_usec);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_login_monitor, sd_login_monitor_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-messages.h b/src/systemd/sd-messages.h
new file mode 100644
index 0000000..293db8e
--- /dev/null
+++ b/src/systemd/sd-messages.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdmessageshfoo
+#define foosdmessageshfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "sd-id128.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+/* Hey! If you add a new message here, you *must* also update the message catalog with an appropriate explanation */
+
+/* And if you add a new ID here, make sure to generate a random one with "systemd-id128 new". Do not use any other IDs,
+ * and do not count them up manually. */
+
+#define SD_MESSAGE_JOURNAL_START SD_ID128_MAKE(f7,73,79,a8,49,0b,40,8b,be,5f,69,40,50,5a,77,7b)
+#define SD_MESSAGE_JOURNAL_START_STR SD_ID128_MAKE_STR(f7,73,79,a8,49,0b,40,8b,be,5f,69,40,50,5a,77,7b)
+#define SD_MESSAGE_JOURNAL_STOP SD_ID128_MAKE(d9,3f,b3,c9,c2,4d,45,1a,97,ce,a6,15,ce,59,c0,0b)
+#define SD_MESSAGE_JOURNAL_STOP_STR SD_ID128_MAKE_STR(d9,3f,b3,c9,c2,4d,45,1a,97,ce,a6,15,ce,59,c0,0b)
+#define SD_MESSAGE_JOURNAL_DROPPED SD_ID128_MAKE(a5,96,d6,fe,7b,fa,49,94,82,8e,72,30,9e,95,d6,1e)
+#define SD_MESSAGE_JOURNAL_DROPPED_STR SD_ID128_MAKE_STR(a5,96,d6,fe,7b,fa,49,94,82,8e,72,30,9e,95,d6,1e)
+#define SD_MESSAGE_JOURNAL_MISSED SD_ID128_MAKE(e9,bf,28,e6,e8,34,48,1b,b6,f4,8f,54,8a,d1,36,06)
+#define SD_MESSAGE_JOURNAL_MISSED_STR SD_ID128_MAKE_STR(e9,bf,28,e6,e8,34,48,1b,b6,f4,8f,54,8a,d1,36,06)
+#define SD_MESSAGE_JOURNAL_USAGE SD_ID128_MAKE(ec,38,7f,57,7b,84,4b,8f,a9,48,f3,3c,ad,9a,75,e6)
+#define SD_MESSAGE_JOURNAL_USAGE_STR SD_ID128_MAKE_STR(ec,38,7f,57,7b,84,4b,8f,a9,48,f3,3c,ad,9a,75,e6)
+
+#define SD_MESSAGE_COREDUMP SD_ID128_MAKE(fc,2e,22,bc,6e,e6,47,b6,b9,07,29,ab,34,a2,50,b1)
+#define SD_MESSAGE_COREDUMP_STR SD_ID128_MAKE_STR(fc,2e,22,bc,6e,e6,47,b6,b9,07,29,ab,34,a2,50,b1)
+#define SD_MESSAGE_TRUNCATED_CORE SD_ID128_MAKE(5a,ad,d8,e9,54,dc,4b,1a,8c,95,4d,63,fd,9e,11,37)
+#define SD_MESSAGE_TRUNCATED_CORE_STR SD_ID128_MAKE_STR(5a,ad,d8,e9,54,dc,4b,1a,8c,95,4d,63,fd,9e,11,37)
+#define SD_MESSAGE_BACKTRACE SD_ID128_MAKE(1f,4e,0a,44,a8,86,49,93,9a,ae,a3,4f,c6,da,8c,95)
+#define SD_MESSAGE_BACKTRACE_STR SD_ID128_MAKE_STR(1f,4e,0a,44,a8,86,49,93,9a,ae,a3,4f,c6,da,8c,95)
+
+#define SD_MESSAGE_SESSION_START SD_ID128_MAKE(8d,45,62,0c,1a,43,48,db,b1,74,10,da,57,c6,0c,66)
+#define SD_MESSAGE_SESSION_START_STR SD_ID128_MAKE_STR(8d,45,62,0c,1a,43,48,db,b1,74,10,da,57,c6,0c,66)
+#define SD_MESSAGE_SESSION_STOP SD_ID128_MAKE(33,54,93,94,24,b4,45,6d,98,02,ca,83,33,ed,42,4a)
+#define SD_MESSAGE_SESSION_STOP_STR SD_ID128_MAKE_STR(33,54,93,94,24,b4,45,6d,98,02,ca,83,33,ed,42,4a)
+#define SD_MESSAGE_SEAT_START SD_ID128_MAKE(fc,be,fc,5d,a2,3d,42,80,93,f9,7c,82,a9,29,0f,7b)
+#define SD_MESSAGE_SEAT_START_STR SD_ID128_MAKE_STR(fc,be,fc,5d,a2,3d,42,80,93,f9,7c,82,a9,29,0f,7b)
+#define SD_MESSAGE_SEAT_STOP SD_ID128_MAKE(e7,85,2b,fe,46,78,4e,d0,ac,cd,e0,4b,c8,64,c2,d5)
+#define SD_MESSAGE_SEAT_STOP_STR SD_ID128_MAKE_STR(e7,85,2b,fe,46,78,4e,d0,ac,cd,e0,4b,c8,64,c2,d5)
+#define SD_MESSAGE_MACHINE_START SD_ID128_MAKE(24,d8,d4,45,25,73,40,24,96,06,83,81,a6,31,2d,f2)
+#define SD_MESSAGE_MACHINE_START_STR SD_ID128_MAKE_STR(24,d8,d4,45,25,73,40,24,96,06,83,81,a6,31,2d,f2)
+#define SD_MESSAGE_MACHINE_STOP SD_ID128_MAKE(58,43,2b,d3,ba,ce,47,7c,b5,14,b5,63,81,b8,a7,58)
+#define SD_MESSAGE_MACHINE_STOP_STR SD_ID128_MAKE_STR(58,43,2b,d3,ba,ce,47,7c,b5,14,b5,63,81,b8,a7,58)
+
+#define SD_MESSAGE_TIME_CHANGE SD_ID128_MAKE(c7,a7,87,07,9b,35,4e,aa,a9,e7,7b,37,18,93,cd,27)
+#define SD_MESSAGE_TIME_CHANGE_STR SD_ID128_MAKE_STR(c7,a7,87,07,9b,35,4e,aa,a9,e7,7b,37,18,93,cd,27)
+#define SD_MESSAGE_TIMEZONE_CHANGE SD_ID128_MAKE(45,f8,2f,4a,ef,7a,4b,bf,94,2c,e8,61,d1,f2,09,90)
+#define SD_MESSAGE_TIMEZONE_CHANGE_STR SD_ID128_MAKE_STR(45,f8,2f,4a,ef,7a,4b,bf,94,2c,e8,61,d1,f2,09,90)
+
+#define SD_MESSAGE_TAINTED SD_ID128_MAKE(50,87,6a,9d,b0,0f,4c,40,bd,e1,a2,ad,38,1c,3a,1b)
+#define SD_MESSAGE_TAINTED_STR SD_ID128_MAKE_STR(50,87,6a,9d,b0,0f,4c,40,bd,e1,a2,ad,38,1c,3a,1b)
+#define SD_MESSAGE_STARTUP_FINISHED SD_ID128_MAKE(b0,7a,24,9c,d0,24,41,4a,82,dd,00,cd,18,13,78,ff)
+#define SD_MESSAGE_STARTUP_FINISHED_STR SD_ID128_MAKE_STR(b0,7a,24,9c,d0,24,41,4a,82,dd,00,cd,18,13,78,ff)
+#define SD_MESSAGE_USER_STARTUP_FINISHED \
+ SD_ID128_MAKE(ee,d0,0a,68,ff,d8,4e,31,88,21,05,fd,97,3a,bd,d1)
+#define SD_MESSAGE_USER_STARTUP_FINISHED_STR \
+ SD_ID128_MAKE_STR(ee,d0,0a,68,ff,d8,4e,31,88,21,05,fd,97,3a,bd,d1)
+
+#define SD_MESSAGE_SLEEP_START SD_ID128_MAKE(6b,bd,95,ee,97,79,41,e4,97,c4,8b,e2,7c,25,41,28)
+#define SD_MESSAGE_SLEEP_START_STR SD_ID128_MAKE_STR(6b,bd,95,ee,97,79,41,e4,97,c4,8b,e2,7c,25,41,28)
+#define SD_MESSAGE_SLEEP_STOP SD_ID128_MAKE(88,11,e6,df,2a,8e,40,f5,8a,94,ce,a2,6f,8e,bf,14)
+#define SD_MESSAGE_SLEEP_STOP_STR SD_ID128_MAKE_STR(88,11,e6,df,2a,8e,40,f5,8a,94,ce,a2,6f,8e,bf,14)
+
+#define SD_MESSAGE_SHUTDOWN SD_ID128_MAKE(98,26,88,66,d1,d5,4a,49,9c,4e,98,92,1d,93,bc,40)
+#define SD_MESSAGE_SHUTDOWN_STR SD_ID128_MAKE_STR(98,26,88,66,d1,d5,4a,49,9c,4e,98,92,1d,93,bc,40)
+
+/* The messages below are actually about jobs, not really about units, the macros are misleadingly named. Moreover
+ * SD_MESSAGE_UNIT_FAILED is not actually about a failing unit but about a failed start job. A job either finishes with
+ * SD_MESSAGE_UNIT_STARTED or with SD_MESSAGE_UNIT_FAILED hence. */
+#define SD_MESSAGE_UNIT_STARTING SD_ID128_MAKE(7d,49,58,e8,42,da,4a,75,8f,6c,1c,dc,7b,36,dc,c5)
+#define SD_MESSAGE_UNIT_STARTING_STR SD_ID128_MAKE_STR(7d,49,58,e8,42,da,4a,75,8f,6c,1c,dc,7b,36,dc,c5)
+#define SD_MESSAGE_UNIT_STARTED SD_ID128_MAKE(39,f5,34,79,d3,a0,45,ac,8e,11,78,62,48,23,1f,bf)
+#define SD_MESSAGE_UNIT_STARTED_STR SD_ID128_MAKE_STR(39,f5,34,79,d3,a0,45,ac,8e,11,78,62,48,23,1f,bf)
+#define SD_MESSAGE_UNIT_FAILED SD_ID128_MAKE(be,02,cf,68,55,d2,42,8b,a4,0d,f7,e9,d0,22,f0,3d)
+#define SD_MESSAGE_UNIT_FAILED_STR SD_ID128_MAKE_STR(be,02,cf,68,55,d2,42,8b,a4,0d,f7,e9,d0,22,f0,3d)
+#define SD_MESSAGE_UNIT_STOPPING SD_ID128_MAKE(de,5b,42,6a,63,be,47,a7,b6,ac,3e,aa,c8,2e,2f,6f)
+#define SD_MESSAGE_UNIT_STOPPING_STR SD_ID128_MAKE_STR(de,5b,42,6a,63,be,47,a7,b6,ac,3e,aa,c8,2e,2f,6f)
+#define SD_MESSAGE_UNIT_STOPPED SD_ID128_MAKE(9d,1a,aa,27,d6,01,40,bd,96,36,54,38,aa,d2,02,86)
+#define SD_MESSAGE_UNIT_STOPPED_STR SD_ID128_MAKE_STR(9d,1a,aa,27,d6,01,40,bd,96,36,54,38,aa,d2,02,86)
+#define SD_MESSAGE_UNIT_RELOADING SD_ID128_MAKE(d3,4d,03,7f,ff,18,47,e6,ae,66,9a,37,0e,69,47,25)
+#define SD_MESSAGE_UNIT_RELOADING_STR SD_ID128_MAKE_STR(d3,4d,03,7f,ff,18,47,e6,ae,66,9a,37,0e,69,47,25)
+#define SD_MESSAGE_UNIT_RELOADED SD_ID128_MAKE(7b,05,eb,c6,68,38,42,22,ba,a8,88,11,79,cf,da,54)
+#define SD_MESSAGE_UNIT_RELOADED_STR SD_ID128_MAKE_STR(7b,05,eb,c6,68,38,42,22,ba,a8,88,11,79,cf,da,54)
+
+#define SD_MESSAGE_UNIT_RESTART_SCHEDULED SD_ID128_MAKE(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3)
+#define SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR \
+ SD_ID128_MAKE_STR(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3)
+
+#define SD_MESSAGE_UNIT_RESOURCES SD_ID128_MAKE(ae,8f,7b,86,6b,03,47,b9,af,31,fe,1c,80,b1,27,c0)
+#define SD_MESSAGE_UNIT_RESOURCES_STR SD_ID128_MAKE_STR(ae,8f,7b,86,6b,03,47,b9,af,31,fe,1c,80,b1,27,c0)
+
+#define SD_MESSAGE_UNIT_SUCCESS SD_ID128_MAKE(7a,d2,d1,89,f7,e9,4e,70,a3,8c,78,13,54,91,24,48)
+#define SD_MESSAGE_UNIT_SUCCESS_STR SD_ID128_MAKE_STR(7a,d2,d1,89,f7,e9,4e,70,a3,8c,78,13,54,91,24,48)
+#define SD_MESSAGE_UNIT_FAILURE_RESULT SD_ID128_MAKE(d9,b3,73,ed,55,a6,4f,eb,82,42,e0,2d,be,79,a4,9c)
+#define SD_MESSAGE_UNIT_FAILURE_RESULT_STR \
+ SD_ID128_MAKE_STR(d9,b3,73,ed,55,a6,4f,eb,82,42,e0,2d,be,79,a4,9c)
+
+#define SD_MESSAGE_SPAWN_FAILED SD_ID128_MAKE(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)
+#define SD_MESSAGE_SPAWN_FAILED_STR SD_ID128_MAKE_STR(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)
+
+#define SD_MESSAGE_UNIT_PROCESS_EXIT SD_ID128_MAKE(98,e3,22,20,3f,7a,4e,d2,90,d0,9f,e0,3c,09,fe,15)
+#define SD_MESSAGE_UNIT_PROCESS_EXIT_STR SD_ID128_MAKE_STR(98,e3,22,20,3f,7a,4e,d2,90,d0,9f,e0,3c,09,fe,15)
+
+#define SD_MESSAGE_FORWARD_SYSLOG_MISSED SD_ID128_MAKE(00,27,22,9c,a0,64,41,81,a7,6c,4e,92,45,8a,fa,2e)
+#define SD_MESSAGE_FORWARD_SYSLOG_MISSED_STR \
+ SD_ID128_MAKE_STR(00,27,22,9c,a0,64,41,81,a7,6c,4e,92,45,8a,fa,2e)
+
+#define SD_MESSAGE_OVERMOUNTING SD_ID128_MAKE(1d,ee,03,69,c7,fc,47,36,b7,09,9b,38,ec,b4,6e,e7)
+#define SD_MESSAGE_OVERMOUNTING_STR SD_ID128_MAKE_STR(1d,ee,03,69,c7,fc,47,36,b7,09,9b,38,ec,b4,6e,e7)
+
+#define SD_MESSAGE_LID_OPENED SD_ID128_MAKE(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,6f)
+#define SD_MESSAGE_LID_OPENED_STR SD_ID128_MAKE_STR(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,6f)
+#define SD_MESSAGE_LID_CLOSED SD_ID128_MAKE(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,70)
+#define SD_MESSAGE_LID_CLOSED_STR SD_ID128_MAKE_STR(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,70)
+#define SD_MESSAGE_SYSTEM_DOCKED SD_ID128_MAKE(f5,f4,16,b8,62,07,4b,28,92,7a,48,c3,ba,7d,51,ff)
+#define SD_MESSAGE_SYSTEM_DOCKED_STR SD_ID128_MAKE_STR(f5,f4,16,b8,62,07,4b,28,92,7a,48,c3,ba,7d,51,ff)
+#define SD_MESSAGE_SYSTEM_UNDOCKED SD_ID128_MAKE(51,e1,71,bd,58,52,48,56,81,10,14,4c,51,7c,ca,53)
+#define SD_MESSAGE_SYSTEM_UNDOCKED_STR SD_ID128_MAKE_STR(51,e1,71,bd,58,52,48,56,81,10,14,4c,51,7c,ca,53)
+#define SD_MESSAGE_POWER_KEY SD_ID128_MAKE(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,71)
+#define SD_MESSAGE_POWER_KEY_STR SD_ID128_MAKE_STR(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,71)
+#define SD_MESSAGE_SUSPEND_KEY SD_ID128_MAKE(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,72)
+#define SD_MESSAGE_SUSPEND_KEY_STR SD_ID128_MAKE_STR(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,72)
+#define SD_MESSAGE_HIBERNATE_KEY SD_ID128_MAKE(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,73)
+#define SD_MESSAGE_HIBERNATE_KEY_STR SD_ID128_MAKE_STR(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,73)
+
+#define SD_MESSAGE_INVALID_CONFIGURATION SD_ID128_MAKE(c7,72,d2,4e,9a,88,4c,be,b9,ea,12,62,5c,30,6c,01)
+#define SD_MESSAGE_INVALID_CONFIGURATION_STR \
+ SD_ID128_MAKE_STR(c7,72,d2,4e,9a,88,4c,be,b9,ea,12,62,5c,30,6c,01)
+
+#define SD_MESSAGE_DNSSEC_FAILURE SD_ID128_MAKE(16,75,d7,f1,72,17,40,98,b1,10,8b,f8,c7,dc,8f,5d)
+#define SD_MESSAGE_DNSSEC_FAILURE_STR SD_ID128_MAKE_STR(16,75,d7,f1,72,17,40,98,b1,10,8b,f8,c7,dc,8f,5d)
+#define SD_MESSAGE_DNSSEC_TRUST_ANCHOR_REVOKED \
+ SD_ID128_MAKE(4d,44,08,cf,d0,d1,44,85,91,84,d1,e6,5d,7c,8a,65)
+#define SD_MESSAGE_DNSSEC_TRUST_ANCHOR_REVOKED_STR \
+ SD_ID128_MAKE_STR(4d,44,08,cf,d0,d1,44,85,91,84,d1,e6,5d,7c,8a,65)
+#define SD_MESSAGE_DNSSEC_DOWNGRADE SD_ID128_MAKE(36,db,2d,fa,5a,90,45,e1,bd,4a,f5,f9,3e,1c,f0,57)
+#define SD_MESSAGE_DNSSEC_DOWNGRADE_STR SD_ID128_MAKE_STR(36,db,2d,fa,5a,90,45,e1,bd,4a,f5,f9,3e,1c,f0,57)
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-ndisc.h b/src/systemd/sd-ndisc.h
new file mode 100644
index 0000000..d1bee34
--- /dev/null
+++ b/src/systemd/sd-ndisc.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdndiscfoo
+#define foosdndiscfoo
+
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+/* Neightbor Discovery Options, RFC 4861, Section 4.6 and
+ * https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xhtml#icmpv6-parameters-5 */
+enum {
+ SD_NDISC_OPTION_SOURCE_LL_ADDRESS = 1,
+ SD_NDISC_OPTION_TARGET_LL_ADDRESS = 2,
+ SD_NDISC_OPTION_PREFIX_INFORMATION = 3,
+ SD_NDISC_OPTION_MTU = 5,
+ SD_NDISC_OPTION_ROUTE_INFORMATION = 24,
+ SD_NDISC_OPTION_RDNSS = 25,
+ SD_NDISC_OPTION_FLAGS_EXTENSION = 26,
+ SD_NDISC_OPTION_DNSSL = 31,
+ SD_NDISC_OPTION_CAPTIVE_PORTAL = 37,
+};
+
+/* Route preference, RFC 4191, Section 2.1 */
+enum {
+ SD_NDISC_PREFERENCE_LOW = 3U,
+ SD_NDISC_PREFERENCE_MEDIUM = 0U,
+ SD_NDISC_PREFERENCE_HIGH = 1U,
+};
+
+typedef struct sd_ndisc sd_ndisc;
+typedef struct sd_ndisc_router sd_ndisc_router;
+
+typedef enum sd_ndisc_event {
+ SD_NDISC_EVENT_TIMEOUT,
+ SD_NDISC_EVENT_ROUTER,
+ _SD_NDISC_EVENT_MAX,
+ _SD_NDISC_EVENT_INVALID = -1,
+} sd_ndisc_event;
+
+typedef void (*sd_ndisc_callback_t)(sd_ndisc *nd, sd_ndisc_event event, sd_ndisc_router *rt, void *userdata);
+
+int sd_ndisc_new(sd_ndisc **ret);
+sd_ndisc *sd_ndisc_ref(sd_ndisc *nd);
+sd_ndisc *sd_ndisc_unref(sd_ndisc *nd);
+
+int sd_ndisc_start(sd_ndisc *nd);
+int sd_ndisc_stop(sd_ndisc *nd);
+
+int sd_ndisc_attach_event(sd_ndisc *nd, sd_event *event, int64_t priority);
+int sd_ndisc_detach_event(sd_ndisc *nd);
+sd_event *sd_ndisc_get_event(sd_ndisc *nd);
+
+int sd_ndisc_set_callback(sd_ndisc *nd, sd_ndisc_callback_t cb, void *userdata);
+int sd_ndisc_set_ifindex(sd_ndisc *nd, int interface_index);
+int sd_ndisc_set_mac(sd_ndisc *nd, const struct ether_addr *mac_addr);
+
+int sd_ndisc_get_mtu(sd_ndisc *nd, uint32_t *ret);
+int sd_ndisc_get_hop_limit(sd_ndisc *nd, uint8_t *ret);
+
+int sd_ndisc_router_from_raw(sd_ndisc_router **ret, const void *raw, size_t raw_size);
+sd_ndisc_router *sd_ndisc_router_ref(sd_ndisc_router *rt);
+sd_ndisc_router *sd_ndisc_router_unref(sd_ndisc_router *rt);
+
+int sd_ndisc_router_get_address(sd_ndisc_router *rt, struct in6_addr *ret_addr);
+int sd_ndisc_router_get_timestamp(sd_ndisc_router *rt, clockid_t clock, uint64_t *ret);
+int sd_ndisc_router_get_raw(sd_ndisc_router *rt, const void **ret, size_t *size);
+
+int sd_ndisc_router_get_hop_limit(sd_ndisc_router *rt, uint8_t *ret);
+int sd_ndisc_router_get_flags(sd_ndisc_router *rt, uint64_t *ret_flags);
+int sd_ndisc_router_get_preference(sd_ndisc_router *rt, unsigned *ret);
+int sd_ndisc_router_get_lifetime(sd_ndisc_router *rt, uint16_t *ret_lifetime);
+int sd_ndisc_router_get_mtu(sd_ndisc_router *rt, uint32_t *ret);
+
+/* Generic option access */
+int sd_ndisc_router_option_rewind(sd_ndisc_router *rt);
+int sd_ndisc_router_option_next(sd_ndisc_router *rt);
+int sd_ndisc_router_option_get_type(sd_ndisc_router *rt, uint8_t *ret);
+int sd_ndisc_router_option_is_type(sd_ndisc_router *rt, uint8_t type);
+int sd_ndisc_router_option_get_raw(sd_ndisc_router *rt, const void **ret, size_t *size);
+
+/* Specific option access: SD_NDISC_OPTION_PREFIX_INFORMATION */
+int sd_ndisc_router_prefix_get_valid_lifetime(sd_ndisc_router *rt, uint32_t *ret);
+int sd_ndisc_router_prefix_get_preferred_lifetime(sd_ndisc_router *rt, uint32_t *ret);
+int sd_ndisc_router_prefix_get_flags(sd_ndisc_router *rt, uint8_t *ret);
+int sd_ndisc_router_prefix_get_address(sd_ndisc_router *rt, struct in6_addr *ret_addr);
+int sd_ndisc_router_prefix_get_prefixlen(sd_ndisc_router *rt, unsigned *prefixlen);
+
+/* Specific option access: SD_NDISC_OPTION_ROUTE_INFORMATION */
+int sd_ndisc_router_route_get_lifetime(sd_ndisc_router *rt, uint32_t *ret);
+int sd_ndisc_router_route_get_address(sd_ndisc_router *rt, struct in6_addr *ret_addr);
+int sd_ndisc_router_route_get_prefixlen(sd_ndisc_router *rt, unsigned *prefixlen);
+int sd_ndisc_router_route_get_preference(sd_ndisc_router *rt, unsigned *ret);
+
+/* Specific option access: SD_NDISC_OPTION_RDNSS */
+int sd_ndisc_router_rdnss_get_addresses(sd_ndisc_router *rt, const struct in6_addr **ret);
+int sd_ndisc_router_rdnss_get_lifetime(sd_ndisc_router *rt, uint32_t *ret);
+
+/* Specific option access: SD_NDISC_OPTION_DNSSL */
+int sd_ndisc_router_dnssl_get_domains(sd_ndisc_router *rt, char ***ret);
+int sd_ndisc_router_dnssl_get_lifetime(sd_ndisc_router *rt, uint32_t *ret);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_ndisc, sd_ndisc_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_ndisc_router, sd_ndisc_router_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-netlink.h b/src/systemd/sd-netlink.h
new file mode 100644
index 0000000..9e6e437
--- /dev/null
+++ b/src/systemd/sd-netlink.h
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdnetlinkhfoo
+#define foosdnetlinkhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <linux/neighbour.h>
+#include <linux/rtnetlink.h>
+#include <net/ethernet.h>
+#include <netinet/ether.h>
+#include <netinet/in.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_netlink sd_netlink;
+typedef struct sd_genl_socket sd_genl_socket;
+typedef struct sd_netlink_message sd_netlink_message;
+typedef struct sd_netlink_slot sd_netlink_slot;
+typedef enum {SD_GENL_ID_CTRL, SD_GENL_WIREGUARD, SD_GENL_FOU} sd_genl_family;
+
+/* callback */
+
+typedef int (*sd_netlink_message_handler_t)(sd_netlink *nl, sd_netlink_message *m, void *userdata);
+typedef _sd_destroy_t sd_netlink_destroy_t;
+
+/* bus */
+int sd_netlink_new_from_netlink(sd_netlink **nl, int fd);
+int sd_netlink_open(sd_netlink **nl);
+int sd_netlink_open_fd(sd_netlink **nl, int fd);
+int sd_netlink_inc_rcvbuf(sd_netlink *nl, const size_t size);
+
+sd_netlink *sd_netlink_ref(sd_netlink *nl);
+sd_netlink *sd_netlink_unref(sd_netlink *nl);
+
+int sd_netlink_send(sd_netlink *nl, sd_netlink_message *message, uint32_t *serial);
+int sd_netlink_call_async(sd_netlink *nl, sd_netlink_slot **ret_slot, sd_netlink_message *message,
+ sd_netlink_message_handler_t callback, sd_netlink_destroy_t destoy_callback,
+ void *userdata, uint64_t usec, const char *description);
+int sd_netlink_call(sd_netlink *nl, sd_netlink_message *message, uint64_t timeout,
+ sd_netlink_message **reply);
+
+int sd_netlink_get_events(sd_netlink *nl);
+int sd_netlink_get_timeout(sd_netlink *nl, uint64_t *timeout);
+int sd_netlink_process(sd_netlink *nl, sd_netlink_message **ret);
+int sd_netlink_wait(sd_netlink *nl, uint64_t timeout);
+
+int sd_netlink_add_match(sd_netlink *nl, sd_netlink_slot **ret_slot, uint16_t match,
+ sd_netlink_message_handler_t callback,
+ sd_netlink_destroy_t destroy_callback,
+ void *userdata, const char *description);
+
+int sd_netlink_attach_event(sd_netlink *nl, sd_event *e, int64_t priority);
+int sd_netlink_detach_event(sd_netlink *nl);
+
+int sd_netlink_message_append_string(sd_netlink_message *m, unsigned short type, const char *data);
+int sd_netlink_message_append_flag(sd_netlink_message *m, unsigned short type);
+int sd_netlink_message_append_u8(sd_netlink_message *m, unsigned short type, uint8_t data);
+int sd_netlink_message_append_u16(sd_netlink_message *m, unsigned short type, uint16_t data);
+int sd_netlink_message_append_u32(sd_netlink_message *m, unsigned short type, uint32_t data);
+int sd_netlink_message_append_data(sd_netlink_message *m, unsigned short type, const void *data, size_t len);
+int sd_netlink_message_append_in_addr(sd_netlink_message *m, unsigned short type, const struct in_addr *data);
+int sd_netlink_message_append_in6_addr(sd_netlink_message *m, unsigned short type, const struct in6_addr *data);
+int sd_netlink_message_append_sockaddr_in(sd_netlink_message *m, unsigned short type, const struct sockaddr_in *data);
+int sd_netlink_message_append_sockaddr_in6(sd_netlink_message *m, unsigned short type, const struct sockaddr_in6 *data);
+int sd_netlink_message_append_ether_addr(sd_netlink_message *m, unsigned short type, const struct ether_addr *data);
+int sd_netlink_message_append_cache_info(sd_netlink_message *m, unsigned short type, const struct ifa_cacheinfo *info);
+
+int sd_netlink_message_open_container(sd_netlink_message *m, unsigned short type);
+int sd_netlink_message_open_container_union(sd_netlink_message *m, unsigned short type, const char *key);
+int sd_netlink_message_close_container(sd_netlink_message *m);
+
+int sd_netlink_message_read(sd_netlink_message *m, unsigned short type, size_t size, void *data);
+int sd_netlink_message_read_string(sd_netlink_message *m, unsigned short type, const char **data);
+int sd_netlink_message_read_u8(sd_netlink_message *m, unsigned short type, uint8_t *data);
+int sd_netlink_message_read_u16(sd_netlink_message *m, unsigned short type, uint16_t *data);
+int sd_netlink_message_read_u32(sd_netlink_message *m, unsigned short type, uint32_t *data);
+int sd_netlink_message_read_ether_addr(sd_netlink_message *m, unsigned short type, struct ether_addr *data);
+int sd_netlink_message_read_cache_info(sd_netlink_message *m, unsigned short type, struct ifa_cacheinfo *info);
+int sd_netlink_message_read_in_addr(sd_netlink_message *m, unsigned short type, struct in_addr *data);
+int sd_netlink_message_read_in6_addr(sd_netlink_message *m, unsigned short type, struct in6_addr *data);
+int sd_netlink_message_enter_container(sd_netlink_message *m, unsigned short type);
+int sd_netlink_message_exit_container(sd_netlink_message *m);
+
+int sd_netlink_message_open_array(sd_netlink_message *m, uint16_t type);
+int sd_netlink_message_cancel_array(sd_netlink_message *m);
+
+int sd_netlink_message_rewind(sd_netlink_message *m);
+
+sd_netlink_message *sd_netlink_message_next(sd_netlink_message *m);
+
+sd_netlink_message *sd_netlink_message_ref(sd_netlink_message *m);
+sd_netlink_message *sd_netlink_message_unref(sd_netlink_message *m);
+
+int sd_netlink_message_request_dump(sd_netlink_message *m, int dump);
+int sd_netlink_message_is_error(sd_netlink_message *m);
+int sd_netlink_message_get_errno(sd_netlink_message *m);
+int sd_netlink_message_get_type(sd_netlink_message *m, uint16_t *type);
+int sd_netlink_message_set_flags(sd_netlink_message *m, uint16_t flags);
+int sd_netlink_message_is_broadcast(sd_netlink_message *m);
+
+/* rtnl */
+
+int sd_rtnl_message_new_link(sd_netlink *nl, sd_netlink_message **ret, uint16_t msg_type, int index);
+int sd_rtnl_message_new_addr_update(sd_netlink *nl, sd_netlink_message **ret, int index, int family);
+int sd_rtnl_message_new_addr(sd_netlink *nl, sd_netlink_message **ret, uint16_t msg_type, int index, int family);
+int sd_rtnl_message_new_route(sd_netlink *nl, sd_netlink_message **ret, uint16_t nlmsg_type, int rtm_family, unsigned char rtm_protocol);
+int sd_rtnl_message_new_neigh(sd_netlink *nl, sd_netlink_message **ret, uint16_t msg_type, int index, int nda_family);
+
+int sd_rtnl_message_get_family(sd_netlink_message *m, int *family);
+
+int sd_rtnl_message_addr_set_prefixlen(sd_netlink_message *m, unsigned char prefixlen);
+int sd_rtnl_message_addr_set_scope(sd_netlink_message *m, unsigned char scope);
+int sd_rtnl_message_addr_set_flags(sd_netlink_message *m, unsigned char flags);
+int sd_rtnl_message_addr_get_family(sd_netlink_message *m, int *family);
+int sd_rtnl_message_addr_get_prefixlen(sd_netlink_message *m, unsigned char *prefixlen);
+int sd_rtnl_message_addr_get_scope(sd_netlink_message *m, unsigned char *scope);
+int sd_rtnl_message_addr_get_flags(sd_netlink_message *m, unsigned char *flags);
+int sd_rtnl_message_addr_get_ifindex(sd_netlink_message *m, int *ifindex);
+
+int sd_rtnl_message_link_set_flags(sd_netlink_message *m, unsigned flags, unsigned change);
+int sd_rtnl_message_link_set_type(sd_netlink_message *m, unsigned type);
+int sd_rtnl_message_link_set_family(sd_netlink_message *m, unsigned family);
+int sd_rtnl_message_link_get_ifindex(sd_netlink_message *m, int *ifindex);
+int sd_rtnl_message_link_get_flags(sd_netlink_message *m, unsigned *flags);
+int sd_rtnl_message_link_get_type(sd_netlink_message *m, unsigned short *type);
+
+int sd_rtnl_message_route_set_dst_prefixlen(sd_netlink_message *m, unsigned char prefixlen);
+int sd_rtnl_message_route_set_src_prefixlen(sd_netlink_message *m, unsigned char prefixlen);
+int sd_rtnl_message_route_set_scope(sd_netlink_message *m, unsigned char scope);
+int sd_rtnl_message_route_set_flags(sd_netlink_message *m, unsigned flags);
+int sd_rtnl_message_route_set_table(sd_netlink_message *m, unsigned char table);
+int sd_rtnl_message_route_set_type(sd_netlink_message *m, unsigned char type);
+int sd_rtnl_message_route_get_flags(sd_netlink_message *m, unsigned *flags);
+int sd_rtnl_message_route_get_family(sd_netlink_message *m, int *family);
+int sd_rtnl_message_route_set_family(sd_netlink_message *m, int family);
+int sd_rtnl_message_route_get_protocol(sd_netlink_message *m, unsigned char *protocol);
+int sd_rtnl_message_route_get_scope(sd_netlink_message *m, unsigned char *scope);
+int sd_rtnl_message_route_get_tos(sd_netlink_message *m, unsigned char *tos);
+int sd_rtnl_message_route_get_table(sd_netlink_message *m, unsigned char *table);
+int sd_rtnl_message_route_get_dst_prefixlen(sd_netlink_message *m, unsigned char *dst_len);
+int sd_rtnl_message_route_get_src_prefixlen(sd_netlink_message *m, unsigned char *src_len);
+int sd_rtnl_message_route_get_type(sd_netlink_message *m, unsigned char *type);
+
+int sd_rtnl_message_neigh_set_flags(sd_netlink_message *m, uint8_t flags);
+int sd_rtnl_message_neigh_set_state(sd_netlink_message *m, uint16_t state);
+int sd_rtnl_message_neigh_get_family(sd_netlink_message *m, int *family);
+int sd_rtnl_message_neigh_get_ifindex(sd_netlink_message *m, int *family);
+int sd_rtnl_message_neigh_get_state(sd_netlink_message *m, uint16_t *state);
+int sd_rtnl_message_neigh_get_flags(sd_netlink_message *m, uint8_t *flags);
+
+int sd_rtnl_message_new_addrlabel(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int ifindex, int ifal_family);
+int sd_rtnl_message_addrlabel_set_prefixlen(sd_netlink_message *m, unsigned char prefixlen);
+int sd_rtnl_message_addrlabel_get_prefixlen(sd_netlink_message *m, unsigned char *prefixlen);
+
+int sd_rtnl_message_new_routing_policy_rule(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int ifal_family);
+int sd_rtnl_message_routing_policy_rule_set_tos(sd_netlink_message *m, unsigned char tos);
+int sd_rtnl_message_routing_policy_rule_get_tos(sd_netlink_message *m, unsigned char *tos);
+int sd_rtnl_message_routing_policy_rule_set_table(sd_netlink_message *m, unsigned char table);
+int sd_rtnl_message_routing_policy_rule_get_table(sd_netlink_message *m, unsigned char *table);
+int sd_rtnl_message_routing_policy_rule_set_rtm_src_prefixlen(sd_netlink_message *m, unsigned char len);
+int sd_rtnl_message_routing_policy_rule_get_rtm_src_prefixlen(sd_netlink_message *m, unsigned char *len);
+int sd_rtnl_message_routing_policy_rule_set_rtm_dst_prefixlen(sd_netlink_message *m, unsigned char len);
+int sd_rtnl_message_routing_policy_rule_get_rtm_dst_prefixlen(sd_netlink_message *m, unsigned char *len);
+int sd_rtnl_message_routing_policy_rule_set_rtm_type(sd_netlink_message *m, unsigned char type);
+int sd_rtnl_message_routing_policy_rule_get_rtm_type(sd_netlink_message *m, unsigned char *type);
+int sd_rtnl_message_routing_policy_rule_set_flags(sd_netlink_message *m, unsigned flags);
+int sd_rtnl_message_routing_policy_rule_get_flags(sd_netlink_message *m, unsigned *flags);
+
+/* genl */
+int sd_genl_socket_open(sd_netlink **nl);
+int sd_genl_message_new(sd_netlink *nl, sd_genl_family family, uint8_t cmd, sd_netlink_message **m);
+
+/* slot */
+sd_netlink_slot *sd_netlink_slot_ref(sd_netlink_slot *nl);
+sd_netlink_slot *sd_netlink_slot_unref(sd_netlink_slot *nl);
+
+sd_netlink *sd_netlink_slot_get_netlink(sd_netlink_slot *slot);
+void *sd_netlink_slot_get_userdata(sd_netlink_slot *slot);
+void *sd_netlink_slot_set_userdata(sd_netlink_slot *slot, void *userdata);
+int sd_netlink_slot_get_destroy_callback(sd_netlink_slot *slot, sd_netlink_destroy_t *callback);
+int sd_netlink_slot_set_destroy_callback(sd_netlink_slot *slot, sd_netlink_destroy_t callback);
+int sd_netlink_slot_get_floating(sd_netlink_slot *slot);
+int sd_netlink_slot_set_floating(sd_netlink_slot *slot, int b);
+int sd_netlink_slot_get_description(sd_netlink_slot *slot, const char **description);
+int sd_netlink_slot_set_description(sd_netlink_slot *slot, const char *description);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_netlink, sd_netlink_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_netlink_message, sd_netlink_message_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_netlink_slot, sd_netlink_slot_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-network.h b/src/systemd/sd-network.h
new file mode 100644
index 0000000..cc6bca9
--- /dev/null
+++ b/src/systemd/sd-network.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdnetworkhfoo
+#define foosdnetworkhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "_sd-common.h"
+
+/*
+ * A few points:
+ *
+ * Instead of returning an empty string array or empty integer array, we
+ * may return NULL.
+ *
+ * Free the data the library returns with libc free(). String arrays
+ * are NULL terminated, and you need to free the array itself in
+ * addition to the strings contained.
+ *
+ * We return error codes as negative errno, kernel-style. On success, we
+ * return 0 or positive.
+ *
+ * These functions access data in /run. This is a virtual file system;
+ * therefore, accesses are relatively cheap.
+ *
+ * See sd-network(3) for more information.
+ */
+
+_SD_BEGIN_DECLARATIONS;
+
+/* Get overall operational state
+ * Possible states: down, up, dormant, carrier, degraded, routable
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of any links
+ */
+int sd_network_get_operational_state(char **state);
+
+/* Get DNS entries for all links. These are string representations of
+ * IP addresses */
+int sd_network_get_dns(char ***dns);
+
+/* Get NTP entries for all links. These are domain names or string
+ * representations of IP addresses */
+int sd_network_get_ntp(char ***ntp);
+
+/* Get the search domains for all links. */
+int sd_network_get_search_domains(char ***domains);
+
+/* Get the search domains for all links. */
+int sd_network_get_route_domains(char ***domains);
+
+/* Get setup state from ifindex.
+ * Possible states:
+ * pending: udev is still processing the link, we don't yet know if we will manage it
+ * failed: networkd failed to manage the link
+ * configuring: in the process of retrieving configuration or configuring the link
+ * configured: link configured successfully
+ * unmanaged: networkd is not handling the link
+ * linger: the link is gone, but has not yet been dropped by networkd
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of the link
+ */
+int sd_network_link_get_setup_state(int ifindex, char **state);
+
+/* Get operational state from ifindex.
+ * Possible states:
+ * off: the device is powered down
+ * no-carrier: the device is powered up, but it does not yet have a carrier
+ * dormant: the device has a carrier, but is not yet ready for normal traffic
+ * carrier: the link has a carrier
+ * degraded: the link has carrier and addresses valid on the local link configured
+ * routable: the link has carrier and routable address configured
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of the link
+ */
+int sd_network_link_get_operational_state(int ifindex, char **state);
+
+/* Indicates whether the network is relevant to being online.
+ * Possible return codes:
+ * 0: the connection is not required
+ * 1: the connection is required to consider the system online
+ * <0: networkd is not aware of the link
+ */
+int sd_network_link_get_required_for_online(int ifindex);
+
+/* Get path to .network file applied to link */
+int sd_network_link_get_network_file(int ifindex, char **filename);
+
+/* Get DNS entries for a given link. These are string representations of
+ * IP addresses */
+int sd_network_link_get_dns(int ifindex, char ***ret);
+
+/* Get NTP entries for a given link. These are domain names or string
+ * representations of IP addresses */
+int sd_network_link_get_ntp(int ifindex, char ***ret);
+
+/* Indicates whether or not LLMNR should be enabled for the link
+ * Possible levels of support: yes, no, resolve
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of the link
+ */
+int sd_network_link_get_llmnr(int ifindex, char **llmnr);
+
+/* Indicates whether or not MulticastDNS should be enabled for the
+ * link.
+ * Possible levels of support: yes, no, resolve
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of the link
+ */
+int sd_network_link_get_mdns(int ifindex, char **mdns);
+
+/* Indicates whether or not DNS-over-TLS should be enabled for the
+ * link.
+ * Possible levels of support: strict, no, opportunistic
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of the link
+ */
+int sd_network_link_get_dns_over_tls(int ifindex, char **dns_over_tls);
+
+/* Indicates whether or not DNSSEC should be enabled for the link
+ * Possible levels of support: yes, no, allow-downgrade
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of the link
+ */
+int sd_network_link_get_dnssec(int ifindex, char **dnssec);
+
+/* Returns the list of per-interface DNSSEC negative trust anchors
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of the link, or has no such data
+ */
+int sd_network_link_get_dnssec_negative_trust_anchors(int ifindex, char ***nta);
+
+/* Get the search DNS domain names for a given link. */
+int sd_network_link_get_search_domains(int ifindex, char ***domains);
+
+/* Get the route DNS domain names for a given link. */
+int sd_network_link_get_route_domains(int ifindex, char ***domains);
+
+/* Get whether this link shall be used as 'default route' for DNS queries */
+int sd_network_link_get_dns_default_route(int ifindex);
+
+/* Get the carrier interface indexes to which current link is bound to. */
+int sd_network_link_get_carrier_bound_to(int ifindex, int **ifindexes);
+
+/* Get the CARRIERS that are bound to current link. */
+int sd_network_link_get_carrier_bound_by(int ifindex, int **ifindexes);
+
+/* Get the timezone that was learnt on a specific link. */
+int sd_network_link_get_timezone(int ifindex, char **timezone);
+
+/* Monitor object */
+typedef struct sd_network_monitor sd_network_monitor;
+
+/* Create a new monitor. Category must be NULL, "links" or "leases". */
+int sd_network_monitor_new(sd_network_monitor **ret, const char *category);
+
+/* Destroys the passed monitor. Returns NULL. */
+sd_network_monitor* sd_network_monitor_unref(sd_network_monitor *m);
+
+/* Flushes the monitor */
+int sd_network_monitor_flush(sd_network_monitor *m);
+
+/* Get FD from monitor */
+int sd_network_monitor_get_fd(sd_network_monitor *m);
+
+/* Get poll() mask to monitor */
+int sd_network_monitor_get_events(sd_network_monitor *m);
+
+/* Get timeout for poll(), as usec value relative to CLOCK_MONOTONIC's epoch */
+int sd_network_monitor_get_timeout(sd_network_monitor *m, uint64_t *timeout_usec);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_network_monitor, sd_network_monitor_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-path.h b/src/systemd/sd-path.h
new file mode 100644
index 0000000..1637987
--- /dev/null
+++ b/src/systemd/sd-path.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdpathhfoo
+#define foosdpathhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+enum {
+ /* Temporary files */
+ SD_PATH_TEMPORARY,
+ SD_PATH_TEMPORARY_LARGE,
+
+ /* Vendor supplied data */
+ SD_PATH_SYSTEM_BINARIES,
+ SD_PATH_SYSTEM_INCLUDE,
+ SD_PATH_SYSTEM_LIBRARY_PRIVATE,
+ SD_PATH_SYSTEM_LIBRARY_ARCH,
+ SD_PATH_SYSTEM_SHARED,
+ SD_PATH_SYSTEM_CONFIGURATION_FACTORY,
+ SD_PATH_SYSTEM_STATE_FACTORY,
+
+ /* System configuration, runtime, state, ... */
+ SD_PATH_SYSTEM_CONFIGURATION,
+ SD_PATH_SYSTEM_RUNTIME,
+ SD_PATH_SYSTEM_RUNTIME_LOGS,
+ SD_PATH_SYSTEM_STATE_PRIVATE,
+ SD_PATH_SYSTEM_STATE_LOGS,
+ SD_PATH_SYSTEM_STATE_CACHE,
+ SD_PATH_SYSTEM_STATE_SPOOL,
+
+ /* Vendor supplied data */
+ SD_PATH_USER_BINARIES,
+ SD_PATH_USER_LIBRARY_PRIVATE,
+ SD_PATH_USER_LIBRARY_ARCH,
+ SD_PATH_USER_SHARED,
+
+ /* User configuration, state, runtime ... */
+ SD_PATH_USER_CONFIGURATION, /* takes both actual configuration (like /etc) and state (like /var/lib) */
+ SD_PATH_USER_RUNTIME,
+ SD_PATH_USER_STATE_CACHE,
+
+ /* User resources */
+ SD_PATH_USER, /* $HOME itself */
+ SD_PATH_USER_DOCUMENTS,
+ SD_PATH_USER_MUSIC,
+ SD_PATH_USER_PICTURES,
+ SD_PATH_USER_VIDEOS,
+ SD_PATH_USER_DOWNLOAD,
+ SD_PATH_USER_PUBLIC,
+ SD_PATH_USER_TEMPLATES,
+ SD_PATH_USER_DESKTOP,
+
+ /* Search paths */
+ SD_PATH_SEARCH_BINARIES,
+ SD_PATH_SEARCH_BINARIES_DEFAULT,
+ SD_PATH_SEARCH_LIBRARY_PRIVATE,
+ SD_PATH_SEARCH_LIBRARY_ARCH,
+ SD_PATH_SEARCH_SHARED,
+ SD_PATH_SEARCH_CONFIGURATION_FACTORY,
+ SD_PATH_SEARCH_STATE_FACTORY,
+ SD_PATH_SEARCH_CONFIGURATION,
+
+ _SD_PATH_MAX,
+};
+
+int sd_path_home(uint64_t type, const char *suffix, char **path);
+int sd_path_search(uint64_t type, const char *suffix, char ***paths);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-radv.h b/src/systemd/sd-radv.h
new file mode 100644
index 0000000..93861b9
--- /dev/null
+++ b/src/systemd/sd-radv.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdradvfoo
+#define foosdradvfoo
+
+/***
+ Copyright © 2017 Intel Corporation. All rights reserved.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+
+#include "_sd-common.h"
+#include "sd-event.h"
+#include "sd-ndisc.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+#define SD_RADV_DEFAULT_MIN_TIMEOUT_USEC (200*USEC_PER_SEC)
+#define SD_RADV_DEFAULT_MAX_TIMEOUT_USEC (600*USEC_PER_SEC)
+
+#define SD_RADV_DEFAULT_DNS_LIFETIME_USEC (3*SD_RADV_DEFAULT_MAX_TIMEOUT_USEC)
+
+typedef struct sd_radv sd_radv;
+typedef struct sd_radv_prefix sd_radv_prefix;
+
+/* Router Advertisement */
+int sd_radv_new(sd_radv **ret);
+sd_radv *sd_radv_ref(sd_radv *ra);
+sd_radv *sd_radv_unref(sd_radv *ra);
+
+int sd_radv_attach_event(sd_radv *ra, sd_event *event, int64_t priority);
+int sd_radv_detach_event(sd_radv *nd);
+sd_event *sd_radv_get_event(sd_radv *ra);
+
+int sd_radv_start(sd_radv *ra);
+int sd_radv_stop(sd_radv *ra);
+
+int sd_radv_set_ifindex(sd_radv *ra, int interface_index);
+int sd_radv_set_mac(sd_radv *ra, const struct ether_addr *mac_addr);
+int sd_radv_set_mtu(sd_radv *ra, uint32_t mtu);
+int sd_radv_set_hop_limit(sd_radv *ra, uint8_t hop_limit);
+int sd_radv_set_router_lifetime(sd_radv *ra, uint32_t router_lifetime);
+int sd_radv_set_managed_information(sd_radv *ra, int managed);
+int sd_radv_set_other_information(sd_radv *ra, int other);
+int sd_radv_set_preference(sd_radv *ra, unsigned preference);
+int sd_radv_add_prefix(sd_radv *ra, sd_radv_prefix *p, int dynamic);
+sd_radv_prefix *sd_radv_remove_prefix(sd_radv *ra, const struct in6_addr *prefix,
+ unsigned char prefixlen);
+int sd_radv_set_rdnss(sd_radv *ra, uint32_t lifetime,
+ const struct in6_addr *dns, size_t n_dns);
+int sd_radv_set_dnssl(sd_radv *ra, uint32_t lifetime, char **search_list);
+
+/* Advertised prefixes */
+int sd_radv_prefix_new(sd_radv_prefix **ret);
+sd_radv_prefix *sd_radv_prefix_ref(sd_radv_prefix *ra);
+sd_radv_prefix *sd_radv_prefix_unref(sd_radv_prefix *ra);
+
+int sd_radv_prefix_set_prefix(sd_radv_prefix *p, const struct in6_addr *in6_addr,
+ unsigned char prefixlen);
+int sd_radv_prefix_set_onlink(sd_radv_prefix *p, int onlink);
+int sd_radv_prefix_set_address_autoconfiguration(sd_radv_prefix *p,
+ int address_autoconfiguration);
+int sd_radv_prefix_set_valid_lifetime(sd_radv_prefix *p,
+ uint32_t valid_lifetime);
+int sd_radv_prefix_set_preferred_lifetime(sd_radv_prefix *p,
+ uint32_t preferred_lifetime);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_radv, sd_radv_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_radv_prefix, sd_radv_prefix_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-resolve.h b/src/systemd/sd-resolve.h
new file mode 100644
index 0000000..d78e8db
--- /dev/null
+++ b/src/systemd/sd-resolve.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdresolvehfoo
+#define foosdresolvehfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+/* 'struct addrinfo' needs _GNU_SOURCE */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+
+#include <inttypes.h>
+#include <netdb.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+/* An opaque sd-resolve session structure */
+typedef struct sd_resolve sd_resolve;
+
+/* An opaque sd-resolve query structure */
+typedef struct sd_resolve_query sd_resolve_query;
+
+/* A callback on completion */
+typedef int (*sd_resolve_getaddrinfo_handler_t)(sd_resolve_query *q, int ret, const struct addrinfo *ai, void *userdata);
+typedef int (*sd_resolve_getnameinfo_handler_t)(sd_resolve_query *q, int ret, const char *host, const char *serv, void *userdata);
+typedef _sd_destroy_t sd_resolve_destroy_t;
+
+enum {
+ SD_RESOLVE_GET_HOST = 1 << 0,
+ SD_RESOLVE_GET_SERVICE = 1 << 1,
+ SD_RESOLVE_GET_BOTH = SD_RESOLVE_GET_HOST | SD_RESOLVE_GET_SERVICE,
+};
+
+int sd_resolve_default(sd_resolve **ret);
+
+/* Allocate a new sd-resolve session. */
+int sd_resolve_new(sd_resolve **ret);
+
+/* Free a sd-resolve session. This destroys all attached
+ * sd_resolve_query objects automatically. */
+sd_resolve* sd_resolve_unref(sd_resolve *resolve);
+sd_resolve* sd_resolve_ref(sd_resolve *resolve);
+
+/* Return the UNIX file descriptor to poll() for events on. Use this
+ * function to integrate sd-resolve with your custom main loop. */
+int sd_resolve_get_fd(sd_resolve *resolve);
+
+/* Return the poll() events (a combination of flags like POLLIN,
+ * POLLOUT, ...) to check for. */
+int sd_resolve_get_events(sd_resolve *resolve);
+
+/* Return the poll() timeout to pass. Returns (uint64_t) -1 as
+ * timeout if no timeout is needed. */
+int sd_resolve_get_timeout(sd_resolve *resolve, uint64_t *timeout_usec);
+
+/* Process pending responses. After this function is called, you can
+ * get the next completed query object(s) using
+ * sd_resolve_get_next(). */
+int sd_resolve_process(sd_resolve *resolve);
+
+/* Wait for a resolve event to complete. */
+int sd_resolve_wait(sd_resolve *resolve, uint64_t timeout_usec);
+
+int sd_resolve_get_tid(sd_resolve *resolve, pid_t *tid);
+
+int sd_resolve_attach_event(sd_resolve *resolve, sd_event *e, int64_t priority);
+int sd_resolve_detach_event(sd_resolve *resolve);
+sd_event *sd_resolve_get_event(sd_resolve *resolve);
+
+/* Issue a name-to-address query on the specified session. The
+ * arguments are compatible with those of libc's
+ * getaddrinfo(3). The function returns a new query object. When the
+ * query is completed, you may retrieve the results using
+ * sd_resolve_getaddrinfo_done(). */
+int sd_resolve_getaddrinfo(sd_resolve *resolve, sd_resolve_query **q, const char *node, const char *service, const struct addrinfo *hints, sd_resolve_getaddrinfo_handler_t callback, void *userdata);
+
+/* Issue an address-to-name query on the specified session. The
+ * arguments are compatible with those of libc's
+ * getnameinfo(3). The function returns a new query object. When the
+ * query is completed, you may retrieve the results using
+ * sd_resolve_getnameinfo_done(). Set gethost (resp. getserv) to non-zero
+ * if you want to query the hostname (resp. the service name). */
+int sd_resolve_getnameinfo(sd_resolve *resolve, sd_resolve_query **q, const struct sockaddr *sa, socklen_t salen, int flags, uint64_t get, sd_resolve_getnameinfo_handler_t callback, void *userdata);
+
+sd_resolve_query *sd_resolve_query_ref(sd_resolve_query *q);
+sd_resolve_query *sd_resolve_query_unref(sd_resolve_query *q);
+
+/* Returns non-zero when the query operation specified by q has been completed. */
+int sd_resolve_query_is_done(sd_resolve_query *q);
+
+void *sd_resolve_query_get_userdata(sd_resolve_query *q);
+void *sd_resolve_query_set_userdata(sd_resolve_query *q, void *userdata);
+int sd_resolve_query_get_destroy_callback(sd_resolve_query *q, sd_resolve_destroy_t *destroy_callback);
+int sd_resolve_query_set_destroy_callback(sd_resolve_query *q, sd_resolve_destroy_t destroy_callback);
+int sd_resolve_query_get_floating(sd_resolve_query *q);
+int sd_resolve_query_set_floating(sd_resolve_query *q, int b);
+
+sd_resolve *sd_resolve_query_get_resolve(sd_resolve_query *q);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_resolve, sd_resolve_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_resolve_query, sd_resolve_query_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-utf8.h b/src/systemd/sd-utf8.h
new file mode 100644
index 0000000..151b423
--- /dev/null
+++ b/src/systemd/sd-utf8.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#ifndef foosdutf8hfoo
+#define foosdutf8hfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+_sd_pure_ const char *sd_utf8_is_valid(const char *s);
+_sd_pure_ const char *sd_ascii_is_valid(const char *s);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/sysusers/sysusers.c b/src/sysusers/sysusers.c
new file mode 100644
index 0000000..df28bcf
--- /dev/null
+++ b/src/sysusers/sysusers.c
@@ -0,0 +1,1997 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+#include <utmp.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "copy.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "main-func.h"
+#include "pager.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "set.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util-label.h"
+#include "uid-range.h"
+#include "user-util.h"
+#include "utf8.h"
+#include "util.h"
+
+typedef enum ItemType {
+ ADD_USER = 'u',
+ ADD_GROUP = 'g',
+ ADD_MEMBER = 'm',
+ ADD_RANGE = 'r',
+} ItemType;
+
+typedef struct Item {
+ ItemType type;
+
+ char *name;
+ char *uid_path;
+ char *gid_path;
+ char *description;
+ char *home;
+ char *shell;
+
+ gid_t gid;
+ uid_t uid;
+
+ bool gid_set:1;
+
+ /* When set the group with the specified gid must exist
+ * and the check if a uid clashes with the gid is skipped.
+ */
+ bool id_set_strict:1;
+
+ bool uid_set:1;
+
+ bool todo_user:1;
+ bool todo_group:1;
+} Item;
+
+static char *arg_root = NULL;
+static bool arg_cat_config = false;
+static const char *arg_replace = NULL;
+static bool arg_inline = false;
+static PagerFlags arg_pager_flags = 0;
+
+static OrderedHashmap *users = NULL, *groups = NULL;
+static OrderedHashmap *todo_uids = NULL, *todo_gids = NULL;
+static OrderedHashmap *members = NULL;
+
+static Hashmap *database_by_uid = NULL, *database_by_username = NULL;
+static Hashmap *database_by_gid = NULL, *database_by_groupname = NULL;
+static Set *database_users = NULL, *database_groups = NULL;
+
+static uid_t search_uid = UID_INVALID;
+static UidRange *uid_range = NULL;
+static unsigned n_uid_range = 0;
+
+STATIC_DESTRUCTOR_REGISTER(groups, ordered_hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(users, ordered_hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(members, ordered_hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(todo_uids, ordered_hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(todo_gids, ordered_hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(database_by_uid, hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(database_by_username, hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(database_users, set_free_freep);
+STATIC_DESTRUCTOR_REGISTER(database_by_gid, hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(database_by_groupname, hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(database_groups, set_free_freep);
+STATIC_DESTRUCTOR_REGISTER(uid_range, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
+
+static int load_user_database(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *passwd_path;
+ struct passwd *pw;
+ int r;
+
+ passwd_path = prefix_roota(arg_root, "/etc/passwd");
+ f = fopen(passwd_path, "re");
+ if (!f)
+ return errno == ENOENT ? 0 : -errno;
+
+ r = hashmap_ensure_allocated(&database_by_username, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&database_by_uid, NULL);
+ if (r < 0)
+ return r;
+
+ r = set_ensure_allocated(&database_users, NULL);
+ if (r < 0)
+ return r;
+
+ while ((r = fgetpwent_sane(f, &pw)) > 0) {
+ char *n;
+ int k, q;
+
+ n = strdup(pw->pw_name);
+ if (!n)
+ return -ENOMEM;
+
+ k = set_put(database_users, n);
+ if (k < 0) {
+ free(n);
+ return k;
+ }
+
+ k = hashmap_put(database_by_username, n, UID_TO_PTR(pw->pw_uid));
+ if (k < 0 && k != -EEXIST)
+ return k;
+
+ q = hashmap_put(database_by_uid, UID_TO_PTR(pw->pw_uid), n);
+ if (q < 0 && q != -EEXIST)
+ return q;
+ }
+ return r;
+}
+
+static int load_group_database(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *group_path;
+ struct group *gr;
+ int r;
+
+ group_path = prefix_roota(arg_root, "/etc/group");
+ f = fopen(group_path, "re");
+ if (!f)
+ return errno == ENOENT ? 0 : -errno;
+
+ r = hashmap_ensure_allocated(&database_by_groupname, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&database_by_gid, NULL);
+ if (r < 0)
+ return r;
+
+ r = set_ensure_allocated(&database_groups, NULL);
+ if (r < 0)
+ return r;
+
+ while ((r = fgetgrent_sane(f, &gr)) > 0) {
+ char *n;
+ int k, q;
+
+ n = strdup(gr->gr_name);
+ if (!n)
+ return -ENOMEM;
+
+ k = set_put(database_groups, n);
+ if (k < 0) {
+ free(n);
+ return k;
+ }
+
+ k = hashmap_put(database_by_groupname, n, GID_TO_PTR(gr->gr_gid));
+ if (k < 0 && k != -EEXIST)
+ return k;
+
+ q = hashmap_put(database_by_gid, GID_TO_PTR(gr->gr_gid), n);
+ if (q < 0 && q != -EEXIST)
+ return q;
+ }
+ return r;
+}
+
+static int make_backup(const char *target, const char *x) {
+ _cleanup_close_ int src = -1;
+ _cleanup_fclose_ FILE *dst = NULL;
+ _cleanup_free_ char *temp = NULL;
+ char *backup;
+ struct timespec ts[2];
+ struct stat st;
+ int r;
+
+ src = open(x, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (src < 0) {
+ if (errno == ENOENT) /* No backup necessary... */
+ return 0;
+
+ return -errno;
+ }
+
+ if (fstat(src, &st) < 0)
+ return -errno;
+
+ r = fopen_temporary_label(target, x, &dst, &temp);
+ if (r < 0)
+ return r;
+
+ r = copy_bytes(src, fileno(dst), (uint64_t) -1, COPY_REFLINK);
+ if (r < 0)
+ goto fail;
+
+ /* Don't fail on chmod() or chown(). If it stays owned by us
+ * and/or unreadable by others, then it isn't too bad... */
+
+ backup = strjoina(x, "-");
+
+ /* Copy over the access mask */
+ r = fchmod_and_chown(fileno(dst), st.st_mode & 07777, st.st_uid, st.st_gid);
+ if (r < 0)
+ log_warning_errno(r, "Failed to change access mode or ownership of %s: %m", backup);
+
+ ts[0] = st.st_atim;
+ ts[1] = st.st_mtim;
+ if (futimens(fileno(dst), ts) < 0)
+ log_warning_errno(errno, "Failed to fix access and modification time of %s: %m", backup);
+
+ r = fflush_sync_and_check(dst);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp, backup) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ unlink(temp);
+ return r;
+}
+
+static int putgrent_with_members(const struct group *gr, FILE *group) {
+ char **a;
+
+ assert(gr);
+ assert(group);
+
+ a = ordered_hashmap_get(members, gr->gr_name);
+ if (a) {
+ _cleanup_strv_free_ char **l = NULL;
+ bool added = false;
+ char **i;
+
+ l = strv_copy(gr->gr_mem);
+ if (!l)
+ return -ENOMEM;
+
+ STRV_FOREACH(i, a) {
+ if (strv_find(l, *i))
+ continue;
+
+ if (strv_extend(&l, *i) < 0)
+ return -ENOMEM;
+
+ added = true;
+ }
+
+ if (added) {
+ struct group t;
+ int r;
+
+ strv_uniq(l);
+ strv_sort(l);
+
+ t = *gr;
+ t.gr_mem = l;
+
+ r = putgrent_sane(&t, group);
+ return r < 0 ? r : 1;
+ }
+ }
+
+ return putgrent_sane(gr, group);
+}
+
+#if ENABLE_GSHADOW
+static int putsgent_with_members(const struct sgrp *sg, FILE *gshadow) {
+ char **a;
+
+ assert(sg);
+ assert(gshadow);
+
+ a = ordered_hashmap_get(members, sg->sg_namp);
+ if (a) {
+ _cleanup_strv_free_ char **l = NULL;
+ bool added = false;
+ char **i;
+
+ l = strv_copy(sg->sg_mem);
+ if (!l)
+ return -ENOMEM;
+
+ STRV_FOREACH(i, a) {
+ if (strv_find(l, *i))
+ continue;
+
+ if (strv_extend(&l, *i) < 0)
+ return -ENOMEM;
+
+ added = true;
+ }
+
+ if (added) {
+ struct sgrp t;
+ int r;
+
+ strv_uniq(l);
+ strv_sort(l);
+
+ t = *sg;
+ t.sg_mem = l;
+
+ r = putsgent_sane(&t, gshadow);
+ return r < 0 ? r : 1;
+ }
+ }
+
+ return putsgent_sane(sg, gshadow);
+}
+#endif
+
+static int sync_rights(FILE *from, FILE *to) {
+ struct stat st;
+
+ if (fstat(fileno(from), &st) < 0)
+ return -errno;
+
+ return fchmod_and_chown(fileno(to), st.st_mode & 07777, st.st_uid, st.st_gid);
+}
+
+static int rename_and_apply_smack(const char *temp_path, const char *dest_path) {
+ int r = 0;
+ if (rename(temp_path, dest_path) < 0)
+ return -errno;
+
+#ifdef SMACK_RUN_LABEL
+ r = mac_smack_apply(dest_path, SMACK_ATTR_ACCESS, SMACK_FLOOR_LABEL);
+ if (r < 0)
+ return r;
+#endif
+ return r;
+}
+
+static const char* default_shell(uid_t uid) {
+ return uid == 0 ? "/bin/sh" : "/sbin/nologin";
+}
+
+static int write_temporary_passwd(const char *passwd_path, FILE **tmpfile, char **tmpfile_path) {
+ _cleanup_fclose_ FILE *original = NULL, *passwd = NULL;
+ _cleanup_(unlink_and_freep) char *passwd_tmp = NULL;
+ struct passwd *pw = NULL;
+ Iterator iterator;
+ Item *i;
+ int r;
+
+ if (ordered_hashmap_size(todo_uids) == 0)
+ return 0;
+
+ r = fopen_temporary_label("/etc/passwd", passwd_path, &passwd, &passwd_tmp);
+ if (r < 0)
+ return r;
+
+ original = fopen(passwd_path, "re");
+ if (original) {
+
+ r = sync_rights(original, passwd);
+ if (r < 0)
+ return r;
+
+ while ((r = fgetpwent_sane(original, &pw)) > 0) {
+
+ i = ordered_hashmap_get(users, pw->pw_name);
+ if (i && i->todo_user) {
+ log_error("%s: User \"%s\" already exists.", passwd_path, pw->pw_name);
+ return -EEXIST;
+ }
+
+ if (ordered_hashmap_contains(todo_uids, UID_TO_PTR(pw->pw_uid))) {
+ log_error("%s: Detected collision for UID " UID_FMT ".", passwd_path, pw->pw_uid);
+ return -EEXIST;
+ }
+
+ /* Make sure we keep the NIS entries (if any) at the end. */
+ if (IN_SET(pw->pw_name[0], '+', '-'))
+ break;
+
+ r = putpwent_sane(pw, passwd);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ } else {
+ if (errno != ENOENT)
+ return -errno;
+ if (fchmod(fileno(passwd), 0644) < 0)
+ return -errno;
+ }
+
+ ORDERED_HASHMAP_FOREACH(i, todo_uids, iterator) {
+ struct passwd n = {
+ .pw_name = i->name,
+ .pw_uid = i->uid,
+ .pw_gid = i->gid,
+ .pw_gecos = i->description,
+
+ /* "x" means the password is stored in the shadow file */
+ .pw_passwd = (char*) "x",
+
+ /* We default to the root directory as home */
+ .pw_dir = i->home ? i->home : (char*) "/",
+
+ /* Initialize the shell to nologin, with one exception:
+ * for root we patch in something special */
+ .pw_shell = i->shell ?: (char*) default_shell(i->uid),
+ };
+
+ r = putpwent_sane(&n, passwd);
+ if (r < 0)
+ return r;
+ }
+
+ /* Append the remaining NIS entries if any */
+ while (pw) {
+ r = putpwent_sane(pw, passwd);
+ if (r < 0)
+ return r;
+
+ r = fgetpwent_sane(original, &pw);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ r = fflush_and_check(passwd);
+ if (r < 0)
+ return r;
+
+ *tmpfile = TAKE_PTR(passwd);
+ *tmpfile_path = TAKE_PTR(passwd_tmp);
+
+ return 0;
+}
+
+static int write_temporary_shadow(const char *shadow_path, FILE **tmpfile, char **tmpfile_path) {
+ _cleanup_fclose_ FILE *original = NULL, *shadow = NULL;
+ _cleanup_(unlink_and_freep) char *shadow_tmp = NULL;
+ struct spwd *sp = NULL;
+ Iterator iterator;
+ long lstchg;
+ Item *i;
+ int r;
+
+ if (ordered_hashmap_size(todo_uids) == 0)
+ return 0;
+
+ r = fopen_temporary_label("/etc/shadow", shadow_path, &shadow, &shadow_tmp);
+ if (r < 0)
+ return r;
+
+ lstchg = (long) (now(CLOCK_REALTIME) / USEC_PER_DAY);
+
+ original = fopen(shadow_path, "re");
+ if (original) {
+
+ r = sync_rights(original, shadow);
+ if (r < 0)
+ return r;
+
+ while ((r = fgetspent_sane(original, &sp)) > 0) {
+
+ i = ordered_hashmap_get(users, sp->sp_namp);
+ if (i && i->todo_user) {
+ /* we will update the existing entry */
+ sp->sp_lstchg = lstchg;
+
+ /* only the /etc/shadow stage is left, so we can
+ * safely remove the item from the todo set */
+ i->todo_user = false;
+ ordered_hashmap_remove(todo_uids, UID_TO_PTR(i->uid));
+ }
+
+ /* Make sure we keep the NIS entries (if any) at the end. */
+ if (IN_SET(sp->sp_namp[0], '+', '-'))
+ break;
+
+ r = putspent_sane(sp, shadow);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ } else {
+ if (errno != ENOENT)
+ return -errno;
+ if (fchmod(fileno(shadow), 0000) < 0)
+ return -errno;
+ }
+
+ ORDERED_HASHMAP_FOREACH(i, todo_uids, iterator) {
+ struct spwd n = {
+ .sp_namp = i->name,
+ .sp_pwdp = (char*) "!!",
+ .sp_lstchg = lstchg,
+ .sp_min = -1,
+ .sp_max = -1,
+ .sp_warn = -1,
+ .sp_inact = -1,
+ .sp_expire = -1,
+ .sp_flag = (unsigned long) -1, /* this appears to be what everybody does ... */
+ };
+
+ r = putspent_sane(&n, shadow);
+ if (r < 0)
+ return r;
+ }
+
+ /* Append the remaining NIS entries if any */
+ while (sp) {
+ r = putspent_sane(sp, shadow);
+ if (r < 0)
+ return r;
+
+ r = fgetspent_sane(original, &sp);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+ if (!IN_SET(errno, 0, ENOENT))
+ return -errno;
+
+ r = fflush_sync_and_check(shadow);
+ if (r < 0)
+ return r;
+
+ *tmpfile = TAKE_PTR(shadow);
+ *tmpfile_path = TAKE_PTR(shadow_tmp);
+
+ return 0;
+}
+
+static int write_temporary_group(const char *group_path, FILE **tmpfile, char **tmpfile_path) {
+ _cleanup_fclose_ FILE *original = NULL, *group = NULL;
+ _cleanup_(unlink_and_freep) char *group_tmp = NULL;
+ bool group_changed = false;
+ struct group *gr = NULL;
+ Iterator iterator;
+ Item *i;
+ int r;
+
+ if (ordered_hashmap_size(todo_gids) == 0 && ordered_hashmap_size(members) == 0)
+ return 0;
+
+ r = fopen_temporary_label("/etc/group", group_path, &group, &group_tmp);
+ if (r < 0)
+ return r;
+
+ original = fopen(group_path, "re");
+ if (original) {
+
+ r = sync_rights(original, group);
+ if (r < 0)
+ return r;
+
+ while ((r = fgetgrent_sane(original, &gr)) > 0) {
+ /* Safety checks against name and GID collisions. Normally,
+ * this should be unnecessary, but given that we look at the
+ * entries anyway here, let's make an extra verification
+ * step that we don't generate duplicate entries. */
+
+ i = ordered_hashmap_get(groups, gr->gr_name);
+ if (i && i->todo_group) {
+ log_error("%s: Group \"%s\" already exists.", group_path, gr->gr_name);
+ return -EEXIST;
+ }
+
+ if (ordered_hashmap_contains(todo_gids, GID_TO_PTR(gr->gr_gid))) {
+ log_error("%s: Detected collision for GID " GID_FMT ".", group_path, gr->gr_gid);
+ return -EEXIST;
+ }
+
+ /* Make sure we keep the NIS entries (if any) at the end. */
+ if (IN_SET(gr->gr_name[0], '+', '-'))
+ break;
+
+ r = putgrent_with_members(gr, group);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ group_changed = true;
+ }
+ if (r < 0)
+ return r;
+
+ } else {
+ if (errno != ENOENT)
+ return -errno;
+ if (fchmod(fileno(group), 0644) < 0)
+ return -errno;
+ }
+
+ ORDERED_HASHMAP_FOREACH(i, todo_gids, iterator) {
+ struct group n = {
+ .gr_name = i->name,
+ .gr_gid = i->gid,
+ .gr_passwd = (char*) "x",
+ };
+
+ r = putgrent_with_members(&n, group);
+ if (r < 0)
+ return r;
+
+ group_changed = true;
+ }
+
+ /* Append the remaining NIS entries if any */
+ while (gr) {
+ r = putgrent_sane(gr, group);
+ if (r < 0)
+ return r;
+
+ r = fgetgrent_sane(original, &gr);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ r = fflush_sync_and_check(group);
+ if (r < 0)
+ return r;
+
+ if (group_changed) {
+ *tmpfile = TAKE_PTR(group);
+ *tmpfile_path = TAKE_PTR(group_tmp);
+ }
+ return 0;
+}
+
+static int write_temporary_gshadow(const char * gshadow_path, FILE **tmpfile, char **tmpfile_path) {
+#if ENABLE_GSHADOW
+ _cleanup_fclose_ FILE *original = NULL, *gshadow = NULL;
+ _cleanup_(unlink_and_freep) char *gshadow_tmp = NULL;
+ bool group_changed = false;
+ Iterator iterator;
+ Item *i;
+ int r;
+
+ if (ordered_hashmap_size(todo_gids) == 0 && ordered_hashmap_size(members) == 0)
+ return 0;
+
+ r = fopen_temporary_label("/etc/gshadow", gshadow_path, &gshadow, &gshadow_tmp);
+ if (r < 0)
+ return r;
+
+ original = fopen(gshadow_path, "re");
+ if (original) {
+ struct sgrp *sg;
+
+ r = sync_rights(original, gshadow);
+ if (r < 0)
+ return r;
+
+ while ((r = fgetsgent_sane(original, &sg)) > 0) {
+
+ i = ordered_hashmap_get(groups, sg->sg_namp);
+ if (i && i->todo_group) {
+ log_error("%s: Group \"%s\" already exists.", gshadow_path, sg->sg_namp);
+ return -EEXIST;
+ }
+
+ r = putsgent_with_members(sg, gshadow);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ group_changed = true;
+ }
+ if (r < 0)
+ return r;
+
+ } else {
+ if (errno != ENOENT)
+ return -errno;
+ if (fchmod(fileno(gshadow), 0000) < 0)
+ return -errno;
+ }
+
+ ORDERED_HASHMAP_FOREACH(i, todo_gids, iterator) {
+ struct sgrp n = {
+ .sg_namp = i->name,
+ .sg_passwd = (char*) "!!",
+ };
+
+ r = putsgent_with_members(&n, gshadow);
+ if (r < 0)
+ return r;
+
+ group_changed = true;
+ }
+
+ r = fflush_sync_and_check(gshadow);
+ if (r < 0)
+ return r;
+
+ if (group_changed) {
+ *tmpfile = TAKE_PTR(gshadow);
+ *tmpfile_path = TAKE_PTR(gshadow_tmp);
+ }
+ return 0;
+#else
+ return 0;
+#endif
+}
+
+static int write_files(void) {
+ _cleanup_fclose_ FILE *passwd = NULL, *group = NULL, *shadow = NULL, *gshadow = NULL;
+ _cleanup_(unlink_and_freep) char *passwd_tmp = NULL, *group_tmp = NULL, *shadow_tmp = NULL, *gshadow_tmp = NULL;
+ const char *passwd_path = NULL, *group_path = NULL, *shadow_path = NULL, *gshadow_path = NULL;
+ int r;
+
+ passwd_path = prefix_roota(arg_root, "/etc/passwd");
+ shadow_path = prefix_roota(arg_root, "/etc/shadow");
+ group_path = prefix_roota(arg_root, "/etc/group");
+ gshadow_path = prefix_roota(arg_root, "/etc/gshadow");
+
+ r = write_temporary_group(group_path, &group, &group_tmp);
+ if (r < 0)
+ return r;
+
+ r = write_temporary_gshadow(gshadow_path, &gshadow, &gshadow_tmp);
+ if (r < 0)
+ return r;
+
+ r = write_temporary_passwd(passwd_path, &passwd, &passwd_tmp);
+ if (r < 0)
+ return r;
+
+ r = write_temporary_shadow(shadow_path, &shadow, &shadow_tmp);
+ if (r < 0)
+ return r;
+
+ /* Make a backup of the old files */
+ if (group) {
+ r = make_backup("/etc/group", group_path);
+ if (r < 0)
+ return r;
+ }
+ if (gshadow) {
+ r = make_backup("/etc/gshadow", gshadow_path);
+ if (r < 0)
+ return r;
+ }
+
+ if (passwd) {
+ r = make_backup("/etc/passwd", passwd_path);
+ if (r < 0)
+ return r;
+ }
+ if (shadow) {
+ r = make_backup("/etc/shadow", shadow_path);
+ if (r < 0)
+ return r;
+ }
+
+ /* And make the new files count */
+ if (group) {
+ r = rename_and_apply_smack(group_tmp, group_path);
+ if (r < 0)
+ return r;
+
+ group_tmp = mfree(group_tmp);
+ }
+ if (gshadow) {
+ r = rename_and_apply_smack(gshadow_tmp, gshadow_path);
+ if (r < 0)
+ return r;
+
+ gshadow_tmp = mfree(gshadow_tmp);
+ }
+
+ if (passwd) {
+ r = rename_and_apply_smack(passwd_tmp, passwd_path);
+ if (r < 0)
+ return r;
+
+ passwd_tmp = mfree(passwd_tmp);
+ }
+ if (shadow) {
+ r = rename_and_apply_smack(shadow_tmp, shadow_path);
+ if (r < 0)
+ return r;
+
+ shadow_tmp = mfree(shadow_tmp);
+ }
+
+ return 0;
+}
+
+static int uid_is_ok(uid_t uid, const char *name, bool check_with_gid) {
+ struct passwd *p;
+ struct group *g;
+ const char *n;
+ Item *i;
+
+ /* Let's see if we already have assigned the UID a second time */
+ if (ordered_hashmap_get(todo_uids, UID_TO_PTR(uid)))
+ return 0;
+
+ /* Try to avoid using uids that are already used by a group
+ * that doesn't have the same name as our new user. */
+ if (check_with_gid) {
+ i = ordered_hashmap_get(todo_gids, GID_TO_PTR(uid));
+ if (i && !streq(i->name, name))
+ return 0;
+ }
+
+ /* Let's check the files directly */
+ if (hashmap_contains(database_by_uid, UID_TO_PTR(uid)))
+ return 0;
+
+ if (check_with_gid) {
+ n = hashmap_get(database_by_gid, GID_TO_PTR(uid));
+ if (n && !streq(n, name))
+ return 0;
+ }
+
+ /* Let's also check via NSS, to avoid UID clashes over LDAP and such, just in case */
+ if (!arg_root) {
+ errno = 0;
+ p = getpwuid(uid);
+ if (p)
+ return 0;
+ if (!IN_SET(errno, 0, ENOENT))
+ return -errno;
+
+ if (check_with_gid) {
+ errno = 0;
+ g = getgrgid((gid_t) uid);
+ if (g) {
+ if (!streq(g->gr_name, name))
+ return 0;
+ } else if (!IN_SET(errno, 0, ENOENT))
+ return -errno;
+ }
+ }
+
+ return 1;
+}
+
+static int root_stat(const char *p, struct stat *st) {
+ const char *fix;
+
+ fix = prefix_roota(arg_root, p);
+ if (stat(fix, st) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int read_id_from_file(Item *i, uid_t *_uid, gid_t *_gid) {
+ struct stat st;
+ bool found_uid = false, found_gid = false;
+ uid_t uid = 0;
+ gid_t gid = 0;
+
+ assert(i);
+
+ /* First, try to get the gid directly */
+ if (_gid && i->gid_path && root_stat(i->gid_path, &st) >= 0) {
+ gid = st.st_gid;
+ found_gid = true;
+ }
+
+ /* Then, try to get the uid directly */
+ if ((_uid || (_gid && !found_gid))
+ && i->uid_path
+ && root_stat(i->uid_path, &st) >= 0) {
+
+ uid = st.st_uid;
+ found_uid = true;
+
+ /* If we need the gid, but had no success yet, also derive it from the uid path */
+ if (_gid && !found_gid) {
+ gid = st.st_gid;
+ found_gid = true;
+ }
+ }
+
+ /* If that didn't work yet, then let's reuse the gid as uid */
+ if (_uid && !found_uid && i->gid_path) {
+
+ if (found_gid) {
+ uid = (uid_t) gid;
+ found_uid = true;
+ } else if (root_stat(i->gid_path, &st) >= 0) {
+ uid = (uid_t) st.st_gid;
+ found_uid = true;
+ }
+ }
+
+ if (_uid) {
+ if (!found_uid)
+ return 0;
+
+ *_uid = uid;
+ }
+
+ if (_gid) {
+ if (!found_gid)
+ return 0;
+
+ *_gid = gid;
+ }
+
+ return 1;
+}
+
+static int add_user(Item *i) {
+ void *z;
+ int r;
+
+ assert(i);
+
+ /* Check the database directly */
+ z = hashmap_get(database_by_username, i->name);
+ if (z) {
+ log_debug("User %s already exists.", i->name);
+ i->uid = PTR_TO_UID(z);
+ i->uid_set = true;
+ return 0;
+ }
+
+ if (!arg_root) {
+ struct passwd *p;
+
+ /* Also check NSS */
+ errno = 0;
+ p = getpwnam(i->name);
+ if (p) {
+ log_debug("User %s already exists.", i->name);
+ i->uid = p->pw_uid;
+ i->uid_set = true;
+
+ r = free_and_strdup(&i->description, p->pw_gecos);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+ }
+ if (!IN_SET(errno, 0, ENOENT))
+ return log_error_errno(errno, "Failed to check if user %s already exists: %m", i->name);
+ }
+
+ /* Try to use the suggested numeric uid */
+ if (i->uid_set) {
+ r = uid_is_ok(i->uid, i->name, !i->id_set_strict);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify uid " UID_FMT ": %m", i->uid);
+ if (r == 0) {
+ log_debug("Suggested user ID " UID_FMT " for %s already used.", i->uid, i->name);
+ i->uid_set = false;
+ }
+ }
+
+ /* If that didn't work, try to read it from the specified path */
+ if (!i->uid_set) {
+ uid_t c;
+
+ if (read_id_from_file(i, &c, NULL) > 0) {
+
+ if (c <= 0 || !uid_range_contains(uid_range, n_uid_range, c))
+ log_debug("User ID " UID_FMT " of file not suitable for %s.", c, i->name);
+ else {
+ r = uid_is_ok(c, i->name, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify uid " UID_FMT ": %m", i->uid);
+ else if (r > 0) {
+ i->uid = c;
+ i->uid_set = true;
+ } else
+ log_debug("User ID " UID_FMT " of file for %s is already used.", c, i->name);
+ }
+ }
+ }
+
+ /* Otherwise, try to reuse the group ID */
+ if (!i->uid_set && i->gid_set) {
+ r = uid_is_ok((uid_t) i->gid, i->name, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify uid " UID_FMT ": %m", i->uid);
+ if (r > 0) {
+ i->uid = (uid_t) i->gid;
+ i->uid_set = true;
+ }
+ }
+
+ /* And if that didn't work either, let's try to find a free one */
+ if (!i->uid_set) {
+ for (;;) {
+ r = uid_range_next_lower(uid_range, n_uid_range, &search_uid);
+ if (r < 0) {
+ log_error("No free user ID available for %s.", i->name);
+ return r;
+ }
+
+ r = uid_is_ok(search_uid, i->name, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify uid " UID_FMT ": %m", i->uid);
+ else if (r > 0)
+ break;
+ }
+
+ i->uid_set = true;
+ i->uid = search_uid;
+ }
+
+ r = ordered_hashmap_ensure_allocated(&todo_uids, NULL);
+ if (r < 0)
+ return log_oom();
+
+ r = ordered_hashmap_put(todo_uids, UID_TO_PTR(i->uid), i);
+ if (r < 0)
+ return log_oom();
+
+ i->todo_user = true;
+ log_info("Creating user %s (%s) with uid " UID_FMT " and gid " GID_FMT ".", i->name, strna(i->description), i->uid, i->gid);
+
+ return 0;
+}
+
+static int gid_is_ok(gid_t gid) {
+ struct group *g;
+ struct passwd *p;
+
+ if (ordered_hashmap_get(todo_gids, GID_TO_PTR(gid)))
+ return 0;
+
+ /* Avoid reusing gids that are already used by a different user */
+ if (ordered_hashmap_get(todo_uids, UID_TO_PTR(gid)))
+ return 0;
+
+ if (hashmap_contains(database_by_gid, GID_TO_PTR(gid)))
+ return 0;
+
+ if (hashmap_contains(database_by_uid, UID_TO_PTR(gid)))
+ return 0;
+
+ if (!arg_root) {
+ errno = 0;
+ g = getgrgid(gid);
+ if (g)
+ return 0;
+ if (!IN_SET(errno, 0, ENOENT))
+ return -errno;
+
+ errno = 0;
+ p = getpwuid((uid_t) gid);
+ if (p)
+ return 0;
+ if (!IN_SET(errno, 0, ENOENT))
+ return -errno;
+ }
+
+ return 1;
+}
+
+static int add_group(Item *i) {
+ void *z;
+ int r;
+
+ assert(i);
+
+ /* Check the database directly */
+ z = hashmap_get(database_by_groupname, i->name);
+ if (z) {
+ log_debug("Group %s already exists.", i->name);
+ i->gid = PTR_TO_GID(z);
+ i->gid_set = true;
+ return 0;
+ }
+
+ /* Also check NSS */
+ if (!arg_root) {
+ struct group *g;
+
+ errno = 0;
+ g = getgrnam(i->name);
+ if (g) {
+ log_debug("Group %s already exists.", i->name);
+ i->gid = g->gr_gid;
+ i->gid_set = true;
+ return 0;
+ }
+ if (!IN_SET(errno, 0, ENOENT))
+ return log_error_errno(errno, "Failed to check if group %s already exists: %m", i->name);
+ }
+
+ /* Try to use the suggested numeric gid */
+ if (i->gid_set) {
+ r = gid_is_ok(i->gid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify gid " GID_FMT ": %m", i->gid);
+ if (i->id_set_strict) {
+ /* If we require the gid to already exist we can return here:
+ * r > 0: means the gid does not exist -> fail
+ * r == 0: means the gid exists -> nothing more to do.
+ */
+ if (r > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to create %s: please create GID %d",
+ i->name, i->gid);
+ if (r == 0)
+ return 0;
+ }
+ if (r == 0) {
+ log_debug("Suggested group ID " GID_FMT " for %s already used.", i->gid, i->name);
+ i->gid_set = false;
+ }
+ }
+
+ /* Try to reuse the numeric uid, if there's one */
+ if (!i->gid_set && i->uid_set) {
+ r = gid_is_ok((gid_t) i->uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify gid " GID_FMT ": %m", i->gid);
+ if (r > 0) {
+ i->gid = (gid_t) i->uid;
+ i->gid_set = true;
+ }
+ }
+
+ /* If that didn't work, try to read it from the specified path */
+ if (!i->gid_set) {
+ gid_t c;
+
+ if (read_id_from_file(i, NULL, &c) > 0) {
+
+ if (c <= 0 || !uid_range_contains(uid_range, n_uid_range, c))
+ log_debug("Group ID " GID_FMT " of file not suitable for %s.", c, i->name);
+ else {
+ r = gid_is_ok(c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify gid " GID_FMT ": %m", i->gid);
+ else if (r > 0) {
+ i->gid = c;
+ i->gid_set = true;
+ } else
+ log_debug("Group ID " GID_FMT " of file for %s already used.", c, i->name);
+ }
+ }
+ }
+
+ /* And if that didn't work either, let's try to find a free one */
+ if (!i->gid_set) {
+ for (;;) {
+ /* We look for new GIDs in the UID pool! */
+ r = uid_range_next_lower(uid_range, n_uid_range, &search_uid);
+ if (r < 0) {
+ log_error("No free group ID available for %s.", i->name);
+ return r;
+ }
+
+ r = gid_is_ok(search_uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify gid " GID_FMT ": %m", i->gid);
+ else if (r > 0)
+ break;
+ }
+
+ i->gid_set = true;
+ i->gid = search_uid;
+ }
+
+ r = ordered_hashmap_ensure_allocated(&todo_gids, NULL);
+ if (r < 0)
+ return log_oom();
+
+ r = ordered_hashmap_put(todo_gids, GID_TO_PTR(i->gid), i);
+ if (r < 0)
+ return log_oom();
+
+ i->todo_group = true;
+ log_info("Creating group %s with gid " GID_FMT ".", i->name, i->gid);
+
+ return 0;
+}
+
+static int process_item(Item *i) {
+ int r;
+
+ assert(i);
+
+ switch (i->type) {
+
+ case ADD_USER: {
+ Item *j;
+
+ j = ordered_hashmap_get(groups, i->name);
+ if (j && j->todo_group) {
+ /* When the group with the same name is already in queue,
+ * use the information about the group and do not create
+ * duplicated group entry. */
+ i->gid_set = j->gid_set;
+ i->gid = j->gid;
+ i->id_set_strict = true;
+ } else {
+ r = add_group(i);
+ if (r < 0)
+ return r;
+ }
+
+ return add_user(i);
+ }
+
+ case ADD_GROUP:
+ return add_group(i);
+
+ default:
+ assert_not_reached("Unknown item type");
+ }
+}
+
+static Item* item_free(Item *i) {
+ if (!i)
+ return NULL;
+
+ free(i->name);
+ free(i->uid_path);
+ free(i->gid_path);
+ free(i->description);
+ free(i->home);
+ free(i->shell);
+ return mfree(i);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Item*, item_free);
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(item_hash_ops, char, string_hash_func, string_compare_func, Item, item_free);
+
+static int add_implicit(void) {
+ char *g, **l;
+ Iterator iterator;
+ int r;
+
+ /* Implicitly create additional users and groups, if they were listed in "m" lines */
+ ORDERED_HASHMAP_FOREACH_KEY(l, g, members, iterator) {
+ char **m;
+
+ STRV_FOREACH(m, l)
+ if (!ordered_hashmap_get(users, *m)) {
+ _cleanup_(item_freep) Item *j = NULL;
+
+ r = ordered_hashmap_ensure_allocated(&users, &item_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ j = new0(Item, 1);
+ if (!j)
+ return log_oom();
+
+ j->type = ADD_USER;
+ j->name = strdup(*m);
+ if (!j->name)
+ return log_oom();
+
+ r = ordered_hashmap_put(users, j->name, j);
+ if (r < 0)
+ return log_oom();
+
+ log_debug("Adding implicit user '%s' due to m line", j->name);
+ j = NULL;
+ }
+
+ if (!(ordered_hashmap_get(users, g) ||
+ ordered_hashmap_get(groups, g))) {
+ _cleanup_(item_freep) Item *j = NULL;
+
+ r = ordered_hashmap_ensure_allocated(&groups, &item_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ j = new0(Item, 1);
+ if (!j)
+ return log_oom();
+
+ j->type = ADD_GROUP;
+ j->name = strdup(g);
+ if (!j->name)
+ return log_oom();
+
+ r = ordered_hashmap_put(groups, j->name, j);
+ if (r < 0)
+ return log_oom();
+
+ log_debug("Adding implicit group '%s' due to m line", j->name);
+ j = NULL;
+ }
+ }
+
+ return 0;
+}
+
+static bool item_equal(Item *a, Item *b) {
+ assert(a);
+ assert(b);
+
+ if (a->type != b->type)
+ return false;
+
+ if (!streq_ptr(a->name, b->name))
+ return false;
+
+ if (!streq_ptr(a->uid_path, b->uid_path))
+ return false;
+
+ if (!streq_ptr(a->gid_path, b->gid_path))
+ return false;
+
+ if (!streq_ptr(a->description, b->description))
+ return false;
+
+ if (a->uid_set != b->uid_set)
+ return false;
+
+ if (a->uid_set && a->uid != b->uid)
+ return false;
+
+ if (a->gid_set != b->gid_set)
+ return false;
+
+ if (a->gid_set && a->gid != b->gid)
+ return false;
+
+ if (!streq_ptr(a->home, b->home))
+ return false;
+
+ if (!streq_ptr(a->shell, b->shell))
+ return false;
+
+ return true;
+}
+
+DEFINE_PRIVATE_HASH_OPS_FULL(members_hash_ops, char, string_hash_func, string_compare_func, free, char*, strv_free);
+
+static int parse_line(const char *fname, unsigned line, const char *buffer) {
+
+ static const Specifier specifier_table[] = {
+ { 'm', specifier_machine_id, NULL },
+ { 'b', specifier_boot_id, NULL },
+ { 'H', specifier_host_name, NULL },
+ { 'v', specifier_kernel_release, NULL },
+ { 'T', specifier_tmp_dir, NULL },
+ { 'V', specifier_var_tmp_dir, NULL },
+ {}
+ };
+
+ _cleanup_free_ char *action = NULL,
+ *name = NULL, *resolved_name = NULL,
+ *id = NULL, *resolved_id = NULL,
+ *description = NULL, *resolved_description = NULL,
+ *home = NULL, *resolved_home = NULL,
+ *shell, *resolved_shell = NULL;
+ _cleanup_(item_freep) Item *i = NULL;
+ Item *existing;
+ OrderedHashmap *h;
+ int r;
+ const char *p;
+
+ assert(fname);
+ assert(line >= 1);
+ assert(buffer);
+
+ /* Parse columns */
+ p = buffer;
+ r = extract_many_words(&p, NULL, EXTRACT_QUOTES,
+ &action, &name, &id, &description, &home, &shell, NULL);
+ if (r < 0) {
+ log_error("[%s:%u] Syntax error.", fname, line);
+ return r;
+ }
+ if (r < 2) {
+ log_error("[%s:%u] Missing action and name columns.", fname, line);
+ return -EINVAL;
+ }
+ if (!isempty(p)) {
+ log_error("[%s:%u] Trailing garbage.", fname, line);
+ return -EINVAL;
+ }
+
+ /* Verify action */
+ if (strlen(action) != 1) {
+ log_error("[%s:%u] Unknown modifier '%s'", fname, line, action);
+ return -EINVAL;
+ }
+
+ if (!IN_SET(action[0], ADD_USER, ADD_GROUP, ADD_MEMBER, ADD_RANGE)) {
+ log_error("[%s:%u] Unknown command type '%c'.", fname, line, action[0]);
+ return -EBADMSG;
+ }
+
+ /* Verify name */
+ if (isempty(name) || streq(name, "-"))
+ name = mfree(name);
+
+ if (name) {
+ r = specifier_printf(name, specifier_table, NULL, &resolved_name);
+ if (r < 0) {
+ log_error("[%s:%u] Failed to replace specifiers: %s", fname, line, name);
+ return r;
+ }
+
+ if (!valid_user_group_name(resolved_name)) {
+ log_error("[%s:%u] '%s' is not a valid user or group name.", fname, line, resolved_name);
+ return -EINVAL;
+ }
+ }
+
+ /* Verify id */
+ if (isempty(id) || streq(id, "-"))
+ id = mfree(id);
+
+ if (id) {
+ r = specifier_printf(id, specifier_table, NULL, &resolved_id);
+ if (r < 0) {
+ log_error("[%s:%u] Failed to replace specifiers: %s", fname, line, name);
+ return r;
+ }
+ }
+
+ /* Verify description */
+ if (isempty(description) || streq(description, "-"))
+ description = mfree(description);
+
+ if (description) {
+ r = specifier_printf(description, specifier_table, NULL, &resolved_description);
+ if (r < 0) {
+ log_error("[%s:%u] Failed to replace specifiers: %s", fname, line, description);
+ return r;
+ }
+
+ if (!valid_gecos(resolved_description)) {
+ log_error("[%s:%u] '%s' is not a valid GECOS field.", fname, line, resolved_description);
+ return -EINVAL;
+ }
+ }
+
+ /* Verify home */
+ if (isempty(home) || streq(home, "-"))
+ home = mfree(home);
+
+ if (home) {
+ r = specifier_printf(home, specifier_table, NULL, &resolved_home);
+ if (r < 0) {
+ log_error("[%s:%u] Failed to replace specifiers: %s", fname, line, home);
+ return r;
+ }
+
+ if (!valid_home(resolved_home)) {
+ log_error("[%s:%u] '%s' is not a valid home directory field.", fname, line, resolved_home);
+ return -EINVAL;
+ }
+ }
+
+ /* Verify shell */
+ if (isempty(shell) || streq(shell, "-"))
+ shell = mfree(shell);
+
+ if (shell) {
+ r = specifier_printf(shell, specifier_table, NULL, &resolved_shell);
+ if (r < 0) {
+ log_error("[%s:%u] Failed to replace specifiers: %s", fname, line, shell);
+ return r;
+ }
+
+ if (!valid_shell(resolved_shell)) {
+ log_error("[%s:%u] '%s' is not a valid login shell field.", fname, line, resolved_shell);
+ return -EINVAL;
+ }
+ }
+
+ switch (action[0]) {
+
+ case ADD_RANGE:
+ if (resolved_name) {
+ log_error("[%s:%u] Lines of type 'r' don't take a name field.", fname, line);
+ return -EINVAL;
+ }
+
+ if (!resolved_id) {
+ log_error("[%s:%u] Lines of type 'r' require a ID range in the third field.", fname, line);
+ return -EINVAL;
+ }
+
+ if (description || home || shell) {
+ log_error("[%s:%u] Lines of type '%c' don't take a %s field.",
+ fname, line, action[0],
+ description ? "GECOS" : home ? "home directory" : "login shell");
+ return -EINVAL;
+ }
+
+ r = uid_range_add_str(&uid_range, &n_uid_range, resolved_id);
+ if (r < 0) {
+ log_error("[%s:%u] Invalid UID range %s.", fname, line, resolved_id);
+ return -EINVAL;
+ }
+
+ return 0;
+
+ case ADD_MEMBER: {
+ char **l;
+
+ /* Try to extend an existing member or group item */
+ if (!name) {
+ log_error("[%s:%u] Lines of type 'm' require a user name in the second field.", fname, line);
+ return -EINVAL;
+ }
+
+ if (!resolved_id) {
+ log_error("[%s:%u] Lines of type 'm' require a group name in the third field.", fname, line);
+ return -EINVAL;
+ }
+
+ if (!valid_user_group_name(resolved_id)) {
+ log_error("[%s:%u] '%s' is not a valid user or group name.", fname, line, resolved_id);
+ return -EINVAL;
+ }
+
+ if (description || home || shell) {
+ log_error("[%s:%u] Lines of type '%c' don't take a %s field.",
+ fname, line, action[0],
+ description ? "GECOS" : home ? "home directory" : "login shell");
+ return -EINVAL;
+ }
+
+ r = ordered_hashmap_ensure_allocated(&members, &members_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ l = ordered_hashmap_get(members, resolved_id);
+ if (l) {
+ /* A list for this group name already exists, let's append to it */
+ r = strv_push(&l, resolved_name);
+ if (r < 0)
+ return log_oom();
+
+ resolved_name = NULL;
+
+ assert_se(ordered_hashmap_update(members, resolved_id, l) >= 0);
+ } else {
+ /* No list for this group name exists yet, create one */
+
+ l = new0(char *, 2);
+ if (!l)
+ return -ENOMEM;
+
+ l[0] = resolved_name;
+ l[1] = NULL;
+
+ r = ordered_hashmap_put(members, resolved_id, l);
+ if (r < 0) {
+ free(l);
+ return log_oom();
+ }
+
+ resolved_id = resolved_name = NULL;
+ }
+
+ return 0;
+ }
+
+ case ADD_USER:
+ if (!name) {
+ log_error("[%s:%u] Lines of type 'u' require a user name in the second field.", fname, line);
+ return -EINVAL;
+ }
+
+ r = ordered_hashmap_ensure_allocated(&users, &item_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ i = new0(Item, 1);
+ if (!i)
+ return log_oom();
+
+ if (resolved_id) {
+ if (path_is_absolute(resolved_id)) {
+ i->uid_path = TAKE_PTR(resolved_id);
+ path_simplify(i->uid_path, false);
+ } else {
+ _cleanup_free_ char *uid = NULL, *gid = NULL;
+ if (split_pair(resolved_id, ":", &uid, &gid) == 0) {
+ r = parse_gid(gid, &i->gid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse GID: '%s': %m", id);
+ i->gid_set = true;
+ i->id_set_strict = true;
+ free_and_replace(resolved_id, uid);
+ }
+ if (!streq(resolved_id, "-")) {
+ r = parse_uid(resolved_id, &i->uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse UID: '%s': %m", id);
+ i->uid_set = true;
+ }
+ }
+ }
+
+ i->description = TAKE_PTR(resolved_description);
+ i->home = TAKE_PTR(resolved_home);
+ i->shell = TAKE_PTR(resolved_shell);
+
+ h = users;
+ break;
+
+ case ADD_GROUP:
+ if (!name) {
+ log_error("[%s:%u] Lines of type 'g' require a user name in the second field.", fname, line);
+ return -EINVAL;
+ }
+
+ if (description || home || shell) {
+ log_error("[%s:%u] Lines of type '%c' don't take a %s field.",
+ fname, line, action[0],
+ description ? "GECOS" : home ? "home directory" : "login shell");
+ return -EINVAL;
+ }
+
+ r = ordered_hashmap_ensure_allocated(&groups, &item_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ i = new0(Item, 1);
+ if (!i)
+ return log_oom();
+
+ if (resolved_id) {
+ if (path_is_absolute(resolved_id)) {
+ i->gid_path = TAKE_PTR(resolved_id);
+ path_simplify(i->gid_path, false);
+ } else {
+ r = parse_gid(resolved_id, &i->gid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse GID: '%s': %m", id);
+
+ i->gid_set = true;
+ }
+ }
+
+ h = groups;
+ break;
+
+ default:
+ return -EBADMSG;
+ }
+
+ i->type = action[0];
+ i->name = TAKE_PTR(resolved_name);
+
+ existing = ordered_hashmap_get(h, i->name);
+ if (existing) {
+
+ /* Two identical items are fine */
+ if (!item_equal(existing, i))
+ log_warning("Two or more conflicting lines for %s configured, ignoring.", i->name);
+
+ return 0;
+ }
+
+ r = ordered_hashmap_put(h, i->name, i);
+ if (r < 0)
+ return log_oom();
+
+ i = NULL;
+ return 0;
+}
+
+static int read_config_file(const char *fn, bool ignore_enoent) {
+ _cleanup_fclose_ FILE *rf = NULL;
+ FILE *f = NULL;
+ unsigned v = 0;
+ int r = 0;
+
+ assert(fn);
+
+ if (streq(fn, "-"))
+ f = stdin;
+ else {
+ r = search_and_fopen(fn, "re", arg_root, (const char**) CONF_PATHS_STRV("sysusers.d"), &rf);
+ if (r < 0) {
+ if (ignore_enoent && r == -ENOENT)
+ return 0;
+
+ return log_error_errno(r, "Failed to open '%s', ignoring: %m", fn);
+ }
+
+ f = rf;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l;
+ int k;
+
+ k = read_line(f, LONG_LINE_MAX, &line);
+ if (k < 0)
+ return log_error_errno(k, "Failed to read '%s': %m", fn);
+ if (k == 0)
+ break;
+
+ v++;
+
+ l = strstrip(line);
+ if (IN_SET(*l, 0, '#'))
+ continue;
+
+ k = parse_line(fn, v, l);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ if (ferror(f)) {
+ log_error_errno(errno, "Failed to read from file %s: %m", fn);
+ if (r == 0)
+ r = -EIO;
+ }
+
+ return r;
+}
+
+static int cat_config(void) {
+ _cleanup_strv_free_ char **files = NULL;
+ int r;
+
+ r = conf_files_list_with_replacement(arg_root, CONF_PATHS_STRV("sysusers.d"), arg_replace, &files, NULL);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ return cat_files(NULL, files, 0);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-sysusers.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [CONFIGURATION FILE...]\n\n"
+ "Creates system user accounts.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --cat-config Show configuration files\n"
+ " --root=PATH Operate on an alternate filesystem root\n"
+ " --replace=PATH Treat arguments as replacement for PATH\n"
+ " --inline Treat arguments as configuration lines\n"
+ " --no-pager Do not pipe output into a pager\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_CAT_CONFIG,
+ ARG_ROOT,
+ ARG_REPLACE,
+ ARG_INLINE,
+ ARG_NO_PAGER,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "cat-config", no_argument, NULL, ARG_CAT_CONFIG },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "replace", required_argument, NULL, ARG_REPLACE },
+ { "inline", no_argument, NULL, ARG_INLINE },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_CAT_CONFIG:
+ arg_cat_config = true;
+ break;
+
+ case ARG_ROOT:
+ r = parse_path_argument_and_warn(optarg, true, &arg_root);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_REPLACE:
+ if (!path_is_absolute(optarg) ||
+ !endswith(optarg, ".conf"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "The argument to --replace= must an absolute path to a config file");
+
+ arg_replace = optarg;
+ break;
+
+ case ARG_INLINE:
+ arg_inline = true;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_replace && arg_cat_config)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --replace= is not supported with --cat-config");
+
+ if (arg_replace && optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "When --replace= is given, some configuration items must be specified");
+
+ return 1;
+}
+
+static int parse_arguments(char **args) {
+ char **arg;
+ unsigned pos = 1;
+ int r;
+
+ STRV_FOREACH(arg, args) {
+ if (arg_inline)
+ /* Use (argument):n, where n==1 for the first positional arg */
+ r = parse_line("(argument)", pos, *arg);
+ else
+ r = read_config_file(*arg, false);
+ if (r < 0)
+ return r;
+
+ pos++;
+ }
+
+ return 0;
+}
+
+static int read_config_files(char **args) {
+ _cleanup_strv_free_ char **files = NULL;
+ _cleanup_free_ char *p = NULL;
+ char **f;
+ int r;
+
+ r = conf_files_list_with_replacement(arg_root, CONF_PATHS_STRV("sysusers.d"), arg_replace, &files, &p);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(f, files)
+ if (p && path_equal(*f, p)) {
+ log_debug("Parsing arguments at position \"%s\"…", *f);
+
+ r = parse_arguments(args);
+ if (r < 0)
+ return r;
+ } else {
+ log_debug("Reading config file \"%s\"…", *f);
+
+ /* Just warn, ignore result otherwise */
+ (void) read_config_file(*f, true);
+ }
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_close_ int lock = -1;
+ Iterator iterator;
+ Item *i;
+ int r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ log_setup_service();
+
+ if (arg_cat_config)
+ return cat_config();
+
+ umask(0022);
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return log_error_errno(r, "SELinux setup failed: %m");
+
+ /* If command line arguments are specified along with --replace, read all
+ * configuration files and insert the positional arguments at the specified
+ * place. Otherwise, if command line arguments are specified, execute just
+ * them, and finally, without --replace= or any positional arguments, just
+ * read configuration and execute it.
+ */
+ if (arg_replace || optind >= argc)
+ r = read_config_files(argv + optind);
+ else
+ r = parse_arguments(argv + optind);
+ if (r < 0)
+ return r;
+
+ /* Let's tell nss-systemd not to synthesize the "root" and "nobody" entries for it, so that our detection
+ * whether the names or UID/GID area already used otherwise doesn't get confused. After all, even though
+ * nss-systemd synthesizes these users/groups, they should still appear in /etc/passwd and /etc/group, as the
+ * synthesizing logic is merely supposed to be fallback for cases where we run with a completely unpopulated
+ * /etc. */
+ if (setenv("SYSTEMD_NSS_BYPASS_SYNTHETIC", "1", 1) < 0)
+ return log_error_errno(errno, "Failed to set SYSTEMD_NSS_BYPASS_SYNTHETIC environment variable: %m");
+
+ if (!uid_range) {
+ /* Default to default range of 1..SYSTEM_UID_MAX */
+ r = uid_range_add(&uid_range, &n_uid_range, 1, SYSTEM_UID_MAX);
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = add_implicit();
+ if (r < 0)
+ return r;
+
+ lock = take_etc_passwd_lock(arg_root);
+ if (lock < 0)
+ return log_error_errno(lock, "Failed to take /etc/passwd lock: %m");
+
+ r = load_user_database();
+ if (r < 0)
+ return log_error_errno(r, "Failed to load user database: %m");
+
+ r = load_group_database();
+ if (r < 0)
+ return log_error_errno(r, "Failed to read group database: %m");
+
+ ORDERED_HASHMAP_FOREACH(i, groups, iterator)
+ (void) process_item(i);
+
+ ORDERED_HASHMAP_FOREACH(i, users, iterator)
+ (void) process_item(i);
+
+ r = write_files();
+ if (r < 0)
+ return log_error_errno(r, "Failed to write files: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/sysv-generator/sysv-generator.c b/src/sysv-generator/sysv-generator.c
new file mode 100644
index 0000000..514b6e0
--- /dev/null
+++ b/src/sysv-generator/sysv-generator.c
@@ -0,0 +1,956 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "generator.h"
+#include "hashmap.h"
+#include "hexdecoct.h"
+#include "install.h"
+#include "log.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "set.h"
+#include "special.h"
+#include "specifier.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "util.h"
+
+static const struct {
+ const char *path;
+ const char *target;
+} rcnd_table[] = {
+ /* Standard SysV runlevels for start-up */
+ { "rc1.d", SPECIAL_RESCUE_TARGET },
+ { "rc2.d", SPECIAL_MULTI_USER_TARGET },
+ { "rc3.d", SPECIAL_MULTI_USER_TARGET },
+ { "rc4.d", SPECIAL_MULTI_USER_TARGET },
+ { "rc5.d", SPECIAL_GRAPHICAL_TARGET },
+
+ /* We ignore the SysV runlevels for shutdown here, as SysV services get default dependencies anyway, and that
+ * means they are shut down anyway at system power off if running. */
+};
+
+static const char *arg_dest = NULL;
+
+typedef struct SysvStub {
+ char *name;
+ char *path;
+ char *description;
+ int sysv_start_priority;
+ char *pid_file;
+ char **before;
+ char **after;
+ char **wants;
+ char **wanted_by;
+ bool has_lsb;
+ bool reload;
+ bool loaded;
+} SysvStub;
+
+static void free_sysvstub(SysvStub *s) {
+ if (!s)
+ return;
+
+ free(s->name);
+ free(s->path);
+ free(s->description);
+ free(s->pid_file);
+ strv_free(s->before);
+ strv_free(s->after);
+ strv_free(s->wants);
+ strv_free(s->wanted_by);
+ free(s);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(SysvStub*, free_sysvstub);
+
+static void free_sysvstub_hashmapp(Hashmap **h) {
+ hashmap_free_with_destructor(*h, free_sysvstub);
+}
+
+static int add_alias(const char *service, const char *alias) {
+ const char *link;
+ int r;
+
+ assert(service);
+ assert(alias);
+
+ link = strjoina(arg_dest, "/", alias);
+
+ r = symlink(service, link);
+ if (r < 0) {
+ if (errno == EEXIST)
+ return 0;
+
+ return -errno;
+ }
+
+ return 1;
+}
+
+static int generate_unit_file(SysvStub *s) {
+ _cleanup_free_ char *path_escaped = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *unit;
+ char **p;
+ int r;
+
+ assert(s);
+
+ if (!s->loaded)
+ return 0;
+
+ path_escaped = specifier_escape(s->path);
+ if (!path_escaped)
+ return log_oom();
+
+ unit = strjoina(arg_dest, "/", s->name);
+
+ /* We might already have a symlink with the same name from a Provides:,
+ * or from backup files like /etc/init.d/foo.bak. Real scripts always win,
+ * so remove an existing link */
+ if (is_symlink(unit) > 0) {
+ log_warning("Overwriting existing symlink %s with real service.", unit);
+ (void) unlink(unit);
+ }
+
+ f = fopen(unit, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", unit);
+
+ fprintf(f,
+ "# Automatically generated by systemd-sysv-generator\n\n"
+ "[Unit]\n"
+ "Documentation=man:systemd-sysv-generator(8)\n"
+ "SourcePath=%s\n",
+ path_escaped);
+
+ if (s->description) {
+ _cleanup_free_ char *t;
+
+ t = specifier_escape(s->description);
+ if (!t)
+ return log_oom();
+
+ fprintf(f, "Description=%s\n", t);
+ }
+
+ STRV_FOREACH(p, s->before)
+ fprintf(f, "Before=%s\n", *p);
+ STRV_FOREACH(p, s->after)
+ fprintf(f, "After=%s\n", *p);
+ STRV_FOREACH(p, s->wants)
+ fprintf(f, "Wants=%s\n", *p);
+
+ fprintf(f,
+ "\n[Service]\n"
+ "Type=forking\n"
+ "Restart=no\n"
+ "TimeoutSec=5min\n"
+ "IgnoreSIGPIPE=no\n"
+ "KillMode=process\n"
+ "GuessMainPID=no\n"
+ "RemainAfterExit=%s\n",
+ yes_no(!s->pid_file));
+
+ if (s->pid_file) {
+ _cleanup_free_ char *t;
+
+ t = specifier_escape(s->pid_file);
+ if (!t)
+ return log_oom();
+
+ fprintf(f, "PIDFile=%s\n", t);
+ }
+
+ /* Consider two special LSB exit codes a clean exit */
+ if (s->has_lsb)
+ fprintf(f,
+ "SuccessExitStatus=%i %i\n",
+ EXIT_NOTINSTALLED,
+ EXIT_NOTCONFIGURED);
+
+ fprintf(f,
+ "ExecStart=%s start\n"
+ "ExecStop=%s stop\n",
+ path_escaped, path_escaped);
+
+ if (s->reload)
+ fprintf(f, "ExecReload=%s reload\n", path_escaped);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit %s: %m", unit);
+
+ STRV_FOREACH(p, s->wanted_by)
+ (void) generator_add_symlink(arg_dest, *p, "wants", s->name);
+
+ return 1;
+}
+
+static bool usage_contains_reload(const char *line) {
+ return (strcasestr(line, "{reload|") ||
+ strcasestr(line, "{reload}") ||
+ strcasestr(line, "{reload\"") ||
+ strcasestr(line, "|reload|") ||
+ strcasestr(line, "|reload}") ||
+ strcasestr(line, "|reload\""));
+}
+
+static char *sysv_translate_name(const char *name) {
+ _cleanup_free_ char *c = NULL;
+ char *res;
+
+ c = strdup(name);
+ if (!c)
+ return NULL;
+
+ res = endswith(c, ".sh");
+ if (res)
+ *res = 0;
+
+ if (unit_name_mangle(c, 0, &res) < 0)
+ return NULL;
+
+ return res;
+}
+
+static int sysv_translate_facility(SysvStub *s, unsigned line, const char *name, char **ret) {
+
+ /* We silently ignore the $ prefix here. According to the LSB
+ * spec it simply indicates whether something is a
+ * standardized name or a distribution-specific one. Since we
+ * just follow what already exists and do not introduce new
+ * uses or names we don't care who introduced a new name. */
+
+ static const char * const table[] = {
+ /* LSB defined facilities */
+ "local_fs", NULL,
+ "network", SPECIAL_NETWORK_ONLINE_TARGET,
+ "named", SPECIAL_NSS_LOOKUP_TARGET,
+ "portmap", SPECIAL_RPCBIND_TARGET,
+ "remote_fs", SPECIAL_REMOTE_FS_TARGET,
+ "syslog", NULL,
+ "time", SPECIAL_TIME_SYNC_TARGET,
+ };
+
+ const char *filename;
+ char *filename_no_sh, *e, *m;
+ const char *n;
+ unsigned i;
+ int r;
+
+ assert(name);
+ assert(s);
+ assert(ret);
+
+ filename = basename(s->path);
+
+ n = *name == '$' ? name + 1 : name;
+
+ for (i = 0; i < ELEMENTSOF(table); i += 2) {
+ if (!streq(table[i], n))
+ continue;
+
+ if (!table[i+1]) {
+ *ret = NULL;
+ return 0;
+ }
+
+ m = strdup(table[i+1]);
+ if (!m)
+ return log_oom();
+
+ *ret = m;
+ return 1;
+ }
+
+ /* If we don't know this name, fallback heuristics to figure
+ * out whether something is a target or a service alias. */
+
+ /* Facilities starting with $ are most likely targets */
+ if (*name == '$') {
+ r = unit_name_build(n, NULL, ".target", ret);
+ if (r < 0)
+ return log_error_errno(r, "[%s:%u] Could not build name for facility %s: %m", s->path, line, name);
+
+ return 1;
+ }
+
+ /* Strip ".sh" suffix from file name for comparison */
+ filename_no_sh = strdupa(filename);
+ e = endswith(filename_no_sh, ".sh");
+ if (e) {
+ *e = '\0';
+ filename = filename_no_sh;
+ }
+
+ /* Names equaling the file name of the services are redundant */
+ if (streq_ptr(n, filename)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ /* Everything else we assume to be normal service names */
+ m = sysv_translate_name(n);
+ if (!m)
+ return log_oom();
+
+ *ret = m;
+ return 1;
+}
+
+static int handle_provides(SysvStub *s, unsigned line, const char *full_text, const char *text) {
+ int r;
+
+ assert(s);
+ assert(full_text);
+ assert(text);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *m = NULL;
+
+ r = extract_first_word(&text, &word, NULL, EXTRACT_QUOTES|EXTRACT_RELAX);
+ if (r < 0)
+ return log_error_errno(r, "[%s:%u] Failed to parse word from provides string: %m", s->path, line);
+ if (r == 0)
+ break;
+
+ r = sysv_translate_facility(s, line, word, &m);
+ if (r <= 0) /* continue on error */
+ continue;
+
+ switch (unit_name_to_type(m)) {
+
+ case UNIT_SERVICE:
+ log_debug("Adding Provides: alias '%s' for '%s'", m, s->name);
+ r = add_alias(s->name, m);
+ if (r < 0)
+ log_warning_errno(r, "[%s:%u] Failed to add LSB Provides name %s, ignoring: %m", s->path, line, m);
+ break;
+
+ case UNIT_TARGET:
+
+ /* NB: SysV targets which are provided by a
+ * service are pulled in by the services, as
+ * an indication that the generic service is
+ * now available. This is strictly one-way.
+ * The targets do NOT pull in SysV services! */
+
+ r = strv_extend(&s->before, m);
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extend(&s->wants, m);
+ if (r < 0)
+ return log_oom();
+
+ if (streq(m, SPECIAL_NETWORK_ONLINE_TARGET)) {
+ r = strv_extend(&s->before, SPECIAL_NETWORK_TARGET);
+ if (r < 0)
+ return log_oom();
+ r = strv_extend(&s->wants, SPECIAL_NETWORK_TARGET);
+ if (r < 0)
+ return log_oom();
+ }
+
+ break;
+
+ case _UNIT_TYPE_INVALID:
+ log_warning("Unit name '%s' is invalid", m);
+ break;
+
+ default:
+ log_warning("Unknown unit type for unit '%s'", m);
+ }
+ }
+
+ return 0;
+}
+
+static int handle_dependencies(SysvStub *s, unsigned line, const char *full_text, const char *text) {
+ int r;
+
+ assert(s);
+ assert(full_text);
+ assert(text);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *m = NULL;
+ bool is_before;
+
+ r = extract_first_word(&text, &word, NULL, EXTRACT_QUOTES|EXTRACT_RELAX);
+ if (r < 0)
+ return log_error_errno(r, "[%s:%u] Failed to parse word from provides string: %m", s->path, line);
+ if (r == 0)
+ break;
+
+ r = sysv_translate_facility(s, line, word, &m);
+ if (r <= 0) /* continue on error */
+ continue;
+
+ is_before = startswith_no_case(full_text, "X-Start-Before:");
+
+ if (streq(m, SPECIAL_NETWORK_ONLINE_TARGET) && !is_before) {
+ /* the network-online target is special, as it needs to be actively pulled in */
+ r = strv_extend(&s->after, m);
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extend(&s->wants, m);
+ } else
+ r = strv_extend(is_before ? &s->before : &s->after, m);
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+static int load_sysv(SysvStub *s) {
+ _cleanup_fclose_ FILE *f;
+ unsigned line = 0;
+ int r;
+ enum {
+ NORMAL,
+ DESCRIPTION,
+ LSB,
+ LSB_DESCRIPTION,
+ USAGE_CONTINUATION
+ } state = NORMAL;
+ _cleanup_free_ char *short_description = NULL, *long_description = NULL, *chkconfig_description = NULL;
+ char *description;
+ bool supports_reload = false;
+
+ assert(s);
+
+ f = fopen(s->path, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open %s: %m", s->path);
+ }
+
+ log_debug("Loading SysV script %s", s->path);
+
+ for (;;) {
+ _cleanup_free_ char *l = NULL;
+ char *t;
+
+ r = read_line(f, LONG_LINE_MAX, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read configuration file '%s': %m", s->path);
+ if (r == 0)
+ break;
+
+ line++;
+
+ t = strstrip(l);
+ if (*t != '#') {
+ /* Try to figure out whether this init script supports
+ * the reload operation. This heuristic looks for
+ * "Usage" lines which include the reload option. */
+ if (state == USAGE_CONTINUATION ||
+ (state == NORMAL && strcasestr(t, "usage"))) {
+ if (usage_contains_reload(t)) {
+ supports_reload = true;
+ state = NORMAL;
+ } else if (t[strlen(t)-1] == '\\')
+ state = USAGE_CONTINUATION;
+ else
+ state = NORMAL;
+ }
+
+ continue;
+ }
+
+ if (state == NORMAL && streq(t, "### BEGIN INIT INFO")) {
+ state = LSB;
+ s->has_lsb = true;
+ continue;
+ }
+
+ if (IN_SET(state, LSB_DESCRIPTION, LSB) && streq(t, "### END INIT INFO")) {
+ state = NORMAL;
+ continue;
+ }
+
+ t++;
+ t += strspn(t, WHITESPACE);
+
+ if (state == NORMAL) {
+
+ /* Try to parse Red Hat style description */
+
+ if (startswith_no_case(t, "description:")) {
+
+ size_t k;
+ const char *j;
+
+ k = strlen(t);
+ if (k > 0 && t[k-1] == '\\') {
+ state = DESCRIPTION;
+ t[k-1] = 0;
+ }
+
+ j = empty_to_null(strstrip(t+12));
+
+ r = free_and_strdup(&chkconfig_description, j);
+ if (r < 0)
+ return log_oom();
+
+ } else if (startswith_no_case(t, "pidfile:")) {
+ const char *fn;
+
+ state = NORMAL;
+
+ fn = strstrip(t+8);
+ if (!path_is_absolute(fn)) {
+ log_error("[%s:%u] PID file not absolute. Ignoring.", s->path, line);
+ continue;
+ }
+
+ r = free_and_strdup(&s->pid_file, fn);
+ if (r < 0)
+ return log_oom();
+ }
+
+ } else if (state == DESCRIPTION) {
+
+ /* Try to parse Red Hat style description
+ * continuation */
+
+ size_t k;
+ char *j;
+
+ k = strlen(t);
+ if (k > 0 && t[k-1] == '\\')
+ t[k-1] = 0;
+ else
+ state = NORMAL;
+
+ j = strstrip(t);
+ if (!isempty(j)) {
+ char *d = NULL;
+
+ if (chkconfig_description)
+ d = strjoin(chkconfig_description, " ", j);
+ else
+ d = strdup(j);
+ if (!d)
+ return log_oom();
+
+ free(chkconfig_description);
+ chkconfig_description = d;
+ }
+
+ } else if (IN_SET(state, LSB, LSB_DESCRIPTION)) {
+
+ if (startswith_no_case(t, "Provides:")) {
+ state = LSB;
+
+ r = handle_provides(s, line, t, t + 9);
+ if (r < 0)
+ return r;
+
+ } else if (startswith_no_case(t, "Required-Start:") ||
+ startswith_no_case(t, "Should-Start:") ||
+ startswith_no_case(t, "X-Start-Before:") ||
+ startswith_no_case(t, "X-Start-After:")) {
+
+ state = LSB;
+
+ r = handle_dependencies(s, line, t, strchr(t, ':') + 1);
+ if (r < 0)
+ return r;
+
+ } else if (startswith_no_case(t, "Description:")) {
+ const char *j;
+
+ state = LSB_DESCRIPTION;
+
+ j = empty_to_null(strstrip(t+12));
+
+ r = free_and_strdup(&long_description, j);
+ if (r < 0)
+ return log_oom();
+
+ } else if (startswith_no_case(t, "Short-Description:")) {
+ const char *j;
+
+ state = LSB;
+
+ j = empty_to_null(strstrip(t+18));
+
+ r = free_and_strdup(&short_description, j);
+ if (r < 0)
+ return log_oom();
+
+ } else if (state == LSB_DESCRIPTION) {
+
+ if (startswith(l, "#\t") || startswith(l, "# ")) {
+ const char *j;
+
+ j = strstrip(t);
+ if (!isempty(j)) {
+ char *d = NULL;
+
+ if (long_description)
+ d = strjoin(long_description, " ", t);
+ else
+ d = strdup(j);
+ if (!d)
+ return log_oom();
+
+ free(long_description);
+ long_description = d;
+ }
+
+ } else
+ state = LSB;
+ }
+ }
+ }
+
+ s->reload = supports_reload;
+
+ /* We use the long description only if
+ * no short description is set. */
+
+ if (short_description)
+ description = short_description;
+ else if (chkconfig_description)
+ description = chkconfig_description;
+ else if (long_description)
+ description = long_description;
+ else
+ description = NULL;
+
+ if (description) {
+ char *d;
+
+ d = strappend(s->has_lsb ? "LSB: " : "SYSV: ", description);
+ if (!d)
+ return log_oom();
+
+ s->description = d;
+ }
+
+ s->loaded = true;
+ return 0;
+}
+
+static int fix_order(SysvStub *s, Hashmap *all_services) {
+ SysvStub *other;
+ Iterator j;
+ int r;
+
+ assert(s);
+
+ if (!s->loaded)
+ return 0;
+
+ if (s->sysv_start_priority < 0)
+ return 0;
+
+ HASHMAP_FOREACH(other, all_services, j) {
+ if (s == other)
+ continue;
+
+ if (!other->loaded)
+ continue;
+
+ if (other->sysv_start_priority < 0)
+ continue;
+
+ /* If both units have modern headers we don't care
+ * about the priorities */
+ if (s->has_lsb && other->has_lsb)
+ continue;
+
+ if (other->sysv_start_priority < s->sysv_start_priority) {
+ r = strv_extend(&s->after, other->name);
+ if (r < 0)
+ return log_oom();
+
+ } else if (other->sysv_start_priority > s->sysv_start_priority) {
+ r = strv_extend(&s->before, other->name);
+ if (r < 0)
+ return log_oom();
+ } else
+ continue;
+
+ /* FIXME: Maybe we should compare the name here lexicographically? */
+ }
+
+ return 0;
+}
+
+static int acquire_search_path(const char *def, const char *envvar, char ***ret) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char *e;
+ int r;
+
+ assert(def);
+ assert(envvar);
+
+ e = getenv(envvar);
+ if (e) {
+ r = path_split_and_make_absolute(e, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make $%s search path absolute: %m", envvar);
+ }
+
+ if (strv_isempty(l)) {
+ strv_free(l);
+
+ l = strv_new(def);
+ if (!l)
+ return log_oom();
+ }
+
+ if (!path_strv_resolve_uniq(l, NULL))
+ return log_oom();
+
+ *ret = TAKE_PTR(l);
+
+ return 0;
+}
+
+static int enumerate_sysv(const LookupPaths *lp, Hashmap *all_services) {
+ _cleanup_strv_free_ char **sysvinit_path = NULL;
+ char **path;
+ int r;
+
+ assert(lp);
+
+ r = acquire_search_path(SYSTEM_SYSVINIT_PATH, "SYSTEMD_SYSVINIT_PATH", &sysvinit_path);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(path, sysvinit_path) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(*path);
+ if (!d) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Opening %s failed, ignoring: %m", *path);
+ continue;
+ }
+
+ FOREACH_DIRENT(de, d, log_error_errno(errno, "Failed to enumerate directory %s, ignoring: %m", *path)) {
+ _cleanup_free_ char *fpath = NULL, *name = NULL;
+ _cleanup_(free_sysvstubp) SysvStub *service = NULL;
+ struct stat st;
+
+ if (fstatat(dirfd(d), de->d_name, &st, 0) < 0) {
+ log_warning_errno(errno, "stat() failed on %s/%s, ignoring: %m", *path, de->d_name);
+ continue;
+ }
+
+ if (!(st.st_mode & S_IXUSR))
+ continue;
+
+ if (!S_ISREG(st.st_mode))
+ continue;
+
+ name = sysv_translate_name(de->d_name);
+ if (!name)
+ return log_oom();
+
+ if (hashmap_contains(all_services, name))
+ continue;
+
+ r = unit_file_exists(UNIT_FILE_SYSTEM, lp, name);
+ if (r < 0 && !IN_SET(r, -ELOOP, -ERFKILL, -EADDRNOTAVAIL)) {
+ log_debug_errno(r, "Failed to detect whether %s exists, skipping: %m", name);
+ continue;
+ } else if (r != 0) {
+ log_debug("Native unit for %s already exists, skipping.", name);
+ continue;
+ }
+
+ fpath = strjoin(*path, "/", de->d_name);
+ if (!fpath)
+ return log_oom();
+
+ service = new0(SysvStub, 1);
+ if (!service)
+ return log_oom();
+
+ service->sysv_start_priority = -1;
+ service->name = TAKE_PTR(name);
+ service->path = TAKE_PTR(fpath);
+
+ r = hashmap_put(all_services, service->name, service);
+ if (r < 0)
+ return log_oom();
+
+ service = NULL;
+ }
+ }
+
+ return 0;
+}
+
+static int set_dependencies_from_rcnd(const LookupPaths *lp, Hashmap *all_services) {
+ Set *runlevel_services[ELEMENTSOF(rcnd_table)] = {};
+ _cleanup_strv_free_ char **sysvrcnd_path = NULL;
+ SysvStub *service;
+ unsigned i;
+ Iterator j;
+ char **p;
+ int r;
+
+ assert(lp);
+
+ r = acquire_search_path(SYSTEM_SYSVRCND_PATH, "SYSTEMD_SYSVRCND_PATH", &sysvrcnd_path);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, sysvrcnd_path) {
+ for (i = 0; i < ELEMENTSOF(rcnd_table); i ++) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ _cleanup_free_ char *path = NULL;
+ struct dirent *de;
+
+ path = strjoin(*p, "/", rcnd_table[i].path);
+ if (!path) {
+ r = log_oom();
+ goto finish;
+ }
+
+ d = opendir(path);
+ if (!d) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Opening %s failed, ignoring: %m", path);
+
+ continue;
+ }
+
+ FOREACH_DIRENT(de, d, log_error_errno(errno, "Failed to enumerate directory %s, ignoring: %m", path)) {
+ _cleanup_free_ char *name = NULL, *fpath = NULL;
+ int a, b;
+
+ if (de->d_name[0] != 'S')
+ continue;
+
+ if (strlen(de->d_name) < 4)
+ continue;
+
+ a = undecchar(de->d_name[1]);
+ b = undecchar(de->d_name[2]);
+
+ if (a < 0 || b < 0)
+ continue;
+
+ fpath = strjoin(*p, "/", de->d_name);
+ if (!fpath) {
+ r = log_oom();
+ goto finish;
+ }
+
+ name = sysv_translate_name(de->d_name + 3);
+ if (!name) {
+ r = log_oom();
+ goto finish;
+ }
+
+ service = hashmap_get(all_services, name);
+ if (!service) {
+ log_debug("Ignoring %s symlink in %s, not generating %s.", de->d_name, rcnd_table[i].path, name);
+ continue;
+ }
+
+ service->sysv_start_priority = MAX(a*10 + b, service->sysv_start_priority);
+
+ r = set_ensure_allocated(&runlevel_services[i], NULL);
+ if (r < 0) {
+ log_oom();
+ goto finish;
+ }
+
+ r = set_put(runlevel_services[i], service);
+ if (r < 0) {
+ log_oom();
+ goto finish;
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < ELEMENTSOF(rcnd_table); i ++)
+ SET_FOREACH(service, runlevel_services[i], j) {
+ r = strv_extend(&service->before, rcnd_table[i].target);
+ if (r < 0) {
+ log_oom();
+ goto finish;
+ }
+ r = strv_extend(&service->wanted_by, rcnd_table[i].target);
+ if (r < 0) {
+ log_oom();
+ goto finish;
+ }
+ }
+
+ r = 0;
+
+finish:
+ for (i = 0; i < ELEMENTSOF(rcnd_table); i++)
+ set_free(runlevel_services[i]);
+
+ return r;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ _cleanup_(free_sysvstub_hashmapp) Hashmap *all_services = NULL;
+ _cleanup_(lookup_paths_free) LookupPaths lp = {};
+ SysvStub *service;
+ Iterator j;
+ int r;
+
+ assert_se(arg_dest = dest_late);
+
+ r = lookup_paths_init(&lp, UNIT_FILE_SYSTEM, LOOKUP_PATHS_EXCLUDE_GENERATED, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to find lookup paths: %m");
+
+ all_services = hashmap_new(&string_hash_ops);
+ if (!all_services)
+ return log_oom();
+
+ r = enumerate_sysv(&lp, all_services);
+ if (r < 0)
+ return r;
+
+ r = set_dependencies_from_rcnd(&lp, all_services);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(service, all_services, j)
+ (void) load_sysv(service);
+
+ HASHMAP_FOREACH(service, all_services, j) {
+ (void) fix_order(service, all_services);
+ (void) generate_unit_file(service);
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/test/generate-sym-test.py b/src/test/generate-sym-test.py
new file mode 100755
index 0000000..357cce8
--- /dev/null
+++ b/src/test/generate-sym-test.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+import sys, re
+
+print('#include <stdio.h>')
+for header in sys.argv[2:]:
+ print('#include "{}"'.format(header.split('/')[-1]))
+
+print('''
+void* functions[] = {''')
+
+for line in open(sys.argv[1]):
+ match = re.search('^ +([a-zA-Z0-9_]+);', line)
+ if match:
+ print(' {},'.format(match.group(1)))
+
+print('''};
+
+int main(void) {
+ unsigned i;
+ for (i = 0; i < sizeof(functions)/sizeof(void*); i++)
+ printf("%p\\n", functions[i]);
+ return 0;
+}''')
diff --git a/src/test/meson.build b/src/test/meson.build
new file mode 100644
index 0000000..08026ea
--- /dev/null
+++ b/src/test/meson.build
@@ -0,0 +1,1067 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+awkscript = 'test-hashmap-ordered.awk'
+test_hashmap_ordered_c = custom_target(
+ 'test-hashmap-ordered.c',
+ input : [awkscript, 'test-hashmap-plain.c'],
+ output : 'test-hashmap-ordered.c',
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true,
+ build_by_default : want_tests != 'false')
+
+test_include_dir = include_directories('.')
+
+path = run_command('sh', ['-c', 'echo "$PATH"']).stdout()
+test_env = environment()
+test_env.set('SYSTEMD_KBD_MODEL_MAP', kbd_model_map)
+test_env.set('SYSTEMD_LANGUAGE_FALLBACK_MAP', language_fallback_map)
+test_env.set('PATH', path)
+test_env.prepend('PATH', meson.build_root())
+
+############################################################
+
+generate_sym_test_py = find_program('generate-sym-test.py')
+
+test_libsystemd_sym_c = custom_target(
+ 'test-libsystemd-sym.c',
+ input : [libsystemd_sym_path] + systemd_headers,
+ output : 'test-libsystemd-sym.c',
+ command : [generate_sym_test_py, libsystemd_sym_path] + systemd_headers,
+ capture : true,
+ build_by_default : want_tests != 'false')
+
+test_libudev_sym_c = custom_target(
+ 'test-libudev-sym.c',
+ input : [libudev_sym_path, libudev_h_path],
+ output : 'test-libudev-sym.c',
+ command : [generate_sym_test_py, '@INPUT0@', '@INPUT1@'],
+ capture : true,
+ build_by_default : want_tests != 'false')
+
+test_dlopen_c = files('test-dlopen.c')
+
+############################################################
+
+test_systemd_tmpfiles_py = find_program('test-systemd-tmpfiles.py')
+
+############################################################
+
+tests += [
+ [['src/test/test-device-nodes.c'],
+ [],
+ []],
+
+ [['src/test/test-engine.c',
+ 'src/test/test-helper.c'],
+ [libcore,
+ libudev,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-emergency-action.c'],
+ [libcore,
+ libshared],
+ []],
+
+ [['src/test/test-chown-rec.c'],
+ [libcore,
+ libshared],
+ []],
+
+ [['src/test/test-job-type.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-ns.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid],
+ '', 'manual'],
+
+ [['src/test/test-nscd-flush.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid],
+ '', 'manual'],
+
+ [['src/test/test-loopback.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-hostname.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid],
+ '', 'unsafe'],
+
+ [['src/test/test-dns-domain.c'],
+ [libcore,
+ libshared,
+ libsystemd_network],
+ []],
+
+ [['src/test/test-boot-timestamps.c'],
+ [],
+ [],
+ 'ENABLE_EFI'],
+
+ [['src/test/test-unit-name.c',
+ 'src/test/test-helper.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-unit-file.c',
+ 'src/test/test-helper.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-serialize.c'],
+ [],
+ []],
+
+ [['src/test/test-utf8.c'],
+ [],
+ []],
+
+ [['src/test/test-dev-setup.c'],
+ [],
+ []],
+
+ [['src/test/test-capability.c'],
+ [],
+ [libcap]],
+
+ [['src/test/test-async.c'],
+ [],
+ [],
+ '', 'timeout=120'],
+
+ [['src/test/test-locale-util.c'],
+ [],
+ []],
+
+ [['src/test/test-copy.c'],
+ [],
+ []],
+
+ [['src/test/test-static-destruct.c'],
+ [],
+ []],
+
+ [['src/test/test-sigbus.c'],
+ [],
+ []],
+
+ [['src/test/test-condition.c'],
+ [],
+ []],
+
+ [['src/test/test-fdset.c'],
+ [],
+ []],
+
+ [['src/test/test-fstab-util.c'],
+ [],
+ []],
+
+ [['src/test/test-random-util.c'],
+ [],
+ []],
+
+ [['src/test/test-format-table.c'],
+ [],
+ []],
+
+ [['src/test/test-ratelimit.c'],
+ [],
+ []],
+
+ [['src/test/test-util.c'],
+ [],
+ []],
+
+ [['src/test/test-json.c'],
+ [],
+ []],
+
+ [['src/test/test-mount-util.c'],
+ [],
+ []],
+
+ [['src/test/test-mountpoint-util.c'],
+ [],
+ []],
+
+ [['src/test/test-exec-util.c'],
+ [],
+ []],
+
+ [['src/test/test-hexdecoct.c'],
+ [],
+ []],
+
+ [['src/test/test-alloc-util.c'],
+ [],
+ []],
+
+ [['src/test/test-xattr-util.c'],
+ [],
+ []],
+
+ [['src/test/test-io-util.c'],
+ [],
+ []],
+
+ [['src/test/test-glob-util.c'],
+ [],
+ []],
+
+ [['src/test/test-fs-util.c'],
+ [],
+ []],
+
+ [['src/test/test-proc-cmdline.c'],
+ [],
+ []],
+
+ [['src/test/test-fd-util.c'],
+ [],
+ []],
+
+ [['src/test/test-web-util.c'],
+ [],
+ []],
+
+ [['src/test/test-cpu-set-util.c'],
+ [],
+ []],
+
+ [['src/test/test-stat-util.c'],
+ [],
+ []],
+
+ [['src/test/test-os-util.c'],
+ [],
+ []],
+
+ [['src/test/test-escape.c'],
+ [],
+ []],
+
+ [['src/test/test-specifier.c'],
+ [],
+ []],
+
+ [['src/test/test-string-util.c'],
+ [],
+ []],
+
+ [['src/test/test-extract-word.c'],
+ [],
+ []],
+
+ [['src/test/test-parse-util.c'],
+ [],
+ []],
+
+ [['src/test/test-user-util.c'],
+ [],
+ []],
+
+ [['src/test/test-hostname-util.c'],
+ [],
+ []],
+
+ [['src/test/test-process-util.c'],
+ [],
+ []],
+
+ [['src/test/test-terminal-util.c'],
+ [],
+ []],
+
+ [['src/test/test-path-lookup.c'],
+ [],
+ []],
+
+ [['src/test/test-pretty-print.c'],
+ [],
+ []],
+
+ [['src/test/test-uid-range.c'],
+ [],
+ []],
+
+ [['src/test/test-cap-list.c',
+ generated_gperf_headers],
+ [],
+ [libcap]],
+
+ [['src/test/test-socket-util.c'],
+ [],
+ []],
+
+ [['src/test/test-in-addr-util.c'],
+ [],
+ []],
+
+ [['src/test/test-barrier.c'],
+ [],
+ []],
+
+ [['src/test/test-tmpfiles.c'],
+ [],
+ []],
+
+ [['src/test/test-namespace.c'],
+ [libcore,
+ libshared],
+ [threads,
+ libblkid]],
+
+ [['src/test/test-verbs.c'],
+ [],
+ []],
+
+ [['src/test/test-install-root.c'],
+ [],
+ []],
+
+ [['src/test/test-acl-util.c'],
+ [],
+ [],
+ 'HAVE_ACL'],
+
+ [['src/test/test-seccomp.c'],
+ [],
+ [libseccomp],
+ 'HAVE_SECCOMP'],
+
+ [['src/test/test-rlimit-util.c'],
+ [],
+ []],
+
+ [['src/test/test-ask-password-api.c'],
+ [],
+ [],
+ '', 'manual'],
+
+ [['src/test/test-dissect-image.c'],
+ [],
+ [libblkid],
+ '', 'manual'],
+
+ [['src/test/test-signal-util.c'],
+ [],
+ []],
+
+ [['src/test/test-selinux.c'],
+ [],
+ []],
+
+ [['src/test/test-sizeof.c'],
+ [libbasic],
+ []],
+
+ [['src/test/test-bpf.c',
+ 'src/test/test-helper.c'],
+ [libcore,
+ libshared],
+ [libmount,
+ threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libblkid]],
+
+ [['src/test/test-watch-pid.c',
+ 'src/test/test-helper.c'],
+ [libcore,
+ libshared],
+ [libmount,
+ threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libblkid]],
+
+ [['src/test/test-hashmap.c',
+ 'src/test/test-hashmap-plain.c',
+ test_hashmap_ordered_c],
+ [],
+ [],
+ '', 'timeout=90'],
+
+ [['src/test/test-set.c'],
+ [],
+ []],
+
+ [['src/test/test-set-disable-mempool.c'],
+ [],
+ [threads]],
+
+ [['src/test/test-bitmap.c'],
+ [],
+ []],
+
+ [['src/test/test-xml.c'],
+ [],
+ []],
+
+ [['src/test/test-list.c'],
+ [],
+ []],
+
+ [['src/test/test-procfs-util.c'],
+ [],
+ []],
+
+ [['src/test/test-unaligned.c'],
+ [],
+ []],
+
+ [['src/test/test-tables.c',
+ 'src/shared/test-tables.h',
+ 'src/journal/journald-server.c',
+ 'src/journal/journald-server.h'],
+ [libcore,
+ libjournal_core,
+ libudev_core,
+ libudev_static,
+ libsystemd_network,
+ libshared],
+ [threads,
+ libseccomp,
+ libmount,
+ libxz,
+ liblz4,
+ libblkid],
+ '', '', [], libudev_core_includes],
+
+ [['src/test/test-prioq.c'],
+ [],
+ []],
+
+ [['src/test/test-fileio.c'],
+ [],
+ []],
+
+ [['src/test/test-time-util.c'],
+ [],
+ []],
+
+ [['src/test/test-clock.c'],
+ [],
+ []],
+
+ [['src/test/test-architecture.c'],
+ [],
+ []],
+
+ [['src/test/test-log.c'],
+ [],
+ []],
+
+ [['src/test/test-ipcrm.c'],
+ [],
+ [],
+ '', 'unsafe'],
+
+ [['src/test/test-btrfs.c'],
+ [],
+ [],
+ '', 'manual'],
+
+
+ [['src/test/test-firewall-util.c'],
+ [libshared],
+ [],
+ 'HAVE_LIBIPTC'],
+
+ [['src/test/test-netlink-manual.c'],
+ [],
+ [libkmod],
+ 'HAVE_KMOD', 'manual'],
+
+ [['src/test/test-ellipsize.c'],
+ [],
+ []],
+
+ [['src/test/test-date.c'],
+ [],
+ []],
+
+ [['src/test/test-sleep.c'],
+ [],
+ []],
+
+ [['src/test/test-replace-var.c'],
+ [],
+ []],
+
+ [['src/test/test-calendarspec.c'],
+ [],
+ []],
+
+ [['src/test/test-strip-tab-ansi.c'],
+ [],
+ []],
+
+ [['src/test/test-daemon.c'],
+ [],
+ []],
+
+ [['src/test/test-cgroup.c'],
+ [],
+ [],
+ '', 'manual'],
+
+ [['src/test/test-cgroup-mask.c',
+ 'src/test/test-helper.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-cgroup-util.c'],
+ [],
+ []],
+
+ [['src/test/test-env-util.c'],
+ [],
+ []],
+
+ [['src/test/test-strbuf.c'],
+ [],
+ []],
+
+ [['src/test/test-strv.c'],
+ [],
+ []],
+
+ [['src/test/test-path-util.c'],
+ [],
+ []],
+
+ [['src/test/test-path.c',
+ 'src/test/test-helper.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-execute.c',
+ 'src/test/test-helper.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid],
+ '', 'timeout=360'],
+
+ [['src/test/test-siphash24.c'],
+ [],
+ []],
+
+ [['src/test/test-strxcpyx.c'],
+ [],
+ []],
+
+ [['src/test/test-install.c'],
+ [libcore,
+ libshared],
+ [],
+ '', 'manual'],
+
+ [['src/test/test-watchdog.c'],
+ [],
+ []],
+
+ [['src/test/test-sched-prio.c',
+ 'src/test/test-helper.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-conf-files.c'],
+ [],
+ []],
+
+ [['src/test/test-conf-parser.c'],
+ [],
+ []],
+
+ [['src/test/test-af-list.c',
+ generated_gperf_headers],
+ [],
+ []],
+
+ [['src/test/test-arphrd-list.c',
+ generated_gperf_headers],
+ [],
+ []],
+
+ [['src/test/test-ip-protocol-list.c',
+ shared_generated_gperf_headers],
+ [],
+ []],
+
+ [['src/test/test-journal-importer.c'],
+ [],
+ []],
+
+ [['src/test/test-libudev.c'],
+ [libshared],
+ []],
+
+ [['src/test/test-udev.c'],
+ [libudev_core,
+ libudev_static,
+ libsystemd_network,
+ libshared],
+ [threads,
+ librt,
+ libblkid,
+ libkmod,
+ libacl],
+ '', 'manual', '-DLOG_REALM=LOG_REALM_UDEV'],
+
+ [['src/test/test-id128.c'],
+ [],
+ []],
+
+ [['src/test/test-hash.c'],
+ [],
+ []],
+
+ [['src/test/test-gcrypt-util.c'],
+ [],
+ [],
+ 'HAVE_GCRYPT'],
+
+ [['src/test/test-nss.c'],
+ [],
+ [libdl],
+ 'ENABLE_NSS', 'manual'],
+
+ [['src/test/test-umount.c',
+ 'src/core/mount-setup.c',
+ 'src/core/mount-setup.h',
+ 'src/core/umount.c',
+ 'src/core/umount.h'],
+ [],
+ [libmount]],
+
+ [['src/test/test-bus-util.c'],
+ [],
+ []],
+
+ [['src/test/test-sd-hwdb.c'],
+ [],
+ []],
+]
+
+############################################################
+
+# define some tests here, because the link_with deps were not defined earlier
+
+tests += [
+ [['src/journal/test-journal.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-journal-send.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-journal-syslog.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4,
+ libselinux]],
+
+ [['src/journal/test-journal-match.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-journal-enum.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4],
+ '', 'timeout=360'],
+
+ [['src/journal/test-journal-stream.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-journal-flush.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-journal-init.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-journal-config.c'],
+ [libjournal_core,
+ libshared],
+ [libxz,
+ liblz4,
+ libselinux]],
+
+ [['src/journal/test-journal-verify.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-journal-interleaving.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-mmap-cache.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-catalog.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-compress.c'],
+ [libjournal_core,
+ libshared],
+ [liblz4,
+ libxz]],
+
+ [['src/journal/test-compress-benchmark.c'],
+ [libjournal_core,
+ libshared],
+ [liblz4,
+ libxz],
+ '', 'timeout=90'],
+
+ [['src/journal/test-audit-type.c'],
+ [libjournal_core,
+ libshared],
+ [liblz4,
+ libxz]],
+]
+
+############################################################
+
+tests += [
+ [['src/libsystemd/sd-bus/test-bus-address.c'],
+ [],
+ [threads]],
+
+ [['src/libsystemd/sd-bus/test-bus-marshal.c'],
+ [],
+ [threads,
+ libglib,
+ libgobject,
+ libgio,
+ libdbus]],
+
+ [['src/libsystemd/sd-bus/test-bus-signature.c'],
+ [],
+ [threads]],
+
+ [['src/libsystemd/sd-bus/test-bus-watch-bind.c'],
+ [],
+ [threads], '', 'timeout=120'],
+
+ [['src/libsystemd/sd-bus/test-bus-chat.c'],
+ [],
+ [threads]],
+
+ [['src/libsystemd/sd-bus/test-bus-cleanup.c'],
+ [],
+ [threads,
+ libseccomp]],
+
+ [['src/libsystemd/sd-bus/test-bus-error.c'],
+ [libshared_static,
+ libsystemd_static],
+ []],
+
+ [['src/libsystemd/sd-bus/test-bus-track.c'],
+ [],
+ [libseccomp]],
+
+ [['src/libsystemd/sd-bus/test-bus-server.c'],
+ [],
+ [threads]],
+
+ [['src/libsystemd/sd-bus/test-bus-objects.c'],
+ [],
+ [threads]],
+
+ [['src/libsystemd/sd-bus/test-bus-vtable.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-bus/test-bus-gvariant.c'],
+ [],
+ [libglib,
+ libgobject,
+ libgio]],
+
+ [['src/libsystemd/sd-bus/test-bus-creds.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-bus/test-bus-match.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-bus/test-bus-benchmark.c'],
+ [],
+ [threads],
+ '', 'manual'],
+
+ [['src/libsystemd/sd-bus/test-bus-introspect.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-event/test-event.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-netlink/test-netlink.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-netlink/test-local-addresses.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-resolve/test-resolve.c'],
+ [],
+ [threads],
+ '', 'timeout=120'],
+
+ [['src/libsystemd/sd-login/test-login.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-device/test-sd-device.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-device/test-sd-device-thread.c'],
+ [libbasic,
+ libshared_static,
+ libsystemd],
+ [threads]],
+
+ [['src/libsystemd/sd-device/test-udev-device-thread.c'],
+ [libbasic,
+ libshared_static,
+ libudev],
+ [threads]],
+
+ [['src/libsystemd/sd-device/test-sd-device-monitor.c'],
+ [],
+ []],
+
+]
+
+if cxx_cmd != ''
+ tests += [
+ [['src/libsystemd/sd-bus/test-bus-vtable-cc.cc'],
+ [],
+ []]
+ ]
+endif
+
+############################################################
+
+tests += [
+ [['src/libsystemd-network/test-dhcp-option.c',
+ 'src/libsystemd-network/dhcp-protocol.h',
+ 'src/libsystemd-network/dhcp-internal.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-sd-dhcp-lease.c',
+ 'src/libsystemd-network/dhcp-lease-internal.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-dhcp-client.c',
+ 'src/libsystemd-network/dhcp-protocol.h',
+ 'src/libsystemd-network/dhcp-internal.h',
+ 'src/systemd/sd-dhcp-client.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-dhcp-server.c'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-ipv4ll.c',
+ 'src/libsystemd-network/arp-util.h',
+ 'src/systemd/sd-ipv4ll.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-ipv4ll-manual.c',
+ 'src/systemd/sd-ipv4ll.h'],
+ [libshared,
+ libsystemd_network],
+ [],
+ '', 'manual'],
+
+ [['src/libsystemd-network/test-acd.c',
+ 'src/systemd/sd-ipv4acd.h'],
+ [libshared,
+ libsystemd_network],
+ [],
+ '', 'manual'],
+
+ [['src/libsystemd-network/test-ndisc-rs.c',
+ 'src/libsystemd-network/dhcp-identifier.h',
+ 'src/libsystemd-network/dhcp-identifier.c',
+ 'src/libsystemd-network/icmp6-util.h',
+ 'src/systemd/sd-dhcp6-client.h',
+ 'src/systemd/sd-ndisc.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-ndisc-ra.c',
+ 'src/libsystemd-network/icmp6-util.h',
+ 'src/systemd/sd-ndisc.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-dhcp6-client.c',
+ 'src/libsystemd-network/dhcp-identifier.h',
+ 'src/libsystemd-network/dhcp-identifier.c',
+ 'src/libsystemd-network/dhcp6-internal.h',
+ 'src/systemd/sd-dhcp6-client.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-lldp.c'],
+ [libshared,
+ libsystemd_network],
+ []],
+]
+
+############################################################
+
+tests += [
+ [['src/login/test-login-shared.c'],
+ [],
+ []],
+
+ [['src/login/test-inhibit.c'],
+ [],
+ [],
+ '', 'manual'],
+
+ [['src/login/test-login-tables.c'],
+ [liblogind_core,
+ libshared],
+ [threads]],
+]
diff --git a/src/test/test-acl-util.c b/src/test/test-acl-util.c
new file mode 100644
index 0000000..df87974
--- /dev/null
+++ b/src/test/test-acl-util.c
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "acl-util.h"
+#include "fd-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+
+static void test_add_acls_for_user(void) {
+ char fn[] = "/tmp/test-empty.XXXXXX";
+ _cleanup_close_ int fd = -1;
+ char *cmd;
+ uid_t uid;
+ int r;
+
+ fd = mkostemp_safe(fn);
+ assert_se(fd >= 0);
+
+ /* Use the mode that user journal files use */
+ assert_se(fchmod(fd, 0640) == 0);
+
+ cmd = strjoina("ls -l ", fn);
+ assert_se(system(cmd) == 0);
+
+ cmd = strjoina("getfacl -p ", fn);
+ assert_se(system(cmd) == 0);
+
+ if (getuid() == 0) {
+ const char *nobody = NOBODY_USER_NAME;
+ r = get_user_creds(&nobody, &uid, NULL, NULL, NULL, 0);
+ if (r < 0)
+ uid = 0;
+ } else
+ uid = getuid();
+
+ r = add_acls_for_user(fd, uid);
+ assert_se(r >= 0);
+
+ cmd = strjoina("ls -l ", fn);
+ assert_se(system(cmd) == 0);
+
+ cmd = strjoina("getfacl -p ", fn);
+ assert_se(system(cmd) == 0);
+
+ /* set the acls again */
+
+ r = add_acls_for_user(fd, uid);
+ assert_se(r >= 0);
+
+ cmd = strjoina("ls -l ", fn);
+ assert_se(system(cmd) == 0);
+
+ cmd = strjoina("getfacl -p ", fn);
+ assert_se(system(cmd) == 0);
+
+ unlink(fn);
+}
+
+int main(int argc, char **argv) {
+ test_add_acls_for_user();
+
+ return 0;
+}
diff --git a/src/test/test-af-list.c b/src/test/test-af-list.c
new file mode 100644
index 0000000..c8ef329
--- /dev/null
+++ b/src/test/test-af-list.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+#include <sys/socket.h>
+
+#include "macro.h"
+#include "string-util.h"
+#include "util.h"
+
+_unused_
+static const struct af_name* lookup_af(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "af-from-name.h"
+#include "af-list.h"
+#include "af-to-name.h"
+
+int main(int argc, const char *argv[]) {
+
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(af_names); i++) {
+ if (af_names[i]) {
+ assert_se(streq(af_to_name(i), af_names[i]));
+ assert_se(af_from_name(af_names[i]) == (int) i);
+ }
+ }
+
+ assert_se(af_to_name(af_max()) == NULL);
+ assert_se(af_to_name(-1) == NULL);
+ assert_se(af_from_name("huddlduddl") == -EINVAL);
+ assert_se(af_from_name("") == -EINVAL);
+
+ return 0;
+}
diff --git a/src/test/test-alloc-util.c b/src/test/test-alloc-util.c
new file mode 100644
index 0000000..40c32d7
--- /dev/null
+++ b/src/test/test-alloc-util.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdint.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "util.h"
+
+static void test_alloca(void) {
+ static const uint8_t zero[997] = { };
+ char *t;
+
+ t = alloca_align(17, 512);
+ assert_se(!((uintptr_t)t & 0xff));
+ memzero(t, 17);
+
+ t = alloca0_align(997, 1024);
+ assert_se(!((uintptr_t)t & 0x1ff));
+ assert_se(!memcmp(t, zero, 997));
+}
+
+static void test_memdup_multiply_and_greedy_realloc(void) {
+ int org[] = {1, 2, 3};
+ _cleanup_free_ int *dup;
+ int *p;
+ size_t i, allocated = 3;
+
+ dup = (int*) memdup_suffix0_multiply(org, sizeof(int), 3);
+ assert_se(dup);
+ assert_se(dup[0] == 1);
+ assert_se(dup[1] == 2);
+ assert_se(dup[2] == 3);
+ assert_se(*(uint8_t*) (dup + 3) == (uint8_t) 0);
+ free(dup);
+
+ dup = (int*) memdup_multiply(org, sizeof(int), 3);
+ assert_se(dup);
+ assert_se(dup[0] == 1);
+ assert_se(dup[1] == 2);
+ assert_se(dup[2] == 3);
+
+ p = dup;
+ assert_se(greedy_realloc0((void**) &dup, &allocated, 2, sizeof(int)) == p);
+
+ p = (int *) greedy_realloc0((void**) &dup, &allocated, 10, sizeof(int));
+ assert_se(p == dup);
+ assert_se(allocated >= 10);
+ assert_se(p[0] == 1);
+ assert_se(p[1] == 2);
+ assert_se(p[2] == 3);
+ for (i = 3; i < allocated; i++)
+ assert_se(p[i] == 0);
+}
+
+static void test_bool_assign(void) {
+ bool b, c, *cp = &c, d, e, f, g, h;
+
+ b = 123;
+ *cp = -11;
+ d = 0xF & 0xFF;
+ e = b & d;
+ f = 0x0;
+ g = cp; /* cast from pointer */
+ h = NULL; /* cast from pointer */
+
+ assert(b);
+ assert(c);
+ assert(d);
+ assert(e);
+ assert(!f);
+ assert(g);
+ assert(!h);
+}
+
+int main(int argc, char *argv[]) {
+ test_alloca();
+ test_memdup_multiply_and_greedy_realloc();
+ test_bool_assign();
+
+ return 0;
+}
diff --git a/src/test/test-architecture.c b/src/test/test-architecture.c
new file mode 100644
index 0000000..8c43bfc
--- /dev/null
+++ b/src/test/test-architecture.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "architecture.h"
+#include "log.h"
+#include "tests.h"
+#include "util.h"
+#include "virt.h"
+
+int main(int argc, char *argv[]) {
+ int a, v;
+ const char *p;
+
+ test_setup_logging(LOG_INFO);
+
+ assert_se(architecture_from_string("") < 0);
+ assert_se(architecture_from_string(NULL) < 0);
+ assert_se(architecture_from_string("hoge") < 0);
+ assert_se(architecture_to_string(-1) == NULL);
+ assert_se(architecture_from_string(architecture_to_string(0)) == 0);
+ assert_se(architecture_from_string(architecture_to_string(1)) == 1);
+
+ v = detect_virtualization();
+ if (IN_SET(v, -EPERM, -EACCES))
+ return log_tests_skipped("Cannot detect virtualization");
+
+ assert_se(v >= 0);
+
+ log_info("virtualization=%s id=%s",
+ VIRTUALIZATION_IS_CONTAINER(v) ? "container" :
+ VIRTUALIZATION_IS_VM(v) ? "vm" : "n/a",
+ virtualization_to_string(v));
+
+ a = uname_architecture();
+ assert_se(a >= 0);
+
+ p = architecture_to_string(a);
+ assert_se(p);
+ log_info("uname architecture=%s", p);
+ assert_se(architecture_from_string(p) == a);
+
+ a = native_architecture();
+ assert_se(a >= 0);
+
+ p = architecture_to_string(a);
+ assert_se(p);
+ log_info("native architecture=%s", p);
+ assert_se(architecture_from_string(p) == a);
+
+ log_info("primary library architecture=" LIB_ARCH_TUPLE);
+
+ return 0;
+}
diff --git a/src/test/test-arphrd-list.c b/src/test/test-arphrd-list.c
new file mode 100644
index 0000000..3005fc1
--- /dev/null
+++ b/src/test/test-arphrd-list.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if_arp.h>
+#include <string.h>
+
+#include "macro.h"
+#include "missing_network.h"
+#include "string-util.h"
+
+_unused_ \
+static const struct arphrd_name* lookup_arphrd(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "arphrd-from-name.h"
+#include "arphrd-list.h"
+#include "arphrd-to-name.h"
+
+int main(int argc, const char *argv[]) {
+
+ unsigned i;
+
+ for (i = 1; i < ELEMENTSOF(arphrd_names); i++) {
+ if (arphrd_names[i]) {
+ assert_se(streq(arphrd_to_name(i), arphrd_names[i]));
+ assert_se(arphrd_from_name(arphrd_names[i]) == (int) i);
+ }
+ }
+
+ assert_se(arphrd_to_name(arphrd_max()) == NULL);
+ assert_se(arphrd_to_name(0) == NULL);
+ assert_se(arphrd_from_name("huddlduddl") == -EINVAL);
+ assert_se(arphrd_from_name("") == -EINVAL);
+
+ return 0;
+}
diff --git a/src/test/test-ask-password-api.c b/src/test/test-ask-password-api.c
new file mode 100644
index 0000000..23b06be
--- /dev/null
+++ b/src/test/test-ask-password-api.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "ask-password-api.h"
+#include "log.h"
+#include "strv.h"
+
+static void ask_password(void) {
+ int r;
+ _cleanup_strv_free_ char **ret = NULL;
+
+ r = ask_password_tty(-1, "hello?", "da key", 0, 0, NULL, &ret);
+ assert(r >= 0);
+ assert(strv_length(ret) == 1);
+
+ log_info("Got %s", *ret);
+}
+
+int main(int argc, char **argv) {
+ log_parse_environment();
+
+ ask_password();
+}
diff --git a/src/test/test-async.c b/src/test/test-async.c
new file mode 100644
index 0000000..4f53078
--- /dev/null
+++ b/src/test/test-async.c
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <unistd.h>
+
+#include "async.h"
+#include "macro.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static bool test_async = false;
+
+static void *async_func(void *arg) {
+ test_async = true;
+
+ return NULL;
+}
+
+int main(int argc, char *argv[]) {
+ int fd;
+ char name[] = "/tmp/test-asynchronous_close.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ asynchronous_close(fd);
+
+ assert_se(asynchronous_job(async_func, NULL) >= 0);
+
+ assert_se(asynchronous_sync(NULL) >= 0);
+
+ sleep(1);
+
+ assert_se(fcntl(fd, F_GETFD) == -1);
+ assert_se(test_async);
+
+ (void) unlink(name);
+
+ return 0;
+}
diff --git a/src/test/test-barrier.c b/src/test/test-barrier.c
new file mode 100644
index 0000000..6469129
--- /dev/null
+++ b/src/test/test-barrier.c
@@ -0,0 +1,463 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+/*
+ * IPC barrier tests
+ * These tests verify the correct behavior of the IPC Barrier implementation.
+ * Note that the tests use alarm-timers to verify dead-locks and timeouts. These
+ * might not work on slow machines where 20ms are too short to perform specific
+ * operations (though, very unlikely). In case that turns out true, we have to
+ * increase it at the slightly cost of lengthen test-duration on other machines.
+ */
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "barrier.h"
+#include "util.h"
+#include "tests.h"
+#include "virt.h"
+
+/* 20ms to test deadlocks; All timings use multiples of this constant as
+ * alarm/sleep timers. If this timeout is too small for slow machines to perform
+ * the requested operations, we have to increase it. On an i7 this works fine
+ * with 1ms base-time, so 20ms should be just fine for everyone. */
+#define BASE_TIME (20 * USEC_PER_MSEC)
+
+static void set_alarm(usec_t usecs) {
+ struct itimerval v = { };
+
+ timeval_store(&v.it_value, usecs);
+ assert_se(setitimer(ITIMER_REAL, &v, NULL) >= 0);
+}
+
+static void sleep_for(usec_t usecs) {
+ /* stupid usleep() might fail if >1000000 */
+ assert_se(usecs < USEC_PER_SEC);
+ usleep(usecs);
+}
+
+#define TEST_BARRIER(_FUNCTION, _CHILD_CODE, _WAIT_CHILD, _PARENT_CODE, _WAIT_PARENT) \
+ static void _FUNCTION(void) { \
+ Barrier b = BARRIER_NULL; \
+ pid_t pid1, pid2; \
+ \
+ assert_se(barrier_create(&b) >= 0); \
+ assert_se(b.me > 0); \
+ assert_se(b.them > 0); \
+ assert_se(b.pipe[0] > 0); \
+ assert_se(b.pipe[1] > 0); \
+ \
+ pid1 = fork(); \
+ assert_se(pid1 >= 0); \
+ if (pid1 == 0) { \
+ barrier_set_role(&b, BARRIER_CHILD); \
+ { _CHILD_CODE; } \
+ exit(42); \
+ } \
+ \
+ pid2 = fork(); \
+ assert_se(pid2 >= 0); \
+ if (pid2 == 0) { \
+ barrier_set_role(&b, BARRIER_PARENT); \
+ { _PARENT_CODE; } \
+ exit(42); \
+ } \
+ \
+ barrier_destroy(&b); \
+ set_alarm(999999); \
+ { _WAIT_CHILD; } \
+ { _WAIT_PARENT; } \
+ set_alarm(0); \
+ }
+
+#define TEST_BARRIER_WAIT_SUCCESS(_pid) \
+ ({ \
+ int pidr, status; \
+ pidr = waitpid(_pid, &status, 0); \
+ assert_se(pidr == _pid); \
+ assert_se(WIFEXITED(status)); \
+ assert_se(WEXITSTATUS(status) == 42); \
+ })
+
+#define TEST_BARRIER_WAIT_ALARM(_pid) \
+ ({ \
+ int pidr, status; \
+ pidr = waitpid(_pid, &status, 0); \
+ assert_se(pidr == _pid); \
+ assert_se(WIFSIGNALED(status)); \
+ assert_se(WTERMSIG(status) == SIGALRM); \
+ })
+
+/*
+ * Test basic sync points
+ * This places a barrier in both processes and waits synchronously for them.
+ * The timeout makes sure the sync works as expected. The sleep_for() on one side
+ * makes sure the exit of the parent does not overwrite previous barriers. Due
+ * to the sleep_for(), we know that the parent already exited, thus there's a
+ * pending HUP on the pipe. However, the barrier_sync() prefers reads on the
+ * eventfd, thus we can safely wait on the barrier.
+ */
+TEST_BARRIER(test_barrier_sync,
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ sleep_for(BASE_TIME * 2);
+ assert_se(barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next()
+ * This places a barrier in the parent and syncs on it. The child sleeps while
+ * the parent places the barrier and then waits for a barrier. The wait will
+ * succeed as the child hasn't read the parent's barrier, yet. The following
+ * barrier and sync synchronize the exit.
+ */
+TEST_BARRIER(test_barrier_wait_next,
+ ({
+ sleep_for(BASE_TIME);
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_wait_next(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ set_alarm(BASE_TIME * 4);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next() multiple times
+ * This places two barriers in the parent and waits for the child to exit. The
+ * child sleeps 20ms so both barriers _should_ be in place. It then waits for
+ * the parent to place the next barrier twice. The first call will fetch both
+ * barriers and return. However, the second call will stall as the parent does
+ * not place a 3rd barrier (the sleep caught two barriers). wait_next() is does
+ * not look at barrier-links so this stall is expected. Thus this test times
+ * out.
+ */
+TEST_BARRIER(test_barrier_wait_next_twice,
+ ({
+ sleep_for(BASE_TIME);
+ set_alarm(BASE_TIME);
+ assert_se(barrier_wait_next(&b));
+ assert_se(barrier_wait_next(&b));
+ assert_se(0);
+ }),
+ TEST_BARRIER_WAIT_ALARM(pid1),
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ sleep_for(BASE_TIME * 4);
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next() with local barriers
+ * This is the same as test_barrier_wait_next_twice, but places local barriers
+ * between both waits. This does not have any effect on the wait so it times out
+ * like the other test.
+ */
+TEST_BARRIER(test_barrier_wait_next_twice_local,
+ ({
+ sleep_for(BASE_TIME);
+ set_alarm(BASE_TIME);
+ assert_se(barrier_wait_next(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_wait_next(&b));
+ assert_se(0);
+ }),
+ TEST_BARRIER_WAIT_ALARM(pid1),
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ sleep_for(BASE_TIME * 4);
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next() with sync_next()
+ * This is again the same as test_barrier_wait_next_twice but uses a
+ * synced wait as the second wait. This works just fine because the local state
+ * has no barriers placed, therefore, the remote is always in sync.
+ */
+TEST_BARRIER(test_barrier_wait_next_twice_sync,
+ ({
+ sleep_for(BASE_TIME);
+ set_alarm(BASE_TIME);
+ assert_se(barrier_wait_next(&b));
+ assert_se(barrier_sync_next(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next() with sync_next() and local barriers
+ * This is again the same as test_barrier_wait_next_twice_local but uses a
+ * synced wait as the second wait. This works just fine because the local state
+ * is in sync with the remote.
+ */
+TEST_BARRIER(test_barrier_wait_next_twice_local_sync,
+ ({
+ sleep_for(BASE_TIME);
+ set_alarm(BASE_TIME);
+ assert_se(barrier_wait_next(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync_next(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test sync_next() and sync()
+ * This tests sync_*() synchronizations and makes sure they work fine if the
+ * local state is behind the remote state.
+ */
+TEST_BARRIER(test_barrier_sync_next,
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_sync_next(&b));
+ assert_se(barrier_sync(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync_next(&b));
+ assert_se(barrier_sync_next(&b));
+ assert_se(barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ set_alarm(BASE_TIME * 10);
+ sleep_for(BASE_TIME);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test sync_next() and sync() with local barriers
+ * This tests timeouts if sync_*() is used if local barriers are placed but the
+ * remote didn't place any.
+ */
+TEST_BARRIER(test_barrier_sync_next_local,
+ ({
+ set_alarm(BASE_TIME);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync_next(&b));
+ assert_se(0);
+ }),
+ TEST_BARRIER_WAIT_ALARM(pid1),
+ ({
+ sleep_for(BASE_TIME * 2);
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test sync_next() and sync() with local barriers and abortion
+ * This is the same as test_barrier_sync_next_local but aborts the sync in the
+ * parent. Therefore, the sync_next() succeeds just fine due to the abortion.
+ */
+TEST_BARRIER(test_barrier_sync_next_local_abort,
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ assert_se(!barrier_sync_next(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ assert_se(barrier_abort(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test matched wait_abortion()
+ * This runs wait_abortion() with remote abortion.
+ */
+TEST_BARRIER(test_barrier_wait_abortion,
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_wait_abortion(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ assert_se(barrier_abort(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test unmatched wait_abortion()
+ * This runs wait_abortion() without any remote abortion going on. It thus must
+ * timeout.
+ */
+TEST_BARRIER(test_barrier_wait_abortion_unmatched,
+ ({
+ set_alarm(BASE_TIME);
+ assert_se(barrier_wait_abortion(&b));
+ assert_se(0);
+ }),
+ TEST_BARRIER_WAIT_ALARM(pid1),
+ ({
+ sleep_for(BASE_TIME * 2);
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test matched wait_abortion() with local abortion
+ * This runs wait_abortion() with local and remote abortion.
+ */
+TEST_BARRIER(test_barrier_wait_abortion_local,
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_abort(&b));
+ assert_se(!barrier_wait_abortion(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ assert_se(barrier_abort(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test unmatched wait_abortion() with local abortion
+ * This runs wait_abortion() with only local abortion. This must time out.
+ */
+TEST_BARRIER(test_barrier_wait_abortion_local_unmatched,
+ ({
+ set_alarm(BASE_TIME);
+ assert_se(barrier_abort(&b));
+ assert_se(!barrier_wait_abortion(&b));
+ assert_se(0);
+ }),
+ TEST_BARRIER_WAIT_ALARM(pid1),
+ ({
+ sleep_for(BASE_TIME * 2);
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test child exit
+ * Place barrier and sync with the child. The child only exits()s, which should
+ * cause an implicit abortion and wake the parent.
+ */
+TEST_BARRIER(test_barrier_exit,
+ ({
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ assert_se(!barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test child exit with sleep
+ * Same as test_barrier_exit but verifies the test really works due to the
+ * child-exit. We add a usleep() which triggers the alarm in the parent and
+ * causes the test to time out.
+ */
+TEST_BARRIER(test_barrier_no_exit,
+ ({
+ sleep_for(BASE_TIME * 2);
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ set_alarm(BASE_TIME);
+ assert_se(barrier_place(&b));
+ assert_se(!barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_ALARM(pid2));
+
+/*
+ * Test pending exit against sync
+ * The parent places a barrier *and* exits. The 20ms wait in the child
+ * guarantees both are pending. However, our logic prefers pending barriers over
+ * pending exit-abortions (unlike normal abortions), thus the wait_next() must
+ * succeed, same for the sync_next() as our local barrier-count is smaller than
+ * the remote. Once we place a barrier our count is equal, so the sync still
+ * succeeds. Only if we place one more barrier, we're ahead of the remote, thus
+ * we will fail due to HUP on the pipe.
+ */
+TEST_BARRIER(test_barrier_pending_exit,
+ ({
+ set_alarm(BASE_TIME * 4);
+ sleep_for(BASE_TIME * 2);
+ assert_se(barrier_wait_next(&b));
+ assert_se(barrier_sync_next(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync_next(&b));
+ assert_se(barrier_place(&b));
+ assert_se(!barrier_sync_next(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ assert_se(barrier_place(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+int main(int argc, char *argv[]) {
+ int v;
+ test_setup_logging(LOG_INFO);
+
+ if (!slow_tests_enabled())
+ return log_tests_skipped("slow tests are disabled");
+
+ /*
+ * This test uses real-time alarms and sleeps to test for CPU races
+ * explicitly. This is highly fragile if your system is under load. We
+ * already increased the BASE_TIME value to make the tests more robust,
+ * but that just makes the test take significantly longer. Given the recent
+ * issues when running the test in a virtualized environments, limit it
+ * to bare metal machines only, to minimize false-positives in CIs.
+ */
+ v = detect_virtualization();
+ if (IN_SET(v, -EPERM, -EACCES))
+ return log_tests_skipped("Cannot detect virtualization");
+
+ if (v != VIRTUALIZATION_NONE)
+ return log_tests_skipped("This test requires a baremetal machine");
+
+ test_barrier_sync();
+ test_barrier_wait_next();
+ test_barrier_wait_next_twice();
+ test_barrier_wait_next_twice_sync();
+ test_barrier_wait_next_twice_local();
+ test_barrier_wait_next_twice_local_sync();
+ test_barrier_sync_next();
+ test_barrier_sync_next_local();
+ test_barrier_sync_next_local_abort();
+ test_barrier_wait_abortion();
+ test_barrier_wait_abortion_unmatched();
+ test_barrier_wait_abortion_local();
+ test_barrier_wait_abortion_local_unmatched();
+ test_barrier_exit();
+ test_barrier_no_exit();
+ test_barrier_pending_exit();
+
+ return 0;
+}
diff --git a/src/test/test-bitmap.c b/src/test/test-bitmap.c
new file mode 100644
index 0000000..f73d36e
--- /dev/null
+++ b/src/test/test-bitmap.c
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "bitmap.h"
+
+int main(int argc, const char *argv[]) {
+ _cleanup_bitmap_free_ Bitmap *b = NULL, *b2 = NULL;
+ Iterator it;
+ unsigned n = (unsigned) -1, i = 0;
+
+ b = bitmap_new();
+ assert_se(b);
+
+ assert_se(bitmap_ensure_allocated(&b) == 0);
+ bitmap_free(b);
+ b = NULL;
+ assert_se(bitmap_ensure_allocated(&b) == 0);
+
+ assert_se(bitmap_isset(b, 0) == false);
+ assert_se(bitmap_isset(b, 1) == false);
+ assert_se(bitmap_isset(b, 256) == false);
+ assert_se(bitmap_isclear(b) == true);
+
+ assert_se(bitmap_set(b, 0) == 0);
+ assert_se(bitmap_isset(b, 0) == true);
+ assert_se(bitmap_isclear(b) == false);
+ bitmap_unset(b, 0);
+ assert_se(bitmap_isset(b, 0) == false);
+ assert_se(bitmap_isclear(b) == true);
+
+ assert_se(bitmap_set(b, 1) == 0);
+ assert_se(bitmap_isset(b, 1) == true);
+ assert_se(bitmap_isclear(b) == false);
+ bitmap_unset(b, 1);
+ assert_se(bitmap_isset(b, 1) == false);
+ assert_se(bitmap_isclear(b) == true);
+
+ assert_se(bitmap_set(b, 256) == 0);
+ assert_se(bitmap_isset(b, 256) == true);
+ assert_se(bitmap_isclear(b) == false);
+ bitmap_unset(b, 256);
+ assert_se(bitmap_isset(b, 256) == false);
+ assert_se(bitmap_isclear(b) == true);
+
+ assert_se(bitmap_set(b, 32) == 0);
+ bitmap_unset(b, 0);
+ assert_se(bitmap_isset(b, 32) == true);
+ bitmap_unset(b, 32);
+
+ BITMAP_FOREACH(n, NULL, it)
+ assert_not_reached("NULL bitmap");
+
+ assert_se(bitmap_set(b, 0) == 0);
+ assert_se(bitmap_set(b, 1) == 0);
+ assert_se(bitmap_set(b, 256) == 0);
+
+ BITMAP_FOREACH(n, b, it) {
+ assert_se(n == i);
+ if (i == 0)
+ i = 1;
+ else if (i == 1)
+ i = 256;
+ else if (i == 256)
+ i = (unsigned) -1;
+ }
+
+ assert_se(i == (unsigned) -1);
+
+ i = 0;
+
+ BITMAP_FOREACH(n, b, it) {
+ assert_se(n == i);
+ if (i == 0)
+ i = 1;
+ else if (i == 1)
+ i = 256;
+ else if (i == 256)
+ i = (unsigned) -1;
+ }
+
+ assert_se(i == (unsigned) -1);
+
+ b2 = bitmap_copy(b);
+ assert_se(b2);
+ assert_se(bitmap_equal(b, b2) == true);
+ assert_se(bitmap_equal(b, b) == true);
+ assert_se(bitmap_equal(b, NULL) == false);
+ assert_se(bitmap_equal(NULL, b) == false);
+ assert_se(bitmap_equal(NULL, NULL) == true);
+
+ bitmap_clear(b);
+ assert_se(bitmap_isclear(b) == true);
+ assert_se(bitmap_equal(b, b2) == false);
+ bitmap_free(b2);
+ b2 = NULL;
+
+ assert_se(bitmap_set(b, (unsigned) -1) == -ERANGE);
+
+ bitmap_free(b);
+ b = NULL;
+ assert_se(bitmap_ensure_allocated(&b) == 0);
+ assert_se(bitmap_ensure_allocated(&b2) == 0);
+
+ assert_se(bitmap_equal(b, b2));
+ assert_se(bitmap_set(b, 0) == 0);
+ bitmap_unset(b, 0);
+ assert_se(bitmap_equal(b, b2));
+
+ assert_se(bitmap_set(b, 1) == 0);
+ bitmap_clear(b);
+ assert_se(bitmap_equal(b, b2));
+
+ assert_se(bitmap_set(b, 0) == 0);
+ assert_se(bitmap_set(b2, 0) == 0);
+ assert_se(bitmap_equal(b, b2));
+
+ return 0;
+}
diff --git a/src/test/test-boot-timestamps.c b/src/test/test-boot-timestamps.c
new file mode 100644
index 0000000..a79e0cf
--- /dev/null
+++ b/src/test/test-boot-timestamps.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "acpi-fpdt.h"
+#include "boot-timestamps.h"
+#include "efivars.h"
+#include "log.h"
+#include "tests.h"
+#include "util.h"
+
+static int test_acpi_fpdt(void) {
+ char ts_start[FORMAT_TIMESPAN_MAX], ts_exit[FORMAT_TIMESPAN_MAX], ts_span[FORMAT_TIMESPAN_MAX];
+ usec_t loader_start, loader_exit;
+ int r;
+
+ r = acpi_get_boot_usec(&loader_start, &loader_exit);
+ if (r < 0) {
+ bool ok = r == -ENOENT || (getuid() != 0 && r == -EACCES) || r == -ENODATA;
+
+ log_full_errno(ok ? LOG_DEBUG : LOG_ERR, r, "Failed to read ACPI FPDT: %m");
+ return ok ? 0 : r;
+ }
+
+ log_info("ACPI FPDT: loader start=%s exit=%s duration=%s",
+ format_timespan(ts_start, sizeof(ts_start), loader_start, USEC_PER_MSEC),
+ format_timespan(ts_exit, sizeof(ts_exit), loader_exit, USEC_PER_MSEC),
+ format_timespan(ts_span, sizeof(ts_span), loader_exit - loader_start, USEC_PER_MSEC));
+ return 1;
+}
+
+static int test_efi_loader(void) {
+ char ts_start[FORMAT_TIMESPAN_MAX], ts_exit[FORMAT_TIMESPAN_MAX], ts_span[FORMAT_TIMESPAN_MAX];
+ usec_t loader_start, loader_exit;
+ int r;
+
+ r = efi_loader_get_boot_usec(&loader_start, &loader_exit);
+ if (r < 0) {
+ bool ok = r == -ENOENT || (getuid() != 0 && r == -EACCES) || r == -EOPNOTSUPP;
+
+ log_full_errno(ok ? LOG_DEBUG : LOG_ERR, r, "Failed to read EFI loader data: %m");
+ return ok ? 0 : r;
+ }
+
+ log_info("EFI Loader: start=%s exit=%s duration=%s",
+ format_timespan(ts_start, sizeof(ts_start), loader_start, USEC_PER_MSEC),
+ format_timespan(ts_exit, sizeof(ts_exit), loader_exit, USEC_PER_MSEC),
+ format_timespan(ts_span, sizeof(ts_span), loader_exit - loader_start, USEC_PER_MSEC));
+ return 1;
+}
+
+static int test_boot_timestamps(void) {
+ char s[MAX(FORMAT_TIMESPAN_MAX, FORMAT_TIMESTAMP_MAX)];
+ dual_timestamp fw, l, k;
+ int r;
+
+ dual_timestamp_from_monotonic(&k, 0);
+
+ r = boot_timestamps(NULL, &fw, &l);
+ if (r < 0) {
+ bool ok = r == -ENOENT || (getuid() != 0 && r == -EACCES) || r == -EOPNOTSUPP;
+
+ log_full_errno(ok ? LOG_DEBUG : LOG_ERR, r, "Failed to read variables: %m");
+ return ok ? 0 : r;
+ }
+
+ log_info("Firmware began %s before kernel.", format_timespan(s, sizeof(s), fw.monotonic, 0));
+ log_info("Loader began %s before kernel.", format_timespan(s, sizeof(s), l.monotonic, 0));
+ log_info("Firmware began %s.", format_timestamp(s, sizeof(s), fw.realtime));
+ log_info("Loader began %s.", format_timestamp(s, sizeof(s), l.realtime));
+ log_info("Kernel began %s.", format_timestamp(s, sizeof(s), k.realtime));
+ return 1;
+}
+
+int main(int argc, char* argv[]) {
+ int p, q, r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ p = test_acpi_fpdt();
+ assert(p >= 0);
+ q = test_efi_loader();
+ assert(q >= 0);
+ r = test_boot_timestamps();
+ assert(r >= 0);
+
+ if (p == 0 && q == 0 && r == 0)
+ return log_tests_skipped("access to firmware variables not possible");
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-bpf.c b/src/test/test-bpf.c
new file mode 100644
index 0000000..cd8d68f
--- /dev/null
+++ b/src/test/test-bpf.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <linux/libbpf.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "bpf-firewall.h"
+#include "bpf-program.h"
+#include "load-fragment.h"
+#include "manager.h"
+#include "rm-rf.h"
+#include "service.h"
+#include "test-helper.h"
+#include "tests.h"
+#include "unit.h"
+
+/* We use the same limit here that PID 1 bumps RLIMIT_MEMLOCK to if it can */
+#define CAN_MEMLOCK_SIZE (64U*1024U*1024U)
+
+static bool can_memlock(void) {
+ void *p;
+ bool b;
+
+ /* Let's see if we can mlock() a larger blob of memory. BPF programs are charged against
+ * RLIMIT_MEMLOCK, hence let's first make sure we can lock memory at all, and skip the test if we
+ * cannot. Why not check RLIMIT_MEMLOCK explicitly? Because in container environments the
+ * RLIMIT_MEMLOCK value we see might not match the RLIMIT_MEMLOCK value actually in effect. */
+
+ p = mmap(NULL, CAN_MEMLOCK_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
+ if (p == MAP_FAILED)
+ return false;
+
+ b = mlock(p, CAN_MEMLOCK_SIZE) >= 0;
+ if (b)
+ assert_se(munlock(p, CAN_MEMLOCK_SIZE) >= 0);
+
+ assert_se(munmap(p, CAN_MEMLOCK_SIZE) >= 0);
+ return b;
+}
+
+int main(int argc, char *argv[]) {
+ struct bpf_insn exit_insn[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ CGroupContext *cc = NULL;
+ _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *u;
+ char log_buf[65535];
+ struct rlimit rl;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ if (is_run_on_travis_ci())
+ return log_tests_skipped("test-bpf fails on Travis CI: https://github.com/systemd/systemd/issues/9666");
+
+ assert_se(getrlimit(RLIMIT_MEMLOCK, &rl) >= 0);
+ rl.rlim_cur = rl.rlim_max = MAX3(rl.rlim_cur, rl.rlim_max, CAN_MEMLOCK_SIZE);
+ (void) setrlimit(RLIMIT_MEMLOCK, &rl);
+
+ if (!can_memlock())
+ return log_tests_skipped("Can't use mlock(), skipping.");
+
+ r = enter_cgroup_subroot();
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ assert_se(set_unit_path(get_testdata_dir()) >= 0);
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
+ assert(r == 0);
+
+ r = bpf_program_add_instructions(p, exit_insn, ELEMENTSOF(exit_insn));
+ assert(r == 0);
+
+ if (getuid() != 0)
+ return log_tests_skipped("not running as root");
+
+ r = bpf_firewall_supported();
+ if (r == BPF_FIREWALL_UNSUPPORTED)
+ return log_tests_skipped("BPF firewalling not supported");
+ assert_se(r > 0);
+
+ if (r == BPF_FIREWALL_SUPPORTED_WITH_MULTI)
+ log_notice("BPF firewalling with BPF_F_ALLOW_MULTI supported. Yay!");
+ else
+ log_notice("BPF firewalling (though without BPF_F_ALLOW_MULTI) supported. Good.");
+
+ r = bpf_program_load_kernel(p, log_buf, ELEMENTSOF(log_buf));
+ assert(r >= 0);
+
+ p = bpf_program_unref(p);
+
+ /* The simple tests suceeded. Now let's try full unit-based use-case. */
+
+ assert_se(manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m) >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ assert_se(u = unit_new(m, sizeof(Service)));
+ assert_se(unit_add_name(u, "foo.service") == 0);
+ assert_se(cc = unit_get_cgroup_context(u));
+ u->perpetual = true;
+
+ cc->ip_accounting = true;
+
+ assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressAllow", 0, "10.0.1.0/24", &cc->ip_address_allow, NULL) == 0);
+ assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressAllow", 0, "127.0.0.2", &cc->ip_address_allow, NULL) == 0);
+ assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.3", &cc->ip_address_deny, NULL) == 0);
+ assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "10.0.3.2/24", &cc->ip_address_deny, NULL) == 0);
+ assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.1/25", &cc->ip_address_deny, NULL) == 0);
+ assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.4", &cc->ip_address_deny, NULL) == 0);
+
+ assert(cc->ip_address_allow);
+ assert(cc->ip_address_allow->items_next);
+ assert(!cc->ip_address_allow->items_next->items_next);
+
+ /* The deny list is defined redundantly, let's ensure it got properly reduced */
+ assert(cc->ip_address_deny);
+ assert(cc->ip_address_deny->items_next);
+ assert(!cc->ip_address_deny->items_next->items_next);
+
+ assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "/bin/ping -c 1 127.0.0.2 -W 5", SERVICE(u)->exec_command, u) == 0);
+ assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "/bin/ping -c 1 127.0.0.3 -W 5", SERVICE(u)->exec_command, u) == 0);
+
+ assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]);
+ assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next);
+ assert_se(!SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->command_next);
+
+ SERVICE(u)->type = SERVICE_ONESHOT;
+ u->load_state = UNIT_LOADED;
+
+ unit_dump(u, stdout, NULL);
+
+ r = bpf_firewall_compile(u);
+ if (IN_SET(r, -ENOTTY, -ENOSYS, -EPERM))
+ return log_tests_skipped("Kernel doesn't support the necessary bpf bits (masked out via seccomp?)");
+ assert_se(r >= 0);
+
+ assert(u->ip_bpf_ingress);
+ assert(u->ip_bpf_egress);
+
+ r = bpf_program_load_kernel(u->ip_bpf_ingress, log_buf, ELEMENTSOF(log_buf));
+
+ log_notice("log:");
+ log_notice("-------");
+ log_notice("%s", log_buf);
+ log_notice("-------");
+
+ assert(r >= 0);
+
+ r = bpf_program_load_kernel(u->ip_bpf_egress, log_buf, ELEMENTSOF(log_buf));
+
+ log_notice("log:");
+ log_notice("-------");
+ log_notice("%s", log_buf);
+ log_notice("-------");
+
+ assert(r >= 0);
+
+ assert_se(unit_start(u) >= 0);
+
+ while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED))
+ assert_se(sd_event_run(m->event, UINT64_MAX) >= 0);
+
+ assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code == CLD_EXITED &&
+ SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status == EXIT_SUCCESS);
+
+ assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.code != CLD_EXITED ||
+ SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.status != EXIT_SUCCESS);
+
+ return 0;
+}
diff --git a/src/test/test-btrfs.c b/src/test/test-btrfs.c
new file mode 100644
index 0000000..5e5638c
--- /dev/null
+++ b/src/test/test-btrfs.c
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+
+#include "btrfs-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ BtrfsQuotaInfo quota;
+ int r, fd;
+
+ fd = open("/", O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ log_error_errno(errno, "Failed to open root directory: %m");
+ else {
+ char ts[FORMAT_TIMESTAMP_MAX], bs[FORMAT_BYTES_MAX];
+ BtrfsSubvolInfo info;
+
+ r = btrfs_subvol_get_info_fd(fd, 0, &info);
+ if (r < 0)
+ log_error_errno(r, "Failed to get subvolume info: %m");
+ else {
+ log_info("otime: %s", format_timestamp(ts, sizeof(ts), info.otime));
+ log_info("read-only (search): %s", yes_no(info.read_only));
+ }
+
+ r = btrfs_qgroup_get_quota_fd(fd, 0, &quota);
+ if (r < 0)
+ log_error_errno(r, "Failed to get quota info: %m");
+ else {
+ log_info("referenced: %s", strna(format_bytes(bs, sizeof(bs), quota.referenced)));
+ log_info("exclusive: %s", strna(format_bytes(bs, sizeof(bs), quota.exclusive)));
+ log_info("referenced_max: %s", strna(format_bytes(bs, sizeof(bs), quota.referenced_max)));
+ log_info("exclusive_max: %s", strna(format_bytes(bs, sizeof(bs), quota.exclusive_max)));
+ }
+
+ r = btrfs_subvol_get_read_only_fd(fd);
+ if (r < 0)
+ log_error_errno(r, "Failed to get read only flag: %m");
+ else
+ log_info("read-only (ioctl): %s", yes_no(r));
+
+ safe_close(fd);
+ }
+
+ r = btrfs_subvol_make("/xxxtest");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ r = write_string_file("/xxxtest/afile", "ljsadhfljasdkfhlkjdsfha", WRITE_STRING_FILE_CREATE);
+ if (r < 0)
+ log_error_errno(r, "Failed to write file: %m");
+
+ r = btrfs_subvol_snapshot("/xxxtest", "/xxxtest2", 0);
+ if (r < 0)
+ log_error_errno(r, "Failed to make snapshot: %m");
+
+ r = btrfs_subvol_snapshot("/xxxtest", "/xxxtest3", BTRFS_SNAPSHOT_READ_ONLY);
+ if (r < 0)
+ log_error_errno(r, "Failed to make snapshot: %m");
+
+ r = btrfs_subvol_remove("/xxxtest", BTRFS_REMOVE_QUOTA);
+ if (r < 0)
+ log_error_errno(r, "Failed to remove subvolume: %m");
+
+ r = btrfs_subvol_remove("/xxxtest2", BTRFS_REMOVE_QUOTA);
+ if (r < 0)
+ log_error_errno(r, "Failed to remove subvolume: %m");
+
+ r = btrfs_subvol_remove("/xxxtest3", BTRFS_REMOVE_QUOTA);
+ if (r < 0)
+ log_error_errno(r, "Failed to remove subvolume: %m");
+
+ r = btrfs_subvol_snapshot("/etc", "/etc2", BTRFS_SNAPSHOT_READ_ONLY|BTRFS_SNAPSHOT_FALLBACK_COPY);
+ if (r < 0)
+ log_error_errno(r, "Failed to make snapshot: %m");
+
+ r = btrfs_subvol_remove("/etc2", BTRFS_REMOVE_QUOTA);
+ if (r < 0)
+ log_error_errno(r, "Failed to remove subvolume: %m");
+
+ r = btrfs_subvol_make("/xxxrectest");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ r = btrfs_subvol_make("/xxxrectest/xxxrectest2");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ r = btrfs_subvol_make("/xxxrectest/xxxrectest3");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ r = btrfs_subvol_make("/xxxrectest/xxxrectest3/sub");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ if (mkdir("/xxxrectest/dir", 0755) < 0)
+ log_error_errno(errno, "Failed to make directory: %m");
+
+ r = btrfs_subvol_make("/xxxrectest/dir/xxxrectest4");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ if (mkdir("/xxxrectest/dir/xxxrectest4/dir", 0755) < 0)
+ log_error_errno(errno, "Failed to make directory: %m");
+
+ r = btrfs_subvol_make("/xxxrectest/dir/xxxrectest4/dir/xxxrectest5");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ if (mkdir("/xxxrectest/mnt", 0755) < 0)
+ log_error_errno(errno, "Failed to make directory: %m");
+
+ r = btrfs_subvol_snapshot("/xxxrectest", "/xxxrectest2", BTRFS_SNAPSHOT_RECURSIVE);
+ if (r < 0)
+ log_error_errno(r, "Failed to snapshot subvolume: %m");
+
+ r = btrfs_subvol_remove("/xxxrectest", BTRFS_REMOVE_QUOTA|BTRFS_REMOVE_RECURSIVE);
+ if (r < 0)
+ log_error_errno(r, "Failed to recursively remove subvolume: %m");
+
+ r = btrfs_subvol_remove("/xxxrectest2", BTRFS_REMOVE_QUOTA|BTRFS_REMOVE_RECURSIVE);
+ if (r < 0)
+ log_error_errno(r, "Failed to recursively remove subvolume: %m");
+
+ r = btrfs_subvol_make("/xxxquotatest");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ r = btrfs_subvol_auto_qgroup("/xxxquotatest", 0, true);
+ if (r < 0)
+ log_error_errno(r, "Failed to set up auto qgroup: %m");
+
+ r = btrfs_subvol_make("/xxxquotatest/beneath");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ r = btrfs_subvol_auto_qgroup("/xxxquotatest/beneath", 0, false);
+ if (r < 0)
+ log_error_errno(r, "Failed to set up auto qgroup: %m");
+
+ r = btrfs_qgroup_set_limit("/xxxquotatest/beneath", 0, 4ULL * 1024 * 1024 * 1024);
+ if (r < 0)
+ log_error_errno(r, "Failed to set up quota limit: %m");
+
+ r = btrfs_subvol_set_subtree_quota_limit("/xxxquotatest", 0, 5ULL * 1024 * 1024 * 1024);
+ if (r < 0)
+ log_error_errno(r, "Failed to set up quota limit: %m");
+
+ r = btrfs_subvol_snapshot("/xxxquotatest", "/xxxquotatest2", BTRFS_SNAPSHOT_RECURSIVE|BTRFS_SNAPSHOT_QUOTA);
+ if (r < 0)
+ log_error_errno(r, "Failed to setup snapshot: %m");
+
+ r = btrfs_qgroup_get_quota("/xxxquotatest2/beneath", 0, &quota);
+ if (r < 0)
+ log_error_errno(r, "Failed to query quota: %m");
+
+ assert_se(quota.referenced_max == 4ULL * 1024 * 1024 * 1024);
+
+ r = btrfs_subvol_get_subtree_quota("/xxxquotatest2", 0, &quota);
+ if (r < 0)
+ log_error_errno(r, "Failed to query quota: %m");
+
+ assert_se(quota.referenced_max == 5ULL * 1024 * 1024 * 1024);
+
+ r = btrfs_subvol_remove("/xxxquotatest", BTRFS_REMOVE_QUOTA|BTRFS_REMOVE_RECURSIVE);
+ if (r < 0)
+ log_error_errno(r, "Failed remove subvolume: %m");
+
+ r = btrfs_subvol_remove("/xxxquotatest2", BTRFS_REMOVE_QUOTA|BTRFS_REMOVE_RECURSIVE);
+ if (r < 0)
+ log_error_errno(r, "Failed remove subvolume: %m");
+
+ return 0;
+}
diff --git a/src/test/test-bus-util.c b/src/test/test-bus-util.c
new file mode 100644
index 0000000..a536608
--- /dev/null
+++ b/src/test/test-bus-util.c
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "bus-util.h"
+#include "log.h"
+#include "tests.h"
+
+static int callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ return 1;
+}
+
+static void destroy_callback(void *userdata) {
+ int *n_called = userdata;
+
+ (*n_called) ++;
+}
+
+static void test_destroy_callback(void) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ sd_bus_slot *slot = NULL;
+ sd_bus_destroy_t t;
+
+ int r, n_called = 0;
+
+ log_info("/* %s */", __func__);
+
+ r = bus_open_system_watch_bind_with_description(&bus, "test-bus");
+ if (r < 0) {
+ log_error_errno(r, "Failed to connect to bus: %m");
+ return;
+ }
+
+ r = sd_bus_request_name_async(bus, &slot, "org.freedesktop.systemd.test-bus-util", 0, callback, &n_called);
+ assert(r == 1);
+
+ assert_se(sd_bus_slot_get_destroy_callback(slot, NULL) == 0);
+ assert_se(sd_bus_slot_get_destroy_callback(slot, &t) == 0);
+
+ assert_se(sd_bus_slot_set_destroy_callback(slot, destroy_callback) == 0);
+ assert_se(sd_bus_slot_get_destroy_callback(slot, NULL) == 1);
+ assert_se(sd_bus_slot_get_destroy_callback(slot, &t) == 1);
+ assert_se(t == destroy_callback);
+
+ /* Force cleanup so we can look at n_called */
+ assert(n_called == 0);
+ sd_bus_slot_unref(slot);
+ assert(n_called == 1);
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_destroy_callback();
+
+ return 0;
+}
diff --git a/src/test/test-calendarspec.c b/src/test/test-calendarspec.c
new file mode 100644
index 0000000..10c9f63
--- /dev/null
+++ b/src/test/test-calendarspec.c
@@ -0,0 +1,250 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+
+#include "alloc-util.h"
+#include "calendarspec.h"
+#include "string-util.h"
+#include "util.h"
+
+static void test_one(const char *input, const char *output) {
+ CalendarSpec *c;
+ _cleanup_free_ char *p = NULL, *q = NULL;
+ usec_t u;
+ char buf[FORMAT_TIMESTAMP_MAX];
+ int r;
+
+ assert_se(calendar_spec_from_string(input, &c) >= 0);
+
+ assert_se(calendar_spec_to_string(c, &p) >= 0);
+ printf("\"%s\" → \"%s\"\n", input, p);
+
+ assert_se(streq(p, output));
+
+ u = now(CLOCK_REALTIME);
+ r = calendar_spec_next_usec(c, u, &u);
+ printf("Next: %s\n", r < 0 ? strerror(-r) : format_timestamp(buf, sizeof(buf), u));
+ calendar_spec_free(c);
+
+ assert_se(calendar_spec_from_string(p, &c) >= 0);
+ assert_se(calendar_spec_to_string(c, &q) >= 0);
+ calendar_spec_free(c);
+
+ assert_se(streq(q, p));
+}
+
+static void test_next(const char *input, const char *new_tz, usec_t after, usec_t expect) {
+ CalendarSpec *c;
+ usec_t u;
+ char *old_tz;
+ char buf[FORMAT_TIMESTAMP_MAX];
+ int r;
+
+ old_tz = getenv("TZ");
+ if (old_tz)
+ old_tz = strdupa(old_tz);
+
+ if (new_tz)
+ assert_se(setenv("TZ", new_tz, 1) >= 0);
+ else
+ assert_se(unsetenv("TZ") >= 0);
+ tzset();
+
+ assert_se(calendar_spec_from_string(input, &c) >= 0);
+
+ printf("\"%s\"\n", input);
+
+ u = after;
+ r = calendar_spec_next_usec(c, after, &u);
+ printf("At: %s\n", r < 0 ? strerror(-r) : format_timestamp_us(buf, sizeof buf, u));
+ if (expect != (usec_t)-1)
+ assert_se(r >= 0 && u == expect);
+ else
+ assert(r == -ENOENT);
+
+ calendar_spec_free(c);
+
+ if (old_tz)
+ assert_se(setenv("TZ", old_tz, 1) >= 0);
+ else
+ assert_se(unsetenv("TZ") >= 0);
+ tzset();
+}
+
+static void test_timestamp(void) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ _cleanup_free_ char *t = NULL;
+ CalendarSpec *c;
+ usec_t x, y;
+
+ /* Ensure that a timestamp is also a valid calendar specification. Convert forth and back */
+
+ x = now(CLOCK_REALTIME);
+
+ assert_se(format_timestamp_us(buf, sizeof(buf), x));
+ printf("%s\n", buf);
+ assert_se(calendar_spec_from_string(buf, &c) >= 0);
+ assert_se(calendar_spec_to_string(c, &t) >= 0);
+ calendar_spec_free(c);
+ printf("%s\n", t);
+
+ assert_se(parse_timestamp(t, &y) >= 0);
+ assert_se(y == x);
+}
+
+static void test_hourly_bug_4031(void) {
+ CalendarSpec *c;
+ usec_t n, u, w;
+ char buf[FORMAT_TIMESTAMP_MAX], zaf[FORMAT_TIMESTAMP_MAX];
+ int r;
+
+ assert_se(calendar_spec_from_string("hourly", &c) >= 0);
+ n = now(CLOCK_REALTIME);
+ assert_se((r = calendar_spec_next_usec(c, n, &u)) >= 0);
+
+ printf("Now: %s (%"PRIu64")\n", format_timestamp_us(buf, sizeof buf, n), n);
+ printf("Next hourly: %s (%"PRIu64")\n", r < 0 ? strerror(-r) : format_timestamp_us(buf, sizeof buf, u), u);
+
+ assert_se((r = calendar_spec_next_usec(c, u, &w)) >= 0);
+ printf("Next hourly: %s (%"PRIu64")\n", r < 0 ? strerror(-r) : format_timestamp_us(zaf, sizeof zaf, w), w);
+
+ assert_se(n < u);
+ assert_se(u <= n + USEC_PER_HOUR);
+ assert_se(u < w);
+ assert_se(w <= u + USEC_PER_HOUR);
+
+ calendar_spec_free(c);
+}
+
+int main(int argc, char* argv[]) {
+ CalendarSpec *c;
+
+ test_one("Sat,Thu,Mon-Wed,Sat-Sun", "Mon..Thu,Sat,Sun *-*-* 00:00:00");
+ test_one("Sat,Thu,Mon..Wed,Sat..Sun", "Mon..Thu,Sat,Sun *-*-* 00:00:00");
+ test_one("Mon,Sun 12-*-* 2,1:23", "Mon,Sun 2012-*-* 01,02:23:00");
+ test_one("Wed *-1", "Wed *-*-01 00:00:00");
+ test_one("Wed-Wed,Wed *-1", "Wed *-*-01 00:00:00");
+ test_one("Wed..Wed,Wed *-1", "Wed *-*-01 00:00:00");
+ test_one("Wed, 17:48", "Wed *-*-* 17:48:00");
+ test_one("Wednesday,", "Wed *-*-* 00:00:00");
+ test_one("Wed-Sat,Tue 12-10-15 1:2:3", "Tue..Sat 2012-10-15 01:02:03");
+ test_one("Wed..Sat,Tue 12-10-15 1:2:3", "Tue..Sat 2012-10-15 01:02:03");
+ test_one("*-*-7 0:0:0", "*-*-07 00:00:00");
+ test_one("10-15", "*-10-15 00:00:00");
+ test_one("monday *-12-* 17:00", "Mon *-12-* 17:00:00");
+ test_one("Mon,Fri *-*-3,1,2 *:30:45", "Mon,Fri *-*-01,02,03 *:30:45");
+ test_one("12,14,13,12:20,10,30", "*-*-* 12,13,14:10,20,30:00");
+ test_one("mon,fri *-1/2-1,3 *:30:45", "Mon,Fri *-01/2-01,03 *:30:45");
+ test_one("03-05 08:05:40", "*-03-05 08:05:40");
+ test_one("08:05:40", "*-*-* 08:05:40");
+ test_one("05:40", "*-*-* 05:40:00");
+ test_one("Sat,Sun 12-05 08:05:40", "Sat,Sun *-12-05 08:05:40");
+ test_one("Sat,Sun 08:05:40", "Sat,Sun *-*-* 08:05:40");
+ test_one("2003-03-05 05:40", "2003-03-05 05:40:00");
+ test_one("2003-03-05", "2003-03-05 00:00:00");
+ test_one("03-05", "*-03-05 00:00:00");
+ test_one("hourly", "*-*-* *:00:00");
+ test_one("daily", "*-*-* 00:00:00");
+ test_one("monthly", "*-*-01 00:00:00");
+ test_one("weekly", "Mon *-*-* 00:00:00");
+ test_one("minutely", "*-*-* *:*:00");
+ test_one("quarterly", "*-01,04,07,10-01 00:00:00");
+ test_one("semi-annually", "*-01,07-01 00:00:00");
+ test_one("annually", "*-01-01 00:00:00");
+ test_one("*:2/3", "*-*-* *:02/3:00");
+ test_one("2015-10-25 01:00:00 uTc", "2015-10-25 01:00:00 UTC");
+ test_one("2015-10-25 01:00:00 Asia/Vladivostok", "2015-10-25 01:00:00 Asia/Vladivostok");
+ test_one("weekly Pacific/Auckland", "Mon *-*-* 00:00:00 Pacific/Auckland");
+ test_one("2016-03-27 03:17:00.4200005", "2016-03-27 03:17:00.420001");
+ test_one("2016-03-27 03:17:00/0.42", "2016-03-27 03:17:00/0.420000");
+ test_one("9..11,13:00,30", "*-*-* 09..11,13:00,30:00");
+ test_one("1..3-1..3 1..3:1..3", "*-01..03-01..03 01..03:01..03:00");
+ test_one("00:00:1.125..2.125", "*-*-* 00:00:01.125000..02.125000");
+ test_one("00:00:1.0..3.8", "*-*-* 00:00:01..03");
+ test_one("00:00:01..03", "*-*-* 00:00:01..03");
+ test_one("00:00:01/2,02..03", "*-*-* 00:00:01/2,02..03");
+ test_one("*-*~1 Utc", "*-*~01 00:00:00 UTC");
+ test_one("*-*~05,3 ", "*-*~03,05 00:00:00");
+ test_one("*-*~* 00:00:00", "*-*-* 00:00:00");
+ test_one("Monday", "Mon *-*-* 00:00:00");
+ test_one("Monday *-*-*", "Mon *-*-* 00:00:00");
+ test_one("*-*-*", "*-*-* 00:00:00");
+ test_one("*:*:*", "*-*-* *:*:*");
+ test_one("*:*", "*-*-* *:*:00");
+ test_one("12:*", "*-*-* 12:*:00");
+ test_one("*:30", "*-*-* *:30:00");
+ test_one("93..00-*-*", "1993..2000-*-* 00:00:00");
+ test_one("00..07-*-*", "2000..2007-*-* 00:00:00");
+ test_one("*:20..39/5", "*-*-* *:20..35/5:00");
+ test_one("00:00:20..40/1", "*-*-* 00:00:20..40");
+ test_one("*~03/1,03..05", "*-*~03/1,03..05 00:00:00");
+ /* UNIX timestamps are always UTC */
+ test_one("@1493187147", "2017-04-26 06:12:27 UTC");
+ test_one("@1493187147 UTC", "2017-04-26 06:12:27 UTC");
+ test_one("@0", "1970-01-01 00:00:00 UTC");
+ test_one("@0 UTC", "1970-01-01 00:00:00 UTC");
+
+ test_next("2016-03-27 03:17:00", "", 12345, 1459048620000000);
+ test_next("2016-03-27 03:17:00", "CET", 12345, 1459041420000000);
+ test_next("2016-03-27 03:17:00", "EET", 12345, -1);
+ test_next("2016-03-27 03:17:00 UTC", NULL, 12345, 1459048620000000);
+ test_next("2016-03-27 03:17:00 UTC", "", 12345, 1459048620000000);
+ test_next("2016-03-27 03:17:00 UTC", "CET", 12345, 1459048620000000);
+ test_next("2016-03-27 03:17:00 UTC", "EET", 12345, 1459048620000000);
+ test_next("2016-03-27 03:17:00.420000001 UTC", "EET", 12345, 1459048620420000);
+ test_next("2016-03-27 03:17:00.4200005 UTC", "EET", 12345, 1459048620420001);
+ test_next("2015-11-13 09:11:23.42", "EET", 12345, 1447398683420000);
+ test_next("2015-11-13 09:11:23.42/1.77", "EET", 1447398683420000, 1447398685190000);
+ test_next("2015-11-13 09:11:23.42/1.77", "EET", 1447398683419999, 1447398683420000);
+ test_next("Sun 16:00:00", "CET", 1456041600123456, 1456066800000000);
+ test_next("*-04-31", "", 12345, -1);
+ test_next("2016-02~01 UTC", "", 12345, 1456704000000000);
+ test_next("Mon 2017-05~01..07 UTC", "", 12345, 1496016000000000);
+ test_next("Mon 2017-05~07/1 UTC", "", 12345, 1496016000000000);
+ test_next("2017-08-06 9,11,13,15,17:00 UTC", "", 1502029800000000, 1502031600000000);
+ test_next("2017-08-06 9..17/2:00 UTC", "", 1502029800000000, 1502031600000000);
+ test_next("2016-12-* 3..21/6:00 UTC", "", 1482613200000001, 1482634800000000);
+ test_next("2017-09-24 03:30:00 Pacific/Auckland", "", 12345, 1506177000000000);
+ // Due to daylight saving time - 2017-09-24 02:30:00 does not exist
+ test_next("2017-09-24 02:30:00 Pacific/Auckland", "", 12345, -1);
+ test_next("2017-04-02 02:30:00 Pacific/Auckland", "", 12345, 1491053400000000);
+ // Confirm that even though it's a time change here (backward) 02:30 happens only once
+ test_next("2017-04-02 02:30:00 Pacific/Auckland", "", 1491053400000000, -1);
+ test_next("2017-04-02 03:30:00 Pacific/Auckland", "", 12345, 1491060600000000);
+ // Confirm that timezones in the Spec work regardless of current timezone
+ test_next("2017-09-09 20:42:00 Pacific/Auckland", "", 12345, 1504946520000000);
+ test_next("2017-09-09 20:42:00 Pacific/Auckland", "EET", 12345, 1504946520000000);
+
+ assert_se(calendar_spec_from_string("test", &c) < 0);
+ assert_se(calendar_spec_from_string(" utc", &c) < 0);
+ assert_se(calendar_spec_from_string(" ", &c) < 0);
+ assert_se(calendar_spec_from_string("", &c) < 0);
+ assert_se(calendar_spec_from_string("7", &c) < 0);
+ assert_se(calendar_spec_from_string("121212:1:2", &c) < 0);
+ assert_se(calendar_spec_from_string("2000-03-05.23 00:00:00", &c) < 0);
+ assert_se(calendar_spec_from_string("2000-03-05 00:00.1:00", &c) < 0);
+ assert_se(calendar_spec_from_string("00:00:00/0.00000001", &c) < 0);
+ assert_se(calendar_spec_from_string("00:00:00.0..00.9", &c) < 0);
+ assert_se(calendar_spec_from_string("2016~11-22", &c) < 0);
+ assert_se(calendar_spec_from_string("*-*~5/5", &c) < 0);
+ assert_se(calendar_spec_from_string("Monday.. 12:00", &c) < 0);
+ assert_se(calendar_spec_from_string("Monday..", &c) < 0);
+ assert_se(calendar_spec_from_string("-00:+00/-5", &c) < 0);
+ assert_se(calendar_spec_from_string("00:+00/-5", &c) < 0);
+ assert_se(calendar_spec_from_string("2016- 11- 24 12: 30: 00", &c) < 0);
+ assert_se(calendar_spec_from_string("*~29", &c) < 0);
+ assert_se(calendar_spec_from_string("*~16..31", &c) < 0);
+ assert_se(calendar_spec_from_string("12..1/2-*", &c) < 0);
+ assert_se(calendar_spec_from_string("*:05..05", &c) < 0);
+ assert_se(calendar_spec_from_string("*:05..10/6", &c) < 0);
+ assert_se(calendar_spec_from_string("20/4:00", &c) < 0);
+ assert_se(calendar_spec_from_string("00:00/60", &c) < 0);
+ assert_se(calendar_spec_from_string("00:00:2300", &c) < 0);
+ assert_se(calendar_spec_from_string("00:00:18446744073709551615", &c) < 0);
+ assert_se(calendar_spec_from_string("@88588582097858858", &c) == -ERANGE);
+
+ test_timestamp();
+ test_hourly_bug_4031();
+
+ return 0;
+}
diff --git a/src/test/test-cap-list.c b/src/test/test-cap-list.c
new file mode 100644
index 0000000..de5fa72
--- /dev/null
+++ b/src/test/test-cap-list.c
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/prctl.h>
+
+#include "alloc-util.h"
+#include "cap-list.h"
+#include "capability-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "util.h"
+
+/* verify the capability parser */
+static void test_cap_list(void) {
+ int i;
+
+ assert_se(!capability_to_name(-1));
+ assert_se(!capability_to_name(capability_list_length()));
+
+ for (i = 0; i < capability_list_length(); i++) {
+ const char *n;
+
+ assert_se(n = capability_to_name(i));
+ assert_se(capability_from_name(n) == i);
+ printf("%s = %i\n", n, i);
+ }
+
+ assert_se(capability_from_name("asdfbsd") == -EINVAL);
+ assert_se(capability_from_name("CAP_AUDIT_READ") == CAP_AUDIT_READ);
+ assert_se(capability_from_name("cap_audit_read") == CAP_AUDIT_READ);
+ assert_se(capability_from_name("cAp_aUdIt_rEAd") == CAP_AUDIT_READ);
+ assert_se(capability_from_name("0") == 0);
+ assert_se(capability_from_name("15") == 15);
+ assert_se(capability_from_name("-1") == -EINVAL);
+
+ for (i = 0; i < capability_list_length(); i++) {
+ _cleanup_cap_free_charp_ char *a = NULL;
+ const char *b;
+ unsigned u;
+
+ assert_se(a = cap_to_name(i));
+
+ /* quit the loop as soon as libcap starts returning
+ * numeric ids, formatted as strings */
+ if (safe_atou(a, &u) >= 0)
+ break;
+
+ assert_se(b = capability_to_name(i));
+
+ printf("%s vs. %s\n", a, b);
+
+ assert_se(strcasecmp(a, b) == 0);
+ }
+}
+
+static void test_capability_set_one(uint64_t c, const char *t) {
+ _cleanup_free_ char *t1 = NULL;
+ uint64_t c1, c_masked = c & ((UINT64_C(1) << capability_list_length()) - 1);
+
+ assert_se(capability_set_to_string_alloc(c, &t1) == 0);
+ assert_se(streq(t1, t));
+
+ assert_se(capability_set_from_string(t1, &c1) == 0);
+ assert_se(c1 == c_masked);
+
+ free(t1);
+ assert_se(t1 = strjoin("'cap_chown cap_dac_override' \"cap_setgid cap_setuid\"", t,
+ " hogehoge foobar 12345 3.14 -3 ", t));
+ assert_se(capability_set_from_string(t1, &c1) == 0);
+ assert_se(c1 == c_masked);
+}
+
+static void test_capability_set(void) {
+ uint64_t c;
+
+ assert_se(capability_set_from_string(NULL, &c) == 0);
+ assert_se(c == 0);
+
+ assert_se(capability_set_from_string("", &c) == 0);
+ assert_se(c == 0);
+
+ assert_se(capability_set_from_string("0", &c) == 0);
+ assert_se(c == UINT64_C(1));
+
+ assert_se(capability_set_from_string("1", &c) == 0);
+ assert_se(c == UINT64_C(1) << 1);
+
+ assert_se(capability_set_from_string("0 1 2 3", &c) == 0);
+ assert_se(c == (UINT64_C(1) << 4) - 1);
+
+ test_capability_set_one(0, "");
+ test_capability_set_one(
+ UINT64_C(1) << CAP_DAC_OVERRIDE,
+ "cap_dac_override");
+ test_capability_set_one(
+ UINT64_C(1) << CAP_DAC_OVERRIDE |
+ UINT64_C(1) << capability_list_length(),
+ "cap_dac_override");
+ test_capability_set_one(
+ UINT64_C(1) << capability_list_length(), "");
+ test_capability_set_one(
+ UINT64_C(1) << CAP_CHOWN |
+ UINT64_C(1) << CAP_DAC_OVERRIDE |
+ UINT64_C(1) << CAP_DAC_READ_SEARCH |
+ UINT64_C(1) << CAP_FOWNER |
+ UINT64_C(1) << CAP_SETGID |
+ UINT64_C(1) << CAP_SETUID |
+ UINT64_C(1) << CAP_SYS_PTRACE |
+ UINT64_C(1) << CAP_SYS_ADMIN |
+ UINT64_C(1) << CAP_AUDIT_CONTROL |
+ UINT64_C(1) << CAP_MAC_OVERRIDE |
+ UINT64_C(1) << CAP_SYSLOG |
+ UINT64_C(1) << (capability_list_length() + 1),
+ "cap_chown cap_dac_override cap_dac_read_search cap_fowner "
+ "cap_setgid cap_setuid cap_sys_ptrace cap_sys_admin "
+ "cap_audit_control cap_mac_override cap_syslog");
+}
+
+int main(int argc, char *argv[]) {
+ test_cap_list();
+ test_capability_set();
+
+ return 0;
+}
diff --git a/src/test/test-capability.c b/src/test/test-capability.c
new file mode 100644
index 0000000..325d7c8
--- /dev/null
+++ b/src/test/test-capability.c
@@ -0,0 +1,245 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/in.h>
+#include <pwd.h>
+#include <sys/capability.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "missing_prctl.h"
+#include "parse-util.h"
+#include "tests.h"
+#include "util.h"
+
+static uid_t test_uid = -1;
+static gid_t test_gid = -1;
+
+#if HAS_FEATURE_ADDRESS_SANITIZER
+/* Keep CAP_SYS_PTRACE when running under Address Sanitizer */
+static const uint64_t test_flags = UINT64_C(1) << CAP_SYS_PTRACE;
+#else
+/* We keep CAP_DAC_OVERRIDE to avoid errors with gcov when doing test coverage */
+static const uint64_t test_flags = UINT64_C(1) << CAP_DAC_OVERRIDE;
+#endif
+
+/* verify cap_last_cap() against /proc/sys/kernel/cap_last_cap */
+static void test_last_cap_file(void) {
+ _cleanup_free_ char *content = NULL;
+ unsigned long val = 0;
+ int r;
+
+ r = read_one_line_file("/proc/sys/kernel/cap_last_cap", &content);
+ assert_se(r >= 0);
+
+ r = safe_atolu(content, &val);
+ assert_se(r >= 0);
+ assert_se(val != 0);
+ assert_se(val == cap_last_cap());
+}
+
+/* verify cap_last_cap() against syscall probing */
+static void test_last_cap_probe(void) {
+ unsigned long p = (unsigned long)CAP_LAST_CAP;
+
+ if (prctl(PR_CAPBSET_READ, p) < 0) {
+ for (p--; p > 0; p --)
+ if (prctl(PR_CAPBSET_READ, p) >= 0)
+ break;
+ } else {
+ for (;; p++)
+ if (prctl(PR_CAPBSET_READ, p+1) < 0)
+ break;
+ }
+
+ assert_se(p != 0);
+ assert_se(p == cap_last_cap());
+}
+
+static void fork_test(void (*test_func)(void)) {
+ pid_t pid = 0;
+
+ pid = fork();
+ assert_se(pid >= 0);
+ if (pid == 0) {
+ test_func();
+ exit(EXIT_SUCCESS);
+ } else if (pid > 0) {
+ int status;
+
+ assert_se(waitpid(pid, &status, 0) > 0);
+ assert_se(WIFEXITED(status) && WEXITSTATUS(status) == 0);
+ }
+}
+
+static void show_capabilities(void) {
+ cap_t caps;
+ char *text;
+
+ caps = cap_get_proc();
+ assert_se(caps);
+
+ text = cap_to_text(caps, NULL);
+ assert_se(text);
+
+ log_info("Capabilities:%s", text);
+ cap_free(caps);
+ cap_free(text);
+}
+
+static int setup_tests(bool *run_ambient) {
+ struct passwd *nobody;
+ int r;
+
+ nobody = getpwnam(NOBODY_USER_NAME);
+ if (!nobody)
+ return log_error_errno(errno, "Could not find nobody user: %m");
+
+ test_uid = nobody->pw_uid;
+ test_gid = nobody->pw_gid;
+
+ *run_ambient = false;
+
+ r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0);
+
+ /* There's support for PR_CAP_AMBIENT if the prctl() call
+ * succeeded or error code was something else than EINVAL. The
+ * EINVAL check should be good enough to rule out false
+ * positives. */
+
+ if (r >= 0 || errno != EINVAL)
+ *run_ambient = true;
+
+ return 0;
+}
+
+static void test_drop_privileges_keep_net_raw(void) {
+ int sock;
+
+ sock = socket(AF_INET, SOCK_RAW, IPPROTO_UDP);
+ assert_se(sock >= 0);
+ safe_close(sock);
+
+ assert_se(drop_privileges(test_uid, test_gid, test_flags | (1ULL << CAP_NET_RAW)) >= 0);
+ assert_se(getuid() == test_uid);
+ assert_se(getgid() == test_gid);
+ show_capabilities();
+
+ sock = socket(AF_INET, SOCK_RAW, IPPROTO_UDP);
+ assert_se(sock >= 0);
+ safe_close(sock);
+}
+
+static void test_drop_privileges_dontkeep_net_raw(void) {
+ int sock;
+
+ sock = socket(AF_INET, SOCK_RAW, IPPROTO_UDP);
+ assert_se(sock >= 0);
+ safe_close(sock);
+
+ assert_se(drop_privileges(test_uid, test_gid, test_flags) >= 0);
+ assert_se(getuid() == test_uid);
+ assert_se(getgid() == test_gid);
+ show_capabilities();
+
+ sock = socket(AF_INET, SOCK_RAW, IPPROTO_UDP);
+ assert_se(sock < 0);
+}
+
+static void test_drop_privileges_fail(void) {
+ assert_se(drop_privileges(test_uid, test_gid, test_flags) >= 0);
+ assert_se(getuid() == test_uid);
+ assert_se(getgid() == test_gid);
+
+ assert_se(drop_privileges(test_uid, test_gid, test_flags) < 0);
+ assert_se(drop_privileges(0, 0, test_flags) < 0);
+}
+
+static void test_drop_privileges(void) {
+ fork_test(test_drop_privileges_keep_net_raw);
+ fork_test(test_drop_privileges_dontkeep_net_raw);
+ fork_test(test_drop_privileges_fail);
+}
+
+static void test_have_effective_cap(void) {
+ assert_se(have_effective_cap(CAP_KILL));
+ assert_se(have_effective_cap(CAP_CHOWN));
+
+ assert_se(drop_privileges(test_uid, test_gid, test_flags | (1ULL << CAP_KILL)) >= 0);
+ assert_se(getuid() == test_uid);
+ assert_se(getgid() == test_gid);
+
+ assert_se(have_effective_cap(CAP_KILL));
+ assert_se(!have_effective_cap(CAP_CHOWN));
+}
+
+static void test_update_inherited_set(void) {
+ cap_t caps;
+ uint64_t set = 0;
+ cap_flag_value_t fv;
+
+ caps = cap_get_proc();
+ assert_se(caps);
+
+ set = (UINT64_C(1) << CAP_CHOWN);
+
+ assert_se(!capability_update_inherited_set(caps, set));
+ assert_se(!cap_get_flag(caps, CAP_CHOWN, CAP_INHERITABLE, &fv));
+ assert(fv == CAP_SET);
+
+ cap_free(caps);
+}
+
+static void test_set_ambient_caps(void) {
+ cap_t caps;
+ uint64_t set = 0;
+ cap_flag_value_t fv;
+
+ assert_se(prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_CHOWN, 0, 0) == 0);
+
+ set = (UINT64_C(1) << CAP_CHOWN);
+
+ assert_se(!capability_ambient_set_apply(set, true));
+
+ caps = cap_get_proc();
+ assert_se(!cap_get_flag(caps, CAP_CHOWN, CAP_INHERITABLE, &fv));
+ assert(fv == CAP_SET);
+ cap_free(caps);
+
+ assert_se(prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_CHOWN, 0, 0) == 1);
+}
+
+int main(int argc, char *argv[]) {
+ bool run_ambient;
+
+ test_setup_logging(LOG_INFO);
+
+ test_last_cap_file();
+ test_last_cap_probe();
+
+ log_info("have ambient caps: %s", yes_no(ambient_capabilities_supported()));
+
+ if (getuid() != 0)
+ return log_tests_skipped("not running as root");
+
+ if (setup_tests(&run_ambient) < 0)
+ return log_tests_skipped("setup failed");
+
+ show_capabilities();
+
+ test_drop_privileges();
+ test_update_inherited_set();
+
+ fork_test(test_have_effective_cap);
+
+ if (run_ambient)
+ fork_test(test_set_ambient_caps);
+
+ return 0;
+}
diff --git a/src/test/test-cgroup-mask.c b/src/test/test-cgroup-mask.c
new file mode 100644
index 0000000..7f6c0c2
--- /dev/null
+++ b/src/test/test-cgroup-mask.c
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+
+#include "cgroup.h"
+#include "cgroup-util.h"
+#include "macro.h"
+#include "manager.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "test-helper.h"
+#include "tests.h"
+#include "unit.h"
+
+#define ASSERT_CGROUP_MASK(got, expected) \
+ log_cgroup_mask(got, expected); \
+ assert_se(got == expected)
+
+#define ASSERT_CGROUP_MASK_JOINED(got, expected) ASSERT_CGROUP_MASK(got, CGROUP_MASK_EXTEND_JOINED(expected))
+
+static void log_cgroup_mask(CGroupMask got, CGroupMask expected) {
+ _cleanup_free_ char *e_store = NULL, *g_store = NULL;
+
+ assert_se(cg_mask_to_string(expected, &e_store) >= 0);
+ log_info("Expected mask: %s\n", e_store);
+ assert_se(cg_mask_to_string(got, &g_store) >= 0);
+ log_info("Got mask: %s\n", g_store);
+}
+
+static int test_cgroup_mask(void) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *son, *daughter, *parent, *root, *grandchild, *parent_deep, *nomem_parent, *nomem_leaf;
+ int r;
+ CGroupMask cpu_accounting_mask = get_cpu_accounting_mask();
+
+ r = enter_cgroup_subroot();
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ /* Prepare the manager. */
+ assert_se(set_unit_path(get_testdata_dir()) >= 0);
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m);
+ if (IN_SET(r, -EPERM, -EACCES)) {
+ log_error_errno(r, "manager_new: %m");
+ return log_tests_skipped("cannot create manager");
+ }
+
+ assert_se(r >= 0);
+
+ /* Turn off all kinds of default accouning, so that we can
+ * verify the masks resulting of our configuration and nothing
+ * else. */
+ m->default_cpu_accounting =
+ m->default_memory_accounting =
+ m->default_blockio_accounting =
+ m->default_io_accounting =
+ m->default_tasks_accounting = false;
+ m->default_tasks_max = (uint64_t) -1;
+
+ assert_se(r >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ /* Load units and verify hierarchy. */
+ assert_se(manager_load_startable_unit_or_warn(m, "parent.slice", NULL, &parent) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "son.service", NULL, &son) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "daughter.service", NULL, &daughter) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "grandchild.service", NULL, &grandchild) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "parent-deep.slice", NULL, &parent_deep) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "nomem.slice", NULL, &nomem_parent) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "nomemleaf.service", NULL, &nomem_leaf) >= 0);
+ assert_se(UNIT_DEREF(son->slice) == parent);
+ assert_se(UNIT_DEREF(daughter->slice) == parent);
+ assert_se(UNIT_DEREF(parent_deep->slice) == parent);
+ assert_se(UNIT_DEREF(grandchild->slice) == parent_deep);
+ assert_se(UNIT_DEREF(nomem_leaf->slice) == nomem_parent);
+ root = UNIT_DEREF(parent->slice);
+ assert_se(UNIT_DEREF(nomem_parent->slice) == root);
+
+ /* Verify per-unit cgroups settings. */
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(son), CGROUP_MASK_CPU);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(daughter), cpu_accounting_mask);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(grandchild), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(parent_deep), CGROUP_MASK_MEMORY);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(parent), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(nomem_parent), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(nomem_leaf), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(root), 0);
+
+ /* Verify aggregation of member masks */
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(son), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(daughter), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(grandchild), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(parent_deep), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(parent), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(nomem_parent), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(nomem_leaf), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(root), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+
+ /* Verify aggregation of sibling masks. */
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(son), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(daughter), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(grandchild), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(parent_deep), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(parent), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(nomem_parent), (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(nomem_leaf), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(root), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+
+ /* Verify aggregation of target masks. */
+ ASSERT_CGROUP_MASK(unit_get_target_mask(son), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_target_mask(daughter), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_target_mask(grandchild), 0);
+ ASSERT_CGROUP_MASK(unit_get_target_mask(parent_deep), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_target_mask(parent), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_target_mask(nomem_parent), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_target_mask(nomem_leaf), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_IO | CGROUP_MASK_BLKIO) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_target_mask(root), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+
+ return 0;
+}
+
+static void test_cg_mask_to_string_one(CGroupMask mask, const char *t) {
+ _cleanup_free_ char *b = NULL;
+
+ assert_se(cg_mask_to_string(mask, &b) >= 0);
+ assert_se(streq_ptr(b, t));
+}
+
+static void test_cg_mask_to_string(void) {
+ test_cg_mask_to_string_one(0, NULL);
+ test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct io blkio memory devices pids bpf-firewall bpf-devices");
+ test_cg_mask_to_string_one(CGROUP_MASK_CPU, "cpu");
+ test_cg_mask_to_string_one(CGROUP_MASK_CPUACCT, "cpuacct");
+ test_cg_mask_to_string_one(CGROUP_MASK_IO, "io");
+ test_cg_mask_to_string_one(CGROUP_MASK_BLKIO, "blkio");
+ test_cg_mask_to_string_one(CGROUP_MASK_MEMORY, "memory");
+ test_cg_mask_to_string_one(CGROUP_MASK_DEVICES, "devices");
+ test_cg_mask_to_string_one(CGROUP_MASK_PIDS, "pids");
+ test_cg_mask_to_string_one(CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT, "cpu cpuacct");
+ test_cg_mask_to_string_one(CGROUP_MASK_CPU|CGROUP_MASK_PIDS, "cpu pids");
+ test_cg_mask_to_string_one(CGROUP_MASK_CPUACCT|CGROUP_MASK_PIDS, "cpuacct pids");
+ test_cg_mask_to_string_one(CGROUP_MASK_DEVICES|CGROUP_MASK_PIDS, "devices pids");
+ test_cg_mask_to_string_one(CGROUP_MASK_IO|CGROUP_MASK_BLKIO, "io blkio");
+}
+
+int main(int argc, char* argv[]) {
+ int rc = EXIT_SUCCESS;
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_cg_mask_to_string();
+ TEST_REQ_RUNNING_SYSTEMD(rc = test_cgroup_mask());
+
+ return rc;
+}
diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c
new file mode 100644
index 0000000..a3239d7
--- /dev/null
+++ b/src/test/test-cgroup-util.c
@@ -0,0 +1,487 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "build.h"
+#include "cgroup-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "test-helper.h"
+#include "tests.h"
+#include "user-util.h"
+#include "util.h"
+
+static void check_p_d_u(const char *path, int code, const char *result) {
+ _cleanup_free_ char *unit = NULL;
+ int r;
+
+ r = cg_path_decode_unit(path, &unit);
+ printf("%s: %s → %s %d expected %s %d\n", __func__, path, unit, r, result, code);
+ assert_se(r == code);
+ assert_se(streq_ptr(unit, result));
+}
+
+static void test_path_decode_unit(void) {
+ check_p_d_u("getty@tty2.service", 0, "getty@tty2.service");
+ check_p_d_u("getty@tty2.service/", 0, "getty@tty2.service");
+ check_p_d_u("getty@tty2.service/xxx", 0, "getty@tty2.service");
+ check_p_d_u("getty@.service/", -ENXIO, NULL);
+ check_p_d_u("getty@.service", -ENXIO, NULL);
+ check_p_d_u("getty.service", 0, "getty.service");
+ check_p_d_u("getty", -ENXIO, NULL);
+ check_p_d_u("getty/waldo", -ENXIO, NULL);
+ check_p_d_u("_cpu.service", 0, "cpu.service");
+}
+
+static void check_p_g_u(const char *path, int code, const char *result) {
+ _cleanup_free_ char *unit = NULL;
+ int r;
+
+ r = cg_path_get_unit(path, &unit);
+ printf("%s: %s → %s %d expected %s %d\n", __func__, path, unit, r, result, code);
+ assert_se(r == code);
+ assert_se(streq_ptr(unit, result));
+}
+
+static void test_path_get_unit(void) {
+ check_p_g_u("/system.slice/foobar.service/sdfdsaf", 0, "foobar.service");
+ check_p_g_u("/system.slice/getty@tty5.service", 0, "getty@tty5.service");
+ check_p_g_u("/system.slice/getty@tty5.service/aaa/bbb", 0, "getty@tty5.service");
+ check_p_g_u("/system.slice/getty@tty5.service/", 0, "getty@tty5.service");
+ check_p_g_u("/system.slice/getty@tty6.service/tty5", 0, "getty@tty6.service");
+ check_p_g_u("sadfdsafsda", -ENXIO, NULL);
+ check_p_g_u("/system.slice/getty####@tty6.service/xxx", -ENXIO, NULL);
+ check_p_g_u("/system.slice/system-waldo.slice/foobar.service/sdfdsaf", 0, "foobar.service");
+ check_p_g_u("/system.slice/system-waldo.slice/_cpu.service/sdfdsaf", 0, "cpu.service");
+ check_p_g_u("/user.slice/user-1000.slice/user@1000.service/server.service", 0, "user@1000.service");
+ check_p_g_u("/user.slice/user-1000.slice/user@.service/server.service", -ENXIO, NULL);
+}
+
+static void check_p_g_u_u(const char *path, int code, const char *result) {
+ _cleanup_free_ char *unit = NULL;
+ int r;
+
+ r = cg_path_get_user_unit(path, &unit);
+ printf("%s: %s → %s %d expected %s %d\n", __func__, path, unit, r, result, code);
+ assert_se(r == code);
+ assert_se(streq_ptr(unit, result));
+}
+
+static void test_path_get_user_unit(void) {
+ check_p_g_u_u("/user.slice/user-1000.slice/session-2.scope/foobar.service", 0, "foobar.service");
+ check_p_g_u_u("/user.slice/user-1000.slice/session-2.scope/waldo.slice/foobar.service", 0, "foobar.service");
+ check_p_g_u_u("/user.slice/user-1002.slice/session-2.scope/foobar.service/waldo", 0, "foobar.service");
+ check_p_g_u_u("/user.slice/user-1000.slice/session-2.scope/foobar.service/waldo/uuuux", 0, "foobar.service");
+ check_p_g_u_u("/user.slice/user-1000.slice/session-2.scope/waldo/waldo/uuuux", -ENXIO, NULL);
+ check_p_g_u_u("/user.slice/user-1000.slice/session-2.scope/foobar@pie.service/pa/po", 0, "foobar@pie.service");
+ check_p_g_u_u("/session-2.scope/foobar@pie.service/pa/po", 0, "foobar@pie.service");
+ check_p_g_u_u("/xyz.slice/xyz-waldo.slice/session-77.scope/foobar@pie.service/pa/po", 0, "foobar@pie.service");
+ check_p_g_u_u("/meh.service", -ENXIO, NULL);
+ check_p_g_u_u("/session-3.scope/_cpu.service", 0, "cpu.service");
+ check_p_g_u_u("/user.slice/user-1000.slice/user@1000.service/server.service", 0, "server.service");
+ check_p_g_u_u("/user.slice/user-1000.slice/user@1000.service/foobar.slice/foobar@pie.service", 0, "foobar@pie.service");
+ check_p_g_u_u("/user.slice/user-1000.slice/user@.service/server.service", -ENXIO, NULL);
+}
+
+static void check_p_g_s(const char *path, int code, const char *result) {
+ _cleanup_free_ char *s = NULL;
+
+ assert_se(cg_path_get_session(path, &s) == code);
+ assert_se(streq_ptr(s, result));
+}
+
+static void test_path_get_session(void) {
+ check_p_g_s("/user.slice/user-1000.slice/session-2.scope/foobar.service", 0, "2");
+ check_p_g_s("/session-3.scope", 0, "3");
+ check_p_g_s("/session-.scope", -ENXIO, NULL);
+ check_p_g_s("", -ENXIO, NULL);
+}
+
+static void check_p_g_o_u(const char *path, int code, uid_t result) {
+ uid_t uid = 0;
+
+ assert_se(cg_path_get_owner_uid(path, &uid) == code);
+ assert_se(uid == result);
+}
+
+static void test_path_get_owner_uid(void) {
+ check_p_g_o_u("/user.slice/user-1000.slice/session-2.scope/foobar.service", 0, 1000);
+ check_p_g_o_u("/user.slice/user-1006.slice", 0, 1006);
+ check_p_g_o_u("", -ENXIO, 0);
+}
+
+static void check_p_g_slice(const char *path, int code, const char *result) {
+ _cleanup_free_ char *s = NULL;
+
+ assert_se(cg_path_get_slice(path, &s) == code);
+ assert_se(streq_ptr(s, result));
+}
+
+static void test_path_get_slice(void) {
+ check_p_g_slice("/user.slice", 0, "user.slice");
+ check_p_g_slice("/foobar", 0, SPECIAL_ROOT_SLICE);
+ check_p_g_slice("/user.slice/user-waldo.slice", 0, "user-waldo.slice");
+ check_p_g_slice("", 0, SPECIAL_ROOT_SLICE);
+ check_p_g_slice("foobar", 0, SPECIAL_ROOT_SLICE);
+ check_p_g_slice("foobar.slice", 0, "foobar.slice");
+ check_p_g_slice("foo.slice/foo-bar.slice/waldo.service", 0, "foo-bar.slice");
+}
+
+static void check_p_g_u_slice(const char *path, int code, const char *result) {
+ _cleanup_free_ char *s = NULL;
+
+ assert_se(cg_path_get_user_slice(path, &s) == code);
+ assert_se(streq_ptr(s, result));
+}
+
+static void test_path_get_user_slice(void) {
+ check_p_g_u_slice("/user.slice", -ENXIO, NULL);
+ check_p_g_u_slice("/foobar", -ENXIO, NULL);
+ check_p_g_u_slice("/user.slice/user-waldo.slice", -ENXIO, NULL);
+ check_p_g_u_slice("", -ENXIO, NULL);
+ check_p_g_u_slice("foobar", -ENXIO, NULL);
+ check_p_g_u_slice("foobar.slice", -ENXIO, NULL);
+ check_p_g_u_slice("foo.slice/foo-bar.slice/waldo.service", -ENXIO, NULL);
+
+ check_p_g_u_slice("foo.slice/foo-bar.slice/user@1000.service", 0, SPECIAL_ROOT_SLICE);
+ check_p_g_u_slice("foo.slice/foo-bar.slice/user@1000.service/", 0, SPECIAL_ROOT_SLICE);
+ check_p_g_u_slice("foo.slice/foo-bar.slice/user@1000.service///", 0, SPECIAL_ROOT_SLICE);
+ check_p_g_u_slice("foo.slice/foo-bar.slice/user@1000.service/waldo.service", 0, SPECIAL_ROOT_SLICE);
+ check_p_g_u_slice("foo.slice/foo-bar.slice/user@1000.service/piep.slice/foo.service", 0, "piep.slice");
+ check_p_g_u_slice("/foo.slice//foo-bar.slice/user@1000.service/piep.slice//piep-pap.slice//foo.service", 0, "piep-pap.slice");
+}
+
+static void test_get_paths(void) {
+ _cleanup_free_ char *a = NULL;
+
+ assert_se(cg_get_root_path(&a) >= 0);
+ log_info("Root = %s", a);
+}
+
+static void test_proc(void) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r;
+
+ d = opendir("/proc");
+ assert_se(d);
+
+ FOREACH_DIRENT(de, d, break) {
+ _cleanup_free_ char *path = NULL, *path_shifted = NULL, *session = NULL, *unit = NULL, *user_unit = NULL, *machine = NULL, *slice = NULL;
+ pid_t pid;
+ uid_t uid = UID_INVALID;
+
+ if (!IN_SET(de->d_type, DT_DIR, DT_UNKNOWN))
+ continue;
+
+ r = parse_pid(de->d_name, &pid);
+ if (r < 0)
+ continue;
+
+ if (is_kernel_thread(pid))
+ continue;
+
+ cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &path);
+ cg_pid_get_path_shifted(pid, NULL, &path_shifted);
+ cg_pid_get_owner_uid(pid, &uid);
+ cg_pid_get_session(pid, &session);
+ cg_pid_get_unit(pid, &unit);
+ cg_pid_get_user_unit(pid, &user_unit);
+ cg_pid_get_machine_name(pid, &machine);
+ cg_pid_get_slice(pid, &slice);
+
+ printf(PID_FMT"\t%s\t%s\t"UID_FMT"\t%s\t%s\t%s\t%s\t%s\n",
+ pid,
+ path,
+ path_shifted,
+ uid,
+ session,
+ unit,
+ user_unit,
+ machine,
+ slice);
+ }
+}
+
+static void test_escape_one(const char *s, const char *r) {
+ _cleanup_free_ char *b;
+
+ b = cg_escape(s);
+ assert_se(b);
+ assert_se(streq(b, r));
+
+ assert_se(streq(cg_unescape(b), s));
+}
+
+static void test_escape(void) {
+ test_escape_one("foobar", "foobar");
+ test_escape_one(".foobar", "_.foobar");
+ test_escape_one("foobar.service", "foobar.service");
+ test_escape_one("cgroup.service", "_cgroup.service");
+ test_escape_one("tasks", "_tasks");
+ if (access("/sys/fs/cgroup/cpu", F_OK) == 0)
+ test_escape_one("cpu.service", "_cpu.service");
+ test_escape_one("_foobar", "__foobar");
+ test_escape_one("", "_");
+ test_escape_one("_", "__");
+ test_escape_one(".", "_.");
+}
+
+static void test_controller_is_valid(void) {
+ assert_se(cg_controller_is_valid("foobar"));
+ assert_se(cg_controller_is_valid("foo_bar"));
+ assert_se(cg_controller_is_valid("name=foo"));
+ assert_se(!cg_controller_is_valid(""));
+ assert_se(!cg_controller_is_valid("name="));
+ assert_se(!cg_controller_is_valid("="));
+ assert_se(!cg_controller_is_valid("cpu,cpuacct"));
+ assert_se(!cg_controller_is_valid("_"));
+ assert_se(!cg_controller_is_valid("_foobar"));
+ assert_se(!cg_controller_is_valid("tatü"));
+}
+
+static void test_slice_to_path_one(const char *unit, const char *path, int error) {
+ _cleanup_free_ char *ret = NULL;
+ int r;
+
+ log_info("unit: %s", unit);
+
+ r = cg_slice_to_path(unit, &ret);
+ log_info("actual: %s / %d", strnull(ret), r);
+ log_info("expect: %s / %d", strnull(path), error);
+ assert_se(r == error);
+ assert_se(streq_ptr(ret, path));
+}
+
+static void test_slice_to_path(void) {
+ test_slice_to_path_one("foobar.slice", "foobar.slice", 0);
+ test_slice_to_path_one("foobar-waldo.slice", "foobar.slice/foobar-waldo.slice", 0);
+ test_slice_to_path_one("foobar-waldo.service", NULL, -EINVAL);
+ test_slice_to_path_one(SPECIAL_ROOT_SLICE, "", 0);
+ test_slice_to_path_one("--.slice", NULL, -EINVAL);
+ test_slice_to_path_one("-", NULL, -EINVAL);
+ test_slice_to_path_one("-foo-.slice", NULL, -EINVAL);
+ test_slice_to_path_one("-foo.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foo-.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foo--bar.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foo.slice/foo--bar.slice", NULL, -EINVAL);
+ test_slice_to_path_one("a-b.slice", "a.slice/a-b.slice", 0);
+ test_slice_to_path_one("a-b-c-d-e.slice", "a.slice/a-b.slice/a-b-c.slice/a-b-c-d.slice/a-b-c-d-e.slice", 0);
+
+ test_slice_to_path_one("foobar@.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foobar@waldo.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foobar@waldo.service", NULL, -EINVAL);
+ test_slice_to_path_one("-foo@-.slice", NULL, -EINVAL);
+ test_slice_to_path_one("-foo@.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foo@-.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foo@@bar.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foo.slice/foo@@bar.slice", NULL, -EINVAL);
+}
+
+static void test_shift_path_one(const char *raw, const char *root, const char *shifted) {
+ const char *s = NULL;
+
+ assert_se(cg_shift_path(raw, root, &s) >= 0);
+ assert_se(streq(s, shifted));
+}
+
+static void test_shift_path(void) {
+
+ test_shift_path_one("/foobar/waldo", "/", "/foobar/waldo");
+ test_shift_path_one("/foobar/waldo", "", "/foobar/waldo");
+ test_shift_path_one("/foobar/waldo", "/foobar", "/waldo");
+ test_shift_path_one("/foobar/waldo", "/hogehoge", "/foobar/waldo");
+}
+
+static void test_mask_supported(void) {
+
+ CGroupMask m;
+ CGroupController c;
+
+ assert_se(cg_mask_supported(&m) >= 0);
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++)
+ printf("'%s' is supported: %s\n", cgroup_controller_to_string(c), yes_no(m & CGROUP_CONTROLLER_TO_MASK(c)));
+}
+
+static void test_is_cgroup_fs(void) {
+ struct statfs sfs;
+ assert_se(statfs("/sys/fs/cgroup", &sfs) == 0);
+ if (is_temporary_fs(&sfs))
+ assert_se(statfs("/sys/fs/cgroup/systemd", &sfs) == 0);
+ assert_se(is_cgroup_fs(&sfs));
+}
+
+static void test_fd_is_cgroup_fs(void) {
+ int fd;
+
+ fd = open("/sys/fs/cgroup", O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ assert_se(fd >= 0);
+ if (fd_is_temporary_fs(fd)) {
+ fd = safe_close(fd);
+ fd = open("/sys/fs/cgroup/systemd", O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ assert_se(fd >= 0);
+ }
+ assert_se(fd_is_cgroup_fs(fd));
+ fd = safe_close(fd);
+}
+
+static void test_is_wanted_print(bool header) {
+ _cleanup_free_ char *cmdline = NULL;
+
+ log_info("-- %s --", __func__);
+ assert_se(proc_cmdline(&cmdline) >= 0);
+ log_info("cmdline: %s", cmdline);
+ if (header) {
+
+ log_info(_CGROUP_HIEARCHY_);
+ (void) system("findmnt -n /sys/fs/cgroup");
+ }
+
+ log_info("is_unified_wanted() → %s", yes_no(cg_is_unified_wanted()));
+ log_info("is_hybrid_wanted() → %s", yes_no(cg_is_hybrid_wanted()));
+ log_info("is_legacy_wanted() → %s", yes_no(cg_is_legacy_wanted()));
+ log_info(" ");
+}
+
+static void test_is_wanted(void) {
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "systemd.unified_cgroup_hierarchy", 1) >= 0);
+ test_is_wanted_print(false);
+
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "systemd.unified_cgroup_hierarchy=0", 1) >= 0);
+ test_is_wanted_print(false);
+
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "systemd.unified_cgroup_hierarchy=0 "
+ "systemd.legacy_systemd_cgroup_controller", 1) >= 0);
+ test_is_wanted_print(false);
+
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "systemd.unified_cgroup_hierarchy=0 "
+ "systemd.legacy_systemd_cgroup_controller=0", 1) >= 0);
+ test_is_wanted_print(false);
+
+ /* cgroup_no_v1=all implies unified cgroup hierarchy, unless otherwise
+ * explicitly specified. */
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "cgroup_no_v1=all", 1) >= 0);
+ test_is_wanted_print(false);
+
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "cgroup_no_v1=all "
+ "systemd.unified_cgroup_hierarchy=0", 1) >= 0);
+ test_is_wanted_print(false);
+}
+
+static void test_cg_tests(void) {
+ int all, hybrid, systemd, r;
+
+ r = cg_unified_flush();
+ if (r == -ENOMEDIUM) {
+ log_notice_errno(r, "Skipping cg hierarchy tests: %m");
+ return;
+ }
+ assert_se(r == 0);
+
+ all = cg_all_unified();
+ assert_se(IN_SET(all, 0, 1));
+
+ hybrid = cg_hybrid_unified();
+ assert_se(IN_SET(hybrid, 0, 1));
+
+ systemd = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ assert_se(IN_SET(systemd, 0, 1));
+
+ if (all) {
+ assert_se(systemd);
+ assert_se(!hybrid);
+
+ } else if (hybrid) {
+ assert_se(systemd);
+ assert_se(!all);
+
+ } else
+ assert_se(!systemd);
+}
+
+static void test_cg_get_keyed_attribute(void) {
+ _cleanup_free_ char *val = NULL;
+ char *vals3[3] = {}, *vals3a[3] = {};
+ int i, r;
+
+ r = cg_get_keyed_attribute("cpu", "/init.scope", "no_such_file", STRV_MAKE("no_such_attr"), &val);
+ if (r == -ENOMEDIUM) {
+ log_info_errno(r, "Skipping most of %s, /sys/fs/cgroup not accessible: %m", __func__);
+ return;
+ }
+
+ assert_se(r == -ENOENT);
+ assert_se(val == NULL);
+
+ if (access("/sys/fs/cgroup/init.scope/cpu.stat", R_OK) < 0) {
+ log_info_errno(errno, "Skipping most of %s, /init.scope/cpu.stat not accessible: %m", __func__);
+ return;
+ }
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("no_such_attr"), &val) == -ENXIO);
+ assert_se(val == NULL);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec"), &val) == 0);
+ log_info("cpu /init.scope cpu.stat [usage_usec] → \"%s\"", val);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec", "no_such_attr"), vals3) == -ENXIO);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec", "usage_usec"), vals3) == -ENXIO);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat",
+ STRV_MAKE("usage_usec", "user_usec", "system_usec"), vals3) == 0);
+ log_info("cpu /init.scope cpu.stat [usage_usec user_usec system_usec] → \"%s\", \"%s\", \"%s\"",
+ vals3[0], vals3[1], vals3[2]);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat",
+ STRV_MAKE("system_usec", "user_usec", "usage_usec"), vals3a) == 0);
+ log_info("cpu /init.scope cpu.stat [system_usec user_usec usage_usec] → \"%s\", \"%s\", \"%s\"",
+ vals3a[0], vals3a[1], vals3a[2]);
+
+ for (i = 0; i < 3; i++) {
+ free(vals3[i]);
+ free(vals3a[i]);
+ }
+}
+
+int main(void) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_path_decode_unit();
+ test_path_get_unit();
+ test_path_get_user_unit();
+ test_path_get_session();
+ test_path_get_owner_uid();
+ test_path_get_slice();
+ test_path_get_user_slice();
+ TEST_REQ_RUNNING_SYSTEMD(test_get_paths());
+ test_proc();
+ TEST_REQ_RUNNING_SYSTEMD(test_escape());
+ test_controller_is_valid();
+ test_slice_to_path();
+ test_shift_path();
+ TEST_REQ_RUNNING_SYSTEMD(test_mask_supported());
+ TEST_REQ_RUNNING_SYSTEMD(test_is_cgroup_fs());
+ TEST_REQ_RUNNING_SYSTEMD(test_fd_is_cgroup_fs());
+ test_is_wanted_print(true);
+ test_is_wanted_print(false); /* run twice to test caching */
+ test_is_wanted();
+ test_cg_tests();
+ test_cg_get_keyed_attribute();
+
+ return 0;
+}
diff --git a/src/test/test-cgroup.c b/src/test/test-cgroup.c
new file mode 100644
index 0000000..5cdfd2d
--- /dev/null
+++ b/src/test/test-cgroup.c
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+#include <unistd.h>
+
+#include "cgroup-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ char *path;
+ char *c, *p;
+
+ assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-a") == 0);
+ assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-a") == 0);
+ assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-b") == 0);
+ assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-b/test-c") == 0);
+ assert_se(cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, "/test-b", 0) == 0);
+
+ assert_se(cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, getpid_cached(), &path) == 0);
+ assert_se(streq(path, "/test-b"));
+ free(path);
+
+ assert_se(cg_attach(SYSTEMD_CGROUP_CONTROLLER, "/test-a", 0) == 0);
+
+ assert_se(cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, getpid_cached(), &path) == 0);
+ assert_se(path_equal(path, "/test-a"));
+ free(path);
+
+ assert_se(cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, "/test-b/test-d", 0) == 0);
+
+ assert_se(cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, getpid_cached(), &path) == 0);
+ assert_se(path_equal(path, "/test-b/test-d"));
+ free(path);
+
+ assert_se(cg_get_path(SYSTEMD_CGROUP_CONTROLLER, "/test-b/test-d", NULL, &path) == 0);
+ assert_se(path_equal(path, "/sys/fs/cgroup/systemd/test-b/test-d"));
+ free(path);
+
+ assert_se(cg_is_empty(SYSTEMD_CGROUP_CONTROLLER, "/test-a") > 0);
+ assert_se(cg_is_empty(SYSTEMD_CGROUP_CONTROLLER, "/test-b") > 0);
+ assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a") > 0);
+ assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b") == 0);
+
+ assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a", 0, 0, NULL, NULL, NULL) == 0);
+ assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b", 0, 0, NULL, NULL, NULL) > 0);
+
+ assert_se(cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b", SYSTEMD_CGROUP_CONTROLLER, "/test-a", 0) > 0);
+
+ assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a") == 0);
+ assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b") > 0);
+
+ assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a", 0, 0, NULL, NULL, NULL) > 0);
+ assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b", 0, 0, NULL, NULL, NULL) == 0);
+
+ cg_trim(SYSTEMD_CGROUP_CONTROLLER, "/", false);
+
+ assert_se(cg_rmdir(SYSTEMD_CGROUP_CONTROLLER, "/test-b") < 0);
+ assert_se(cg_rmdir(SYSTEMD_CGROUP_CONTROLLER, "/test-a") >= 0);
+
+ assert_se(cg_split_spec("foobar:/", &c, &p) == 0);
+ assert_se(streq(c, "foobar"));
+ assert_se(streq(p, "/"));
+ free(c);
+ free(p);
+
+ assert_se(cg_split_spec("foobar:", &c, &p) < 0);
+ assert_se(cg_split_spec("foobar:asdfd", &c, &p) < 0);
+ assert_se(cg_split_spec(":///", &c, &p) < 0);
+ assert_se(cg_split_spec(":", &c, &p) < 0);
+ assert_se(cg_split_spec("", &c, &p) < 0);
+ assert_se(cg_split_spec("fo/obar:/", &c, &p) < 0);
+
+ assert_se(cg_split_spec("/", &c, &p) >= 0);
+ assert_se(c == NULL);
+ assert_se(streq(p, "/"));
+ free(p);
+
+ assert_se(cg_split_spec("foo", &c, &p) >= 0);
+ assert_se(streq(c, "foo"));
+ assert_se(p == NULL);
+ free(c);
+
+ return 0;
+}
diff --git a/src/test/test-chown-rec.c b/src/test/test-chown-rec.c
new file mode 100644
index 0000000..305d44f
--- /dev/null
+++ b/src/test/test-chown-rec.c
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/xattr.h>
+
+#include "alloc-util.h"
+#include "chown-recursive.h"
+#include "log.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+static const uint8_t acl[] = {
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x02, 0x00, 0x07, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x07, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x10, 0x00, 0x07, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x20, 0x00, 0x05, 0x00,
+ 0xff, 0xff, 0xff, 0xff,
+};
+
+static const uint8_t default_acl[] = {
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x04, 0x00, 0x07, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x08, 0x00, 0x07, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x07, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x20, 0x00, 0x05, 0x00,
+ 0xff, 0xff, 0xff, 0xff,
+};
+
+static bool has_xattr(const char *p) {
+ char buffer[sizeof(acl) * 4];
+
+ if (lgetxattr(p, "system.posix_acl_access", buffer, sizeof(buffer)) < 0) {
+ if (IN_SET(errno, EOPNOTSUPP, ENOTTY, ENODATA, ENOSYS))
+ return false;
+ }
+
+ return true;
+}
+
+static void test_chown_recursive(void) {
+ _cleanup_(rm_rf_physical_and_freep) char *t = NULL;
+ struct stat st;
+ const char *p;
+
+ umask(022);
+ assert_se(mkdtemp_malloc(NULL, &t) >= 0);
+
+ p = strjoina(t, "/dir");
+ assert_se(mkdir(p, 0777) >= 0);
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISDIR(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0755);
+ assert_se(st.st_uid == 0);
+ assert_se(st.st_gid == 0);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/symlink");
+ assert_se(symlink("../../", p) >= 0);
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISLNK(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0777);
+ assert_se(st.st_uid == 0);
+ assert_se(st.st_gid == 0);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/reg");
+ assert_se(mknod(p, S_IFREG|0777, 0) >= 0);
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISREG(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0755);
+ assert_se(st.st_uid == 0);
+ assert_se(st.st_gid == 0);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/sock");
+ assert_se(mknod(p, S_IFSOCK|0777, 0) >= 0);
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISSOCK(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0755);
+ assert_se(st.st_uid == 0);
+ assert_se(st.st_gid == 0);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/fifo");
+ assert_se(mknod(p, S_IFIFO|0777, 0) >= 0);
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISFIFO(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0755);
+ assert_se(st.st_uid == 0);
+ assert_se(st.st_gid == 0);
+ assert_se(!has_xattr(p));
+
+ /* We now apply an xattr to the dir, and check it again */
+ p = strjoina(t, "/dir");
+ assert_se(setxattr(p, "system.posix_acl_access", acl, sizeof(acl), 0) >= 0);
+ assert_se(setxattr(p, "system.posix_acl_default", default_acl, sizeof(default_acl), 0) >= 0);
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISDIR(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0775); /* acl change changed the mode too */
+ assert_se(st.st_uid == 0);
+ assert_se(st.st_gid == 0);
+ assert_se(has_xattr(p));
+
+ assert_se(path_chown_recursive(t, 1, 2) >= 0);
+
+ p = strjoina(t, "/dir");
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISDIR(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0775);
+ assert_se(st.st_uid == 1);
+ assert_se(st.st_gid == 2);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/symlink");
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISLNK(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0777);
+ assert_se(st.st_uid == 1);
+ assert_se(st.st_gid == 2);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/reg");
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISREG(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0755);
+ assert_se(st.st_uid == 1);
+ assert_se(st.st_gid == 2);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/sock");
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISSOCK(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0755);
+ assert_se(st.st_uid == 1);
+ assert_se(st.st_gid == 2);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/fifo");
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISFIFO(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0755);
+ assert_se(st.st_uid == 1);
+ assert_se(st.st_gid == 2);
+ assert_se(!has_xattr(p));
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ if (geteuid() != 0)
+ return log_tests_skipped("not running as root");
+
+ test_chown_recursive();
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-clock.c b/src/test/test-clock.c
new file mode 100644
index 0000000..018e679
--- /dev/null
+++ b/src/test/test-clock.c
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2016 Canonical Ltd.
+***/
+
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "clock-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "tmpfile-util.h"
+
+static void test_clock_is_localtime(void) {
+ _cleanup_(unlink_tempfilep) char adjtime[] = "/tmp/test-adjtime.XXXXXX";
+ _cleanup_fclose_ FILE* f = NULL;
+
+ static const struct scenario {
+ const char* contents;
+ int expected_result;
+ } scenarios[] = {
+ /* adjtime configures UTC */
+ {"0.0 0 0\n0\nUTC\n", 0},
+ /* adjtime configures local time */
+ {"0.0 0 0\n0\nLOCAL\n", 1},
+ /* no final EOL */
+ {"0.0 0 0\n0\nUTC", 0},
+ {"0.0 0 0\n0\nLOCAL", 1},
+ /* empty value -> defaults to UTC */
+ {"0.0 0 0\n0\n", 0},
+ /* unknown value -> defaults to UTC */
+ {"0.0 0 0\n0\nFOO\n", 0},
+ /* no third line */
+ {"0.0 0 0", 0},
+ {"0.0 0 0\n", 0},
+ {"0.0 0 0\n0", 0},
+ };
+
+ /* without an adjtime file we default to UTC */
+ assert_se(clock_is_localtime("/nonexisting/adjtime") == 0);
+
+ assert_se(fmkostemp_safe(adjtime, "w", &f) == 0);
+ log_info("adjtime test file: %s", adjtime);
+
+ for (size_t i = 0; i < ELEMENTSOF(scenarios); ++i) {
+ log_info("scenario #%zu:, expected result %i", i, scenarios[i].expected_result);
+ log_info("%s", scenarios[i].contents);
+ rewind(f);
+ ftruncate(fileno(f), 0);
+ assert_se(write_string_stream(f, scenarios[i].contents, WRITE_STRING_FILE_AVOID_NEWLINE) == 0);
+ assert_se(clock_is_localtime(adjtime) == scenarios[i].expected_result);
+ }
+}
+
+/* Test with the real /etc/adjtime */
+static void test_clock_is_localtime_system(void) {
+ int r;
+ r = clock_is_localtime(NULL);
+
+ if (access("/etc/adjtime", F_OK) == 0) {
+ log_info("/etc/adjtime exists, clock_is_localtime() == %i", r);
+ /* if /etc/adjtime exists we expect some answer, no error or
+ * crash */
+ assert_se(IN_SET(r, 0, 1));
+ } else
+ /* default is UTC if there is no /etc/adjtime */
+ assert_se(r == 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_clock_is_localtime();
+ test_clock_is_localtime_system();
+
+ return 0;
+}
diff --git a/src/test/test-condition.c b/src/test/test-condition.c
new file mode 100644
index 0000000..5c2d00a
--- /dev/null
+++ b/src/test/test-condition.c
@@ -0,0 +1,694 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "apparmor-util.h"
+#include "architecture.h"
+#include "audit-util.h"
+#include "cgroup-util.h"
+#include "condition.h"
+#include "efivars.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "ima-util.h"
+#include "log.h"
+#include "macro.h"
+#include "selinux-util.h"
+#include "set.h"
+#include "smack-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tomoyo-util.h"
+#include "user-util.h"
+#include "tests.h"
+#include "util.h"
+#include "virt.h"
+
+static void test_condition_test_path(void) {
+ Condition *condition;
+
+ condition = condition_new(CONDITION_PATH_EXISTS, "/bin/sh", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_EXISTS, "/bin/s?", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_EXISTS_GLOB, "/bin/s?", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_EXISTS_GLOB, "/bin/s?", false, true);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_EXISTS, "/thiscertainlywontexist", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_EXISTS, "/thiscertainlywontexist", false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_IS_DIRECTORY, "/bin", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_DIRECTORY_NOT_EMPTY, "/bin", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_FILE_NOT_EMPTY, "/bin/sh", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_FILE_IS_EXECUTABLE, "/bin/sh", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_FILE_IS_EXECUTABLE, "/etc/passwd", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_IS_MOUNT_POINT, "/proc", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_IS_MOUNT_POINT, "/", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_IS_MOUNT_POINT, "/bin", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_IS_READ_WRITE, "/tmp", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_IS_SYMBOLIC_LINK, "/dev/stdout", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+}
+
+static void test_condition_test_control_group_controller(void) {
+ Condition *condition;
+ CGroupMask system_mask;
+ CGroupController controller;
+ _cleanup_free_ char *controller_name = NULL;
+ int r;
+
+ r = cg_unified_flush();
+ if (r < 0) {
+ log_notice_errno(r, "Skipping ConditionControlGroupController tests: %m");
+ return;
+ }
+
+ /* Invalid controllers are ignored */
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, "thisisnotarealcontroller", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, "thisisnotarealcontroller", false, true);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ assert_se(cg_mask_supported(&system_mask) >= 0);
+
+ /* Individual valid controllers one by one */
+ for (controller = 0; controller < _CGROUP_CONTROLLER_MAX; controller++) {
+ const char *local_controller_name = cgroup_controller_to_string(controller);
+ log_info("chosen controller is '%s'", local_controller_name);
+ if (system_mask & CGROUP_CONTROLLER_TO_MASK(controller)) {
+ log_info("this controller is available");
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, local_controller_name, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, local_controller_name, false, true);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+ } else {
+ log_info("this controller is unavailable");
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, local_controller_name, false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, local_controller_name, false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+ }
+ }
+
+ /* Multiple valid controllers at the same time */
+ assert_se(cg_mask_to_string(system_mask, &controller_name) >= 0);
+
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, strempty(controller_name), false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, strempty(controller_name), false, true);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+}
+
+static void test_condition_test_ac_power(void) {
+ Condition *condition;
+
+ condition = condition_new(CONDITION_AC_POWER, "true", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition) == on_ac_power());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_AC_POWER, "false", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition) != on_ac_power());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_AC_POWER, "false", false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition) == on_ac_power());
+ condition_free(condition);
+}
+
+static void test_condition_test_host(void) {
+ _cleanup_free_ char *hostname = NULL;
+ char sid[SD_ID128_STRING_MAX];
+ Condition *condition;
+ sd_id128_t id;
+ int r;
+
+ r = sd_id128_get_machine(&id);
+ assert_se(r >= 0);
+ assert_se(sd_id128_to_string(id, sid));
+
+ condition = condition_new(CONDITION_HOST, sid, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_HOST, "garbage value jjjjjjjjjjjjjj", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_HOST, sid, false, true);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ hostname = gethostname_malloc();
+ assert_se(hostname);
+
+ /* if hostname looks like an id128 then skip testing it */
+ if (id128_is_valid(hostname))
+ log_notice("hostname is an id128, skipping test");
+ else {
+ condition = condition_new(CONDITION_HOST, hostname, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+ }
+}
+
+static void test_condition_test_architecture(void) {
+ Condition *condition;
+ const char *sa;
+ int a;
+
+ a = uname_architecture();
+ assert_se(a >= 0);
+
+ sa = architecture_to_string(a);
+ assert_se(sa);
+
+ condition = condition_new(CONDITION_ARCHITECTURE, sa, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_ARCHITECTURE, "garbage value", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_ARCHITECTURE, sa, false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition) == 0);
+ condition_free(condition);
+}
+
+static void test_condition_test_kernel_command_line(void) {
+ Condition *condition;
+
+ condition = condition_new(CONDITION_KERNEL_COMMAND_LINE, "thisreallyshouldntbeonthekernelcommandline", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_COMMAND_LINE, "andthis=neither", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+}
+
+static void test_condition_test_kernel_version(void) {
+ Condition *condition;
+ struct utsname u;
+ const char *v;
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "*thisreallyshouldntbeinthekernelversion*", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "*", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ assert_se(uname(&u) >= 0);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, u.release, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ strshorten(u.release, 4);
+ strcpy(strchr(u.release, 0), "*");
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, u.release, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ /* 0.1.2 would be a very very very old kernel */
+ condition = condition_new(CONDITION_KERNEL_VERSION, "> 0.1.2", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, ">= 0.1.2", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "< 0.1.2", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "<= 0.1.2", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "= 0.1.2", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ /* 4711.8.15 is a very very very future kernel */
+ condition = condition_new(CONDITION_KERNEL_VERSION, "< 4711.8.15", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "<= 4711.8.15", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "= 4711.8.15", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "> 4711.8.15", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, ">= 4711.8.15", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ assert_se(uname(&u) >= 0);
+
+ v = strjoina(">=", u.release);
+ condition = condition_new(CONDITION_KERNEL_VERSION, v, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ v = strjoina("= ", u.release);
+ condition = condition_new(CONDITION_KERNEL_VERSION, v, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ v = strjoina("<=", u.release);
+ condition = condition_new(CONDITION_KERNEL_VERSION, v, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ v = strjoina("> ", u.release);
+ condition = condition_new(CONDITION_KERNEL_VERSION, v, false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ v = strjoina("< ", u.release);
+ condition = condition_new(CONDITION_KERNEL_VERSION, v, false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+}
+
+static void test_condition_test_null(void) {
+ Condition *condition;
+
+ condition = condition_new(CONDITION_NULL, NULL, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_NULL, NULL, false, true);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+}
+
+static void test_condition_test_security(void) {
+ Condition *condition;
+
+ condition = condition_new(CONDITION_SECURITY, "garbage oifdsjfoidsjoj", false, false);
+ assert_se(condition);
+ assert_se(!condition_test(condition));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_SECURITY, "selinux", false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition) != mac_selinux_use());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_SECURITY, "apparmor", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition) == mac_apparmor_use());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_SECURITY, "tomoyo", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition) == mac_tomoyo_use());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_SECURITY, "ima", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition) == use_ima());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_SECURITY, "smack", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition) == mac_smack_use());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_SECURITY, "audit", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition) == use_audit());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_SECURITY, "uefi-secureboot", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition) == is_efi_secure_boot());
+ condition_free(condition);
+}
+
+static void print_securities(void) {
+ log_info("------ enabled security technologies ------");
+ log_info("SELinux: %s", yes_no(mac_selinux_use()));
+ log_info("AppArmor: %s", yes_no(mac_apparmor_use()));
+ log_info("Tomoyo: %s", yes_no(mac_tomoyo_use()));
+ log_info("IMA: %s", yes_no(use_ima()));
+ log_info("SMACK: %s", yes_no(mac_smack_use()));
+ log_info("Audit: %s", yes_no(use_audit()));
+ log_info("UEFI secure boot: %s", yes_no(is_efi_secure_boot()));
+ log_info("-------------------------------------------");
+}
+
+static void test_condition_test_virtualization(void) {
+ Condition *condition;
+ const char *virt;
+ int r;
+
+ condition = condition_new(CONDITION_VIRTUALIZATION, "garbage oifdsjfoidsjoj", false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionVirtualization=garbage → %i", r);
+ assert_se(r == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_VIRTUALIZATION, "container", false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionVirtualization=container → %i", r);
+ assert_se(r == !!detect_container());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_VIRTUALIZATION, "vm", false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionVirtualization=vm → %i", r);
+ assert_se(r == (detect_vm() && !detect_container()));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_VIRTUALIZATION, "private-users", false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionVirtualization=private-users → %i", r);
+ assert_se(r == !!running_in_userns());
+ condition_free(condition);
+
+ NULSTR_FOREACH(virt,
+ "kvm\0"
+ "qemu\0"
+ "bochs\0"
+ "xen\0"
+ "uml\0"
+ "vmware\0"
+ "oracle\0"
+ "microsoft\0"
+ "zvm\0"
+ "parallels\0"
+ "bhyve\0"
+ "vm_other\0") {
+
+ condition = condition_new(CONDITION_VIRTUALIZATION, virt, false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionVirtualization=%s → %i", virt, r);
+ assert_se(r >= 0);
+ condition_free(condition);
+ }
+}
+
+static void test_condition_test_user(void) {
+ Condition *condition;
+ char* uid;
+ char* username;
+ int r;
+
+ condition = condition_new(CONDITION_USER, "garbage oifdsjfoidsjoj", false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionUser=garbage → %i", r);
+ assert_se(r == 0);
+ condition_free(condition);
+
+ assert_se(asprintf(&uid, "%"PRIu32, UINT32_C(0xFFFF)) > 0);
+ condition = condition_new(CONDITION_USER, uid, false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionUser=%s → %i", uid, r);
+ assert_se(r == 0);
+ condition_free(condition);
+ free(uid);
+
+ assert_se(asprintf(&uid, "%u", (unsigned)getuid()) > 0);
+ condition = condition_new(CONDITION_USER, uid, false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionUser=%s → %i", uid, r);
+ assert_se(r > 0);
+ condition_free(condition);
+ free(uid);
+
+ assert_se(asprintf(&uid, "%u", (unsigned)getuid()+1) > 0);
+ condition = condition_new(CONDITION_USER, uid, false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionUser=%s → %i", uid, r);
+ assert_se(r == 0);
+ condition_free(condition);
+ free(uid);
+
+ username = getusername_malloc();
+ assert_se(username);
+ condition = condition_new(CONDITION_USER, username, false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionUser=%s → %i", username, r);
+ assert_se(r > 0);
+ condition_free(condition);
+ free(username);
+
+ username = (char*)(geteuid() == 0 ? NOBODY_USER_NAME : "root");
+ condition = condition_new(CONDITION_USER, username, false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionUser=%s → %i", username, r);
+ assert_se(r == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_USER, "@system", false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionUser=@system → %i", r);
+ if (uid_is_system(getuid()) || uid_is_system(geteuid()))
+ assert_se(r > 0);
+ else
+ assert_se(r == 0);
+ condition_free(condition);
+}
+
+static void test_condition_test_group(void) {
+ Condition *condition;
+ char* gid;
+ char* groupname;
+ gid_t *gids, max_gid;
+ int ngroups_max, ngroups, r, i;
+
+ assert_se(0 < asprintf(&gid, "%u", UINT32_C(0xFFFF)));
+ condition = condition_new(CONDITION_GROUP, gid, false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionGroup=%s → %i", gid, r);
+ assert_se(r == 0);
+ condition_free(condition);
+ free(gid);
+
+ assert_se(0 < asprintf(&gid, "%u", getgid()));
+ condition = condition_new(CONDITION_GROUP, gid, false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionGroup=%s → %i", gid, r);
+ assert_se(r > 0);
+ condition_free(condition);
+ free(gid);
+
+ ngroups_max = sysconf(_SC_NGROUPS_MAX);
+ assert(ngroups_max > 0);
+
+ gids = newa(gid_t, ngroups_max);
+
+ ngroups = getgroups(ngroups_max, gids);
+ assert(ngroups >= 0);
+
+ max_gid = getgid();
+ for (i = 0; i < ngroups; i++) {
+ assert_se(0 < asprintf(&gid, "%u", gids[i]));
+ condition = condition_new(CONDITION_GROUP, gid, false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionGroup=%s → %i", gid, r);
+ assert_se(r > 0);
+ condition_free(condition);
+ free(gid);
+ max_gid = gids[i] > max_gid ? gids[i] : max_gid;
+
+ groupname = gid_to_name(gids[i]);
+ assert_se(groupname);
+ condition = condition_new(CONDITION_GROUP, groupname, false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionGroup=%s → %i", groupname, r);
+ assert_se(r > 0);
+ condition_free(condition);
+ free(groupname);
+ max_gid = gids[i] > max_gid ? gids[i] : max_gid;
+ }
+
+ assert_se(0 < asprintf(&gid, "%u", max_gid+1));
+ condition = condition_new(CONDITION_GROUP, gid, false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionGroup=%s → %i", gid, r);
+ assert_se(r == 0);
+ condition_free(condition);
+ free(gid);
+
+ groupname = (char*)(geteuid() == 0 ? NOBODY_GROUP_NAME : "root");
+ condition = condition_new(CONDITION_GROUP, groupname, false, false);
+ assert_se(condition);
+ r = condition_test(condition);
+ log_info("ConditionGroup=%s → %i", groupname, r);
+ assert_se(r == 0);
+ condition_free(condition);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_condition_test_path();
+ test_condition_test_ac_power();
+ test_condition_test_host();
+ test_condition_test_architecture();
+ test_condition_test_kernel_command_line();
+ test_condition_test_kernel_version();
+ test_condition_test_null();
+ test_condition_test_security();
+ print_securities();
+ test_condition_test_virtualization();
+ test_condition_test_user();
+ test_condition_test_group();
+ test_condition_test_control_group_controller();
+
+ return 0;
+}
diff --git a/src/test/test-conf-files.c b/src/test/test-conf-files.c
new file mode 100644
index 0000000..9fd8b6b
--- /dev/null
+++ b/src/test/test-conf-files.c
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2014 Michael Marineau
+***/
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "user-util.h"
+#include "util.h"
+
+static void setup_test_dir(char *tmp_dir, const char *files, ...) {
+ va_list ap;
+
+ assert_se(mkdtemp(tmp_dir));
+
+ va_start(ap, files);
+ while (files) {
+ _cleanup_free_ char *path;
+
+ assert_se(path = strappend(tmp_dir, files));
+ (void) mkdir_parents(path, 0755);
+ assert_se(write_string_file(path, "foobar", WRITE_STRING_FILE_CREATE) >= 0);
+
+ files = va_arg(ap, const char *);
+ }
+ va_end(ap);
+}
+
+static void test_conf_files_list(bool use_root) {
+ char tmp_dir[] = "/tmp/test-conf-files-XXXXXX";
+ _cleanup_strv_free_ char **found_files = NULL, **found_files2 = NULL;
+ const char *root_dir, *search_1, *search_2, *expect_a, *expect_b, *expect_c, *mask;
+
+ log_debug("/* %s(%s) */", __func__, yes_no(use_root));
+
+ setup_test_dir(tmp_dir,
+ "/dir1/a.conf",
+ "/dir2/a.conf",
+ "/dir2/b.conf",
+ "/dir2/c.foo",
+ "/dir2/d.conf",
+ NULL);
+
+ mask = strjoina(tmp_dir, "/dir1/d.conf");
+ assert_se(symlink("/dev/null", mask) >= 0);
+
+ if (use_root) {
+ root_dir = tmp_dir;
+ search_1 = "/dir1";
+ search_2 = "/dir2";
+ } else {
+ root_dir = NULL;
+ search_1 = strjoina(tmp_dir, "/dir1");
+ search_2 = strjoina(tmp_dir, "/dir2");
+ }
+
+ expect_a = strjoina(tmp_dir, "/dir1/a.conf");
+ expect_b = strjoina(tmp_dir, "/dir2/b.conf");
+ expect_c = strjoina(tmp_dir, "/dir2/c.foo");
+
+ log_debug("/* Check when filtered by suffix */");
+
+ assert_se(conf_files_list(&found_files, ".conf", root_dir, CONF_FILES_FILTER_MASKED, search_1, search_2, NULL) == 0);
+ strv_print(found_files);
+
+ assert_se(found_files);
+ assert_se(streq_ptr(found_files[0], expect_a));
+ assert_se(streq_ptr(found_files[1], expect_b));
+ assert_se(!found_files[2]);
+
+ log_debug("/* Check when unfiltered */");
+ assert_se(conf_files_list(&found_files2, NULL, root_dir, CONF_FILES_FILTER_MASKED, search_1, search_2, NULL) == 0);
+ strv_print(found_files2);
+
+ assert_se(found_files2);
+ assert_se(streq_ptr(found_files2[0], expect_a));
+ assert_se(streq_ptr(found_files2[1], expect_b));
+ assert_se(streq_ptr(found_files2[2], expect_c));
+ assert_se(!found_files2[3]);
+
+ assert_se(rm_rf(tmp_dir, REMOVE_ROOT|REMOVE_PHYSICAL) == 0);
+}
+
+static void test_conf_files_insert(const char *root) {
+ _cleanup_strv_free_ char **s = NULL;
+
+ log_info("/* %s root=%s */", __func__, strempty(root));
+
+ char **dirs = STRV_MAKE("/dir1", "/dir2", "/dir3");
+
+ _cleanup_free_ const char
+ *foo1 = prefix_root(root, "/dir1/foo.conf"),
+ *foo2 = prefix_root(root, "/dir2/foo.conf"),
+ *bar2 = prefix_root(root, "/dir2/bar.conf"),
+ *zzz3 = prefix_root(root, "/dir3/zzz.conf"),
+ *whatever = prefix_root(root, "/whatever.conf");
+
+ assert_se(conf_files_insert(&s, root, dirs, "/dir2/foo.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(foo2)));
+
+ /* The same file again, https://github.com/systemd/systemd/issues/11124 */
+ assert_se(conf_files_insert(&s, root, dirs, "/dir2/foo.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(foo2)));
+
+ /* Lower priority → new entry is ignored */
+ assert_se(conf_files_insert(&s, root, dirs, "/dir3/foo.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(foo2)));
+
+ /* Higher priority → new entry replaces */
+ assert_se(conf_files_insert(&s, root, dirs, "/dir1/foo.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(foo1)));
+
+ /* Earlier basename */
+ assert_se(conf_files_insert(&s, root, dirs, "/dir2/bar.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(bar2, foo1)));
+
+ /* Later basename */
+ assert_se(conf_files_insert(&s, root, dirs, "/dir3/zzz.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(bar2, foo1, zzz3)));
+
+ /* All lower priority → all ignored */
+ assert_se(conf_files_insert(&s, root, dirs, "/dir3/zzz.conf") == 0);
+ assert_se(conf_files_insert(&s, root, dirs, "/dir2/bar.conf") == 0);
+ assert_se(conf_files_insert(&s, root, dirs, "/dir3/bar.conf") == 0);
+ assert_se(conf_files_insert(&s, root, dirs, "/dir2/foo.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(bar2, foo1, zzz3)));
+
+ /* Two entries that don't match any of the directories, but match basename */
+ assert_se(conf_files_insert(&s, root, dirs, "/dir4/zzz.conf") == 0);
+ assert_se(conf_files_insert(&s, root, dirs, "/zzz.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(bar2, foo1, zzz3)));
+
+ /* An entry that doesn't match any of the directories, no match at all */
+ assert_se(conf_files_insert(&s, root, dirs, "/whatever.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(bar2, foo1, whatever, zzz3)));
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_conf_files_list(false);
+ test_conf_files_list(true);
+ test_conf_files_insert(NULL);
+ test_conf_files_insert("/root");
+ test_conf_files_insert("/root/");
+
+ return 0;
+}
diff --git a/src/test/test-conf-parser.c b/src/test/test-conf-parser.c
new file mode 100644
index 0000000..1738938
--- /dev/null
+++ b/src/test/test-conf-parser.c
@@ -0,0 +1,388 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "conf-parser.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static void test_config_parse_path_one(const char *rvalue, const char *expected) {
+ _cleanup_free_ char *path = NULL;
+
+ assert_se(config_parse_path("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &path, NULL) >= 0);
+ assert_se(streq_ptr(expected, path));
+}
+
+static void test_config_parse_log_level_one(const char *rvalue, int expected) {
+ int log_level = 0;
+
+ assert_se(config_parse_log_level("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &log_level, NULL) >= 0);
+ assert_se(expected == log_level);
+}
+
+static void test_config_parse_log_facility_one(const char *rvalue, int expected) {
+ int log_facility = 0;
+
+ assert_se(config_parse_log_facility("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &log_facility, NULL) >= 0);
+ assert_se(expected == log_facility);
+}
+
+static void test_config_parse_iec_size_one(const char *rvalue, size_t expected) {
+ size_t iec_size = 0;
+
+ assert_se(config_parse_iec_size("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &iec_size, NULL) >= 0);
+ assert_se(expected == iec_size);
+}
+
+static void test_config_parse_si_size_one(const char *rvalue, size_t expected) {
+ size_t si_size = 0;
+
+ assert_se(config_parse_si_size("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &si_size, NULL) >= 0);
+ assert_se(expected == si_size);
+}
+
+static void test_config_parse_int_one(const char *rvalue, int expected) {
+ int v = -1;
+
+ assert_se(config_parse_int("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &v, NULL) >= 0);
+ assert_se(expected == v);
+}
+
+static void test_config_parse_unsigned_one(const char *rvalue, unsigned expected) {
+ unsigned v = 0;
+
+ assert_se(config_parse_unsigned("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &v, NULL) >= 0);
+ assert_se(expected == v);
+}
+
+static void test_config_parse_strv_one(const char *rvalue, char **expected) {
+ _cleanup_strv_free_ char **strv = NULL;
+
+ assert_se(config_parse_strv("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &strv, NULL) >= 0);
+ assert_se(strv_equal(expected, strv));
+}
+
+static void test_config_parse_mode_one(const char *rvalue, mode_t expected) {
+ mode_t v = 0;
+
+ assert_se(config_parse_mode("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &v, NULL) >= 0);
+ assert_se(expected == v);
+}
+
+static void test_config_parse_sec_one(const char *rvalue, usec_t expected) {
+ usec_t v = 0;
+
+ assert_se(config_parse_sec("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &v, NULL) >= 0);
+ assert_se(expected == v);
+}
+
+static void test_config_parse_nsec_one(const char *rvalue, nsec_t expected) {
+ nsec_t v = 0;
+
+ assert_se(config_parse_nsec("unit", "filename", 1, "nsection", 1, "lvalue", 0, rvalue, &v, NULL) >= 0);
+ assert_se(expected == v);
+}
+
+static void test_config_parse_path(void) {
+ test_config_parse_path_one("/path", "/path");
+ test_config_parse_path_one("/path//////////", "/path");
+ test_config_parse_path_one("///path/foo///bar////bar//", "/path/foo/bar/bar");
+ test_config_parse_path_one("/path//./////hogehoge///.", "/path/hogehoge");
+ test_config_parse_path_one("/path/\xc3\x80", "/path/\xc3\x80");
+
+ test_config_parse_path_one("not_absolute/path", NULL);
+ test_config_parse_path_one("/path/\xc3\x7f", NULL);
+}
+
+static void test_config_parse_log_level(void) {
+ test_config_parse_log_level_one("debug", LOG_DEBUG);
+ test_config_parse_log_level_one("info", LOG_INFO);
+
+ test_config_parse_log_level_one("garbage", 0);
+}
+
+static void test_config_parse_log_facility(void) {
+ test_config_parse_log_facility_one("mail", LOG_MAIL);
+ test_config_parse_log_facility_one("user", LOG_USER);
+
+ test_config_parse_log_facility_one("garbage", 0);
+}
+
+static void test_config_parse_iec_size(void) {
+ test_config_parse_iec_size_one("1024", 1024);
+ test_config_parse_iec_size_one("2K", 2048);
+ test_config_parse_iec_size_one("10M", 10 * 1024 * 1024);
+ test_config_parse_iec_size_one("1G", 1 * 1024 * 1024 * 1024);
+ test_config_parse_iec_size_one("0G", 0);
+ test_config_parse_iec_size_one("0", 0);
+
+ test_config_parse_iec_size_one("-982", 0);
+ test_config_parse_iec_size_one("49874444198739873000000G", 0);
+ test_config_parse_iec_size_one("garbage", 0);
+}
+
+static void test_config_parse_si_size(void) {
+ test_config_parse_si_size_one("1024", 1024);
+ test_config_parse_si_size_one("2K", 2000);
+ test_config_parse_si_size_one("10M", 10 * 1000 * 1000);
+ test_config_parse_si_size_one("1G", 1 * 1000 * 1000 * 1000);
+ test_config_parse_si_size_one("0G", 0);
+ test_config_parse_si_size_one("0", 0);
+
+ test_config_parse_si_size_one("-982", 0);
+ test_config_parse_si_size_one("49874444198739873000000G", 0);
+ test_config_parse_si_size_one("garbage", 0);
+}
+
+static void test_config_parse_int(void) {
+ test_config_parse_int_one("1024", 1024);
+ test_config_parse_int_one("-1024", -1024);
+ test_config_parse_int_one("0", 0);
+
+ test_config_parse_int_one("99999999999999999999999999999999999999999999999999999999", -1);
+ test_config_parse_int_one("-99999999999999999999999999999999999999999999999999999999", -1);
+ test_config_parse_int_one("1G", -1);
+ test_config_parse_int_one("garbage", -1);
+}
+
+static void test_config_parse_unsigned(void) {
+ test_config_parse_unsigned_one("10241024", 10241024);
+ test_config_parse_unsigned_one("1024", 1024);
+ test_config_parse_unsigned_one("0", 0);
+
+ test_config_parse_unsigned_one("99999999999999999999999999999999999999999999999999999999", 0);
+ test_config_parse_unsigned_one("1G", 0);
+ test_config_parse_unsigned_one("garbage", 0);
+ test_config_parse_unsigned_one("1000garbage", 0);
+}
+
+static void test_config_parse_strv(void) {
+ test_config_parse_strv_one("", STRV_MAKE_EMPTY);
+ test_config_parse_strv_one("foo", STRV_MAKE("foo"));
+ test_config_parse_strv_one("foo bar foo", STRV_MAKE("foo", "bar", "foo"));
+ test_config_parse_strv_one("\"foo bar\" foo", STRV_MAKE("foo bar", "foo"));
+ test_config_parse_strv_one("\xc3\x80", STRV_MAKE("\xc3\x80"));
+ test_config_parse_strv_one("\xc3\x7f", STRV_MAKE("\xc3\x7f"));
+}
+
+static void test_config_parse_mode(void) {
+ test_config_parse_mode_one("777", 0777);
+ test_config_parse_mode_one("644", 0644);
+
+ test_config_parse_mode_one("-777", 0);
+ test_config_parse_mode_one("999", 0);
+ test_config_parse_mode_one("garbage", 0);
+ test_config_parse_mode_one("777garbage", 0);
+ test_config_parse_mode_one("777 garbage", 0);
+}
+
+static void test_config_parse_sec(void) {
+ test_config_parse_sec_one("1", 1 * USEC_PER_SEC);
+ test_config_parse_sec_one("1s", 1 * USEC_PER_SEC);
+ test_config_parse_sec_one("100ms", 100 * USEC_PER_MSEC);
+ test_config_parse_sec_one("5min 20s", 5 * 60 * USEC_PER_SEC + 20 * USEC_PER_SEC);
+
+ test_config_parse_sec_one("-1", 0);
+ test_config_parse_sec_one("10foo", 0);
+ test_config_parse_sec_one("garbage", 0);
+}
+
+static void test_config_parse_nsec(void) {
+ test_config_parse_nsec_one("1", 1);
+ test_config_parse_nsec_one("1s", 1 * NSEC_PER_SEC);
+ test_config_parse_nsec_one("100ms", 100 * NSEC_PER_MSEC);
+ test_config_parse_nsec_one("5min 20s", 5 * 60 * NSEC_PER_SEC + 20 * NSEC_PER_SEC);
+
+ test_config_parse_nsec_one("-1", 0);
+ test_config_parse_nsec_one("10foo", 0);
+ test_config_parse_nsec_one("garbage", 0);
+}
+
+static void test_config_parse_iec_uint64(void) {
+ uint64_t offset = 0;
+ assert_se(config_parse_iec_uint64(NULL, "/this/file", 11, "Section", 22, "Size", 0, "4M", &offset, NULL) == 0);
+ assert_se(offset == 4 * 1024 * 1024);
+
+ assert_se(config_parse_iec_uint64(NULL, "/this/file", 11, "Section", 22, "Size", 0, "4.5M", &offset, NULL) == 0);
+}
+
+#define x10(x) x x x x x x x x x x
+#define x100(x) x10(x10(x))
+#define x1000(x) x10(x100(x))
+
+static const char* const config_file[] = {
+ "[Section]\n"
+ "setting1=1\n",
+
+ "[Section]\n"
+ "setting1=1", /* no terminating newline */
+
+ "\n\n\n\n[Section]\n\n\n"
+ "setting1=1", /* some whitespace, no terminating newline */
+
+ "[Section]\n"
+ "[Section]\n"
+ "setting1=1\n"
+ "setting1=2\n"
+ "setting1=1\n", /* repeated settings */
+
+ "[Section]\n"
+ "setting1=1\\\n" /* normal continuation */
+ "2\\\n"
+ "3\n",
+
+ "[Section]\n"
+ "#hogehoge\\\n" /* continuation is ignored in comment */
+ "setting1=1\\\n" /* normal continuation */
+ "2\\\n"
+ "3\n",
+
+ "[Section]\n"
+ "setting1=1\\\n" /* normal continuation */
+ "#hogehoge\\\n" /* commented out line in continuation is ignored */
+ "2\\\n"
+ "3\n",
+
+ "[Section]\n"
+ " #hogehoge\\\n" /* whitespaces before comments */
+ " setting1=1\\\n" /* whitespaces before key */
+ "2\\\n"
+ "3\n",
+
+ "[Section]\n"
+ " setting1=1\\\n" /* whitespaces before key */
+ " #hogehoge\\\n" /* commented out line prefixed with whitespaces in continuation */
+ "2\\\n"
+ "3\n",
+
+ "[Section]\n"
+ "setting1=1\\\n" /* continuation with extra trailing backslash at the end */
+ "2\\\n"
+ "3\\\n",
+
+ "[Section]\n"
+ "setting1=1\\\\\\\n" /* continuation with trailing escape symbols */
+ "\\\\2\n", /* note that C requires one level of escaping, so the
+ * parser gets "…1 BS BS BS NL BS BS 2 NL", which
+ * it translates into "…1 BS BS SP BS BS 2" */
+
+ "\n[Section]\n\n"
+ "setting1=" /* a line above LINE_MAX length */
+ x1000("ABCD")
+ "\n",
+
+ "[Section]\n"
+ "setting1=" /* a line above LINE_MAX length, with continuation */
+ x1000("ABCD") "\\\n"
+ "foobar",
+
+ "[Section]\n"
+ "setting1=" /* a line above LINE_MAX length, with continuation */
+ x1000("ABCD") "\\\n" /* and an extra trailing backslash */
+ "foobar\\\n",
+
+ "[Section]\n"
+ "setting1=" /* a line above the allowed limit: 9 + 1050000 + 1 */
+ x1000(x1000("x") x10("abcde")) "\n",
+
+ "[Section]\n"
+ "setting1=" /* many continuation lines, together above the limit */
+ x1000(x1000("x") x10("abcde") "\\\n") "xxx",
+};
+
+static void test_config_parse(unsigned i, const char *s) {
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-conf-parser.XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *setting1 = NULL;
+ int r;
+
+ const ConfigTableItem items[] = {
+ { "Section", "setting1", config_parse_string, 0, &setting1},
+ {}
+ };
+
+ log_info("== %s[%i] ==", __func__, i);
+
+ assert_se(fmkostemp_safe(name, "r+", &f) == 0);
+ assert_se(fwrite(s, strlen(s), 1, f) == 1);
+ rewind(f);
+
+ /*
+ int config_parse(const char *unit,
+ const char *filename,
+ FILE *f,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ bool relaxed,
+ bool allow_include,
+ bool warn,
+ void *userdata)
+ */
+
+ r = config_parse(NULL, name, f,
+ "Section\0",
+ config_item_table_lookup, items,
+ CONFIG_PARSE_WARN, NULL);
+
+ switch (i) {
+ case 0 ... 3:
+ assert_se(r == 0);
+ assert_se(streq(setting1, "1"));
+ break;
+
+ case 4 ... 9:
+ assert_se(r == 0);
+ assert_se(streq(setting1, "1 2 3"));
+ break;
+
+ case 10:
+ assert_se(r == 0);
+ assert_se(streq(setting1, "1\\\\ \\\\2"));
+ break;
+
+ case 11:
+ assert_se(r == 0);
+ assert_se(streq(setting1, x1000("ABCD")));
+ break;
+
+ case 12 ... 13:
+ assert_se(r == 0);
+ assert_se(streq(setting1, x1000("ABCD") " foobar"));
+ break;
+
+ case 14 ... 15:
+ assert_se(r == -ENOBUFS);
+ assert_se(setting1 == NULL);
+ break;
+ }
+}
+
+int main(int argc, char **argv) {
+ unsigned i;
+
+ log_parse_environment();
+ log_open();
+
+ test_config_parse_path();
+ test_config_parse_log_level();
+ test_config_parse_log_facility();
+ test_config_parse_iec_size();
+ test_config_parse_si_size();
+ test_config_parse_int();
+ test_config_parse_unsigned();
+ test_config_parse_strv();
+ test_config_parse_mode();
+ test_config_parse_sec();
+ test_config_parse_nsec();
+ test_config_parse_iec_uint64();
+
+ for (i = 0; i < ELEMENTSOF(config_file); i++)
+ test_config_parse(i, config_file[i]);
+
+ return 0;
+}
diff --git a/src/test/test-copy.c b/src/test/test-copy.c
new file mode 100644
index 0000000..b17a1c5
--- /dev/null
+++ b/src/test/test-copy.c
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "copy.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+static void test_copy_file(void) {
+ _cleanup_free_ char *buf = NULL;
+ char fn[] = "/tmp/test-copy_file.XXXXXX";
+ char fn_copy[] = "/tmp/test-copy_file.XXXXXX";
+ size_t sz = 0;
+ int fd;
+
+ log_info("%s", __func__);
+
+ fd = mkostemp_safe(fn);
+ assert_se(fd >= 0);
+ close(fd);
+
+ fd = mkostemp_safe(fn_copy);
+ assert_se(fd >= 0);
+ close(fd);
+
+ assert_se(write_string_file(fn, "foo bar bar bar foo", WRITE_STRING_FILE_CREATE) == 0);
+
+ assert_se(copy_file(fn, fn_copy, 0, 0644, 0, COPY_REFLINK) == 0);
+
+ assert_se(read_full_file(fn_copy, &buf, &sz) == 0);
+ assert_se(streq(buf, "foo bar bar bar foo\n"));
+ assert_se(sz == 20);
+
+ unlink(fn);
+ unlink(fn_copy);
+}
+
+static void test_copy_file_fd(void) {
+ char in_fn[] = "/tmp/test-copy-file-fd-XXXXXX";
+ char out_fn[] = "/tmp/test-copy-file-fd-XXXXXX";
+ _cleanup_close_ int in_fd = -1, out_fd = -1;
+ char text[] = "boohoo\nfoo\n\tbar\n";
+ char buf[64] = {0};
+
+ log_info("%s", __func__);
+
+ in_fd = mkostemp_safe(in_fn);
+ assert_se(in_fd >= 0);
+ out_fd = mkostemp_safe(out_fn);
+ assert_se(out_fd >= 0);
+
+ assert_se(write_string_file(in_fn, text, WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(copy_file_fd("/a/file/which/does/not/exist/i/guess", out_fd, COPY_REFLINK) < 0);
+ assert_se(copy_file_fd(in_fn, out_fd, COPY_REFLINK) >= 0);
+ assert_se(lseek(out_fd, SEEK_SET, 0) == 0);
+
+ assert_se(read(out_fd, buf, sizeof(buf)) == sizeof(text) - 1);
+ assert_se(streq(buf, text));
+
+ unlink(in_fn);
+ unlink(out_fn);
+}
+
+static void test_copy_tree(void) {
+ char original_dir[] = "/tmp/test-copy_tree/";
+ char copy_dir[] = "/tmp/test-copy_tree-copy/";
+ char **files = STRV_MAKE("file", "dir1/file", "dir1/dir2/file", "dir1/dir2/dir3/dir4/dir5/file");
+ char **links = STRV_MAKE("link", "file",
+ "link2", "dir1/file");
+ char **p, **link;
+ const char *unixsockp;
+ struct stat st;
+
+ log_info("%s", __func__);
+
+ (void) rm_rf(copy_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf(original_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
+
+ STRV_FOREACH(p, files) {
+ _cleanup_free_ char *f;
+
+ assert_se(f = strappend(original_dir, *p));
+
+ assert_se(mkdir_parents(f, 0755) >= 0);
+ assert_se(write_string_file(f, "file", WRITE_STRING_FILE_CREATE) == 0);
+ }
+
+ STRV_FOREACH_PAIR(link, p, links) {
+ _cleanup_free_ char *f, *l;
+
+ assert_se(f = strappend(original_dir, *p));
+ assert_se(l = strappend(original_dir, *link));
+
+ assert_se(mkdir_parents(l, 0755) >= 0);
+ assert_se(symlink(f, l) == 0);
+ }
+
+ unixsockp = strjoina(original_dir, "unixsock");
+ assert_se(mknod(unixsockp, S_IFSOCK|0644, 0) >= 0);
+
+ assert_se(copy_tree(original_dir, copy_dir, UID_INVALID, GID_INVALID, COPY_REFLINK|COPY_MERGE) == 0);
+
+ STRV_FOREACH(p, files) {
+ _cleanup_free_ char *buf, *f;
+ size_t sz;
+
+ assert_se(f = strappend(copy_dir, *p));
+
+ assert_se(access(f, F_OK) == 0);
+ assert_se(read_full_file(f, &buf, &sz) == 0);
+ assert_se(streq(buf, "file\n"));
+ }
+
+ STRV_FOREACH_PAIR(link, p, links) {
+ _cleanup_free_ char *target, *f, *l;
+
+ assert_se(f = strjoin(original_dir, *p));
+ assert_se(l = strjoin(copy_dir, *link));
+
+ assert_se(chase_symlinks(l, NULL, 0, &target) == 1);
+ assert_se(path_equal(f, target));
+ }
+
+ unixsockp = strjoina(copy_dir, "unixsock");
+ assert_se(stat(unixsockp, &st) >= 0);
+ assert_se(S_ISSOCK(st.st_mode));
+
+ assert_se(copy_tree(original_dir, copy_dir, UID_INVALID, GID_INVALID, COPY_REFLINK) < 0);
+ assert_se(copy_tree("/tmp/inexistent/foo/bar/fsdoi", copy_dir, UID_INVALID, GID_INVALID, COPY_REFLINK) < 0);
+
+ (void) rm_rf(copy_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf(original_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+static void test_copy_bytes(void) {
+ _cleanup_close_pair_ int pipefd[2] = {-1, -1};
+ _cleanup_close_ int infd = -1;
+ int r, r2;
+ char buf[1024], buf2[1024];
+
+ infd = open("/usr/lib/os-release", O_RDONLY|O_CLOEXEC);
+ if (infd < 0)
+ infd = open("/etc/os-release", O_RDONLY|O_CLOEXEC);
+ assert_se(infd >= 0);
+
+ assert_se(pipe2(pipefd, O_CLOEXEC) == 0);
+
+ r = copy_bytes(infd, pipefd[1], (uint64_t) -1, 0);
+ assert_se(r == 0);
+
+ r = read(pipefd[0], buf, sizeof(buf));
+ assert_se(r >= 0);
+
+ assert_se(lseek(infd, 0, SEEK_SET) == 0);
+ r2 = read(infd, buf2, sizeof(buf2));
+ assert_se(r == r2);
+
+ assert_se(strneq(buf, buf2, r));
+
+ /* test copy_bytes with invalid descriptors */
+ r = copy_bytes(pipefd[0], pipefd[0], 1, 0);
+ assert_se(r == -EBADF);
+
+ r = copy_bytes(pipefd[1], pipefd[1], 1, 0);
+ assert_se(r == -EBADF);
+
+ r = copy_bytes(pipefd[1], infd, 1, 0);
+ assert_se(r == -EBADF);
+}
+
+static void test_copy_bytes_regular_file(const char *src, bool try_reflink, uint64_t max_bytes) {
+ char fn2[] = "/tmp/test-copy-file-XXXXXX";
+ char fn3[] = "/tmp/test-copy-file-XXXXXX";
+ _cleanup_close_ int fd = -1, fd2 = -1, fd3 = -1;
+ int r;
+ struct stat buf, buf2, buf3;
+
+ log_info("%s try_reflink=%s max_bytes=%" PRIu64, __func__, yes_no(try_reflink), max_bytes);
+
+ fd = open(src, O_RDONLY | O_CLOEXEC | O_NOCTTY);
+ assert_se(fd >= 0);
+
+ fd2 = mkostemp_safe(fn2);
+ assert_se(fd2 >= 0);
+
+ fd3 = mkostemp_safe(fn3);
+ assert_se(fd3 >= 0);
+
+ r = copy_bytes(fd, fd2, max_bytes, try_reflink ? COPY_REFLINK : 0);
+ if (max_bytes == (uint64_t) -1)
+ assert_se(r == 0);
+ else
+ assert_se(IN_SET(r, 0, 1));
+
+ assert_se(fstat(fd, &buf) == 0);
+ assert_se(fstat(fd2, &buf2) == 0);
+ assert_se((uint64_t) buf2.st_size == MIN((uint64_t) buf.st_size, max_bytes));
+
+ if (max_bytes < (uint64_t) -1)
+ /* Make sure the file is now higher than max_bytes */
+ assert_se(ftruncate(fd2, max_bytes + 1) == 0);
+
+ assert_se(lseek(fd2, 0, SEEK_SET) == 0);
+
+ r = copy_bytes(fd2, fd3, max_bytes, try_reflink ? COPY_REFLINK : 0);
+ if (max_bytes == (uint64_t) -1)
+ assert_se(r == 0);
+ else
+ /* We cannot distinguish between the input being exactly max_bytes
+ * or longer than max_bytes (without trying to read one more byte,
+ * or calling stat, or FION_READ, etc, and we don't want to do any
+ * of that). So we expect "truncation" since we know that file we
+ * are copying is exactly max_bytes bytes. */
+ assert_se(r == 1);
+
+ assert_se(fstat(fd3, &buf3) == 0);
+
+ if (max_bytes == (uint64_t) -1)
+ assert_se(buf3.st_size == buf2.st_size);
+ else
+ assert_se((uint64_t) buf3.st_size == max_bytes);
+
+ unlink(fn2);
+ unlink(fn3);
+}
+
+static void test_copy_atomic(void) {
+ _cleanup_(rm_rf_physical_and_freep) char *p = NULL;
+ const char *q;
+ int r;
+
+ assert_se(mkdtemp_malloc(NULL, &p) >= 0);
+
+ q = strjoina(p, "/fstab");
+
+ r = copy_file_atomic("/etc/fstab", q, 0644, 0, COPY_REFLINK);
+ if (r == -ENOENT)
+ return;
+
+ assert_se(copy_file_atomic("/etc/fstab", q, 0644, 0, COPY_REFLINK) == -EEXIST);
+
+ assert_se(copy_file_atomic("/etc/fstab", q, 0644, 0, COPY_REPLACE) >= 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_copy_file();
+ test_copy_file_fd();
+ test_copy_tree();
+ test_copy_bytes();
+ test_copy_bytes_regular_file(argv[0], false, (uint64_t) -1);
+ test_copy_bytes_regular_file(argv[0], true, (uint64_t) -1);
+ test_copy_bytes_regular_file(argv[0], false, 1000); /* smaller than copy buffer size */
+ test_copy_bytes_regular_file(argv[0], true, 1000);
+ test_copy_bytes_regular_file(argv[0], false, 32000); /* larger than copy buffer size */
+ test_copy_bytes_regular_file(argv[0], true, 32000);
+ test_copy_atomic();
+
+ return 0;
+}
diff --git a/src/test/test-cpu-set-util.c b/src/test/test-cpu-set-util.c
new file mode 100644
index 0000000..c927245
--- /dev/null
+++ b/src/test/test-cpu-set-util.c
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "cpu-set-util.h"
+#include "macro.h"
+
+static void test_parse_cpu_set(void) {
+ cpu_set_t *c = NULL;
+ int ncpus;
+ int cpu;
+
+ /* Simple range (from CPUAffinity example) */
+ ncpus = parse_cpu_set_and_warn("1 2", &c, NULL, "fake", 1, "CPUAffinity");
+ assert_se(ncpus >= 1024);
+ assert_se(CPU_ISSET_S(1, CPU_ALLOC_SIZE(ncpus), c));
+ assert_se(CPU_ISSET_S(2, CPU_ALLOC_SIZE(ncpus), c));
+ assert_se(CPU_COUNT_S(CPU_ALLOC_SIZE(ncpus), c) == 2);
+ c = cpu_set_mfree(c);
+
+ /* A more interesting range */
+ ncpus = parse_cpu_set_and_warn("0 1 2 3 8 9 10 11", &c, NULL, "fake", 1, "CPUAffinity");
+ assert_se(ncpus >= 1024);
+ assert_se(CPU_COUNT_S(CPU_ALLOC_SIZE(ncpus), c) == 8);
+ for (cpu = 0; cpu < 4; cpu++)
+ assert_se(CPU_ISSET_S(cpu, CPU_ALLOC_SIZE(ncpus), c));
+ for (cpu = 8; cpu < 12; cpu++)
+ assert_se(CPU_ISSET_S(cpu, CPU_ALLOC_SIZE(ncpus), c));
+ c = cpu_set_mfree(c);
+
+ /* Quoted strings */
+ ncpus = parse_cpu_set_and_warn("8 '9' 10 \"11\"", &c, NULL, "fake", 1, "CPUAffinity");
+ assert_se(ncpus >= 1024);
+ assert_se(CPU_COUNT_S(CPU_ALLOC_SIZE(ncpus), c) == 4);
+ for (cpu = 8; cpu < 12; cpu++)
+ assert_se(CPU_ISSET_S(cpu, CPU_ALLOC_SIZE(ncpus), c));
+ c = cpu_set_mfree(c);
+
+ /* Use commas as separators */
+ ncpus = parse_cpu_set_and_warn("0,1,2,3 8,9,10,11", &c, NULL, "fake", 1, "CPUAffinity");
+ assert_se(ncpus >= 1024);
+ assert_se(CPU_COUNT_S(CPU_ALLOC_SIZE(ncpus), c) == 8);
+ for (cpu = 0; cpu < 4; cpu++)
+ assert_se(CPU_ISSET_S(cpu, CPU_ALLOC_SIZE(ncpus), c));
+ for (cpu = 8; cpu < 12; cpu++)
+ assert_se(CPU_ISSET_S(cpu, CPU_ALLOC_SIZE(ncpus), c));
+ c = cpu_set_mfree(c);
+
+ /* Commas with spaces (and trailing comma, space) */
+ ncpus = parse_cpu_set_and_warn("0, 1, 2, 3, 4, 5, 6, 7, ", &c, NULL, "fake", 1, "CPUAffinity");
+ assert_se(ncpus >= 1024);
+ assert_se(CPU_COUNT_S(CPU_ALLOC_SIZE(ncpus), c) == 8);
+ for (cpu = 0; cpu < 8; cpu++)
+ assert_se(CPU_ISSET_S(cpu, CPU_ALLOC_SIZE(ncpus), c));
+ c = cpu_set_mfree(c);
+
+ /* Ranges */
+ ncpus = parse_cpu_set_and_warn("0-3,8-11", &c, NULL, "fake", 1, "CPUAffinity");
+ assert_se(ncpus >= 1024);
+ assert_se(CPU_COUNT_S(CPU_ALLOC_SIZE(ncpus), c) == 8);
+ for (cpu = 0; cpu < 4; cpu++)
+ assert_se(CPU_ISSET_S(cpu, CPU_ALLOC_SIZE(ncpus), c));
+ for (cpu = 8; cpu < 12; cpu++)
+ assert_se(CPU_ISSET_S(cpu, CPU_ALLOC_SIZE(ncpus), c));
+ c = cpu_set_mfree(c);
+
+ /* Ranges with trailing comma, space */
+ ncpus = parse_cpu_set_and_warn("0-3 8-11, ", &c, NULL, "fake", 1, "CPUAffinity");
+ assert_se(ncpus >= 1024);
+ assert_se(CPU_COUNT_S(CPU_ALLOC_SIZE(ncpus), c) == 8);
+ for (cpu = 0; cpu < 4; cpu++)
+ assert_se(CPU_ISSET_S(cpu, CPU_ALLOC_SIZE(ncpus), c));
+ for (cpu = 8; cpu < 12; cpu++)
+ assert_se(CPU_ISSET_S(cpu, CPU_ALLOC_SIZE(ncpus), c));
+ c = cpu_set_mfree(c);
+
+ /* Negative range (returns empty cpu_set) */
+ ncpus = parse_cpu_set_and_warn("3-0", &c, NULL, "fake", 1, "CPUAffinity");
+ assert_se(ncpus >= 1024);
+ assert_se(CPU_COUNT_S(CPU_ALLOC_SIZE(ncpus), c) == 0);
+ c = cpu_set_mfree(c);
+
+ /* Overlapping ranges */
+ ncpus = parse_cpu_set_and_warn("0-7 4-11", &c, NULL, "fake", 1, "CPUAffinity");
+ assert_se(ncpus >= 1024);
+ assert_se(CPU_COUNT_S(CPU_ALLOC_SIZE(ncpus), c) == 12);
+ for (cpu = 0; cpu < 12; cpu++)
+ assert_se(CPU_ISSET_S(cpu, CPU_ALLOC_SIZE(ncpus), c));
+ c = cpu_set_mfree(c);
+
+ /* Mix ranges and individual CPUs */
+ ncpus = parse_cpu_set_and_warn("0,1 4-11", &c, NULL, "fake", 1, "CPUAffinity");
+ assert_se(ncpus >= 1024);
+ assert_se(CPU_COUNT_S(CPU_ALLOC_SIZE(ncpus), c) == 10);
+ assert_se(CPU_ISSET_S(0, CPU_ALLOC_SIZE(ncpus), c));
+ assert_se(CPU_ISSET_S(1, CPU_ALLOC_SIZE(ncpus), c));
+ for (cpu = 4; cpu < 12; cpu++)
+ assert_se(CPU_ISSET_S(cpu, CPU_ALLOC_SIZE(ncpus), c));
+ c = cpu_set_mfree(c);
+
+ /* Garbage */
+ ncpus = parse_cpu_set_and_warn("0 1 2 3 garbage", &c, NULL, "fake", 1, "CPUAffinity");
+ assert_se(ncpus < 0);
+ assert_se(!c);
+
+ /* Range with garbage */
+ ncpus = parse_cpu_set_and_warn("0-3 8-garbage", &c, NULL, "fake", 1, "CPUAffinity");
+ assert_se(ncpus < 0);
+ assert_se(!c);
+
+ /* Empty string */
+ c = NULL;
+ ncpus = parse_cpu_set_and_warn("", &c, NULL, "fake", 1, "CPUAffinity");
+ assert_se(ncpus == 0); /* empty string returns 0 */
+ assert_se(!c);
+
+ /* Runaway quoted string */
+ ncpus = parse_cpu_set_and_warn("0 1 2 3 \"4 5 6 7 ", &c, NULL, "fake", 1, "CPUAffinity");
+ assert_se(ncpus < 0);
+ assert_se(!c);
+}
+
+int main(int argc, char *argv[]) {
+ test_parse_cpu_set();
+
+ return 0;
+}
diff --git a/src/test/test-daemon.c b/src/test/test-daemon.c
new file mode 100644
index 0000000..a4b96da
--- /dev/null
+++ b/src/test/test-daemon.c
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "parse-util.h"
+#include "strv.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_strv_free_ char **l = NULL;
+ int n, i;
+ usec_t duration = USEC_PER_SEC / 10;
+
+ if (argc >= 2) {
+ unsigned x;
+
+ assert_se(safe_atou(argv[1], &x) >= 0);
+ duration = x * USEC_PER_SEC;
+ }
+
+ n = sd_listen_fds_with_names(false, &l);
+ if (n < 0) {
+ log_error_errno(n, "Failed to get listening fds: %m");
+ return EXIT_FAILURE;
+ }
+
+ for (i = 0; i < n; i++)
+ log_info("fd=%i name=%s\n", SD_LISTEN_FDS_START + i, l[i]);
+
+ sd_notify(0,
+ "STATUS=Starting up");
+ usleep(duration);
+
+ sd_notify(0,
+ "STATUS=Running\n"
+ "READY=1");
+ usleep(duration);
+
+ sd_notify(0,
+ "STATUS=Reloading\n"
+ "RELOADING=1");
+ usleep(duration);
+
+ sd_notify(0,
+ "STATUS=Running\n"
+ "READY=1");
+ usleep(duration);
+
+ sd_notify(0,
+ "STATUS=Quitting\n"
+ "STOPPING=1");
+ usleep(duration);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-date.c b/src/test/test-date.c
new file mode 100644
index 0000000..cba51e2
--- /dev/null
+++ b/src/test/test-date.c
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+
+#include "alloc-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "util.h"
+
+static void test_should_pass(const char *p) {
+ usec_t t, q;
+ char buf[FORMAT_TIMESTAMP_MAX], buf_relative[FORMAT_TIMESTAMP_RELATIVE_MAX];
+
+ log_info("Test: %s", p);
+ assert_se(parse_timestamp(p, &t) >= 0);
+ assert_se(format_timestamp_us(buf, sizeof(buf), t));
+ log_info("\"%s\" → \"%s\"", p, buf);
+
+ assert_se(parse_timestamp(buf, &q) >= 0);
+ if (q != t) {
+ char tmp[FORMAT_TIMESTAMP_MAX];
+
+ log_error("round-trip failed: \"%s\" → \"%s\"",
+ buf, format_timestamp_us(tmp, sizeof(tmp), q));
+ }
+ assert_se(q == t);
+
+ assert_se(format_timestamp_relative(buf_relative, sizeof(buf_relative), t));
+ log_info("%s", strna(buf_relative));
+}
+
+static void test_should_parse(const char *p) {
+ usec_t t;
+
+ log_info("Test: %s", p);
+ assert_se(parse_timestamp(p, &t) >= 0);
+ log_info("\"%s\" → \"@%" PRI_USEC "\"", p, t);
+}
+
+static void test_should_fail(const char *p) {
+ usec_t t;
+ int r;
+
+ log_info("Test: %s", p);
+ r = parse_timestamp(p, &t);
+ if (r >= 0)
+ log_info("\"%s\" → \"@%" PRI_USEC "\" (unexpected)", p, t);
+ else
+ log_info("parse_timestamp() returns %d (expected)", r);
+ assert_se(r < 0);
+}
+
+static void test_one(const char *p) {
+ _cleanup_free_ char *with_utc;
+
+ with_utc = strjoin(p, " UTC");
+ test_should_pass(p);
+ test_should_pass(with_utc);
+}
+
+static void test_one_noutc(const char *p) {
+ _cleanup_free_ char *with_utc;
+
+ with_utc = strjoin(p, " UTC");
+ test_should_pass(p);
+ test_should_fail(with_utc);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_one("17:41");
+ test_one("18:42:44");
+ test_one("18:42:44.0");
+ test_one("18:42:44.999999999999");
+ test_one("12-10-02 12:13:14");
+ test_one("12-10-2 12:13:14");
+ test_one("12-10-03 12:13");
+ test_one("2012-12-30 18:42");
+ test_one("2012-10-02");
+ test_one("Tue 2012-10-02");
+ test_one("yesterday");
+ test_one("today");
+ test_one("tomorrow");
+ test_one_noutc("16:20 UTC");
+ test_one_noutc("16:20 Asia/Seoul");
+ test_one_noutc("tomorrow Asia/Seoul");
+ test_one_noutc("2012-12-30 18:42 Asia/Seoul");
+ test_one_noutc("now");
+ test_one_noutc("+2d");
+ test_one_noutc("+2y 4d");
+ test_one_noutc("5months ago");
+ test_one_noutc("@1395716396");
+ test_should_parse("1970-1-1 UTC");
+ test_should_pass("1970-1-1 00:00:01 UTC");
+ test_should_fail("1969-12-31 UTC");
+ test_should_fail("-100y");
+ test_should_fail("today UTC UTC");
+ test_should_fail("now Asia/Seoul");
+ test_should_fail("+2d Asia/Seoul");
+ test_should_fail("@1395716396 Asia/Seoul");
+#if SIZEOF_TIME_T == 8
+ test_should_pass("9999-12-30 23:59:59 UTC");
+ test_should_fail("9999-12-31 00:00:00 UTC");
+ test_should_fail("10000-01-01 00:00:00 UTC");
+#elif SIZEOF_TIME_T == 4
+ test_should_pass("2038-01-19 03:14:07 UTC");
+ test_should_fail("2038-01-19 03:14:08 UTC");
+#endif
+
+ return 0;
+}
diff --git a/src/test/test-dev-setup.c b/src/test/test-dev-setup.c
new file mode 100644
index 0000000..9414ea6
--- /dev/null
+++ b/src/test/test-dev-setup.c
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "capability-util.h"
+#include "dev-setup.h"
+#include "fs-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "tmpfile-util.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *p = NULL;
+ const char *f;
+ struct stat st;
+
+ if (have_effective_cap(CAP_DAC_OVERRIDE) <= 0)
+ return EXIT_TEST_SKIP;
+
+ assert_se(mkdtemp_malloc("/tmp/test-dev-setupXXXXXX", &p) >= 0);
+
+ f = prefix_roota(p, "/run");
+ assert_se(mkdir(f, 0755) >= 0);
+
+ assert_se(make_inaccessible_nodes(p, 1, 1) >= 0);
+
+ f = prefix_roota(p, "/run/systemd/inaccessible/reg");
+ assert_se(stat(f, &st) >= 0);
+ assert_se(S_ISREG(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0000);
+
+ f = prefix_roota(p, "/run/systemd/inaccessible/dir");
+ assert_se(stat(f, &st) >= 0);
+ assert_se(S_ISDIR(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0000);
+
+ f = prefix_roota(p, "/run/systemd/inaccessible/fifo");
+ assert_se(stat(f, &st) >= 0);
+ assert_se(S_ISFIFO(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0000);
+
+ f = prefix_roota(p, "/run/systemd/inaccessible/sock");
+ assert_se(stat(f, &st) >= 0);
+ assert_se(S_ISSOCK(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0000);
+
+ f = prefix_roota(p, "/run/systemd/inaccessible/chr");
+ if (stat(f, &st) < 0)
+ assert_se(errno == ENOENT);
+ else {
+ assert_se(S_ISCHR(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0000);
+ }
+
+ f = prefix_roota(p, "/run/systemd/inaccessible/blk");
+ if (stat(f, &st) < 0)
+ assert_se(errno == ENOENT);
+ else {
+ assert_se(S_ISBLK(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0000);
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-device-nodes.c b/src/test/test-device-nodes.c
new file mode 100644
index 0000000..ad8d9ac
--- /dev/null
+++ b/src/test/test-device-nodes.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/types.h>
+
+#include "alloc-util.h"
+#include "device-nodes.h"
+#include "string-util.h"
+#include "util.h"
+
+/* helpers for test_encode_devnode_name */
+static char *do_encode_string(const char *in) {
+ size_t out_len = strlen(in) * 4 + 1;
+ char *out = malloc(out_len);
+
+ assert_se(out);
+ assert_se(encode_devnode_name(in, out, out_len) >= 0);
+ puts(out);
+
+ return out;
+}
+
+static bool expect_encoded_as(const char *in, const char *expected) {
+ _cleanup_free_ char *encoded = do_encode_string(in);
+ return streq(encoded, expected);
+}
+
+static void test_encode_devnode_name(void) {
+ assert_se(expect_encoded_as("systemd sucks", "systemd\\x20sucks"));
+ assert_se(expect_encoded_as("pinkiepie", "pinkiepie"));
+ assert_se(expect_encoded_as("valíd\\ųtf8", "valíd\\x5cųtf8"));
+ assert_se(expect_encoded_as("s/ash/ng", "s\\x2fash\\x2fng"));
+ assert_se(expect_encoded_as("/", "\\x2f"));
+ assert_se(expect_encoded_as("!", "\\x21"));
+}
+
+int main(int argc, char *argv[]) {
+ test_encode_devnode_name();
+
+ return 0;
+}
diff --git a/src/test/test-dissect-image.c b/src/test/test-dissect-image.c
new file mode 100644
index 0000000..7b32e83
--- /dev/null
+++ b/src/test/test-dissect-image.c
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+
+#include "dissect-image.h"
+#include "log.h"
+#include "loop-util.h"
+#include "string-util.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+ int r, i;
+
+ test_setup_logging(LOG_DEBUG);
+
+ if (argc < 2) {
+ log_error("Requires one command line argument.");
+ return EXIT_FAILURE;
+ }
+
+ r = loop_device_make_by_path(argv[1], O_RDONLY, &d);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set up loopback device: %m");
+ return EXIT_FAILURE;
+ }
+
+ r = dissect_image(d->fd, NULL, 0, DISSECT_IMAGE_REQUIRE_ROOT, &m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to dissect image: %m");
+ return EXIT_FAILURE;
+ }
+
+ for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+
+ if (!m->partitions[i].found)
+ continue;
+
+ printf("Found %s partition, %s of type %s at #%i (%s)\n",
+ partition_designator_to_string(i),
+ m->partitions[i].rw ? "writable" : "read-only",
+ strna(m->partitions[i].fstype),
+ m->partitions[i].partno,
+ strna(m->partitions[i].node));
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-dlopen.c b/src/test/test-dlopen.c
new file mode 100644
index 0000000..148ebaa
--- /dev/null
+++ b/src/test/test-dlopen.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dlfcn.h>
+#include <stdlib.h>
+
+#include "macro.h"
+
+int main(int argc, char **argv) {
+ void *handle;
+
+ assert_se(handle = dlopen(argv[1], RTLD_NOW));
+ assert_se(dlclose(handle) == 0);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-dns-domain.c b/src/test/test-dns-domain.c
new file mode 100644
index 0000000..ead5311
--- /dev/null
+++ b/src/test/test-dns-domain.c
@@ -0,0 +1,815 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "macro.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void test_dns_label_unescape_one(const char *what, const char *expect, size_t buffer_sz, int ret, int ret_ldh) {
+ char buffer[buffer_sz];
+ int r;
+ const char *w = what;
+
+ log_info("%s, %s, %zu, →%d/%d", what, expect, buffer_sz, ret, ret_ldh);
+
+ r = dns_label_unescape(&w, buffer, buffer_sz, 0);
+ assert_se(r == ret);
+ if (r >= 0)
+ assert_se(streq(buffer, expect));
+
+ w = what;
+ r = dns_label_unescape(&w, buffer, buffer_sz, DNS_LABEL_LDH);
+ assert_se(r == ret_ldh);
+ if (r >= 0)
+ assert_se(streq(buffer, expect));
+
+ w = what;
+ r = dns_label_unescape(&w, buffer, buffer_sz, DNS_LABEL_NO_ESCAPES);
+ const int ret_noe = strchr(what, '\\') ? -EINVAL : ret;
+ assert_se(r == ret_noe);
+ if (r >= 0)
+ assert_se(streq(buffer, expect));
+}
+
+static void test_dns_label_unescape(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_label_unescape_one("hallo", "hallo", 6, 5, 5);
+ test_dns_label_unescape_one("hallo", "hallo", 4, -ENOBUFS, -ENOBUFS);
+ test_dns_label_unescape_one("", "", 10, 0, 0);
+ test_dns_label_unescape_one("hallo\\.foobar", "hallo.foobar", 20, 12, -EINVAL);
+ test_dns_label_unescape_one("hallo.foobar", "hallo", 10, 5, 5);
+ test_dns_label_unescape_one("hallo\n.foobar", "hallo", 20, -EINVAL, -EINVAL);
+ test_dns_label_unescape_one("hallo\\", "hallo", 20, -EINVAL, -EINVAL);
+ test_dns_label_unescape_one("hallo\\032 ", "hallo ", 20, 7, -EINVAL);
+ test_dns_label_unescape_one(".", "", 20, 0, 0);
+ test_dns_label_unescape_one("..", "", 20, -EINVAL, -EINVAL);
+ test_dns_label_unescape_one(".foobar", "", 20, -EINVAL, -EINVAL);
+ test_dns_label_unescape_one("foobar.", "foobar", 20, 6, 6);
+ test_dns_label_unescape_one("foobar..", "foobar", 20, -EINVAL, -EINVAL);
+ test_dns_label_unescape_one("foo-bar", "foo-bar", 20, 7, 7);
+ test_dns_label_unescape_one("foo-", "foo-", 20, 4, -EINVAL);
+ test_dns_label_unescape_one("-foo", "-foo", 20, 4, -EINVAL);
+ test_dns_label_unescape_one("-foo-", "-foo-", 20, 5, -EINVAL);
+ test_dns_label_unescape_one("foo-.", "foo-", 20, 4, -EINVAL);
+ test_dns_label_unescape_one("foo.-", "foo", 20, 3, 3);
+ test_dns_label_unescape_one("foo\\032", "foo ", 20, 4, -EINVAL);
+ test_dns_label_unescape_one("foo\\045", "foo-", 20, 4, -EINVAL);
+ test_dns_label_unescape_one("głąb", "głąb", 20, 6, -EINVAL);
+}
+
+static void test_dns_name_to_wire_format_one(const char *what, const char *expect, size_t buffer_sz, int ret) {
+ uint8_t buffer[buffer_sz];
+ int r;
+
+ log_info("%s, %s, %zu, →%d", what, expect, buffer_sz, ret);
+
+ r = dns_name_to_wire_format(what, buffer, buffer_sz, false);
+ assert_se(r == ret);
+
+ if (r < 0)
+ return;
+
+ assert_se(!memcmp(buffer, expect, r));
+}
+
+static void test_dns_name_to_wire_format(void) {
+ static const char out0[] = { 0 };
+ static const char out1[] = { 3, 'f', 'o', 'o', 0 };
+ static const char out2[] = { 5, 'h', 'a', 'l', 'l', 'o', 3, 'f', 'o', 'o', 3, 'b', 'a', 'r', 0 };
+ static const char out3[] = { 4, ' ', 'f', 'o', 'o', 3, 'b', 'a', 'r', 0 };
+ static const char out4[] = { 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 3, 'a', '1', '2', 0 };
+
+ log_info("/* %s */", __func__);
+
+ test_dns_name_to_wire_format_one("", out0, sizeof(out0), sizeof(out0));
+
+ test_dns_name_to_wire_format_one("foo", out1, sizeof(out1), sizeof(out1));
+ test_dns_name_to_wire_format_one("foo", out1, sizeof(out1) + 1, sizeof(out1));
+ test_dns_name_to_wire_format_one("foo", out1, sizeof(out1) - 1, -ENOBUFS);
+
+ test_dns_name_to_wire_format_one("hallo.foo.bar", out2, sizeof(out2), sizeof(out2));
+ test_dns_name_to_wire_format_one("hallo.foo..bar", NULL, 32, -EINVAL);
+
+ test_dns_name_to_wire_format_one("\\032foo.bar", out3, sizeof(out3), sizeof(out3));
+
+ test_dns_name_to_wire_format_one("a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a123", NULL, 500, -EINVAL);
+ test_dns_name_to_wire_format_one("a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12", out4, sizeof(out4), sizeof(out4));
+}
+
+static void test_dns_label_unescape_suffix_one(const char *what, const char *expect1, const char *expect2, size_t buffer_sz, int ret1, int ret2) {
+ char buffer[buffer_sz];
+ const char *label;
+ int r;
+
+ log_info("%s, %s, %s, %zu, %d, %d", what, expect1, expect2, buffer_sz, ret1, ret2);
+
+ label = what + strlen(what);
+
+ r = dns_label_unescape_suffix(what, &label, buffer, buffer_sz);
+ assert_se(r == ret1);
+ if (r >= 0)
+ assert_se(streq(buffer, expect1));
+
+ r = dns_label_unescape_suffix(what, &label, buffer, buffer_sz);
+ assert_se(r == ret2);
+ if (r >= 0)
+ assert_se(streq(buffer, expect2));
+}
+
+static void test_dns_label_unescape_suffix(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_label_unescape_suffix_one("hallo", "hallo", "", 6, 5, 0);
+ test_dns_label_unescape_suffix_one("hallo", "hallo", "", 4, -ENOBUFS, -ENOBUFS);
+ test_dns_label_unescape_suffix_one("", "", "", 10, 0, 0);
+ test_dns_label_unescape_suffix_one("hallo\\.foobar", "hallo.foobar", "", 20, 12, 0);
+ test_dns_label_unescape_suffix_one("hallo.foobar", "foobar", "hallo", 10, 6, 5);
+ test_dns_label_unescape_suffix_one("hallo.foobar\n", "foobar", "foobar", 20, -EINVAL, -EINVAL);
+ test_dns_label_unescape_suffix_one("hallo\\", "hallo", "hallo", 20, -EINVAL, -EINVAL);
+ test_dns_label_unescape_suffix_one("hallo\\032 ", "hallo ", "", 20, 7, 0);
+ test_dns_label_unescape_suffix_one(".", "", "", 20, 0, 0);
+ test_dns_label_unescape_suffix_one("..", "", "", 20, 0, -EINVAL);
+ test_dns_label_unescape_suffix_one(".foobar", "foobar", "", 20, 6, -EINVAL);
+ test_dns_label_unescape_suffix_one("foobar.", "foobar", "", 20, 6, 0);
+ test_dns_label_unescape_suffix_one("foo\\\\bar", "foo\\bar", "", 20, 7, 0);
+ test_dns_label_unescape_suffix_one("foo.bar", "bar", "foo", 20, 3, 3);
+ test_dns_label_unescape_suffix_one("foo..bar", "bar", "", 20, 3, -EINVAL);
+ test_dns_label_unescape_suffix_one("foo...bar", "bar", "", 20, 3, -EINVAL);
+ test_dns_label_unescape_suffix_one("foo\\.bar", "foo.bar", "", 20, 7, 0);
+ test_dns_label_unescape_suffix_one("foo\\\\.bar", "bar", "foo\\", 20, 3, 4);
+ test_dns_label_unescape_suffix_one("foo\\\\\\.bar", "foo\\.bar", "", 20, 8, 0);
+}
+
+static void test_dns_label_escape_one(const char *what, size_t l, const char *expect, int ret) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ log_info("%s, %zu, %s, →%d", what, l, expect, ret);
+
+ r = dns_label_escape_new(what, l, &t);
+ assert_se(r == ret);
+
+ if (r < 0)
+ return;
+
+ assert_se(streq_ptr(expect, t));
+}
+
+static void test_dns_label_escape(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_label_escape_one("", 0, NULL, -EINVAL);
+ test_dns_label_escape_one("hallo", 5, "hallo", 5);
+ test_dns_label_escape_one("hallo", 6, "hallo\\000", 9);
+ test_dns_label_escape_one("hallo hallo.foobar,waldi", 24, "hallo\\032hallo\\.foobar\\044waldi", 31);
+}
+
+static void test_dns_name_normalize_one(const char *what, const char *expect, int ret) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ r = dns_name_normalize(what, 0, &t);
+ assert_se(r == ret);
+
+ if (r < 0)
+ return;
+
+ assert_se(streq_ptr(expect, t));
+}
+
+static void test_dns_name_normalize(void) {
+ test_dns_name_normalize_one("", ".", 0);
+ test_dns_name_normalize_one("f", "f", 0);
+ test_dns_name_normalize_one("f.waldi", "f.waldi", 0);
+ test_dns_name_normalize_one("f \\032.waldi", "f\\032\\032.waldi", 0);
+ test_dns_name_normalize_one("\\000", "\\000", 0);
+ test_dns_name_normalize_one("..", NULL, -EINVAL);
+ test_dns_name_normalize_one(".foobar", NULL, -EINVAL);
+ test_dns_name_normalize_one("foobar.", "foobar", 0);
+ test_dns_name_normalize_one(".", ".", 0);
+}
+
+static void test_dns_name_equal_one(const char *a, const char *b, int ret) {
+ int r;
+
+ r = dns_name_equal(a, b);
+ assert_se(r == ret);
+
+ r = dns_name_equal(b, a);
+ assert_se(r == ret);
+}
+
+static void test_dns_name_equal(void) {
+ test_dns_name_equal_one("", "", true);
+ test_dns_name_equal_one("x", "x", true);
+ test_dns_name_equal_one("x", "x.", true);
+ test_dns_name_equal_one("abc.def", "abc.def", true);
+ test_dns_name_equal_one("abc.def", "ABC.def", true);
+ test_dns_name_equal_one("abc.def", "CBA.def", false);
+ test_dns_name_equal_one("", "xxx", false);
+ test_dns_name_equal_one("ab", "a", false);
+ test_dns_name_equal_one("\\000", "\\000", true);
+ test_dns_name_equal_one(".", "", true);
+ test_dns_name_equal_one(".", ".", true);
+ test_dns_name_equal_one("..", "..", -EINVAL);
+}
+
+static void test_dns_name_between_one(const char *a, const char *b, const char *c, int ret) {
+ int r;
+
+ r = dns_name_between(a, b, c);
+ assert_se(r == ret);
+
+ r = dns_name_between(c, b, a);
+ if (ret >= 0)
+ assert_se(r == 0 || dns_name_equal(a, c) > 0);
+ else
+ assert_se(r == ret);
+}
+
+static void test_dns_name_between(void) {
+ /* see https://tools.ietf.org/html/rfc4034#section-6.1
+ Note that we use "\033.z.example" in stead of "\001.z.example" as we
+ consider the latter invalid */
+ test_dns_name_between_one("example", "a.example", "yljkjljk.a.example", true);
+ test_dns_name_between_one("a.example", "yljkjljk.a.example", "Z.a.example", true);
+ test_dns_name_between_one("yljkjljk.a.example", "Z.a.example", "zABC.a.EXAMPLE", true);
+ test_dns_name_between_one("Z.a.example", "zABC.a.EXAMPLE", "z.example", true);
+ test_dns_name_between_one("zABC.a.EXAMPLE", "z.example", "\\033.z.example", true);
+ test_dns_name_between_one("z.example", "\\033.z.example", "*.z.example", true);
+ test_dns_name_between_one("\\033.z.example", "*.z.example", "\\200.z.example", true);
+ test_dns_name_between_one("*.z.example", "\\200.z.example", "example", true);
+ test_dns_name_between_one("\\200.z.example", "example", "a.example", true);
+
+ test_dns_name_between_one("example", "a.example", "example", true);
+ test_dns_name_between_one("example", "example", "example", false);
+ test_dns_name_between_one("example", "example", "yljkjljk.a.example", false);
+ test_dns_name_between_one("example", "yljkjljk.a.example", "yljkjljk.a.example", false);
+ test_dns_name_between_one("hkps.pool.sks-keyservers.net", "_pgpkey-https._tcp.hkps.pool.sks-keyservers.net", "ipv4.pool.sks-keyservers.net", true);
+}
+
+static void test_dns_name_endswith_one(const char *a, const char *b, int ret) {
+ assert_se(dns_name_endswith(a, b) == ret);
+}
+
+static void test_dns_name_endswith(void) {
+ test_dns_name_endswith_one("", "", true);
+ test_dns_name_endswith_one("", "xxx", false);
+ test_dns_name_endswith_one("xxx", "", true);
+ test_dns_name_endswith_one("x", "x", true);
+ test_dns_name_endswith_one("x", "y", false);
+ test_dns_name_endswith_one("x.y", "y", true);
+ test_dns_name_endswith_one("x.y", "Y", true);
+ test_dns_name_endswith_one("x.y", "x", false);
+ test_dns_name_endswith_one("x.y.z", "Z", true);
+ test_dns_name_endswith_one("x.y.z", "y.Z", true);
+ test_dns_name_endswith_one("x.y.z", "x.y.Z", true);
+ test_dns_name_endswith_one("x.y.z", "waldo", false);
+ test_dns_name_endswith_one("x.y.z.u.v.w", "y.z", false);
+ test_dns_name_endswith_one("x.y.z.u.v.w", "u.v.w", true);
+ test_dns_name_endswith_one("x.y\001.z", "waldo", -EINVAL);
+}
+
+static void test_dns_name_startswith_one(const char *a, const char *b, int ret) {
+ assert_se(dns_name_startswith(a, b) == ret);
+}
+
+static void test_dns_name_startswith(void) {
+ test_dns_name_startswith_one("", "", true);
+ test_dns_name_startswith_one("", "xxx", false);
+ test_dns_name_startswith_one("xxx", "", true);
+ test_dns_name_startswith_one("x", "x", true);
+ test_dns_name_startswith_one("x", "y", false);
+ test_dns_name_startswith_one("x.y", "x.y", true);
+ test_dns_name_startswith_one("x.y", "y.x", false);
+ test_dns_name_startswith_one("x.y", "x", true);
+ test_dns_name_startswith_one("x.y", "X", true);
+ test_dns_name_startswith_one("x.y", "y", false);
+ test_dns_name_startswith_one("x.y", "", true);
+ test_dns_name_startswith_one("x.y", "X", true);
+}
+
+static void test_dns_name_is_root(void) {
+ assert_se(dns_name_is_root(""));
+ assert_se(dns_name_is_root("."));
+ assert_se(!dns_name_is_root("xxx"));
+ assert_se(!dns_name_is_root("xxx."));
+ assert_se(!dns_name_is_root(".."));
+}
+
+static void test_dns_name_is_single_label(void) {
+ assert_se(!dns_name_is_single_label(""));
+ assert_se(!dns_name_is_single_label("."));
+ assert_se(!dns_name_is_single_label(".."));
+ assert_se(dns_name_is_single_label("x"));
+ assert_se(dns_name_is_single_label("x."));
+ assert_se(!dns_name_is_single_label("xx.yy"));
+}
+
+static void test_dns_name_reverse_one(const char *address, const char *name) {
+ _cleanup_free_ char *p = NULL;
+ union in_addr_union a, b = {};
+ int familya, familyb;
+
+ assert_se(in_addr_from_string_auto(address, &familya, &a) >= 0);
+ assert_se(dns_name_reverse(familya, &a, &p) >= 0);
+ assert_se(streq(p, name));
+ assert_se(dns_name_address(p, &familyb, &b) > 0);
+ assert_se(familya == familyb);
+ assert_se(in_addr_equal(familya, &a, &b));
+}
+
+static void test_dns_name_reverse(void) {
+ test_dns_name_reverse_one("47.11.8.15", "15.8.11.47.in-addr.arpa");
+ test_dns_name_reverse_one("fe80::47", "7.4.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.8.e.f.ip6.arpa");
+ test_dns_name_reverse_one("127.0.0.1", "1.0.0.127.in-addr.arpa");
+ test_dns_name_reverse_one("::1", "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa");
+}
+
+static void test_dns_name_concat_one(const char *a, const char *b, int r, const char *result) {
+ _cleanup_free_ char *p = NULL;
+
+ assert_se(dns_name_concat(a, b, 0, &p) == r);
+ assert_se(streq_ptr(p, result));
+}
+
+static void test_dns_name_concat(void) {
+ test_dns_name_concat_one("", "", 0, ".");
+ test_dns_name_concat_one(".", "", 0, ".");
+ test_dns_name_concat_one("", ".", 0, ".");
+ test_dns_name_concat_one(".", ".", 0, ".");
+ test_dns_name_concat_one("foo", "bar", 0, "foo.bar");
+ test_dns_name_concat_one("foo.foo", "bar.bar", 0, "foo.foo.bar.bar");
+ test_dns_name_concat_one("foo", NULL, 0, "foo");
+ test_dns_name_concat_one("foo", ".", 0, "foo");
+ test_dns_name_concat_one("foo.", "bar.", 0, "foo.bar");
+ test_dns_name_concat_one(NULL, NULL, 0, ".");
+ test_dns_name_concat_one(NULL, ".", 0, ".");
+ test_dns_name_concat_one(NULL, "foo", 0, "foo");
+}
+
+static void test_dns_name_is_valid_one(const char *s, int ret, int ret_ldh) {
+ log_info("%s, →%d", s, ret);
+
+ assert_se(dns_name_is_valid(s) == ret);
+ assert_se(dns_name_is_valid_ldh(s) == ret_ldh);
+}
+
+static void test_dns_name_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_name_is_valid_one("foo", 1, 1);
+ test_dns_name_is_valid_one("foo.", 1, 1);
+ test_dns_name_is_valid_one("foo..", 0, 0);
+ test_dns_name_is_valid_one("Foo", 1, 1);
+ test_dns_name_is_valid_one("foo.bar", 1, 1);
+ test_dns_name_is_valid_one("foo.bar.baz", 1, 1);
+ test_dns_name_is_valid_one("", 1, 1);
+ test_dns_name_is_valid_one("foo..bar", 0, 0);
+ test_dns_name_is_valid_one(".foo.bar", 0, 0);
+ test_dns_name_is_valid_one("foo.bar.", 1, 1);
+ test_dns_name_is_valid_one("foo.bar..", 0, 0);
+ test_dns_name_is_valid_one("\\zbar", 0, 0);
+ test_dns_name_is_valid_one("ä", 1, 0);
+ test_dns_name_is_valid_one("\n", 0, 0);
+
+ test_dns_name_is_valid_one("dash-", 1, 0);
+ test_dns_name_is_valid_one("-dash", 1, 0);
+ test_dns_name_is_valid_one("dash-dash", 1, 1);
+ test_dns_name_is_valid_one("foo.dash-", 1, 0);
+ test_dns_name_is_valid_one("foo.-dash", 1, 0);
+ test_dns_name_is_valid_one("foo.dash-dash", 1, 1);
+ test_dns_name_is_valid_one("foo.dash-.bar", 1, 0);
+ test_dns_name_is_valid_one("foo.-dash.bar", 1, 0);
+ test_dns_name_is_valid_one("foo.dash-dash.bar", 1, 1);
+ test_dns_name_is_valid_one("dash-.bar", 1, 0);
+ test_dns_name_is_valid_one("-dash.bar", 1, 0);
+ test_dns_name_is_valid_one("dash-dash.bar", 1, 1);
+ test_dns_name_is_valid_one("-.bar", 1, 0);
+ test_dns_name_is_valid_one("foo.-", 1, 0);
+
+ /* 256 characters */
+ test_dns_name_is_valid_one("a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345", 0, 0);
+
+ /* 255 characters */
+ test_dns_name_is_valid_one("a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a1234", 0, 0);
+
+ /* 254 characters */
+ test_dns_name_is_valid_one("a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a123", 0, 0);
+
+ /* 253 characters */
+ test_dns_name_is_valid_one("a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12", 1, 1);
+
+ /* label of 64 chars length */
+ test_dns_name_is_valid_one("a123456789a123456789a123456789a123456789a123456789a123456789a123", 0, 0);
+
+ /* label of 63 chars length */
+ test_dns_name_is_valid_one("a123456789a123456789a123456789a123456789a123456789a123456789a12", 1, 1);
+}
+
+static void test_dns_service_name_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(dns_service_name_is_valid("Lennart's Compüter"));
+ assert_se(dns_service_name_is_valid("piff.paff"));
+
+ assert_se(!dns_service_name_is_valid(NULL));
+ assert_se(!dns_service_name_is_valid(""));
+ assert_se(!dns_service_name_is_valid("foo\nbar"));
+ assert_se(!dns_service_name_is_valid("foo\201bar"));
+ assert_se(!dns_service_name_is_valid("this is an overly long string that is certainly longer than 63 characters"));
+}
+
+static void test_dns_srv_type_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(dns_srv_type_is_valid("_http._tcp"));
+ assert_se(dns_srv_type_is_valid("_foo-bar._tcp"));
+ assert_se(dns_srv_type_is_valid("_w._udp"));
+ assert_se(dns_srv_type_is_valid("_a800._tcp"));
+ assert_se(dns_srv_type_is_valid("_a-800._tcp"));
+
+ assert_se(!dns_srv_type_is_valid(NULL));
+ assert_se(!dns_srv_type_is_valid(""));
+ assert_se(!dns_srv_type_is_valid("x"));
+ assert_se(!dns_srv_type_is_valid("_foo"));
+ assert_se(!dns_srv_type_is_valid("_tcp"));
+ assert_se(!dns_srv_type_is_valid("_"));
+ assert_se(!dns_srv_type_is_valid("_foo."));
+ assert_se(!dns_srv_type_is_valid("_föo._tcp"));
+ assert_se(!dns_srv_type_is_valid("_f\no._tcp"));
+ assert_se(!dns_srv_type_is_valid("_800._tcp"));
+ assert_se(!dns_srv_type_is_valid("_-800._tcp"));
+ assert_se(!dns_srv_type_is_valid("_-foo._tcp"));
+ assert_se(!dns_srv_type_is_valid("_piep._foo._udp"));
+}
+
+static void test_dnssd_srv_type_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(dnssd_srv_type_is_valid("_http._tcp"));
+ assert_se(dnssd_srv_type_is_valid("_foo-bar._tcp"));
+ assert_se(dnssd_srv_type_is_valid("_w._udp"));
+ assert_se(dnssd_srv_type_is_valid("_a800._tcp"));
+ assert_se(dnssd_srv_type_is_valid("_a-800._tcp"));
+
+ assert_se(!dnssd_srv_type_is_valid(NULL));
+ assert_se(!dnssd_srv_type_is_valid(""));
+ assert_se(!dnssd_srv_type_is_valid("x"));
+ assert_se(!dnssd_srv_type_is_valid("_foo"));
+ assert_se(!dnssd_srv_type_is_valid("_tcp"));
+ assert_se(!dnssd_srv_type_is_valid("_"));
+ assert_se(!dnssd_srv_type_is_valid("_foo."));
+ assert_se(!dnssd_srv_type_is_valid("_föo._tcp"));
+ assert_se(!dnssd_srv_type_is_valid("_f\no._tcp"));
+ assert_se(!dnssd_srv_type_is_valid("_800._tcp"));
+ assert_se(!dnssd_srv_type_is_valid("_-800._tcp"));
+ assert_se(!dnssd_srv_type_is_valid("_-foo._tcp"));
+ assert_se(!dnssd_srv_type_is_valid("_piep._foo._udp"));
+ assert_se(!dnssd_srv_type_is_valid("_foo._unknown"));
+}
+
+static void test_dns_service_join_one(const char *a, const char *b, const char *c, int r, const char *d) {
+ _cleanup_free_ char *x = NULL, *y = NULL, *z = NULL, *t = NULL;
+
+ log_info("%s, %s, %s, →%d, %s", a, b, c, r, d);
+
+ assert_se(dns_service_join(a, b, c, &t) == r);
+ assert_se(streq_ptr(t, d));
+
+ if (r < 0)
+ return;
+
+ assert_se(dns_service_split(t, &x, &y, &z) >= 0);
+ assert_se(streq_ptr(a, x));
+ assert_se(streq_ptr(b, y));
+ assert_se(dns_name_equal(c, z) > 0);
+}
+
+static void test_dns_service_join(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_service_join_one("", "", "", -EINVAL, NULL);
+ test_dns_service_join_one("", "_http._tcp", "", -EINVAL, NULL);
+ test_dns_service_join_one("", "_http._tcp", "foo", -EINVAL, NULL);
+ test_dns_service_join_one("foo", "", "foo", -EINVAL, NULL);
+ test_dns_service_join_one("foo", "foo", "foo", -EINVAL, NULL);
+
+ test_dns_service_join_one("foo", "_http._tcp", "", 0, "foo._http._tcp");
+ test_dns_service_join_one(NULL, "_http._tcp", "", 0, "_http._tcp");
+ test_dns_service_join_one("foo", "_http._tcp", "foo", 0, "foo._http._tcp.foo");
+ test_dns_service_join_one(NULL, "_http._tcp", "foo", 0, "_http._tcp.foo");
+ test_dns_service_join_one("Lennart's PC", "_pc._tcp", "foo.bar.com", 0, "Lennart\\039s\\032PC._pc._tcp.foo.bar.com");
+ test_dns_service_join_one(NULL, "_pc._tcp", "foo.bar.com", 0, "_pc._tcp.foo.bar.com");
+}
+
+static void test_dns_service_split_one(const char *joined, const char *a, const char *b, const char *c, int r) {
+ _cleanup_free_ char *x = NULL, *y = NULL, *z = NULL, *t = NULL;
+
+ log_info("%s, %s, %s, %s, →%d", joined, a, b, c, r);
+
+ assert_se(dns_service_split(joined, &x, &y, &z) == r);
+ assert_se(streq_ptr(x, a));
+ assert_se(streq_ptr(y, b));
+ assert_se(streq_ptr(z, c));
+
+ if (r < 0)
+ return;
+
+ if (y) {
+ assert_se(dns_service_join(x, y, z, &t) == 0);
+ assert_se(dns_name_equal(joined, t) > 0);
+ } else
+ assert_se(!x && dns_name_equal(z, joined) > 0);
+}
+
+static void test_dns_service_split(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_service_split_one("", NULL, NULL, ".", 0);
+ test_dns_service_split_one("foo", NULL, NULL, "foo", 0);
+ test_dns_service_split_one("foo.bar", NULL, NULL, "foo.bar", 0);
+ test_dns_service_split_one("_foo.bar", NULL, NULL, "_foo.bar", 0);
+ test_dns_service_split_one("_foo._bar", NULL, "_foo._bar", ".", 0);
+ test_dns_service_split_one("_meh._foo._bar", "_meh", "_foo._bar", ".", 0);
+ test_dns_service_split_one("Wuff\\032Wuff._foo._bar.waldo.com", "Wuff Wuff", "_foo._bar", "waldo.com", 0);
+}
+
+static void test_dns_name_change_suffix_one(const char *name, const char *old_suffix, const char *new_suffix, int r, const char *result) {
+ _cleanup_free_ char *s = NULL;
+
+ log_info("%s, %s, %s, →%s", name, old_suffix, new_suffix, result);
+
+ assert_se(dns_name_change_suffix(name, old_suffix, new_suffix, &s) == r);
+ assert_se(streq_ptr(s, result));
+}
+
+static void test_dns_name_change_suffix(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_name_change_suffix_one("foo.bar", "bar", "waldo", 1, "foo.waldo");
+ test_dns_name_change_suffix_one("foo.bar.waldi.quux", "foo.bar.waldi.quux", "piff.paff", 1, "piff.paff");
+ test_dns_name_change_suffix_one("foo.bar.waldi.quux", "bar.waldi.quux", "piff.paff", 1, "foo.piff.paff");
+ test_dns_name_change_suffix_one("foo.bar.waldi.quux", "waldi.quux", "piff.paff", 1, "foo.bar.piff.paff");
+ test_dns_name_change_suffix_one("foo.bar.waldi.quux", "quux", "piff.paff", 1, "foo.bar.waldi.piff.paff");
+ test_dns_name_change_suffix_one("foo.bar.waldi.quux", "", "piff.paff", 1, "foo.bar.waldi.quux.piff.paff");
+ test_dns_name_change_suffix_one("", "", "piff.paff", 1, "piff.paff");
+ test_dns_name_change_suffix_one("", "", "", 1, ".");
+ test_dns_name_change_suffix_one("a", "b", "c", 0, NULL);
+}
+
+static void test_dns_name_suffix_one(const char *name, unsigned n_labels, const char *result, int ret) {
+ const char *p = NULL;
+
+ log_info("%s, %d, →%s, %d", name, n_labels, result, ret);
+
+ assert_se(ret == dns_name_suffix(name, n_labels, &p));
+ assert_se(streq_ptr(p, result));
+}
+
+static void test_dns_name_suffix(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_name_suffix_one("foo.bar", 2, "foo.bar", 0);
+ test_dns_name_suffix_one("foo.bar", 1, "bar", 1);
+ test_dns_name_suffix_one("foo.bar", 0, "", 2);
+ test_dns_name_suffix_one("foo.bar", 3, NULL, -EINVAL);
+ test_dns_name_suffix_one("foo.bar", 4, NULL, -EINVAL);
+
+ test_dns_name_suffix_one("bar", 1, "bar", 0);
+ test_dns_name_suffix_one("bar", 0, "", 1);
+ test_dns_name_suffix_one("bar", 2, NULL, -EINVAL);
+ test_dns_name_suffix_one("bar", 3, NULL, -EINVAL);
+
+ test_dns_name_suffix_one("", 0, "", 0);
+ test_dns_name_suffix_one("", 1, NULL, -EINVAL);
+ test_dns_name_suffix_one("", 2, NULL, -EINVAL);
+}
+
+static void test_dns_name_count_labels_one(const char *name, int n) {
+ log_info("%s, →%d", name, n);
+
+ assert_se(dns_name_count_labels(name) == n);
+}
+
+static void test_dns_name_count_labels(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_name_count_labels_one("foo.bar.quux.", 3);
+ test_dns_name_count_labels_one("foo.bar.quux", 3);
+ test_dns_name_count_labels_one("foo.bar.", 2);
+ test_dns_name_count_labels_one("foo.bar", 2);
+ test_dns_name_count_labels_one("foo.", 1);
+ test_dns_name_count_labels_one("foo", 1);
+ test_dns_name_count_labels_one("", 0);
+ test_dns_name_count_labels_one(".", 0);
+ test_dns_name_count_labels_one("..", -EINVAL);
+}
+
+static void test_dns_name_equal_skip_one(const char *a, unsigned n_labels, const char *b, int ret) {
+ log_info("%s, %u, %s, →%d", a, n_labels, b, ret);
+
+ assert_se(dns_name_equal_skip(a, n_labels, b) == ret);
+}
+
+static void test_dns_name_equal_skip(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_name_equal_skip_one("foo", 0, "bar", 0);
+ test_dns_name_equal_skip_one("foo", 0, "foo", 1);
+ test_dns_name_equal_skip_one("foo", 1, "foo", 0);
+ test_dns_name_equal_skip_one("foo", 2, "foo", 0);
+
+ test_dns_name_equal_skip_one("foo.bar", 0, "foo.bar", 1);
+ test_dns_name_equal_skip_one("foo.bar", 1, "foo.bar", 0);
+ test_dns_name_equal_skip_one("foo.bar", 2, "foo.bar", 0);
+ test_dns_name_equal_skip_one("foo.bar", 3, "foo.bar", 0);
+
+ test_dns_name_equal_skip_one("foo.bar", 0, "bar", 0);
+ test_dns_name_equal_skip_one("foo.bar", 1, "bar", 1);
+ test_dns_name_equal_skip_one("foo.bar", 2, "bar", 0);
+ test_dns_name_equal_skip_one("foo.bar", 3, "bar", 0);
+
+ test_dns_name_equal_skip_one("foo.bar", 0, "", 0);
+ test_dns_name_equal_skip_one("foo.bar", 1, "", 0);
+ test_dns_name_equal_skip_one("foo.bar", 2, "", 1);
+ test_dns_name_equal_skip_one("foo.bar", 3, "", 0);
+
+ test_dns_name_equal_skip_one("", 0, "", 1);
+ test_dns_name_equal_skip_one("", 1, "", 0);
+ test_dns_name_equal_skip_one("", 1, "foo", 0);
+ test_dns_name_equal_skip_one("", 2, "foo", 0);
+}
+
+static void test_dns_name_compare_func(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(dns_name_compare_func("", "") == 0);
+ assert_se(dns_name_compare_func("", ".") == 0);
+ assert_se(dns_name_compare_func(".", "") == 0);
+ assert_se(dns_name_compare_func("foo", "foo.") == 0);
+ assert_se(dns_name_compare_func("foo.", "foo") == 0);
+ assert_se(dns_name_compare_func("foo", "foo") == 0);
+ assert_se(dns_name_compare_func("foo.", "foo.") == 0);
+ assert_se(dns_name_compare_func("heise.de", "HEISE.DE.") == 0);
+
+ assert_se(dns_name_compare_func("de.", "heise.de") != 0);
+}
+
+static void test_dns_name_common_suffix_one(const char *a, const char *b, const char *result) {
+ const char *c;
+
+ log_info("%s, %s, →%s", a, b, result);
+
+ assert_se(dns_name_common_suffix(a, b, &c) >= 0);
+ assert_se(streq(c, result));
+}
+
+static void test_dns_name_common_suffix(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_name_common_suffix_one("", "", "");
+ test_dns_name_common_suffix_one("foo", "", "");
+ test_dns_name_common_suffix_one("", "foo", "");
+ test_dns_name_common_suffix_one("foo", "bar", "");
+ test_dns_name_common_suffix_one("bar", "foo", "");
+ test_dns_name_common_suffix_one("foo", "foo", "foo");
+ test_dns_name_common_suffix_one("quux.foo", "foo", "foo");
+ test_dns_name_common_suffix_one("foo", "quux.foo", "foo");
+ test_dns_name_common_suffix_one("this.is.a.short.sentence", "this.is.another.short.sentence", "short.sentence");
+ test_dns_name_common_suffix_one("FOO.BAR", "tEST.bAR", "BAR");
+}
+
+static void test_dns_name_apply_idna_one(const char *s, int expected, const char *result) {
+ _cleanup_free_ char *buf = NULL;
+ int r;
+
+ r = dns_name_apply_idna(s, &buf);
+ log_debug("dns_name_apply_idna: \"%s\" → %d/\"%s\" (expected %d/\"%s\")",
+ s, r, strnull(buf), expected, strnull(result));
+
+ /* Different libidn2 versions are more and less accepting
+ * of underscore-prefixed names. So let's list the lowest
+ * expected return value. */
+ assert_se(r >= expected);
+ if (expected == 1)
+ assert_se(dns_name_equal(buf, result) == 1);
+}
+
+static void test_dns_name_apply_idna(void) {
+#if HAVE_LIBIDN2 || HAVE_LIBIDN
+ const int ret = 1;
+#else
+ const int ret = 0;
+#endif
+ log_info("/* %s */", __func__);
+
+ /* IDNA2008 forbids names with hyphens in third and fourth positions
+ * (https://tools.ietf.org/html/rfc5891#section-4.2.3.1).
+ * IDNA2003 does not have this restriction
+ * (https://tools.ietf.org/html/rfc3490#section-5).
+ * This means that when using libidn we will transform and test more
+ * labels. If registrars follow IDNA2008 we'll just be performing a
+ * useless lookup.
+ */
+#if HAVE_LIBIDN
+ const int ret2 = 1;
+#else
+ const int ret2 = 0;
+#endif
+
+ test_dns_name_apply_idna_one("", ret, "");
+ test_dns_name_apply_idna_one("foo", ret, "foo");
+ test_dns_name_apply_idna_one("foo.", ret, "foo");
+ test_dns_name_apply_idna_one("foo.bar", ret, "foo.bar");
+ test_dns_name_apply_idna_one("foo.bar.", ret, "foo.bar");
+ test_dns_name_apply_idna_one("föö", ret, "xn--f-1gaa");
+ test_dns_name_apply_idna_one("föö.", ret, "xn--f-1gaa");
+ test_dns_name_apply_idna_one("föö.bär", ret, "xn--f-1gaa.xn--br-via");
+ test_dns_name_apply_idna_one("föö.bär.", ret, "xn--f-1gaa.xn--br-via");
+ test_dns_name_apply_idna_one("xn--f-1gaa.xn--br-via", ret, "xn--f-1gaa.xn--br-via");
+
+ test_dns_name_apply_idna_one("_443._tcp.fedoraproject.org", ret2,
+ "_443._tcp.fedoraproject.org");
+ test_dns_name_apply_idna_one("_443", ret2, "_443");
+ test_dns_name_apply_idna_one("gateway", ret, "gateway");
+ test_dns_name_apply_idna_one("_gateway", ret2, "_gateway");
+
+ test_dns_name_apply_idna_one("r3---sn-ab5l6ne7.googlevideo.com", ret2,
+ ret2 ? "r3---sn-ab5l6ne7.googlevideo.com" : "");
+}
+
+static void test_dns_name_is_valid_or_address(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(dns_name_is_valid_or_address(NULL) == 0);
+ assert_se(dns_name_is_valid_or_address("") == 0);
+ assert_se(dns_name_is_valid_or_address("foobar") > 0);
+ assert_se(dns_name_is_valid_or_address("foobar.com") > 0);
+ assert_se(dns_name_is_valid_or_address("foobar..com") == 0);
+ assert_se(dns_name_is_valid_or_address("foobar.com.") > 0);
+ assert_se(dns_name_is_valid_or_address("127.0.0.1") > 0);
+ assert_se(dns_name_is_valid_or_address("::") > 0);
+ assert_se(dns_name_is_valid_or_address("::1") > 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_dns_label_unescape();
+ test_dns_label_unescape_suffix();
+ test_dns_label_escape();
+ test_dns_name_normalize();
+ test_dns_name_equal();
+ test_dns_name_endswith();
+ test_dns_name_startswith();
+ test_dns_name_between();
+ test_dns_name_is_root();
+ test_dns_name_is_single_label();
+ test_dns_name_reverse();
+ test_dns_name_concat();
+ test_dns_name_is_valid();
+ test_dns_name_to_wire_format();
+ test_dns_service_name_is_valid();
+ test_dns_srv_type_is_valid();
+ test_dnssd_srv_type_is_valid();
+ test_dns_service_join();
+ test_dns_service_split();
+ test_dns_name_change_suffix();
+ test_dns_name_suffix();
+ test_dns_name_count_labels();
+ test_dns_name_equal_skip();
+ test_dns_name_compare_func();
+ test_dns_name_common_suffix();
+ test_dns_name_apply_idna();
+ test_dns_name_is_valid_or_address();
+
+ return 0;
+}
diff --git a/src/test/test-ellipsize.c b/src/test/test-ellipsize.c
new file mode 100644
index 0000000..92692bd
--- /dev/null
+++ b/src/test/test-ellipsize.c
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "def.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+#include "utf8.h"
+
+static void test_ellipsize_mem_one(const char *s, size_t old_length, size_t new_length) {
+ _cleanup_free_ char *n = NULL;
+ _cleanup_free_ char *t1 = NULL, *t2 = NULL, *t3 = NULL;
+ char buf[LINE_MAX];
+ bool has_wide_chars;
+ size_t max_width;
+
+ n = memdup_suffix0(s, old_length);
+
+ if (!utf8_is_valid(n))
+ /* We don't support invalid sequences… */
+ return;
+
+ /* Report out inputs. We duplicate the data so that cellescape
+ * can properly report truncated multibyte sequences. */
+ log_info("%s \"%s\" old_length=%zu/%zu new_length=%zu", __func__,
+ cellescape(buf, sizeof buf, n),
+ old_length, utf8_console_width(n),
+ new_length);
+
+ /* To keep this test simple, any case with wide chars starts with this glyph */
+ has_wide_chars = startswith(s, "你");
+ max_width = MIN(utf8_console_width(n), new_length);
+
+ t1 = ellipsize_mem(n, old_length, new_length, 30);
+ log_info("30%% → %s utf8_console_width=%zu", t1, utf8_console_width(t1));
+ if (!has_wide_chars)
+ assert_se(utf8_console_width(t1) == max_width);
+ else
+ assert_se(utf8_console_width(t1) <= max_width);
+
+ t2 = ellipsize_mem(n, old_length, new_length, 90);
+ log_info("90%% → %s utf8_console_width=%zu", t2, utf8_console_width(t2));
+ if (!has_wide_chars)
+ assert_se(utf8_console_width(t2) == max_width);
+ else
+ assert_se(utf8_console_width(t2) <= max_width);
+
+ t3 = ellipsize_mem(n, old_length, new_length, 100);
+ log_info("100%% → %s utf8_console_width=%zu", t3, utf8_console_width(t3));
+ if (!has_wide_chars)
+ assert_se(utf8_console_width(t3) == max_width);
+ else
+ assert_se(utf8_console_width(t3) <= max_width);
+
+ if (new_length >= old_length) {
+ assert_se(streq(t1, n));
+ assert_se(streq(t2, n));
+ assert_se(streq(t3, n));
+ }
+}
+
+static void test_ellipsize_mem(void) {
+ const char *s;
+ ssize_t l, k;
+
+ FOREACH_STRING(s,
+ "_XXXXXXXXXXX_", /* ASCII */
+ "_aąęółśćńżźć_", /* two-byte utf-8 */
+ "გამარჯობა", /* multi-byte utf-8 */
+ "你好世界", /* wide characters */
+ "你გą世óoó界") /* a mix */
+
+ for (l = strlen(s); l >= 0; l--)
+ for (k = strlen(s) + 1; k >= 0; k--)
+ test_ellipsize_mem_one(s, l, k);
+}
+
+static void test_ellipsize_one(const char *p) {
+ _cleanup_free_ char *t;
+ t = ellipsize(p, columns(), 70);
+ puts(t);
+ free(t);
+ t = ellipsize(p, columns(), 0);
+ puts(t);
+ free(t);
+ t = ellipsize(p, columns(), 100);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 0, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 1, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 2, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 3, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 4, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 5, 50);
+ puts(t);
+}
+
+static void test_ellipsize(void) {
+ test_ellipsize_one(DIGITS LETTERS DIGITS LETTERS);
+ test_ellipsize_one("한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어");
+ test_ellipsize_one("-日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国");
+ test_ellipsize_one("中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国-中国中国中国中国中国中国中国中国中国中国中国中国中国");
+ test_ellipsize_one("sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd");
+ test_ellipsize_one("🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮");
+ test_ellipsize_one("Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.");
+ test_ellipsize_one("shórt");
+}
+
+int main(int argc, char *argv[]) {
+ test_ellipsize_mem();
+ test_ellipsize();
+
+ return 0;
+}
diff --git a/src/test/test-emergency-action.c b/src/test/test-emergency-action.c
new file mode 100644
index 0000000..8ce28ed
--- /dev/null
+++ b/src/test/test-emergency-action.c
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "emergency-action.h"
+#include "tests.h"
+
+static void test_parse_emergency_action(void) {
+ EmergencyAction x;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_emergency_action("none", false, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_NONE);
+ assert_se(parse_emergency_action("reboot", false, &x) == -EOPNOTSUPP);
+ assert_se(parse_emergency_action("reboot-force", false, &x) == -EOPNOTSUPP);
+ assert_se(parse_emergency_action("reboot-immediate", false, &x) == -EOPNOTSUPP);
+ assert_se(parse_emergency_action("poweroff", false, &x) == -EOPNOTSUPP);
+ assert_se(parse_emergency_action("poweroff-force", false, &x) == -EOPNOTSUPP);
+ assert_se(parse_emergency_action("poweroff-immediate", false, &x) == -EOPNOTSUPP);
+ assert_se(x == EMERGENCY_ACTION_NONE);
+ assert_se(parse_emergency_action("exit", false, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_EXIT);
+ assert_se(parse_emergency_action("exit-force", false, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_EXIT_FORCE);
+ assert_se(parse_emergency_action("exit-forcee", false, &x) == -EINVAL);
+
+ assert_se(parse_emergency_action("none", true, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_NONE);
+ assert_se(parse_emergency_action("reboot", true, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_REBOOT);
+ assert_se(parse_emergency_action("reboot-force", true, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_REBOOT_FORCE);
+ assert_se(parse_emergency_action("reboot-immediate", true, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_REBOOT_IMMEDIATE);
+ assert_se(parse_emergency_action("poweroff", true, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_POWEROFF);
+ assert_se(parse_emergency_action("poweroff-force", true, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_POWEROFF_FORCE);
+ assert_se(parse_emergency_action("poweroff-immediate", true, &x) == 0);
+ assert_se(parse_emergency_action("exit", true, &x) == 0);
+ assert_se(parse_emergency_action("exit-force", true, &x) == 0);
+ assert_se(parse_emergency_action("exit-forcee", true, &x) == -EINVAL);
+ assert_se(x == EMERGENCY_ACTION_EXIT_FORCE);
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_INFO);
+
+ test_parse_emergency_action();
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-engine.c b/src/test/test-engine.c
new file mode 100644
index 0000000..0673d36
--- /dev/null
+++ b/src/test/test-engine.c
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "bus-util.h"
+#include "manager.h"
+#include "rm-rf.h"
+#include "test-helper.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error err = SD_BUS_ERROR_NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *g = NULL, *h = NULL, *unit_with_multiple_dashes = NULL;
+ Job *j;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ r = enter_cgroup_subroot();
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ /* prepare the test */
+ assert_se(set_unit_path(get_testdata_dir()) >= 0);
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m);
+ if (MANAGER_SKIP_TEST(r))
+ return log_tests_skipped_errno(r, "manager_new");
+ assert_se(r >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ printf("Load1:\n");
+ assert_se(manager_load_startable_unit_or_warn(m, "a.service", NULL, &a) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "b.service", NULL, &b) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "c.service", NULL, &c) >= 0);
+ manager_dump_units(m, stdout, "\t");
+
+ printf("Test1: (Trivial)\n");
+ r = manager_add_job(m, JOB_START, c, JOB_REPLACE, &err, &j);
+ if (sd_bus_error_is_set(&err))
+ log_error("error: %s: %s", err.name, err.message);
+ assert_se(r == 0);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Load2:\n");
+ manager_clear_jobs(m);
+ assert_se(manager_load_startable_unit_or_warn(m, "d.service", NULL, &d) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "e.service", NULL, &e) >= 0);
+ manager_dump_units(m, stdout, "\t");
+
+ printf("Test2: (Cyclic Order, Unfixable)\n");
+ assert_se(manager_add_job(m, JOB_START, d, JOB_REPLACE, NULL, &j) == -EDEADLK);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Test3: (Cyclic Order, Fixable, Garbage Collector)\n");
+ assert_se(manager_add_job(m, JOB_START, e, JOB_REPLACE, NULL, &j) == 0);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Test4: (Identical transaction)\n");
+ assert_se(manager_add_job(m, JOB_START, e, JOB_FAIL, NULL, &j) == 0);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Load3:\n");
+ assert_se(manager_load_startable_unit_or_warn(m, "g.service", NULL, &g) >= 0);
+ manager_dump_units(m, stdout, "\t");
+
+ printf("Test5: (Colliding transaction, fail)\n");
+ assert_se(manager_add_job(m, JOB_START, g, JOB_FAIL, NULL, &j) == -EDEADLK);
+
+ printf("Test6: (Colliding transaction, replace)\n");
+ assert_se(manager_add_job(m, JOB_START, g, JOB_REPLACE, NULL, &j) == 0);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Test7: (Unmergeable job type, fail)\n");
+ assert_se(manager_add_job(m, JOB_STOP, g, JOB_FAIL, NULL, &j) == -EDEADLK);
+
+ printf("Test8: (Mergeable job type, fail)\n");
+ assert_se(manager_add_job(m, JOB_RESTART, g, JOB_FAIL, NULL, &j) == 0);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Test9: (Unmergeable job type, replace)\n");
+ assert_se(manager_add_job(m, JOB_STOP, g, JOB_REPLACE, NULL, &j) == 0);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Load4:\n");
+ assert_se(manager_load_startable_unit_or_warn(m, "h.service", NULL, &h) >= 0);
+ manager_dump_units(m, stdout, "\t");
+
+ printf("Test10: (Unmergeable job type of auxiliary job, fail)\n");
+ assert_se(manager_add_job(m, JOB_START, h, JOB_FAIL, NULL, &j) == 0);
+ manager_dump_jobs(m, stdout, "\t");
+
+ assert_se(!hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], b));
+ assert_se(!hashmap_get(b->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+ assert_se(!hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], c));
+ assert_se(!hashmap_get(c->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+
+ assert_se(unit_add_dependency(a, UNIT_PROPAGATES_RELOAD_TO, b, true, UNIT_DEPENDENCY_UDEV) == 0);
+ assert_se(unit_add_dependency(a, UNIT_PROPAGATES_RELOAD_TO, c, true, UNIT_DEPENDENCY_PROC_SWAP) == 0);
+
+ assert_se(hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], b));
+ assert_se(hashmap_get(b->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+ assert_se(hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], c));
+ assert_se(hashmap_get(c->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+
+ unit_remove_dependencies(a, UNIT_DEPENDENCY_UDEV);
+
+ assert_se(!hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], b));
+ assert_se(!hashmap_get(b->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+ assert_se(hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], c));
+ assert_se(hashmap_get(c->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+
+ unit_remove_dependencies(a, UNIT_DEPENDENCY_PROC_SWAP);
+
+ assert_se(!hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], b));
+ assert_se(!hashmap_get(b->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+ assert_se(!hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], c));
+ assert_se(!hashmap_get(c->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+
+ assert_se(manager_load_unit(m, "unit-with-multiple-dashes.service", NULL, NULL, &unit_with_multiple_dashes) >= 0);
+
+ assert_se(strv_equal(unit_with_multiple_dashes->documentation, STRV_MAKE("man:test", "man:override2", "man:override3")));
+ assert_se(streq_ptr(unit_with_multiple_dashes->description, "override4"));
+
+ return 0;
+}
diff --git a/src/test/test-env-util.c b/src/test/test-env-util.c
new file mode 100644
index 0000000..f57a260
--- /dev/null
+++ b/src/test/test-env-util.c
@@ -0,0 +1,325 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "serialize.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static void test_strv_env_delete(void) {
+ _cleanup_strv_free_ char **a = NULL, **b = NULL, **c = NULL, **d = NULL;
+
+ a = strv_new("FOO=BAR", "WALDO=WALDO", "WALDO=", "PIEP", "SCHLUMPF=SMURF");
+ assert_se(a);
+
+ b = strv_new("PIEP", "FOO");
+ assert_se(b);
+
+ c = strv_new("SCHLUMPF");
+ assert_se(c);
+
+ d = strv_env_delete(a, 2, b, c);
+ assert_se(d);
+
+ assert_se(streq(d[0], "WALDO=WALDO"));
+ assert_se(streq(d[1], "WALDO="));
+ assert_se(strv_length(d) == 2);
+}
+
+static void test_strv_env_get(void) {
+ char **l;
+
+ l = STRV_MAKE("ONE_OR_TWO=1", "THREE=3", "ONE_OR_TWO=2", "FOUR=4");
+
+ assert_se(streq(strv_env_get(l, "ONE_OR_TWO"), "2"));
+ assert_se(streq(strv_env_get(l, "THREE"), "3"));
+ assert_se(streq(strv_env_get(l, "FOUR"), "4"));
+}
+
+static void test_strv_env_unset(void) {
+ _cleanup_strv_free_ char **l = NULL;
+
+ l = strv_new("PIEP", "SCHLUMPF=SMURFF", "NANANANA=YES");
+ assert_se(l);
+
+ assert_se(strv_env_unset(l, "SCHLUMPF") == l);
+
+ assert_se(streq(l[0], "PIEP"));
+ assert_se(streq(l[1], "NANANANA=YES"));
+ assert_se(strv_length(l) == 2);
+}
+
+static void test_strv_env_set(void) {
+ _cleanup_strv_free_ char **l = NULL, **r = NULL;
+
+ l = strv_new("PIEP", "SCHLUMPF=SMURFF", "NANANANA=YES");
+ assert_se(l);
+
+ r = strv_env_set(l, "WALDO=WALDO");
+ assert_se(r);
+
+ assert_se(streq(r[0], "PIEP"));
+ assert_se(streq(r[1], "SCHLUMPF=SMURFF"));
+ assert_se(streq(r[2], "NANANANA=YES"));
+ assert_se(streq(r[3], "WALDO=WALDO"));
+ assert_se(strv_length(r) == 4);
+}
+
+static void test_strv_env_merge(void) {
+ _cleanup_strv_free_ char **a = NULL, **b = NULL, **r = NULL;
+
+ a = strv_new("FOO=BAR", "WALDO=WALDO", "WALDO=", "PIEP", "SCHLUMPF=SMURF");
+ assert_se(a);
+
+ b = strv_new("FOO=KKK", "FOO=", "PIEP=", "SCHLUMPF=SMURFF", "NANANANA=YES");
+ assert_se(b);
+
+ r = strv_env_merge(2, a, b);
+ assert_se(r);
+ assert_se(streq(r[0], "FOO="));
+ assert_se(streq(r[1], "WALDO="));
+ assert_se(streq(r[2], "PIEP"));
+ assert_se(streq(r[3], "SCHLUMPF=SMURFF"));
+ assert_se(streq(r[4], "PIEP="));
+ assert_se(streq(r[5], "NANANANA=YES"));
+ assert_se(strv_length(r) == 6);
+
+ assert_se(strv_env_clean(r) == r);
+ assert_se(streq(r[0], "FOO="));
+ assert_se(streq(r[1], "WALDO="));
+ assert_se(streq(r[2], "SCHLUMPF=SMURFF"));
+ assert_se(streq(r[3], "PIEP="));
+ assert_se(streq(r[4], "NANANANA=YES"));
+ assert_se(strv_length(r) == 5);
+}
+
+static void test_env_strv_get_n(void) {
+ const char *_env[] = {
+ "FOO=NO NO NO",
+ "FOO=BAR BAR",
+ "BAR=waldo",
+ "PATH=unset",
+ NULL
+ };
+ char **env = (char**) _env;
+
+ assert_se(streq(strv_env_get_n(env, "FOO__", 3, 0), "BAR BAR"));
+ assert_se(streq(strv_env_get_n(env, "FOO__", 3, REPLACE_ENV_USE_ENVIRONMENT), "BAR BAR"));
+ assert_se(streq(strv_env_get_n(env, "FOO", 3, 0), "BAR BAR"));
+ assert_se(streq(strv_env_get_n(env, "FOO", 3, REPLACE_ENV_USE_ENVIRONMENT), "BAR BAR"));
+
+ assert_se(streq(strv_env_get_n(env, "PATH__", 4, 0), "unset"));
+ assert_se(streq(strv_env_get_n(env, "PATH", 4, 0), "unset"));
+ assert_se(streq(strv_env_get_n(env, "PATH__", 4, REPLACE_ENV_USE_ENVIRONMENT), "unset"));
+ assert_se(streq(strv_env_get_n(env, "PATH", 4, REPLACE_ENV_USE_ENVIRONMENT), "unset"));
+
+ env[3] = NULL; /* kill our $PATH */
+
+ assert_se(!strv_env_get_n(env, "PATH__", 4, 0));
+ assert_se(!strv_env_get_n(env, "PATH", 4, 0));
+ assert_se(streq(strv_env_get_n(env, "PATH__", 4, REPLACE_ENV_USE_ENVIRONMENT),
+ getenv("PATH")));
+ assert_se(streq(strv_env_get_n(env, "PATH", 4, REPLACE_ENV_USE_ENVIRONMENT),
+ getenv("PATH")));
+}
+
+static void test_replace_env(bool braceless) {
+ const char *env[] = {
+ "FOO=BAR BAR",
+ "BAR=waldo",
+ NULL
+ };
+ _cleanup_free_ char *t = NULL, *s = NULL, *q = NULL, *r = NULL, *p = NULL;
+ unsigned flags = REPLACE_ENV_ALLOW_BRACELESS*braceless;
+
+ t = replace_env("FOO=$FOO=${FOO}", (char**) env, flags);
+ assert_se(streq(t, braceless ? "FOO=BAR BAR=BAR BAR" : "FOO=$FOO=BAR BAR"));
+
+ s = replace_env("BAR=$BAR=${BAR}", (char**) env, flags);
+ assert_se(streq(s, braceless ? "BAR=waldo=waldo" : "BAR=$BAR=waldo"));
+
+ q = replace_env("BARBAR=$BARBAR=${BARBAR}", (char**) env, flags);
+ assert_se(streq(q, braceless ? "BARBAR==" : "BARBAR=$BARBAR="));
+
+ r = replace_env("BAR=$BAR$BAR${BAR}${BAR}", (char**) env, flags);
+ assert_se(streq(r, braceless ? "BAR=waldowaldowaldowaldo" : "BAR=$BAR$BARwaldowaldo"));
+
+ p = replace_env("${BAR}$BAR$BAR", (char**) env, flags);
+ assert_se(streq(p, braceless ? "waldowaldowaldo" : "waldo$BAR$BAR"));
+}
+
+static void test_replace_env2(bool extended) {
+ const char *env[] = {
+ "FOO=foo",
+ "BAR=bar",
+ NULL
+ };
+ _cleanup_free_ char *t = NULL, *s = NULL, *q = NULL, *r = NULL, *p = NULL, *x = NULL;
+ unsigned flags = REPLACE_ENV_ALLOW_EXTENDED*extended;
+
+ t = replace_env("FOO=${FOO:-${BAR}}", (char**) env, flags);
+ assert_se(streq(t, extended ? "FOO=foo" : "FOO=${FOO:-bar}"));
+
+ s = replace_env("BAR=${XXX:-${BAR}}", (char**) env, flags);
+ assert_se(streq(s, extended ? "BAR=bar" : "BAR=${XXX:-bar}"));
+
+ q = replace_env("XXX=${XXX:+${BAR}}", (char**) env, flags);
+ assert_se(streq(q, extended ? "XXX=" : "XXX=${XXX:+bar}"));
+
+ r = replace_env("FOO=${FOO:+${BAR}}", (char**) env, flags);
+ assert_se(streq(r, extended ? "FOO=bar" : "FOO=${FOO:+bar}"));
+
+ p = replace_env("FOO=${FOO:-${BAR}post}", (char**) env, flags);
+ assert_se(streq(p, extended ? "FOO=foo" : "FOO=${FOO:-barpost}"));
+
+ x = replace_env("XXX=${XXX:+${BAR}post}", (char**) env, flags);
+ assert_se(streq(x, extended ? "XXX=" : "XXX=${XXX:+barpost}"));
+}
+
+static void test_replace_env_argv(void) {
+ const char *env[] = {
+ "FOO=BAR BAR",
+ "BAR=waldo",
+ NULL
+ };
+ const char *line[] = {
+ "FOO$FOO",
+ "FOO$FOOFOO",
+ "FOO${FOO}$FOO",
+ "FOO${FOO}",
+ "${FOO}",
+ "$FOO",
+ "$FOO$FOO",
+ "${FOO}${BAR}",
+ "${FOO",
+ "FOO$$${FOO}",
+ "$$FOO${FOO}",
+ "${FOO:-${BAR}}",
+ "${QUUX:-${FOO}}",
+ "${FOO:+${BAR}}",
+ "${QUUX:+${BAR}}",
+ "${FOO:+|${BAR}|}}",
+ "${FOO:+|${BAR}{|}",
+ NULL
+ };
+ _cleanup_strv_free_ char **r = NULL;
+
+ r = replace_env_argv((char**) line, (char**) env);
+ assert_se(r);
+ assert_se(streq(r[0], "FOO$FOO"));
+ assert_se(streq(r[1], "FOO$FOOFOO"));
+ assert_se(streq(r[2], "FOOBAR BAR$FOO"));
+ assert_se(streq(r[3], "FOOBAR BAR"));
+ assert_se(streq(r[4], "BAR BAR"));
+ assert_se(streq(r[5], "BAR"));
+ assert_se(streq(r[6], "BAR"));
+ assert_se(streq(r[7], "BAR BARwaldo"));
+ assert_se(streq(r[8], "${FOO"));
+ assert_se(streq(r[9], "FOO$BAR BAR"));
+ assert_se(streq(r[10], "$FOOBAR BAR"));
+ assert_se(streq(r[11], "${FOO:-waldo}"));
+ assert_se(streq(r[12], "${QUUX:-BAR BAR}"));
+ assert_se(streq(r[13], "${FOO:+waldo}"));
+ assert_se(streq(r[14], "${QUUX:+waldo}"));
+ assert_se(streq(r[15], "${FOO:+|waldo|}}"));
+ assert_se(streq(r[16], "${FOO:+|waldo{|}"));
+ assert_se(strv_length(r) == 17);
+}
+
+static void test_env_clean(void) {
+ _cleanup_strv_free_ char **e;
+
+ e = strv_new("FOOBAR=WALDO",
+ "FOOBAR=WALDO",
+ "FOOBAR",
+ "F",
+ "X=",
+ "F=F",
+ "=",
+ "=F",
+ "",
+ "0000=000",
+ "äöüß=abcd",
+ "abcd=äöüß",
+ "xyz\n=xyz",
+ "xyz=xyz\n",
+ "another=one",
+ "another=final one");
+ assert_se(e);
+ assert_se(!strv_env_is_valid(e));
+ assert_se(strv_env_clean(e) == e);
+ assert_se(strv_env_is_valid(e));
+
+ assert_se(streq(e[0], "FOOBAR=WALDO"));
+ assert_se(streq(e[1], "X="));
+ assert_se(streq(e[2], "F=F"));
+ assert_se(streq(e[3], "abcd=äöüß"));
+ assert_se(streq(e[4], "xyz=xyz\n"));
+ assert_se(streq(e[5], "another=final one"));
+ assert_se(e[6] == NULL);
+}
+
+static void test_env_name_is_valid(void) {
+ assert_se(env_name_is_valid("test"));
+
+ assert_se(!env_name_is_valid(NULL));
+ assert_se(!env_name_is_valid(""));
+ assert_se(!env_name_is_valid("xxx\a"));
+ assert_se(!env_name_is_valid("xxx\007b"));
+ assert_se(!env_name_is_valid("\007\009"));
+ assert_se(!env_name_is_valid("5_starting_with_a_number_is_wrong"));
+ assert_se(!env_name_is_valid("#¤%&?_only_numbers_letters_and_underscore_allowed"));
+}
+
+static void test_env_value_is_valid(void) {
+ assert_se(env_value_is_valid(""));
+ assert_se(env_value_is_valid("głąb kapuściany"));
+ assert_se(env_value_is_valid("printf \"\\x1b]0;<mock-chroot>\\x07<mock-chroot>\""));
+ assert_se(env_value_is_valid("tab\tcharacter"));
+ assert_se(env_value_is_valid("new\nline"));
+}
+
+static void test_env_assignment_is_valid(void) {
+ assert_se(env_assignment_is_valid("a="));
+ assert_se(env_assignment_is_valid("b=głąb kapuściany"));
+ assert_se(env_assignment_is_valid("c=\\007\\009\\011"));
+ assert_se(env_assignment_is_valid("e=printf \"\\x1b]0;<mock-chroot>\\x07<mock-chroot>\""));
+ assert_se(env_assignment_is_valid("f=tab\tcharacter"));
+ assert_se(env_assignment_is_valid("g=new\nline"));
+
+ assert_se(!env_assignment_is_valid("="));
+ assert_se(!env_assignment_is_valid("a b="));
+ assert_se(!env_assignment_is_valid("a ="));
+ assert_se(!env_assignment_is_valid(" b="));
+ /* no dots or dashes: http://tldp.org/LDP/abs/html/gotchas.html */
+ assert_se(!env_assignment_is_valid("a.b="));
+ assert_se(!env_assignment_is_valid("a-b="));
+ assert_se(!env_assignment_is_valid("\007=głąb kapuściany"));
+ assert_se(!env_assignment_is_valid("c\009=\007\009\011"));
+ assert_se(!env_assignment_is_valid("głąb=printf \"\x1b]0;<mock-chroot>\x07<mock-chroot>\""));
+}
+
+int main(int argc, char *argv[]) {
+ test_strv_env_delete();
+ test_strv_env_get();
+ test_strv_env_unset();
+ test_strv_env_set();
+ test_strv_env_merge();
+ test_env_strv_get_n();
+ test_replace_env(false);
+ test_replace_env(true);
+ test_replace_env2(false);
+ test_replace_env2(true);
+ test_replace_env_argv();
+ test_env_clean();
+ test_env_name_is_valid();
+ test_env_value_is_valid();
+ test_env_assignment_is_valid();
+
+ return 0;
+}
diff --git a/src/test/test-escape.c b/src/test/test-escape.c
new file mode 100644
index 0000000..4ee4aa9
--- /dev/null
+++ b/src/test/test-escape.c
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "macro.h"
+#include "tests.h"
+
+static void test_cescape(void) {
+ _cleanup_free_ char *escaped;
+
+ assert_se(escaped = cescape("abc\\\"\b\f\n\r\t\v\a\003\177\234\313"));
+ assert_se(streq(escaped, "abc\\\\\\\"\\b\\f\\n\\r\\t\\v\\a\\003\\177\\234\\313"));
+}
+
+static void test_cunescape(void) {
+ _cleanup_free_ char *unescaped;
+
+ assert_se(cunescape("abc\\\\\\\"\\b\\f\\a\\n\\r\\t\\v\\003\\177\\234\\313\\000\\x00", 0, &unescaped) < 0);
+ assert_se(cunescape("abc\\\\\\\"\\b\\f\\a\\n\\r\\t\\v\\003\\177\\234\\313\\000\\x00", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "abc\\\"\b\f\a\n\r\t\v\003\177\234\313\\000\\x00"));
+ unescaped = mfree(unescaped);
+
+ /* incomplete sequences */
+ assert_se(cunescape("\\x0", 0, &unescaped) < 0);
+ assert_se(cunescape("\\x0", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "\\x0"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\x", 0, &unescaped) < 0);
+ assert_se(cunescape("\\x", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "\\x"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\", 0, &unescaped) < 0);
+ assert_se(cunescape("\\", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "\\"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\11", 0, &unescaped) < 0);
+ assert_se(cunescape("\\11", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "\\11"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\1", 0, &unescaped) < 0);
+ assert_se(cunescape("\\1", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "\\1"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\u0000", 0, &unescaped) < 0);
+ assert_se(cunescape("\\u00DF\\U000000df\\u03a0\\U00000041", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "ßßΠA"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\073", 0, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, ";"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("A=A\\\\x0aB", 0, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "A=A\\x0aB"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("A=A\\\\x0aB", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "A=A\\x0aB"));
+}
+
+static void test_shell_escape_one(const char *s, const char *bad, const char *expected) {
+ _cleanup_free_ char *r;
+
+ assert_se(r = shell_escape(s, bad));
+ assert_se(streq_ptr(r, expected));
+}
+
+static void test_shell_escape(void) {
+ test_shell_escape_one("", "", "");
+ test_shell_escape_one("\\", "", "\\\\");
+ test_shell_escape_one("foobar", "", "foobar");
+ test_shell_escape_one("foobar", "o", "f\\o\\obar");
+ test_shell_escape_one("foo:bar,baz", ",:", "foo\\:bar\\,baz");
+}
+
+static void test_shell_maybe_quote_one(const char *s,
+ EscapeStyle style,
+ const char *expected) {
+ _cleanup_free_ char *ret = NULL;
+
+ assert_se(ret = shell_maybe_quote(s, style));
+ log_debug("[%s] → [%s] (%s)", s, ret, expected);
+ assert_se(streq(ret, expected));
+}
+
+static void test_shell_maybe_quote(void) {
+
+ test_shell_maybe_quote_one("", ESCAPE_BACKSLASH, "");
+ test_shell_maybe_quote_one("", ESCAPE_POSIX, "");
+ test_shell_maybe_quote_one("\\", ESCAPE_BACKSLASH, "\"\\\\\"");
+ test_shell_maybe_quote_one("\\", ESCAPE_POSIX, "$'\\\\'");
+ test_shell_maybe_quote_one("\"", ESCAPE_BACKSLASH, "\"\\\"\"");
+ test_shell_maybe_quote_one("\"", ESCAPE_POSIX, "$'\"'");
+ test_shell_maybe_quote_one("foobar", ESCAPE_BACKSLASH, "foobar");
+ test_shell_maybe_quote_one("foobar", ESCAPE_POSIX, "foobar");
+ test_shell_maybe_quote_one("foo bar", ESCAPE_BACKSLASH, "\"foo bar\"");
+ test_shell_maybe_quote_one("foo bar", ESCAPE_POSIX, "$'foo bar'");
+ test_shell_maybe_quote_one("foo\tbar", ESCAPE_BACKSLASH, "\"foo\tbar\"");
+ test_shell_maybe_quote_one("foo\tbar", ESCAPE_POSIX, "$'foo\\tbar'");
+ test_shell_maybe_quote_one("foo\nbar", ESCAPE_BACKSLASH, "\"foo\nbar\"");
+ test_shell_maybe_quote_one("foo\nbar", ESCAPE_POSIX, "$'foo\\nbar'");
+ test_shell_maybe_quote_one("foo \"bar\" waldo", ESCAPE_BACKSLASH, "\"foo \\\"bar\\\" waldo\"");
+ test_shell_maybe_quote_one("foo \"bar\" waldo", ESCAPE_POSIX, "$'foo \"bar\" waldo'");
+ test_shell_maybe_quote_one("foo$bar", ESCAPE_BACKSLASH, "\"foo\\$bar\"");
+ test_shell_maybe_quote_one("foo$bar", ESCAPE_POSIX, "$'foo$bar'");
+
+ /* Note that current users disallow control characters, so this "test"
+ * is here merely to establish current behaviour. If control characters
+ * were allowed, they should be quoted, i.e. \001 should become \\001. */
+ test_shell_maybe_quote_one("a\nb\001", ESCAPE_BACKSLASH, "\"a\nb\001\"");
+ test_shell_maybe_quote_one("a\nb\001", ESCAPE_POSIX, "$'a\\nb\001'");
+
+ test_shell_maybe_quote_one("foo!bar", ESCAPE_BACKSLASH, "\"foo!bar\"");
+ test_shell_maybe_quote_one("foo!bar", ESCAPE_POSIX, "$'foo!bar'");
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_cescape();
+ test_cunescape();
+ test_shell_escape();
+ test_shell_maybe_quote();
+
+ return 0;
+}
diff --git a/src/test/test-exec-util.c b/src/test/test-exec-util.c
new file mode 100644
index 0000000..21a4538
--- /dev/null
+++ b/src/test/test-exec-util.c
@@ -0,0 +1,377 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "copy.h"
+#include "def.h"
+#include "env-util.h"
+#include "exec-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+
+static int here = 0, here2 = 0, here3 = 0;
+void *ignore_stdout_args[] = {&here, &here2, &here3};
+
+/* noop handlers, just check that arguments are passed correctly */
+static int ignore_stdout_func(int fd, void *arg) {
+ assert(fd >= 0);
+ assert(arg == &here);
+ safe_close(fd);
+
+ return 0;
+}
+static int ignore_stdout_func2(int fd, void *arg) {
+ assert(fd >= 0);
+ assert(arg == &here2);
+ safe_close(fd);
+
+ return 0;
+}
+static int ignore_stdout_func3(int fd, void *arg) {
+ assert(fd >= 0);
+ assert(arg == &here3);
+ safe_close(fd);
+
+ return 0;
+}
+
+static const gather_stdout_callback_t ignore_stdout[] = {
+ ignore_stdout_func,
+ ignore_stdout_func2,
+ ignore_stdout_func3,
+};
+
+static void test_execute_directory(bool gather_stdout) {
+ char template_lo[] = "/tmp/test-exec-util.lo.XXXXXXX";
+ char template_hi[] = "/tmp/test-exec-util.hi.XXXXXXX";
+ const char * dirs[] = {template_hi, template_lo, NULL};
+ const char *name, *name2, *name3,
+ *overridden, *override,
+ *masked, *mask,
+ *masked2, *mask2, /* the mask is non-executable */
+ *masked2e, *mask2e; /* the mask is executable */
+
+ log_info("/* %s (%s) */", __func__, gather_stdout ? "gathering stdout" : "asynchronous");
+
+ assert_se(mkdtemp(template_lo));
+ assert_se(mkdtemp(template_hi));
+
+ name = strjoina(template_lo, "/script");
+ name2 = strjoina(template_hi, "/script2");
+ name3 = strjoina(template_lo, "/useless");
+ overridden = strjoina(template_lo, "/overridden");
+ override = strjoina(template_hi, "/overridden");
+ masked = strjoina(template_lo, "/masked");
+ mask = strjoina(template_hi, "/masked");
+ masked2 = strjoina(template_lo, "/masked2");
+ mask2 = strjoina(template_hi, "/masked2");
+ masked2e = strjoina(template_lo, "/masked2e");
+ mask2e = strjoina(template_hi, "/masked2e");
+
+ assert_se(write_string_file(name,
+ "#!/bin/sh\necho 'Executing '$0\ntouch $(dirname $0)/it_works",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(name2,
+ "#!/bin/sh\necho 'Executing '$0\ntouch $(dirname $0)/it_works2",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(overridden,
+ "#!/bin/sh\necho 'Executing '$0\ntouch $(dirname $0)/failed",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(override,
+ "#!/bin/sh\necho 'Executing '$0",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(masked,
+ "#!/bin/sh\necho 'Executing '$0\ntouch $(dirname $0)/failed",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(masked2,
+ "#!/bin/sh\necho 'Executing '$0\ntouch $(dirname $0)/failed",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(masked2e,
+ "#!/bin/sh\necho 'Executing '$0\ntouch $(dirname $0)/failed",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(symlink("/dev/null", mask) == 0);
+ assert_se(touch(mask2) == 0);
+ assert_se(touch(mask2e) == 0);
+ assert_se(touch(name3) >= 0);
+
+ assert_se(chmod(name, 0755) == 0);
+ assert_se(chmod(name2, 0755) == 0);
+ assert_se(chmod(overridden, 0755) == 0);
+ assert_se(chmod(override, 0755) == 0);
+ assert_se(chmod(masked, 0755) == 0);
+ assert_se(chmod(masked2, 0755) == 0);
+ assert_se(chmod(masked2e, 0755) == 0);
+ assert_se(chmod(mask2e, 0755) == 0);
+
+ if (gather_stdout)
+ execute_directories(dirs, DEFAULT_TIMEOUT_USEC, ignore_stdout, ignore_stdout_args, NULL, NULL);
+ else
+ execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, NULL, NULL);
+
+ assert_se(chdir(template_lo) == 0);
+ assert_se(access("it_works", F_OK) >= 0);
+ assert_se(access("failed", F_OK) < 0);
+
+ assert_se(chdir(template_hi) == 0);
+ assert_se(access("it_works2", F_OK) >= 0);
+ assert_se(access("failed", F_OK) < 0);
+
+ (void) rm_rf(template_lo, REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf(template_hi, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+static void test_execution_order(void) {
+ char template_lo[] = "/tmp/test-exec-util-lo.XXXXXXX";
+ char template_hi[] = "/tmp/test-exec-util-hi.XXXXXXX";
+ const char *dirs[] = {template_hi, template_lo, NULL};
+ const char *name, *name2, *name3, *overridden, *override, *masked, *mask;
+ const char *output, *t;
+ _cleanup_free_ char *contents = NULL;
+
+ assert_se(mkdtemp(template_lo));
+ assert_se(mkdtemp(template_hi));
+
+ output = strjoina(template_hi, "/output");
+
+ log_info("/* %s >>%s */", __func__, output);
+
+ /* write files in "random" order */
+ name2 = strjoina(template_lo, "/90-bar");
+ name = strjoina(template_hi, "/80-foo");
+ name3 = strjoina(template_lo, "/last");
+ overridden = strjoina(template_lo, "/30-override");
+ override = strjoina(template_hi, "/30-override");
+ masked = strjoina(template_lo, "/10-masked");
+ mask = strjoina(template_hi, "/10-masked");
+
+ t = strjoina("#!/bin/sh\necho $(basename $0) >>", output);
+ assert_se(write_string_file(name, t, WRITE_STRING_FILE_CREATE) == 0);
+
+ t = strjoina("#!/bin/sh\necho $(basename $0) >>", output);
+ assert_se(write_string_file(name2, t, WRITE_STRING_FILE_CREATE) == 0);
+
+ t = strjoina("#!/bin/sh\necho $(basename $0) >>", output);
+ assert_se(write_string_file(name3, t, WRITE_STRING_FILE_CREATE) == 0);
+
+ t = strjoina("#!/bin/sh\necho OVERRIDDEN >>", output);
+ assert_se(write_string_file(overridden, t, WRITE_STRING_FILE_CREATE) == 0);
+
+ t = strjoina("#!/bin/sh\necho $(basename $0) >>", output);
+ assert_se(write_string_file(override, t, WRITE_STRING_FILE_CREATE) == 0);
+
+ t = strjoina("#!/bin/sh\necho MASKED >>", output);
+ assert_se(write_string_file(masked, t, WRITE_STRING_FILE_CREATE) == 0);
+
+ assert_se(symlink("/dev/null", mask) == 0);
+
+ assert_se(chmod(name, 0755) == 0);
+ assert_se(chmod(name2, 0755) == 0);
+ assert_se(chmod(name3, 0755) == 0);
+ assert_se(chmod(overridden, 0755) == 0);
+ assert_se(chmod(override, 0755) == 0);
+ assert_se(chmod(masked, 0755) == 0);
+
+ execute_directories(dirs, DEFAULT_TIMEOUT_USEC, ignore_stdout, ignore_stdout_args, NULL, NULL);
+
+ assert_se(read_full_file(output, &contents, NULL) >= 0);
+ assert_se(streq(contents, "30-override\n80-foo\n90-bar\nlast\n"));
+
+ (void) rm_rf(template_lo, REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf(template_hi, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+static int gather_stdout_one(int fd, void *arg) {
+ char ***s = arg, *t;
+ char buf[128] = {};
+
+ assert_se(s);
+ assert_se(read(fd, buf, sizeof buf) >= 0);
+ safe_close(fd);
+
+ assert_se(t = strndup(buf, sizeof buf));
+ assert_se(strv_push(s, t) >= 0);
+
+ return 0;
+}
+static int gather_stdout_two(int fd, void *arg) {
+ char ***s = arg, **t;
+
+ STRV_FOREACH(t, *s)
+ assert_se(write(fd, *t, strlen(*t)) == (ssize_t) strlen(*t));
+ safe_close(fd);
+
+ return 0;
+}
+static int gather_stdout_three(int fd, void *arg) {
+ char **s = arg;
+ char buf[128] = {};
+
+ assert_se(read(fd, buf, sizeof buf - 1) > 0);
+ safe_close(fd);
+ assert_se(*s = strndup(buf, sizeof buf));
+
+ return 0;
+}
+
+const gather_stdout_callback_t gather_stdout[] = {
+ gather_stdout_one,
+ gather_stdout_two,
+ gather_stdout_three,
+};
+
+static void test_stdout_gathering(void) {
+ char template[] = "/tmp/test-exec-util.XXXXXXX";
+ const char *dirs[] = {template, NULL};
+ const char *name, *name2, *name3;
+ int r;
+
+ char **tmp = NULL; /* this is only used in the forked process, no cleanup here */
+ _cleanup_free_ char *output = NULL;
+
+ void* args[] = {&tmp, &tmp, &output};
+
+ assert_se(mkdtemp(template));
+
+ log_info("/* %s */", __func__);
+
+ /* write files */
+ name = strjoina(template, "/10-foo");
+ name2 = strjoina(template, "/20-bar");
+ name3 = strjoina(template, "/30-last");
+
+ assert_se(write_string_file(name,
+ "#!/bin/sh\necho a\necho b\necho c\n",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(name2,
+ "#!/bin/sh\necho d\n",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(name3,
+ "#!/bin/sh\nsleep 1",
+ WRITE_STRING_FILE_CREATE) == 0);
+
+ assert_se(chmod(name, 0755) == 0);
+ assert_se(chmod(name2, 0755) == 0);
+ assert_se(chmod(name3, 0755) == 0);
+
+ r = execute_directories(dirs, DEFAULT_TIMEOUT_USEC, gather_stdout, args, NULL, NULL);
+ assert_se(r >= 0);
+
+ log_info("got: %s", output);
+
+ assert_se(streq(output, "a\nb\nc\nd\n"));
+}
+
+static void test_environment_gathering(void) {
+ char template[] = "/tmp/test-exec-util.XXXXXXX", **p;
+ const char *dirs[] = {template, NULL};
+ const char *name, *name2, *name3, *old;
+ int r;
+
+ char **tmp = NULL; /* this is only used in the forked process, no cleanup here */
+ _cleanup_strv_free_ char **env = NULL;
+
+ void* const args[] = { &tmp, &tmp, &env };
+
+ assert_se(mkdtemp(template));
+
+ log_info("/* %s */", __func__);
+
+ /* write files */
+ name = strjoina(template, "/10-foo");
+ name2 = strjoina(template, "/20-bar");
+ name3 = strjoina(template, "/30-last");
+
+ assert_se(write_string_file(name,
+ "#!/bin/sh\n"
+ "echo A=23\n",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(name2,
+ "#!/bin/sh\n"
+ "echo A=22:$A\n\n\n", /* substitution from previous generator */
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(name3,
+ "#!/bin/sh\n"
+ "echo A=$A:24\n"
+ "echo B=12\n"
+ "echo C=000\n"
+ "echo C=001\n" /* variable overwriting */
+ /* various invalid entries */
+ "echo unset A\n"
+ "echo unset A=\n"
+ "echo unset A=B\n"
+ "echo unset \n"
+ "echo A B=C\n"
+ "echo A\n"
+ /* test variable assignment without newline */
+ "echo PATH=$PATH:/no/such/file", /* no newline */
+ WRITE_STRING_FILE_CREATE) == 0);
+
+ assert_se(chmod(name, 0755) == 0);
+ assert_se(chmod(name2, 0755) == 0);
+ assert_se(chmod(name3, 0755) == 0);
+
+ /* When booting in containers or without initramfs there might not be
+ * any PATH in the environ and if there is no PATH /bin/sh built-in
+ * PATH may leak and override systemd's DEFAULT_PATH which is not
+ * good. Force our own PATH in environment, to prevent expansion of sh
+ * built-in $PATH */
+ old = getenv("PATH");
+ r = setenv("PATH", "no-sh-built-in-path", 1);
+ assert_se(r >= 0);
+
+ r = execute_directories(dirs, DEFAULT_TIMEOUT_USEC, gather_environment, args, NULL, NULL);
+ assert_se(r >= 0);
+
+ STRV_FOREACH(p, env)
+ log_info("got env: \"%s\"", *p);
+
+ assert_se(streq(strv_env_get(env, "A"), "22:23:24"));
+ assert_se(streq(strv_env_get(env, "B"), "12"));
+ assert_se(streq(strv_env_get(env, "C"), "001"));
+ assert_se(streq(strv_env_get(env, "PATH"), "no-sh-built-in-path:/no/such/file"));
+
+ /* now retest with "default" path passed in, as created by
+ * manager_default_environment */
+ env = strv_free(env);
+ env = strv_new("PATH=" DEFAULT_PATH);
+ assert_se(env);
+
+ r = execute_directories(dirs, DEFAULT_TIMEOUT_USEC, gather_environment, args, NULL, env);
+ assert_se(r >= 0);
+
+ STRV_FOREACH(p, env)
+ log_info("got env: \"%s\"", *p);
+
+ assert_se(streq(strv_env_get(env, "A"), "22:23:24"));
+ assert_se(streq(strv_env_get(env, "B"), "12"));
+ assert_se(streq(strv_env_get(env, "C"), "001"));
+ assert_se(streq(strv_env_get(env, "PATH"), DEFAULT_PATH ":/no/such/file"));
+
+ /* reset environ PATH */
+ (void) setenv("PATH", old, 1);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_execute_directory(true);
+ test_execute_directory(false);
+ test_execution_order();
+ test_stdout_gathering();
+ test_environment_gathering();
+
+ return 0;
+}
diff --git a/src/test/test-execute.c b/src/test/test-execute.c
new file mode 100644
index 0000000..eb8f7c4
--- /dev/null
+++ b/src/test/test-execute.c
@@ -0,0 +1,832 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <grp.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+
+#include "capability-util.h"
+#include "cpu-set-util.h"
+#include "errno-list.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "manager.h"
+#include "missing_prctl.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "service.h"
+#include "stat-util.h"
+#include "test-helper.h"
+#include "tests.h"
+#include "unit.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+static bool can_unshare;
+
+typedef void (*test_function_t)(Manager *m);
+
+static void check(Manager *m, Unit *unit, int status_expected, int code_expected) {
+ Service *service = NULL;
+ usec_t ts;
+ usec_t timeout = 2 * USEC_PER_MINUTE;
+
+ assert_se(m);
+ assert_se(unit);
+
+ service = SERVICE(unit);
+ printf("%s\n", unit->id);
+ exec_context_dump(&service->exec_context, stdout, "\t");
+ ts = now(CLOCK_MONOTONIC);
+ while (!IN_SET(service->state, SERVICE_DEAD, SERVICE_FAILED)) {
+ int r;
+ usec_t n;
+
+ r = sd_event_run(m->event, 100 * USEC_PER_MSEC);
+ assert_se(r >= 0);
+
+ n = now(CLOCK_MONOTONIC);
+ if (ts + timeout < n) {
+ log_error("Test timeout when testing %s", unit->id);
+ r = unit_kill(unit, KILL_ALL, SIGKILL, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to kill %s: %m", unit->id);
+ exit(EXIT_FAILURE);
+ }
+ }
+ exec_status_dump(&service->main_exec_status, stdout, "\t");
+ assert_se(service->main_exec_status.status == status_expected);
+ assert_se(service->main_exec_status.code == code_expected);
+}
+
+static bool check_nobody_user_and_group(void) {
+ static int cache = -1;
+ struct passwd *p;
+ struct group *g;
+
+ if (cache >= 0)
+ return !!cache;
+
+ if (!synthesize_nobody())
+ goto invalid;
+
+ p = getpwnam(NOBODY_USER_NAME);
+ if (!p ||
+ !streq(p->pw_name, NOBODY_USER_NAME) ||
+ p->pw_uid != UID_NOBODY ||
+ p->pw_gid != GID_NOBODY)
+ goto invalid;
+
+ p = getpwuid(UID_NOBODY);
+ if (!p ||
+ !streq(p->pw_name, NOBODY_USER_NAME) ||
+ p->pw_uid != UID_NOBODY ||
+ p->pw_gid != GID_NOBODY)
+ goto invalid;
+
+ g = getgrnam(NOBODY_GROUP_NAME);
+ if (!g ||
+ !streq(g->gr_name, NOBODY_GROUP_NAME) ||
+ g->gr_gid != GID_NOBODY)
+ goto invalid;
+
+ g = getgrgid(GID_NOBODY);
+ if (!g ||
+ !streq(g->gr_name, NOBODY_GROUP_NAME) ||
+ g->gr_gid != GID_NOBODY)
+ goto invalid;
+
+ cache = 1;
+ return true;
+
+invalid:
+ cache = 0;
+ return false;
+}
+
+static bool check_user_has_group_with_same_name(const char *name) {
+ struct passwd *p;
+ struct group *g;
+
+ assert(name);
+
+ p = getpwnam(name);
+ if (!p ||
+ !streq(p->pw_name, name))
+ return false;
+
+ g = getgrgid(p->pw_gid);
+ if (!g ||
+ !streq(g->gr_name, name))
+ return false;
+
+ return true;
+}
+
+static bool is_inaccessible_available(void) {
+ const char *p;
+
+ FOREACH_STRING(p,
+ "/run/systemd/inaccessible/reg",
+ "/run/systemd/inaccessible/dir",
+ "/run/systemd/inaccessible/chr",
+ "/run/systemd/inaccessible/blk",
+ "/run/systemd/inaccessible/fifo",
+ "/run/systemd/inaccessible/sock"
+ ) {
+ if (access(p, F_OK) < 0)
+ return false;
+ }
+
+ return true;
+}
+
+static void test(Manager *m, const char *unit_name, int status_expected, int code_expected) {
+ Unit *unit;
+
+ assert_se(unit_name);
+
+ assert_se(manager_load_startable_unit_or_warn(m, unit_name, NULL, &unit) >= 0);
+ assert_se(unit_start(unit) >= 0);
+ check(m, unit, status_expected, code_expected);
+}
+
+static void test_exec_bindpaths(Manager *m) {
+ assert_se(mkdir_p("/tmp/test-exec-bindpaths", 0755) >= 0);
+ assert_se(mkdir_p("/tmp/test-exec-bindreadonlypaths", 0755) >= 0);
+
+ test(m, "exec-bindpaths.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+
+ (void) rm_rf("/tmp/test-exec-bindpaths", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/tmp/test-exec-bindreadonlypaths", REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+static void test_exec_cpuaffinity(Manager *m) {
+ _cleanup_cpu_free_ cpu_set_t *c = NULL;
+ unsigned n;
+
+ assert_se(c = cpu_set_malloc(&n));
+ assert_se(sched_getaffinity(0, CPU_ALLOC_SIZE(n), c) >= 0);
+
+ if (CPU_ISSET_S(0, CPU_ALLOC_SIZE(n), c) == 0) {
+ log_notice("Cannot use CPU 0, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-cpuaffinity1.service", 0, CLD_EXITED);
+ test(m, "exec-cpuaffinity2.service", 0, CLD_EXITED);
+
+ if (CPU_ISSET_S(1, CPU_ALLOC_SIZE(n), c) == 0 ||
+ CPU_ISSET_S(2, CPU_ALLOC_SIZE(n), c) == 0) {
+ log_notice("Cannot use CPU 1 or 2, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ test(m, "exec-cpuaffinity3.service", 0, CLD_EXITED);
+}
+
+static void test_exec_workingdirectory(Manager *m) {
+ assert_se(mkdir_p("/tmp/test-exec_workingdirectory", 0755) >= 0);
+
+ test(m, "exec-workingdirectory.service", 0, CLD_EXITED);
+ test(m, "exec-workingdirectory-trailing-dot.service", 0, CLD_EXITED);
+
+ (void) rm_rf("/tmp/test-exec_workingdirectory", REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+static void test_exec_personality(Manager *m) {
+#if defined(__x86_64__)
+ test(m, "exec-personality-x86-64.service", 0, CLD_EXITED);
+
+#elif defined(__s390__)
+ test(m, "exec-personality-s390.service", 0, CLD_EXITED);
+
+#elif defined(__powerpc64__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+ test(m, "exec-personality-ppc64.service", 0, CLD_EXITED);
+# else
+ test(m, "exec-personality-ppc64le.service", 0, CLD_EXITED);
+# endif
+
+#elif defined(__aarch64__)
+ test(m, "exec-personality-aarch64.service", 0, CLD_EXITED);
+
+#elif defined(__i386__)
+ test(m, "exec-personality-x86.service", 0, CLD_EXITED);
+#else
+ log_notice("Unknown personality, skipping %s", __func__);
+#endif
+}
+
+static void test_exec_ignoresigpipe(Manager *m) {
+ test(m, "exec-ignoresigpipe-yes.service", 0, CLD_EXITED);
+ test(m, "exec-ignoresigpipe-no.service", SIGPIPE, CLD_KILLED);
+}
+
+static void test_exec_privatetmp(Manager *m) {
+ assert_se(touch("/tmp/test-exec_privatetmp") >= 0);
+
+ test(m, "exec-privatetmp-yes.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-privatetmp-no.service", 0, CLD_EXITED);
+
+ unlink("/tmp/test-exec_privatetmp");
+}
+
+static void test_exec_privatedevices(Manager *m) {
+ int r;
+
+ if (detect_container() > 0) {
+ log_notice("Testing in container, skipping %s", __func__);
+ return;
+ }
+ if (!is_inaccessible_available()) {
+ log_notice("Testing without inaccessible, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-privatedevices-yes.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-privatedevices-no.service", 0, CLD_EXITED);
+ test(m, "exec-privatedevices-disabled-by-prefix.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+
+ /* We use capsh to test if the capabilities are
+ * properly set, so be sure that it exists */
+ r = find_binary("capsh", NULL);
+ if (r < 0) {
+ log_notice_errno(r, "Could not find capsh binary, skipping remaining tests in %s: %m", __func__);
+ return;
+ }
+
+ test(m, "exec-privatedevices-yes-capability-mknod.service", 0, CLD_EXITED);
+ test(m, "exec-privatedevices-no-capability-mknod.service", 0, CLD_EXITED);
+ test(m, "exec-privatedevices-yes-capability-sys-rawio.service", 0, CLD_EXITED);
+ test(m, "exec-privatedevices-no-capability-sys-rawio.service", 0, CLD_EXITED);
+}
+
+static void test_exec_protectkernelmodules(Manager *m) {
+ int r;
+
+ if (detect_container() > 0) {
+ log_notice("Testing in container, skipping %s", __func__);
+ return;
+ }
+ if (!is_inaccessible_available()) {
+ log_notice("Testing without inaccessible, skipping %s", __func__);
+ return;
+ }
+
+ r = find_binary("capsh", NULL);
+ if (r < 0) {
+ log_notice_errno(r, "Skipping %s, could not find capsh binary: %m", __func__);
+ return;
+ }
+
+ test(m, "exec-protectkernelmodules-no-capabilities.service", 0, CLD_EXITED);
+ test(m, "exec-protectkernelmodules-yes-capabilities.service", 0, CLD_EXITED);
+ test(m, "exec-protectkernelmodules-yes-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+}
+
+static void test_exec_readonlypaths(Manager *m) {
+
+ test(m, "exec-readonlypaths-simple.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+
+ if (path_is_read_only_fs("/var") > 0) {
+ log_notice("Directory /var is readonly, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ test(m, "exec-readonlypaths.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-readonlypaths-with-bindpaths.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-readonlypaths-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+}
+
+static void test_exec_readwritepaths(Manager *m) {
+
+ if (path_is_read_only_fs("/") > 0) {
+ log_notice("Root directory is readonly, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-readwritepaths-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+}
+
+static void test_exec_inaccessiblepaths(Manager *m) {
+
+ if (!is_inaccessible_available()) {
+ log_notice("Testing without inaccessible, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-inaccessiblepaths-proc.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+
+ if (path_is_read_only_fs("/") > 0) {
+ log_notice("Root directory is readonly, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ test(m, "exec-inaccessiblepaths-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+}
+
+static void test_exec_temporaryfilesystem(Manager *m) {
+
+ test(m, "exec-temporaryfilesystem-options.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-temporaryfilesystem-ro.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-temporaryfilesystem-rw.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-temporaryfilesystem-usr.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+}
+
+static void test_exec_systemcallfilter(Manager *m) {
+#if HAVE_SECCOMP
+ int r;
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-systemcallfilter-not-failing.service", 0, CLD_EXITED);
+ test(m, "exec-systemcallfilter-not-failing2.service", 0, CLD_EXITED);
+ test(m, "exec-systemcallfilter-failing.service", SIGSYS, CLD_KILLED);
+ test(m, "exec-systemcallfilter-failing2.service", SIGSYS, CLD_KILLED);
+
+ r = find_binary("python3", NULL);
+ if (r < 0) {
+ log_notice_errno(r, "Skipping remaining tests in %s, could not find python3 binary: %m", __func__);
+ return;
+ }
+
+ test(m, "exec-systemcallfilter-with-errno-name.service", errno_from_name("EILSEQ"), CLD_EXITED);
+ test(m, "exec-systemcallfilter-with-errno-number.service", 255, CLD_EXITED);
+ test(m, "exec-systemcallfilter-with-errno-multi.service", errno_from_name("EILSEQ"), CLD_EXITED);
+#endif
+}
+
+static void test_exec_systemcallerrornumber(Manager *m) {
+#if HAVE_SECCOMP
+ int r;
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+
+ r = find_binary("python3", NULL);
+ if (r < 0) {
+ log_notice_errno(r, "Skipping %s, could not find python3 binary: %m", __func__);
+ return;
+ }
+
+ test(m, "exec-systemcallerrornumber-name.service", errno_from_name("EACCES"), CLD_EXITED);
+ test(m, "exec-systemcallerrornumber-number.service", 255, CLD_EXITED);
+#endif
+}
+
+static void test_exec_restrictnamespaces(Manager *m) {
+#if HAVE_SECCOMP
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-restrictnamespaces-no.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-restrictnamespaces-yes.service", 1, CLD_EXITED);
+ test(m, "exec-restrictnamespaces-mnt.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-restrictnamespaces-mnt-blacklist.service", 1, CLD_EXITED);
+ test(m, "exec-restrictnamespaces-merge-and.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-restrictnamespaces-merge-or.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-restrictnamespaces-merge-all.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+#endif
+}
+
+static void test_exec_systemcallfilter_system(Manager *m) {
+#if HAVE_SECCOMP
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-systemcallfilter-system-user.service", 0, CLD_EXITED);
+
+ if (!check_nobody_user_and_group()) {
+ log_notice("nobody user/group is not synthesized or may conflict to other entries, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ if (!STR_IN_SET(NOBODY_USER_NAME, "nobody", "nfsnobody")) {
+ log_notice("Unsupported nobody user name '%s', skipping remaining tests in %s", NOBODY_USER_NAME, __func__);
+ return;
+ }
+
+ test(m, "exec-systemcallfilter-system-user-" NOBODY_USER_NAME ".service", 0, CLD_EXITED);
+#endif
+}
+
+static void test_exec_user(Manager *m) {
+ test(m, "exec-user.service", 0, CLD_EXITED);
+
+ if (!check_nobody_user_and_group()) {
+ log_notice("nobody user/group is not synthesized or may conflict to other entries, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ if (!STR_IN_SET(NOBODY_USER_NAME, "nobody", "nfsnobody")) {
+ log_notice("Unsupported nobody user name '%s', skipping remaining tests in %s", NOBODY_USER_NAME, __func__);
+ return;
+ }
+
+ test(m, "exec-user-" NOBODY_USER_NAME ".service", 0, CLD_EXITED);
+}
+
+static void test_exec_group(Manager *m) {
+ test(m, "exec-group.service", 0, CLD_EXITED);
+
+ if (!check_nobody_user_and_group()) {
+ log_notice("nobody user/group is not synthesized or may conflict to other entries, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ if (!STR_IN_SET(NOBODY_GROUP_NAME, "nobody", "nfsnobody", "nogroup")) {
+ log_notice("Unsupported nobody group name '%s', skipping remaining tests in %s", NOBODY_GROUP_NAME, __func__);
+ return;
+ }
+
+ test(m, "exec-group-" NOBODY_GROUP_NAME ".service", 0, CLD_EXITED);
+}
+
+static void test_exec_supplementarygroups(Manager *m) {
+ test(m, "exec-supplementarygroups.service", 0, CLD_EXITED);
+ test(m, "exec-supplementarygroups-single-group.service", 0, CLD_EXITED);
+ test(m, "exec-supplementarygroups-single-group-user.service", 0, CLD_EXITED);
+ test(m, "exec-supplementarygroups-multiple-groups-default-group-user.service", 0, CLD_EXITED);
+ test(m, "exec-supplementarygroups-multiple-groups-withgid.service", 0, CLD_EXITED);
+ test(m, "exec-supplementarygroups-multiple-groups-withuid.service", 0, CLD_EXITED);
+}
+
+static void test_exec_dynamicuser(Manager *m) {
+
+ test(m, "exec-dynamicuser-fixeduser.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ if (check_user_has_group_with_same_name("adm"))
+ test(m, "exec-dynamicuser-fixeduser-adm.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ if (check_user_has_group_with_same_name("games"))
+ test(m, "exec-dynamicuser-fixeduser-games.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-dynamicuser-fixeduser-one-supplementarygroup.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-dynamicuser-supplementarygroups.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-dynamicuser-statedir.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+
+ (void) rm_rf("/var/lib/test-dynamicuser-migrate", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/test-dynamicuser-migrate2", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/private/test-dynamicuser-migrate", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/private/test-dynamicuser-migrate2", REMOVE_ROOT|REMOVE_PHYSICAL);
+
+ test(m, "exec-dynamicuser-statedir-migrate-step1.service", 0, CLD_EXITED);
+ test(m, "exec-dynamicuser-statedir-migrate-step2.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+
+ (void) rm_rf("/var/lib/test-dynamicuser-migrate", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/test-dynamicuser-migrate2", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/private/test-dynamicuser-migrate", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/private/test-dynamicuser-migrate2", REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+static void test_exec_environment(Manager *m) {
+ test(m, "exec-environment.service", 0, CLD_EXITED);
+ test(m, "exec-environment-multiple.service", 0, CLD_EXITED);
+ test(m, "exec-environment-empty.service", 0, CLD_EXITED);
+}
+
+static void test_exec_environmentfile(Manager *m) {
+ static const char e[] =
+ "VAR1='word1 word2'\n"
+ "VAR2=word3 \n"
+ "# comment1\n"
+ "\n"
+ "; comment2\n"
+ " ; # comment3\n"
+ "line without an equal\n"
+ "VAR3='$word 5 6'\n"
+ "VAR4='new\nline'\n"
+ "VAR5=password\\with\\backslashes";
+ int r;
+
+ r = write_string_file("/tmp/test-exec_environmentfile.conf", e, WRITE_STRING_FILE_CREATE);
+ assert_se(r == 0);
+
+ test(m, "exec-environmentfile.service", 0, CLD_EXITED);
+
+ (void) unlink("/tmp/test-exec_environmentfile.conf");
+}
+
+static void test_exec_passenvironment(Manager *m) {
+ /* test-execute runs under MANAGER_USER which, by default, forwards all
+ * variables present in the environment, but only those that are
+ * present _at the time it is created_!
+ *
+ * So these PassEnvironment checks are still expected to work, since we
+ * are ensuring the variables are not present at manager creation (they
+ * are unset explicitly in main) and are only set here.
+ *
+ * This is still a good approximation of how a test for MANAGER_SYSTEM
+ * would work.
+ */
+ assert_se(setenv("VAR1", "word1 word2", 1) == 0);
+ assert_se(setenv("VAR2", "word3", 1) == 0);
+ assert_se(setenv("VAR3", "$word 5 6", 1) == 0);
+ assert_se(setenv("VAR4", "new\nline", 1) == 0);
+ assert_se(setenv("VAR5", "passwordwithbackslashes", 1) == 0);
+ test(m, "exec-passenvironment.service", 0, CLD_EXITED);
+ test(m, "exec-passenvironment-repeated.service", 0, CLD_EXITED);
+ test(m, "exec-passenvironment-empty.service", 0, CLD_EXITED);
+ assert_se(unsetenv("VAR1") == 0);
+ assert_se(unsetenv("VAR2") == 0);
+ assert_se(unsetenv("VAR3") == 0);
+ assert_se(unsetenv("VAR4") == 0);
+ assert_se(unsetenv("VAR5") == 0);
+ test(m, "exec-passenvironment-absent.service", 0, CLD_EXITED);
+}
+
+static void test_exec_umask(Manager *m) {
+ test(m, "exec-umask-default.service", 0, CLD_EXITED);
+ test(m, "exec-umask-0177.service", 0, CLD_EXITED);
+}
+
+static void test_exec_runtimedirectory(Manager *m) {
+ test(m, "exec-runtimedirectory.service", 0, CLD_EXITED);
+ test(m, "exec-runtimedirectory-mode.service", 0, CLD_EXITED);
+ test(m, "exec-runtimedirectory-owner.service", 0, CLD_EXITED);
+
+ if (!check_nobody_user_and_group()) {
+ log_notice("nobody user/group is not synthesized or may conflict to other entries, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ if (!STR_IN_SET(NOBODY_GROUP_NAME, "nobody", "nfsnobody", "nogroup")) {
+ log_notice("Unsupported nobody group name '%s', skipping remaining tests in %s", NOBODY_GROUP_NAME, __func__);
+ return;
+ }
+
+ test(m, "exec-runtimedirectory-owner-" NOBODY_GROUP_NAME ".service", 0, CLD_EXITED);
+}
+
+static void test_exec_capabilityboundingset(Manager *m) {
+ int r;
+
+ r = find_binary("capsh", NULL);
+ if (r < 0) {
+ log_notice_errno(r, "Skipping %s, could not find capsh binary: %m", __func__);
+ return;
+ }
+
+ if (have_effective_cap(CAP_CHOWN) <= 0 ||
+ have_effective_cap(CAP_FOWNER) <= 0 ||
+ have_effective_cap(CAP_KILL) <= 0) {
+ log_notice("Skipping %s, this process does not have enough capabilities", __func__);
+ return;
+ }
+
+ test(m, "exec-capabilityboundingset-simple.service", 0, CLD_EXITED);
+ test(m, "exec-capabilityboundingset-reset.service", 0, CLD_EXITED);
+ test(m, "exec-capabilityboundingset-merge.service", 0, CLD_EXITED);
+ test(m, "exec-capabilityboundingset-invert.service", 0, CLD_EXITED);
+}
+
+static void test_exec_basic(Manager *m) {
+ test(m, "exec-basic.service", 0, CLD_EXITED);
+}
+
+static void test_exec_ambientcapabilities(Manager *m) {
+ int r;
+
+ /* Check if the kernel has support for ambient capabilities. Run
+ * the tests only if that's the case. Clearing all ambient
+ * capabilities is fine, since we are expecting them to be unset
+ * in the first place for the tests. */
+ r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0);
+ if (r < 0 && IN_SET(errno, EINVAL, EOPNOTSUPP, ENOSYS)) {
+ log_notice("Skipping %s, the kernel does not support ambient capabilities", __func__);
+ return;
+ }
+
+ if (have_effective_cap(CAP_CHOWN) <= 0 ||
+ have_effective_cap(CAP_NET_RAW) <= 0) {
+ log_notice("Skipping %s, this process does not have enough capabilities", __func__);
+ return;
+ }
+
+ test(m, "exec-ambientcapabilities.service", 0, CLD_EXITED);
+ test(m, "exec-ambientcapabilities-merge.service", 0, CLD_EXITED);
+
+ if (!check_nobody_user_and_group()) {
+ log_notice("nobody user/group is not synthesized or may conflict to other entries, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ if (!STR_IN_SET(NOBODY_USER_NAME, "nobody", "nfsnobody")) {
+ log_notice("Unsupported nobody user name '%s', skipping remaining tests in %s", NOBODY_USER_NAME, __func__);
+ return;
+ }
+
+ test(m, "exec-ambientcapabilities-" NOBODY_USER_NAME ".service", 0, CLD_EXITED);
+ test(m, "exec-ambientcapabilities-merge-" NOBODY_USER_NAME ".service", 0, CLD_EXITED);
+}
+
+static void test_exec_privatenetwork(Manager *m) {
+ int r;
+
+ r = find_binary("ip", NULL);
+ if (r < 0) {
+ log_notice_errno(r, "Skipping %s, could not find ip binary: %m", __func__);
+ return;
+ }
+
+ test(m, "exec-privatenetwork-yes.service", can_unshare ? 0 : EXIT_NETWORK, CLD_EXITED);
+}
+
+static void test_exec_oomscoreadjust(Manager *m) {
+ test(m, "exec-oomscoreadjust-positive.service", 0, CLD_EXITED);
+
+ if (detect_container() > 0) {
+ log_notice("Testing in container, skipping remaining tests in %s", __func__);
+ return;
+ }
+ test(m, "exec-oomscoreadjust-negative.service", 0, CLD_EXITED);
+}
+
+static void test_exec_ioschedulingclass(Manager *m) {
+ test(m, "exec-ioschedulingclass-none.service", 0, CLD_EXITED);
+ test(m, "exec-ioschedulingclass-idle.service", 0, CLD_EXITED);
+ test(m, "exec-ioschedulingclass-best-effort.service", 0, CLD_EXITED);
+
+ if (detect_container() > 0) {
+ log_notice("Testing in container, skipping remaining tests in %s", __func__);
+ return;
+ }
+ test(m, "exec-ioschedulingclass-realtime.service", 0, CLD_EXITED);
+}
+
+static void test_exec_unsetenvironment(Manager *m) {
+ test(m, "exec-unsetenvironment.service", 0, CLD_EXITED);
+}
+
+static void test_exec_specifier(Manager *m) {
+ test(m, "exec-specifier.service", 0, CLD_EXITED);
+ test(m, "exec-specifier@foo-bar.service", 0, CLD_EXITED);
+ test(m, "exec-specifier-interpolation.service", 0, CLD_EXITED);
+}
+
+static void test_exec_standardinput(Manager *m) {
+ test(m, "exec-standardinput-data.service", 0, CLD_EXITED);
+ test(m, "exec-standardinput-file.service", 0, CLD_EXITED);
+}
+
+static void test_exec_standardoutput(Manager *m) {
+ test(m, "exec-standardoutput-file.service", 0, CLD_EXITED);
+}
+
+static void test_exec_standardoutput_append(Manager *m) {
+ test(m, "exec-standardoutput-append.service", 0, CLD_EXITED);
+}
+
+static int run_tests(UnitFileScope scope, const test_function_t *tests) {
+ const test_function_t *test = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ assert_se(tests);
+
+ r = manager_new(scope, MANAGER_TEST_RUN_BASIC, &m);
+ if (MANAGER_SKIP_TEST(r))
+ return log_tests_skipped_errno(r, "manager_new");
+ assert_se(r >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ for (test = tests; test && *test; test++)
+ (*test)(m);
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ _cleanup_free_ char *test_execute_path = NULL;
+ _cleanup_hashmap_free_ Hashmap *s = NULL;
+ static const test_function_t user_tests[] = {
+ test_exec_basic,
+ test_exec_ambientcapabilities,
+ test_exec_bindpaths,
+ test_exec_capabilityboundingset,
+ test_exec_cpuaffinity,
+ test_exec_environment,
+ test_exec_environmentfile,
+ test_exec_group,
+ test_exec_ignoresigpipe,
+ test_exec_inaccessiblepaths,
+ test_exec_ioschedulingclass,
+ test_exec_oomscoreadjust,
+ test_exec_passenvironment,
+ test_exec_personality,
+ test_exec_privatedevices,
+ test_exec_privatenetwork,
+ test_exec_privatetmp,
+ test_exec_protectkernelmodules,
+ test_exec_readonlypaths,
+ test_exec_readwritepaths,
+ test_exec_restrictnamespaces,
+ test_exec_runtimedirectory,
+ test_exec_standardinput,
+ test_exec_standardoutput,
+ test_exec_standardoutput_append,
+ test_exec_supplementarygroups,
+ test_exec_systemcallerrornumber,
+ test_exec_systemcallfilter,
+ test_exec_temporaryfilesystem,
+ test_exec_umask,
+ test_exec_unsetenvironment,
+ test_exec_user,
+ test_exec_workingdirectory,
+ NULL,
+ };
+ static const test_function_t system_tests[] = {
+ test_exec_dynamicuser,
+ test_exec_specifier,
+ test_exec_systemcallfilter_system,
+ NULL,
+ };
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+#if HAS_FEATURE_ADDRESS_SANITIZER
+ if (is_run_on_travis_ci()) {
+ log_notice("Running on TravisCI under ASan, skipping, see https://github.com/systemd/systemd/issues/10696");
+ return EXIT_TEST_SKIP;
+ }
+#endif
+
+ (void) unsetenv("USER");
+ (void) unsetenv("LOGNAME");
+ (void) unsetenv("SHELL");
+ (void) unsetenv("HOME");
+
+ can_unshare = have_namespaces();
+
+ /* It is needed otherwise cgroup creation fails */
+ if (getuid() != 0)
+ return log_tests_skipped("not root");
+
+ r = enter_cgroup_subroot();
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+ test_execute_path = path_join(get_testdata_dir(), "test-execute");
+ assert_se(set_unit_path(test_execute_path) >= 0);
+
+ /* Unset VAR1, VAR2 and VAR3 which are used in the PassEnvironment test
+ * cases, otherwise (and if they are present in the environment),
+ * `manager_default_environment` will copy them into the default
+ * environment which is passed to each created job, which will make the
+ * tests that expect those not to be present to fail.
+ */
+ assert_se(unsetenv("VAR1") == 0);
+ assert_se(unsetenv("VAR2") == 0);
+ assert_se(unsetenv("VAR3") == 0);
+
+ r = run_tests(UNIT_FILE_USER, user_tests);
+ if (r != 0)
+ return r;
+
+ r = run_tests(UNIT_FILE_SYSTEM, system_tests);
+ if (r != 0)
+ return r;
+
+#if HAVE_SECCOMP
+ /* The following tests are for 1beab8b0d0ff2d7d1436b52d4a0c3d56dc908962. */
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping unshare() filtered tests.");
+ return 0;
+ }
+
+ assert_se(s = hashmap_new(NULL));
+ r = seccomp_syscall_resolve_name("unshare");
+ assert_se(r != __NR_SCMP_ERROR);
+ assert_se(hashmap_put(s, UINT32_TO_PTR(r + 1), INT_TO_PTR(-1)) >= 0);
+ assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, s, SCMP_ACT_ERRNO(EOPNOTSUPP), true) >= 0);
+ assert_se(unshare(CLONE_NEWNS) < 0);
+ assert_se(errno == EOPNOTSUPP);
+
+ can_unshare = false;
+
+ r = run_tests(UNIT_FILE_USER, user_tests);
+ if (r != 0)
+ return r;
+
+ return run_tests(UNIT_FILE_SYSTEM, system_tests);
+#else
+ return 0;
+#endif
+}
diff --git a/src/test/test-extract-word.c b/src/test/test-extract-word.c
new file mode 100644
index 0000000..3001938
--- /dev/null
+++ b/src/test/test-extract-word.c
@@ -0,0 +1,539 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "extract-word.h"
+#include "log.h"
+#include "string-util.h"
+
+static void test_extract_first_word(void) {
+ const char *p, *original;
+ char *t;
+
+ p = original = "foobar waldo";
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "foobar"));
+ free(t);
+ assert_se(p == original + 7);
+
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "waldo"));
+ free(t);
+ assert_se(isempty(p));
+
+ assert_se(extract_first_word(&p, &t, NULL, 0) == 0);
+ assert_se(!t);
+ assert_se(isempty(p));
+
+ p = original = "\"foobar\" \'waldo\'";
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "\"foobar\""));
+ free(t);
+ assert_se(p == original + 9);
+
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "\'waldo\'"));
+ free(t);
+ assert_se(isempty(p));
+
+ assert_se(extract_first_word(&p, &t, NULL, 0) == 0);
+ assert_se(!t);
+ assert_se(isempty(p));
+
+ p = original = "\"foobar\" \'waldo\'";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES) > 0);
+ assert_se(streq(t, "foobar"));
+ free(t);
+ assert_se(p == original + 9);
+
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES) > 0);
+ assert_se(streq(t, "waldo"));
+ free(t);
+ assert_se(isempty(p));
+
+ assert_se(extract_first_word(&p, &t, NULL, 0) == 0);
+ assert_se(!t);
+ assert_se(isempty(p));
+
+ p = original = "\"";
+ assert_se(extract_first_word(&p, &t, NULL, 0) == 1);
+ assert_se(streq(t, "\""));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\"";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES) == -EINVAL);
+ assert_se(p == original + 1);
+
+ p = original = "\'";
+ assert_se(extract_first_word(&p, &t, NULL, 0) == 1);
+ assert_se(streq(t, "\'"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\'";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES) == -EINVAL);
+ assert_se(p == original + 1);
+
+ p = original = "\'fooo";
+ assert_se(extract_first_word(&p, &t, NULL, 0) == 1);
+ assert_se(streq(t, "\'fooo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\'fooo";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES) == -EINVAL);
+ assert_se(p == original + 5);
+
+ p = original = "\'fooo";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES|EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "fooo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\"fooo";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES|EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "fooo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "yay\'foo\'bar";
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "yay\'foo\'bar"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "yay\'foo\'bar";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES) > 0);
+ assert_se(streq(t, "yayfoobar"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = " foobar ";
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "foobar"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = " foo\\ba\\x6ar ";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE) > 0);
+ assert_se(streq(t, "foo\ba\x6ar"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = " foo\\ba\\x6ar ";
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "foobax6ar"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = " f\\u00f6o \"pi\\U0001F4A9le\" ";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE) > 0);
+ assert_se(streq(t, "föo"));
+ free(t);
+ assert_se(p == original + 13);
+
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE) > 0);
+ assert_se(streq(t, "pi\360\237\222\251le"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "fooo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE_RELAX) > 0);
+ assert_se(streq(t, "fooo\\"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE_RELAX|EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "fooo\\"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE|EXTRACT_CUNESCAPE_RELAX) > 0);
+ assert_se(streq(t, "fooo\\"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word(&p, &t, NULL, 0) == -EINVAL);
+ assert_se(p == original + 5);
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES|EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "foo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "foo::bar";
+ assert_se(extract_first_word(&p, &t, ":", 0) == 1);
+ assert_se(streq(t, "foo"));
+ free(t);
+ assert_se(p == original + 5);
+
+ assert_se(extract_first_word(&p, &t, ":", 0) == 1);
+ assert_se(streq(t, "bar"));
+ free(t);
+ assert_se(isempty(p));
+
+ assert_se(extract_first_word(&p, &t, ":", 0) == 0);
+ assert_se(!t);
+ assert_se(isempty(p));
+
+ p = original = "foo\\:bar::waldo";
+ assert_se(extract_first_word(&p, &t, ":", 0) == 1);
+ assert_se(streq(t, "foo:bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ assert_se(extract_first_word(&p, &t, ":", 0) == 1);
+ assert_se(streq(t, "waldo"));
+ free(t);
+ assert_se(isempty(p));
+
+ assert_se(extract_first_word(&p, &t, ":", 0) == 0);
+ assert_se(!t);
+ assert_se(isempty(p));
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE_RELAX) == -EINVAL);
+ assert_se(p == original + 5);
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE_RELAX|EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "foo\\"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE|EXTRACT_CUNESCAPE_RELAX|EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "foo\\"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "fooo bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE_RELAX) > 0);
+ assert_se(streq(t, "fooo bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE_RELAX|EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "fooo bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE) == -EINVAL);
+ assert_se(p == original + 5);
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE|EXTRACT_CUNESCAPE_RELAX) > 0);
+ assert_se(streq(t, "fooo\\ bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ p = original = "\\w+@\\K[\\d.]+";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE) == -EINVAL);
+ assert_se(p == original + 1);
+
+ p = original = "\\w+@\\K[\\d.]+";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE|EXTRACT_CUNESCAPE_RELAX) > 0);
+ assert_se(streq(t, "\\w+@\\K[\\d.]+"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\\w+\\b";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE|EXTRACT_CUNESCAPE_RELAX) > 0);
+ assert_se(streq(t, "\\w+\b"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "-N ''";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES) > 0);
+ assert_se(streq(t, "-N"));
+ free(t);
+ assert_se(p == original + 3);
+
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_QUOTES) > 0);
+ assert_se(streq(t, ""));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = ":foo\\:bar::waldo:";
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_DONT_COALESCE_SEPARATORS) == 1);
+ assert_se(t);
+ assert_se(streq(t, ""));
+ free(t);
+ assert_se(p == original + 1);
+
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_DONT_COALESCE_SEPARATORS) == 1);
+ assert_se(streq(t, "foo:bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_DONT_COALESCE_SEPARATORS) == 1);
+ assert_se(t);
+ assert_se(streq(t, ""));
+ free(t);
+ assert_se(p == original + 11);
+
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_DONT_COALESCE_SEPARATORS) == 1);
+ assert_se(streq(t, "waldo"));
+ free(t);
+ assert_se(p == original + 17);
+
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_DONT_COALESCE_SEPARATORS) == 1);
+ assert_se(streq(t, ""));
+ free(t);
+ assert_se(p == NULL);
+
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_DONT_COALESCE_SEPARATORS) == 0);
+ assert_se(!t);
+ assert_se(!p);
+
+ p = "foo\\xbar";
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "fooxbar"));
+ free(t);
+ assert_se(p == NULL);
+
+ p = "foo\\xbar";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_RETAIN_ESCAPE) > 0);
+ assert_se(streq(t, "foo\\xbar"));
+ free(t);
+ assert_se(p == NULL);
+}
+
+static void test_extract_first_word_and_warn(void) {
+ const char *p, *original;
+ char *t;
+
+ p = original = "foobar waldo";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, 0, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "foobar"));
+ free(t);
+ assert_se(p == original + 7);
+
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, 0, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "waldo"));
+ free(t);
+ assert_se(isempty(p));
+
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, 0, NULL, "fake", 1, original) == 0);
+ assert_se(!t);
+ assert_se(isempty(p));
+
+ p = original = "\"foobar\" \'waldo\'";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_QUOTES, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "foobar"));
+ free(t);
+ assert_se(p == original + 9);
+
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_QUOTES, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "waldo"));
+ free(t);
+ assert_se(isempty(p));
+
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, 0, NULL, "fake", 1, original) == 0);
+ assert_se(!t);
+ assert_se(isempty(p));
+
+ p = original = "\"";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_QUOTES, NULL, "fake", 1, original) == -EINVAL);
+ assert_se(p == original + 1);
+
+ p = original = "\'";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_QUOTES, NULL, "fake", 1, original) == -EINVAL);
+ assert_se(p == original + 1);
+
+ p = original = "\'fooo";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_QUOTES, NULL, "fake", 1, original) == -EINVAL);
+ assert_se(p == original + 5);
+
+ p = original = "\'fooo";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_QUOTES|EXTRACT_RELAX, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "fooo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = " foo\\ba\\x6ar ";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_CUNESCAPE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "foo\ba\x6ar"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = " foo\\ba\\x6ar ";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, 0, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "foobax6ar"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = " f\\u00f6o \"pi\\U0001F4A9le\" ";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_CUNESCAPE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "föo"));
+ free(t);
+ assert_se(p == original + 13);
+
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "pi\360\237\222\251le"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_RELAX, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "fooo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, 0, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "fooo\\"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_CUNESCAPE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "fooo\\"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_QUOTES, NULL, "fake", 1, original) == -EINVAL);
+ assert_se(p == original + 5);
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_QUOTES|EXTRACT_RELAX, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "foo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE, NULL, "fake", 1, original) == -EINVAL);
+ assert_se(p == original + 5);
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE|EXTRACT_RELAX, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "foo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_RELAX, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "fooo bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, 0, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "fooo bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_CUNESCAPE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "fooo\\ bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ p = original = "\\w+@\\K[\\d.]+";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_CUNESCAPE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "\\w+@\\K[\\d.]+"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\\w+\\b";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_CUNESCAPE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "\\w+\b"));
+ free(t);
+ assert_se(isempty(p));
+}
+
+static void test_extract_many_words(void) {
+ const char *p, *original;
+ char *a, *b, *c;
+
+ p = original = "foobar waldi piep";
+ assert_se(extract_many_words(&p, NULL, 0, &a, &b, &c, NULL) == 3);
+ assert_se(isempty(p));
+ assert_se(streq_ptr(a, "foobar"));
+ assert_se(streq_ptr(b, "waldi"));
+ assert_se(streq_ptr(c, "piep"));
+ free(a);
+ free(b);
+ free(c);
+
+ p = original = "'foobar' wa\"ld\"i ";
+ assert_se(extract_many_words(&p, NULL, 0, &a, &b, &c, NULL) == 2);
+ assert_se(isempty(p));
+ assert_se(streq_ptr(a, "'foobar'"));
+ assert_se(streq_ptr(b, "wa\"ld\"i"));
+ assert_se(streq_ptr(c, NULL));
+ free(a);
+ free(b);
+
+ p = original = "'foobar' wa\"ld\"i ";
+ assert_se(extract_many_words(&p, NULL, EXTRACT_QUOTES, &a, &b, &c, NULL) == 2);
+ assert_se(isempty(p));
+ assert_se(streq_ptr(a, "foobar"));
+ assert_se(streq_ptr(b, "waldi"));
+ assert_se(streq_ptr(c, NULL));
+ free(a);
+ free(b);
+
+ p = original = "";
+ assert_se(extract_many_words(&p, NULL, 0, &a, &b, &c, NULL) == 0);
+ assert_se(isempty(p));
+ assert_se(streq_ptr(a, NULL));
+ assert_se(streq_ptr(b, NULL));
+ assert_se(streq_ptr(c, NULL));
+
+ p = original = " ";
+ assert_se(extract_many_words(&p, NULL, 0, &a, &b, &c, NULL) == 0);
+ assert_se(isempty(p));
+ assert_se(streq_ptr(a, NULL));
+ assert_se(streq_ptr(b, NULL));
+ assert_se(streq_ptr(c, NULL));
+
+ p = original = "foobar";
+ assert_se(extract_many_words(&p, NULL, 0, NULL) == 0);
+ assert_se(p == original);
+
+ p = original = "foobar waldi";
+ assert_se(extract_many_words(&p, NULL, 0, &a, NULL) == 1);
+ assert_se(p == original+7);
+ assert_se(streq_ptr(a, "foobar"));
+ free(a);
+
+ p = original = " foobar ";
+ assert_se(extract_many_words(&p, NULL, 0, &a, NULL) == 1);
+ assert_se(isempty(p));
+ assert_se(streq_ptr(a, "foobar"));
+ free(a);
+}
+
+int main(int argc, char *argv[]) {
+ log_parse_environment();
+ log_open();
+
+ test_extract_first_word();
+ test_extract_first_word_and_warn();
+ test_extract_many_words();
+
+ return 0;
+}
diff --git a/src/test/test-fd-util.c b/src/test/test-fd-util.c
new file mode 100644
index 0000000..7a0a2ad
--- /dev/null
+++ b/src/test/test-fd-util.c
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "serialize.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static void test_close_many(void) {
+ int fds[3];
+ char name0[] = "/tmp/test-close-many.XXXXXX";
+ char name1[] = "/tmp/test-close-many.XXXXXX";
+ char name2[] = "/tmp/test-close-many.XXXXXX";
+
+ fds[0] = mkostemp_safe(name0);
+ fds[1] = mkostemp_safe(name1);
+ fds[2] = mkostemp_safe(name2);
+
+ close_many(fds, 2);
+
+ assert_se(fcntl(fds[0], F_GETFD) == -1);
+ assert_se(fcntl(fds[1], F_GETFD) == -1);
+ assert_se(fcntl(fds[2], F_GETFD) >= 0);
+
+ safe_close(fds[2]);
+
+ unlink(name0);
+ unlink(name1);
+ unlink(name2);
+}
+
+static void test_close_nointr(void) {
+ char name[] = "/tmp/test-test-close_nointr.XXXXXX";
+ int fd;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(close_nointr(fd) >= 0);
+ assert_se(close_nointr(fd) < 0);
+
+ unlink(name);
+}
+
+static void test_same_fd(void) {
+ _cleanup_close_pair_ int p[2] = { -1, -1 };
+ _cleanup_close_ int a = -1, b = -1, c = -1;
+
+ assert_se(pipe2(p, O_CLOEXEC) >= 0);
+ assert_se((a = fcntl(p[0], F_DUPFD, 3)) >= 0);
+ assert_se((b = open("/dev/null", O_RDONLY|O_CLOEXEC)) >= 0);
+ assert_se((c = fcntl(a, F_DUPFD, 3)) >= 0);
+
+ assert_se(same_fd(p[0], p[0]) > 0);
+ assert_se(same_fd(p[1], p[1]) > 0);
+ assert_se(same_fd(a, a) > 0);
+ assert_se(same_fd(b, b) > 0);
+
+ assert_se(same_fd(a, p[0]) > 0);
+ assert_se(same_fd(p[0], a) > 0);
+ assert_se(same_fd(c, p[0]) > 0);
+ assert_se(same_fd(p[0], c) > 0);
+ assert_se(same_fd(a, c) > 0);
+ assert_se(same_fd(c, a) > 0);
+
+ assert_se(same_fd(p[0], p[1]) == 0);
+ assert_se(same_fd(p[1], p[0]) == 0);
+ assert_se(same_fd(p[0], b) == 0);
+ assert_se(same_fd(b, p[0]) == 0);
+ assert_se(same_fd(p[1], a) == 0);
+ assert_se(same_fd(a, p[1]) == 0);
+ assert_se(same_fd(p[1], b) == 0);
+ assert_se(same_fd(b, p[1]) == 0);
+
+ assert_se(same_fd(a, b) == 0);
+ assert_se(same_fd(b, a) == 0);
+}
+
+static void test_open_serialization_fd(void) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open_serialization_fd("test");
+ assert_se(fd >= 0);
+
+ assert_se(write(fd, "test\n", 5) == 5);
+}
+
+static void test_acquire_data_fd_one(unsigned flags) {
+ char wbuffer[196*1024 - 7];
+ char rbuffer[sizeof(wbuffer)];
+ int fd;
+
+ fd = acquire_data_fd("foo", 3, flags);
+ assert_se(fd >= 0);
+
+ zero(rbuffer);
+ assert_se(read(fd, rbuffer, sizeof(rbuffer)) == 3);
+ assert_se(streq(rbuffer, "foo"));
+
+ fd = safe_close(fd);
+
+ fd = acquire_data_fd("", 0, flags);
+ assert_se(fd >= 0);
+
+ zero(rbuffer);
+ assert_se(read(fd, rbuffer, sizeof(rbuffer)) == 0);
+ assert_se(streq(rbuffer, ""));
+
+ fd = safe_close(fd);
+
+ random_bytes(wbuffer, sizeof(wbuffer));
+
+ fd = acquire_data_fd(wbuffer, sizeof(wbuffer), flags);
+ assert_se(fd >= 0);
+
+ zero(rbuffer);
+ assert_se(read(fd, rbuffer, sizeof(rbuffer)) == sizeof(rbuffer));
+ assert_se(memcmp(rbuffer, wbuffer, sizeof(rbuffer)) == 0);
+
+ fd = safe_close(fd);
+}
+
+static void test_acquire_data_fd(void) {
+
+ test_acquire_data_fd_one(0);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL);
+ test_acquire_data_fd_one(ACQUIRE_NO_MEMFD);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD);
+ test_acquire_data_fd_one(ACQUIRE_NO_PIPE);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_PIPE);
+ test_acquire_data_fd_one(ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE|ACQUIRE_NO_TMPFILE);
+}
+
+static void test_fd_move_above_stdio(void) {
+ int original_stdin, new_fd;
+
+ original_stdin = fcntl(0, F_DUPFD, 3);
+ assert_se(original_stdin >= 3);
+ assert_se(close_nointr(0) != EBADF);
+
+ new_fd = open("/dev/null", O_RDONLY);
+ assert_se(new_fd == 0);
+
+ new_fd = fd_move_above_stdio(new_fd);
+ assert_se(new_fd >= 3);
+
+ assert_se(dup(original_stdin) == 0);
+ assert_se(close_nointr(original_stdin) != EBADF);
+ assert_se(close_nointr(new_fd) != EBADF);
+}
+
+static void test_rearrange_stdio(void) {
+ pid_t pid;
+ int r;
+
+ r = safe_fork("rearrange", FORK_WAIT|FORK_LOG, &pid);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ _cleanup_free_ char *path = NULL;
+ char buffer[10];
+
+ /* Child */
+
+ safe_close(STDERR_FILENO); /* Let's close an fd < 2, to make it more interesting */
+
+ assert_se(rearrange_stdio(-1, -1, -1) >= 0);
+
+ assert_se(fd_get_path(STDIN_FILENO, &path) >= 0);
+ assert_se(path_equal(path, "/dev/null"));
+ path = mfree(path);
+
+ assert_se(fd_get_path(STDOUT_FILENO, &path) >= 0);
+ assert_se(path_equal(path, "/dev/null"));
+ path = mfree(path);
+
+ assert_se(fd_get_path(STDOUT_FILENO, &path) >= 0);
+ assert_se(path_equal(path, "/dev/null"));
+ path = mfree(path);
+
+ safe_close(STDIN_FILENO);
+ safe_close(STDOUT_FILENO);
+ safe_close(STDERR_FILENO);
+
+ {
+ int pair[2];
+ assert_se(pipe(pair) >= 0);
+ assert_se(pair[0] == 0);
+ assert_se(pair[1] == 1);
+ assert_se(fd_move_above_stdio(0) == 3);
+ }
+ assert_se(open("/dev/full", O_WRONLY|O_CLOEXEC) == 0);
+ assert_se(acquire_data_fd("foobar", 6, 0) == 2);
+
+ assert_se(rearrange_stdio(2, 0, 1) >= 0);
+
+ assert_se(write(1, "x", 1) < 0 && errno == ENOSPC);
+ assert_se(write(2, "z", 1) == 1);
+ assert_se(read(3, buffer, sizeof(buffer)) == 1);
+ assert_se(buffer[0] == 'z');
+ assert_se(read(0, buffer, sizeof(buffer)) == 6);
+ assert_se(memcmp(buffer, "foobar", 6) == 0);
+
+ assert_se(rearrange_stdio(-1, 1, 2) >= 0);
+ assert_se(write(1, "a", 1) < 0 && errno == ENOSPC);
+ assert_se(write(2, "y", 1) == 1);
+ assert_se(read(3, buffer, sizeof(buffer)) == 1);
+ assert_se(buffer[0] == 'y');
+
+ assert_se(fd_get_path(0, &path) >= 0);
+ assert_se(path_equal(path, "/dev/null"));
+ path = mfree(path);
+
+ _exit(EXIT_SUCCESS);
+ }
+}
+
+static void assert_equal_fd(int fd1, int fd2) {
+
+ for (;;) {
+ uint8_t a[4096], b[4096];
+ ssize_t x, y;
+
+ x = read(fd1, a, sizeof(a));
+ assert(x >= 0);
+
+ y = read(fd2, b, sizeof(b));
+ assert(y >= 0);
+
+ assert(x == y);
+
+ if (x == 0)
+ break;
+
+ assert(memcmp(a, b, x) == 0);
+ }
+}
+
+static void test_fd_duplicate_data_fd(void) {
+ _cleanup_close_ int fd1 = -1, fd2 = -1;
+ _cleanup_(close_pairp) int sfd[2] = { -1, -1 };
+ _cleanup_(sigkill_waitp) pid_t pid = -1;
+ uint64_t i, j;
+ int r;
+
+ fd1 = open("/etc/fstab", O_RDONLY|O_CLOEXEC);
+ if (fd1 >= 0) {
+
+ fd2 = fd_duplicate_data_fd(fd1);
+ assert_se(fd2 >= 0);
+
+ assert_se(lseek(fd1, 0, SEEK_SET) == 0);
+ assert_equal_fd(fd1, fd2);
+ }
+
+ fd1 = safe_close(fd1);
+ fd2 = safe_close(fd2);
+
+ fd1 = acquire_data_fd("hallo", 6, 0);
+ assert_se(fd1 >= 0);
+
+ fd2 = fd_duplicate_data_fd(fd1);
+ assert_se(fd2 >= 0);
+
+ safe_close(fd1);
+ fd1 = acquire_data_fd("hallo", 6, 0);
+ assert_se(fd1 >= 0);
+
+ assert_equal_fd(fd1, fd2);
+
+ fd1 = safe_close(fd1);
+ fd2 = safe_close(fd2);
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, sfd) >= 0);
+
+ r = safe_fork("(sd-pipe)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* child */
+
+ sfd[0] = safe_close(sfd[0]);
+
+ for (i = 0; i < 1536*1024 / sizeof(uint64_t); i++)
+ assert_se(write(sfd[1], &i, sizeof(i)) == sizeof(i));
+
+ sfd[1] = safe_close(sfd[1]);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ sfd[1] = safe_close(sfd[1]);
+
+ fd2 = fd_duplicate_data_fd(sfd[0]);
+ assert_se(fd2 >= 0);
+
+ for (i = 0; i < 1536*1024 / sizeof(uint64_t); i++) {
+ assert_se(read(fd2, &j, sizeof(j)) == sizeof(j));
+ assert_se(i == j);
+ }
+
+ assert_se(read(fd2, &j, sizeof(j)) == 0);
+}
+
+static void test_read_nr_open(void) {
+ log_info("nr-open: %i", read_nr_open());
+}
+
+int main(int argc, char *argv[]) {
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_close_many();
+ test_close_nointr();
+ test_same_fd();
+ test_open_serialization_fd();
+ test_acquire_data_fd();
+ test_fd_move_above_stdio();
+ test_rearrange_stdio();
+ test_fd_duplicate_data_fd();
+ test_read_nr_open();
+
+ return 0;
+}
diff --git a/src/test/test-fdset.c b/src/test/test-fdset.c
new file mode 100644
index 0000000..fb9d397
--- /dev/null
+++ b/src/test/test-fdset.c
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "fdset.h"
+#include "macro.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static void test_fdset_new_fill(void) {
+ int fd = -1;
+ _cleanup_fdset_free_ FDSet *fdset = NULL;
+ char name[] = "/tmp/test-fdset_new_fill.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(fdset_new_fill(&fdset) >= 0);
+ assert_se(fdset_contains(fdset, fd));
+
+ unlink(name);
+}
+
+static void test_fdset_put_dup(void) {
+ _cleanup_close_ int fd = -1;
+ int copyfd = -1;
+ _cleanup_fdset_free_ FDSet *fdset = NULL;
+ char name[] = "/tmp/test-fdset_put_dup.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+
+ fdset = fdset_new();
+ assert_se(fdset);
+ copyfd = fdset_put_dup(fdset, fd);
+ assert_se(copyfd >= 0 && copyfd != fd);
+ assert_se(fdset_contains(fdset, copyfd));
+ assert_se(!fdset_contains(fdset, fd));
+
+ unlink(name);
+}
+
+static void test_fdset_cloexec(void) {
+ int fd = -1;
+ _cleanup_fdset_free_ FDSet *fdset = NULL;
+ int flags = -1;
+ char name[] = "/tmp/test-fdset_cloexec.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+
+ fdset = fdset_new();
+ assert_se(fdset);
+ assert_se(fdset_put(fdset, fd));
+
+ assert_se(fdset_cloexec(fdset, false) >= 0);
+ flags = fcntl(fd, F_GETFD);
+ assert_se(flags >= 0);
+ assert_se(!(flags & FD_CLOEXEC));
+
+ assert_se(fdset_cloexec(fdset, true) >= 0);
+ flags = fcntl(fd, F_GETFD);
+ assert_se(flags >= 0);
+ assert_se(flags & FD_CLOEXEC);
+
+ unlink(name);
+}
+
+static void test_fdset_close_others(void) {
+ int fd = -1;
+ int copyfd = -1;
+ _cleanup_fdset_free_ FDSet *fdset = NULL;
+ int flags = -1;
+ char name[] = "/tmp/test-fdset_close_others.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+
+ fdset = fdset_new();
+ assert_se(fdset);
+ copyfd = fdset_put_dup(fdset, fd);
+ assert_se(copyfd >= 0);
+
+ assert_se(fdset_close_others(fdset) >= 0);
+ flags = fcntl(fd, F_GETFD);
+ assert_se(flags < 0);
+ flags = fcntl(copyfd, F_GETFD);
+ assert_se(flags >= 0);
+
+ unlink(name);
+}
+
+static void test_fdset_remove(void) {
+ _cleanup_close_ int fd = -1;
+ FDSet *fdset = NULL;
+ char name[] = "/tmp/test-fdset_remove.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+
+ fdset = fdset_new();
+ assert_se(fdset);
+ assert_se(fdset_put(fdset, fd) >= 0);
+ assert_se(fdset_remove(fdset, fd) >= 0);
+ assert_se(!fdset_contains(fdset, fd));
+ fdset_free(fdset);
+
+ assert_se(fcntl(fd, F_GETFD) >= 0);
+
+ unlink(name);
+}
+
+static void test_fdset_iterate(void) {
+ int fd = -1;
+ FDSet *fdset = NULL;
+ char name[] = "/tmp/test-fdset_iterate.XXXXXX";
+ Iterator i;
+ int c = 0;
+ int a;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+
+ fdset = fdset_new();
+ assert_se(fdset);
+ assert_se(fdset_put(fdset, fd) >= 0);
+ assert_se(fdset_put(fdset, fd) >= 0);
+ assert_se(fdset_put(fdset, fd) >= 0);
+
+ FDSET_FOREACH(a, fdset, i) {
+ c++;
+ assert_se(a == fd);
+ }
+ assert_se(c == 1);
+
+ fdset_free(fdset);
+
+ unlink(name);
+}
+
+static void test_fdset_isempty(void) {
+ int fd;
+ _cleanup_fdset_free_ FDSet *fdset = NULL;
+ char name[] = "/tmp/test-fdset_isempty.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+
+ fdset = fdset_new();
+ assert_se(fdset);
+
+ assert_se(fdset_isempty(fdset));
+ assert_se(fdset_put(fdset, fd) >= 0);
+ assert_se(!fdset_isempty(fdset));
+
+ unlink(name);
+}
+
+static void test_fdset_steal_first(void) {
+ int fd;
+ _cleanup_fdset_free_ FDSet *fdset = NULL;
+ char name[] = "/tmp/test-fdset_steal_first.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+
+ fdset = fdset_new();
+ assert_se(fdset);
+
+ assert_se(fdset_steal_first(fdset) < 0);
+ assert_se(fdset_put(fdset, fd) >= 0);
+ assert_se(fdset_steal_first(fdset) == fd);
+ assert_se(fdset_steal_first(fdset) < 0);
+ assert_se(fdset_put(fdset, fd) >= 0);
+
+ unlink(name);
+}
+
+static void test_fdset_new_array(void) {
+ int fds[] = {10, 11, 12, 13};
+ _cleanup_fdset_free_ FDSet *fdset = NULL;
+
+ assert_se(fdset_new_array(&fdset, fds, 4) >= 0);
+ assert_se(fdset_size(fdset) == 4);
+ assert_se(fdset_contains(fdset, 10));
+ assert_se(fdset_contains(fdset, 11));
+ assert_se(fdset_contains(fdset, 12));
+ assert_se(fdset_contains(fdset, 13));
+}
+
+int main(int argc, char *argv[]) {
+ test_fdset_new_fill();
+ test_fdset_put_dup();
+ test_fdset_cloexec();
+ test_fdset_close_others();
+ test_fdset_remove();
+ test_fdset_iterate();
+ test_fdset_isempty();
+ test_fdset_steal_first();
+ test_fdset_new_array();
+
+ return 0;
+}
diff --git a/src/test/test-fileio.c b/src/test/test-fileio.c
new file mode 100644
index 0000000..2ddaabe
--- /dev/null
+++ b/src/test/test-fileio.c
@@ -0,0 +1,856 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "ctype.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static void test_parse_env_file(void) {
+ _cleanup_(unlink_tempfilep) char
+ t[] = "/tmp/test-fileio-in-XXXXXX",
+ p[] = "/tmp/test-fileio-out-XXXXXX";
+ FILE *f;
+ _cleanup_free_ char *one = NULL, *two = NULL, *three = NULL, *four = NULL, *five = NULL,
+ *six = NULL, *seven = NULL, *eight = NULL, *nine = NULL, *ten = NULL,
+ *eleven = NULL, *twelve = NULL, *thirteen = NULL;
+ _cleanup_strv_free_ char **a = NULL, **b = NULL;
+ char **i;
+ unsigned k;
+ int r;
+
+ assert_se(fmkostemp_safe(t, "w", &f) == 0);
+ fputs("one=BAR \n"
+ "# comment\n"
+ " # comment \n"
+ " ; comment \n"
+ " two = bar \n"
+ "invalid line\n"
+ "invalid line #comment\n"
+ "three = \"333\n"
+ "xxxx\"\n"
+ "four = \'44\\\"44\'\n"
+ "five = \"55\\\"55\" \"FIVE\" cinco \n"
+ "six = seis sechs\\\n"
+ " sis\n"
+ "seven=\"sevenval\" #nocomment\n"
+ "eight=eightval #nocomment\n"
+ "export nine=nineval\n"
+ "ten=ignored\n"
+ "ten=ignored\n"
+ "ten=\n"
+ "eleven=\\value\n"
+ "twelve=\"\\value\"\n"
+ "thirteen='\\value'", f);
+
+ fflush(f);
+ fclose(f);
+
+ r = load_env_file(NULL, t, &a);
+ assert_se(r >= 0);
+
+ STRV_FOREACH(i, a)
+ log_info("Got: <%s>", *i);
+
+ assert_se(streq_ptr(a[0], "one=BAR"));
+ assert_se(streq_ptr(a[1], "two=bar"));
+ assert_se(streq_ptr(a[2], "three=333\nxxxx"));
+ assert_se(streq_ptr(a[3], "four=44\\\"44"));
+ assert_se(streq_ptr(a[4], "five=55\"55FIVEcinco"));
+ assert_se(streq_ptr(a[5], "six=seis sechs sis"));
+ assert_se(streq_ptr(a[6], "seven=sevenval#nocomment"));
+ assert_se(streq_ptr(a[7], "eight=eightval #nocomment"));
+ assert_se(streq_ptr(a[8], "export nine=nineval"));
+ assert_se(streq_ptr(a[9], "ten="));
+ assert_se(streq_ptr(a[10], "eleven=value"));
+ assert_se(streq_ptr(a[11], "twelve=\\value"));
+ assert_se(streq_ptr(a[12], "thirteen=\\value"));
+ assert_se(a[13] == NULL);
+
+ strv_env_clean(a);
+
+ k = 0;
+ STRV_FOREACH(i, b) {
+ log_info("Got2: <%s>", *i);
+ assert_se(streq(*i, a[k++]));
+ }
+
+ r = parse_env_file(
+ NULL, t,
+ "one", &one,
+ "two", &two,
+ "three", &three,
+ "four", &four,
+ "five", &five,
+ "six", &six,
+ "seven", &seven,
+ "eight", &eight,
+ "export nine", &nine,
+ "ten", &ten,
+ "eleven", &eleven,
+ "twelve", &twelve,
+ "thirteen", &thirteen);
+
+ assert_se(r >= 0);
+
+ log_info("one=[%s]", strna(one));
+ log_info("two=[%s]", strna(two));
+ log_info("three=[%s]", strna(three));
+ log_info("four=[%s]", strna(four));
+ log_info("five=[%s]", strna(five));
+ log_info("six=[%s]", strna(six));
+ log_info("seven=[%s]", strna(seven));
+ log_info("eight=[%s]", strna(eight));
+ log_info("export nine=[%s]", strna(nine));
+ log_info("ten=[%s]", strna(nine));
+ log_info("eleven=[%s]", strna(eleven));
+ log_info("twelve=[%s]", strna(twelve));
+ log_info("thirteen=[%s]", strna(thirteen));
+
+ assert_se(streq(one, "BAR"));
+ assert_se(streq(two, "bar"));
+ assert_se(streq(three, "333\nxxxx"));
+ assert_se(streq(four, "44\\\"44"));
+ assert_se(streq(five, "55\"55FIVEcinco"));
+ assert_se(streq(six, "seis sechs sis"));
+ assert_se(streq(seven, "sevenval#nocomment"));
+ assert_se(streq(eight, "eightval #nocomment"));
+ assert_se(streq(nine, "nineval"));
+ assert_se(ten == NULL);
+ assert_se(streq(eleven, "value"));
+ assert_se(streq(twelve, "\\value"));
+ assert_se(streq(thirteen, "\\value"));
+
+ {
+ /* prepare a temporary file to write the environment to */
+ _cleanup_close_ int fd = mkostemp_safe(p);
+ assert_se(fd >= 0);
+ }
+
+ r = write_env_file(p, a);
+ assert_se(r >= 0);
+
+ r = load_env_file(NULL, p, &b);
+ assert_se(r >= 0);
+}
+
+static void test_parse_multiline_env_file(void) {
+ _cleanup_(unlink_tempfilep) char
+ t[] = "/tmp/test-fileio-in-XXXXXX",
+ p[] = "/tmp/test-fileio-out-XXXXXX";
+ FILE *f;
+ _cleanup_strv_free_ char **a = NULL, **b = NULL;
+ char **i;
+ int r;
+
+ assert_se(fmkostemp_safe(t, "w", &f) == 0);
+ fputs("one=BAR\\\n"
+ " VAR\\\n"
+ "\tGAR\n"
+ "#comment\n"
+ "two=\"bar\\\n"
+ " var\\\n"
+ "\tgar\"\n"
+ "#comment\n"
+ "tri=\"bar \\\n"
+ " var \\\n"
+ "\tgar \"\n", f);
+
+ fflush(f);
+ fclose(f);
+
+ r = load_env_file(NULL, t, &a);
+ assert_se(r >= 0);
+
+ STRV_FOREACH(i, a)
+ log_info("Got: <%s>", *i);
+
+ assert_se(streq_ptr(a[0], "one=BAR VAR\tGAR"));
+ assert_se(streq_ptr(a[1], "two=bar var\tgar"));
+ assert_se(streq_ptr(a[2], "tri=bar var \tgar "));
+ assert_se(a[3] == NULL);
+
+ {
+ _cleanup_close_ int fd = mkostemp_safe(p);
+ assert_se(fd >= 0);
+ }
+
+ r = write_env_file(p, a);
+ assert_se(r >= 0);
+
+ r = load_env_file(NULL, p, &b);
+ assert_se(r >= 0);
+}
+
+static void test_merge_env_file(void) {
+ _cleanup_(unlink_tempfilep) char t[] = "/tmp/test-fileio-XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **a = NULL;
+ char **i;
+ int r;
+
+ assert_se(fmkostemp_safe(t, "w", &f) == 0);
+ log_info("/* %s (%s) */", __func__, t);
+
+ r = write_string_stream(f,
+ "one=1 \n"
+ "twelve=${one}2\n"
+ "twentyone=2${one}\n"
+ "one=2\n"
+ "twentytwo=2${one}\n"
+ "xxx_minus_three=$xxx - 3\n"
+ "xxx=0x$one$one$one\n"
+ "yyy=${one:-fallback}\n"
+ "zzz=${one:+replacement}\n"
+ "zzzz=${foobar:-${nothing}}\n"
+ "zzzzz=${nothing:+${nothing}}\n"
+ , WRITE_STRING_FILE_AVOID_NEWLINE);
+ assert(r >= 0);
+
+ r = merge_env_file(&a, NULL, t);
+ assert_se(r >= 0);
+ strv_sort(a);
+
+ STRV_FOREACH(i, a)
+ log_info("Got: <%s>", *i);
+
+ assert_se(streq(a[0], "one=2"));
+ assert_se(streq(a[1], "twelve=12"));
+ assert_se(streq(a[2], "twentyone=21"));
+ assert_se(streq(a[3], "twentytwo=22"));
+ assert_se(streq(a[4], "xxx=0x222"));
+ assert_se(streq(a[5], "xxx_minus_three= - 3"));
+ assert_se(streq(a[6], "yyy=2"));
+ assert_se(streq(a[7], "zzz=replacement"));
+ assert_se(streq(a[8], "zzzz="));
+ assert_se(streq(a[9], "zzzzz="));
+ assert_se(a[10] == NULL);
+
+ r = merge_env_file(&a, NULL, t);
+ assert_se(r >= 0);
+ strv_sort(a);
+
+ STRV_FOREACH(i, a)
+ log_info("Got2: <%s>", *i);
+
+ assert_se(streq(a[0], "one=2"));
+ assert_se(streq(a[1], "twelve=12"));
+ assert_se(streq(a[2], "twentyone=21"));
+ assert_se(streq(a[3], "twentytwo=22"));
+ assert_se(streq(a[4], "xxx=0x222"));
+ assert_se(streq(a[5], "xxx_minus_three=0x222 - 3"));
+ assert_se(streq(a[6], "yyy=2"));
+ assert_se(streq(a[7], "zzz=replacement"));
+ assert_se(streq(a[8], "zzzz="));
+ assert_se(streq(a[9], "zzzzz="));
+ assert_se(a[10] == NULL);
+}
+
+static void test_merge_env_file_invalid(void) {
+ _cleanup_(unlink_tempfilep) char t[] = "/tmp/test-fileio-XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **a = NULL;
+ char **i;
+ int r;
+
+ assert_se(fmkostemp_safe(t, "w", &f) == 0);
+ log_info("/* %s (%s) */", __func__, t);
+
+ r = write_string_stream(f,
+ "unset one \n"
+ "unset one= \n"
+ "unset one=1 \n"
+ "one \n"
+ "one = \n"
+ "one two =\n"
+ "\x20two=\n"
+ "#comment=comment\n"
+ ";comment2=comment2\n"
+ "#\n"
+ "\n\n" /* empty line */
+ , WRITE_STRING_FILE_AVOID_NEWLINE);
+ assert(r >= 0);
+
+ r = merge_env_file(&a, NULL, t);
+ assert_se(r >= 0);
+
+ STRV_FOREACH(i, a)
+ log_info("Got: <%s>", *i);
+
+ assert_se(strv_isempty(a));
+}
+
+static void test_executable_is_script(void) {
+ _cleanup_(unlink_tempfilep) char t[] = "/tmp/test-fileio-XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+ char *command;
+ int r;
+
+ assert_se(fmkostemp_safe(t, "w", &f) == 0);
+ fputs("#! /bin/script -a -b \ngoo goo", f);
+ fflush(f);
+
+ r = executable_is_script(t, &command);
+ assert_se(r > 0);
+ assert_se(streq(command, "/bin/script"));
+ free(command);
+
+ r = executable_is_script("/bin/sh", &command);
+ assert_se(r == 0);
+
+ r = executable_is_script("/usr/bin/yum", &command);
+ assert_se(r > 0 || r == -ENOENT);
+ if (r > 0) {
+ assert_se(startswith(command, "/"));
+ free(command);
+ }
+}
+
+static void test_status_field(void) {
+ _cleanup_free_ char *t = NULL, *p = NULL, *s = NULL, *z = NULL;
+ unsigned long long total = 0, buffers = 0;
+ int r;
+
+ assert_se(get_proc_field("/proc/self/status", "Threads", WHITESPACE, &t) == 0);
+ puts(t);
+ assert_se(streq(t, "1"));
+
+ r = get_proc_field("/proc/meminfo", "MemTotal", WHITESPACE, &p);
+ if (r != -ENOENT) {
+ assert_se(r == 0);
+ puts(p);
+ assert_se(safe_atollu(p, &total) == 0);
+ }
+
+ r = get_proc_field("/proc/meminfo", "Buffers", WHITESPACE, &s);
+ if (r != -ENOENT) {
+ assert_se(r == 0);
+ puts(s);
+ assert_se(safe_atollu(s, &buffers) == 0);
+ }
+
+ if (p)
+ assert_se(buffers < total);
+
+ /* Seccomp should be a good test for field full of zeros. */
+ r = get_proc_field("/proc/meminfo", "Seccomp", WHITESPACE, &z);
+ if (r != -ENOENT) {
+ assert_se(r == 0);
+ puts(z);
+ assert_se(safe_atollu(z, &buffers) == 0);
+ }
+}
+
+static void test_capeff(void) {
+ int pid, p;
+
+ for (pid = 0; pid < 2; pid++) {
+ _cleanup_free_ char *capeff = NULL;
+ int r;
+
+ r = get_process_capeff(0, &capeff);
+ log_info("capeff: '%s' (r=%d)", capeff, r);
+
+ if (IN_SET(r, -ENOENT, -EPERM))
+ return;
+
+ assert_se(r == 0);
+ assert_se(*capeff);
+ p = capeff[strspn(capeff, HEXDIGITS)];
+ assert_se(!p || isspace(p));
+ }
+}
+
+static void test_write_string_stream(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-write_string_stream-XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+ int fd;
+ char buf[64];
+
+ fd = mkostemp_safe(fn);
+ assert_se(fd >= 0);
+
+ f = fdopen(fd, "r");
+ assert_se(f);
+ assert_se(write_string_stream(f, "boohoo", 0) < 0);
+ f = safe_fclose(f);
+
+ f = fopen(fn, "r+");
+ assert_se(f);
+
+ assert_se(write_string_stream(f, "boohoo", 0) == 0);
+ rewind(f);
+
+ assert_se(fgets(buf, sizeof(buf), f));
+ assert_se(streq(buf, "boohoo\n"));
+ f = safe_fclose(f);
+
+ f = fopen(fn, "w+");
+ assert_se(f);
+
+ assert_se(write_string_stream(f, "boohoo", WRITE_STRING_FILE_AVOID_NEWLINE) == 0);
+ rewind(f);
+
+ assert_se(fgets(buf, sizeof(buf), f));
+ printf(">%s<", buf);
+ assert_se(streq(buf, "boohoo"));
+}
+
+static void test_write_string_file(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-write_string_file-XXXXXX";
+ char buf[64] = {};
+ _cleanup_close_ int fd;
+
+ fd = mkostemp_safe(fn);
+ assert_se(fd >= 0);
+
+ assert_se(write_string_file(fn, "boohoo", WRITE_STRING_FILE_CREATE) == 0);
+
+ assert_se(read(fd, buf, sizeof(buf)) == 7);
+ assert_se(streq(buf, "boohoo\n"));
+}
+
+static void test_write_string_file_no_create(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-write_string_file_no_create-XXXXXX";
+ _cleanup_close_ int fd;
+ char buf[64] = {0};
+
+ fd = mkostemp_safe(fn);
+ assert_se(fd >= 0);
+
+ assert_se(write_string_file("/a/file/which/does/not/exists/i/guess", "boohoo", 0) < 0);
+ assert_se(write_string_file(fn, "boohoo", 0) == 0);
+
+ assert_se(read(fd, buf, sizeof(buf)) == STRLEN("boohoo\n"));
+ assert_se(streq(buf, "boohoo\n"));
+}
+
+static void test_write_string_file_verify(void) {
+ _cleanup_free_ char *buf = NULL, *buf2 = NULL;
+ int r;
+
+ assert_se(read_one_line_file("/proc/cmdline", &buf) >= 0);
+ assert_se(buf2 = strjoin(buf, "\n"));
+
+ r = write_string_file("/proc/cmdline", buf, 0);
+ assert_se(IN_SET(r, -EACCES, -EIO));
+ r = write_string_file("/proc/cmdline", buf2, 0);
+ assert_se(IN_SET(r, -EACCES, -EIO));
+
+ assert_se(write_string_file("/proc/cmdline", buf, WRITE_STRING_FILE_VERIFY_ON_FAILURE) == 0);
+ assert_se(write_string_file("/proc/cmdline", buf2, WRITE_STRING_FILE_VERIFY_ON_FAILURE) == 0);
+
+ r = write_string_file("/proc/cmdline", buf, WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_AVOID_NEWLINE);
+ assert_se(IN_SET(r, -EACCES, -EIO));
+ assert_se(write_string_file("/proc/cmdline", buf2, WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_AVOID_NEWLINE) == 0);
+}
+
+static void test_load_env_file_pairs(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-load_env_file_pairs-XXXXXX";
+ int fd, r;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ char **k, **v;
+
+ fd = mkostemp_safe(fn);
+ assert_se(fd >= 0);
+
+ r = write_string_file(fn,
+ "NAME=\"Arch Linux\"\n"
+ "ID=arch\n"
+ "PRETTY_NAME=\"Arch Linux\"\n"
+ "ANSI_COLOR=\"0;36\"\n"
+ "HOME_URL=\"https://www.archlinux.org/\"\n"
+ "SUPPORT_URL=\"https://bbs.archlinux.org/\"\n"
+ "BUG_REPORT_URL=\"https://bugs.archlinux.org/\"\n",
+ WRITE_STRING_FILE_CREATE);
+ assert_se(r == 0);
+
+ f = fdopen(fd, "r");
+ assert_se(f);
+
+ r = load_env_file_pairs(f, fn, &l);
+ assert_se(r >= 0);
+
+ assert_se(strv_length(l) == 14);
+ STRV_FOREACH_PAIR(k, v, l) {
+ assert_se(STR_IN_SET(*k, "NAME", "ID", "PRETTY_NAME", "ANSI_COLOR", "HOME_URL", "SUPPORT_URL", "BUG_REPORT_URL"));
+ printf("%s=%s\n", *k, *v);
+ if (streq(*k, "NAME")) assert_se(streq(*v, "Arch Linux"));
+ if (streq(*k, "ID")) assert_se(streq(*v, "arch"));
+ if (streq(*k, "PRETTY_NAME")) assert_se(streq(*v, "Arch Linux"));
+ if (streq(*k, "ANSI_COLOR")) assert_se(streq(*v, "0;36"));
+ if (streq(*k, "HOME_URL")) assert_se(streq(*v, "https://www.archlinux.org/"));
+ if (streq(*k, "SUPPORT_URL")) assert_se(streq(*v, "https://bbs.archlinux.org/"));
+ if (streq(*k, "BUG_REPORT_URL")) assert_se(streq(*v, "https://bugs.archlinux.org/"));
+ }
+}
+
+static void test_search_and_fopen(void) {
+ const char *dirs[] = {"/tmp/foo/bar", "/tmp", NULL};
+
+ char name[] = "/tmp/test-search_and_fopen.XXXXXX";
+ int fd, r;
+ FILE *f;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ close(fd);
+
+ r = search_and_fopen(basename(name), "r", NULL, dirs, &f);
+ assert_se(r >= 0);
+ fclose(f);
+
+ r = search_and_fopen(name, "r", NULL, dirs, &f);
+ assert_se(r >= 0);
+ fclose(f);
+
+ r = search_and_fopen(basename(name), "r", "/", dirs, &f);
+ assert_se(r >= 0);
+ fclose(f);
+
+ r = search_and_fopen("/a/file/which/does/not/exist/i/guess", "r", NULL, dirs, &f);
+ assert_se(r < 0);
+ r = search_and_fopen("afilewhichdoesnotexistiguess", "r", NULL, dirs, &f);
+ assert_se(r < 0);
+
+ r = unlink(name);
+ assert_se(r == 0);
+
+ r = search_and_fopen(basename(name), "r", NULL, dirs, &f);
+ assert_se(r < 0);
+}
+
+static void test_search_and_fopen_nulstr(void) {
+ const char dirs[] = "/tmp/foo/bar\0/tmp\0";
+
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-search_and_fopen.XXXXXX";
+ int fd, r;
+ FILE *f;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ close(fd);
+
+ r = search_and_fopen_nulstr(basename(name), "r", NULL, dirs, &f);
+ assert_se(r >= 0);
+ fclose(f);
+
+ r = search_and_fopen_nulstr(name, "r", NULL, dirs, &f);
+ assert_se(r >= 0);
+ fclose(f);
+
+ r = search_and_fopen_nulstr("/a/file/which/does/not/exist/i/guess", "r", NULL, dirs, &f);
+ assert_se(r < 0);
+ r = search_and_fopen_nulstr("afilewhichdoesnotexistiguess", "r", NULL, dirs, &f);
+ assert_se(r < 0);
+
+ r = unlink(name);
+ assert_se(r == 0);
+
+ r = search_and_fopen_nulstr(basename(name), "r", NULL, dirs, &f);
+ assert_se(r < 0);
+}
+
+static void test_writing_tmpfile(void) {
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-systemd_writing_tmpfile.XXXXXX";
+ _cleanup_free_ char *contents = NULL;
+ size_t size;
+ _cleanup_close_ int fd = -1;
+ struct iovec iov[3];
+ int r;
+
+ iov[0] = IOVEC_MAKE_STRING("abc\n");
+ iov[1] = IOVEC_MAKE_STRING(ALPHANUMERICAL "\n");
+ iov[2] = IOVEC_MAKE_STRING("");
+
+ fd = mkostemp_safe(name);
+ printf("tmpfile: %s", name);
+
+ r = writev(fd, iov, 3);
+ assert_se(r >= 0);
+
+ r = read_full_file(name, &contents, &size);
+ assert_se(r == 0);
+ printf("contents: %s", contents);
+ assert_se(streq(contents, "abc\n" ALPHANUMERICAL "\n"));
+}
+
+static void test_tempfn(void) {
+ char *ret = NULL, *p;
+
+ assert_se(tempfn_xxxxxx("/foo/bar/waldo", NULL, &ret) >= 0);
+ assert_se(streq_ptr(ret, "/foo/bar/.#waldoXXXXXX"));
+ free(ret);
+
+ assert_se(tempfn_xxxxxx("/foo/bar/waldo", "[miau]", &ret) >= 0);
+ assert_se(streq_ptr(ret, "/foo/bar/.#[miau]waldoXXXXXX"));
+ free(ret);
+
+ assert_se(tempfn_random("/foo/bar/waldo", NULL, &ret) >= 0);
+ assert_se(p = startswith(ret, "/foo/bar/.#waldo"));
+ assert_se(strlen(p) == 16);
+ assert_se(in_charset(p, "0123456789abcdef"));
+ free(ret);
+
+ assert_se(tempfn_random("/foo/bar/waldo", "[wuff]", &ret) >= 0);
+ assert_se(p = startswith(ret, "/foo/bar/.#[wuff]waldo"));
+ assert_se(strlen(p) == 16);
+ assert_se(in_charset(p, "0123456789abcdef"));
+ free(ret);
+
+ assert_se(tempfn_random_child("/foo/bar/waldo", NULL, &ret) >= 0);
+ assert_se(p = startswith(ret, "/foo/bar/waldo/.#"));
+ assert_se(strlen(p) == 16);
+ assert_se(in_charset(p, "0123456789abcdef"));
+ free(ret);
+
+ assert_se(tempfn_random_child("/foo/bar/waldo", "[kikiriki]", &ret) >= 0);
+ assert_se(p = startswith(ret, "/foo/bar/waldo/.#[kikiriki]"));
+ assert_se(strlen(p) == 16);
+ assert_se(in_charset(p, "0123456789abcdef"));
+ free(ret);
+}
+
+static const char chars[] =
+ "Aąę„”\n루\377";
+
+static void test_fgetc(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char c;
+
+ f = fmemopen((void*) chars, sizeof(chars), "re");
+ assert_se(f);
+
+ for (unsigned i = 0; i < sizeof(chars); i++) {
+ assert_se(safe_fgetc(f, &c) == 1);
+ assert_se(c == chars[i]);
+
+ /* EOF is -1, and hence we can't push value 255 in this way */
+ assert_se(ungetc(c, f) != EOF || c == EOF);
+ assert_se(c == EOF || safe_fgetc(f, &c) == 1);
+ assert_se(c == chars[i]);
+
+ /* But it works when we push it properly cast */
+ assert_se(ungetc((unsigned char) c, f) != EOF);
+ assert_se(safe_fgetc(f, &c) == 1);
+ assert_se(c == chars[i]);
+ }
+
+ assert_se(safe_fgetc(f, &c) == 0);
+}
+
+static const char buffer[] =
+ "Some test data\n"
+ "루Non-ascii chars: ąę„”\n"
+ "terminators\r\n"
+ "and even more\n\r"
+ "now the same with a NUL\n\0"
+ "and more\r\0"
+ "and even more\r\n\0"
+ "and yet even more\n\r\0"
+ "With newlines, and a NUL byte\0"
+ "\n"
+ "an empty line\n"
+ "an ignored line\n"
+ "and a very long line that is supposed to be truncated, because it is so long\n";
+
+static void test_read_line_one_file(FILE *f) {
+ _cleanup_free_ char *line = NULL;
+
+ assert_se(read_line(f, (size_t) -1, &line) == 15 && streq(line, "Some test data"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, &line) > 0 && streq(line, "루Non-ascii chars: ąę„”"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, &line) == 13 && streq(line, "terminators"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, &line) == 15 && streq(line, "and even more"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, &line) == 25 && streq(line, "now the same with a NUL"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, &line) == 10 && streq(line, "and more"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, &line) == 16 && streq(line, "and even more"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, &line) == 20 && streq(line, "and yet even more"));
+ line = mfree(line);
+
+ assert_se(read_line(f, 1024, &line) == 30 && streq(line, "With newlines, and a NUL byte"));
+ line = mfree(line);
+
+ assert_se(read_line(f, 1024, &line) == 1 && streq(line, ""));
+ line = mfree(line);
+
+ assert_se(read_line(f, 1024, &line) == 14 && streq(line, "an empty line"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, NULL) == 16);
+
+ assert_se(read_line(f, 16, &line) == -ENOBUFS);
+ line = mfree(line);
+
+ /* read_line() stopped when it hit the limit, that means when we continue reading we'll read at the first
+ * character after the previous limit. Let's make use of tha to continue our test. */
+ assert_se(read_line(f, 1024, &line) == 62 && streq(line, "line that is supposed to be truncated, because it is so long"));
+ line = mfree(line);
+
+ assert_se(read_line(f, 1024, &line) == 0 && streq(line, ""));
+}
+
+static void test_read_line(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ f = fmemopen((void*) buffer, sizeof(buffer), "re");
+ assert_se(f);
+
+ test_read_line_one_file(f);
+}
+
+static void test_read_line2(void) {
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-fileio.XXXXXX";
+ int fd;
+ _cleanup_fclose_ FILE *f = NULL;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se((size_t) write(fd, buffer, sizeof(buffer)) == sizeof(buffer));
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(f = fdopen(fd, "r"));
+
+ test_read_line_one_file(f);
+}
+
+static void test_read_line3(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *line = NULL;
+ int r;
+
+ f = fopen("/proc/cmdline", "re");
+ if (!f && IN_SET(errno, ENOENT, EPERM))
+ return;
+ assert_se(f);
+
+ r = read_line(f, LINE_MAX, &line);
+ assert_se((size_t) r == strlen(line) + 1);
+ assert_se(read_line(f, LINE_MAX, NULL) == 0);
+}
+
+static void test_read_line4(void) {
+ static const struct {
+ size_t length;
+ const char *string;
+ } eof_endings[] = {
+ /* Each of these will be followed by EOF and should generate the one same single string */
+ { 3, "foo" },
+ { 4, "foo\n" },
+ { 4, "foo\r" },
+ { 4, "foo\0" },
+ { 5, "foo\n\0" },
+ { 5, "foo\r\0" },
+ { 5, "foo\r\n" },
+ { 5, "foo\n\r" },
+ { 6, "foo\r\n\0" },
+ { 6, "foo\n\r\0" },
+ };
+
+ size_t i;
+ int r;
+
+ for (i = 0; i < ELEMENTSOF(eof_endings); i++) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *s = NULL;
+
+ assert_se(f = fmemopen((void*) eof_endings[i].string, eof_endings[i].length, "r"));
+
+ r = read_line(f, (size_t) -1, &s);
+ assert_se((size_t) r == eof_endings[i].length);
+ assert_se(streq_ptr(s, "foo"));
+
+ assert_se(read_line(f, (size_t) -1, NULL) == 0); /* Ensure we hit EOF */
+ }
+}
+
+static void test_read_nul_string(void) {
+ static const char test[] = "string nr. 1\0"
+ "string nr. 2\n\0"
+ "\377empty string follows\0"
+ "\0"
+ "final string\n is empty\0"
+ "\0";
+
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *s = NULL;
+
+ assert_se(f = fmemopen((void*) test, sizeof(test)-1, "r"));
+
+ assert_se(read_nul_string(f, LONG_LINE_MAX, &s) == 13 && streq_ptr(s, "string nr. 1"));
+ s = mfree(s);
+
+ assert_se(read_nul_string(f, LONG_LINE_MAX, &s) == 14 && streq_ptr(s, "string nr. 2\n"));
+ s = mfree(s);
+
+ assert_se(read_nul_string(f, LONG_LINE_MAX, &s) == 22 && streq_ptr(s, "\377empty string follows"));
+ s = mfree(s);
+
+ assert_se(read_nul_string(f, LONG_LINE_MAX, &s) == 1 && streq_ptr(s, ""));
+ s = mfree(s);
+
+ assert_se(read_nul_string(f, LONG_LINE_MAX, &s) == 23 && streq_ptr(s, "final string\n is empty"));
+ s = mfree(s);
+
+ assert_se(read_nul_string(f, LONG_LINE_MAX, &s) == 1 && streq_ptr(s, ""));
+ s = mfree(s);
+
+ assert_se(read_nul_string(f, LONG_LINE_MAX, &s) == 0 && streq_ptr(s, ""));
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_parse_env_file();
+ test_parse_multiline_env_file();
+ test_merge_env_file();
+ test_merge_env_file_invalid();
+ test_executable_is_script();
+ test_status_field();
+ test_capeff();
+ test_write_string_stream();
+ test_write_string_file();
+ test_write_string_file_no_create();
+ test_write_string_file_verify();
+ test_load_env_file_pairs();
+ test_search_and_fopen();
+ test_search_and_fopen_nulstr();
+ test_writing_tmpfile();
+ test_tempfn();
+ test_fgetc();
+ test_read_line();
+ test_read_line2();
+ test_read_line3();
+ test_read_line4();
+ test_read_nul_string();
+
+ return 0;
+}
diff --git a/src/test/test-firewall-util.c b/src/test/test-firewall-util.c
new file mode 100644
index 0000000..1788e8d
--- /dev/null
+++ b/src/test/test-firewall-util.c
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "firewall-util.h"
+#include "log.h"
+#include "tests.h"
+
+#define MAKE_IN_ADDR_UNION(a,b,c,d) (union in_addr_union) { .in.s_addr = htobe32((uint32_t) (a) << 24 | (uint32_t) (b) << 16 | (uint32_t) (c) << 8 | (uint32_t) (d))}
+
+int main(int argc, char *argv[]) {
+ int r;
+ test_setup_logging(LOG_DEBUG);
+
+ r = fw_add_masquerade(true, AF_INET, 0, NULL, 0, "foobar", NULL, 0);
+ if (r < 0)
+ log_error_errno(r, "Failed to modify firewall: %m");
+
+ r = fw_add_masquerade(true, AF_INET, 0, NULL, 0, "foobar", NULL, 0);
+ if (r < 0)
+ log_error_errno(r, "Failed to modify firewall: %m");
+
+ r = fw_add_masquerade(false, AF_INET, 0, NULL, 0, "foobar", NULL, 0);
+ if (r < 0)
+ log_error_errno(r, "Failed to modify firewall: %m");
+
+ r = fw_add_local_dnat(true, AF_INET, IPPROTO_TCP, NULL, NULL, 0, NULL, 0, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 4), 815, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to modify firewall: %m");
+
+ r = fw_add_local_dnat(true, AF_INET, IPPROTO_TCP, NULL, NULL, 0, NULL, 0, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 4), 815, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to modify firewall: %m");
+
+ r = fw_add_local_dnat(true, AF_INET, IPPROTO_TCP, NULL, NULL, 0, NULL, 0, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 5), 815, &MAKE_IN_ADDR_UNION(1, 2, 3, 4));
+ if (r < 0)
+ log_error_errno(r, "Failed to modify firewall: %m");
+
+ r = fw_add_local_dnat(false, AF_INET, IPPROTO_TCP, NULL, NULL, 0, NULL, 0, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 5), 815, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to modify firewall: %m");
+
+ return 0;
+}
diff --git a/src/test/test-format-table.c b/src/test/test-format-table.c
new file mode 100644
index 0000000..5bede5c
--- /dev/null
+++ b/src/test/test-format-table.c
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "format-table.h"
+#include "string-util.h"
+#include "time-util.h"
+
+static void test_issue_9549(void) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ _cleanup_free_ char *formatted = NULL;
+
+ assert_se(table = table_new("name", "type", "ro", "usage", "created", "modified"));
+ assert_se(table_set_align_percent(table, TABLE_HEADER_CELL(3), 100) >= 0);
+ assert_se(table_add_many(table,
+ TABLE_STRING, "foooo",
+ TABLE_STRING, "raw",
+ TABLE_BOOLEAN, false,
+ TABLE_SIZE, (uint64_t) (673.7*1024*1024),
+ TABLE_STRING, "Wed 2018-07-11 00:10:33 JST",
+ TABLE_STRING, "Wed 2018-07-11 00:16:00 JST") >= 0);
+
+ table_set_width(table, 75);
+ assert_se(table_format(table, &formatted) >= 0);
+
+ printf("%s\n", formatted);
+ assert_se(streq(formatted,
+ "NAME TYPE RO USAGE CREATED MODIFIED \n"
+ "foooo raw no 673.6M Wed 2018-07-11 00:10:33 J… Wed 2018-07-11 00:16:00 JST\n"
+ ));
+}
+
+int main(int argc, char *argv[]) {
+
+ _cleanup_(table_unrefp) Table *t = NULL;
+ _cleanup_free_ char *formatted = NULL;
+
+ assert_se(setenv("COLUMNS", "40", 1) >= 0);
+
+ assert_se(t = table_new("one", "two", "three"));
+
+ assert_se(table_set_align_percent(t, TABLE_HEADER_CELL(2), 100) >= 0);
+
+ assert_se(table_add_many(t,
+ TABLE_STRING, "xxx",
+ TABLE_STRING, "yyy",
+ TABLE_BOOLEAN, true) >= 0);
+
+ assert_se(table_add_many(t,
+ TABLE_STRING, "a long field",
+ TABLE_STRING, "yyy",
+ TABLE_BOOLEAN, false) >= 0);
+
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ "ONE TWO THREE\n"
+ "xxx yyy yes\n"
+ "a long field yyy no\n"));
+
+ formatted = mfree(formatted);
+
+ table_set_width(t, 40);
+
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ "ONE TWO THREE\n"
+ "xxx yyy yes\n"
+ "a long field yyy no\n"));
+
+ formatted = mfree(formatted);
+
+ table_set_width(t, 12);
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ "ONE TWO THR…\n"
+ "xxx yyy yes\n"
+ "a … yyy no\n"));
+
+ formatted = mfree(formatted);
+
+ table_set_width(t, 5);
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ "… … …\n"
+ "… … …\n"
+ "… … …\n"));
+
+ formatted = mfree(formatted);
+
+ table_set_width(t, 3);
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ "… … …\n"
+ "… … …\n"
+ "… … …\n"));
+
+ formatted = mfree(formatted);
+
+ table_set_width(t, (size_t) -1);
+ assert_se(table_set_sort(t, (size_t) 0, (size_t) 2, (size_t) -1) >= 0);
+
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ "ONE TWO THREE\n"
+ "a long field yyy no\n"
+ "xxx yyy yes\n"));
+
+ formatted = mfree(formatted);
+
+ table_set_header(t, false);
+
+ assert_se(table_add_many(t,
+ TABLE_STRING, "fäää",
+ TABLE_STRING, "uuu",
+ TABLE_BOOLEAN, true) >= 0);
+
+ assert_se(table_add_many(t,
+ TABLE_STRING, "fäää",
+ TABLE_STRING, "zzz",
+ TABLE_BOOLEAN, false) >= 0);
+
+ assert_se(table_add_many(t,
+ TABLE_EMPTY,
+ TABLE_SIZE, (uint64_t) 4711,
+ TABLE_TIMESPAN, (usec_t) 5*USEC_PER_MINUTE) >= 0);
+
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ "a long field yyy no\n"
+ "fäää zzz no\n"
+ "fäää uuu yes\n"
+ "xxx yyy yes\n"
+ " 4.6K 5min\n"));
+
+ formatted = mfree(formatted);
+
+ assert_se(table_set_display(t, (size_t) 2, (size_t) 0, (size_t) 2, (size_t) 0, (size_t) 0, (size_t) -1) >= 0);
+
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ " no a long f… no a long f… a long fi…\n"
+ " no fäää no fäää fäää \n"
+ " yes fäää yes fäää fäää \n"
+ " yes xxx yes xxx xxx \n"
+ "5min 5min \n"));
+
+ test_issue_9549();
+
+ return 0;
+}
diff --git a/src/test/test-fs-util.c b/src/test/test-fs-util.c
new file mode 100644
index 0000000..e049abc
--- /dev/null
+++ b/src/test/test-fs-util.c
@@ -0,0 +1,743 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "id128-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+static const char *arg_test_dir = NULL;
+
+static void test_chase_symlinks(void) {
+ _cleanup_free_ char *result = NULL;
+ char *temp;
+ const char *top, *p, *pslash, *q, *qslash;
+ struct stat st;
+ int r, pfd;
+
+ log_info("/* %s */", __func__);
+
+ temp = strjoina(arg_test_dir ?: "/tmp", "/test-chase.XXXXXX");
+ assert_se(mkdtemp(temp));
+
+ top = strjoina(temp, "/top");
+ assert_se(mkdir(top, 0700) >= 0);
+
+ p = strjoina(top, "/dot");
+ if (symlink(".", p) < 0) {
+ assert_se(IN_SET(errno, EINVAL, ENOSYS, ENOTTY, EPERM));
+ log_tests_skipped_errno(errno, "symlink() not possible");
+ goto cleanup;
+ };
+
+ p = strjoina(top, "/dotdot");
+ assert_se(symlink("..", p) >= 0);
+
+ p = strjoina(top, "/dotdota");
+ assert_se(symlink("../a", p) >= 0);
+
+ p = strjoina(temp, "/a");
+ assert_se(symlink("b", p) >= 0);
+
+ p = strjoina(temp, "/b");
+ assert_se(symlink("/usr", p) >= 0);
+
+ p = strjoina(temp, "/start");
+ assert_se(symlink("top/dot/dotdota", p) >= 0);
+
+ /* Paths that use symlinks underneath the "root" */
+
+ r = chase_symlinks(p, NULL, 0, &result);
+ assert_se(r > 0);
+ assert_se(path_equal(result, "/usr"));
+ result = mfree(result);
+
+ pslash = strjoina(p, "/");
+ r = chase_symlinks(pslash, NULL, 0, &result);
+ assert_se(r > 0);
+ assert_se(path_equal(result, "/usr/"));
+ result = mfree(result);
+
+ r = chase_symlinks(p, temp, 0, &result);
+ assert_se(r == -ENOENT);
+
+ r = chase_symlinks(pslash, temp, 0, &result);
+ assert_se(r == -ENOENT);
+
+ q = strjoina(temp, "/usr");
+
+ r = chase_symlinks(p, temp, CHASE_NONEXISTENT, &result);
+ assert_se(r == 0);
+ assert_se(path_equal(result, q));
+ result = mfree(result);
+
+ qslash = strjoina(q, "/");
+
+ r = chase_symlinks(pslash, temp, CHASE_NONEXISTENT, &result);
+ assert_se(r == 0);
+ assert_se(path_equal(result, qslash));
+ result = mfree(result);
+
+ assert_se(mkdir(q, 0700) >= 0);
+
+ r = chase_symlinks(p, temp, 0, &result);
+ assert_se(r > 0);
+ assert_se(path_equal(result, q));
+ result = mfree(result);
+
+ r = chase_symlinks(pslash, temp, 0, &result);
+ assert_se(r > 0);
+ assert_se(path_equal(result, qslash));
+ result = mfree(result);
+
+ p = strjoina(temp, "/slash");
+ assert_se(symlink("/", p) >= 0);
+
+ r = chase_symlinks(p, NULL, 0, &result);
+ assert_se(r > 0);
+ assert_se(path_equal(result, "/"));
+ result = mfree(result);
+
+ r = chase_symlinks(p, temp, 0, &result);
+ assert_se(r > 0);
+ assert_se(path_equal(result, temp));
+ result = mfree(result);
+
+ /* Paths that would "escape" outside of the "root" */
+
+ p = strjoina(temp, "/6dots");
+ assert_se(symlink("../../..", p) >= 0);
+
+ r = chase_symlinks(p, temp, 0, &result);
+ assert_se(r > 0 && path_equal(result, temp));
+ result = mfree(result);
+
+ p = strjoina(temp, "/6dotsusr");
+ assert_se(symlink("../../../usr", p) >= 0);
+
+ r = chase_symlinks(p, temp, 0, &result);
+ assert_se(r > 0 && path_equal(result, q));
+ result = mfree(result);
+
+ p = strjoina(temp, "/top/8dotsusr");
+ assert_se(symlink("../../../../usr", p) >= 0);
+
+ r = chase_symlinks(p, temp, 0, &result);
+ assert_se(r > 0 && path_equal(result, q));
+ result = mfree(result);
+
+ /* Paths that contain repeated slashes */
+
+ p = strjoina(temp, "/slashslash");
+ assert_se(symlink("///usr///", p) >= 0);
+
+ r = chase_symlinks(p, NULL, 0, &result);
+ assert_se(r > 0);
+ assert_se(path_equal(result, "/usr"));
+ result = mfree(result);
+
+ r = chase_symlinks(p, temp, 0, &result);
+ assert_se(r > 0);
+ assert_se(path_equal(result, q));
+ result = mfree(result);
+
+ /* Paths using . */
+
+ r = chase_symlinks("/etc/./.././", NULL, 0, &result);
+ assert_se(r > 0);
+ assert_se(path_equal(result, "/"));
+ result = mfree(result);
+
+ r = chase_symlinks("/etc/./.././", "/etc", 0, &result);
+ assert_se(r > 0 && path_equal(result, "/etc"));
+ result = mfree(result);
+
+ r = chase_symlinks("/../.././//../../etc", NULL, 0, &result);
+ assert_se(r > 0);
+ assert_se(streq(result, "/etc"));
+ result = mfree(result);
+
+ r = chase_symlinks("/../.././//../../test-chase.fsldajfl", NULL, CHASE_NONEXISTENT, &result);
+ assert_se(r == 0);
+ assert_se(streq(result, "/test-chase.fsldajfl"));
+ result = mfree(result);
+
+ r = chase_symlinks("/../.././//../../etc", "/", CHASE_PREFIX_ROOT, &result);
+ assert_se(r > 0);
+ assert_se(streq(result, "/etc"));
+ result = mfree(result);
+
+ r = chase_symlinks("/../.././//../../test-chase.fsldajfl", "/", CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &result);
+ assert_se(r == 0);
+ assert_se(streq(result, "/test-chase.fsldajfl"));
+ result = mfree(result);
+
+ r = chase_symlinks("/etc/machine-id/foo", NULL, 0, &result);
+ assert_se(r == -ENOTDIR);
+ result = mfree(result);
+
+ /* Path that loops back to self */
+
+ p = strjoina(temp, "/recursive-symlink");
+ assert_se(symlink("recursive-symlink", p) >= 0);
+ r = chase_symlinks(p, NULL, 0, &result);
+ assert_se(r == -ELOOP);
+
+ /* Path which doesn't exist */
+
+ p = strjoina(temp, "/idontexist");
+ r = chase_symlinks(p, NULL, 0, &result);
+ assert_se(r == -ENOENT);
+
+ r = chase_symlinks(p, NULL, CHASE_NONEXISTENT, &result);
+ assert_se(r == 0);
+ assert_se(path_equal(result, p));
+ result = mfree(result);
+
+ p = strjoina(temp, "/idontexist/meneither");
+ r = chase_symlinks(p, NULL, 0, &result);
+ assert_se(r == -ENOENT);
+
+ r = chase_symlinks(p, NULL, CHASE_NONEXISTENT, &result);
+ assert_se(r == 0);
+ assert_se(path_equal(result, p));
+ result = mfree(result);
+
+ /* Path which doesn't exist, but contains weird stuff */
+
+ p = strjoina(temp, "/idontexist/..");
+ r = chase_symlinks(p, NULL, 0, &result);
+ assert_se(r == -ENOENT);
+
+ r = chase_symlinks(p, NULL, CHASE_NONEXISTENT, &result);
+ assert_se(r == -ENOENT);
+
+ p = strjoina(temp, "/target");
+ q = strjoina(temp, "/top");
+ assert_se(symlink(q, p) >= 0);
+ p = strjoina(temp, "/target/idontexist");
+ r = chase_symlinks(p, NULL, 0, &result);
+ assert_se(r == -ENOENT);
+
+ if (geteuid() == 0) {
+ p = strjoina(temp, "/priv1");
+ assert_se(mkdir(p, 0755) >= 0);
+
+ q = strjoina(p, "/priv2");
+ assert_se(mkdir(q, 0755) >= 0);
+
+ assert_se(chase_symlinks(q, NULL, CHASE_SAFE, NULL) >= 0);
+
+ assert_se(chown(q, UID_NOBODY, GID_NOBODY) >= 0);
+ assert_se(chase_symlinks(q, NULL, CHASE_SAFE, NULL) >= 0);
+
+ assert_se(chown(p, UID_NOBODY, GID_NOBODY) >= 0);
+ assert_se(chase_symlinks(q, NULL, CHASE_SAFE, NULL) >= 0);
+
+ assert_se(chown(q, 0, 0) >= 0);
+ assert_se(chase_symlinks(q, NULL, CHASE_SAFE, NULL) == -ENOLINK);
+
+ assert_se(rmdir(q) >= 0);
+ assert_se(symlink("/etc/passwd", q) >= 0);
+ assert_se(chase_symlinks(q, NULL, CHASE_SAFE, NULL) == -ENOLINK);
+
+ assert_se(chown(p, 0, 0) >= 0);
+ assert_se(chase_symlinks(q, NULL, CHASE_SAFE, NULL) >= 0);
+ }
+
+ p = strjoina(temp, "/machine-id-test");
+ assert_se(symlink("/usr/../etc/./machine-id", p) >= 0);
+
+ pfd = chase_symlinks(p, NULL, CHASE_OPEN, NULL);
+ if (pfd != -ENOENT) {
+ _cleanup_close_ int fd = -1;
+ sd_id128_t a, b;
+
+ assert_se(pfd >= 0);
+
+ fd = fd_reopen(pfd, O_RDONLY|O_CLOEXEC);
+ assert_se(fd >= 0);
+ safe_close(pfd);
+
+ assert_se(id128_read_fd(fd, ID128_PLAIN, &a) >= 0);
+ assert_se(sd_id128_get_machine(&b) >= 0);
+ assert_se(sd_id128_equal(a, b));
+ }
+
+ /* Test CHASE_NOFOLLOW */
+
+ p = strjoina(temp, "/target");
+ q = strjoina(temp, "/symlink");
+ assert_se(symlink(p, q) >= 0);
+ pfd = chase_symlinks(q, NULL, CHASE_OPEN|CHASE_NOFOLLOW, &result);
+ assert_se(pfd > 0);
+ assert_se(path_equal(result, q));
+ assert_se(fstat(pfd, &st) >= 0);
+ assert_se(S_ISLNK(st.st_mode));
+ result = mfree(result);
+
+ /* s1 -> s2 -> nonexistent */
+ q = strjoina(temp, "/s1");
+ assert_se(symlink("s2", q) >= 0);
+ p = strjoina(temp, "/s2");
+ assert_se(symlink("nonexistent", p) >= 0);
+ pfd = chase_symlinks(q, NULL, CHASE_OPEN|CHASE_NOFOLLOW, &result);
+ assert_se(pfd > 0);
+ assert_se(path_equal(result, q));
+ assert_se(fstat(pfd, &st) >= 0);
+ assert_se(S_ISLNK(st.st_mode));
+ result = mfree(result);
+
+ /* Test CHASE_ONE */
+
+ p = strjoina(temp, "/start");
+ r = chase_symlinks(p, NULL, CHASE_STEP, &result);
+ assert_se(r == 0);
+ p = strjoina(temp, "/top/dot/dotdota");
+ assert_se(streq(p, result));
+ result = mfree(result);
+
+ r = chase_symlinks(p, NULL, CHASE_STEP, &result);
+ assert_se(r == 0);
+ p = strjoina(temp, "/top/./dotdota");
+ assert_se(streq(p, result));
+ result = mfree(result);
+
+ r = chase_symlinks(p, NULL, CHASE_STEP, &result);
+ assert_se(r == 0);
+ p = strjoina(temp, "/top/../a");
+ assert_se(streq(p, result));
+ result = mfree(result);
+
+ r = chase_symlinks(p, NULL, CHASE_STEP, &result);
+ assert_se(r == 0);
+ p = strjoina(temp, "/a");
+ assert_se(streq(p, result));
+ result = mfree(result);
+
+ r = chase_symlinks(p, NULL, CHASE_STEP, &result);
+ assert_se(r == 0);
+ p = strjoina(temp, "/b");
+ assert_se(streq(p, result));
+ result = mfree(result);
+
+ r = chase_symlinks(p, NULL, CHASE_STEP, &result);
+ assert_se(r == 0);
+ assert_se(streq("/usr", result));
+ result = mfree(result);
+
+ r = chase_symlinks("/usr", NULL, CHASE_STEP, &result);
+ assert_se(r > 0);
+ assert_se(streq("/usr", result));
+ result = mfree(result);
+
+ cleanup:
+ assert_se(rm_rf(temp, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+}
+
+static void test_unlink_noerrno(void) {
+ char *name;
+ int fd;
+
+ log_info("/* %s */", __func__);
+
+ name = strjoina(arg_test_dir ?: "/tmp", "/test-close_nointr.XXXXXX");
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(close_nointr(fd) >= 0);
+
+ {
+ PROTECT_ERRNO;
+ errno = 42;
+ assert_se(unlink_noerrno(name) >= 0);
+ assert_se(errno == 42);
+ assert_se(unlink_noerrno(name) < 0);
+ assert_se(errno == 42);
+ }
+}
+
+static void test_readlink_and_make_absolute(void) {
+ const char *tempdir, *name, *name2, *name_alias;
+ _cleanup_free_ char *r1 = NULL, *r2 = NULL, *pwd = NULL;
+
+ log_info("/* %s */", __func__);
+
+ tempdir = strjoina(arg_test_dir ?: "/tmp", "/test-readlink_and_make_absolute");
+ name = strjoina(tempdir, "/original");
+ name2 = "test-readlink_and_make_absolute/original";
+ name_alias = strjoina(arg_test_dir ?: "/tmp", "/test-readlink_and_make_absolute-alias");
+
+ assert_se(mkdir_safe(tempdir, 0755, getuid(), getgid(), MKDIR_WARN_MODE) >= 0);
+ assert_se(touch(name) >= 0);
+
+ if (symlink(name, name_alias) < 0) {
+ assert_se(IN_SET(errno, EINVAL, ENOSYS, ENOTTY, EPERM));
+ log_tests_skipped_errno(errno, "symlink() not possible");
+ } else {
+ assert_se(readlink_and_make_absolute(name_alias, &r1) >= 0);
+ assert_se(streq(r1, name));
+ assert_se(unlink(name_alias) >= 0);
+
+ assert_se(safe_getcwd(&pwd) >= 0);
+
+ assert_se(chdir(tempdir) >= 0);
+ assert_se(symlink(name2, name_alias) >= 0);
+ assert_se(readlink_and_make_absolute(name_alias, &r2) >= 0);
+ assert_se(streq(r2, name));
+ assert_se(unlink(name_alias) >= 0);
+
+ assert_se(chdir(pwd) >= 0);
+ }
+
+ assert_se(rm_rf(tempdir, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+}
+
+static void test_get_files_in_directory(void) {
+ _cleanup_strv_free_ char **l = NULL, **t = NULL;
+
+ assert_se(get_files_in_directory(arg_test_dir ?: "/tmp", &l) >= 0);
+ assert_se(get_files_in_directory(".", &t) >= 0);
+ assert_se(get_files_in_directory(".", NULL) >= 0);
+}
+
+static void test_var_tmp(void) {
+ _cleanup_free_ char *tmpdir_backup = NULL, *temp_backup = NULL, *tmp_backup = NULL;
+ const char *tmp_dir = NULL, *t;
+
+ log_info("/* %s */", __func__);
+
+ t = getenv("TMPDIR");
+ if (t) {
+ tmpdir_backup = strdup(t);
+ assert_se(tmpdir_backup);
+ }
+
+ t = getenv("TEMP");
+ if (t) {
+ temp_backup = strdup(t);
+ assert_se(temp_backup);
+ }
+
+ t = getenv("TMP");
+ if (t) {
+ tmp_backup = strdup(t);
+ assert_se(tmp_backup);
+ }
+
+ assert_se(unsetenv("TMPDIR") >= 0);
+ assert_se(unsetenv("TEMP") >= 0);
+ assert_se(unsetenv("TMP") >= 0);
+
+ assert_se(var_tmp_dir(&tmp_dir) >= 0);
+ assert_se(streq(tmp_dir, "/var/tmp"));
+
+ assert_se(setenv("TMPDIR", "/tmp", true) >= 0);
+ assert_se(streq(getenv("TMPDIR"), "/tmp"));
+
+ assert_se(var_tmp_dir(&tmp_dir) >= 0);
+ assert_se(streq(tmp_dir, "/tmp"));
+
+ assert_se(setenv("TMPDIR", "/88_does_not_exist_88", true) >= 0);
+ assert_se(streq(getenv("TMPDIR"), "/88_does_not_exist_88"));
+
+ assert_se(var_tmp_dir(&tmp_dir) >= 0);
+ assert_se(streq(tmp_dir, "/var/tmp"));
+
+ if (tmpdir_backup) {
+ assert_se(setenv("TMPDIR", tmpdir_backup, true) >= 0);
+ assert_se(streq(getenv("TMPDIR"), tmpdir_backup));
+ }
+
+ if (temp_backup) {
+ assert_se(setenv("TEMP", temp_backup, true) >= 0);
+ assert_se(streq(getenv("TEMP"), temp_backup));
+ }
+
+ if (tmp_backup) {
+ assert_se(setenv("TMP", tmp_backup, true) >= 0);
+ assert_se(streq(getenv("TMP"), tmp_backup));
+ }
+}
+
+static void test_dot_or_dot_dot(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!dot_or_dot_dot(NULL));
+ assert_se(!dot_or_dot_dot(""));
+ assert_se(!dot_or_dot_dot("xxx"));
+ assert_se(dot_or_dot_dot("."));
+ assert_se(dot_or_dot_dot(".."));
+ assert_se(!dot_or_dot_dot(".foo"));
+ assert_se(!dot_or_dot_dot("..foo"));
+}
+
+static void test_access_fd(void) {
+ _cleanup_(rmdir_and_freep) char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *a;
+
+ log_info("/* %s */", __func__);
+
+ a = strjoina(arg_test_dir ?: "/tmp", "/access-fd.XXXXXX");
+ assert_se(mkdtemp_malloc(a, &p) >= 0);
+
+ fd = open(p, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ assert_se(fd >= 0);
+
+ assert_se(access_fd(fd, R_OK) >= 0);
+ assert_se(access_fd(fd, F_OK) >= 0);
+ assert_se(access_fd(fd, W_OK) >= 0);
+
+ assert_se(fchmod(fd, 0000) >= 0);
+
+ assert_se(access_fd(fd, F_OK) >= 0);
+
+ if (geteuid() == 0) {
+ assert_se(access_fd(fd, R_OK) >= 0);
+ assert_se(access_fd(fd, W_OK) >= 0);
+ } else {
+ assert_se(access_fd(fd, R_OK) == -EACCES);
+ assert_se(access_fd(fd, W_OK) == -EACCES);
+ }
+}
+
+static void test_touch_file(void) {
+ uid_t test_uid, test_gid;
+ _cleanup_(rm_rf_physical_and_freep) char *p = NULL;
+ struct stat st;
+ const char *a;
+ usec_t test_mtime;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ test_uid = geteuid() == 0 ? 65534 : getuid();
+ test_gid = geteuid() == 0 ? 65534 : getgid();
+
+ test_mtime = usec_sub_unsigned(now(CLOCK_REALTIME), USEC_PER_WEEK);
+
+ a = strjoina(arg_test_dir ?: "/dev/shm", "/touch-file-XXXXXX");
+ assert_se(mkdtemp_malloc(a, &p) >= 0);
+
+ a = strjoina(p, "/regular");
+ r = touch_file(a, false, test_mtime, test_uid, test_gid, 0640);
+ if (r < 0) {
+ assert_se(IN_SET(r, -EINVAL, -ENOSYS, -ENOTTY, -EPERM));
+ log_tests_skipped_errno(errno, "touch_file() not possible");
+ return;
+ }
+
+ assert_se(lstat(a, &st) >= 0);
+ assert_se(st.st_uid == test_uid);
+ assert_se(st.st_gid == test_gid);
+ assert_se(S_ISREG(st.st_mode));
+ assert_se((st.st_mode & 0777) == 0640);
+ assert_se(timespec_load(&st.st_mtim) == test_mtime);
+
+ a = strjoina(p, "/dir");
+ assert_se(mkdir(a, 0775) >= 0);
+ assert_se(touch_file(a, false, test_mtime, test_uid, test_gid, 0640) >= 0);
+ assert_se(lstat(a, &st) >= 0);
+ assert_se(st.st_uid == test_uid);
+ assert_se(st.st_gid == test_gid);
+ assert_se(S_ISDIR(st.st_mode));
+ assert_se((st.st_mode & 0777) == 0640);
+ assert_se(timespec_load(&st.st_mtim) == test_mtime);
+
+ a = strjoina(p, "/fifo");
+ assert_se(mkfifo(a, 0775) >= 0);
+ assert_se(touch_file(a, false, test_mtime, test_uid, test_gid, 0640) >= 0);
+ assert_se(lstat(a, &st) >= 0);
+ assert_se(st.st_uid == test_uid);
+ assert_se(st.st_gid == test_gid);
+ assert_se(S_ISFIFO(st.st_mode));
+ assert_se((st.st_mode & 0777) == 0640);
+ assert_se(timespec_load(&st.st_mtim) == test_mtime);
+
+ a = strjoina(p, "/sock");
+ assert_se(mknod(a, 0775 | S_IFSOCK, 0) >= 0);
+ assert_se(touch_file(a, false, test_mtime, test_uid, test_gid, 0640) >= 0);
+ assert_se(lstat(a, &st) >= 0);
+ assert_se(st.st_uid == test_uid);
+ assert_se(st.st_gid == test_gid);
+ assert_se(S_ISSOCK(st.st_mode));
+ assert_se((st.st_mode & 0777) == 0640);
+ assert_se(timespec_load(&st.st_mtim) == test_mtime);
+
+ if (geteuid() == 0) {
+ a = strjoina(p, "/cdev");
+ r = mknod(a, 0775 | S_IFCHR, makedev(0, 0));
+ if (r < 0 && errno == EPERM && detect_container() > 0) {
+ log_notice("Running in unprivileged container? Skipping remaining tests in %s", __func__);
+ return;
+ }
+ assert_se(r >= 0);
+ assert_se(touch_file(a, false, test_mtime, test_uid, test_gid, 0640) >= 0);
+ assert_se(lstat(a, &st) >= 0);
+ assert_se(st.st_uid == test_uid);
+ assert_se(st.st_gid == test_gid);
+ assert_se(S_ISCHR(st.st_mode));
+ assert_se((st.st_mode & 0777) == 0640);
+ assert_se(timespec_load(&st.st_mtim) == test_mtime);
+
+ a = strjoina(p, "/bdev");
+ assert_se(mknod(a, 0775 | S_IFBLK, makedev(0, 0)) >= 0);
+ assert_se(touch_file(a, false, test_mtime, test_uid, test_gid, 0640) >= 0);
+ assert_se(lstat(a, &st) >= 0);
+ assert_se(st.st_uid == test_uid);
+ assert_se(st.st_gid == test_gid);
+ assert_se(S_ISBLK(st.st_mode));
+ assert_se((st.st_mode & 0777) == 0640);
+ assert_se(timespec_load(&st.st_mtim) == test_mtime);
+ }
+
+ a = strjoina(p, "/lnk");
+ assert_se(symlink("target", a) >= 0);
+ assert_se(touch_file(a, false, test_mtime, test_uid, test_gid, 0640) >= 0);
+ assert_se(lstat(a, &st) >= 0);
+ assert_se(st.st_uid == test_uid);
+ assert_se(st.st_gid == test_gid);
+ assert_se(S_ISLNK(st.st_mode));
+ assert_se((st.st_mode & 0777) == 0640);
+ assert_se(timespec_load(&st.st_mtim) == test_mtime);
+}
+
+static void test_unlinkat_deallocate(void) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(tempfn_random_child(arg_test_dir, "unlink-deallocation", &p) >= 0);
+
+ fd = open(p, O_WRONLY|O_CLOEXEC|O_CREAT|O_EXCL, 0600);
+ assert_se(fd >= 0);
+
+ assert_se(write(fd, "hallo\n", 6) == 6);
+
+ assert_se(fstat(fd, &st) >= 0);
+ assert_se(st.st_size == 6);
+ assert_se(st.st_blocks > 0);
+ assert_se(st.st_nlink == 1);
+
+ assert_se(unlinkat_deallocate(AT_FDCWD, p, 0) >= 0);
+
+ assert_se(fstat(fd, &st) >= 0);
+ assert_se(IN_SET(st.st_size, 0, 6)); /* depending on whether hole punching worked the size will be 6
+ (it worked) or 0 (we had to resort to truncation) */
+ assert_se(st.st_blocks == 0);
+ assert_se(st.st_nlink == 0);
+}
+
+static void test_fsync_directory_of_file(void) {
+ _cleanup_close_ int fd = -1;
+
+ log_info("/* %s */", __func__);
+
+ fd = open_tmpfile_unlinkable(arg_test_dir, O_RDWR);
+ assert_se(fd >= 0);
+
+ assert_se(fsync_directory_of_file(fd) >= 0);
+}
+
+static void test_rename_noreplace(void) {
+ static const char* const table[] = {
+ "/reg",
+ "/dir",
+ "/fifo",
+ "/socket",
+ "/symlink",
+ NULL
+ };
+
+ _cleanup_(rm_rf_physical_and_freep) char *z = NULL;
+ const char *j = NULL;
+ char **a, **b;
+
+ log_info("/* %s */", __func__);
+
+ if (arg_test_dir)
+ j = strjoina(arg_test_dir, "/testXXXXXX");
+ assert_se(mkdtemp_malloc(j, &z) >= 0);
+
+ j = strjoina(z, table[0]);
+ assert_se(touch(j) >= 0);
+
+ j = strjoina(z, table[1]);
+ assert_se(mkdir(j, 0777) >= 0);
+
+ j = strjoina(z, table[2]);
+ (void) mkfifo(j, 0777);
+
+ j = strjoina(z, table[3]);
+ (void) mknod(j, S_IFSOCK | 0777, 0);
+
+ j = strjoina(z, table[4]);
+ (void) symlink("foobar", j);
+
+ STRV_FOREACH(a, (char**) table) {
+ _cleanup_free_ char *x = NULL, *y = NULL;
+
+ x = strjoin(z, *a);
+ assert_se(x);
+
+ if (access(x, F_OK) < 0) {
+ assert_se(errno == ENOENT);
+ continue;
+ }
+
+ STRV_FOREACH(b, (char**) table) {
+ _cleanup_free_ char *w = NULL;
+
+ w = strjoin(w, *b);
+ assert_se(w);
+
+ if (access(w, F_OK) < 0) {
+ assert_se(errno == ENOENT);
+ continue;
+ }
+
+ assert_se(rename_noreplace(AT_FDCWD, w, AT_FDCWD, y) == -EEXIST);
+ }
+
+ y = strjoin(z, "/somethingelse");
+ assert_se(y);
+
+ assert_se(rename_noreplace(AT_FDCWD, x, AT_FDCWD, y) >= 0);
+ assert_se(rename_noreplace(AT_FDCWD, y, AT_FDCWD, x) >= 0);
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ arg_test_dir = argv[1];
+
+ test_unlink_noerrno();
+ test_get_files_in_directory();
+ test_readlink_and_make_absolute();
+ test_var_tmp();
+ test_chase_symlinks();
+ test_dot_or_dot_dot();
+ test_access_fd();
+ test_touch_file();
+ test_unlinkat_deallocate();
+ test_fsync_directory_of_file();
+ test_rename_noreplace();
+
+ return 0;
+}
diff --git a/src/test/test-fstab-util.c b/src/test/test-fstab-util.c
new file mode 100644
index 0000000..bd873a3
--- /dev/null
+++ b/src/test/test-fstab-util.c
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "fstab-util.h"
+#include "log.h"
+#include "string-util.h"
+#include "util.h"
+
+/*
+int fstab_filter_options(const char *opts, const char *names,
+ const char **namefound, char **value, char **filtered);
+*/
+
+static void do_fstab_filter_options(const char *opts,
+ const char *remove,
+ int r_expected,
+ const char *name_expected,
+ const char *value_expected,
+ const char *filtered_expected) {
+
+ int r;
+ const char *name;
+ _cleanup_free_ char *value, *filtered;
+
+ r = fstab_filter_options(opts, remove, &name, &value, &filtered);
+ log_info("\"%s\" → %d, \"%s\", \"%s\", \"%s\", expected %d, \"%s\", \"%s\", \"%s\"",
+ opts, r, name, value, filtered,
+ r_expected, name_expected, value_expected, filtered_expected ?: opts);
+ assert_se(r == r_expected);
+ assert_se(streq_ptr(name, name_expected));
+ assert_se(streq_ptr(value, value_expected));
+ assert_se(streq_ptr(filtered, filtered_expected ?: opts));
+
+ /* also test the malloc-less mode */
+ r = fstab_filter_options(opts, remove, &name, NULL, NULL);
+ log_info("\"%s\" → %d, \"%s\", expected %d, \"%s\"",
+ opts, r, name,
+ r_expected, name_expected);
+ assert_se(r == r_expected);
+ assert_se(streq_ptr(name, name_expected));
+}
+
+static void test_fstab_filter_options(void) {
+ do_fstab_filter_options("opt=0", "opt\0x-opt\0", 1, "opt", "0", "");
+ do_fstab_filter_options("opt=0", "x-opt\0opt\0", 1, "opt", "0", "");
+ do_fstab_filter_options("opt", "opt\0x-opt\0", 1, "opt", NULL, "");
+ do_fstab_filter_options("opt", "x-opt\0opt\0", 1, "opt", NULL, "");
+ do_fstab_filter_options("x-opt", "x-opt\0opt\0", 1, "x-opt", NULL, "");
+
+ do_fstab_filter_options("opt=0,other", "opt\0x-opt\0", 1, "opt", "0", "other");
+ do_fstab_filter_options("opt=0,other", "x-opt\0opt\0", 1, "opt", "0", "other");
+ do_fstab_filter_options("opt,other", "opt\0x-opt\0", 1, "opt", NULL, "other");
+ do_fstab_filter_options("opt,other", "x-opt\0opt\0", 1, "opt", NULL, "other");
+ do_fstab_filter_options("x-opt,other", "opt\0x-opt\0", 1, "x-opt", NULL, "other");
+
+ do_fstab_filter_options("opto=0,other", "opt\0x-opt\0", 0, NULL, NULL, NULL);
+ do_fstab_filter_options("opto,other", "opt\0x-opt\0", 0, NULL, NULL, NULL);
+ do_fstab_filter_options("x-opto,other", "opt\0x-opt\0", 0, NULL, NULL, NULL);
+
+ do_fstab_filter_options("first,opt=0", "opt\0x-opt\0", 1, "opt", "0", "first");
+ do_fstab_filter_options("first=1,opt=0", "opt\0x-opt\0", 1, "opt", "0", "first=1");
+ do_fstab_filter_options("first,opt=", "opt\0x-opt\0", 1, "opt", "", "first");
+ do_fstab_filter_options("first=1,opt", "opt\0x-opt\0", 1, "opt", NULL, "first=1");
+ do_fstab_filter_options("first=1,x-opt", "opt\0x-opt\0", 1, "x-opt", NULL, "first=1");
+
+ do_fstab_filter_options("first,opt=0,last=1", "opt\0x-opt\0", 1, "opt", "0", "first,last=1");
+ do_fstab_filter_options("first=1,opt=0,last=2", "x-opt\0opt\0", 1, "opt", "0", "first=1,last=2");
+ do_fstab_filter_options("first,opt,last", "opt\0", 1, "opt", NULL, "first,last");
+ do_fstab_filter_options("first=1,opt,last", "x-opt\0opt\0", 1, "opt", NULL, "first=1,last");
+ do_fstab_filter_options("first=,opt,last", "opt\0noopt\0", 1, "opt", NULL, "first=,last");
+
+ /* check repeated options */
+ do_fstab_filter_options("first,opt=0,noopt=1,last=1", "opt\0noopt\0", 1, "noopt", "1", "first,last=1");
+ do_fstab_filter_options("first=1,opt=0,last=2,opt=1", "opt\0", 1, "opt", "1", "first=1,last=2");
+ do_fstab_filter_options("x-opt=0,x-opt=1", "opt\0x-opt\0", 1, "x-opt", "1", "");
+ do_fstab_filter_options("opt=0,x-opt=1", "opt\0x-opt\0", 1, "x-opt", "1", "");
+
+ /* check that semicolons are not misinterpreted */
+ do_fstab_filter_options("opt=0;", "opt\0", 1, "opt", "0;", "");
+ do_fstab_filter_options("opt;=0", "x-opt\0opt\0noopt\0x-noopt\0", 0, NULL, NULL, NULL);
+ do_fstab_filter_options("opt;", "opt\0x-opt\0", 0, NULL, NULL, NULL);
+
+ /* check that spaces are not misinterpreted */
+ do_fstab_filter_options("opt=0 ", "opt\0", 1, "opt", "0 ", "");
+ do_fstab_filter_options("opt =0", "x-opt\0opt\0noopt\0x-noopt\0", 0, NULL, NULL, NULL);
+ do_fstab_filter_options(" opt ", "opt\0x-opt\0", 0, NULL, NULL, NULL);
+
+ /* check function will NULL args */
+ do_fstab_filter_options(NULL, "opt\0", 0, NULL, NULL, "");
+ do_fstab_filter_options("", "opt\0", 0, NULL, NULL, "");
+}
+
+static void test_fstab_find_pri(void) {
+ int pri = -1;
+
+ assert_se(fstab_find_pri("pri", &pri) == 0);
+ assert_se(pri == -1);
+
+ assert_se(fstab_find_pri("pri=11", &pri) == 1);
+ assert_se(pri == 11);
+
+ assert_se(fstab_find_pri("opt,pri=12,opt", &pri) == 1);
+ assert_se(pri == 12);
+
+ assert_se(fstab_find_pri("opt,opt,pri=12,pri=13", &pri) == 1);
+ assert_se(pri == 13);
+}
+
+static void test_fstab_yes_no_option(void) {
+ assert_se(fstab_test_yes_no_option("nofail,fail,nofail", "nofail\0fail\0") == true);
+ assert_se(fstab_test_yes_no_option("nofail,nofail,fail", "nofail\0fail\0") == false);
+ assert_se(fstab_test_yes_no_option("abc,cde,afail", "nofail\0fail\0") == false);
+ assert_se(fstab_test_yes_no_option("nofail,fail=0,nofail=0", "nofail\0fail\0") == true);
+ assert_se(fstab_test_yes_no_option("nofail,nofail=0,fail=0", "nofail\0fail\0") == false);
+}
+
+static void test_fstab_node_to_udev_node(void) {
+ char *n;
+
+ n = fstab_node_to_udev_node("LABEL=applé/jack");
+ puts(n);
+ assert_se(streq(n, "/dev/disk/by-label/applé\\x2fjack"));
+ free(n);
+
+ n = fstab_node_to_udev_node("PARTLABEL=pinkié pie");
+ puts(n);
+ assert_se(streq(n, "/dev/disk/by-partlabel/pinkié\\x20pie"));
+ free(n);
+
+ n = fstab_node_to_udev_node("UUID=037b9d94-148e-4ee4-8d38-67bfe15bb535");
+ puts(n);
+ assert_se(streq(n, "/dev/disk/by-uuid/037b9d94-148e-4ee4-8d38-67bfe15bb535"));
+ free(n);
+
+ n = fstab_node_to_udev_node("PARTUUID=037b9d94-148e-4ee4-8d38-67bfe15bb535");
+ puts(n);
+ assert_se(streq(n, "/dev/disk/by-partuuid/037b9d94-148e-4ee4-8d38-67bfe15bb535"));
+ free(n);
+
+ n = fstab_node_to_udev_node("PONIES=awesome");
+ puts(n);
+ assert_se(streq(n, "PONIES=awesome"));
+ free(n);
+
+ n = fstab_node_to_udev_node("/dev/xda1");
+ puts(n);
+ assert_se(streq(n, "/dev/xda1"));
+ free(n);
+}
+
+int main(void) {
+ test_fstab_filter_options();
+ test_fstab_find_pri();
+ test_fstab_yes_no_option();
+ test_fstab_node_to_udev_node();
+
+ return 0;
+}
diff --git a/src/test/test-gcrypt-util.c b/src/test/test-gcrypt-util.c
new file mode 100644
index 0000000..b28b4eb
--- /dev/null
+++ b/src/test/test-gcrypt-util.c
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "gcrypt-util.h"
+#include "macro.h"
+#include "string-util.h"
+
+static void test_string_hashsum(void) {
+ _cleanup_free_ char *out1 = NULL, *out2 = NULL, *out3 = NULL, *out4 = NULL;
+
+ assert_se(string_hashsum("asdf", 4, GCRY_MD_SHA224, &out1) == 0);
+ /* echo -n 'asdf' | sha224sum - */
+ assert_se(streq(out1, "7872a74bcbf298a1e77d507cd95d4f8d96131cbbd4cdfc571e776c8a"));
+
+ assert_se(string_hashsum("asdf", 4, GCRY_MD_SHA256, &out2) == 0);
+ /* echo -n 'asdf' | sha256sum - */
+ assert_se(streq(out2, "f0e4c2f76c58916ec258f246851bea091d14d4247a2fc3e18694461b1816e13b"));
+
+ assert_se(string_hashsum("", 0, GCRY_MD_SHA224, &out3) == 0);
+ /* echo -n '' | sha224sum - */
+ assert_se(streq(out3, "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f"));
+
+ assert_se(string_hashsum("", 0, GCRY_MD_SHA256, &out4) == 0);
+ /* echo -n '' | sha256sum - */
+ assert_se(streq(out4, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"));
+}
+
+int main(int argc, char **argv) {
+ test_string_hashsum();
+
+ return 0;
+}
diff --git a/src/test/test-glob-util.c b/src/test/test-glob-util.c
new file mode 100644
index 0000000..b4f4144
--- /dev/null
+++ b/src/test/test-glob-util.c
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <glob.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "macro.h"
+#include "rm-rf.h"
+#include "tmpfile-util.h"
+
+static void test_glob_exists(void) {
+ char name[] = "/tmp/test-glob_exists.XXXXXX";
+ int fd = -1;
+ int r;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ close(fd);
+
+ r = glob_exists("/tmp/test-glob_exists*");
+ assert_se(r == 1);
+
+ r = unlink(name);
+ assert_se(r == 0);
+ r = glob_exists("/tmp/test-glob_exists*");
+ assert_se(r == 0);
+}
+
+static void closedir_wrapper(void* v) {
+ (void) closedir(v);
+}
+
+static void test_glob_no_dot(void) {
+ char template[] = "/tmp/test-glob-util.XXXXXXX";
+ const char *fn;
+
+ _cleanup_globfree_ glob_t g = {
+ .gl_closedir = closedir_wrapper,
+ .gl_readdir = (struct dirent *(*)(void *)) readdir_no_dot,
+ .gl_opendir = (void *(*)(const char *)) opendir,
+ .gl_lstat = lstat,
+ .gl_stat = stat,
+ };
+
+ int r;
+
+ assert_se(mkdtemp(template));
+
+ fn = strjoina(template, "/*");
+ r = glob(fn, GLOB_NOSORT|GLOB_BRACE|GLOB_ALTDIRFUNC, NULL, &g);
+ assert_se(r == GLOB_NOMATCH);
+
+ fn = strjoina(template, "/.*");
+ r = glob(fn, GLOB_NOSORT|GLOB_BRACE|GLOB_ALTDIRFUNC, NULL, &g);
+ assert_se(r == GLOB_NOMATCH);
+
+ (void) rm_rf(template, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+static void test_safe_glob(void) {
+ char template[] = "/tmp/test-glob-util.XXXXXXX";
+ const char *fn, *fn2, *fname;
+
+ _cleanup_globfree_ glob_t g = {};
+ int r;
+
+ assert_se(mkdtemp(template));
+
+ fn = strjoina(template, "/*");
+ r = safe_glob(fn, 0, &g);
+ assert_se(r == -ENOENT);
+
+ fn2 = strjoina(template, "/.*");
+ r = safe_glob(fn2, GLOB_NOSORT|GLOB_BRACE, &g);
+ assert_se(r == -ENOENT);
+
+ fname = strjoina(template, "/.foobar");
+ assert_se(touch(fname) == 0);
+
+ r = safe_glob(fn, 0, &g);
+ assert_se(r == -ENOENT);
+
+ r = safe_glob(fn2, GLOB_NOSORT|GLOB_BRACE, &g);
+ assert_se(r == 0);
+ assert_se(g.gl_pathc == 1);
+ assert_se(streq(g.gl_pathv[0], fname));
+ assert_se(g.gl_pathv[1] == NULL);
+
+ (void) rm_rf(template, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+int main(void) {
+ test_glob_exists();
+ test_glob_no_dot();
+ test_safe_glob();
+
+ return 0;
+}
diff --git a/src/test/test-hash.c b/src/test/test-hash.c
new file mode 100644
index 0000000..44d1044
--- /dev/null
+++ b/src/test/test-hash.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "log.h"
+#include "string-util.h"
+#include "khash.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(khash_unrefp) khash *h = NULL, *copy = NULL;
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(khash_new(&h, NULL) == -EINVAL);
+ assert_se(khash_new(&h, "") == -EINVAL);
+
+ r = khash_supported();
+ assert_se(r >= 0);
+ if (r == 0)
+ return log_tests_skipped("khash not supported on this kernel");
+
+ assert_se(khash_new(&h, "foobar") == -EOPNOTSUPP); /* undefined hash function */
+
+ assert_se(khash_new(&h, "sha256") >= 0);
+ assert_se(khash_get_size(h) == 32);
+ assert_se(streq(khash_get_algorithm(h), "sha256"));
+
+ assert_se(khash_digest_string(h, &s) >= 0);
+ assert_se(streq(s, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"));
+ s = mfree(s);
+
+ assert_se(khash_put(h, "foobar", 6) >= 0);
+ assert_se(khash_digest_string(h, &s) >= 0);
+ assert_se(streq(s, "c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2"));
+ s = mfree(s);
+
+ assert_se(khash_put(h, "piep", 4) >= 0);
+ assert_se(khash_digest_string(h, &s) >= 0);
+ assert_se(streq(s, "f114d872b5ea075d3be9040d0b7a429514b3f9324a8e8e3dc3fb24c34ee56bea"));
+ s = mfree(s);
+
+ assert_se(khash_put(h, "foo", 3) >= 0);
+ assert_se(khash_dup(h, &copy) >= 0);
+
+ assert_se(khash_put(h, "bar", 3) >= 0);
+ assert_se(khash_put(copy, "bar", 3) >= 0);
+
+ assert_se(khash_digest_string(h, &s) >= 0);
+ assert_se(streq(s, "c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2"));
+ s = mfree(s);
+
+ assert_se(khash_digest_string(copy, &s) >= 0);
+ assert_se(streq(s, "c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2"));
+ s = mfree(s);
+
+ h = khash_unref(h);
+
+ assert_se(khash_new_with_key(&h, "hmac(sha256)", "quux", 4) >= 0);
+ assert_se(khash_get_size(h) == 32);
+ assert_se(streq(khash_get_algorithm(h), "hmac(sha256)"));
+
+ assert_se(khash_digest_string(h, &s) >= 0);
+ assert_se(streq(s, "abed9f8218ab473f77218a6a7d39abf1d21fa46d0700c4898e330ba88309d5ae"));
+ s = mfree(s);
+
+ assert_se(khash_put(h, "foobar", 6) >= 0);
+ assert_se(khash_digest_string(h, &s) >= 0);
+ assert_se(streq(s, "33f6c70a60db66007d5325d5d1dea37c371354e5b83347a59ad339ce9f4ba3dc"));
+
+ return 0;
+}
diff --git a/src/test/test-hashmap-ordered.awk b/src/test/test-hashmap-ordered.awk
new file mode 100644
index 0000000..10f4386
--- /dev/null
+++ b/src/test/test-hashmap-ordered.awk
@@ -0,0 +1,11 @@
+BEGIN {
+ print "/* GENERATED FILE */";
+ print "#define ORDERED"
+}
+{
+ if (!match($0, "^#include"))
+ gsub(/hashmap/, "ordered_hashmap");
+ gsub(/HASHMAP/, "ORDERED_HASHMAP");
+ gsub(/Hashmap/, "OrderedHashmap");
+ print
+}
diff --git a/src/test/test-hashmap-plain.c b/src/test/test-hashmap-plain.c
new file mode 100644
index 0000000..5376aa8
--- /dev/null
+++ b/src/test/test-hashmap-plain.c
@@ -0,0 +1,1014 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "hashmap.h"
+#include "log.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "util.h"
+
+void test_hashmap_funcs(void);
+
+static void test_hashmap_replace(void) {
+ Hashmap *m;
+ char *val1, *val2, *val3, *val4, *val5, *r;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+
+ val1 = strdup("val1");
+ assert_se(val1);
+ val2 = strdup("val2");
+ assert_se(val2);
+ val3 = strdup("val3");
+ assert_se(val3);
+ val4 = strdup("val4");
+ assert_se(val4);
+ val5 = strdup("val5");
+ assert_se(val5);
+
+ hashmap_put(m, "key 1", val1);
+ hashmap_put(m, "key 2", val2);
+ hashmap_put(m, "key 3", val3);
+ hashmap_put(m, "key 4", val4);
+
+ hashmap_replace(m, "key 3", val1);
+ r = hashmap_get(m, "key 3");
+ assert_se(streq(r, "val1"));
+
+ hashmap_replace(m, "key 5", val5);
+ r = hashmap_get(m, "key 5");
+ assert_se(streq(r, "val5"));
+
+ free(val1);
+ free(val2);
+ free(val3);
+ free(val4);
+ free(val5);
+ hashmap_free(m);
+}
+
+static void test_hashmap_copy(void) {
+ Hashmap *m, *copy;
+ char *val1, *val2, *val3, *val4, *r;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("val1");
+ assert_se(val1);
+ val2 = strdup("val2");
+ assert_se(val2);
+ val3 = strdup("val3");
+ assert_se(val3);
+ val4 = strdup("val4");
+ assert_se(val4);
+
+ m = hashmap_new(&string_hash_ops);
+
+ hashmap_put(m, "key 1", val1);
+ hashmap_put(m, "key 2", val2);
+ hashmap_put(m, "key 3", val3);
+ hashmap_put(m, "key 4", val4);
+
+ copy = hashmap_copy(m);
+
+ r = hashmap_get(copy, "key 1");
+ assert_se(streq(r, "val1"));
+ r = hashmap_get(copy, "key 2");
+ assert_se(streq(r, "val2"));
+ r = hashmap_get(copy, "key 3");
+ assert_se(streq(r, "val3"));
+ r = hashmap_get(copy, "key 4");
+ assert_se(streq(r, "val4"));
+
+ hashmap_free_free(copy);
+ hashmap_free(m);
+}
+
+static void test_hashmap_get_strv(void) {
+ Hashmap *m;
+ char **strv;
+ char *val1, *val2, *val3, *val4;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("val1");
+ assert_se(val1);
+ val2 = strdup("val2");
+ assert_se(val2);
+ val3 = strdup("val3");
+ assert_se(val3);
+ val4 = strdup("val4");
+ assert_se(val4);
+
+ m = hashmap_new(&string_hash_ops);
+
+ hashmap_put(m, "key 1", val1);
+ hashmap_put(m, "key 2", val2);
+ hashmap_put(m, "key 3", val3);
+ hashmap_put(m, "key 4", val4);
+
+ strv = hashmap_get_strv(m);
+
+#ifndef ORDERED
+ strv = strv_sort(strv);
+#endif
+
+ assert_se(streq(strv[0], "val1"));
+ assert_se(streq(strv[1], "val2"));
+ assert_se(streq(strv[2], "val3"));
+ assert_se(streq(strv[3], "val4"));
+
+ strv_free(strv);
+
+ hashmap_free(m);
+}
+
+static void test_hashmap_move_one(void) {
+ Hashmap *m, *n;
+ char *val1, *val2, *val3, *val4, *r;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("val1");
+ assert_se(val1);
+ val2 = strdup("val2");
+ assert_se(val2);
+ val3 = strdup("val3");
+ assert_se(val3);
+ val4 = strdup("val4");
+ assert_se(val4);
+
+ m = hashmap_new(&string_hash_ops);
+ n = hashmap_new(&string_hash_ops);
+
+ hashmap_put(m, "key 1", val1);
+ hashmap_put(m, "key 2", val2);
+ hashmap_put(m, "key 3", val3);
+ hashmap_put(m, "key 4", val4);
+
+ assert_se(hashmap_move_one(n, NULL, "key 3") == -ENOENT);
+ assert_se(hashmap_move_one(n, m, "key 5") == -ENOENT);
+ assert_se(hashmap_move_one(n, m, "key 3") == 0);
+ assert_se(hashmap_move_one(n, m, "key 4") == 0);
+
+ r = hashmap_get(n, "key 3");
+ assert_se(r && streq(r, "val3"));
+ r = hashmap_get(n, "key 4");
+ assert_se(r && streq(r, "val4"));
+ r = hashmap_get(m, "key 3");
+ assert_se(!r);
+
+ assert_se(hashmap_move_one(n, m, "key 3") == -EEXIST);
+
+ hashmap_free_free(m);
+ hashmap_free_free(n);
+}
+
+static void test_hashmap_move(void) {
+ Hashmap *m, *n;
+ char *val1, *val2, *val3, *val4, *r;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("val1");
+ assert_se(val1);
+ val2 = strdup("val2");
+ assert_se(val2);
+ val3 = strdup("val3");
+ assert_se(val3);
+ val4 = strdup("val4");
+ assert_se(val4);
+
+ m = hashmap_new(&string_hash_ops);
+ n = hashmap_new(&string_hash_ops);
+
+ hashmap_put(n, "key 1", strdup(val1));
+ hashmap_put(m, "key 1", val1);
+ hashmap_put(m, "key 2", val2);
+ hashmap_put(m, "key 3", val3);
+ hashmap_put(m, "key 4", val4);
+
+ assert_se(hashmap_move(n, NULL) == 0);
+ assert_se(hashmap_move(n, m) == 0);
+
+ assert_se(hashmap_size(m) == 1);
+ r = hashmap_get(m, "key 1");
+ assert_se(r && streq(r, "val1"));
+
+ r = hashmap_get(n, "key 1");
+ assert_se(r && streq(r, "val1"));
+ r = hashmap_get(n, "key 2");
+ assert_se(r && streq(r, "val2"));
+ r = hashmap_get(n, "key 3");
+ assert_se(r && streq(r, "val3"));
+ r = hashmap_get(n, "key 4");
+ assert_se(r && streq(r, "val4"));
+
+ hashmap_free_free(m);
+ hashmap_free_free(n);
+}
+
+static void test_hashmap_update(void) {
+ Hashmap *m;
+ char *val1, *val2, *r;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+ val1 = strdup("old_value");
+ assert_se(val1);
+ val2 = strdup("new_value");
+ assert_se(val2);
+
+ hashmap_put(m, "key 1", val1);
+ r = hashmap_get(m, "key 1");
+ assert_se(streq(r, "old_value"));
+
+ assert_se(hashmap_update(m, "key 2", val2) == -ENOENT);
+ r = hashmap_get(m, "key 1");
+ assert_se(streq(r, "old_value"));
+
+ assert_se(hashmap_update(m, "key 1", val2) == 0);
+ r = hashmap_get(m, "key 1");
+ assert_se(streq(r, "new_value"));
+
+ free(val1);
+ free(val2);
+ hashmap_free(m);
+}
+
+static void test_hashmap_put(void) {
+ Hashmap *m = NULL;
+ int valid_hashmap_put;
+ void *val1 = (void*) "val 1";
+ void *val2 = (void*) "val 2";
+ _cleanup_free_ char* key1 = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(hashmap_ensure_allocated(&m, &string_hash_ops) >= 0);
+ assert_se(m);
+
+ valid_hashmap_put = hashmap_put(m, "key 1", val1);
+ assert_se(valid_hashmap_put == 1);
+ assert_se(hashmap_put(m, "key 1", val1) == 0);
+ assert_se(hashmap_put(m, "key 1", val2) == -EEXIST);
+ key1 = strdup("key 1");
+ assert_se(hashmap_put(m, key1, val1) == 0);
+ assert_se(hashmap_put(m, key1, val2) == -EEXIST);
+
+ hashmap_free(m);
+}
+
+static void test_hashmap_remove(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+ char *r;
+
+ log_info("/* %s */", __func__);
+
+ r = hashmap_remove(NULL, "key 1");
+ assert_se(r == NULL);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ r = hashmap_remove(m, "no such key");
+ assert_se(r == NULL);
+
+ hashmap_put(m, "key 1", (void*) "val 1");
+ hashmap_put(m, "key 2", (void*) "val 2");
+
+ r = hashmap_remove(m, "key 1");
+ assert_se(streq(r, "val 1"));
+
+ r = hashmap_get(m, "key 2");
+ assert_se(streq(r, "val 2"));
+ assert_se(!hashmap_get(m, "key 1"));
+}
+
+static void test_hashmap_remove2(void) {
+ _cleanup_hashmap_free_free_free_ Hashmap *m = NULL;
+ char key1[] = "key 1";
+ char key2[] = "key 2";
+ char val1[] = "val 1";
+ char val2[] = "val 2";
+ void *r, *r2;
+
+ log_info("/* %s */", __func__);
+
+ r = hashmap_remove2(NULL, "key 1", &r2);
+ assert_se(r == NULL);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ r = hashmap_remove2(m, "no such key", &r2);
+ assert_se(r == NULL);
+
+ hashmap_put(m, strdup(key1), strdup(val1));
+ hashmap_put(m, strdup(key2), strdup(val2));
+
+ r = hashmap_remove2(m, key1, &r2);
+ assert_se(streq(r, val1));
+ assert_se(streq(r2, key1));
+ free(r);
+ free(r2);
+
+ r = hashmap_get(m, key2);
+ assert_se(streq(r, val2));
+ assert_se(!hashmap_get(m, key1));
+}
+
+static void test_hashmap_remove_value(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+ char *r;
+
+ char val1[] = "val 1";
+ char val2[] = "val 2";
+
+ log_info("/* %s */", __func__);
+
+ r = hashmap_remove_value(NULL, "key 1", val1);
+ assert_se(r == NULL);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ r = hashmap_remove_value(m, "key 1", val1);
+ assert_se(r == NULL);
+
+ hashmap_put(m, "key 1", val1);
+ hashmap_put(m, "key 2", val2);
+
+ r = hashmap_remove_value(m, "key 1", val1);
+ assert_se(streq(r, "val 1"));
+
+ r = hashmap_get(m, "key 2");
+ assert_se(streq(r, "val 2"));
+ assert_se(!hashmap_get(m, "key 1"));
+
+ r = hashmap_remove_value(m, "key 2", val1);
+ assert_se(r == NULL);
+
+ r = hashmap_get(m, "key 2");
+ assert_se(streq(r, "val 2"));
+ assert_se(!hashmap_get(m, "key 1"));
+}
+
+static void test_hashmap_remove_and_put(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+ int valid;
+ char *r;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ valid = hashmap_remove_and_put(m, "invalid key", "new key", NULL);
+ assert_se(valid == -ENOENT);
+
+ valid = hashmap_put(m, "key 1", (void*) (const char *) "val 1");
+ assert_se(valid == 1);
+
+ valid = hashmap_remove_and_put(NULL, "key 1", "key 2", (void*) (const char *) "val 2");
+ assert_se(valid == -ENOENT);
+
+ valid = hashmap_remove_and_put(m, "key 1", "key 2", (void*) (const char *) "val 2");
+ assert_se(valid == 0);
+
+ r = hashmap_get(m, "key 2");
+ assert_se(streq(r, "val 2"));
+ assert_se(!hashmap_get(m, "key 1"));
+
+ valid = hashmap_put(m, "key 3", (void*) (const char *) "val 3");
+ assert_se(valid == 1);
+ valid = hashmap_remove_and_put(m, "key 3", "key 2", (void*) (const char *) "val 2");
+ assert_se(valid == -EEXIST);
+}
+
+static void test_hashmap_remove_and_replace(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+ int valid;
+ void *key1 = UINT_TO_PTR(1);
+ void *key2 = UINT_TO_PTR(2);
+ void *key3 = UINT_TO_PTR(3);
+ void *r;
+ int i, j;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&trivial_hash_ops);
+ assert_se(m);
+
+ valid = hashmap_remove_and_replace(m, key1, key2, NULL);
+ assert_se(valid == -ENOENT);
+
+ valid = hashmap_put(m, key1, key1);
+ assert_se(valid == 1);
+
+ valid = hashmap_remove_and_replace(NULL, key1, key2, key2);
+ assert_se(valid == -ENOENT);
+
+ valid = hashmap_remove_and_replace(m, key1, key2, key2);
+ assert_se(valid == 0);
+
+ r = hashmap_get(m, key2);
+ assert_se(r == key2);
+ assert_se(!hashmap_get(m, key1));
+
+ valid = hashmap_put(m, key3, key3);
+ assert_se(valid == 1);
+ valid = hashmap_remove_and_replace(m, key3, key2, key2);
+ assert_se(valid == 0);
+ r = hashmap_get(m, key2);
+ assert_se(r == key2);
+ assert_se(!hashmap_get(m, key3));
+
+ /* Repeat this test several times to increase the chance of hitting
+ * the less likely case in hashmap_remove_and_replace where it
+ * compensates for the backward shift. */
+ for (i = 0; i < 20; i++) {
+ hashmap_clear(m);
+
+ for (j = 1; j < 7; j++)
+ hashmap_put(m, UINT_TO_PTR(10*i + j), UINT_TO_PTR(10*i + j));
+ valid = hashmap_remove_and_replace(m, UINT_TO_PTR(10*i + 1),
+ UINT_TO_PTR(10*i + 2),
+ UINT_TO_PTR(10*i + 2));
+ assert_se(valid == 0);
+ assert_se(!hashmap_get(m, UINT_TO_PTR(10*i + 1)));
+ for (j = 2; j < 7; j++) {
+ r = hashmap_get(m, UINT_TO_PTR(10*i + j));
+ assert_se(r == UINT_TO_PTR(10*i + j));
+ }
+ }
+}
+
+static void test_hashmap_ensure_allocated(void) {
+ Hashmap *m;
+ int valid_hashmap;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+
+ valid_hashmap = hashmap_ensure_allocated(&m, &string_hash_ops);
+ assert_se(valid_hashmap == 0);
+
+ assert_se(m);
+ hashmap_free(m);
+}
+
+static void test_hashmap_foreach_key(void) {
+ Hashmap *m;
+ Iterator i;
+ bool key_found[] = { false, false, false, false };
+ const char *s;
+ const char *key;
+ static const char key_table[] =
+ "key 1\0"
+ "key 2\0"
+ "key 3\0"
+ "key 4\0";
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+
+ NULSTR_FOREACH(key, key_table)
+ hashmap_put(m, key, (void*) (const char*) "my dummy val");
+
+ HASHMAP_FOREACH_KEY(s, key, m, i) {
+ assert(s);
+ if (!key_found[0] && streq(key, "key 1"))
+ key_found[0] = true;
+ else if (!key_found[1] && streq(key, "key 2"))
+ key_found[1] = true;
+ else if (!key_found[2] && streq(key, "key 3"))
+ key_found[2] = true;
+ else if (!key_found[3] && streq(key, "fail"))
+ key_found[3] = true;
+ }
+
+ assert_se(m);
+ assert_se(key_found[0] && key_found[1] && key_found[2] && !key_found[3]);
+
+ hashmap_free(m);
+}
+
+static void test_hashmap_foreach(void) {
+ Hashmap *m;
+ Iterator i;
+ bool value_found[] = { false, false, false, false };
+ char *val1, *val2, *val3, *val4, *s;
+ unsigned count;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("my val1");
+ assert_se(val1);
+ val2 = strdup("my val2");
+ assert_se(val2);
+ val3 = strdup("my val3");
+ assert_se(val3);
+ val4 = strdup("my val4");
+ assert_se(val4);
+
+ m = NULL;
+
+ count = 0;
+ HASHMAP_FOREACH(s, m, i)
+ count++;
+ assert_se(count == 0);
+
+ m = hashmap_new(&string_hash_ops);
+
+ count = 0;
+ HASHMAP_FOREACH(s, m, i)
+ count++;
+ assert_se(count == 0);
+
+ hashmap_put(m, "Key 1", val1);
+ hashmap_put(m, "Key 2", val2);
+ hashmap_put(m, "Key 3", val3);
+ hashmap_put(m, "Key 4", val4);
+
+ HASHMAP_FOREACH(s, m, i) {
+ if (!value_found[0] && streq(s, val1))
+ value_found[0] = true;
+ else if (!value_found[1] && streq(s, val2))
+ value_found[1] = true;
+ else if (!value_found[2] && streq(s, val3))
+ value_found[2] = true;
+ else if (!value_found[3] && streq(s, val4))
+ value_found[3] = true;
+ }
+
+ assert_se(m);
+ assert_se(value_found[0] && value_found[1] && value_found[2] && value_found[3]);
+
+ hashmap_free_free(m);
+}
+
+static void test_hashmap_merge(void) {
+ Hashmap *m;
+ Hashmap *n;
+ char *val1, *val2, *val3, *val4, *r;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("my val1");
+ assert_se(val1);
+ val2 = strdup("my val2");
+ assert_se(val2);
+ val3 = strdup("my val3");
+ assert_se(val3);
+ val4 = strdup("my val4");
+ assert_se(val4);
+
+ n = hashmap_new(&string_hash_ops);
+ m = hashmap_new(&string_hash_ops);
+
+ hashmap_put(m, "Key 1", val1);
+ hashmap_put(m, "Key 2", val2);
+ hashmap_put(n, "Key 3", val3);
+ hashmap_put(n, "Key 4", val4);
+
+ assert_se(hashmap_merge(m, n) == 0);
+ r = hashmap_get(m, "Key 3");
+ assert_se(r && streq(r, "my val3"));
+ r = hashmap_get(m, "Key 4");
+ assert_se(r && streq(r, "my val4"));
+
+ assert_se(n);
+ assert_se(m);
+ hashmap_free(n);
+ hashmap_free_free(m);
+}
+
+static void test_hashmap_contains(void) {
+ Hashmap *m;
+ char *val1;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("my val");
+ assert_se(val1);
+
+ m = hashmap_new(&string_hash_ops);
+
+ assert_se(!hashmap_contains(m, "Key 1"));
+ hashmap_put(m, "Key 1", val1);
+ assert_se(hashmap_contains(m, "Key 1"));
+ assert_se(!hashmap_contains(m, "Key 2"));
+
+ assert_se(!hashmap_contains(NULL, "Key 1"));
+
+ assert_se(m);
+ hashmap_free_free(m);
+}
+
+static void test_hashmap_isempty(void) {
+ Hashmap *m;
+ char *val1;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("my val");
+ assert_se(val1);
+
+ m = hashmap_new(&string_hash_ops);
+
+ assert_se(hashmap_isempty(m));
+ hashmap_put(m, "Key 1", val1);
+ assert_se(!hashmap_isempty(m));
+
+ assert_se(m);
+ hashmap_free_free(m);
+}
+
+static void test_hashmap_size(void) {
+ Hashmap *m;
+ char *val1, *val2, *val3, *val4;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("my val");
+ assert_se(val1);
+ val2 = strdup("my val");
+ assert_se(val2);
+ val3 = strdup("my val");
+ assert_se(val3);
+ val4 = strdup("my val");
+ assert_se(val4);
+
+ assert_se(hashmap_size(NULL) == 0);
+ assert_se(hashmap_buckets(NULL) == 0);
+
+ m = hashmap_new(&string_hash_ops);
+
+ hashmap_put(m, "Key 1", val1);
+ hashmap_put(m, "Key 2", val2);
+ hashmap_put(m, "Key 3", val3);
+ hashmap_put(m, "Key 4", val4);
+
+ assert_se(m);
+ assert_se(hashmap_size(m) == 4);
+ assert_se(hashmap_buckets(m) >= 4);
+ hashmap_free_free(m);
+}
+
+static void test_hashmap_get(void) {
+ Hashmap *m;
+ char *r;
+ char *val;
+
+ log_info("/* %s */", __func__);
+
+ val = strdup("my val");
+ assert_se(val);
+
+ r = hashmap_get(NULL, "Key 1");
+ assert_se(r == NULL);
+
+ m = hashmap_new(&string_hash_ops);
+
+ hashmap_put(m, "Key 1", val);
+
+ r = hashmap_get(m, "Key 1");
+ assert_se(streq(r, val));
+
+ r = hashmap_get(m, "no such key");
+ assert_se(r == NULL);
+
+ assert_se(m);
+ hashmap_free_free(m);
+}
+
+static void test_hashmap_get2(void) {
+ Hashmap *m;
+ char *r;
+ char *val;
+ char key_orig[] = "Key 1";
+ void *key_copy;
+
+ log_info("/* %s */", __func__);
+
+ val = strdup("my val");
+ assert_se(val);
+
+ key_copy = strdup(key_orig);
+ assert_se(key_copy);
+
+ r = hashmap_get2(NULL, key_orig, &key_copy);
+ assert_se(r == NULL);
+
+ m = hashmap_new(&string_hash_ops);
+
+ hashmap_put(m, key_copy, val);
+ key_copy = NULL;
+
+ r = hashmap_get2(m, key_orig, &key_copy);
+ assert_se(streq(r, val));
+ assert_se(key_orig != key_copy);
+ assert_se(streq(key_orig, key_copy));
+
+ r = hashmap_get2(m, "no such key", NULL);
+ assert_se(r == NULL);
+
+ assert_se(m);
+ hashmap_free_free_free(m);
+}
+
+static void crippled_hashmap_func(const void *p, struct siphash *state) {
+ return trivial_hash_func(INT_TO_PTR(PTR_TO_INT(p) & 0xff), state);
+}
+
+static const struct hash_ops crippled_hashmap_ops = {
+ .hash = crippled_hashmap_func,
+ .compare = trivial_compare_func,
+};
+
+static void test_hashmap_many(void) {
+ Hashmap *h;
+ unsigned i, j;
+ void *v, *k;
+ bool slow = slow_tests_enabled();
+ const struct {
+ const char *title;
+ const struct hash_ops *ops;
+ unsigned n_entries;
+ } tests[] = {
+ { "trivial_hashmap_ops", NULL, slow ? 1 << 20 : 240 },
+ { "crippled_hashmap_ops", &crippled_hashmap_ops, slow ? 1 << 14 : 140 },
+ };
+
+ log_info("/* %s (%s) */", __func__, slow ? "slow" : "fast");
+
+ for (j = 0; j < ELEMENTSOF(tests); j++) {
+ usec_t ts = now(CLOCK_MONOTONIC), n;
+ char b[FORMAT_TIMESPAN_MAX];
+
+ assert_se(h = hashmap_new(tests[j].ops));
+
+ for (i = 1; i < tests[j].n_entries*3; i+=3) {
+ assert_se(hashmap_put(h, UINT_TO_PTR(i), UINT_TO_PTR(i)) >= 0);
+ assert_se(PTR_TO_UINT(hashmap_get(h, UINT_TO_PTR(i))) == i);
+ }
+
+ for (i = 1; i < tests[j].n_entries*3; i++)
+ assert_se(hashmap_contains(h, UINT_TO_PTR(i)) == (i % 3 == 1));
+
+ log_info("%s %u <= %u * 0.8 = %g",
+ tests[j].title, hashmap_size(h), hashmap_buckets(h), hashmap_buckets(h) * 0.8);
+
+ assert_se(hashmap_size(h) <= hashmap_buckets(h) * 0.8);
+ assert_se(hashmap_size(h) == tests[j].n_entries);
+
+ while (!hashmap_isempty(h)) {
+ k = hashmap_first_key(h);
+ v = hashmap_remove(h, k);
+ assert_se(v == k);
+ }
+
+ hashmap_free(h);
+
+ n = now(CLOCK_MONOTONIC);
+ log_info("test took %s", format_timespan(b, sizeof b, n - ts, 0));
+ }
+}
+
+extern unsigned custom_counter;
+extern const struct hash_ops boring_hash_ops, custom_hash_ops;
+
+static void test_hashmap_free(void) {
+ Hashmap *h;
+ bool slow = slow_tests_enabled();
+ usec_t ts, n;
+ char b[FORMAT_TIMESPAN_MAX];
+ unsigned n_entries = slow ? 1 << 20 : 240;
+
+ const struct {
+ const char *title;
+ const struct hash_ops *ops;
+ unsigned expect_counter;
+ } tests[] = {
+ { "string_hash_ops", &boring_hash_ops, 2 * n_entries},
+ { "custom_free_hash_ops", &custom_hash_ops, 0 },
+ };
+
+ log_info("/* %s (%s, %u entries) */", __func__, slow ? "slow" : "fast", n_entries);
+
+ for (unsigned j = 0; j < ELEMENTSOF(tests); j++) {
+ ts = now(CLOCK_MONOTONIC);
+ assert_se(h = hashmap_new(tests[j].ops));
+
+ custom_counter = 0;
+ for (unsigned i = 0; i < n_entries; i++) {
+ char s[DECIMAL_STR_MAX(unsigned)];
+ char *k, *v;
+
+ xsprintf(s, "%u", i);
+ assert_se(k = strdup(s));
+ assert_se(v = strdup(s));
+ custom_counter += 2;
+
+ assert_se(hashmap_put(h, k, v) >= 0);
+ }
+
+ hashmap_free(h);
+
+ n = now(CLOCK_MONOTONIC);
+ log_info("%s test took %s", tests[j].title, format_timespan(b, sizeof b, n - ts, 0));
+
+ assert_se(custom_counter == tests[j].expect_counter);
+ }
+}
+
+static void test_hashmap_first(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(!hashmap_first(m));
+ assert_se(hashmap_put(m, "key 1", (void*) "val 1") == 1);
+ assert_se(streq(hashmap_first(m), "val 1"));
+ assert_se(hashmap_put(m, "key 2", (void*) "val 2") == 1);
+#ifdef ORDERED
+ assert_se(streq(hashmap_first(m), "val 1"));
+ assert_se(hashmap_remove(m, "key 1"));
+ assert_se(streq(hashmap_first(m), "val 2"));
+#endif
+}
+
+static void test_hashmap_first_key(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(!hashmap_first_key(m));
+ assert_se(hashmap_put(m, "key 1", NULL) == 1);
+ assert_se(streq(hashmap_first_key(m), "key 1"));
+ assert_se(hashmap_put(m, "key 2", NULL) == 1);
+#ifdef ORDERED
+ assert_se(streq(hashmap_first_key(m), "key 1"));
+ assert_se(hashmap_remove(m, "key 1") == NULL);
+ assert_se(streq(hashmap_first_key(m), "key 2"));
+#endif
+}
+
+static void test_hashmap_steal_first_key(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(!hashmap_steal_first_key(m));
+ assert_se(hashmap_put(m, "key 1", NULL) == 1);
+ assert_se(streq(hashmap_steal_first_key(m), "key 1"));
+
+ assert_se(hashmap_isempty(m));
+}
+
+static void test_hashmap_steal_first(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+ int seen[3] = {};
+ char *val;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(hashmap_put(m, "key 1", (void*) "1") == 1);
+ assert_se(hashmap_put(m, "key 2", (void*) "22") == 1);
+ assert_se(hashmap_put(m, "key 3", (void*) "333") == 1);
+
+ while ((val = hashmap_steal_first(m)))
+ seen[strlen(val) - 1]++;
+
+ assert_se(seen[0] == 1 && seen[1] == 1 && seen[2] == 1);
+
+ assert_se(hashmap_isempty(m));
+}
+
+static void test_hashmap_clear_free_free(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(hashmap_put(m, strdup("key 1"), NULL) == 1);
+ assert_se(hashmap_put(m, strdup("key 2"), NULL) == 1);
+ assert_se(hashmap_put(m, strdup("key 3"), NULL) == 1);
+
+ hashmap_clear_free_free(m);
+ assert_se(hashmap_isempty(m));
+
+ assert_se(hashmap_put(m, strdup("key 1"), strdup("value 1")) == 1);
+ assert_se(hashmap_put(m, strdup("key 2"), strdup("value 2")) == 1);
+ assert_se(hashmap_put(m, strdup("key 3"), strdup("value 3")) == 1);
+
+ hashmap_clear_free_free(m);
+ assert_se(hashmap_isempty(m));
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(test_hash_ops_key, char, string_hash_func, string_compare_func, free);
+DEFINE_PRIVATE_HASH_OPS_FULL(test_hash_ops_full, char, string_hash_func, string_compare_func, free, char, free);
+
+static void test_hashmap_clear_free_with_destructor(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&test_hash_ops_key);
+ assert_se(m);
+
+ assert_se(hashmap_put(m, strdup("key 1"), NULL) == 1);
+ assert_se(hashmap_put(m, strdup("key 2"), NULL) == 1);
+ assert_se(hashmap_put(m, strdup("key 3"), NULL) == 1);
+
+ hashmap_clear_free(m);
+ assert_se(hashmap_isempty(m));
+ m = hashmap_free(m);
+
+ m = hashmap_new(&test_hash_ops_full);
+ assert_se(m);
+
+ assert_se(hashmap_put(m, strdup("key 1"), strdup("value 1")) == 1);
+ assert_se(hashmap_put(m, strdup("key 2"), strdup("value 2")) == 1);
+ assert_se(hashmap_put(m, strdup("key 3"), strdup("value 3")) == 1);
+
+ hashmap_clear_free(m);
+ assert_se(hashmap_isempty(m));
+}
+
+static void test_hashmap_reserve(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+
+ assert_se(hashmap_reserve(m, 1) == 0);
+ assert_se(hashmap_buckets(m) < 1000);
+ assert_se(hashmap_reserve(m, 1000) == 0);
+ assert_se(hashmap_buckets(m) >= 1000);
+ assert_se(hashmap_isempty(m));
+
+ assert_se(hashmap_put(m, "key 1", (void*) "val 1") == 1);
+
+ assert_se(hashmap_reserve(m, UINT_MAX) == -ENOMEM);
+ assert_se(hashmap_reserve(m, UINT_MAX - 1) == -ENOMEM);
+}
+
+void test_hashmap_funcs(void) {
+ log_parse_environment();
+ log_open();
+
+ test_hashmap_copy();
+ test_hashmap_get_strv();
+ test_hashmap_move_one();
+ test_hashmap_move();
+ test_hashmap_replace();
+ test_hashmap_update();
+ test_hashmap_put();
+ test_hashmap_remove();
+ test_hashmap_remove2();
+ test_hashmap_remove_value();
+ test_hashmap_remove_and_put();
+ test_hashmap_remove_and_replace();
+ test_hashmap_ensure_allocated();
+ test_hashmap_foreach();
+ test_hashmap_foreach_key();
+ test_hashmap_contains();
+ test_hashmap_merge();
+ test_hashmap_isempty();
+ test_hashmap_get();
+ test_hashmap_get2();
+ test_hashmap_size();
+ test_hashmap_many();
+ test_hashmap_free();
+ test_hashmap_first();
+ test_hashmap_first_key();
+ test_hashmap_steal_first_key();
+ test_hashmap_steal_first();
+ test_hashmap_clear_free_free();
+ test_hashmap_clear_free_with_destructor();
+ test_hashmap_reserve();
+}
diff --git a/src/test/test-hashmap.c b/src/test/test-hashmap.c
new file mode 100644
index 0000000..ee4c0e6
--- /dev/null
+++ b/src/test/test-hashmap.c
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "hashmap.h"
+#include "util.h"
+
+unsigned custom_counter = 0;
+static void custom_destruct(void* p) {
+ custom_counter--;
+ free(p);
+}
+
+DEFINE_HASH_OPS_FULL(boring_hash_ops, char, string_hash_func, string_compare_func, free, char, free);
+DEFINE_HASH_OPS_FULL(custom_hash_ops, char, string_hash_func, string_compare_func, custom_destruct, char, custom_destruct);
+
+void test_hashmap_funcs(void);
+void test_ordered_hashmap_funcs(void);
+
+static void test_ordered_hashmap_next(void) {
+ _cleanup_ordered_hashmap_free_ OrderedHashmap *m = NULL;
+ int i;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(m = ordered_hashmap_new(NULL));
+ for (i = -2; i <= 2; i++)
+ assert_se(ordered_hashmap_put(m, INT_TO_PTR(i), INT_TO_PTR(i+10)) == 1);
+ for (i = -2; i <= 1; i++)
+ assert_se(ordered_hashmap_next(m, INT_TO_PTR(i)) == INT_TO_PTR(i+11));
+ assert_se(!ordered_hashmap_next(m, INT_TO_PTR(2)));
+ assert_se(!ordered_hashmap_next(NULL, INT_TO_PTR(1)));
+ assert_se(!ordered_hashmap_next(m, INT_TO_PTR(3)));
+}
+
+typedef struct Item {
+ int seen;
+} Item;
+static void item_seen(Item *item) {
+ item->seen++;
+}
+
+static void test_hashmap_free_with_destructor(void) {
+ Hashmap *m;
+ struct Item items[4] = {};
+ unsigned i;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(m = hashmap_new(NULL));
+ for (i = 0; i < ELEMENTSOF(items) - 1; i++)
+ assert_se(hashmap_put(m, INT_TO_PTR(i), items + i) == 1);
+
+ m = hashmap_free_with_destructor(m, item_seen);
+ assert_se(items[0].seen == 1);
+ assert_se(items[1].seen == 1);
+ assert_se(items[2].seen == 1);
+ assert_se(items[3].seen == 0);
+}
+
+static void test_uint64_compare_func(void) {
+ const uint64_t a = 0x100, b = 0x101;
+
+ assert_se(uint64_compare_func(&a, &a) == 0);
+ assert_se(uint64_compare_func(&a, &b) == -1);
+ assert_se(uint64_compare_func(&b, &a) == 1);
+}
+
+static void test_trivial_compare_func(void) {
+ assert_se(trivial_compare_func(INT_TO_PTR('a'), INT_TO_PTR('a')) == 0);
+ assert_se(trivial_compare_func(INT_TO_PTR('a'), INT_TO_PTR('b')) == -1);
+ assert_se(trivial_compare_func(INT_TO_PTR('b'), INT_TO_PTR('a')) == 1);
+}
+
+static void test_string_compare_func(void) {
+ assert_se(string_compare_func("fred", "wilma") != 0);
+ assert_se(string_compare_func("fred", "fred") == 0);
+}
+
+static void compare_cache(Hashmap *map, IteratedCache *cache) {
+ const void **keys = NULL, **values = NULL;
+ unsigned num, idx;
+ Iterator iter;
+ void *k, *v;
+
+ assert_se(iterated_cache_get(cache, &keys, &values, &num) == 0);
+ assert_se(num == 0 || keys);
+ assert_se(num == 0 || values);
+
+ idx = 0;
+ HASHMAP_FOREACH_KEY(v, k, map, iter) {
+ assert_se(v == values[idx]);
+ assert_se(k == keys[idx]);
+
+ idx++;
+ }
+
+ assert_se(idx == num);
+}
+
+static void test_iterated_cache(void) {
+ Hashmap *m;
+ IteratedCache *c;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(m = hashmap_new(NULL));
+ assert_se(c = hashmap_iterated_cache_new(m));
+ compare_cache(m, c);
+
+ for (int stage = 0; stage < 100; stage++) {
+
+ for (int i = 0; i < 100; i++) {
+ int foo = stage * 1000 + i;
+
+ assert_se(hashmap_put(m, INT_TO_PTR(foo), INT_TO_PTR(foo + 777)) == 1);
+ }
+
+ compare_cache(m, c);
+
+ if (!(stage % 10)) {
+ for (int i = 0; i < 100; i++) {
+ int foo = stage * 1000 + i;
+
+ assert_se(hashmap_remove(m, INT_TO_PTR(foo)) == INT_TO_PTR(foo + 777));
+ }
+
+ compare_cache(m, c);
+ }
+ }
+
+ hashmap_clear(m);
+ compare_cache(m, c);
+
+ assert_se(hashmap_free(m) == NULL);
+ assert_se(iterated_cache_free(c) == NULL);
+}
+
+static void test_path_hashmap(void) {
+ _cleanup_hashmap_free_ Hashmap *h = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(h = hashmap_new(&path_hash_ops));
+
+ assert_se(hashmap_put(h, "foo", INT_TO_PTR(1)) >= 0);
+ assert_se(hashmap_put(h, "/foo", INT_TO_PTR(2)) >= 0);
+ assert_se(hashmap_put(h, "//foo", INT_TO_PTR(3)) == -EEXIST);
+ assert_se(hashmap_put(h, "//foox/", INT_TO_PTR(4)) >= 0);
+ assert_se(hashmap_put(h, "/foox////", INT_TO_PTR(5)) == -EEXIST);
+ assert_se(hashmap_put(h, "foo//////bar/quux//", INT_TO_PTR(6)) >= 0);
+ assert_se(hashmap_put(h, "foo/bar//quux/", INT_TO_PTR(8)) == -EEXIST);
+
+ assert_se(hashmap_get(h, "foo") == INT_TO_PTR(1));
+ assert_se(hashmap_get(h, "foo/") == INT_TO_PTR(1));
+ assert_se(hashmap_get(h, "foo////") == INT_TO_PTR(1));
+ assert_se(hashmap_get(h, "/foo") == INT_TO_PTR(2));
+ assert_se(hashmap_get(h, "//foo") == INT_TO_PTR(2));
+ assert_se(hashmap_get(h, "/////foo////") == INT_TO_PTR(2));
+ assert_se(hashmap_get(h, "/////foox////") == INT_TO_PTR(4));
+ assert_se(hashmap_get(h, "/foox/") == INT_TO_PTR(4));
+ assert_se(hashmap_get(h, "/foox") == INT_TO_PTR(4));
+ assert_se(!hashmap_get(h, "foox"));
+ assert_se(hashmap_get(h, "foo/bar/quux") == INT_TO_PTR(6));
+ assert_se(hashmap_get(h, "foo////bar////quux/////") == INT_TO_PTR(6));
+ assert_se(!hashmap_get(h, "/foo////bar////quux/////"));
+}
+
+int main(int argc, const char *argv[]) {
+ test_hashmap_funcs();
+ test_ordered_hashmap_funcs();
+
+ test_ordered_hashmap_next();
+ test_hashmap_free_with_destructor();
+ test_uint64_compare_func();
+ test_trivial_compare_func();
+ test_string_compare_func();
+ test_iterated_cache();
+ test_path_hashmap();
+
+ return 0;
+}
diff --git a/src/test/test-helper.c b/src/test/test-helper.c
new file mode 100644
index 0000000..5b79d12
--- /dev/null
+++ b/src/test/test-helper.c
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "test-helper.h"
+#include "random-util.h"
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "string-util.h"
+
+int enter_cgroup_subroot(void) {
+ _cleanup_free_ char *cgroup_root = NULL, *cgroup_subroot = NULL;
+ CGroupMask supported;
+ int r;
+
+ r = cg_pid_get_path(NULL, 0, &cgroup_root);
+ if (r == -ENOMEDIUM)
+ return log_warning_errno(r, "cg_pid_get_path(NULL, 0, ...) failed: %m");
+ assert(r >= 0);
+
+ assert_se(asprintf(&cgroup_subroot, "%s/%" PRIx64, cgroup_root, random_u64()) >= 0);
+ assert_se(cg_mask_supported(&supported) >= 0);
+
+ /* If this fails, then we don't mind as the later cgroup operations will fail too, and it's fine if we handle
+ * any errors at that point. */
+
+ r = cg_create_everywhere(supported, _CGROUP_MASK_ALL, cgroup_subroot);
+ if (r < 0)
+ return r;
+
+ return cg_attach_everywhere(supported, cgroup_subroot, 0, NULL, NULL);
+}
+
+/* https://docs.travis-ci.com/user/environment-variables#default-environment-variables */
+bool is_run_on_travis_ci(void) {
+ return streq_ptr(getenv("TRAVIS"), "true");
+}
diff --git a/src/test/test-helper.h b/src/test/test-helper.h
new file mode 100644
index 0000000..77af40d
--- /dev/null
+++ b/src/test/test-helper.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/***
+ Copyright © 2013 Holger Hans Peter Freyther
+***/
+
+#include "sd-daemon.h"
+
+#include "macro.h"
+
+#define TEST_REQ_RUNNING_SYSTEMD(x) \
+ if (sd_booted() > 0) { \
+ x; \
+ } else { \
+ printf("systemd not booted skipping '%s'\n", #x); \
+ }
+
+#define MANAGER_SKIP_TEST(r) \
+ IN_SET(r, \
+ -EPERM, \
+ -EACCES, \
+ -EADDRINUSE, \
+ -EHOSTDOWN, \
+ -ENOENT, \
+ -ENOMEDIUM /* cannot determine cgroup */ \
+ )
+
+int enter_cgroup_subroot(void);
+
+bool is_run_on_travis_ci(void);
diff --git a/src/test/test-hexdecoct.c b/src/test/test-hexdecoct.c
new file mode 100644
index 0000000..5221742
--- /dev/null
+++ b/src/test/test-hexdecoct.c
@@ -0,0 +1,355 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "string-util.h"
+
+static void test_hexchar(void) {
+ assert_se(hexchar(0xa) == 'a');
+ assert_se(hexchar(0x0) == '0');
+}
+
+static void test_unhexchar(void) {
+ assert_se(unhexchar('a') == 0xA);
+ assert_se(unhexchar('A') == 0xA);
+ assert_se(unhexchar('0') == 0x0);
+}
+
+static void test_base32hexchar(void) {
+ assert_se(base32hexchar(0) == '0');
+ assert_se(base32hexchar(9) == '9');
+ assert_se(base32hexchar(10) == 'A');
+ assert_se(base32hexchar(31) == 'V');
+}
+
+static void test_unbase32hexchar(void) {
+ assert_se(unbase32hexchar('0') == 0);
+ assert_se(unbase32hexchar('9') == 9);
+ assert_se(unbase32hexchar('A') == 10);
+ assert_se(unbase32hexchar('V') == 31);
+ assert_se(unbase32hexchar('=') == -EINVAL);
+}
+
+static void test_base64char(void) {
+ assert_se(base64char(0) == 'A');
+ assert_se(base64char(26) == 'a');
+ assert_se(base64char(63) == '/');
+}
+
+static void test_unbase64char(void) {
+ assert_se(unbase64char('A') == 0);
+ assert_se(unbase64char('Z') == 25);
+ assert_se(unbase64char('a') == 26);
+ assert_se(unbase64char('z') == 51);
+ assert_se(unbase64char('0') == 52);
+ assert_se(unbase64char('9') == 61);
+ assert_se(unbase64char('+') == 62);
+ assert_se(unbase64char('/') == 63);
+ assert_se(unbase64char('=') == -EINVAL);
+}
+
+static void test_octchar(void) {
+ assert_se(octchar(00) == '0');
+ assert_se(octchar(07) == '7');
+}
+
+static void test_unoctchar(void) {
+ assert_se(unoctchar('0') == 00);
+ assert_se(unoctchar('7') == 07);
+}
+
+static void test_decchar(void) {
+ assert_se(decchar(0) == '0');
+ assert_se(decchar(9) == '9');
+}
+
+static void test_undecchar(void) {
+ assert_se(undecchar('0') == 0);
+ assert_se(undecchar('9') == 9);
+}
+
+static void test_unhexmem_one(const char *s, size_t l, int retval) {
+ _cleanup_free_ char *hex = NULL;
+ _cleanup_free_ void *mem = NULL;
+ size_t len;
+
+ assert_se(unhexmem(s, l, &mem, &len) == retval);
+ if (retval == 0) {
+ char *answer;
+
+ if (l == (size_t) -1)
+ l = strlen(s);
+
+ assert_se(hex = hexmem(mem, len));
+ answer = strndupa(strempty(s), l);
+ assert_se(streq(delete_chars(answer, WHITESPACE), hex));
+ }
+}
+
+static void test_unhexmem(void) {
+ const char *hex = "efa2149213";
+ const char *hex_space = " e f a\n 2\r 14\n\r\t9\t2 \n1\r3 \r\r\t";
+ const char *hex_invalid = "efa214921o";
+
+ test_unhexmem_one(NULL, 0, 0);
+ test_unhexmem_one("", 0, 0);
+ test_unhexmem_one("", (size_t) -1, 0);
+ test_unhexmem_one(" \n \t\r \t\t \n\n\n", (size_t) -1, 0);
+ test_unhexmem_one(hex_invalid, strlen(hex_invalid), -EINVAL);
+ test_unhexmem_one(hex_invalid, (size_t) - 1, -EINVAL);
+ test_unhexmem_one(hex, strlen(hex) - 1, -EPIPE);
+ test_unhexmem_one(hex, strlen(hex), 0);
+ test_unhexmem_one(hex, (size_t) -1, 0);
+ test_unhexmem_one(hex_space, strlen(hex_space), 0);
+ test_unhexmem_one(hex_space, (size_t) -1, 0);
+}
+
+/* https://tools.ietf.org/html/rfc4648#section-10 */
+static void test_base32hexmem(void) {
+ char *b32;
+
+ b32 = base32hexmem("", STRLEN(""), true);
+ assert_se(b32);
+ assert_se(streq(b32, ""));
+ free(b32);
+
+ b32 = base32hexmem("f", STRLEN("f"), true);
+ assert_se(b32);
+ assert_se(streq(b32, "CO======"));
+ free(b32);
+
+ b32 = base32hexmem("fo", STRLEN("fo"), true);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNG===="));
+ free(b32);
+
+ b32 = base32hexmem("foo", STRLEN("foo"), true);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMU==="));
+ free(b32);
+
+ b32 = base32hexmem("foob", STRLEN("foob"), true);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMUOG="));
+ free(b32);
+
+ b32 = base32hexmem("fooba", STRLEN("fooba"), true);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMUOJ1"));
+ free(b32);
+
+ b32 = base32hexmem("foobar", STRLEN("foobar"), true);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMUOJ1E8======"));
+ free(b32);
+
+ b32 = base32hexmem("", STRLEN(""), false);
+ assert_se(b32);
+ assert_se(streq(b32, ""));
+ free(b32);
+
+ b32 = base32hexmem("f", STRLEN("f"), false);
+ assert_se(b32);
+ assert_se(streq(b32, "CO"));
+ free(b32);
+
+ b32 = base32hexmem("fo", STRLEN("fo"), false);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNG"));
+ free(b32);
+
+ b32 = base32hexmem("foo", STRLEN("foo"), false);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMU"));
+ free(b32);
+
+ b32 = base32hexmem("foob", STRLEN("foob"), false);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMUOG"));
+ free(b32);
+
+ b32 = base32hexmem("fooba", STRLEN("fooba"), false);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMUOJ1"));
+ free(b32);
+
+ b32 = base32hexmem("foobar", STRLEN("foobar"), false);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMUOJ1E8"));
+ free(b32);
+}
+
+static void test_unbase32hexmem_one(const char *hex, bool padding, int retval, const char *ans) {
+ _cleanup_free_ void *mem = NULL;
+ size_t len;
+
+ assert_se(unbase32hexmem(hex, (size_t) -1, padding, &mem, &len) == retval);
+ if (retval == 0) {
+ char *str;
+
+ str = strndupa(mem, len);
+ assert_se(streq(str, ans));
+ }
+}
+
+static void test_unbase32hexmem(void) {
+ test_unbase32hexmem_one("", true, 0, "");
+
+ test_unbase32hexmem_one("CO======", true, 0, "f");
+ test_unbase32hexmem_one("CPNG====", true, 0, "fo");
+ test_unbase32hexmem_one("CPNMU===", true, 0, "foo");
+ test_unbase32hexmem_one("CPNMUOG=", true, 0, "foob");
+ test_unbase32hexmem_one("CPNMUOJ1", true, 0, "fooba");
+ test_unbase32hexmem_one("CPNMUOJ1E8======", true, 0, "foobar");
+
+ test_unbase32hexmem_one("A", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("A=======", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAA=====", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAAAA==", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("AB======", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAB====", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAAB===", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAAAAB=", true, -EINVAL, NULL);
+
+ test_unbase32hexmem_one("XPNMUOJ1", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("CXNMUOJ1", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("CPXMUOJ1", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("CPNXUOJ1", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("CPNMXOJ1", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("CPNMUXJ1", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("CPNMUOX1", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("CPNMUOJX", true, -EINVAL, NULL);
+
+ test_unbase32hexmem_one("", false, 0, "");
+ test_unbase32hexmem_one("CO", false, 0, "f");
+ test_unbase32hexmem_one("CPNG", false, 0, "fo");
+ test_unbase32hexmem_one("CPNMU", false, 0, "foo");
+ test_unbase32hexmem_one("CPNMUOG", false, 0, "foob");
+ test_unbase32hexmem_one("CPNMUOJ1", false, 0, "fooba");
+ test_unbase32hexmem_one("CPNMUOJ1E8", false, 0, "foobar");
+ test_unbase32hexmem_one("CPNMUOG=", false, -EINVAL, NULL);
+ test_unbase32hexmem_one("CPNMUOJ1E8======", false, -EINVAL, NULL);
+
+ test_unbase32hexmem_one("A", false, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAA", false, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAAAA", false, -EINVAL, NULL);
+ test_unbase32hexmem_one("AB", false, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAB", false, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAAB", false, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAAAAB", false, -EINVAL, NULL);
+}
+
+/* https://tools.ietf.org/html/rfc4648#section-10 */
+static void test_base64mem(void) {
+ char *b64;
+
+ assert_se(base64mem("", STRLEN(""), &b64) == 0);
+ assert_se(streq(b64, ""));
+ free(b64);
+
+ assert_se(base64mem("f", STRLEN("f"), &b64) == 4);
+ assert_se(streq(b64, "Zg=="));
+ free(b64);
+
+ assert_se(base64mem("fo", STRLEN("fo"), &b64) == 4);
+ assert_se(streq(b64, "Zm8="));
+ free(b64);
+
+ assert_se(base64mem("foo", STRLEN("foo"), &b64) == 4);
+ assert_se(streq(b64, "Zm9v"));
+ free(b64);
+
+ assert_se(base64mem("foob", STRLEN("foob"), &b64) == 8);
+ assert_se(streq(b64, "Zm9vYg=="));
+ free(b64);
+
+ assert_se(base64mem("fooba", STRLEN("fooba"), &b64) == 8);
+ assert_se(streq(b64, "Zm9vYmE="));
+ free(b64);
+
+ assert_se(base64mem("foobar", STRLEN("foobar"), &b64) == 8);
+ assert_se(streq(b64, "Zm9vYmFy"));
+ free(b64);
+}
+
+static void test_unbase64mem_one(const char *input, const char *output, int ret) {
+ _cleanup_free_ void *buffer = NULL;
+ size_t size = 0;
+
+ assert_se(unbase64mem(input, (size_t) -1, &buffer, &size) == ret);
+
+ if (ret >= 0) {
+ assert_se(size == strlen(output));
+ assert_se(memcmp(buffer, output, size) == 0);
+ assert_se(((char*) buffer)[size] == 0);
+ }
+}
+
+static void test_unbase64mem(void) {
+
+ test_unbase64mem_one("", "", 0);
+ test_unbase64mem_one("Zg==", "f", 0);
+ test_unbase64mem_one("Zm8=", "fo", 0);
+ test_unbase64mem_one("Zm9v", "foo", 0);
+ test_unbase64mem_one("Zm9vYg==", "foob", 0);
+ test_unbase64mem_one("Zm9vYmE=", "fooba", 0);
+ test_unbase64mem_one("Zm9vYmFy", "foobar", 0);
+
+ test_unbase64mem_one(" ", "", 0);
+ test_unbase64mem_one(" \n\r ", "", 0);
+ test_unbase64mem_one(" Zg\n== ", "f", 0);
+ test_unbase64mem_one(" Zm 8=\r", "fo", 0);
+ test_unbase64mem_one(" Zm9\n\r\r\nv ", "foo", 0);
+ test_unbase64mem_one(" Z m9vYg==\n\r", "foob", 0);
+ test_unbase64mem_one(" Zm 9vYmE= ", "fooba", 0);
+ test_unbase64mem_one(" Z m9v YmFy ", "foobar", 0);
+
+ test_unbase64mem_one("A", NULL, -EPIPE);
+ test_unbase64mem_one("A====", NULL, -EINVAL);
+ test_unbase64mem_one("AAB==", NULL, -EINVAL);
+ test_unbase64mem_one(" A A A B = ", NULL, -EINVAL);
+ test_unbase64mem_one(" Z m 8 = q u u x ", NULL, -ENAMETOOLONG);
+}
+
+static void test_hexdump(void) {
+ uint8_t data[146];
+ unsigned i;
+
+ hexdump(stdout, NULL, 0);
+ hexdump(stdout, "", 0);
+ hexdump(stdout, "", 1);
+ hexdump(stdout, "x", 1);
+ hexdump(stdout, "x", 2);
+ hexdump(stdout, "foobar", 7);
+ hexdump(stdout, "f\nobar", 7);
+ hexdump(stdout, "xxxxxxxxxxxxxxxxxxxxyz", 23);
+
+ for (i = 0; i < ELEMENTSOF(data); i++)
+ data[i] = i*2;
+
+ hexdump(stdout, data, sizeof(data));
+}
+
+int main(int argc, char *argv[]) {
+ test_hexchar();
+ test_unhexchar();
+ test_base32hexchar();
+ test_unbase32hexchar();
+ test_base64char();
+ test_unbase64char();
+ test_octchar();
+ test_unoctchar();
+ test_decchar();
+ test_undecchar();
+ test_unhexmem();
+ test_base32hexmem();
+ test_unbase32hexmem();
+ test_base64mem();
+ test_unbase64mem();
+ test_hexdump();
+
+ return 0;
+}
diff --git a/src/test/test-hostname-util.c b/src/test/test-hostname-util.c
new file mode 100644
index 0000000..4126a24
--- /dev/null
+++ b/src/test/test-hostname-util.c
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static void test_hostname_is_valid(void) {
+ assert_se(hostname_is_valid("foobar", false));
+ assert_se(hostname_is_valid("foobar.com", false));
+ assert_se(!hostname_is_valid("foobar.com.", false));
+ assert_se(hostname_is_valid("fooBAR", false));
+ assert_se(hostname_is_valid("fooBAR.com", false));
+ assert_se(!hostname_is_valid("fooBAR.", false));
+ assert_se(!hostname_is_valid("fooBAR.com.", false));
+ assert_se(!hostname_is_valid("fööbar", false));
+ assert_se(!hostname_is_valid("", false));
+ assert_se(!hostname_is_valid(".", false));
+ assert_se(!hostname_is_valid("..", false));
+ assert_se(!hostname_is_valid("foobar.", false));
+ assert_se(!hostname_is_valid(".foobar", false));
+ assert_se(!hostname_is_valid("foo..bar", false));
+ assert_se(!hostname_is_valid("foo.bar..", false));
+ assert_se(!hostname_is_valid("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", false));
+ assert_se(!hostname_is_valid("au-xph5-rvgrdsb5hcxc-47et3a5vvkrc-server-wyoz4elpdpe3.openstack.local", false));
+
+ assert_se(hostname_is_valid("foobar", true));
+ assert_se(hostname_is_valid("foobar.com", true));
+ assert_se(hostname_is_valid("foobar.com.", true));
+ assert_se(hostname_is_valid("fooBAR", true));
+ assert_se(hostname_is_valid("fooBAR.com", true));
+ assert_se(!hostname_is_valid("fooBAR.", true));
+ assert_se(hostname_is_valid("fooBAR.com.", true));
+ assert_se(!hostname_is_valid("fööbar", true));
+ assert_se(!hostname_is_valid("", true));
+ assert_se(!hostname_is_valid(".", true));
+ assert_se(!hostname_is_valid("..", true));
+ assert_se(!hostname_is_valid("foobar.", true));
+ assert_se(!hostname_is_valid(".foobar", true));
+ assert_se(!hostname_is_valid("foo..bar", true));
+ assert_se(!hostname_is_valid("foo.bar..", true));
+ assert_se(!hostname_is_valid("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", true));
+}
+
+static void test_hostname_cleanup(void) {
+ char *s;
+
+ s = strdupa("foobar");
+ assert_se(streq(hostname_cleanup(s), "foobar"));
+ s = strdupa("foobar.com");
+ assert_se(streq(hostname_cleanup(s), "foobar.com"));
+ s = strdupa("foobar.com.");
+ assert_se(streq(hostname_cleanup(s), "foobar.com"));
+ s = strdupa("foo-bar.-com-.");
+ assert_se(streq(hostname_cleanup(s), "foo-bar.com"));
+ s = strdupa("foo-bar-.-com-.");
+ assert_se(streq(hostname_cleanup(s), "foo-bar--com"));
+ s = strdupa("--foo-bar.-com");
+ assert_se(streq(hostname_cleanup(s), "foo-bar.com"));
+ s = strdupa("fooBAR");
+ assert_se(streq(hostname_cleanup(s), "fooBAR"));
+ s = strdupa("fooBAR.com");
+ assert_se(streq(hostname_cleanup(s), "fooBAR.com"));
+ s = strdupa("fooBAR.");
+ assert_se(streq(hostname_cleanup(s), "fooBAR"));
+ s = strdupa("fooBAR.com.");
+ assert_se(streq(hostname_cleanup(s), "fooBAR.com"));
+ s = strdupa("fööbar");
+ assert_se(streq(hostname_cleanup(s), "fbar"));
+ s = strdupa("");
+ assert_se(isempty(hostname_cleanup(s)));
+ s = strdupa(".");
+ assert_se(isempty(hostname_cleanup(s)));
+ s = strdupa("..");
+ assert_se(isempty(hostname_cleanup(s)));
+ s = strdupa("foobar.");
+ assert_se(streq(hostname_cleanup(s), "foobar"));
+ s = strdupa(".foobar");
+ assert_se(streq(hostname_cleanup(s), "foobar"));
+ s = strdupa("foo..bar");
+ assert_se(streq(hostname_cleanup(s), "foo.bar"));
+ s = strdupa("foo.bar..");
+ assert_se(streq(hostname_cleanup(s), "foo.bar"));
+ s = strdupa("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
+ assert_se(streq(hostname_cleanup(s), "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"));
+ s = strdupa("xxxx........xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
+ assert_se(streq(hostname_cleanup(s), "xxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"));
+}
+
+static void test_read_etc_hostname(void) {
+ char path[] = "/tmp/hostname.XXXXXX";
+ char *hostname;
+ int fd;
+
+ fd = mkostemp_safe(path);
+ assert(fd > 0);
+ close(fd);
+
+ /* simple hostname */
+ assert_se(write_string_file(path, "foo", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_etc_hostname(path, &hostname) == 0);
+ assert_se(streq(hostname, "foo"));
+ hostname = mfree(hostname);
+
+ /* with comment */
+ assert_se(write_string_file(path, "# comment\nfoo", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_etc_hostname(path, &hostname) == 0);
+ assert_se(hostname);
+ assert_se(streq(hostname, "foo"));
+ hostname = mfree(hostname);
+
+ /* with comment and extra whitespace */
+ assert_se(write_string_file(path, "# comment\n\n foo ", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_etc_hostname(path, &hostname) == 0);
+ assert_se(hostname);
+ assert_se(streq(hostname, "foo"));
+ hostname = mfree(hostname);
+
+ /* cleans up name */
+ assert_se(write_string_file(path, "!foo/bar.com", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_etc_hostname(path, &hostname) == 0);
+ assert_se(hostname);
+ assert_se(streq(hostname, "foobar.com"));
+ hostname = mfree(hostname);
+
+ /* no value set */
+ hostname = (char*) 0x1234;
+ assert_se(write_string_file(path, "# nothing here\n", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_etc_hostname(path, &hostname) == -ENOENT);
+ assert_se(hostname == (char*) 0x1234); /* does not touch argument on error */
+
+ /* nonexisting file */
+ assert_se(read_etc_hostname("/non/existing", &hostname) == -ENOENT);
+ assert_se(hostname == (char*) 0x1234); /* does not touch argument on error */
+
+ unlink(path);
+}
+
+int main(int argc, char *argv[]) {
+ log_parse_environment();
+ log_open();
+
+ test_hostname_is_valid();
+ test_hostname_cleanup();
+ test_read_etc_hostname();
+
+ return 0;
+}
diff --git a/src/test/test-hostname.c b/src/test/test-hostname.c
new file mode 100644
index 0000000..710c057
--- /dev/null
+++ b/src/test/test-hostname.c
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "hostname-setup.h"
+#include "util.h"
+
+int main(int argc, char* argv[]) {
+ int r;
+
+ r = hostname_setup();
+ if (r < 0)
+ log_error_errno(r, "hostname: %m");
+
+ return 0;
+}
diff --git a/src/test/test-id128.c b/src/test/test-id128.c
new file mode 100644
index 0000000..ec50e05
--- /dev/null
+++ b/src/test/test-id128.c
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+
+#include "sd-daemon.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "id128-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+#define ID128_WALDI SD_ID128_MAKE(01, 02, 03, 04, 05, 06, 07, 08, 09, 0a, 0b, 0c, 0d, 0e, 0f, 10)
+#define STR_WALDI "0102030405060708090a0b0c0d0e0f10"
+#define UUID_WALDI "01020304-0506-0708-090a-0b0c0d0e0f10"
+
+int main(int argc, char *argv[]) {
+ sd_id128_t id, id2;
+ char t[33], q[37];
+ _cleanup_free_ char *b = NULL;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert_se(sd_id128_randomize(&id) == 0);
+ printf("random: %s\n", sd_id128_to_string(id, t));
+
+ assert_se(sd_id128_from_string(t, &id2) == 0);
+ assert_se(sd_id128_equal(id, id2));
+
+ if (sd_booted() > 0) {
+ assert_se(sd_id128_get_machine(&id) == 0);
+ printf("machine: %s\n", sd_id128_to_string(id, t));
+
+ assert_se(sd_id128_get_boot(&id) == 0);
+ printf("boot: %s\n", sd_id128_to_string(id, t));
+ }
+
+ printf("waldi: %s\n", sd_id128_to_string(ID128_WALDI, t));
+ assert_se(streq(t, STR_WALDI));
+
+ assert_se(asprintf(&b, SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(ID128_WALDI)) == 32);
+ printf("waldi2: %s\n", b);
+ assert_se(streq(t, b));
+
+ printf("waldi3: %s\n", id128_to_uuid_string(ID128_WALDI, q));
+ assert_se(streq(q, UUID_WALDI));
+
+ b = mfree(b);
+ assert_se(asprintf(&b, ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(ID128_WALDI)) == 36);
+ printf("waldi4: %s\n", b);
+ assert_se(streq(q, b));
+
+ assert_se(sd_id128_from_string(STR_WALDI, &id) >= 0);
+ assert_se(sd_id128_equal(id, ID128_WALDI));
+
+ assert_se(sd_id128_from_string(UUID_WALDI, &id) >= 0);
+ assert_se(sd_id128_equal(id, ID128_WALDI));
+
+ assert_se(sd_id128_from_string("", &id) < 0);
+ assert_se(sd_id128_from_string("01020304-0506-0708-090a-0b0c0d0e0f101", &id) < 0);
+ assert_se(sd_id128_from_string("01020304-0506-0708-090a-0b0c0d0e0f10-", &id) < 0);
+ assert_se(sd_id128_from_string("01020304-0506-0708-090a0b0c0d0e0f10", &id) < 0);
+ assert_se(sd_id128_from_string("010203040506-0708-090a-0b0c0d0e0f10", &id) < 0);
+
+ assert_se(id128_is_valid(STR_WALDI));
+ assert_se(id128_is_valid(UUID_WALDI));
+ assert_se(!id128_is_valid(""));
+ assert_se(!id128_is_valid("01020304-0506-0708-090a-0b0c0d0e0f101"));
+ assert_se(!id128_is_valid("01020304-0506-0708-090a-0b0c0d0e0f10-"));
+ assert_se(!id128_is_valid("01020304-0506-0708-090a0b0c0d0e0f10"));
+ assert_se(!id128_is_valid("010203040506-0708-090a-0b0c0d0e0f10"));
+
+ fd = open_tmpfile_unlinkable(NULL, O_RDWR|O_CLOEXEC);
+ assert_se(fd >= 0);
+
+ /* First, write as UUID */
+ assert_se(sd_id128_randomize(&id) >= 0);
+ assert_se(id128_write_fd(fd, ID128_UUID, id, false) >= 0);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_PLAIN, &id2) == -EINVAL);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_UUID, &id2) >= 0);
+ assert_se(sd_id128_equal(id, id2));
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_ANY, &id2) >= 0);
+ assert_se(sd_id128_equal(id, id2));
+
+ /* Second, write as plain */
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(ftruncate(fd, 0) >= 0);
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+ assert_se(id128_write_fd(fd, ID128_PLAIN, id, false) >= 0);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_UUID, &id2) == -EINVAL);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_PLAIN, &id2) >= 0);
+ assert_se(sd_id128_equal(id, id2));
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_ANY, &id2) >= 0);
+ assert_se(sd_id128_equal(id, id2));
+
+ /* Third, write plain without trailing newline */
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(ftruncate(fd, 0) >= 0);
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+ assert_se(write(fd, sd_id128_to_string(id, t), 32) == 32);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_UUID, &id2) == -EINVAL);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_PLAIN, &id2) >= 0);
+ assert_se(sd_id128_equal(id, id2));
+
+ /* Third, write UUID without trailing newline */
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(ftruncate(fd, 0) >= 0);
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+ assert_se(write(fd, id128_to_uuid_string(id, q), 36) == 36);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_PLAIN, &id2) == -EINVAL);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_UUID, &id2) >= 0);
+ assert_se(sd_id128_equal(id, id2));
+
+ r = sd_id128_get_machine_app_specific(SD_ID128_MAKE(f0,3d,aa,eb,1c,33,4b,43,a7,32,17,29,44,bf,77,2e), &id);
+ if (r == -EOPNOTSUPP)
+ log_info("khash not supported on this kernel, skipping sd_id128_get_machine_app_specific() checks");
+ else {
+ assert_se(r >= 0);
+ assert_se(sd_id128_get_machine_app_specific(SD_ID128_MAKE(f0,3d,aa,eb,1c,33,4b,43,a7,32,17,29,44,bf,77,2e), &id2) >= 0);
+ assert_se(sd_id128_equal(id, id2));
+ assert_se(sd_id128_get_machine_app_specific(SD_ID128_MAKE(51,df,0b,4b,c3,b0,4c,97,80,e2,99,b9,8c,a3,73,b8), &id2) >= 0);
+ assert_se(!sd_id128_equal(id, id2));
+ }
+
+ /* Query the invocation ID */
+ r = sd_id128_get_invocation(&id);
+ if (r < 0)
+ log_warning_errno(r, "Failed to get invocation ID, ignoring: %m");
+ else
+ log_info("Invocation ID: " SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(id));
+
+ return 0;
+}
diff --git a/src/test/test-in-addr-util.c b/src/test/test-in-addr-util.c
new file mode 100644
index 0000000..16844e9
--- /dev/null
+++ b/src/test/test-in-addr-util.c
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/in.h>
+
+#include "log.h"
+#include "in-addr-util.h"
+
+static void test_in_addr_prefix_from_string(
+ const char *p,
+ int family,
+ int ret,
+ const union in_addr_union *u,
+ unsigned char prefixlen,
+ int ret_refuse,
+ unsigned char prefixlen_refuse,
+ int ret_legacy,
+ unsigned char prefixlen_legacy) {
+
+ union in_addr_union q;
+ unsigned char l;
+ int f, r;
+
+ r = in_addr_prefix_from_string(p, family, &q, &l);
+ assert_se(r == ret);
+
+ if (r < 0)
+ return;
+
+ assert_se(in_addr_equal(family, &q, u));
+ assert_se(l == prefixlen);
+
+ r = in_addr_prefix_from_string_auto(p, &f, &q, &l);
+ assert_se(r >= 0);
+
+ assert_se(f == family);
+ assert_se(in_addr_equal(family, &q, u));
+ assert_se(l == prefixlen);
+
+ r = in_addr_prefix_from_string_auto_internal(p, PREFIXLEN_REFUSE, &f, &q, &l);
+ assert_se(r == ret_refuse);
+
+ if (r >= 0) {
+ assert_se(f == family);
+ assert_se(in_addr_equal(family, &q, u));
+ assert_se(l == prefixlen_refuse);
+ }
+
+ r = in_addr_prefix_from_string_auto_internal(p, PREFIXLEN_LEGACY, &f, &q, &l);
+ assert_se(r == ret_legacy);
+
+ if (r >= 0) {
+ assert_se(f == family);
+ assert_se(in_addr_equal(family, &q, u));
+ assert_se(l == prefixlen_legacy);
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_in_addr_prefix_from_string("", AF_INET, -EINVAL, NULL, 0, -EINVAL, 0, -EINVAL, 0);
+ test_in_addr_prefix_from_string("/", AF_INET, -EINVAL, NULL, 0, -EINVAL, 0, -EINVAL, 0);
+ test_in_addr_prefix_from_string("/8", AF_INET, -EINVAL, NULL, 0, -EINVAL, 0, -EINVAL, 0);
+ test_in_addr_prefix_from_string("1.2.3.4", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 32, -ENOANO, 0, 0, 8);
+ test_in_addr_prefix_from_string("1.2.3.4/0", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 0, 0, 0, 0, 0);
+ test_in_addr_prefix_from_string("1.2.3.4/1", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 1, 0, 1, 0, 1);
+ test_in_addr_prefix_from_string("1.2.3.4/2", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 2, 0, 2, 0, 2);
+ test_in_addr_prefix_from_string("1.2.3.4/32", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 32, 0, 32, 0, 32);
+ test_in_addr_prefix_from_string("1.2.3.4/33", AF_INET, -ERANGE, NULL, 0, -ERANGE, 0, -ERANGE, 0);
+ test_in_addr_prefix_from_string("1.2.3.4/-1", AF_INET, -ERANGE, NULL, 0, -ERANGE, 0, -ERANGE, 0);
+ test_in_addr_prefix_from_string("::1", AF_INET, -EINVAL, NULL, 0, -EINVAL, 0, -EINVAL, 0);
+
+ test_in_addr_prefix_from_string("", AF_INET6, -EINVAL, NULL, 0, -EINVAL, 0, -EINVAL, 0);
+ test_in_addr_prefix_from_string("/", AF_INET6, -EINVAL, NULL, 0, -EINVAL, 0, -EINVAL, 0);
+ test_in_addr_prefix_from_string("/8", AF_INET6, -EINVAL, NULL, 0, -EINVAL, 0, -EINVAL, 0);
+ test_in_addr_prefix_from_string("::1", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 128, -ENOANO, 0, 0, 0);
+ test_in_addr_prefix_from_string("::1/0", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 0, 0, 0, 0, 0);
+ test_in_addr_prefix_from_string("::1/1", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 1, 0, 1, 0, 1);
+ test_in_addr_prefix_from_string("::1/2", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 2, 0, 2, 0, 2);
+ test_in_addr_prefix_from_string("::1/32", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 32, 0, 32, 0, 32);
+ test_in_addr_prefix_from_string("::1/33", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 33, 0, 33, 0, 33);
+ test_in_addr_prefix_from_string("::1/64", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 64, 0, 64, 0, 64);
+ test_in_addr_prefix_from_string("::1/128", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 128, 0, 128, 0, 128);
+ test_in_addr_prefix_from_string("::1/129", AF_INET6, -ERANGE, NULL, 0, -ERANGE, 0, -ERANGE, 0);
+ test_in_addr_prefix_from_string("::1/-1", AF_INET6, -ERANGE, NULL, 0, -ERANGE, 0, -ERANGE, 0);
+
+ return 0;
+}
diff --git a/src/test/test-install-root.c b/src/test/test-install-root.c
new file mode 100644
index 0000000..73ea68f
--- /dev/null
+++ b/src/test/test-install-root.c
@@ -0,0 +1,1082 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "install.h"
+#include "mkdir.h"
+#include "rm-rf.h"
+#include "special.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void test_basic_mask_and_enable(const char *root) {
+ const char *p;
+ UnitFileState state;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", NULL) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", NULL) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", NULL) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", NULL) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/a.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", NULL) >= 0);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/b.service");
+ assert_se(symlink("a.service", p) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", NULL) >= 0);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/c.service");
+ assert_se(symlink("/usr/lib/systemd/system/a.service", p) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", NULL) >= 0);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/d.service");
+ assert_se(symlink("c.service", p) >= 0);
+
+ /* This one is interesting, as d follows a relative, then an absolute symlink */
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", NULL) >= 0);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_mask(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/dev/null"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/a.service");
+ assert_se(streq(changes[0].path, p));
+
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", &state) >= 0 && state == UNIT_FILE_MASKED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", &state) >= 0 && state == UNIT_FILE_MASKED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", &state) >= 0 && state == UNIT_FILE_MASKED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_MASKED);
+
+ /* Enabling a masked unit should fail! */
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) == -ERFKILL);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_unmask(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/a.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) == 1);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/a.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/a.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+
+ /* Enabling it again should succeed but be a NOP */
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 0);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/a.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ /* Disabling a disabled unit must suceed but be a NOP */
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 0);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ /* Let's enable this indirectly via a symlink */
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("d.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/a.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/a.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+
+ /* Let's try to reenable */
+
+ assert_se(unit_file_reenable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("b.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/a.service");
+ assert_se(streq(changes[0].path, p));
+ assert_se(changes[1].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[1].source, "/usr/lib/systemd/system/a.service"));
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+}
+
+static void test_linked_units(const char *root) {
+ const char *p, *q;
+ UnitFileState state;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0, i;
+
+ /*
+ * We'll test three cases here:
+ *
+ * a) a unit file in /opt, that we use "systemctl link" and
+ * "systemctl enable" on to make it available to the system
+ *
+ * b) a unit file in /opt, that is statically linked into
+ * /usr/lib/systemd/system, that "enable" should work on
+ * correctly.
+ *
+ * c) a unit file in /opt, that is linked into
+ * /etc/systemd/system, and where "enable" should result in
+ * -ELOOP, since using information from /etc to generate
+ * information in /etc should not be allowed.
+ */
+
+ p = strjoina(root, "/opt/linked.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/opt/linked2.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/opt/linked3.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", NULL) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked2.service", NULL) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked3.service", NULL) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/linked2.service");
+ assert_se(symlink("/opt/linked2.service", p) >= 0);
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/linked3.service");
+ assert_se(symlink("/opt/linked3.service", p) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked2.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked3.service", &state) >= 0 && state == UNIT_FILE_LINKED);
+
+ /* First, let's link the unit into the search path */
+ assert_se(unit_file_link(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("/opt/linked.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/opt/linked.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/linked.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", &state) >= 0 && state == UNIT_FILE_LINKED);
+
+ /* Let's unlink it from the search path again */
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("linked.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/linked.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", NULL) == -ENOENT);
+
+ /* Now, let's not just link it, but also enable it */
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("/opt/linked.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 2);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/linked.service");
+ q = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/linked.service");
+ for (i = 0 ; i < n_changes; i++) {
+ assert_se(changes[i].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[i].source, "/opt/linked.service"));
+
+ if (p && streq(changes[i].path, p))
+ p = NULL;
+ else if (q && streq(changes[i].path, q))
+ q = NULL;
+ else
+ assert_not_reached("wut?");
+ }
+ assert(!p && !q);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+
+ /* And let's unlink it again */
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("linked.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 2);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/linked.service");
+ q = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/linked.service");
+ for (i = 0; i < n_changes; i++) {
+ assert_se(changes[i].type == UNIT_FILE_UNLINK);
+
+ if (p && streq(changes[i].path, p))
+ p = NULL;
+ else if (q && streq(changes[i].path, q))
+ q = NULL;
+ else
+ assert_not_reached("wut?");
+ }
+ assert(!p && !q);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", NULL) == -ENOENT);
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("linked2.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 2);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/linked2.service");
+ q = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/linked2.service");
+ for (i = 0 ; i < n_changes; i++) {
+ assert_se(changes[i].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[i].source, "/opt/linked2.service"));
+
+ if (p && streq(changes[i].path, p))
+ p = NULL;
+ else if (q && streq(changes[i].path, q))
+ q = NULL;
+ else
+ assert_not_reached("wut?");
+ }
+ assert(!p && !q);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("linked3.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(startswith(changes[0].path, root));
+ assert_se(endswith(changes[0].path, "linked3.service"));
+ assert_se(streq(changes[0].source, "/opt/linked3.service"));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+}
+
+static void test_default(const char *root) {
+ _cleanup_free_ char *def = NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ const char *p;
+
+ p = strjoina(root, "/usr/lib/systemd/system/test-default-real.target");
+ assert_se(write_string_file(p, "# pretty much empty", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/test-default.target");
+ assert_se(symlink("test-default-real.target", p) >= 0);
+
+ assert_se(unit_file_get_default(UNIT_FILE_SYSTEM, root, &def) == -ENOENT);
+
+ assert_se(unit_file_set_default(UNIT_FILE_SYSTEM, 0, root, "idontexist.target", &changes, &n_changes) == -ENOENT);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == -ENOENT);
+ assert_se(streq_ptr(changes[0].path, "idontexist.target"));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_default(UNIT_FILE_SYSTEM, root, &def) == -ENOENT);
+
+ assert_se(unit_file_set_default(UNIT_FILE_SYSTEM, 0, root, "test-default.target", &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/test-default-real.target"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH "/" SPECIAL_DEFAULT_TARGET);
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_default(UNIT_FILE_SYSTEM, root, &def) >= 0);
+ assert_se(streq_ptr(def, "test-default-real.target"));
+}
+
+static void test_add_dependency(const char *root) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ const char *p;
+
+ p = strjoina(root, "/usr/lib/systemd/system/real-add-dependency-test-target.target");
+ assert_se(write_string_file(p, "# pretty much empty", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/add-dependency-test-target.target");
+ assert_se(symlink("real-add-dependency-test-target.target", p) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/real-add-dependency-test-service.service");
+ assert_se(write_string_file(p, "# pretty much empty", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/add-dependency-test-service.service");
+ assert_se(symlink("real-add-dependency-test-service.service", p) >= 0);
+
+ assert_se(unit_file_add_dependency(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("add-dependency-test-service.service"), "add-dependency-test-target.target", UNIT_WANTS, &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/real-add-dependency-test-service.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/real-add-dependency-test-target.target.wants/real-add-dependency-test-service.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+}
+
+static void test_template_enable(const char *root) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ UnitFileState state;
+ const char *p;
+
+ log_info("== %s ==", __func__);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@def.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@foo.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/template@.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "DefaultInstance=def\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/template-symlink@.service");
+ assert_se(symlink("template@.service", p) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ log_info("== %s with template@.service enabled ==", __func__);
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template@.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/template@.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/template@def.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@def.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template@.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ log_info("== %s with template@foo.service enabled ==", __func__);
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template@foo.service"), &changes, &n_changes) >= 0);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/template@.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/template@foo.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@.service", &state) >= 0 && state == UNIT_FILE_INDIRECT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@foo.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template@foo.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@quux.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@quux.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ log_info("== %s with template-symlink@quux.service enabled ==", __func__);
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template-symlink@quux.service"), &changes, &n_changes) >= 0);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/template@.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/template@quux.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@.service", &state) >= 0 && state == UNIT_FILE_INDIRECT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@quux.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@.service", &state) >= 0 && state == UNIT_FILE_INDIRECT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@quux.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+}
+
+static void test_indirect(const char *root) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ UnitFileState state;
+ const char *p;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirecta.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectb.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectc.service", &state) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/indirecta.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "Also=indirectb.service\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/indirectb.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/indirectc.service");
+ assert_se(symlink("indirecta.service", p) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirecta.service", &state) >= 0 && state == UNIT_FILE_INDIRECT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectb.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectc.service", &state) >= 0 && state == UNIT_FILE_INDIRECT);
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("indirectc.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/indirectb.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/indirectb.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirecta.service", &state) >= 0 && state == UNIT_FILE_INDIRECT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectb.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectc.service", &state) >= 0 && state == UNIT_FILE_INDIRECT);
+
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("indirectc.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/indirectb.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+}
+
+static void test_preset_and_list(const char *root) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0, i;
+ const char *p, *q;
+ UnitFileState state;
+ bool got_yes = false, got_no = false;
+ Iterator j;
+ UnitFileList *fl;
+ Hashmap *h;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/preset-yes.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/preset-no.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system-preset/test.preset");
+ assert_se(write_string_file(p,
+ "enable *-yes.*\n"
+ "disable *\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_preset(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("preset-yes.service"), UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/preset-yes.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/preset-yes.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("preset-yes.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/preset-yes.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_preset(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("preset-no.service"), UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
+ assert_se(n_changes == 0);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_preset_all(UNIT_FILE_SYSTEM, 0, root, UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
+
+ assert_se(n_changes > 0);
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/preset-yes.service");
+
+ for (i = 0; i < n_changes; i++) {
+
+ if (changes[i].type == UNIT_FILE_SYMLINK) {
+ assert_se(streq(changes[i].source, "/usr/lib/systemd/system/preset-yes.service"));
+ assert_se(streq(changes[i].path, p));
+ } else
+ assert_se(changes[i].type == UNIT_FILE_UNLINK);
+ }
+
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(h = hashmap_new(&string_hash_ops));
+ assert_se(unit_file_get_list(UNIT_FILE_SYSTEM, root, h, NULL, NULL) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/preset-yes.service");
+ q = strjoina(root, "/usr/lib/systemd/system/preset-no.service");
+
+ HASHMAP_FOREACH(fl, h, j) {
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, basename(fl->path), &state) >= 0);
+ assert_se(fl->state == state);
+
+ if (streq(fl->path, p)) {
+ got_yes = true;
+ assert_se(fl->state == UNIT_FILE_ENABLED);
+ } else if (streq(fl->path, q)) {
+ got_no = true;
+ assert_se(fl->state == UNIT_FILE_DISABLED);
+ } else
+ assert_se(IN_SET(fl->state, UNIT_FILE_DISABLED, UNIT_FILE_STATIC, UNIT_FILE_INDIRECT));
+ }
+
+ unit_file_list_free(h);
+
+ assert_se(got_yes && got_no);
+}
+
+static void test_revert(const char *root) {
+ const char *p;
+ UnitFileState state;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+
+ assert(root);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "xx.service", NULL) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "yy.service", NULL) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/xx.service");
+ assert_se(write_string_file(p, "# Empty\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "xx.service", NULL) >= 0);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "xx.service", &state) >= 0 && state == UNIT_FILE_STATIC);
+
+ /* Initially there's nothing to revert */
+ assert_se(unit_file_revert(UNIT_FILE_SYSTEM, root, STRV_MAKE("xx.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 0);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/xx.service");
+ assert_se(write_string_file(p, "# Empty override\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ /* Revert the override file */
+ assert_se(unit_file_revert(UNIT_FILE_SYSTEM, root, STRV_MAKE("xx.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/xx.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p, "# Empty dropin\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ /* Revert the dropin file */
+ assert_se(unit_file_revert(UNIT_FILE_SYSTEM, root, STRV_MAKE("xx.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ assert_se(streq(changes[0].path, p));
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/xx.service.d");
+ assert_se(changes[1].type == UNIT_FILE_UNLINK);
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+}
+
+static void test_preset_order(const char *root) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ const char *p;
+ UnitFileState state;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-1.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-2.service", &state) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/prefix-1.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/prefix-2.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system-preset/test.preset");
+ assert_se(write_string_file(p,
+ "enable prefix-1.service\n"
+ "disable prefix-*.service\n"
+ "enable prefix-2.service\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-1.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-2.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_preset(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("prefix-1.service"), UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/prefix-1.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/prefix-1.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-1.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-2.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_preset(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("prefix-2.service"), UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
+ assert_se(n_changes == 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-1.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-2.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+}
+
+static void test_static_instance(const char *root) {
+ UnitFileState state;
+ const char *p;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@foo.service", &state) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/static-instance@.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/static-instance@foo.service");
+ assert_se(symlink("static-instance@.service", p) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@foo.service", &state) >= 0 && state == UNIT_FILE_STATIC);
+}
+
+static void test_with_dropin(const char *root) {
+ const char *p;
+ UnitFileState state;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-1.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-4a.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-4b.service", &state) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-1.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-1.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=graphical.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-1.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/with-dropin-2.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-2.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=graphical.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-3.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/with-dropin-3.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=graphical.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-4a.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/with-dropin-4a.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "Also=with-dropin-4b.service\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-4a.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-4b.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-4b.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-1.service"), &changes, &n_changes) == 1);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(changes[1].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/with-dropin-1.service"));
+ assert_se(streq(changes[1].source, "/usr/lib/systemd/system/with-dropin-1.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/with-dropin-1.service");
+ assert_se(streq(changes[0].path, p));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/graphical.target.wants/with-dropin-1.service");
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-2.service"), &changes, &n_changes) == 1);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(changes[1].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, SYSTEM_CONFIG_UNIT_PATH"/with-dropin-2.service"));
+ assert_se(streq(changes[1].source, SYSTEM_CONFIG_UNIT_PATH"/with-dropin-2.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/with-dropin-2.service");
+ assert_se(streq(changes[0].path, p));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/graphical.target.wants/with-dropin-2.service");
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-3.service"), &changes, &n_changes) == 1);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(changes[1].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/with-dropin-3.service"));
+ assert_se(streq(changes[1].source, "/usr/lib/systemd/system/with-dropin-3.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/with-dropin-3.service");
+ assert_se(streq(changes[0].path, p));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/graphical.target.wants/with-dropin-3.service");
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-4a.service"), &changes, &n_changes) == 2);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(changes[1].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/with-dropin-4a.service"));
+ assert_se(streq(changes[1].source, "/usr/lib/systemd/system/with-dropin-4b.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/with-dropin-4a.service");
+ assert_se(streq(changes[0].path, p));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/with-dropin-4b.service");
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-1.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-4a.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-4b.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+}
+
+static void test_with_dropin_template(const char *root) {
+ const char *p;
+ UnitFileState state;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-1@.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2@.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3@.service", &state) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-1@.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-1@.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=graphical.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-1@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-2@.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-2@instance-1.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=graphical.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-3@.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "DefaultInstance=instance-1\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-3@.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "DefaultInstance=instance-2\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-1@instance-1.service"), &changes, &n_changes) == 1);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(changes[1].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/with-dropin-1@.service"));
+ assert_se(streq(changes[1].source, "/usr/lib/systemd/system/with-dropin-1@.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/with-dropin-1@instance-1.service");
+ assert_se(streq(changes[0].path, p));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/graphical.target.wants/with-dropin-1@instance-1.service");
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-2@instance-1.service"), &changes, &n_changes) == 1);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(changes[1].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/with-dropin-2@.service"));
+ assert_se(streq(changes[1].source, "/usr/lib/systemd/system/with-dropin-2@.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/with-dropin-2@instance-1.service");
+ assert_se(streq(changes[0].path, p));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/graphical.target.wants/with-dropin-2@instance-1.service");
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-2@instance-2.service"), &changes, &n_changes) == 1);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/with-dropin-2@.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/with-dropin-2@instance-2.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-3@.service"), &changes, &n_changes) == 1);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/with-dropin-3@.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/with-dropin-3@instance-2.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-1@instance-1.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2@instance-1.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2@instance-2.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3@instance-1.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3@instance-2.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+}
+
+static void test_preset_multiple_instances(const char *root) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ const char *p;
+ UnitFileState state;
+
+ /* Set up template service files and preset file */
+ p = strjoina(root, "/usr/lib/systemd/system/foo@.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "DefaultInstance=def\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system-preset/test.preset");
+ assert_se(write_string_file(p,
+ "enable foo@.service bar0 bar1 bartest\n"
+ "enable emptylist@.service\n" /* This line ensures the old functionality for templated unit still works */
+ "disable *\n" , WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@bar0.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ /* Preset a single instantiated unit specified in the list */
+ assert_se(unit_file_preset(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("foo@bar0.service"), UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@bar0.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/foo@bar0.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("foo@bar0.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/foo@bar0.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ /* Check for preset-all case, only instances on the list should be enabled, not including the default instance */
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@bar1.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@bartest.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_preset_all(UNIT_FILE_SYSTEM, 0, root, UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
+ assert_se(n_changes > 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@bar0.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@bar1.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@bartest.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+
+ unit_file_changes_free(changes, n_changes);
+}
+
+int main(int argc, char *argv[]) {
+ char root[] = "/tmp/rootXXXXXX";
+ const char *p;
+
+ assert_se(mkdtemp(root));
+
+ p = strjoina(root, "/usr/lib/systemd/system/");
+ assert_se(mkdir_p(p, 0755) >= 0);
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/");
+ assert_se(mkdir_p(p, 0755) >= 0);
+
+ p = strjoina(root, "/run/systemd/system/");
+ assert_se(mkdir_p(p, 0755) >= 0);
+
+ p = strjoina(root, "/opt/");
+ assert_se(mkdir_p(p, 0755) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system-preset/");
+ assert_se(mkdir_p(p, 0755) >= 0);
+
+ test_basic_mask_and_enable(root);
+ test_linked_units(root);
+ test_default(root);
+ test_add_dependency(root);
+ test_template_enable(root);
+ test_indirect(root);
+ test_preset_and_list(root);
+ test_preset_order(root);
+ test_preset_multiple_instances(root);
+ test_revert(root);
+ test_static_instance(root);
+ test_with_dropin(root);
+ test_with_dropin_template(root);
+
+ assert_se(rm_rf(root, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+
+ return 0;
+}
diff --git a/src/test/test-install.c b/src/test/test-install.c
new file mode 100644
index 0000000..62daacc
--- /dev/null
+++ b/src/test/test-install.c
@@ -0,0 +1,273 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "install.h"
+#include "tests.h"
+
+static void dump_changes(UnitFileChange *c, unsigned n) {
+ unsigned i;
+
+ assert_se(n == 0 || c);
+
+ for (i = 0; i < n; i++) {
+ if (c[i].type == UNIT_FILE_UNLINK)
+ printf("rm '%s'\n", c[i].path);
+ else if (c[i].type == UNIT_FILE_SYMLINK)
+ printf("ln -s '%s' '%s'\n", c[i].source, c[i].path);
+ }
+}
+
+int main(int argc, char* argv[]) {
+ Hashmap *h;
+ UnitFileList *p;
+ Iterator i;
+ int r;
+ const char *const files[] = { "avahi-daemon.service", NULL };
+ const char *const files2[] = { "/home/lennart/test.service", NULL };
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ UnitFileState state = 0;
+
+ test_setup_logging(LOG_DEBUG);
+
+ h = hashmap_new(&string_hash_ops);
+ r = unit_file_get_list(UNIT_FILE_SYSTEM, NULL, h, NULL, NULL);
+ assert_se(r == 0);
+
+ HASHMAP_FOREACH(p, h, i) {
+ UnitFileState s = _UNIT_FILE_STATE_INVALID;
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(p->path), &s);
+
+ assert_se((r < 0 && p->state == UNIT_FILE_BAD) ||
+ (p->state == s));
+
+ fprintf(stderr, "%s (%s)\n",
+ p->path,
+ unit_file_state_to_string(p->state));
+ }
+
+ unit_file_list_free(h);
+
+ log_info("/*** enable **/");
+
+ r = unit_file_enable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ log_info("/*** enable2 **/");
+
+ r = unit_file_enable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, files[0], &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_ENABLED);
+
+ log_info("/*** disable ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, files[0], &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_DISABLED);
+
+ log_info("/*** mask ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_mask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+ log_info("/*** mask2 ***/");
+ r = unit_file_mask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, files[0], &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_MASKED);
+
+ log_info("/*** unmask ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_unmask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+ log_info("/*** unmask2 ***/");
+ r = unit_file_unmask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, files[0], &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_DISABLED);
+
+ log_info("/*** mask ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_mask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, files[0], &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_MASKED);
+
+ log_info("/*** disable ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+ log_info("/*** disable2 ***/");
+ r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, files[0], &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_MASKED);
+
+ log_info("/*** umask ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_unmask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, files[0], &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_DISABLED);
+
+ log_info("/*** enable files2 ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_enable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files2, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files2[0]), &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_ENABLED);
+
+ log_info("/*** disable files2 ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, STRV_MAKE(basename(files2[0])), &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files2[0]), &state);
+ assert_se(r < 0);
+
+ log_info("/*** link files2 ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_link(UNIT_FILE_SYSTEM, 0, NULL, (char**) files2, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files2[0]), &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_LINKED);
+
+ log_info("/*** disable files2 ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, STRV_MAKE(basename(files2[0])), &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files2[0]), &state);
+ assert_se(r < 0);
+
+ log_info("/*** link files2 ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_link(UNIT_FILE_SYSTEM, 0, NULL, (char**) files2, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files2[0]), &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_LINKED);
+
+ log_info("/*** reenable files2 ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_reenable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files2, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files2[0]), &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_ENABLED);
+
+ log_info("/*** disable files2 ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, STRV_MAKE(basename(files2[0])), &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files2[0]), &state);
+ assert_se(r < 0);
+ log_info("/*** preset files ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_preset(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, UNIT_FILE_PRESET_FULL, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files[0]), &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_ENABLED);
+
+ return 0;
+}
diff --git a/src/test/test-io-util.c b/src/test/test-io-util.c
new file mode 100644
index 0000000..42e632a
--- /dev/null
+++ b/src/test/test-io-util.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "macro.h"
+
+static void test_sparse_write_one(int fd, const char *buffer, size_t n) {
+ char check[n];
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(ftruncate(fd, 0) >= 0);
+ assert_se(sparse_write(fd, buffer, n, 4) == (ssize_t) n);
+
+ assert_se(lseek(fd, 0, SEEK_CUR) == (off_t) n);
+ assert_se(ftruncate(fd, n) >= 0);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(read(fd, check, n) == (ssize_t) n);
+
+ assert_se(memcmp(buffer, check, n) == 0);
+}
+
+static void test_sparse_write(void) {
+ const char test_a[] = "test";
+ const char test_b[] = "\0\0\0\0test\0\0\0\0";
+ const char test_c[] = "\0\0test\0\0\0\0";
+ const char test_d[] = "\0\0test\0\0\0test\0\0\0\0test\0\0\0\0\0test\0\0\0test\0\0\0\0test\0\0\0\0\0\0\0\0";
+ const char test_e[] = "test\0\0\0\0test";
+ _cleanup_close_ int fd = -1;
+ char fn[] = "/tmp/sparseXXXXXX";
+
+ fd = mkostemp(fn, O_CLOEXEC);
+ assert_se(fd >= 0);
+ unlink(fn);
+
+ test_sparse_write_one(fd, test_a, sizeof(test_a));
+ test_sparse_write_one(fd, test_b, sizeof(test_b));
+ test_sparse_write_one(fd, test_c, sizeof(test_c));
+ test_sparse_write_one(fd, test_d, sizeof(test_d));
+ test_sparse_write_one(fd, test_e, sizeof(test_e));
+}
+
+int main(void) {
+ test_sparse_write();
+
+ return 0;
+}
diff --git a/src/test/test-ip-protocol-list.c b/src/test/test-ip-protocol-list.c
new file mode 100644
index 0000000..79390e5
--- /dev/null
+++ b/src/test/test-ip-protocol-list.c
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/in.h>
+
+#include "macro.h"
+#include "ip-protocol-list.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+static void test_int(int i) {
+ char str[DECIMAL_STR_MAX(int)];
+
+ assert_se(ip_protocol_from_name(ip_protocol_to_name(i)) == i);
+
+ xsprintf(str, "%i", i);
+ assert_se(ip_protocol_from_name(ip_protocol_to_name(parse_ip_protocol(str))) == i);
+}
+
+static void test_int_fail(int i) {
+ char str[DECIMAL_STR_MAX(int)];
+
+ assert_se(!ip_protocol_to_name(i));
+
+ xsprintf(str, "%i", i);
+ assert_se(parse_ip_protocol(str) == -EINVAL);
+}
+
+static void test_str(const char *s) {
+ assert_se(streq(ip_protocol_to_name(ip_protocol_from_name(s)), s));
+ assert_se(streq(ip_protocol_to_name(parse_ip_protocol(s)), s));
+}
+
+static void test_str_fail(const char *s) {
+ assert_se(ip_protocol_from_name(s) == -EINVAL);
+ assert_se(parse_ip_protocol(s) == -EINVAL);
+}
+
+static void test_parse_ip_protocol(const char *s, int expected) {
+ assert_se(parse_ip_protocol(s) == expected);
+}
+
+int main(int argc, const char *argv[]) {
+ test_int(IPPROTO_TCP);
+ test_int(IPPROTO_DCCP);
+ test_int_fail(-1);
+ test_int_fail(1024 * 1024);
+
+ test_str("sctp");
+ test_str("udp");
+ test_str_fail("hoge");
+ test_str_fail("-1");
+ test_str_fail("1000000000");
+
+ test_parse_ip_protocol("sctp", IPPROTO_SCTP);
+ test_parse_ip_protocol("ScTp", IPPROTO_SCTP);
+ test_parse_ip_protocol("ip", IPPROTO_IP);
+ test_parse_ip_protocol("", IPPROTO_IP);
+ test_parse_ip_protocol("1", 1);
+ test_parse_ip_protocol("0", 0);
+ test_parse_ip_protocol("-10", -EINVAL);
+ test_parse_ip_protocol("100000000", -EINVAL);
+
+ return 0;
+}
diff --git a/src/test/test-ipcrm.c b/src/test/test-ipcrm.c
new file mode 100644
index 0000000..4b658a0
--- /dev/null
+++ b/src/test/test-ipcrm.c
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "clean-ipc.h"
+#include "user-util.h"
+#include "tests.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ uid_t uid;
+ int r;
+ const char* name = argv[1] ?: NOBODY_USER_NAME;
+
+ test_setup_logging(LOG_INFO);
+
+ r = get_user_creds(&name, &uid, NULL, NULL, NULL, 0);
+ if (r == -ESRCH)
+ return log_tests_skipped("Failed to resolve user");
+ if (r < 0) {
+ log_error_errno(r, "Failed to resolve \"%s\": %m", name);
+ return EXIT_FAILURE;
+ }
+
+ r = clean_ipc_by_uid(uid);
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/test/test-job-type.c b/src/test/test-job-type.c
new file mode 100644
index 0000000..d51e0d9
--- /dev/null
+++ b/src/test/test-job-type.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+
+#include "job.h"
+#include "service.h"
+#include "unit.h"
+
+int main(int argc, char *argv[]) {
+ JobType a, b, c, ab, bc, ab_c, bc_a, a_bc;
+ const ServiceState test_states[] = { SERVICE_DEAD, SERVICE_RUNNING };
+ unsigned i;
+ bool merged_ab;
+
+ /* fake a unit */
+ static Service s = {
+ .meta.load_state = UNIT_LOADED,
+ .type = SERVICE_SIMPLE,
+ };
+ Unit *u = UNIT(&s);
+
+ for (i = 0; i < ELEMENTSOF(test_states); i++) {
+ s.state = test_states[i];
+ printf("\nWith collapsing for service state %s\n"
+ "=========================================\n", service_state_to_string(s.state));
+ for (a = 0; a < _JOB_TYPE_MAX_MERGING; a++) {
+ for (b = 0; b < _JOB_TYPE_MAX_MERGING; b++) {
+
+ ab = a;
+ merged_ab = (job_type_merge_and_collapse(&ab, b, u) >= 0);
+
+ if (!job_type_is_mergeable(a, b)) {
+ assert_se(!merged_ab);
+ printf("Not mergeable: %s + %s\n", job_type_to_string(a), job_type_to_string(b));
+ continue;
+ }
+
+ assert_se(merged_ab);
+ printf("%s + %s = %s\n", job_type_to_string(a), job_type_to_string(b), job_type_to_string(ab));
+
+ for (c = 0; c < _JOB_TYPE_MAX_MERGING; c++) {
+
+ /* Verify transitivity of mergeability of job types */
+ assert_se(!job_type_is_mergeable(a, b) ||
+ !job_type_is_mergeable(b, c) ||
+ job_type_is_mergeable(a, c));
+
+ /* Verify that merged entries can be merged with the same entries
+ * they can be merged with separately */
+ assert_se(!job_type_is_mergeable(a, c) || job_type_is_mergeable(ab, c));
+ assert_se(!job_type_is_mergeable(b, c) || job_type_is_mergeable(ab, c));
+
+ /* Verify that if a merged with b is not mergeable with c, then
+ * either a or b is not mergeable with c either. */
+ assert_se(job_type_is_mergeable(ab, c) || !job_type_is_mergeable(a, c) || !job_type_is_mergeable(b, c));
+
+ bc = b;
+ if (job_type_merge_and_collapse(&bc, c, u) >= 0) {
+
+ /* Verify associativity */
+
+ ab_c = ab;
+ assert_se(job_type_merge_and_collapse(&ab_c, c, u) == 0);
+
+ bc_a = bc;
+ assert_se(job_type_merge_and_collapse(&bc_a, a, u) == 0);
+
+ a_bc = a;
+ assert_se(job_type_merge_and_collapse(&a_bc, bc, u) == 0);
+
+ assert_se(ab_c == bc_a);
+ assert_se(ab_c == a_bc);
+
+ printf("%s + %s + %s = %s\n", job_type_to_string(a), job_type_to_string(b), job_type_to_string(c), job_type_to_string(ab_c));
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
diff --git a/src/test/test-journal-importer.c b/src/test/test-journal-importer.c
new file mode 100644
index 0000000..cddbfa7
--- /dev/null
+++ b/src/test/test-journal-importer.c
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "alloc-util.h"
+#include "log.h"
+#include "journal-importer.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void assert_iovec_entry(const struct iovec *iovec, const char* content) {
+ assert_se(strlen(content) == iovec->iov_len);
+ assert_se(memcmp(content, iovec->iov_base, iovec->iov_len) == 0);
+}
+
+#define COREDUMP_PROC_GROUP \
+ "COREDUMP_PROC_CGROUP=1:name=systemd:/\n" \
+ "0::/user.slice/user-1002.slice/user@1002.service/gnome-terminal-server.service\n"
+
+static void test_basic_parsing(void) {
+ _cleanup_(journal_importer_cleanup) JournalImporter imp = {};
+ _cleanup_free_ char *journal_data_path = NULL;
+ int r;
+
+ journal_data_path = path_join(get_testdata_dir(), "journal-data/journal-1.txt");
+ imp.fd = open(journal_data_path, O_RDONLY|O_CLOEXEC);
+ assert_se(imp.fd >= 0);
+
+ do
+ r = journal_importer_process_data(&imp);
+ while (r == 0 && !journal_importer_eof(&imp));
+ assert_se(r == 1);
+
+ /* We read one entry, so we should get EOF on next read, but not yet */
+ assert_se(!journal_importer_eof(&imp));
+
+ assert_se(imp.iovw.count == 6);
+ assert_iovec_entry(&imp.iovw.iovec[0], "_BOOT_ID=1531fd22ec84429e85ae888b12fadb91");
+ assert_iovec_entry(&imp.iovw.iovec[1], "_TRANSPORT=journal");
+ assert_iovec_entry(&imp.iovw.iovec[2], COREDUMP_PROC_GROUP);
+ assert_iovec_entry(&imp.iovw.iovec[3], "COREDUMP_RLIMIT=-1");
+ assert_iovec_entry(&imp.iovw.iovec[4], COREDUMP_PROC_GROUP);
+ assert_iovec_entry(&imp.iovw.iovec[5], "_SOURCE_REALTIME_TIMESTAMP=1478389147837945");
+
+ /* Let's check if we get EOF now */
+ r = journal_importer_process_data(&imp);
+ assert_se(r == 0);
+ assert_se(journal_importer_eof(&imp));
+}
+
+static void test_bad_input(void) {
+ _cleanup_(journal_importer_cleanup) JournalImporter imp = {};
+ _cleanup_free_ char *journal_data_path = NULL;
+ int r;
+
+ journal_data_path = path_join(get_testdata_dir(), "journal-data/journal-2.txt");
+ imp.fd = open(journal_data_path, O_RDONLY|O_CLOEXEC);
+ assert_se(imp.fd >= 0);
+
+ do
+ r = journal_importer_process_data(&imp);
+ while (!journal_importer_eof(&imp));
+ assert_se(r == 0); /* If we don't have enough input, 0 is returned */
+
+ assert_se(journal_importer_eof(&imp));
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_basic_parsing();
+ test_bad_input();
+
+ return 0;
+}
diff --git a/src/test/test-json.c b/src/test/test-json.c
new file mode 100644
index 0000000..fdf1b4f
--- /dev/null
+++ b/src/test/test-json.c
@@ -0,0 +1,461 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <math.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "json-internal.h"
+#include "json.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static void test_tokenizer(const char *data, ...) {
+ unsigned line = 0, column = 0;
+ void *state = NULL;
+ va_list ap;
+
+ va_start(ap, data);
+
+ for (;;) {
+ unsigned token_line, token_column;
+ _cleanup_free_ char *str = NULL;
+ JsonValue v = JSON_VALUE_NULL;
+ int t, tt;
+
+ t = json_tokenize(&data, &str, &v, &token_line, &token_column, &state, &line, &column);
+ tt = va_arg(ap, int);
+
+ assert_se(t == tt);
+
+ if (t == JSON_TOKEN_END || t < 0)
+ break;
+
+ else if (t == JSON_TOKEN_STRING) {
+ const char *nn;
+
+ nn = va_arg(ap, const char *);
+ assert_se(streq_ptr(nn, str));
+
+ } else if (t == JSON_TOKEN_REAL) {
+ long double d;
+
+ d = va_arg(ap, long double);
+
+ /* Valgrind doesn't support long double calculations and automatically downgrades to 80bit:
+ * http://www.valgrind.org/docs/manual/manual-core.html#manual-core.limits.
+ * Some architectures might not support long double either.
+ */
+
+ assert_se(fabsl(d - v.real) < 1e-10 ||
+ fabsl((d - v.real) / v.real) < 1e-10);
+
+ } else if (t == JSON_TOKEN_INTEGER) {
+ intmax_t i;
+
+ i = va_arg(ap, intmax_t);
+ assert_se(i == v.integer);
+
+ } else if (t == JSON_TOKEN_UNSIGNED) {
+ uintmax_t u;
+
+ u = va_arg(ap, uintmax_t);
+ assert_se(u == v.unsig);
+
+ } else if (t == JSON_TOKEN_BOOLEAN) {
+ bool b;
+
+ b = va_arg(ap, int);
+ assert_se(b == v.boolean);
+ }
+ }
+
+ va_end(ap);
+}
+
+typedef void (*Test)(JsonVariant *);
+
+static void test_variant(const char *data, Test test) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *w = NULL;
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ r = json_parse(data, &v, NULL, NULL);
+ assert_se(r == 0);
+ assert_se(v);
+
+ r = json_variant_format(v, 0, &s);
+ assert_se(r >= 0);
+ assert_se(s);
+
+ log_info("formatted normally: %s\n", s);
+
+ r = json_parse(data, &w, NULL, NULL);
+ assert_se(r == 0);
+ assert_se(w);
+ assert_se(json_variant_has_type(v, json_variant_type(w)));
+ assert_se(json_variant_has_type(w, json_variant_type(v)));
+ assert_se(json_variant_equal(v, w));
+
+ s = mfree(s);
+ w = json_variant_unref(w);
+
+ r = json_variant_format(v, JSON_FORMAT_PRETTY, &s);
+ assert_se(r >= 0);
+ assert_se(s);
+
+ log_info("formatted prettily:\n%s", s);
+
+ r = json_parse(data, &w, NULL, NULL);
+ assert_se(r == 0);
+ assert_se(w);
+
+ assert_se(json_variant_has_type(v, json_variant_type(w)));
+ assert_se(json_variant_has_type(w, json_variant_type(v)));
+ assert_se(json_variant_equal(v, w));
+
+ s = mfree(s);
+ r = json_variant_format(v, JSON_FORMAT_COLOR, &s);
+ assert_se(r >= 0);
+ assert_se(s);
+ printf("Normal with color: %s\n", s);
+
+ s = mfree(s);
+ r = json_variant_format(v, JSON_FORMAT_COLOR|JSON_FORMAT_PRETTY, &s);
+ assert_se(r >= 0);
+ assert_se(s);
+ printf("Pretty with color:\n%s\n", s);
+
+ if (test)
+ test(v);
+}
+
+static void test_1(JsonVariant *v) {
+ JsonVariant *p, *q;
+ unsigned i;
+
+ /* 3 keys + 3 values */
+ assert_se(json_variant_elements(v) == 6);
+
+ /* has k */
+ p = json_variant_by_key(v, "k");
+ assert_se(p && json_variant_type(p) == JSON_VARIANT_STRING);
+
+ /* k equals v */
+ assert_se(streq(json_variant_string(p), "v"));
+
+ /* has foo */
+ p = json_variant_by_key(v, "foo");
+ assert_se(p && json_variant_type(p) == JSON_VARIANT_ARRAY && json_variant_elements(p) == 3);
+
+ /* check foo[0] = 1, foo[1] = 2, foo[2] = 3 */
+ for (i = 0; i < 3; ++i) {
+ q = json_variant_by_index(p, i);
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_UNSIGNED && json_variant_unsigned(q) == (i+1));
+ assert_se(q && json_variant_has_type(q, JSON_VARIANT_INTEGER) && json_variant_integer(q) == (i+1));
+ }
+
+ /* has bar */
+ p = json_variant_by_key(v, "bar");
+ assert_se(p && json_variant_type(p) == JSON_VARIANT_OBJECT && json_variant_elements(p) == 2);
+
+ /* zap is null */
+ q = json_variant_by_key(p, "zap");
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_NULL);
+}
+
+static void test_2(JsonVariant *v) {
+ JsonVariant *p, *q;
+
+ /* 2 keys + 2 values */
+ assert_se(json_variant_elements(v) == 4);
+
+ /* has mutant */
+ p = json_variant_by_key(v, "mutant");
+ assert_se(p && json_variant_type(p) == JSON_VARIANT_ARRAY && json_variant_elements(p) == 4);
+
+ /* mutant[0] == 1 */
+ q = json_variant_by_index(p, 0);
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_UNSIGNED && json_variant_unsigned(q) == 1);
+ assert_se(q && json_variant_has_type(q, JSON_VARIANT_INTEGER) && json_variant_integer(q) == 1);
+
+ /* mutant[1] == null */
+ q = json_variant_by_index(p, 1);
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_NULL);
+
+ /* mutant[2] == "1" */
+ q = json_variant_by_index(p, 2);
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_STRING && streq(json_variant_string(q), "1"));
+
+ /* mutant[3] == JSON_VARIANT_OBJECT */
+ q = json_variant_by_index(p, 3);
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_OBJECT && json_variant_elements(q) == 2);
+
+ /* has 1 */
+ p = json_variant_by_key(q, "1");
+ assert_se(p && json_variant_type(p) == JSON_VARIANT_ARRAY && json_variant_elements(p) == 2);
+
+ /* "1"[0] == 1 */
+ q = json_variant_by_index(p, 0);
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_UNSIGNED && json_variant_unsigned(q) == 1);
+ assert_se(q && json_variant_has_type(q, JSON_VARIANT_INTEGER) && json_variant_integer(q) == 1);
+
+ /* "1"[1] == "1" */
+ q = json_variant_by_index(p, 1);
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_STRING && streq(json_variant_string(q), "1"));
+
+ /* has thisisaverylongproperty */
+ p = json_variant_by_key(v, "thisisaverylongproperty");
+ assert_se(p && json_variant_type(p) == JSON_VARIANT_REAL && fabsl(json_variant_real(p) - 1.27) < 0.001);
+}
+
+static void test_zeroes(JsonVariant *v) {
+ size_t i;
+
+ /* Make sure zero is how we expect it. */
+
+ assert_se(json_variant_elements(v) == 13);
+
+ for (i = 0; i < json_variant_elements(v); i++) {
+ JsonVariant *w;
+ size_t j;
+
+ assert_se(w = json_variant_by_index(v, i));
+
+ assert_se(json_variant_integer(w) == 0);
+ assert_se(json_variant_unsigned(w) == 0U);
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+ assert_se(json_variant_real(w) == 0.0L);
+#pragma GCC diagnostic pop
+
+ assert_se(json_variant_is_integer(w));
+ assert_se(json_variant_is_unsigned(w));
+ assert_se(json_variant_is_real(w));
+ assert_se(json_variant_is_number(w));
+
+ assert_se(!json_variant_is_negative(w));
+
+ assert_se(IN_SET(json_variant_type(w), JSON_VARIANT_INTEGER, JSON_VARIANT_UNSIGNED, JSON_VARIANT_REAL));
+
+ for (j = 0; j < json_variant_elements(v); j++) {
+ JsonVariant *q;
+
+ assert_se(q = json_variant_by_index(v, j));
+
+ assert_se(json_variant_equal(w, q));
+ }
+ }
+}
+
+static void test_build(void) {
+ _cleanup_(json_variant_unrefp) JsonVariant *a = NULL, *b = NULL;
+ _cleanup_free_ char *s = NULL, *t = NULL;
+
+ assert_se(json_build(&a, JSON_BUILD_STRING("hallo")) >= 0);
+ assert_se(json_build(&b, JSON_BUILD_LITERAL(" \"hallo\" ")) >= 0);
+ assert_se(json_variant_equal(a, b));
+
+ b = json_variant_unref(b);
+
+ assert_se(json_build(&b, JSON_BUILD_VARIANT(a)) >= 0);
+ assert_se(json_variant_equal(a, b));
+
+ b = json_variant_unref(b);
+ assert_se(json_build(&b, JSON_BUILD_STRING("pief")) >= 0);
+ assert_se(!json_variant_equal(a, b));
+
+ a = json_variant_unref(a);
+ b = json_variant_unref(b);
+
+ assert_se(json_build(&a, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("one", JSON_BUILD_INTEGER(7)),
+ JSON_BUILD_PAIR("two", JSON_BUILD_REAL(2.0)),
+ JSON_BUILD_PAIR("three", JSON_BUILD_INTEGER(0)))) >= 0);
+
+ assert_se(json_build(&b, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("two", JSON_BUILD_INTEGER(2)),
+ JSON_BUILD_PAIR("three", JSON_BUILD_REAL(0)),
+ JSON_BUILD_PAIR("one", JSON_BUILD_REAL(7)))) >= 0);
+
+ assert_se(json_variant_equal(a, b));
+
+ a = json_variant_unref(a);
+ b = json_variant_unref(b);
+
+ const char* arr_1234[] = {"one", "two", "three", "four", NULL};
+ assert_se(json_build(&a, JSON_BUILD_ARRAY(JSON_BUILD_OBJECT(JSON_BUILD_PAIR("x", JSON_BUILD_BOOLEAN(true)),
+ JSON_BUILD_PAIR("y", JSON_BUILD_OBJECT(JSON_BUILD_PAIR("this", JSON_BUILD_NULL)))),
+ JSON_BUILD_VARIANT(NULL),
+ JSON_BUILD_LITERAL(NULL),
+ JSON_BUILD_STRING(NULL),
+ JSON_BUILD_NULL,
+ JSON_BUILD_INTEGER(77),
+ JSON_BUILD_ARRAY(JSON_BUILD_VARIANT(JSON_VARIANT_STRING_CONST("foobar")),
+ JSON_BUILD_VARIANT(JSON_VARIANT_STRING_CONST("zzz"))),
+ JSON_BUILD_STRV((char**) arr_1234))) >= 0);
+
+ assert_se(json_variant_format(a, 0, &s) >= 0);
+ log_info("GOT: %s\n", s);
+ assert_se(json_parse(s, &b, NULL, NULL) >= 0);
+ assert_se(json_variant_equal(a, b));
+
+ a = json_variant_unref(a);
+ b = json_variant_unref(b);
+
+ assert_se(json_build(&a, JSON_BUILD_REAL(M_PIl)) >= 0);
+
+ s = mfree(s);
+ assert_se(json_variant_format(a, 0, &s) >= 0);
+ log_info("GOT: %s\n", s);
+ assert_se(json_parse(s, &b, NULL, NULL) >= 0);
+ assert_se(json_variant_format(b, 0, &t) >= 0);
+ log_info("GOT: %s\n", t);
+
+ assert_se(streq(s, t));
+
+ a = json_variant_unref(a);
+ b = json_variant_unref(b);
+
+ assert_se(json_build(&a, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("x", JSON_BUILD_STRING("y")),
+ JSON_BUILD_PAIR("z", JSON_BUILD_STRING("a")),
+ JSON_BUILD_PAIR("b", JSON_BUILD_STRING("c"))
+ )) >= 0);
+
+ assert_se(json_build(&b, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("x", JSON_BUILD_STRING("y")),
+ JSON_BUILD_PAIR_CONDITION(false, "p", JSON_BUILD_STRING("q")),
+ JSON_BUILD_PAIR_CONDITION(true, "z", JSON_BUILD_STRING("a")),
+ JSON_BUILD_PAIR_CONDITION(false, "j", JSON_BUILD_ARRAY(JSON_BUILD_STRING("k"), JSON_BUILD_STRING("u"), JSON_BUILD_STRING("i"))),
+ JSON_BUILD_PAIR("b", JSON_BUILD_STRING("c"))
+ )) >= 0);
+
+ assert_se(json_variant_equal(a, b));
+}
+
+static void test_source(void) {
+ static const char data[] =
+ "\n"
+ "\n"
+ "{\n"
+ "\"foo\" : \"bar\", \n"
+ "\"qüüx\" : [ 1, 2, 3,\n"
+ "4,\n"
+ "5 ],\n"
+ "\"miep\" : { \"hallo\" : 1 },\n"
+ "\n"
+ "\"zzzzzz\" \n"
+ ":\n"
+ "[ true, \n"
+ "false, 7.5, {} ]\n"
+ "}\n";
+
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ printf("--- original begin ---\n"
+ "%s"
+ "--- original end ---\n", data);
+
+ assert_se(f = fmemopen((void*) data, strlen(data), "r"));
+
+ assert_se(json_parse_file(f, "waldo", &v, NULL, NULL) >= 0);
+
+ printf("--- non-pretty begin ---\n");
+ json_variant_dump(v, 0, stdout, NULL);
+ printf("\n--- non-pretty end ---\n");
+
+ printf("--- pretty begin ---\n");
+ json_variant_dump(v, JSON_FORMAT_PRETTY|JSON_FORMAT_COLOR|JSON_FORMAT_SOURCE, stdout, NULL);
+ printf("--- pretty end ---\n");
+}
+
+static void test_depth(void) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ unsigned i;
+ int r;
+
+ v = JSON_VARIANT_STRING_CONST("start");
+
+ /* Let's verify that the maximum depth checks work */
+
+ for (i = 0;; i++) {
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+
+ assert_se(i <= UINT16_MAX);
+ if (i & 1)
+ r = json_variant_new_array(&w, &v, 1);
+ else
+ r = json_variant_new_object(&w, (JsonVariant*[]) { JSON_VARIANT_STRING_CONST("key"), v }, 2);
+ if (r == -ELNRNG) {
+ log_info("max depth at %u", i);
+ break;
+ }
+
+ assert_se(r >= 0);
+
+ json_variant_unref(v);
+ v = TAKE_PTR(w);
+ }
+
+ json_variant_dump(v, 0, stdout, NULL);
+ fputs("\n", stdout);
+}
+
+int main(int argc, char *argv[]) {
+
+ log_set_max_level(LOG_DEBUG);
+ log_parse_environment();
+ log_open();
+
+ test_tokenizer("x", -EINVAL);
+ test_tokenizer("", JSON_TOKEN_END);
+ test_tokenizer(" ", JSON_TOKEN_END);
+ test_tokenizer("0", JSON_TOKEN_UNSIGNED, (uintmax_t) 0, JSON_TOKEN_END);
+ test_tokenizer("-0", JSON_TOKEN_INTEGER, (intmax_t) 0, JSON_TOKEN_END);
+ test_tokenizer("1234", JSON_TOKEN_UNSIGNED, (uintmax_t) 1234, JSON_TOKEN_END);
+ test_tokenizer("-1234", JSON_TOKEN_INTEGER, (intmax_t) -1234, JSON_TOKEN_END);
+ test_tokenizer("18446744073709551615", JSON_TOKEN_UNSIGNED, (uintmax_t) UINT64_MAX, JSON_TOKEN_END);
+ test_tokenizer("-9223372036854775808", JSON_TOKEN_INTEGER, (intmax_t) INT64_MIN, JSON_TOKEN_END);
+ test_tokenizer("18446744073709551616", JSON_TOKEN_REAL, (long double) 18446744073709551616.0L, JSON_TOKEN_END);
+ test_tokenizer("-9223372036854775809", JSON_TOKEN_REAL, (long double) -9223372036854775809.0L, JSON_TOKEN_END);
+ test_tokenizer("-1234", JSON_TOKEN_INTEGER, (intmax_t) -1234, JSON_TOKEN_END);
+ test_tokenizer("3.141", JSON_TOKEN_REAL, (long double) 3.141, JSON_TOKEN_END);
+ test_tokenizer("0.0", JSON_TOKEN_REAL, (long double) 0.0, JSON_TOKEN_END);
+ test_tokenizer("7e3", JSON_TOKEN_REAL, (long double) 7e3, JSON_TOKEN_END);
+ test_tokenizer("-7e-3", JSON_TOKEN_REAL, (long double) -7e-3, JSON_TOKEN_END);
+ test_tokenizer("true", JSON_TOKEN_BOOLEAN, true, JSON_TOKEN_END);
+ test_tokenizer("false", JSON_TOKEN_BOOLEAN, false, JSON_TOKEN_END);
+ test_tokenizer("null", JSON_TOKEN_NULL, JSON_TOKEN_END);
+ test_tokenizer("{}", JSON_TOKEN_OBJECT_OPEN, JSON_TOKEN_OBJECT_CLOSE, JSON_TOKEN_END);
+ test_tokenizer("\t {\n} \n", JSON_TOKEN_OBJECT_OPEN, JSON_TOKEN_OBJECT_CLOSE, JSON_TOKEN_END);
+ test_tokenizer("[]", JSON_TOKEN_ARRAY_OPEN, JSON_TOKEN_ARRAY_CLOSE, JSON_TOKEN_END);
+ test_tokenizer("\t [] \n\n", JSON_TOKEN_ARRAY_OPEN, JSON_TOKEN_ARRAY_CLOSE, JSON_TOKEN_END);
+ test_tokenizer("\"\"", JSON_TOKEN_STRING, "", JSON_TOKEN_END);
+ test_tokenizer("\"foo\"", JSON_TOKEN_STRING, "foo", JSON_TOKEN_END);
+ test_tokenizer("\"foo\\nfoo\"", JSON_TOKEN_STRING, "foo\nfoo", JSON_TOKEN_END);
+ test_tokenizer("{\"foo\" : \"bar\"}", JSON_TOKEN_OBJECT_OPEN, JSON_TOKEN_STRING, "foo", JSON_TOKEN_COLON, JSON_TOKEN_STRING, "bar", JSON_TOKEN_OBJECT_CLOSE, JSON_TOKEN_END);
+ test_tokenizer("{\"foo\" : [true, false]}", JSON_TOKEN_OBJECT_OPEN, JSON_TOKEN_STRING, "foo", JSON_TOKEN_COLON, JSON_TOKEN_ARRAY_OPEN, JSON_TOKEN_BOOLEAN, true, JSON_TOKEN_COMMA, JSON_TOKEN_BOOLEAN, false, JSON_TOKEN_ARRAY_CLOSE, JSON_TOKEN_OBJECT_CLOSE, JSON_TOKEN_END);
+ test_tokenizer("\"\xef\xbf\xbd\"", JSON_TOKEN_STRING, "\xef\xbf\xbd", JSON_TOKEN_END);
+ test_tokenizer("\"\\ufffd\"", JSON_TOKEN_STRING, "\xef\xbf\xbd", JSON_TOKEN_END);
+ test_tokenizer("\"\\uf\"", -EINVAL);
+ test_tokenizer("\"\\ud800a\"", -EINVAL);
+ test_tokenizer("\"\\udc00\\udc00\"", -EINVAL);
+ test_tokenizer("\"\\ud801\\udc37\"", JSON_TOKEN_STRING, "\xf0\x90\x90\xb7", JSON_TOKEN_END);
+
+ test_tokenizer("[1, 2, -3]", JSON_TOKEN_ARRAY_OPEN, JSON_TOKEN_UNSIGNED, (uintmax_t) 1, JSON_TOKEN_COMMA, JSON_TOKEN_UNSIGNED, (uintmax_t) 2, JSON_TOKEN_COMMA, JSON_TOKEN_INTEGER, (intmax_t) -3, JSON_TOKEN_ARRAY_CLOSE, JSON_TOKEN_END);
+
+ test_variant("{\"k\": \"v\", \"foo\": [1, 2, 3], \"bar\": {\"zap\": null}}", test_1);
+ test_variant("{\"mutant\": [1, null, \"1\", {\"1\": [1, \"1\"]}], \"thisisaverylongproperty\": 1.27}", test_2);
+ test_variant("{\"foo\" : \"\\uDBFF\\uDFFF\\\"\\uD9FF\\uDFFFFFF\\\"\\uDBFF\\uDFFF\\\"\\uD9FF\\uDFFF\\uDBFF\\uDFFFF\\uDBFF\\uDFFF\\uDBFF\\uDFFF\\uDBFF\\uDFFF\\uDBFF\\uDFFF\\\"\\uD9FF\\uDFFFFF\\\"\\uDBFF\\uDFFF\\\"\\uD9FF\\uDFFF\\uDBFF\\uDFFF\"}", NULL);
+
+ test_variant("[ 0, -0, 0.0, -0.0, 0.000, -0.000, 0e0, -0e0, 0e+0, -0e-0, 0e-0, -0e000, 0e+000 ]", test_zeroes);
+
+ test_build();
+
+ test_source();
+
+ test_depth();
+
+ return 0;
+}
diff --git a/src/test/test-libudev.c b/src/test/test-libudev.c
new file mode 100644
index 0000000..15c0f88
--- /dev/null
+++ b/src/test/test-libudev.c
@@ -0,0 +1,559 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "build.h"
+#include "fd-util.h"
+#include "libudev-list-internal.h"
+#include "libudev-util.h"
+#include "log.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+static void print_device(struct udev_device *device) {
+ const char *str;
+ dev_t devnum;
+ int count;
+ struct udev_list_entry *list_entry;
+
+ log_info("*** device: %p ***", device);
+ str = udev_device_get_action(device);
+ if (str != NULL)
+ log_info("action: '%s'", str);
+
+ str = udev_device_get_syspath(device);
+ log_info("syspath: '%s'", str);
+
+ str = udev_device_get_sysname(device);
+ log_info("sysname: '%s'", str);
+
+ str = udev_device_get_sysnum(device);
+ if (str != NULL)
+ log_info("sysnum: '%s'", str);
+
+ str = udev_device_get_devpath(device);
+ log_info("devpath: '%s'", str);
+
+ str = udev_device_get_subsystem(device);
+ if (str != NULL)
+ log_info("subsystem: '%s'", str);
+
+ str = udev_device_get_devtype(device);
+ if (str != NULL)
+ log_info("devtype: '%s'", str);
+
+ str = udev_device_get_driver(device);
+ if (str != NULL)
+ log_info("driver: '%s'", str);
+
+ str = udev_device_get_devnode(device);
+ if (str != NULL)
+ log_info("devname: '%s'", str);
+
+ devnum = udev_device_get_devnum(device);
+ if (major(devnum) > 0)
+ log_info("devnum: %u:%u", major(devnum), minor(devnum));
+
+ count = 0;
+ udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(device)) {
+ log_info("link: '%s'", udev_list_entry_get_name(list_entry));
+ count++;
+ }
+ if (count > 0)
+ log_info("found %i links", count);
+
+ count = 0;
+ udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(device)) {
+ log_info("property: '%s=%s'",
+ udev_list_entry_get_name(list_entry),
+ udev_list_entry_get_value(list_entry));
+ count++;
+ }
+ if (count > 0)
+ log_info("found %i properties", count);
+
+ str = udev_device_get_property_value(device, "MAJOR");
+ if (str != NULL)
+ log_info("MAJOR: '%s'", str);
+
+ str = udev_device_get_sysattr_value(device, "dev");
+ if (str != NULL)
+ log_info("attr{dev}: '%s'", str);
+}
+
+static void test_device(struct udev *udev, const char *syspath) {
+ _cleanup_(udev_device_unrefp) struct udev_device *device;
+
+ log_info("looking at device: %s", syspath);
+ device = udev_device_new_from_syspath(udev, syspath);
+ if (device == NULL)
+ log_warning_errno(errno, "udev_device_new_from_syspath: %m");
+ else
+ print_device(device);
+}
+
+static void test_device_parents(struct udev *udev, const char *syspath) {
+ _cleanup_(udev_device_unrefp) struct udev_device *device;
+ struct udev_device *device_parent;
+
+ log_info("looking at device: %s", syspath);
+ device = udev_device_new_from_syspath(udev, syspath);
+ if (device == NULL)
+ return;
+
+ log_info("looking at parents");
+ device_parent = device;
+ do {
+ print_device(device_parent);
+ device_parent = udev_device_get_parent(device_parent);
+ } while (device_parent != NULL);
+
+ log_info("looking at parents again");
+ device_parent = device;
+ do {
+ print_device(device_parent);
+ device_parent = udev_device_get_parent(device_parent);
+ } while (device_parent != NULL);
+}
+
+static void test_device_devnum(struct udev *udev) {
+ dev_t devnum = makedev(1, 3);
+ _cleanup_(udev_device_unrefp) struct udev_device *device;
+
+ log_info("looking up device: %u:%u", major(devnum), minor(devnum));
+ device = udev_device_new_from_devnum(udev, 'c', devnum);
+ if (device == NULL)
+ log_warning_errno(errno, "udev_device_new_from_devnum: %m");
+ else
+ print_device(device);
+}
+
+static void test_device_subsys_name(struct udev *udev, const char *subsys, const char *dev) {
+ _cleanup_(udev_device_unrefp) struct udev_device *device;
+
+ log_info("looking up device: '%s:%s'", subsys, dev);
+ device = udev_device_new_from_subsystem_sysname(udev, subsys, dev);
+ if (device == NULL)
+ log_warning_errno(errno, "udev_device_new_from_subsystem_sysname: %m");
+ else
+ print_device(device);
+}
+
+static int test_enumerate_print_list(struct udev_enumerate *enumerate) {
+ struct udev_list_entry *list_entry;
+ int count = 0;
+
+ udev_list_entry_foreach(list_entry, udev_enumerate_get_list_entry(enumerate)) {
+ struct udev_device *device;
+
+ device = udev_device_new_from_syspath(udev_enumerate_get_udev(enumerate),
+ udev_list_entry_get_name(list_entry));
+ if (device != NULL) {
+ log_info("device: '%s' (%s)",
+ udev_device_get_syspath(device),
+ udev_device_get_subsystem(device));
+ udev_device_unref(device);
+ count++;
+ }
+ }
+ log_info("found %i devices", count);
+ return count;
+}
+
+static void test_monitor(struct udev *udev) {
+ _cleanup_(udev_monitor_unrefp) struct udev_monitor *udev_monitor;
+ _cleanup_close_ int fd_ep;
+ int fd_udev;
+ struct epoll_event ep_udev = {
+ .events = EPOLLIN,
+ }, ep_stdin = {
+ .events = EPOLLIN,
+ .data.fd = STDIN_FILENO,
+ };
+
+ fd_ep = epoll_create1(EPOLL_CLOEXEC);
+ assert_se(fd_ep >= 0);
+
+ udev_monitor = udev_monitor_new_from_netlink(udev, "udev");
+ assert_se(udev_monitor != NULL);
+
+ fd_udev = udev_monitor_get_fd(udev_monitor);
+ ep_udev.data.fd = fd_udev;
+
+ assert_se(udev_monitor_filter_add_match_subsystem_devtype(udev_monitor, "block", NULL) >= 0);
+ assert_se(udev_monitor_filter_add_match_subsystem_devtype(udev_monitor, "tty", NULL) >= 0);
+ assert_se(udev_monitor_filter_add_match_subsystem_devtype(udev_monitor, "usb", "usb_device") >= 0);
+
+ assert_se(udev_monitor_enable_receiving(udev_monitor) >= 0);
+
+ assert_se(epoll_ctl(fd_ep, EPOLL_CTL_ADD, fd_udev, &ep_udev) >= 0);
+ assert_se(epoll_ctl(fd_ep, EPOLL_CTL_ADD, STDIN_FILENO, &ep_stdin) >= 0);
+
+ for (;;) {
+ int fdcount;
+ struct epoll_event ev[4];
+ struct udev_device *device;
+ int i;
+
+ printf("waiting for events from udev, press ENTER to exit\n");
+ fdcount = epoll_wait(fd_ep, ev, ELEMENTSOF(ev), -1);
+ printf("epoll fd count: %i\n", fdcount);
+
+ for (i = 0; i < fdcount; i++) {
+ if (ev[i].data.fd == fd_udev && ev[i].events & EPOLLIN) {
+ device = udev_monitor_receive_device(udev_monitor);
+ if (device == NULL) {
+ printf("no device from socket\n");
+ continue;
+ }
+ print_device(device);
+ udev_device_unref(device);
+ } else if (ev[i].data.fd == STDIN_FILENO && ev[i].events & EPOLLIN) {
+ printf("exiting loop\n");
+ return;
+ }
+ }
+ }
+}
+
+static void test_queue(struct udev *udev) {
+ struct udev_queue *udev_queue;
+ bool empty;
+
+ udev_queue = udev_queue_new(udev);
+ assert_se(udev_queue);
+
+ empty = udev_queue_get_queue_is_empty(udev_queue);
+ log_info("queue is %s", empty ? "empty" : "not empty");
+ udev_queue_unref(udev_queue);
+}
+
+static int test_enumerate(struct udev *udev, const char *subsystem) {
+ struct udev_enumerate *udev_enumerate;
+ int r;
+
+ log_info("enumerate '%s'", subsystem == NULL ? "<all>" : subsystem);
+ udev_enumerate = udev_enumerate_new(udev);
+ if (udev_enumerate == NULL)
+ return -1;
+ udev_enumerate_add_match_subsystem(udev_enumerate, subsystem);
+ udev_enumerate_scan_devices(udev_enumerate);
+ test_enumerate_print_list(udev_enumerate);
+ udev_enumerate_unref(udev_enumerate);
+
+ log_info("enumerate 'net' + duplicated scan + null + zero");
+ udev_enumerate = udev_enumerate_new(udev);
+ if (udev_enumerate == NULL)
+ return -1;
+ udev_enumerate_add_match_subsystem(udev_enumerate, "net");
+ udev_enumerate_scan_devices(udev_enumerate);
+ udev_enumerate_scan_devices(udev_enumerate);
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/zero");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/null");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/zero");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/null");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/zero");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/null");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/null");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/zero");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/zero");
+ udev_enumerate_scan_devices(udev_enumerate);
+ test_enumerate_print_list(udev_enumerate);
+ udev_enumerate_unref(udev_enumerate);
+
+ log_info("enumerate 'block'");
+ udev_enumerate = udev_enumerate_new(udev);
+ if (udev_enumerate == NULL)
+ return -1;
+ udev_enumerate_add_match_subsystem(udev_enumerate,"block");
+ r = udev_enumerate_add_match_is_initialized(udev_enumerate);
+ if (r < 0) {
+ udev_enumerate_unref(udev_enumerate);
+ return r;
+ }
+ udev_enumerate_scan_devices(udev_enumerate);
+ test_enumerate_print_list(udev_enumerate);
+ udev_enumerate_unref(udev_enumerate);
+
+ log_info("enumerate 'not block'");
+ udev_enumerate = udev_enumerate_new(udev);
+ if (udev_enumerate == NULL)
+ return -1;
+ udev_enumerate_add_nomatch_subsystem(udev_enumerate, "block");
+ udev_enumerate_scan_devices(udev_enumerate);
+ test_enumerate_print_list(udev_enumerate);
+ udev_enumerate_unref(udev_enumerate);
+
+ log_info("enumerate 'pci, mem, vc'");
+ udev_enumerate = udev_enumerate_new(udev);
+ if (udev_enumerate == NULL)
+ return -1;
+ udev_enumerate_add_match_subsystem(udev_enumerate, "pci");
+ udev_enumerate_add_match_subsystem(udev_enumerate, "mem");
+ udev_enumerate_add_match_subsystem(udev_enumerate, "vc");
+ udev_enumerate_scan_devices(udev_enumerate);
+ test_enumerate_print_list(udev_enumerate);
+ udev_enumerate_unref(udev_enumerate);
+
+ log_info("enumerate 'subsystem'");
+ udev_enumerate = udev_enumerate_new(udev);
+ if (udev_enumerate == NULL)
+ return -1;
+ udev_enumerate_scan_subsystems(udev_enumerate);
+ test_enumerate_print_list(udev_enumerate);
+ udev_enumerate_unref(udev_enumerate);
+
+ log_info("enumerate 'property IF_FS_*=filesystem'");
+ udev_enumerate = udev_enumerate_new(udev);
+ if (udev_enumerate == NULL)
+ return -1;
+ udev_enumerate_add_match_property(udev_enumerate, "ID_FS*", "filesystem");
+ udev_enumerate_scan_devices(udev_enumerate);
+ test_enumerate_print_list(udev_enumerate);
+ udev_enumerate_unref(udev_enumerate);
+ return 0;
+}
+
+static void test_hwdb(struct udev *udev, const char *modalias) {
+ struct udev_hwdb *hwdb;
+ struct udev_list_entry *entry;
+
+ hwdb = udev_hwdb_new(udev);
+
+ udev_list_entry_foreach(entry, udev_hwdb_get_properties_list_entry(hwdb, modalias, 0))
+ log_info("'%s'='%s'", udev_list_entry_get_name(entry), udev_list_entry_get_value(entry));
+
+ hwdb = udev_hwdb_unref(hwdb);
+ assert_se(hwdb == NULL);
+}
+
+static void test_util_replace_whitespace_one_len(const char *str, size_t len, const char *expected) {
+ _cleanup_free_ char *result = NULL;
+ int r;
+
+ result = new(char, len + 1);
+ assert_se(result);
+ r = util_replace_whitespace(str, result, len);
+ assert_se((size_t) r == strlen(expected));
+ assert_se(streq(result, expected));
+}
+
+static void test_util_replace_whitespace_one(const char *str, const char *expected) {
+ test_util_replace_whitespace_one_len(str, strlen(str), expected);
+}
+
+static void test_util_replace_whitespace(void) {
+ test_util_replace_whitespace_one("hogehoge", "hogehoge");
+ test_util_replace_whitespace_one("hoge hoge", "hoge_hoge");
+ test_util_replace_whitespace_one(" hoge hoge ", "hoge_hoge");
+ test_util_replace_whitespace_one(" ", "");
+ test_util_replace_whitespace_one("hoge ", "hoge");
+
+ test_util_replace_whitespace_one_len("hoge hoge ", 9, "hoge_hoge");
+ test_util_replace_whitespace_one_len("hoge hoge ", 8, "hoge_hog");
+ test_util_replace_whitespace_one_len("hoge hoge ", 7, "hoge_ho");
+ test_util_replace_whitespace_one_len("hoge hoge ", 6, "hoge_h");
+ test_util_replace_whitespace_one_len("hoge hoge ", 5, "hoge");
+ test_util_replace_whitespace_one_len("hoge hoge ", 4, "hoge");
+ test_util_replace_whitespace_one_len("hoge hoge ", 3, "hog");
+ test_util_replace_whitespace_one_len("hoge hoge ", 2, "ho");
+ test_util_replace_whitespace_one_len("hoge hoge ", 1, "h");
+ test_util_replace_whitespace_one_len("hoge hoge ", 0, "");
+
+ test_util_replace_whitespace_one_len(" hoge hoge ", 16, "hoge_hoge");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 15, "hoge_hoge");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 14, "hoge_hog");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 13, "hoge_ho");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 12, "hoge_h");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 11, "hoge");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 10, "hoge");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 9, "hoge");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 8, "hoge");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 7, "hog");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 6, "ho");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 5, "h");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 4, "");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 3, "");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 2, "");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 1, "");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 0, "");
+}
+
+static void test_util_resolve_subsys_kernel_one(const char *str, bool read_value, int retval, const char *expected) {
+ char result[UTIL_PATH_SIZE];
+ int r;
+
+ r = util_resolve_subsys_kernel(str, result, sizeof(result), read_value);
+ assert_se(r == retval);
+ if (r >= 0)
+ assert_se(streq(result, expected));
+}
+
+static void test_util_resolve_subsys_kernel(void) {
+ test_util_resolve_subsys_kernel_one("hoge", false, -EINVAL, NULL);
+ test_util_resolve_subsys_kernel_one("[hoge", false, -EINVAL, NULL);
+ test_util_resolve_subsys_kernel_one("[hoge/foo", false, -EINVAL, NULL);
+ test_util_resolve_subsys_kernel_one("[hoge/]", false, -ENODEV, NULL);
+
+ test_util_resolve_subsys_kernel_one("[net/lo]", false, 0, "/sys/devices/virtual/net/lo");
+ test_util_resolve_subsys_kernel_one("[net/lo]/", false, 0, "/sys/devices/virtual/net/lo");
+ test_util_resolve_subsys_kernel_one("[net/lo]hoge", false, 0, "/sys/devices/virtual/net/lo/hoge");
+ test_util_resolve_subsys_kernel_one("[net/lo]/hoge", false, 0, "/sys/devices/virtual/net/lo/hoge");
+
+ test_util_resolve_subsys_kernel_one("[net/lo]", true, -EINVAL, NULL);
+ test_util_resolve_subsys_kernel_one("[net/lo]/", true, -EINVAL, NULL);
+ test_util_resolve_subsys_kernel_one("[net/lo]hoge", true, 0, "");
+ test_util_resolve_subsys_kernel_one("[net/lo]/hoge", true, 0, "");
+ test_util_resolve_subsys_kernel_one("[net/lo]address", true, 0, "00:00:00:00:00:00");
+ test_util_resolve_subsys_kernel_one("[net/lo]/address", true, 0, "00:00:00:00:00:00");
+}
+
+static void test_list(void) {
+ struct udev_list list = {};
+ struct udev_list_entry *e;
+
+ /* empty list */
+ udev_list_init(&list, false);
+ assert_se(!udev_list_get_entry(&list));
+
+ /* unique == false */
+ udev_list_init(&list, false);
+ assert_se(udev_list_entry_add(&list, "aaa", "hoge"));
+ assert_se(udev_list_entry_add(&list, "aaa", "hogehoge"));
+ assert_se(udev_list_entry_add(&list, "bbb", "foo"));
+ e = udev_list_get_entry(&list);
+ assert_se(e);
+ assert_se(streq_ptr(udev_list_entry_get_name(e), "aaa"));
+ assert_se(streq_ptr(udev_list_entry_get_value(e), "hoge"));
+ e = udev_list_entry_get_next(e);
+ assert_se(e);
+ assert_se(streq_ptr(udev_list_entry_get_name(e), "aaa"));
+ assert_se(streq_ptr(udev_list_entry_get_value(e), "hogehoge"));
+ e = udev_list_entry_get_next(e);
+ assert_se(e);
+ assert_se(streq_ptr(udev_list_entry_get_name(e), "bbb"));
+ assert_se(streq_ptr(udev_list_entry_get_value(e), "foo"));
+ assert_se(!udev_list_entry_get_next(e));
+
+ assert_se(!udev_list_entry_get_by_name(e, "aaa"));
+ assert_se(!udev_list_entry_get_by_name(e, "bbb"));
+ assert_se(!udev_list_entry_get_by_name(e, "ccc"));
+ udev_list_cleanup(&list);
+
+ /* unique == true */
+ udev_list_init(&list, true);
+ assert_se(udev_list_entry_add(&list, "aaa", "hoge"));
+ assert_se(udev_list_entry_add(&list, "aaa", "hogehoge"));
+ assert_se(udev_list_entry_add(&list, "bbb", "foo"));
+ e = udev_list_get_entry(&list);
+ assert_se(e);
+ assert_se(streq_ptr(udev_list_entry_get_name(e), "aaa"));
+ assert_se(streq_ptr(udev_list_entry_get_value(e), "hogehoge"));
+ e = udev_list_entry_get_next(e);
+ assert_se(streq_ptr(udev_list_entry_get_name(e), "bbb"));
+ assert_se(streq_ptr(udev_list_entry_get_value(e), "foo"));
+ assert_se(!udev_list_entry_get_next(e));
+
+ e = udev_list_entry_get_by_name(e, "bbb");
+ assert_se(e);
+ assert_se(streq_ptr(udev_list_entry_get_name(e), "bbb"));
+ assert_se(streq_ptr(udev_list_entry_get_value(e), "foo"));
+ e = udev_list_entry_get_by_name(e, "aaa");
+ assert_se(e);
+ assert_se(streq_ptr(udev_list_entry_get_name(e), "aaa"));
+ assert_se(streq_ptr(udev_list_entry_get_value(e), "hogehoge"));
+ assert_se(!udev_list_entry_get_by_name(e, "ccc"));
+ udev_list_cleanup(&list);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(udev_unrefp) struct udev *udev = NULL;
+ bool arg_monitor = false;
+ static const struct option options[] = {
+ { "syspath", required_argument, NULL, 'p' },
+ { "subsystem", required_argument, NULL, 's' },
+ { "debug", no_argument, NULL, 'd' },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'V' },
+ { "monitor", no_argument, NULL, 'm' },
+ {}
+ };
+ const char *syspath = "/devices/virtual/mem/null";
+ const char *subsystem = NULL;
+ int c;
+
+ udev = udev_new();
+ log_info("context: %p", udev);
+ if (udev == NULL) {
+ log_info("no context");
+ return 1;
+ }
+
+ while ((c = getopt_long(argc, argv, "p:s:dhVm", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'p':
+ syspath = optarg;
+ break;
+
+ case 's':
+ subsystem = optarg;
+ break;
+
+ case 'd':
+ if (log_get_max_level() < LOG_INFO)
+ log_set_max_level(LOG_INFO);
+ break;
+
+ case 'h':
+ printf("--debug --syspath= --subsystem= --help\n");
+ return EXIT_SUCCESS;
+
+ case 'V':
+ printf("%s\n", GIT_VERSION);
+ return EXIT_SUCCESS;
+
+ case 'm':
+ arg_monitor = true;
+ break;
+
+ case '?':
+ return EXIT_FAILURE;
+
+ default:
+ assert_not_reached("Unhandled option code.");
+ }
+
+ /* add sys path if needed */
+ if (!startswith(syspath, "/sys"))
+ syspath = strjoina("/sys/", syspath);
+
+ test_device(udev, syspath);
+ test_device_devnum(udev);
+ test_device_subsys_name(udev, "block", "sda");
+ test_device_subsys_name(udev, "subsystem", "pci");
+ test_device_subsys_name(udev, "drivers", "scsi:sd");
+ test_device_subsys_name(udev, "module", "printk");
+ test_device_parents(udev, syspath);
+
+ test_enumerate(udev, subsystem);
+
+ test_queue(udev);
+
+ test_hwdb(udev, "usb:v0D50p0011*");
+
+ if (arg_monitor)
+ test_monitor(udev);
+
+ test_util_replace_whitespace();
+ test_util_resolve_subsys_kernel();
+
+ test_list();
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-list.c b/src/test/test-list.c
new file mode 100644
index 0000000..24e0496
--- /dev/null
+++ b/src/test/test-list.c
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2013 Jan Janssen
+***/
+
+#include "list.h"
+#include "util.h"
+
+int main(int argc, const char *argv[]) {
+ size_t i;
+ typedef struct list_item {
+ LIST_FIELDS(struct list_item, item);
+ } list_item;
+ LIST_HEAD(list_item, head);
+ list_item items[4];
+ list_item *cursor;
+
+ LIST_HEAD_INIT(head);
+ assert_se(head == NULL);
+
+ for (i = 0; i < ELEMENTSOF(items); i++) {
+ LIST_INIT(item, &items[i]);
+ assert_se(LIST_JUST_US(item, &items[i]));
+ LIST_PREPEND(item, head, &items[i]);
+ }
+
+ i = 0;
+ LIST_FOREACH_OTHERS(item, cursor, &items[2]) {
+ i++;
+ assert_se(cursor != &items[2]);
+ }
+ assert_se(i == ELEMENTSOF(items)-1);
+
+ i = 0;
+ LIST_FOREACH_OTHERS(item, cursor, &items[0]) {
+ i++;
+ assert_se(cursor != &items[0]);
+ }
+ assert_se(i == ELEMENTSOF(items)-1);
+
+ i = 0;
+ LIST_FOREACH_OTHERS(item, cursor, &items[3]) {
+ i++;
+ assert_se(cursor != &items[3]);
+ }
+ assert_se(i == ELEMENTSOF(items)-1);
+
+ assert_se(!LIST_JUST_US(item, head));
+
+ assert_se(items[0].item_next == NULL);
+ assert_se(items[1].item_next == &items[0]);
+ assert_se(items[2].item_next == &items[1]);
+ assert_se(items[3].item_next == &items[2]);
+
+ assert_se(items[0].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[2]);
+ assert_se(items[2].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_FIND_HEAD(item, &items[0], cursor);
+ assert_se(cursor == &items[3]);
+
+ LIST_FIND_TAIL(item, &items[3], cursor);
+ assert_se(cursor == &items[0]);
+
+ LIST_REMOVE(item, head, &items[1]);
+ assert_se(LIST_JUST_US(item, &items[1]));
+
+ assert_se(items[0].item_next == NULL);
+ assert_se(items[2].item_next == &items[0]);
+ assert_se(items[3].item_next == &items[2]);
+
+ assert_se(items[0].item_prev == &items[2]);
+ assert_se(items[2].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_INSERT_AFTER(item, head, &items[3], &items[1]);
+ assert_se(items[0].item_next == NULL);
+ assert_se(items[2].item_next == &items[0]);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[3].item_next == &items[1]);
+
+ assert_se(items[0].item_prev == &items[2]);
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_REMOVE(item, head, &items[1]);
+ assert_se(LIST_JUST_US(item, &items[1]));
+
+ assert_se(items[0].item_next == NULL);
+ assert_se(items[2].item_next == &items[0]);
+ assert_se(items[3].item_next == &items[2]);
+
+ assert_se(items[0].item_prev == &items[2]);
+ assert_se(items[2].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_INSERT_BEFORE(item, head, &items[2], &items[1]);
+ assert_se(items[0].item_next == NULL);
+ assert_se(items[2].item_next == &items[0]);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[3].item_next == &items[1]);
+
+ assert_se(items[0].item_prev == &items[2]);
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_REMOVE(item, head, &items[0]);
+ assert_se(LIST_JUST_US(item, &items[0]));
+
+ assert_se(items[2].item_next == NULL);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[3].item_next == &items[1]);
+
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_INSERT_BEFORE(item, head, &items[3], &items[0]);
+ assert_se(items[2].item_next == NULL);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[3].item_next == &items[1]);
+ assert_se(items[0].item_next == &items[3]);
+
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[3]);
+ assert_se(items[3].item_prev == &items[0]);
+ assert_se(items[0].item_prev == NULL);
+ assert_se(head == &items[0]);
+
+ LIST_REMOVE(item, head, &items[0]);
+ assert_se(LIST_JUST_US(item, &items[0]));
+
+ assert_se(items[2].item_next == NULL);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[3].item_next == &items[1]);
+
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_INSERT_BEFORE(item, head, NULL, &items[0]);
+ assert_se(items[0].item_next == NULL);
+ assert_se(items[2].item_next == &items[0]);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[3].item_next == &items[1]);
+
+ assert_se(items[0].item_prev == &items[2]);
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_REMOVE(item, head, &items[0]);
+ assert_se(LIST_JUST_US(item, &items[0]));
+
+ assert_se(items[2].item_next == NULL);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[3].item_next == &items[1]);
+
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_REMOVE(item, head, &items[1]);
+ assert_se(LIST_JUST_US(item, &items[1]));
+
+ assert_se(items[2].item_next == NULL);
+ assert_se(items[3].item_next == &items[2]);
+
+ assert_se(items[2].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_REMOVE(item, head, &items[2]);
+ assert_se(LIST_JUST_US(item, &items[2]));
+ assert_se(LIST_JUST_US(item, head));
+
+ LIST_REMOVE(item, head, &items[3]);
+ assert_se(LIST_JUST_US(item, &items[3]));
+
+ assert_se(head == NULL);
+
+ for (i = 0; i < ELEMENTSOF(items); i++) {
+ assert_se(LIST_JUST_US(item, &items[i]));
+ LIST_APPEND(item, head, &items[i]);
+ }
+
+ assert_se(!LIST_JUST_US(item, head));
+
+ assert_se(items[0].item_next == &items[1]);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[2].item_next == &items[3]);
+ assert_se(items[3].item_next == NULL);
+
+ assert_se(items[0].item_prev == NULL);
+ assert_se(items[1].item_prev == &items[0]);
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[3].item_prev == &items[2]);
+
+ for (i = 0; i < ELEMENTSOF(items); i++)
+ LIST_REMOVE(item, head, &items[i]);
+
+ assert_se(head == NULL);
+
+ return 0;
+}
diff --git a/src/test/test-locale-util.c b/src/test/test-locale-util.c
new file mode 100644
index 0000000..c6f8c1f
--- /dev/null
+++ b/src/test/test-locale-util.c
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "locale-util.h"
+#include "macro.h"
+#include "strv.h"
+
+static void test_get_locales(void) {
+ _cleanup_strv_free_ char **locales = NULL;
+ char **p;
+ int r;
+
+ r = get_locales(&locales);
+ assert_se(r >= 0);
+ assert_se(locales);
+
+ STRV_FOREACH(p, locales) {
+ puts(*p);
+ assert_se(locale_is_valid(*p));
+ }
+}
+
+static void test_locale_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(locale_is_valid("en_EN.utf8"));
+ assert_se(locale_is_valid("fr_FR.utf8"));
+ assert_se(locale_is_valid("fr_FR@euro"));
+ assert_se(locale_is_valid("fi_FI"));
+ assert_se(locale_is_valid("POSIX"));
+ assert_se(locale_is_valid("C"));
+
+ assert_se(!locale_is_valid(""));
+ assert_se(!locale_is_valid("/usr/bin/foo"));
+ assert_se(!locale_is_valid("\x01gar\x02 bage\x03"));
+}
+
+static void test_keymaps(void) {
+ _cleanup_strv_free_ char **kmaps = NULL;
+ char **p;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(!keymap_is_valid(""));
+ assert_se(!keymap_is_valid("/usr/bin/foo"));
+ assert_se(!keymap_is_valid("\x01gar\x02 bage\x03"));
+
+ r = get_keymaps(&kmaps);
+ if (r == -ENOENT)
+ return; /* skip test if no keymaps are installed */
+
+ assert_se(r >= 0);
+ assert_se(kmaps);
+
+ STRV_FOREACH(p, kmaps) {
+ puts(*p);
+ assert_se(keymap_is_valid(*p));
+ }
+
+ assert_se(keymap_is_valid("uk"));
+ assert_se(keymap_is_valid("de-nodeadkeys"));
+ assert_se(keymap_is_valid("ANSI-dvorak"));
+ assert_se(keymap_is_valid("unicode"));
+}
+
+#define dump_glyph(x) log_info(STRINGIFY(x) ": %s", special_glyph(x))
+static void dump_special_glyphs(void) {
+ assert_cc(SPECIAL_GLYPH_DEPRESSED_SMILEY + 1 == _SPECIAL_GLYPH_MAX);
+
+ log_info("/* %s */", __func__);
+
+ log_info("is_locale_utf8: %s", yes_no(is_locale_utf8()));
+
+ dump_glyph(SPECIAL_GLYPH_TREE_VERTICAL);
+ dump_glyph(SPECIAL_GLYPH_TREE_BRANCH);
+ dump_glyph(SPECIAL_GLYPH_TREE_RIGHT);
+ dump_glyph(SPECIAL_GLYPH_TREE_SPACE);
+ dump_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET);
+ dump_glyph(SPECIAL_GLYPH_BLACK_CIRCLE);
+ dump_glyph(SPECIAL_GLYPH_BULLET);
+ dump_glyph(SPECIAL_GLYPH_ARROW);
+ dump_glyph(SPECIAL_GLYPH_MDASH);
+ dump_glyph(SPECIAL_GLYPH_ELLIPSIS);
+ dump_glyph(SPECIAL_GLYPH_MU);
+ dump_glyph(SPECIAL_GLYPH_CHECK_MARK);
+ dump_glyph(SPECIAL_GLYPH_CROSS_MARK);
+ dump_glyph(SPECIAL_GLYPH_ECSTATIC_SMILEY);
+ dump_glyph(SPECIAL_GLYPH_HAPPY_SMILEY);
+ dump_glyph(SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY);
+ dump_glyph(SPECIAL_GLYPH_NEUTRAL_SMILEY);
+ dump_glyph(SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY);
+ dump_glyph(SPECIAL_GLYPH_UNHAPPY_SMILEY);
+ dump_glyph(SPECIAL_GLYPH_DEPRESSED_SMILEY);
+}
+
+int main(int argc, char *argv[]) {
+ test_get_locales();
+ test_locale_is_valid();
+ test_keymaps();
+
+ dump_special_glyphs();
+
+ return 0;
+}
diff --git a/src/test/test-log.c b/src/test/test-log.c
new file mode 100644
index 0000000..1e010c0
--- /dev/null
+++ b/src/test/test-log.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stddef.h>
+#include <unistd.h>
+
+#include "format-util.h"
+#include "log.h"
+#include "process-util.h"
+#include "util.h"
+
+assert_cc(LOG_REALM_REMOVE_LEVEL(LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, LOG_FTP | LOG_DEBUG))
+ == LOG_REALM_SYSTEMD);
+assert_cc(LOG_REALM_REMOVE_LEVEL(LOG_REALM_PLUS_LEVEL(LOG_REALM_UDEV, LOG_LOCAL7 | LOG_DEBUG))
+ == LOG_REALM_UDEV);
+assert_cc((LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, LOG_LOCAL3 | LOG_DEBUG) & LOG_FACMASK)
+ == LOG_LOCAL3);
+assert_cc((LOG_REALM_PLUS_LEVEL(LOG_REALM_UDEV, LOG_USER | LOG_INFO) & LOG_PRIMASK)
+ == LOG_INFO);
+
+assert_cc(IS_SYNTHETIC_ERRNO(SYNTHETIC_ERRNO(EINVAL)));
+assert_cc(!IS_SYNTHETIC_ERRNO(EINVAL));
+assert_cc(IS_SYNTHETIC_ERRNO(SYNTHETIC_ERRNO(0)));
+assert_cc(!IS_SYNTHETIC_ERRNO(0));
+
+#define X10(x) x x x x x x x x x x
+#define X100(x) X10(X10(x))
+#define X1000(x) X100(X10(x))
+
+static void test_log_struct(void) {
+ log_struct(LOG_INFO,
+ "MESSAGE=Waldo PID="PID_FMT" (no errno)", getpid_cached(),
+ "SERVICE=piepapo");
+
+ log_struct_errno(LOG_INFO, EILSEQ,
+ "MESSAGE=Waldo PID="PID_FMT": %m (normal)", getpid_cached(),
+ "SERVICE=piepapo");
+
+ log_struct_errno(LOG_INFO, SYNTHETIC_ERRNO(EILSEQ),
+ "MESSAGE=Waldo PID="PID_FMT": %m (synthetic)", getpid_cached(),
+ "SERVICE=piepapo");
+
+ log_struct(LOG_INFO,
+ "MESSAGE=Foobar PID="PID_FMT, getpid_cached(),
+ "FORMAT_STR_TEST=1=%i A=%c 2=%hi 3=%li 4=%lli 1=%p foo=%s 2.5=%g 3.5=%g 4.5=%Lg",
+ (int) 1, 'A', (short) 2, (long int) 3, (long long int) 4, (void*) 1, "foo", (float) 2.5f, (double) 3.5, (long double) 4.5,
+ "SUFFIX=GOT IT");
+}
+
+static void test_long_lines(void) {
+ log_object_internal(LOG_NOTICE,
+ EUCLEAN,
+ X1000("abcd_") ".txt",
+ 1000000,
+ X1000("fff") "unc",
+ "OBJECT=",
+ X1000("obj_") "ect",
+ "EXTRA=",
+ X1000("ext_") "tra",
+ "asdfasdf %s asdfasdfa", "foobar");
+}
+
+int main(int argc, char* argv[]) {
+ int target;
+
+ for (target = 0; target < _LOG_TARGET_MAX; target++) {
+ log_set_target(target);
+ log_open();
+
+ test_log_struct();
+ test_long_lines();
+ }
+
+ assert_se(log_info_errno(SYNTHETIC_ERRNO(EUCLEAN), "foo") == -EUCLEAN);
+
+ return 0;
+}
diff --git a/src/test/test-loopback.c b/src/test/test-loopback.c
new file mode 100644
index 0000000..89b760f
--- /dev/null
+++ b/src/test/test-loopback.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "log.h"
+#include "loopback-setup.h"
+#include "tests.h"
+
+int main(int argc, char* argv[]) {
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ r = loopback_setup();
+ if (r < 0)
+ log_error_errno(r, "loopback: %m");
+
+ return r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/src/test/test-mount-util.c b/src/test/test-mount-util.c
new file mode 100644
index 0000000..6986405
--- /dev/null
+++ b/src/test/test-mount-util.c
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/mount.h>
+
+#include "alloc-util.h"
+#include "mount-util.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void test_mount_option_mangle(void) {
+ char *opts = NULL;
+ unsigned long f;
+
+ assert_se(mount_option_mangle(NULL, MS_RDONLY|MS_NOSUID, &f, &opts) == 0);
+ assert_se(f == (MS_RDONLY|MS_NOSUID));
+ assert_se(opts == NULL);
+
+ assert_se(mount_option_mangle("", MS_RDONLY|MS_NOSUID, &f, &opts) == 0);
+ assert_se(f == (MS_RDONLY|MS_NOSUID));
+ assert_se(opts == NULL);
+
+ assert_se(mount_option_mangle("ro,nosuid,nodev,noexec", 0, &f, &opts) == 0);
+ assert_se(f == (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC));
+ assert_se(opts == NULL);
+
+ assert_se(mount_option_mangle("ro,nosuid,nodev,noexec,mode=755", 0, &f, &opts) == 0);
+ assert_se(f == (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC));
+ assert_se(streq(opts, "mode=755"));
+ opts = mfree(opts);
+
+ assert_se(mount_option_mangle("rw,nosuid,foo,hogehoge,nodev,mode=755", 0, &f, &opts) == 0);
+ assert_se(f == (MS_NOSUID|MS_NODEV));
+ assert_se(streq(opts, "foo,hogehoge,mode=755"));
+ opts = mfree(opts);
+
+ assert_se(mount_option_mangle("rw,nosuid,nodev,noexec,relatime,net_cls,net_prio", MS_RDONLY, &f, &opts) == 0);
+ assert_se(f == (MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME));
+ assert_se(streq(opts, "net_cls,net_prio"));
+ opts = mfree(opts);
+
+ assert_se(mount_option_mangle("rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000", MS_RDONLY, &f, &opts) == 0);
+ assert_se(f == (MS_NOSUID|MS_NODEV|MS_RELATIME));
+ assert_se(streq(opts, "size=1630748k,mode=700,uid=1000,gid=1000"));
+ opts = mfree(opts);
+
+ assert_se(mount_option_mangle("size=1630748k,rw,gid=1000,,,nodev,relatime,,mode=700,nosuid,uid=1000", MS_RDONLY, &f, &opts) == 0);
+ assert_se(f == (MS_NOSUID|MS_NODEV|MS_RELATIME));
+ assert_se(streq(opts, "size=1630748k,gid=1000,mode=700,uid=1000"));
+ opts = mfree(opts);
+
+ assert_se(mount_option_mangle("rw,exec,size=8143984k,nr_inodes=2035996,mode=755", MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, &f, &opts) == 0);
+ assert_se(f == (MS_NOSUID|MS_NODEV));
+ assert_se(streq(opts, "size=8143984k,nr_inodes=2035996,mode=755"));
+ opts = mfree(opts);
+
+ assert_se(mount_option_mangle("rw,relatime,fmask=0022,,,dmask=0022", MS_RDONLY, &f, &opts) == 0);
+ assert_se(f == MS_RELATIME);
+ assert_se(streq(opts, "fmask=0022,dmask=0022"));
+ opts = mfree(opts);
+
+ assert_se(mount_option_mangle("rw,relatime,fmask=0022,dmask=0022,\"hogehoge", MS_RDONLY, &f, &opts) < 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_mount_option_mangle();
+
+ return 0;
+}
diff --git a/src/test/test-mountpoint-util.c b/src/test/test-mountpoint-util.c
new file mode 100644
index 0000000..8e45c0b
--- /dev/null
+++ b/src/test/test-mountpoint-util.c
@@ -0,0 +1,267 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/mount.h>
+
+#include "alloc-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "log.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void test_mount_propagation_flags(const char *name, int ret, unsigned long expected) {
+ long unsigned flags;
+
+ log_info("/* %s(%s) */", __func__, name);
+
+ assert_se(mount_propagation_flags_from_string(name, &flags) == ret);
+
+ if (ret >= 0) {
+ const char *c;
+
+ assert_se(flags == expected);
+
+ c = mount_propagation_flags_to_string(flags);
+ if (isempty(name))
+ assert_se(isempty(c));
+ else
+ assert_se(streq(c, name));
+ }
+}
+
+static void test_mnt_id(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ Hashmap *h;
+ Iterator i;
+ char *p;
+ void *k;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(f = fopen("/proc/self/mountinfo", "re"));
+ assert_se(h = hashmap_new(&trivial_hash_ops));
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL, *path = NULL;
+ int mnt_id;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r == 0)
+ break;
+ assert_se(r > 0);
+
+ assert_se(sscanf(line, "%i %*s %*s %*s %ms", &mnt_id, &path) == 2);
+
+ log_debug("mountinfo: %s → %i", path, mnt_id);
+
+ assert_se(hashmap_put(h, INT_TO_PTR(mnt_id), path) >= 0);
+ path = NULL;
+ }
+
+ HASHMAP_FOREACH_KEY(p, k, h, i) {
+ int mnt_id = PTR_TO_INT(k), mnt_id2;
+
+ r = path_get_mnt_id(p, &mnt_id2);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to get the mnt id of %s: %m\n", p);
+ continue;
+ }
+
+ log_debug("mnt ids of %s are %i, %i\n", p, mnt_id, mnt_id2);
+
+ if (mnt_id == mnt_id2)
+ continue;
+
+ /* The ids don't match? If so, then there are two mounts on the same path, let's check if
+ * that's really the case */
+ char *t = hashmap_get(h, INT_TO_PTR(mnt_id2));
+ log_debug("the other path for mnt id %i is %s\n", mnt_id2, t);
+ assert_se(path_equal(p, t));
+ }
+
+ hashmap_free_free(h);
+}
+
+static void test_path_is_mount_point(void) {
+ int fd;
+ char tmp_dir[] = "/tmp/test-path-is-mount-point-XXXXXX";
+ _cleanup_free_ char *file1 = NULL, *file2 = NULL, *link1 = NULL, *link2 = NULL;
+ _cleanup_free_ char *dir1 = NULL, *dir1file = NULL, *dirlink1 = NULL, *dirlink1file = NULL;
+ _cleanup_free_ char *dir2 = NULL, *dir2file = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(path_is_mount_point("/", NULL, AT_SYMLINK_FOLLOW) > 0);
+ assert_se(path_is_mount_point("/", NULL, 0) > 0);
+ assert_se(path_is_mount_point("//", NULL, AT_SYMLINK_FOLLOW) > 0);
+ assert_se(path_is_mount_point("//", NULL, 0) > 0);
+
+ assert_se(path_is_mount_point("/proc", NULL, AT_SYMLINK_FOLLOW) > 0);
+ assert_se(path_is_mount_point("/proc", NULL, 0) > 0);
+ assert_se(path_is_mount_point("/proc/", NULL, AT_SYMLINK_FOLLOW) > 0);
+ assert_se(path_is_mount_point("/proc/", NULL, 0) > 0);
+
+ assert_se(path_is_mount_point("/proc/1", NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point("/proc/1", NULL, 0) == 0);
+ assert_se(path_is_mount_point("/proc/1/", NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point("/proc/1/", NULL, 0) == 0);
+
+ assert_se(path_is_mount_point("/sys", NULL, AT_SYMLINK_FOLLOW) > 0);
+ assert_se(path_is_mount_point("/sys", NULL, 0) > 0);
+ assert_se(path_is_mount_point("/sys/", NULL, AT_SYMLINK_FOLLOW) > 0);
+ assert_se(path_is_mount_point("/sys/", NULL, 0) > 0);
+
+ /* we'll create a hierarchy of different kinds of dir/file/link
+ * layouts:
+ *
+ * <tmp>/file1, <tmp>/file2
+ * <tmp>/link1 -> file1, <tmp>/link2 -> file2
+ * <tmp>/dir1/
+ * <tmp>/dir1/file
+ * <tmp>/dirlink1 -> dir1
+ * <tmp>/dirlink1file -> dirlink1/file
+ * <tmp>/dir2/
+ * <tmp>/dir2/file
+ */
+
+ /* file mountpoints */
+ assert_se(mkdtemp(tmp_dir) != NULL);
+ file1 = path_join(tmp_dir, "file1");
+ assert_se(file1);
+ file2 = path_join(tmp_dir, "file2");
+ assert_se(file2);
+ fd = open(file1, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0664);
+ assert_se(fd > 0);
+ close(fd);
+ fd = open(file2, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0664);
+ assert_se(fd > 0);
+ close(fd);
+ link1 = path_join(tmp_dir, "link1");
+ assert_se(link1);
+ assert_se(symlink("file1", link1) == 0);
+ link2 = path_join(tmp_dir, "link2");
+ assert_se(link1);
+ assert_se(symlink("file2", link2) == 0);
+
+ assert_se(path_is_mount_point(file1, NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point(file1, NULL, 0) == 0);
+ assert_se(path_is_mount_point(link1, NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point(link1, NULL, 0) == 0);
+
+ /* directory mountpoints */
+ dir1 = path_join(tmp_dir, "dir1");
+ assert_se(dir1);
+ assert_se(mkdir(dir1, 0755) == 0);
+ dirlink1 = path_join(tmp_dir, "dirlink1");
+ assert_se(dirlink1);
+ assert_se(symlink("dir1", dirlink1) == 0);
+ dirlink1file = path_join(tmp_dir, "dirlink1file");
+ assert_se(dirlink1file);
+ assert_se(symlink("dirlink1/file", dirlink1file) == 0);
+ dir2 = path_join(tmp_dir, "dir2");
+ assert_se(dir2);
+ assert_se(mkdir(dir2, 0755) == 0);
+
+ assert_se(path_is_mount_point(dir1, NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point(dir1, NULL, 0) == 0);
+ assert_se(path_is_mount_point(dirlink1, NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point(dirlink1, NULL, 0) == 0);
+
+ /* file in subdirectory mountpoints */
+ dir1file = path_join(dir1, "file");
+ assert_se(dir1file);
+ fd = open(dir1file, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0664);
+ assert_se(fd > 0);
+ close(fd);
+
+ assert_se(path_is_mount_point(dir1file, NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point(dir1file, NULL, 0) == 0);
+ assert_se(path_is_mount_point(dirlink1file, NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point(dirlink1file, NULL, 0) == 0);
+
+ /* these tests will only work as root */
+ if (mount(file1, file2, NULL, MS_BIND, NULL) >= 0) {
+ int rf, rt, rdf, rdt, rlf, rlt, rl1f, rl1t;
+ const char *file2d;
+
+ /* files */
+ /* capture results in vars, to avoid dangling mounts on failure */
+ log_info("%s: %s", __func__, file2);
+ rf = path_is_mount_point(file2, NULL, 0);
+ rt = path_is_mount_point(file2, NULL, AT_SYMLINK_FOLLOW);
+
+ file2d = strjoina(file2, "/");
+ log_info("%s: %s", __func__, file2d);
+ rdf = path_is_mount_point(file2d, NULL, 0);
+ rdt = path_is_mount_point(file2d, NULL, AT_SYMLINK_FOLLOW);
+
+ log_info("%s: %s", __func__, link2);
+ rlf = path_is_mount_point(link2, NULL, 0);
+ rlt = path_is_mount_point(link2, NULL, AT_SYMLINK_FOLLOW);
+
+ assert_se(umount(file2) == 0);
+
+ assert_se(rf == 1);
+ assert_se(rt == 1);
+ assert_se(rdf == -ENOTDIR);
+ assert_se(rdt == -ENOTDIR);
+ assert_se(rlf == 0);
+ assert_se(rlt == 1);
+
+ /* dirs */
+ dir2file = path_join(dir2, "file");
+ assert_se(dir2file);
+ fd = open(dir2file, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0664);
+ assert_se(fd > 0);
+ close(fd);
+
+ assert_se(mount(dir2, dir1, NULL, MS_BIND, NULL) >= 0);
+
+ log_info("%s: %s", __func__, dir1);
+ rf = path_is_mount_point(dir1, NULL, 0);
+ rt = path_is_mount_point(dir1, NULL, AT_SYMLINK_FOLLOW);
+ log_info("%s: %s", __func__, dirlink1);
+ rlf = path_is_mount_point(dirlink1, NULL, 0);
+ rlt = path_is_mount_point(dirlink1, NULL, AT_SYMLINK_FOLLOW);
+ log_info("%s: %s", __func__, dirlink1file);
+ /* its parent is a mount point, but not /file itself */
+ rl1f = path_is_mount_point(dirlink1file, NULL, 0);
+ rl1t = path_is_mount_point(dirlink1file, NULL, AT_SYMLINK_FOLLOW);
+
+ assert_se(umount(dir1) == 0);
+
+ assert_se(rf == 1);
+ assert_se(rt == 1);
+ assert_se(rlf == 0);
+ assert_se(rlt == 1);
+ assert_se(rl1f == 0);
+ assert_se(rl1t == 0);
+
+ } else
+ printf("Skipping bind mount file test: %m\n");
+
+ assert_se(rm_rf(tmp_dir, REMOVE_ROOT|REMOVE_PHYSICAL) == 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_mount_propagation_flags("shared", 0, MS_SHARED);
+ test_mount_propagation_flags("slave", 0, MS_SLAVE);
+ test_mount_propagation_flags("private", 0, MS_PRIVATE);
+ test_mount_propagation_flags(NULL, 0, 0);
+ test_mount_propagation_flags("", 0, 0);
+ test_mount_propagation_flags("xxxx", -EINVAL, 0);
+ test_mount_propagation_flags(" ", -EINVAL, 0);
+
+ test_mnt_id();
+ test_path_is_mount_point();
+
+ return 0;
+}
diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c
new file mode 100644
index 0000000..cc2efec
--- /dev/null
+++ b/src/test/test-namespace.c
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "namespace.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "util.h"
+
+static void test_tmpdir(const char *id, const char *A, const char *B) {
+ _cleanup_free_ char *a, *b;
+ struct stat x, y;
+ char *c, *d;
+
+ assert_se(setup_tmp_dirs(id, &a, &b) == 0);
+ assert_se(startswith(a, A));
+ assert_se(startswith(b, B));
+
+ assert_se(stat(a, &x) >= 0);
+ assert_se(stat(b, &y) >= 0);
+
+ assert_se(S_ISDIR(x.st_mode));
+ assert_se(S_ISDIR(y.st_mode));
+
+ assert_se((x.st_mode & 01777) == 0700);
+ assert_se((y.st_mode & 01777) == 0700);
+
+ c = strjoina(a, "/tmp");
+ d = strjoina(b, "/tmp");
+
+ assert_se(stat(c, &x) >= 0);
+ assert_se(stat(d, &y) >= 0);
+
+ assert_se(S_ISDIR(x.st_mode));
+ assert_se(S_ISDIR(y.st_mode));
+
+ assert_se((x.st_mode & 01777) == 01777);
+ assert_se((y.st_mode & 01777) == 01777);
+
+ assert_se(rmdir(c) >= 0);
+ assert_se(rmdir(d) >= 0);
+
+ assert_se(rmdir(a) >= 0);
+ assert_se(rmdir(b) >= 0);
+}
+
+static int test_netns(void) {
+ _cleanup_close_pair_ int s[2] = { -1, -1 };
+ pid_t pid1, pid2, pid3;
+ int r, n = 0;
+ siginfo_t si;
+
+ if (geteuid() > 0)
+ return log_tests_skipped("not root");
+
+ assert_se(socketpair(AF_UNIX, SOCK_DGRAM, 0, s) >= 0);
+
+ pid1 = fork();
+ assert_se(pid1 >= 0);
+
+ if (pid1 == 0) {
+ r = setup_netns(s);
+ assert_se(r >= 0);
+ _exit(r);
+ }
+
+ pid2 = fork();
+ assert_se(pid2 >= 0);
+
+ if (pid2 == 0) {
+ r = setup_netns(s);
+ assert_se(r >= 0);
+ exit(r);
+ }
+
+ pid3 = fork();
+ assert_se(pid3 >= 0);
+
+ if (pid3 == 0) {
+ r = setup_netns(s);
+ assert_se(r >= 0);
+ exit(r);
+ }
+
+ r = wait_for_terminate(pid1, &si);
+ assert_se(r >= 0);
+ assert_se(si.si_code == CLD_EXITED);
+ n += si.si_status;
+
+ r = wait_for_terminate(pid2, &si);
+ assert_se(r >= 0);
+ assert_se(si.si_code == CLD_EXITED);
+ n += si.si_status;
+
+ r = wait_for_terminate(pid3, &si);
+ assert_se(r >= 0);
+ assert_se(si.si_code == CLD_EXITED);
+ n += si.si_status;
+
+ assert_se(n == 1);
+ return EXIT_SUCCESS;
+}
+
+int main(int argc, char *argv[]) {
+ sd_id128_t bid;
+ char boot_id[SD_ID128_STRING_MAX];
+ _cleanup_free_ char *x = NULL, *y = NULL, *z = NULL, *zz = NULL;
+
+ test_setup_logging(LOG_INFO);
+
+ if (!have_namespaces()) {
+ log_tests_skipped("Don't have namespace support");
+ return EXIT_TEST_SKIP;
+ }
+
+ assert_se(sd_id128_get_boot(&bid) >= 0);
+ sd_id128_to_string(bid, boot_id);
+
+ x = strjoin("/tmp/systemd-private-", boot_id, "-abcd.service-");
+ y = strjoin("/var/tmp/systemd-private-", boot_id, "-abcd.service-");
+ assert_se(x && y);
+
+ test_tmpdir("abcd.service", x, y);
+
+ z = strjoin("/tmp/systemd-private-", boot_id, "-sys-devices-pci0000:00-0000:00:1a.0-usb3-3\\x2d1-3\\x2d1:1.0-bluetooth-hci0.device-");
+ zz = strjoin("/var/tmp/systemd-private-", boot_id, "-sys-devices-pci0000:00-0000:00:1a.0-usb3-3\\x2d1-3\\x2d1:1.0-bluetooth-hci0.device-");
+
+ assert_se(z && zz);
+
+ test_tmpdir("sys-devices-pci0000:00-0000:00:1a.0-usb3-3\\x2d1-3\\x2d1:1.0-bluetooth-hci0.device", z, zz);
+
+ return test_netns();
+}
diff --git a/src/test/test-netlink-manual.c b/src/test/test-netlink-manual.c
new file mode 100644
index 0000000..1ebe8d1
--- /dev/null
+++ b/src/test/test-netlink-manual.c
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <arpa/inet.h>
+#include <libkmod.h>
+#include <linux/ip.h>
+#include <net/if.h>
+#include <linux/if_tunnel.h>
+
+#include "sd-netlink.h"
+
+#include "macro.h"
+#include "module-util.h"
+#include "tests.h"
+#include "util.h"
+
+static int load_module(const char *mod_name) {
+ _cleanup_(kmod_unrefp) struct kmod_ctx *ctx = NULL;
+ _cleanup_(kmod_module_unref_listp) struct kmod_list *list = NULL;
+ struct kmod_list *l;
+ int r;
+
+ ctx = kmod_new(NULL, NULL);
+ if (!ctx)
+ return log_oom();
+
+ r = kmod_module_new_from_lookup(ctx, mod_name, &list);
+ if (r < 0)
+ return r;
+
+ kmod_list_foreach(l, list) {
+ _cleanup_(kmod_module_unrefp) struct kmod_module *mod = NULL;
+
+ mod = kmod_module_get_module(l);
+
+ r = kmod_module_probe_insert_module(mod, 0, NULL, NULL, NULL, NULL);
+ if (r > 0)
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
+static int test_tunnel_configure(sd_netlink *rtnl) {
+ int r;
+ sd_netlink_message *m, *n;
+ struct in_addr local, remote;
+
+ /* skip test if module cannot be loaded */
+ r = load_module("ipip");
+ if (r < 0)
+ return log_tests_skipped_errno(r, "failed to load module 'ipip'");
+
+ r = load_module("sit");
+ if (r < 0)
+ return log_tests_skipped_errno(r, "failed to load module 'sit'");
+
+ if (getuid() != 0)
+ return log_tests_skipped("not root");
+
+ /* IPIP tunnel */
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0) >= 0);
+ assert_se(m);
+
+ assert_se(sd_netlink_message_append_string(m, IFLA_IFNAME, "ipip-tunnel") >= 0);
+ assert_se(sd_netlink_message_append_u32(m, IFLA_MTU, 1234)>= 0);
+
+ assert_se(sd_netlink_message_open_container(m, IFLA_LINKINFO) >= 0);
+
+ assert_se(sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "ipip") >= 0);
+
+ inet_pton(AF_INET, "192.168.21.1", &local.s_addr);
+ assert_se(sd_netlink_message_append_u32(m, IFLA_IPTUN_LOCAL, local.s_addr) >= 0);
+
+ inet_pton(AF_INET, "192.168.21.2", &remote.s_addr);
+ assert_se(sd_netlink_message_append_u32(m, IFLA_IPTUN_REMOTE, remote.s_addr) >= 0);
+
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+
+ assert_se(sd_netlink_call(rtnl, m, -1, 0) == 1);
+
+ assert_se((m = sd_netlink_message_unref(m)) == NULL);
+
+ /* sit */
+ assert_se(sd_rtnl_message_new_link(rtnl, &n, RTM_NEWLINK, 0) >= 0);
+ assert_se(n);
+
+ assert_se(sd_netlink_message_append_string(n, IFLA_IFNAME, "sit-tunnel") >= 0);
+ assert_se(sd_netlink_message_append_u32(n, IFLA_MTU, 1234)>= 0);
+
+ assert_se(sd_netlink_message_open_container(n, IFLA_LINKINFO) >= 0);
+
+ assert_se(sd_netlink_message_open_container_union(n, IFLA_INFO_DATA, "sit") >= 0);
+
+ assert_se(sd_netlink_message_append_u8(n, IFLA_IPTUN_PROTO, IPPROTO_IPIP) >= 0);
+
+ inet_pton(AF_INET, "192.168.21.3", &local.s_addr);
+ assert_se(sd_netlink_message_append_u32(n, IFLA_IPTUN_LOCAL, local.s_addr) >= 0);
+
+ inet_pton(AF_INET, "192.168.21.4", &remote.s_addr);
+ assert_se(sd_netlink_message_append_u32(n, IFLA_IPTUN_REMOTE, remote.s_addr) >= 0);
+
+ assert_se(sd_netlink_message_close_container(n) >= 0);
+ assert_se(sd_netlink_message_close_container(n) >= 0);
+
+ assert_se(sd_netlink_call(rtnl, n, -1, 0) == 1);
+
+ assert_se((n = sd_netlink_message_unref(n)) == NULL);
+
+ return EXIT_SUCCESS;
+}
+
+int main(int argc, char *argv[]) {
+ sd_netlink *rtnl;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ assert_se(rtnl);
+
+ r = test_tunnel_configure(rtnl);
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+
+ return r;
+}
diff --git a/src/test/test-ns.c b/src/test/test-ns.c
new file mode 100644
index 0000000..d3dbb54
--- /dev/null
+++ b/src/test/test-ns.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "log.h"
+#include "namespace.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ const char * const writable[] = {
+ "/home",
+ "-/home/lennart/projects/foobar", /* this should be masked automatically */
+ NULL
+ };
+
+ const char * const readonly[] = {
+ /* "/", */
+ /* "/usr", */
+ "/boot",
+ "/lib",
+ "/usr/lib",
+ "-/lib64",
+ "-/usr/lib64",
+ NULL
+ };
+
+ const char *inaccessible[] = {
+ "/home/lennart/projects",
+ NULL
+ };
+
+ static const NamespaceInfo ns_info = {
+ .private_dev = true,
+ .protect_control_groups = true,
+ .protect_kernel_tunables = true,
+ .protect_kernel_modules = true,
+ };
+
+ char *root_directory;
+ char *projects_directory;
+ int r;
+ char tmp_dir[] = "/tmp/systemd-private-XXXXXX",
+ var_tmp_dir[] = "/var/tmp/systemd-private-XXXXXX";
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(mkdtemp(tmp_dir));
+ assert_se(mkdtemp(var_tmp_dir));
+
+ root_directory = getenv("TEST_NS_CHROOT");
+ projects_directory = getenv("TEST_NS_PROJECTS");
+
+ if (projects_directory)
+ inaccessible[0] = projects_directory;
+
+ log_info("Inaccessible directory: '%s'", inaccessible[0]);
+ if (root_directory)
+ log_info("Chroot: '%s'", root_directory);
+ else
+ log_info("Not chrooted");
+
+ r = setup_namespace(root_directory,
+ NULL,
+ &ns_info,
+ (char **) writable,
+ (char **) readonly,
+ (char **) inaccessible,
+ NULL,
+ &(BindMount) { .source = (char*) "/usr/bin", .destination = (char*) "/etc/systemd", .read_only = true }, 1,
+ &(TemporaryFileSystem) { .path = (char*) "/var", .options = (char*) "ro" }, 1,
+ tmp_dir,
+ var_tmp_dir,
+ PROTECT_HOME_NO,
+ PROTECT_SYSTEM_NO,
+ 0,
+ 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to setup namespace: %m");
+
+ log_info("Usage:\n"
+ " sudo TEST_NS_PROJECTS=/home/lennart/projects ./test-ns\n"
+ " sudo TEST_NS_CHROOT=/home/alban/debian-tree TEST_NS_PROJECTS=/home/alban/debian-tree/home/alban/Documents ./test-ns");
+
+ return 1;
+ }
+
+ execl("/bin/sh", "/bin/sh", NULL);
+ log_error_errno(errno, "execl(): %m");
+
+ return 1;
+}
diff --git a/src/test/test-nscd-flush.c b/src/test/test-nscd-flush.c
new file mode 100644
index 0000000..97c2195
--- /dev/null
+++ b/src/test/test-nscd-flush.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "main-func.h"
+#include "nscd-flush.h"
+#include "strv.h"
+#include "tests.h"
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ r = nscd_flush_cache(STRV_MAKE("group", "passwd", "hosts"));
+ if (r < 0)
+ return log_error_errno(r, "Failed to flush NSCD cache");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/test/test-nss.c b/src/test/test-nss.c
new file mode 100644
index 0000000..20aa6cf
--- /dev/null
+++ b/src/test/test-nss.c
@@ -0,0 +1,516 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dlfcn.h>
+#include <net/if.h>
+#include <stdlib.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "errno-list.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "local-addresses.h"
+#include "log.h"
+#include "nss-util.h"
+#include "path-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+static const char* nss_status_to_string(enum nss_status status, char *buf, size_t buf_len) {
+ switch (status) {
+ case NSS_STATUS_TRYAGAIN:
+ return "NSS_STATUS_TRYAGAIN";
+ case NSS_STATUS_UNAVAIL:
+ return "NSS_STATUS_UNAVAIL";
+ case NSS_STATUS_NOTFOUND:
+ return "NSS_STATUS_NOTFOUND";
+ case NSS_STATUS_SUCCESS:
+ return "NSS_STATUS_SUCCESS";
+ case NSS_STATUS_RETURN:
+ return "NSS_STATUS_RETURN";
+ default:
+ snprintf(buf, buf_len, "%i", status);
+ return buf;
+ }
+};
+
+static const char* af_to_string(int family, char *buf, size_t buf_len) {
+ const char *name;
+
+ if (family == AF_UNSPEC)
+ return "*";
+
+ name = af_to_name(family);
+ if (name)
+ return name;
+
+ snprintf(buf, buf_len, "%i", family);
+ return buf;
+}
+
+static void* open_handle(const char* dir, const char* module, int flags) {
+ const char *path = NULL;
+ void *handle;
+
+ if (dir)
+ path = strjoina(dir, "/libnss_", module, ".so.2");
+ if (!path || access(path, F_OK) < 0)
+ path = strjoina("libnss_", module, ".so.2");
+
+ handle = dlopen(path, flags);
+ if (!handle)
+ log_error("Failed to load module %s: %s", module, dlerror());
+ return handle;
+}
+
+static int print_gaih_addrtuples(const struct gaih_addrtuple *tuples) {
+ const struct gaih_addrtuple *it;
+ int n = 0;
+
+ for (it = tuples; it; it = it->next) {
+ _cleanup_free_ char *a = NULL;
+ union in_addr_union u;
+ int r;
+ char family_name[DECIMAL_STR_MAX(int)];
+ char ifname[IF_NAMESIZE];
+
+ memcpy(&u, it->addr, 16);
+ r = in_addr_to_string(it->family, &u, &a);
+ assert_se(IN_SET(r, 0, -EAFNOSUPPORT));
+ if (r == -EAFNOSUPPORT)
+ assert_se(a = hexmem(it->addr, 16));
+
+ if (it->scopeid == 0)
+ goto numerical_index;
+
+ if (if_indextoname(it->scopeid, ifname) == NULL) {
+ log_warning_errno(errno, "if_indextoname(%d) failed: %m", it->scopeid);
+ numerical_index:
+ xsprintf(ifname, "%i", it->scopeid);
+ };
+
+ log_info(" \"%s\" %s %s %%%s",
+ it->name,
+ af_to_string(it->family, family_name, sizeof family_name),
+ a,
+ ifname);
+ n ++;
+ }
+ return n;
+}
+
+static void print_struct_hostent(struct hostent *host, const char *canon) {
+ char **s;
+
+ log_info(" \"%s\"", host->h_name);
+ STRV_FOREACH(s, host->h_aliases)
+ log_info(" alias \"%s\"", *s);
+ STRV_FOREACH(s, host->h_addr_list) {
+ union in_addr_union u;
+ _cleanup_free_ char *a = NULL;
+ char family_name[DECIMAL_STR_MAX(int)];
+ int r;
+
+ assert_se((unsigned) host->h_length == FAMILY_ADDRESS_SIZE(host->h_addrtype));
+ memcpy(&u, *s, host->h_length);
+ r = in_addr_to_string(host->h_addrtype, &u, &a);
+ assert_se(r == 0);
+ log_info(" %s %s",
+ af_to_string(host->h_addrtype, family_name, sizeof family_name),
+ a);
+ }
+ if (canon)
+ log_info(" canonical: \"%s\"", canon);
+}
+
+static void test_gethostbyname4_r(void *handle, const char *module, const char *name) {
+ const char *fname;
+ _nss_gethostbyname4_r_t f;
+ char buffer[2000];
+ struct gaih_addrtuple *pat = NULL;
+ int errno1 = 999, errno2 = 999; /* nss-dns doesn't set those */
+ int32_t ttl = INT32_MAX; /* nss-dns wants to return the lowest ttl,
+ and will access this variable through *ttlp,
+ so we need to set it to something.
+ I'm not sure if this is a bug in nss-dns
+ or not. */
+ enum nss_status status;
+ char pretty_status[DECIMAL_STR_MAX(enum nss_status)];
+ int n;
+
+ fname = strjoina("_nss_", module, "_gethostbyname4_r");
+ f = dlsym(handle, fname);
+ log_debug("dlsym(0x%p, %s) → 0x%p", handle, fname, f);
+ assert_se(f);
+
+ status = f(name, &pat, buffer, sizeof buffer, &errno1, &errno2, &ttl);
+ if (status == NSS_STATUS_SUCCESS) {
+ log_info("%s(\"%s\") → status=%s%-20spat=buffer+0x%tx errno=%d/%s h_errno=%d/%s ttl=%"PRIi32,
+ fname, name,
+ nss_status_to_string(status, pretty_status, sizeof pretty_status), "\n",
+ pat ? (char*) pat - buffer : 0,
+ errno1, errno_to_name(errno1) ?: "---",
+ errno2, hstrerror(errno2),
+ ttl);
+ n = print_gaih_addrtuples(pat);
+ } else {
+ log_info("%s(\"%s\") → status=%s%-20spat=0x%p errno=%d/%s h_errno=%d/%s",
+ fname, name,
+ nss_status_to_string(status, pretty_status, sizeof pretty_status), "\n",
+ pat,
+ errno1, errno_to_name(errno1) ?: "---",
+ errno2, hstrerror(errno2));
+ n = 0;
+ }
+
+ if (STR_IN_SET(module, "resolve", "mymachines") && status == NSS_STATUS_UNAVAIL)
+ return;
+
+ if (STR_IN_SET(module, "myhostname", "resolve") && streq(name, "localhost")) {
+ assert_se(status == NSS_STATUS_SUCCESS);
+ assert_se(n == 2);
+ }
+}
+
+static void test_gethostbyname3_r(void *handle, const char *module, const char *name, int af) {
+ const char *fname;
+ _nss_gethostbyname3_r_t f;
+ char buffer[2000];
+ int errno1 = 999, errno2 = 999; /* nss-dns doesn't set those */
+ int32_t ttl = INT32_MAX; /* nss-dns wants to return the lowest ttl,
+ and will access this variable through *ttlp,
+ so we need to set it to something.
+ I'm not sure if this is a bug in nss-dns
+ or not. */
+ enum nss_status status;
+ char pretty_status[DECIMAL_STR_MAX(enum nss_status)];
+ struct hostent host;
+ char *canon;
+ char family_name[DECIMAL_STR_MAX(int)];
+
+ fname = strjoina("_nss_", module, "_gethostbyname3_r");
+ f = dlsym(handle, fname);
+ log_debug("dlsym(0x%p, %s) → 0x%p", handle, fname, f);
+ assert_se(f);
+
+ status = f(name, af, &host, buffer, sizeof buffer, &errno1, &errno2, &ttl, &canon);
+ log_info("%s(\"%s\", %s) → status=%s%-20serrno=%d/%s h_errno=%d/%s ttl=%"PRIi32,
+ fname, name, af_to_string(af, family_name, sizeof family_name),
+ nss_status_to_string(status, pretty_status, sizeof pretty_status), "\n",
+ errno1, errno_to_name(errno1) ?: "---",
+ errno2, hstrerror(errno2),
+ ttl);
+ if (status == NSS_STATUS_SUCCESS)
+ print_struct_hostent(&host, canon);
+}
+
+static void test_gethostbyname2_r(void *handle, const char *module, const char *name, int af) {
+ const char *fname;
+ _nss_gethostbyname2_r_t f;
+ char buffer[2000];
+ int errno1 = 999, errno2 = 999; /* nss-dns doesn't set those */
+ enum nss_status status;
+ char pretty_status[DECIMAL_STR_MAX(enum nss_status)];
+ struct hostent host;
+ char family_name[DECIMAL_STR_MAX(int)];
+
+ fname = strjoina("_nss_", module, "_gethostbyname2_r");
+ f = dlsym(handle, fname);
+ log_debug("dlsym(0x%p, %s) → 0x%p", handle, fname, f);
+ assert_se(f);
+
+ status = f(name, af, &host, buffer, sizeof buffer, &errno1, &errno2);
+ log_info("%s(\"%s\", %s) → status=%s%-20serrno=%d/%s h_errno=%d/%s",
+ fname, name, af_to_string(af, family_name, sizeof family_name),
+ nss_status_to_string(status, pretty_status, sizeof pretty_status), "\n",
+ errno1, errno_to_name(errno1) ?: "---",
+ errno2, hstrerror(errno2));
+ if (status == NSS_STATUS_SUCCESS)
+ print_struct_hostent(&host, NULL);
+}
+
+static void test_gethostbyname_r(void *handle, const char *module, const char *name) {
+ const char *fname;
+ _nss_gethostbyname_r_t f;
+ char buffer[2000];
+ int errno1 = 999, errno2 = 999; /* nss-dns doesn't set those */
+ enum nss_status status;
+ char pretty_status[DECIMAL_STR_MAX(enum nss_status)];
+ struct hostent host;
+
+ fname = strjoina("_nss_", module, "_gethostbyname_r");
+ f = dlsym(handle, fname);
+ log_debug("dlsym(0x%p, %s) → 0x%p", handle, fname, f);
+ assert_se(f);
+
+ status = f(name, &host, buffer, sizeof buffer, &errno1, &errno2);
+ log_info("%s(\"%s\") → status=%s%-20serrno=%d/%s h_errno=%d/%s",
+ fname, name,
+ nss_status_to_string(status, pretty_status, sizeof pretty_status), "\n",
+ errno1, errno_to_name(errno1) ?: "---",
+ errno2, hstrerror(errno2));
+ if (status == NSS_STATUS_SUCCESS)
+ print_struct_hostent(&host, NULL);
+}
+
+static void test_gethostbyaddr2_r(void *handle,
+ const char *module,
+ const void* addr, socklen_t len,
+ int af) {
+
+ const char *fname;
+ _nss_gethostbyaddr2_r_t f;
+ char buffer[2000];
+ int errno1 = 999, errno2 = 999; /* nss-dns doesn't set those */
+ enum nss_status status;
+ char pretty_status[DECIMAL_STR_MAX(enum nss_status)];
+ struct hostent host;
+ int32_t ttl = INT32_MAX;
+ _cleanup_free_ char *addr_pretty = NULL;
+
+ fname = strjoina("_nss_", module, "_gethostbyaddr2_r");
+ f = dlsym(handle, fname);
+
+ log_full_errno(f ? LOG_DEBUG : LOG_INFO, errno,
+ "dlsym(0x%p, %s) → 0x%p: %m", handle, fname, f);
+ if (!f)
+ return;
+
+ assert_se(in_addr_to_string(af, addr, &addr_pretty) >= 0);
+
+ status = f(addr, len, af, &host, buffer, sizeof buffer, &errno1, &errno2, &ttl);
+ log_info("%s(\"%s\") → status=%s%-20serrno=%d/%s h_errno=%d/%s ttl=%"PRIi32,
+ fname, addr_pretty,
+ nss_status_to_string(status, pretty_status, sizeof pretty_status), "\n",
+ errno1, errno_to_name(errno1) ?: "---",
+ errno2, hstrerror(errno2),
+ ttl);
+ if (status == NSS_STATUS_SUCCESS)
+ print_struct_hostent(&host, NULL);
+}
+
+static void test_gethostbyaddr_r(void *handle,
+ const char *module,
+ const void* addr, socklen_t len,
+ int af) {
+
+ const char *fname;
+ _nss_gethostbyaddr_r_t f;
+ char buffer[2000];
+ int errno1 = 999, errno2 = 999; /* nss-dns doesn't set those */
+ enum nss_status status;
+ char pretty_status[DECIMAL_STR_MAX(enum nss_status)];
+ struct hostent host;
+ _cleanup_free_ char *addr_pretty = NULL;
+
+ fname = strjoina("_nss_", module, "_gethostbyaddr_r");
+ f = dlsym(handle, fname);
+
+ log_full_errno(f ? LOG_DEBUG : LOG_INFO, errno,
+ "dlsym(0x%p, %s) → 0x%p: %m", handle, fname, f);
+ if (!f)
+ return;
+
+ assert_se(in_addr_to_string(af, addr, &addr_pretty) >= 0);
+
+ status = f(addr, len, af, &host, buffer, sizeof buffer, &errno1, &errno2);
+ log_info("%s(\"%s\") → status=%s%-20serrno=%d/%s h_errno=%d/%s",
+ fname, addr_pretty,
+ nss_status_to_string(status, pretty_status, sizeof pretty_status), "\n",
+ errno1, errno_to_name(errno1) ?: "---",
+ errno2, hstrerror(errno2));
+ if (status == NSS_STATUS_SUCCESS)
+ print_struct_hostent(&host, NULL);
+}
+
+static void test_byname(void *handle, const char *module, const char *name) {
+ test_gethostbyname4_r(handle, module, name);
+ puts("");
+
+ test_gethostbyname3_r(handle, module, name, AF_INET);
+ puts("");
+ test_gethostbyname3_r(handle, module, name, AF_INET6);
+ puts("");
+ test_gethostbyname3_r(handle, module, name, AF_UNSPEC);
+ puts("");
+ test_gethostbyname3_r(handle, module, name, AF_LOCAL);
+ puts("");
+
+ test_gethostbyname2_r(handle, module, name, AF_INET);
+ puts("");
+ test_gethostbyname2_r(handle, module, name, AF_INET6);
+ puts("");
+ test_gethostbyname2_r(handle, module, name, AF_UNSPEC);
+ puts("");
+ test_gethostbyname2_r(handle, module, name, AF_LOCAL);
+ puts("");
+
+ test_gethostbyname_r(handle, module, name);
+ puts("");
+}
+
+static void test_byaddr(void *handle,
+ const char *module,
+ const void* addr, socklen_t len,
+ int af) {
+ test_gethostbyaddr2_r(handle, module, addr, len, af);
+ puts("");
+
+ test_gethostbyaddr_r(handle, module, addr, len, af);
+ puts("");
+}
+
+static int make_addresses(struct local_address **addresses) {
+ int n;
+ size_t n_alloc;
+ _cleanup_free_ struct local_address *addrs = NULL;
+
+ n = local_addresses(NULL, 0, AF_UNSPEC, &addrs);
+ if (n < 0)
+ log_info_errno(n, "Failed to query local addresses: %m");
+
+ n_alloc = n; /* we _can_ do that */
+ if (!GREEDY_REALLOC(addrs, n_alloc, n + 3))
+ return log_oom();
+
+ addrs[n++] = (struct local_address) { .family = AF_INET,
+ .address.in = { htobe32(0x7F000001) } };
+ addrs[n++] = (struct local_address) { .family = AF_INET,
+ .address.in = { htobe32(0x7F000002) } };
+ addrs[n++] = (struct local_address) { .family = AF_INET6,
+ .address.in6 = in6addr_loopback };
+ return 0;
+}
+
+static int test_one_module(const char* dir,
+ const char *module,
+ char **names,
+ struct local_address *addresses,
+ int n_addresses) {
+ void *handle;
+ char **name;
+ int i;
+
+ log_info("======== %s ========", module);
+
+ handle = open_handle(dir, module, RTLD_LAZY|RTLD_NODELETE);
+ if (!handle)
+ return -EINVAL;
+
+ STRV_FOREACH(name, names)
+ test_byname(handle, module, *name);
+
+ for (i = 0; i < n_addresses; i++)
+ test_byaddr(handle, module,
+ &addresses[i].address,
+ FAMILY_ADDRESS_SIZE(addresses[i].family),
+ addresses[i].family);
+
+ log_info(" ");
+ dlclose(handle);
+ return 0;
+}
+
+static int parse_argv(int argc, char **argv,
+ char ***the_modules,
+ char ***the_names,
+ struct local_address **the_addresses, int *n_addresses) {
+
+ int r, n = 0;
+ _cleanup_strv_free_ char **modules = NULL, **names = NULL;
+ _cleanup_free_ struct local_address *addrs = NULL;
+ size_t n_allocated = 0;
+
+ if (argc > 1)
+ modules = strv_new(argv[1]);
+ else
+ modules = strv_new(
+#if ENABLE_NSS_MYHOSTNAME
+ "myhostname",
+#endif
+#if ENABLE_NSS_RESOLVE
+ "resolve",
+#endif
+#if ENABLE_NSS_MYMACHINES
+ "mymachines",
+#endif
+ "dns");
+ if (!modules)
+ return -ENOMEM;
+
+ if (argc > 2) {
+ char **name;
+ int family;
+ union in_addr_union address;
+
+ STRV_FOREACH(name, argv + 2) {
+ r = in_addr_from_string_auto(*name, &family, &address);
+ if (r < 0) {
+ /* assume this is a name */
+ r = strv_extend(&names, *name);
+ if (r < 0)
+ return r;
+ } else {
+ if (!GREEDY_REALLOC0(addrs, n_allocated, n + 1))
+ return -ENOMEM;
+
+ addrs[n++] = (struct local_address) { .family = family,
+ .address = address };
+ }
+ }
+ } else {
+ _cleanup_free_ char *hostname;
+
+ hostname = gethostname_malloc();
+ if (!hostname)
+ return -ENOMEM;
+
+ names = strv_new("localhost", "_gateway", "foo_no_such_host", hostname);
+ if (!names)
+ return -ENOMEM;
+
+ n = make_addresses(&addrs);
+ if (n < 0)
+ return n;
+ }
+
+ *the_modules = modules;
+ *the_names = names;
+ modules = names = NULL;
+ *the_addresses = addrs;
+ *n_addresses = n;
+ addrs = NULL;
+ return 0;
+}
+
+int main(int argc, char **argv) {
+ _cleanup_free_ char *dir = NULL;
+ _cleanup_strv_free_ char **modules = NULL, **names = NULL;
+ _cleanup_free_ struct local_address *addresses = NULL;
+ int n_addresses = 0;
+ char **module;
+ int r;
+
+ log_set_max_level(LOG_INFO);
+ log_parse_environment();
+
+ r = parse_argv(argc, argv, &modules, &names, &addresses, &n_addresses);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse arguments: %m");
+ return EXIT_FAILURE;
+ }
+
+ dir = dirname_malloc(argv[0]);
+ if (!dir)
+ return EXIT_FAILURE;
+
+ STRV_FOREACH(module, modules) {
+ r = test_one_module(dir, *module, names, addresses, n_addresses);
+ if (r < 0)
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-os-util.c b/src/test/test-os-util.c
new file mode 100644
index 0000000..c215a2e
--- /dev/null
+++ b/src/test/test-os-util.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "log.h"
+#include "os-util.h"
+#include "tests.h"
+
+static void test_path_is_os_tree(void) {
+ assert_se(path_is_os_tree("/") > 0);
+ assert_se(path_is_os_tree("/etc") == 0);
+ assert_se(path_is_os_tree("/idontexist") == -ENOENT);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_path_is_os_tree();
+
+ return 0;
+}
diff --git a/src/test/test-parse-util.c b/src/test/test-parse-util.c
new file mode 100644
index 0000000..d732f40
--- /dev/null
+++ b/src/test/test-parse-util.c
@@ -0,0 +1,853 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <locale.h>
+#include <math.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "errno-list.h"
+#include "log.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+static void test_parse_boolean(void) {
+ assert_se(parse_boolean("1") == 1);
+ assert_se(parse_boolean("y") == 1);
+ assert_se(parse_boolean("Y") == 1);
+ assert_se(parse_boolean("yes") == 1);
+ assert_se(parse_boolean("YES") == 1);
+ assert_se(parse_boolean("true") == 1);
+ assert_se(parse_boolean("TRUE") == 1);
+ assert_se(parse_boolean("on") == 1);
+ assert_se(parse_boolean("ON") == 1);
+
+ assert_se(parse_boolean("0") == 0);
+ assert_se(parse_boolean("n") == 0);
+ assert_se(parse_boolean("N") == 0);
+ assert_se(parse_boolean("no") == 0);
+ assert_se(parse_boolean("NO") == 0);
+ assert_se(parse_boolean("false") == 0);
+ assert_se(parse_boolean("FALSE") == 0);
+ assert_se(parse_boolean("off") == 0);
+ assert_se(parse_boolean("OFF") == 0);
+
+ assert_se(parse_boolean("garbage") < 0);
+ assert_se(parse_boolean("") < 0);
+ assert_se(parse_boolean("full") < 0);
+}
+
+static void test_parse_pid(void) {
+ int r;
+ pid_t pid;
+
+ r = parse_pid("100", &pid);
+ assert_se(r == 0);
+ assert_se(pid == 100);
+
+ r = parse_pid("0x7FFFFFFF", &pid);
+ assert_se(r == 0);
+ assert_se(pid == 2147483647);
+
+ pid = 65; /* pid is left unchanged on ERANGE. Set to known arbitrary value. */
+ r = parse_pid("0", &pid);
+ assert_se(r == -ERANGE);
+ assert_se(pid == 65);
+
+ pid = 65; /* pid is left unchanged on ERANGE. Set to known arbitrary value. */
+ r = parse_pid("-100", &pid);
+ assert_se(r == -ERANGE);
+ assert_se(pid == 65);
+
+ pid = 65; /* pid is left unchanged on ERANGE. Set to known arbitrary value. */
+ r = parse_pid("0xFFFFFFFFFFFFFFFFF", &pid);
+ assert_se(r == -ERANGE);
+ assert_se(pid == 65);
+
+ r = parse_pid("junk", &pid);
+ assert_se(r == -EINVAL);
+
+ r = parse_pid("", &pid);
+ assert_se(r == -EINVAL);
+}
+
+static void test_parse_mode(void) {
+ mode_t m;
+
+ assert_se(parse_mode("-1", &m) < 0);
+ assert_se(parse_mode("", &m) < 0);
+ assert_se(parse_mode("888", &m) < 0);
+ assert_se(parse_mode("77777", &m) < 0);
+
+ assert_se(parse_mode("544", &m) >= 0 && m == 0544);
+ assert_se(parse_mode("777", &m) >= 0 && m == 0777);
+ assert_se(parse_mode("7777", &m) >= 0 && m == 07777);
+ assert_se(parse_mode("0", &m) >= 0 && m == 0);
+}
+
+static void test_parse_size(void) {
+ uint64_t bytes;
+
+ assert_se(parse_size("", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size("111", 1024, &bytes) == 0);
+ assert_se(bytes == 111);
+
+ assert_se(parse_size("111.4", 1024, &bytes) == 0);
+ assert_se(bytes == 111);
+
+ assert_se(parse_size(" 112 B", 1024, &bytes) == 0);
+ assert_se(bytes == 112);
+
+ assert_se(parse_size(" 112.6 B", 1024, &bytes) == 0);
+ assert_se(bytes == 112);
+
+ assert_se(parse_size("3.5 K", 1024, &bytes) == 0);
+ assert_se(bytes == 3*1024 + 512);
+
+ assert_se(parse_size("3. K", 1024, &bytes) == 0);
+ assert_se(bytes == 3*1024);
+
+ assert_se(parse_size("3.0 K", 1024, &bytes) == 0);
+ assert_se(bytes == 3*1024);
+
+ assert_se(parse_size("3. 0 K", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size(" 4 M 11.5K", 1024, &bytes) == 0);
+ assert_se(bytes == 4*1024*1024 + 11 * 1024 + 512);
+
+ assert_se(parse_size("3B3.5G", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size("3.5G3B", 1024, &bytes) == 0);
+ assert_se(bytes == 3ULL*1024*1024*1024 + 512*1024*1024 + 3);
+
+ assert_se(parse_size("3.5G 4B", 1024, &bytes) == 0);
+ assert_se(bytes == 3ULL*1024*1024*1024 + 512*1024*1024 + 4);
+
+ assert_se(parse_size("3B3G4T", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size("4T3G3B", 1024, &bytes) == 0);
+ assert_se(bytes == (4ULL*1024 + 3)*1024*1024*1024 + 3);
+
+ assert_se(parse_size(" 4 T 3 G 3 B", 1024, &bytes) == 0);
+ assert_se(bytes == (4ULL*1024 + 3)*1024*1024*1024 + 3);
+
+ assert_se(parse_size("12P", 1024, &bytes) == 0);
+ assert_se(bytes == 12ULL * 1024*1024*1024*1024*1024);
+
+ assert_se(parse_size("12P12P", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size("3E 2P", 1024, &bytes) == 0);
+ assert_se(bytes == (3 * 1024 + 2ULL) * 1024*1024*1024*1024*1024);
+
+ assert_se(parse_size("12X", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size("12.5X", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size("12.5e3", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size("1024E", 1024, &bytes) == -ERANGE);
+ assert_se(parse_size("-1", 1024, &bytes) == -ERANGE);
+ assert_se(parse_size("-1024E", 1024, &bytes) == -ERANGE);
+
+ assert_se(parse_size("-1024P", 1024, &bytes) == -ERANGE);
+
+ assert_se(parse_size("-10B 20K", 1024, &bytes) == -ERANGE);
+}
+
+static void test_parse_range(void) {
+ unsigned lower, upper;
+
+ /* Successful cases */
+ assert_se(parse_range("111", &lower, &upper) == 0);
+ assert_se(lower == 111);
+ assert_se(upper == 111);
+
+ assert_se(parse_range("111-123", &lower, &upper) == 0);
+ assert_se(lower == 111);
+ assert_se(upper == 123);
+
+ assert_se(parse_range("123-111", &lower, &upper) == 0);
+ assert_se(lower == 123);
+ assert_se(upper == 111);
+
+ assert_se(parse_range("123-123", &lower, &upper) == 0);
+ assert_se(lower == 123);
+ assert_se(upper == 123);
+
+ assert_se(parse_range("0", &lower, &upper) == 0);
+ assert_se(lower == 0);
+ assert_se(upper == 0);
+
+ assert_se(parse_range("0-15", &lower, &upper) == 0);
+ assert_se(lower == 0);
+ assert_se(upper == 15);
+
+ assert_se(parse_range("15-0", &lower, &upper) == 0);
+ assert_se(lower == 15);
+ assert_se(upper == 0);
+
+ assert_se(parse_range("128-65535", &lower, &upper) == 0);
+ assert_se(lower == 128);
+ assert_se(upper == 65535);
+
+ assert_se(parse_range("1024-4294967295", &lower, &upper) == 0);
+ assert_se(lower == 1024);
+ assert_se(upper == 4294967295);
+
+ /* Leading whitespace is acceptable */
+ assert_se(parse_range(" 111", &lower, &upper) == 0);
+ assert_se(lower == 111);
+ assert_se(upper == 111);
+
+ assert_se(parse_range(" 111-123", &lower, &upper) == 0);
+ assert_se(lower == 111);
+ assert_se(upper == 123);
+
+ assert_se(parse_range("111- 123", &lower, &upper) == 0);
+ assert_se(lower == 111);
+ assert_se(upper == 123);
+
+ assert_se(parse_range("\t111-\t123", &lower, &upper) == 0);
+ assert_se(lower == 111);
+ assert_se(upper == 123);
+
+ assert_se(parse_range(" \t 111- \t 123", &lower, &upper) == 0);
+ assert_se(lower == 111);
+ assert_se(upper == 123);
+
+ /* Error cases, make sure they fail as expected */
+ lower = upper = 9999;
+ assert_se(parse_range("111garbage", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("garbage111", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("garbage", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111-123garbage", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111garbage-123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ /* Empty string */
+ lower = upper = 9999;
+ assert_se(parse_range("", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ /* 111--123 will pass -123 to safe_atou which returns -ERANGE for negative */
+ assert_se(parse_range("111--123", &lower, &upper) == -ERANGE);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("-123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("-111-123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111-123-", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111.4-123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111-123.4", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111,4-123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111-123,4", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ /* Error on trailing dash */
+ assert_se(parse_range("111-", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111-123-", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111--", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111- ", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ /* Whitespace is not a separator */
+ assert_se(parse_range("111 123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111\t123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111 \t 123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ /* Trailing whitespace is invalid (from safe_atou) */
+ assert_se(parse_range("111 ", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111-123 ", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111 -123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111 -123 ", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111\t-123\t", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111 \t -123 \t ", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ /* Out of the "unsigned" range, this is 1<<64 */
+ assert_se(parse_range("0-18446744073709551616", &lower, &upper) == -ERANGE);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+}
+
+static void test_safe_atolli(void) {
+ int r;
+ long long l;
+
+ r = safe_atolli("12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 12345);
+
+ r = safe_atolli(" 12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 12345);
+
+ r = safe_atolli("-12345", &l);
+ assert_se(r == 0);
+ assert_se(l == -12345);
+
+ r = safe_atolli(" -12345", &l);
+ assert_se(r == 0);
+ assert_se(l == -12345);
+
+ r = safe_atolli("12345678901234567890", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atolli("-12345678901234567890", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atolli("junk", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atolli("123x", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atolli("12.3", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atolli("", &l);
+ assert_se(r == -EINVAL);
+}
+
+static void test_safe_atou16(void) {
+ int r;
+ uint16_t l;
+
+ r = safe_atou16("12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 12345);
+
+ r = safe_atou16(" 12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 12345);
+
+ r = safe_atou16("123456", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atou16("-1", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atou16(" -1", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atou16("junk", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atou16("123x", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atou16("12.3", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atou16("", &l);
+ assert_se(r == -EINVAL);
+}
+
+static void test_safe_atoi16(void) {
+ int r;
+ int16_t l;
+
+ r = safe_atoi16("-12345", &l);
+ assert_se(r == 0);
+ assert_se(l == -12345);
+
+ r = safe_atoi16(" -12345", &l);
+ assert_se(r == 0);
+ assert_se(l == -12345);
+
+ r = safe_atoi16("32767", &l);
+ assert_se(r == 0);
+ assert_se(l == 32767);
+
+ r = safe_atoi16(" 32767", &l);
+ assert_se(r == 0);
+ assert_se(l == 32767);
+
+ r = safe_atoi16("36536", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoi16("-32769", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoi16("junk", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoi16("123x", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoi16("12.3", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoi16("", &l);
+ assert_se(r == -EINVAL);
+}
+
+static void test_safe_atoux16(void) {
+ int r;
+ uint16_t l;
+
+ r = safe_atoux16("1234", &l);
+ assert_se(r == 0);
+ assert_se(l == 0x1234);
+
+ r = safe_atoux16("abcd", &l);
+ assert_se(r == 0);
+ assert_se(l == 0xabcd);
+
+ r = safe_atoux16(" 1234", &l);
+ assert_se(r == 0);
+ assert_se(l == 0x1234);
+
+ r = safe_atoux16("12345", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoux16("-1", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoux16(" -1", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoux16("junk", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoux16("123x", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoux16("12.3", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoux16("", &l);
+ assert_se(r == -EINVAL);
+}
+
+static void test_safe_atou64(void) {
+ int r;
+ uint64_t l;
+
+ r = safe_atou64("12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 12345);
+
+ r = safe_atou64(" 12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 12345);
+
+ r = safe_atou64("18446744073709551617", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atou64("-1", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atou64(" -1", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atou64("junk", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atou64("123x", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atou64("12.3", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atou64("", &l);
+ assert_se(r == -EINVAL);
+}
+
+static void test_safe_atoi64(void) {
+ int r;
+ int64_t l;
+
+ r = safe_atoi64("-12345", &l);
+ assert_se(r == 0);
+ assert_se(l == -12345);
+
+ r = safe_atoi64(" -12345", &l);
+ assert_se(r == 0);
+ assert_se(l == -12345);
+
+ r = safe_atoi64("32767", &l);
+ assert_se(r == 0);
+ assert_se(l == 32767);
+
+ r = safe_atoi64(" 32767", &l);
+ assert_se(r == 0);
+ assert_se(l == 32767);
+
+ r = safe_atoi64("9223372036854775813", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoi64("-9223372036854775813", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoi64("junk", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoi64("123x", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoi64("12.3", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoi64("", &l);
+ assert_se(r == -EINVAL);
+}
+
+static void test_safe_atod(void) {
+ int r;
+ double d;
+ char *e;
+
+ r = safe_atod("junk", &d);
+ assert_se(r == -EINVAL);
+
+ r = safe_atod("0.2244", &d);
+ assert_se(r == 0);
+ assert_se(fabs(d - 0.2244) < 0.000001);
+
+ r = safe_atod("0,5", &d);
+ assert_se(r == -EINVAL);
+
+ errno = 0;
+ strtod("0,5", &e);
+ assert_se(*e == ',');
+
+ r = safe_atod("", &d);
+ assert_se(r == -EINVAL);
+
+ /* Check if this really is locale independent */
+ if (setlocale(LC_NUMERIC, "de_DE.utf8")) {
+
+ r = safe_atod("0.2244", &d);
+ assert_se(r == 0);
+ assert_se(fabs(d - 0.2244) < 0.000001);
+
+ r = safe_atod("0,5", &d);
+ assert_se(r == -EINVAL);
+
+ errno = 0;
+ assert_se(fabs(strtod("0,5", &e) - 0.5) < 0.00001);
+
+ r = safe_atod("", &d);
+ assert_se(r == -EINVAL);
+ }
+
+ /* And check again, reset */
+ assert_se(setlocale(LC_NUMERIC, "C"));
+
+ r = safe_atod("0.2244", &d);
+ assert_se(r == 0);
+ assert_se(fabs(d - 0.2244) < 0.000001);
+
+ r = safe_atod("0,5", &d);
+ assert_se(r == -EINVAL);
+
+ errno = 0;
+ strtod("0,5", &e);
+ assert_se(*e == ',');
+
+ r = safe_atod("", &d);
+ assert_se(r == -EINVAL);
+}
+
+static void test_parse_percent(void) {
+ assert_se(parse_percent("") == -EINVAL);
+ assert_se(parse_percent("foo") == -EINVAL);
+ assert_se(parse_percent("0") == -EINVAL);
+ assert_se(parse_percent("50") == -EINVAL);
+ assert_se(parse_percent("100") == -EINVAL);
+ assert_se(parse_percent("-1") == -EINVAL);
+ assert_se(parse_percent("0%") == 0);
+ assert_se(parse_percent("55%") == 55);
+ assert_se(parse_percent("100%") == 100);
+ assert_se(parse_percent("-7%") == -ERANGE);
+ assert_se(parse_percent("107%") == -ERANGE);
+ assert_se(parse_percent("%") == -EINVAL);
+ assert_se(parse_percent("%%") == -EINVAL);
+ assert_se(parse_percent("%1") == -EINVAL);
+ assert_se(parse_percent("1%%") == -EINVAL);
+ assert_se(parse_percent("3.2%") == -EINVAL);
+}
+
+static void test_parse_percent_unbounded(void) {
+ assert_se(parse_percent_unbounded("101%") == 101);
+ assert_se(parse_percent_unbounded("400%") == 400);
+}
+
+static void test_parse_permille(void) {
+ assert_se(parse_permille("") == -EINVAL);
+ assert_se(parse_permille("foo") == -EINVAL);
+ assert_se(parse_permille("0") == -EINVAL);
+ assert_se(parse_permille("50") == -EINVAL);
+ assert_se(parse_permille("100") == -EINVAL);
+ assert_se(parse_permille("-1") == -EINVAL);
+
+ assert_se(parse_permille("0‰") == 0);
+ assert_se(parse_permille("555‰") == 555);
+ assert_se(parse_permille("1000‰") == 1000);
+ assert_se(parse_permille("-7‰") == -ERANGE);
+ assert_se(parse_permille("1007‰") == -ERANGE);
+ assert_se(parse_permille("‰") == -EINVAL);
+ assert_se(parse_permille("‰‰") == -EINVAL);
+ assert_se(parse_permille("‰1") == -EINVAL);
+ assert_se(parse_permille("1‰‰") == -EINVAL);
+ assert_se(parse_permille("3.2‰") == -EINVAL);
+
+ assert_se(parse_permille("0%") == 0);
+ assert_se(parse_permille("55%") == 550);
+ assert_se(parse_permille("55.5%") == 555);
+ assert_se(parse_permille("100%") == 1000);
+ assert_se(parse_permille("-7%") == -ERANGE);
+ assert_se(parse_permille("107%") == -ERANGE);
+ assert_se(parse_permille("%") == -EINVAL);
+ assert_se(parse_permille("%%") == -EINVAL);
+ assert_se(parse_permille("%1") == -EINVAL);
+ assert_se(parse_permille("1%%") == -EINVAL);
+ assert_se(parse_permille("3.21%") == -EINVAL);
+}
+
+static void test_parse_permille_unbounded(void) {
+ assert_se(parse_permille_unbounded("1001‰") == 1001);
+ assert_se(parse_permille_unbounded("4000‰") == 4000);
+ assert_se(parse_permille_unbounded("2147483647‰") == 2147483647);
+ assert_se(parse_permille_unbounded("2147483648‰") == -ERANGE);
+ assert_se(parse_permille_unbounded("4294967295‰") == -ERANGE);
+ assert_se(parse_permille_unbounded("4294967296‰") == -ERANGE);
+
+ assert_se(parse_permille_unbounded("101%") == 1010);
+ assert_se(parse_permille_unbounded("400%") == 4000);
+ assert_se(parse_permille_unbounded("214748364.7%") == 2147483647);
+ assert_se(parse_permille_unbounded("214748364.8%") == -ERANGE);
+ assert_se(parse_permille_unbounded("429496729.5%") == -ERANGE);
+ assert_se(parse_permille_unbounded("429496729.6%") == -ERANGE);
+}
+
+static void test_parse_nice(void) {
+ int n;
+
+ assert_se(parse_nice("0", &n) >= 0 && n == 0);
+ assert_se(parse_nice("+0", &n) >= 0 && n == 0);
+ assert_se(parse_nice("-1", &n) >= 0 && n == -1);
+ assert_se(parse_nice("-2", &n) >= 0 && n == -2);
+ assert_se(parse_nice("1", &n) >= 0 && n == 1);
+ assert_se(parse_nice("2", &n) >= 0 && n == 2);
+ assert_se(parse_nice("+1", &n) >= 0 && n == 1);
+ assert_se(parse_nice("+2", &n) >= 0 && n == 2);
+ assert_se(parse_nice("-20", &n) >= 0 && n == -20);
+ assert_se(parse_nice("19", &n) >= 0 && n == 19);
+ assert_se(parse_nice("+19", &n) >= 0 && n == 19);
+
+ assert_se(parse_nice("", &n) == -EINVAL);
+ assert_se(parse_nice("-", &n) == -EINVAL);
+ assert_se(parse_nice("+", &n) == -EINVAL);
+ assert_se(parse_nice("xx", &n) == -EINVAL);
+ assert_se(parse_nice("-50", &n) == -ERANGE);
+ assert_se(parse_nice("50", &n) == -ERANGE);
+ assert_se(parse_nice("+50", &n) == -ERANGE);
+ assert_se(parse_nice("-21", &n) == -ERANGE);
+ assert_se(parse_nice("20", &n) == -ERANGE);
+ assert_se(parse_nice("+20", &n) == -ERANGE);
+}
+
+static void test_parse_dev(void) {
+ dev_t dev;
+
+ assert_se(parse_dev("", &dev) == -EINVAL);
+ assert_se(parse_dev("junk", &dev) == -EINVAL);
+ assert_se(parse_dev("0", &dev) == -EINVAL);
+ assert_se(parse_dev("5", &dev) == -EINVAL);
+ assert_se(parse_dev("5:", &dev) == -EINVAL);
+ assert_se(parse_dev(":5", &dev) == -EINVAL);
+ assert_se(parse_dev("-1:-1", &dev) == -EINVAL);
+#if SIZEOF_DEV_T < 8
+ assert_se(parse_dev("4294967295:4294967295", &dev) == -EINVAL);
+#endif
+ assert_se(parse_dev("8:11", &dev) >= 0 && major(dev) == 8 && minor(dev) == 11);
+ assert_se(parse_dev("0:0", &dev) >= 0 && major(dev) == 0 && minor(dev) == 0);
+}
+
+static void test_parse_errno(void) {
+ assert_se(parse_errno("EILSEQ") == EILSEQ);
+ assert_se(parse_errno("EINVAL") == EINVAL);
+ assert_se(parse_errno("0") == 0);
+ assert_se(parse_errno("1") == 1);
+ assert_se(parse_errno("4095") == 4095);
+
+ assert_se(parse_errno("-1") == -ERANGE);
+ assert_se(parse_errno("-3") == -ERANGE);
+ assert_se(parse_errno("4096") == -ERANGE);
+
+ assert_se(parse_errno("") == -EINVAL);
+ assert_se(parse_errno("12.3") == -EINVAL);
+ assert_se(parse_errno("123junk") == -EINVAL);
+ assert_se(parse_errno("junk123") == -EINVAL);
+ assert_se(parse_errno("255EILSEQ") == -EINVAL);
+ assert_se(parse_errno("EINVAL12") == -EINVAL);
+ assert_se(parse_errno("-EINVAL") == -EINVAL);
+ assert_se(parse_errno("EINVALaaa") == -EINVAL);
+}
+
+static void test_parse_syscall_and_errno(void) {
+ _cleanup_free_ char *n = NULL;
+ int e;
+
+ assert_se(parse_syscall_and_errno("uname:EILSEQ", &n, &e) >= 0);
+ assert_se(streq(n, "uname"));
+ assert_se(e == errno_from_name("EILSEQ") && e >= 0);
+ n = mfree(n);
+
+ assert_se(parse_syscall_and_errno("uname:EINVAL", &n, &e) >= 0);
+ assert_se(streq(n, "uname"));
+ assert_se(e == errno_from_name("EINVAL") && e >= 0);
+ n = mfree(n);
+
+ assert_se(parse_syscall_and_errno("@sync:4095", &n, &e) >= 0);
+ assert_se(streq(n, "@sync"));
+ assert_se(e == 4095);
+ n = mfree(n);
+
+ /* If errno is omitted, then e is set to -1 */
+ assert_se(parse_syscall_and_errno("mount", &n, &e) >= 0);
+ assert_se(streq(n, "mount"));
+ assert_se(e == -1);
+ n = mfree(n);
+
+ /* parse_syscall_and_errno() does not check the syscall name is valid or not. */
+ assert_se(parse_syscall_and_errno("hoge:255", &n, &e) >= 0);
+ assert_se(streq(n, "hoge"));
+ assert_se(e == 255);
+ n = mfree(n);
+
+ /* The function checks the syscall name is empty or not. */
+ assert_se(parse_syscall_and_errno("", &n, &e) == -EINVAL);
+ assert_se(parse_syscall_and_errno(":255", &n, &e) == -EINVAL);
+
+ /* errno must be a valid errno name or number between 0 and ERRNO_MAX == 4095 */
+ assert_se(parse_syscall_and_errno("hoge:4096", &n, &e) == -ERANGE);
+ assert_se(parse_syscall_and_errno("hoge:-3", &n, &e) == -ERANGE);
+ assert_se(parse_syscall_and_errno("hoge:12.3", &n, &e) == -EINVAL);
+ assert_se(parse_syscall_and_errno("hoge:123junk", &n, &e) == -EINVAL);
+ assert_se(parse_syscall_and_errno("hoge:junk123", &n, &e) == -EINVAL);
+ assert_se(parse_syscall_and_errno("hoge:255:EILSEQ", &n, &e) == -EINVAL);
+ assert_se(parse_syscall_and_errno("hoge:-EINVAL", &n, &e) == -EINVAL);
+ assert_se(parse_syscall_and_errno("hoge:EINVALaaa", &n, &e) == -EINVAL);
+ assert_se(parse_syscall_and_errno("hoge:", &n, &e) == -EINVAL);
+}
+
+static void test_parse_mtu(void) {
+ uint32_t mtu = 0;
+
+ assert_se(parse_mtu(AF_UNSPEC, "1500", &mtu) >= 0 && mtu == 1500);
+ assert_se(parse_mtu(AF_UNSPEC, "1400", &mtu) >= 0 && mtu == 1400);
+ assert_se(parse_mtu(AF_UNSPEC, "65535", &mtu) >= 0 && mtu == 65535);
+ assert_se(parse_mtu(AF_UNSPEC, "65536", &mtu) >= 0 && mtu == 65536);
+ assert_se(parse_mtu(AF_UNSPEC, "4294967295", &mtu) >= 0 && mtu == 4294967295);
+ assert_se(parse_mtu(AF_UNSPEC, "500", &mtu) >= 0 && mtu == 500);
+ assert_se(parse_mtu(AF_UNSPEC, "1280", &mtu) >= 0 && mtu == 1280);
+ assert_se(parse_mtu(AF_INET6, "1280", &mtu) >= 0 && mtu == 1280);
+ assert_se(parse_mtu(AF_INET6, "1279", &mtu) == -ERANGE);
+ assert_se(parse_mtu(AF_UNSPEC, "4294967296", &mtu) == -ERANGE);
+ assert_se(parse_mtu(AF_INET6, "4294967296", &mtu) == -ERANGE);
+ assert_se(parse_mtu(AF_INET6, "68", &mtu) == -ERANGE);
+ assert_se(parse_mtu(AF_UNSPEC, "68", &mtu) >= 0 && mtu == 68);
+ assert_se(parse_mtu(AF_UNSPEC, "67", &mtu) == -ERANGE);
+ assert_se(parse_mtu(AF_UNSPEC, "0", &mtu) == -ERANGE);
+ assert_se(parse_mtu(AF_UNSPEC, "", &mtu) == -EINVAL);
+}
+
+int main(int argc, char *argv[]) {
+ log_parse_environment();
+ log_open();
+
+ test_parse_boolean();
+ test_parse_pid();
+ test_parse_mode();
+ test_parse_size();
+ test_parse_range();
+ test_safe_atolli();
+ test_safe_atou16();
+ test_safe_atoi16();
+ test_safe_atoux16();
+ test_safe_atou64();
+ test_safe_atoi64();
+ test_safe_atod();
+ test_parse_percent();
+ test_parse_percent_unbounded();
+ test_parse_permille();
+ test_parse_permille_unbounded();
+ test_parse_nice();
+ test_parse_dev();
+ test_parse_errno();
+ test_parse_syscall_and_errno();
+ test_parse_mtu();
+
+ return 0;
+}
diff --git a/src/test/test-path-lookup.c b/src/test/test-path-lookup.c
new file mode 100644
index 0000000..f208559
--- /dev/null
+++ b/src/test/test-path-lookup.c
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#include "log.h"
+#include "path-lookup.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+
+static void test_paths(UnitFileScope scope) {
+ char template[] = "/tmp/test-path-lookup.XXXXXXX";
+
+ _cleanup_(lookup_paths_free) LookupPaths lp_without_env = {};
+ _cleanup_(lookup_paths_free) LookupPaths lp_with_env = {};
+ char *systemd_unit_path;
+
+ assert_se(mkdtemp(template));
+
+ assert_se(unsetenv("SYSTEMD_UNIT_PATH") == 0);
+ assert_se(lookup_paths_init(&lp_without_env, scope, 0, NULL) >= 0);
+ assert_se(!strv_isempty(lp_without_env.search_path));
+ assert_se(lookup_paths_reduce(&lp_without_env) >= 0);
+
+ systemd_unit_path = strjoina(template, "/systemd-unit-path");
+ assert_se(setenv("SYSTEMD_UNIT_PATH", systemd_unit_path, 1) == 0);
+ assert_se(lookup_paths_init(&lp_with_env, scope, 0, NULL) == 0);
+ assert_se(strv_length(lp_with_env.search_path) == 1);
+ assert_se(streq(lp_with_env.search_path[0], systemd_unit_path));
+ assert_se(lookup_paths_reduce(&lp_with_env) >= 0);
+ assert_se(strv_isempty(lp_with_env.search_path));
+
+ assert_se(rm_rf(template, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+}
+
+static void test_user_and_global_paths(void) {
+ _cleanup_(lookup_paths_free) LookupPaths lp_global = {}, lp_user = {};
+ char **u, **g, **p;
+ unsigned k = 0;
+
+ assert_se(unsetenv("SYSTEMD_UNIT_PATH") == 0);
+ assert_se(unsetenv("XDG_DATA_DIRS") == 0);
+ assert_se(unsetenv("XDG_CONFIG_DIRS") == 0);
+
+ assert_se(lookup_paths_init(&lp_global, UNIT_FILE_GLOBAL, 0, NULL) == 0);
+ assert_se(lookup_paths_init(&lp_user, UNIT_FILE_USER, 0, NULL) == 0);
+ g = lp_global.search_path;
+ u = lp_user.search_path;
+
+ /* Go over all entries in global search path, and verify
+ * that they also exist in the user search path. Skip any
+ * entries in user search path which don't exist in the global
+ * one, but not vice versa. */
+ log_info("/* %s */", __func__);
+ STRV_FOREACH(p, g) {
+ while (u[k] && !streq(*p, u[k])) {
+ log_info("+ %s", u[k]);
+ k++;
+ }
+ log_info(" %s", *p);
+ assert(u[k]); /* If NULL, we didn't find a matching entry */
+ k++;
+ }
+ STRV_FOREACH(p, u + k)
+ log_info("+ %s", *p);
+}
+
+static void print_generator_binary_paths(UnitFileScope scope) {
+ _cleanup_strv_free_ char **paths;
+ char **dir;
+
+ log_info("Generators dirs (%s):", scope == UNIT_FILE_SYSTEM ? "system" : "user");
+
+ paths = generator_binary_paths(scope);
+ STRV_FOREACH(dir, paths)
+ log_info(" %s", *dir);
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_paths(UNIT_FILE_SYSTEM);
+ test_paths(UNIT_FILE_USER);
+ test_paths(UNIT_FILE_GLOBAL);
+
+ test_user_and_global_paths();
+
+ print_generator_binary_paths(UNIT_FILE_SYSTEM);
+ print_generator_binary_paths(UNIT_FILE_USER);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-path-util.c b/src/test/test-path-util.c
new file mode 100644
index 0000000..c64ca7b
--- /dev/null
+++ b/src/test/test-path-util.c
@@ -0,0 +1,619 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "util.h"
+
+#define test_path_compare(a, b, result) { \
+ assert_se(path_compare(a, b) == result); \
+ assert_se(path_compare(b, a) == -result); \
+ assert_se(path_equal(a, b) == !result); \
+ assert_se(path_equal(b, a) == !result); \
+ }
+
+static void test_path_simplify(const char *in, const char *out, const char *out_dot) {
+ char *p;
+
+ p = strdupa(in);
+ assert_se(streq(path_simplify(p, false), out));
+
+ p = strdupa(in);
+ assert_se(streq(path_simplify(p, true), out_dot));
+}
+
+static void test_path(void) {
+ _cleanup_close_ int fd = -1;
+
+ test_path_compare("/goo", "/goo", 0);
+ test_path_compare("/goo", "/goo", 0);
+ test_path_compare("//goo", "/goo", 0);
+ test_path_compare("//goo/////", "/goo", 0);
+ test_path_compare("goo/////", "goo", 0);
+
+ test_path_compare("/goo/boo", "/goo//boo", 0);
+ test_path_compare("//goo/boo", "/goo/boo//", 0);
+
+ test_path_compare("/", "///", 0);
+
+ test_path_compare("/x", "x/", 1);
+ test_path_compare("x/", "/", -1);
+
+ test_path_compare("/x/./y", "x/y", 1);
+ test_path_compare("x/.y", "x/y", -1);
+
+ test_path_compare("foo", "/foo", -1);
+ test_path_compare("/foo", "/foo/bar", -1);
+ test_path_compare("/foo/aaa", "/foo/b", -1);
+ test_path_compare("/foo/aaa", "/foo/b/a", -1);
+ test_path_compare("/foo/a", "/foo/aaa", -1);
+ test_path_compare("/foo/a/b", "/foo/aaa", -1);
+
+ assert_se(path_is_absolute("/"));
+ assert_se(!path_is_absolute("./"));
+
+ assert_se(is_path("/dir"));
+ assert_se(is_path("a/b"));
+ assert_se(!is_path("."));
+
+ assert_se(streq(basename("./aa/bb/../file.da."), "file.da."));
+ assert_se(streq(basename("/aa///.file"), ".file"));
+ assert_se(streq(basename("/aa///file..."), "file..."));
+ assert_se(streq(basename("file.../"), ""));
+
+ fd = open("/", O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY);
+ assert_se(fd >= 0);
+ assert_se(fd_is_mount_point(fd, "/", 0) > 0);
+
+ test_path_simplify("aaa/bbb////ccc", "aaa/bbb/ccc", "aaa/bbb/ccc");
+ test_path_simplify("//aaa/.////ccc", "/aaa/./ccc", "/aaa/ccc");
+ test_path_simplify("///", "/", "/");
+ test_path_simplify("///.//", "/.", "/");
+ test_path_simplify("///.//.///", "/./.", "/");
+ test_path_simplify("////.././///../.", "/.././../.", "/../..");
+ test_path_simplify(".", ".", ".");
+ test_path_simplify("./", ".", ".");
+ test_path_simplify(".///.//./.", "./././.", ".");
+ test_path_simplify(".///.//././/", "./././.", ".");
+ test_path_simplify("//./aaa///.//./.bbb/..///c.//d.dd///..eeee/.",
+ "/./aaa/././.bbb/../c./d.dd/..eeee/.",
+ "/aaa/.bbb/../c./d.dd/..eeee");
+ test_path_simplify("//./aaa///.//./.bbb/..///c.//d.dd///..eeee/..",
+ "/./aaa/././.bbb/../c./d.dd/..eeee/..",
+ "/aaa/.bbb/../c./d.dd/..eeee/..");
+ test_path_simplify(".//./aaa///.//./.bbb/..///c.//d.dd///..eeee/..",
+ "././aaa/././.bbb/../c./d.dd/..eeee/..",
+ "aaa/.bbb/../c./d.dd/..eeee/..");
+ test_path_simplify("..//./aaa///.//./.bbb/..///c.//d.dd///..eeee/..",
+ ".././aaa/././.bbb/../c./d.dd/..eeee/..",
+ "../aaa/.bbb/../c./d.dd/..eeee/..");
+
+ assert_se(PATH_IN_SET("/bin", "/", "/bin", "/foo"));
+ assert_se(PATH_IN_SET("/bin", "/bin"));
+ assert_se(PATH_IN_SET("/bin", "/foo/bar", "/bin"));
+ assert_se(PATH_IN_SET("/", "/", "/", "/foo/bar"));
+ assert_se(!PATH_IN_SET("/", "/abc", "/def"));
+
+ assert_se(path_equal_ptr(NULL, NULL));
+ assert_se(path_equal_ptr("/a", "/a"));
+ assert_se(!path_equal_ptr("/a", "/b"));
+ assert_se(!path_equal_ptr("/a", NULL));
+ assert_se(!path_equal_ptr(NULL, "/a"));
+}
+
+static void test_path_equal_root(void) {
+ /* Nail down the details of how path_equal("/", ...) works. */
+
+ assert_se(path_equal("/", "/"));
+ assert_se(path_equal("/", "//"));
+
+ assert_se(!path_equal("/", "/./"));
+ assert_se(!path_equal("/", "/../"));
+
+ assert_se(!path_equal("/", "/.../"));
+
+ /* Make sure that files_same works as expected. */
+
+ assert_se(files_same("/", "/", 0) > 0);
+ assert_se(files_same("/", "/", AT_SYMLINK_NOFOLLOW) > 0);
+ assert_se(files_same("/", "//", 0) > 0);
+ assert_se(files_same("/", "//", AT_SYMLINK_NOFOLLOW) > 0);
+
+ assert_se(files_same("/", "/./", 0) > 0);
+ assert_se(files_same("/", "/./", AT_SYMLINK_NOFOLLOW) > 0);
+ assert_se(files_same("/", "/../", 0) > 0);
+ assert_se(files_same("/", "/../", AT_SYMLINK_NOFOLLOW) > 0);
+
+ assert_se(files_same("/", "/.../", 0) == -ENOENT);
+ assert_se(files_same("/", "/.../", AT_SYMLINK_NOFOLLOW) == -ENOENT);
+
+ /* The same for path_equal_or_files_same. */
+
+ assert_se(path_equal_or_files_same("/", "/", 0));
+ assert_se(path_equal_or_files_same("/", "/", AT_SYMLINK_NOFOLLOW));
+ assert_se(path_equal_or_files_same("/", "//", 0));
+ assert_se(path_equal_or_files_same("/", "//", AT_SYMLINK_NOFOLLOW));
+
+ assert_se(path_equal_or_files_same("/", "/./", 0));
+ assert_se(path_equal_or_files_same("/", "/./", AT_SYMLINK_NOFOLLOW));
+ assert_se(path_equal_or_files_same("/", "/../", 0));
+ assert_se(path_equal_or_files_same("/", "/../", AT_SYMLINK_NOFOLLOW));
+
+ assert_se(!path_equal_or_files_same("/", "/.../", 0));
+ assert_se(!path_equal_or_files_same("/", "/.../", AT_SYMLINK_NOFOLLOW));
+}
+
+static void test_find_binary(const char *self) {
+ char *p;
+
+ assert_se(find_binary("/bin/sh", &p) == 0);
+ puts(p);
+ assert_se(path_equal(p, "/bin/sh"));
+ free(p);
+
+ assert_se(find_binary(self, &p) == 0);
+ puts(p);
+ /* libtool might prefix the binary name with "lt-" */
+ assert_se(endswith(p, "/lt-test-path-util") || endswith(p, "/test-path-util"));
+ assert_se(path_is_absolute(p));
+ free(p);
+
+ assert_se(find_binary("sh", &p) == 0);
+ puts(p);
+ assert_se(endswith(p, "/sh"));
+ assert_se(path_is_absolute(p));
+ free(p);
+
+ assert_se(find_binary("xxxx-xxxx", &p) == -ENOENT);
+ assert_se(find_binary("/some/dir/xxxx-xxxx", &p) == -ENOENT);
+}
+
+static void test_prefixes(void) {
+ static const char* values[] = { "/a/b/c/d", "/a/b/c", "/a/b", "/a", "", NULL};
+ unsigned i;
+ char s[PATH_MAX];
+ bool b;
+
+ i = 0;
+ PATH_FOREACH_PREFIX_MORE(s, "/a/b/c/d") {
+ log_error("---%s---", s);
+ assert_se(streq(s, values[i++]));
+ }
+ assert_se(values[i] == NULL);
+
+ i = 1;
+ PATH_FOREACH_PREFIX(s, "/a/b/c/d") {
+ log_error("---%s---", s);
+ assert_se(streq(s, values[i++]));
+ }
+ assert_se(values[i] == NULL);
+
+ i = 0;
+ PATH_FOREACH_PREFIX_MORE(s, "////a////b////c///d///////")
+ assert_se(streq(s, values[i++]));
+ assert_se(values[i] == NULL);
+
+ i = 1;
+ PATH_FOREACH_PREFIX(s, "////a////b////c///d///////")
+ assert_se(streq(s, values[i++]));
+ assert_se(values[i] == NULL);
+
+ PATH_FOREACH_PREFIX(s, "////")
+ assert_not_reached("Wut?");
+
+ b = false;
+ PATH_FOREACH_PREFIX_MORE(s, "////") {
+ assert_se(!b);
+ assert_se(streq(s, ""));
+ b = true;
+ }
+ assert_se(b);
+
+ PATH_FOREACH_PREFIX(s, "")
+ assert_not_reached("wut?");
+
+ b = false;
+ PATH_FOREACH_PREFIX_MORE(s, "") {
+ assert_se(!b);
+ assert_se(streq(s, ""));
+ b = true;
+ }
+}
+
+static void test_path_join(void) {
+
+#define test_join(expected, ...) { \
+ _cleanup_free_ char *z = NULL; \
+ z = path_join(__VA_ARGS__); \
+ log_debug("got \"%s\", expected \"%s\"", z, expected); \
+ assert_se(streq(z, expected)); \
+ }
+
+ test_join("/root/a/b/c", "/root", "/a/b", "/c");
+ test_join("/root/a/b/c", "/root", "a/b", "c");
+ test_join("/root/a/b/c", "/root", "/a/b", "c");
+ test_join("/root/c", "/root", "/", "c");
+ test_join("/root/", "/root", "/", NULL);
+
+ test_join("/a/b/c", "", "/a/b", "/c");
+ test_join("a/b/c", "", "a/b", "c");
+ test_join("/a/b/c", "", "/a/b", "c");
+ test_join("/c", "", "/", "c");
+ test_join("/", "", "/", NULL);
+
+ test_join("/a/b/c", NULL, "/a/b", "/c");
+ test_join("a/b/c", NULL, "a/b", "c");
+ test_join("/a/b/c", NULL, "/a/b", "c");
+ test_join("/c", NULL, "/", "c");
+ test_join("/", NULL, "/", NULL);
+
+ test_join("", "", NULL);
+ test_join("", NULL, "");
+ test_join("", NULL, NULL);
+
+ test_join("foo/bar", "foo", "bar");
+ test_join("foo/bar", "", "foo", "bar");
+ test_join("foo/bar", NULL, "foo", NULL, "bar");
+ test_join("foo/bar", "", "foo", "", "bar", "");
+ test_join("foo/bar", "", "", "", "", "foo", "", "", "", "bar", "", "", "");
+
+ test_join("//foo///bar//", "", "/", "", "/foo/", "", "/", "", "/bar/", "", "/", "");
+ test_join("/foo/bar/", "/", "foo", "/", "bar", "/");
+ test_join("foo/bar/baz", "foo", "bar", "baz");
+ test_join("foo/bar/baz", "foo/", "bar", "/baz");
+ test_join("foo//bar//baz", "foo/", "/bar/", "/baz");
+ test_join("//foo////bar////baz//", "//foo/", "///bar/", "///baz//");
+}
+
+static void test_fsck_exists(void) {
+ /* Ensure we use a sane default for PATH. */
+ unsetenv("PATH");
+
+ /* fsck.minix is provided by util-linux and will probably exist. */
+ assert_se(fsck_exists("minix") == 1);
+
+ assert_se(fsck_exists("AbCdE") == 0);
+ assert_se(fsck_exists("/../bin/") == 0);
+}
+
+static void test_make_relative(void) {
+ char *result;
+
+ assert_se(path_make_relative("some/relative/path", "/some/path", &result) < 0);
+ assert_se(path_make_relative("/some/path", "some/relative/path", &result) < 0);
+ assert_se(path_make_relative("/some/dotdot/../path", "/some/path", &result) < 0);
+
+#define test(from_dir, to_path, expected) { \
+ _cleanup_free_ char *z = NULL; \
+ path_make_relative(from_dir, to_path, &z); \
+ assert_se(streq(z, expected)); \
+ }
+
+ test("/", "/", ".");
+ test("/", "/some/path", "some/path");
+ test("/some/path", "/some/path", ".");
+ test("/some/path", "/some/path/in/subdir", "in/subdir");
+ test("/some/path", "/", "../..");
+ test("/some/path", "/some/other/path", "../other/path");
+ test("/some/path/./dot", "/some/further/path", "../../further/path");
+ test("//extra.//.//./.slashes//./won't////fo.ol///anybody//", "/././/extra././/.slashes////ar.e/.just/././.fine///", "../../../ar.e/.just/.fine");
+}
+
+static void test_strv_resolve(void) {
+ char tmp_dir[] = "/tmp/test-path-util-XXXXXX";
+ _cleanup_strv_free_ char **search_dirs = NULL;
+ _cleanup_strv_free_ char **absolute_dirs = NULL;
+ char **d;
+
+ assert_se(mkdtemp(tmp_dir) != NULL);
+
+ search_dirs = strv_new("/dir1", "/dir2", "/dir3");
+ assert_se(search_dirs);
+ STRV_FOREACH(d, search_dirs) {
+ char *p = strappend(tmp_dir, *d);
+ assert_se(p);
+ assert_se(strv_push(&absolute_dirs, p) == 0);
+ }
+
+ assert_se(mkdir(absolute_dirs[0], 0700) == 0);
+ assert_se(mkdir(absolute_dirs[1], 0700) == 0);
+ assert_se(symlink("dir2", absolute_dirs[2]) == 0);
+
+ path_strv_resolve(search_dirs, tmp_dir);
+ assert_se(streq(search_dirs[0], "/dir1"));
+ assert_se(streq(search_dirs[1], "/dir2"));
+ assert_se(streq(search_dirs[2], "/dir2"));
+
+ assert_se(rm_rf(tmp_dir, REMOVE_ROOT|REMOVE_PHYSICAL) == 0);
+}
+
+static void test_path_startswith(void) {
+ const char *p;
+
+ p = path_startswith("/foo/bar/barfoo/", "/foo");
+ assert_se(streq_ptr(p, "bar/barfoo/"));
+
+ p = path_startswith("/foo/bar/barfoo/", "/foo/");
+ assert_se(streq_ptr(p, "bar/barfoo/"));
+
+ p = path_startswith("/foo/bar/barfoo/", "/");
+ assert_se(streq_ptr(p, "foo/bar/barfoo/"));
+
+ p = path_startswith("/foo/bar/barfoo/", "////");
+ assert_se(streq_ptr(p, "foo/bar/barfoo/"));
+
+ p = path_startswith("/foo/bar/barfoo/", "/foo//bar/////barfoo///");
+ assert_se(streq_ptr(p, ""));
+
+ p = path_startswith("/foo/bar/barfoo/", "/foo/bar/barfoo////");
+ assert_se(streq_ptr(p, ""));
+
+ p = path_startswith("/foo/bar/barfoo/", "/foo/bar///barfoo/");
+ assert_se(streq_ptr(p, ""));
+
+ p = path_startswith("/foo/bar/barfoo/", "/foo////bar/barfoo/");
+ assert_se(streq_ptr(p, ""));
+
+ p = path_startswith("/foo/bar/barfoo/", "////foo/bar/barfoo/");
+ assert_se(streq_ptr(p, ""));
+
+ p = path_startswith("/foo/bar/barfoo/", "/foo/bar/barfoo");
+ assert_se(streq_ptr(p, ""));
+
+ assert_se(!path_startswith("/foo/bar/barfoo/", "/foo/bar/barfooa/"));
+ assert_se(!path_startswith("/foo/bar/barfoo/", "/foo/bar/barfooa"));
+ assert_se(!path_startswith("/foo/bar/barfoo/", ""));
+ assert_se(!path_startswith("/foo/bar/barfoo/", "/bar/foo"));
+ assert_se(!path_startswith("/foo/bar/barfoo/", "/f/b/b/"));
+}
+
+static void test_prefix_root_one(const char *r, const char *p, const char *expected) {
+ _cleanup_free_ char *s = NULL;
+ const char *t;
+
+ assert_se(s = prefix_root(r, p));
+ assert_se(streq_ptr(s, expected));
+
+ t = prefix_roota(r, p);
+ assert_se(t);
+ assert_se(streq_ptr(t, expected));
+}
+
+static void test_prefix_root(void) {
+ test_prefix_root_one("/", "/foo", "/foo");
+ test_prefix_root_one(NULL, "/foo", "/foo");
+ test_prefix_root_one("", "/foo", "/foo");
+ test_prefix_root_one("///", "/foo", "/foo");
+ test_prefix_root_one("/", "////foo", "/foo");
+ test_prefix_root_one(NULL, "////foo", "/foo");
+
+ test_prefix_root_one("/foo", "/bar", "/foo/bar");
+ test_prefix_root_one("/foo", "bar", "/foo/bar");
+ test_prefix_root_one("foo", "bar", "foo/bar");
+ test_prefix_root_one("/foo/", "/bar", "/foo/bar");
+ test_prefix_root_one("/foo/", "//bar", "/foo/bar");
+ test_prefix_root_one("/foo///", "//bar", "/foo/bar");
+}
+
+static void test_file_in_same_dir(void) {
+ char *t;
+
+ t = file_in_same_dir("/", "a");
+ assert_se(streq(t, "/a"));
+ free(t);
+
+ t = file_in_same_dir("/", "/a");
+ assert_se(streq(t, "/a"));
+ free(t);
+
+ t = file_in_same_dir("", "a");
+ assert_se(streq(t, "a"));
+ free(t);
+
+ t = file_in_same_dir("a/", "a");
+ assert_se(streq(t, "a/a"));
+ free(t);
+
+ t = file_in_same_dir("bar/foo", "bar");
+ assert_se(streq(t, "bar/bar"));
+ free(t);
+}
+
+static void test_last_path_component(void) {
+ assert_se(last_path_component(NULL) == NULL);
+ assert_se(streq(last_path_component("a/b/c"), "c"));
+ assert_se(streq(last_path_component("a/b/c/"), "c/"));
+ assert_se(streq(last_path_component("/"), "/"));
+ assert_se(streq(last_path_component("//"), "/"));
+ assert_se(streq(last_path_component("///"), "/"));
+ assert_se(streq(last_path_component("."), "."));
+ assert_se(streq(last_path_component("./."), "."));
+ assert_se(streq(last_path_component("././"), "./"));
+ assert_se(streq(last_path_component("././/"), ".//"));
+ assert_se(streq(last_path_component("/foo/a"), "a"));
+ assert_se(streq(last_path_component("/foo/a/"), "a/"));
+ assert_se(streq(last_path_component(""), ""));
+ assert_se(streq(last_path_component("a"), "a"));
+ assert_se(streq(last_path_component("a/"), "a/"));
+ assert_se(streq(last_path_component("/a"), "a"));
+ assert_se(streq(last_path_component("/a/"), "a/"));
+}
+
+static void test_path_extract_filename_one(const char *input, const char *output, int ret) {
+ _cleanup_free_ char *k = NULL;
+ int r;
+
+ r = path_extract_filename(input, &k);
+ log_info("%s → %s/%s [expected: %s/%s]", strnull(input), strnull(k), strerror(-r), strnull(output), strerror(-ret));
+ assert_se(streq_ptr(k, output));
+ assert_se(r == ret);
+}
+
+static void test_path_extract_filename(void) {
+ test_path_extract_filename_one(NULL, NULL, -EINVAL);
+ test_path_extract_filename_one("a/b/c", "c", 0);
+ test_path_extract_filename_one("a/b/c/", "c", 0);
+ test_path_extract_filename_one("/", NULL, -EINVAL);
+ test_path_extract_filename_one("//", NULL, -EINVAL);
+ test_path_extract_filename_one("///", NULL, -EINVAL);
+ test_path_extract_filename_one(".", NULL, -EINVAL);
+ test_path_extract_filename_one("./.", NULL, -EINVAL);
+ test_path_extract_filename_one("././", NULL, -EINVAL);
+ test_path_extract_filename_one("././/", NULL, -EINVAL);
+ test_path_extract_filename_one("/foo/a", "a", 0);
+ test_path_extract_filename_one("/foo/a/", "a", 0);
+ test_path_extract_filename_one("", NULL, -EINVAL);
+ test_path_extract_filename_one("a", "a", 0);
+ test_path_extract_filename_one("a/", "a", 0);
+ test_path_extract_filename_one("/a", "a", 0);
+ test_path_extract_filename_one("/a/", "a", 0);
+ test_path_extract_filename_one("/////////////a/////////////", "a", 0);
+ test_path_extract_filename_one("xx/.", NULL, -EINVAL);
+ test_path_extract_filename_one("xx/..", NULL, -EINVAL);
+ test_path_extract_filename_one("..", NULL, -EINVAL);
+ test_path_extract_filename_one("/..", NULL, -EINVAL);
+ test_path_extract_filename_one("../", NULL, -EINVAL);
+ test_path_extract_filename_one(".", NULL, -EINVAL);
+ test_path_extract_filename_one("/.", NULL, -EINVAL);
+ test_path_extract_filename_one("./", NULL, -EINVAL);
+}
+
+static void test_filename_is_valid(void) {
+ char foo[FILENAME_MAX+2];
+ int i;
+
+ assert_se(!filename_is_valid(""));
+ assert_se(!filename_is_valid("/bar/foo"));
+ assert_se(!filename_is_valid("/"));
+ assert_se(!filename_is_valid("."));
+ assert_se(!filename_is_valid(".."));
+
+ for (i=0; i<FILENAME_MAX+1; i++)
+ foo[i] = 'a';
+ foo[FILENAME_MAX+1] = '\0';
+
+ assert_se(!filename_is_valid(foo));
+
+ assert_se(filename_is_valid("foo_bar-333"));
+ assert_se(filename_is_valid("o.o"));
+}
+
+static void test_hidden_or_backup_file(void) {
+ assert_se(hidden_or_backup_file(".hidden"));
+ assert_se(hidden_or_backup_file("..hidden"));
+ assert_se(!hidden_or_backup_file("hidden."));
+
+ assert_se(hidden_or_backup_file("backup~"));
+ assert_se(hidden_or_backup_file(".backup~"));
+
+ assert_se(hidden_or_backup_file("lost+found"));
+ assert_se(hidden_or_backup_file("aquota.user"));
+ assert_se(hidden_or_backup_file("aquota.group"));
+
+ assert_se(hidden_or_backup_file("test.rpmnew"));
+ assert_se(hidden_or_backup_file("test.dpkg-old"));
+ assert_se(hidden_or_backup_file("test.dpkg-remove"));
+ assert_se(hidden_or_backup_file("test.swp"));
+
+ assert_se(!hidden_or_backup_file("test.rpmnew."));
+ assert_se(!hidden_or_backup_file("test.dpkg-old.foo"));
+}
+
+static void test_systemd_installation_has_version(const char *path) {
+ int r;
+ const unsigned versions[] = {0, 231, PROJECT_VERSION, 999};
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(versions); i++) {
+ r = systemd_installation_has_version(path, versions[i]);
+ assert_se(r >= 0);
+ log_info("%s has systemd >= %u: %s",
+ path ?: "Current installation", versions[i], yes_no(r));
+ }
+}
+
+static void test_skip_dev_prefix(void) {
+
+ assert_se(streq(skip_dev_prefix("/"), "/"));
+ assert_se(streq(skip_dev_prefix("/dev"), ""));
+ assert_se(streq(skip_dev_prefix("/dev/"), ""));
+ assert_se(streq(skip_dev_prefix("/dev/foo"), "foo"));
+ assert_se(streq(skip_dev_prefix("/dev/foo/bar"), "foo/bar"));
+ assert_se(streq(skip_dev_prefix("//dev"), ""));
+ assert_se(streq(skip_dev_prefix("//dev//"), ""));
+ assert_se(streq(skip_dev_prefix("/dev///foo"), "foo"));
+ assert_se(streq(skip_dev_prefix("///dev///foo///bar"), "foo///bar"));
+ assert_se(streq(skip_dev_prefix("//foo"), "//foo"));
+ assert_se(streq(skip_dev_prefix("foo"), "foo"));
+}
+
+static void test_empty_or_root(void) {
+ assert_se(empty_or_root(NULL));
+ assert_se(empty_or_root(""));
+ assert_se(empty_or_root("/"));
+ assert_se(empty_or_root("//"));
+ assert_se(empty_or_root("///"));
+ assert_se(empty_or_root("/////////////////"));
+ assert_se(!empty_or_root("xxx"));
+ assert_se(!empty_or_root("/xxx"));
+ assert_se(!empty_or_root("/xxx/"));
+ assert_se(!empty_or_root("//yy//"));
+}
+
+static void test_path_startswith_set(void) {
+
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar", "/foo/quux", "/foo/bar", "/zzz"), ""));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar", "/foo/quux", "/foo/", "/zzz"), "bar"));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar", "/foo/quux", "/foo", "/zzz"), "bar"));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar", "/foo/quux", "/", "/zzz"), "foo/bar"));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar", "/foo/quux", "", "/zzz"), NULL));
+
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar2", "/foo/quux", "/foo/bar", "/zzz"), NULL));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar2", "/foo/quux", "/foo/", "/zzz"), "bar2"));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar2", "/foo/quux", "/foo", "/zzz"), "bar2"));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar2", "/foo/quux", "/", "/zzz"), "foo/bar2"));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar2", "/foo/quux", "", "/zzz"), NULL));
+
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo2/bar", "/foo/quux", "/foo/bar", "/zzz"), NULL));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo2/bar", "/foo/quux", "/foo/", "/zzz"), NULL));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo2/bar", "/foo/quux", "/foo", "/zzz"), NULL));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo2/bar", "/foo/quux", "/", "/zzz"), "foo2/bar"));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo2/bar", "/foo/quux", "", "/zzz"), NULL));
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_path();
+ test_path_equal_root();
+ test_find_binary(argv[0]);
+ test_prefixes();
+ test_path_join();
+ test_fsck_exists();
+ test_make_relative();
+ test_strv_resolve();
+ test_path_startswith();
+ test_prefix_root();
+ test_file_in_same_dir();
+ test_last_path_component();
+ test_path_extract_filename();
+ test_filename_is_valid();
+ test_hidden_or_backup_file();
+ test_skip_dev_prefix();
+ test_empty_or_root();
+ test_path_startswith_set();
+
+ test_systemd_installation_has_version(argv[1]); /* NULL is OK */
+
+ return 0;
+}
diff --git a/src/test/test-path.c b/src/test/test-path.c
new file mode 100644
index 0000000..07a0e41
--- /dev/null
+++ b/src/test/test-path.c
@@ -0,0 +1,273 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "alloc-util.h"
+#include "all-units.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "manager.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "test-helper.h"
+#include "tests.h"
+#include "unit.h"
+#include "util.h"
+
+typedef void (*test_function_t)(Manager *m);
+
+static int setup_test(Manager **m) {
+ char **tests_path = STRV_MAKE("exists", "existsglobFOOBAR", "changed", "modified", "unit",
+ "directorynotempty", "makedirectory");
+ char **test_path;
+ Manager *tmp = NULL;
+ int r;
+
+ assert_se(m);
+
+ r = enter_cgroup_subroot();
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &tmp);
+ if (MANAGER_SKIP_TEST(r))
+ return log_tests_skipped_errno(r, "manager_new");
+ assert_se(r >= 0);
+ assert_se(manager_startup(tmp, NULL, NULL) >= 0);
+
+ STRV_FOREACH(test_path, tests_path) {
+ _cleanup_free_ char *p = NULL;
+
+ p = strjoin("/tmp/test-path_", *test_path);
+ assert_se(p);
+
+ (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL);
+ }
+
+ *m = tmp;
+
+ return 0;
+}
+
+static void shutdown_test(Manager *m) {
+ assert_se(m);
+
+ manager_free(m);
+}
+
+static void check_stop_unlink(Manager *m, Unit *unit, const char *test_path, const char *service_name) {
+ _cleanup_free_ char *tmp = NULL;
+ Unit *service_unit = NULL;
+ Service *service = NULL;
+ usec_t ts;
+ usec_t timeout = 2 * USEC_PER_SEC;
+
+ assert_se(m);
+ assert_se(unit);
+ assert_se(test_path);
+
+ if (!service_name) {
+ assert_se(tmp = strreplace(unit->id, ".path", ".service"));
+ service_unit = manager_get_unit(m, tmp);
+ } else
+ service_unit = manager_get_unit(m, service_name);
+ assert_se(service_unit);
+ service = SERVICE(service_unit);
+
+ ts = now(CLOCK_MONOTONIC);
+ /* We process events until the service related to the path has been successfully started */
+ while (service->result != SERVICE_SUCCESS || service->state != SERVICE_START) {
+ usec_t n;
+ int r;
+
+ r = sd_event_run(m->event, 100 * USEC_PER_MSEC);
+ assert_se(r >= 0);
+
+ printf("%s: state = %s; result = %s \n",
+ service_unit->id,
+ service_state_to_string(service->state),
+ service_result_to_string(service->result));
+
+ /* But we timeout if the service has not been started in the allocated time */
+ n = now(CLOCK_MONOTONIC);
+ if (ts + timeout < n) {
+ log_error("Test timeout when testing %s", unit->id);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ assert_se(unit_stop(unit) >= 0);
+ (void) rm_rf(test_path, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+static void test_path_exists(Manager *m) {
+ const char *test_path = "/tmp/test-path_exists";
+ Unit *unit = NULL;
+
+ assert_se(m);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "path-exists.path", NULL, &unit) >= 0);
+ assert_se(unit_start(unit) >= 0);
+
+ assert_se(touch(test_path) >= 0);
+
+ check_stop_unlink(m, unit, test_path, NULL);
+}
+
+static void test_path_existsglob(Manager *m) {
+ const char *test_path = "/tmp/test-path_existsglobFOOBAR";
+ Unit *unit = NULL;
+
+ assert_se(m);
+ assert_se(manager_load_startable_unit_or_warn(m, "path-existsglob.path", NULL, &unit) >= 0);
+ assert_se(unit_start(unit) >= 0);
+
+ assert_se(touch(test_path) >= 0);
+
+ check_stop_unlink(m, unit, test_path, NULL);
+}
+
+static void test_path_changed(Manager *m) {
+ const char *test_path = "/tmp/test-path_changed";
+ FILE *f;
+ Unit *unit = NULL;
+
+ assert_se(m);
+
+ assert_se(touch(test_path) >= 0);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "path-changed.path", NULL, &unit) >= 0);
+ assert_se(unit_start(unit) >= 0);
+
+ f = fopen(test_path, "w");
+ assert_se(f);
+ fclose(f);
+
+ check_stop_unlink(m, unit, test_path, NULL);
+}
+
+static void test_path_modified(Manager *m) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *test_path = "/tmp/test-path_modified";
+ Unit *unit = NULL;
+
+ assert_se(m);
+
+ assert_se(touch(test_path) >= 0);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "path-modified.path", NULL, &unit) >= 0);
+ assert_se(unit_start(unit) >= 0);
+
+ f = fopen(test_path, "w");
+ assert_se(f);
+ fputs("test", f);
+
+ check_stop_unlink(m, unit, test_path, NULL);
+}
+
+static void test_path_unit(Manager *m) {
+ const char *test_path = "/tmp/test-path_unit";
+ Unit *unit = NULL;
+
+ assert_se(m);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "path-unit.path", NULL, &unit) >= 0);
+ assert_se(unit_start(unit) >= 0);
+
+ assert_se(touch(test_path) >= 0);
+
+ check_stop_unlink(m, unit, test_path, "path-mycustomunit.service");
+}
+
+static void test_path_directorynotempty(Manager *m) {
+ const char *test_path = "/tmp/test-path_directorynotempty/";
+ Unit *unit = NULL;
+
+ assert_se(m);
+
+ assert_se(access(test_path, F_OK) < 0);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "path-directorynotempty.path", NULL, &unit) >= 0);
+ assert_se(unit_start(unit) >= 0);
+
+ /* MakeDirectory default to no */
+ assert_se(access(test_path, F_OK) < 0);
+
+ assert_se(mkdir_p(test_path, 0755) >= 0);
+ assert_se(touch(strjoina(test_path, "test_file")) >= 0);
+
+ check_stop_unlink(m, unit, test_path, NULL);
+}
+
+static void test_path_makedirectory_directorymode(Manager *m) {
+ const char *test_path = "/tmp/test-path_makedirectory/";
+ Unit *unit = NULL;
+ struct stat s;
+
+ assert_se(m);
+
+ assert_se(access(test_path, F_OK) < 0);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "path-makedirectory.path", NULL, &unit) >= 0);
+ assert_se(unit_start(unit) >= 0);
+
+ /* Check if the directory has been created */
+ assert_se(access(test_path, F_OK) >= 0);
+
+ /* Check the mode we specified with DirectoryMode=0744 */
+ assert_se(stat(test_path, &s) >= 0);
+ assert_se((s.st_mode & S_IRWXU) == 0700);
+ assert_se((s.st_mode & S_IRWXG) == 0040);
+ assert_se((s.st_mode & S_IRWXO) == 0004);
+
+ assert_se(unit_stop(unit) >= 0);
+ (void) rm_rf(test_path, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+int main(int argc, char *argv[]) {
+ static const test_function_t tests[] = {
+ test_path_exists,
+ test_path_existsglob,
+ test_path_changed,
+ test_path_modified,
+ test_path_unit,
+ test_path_directorynotempty,
+ test_path_makedirectory_directorymode,
+ NULL,
+ };
+
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ _cleanup_free_ char *test_path = NULL;
+ const test_function_t *test = NULL;
+ Manager *m = NULL;
+
+ umask(022);
+
+ test_setup_logging(LOG_INFO);
+
+ test_path = path_join(get_testdata_dir(), "test-path");
+ assert_se(set_unit_path(test_path) >= 0);
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+
+ for (test = tests; test && *test; test++) {
+ int r;
+
+ /* We create a clean environment for each test */
+ r = setup_test(&m);
+ if (r != 0)
+ return r;
+
+ (*test)(m);
+
+ shutdown_test(m);
+ }
+
+ return 0;
+}
diff --git a/src/test/test-pretty-print.c b/src/test/test-pretty-print.c
new file mode 100644
index 0000000..53ec512
--- /dev/null
+++ b/src/test/test-pretty-print.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "pretty-print.h"
+#include "strv.h"
+#include "tests.h"
+
+static void test_terminal_urlify(void) {
+ _cleanup_free_ char *formatted = NULL;
+
+ assert_se(terminal_urlify("https://www.freedesktop.org/wiki/Software/systemd/", "systemd homepage", &formatted) >= 0);
+ printf("Hey, considere visiting the %s right now! It is very good!\n", formatted);
+
+ formatted = mfree(formatted);
+
+ assert_se(terminal_urlify_path("/etc/fstab", "this link to your /etc/fstab", &formatted) >= 0);
+ printf("Or click on %s to have a look at it!\n", formatted);
+}
+
+static void test_cat_files(void) {
+ assert_se(cat_files("/no/such/file", NULL, 0) == -ENOENT);
+ assert_se(cat_files("/no/such/file", NULL, CAT_FLAGS_MAIN_FILE_OPTIONAL) == 0);
+
+ if (access("/etc/fstab", R_OK) >= 0)
+ assert_se(cat_files("/etc/fstab", STRV_MAKE("/etc/fstab", "/etc/fstab"), 0) == 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ test_terminal_urlify();
+ test_cat_files();
+
+ print_separator();
+
+ return 0;
+}
diff --git a/src/test/test-prioq.c b/src/test/test-prioq.c
new file mode 100644
index 0000000..53c9e09
--- /dev/null
+++ b/src/test/test-prioq.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "prioq.h"
+#include "set.h"
+#include "siphash24.h"
+#include "util.h"
+
+#define SET_SIZE 1024*4
+
+static int unsigned_compare(const unsigned *a, const unsigned *b) {
+ return CMP(*a, *b);
+}
+
+static void test_unsigned(void) {
+ _cleanup_(prioq_freep) Prioq *q = NULL;
+ unsigned buffer[SET_SIZE], i, u, n;
+
+ srand(0);
+
+ assert_se(q = prioq_new(trivial_compare_func));
+
+ for (i = 0; i < ELEMENTSOF(buffer); i++) {
+ u = (unsigned) rand();
+ buffer[i] = u;
+ assert_se(prioq_put(q, UINT_TO_PTR(u), NULL) >= 0);
+
+ n = prioq_size(q);
+ assert_se(prioq_remove(q, UINT_TO_PTR(u), &n) == 0);
+ }
+
+ typesafe_qsort(buffer, ELEMENTSOF(buffer), unsigned_compare);
+
+ for (i = 0; i < ELEMENTSOF(buffer); i++) {
+ assert_se(prioq_size(q) == ELEMENTSOF(buffer) - i);
+
+ u = PTR_TO_UINT(prioq_pop(q));
+ assert_se(buffer[i] == u);
+ }
+
+ assert_se(prioq_isempty(q));
+}
+
+struct test {
+ unsigned value;
+ unsigned idx;
+};
+
+static int test_compare(const struct test *x, const struct test *y) {
+ return CMP(x->value, y->value);
+}
+
+static void test_hash(const struct test *x, struct siphash *state) {
+ siphash24_compress(&x->value, sizeof(x->value), state);
+}
+
+DEFINE_PRIVATE_HASH_OPS(test_hash_ops, struct test, test_hash, test_compare);
+
+static void test_struct(void) {
+ _cleanup_(prioq_freep) Prioq *q = NULL;
+ _cleanup_(set_freep) Set *s = NULL;
+ unsigned previous = 0, i;
+ struct test *t;
+
+ srand(0);
+
+ assert_se(q = prioq_new((compare_func_t) test_compare));
+ assert_se(s = set_new(&test_hash_ops));
+
+ assert_se(prioq_peek(q) == NULL);
+ assert_se(prioq_peek_by_index(q, 0) == NULL);
+ assert_se(prioq_peek_by_index(q, 1) == NULL);
+ assert_se(prioq_peek_by_index(q, (unsigned) -1) == NULL);
+
+ for (i = 0; i < SET_SIZE; i++) {
+ assert_se(t = new0(struct test, 1));
+ t->value = (unsigned) rand();
+
+ assert_se(prioq_put(q, t, &t->idx) >= 0);
+
+ if (i % 4 == 0)
+ assert_se(set_consume(s, t) >= 0);
+ }
+
+ for (i = 0; i < SET_SIZE; i++)
+ assert_se(prioq_peek_by_index(q, i));
+ assert_se(prioq_peek_by_index(q, SET_SIZE) == NULL);
+
+ unsigned count = 0;
+ PRIOQ_FOREACH_ITEM(q, t) {
+ assert_se(t);
+ count++;
+ }
+ assert_se(count == SET_SIZE);
+
+ while ((t = set_steal_first(s))) {
+ assert_se(prioq_remove(q, t, &t->idx) == 1);
+ assert_se(prioq_remove(q, t, &t->idx) == 0);
+ assert_se(prioq_remove(q, t, NULL) == 0);
+
+ free(t);
+ }
+
+ for (i = 0; i < SET_SIZE * 3 / 4; i++) {
+ assert_se(prioq_size(q) == (SET_SIZE * 3 / 4) - i);
+
+ assert_se(t = prioq_pop(q));
+ assert_se(prioq_remove(q, t, &t->idx) == 0);
+ assert_se(prioq_remove(q, t, NULL) == 0);
+ assert_se(previous <= t->value);
+
+ previous = t->value;
+ free(t);
+ }
+
+ assert_se(prioq_isempty(q));
+ assert_se(set_isempty(s));
+}
+
+int main(int argc, char* argv[]) {
+
+ test_unsigned();
+ test_struct();
+
+ return 0;
+}
diff --git a/src/test/test-proc-cmdline.c b/src/test/test-proc-cmdline.c
new file mode 100644
index 0000000..b0fc217
--- /dev/null
+++ b/src/test/test-proc-cmdline.c
@@ -0,0 +1,243 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "env-util.h"
+#include "log.h"
+#include "macro.h"
+#include "proc-cmdline.h"
+#include "special.h"
+#include "string-util.h"
+#include "util.h"
+
+static int obj;
+
+static int parse_item(const char *key, const char *value, void *data) {
+ assert_se(key);
+ assert_se(data == &obj);
+
+ log_info("kernel cmdline option <%s> = <%s>", key, strna(value));
+ return 0;
+}
+
+static void test_proc_cmdline_parse(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(proc_cmdline_parse(parse_item, &obj, PROC_CMDLINE_STRIP_RD_PREFIX) >= 0);
+}
+
+static void test_proc_cmdline_override(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(putenv((char*) "SYSTEMD_PROC_CMDLINE=foo_bar=quux wuff-piep=tuet zumm some_arg_with_space='foo bar' and_one_more=\"zzz aaa\"") == 0);
+
+ /* Test if the override works */
+ _cleanup_free_ char *line = NULL, *value = NULL;
+ assert_se(proc_cmdline(&line) >= 0);
+
+ /* Test if parsing makes uses of the override */
+ assert_se(streq(line, "foo_bar=quux wuff-piep=tuet zumm some_arg_with_space='foo bar' and_one_more=\"zzz aaa\""));
+ assert_se(proc_cmdline_get_key("foo_bar", 0, &value) > 0 && streq_ptr(value, "quux"));
+ value = mfree(value);
+
+ assert_se(proc_cmdline_get_key("some_arg_with_space", 0, &value) > 0 && streq_ptr(value, "foo bar"));
+ value = mfree(value);
+
+ assert_se(proc_cmdline_get_key("and_one_more", 0, &value) > 0 && streq_ptr(value, "zzz aaa"));
+ value = mfree(value);
+}
+
+static int parse_item_given(const char *key, const char *value, void *data) {
+ assert_se(key);
+ assert_se(data);
+
+ bool *strip = data;
+
+ log_info("%s: option <%s> = <%s>", __func__, key, strna(value));
+ if (proc_cmdline_key_streq(key, "foo_bar"))
+ assert_se(streq(value, "quux"));
+ else if (proc_cmdline_key_streq(key, "wuff-piep"))
+ assert_se(streq(value, "tuet "));
+ else if (proc_cmdline_key_streq(key, "space"))
+ assert_se(streq(value, "x y z"));
+ else if (proc_cmdline_key_streq(key, "miepf"))
+ assert_se(streq(value, "uuu"));
+ else if (in_initrd() && *strip && proc_cmdline_key_streq(key, "zumm"))
+ assert_se(!value);
+ else if (in_initrd() && !*strip && proc_cmdline_key_streq(key, "rd.zumm"))
+ assert_se(!value);
+ else
+ assert_not_reached("Bad key!");
+
+ return 0;
+}
+
+static void test_proc_cmdline_given(bool flip_initrd) {
+ log_info("/* %s (flip: %s) */", __func__, yes_no(flip_initrd));
+
+ if (flip_initrd)
+ in_initrd_force(!in_initrd());
+
+ bool t = true, f = false;
+ assert_se(proc_cmdline_parse_given("foo_bar=quux wuff-piep=\"tuet \" rd.zumm space='x y z' miepf=\"uuu\"",
+ parse_item_given, &t, PROC_CMDLINE_STRIP_RD_PREFIX) >= 0);
+
+ assert_se(proc_cmdline_parse_given("foo_bar=quux wuff-piep=\"tuet \" rd.zumm space='x y z' miepf=\"uuu\"",
+ parse_item_given, &f, 0) >= 0);
+
+ if (flip_initrd)
+ in_initrd_force(!in_initrd());
+}
+
+static void test_proc_cmdline_get_key(void) {
+ _cleanup_free_ char *value = NULL;
+
+ log_info("/* %s */", __func__);
+ assert_se(putenv((char*) "SYSTEMD_PROC_CMDLINE=foo_bar=quux wuff-piep=tuet zumm spaaace='ö ü ß' ticks=\"''\"") == 0);
+
+ assert_se(proc_cmdline_get_key("", 0, &value) == -EINVAL);
+ assert_se(proc_cmdline_get_key("abc", 0, NULL) == 0);
+ assert_se(proc_cmdline_get_key("abc", 0, &value) == 0 && value == NULL);
+ assert_se(proc_cmdline_get_key("abc", PROC_CMDLINE_VALUE_OPTIONAL, &value) == 0 && value == NULL);
+
+ assert_se(proc_cmdline_get_key("foo_bar", 0, &value) > 0 && streq_ptr(value, "quux"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("foo_bar", PROC_CMDLINE_VALUE_OPTIONAL, &value) > 0 && streq_ptr(value, "quux"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("foo-bar", 0, &value) > 0 && streq_ptr(value, "quux"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("foo-bar", PROC_CMDLINE_VALUE_OPTIONAL, &value) > 0 && streq_ptr(value, "quux"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("foo-bar", 0, NULL) == 0);
+ assert_se(proc_cmdline_get_key("foo-bar", PROC_CMDLINE_VALUE_OPTIONAL, NULL) == -EINVAL);
+
+ assert_se(proc_cmdline_get_key("wuff-piep", 0, &value) > 0 && streq_ptr(value, "tuet"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("wuff-piep", PROC_CMDLINE_VALUE_OPTIONAL, &value) > 0 && streq_ptr(value, "tuet"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("wuff_piep", 0, &value) > 0 && streq_ptr(value, "tuet"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("wuff_piep", PROC_CMDLINE_VALUE_OPTIONAL, &value) > 0 && streq_ptr(value, "tuet"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("wuff_piep", 0, NULL) == 0);
+ assert_se(proc_cmdline_get_key("wuff_piep", PROC_CMDLINE_VALUE_OPTIONAL, NULL) == -EINVAL);
+
+ assert_se(proc_cmdline_get_key("zumm", 0, &value) == 0 && value == NULL);
+ assert_se(proc_cmdline_get_key("zumm", PROC_CMDLINE_VALUE_OPTIONAL, &value) > 0 && value == NULL);
+ assert_se(proc_cmdline_get_key("zumm", 0, NULL) > 0);
+
+ assert_se(proc_cmdline_get_key("spaaace", 0, &value) > 0 && streq_ptr(value, "ö ü ß"));
+ value = mfree(value);
+
+ assert_se(proc_cmdline_get_key("ticks", 0, &value) > 0 && streq_ptr(value, "''"));
+}
+
+static void test_proc_cmdline_get_bool(void) {
+ bool value = false;
+
+ log_info("/* %s */", __func__);
+ assert_se(putenv((char*) "SYSTEMD_PROC_CMDLINE=foo_bar bar-waldo=1 x_y-z=0 quux=miep") == 0);
+
+ assert_se(proc_cmdline_get_bool("", &value) == -EINVAL);
+ assert_se(proc_cmdline_get_bool("abc", &value) == 0 && value == false);
+ assert_se(proc_cmdline_get_bool("foo_bar", &value) > 0 && value == true);
+ assert_se(proc_cmdline_get_bool("foo-bar", &value) > 0 && value == true);
+ assert_se(proc_cmdline_get_bool("bar-waldo", &value) > 0 && value == true);
+ assert_se(proc_cmdline_get_bool("bar_waldo", &value) > 0 && value == true);
+ assert_se(proc_cmdline_get_bool("x_y-z", &value) > 0 && value == false);
+ assert_se(proc_cmdline_get_bool("x-y-z", &value) > 0 && value == false);
+ assert_se(proc_cmdline_get_bool("x-y_z", &value) > 0 && value == false);
+ assert_se(proc_cmdline_get_bool("x_y_z", &value) > 0 && value == false);
+ assert_se(proc_cmdline_get_bool("quux", &value) == -EINVAL && value == false);
+}
+
+static void test_proc_cmdline_get_key_many(void) {
+ _cleanup_free_ char *value1 = NULL, *value2 = NULL, *value3 = NULL, *value4 = NULL, *value5 = NULL, *value6 = NULL;
+
+ log_info("/* %s */", __func__);
+ assert_se(putenv((char*) "SYSTEMD_PROC_CMDLINE=foo_bar=quux wuff-piep=tuet zumm SPACE='one two' doubleticks=\" aaa aaa \"") == 0);
+
+ assert_se(proc_cmdline_get_key_many(0,
+ "wuff-piep", &value3,
+ "foo_bar", &value1,
+ "idontexist", &value2,
+ "zumm", &value4,
+ "SPACE", &value5,
+ "doubleticks", &value6) == 4);
+
+ assert_se(streq_ptr(value1, "quux"));
+ assert_se(!value2);
+ assert_se(streq_ptr(value3, "tuet"));
+ assert_se(!value4);
+ assert_se(streq_ptr(value5, "one two"));
+ assert_se(streq_ptr(value6, " aaa aaa "));
+}
+
+static void test_proc_cmdline_key_streq(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(proc_cmdline_key_streq("", ""));
+ assert_se(proc_cmdline_key_streq("a", "a"));
+ assert_se(!proc_cmdline_key_streq("", "a"));
+ assert_se(!proc_cmdline_key_streq("a", ""));
+ assert_se(proc_cmdline_key_streq("a", "a"));
+ assert_se(!proc_cmdline_key_streq("a", "b"));
+ assert_se(proc_cmdline_key_streq("x-y-z", "x-y-z"));
+ assert_se(proc_cmdline_key_streq("x-y-z", "x_y_z"));
+ assert_se(proc_cmdline_key_streq("x-y-z", "x-y_z"));
+ assert_se(proc_cmdline_key_streq("x-y-z", "x_y-z"));
+ assert_se(proc_cmdline_key_streq("x_y-z", "x-y_z"));
+ assert_se(!proc_cmdline_key_streq("x_y-z", "x-z_z"));
+}
+
+static void test_proc_cmdline_key_startswith(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(proc_cmdline_key_startswith("", ""));
+ assert_se(proc_cmdline_key_startswith("x", ""));
+ assert_se(!proc_cmdline_key_startswith("", "x"));
+ assert_se(proc_cmdline_key_startswith("x", "x"));
+ assert_se(!proc_cmdline_key_startswith("x", "y"));
+ assert_se(!proc_cmdline_key_startswith("foo-bar", "quux"));
+ assert_se(proc_cmdline_key_startswith("foo-bar", "foo"));
+ assert_se(proc_cmdline_key_startswith("foo-bar", "foo-bar"));
+ assert_se(proc_cmdline_key_startswith("foo-bar", "foo_bar"));
+ assert_se(proc_cmdline_key_startswith("foo-bar", "foo_"));
+ assert_se(!proc_cmdline_key_startswith("foo-bar", "foo_xx"));
+}
+
+static void test_runlevel_to_target(void) {
+ log_info("/* %s */", __func__);
+
+ in_initrd_force(false);
+ assert_se(streq_ptr(runlevel_to_target(NULL), NULL));
+ assert_se(streq_ptr(runlevel_to_target("unknown-runlevel"), NULL));
+ assert_se(streq_ptr(runlevel_to_target("rd.unknown-runlevel"), NULL));
+ assert_se(streq_ptr(runlevel_to_target("3"), SPECIAL_MULTI_USER_TARGET));
+ assert_se(streq_ptr(runlevel_to_target("rd.rescue"), NULL));
+
+ in_initrd_force(true);
+ assert_se(streq_ptr(runlevel_to_target(NULL), NULL));
+ assert_se(streq_ptr(runlevel_to_target("unknown-runlevel"), NULL));
+ assert_se(streq_ptr(runlevel_to_target("rd.unknown-runlevel"), NULL));
+ assert_se(streq_ptr(runlevel_to_target("3"), NULL));
+ assert_se(streq_ptr(runlevel_to_target("rd.rescue"), SPECIAL_RESCUE_TARGET));
+}
+
+int main(void) {
+ log_parse_environment();
+ log_open();
+
+ test_proc_cmdline_parse();
+ test_proc_cmdline_override();
+ test_proc_cmdline_given(false);
+ /* Repeat the same thing, but now flip our ininitrdness */
+ test_proc_cmdline_given(true);
+ test_proc_cmdline_key_streq();
+ test_proc_cmdline_key_startswith();
+ test_proc_cmdline_get_key();
+ test_proc_cmdline_get_bool();
+ test_proc_cmdline_get_key_many();
+ test_runlevel_to_target();
+
+ return 0;
+}
diff --git a/src/test/test-process-util.c b/src/test/test-process-util.c
new file mode 100644
index 0000000..b5ba651
--- /dev/null
+++ b/src/test/test-process-util.c
@@ -0,0 +1,632 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/personality.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "test-helper.h"
+#include "tests.h"
+#include "util.h"
+#include "virt.h"
+
+static void test_get_process_comm(pid_t pid) {
+ struct stat st;
+ _cleanup_free_ char *a = NULL, *c = NULL, *d = NULL, *f = NULL, *i = NULL;
+ _cleanup_free_ char *env = NULL;
+ char path[STRLEN("/proc//comm") + DECIMAL_STR_MAX(pid_t)];
+ pid_t e;
+ uid_t u;
+ gid_t g;
+ dev_t h;
+ int r;
+
+ xsprintf(path, "/proc/"PID_FMT"/comm", pid);
+
+ if (stat(path, &st) == 0) {
+ assert_se(get_process_comm(pid, &a) >= 0);
+ log_info("PID"PID_FMT" comm: '%s'", pid, a);
+ } else
+ log_warning("%s not exist.", path);
+
+ assert_se(get_process_cmdline(pid, 0, true, &c) >= 0);
+ log_info("PID"PID_FMT" cmdline: '%s'", pid, c);
+
+ assert_se(get_process_cmdline(pid, 8, false, &d) >= 0);
+ log_info("PID"PID_FMT" cmdline truncated to 8: '%s'", pid, d);
+
+ free(d);
+ assert_se(get_process_cmdline(pid, 1, false, &d) >= 0);
+ log_info("PID"PID_FMT" cmdline truncated to 1: '%s'", pid, d);
+
+ assert_se(get_process_ppid(pid, &e) >= 0);
+ log_info("PID"PID_FMT" PPID: "PID_FMT, pid, e);
+ assert_se(pid == 1 ? e == 0 : e > 0);
+
+ assert_se(is_kernel_thread(pid) == 0 || pid != 1);
+
+ r = get_process_exe(pid, &f);
+ assert_se(r >= 0 || r == -EACCES);
+ log_info("PID"PID_FMT" exe: '%s'", pid, strna(f));
+
+ assert_se(get_process_uid(pid, &u) == 0);
+ log_info("PID"PID_FMT" UID: "UID_FMT, pid, u);
+
+ assert_se(get_process_gid(pid, &g) == 0);
+ log_info("PID"PID_FMT" GID: "GID_FMT, pid, g);
+
+ r = get_process_environ(pid, &env);
+ assert_se(r >= 0 || r == -EACCES);
+ log_info("PID"PID_FMT" strlen(environ): %zi", pid, env ? (ssize_t)strlen(env) : (ssize_t)-errno);
+
+ if (!detect_container())
+ assert_se(get_ctty_devnr(pid, &h) == -ENXIO || pid != 1);
+
+ (void) getenv_for_pid(pid, "PATH", &i);
+ log_info("PID"PID_FMT" $PATH: '%s'", pid, strna(i));
+}
+
+static void test_get_process_comm_escape_one(const char *input, const char *output) {
+ _cleanup_free_ char *n = NULL;
+
+ log_info("input: <%s> — output: <%s>", input, output);
+
+ assert_se(prctl(PR_SET_NAME, input) >= 0);
+ assert_se(get_process_comm(0, &n) >= 0);
+
+ log_info("got: <%s>", n);
+
+ assert_se(streq_ptr(n, output));
+}
+
+static void test_get_process_comm_escape(void) {
+ _cleanup_free_ char *saved = NULL;
+
+ assert_se(get_process_comm(0, &saved) >= 0);
+
+ test_get_process_comm_escape_one("", "");
+ test_get_process_comm_escape_one("foo", "foo");
+ test_get_process_comm_escape_one("012345678901234", "012345678901234");
+ test_get_process_comm_escape_one("0123456789012345", "012345678901234");
+ test_get_process_comm_escape_one("äöüß", "\\303\\244\\303…");
+ test_get_process_comm_escape_one("xäöüß", "x\\303\\244…");
+ test_get_process_comm_escape_one("xxäöüß", "xx\\303\\244…");
+ test_get_process_comm_escape_one("xxxäöüß", "xxx\\303\\244…");
+ test_get_process_comm_escape_one("xxxxäöüß", "xxxx\\303\\244…");
+ test_get_process_comm_escape_one("xxxxxäöüß", "xxxxx\\303…");
+
+ assert_se(prctl(PR_SET_NAME, saved) >= 0);
+}
+
+static void test_pid_is_unwaited(void) {
+ pid_t pid;
+
+ pid = fork();
+ assert_se(pid >= 0);
+ if (pid == 0) {
+ _exit(EXIT_SUCCESS);
+ } else {
+ int status;
+
+ waitpid(pid, &status, 0);
+ assert_se(!pid_is_unwaited(pid));
+ }
+ assert_se(pid_is_unwaited(getpid_cached()));
+ assert_se(!pid_is_unwaited(-1));
+}
+
+static void test_pid_is_alive(void) {
+ pid_t pid;
+
+ pid = fork();
+ assert_se(pid >= 0);
+ if (pid == 0) {
+ _exit(EXIT_SUCCESS);
+ } else {
+ int status;
+
+ waitpid(pid, &status, 0);
+ assert_se(!pid_is_alive(pid));
+ }
+ assert_se(pid_is_alive(getpid_cached()));
+ assert_se(!pid_is_alive(-1));
+}
+
+static void test_personality(void) {
+
+ assert_se(personality_to_string(PER_LINUX));
+ assert_se(!personality_to_string(PERSONALITY_INVALID));
+
+ assert_se(streq(personality_to_string(PER_LINUX), architecture_to_string(native_architecture())));
+
+ assert_se(personality_from_string(personality_to_string(PER_LINUX)) == PER_LINUX);
+ assert_se(personality_from_string(architecture_to_string(native_architecture())) == PER_LINUX);
+
+#ifdef __x86_64__
+ assert_se(streq_ptr(personality_to_string(PER_LINUX), "x86-64"));
+ assert_se(streq_ptr(personality_to_string(PER_LINUX32), "x86"));
+
+ assert_se(personality_from_string("x86-64") == PER_LINUX);
+ assert_se(personality_from_string("x86") == PER_LINUX32);
+ assert_se(personality_from_string("ia64") == PERSONALITY_INVALID);
+ assert_se(personality_from_string(NULL) == PERSONALITY_INVALID);
+
+ assert_se(personality_from_string(personality_to_string(PER_LINUX32)) == PER_LINUX32);
+#endif
+}
+
+static void test_get_process_cmdline_harder(void) {
+ char path[] = "/tmp/test-cmdlineXXXXXX";
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *line = NULL;
+ pid_t pid;
+
+ if (geteuid() != 0) {
+ log_info("Skipping %s: not root", __func__);
+ return;
+ }
+
+ if (!have_namespaces()) {
+ log_notice("Testing without namespaces, skipping %s", __func__);
+ return;
+ }
+
+#if HAVE_VALGRIND_VALGRIND_H
+ /* valgrind patches open(/proc//cmdline)
+ * so, test_get_process_cmdline_harder fails always
+ * See https://github.com/systemd/systemd/pull/3555#issuecomment-226564908 */
+ if (RUNNING_ON_VALGRIND) {
+ log_info("Skipping %s: running on valgrind", __func__);
+ return;
+ }
+#endif
+
+ pid = fork();
+ if (pid > 0) {
+ siginfo_t si;
+
+ (void) wait_for_terminate(pid, &si);
+
+ assert_se(si.si_code == CLD_EXITED);
+ assert_se(si.si_status == 0);
+
+ return;
+ }
+
+ assert_se(pid == 0);
+ assert_se(unshare(CLONE_NEWNS) >= 0);
+
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+ log_warning_errno(errno, "mount(..., \"/\", MS_SLAVE|MS_REC, ...) failed: %m");
+ assert_se(IN_SET(errno, EPERM, EACCES));
+ return;
+ }
+
+ fd = mkostemp(path, O_CLOEXEC);
+ assert_se(fd >= 0);
+
+ /* Note that we don't unmount the following bind-mount at the end of the test because the kernel
+ * will clear up its /proc/PID/ hierarchy automatically as soon as the test stops. */
+ if (mount(path, "/proc/self/cmdline", "bind", MS_BIND, NULL) < 0) {
+ /* This happens under selinux… Abort the test in this case. */
+ log_warning_errno(errno, "mount(..., \"/proc/self/cmdline\", \"bind\", ...) failed: %m");
+ assert_se(IN_SET(errno, EPERM, EACCES));
+ return;
+ }
+
+ assert_se(unlink(path) >= 0);
+
+ assert_se(prctl(PR_SET_NAME, "testa") >= 0);
+
+ assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) == -ENOENT);
+
+ assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
+ assert_se(streq(line, "[testa]"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 1, true, &line) >= 0);
+ assert_se(streq(line, ""));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 2, true, &line) >= 0);
+ assert_se(streq(line, "["));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 3, true, &line) >= 0);
+ assert_se(streq(line, "[."));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 4, true, &line) >= 0);
+ assert_se(streq(line, "[.."));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 5, true, &line) >= 0);
+ assert_se(streq(line, "[..."));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 6, true, &line) >= 0);
+ assert_se(streq(line, "[...]"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 7, true, &line) >= 0);
+ assert_se(streq(line, "[t...]"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 8, true, &line) >= 0);
+ assert_se(streq(line, "[testa]"));
+ line = mfree(line);
+
+ assert_se(write(fd, "\0\0\0\0\0\0\0\0\0", 10) == 10);
+
+ assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) == -ENOENT);
+
+ assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
+ assert_se(streq(line, "[testa]"));
+ line = mfree(line);
+
+ assert_se(write(fd, "foo\0bar\0\0\0\0\0", 10) == 10);
+
+ assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) >= 0);
+ assert_se(streq(line, "foo bar"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
+ assert_se(streq(line, "foo bar"));
+ line = mfree(line);
+
+ assert_se(write(fd, "quux", 4) == 4);
+ assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) >= 0);
+ assert_se(streq(line, "foo bar quux"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
+ assert_se(streq(line, "foo bar quux"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 1, true, &line) >= 0);
+ assert_se(streq(line, ""));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 2, true, &line) >= 0);
+ assert_se(streq(line, "."));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 3, true, &line) >= 0);
+ assert_se(streq(line, ".."));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 4, true, &line) >= 0);
+ assert_se(streq(line, "..."));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 5, true, &line) >= 0);
+ assert_se(streq(line, "f..."));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 6, true, &line) >= 0);
+ assert_se(streq(line, "fo..."));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 7, true, &line) >= 0);
+ assert_se(streq(line, "foo..."));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 8, true, &line) >= 0);
+ assert_se(streq(line, "foo..."));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 9, true, &line) >= 0);
+ assert_se(streq(line, "foo b..."));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 10, true, &line) >= 0);
+ assert_se(streq(line, "foo ba..."));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 11, true, &line) >= 0);
+ assert_se(streq(line, "foo bar..."));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 12, true, &line) >= 0);
+ assert_se(streq(line, "foo bar..."));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 13, true, &line) >= 0);
+ assert_se(streq(line, "foo bar quux"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 14, true, &line) >= 0);
+ assert_se(streq(line, "foo bar quux"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 1000, true, &line) >= 0);
+ assert_se(streq(line, "foo bar quux"));
+ line = mfree(line);
+
+ assert_se(ftruncate(fd, 0) >= 0);
+ assert_se(prctl(PR_SET_NAME, "aaaa bbbb cccc") >= 0);
+
+ assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) == -ENOENT);
+
+ assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
+ assert_se(streq(line, "[aaaa bbbb cccc]"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 10, true, &line) >= 0);
+ assert_se(streq(line, "[aaaa...]"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 11, true, &line) >= 0);
+ assert_se(streq(line, "[aaaa...]"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 12, true, &line) >= 0);
+ assert_se(streq(line, "[aaaa b...]"));
+ line = mfree(line);
+
+ safe_close(fd);
+ _exit(EXIT_SUCCESS);
+}
+
+static void test_rename_process_now(const char *p, int ret) {
+ _cleanup_free_ char *comm = NULL, *cmdline = NULL;
+ int r;
+
+ r = rename_process(p);
+ assert_se(r == ret ||
+ (ret == 0 && r >= 0) ||
+ (ret > 0 && r > 0));
+
+ if (r < 0)
+ return;
+
+#if HAVE_VALGRIND_VALGRIND_H
+ /* see above, valgrind is weird, we can't verify what we are doing here */
+ if (RUNNING_ON_VALGRIND)
+ return;
+#endif
+
+ assert_se(get_process_comm(0, &comm) >= 0);
+ log_info("comm = <%s>", comm);
+ assert_se(strneq(comm, p, TASK_COMM_LEN-1));
+
+ r = get_process_cmdline(0, 0, false, &cmdline);
+ assert_se(r >= 0);
+ /* we cannot expect cmdline to be renamed properly without privileges */
+ if (geteuid() == 0) {
+ if (r == 0 && detect_container() > 0)
+ log_info("cmdline = <%s> (not verified, Running in unprivileged container?)", cmdline);
+ else {
+ log_info("cmdline = <%s>", cmdline);
+ assert_se(strneq(p, cmdline, STRLEN("test-process-util")));
+ assert_se(startswith(p, cmdline));
+ }
+ } else
+ log_info("cmdline = <%s> (not verified)", cmdline);
+}
+
+static void test_rename_process_one(const char *p, int ret) {
+ siginfo_t si;
+ pid_t pid;
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ /* child */
+ test_rename_process_now(p, ret);
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate(pid, &si) >= 0);
+ assert_se(si.si_code == CLD_EXITED);
+ assert_se(si.si_status == EXIT_SUCCESS);
+}
+
+static void test_rename_process_multi(void) {
+ pid_t pid;
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid > 0) {
+ siginfo_t si;
+
+ assert_se(wait_for_terminate(pid, &si) >= 0);
+ assert_se(si.si_code == CLD_EXITED);
+ assert_se(si.si_status == EXIT_SUCCESS);
+
+ return;
+ }
+
+ /* child */
+ test_rename_process_now("one", 1);
+ test_rename_process_now("more", 0); /* longer than "one", hence truncated */
+ (void) setresuid(99, 99, 99); /* change uid when running privileged */
+ test_rename_process_now("time!", 0);
+ test_rename_process_now("0", 1); /* shorter than "one", should fit */
+ test_rename_process_one("", -EINVAL);
+ test_rename_process_one(NULL, -EINVAL);
+ _exit(EXIT_SUCCESS);
+}
+
+static void test_rename_process(void) {
+ test_rename_process_one(NULL, -EINVAL);
+ test_rename_process_one("", -EINVAL);
+ test_rename_process_one("foo", 1); /* should always fit */
+ test_rename_process_one("this is a really really long process name, followed by some more words", 0); /* unlikely to fit */
+ test_rename_process_one("1234567", 1); /* should always fit */
+ test_rename_process_multi(); /* multiple invocations and dropped privileges */
+}
+
+static void test_getpid_cached(void) {
+ siginfo_t si;
+ pid_t a, b, c, d, e, f, child;
+
+ a = raw_getpid();
+ b = getpid_cached();
+ c = getpid();
+
+ assert_se(a == b && a == c);
+
+ child = fork();
+ assert_se(child >= 0);
+
+ if (child == 0) {
+ /* In child */
+ a = raw_getpid();
+ b = getpid_cached();
+ c = getpid();
+
+ assert_se(a == b && a == c);
+ _exit(EXIT_SUCCESS);
+ }
+
+ d = raw_getpid();
+ e = getpid_cached();
+ f = getpid();
+
+ assert_se(a == d && a == e && a == f);
+
+ assert_se(wait_for_terminate(child, &si) >= 0);
+ assert_se(si.si_status == 0);
+ assert_se(si.si_code == CLD_EXITED);
+}
+
+#define MEASURE_ITERATIONS (10000000LLU)
+
+static void test_getpid_measure(void) {
+ unsigned long long i;
+ usec_t t, q;
+
+ t = now(CLOCK_MONOTONIC);
+ for (i = 0; i < MEASURE_ITERATIONS; i++)
+ (void) getpid();
+ q = now(CLOCK_MONOTONIC) - t;
+
+ log_info(" glibc getpid(): %llu/s\n", (unsigned long long) (MEASURE_ITERATIONS*USEC_PER_SEC/q));
+
+ t = now(CLOCK_MONOTONIC);
+ for (i = 0; i < MEASURE_ITERATIONS; i++)
+ (void) getpid_cached();
+ q = now(CLOCK_MONOTONIC) - t;
+
+ log_info("getpid_cached(): %llu/s\n", (unsigned long long) (MEASURE_ITERATIONS*USEC_PER_SEC/q));
+}
+
+static void test_safe_fork(void) {
+ siginfo_t status;
+ pid_t pid;
+ int r;
+
+ BLOCK_SIGNALS(SIGCHLD);
+
+ r = safe_fork("(test-child)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_NULL_STDIO|FORK_REOPEN_LOG, &pid);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* child */
+ usleep(100 * USEC_PER_MSEC);
+
+ _exit(88);
+ }
+
+ assert_se(wait_for_terminate(pid, &status) >= 0);
+ assert_se(status.si_code == CLD_EXITED);
+ assert_se(status.si_status == 88);
+}
+
+static void test_pid_to_ptr(void) {
+
+ assert_se(PTR_TO_PID(NULL) == 0);
+ assert_se(PID_TO_PTR(0) == NULL);
+
+ assert_se(PTR_TO_PID(PID_TO_PTR(1)) == 1);
+ assert_se(PTR_TO_PID(PID_TO_PTR(2)) == 2);
+ assert_se(PTR_TO_PID(PID_TO_PTR(-1)) == -1);
+ assert_se(PTR_TO_PID(PID_TO_PTR(-2)) == -2);
+
+ assert_se(PTR_TO_PID(PID_TO_PTR(INT16_MAX)) == INT16_MAX);
+ assert_se(PTR_TO_PID(PID_TO_PTR(INT16_MIN)) == INT16_MIN);
+
+#if SIZEOF_PID_T >= 4
+ assert_se(PTR_TO_PID(PID_TO_PTR(INT32_MAX)) == INT32_MAX);
+ assert_se(PTR_TO_PID(PID_TO_PTR(INT32_MIN)) == INT32_MIN);
+#endif
+}
+
+static void test_ioprio_class_from_to_string_one(const char *val, int expected) {
+ assert_se(ioprio_class_from_string(val) == expected);
+ if (expected >= 0) {
+ _cleanup_free_ char *s = NULL;
+ unsigned ret;
+
+ assert_se(ioprio_class_to_string_alloc(expected, &s) == 0);
+ /* We sometimes get a class number and sometimes a number back */
+ assert_se(streq(s, val) ||
+ safe_atou(val, &ret) == 0);
+ }
+}
+
+static void test_ioprio_class_from_to_string(void) {
+ test_ioprio_class_from_to_string_one("none", IOPRIO_CLASS_NONE);
+ test_ioprio_class_from_to_string_one("realtime", IOPRIO_CLASS_RT);
+ test_ioprio_class_from_to_string_one("best-effort", IOPRIO_CLASS_BE);
+ test_ioprio_class_from_to_string_one("idle", IOPRIO_CLASS_IDLE);
+ test_ioprio_class_from_to_string_one("0", 0);
+ test_ioprio_class_from_to_string_one("1", 1);
+ test_ioprio_class_from_to_string_one("7", 7);
+ test_ioprio_class_from_to_string_one("8", 8);
+ test_ioprio_class_from_to_string_one("9", -1);
+ test_ioprio_class_from_to_string_one("-1", -1);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ saved_argc = argc;
+ saved_argv = argv;
+
+ if (argc > 1) {
+ pid_t pid = 0;
+
+ (void) parse_pid(argv[1], &pid);
+ test_get_process_comm(pid);
+ } else {
+ TEST_REQ_RUNNING_SYSTEMD(test_get_process_comm(1));
+ test_get_process_comm(getpid());
+ }
+
+ test_get_process_comm_escape();
+ test_pid_is_unwaited();
+ test_pid_is_alive();
+ test_personality();
+ test_get_process_cmdline_harder();
+ test_rename_process();
+ test_getpid_cached();
+ test_getpid_measure();
+ test_safe_fork();
+ test_pid_to_ptr();
+ test_ioprio_class_from_to_string();
+
+ return 0;
+}
diff --git a/src/test/test-procfs-util.c b/src/test/test-procfs-util.c
new file mode 100644
index 0000000..1d06129
--- /dev/null
+++ b/src/test/test-procfs-util.c
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "log.h"
+#include "parse-util.h"
+#include "procfs-util.h"
+
+int main(int argc, char *argv[]) {
+ char buf[CONST_MAX(FORMAT_TIMESPAN_MAX, FORMAT_BYTES_MAX)];
+ nsec_t nsec;
+ uint64_t v;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ assert_se(procfs_cpu_get_usage(&nsec) >= 0);
+ log_info("Current system CPU time: %s", format_timespan(buf, sizeof(buf), nsec/NSEC_PER_USEC, 1));
+
+ assert_se(procfs_memory_get_used(&v) >= 0);
+ log_info("Current memory usage: %s", format_bytes(buf, sizeof(buf), v));
+
+ assert_se(procfs_tasks_get_current(&v) >= 0);
+ log_info("Current number of tasks: %" PRIu64, v);
+
+ assert_se(procfs_tasks_get_limit(&v) >= 0);
+ log_info("Limit of tasks: %" PRIu64, v);
+ assert_se(v > 0);
+ assert_se(procfs_tasks_set_limit(v) >= 0);
+
+ if (v > 100) {
+ uint64_t w;
+ r = procfs_tasks_set_limit(v-1);
+ assert_se(IN_SET(r, 0, -EPERM, -EACCES, -EROFS));
+
+ assert_se(procfs_tasks_get_limit(&w) >= 0);
+ assert_se((r == 0 && w == v - 1) || (r < 0 && w == v));
+
+ assert_se(procfs_tasks_set_limit(v) >= 0);
+
+ assert_se(procfs_tasks_get_limit(&w) >= 0);
+ assert_se(v == w);
+ }
+
+ return 0;
+}
diff --git a/src/test/test-random-util.c b/src/test/test-random-util.c
new file mode 100644
index 0000000..94c431f
--- /dev/null
+++ b/src/test/test-random-util.c
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "hexdecoct.h"
+#include "random-util.h"
+#include "log.h"
+#include "tests.h"
+
+static void test_genuine_random_bytes(RandomFlags flags) {
+ uint8_t buf[16] = {};
+ unsigned i;
+
+ log_info("/* %s */", __func__);
+
+ for (i = 1; i < sizeof buf; i++) {
+ assert_se(genuine_random_bytes(buf, i, flags) == 0);
+ if (i + 1 < sizeof buf)
+ assert_se(buf[i] == 0);
+
+ hexdump(stdout, buf, i);
+ }
+}
+
+static void test_pseudo_random_bytes(void) {
+ uint8_t buf[16] = {};
+ unsigned i;
+
+ log_info("/* %s */", __func__);
+
+ for (i = 1; i < sizeof buf; i++) {
+ pseudo_random_bytes(buf, i);
+ if (i + 1 < sizeof buf)
+ assert_se(buf[i] == 0);
+
+ hexdump(stdout, buf, i);
+ }
+}
+
+static void test_rdrand(void) {
+ int r, i;
+
+ for (i = 0; i < 10; i++) {
+ unsigned long x = 0;
+
+ r = rdrand(&x);
+ if (r < 0) {
+ log_error_errno(r, "RDRAND failed: %m");
+ return;
+ }
+
+ printf("%lx\n", x);
+ }
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_genuine_random_bytes(RANDOM_EXTEND_WITH_PSEUDO);
+ test_genuine_random_bytes(0);
+ test_genuine_random_bytes(RANDOM_BLOCK);
+ test_genuine_random_bytes(RANDOM_ALLOW_RDRAND);
+
+ test_pseudo_random_bytes();
+
+ test_rdrand();
+
+ return 0;
+}
diff --git a/src/test/test-ratelimit.c b/src/test/test-ratelimit.c
new file mode 100644
index 0000000..9f344e1
--- /dev/null
+++ b/src/test/test-ratelimit.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <unistd.h>
+
+#include "macro.h"
+#include "ratelimit.h"
+#include "time-util.h"
+
+static void test_ratelimit_below(void) {
+ int i;
+ RATELIMIT_DEFINE(ratelimit, 1 * USEC_PER_SEC, 10);
+
+ for (i = 0; i < 10; i++)
+ assert_se(ratelimit_below(&ratelimit));
+ assert_se(!ratelimit_below(&ratelimit));
+ sleep(1);
+ for (i = 0; i < 10; i++)
+ assert_se(ratelimit_below(&ratelimit));
+
+ RATELIMIT_INIT(ratelimit, 0, 10);
+ for (i = 0; i < 10000; i++)
+ assert_se(ratelimit_below(&ratelimit));
+}
+
+int main(int argc, char *argv[]) {
+ test_ratelimit_below();
+
+ return 0;
+}
diff --git a/src/test/test-replace-var.c b/src/test/test-replace-var.c
new file mode 100644
index 0000000..55208b1
--- /dev/null
+++ b/src/test/test-replace-var.c
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+
+#include "macro.h"
+#include "replace-var.h"
+#include "string-util.h"
+#include "util.h"
+
+static char *lookup(const char *variable, void *userdata) {
+ return strjoin("<<<", variable, ">>>");
+}
+
+int main(int argc, char *argv[]) {
+ char *r;
+
+ assert_se(r = replace_var("@@@foobar@xyz@HALLO@foobar@test@@testtest@TEST@...@@@", lookup, NULL));
+ puts(r);
+ assert_se(streq(r, "@@@foobar@xyz<<<HALLO>>>foobar@test@@testtest<<<TEST>>>...@@@"));
+ free(r);
+
+ assert_se(r = strreplace("XYZFFFFXYZFFFFXYZ", "XYZ", "ABC"));
+ puts(r);
+ assert_se(streq(r, "ABCFFFFABCFFFFABC"));
+ free(r);
+
+ return 0;
+}
diff --git a/src/test/test-rlimit-util.c b/src/test/test-rlimit-util.c
new file mode 100644
index 0000000..771719a
--- /dev/null
+++ b/src/test/test-rlimit-util.c
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/resource.h>
+
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "rlimit-util.h"
+#include "string-util.h"
+#include "util.h"
+
+static void test_rlimit_parse_format(int resource, const char *string, rlim_t soft, rlim_t hard, int ret, const char *formatted) {
+ _cleanup_free_ char *f = NULL;
+ struct rlimit rl = {
+ .rlim_cur = 4711,
+ .rlim_max = 4712,
+ }, rl2 = {
+ .rlim_cur = 4713,
+ .rlim_max = 4714
+ };
+
+ assert_se(rlimit_parse(resource, string, &rl) == ret);
+ if (ret < 0)
+ return;
+
+ assert_se(rl.rlim_cur == soft);
+ assert_se(rl.rlim_max == hard);
+
+ assert_se(rlimit_format(&rl, &f) >= 0);
+ assert_se(streq(formatted, f));
+
+ assert_se(rlimit_parse(resource, formatted, &rl2) >= 0);
+ assert_se(memcmp(&rl, &rl2, sizeof(struct rlimit)) == 0);
+}
+
+int main(int argc, char *argv[]) {
+ struct rlimit old, new, high;
+ struct rlimit err = {
+ .rlim_cur = 10,
+ .rlim_max = 5,
+ };
+ int i;
+
+ log_parse_environment();
+ log_open();
+
+ assert_se(drop_capability(CAP_SYS_RESOURCE) == 0);
+
+ assert_se(getrlimit(RLIMIT_NOFILE, &old) == 0);
+ new.rlim_cur = MIN(5U, old.rlim_max);
+ new.rlim_max = old.rlim_max;
+ assert_se(setrlimit(RLIMIT_NOFILE, &new) >= 0);
+
+ assert_se(rlimit_from_string("NOFILE") == RLIMIT_NOFILE);
+ assert_se(rlimit_from_string("LimitNOFILE") == -1);
+ assert_se(rlimit_from_string("RLIMIT_NOFILE") == -1);
+ assert_se(rlimit_from_string("xxxNOFILE") == -1);
+ assert_se(rlimit_from_string("DefaultLimitNOFILE") == -1);
+
+ assert_se(rlimit_from_string_harder("NOFILE") == RLIMIT_NOFILE);
+ assert_se(rlimit_from_string_harder("LimitNOFILE") == RLIMIT_NOFILE);
+ assert_se(rlimit_from_string_harder("RLIMIT_NOFILE") == RLIMIT_NOFILE);
+ assert_se(rlimit_from_string_harder("xxxNOFILE") == -1);
+ assert_se(rlimit_from_string_harder("DefaultLimitNOFILE") == -1);
+
+ for (i = 0; i < _RLIMIT_MAX; i++) {
+ _cleanup_free_ char *prefixed = NULL;
+ const char *p;
+
+ assert_se(p = rlimit_to_string(i));
+ log_info("%i = %s", i, p);
+
+ assert_se(rlimit_from_string(p) == i);
+ assert_se(rlimit_from_string_harder(p) == i);
+
+ assert_se(prefixed = strjoin("Limit", p));
+
+ assert_se(rlimit_from_string(prefixed) < 0);
+ assert_se(rlimit_from_string_harder(prefixed) == i);
+
+ prefixed = mfree(prefixed);
+ assert_se(prefixed = strjoin("RLIMIT_", p));
+
+ assert_se(rlimit_from_string(prefixed) < 0);
+ assert_se(rlimit_from_string_harder(prefixed) == i);
+ }
+
+ assert_se(streq_ptr(rlimit_to_string(RLIMIT_NOFILE), "NOFILE"));
+ assert_se(rlimit_to_string(-1) == NULL);
+
+ assert_se(getrlimit(RLIMIT_NOFILE, &old) == 0);
+ assert_se(setrlimit_closest(RLIMIT_NOFILE, &old) == 0);
+ assert_se(getrlimit(RLIMIT_NOFILE, &new) == 0);
+ assert_se(old.rlim_cur == new.rlim_cur);
+ assert_se(old.rlim_max == new.rlim_max);
+
+ assert_se(getrlimit(RLIMIT_NOFILE, &old) == 0);
+ high = RLIMIT_MAKE_CONST(old.rlim_max == RLIM_INFINITY ? old.rlim_max : old.rlim_max + 1);
+ assert_se(setrlimit_closest(RLIMIT_NOFILE, &high) == 0);
+ assert_se(getrlimit(RLIMIT_NOFILE, &new) == 0);
+ assert_se(new.rlim_max == old.rlim_max);
+ assert_se(new.rlim_cur == new.rlim_max);
+
+ assert_se(getrlimit(RLIMIT_NOFILE, &old) == 0);
+ assert_se(setrlimit_closest(RLIMIT_NOFILE, &err) == -EINVAL);
+ assert_se(getrlimit(RLIMIT_NOFILE, &new) == 0);
+ assert_se(old.rlim_cur == new.rlim_cur);
+ assert_se(old.rlim_max == new.rlim_max);
+
+ test_rlimit_parse_format(RLIMIT_NOFILE, "4:5", 4, 5, 0, "4:5");
+ test_rlimit_parse_format(RLIMIT_NOFILE, "6", 6, 6, 0, "6");
+ test_rlimit_parse_format(RLIMIT_NOFILE, "infinity", RLIM_INFINITY, RLIM_INFINITY, 0, "infinity");
+ test_rlimit_parse_format(RLIMIT_NOFILE, "infinity:infinity", RLIM_INFINITY, RLIM_INFINITY, 0, "infinity");
+ test_rlimit_parse_format(RLIMIT_NOFILE, "8:infinity", 8, RLIM_INFINITY, 0, "8:infinity");
+ test_rlimit_parse_format(RLIMIT_CPU, "25min:13h", (25*USEC_PER_MINUTE) / USEC_PER_SEC, (13*USEC_PER_HOUR) / USEC_PER_SEC, 0, "1500:46800");
+ test_rlimit_parse_format(RLIMIT_NOFILE, "", 0, 0, -EINVAL, NULL);
+ test_rlimit_parse_format(RLIMIT_NOFILE, "5:4", 0, 0, -EILSEQ, NULL);
+ test_rlimit_parse_format(RLIMIT_NOFILE, "5:4:3", 0, 0, -EINVAL, NULL);
+ test_rlimit_parse_format(RLIMIT_NICE, "20", 20, 20, 0, "20");
+ test_rlimit_parse_format(RLIMIT_NICE, "40", 40, 40, 0, "40");
+ test_rlimit_parse_format(RLIMIT_NICE, "41", 41, 41, -ERANGE, "41");
+ test_rlimit_parse_format(RLIMIT_NICE, "0", 0, 0, 0, "0");
+ test_rlimit_parse_format(RLIMIT_NICE, "-7", 27, 27, 0, "27");
+ test_rlimit_parse_format(RLIMIT_NICE, "-20", 40, 40, 0, "40");
+ test_rlimit_parse_format(RLIMIT_NICE, "-21", 41, 41, -ERANGE, "41");
+ test_rlimit_parse_format(RLIMIT_NICE, "-0", 20, 20, 0, "20");
+ test_rlimit_parse_format(RLIMIT_NICE, "+7", 13, 13, 0, "13");
+ test_rlimit_parse_format(RLIMIT_NICE, "+19", 1, 1, 0, "1");
+ test_rlimit_parse_format(RLIMIT_NICE, "+20", 0, 0, -ERANGE, "0");
+ test_rlimit_parse_format(RLIMIT_NICE, "+0", 20, 20, 0, "20");
+
+ return 0;
+}
diff --git a/src/test/test-sched-prio.c b/src/test/test-sched-prio.c
new file mode 100644
index 0000000..1aa1781
--- /dev/null
+++ b/src/test/test-sched-prio.c
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2012 Holger Hans Peter Freyther
+***/
+
+#include <sched.h>
+
+#include "all-units.h"
+#include "macro.h"
+#include "manager.h"
+#include "rm-rf.h"
+#include "test-helper.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *idle_ok, *idle_bad, *rr_ok, *rr_bad, *rr_sched;
+ Service *ser;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = enter_cgroup_subroot();
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ /* prepare the test */
+ assert_se(set_unit_path(get_testdata_dir()) >= 0);
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m);
+ if (MANAGER_SKIP_TEST(r))
+ return log_tests_skipped_errno(r, "manager_new");
+ assert_se(r >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ /* load idle ok */
+ assert_se(manager_load_startable_unit_or_warn(m, "sched_idle_ok.service", NULL, &idle_ok) >= 0);
+ ser = SERVICE(idle_ok);
+ assert_se(ser->exec_context.cpu_sched_policy == SCHED_OTHER);
+ assert_se(ser->exec_context.cpu_sched_priority == 0);
+
+ /*
+ * load idle bad. This should print a warning but we have no way to look at it.
+ */
+ assert_se(manager_load_startable_unit_or_warn(m, "sched_idle_bad.service", NULL, &idle_bad) >= 0);
+ ser = SERVICE(idle_ok);
+ assert_se(ser->exec_context.cpu_sched_policy == SCHED_OTHER);
+ assert_se(ser->exec_context.cpu_sched_priority == 0);
+
+ /*
+ * load rr ok.
+ * Test that the default priority is moving from 0 to 1.
+ */
+ assert_se(manager_load_startable_unit_or_warn(m, "sched_rr_ok.service", NULL, &rr_ok) >= 0);
+ ser = SERVICE(rr_ok);
+ assert_se(ser->exec_context.cpu_sched_policy == SCHED_RR);
+ assert_se(ser->exec_context.cpu_sched_priority == 1);
+
+ /*
+ * load rr bad.
+ * Test that the value of 0 and 100 is ignored.
+ */
+ assert_se(manager_load_startable_unit_or_warn(m, "sched_rr_bad.service", NULL, &rr_bad) >= 0);
+ ser = SERVICE(rr_bad);
+ assert_se(ser->exec_context.cpu_sched_policy == SCHED_RR);
+ assert_se(ser->exec_context.cpu_sched_priority == 1);
+
+ /*
+ * load rr change.
+ * Test that anything between 1 and 99 can be set.
+ */
+ assert_se(manager_load_startable_unit_or_warn(m, "sched_rr_change.service", NULL, &rr_sched) >= 0);
+ ser = SERVICE(rr_sched);
+ assert_se(ser->exec_context.cpu_sched_policy == SCHED_RR);
+ assert_se(ser->exec_context.cpu_sched_priority == 99);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-sd-hwdb.c b/src/test/test-sd-hwdb.c
new file mode 100644
index 0000000..17ca6a0
--- /dev/null
+++ b/src/test/test-sd-hwdb.c
@@ -0,0 +1,74 @@
+#include "sd-hwdb.h"
+
+#include "alloc-util.h"
+#include "errno.h"
+#include "tests.h"
+
+static int test_failed_enumerate(void) {
+ _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb = NULL;
+ const char *key, *value;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ r = sd_hwdb_new(&hwdb);
+ if (r == -ENOENT)
+ return r;
+ assert_se(r == 0);
+
+ assert_se(sd_hwdb_seek(hwdb, "no-such-modalias-should-exist") == 0);
+
+ assert_se(sd_hwdb_enumerate(hwdb, &key, &value) == 0);
+ assert_se(sd_hwdb_enumerate(hwdb, &key, NULL) == -EINVAL);
+ assert_se(sd_hwdb_enumerate(hwdb, NULL, &value) == -EINVAL);
+
+ return 0;
+}
+
+#define DELL_MODALIAS \
+ "evdev:atkbd:dmi:bvnXXX:bvrYYY:bdZZZ:svnDellXXX:pnYYY"
+
+static void test_basic_enumerate(void) {
+ _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb = NULL;
+ const char *key, *value;
+ size_t len1 = 0, len2 = 0;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(sd_hwdb_new(&hwdb) == 0);
+
+ assert_se(sd_hwdb_seek(hwdb, DELL_MODALIAS) == 0);
+
+ for (;;) {
+ r = sd_hwdb_enumerate(hwdb, &key, &value);
+ assert(IN_SET(r, 0, 1));
+ if (r == 0)
+ break;
+ assert(key);
+ assert(value);
+ log_debug("A: \"%s\" → \"%s\"", key, value);
+ len1 += strlen(key) + strlen(value);
+ }
+
+ SD_HWDB_FOREACH_PROPERTY(hwdb, DELL_MODALIAS, key, value) {
+ log_debug("B: \"%s\" → \"%s\"", key, value);
+ len2 += strlen(key) + strlen(value);
+ }
+
+ assert_se(len1 == len2);
+}
+
+int main(int argc, char *argv[]) {
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ r = test_failed_enumerate();
+ if (r < 0)
+ return log_tests_skipped_errno(r, "cannot open hwdb");
+
+ test_basic_enumerate();
+
+ return 0;
+}
diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c
new file mode 100644
index 0000000..fbfeedd
--- /dev/null
+++ b/src/test/test-seccomp.c
@@ -0,0 +1,768 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <poll.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/mman.h>
+#include <sys/personality.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "nsflags.h"
+#include "process-util.h"
+#include "raw-clone.h"
+#include "seccomp-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "tests.h"
+#include "util.h"
+#include "virt.h"
+
+#if SCMP_SYS(socket) < 0 || defined(__i386__) || defined(__s390x__) || defined(__s390__)
+/* On these archs, socket() is implemented via the socketcall() syscall multiplexer,
+ * and we can't restrict it hence via seccomp. */
+# define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 1
+#else
+# define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 0
+#endif
+
+static void test_seccomp_arch_to_string(void) {
+ uint32_t a, b;
+ const char *name;
+
+ log_info("/* %s */", __func__);
+
+ a = seccomp_arch_native();
+ assert_se(a > 0);
+ name = seccomp_arch_to_string(a);
+ assert_se(name);
+ assert_se(seccomp_arch_from_string(name, &b) >= 0);
+ assert_se(a == b);
+}
+
+static void test_architecture_table(void) {
+ const char *n, *n2;
+
+ log_info("/* %s */", __func__);
+
+ NULSTR_FOREACH(n,
+ "native\0"
+ "x86\0"
+ "x86-64\0"
+ "x32\0"
+ "arm\0"
+ "arm64\0"
+ "mips\0"
+ "mips64\0"
+ "mips64-n32\0"
+ "mips-le\0"
+ "mips64-le\0"
+ "mips64-le-n32\0"
+ "ppc\0"
+ "ppc64\0"
+ "ppc64-le\0"
+ "s390\0"
+ "s390x\0") {
+ uint32_t c;
+
+ assert_se(seccomp_arch_from_string(n, &c) >= 0);
+ n2 = seccomp_arch_to_string(c);
+ log_info("seccomp-arch: %s → 0x%"PRIx32" → %s", n, c, n2);
+ assert_se(streq_ptr(n, n2));
+ }
+}
+
+static void test_syscall_filter_set_find(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!syscall_filter_set_find(NULL));
+ assert_se(!syscall_filter_set_find(""));
+ assert_se(!syscall_filter_set_find("quux"));
+ assert_se(!syscall_filter_set_find("@quux"));
+
+ assert_se(syscall_filter_set_find("@clock") == syscall_filter_sets + SYSCALL_FILTER_SET_CLOCK);
+ assert_se(syscall_filter_set_find("@default") == syscall_filter_sets + SYSCALL_FILTER_SET_DEFAULT);
+ assert_se(syscall_filter_set_find("@raw-io") == syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO);
+}
+
+static void test_filter_sets(void) {
+ unsigned i;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) {
+ pid_t pid;
+
+ log_info("Testing %s", syscall_filter_sets[i].name);
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) { /* Child? */
+ int fd;
+
+ /* If we look at the default set (or one that includes it), whitelist instead of blacklist */
+ if (IN_SET(i, SYSCALL_FILTER_SET_DEFAULT, SYSCALL_FILTER_SET_SYSTEM_SERVICE))
+ r = seccomp_load_syscall_filter_set(SCMP_ACT_ERRNO(EUCLEAN), syscall_filter_sets + i, SCMP_ACT_ALLOW, true);
+ else
+ r = seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + i, SCMP_ACT_ERRNO(EUCLEAN), true);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ /* Test the sycall filter with one random system call */
+ fd = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC);
+ if (IN_SET(i, SYSCALL_FILTER_SET_IO_EVENT, SYSCALL_FILTER_SET_DEFAULT))
+ assert_se(fd < 0 && errno == EUCLEAN);
+ else {
+ assert_se(fd >= 0);
+ safe_close(fd);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check(syscall_filter_sets[i].name, pid, WAIT_LOG) == EXIT_SUCCESS);
+ }
+}
+
+static void test_filter_sets_ordered(void) {
+ size_t i;
+
+ log_info("/* %s */", __func__);
+
+ /* Ensure "@default" always remains at the beginning of the list */
+ assert_se(SYSCALL_FILTER_SET_DEFAULT == 0);
+ assert_se(streq(syscall_filter_sets[0].name, "@default"));
+
+ for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) {
+ const char *k, *p = NULL;
+
+ /* Make sure each group has a description */
+ assert_se(!isempty(syscall_filter_sets[0].help));
+
+ /* Make sure the groups are ordered alphabetically, except for the first entry */
+ assert_se(i < 2 || strcmp(syscall_filter_sets[i-1].name, syscall_filter_sets[i].name) < 0);
+
+ NULSTR_FOREACH(k, syscall_filter_sets[i].value) {
+
+ /* Ensure each syscall list is in itself ordered, but groups before names */
+ assert_se(!p ||
+ (*p == '@' && *k != '@') ||
+ (((*p == '@' && *k == '@') ||
+ (*p != '@' && *k != '@')) &&
+ strcmp(p, k) < 0));
+
+ p = k;
+ }
+ }
+}
+
+static void test_restrict_namespace(void) {
+ char *s = NULL;
+ unsigned long ul;
+ pid_t pid;
+
+ if (!have_namespaces()) {
+ log_notice("Testing without namespaces, skipping %s", __func__);
+ return;
+ }
+
+ log_info("/* %s */", __func__);
+
+ assert_se(namespace_flags_to_string(0, &s) == 0 && streq(s, ""));
+ s = mfree(s);
+ assert_se(namespace_flags_to_string(CLONE_NEWNS, &s) == 0 && streq(s, "mnt"));
+ s = mfree(s);
+ assert_se(namespace_flags_to_string(CLONE_NEWNS|CLONE_NEWIPC, &s) == 0 && streq(s, "ipc mnt"));
+ s = mfree(s);
+ assert_se(namespace_flags_to_string(CLONE_NEWCGROUP, &s) == 0 && streq(s, "cgroup"));
+ s = mfree(s);
+
+ assert_se(namespace_flags_from_string("mnt", &ul) == 0 && ul == CLONE_NEWNS);
+ assert_se(namespace_flags_from_string(NULL, &ul) == 0 && ul == 0);
+ assert_se(namespace_flags_from_string("", &ul) == 0 && ul == 0);
+ assert_se(namespace_flags_from_string("uts", &ul) == 0 && ul == CLONE_NEWUTS);
+ assert_se(namespace_flags_from_string("mnt uts ipc", &ul) == 0 && ul == (CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC));
+
+ assert_se(namespace_flags_to_string(CLONE_NEWUTS, &s) == 0 && streq(s, "uts"));
+ assert_se(namespace_flags_from_string(s, &ul) == 0 && ul == CLONE_NEWUTS);
+ s = mfree(s);
+ assert_se(namespace_flags_from_string("ipc", &ul) == 0 && ul == CLONE_NEWIPC);
+ assert_se(namespace_flags_to_string(ul, &s) == 0 && streq(s, "ipc"));
+ s = mfree(s);
+
+ assert_se(namespace_flags_to_string(NAMESPACE_FLAGS_ALL, &s) == 0);
+ assert_se(streq(s, "cgroup ipc net mnt pid user uts"));
+ assert_se(namespace_flags_from_string(s, &ul) == 0 && ul == NAMESPACE_FLAGS_ALL);
+ s = mfree(s);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping remaining tests in %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+
+ assert_se(seccomp_restrict_namespaces(CLONE_NEWNS|CLONE_NEWNET) >= 0);
+
+ assert_se(unshare(CLONE_NEWNS) == 0);
+ assert_se(unshare(CLONE_NEWNET) == 0);
+ assert_se(unshare(CLONE_NEWUTS) == -1);
+ assert_se(errno == EPERM);
+ assert_se(unshare(CLONE_NEWIPC) == -1);
+ assert_se(errno == EPERM);
+ assert_se(unshare(CLONE_NEWNET|CLONE_NEWUTS) == -1);
+ assert_se(errno == EPERM);
+
+ /* We use fd 0 (stdin) here, which of course will fail with EINVAL on setns(). Except of course our
+ * seccomp filter worked, and hits first and makes it return EPERM */
+ assert_se(setns(0, CLONE_NEWNS) == -1);
+ assert_se(errno == EINVAL);
+ assert_se(setns(0, CLONE_NEWNET) == -1);
+ assert_se(errno == EINVAL);
+ assert_se(setns(0, CLONE_NEWUTS) == -1);
+ assert_se(errno == EPERM);
+ assert_se(setns(0, CLONE_NEWIPC) == -1);
+ assert_se(errno == EPERM);
+ assert_se(setns(0, CLONE_NEWNET|CLONE_NEWUTS) == -1);
+ assert_se(errno == EPERM);
+ assert_se(setns(0, 0) == -1);
+ assert_se(errno == EPERM);
+
+ pid = raw_clone(CLONE_NEWNS);
+ assert_se(pid >= 0);
+ if (pid == 0)
+ _exit(EXIT_SUCCESS);
+ pid = raw_clone(CLONE_NEWNET);
+ assert_se(pid >= 0);
+ if (pid == 0)
+ _exit(EXIT_SUCCESS);
+ pid = raw_clone(CLONE_NEWUTS);
+ assert_se(pid < 0);
+ assert_se(errno == EPERM);
+ pid = raw_clone(CLONE_NEWIPC);
+ assert_se(pid < 0);
+ assert_se(errno == EPERM);
+ pid = raw_clone(CLONE_NEWNET|CLONE_NEWUTS);
+ assert_se(pid < 0);
+ assert_se(errno == EPERM);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("nsseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_protect_sysctl(void) {
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ /* in containers _sysctl() is likely missing anyway */
+ if (detect_container() > 0) {
+ log_notice("Testing in container, skipping %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+#if __NR__sysctl > 0
+ assert_se(syscall(__NR__sysctl, NULL) < 0);
+ assert_se(errno == EFAULT);
+#endif
+
+ assert_se(seccomp_protect_sysctl() >= 0);
+
+#if __NR__sysctl > 0
+ assert_se(syscall(__NR__sysctl, 0, 0, 0) < 0);
+ assert_se(errno == EPERM);
+#endif
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("sysctlseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_restrict_address_families(void) {
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ int fd;
+ Set *s;
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ assert_se(fd >= 0);
+ safe_close(fd);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+ assert_se(fd >= 0);
+ safe_close(fd);
+
+ fd = socket(AF_NETLINK, SOCK_DGRAM, 0);
+ assert_se(fd >= 0);
+ safe_close(fd);
+
+ assert_se(s = set_new(NULL));
+ assert_se(set_put(s, INT_TO_PTR(AF_UNIX)) >= 0);
+
+ assert_se(seccomp_restrict_address_families(s, false) >= 0);
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ assert_se(fd >= 0);
+ safe_close(fd);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
+ assert_se(fd >= 0);
+ safe_close(fd);
+#else
+ assert_se(fd < 0);
+ assert_se(errno == EAFNOSUPPORT);
+#endif
+
+ fd = socket(AF_NETLINK, SOCK_DGRAM, 0);
+ assert_se(fd >= 0);
+ safe_close(fd);
+
+ set_clear(s);
+
+ assert_se(set_put(s, INT_TO_PTR(AF_INET)) >= 0);
+
+ assert_se(seccomp_restrict_address_families(s, true) >= 0);
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ assert_se(fd >= 0);
+ safe_close(fd);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
+ assert_se(fd >= 0);
+ safe_close(fd);
+#else
+ assert_se(fd < 0);
+ assert_se(errno == EAFNOSUPPORT);
+#endif
+
+ fd = socket(AF_NETLINK, SOCK_DGRAM, 0);
+#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
+ assert_se(fd >= 0);
+ safe_close(fd);
+#else
+ assert_se(fd < 0);
+ assert_se(errno == EAFNOSUPPORT);
+#endif
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("socketseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_restrict_realtime(void) {
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ /* in containers RT privs are likely missing anyway */
+ if (detect_container() > 0) {
+ log_notice("Testing in container, skipping %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ assert_se(sched_setscheduler(0, SCHED_FIFO, &(struct sched_param) { .sched_priority = 1 }) >= 0);
+ assert_se(sched_setscheduler(0, SCHED_RR, &(struct sched_param) { .sched_priority = 1 }) >= 0);
+ assert_se(sched_setscheduler(0, SCHED_IDLE, &(struct sched_param) { .sched_priority = 0 }) >= 0);
+ assert_se(sched_setscheduler(0, SCHED_BATCH, &(struct sched_param) { .sched_priority = 0 }) >= 0);
+ assert_se(sched_setscheduler(0, SCHED_OTHER, &(struct sched_param) {}) >= 0);
+
+ assert_se(seccomp_restrict_realtime() >= 0);
+
+ assert_se(sched_setscheduler(0, SCHED_IDLE, &(struct sched_param) { .sched_priority = 0 }) >= 0);
+ assert_se(sched_setscheduler(0, SCHED_BATCH, &(struct sched_param) { .sched_priority = 0 }) >= 0);
+ assert_se(sched_setscheduler(0, SCHED_OTHER, &(struct sched_param) {}) >= 0);
+
+ assert_se(sched_setscheduler(0, SCHED_FIFO, &(struct sched_param) { .sched_priority = 1 }) < 0);
+ assert_se(errno == EPERM);
+ assert_se(sched_setscheduler(0, SCHED_RR, &(struct sched_param) { .sched_priority = 1 }) < 0);
+ assert_se(errno == EPERM);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("realtimeseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_memory_deny_write_execute_mmap(void) {
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ void *p;
+
+ p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
+ assert_se(p != MAP_FAILED);
+ assert_se(munmap(p, page_size()) >= 0);
+
+ p = mmap(NULL, page_size(), PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
+ assert_se(p != MAP_FAILED);
+ assert_se(munmap(p, page_size()) >= 0);
+
+ assert_se(seccomp_memory_deny_write_execute() >= 0);
+
+ p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
+#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc64__) || defined(__arm__) || defined(__aarch64__)
+ assert_se(p == MAP_FAILED);
+ assert_se(errno == EPERM);
+#else /* unknown architectures */
+ assert_se(p != MAP_FAILED);
+ assert_se(munmap(p, page_size()) >= 0);
+#endif
+
+ p = mmap(NULL, page_size(), PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
+ assert_se(p != MAP_FAILED);
+ assert_se(munmap(p, page_size()) >= 0);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("memoryseccomp-mmap", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_memory_deny_write_execute_shmat(void) {
+ int shmid;
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ shmid = shmget(IPC_PRIVATE, page_size(), 0);
+ assert_se(shmid >= 0);
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ void *p;
+
+ p = shmat(shmid, NULL, 0);
+ assert_se(p != MAP_FAILED);
+ assert_se(shmdt(p) == 0);
+
+ p = shmat(shmid, NULL, SHM_EXEC);
+ assert_se(p != MAP_FAILED);
+ assert_se(shmdt(p) == 0);
+
+ assert_se(seccomp_memory_deny_write_execute() >= 0);
+
+ p = shmat(shmid, NULL, SHM_EXEC);
+#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
+ assert_se(p == MAP_FAILED);
+ assert_se(errno == EPERM);
+#else /* __i386__, __powerpc64__, and "unknown" architectures */
+ assert_se(p != MAP_FAILED);
+ assert_se(shmdt(p) == 0);
+#endif
+
+ p = shmat(shmid, NULL, 0);
+ assert_se(p != MAP_FAILED);
+ assert_se(shmdt(p) == 0);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("memoryseccomp-shmat", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_restrict_archs(void) {
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ _cleanup_set_free_ Set *s = NULL;
+
+ assert_se(access("/", F_OK) >= 0);
+
+ assert_se(s = set_new(NULL));
+
+#ifdef __x86_64__
+ assert_se(set_put(s, UINT32_TO_PTR(SCMP_ARCH_X86+1)) >= 0);
+#endif
+ assert_se(seccomp_restrict_archs(s) >= 0);
+
+ assert_se(access("/", F_OK) >= 0);
+ assert_se(seccomp_restrict_archs(NULL) >= 0);
+
+ assert_se(access("/", F_OK) >= 0);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("archseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_load_syscall_filter_set_raw(void) {
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ _cleanup_hashmap_free_ Hashmap *s = NULL;
+
+ assert_se(access("/", F_OK) >= 0);
+ assert_se(poll(NULL, 0, 0) == 0);
+
+ assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, NULL, SCMP_ACT_KILL, true) >= 0);
+ assert_se(access("/", F_OK) >= 0);
+ assert_se(poll(NULL, 0, 0) == 0);
+
+ assert_se(s = hashmap_new(NULL));
+#if SCMP_SYS(access) >= 0
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_access + 1), INT_TO_PTR(-1)) >= 0);
+#else
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_faccessat + 1), INT_TO_PTR(-1)) >= 0);
+#endif
+
+ assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, s, SCMP_ACT_ERRNO(EUCLEAN), true) >= 0);
+
+ assert_se(access("/", F_OK) < 0);
+ assert_se(errno == EUCLEAN);
+
+ assert_se(poll(NULL, 0, 0) == 0);
+
+ s = hashmap_free(s);
+
+ assert_se(s = hashmap_new(NULL));
+#if SCMP_SYS(access) >= 0
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_access + 1), INT_TO_PTR(EILSEQ)) >= 0);
+#else
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_faccessat + 1), INT_TO_PTR(EILSEQ)) >= 0);
+#endif
+
+ assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, s, SCMP_ACT_ERRNO(EUCLEAN), true) >= 0);
+
+ assert_se(access("/", F_OK) < 0);
+ assert_se(errno == EILSEQ);
+
+ assert_se(poll(NULL, 0, 0) == 0);
+
+ s = hashmap_free(s);
+
+ assert_se(s = hashmap_new(NULL));
+#if SCMP_SYS(poll) >= 0
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_poll + 1), INT_TO_PTR(-1)) >= 0);
+#else
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_ppoll + 1), INT_TO_PTR(-1)) >= 0);
+#endif
+
+ assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, s, SCMP_ACT_ERRNO(EUNATCH), true) >= 0);
+
+ assert_se(access("/", F_OK) < 0);
+ assert_se(errno == EILSEQ);
+
+ assert_se(poll(NULL, 0, 0) < 0);
+ assert_se(errno == EUNATCH);
+
+ s = hashmap_free(s);
+
+ assert_se(s = hashmap_new(NULL));
+#if SCMP_SYS(poll) >= 0
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_poll + 1), INT_TO_PTR(EILSEQ)) >= 0);
+#else
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_ppoll + 1), INT_TO_PTR(EILSEQ)) >= 0);
+#endif
+
+ assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, s, SCMP_ACT_ERRNO(EUNATCH), true) >= 0);
+
+ assert_se(access("/", F_OK) < 0);
+ assert_se(errno == EILSEQ);
+
+ assert_se(poll(NULL, 0, 0) < 0);
+ assert_se(errno == EILSEQ);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("syscallrawseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_lock_personality(void) {
+ unsigned long current;
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ assert_se(opinionated_personality(&current) >= 0);
+
+ log_info("current personality=%lu", current);
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ assert_se(seccomp_lock_personality(current) >= 0);
+
+ assert_se((unsigned long) safe_personality(current) == current);
+
+ /* Note, we also test that safe_personality() works correctly, by checkig whether errno is properly
+ * set, in addition to the return value */
+ errno = 0;
+ assert_se(safe_personality(PER_LINUX | ADDR_NO_RANDOMIZE) == -EPERM);
+ assert_se(errno == EPERM);
+
+ assert_se(safe_personality(PER_LINUX | MMAP_PAGE_ZERO) == -EPERM);
+ assert_se(safe_personality(PER_LINUX | ADDR_COMPAT_LAYOUT) == -EPERM);
+ assert_se(safe_personality(PER_LINUX | READ_IMPLIES_EXEC) == -EPERM);
+ assert_se(safe_personality(PER_LINUX_32BIT) == -EPERM);
+ assert_se(safe_personality(PER_SVR4) == -EPERM);
+ assert_se(safe_personality(PER_BSD) == -EPERM);
+ assert_se(safe_personality(current == PER_LINUX ? PER_LINUX32 : PER_LINUX) == -EPERM);
+ assert_se(safe_personality(PER_LINUX32_3GB) == -EPERM);
+ assert_se(safe_personality(PER_UW7) == -EPERM);
+ assert_se(safe_personality(0x42) == -EPERM);
+
+ assert_se(safe_personality(PERSONALITY_INVALID) == -EPERM); /* maybe remove this later */
+
+ assert_se((unsigned long) personality(current) == current);
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("lockpersonalityseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_seccomp_arch_to_string();
+ test_architecture_table();
+ test_syscall_filter_set_find();
+ test_filter_sets();
+ test_filter_sets_ordered();
+ test_restrict_namespace();
+ test_protect_sysctl();
+ test_restrict_address_families();
+ test_restrict_realtime();
+ test_memory_deny_write_execute_mmap();
+ test_memory_deny_write_execute_shmat();
+ test_restrict_archs();
+ test_load_syscall_filter_set_raw();
+ test_lock_personality();
+
+ return 0;
+}
diff --git a/src/test/test-selinux.c b/src/test/test-selinux.c
new file mode 100644
index 0000000..59b4f71
--- /dev/null
+++ b/src/test/test-selinux.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "selinux-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "time-util.h"
+#include "util.h"
+
+static void test_testing(void) {
+ bool b;
+
+ log_info("============ %s ==========", __func__);
+
+ b = mac_selinux_use();
+ log_info("mac_selinux_use → %s", yes_no(b));
+
+ b = mac_selinux_use();
+ log_info("mac_selinux_use → %s", yes_no(b));
+
+ mac_selinux_retest();
+
+ b = mac_selinux_use();
+ log_info("mac_selinux_use → %s", yes_no(b));
+
+ b = mac_selinux_use();
+ log_info("mac_selinux_use → %s", yes_no(b));
+}
+
+static void test_loading(void) {
+ usec_t n1, n2;
+ int r;
+
+ log_info("============ %s ==========", __func__);
+
+ n1 = now(CLOCK_MONOTONIC);
+ r = mac_selinux_init();
+ n2 = now(CLOCK_MONOTONIC);
+ log_info_errno(r, "mac_selinux_init → %d %.2fs (%m)", r, (n2 - n1)/1e6);
+}
+
+static void test_cleanup(void) {
+ usec_t n1, n2;
+
+ log_info("============ %s ==========", __func__);
+
+ n1 = now(CLOCK_MONOTONIC);
+ mac_selinux_finish();
+ n2 = now(CLOCK_MONOTONIC);
+ log_info("mac_selinux_finish → %.2fs", (n2 - n1)/1e6);
+}
+
+static void test_misc(const char* fname) {
+ _cleanup_(mac_selinux_freep) char *label = NULL, *label2 = NULL, *label3 = NULL;
+ int r;
+ _cleanup_close_ int fd = -1;
+
+ log_info("============ %s ==========", __func__);
+
+ r = mac_selinux_get_our_label(&label);
+ log_info_errno(r, "mac_selinux_get_our_label → %d, \"%s\" (%m)",
+ r, strnull(label));
+
+ r = mac_selinux_get_create_label_from_exe(fname, &label2);
+ log_info_errno(r, "mac_selinux_create_label_from_exe → %d, \"%s\" (%m)",
+ r, strnull(label2));
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ assert_se(fd >= 0);
+
+ r = mac_selinux_get_child_mls_label(fd, fname, label2, &label3);
+ log_info_errno(r, "mac_selinux_get_child_mls_label → %d, \"%s\" (%m)",
+ r, strnull(label3));
+}
+
+static void test_create_file_prepare(const char* fname) {
+ int r;
+
+ log_info("============ %s ==========", __func__);
+
+ r = mac_selinux_create_file_prepare(fname, S_IRWXU);
+ log_info_errno(r, "mac_selinux_create_file_prepare → %d (%m)", r);
+
+ mac_selinux_create_file_clear();
+}
+
+int main(int argc, char **argv) {
+ const char *path = SYSTEMD_BINARY_PATH;
+ if (argc >= 2)
+ path = argv[1];
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_testing();
+ test_loading();
+ test_misc(path);
+ test_create_file_prepare(path);
+ test_cleanup();
+
+ return 0;
+}
diff --git a/src/test/test-serialize.c b/src/test/test-serialize.c
new file mode 100644
index 0000000..a57d5db
--- /dev/null
+++ b/src/test/test-serialize.c
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "serialize.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+char long_string[LONG_LINE_MAX+1];
+
+static void test_serialize_item(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-serialize.XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+
+ assert_se(fmkostemp_safe(fn, "r+", &f) == 0);
+ log_info("/* %s (%s) */", __func__, fn);
+
+ assert_se(serialize_item(f, "a", NULL) == 0);
+ assert_se(serialize_item(f, "a", "bbb") == 1);
+ assert_se(serialize_item(f, "a", "bbb") == 1);
+ assert_se(serialize_item(f, "a", long_string) == -EINVAL);
+ assert_se(serialize_item(f, long_string, "a") == -EINVAL);
+ assert_se(serialize_item(f, long_string, long_string) == -EINVAL);
+
+ rewind(f);
+
+ _cleanup_free_ char *line1 = NULL, *line2 = NULL, *line3 = NULL;
+ assert_se(read_line(f, LONG_LINE_MAX, &line1) > 0);
+ assert_se(streq(line1, "a=bbb"));
+ assert_se(read_line(f, LONG_LINE_MAX, &line2) > 0);
+ assert_se(streq(line2, "a=bbb"));
+ assert_se(read_line(f, LONG_LINE_MAX, &line3) == 0);
+ assert_se(streq(line3, ""));
+}
+
+static void test_serialize_item_escaped(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-serialize.XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+
+ assert_se(fmkostemp_safe(fn, "r+", &f) == 0);
+ log_info("/* %s (%s) */", __func__, fn);
+
+ assert_se(serialize_item_escaped(f, "a", NULL) == 0);
+ assert_se(serialize_item_escaped(f, "a", "bbb") == 1);
+ assert_se(serialize_item_escaped(f, "a", "bbb") == 1);
+ assert_se(serialize_item_escaped(f, "a", long_string) == -EINVAL);
+ assert_se(serialize_item_escaped(f, long_string, "a") == -EINVAL);
+ assert_se(serialize_item_escaped(f, long_string, long_string) == -EINVAL);
+
+ rewind(f);
+
+ _cleanup_free_ char *line1 = NULL, *line2 = NULL, *line3 = NULL;
+ assert_se(read_line(f, LONG_LINE_MAX, &line1) > 0);
+ assert_se(streq(line1, "a=bbb"));
+ assert_se(read_line(f, LONG_LINE_MAX, &line2) > 0);
+ assert_se(streq(line2, "a=bbb"));
+ assert_se(read_line(f, LONG_LINE_MAX, &line3) == 0);
+ assert_se(streq(line3, ""));
+}
+
+static void test_serialize_usec(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-serialize.XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+
+ assert_se(fmkostemp_safe(fn, "r+", &f) == 0);
+ log_info("/* %s (%s) */", __func__, fn);
+
+ assert_se(serialize_usec(f, "usec1", USEC_INFINITY) == 0);
+ assert_se(serialize_usec(f, "usec2", 0) == 1);
+ assert_se(serialize_usec(f, "usec3", USEC_INFINITY-1) == 1);
+
+ rewind(f);
+
+ _cleanup_free_ char *line1 = NULL, *line2 = NULL;
+ usec_t x;
+
+ assert_se(read_line(f, LONG_LINE_MAX, &line1) > 0);
+ assert_se(streq(line1, "usec2=0"));
+ assert_se(deserialize_usec(line1 + 6, &x) == 0);
+ assert_se(x == 0);
+
+ assert_se(read_line(f, LONG_LINE_MAX, &line2) > 0);
+ assert_se(startswith(line2, "usec3="));
+ assert_se(deserialize_usec(line2 + 6, &x) == 0);
+ assert_se(x == USEC_INFINITY-1);
+}
+
+static void test_serialize_strv(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-serialize.XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+
+ char **strv = STRV_MAKE("a", "b", "foo foo",
+ "nasty1 \"",
+ "\"nasty2 ",
+ "nasty3 '",
+ "\"nasty4 \"",
+ "nasty5\n",
+ "\nnasty5\nfoo=bar",
+ "\nnasty5\nfoo=bar");
+
+ assert_se(fmkostemp_safe(fn, "r+", &f) == 0);
+ log_info("/* %s (%s) */", __func__, fn);
+
+ assert_se(serialize_strv(f, "strv1", NULL) == 0);
+ assert_se(serialize_strv(f, "strv2", STRV_MAKE_EMPTY) == 0);
+ assert_se(serialize_strv(f, "strv3", strv) == 1);
+ assert_se(serialize_strv(f, "strv4", STRV_MAKE(long_string)) == -EINVAL);
+
+ rewind(f);
+
+ _cleanup_strv_free_ char **strv2 = NULL;
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ int r;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r == 0)
+ break;
+ assert_se(r > 0);
+
+ const char *t = startswith(line, "strv3=");
+ assert_se(t);
+
+ char *un;
+ assert_se(cunescape(t, 0, &un) >= 0);
+ assert_se(strv_consume(&strv2, un) >= 0);
+ }
+
+ assert_se(strv_equal(strv, strv2));
+}
+
+static void test_deserialize_environment(void) {
+ _cleanup_strv_free_ char **env;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(env = strv_new("A=1"));
+
+ assert_se(deserialize_environment("B=2", &env) >= 0);
+ assert_se(deserialize_environment("FOO%%=a\\177b\\nc\\td e", &env) >= 0);
+
+ assert_se(strv_equal(env, STRV_MAKE("A=1", "B=2", "FOO%%=a\177b\nc\td e")));
+
+ assert_se(deserialize_environment("foo\\", &env) < 0);
+ assert_se(deserialize_environment("bar\\_baz", &env) < 0);
+}
+
+static void test_serialize_environment(void) {
+ _cleanup_strv_free_ char **env = NULL, **env2 = NULL;
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-env-util.XXXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert_se(fmkostemp_safe(fn, "r+", &f) == 0);
+ log_info("/* %s (%s) */", __func__, fn);
+
+ assert_se(env = strv_new("A=1",
+ "B=2",
+ "C=ąęółń",
+ "D=D=a\\x0Ab",
+ "FOO%%=a\177b\nc\td e"));
+
+ assert_se(serialize_strv(f, "env", env) == 1);
+ assert_se(fflush_and_check(f) == 0);
+
+ rewind(f);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ assert_se(r >= 0);
+
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+
+ assert_se(startswith(l, "env="));
+
+ r = deserialize_environment(l+4, &env2);
+ assert_se(r >= 0);
+ }
+ assert_se(feof(f));
+
+ assert_se(strv_equal(env, env2));
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ memset(long_string, 'x', sizeof(long_string)-1);
+ char_array_0(long_string);
+
+ test_serialize_item();
+ test_serialize_item_escaped();
+ test_serialize_usec();
+ test_serialize_strv();
+ test_deserialize_environment();
+ test_serialize_environment();
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-set-disable-mempool.c b/src/test/test-set-disable-mempool.c
new file mode 100644
index 0000000..aea83d2
--- /dev/null
+++ b/src/test/test-set-disable-mempool.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <pthread.h>
+
+#include "process-util.h"
+#include "set.h"
+#include "tests.h"
+
+#define NUM 100
+
+static void* thread(void *p) {
+ Set **s = p;
+
+ assert_se(s);
+ assert_se(*s);
+
+ assert_se(!is_main_thread());
+ assert_se(set_size(*s) == NUM);
+ *s = set_free(*s);
+
+ return NULL;
+}
+
+static void test_one(const char *val) {
+ pthread_t t;
+ int x[NUM] = {};
+ unsigned i;
+ Set *s;
+
+ log_info("Testing with SYSTEMD_MEMPOOL=%s", val);
+ assert_se(setenv("SYSTEMD_MEMPOOL", val, true) == 0);
+ assert_se(is_main_thread());
+
+ assert_se(s = set_new(NULL));
+ for (i = 0; i < NUM; i++)
+ assert_se(set_put(s, &x[i]));
+
+ assert_se(pthread_create(&t, NULL, thread, &s) == 0);
+ assert_se(pthread_join(t, NULL) == 0);
+
+ assert_se(!s);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_one("0");
+ /* The value $SYSTEMD_MEMPOOL= is cached. So the following
+ * test should also succeed. */
+ test_one("1");
+
+ return 0;
+}
diff --git a/src/test/test-set.c b/src/test/test-set.c
new file mode 100644
index 0000000..340edeb
--- /dev/null
+++ b/src/test/test-set.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "set.h"
+
+static void test_set_steal_first(void) {
+ _cleanup_set_free_ Set *m = NULL;
+ int seen[3] = {};
+ char *val;
+
+ m = set_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(set_put(m, (void*) "1") == 1);
+ assert_se(set_put(m, (void*) "22") == 1);
+ assert_se(set_put(m, (void*) "333") == 1);
+
+ while ((val = set_steal_first(m)))
+ seen[strlen(val) - 1]++;
+
+ assert_se(seen[0] == 1 && seen[1] == 1 && seen[2] == 1);
+
+ assert_se(set_isempty(m));
+}
+
+typedef struct Item {
+ int seen;
+} Item;
+static void item_seen(Item *item) {
+ item->seen++;
+}
+
+static void test_set_free_with_destructor(void) {
+ Set *m;
+ struct Item items[4] = {};
+ unsigned i;
+
+ assert_se(m = set_new(NULL));
+ for (i = 0; i < ELEMENTSOF(items) - 1; i++)
+ assert_se(set_put(m, items + i) == 1);
+
+ m = set_free_with_destructor(m, item_seen);
+ assert_se(items[0].seen == 1);
+ assert_se(items[1].seen == 1);
+ assert_se(items[2].seen == 1);
+ assert_se(items[3].seen == 0);
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(item_hash_ops, void, trivial_hash_func, trivial_compare_func, Item, item_seen);
+
+static void test_set_free_with_hash_ops(void) {
+ Set *m;
+ struct Item items[4] = {};
+ unsigned i;
+
+ assert_se(m = set_new(&item_hash_ops));
+ for (i = 0; i < ELEMENTSOF(items) - 1; i++)
+ assert_se(set_put(m, items + i) == 1);
+
+ m = set_free(m);
+ assert_se(items[0].seen == 1);
+ assert_se(items[1].seen == 1);
+ assert_se(items[2].seen == 1);
+ assert_se(items[3].seen == 0);
+}
+
+static void test_set_put(void) {
+ _cleanup_set_free_ Set *m = NULL;
+
+ m = set_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(set_put(m, (void*) "1") == 1);
+ assert_se(set_put(m, (void*) "22") == 1);
+ assert_se(set_put(m, (void*) "333") == 1);
+ assert_se(set_put(m, (void*) "333") == 0);
+ assert_se(set_remove(m, (void*) "333"));
+ assert_se(set_put(m, (void*) "333") == 1);
+ assert_se(set_put(m, (void*) "333") == 0);
+ assert_se(set_put(m, (void*) "22") == 0);
+}
+
+int main(int argc, const char *argv[]) {
+ test_set_steal_first();
+ test_set_free_with_destructor();
+ test_set_free_with_hash_ops();
+ test_set_put();
+
+ return 0;
+}
diff --git a/src/test/test-sigbus.c b/src/test/test-sigbus.c
new file mode 100644
index 0000000..d2666dd
--- /dev/null
+++ b/src/test/test-sigbus.c
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/mman.h>
+
+#if HAVE_VALGRIND_VALGRIND_H
+# include <valgrind/valgrind.h>
+#endif
+
+#include "fd-util.h"
+#include "sigbus.h"
+#include "tests.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_close_ int fd = -1;
+ char template[] = "/tmp/sigbus-test-XXXXXX";
+ void *addr = NULL;
+ uint8_t *p;
+
+ test_setup_logging(LOG_INFO);
+
+#if HAS_FEATURE_ADDRESS_SANITIZER
+ return log_tests_skipped("address-sanitizer is enabled");
+#endif
+#if HAVE_VALGRIND_VALGRIND_H
+ if (RUNNING_ON_VALGRIND)
+ return log_tests_skipped("This test cannot run on valgrind");
+#endif
+
+ sigbus_install();
+
+ assert_se(sigbus_pop(&addr) == 0);
+
+ assert_se((fd = mkostemp(template, O_RDWR|O_CREAT|O_EXCL)) >= 0);
+ assert_se(unlink(template) >= 0);
+ assert_se(posix_fallocate(fd, 0, page_size() * 8) >= 0);
+
+ p = mmap(NULL, page_size() * 16, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ assert_se(p != MAP_FAILED);
+
+ assert_se(sigbus_pop(&addr) == 0);
+
+ p[0] = 0xFF;
+ assert_se(sigbus_pop(&addr) == 0);
+
+ p[page_size()] = 0xFF;
+ assert_se(sigbus_pop(&addr) == 0);
+
+ p[page_size()*8] = 0xFF;
+ p[page_size()*8+1] = 0xFF;
+ p[page_size()*10] = 0xFF;
+ assert_se(sigbus_pop(&addr) > 0);
+ assert_se(addr == p + page_size() * 8);
+ assert_se(sigbus_pop(&addr) > 0);
+ assert_se(addr == p + page_size() * 10);
+ assert_se(sigbus_pop(&addr) == 0);
+
+ sigbus_reset();
+}
diff --git a/src/test/test-signal-util.c b/src/test/test-signal-util.c
new file mode 100644
index 0000000..8ea4163
--- /dev/null
+++ b/src/test/test-signal-util.c
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <signal.h>
+#include <unistd.h>
+
+#include "log.h"
+#include "macro.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "process-util.h"
+
+#define info(sig) log_info(#sig " = " STRINGIFY(sig) " = %d", sig)
+
+static void test_rt_signals(void) {
+ info(SIGRTMIN);
+ info(SIGRTMAX);
+
+ /* We use signals SIGRTMIN+0 to SIGRTMIN+24 unconditionally */
+ assert(SIGRTMAX - SIGRTMIN >= 24);
+}
+
+static void test_signal_to_string_one(int val) {
+ const char *p;
+
+ assert_se(p = signal_to_string(val));
+
+ assert_se(signal_from_string(p) == val);
+
+ p = strjoina("SIG", p);
+ assert_se(signal_from_string(p) == val);
+}
+
+static void test_signal_from_string_one(const char *s, int val) {
+ const char *p;
+
+ assert_se(signal_from_string(s) == val);
+
+ p = strjoina("SIG", s);
+ assert_se(signal_from_string(p) == val);
+}
+
+static void test_signal_from_string_number(const char *s, int val) {
+ const char *p;
+
+ assert_se(signal_from_string(s) == val);
+
+ p = strjoina("SIG", s);
+ assert_se(signal_from_string(p) == -EINVAL);
+}
+
+static void test_signal_from_string(void) {
+ char buf[STRLEN("RTMIN+") + DECIMAL_STR_MAX(int) + 1];
+
+ test_signal_to_string_one(SIGHUP);
+ test_signal_to_string_one(SIGTERM);
+ test_signal_to_string_one(SIGRTMIN);
+ test_signal_to_string_one(SIGRTMIN+3);
+ test_signal_to_string_one(SIGRTMAX-4);
+
+ test_signal_from_string_one("RTMIN", SIGRTMIN);
+ test_signal_from_string_one("RTMAX", SIGRTMAX);
+
+ xsprintf(buf, "RTMIN+%d", SIGRTMAX-SIGRTMIN);
+ test_signal_from_string_one(buf, SIGRTMAX);
+
+ xsprintf(buf, "RTMIN+%d", INT_MAX);
+ test_signal_from_string_one(buf, -ERANGE);
+
+ xsprintf(buf, "RTMAX-%d", SIGRTMAX-SIGRTMIN);
+ test_signal_from_string_one(buf, SIGRTMIN);
+
+ xsprintf(buf, "RTMAX-%d", INT_MAX);
+ test_signal_from_string_one(buf, -ERANGE);
+
+ test_signal_from_string_one("", -EINVAL);
+ test_signal_from_string_one("hup", -EINVAL);
+ test_signal_from_string_one("HOGEHOGE", -EINVAL);
+
+ test_signal_from_string_one("RTMIN-5", -EINVAL);
+ test_signal_from_string_one("RTMIN- 5", -EINVAL);
+ test_signal_from_string_one("RTMIN -5", -EINVAL);
+ test_signal_from_string_one("RTMIN+ 5", -EINVAL);
+ test_signal_from_string_one("RTMIN +5", -EINVAL);
+ test_signal_from_string_one("RTMIN+100", -ERANGE);
+ test_signal_from_string_one("RTMIN+-3", -EINVAL);
+ test_signal_from_string_one("RTMIN++3", -EINVAL);
+ test_signal_from_string_one("RTMIN+HUP", -EINVAL);
+ test_signal_from_string_one("RTMIN3", -EINVAL);
+
+ test_signal_from_string_one("RTMAX+5", -EINVAL);
+ test_signal_from_string_one("RTMAX+ 5", -EINVAL);
+ test_signal_from_string_one("RTMAX +5", -EINVAL);
+ test_signal_from_string_one("RTMAX- 5", -EINVAL);
+ test_signal_from_string_one("RTMAX -5", -EINVAL);
+ test_signal_from_string_one("RTMAX-100", -ERANGE);
+ test_signal_from_string_one("RTMAX-+3", -EINVAL);
+ test_signal_from_string_one("RTMAX--3", -EINVAL);
+ test_signal_from_string_one("RTMAX-HUP", -EINVAL);
+
+ test_signal_from_string_number("3", 3);
+ test_signal_from_string_number("+5", 5);
+ test_signal_from_string_number(" +5", 5);
+ test_signal_from_string_number("10000", -ERANGE);
+ test_signal_from_string_number("-2", -ERANGE);
+}
+
+static void test_block_signals(void) {
+ sigset_t ss;
+
+ assert_se(sigprocmask(0, NULL, &ss) >= 0);
+
+ assert_se(sigismember(&ss, SIGUSR1) == 0);
+ assert_se(sigismember(&ss, SIGALRM) == 0);
+ assert_se(sigismember(&ss, SIGVTALRM) == 0);
+
+ {
+ BLOCK_SIGNALS(SIGUSR1, SIGVTALRM);
+
+ assert_se(sigprocmask(0, NULL, &ss) >= 0);
+ assert_se(sigismember(&ss, SIGUSR1) == 1);
+ assert_se(sigismember(&ss, SIGALRM) == 0);
+ assert_se(sigismember(&ss, SIGVTALRM) == 1);
+
+ }
+
+ assert_se(sigprocmask(0, NULL, &ss) >= 0);
+ assert_se(sigismember(&ss, SIGUSR1) == 0);
+ assert_se(sigismember(&ss, SIGALRM) == 0);
+ assert_se(sigismember(&ss, SIGVTALRM) == 0);
+}
+
+static void test_ignore_signals(void) {
+ assert_se(ignore_signals(SIGINT, -1) >= 0);
+ assert_se(kill(getpid_cached(), SIGINT) >= 0);
+ assert_se(ignore_signals(SIGUSR1, SIGUSR2, SIGTERM, SIGPIPE, -1) >= 0);
+ assert_se(kill(getpid_cached(), SIGUSR1) >= 0);
+ assert_se(kill(getpid_cached(), SIGUSR2) >= 0);
+ assert_se(kill(getpid_cached(), SIGTERM) >= 0);
+ assert_se(kill(getpid_cached(), SIGPIPE) >= 0);
+ assert_se(default_signals(SIGINT, SIGUSR1, SIGUSR2, SIGTERM, SIGPIPE, -1) >= 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_rt_signals();
+ test_signal_from_string();
+ test_block_signals();
+ test_ignore_signals();
+
+ return 0;
+}
diff --git a/src/test/test-siphash24.c b/src/test/test-siphash24.c
new file mode 100644
index 0000000..218200e
--- /dev/null
+++ b/src/test/test-siphash24.c
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "siphash24.h"
+#include "util.h"
+
+#define ITERATIONS 10000000ULL
+
+static void do_test(const uint8_t *in, size_t len, const uint8_t *key) {
+ struct siphash state = {};
+ uint64_t out;
+ unsigned i, j;
+
+ out = siphash24(in, len, key);
+ assert_se(out == 0xa129ca6149be45e5);
+
+ /* verify the internal state as given in the above paper */
+ siphash24_init(&state, key);
+ assert_se(state.v0 == 0x7469686173716475);
+ assert_se(state.v1 == 0x6b617f6d656e6665);
+ assert_se(state.v2 == 0x6b7f62616d677361);
+ assert_se(state.v3 == 0x7b6b696e727e6c7b);
+ siphash24_compress(in, len, &state);
+ assert_se(state.v0 == 0x4a017198de0a59e0);
+ assert_se(state.v1 == 0x0d52f6f62a4f59a4);
+ assert_se(state.v2 == 0x634cb3577b01fd3d);
+ assert_se(state.v3 == 0xa5224d6f55c7d9c8);
+ out = siphash24_finalize(&state);
+ assert_se(out == 0xa129ca6149be45e5);
+ assert_se(state.v0 == 0xf6bcd53893fecff1);
+ assert_se(state.v1 == 0x54b9964c7ea0d937);
+ assert_se(state.v2 == 0x1b38329c099bb55a);
+ assert_se(state.v3 == 0x1814bb89ad7be679);
+
+ /* verify that decomposing the input in three chunks gives the
+ same result */
+ for (i = 0; i < len; i++) {
+ for (j = i; j < len; j++) {
+ siphash24_init(&state, key);
+ siphash24_compress(in, i, &state);
+ siphash24_compress(&in[i], j - i, &state);
+ siphash24_compress(&in[j], len - j, &state);
+ out = siphash24_finalize(&state);
+ assert_se(out == 0xa129ca6149be45e5);
+ }
+ }
+}
+
+static void test_short_hashes(void) {
+ const uint8_t one[] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16 };
+ const uint8_t key[16] = { 0x22, 0x24, 0x41, 0x22, 0x55, 0x77, 0x88, 0x07,
+ 0x23, 0x09, 0x23, 0x14, 0x0c, 0x33, 0x0e, 0x0f};
+ uint8_t two[sizeof one] = {};
+
+ struct siphash state1 = {}, state2 = {};
+ unsigned i, j;
+
+ siphash24_init(&state1, key);
+ siphash24_init(&state2, key);
+
+ /* hashing 1, 2, 3, 4, 5, ..., 16 bytes, with the byte after the buffer different */
+ for (i = 1; i <= sizeof one; i++) {
+ siphash24_compress(one, i, &state1);
+
+ two[i-1] = one[i-1];
+ siphash24_compress(two, i, &state2);
+
+ assert_se(memcmp(&state1, &state2, sizeof state1) == 0);
+ }
+
+ /* hashing n and 1, n and 2, n and 3, ..., n-1 and 1, n-2 and 2, ... */
+ for (i = sizeof one; i > 0; i--) {
+ zero(two);
+
+ for (j = 1; j <= sizeof one; j++) {
+ siphash24_compress(one, i, &state1);
+ siphash24_compress(one, j, &state1);
+
+ siphash24_compress(one, i, &state2);
+ two[j-1] = one[j-1];
+ siphash24_compress(two, j, &state2);
+
+ assert_se(memcmp(&state1, &state2, sizeof state1) == 0);
+ }
+ }
+}
+
+/* see https://131002.net/siphash/siphash.pdf, Appendix A */
+int main(int argc, char *argv[]) {
+ const uint8_t in[15] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e };
+ const uint8_t key[16] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
+ uint8_t in_buf[20];
+
+ /* Test with same input but different alignments. */
+ memcpy(in_buf, in, sizeof(in));
+ do_test(in_buf, sizeof(in), key);
+ memcpy(in_buf + 1, in, sizeof(in));
+ do_test(in_buf + 1, sizeof(in), key);
+ memcpy(in_buf + 2, in, sizeof(in));
+ do_test(in_buf + 2, sizeof(in), key);
+ memcpy(in_buf + 4, in, sizeof(in));
+ do_test(in_buf + 4, sizeof(in), key);
+
+ test_short_hashes();
+}
diff --git a/src/test/test-sizeof.c b/src/test/test-sizeof.c
new file mode 100644
index 0000000..35b0876
--- /dev/null
+++ b/src/test/test-sizeof.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#define __STDC_WANT_IEC_60559_TYPES_EXT__
+#include <float.h>
+
+#include "time-util.h"
+
+/* Print information about various types. Useful when diagnosing
+ * gcc diagnostics on an unfamiliar architecture. */
+
+#pragma GCC diagnostic ignored "-Wtype-limits"
+
+#define info(t) \
+ printf("%s → %zu bits%s, %zu byte alignment\n", STRINGIFY(t), \
+ sizeof(t)*CHAR_BIT, \
+ strstr(STRINGIFY(t), "signed") ? "" : \
+ (t)-1 < (t)0 ? ", signed" : ", unsigned", \
+ __alignof__(t))
+
+enum Enum {
+ enum_value,
+};
+
+enum BigEnum {
+ big_enum_value = UINT64_C(1),
+};
+
+enum BigEnum2 {
+ big_enum2_pos = UINT64_C(1),
+ big_enum2_neg = UINT64_C(-1),
+};
+
+int main(void) {
+ info(char);
+ info(signed char);
+ info(unsigned char);
+ info(short unsigned);
+ info(unsigned);
+ info(long unsigned);
+ info(long long unsigned);
+ info(__syscall_ulong_t);
+ info(__syscall_slong_t);
+
+ info(float);
+ info(double);
+ info(long double);
+
+#ifdef FLT128_MAX
+ info(_Float128);
+ info(_Float64);
+ info(_Float64x);
+ info(_Float32);
+ info(_Float32x);
+#endif
+
+ info(size_t);
+ info(ssize_t);
+ info(time_t);
+ info(usec_t);
+ info(__time_t);
+ info(pid_t);
+ info(uid_t);
+ info(gid_t);
+
+ info(enum Enum);
+ info(enum BigEnum);
+ info(enum BigEnum2);
+ assert_cc(sizeof(enum BigEnum2) == 8);
+ printf("big_enum2_pos → %zu\n", sizeof(big_enum2_pos));
+ printf("big_enum2_neg → %zu\n", sizeof(big_enum2_neg));
+
+ return 0;
+}
diff --git a/src/test/test-sleep.c b/src/test/test-sleep.c
new file mode 100644
index 0000000..2a6d5e7
--- /dev/null
+++ b/src/test/test-sleep.c
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <inttypes.h>
+#include <linux/fiemap.h>
+#include <stdio.h>
+
+#include "fd-util.h"
+#include "log.h"
+#include "sleep-config.h"
+#include "strv.h"
+#include "tests.h"
+#include "util.h"
+
+static void test_parse_sleep_config(void) {
+ const char *verb;
+
+ log_info("/* %s */", __func__);
+
+ FOREACH_STRING(verb, "suspend", "hibernate", "hybrid-sleep", "suspend-then-hibernate")
+ assert_se(parse_sleep_config(verb, NULL, NULL, NULL, NULL) == 0);
+}
+
+static int test_fiemap(const char *path) {
+ _cleanup_free_ struct fiemap *fiemap = NULL;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ fd = open(path, O_RDONLY | O_CLOEXEC | O_NONBLOCK);
+ if (fd < 0)
+ return log_error_errno(errno, "failed to open %s: %m", path);
+ r = read_fiemap(fd, &fiemap);
+ if (r == -EOPNOTSUPP)
+ exit(log_tests_skipped("Not supported"));
+ if (r < 0)
+ return log_error_errno(r, "Unable to read extent map for '%s': %m", path);
+ log_info("extent map information for %s:", path);
+ log_info("\t start: %" PRIu64, (uint64_t) fiemap->fm_start);
+ log_info("\t length: %" PRIu64, (uint64_t) fiemap->fm_length);
+ log_info("\t flags: %" PRIu32, fiemap->fm_flags);
+ log_info("\t number of mapped extents: %" PRIu32, fiemap->fm_mapped_extents);
+ log_info("\t extent count: %" PRIu32, fiemap->fm_extent_count);
+ if (fiemap->fm_extent_count > 0)
+ log_info("\t first extent location: %" PRIu64,
+ (uint64_t) (fiemap->fm_extents[0].fe_physical / page_size()));
+
+ return 0;
+}
+
+static void test_sleep(void) {
+ _cleanup_strv_free_ char
+ **standby = strv_new("standby"),
+ **mem = strv_new("mem"),
+ **disk = strv_new("disk"),
+ **suspend = strv_new("suspend"),
+ **reboot = strv_new("reboot"),
+ **platform = strv_new("platform"),
+ **shutdown = strv_new("shutdown"),
+ **freez = strv_new("freeze");
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ log_info("/= configuration =/");
+ log_info("Standby configured: %s", yes_no(can_sleep_state(standby) > 0));
+ log_info("Suspend configured: %s", yes_no(can_sleep_state(mem) > 0));
+ log_info("Hibernate configured: %s", yes_no(can_sleep_state(disk) > 0));
+ log_info("Hibernate+Suspend (Hybrid-Sleep) configured: %s", yes_no(can_sleep_disk(suspend) > 0));
+ log_info("Hibernate+Reboot configured: %s", yes_no(can_sleep_disk(reboot) > 0));
+ log_info("Hibernate+Platform configured: %s", yes_no(can_sleep_disk(platform) > 0));
+ log_info("Hibernate+Shutdown configured: %s", yes_no(can_sleep_disk(shutdown) > 0));
+ log_info("Freeze configured: %s", yes_no(can_sleep_state(freez) > 0));
+
+ log_info("/= running system =/");
+ r = can_sleep("suspend");
+ log_info("Suspend configured and possible: %s", r >= 0 ? yes_no(r) : strerror(-r));
+ r = can_sleep("hibernate");
+ log_info("Hibernation configured and possible: %s", r >= 0 ? yes_no(r) : strerror(-r));
+ r = can_sleep("hybrid-sleep");
+ log_info("Hybrid-sleep configured and possible: %s", r >= 0 ? yes_no(r) : strerror(-r));
+ r = can_sleep("suspend-then-hibernate");
+ log_info("Suspend-then-Hibernate configured and possible: %s", r >= 0 ? yes_no(r) : strerror(-r));
+}
+
+int main(int argc, char* argv[]) {
+ int i, r = 0, k;
+
+ test_setup_logging(LOG_INFO);
+
+ if (getuid() != 0)
+ log_warning("This program is unlikely to work for unprivileged users");
+
+ test_parse_sleep_config();
+ test_sleep();
+
+ if (argc <= 1)
+ assert_se(test_fiemap(argv[0]) == 0);
+ else
+ for (i = 1; i < argc; i++) {
+ k = test_fiemap(argv[i]);
+ if (r == 0)
+ r = k;
+ }
+
+ return r;
+}
diff --git a/src/test/test-socket-util.c b/src/test/test-socket-util.c
new file mode 100644
index 0000000..07ff487
--- /dev/null
+++ b/src/test/test-socket-util.c
@@ -0,0 +1,832 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <grp.h>
+
+#include "alloc-util.h"
+#include "async.h"
+#include "escape.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "in-addr-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_network.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static void test_ifname_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert(ifname_valid("foo"));
+ assert(ifname_valid("eth0"));
+
+ assert(!ifname_valid("0"));
+ assert(!ifname_valid("99"));
+ assert(ifname_valid("a99"));
+ assert(ifname_valid("99a"));
+
+ assert(!ifname_valid(NULL));
+ assert(!ifname_valid(""));
+ assert(!ifname_valid(" "));
+ assert(!ifname_valid(" foo"));
+ assert(!ifname_valid("bar\n"));
+ assert(!ifname_valid("."));
+ assert(!ifname_valid(".."));
+ assert(ifname_valid("foo.bar"));
+ assert(!ifname_valid("x:y"));
+
+ assert(ifname_valid("xxxxxxxxxxxxxxx"));
+ assert(!ifname_valid("xxxxxxxxxxxxxxxx"));
+}
+
+static void test_socket_address_parse_one(const char *in, int ret, int family, const char *expected) {
+ SocketAddress a;
+ _cleanup_free_ char *out = NULL;
+ int r;
+
+ r = socket_address_parse(&a, in);
+ if (r >= 0)
+ assert_se(socket_address_print(&a, &out) >= 0);
+
+ log_info("\"%s\" → %s → \"%s\" (expect \"%s\")", in,
+ r >= 0 ? "✓" : "✗", empty_to_dash(out), r >= 0 ? expected ?: in : "-");
+ assert_se(r == ret);
+ if (r >= 0) {
+ assert_se(a.sockaddr.sa.sa_family == family);
+ assert_se(streq(out, expected ?: in));
+ }
+}
+
+#define SUN_PATH_LEN (sizeof(((struct sockaddr_un){}).sun_path))
+assert_cc(sizeof(((struct sockaddr_un){}).sun_path) == 108);
+
+static void test_socket_address_parse(void) {
+ log_info("/* %s */", __func__);
+
+ test_socket_address_parse_one("junk", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("192.168.1.1", -EINVAL, 0, NULL);
+ test_socket_address_parse_one(".168.1.1", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("989.168.1.1", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("192.168.1.1:65536", -ERANGE, 0, NULL);
+ test_socket_address_parse_one("192.168.1.1:0", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("0", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("65536", -ERANGE, 0, NULL);
+
+ const int default_family = socket_ipv6_is_supported() ? AF_INET6 : AF_INET;
+
+ test_socket_address_parse_one("65535", 0, default_family, "[::]:65535");
+
+ /* The checks below will pass even if ipv6 is disabled in
+ * kernel. The underlying glibc's inet_pton() is just a string
+ * parser and doesn't make any syscalls. */
+
+ test_socket_address_parse_one("[::1]", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("[::1]8888", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("::1", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("[::1]:0", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("[::1]:65536", -ERANGE, 0, NULL);
+ test_socket_address_parse_one("[a:b:1]:8888", -EINVAL, 0, NULL);
+
+ test_socket_address_parse_one("8888", 0, default_family, "[::]:8888");
+ test_socket_address_parse_one("[2001:0db8:0000:85a3:0000:0000:ac1f:8001]:8888", 0, AF_INET6,
+ "[2001:db8:0:85a3::ac1f:8001]:8888");
+ test_socket_address_parse_one("[::1]:8888", 0, AF_INET6, NULL);
+ test_socket_address_parse_one("192.168.1.254:8888", 0, AF_INET, NULL);
+ test_socket_address_parse_one("/foo/bar", 0, AF_UNIX, NULL);
+ test_socket_address_parse_one("/", 0, AF_UNIX, NULL);
+ test_socket_address_parse_one("@abstract", 0, AF_UNIX, NULL);
+
+ {
+ char aaa[SUN_PATH_LEN + 1] = "@";
+
+ memset(aaa + 1, 'a', SUN_PATH_LEN - 1);
+ char_array_0(aaa);
+
+ test_socket_address_parse_one(aaa, -EINVAL, 0, NULL);
+
+ aaa[SUN_PATH_LEN - 1] = '\0';
+ test_socket_address_parse_one(aaa, 0, AF_UNIX, NULL);
+ }
+
+ test_socket_address_parse_one("vsock:2:1234", 0, AF_VSOCK, NULL);
+ test_socket_address_parse_one("vsock::1234", 0, AF_VSOCK, NULL);
+ test_socket_address_parse_one("vsock:2:1234x", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("vsock:2x:1234", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("vsock:2", -EINVAL, 0, NULL);
+}
+
+static void test_socket_print_unix_one(const char *in, size_t len_in, const char *expected) {
+ _cleanup_free_ char *out = NULL, *c = NULL;
+
+ SocketAddress a = { .sockaddr = { .un = { .sun_family = AF_UNIX } },
+ .size = offsetof(struct sockaddr_un, sun_path) + len_in,
+ .type = SOCK_STREAM,
+ };
+ memcpy(a.sockaddr.un.sun_path, in, len_in);
+
+ assert_se(socket_address_print(&a, &out) >= 0);
+ assert_se(c = cescape(in));
+ log_info("\"%s\" → \"%s\" (expect \"%s\")", in, out, expected);
+ assert_se(streq(out, expected));
+}
+
+static void test_socket_print_unix(void) {
+ log_info("/* %s */", __func__);
+
+ /* Some additional tests for abstract addresses which we don't parse */
+
+ test_socket_print_unix_one("\0\0\0\0", 4, "@\\000\\000\\000");
+ test_socket_print_unix_one("@abs", 5, "@abs");
+ test_socket_print_unix_one("\n", 2, "\\n");
+ test_socket_print_unix_one("", 1, "<unnamed>");
+ test_socket_print_unix_one("\0", 1, "<unnamed>");
+ test_socket_print_unix_one("\0_________________________there's 108 characters in this string_____________________________________________", 108,
+ "@_________________________there\\'s 108 characters in this string_____________________________________________");
+ test_socket_print_unix_one("////////////////////////////////////////////////////////////////////////////////////////////////////////////", 108,
+ "////////////////////////////////////////////////////////////////////////////////////////////////////////////");
+ test_socket_print_unix_one("////////////////////////////////////////////////////////////////////////////////////////////////////////////", 109,
+ "////////////////////////////////////////////////////////////////////////////////////////////////////////////");
+ test_socket_print_unix_one("\0\a\b\n\255", 6, "@\\a\\b\\n\\255\\000");
+}
+
+static void test_socket_address_parse_netlink(void) {
+ SocketAddress a;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socket_address_parse_netlink(&a, "junk") < 0);
+ assert_se(socket_address_parse_netlink(&a, "") < 0);
+
+ assert_se(socket_address_parse_netlink(&a, "route") >= 0);
+ assert_se(socket_address_parse_netlink(&a, "route 10") >= 0);
+ assert_se(a.sockaddr.sa.sa_family == AF_NETLINK);
+ assert_se(a.protocol == NETLINK_ROUTE);
+
+ /* With spaces and tabs */
+ assert_se(socket_address_parse_netlink(&a, " kobject-uevent ") >= 0);
+ assert_se(socket_address_parse_netlink(&a, " \t kobject-uevent \t 10 \t") >= 0);
+ assert_se(a.sockaddr.sa.sa_family == AF_NETLINK);
+ assert_se(a.protocol == NETLINK_KOBJECT_UEVENT);
+
+ assert_se(socket_address_parse_netlink(&a, "kobject-uevent\t10") >= 0);
+ assert_se(a.sockaddr.sa.sa_family == AF_NETLINK);
+ assert_se(a.protocol == NETLINK_KOBJECT_UEVENT);
+
+ /* oss-fuzz #6884 */
+ assert_se(socket_address_parse_netlink(&a, "\xff") < 0);
+}
+
+static void test_socket_address_equal(void) {
+ SocketAddress a, b;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socket_address_parse(&a, "192.168.1.1:8888") >= 0);
+ assert_se(socket_address_parse(&b, "192.168.1.1:888") >= 0);
+ assert_se(!socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "192.168.1.1:8888") >= 0);
+ assert_se(socket_address_parse(&b, "192.16.1.1:8888") >= 0);
+ assert_se(!socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "192.168.1.1:8888") >= 0);
+ assert_se(socket_address_parse(&b, "8888") >= 0);
+ assert_se(!socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "192.168.1.1:8888") >= 0);
+ assert_se(socket_address_parse(&b, "/foo/bar/") >= 0);
+ assert_se(!socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "192.168.1.1:8888") >= 0);
+ assert_se(socket_address_parse(&b, "192.168.1.1:8888") >= 0);
+ assert_se(socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "/foo/bar") >= 0);
+ assert_se(socket_address_parse(&b, "/foo/bar") >= 0);
+ assert_se(socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "[::1]:8888") >= 0);
+ assert_se(socket_address_parse(&b, "[::1]:8888") >= 0);
+ assert_se(socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "@abstract") >= 0);
+ assert_se(socket_address_parse(&b, "@abstract") >= 0);
+ assert_se(socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse_netlink(&a, "firewall") >= 0);
+ assert_se(socket_address_parse_netlink(&b, "firewall") >= 0);
+ assert_se(socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "vsock:2:1234") >= 0);
+ assert_se(socket_address_parse(&b, "vsock:2:1234") >= 0);
+ assert_se(socket_address_equal(&a, &b));
+ assert_se(socket_address_parse(&b, "vsock:2:1235") >= 0);
+ assert_se(!socket_address_equal(&a, &b));
+ assert_se(socket_address_parse(&b, "vsock:3:1234") >= 0);
+ assert_se(!socket_address_equal(&a, &b));
+}
+
+static void test_socket_address_get_path(void) {
+ SocketAddress a;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socket_address_parse(&a, "192.168.1.1:8888") >= 0);
+ assert_se(!socket_address_get_path(&a));
+
+ assert_se(socket_address_parse(&a, "@abstract") >= 0);
+ assert_se(!socket_address_get_path(&a));
+
+ assert_se(socket_address_parse(&a, "[::1]:8888") >= 0);
+ assert_se(!socket_address_get_path(&a));
+
+ assert_se(socket_address_parse(&a, "/foo/bar") >= 0);
+ assert_se(streq(socket_address_get_path(&a), "/foo/bar"));
+
+ assert_se(socket_address_parse(&a, "vsock:2:1234") >= 0);
+ assert_se(!socket_address_get_path(&a));
+}
+
+static void test_socket_address_is(void) {
+ SocketAddress a;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socket_address_parse(&a, "192.168.1.1:8888") >= 0);
+ assert_se(socket_address_is(&a, "192.168.1.1:8888", SOCK_STREAM));
+ assert_se(!socket_address_is(&a, "route", SOCK_STREAM));
+ assert_se(!socket_address_is(&a, "192.168.1.1:8888", SOCK_RAW));
+}
+
+static void test_socket_address_is_netlink(void) {
+ SocketAddress a;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socket_address_parse_netlink(&a, "route 10") >= 0);
+ assert_se(socket_address_is_netlink(&a, "route 10"));
+ assert_se(!socket_address_is_netlink(&a, "192.168.1.1:8888"));
+ assert_se(!socket_address_is_netlink(&a, "route 1"));
+}
+
+static void test_in_addr_is_null(void) {
+ union in_addr_union i = {};
+
+ log_info("/* %s */", __func__);
+
+ assert_se(in_addr_is_null(AF_INET, &i) == true);
+ assert_se(in_addr_is_null(AF_INET6, &i) == true);
+
+ i.in.s_addr = 0x1000000;
+ assert_se(in_addr_is_null(AF_INET, &i) == false);
+ assert_se(in_addr_is_null(AF_INET6, &i) == false);
+
+ assert_se(in_addr_is_null(-1, &i) == -EAFNOSUPPORT);
+}
+
+static void test_in_addr_prefix_intersect_one(unsigned f, const char *a, unsigned apl, const char *b, unsigned bpl, int result) {
+ union in_addr_union ua, ub;
+
+ assert_se(in_addr_from_string(f, a, &ua) >= 0);
+ assert_se(in_addr_from_string(f, b, &ub) >= 0);
+
+ assert_se(in_addr_prefix_intersect(f, &ua, apl, &ub, bpl) == result);
+}
+
+static void test_in_addr_prefix_intersect(void) {
+ log_info("/* %s */", __func__);
+
+ test_in_addr_prefix_intersect_one(AF_INET, "255.255.255.255", 32, "255.255.255.254", 32, 0);
+ test_in_addr_prefix_intersect_one(AF_INET, "255.255.255.255", 0, "255.255.255.255", 32, 1);
+ test_in_addr_prefix_intersect_one(AF_INET, "0.0.0.0", 0, "47.11.8.15", 32, 1);
+
+ test_in_addr_prefix_intersect_one(AF_INET, "1.1.1.1", 24, "1.1.1.1", 24, 1);
+ test_in_addr_prefix_intersect_one(AF_INET, "2.2.2.2", 24, "1.1.1.1", 24, 0);
+
+ test_in_addr_prefix_intersect_one(AF_INET, "1.1.1.1", 24, "1.1.1.127", 25, 1);
+ test_in_addr_prefix_intersect_one(AF_INET, "1.1.1.1", 24, "1.1.1.127", 26, 1);
+ test_in_addr_prefix_intersect_one(AF_INET, "1.1.1.1", 25, "1.1.1.127", 25, 1);
+ test_in_addr_prefix_intersect_one(AF_INET, "1.1.1.1", 25, "1.1.1.255", 25, 0);
+
+ test_in_addr_prefix_intersect_one(AF_INET6, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", 128, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe", 128, 0);
+ test_in_addr_prefix_intersect_one(AF_INET6, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", 0, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", 128, 1);
+ test_in_addr_prefix_intersect_one(AF_INET6, "::", 0, "beef:beef:beef:beef:beef:beef:beef:beef", 128, 1);
+
+ test_in_addr_prefix_intersect_one(AF_INET6, "1::2", 64, "1::2", 64, 1);
+ test_in_addr_prefix_intersect_one(AF_INET6, "2::2", 64, "1::2", 64, 0);
+
+ test_in_addr_prefix_intersect_one(AF_INET6, "1::1", 120, "1::007f", 121, 1);
+ test_in_addr_prefix_intersect_one(AF_INET6, "1::1", 120, "1::007f", 122, 1);
+ test_in_addr_prefix_intersect_one(AF_INET6, "1::1", 121, "1::007f", 121, 1);
+ test_in_addr_prefix_intersect_one(AF_INET6, "1::1", 121, "1::00ff", 121, 0);
+}
+
+static void test_in_addr_prefix_next_one(unsigned f, const char *before, unsigned pl, const char *after) {
+ union in_addr_union ubefore, uafter, t;
+
+ assert_se(in_addr_from_string(f, before, &ubefore) >= 0);
+
+ t = ubefore;
+ assert_se((in_addr_prefix_next(f, &t, pl) > 0) == !!after);
+
+ if (after) {
+ assert_se(in_addr_from_string(f, after, &uafter) >= 0);
+ assert_se(in_addr_equal(f, &t, &uafter) > 0);
+ }
+}
+
+static void test_in_addr_prefix_next(void) {
+ log_info("/* %s */", __func__);
+
+ test_in_addr_prefix_next_one(AF_INET, "192.168.0.0", 24, "192.168.1.0");
+ test_in_addr_prefix_next_one(AF_INET, "192.168.0.0", 16, "192.169.0.0");
+ test_in_addr_prefix_next_one(AF_INET, "192.168.0.0", 20, "192.168.16.0");
+
+ test_in_addr_prefix_next_one(AF_INET, "0.0.0.0", 32, "0.0.0.1");
+ test_in_addr_prefix_next_one(AF_INET, "255.255.255.255", 32, NULL);
+ test_in_addr_prefix_next_one(AF_INET, "255.255.255.0", 24, NULL);
+
+ test_in_addr_prefix_next_one(AF_INET6, "4400::", 128, "4400::0001");
+ test_in_addr_prefix_next_one(AF_INET6, "4400::", 120, "4400::0100");
+ test_in_addr_prefix_next_one(AF_INET6, "4400::", 127, "4400::0002");
+ test_in_addr_prefix_next_one(AF_INET6, "4400::", 8, "4500::");
+ test_in_addr_prefix_next_one(AF_INET6, "4400::", 7, "4600::");
+
+ test_in_addr_prefix_next_one(AF_INET6, "::", 128, "::1");
+
+ test_in_addr_prefix_next_one(AF_INET6, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", 128, NULL);
+ test_in_addr_prefix_next_one(AF_INET6, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff00", 120, NULL);
+}
+
+static void test_in_addr_to_string_one(int f, const char *addr) {
+ union in_addr_union ua;
+ _cleanup_free_ char *r = NULL;
+
+ assert_se(in_addr_from_string(f, addr, &ua) >= 0);
+ assert_se(in_addr_to_string(f, &ua, &r) >= 0);
+ printf("test_in_addr_to_string_one: %s == %s\n", addr, r);
+ assert_se(streq(addr, r));
+}
+
+static void test_in_addr_to_string(void) {
+ log_info("/* %s */", __func__);
+
+ test_in_addr_to_string_one(AF_INET, "192.168.0.1");
+ test_in_addr_to_string_one(AF_INET, "10.11.12.13");
+ test_in_addr_to_string_one(AF_INET6, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff");
+ test_in_addr_to_string_one(AF_INET6, "::1");
+ test_in_addr_to_string_one(AF_INET6, "fe80::");
+}
+
+static void test_in_addr_ifindex_to_string_one(int f, const char *a, int ifindex, const char *b) {
+ _cleanup_free_ char *r = NULL;
+ union in_addr_union ua, uuaa;
+ int ff, ifindex2;
+
+ assert_se(in_addr_from_string(f, a, &ua) >= 0);
+ assert_se(in_addr_ifindex_to_string(f, &ua, ifindex, &r) >= 0);
+ printf("test_in_addr_ifindex_to_string_one: %s == %s\n", b, r);
+ assert_se(streq(b, r));
+
+ assert_se(in_addr_ifindex_from_string_auto(b, &ff, &uuaa, &ifindex2) >= 0);
+ assert_se(ff == f);
+ assert_se(in_addr_equal(f, &ua, &uuaa));
+ assert_se(ifindex2 == ifindex || ifindex2 == 0);
+}
+
+static void test_in_addr_ifindex_to_string(void) {
+ log_info("/* %s */", __func__);
+
+ test_in_addr_ifindex_to_string_one(AF_INET, "192.168.0.1", 7, "192.168.0.1");
+ test_in_addr_ifindex_to_string_one(AF_INET, "10.11.12.13", 9, "10.11.12.13");
+ test_in_addr_ifindex_to_string_one(AF_INET6, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", 10, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff");
+ test_in_addr_ifindex_to_string_one(AF_INET6, "::1", 11, "::1");
+ test_in_addr_ifindex_to_string_one(AF_INET6, "fe80::", 12, "fe80::%12");
+ test_in_addr_ifindex_to_string_one(AF_INET6, "fe80::", 0, "fe80::");
+ test_in_addr_ifindex_to_string_one(AF_INET6, "fe80::14", 12, "fe80::14%12");
+ test_in_addr_ifindex_to_string_one(AF_INET6, "fe80::15", -7, "fe80::15");
+ test_in_addr_ifindex_to_string_one(AF_INET6, "fe80::16", LOOPBACK_IFINDEX, "fe80::16%1");
+}
+
+static void test_in_addr_ifindex_from_string_auto(void) {
+ int family, ifindex;
+ union in_addr_union ua;
+
+ log_info("/* %s */", __func__);
+ /* Most in_addr_ifindex_from_string_auto() invocations have already been tested above, but let's test some more */
+
+ assert_se(in_addr_ifindex_from_string_auto("fe80::17", &family, &ua, &ifindex) >= 0);
+ assert_se(family == AF_INET6);
+ assert_se(ifindex == 0);
+
+ assert_se(in_addr_ifindex_from_string_auto("fe80::18%19", &family, &ua, &ifindex) >= 0);
+ assert_se(family == AF_INET6);
+ assert_se(ifindex == 19);
+
+ assert_se(in_addr_ifindex_from_string_auto("fe80::18%lo", &family, &ua, &ifindex) >= 0);
+ assert_se(family == AF_INET6);
+ assert_se(ifindex == LOOPBACK_IFINDEX);
+
+ assert_se(in_addr_ifindex_from_string_auto("fe80::19%thisinterfacecantexist", &family, &ua, &ifindex) == -ENODEV);
+}
+
+static void test_sockaddr_equal(void) {
+ union sockaddr_union a = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = 0,
+ .in.sin_addr.s_addr = htobe32(INADDR_ANY),
+ };
+ union sockaddr_union b = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = 0,
+ .in.sin_addr.s_addr = htobe32(INADDR_ANY),
+ };
+ union sockaddr_union c = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = 0,
+ .in.sin_addr.s_addr = htobe32(1234),
+ };
+ union sockaddr_union d = {
+ .in6.sin6_family = AF_INET6,
+ .in6.sin6_port = 0,
+ .in6.sin6_addr = IN6ADDR_ANY_INIT,
+ };
+ union sockaddr_union e = {
+ .vm.svm_family = AF_VSOCK,
+ .vm.svm_port = 0,
+ .vm.svm_cid = VMADDR_CID_ANY,
+ };
+
+ log_info("/* %s */", __func__);
+
+ assert_se(sockaddr_equal(&a, &a));
+ assert_se(sockaddr_equal(&a, &b));
+ assert_se(sockaddr_equal(&d, &d));
+ assert_se(sockaddr_equal(&e, &e));
+ assert_se(!sockaddr_equal(&a, &c));
+ assert_se(!sockaddr_equal(&b, &c));
+ assert_se(!sockaddr_equal(&a, &e));
+}
+
+static void test_sockaddr_un_len(void) {
+ log_info("/* %s */", __func__);
+
+ static const struct sockaddr_un fs = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/foo/bar/waldo",
+ };
+
+ static const struct sockaddr_un abstract = {
+ .sun_family = AF_UNIX,
+ .sun_path = "\0foobar",
+ };
+
+ assert_se(SOCKADDR_UN_LEN(fs) == offsetof(struct sockaddr_un, sun_path) + strlen(fs.sun_path) + 1);
+ assert_se(SOCKADDR_UN_LEN(abstract) == offsetof(struct sockaddr_un, sun_path) + 1 + strlen(abstract.sun_path + 1));
+}
+
+static void test_in_addr_is_multicast(void) {
+ union in_addr_union a, b;
+ int f;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(in_addr_from_string_auto("192.168.3.11", &f, &a) >= 0);
+ assert_se(in_addr_is_multicast(f, &a) == 0);
+
+ assert_se(in_addr_from_string_auto("224.0.0.1", &f, &a) >= 0);
+ assert_se(in_addr_is_multicast(f, &a) == 1);
+
+ assert_se(in_addr_from_string_auto("FF01:0:0:0:0:0:0:1", &f, &b) >= 0);
+ assert_se(in_addr_is_multicast(f, &b) == 1);
+
+ assert_se(in_addr_from_string_auto("2001:db8::c:69b:aeff:fe53:743e", &f, &b) >= 0);
+ assert_se(in_addr_is_multicast(f, &b) == 0);
+}
+
+static void test_getpeercred_getpeergroups(void) {
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ r = safe_fork("(getpeercred)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ static const gid_t gids[] = { 3, 4, 5, 6, 7 };
+ gid_t *test_gids;
+ size_t n_test_gids;
+ uid_t test_uid;
+ gid_t test_gid;
+ struct ucred ucred;
+ int pair[2];
+
+ if (geteuid() == 0) {
+ test_uid = 1;
+ test_gid = 2;
+ test_gids = (gid_t*) gids;
+ n_test_gids = ELEMENTSOF(gids);
+
+ assert_se(setgroups(n_test_gids, test_gids) >= 0);
+ assert_se(setresgid(test_gid, test_gid, test_gid) >= 0);
+ assert_se(setresuid(test_uid, test_uid, test_uid) >= 0);
+
+ } else {
+ long ngroups_max;
+
+ test_uid = getuid();
+ test_gid = getgid();
+
+ ngroups_max = sysconf(_SC_NGROUPS_MAX);
+ assert(ngroups_max > 0);
+
+ test_gids = newa(gid_t, ngroups_max);
+
+ r = getgroups(ngroups_max, test_gids);
+ assert_se(r >= 0);
+ n_test_gids = (size_t) r;
+ }
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, pair) >= 0);
+
+ assert_se(getpeercred(pair[0], &ucred) >= 0);
+
+ assert_se(ucred.uid == test_uid);
+ assert_se(ucred.gid == test_gid);
+ assert_se(ucred.pid == getpid_cached());
+
+ {
+ _cleanup_free_ gid_t *peer_groups = NULL;
+
+ r = getpeergroups(pair[0], &peer_groups);
+ assert_se(r >= 0 || IN_SET(r, -EOPNOTSUPP, -ENOPROTOOPT));
+
+ if (r >= 0) {
+ assert_se((size_t) r == n_test_gids);
+ assert_se(memcmp(peer_groups, test_gids, sizeof(gid_t) * n_test_gids) == 0);
+ }
+ }
+
+ safe_close_pair(pair);
+ _exit(EXIT_SUCCESS);
+ }
+}
+
+static void test_passfd_read(void) {
+ static const char file_contents[] = "test contents for passfd";
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) >= 0);
+
+ r = safe_fork("(passfd_read)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* Child */
+ char tmpfile[] = "/tmp/test-socket-util-passfd-read-XXXXXX";
+ _cleanup_close_ int tmpfd = -1;
+
+ pair[0] = safe_close(pair[0]);
+
+ tmpfd = mkostemp_safe(tmpfile);
+ assert_se(tmpfd >= 0);
+ assert_se(write(tmpfd, file_contents, strlen(file_contents)) == (ssize_t) strlen(file_contents));
+ tmpfd = safe_close(tmpfd);
+
+ tmpfd = open(tmpfile, O_RDONLY);
+ assert_se(tmpfd >= 0);
+ assert_se(unlink(tmpfile) == 0);
+
+ assert_se(send_one_fd(pair[1], tmpfd, MSG_DONTWAIT) == 0);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Parent */
+ char buf[64];
+ struct iovec iov = IOVEC_INIT(buf, sizeof(buf)-1);
+ _cleanup_close_ int fd = -1;
+
+ pair[1] = safe_close(pair[1]);
+
+ assert_se(receive_one_fd_iov(pair[0], &iov, 1, MSG_DONTWAIT, &fd) == 0);
+
+ assert_se(fd >= 0);
+ r = read(fd, buf, sizeof(buf)-1);
+ assert_se(r >= 0);
+ buf[r] = 0;
+ assert_se(streq(buf, file_contents));
+}
+
+static void test_passfd_contents_read(void) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ static const char file_contents[] = "test contents in the file";
+ static const char wire_contents[] = "test contents on the wire";
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) >= 0);
+
+ r = safe_fork("(passfd_contents_read)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* Child */
+ struct iovec iov = IOVEC_INIT_STRING(wire_contents);
+ char tmpfile[] = "/tmp/test-socket-util-passfd-contents-read-XXXXXX";
+ _cleanup_close_ int tmpfd = -1;
+
+ pair[0] = safe_close(pair[0]);
+
+ tmpfd = mkostemp_safe(tmpfile);
+ assert_se(tmpfd >= 0);
+ assert_se(write(tmpfd, file_contents, strlen(file_contents)) == (ssize_t) strlen(file_contents));
+ tmpfd = safe_close(tmpfd);
+
+ tmpfd = open(tmpfile, O_RDONLY);
+ assert_se(tmpfd >= 0);
+ assert_se(unlink(tmpfile) == 0);
+
+ assert_se(send_one_fd_iov(pair[1], tmpfd, &iov, 1, MSG_DONTWAIT) > 0);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Parent */
+ char buf[64];
+ struct iovec iov = IOVEC_INIT(buf, sizeof(buf)-1);
+ _cleanup_close_ int fd = -1;
+ ssize_t k;
+
+ pair[1] = safe_close(pair[1]);
+
+ k = receive_one_fd_iov(pair[0], &iov, 1, MSG_DONTWAIT, &fd);
+ assert_se(k > 0);
+ buf[k] = 0;
+ assert_se(streq(buf, wire_contents));
+
+ assert_se(fd >= 0);
+ r = read(fd, buf, sizeof(buf)-1);
+ assert_se(r >= 0);
+ buf[r] = 0;
+ assert_se(streq(buf, file_contents));
+}
+
+static void test_receive_nopassfd(void) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ static const char wire_contents[] = "no fd passed here";
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) >= 0);
+
+ r = safe_fork("(receive_nopassfd)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* Child */
+ struct iovec iov = IOVEC_INIT_STRING(wire_contents);
+
+ pair[0] = safe_close(pair[0]);
+
+ assert_se(send_one_fd_iov(pair[1], -1, &iov, 1, MSG_DONTWAIT) > 0);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Parent */
+ char buf[64];
+ struct iovec iov = IOVEC_INIT(buf, sizeof(buf)-1);
+ int fd = -999;
+ ssize_t k;
+
+ pair[1] = safe_close(pair[1]);
+
+ k = receive_one_fd_iov(pair[0], &iov, 1, MSG_DONTWAIT, &fd);
+ assert_se(k > 0);
+ buf[k] = 0;
+ assert_se(streq(buf, wire_contents));
+
+ /* no fd passed here, confirm it was reset */
+ assert_se(fd == -1);
+}
+
+static void test_send_nodata_nofd(void) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) >= 0);
+
+ r = safe_fork("(send_nodata_nofd)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* Child */
+ pair[0] = safe_close(pair[0]);
+
+ assert_se(send_one_fd_iov(pair[1], -1, NULL, 0, MSG_DONTWAIT) == -EINVAL);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Parent */
+ char buf[64];
+ struct iovec iov = IOVEC_INIT(buf, sizeof(buf)-1);
+ int fd = -999;
+ ssize_t k;
+
+ pair[1] = safe_close(pair[1]);
+
+ k = receive_one_fd_iov(pair[0], &iov, 1, MSG_DONTWAIT, &fd);
+ /* recvmsg() will return errno EAGAIN if nothing was sent */
+ assert_se(k == -EAGAIN);
+
+ /* receive_one_fd_iov returned error, so confirm &fd wasn't touched */
+ assert_se(fd == -999);
+}
+
+static void test_send_emptydata(void) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) >= 0);
+
+ r = safe_fork("(send_emptydata)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* Child */
+ struct iovec iov = IOVEC_INIT_STRING(""); /* zero-length iov */
+ assert_se(iov.iov_len == 0);
+
+ pair[0] = safe_close(pair[0]);
+
+ /* This will succeed, since iov is set. */
+ assert_se(send_one_fd_iov(pair[1], -1, &iov, 1, MSG_DONTWAIT) == 0);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Parent */
+ char buf[64];
+ struct iovec iov = IOVEC_INIT(buf, sizeof(buf)-1);
+ int fd = -999;
+ ssize_t k;
+
+ pair[1] = safe_close(pair[1]);
+
+ k = receive_one_fd_iov(pair[0], &iov, 1, MSG_DONTWAIT, &fd);
+ /* receive_one_fd_iov() returns -EIO if an fd is not found and no data was returned. */
+ assert_se(k == -EIO);
+
+ /* receive_one_fd_iov returned error, so confirm &fd wasn't touched */
+ assert_se(fd == -999);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_ifname_valid();
+
+ test_socket_address_parse();
+ test_socket_print_unix();
+ test_socket_address_parse_netlink();
+ test_socket_address_equal();
+ test_socket_address_get_path();
+ test_socket_address_is();
+ test_socket_address_is_netlink();
+
+ test_in_addr_is_null();
+ test_in_addr_prefix_intersect();
+ test_in_addr_prefix_next();
+ test_in_addr_to_string();
+ test_in_addr_ifindex_to_string();
+ test_in_addr_ifindex_from_string_auto();
+
+ test_sockaddr_equal();
+
+ test_sockaddr_un_len();
+
+ test_in_addr_is_multicast();
+
+ test_getpeercred_getpeergroups();
+
+ test_passfd_read();
+ test_passfd_contents_read();
+ test_receive_nopassfd();
+ test_send_nodata_nofd();
+ test_send_emptydata();
+
+ return 0;
+}
diff --git a/src/test/test-specifier.c b/src/test/test-specifier.c
new file mode 100644
index 0000000..a0ffdf6
--- /dev/null
+++ b/src/test/test-specifier.c
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "log.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+
+static void test_specifier_escape_one(const char *a, const char *b) {
+ _cleanup_free_ char *x = NULL;
+
+ x = specifier_escape(a);
+ assert_se(streq_ptr(x, b));
+}
+
+static void test_specifier_escape(void) {
+ test_specifier_escape_one(NULL, NULL);
+ test_specifier_escape_one("", "");
+ test_specifier_escape_one("%", "%%");
+ test_specifier_escape_one("foo bar", "foo bar");
+ test_specifier_escape_one("foo%bar", "foo%%bar");
+ test_specifier_escape_one("%%%%%", "%%%%%%%%%%");
+}
+
+static void test_specifier_escape_strv_one(char **a, char **b) {
+ _cleanup_strv_free_ char **x = NULL;
+
+ assert_se(specifier_escape_strv(a, &x) >= 0);
+ assert_se(strv_equal(x, b));
+}
+
+static void test_specifier_escape_strv(void) {
+ test_specifier_escape_strv_one(NULL, NULL);
+ test_specifier_escape_strv_one(STRV_MAKE(NULL), STRV_MAKE(NULL));
+ test_specifier_escape_strv_one(STRV_MAKE(""), STRV_MAKE(""));
+ test_specifier_escape_strv_one(STRV_MAKE("foo"), STRV_MAKE("foo"));
+ test_specifier_escape_strv_one(STRV_MAKE("%"), STRV_MAKE("%%"));
+ test_specifier_escape_strv_one(STRV_MAKE("foo", "%", "foo%", "%foo", "foo%foo", "quux", "%%%"), STRV_MAKE("foo", "%%", "foo%%", "%%foo", "foo%%foo", "quux", "%%%%%%"));
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_specifier_escape();
+ test_specifier_escape_strv();
+
+ return 0;
+}
diff --git a/src/test/test-stat-util.c b/src/test/test-stat-util.c
new file mode 100644
index 0000000..0e2155e
--- /dev/null
+++ b/src/test/test-stat-util.c
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <linux/magic.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "tmpfile-util.h"
+
+static void test_files_same(void) {
+ _cleanup_close_ int fd = -1;
+ char name[] = "/tmp/test-files_same.XXXXXX";
+ char name_alias[] = "/tmp/test-files_same.alias";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(symlink(name, name_alias) >= 0);
+
+ assert_se(files_same(name, name, 0));
+ assert_se(files_same(name, name, AT_SYMLINK_NOFOLLOW));
+ assert_se(files_same(name, name_alias, 0));
+ assert_se(!files_same(name, name_alias, AT_SYMLINK_NOFOLLOW));
+
+ unlink(name);
+ unlink(name_alias);
+}
+
+static void test_is_symlink(void) {
+ char name[] = "/tmp/test-is_symlink.XXXXXX";
+ char name_link[] = "/tmp/test-is_symlink.link";
+ _cleanup_close_ int fd = -1;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(symlink(name, name_link) >= 0);
+
+ assert_se(is_symlink(name) == 0);
+ assert_se(is_symlink(name_link) == 1);
+ assert_se(is_symlink("/a/file/which/does/not/exist/i/guess") < 0);
+
+ unlink(name);
+ unlink(name_link);
+}
+
+static void test_path_is_fs_type(void) {
+ /* run might not be a mount point in build chroots */
+ if (path_is_mount_point("/run", NULL, AT_SYMLINK_FOLLOW) > 0) {
+ assert_se(path_is_fs_type("/run", TMPFS_MAGIC) > 0);
+ assert_se(path_is_fs_type("/run", BTRFS_SUPER_MAGIC) == 0);
+ }
+ assert_se(path_is_fs_type("/proc", PROC_SUPER_MAGIC) > 0);
+ assert_se(path_is_fs_type("/proc", BTRFS_SUPER_MAGIC) == 0);
+ assert_se(path_is_fs_type("/i-dont-exist", BTRFS_SUPER_MAGIC) == -ENOENT);
+}
+
+static void test_path_is_temporary_fs(void) {
+ /* run might not be a mount point in build chroots */
+ if (path_is_mount_point("/run", NULL, AT_SYMLINK_FOLLOW) > 0)
+ assert_se(path_is_temporary_fs("/run") > 0);
+ assert_se(path_is_temporary_fs("/proc") == 0);
+ assert_se(path_is_temporary_fs("/i-dont-exist") == -ENOENT);
+}
+
+static void test_fd_is_network_ns(void) {
+ _cleanup_close_ int fd = -1;
+ assert_se(fd_is_network_ns(STDIN_FILENO) == 0);
+ assert_se(fd_is_network_ns(STDERR_FILENO) == 0);
+ assert_se(fd_is_network_ns(STDOUT_FILENO) == 0);
+
+ assert_se((fd = open("/proc/self/ns/mnt", O_CLOEXEC|O_RDONLY)) >= 0);
+ assert_se(IN_SET(fd_is_network_ns(fd), 0, -EUCLEAN));
+ fd = safe_close(fd);
+
+ assert_se((fd = open("/proc/self/ns/net", O_CLOEXEC|O_RDONLY)) >= 0);
+ assert_se(IN_SET(fd_is_network_ns(fd), 1, -EUCLEAN));
+}
+
+static void test_device_major_minor_valid(void) {
+ /* on glibc dev_t is 64bit, even though in the kernel it is only 32bit */
+ assert_cc(sizeof(dev_t) == sizeof(uint64_t));
+
+ assert_se(DEVICE_MAJOR_VALID(0U));
+ assert_se(DEVICE_MINOR_VALID(0U));
+
+ assert_se(DEVICE_MAJOR_VALID(1U));
+ assert_se(DEVICE_MINOR_VALID(1U));
+
+ assert_se(!DEVICE_MAJOR_VALID(-1U));
+ assert_se(!DEVICE_MINOR_VALID(-1U));
+
+ assert_se(DEVICE_MAJOR_VALID(1U << 10));
+ assert_se(DEVICE_MINOR_VALID(1U << 10));
+
+ assert_se(DEVICE_MAJOR_VALID((1U << 12) - 1));
+ assert_se(DEVICE_MINOR_VALID((1U << 20) - 1));
+
+ assert_se(!DEVICE_MAJOR_VALID((1U << 12)));
+ assert_se(!DEVICE_MINOR_VALID((1U << 20)));
+
+ assert_se(!DEVICE_MAJOR_VALID(1U << 25));
+ assert_se(!DEVICE_MINOR_VALID(1U << 25));
+
+ assert_se(!DEVICE_MAJOR_VALID(UINT32_MAX));
+ assert_se(!DEVICE_MINOR_VALID(UINT32_MAX));
+
+ assert_se(!DEVICE_MAJOR_VALID(UINT64_MAX));
+ assert_se(!DEVICE_MINOR_VALID(UINT64_MAX));
+
+ assert_se(DEVICE_MAJOR_VALID(major(0)));
+ assert_se(DEVICE_MINOR_VALID(minor(0)));
+}
+
+static void test_device_path_make_canonical_one(const char *path) {
+ _cleanup_free_ char *resolved = NULL, *raw = NULL;
+ struct stat st;
+ dev_t devno;
+ mode_t mode;
+ int r;
+
+ assert_se(stat(path, &st) >= 0);
+ r = device_path_make_canonical(st.st_mode, st.st_rdev, &resolved);
+ if (r == -ENOENT) /* maybe /dev/char/x:y and /dev/block/x:y are missing in this test environment, because we
+ * run in a container or so? */
+ return;
+
+ assert_se(r >= 0);
+ assert_se(path_equal(path, resolved));
+
+ assert_se(device_path_make_major_minor(st.st_mode, st.st_rdev, &raw) >= 0);
+ assert_se(device_path_parse_major_minor(raw, &mode, &devno) >= 0);
+
+ assert_se(st.st_rdev == devno);
+ assert_se((st.st_mode & S_IFMT) == (mode & S_IFMT));
+}
+
+static void test_device_path_make_canonical(void) {
+
+ test_device_path_make_canonical_one("/dev/null");
+ test_device_path_make_canonical_one("/dev/zero");
+ test_device_path_make_canonical_one("/dev/full");
+ test_device_path_make_canonical_one("/dev/random");
+ test_device_path_make_canonical_one("/dev/urandom");
+ test_device_path_make_canonical_one("/dev/tty");
+
+ if (is_device_node("/run/systemd/inaccessible/chr") > 0) {
+ test_device_path_make_canonical_one("/run/systemd/inaccessible/chr");
+ test_device_path_make_canonical_one("/run/systemd/inaccessible/blk");
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_files_same();
+ test_is_symlink();
+ test_path_is_fs_type();
+ test_path_is_temporary_fs();
+ test_fd_is_network_ns();
+ test_device_major_minor_valid();
+ test_device_path_make_canonical();
+
+ return 0;
+}
diff --git a/src/test/test-static-destruct.c b/src/test/test-static-destruct.c
new file mode 100644
index 0000000..eb0523d
--- /dev/null
+++ b/src/test/test-static-destruct.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "static-destruct.h"
+#include "tests.h"
+
+static int foo = 0;
+static int bar = 0;
+static int baz = 0;
+static char* memory = NULL;
+
+static void test_destroy(int *b) {
+ (*b)++;
+}
+
+STATIC_DESTRUCTOR_REGISTER(foo, test_destroy);
+STATIC_DESTRUCTOR_REGISTER(bar, test_destroy);
+STATIC_DESTRUCTOR_REGISTER(bar, test_destroy);
+STATIC_DESTRUCTOR_REGISTER(baz, test_destroy);
+STATIC_DESTRUCTOR_REGISTER(baz, test_destroy);
+STATIC_DESTRUCTOR_REGISTER(baz, test_destroy);
+STATIC_DESTRUCTOR_REGISTER(memory, freep);
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ assert_se(memory = strdup("hallo"));
+
+ assert_se(foo == 0 && bar == 0 && baz == 0);
+ static_destruct();
+ assert_se(foo == 1 && bar == 2 && baz == 3);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-strbuf.c b/src/test/test-strbuf.c
new file mode 100644
index 0000000..9e93bc1
--- /dev/null
+++ b/src/test/test-strbuf.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "strbuf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static ssize_t add_string(struct strbuf *sb, const char *s) {
+ return strbuf_add_string(sb, s, strlen(s));
+}
+
+static void test_strbuf(void) {
+ _cleanup_(strbuf_cleanupp) struct strbuf *sb;
+ _cleanup_strv_free_ char **l;
+ ssize_t a, b, c, d, e, f, g, h;
+
+ sb = strbuf_new();
+
+ a = add_string(sb, "waldo");
+ b = add_string(sb, "foo");
+ c = add_string(sb, "bar");
+ d = add_string(sb, "waldo"); /* duplicate */
+ e = add_string(sb, "aldo"); /* duplicate */
+ f = add_string(sb, "do"); /* duplicate */
+ g = add_string(sb, "waldorf"); /* not a duplicate: matches from tail */
+ h = add_string(sb, "");
+
+ /* check the content of the buffer directly */
+ l = strv_parse_nulstr(sb->buf, sb->len);
+
+ assert_se(streq(l[0], "")); /* root */
+ assert_se(streq(l[1], "waldo"));
+ assert_se(streq(l[2], "foo"));
+ assert_se(streq(l[3], "bar"));
+ assert_se(streq(l[4], "waldorf"));
+ assert_se(l[5] == NULL);
+
+ assert_se(sb->nodes_count == 5); /* root + 4 non-duplicates */
+ assert_se(sb->dedup_count == 4);
+ assert_se(sb->in_count == 8);
+
+ assert_se(sb->in_len == 29); /* length of all strings added */
+ assert_se(sb->dedup_len == 11); /* length of all strings duplicated */
+ assert_se(sb->len == 23); /* buffer length: in - dedup + \0 for each node */
+
+ /* check the returned offsets and the respective content in the buffer */
+ assert_se(a == 1);
+ assert_se(b == 7);
+ assert_se(c == 11);
+ assert_se(d == 1);
+ assert_se(e == 2);
+ assert_se(f == 4);
+ assert_se(g == 15);
+ assert_se(h == 0);
+
+ assert_se(streq(sb->buf + a, "waldo"));
+ assert_se(streq(sb->buf + b, "foo"));
+ assert_se(streq(sb->buf + c, "bar"));
+ assert_se(streq(sb->buf + d, "waldo"));
+ assert_se(streq(sb->buf + e, "aldo"));
+ assert_se(streq(sb->buf + f, "do"));
+ assert_se(streq(sb->buf + g, "waldorf"));
+ assert_se(streq(sb->buf + h, ""));
+
+ strbuf_complete(sb);
+ assert_se(sb->root == NULL);
+}
+
+int main(int argc, const char *argv[]) {
+ test_strbuf();
+
+ return 0;
+}
diff --git a/src/test/test-string-util.c b/src/test/test-string-util.c
new file mode 100644
index 0000000..8c1f91d
--- /dev/null
+++ b/src/test/test-string-util.c
@@ -0,0 +1,616 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "utf8.h"
+
+static void test_string_erase(void) {
+ char *x;
+
+ x = strdupa("");
+ assert_se(streq(string_erase(x), ""));
+
+ x = strdupa("1");
+ assert_se(streq(string_erase(x), ""));
+
+ x = strdupa("123456789");
+ assert_se(streq(string_erase(x), ""));
+
+ assert_se(x[1] == '\0');
+ assert_se(x[2] == '\0');
+ assert_se(x[3] == '\0');
+ assert_se(x[4] == '\0');
+ assert_se(x[5] == '\0');
+ assert_se(x[6] == '\0');
+ assert_se(x[7] == '\0');
+ assert_se(x[8] == '\0');
+ assert_se(x[9] == '\0');
+}
+
+static void test_free_and_strndup_one(char **t, const char *src, size_t l, const char *expected, bool change) {
+ int r;
+
+ log_debug("%s: \"%s\", \"%s\", %zd (expect \"%s\", %s)",
+ __func__, strnull(*t), strnull(src), l, strnull(expected), yes_no(change));
+
+ r = free_and_strndup(t, src, l);
+ assert_se(streq_ptr(*t, expected));
+ assert_se(r == change); /* check that change occurs only when necessary */
+}
+
+static void test_free_and_strndup(void) {
+ static const struct test_case {
+ const char *src;
+ size_t len;
+ const char *expected;
+ } cases[] = {
+ {"abc", 0, ""},
+ {"abc", 0, ""},
+ {"abc", 1, "a"},
+ {"abc", 2, "ab"},
+ {"abc", 3, "abc"},
+ {"abc", 4, "abc"},
+ {"abc", 5, "abc"},
+ {"abc", 5, "abc"},
+ {"abc", 4, "abc"},
+ {"abc", 3, "abc"},
+ {"abc", 2, "ab"},
+ {"abc", 1, "a"},
+ {"abc", 0, ""},
+
+ {"", 0, ""},
+ {"", 1, ""},
+ {"", 2, ""},
+ {"", 0, ""},
+ {"", 1, ""},
+ {"", 2, ""},
+ {"", 2, ""},
+ {"", 1, ""},
+ {"", 0, ""},
+
+ {NULL, 0, NULL},
+
+ {"foo", 3, "foo"},
+ {"foobar", 6, "foobar"},
+ };
+
+ _cleanup_free_ char *t = NULL;
+ const char *prev_expected = t;
+
+ for (unsigned i = 0; i < ELEMENTSOF(cases); i++) {
+ test_free_and_strndup_one(&t,
+ cases[i].src, cases[i].len, cases[i].expected,
+ !streq_ptr(cases[i].expected, prev_expected));
+ prev_expected = t;
+ }
+}
+
+static void test_ascii_strcasecmp_n(void) {
+
+ assert_se(ascii_strcasecmp_n("", "", 0) == 0);
+ assert_se(ascii_strcasecmp_n("", "", 1) == 0);
+ assert_se(ascii_strcasecmp_n("", "a", 1) < 0);
+ assert_se(ascii_strcasecmp_n("", "a", 2) < 0);
+ assert_se(ascii_strcasecmp_n("a", "", 1) > 0);
+ assert_se(ascii_strcasecmp_n("a", "", 2) > 0);
+ assert_se(ascii_strcasecmp_n("a", "a", 1) == 0);
+ assert_se(ascii_strcasecmp_n("a", "a", 2) == 0);
+ assert_se(ascii_strcasecmp_n("a", "b", 1) < 0);
+ assert_se(ascii_strcasecmp_n("a", "b", 2) < 0);
+ assert_se(ascii_strcasecmp_n("b", "a", 1) > 0);
+ assert_se(ascii_strcasecmp_n("b", "a", 2) > 0);
+ assert_se(ascii_strcasecmp_n("xxxxyxxxx", "xxxxYxxxx", 9) == 0);
+ assert_se(ascii_strcasecmp_n("xxxxxxxxx", "xxxxyxxxx", 9) < 0);
+ assert_se(ascii_strcasecmp_n("xxxxXxxxx", "xxxxyxxxx", 9) < 0);
+ assert_se(ascii_strcasecmp_n("xxxxxxxxx", "xxxxYxxxx", 9) < 0);
+ assert_se(ascii_strcasecmp_n("xxxxXxxxx", "xxxxYxxxx", 9) < 0);
+
+ assert_se(ascii_strcasecmp_n("xxxxYxxxx", "xxxxYxxxx", 9) == 0);
+ assert_se(ascii_strcasecmp_n("xxxxyxxxx", "xxxxxxxxx", 9) > 0);
+ assert_se(ascii_strcasecmp_n("xxxxyxxxx", "xxxxXxxxx", 9) > 0);
+ assert_se(ascii_strcasecmp_n("xxxxYxxxx", "xxxxxxxxx", 9) > 0);
+ assert_se(ascii_strcasecmp_n("xxxxYxxxx", "xxxxXxxxx", 9) > 0);
+}
+
+static void test_ascii_strcasecmp_nn(void) {
+ assert_se(ascii_strcasecmp_nn("", 0, "", 0) == 0);
+ assert_se(ascii_strcasecmp_nn("", 0, "", 1) < 0);
+ assert_se(ascii_strcasecmp_nn("", 1, "", 0) > 0);
+ assert_se(ascii_strcasecmp_nn("", 1, "", 1) == 0);
+
+ assert_se(ascii_strcasecmp_nn("aaaa", 4, "aaAa", 4) == 0);
+ assert_se(ascii_strcasecmp_nn("aaa", 3, "aaAa", 4) < 0);
+ assert_se(ascii_strcasecmp_nn("aaa", 4, "aaAa", 4) < 0);
+ assert_se(ascii_strcasecmp_nn("aaaa", 4, "aaA", 3) > 0);
+ assert_se(ascii_strcasecmp_nn("aaaa", 4, "AAA", 4) > 0);
+
+ assert_se(ascii_strcasecmp_nn("aaaa", 4, "bbbb", 4) < 0);
+ assert_se(ascii_strcasecmp_nn("aaAA", 4, "BBbb", 4) < 0);
+ assert_se(ascii_strcasecmp_nn("BBbb", 4, "aaaa", 4) > 0);
+}
+
+static void test_cellescape(void) {
+ char buf[40];
+
+ assert_se(streq(cellescape(buf, 1, ""), ""));
+ assert_se(streq(cellescape(buf, 1, "1"), ""));
+ assert_se(streq(cellescape(buf, 1, "12"), ""));
+
+ assert_se(streq(cellescape(buf, 2, ""), ""));
+ assert_se(streq(cellescape(buf, 2, "1"), "1"));
+ assert_se(streq(cellescape(buf, 2, "12"), "."));
+ assert_se(streq(cellescape(buf, 2, "123"), "."));
+
+ assert_se(streq(cellescape(buf, 3, ""), ""));
+ assert_se(streq(cellescape(buf, 3, "1"), "1"));
+ assert_se(streq(cellescape(buf, 3, "12"), "12"));
+ assert_se(streq(cellescape(buf, 3, "123"), ".."));
+ assert_se(streq(cellescape(buf, 3, "1234"), ".."));
+
+ assert_se(streq(cellescape(buf, 4, ""), ""));
+ assert_se(streq(cellescape(buf, 4, "1"), "1"));
+ assert_se(streq(cellescape(buf, 4, "12"), "12"));
+ assert_se(streq(cellescape(buf, 4, "123"), "123"));
+ assert_se(streq(cellescape(buf, 4, "1234"), is_locale_utf8() ? "…" : "..."));
+ assert_se(streq(cellescape(buf, 4, "12345"), is_locale_utf8() ? "…" : "..."));
+
+ assert_se(streq(cellescape(buf, 5, ""), ""));
+ assert_se(streq(cellescape(buf, 5, "1"), "1"));
+ assert_se(streq(cellescape(buf, 5, "12"), "12"));
+ assert_se(streq(cellescape(buf, 5, "123"), "123"));
+ assert_se(streq(cellescape(buf, 5, "1234"), "1234"));
+ assert_se(streq(cellescape(buf, 5, "12345"), is_locale_utf8() ? "1…" : "1..."));
+ assert_se(streq(cellescape(buf, 5, "123456"), is_locale_utf8() ? "1…" : "1..."));
+
+ assert_se(streq(cellescape(buf, 1, "\020"), ""));
+ assert_se(streq(cellescape(buf, 2, "\020"), "."));
+ assert_se(streq(cellescape(buf, 3, "\020"), ".."));
+ assert_se(streq(cellescape(buf, 4, "\020"), "…"));
+ assert_se(streq(cellescape(buf, 5, "\020"), "\\020"));
+
+ assert_se(streq(cellescape(buf, 5, "1234\020"), "1…"));
+ assert_se(streq(cellescape(buf, 6, "1234\020"), "12…"));
+ assert_se(streq(cellescape(buf, 7, "1234\020"), "123…"));
+ assert_se(streq(cellescape(buf, 8, "1234\020"), "1234…"));
+ assert_se(streq(cellescape(buf, 9, "1234\020"), "1234\\020"));
+
+ assert_se(streq(cellescape(buf, 1, "\t\n"), ""));
+ assert_se(streq(cellescape(buf, 2, "\t\n"), "."));
+ assert_se(streq(cellescape(buf, 3, "\t\n"), ".."));
+ assert_se(streq(cellescape(buf, 4, "\t\n"), "…"));
+ assert_se(streq(cellescape(buf, 5, "\t\n"), "\\t\\n"));
+
+ assert_se(streq(cellescape(buf, 5, "1234\t\n"), "1…"));
+ assert_se(streq(cellescape(buf, 6, "1234\t\n"), "12…"));
+ assert_se(streq(cellescape(buf, 7, "1234\t\n"), "123…"));
+ assert_se(streq(cellescape(buf, 8, "1234\t\n"), "1234…"));
+ assert_se(streq(cellescape(buf, 9, "1234\t\n"), "1234\\t\\n"));
+
+ assert_se(streq(cellescape(buf, 4, "x\t\020\n"), "…"));
+ assert_se(streq(cellescape(buf, 5, "x\t\020\n"), "x…"));
+ assert_se(streq(cellescape(buf, 6, "x\t\020\n"), "x…"));
+ assert_se(streq(cellescape(buf, 7, "x\t\020\n"), "x\\t…"));
+ assert_se(streq(cellescape(buf, 8, "x\t\020\n"), "x\\t…"));
+ assert_se(streq(cellescape(buf, 9, "x\t\020\n"), "x\\t…"));
+ assert_se(streq(cellescape(buf, 10, "x\t\020\n"), "x\\t\\020\\n"));
+
+ assert_se(streq(cellescape(buf, 6, "1\011"), "1\\t"));
+ assert_se(streq(cellescape(buf, 6, "1\020"), "1\\020"));
+ assert_se(streq(cellescape(buf, 6, "1\020x"), is_locale_utf8() ? "1…" : "1..."));
+
+ assert_se(streq(cellescape(buf, 40, "1\020"), "1\\020"));
+ assert_se(streq(cellescape(buf, 40, "1\020x"), "1\\020x"));
+
+ assert_se(streq(cellescape(buf, 40, "\a\b\f\n\r\t\v\\\"'"), "\\a\\b\\f\\n\\r\\t\\v\\\\\\\"\\'"));
+ assert_se(streq(cellescape(buf, 6, "\a\b\f\n\r\t\v\\\"'"), is_locale_utf8() ? "\\a…" : "\\a..."));
+ assert_se(streq(cellescape(buf, 7, "\a\b\f\n\r\t\v\\\"'"), is_locale_utf8() ? "\\a…" : "\\a..."));
+ assert_se(streq(cellescape(buf, 8, "\a\b\f\n\r\t\v\\\"'"), is_locale_utf8() ? "\\a\\b…" : "\\a\\b..."));
+
+ assert_se(streq(cellescape(buf, sizeof buf, "1\020"), "1\\020"));
+ assert_se(streq(cellescape(buf, sizeof buf, "1\020x"), "1\\020x"));
+}
+
+static void test_streq_ptr(void) {
+ assert_se(streq_ptr(NULL, NULL));
+ assert_se(!streq_ptr("abc", "cdef"));
+}
+
+static void test_strstrip(void) {
+ char *r;
+ char input[] = " hello, waldo. ";
+
+ r = strstrip(input);
+ assert_se(streq(r, "hello, waldo."));
+}
+
+static void test_strextend(void) {
+ _cleanup_free_ char *str = NULL;
+
+ assert_se(strextend(&str, NULL));
+ assert_se(streq_ptr(str, ""));
+ assert_se(strextend(&str, "", "0", "", "", "123", NULL));
+ assert_se(streq_ptr(str, "0123"));
+ assert_se(strextend(&str, "456", "78", "9", NULL));
+ assert_se(streq_ptr(str, "0123456789"));
+}
+
+static void test_strextend_with_separator(void) {
+ _cleanup_free_ char *str = NULL;
+
+ assert_se(strextend_with_separator(&str, NULL, NULL));
+ assert_se(streq_ptr(str, ""));
+ str = mfree(str);
+
+ assert_se(strextend_with_separator(&str, "...", NULL));
+ assert_se(streq_ptr(str, ""));
+ assert_se(strextend_with_separator(&str, "...", NULL));
+ assert_se(streq_ptr(str, ""));
+ str = mfree(str);
+
+ assert_se(strextend_with_separator(&str, "xyz", "a", "bb", "ccc", NULL));
+ assert_se(streq_ptr(str, "axyzbbxyzccc"));
+ str = mfree(str);
+
+ assert_se(strextend_with_separator(&str, ",", "start", "", "1", "234", NULL));
+ assert_se(streq_ptr(str, "start,,1,234"));
+ assert_se(strextend_with_separator(&str, ";", "more", "5", "678", NULL));
+ assert_se(streq_ptr(str, "start,,1,234;more;5;678"));
+}
+
+static void test_strrep(void) {
+ _cleanup_free_ char *one, *three, *zero;
+ one = strrep("waldo", 1);
+ three = strrep("waldo", 3);
+ zero = strrep("waldo", 0);
+
+ assert_se(streq(one, "waldo"));
+ assert_se(streq(three, "waldowaldowaldo"));
+ assert_se(streq(zero, ""));
+}
+
+static void test_strappend(void) {
+ _cleanup_free_ char *t1, *t2, *t3, *t4;
+
+ t1 = strappend(NULL, NULL);
+ assert_se(streq(t1, ""));
+
+ t2 = strappend(NULL, "suf");
+ assert_se(streq(t2, "suf"));
+
+ t3 = strappend("pre", NULL);
+ assert_se(streq(t3, "pre"));
+
+ t4 = strappend("pre", "suf");
+ assert_se(streq(t4, "presuf"));
+}
+
+static void test_string_has_cc(void) {
+ assert_se(string_has_cc("abc\1", NULL));
+ assert_se(string_has_cc("abc\x7f", NULL));
+ assert_se(string_has_cc("abc\x7f", NULL));
+ assert_se(string_has_cc("abc\t\x7f", "\t"));
+ assert_se(string_has_cc("abc\t\x7f", "\t"));
+ assert_se(string_has_cc("\x7f", "\t"));
+ assert_se(string_has_cc("\x7f", "\t\a"));
+
+ assert_se(!string_has_cc("abc\t\t", "\t"));
+ assert_se(!string_has_cc("abc\t\t\a", "\t\a"));
+ assert_se(!string_has_cc("a\ab\tc", "\t\a"));
+}
+
+static void test_ascii_strlower(void) {
+ char a[] = "AabBcC Jk Ii Od LKJJJ kkd LK";
+ assert_se(streq(ascii_strlower(a), "aabbcc jk ii od lkjjj kkd lk"));
+}
+
+static void test_strshorten(void) {
+ char s[] = "foobar";
+
+ assert_se(strlen(strshorten(s, 6)) == 6);
+ assert_se(strlen(strshorten(s, 12)) == 6);
+ assert_se(strlen(strshorten(s, 2)) == 2);
+ assert_se(strlen(strshorten(s, 0)) == 0);
+}
+
+static void test_strjoina(void) {
+ char *actual;
+
+ actual = strjoina("", "foo", "bar");
+ assert_se(streq(actual, "foobar"));
+
+ actual = strjoina("foo", "bar", "baz");
+ assert_se(streq(actual, "foobarbaz"));
+
+ actual = strjoina("foo", "", "bar", "baz");
+ assert_se(streq(actual, "foobarbaz"));
+
+ actual = strjoina("foo");
+ assert_se(streq(actual, "foo"));
+
+ actual = strjoina(NULL);
+ assert_se(streq(actual, ""));
+
+ actual = strjoina(NULL, "foo");
+ assert_se(streq(actual, ""));
+
+ actual = strjoina("foo", NULL, "bar");
+ assert_se(streq(actual, "foo"));
+}
+
+static void test_strcmp_ptr(void) {
+ assert_se(strcmp_ptr(NULL, NULL) == 0);
+ assert_se(strcmp_ptr("", NULL) > 0);
+ assert_se(strcmp_ptr("foo", NULL) > 0);
+ assert_se(strcmp_ptr(NULL, "") < 0);
+ assert_se(strcmp_ptr(NULL, "bar") < 0);
+ assert_se(strcmp_ptr("foo", "bar") > 0);
+ assert_se(strcmp_ptr("bar", "baz") < 0);
+ assert_se(strcmp_ptr("foo", "foo") == 0);
+ assert_se(strcmp_ptr("", "") == 0);
+}
+
+static void test_foreach_word(void) {
+ const char *word, *state;
+ size_t l;
+ int i = 0;
+ const char test[] = "test abc d\te f ";
+ const char * const expected[] = {
+ "test",
+ "abc",
+ "d",
+ "e",
+ "f",
+ "",
+ NULL
+ };
+
+ FOREACH_WORD(word, l, test, state)
+ assert_se(strneq(expected[i++], word, l));
+}
+
+static void check(const char *test, char** expected, bool trailing) {
+ int i = 0, r;
+
+ printf("<<<%s>>>\n", test);
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&test, &word, NULL, EXTRACT_QUOTES);
+ if (r == 0) {
+ assert_se(!trailing);
+ break;
+ } else if (r < 0) {
+ assert_se(trailing);
+ break;
+ }
+
+ assert_se(streq(word, expected[i++]));
+ printf("<%s>\n", word);
+ }
+ assert_se(expected[i] == NULL);
+}
+
+static void test_foreach_word_quoted(void) {
+ check("test a b c 'd' e '' '' hhh '' '' \"a b c\"",
+ STRV_MAKE("test",
+ "a",
+ "b",
+ "c",
+ "d",
+ "e",
+ "",
+ "",
+ "hhh",
+ "",
+ "",
+ "a b c"),
+ false);
+
+ check("test \"xxx",
+ STRV_MAKE("test"),
+ true);
+
+ check("test\\",
+ STRV_MAKE_EMPTY,
+ true);
+}
+
+static void test_endswith(void) {
+ assert_se(endswith("foobar", "bar"));
+ assert_se(endswith("foobar", ""));
+ assert_se(endswith("foobar", "foobar"));
+ assert_se(endswith("", ""));
+
+ assert_se(!endswith("foobar", "foo"));
+ assert_se(!endswith("foobar", "foobarfoofoo"));
+}
+
+static void test_endswith_no_case(void) {
+ assert_se(endswith_no_case("fooBAR", "bar"));
+ assert_se(endswith_no_case("foobar", ""));
+ assert_se(endswith_no_case("foobar", "FOOBAR"));
+ assert_se(endswith_no_case("", ""));
+
+ assert_se(!endswith_no_case("foobar", "FOO"));
+ assert_se(!endswith_no_case("foobar", "FOOBARFOOFOO"));
+}
+
+static void test_delete_chars(void) {
+ char *s, input[] = " hello, waldo. abc";
+
+ s = delete_chars(input, WHITESPACE);
+ assert_se(streq(s, "hello,waldo.abc"));
+ assert_se(s == input);
+}
+
+static void test_delete_trailing_chars(void) {
+
+ char *s,
+ input1[] = " \n \r k \n \r ",
+ input2[] = "kkkkthiskkkiskkkaktestkkk",
+ input3[] = "abcdef";
+
+ s = delete_trailing_chars(input1, WHITESPACE);
+ assert_se(streq(s, " \n \r k"));
+ assert_se(s == input1);
+
+ s = delete_trailing_chars(input2, "kt");
+ assert_se(streq(s, "kkkkthiskkkiskkkaktes"));
+ assert_se(s == input2);
+
+ s = delete_trailing_chars(input3, WHITESPACE);
+ assert_se(streq(s, "abcdef"));
+ assert_se(s == input3);
+
+ s = delete_trailing_chars(input3, "fe");
+ assert_se(streq(s, "abcd"));
+ assert_se(s == input3);
+}
+
+static void test_delete_trailing_slashes(void) {
+ char s1[] = "foobar//",
+ s2[] = "foobar/",
+ s3[] = "foobar",
+ s4[] = "";
+
+ assert_se(streq(delete_trailing_chars(s1, "_"), "foobar//"));
+ assert_se(streq(delete_trailing_chars(s1, "/"), "foobar"));
+ assert_se(streq(delete_trailing_chars(s2, "/"), "foobar"));
+ assert_se(streq(delete_trailing_chars(s3, "/"), "foobar"));
+ assert_se(streq(delete_trailing_chars(s4, "/"), ""));
+}
+
+static void test_skip_leading_chars(void) {
+ char input1[] = " \n \r k \n \r ",
+ input2[] = "kkkkthiskkkiskkkaktestkkk",
+ input3[] = "abcdef";
+
+ assert_se(streq(skip_leading_chars(input1, WHITESPACE), "k \n \r "));
+ assert_se(streq(skip_leading_chars(input2, "k"), "thiskkkiskkkaktestkkk"));
+ assert_se(streq(skip_leading_chars(input2, "tk"), "hiskkkiskkkaktestkkk"));
+ assert_se(streq(skip_leading_chars(input3, WHITESPACE), "abcdef"));
+ assert_se(streq(skip_leading_chars(input3, "bcaef"), "def"));
+}
+
+static void test_in_charset(void) {
+ assert_se(in_charset("dddaaabbbcccc", "abcd"));
+ assert_se(!in_charset("dddaaabbbcccc", "abc f"));
+}
+
+static void test_split_pair(void) {
+ _cleanup_free_ char *a = NULL, *b = NULL;
+
+ assert_se(split_pair("", "", &a, &b) == -EINVAL);
+ assert_se(split_pair("foo=bar", "", &a, &b) == -EINVAL);
+ assert_se(split_pair("", "=", &a, &b) == -EINVAL);
+ assert_se(split_pair("foo=bar", "=", &a, &b) >= 0);
+ assert_se(streq(a, "foo"));
+ assert_se(streq(b, "bar"));
+ free(a);
+ free(b);
+ assert_se(split_pair("==", "==", &a, &b) >= 0);
+ assert_se(streq(a, ""));
+ assert_se(streq(b, ""));
+ free(a);
+ free(b);
+
+ assert_se(split_pair("===", "==", &a, &b) >= 0);
+ assert_se(streq(a, ""));
+ assert_se(streq(b, "="));
+}
+
+static void test_first_word(void) {
+ assert_se(first_word("Hello", ""));
+ assert_se(first_word("Hello", "Hello"));
+ assert_se(first_word("Hello world", "Hello"));
+ assert_se(first_word("Hello\tworld", "Hello"));
+ assert_se(first_word("Hello\nworld", "Hello"));
+ assert_se(first_word("Hello\rworld", "Hello"));
+ assert_se(first_word("Hello ", "Hello"));
+
+ assert_se(!first_word("Hello", "Hellooo"));
+ assert_se(!first_word("Hello", "xxxxx"));
+ assert_se(!first_word("Hellooo", "Hello"));
+}
+
+static void test_strlen_ptr(void) {
+ assert_se(strlen_ptr("foo") == 3);
+ assert_se(strlen_ptr("") == 0);
+ assert_se(strlen_ptr(NULL) == 0);
+}
+
+static void test_memory_startswith(void) {
+ assert_se(streq(memory_startswith("", 0, ""), ""));
+ assert_se(streq(memory_startswith("", 1, ""), ""));
+ assert_se(streq(memory_startswith("x", 2, ""), "x"));
+ assert_se(!memory_startswith("", 1, "x"));
+ assert_se(!memory_startswith("", 1, "xxxxxxxx"));
+ assert_se(streq(memory_startswith("xxx", 4, "x"), "xx"));
+ assert_se(streq(memory_startswith("xxx", 4, "xx"), "x"));
+ assert_se(streq(memory_startswith("xxx", 4, "xxx"), ""));
+ assert_se(!memory_startswith("xxx", 4, "xxxx"));
+}
+
+static void test_memory_startswith_no_case(void) {
+ assert_se(streq(memory_startswith_no_case("", 0, ""), ""));
+ assert_se(streq(memory_startswith_no_case("", 1, ""), ""));
+ assert_se(streq(memory_startswith_no_case("x", 2, ""), "x"));
+ assert_se(streq(memory_startswith_no_case("X", 2, ""), "X"));
+ assert_se(!memory_startswith_no_case("", 1, "X"));
+ assert_se(!memory_startswith_no_case("", 1, "xxxxXXXX"));
+ assert_se(streq(memory_startswith_no_case("xxx", 4, "X"), "xx"));
+ assert_se(streq(memory_startswith_no_case("XXX", 4, "x"), "XX"));
+ assert_se(streq(memory_startswith_no_case("XXX", 4, "X"), "XX"));
+ assert_se(streq(memory_startswith_no_case("xxx", 4, "XX"), "x"));
+ assert_se(streq(memory_startswith_no_case("XXX", 4, "xx"), "X"));
+ assert_se(streq(memory_startswith_no_case("XXX", 4, "XX"), "X"));
+ assert_se(streq(memory_startswith_no_case("xxx", 4, "XXX"), ""));
+ assert_se(streq(memory_startswith_no_case("XXX", 4, "xxx"), ""));
+ assert_se(streq(memory_startswith_no_case("XXX", 4, "XXX"), ""));
+
+ assert_se(memory_startswith_no_case((char[2]){'x', 'x'}, 2, "xx"));
+ assert_se(memory_startswith_no_case((char[2]){'x', 'X'}, 2, "xX"));
+ assert_se(memory_startswith_no_case((char[2]){'X', 'x'}, 2, "Xx"));
+ assert_se(memory_startswith_no_case((char[2]){'X', 'X'}, 2, "XX"));
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_string_erase();
+ test_free_and_strndup();
+ test_ascii_strcasecmp_n();
+ test_ascii_strcasecmp_nn();
+ test_cellescape();
+ test_streq_ptr();
+ test_strstrip();
+ test_strextend();
+ test_strextend_with_separator();
+ test_strrep();
+ test_strappend();
+ test_string_has_cc();
+ test_ascii_strlower();
+ test_strshorten();
+ test_strjoina();
+ test_strcmp_ptr();
+ test_foreach_word();
+ test_foreach_word_quoted();
+ test_endswith();
+ test_endswith_no_case();
+ test_delete_chars();
+ test_delete_trailing_chars();
+ test_delete_trailing_slashes();
+ test_skip_leading_chars();
+ test_in_charset();
+ test_split_pair();
+ test_first_word();
+ test_strlen_ptr();
+ test_memory_startswith();
+ test_memory_startswith_no_case();
+
+ return 0;
+}
diff --git a/src/test/test-strip-tab-ansi.c b/src/test/test-strip-tab-ansi.c
new file mode 100644
index 0000000..fae384e
--- /dev/null
+++ b/src/test/test-strip-tab-ansi.c
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_free_ char *urlified = NULL, *q = NULL, *qq = NULL;
+ char *p, *z;
+
+ assert_se(p = strdup("\tFoobar\tbar\twaldo\t"));
+ assert_se(strip_tab_ansi(&p, NULL, NULL));
+ fprintf(stdout, "<%s>\n", p);
+ assert_se(streq(p, " Foobar bar waldo "));
+ free(p);
+
+ assert_se(p = strdup(ANSI_HIGHLIGHT "Hello" ANSI_NORMAL ANSI_HIGHLIGHT_RED " world!" ANSI_NORMAL));
+ assert_se(strip_tab_ansi(&p, NULL, NULL));
+ fprintf(stdout, "<%s>\n", p);
+ assert_se(streq(p, "Hello world!"));
+ free(p);
+
+ assert_se(p = strdup("\x1B[\x1B[\t\x1B[" ANSI_HIGHLIGHT "\x1B[" "Hello" ANSI_NORMAL ANSI_HIGHLIGHT_RED " world!" ANSI_NORMAL));
+ assert_se(strip_tab_ansi(&p, NULL, NULL));
+ assert_se(streq(p, "\x1B[\x1B[ \x1B[\x1B[Hello world!"));
+ free(p);
+
+ assert_se(p = strdup("\x1B[waldo"));
+ assert_se(strip_tab_ansi(&p, NULL, NULL));
+ assert_se(streq(p, "\x1B[waldo"));
+ free(p);
+
+ assert_se(terminal_urlify_path("/etc/fstab", "i am a fabulous link", &urlified) >= 0);
+ assert_se(p = strjoin("something ", urlified, " something-else"));
+ assert_se(q = strdup(p));
+ printf("<%s>\n", p);
+ assert_se(strip_tab_ansi(&p, NULL, NULL));
+ printf("<%s>\n", p);
+ assert_se(streq(p, "something i am a fabulous link something-else"));
+ p = mfree(p);
+
+ /* Truncate the formatted string in the middle of an ANSI sequence (in which case we shouldn't touch the
+ * incomplete sequence) */
+ z = strstr(q, "fstab");
+ if (z) {
+ *z = 0;
+ assert_se(qq = strdup(q));
+ assert_se(strip_tab_ansi(&q, NULL, NULL));
+ assert_se(streq(q, qq));
+ }
+
+ return 0;
+}
diff --git a/src/test/test-strv.c b/src/test/test-strv.c
new file mode 100644
index 0000000..31ef1ab
--- /dev/null
+++ b/src/test/test-strv.c
@@ -0,0 +1,927 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static void test_specifier_printf(void) {
+ static const Specifier table[] = {
+ { 'a', specifier_string, (char*) "AAAA" },
+ { 'b', specifier_string, (char*) "BBBB" },
+ { 'm', specifier_machine_id, NULL },
+ { 'B', specifier_boot_id, NULL },
+ { 'H', specifier_host_name, NULL },
+ { 'v', specifier_kernel_release, NULL },
+ {}
+ };
+
+ _cleanup_free_ char *w = NULL;
+ int r;
+
+ r = specifier_printf("xxx a=%a b=%b yyy", table, NULL, &w);
+ assert_se(r >= 0);
+ assert_se(w);
+
+ puts(w);
+ assert_se(streq(w, "xxx a=AAAA b=BBBB yyy"));
+
+ free(w);
+ r = specifier_printf("machine=%m, boot=%B, host=%H, version=%v", table, NULL, &w);
+ assert_se(r >= 0);
+ assert_se(w);
+ puts(w);
+}
+
+static void test_str_in_set(void) {
+ assert_se(STR_IN_SET("x", "x", "y", "z"));
+ assert_se(!STR_IN_SET("X", "x", "y", "z"));
+ assert_se(!STR_IN_SET("", "x", "y", "z"));
+ assert_se(STR_IN_SET("x", "w", "x"));
+}
+
+static void test_strptr_in_set(void) {
+ assert_se(STRPTR_IN_SET("x", "x", "y", "z"));
+ assert_se(!STRPTR_IN_SET("X", "x", "y", "z"));
+ assert_se(!STRPTR_IN_SET("", "x", "y", "z"));
+ assert_se(STRPTR_IN_SET("x", "w", "x"));
+
+ assert_se(!STRPTR_IN_SET(NULL, "x", "y", "z"));
+ assert_se(!STRPTR_IN_SET(NULL, ""));
+ /* strv cannot contain a null, hence the result below */
+ assert_se(!STRPTR_IN_SET(NULL, NULL));
+}
+
+static void test_startswith_set(void) {
+ assert_se(!STARTSWITH_SET("foo", "bar", "baz", "waldo"));
+ assert_se(!STARTSWITH_SET("foo", "bar"));
+
+ assert_se(STARTSWITH_SET("abc", "a", "ab", "abc"));
+ assert_se(STARTSWITH_SET("abc", "ax", "ab", "abc"));
+ assert_se(STARTSWITH_SET("abc", "ax", "abx", "abc"));
+ assert_se(!STARTSWITH_SET("abc", "ax", "abx", "abcx"));
+
+ assert_se(streq_ptr(STARTSWITH_SET("foobar", "hhh", "kkk", "foo", "zzz"), "bar"));
+ assert_se(streq_ptr(STARTSWITH_SET("foobar", "hhh", "kkk", "", "zzz"), "foobar"));
+ assert_se(streq_ptr(STARTSWITH_SET("", "hhh", "kkk", "zzz", ""), ""));
+}
+
+static const char* const input_table_multiple[] = {
+ "one",
+ "two",
+ "three",
+ NULL,
+};
+
+static const char* const input_table_quoted[] = {
+ "one",
+ " two\t three ",
+ " four five",
+ NULL,
+};
+
+static const char* const input_table_one[] = {
+ "one",
+ NULL,
+};
+
+static const char* const input_table_none[] = {
+ NULL,
+};
+
+static const char* const input_table_two_empties[] = {
+ "",
+ "",
+ NULL,
+};
+
+static const char* const input_table_one_empty[] = {
+ "",
+ NULL,
+};
+
+static void test_strv_find(void) {
+ assert_se(strv_find((char **)input_table_multiple, "three"));
+ assert_se(!strv_find((char **)input_table_multiple, "four"));
+}
+
+static void test_strv_find_prefix(void) {
+ assert_se(strv_find_prefix((char **)input_table_multiple, "o"));
+ assert_se(strv_find_prefix((char **)input_table_multiple, "one"));
+ assert_se(strv_find_prefix((char **)input_table_multiple, ""));
+ assert_se(!strv_find_prefix((char **)input_table_multiple, "xxx"));
+ assert_se(!strv_find_prefix((char **)input_table_multiple, "onee"));
+}
+
+static void test_strv_find_startswith(void) {
+ char *r;
+
+ r = strv_find_startswith((char **)input_table_multiple, "o");
+ assert_se(r && streq(r, "ne"));
+
+ r = strv_find_startswith((char **)input_table_multiple, "one");
+ assert_se(r && streq(r, ""));
+
+ r = strv_find_startswith((char **)input_table_multiple, "");
+ assert_se(r && streq(r, "one"));
+
+ assert_se(!strv_find_startswith((char **)input_table_multiple, "xxx"));
+ assert_se(!strv_find_startswith((char **)input_table_multiple, "onee"));
+}
+
+static void test_strv_join(void) {
+ _cleanup_free_ char *p = NULL, *q = NULL, *r = NULL, *s = NULL, *t = NULL, *v = NULL, *w = NULL;
+
+ p = strv_join((char **)input_table_multiple, ", ");
+ assert_se(p);
+ assert_se(streq(p, "one, two, three"));
+
+ q = strv_join((char **)input_table_multiple, ";");
+ assert_se(q);
+ assert_se(streq(q, "one;two;three"));
+
+ r = strv_join((char **)input_table_multiple, NULL);
+ assert_se(r);
+ assert_se(streq(r, "one two three"));
+
+ s = strv_join((char **)input_table_one, ", ");
+ assert_se(s);
+ assert_se(streq(s, "one"));
+
+ t = strv_join((char **)input_table_none, ", ");
+ assert_se(t);
+ assert_se(streq(t, ""));
+
+ v = strv_join((char **)input_table_two_empties, ", ");
+ assert_se(v);
+ assert_se(streq(v, ", "));
+
+ w = strv_join((char **)input_table_one_empty, ", ");
+ assert_se(w);
+ assert_se(streq(w, ""));
+}
+
+static void test_strv_join_prefix(void) {
+ _cleanup_free_ char *p = NULL, *q = NULL, *r = NULL, *s = NULL, *t = NULL, *v = NULL, *w = NULL;
+
+ p = strv_join_prefix((char **)input_table_multiple, ", ", "foo");
+ assert_se(p);
+ assert_se(streq(p, "fooone, footwo, foothree"));
+
+ q = strv_join_prefix((char **)input_table_multiple, ";", "foo");
+ assert_se(q);
+ assert_se(streq(q, "fooone;footwo;foothree"));
+
+ r = strv_join_prefix((char **)input_table_multiple, NULL, "foo");
+ assert_se(r);
+ assert_se(streq(r, "fooone footwo foothree"));
+
+ s = strv_join_prefix((char **)input_table_one, ", ", "foo");
+ assert_se(s);
+ assert_se(streq(s, "fooone"));
+
+ t = strv_join_prefix((char **)input_table_none, ", ", "foo");
+ assert_se(t);
+ assert_se(streq(t, ""));
+
+ v = strv_join_prefix((char **)input_table_two_empties, ", ", "foo");
+ assert_se(v);
+ assert_se(streq(v, "foo, foo"));
+
+ w = strv_join_prefix((char **)input_table_one_empty, ", ", "foo");
+ assert_se(w);
+ assert_se(streq(w, "foo"));
+}
+
+static void test_strv_unquote(const char *quoted, char **list) {
+ _cleanup_strv_free_ char **s;
+ _cleanup_free_ char *j;
+ unsigned i = 0;
+ char **t;
+ int r;
+
+ r = strv_split_extract(&s, quoted, WHITESPACE, EXTRACT_QUOTES);
+ assert_se(r == (int) strv_length(list));
+ assert_se(s);
+ j = strv_join(s, " | ");
+ assert_se(j);
+ puts(j);
+
+ STRV_FOREACH(t, s)
+ assert_se(streq(list[i++], *t));
+
+ assert_se(list[i] == NULL);
+}
+
+static void test_invalid_unquote(const char *quoted) {
+ char **s = NULL;
+ int r;
+
+ r = strv_split_extract(&s, quoted, WHITESPACE, EXTRACT_QUOTES);
+ assert_se(s == NULL);
+ assert_se(r == -EINVAL);
+}
+
+static void test_strv_split(void) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char str[] = "one,two,three";
+
+ l = strv_split(str, ",");
+ assert_se(l);
+ assert_se(strv_equal(l, (char**) input_table_multiple));
+
+ strv_free(l);
+
+ l = strv_split(" one two\t three", WHITESPACE);
+ assert_se(l);
+ assert_se(strv_equal(l, (char**) input_table_multiple));
+
+ strv_free(l);
+
+ /* Setting NULL for separator is equivalent to WHITESPACE */
+ l = strv_split(" one two\t three", NULL);
+ assert_se(l);
+ assert_se(strv_equal(l, (char**) input_table_multiple));
+
+ strv_free(l);
+
+ l = strv_split_full(" one two\t three", NULL, 0);
+ assert_se(l);
+ assert_se(strv_equal(l, (char**) input_table_multiple));
+
+ strv_free(l);
+
+ l = strv_split_full(" 'one' \" two\t three \" ' four five'", NULL, SPLIT_QUOTES);
+ assert_se(l);
+ assert_se(strv_equal(l, (char**) input_table_quoted));
+
+ strv_free(l);
+
+ /* missing last quote ignores the last element. */
+ l = strv_split_full(" 'one' \" two\t three \" ' four five' ' ignored element ", NULL, SPLIT_QUOTES);
+ assert_se(l);
+ assert_se(strv_equal(l, (char**) input_table_quoted));
+
+ strv_free(l);
+
+ /* missing last quote, but the last element is _not_ ignored with SPLIT_RELAX. */
+ l = strv_split_full(" 'one' \" two\t three \" ' four five", NULL, SPLIT_QUOTES | SPLIT_RELAX);
+ assert_se(l);
+ assert_se(strv_equal(l, (char**) input_table_quoted));
+
+ strv_free(l);
+
+ /* missing separator between */
+ l = strv_split_full(" 'one' \" two\t three \"' four five'", NULL, SPLIT_QUOTES | SPLIT_RELAX);
+ assert_se(l);
+ assert_se(strv_equal(l, (char**) input_table_quoted));
+
+ strv_free(l);
+
+ l = strv_split_full(" 'one' \" two\t three \"' four five", NULL, SPLIT_QUOTES | SPLIT_RELAX);
+ assert_se(l);
+ assert_se(strv_equal(l, (char**) input_table_quoted));
+}
+
+static void test_strv_split_empty(void) {
+ _cleanup_strv_free_ char **l = NULL;
+
+ l = strv_split("", WHITESPACE);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+
+ strv_free(l);
+ l = strv_split("", NULL);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+
+ strv_free(l);
+ l = strv_split_full("", NULL, 0);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+
+ strv_free(l);
+ l = strv_split_full("", NULL, SPLIT_QUOTES);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+
+ strv_free(l);
+ l = strv_split_full("", WHITESPACE, SPLIT_QUOTES);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+
+ strv_free(l);
+ l = strv_split_full("", WHITESPACE, SPLIT_QUOTES | SPLIT_RELAX);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+
+ strv_free(l);
+ l = strv_split(" ", WHITESPACE);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+
+ strv_free(l);
+ l = strv_split(" ", NULL);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+
+ strv_free(l);
+ l = strv_split_full(" ", NULL, 0);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+
+ strv_free(l);
+ l = strv_split_full(" ", WHITESPACE, SPLIT_QUOTES);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+
+ strv_free(l);
+ l = strv_split_full(" ", NULL, SPLIT_QUOTES);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+
+ strv_free(l);
+ l = strv_split_full(" ", NULL, SPLIT_QUOTES | SPLIT_RELAX);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+}
+
+static void test_strv_split_extract(void) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char *str = ":foo\\:bar::waldo:";
+ int r;
+
+ r = strv_split_extract(&l, str, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ assert_se(r == (int) strv_length(l));
+ assert_se(streq_ptr(l[0], ""));
+ assert_se(streq_ptr(l[1], "foo:bar"));
+ assert_se(streq_ptr(l[2], ""));
+ assert_se(streq_ptr(l[3], "waldo"));
+ assert_se(streq_ptr(l[4], ""));
+ assert_se(streq_ptr(l[5], NULL));
+}
+
+static void test_strv_split_newlines(void) {
+ unsigned i = 0;
+ char **s;
+ _cleanup_strv_free_ char **l = NULL;
+ const char str[] = "one\ntwo\nthree";
+
+ l = strv_split_newlines(str);
+
+ assert_se(l);
+
+ STRV_FOREACH(s, l) {
+ assert_se(streq(*s, input_table_multiple[i++]));
+ }
+}
+
+static void test_strv_split_nulstr(void) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char nulstr[] = "str0\0str1\0str2\0str3\0";
+
+ l = strv_split_nulstr (nulstr);
+ assert_se(l);
+
+ assert_se(streq(l[0], "str0"));
+ assert_se(streq(l[1], "str1"));
+ assert_se(streq(l[2], "str2"));
+ assert_se(streq(l[3], "str3"));
+}
+
+static void test_strv_parse_nulstr(void) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char nulstr[] = "hoge\0hoge2\0hoge3\0\0hoge5\0\0xxx";
+
+ l = strv_parse_nulstr(nulstr, sizeof(nulstr)-1);
+ assert_se(l);
+ puts("Parse nulstr:");
+ strv_print(l);
+
+ assert_se(streq(l[0], "hoge"));
+ assert_se(streq(l[1], "hoge2"));
+ assert_se(streq(l[2], "hoge3"));
+ assert_se(streq(l[3], ""));
+ assert_se(streq(l[4], "hoge5"));
+ assert_se(streq(l[5], ""));
+ assert_se(streq(l[6], "xxx"));
+}
+
+static void test_strv_overlap(void) {
+ const char * const input_table[] = {
+ "one",
+ "two",
+ "three",
+ NULL
+ };
+ const char * const input_table_overlap[] = {
+ "two",
+ NULL
+ };
+ const char * const input_table_unique[] = {
+ "four",
+ "five",
+ "six",
+ NULL
+ };
+
+ assert_se(strv_overlap((char **)input_table, (char**)input_table_overlap));
+ assert_se(!strv_overlap((char **)input_table, (char**)input_table_unique));
+}
+
+static void test_strv_sort(void) {
+ const char* input_table[] = {
+ "durian",
+ "apple",
+ "citrus",
+ "CAPITAL LETTERS FIRST",
+ "banana",
+ NULL
+ };
+
+ strv_sort((char **)input_table);
+
+ assert_se(streq(input_table[0], "CAPITAL LETTERS FIRST"));
+ assert_se(streq(input_table[1], "apple"));
+ assert_se(streq(input_table[2], "banana"));
+ assert_se(streq(input_table[3], "citrus"));
+ assert_se(streq(input_table[4], "durian"));
+}
+
+static void test_strv_extend_strv_concat(void) {
+ _cleanup_strv_free_ char **a = NULL, **b = NULL;
+
+ a = strv_new("without", "suffix");
+ b = strv_new("with", "suffix");
+ assert_se(a);
+ assert_se(b);
+
+ assert_se(strv_extend_strv_concat(&a, b, "_suffix") >= 0);
+
+ assert_se(streq(a[0], "without"));
+ assert_se(streq(a[1], "suffix"));
+ assert_se(streq(a[2], "with_suffix"));
+ assert_se(streq(a[3], "suffix_suffix"));
+}
+
+static void test_strv_extend_strv(void) {
+ _cleanup_strv_free_ char **a = NULL, **b = NULL, **n = NULL;
+
+ a = strv_new("abc", "def", "ghi");
+ b = strv_new("jkl", "mno", "abc", "pqr");
+ assert_se(a);
+ assert_se(b);
+
+ assert_se(strv_extend_strv(&a, b, true) == 3);
+
+ assert_se(streq(a[0], "abc"));
+ assert_se(streq(a[1], "def"));
+ assert_se(streq(a[2], "ghi"));
+ assert_se(streq(a[3], "jkl"));
+ assert_se(streq(a[4], "mno"));
+ assert_se(streq(a[5], "pqr"));
+ assert_se(strv_length(a) == 6);
+
+ assert_se(strv_extend_strv(&n, b, false) >= 0);
+ assert_se(streq(n[0], "jkl"));
+ assert_se(streq(n[1], "mno"));
+ assert_se(streq(n[2], "abc"));
+ assert_se(streq(n[3], "pqr"));
+ assert_se(strv_length(n) == 4);
+}
+
+static void test_strv_extend(void) {
+ _cleanup_strv_free_ char **a = NULL, **b = NULL;
+
+ a = strv_new("test", "test1");
+ assert_se(a);
+ assert_se(strv_extend(&a, "test2") >= 0);
+ assert_se(strv_extend(&b, "test3") >= 0);
+
+ assert_se(streq(a[0], "test"));
+ assert_se(streq(a[1], "test1"));
+ assert_se(streq(a[2], "test2"));
+ assert_se(streq(b[0], "test3"));
+}
+
+static void test_strv_extendf(void) {
+ _cleanup_strv_free_ char **a = NULL, **b = NULL;
+
+ a = strv_new("test", "test1");
+ assert_se(a);
+ assert_se(strv_extendf(&a, "test2 %s %d %s", "foo", 128, "bar") >= 0);
+ assert_se(strv_extendf(&b, "test3 %s %s %d", "bar", "foo", 128) >= 0);
+
+ assert_se(streq(a[0], "test"));
+ assert_se(streq(a[1], "test1"));
+ assert_se(streq(a[2], "test2 foo 128 bar"));
+ assert_se(streq(b[0], "test3 bar foo 128"));
+}
+
+static void test_strv_foreach(void) {
+ _cleanup_strv_free_ char **a;
+ unsigned i = 0;
+ char **check;
+
+ a = strv_new("one", "two", "three");
+
+ assert_se(a);
+
+ STRV_FOREACH(check, a) {
+ assert_se(streq(*check, input_table_multiple[i++]));
+ }
+}
+
+static void test_strv_foreach_backwards(void) {
+ _cleanup_strv_free_ char **a;
+ unsigned i = 2;
+ char **check;
+
+ a = strv_new("one", "two", "three");
+
+ assert_se(a);
+
+ STRV_FOREACH_BACKWARDS(check, a)
+ assert_se(streq_ptr(*check, input_table_multiple[i--]));
+
+ STRV_FOREACH_BACKWARDS(check, (char**) NULL)
+ assert_not_reached("Let's see that we check empty strv right, too.");
+
+ STRV_FOREACH_BACKWARDS(check, (char**) { NULL })
+ assert_not_reached("Let's see that we check empty strv right, too.");
+}
+
+static void test_strv_foreach_pair(void) {
+ _cleanup_strv_free_ char **a = NULL;
+ char **x, **y;
+
+ a = strv_new("pair_one", "pair_one",
+ "pair_two", "pair_two",
+ "pair_three", "pair_three");
+
+ STRV_FOREACH_PAIR(x, y, a) {
+ assert_se(streq(*x, *y));
+ }
+}
+
+static void test_strv_from_stdarg_alloca_one(char **l, const char *first, ...) {
+ char **j;
+ unsigned i;
+
+ j = strv_from_stdarg_alloca(first);
+
+ for (i = 0;; i++) {
+ assert_se(streq_ptr(l[i], j[i]));
+
+ if (!l[i])
+ break;
+ }
+}
+
+static void test_strv_from_stdarg_alloca(void) {
+ test_strv_from_stdarg_alloca_one(STRV_MAKE("foo", "bar"), "foo", "bar", NULL);
+ test_strv_from_stdarg_alloca_one(STRV_MAKE("foo"), "foo", NULL);
+ test_strv_from_stdarg_alloca_one(STRV_MAKE_EMPTY, NULL);
+}
+
+static void test_strv_insert(void) {
+ _cleanup_strv_free_ char **a = NULL;
+
+ assert_se(strv_insert(&a, 0, strdup("first")) == 0);
+ assert_se(streq(a[0], "first"));
+ assert_se(!a[1]);
+
+ assert_se(strv_insert(&a, 0, NULL) == 0);
+ assert_se(streq(a[0], "first"));
+ assert_se(!a[1]);
+
+ assert_se(strv_insert(&a, 1, strdup("two")) == 0);
+ assert_se(streq(a[0], "first"));
+ assert_se(streq(a[1], "two"));
+ assert_se(!a[2]);
+
+ assert_se(strv_insert(&a, 4, strdup("tri")) == 0);
+ assert_se(streq(a[0], "first"));
+ assert_se(streq(a[1], "two"));
+ assert_se(streq(a[2], "tri"));
+ assert_se(!a[3]);
+
+ assert_se(strv_insert(&a, 1, strdup("duo")) == 0);
+ assert_se(streq(a[0], "first"));
+ assert_se(streq(a[1], "duo"));
+ assert_se(streq(a[2], "two"));
+ assert_se(streq(a[3], "tri"));
+ assert_se(!a[4]);
+}
+
+static void test_strv_push_prepend(void) {
+ _cleanup_strv_free_ char **a = NULL;
+
+ a = strv_new("foo", "bar", "three");
+
+ assert_se(strv_push_prepend(&a, strdup("first")) >= 0);
+ assert_se(streq(a[0], "first"));
+ assert_se(streq(a[1], "foo"));
+ assert_se(streq(a[2], "bar"));
+ assert_se(streq(a[3], "three"));
+ assert_se(!a[4]);
+
+ assert_se(strv_consume_prepend(&a, strdup("first2")) >= 0);
+ assert_se(streq(a[0], "first2"));
+ assert_se(streq(a[1], "first"));
+ assert_se(streq(a[2], "foo"));
+ assert_se(streq(a[3], "bar"));
+ assert_se(streq(a[4], "three"));
+ assert_se(!a[5]);
+}
+
+static void test_strv_push(void) {
+ _cleanup_strv_free_ char **a = NULL;
+ char *i, *j;
+
+ assert_se(i = strdup("foo"));
+ assert_se(strv_push(&a, i) >= 0);
+
+ assert_se(i = strdup("a"));
+ assert_se(j = strdup("b"));
+ assert_se(strv_push_pair(&a, i, j) >= 0);
+
+ assert_se(streq_ptr(a[0], "foo"));
+ assert_se(streq_ptr(a[1], "a"));
+ assert_se(streq_ptr(a[2], "b"));
+ assert_se(streq_ptr(a[3], NULL));
+}
+
+static void test_strv_equal(void) {
+ _cleanup_strv_free_ char **a = NULL;
+ _cleanup_strv_free_ char **b = NULL;
+ _cleanup_strv_free_ char **c = NULL;
+
+ a = strv_new("one", "two", "three");
+ assert_se(a);
+ b = strv_new("one", "two", "three");
+ assert_se(a);
+ c = strv_new("one", "two", "three", "four");
+ assert_se(a);
+
+ assert_se(strv_equal(a, a));
+ assert_se(strv_equal(a, b));
+ assert_se(strv_equal(NULL, NULL));
+
+ assert_se(!strv_equal(a, c));
+ assert_se(!strv_equal(b, c));
+ assert_se(!strv_equal(b, NULL));
+}
+
+static void test_strv_is_uniq(void) {
+ _cleanup_strv_free_ char **a = NULL, **b = NULL, **c = NULL, **d = NULL;
+
+ a = strv_new(NULL);
+ assert_se(a);
+ assert_se(strv_is_uniq(a));
+
+ b = strv_new("foo");
+ assert_se(b);
+ assert_se(strv_is_uniq(b));
+
+ c = strv_new("foo", "bar");
+ assert_se(c);
+ assert_se(strv_is_uniq(c));
+
+ d = strv_new("foo", "bar", "waldo", "bar", "piep");
+ assert_se(d);
+ assert_se(!strv_is_uniq(d));
+}
+
+static void test_strv_reverse(void) {
+ _cleanup_strv_free_ char **a = NULL, **b = NULL, **c = NULL, **d = NULL;
+
+ a = strv_new(NULL);
+ assert_se(a);
+
+ strv_reverse(a);
+ assert_se(strv_isempty(a));
+
+ b = strv_new("foo");
+ assert_se(b);
+ strv_reverse(b);
+ assert_se(streq_ptr(b[0], "foo"));
+ assert_se(streq_ptr(b[1], NULL));
+
+ c = strv_new("foo", "bar");
+ assert_se(c);
+ strv_reverse(c);
+ assert_se(streq_ptr(c[0], "bar"));
+ assert_se(streq_ptr(c[1], "foo"));
+ assert_se(streq_ptr(c[2], NULL));
+
+ d = strv_new("foo", "bar", "waldo");
+ assert_se(d);
+ strv_reverse(d);
+ assert_se(streq_ptr(d[0], "waldo"));
+ assert_se(streq_ptr(d[1], "bar"));
+ assert_se(streq_ptr(d[2], "foo"));
+ assert_se(streq_ptr(d[3], NULL));
+}
+
+static void test_strv_shell_escape(void) {
+ _cleanup_strv_free_ char **v = NULL;
+
+ v = strv_new("foo:bar", "bar,baz", "wal\\do");
+ assert_se(v);
+ assert_se(strv_shell_escape(v, ",:"));
+ assert_se(streq_ptr(v[0], "foo\\:bar"));
+ assert_se(streq_ptr(v[1], "bar\\,baz"));
+ assert_se(streq_ptr(v[2], "wal\\\\do"));
+ assert_se(streq_ptr(v[3], NULL));
+}
+
+static void test_strv_skip_one(char **a, size_t n, char **b) {
+ a = strv_skip(a, n);
+ assert_se(strv_equal(a, b));
+}
+
+static void test_strv_skip(void) {
+ test_strv_skip_one(STRV_MAKE("foo", "bar", "baz"), 0, STRV_MAKE("foo", "bar", "baz"));
+ test_strv_skip_one(STRV_MAKE("foo", "bar", "baz"), 1, STRV_MAKE("bar", "baz"));
+ test_strv_skip_one(STRV_MAKE("foo", "bar", "baz"), 2, STRV_MAKE("baz"));
+ test_strv_skip_one(STRV_MAKE("foo", "bar", "baz"), 3, STRV_MAKE(NULL));
+ test_strv_skip_one(STRV_MAKE("foo", "bar", "baz"), 4, STRV_MAKE(NULL));
+ test_strv_skip_one(STRV_MAKE("foo", "bar", "baz"), 55, STRV_MAKE(NULL));
+
+ test_strv_skip_one(STRV_MAKE("quux"), 0, STRV_MAKE("quux"));
+ test_strv_skip_one(STRV_MAKE("quux"), 1, STRV_MAKE(NULL));
+ test_strv_skip_one(STRV_MAKE("quux"), 55, STRV_MAKE(NULL));
+
+ test_strv_skip_one(STRV_MAKE(NULL), 0, STRV_MAKE(NULL));
+ test_strv_skip_one(STRV_MAKE(NULL), 1, STRV_MAKE(NULL));
+ test_strv_skip_one(STRV_MAKE(NULL), 55, STRV_MAKE(NULL));
+}
+
+static void test_strv_extend_n(void) {
+ _cleanup_strv_free_ char **v = NULL;
+
+ v = strv_new("foo", "bar");
+ assert_se(v);
+
+ assert_se(strv_extend_n(&v, "waldo", 3) >= 0);
+ assert_se(strv_extend_n(&v, "piep", 2) >= 0);
+
+ assert_se(streq(v[0], "foo"));
+ assert_se(streq(v[1], "bar"));
+ assert_se(streq(v[2], "waldo"));
+ assert_se(streq(v[3], "waldo"));
+ assert_se(streq(v[4], "waldo"));
+ assert_se(streq(v[5], "piep"));
+ assert_se(streq(v[6], "piep"));
+ assert_se(v[7] == NULL);
+
+ v = strv_free(v);
+
+ assert_se(strv_extend_n(&v, "foo", 1) >= 0);
+ assert_se(strv_extend_n(&v, "bar", 0) >= 0);
+
+ assert_se(streq(v[0], "foo"));
+ assert_se(v[1] == NULL);
+}
+
+static void test_strv_make_nulstr_one(char **l) {
+ _cleanup_free_ char *b = NULL, *c = NULL;
+ _cleanup_strv_free_ char **q = NULL;
+ const char *s = NULL;
+ size_t n, m;
+ unsigned i = 0;
+
+ assert_se(strv_make_nulstr(l, &b, &n) >= 0);
+ assert_se(q = strv_parse_nulstr(b, n));
+ assert_se(strv_equal(l, q));
+
+ assert_se(strv_make_nulstr(q, &c, &m) >= 0);
+ assert_se(m == n);
+ assert_se(memcmp(b, c, m) == 0);
+
+ NULSTR_FOREACH(s, b)
+ assert_se(streq(s, l[i++]));
+ assert_se(i == strv_length(l));
+}
+
+static void test_strv_make_nulstr(void) {
+ test_strv_make_nulstr_one(NULL);
+ test_strv_make_nulstr_one(STRV_MAKE(NULL));
+ test_strv_make_nulstr_one(STRV_MAKE("foo"));
+ test_strv_make_nulstr_one(STRV_MAKE("foo", "bar"));
+ test_strv_make_nulstr_one(STRV_MAKE("foo", "bar", "quuux"));
+}
+
+static void test_strv_free_free(void) {
+ char ***t;
+
+ assert_se(t = new(char**, 3));
+ assert_se(t[0] = strv_new("a", "b"));
+ assert_se(t[1] = strv_new("c", "d", "e"));
+ t[2] = NULL;
+
+ t = strv_free_free(t);
+}
+
+static void test_foreach_string(void) {
+ const char * const t[] = {
+ "foo",
+ "bar",
+ "waldo",
+ NULL
+ };
+ const char *x;
+ unsigned i = 0;
+
+ FOREACH_STRING(x, "foo", "bar", "waldo")
+ assert_se(streq_ptr(t[i++], x));
+
+ assert_se(i == 3);
+
+ FOREACH_STRING(x, "zzz")
+ assert_se(streq(x, "zzz"));
+}
+
+static void test_strv_fnmatch(void) {
+ _cleanup_strv_free_ char **v = NULL;
+
+ assert_se(!strv_fnmatch(STRV_MAKE_EMPTY, "a", 0));
+
+ v = strv_new("*\\*");
+ assert_se(!strv_fnmatch(v, "\\", 0));
+ assert_se(strv_fnmatch(v, "\\", FNM_NOESCAPE));
+}
+
+int main(int argc, char *argv[]) {
+ test_specifier_printf();
+ test_str_in_set();
+ test_strptr_in_set();
+ test_startswith_set();
+ test_strv_foreach();
+ test_strv_foreach_backwards();
+ test_strv_foreach_pair();
+ test_strv_find();
+ test_strv_find_prefix();
+ test_strv_find_startswith();
+ test_strv_join();
+ test_strv_join_prefix();
+
+ test_strv_unquote(" foo=bar \"waldo\" zzz ", STRV_MAKE("foo=bar", "waldo", "zzz"));
+ test_strv_unquote("", STRV_MAKE_EMPTY);
+ test_strv_unquote(" ", STRV_MAKE_EMPTY);
+ test_strv_unquote(" ", STRV_MAKE_EMPTY);
+ test_strv_unquote(" x", STRV_MAKE("x"));
+ test_strv_unquote("x ", STRV_MAKE("x"));
+ test_strv_unquote(" x ", STRV_MAKE("x"));
+ test_strv_unquote(" \"x\" ", STRV_MAKE("x"));
+ test_strv_unquote(" 'x' ", STRV_MAKE("x"));
+ test_strv_unquote(" 'x\"' ", STRV_MAKE("x\""));
+ test_strv_unquote(" \"x'\" ", STRV_MAKE("x'"));
+ test_strv_unquote("a '--b=c \"d e\"'", STRV_MAKE("a", "--b=c \"d e\""));
+
+ /* trailing backslashes */
+ test_strv_unquote(" x\\\\", STRV_MAKE("x\\"));
+ test_invalid_unquote(" x\\");
+
+ test_invalid_unquote("a --b='c \"d e\"''");
+ test_invalid_unquote("a --b='c \"d e\" '\"");
+ test_invalid_unquote("a --b='c \"d e\"garbage");
+ test_invalid_unquote("'");
+ test_invalid_unquote("\"");
+ test_invalid_unquote("'x'y'g");
+
+ test_strv_split();
+ test_strv_split_empty();
+ test_strv_split_extract();
+ test_strv_split_newlines();
+ test_strv_split_nulstr();
+ test_strv_parse_nulstr();
+ test_strv_overlap();
+ test_strv_sort();
+ test_strv_extend_strv();
+ test_strv_extend_strv_concat();
+ test_strv_extend();
+ test_strv_extendf();
+ test_strv_from_stdarg_alloca();
+ test_strv_insert();
+ test_strv_push_prepend();
+ test_strv_push();
+ test_strv_equal();
+ test_strv_is_uniq();
+ test_strv_reverse();
+ test_strv_shell_escape();
+ test_strv_skip();
+ test_strv_extend_n();
+ test_strv_make_nulstr();
+ test_strv_free_free();
+
+ test_foreach_string();
+ test_strv_fnmatch();
+
+ return 0;
+}
diff --git a/src/test/test-strxcpyx.c b/src/test/test-strxcpyx.c
new file mode 100644
index 0000000..21d56d9
--- /dev/null
+++ b/src/test/test-strxcpyx.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "util.h"
+
+static void test_strpcpy(void) {
+ char target[25];
+ char *s = target;
+ size_t space_left;
+
+ space_left = sizeof(target);
+ space_left = strpcpy(&s, space_left, "12345");
+ space_left = strpcpy(&s, space_left, "hey hey hey");
+ space_left = strpcpy(&s, space_left, "waldo");
+ space_left = strpcpy(&s, space_left, "ba");
+ space_left = strpcpy(&s, space_left, "r");
+ space_left = strpcpy(&s, space_left, "foo");
+
+ assert_se(streq(target, "12345hey hey heywaldobar"));
+ assert_se(space_left == 0);
+}
+
+static void test_strpcpyf(void) {
+ char target[25];
+ char *s = target;
+ size_t space_left;
+
+ space_left = sizeof(target);
+ space_left = strpcpyf(&s, space_left, "space left: %zu. ", space_left);
+ space_left = strpcpyf(&s, space_left, "foo%s", "bar");
+
+ assert_se(streq(target, "space left: 25. foobar"));
+ assert_se(space_left == 3);
+
+ /* test overflow */
+ s = target;
+ space_left = strpcpyf(&s, 12, "00 left: %i. ", 999);
+ assert_se(streq(target, "00 left: 99"));
+ assert_se(space_left == 0);
+ assert_se(target[12] == '2');
+}
+
+static void test_strpcpyl(void) {
+ char target[25];
+ char *s = target;
+ size_t space_left;
+
+ space_left = sizeof(target);
+ space_left = strpcpyl(&s, space_left, "waldo", " test", " waldo. ", NULL);
+ space_left = strpcpyl(&s, space_left, "Banana", NULL);
+
+ assert_se(streq(target, "waldo test waldo. Banana"));
+ assert_se(space_left == 1);
+}
+
+static void test_strscpy(void) {
+ char target[25];
+ size_t space_left;
+
+ space_left = sizeof(target);
+ space_left = strscpy(target, space_left, "12345");
+
+ assert_se(streq(target, "12345"));
+ assert_se(space_left == 20);
+}
+
+static void test_strscpyl(void) {
+ char target[25];
+ size_t space_left;
+
+ space_left = sizeof(target);
+ space_left = strscpyl(target, space_left, "12345", "waldo", "waldo", NULL);
+
+ assert_se(streq(target, "12345waldowaldo"));
+ assert_se(space_left == 10);
+}
+
+int main(int argc, char *argv[]) {
+ test_strpcpy();
+ test_strpcpyf();
+ test_strpcpyl();
+ test_strscpy();
+ test_strscpyl();
+
+ return 0;
+}
diff --git a/src/test/test-systemd-tmpfiles.py b/src/test/test-systemd-tmpfiles.py
new file mode 100755
index 0000000..83a66e8
--- /dev/null
+++ b/src/test/test-systemd-tmpfiles.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+import os
+import sys
+import socket
+import subprocess
+import tempfile
+import pwd
+import grp
+
+try:
+ from systemd import id128
+except ImportError:
+ id128 = None
+
+EX_DATAERR = 65 # from sysexits.h
+EXIT_TEST_SKIP = 77
+
+try:
+ subprocess.run
+except AttributeError:
+ sys.exit(EXIT_TEST_SKIP)
+
+exe_with_args = sys.argv[1:]
+
+def test_line(line, *, user, returncode=EX_DATAERR, extra={}):
+ args = ['--user'] if user else []
+ print('Running {} on {!r}'.format(' '.join(exe_with_args + args), line))
+ c = subprocess.run(exe_with_args + ['--create', '-'] + args,
+ input=line, stdout=subprocess.PIPE, universal_newlines=True,
+ **extra)
+ assert c.returncode == returncode, c
+
+def test_invalids(*, user):
+ test_line('asdfa', user=user)
+ test_line('f "open quote', user=user)
+ test_line('f closed quote""', user=user)
+ test_line('Y /unknown/letter', user=user)
+ test_line('w non/absolute/path', user=user)
+ test_line('s', user=user) # s is for short
+ test_line('f!! /too/many/bangs', user=user)
+ test_line('f++ /too/many/plusses', user=user)
+ test_line('f+!+ /too/many/plusses', user=user)
+ test_line('f!+! /too/many/bangs', user=user)
+ test_line('w /unresolved/argument - - - - "%Y"', user=user)
+ test_line('w /unresolved/argument/sandwich - - - - "%v%Y%v"', user=user)
+ test_line('w /unresolved/filename/%Y - - - - "whatever"', user=user)
+ test_line('w /unresolved/filename/sandwich/%v%Y%v - - - - "whatever"', user=user)
+ test_line('w - - - - - "no file specfied"', user=user)
+ test_line('C - - - - - "no file specfied"', user=user)
+ test_line('C non/absolute/path - - - - -', user=user)
+ test_line('b - - - - - -', user=user)
+ test_line('b 1234 - - - - -', user=user)
+ test_line('c - - - - - -', user=user)
+ test_line('c 1234 - - - - -', user=user)
+ test_line('t - - -', user=user)
+ test_line('T - - -', user=user)
+ test_line('a - - -', user=user)
+ test_line('A - - -', user=user)
+ test_line('h - - -', user=user)
+ test_line('H - - -', user=user)
+
+def test_uninitialized_t():
+ if os.getuid() == 0:
+ return
+
+ test_line('w /foo - - - - "specifier for --user %t"',
+ user=True, returncode=0, extra={'env':{}})
+
+def test_content(line, expected, *, user, extra={}):
+ d = tempfile.TemporaryDirectory(prefix='test-systemd-tmpfiles.')
+ arg = d.name + '/arg'
+ spec = line.format(arg)
+ test_line(spec, user=user, returncode=0, extra=extra)
+ content = open(arg).read()
+ print('expect: {!r}\nactual: {!r}'.format(expected, content))
+ assert content == expected
+
+def test_valid_specifiers(*, user):
+ test_content('f {} - - - - two words', 'two words', user=user)
+ if id128:
+ try:
+ test_content('f {} - - - - %m', '{}'.format(id128.get_machine().hex), user=user)
+ except AssertionError as e:
+ print(e)
+ print('/etc/machine-id: {!r}'.format(open('/etc/machine-id').read()))
+ print('/proc/cmdline: {!r}'.format(open('/proc/cmdline').read()))
+ print('skipping')
+ test_content('f {} - - - - %b', '{}'.format(id128.get_boot().hex), user=user)
+ test_content('f {} - - - - %H', '{}'.format(socket.gethostname()), user=user)
+ test_content('f {} - - - - %v', '{}'.format(os.uname().release), user=user)
+ test_content('f {} - - - - %U', '{}'.format(os.getuid()), user=user)
+ test_content('f {} - - - - %G', '{}'.format(os.getgid()), user=user)
+
+ puser = pwd.getpwuid(os.getuid())
+ test_content('f {} - - - - %u', '{}'.format(puser.pw_name), user=user)
+
+ pgroup = grp.getgrgid(os.getgid())
+ test_content('f {} - - - - %g', '{}'.format(pgroup.gr_name), user=user)
+
+ # Note that %h is the only specifier in which we look the environment,
+ # because we check $HOME. Should we even be doing that?
+ home = os.path.expanduser("~")
+ test_content('f {} - - - - %h', '{}'.format(home), user=user)
+
+ xdg_runtime_dir = os.getenv('XDG_RUNTIME_DIR')
+ if xdg_runtime_dir is not None or not user:
+ test_content('f {} - - - - %t',
+ xdg_runtime_dir if user else '/run',
+ user=user)
+
+ xdg_config_home = os.getenv('XDG_CONFIG_HOME')
+ if xdg_config_home is not None or not user:
+ test_content('f {} - - - - %S',
+ xdg_config_home if user else '/var/lib',
+ user=user)
+
+ xdg_cache_home = os.getenv('XDG_CACHE_HOME')
+ if xdg_cache_home is not None or not user:
+ test_content('f {} - - - - %C',
+ xdg_cache_home if user else '/var/cache',
+ user=user)
+
+ if xdg_config_home is not None or not user:
+ test_content('f {} - - - - %L',
+ xdg_config_home + '/log' if user else '/var/log',
+ user=user)
+
+ test_content('f {} - - - - %%', '%', user=user)
+
+if __name__ == '__main__':
+ test_invalids(user=False)
+ test_invalids(user=True)
+ test_uninitialized_t()
+
+ test_valid_specifiers(user=False)
+ test_valid_specifiers(user=True)
diff --git a/src/test/test-tables.c b/src/test/test-tables.c
new file mode 100644
index 0000000..49268ea
--- /dev/null
+++ b/src/test/test-tables.c
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "architecture.h"
+#include "automount.h"
+#include "cgroup.h"
+#include "compress.h"
+#include "condition.h"
+#include "device-internal.h"
+#include "device.h"
+#include "execute.h"
+#include "import-util.h"
+#include "install.h"
+#include "job.h"
+#include "journald-server.h"
+#include "kill.h"
+#include "link-config.h"
+#include "locale-util.h"
+#include "log.h"
+#include "logs-show.h"
+#include "machine-image.h"
+#include "mount.h"
+#include "path.h"
+#include "process-util.h"
+#include "resolve-util.h"
+#include "rlimit-util.h"
+#include "scope.h"
+#include "service.h"
+#include "show-status.h"
+#include "slice.h"
+#include "socket-util.h"
+#include "socket.h"
+#include "swap.h"
+#include "target.h"
+#include "test-tables.h"
+#include "timer.h"
+#include "unit-name.h"
+#include "unit.h"
+#include "util.h"
+#include "virt.h"
+
+int main(int argc, char **argv) {
+ test_table(architecture, ARCHITECTURE);
+ test_table(assert_type, CONDITION_TYPE);
+ test_table(automount_result, AUTOMOUNT_RESULT);
+ test_table(automount_state, AUTOMOUNT_STATE);
+ test_table(cgroup_controller, CGROUP_CONTROLLER);
+ test_table(cgroup_device_policy, CGROUP_DEVICE_POLICY);
+ test_table(cgroup_io_limit_type, CGROUP_IO_LIMIT_TYPE);
+ test_table(collect_mode, COLLECT_MODE);
+ test_table(condition_result, CONDITION_RESULT);
+ test_table(condition_type, CONDITION_TYPE);
+ test_table(device_action, DEVICE_ACTION);
+ test_table(device_state, DEVICE_STATE);
+ test_table(dns_over_tls_mode, DNS_OVER_TLS_MODE);
+ test_table(dnssec_mode, DNSSEC_MODE);
+ test_table(emergency_action, EMERGENCY_ACTION);
+ test_table(exec_directory_type, EXEC_DIRECTORY_TYPE);
+ test_table(exec_input, EXEC_INPUT);
+ test_table(exec_keyring_mode, EXEC_KEYRING_MODE);
+ test_table(exec_output, EXEC_OUTPUT);
+ test_table(exec_preserve_mode, EXEC_PRESERVE_MODE);
+ test_table(exec_utmp_mode, EXEC_UTMP_MODE);
+ test_table(image_type, IMAGE_TYPE);
+ test_table(import_verify, IMPORT_VERIFY);
+ test_table(job_mode, JOB_MODE);
+ test_table(job_result, JOB_RESULT);
+ test_table(job_state, JOB_STATE);
+ test_table(job_type, JOB_TYPE);
+ test_table(kill_mode, KILL_MODE);
+ test_table(kill_who, KILL_WHO);
+ test_table(locale_variable, VARIABLE_LC);
+ test_table(log_target, LOG_TARGET);
+ test_table(mac_policy, MACPOLICY);
+ test_table(manager_state, MANAGER_STATE);
+ test_table(manager_timestamp, MANAGER_TIMESTAMP);
+ test_table(mount_exec_command, MOUNT_EXEC_COMMAND);
+ test_table(mount_result, MOUNT_RESULT);
+ test_table(mount_state, MOUNT_STATE);
+ test_table(name_policy, NAMEPOLICY);
+ test_table(namespace_type, NAMESPACE_TYPE);
+ test_table(notify_access, NOTIFY_ACCESS);
+ test_table(notify_state, NOTIFY_STATE);
+ test_table(output_mode, OUTPUT_MODE);
+ test_table(partition_designator, PARTITION_DESIGNATOR);
+ test_table(path_result, PATH_RESULT);
+ test_table(path_state, PATH_STATE);
+ test_table(path_type, PATH_TYPE);
+ test_table(protect_home, PROTECT_HOME);
+ test_table(protect_system, PROTECT_SYSTEM);
+ test_table(resolve_support, RESOLVE_SUPPORT);
+ test_table(rlimit, RLIMIT);
+ test_table(scope_result, SCOPE_RESULT);
+ test_table(scope_state, SCOPE_STATE);
+ test_table(service_exec_command, SERVICE_EXEC_COMMAND);
+ test_table(service_restart, SERVICE_RESTART);
+ test_table(service_result, SERVICE_RESULT);
+ test_table(service_state, SERVICE_STATE);
+ test_table(service_type, SERVICE_TYPE);
+ test_table(show_status, SHOW_STATUS);
+ test_table(slice_state, SLICE_STATE);
+ test_table(socket_address_bind_ipv6_only, SOCKET_ADDRESS_BIND_IPV6_ONLY);
+ test_table(socket_exec_command, SOCKET_EXEC_COMMAND);
+ test_table(socket_result, SOCKET_RESULT);
+ test_table(socket_state, SOCKET_STATE);
+ test_table(split_mode, SPLIT);
+ test_table(storage, STORAGE);
+ test_table(swap_exec_command, SWAP_EXEC_COMMAND);
+ test_table(swap_result, SWAP_RESULT);
+ test_table(swap_state, SWAP_STATE);
+ test_table(target_state, TARGET_STATE);
+ test_table(timer_base, TIMER_BASE);
+ test_table(timer_result, TIMER_RESULT);
+ test_table(timer_state, TIMER_STATE);
+ test_table(unit_active_state, UNIT_ACTIVE_STATE);
+ test_table(unit_dependency, UNIT_DEPENDENCY);
+ test_table(unit_file_change_type, UNIT_FILE_CHANGE_TYPE);
+ test_table(unit_file_preset_mode, UNIT_FILE_PRESET);
+ test_table(unit_file_state, UNIT_FILE_STATE);
+ test_table(unit_load_state, UNIT_LOAD_STATE);
+ test_table(unit_type, UNIT_TYPE);
+ test_table(virtualization, VIRTUALIZATION);
+
+ test_table_sparse(object_compressed, OBJECT_COMPRESSED);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-terminal-util.c b/src/test/test-terminal-util.c
new file mode 100644
index 0000000..958d369
--- /dev/null
+++ b/src/test/test-terminal-util.c
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static void test_default_term_for_tty(void) {
+ puts(default_term_for_tty("/dev/tty23"));
+ puts(default_term_for_tty("/dev/ttyS23"));
+ puts(default_term_for_tty("/dev/tty0"));
+ puts(default_term_for_tty("/dev/pty0"));
+ puts(default_term_for_tty("/dev/pts/0"));
+ puts(default_term_for_tty("/dev/console"));
+ puts(default_term_for_tty("tty23"));
+ puts(default_term_for_tty("ttyS23"));
+ puts(default_term_for_tty("tty0"));
+ puts(default_term_for_tty("pty0"));
+ puts(default_term_for_tty("pts/0"));
+ puts(default_term_for_tty("console"));
+}
+
+static void test_read_one_char(void) {
+ _cleanup_fclose_ FILE *file = NULL;
+ char r;
+ bool need_nl;
+ char name[] = "/tmp/test-read_one_char.XXXXXX";
+
+ assert(fmkostemp_safe(name, "r+", &file) == 0);
+
+ assert_se(fputs("c\n", file) >= 0);
+ rewind(file);
+ assert_se(read_one_char(file, &r, 1000000, &need_nl) >= 0);
+ assert_se(!need_nl);
+ assert_se(r == 'c');
+ assert_se(read_one_char(file, &r, 1000000, &need_nl) < 0);
+
+ rewind(file);
+ assert_se(fputs("foobar\n", file) >= 0);
+ rewind(file);
+ assert_se(read_one_char(file, &r, 1000000, &need_nl) < 0);
+
+ rewind(file);
+ assert_se(fputs("\n", file) >= 0);
+ rewind(file);
+ assert_se(read_one_char(file, &r, 1000000, &need_nl) < 0);
+
+ unlink(name);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ test_default_term_for_tty();
+ test_read_one_char();
+
+ return 0;
+}
diff --git a/src/test/test-time-util.c b/src/test/test-time-util.c
new file mode 100644
index 0000000..eb6041c
--- /dev/null
+++ b/src/test/test-time-util.c
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "random-util.h"
+#include "serialize.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+static void test_parse_sec(void) {
+ usec_t u;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_sec("5s", &u) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC);
+ assert_se(parse_sec("5s500ms", &u) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC + 500 * USEC_PER_MSEC);
+ assert_se(parse_sec(" 5s 500ms ", &u) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC + 500 * USEC_PER_MSEC);
+ assert_se(parse_sec(" 5.5s ", &u) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC + 500 * USEC_PER_MSEC);
+ assert_se(parse_sec(" 5.5s 0.5ms ", &u) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC + 500 * USEC_PER_MSEC + 500);
+ assert_se(parse_sec(" .22s ", &u) >= 0);
+ assert_se(u == 220 * USEC_PER_MSEC);
+ assert_se(parse_sec(" .50y ", &u) >= 0);
+ assert_se(u == USEC_PER_YEAR / 2);
+ assert_se(parse_sec("2.5", &u) >= 0);
+ assert_se(u == 2500 * USEC_PER_MSEC);
+ assert_se(parse_sec(".7", &u) >= 0);
+ assert_se(u == 700 * USEC_PER_MSEC);
+ assert_se(parse_sec("23us", &u) >= 0);
+ assert_se(u == 23);
+ assert_se(parse_sec("23µs", &u) >= 0);
+ assert_se(u == 23);
+ assert_se(parse_sec("infinity", &u) >= 0);
+ assert_se(u == USEC_INFINITY);
+ assert_se(parse_sec(" infinity ", &u) >= 0);
+ assert_se(u == USEC_INFINITY);
+ assert_se(parse_sec("+3.1s", &u) >= 0);
+ assert_se(u == 3100 * USEC_PER_MSEC);
+ assert_se(parse_sec("3.1s.2", &u) >= 0);
+ assert_se(u == 3300 * USEC_PER_MSEC);
+ assert_se(parse_sec("3.1 .2", &u) >= 0);
+ assert_se(u == 3300 * USEC_PER_MSEC);
+ assert_se(parse_sec("3.1 sec .2 sec", &u) >= 0);
+ assert_se(u == 3300 * USEC_PER_MSEC);
+ assert_se(parse_sec("3.1 sec 1.2 sec", &u) >= 0);
+ assert_se(u == 4300 * USEC_PER_MSEC);
+
+ assert_se(parse_sec(" xyz ", &u) < 0);
+ assert_se(parse_sec("", &u) < 0);
+ assert_se(parse_sec(" . ", &u) < 0);
+ assert_se(parse_sec(" 5. ", &u) < 0);
+ assert_se(parse_sec(".s ", &u) < 0);
+ assert_se(parse_sec("-5s ", &u) < 0);
+ assert_se(parse_sec("-0.3s ", &u) < 0);
+ assert_se(parse_sec("-0.0s ", &u) < 0);
+ assert_se(parse_sec("-0.-0s ", &u) < 0);
+ assert_se(parse_sec("0.-0s ", &u) < 0);
+ assert_se(parse_sec("3.-0s ", &u) < 0);
+ assert_se(parse_sec(" infinity .7", &u) < 0);
+ assert_se(parse_sec(".3 infinity", &u) < 0);
+ assert_se(parse_sec("3.+1s", &u) < 0);
+ assert_se(parse_sec("3. 1s", &u) < 0);
+ assert_se(parse_sec("3.s", &u) < 0);
+ assert_se(parse_sec("12.34.56", &u) < 0);
+ assert_se(parse_sec("12..34", &u) < 0);
+ assert_se(parse_sec("..1234", &u) < 0);
+ assert_se(parse_sec("1234..", &u) < 0);
+}
+
+static void test_parse_sec_fix_0(void) {
+ usec_t u;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_sec_fix_0("5s", &u) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC);
+ assert_se(parse_sec_fix_0("0s", &u) >= 0);
+ assert_se(u == USEC_INFINITY);
+ assert_se(parse_sec_fix_0("0", &u) >= 0);
+ assert_se(u == USEC_INFINITY);
+ assert_se(parse_sec_fix_0(" 0", &u) >= 0);
+ assert_se(u == USEC_INFINITY);
+}
+
+static void test_parse_time(void) {
+ usec_t u;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_time("5", &u, 1) >= 0);
+ assert_se(u == 5);
+
+ assert_se(parse_time("5", &u, USEC_PER_MSEC) >= 0);
+ assert_se(u == 5 * USEC_PER_MSEC);
+
+ assert_se(parse_time("5", &u, USEC_PER_SEC) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC);
+
+ assert_se(parse_time("5s", &u, 1) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC);
+
+ assert_se(parse_time("5s", &u, USEC_PER_SEC) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC);
+
+ assert_se(parse_time("5s", &u, USEC_PER_MSEC) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC);
+
+ assert_se(parse_time("11111111111111y", &u, 1) == -ERANGE);
+ assert_se(parse_time("1.1111111111111y", &u, 1) >= 0);
+}
+
+static void test_parse_nsec(void) {
+ nsec_t u;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_nsec("5s", &u) >= 0);
+ assert_se(u == 5 * NSEC_PER_SEC);
+ assert_se(parse_nsec("5s500ms", &u) >= 0);
+ assert_se(u == 5 * NSEC_PER_SEC + 500 * NSEC_PER_MSEC);
+ assert_se(parse_nsec(" 5s 500ms ", &u) >= 0);
+ assert_se(u == 5 * NSEC_PER_SEC + 500 * NSEC_PER_MSEC);
+ assert_se(parse_nsec(" 5.5s ", &u) >= 0);
+ assert_se(u == 5 * NSEC_PER_SEC + 500 * NSEC_PER_MSEC);
+ assert_se(parse_nsec(" 5.5s 0.5ms ", &u) >= 0);
+ assert_se(u == 5 * NSEC_PER_SEC + 500 * NSEC_PER_MSEC + 500 * NSEC_PER_USEC);
+ assert_se(parse_nsec(" .22s ", &u) >= 0);
+ assert_se(u == 220 * NSEC_PER_MSEC);
+ assert_se(parse_nsec(" .50y ", &u) >= 0);
+ assert_se(u == NSEC_PER_YEAR / 2);
+ assert_se(parse_nsec("2.5", &u) >= 0);
+ assert_se(u == 2);
+ assert_se(parse_nsec(".7", &u) >= 0);
+ assert_se(u == 0);
+ assert_se(parse_nsec("infinity", &u) >= 0);
+ assert_se(u == NSEC_INFINITY);
+ assert_se(parse_nsec(" infinity ", &u) >= 0);
+ assert_se(u == NSEC_INFINITY);
+ assert_se(parse_nsec("+3.1s", &u) >= 0);
+ assert_se(u == 3100 * NSEC_PER_MSEC);
+ assert_se(parse_nsec("3.1s.2", &u) >= 0);
+ assert_se(u == 3100 * NSEC_PER_MSEC);
+ assert_se(parse_nsec("3.1 .2s", &u) >= 0);
+ assert_se(u == 200 * NSEC_PER_MSEC + 3);
+ assert_se(parse_nsec("3.1 sec .2 sec", &u) >= 0);
+ assert_se(u == 3300 * NSEC_PER_MSEC);
+ assert_se(parse_nsec("3.1 sec 1.2 sec", &u) >= 0);
+ assert_se(u == 4300 * NSEC_PER_MSEC);
+
+ assert_se(parse_nsec(" xyz ", &u) < 0);
+ assert_se(parse_nsec("", &u) < 0);
+ assert_se(parse_nsec(" . ", &u) < 0);
+ assert_se(parse_nsec(" 5. ", &u) < 0);
+ assert_se(parse_nsec(".s ", &u) < 0);
+ assert_se(parse_nsec(" infinity .7", &u) < 0);
+ assert_se(parse_nsec(".3 infinity", &u) < 0);
+ assert_se(parse_nsec("-5s ", &u) < 0);
+ assert_se(parse_nsec("-0.3s ", &u) < 0);
+ assert_se(parse_nsec("-0.0s ", &u) < 0);
+ assert_se(parse_nsec("-0.-0s ", &u) < 0);
+ assert_se(parse_nsec("0.-0s ", &u) < 0);
+ assert_se(parse_nsec("3.-0s ", &u) < 0);
+ assert_se(parse_nsec(" infinity .7", &u) < 0);
+ assert_se(parse_nsec(".3 infinity", &u) < 0);
+ assert_se(parse_nsec("3.+1s", &u) < 0);
+ assert_se(parse_nsec("3. 1s", &u) < 0);
+ assert_se(parse_nsec("3.s", &u) < 0);
+ assert_se(parse_nsec("12.34.56", &u) < 0);
+ assert_se(parse_nsec("12..34", &u) < 0);
+ assert_se(parse_nsec("..1234", &u) < 0);
+ assert_se(parse_nsec("1234..", &u) < 0);
+ assert_se(parse_nsec("1111111111111y", &u) == -ERANGE);
+ assert_se(parse_nsec("1.111111111111y", &u) >= 0);
+}
+
+static void test_format_timespan_one(usec_t x, usec_t accuracy) {
+ char l[FORMAT_TIMESPAN_MAX];
+ const char *t;
+ usec_t y;
+
+ log_info(USEC_FMT" (at accuracy "USEC_FMT")", x, accuracy);
+
+ assert_se(t = format_timespan(l, sizeof l, x, accuracy));
+ log_info(" = <%s>", t);
+
+ assert_se(parse_sec(t, &y) >= 0);
+ log_info(" = "USEC_FMT, y);
+
+ if (accuracy <= 0)
+ accuracy = 1;
+
+ assert_se(x / accuracy == y / accuracy);
+}
+
+static void test_format_timespan(usec_t accuracy) {
+ log_info("/* %s accuracy="USEC_FMT" */", __func__, accuracy);
+
+ test_format_timespan_one(0, accuracy);
+ test_format_timespan_one(1, accuracy);
+ test_format_timespan_one(1*USEC_PER_SEC, accuracy);
+ test_format_timespan_one(999*USEC_PER_MSEC, accuracy);
+ test_format_timespan_one(1234567, accuracy);
+ test_format_timespan_one(12, accuracy);
+ test_format_timespan_one(123, accuracy);
+ test_format_timespan_one(1234, accuracy);
+ test_format_timespan_one(12345, accuracy);
+ test_format_timespan_one(123456, accuracy);
+ test_format_timespan_one(1234567, accuracy);
+ test_format_timespan_one(12345678, accuracy);
+ test_format_timespan_one(1200000, accuracy);
+ test_format_timespan_one(1230000, accuracy);
+ test_format_timespan_one(1234000, accuracy);
+ test_format_timespan_one(1234500, accuracy);
+ test_format_timespan_one(1234560, accuracy);
+ test_format_timespan_one(1234567, accuracy);
+ test_format_timespan_one(986087, accuracy);
+ test_format_timespan_one(500 * USEC_PER_MSEC, accuracy);
+ test_format_timespan_one(9*USEC_PER_YEAR/5 - 23, accuracy);
+ test_format_timespan_one(USEC_INFINITY, accuracy);
+}
+
+static void test_timezone_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(timezone_is_valid("Europe/Berlin", LOG_ERR));
+ assert_se(timezone_is_valid("Australia/Sydney", LOG_ERR));
+ assert_se(!timezone_is_valid("Europe/Do not exist", LOG_ERR));
+}
+
+static void test_get_timezones(void) {
+ _cleanup_strv_free_ char **zones = NULL;
+ int r;
+ char **zone;
+
+ log_info("/* %s */", __func__);
+
+ r = get_timezones(&zones);
+ assert_se(r == 0);
+
+ STRV_FOREACH(zone, zones)
+ assert_se(timezone_is_valid(*zone, LOG_ERR));
+}
+
+static void test_usec_add(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(usec_add(0, 0) == 0);
+ assert_se(usec_add(1, 4) == 5);
+ assert_se(usec_add(USEC_INFINITY, 5) == USEC_INFINITY);
+ assert_se(usec_add(5, USEC_INFINITY) == USEC_INFINITY);
+ assert_se(usec_add(USEC_INFINITY-5, 2) == USEC_INFINITY-3);
+ assert_se(usec_add(USEC_INFINITY-2, 2) == USEC_INFINITY);
+ assert_se(usec_add(USEC_INFINITY-1, 2) == USEC_INFINITY);
+ assert_se(usec_add(USEC_INFINITY, 2) == USEC_INFINITY);
+}
+
+static void test_usec_sub_unsigned(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(usec_sub_unsigned(0, 0) == 0);
+ assert_se(usec_sub_unsigned(0, 2) == 0);
+ assert_se(usec_sub_unsigned(0, USEC_INFINITY) == 0);
+ assert_se(usec_sub_unsigned(1, 0) == 1);
+ assert_se(usec_sub_unsigned(1, 1) == 0);
+ assert_se(usec_sub_unsigned(1, 2) == 0);
+ assert_se(usec_sub_unsigned(1, 3) == 0);
+ assert_se(usec_sub_unsigned(1, USEC_INFINITY) == 0);
+ assert_se(usec_sub_unsigned(USEC_INFINITY-1, 0) == USEC_INFINITY-1);
+ assert_se(usec_sub_unsigned(USEC_INFINITY-1, 1) == USEC_INFINITY-2);
+ assert_se(usec_sub_unsigned(USEC_INFINITY-1, 2) == USEC_INFINITY-3);
+ assert_se(usec_sub_unsigned(USEC_INFINITY-1, USEC_INFINITY-2) == 1);
+ assert_se(usec_sub_unsigned(USEC_INFINITY-1, USEC_INFINITY-1) == 0);
+ assert_se(usec_sub_unsigned(USEC_INFINITY-1, USEC_INFINITY) == 0);
+ assert_se(usec_sub_unsigned(USEC_INFINITY, 0) == USEC_INFINITY);
+ assert_se(usec_sub_unsigned(USEC_INFINITY, 1) == USEC_INFINITY);
+ assert_se(usec_sub_unsigned(USEC_INFINITY, 2) == USEC_INFINITY);
+ assert_se(usec_sub_unsigned(USEC_INFINITY, USEC_INFINITY) == USEC_INFINITY);
+}
+
+static void test_usec_sub_signed(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(usec_sub_signed(0, 0) == 0);
+ assert_se(usec_sub_signed(4, 1) == 3);
+ assert_se(usec_sub_signed(4, 4) == 0);
+ assert_se(usec_sub_signed(4, 5) == 0);
+ assert_se(usec_sub_signed(USEC_INFINITY-3, -3) == USEC_INFINITY);
+ assert_se(usec_sub_signed(USEC_INFINITY-3, -4) == USEC_INFINITY);
+ assert_se(usec_sub_signed(USEC_INFINITY-3, -5) == USEC_INFINITY);
+ assert_se(usec_sub_signed(USEC_INFINITY, 5) == USEC_INFINITY);
+}
+
+static void test_format_timestamp(void) {
+ unsigned i;
+
+ log_info("/* %s */", __func__);
+
+ for (i = 0; i < 100; i++) {
+ char buf[MAX(FORMAT_TIMESTAMP_MAX, FORMAT_TIMESPAN_MAX)];
+ usec_t x, y;
+
+ random_bytes(&x, sizeof(x));
+ x = x % (2147483600 * USEC_PER_SEC) + 1;
+
+ assert_se(format_timestamp(buf, sizeof(buf), x));
+ log_info("%s", buf);
+ assert_se(parse_timestamp(buf, &y) >= 0);
+ assert_se(x / USEC_PER_SEC == y / USEC_PER_SEC);
+
+ assert_se(format_timestamp_utc(buf, sizeof(buf), x));
+ log_info("%s", buf);
+ assert_se(parse_timestamp(buf, &y) >= 0);
+ assert_se(x / USEC_PER_SEC == y / USEC_PER_SEC);
+
+ assert_se(format_timestamp_us(buf, sizeof(buf), x));
+ log_info("%s", buf);
+ assert_se(parse_timestamp(buf, &y) >= 0);
+ assert_se(x == y);
+
+ assert_se(format_timestamp_us_utc(buf, sizeof(buf), x));
+ log_info("%s", buf);
+ assert_se(parse_timestamp(buf, &y) >= 0);
+ assert_se(x == y);
+
+ assert_se(format_timestamp_relative(buf, sizeof(buf), x));
+ log_info("%s", buf);
+ assert_se(parse_timestamp(buf, &y) >= 0);
+
+ /* The two calls above will run with a slightly different local time. Make sure we are in the same
+ * range however, but give enough leeway that this is unlikely to explode. And of course,
+ * format_timestamp_relative() scales the accuracy with the distance from the current time up to one
+ * month, cover for that too. */
+ assert_se(y > x ? y - x : x - y <= USEC_PER_MONTH + USEC_PER_DAY);
+ }
+}
+
+static void test_format_timestamp_utc_one(usec_t val, const char *result) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ const char *t;
+
+ t = format_timestamp_utc(buf, sizeof(buf), val);
+ assert_se(streq_ptr(t, result));
+}
+
+static void test_format_timestamp_utc(void) {
+ log_info("/* %s */", __func__);
+
+ test_format_timestamp_utc_one(0, NULL);
+ test_format_timestamp_utc_one(1, "Thu 1970-01-01 00:00:00 UTC");
+ test_format_timestamp_utc_one(USEC_PER_SEC, "Thu 1970-01-01 00:00:01 UTC");
+
+#if SIZEOF_TIME_T == 8
+ test_format_timestamp_utc_one(USEC_TIMESTAMP_FORMATTABLE_MAX, "Thu 9999-12-30 23:59:59 UTC");
+ test_format_timestamp_utc_one(USEC_TIMESTAMP_FORMATTABLE_MAX + 1, "--- XXXX-XX-XX XX:XX:XX");
+#elif SIZEOF_TIME_T == 4
+ test_format_timestamp_utc_one(USEC_TIMESTAMP_FORMATTABLE_MAX, "Tue 2038-01-19 03:14:07 UTC");
+ test_format_timestamp_utc_one(USEC_TIMESTAMP_FORMATTABLE_MAX + 1, "--- XXXX-XX-XX XX:XX:XX");
+#endif
+
+ test_format_timestamp_utc_one(USEC_INFINITY, NULL);
+}
+
+static void test_deserialize_dual_timestamp(void) {
+ int r;
+ dual_timestamp t;
+
+ log_info("/* %s */", __func__);
+
+ r = deserialize_dual_timestamp("1234 5678", &t);
+ assert_se(r == 0);
+ assert_se(t.realtime == 1234);
+ assert_se(t.monotonic == 5678);
+
+ r = deserialize_dual_timestamp("1234x 5678", &t);
+ assert_se(r == -EINVAL);
+
+ r = deserialize_dual_timestamp("1234 5678y", &t);
+ assert_se(r == -EINVAL);
+
+ r = deserialize_dual_timestamp("-1234 5678", &t);
+ assert_se(r == -EINVAL);
+
+ r = deserialize_dual_timestamp("1234 -5678", &t);
+ assert_se(r == -EINVAL);
+
+ /* Check that output wasn't modified. */
+ assert_se(t.realtime == 1234);
+ assert_se(t.monotonic == 5678);
+
+ r = deserialize_dual_timestamp("+123 567", &t);
+ assert_se(r == 0);
+ assert_se(t.realtime == 123);
+ assert_se(t.monotonic == 567);
+
+ /* Check that we get "infinity" on overflow. */
+ r = deserialize_dual_timestamp("18446744073709551617 0", &t);
+ assert_se(r == 0);
+ assert_se(t.realtime == USEC_INFINITY);
+ assert_se(t.monotonic == 0);
+}
+
+static void assert_similar(usec_t a, usec_t b) {
+ usec_t d;
+
+ if (a > b)
+ d = a - b;
+ else
+ d = b - a;
+
+ assert(d < 10*USEC_PER_SEC);
+}
+
+static void test_usec_shift_clock(void) {
+ usec_t rt, mn, bt;
+
+ log_info("/* %s */", __func__);
+
+ rt = now(CLOCK_REALTIME);
+ mn = now(CLOCK_MONOTONIC);
+ bt = now(clock_boottime_or_monotonic());
+
+ assert_se(usec_shift_clock(USEC_INFINITY, CLOCK_REALTIME, CLOCK_MONOTONIC) == USEC_INFINITY);
+
+ assert_similar(usec_shift_clock(rt + USEC_PER_HOUR, CLOCK_REALTIME, CLOCK_MONOTONIC), mn + USEC_PER_HOUR);
+ assert_similar(usec_shift_clock(rt + 2*USEC_PER_HOUR, CLOCK_REALTIME, clock_boottime_or_monotonic()), bt + 2*USEC_PER_HOUR);
+ assert_se(usec_shift_clock(rt + 3*USEC_PER_HOUR, CLOCK_REALTIME, CLOCK_REALTIME_ALARM) == rt + 3*USEC_PER_HOUR);
+
+ assert_similar(usec_shift_clock(mn + 4*USEC_PER_HOUR, CLOCK_MONOTONIC, CLOCK_REALTIME_ALARM), rt + 4*USEC_PER_HOUR);
+ assert_similar(usec_shift_clock(mn + 5*USEC_PER_HOUR, CLOCK_MONOTONIC, clock_boottime_or_monotonic()), bt + 5*USEC_PER_HOUR);
+ assert_se(usec_shift_clock(mn + 6*USEC_PER_HOUR, CLOCK_MONOTONIC, CLOCK_MONOTONIC) == mn + 6*USEC_PER_HOUR);
+
+ assert_similar(usec_shift_clock(bt + 7*USEC_PER_HOUR, clock_boottime_or_monotonic(), CLOCK_MONOTONIC), mn + 7*USEC_PER_HOUR);
+ assert_similar(usec_shift_clock(bt + 8*USEC_PER_HOUR, clock_boottime_or_monotonic(), CLOCK_REALTIME_ALARM), rt + 8*USEC_PER_HOUR);
+ assert_se(usec_shift_clock(bt + 9*USEC_PER_HOUR, clock_boottime_or_monotonic(), clock_boottime_or_monotonic()) == bt + 9*USEC_PER_HOUR);
+
+ if (mn > USEC_PER_MINUTE) {
+ assert_similar(usec_shift_clock(rt - 30 * USEC_PER_SEC, CLOCK_REALTIME_ALARM, CLOCK_MONOTONIC), mn - 30 * USEC_PER_SEC);
+ assert_similar(usec_shift_clock(rt - 50 * USEC_PER_SEC, CLOCK_REALTIME, clock_boottime_or_monotonic()), bt - 50 * USEC_PER_SEC);
+ }
+}
+
+static void test_in_utc_timezone(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(setenv("TZ", ":UTC", 1) >= 0);
+ assert_se(in_utc_timezone());
+ assert_se(streq(tzname[0], "UTC"));
+ assert_se(streq(tzname[1], "UTC"));
+ assert_se(timezone == 0);
+ assert_se(daylight == 0);
+
+ assert_se(setenv("TZ", "Europe/Berlin", 1) >= 0);
+ assert_se(!in_utc_timezone());
+ assert_se(streq(tzname[0], "CET"));
+ assert_se(streq(tzname[1], "CEST"));
+
+ assert_se(unsetenv("TZ") >= 0);
+}
+
+int main(int argc, char *argv[]) {
+ uintmax_t x;
+
+ log_info("realtime=" USEC_FMT "\n"
+ "monotonic=" USEC_FMT "\n"
+ "boottime=" USEC_FMT "\n",
+ now(CLOCK_REALTIME),
+ now(CLOCK_MONOTONIC),
+ now(clock_boottime_or_monotonic()));
+
+ test_parse_sec();
+ test_parse_sec_fix_0();
+ test_parse_time();
+ test_parse_nsec();
+ test_format_timespan(1);
+ test_format_timespan(USEC_PER_MSEC);
+ test_format_timespan(USEC_PER_SEC);
+ test_timezone_is_valid();
+ test_get_timezones();
+ test_usec_add();
+ test_usec_sub_signed();
+ test_usec_sub_unsigned();
+ test_format_timestamp();
+ test_format_timestamp_utc();
+ test_deserialize_dual_timestamp();
+ test_usec_shift_clock();
+ test_in_utc_timezone();
+
+ /* Ensure time_t is signed */
+ assert_cc((time_t) -1 < (time_t) 1);
+
+ /* Ensure TIME_T_MAX works correctly */
+ x = (uintmax_t) TIME_T_MAX;
+ x++;
+ assert((time_t) x < 0);
+
+ return 0;
+}
diff --git a/src/test/test-tmpfiles.c b/src/test/test-tmpfiles.c
new file mode 100644
index 0000000..b526871
--- /dev/null
+++ b/src/test/test-tmpfiles.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+int main(int argc, char** argv) {
+ _cleanup_free_ char *cmd = NULL, *cmd2 = NULL, *ans = NULL, *ans2 = NULL, *d = NULL, *tmp = NULL, *line = NULL;
+ _cleanup_close_ int fd = -1, fd2 = -1;
+ const char *p = argv[1] ?: "/tmp";
+ char *pattern;
+
+ test_setup_logging(LOG_DEBUG);
+
+ pattern = strjoina(p, "/systemd-test-XXXXXX");
+
+ fd = open_tmpfile_unlinkable(p, O_RDWR|O_CLOEXEC);
+ assert_se(fd >= 0);
+
+ assert_se(asprintf(&cmd, "ls -l /proc/"PID_FMT"/fd/%d", getpid_cached(), fd) > 0);
+ (void) system(cmd);
+ assert_se(readlink_malloc(cmd + 6, &ans) >= 0);
+ log_debug("link1: %s", ans);
+ assert_se(endswith(ans, " (deleted)"));
+
+ fd2 = mkostemp_safe(pattern);
+ assert_se(fd >= 0);
+ assert_se(unlink(pattern) == 0);
+
+ assert_se(asprintf(&cmd2, "ls -l /proc/"PID_FMT"/fd/%d", getpid_cached(), fd2) > 0);
+ (void) system(cmd2);
+ assert_se(readlink_malloc(cmd2 + 6, &ans2) >= 0);
+ log_debug("link2: %s", ans2);
+ assert_se(endswith(ans2, " (deleted)"));
+
+ pattern = strjoina(p, "/tmpfiles-test");
+ assert_se(tempfn_random(pattern, NULL, &d) >= 0);
+
+ fd = open_tmpfile_linkable(d, O_RDWR|O_CLOEXEC, &tmp);
+ assert_se(fd >= 0);
+ assert_se(write(fd, "foobar\n", 7) == 7);
+
+ assert_se(touch(d) >= 0);
+ assert_se(link_tmpfile(fd, tmp, d) == -EEXIST);
+ assert_se(unlink(d) >= 0);
+ assert_se(link_tmpfile(fd, tmp, d) >= 0);
+
+ assert_se(read_one_line_file(d, &line) >= 0);
+ assert_se(streq(line, "foobar"));
+ assert_se(unlink(d) >= 0);
+
+ return 0;
+}
diff --git a/src/test/test-udev.c b/src/test/test-udev.c
new file mode 100644
index 0000000..ab31f5a
--- /dev/null
+++ b/src/test/test-udev.c
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2003-2004 Greg Kroah-Hartman <greg@kroah.com>
+***/
+
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/signalfd.h>
+#include <unistd.h>
+
+#include "build.h"
+#include "device-private.h"
+#include "fs-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "udev.h"
+
+static int fake_filesystems(void) {
+ static const struct fakefs {
+ const char *src;
+ const char *target;
+ const char *error;
+ bool ignore_mount_error;
+ } fakefss[] = {
+ { "test/tmpfs/sys", "/sys", "Failed to mount test /sys", false },
+ { "test/tmpfs/dev", "/dev", "Failed to mount test /dev", false },
+ { "test/run", "/run", "Failed to mount test /run", false },
+ { "test/run", "/etc/udev/rules.d", "Failed to mount empty /etc/udev/rules.d", true },
+ { "test/run", UDEVLIBEXECDIR "/rules.d", "Failed to mount empty " UDEVLIBEXECDIR "/rules.d", true },
+ };
+ unsigned i;
+
+ if (unshare(CLONE_NEWNS) < 0)
+ return log_error_errno(errno, "Failed to call unshare(): %m");
+
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
+ return log_error_errno(errno, "Failed to mount / as private: %m");
+
+ for (i = 0; i < ELEMENTSOF(fakefss); i++)
+ if (mount(fakefss[i].src, fakefss[i].target, NULL, MS_BIND, NULL) < 0) {
+ log_full_errno(fakefss[i].ignore_mount_error ? LOG_DEBUG : LOG_ERR, errno, "%s: %m", fakefss[i].error);
+ if (!fakefss[i].ignore_mount_error)
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(udev_rules_freep) UdevRules *rules = NULL;
+ _cleanup_(udev_event_freep) UdevEvent *event = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ const char *devpath, *devname, *action;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ if (!IN_SET(argc, 2, 3)) {
+ log_error("This program needs one or two arguments, %d given", argc - 1);
+ return -EINVAL;
+ }
+
+ r = fake_filesystems();
+ if (r < 0)
+ return r;
+
+ if (argc == 2) {
+ if (!streq(argv[1], "check")) {
+ log_error("Unknown argument: %s", argv[1]);
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
+ log_debug("version %s", GIT_VERSION);
+ mac_selinux_init();
+
+ action = argv[1];
+ devpath = argv[2];
+
+ assert_se(udev_rules_new(&rules, RESOLVE_NAME_EARLY) == 0);
+
+ const char *syspath = strjoina("/sys", devpath);
+ r = device_new_from_synthetic_event(&dev, syspath, action);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to open device '%s'", devpath);
+
+ assert_se(event = udev_event_new(dev, 0, NULL));
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, SIGHUP, SIGCHLD, -1) >= 0);
+
+ /* do what devtmpfs usually provides us */
+ if (sd_device_get_devname(dev, &devname) >= 0) {
+ const char *subsystem;
+ mode_t mode = 0600;
+
+ if (sd_device_get_subsystem(dev, &subsystem) >= 0 && streq(subsystem, "block"))
+ mode |= S_IFBLK;
+ else
+ mode |= S_IFCHR;
+
+ if (!streq(action, "remove")) {
+ dev_t devnum = makedev(0, 0);
+
+ (void) mkdir_parents_label(devname, 0755);
+ (void) sd_device_get_devnum(dev, &devnum);
+ if (mknod(devname, mode, devnum) < 0)
+ return log_error_errno(errno, "mknod() failed for '%s': %m", devname);
+ } else {
+ if (unlink(devname) < 0)
+ return log_error_errno(errno, "unlink('%s') failed: %m", devname);
+ (void) rmdir_parents(devname, "/");
+ }
+ }
+
+ udev_event_execute_rules(event, 3 * USEC_PER_SEC, NULL, rules);
+ udev_event_execute_run(event, 3 * USEC_PER_SEC);
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/test/test-uid-range.c b/src/test/test-uid-range.c
new file mode 100644
index 0000000..65a556c
--- /dev/null
+++ b/src/test/test-uid-range.c
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stddef.h>
+
+#include "alloc-util.h"
+#include "uid-range.h"
+#include "user-util.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_free_ UidRange *p = NULL;
+ unsigned n = 0;
+ uid_t search;
+
+ assert_se(uid_range_add_str(&p, &n, "500-999") >= 0);
+ assert_se(n == 1);
+ assert_se(p[0].start == 500);
+ assert_se(p[0].nr == 500);
+
+ assert_se(!uid_range_contains(p, n, 499));
+ assert_se(uid_range_contains(p, n, 500));
+ assert_se(uid_range_contains(p, n, 999));
+ assert_se(!uid_range_contains(p, n, 1000));
+
+ search = UID_INVALID;
+ assert_se(uid_range_next_lower(p, n, &search));
+ assert_se(search == 999);
+ assert_se(uid_range_next_lower(p, n, &search));
+ assert_se(search == 998);
+ search = 501;
+ assert_se(uid_range_next_lower(p, n, &search));
+ assert_se(search == 500);
+ assert_se(uid_range_next_lower(p, n, &search) == -EBUSY);
+
+ assert_se(uid_range_add_str(&p, &n, "1000") >= 0);
+ assert_se(n == 1);
+ assert_se(p[0].start == 500);
+ assert_se(p[0].nr == 501);
+
+ assert_se(uid_range_add_str(&p, &n, "30-40") >= 0);
+ assert_se(n == 2);
+ assert_se(p[0].start == 30);
+ assert_se(p[0].nr == 11);
+ assert_se(p[1].start == 500);
+ assert_se(p[1].nr == 501);
+
+ assert_se(uid_range_add_str(&p, &n, "60-70") >= 0);
+ assert_se(n == 3);
+ assert_se(p[0].start == 30);
+ assert_se(p[0].nr == 11);
+ assert_se(p[1].start == 60);
+ assert_se(p[1].nr == 11);
+ assert_se(p[2].start == 500);
+ assert_se(p[2].nr == 501);
+
+ assert_se(uid_range_add_str(&p, &n, "20-2000") >= 0);
+ assert_se(n == 1);
+ assert_se(p[0].start == 20);
+ assert_se(p[0].nr == 1981);
+
+ assert_se(uid_range_add_str(&p, &n, "2002") >= 0);
+ assert_se(n == 2);
+ assert_se(p[0].start == 20);
+ assert_se(p[0].nr == 1981);
+ assert_se(p[1].start == 2002);
+ assert_se(p[1].nr == 1);
+
+ assert_se(uid_range_add_str(&p, &n, "2001") >= 0);
+ assert_se(n == 1);
+ assert_se(p[0].start == 20);
+ assert_se(p[0].nr == 1983);
+
+ return 0;
+}
diff --git a/src/test/test-umount.c b/src/test/test-umount.c
new file mode 100644
index 0000000..6ab5758
--- /dev/null
+++ b/src/test/test-umount.c
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "log.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "umount.h"
+#include "util.h"
+
+static void test_mount_points_list(const char *fname) {
+ _cleanup_(mount_points_list_free) LIST_HEAD(MountPoint, mp_list_head);
+ _cleanup_free_ char *testdata_fname = NULL;
+ MountPoint *m;
+
+ log_info("/* %s(\"%s\") */", __func__, fname ?: "/proc/self/mountinfo");
+
+ if (fname)
+ fname = testdata_fname = path_join(get_testdata_dir(), fname);
+
+ LIST_HEAD_INIT(mp_list_head);
+ assert_se(mount_points_list_get(fname, &mp_list_head) >= 0);
+
+ LIST_FOREACH(mount_point, m, mp_list_head)
+ log_debug("path=%s o=%s f=0x%lx try-ro=%s dev=%u:%u",
+ m->path,
+ strempty(m->remount_options),
+ m->remount_flags,
+ yes_no(m->try_remount_ro),
+ major(m->devnum), minor(m->devnum));
+}
+
+static void test_swap_list(const char *fname) {
+ _cleanup_(mount_points_list_free) LIST_HEAD(MountPoint, mp_list_head);
+ _cleanup_free_ char *testdata_fname = NULL;
+ MountPoint *m;
+
+ log_info("/* %s(\"%s\") */", __func__, fname ?: "/proc/swaps");
+
+ if (fname)
+ fname = testdata_fname = path_join(get_testdata_dir(), fname);
+
+ LIST_HEAD_INIT(mp_list_head);
+ assert_se(swap_list_get(fname, &mp_list_head) >= 0);
+
+ LIST_FOREACH(mount_point, m, mp_list_head)
+ log_debug("path=%s o=%s f=0x%lx try-ro=%s dev=%u:%u",
+ m->path,
+ strempty(m->remount_options),
+ m->remount_flags,
+ yes_no(m->try_remount_ro),
+ major(m->devnum), minor(m->devnum));
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_mount_points_list(NULL);
+ test_mount_points_list("/test-umount/empty.mountinfo");
+ test_mount_points_list("/test-umount/garbled.mountinfo");
+ test_mount_points_list("/test-umount/rhbug-1554943.mountinfo");
+
+ test_swap_list(NULL);
+ test_swap_list("/test-umount/example.swaps");
+}
diff --git a/src/test/test-unaligned.c b/src/test/test-unaligned.c
new file mode 100644
index 0000000..96d6b22
--- /dev/null
+++ b/src/test/test-unaligned.c
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sparse-endian.h"
+#include "unaligned.h"
+#include "util.h"
+
+static uint8_t data[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+};
+
+static void test_be(void) {
+ uint8_t scratch[16];
+
+ assert_se(unaligned_read_be16(&data[0]) == 0x0001);
+ assert_se(unaligned_read_be16(&data[1]) == 0x0102);
+
+ assert_se(unaligned_read_be32(&data[0]) == 0x00010203);
+ assert_se(unaligned_read_be32(&data[1]) == 0x01020304);
+ assert_se(unaligned_read_be32(&data[2]) == 0x02030405);
+ assert_se(unaligned_read_be32(&data[3]) == 0x03040506);
+
+ assert_se(unaligned_read_be64(&data[0]) == 0x0001020304050607);
+ assert_se(unaligned_read_be64(&data[1]) == 0x0102030405060708);
+ assert_se(unaligned_read_be64(&data[2]) == 0x0203040506070809);
+ assert_se(unaligned_read_be64(&data[3]) == 0x030405060708090a);
+ assert_se(unaligned_read_be64(&data[4]) == 0x0405060708090a0b);
+ assert_se(unaligned_read_be64(&data[5]) == 0x05060708090a0b0c);
+ assert_se(unaligned_read_be64(&data[6]) == 0x060708090a0b0c0d);
+ assert_se(unaligned_read_be64(&data[7]) == 0x0708090a0b0c0d0e);
+
+ zero(scratch);
+ unaligned_write_be16(&scratch[0], 0x0001);
+ assert_se(memcmp(&scratch[0], &data[0], sizeof(uint16_t)) == 0);
+ zero(scratch);
+ unaligned_write_be16(&scratch[1], 0x0102);
+ assert_se(memcmp(&scratch[1], &data[1], sizeof(uint16_t)) == 0);
+
+ zero(scratch);
+ unaligned_write_be32(&scratch[0], 0x00010203);
+ assert_se(memcmp(&scratch[0], &data[0], sizeof(uint32_t)) == 0);
+ zero(scratch);
+ unaligned_write_be32(&scratch[1], 0x01020304);
+ assert_se(memcmp(&scratch[1], &data[1], sizeof(uint32_t)) == 0);
+ zero(scratch);
+ unaligned_write_be32(&scratch[2], 0x02030405);
+ assert_se(memcmp(&scratch[2], &data[2], sizeof(uint32_t)) == 0);
+ zero(scratch);
+ unaligned_write_be32(&scratch[3], 0x03040506);
+ assert_se(memcmp(&scratch[3], &data[3], sizeof(uint32_t)) == 0);
+
+ zero(scratch);
+ unaligned_write_be64(&scratch[0], 0x0001020304050607);
+ assert_se(memcmp(&scratch[0], &data[0], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_be64(&scratch[1], 0x0102030405060708);
+ assert_se(memcmp(&scratch[1], &data[1], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_be64(&scratch[2], 0x0203040506070809);
+ assert_se(memcmp(&scratch[2], &data[2], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_be64(&scratch[3], 0x030405060708090a);
+ assert_se(memcmp(&scratch[3], &data[3], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_be64(&scratch[4], 0x0405060708090a0b);
+ assert_se(memcmp(&scratch[4], &data[4], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_be64(&scratch[5], 0x05060708090a0b0c);
+ assert_se(memcmp(&scratch[5], &data[5], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_be64(&scratch[6], 0x060708090a0b0c0d);
+ assert_se(memcmp(&scratch[6], &data[6], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_be64(&scratch[7], 0x0708090a0b0c0d0e);
+ assert_se(memcmp(&scratch[7], &data[7], sizeof(uint64_t)) == 0);
+}
+
+static void test_le(void) {
+ uint8_t scratch[16];
+
+ assert_se(unaligned_read_le16(&data[0]) == 0x0100);
+ assert_se(unaligned_read_le16(&data[1]) == 0x0201);
+
+ assert_se(unaligned_read_le32(&data[0]) == 0x03020100);
+ assert_se(unaligned_read_le32(&data[1]) == 0x04030201);
+ assert_se(unaligned_read_le32(&data[2]) == 0x05040302);
+ assert_se(unaligned_read_le32(&data[3]) == 0x06050403);
+
+ assert_se(unaligned_read_le64(&data[0]) == 0x0706050403020100);
+ assert_se(unaligned_read_le64(&data[1]) == 0x0807060504030201);
+ assert_se(unaligned_read_le64(&data[2]) == 0x0908070605040302);
+ assert_se(unaligned_read_le64(&data[3]) == 0x0a09080706050403);
+ assert_se(unaligned_read_le64(&data[4]) == 0x0b0a090807060504);
+ assert_se(unaligned_read_le64(&data[5]) == 0x0c0b0a0908070605);
+ assert_se(unaligned_read_le64(&data[6]) == 0x0d0c0b0a09080706);
+ assert_se(unaligned_read_le64(&data[7]) == 0x0e0d0c0b0a090807);
+
+ zero(scratch);
+ unaligned_write_le16(&scratch[0], 0x0100);
+ assert_se(memcmp(&scratch[0], &data[0], sizeof(uint16_t)) == 0);
+ zero(scratch);
+ unaligned_write_le16(&scratch[1], 0x0201);
+ assert_se(memcmp(&scratch[1], &data[1], sizeof(uint16_t)) == 0);
+
+ zero(scratch);
+ unaligned_write_le32(&scratch[0], 0x03020100);
+
+ assert_se(memcmp(&scratch[0], &data[0], sizeof(uint32_t)) == 0);
+ zero(scratch);
+ unaligned_write_le32(&scratch[1], 0x04030201);
+ assert_se(memcmp(&scratch[1], &data[1], sizeof(uint32_t)) == 0);
+ zero(scratch);
+ unaligned_write_le32(&scratch[2], 0x05040302);
+ assert_se(memcmp(&scratch[2], &data[2], sizeof(uint32_t)) == 0);
+ zero(scratch);
+ unaligned_write_le32(&scratch[3], 0x06050403);
+ assert_se(memcmp(&scratch[3], &data[3], sizeof(uint32_t)) == 0);
+
+ zero(scratch);
+ unaligned_write_le64(&scratch[0], 0x0706050403020100);
+ assert_se(memcmp(&scratch[0], &data[0], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_le64(&scratch[1], 0x0807060504030201);
+ assert_se(memcmp(&scratch[1], &data[1], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_le64(&scratch[2], 0x0908070605040302);
+ assert_se(memcmp(&scratch[2], &data[2], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_le64(&scratch[3], 0x0a09080706050403);
+ assert_se(memcmp(&scratch[3], &data[3], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_le64(&scratch[4], 0x0B0A090807060504);
+ assert_se(memcmp(&scratch[4], &data[4], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_le64(&scratch[5], 0x0c0b0a0908070605);
+ assert_se(memcmp(&scratch[5], &data[5], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_le64(&scratch[6], 0x0d0c0b0a09080706);
+ assert_se(memcmp(&scratch[6], &data[6], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_le64(&scratch[7], 0x0e0d0c0b0a090807);
+ assert_se(memcmp(&scratch[7], &data[7], sizeof(uint64_t)) == 0);
+}
+
+static void test_ne(void) {
+ uint16_t x = 4711;
+ uint32_t y = 123456;
+ uint64_t z = 9876543210;
+
+ /* Note that we don't bother actually testing alignment issues in this function, after all the _ne() functions
+ * are just aliases for the _le() or _be() implementations, which we test extensively above. Hence, in this
+ * function, just ensure that they map to the right version on the local architecture. */
+
+ assert_se(unaligned_read_ne16(&x) == 4711);
+ assert_se(unaligned_read_ne32(&y) == 123456);
+ assert_se(unaligned_read_ne64(&z) == 9876543210);
+
+ unaligned_write_ne16(&x, 1);
+ unaligned_write_ne32(&y, 2);
+ unaligned_write_ne64(&z, 3);
+
+ assert_se(x == 1);
+ assert_se(y == 2);
+ assert_se(z == 3);
+}
+
+int main(int argc, const char *argv[]) {
+ test_be();
+ test_le();
+ test_ne();
+ return 0;
+}
diff --git a/src/test/test-unit-file.c b/src/test/test-unit-file.c
new file mode 100644
index 0000000..f5578f9
--- /dev/null
+++ b/src/test/test-unit-file.c
@@ -0,0 +1,930 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/capability.h>
+#include <unistd.h>
+
+#include "all-units.h"
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "conf-parser.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "install-printf.h"
+#include "install.h"
+#include "load-fragment.h"
+#include "macro.h"
+#include "rm-rf.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+#include "test-helper.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+static int test_unit_file_get_set(void) {
+ int r;
+ Hashmap *h;
+ Iterator i;
+ UnitFileList *p;
+
+ h = hashmap_new(&string_hash_ops);
+ assert_se(h);
+
+ r = unit_file_get_list(UNIT_FILE_SYSTEM, NULL, h, NULL, NULL);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return log_tests_skipped_errno(r, "unit_file_get_list");
+
+ log_full_errno(r == 0 ? LOG_INFO : LOG_ERR, r,
+ "unit_file_get_list: %m");
+ if (r < 0)
+ return EXIT_FAILURE;
+
+ HASHMAP_FOREACH(p, h, i)
+ printf("%s = %s\n", p->path, unit_file_state_to_string(p->state));
+
+ unit_file_list_free(h);
+
+ return 0;
+}
+
+static void check_execcommand(ExecCommand *c,
+ const char* path,
+ const char* argv0,
+ const char* argv1,
+ const char* argv2,
+ bool ignore) {
+ size_t n;
+
+ assert_se(c);
+ log_info("expect: \"%s\" [\"%s\" \"%s\" \"%s\"]",
+ path, argv0 ?: path, argv1, argv2);
+ n = strv_length(c->argv);
+ log_info("actual: \"%s\" [\"%s\" \"%s\" \"%s\"]",
+ c->path, c->argv[0], n > 0 ? c->argv[1] : NULL, n > 1 ? c->argv[2] : NULL);
+ assert_se(streq(c->path, path));
+ assert_se(streq(c->argv[0], argv0 ?: path));
+ if (n > 0)
+ assert_se(streq_ptr(c->argv[1], argv1));
+ if (n > 1)
+ assert_se(streq_ptr(c->argv[2], argv2));
+ assert_se(!!(c->flags & EXEC_COMMAND_IGNORE_FAILURE) == ignore);
+}
+
+static void test_config_parse_exec(void) {
+ /* int config_parse_exec(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) */
+ int r;
+
+ ExecCommand *c = NULL, *c1;
+ const char *ccc;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ _cleanup_(unit_freep) Unit *u = NULL;
+
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m);
+ if (MANAGER_SKIP_TEST(r)) {
+ log_notice_errno(r, "Skipping test: manager_new: %m");
+ return;
+ }
+
+ assert_se(r >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ assert_se(u = unit_new(m, sizeof(Service)));
+
+ log_info("/* basic test */");
+ r = config_parse_exec(NULL, "fake", 1, "section", 1,
+ "LValue", 0, "/RValue r1",
+ &c, u);
+ assert_se(r >= 0);
+ check_execcommand(c, "/RValue", "/RValue", "r1", NULL, false);
+
+ r = config_parse_exec(NULL, "fake", 2, "section", 1,
+ "LValue", 0, "/RValue///slashes r1///",
+ &c, u);
+
+ log_info("/* test slashes */");
+ assert_se(r >= 0);
+ c1 = c->command_next;
+ check_execcommand(c1, "/RValue/slashes", "/RValue///slashes", "r1///", NULL, false);
+
+ log_info("/* trailing slash */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "/RValue/ argv0 r1",
+ &c, u);
+ assert_se(r == -ENOEXEC);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* honour_argv0 */");
+ r = config_parse_exec(NULL, "fake", 3, "section", 1,
+ "LValue", 0, "@/RValue///slashes2 ///argv0 r1",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/RValue/slashes2", "///argv0", "r1", NULL, false);
+
+ log_info("/* honour_argv0, no args */");
+ r = config_parse_exec(NULL, "fake", 3, "section", 1,
+ "LValue", 0, "@/RValue",
+ &c, u);
+ assert_se(r == -ENOEXEC);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* no command, whitespace only, reset */");
+ r = config_parse_exec(NULL, "fake", 3, "section", 1,
+ "LValue", 0, " ",
+ &c, u);
+ assert_se(r == 0);
+ assert_se(c == NULL);
+
+ log_info("/* ignore && honour_argv0 */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "-@/RValue///slashes3 argv0a r1",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c;
+ check_execcommand(c1, "/RValue/slashes3", "argv0a", "r1", NULL, true);
+
+ log_info("/* ignore && honour_argv0 */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "@-/RValue///slashes4 argv0b r1",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/RValue/slashes4", "argv0b", "r1", NULL, true);
+
+ log_info("/* ignore && ignore */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "--/RValue argv0 r1",
+ &c, u);
+ assert_se(r == 0);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* ignore && ignore (2) */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "-@-/RValue argv0 r1",
+ &c, u);
+ assert_se(r == 0);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* semicolon */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "-@/RValue argv0 r1 ; "
+ "/goo/goo boo",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/RValue", "argv0", "r1", NULL, true);
+
+ c1 = c1->command_next;
+ check_execcommand(c1, "/goo/goo", NULL, "boo", NULL, false);
+
+ log_info("/* two semicolons in a row */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "-@/RValue argv0 r1 ; ; "
+ "/goo/goo boo",
+ &c, u);
+ assert_se(r == -ENOEXEC);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/RValue", "argv0", "r1", NULL, true);
+
+ /* second command fails because the executable name is ";" */
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* trailing semicolon */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "-@/RValue argv0 r1 ; ",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/RValue", "argv0", "r1", NULL, true);
+
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* trailing semicolon, no whitespace */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "-@/RValue argv0 r1 ;",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/RValue", "argv0", "r1", NULL, true);
+
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* trailing semicolon in single quotes */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "-@/RValue argv0 r1 ';'",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/RValue", "argv0", "r1", ";", true);
+
+ log_info("/* escaped semicolon */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "/bin/find \\;",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/bin/find", NULL, ";", NULL, false);
+
+ log_info("/* escaped semicolon with following arg */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "/sbin/find \\; /x",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/sbin/find", NULL, ";", "/x", false);
+
+ log_info("/* escaped semicolon as part of an expression */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "/sbin/find \\;x",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/sbin/find", NULL, "\\;x", NULL, false);
+
+ log_info("/* encoded semicolon */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "/bin/find \\073",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/bin/find", NULL, ";", NULL, false);
+
+ log_info("/* quoted semicolon */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "/bin/find \";\"",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/bin/find", NULL, ";", NULL, false);
+
+ log_info("/* quoted semicolon with following arg */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "/sbin/find \";\" /x",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/sbin/find", NULL, ";", "/x", false);
+
+ log_info("/* spaces in the filename */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "\"/PATH WITH SPACES/daemon\" -1 -2",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/PATH WITH SPACES/daemon", NULL, "-1", "-2", false);
+
+ log_info("/* spaces in the filename, no args */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "\"/PATH WITH SPACES/daemon -1 -2\"",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/PATH WITH SPACES/daemon -1 -2", NULL, NULL, NULL, false);
+
+ log_info("/* spaces in the filename, everything quoted */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "\"/PATH WITH SPACES/daemon\" \"-1\" '-2'",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/PATH WITH SPACES/daemon", NULL, "-1", "-2", false);
+
+ log_info("/* escaped spaces in the filename */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "\"/PATH\\sWITH\\sSPACES/daemon\" '-1 -2'",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/PATH WITH SPACES/daemon", NULL, "-1 -2", NULL, false);
+
+ log_info("/* escaped spaces in the filename (2) */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "\"/PATH\\x20WITH\\x20SPACES/daemon\" \"-1 -2\"",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/PATH WITH SPACES/daemon", NULL, "-1 -2", NULL, false);
+
+ for (ccc = "abfnrtv\\\'\"x"; *ccc; ccc++) {
+ /* \\x is an incomplete hexadecimal sequence, invalid because of the slash */
+ char path[] = "/path\\X";
+ path[sizeof(path) - 2] = *ccc;
+
+ log_info("/* invalid character: \\%c */", *ccc);
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, path,
+ &c, u);
+ assert_se(r == -ENOEXEC);
+ assert_se(c1->command_next == NULL);
+ }
+
+ log_info("/* valid character: \\s */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "/path\\s",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/path ", NULL, NULL, NULL, false);
+
+ log_info("/* quoted backslashes */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "/bin/grep '\\w+\\K'",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/bin/grep", NULL, "\\w+\\K", NULL, false);
+
+ log_info("/* trailing backslash: \\ */");
+ /* backslash is invalid */
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "/path\\",
+ &c, u);
+ assert_se(r == -ENOEXEC);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* missing ending ' */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "/path 'foo",
+ &c, u);
+ assert_se(r == -ENOEXEC);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* missing ending ' with trailing backslash */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "/path 'foo\\",
+ &c, u);
+ assert_se(r == -ENOEXEC);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* invalid space between modifiers */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "- /path",
+ &c, u);
+ assert_se(r == 0);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* only modifiers, no path */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "-",
+ &c, u);
+ assert_se(r == 0);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* empty argument, reset */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "",
+ &c, u);
+ assert_se(r == 0);
+ assert_se(c == NULL);
+
+ exec_command_free_list(c);
+}
+
+static void test_config_parse_log_extra_fields(void) {
+ /* int config_parse_log_extra_fields(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) */
+
+ int r;
+
+ _cleanup_(manager_freep) Manager *m = NULL;
+ _cleanup_(unit_freep) Unit *u = NULL;
+ ExecContext c = {};
+
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m);
+ if (MANAGER_SKIP_TEST(r)) {
+ log_notice_errno(r, "Skipping test: manager_new: %m");
+ return;
+ }
+
+ assert_se(r >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ assert_se(u = unit_new(m, sizeof(Service)));
+
+ log_info("/* %s – basic test */", __func__);
+ r = config_parse_log_extra_fields(NULL, "fake", 1, "section", 1,
+ "LValue", 0, "FOO=BAR \"QOOF=quux ' ' \"",
+ &c, u);
+ assert_se(r >= 0);
+ assert_se(c.n_log_extra_fields == 2);
+ assert_se(strneq(c.log_extra_fields[0].iov_base, "FOO=BAR", c.log_extra_fields[0].iov_len));
+ assert_se(strneq(c.log_extra_fields[1].iov_base, "QOOF=quux ' ' ", c.log_extra_fields[1].iov_len));
+
+ log_info("/* %s – add some */", __func__);
+ r = config_parse_log_extra_fields(NULL, "fake", 1, "section", 1,
+ "LValue", 0, "FOO2=BAR2 QOOF2=quux ' '",
+ &c, u);
+ assert_se(r >= 0);
+ assert_se(c.n_log_extra_fields == 4);
+ assert_se(strneq(c.log_extra_fields[0].iov_base, "FOO=BAR", c.log_extra_fields[0].iov_len));
+ assert_se(strneq(c.log_extra_fields[1].iov_base, "QOOF=quux ' ' ", c.log_extra_fields[1].iov_len));
+ assert_se(strneq(c.log_extra_fields[2].iov_base, "FOO2=BAR2", c.log_extra_fields[2].iov_len));
+ assert_se(strneq(c.log_extra_fields[3].iov_base, "QOOF2=quux", c.log_extra_fields[3].iov_len));
+
+ exec_context_dump(&c, stdout, " --> ");
+
+ log_info("/* %s – reset */", __func__);
+ r = config_parse_log_extra_fields(NULL, "fake", 1, "section", 1,
+ "LValue", 0, "",
+ &c, u);
+ assert_se(r >= 0);
+ assert_se(c.n_log_extra_fields == 0);
+
+ exec_context_free_log_extra_fields(&c);
+
+ log_info("/* %s – bye */", __func__);
+}
+
+#define env_file_1 \
+ "a=a\n" \
+ "b=b\\\n" \
+ "c\n" \
+ "d=d\\\n" \
+ "e\\\n" \
+ "f\n" \
+ "g=g\\ \n" \
+ "h=h\n" \
+ "i=i\\"
+
+#define env_file_2 \
+ "a=a\\\n"
+
+#define env_file_3 \
+ "#SPAMD_ARGS=\"-d --socketpath=/var/lib/bulwark/spamd \\\n" \
+ "#--nouser-config \\\n" \
+ "normal=line"
+
+#define env_file_4 \
+ "# Generated\n" \
+ "\n" \
+ "HWMON_MODULES=\"coretemp f71882fg\"\n" \
+ "\n" \
+ "# For compatibility reasons\n" \
+ "\n" \
+ "MODULE_0=coretemp\n" \
+ "MODULE_1=f71882fg"
+
+#define env_file_5 \
+ "a=\n" \
+ "b="
+
+static void test_load_env_file_1(void) {
+ _cleanup_strv_free_ char **data = NULL;
+ int r;
+
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-load-env-file.XXXXXX";
+ _cleanup_close_ int fd;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, env_file_1, strlen(env_file_1)) == strlen(env_file_1));
+
+ r = load_env_file(NULL, name, &data);
+ assert_se(r == 0);
+ assert_se(streq(data[0], "a=a"));
+ assert_se(streq(data[1], "b=bc"));
+ assert_se(streq(data[2], "d=def"));
+ assert_se(streq(data[3], "g=g "));
+ assert_se(streq(data[4], "h=h"));
+ assert_se(streq(data[5], "i=i"));
+ assert_se(data[6] == NULL);
+}
+
+static void test_load_env_file_2(void) {
+ _cleanup_strv_free_ char **data = NULL;
+ int r;
+
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-load-env-file.XXXXXX";
+ _cleanup_close_ int fd;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, env_file_2, strlen(env_file_2)) == strlen(env_file_2));
+
+ r = load_env_file(NULL, name, &data);
+ assert_se(r == 0);
+ assert_se(streq(data[0], "a=a"));
+ assert_se(data[1] == NULL);
+}
+
+static void test_load_env_file_3(void) {
+ _cleanup_strv_free_ char **data = NULL;
+ int r;
+
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-load-env-file.XXXXXX";
+ _cleanup_close_ int fd;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, env_file_3, strlen(env_file_3)) == strlen(env_file_3));
+
+ r = load_env_file(NULL, name, &data);
+ assert_se(r == 0);
+ assert_se(data == NULL);
+}
+
+static void test_load_env_file_4(void) {
+ _cleanup_strv_free_ char **data = NULL;
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-load-env-file.XXXXXX";
+ _cleanup_close_ int fd;
+ int r;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, env_file_4, strlen(env_file_4)) == strlen(env_file_4));
+
+ r = load_env_file(NULL, name, &data);
+ assert_se(r == 0);
+ assert_se(streq(data[0], "HWMON_MODULES=coretemp f71882fg"));
+ assert_se(streq(data[1], "MODULE_0=coretemp"));
+ assert_se(streq(data[2], "MODULE_1=f71882fg"));
+ assert_se(data[3] == NULL);
+}
+
+static void test_load_env_file_5(void) {
+ _cleanup_strv_free_ char **data = NULL;
+ int r;
+
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-load-env-file.XXXXXX";
+ _cleanup_close_ int fd;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, env_file_5, strlen(env_file_5)) == strlen(env_file_5));
+
+ r = load_env_file(NULL, name, &data);
+ assert_se(r == 0);
+ assert_se(streq(data[0], "a="));
+ assert_se(streq(data[1], "b="));
+ assert_se(data[2] == NULL);
+}
+
+static void test_install_printf(void) {
+ char name[] = "name.service",
+ path[] = "/run/systemd/system/name.service";
+ UnitFileInstallInfo i = { .name = name, .path = path, };
+ UnitFileInstallInfo i2 = { .name= name, .path = path, };
+ char name3[] = "name@inst.service",
+ path3[] = "/run/systemd/system/name.service";
+ UnitFileInstallInfo i3 = { .name = name3, .path = path3, };
+ UnitFileInstallInfo i4 = { .name = name3, .path = path3, };
+
+ _cleanup_free_ char *mid = NULL, *bid = NULL, *host = NULL, *gid = NULL, *group = NULL, *uid = NULL, *user = NULL;
+
+ assert_se(specifier_machine_id('m', NULL, NULL, &mid) >= 0 && mid);
+ assert_se(specifier_boot_id('b', NULL, NULL, &bid) >= 0 && bid);
+ assert_se(host = gethostname_malloc());
+ assert_se(group = gid_to_name(getgid()));
+ assert_se(asprintf(&gid, UID_FMT, getgid()) >= 0);
+ assert_se(user = uid_to_name(getuid()));
+ assert_se(asprintf(&uid, UID_FMT, getuid()) >= 0);
+
+#define expect(src, pattern, result) \
+ do { \
+ _cleanup_free_ char *t = NULL; \
+ _cleanup_free_ char \
+ *d1 = strdup(i.name), \
+ *d2 = strdup(i.path); \
+ assert_se(install_full_printf(&src, pattern, &t) >= 0 || !result); \
+ memzero(i.name, strlen(i.name)); \
+ memzero(i.path, strlen(i.path)); \
+ assert_se(d1 && d2); \
+ if (result) { \
+ printf("%s\n", t); \
+ assert_se(streq(t, result)); \
+ } else assert_se(t == NULL); \
+ strcpy(i.name, d1); \
+ strcpy(i.path, d2); \
+ } while (false)
+
+ expect(i, "%n", "name.service");
+ expect(i, "%N", "name");
+ expect(i, "%p", "name");
+ expect(i, "%i", "");
+ expect(i, "%j", "name");
+ expect(i, "%g", group);
+ expect(i, "%G", gid);
+ expect(i, "%u", user);
+ expect(i, "%U", uid);
+
+ expect(i, "%m", mid);
+ expect(i, "%b", bid);
+ expect(i, "%H", host);
+
+ expect(i2, "%g", group);
+ expect(i2, "%G", gid);
+ expect(i2, "%u", user);
+ expect(i2, "%U", uid);
+
+ expect(i3, "%n", "name@inst.service");
+ expect(i3, "%N", "name@inst");
+ expect(i3, "%p", "name");
+ expect(i3, "%g", group);
+ expect(i3, "%G", gid);
+ expect(i3, "%u", user);
+ expect(i3, "%U", uid);
+
+ expect(i3, "%m", mid);
+ expect(i3, "%b", bid);
+ expect(i3, "%H", host);
+
+ expect(i4, "%g", group);
+ expect(i4, "%G", gid);
+ expect(i4, "%u", user);
+ expect(i4, "%U", uid);
+}
+
+static uint64_t make_cap(int cap) {
+ return ((uint64_t) 1ULL << (uint64_t) cap);
+}
+
+static void test_config_parse_capability_set(void) {
+ /* int config_parse_capability_set(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) */
+ int r;
+ uint64_t capability_bounding_set = 0;
+
+ r = config_parse_capability_set(NULL, "fake", 1, "section", 1,
+ "CapabilityBoundingSet", 0, "CAP_NET_RAW",
+ &capability_bounding_set, NULL);
+ assert_se(r >= 0);
+ assert_se(capability_bounding_set == make_cap(CAP_NET_RAW));
+
+ r = config_parse_capability_set(NULL, "fake", 1, "section", 1,
+ "CapabilityBoundingSet", 0, "CAP_NET_ADMIN",
+ &capability_bounding_set, NULL);
+ assert_se(r >= 0);
+ assert_se(capability_bounding_set == (make_cap(CAP_NET_RAW) | make_cap(CAP_NET_ADMIN)));
+
+ r = config_parse_capability_set(NULL, "fake", 1, "section", 1,
+ "CapabilityBoundingSet", 0, "~CAP_NET_ADMIN",
+ &capability_bounding_set, NULL);
+ assert_se(r >= 0);
+ assert_se(capability_bounding_set == make_cap(CAP_NET_RAW));
+
+ r = config_parse_capability_set(NULL, "fake", 1, "section", 1,
+ "CapabilityBoundingSet", 0, "",
+ &capability_bounding_set, NULL);
+ assert_se(r >= 0);
+ assert_se(capability_bounding_set == UINT64_C(0));
+
+ r = config_parse_capability_set(NULL, "fake", 1, "section", 1,
+ "CapabilityBoundingSet", 0, "~",
+ &capability_bounding_set, NULL);
+ assert_se(r >= 0);
+ assert_se(cap_test_all(capability_bounding_set));
+
+ capability_bounding_set = 0;
+ r = config_parse_capability_set(NULL, "fake", 1, "section", 1,
+ "CapabilityBoundingSet", 0, " 'CAP_NET_RAW' WAT_CAP??? CAP_NET_ADMIN CAP'_trailing_garbage",
+ &capability_bounding_set, NULL);
+ assert_se(r >= 0);
+ assert_se(capability_bounding_set == (make_cap(CAP_NET_RAW) | make_cap(CAP_NET_ADMIN)));
+}
+
+static void test_config_parse_rlimit(void) {
+ struct rlimit * rl[_RLIMIT_MAX] = {};
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "55", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == 55);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == rl[RLIMIT_NOFILE]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "55:66", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == 55);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_max == 66);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "infinity", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == RLIM_INFINITY);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == rl[RLIMIT_NOFILE]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "infinity:infinity", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == RLIM_INFINITY);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == rl[RLIMIT_NOFILE]->rlim_max);
+
+ rl[RLIMIT_NOFILE]->rlim_cur = 10;
+ rl[RLIMIT_NOFILE]->rlim_max = 20;
+
+ /* Invalid values don't change rl */
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "10:20:30", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == 10);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_max == 20);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "wat:wat", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == 10);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_max == 20);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "66:wat", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == 10);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_max == 20);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "200:100", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == 10);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_max == 20);
+
+ rl[RLIMIT_NOFILE] = mfree(rl[RLIMIT_NOFILE]);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitCPU", RLIMIT_CPU, "56", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_CPU]);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == 56);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == rl[RLIMIT_CPU]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitCPU", RLIMIT_CPU, "57s", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_CPU]);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == 57);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == rl[RLIMIT_CPU]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitCPU", RLIMIT_CPU, "40s:1m", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_CPU]);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == 40);
+ assert_se(rl[RLIMIT_CPU]->rlim_max == 60);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitCPU", RLIMIT_CPU, "infinity", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_CPU]);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == RLIM_INFINITY);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == rl[RLIMIT_CPU]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitCPU", RLIMIT_CPU, "1234ms", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_CPU]);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == 2);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == rl[RLIMIT_CPU]->rlim_max);
+
+ rl[RLIMIT_CPU] = mfree(rl[RLIMIT_CPU]);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitRTTIME", RLIMIT_RTTIME, "58", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_RTTIME]);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == 58);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == rl[RLIMIT_RTTIME]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitRTTIME", RLIMIT_RTTIME, "58:60", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_RTTIME]);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == 58);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_max == 60);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitRTTIME", RLIMIT_RTTIME, "59s", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_RTTIME]);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == 59 * USEC_PER_SEC);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == rl[RLIMIT_RTTIME]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitRTTIME", RLIMIT_RTTIME, "59s:123s", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_RTTIME]);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == 59 * USEC_PER_SEC);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_max == 123 * USEC_PER_SEC);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitRTTIME", RLIMIT_RTTIME, "infinity", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_RTTIME]);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == RLIM_INFINITY);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == rl[RLIMIT_RTTIME]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitRTTIME", RLIMIT_RTTIME, "infinity:infinity", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_RTTIME]);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == RLIM_INFINITY);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == rl[RLIMIT_RTTIME]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitRTTIME", RLIMIT_RTTIME, "2345ms", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_RTTIME]);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == 2345 * USEC_PER_MSEC);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == rl[RLIMIT_RTTIME]->rlim_max);
+
+ rl[RLIMIT_RTTIME] = mfree(rl[RLIMIT_RTTIME]);
+}
+
+static void test_config_parse_pass_environ(void) {
+ /* int config_parse_pass_environ(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) */
+ int r;
+ _cleanup_strv_free_ char **passenv = NULL;
+
+ r = config_parse_pass_environ(NULL, "fake", 1, "section", 1,
+ "PassEnvironment", 0, "A B",
+ &passenv, NULL);
+ assert_se(r >= 0);
+ assert_se(strv_length(passenv) == 2);
+ assert_se(streq(passenv[0], "A"));
+ assert_se(streq(passenv[1], "B"));
+
+ r = config_parse_pass_environ(NULL, "fake", 1, "section", 1,
+ "PassEnvironment", 0, "",
+ &passenv, NULL);
+ assert_se(r >= 0);
+ assert_se(strv_isempty(passenv));
+
+ r = config_parse_pass_environ(NULL, "fake", 1, "section", 1,
+ "PassEnvironment", 0, "'invalid name' 'normal_name' A=1 \\",
+ &passenv, NULL);
+ assert_se(r >= 0);
+ assert_se(strv_length(passenv) == 1);
+ assert_se(streq(passenv[0], "normal_name"));
+
+}
+
+static void test_unit_dump_config_items(void) {
+ unit_dump_config_items(stdout);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = enter_cgroup_subroot();
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+
+ r = test_unit_file_get_set();
+ test_config_parse_exec();
+ test_config_parse_log_extra_fields();
+ test_config_parse_capability_set();
+ test_config_parse_rlimit();
+ test_config_parse_pass_environ();
+ test_load_env_file_1();
+ test_load_env_file_2();
+ test_load_env_file_3();
+ test_load_env_file_4();
+ test_load_env_file_5();
+ TEST_REQ_RUNNING_SYSTEMD(test_install_printf());
+ test_unit_dump_config_items();
+
+ return r;
+}
diff --git a/src/test/test-unit-name.c b/src/test/test-unit-name.c
new file mode 100644
index 0000000..0d3674c
--- /dev/null
+++ b/src/test/test-unit-name.c
@@ -0,0 +1,848 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <pwd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "all-units.h"
+#include "glob-util.h"
+#include "hostname-util.h"
+#include "macro.h"
+#include "manager.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "special.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "test-helper.h"
+#include "tests.h"
+#include "unit-def.h"
+#include "unit-name.h"
+#include "unit-printf.h"
+#include "unit.h"
+#include "user-util.h"
+#include "util.h"
+
+static void test_unit_name_is_valid(void) {
+ assert_se( unit_name_is_valid("foo.service", UNIT_NAME_ANY));
+ assert_se( unit_name_is_valid("foo.service", UNIT_NAME_PLAIN));
+ assert_se(!unit_name_is_valid("foo.service", UNIT_NAME_INSTANCE));
+ assert_se(!unit_name_is_valid("foo.service", UNIT_NAME_TEMPLATE));
+ assert_se(!unit_name_is_valid("foo.service", UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE));
+
+ assert_se( unit_name_is_valid("foo@bar.service", UNIT_NAME_ANY));
+ assert_se(!unit_name_is_valid("foo@bar.service", UNIT_NAME_PLAIN));
+ assert_se( unit_name_is_valid("foo@bar.service", UNIT_NAME_INSTANCE));
+ assert_se(!unit_name_is_valid("foo@bar.service", UNIT_NAME_TEMPLATE));
+ assert_se( unit_name_is_valid("foo@bar.service", UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE));
+
+ assert_se( unit_name_is_valid("foo@bar@bar.service", UNIT_NAME_ANY));
+ assert_se(!unit_name_is_valid("foo@bar@bar.service", UNIT_NAME_PLAIN));
+ assert_se( unit_name_is_valid("foo@bar@bar.service", UNIT_NAME_INSTANCE));
+ assert_se(!unit_name_is_valid("foo@bar@bar.service", UNIT_NAME_TEMPLATE));
+ assert_se( unit_name_is_valid("foo@bar@bar.service", UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE));
+
+ assert_se( unit_name_is_valid("foo@.service", UNIT_NAME_ANY));
+ assert_se(!unit_name_is_valid("foo@.service", UNIT_NAME_PLAIN));
+ assert_se(!unit_name_is_valid("foo@.service", UNIT_NAME_INSTANCE));
+ assert_se( unit_name_is_valid("foo@.service", UNIT_NAME_TEMPLATE));
+ assert_se( unit_name_is_valid("foo@.service", UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE));
+
+ assert_se(!unit_name_is_valid(".service", UNIT_NAME_ANY));
+ assert_se(!unit_name_is_valid("", UNIT_NAME_ANY));
+ assert_se(!unit_name_is_valid("foo.waldo", UNIT_NAME_ANY));
+ assert_se(!unit_name_is_valid("@.service", UNIT_NAME_ANY));
+ assert_se(!unit_name_is_valid("@piep.service", UNIT_NAME_ANY));
+
+ assert_se( unit_name_is_valid("user@1000.slice", UNIT_NAME_ANY));
+ assert_se( unit_name_is_valid("user@1000.slice", UNIT_NAME_INSTANCE));
+ assert_se(!unit_name_is_valid("user@1000.slice", UNIT_NAME_TEMPLATE));
+}
+
+static void test_unit_name_replace_instance_one(const char *pattern, const char *repl, const char *expected, int ret) {
+ _cleanup_free_ char *t = NULL;
+ assert_se(unit_name_replace_instance(pattern, repl, &t) == ret);
+ puts(strna(t));
+ assert_se(streq_ptr(t, expected));
+}
+
+static void test_unit_name_replace_instance(void) {
+ puts("-------------------------------------------------");
+ test_unit_name_replace_instance_one("foo@.service", "waldo", "foo@waldo.service", 0);
+ test_unit_name_replace_instance_one("foo@xyz.service", "waldo", "foo@waldo.service", 0);
+ test_unit_name_replace_instance_one("xyz", "waldo", NULL, -EINVAL);
+ test_unit_name_replace_instance_one("", "waldo", NULL, -EINVAL);
+ test_unit_name_replace_instance_one("foo.service", "waldo", NULL, -EINVAL);
+ test_unit_name_replace_instance_one(".service", "waldo", NULL, -EINVAL);
+ test_unit_name_replace_instance_one("foo@", "waldo", NULL, -EINVAL);
+ test_unit_name_replace_instance_one("@bar", "waldo", NULL, -EINVAL);
+}
+
+static void test_unit_name_from_path_one(const char *path, const char *suffix, const char *expected, int ret) {
+ _cleanup_free_ char *t = NULL;
+
+ assert_se(unit_name_from_path(path, suffix, &t) == ret);
+ puts(strna(t));
+ assert_se(streq_ptr(t, expected));
+
+ if (t) {
+ _cleanup_free_ char *k = NULL;
+ assert_se(unit_name_to_path(t, &k) == 0);
+ puts(strna(k));
+ assert_se(path_equal(k, empty_to_root(path)));
+ }
+}
+
+static void test_unit_name_from_path(void) {
+ puts("-------------------------------------------------");
+ test_unit_name_from_path_one("/waldo", ".mount", "waldo.mount", 0);
+ test_unit_name_from_path_one("/waldo/quuix", ".mount", "waldo-quuix.mount", 0);
+ test_unit_name_from_path_one("/waldo/quuix/", ".mount", "waldo-quuix.mount", 0);
+ test_unit_name_from_path_one("", ".mount", "-.mount", 0);
+ test_unit_name_from_path_one("/", ".mount", "-.mount", 0);
+ test_unit_name_from_path_one("///", ".mount", "-.mount", 0);
+ test_unit_name_from_path_one("/foo/../bar", ".mount", NULL, -EINVAL);
+ test_unit_name_from_path_one("/foo/./bar", ".mount", NULL, -EINVAL);
+}
+
+static void test_unit_name_from_path_instance_one(const char *pattern, const char *path, const char *suffix, const char *expected, int ret) {
+ _cleanup_free_ char *t = NULL;
+
+ assert_se(unit_name_from_path_instance(pattern, path, suffix, &t) == ret);
+ puts(strna(t));
+ assert_se(streq_ptr(t, expected));
+
+ if (t) {
+ _cleanup_free_ char *k = NULL, *v = NULL;
+
+ assert_se(unit_name_to_instance(t, &k) > 0);
+ assert_se(unit_name_path_unescape(k, &v) == 0);
+ assert_se(path_equal(v, empty_to_root(path)));
+ }
+}
+
+static void test_unit_name_from_path_instance(void) {
+ puts("-------------------------------------------------");
+
+ test_unit_name_from_path_instance_one("waldo", "/waldo", ".mount", "waldo@waldo.mount", 0);
+ test_unit_name_from_path_instance_one("waldo", "/waldo////quuix////", ".mount", "waldo@waldo-quuix.mount", 0);
+ test_unit_name_from_path_instance_one("waldo", "/", ".mount", "waldo@-.mount", 0);
+ test_unit_name_from_path_instance_one("waldo", "", ".mount", "waldo@-.mount", 0);
+ test_unit_name_from_path_instance_one("waldo", "///", ".mount", "waldo@-.mount", 0);
+ test_unit_name_from_path_instance_one("waldo", "..", ".mount", NULL, -EINVAL);
+ test_unit_name_from_path_instance_one("waldo", "/foo", ".waldi", NULL, -EINVAL);
+ test_unit_name_from_path_instance_one("wa--ldo", "/--", ".mount", "wa--ldo@\\x2d\\x2d.mount", 0);
+}
+
+static void test_unit_name_to_path_one(const char *unit, const char *path, int ret) {
+ _cleanup_free_ char *p = NULL;
+
+ assert_se(unit_name_to_path(unit, &p) == ret);
+ assert_se(streq_ptr(path, p));
+}
+
+static void test_unit_name_to_path(void) {
+ test_unit_name_to_path_one("home.mount", "/home", 0);
+ test_unit_name_to_path_one("home-lennart.mount", "/home/lennart", 0);
+ test_unit_name_to_path_one("home-lennart-.mount", NULL, -EINVAL);
+ test_unit_name_to_path_one("-home-lennart.mount", NULL, -EINVAL);
+ test_unit_name_to_path_one("-home--lennart.mount", NULL, -EINVAL);
+ test_unit_name_to_path_one("home-..-lennart.mount", NULL, -EINVAL);
+ test_unit_name_to_path_one("", NULL, -EINVAL);
+ test_unit_name_to_path_one("home/foo", NULL, -EINVAL);
+}
+
+static void test_unit_name_mangle_one(bool allow_globs, const char *pattern, const char *expect, int ret) {
+ _cleanup_free_ char *t = NULL;
+
+ assert_se(unit_name_mangle(pattern, (allow_globs * UNIT_NAME_MANGLE_GLOB) | UNIT_NAME_MANGLE_WARN, &t) == ret);
+ puts(strna(t));
+ assert_se(streq_ptr(t, expect));
+
+ if (t) {
+ _cleanup_free_ char *k = NULL;
+
+ assert_se(unit_name_is_valid(t, UNIT_NAME_ANY) ||
+ (allow_globs && string_is_glob(t)));
+
+ assert_se(unit_name_mangle(t, (allow_globs * UNIT_NAME_MANGLE_GLOB) | UNIT_NAME_MANGLE_WARN, &k) == 0);
+ assert_se(streq_ptr(t, k));
+ }
+}
+
+static void test_unit_name_mangle(void) {
+ puts("-------------------------------------------------");
+ test_unit_name_mangle_one(false, "foo.service", "foo.service", 0);
+ test_unit_name_mangle_one(false, "/home", "home.mount", 1);
+ test_unit_name_mangle_one(false, "/dev/sda", "dev-sda.device", 1);
+ test_unit_name_mangle_one(false, "üxknürz.service", "\\xc3\\xbcxkn\\xc3\\xbcrz.service", 1);
+ test_unit_name_mangle_one(false, "foobar-meh...waldi.service", "foobar-meh...waldi.service", 0);
+ test_unit_name_mangle_one(false, "_____####----.....service", "_____\\x23\\x23\\x23\\x23----.....service", 1);
+ test_unit_name_mangle_one(false, "_____##@;;;,,,##----.....service", "_____\\x23\\x23@\\x3b\\x3b\\x3b\\x2c\\x2c\\x2c\\x23\\x23----.....service", 1);
+ test_unit_name_mangle_one(false, "xxx@@@@/////\\\\\\\\\\yyy.service", "xxx@@@@-----\\\\\\\\\\yyy.service", 1);
+ test_unit_name_mangle_one(false, "", NULL, -EINVAL);
+
+ test_unit_name_mangle_one(true, "foo.service", "foo.service", 0);
+ test_unit_name_mangle_one(true, "foo", "foo.service", 1);
+ test_unit_name_mangle_one(true, "foo*", "foo*", 0);
+ test_unit_name_mangle_one(true, "ü*", "\\xc3\\xbc*", 1);
+}
+
+static int test_unit_printf(void) {
+ _cleanup_free_ char *mid = NULL, *bid = NULL, *host = NULL, *gid = NULL, *group = NULL, *uid = NULL, *user = NULL, *shell = NULL, *home = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *u;
+ int r;
+
+ assert_se(specifier_machine_id('m', NULL, NULL, &mid) >= 0 && mid);
+ assert_se(specifier_boot_id('b', NULL, NULL, &bid) >= 0 && bid);
+ assert_se(host = gethostname_malloc());
+ assert_se(user = uid_to_name(getuid()));
+ assert_se(group = gid_to_name(getgid()));
+ assert_se(asprintf(&uid, UID_FMT, getuid()));
+ assert_se(asprintf(&gid, UID_FMT, getgid()));
+ assert_se(get_home_dir(&home) >= 0);
+ assert_se(get_shell(&shell) >= 0);
+
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m);
+ if (MANAGER_SKIP_TEST(r))
+ return log_tests_skipped_errno(r, "manager_new");
+ assert_se(r == 0);
+
+#define expect(unit, pattern, expected) \
+ { \
+ char *e; \
+ _cleanup_free_ char *t = NULL; \
+ assert_se(unit_full_printf(unit, pattern, &t) >= 0); \
+ printf("result: %s\nexpect: %s\n", t, expected); \
+ if ((e = endswith(expected, "*"))) \
+ assert_se(strncmp(t, e, e-expected)); \
+ else \
+ assert_se(streq(t, expected)); \
+ }
+
+ assert_se(u = unit_new(m, sizeof(Service)));
+ assert_se(unit_add_name(u, "blah.service") == 0);
+ assert_se(unit_add_name(u, "blah.service") == 0);
+
+ /* general tests */
+ expect(u, "%%", "%");
+ expect(u, "%%s", "%s");
+ expect(u, "%,", "%,");
+ expect(u, "%", "%");
+
+ /* normal unit */
+ expect(u, "%n", "blah.service");
+ expect(u, "%f", "/blah");
+ expect(u, "%N", "blah");
+ expect(u, "%p", "blah");
+ expect(u, "%P", "blah");
+ expect(u, "%i", "");
+ expect(u, "%I", "");
+ expect(u, "%j", "blah");
+ expect(u, "%J", "blah");
+ expect(u, "%g", group);
+ expect(u, "%G", gid);
+ expect(u, "%u", user);
+ expect(u, "%U", uid);
+ expect(u, "%h", home);
+ expect(u, "%m", mid);
+ expect(u, "%b", bid);
+ expect(u, "%H", host);
+ expect(u, "%t", "/run/user/*");
+
+ /* templated */
+ assert_se(u = unit_new(m, sizeof(Service)));
+ assert_se(unit_add_name(u, "blah@foo-foo.service") == 0);
+ assert_se(unit_add_name(u, "blah@foo-foo.service") == 0);
+
+ expect(u, "%n", "blah@foo-foo.service");
+ expect(u, "%N", "blah@foo-foo");
+ expect(u, "%f", "/foo/foo");
+ expect(u, "%p", "blah");
+ expect(u, "%P", "blah");
+ expect(u, "%i", "foo-foo");
+ expect(u, "%I", "foo/foo");
+ expect(u, "%j", "blah");
+ expect(u, "%J", "blah");
+ expect(u, "%g", group);
+ expect(u, "%G", gid);
+ expect(u, "%u", user);
+ expect(u, "%U", uid);
+ expect(u, "%h", home);
+ expect(u, "%m", mid);
+ expect(u, "%b", bid);
+ expect(u, "%H", host);
+ expect(u, "%t", "/run/user/*");
+
+ /* templated with components */
+ assert_se(u = unit_new(m, sizeof(Slice)));
+ assert_se(unit_add_name(u, "blah-blah\\x2d.slice") == 0);
+
+ expect(u, "%n", "blah-blah\\x2d.slice");
+ expect(u, "%N", "blah-blah\\x2d");
+ expect(u, "%f", "/blah/blah-");
+ expect(u, "%p", "blah-blah\\x2d");
+ expect(u, "%P", "blah/blah-");
+ expect(u, "%i", "");
+ expect(u, "%I", "");
+ expect(u, "%j", "blah\\x2d");
+ expect(u, "%J", "blah-");
+
+#undef expect
+
+ return 0;
+}
+
+static void test_unit_instance_is_valid(void) {
+ assert_se(unit_instance_is_valid("fooBar"));
+ assert_se(unit_instance_is_valid("foo-bar"));
+ assert_se(unit_instance_is_valid("foo.stUff"));
+ assert_se(unit_instance_is_valid("fOo123.stuff"));
+ assert_se(unit_instance_is_valid("@f_oo123.Stuff"));
+
+ assert_se(!unit_instance_is_valid("$¢£"));
+ assert_se(!unit_instance_is_valid(""));
+ assert_se(!unit_instance_is_valid("foo bar"));
+ assert_se(!unit_instance_is_valid("foo/bar"));
+}
+
+static void test_unit_prefix_is_valid(void) {
+ assert_se(unit_prefix_is_valid("fooBar"));
+ assert_se(unit_prefix_is_valid("foo-bar"));
+ assert_se(unit_prefix_is_valid("foo.stUff"));
+ assert_se(unit_prefix_is_valid("fOo123.stuff"));
+ assert_se(unit_prefix_is_valid("foo123.Stuff"));
+
+ assert_se(!unit_prefix_is_valid("$¢£"));
+ assert_se(!unit_prefix_is_valid(""));
+ assert_se(!unit_prefix_is_valid("foo bar"));
+ assert_se(!unit_prefix_is_valid("foo/bar"));
+ assert_se(!unit_prefix_is_valid("@foo-bar"));
+}
+
+static void test_unit_name_change_suffix(void) {
+ char *t;
+
+ assert_se(unit_name_change_suffix("foo.mount", ".service", &t) == 0);
+ assert_se(streq(t, "foo.service"));
+ free(t);
+
+ assert_se(unit_name_change_suffix("foo@stuff.service", ".socket", &t) == 0);
+ assert_se(streq(t, "foo@stuff.socket"));
+ free(t);
+}
+
+static void test_unit_name_build(void) {
+ char *t;
+
+ assert_se(unit_name_build("foo", "bar", ".service", &t) == 0);
+ assert_se(streq(t, "foo@bar.service"));
+ free(t);
+
+ assert_se(unit_name_build("fo0-stUff_b", "bar", ".mount", &t) == 0);
+ assert_se(streq(t, "fo0-stUff_b@bar.mount"));
+ free(t);
+
+ assert_se(unit_name_build("foo", NULL, ".service", &t) == 0);
+ assert_se(streq(t, "foo.service"));
+ free(t);
+}
+
+static void test_slice_name_is_valid(void) {
+ assert_se( slice_name_is_valid(SPECIAL_ROOT_SLICE));
+ assert_se( slice_name_is_valid("foo.slice"));
+ assert_se( slice_name_is_valid("foo-bar.slice"));
+ assert_se( slice_name_is_valid("foo-bar-baz.slice"));
+ assert_se(!slice_name_is_valid("-foo-bar-baz.slice"));
+ assert_se(!slice_name_is_valid("foo-bar-baz-.slice"));
+ assert_se(!slice_name_is_valid("-foo-bar-baz-.slice"));
+ assert_se(!slice_name_is_valid("foo-bar--baz.slice"));
+ assert_se(!slice_name_is_valid("foo--bar--baz.slice"));
+ assert_se(!slice_name_is_valid(".slice"));
+ assert_se(!slice_name_is_valid(""));
+ assert_se(!slice_name_is_valid("foo.service"));
+
+ assert_se(!slice_name_is_valid("foo@.slice"));
+ assert_se(!slice_name_is_valid("foo@bar.slice"));
+ assert_se(!slice_name_is_valid("foo-bar@baz.slice"));
+ assert_se(!slice_name_is_valid("foo@bar@baz.slice"));
+ assert_se(!slice_name_is_valid("foo@bar-baz.slice"));
+ assert_se(!slice_name_is_valid("-foo-bar-baz@.slice"));
+ assert_se(!slice_name_is_valid("foo-bar-baz@-.slice"));
+ assert_se(!slice_name_is_valid("foo-bar-baz@a--b.slice"));
+ assert_se(!slice_name_is_valid("-foo-bar-baz@-.slice"));
+ assert_se(!slice_name_is_valid("foo-bar--baz@.slice"));
+ assert_se(!slice_name_is_valid("foo--bar--baz@.slice"));
+ assert_se(!slice_name_is_valid("@.slice"));
+ assert_se(!slice_name_is_valid("foo@bar.service"));
+}
+
+static void test_build_subslice(void) {
+ char *a;
+ char *b;
+
+ assert_se(slice_build_subslice(SPECIAL_ROOT_SLICE, "foo", &a) >= 0);
+ assert_se(slice_build_subslice(a, "bar", &b) >= 0);
+ free(a);
+ assert_se(slice_build_subslice(b, "barfoo", &a) >= 0);
+ free(b);
+ assert_se(slice_build_subslice(a, "foobar", &b) >= 0);
+ free(a);
+ assert_se(streq(b, "foo-bar-barfoo-foobar.slice"));
+ free(b);
+
+ assert_se(slice_build_subslice("foo.service", "bar", &a) < 0);
+ assert_se(slice_build_subslice("foo", "bar", &a) < 0);
+}
+
+static void test_build_parent_slice_one(const char *name, const char *expect, int ret) {
+ _cleanup_free_ char *s = NULL;
+
+ assert_se(slice_build_parent_slice(name, &s) == ret);
+ assert_se(streq_ptr(s, expect));
+}
+
+static void test_build_parent_slice(void) {
+ test_build_parent_slice_one(SPECIAL_ROOT_SLICE, NULL, 0);
+ test_build_parent_slice_one("foo.slice", SPECIAL_ROOT_SLICE, 1);
+ test_build_parent_slice_one("foo-bar.slice", "foo.slice", 1);
+ test_build_parent_slice_one("foo-bar-baz.slice", "foo-bar.slice", 1);
+ test_build_parent_slice_one("foo-bar--baz.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("-foo-bar.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("foo-bar-.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("foo-bar.service", NULL, -EINVAL);
+ test_build_parent_slice_one(".slice", NULL, -EINVAL);
+ test_build_parent_slice_one("foo@bar.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("foo-bar@baz.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("foo-bar--@baz.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("-foo-bar@bar.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("foo-bar@-.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("foo@bar.service", NULL, -EINVAL);
+ test_build_parent_slice_one("@.slice", NULL, -EINVAL);
+}
+
+static void test_unit_name_to_instance(void) {
+ char *instance;
+ int r;
+
+ r = unit_name_to_instance("foo@bar.service", &instance);
+ assert_se(r >= 0);
+ assert_se(streq(instance, "bar"));
+ free(instance);
+
+ r = unit_name_to_instance("foo@.service", &instance);
+ assert_se(r >= 0);
+ assert_se(streq(instance, ""));
+ free(instance);
+
+ r = unit_name_to_instance("fo0-stUff_b@b.service", &instance);
+ assert_se(r >= 0);
+ assert_se(streq(instance, "b"));
+ free(instance);
+
+ r = unit_name_to_instance("foo.service", &instance);
+ assert_se(r == 0);
+ assert_se(!instance);
+
+ r = unit_name_to_instance("fooj@unk", &instance);
+ assert_se(r < 0);
+
+ r = unit_name_to_instance("foo@", &instance);
+ assert_se(r < 0);
+}
+
+static void test_unit_name_escape(void) {
+ _cleanup_free_ char *r;
+
+ r = unit_name_escape("ab+-c.a/bc@foo.service");
+ assert_se(r);
+ assert_se(streq(r, "ab\\x2b\\x2dc.a-bc\\x40foo.service"));
+}
+
+static void test_u_n_t_one(const char *name, const char *expected, int ret) {
+ _cleanup_free_ char *f = NULL;
+
+ assert_se(unit_name_template(name, &f) == ret);
+ printf("got: %s, expected: %s\n", strna(f), strna(expected));
+ assert_se(streq_ptr(f, expected));
+}
+
+static void test_unit_name_template(void) {
+ test_u_n_t_one("foo@bar.service", "foo@.service", 0);
+ test_u_n_t_one("foo.mount", NULL, -EINVAL);
+}
+
+static void test_unit_name_path_unescape_one(const char *name, const char *path, int ret) {
+ _cleanup_free_ char *p = NULL;
+
+ assert_se(unit_name_path_unescape(name, &p) == ret);
+ assert_se(streq_ptr(path, p));
+}
+
+static void test_unit_name_path_unescape(void) {
+
+ test_unit_name_path_unescape_one("foo", "/foo", 0);
+ test_unit_name_path_unescape_one("foo-bar", "/foo/bar", 0);
+ test_unit_name_path_unescape_one("foo-.bar", "/foo/.bar", 0);
+ test_unit_name_path_unescape_one("foo-bar-baz", "/foo/bar/baz", 0);
+ test_unit_name_path_unescape_one("-", "/", 0);
+ test_unit_name_path_unescape_one("--", NULL, -EINVAL);
+ test_unit_name_path_unescape_one("-foo-bar", NULL, -EINVAL);
+ test_unit_name_path_unescape_one("foo--bar", NULL, -EINVAL);
+ test_unit_name_path_unescape_one("foo-bar-", NULL, -EINVAL);
+ test_unit_name_path_unescape_one(".-bar", NULL, -EINVAL);
+ test_unit_name_path_unescape_one("foo-..", NULL, -EINVAL);
+ test_unit_name_path_unescape_one("", NULL, -EINVAL);
+}
+
+static void test_unit_name_to_prefix_one(const char *input, int ret, const char *output) {
+ _cleanup_free_ char *k = NULL;
+
+ assert_se(unit_name_to_prefix(input, &k) == ret);
+ assert_se(streq_ptr(k, output));
+}
+
+static void test_unit_name_to_prefix(void) {
+ test_unit_name_to_prefix_one("foobar.service", 0, "foobar");
+ test_unit_name_to_prefix_one("", -EINVAL, NULL);
+ test_unit_name_to_prefix_one("foobar", -EINVAL, NULL);
+ test_unit_name_to_prefix_one(".service", -EINVAL, NULL);
+ test_unit_name_to_prefix_one("quux.quux", -EINVAL, NULL);
+ test_unit_name_to_prefix_one("quux.mount", 0, "quux");
+ test_unit_name_to_prefix_one("quux-quux.mount", 0, "quux-quux");
+ test_unit_name_to_prefix_one("quux@bar.mount", 0, "quux");
+ test_unit_name_to_prefix_one("quux-@.mount", 0, "quux-");
+ test_unit_name_to_prefix_one("@.mount", -EINVAL, NULL);
+}
+
+static void test_unit_name_from_dbus_path_one(const char *input, int ret, const char *output) {
+ _cleanup_free_ char *k = NULL;
+
+ assert_se(unit_name_from_dbus_path(input, &k) == ret);
+ assert_se(streq_ptr(k, output));
+}
+
+static void test_unit_name_from_dbus_path(void) {
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dbus_2esocket", 0, "dbus.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/_2d_2emount", 0, "-.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/_2d_2eslice", 0, "-.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/accounts_2ddaemon_2eservice", 0, "accounts-daemon.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/auditd_2eservice", 0, "auditd.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/basic_2etarget", 0, "basic.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/bluetooth_2etarget", 0, "bluetooth.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/boot_2eautomount", 0, "boot.automount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/boot_2emount", 0, "boot.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/btrfs_2emount", 0, "btrfs.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/cryptsetup_2dpre_2etarget", 0, "cryptsetup-pre.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/cryptsetup_2etarget", 0, "cryptsetup.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dbus_2eservice", 0, "dbus.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dbus_2esocket", 0, "dbus.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dcdrom_2edevice", 0, "dev-cdrom.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2data_5cx2dINTEL_5fSSDSA2M120G2GC_5fCVPO044405HH120QGN_2edevice", 0, "dev-disk-by\\x2did-ata\\x2dINTEL_SSDSA2M120G2GC_CVPO044405HH120QGN.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2data_5cx2dINTEL_5fSSDSA2M120G2GC_5fCVPO044405HH120QGN_5cx2dpart1_2edevice", 0, "dev-disk-by\\x2did-ata\\x2dINTEL_SSDSA2M120G2GC_CVPO044405HH120QGN\\x2dpart1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2data_5cx2dINTEL_5fSSDSA2M160G2GC_5fCVPO951003RY160AGN_2edevice", 0, "dev-disk-by\\x2did-ata\\x2dINTEL_SSDSA2M160G2GC_CVPO951003RY160AGN.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2data_5cx2dINTEL_5fSSDSA2M160G2GC_5fCVPO951003RY160AGN_5cx2dpart1_2edevice", 0, "dev-disk-by\\x2did-ata\\x2dINTEL_SSDSA2M160G2GC_CVPO951003RY160AGN\\x2dpart1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2data_5cx2dINTEL_5fSSDSA2M160G2GC_5fCVPO951003RY160AGN_5cx2dpart2_2edevice", 0, "dev-disk-by\\x2did-ata\\x2dINTEL_SSDSA2M160G2GC_CVPO951003RY160AGN\\x2dpart2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2data_5cx2dINTEL_5fSSDSA2M160G2GC_5fCVPO951003RY160AGN_5cx2dpart3_2edevice", 0, "dev-disk-by\\x2did-ata\\x2dINTEL_SSDSA2M160G2GC_CVPO951003RY160AGN\\x2dpart3.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2data_5cx2dTSSTcorp_5fCDDVDW_5fTS_5cx2dL633C_5fR6176GLZB14646_2edevice", 0, "dev-disk-by\\x2did-ata\\x2dTSSTcorp_CDDVDW_TS\\x2dL633C_R6176GLZB14646.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2dwwn_5cx2d0x50015179591245ae_2edevice", 0, "dev-disk-by\\x2did-wwn\\x2d0x50015179591245ae.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2dwwn_5cx2d0x50015179591245ae_5cx2dpart1_2edevice", 0, "dev-disk-by\\x2did-wwn\\x2d0x50015179591245ae\\x2dpart1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2dwwn_5cx2d0x50015179591245ae_5cx2dpart2_2edevice", 0, "dev-disk-by\\x2did-wwn\\x2d0x50015179591245ae\\x2dpart2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2dwwn_5cx2d0x50015179591245ae_5cx2dpart3_2edevice", 0, "dev-disk-by\\x2did-wwn\\x2d0x50015179591245ae\\x2dpart3.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2dwwn_5cx2d0x500151795946eab5_2edevice", 0, "dev-disk-by\\x2did-wwn\\x2d0x500151795946eab5.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2dwwn_5cx2d0x500151795946eab5_5cx2dpart1_2edevice", 0, "dev-disk-by\\x2did-wwn\\x2d0x500151795946eab5\\x2dpart1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dlabel_2d_5cxe3_5cx82_5cxb7_5cxe3_5cx82_5cxb9_5cxe3_5cx83_5cx86_5cxe3_5cx83_5cxa0_5cxe3_5cx81_5cxa7_5cxe4_5cxba_5cx88_5cxe7_5cxb4_5cx84_5cxe6_5cxb8_5cx88_5cxe3_5cx81_5cxbf_2edevice", 0, "dev-disk-by\\x2dlabel-\\xe3\\x82\\xb7\\xe3\\x82\\xb9\\xe3\\x83\\x86\\xe3\\x83\\xa0\\xe3\\x81\\xa7\\xe4\\xba\\x88\\xe7\\xb4\\x84\\xe6\\xb8\\x88\\xe3\\x81\\xbf.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpartuuid_2d59834e50_5cx2d01_2edevice", 0, "dev-disk-by\\x2dpartuuid-59834e50\\x2d01.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpartuuid_2d63e2a7b3_5cx2d01_2edevice", 0, "dev-disk-by\\x2dpartuuid-63e2a7b3\\x2d01.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpartuuid_2d63e2a7b3_5cx2d02_2edevice", 0, "dev-disk-by\\x2dpartuuid-63e2a7b3\\x2d02.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpartuuid_2d63e2a7b3_5cx2d03_2edevice", 0, "dev-disk-by\\x2dpartuuid-63e2a7b3\\x2d03.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpath_2dpci_5cx2d0000_3a00_3a1f_2e2_5cx2data_5cx2d1_2edevice", 0, "dev-disk-by\\x2dpath-pci\\x2d0000:00:1f.2\\x2data\\x2d1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpath_2dpci_5cx2d0000_3a00_3a1f_2e2_5cx2data_5cx2d1_5cx2dpart1_2edevice", 0, "dev-disk-by\\x2dpath-pci\\x2d0000:00:1f.2\\x2data\\x2d1\\x2dpart1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpath_2dpci_5cx2d0000_3a00_3a1f_2e2_5cx2data_5cx2d1_5cx2dpart2_2edevice", 0, "dev-disk-by\\x2dpath-pci\\x2d0000:00:1f.2\\x2data\\x2d1\\x2dpart2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpath_2dpci_5cx2d0000_3a00_3a1f_2e2_5cx2data_5cx2d1_5cx2dpart3_2edevice", 0, "dev-disk-by\\x2dpath-pci\\x2d0000:00:1f.2\\x2data\\x2d1\\x2dpart3.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpath_2dpci_5cx2d0000_3a00_3a1f_2e2_5cx2data_5cx2d2_2edevice", 0, "dev-disk-by\\x2dpath-pci\\x2d0000:00:1f.2\\x2data\\x2d2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpath_2dpci_5cx2d0000_3a00_3a1f_2e2_5cx2data_5cx2d6_2edevice", 0, "dev-disk-by\\x2dpath-pci\\x2d0000:00:1f.2\\x2data\\x2d6.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpath_2dpci_5cx2d0000_3a00_3a1f_2e2_5cx2data_5cx2d6_5cx2dpart1_2edevice", 0, "dev-disk-by\\x2dpath-pci\\x2d0000:00:1f.2\\x2data\\x2d6\\x2dpart1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2duuid_2d1A34E3F034E3CD37_2edevice", 0, "dev-disk-by\\x2duuid-1A34E3F034E3CD37.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2duuid_2dB670EBFE70EBC2EB_2edevice", 0, "dev-disk-by\\x2duuid-B670EBFE70EBC2EB.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2duuid_2dFCD4F509D4F4C6C4_2edevice", 0, "dev-disk-by\\x2duuid-FCD4F509D4F4C6C4.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2duuid_2db49ead57_5cx2d907c_5cx2d446c_5cx2db405_5cx2d5ca6cd865f5e_2edevice", 0, "dev-disk-by\\x2duuid-b49ead57\\x2d907c\\x2d446c\\x2db405\\x2d5ca6cd865f5e.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dhugepages_2emount", 0, "dev-hugepages.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dmqueue_2emount", 0, "dev-mqueue.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2drfkill_2edevice", 0, "dev-rfkill.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dsda1_2edevice", 0, "dev-sda1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dsda2_2edevice", 0, "dev-sda2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dsda3_2edevice", 0, "dev-sda3.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dsda_2edevice", 0, "dev-sda.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dsdb1_2edevice", 0, "dev-sdb1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dsdb_2edevice", 0, "dev-sdb.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dsr0_2edevice", 0, "dev-sr0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS0_2edevice", 0, "dev-ttyS0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS10_2edevice", 0, "dev-ttyS10.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS11_2edevice", 0, "dev-ttyS11.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS12_2edevice", 0, "dev-ttyS12.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS13_2edevice", 0, "dev-ttyS13.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS14_2edevice", 0, "dev-ttyS14.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS15_2edevice", 0, "dev-ttyS15.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS16_2edevice", 0, "dev-ttyS16.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS17_2edevice", 0, "dev-ttyS17.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS18_2edevice", 0, "dev-ttyS18.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS19_2edevice", 0, "dev-ttyS19.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS1_2edevice", 0, "dev-ttyS1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS20_2edevice", 0, "dev-ttyS20.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS21_2edevice", 0, "dev-ttyS21.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS22_2edevice", 0, "dev-ttyS22.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS23_2edevice", 0, "dev-ttyS23.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS24_2edevice", 0, "dev-ttyS24.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS25_2edevice", 0, "dev-ttyS25.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS26_2edevice", 0, "dev-ttyS26.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS27_2edevice", 0, "dev-ttyS27.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS28_2edevice", 0, "dev-ttyS28.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS29_2edevice", 0, "dev-ttyS29.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS2_2edevice", 0, "dev-ttyS2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS30_2edevice", 0, "dev-ttyS30.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS31_2edevice", 0, "dev-ttyS31.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS3_2edevice", 0, "dev-ttyS3.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS4_2edevice", 0, "dev-ttyS4.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS5_2edevice", 0, "dev-ttyS5.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS6_2edevice", 0, "dev-ttyS6.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS7_2edevice", 0, "dev-ttyS7.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS8_2edevice", 0, "dev-ttyS8.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS9_2edevice", 0, "dev-ttyS9.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dcmdline_2eservice", 0, "dracut-cmdline.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dinitqueue_2eservice", 0, "dracut-initqueue.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dmount_2eservice", 0, "dracut-mount.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dpre_2dmount_2eservice", 0, "dracut-pre-mount.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dpre_2dpivot_2eservice", 0, "dracut-pre-pivot.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dpre_2dtrigger_2eservice", 0, "dracut-pre-trigger.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dpre_2dudev_2eservice", 0, "dracut-pre-udev.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dshutdown_2eservice", 0, "dracut-shutdown.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/ebtables_2eservice", 0, "ebtables.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/emergency_2eservice", 0, "emergency.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/emergency_2etarget", 0, "emergency.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/fedora_2dimport_2dstate_2eservice", 0, "fedora-import-state.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/fedora_2dreadonly_2eservice", 0, "fedora-readonly.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/firewalld_2eservice", 0, "firewalld.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/getty_2dpre_2etarget", 0, "getty-pre.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/getty_2etarget", 0, "getty.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/getty_40tty1_2eservice", 0, "getty@tty1.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/graphical_2etarget", 0, "graphical.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/home_2emount", 0, "home.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/init_2escope", 0, "init.scope");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2dcleanup_2eservice", 0, "initrd-cleanup.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2dfs_2etarget", 0, "initrd-fs.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2dparse_2detc_2eservice", 0, "initrd-parse-etc.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2droot_2ddevice_2etarget", 0, "initrd-root-device.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2droot_2dfs_2etarget", 0, "initrd-root-fs.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2dswitch_2droot_2eservice", 0, "initrd-switch-root.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2dswitch_2droot_2etarget", 0, "initrd-switch-root.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2dudevadm_2dcleanup_2ddb_2eservice", 0, "initrd-udevadm-cleanup-db.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2etarget", 0, "initrd.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/ip6tables_2eservice", 0, "ip6tables.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/ipset_2eservice", 0, "ipset.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/iptables_2eservice", 0, "iptables.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/irqbalance_2eservice", 0, "irqbalance.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/kmod_2dstatic_2dnodes_2eservice", 0, "kmod-static-nodes.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/ldconfig_2eservice", 0, "ldconfig.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/lightdm_2eservice", 0, "lightdm.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/livesys_2dlate_2eservice", 0, "livesys-late.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/lm_5fsensors_2eservice", 0, "lm_sensors.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/local_2dfs_2dpre_2etarget", 0, "local-fs-pre.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/local_2dfs_2etarget", 0, "local-fs.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/machines_2etarget", 0, "machines.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/mcelog_2eservice", 0, "mcelog.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/multi_2duser_2etarget", 0, "multi-user.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/network_2dpre_2etarget", 0, "network-pre.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/network_2etarget", 0, "network.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/nss_2dlookup_2etarget", 0, "nss-lookup.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/nss_2duser_2dlookup_2etarget", 0, "nss-user-lookup.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/paths_2etarget", 0, "paths.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/plymouth_2dquit_2dwait_2eservice", 0, "plymouth-quit-wait.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/plymouth_2dquit_2eservice", 0, "plymouth-quit.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/plymouth_2dstart_2eservice", 0, "plymouth-start.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/polkit_2eservice", 0, "polkit.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/proc_2dsys_2dfs_2dbinfmt_5fmisc_2eautomount", 0, "proc-sys-fs-binfmt_misc.automount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/proc_2dsys_2dfs_2dbinfmt_5fmisc_2emount", 0, "proc-sys-fs-binfmt_misc.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/rc_2dlocal_2eservice", 0, "rc-local.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/remote_2dcryptsetup_2etarget", 0, "remote-cryptsetup.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/remote_2dfs_2dpre_2etarget", 0, "remote-fs-pre.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/remote_2dfs_2etarget", 0, "remote-fs.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/rescue_2eservice", 0, "rescue.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/rescue_2etarget", 0, "rescue.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/run_2duser_2d1000_2emount", 0, "run-user-1000.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/session_2d2_2escope", 0, "session-2.scope");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/shutdown_2etarget", 0, "shutdown.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/slices_2etarget", 0, "slices.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/smartd_2eservice", 0, "smartd.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sockets_2etarget", 0, "sockets.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sound_2etarget", 0, "sound.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sshd_2dkeygen_2etarget", 0, "sshd-keygen.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sshd_2dkeygen_40ecdsa_2eservice", 0, "sshd-keygen@ecdsa.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sshd_2dkeygen_40ed25519_2eservice", 0, "sshd-keygen@ed25519.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sshd_2dkeygen_40rsa_2eservice", 0, "sshd-keygen@rsa.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sshd_2eservice", 0, "sshd.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/swap_2etarget", 0, "swap.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a02_2e0_2dbacklight_2dacpi_5fvideo0_2edevice", 0, "sys-devices-pci0000:00-0000:00:02.0-backlight-acpi_video0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a02_2e0_2ddrm_2dcard0_2dcard0_5cx2dLVDS_5cx2d1_2dintel_5fbacklight_2edevice", 0, "sys-devices-pci0000:00-0000:00:02.0-drm-card0-card0\\x2dLVDS\\x2d1-intel_backlight.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1a_2e0_2dusb1_2d1_5cx2d1_2d1_5cx2d1_2e6_2d1_5cx2d1_2e6_3a1_2e0_2dbluetooth_2dhci0_2edevice", 0, "sys-devices-pci0000:00-0000:00:1a.0-usb1-1\\x2d1-1\\x2d1.6-1\\x2d1.6:1.0-bluetooth-hci0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1b_2e0_2dsound_2dcard0_2edevice", 0, "sys-devices-pci0000:00-0000:00:1b.0-sound-card0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1c_2e0_2d0000_3a02_3a00_2e0_2dnet_2dwlp2s0_2edevice", 0, "sys-devices-pci0000:00-0000:00:1c.0-0000:02:00.0-net-wlp2s0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1c_2e2_2d0000_3a04_3a00_2e0_2dnet_2denp4s0_2edevice", 0, "sys-devices-pci0000:00-0000:00:1c.2-0000:04:00.0-net-enp4s0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1f_2e2_2data1_2dhost0_2dtarget0_3a0_3a0_2d0_3a0_3a0_3a0_2dblock_2dsda_2dsda1_2edevice", 0, "sys-devices-pci0000:00-0000:00:1f.2-ata1-host0-target0:0:0-0:0:0:0-block-sda-sda1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1f_2e2_2data1_2dhost0_2dtarget0_3a0_3a0_2d0_3a0_3a0_3a0_2dblock_2dsda_2dsda2_2edevice", 0, "sys-devices-pci0000:00-0000:00:1f.2-ata1-host0-target0:0:0-0:0:0:0-block-sda-sda2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1f_2e2_2data1_2dhost0_2dtarget0_3a0_3a0_2d0_3a0_3a0_3a0_2dblock_2dsda_2dsda3_2edevice", 0, "sys-devices-pci0000:00-0000:00:1f.2-ata1-host0-target0:0:0-0:0:0:0-block-sda-sda3.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1f_2e2_2data1_2dhost0_2dtarget0_3a0_3a0_2d0_3a0_3a0_3a0_2dblock_2dsda_2edevice", 0, "sys-devices-pci0000:00-0000:00:1f.2-ata1-host0-target0:0:0-0:0:0:0-block-sda.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1f_2e2_2data2_2dhost1_2dtarget1_3a0_3a0_2d1_3a0_3a0_3a0_2dblock_2dsr0_2edevice", 0, "sys-devices-pci0000:00-0000:00:1f.2-ata2-host1-target1:0:0-1:0:0:0-block-sr0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1f_2e2_2data6_2dhost5_2dtarget5_3a0_3a0_2d5_3a0_3a0_3a0_2dblock_2dsdb_2dsdb1_2edevice", 0, "sys-devices-pci0000:00-0000:00:1f.2-ata6-host5-target5:0:0-5:0:0:0-block-sdb-sdb1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1f_2e2_2data6_2dhost5_2dtarget5_3a0_3a0_2d5_3a0_3a0_3a0_2dblock_2dsdb_2edevice", 0, "sys-devices-pci0000:00-0000:00:1f.2-ata6-host5-target5:0:0-5:0:0:0-block-sdb.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS0_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS10_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS10.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS11_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS11.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS12_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS12.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS13_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS13.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS14_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS14.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS15_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS15.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS16_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS16.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS17_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS17.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS18_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS18.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS19_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS19.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS1_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS20_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS20.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS21_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS21.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS22_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS22.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS23_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS23.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS24_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS24.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS25_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS25.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS26_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS26.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS27_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS27.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS28_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS28.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS29_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS29.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS2_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS30_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS30.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS31_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS31.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS3_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS3.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS4_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS4.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS5_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS5.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS6_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS6.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS7_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS7.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS8_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS8.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS9_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS9.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dvirtual_2dmisc_2drfkill_2edevice", 0, "sys-devices-virtual-misc-rfkill.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2dfs_2dfuse_2dconnections_2emount", 0, "sys-fs-fuse-connections.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2dkernel_2dconfig_2emount", 0, "sys-kernel-config.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2dkernel_2ddebug_2emount", 0, "sys-kernel-debug.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2dmodule_2dconfigfs_2edevice", 0, "sys-module-configfs.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2dsubsystem_2dbluetooth_2ddevices_2dhci0_2edevice", 0, "sys-subsystem-bluetooth-devices-hci0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2dsubsystem_2dnet_2ddevices_2denp4s0_2edevice", 0, "sys-subsystem-net-devices-enp4s0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2dsubsystem_2dnet_2ddevices_2dwlp2s0_2edevice", 0, "sys-subsystem-net-devices-wlp2s0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sysinit_2etarget", 0, "sysinit.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/syslog_2eservice", 0, "syslog.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/syslog_2esocket", 0, "syslog.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/syslog_2etarget", 0, "syslog.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sysroot_2emount", 0, "sysroot.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/system_2dgetty_2eslice", 0, "system-getty.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/system_2dsshd_5cx2dkeygen_2eslice", 0, "system-sshd\\x2dkeygen.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/system_2dsystemd_5cx2dbacklight_2eslice", 0, "system-systemd\\x2dbacklight.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/system_2dsystemd_5cx2dcoredump_2eslice", 0, "system-systemd\\x2dcoredump.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/system_2duser_5cx2druntime_5cx2ddir_2eslice", 0, "system-user\\x2druntime\\x2ddir.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/system_2eslice", 0, "system.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dask_2dpassword_2dconsole_2epath", 0, "systemd-ask-password-console.path");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dask_2dpassword_2dconsole_2eservice", 0, "systemd-ask-password-console.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dask_2dpassword_2dwall_2epath", 0, "systemd-ask-password-wall.path");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dask_2dpassword_2dwall_2eservice", 0, "systemd-ask-password-wall.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dbacklight_40backlight_3aacpi_5fvideo0_2eservice", 0, "systemd-backlight@backlight:acpi_video0.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dbacklight_40backlight_3aintel_5fbacklight_2eservice", 0, "systemd-backlight@backlight:intel_backlight.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dbinfmt_2eservice", 0, "systemd-binfmt.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dcoredump_2esocket", 0, "systemd-coredump.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dcoredump_400_2eservice", 0, "systemd-coredump@0.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dfirstboot_2eservice", 0, "systemd-firstboot.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dfsck_2droot_2eservice", 0, "systemd-fsck-root.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dhwdb_2dupdate_2eservice", 0, "systemd-hwdb-update.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dinitctl_2eservice", 0, "systemd-initctl.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dinitctl_2esocket", 0, "systemd-initctl.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2djournal_2dcatalog_2dupdate_2eservice", 0, "systemd-journal-catalog-update.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2djournal_2dflush_2eservice", 0, "systemd-journal-flush.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2djournald_2daudit_2esocket", 0, "systemd-journald-audit.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2djournald_2ddev_2dlog_2esocket", 0, "systemd-journald-dev-log.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2djournald_2eservice", 0, "systemd-journald.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2djournald_2esocket", 0, "systemd-journald.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dlogind_2eservice", 0, "systemd-logind.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dmachine_2did_2dcommit_2eservice", 0, "systemd-machine-id-commit.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dmodules_2dload_2eservice", 0, "systemd-modules-load.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dnetworkd_2eservice", 0, "systemd-networkd.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dnetworkd_2esocket", 0, "systemd-networkd.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2drandom_2dseed_2eservice", 0, "systemd-random-seed.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dremount_2dfs_2eservice", 0, "systemd-remount-fs.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dresolved_2eservice", 0, "systemd-resolved.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2drfkill_2eservice", 0, "systemd-rfkill.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2drfkill_2esocket", 0, "systemd-rfkill.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dsysctl_2eservice", 0, "systemd-sysctl.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dsysusers_2eservice", 0, "systemd-sysusers.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dtimesyncd_2eservice", 0, "systemd-timesyncd.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dtmpfiles_2dclean_2eservice", 0, "systemd-tmpfiles-clean.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dtmpfiles_2dclean_2etimer", 0, "systemd-tmpfiles-clean.timer");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dtmpfiles_2dsetup_2ddev_2eservice", 0, "systemd-tmpfiles-setup-dev.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dtmpfiles_2dsetup_2eservice", 0, "systemd-tmpfiles-setup.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dudev_2dtrigger_2eservice", 0, "systemd-udev-trigger.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dudevd_2dcontrol_2esocket", 0, "systemd-udevd-control.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dudevd_2dkernel_2esocket", 0, "systemd-udevd-kernel.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dudevd_2eservice", 0, "systemd-udevd.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dupdate_2ddone_2eservice", 0, "systemd-update-done.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dupdate_2dutmp_2drunlevel_2eservice", 0, "systemd-update-utmp-runlevel.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dupdate_2dutmp_2eservice", 0, "systemd-update-utmp.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2duser_2dsessions_2eservice", 0, "systemd-user-sessions.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dvconsole_2dsetup_2eservice", 0, "systemd-vconsole-setup.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/time_2dsync_2etarget", 0, "time-sync.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/timers_2etarget", 0, "timers.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/tmp_2emount", 0, "tmp.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/umount_2etarget", 0, "umount.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/unbound_2danchor_2eservice", 0, "unbound-anchor.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/unbound_2danchor_2etimer", 0, "unbound-anchor.timer");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/upower_2eservice", 0, "upower.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/user_2d1000_2eslice", 0, "user-1000.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/user_2druntime_2ddir_401000_2eservice", 0, "user-runtime-dir@1000.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/user_2eslice", 0, "user.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/user_401000_2eservice", 0, "user@1000.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/usr_2dlocal_2dtexlive_2emount", 0, "usr-local-texlive.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/var_2dlib_2dmachines_2emount", 0, "var-lib-machines.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/wpa_5fsupplicant_2eservice", 0, "wpa_supplicant.service");
+}
+
+int main(int argc, char* argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ int r, rc = 0;
+
+ test_setup_logging(LOG_INFO);
+
+ r = enter_cgroup_subroot();
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+
+ test_unit_name_is_valid();
+ test_unit_name_replace_instance();
+ test_unit_name_from_path();
+ test_unit_name_from_path_instance();
+ test_unit_name_mangle();
+ test_unit_name_to_path();
+ TEST_REQ_RUNNING_SYSTEMD(rc = test_unit_printf());
+ test_unit_instance_is_valid();
+ test_unit_prefix_is_valid();
+ test_unit_name_change_suffix();
+ test_unit_name_build();
+ test_slice_name_is_valid();
+ test_build_subslice();
+ test_build_parent_slice();
+ test_unit_name_to_instance();
+ test_unit_name_escape();
+ test_unit_name_template();
+ test_unit_name_path_unescape();
+ test_unit_name_to_prefix();
+ test_unit_name_from_dbus_path();
+
+ return rc;
+}
diff --git a/src/test/test-user-util.c b/src/test/test-user-util.c
new file mode 100644
index 0000000..801824a
--- /dev/null
+++ b/src/test/test-user-util.c
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "path-util.h"
+
+static void test_uid_to_name_one(uid_t uid, const char *name) {
+ _cleanup_free_ char *t = NULL;
+
+ log_info("/* %s("UID_FMT", \"%s\") */", __func__, uid, name);
+
+ assert_se(t = uid_to_name(uid));
+ if (!synthesize_nobody() && streq(name, NOBODY_USER_NAME)) {
+ log_info("(skipping detailed tests because nobody is not synthesized)");
+ return;
+ }
+ assert_se(streq_ptr(t, name));
+}
+
+static void test_gid_to_name_one(gid_t gid, const char *name) {
+ _cleanup_free_ char *t = NULL;
+
+ log_info("/* %s("GID_FMT", \"%s\") */", __func__, gid, name);
+
+ assert_se(t = gid_to_name(gid));
+ if (!synthesize_nobody() && streq(name, NOBODY_GROUP_NAME)) {
+ log_info("(skipping detailed tests because nobody is not synthesized)");
+ return;
+ }
+ assert_se(streq_ptr(t, name));
+}
+
+static void test_parse_uid(void) {
+ int r;
+ uid_t uid;
+
+ log_info("/* %s */", __func__);
+
+ r = parse_uid("100", &uid);
+ assert_se(r == 0);
+ assert_se(uid == 100);
+
+ r = parse_uid("65535", &uid);
+ assert_se(r == -ENXIO);
+
+ r = parse_uid("asdsdas", &uid);
+ assert_se(r == -EINVAL);
+}
+
+static void test_uid_ptr(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(UID_TO_PTR(0) != NULL);
+ assert_se(UID_TO_PTR(1000) != NULL);
+
+ assert_se(PTR_TO_UID(UID_TO_PTR(0)) == 0);
+ assert_se(PTR_TO_UID(UID_TO_PTR(1000)) == 1000);
+}
+
+static void test_valid_user_group_name(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!valid_user_group_name(NULL));
+ assert_se(!valid_user_group_name(""));
+ assert_se(!valid_user_group_name("1"));
+ assert_se(!valid_user_group_name("65535"));
+ assert_se(!valid_user_group_name("-1"));
+ assert_se(!valid_user_group_name("-kkk"));
+ assert_se(!valid_user_group_name("rööt"));
+ assert_se(!valid_user_group_name("."));
+ assert_se(!valid_user_group_name("eff.eff"));
+ assert_se(!valid_user_group_name("foo\nbar"));
+ assert_se(!valid_user_group_name("0123456789012345678901234567890123456789"));
+ assert_se(!valid_user_group_name_or_id("aaa:bbb"));
+
+ assert_se(valid_user_group_name("root"));
+ assert_se(valid_user_group_name("lennart"));
+ assert_se(valid_user_group_name("LENNART"));
+ assert_se(valid_user_group_name("_kkk"));
+ assert_se(valid_user_group_name("kkk-"));
+ assert_se(valid_user_group_name("kk-k"));
+
+ assert_se(valid_user_group_name("some5"));
+ assert_se(!valid_user_group_name("5some"));
+ assert_se(valid_user_group_name("INNER5NUMBER"));
+}
+
+static void test_valid_user_group_name_or_id(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!valid_user_group_name_or_id(NULL));
+ assert_se(!valid_user_group_name_or_id(""));
+ assert_se(valid_user_group_name_or_id("0"));
+ assert_se(valid_user_group_name_or_id("1"));
+ assert_se(valid_user_group_name_or_id("65534"));
+ assert_se(!valid_user_group_name_or_id("65535"));
+ assert_se(valid_user_group_name_or_id("65536"));
+ assert_se(!valid_user_group_name_or_id("-1"));
+ assert_se(!valid_user_group_name_or_id("-kkk"));
+ assert_se(!valid_user_group_name_or_id("rööt"));
+ assert_se(!valid_user_group_name_or_id("."));
+ assert_se(!valid_user_group_name_or_id("eff.eff"));
+ assert_se(!valid_user_group_name_or_id("foo\nbar"));
+ assert_se(!valid_user_group_name_or_id("0123456789012345678901234567890123456789"));
+ assert_se(!valid_user_group_name_or_id("aaa:bbb"));
+
+ assert_se(valid_user_group_name_or_id("root"));
+ assert_se(valid_user_group_name_or_id("lennart"));
+ assert_se(valid_user_group_name_or_id("LENNART"));
+ assert_se(valid_user_group_name_or_id("_kkk"));
+ assert_se(valid_user_group_name_or_id("kkk-"));
+ assert_se(valid_user_group_name_or_id("kk-k"));
+
+ assert_se(valid_user_group_name_or_id("some5"));
+ assert_se(!valid_user_group_name_or_id("5some"));
+ assert_se(valid_user_group_name_or_id("INNER5NUMBER"));
+}
+
+static void test_valid_gecos(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!valid_gecos(NULL));
+ assert_se(valid_gecos(""));
+ assert_se(valid_gecos("test"));
+ assert_se(valid_gecos("Ümläüt"));
+ assert_se(!valid_gecos("In\nvalid"));
+ assert_se(!valid_gecos("In:valid"));
+}
+
+static void test_valid_home(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!valid_home(NULL));
+ assert_se(!valid_home(""));
+ assert_se(!valid_home("."));
+ assert_se(!valid_home("/home/.."));
+ assert_se(!valid_home("/home/../"));
+ assert_se(!valid_home("/home\n/foo"));
+ assert_se(!valid_home("./piep"));
+ assert_se(!valid_home("piep"));
+ assert_se(!valid_home("/home/user:lennart"));
+
+ assert_se(valid_home("/"));
+ assert_se(valid_home("/home"));
+ assert_se(valid_home("/home/foo"));
+}
+
+static void test_get_user_creds_one(const char *id, const char *name, uid_t uid, gid_t gid, const char *home, const char *shell) {
+ const char *rhome = NULL;
+ const char *rshell = NULL;
+ uid_t ruid = UID_INVALID;
+ gid_t rgid = GID_INVALID;
+ int r;
+
+ log_info("/* %s(\"%s\", \"%s\", "UID_FMT", "GID_FMT", \"%s\", \"%s\") */",
+ __func__, id, name, uid, gid, home, shell);
+
+ r = get_user_creds(&id, &ruid, &rgid, &rhome, &rshell, 0);
+ log_info_errno(r, "got \"%s\", "UID_FMT", "GID_FMT", \"%s\", \"%s\": %m",
+ id, ruid, rgid, strnull(rhome), strnull(rshell));
+ if (!synthesize_nobody() && streq(name, NOBODY_USER_NAME)) {
+ log_info("(skipping detailed tests because nobody is not synthesized)");
+ return;
+ }
+ assert_se(r == 0);
+ assert_se(streq_ptr(id, name));
+ assert_se(ruid == uid);
+ assert_se(rgid == gid);
+ assert_se(path_equal(rhome, home));
+ assert_se(path_equal(rshell, shell));
+}
+
+static void test_get_group_creds_one(const char *id, const char *name, gid_t gid) {
+ gid_t rgid = GID_INVALID;
+ int r;
+
+ log_info("/* %s(\"%s\", \"%s\", "GID_FMT") */", __func__, id, name, gid);
+
+ r = get_group_creds(&id, &rgid, 0);
+ log_info_errno(r, "got \"%s\", "GID_FMT": %m", id, rgid);
+ if (!synthesize_nobody() && streq(name, NOBODY_GROUP_NAME)) {
+ log_info("(skipping detailed tests because nobody is not synthesized)");
+ return;
+ }
+ assert_se(r == 0);
+ assert_se(streq_ptr(id, name));
+ assert_se(rgid == gid);
+}
+
+int main(int argc, char *argv[]) {
+ test_uid_to_name_one(0, "root");
+ test_uid_to_name_one(UID_NOBODY, NOBODY_USER_NAME);
+ test_uid_to_name_one(0xFFFF, "65535");
+ test_uid_to_name_one(0xFFFFFFFF, "4294967295");
+
+ test_gid_to_name_one(0, "root");
+ test_gid_to_name_one(GID_NOBODY, NOBODY_GROUP_NAME);
+ test_gid_to_name_one(TTY_GID, "tty");
+ test_gid_to_name_one(0xFFFF, "65535");
+ test_gid_to_name_one(0xFFFFFFFF, "4294967295");
+
+ test_get_user_creds_one("root", "root", 0, 0, "/root", "/bin/sh");
+ test_get_user_creds_one("0", "root", 0, 0, "/root", "/bin/sh");
+ test_get_user_creds_one(NOBODY_USER_NAME, NOBODY_USER_NAME, UID_NOBODY, GID_NOBODY, "/", "/sbin/nologin");
+ test_get_user_creds_one("65534", NOBODY_USER_NAME, UID_NOBODY, GID_NOBODY, "/", "/sbin/nologin");
+
+ test_get_group_creds_one("root", "root", 0);
+ test_get_group_creds_one("0", "root", 0);
+ test_get_group_creds_one(NOBODY_GROUP_NAME, NOBODY_GROUP_NAME, GID_NOBODY);
+ test_get_group_creds_one("65534", NOBODY_GROUP_NAME, GID_NOBODY);
+
+ test_parse_uid();
+ test_uid_ptr();
+
+ test_valid_user_group_name();
+ test_valid_user_group_name_or_id();
+ test_valid_gecos();
+ test_valid_home();
+
+ return 0;
+}
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
new file mode 100644
index 0000000..9849530
--- /dev/null
+++ b/src/test/test-utf8.c
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+#include "util.h"
+
+static void test_utf8_is_printable(void) {
+ assert_se(utf8_is_printable("ascii is valid\tunicode", 22));
+ assert_se(utf8_is_printable("\342\204\242", 3));
+ assert_se(!utf8_is_printable("\341\204", 2));
+ assert_se(utf8_is_printable("ąę", 4));
+}
+
+static void test_utf8_is_valid(void) {
+ assert_se(utf8_is_valid("ascii is valid unicode"));
+ assert_se(utf8_is_valid("\342\204\242"));
+ assert_se(!utf8_is_valid("\341\204"));
+}
+
+static void test_ascii_is_valid(void) {
+ assert_se( ascii_is_valid("alsdjf\t\vbarr\nba z"));
+ assert_se(!ascii_is_valid("\342\204\242"));
+ assert_se(!ascii_is_valid("\341\204"));
+}
+
+static void test_ascii_is_valid_n(void) {
+ assert_se( ascii_is_valid_n("alsdjf\t\vbarr\nba z", 17));
+ assert_se( ascii_is_valid_n("alsdjf\t\vbarr\nba z", 16));
+ assert_se(!ascii_is_valid_n("alsdjf\t\vbarr\nba z", 18));
+ assert_se(!ascii_is_valid_n("\342\204\242", 3));
+ assert_se(!ascii_is_valid_n("\342\204\242", 2));
+ assert_se(!ascii_is_valid_n("\342\204\242", 1));
+ assert_se( ascii_is_valid_n("\342\204\242", 0));
+}
+
+static void test_utf8_encoded_valid_unichar(void) {
+ assert_se(utf8_encoded_valid_unichar("\342\204\242") == 3);
+ assert_se(utf8_encoded_valid_unichar("\302\256") == 2);
+ assert_se(utf8_encoded_valid_unichar("a") == 1);
+ assert_se(utf8_encoded_valid_unichar("\341\204") < 0);
+ assert_se(utf8_encoded_valid_unichar("\341\204\341\204") < 0);
+}
+
+static void test_utf8_escaping(void) {
+ _cleanup_free_ char *p1, *p2, *p3;
+
+ p1 = utf8_escape_invalid("goo goo goo");
+ puts(p1);
+ assert_se(utf8_is_valid(p1));
+
+ p2 = utf8_escape_invalid("\341\204\341\204");
+ puts(p2);
+ assert_se(utf8_is_valid(p2));
+
+ p3 = utf8_escape_invalid("\341\204");
+ puts(p3);
+ assert_se(utf8_is_valid(p3));
+}
+
+static void test_utf8_escaping_printable(void) {
+ _cleanup_free_ char *p1, *p2, *p3, *p4, *p5, *p6;
+
+ p1 = utf8_escape_non_printable("goo goo goo");
+ puts(p1);
+ assert_se(utf8_is_valid(p1));
+
+ p2 = utf8_escape_non_printable("\341\204\341\204");
+ puts(p2);
+ assert_se(utf8_is_valid(p2));
+
+ p3 = utf8_escape_non_printable("\341\204");
+ puts(p3);
+ assert_se(utf8_is_valid(p3));
+
+ p4 = utf8_escape_non_printable("ąę\n가너도루\n1234\n\341\204\341\204\n\001 \019\20\a");
+ puts(p4);
+ assert_se(utf8_is_valid(p4));
+
+ p5 = utf8_escape_non_printable("\001 \019\20\a");
+ puts(p5);
+ assert_se(utf8_is_valid(p5));
+
+ p6 = utf8_escape_non_printable("\xef\xbf\x30\x13");
+ puts(p6);
+ assert_se(utf8_is_valid(p6));
+}
+
+static void test_utf16_to_utf8(void) {
+ const char16_t utf16[] = { htole16('a'), htole16(0xd800), htole16('b'), htole16(0xdc00), htole16('c'), htole16(0xd801), htole16(0xdc37) };
+ static const char utf8[] = { 'a', 'b', 'c', 0xf0, 0x90, 0x90, 0xb7 };
+ _cleanup_free_ char16_t *b = NULL;
+ _cleanup_free_ char *a = NULL;
+
+ /* Convert UTF-16 to UTF-8, filtering embedded bad chars */
+ a = utf16_to_utf8(utf16, sizeof(utf16));
+ assert_se(a);
+ assert_se(memcmp(a, utf8, sizeof(utf8)) == 0);
+
+ /* Convert UTF-8 to UTF-16, and back */
+ b = utf8_to_utf16(utf8, sizeof(utf8));
+ assert_se(b);
+
+ free(a);
+ a = utf16_to_utf8(b, char16_strlen(b) * 2);
+ assert_se(a);
+ assert_se(strlen(a) == sizeof(utf8));
+ assert_se(memcmp(a, utf8, sizeof(utf8)) == 0);
+}
+
+static void test_utf8_n_codepoints(void) {
+ assert_se(utf8_n_codepoints("abc") == 3);
+ assert_se(utf8_n_codepoints("zażółcić gęślą jaźń") == 19);
+ assert_se(utf8_n_codepoints("串") == 1);
+ assert_se(utf8_n_codepoints("") == 0);
+ assert_se(utf8_n_codepoints("…👊🔪💐…") == 5);
+ assert_se(utf8_n_codepoints("\xF1") == (size_t) -1);
+}
+
+static void test_utf8_console_width(void) {
+ assert_se(utf8_console_width("abc") == 3);
+ assert_se(utf8_console_width("zażółcić gęślą jaźń") == 19);
+ assert_se(utf8_console_width("串") == 2);
+ assert_se(utf8_console_width("") == 0);
+ assert_se(utf8_console_width("…👊🔪💐…") == 8);
+ assert_se(utf8_console_width("\xF1") == (size_t) -1);
+}
+
+static void test_utf8_to_utf16(void) {
+ const char *p;
+
+ FOREACH_STRING(p,
+ "abc",
+ "zażółcić gęślą jaźń",
+ "串",
+ "",
+ "…👊🔪💐…") {
+
+ _cleanup_free_ char16_t *a = NULL;
+ _cleanup_free_ char *b = NULL;
+
+ a = utf8_to_utf16(p, strlen(p));
+ assert_se(a);
+
+ b = utf16_to_utf8(a, char16_strlen(a) * 2);
+ assert_se(b);
+ assert_se(streq(p, b));
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_utf8_is_valid();
+ test_utf8_is_printable();
+ test_ascii_is_valid();
+ test_ascii_is_valid_n();
+ test_utf8_encoded_valid_unichar();
+ test_utf8_escaping();
+ test_utf8_escaping_printable();
+ test_utf16_to_utf8();
+ test_utf8_n_codepoints();
+ test_utf8_console_width();
+ test_utf8_to_utf16();
+
+ return 0;
+}
diff --git a/src/test/test-util.c b/src/test/test-util.c
new file mode 100644
index 0000000..ffacd65
--- /dev/null
+++ b/src/test/test-util.c
@@ -0,0 +1,421 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "def.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "missing_syscall.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "raw-clone.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tests.h"
+#include "util.h"
+
+static void test_align_power2(void) {
+ unsigned long i, p2;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(ALIGN_POWER2(0) == 0);
+ assert_se(ALIGN_POWER2(1) == 1);
+ assert_se(ALIGN_POWER2(2) == 2);
+ assert_se(ALIGN_POWER2(3) == 4);
+ assert_se(ALIGN_POWER2(12) == 16);
+
+ assert_se(ALIGN_POWER2(ULONG_MAX) == 0);
+ assert_se(ALIGN_POWER2(ULONG_MAX - 1) == 0);
+ assert_se(ALIGN_POWER2(ULONG_MAX - 1024) == 0);
+ assert_se(ALIGN_POWER2(ULONG_MAX / 2) == ULONG_MAX / 2 + 1);
+ assert_se(ALIGN_POWER2(ULONG_MAX + 1) == 0);
+
+ for (i = 1; i < 131071; ++i) {
+ for (p2 = 1; p2 < i; p2 <<= 1)
+ /* empty */ ;
+
+ assert_se(ALIGN_POWER2(i) == p2);
+ }
+
+ for (i = ULONG_MAX - 1024; i < ULONG_MAX; ++i) {
+ for (p2 = 1; p2 && p2 < i; p2 <<= 1)
+ /* empty */ ;
+
+ assert_se(ALIGN_POWER2(i) == p2);
+ }
+}
+
+static void test_max(void) {
+ static const struct {
+ int a;
+ int b[CONST_MAX(10, 100)];
+ } val1 = {
+ .a = CONST_MAX(10, 100),
+ };
+ int d = 0;
+ unsigned long x = 12345;
+ unsigned long y = 54321;
+ const char str[] = "a_string_constant";
+ const unsigned long long arr[] = {9999ULL, 10ULL, 0ULL, 3000ULL, 2000ULL, 1000ULL, 100ULL, 9999999ULL};
+ void *p = (void *)str;
+ void *q = (void *)&str[16];
+
+ log_info("/* %s */", __func__);
+
+ assert_cc(sizeof(val1.b) == sizeof(int) * 100);
+
+ /* CONST_MAX returns (void) instead of a value if the passed arguments
+ * are not of the same type or not constant expressions. */
+ assert_cc(__builtin_types_compatible_p(typeof(CONST_MAX(1, 10)), int));
+ assert_cc(__builtin_types_compatible_p(typeof(CONST_MAX(1, 1U)), void));
+
+ assert_se(val1.a == 100);
+ assert_se(MAX(++d, 0) == 1);
+ assert_se(d == 1);
+
+ assert_cc(MAXSIZE(char[3], uint16_t) == 3);
+ assert_cc(MAXSIZE(char[3], uint32_t) == 4);
+ assert_cc(MAXSIZE(char, long) == sizeof(long));
+
+ assert_se(MAX(-5, 5) == 5);
+ assert_se(MAX(5, 5) == 5);
+ assert_se(MAX(MAX(1, MAX(2, MAX(3, 4))), 5) == 5);
+ assert_se(MAX(MAX(1, MAX(2, MAX(3, 2))), 1) == 3);
+ assert_se(MAX(MIN(1, MIN(2, MIN(3, 4))), 5) == 5);
+ assert_se(MAX(MAX(1, MIN(2, MIN(3, 2))), 1) == 2);
+ assert_se(LESS_BY(8, 4) == 4);
+ assert_se(LESS_BY(8, 8) == 0);
+ assert_se(LESS_BY(4, 8) == 0);
+ assert_se(LESS_BY(16, LESS_BY(8, 4)) == 12);
+ assert_se(LESS_BY(4, LESS_BY(8, 4)) == 0);
+ assert_se(CMP(3, 5) == -1);
+ assert_se(CMP(5, 3) == 1);
+ assert_se(CMP(5, 5) == 0);
+ assert_se(CMP(x, y) == -1);
+ assert_se(CMP(y, x) == 1);
+ assert_se(CMP(x, x) == 0);
+ assert_se(CMP(y, y) == 0);
+ assert_se(CMP(UINT64_MAX, (uint64_t) 0) == 1);
+ assert_se(CMP((uint64_t) 0, UINT64_MAX) == -1);
+ assert_se(CMP(UINT64_MAX, UINT64_MAX) == 0);
+ assert_se(CMP(INT64_MIN, INT64_MAX) == -1);
+ assert_se(CMP(INT64_MAX, INT64_MIN) == 1);
+ assert_se(CMP(INT64_MAX, INT64_MAX) == 0);
+ assert_se(CMP(INT64_MIN, INT64_MIN) == 0);
+ assert_se(CMP(INT64_MAX, (int64_t) 0) == 1);
+ assert_se(CMP((int64_t) 0, INT64_MIN) == 1);
+ assert_se(CMP(INT64_MIN, (int64_t) 0) == -1);
+ assert_se(CMP((int64_t) 0, INT64_MAX) == -1);
+ assert_se(CMP(&str[2], &str[7]) == -1);
+ assert_se(CMP(&str[2], &str[2]) == 0);
+ assert_se(CMP(&str[7], (const char *)str) == 1);
+ assert_se(CMP(str[2], str[7]) == 1);
+ assert_se(CMP(str[7], *str) == 1);
+ assert_se(CMP((const unsigned long long *)arr, &arr[3]) == -1);
+ assert_se(CMP(*arr, arr[3]) == 1);
+ assert_se(CMP(p, q) == -1);
+ assert_se(CMP(q, p) == 1);
+ assert_se(CMP(p, p) == 0);
+ assert_se(CMP(q, q) == 0);
+ assert_se(CLAMP(-5, 0, 1) == 0);
+ assert_se(CLAMP(5, 0, 1) == 1);
+ assert_se(CLAMP(5, -10, 1) == 1);
+ assert_se(CLAMP(5, -10, 10) == 5);
+ assert_se(CLAMP(CLAMP(0, -10, 10), CLAMP(-5, 10, 20), CLAMP(100, -5, 20)) == 10);
+}
+
+#pragma GCC diagnostic push
+#ifdef __clang__
+# pragma GCC diagnostic ignored "-Waddress-of-packed-member"
+#endif
+
+static void test_container_of(void) {
+ struct mytype {
+ uint8_t pad1[3];
+ uint64_t v1;
+ uint8_t pad2[2];
+ uint32_t v2;
+ } myval = { };
+
+ log_info("/* %s */", __func__);
+
+ assert_cc(sizeof(myval) >= 17);
+ assert_se(container_of(&myval.v1, struct mytype, v1) == &myval);
+ assert_se(container_of(&myval.v2, struct mytype, v2) == &myval);
+ assert_se(container_of(&container_of(&myval.v2,
+ struct mytype,
+ v2)->v1,
+ struct mytype,
+ v1) == &myval);
+}
+
+#pragma GCC diagnostic pop
+
+static void test_div_round_up(void) {
+ int div;
+
+ log_info("/* %s */", __func__);
+
+ /* basic tests */
+ assert_se(DIV_ROUND_UP(0, 8) == 0);
+ assert_se(DIV_ROUND_UP(1, 8) == 1);
+ assert_se(DIV_ROUND_UP(8, 8) == 1);
+ assert_se(DIV_ROUND_UP(12, 8) == 2);
+ assert_se(DIV_ROUND_UP(16, 8) == 2);
+
+ /* test multiple evaluation */
+ div = 0;
+ assert_se(DIV_ROUND_UP(div++, 8) == 0 && div == 1);
+ assert_se(DIV_ROUND_UP(++div, 8) == 1 && div == 2);
+ assert_se(DIV_ROUND_UP(8, div++) == 4 && div == 3);
+ assert_se(DIV_ROUND_UP(8, ++div) == 2 && div == 4);
+
+ /* overflow test with exact division */
+ assert_se(sizeof(0U) == 4);
+ assert_se(0xfffffffaU % 10U == 0U);
+ assert_se(0xfffffffaU / 10U == 429496729U);
+ assert_se(DIV_ROUND_UP(0xfffffffaU, 10U) == 429496729U);
+ assert_se((0xfffffffaU + 10U - 1U) / 10U == 0U);
+ assert_se(0xfffffffaU / 10U + !!(0xfffffffaU % 10U) == 429496729U);
+
+ /* overflow test with rounded division */
+ assert_se(0xfffffffdU % 10U == 3U);
+ assert_se(0xfffffffdU / 10U == 429496729U);
+ assert_se(DIV_ROUND_UP(0xfffffffdU, 10U) == 429496730U);
+ assert_se((0xfffffffdU + 10U - 1U) / 10U == 0U);
+ assert_se(0xfffffffdU / 10U + !!(0xfffffffdU % 10U) == 429496730U);
+}
+
+static void test_u64log2(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(u64log2(0) == 0);
+ assert_se(u64log2(8) == 3);
+ assert_se(u64log2(9) == 3);
+ assert_se(u64log2(15) == 3);
+ assert_se(u64log2(16) == 4);
+ assert_se(u64log2(1024*1024) == 20);
+ assert_se(u64log2(1024*1024+5) == 20);
+}
+
+static void test_protect_errno(void) {
+ log_info("/* %s */", __func__);
+
+ errno = 12;
+ {
+ PROTECT_ERRNO;
+ errno = 11;
+ }
+ assert_se(errno == 12);
+}
+
+static void test_unprotect_errno_inner_function(void) {
+ PROTECT_ERRNO;
+
+ errno = 2222;
+}
+
+static void test_unprotect_errno(void) {
+ log_info("/* %s */", __func__);
+
+ errno = 4711;
+
+ PROTECT_ERRNO;
+
+ errno = 815;
+
+ UNPROTECT_ERRNO;
+
+ assert_se(errno == 4711);
+
+ test_unprotect_errno_inner_function();
+
+ assert_se(errno == 4711);
+}
+
+static void test_in_set(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(IN_SET(1, 1));
+ assert_se(IN_SET(1, 1, 2, 3, 4));
+ assert_se(IN_SET(2, 1, 2, 3, 4));
+ assert_se(IN_SET(3, 1, 2, 3, 4));
+ assert_se(IN_SET(4, 1, 2, 3, 4));
+ assert_se(!IN_SET(0, 1));
+ assert_se(!IN_SET(0, 1, 2, 3, 4));
+}
+
+static void test_log2i(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(log2i(1) == 0);
+ assert_se(log2i(2) == 1);
+ assert_se(log2i(3) == 1);
+ assert_se(log2i(4) == 2);
+ assert_se(log2i(32) == 5);
+ assert_se(log2i(33) == 5);
+ assert_se(log2i(63) == 5);
+ assert_se(log2i(INT_MAX) == sizeof(int)*8-2);
+}
+
+static void test_eqzero(void) {
+ const uint32_t zeros[] = {0, 0, 0};
+ const uint32_t ones[] = {1, 1};
+ const uint32_t mixed[] = {0, 1, 0, 0, 0};
+ const uint8_t longer[] = {[55] = 255};
+
+ log_info("/* %s */", __func__);
+
+ assert_se(eqzero(zeros));
+ assert_se(!eqzero(ones));
+ assert_se(!eqzero(mixed));
+ assert_se(!eqzero(longer));
+}
+
+static void test_raw_clone(void) {
+ pid_t parent, pid, pid2;
+
+ log_info("/* %s */", __func__);
+
+ parent = getpid();
+ log_info("before clone: getpid()→"PID_FMT, parent);
+ assert_se(raw_getpid() == parent);
+
+ pid = raw_clone(0);
+ assert_se(pid >= 0);
+
+ pid2 = raw_getpid();
+ log_info("raw_clone: "PID_FMT" getpid()→"PID_FMT" raw_getpid()→"PID_FMT,
+ pid, getpid(), pid2);
+ if (pid == 0) {
+ assert_se(pid2 != parent);
+ _exit(EXIT_SUCCESS);
+ } else {
+ int status;
+
+ assert_se(pid2 == parent);
+ waitpid(pid, &status, __WCLONE);
+ assert_se(WIFEXITED(status) && WEXITSTATUS(status) == EXIT_SUCCESS);
+ }
+
+ errno = 0;
+ assert_se(raw_clone(CLONE_FS|CLONE_NEWNS) == -1);
+ assert_se(errno == EINVAL);
+}
+
+static void test_physical_memory(void) {
+ uint64_t p;
+ char buf[FORMAT_BYTES_MAX];
+
+ log_info("/* %s */", __func__);
+
+ p = physical_memory();
+ assert_se(p > 0);
+ assert_se(p < UINT64_MAX);
+ assert_se(p % page_size() == 0);
+
+ log_info("Memory: %s (%" PRIu64 ")", format_bytes(buf, sizeof(buf), p), p);
+}
+
+static void test_physical_memory_scale(void) {
+ uint64_t p;
+
+ log_info("/* %s */", __func__);
+
+ p = physical_memory();
+
+ assert_se(physical_memory_scale(0, 100) == 0);
+ assert_se(physical_memory_scale(100, 100) == p);
+
+ log_info("Memory original: %" PRIu64, physical_memory());
+ log_info("Memory scaled by 50%%: %" PRIu64, physical_memory_scale(50, 100));
+ log_info("Memory divided by 2: %" PRIu64, physical_memory() / 2);
+ log_info("Page size: %zu", page_size());
+
+ /* There might be an uneven number of pages, hence permit these calculations to be half a page off... */
+ assert_se(page_size()/2 + physical_memory_scale(50, 100) - p/2 <= page_size());
+ assert_se(physical_memory_scale(200, 100) == p*2);
+
+ assert_se(physical_memory_scale(0, 1) == 0);
+ assert_se(physical_memory_scale(1, 1) == p);
+ assert_se(physical_memory_scale(2, 1) == p*2);
+
+ assert_se(physical_memory_scale(0, 2) == 0);
+
+ assert_se(page_size()/2 + physical_memory_scale(1, 2) - p/2 <= page_size());
+ assert_se(physical_memory_scale(2, 2) == p);
+ assert_se(physical_memory_scale(4, 2) == p*2);
+
+ assert_se(physical_memory_scale(0, UINT32_MAX) == 0);
+ assert_se(physical_memory_scale(UINT32_MAX, UINT32_MAX) == p);
+
+ /* overflow */
+ assert_se(physical_memory_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX);
+}
+
+static void test_system_tasks_max(void) {
+ uint64_t t;
+
+ log_info("/* %s */", __func__);
+
+ t = system_tasks_max();
+ assert_se(t > 0);
+ assert_se(t < UINT64_MAX);
+
+ log_info("Max tasks: %" PRIu64, t);
+}
+
+static void test_system_tasks_max_scale(void) {
+ uint64_t t;
+
+ log_info("/* %s */", __func__);
+
+ t = system_tasks_max();
+
+ assert_se(system_tasks_max_scale(0, 100) == 0);
+ assert_se(system_tasks_max_scale(100, 100) == t);
+
+ assert_se(system_tasks_max_scale(0, 1) == 0);
+ assert_se(system_tasks_max_scale(1, 1) == t);
+ assert_se(system_tasks_max_scale(2, 1) == 2*t);
+
+ assert_se(system_tasks_max_scale(0, 2) == 0);
+ assert_se(system_tasks_max_scale(1, 2) == t/2);
+ assert_se(system_tasks_max_scale(2, 2) == t);
+ assert_se(system_tasks_max_scale(3, 2) == (3*t)/2);
+ assert_se(system_tasks_max_scale(4, 2) == t*2);
+
+ assert_se(system_tasks_max_scale(0, UINT32_MAX) == 0);
+ assert_se(system_tasks_max_scale((UINT32_MAX-1)/2, UINT32_MAX-1) == t/2);
+ assert_se(system_tasks_max_scale(UINT32_MAX, UINT32_MAX) == t);
+
+ /* overflow */
+
+ assert_se(system_tasks_max_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ test_align_power2();
+ test_max();
+ test_container_of();
+ test_div_round_up();
+ test_u64log2();
+ test_protect_errno();
+ test_unprotect_errno();
+ test_in_set();
+ test_log2i();
+ test_eqzero();
+ test_raw_clone();
+ test_physical_memory();
+ test_physical_memory_scale();
+ test_system_tasks_max();
+ test_system_tasks_max_scale();
+
+ return 0;
+}
diff --git a/src/test/test-verbs.c b/src/test/test-verbs.c
new file mode 100644
index 0000000..efb9664
--- /dev/null
+++ b/src/test/test-verbs.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "macro.h"
+#include "strv.h"
+#include "verbs.h"
+
+static int noop_dispatcher(int argc, char *argv[], void *userdata) {
+ return 0;
+}
+
+#define test_dispatch_one(argv, verbs, expected) \
+ optind = 0; \
+ assert_se(dispatch_verb(strv_length(argv), argv, verbs, NULL) == expected);
+
+static void test_verbs(void) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, noop_dispatcher },
+ { "list-images", VERB_ANY, 1, 0, noop_dispatcher },
+ { "list", VERB_ANY, 2, VERB_DEFAULT, noop_dispatcher },
+ { "status", 2, VERB_ANY, 0, noop_dispatcher },
+ { "show", VERB_ANY, VERB_ANY, 0, noop_dispatcher },
+ { "terminate", 2, VERB_ANY, 0, noop_dispatcher },
+ { "login", 2, 2, 0, noop_dispatcher },
+ { "copy-to", 3, 4, 0, noop_dispatcher },
+ {}
+ };
+
+ /* not found */
+ test_dispatch_one(STRV_MAKE("command-not-found"), verbs, -EINVAL);
+
+ /* found */
+ test_dispatch_one(STRV_MAKE("show"), verbs, 0);
+
+ /* found, too few args */
+ test_dispatch_one(STRV_MAKE("copy-to", "foo"), verbs, -EINVAL);
+
+ /* found, meets min args */
+ test_dispatch_one(STRV_MAKE("status", "foo", "bar"), verbs, 0);
+
+ /* found, too many args */
+ test_dispatch_one(STRV_MAKE("copy-to", "foo", "bar", "baz", "quux", "qaax"), verbs, -EINVAL);
+
+ /* no verb, but a default is set */
+ test_dispatch_one(STRV_MAKE_EMPTY, verbs, 0);
+}
+
+static void test_verbs_no_default(void) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, noop_dispatcher },
+ {},
+ };
+
+ test_dispatch_one(STRV_MAKE(NULL), verbs, -EINVAL);
+}
+
+int main(int argc, char *argv[]) {
+ test_verbs();
+ test_verbs_no_default();
+
+ return 0;
+}
diff --git a/src/test/test-watch-pid.c b/src/test/test-watch-pid.c
new file mode 100644
index 0000000..2c6ca0a
--- /dev/null
+++ b/src/test/test-watch-pid.c
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "log.h"
+#include "manager.h"
+#include "rm-rf.h"
+#include "service.h"
+#include "test-helper.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *a, *b, *c, *u;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ if (getuid() != 0)
+ return log_tests_skipped("not root");
+ r = enter_cgroup_subroot();
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ assert_se(set_unit_path(get_testdata_dir()) >= 0);
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+
+ assert_se(manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m) >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ assert_se(a = unit_new(m, sizeof(Service)));
+ assert_se(unit_add_name(a, "a.service") >= 0);
+ assert_se(set_isempty(a->pids));
+
+ assert_se(b = unit_new(m, sizeof(Service)));
+ assert_se(unit_add_name(b, "b.service") >= 0);
+ assert_se(set_isempty(b->pids));
+
+ assert_se(c = unit_new(m, sizeof(Service)));
+ assert_se(unit_add_name(c, "c.service") >= 0);
+ assert_se(set_isempty(c->pids));
+
+ assert_se(hashmap_isempty(m->watch_pids));
+ assert_se(manager_get_unit_by_pid(m, 4711) == NULL);
+
+ assert_se(unit_watch_pid(a, 4711) >= 0);
+ assert_se(manager_get_unit_by_pid(m, 4711) == a);
+
+ assert_se(unit_watch_pid(a, 4711) >= 0);
+ assert_se(manager_get_unit_by_pid(m, 4711) == a);
+
+ assert_se(unit_watch_pid(b, 4711) >= 0);
+ u = manager_get_unit_by_pid(m, 4711);
+ assert_se(u == a || u == b);
+
+ assert_se(unit_watch_pid(b, 4711) >= 0);
+ u = manager_get_unit_by_pid(m, 4711);
+ assert_se(u == a || u == b);
+
+ assert_se(unit_watch_pid(c, 4711) >= 0);
+ u = manager_get_unit_by_pid(m, 4711);
+ assert_se(u == a || u == b || u == c);
+
+ assert_se(unit_watch_pid(c, 4711) >= 0);
+ u = manager_get_unit_by_pid(m, 4711);
+ assert_se(u == a || u == b || u == c);
+
+ unit_unwatch_pid(b, 4711);
+ u = manager_get_unit_by_pid(m, 4711);
+ assert_se(u == a || u == c);
+
+ unit_unwatch_pid(b, 4711);
+ u = manager_get_unit_by_pid(m, 4711);
+ assert_se(u == a || u == c);
+
+ unit_unwatch_pid(a, 4711);
+ assert_se(manager_get_unit_by_pid(m, 4711) == c);
+
+ unit_unwatch_pid(a, 4711);
+ assert_se(manager_get_unit_by_pid(m, 4711) == c);
+
+ unit_unwatch_pid(c, 4711);
+ assert_se(manager_get_unit_by_pid(m, 4711) == NULL);
+
+ unit_unwatch_pid(c, 4711);
+ assert_se(manager_get_unit_by_pid(m, 4711) == NULL);
+
+ return 0;
+}
diff --git a/src/test/test-watchdog.c b/src/test/test-watchdog.c
new file mode 100644
index 0000000..ab66d5c
--- /dev/null
+++ b/src/test/test-watchdog.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <string.h>
+#include <unistd.h>
+
+#include "log.h"
+#include "tests.h"
+#include "watchdog.h"
+
+int main(int argc, char *argv[]) {
+ usec_t t;
+ unsigned i, count;
+ int r;
+ bool slow;
+
+ test_setup_logging(LOG_DEBUG);
+
+ slow = slow_tests_enabled();
+
+ t = slow ? 10 * USEC_PER_SEC : 1 * USEC_PER_SEC;
+ count = slow ? 5 : 3;
+
+ r = watchdog_set_timeout(&t);
+ if (r < 0)
+ log_warning_errno(r, "Failed to open watchdog: %m");
+ if (r == -EPERM)
+ t = 0;
+
+ for (i = 0; i < count; i++) {
+ log_info("Pinging...");
+ r = watchdog_ping();
+ if (r < 0)
+ log_warning_errno(r, "Failed to ping watchdog: %m");
+
+ usleep(t/2);
+ }
+
+ watchdog_close(true);
+ return 0;
+}
diff --git a/src/test/test-web-util.c b/src/test/test-web-util.c
new file mode 100644
index 0000000..dd1df8c
--- /dev/null
+++ b/src/test/test-web-util.c
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "macro.h"
+#include "web-util.h"
+
+static void test_is_valid_documentation_url(void) {
+ assert_se(documentation_url_is_valid("http://www.freedesktop.org/wiki/Software/systemd"));
+ assert_se(documentation_url_is_valid("https://www.kernel.org/doc/Documentation/binfmt_misc.txt")); /* dead */
+ assert_se(documentation_url_is_valid("https://www.kernel.org/doc/Documentation/admin-guide/binfmt-misc.rst"));
+ assert_se(documentation_url_is_valid("https://www.kernel.org/doc/html/latest/admin-guide/binfmt-misc.html"));
+ assert_se(documentation_url_is_valid("file:/foo/foo"));
+ assert_se(documentation_url_is_valid("man:systemd.special(7)"));
+ assert_se(documentation_url_is_valid("info:bar"));
+
+ assert_se(!documentation_url_is_valid("foo:"));
+ assert_se(!documentation_url_is_valid("info:"));
+ assert_se(!documentation_url_is_valid(""));
+}
+
+int main(int argc, char *argv[]) {
+ test_is_valid_documentation_url();
+
+ return 0;
+}
diff --git a/src/test/test-xattr-util.c b/src/test/test-xattr-util.c
new file mode 100644
index 0000000..3e6df96
--- /dev/null
+++ b/src/test/test-xattr-util.c
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "xattr-util.h"
+
+static void test_fgetxattrat_fake(void) {
+ char t[] = "/var/tmp/xattrtestXXXXXX";
+ _cleanup_close_ int fd = -1;
+ const char *x;
+ char v[3];
+ int r;
+ size_t size;
+
+ assert_se(mkdtemp(t));
+ x = strjoina(t, "/test");
+ assert_se(touch(x) >= 0);
+
+ r = setxattr(x, "user.foo", "bar", 3, 0);
+ if (r < 0 && errno == EOPNOTSUPP) /* no xattrs supported on /var/tmp... */
+ goto cleanup;
+ assert_se(r >= 0);
+
+ fd = open(t, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY);
+ assert_se(fd >= 0);
+
+ assert_se(fgetxattrat_fake(fd, "test", "user.foo", v, 3, 0, &size) >= 0);
+ assert_se(size == 3);
+ assert_se(memcmp(v, "bar", 3) == 0);
+
+ safe_close(fd);
+ fd = open("/", O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY);
+ assert_se(fd >= 0);
+ assert_se(fgetxattrat_fake(fd, "usr", "user.idontexist", v, 3, 0, &size) == -ENODATA);
+
+cleanup:
+ assert_se(unlink(x) >= 0);
+ assert_se(rmdir(t) >= 0);
+}
+
+static void test_getcrtime(void) {
+
+ _cleanup_close_ int fd = -1;
+ char ts[FORMAT_TIMESTAMP_MAX];
+ const char *vt;
+ usec_t usec, k;
+ int r;
+
+ assert_se(tmp_dir(&vt) >= 0);
+
+ fd = open_tmpfile_unlinkable(vt, O_RDWR);
+ assert_se(fd >= 0);
+
+ r = fd_getcrtime(fd, &usec);
+ if (r < 0)
+ log_debug_errno(r, "btime: %m");
+ else
+ log_debug("btime: %s", format_timestamp(ts, sizeof(ts), usec));
+
+ k = now(CLOCK_REALTIME);
+
+ r = fd_setcrtime(fd, 1519126446UL * USEC_PER_SEC);
+ if (!IN_SET(r, -EOPNOTSUPP, -ENOTTY)) {
+ assert_se(fd_getcrtime(fd, &usec) >= 0);
+ assert_se(k < 1519126446UL * USEC_PER_SEC ||
+ usec == 1519126446UL * USEC_PER_SEC);
+ }
+}
+
+int main(void) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_fgetxattrat_fake();
+ test_getcrtime();
+
+ return 0;
+}
diff --git a/src/test/test-xml.c b/src/test/test-xml.c
new file mode 100644
index 0000000..63e7a10
--- /dev/null
+++ b/src/test/test-xml.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdarg.h>
+
+#include "alloc-util.h"
+#include "string-util.h"
+#include "util.h"
+#include "xml.h"
+
+static void test_one(const char *data, ...) {
+ void *state = NULL;
+ va_list ap;
+
+ va_start(ap, data);
+
+ for (;;) {
+ _cleanup_free_ char *name = NULL;
+ int t, tt;
+ const char *nn;
+
+ t = xml_tokenize(&data, &name, &state, NULL);
+ assert_se(t >= 0);
+
+ tt = va_arg(ap, int);
+ assert_se(tt >= 0);
+
+ assert_se(t == tt);
+ if (t == XML_END)
+ break;
+
+ nn = va_arg(ap, const char *);
+ assert_se(streq_ptr(nn, name));
+ }
+
+ va_end(ap);
+}
+
+int main(int argc, char *argv[]) {
+
+ test_one("", XML_END);
+
+ test_one("<foo></foo>",
+ XML_TAG_OPEN, "foo",
+ XML_TAG_CLOSE, "foo",
+ XML_END);
+
+ test_one("<foo waldo=piep meh=\"huhu\"/>",
+ XML_TAG_OPEN, "foo",
+ XML_ATTRIBUTE_NAME, "waldo",
+ XML_ATTRIBUTE_VALUE, "piep",
+ XML_ATTRIBUTE_NAME, "meh",
+ XML_ATTRIBUTE_VALUE, "huhu",
+ XML_TAG_CLOSE_EMPTY, NULL,
+ XML_END);
+
+ test_one("xxxx\n"
+ "<foo><?xml foo?> <!-- zzzz --> </foo>",
+ XML_TEXT, "xxxx\n",
+ XML_TAG_OPEN, "foo",
+ XML_TEXT, " ",
+ XML_TEXT, " ",
+ XML_TAG_CLOSE, "foo",
+ XML_END);
+
+ return 0;
+}
diff --git a/src/time-wait-sync/time-wait-sync.c b/src/time-wait-sync/time-wait-sync.c
new file mode 100644
index 0000000..d02633c
--- /dev/null
+++ b/src/time-wait-sync/time-wait-sync.c
@@ -0,0 +1,252 @@
+/*
+ * systemd service to wait until kernel realtime clock is synchronized
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/inotify.h>
+#include <sys/timerfd.h>
+#include <sys/timex.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "main-func.h"
+#include "missing.h"
+#include "signal-util.h"
+#include "time-util.h"
+
+typedef struct ClockState {
+ int timerfd_fd; /* non-negative is descriptor from timerfd_create */
+ int adjtime_state; /* return value from last adjtimex(2) call */
+ sd_event_source *timerfd_event_source; /* non-null is the active io event source */
+ int inotify_fd;
+ sd_event_source *inotify_event_source;
+ int run_systemd_wd;
+ int run_systemd_timesync_wd;
+ bool has_watchfile;
+} ClockState;
+
+static void clock_state_release_timerfd(ClockState *sp) {
+ sp->timerfd_event_source = sd_event_source_unref(sp->timerfd_event_source);
+ sp->timerfd_fd = safe_close(sp->timerfd_fd);
+}
+
+static void clock_state_release(ClockState *sp) {
+ clock_state_release_timerfd(sp);
+ sp->inotify_event_source = sd_event_source_unref(sp->inotify_event_source);
+ sp->inotify_fd = safe_close(sp->inotify_fd);
+}
+
+static int clock_state_update(ClockState *sp, sd_event *event);
+
+static int update_notify_run_systemd_timesync(ClockState *sp) {
+ sp->run_systemd_timesync_wd = inotify_add_watch(sp->inotify_fd, "/run/systemd/timesync", IN_CREATE|IN_DELETE_SELF);
+ return sp->run_systemd_timesync_wd;
+}
+
+static int timerfd_handler(sd_event_source *s,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+ ClockState *sp = userdata;
+
+ return clock_state_update(sp, sd_event_source_get_event(s));
+}
+
+static void process_inotify_event(sd_event *event, ClockState *sp, struct inotify_event *e) {
+ if (e->wd == sp->run_systemd_wd) {
+ /* Only thing we care about is seeing if we can start watching /run/systemd/timesync. */
+ if (sp->run_systemd_timesync_wd < 0)
+ update_notify_run_systemd_timesync(sp);
+ } else if (e->wd == sp->run_systemd_timesync_wd) {
+ if (e->mask & IN_DELETE_SELF) {
+ /* Somebody removed /run/systemd/timesync. */
+ (void) inotify_rm_watch(sp->inotify_fd, sp->run_systemd_timesync_wd);
+ sp->run_systemd_timesync_wd = -1;
+ } else
+ /* Somebody might have created /run/systemd/timesync/synchronized. */
+ clock_state_update(sp, event);
+ }
+}
+
+static int inotify_handler(sd_event_source *s,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+ sd_event *event = sd_event_source_get_event(s);
+ ClockState *sp = userdata;
+ union inotify_event_buffer buffer;
+ struct inotify_event *e;
+ ssize_t l;
+
+ l = read(fd, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return log_warning_errno(errno, "Lost access to inotify: %m");
+ }
+ FOREACH_INOTIFY_EVENT(e, buffer, l)
+ process_inotify_event(event, sp, e);
+
+ return 0;
+}
+
+static int clock_state_update(
+ ClockState *sp,
+ sd_event *event) {
+
+ char buf[MAX((size_t)FORMAT_TIMESTAMP_MAX, STRLEN("unrepresentable"))];
+ struct timex tx = {};
+ const char * ts;
+ usec_t t;
+ int r;
+
+ clock_state_release_timerfd(sp);
+
+ /* The kernel supports cancelling timers whenever its realtime clock is "set" (which can happen in a variety of
+ * ways, generally adjustments of at least 500 ms). The way this module works is we set up a timerfd that will
+ * wake when the clock is set, and when that happens we read the clock synchronization state from the return
+ * value of adjtimex(2), which supports the NTP time adjustment protocol.
+ *
+ * The kernel determines whether the clock is synchronized using driver-specific tests, based on time
+ * information passed by an application, generally through adjtimex(2). If the application asserts the clock is
+ * synchronized, but does not also do something that "sets the clock", the timer will not be cancelled and
+ * synchronization will not be detected.
+ *
+ * Similarly, this service will never complete if the application sets the time without also providing
+ * information that adjtimex(2) can use to determine that the clock is synchronized. This generally doesn't
+ * happen, but can if the system has a hardware clock that is accurate enough that the adjustment is too small
+ * to be a "set".
+ *
+ * Both these failure-to-detect situations are covered by having the presence/creation of
+ * /run/systemd/timesync/synchronized, which is considered sufficient to indicate a synchronized clock even if
+ * the kernel has not been updated.
+ *
+ * For timesyncd the initial setting of the time uses settimeofday(2), which sets the clock but does not mark
+ * it synchronized. When an NTP source is selected it sets the clock again with clock_adjtime(2) which marks it
+ * synchronized and also touches /run/systemd/timesync/synchronized which covers the case when the clock wasn't
+ * "set". */
+
+ r = time_change_fd();
+ if (r < 0) {
+ log_error_errno(r, "Failed to create timerfd: %m");
+ goto finish;
+ }
+ sp->timerfd_fd = r;
+
+ r = adjtimex(&tx);
+ if (r < 0) {
+ log_error_errno(errno, "Failed to read adjtimex state: %m");
+ goto finish;
+ }
+ sp->adjtime_state = r;
+
+ if (tx.status & STA_NANO)
+ tx.time.tv_usec /= 1000;
+ t = timeval_load(&tx.time);
+ ts = format_timestamp_us_utc(buf, sizeof(buf), t);
+ if (!ts)
+ strcpy(buf, "unrepresentable");
+ log_info("adjtime state %d status %x time %s", sp->adjtime_state, tx.status, ts);
+
+ sp->has_watchfile = access("/run/systemd/timesync/synchronized", F_OK) >= 0;
+ if (sp->has_watchfile)
+ /* Presence of watch file overrides adjtime_state */
+ r = 0;
+ else if (sp->adjtime_state == TIME_ERROR) {
+ /* Not synchronized. Do a one-shot wait on the descriptor and inform the caller we need to keep
+ * running. */
+ r = sd_event_add_io(event, &sp->timerfd_event_source, sp->timerfd_fd,
+ EPOLLIN, timerfd_handler, sp);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create time change monitor source: %m");
+ goto finish;
+ }
+ r = 1;
+ } else
+ /* Synchronized; we can exit. */
+ r = 0;
+
+ finish:
+ if (r <= 0)
+ (void) sd_event_exit(event, r);
+ return r;
+}
+
+static int run(int argc, char * argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *event;
+ _cleanup_(clock_state_release) ClockState state = {
+ .timerfd_fd = -1,
+ .inotify_fd = -1,
+ .run_systemd_wd = -1,
+ .run_systemd_timesync_wd = -1,
+ };
+ int r;
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ r = sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create sigterm event source: %m");
+
+ r = sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create sigint event source: %m");
+
+ r = sd_event_set_watchdog(event, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create watchdog event source: %m");
+
+ r = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to create inotify descriptor: %m");
+
+ state.inotify_fd = r;
+
+ r = sd_event_add_io(event, &state.inotify_event_source, state.inotify_fd,
+ EPOLLIN, inotify_handler, &state);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create notify event source: %m");
+
+ r = inotify_add_watch(state.inotify_fd, "/run/systemd/", IN_CREATE);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to watch /run/systemd/: %m");
+
+ state.run_systemd_wd = r;
+
+ (void) update_notify_run_systemd_timesync(&state);
+
+ r = clock_state_update(&state, event);
+ if (r > 0) {
+ r = sd_event_loop(event);
+ if (r < 0)
+ log_error_errno(r, "Failed in event loop: %m");
+ }
+
+ if (state.has_watchfile)
+ log_debug("Exit enabled by: /run/systemd/timesync/synchonized");
+
+ if (state.adjtime_state == TIME_ERROR)
+ log_info("Exit without adjtimex synchronized.");
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/timedate/meson.build b/src/timedate/meson.build
new file mode 100644
index 0000000..46c7360
--- /dev/null
+++ b/src/timedate/meson.build
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+if conf.get('ENABLE_TIMEDATED') == 1
+ install_data('org.freedesktop.timedate1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.timedate1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.timedate1.policy',
+ install_dir : polkitpolicydir)
+endif
diff --git a/src/timedate/org.freedesktop.timedate1.conf b/src/timedate/org.freedesktop.timedate1.conf
new file mode 100644
index 0000000..53f6c84
--- /dev/null
+++ b/src/timedate/org.freedesktop.timedate1.conf
@@ -0,0 +1,29 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1+
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.timedate1"/>
+ <allow send_destination="org.freedesktop.timedate1"/>
+ <allow receive_sender="org.freedesktop.timedate1"/>
+ </policy>
+
+ <policy context="default">
+ <allow send_destination="org.freedesktop.timedate1"/>
+ <allow receive_sender="org.freedesktop.timedate1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/timedate/org.freedesktop.timedate1.policy b/src/timedate/org.freedesktop.timedate1.policy
new file mode 100644
index 0000000..b6303ba
--- /dev/null
+++ b/src/timedate/org.freedesktop.timedate1.policy
@@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1+
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.timedate1.set-time">
+ <description gettext-domain="systemd">Set system time</description>
+ <message gettext-domain="systemd">Authentication is required to set the system time.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.timedate1.set-timezone org.freedesktop.timedate1.set-ntp</annotate>
+ </action>
+
+ <action id="org.freedesktop.timedate1.set-timezone">
+ <description gettext-domain="systemd">Set system timezone</description>
+ <message gettext-domain="systemd">Authentication is required to set the system timezone.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.timedate1.set-local-rtc">
+ <description gettext-domain="systemd">Set RTC to local timezone or UTC</description>
+ <message gettext-domain="systemd">Authentication is required to control whether the RTC stores the local or UTC time.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.timedate1.set-ntp">
+ <description gettext-domain="systemd">Turn network time synchronization on or off</description>
+ <message gettext-domain="systemd">Authentication is required to control whether network time synchronization shall be enabled.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/timedate/org.freedesktop.timedate1.service b/src/timedate/org.freedesktop.timedate1.service
new file mode 100644
index 0000000..d5f3a6e
--- /dev/null
+++ b/src/timedate/org.freedesktop.timedate1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.timedate1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.timedate1.service
diff --git a/src/timedate/timedatectl.c b/src/timedate/timedatectl.c
new file mode 100644
index 0000000..1e7b262
--- /dev/null
+++ b/src/timedate/timedatectl.c
@@ -0,0 +1,883 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+#include <locale.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "sd-bus.h"
+
+#include "bus-error.h"
+#include "bus-util.h"
+#include "in-addr-util.h"
+#include "main-func.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "spawn-polkit-agent.h"
+#include "sparse-endian.h"
+#include "string-table.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+#include "verbs.h"
+
+static PagerFlags arg_pager_flags = 0;
+static bool arg_ask_password = true;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static char *arg_host = NULL;
+static bool arg_adjust_system_clock = false;
+static bool arg_monitor = false;
+static char **arg_property = NULL;
+static bool arg_value = false;
+static bool arg_all = false;
+
+typedef struct StatusInfo {
+ usec_t time;
+ const char *timezone;
+
+ usec_t rtc_time;
+ bool rtc_local;
+
+ bool ntp_capable;
+ bool ntp_active;
+ bool ntp_synced;
+} StatusInfo;
+
+static void print_status_info(const StatusInfo *i) {
+ const char *old_tz = NULL, *tz;
+ bool have_time = false;
+ char a[LINE_MAX];
+ struct tm tm;
+ time_t sec;
+ size_t n;
+ int r;
+
+ assert(i);
+
+ /* Save the old $TZ */
+ tz = getenv("TZ");
+ if (tz)
+ old_tz = strdupa(tz);
+
+ /* Set the new $TZ */
+ if (setenv("TZ", isempty(i->timezone) ? "UTC" : i->timezone, true) < 0)
+ log_warning_errno(errno, "Failed to set TZ environment variable, ignoring: %m");
+ else
+ tzset();
+
+ if (i->time != 0) {
+ sec = (time_t) (i->time / USEC_PER_SEC);
+ have_time = true;
+ } else if (IN_SET(arg_transport, BUS_TRANSPORT_LOCAL, BUS_TRANSPORT_MACHINE)) {
+ sec = time(NULL);
+ have_time = true;
+ } else
+ log_warning("Could not get time from timedated and not operating locally, ignoring.");
+
+ if (have_time) {
+ n = strftime(a, sizeof a, "%a %Y-%m-%d %H:%M:%S %Z", localtime_r(&sec, &tm));
+ printf(" Local time: %s\n", n > 0 ? a : "n/a");
+
+ n = strftime(a, sizeof a, "%a %Y-%m-%d %H:%M:%S UTC", gmtime_r(&sec, &tm));
+ printf(" Universal time: %s\n", n > 0 ? a : "n/a");
+ } else {
+ printf(" Local time: %s\n", "n/a");
+ printf(" Universal time: %s\n", "n/a");
+ }
+
+ if (i->rtc_time > 0) {
+ time_t rtc_sec;
+
+ rtc_sec = (time_t) (i->rtc_time / USEC_PER_SEC);
+ n = strftime(a, sizeof a, "%a %Y-%m-%d %H:%M:%S", gmtime_r(&rtc_sec, &tm));
+ printf(" RTC time: %s\n", n > 0 ? a : "n/a");
+ } else
+ printf(" RTC time: %s\n", "n/a");
+
+ if (have_time)
+ n = strftime(a, sizeof a, "%Z, %z", localtime_r(&sec, &tm));
+
+ /* Restore the $TZ */
+ if (old_tz)
+ r = setenv("TZ", old_tz, true);
+ else
+ r = unsetenv("TZ");
+ if (r < 0)
+ log_warning_errno(errno, "Failed to set TZ environment variable, ignoring: %m");
+ else
+ tzset();
+
+ printf(" Time zone: %s (%s)\n"
+ "System clock synchronized: %s\n"
+ " NTP service: %s\n"
+ " RTC in local TZ: %s\n",
+ strna(i->timezone), have_time && n > 0 ? a : "n/a",
+ yes_no(i->ntp_synced),
+ i->ntp_capable ? (i->ntp_active ? "active" : "inactive") : "n/a",
+ yes_no(i->rtc_local));
+
+ if (i->rtc_local)
+ printf("\n%s"
+ "Warning: The system is configured to read the RTC time in the local time zone.\n"
+ " This mode cannot be fully supported. It will create various problems\n"
+ " with time zone changes and daylight saving time adjustments. The RTC\n"
+ " time is never updated, it relies on external facilities to maintain it.\n"
+ " If at all possible, use RTC in UTC by calling\n"
+ " 'timedatectl set-local-rtc 0'.%s\n", ansi_highlight(), ansi_normal());
+}
+
+static int show_status(int argc, char **argv, void *userdata) {
+ StatusInfo info = {};
+ static const struct bus_properties_map map[] = {
+ { "Timezone", "s", NULL, offsetof(StatusInfo, timezone) },
+ { "LocalRTC", "b", NULL, offsetof(StatusInfo, rtc_local) },
+ { "NTP", "b", NULL, offsetof(StatusInfo, ntp_active) },
+ { "CanNTP", "b", NULL, offsetof(StatusInfo, ntp_capable) },
+ { "NTPSynchronized", "b", NULL, offsetof(StatusInfo, ntp_synced) },
+ { "TimeUSec", "t", NULL, offsetof(StatusInfo, time) },
+ { "RTCTimeUSec", "t", NULL, offsetof(StatusInfo, rtc_time) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.timedate1",
+ "/org/freedesktop/timedate1",
+ map,
+ BUS_MAP_BOOLEAN_AS_BOOL,
+ &error,
+ &m,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query server: %s", bus_error_message(&error, r));
+
+ print_status_info(&info);
+
+ return r;
+}
+
+static int show_properties(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ r = bus_print_all_properties(bus,
+ "org.freedesktop.timedate1",
+ "/org/freedesktop/timedate1",
+ NULL,
+ arg_property,
+ arg_value,
+ arg_all,
+ NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
+static int set_time(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool relative = false, interactive = arg_ask_password;
+ sd_bus *bus = userdata;
+ usec_t t;
+ int r;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = parse_timestamp(argv[1], &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse time specification '%s': %m", argv[1]);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.timedate1",
+ "/org/freedesktop/timedate1",
+ "org.freedesktop.timedate1",
+ "SetTime",
+ &error,
+ NULL,
+ "xbb", (int64_t) t, relative, interactive);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set time: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int set_timezone(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.timedate1",
+ "/org/freedesktop/timedate1",
+ "org.freedesktop.timedate1",
+ "SetTimezone",
+ &error,
+ NULL,
+ "sb", argv[1], arg_ask_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set time zone: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int set_local_rtc(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r, b;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ b = parse_boolean(argv[1]);
+ if (b < 0)
+ return log_error_errno(b, "Failed to parse local RTC setting '%s': %m", argv[1]);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.timedate1",
+ "/org/freedesktop/timedate1",
+ "org.freedesktop.timedate1",
+ "SetLocalRTC",
+ &error,
+ NULL,
+ "bbb", b, arg_adjust_system_clock, arg_ask_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set local RTC: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int set_ntp(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int b, r;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ b = parse_boolean(argv[1]);
+ if (b < 0)
+ return log_error_errno(b, "Failed to parse NTP setting '%s': %m", argv[1]);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.timedate1",
+ "/org/freedesktop/timedate1",
+ "org.freedesktop.timedate1",
+ "SetNTP",
+ &error,
+ NULL,
+ "bb", b, arg_ask_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set ntp: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int list_timezones(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+ char** zones;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.timedate1",
+ "/org/freedesktop/timedate1",
+ "org.freedesktop.timedate1",
+ "ListTimezones",
+ &error,
+ &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request list of time zones: %s",
+ bus_error_message(&error, r));
+
+ r = sd_bus_message_read_strv(reply, &zones);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ (void) pager_open(arg_pager_flags);
+ strv_print(zones);
+
+ return 0;
+}
+
+typedef struct NTPStatusInfo {
+ const char *server_name;
+ char *server_address;
+ usec_t poll_interval, poll_max, poll_min;
+ usec_t root_distance_max;
+
+ uint32_t leap, version, mode, stratum;
+ int32_t precision;
+ usec_t root_delay, root_dispersion;
+ union {
+ char str[5];
+ uint32_t val;
+ } reference;
+ usec_t origin, recv, trans, dest;
+
+ bool spike;
+ uint64_t packet_count;
+ usec_t jitter;
+
+ int64_t freq;
+} NTPStatusInfo;
+
+static void ntp_status_info_clear(NTPStatusInfo *p) {
+ p->server_address = mfree(p->server_address);
+}
+
+static const char * const ntp_leap_table[4] = {
+ [0] = "normal",
+ [1] = "last minute of the day has 61 seconds",
+ [2] = "last minute of the day has 59 seconds",
+ [3] = "not synchronized",
+};
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wtype-limits"
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(ntp_leap, uint32_t);
+#pragma GCC diagnostic pop
+
+static void print_ntp_status_info(NTPStatusInfo *i) {
+ char ts[FORMAT_TIMESPAN_MAX], tmin[FORMAT_TIMESPAN_MAX], tmax[FORMAT_TIMESPAN_MAX];
+ usec_t delay, t14, t23, offset, root_distance;
+ bool offset_sign;
+
+ assert(i);
+
+ /*
+ * "Timestamp Name ID When Generated
+ * ------------------------------------------------------------
+ * Originate Timestamp T1 time request sent by client
+ * Receive Timestamp T2 time request received by server
+ * Transmit Timestamp T3 time reply sent by server
+ * Destination Timestamp T4 time reply received by client
+ *
+ * The round-trip delay, d, and system clock offset, t, are defined as:
+ * d = (T4 - T1) - (T3 - T2) t = ((T2 - T1) + (T3 - T4)) / 2"
+ */
+
+ printf(" Server: %s (%s)\n",
+ i->server_address, i->server_name);
+ printf("Poll interval: %s (min: %s; max %s)\n",
+ format_timespan(ts, sizeof(ts), i->poll_interval, 0),
+ format_timespan(tmin, sizeof(tmin), i->poll_min, 0),
+ format_timespan(tmax, sizeof(tmax), i->poll_max, 0));
+
+ if (i->packet_count == 0) {
+ printf(" Packet count: 0\n");
+ return;
+ }
+
+ if (i->dest < i->origin || i->trans < i->recv || i->dest - i->origin < i->trans - i->recv) {
+ log_error("Invalid NTP response");
+ return;
+ }
+
+ delay = (i->dest - i->origin) - (i->trans - i->recv);
+
+ t14 = i->origin + i->dest;
+ t23 = i->recv + i->trans;
+ offset_sign = t14 < t23;
+ offset = (offset_sign ? t23 - t14 : t14 - t23) / 2;
+
+ root_distance = i->root_delay / 2 + i->root_dispersion;
+
+ printf(" Leap: %s\n"
+ " Version: %" PRIu32 "\n"
+ " Stratum: %" PRIu32 "\n",
+ ntp_leap_to_string(i->leap),
+ i->version,
+ i->stratum);
+ if (i->stratum <= 1)
+ printf(" Reference: %s\n", i->reference.str);
+ else
+ printf(" Reference: %" PRIX32 "\n", be32toh(i->reference.val));
+ printf(" Precision: %s (%" PRIi32 ")\n",
+ format_timespan(ts, sizeof(ts), DIV_ROUND_UP((nsec_t) (exp2(i->precision) * NSEC_PER_SEC), NSEC_PER_USEC), 0),
+ i->precision);
+ printf("Root distance: %s (max: %s)\n",
+ format_timespan(ts, sizeof(ts), root_distance, 0),
+ format_timespan(tmax, sizeof(tmax), i->root_distance_max, 0));
+ printf(" Offset: %s%s\n",
+ offset_sign ? "+" : "-",
+ format_timespan(ts, sizeof(ts), offset, 0));
+ printf(" Delay: %s\n",
+ format_timespan(ts, sizeof(ts), delay, 0));
+ printf(" Jitter: %s\n",
+ format_timespan(ts, sizeof(ts), i->jitter, 0));
+ printf(" Packet count: %" PRIu64 "\n", i->packet_count);
+
+ if (!i->spike)
+ printf(" Frequency: %+.3fppm\n",
+ (double) i->freq / 0x10000);
+}
+
+static int map_server_address(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ char **p = (char **) userdata;
+ const void *d;
+ int family, r;
+ size_t sz;
+
+ assert(p);
+
+ r = sd_bus_message_enter_container(m, 'r', "iay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "i", &family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(m, 'y', &d, &sz);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ if (sz == 0 && family == AF_UNSPEC) {
+ *p = mfree(*p);
+ return 0;
+ }
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown address family %i", family);
+
+ if (sz != FAMILY_ADDRESS_SIZE(family))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid address size");
+
+ r = in_addr_to_string(family, d, p);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int map_ntp_message(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ NTPStatusInfo *p = userdata;
+ const void *d;
+ size_t sz;
+ int32_t b;
+ int r;
+
+ assert(p);
+
+ r = sd_bus_message_enter_container(m, 'r', "uuuuittayttttbtt");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "uuuuitt",
+ &p->leap, &p->version, &p->mode, &p->stratum, &p->precision,
+ &p->root_delay, &p->root_dispersion);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(m, 'y', &d, &sz);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "ttttbtt",
+ &p->origin, &p->recv, &p->trans, &p->dest,
+ &b, &p->packet_count, &p->jitter);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ if (sz != 4)
+ return -EINVAL;
+
+ memcpy(p->reference.str, d, sz);
+
+ p->spike = b;
+
+ return 0;
+}
+
+static int show_timesync_status_once(sd_bus *bus) {
+ static const struct bus_properties_map map_timesync[] = {
+ { "ServerName", "s", NULL, offsetof(NTPStatusInfo, server_name) },
+ { "ServerAddress", "(iay)", map_server_address, offsetof(NTPStatusInfo, server_address) },
+ { "PollIntervalUSec", "t", NULL, offsetof(NTPStatusInfo, poll_interval) },
+ { "PollIntervalMinUSec", "t", NULL, offsetof(NTPStatusInfo, poll_min) },
+ { "PollIntervalMaxUSec", "t", NULL, offsetof(NTPStatusInfo, poll_max) },
+ { "RootDistanceMaxUSec", "t", NULL, offsetof(NTPStatusInfo, root_distance_max) },
+ { "NTPMessage", "(uuuuittayttttbtt)", map_ntp_message, 0 },
+ { "Frequency", "x", NULL, offsetof(NTPStatusInfo, freq) },
+ {}
+ };
+ _cleanup_(ntp_status_info_clear) NTPStatusInfo info = {};
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.timesync1",
+ "/org/freedesktop/timesync1",
+ map_timesync,
+ BUS_MAP_BOOLEAN_AS_BOOL,
+ &error,
+ &m,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query server: %s", bus_error_message(&error, r));
+
+ if (arg_monitor && !terminal_is_dumb())
+ fputs(ANSI_HOME_CLEAR, stdout);
+
+ print_ntp_status_info(&info);
+
+ return 0;
+}
+
+static int on_properties_changed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ const char *name;
+ int r;
+
+ assert(m);
+
+ r = sd_bus_message_read(m, "s", &name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read interface name: %m");
+
+ if (!streq_ptr(name, "org.freedesktop.timesync1.Manager"))
+ return 0;
+
+ return show_timesync_status_once(sd_bus_message_get_bus(m));
+}
+
+static int show_timesync_status(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ r = show_timesync_status_once(bus);
+ if (r < 0)
+ return r;
+
+ if (!arg_monitor)
+ return 0;
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get event loop: %m");
+
+ r = sd_bus_match_signal(bus,
+ NULL,
+ "org.freedesktop.timesync1",
+ "/org/freedesktop/timesync1",
+ "org.freedesktop.DBus.Properties",
+ "PropertiesChanged",
+ on_properties_changed, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for PropertiesChanged signal: %m");
+
+ r = sd_bus_attach_event(bus, event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ return 0;
+}
+
+static int print_timesync_property(const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all) {
+ char type;
+ const char *contents;
+ int r;
+
+ assert(name);
+ assert(m);
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_STRUCT:
+ if (streq(name, "NTPMessage")) {
+ _cleanup_(ntp_status_info_clear) NTPStatusInfo i = {};
+ char ts[FORMAT_TIMESPAN_MAX], stamp[FORMAT_TIMESTAMP_MAX];
+
+ r = map_ntp_message(NULL, NULL, m, NULL, &i);
+ if (r < 0)
+ return r;
+
+ if (i.packet_count == 0)
+ return 1;
+
+ if (!value) {
+ fputs(name, stdout);
+ fputc('=', stdout);
+ }
+
+ printf("{ Leap=%u, Version=%u, Mode=%u, Stratum=%u, Precision=%i,",
+ i.leap, i.version, i.mode, i.stratum, i.precision);
+ printf(" RootDelay=%s,",
+ format_timespan(ts, sizeof(ts), i.root_delay, 0));
+ printf(" RootDispersion=%s,",
+ format_timespan(ts, sizeof(ts), i.root_dispersion, 0));
+
+ if (i.stratum == 1)
+ printf(" Reference=%s,", i.reference.str);
+ else
+ printf(" Reference=%" PRIX32 ",", be32toh(i.reference.val));
+
+ printf(" OriginateTimestamp=%s,",
+ format_timestamp(stamp, sizeof(stamp), i.origin));
+ printf(" ReceiveTimestamp=%s,",
+ format_timestamp(stamp, sizeof(stamp), i.recv));
+ printf(" TransmitTimestamp=%s,",
+ format_timestamp(stamp, sizeof(stamp), i.trans));
+ printf(" DestinationTimestamp=%s,",
+ format_timestamp(stamp, sizeof(stamp), i.dest));
+ printf(" Ignored=%s PacketCount=%" PRIu64 ",",
+ yes_no(i.spike), i.packet_count);
+ printf(" Jitter=%s }\n",
+ format_timespan(ts, sizeof(ts), i.jitter, 0));
+
+ return 1;
+
+ } else if (streq(name, "ServerAddress")) {
+ _cleanup_free_ char *str = NULL;
+
+ r = map_server_address(NULL, NULL, m, NULL, &str);
+ if (r < 0)
+ return r;
+
+ if (arg_all || !isempty(str))
+ bus_print_property_value(name, expected_value, value, "%s", str);
+
+ return 1;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+static int show_timesync(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ r = bus_print_all_properties(bus,
+ "org.freedesktop.timesync1",
+ "/org/freedesktop/timesync1",
+ print_timesync_property,
+ arg_property,
+ arg_value,
+ arg_all,
+ NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("timedatectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n\n"
+ "Query or change system time and date settings.\n\n"
+ " -h --help Show this help message\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-ask-password Do not prompt for password\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --adjust-system-clock Adjust system clock when changing local RTC mode\n"
+ " --monitor Monitor status of systemd-timesyncd\n"
+ " -p --property=NAME Show only properties by this name\n"
+ " -a --all Show all properties, including empty ones\n"
+ " --value When showing properties, only print the value\n"
+ "\n"
+ "Commands:\n"
+ " status Show current time settings\n"
+ " show Show properties of systemd-timedated\n"
+ " set-time TIME Set system time\n"
+ " set-timezone ZONE Set system time zone\n"
+ " list-timezones Show known time zones\n"
+ " set-local-rtc BOOL Control whether RTC is in local time\n"
+ " set-ntp BOOL Enable or disable network time synchronization\n"
+ "\n"
+ "systemd-timesyncd Commands:\n"
+ " timesync-status Show status of systemd-timesyncd\n"
+ " show-timesync Show properties of systemd-timesyncd\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int verb_help(int argc, char **argv, void *userdata) {
+ return help();
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_ADJUST_SYSTEM_CLOCK,
+ ARG_NO_ASK_PASSWORD,
+ ARG_MONITOR,
+ ARG_VALUE,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "adjust-system-clock", no_argument, NULL, ARG_ADJUST_SYSTEM_CLOCK },
+ { "monitor", no_argument, NULL, ARG_MONITOR },
+ { "property", required_argument, NULL, 'p' },
+ { "all", no_argument, NULL, 'a' },
+ { "value", no_argument, NULL, ARG_VALUE },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hH:M:p:a", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case ARG_ADJUST_SYSTEM_CLOCK:
+ arg_adjust_system_clock = true;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_MONITOR:
+ arg_monitor = true;
+ break;
+
+ case 'p': {
+ r = strv_extend(&arg_property, optarg);
+ if (r < 0)
+ return log_oom();
+
+ /* If the user asked for a particular
+ * property, show it to him, even if it is
+ * empty. */
+ arg_all = true;
+ break;
+ }
+
+ case 'a':
+ arg_all = true;
+ break;
+
+ case ARG_VALUE:
+ arg_value = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int timedatectl_main(sd_bus *bus, int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "status", VERB_ANY, 1, VERB_DEFAULT, show_status },
+ { "show", VERB_ANY, 1, 0, show_properties },
+ { "set-time", 2, 2, 0, set_time },
+ { "set-timezone", 2, 2, 0, set_timezone },
+ { "list-timezones", VERB_ANY, 1, 0, list_timezones },
+ { "set-local-rtc", 2, 2, 0, set_local_rtc },
+ { "set-ntp", 2, 2, 0, set_ntp },
+ { "timesync-status", VERB_ANY, 1, 0, show_timesync_status },
+ { "show-timesync", VERB_ANY, 1, 0, show_timesync },
+ { "help", VERB_ANY, VERB_ANY, 0, verb_help }, /* Not documented, but supported since it is created. */
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, bus);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = bus_connect_transport(arg_transport, arg_host, false, &bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bus connection: %m");
+
+ return timedatectl_main(bus, argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/timedate/timedated.c b/src/timedate/timedated.c
new file mode 100644
index 0000000..eeb17b6
--- /dev/null
+++ b/src/timedate/timedated.c
@@ -0,0 +1,1048 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "clock-util.h"
+#include "def.h"
+#include "fileio-label.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "list.h"
+#include "main-func.h"
+#include "missing_capability.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-def.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "util.h"
+
+#define NULL_ADJTIME_UTC "0.0 0 0\n0\nUTC\n"
+#define NULL_ADJTIME_LOCAL "0.0 0 0\n0\nLOCAL\n"
+
+typedef struct UnitStatusInfo {
+ char *name;
+ char *load_state;
+ char *unit_file_state;
+ char *active_state;
+ char *path;
+
+ LIST_FIELDS(struct UnitStatusInfo, units);
+} UnitStatusInfo;
+
+typedef struct Context {
+ char *zone;
+ bool local_rtc;
+ Hashmap *polkit_registry;
+ sd_bus_message *cache;
+
+ sd_bus_slot *slot_job_removed;
+
+ LIST_HEAD(UnitStatusInfo, units);
+} Context;
+
+static void unit_status_info_clear(UnitStatusInfo *p) {
+ assert(p);
+
+ p->load_state = mfree(p->load_state);
+ p->unit_file_state = mfree(p->unit_file_state);
+ p->active_state = mfree(p->active_state);
+}
+
+static void unit_status_info_free(UnitStatusInfo *p) {
+ assert(p);
+
+ unit_status_info_clear(p);
+ free(p->name);
+ free(p->path);
+ free(p);
+}
+
+static void context_clear(Context *c) {
+ UnitStatusInfo *p;
+
+ assert(c);
+
+ free(c->zone);
+ bus_verify_polkit_async_registry_free(c->polkit_registry);
+ sd_bus_message_unref(c->cache);
+
+ sd_bus_slot_unref(c->slot_job_removed);
+
+ while ((p = c->units)) {
+ LIST_REMOVE(units, c->units, p);
+ unit_status_info_free(p);
+ }
+}
+
+static int context_add_ntp_service(Context *c, const char *s) {
+ UnitStatusInfo *u;
+
+ if (!unit_name_is_valid(s, UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ /* Do not add this if it is already listed */
+ LIST_FOREACH(units, u, c->units)
+ if (streq(u->name, s))
+ return 0;
+
+ u = new0(UnitStatusInfo, 1);
+ if (!u)
+ return -ENOMEM;
+
+ u->name = strdup(s);
+ if (!u->name) {
+ free(u);
+ return -ENOMEM;
+ }
+
+ LIST_APPEND(units, c->units, u);
+
+ return 0;
+}
+
+static int context_parse_ntp_services(Context *c) {
+ const char *env, *p;
+ int r;
+
+ assert(c);
+
+ env = getenv("SYSTEMD_TIMEDATED_NTP_SERVICES");
+ if (!env) {
+ r = context_add_ntp_service(c, "systemd-timesyncd.service");
+ if (r < 0)
+ log_warning_errno(r, "Failed to add NTP service \"systemd-timesyncd.service\", ignoring: %m");
+
+ return 0;
+ }
+
+ for (p = env;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, ":", 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_error("Invalid syntax, ignoring: %s", env);
+ break;
+ }
+
+ r = context_add_ntp_service(c, word);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add NTP service \"%s\", ignoring: %m", word);
+ }
+
+ return 0;
+}
+
+static int context_ntp_service_is_active(Context *c) {
+ UnitStatusInfo *info;
+ int count = 0;
+
+ assert(c);
+
+ /* Call context_update_ntp_status() to update UnitStatusInfo before calling this. */
+
+ LIST_FOREACH(units, info, c->units)
+ count += !STRPTR_IN_SET(info->active_state, "inactive", "failed");
+
+ return count;
+}
+
+static int context_ntp_service_is_enabled(Context *c) {
+ UnitStatusInfo *info;
+ int count = 0;
+
+ assert(c);
+
+ /* Call context_update_ntp_status() to update UnitStatusInfo before calling this. */
+
+ LIST_FOREACH(units, info, c->units)
+ count += !STRPTR_IN_SET(info->unit_file_state, "masked", "masked-runtime", "disabled", "bad");
+
+ return count;
+}
+
+static int context_ntp_service_exists(Context *c) {
+ UnitStatusInfo *info;
+ int count = 0;
+
+ assert(c);
+
+ /* Call context_update_ntp_status() to update UnitStatusInfo before calling this. */
+
+ LIST_FOREACH(units, info, c->units)
+ count += streq_ptr(info->load_state, "loaded");
+
+ return count;
+}
+
+static int context_read_data(Context *c) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(c);
+
+ r = get_timezone(&t);
+ if (r == -EINVAL)
+ log_warning_errno(r, "/etc/localtime should be a symbolic link to a time zone data file in /usr/share/zoneinfo/.");
+ else if (r < 0)
+ log_warning_errno(r, "Failed to get target of /etc/localtime: %m");
+
+ free_and_replace(c->zone, t);
+
+ c->local_rtc = clock_is_localtime(NULL) > 0;
+
+ return 0;
+}
+
+static int context_write_data_timezone(Context *c) {
+ _cleanup_free_ char *p = NULL;
+ int r = 0;
+
+ assert(c);
+
+ if (isempty(c->zone)) {
+ if (unlink("/etc/localtime") < 0 && errno != ENOENT)
+ r = -errno;
+
+ return r;
+ }
+
+ p = strappend("../usr/share/zoneinfo/", c->zone);
+ if (!p)
+ return log_oom();
+
+ r = symlink_atomic(p, "/etc/localtime");
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int context_write_data_local_rtc(Context *c) {
+ int r;
+ _cleanup_free_ char *s = NULL, *w = NULL;
+
+ assert(c);
+
+ r = read_full_file("/etc/adjtime", &s, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return r;
+
+ if (!c->local_rtc)
+ return 0;
+
+ w = strdup(NULL_ADJTIME_LOCAL);
+ if (!w)
+ return -ENOMEM;
+ } else {
+ char *p;
+ const char *e = "\n"; /* default if there is less than 3 lines */
+ const char *prepend = "";
+ size_t a, b;
+
+ p = strchrnul(s, '\n');
+ if (*p == '\0')
+ /* only one line, no \n terminator */
+ prepend = "\n0\n";
+ else if (p[1] == '\0') {
+ /* only one line, with \n terminator */
+ ++p;
+ prepend = "0\n";
+ } else {
+ p = strchr(p+1, '\n');
+ if (!p) {
+ /* only two lines, no \n terminator */
+ prepend = "\n";
+ p = s + strlen(s);
+ } else {
+ char *end;
+ /* third line might have a \n terminator or not */
+ p++;
+ end = strchr(p, '\n');
+ /* if we actually have a fourth line, use that as suffix "e", otherwise the default \n */
+ if (end)
+ e = end;
+ }
+ }
+
+ a = p - s;
+ b = strlen(e);
+
+ w = new(char, a + (c->local_rtc ? 5 : 3) + strlen(prepend) + b + 1);
+ if (!w)
+ return -ENOMEM;
+
+ *(char*) mempcpy(stpcpy(stpcpy(mempcpy(w, s, a), prepend), c->local_rtc ? "LOCAL" : "UTC"), e, b) = 0;
+
+ if (streq(w, NULL_ADJTIME_UTC)) {
+ if (unlink("/etc/adjtime") < 0)
+ if (errno != ENOENT)
+ return -errno;
+
+ return 0;
+ }
+ }
+
+ mac_selinux_init();
+ return write_string_file_atomic_label("/etc/adjtime", w);
+}
+
+static int context_update_ntp_status(Context *c, sd_bus *bus, sd_bus_message *m) {
+ static const struct bus_properties_map map[] = {
+ { "LoadState", "s", NULL, offsetof(UnitStatusInfo, load_state) },
+ { "ActiveState", "s", NULL, offsetof(UnitStatusInfo, active_state) },
+ { "UnitFileState", "s", NULL, offsetof(UnitStatusInfo, unit_file_state) },
+ {}
+ };
+ UnitStatusInfo *u;
+ int r;
+
+ assert(c);
+ assert(bus);
+
+ /* Suppress calling context_update_ntp_status() multiple times within single DBus transaction. */
+ if (m) {
+ if (m == c->cache)
+ return 0;
+
+ sd_bus_message_unref(c->cache);
+ c->cache = sd_bus_message_ref(m);
+ }
+
+ LIST_FOREACH(units, u, c->units) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *path = NULL;
+
+ unit_status_info_clear(u);
+
+ path = unit_dbus_path_from_name(u->name);
+ if (!path)
+ return -ENOMEM;
+
+ r = bus_map_all_properties(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ map,
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get properties: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int match_job_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ Context *c = userdata;
+ UnitStatusInfo *u;
+ const char *path;
+ unsigned n = 0;
+ int r;
+
+ assert(c);
+ assert(m);
+
+ r = sd_bus_message_read(m, "uoss", NULL, &path, NULL, NULL);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ LIST_FOREACH(units, u, c->units)
+ if (streq_ptr(path, u->path))
+ u->path = mfree(u->path);
+ else
+ n += !!u->path;
+
+ if (n == 0) {
+ (void) sd_bus_emit_properties_changed(sd_bus_message_get_bus(m), "/org/freedesktop/timedate1", "org.freedesktop.timedate1", "NTP", NULL);
+
+ c->slot_job_removed = sd_bus_slot_unref(c->slot_job_removed);
+ }
+
+ return 0;
+}
+
+static int unit_start_or_stop(UnitStatusInfo *u, sd_bus *bus, sd_bus_error *error, bool start) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *path;
+ int r;
+
+ assert(u);
+ assert(bus);
+ assert(error);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ start ? "StartUnit" : "StopUnit",
+ error,
+ &reply,
+ "ss",
+ u->name,
+ "replace");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = free_and_strdup(&u->path, path);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static int unit_enable_or_disable(UnitStatusInfo *u, sd_bus *bus, sd_bus_error *error, bool enable) {
+ int r;
+
+ assert(u);
+ assert(bus);
+ assert(error);
+
+ /* Call context_update_ntp_status() to update UnitStatusInfo before calling this. */
+
+ if (streq(u->unit_file_state, "enabled") == enable)
+ return 0;
+
+ if (enable)
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "EnableUnitFiles",
+ error,
+ NULL,
+ "asbb", 1,
+ u->name,
+ false, true);
+ else
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "DisableUnitFiles",
+ error,
+ NULL,
+ "asb", 1,
+ u->name,
+ false);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Reload",
+ error,
+ NULL,
+ NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_time, "t", now(CLOCK_REALTIME));
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_ntp_sync, "b", ntp_synced());
+
+static int property_get_rtc_time(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ struct tm tm;
+ usec_t t;
+ int r;
+
+ zero(tm);
+ r = clock_get_hwclock(&tm);
+ if (r == -EBUSY) {
+ log_warning("/dev/rtc is busy. Is somebody keeping it open continuously? That's not a good idea... Returning a bogus RTC timestamp.");
+ t = 0;
+ } else if (r == -ENOENT) {
+ log_debug("/dev/rtc not found.");
+ t = 0; /* no RTC found */
+ } else if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to read RTC: %m");
+ else
+ t = (usec_t) timegm(&tm) * USEC_PER_SEC;
+
+ return sd_bus_message_append(reply, "t", t);
+}
+
+static int property_get_can_ntp(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Context *c = userdata;
+ int r;
+
+ assert(c);
+ assert(bus);
+ assert(property);
+ assert(reply);
+ assert(error);
+
+ if (c->slot_job_removed)
+ /* When the previous request is not finished, then assume NTP is enabled. */
+ return sd_bus_message_append(reply, "b", true);
+
+ r = context_update_ntp_status(c, bus, reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append(reply, "b", context_ntp_service_exists(c) > 0);
+}
+
+static int property_get_ntp(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Context *c = userdata;
+ int r;
+
+ assert(c);
+ assert(bus);
+ assert(property);
+ assert(reply);
+ assert(error);
+
+ if (c->slot_job_removed)
+ /* When the previous request is not finished, then assume NTP is active. */
+ return sd_bus_message_append(reply, "b", true);
+
+ r = context_update_ntp_status(c, bus, reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append(reply, "b", context_ntp_service_is_active(c) > 0);
+}
+
+static int method_set_timezone(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ Context *c = userdata;
+ int interactive, r;
+ const char *z;
+
+ assert(m);
+ assert(c);
+
+ r = sd_bus_message_read(m, "sb", &z, &interactive);
+ if (r < 0)
+ return r;
+
+ if (!timezone_is_valid(z, LOG_DEBUG))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid time zone '%s'", z);
+
+ if (streq_ptr(z, c->zone))
+ return sd_bus_reply_method_return(m, NULL);
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_TIME,
+ "org.freedesktop.timedate1.set-timezone",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = free_and_strdup(&c->zone, z);
+ if (r < 0)
+ return r;
+
+ /* 1. Write new configuration file */
+ r = context_write_data_timezone(c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set time zone: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set time zone: %m");
+ }
+
+ /* 2. Make glibc notice the new timezone */
+ tzset();
+
+ /* 3. Tell the kernel our timezone */
+ r = clock_set_timezone(NULL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to tell kernel about timezone, ignoring: %m");
+
+ if (c->local_rtc) {
+ struct timespec ts;
+ struct tm tm;
+
+ /* 4. Sync RTC from system clock, with the new delta */
+ assert_se(clock_gettime(CLOCK_REALTIME, &ts) == 0);
+ assert_se(localtime_r(&ts.tv_sec, &tm));
+
+ r = clock_set_hwclock(&tm);
+ if (r < 0)
+ log_debug_errno(r, "Failed to sync time to hardware clock, ignoring: %m");
+ }
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_TIMEZONE_CHANGE_STR,
+ "TIMEZONE=%s", c->zone,
+ "TIMEZONE_SHORTNAME=%s", tzname[daylight],
+ "DAYLIGHT=%i", daylight,
+ LOG_MESSAGE("Changed time zone to '%s' (%s).", c->zone, tzname[daylight]));
+
+ (void) sd_bus_emit_properties_changed(sd_bus_message_get_bus(m), "/org/freedesktop/timedate1", "org.freedesktop.timedate1", "Timezone", NULL);
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_set_local_rtc(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int lrtc, fix_system, interactive;
+ Context *c = userdata;
+ struct timespec ts;
+ int r;
+
+ assert(m);
+ assert(c);
+
+ r = sd_bus_message_read(m, "bbb", &lrtc, &fix_system, &interactive);
+ if (r < 0)
+ return r;
+
+ if (lrtc == c->local_rtc)
+ return sd_bus_reply_method_return(m, NULL);
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_TIME,
+ "org.freedesktop.timedate1.set-local-rtc",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1;
+
+ c->local_rtc = lrtc;
+
+ /* 1. Write new configuration file */
+ r = context_write_data_local_rtc(c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set RTC to local/UTC: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set RTC to local/UTC: %m");
+ }
+
+ /* 2. Tell the kernel our timezone */
+ r = clock_set_timezone(NULL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to tell kernel about timezone, ignoring: %m");
+
+ /* 3. Synchronize clocks */
+ assert_se(clock_gettime(CLOCK_REALTIME, &ts) == 0);
+
+ if (fix_system) {
+ struct tm tm;
+
+ /* Sync system clock from RTC; first, initialize the timezone fields of struct tm. */
+ if (c->local_rtc)
+ localtime_r(&ts.tv_sec, &tm);
+ else
+ gmtime_r(&ts.tv_sec, &tm);
+
+ /* Override the main fields of struct tm, but not the timezone fields */
+ r = clock_get_hwclock(&tm);
+ if (r < 0)
+ log_debug_errno(r, "Failed to get hardware clock, ignoring: %m");
+ else {
+ /* And set the system clock with this */
+ if (c->local_rtc)
+ ts.tv_sec = mktime(&tm);
+ else
+ ts.tv_sec = timegm(&tm);
+
+ if (clock_settime(CLOCK_REALTIME, &ts) < 0)
+ log_debug_errno(errno, "Failed to update system clock, ignoring: %m");
+ }
+
+ } else {
+ struct tm tm;
+
+ /* Sync RTC from system clock */
+ if (c->local_rtc)
+ localtime_r(&ts.tv_sec, &tm);
+ else
+ gmtime_r(&ts.tv_sec, &tm);
+
+ r = clock_set_hwclock(&tm);
+ if (r < 0)
+ log_debug_errno(r, "Failed to sync time to hardware clock, ignoring: %m");
+ }
+
+ log_info("RTC configured to %s time.", c->local_rtc ? "local" : "UTC");
+
+ (void) sd_bus_emit_properties_changed(sd_bus_message_get_bus(m), "/org/freedesktop/timedate1", "org.freedesktop.timedate1", "LocalRTC", NULL);
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_set_time(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ sd_bus *bus = sd_bus_message_get_bus(m);
+ int relative, interactive, r;
+ Context *c = userdata;
+ int64_t utc;
+ struct timespec ts;
+ usec_t start;
+ struct tm tm;
+
+ assert(m);
+ assert(c);
+
+ if (c->slot_job_removed)
+ return sd_bus_error_set(error, BUS_ERROR_AUTOMATIC_TIME_SYNC_ENABLED, "Previous request is not finished, refusing.");
+
+ r = context_update_ntp_status(c, bus, m);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to update context: %m");
+
+ if (context_ntp_service_is_active(c) > 0)
+ return sd_bus_error_set(error, BUS_ERROR_AUTOMATIC_TIME_SYNC_ENABLED, "Automatic time synchronization is enabled");
+
+ /* this only gets used if dbus does not provide a timestamp */
+ start = now(CLOCK_MONOTONIC);
+
+ r = sd_bus_message_read(m, "xbb", &utc, &relative, &interactive);
+ if (r < 0)
+ return r;
+
+ if (!relative && utc <= 0)
+ return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid absolute time");
+
+ if (relative && utc == 0)
+ return sd_bus_reply_method_return(m, NULL);
+
+ if (relative) {
+ usec_t n, x;
+
+ n = now(CLOCK_REALTIME);
+ x = n + utc;
+
+ if ((utc > 0 && x < n) ||
+ (utc < 0 && x > n))
+ return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Time value overflow");
+
+ timespec_store(&ts, x);
+ } else
+ timespec_store(&ts, (usec_t) utc);
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_TIME,
+ "org.freedesktop.timedate1.set-time",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1;
+
+ /* adjust ts for time spent in program */
+ r = sd_bus_message_get_monotonic_usec(m, &start);
+ /* when sd_bus_message_get_monotonic_usec() returns -ENODATA it does not modify &start */
+ if (r < 0 && r != -ENODATA)
+ return r;
+
+ timespec_store(&ts, timespec_load(&ts) + (now(CLOCK_MONOTONIC) - start));
+
+ /* Set system clock */
+ if (clock_settime(CLOCK_REALTIME, &ts) < 0) {
+ log_error_errno(errno, "Failed to set local time: %m");
+ return sd_bus_error_set_errnof(error, errno, "Failed to set local time: %m");
+ }
+
+ /* Sync down to RTC */
+ if (c->local_rtc)
+ localtime_r(&ts.tv_sec, &tm);
+ else
+ gmtime_r(&ts.tv_sec, &tm);
+
+ r = clock_set_hwclock(&tm);
+ if (r < 0)
+ log_debug_errno(r, "Failed to update hardware clock, ignoring: %m");
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_TIME_CHANGE_STR,
+ "REALTIME="USEC_FMT, timespec_load(&ts),
+ LOG_MESSAGE("Changed local time to %s", ctime(&ts.tv_sec)));
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_set_ntp(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *slot = NULL;
+ sd_bus *bus = sd_bus_message_get_bus(m);
+ Context *c = userdata;
+ UnitStatusInfo *u;
+ int enable, interactive, q, r;
+
+ assert(m);
+ assert(bus);
+ assert(c);
+
+ r = sd_bus_message_read(m, "bb", &enable, &interactive);
+ if (r < 0)
+ return r;
+
+ r = context_update_ntp_status(c, bus, m);
+ if (r < 0)
+ return r;
+
+ if (context_ntp_service_exists(c) <= 0)
+ return sd_bus_error_set(error, BUS_ERROR_NO_NTP_SUPPORT, "NTP not supported");
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_TIME,
+ "org.freedesktop.timedate1.set-ntp",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1;
+
+ /* This method may be called frequently. Forget the previous job if it has not completed yet. */
+ LIST_FOREACH(units, u, c->units)
+ u->path = mfree(u->path);
+
+ if (!c->slot_job_removed) {
+ r = sd_bus_match_signal_async(
+ bus,
+ &slot,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "JobRemoved",
+ match_job_removed, NULL, c);
+ if (r < 0)
+ return r;
+ }
+
+ if (!enable)
+ LIST_FOREACH(units, u, c->units) {
+ if (!streq(u->load_state, "loaded"))
+ continue;
+
+ q = unit_enable_or_disable(u, bus, error, enable);
+ if (q < 0)
+ r = q;
+
+ q = unit_start_or_stop(u, bus, error, enable);
+ if (q < 0)
+ r = q;
+ }
+
+ else if (context_ntp_service_is_enabled(c) <= 0)
+ LIST_FOREACH(units, u, c->units) {
+ if (!streq(u->load_state, "loaded"))
+ continue;
+
+ r = unit_enable_or_disable(u, bus, error, enable);
+ if (r < 0)
+ continue;
+
+ r = unit_start_or_stop(u, bus, error, enable);
+ break;
+ }
+
+ else
+ LIST_FOREACH(units, u, c->units) {
+ if (!streq(u->load_state, "loaded") ||
+ !streq(u->unit_file_state, "enabled"))
+ continue;
+
+ r = unit_start_or_stop(u, bus, error, enable);
+ break;
+ }
+
+ if (r < 0)
+ return r;
+
+ if (slot)
+ c->slot_job_removed = TAKE_PTR(slot);
+
+ log_info("Set NTP to %sd", enable_disable(enable));
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_list_timezones(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_strv_free_ char **zones = NULL;
+ int r;
+
+ assert(m);
+
+ r = get_timezones(&zones);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to read list of time zones: %m");
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(reply, zones);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static const sd_bus_vtable timedate_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Timezone", "s", NULL, offsetof(Context, zone), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("LocalRTC", "b", bus_property_get_bool, offsetof(Context, local_rtc), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("CanNTP", "b", property_get_can_ntp, 0, 0),
+ SD_BUS_PROPERTY("NTP", "b", property_get_ntp, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("NTPSynchronized", "b", property_get_ntp_sync, 0, 0),
+ SD_BUS_PROPERTY("TimeUSec", "t", property_get_time, 0, 0),
+ SD_BUS_PROPERTY("RTCTimeUSec", "t", property_get_rtc_time, 0, 0),
+ SD_BUS_METHOD("SetTime", "xbb", NULL, method_set_time, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetTimezone", "sb", NULL, method_set_timezone, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLocalRTC", "bbb", NULL, method_set_local_rtc, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetNTP", "bb", NULL, method_set_ntp, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListTimezones", NULL, "as", method_list_timezones, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END,
+};
+
+static int connect_bus(Context *c, sd_event *event, sd_bus **_bus) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(c);
+ assert(event);
+ assert(_bus);
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get system bus connection: %m");
+
+ r = sd_bus_add_object_vtable(bus, NULL, "/org/freedesktop/timedate1", "org.freedesktop.timedate1", timedate_vtable, c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register object: %m");
+
+ r = sd_bus_request_name_async(bus, NULL, "org.freedesktop.timedate1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ *_bus = TAKE_PTR(bus);
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(context_clear) Context context = {};
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (argc != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments.");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ (void) sd_event_set_watchdog(event, true);
+
+ r = sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to install SIGINT handler: %m");
+
+ r = sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to install SIGTERM handler: %m");
+
+ r = connect_bus(&context, event, &bus);
+ if (r < 0)
+ return r;
+
+ (void) sd_bus_negotiate_timestamp(bus, true);
+
+ r = context_read_data(&context);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read time zone data: %m");
+
+ r = context_parse_ntp_services(&context);
+ if (r < 0)
+ return r;
+
+ r = bus_event_loop_with_idle(event, bus, "org.freedesktop.timedate1", DEFAULT_EXIT_USEC, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/timesync/meson.build b/src/timesync/meson.build
new file mode 100644
index 0000000..b79ef08
--- /dev/null
+++ b/src/timesync/meson.build
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+systemd_timesyncd_sources = files('''
+ timesyncd.c
+ timesyncd-bus.c
+ timesyncd-bus.h
+ timesyncd-conf.c
+ timesyncd-conf.h
+ timesyncd-manager.c
+ timesyncd-manager.h
+ timesyncd-ntp-message.h
+ timesyncd-server.c
+ timesyncd-server.h
+'''.split())
+
+timesyncd_gperf_c = custom_target(
+ 'timesyncd-gperf.c',
+ input : 'timesyncd-gperf.gperf',
+ output : 'timesyncd-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+systemd_timesyncd_sources += [timesyncd_gperf_c]
+
+if conf.get('ENABLE_TIMESYNCD') == 1
+ timesyncd_conf = configure_file(
+ input : 'timesyncd.conf.in',
+ output : 'timesyncd.conf',
+ configuration : substs)
+ install_data(timesyncd_conf,
+ install_dir : pkgsysconfdir)
+ install_data('org.freedesktop.timesync1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.timesync1.service',
+ install_dir : dbussystemservicedir)
+endif
+
+############################################################
+
+tests += [
+ [['src/timesync/test-timesync.c',
+ 'src/timesync/timesyncd-manager.c',
+ 'src/timesync/timesyncd-manager.h',
+ 'src/timesync/timesyncd-conf.c',
+ 'src/timesync/timesyncd-conf.h',
+ 'src/timesync/timesyncd-server.c',
+ 'src/timesync/timesyncd-server.h',
+ timesyncd_gperf_c],
+ [libshared],
+ [libm],
+ 'ENABLE_TIMESYNCD'],
+]
diff --git a/src/timesync/org.freedesktop.timesync1.conf b/src/timesync/org.freedesktop.timesync1.conf
new file mode 100644
index 0000000..eccdbec
--- /dev/null
+++ b/src/timesync/org.freedesktop.timesync1.conf
@@ -0,0 +1,42 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="systemd-timesync">
+ <allow own="org.freedesktop.timesync1"/>
+ <allow send_destination="org.freedesktop.timesync1"/>
+ <allow receive_sender="org.freedesktop.timesync1"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.timesync1"/>
+
+ <allow send_destination="org.freedesktop.timesync1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.timesync1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.timesync1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.timesync1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <allow receive_sender="org.freedesktop.timesync1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/timesync/org.freedesktop.timesync1.service b/src/timesync/org.freedesktop.timesync1.service
new file mode 100644
index 0000000..84b4b4f
--- /dev/null
+++ b/src/timesync/org.freedesktop.timesync1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.timesync1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.timesync1.service
diff --git a/src/timesync/test-timesync.c b/src/timesync/test-timesync.c
new file mode 100644
index 0000000..bd03a1d
--- /dev/null
+++ b/src/timesync/test-timesync.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+/* Some unit tests for the helper functions in timesyncd. */
+
+#include "log.h"
+#include "macro.h"
+#include "timesyncd-conf.h"
+#include "tests.h"
+
+static void test_manager_parse_string(void) {
+ /* Make sure that NTP_SERVERS is configured to something
+ * that we can actually parse successfully. */
+
+ _cleanup_(manager_freep) Manager *m = NULL;
+
+ assert_se(manager_new(&m) == 0);
+
+ assert_se(!m->have_fallbacks);
+ assert_se(manager_parse_server_string(m, SERVER_FALLBACK, NTP_SERVERS) == 0);
+ assert_se(m->have_fallbacks);
+ assert_se(manager_parse_fallback_string(m, NTP_SERVERS) == 0);
+
+ assert_se(manager_parse_server_string(m, SERVER_SYSTEM, "time1.foobar.com time2.foobar.com axrfav.,avf..ra 12345..123") == 0);
+ assert_se(manager_parse_server_string(m, SERVER_FALLBACK, "time1.foobar.com time2.foobar.com axrfav.,avf..ra 12345..123") == 0);
+ assert_se(manager_parse_server_string(m, SERVER_LINK, "time1.foobar.com time2.foobar.com axrfav.,avf..ra 12345..123") == 0);
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_manager_parse_string();
+
+ return 0;
+}
diff --git a/src/timesync/timesyncd-bus.c b/src/timesync/timesyncd-bus.c
new file mode 100644
index 0000000..5a5896f
--- /dev/null
+++ b/src/timesync/timesyncd-bus.c
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-protocol.h"
+#include "bus-util.h"
+#include "in-addr-util.h"
+#include "log.h"
+#include "macro.h"
+#include "time-util.h"
+#include "timesyncd-bus.h"
+
+static int property_get_servers(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ServerName *p, **s = userdata;
+ int r;
+
+ assert(s);
+ assert(bus);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(names, p, *s) {
+ r = sd_bus_message_append(reply, "s", p->string);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_current_server_name(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ServerName **s = userdata;
+
+ assert(s);
+ assert(bus);
+ assert(reply);
+
+ return sd_bus_message_append(reply, "s", *s ? (*s)->string : NULL);
+}
+
+static int property_get_current_server_address(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ServerAddress *a;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(userdata);
+
+ a = *(ServerAddress **) userdata;
+
+ if (!a)
+ return sd_bus_message_append(reply, "(iay)", AF_UNSPEC, 0);
+
+ assert(IN_SET(a->sockaddr.sa.sa_family, AF_INET, AF_INET6));
+
+ r = sd_bus_message_open_container(reply, 'r', "iay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "i", a->sockaddr.sa.sa_family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y',
+ a->sockaddr.sa.sa_family == AF_INET ? (void*) &a->sockaddr.in.sin_addr : (void*) &a->sockaddr.in6.sin6_addr,
+ FAMILY_ADDRESS_SIZE(a->sockaddr.sa.sa_family));
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+static usec_t ntp_ts_short_to_usec(const struct ntp_ts_short *ts) {
+ return be16toh(ts->sec) * USEC_PER_SEC + (be16toh(ts->frac) * USEC_PER_SEC) / (usec_t) 0x10000ULL;
+}
+
+static usec_t ntp_ts_to_usec(const struct ntp_ts *ts) {
+ return (be32toh(ts->sec) - OFFSET_1900_1970) * USEC_PER_SEC + (be32toh(ts->frac) * USEC_PER_SEC) / (usec_t) 0x100000000ULL;
+}
+
+static int property_get_ntp_message(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ int r;
+
+ assert(m);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'r', "uuuuittayttttbtt");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "uuuuitt",
+ NTP_FIELD_LEAP(m->ntpmsg.field),
+ NTP_FIELD_VERSION(m->ntpmsg.field),
+ NTP_FIELD_MODE(m->ntpmsg.field),
+ m->ntpmsg.stratum,
+ m->ntpmsg.precision,
+ ntp_ts_short_to_usec(&m->ntpmsg.root_delay),
+ ntp_ts_short_to_usec(&m->ntpmsg.root_dispersion));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', m->ntpmsg.refid, 4);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "ttttbtt",
+ timespec_load(&m->origin_time),
+ ntp_ts_to_usec(&m->ntpmsg.recv_time),
+ ntp_ts_to_usec(&m->ntpmsg.trans_time),
+ timespec_load(&m->dest_time),
+ m->spike,
+ m->packet_count,
+ (usec_t) (m->samples_jitter * USEC_PER_SEC));
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+static const sd_bus_vtable manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("LinkNTPServers", "as", property_get_servers, offsetof(Manager, link_servers), 0),
+ SD_BUS_PROPERTY("SystemNTPServers", "as", property_get_servers, offsetof(Manager, system_servers), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FallbackNTPServers", "as", property_get_servers, offsetof(Manager, fallback_servers), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ServerName", "s", property_get_current_server_name, offsetof(Manager, current_server_name), 0),
+ SD_BUS_PROPERTY("ServerAddress", "(iay)", property_get_current_server_address, offsetof(Manager, current_server_address), 0),
+ SD_BUS_PROPERTY("RootDistanceMaxUSec", "t", bus_property_get_usec, offsetof(Manager, max_root_distance_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PollIntervalMinUSec", "t", bus_property_get_usec, offsetof(Manager, poll_interval_min_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PollIntervalMaxUSec", "t", bus_property_get_usec, offsetof(Manager, poll_interval_max_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PollIntervalUSec", "t", bus_property_get_usec, offsetof(Manager, poll_interval_usec), 0),
+ SD_BUS_PROPERTY("NTPMessage", "(uuuuittayttttbtt)", property_get_ntp_message, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Frequency", "x", NULL, offsetof(Manager, drift_freq), 0),
+
+ SD_BUS_VTABLE_END
+};
+
+int manager_connect_bus(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->bus)
+ return 0;
+
+ r = bus_open_system_watch_bind_with_description(&m->bus, "bus-api-timesync");
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to bus: %m");
+
+ r = sd_bus_add_object_vtable(m->bus, NULL, "/org/freedesktop/timesync1", "org.freedesktop.timesync1.Manager", manager_vtable, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add manager object vtable: %m");
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.timesync1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ return 0;
+}
diff --git a/src/timesync/timesyncd-bus.h b/src/timesync/timesyncd-bus.h
new file mode 100644
index 0000000..6670ffb
--- /dev/null
+++ b/src/timesync/timesyncd-bus.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "timesyncd-manager.h"
+
+int manager_connect_bus(Manager *m);
diff --git a/src/timesync/timesyncd-conf.c b/src/timesync/timesyncd-conf.c
new file mode 100644
index 0000000..a26c2da
--- /dev/null
+++ b/src/timesync/timesyncd-conf.c
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "def.h"
+#include "dns-domain.h"
+#include "extract-word.h"
+#include "string-util.h"
+#include "timesyncd-conf.h"
+#include "timesyncd-manager.h"
+#include "timesyncd-server.h"
+
+int manager_parse_server_string(Manager *m, ServerType type, const char *string) {
+ ServerName *first;
+ int r;
+
+ assert(m);
+ assert(string);
+
+ first = type == SERVER_FALLBACK ? m->fallback_servers : m->system_servers;
+
+ if (type == SERVER_FALLBACK)
+ m->have_fallbacks = true;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ bool found = false;
+ ServerName *n;
+
+ r = extract_first_word(&string, &word, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse timesyncd server syntax \"%s\": %m", string);
+ if (r == 0)
+ break;
+
+ r = dns_name_is_valid_or_address(word);
+ if (r < 0)
+ return log_error_errno(r, "Failed to check validity of NTP server name or address '%s': %m", word);
+ if (r == 0) {
+ log_error("Invalid NTP server name or address, ignoring: %s", word);
+ continue;
+ }
+
+ /* Filter out duplicates */
+ LIST_FOREACH(names, n, first)
+ if (streq_ptr(n->string, word)) {
+ found = true;
+ break;
+ }
+
+ if (found)
+ continue;
+
+ r = server_name_new(m, NULL, type, word);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int manager_parse_fallback_string(Manager *m, const char *string) {
+ if (m->have_fallbacks)
+ return 0;
+
+ return manager_parse_server_string(m, SERVER_FALLBACK, string);
+}
+
+int config_parse_servers(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Manager *m = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue))
+ manager_flush_server_names(m, ltype);
+ else {
+ r = manager_parse_server_string(m, ltype, rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse NTP server string '%s'. Ignoring.", rvalue);
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+int manager_parse_config_file(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = config_parse_many_nulstr(PKGSYSCONFDIR "/timesyncd.conf",
+ CONF_PATHS_NULSTR("systemd/timesyncd.conf.d"),
+ "Time\0",
+ config_item_perf_lookup, timesyncd_gperf_lookup,
+ CONFIG_PARSE_WARN, m);
+ if (r < 0)
+ return r;
+
+ if (m->poll_interval_min_usec < 16 * USEC_PER_SEC) {
+ log_warning("Invalid PollIntervalMinSec=. Using default value.");
+ m->poll_interval_min_usec = NTP_POLL_INTERVAL_MIN_USEC;
+ }
+
+ if (m->poll_interval_max_usec < m->poll_interval_min_usec) {
+ log_warning("PollIntervalMaxSec= is smaller than PollIntervalMinSec=. Using default value.");
+ m->poll_interval_max_usec = MAX(NTP_POLL_INTERVAL_MAX_USEC, m->poll_interval_min_usec * 32);
+ }
+
+ return r;
+}
diff --git a/src/timesync/timesyncd-conf.h b/src/timesync/timesyncd-conf.h
new file mode 100644
index 0000000..bbe27cf
--- /dev/null
+++ b/src/timesync/timesyncd-conf.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "conf-parser.h"
+#include "timesyncd-manager.h"
+
+const struct ConfigPerfItem* timesyncd_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+int manager_parse_server_string(Manager *m, ServerType type, const char *string);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_servers);
+
+int manager_parse_config_file(Manager *m);
+int manager_parse_fallback_string(Manager *m, const char *string);
diff --git a/src/timesync/timesyncd-gperf.gperf b/src/timesync/timesyncd-gperf.gperf
new file mode 100644
index 0000000..b502027
--- /dev/null
+++ b/src/timesync/timesyncd-gperf.gperf
@@ -0,0 +1,25 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "timesyncd-conf.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name timesyncd_gperf_hash
+%define lookup-function-name timesyncd_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Time.NTP, config_parse_servers, SERVER_SYSTEM, 0
+Time.Servers, config_parse_servers, SERVER_SYSTEM, 0
+Time.FallbackNTP, config_parse_servers, SERVER_FALLBACK, 0
+Time.RootDistanceMaxSec, config_parse_sec, 0, offsetof(Manager, max_root_distance_usec)
+Time.PollIntervalMinSec, config_parse_sec, 0, offsetof(Manager, poll_interval_min_usec)
+Time.PollIntervalMaxSec, config_parse_sec, 0, offsetof(Manager, poll_interval_max_usec)
diff --git a/src/timesync/timesyncd-manager.c b/src/timesync/timesyncd-manager.c
new file mode 100644
index 0000000..6fde4a3
--- /dev/null
+++ b/src/timesync/timesyncd-manager.c
@@ -0,0 +1,1124 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <math.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <resolv.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/timerfd.h>
+#include <sys/timex.h>
+#include <sys/types.h>
+#include <time.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "list.h"
+#include "log.h"
+#include "missing.h"
+#include "network-util.h"
+#include "ratelimit.h"
+#include "resolve-private.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "timesyncd-conf.h"
+#include "timesyncd-manager.h"
+#include "util.h"
+
+#ifndef ADJ_SETOFFSET
+#define ADJ_SETOFFSET 0x0100 /* add 'time' to current time */
+#endif
+
+/* expected accuracy of time synchronization; used to adjust the poll interval */
+#define NTP_ACCURACY_SEC 0.2
+
+/*
+ * Maximum delta in seconds which the system clock is gradually adjusted
+ * (slewed) to approach the network time. Deltas larger that this are set by
+ * letting the system time jump. The kernel's limit for adjtime is 0.5s.
+ */
+#define NTP_MAX_ADJUST 0.4
+
+/* Default of maximum acceptable root distance in microseconds. */
+#define NTP_MAX_ROOT_DISTANCE (5 * USEC_PER_SEC)
+
+/* Maximum number of missed replies before selecting another source. */
+#define NTP_MAX_MISSED_REPLIES 2
+
+#define RETRY_USEC (30*USEC_PER_SEC)
+#define RATELIMIT_INTERVAL_USEC (10*USEC_PER_SEC)
+#define RATELIMIT_BURST 10
+
+#define TIMEOUT_USEC (10*USEC_PER_SEC)
+
+static int manager_arm_timer(Manager *m, usec_t next);
+static int manager_clock_watch_setup(Manager *m);
+static int manager_listen_setup(Manager *m);
+static void manager_listen_stop(Manager *m);
+
+static double ntp_ts_short_to_d(const struct ntp_ts_short *ts) {
+ return be16toh(ts->sec) + (be16toh(ts->frac) / 65536.0);
+}
+
+static double ntp_ts_to_d(const struct ntp_ts *ts) {
+ return be32toh(ts->sec) + ((double)be32toh(ts->frac) / UINT_MAX);
+}
+
+static double ts_to_d(const struct timespec *ts) {
+ return ts->tv_sec + (1.0e-9 * ts->tv_nsec);
+}
+
+static int manager_timeout(sd_event_source *source, usec_t usec, void *userdata) {
+ _cleanup_free_ char *pretty = NULL;
+ Manager *m = userdata;
+
+ assert(m);
+ assert(m->current_server_name);
+ assert(m->current_server_address);
+
+ server_address_pretty(m->current_server_address, &pretty);
+ log_info("Timed out waiting for reply from %s (%s).", strna(pretty), m->current_server_name->string);
+
+ return manager_connect(m);
+}
+
+static int manager_send_request(Manager *m) {
+ _cleanup_free_ char *pretty = NULL;
+ struct ntp_msg ntpmsg = {
+ /*
+ * "The client initializes the NTP message header, sends the request
+ * to the server, and strips the time of day from the Transmit
+ * Timestamp field of the reply. For this purpose, all the NTP
+ * header fields are set to 0, except the Mode, VN, and optional
+ * Transmit Timestamp fields."
+ */
+ .field = NTP_FIELD(0, 4, NTP_MODE_CLIENT),
+ };
+ ssize_t len;
+ int r;
+
+ assert(m);
+ assert(m->current_server_name);
+ assert(m->current_server_address);
+
+ m->event_timeout = sd_event_source_unref(m->event_timeout);
+
+ r = manager_listen_setup(m);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to setup connection socket: %m");
+
+ /*
+ * Set transmit timestamp, remember it; the server will send that back
+ * as the origin timestamp and we have an indication that this is the
+ * matching answer to our request.
+ *
+ * The actual value does not matter, We do not care about the correct
+ * NTP UINT_MAX fraction; we just pass the plain nanosecond value.
+ */
+ assert_se(clock_gettime(clock_boottime_or_monotonic(), &m->trans_time_mon) >= 0);
+ assert_se(clock_gettime(CLOCK_REALTIME, &m->trans_time) >= 0);
+ ntpmsg.trans_time.sec = htobe32(m->trans_time.tv_sec + OFFSET_1900_1970);
+ ntpmsg.trans_time.frac = htobe32(m->trans_time.tv_nsec);
+
+ server_address_pretty(m->current_server_address, &pretty);
+
+ len = sendto(m->server_socket, &ntpmsg, sizeof(ntpmsg), MSG_DONTWAIT, &m->current_server_address->sockaddr.sa, m->current_server_address->socklen);
+ if (len == sizeof(ntpmsg)) {
+ m->pending = true;
+ log_debug("Sent NTP request to %s (%s).", strna(pretty), m->current_server_name->string);
+ } else {
+ log_debug_errno(errno, "Sending NTP request to %s (%s) failed: %m", strna(pretty), m->current_server_name->string);
+ return manager_connect(m);
+ }
+
+ /* re-arm timer with increasing timeout, in case the packets never arrive back */
+ if (m->retry_interval > 0) {
+ if (m->retry_interval < m->poll_interval_max_usec)
+ m->retry_interval *= 2;
+ } else
+ m->retry_interval = m->poll_interval_min_usec;
+
+ r = manager_arm_timer(m, m->retry_interval);
+ if (r < 0)
+ return log_error_errno(r, "Failed to rearm timer: %m");
+
+ m->missed_replies++;
+ if (m->missed_replies > NTP_MAX_MISSED_REPLIES) {
+ r = sd_event_add_time(
+ m->event,
+ &m->event_timeout,
+ clock_boottime_or_monotonic(),
+ now(clock_boottime_or_monotonic()) + TIMEOUT_USEC, 0,
+ manager_timeout, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to arm timeout timer: %m");
+ }
+
+ return 0;
+}
+
+static int manager_timer(sd_event_source *source, usec_t usec, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+
+ return manager_send_request(m);
+}
+
+static int manager_arm_timer(Manager *m, usec_t next) {
+ int r;
+
+ assert(m);
+
+ if (next == 0) {
+ m->event_timer = sd_event_source_unref(m->event_timer);
+ return 0;
+ }
+
+ if (m->event_timer) {
+ r = sd_event_source_set_time(m->event_timer, now(clock_boottime_or_monotonic()) + next);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(m->event_timer, SD_EVENT_ONESHOT);
+ }
+
+ return sd_event_add_time(
+ m->event,
+ &m->event_timer,
+ clock_boottime_or_monotonic(),
+ now(clock_boottime_or_monotonic()) + next, 0,
+ manager_timer, m);
+}
+
+static int manager_clock_watch(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+
+ /* rearm timer */
+ manager_clock_watch_setup(m);
+
+ /* skip our own jumps */
+ if (m->jumped) {
+ m->jumped = false;
+ return 0;
+ }
+
+ /* resync */
+ log_debug("System time changed. Resyncing.");
+ m->poll_resync = true;
+
+ return manager_send_request(m);
+}
+
+/* wake up when the system time changes underneath us */
+static int manager_clock_watch_setup(Manager *m) {
+ int r;
+
+ assert(m);
+
+ m->event_clock_watch = sd_event_source_unref(m->event_clock_watch);
+ safe_close(m->clock_watch_fd);
+
+ m->clock_watch_fd = time_change_fd();
+ if (m->clock_watch_fd < 0)
+ return log_error_errno(m->clock_watch_fd, "Failed to create timerfd: %m");
+
+ r = sd_event_add_io(m->event, &m->event_clock_watch, m->clock_watch_fd, EPOLLIN, manager_clock_watch, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create clock watch event source: %m");
+
+ return 0;
+}
+
+static int manager_adjust_clock(Manager *m, double offset, int leap_sec) {
+ struct timex tmx = {};
+ int r;
+
+ assert(m);
+
+ /*
+ * For small deltas, tell the kernel to gradually adjust the system
+ * clock to the NTP time, larger deltas are just directly set.
+ */
+ if (fabs(offset) < NTP_MAX_ADJUST) {
+ tmx.modes = ADJ_STATUS | ADJ_NANO | ADJ_OFFSET | ADJ_TIMECONST | ADJ_MAXERROR | ADJ_ESTERROR;
+ tmx.status = STA_PLL;
+ tmx.offset = offset * NSEC_PER_SEC;
+ tmx.constant = log2i(m->poll_interval_usec / USEC_PER_SEC) - 4;
+ tmx.maxerror = 0;
+ tmx.esterror = 0;
+ log_debug(" adjust (slew): %+.3f sec", offset);
+ } else {
+ tmx.modes = ADJ_STATUS | ADJ_NANO | ADJ_SETOFFSET | ADJ_MAXERROR | ADJ_ESTERROR;
+
+ /* ADJ_NANO uses nanoseconds in the microseconds field */
+ tmx.time.tv_sec = (long)offset;
+ tmx.time.tv_usec = (offset - tmx.time.tv_sec) * NSEC_PER_SEC;
+ tmx.maxerror = 0;
+ tmx.esterror = 0;
+
+ /* the kernel expects -0.3s as {-1, 7000.000.000} */
+ if (tmx.time.tv_usec < 0) {
+ tmx.time.tv_sec -= 1;
+ tmx.time.tv_usec += NSEC_PER_SEC;
+ }
+
+ m->jumped = true;
+ log_debug(" adjust (jump): %+.3f sec", offset);
+ }
+
+ /*
+ * An unset STA_UNSYNC will enable the kernel's 11-minute mode,
+ * which syncs the system time periodically to the RTC.
+ *
+ * In case the RTC runs in local time, never touch the RTC,
+ * we have no way to properly handle daylight saving changes and
+ * mobile devices moving between time zones.
+ */
+ if (m->rtc_local_time)
+ tmx.status |= STA_UNSYNC;
+
+ switch (leap_sec) {
+ case 1:
+ tmx.status |= STA_INS;
+ break;
+ case -1:
+ tmx.status |= STA_DEL;
+ break;
+ }
+
+ r = clock_adjtime(CLOCK_REALTIME, &tmx);
+ if (r < 0)
+ return -errno;
+
+ /* If touch fails, there isn't much we can do. Maybe it'll work next time. */
+ (void) touch("/var/lib/systemd/timesync/clock");
+ (void) touch("/run/systemd/timesync/synchronized");
+
+ m->drift_freq = tmx.freq;
+
+ log_debug(" status : %04i %s\n"
+ " time now : %"PRI_TIME".%03"PRI_USEC"\n"
+ " constant : %"PRI_TIMEX"\n"
+ " offset : %+.3f sec\n"
+ " freq offset : %+"PRI_TIMEX" (%+"PRI_TIMEX" ppm)\n",
+ tmx.status, tmx.status & STA_UNSYNC ? "unsync" : "sync",
+ tmx.time.tv_sec, tmx.time.tv_usec / NSEC_PER_MSEC,
+ tmx.constant,
+ (double)tmx.offset / NSEC_PER_SEC,
+ tmx.freq, tmx.freq / 65536);
+
+ return 0;
+}
+
+static bool manager_sample_spike_detection(Manager *m, double offset, double delay) {
+ unsigned i, idx_cur, idx_new, idx_min;
+ double jitter;
+ double j;
+
+ assert(m);
+
+ m->packet_count++;
+
+ /* ignore initial sample */
+ if (m->packet_count == 1)
+ return false;
+
+ /* store the current data in our samples array */
+ idx_cur = m->samples_idx;
+ idx_new = (idx_cur + 1) % ELEMENTSOF(m->samples);
+ m->samples_idx = idx_new;
+ m->samples[idx_new].offset = offset;
+ m->samples[idx_new].delay = delay;
+
+ /* calculate new jitter value from the RMS differences relative to the lowest delay sample */
+ jitter = m->samples_jitter;
+ for (idx_min = idx_cur, i = 0; i < ELEMENTSOF(m->samples); i++)
+ if (m->samples[i].delay > 0 && m->samples[i].delay < m->samples[idx_min].delay)
+ idx_min = i;
+
+ j = 0;
+ for (i = 0; i < ELEMENTSOF(m->samples); i++)
+ j += pow(m->samples[i].offset - m->samples[idx_min].offset, 2);
+ m->samples_jitter = sqrt(j / (ELEMENTSOF(m->samples) - 1));
+
+ /* ignore samples when resyncing */
+ if (m->poll_resync)
+ return false;
+
+ /* always accept offset if we are farther off than the round-trip delay */
+ if (fabs(offset) > delay)
+ return false;
+
+ /* we need a few samples before looking at them */
+ if (m->packet_count < 4)
+ return false;
+
+ /* do not accept anything worse than the maximum possible error of the best sample */
+ if (fabs(offset) > m->samples[idx_min].delay)
+ return true;
+
+ /* compare the difference between the current offset to the previous offset and jitter */
+ return fabs(offset - m->samples[idx_cur].offset) > 3 * jitter;
+}
+
+static void manager_adjust_poll(Manager *m, double offset, bool spike) {
+ assert(m);
+
+ if (m->poll_resync) {
+ m->poll_interval_usec = m->poll_interval_min_usec;
+ m->poll_resync = false;
+ return;
+ }
+
+ /* set to minimal poll interval */
+ if (!spike && fabs(offset) > NTP_ACCURACY_SEC) {
+ m->poll_interval_usec = m->poll_interval_min_usec;
+ return;
+ }
+
+ /* increase polling interval */
+ if (fabs(offset) < NTP_ACCURACY_SEC * 0.25) {
+ if (m->poll_interval_usec < m->poll_interval_max_usec)
+ m->poll_interval_usec *= 2;
+ return;
+ }
+
+ /* decrease polling interval */
+ if (spike || fabs(offset) > NTP_ACCURACY_SEC * 0.75) {
+ if (m->poll_interval_usec > m->poll_interval_min_usec)
+ m->poll_interval_usec /= 2;
+ return;
+ }
+}
+
+static int manager_receive_response(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ struct ntp_msg ntpmsg;
+
+ struct iovec iov = {
+ .iov_base = &ntpmsg,
+ .iov_len = sizeof(ntpmsg),
+ };
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(struct timeval))];
+ } control;
+ union sockaddr_union server_addr;
+ struct msghdr msghdr = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_name = &server_addr,
+ .msg_namelen = sizeof(server_addr),
+ };
+ struct cmsghdr *cmsg;
+ struct timespec *recv_time = NULL;
+ ssize_t len;
+ double origin, receive, trans, dest;
+ double delay, offset;
+ double root_distance;
+ bool spike;
+ int leap_sec;
+ int r;
+
+ assert(source);
+ assert(m);
+
+ if (revents & (EPOLLHUP|EPOLLERR)) {
+ log_warning("Server connection returned error.");
+ return manager_connect(m);
+ }
+
+ len = recvmsg(fd, &msghdr, MSG_DONTWAIT);
+ if (len < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ log_warning("Error receiving message. Disconnecting.");
+ return manager_connect(m);
+ }
+
+ /* Too short or too long packet? */
+ if (iov.iov_len < sizeof(struct ntp_msg) || (msghdr.msg_flags & MSG_TRUNC)) {
+ log_warning("Invalid response from server. Disconnecting.");
+ return manager_connect(m);
+ }
+
+ if (!m->current_server_name ||
+ !m->current_server_address ||
+ !sockaddr_equal(&server_addr, &m->current_server_address->sockaddr)) {
+ log_debug("Response from unknown server.");
+ return 0;
+ }
+
+ CMSG_FOREACH(cmsg, &msghdr) {
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ continue;
+
+ switch (cmsg->cmsg_type) {
+ case SCM_TIMESTAMPNS:
+ recv_time = (struct timespec *) CMSG_DATA(cmsg);
+ break;
+ }
+ }
+ if (!recv_time)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid packet timestamp.");
+
+ if (!m->pending) {
+ log_debug("Unexpected reply. Ignoring.");
+ return 0;
+ }
+
+ m->missed_replies = 0;
+
+ /* check our "time cookie" (we just stored nanoseconds in the fraction field) */
+ if (be32toh(ntpmsg.origin_time.sec) != m->trans_time.tv_sec + OFFSET_1900_1970 ||
+ be32toh(ntpmsg.origin_time.frac) != (unsigned long) m->trans_time.tv_nsec) {
+ log_debug("Invalid reply; not our transmit time. Ignoring.");
+ return 0;
+ }
+
+ m->event_timeout = sd_event_source_unref(m->event_timeout);
+
+ if (be32toh(ntpmsg.recv_time.sec) < TIME_EPOCH + OFFSET_1900_1970 ||
+ be32toh(ntpmsg.trans_time.sec) < TIME_EPOCH + OFFSET_1900_1970) {
+ log_debug("Invalid reply, returned times before epoch. Ignoring.");
+ return manager_connect(m);
+ }
+
+ if (NTP_FIELD_LEAP(ntpmsg.field) == NTP_LEAP_NOTINSYNC ||
+ ntpmsg.stratum == 0 || ntpmsg.stratum >= 16) {
+ log_debug("Server is not synchronized. Disconnecting.");
+ return manager_connect(m);
+ }
+
+ if (!IN_SET(NTP_FIELD_VERSION(ntpmsg.field), 3, 4)) {
+ log_debug("Response NTPv%d. Disconnecting.", NTP_FIELD_VERSION(ntpmsg.field));
+ return manager_connect(m);
+ }
+
+ if (NTP_FIELD_MODE(ntpmsg.field) != NTP_MODE_SERVER) {
+ log_debug("Unsupported mode %d. Disconnecting.", NTP_FIELD_MODE(ntpmsg.field));
+ return manager_connect(m);
+ }
+
+ root_distance = ntp_ts_short_to_d(&ntpmsg.root_delay) / 2 + ntp_ts_short_to_d(&ntpmsg.root_dispersion);
+ if (root_distance > (double) m->max_root_distance_usec / (double) USEC_PER_SEC) {
+ log_debug("Server has too large root distance. Disconnecting.");
+ return manager_connect(m);
+ }
+
+ /* valid packet */
+ m->pending = false;
+ m->retry_interval = 0;
+
+ /* Stop listening */
+ manager_listen_stop(m);
+
+ /* announce leap seconds */
+ if (NTP_FIELD_LEAP(ntpmsg.field) & NTP_LEAP_PLUSSEC)
+ leap_sec = 1;
+ else if (NTP_FIELD_LEAP(ntpmsg.field) & NTP_LEAP_MINUSSEC)
+ leap_sec = -1;
+ else
+ leap_sec = 0;
+
+ /*
+ * "Timestamp Name ID When Generated
+ * ------------------------------------------------------------
+ * Originate Timestamp T1 time request sent by client
+ * Receive Timestamp T2 time request received by server
+ * Transmit Timestamp T3 time reply sent by server
+ * Destination Timestamp T4 time reply received by client
+ *
+ * The round-trip delay, d, and system clock offset, t, are defined as:
+ * d = (T4 - T1) - (T3 - T2) t = ((T2 - T1) + (T3 - T4)) / 2"
+ */
+ origin = ts_to_d(&m->trans_time) + OFFSET_1900_1970;
+ receive = ntp_ts_to_d(&ntpmsg.recv_time);
+ trans = ntp_ts_to_d(&ntpmsg.trans_time);
+ dest = ts_to_d(recv_time) + OFFSET_1900_1970;
+
+ offset = ((receive - origin) + (trans - dest)) / 2;
+ delay = (dest - origin) - (trans - receive);
+
+ spike = manager_sample_spike_detection(m, offset, delay);
+
+ manager_adjust_poll(m, offset, spike);
+
+ log_debug("NTP response:\n"
+ " leap : %u\n"
+ " version : %u\n"
+ " mode : %u\n"
+ " stratum : %u\n"
+ " precision : %.6f sec (%d)\n"
+ " root distance: %.6f sec\n"
+ " reference : %.4s\n"
+ " origin : %.3f\n"
+ " receive : %.3f\n"
+ " transmit : %.3f\n"
+ " dest : %.3f\n"
+ " offset : %+.3f sec\n"
+ " delay : %+.3f sec\n"
+ " packet count : %"PRIu64"\n"
+ " jitter : %.3f%s\n"
+ " poll interval: " USEC_FMT "\n",
+ NTP_FIELD_LEAP(ntpmsg.field),
+ NTP_FIELD_VERSION(ntpmsg.field),
+ NTP_FIELD_MODE(ntpmsg.field),
+ ntpmsg.stratum,
+ exp2(ntpmsg.precision), ntpmsg.precision,
+ root_distance,
+ ntpmsg.stratum == 1 ? ntpmsg.refid : "n/a",
+ origin - OFFSET_1900_1970,
+ receive - OFFSET_1900_1970,
+ trans - OFFSET_1900_1970,
+ dest - OFFSET_1900_1970,
+ offset, delay,
+ m->packet_count,
+ m->samples_jitter, spike ? " spike" : "",
+ m->poll_interval_usec / USEC_PER_SEC);
+
+ if (!spike) {
+ m->sync = true;
+ r = manager_adjust_clock(m, offset, leap_sec);
+ if (r < 0)
+ log_error_errno(r, "Failed to call clock_adjtime(): %m");
+ }
+
+ /* Save NTP response */
+ m->ntpmsg = ntpmsg;
+ m->origin_time = m->trans_time;
+ m->dest_time = *recv_time;
+ m->spike = spike;
+
+ log_debug("interval/delta/delay/jitter/drift " USEC_FMT "s/%+.3fs/%.3fs/%.3fs/%+"PRIi64"ppm%s",
+ m->poll_interval_usec / USEC_PER_SEC, offset, delay, m->samples_jitter, m->drift_freq / 65536,
+ spike ? " (ignored)" : "");
+
+ (void) sd_bus_emit_properties_changed(m->bus, "/org/freedesktop/timesync1", "org.freedesktop.timesync1.Manager", "NTPMessage", NULL);
+
+ if (!m->good) {
+ _cleanup_free_ char *pretty = NULL;
+
+ m->good = true;
+
+ server_address_pretty(m->current_server_address, &pretty);
+ /* "for the first time", as further successful syncs will not be logged. */
+ log_info("Synchronized to time server for the first time %s (%s).", strna(pretty), m->current_server_name->string);
+ sd_notifyf(false, "STATUS=Synchronized to time server for the first time %s (%s).", strna(pretty), m->current_server_name->string);
+ }
+
+ r = manager_arm_timer(m, m->poll_interval_usec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to rearm timer: %m");
+
+ return 0;
+}
+
+static int manager_listen_setup(Manager *m) {
+ union sockaddr_union addr = {};
+ int r;
+
+ assert(m);
+
+ if (m->server_socket >= 0)
+ return 0;
+
+ assert(!m->event_receive);
+ assert(m->current_server_address);
+
+ addr.sa.sa_family = m->current_server_address->sockaddr.sa.sa_family;
+
+ m->server_socket = socket(addr.sa.sa_family, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+ if (m->server_socket < 0)
+ return -errno;
+
+ r = bind(m->server_socket, &addr.sa, m->current_server_address->socklen);
+ if (r < 0)
+ return -errno;
+
+ r = setsockopt_int(m->server_socket, SOL_SOCKET, SO_TIMESTAMPNS, true);
+ if (r < 0)
+ return r;
+
+ (void) setsockopt_int(m->server_socket, IPPROTO_IP, IP_TOS, IPTOS_LOWDELAY);
+
+ return sd_event_add_io(m->event, &m->event_receive, m->server_socket, EPOLLIN, manager_receive_response, m);
+}
+
+static void manager_listen_stop(Manager *m) {
+ assert(m);
+
+ m->event_receive = sd_event_source_unref(m->event_receive);
+ m->server_socket = safe_close(m->server_socket);
+}
+
+static int manager_begin(Manager *m) {
+ _cleanup_free_ char *pretty = NULL;
+ int r;
+
+ assert(m);
+ assert_return(m->current_server_name, -EHOSTUNREACH);
+ assert_return(m->current_server_address, -EHOSTUNREACH);
+
+ m->good = false;
+ m->missed_replies = NTP_MAX_MISSED_REPLIES;
+ if (m->poll_interval_usec == 0)
+ m->poll_interval_usec = m->poll_interval_min_usec;
+
+ server_address_pretty(m->current_server_address, &pretty);
+ log_debug("Connecting to time server %s (%s).", strna(pretty), m->current_server_name->string);
+ sd_notifyf(false, "STATUS=Connecting to time server %s (%s).", strna(pretty), m->current_server_name->string);
+
+ r = manager_clock_watch_setup(m);
+ if (r < 0)
+ return r;
+
+ return manager_send_request(m);
+}
+
+void manager_set_server_name(Manager *m, ServerName *n) {
+ assert(m);
+
+ if (m->current_server_name == n)
+ return;
+
+ m->current_server_name = n;
+ m->current_server_address = NULL;
+
+ manager_disconnect(m);
+
+ if (n)
+ log_debug("Selected server %s.", n->string);
+}
+
+void manager_set_server_address(Manager *m, ServerAddress *a) {
+ assert(m);
+
+ if (m->current_server_address == a)
+ return;
+
+ m->current_server_address = a;
+ /* If a is NULL, we are just clearing the address, without
+ * changing the name. Keep the existing name in that case. */
+ if (a)
+ m->current_server_name = a->name;
+
+ manager_disconnect(m);
+
+ if (a) {
+ _cleanup_free_ char *pretty = NULL;
+ server_address_pretty(a, &pretty);
+ log_debug("Selected address %s of server %s.", strna(pretty), a->name->string);
+ }
+}
+
+static int manager_resolve_handler(sd_resolve_query *q, int ret, const struct addrinfo *ai, Manager *m) {
+ int r;
+
+ assert(q);
+ assert(m);
+ assert(m->current_server_name);
+
+ m->resolve_query = sd_resolve_query_unref(m->resolve_query);
+
+ if (ret != 0) {
+ log_debug("Failed to resolve %s: %s", m->current_server_name->string, gai_strerror(ret));
+
+ /* Try next host */
+ return manager_connect(m);
+ }
+
+ for (; ai; ai = ai->ai_next) {
+ _cleanup_free_ char *pretty = NULL;
+ ServerAddress *a;
+
+ assert(ai->ai_addr);
+ assert(ai->ai_addrlen >= offsetof(struct sockaddr, sa_data));
+
+ if (!IN_SET(ai->ai_addr->sa_family, AF_INET, AF_INET6)) {
+ log_warning("Unsuitable address protocol for %s", m->current_server_name->string);
+ continue;
+ }
+
+ r = server_address_new(m->current_server_name, &a, (const union sockaddr_union*) ai->ai_addr, ai->ai_addrlen);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add server address: %m");
+
+ server_address_pretty(a, &pretty);
+ log_debug("Resolved address %s for %s.", pretty, m->current_server_name->string);
+ }
+
+ if (!m->current_server_name->addresses) {
+ log_error("Failed to find suitable address for host %s.", m->current_server_name->string);
+
+ /* Try next host */
+ return manager_connect(m);
+ }
+
+ manager_set_server_address(m, m->current_server_name->addresses);
+
+ return manager_begin(m);
+}
+
+static int manager_retry_connect(sd_event_source *source, usec_t usec, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+
+ return manager_connect(m);
+}
+
+int manager_connect(Manager *m) {
+ int r;
+
+ assert(m);
+
+ manager_disconnect(m);
+
+ m->event_retry = sd_event_source_unref(m->event_retry);
+ if (!ratelimit_below(&m->ratelimit)) {
+ log_debug("Delaying attempts to contact servers.");
+
+ r = sd_event_add_time(m->event, &m->event_retry, clock_boottime_or_monotonic(), now(clock_boottime_or_monotonic()) + RETRY_USEC, 0, manager_retry_connect, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create retry timer: %m");
+
+ return 0;
+ }
+
+ /* If we already are operating on some address, switch to the
+ * next one. */
+ if (m->current_server_address && m->current_server_address->addresses_next)
+ manager_set_server_address(m, m->current_server_address->addresses_next);
+ else {
+ struct addrinfo hints = {
+ .ai_flags = AI_NUMERICSERV|AI_ADDRCONFIG,
+ .ai_socktype = SOCK_DGRAM,
+ };
+
+ /* Hmm, we are through all addresses, let's look for the next host instead */
+ if (m->current_server_name && m->current_server_name->names_next)
+ manager_set_server_name(m, m->current_server_name->names_next);
+ else {
+ ServerName *f;
+ bool restart = true;
+
+ /* Our current server name list is exhausted,
+ * let's find the next one to iterate. First
+ * we try the system list, then the link list.
+ * After having processed the link list we
+ * jump back to the system list. However, if
+ * both lists are empty, we change to the
+ * fallback list. */
+ if (!m->current_server_name || m->current_server_name->type == SERVER_LINK) {
+ f = m->system_servers;
+ if (!f)
+ f = m->link_servers;
+ } else {
+ f = m->link_servers;
+ if (!f)
+ f = m->system_servers;
+ else
+ restart = false;
+ }
+
+ if (!f)
+ f = m->fallback_servers;
+
+ if (!f) {
+ manager_set_server_name(m, NULL);
+ log_debug("No server found.");
+ return 0;
+ }
+
+ if (restart && !m->exhausted_servers && m->poll_interval_usec) {
+ log_debug("Waiting after exhausting servers.");
+ r = sd_event_add_time(m->event, &m->event_retry, clock_boottime_or_monotonic(), now(clock_boottime_or_monotonic()) + m->poll_interval_usec, 0, manager_retry_connect, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create retry timer: %m");
+
+ m->exhausted_servers = true;
+
+ /* Increase the polling interval */
+ if (m->poll_interval_usec < m->poll_interval_max_usec)
+ m->poll_interval_usec *= 2;
+
+ return 0;
+ }
+
+ m->exhausted_servers = false;
+
+ manager_set_server_name(m, f);
+ }
+
+ /* Tell the resolver to reread /etc/resolv.conf, in
+ * case it changed. */
+ res_init();
+
+ /* Flush out any previously resolved addresses */
+ server_name_flush_addresses(m->current_server_name);
+
+ log_debug("Resolving %s...", m->current_server_name->string);
+
+ r = resolve_getaddrinfo(m->resolve, &m->resolve_query, m->current_server_name->string, "123", &hints, manager_resolve_handler, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create resolver: %m");
+
+ return 1;
+ }
+
+ r = manager_begin(m);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+void manager_disconnect(Manager *m) {
+ assert(m);
+
+ m->resolve_query = sd_resolve_query_unref(m->resolve_query);
+
+ m->event_timer = sd_event_source_unref(m->event_timer);
+
+ manager_listen_stop(m);
+
+ m->event_clock_watch = sd_event_source_unref(m->event_clock_watch);
+ m->clock_watch_fd = safe_close(m->clock_watch_fd);
+
+ m->event_timeout = sd_event_source_unref(m->event_timeout);
+
+ sd_notifyf(false, "STATUS=Idle.");
+}
+
+void manager_flush_server_names(Manager *m, ServerType t) {
+ assert(m);
+
+ if (t == SERVER_SYSTEM)
+ while (m->system_servers)
+ server_name_free(m->system_servers);
+
+ if (t == SERVER_LINK)
+ while (m->link_servers)
+ server_name_free(m->link_servers);
+
+ if (t == SERVER_FALLBACK)
+ while (m->fallback_servers)
+ server_name_free(m->fallback_servers);
+}
+
+void manager_free(Manager *m) {
+ if (!m)
+ return;
+
+ manager_disconnect(m);
+ manager_flush_server_names(m, SERVER_SYSTEM);
+ manager_flush_server_names(m, SERVER_LINK);
+ manager_flush_server_names(m, SERVER_FALLBACK);
+
+ sd_event_source_unref(m->event_retry);
+
+ sd_event_source_unref(m->network_event_source);
+ sd_network_monitor_unref(m->network_monitor);
+
+ sd_resolve_unref(m->resolve);
+ sd_event_unref(m->event);
+
+ sd_bus_flush_close_unref(m->bus);
+
+ free(m);
+}
+
+static int manager_network_read_link_servers(Manager *m) {
+ _cleanup_strv_free_ char **ntp = NULL;
+ ServerName *n, *nx;
+ char **i;
+ bool changed = false;
+ int r;
+
+ assert(m);
+
+ r = sd_network_get_ntp(&ntp);
+ if (r < 0) {
+ if (r == -ENOMEM)
+ log_oom();
+ else
+ log_debug_errno(r, "Failed to get link NTP servers: %m");
+ goto clear;
+ }
+
+ LIST_FOREACH(names, n, m->link_servers)
+ n->marked = true;
+
+ STRV_FOREACH(i, ntp) {
+ bool found = false;
+
+ r = dns_name_is_valid_or_address(*i);
+ if (r < 0) {
+ log_error_errno(r, "Failed to check validity of NTP server name or address '%s': %m", *i);
+ goto clear;
+ } else if (r == 0) {
+ log_error("Invalid NTP server name or address, ignoring: %s", *i);
+ continue;
+ }
+
+ LIST_FOREACH(names, n, m->link_servers)
+ if (streq(n->string, *i)) {
+ n->marked = false;
+ found = true;
+ break;
+ }
+
+ if (!found) {
+ r = server_name_new(m, NULL, SERVER_LINK, *i);
+ if (r < 0) {
+ log_oom();
+ goto clear;
+ }
+
+ changed = true;
+ }
+ }
+
+ LIST_FOREACH_SAFE(names, n, nx, m->link_servers)
+ if (n->marked) {
+ server_name_free(n);
+ changed = true;
+ }
+
+ return changed;
+
+clear:
+ manager_flush_server_names(m, SERVER_LINK);
+ return r;
+}
+
+static bool manager_is_connected(Manager *m) {
+ /* Return true when the manager is sending a request, resolving a server name, or
+ * in a poll interval. */
+ return m->server_socket >= 0 || m->resolve_query || m->event_timer;
+}
+
+static int manager_network_event_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ bool changed, connected, online;
+ int r;
+
+ assert(m);
+
+ sd_network_monitor_flush(m->network_monitor);
+
+ /* When manager_network_read_link_servers() failed, we assume that the servers are changed. */
+ changed = !!manager_network_read_link_servers(m);
+
+ /* check if the machine is online */
+ online = network_is_online();
+
+ /* check if the client is currently connected */
+ connected = manager_is_connected(m);
+
+ if (connected && !online) {
+ log_info("No network connectivity, watching for changes.");
+ manager_disconnect(m);
+
+ } else if ((!connected || changed) && online) {
+ log_info("Network configuration changed, trying to establish connection.");
+
+ if (m->current_server_address)
+ r = manager_begin(m);
+ else
+ r = manager_connect(m);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int manager_network_monitor_listen(Manager *m) {
+ int r, fd, events;
+
+ assert(m);
+
+ r = sd_network_monitor_new(&m->network_monitor, NULL);
+ if (r == -ENOENT) {
+ log_info("systemd does not appear to be running, not listening for systemd-networkd events.");
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ fd = sd_network_monitor_get_fd(m->network_monitor);
+ if (fd < 0)
+ return fd;
+
+ events = sd_network_monitor_get_events(m->network_monitor);
+ if (events < 0)
+ return events;
+
+ r = sd_event_add_io(m->event, &m->network_event_source, fd, events, manager_network_event_handler, m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int manager_new(Manager **ret) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ assert(ret);
+
+ m = new0(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ m->max_root_distance_usec = NTP_MAX_ROOT_DISTANCE;
+ m->poll_interval_min_usec = NTP_POLL_INTERVAL_MIN_USEC;
+ m->poll_interval_max_usec = NTP_POLL_INTERVAL_MAX_USEC;
+
+ m->server_socket = m->clock_watch_fd = -1;
+
+ RATELIMIT_INIT(m->ratelimit, RATELIMIT_INTERVAL_USEC, RATELIMIT_BURST);
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ (void) sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+
+ (void) sd_event_set_watchdog(m->event, true);
+
+ r = sd_resolve_default(&m->resolve);
+ if (r < 0)
+ return r;
+
+ r = sd_resolve_attach_event(m->resolve, m->event, 0);
+ if (r < 0)
+ return r;
+
+ r = manager_network_monitor_listen(m);
+ if (r < 0)
+ return r;
+
+ (void) manager_network_read_link_servers(m);
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
diff --git a/src/timesync/timesyncd-manager.h b/src/timesync/timesyncd-manager.h
new file mode 100644
index 0000000..97c4e2f
--- /dev/null
+++ b/src/timesync/timesyncd-manager.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/timex.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+#include "sd-network.h"
+#include "sd-resolve.h"
+
+#include "list.h"
+#include "ratelimit.h"
+#include "time-util.h"
+#include "timesyncd-ntp-message.h"
+
+typedef struct Manager Manager;
+
+#include "timesyncd-server.h"
+
+/*
+ * "A client MUST NOT under any conditions use a poll interval less
+ * than 15 seconds."
+ */
+#define NTP_POLL_INTERVAL_MIN_USEC (32 * USEC_PER_SEC)
+#define NTP_POLL_INTERVAL_MAX_USEC (2048 * USEC_PER_SEC)
+
+struct Manager {
+ sd_bus *bus;
+ sd_event *event;
+ sd_resolve *resolve;
+
+ LIST_HEAD(ServerName, system_servers);
+ LIST_HEAD(ServerName, link_servers);
+ LIST_HEAD(ServerName, fallback_servers);
+
+ bool have_fallbacks:1;
+
+ RateLimit ratelimit;
+ bool exhausted_servers;
+
+ /* network */
+ sd_event_source *network_event_source;
+ sd_network_monitor *network_monitor;
+
+ /* peer */
+ sd_resolve_query *resolve_query;
+ sd_event_source *event_receive;
+ ServerName *current_server_name;
+ ServerAddress *current_server_address;
+ int server_socket;
+ int missed_replies;
+ uint64_t packet_count;
+ sd_event_source *event_timeout;
+ bool good;
+
+ /* last sent packet */
+ struct timespec trans_time_mon;
+ struct timespec trans_time;
+ usec_t retry_interval;
+ bool pending;
+
+ /* poll timer */
+ sd_event_source *event_timer;
+ usec_t poll_interval_usec;
+ usec_t poll_interval_min_usec;
+ usec_t poll_interval_max_usec;
+ bool poll_resync;
+
+ /* history data */
+ struct {
+ double offset;
+ double delay;
+ } samples[8];
+ unsigned samples_idx;
+ double samples_jitter;
+ usec_t max_root_distance_usec;
+
+ /* last change */
+ bool jumped;
+ bool sync;
+ int64_t drift_freq;
+
+ /* watch for time changes */
+ sd_event_source *event_clock_watch;
+ int clock_watch_fd;
+
+ /* Retry connections */
+ sd_event_source *event_retry;
+
+ /* RTC runs in local time, leave it alone */
+ bool rtc_local_time;
+
+ /* NTP response */
+ struct ntp_msg ntpmsg;
+ struct timespec origin_time, dest_time;
+ bool spike;
+};
+
+int manager_new(Manager **ret);
+void manager_free(Manager *m);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
+
+void manager_set_server_name(Manager *m, ServerName *n);
+void manager_set_server_address(Manager *m, ServerAddress *a);
+void manager_flush_server_names(Manager *m, ServerType t);
+
+int manager_connect(Manager *m);
+void manager_disconnect(Manager *m);
diff --git a/src/timesync/timesyncd-ntp-message.h b/src/timesync/timesyncd-ntp-message.h
new file mode 100644
index 0000000..14fba6d
--- /dev/null
+++ b/src/timesync/timesyncd-ntp-message.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sparse-endian.h"
+
+/* NTP protocol, packet header */
+#define NTP_LEAP_PLUSSEC 1
+#define NTP_LEAP_MINUSSEC 2
+#define NTP_LEAP_NOTINSYNC 3
+#define NTP_MODE_CLIENT 3
+#define NTP_MODE_SERVER 4
+#define NTP_FIELD_LEAP(f) (((f) >> 6) & 3)
+#define NTP_FIELD_VERSION(f) (((f) >> 3) & 7)
+#define NTP_FIELD_MODE(f) ((f) & 7)
+#define NTP_FIELD(l, v, m) (((l) << 6) | ((v) << 3) | (m))
+
+/*
+ * "NTP timestamps are represented as a 64-bit unsigned fixed-point number,
+ * in seconds relative to 0h on 1 January 1900."
+ */
+#define OFFSET_1900_1970 UINT64_C(2208988800)
+
+struct ntp_ts {
+ be32_t sec;
+ be32_t frac;
+} _packed_;
+
+struct ntp_ts_short {
+ be16_t sec;
+ be16_t frac;
+} _packed_;
+
+struct ntp_msg {
+ uint8_t field;
+ uint8_t stratum;
+ int8_t poll;
+ int8_t precision;
+ struct ntp_ts_short root_delay;
+ struct ntp_ts_short root_dispersion;
+ char refid[4];
+ struct ntp_ts reference_time;
+ struct ntp_ts origin_time;
+ struct ntp_ts recv_time;
+ struct ntp_ts trans_time;
+} _packed_;
diff --git a/src/timesync/timesyncd-server.c b/src/timesync/timesyncd-server.c
new file mode 100644
index 0000000..aadecb6
--- /dev/null
+++ b/src/timesync/timesyncd-server.c
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "timesyncd-server.h"
+
+int server_address_new(
+ ServerName *n,
+ ServerAddress **ret,
+ const union sockaddr_union *sockaddr,
+ socklen_t socklen) {
+
+ ServerAddress *a, *tail;
+
+ assert(n);
+ assert(sockaddr);
+ assert(socklen >= offsetof(struct sockaddr, sa_data));
+ assert(socklen <= sizeof(union sockaddr_union));
+
+ a = new0(ServerAddress, 1);
+ if (!a)
+ return -ENOMEM;
+
+ memcpy(&a->sockaddr, sockaddr, socklen);
+ a->socklen = socklen;
+
+ LIST_FIND_TAIL(addresses, n->addresses, tail);
+ LIST_INSERT_AFTER(addresses, n->addresses, tail, a);
+ a->name = n;
+
+ if (ret)
+ *ret = a;
+
+ return 0;
+}
+
+ServerAddress* server_address_free(ServerAddress *a) {
+ if (!a)
+ return NULL;
+
+ if (a->name) {
+ LIST_REMOVE(addresses, a->name->addresses, a);
+
+ if (a->name->manager && a->name->manager->current_server_address == a)
+ manager_set_server_address(a->name->manager, NULL);
+ }
+
+ return mfree(a);
+}
+
+int server_name_new(
+ Manager *m,
+ ServerName **ret,
+ ServerType type,
+ const char *string) {
+
+ ServerName *n, *tail;
+
+ assert(m);
+ assert(string);
+
+ n = new0(ServerName, 1);
+ if (!n)
+ return -ENOMEM;
+
+ n->type = type;
+ n->string = strdup(string);
+ if (!n->string) {
+ free(n);
+ return -ENOMEM;
+ }
+
+ if (type == SERVER_SYSTEM) {
+ LIST_FIND_TAIL(names, m->system_servers, tail);
+ LIST_INSERT_AFTER(names, m->system_servers, tail, n);
+ } else if (type == SERVER_LINK) {
+ LIST_FIND_TAIL(names, m->link_servers, tail);
+ LIST_INSERT_AFTER(names, m->link_servers, tail, n);
+ } else if (type == SERVER_FALLBACK) {
+ LIST_FIND_TAIL(names, m->fallback_servers, tail);
+ LIST_INSERT_AFTER(names, m->fallback_servers, tail, n);
+ } else
+ assert_not_reached("Unknown server type");
+
+ n->manager = m;
+
+ if (type != SERVER_FALLBACK &&
+ m->current_server_name &&
+ m->current_server_name->type == SERVER_FALLBACK)
+ manager_set_server_name(m, NULL);
+
+ log_debug("Added new server %s.", string);
+
+ if (ret)
+ *ret = n;
+
+ return 0;
+}
+
+ServerName *server_name_free(ServerName *n) {
+ if (!n)
+ return NULL;
+
+ server_name_flush_addresses(n);
+
+ if (n->manager) {
+ if (n->type == SERVER_SYSTEM)
+ LIST_REMOVE(names, n->manager->system_servers, n);
+ else if (n->type == SERVER_LINK)
+ LIST_REMOVE(names, n->manager->link_servers, n);
+ else if (n->type == SERVER_FALLBACK)
+ LIST_REMOVE(names, n->manager->fallback_servers, n);
+ else
+ assert_not_reached("Unknown server type");
+
+ if (n->manager->current_server_name == n)
+ manager_set_server_name(n->manager, NULL);
+ }
+
+ log_debug("Removed server %s.", n->string);
+
+ free(n->string);
+ return mfree(n);
+}
+
+void server_name_flush_addresses(ServerName *n) {
+ assert(n);
+
+ while (n->addresses)
+ server_address_free(n->addresses);
+}
diff --git a/src/timesync/timesyncd-server.h b/src/timesync/timesyncd-server.h
new file mode 100644
index 0000000..d30bd2a
--- /dev/null
+++ b/src/timesync/timesyncd-server.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "list.h"
+#include "socket-util.h"
+
+typedef struct ServerAddress ServerAddress;
+typedef struct ServerName ServerName;
+
+typedef enum ServerType {
+ SERVER_SYSTEM,
+ SERVER_FALLBACK,
+ SERVER_LINK,
+} ServerType;
+
+#include "timesyncd-manager.h"
+
+struct ServerAddress {
+ ServerName *name;
+
+ union sockaddr_union sockaddr;
+ socklen_t socklen;
+
+ LIST_FIELDS(ServerAddress, addresses);
+};
+
+struct ServerName {
+ Manager *manager;
+
+ ServerType type;
+ char *string;
+
+ bool marked:1;
+
+ LIST_HEAD(ServerAddress, addresses);
+ LIST_FIELDS(ServerName, names);
+};
+
+int server_address_new(ServerName *n, ServerAddress **ret, const union sockaddr_union *sockaddr, socklen_t socklen);
+ServerAddress* server_address_free(ServerAddress *a);
+static inline int server_address_pretty(ServerAddress *a, char **pretty) {
+ return sockaddr_pretty(&a->sockaddr.sa, a->socklen, true, true, pretty);
+}
+
+int server_name_new(Manager *m, ServerName **ret, ServerType type,const char *string);
+ServerName *server_name_free(ServerName *n);
+void server_name_flush_addresses(ServerName *n);
diff --git a/src/timesync/timesyncd.c b/src/timesync/timesyncd.c
new file mode 100644
index 0000000..b4f70c0
--- /dev/null
+++ b/src/timesync/timesyncd.c
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+
+#include "capability-util.h"
+#include "clock-util.h"
+#include "daemon-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "network-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "timesyncd-bus.h"
+#include "timesyncd-conf.h"
+#include "timesyncd-manager.h"
+#include "user-util.h"
+
+#define STATE_DIR "/var/lib/systemd/timesync"
+#define CLOCK_FILE STATE_DIR "/clock"
+
+static int load_clock_timestamp(uid_t uid, gid_t gid) {
+ _cleanup_close_ int fd = -1;
+ usec_t min = TIME_EPOCH * USEC_PER_SEC;
+ usec_t ct;
+ int r;
+
+ /* Let's try to make sure that the clock is always
+ * monotonically increasing, by saving the clock whenever we
+ * have a new NTP time, or when we shut down, and restoring it
+ * when we start again. This is particularly helpful on
+ * systems lacking a battery backed RTC. We also will adjust
+ * the time to at least the build time of systemd. */
+
+ fd = open(CLOCK_FILE, O_RDWR|O_CLOEXEC, 0644);
+ if (fd >= 0) {
+ struct stat st;
+ usec_t stamp;
+
+ /* check if the recorded time is later than the compiled-in one */
+ r = fstat(fd, &st);
+ if (r >= 0) {
+ stamp = timespec_load(&st.st_mtim);
+ if (stamp > min)
+ min = stamp;
+ }
+
+ if (geteuid() == 0) {
+ /* Try to fix the access mode, so that we can still
+ touch the file after dropping priviliges */
+ r = fchmod_and_chown(fd, 0644, uid, gid);
+ if (r < 0)
+ log_warning_errno(r, "Failed to chmod or chown %s, ignoring: %m", CLOCK_FILE);
+ }
+
+ } else {
+ r = mkdir_safe_label(STATE_DIR, 0755, uid, gid,
+ MKDIR_FOLLOW_SYMLINK | MKDIR_WARN_MODE);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to create state directory, ignoring: %m");
+ goto settime;
+ }
+
+ /* create stamp file with the compiled-in date */
+ r = touch_file(CLOCK_FILE, false, min, uid, gid, 0644);
+ if (r < 0)
+ log_debug_errno(r, "Failed to create %s, ignoring: %m", CLOCK_FILE);
+ }
+
+settime:
+ ct = now(CLOCK_REALTIME);
+ if (ct < min) {
+ struct timespec ts;
+ char date[FORMAT_TIMESTAMP_MAX];
+
+ log_info("System clock time unset or jumped backwards, restoring from recorded timestamp: %s",
+ format_timestamp(date, sizeof(date), min));
+
+ if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, min)) < 0)
+ log_error_errno(errno, "Failed to restore system clock, ignoring: %m");
+ }
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(notify_on_cleanup) const char *notify_message = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ const char *user = "systemd-timesync";
+ uid_t uid, uid_current;
+ gid_t gid;
+ int r;
+
+ log_set_facility(LOG_CRON);
+ log_setup_service();
+
+ umask(0022);
+
+ if (argc != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program does not take arguments.");
+
+ uid = uid_current = geteuid();
+ gid = getegid();
+
+ if (uid_current == 0) {
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Cannot resolve user name %s: %m", user);
+ }
+
+ r = load_clock_timestamp(uid, gid);
+ if (r < 0)
+ return r;
+
+ /* Drop privileges, but only if we have been started as root. If we are not running as root we assume all
+ * privileges are already dropped. */
+ if (uid_current == 0) {
+ r = drop_privileges(uid, gid, (1ULL << CAP_SYS_TIME));
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop privileges: %m");
+ }
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate manager: %m");
+
+ r = manager_connect_bus(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not connect to bus: %m");
+
+ if (clock_is_localtime(NULL) > 0) {
+ log_info("The system is configured to read the RTC time in the local time zone. "
+ "This mode cannot be fully supported. All system time to RTC updates are disabled.");
+ m->rtc_local_time = true;
+ }
+
+ r = manager_parse_config_file(m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse configuration file: %m");
+
+ r = manager_parse_fallback_string(m, NTP_SERVERS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse fallback server strings: %m");
+
+ log_debug("systemd-timesyncd running as pid " PID_FMT, getpid_cached());
+
+ notify_message = notify_start("READY=1\n"
+ "STATUS=Daemon is running",
+ NOTIFY_STOPPING);
+
+ if (network_is_online()) {
+ r = manager_connect(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_loop(m->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ /* if we got an authoritative time, store it in the file system */
+ if (m->sync) {
+ r = touch(CLOCK_FILE);
+ if (r < 0)
+ log_debug_errno(r, "Failed to touch %s, ignoring: %m", CLOCK_FILE);
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/timesync/timesyncd.conf.in b/src/timesync/timesyncd.conf.in
new file mode 100644
index 0000000..f91c034
--- /dev/null
+++ b/src/timesync/timesyncd.conf.in
@@ -0,0 +1,19 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See timesyncd.conf(5) for details.
+
+[Time]
+#NTP=
+#FallbackNTP=@NTP_SERVERS@
+#RootDistanceMaxSec=5
+#PollIntervalMinSec=32
+#PollIntervalMaxSec=2048
diff --git a/src/tmpfiles/tmpfiles.c b/src/tmpfiles/tmpfiles.c
new file mode 100644
index 0000000..b66765b
--- /dev/null
+++ b/src/tmpfiles/tmpfiles.c
@@ -0,0 +1,3275 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <getopt.h>
+#include <glob.h>
+#include <limits.h>
+#include <linux/fs.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+#include <sysexits.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sd-path.h"
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "capability-util.h"
+#include "chattr-util.h"
+#include "conf-files.h"
+#include "copy.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "io-util.h"
+#include "label.h"
+#include "log.h"
+#include "macro.h"
+#include "main-func.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "mountpoint-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "rm-rf.h"
+#include "selinux-util.h"
+#include "set.h"
+#include "specifier.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "util.h"
+
+/* This reads all files listed in /etc/tmpfiles.d/?*.conf and creates
+ * them in the file system. This is intended to be used to create
+ * properly owned directories beneath /tmp, /var/tmp, /run, which are
+ * volatile and hence need to be recreated on bootup. */
+
+typedef enum OperationMask {
+ OPERATION_CREATE = 1 << 0,
+ OPERATION_REMOVE = 1 << 1,
+ OPERATION_CLEAN = 1 << 2,
+} OperationMask;
+
+typedef enum ItemType {
+ /* These ones take file names */
+ CREATE_FILE = 'f',
+ TRUNCATE_FILE = 'F',
+ CREATE_DIRECTORY = 'd',
+ TRUNCATE_DIRECTORY = 'D',
+ CREATE_SUBVOLUME = 'v',
+ CREATE_SUBVOLUME_INHERIT_QUOTA = 'q',
+ CREATE_SUBVOLUME_NEW_QUOTA = 'Q',
+ CREATE_FIFO = 'p',
+ CREATE_SYMLINK = 'L',
+ CREATE_CHAR_DEVICE = 'c',
+ CREATE_BLOCK_DEVICE = 'b',
+ COPY_FILES = 'C',
+
+ /* These ones take globs */
+ WRITE_FILE = 'w',
+ EMPTY_DIRECTORY = 'e',
+ SET_XATTR = 't',
+ RECURSIVE_SET_XATTR = 'T',
+ SET_ACL = 'a',
+ RECURSIVE_SET_ACL = 'A',
+ SET_ATTRIBUTE = 'h',
+ RECURSIVE_SET_ATTRIBUTE = 'H',
+ IGNORE_PATH = 'x',
+ IGNORE_DIRECTORY_PATH = 'X',
+ REMOVE_PATH = 'r',
+ RECURSIVE_REMOVE_PATH = 'R',
+ RELABEL_PATH = 'z',
+ RECURSIVE_RELABEL_PATH = 'Z',
+ ADJUST_MODE = 'm', /* legacy, 'z' is identical to this */
+} ItemType;
+
+typedef struct Item {
+ ItemType type;
+
+ char *path;
+ char *argument;
+ char **xattrs;
+#if HAVE_ACL
+ acl_t acl_access;
+ acl_t acl_default;
+#endif
+ uid_t uid;
+ gid_t gid;
+ mode_t mode;
+ usec_t age;
+
+ dev_t major_minor;
+ unsigned attribute_value;
+ unsigned attribute_mask;
+
+ bool uid_set:1;
+ bool gid_set:1;
+ bool mode_set:1;
+ bool age_set:1;
+ bool mask_perms:1;
+ bool attribute_set:1;
+
+ bool keep_first_level:1;
+
+ bool force:1;
+
+ bool allow_failure:1;
+
+ OperationMask done;
+} Item;
+
+typedef struct ItemArray {
+ Item *items;
+ size_t n_items;
+ size_t allocated;
+
+ struct ItemArray *parent;
+ Set *children;
+} ItemArray;
+
+typedef enum DirectoryType {
+ DIRECTORY_RUNTIME,
+ DIRECTORY_STATE,
+ DIRECTORY_CACHE,
+ DIRECTORY_LOGS,
+ _DIRECTORY_TYPE_MAX,
+} DirectoryType;
+
+static bool arg_cat_config = false;
+static bool arg_user = false;
+static OperationMask arg_operation = 0;
+static bool arg_boot = false;
+static PagerFlags arg_pager_flags = 0;
+
+static char **arg_include_prefixes = NULL;
+static char **arg_exclude_prefixes = NULL;
+static char *arg_root = NULL;
+static char *arg_replace = NULL;
+
+#define MAX_DEPTH 256
+
+static OrderedHashmap *items = NULL, *globs = NULL;
+static Set *unix_sockets = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(items, ordered_hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(globs, ordered_hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(unix_sockets, set_free_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_include_prefixes, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_exclude_prefixes, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
+
+static int specifier_machine_id_safe(char specifier, const void *data, const void *userdata, char **ret);
+static int specifier_directory(char specifier, const void *data, const void *userdata, char **ret);
+
+static const Specifier specifier_table[] = {
+ { 'm', specifier_machine_id_safe, NULL },
+ { 'b', specifier_boot_id, NULL },
+ { 'H', specifier_host_name, NULL },
+ { 'v', specifier_kernel_release, NULL },
+
+ { 'g', specifier_group_name, NULL },
+ { 'G', specifier_group_id, NULL },
+ { 'U', specifier_user_id, NULL },
+ { 'u', specifier_user_name, NULL },
+ { 'h', specifier_user_home, NULL },
+
+ { 't', specifier_directory, UINT_TO_PTR(DIRECTORY_RUNTIME) },
+ { 'S', specifier_directory, UINT_TO_PTR(DIRECTORY_STATE) },
+ { 'C', specifier_directory, UINT_TO_PTR(DIRECTORY_CACHE) },
+ { 'L', specifier_directory, UINT_TO_PTR(DIRECTORY_LOGS) },
+ { 'T', specifier_tmp_dir, NULL },
+ { 'V', specifier_var_tmp_dir, NULL },
+ {}
+};
+
+static int specifier_machine_id_safe(char specifier, const void *data, const void *userdata, char **ret) {
+ int r;
+
+ /* If /etc/machine_id is missing or empty (e.g. in a chroot environment)
+ * return a recognizable error so that the caller can skip the rule
+ * gracefully. */
+
+ r = specifier_machine_id(specifier, data, userdata, ret);
+ if (IN_SET(r, -ENOENT, -ENOMEDIUM))
+ return -ENXIO;
+
+ return r;
+}
+
+static int specifier_directory(char specifier, const void *data, const void *userdata, char **ret) {
+ struct table_entry {
+ uint64_t type;
+ const char *suffix;
+ };
+
+ static const struct table_entry paths_system[] = {
+ [DIRECTORY_RUNTIME] = { SD_PATH_SYSTEM_RUNTIME },
+ [DIRECTORY_STATE] = { SD_PATH_SYSTEM_STATE_PRIVATE },
+ [DIRECTORY_CACHE] = { SD_PATH_SYSTEM_STATE_CACHE },
+ [DIRECTORY_LOGS] = { SD_PATH_SYSTEM_STATE_LOGS },
+ };
+
+ static const struct table_entry paths_user[] = {
+ [DIRECTORY_RUNTIME] = { SD_PATH_USER_RUNTIME },
+ [DIRECTORY_STATE] = { SD_PATH_USER_CONFIGURATION },
+ [DIRECTORY_CACHE] = { SD_PATH_USER_STATE_CACHE },
+ [DIRECTORY_LOGS] = { SD_PATH_USER_CONFIGURATION, "log" },
+ };
+
+ unsigned i;
+ const struct table_entry *paths;
+
+ assert_cc(ELEMENTSOF(paths_system) == ELEMENTSOF(paths_user));
+ paths = arg_user ? paths_user : paths_system;
+
+ i = PTR_TO_UINT(data);
+ assert(i < ELEMENTSOF(paths_system));
+
+ return sd_path_home(paths[i].type, paths[i].suffix, ret);
+}
+
+static int log_unresolvable_specifier(const char *filename, unsigned line) {
+ static bool notified = false;
+
+ /* In system mode, this is called when /etc is not fully initialized (e.g.
+ * in a chroot environment) where some specifiers are unresolvable. In user
+ * mode, this is called when some variables are not defined. These cases are
+ * not considered as an error so log at LOG_NOTICE only for the first time
+ * and then downgrade this to LOG_DEBUG for the rest. */
+
+ log_full(notified ? LOG_DEBUG : LOG_NOTICE,
+ "[%s:%u] Failed to resolve specifier: %s, skipping",
+ filename, line,
+ arg_user ? "Required $XDG_... variable not defined" : "uninitialized /etc detected");
+
+ if (!notified)
+ log_notice("All rules containing unresolvable specifiers will be skipped.");
+
+ notified = true;
+ return 0;
+}
+
+static int user_config_paths(char*** ret) {
+ _cleanup_strv_free_ char **config_dirs = NULL, **data_dirs = NULL;
+ _cleanup_free_ char *persistent_config = NULL, *runtime_config = NULL, *data_home = NULL;
+ _cleanup_strv_free_ char **res = NULL;
+ int r;
+
+ r = xdg_user_dirs(&config_dirs, &data_dirs);
+ if (r < 0)
+ return r;
+
+ r = xdg_user_config_dir(&persistent_config, "/user-tmpfiles.d");
+ if (r < 0 && r != -ENXIO)
+ return r;
+
+ r = xdg_user_runtime_dir(&runtime_config, "/user-tmpfiles.d");
+ if (r < 0 && r != -ENXIO)
+ return r;
+
+ r = xdg_user_data_dir(&data_home, "/user-tmpfiles.d");
+ if (r < 0 && r != -ENXIO)
+ return r;
+
+ r = strv_extend_strv_concat(&res, config_dirs, "/user-tmpfiles.d");
+ if (r < 0)
+ return r;
+
+ r = strv_extend(&res, persistent_config);
+ if (r < 0)
+ return r;
+
+ r = strv_extend(&res, runtime_config);
+ if (r < 0)
+ return r;
+
+ r = strv_extend(&res, data_home);
+ if (r < 0)
+ return r;
+
+ r = strv_extend_strv_concat(&res, data_dirs, "/user-tmpfiles.d");
+ if (r < 0)
+ return r;
+
+ r = path_strv_make_absolute_cwd(res);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(res);
+ return 0;
+}
+
+static bool needs_glob(ItemType t) {
+ return IN_SET(t,
+ WRITE_FILE,
+ IGNORE_PATH,
+ IGNORE_DIRECTORY_PATH,
+ REMOVE_PATH,
+ RECURSIVE_REMOVE_PATH,
+ EMPTY_DIRECTORY,
+ ADJUST_MODE,
+ RELABEL_PATH,
+ RECURSIVE_RELABEL_PATH,
+ SET_XATTR,
+ RECURSIVE_SET_XATTR,
+ SET_ACL,
+ RECURSIVE_SET_ACL,
+ SET_ATTRIBUTE,
+ RECURSIVE_SET_ATTRIBUTE);
+}
+
+static bool takes_ownership(ItemType t) {
+ return IN_SET(t,
+ CREATE_FILE,
+ TRUNCATE_FILE,
+ CREATE_DIRECTORY,
+ EMPTY_DIRECTORY,
+ TRUNCATE_DIRECTORY,
+ CREATE_SUBVOLUME,
+ CREATE_SUBVOLUME_INHERIT_QUOTA,
+ CREATE_SUBVOLUME_NEW_QUOTA,
+ CREATE_FIFO,
+ CREATE_SYMLINK,
+ CREATE_CHAR_DEVICE,
+ CREATE_BLOCK_DEVICE,
+ COPY_FILES,
+ WRITE_FILE,
+ IGNORE_PATH,
+ IGNORE_DIRECTORY_PATH,
+ REMOVE_PATH,
+ RECURSIVE_REMOVE_PATH);
+}
+
+static struct Item* find_glob(OrderedHashmap *h, const char *match) {
+ ItemArray *j;
+ Iterator i;
+
+ ORDERED_HASHMAP_FOREACH(j, h, i) {
+ size_t n;
+
+ for (n = 0; n < j->n_items; n++) {
+ Item *item = j->items + n;
+
+ if (fnmatch(item->path, match, FNM_PATHNAME|FNM_PERIOD) == 0)
+ return item;
+ }
+ }
+
+ return NULL;
+}
+
+static int load_unix_sockets(void) {
+ _cleanup_set_free_free_ Set *sockets = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ if (unix_sockets)
+ return 0;
+
+ /* We maintain a cache of the sockets we found in /proc/net/unix to speed things up a little. */
+
+ sockets = set_new(&path_hash_ops);
+ if (!sockets)
+ return log_oom();
+
+ f = fopen("/proc/net/unix", "re");
+ if (!f)
+ return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to open /proc/net/unix, ignoring: %m");
+
+ /* Skip header */
+ r = read_line(f, LONG_LINE_MAX, NULL);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to skip /proc/net/unix header line: %m");
+ if (r == 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EIO), "Premature end of file reading /proc/net/unix.");
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL, *s = NULL;
+ char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read /proc/net/unix line, ignoring: %m");
+ if (r == 0) /* EOF */
+ break;
+
+ p = strchr(line, ':');
+ if (!p)
+ continue;
+
+ if (strlen(p) < 37)
+ continue;
+
+ p += 37;
+ p += strspn(p, WHITESPACE);
+ p += strcspn(p, WHITESPACE); /* skip one more word */
+ p += strspn(p, WHITESPACE);
+
+ if (*p != '/')
+ continue;
+
+ s = strdup(p);
+ if (!s)
+ return log_oom();
+
+ path_simplify(s, false);
+
+ r = set_consume(sockets, s);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return log_warning_errno(r, "Failed to add AF_UNIX socket to set, ignoring: %m");
+
+ TAKE_PTR(s);
+ }
+
+ unix_sockets = TAKE_PTR(sockets);
+ return 1;
+}
+
+static bool unix_socket_alive(const char *fn) {
+ assert(fn);
+
+ if (load_unix_sockets() < 0)
+ return true; /* We don't know, so assume yes */
+
+ return !!set_get(unix_sockets, (char*) fn);
+}
+
+static int dir_is_mount_point(DIR *d, const char *subdir) {
+
+ int mount_id_parent, mount_id;
+ int r_p, r;
+
+ r_p = name_to_handle_at_loop(dirfd(d), ".", NULL, &mount_id_parent, 0);
+ if (r_p < 0)
+ r_p = -errno;
+
+ r = name_to_handle_at_loop(dirfd(d), subdir, NULL, &mount_id, 0);
+ if (r < 0)
+ r = -errno;
+
+ /* got no handle; make no assumptions, return error */
+ if (r_p < 0 && r < 0)
+ return r_p;
+
+ /* got both handles; if they differ, it is a mount point */
+ if (r_p >= 0 && r >= 0)
+ return mount_id_parent != mount_id;
+
+ /* got only one handle; assume different mount points if one
+ * of both queries was not supported by the filesystem */
+ if (IN_SET(r_p, -ENOSYS, -EOPNOTSUPP) || IN_SET(r, -ENOSYS, -EOPNOTSUPP))
+ return true;
+
+ /* return error */
+ if (r_p < 0)
+ return r_p;
+ return r;
+}
+
+static DIR* xopendirat_nomod(int dirfd, const char *path) {
+ DIR *dir;
+
+ dir = xopendirat(dirfd, path, O_NOFOLLOW|O_NOATIME);
+ if (dir)
+ return dir;
+
+ log_debug_errno(errno, "Cannot open %sdirectory \"%s\": %m", dirfd == AT_FDCWD ? "" : "sub", path);
+ if (errno != EPERM)
+ return NULL;
+
+ dir = xopendirat(dirfd, path, O_NOFOLLOW);
+ if (!dir)
+ log_debug_errno(errno, "Cannot open %sdirectory \"%s\": %m", dirfd == AT_FDCWD ? "" : "sub", path);
+
+ return dir;
+}
+
+static DIR* opendir_nomod(const char *path) {
+ return xopendirat_nomod(AT_FDCWD, path);
+}
+
+static int dir_cleanup(
+ Item *i,
+ const char *p,
+ DIR *d,
+ const struct stat *ds,
+ usec_t cutoff,
+ dev_t rootdev,
+ bool mountpoint,
+ int maxdepth,
+ bool keep_this_level) {
+
+ struct dirent *dent;
+ struct timespec times[2];
+ bool deleted = false;
+ int r = 0;
+
+ FOREACH_DIRENT_ALL(dent, d, break) {
+ struct stat s;
+ usec_t age;
+ _cleanup_free_ char *sub_path = NULL;
+
+ if (dot_or_dot_dot(dent->d_name))
+ continue;
+
+ if (fstatat(dirfd(d), dent->d_name, &s, AT_SYMLINK_NOFOLLOW) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ /* FUSE, NFS mounts, SELinux might return EACCES */
+ r = log_full_errno(errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
+ "stat(%s/%s) failed: %m", p, dent->d_name);
+ continue;
+ }
+
+ /* Stay on the same filesystem */
+ if (s.st_dev != rootdev) {
+ log_debug("Ignoring \"%s/%s\": different filesystem.", p, dent->d_name);
+ continue;
+ }
+
+ /* Try to detect bind mounts of the same filesystem instance; they
+ * do not differ in device major/minors. This type of query is not
+ * supported on all kernels or filesystem types though. */
+ if (S_ISDIR(s.st_mode) && dir_is_mount_point(d, dent->d_name) > 0) {
+ log_debug("Ignoring \"%s/%s\": different mount of the same filesystem.",
+ p, dent->d_name);
+ continue;
+ }
+
+ sub_path = strjoin(p, "/", dent->d_name);
+ if (!sub_path) {
+ r = log_oom();
+ goto finish;
+ }
+
+ /* Is there an item configured for this path? */
+ if (ordered_hashmap_get(items, sub_path)) {
+ log_debug("Ignoring \"%s\": a separate entry exists.", sub_path);
+ continue;
+ }
+
+ if (find_glob(globs, sub_path)) {
+ log_debug("Ignoring \"%s\": a separate glob exists.", sub_path);
+ continue;
+ }
+
+ if (S_ISDIR(s.st_mode)) {
+
+ if (mountpoint &&
+ streq(dent->d_name, "lost+found") &&
+ s.st_uid == 0) {
+ log_debug("Ignoring \"%s\".", sub_path);
+ continue;
+ }
+
+ if (maxdepth <= 0)
+ log_warning("Reached max depth on \"%s\".", sub_path);
+ else {
+ _cleanup_closedir_ DIR *sub_dir;
+ int q;
+
+ sub_dir = xopendirat_nomod(dirfd(d), dent->d_name);
+ if (!sub_dir) {
+ if (errno != ENOENT)
+ r = log_error_errno(errno, "opendir(%s) failed: %m", sub_path);
+
+ continue;
+ }
+
+ q = dir_cleanup(i, sub_path, sub_dir, &s, cutoff, rootdev, false, maxdepth-1, false);
+ if (q < 0)
+ r = q;
+ }
+
+ /* Note: if you are wondering why we don't
+ * support the sticky bit for excluding
+ * directories from cleaning like we do it for
+ * other file system objects: well, the sticky
+ * bit already has a meaning for directories,
+ * so we don't want to overload that. */
+
+ if (keep_this_level) {
+ log_debug("Keeping \"%s\".", sub_path);
+ continue;
+ }
+
+ /* Ignore ctime, we change it when deleting */
+ age = timespec_load(&s.st_mtim);
+ if (age >= cutoff) {
+ char a[FORMAT_TIMESTAMP_MAX];
+ /* Follows spelling in stat(1). */
+ log_debug("Directory \"%s\": modify time %s is too new.",
+ sub_path,
+ format_timestamp_us(a, sizeof(a), age));
+ continue;
+ }
+
+ age = timespec_load(&s.st_atim);
+ if (age >= cutoff) {
+ char a[FORMAT_TIMESTAMP_MAX];
+ log_debug("Directory \"%s\": access time %s is too new.",
+ sub_path,
+ format_timestamp_us(a, sizeof(a), age));
+ continue;
+ }
+
+ log_debug("Removing directory \"%s\".", sub_path);
+ if (unlinkat(dirfd(d), dent->d_name, AT_REMOVEDIR) < 0)
+ if (!IN_SET(errno, ENOENT, ENOTEMPTY))
+ r = log_error_errno(errno, "rmdir(%s): %m", sub_path);
+
+ } else {
+ /* Skip files for which the sticky bit is
+ * set. These are semantics we define, and are
+ * unknown elsewhere. See XDG_RUNTIME_DIR
+ * specification for details. */
+ if (s.st_mode & S_ISVTX) {
+ log_debug("Skipping \"%s\": sticky bit set.", sub_path);
+ continue;
+ }
+
+ if (mountpoint && S_ISREG(s.st_mode))
+ if (s.st_uid == 0 && STR_IN_SET(dent->d_name,
+ ".journal",
+ "aquota.user",
+ "aquota.group")) {
+ log_debug("Skipping \"%s\".", sub_path);
+ continue;
+ }
+
+ /* Ignore sockets that are listed in /proc/net/unix */
+ if (S_ISSOCK(s.st_mode) && unix_socket_alive(sub_path)) {
+ log_debug("Skipping \"%s\": live socket.", sub_path);
+ continue;
+ }
+
+ /* Ignore device nodes */
+ if (S_ISCHR(s.st_mode) || S_ISBLK(s.st_mode)) {
+ log_debug("Skipping \"%s\": a device.", sub_path);
+ continue;
+ }
+
+ /* Keep files on this level around if this is
+ * requested */
+ if (keep_this_level) {
+ log_debug("Keeping \"%s\".", sub_path);
+ continue;
+ }
+
+ age = timespec_load(&s.st_mtim);
+ if (age >= cutoff) {
+ char a[FORMAT_TIMESTAMP_MAX];
+ /* Follows spelling in stat(1). */
+ log_debug("File \"%s\": modify time %s is too new.",
+ sub_path,
+ format_timestamp_us(a, sizeof(a), age));
+ continue;
+ }
+
+ age = timespec_load(&s.st_atim);
+ if (age >= cutoff) {
+ char a[FORMAT_TIMESTAMP_MAX];
+ log_debug("File \"%s\": access time %s is too new.",
+ sub_path,
+ format_timestamp_us(a, sizeof(a), age));
+ continue;
+ }
+
+ age = timespec_load(&s.st_ctim);
+ if (age >= cutoff) {
+ char a[FORMAT_TIMESTAMP_MAX];
+ log_debug("File \"%s\": change time %s is too new.",
+ sub_path,
+ format_timestamp_us(a, sizeof(a), age));
+ continue;
+ }
+
+ log_debug("unlink \"%s\"", sub_path);
+
+ if (unlinkat(dirfd(d), dent->d_name, 0) < 0)
+ if (errno != ENOENT)
+ r = log_error_errno(errno, "unlink(%s): %m", sub_path);
+
+ deleted = true;
+ }
+ }
+
+finish:
+ if (deleted) {
+ usec_t age1, age2;
+ char a[FORMAT_TIMESTAMP_MAX], b[FORMAT_TIMESTAMP_MAX];
+
+ /* Restore original directory timestamps */
+ times[0] = ds->st_atim;
+ times[1] = ds->st_mtim;
+
+ age1 = timespec_load(&ds->st_atim);
+ age2 = timespec_load(&ds->st_mtim);
+ log_debug("Restoring access and modification time on \"%s\": %s, %s",
+ p,
+ format_timestamp_us(a, sizeof(a), age1),
+ format_timestamp_us(b, sizeof(b), age2));
+ if (futimens(dirfd(d), times) < 0)
+ log_error_errno(errno, "utimensat(%s): %m", p);
+ }
+
+ return r;
+}
+
+static bool dangerous_hardlinks(void) {
+ _cleanup_free_ char *value = NULL;
+ static int cached = -1;
+ int r;
+
+ /* Check whether the fs.protected_hardlinks sysctl is on. If we can't determine it we assume its off, as that's
+ * what the upstream default is. */
+
+ if (cached >= 0)
+ return cached;
+
+ r = read_one_line_file("/proc/sys/fs/protected_hardlinks", &value);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to read fs.protected_hardlinks sysctl: %m");
+ return true;
+ }
+
+ r = parse_boolean(value);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse fs.protected_hardlinks sysctl: %m");
+ return true;
+ }
+
+ cached = r == 0;
+ return cached;
+}
+
+static bool hardlink_vulnerable(const struct stat *st) {
+ assert(st);
+
+ return !S_ISDIR(st->st_mode) && st->st_nlink > 1 && dangerous_hardlinks();
+}
+
+static int fd_set_perms(Item *i, int fd, const char *path, const struct stat *st) {
+ struct stat stbuf;
+
+ assert(i);
+ assert(fd);
+ assert(path);
+
+ if (!i->mode_set && !i->uid_set && !i->gid_set)
+ goto shortcut;
+
+ if (!st) {
+ if (fstat(fd, &stbuf) < 0)
+ return log_error_errno(errno, "fstat(%s) failed: %m", path);
+ st = &stbuf;
+ }
+
+ if (hardlink_vulnerable(st))
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Refusing to set permissions on hardlinked file %s while the fs.protected_hardlinks sysctl is turned off.",
+ path);
+
+ if (i->mode_set) {
+ if (S_ISLNK(st->st_mode))
+ log_debug("Skipping mode fix for symlink %s.", path);
+ else {
+ mode_t m = i->mode;
+
+ if (i->mask_perms) {
+ if (!(st->st_mode & 0111))
+ m &= ~0111;
+ if (!(st->st_mode & 0222))
+ m &= ~0222;
+ if (!(st->st_mode & 0444))
+ m &= ~0444;
+ if (!S_ISDIR(st->st_mode))
+ m &= ~07000; /* remove sticky/sgid/suid bit, unless directory */
+ }
+
+ if (m == (st->st_mode & 07777))
+ log_debug("\"%s\" has correct mode %o already.", path, st->st_mode);
+ else {
+ log_debug("Changing \"%s\" to mode %o.", path, m);
+ if (fchmod_opath(fd, m) < 0)
+ return log_error_errno(errno, "fchmod() of %s failed: %m", path);
+ }
+ }
+ }
+
+ if ((i->uid_set && i->uid != st->st_uid) ||
+ (i->gid_set && i->gid != st->st_gid)) {
+ log_debug("Changing \"%s\" to owner "UID_FMT":"GID_FMT,
+ path,
+ i->uid_set ? i->uid : UID_INVALID,
+ i->gid_set ? i->gid : GID_INVALID);
+
+ if (fchownat(fd,
+ "",
+ i->uid_set ? i->uid : UID_INVALID,
+ i->gid_set ? i->gid : GID_INVALID,
+ AT_EMPTY_PATH) < 0)
+ return log_error_errno(errno, "fchownat() of %s failed: %m", path);
+ }
+
+shortcut:
+ return label_fix(path, 0);
+}
+
+static int path_open_parent_safe(const char *path) {
+ _cleanup_free_ char *dn = NULL;
+ int fd;
+
+ if (path_equal(path, "/") || !path_is_normalized(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to open parent of '%s': invalid path.",
+ path);
+
+ dn = dirname_malloc(path);
+ if (!dn)
+ return log_oom();
+
+ fd = chase_symlinks(dn, NULL, CHASE_OPEN|CHASE_SAFE|CHASE_WARN, NULL);
+ if (fd < 0 && fd != -ENOLINK)
+ return log_error_errno(fd, "Failed to validate path %s: %m", path);
+
+ return fd;
+}
+
+static int path_open_safe(const char *path) {
+ int fd;
+
+ /* path_open_safe() returns a file descriptor opened with O_PATH after
+ * verifying that the path doesn't contain unsafe transitions, except
+ * for its final component as the function does not follow symlink. */
+
+ assert(path);
+
+ if (!path_is_normalized(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to open invalid path '%s'.",
+ path);
+
+ fd = chase_symlinks(path, NULL, CHASE_OPEN|CHASE_SAFE|CHASE_WARN|CHASE_NOFOLLOW, NULL);
+ if (fd < 0 && fd != -ENOLINK)
+ return log_error_errno(fd, "Failed to validate path %s: %m", path);
+
+ return fd;
+}
+
+static int path_set_perms(Item *i, const char *path) {
+ _cleanup_close_ int fd = -1;
+
+ assert(i);
+ assert(path);
+
+ fd = path_open_safe(path);
+ if (fd < 0)
+ return fd;
+
+ return fd_set_perms(i, fd, path, NULL);
+}
+
+static int parse_xattrs_from_arg(Item *i) {
+ const char *p;
+ int r;
+
+ assert(i);
+ assert(i->argument);
+
+ p = i->argument;
+
+ for (;;) {
+ _cleanup_free_ char *name = NULL, *value = NULL, *xattr = NULL;
+
+ r = extract_first_word(&p, &xattr, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse extended attribute '%s', ignoring: %m", p);
+ if (r <= 0)
+ break;
+
+ r = split_pair(xattr, "=", &name, &value);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse extended attribute, ignoring: %s", xattr);
+ continue;
+ }
+
+ if (isempty(name) || isempty(value)) {
+ log_warning("Malformed extended attribute found, ignoring: %s", xattr);
+ continue;
+ }
+
+ if (strv_push_pair(&i->xattrs, name, value) < 0)
+ return log_oom();
+
+ name = value = NULL;
+ }
+
+ return 0;
+}
+
+static int fd_set_xattrs(Item *i, int fd, const char *path, const struct stat *st) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ char **name, **value;
+
+ assert(i);
+ assert(fd);
+ assert(path);
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+
+ STRV_FOREACH_PAIR(name, value, i->xattrs) {
+ log_debug("Setting extended attribute '%s=%s' on %s.", *name, *value, path);
+ if (setxattr(procfs_path, *name, *value, strlen(*value), 0) < 0)
+ return log_error_errno(errno, "Setting extended attribute %s=%s on %s failed: %m",
+ *name, *value, path);
+ }
+ return 0;
+}
+
+static int path_set_xattrs(Item *i, const char *path) {
+ _cleanup_close_ int fd = -1;
+
+ assert(i);
+ assert(path);
+
+ fd = path_open_safe(path);
+ if (fd < 0)
+ return fd;
+
+ return fd_set_xattrs(i, fd, path, NULL);
+}
+
+static int parse_acls_from_arg(Item *item) {
+#if HAVE_ACL
+ int r;
+
+ assert(item);
+
+ /* If force (= modify) is set, we will not modify the acl
+ * afterwards, so the mask can be added now if necessary. */
+
+ r = parse_acl(item->argument, &item->acl_access, &item->acl_default, !item->force);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse ACL \"%s\": %m. Ignoring", item->argument);
+#else
+ log_warning_errno(SYNTHETIC_ERRNO(ENOSYS), "ACLs are not supported. Ignoring");
+#endif
+
+ return 0;
+}
+
+#if HAVE_ACL
+static int path_set_acl(const char *path, const char *pretty, acl_type_t type, acl_t acl, bool modify) {
+ _cleanup_(acl_free_charpp) char *t = NULL;
+ _cleanup_(acl_freep) acl_t dup = NULL;
+ int r;
+
+ /* Returns 0 for success, positive error if already warned,
+ * negative error otherwise. */
+
+ if (modify) {
+ r = acls_for_file(path, type, acl, &dup);
+ if (r < 0)
+ return r;
+
+ r = calc_acl_mask_if_needed(&dup);
+ if (r < 0)
+ return r;
+ } else {
+ dup = acl_dup(acl);
+ if (!dup)
+ return -errno;
+
+ /* the mask was already added earlier if needed */
+ }
+
+ r = add_base_acls_if_needed(&dup, path);
+ if (r < 0)
+ return r;
+
+ t = acl_to_any_text(dup, NULL, ',', TEXT_ABBREVIATE);
+ log_debug("Setting %s ACL %s on %s.",
+ type == ACL_TYPE_ACCESS ? "access" : "default",
+ strna(t), pretty);
+
+ r = acl_set_file(path, type, dup);
+ if (r < 0)
+ /* Return positive to indicate we already warned */
+ return -log_error_errno(errno,
+ "Setting %s ACL \"%s\" on %s failed: %m",
+ type == ACL_TYPE_ACCESS ? "access" : "default",
+ strna(t), pretty);
+
+ return 0;
+}
+#endif
+
+static int fd_set_acls(Item *item, int fd, const char *path, const struct stat *st) {
+ int r = 0;
+#if HAVE_ACL
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ struct stat stbuf;
+
+ assert(item);
+ assert(fd);
+ assert(path);
+
+ if (!st) {
+ if (fstat(fd, &stbuf) < 0)
+ return log_error_errno(errno, "fstat(%s) failed: %m", path);
+ st = &stbuf;
+ }
+
+ if (hardlink_vulnerable(st))
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Refusing to set ACLs on hardlinked file %s while the fs.protected_hardlinks sysctl is turned off.",
+ path);
+
+ if (S_ISLNK(st->st_mode)) {
+ log_debug("Skipping ACL fix for symlink %s.", path);
+ return 0;
+ }
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+
+ if (item->acl_access)
+ r = path_set_acl(procfs_path, path, ACL_TYPE_ACCESS, item->acl_access, item->force);
+
+ /* set only default acls to folders */
+ if (r == 0 && item->acl_default && S_ISDIR(st->st_mode))
+ r = path_set_acl(procfs_path, path, ACL_TYPE_DEFAULT, item->acl_default, item->force);
+
+ if (r > 0)
+ return -r; /* already warned */
+ if (r == -EOPNOTSUPP) {
+ log_debug_errno(r, "ACLs not supported by file system at %s", path);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "ACL operation on \"%s\" failed: %m", path);
+#endif
+ return r;
+}
+
+static int path_set_acls(Item *item, const char *path) {
+ int r = 0;
+#if HAVE_ACL
+ _cleanup_close_ int fd = -1;
+
+ assert(item);
+ assert(path);
+
+ fd = path_open_safe(path);
+ if (fd < 0)
+ return fd;
+
+ r = fd_set_acls(item, fd, path, NULL);
+#endif
+ return r;
+}
+
+#define ATTRIBUTES_ALL \
+ (FS_NOATIME_FL | \
+ FS_SYNC_FL | \
+ FS_DIRSYNC_FL | \
+ FS_APPEND_FL | \
+ FS_COMPR_FL | \
+ FS_NODUMP_FL | \
+ FS_EXTENT_FL | \
+ FS_IMMUTABLE_FL | \
+ FS_JOURNAL_DATA_FL | \
+ FS_SECRM_FL | \
+ FS_UNRM_FL | \
+ FS_NOTAIL_FL | \
+ FS_TOPDIR_FL | \
+ FS_NOCOW_FL)
+
+static int parse_attribute_from_arg(Item *item) {
+
+ static const struct {
+ char character;
+ unsigned value;
+ } attributes[] = {
+ { 'A', FS_NOATIME_FL }, /* do not update atime */
+ { 'S', FS_SYNC_FL }, /* Synchronous updates */
+ { 'D', FS_DIRSYNC_FL }, /* dirsync behaviour (directories only) */
+ { 'a', FS_APPEND_FL }, /* writes to file may only append */
+ { 'c', FS_COMPR_FL }, /* Compress file */
+ { 'd', FS_NODUMP_FL }, /* do not dump file */
+ { 'e', FS_EXTENT_FL }, /* Extents */
+ { 'i', FS_IMMUTABLE_FL }, /* Immutable file */
+ { 'j', FS_JOURNAL_DATA_FL }, /* Reserved for ext3 */
+ { 's', FS_SECRM_FL }, /* Secure deletion */
+ { 'u', FS_UNRM_FL }, /* Undelete */
+ { 't', FS_NOTAIL_FL }, /* file tail should not be merged */
+ { 'T', FS_TOPDIR_FL }, /* Top of directory hierarchies */
+ { 'C', FS_NOCOW_FL }, /* Do not cow file */
+ };
+
+ enum {
+ MODE_ADD,
+ MODE_DEL,
+ MODE_SET
+ } mode = MODE_ADD;
+
+ unsigned value = 0, mask = 0;
+ const char *p;
+
+ assert(item);
+
+ p = item->argument;
+ if (p) {
+ if (*p == '+') {
+ mode = MODE_ADD;
+ p++;
+ } else if (*p == '-') {
+ mode = MODE_DEL;
+ p++;
+ } else if (*p == '=') {
+ mode = MODE_SET;
+ p++;
+ }
+ }
+
+ if (isempty(p) && mode != MODE_SET)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Setting file attribute on '%s' needs an attribute specification.",
+ item->path);
+
+ for (; p && *p ; p++) {
+ unsigned i, v;
+
+ for (i = 0; i < ELEMENTSOF(attributes); i++)
+ if (*p == attributes[i].character)
+ break;
+
+ if (i >= ELEMENTSOF(attributes))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown file attribute '%c' on '%s'.",
+ *p, item->path);
+
+ v = attributes[i].value;
+
+ SET_FLAG(value, v, IN_SET(mode, MODE_ADD, MODE_SET));
+
+ mask |= v;
+ }
+
+ if (mode == MODE_SET)
+ mask |= ATTRIBUTES_ALL;
+
+ assert(mask != 0);
+
+ item->attribute_mask = mask;
+ item->attribute_value = value;
+ item->attribute_set = true;
+
+ return 0;
+}
+
+static int fd_set_attribute(Item *item, int fd, const char *path, const struct stat *st) {
+ _cleanup_close_ int procfs_fd = -1;
+ struct stat stbuf;
+ unsigned f;
+ int r;
+
+ assert(item);
+ assert(fd);
+ assert(path);
+
+ if (!item->attribute_set || item->attribute_mask == 0)
+ return 0;
+
+ if (!st) {
+ if (fstat(fd, &stbuf) < 0)
+ return log_error_errno(errno, "fstat(%s) failed: %m", path);
+ st = &stbuf;
+ }
+
+ /* Issuing the file attribute ioctls on device nodes is not
+ * safe, as that will be delivered to the drivers, not the
+ * file system containing the device node. */
+ if (!S_ISREG(st->st_mode) && !S_ISDIR(st->st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Setting file flags is only supported on regular files and directories, cannot set on '%s'.",
+ path);
+
+ f = item->attribute_value & item->attribute_mask;
+
+ /* Mask away directory-specific flags */
+ if (!S_ISDIR(st->st_mode))
+ f &= ~FS_DIRSYNC_FL;
+
+ procfs_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NOATIME);
+ if (procfs_fd < 0)
+ return log_error_errno(procfs_fd, "Failed to re-open '%s': %m", path);
+
+ r = chattr_fd(procfs_fd, f, item->attribute_mask, NULL);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOTTY, -EOPNOTSUPP) ? LOG_DEBUG : LOG_WARNING,
+ r,
+ "Cannot set file attribute for '%s', value=0x%08x, mask=0x%08x, ignoring: %m",
+ path, item->attribute_value, item->attribute_mask);
+
+ return 0;
+}
+
+static int path_set_attribute(Item *item, const char *path) {
+ _cleanup_close_ int fd = -1;
+
+ if (!item->attribute_set || item->attribute_mask == 0)
+ return 0;
+
+ fd = path_open_safe(path);
+ if (fd < 0)
+ return fd;
+
+ return fd_set_attribute(item, fd, path, NULL);
+}
+
+static int write_one_file(Item *i, const char *path) {
+ _cleanup_close_ int fd = -1, dir_fd = -1;
+ char *bn;
+ int r;
+
+ assert(i);
+ assert(path);
+ assert(i->argument);
+ assert(i->type == WRITE_FILE);
+
+ /* Validate the path and keep the fd on the directory for opening the
+ * file so we're sure that it can't be changed behind our back. */
+ dir_fd = path_open_parent_safe(path);
+ if (dir_fd < 0)
+ return dir_fd;
+
+ bn = basename(path);
+
+ /* Follows symlinks */
+ fd = openat(dir_fd, bn, O_NONBLOCK|O_CLOEXEC|O_WRONLY|O_NOCTTY, i->mode);
+ if (fd < 0) {
+ if (errno == ENOENT) {
+ log_debug_errno(errno, "Not writing missing file \"%s\": %m", path);
+ return 0;
+ }
+ return log_error_errno(errno, "Failed to open file \"%s\": %m", path);
+ }
+
+ /* 'w' is allowed to write into any kind of files. */
+ log_debug("Writing to \"%s\".", path);
+
+ r = loop_write(fd, i->argument, strlen(i->argument), false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write file \"%s\": %m", path);
+
+ return fd_set_perms(i, fd, path, NULL);
+}
+
+static int create_file(Item *i, const char *path) {
+ _cleanup_close_ int fd = -1, dir_fd = -1;
+ struct stat stbuf, *st = NULL;
+ int r = 0;
+ char *bn;
+
+ assert(i);
+ assert(path);
+ assert(i->type == CREATE_FILE);
+
+ /* 'f' operates on regular files exclusively. */
+
+ /* Validate the path and keep the fd on the directory for opening the
+ * file so we're sure that it can't be changed behind our back. */
+ dir_fd = path_open_parent_safe(path);
+ if (dir_fd < 0)
+ return dir_fd;
+
+ bn = basename(path);
+
+ RUN_WITH_UMASK(0000) {
+ mac_selinux_create_file_prepare(path, S_IFREG);
+ fd = openat(dir_fd, bn, O_CREAT|O_EXCL|O_NOFOLLOW|O_NONBLOCK|O_CLOEXEC|O_WRONLY|O_NOCTTY, i->mode);
+ mac_selinux_create_file_clear();
+ }
+
+ if (fd < 0) {
+ /* Even on a read-only filesystem, open(2) returns EEXIST if the
+ * file already exists. It returns EROFS only if it needs to
+ * create the file. */
+ if (errno != EEXIST)
+ return log_error_errno(errno, "Failed to create file %s: %m", path);
+
+ /* Re-open the file. At that point it must exist since open(2)
+ * failed with EEXIST. We still need to check if the perms/mode
+ * need to be changed. For read-only filesystems, we let
+ * fd_set_perms() report the error if the perms need to be
+ * modified. */
+ fd = openat(dir_fd, bn, O_NOFOLLOW|O_CLOEXEC|O_PATH, i->mode);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to re-open file %s: %m", path);
+
+ if (fstat(fd, &stbuf) < 0)
+ return log_error_errno(errno, "stat(%s) failed: %m", path);
+
+ if (!S_ISREG(stbuf.st_mode)) {
+ log_error("%s exists and is not a regular file.", path);
+ return -EEXIST;
+ }
+
+ st = &stbuf;
+ } else {
+
+ log_debug("\"%s\" has been created.", path);
+
+ if (i->argument) {
+ log_debug("Writing to \"%s\".", path);
+
+ r = loop_write(fd, i->argument, strlen(i->argument), false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write file \"%s\": %m", path);
+ }
+ }
+
+ return fd_set_perms(i, fd, path, st);
+}
+
+static int truncate_file(Item *i, const char *path) {
+ _cleanup_close_ int fd = -1, dir_fd = -1;
+ struct stat stbuf, *st = NULL;
+ bool erofs = false;
+ int r = 0;
+ char *bn;
+
+ assert(i);
+ assert(path);
+ assert(i->type == TRUNCATE_FILE);
+
+ /* We want to operate on regular file exclusively especially since
+ * O_TRUNC is unspecified if the file is neither a regular file nor a
+ * fifo nor a terminal device. Therefore we first open the file and make
+ * sure it's a regular one before truncating it. */
+
+ /* Validate the path and keep the fd on the directory for opening the
+ * file so we're sure that it can't be changed behind our back. */
+ dir_fd = path_open_parent_safe(path);
+ if (dir_fd < 0)
+ return dir_fd;
+
+ bn = basename(path);
+
+ RUN_WITH_UMASK(0000) {
+ mac_selinux_create_file_prepare(path, S_IFREG);
+ fd = openat(dir_fd, bn, O_CREAT|O_NOFOLLOW|O_NONBLOCK|O_CLOEXEC|O_WRONLY|O_NOCTTY, i->mode);
+ mac_selinux_create_file_clear();
+ }
+
+ if (fd < 0) {
+ if (errno != EROFS)
+ return log_error_errno(errno, "Failed to open/create file %s: %m", path);
+
+ /* On a read-only filesystem, we don't want to fail if the
+ * target is already empty and the perms are set. So we still
+ * proceed with the sanity checks and let the remaining
+ * operations fail with EROFS if they try to modify the target
+ * file. */
+
+ fd = openat(dir_fd, bn, O_NOFOLLOW|O_CLOEXEC|O_PATH, i->mode);
+ if (fd < 0) {
+ if (errno == ENOENT) {
+ log_error("Cannot create file %s on a read-only file system.", path);
+ return -EROFS;
+ }
+
+ return log_error_errno(errno, "Failed to re-open file %s: %m", path);
+ }
+
+ erofs = true;
+ }
+
+ if (fstat(fd, &stbuf) < 0)
+ return log_error_errno(errno, "stat(%s) failed: %m", path);
+
+ if (!S_ISREG(stbuf.st_mode)) {
+ log_error("%s exists and is not a regular file.", path);
+ return -EEXIST;
+ }
+
+ if (stbuf.st_size > 0) {
+ if (ftruncate(fd, 0) < 0) {
+ r = erofs ? -EROFS : -errno;
+ return log_error_errno(r, "Failed to truncate file %s: %m", path);
+ }
+ } else
+ st = &stbuf;
+
+ log_debug("\"%s\" has been created.", path);
+
+ if (i->argument) {
+ log_debug("Writing to \"%s\".", path);
+
+ r = loop_write(fd, i->argument, strlen(i->argument), false);
+ if (r < 0) {
+ r = erofs ? -EROFS : r;
+ return log_error_errno(r, "Failed to write file %s: %m", path);
+ }
+ }
+
+ return fd_set_perms(i, fd, path, st);
+}
+
+static int copy_files(Item *i) {
+ _cleanup_close_ int dfd = -1, fd = -1;
+ char *bn;
+ int r;
+
+ log_debug("Copying tree \"%s\" to \"%s\".", i->argument, i->path);
+
+ bn = basename(i->path);
+
+ /* Validate the path and use the returned directory fd for copying the
+ * target so we're sure that the path can't be changed behind our
+ * back. */
+ dfd = path_open_parent_safe(i->path);
+ if (dfd < 0)
+ return dfd;
+
+ r = copy_tree_at(AT_FDCWD, i->argument,
+ dfd, bn,
+ i->uid_set ? i->uid : UID_INVALID,
+ i->gid_set ? i->gid : GID_INVALID,
+ COPY_REFLINK | COPY_MERGE_EMPTY);
+ if (r < 0) {
+ struct stat a, b;
+
+ /* If the target already exists on read-only filesystems, trying
+ * to create the target will not fail with EEXIST but with
+ * EROFS. */
+ if (r == -EROFS && faccessat(dfd, bn, F_OK, AT_SYMLINK_NOFOLLOW) == 0)
+ r = -EEXIST;
+
+ if (r != -EEXIST)
+ return log_error_errno(r, "Failed to copy files to %s: %m", i->path);
+
+ if (stat(i->argument, &a) < 0)
+ return log_error_errno(errno, "stat(%s) failed: %m", i->argument);
+
+ if (fstatat(dfd, bn, &b, AT_SYMLINK_NOFOLLOW) < 0)
+ return log_error_errno(errno, "stat(%s) failed: %m", i->path);
+
+ if ((a.st_mode ^ b.st_mode) & S_IFMT) {
+ log_debug("Can't copy to %s, file exists already and is of different type", i->path);
+ return 0;
+ }
+ }
+
+ fd = openat(dfd, bn, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to openat(%s): %m", i->path);
+
+ return fd_set_perms(i, fd, i->path, NULL);
+}
+
+typedef enum {
+ CREATION_NORMAL,
+ CREATION_EXISTING,
+ CREATION_FORCE,
+ _CREATION_MODE_MAX,
+ _CREATION_MODE_INVALID = -1
+} CreationMode;
+
+static const char *creation_mode_verb_table[_CREATION_MODE_MAX] = {
+ [CREATION_NORMAL] = "Created",
+ [CREATION_EXISTING] = "Found existing",
+ [CREATION_FORCE] = "Created replacement",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(creation_mode_verb, CreationMode);
+
+static int create_directory_or_subvolume(const char *path, mode_t mode, bool subvol, CreationMode *creation) {
+ _cleanup_close_ int pfd = -1;
+ CreationMode c;
+ int r;
+
+ assert(path);
+
+ if (!creation)
+ creation = &c;
+
+ pfd = path_open_parent_safe(path);
+ if (pfd < 0)
+ return pfd;
+
+ if (subvol) {
+ if (btrfs_is_subvol(empty_to_root(arg_root)) <= 0)
+
+ /* Don't create a subvolume unless the root directory is
+ * one, too. We do this under the assumption that if the
+ * root directory is just a plain directory (i.e. very
+ * light-weight), we shouldn't try to split it up into
+ * subvolumes (i.e. more heavy-weight). Thus, chroot()
+ * environments and suchlike will get a full brtfs
+ * subvolume set up below their tree only if they
+ * specifically set up a btrfs subvolume for the root
+ * dir too. */
+
+ subvol = false;
+ else {
+ RUN_WITH_UMASK((~mode) & 0777)
+ r = btrfs_subvol_make_fd(pfd, basename(path));
+ }
+ } else
+ r = 0;
+
+ if (!subvol || r == -ENOTTY)
+ RUN_WITH_UMASK(0000)
+ r = mkdirat_label(pfd, basename(path), mode);
+
+ if (r < 0) {
+ int k;
+
+ if (!IN_SET(r, -EEXIST, -EROFS))
+ return log_error_errno(r, "Failed to create directory or subvolume \"%s\": %m", path);
+
+ k = is_dir_fd(pfd);
+ if (k == -ENOENT && r == -EROFS)
+ return log_error_errno(r, "%s does not exist and cannot be created as the file system is read-only.", path);
+ if (k < 0)
+ return log_error_errno(k, "Failed to check if %s exists: %m", path);
+ if (!k) {
+ log_warning("\"%s\" already exists and is not a directory.", path);
+ return -EEXIST;
+ }
+
+ *creation = CREATION_EXISTING;
+ } else
+ *creation = CREATION_NORMAL;
+
+ log_debug("%s directory \"%s\".", creation_mode_verb_to_string(*creation), path);
+
+ r = openat(pfd, basename(path), O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to open directory '%s': %m", basename(path));
+
+ return r;
+}
+
+static int create_directory(Item *i, const char *path) {
+ _cleanup_close_ int fd = -1;
+
+ assert(i);
+ assert(IN_SET(i->type, CREATE_DIRECTORY, TRUNCATE_DIRECTORY));
+
+ fd = create_directory_or_subvolume(path, i->mode, false, NULL);
+ if (fd == -EEXIST)
+ return 0;
+ if (fd < 0)
+ return fd;
+
+ return fd_set_perms(i, fd, path, NULL);
+}
+
+static int create_subvolume(Item *i, const char *path) {
+ _cleanup_close_ int fd = -1;
+ CreationMode creation;
+ int r, q = 0;
+
+ assert(i);
+ assert(IN_SET(i->type, CREATE_SUBVOLUME, CREATE_SUBVOLUME_NEW_QUOTA, CREATE_SUBVOLUME_INHERIT_QUOTA));
+
+ fd = create_directory_or_subvolume(path, i->mode, true, &creation);
+ if (fd == -EEXIST)
+ return 0;
+ if (fd < 0)
+ return fd;
+
+ if (creation == CREATION_NORMAL &&
+ IN_SET(i->type, CREATE_SUBVOLUME_NEW_QUOTA, CREATE_SUBVOLUME_INHERIT_QUOTA)) {
+ r = btrfs_subvol_auto_qgroup_fd(fd, 0, i->type == CREATE_SUBVOLUME_NEW_QUOTA);
+ if (r == -ENOTTY)
+ log_debug_errno(r, "Couldn't adjust quota for subvolume \"%s\" (unsupported fs or dir not a subvolume): %m", i->path);
+ else if (r == -EROFS)
+ log_debug_errno(r, "Couldn't adjust quota for subvolume \"%s\" (fs is read-only).", i->path);
+ else if (r == -ENOPROTOOPT)
+ log_debug_errno(r, "Couldn't adjust quota for subvolume \"%s\" (quota support is disabled).", i->path);
+ else if (r < 0)
+ q = log_error_errno(r, "Failed to adjust quota for subvolume \"%s\": %m", i->path);
+ else if (r > 0)
+ log_debug("Adjusted quota for subvolume \"%s\".", i->path);
+ else if (r == 0)
+ log_debug("Quota for subvolume \"%s\" already in place, no change made.", i->path);
+ }
+
+ r = fd_set_perms(i, fd, path, NULL);
+ if (q < 0) /* prefer the quota change error from above */
+ return q;
+
+ return r;
+}
+
+static int empty_directory(Item *i, const char *path) {
+ int r;
+
+ assert(i);
+ assert(i->type == EMPTY_DIRECTORY);
+
+ r = is_dir(path, false);
+ if (r == -ENOENT) {
+ /* Option "e" operates only on existing objects. Do not
+ * print errors about non-existent files or directories */
+ log_debug("Skipping missing directory: %s", path);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "is_dir() failed on path %s: %m", path);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "'%s' already exists and is not a directory.",
+ path);
+
+ return path_set_perms(i, path);
+}
+
+static int create_device(Item *i, mode_t file_type) {
+ _cleanup_close_ int dfd = -1, fd = -1;
+ CreationMode creation;
+ char *bn;
+ int r;
+
+ assert(i);
+ assert(IN_SET(file_type, S_IFBLK, S_IFCHR));
+
+ bn = basename(i->path);
+
+ /* Validate the path and use the returned directory fd for copying the
+ * target so we're sure that the path can't be changed behind our
+ * back. */
+ dfd = path_open_parent_safe(i->path);
+ if (dfd < 0)
+ return dfd;
+
+ RUN_WITH_UMASK(0000) {
+ mac_selinux_create_file_prepare(i->path, file_type);
+ r = mknodat(dfd, bn, i->mode | file_type, i->major_minor);
+ mac_selinux_create_file_clear();
+ }
+
+ if (r < 0) {
+ struct stat st;
+
+ if (errno == EPERM) {
+ log_debug("We lack permissions, possibly because of cgroup configuration; "
+ "skipping creation of device node %s.", i->path);
+ return 0;
+ }
+
+ if (errno != EEXIST)
+ return log_error_errno(errno, "Failed to create device node %s: %m", i->path);
+
+ if (fstatat(dfd, bn, &st, 0) < 0)
+ return log_error_errno(errno, "stat(%s) failed: %m", i->path);
+
+ if ((st.st_mode & S_IFMT) != file_type) {
+
+ if (i->force) {
+
+ RUN_WITH_UMASK(0000) {
+ mac_selinux_create_file_prepare(i->path, file_type);
+ /* FIXME: need to introduce mknodat_atomic() */
+ r = mknod_atomic(i->path, i->mode | file_type, i->major_minor);
+ mac_selinux_create_file_clear();
+ }
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to create device node \"%s\": %m", i->path);
+ creation = CREATION_FORCE;
+ } else {
+ log_debug("%s is not a device node.", i->path);
+ return 0;
+ }
+ } else
+ creation = CREATION_EXISTING;
+ } else
+ creation = CREATION_NORMAL;
+
+ log_debug("%s %s device node \"%s\" %u:%u.",
+ creation_mode_verb_to_string(creation),
+ i->type == CREATE_BLOCK_DEVICE ? "block" : "char",
+ i->path, major(i->mode), minor(i->mode));
+
+ fd = openat(dfd, bn, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to openat(%s): %m", i->path);
+
+ return fd_set_perms(i, fd, i->path, NULL);
+}
+
+static int create_fifo(Item *i, const char *path) {
+ _cleanup_close_ int pfd = -1, fd = -1;
+ CreationMode creation;
+ struct stat st;
+ char *bn;
+ int r;
+
+ pfd = path_open_parent_safe(path);
+ if (pfd < 0)
+ return pfd;
+
+ bn = basename(path);
+
+ RUN_WITH_UMASK(0000) {
+ mac_selinux_create_file_prepare(path, S_IFIFO);
+ r = mkfifoat(pfd, bn, i->mode);
+ mac_selinux_create_file_clear();
+ }
+
+ if (r < 0) {
+ if (errno != EEXIST)
+ return log_error_errno(errno, "Failed to create fifo %s: %m", path);
+
+ if (fstatat(pfd, bn, &st, AT_SYMLINK_NOFOLLOW) < 0)
+ return log_error_errno(errno, "stat(%s) failed: %m", path);
+
+ if (!S_ISFIFO(st.st_mode)) {
+
+ if (i->force) {
+ RUN_WITH_UMASK(0000) {
+ mac_selinux_create_file_prepare(path, S_IFIFO);
+ r = mkfifoat_atomic(pfd, bn, i->mode);
+ mac_selinux_create_file_clear();
+ }
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to create fifo %s: %m", path);
+ creation = CREATION_FORCE;
+ } else {
+ log_warning("\"%s\" already exists and is not a fifo.", path);
+ return 0;
+ }
+ } else
+ creation = CREATION_EXISTING;
+ } else
+ creation = CREATION_NORMAL;
+
+ log_debug("%s fifo \"%s\".", creation_mode_verb_to_string(creation), path);
+
+ fd = openat(pfd, bn, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to openat(%s): %m", path);
+
+ return fd_set_perms(i, fd, i->path, NULL);
+}
+
+typedef int (*action_t)(Item *i, const char *path);
+typedef int (*fdaction_t)(Item *i, int fd, const char *path, const struct stat *st);
+
+static int item_do(Item *i, int fd, const char *path, fdaction_t action) {
+ struct stat st;
+ int r = 0, q;
+
+ assert(i);
+ assert(path);
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0) {
+ r = log_error_errno(errno, "fstat() on file failed: %m");
+ goto finish;
+ }
+
+ /* This returns the first error we run into, but nevertheless
+ * tries to go on */
+ r = action(i, fd, path, &st);
+
+ if (S_ISDIR(st.st_mode)) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ /* The passed 'fd' was opened with O_PATH. We need to convert
+ * it into a 'regular' fd before reading the directory content. */
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+
+ d = opendir(procfs_path);
+ if (!d) {
+ log_error_errno(errno, "Failed to opendir() '%s': %m", procfs_path);
+ if (r == 0)
+ r = -errno;
+ goto finish;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, q = -errno; goto finish) {
+ int de_fd;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ de_fd = openat(fd, de->d_name, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (de_fd < 0)
+ q = log_error_errno(errno, "Failed to open() file '%s': %m", de->d_name);
+ else {
+ _cleanup_free_ char *de_path = NULL;
+
+ de_path = path_join(path, de->d_name);
+ if (!de_path)
+ q = log_oom();
+ else
+ /* Pass ownership of dirent fd over */
+ q = item_do(i, de_fd, de_path, action);
+ }
+
+ if (q < 0 && r == 0)
+ r = q;
+ }
+ }
+finish:
+ safe_close(fd);
+ return r;
+}
+
+static int glob_item(Item *i, action_t action) {
+ _cleanup_globfree_ glob_t g = {
+ .gl_opendir = (void *(*)(const char *)) opendir_nomod,
+ };
+ int r = 0, k;
+ char **fn;
+
+ k = safe_glob(i->path, GLOB_NOSORT|GLOB_BRACE, &g);
+ if (k < 0 && k != -ENOENT)
+ return log_error_errno(k, "glob(%s) failed: %m", i->path);
+
+ STRV_FOREACH(fn, g.gl_pathv) {
+ k = action(i, *fn);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int glob_item_recursively(Item *i, fdaction_t action) {
+ _cleanup_globfree_ glob_t g = {
+ .gl_opendir = (void *(*)(const char *)) opendir_nomod,
+ };
+ int r = 0, k;
+ char **fn;
+
+ k = safe_glob(i->path, GLOB_NOSORT|GLOB_BRACE, &g);
+ if (k < 0 && k != -ENOENT)
+ return log_error_errno(k, "glob(%s) failed: %m", i->path);
+
+ STRV_FOREACH(fn, g.gl_pathv) {
+ _cleanup_close_ int fd = -1;
+
+ /* Make sure we won't trigger/follow file object (such as
+ * device nodes, automounts, ...) pointed out by 'fn' with
+ * O_PATH. Note, when O_PATH is used, flags other than
+ * O_CLOEXEC, O_DIRECTORY, and O_NOFOLLOW are ignored. */
+
+ fd = open(*fn, O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (fd < 0) {
+ log_error_errno(errno, "Opening '%s' failed: %m", *fn);
+ if (r == 0)
+ r = -errno;
+ continue;
+ }
+
+ k = item_do(i, fd, *fn, action);
+ if (k < 0 && r == 0)
+ r = k;
+
+ /* we passed fd ownership to the previous call */
+ fd = -1;
+ }
+
+ return r;
+}
+
+static int create_item(Item *i) {
+ CreationMode creation;
+ int r = 0;
+
+ assert(i);
+
+ log_debug("Running create action for entry %c %s", (char) i->type, i->path);
+
+ switch (i->type) {
+
+ case IGNORE_PATH:
+ case IGNORE_DIRECTORY_PATH:
+ case REMOVE_PATH:
+ case RECURSIVE_REMOVE_PATH:
+ return 0;
+
+ case CREATE_FILE:
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ r = create_file(i, i->path);
+ if (r < 0)
+ return r;
+ break;
+
+ case TRUNCATE_FILE:
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ r = truncate_file(i, i->path);
+ if (r < 0)
+ return r;
+ break;
+
+ case COPY_FILES:
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ r = copy_files(i);
+ if (r < 0)
+ return r;
+ break;
+
+ case WRITE_FILE:
+ r = glob_item(i, write_one_file);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case CREATE_DIRECTORY:
+ case TRUNCATE_DIRECTORY:
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ r = create_directory(i, i->path);
+ if (r < 0)
+ return r;
+ break;
+
+ case CREATE_SUBVOLUME:
+ case CREATE_SUBVOLUME_INHERIT_QUOTA:
+ case CREATE_SUBVOLUME_NEW_QUOTA:
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ r = create_subvolume(i, i->path);
+ if (r < 0)
+ return r;
+ break;
+
+ case EMPTY_DIRECTORY:
+ r = glob_item(i, empty_directory);
+ if (r < 0)
+ return r;
+ break;
+
+ case CREATE_FIFO:
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ r = create_fifo(i, i->path);
+ if (r < 0)
+ return r;
+ break;
+
+ case CREATE_SYMLINK: {
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ mac_selinux_create_file_prepare(i->path, S_IFLNK);
+ r = symlink(i->argument, i->path);
+ mac_selinux_create_file_clear();
+
+ if (r < 0) {
+ _cleanup_free_ char *x = NULL;
+
+ if (errno != EEXIST)
+ return log_error_errno(errno, "symlink(%s, %s) failed: %m", i->argument, i->path);
+
+ r = readlink_malloc(i->path, &x);
+ if (r < 0 || !streq(i->argument, x)) {
+
+ if (i->force) {
+ mac_selinux_create_file_prepare(i->path, S_IFLNK);
+ r = symlink_atomic(i->argument, i->path);
+ mac_selinux_create_file_clear();
+
+ if (IN_SET(r, -EISDIR, -EEXIST, -ENOTEMPTY)) {
+ r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL);
+ if (r < 0)
+ return log_error_errno(r, "rm -fr %s failed: %m", i->path);
+
+ mac_selinux_create_file_prepare(i->path, S_IFLNK);
+ r = symlink(i->argument, i->path) < 0 ? -errno : 0;
+ mac_selinux_create_file_clear();
+ }
+ if (r < 0)
+ return log_error_errno(r, "symlink(%s, %s) failed: %m", i->argument, i->path);
+
+ creation = CREATION_FORCE;
+ } else {
+ log_debug("\"%s\" is not a symlink or does not point to the correct path.", i->path);
+ return 0;
+ }
+ } else
+ creation = CREATION_EXISTING;
+ } else
+
+ creation = CREATION_NORMAL;
+ log_debug("%s symlink \"%s\".", creation_mode_verb_to_string(creation), i->path);
+ break;
+ }
+
+ case CREATE_BLOCK_DEVICE:
+ case CREATE_CHAR_DEVICE:
+ if (have_effective_cap(CAP_MKNOD) == 0) {
+ /* In a container we lack CAP_MKNOD. We shouldn't attempt to create the device node in that
+ * case to avoid noise, and we don't support virtualized devices in containers anyway. */
+
+ log_debug("We lack CAP_MKNOD, skipping creation of device node %s.", i->path);
+ return 0;
+ }
+
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ r = create_device(i, i->type == CREATE_BLOCK_DEVICE ? S_IFBLK : S_IFCHR);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case ADJUST_MODE:
+ case RELABEL_PATH:
+ r = glob_item(i, path_set_perms);
+ if (r < 0)
+ return r;
+ break;
+
+ case RECURSIVE_RELABEL_PATH:
+ r = glob_item_recursively(i, fd_set_perms);
+ if (r < 0)
+ return r;
+ break;
+
+ case SET_XATTR:
+ r = glob_item(i, path_set_xattrs);
+ if (r < 0)
+ return r;
+ break;
+
+ case RECURSIVE_SET_XATTR:
+ r = glob_item_recursively(i, fd_set_xattrs);
+ if (r < 0)
+ return r;
+ break;
+
+ case SET_ACL:
+ r = glob_item(i, path_set_acls);
+ if (r < 0)
+ return r;
+ break;
+
+ case RECURSIVE_SET_ACL:
+ r = glob_item_recursively(i, fd_set_acls);
+ if (r < 0)
+ return r;
+ break;
+
+ case SET_ATTRIBUTE:
+ r = glob_item(i, path_set_attribute);
+ if (r < 0)
+ return r;
+ break;
+
+ case RECURSIVE_SET_ATTRIBUTE:
+ r = glob_item_recursively(i, fd_set_attribute);
+ if (r < 0)
+ return r;
+ break;
+ }
+
+ return 0;
+}
+
+static int remove_item_instance(Item *i, const char *instance) {
+ int r;
+
+ assert(i);
+
+ switch (i->type) {
+
+ case REMOVE_PATH:
+ if (remove(instance) < 0 && errno != ENOENT)
+ return log_error_errno(errno, "rm(%s): %m", instance);
+
+ break;
+
+ case RECURSIVE_REMOVE_PATH:
+ /* FIXME: we probably should use dir_cleanup() here instead of rm_rf() so that 'x' is honoured. */
+ log_debug("rm -rf \"%s\"", instance);
+ r = rm_rf(instance, REMOVE_ROOT|REMOVE_SUBVOLUME|REMOVE_PHYSICAL);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "rm_rf(%s): %m", instance);
+
+ break;
+
+ default:
+ assert_not_reached("wut?");
+ }
+
+ return 0;
+}
+
+static int remove_item(Item *i) {
+ int r;
+
+ assert(i);
+
+ log_debug("Running remove action for entry %c %s", (char) i->type, i->path);
+
+ switch (i->type) {
+
+ case TRUNCATE_DIRECTORY:
+ /* FIXME: we probably should use dir_cleanup() here instead of rm_rf() so that 'x' is honoured. */
+ log_debug("rm -rf \"%s\"", i->path);
+ r = rm_rf(i->path, REMOVE_PHYSICAL);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "rm_rf(%s): %m", i->path);
+
+ return 0;
+
+ case REMOVE_PATH:
+ case RECURSIVE_REMOVE_PATH:
+ return glob_item(i, remove_item_instance);
+
+ default:
+ return 0;
+ }
+}
+
+static int clean_item_instance(Item *i, const char* instance) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct stat s, ps;
+ bool mountpoint;
+ usec_t cutoff, n;
+ char timestamp[FORMAT_TIMESTAMP_MAX];
+
+ assert(i);
+
+ if (!i->age_set)
+ return 0;
+
+ n = now(CLOCK_REALTIME);
+ if (n < i->age)
+ return 0;
+
+ cutoff = n - i->age;
+
+ d = opendir_nomod(instance);
+ if (!d) {
+ if (IN_SET(errno, ENOENT, ENOTDIR)) {
+ log_debug_errno(errno, "Directory \"%s\": %m", instance);
+ return 0;
+ }
+
+ return log_error_errno(errno, "Failed to open directory %s: %m", instance);
+ }
+
+ if (fstat(dirfd(d), &s) < 0)
+ return log_error_errno(errno, "stat(%s) failed: %m", i->path);
+
+ if (!S_ISDIR(s.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTDIR),
+ "%s is not a directory.", i->path);
+
+ if (fstatat(dirfd(d), "..", &ps, AT_SYMLINK_NOFOLLOW) != 0)
+ return log_error_errno(errno, "stat(%s/..) failed: %m", i->path);
+
+ mountpoint = s.st_dev != ps.st_dev || s.st_ino == ps.st_ino;
+
+ log_debug("Cleanup threshold for %s \"%s\" is %s",
+ mountpoint ? "mount point" : "directory",
+ instance,
+ format_timestamp_us(timestamp, sizeof(timestamp), cutoff));
+
+ return dir_cleanup(i, instance, d, &s, cutoff, s.st_dev, mountpoint,
+ MAX_DEPTH, i->keep_first_level);
+}
+
+static int clean_item(Item *i) {
+ assert(i);
+
+ log_debug("Running clean action for entry %c %s", (char) i->type, i->path);
+
+ switch (i->type) {
+ case CREATE_DIRECTORY:
+ case CREATE_SUBVOLUME:
+ case CREATE_SUBVOLUME_INHERIT_QUOTA:
+ case CREATE_SUBVOLUME_NEW_QUOTA:
+ case TRUNCATE_DIRECTORY:
+ case IGNORE_PATH:
+ case COPY_FILES:
+ clean_item_instance(i, i->path);
+ return 0;
+ case EMPTY_DIRECTORY:
+ case IGNORE_DIRECTORY_PATH:
+ return glob_item(i, clean_item_instance);
+ default:
+ return 0;
+ }
+}
+
+static int process_item(Item *i, OperationMask operation) {
+ OperationMask todo;
+ int r, q, p;
+
+ assert(i);
+
+ todo = operation & ~i->done;
+ if (todo == 0) /* Everything already done? */
+ return 0;
+
+ i->done |= operation;
+
+ r = chase_symlinks(i->path, NULL, CHASE_NO_AUTOFS|CHASE_WARN, NULL);
+ if (r == -EREMOTE) {
+ log_notice_errno(r, "Skipping %s", i->path);
+ return 0;
+ }
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine whether '%s' is below autofs, ignoring: %m", i->path);
+
+ r = FLAGS_SET(operation, OPERATION_CREATE) ? create_item(i) : 0;
+ /* Failure can only be tolerated for create */
+ if (i->allow_failure)
+ r = 0;
+
+ q = FLAGS_SET(operation, OPERATION_REMOVE) ? remove_item(i) : 0;
+ p = FLAGS_SET(operation, OPERATION_CLEAN) ? clean_item(i) : 0;
+
+ return r < 0 ? r :
+ q < 0 ? q :
+ p;
+}
+
+static int process_item_array(ItemArray *array, OperationMask operation) {
+ int r = 0;
+ size_t n;
+
+ assert(array);
+
+ /* Create any parent first. */
+ if (FLAGS_SET(operation, OPERATION_CREATE) && array->parent)
+ r = process_item_array(array->parent, operation & OPERATION_CREATE);
+
+ /* Clean up all children first */
+ if ((operation & (OPERATION_REMOVE|OPERATION_CLEAN)) && !set_isempty(array->children)) {
+ Iterator i;
+ ItemArray *c;
+
+ SET_FOREACH(c, array->children, i) {
+ int k;
+
+ k = process_item_array(c, operation & (OPERATION_REMOVE|OPERATION_CLEAN));
+ if (k < 0 && r == 0)
+ r = k;
+ }
+ }
+
+ for (n = 0; n < array->n_items; n++) {
+ int k;
+
+ k = process_item(array->items + n, operation);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static void item_free_contents(Item *i) {
+ assert(i);
+ free(i->path);
+ free(i->argument);
+ strv_free(i->xattrs);
+
+#if HAVE_ACL
+ acl_free(i->acl_access);
+ acl_free(i->acl_default);
+#endif
+}
+
+static ItemArray* item_array_free(ItemArray *a) {
+ size_t n;
+
+ if (!a)
+ return NULL;
+
+ for (n = 0; n < a->n_items; n++)
+ item_free_contents(a->items + n);
+
+ set_free(a->children);
+ free(a->items);
+ return mfree(a);
+}
+
+static int item_compare(const Item *a, const Item *b) {
+ /* Make sure that the ownership taking item is put first, so
+ * that we first create the node, and then can adjust it */
+
+ if (takes_ownership(a->type) && !takes_ownership(b->type))
+ return -1;
+ if (!takes_ownership(a->type) && takes_ownership(b->type))
+ return 1;
+
+ return CMP(a->type, b->type);
+}
+
+static bool item_compatible(Item *a, Item *b) {
+ assert(a);
+ assert(b);
+ assert(streq(a->path, b->path));
+
+ if (takes_ownership(a->type) && takes_ownership(b->type))
+ /* check if the items are the same */
+ return streq_ptr(a->argument, b->argument) &&
+
+ a->uid_set == b->uid_set &&
+ a->uid == b->uid &&
+
+ a->gid_set == b->gid_set &&
+ a->gid == b->gid &&
+
+ a->mode_set == b->mode_set &&
+ a->mode == b->mode &&
+
+ a->age_set == b->age_set &&
+ a->age == b->age &&
+
+ a->mask_perms == b->mask_perms &&
+
+ a->keep_first_level == b->keep_first_level &&
+
+ a->major_minor == b->major_minor;
+
+ return true;
+}
+
+static bool should_include_path(const char *path) {
+ char **prefix;
+
+ STRV_FOREACH(prefix, arg_exclude_prefixes)
+ if (path_startswith(path, *prefix)) {
+ log_debug("Entry \"%s\" matches exclude prefix \"%s\", skipping.",
+ path, *prefix);
+ return false;
+ }
+
+ STRV_FOREACH(prefix, arg_include_prefixes)
+ if (path_startswith(path, *prefix)) {
+ log_debug("Entry \"%s\" matches include prefix \"%s\".", path, *prefix);
+ return true;
+ }
+
+ /* no matches, so we should include this path only if we
+ * have no whitelist at all */
+ if (strv_isempty(arg_include_prefixes))
+ return true;
+
+ log_debug("Entry \"%s\" does not match any include prefix, skipping.", path);
+ return false;
+}
+
+static int specifier_expansion_from_arg(Item *i) {
+ _cleanup_free_ char *unescaped = NULL, *resolved = NULL;
+ char **xattr;
+ int r;
+
+ assert(i);
+
+ if (i->argument == NULL)
+ return 0;
+
+ switch (i->type) {
+ case COPY_FILES:
+ case CREATE_SYMLINK:
+ case CREATE_FILE:
+ case TRUNCATE_FILE:
+ case WRITE_FILE:
+ r = cunescape(i->argument, 0, &unescaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unescape parameter to write: %s", i->argument);
+
+ r = specifier_printf(unescaped, specifier_table, NULL, &resolved);
+ if (r < 0)
+ return r;
+
+ free_and_replace(i->argument, resolved);
+ break;
+
+ case SET_XATTR:
+ case RECURSIVE_SET_XATTR:
+ assert(i->xattrs);
+
+ STRV_FOREACH (xattr, i->xattrs) {
+ r = specifier_printf(*xattr, specifier_table, NULL, &resolved);
+ if (r < 0)
+ return r;
+
+ free_and_replace(*xattr, resolved);
+ }
+ break;
+
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int patch_var_run(const char *fname, unsigned line, char **path) {
+ const char *k;
+ char *n;
+
+ assert(path);
+ assert(*path);
+
+ /* Optionally rewrites lines referencing /var/run/, to use /run/ instead. Why bother? tmpfiles merges lines in
+ * some cases and detects conflicts in others. If files/directories are specified through two equivalent lines
+ * this is problematic as neither case will be detected. Ideally we'd detect these cases by resolving symlinks
+ * early, but that's precisely not what we can do here as this code very likely is running very early on, at a
+ * time where the paths in question are not available yet, or even more importantly, our own tmpfiles rules
+ * might create the paths that are intermediary to the listed paths. We can't really cover the generic case,
+ * but the least we can do is cover the specific case of /var/run vs. /run, as /var/run is a legacy name for
+ * /run only, and we explicitly document that and require that on systemd systems the former is a symlink to
+ * the latter. Moreover files below this path are by far the primary usecase for tmpfiles.d/. */
+
+ k = path_startswith(*path, "/var/run/");
+ if (isempty(k)) /* Don't complain about other paths than /var/run, and not about /var/run itself either. */
+ return 0;
+
+ n = strjoin("/run/", k);
+ if (!n)
+ return log_oom();
+
+ /* Also log about this briefly. We do so at LOG_NOTICE level, as we fixed up the situation automatically, hence
+ * there's no immediate need for action by the user. However, in the interest of making things less confusing
+ * to the user, let's still inform the user that these snippets should really be updated. */
+
+ log_notice("[%s:%u] Line references path below legacy directory /var/run/, updating %s → %s; please update the tmpfiles.d/ drop-in file accordingly.", fname, line, *path, n);
+
+ free_and_replace(*path, n);
+
+ return 0;
+}
+
+static int parse_line(const char *fname, unsigned line, const char *buffer, bool *invalid_config) {
+
+ _cleanup_free_ char *action = NULL, *mode = NULL, *user = NULL, *group = NULL, *age = NULL, *path = NULL;
+ _cleanup_(item_free_contents) Item i = {};
+ ItemArray *existing;
+ OrderedHashmap *h;
+ int r, pos;
+ bool force = false, boot = false, allow_failure = false;
+
+ assert(fname);
+ assert(line >= 1);
+ assert(buffer);
+
+ r = extract_many_words(
+ &buffer,
+ NULL,
+ EXTRACT_QUOTES,
+ &action,
+ &path,
+ &mode,
+ &user,
+ &group,
+ &age,
+ NULL);
+ if (r < 0) {
+ if (IN_SET(r, -EINVAL, -EBADSLT))
+ /* invalid quoting and such or an unknown specifier */
+ *invalid_config = true;
+ return log_error_errno(r, "[%s:%u] Failed to parse line: %m", fname, line);
+ } else if (r < 2) {
+ *invalid_config = true;
+ log_error("[%s:%u] Syntax error.", fname, line);
+ return -EIO;
+ }
+
+ if (!isempty(buffer) && !streq(buffer, "-")) {
+ i.argument = strdup(buffer);
+ if (!i.argument)
+ return log_oom();
+ }
+
+ if (isempty(action)) {
+ *invalid_config = true;
+ log_error("[%s:%u] Command too short '%s'.", fname, line, action);
+ return -EINVAL;
+ }
+
+ for (pos = 1; action[pos]; pos++) {
+ if (action[pos] == '!' && !boot)
+ boot = true;
+ else if (action[pos] == '+' && !force)
+ force = true;
+ else if (action[pos] == '-' && !allow_failure)
+ allow_failure = true;
+ else {
+ *invalid_config = true;
+ log_error("[%s:%u] Unknown modifiers in command '%s'",
+ fname, line, action);
+ return -EINVAL;
+ }
+ }
+
+ if (boot && !arg_boot) {
+ log_debug("Ignoring entry %s \"%s\" because --boot is not specified.",
+ action, path);
+ return 0;
+ }
+
+ i.type = action[0];
+ i.force = force;
+ i.allow_failure = allow_failure;
+
+ r = specifier_printf(path, specifier_table, NULL, &i.path);
+ if (r == -ENXIO)
+ return log_unresolvable_specifier(fname, line);
+ if (r < 0) {
+ if (IN_SET(r, -EINVAL, -EBADSLT))
+ *invalid_config = true;
+ return log_error_errno(r, "[%s:%u] Failed to replace specifiers: %s", fname, line, path);
+ }
+
+ r = patch_var_run(fname, line, &i.path);
+ if (r < 0)
+ return r;
+
+ switch (i.type) {
+
+ case CREATE_DIRECTORY:
+ case CREATE_SUBVOLUME:
+ case CREATE_SUBVOLUME_INHERIT_QUOTA:
+ case CREATE_SUBVOLUME_NEW_QUOTA:
+ case EMPTY_DIRECTORY:
+ case TRUNCATE_DIRECTORY:
+ case CREATE_FIFO:
+ case IGNORE_PATH:
+ case IGNORE_DIRECTORY_PATH:
+ case REMOVE_PATH:
+ case RECURSIVE_REMOVE_PATH:
+ case ADJUST_MODE:
+ case RELABEL_PATH:
+ case RECURSIVE_RELABEL_PATH:
+ if (i.argument)
+ log_warning("[%s:%u] %c lines don't take argument fields, ignoring.", fname, line, i.type);
+
+ break;
+
+ case CREATE_FILE:
+ case TRUNCATE_FILE:
+ break;
+
+ case CREATE_SYMLINK:
+ if (!i.argument) {
+ i.argument = strappend("/usr/share/factory/", i.path);
+ if (!i.argument)
+ return log_oom();
+ }
+ break;
+
+ case WRITE_FILE:
+ if (!i.argument) {
+ *invalid_config = true;
+ log_error("[%s:%u] Write file requires argument.", fname, line);
+ return -EBADMSG;
+ }
+ break;
+
+ case COPY_FILES:
+ if (!i.argument) {
+ i.argument = strappend("/usr/share/factory/", i.path);
+ if (!i.argument)
+ return log_oom();
+ } else if (!path_is_absolute(i.argument)) {
+ *invalid_config = true;
+ log_error("[%s:%u] Source path is not absolute.", fname, line);
+ return -EBADMSG;
+ }
+
+ path_simplify(i.argument, false);
+ break;
+
+ case CREATE_CHAR_DEVICE:
+ case CREATE_BLOCK_DEVICE:
+ if (!i.argument) {
+ *invalid_config = true;
+ log_error("[%s:%u] Device file requires argument.", fname, line);
+ return -EBADMSG;
+ }
+
+ r = parse_dev(i.argument, &i.major_minor);
+ if (r < 0) {
+ *invalid_config = true;
+ log_error_errno(r, "[%s:%u] Can't parse device file major/minor '%s'.", fname, line, i.argument);
+ return -EBADMSG;
+ }
+
+ break;
+
+ case SET_XATTR:
+ case RECURSIVE_SET_XATTR:
+ if (!i.argument) {
+ *invalid_config = true;
+ log_error("[%s:%u] Set extended attribute requires argument.", fname, line);
+ return -EBADMSG;
+ }
+ r = parse_xattrs_from_arg(&i);
+ if (r < 0)
+ return r;
+ break;
+
+ case SET_ACL:
+ case RECURSIVE_SET_ACL:
+ if (!i.argument) {
+ *invalid_config = true;
+ log_error("[%s:%u] Set ACLs requires argument.", fname, line);
+ return -EBADMSG;
+ }
+ r = parse_acls_from_arg(&i);
+ if (r < 0)
+ return r;
+ break;
+
+ case SET_ATTRIBUTE:
+ case RECURSIVE_SET_ATTRIBUTE:
+ if (!i.argument) {
+ *invalid_config = true;
+ log_error("[%s:%u] Set file attribute requires argument.", fname, line);
+ return -EBADMSG;
+ }
+ r = parse_attribute_from_arg(&i);
+ if (IN_SET(r, -EINVAL, -EBADSLT))
+ *invalid_config = true;
+ if (r < 0)
+ return r;
+ break;
+
+ default:
+ log_error("[%s:%u] Unknown command type '%c'.", fname, line, (char) i.type);
+ *invalid_config = true;
+ return -EBADMSG;
+ }
+
+ if (!path_is_absolute(i.path)) {
+ log_error("[%s:%u] Path '%s' not absolute.", fname, line, i.path);
+ *invalid_config = true;
+ return -EBADMSG;
+ }
+
+ path_simplify(i.path, false);
+
+ if (!should_include_path(i.path))
+ return 0;
+
+ r = specifier_expansion_from_arg(&i);
+ if (r == -ENXIO)
+ return log_unresolvable_specifier(fname, line);
+ if (r < 0) {
+ if (IN_SET(r, -EINVAL, -EBADSLT))
+ *invalid_config = true;
+ return log_error_errno(r, "[%s:%u] Failed to substitute specifiers in argument: %m",
+ fname, line);
+ }
+
+ if (arg_root) {
+ char *p;
+
+ p = prefix_root(arg_root, i.path);
+ if (!p)
+ return log_oom();
+
+ free_and_replace(i.path, p);
+ }
+
+ if (!isempty(user) && !streq(user, "-")) {
+ const char *u = user;
+
+ r = get_user_creds(&u, &i.uid, NULL, NULL, NULL, USER_CREDS_ALLOW_MISSING);
+ if (r < 0) {
+ *invalid_config = true;
+ return log_error_errno(r, "[%s:%u] Unknown user '%s'.", fname, line, user);
+ }
+
+ i.uid_set = true;
+ }
+
+ if (!isempty(group) && !streq(group, "-")) {
+ const char *g = group;
+
+ r = get_group_creds(&g, &i.gid, USER_CREDS_ALLOW_MISSING);
+ if (r < 0) {
+ *invalid_config = true;
+ log_error("[%s:%u] Unknown group '%s'.", fname, line, group);
+ return r;
+ }
+
+ i.gid_set = true;
+ }
+
+ if (!isempty(mode) && !streq(mode, "-")) {
+ const char *mm = mode;
+ unsigned m;
+
+ if (*mm == '~') {
+ i.mask_perms = true;
+ mm++;
+ }
+
+ if (parse_mode(mm, &m) < 0) {
+ *invalid_config = true;
+ log_error("[%s:%u] Invalid mode '%s'.", fname, line, mode);
+ return -EBADMSG;
+ }
+
+ i.mode = m;
+ i.mode_set = true;
+ } else
+ i.mode = IN_SET(i.type, CREATE_DIRECTORY, TRUNCATE_DIRECTORY, CREATE_SUBVOLUME, CREATE_SUBVOLUME_INHERIT_QUOTA, CREATE_SUBVOLUME_NEW_QUOTA) ? 0755 : 0644;
+
+ if (!isempty(age) && !streq(age, "-")) {
+ const char *a = age;
+
+ if (*a == '~') {
+ i.keep_first_level = true;
+ a++;
+ }
+
+ if (parse_sec(a, &i.age) < 0) {
+ *invalid_config = true;
+ log_error("[%s:%u] Invalid age '%s'.", fname, line, age);
+ return -EBADMSG;
+ }
+
+ i.age_set = true;
+ }
+
+ h = needs_glob(i.type) ? globs : items;
+
+ existing = ordered_hashmap_get(h, i.path);
+ if (existing) {
+ size_t n;
+
+ for (n = 0; n < existing->n_items; n++) {
+ if (!item_compatible(existing->items + n, &i)) {
+ log_notice("[%s:%u] Duplicate line for path \"%s\", ignoring.",
+ fname, line, i.path);
+ return 0;
+ }
+ }
+ } else {
+ existing = new0(ItemArray, 1);
+ if (!existing)
+ return log_oom();
+
+ r = ordered_hashmap_put(h, i.path, existing);
+ if (r < 0) {
+ free(existing);
+ return log_oom();
+ }
+ }
+
+ if (!GREEDY_REALLOC(existing->items, existing->allocated, existing->n_items + 1))
+ return log_oom();
+
+ existing->items[existing->n_items++] = i;
+ i = (struct Item) {};
+
+ /* Sort item array, to enforce stable ordering of application */
+ typesafe_qsort(existing->items, existing->n_items, item_compare);
+
+ return 0;
+}
+
+static int cat_config(char **config_dirs, char **args) {
+ _cleanup_strv_free_ char **files = NULL;
+ int r;
+
+ r = conf_files_list_with_replacement(arg_root, config_dirs, arg_replace, &files, NULL);
+ if (r < 0)
+ return r;
+
+ return cat_files(NULL, files, 0);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-tmpfiles", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [CONFIGURATION FILE...]\n\n"
+ "Creates, deletes and cleans up volatile and temporary files and directories.\n\n"
+ " -h --help Show this help\n"
+ " --user Execute user configuration\n"
+ " --version Show package version\n"
+ " --cat-config Show configuration files\n"
+ " --create Create marked files/directories\n"
+ " --clean Clean up marked directories\n"
+ " --remove Remove marked files/directories\n"
+ " --boot Execute actions only safe at boot\n"
+ " --prefix=PATH Only apply rules with the specified prefix\n"
+ " --exclude-prefix=PATH Ignore rules with the specified prefix\n"
+ " --root=PATH Operate on an alternate filesystem root\n"
+ " --replace=PATH Treat arguments as replacement for PATH\n"
+ " --no-pager Do not pipe output into a pager\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_CAT_CONFIG,
+ ARG_USER,
+ ARG_CREATE,
+ ARG_CLEAN,
+ ARG_REMOVE,
+ ARG_BOOT,
+ ARG_PREFIX,
+ ARG_EXCLUDE_PREFIX,
+ ARG_ROOT,
+ ARG_REPLACE,
+ ARG_NO_PAGER,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "user", no_argument, NULL, ARG_USER },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "cat-config", no_argument, NULL, ARG_CAT_CONFIG },
+ { "create", no_argument, NULL, ARG_CREATE },
+ { "clean", no_argument, NULL, ARG_CLEAN },
+ { "remove", no_argument, NULL, ARG_REMOVE },
+ { "boot", no_argument, NULL, ARG_BOOT },
+ { "prefix", required_argument, NULL, ARG_PREFIX },
+ { "exclude-prefix", required_argument, NULL, ARG_EXCLUDE_PREFIX },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "replace", required_argument, NULL, ARG_REPLACE },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_CAT_CONFIG:
+ arg_cat_config = true;
+ break;
+
+ case ARG_USER:
+ arg_user = true;
+ break;
+
+ case ARG_CREATE:
+ arg_operation |= OPERATION_CREATE;
+ break;
+
+ case ARG_CLEAN:
+ arg_operation |= OPERATION_CLEAN;
+ break;
+
+ case ARG_REMOVE:
+ arg_operation |= OPERATION_REMOVE;
+ break;
+
+ case ARG_BOOT:
+ arg_boot = true;
+ break;
+
+ case ARG_PREFIX:
+ if (strv_push(&arg_include_prefixes, optarg) < 0)
+ return log_oom();
+ break;
+
+ case ARG_EXCLUDE_PREFIX:
+ if (strv_push(&arg_exclude_prefixes, optarg) < 0)
+ return log_oom();
+ break;
+
+ case ARG_ROOT:
+ r = parse_path_argument_and_warn(optarg, true, &arg_root);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_REPLACE:
+ if (!path_is_absolute(optarg) ||
+ !endswith(optarg, ".conf"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "The argument to --replace= must an absolute path to a config file");
+
+ arg_replace = optarg;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_operation == 0 && !arg_cat_config)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "You need to specify at least one of --clean, --create or --remove.");
+
+ if (arg_replace && arg_cat_config)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --replace= is not supported with --cat-config");
+
+ if (arg_replace && optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "When --replace= is given, some configuration items must be specified");
+
+ return 1;
+}
+
+static int read_config_file(char **config_dirs, const char *fn, bool ignore_enoent, bool *invalid_config) {
+ _cleanup_fclose_ FILE *_f = NULL;
+ Iterator iterator;
+ unsigned v = 0;
+ FILE *f;
+ Item *i;
+ int r = 0;
+
+ assert(fn);
+
+ if (streq(fn, "-")) {
+ log_debug("Reading config from stdin…");
+ fn = "<stdin>";
+ f = stdin;
+ } else {
+ r = search_and_fopen(fn, "re", arg_root, (const char**) config_dirs, &_f);
+ if (r < 0) {
+ if (ignore_enoent && r == -ENOENT) {
+ log_debug_errno(r, "Failed to open \"%s\", ignoring: %m", fn);
+ return 0;
+ }
+
+ return log_error_errno(r, "Failed to open '%s': %m", fn);
+ }
+ log_debug("Reading config file \"%s\"…", fn);
+ f = _f;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ bool invalid_line = false;
+ char *l;
+ int k;
+
+ k = read_line(f, LONG_LINE_MAX, &line);
+ if (k < 0)
+ return log_error_errno(k, "Failed to read '%s': %m", fn);
+ if (k == 0)
+ break;
+
+ v++;
+
+ l = strstrip(line);
+ if (IN_SET(*l, 0, '#'))
+ continue;
+
+ k = parse_line(fn, v, l, &invalid_line);
+ if (k < 0) {
+ if (invalid_line)
+ /* Allow reporting with a special code if the caller requested this */
+ *invalid_config = true;
+ else if (r == 0)
+ /* The first error becomes our return value */
+ r = k;
+ }
+ }
+
+ /* we have to determine age parameter for each entry of type X */
+ ORDERED_HASHMAP_FOREACH(i, globs, iterator) {
+ Iterator iter;
+ Item *j, *candidate_item = NULL;
+
+ if (i->type != IGNORE_DIRECTORY_PATH)
+ continue;
+
+ ORDERED_HASHMAP_FOREACH(j, items, iter) {
+ if (!IN_SET(j->type, CREATE_DIRECTORY, TRUNCATE_DIRECTORY, CREATE_SUBVOLUME, CREATE_SUBVOLUME_INHERIT_QUOTA, CREATE_SUBVOLUME_NEW_QUOTA))
+ continue;
+
+ if (path_equal(j->path, i->path)) {
+ candidate_item = j;
+ break;
+ }
+
+ if ((!candidate_item && path_startswith(i->path, j->path)) ||
+ (candidate_item && path_startswith(j->path, candidate_item->path) && (fnmatch(i->path, j->path, FNM_PATHNAME | FNM_PERIOD) == 0)))
+ candidate_item = j;
+ }
+
+ if (candidate_item && candidate_item->age_set) {
+ i->age = candidate_item->age;
+ i->age_set = true;
+ }
+ }
+
+ if (ferror(f)) {
+ log_error_errno(errno, "Failed to read from file %s: %m", fn);
+ if (r == 0)
+ r = -EIO;
+ }
+
+ return r;
+}
+
+static int parse_arguments(char **config_dirs, char **args, bool *invalid_config) {
+ char **arg;
+ int r;
+
+ STRV_FOREACH(arg, args) {
+ r = read_config_file(config_dirs, *arg, false, invalid_config);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int read_config_files(char **config_dirs, char **args, bool *invalid_config) {
+ _cleanup_strv_free_ char **files = NULL;
+ _cleanup_free_ char *p = NULL;
+ char **f;
+ int r;
+
+ r = conf_files_list_with_replacement(arg_root, config_dirs, arg_replace, &files, &p);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(f, files)
+ if (p && path_equal(*f, p)) {
+ log_debug("Parsing arguments at position \"%s\"…", *f);
+
+ r = parse_arguments(config_dirs, args, invalid_config);
+ if (r < 0)
+ return r;
+ } else
+ /* Just warn, ignore result otherwise.
+ * read_config_file() has some debug output, so no need to print anything. */
+ (void) read_config_file(config_dirs, *f, true, invalid_config);
+
+ return 0;
+}
+
+static int link_parent(ItemArray *a) {
+ const char *path;
+ char *prefix;
+ int r;
+
+ assert(a);
+
+ /* Finds the closest "parent" item array for the specified item array. Then registers the specified item array
+ * as child of it, and fills the parent in, linking them both ways. This allows us to later create parents
+ * before their children, and clean up/remove children before their parents. */
+
+ if (a->n_items <= 0)
+ return 0;
+
+ path = a->items[0].path;
+ prefix = newa(char, strlen(path) + 1);
+ PATH_FOREACH_PREFIX(prefix, path) {
+ ItemArray *j;
+
+ j = ordered_hashmap_get(items, prefix);
+ if (!j)
+ j = ordered_hashmap_get(globs, prefix);
+ if (j) {
+ r = set_ensure_allocated(&j->children, NULL);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(j->children, a);
+ if (r < 0)
+ return log_oom();
+
+ a->parent = j;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(item_array_hash_ops, char, string_hash_func, string_compare_func,
+ ItemArray, item_array_free);
+
+static int run(int argc, char *argv[]) {
+ _cleanup_strv_free_ char **config_dirs = NULL;
+ bool invalid_config = false;
+ Iterator iterator;
+ ItemArray *a;
+ enum {
+ PHASE_REMOVE_AND_CLEAN,
+ PHASE_CREATE,
+ _PHASE_MAX
+ } phase;
+ int r, k;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ log_setup_service();
+
+ if (arg_user) {
+ r = user_config_paths(&config_dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize configuration directory list: %m");
+ } else {
+ config_dirs = strv_split_nulstr(CONF_PATHS_NULSTR("tmpfiles.d"));
+ if (!config_dirs)
+ return log_oom();
+ }
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *t = NULL;
+
+ t = strv_join(config_dirs, "\n\t");
+ if (t)
+ log_debug("Looking for configuration files in (higher priority first):\n\t%s", t);
+ }
+
+ if (arg_cat_config) {
+ (void) pager_open(arg_pager_flags);
+
+ return cat_config(config_dirs, argv + optind);
+ }
+
+ umask(0022);
+
+ mac_selinux_init();
+
+ items = ordered_hashmap_new(&item_array_hash_ops);
+ globs = ordered_hashmap_new(&item_array_hash_ops);
+ if (!items || !globs)
+ return log_oom();
+
+ /* If command line arguments are specified along with --replace, read all
+ * configuration files and insert the positional arguments at the specified
+ * place. Otherwise, if command line arguments are specified, execute just
+ * them, and finally, without --replace= or any positional arguments, just
+ * read configuration and execute it.
+ */
+ if (arg_replace || optind >= argc)
+ r = read_config_files(config_dirs, argv + optind, &invalid_config);
+ else
+ r = parse_arguments(config_dirs, argv + optind, &invalid_config);
+ if (r < 0)
+ return r;
+
+ /* Let's now link up all child/parent relationships */
+ ORDERED_HASHMAP_FOREACH(a, items, iterator) {
+ r = link_parent(a);
+ if (r < 0)
+ return r;
+ }
+ ORDERED_HASHMAP_FOREACH(a, globs, iterator) {
+ r = link_parent(a);
+ if (r < 0)
+ return r;
+ }
+
+ /* If multiple operations are requested, let's first run the remove/clean operations, and only then the create
+ * operations. i.e. that we first clean out the platform we then build on. */
+ for (phase = 0; phase < _PHASE_MAX; phase++) {
+ OperationMask op;
+
+ if (phase == PHASE_REMOVE_AND_CLEAN)
+ op = arg_operation & (OPERATION_REMOVE|OPERATION_CLEAN);
+ else if (phase == PHASE_CREATE)
+ op = arg_operation & OPERATION_CREATE;
+ else
+ assert_not_reached("unexpected phase");
+
+ if (op == 0) /* Nothing requested in this phase */
+ continue;
+
+ /* The non-globbing ones usually create things, hence we apply them first */
+ ORDERED_HASHMAP_FOREACH(a, items, iterator) {
+ k = process_item_array(a, op);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ /* The globbing ones usually alter things, hence we apply them second. */
+ ORDERED_HASHMAP_FOREACH(a, globs, iterator) {
+ k = process_item_array(a, op);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+ }
+
+ if (ERRNO_IS_RESOURCE(-r))
+ return r;
+ if (invalid_config)
+ return EX_DATAERR;
+ if (r < 0)
+ return EX_CANTCREAT;
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/tty-ask-password-agent/tty-ask-password-agent.c b/src/tty-ask-password-agent/tty-ask-password-agent.c
new file mode 100644
index 0000000..fc165ff
--- /dev/null
+++ b/src/tty-ask-password-agent/tty-ask-password-agent.c
@@ -0,0 +1,869 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2015 Werner Fink
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/inotify.h>
+#include <sys/prctl.h>
+#include <sys/signalfd.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "ask-password-api.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "io-util.h"
+#include "macro.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+#include "utmp-wtmp.h"
+
+static enum {
+ ACTION_LIST,
+ ACTION_QUERY,
+ ACTION_WATCH,
+ ACTION_WALL
+} arg_action = ACTION_QUERY;
+
+static bool arg_plymouth = false;
+static bool arg_console = false;
+static const char *arg_device = NULL;
+
+static int ask_password_plymouth(
+ const char *message,
+ usec_t until,
+ AskPasswordFlags flags,
+ const char *flag_file,
+ char ***ret) {
+
+ static const union sockaddr_union sa = PLYMOUTH_SOCKET;
+ _cleanup_close_ int fd = -1, notify = -1;
+ _cleanup_free_ char *packet = NULL;
+ ssize_t k;
+ int r, n;
+ struct pollfd pollfd[2] = {};
+ char buffer[LINE_MAX];
+ size_t p = 0;
+ enum {
+ POLL_SOCKET,
+ POLL_INOTIFY
+ };
+
+ assert(ret);
+
+ if (flag_file) {
+ notify = inotify_init1(IN_CLOEXEC|IN_NONBLOCK);
+ if (notify < 0)
+ return -errno;
+
+ r = inotify_add_watch(notify, flag_file, IN_ATTRIB); /* for the link count */
+ if (r < 0)
+ return -errno;
+ }
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return -errno;
+
+ if (flags & ASK_PASSWORD_ACCEPT_CACHED) {
+ packet = strdup("c");
+ n = 1;
+ } else if (asprintf(&packet, "*\002%c%s%n", (int) (strlen(message) + 1), message, &n) < 0)
+ packet = NULL;
+ if (!packet)
+ return -ENOMEM;
+
+ r = loop_write(fd, packet, n + 1, true);
+ if (r < 0)
+ return r;
+
+ pollfd[POLL_SOCKET].fd = fd;
+ pollfd[POLL_SOCKET].events = POLLIN;
+ pollfd[POLL_INOTIFY].fd = notify;
+ pollfd[POLL_INOTIFY].events = POLLIN;
+
+ for (;;) {
+ int sleep_for = -1, j;
+
+ if (until > 0) {
+ usec_t y;
+
+ y = now(CLOCK_MONOTONIC);
+
+ if (y > until) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ sleep_for = (int) ((until - y) / USEC_PER_MSEC);
+ }
+
+ if (flag_file && access(flag_file, F_OK) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ j = poll(pollfd, notify >= 0 ? 2 : 1, sleep_for);
+ if (j < 0) {
+ if (errno == EINTR)
+ continue;
+
+ r = -errno;
+ goto finish;
+ } else if (j == 0) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ if (notify >= 0 && pollfd[POLL_INOTIFY].revents != 0)
+ (void) flush_fd(notify);
+
+ if (pollfd[POLL_SOCKET].revents == 0)
+ continue;
+
+ k = read(fd, buffer + p, sizeof(buffer) - p);
+ if (k < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ continue;
+
+ r = -errno;
+ goto finish;
+ } else if (k == 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ p += k;
+
+ if (p < 1)
+ continue;
+
+ if (buffer[0] == 5) {
+
+ if (flags & ASK_PASSWORD_ACCEPT_CACHED) {
+ /* Hmm, first try with cached
+ * passwords failed, so let's retry
+ * with a normal password request */
+ packet = mfree(packet);
+
+ if (asprintf(&packet, "*\002%c%s%n", (int) (strlen(message) + 1), message, &n) < 0) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = loop_write(fd, packet, n+1, true);
+ if (r < 0)
+ goto finish;
+
+ flags &= ~ASK_PASSWORD_ACCEPT_CACHED;
+ p = 0;
+ continue;
+ }
+
+ /* No password, because UI not shown */
+ r = -ENOENT;
+ goto finish;
+
+ } else if (IN_SET(buffer[0], 2, 9)) {
+ uint32_t size;
+ char **l;
+
+ /* One or more answers */
+ if (p < 5)
+ continue;
+
+ memcpy(&size, buffer+1, sizeof(size));
+ size = le32toh(size);
+ if (size + 5 > sizeof(buffer)) {
+ r = -EIO;
+ goto finish;
+ }
+
+ if (p-5 < size)
+ continue;
+
+ l = strv_parse_nulstr(buffer + 5, size);
+ if (!l) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ *ret = l;
+ break;
+
+ } else {
+ /* Unknown packet */
+ r = -EIO;
+ goto finish;
+ }
+ }
+
+ r = 0;
+
+finish:
+ explicit_bzero_safe(buffer, sizeof(buffer));
+ return r;
+}
+
+static int send_passwords(const char *socket_name, char **passwords) {
+ _cleanup_free_ char *packet = NULL;
+ _cleanup_close_ int socket_fd = -1;
+ union sockaddr_union sa = {};
+ size_t packet_length = 1;
+ char **p, *d;
+ ssize_t n;
+ int r, salen;
+
+ assert(socket_name);
+
+ salen = sockaddr_un_set_path(&sa.un, socket_name);
+ if (salen < 0)
+ return salen;
+
+ STRV_FOREACH(p, passwords)
+ packet_length += strlen(*p) + 1;
+
+ packet = new(char, packet_length);
+ if (!packet)
+ return -ENOMEM;
+
+ packet[0] = '+';
+
+ d = packet + 1;
+ STRV_FOREACH(p, passwords)
+ d = stpcpy(d, *p) + 1;
+
+ socket_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
+ if (socket_fd < 0) {
+ r = log_debug_errno(errno, "socket(): %m");
+ goto finish;
+ }
+
+ n = sendto(socket_fd, packet, packet_length, MSG_NOSIGNAL, &sa.sa, salen);
+ if (n < 0) {
+ r = log_debug_errno(errno, "sendto(): %m");
+ goto finish;
+ }
+
+ r = (int) n;
+
+finish:
+ explicit_bzero_safe(packet, packet_length);
+ return r;
+}
+
+static int parse_password(const char *filename, char **wall) {
+ _cleanup_free_ char *socket_name = NULL, *message = NULL;
+ bool accept_cached = false, echo = false;
+ uint64_t not_after = 0;
+ unsigned pid = 0;
+
+ const ConfigTableItem items[] = {
+ { "Ask", "Socket", config_parse_string, 0, &socket_name },
+ { "Ask", "NotAfter", config_parse_uint64, 0, &not_after },
+ { "Ask", "Message", config_parse_string, 0, &message },
+ { "Ask", "PID", config_parse_unsigned, 0, &pid },
+ { "Ask", "AcceptCached", config_parse_bool, 0, &accept_cached },
+ { "Ask", "Echo", config_parse_bool, 0, &echo },
+ {}
+ };
+
+ int r;
+
+ assert(filename);
+
+ r = config_parse(NULL, filename, NULL,
+ NULL,
+ config_item_table_lookup, items,
+ CONFIG_PARSE_RELAXED|CONFIG_PARSE_WARN, NULL);
+ if (r < 0)
+ return r;
+
+ if (!socket_name)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid password file %s", filename);
+
+ if (not_after > 0 && now(CLOCK_MONOTONIC) > not_after)
+ return 0;
+
+ if (pid > 0 && !pid_is_alive(pid))
+ return 0;
+
+ if (arg_action == ACTION_LIST)
+ printf("'%s' (PID %u)\n", message, pid);
+
+ else if (arg_action == ACTION_WALL) {
+ char *_wall;
+
+ if (asprintf(&_wall,
+ "%s%sPassword entry required for \'%s\' (PID %u).\r\n"
+ "Please enter password with the systemd-tty-ask-password-agent tool:",
+ strempty(*wall),
+ *wall ? "\r\n\r\n" : "",
+ message,
+ pid) < 0)
+ return log_oom();
+
+ free(*wall);
+ *wall = _wall;
+
+ } else {
+ _cleanup_strv_free_erase_ char **passwords = NULL;
+
+ assert(IN_SET(arg_action, ACTION_QUERY, ACTION_WATCH));
+
+ if (access(socket_name, W_OK) < 0) {
+ if (arg_action == ACTION_QUERY)
+ log_info("Not querying '%s' (PID %u), lacking privileges.", message, pid);
+
+ return 0;
+ }
+
+ if (arg_plymouth)
+ r = ask_password_plymouth(message, not_after, accept_cached ? ASK_PASSWORD_ACCEPT_CACHED : 0, filename, &passwords);
+ else {
+ int tty_fd = -1;
+
+ if (arg_console) {
+ const char *con = arg_device ?: "/dev/console";
+
+ tty_fd = acquire_terminal(con, ACQUIRE_TERMINAL_WAIT, USEC_INFINITY);
+ if (tty_fd < 0)
+ return log_error_errno(tty_fd, "Failed to acquire %s: %m", con);
+
+ r = reset_terminal_fd(tty_fd, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to reset terminal, ignoring: %m");
+ }
+
+ r = ask_password_tty(tty_fd, message, NULL, not_after,
+ (echo ? ASK_PASSWORD_ECHO : 0) |
+ (arg_console ? ASK_PASSWORD_CONSOLE_COLOR : 0),
+ filename, &passwords);
+
+ if (arg_console) {
+ tty_fd = safe_close(tty_fd);
+ release_terminal();
+ }
+ }
+
+ /* If the query went away, that's OK */
+ if (IN_SET(r, -ETIME, -ENOENT))
+ return 0;
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to query password: %m");
+
+ r = send_passwords(socket_name, passwords);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send: %m");
+ }
+
+ return 0;
+}
+
+static int wall_tty_block(void) {
+ _cleanup_free_ char *p = NULL;
+ dev_t devnr;
+ int fd, r;
+
+ r = get_ctty_devnr(0, &devnr);
+ if (r == -ENXIO) /* We have no controlling tty */
+ return -ENOTTY;
+ if (r < 0)
+ return log_error_errno(r, "Failed to get controlling TTY: %m");
+
+ if (asprintf(&p, "/run/systemd/ask-password-block/%u:%u", major(devnr), minor(devnr)) < 0)
+ return log_oom();
+
+ (void) mkdir_parents_label(p, 0700);
+ (void) mkfifo(p, 0600);
+
+ fd = open(p, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (fd < 0)
+ return log_debug_errno(errno, "Failed to open %s: %m", p);
+
+ return fd;
+}
+
+static bool wall_tty_match(const char *path, void *userdata) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+
+ if (!path_is_absolute(path))
+ path = strjoina("/dev/", path);
+
+ if (lstat(path, &st) < 0) {
+ log_debug_errno(errno, "Failed to stat %s: %m", path);
+ return true;
+ }
+
+ if (!S_ISCHR(st.st_mode)) {
+ log_debug("%s is not a character device.", path);
+ return true;
+ }
+
+ /* We use named pipes to ensure that wall messages suggesting
+ * password entry are not printed over password prompts
+ * already shown. We use the fact here that opening a pipe in
+ * non-blocking mode for write-only will succeed only if
+ * there's some writer behind it. Using pipes has the
+ * advantage that the block will automatically go away if the
+ * process dies. */
+
+ if (asprintf(&p, "/run/systemd/ask-password-block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0) {
+ log_oom();
+ return true;
+ }
+
+ fd = open(p, O_WRONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (fd < 0) {
+ log_debug_errno(errno, "Failed to open the wall pipe: %m");
+ return 1;
+ }
+
+ /* What, we managed to open the pipe? Then this tty is filtered. */
+ return 0;
+}
+
+static int show_passwords(void) {
+ _cleanup_closedir_ DIR *d;
+ struct dirent *de;
+ int r = 0;
+
+ d = opendir("/run/systemd/ask-password");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open /run/systemd/ask-password: %m");
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return log_error_errno(errno, "Failed to read directory: %m")) {
+ _cleanup_free_ char *p = NULL, *wall = NULL;
+ int q;
+
+ /* We only support /dev on tmpfs, hence we can rely on
+ * d_type to be reliable */
+
+ if (de->d_type != DT_REG)
+ continue;
+
+ if (hidden_or_backup_file(de->d_name))
+ continue;
+
+ if (!startswith(de->d_name, "ask."))
+ continue;
+
+ p = strappend("/run/systemd/ask-password/", de->d_name);
+ if (!p)
+ return log_oom();
+
+ q = parse_password(p, &wall);
+ if (q < 0 && r == 0)
+ r = q;
+
+ if (wall)
+ (void) utmp_wall(wall, NULL, NULL, wall_tty_match, NULL);
+ }
+
+ return r;
+}
+
+static int watch_passwords(void) {
+ enum {
+ FD_INOTIFY,
+ FD_SIGNAL,
+ _FD_MAX
+ };
+
+ _cleanup_close_ int notify = -1, signal_fd = -1, tty_block_fd = -1;
+ struct pollfd pollfd[_FD_MAX] = {};
+ sigset_t mask;
+ int r;
+
+ tty_block_fd = wall_tty_block();
+
+ (void) mkdir_p_label("/run/systemd/ask-password", 0755);
+
+ notify = inotify_init1(IN_CLOEXEC);
+ if (notify < 0)
+ return log_error_errno(errno, "Failed to allocate directory watch: %m");
+
+ if (inotify_add_watch(notify, "/run/systemd/ask-password", IN_CLOSE_WRITE|IN_MOVED_TO) < 0) {
+ if (errno == ENOSPC)
+ return log_error_errno(errno, "Failed to add /run/systemd/ask-password to directory watch: inotify watch limit reached");
+ else
+ return log_error_errno(errno, "Failed to add /run/systemd/ask-password to directory watch: %m");
+ }
+
+ assert_se(sigemptyset(&mask) >= 0);
+ assert_se(sigset_add_many(&mask, SIGINT, SIGTERM, -1) >= 0);
+ assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) >= 0);
+
+ signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
+ if (signal_fd < 0)
+ return log_error_errno(errno, "Failed to allocate signal file descriptor: %m");
+
+ pollfd[FD_INOTIFY].fd = notify;
+ pollfd[FD_INOTIFY].events = POLLIN;
+ pollfd[FD_SIGNAL].fd = signal_fd;
+ pollfd[FD_SIGNAL].events = POLLIN;
+
+ for (;;) {
+ r = show_passwords();
+ if (r < 0)
+ log_error_errno(r, "Failed to show password: %m");
+
+ if (poll(pollfd, _FD_MAX, -1) < 0) {
+ if (errno == EINTR)
+ continue;
+
+ return -errno;
+ }
+
+ if (pollfd[FD_INOTIFY].revents != 0)
+ (void) flush_fd(notify);
+
+ if (pollfd[FD_SIGNAL].revents != 0)
+ break;
+ }
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-tty-ask-password-agent", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Process system password requests.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --list Show pending password requests\n"
+ " --query Process pending password requests\n"
+ " --watch Continuously process password requests\n"
+ " --wall Continuously forward password requests to wall\n"
+ " --plymouth Ask question with Plymouth instead of on TTY\n"
+ " --console Ask question on /dev/console instead of current TTY\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_LIST = 0x100,
+ ARG_QUERY,
+ ARG_WATCH,
+ ARG_WALL,
+ ARG_PLYMOUTH,
+ ARG_CONSOLE,
+ ARG_VERSION
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "list", no_argument, NULL, ARG_LIST },
+ { "query", no_argument, NULL, ARG_QUERY },
+ { "watch", no_argument, NULL, ARG_WATCH },
+ { "wall", no_argument, NULL, ARG_WALL },
+ { "plymouth", no_argument, NULL, ARG_PLYMOUTH },
+ { "console", optional_argument, NULL, ARG_CONSOLE },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_LIST:
+ arg_action = ACTION_LIST;
+ break;
+
+ case ARG_QUERY:
+ arg_action = ACTION_QUERY;
+ break;
+
+ case ARG_WATCH:
+ arg_action = ACTION_WATCH;
+ break;
+
+ case ARG_WALL:
+ arg_action = ACTION_WALL;
+ break;
+
+ case ARG_PLYMOUTH:
+ arg_plymouth = true;
+ break;
+
+ case ARG_CONSOLE:
+ arg_console = true;
+ if (optarg) {
+
+ if (isempty(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Empty console device path is not allowed.");
+
+ arg_device = optarg;
+ }
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind != argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s takes no arguments.", program_invocation_short_name);
+
+ if (arg_plymouth || arg_console) {
+
+ if (!IN_SET(arg_action, ACTION_QUERY, ACTION_WATCH))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Options --query and --watch conflict.");
+
+ if (arg_plymouth && arg_console)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Options --plymouth and --console conflict.");
+ }
+
+ return 1;
+}
+
+/*
+ * To be able to ask on all terminal devices of /dev/console
+ * the devices are collected. If more than one device is found,
+ * then on each of the terminals a inquiring task is forked.
+ * Every task has its own session and its own controlling terminal.
+ * If one of the tasks does handle a password, the remaining tasks
+ * will be terminated.
+ */
+static int ask_on_this_console(const char *tty, pid_t *ret_pid, int argc, char *argv[]) {
+ struct sigaction sig = {
+ .sa_handler = nop_signal_handler,
+ .sa_flags = SA_NOCLDSTOP | SA_RESTART,
+ };
+ pid_t pid;
+ int r;
+
+ assert_se(sigprocmask_many(SIG_UNBLOCK, NULL, SIGHUP, SIGCHLD, -1) >= 0);
+
+ assert_se(sigemptyset(&sig.sa_mask) >= 0);
+ assert_se(sigaction(SIGCHLD, &sig, NULL) >= 0);
+
+ sig.sa_handler = SIG_DFL;
+ assert_se(sigaction(SIGHUP, &sig, NULL) >= 0);
+
+ r = safe_fork("(sd-passwd)", FORK_RESET_SIGNALS|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ int ac;
+
+ assert_se(prctl(PR_SET_PDEATHSIG, SIGHUP) >= 0);
+
+ for (ac = 0; ac < argc; ac++) {
+ if (streq(argv[ac], "--console")) {
+ argv[ac] = strjoina("--console=", tty);
+ break;
+ }
+ }
+
+ assert(ac < argc);
+
+ execv(SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH, argv);
+ _exit(EXIT_FAILURE);
+ }
+
+ *ret_pid = pid;
+ return 0;
+}
+
+static void terminate_agents(Set *pids) {
+ struct timespec ts;
+ siginfo_t status = {};
+ sigset_t set;
+ Iterator i;
+ void *p;
+ int r, signum;
+
+ /*
+ * Request termination of the remaining processes as those
+ * are not required anymore.
+ */
+ SET_FOREACH(p, pids, i)
+ (void) kill(PTR_TO_PID(p), SIGTERM);
+
+ /*
+ * Collect the processes which have go away.
+ */
+ assert_se(sigemptyset(&set) >= 0);
+ assert_se(sigaddset(&set, SIGCHLD) >= 0);
+ timespec_store(&ts, 50 * USEC_PER_MSEC);
+
+ while (!set_isempty(pids)) {
+
+ zero(status);
+ r = waitid(P_ALL, 0, &status, WEXITED|WNOHANG);
+ if (r < 0 && errno == EINTR)
+ continue;
+
+ if (r == 0 && status.si_pid > 0) {
+ set_remove(pids, PID_TO_PTR(status.si_pid));
+ continue;
+ }
+
+ signum = sigtimedwait(&set, NULL, &ts);
+ if (signum < 0) {
+ if (errno != EAGAIN)
+ log_error_errno(errno, "sigtimedwait() failed: %m");
+ break;
+ }
+ assert(signum == SIGCHLD);
+ }
+
+ /*
+ * Kill hanging processes.
+ */
+ SET_FOREACH(p, pids, i) {
+ log_warning("Failed to terminate child %d, killing it", PTR_TO_PID(p));
+ (void) kill(PTR_TO_PID(p), SIGKILL);
+ }
+}
+
+static int ask_on_consoles(int argc, char *argv[]) {
+ _cleanup_set_free_ Set *pids = NULL;
+ _cleanup_strv_free_ char **consoles = NULL;
+ siginfo_t status = {};
+ char **tty;
+ pid_t pid;
+ int r;
+
+ r = get_kernel_consoles(&consoles);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine devices of /dev/console: %m");
+
+ pids = set_new(NULL);
+ if (!pids)
+ return log_oom();
+
+ /* Start an agent on each console. */
+ STRV_FOREACH(tty, consoles) {
+ r = ask_on_this_console(*tty, &pid, argc, argv);
+ if (r < 0)
+ return r;
+
+ if (set_put(pids, PID_TO_PTR(pid)) < 0)
+ return log_oom();
+ }
+
+ /* Wait for an agent to exit. */
+ for (;;) {
+ zero(status);
+
+ if (waitid(P_ALL, 0, &status, WEXITED) < 0) {
+ if (errno == EINTR)
+ continue;
+
+ return log_error_errno(errno, "waitid() failed: %m");
+ }
+
+ set_remove(pids, PID_TO_PTR(status.si_pid));
+ break;
+ }
+
+ if (!is_clean_exit(status.si_code, status.si_status, EXIT_CLEAN_DAEMON, NULL))
+ log_error("Password agent failed with: %d", status.si_status);
+
+ terminate_agents(pids);
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_console && !arg_device)
+ /*
+ * Spawn a separate process for each console device.
+ */
+ return ask_on_consoles(argc, argv);
+
+ if (arg_device) {
+ /*
+ * Later on, a controlling terminal will be acquired,
+ * therefore the current process has to become a session
+ * leader and should not have a controlling terminal already.
+ */
+ (void) setsid();
+ (void) release_terminal();
+ }
+
+ if (IN_SET(arg_action, ACTION_WATCH, ACTION_WALL))
+ return watch_passwords();
+ else
+ return show_passwords();
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/udev/.vimrc b/src/udev/.vimrc
new file mode 100644
index 0000000..366fbdc
--- /dev/null
+++ b/src/udev/.vimrc
@@ -0,0 +1,4 @@
+" 'set exrc' in ~/.vimrc will read .vimrc from the current directory
+set tabstop=8
+set shiftwidth=8
+set expandtab
diff --git a/src/udev/ata_id/ata_id.c b/src/udev/ata_id/ata_id.c
new file mode 100644
index 0000000..6c2233e
--- /dev/null
+++ b/src/udev/ata_id/ata_id.c
@@ -0,0 +1,651 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * ata_id - reads product/serial number from ATA drives
+ *
+ * Copyright © 2009-2010 David Zeuthen <zeuthen@gmail.com>
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/bsg.h>
+#include <linux/hdreg.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_ioctl.h>
+#include <scsi/sg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "libudev-util.h"
+#include "log.h"
+#include "udev-util.h"
+#include "util.h"
+
+#define COMMAND_TIMEOUT_MSEC (30 * 1000)
+
+static int disk_scsi_inquiry_command(
+ int fd,
+ void *buf,
+ size_t buf_len) {
+
+ uint8_t cdb[6] = {
+ /* INQUIRY, see SPC-4 section 6.4 */
+ [0] = 0x12, /* OPERATION CODE: INQUIRY */
+ [3] = (buf_len >> 8), /* ALLOCATION LENGTH */
+ [4] = (buf_len & 0xff),
+ };
+ uint8_t sense[32] = {};
+ struct sg_io_v4 io_v4 = {
+ .guard = 'Q',
+ .protocol = BSG_PROTOCOL_SCSI,
+ .subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD,
+ .request_len = sizeof(cdb),
+ .request = (uintptr_t) cdb,
+ .max_response_len = sizeof(sense),
+ .response = (uintptr_t) sense,
+ .din_xfer_len = buf_len,
+ .din_xferp = (uintptr_t) buf,
+ .timeout = COMMAND_TIMEOUT_MSEC,
+ };
+ int ret;
+
+ ret = ioctl(fd, SG_IO, &io_v4);
+ if (ret != 0) {
+ /* could be that the driver doesn't do version 4, try version 3 */
+ if (errno == EINVAL) {
+ struct sg_io_hdr io_hdr = {
+ .interface_id = 'S',
+ .cmdp = (unsigned char*) cdb,
+ .cmd_len = sizeof (cdb),
+ .dxferp = buf,
+ .dxfer_len = buf_len,
+ .sbp = sense,
+ .mx_sb_len = sizeof(sense),
+ .dxfer_direction = SG_DXFER_FROM_DEV,
+ .timeout = COMMAND_TIMEOUT_MSEC,
+ };
+
+ ret = ioctl(fd, SG_IO, &io_hdr);
+ if (ret != 0)
+ return ret;
+
+ /* even if the ioctl succeeds, we need to check the return value */
+ if (!(io_hdr.status == 0 &&
+ io_hdr.host_status == 0 &&
+ io_hdr.driver_status == 0)) {
+ errno = EIO;
+ return -1;
+ }
+ } else
+ return ret;
+ }
+
+ /* even if the ioctl succeeds, we need to check the return value */
+ if (!(io_v4.device_status == 0 &&
+ io_v4.transport_status == 0 &&
+ io_v4.driver_status == 0)) {
+ errno = EIO;
+ return -1;
+ }
+
+ return 0;
+}
+
+static int disk_identify_command(
+ int fd,
+ void *buf,
+ size_t buf_len) {
+
+ uint8_t cdb[12] = {
+ /*
+ * ATA Pass-Through 12 byte command, as described in
+ *
+ * T10 04-262r8 ATA Command Pass-Through
+ *
+ * from http://www.t10.org/ftp/t10/document.04/04-262r8.pdf
+ */
+ [0] = 0xa1, /* OPERATION CODE: 12 byte pass through */
+ [1] = 4 << 1, /* PROTOCOL: PIO Data-in */
+ [2] = 0x2e, /* OFF_LINE=0, CK_COND=1, T_DIR=1, BYT_BLOK=1, T_LENGTH=2 */
+ [3] = 0, /* FEATURES */
+ [4] = 1, /* SECTORS */
+ [5] = 0, /* LBA LOW */
+ [6] = 0, /* LBA MID */
+ [7] = 0, /* LBA HIGH */
+ [8] = 0 & 0x4F, /* SELECT */
+ [9] = 0xEC, /* Command: ATA IDENTIFY DEVICE */
+ };
+ uint8_t sense[32] = {};
+ uint8_t *desc = sense + 8;
+ struct sg_io_v4 io_v4 = {
+ .guard = 'Q',
+ .protocol = BSG_PROTOCOL_SCSI,
+ .subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD,
+ .request_len = sizeof(cdb),
+ .request = (uintptr_t) cdb,
+ .max_response_len = sizeof(sense),
+ .response = (uintptr_t) sense,
+ .din_xfer_len = buf_len,
+ .din_xferp = (uintptr_t) buf,
+ .timeout = COMMAND_TIMEOUT_MSEC,
+ };
+ int ret;
+
+ ret = ioctl(fd, SG_IO, &io_v4);
+ if (ret != 0) {
+ /* could be that the driver doesn't do version 4, try version 3 */
+ if (errno == EINVAL) {
+ struct sg_io_hdr io_hdr = {
+ .interface_id = 'S',
+ .cmdp = (unsigned char*) cdb,
+ .cmd_len = sizeof (cdb),
+ .dxferp = buf,
+ .dxfer_len = buf_len,
+ .sbp = sense,
+ .mx_sb_len = sizeof (sense),
+ .dxfer_direction = SG_DXFER_FROM_DEV,
+ .timeout = COMMAND_TIMEOUT_MSEC,
+ };
+
+ ret = ioctl(fd, SG_IO, &io_hdr);
+ if (ret != 0)
+ return ret;
+ } else
+ return ret;
+ }
+
+ if (!(sense[0] == 0x72 && desc[0] == 0x9 && desc[1] == 0x0c)) {
+ errno = EIO;
+ return -1;
+ }
+
+ return 0;
+}
+
+static int disk_identify_packet_device_command(
+ int fd,
+ void *buf,
+ size_t buf_len) {
+
+ uint8_t cdb[16] = {
+ /*
+ * ATA Pass-Through 16 byte command, as described in
+ *
+ * T10 04-262r8 ATA Command Pass-Through
+ *
+ * from http://www.t10.org/ftp/t10/document.04/04-262r8.pdf
+ */
+ [0] = 0x85, /* OPERATION CODE: 16 byte pass through */
+ [1] = 4 << 1, /* PROTOCOL: PIO Data-in */
+ [2] = 0x2e, /* OFF_LINE=0, CK_COND=1, T_DIR=1, BYT_BLOK=1, T_LENGTH=2 */
+ [3] = 0, /* FEATURES */
+ [4] = 0, /* FEATURES */
+ [5] = 0, /* SECTORS */
+ [6] = 1, /* SECTORS */
+ [7] = 0, /* LBA LOW */
+ [8] = 0, /* LBA LOW */
+ [9] = 0, /* LBA MID */
+ [10] = 0, /* LBA MID */
+ [11] = 0, /* LBA HIGH */
+ [12] = 0, /* LBA HIGH */
+ [13] = 0, /* DEVICE */
+ [14] = 0xA1, /* Command: ATA IDENTIFY PACKET DEVICE */
+ [15] = 0, /* CONTROL */
+ };
+ uint8_t sense[32] = {};
+ uint8_t *desc = sense + 8;
+ struct sg_io_v4 io_v4 = {
+ .guard = 'Q',
+ .protocol = BSG_PROTOCOL_SCSI,
+ .subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD,
+ .request_len = sizeof (cdb),
+ .request = (uintptr_t) cdb,
+ .max_response_len = sizeof (sense),
+ .response = (uintptr_t) sense,
+ .din_xfer_len = buf_len,
+ .din_xferp = (uintptr_t) buf,
+ .timeout = COMMAND_TIMEOUT_MSEC,
+ };
+ int ret;
+
+ ret = ioctl(fd, SG_IO, &io_v4);
+ if (ret != 0) {
+ /* could be that the driver doesn't do version 4, try version 3 */
+ if (errno == EINVAL) {
+ struct sg_io_hdr io_hdr = {
+ .interface_id = 'S',
+ .cmdp = (unsigned char*) cdb,
+ .cmd_len = sizeof (cdb),
+ .dxferp = buf,
+ .dxfer_len = buf_len,
+ .sbp = sense,
+ .mx_sb_len = sizeof (sense),
+ .dxfer_direction = SG_DXFER_FROM_DEV,
+ .timeout = COMMAND_TIMEOUT_MSEC,
+ };
+
+ ret = ioctl(fd, SG_IO, &io_hdr);
+ if (ret != 0)
+ return ret;
+ } else
+ return ret;
+ }
+
+ if (!(sense[0] == 0x72 && desc[0] == 0x9 && desc[1] == 0x0c)) {
+ errno = EIO;
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * disk_identify_get_string:
+ * @identify: A block of IDENTIFY data
+ * @offset_words: Offset of the string to get, in words.
+ * @dest: Destination buffer for the string.
+ * @dest_len: Length of destination buffer, in bytes.
+ *
+ * Copies the ATA string from @identify located at @offset_words into @dest.
+ */
+static void disk_identify_get_string(
+ uint8_t identify[512],
+ unsigned offset_words,
+ char *dest,
+ size_t dest_len) {
+
+ unsigned c1;
+ unsigned c2;
+
+ while (dest_len > 0) {
+ c1 = identify[offset_words * 2 + 1];
+ c2 = identify[offset_words * 2];
+ *dest = c1;
+ dest++;
+ *dest = c2;
+ dest++;
+ offset_words++;
+ dest_len -= 2;
+ }
+}
+
+static void disk_identify_fixup_string(
+ uint8_t identify[512],
+ unsigned offset_words,
+ size_t len) {
+ disk_identify_get_string(identify, offset_words,
+ (char *) identify + offset_words * 2, len);
+}
+
+static void disk_identify_fixup_uint16 (uint8_t identify[512], unsigned offset_words) {
+ uint16_t *p;
+
+ p = (uint16_t *) identify;
+ p[offset_words] = le16toh (p[offset_words]);
+}
+
+/**
+ * disk_identify:
+ * @fd: File descriptor for the block device.
+ * @out_identify: Return location for IDENTIFY data.
+ * @out_is_packet_device: Return location for whether returned data is from a IDENTIFY PACKET DEVICE.
+ *
+ * Sends the IDENTIFY DEVICE or IDENTIFY PACKET DEVICE command to the
+ * device represented by @fd. If successful, then the result will be
+ * copied into @out_identify and @out_is_packet_device.
+ *
+ * This routine is based on code from libatasmart, LGPL v2.1.
+ *
+ * Returns: 0 if the data was successfully obtained, otherwise
+ * non-zero with errno set.
+ */
+static int disk_identify(int fd,
+ uint8_t out_identify[512],
+ int *out_is_packet_device) {
+ int ret;
+ uint8_t inquiry_buf[36];
+ int peripheral_device_type;
+ int all_nul_bytes;
+ int n;
+ int is_packet_device = 0;
+
+ /* init results */
+ memzero(out_identify, 512);
+
+ /* If we were to use ATA PASS_THROUGH (12) on an ATAPI device
+ * we could accidentally blank media. This is because MMC's BLANK
+ * command has the same op-code (0x61).
+ *
+ * To prevent this from happening we bail out if the device
+ * isn't a Direct Access Block Device, e.g. SCSI type 0x00
+ * (CD/DVD devices are type 0x05). So we send a SCSI INQUIRY
+ * command first... libata is handling this via its SCSI
+ * emulation layer.
+ *
+ * This also ensures that we're actually dealing with a device
+ * that understands SCSI commands.
+ *
+ * (Yes, it is a bit perverse that we're tunneling the ATA
+ * command through SCSI and relying on the ATA driver
+ * emulating SCSI well-enough...)
+ *
+ * (See commit 160b069c25690bfb0c785994c7c3710289179107 for
+ * the original bug-fix and see http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=556635
+ * for the original bug-report.)
+ */
+ ret = disk_scsi_inquiry_command (fd, inquiry_buf, sizeof (inquiry_buf));
+ if (ret != 0)
+ goto out;
+
+ /* SPC-4, section 6.4.2: Standard INQUIRY data */
+ peripheral_device_type = inquiry_buf[0] & 0x1f;
+ if (peripheral_device_type == 0x05)
+ {
+ is_packet_device = 1;
+ ret = disk_identify_packet_device_command(fd, out_identify, 512);
+ goto check_nul_bytes;
+ }
+ if (peripheral_device_type != 0x00) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* OK, now issue the IDENTIFY DEVICE command */
+ ret = disk_identify_command(fd, out_identify, 512);
+ if (ret != 0)
+ goto out;
+
+ check_nul_bytes:
+ /* Check if IDENTIFY data is all NUL bytes - if so, bail */
+ all_nul_bytes = 1;
+ for (n = 0; n < 512; n++) {
+ if (out_identify[n] != '\0') {
+ all_nul_bytes = 0;
+ break;
+ }
+ }
+
+ if (all_nul_bytes) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+out:
+ if (out_is_packet_device != NULL)
+ *out_is_packet_device = is_packet_device;
+ return ret;
+}
+
+int main(int argc, char *argv[]) {
+ struct hd_driveid id;
+ union {
+ uint8_t byte[512];
+ uint16_t wyde[256];
+ } identify;
+ char model[41];
+ char model_enc[256];
+ char serial[21];
+ char revision[9];
+ const char *node = NULL;
+ int export = 0;
+ _cleanup_close_ int fd = -1;
+ uint16_t word;
+ int is_packet_device = 0;
+ static const struct option options[] = {
+ { "export", no_argument, NULL, 'x' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+
+ log_set_target(LOG_TARGET_AUTO);
+ udev_parse_config();
+ log_parse_environment();
+ log_open();
+
+ for (;;) {
+ int option;
+
+ option = getopt_long(argc, argv, "xh", options, NULL);
+ if (option == -1)
+ break;
+
+ switch (option) {
+ case 'x':
+ export = 1;
+ break;
+ case 'h':
+ printf("Usage: ata_id [--export] [--help] <device>\n"
+ " -x,--export print values as environment keys\n"
+ " -h,--help print this help text\n\n");
+ return 0;
+ }
+ }
+
+ node = argv[optind];
+ if (node == NULL) {
+ log_error("no node specified");
+ return 1;
+ }
+
+ fd = open(node, O_RDONLY|O_NONBLOCK|O_CLOEXEC);
+ if (fd < 0) {
+ log_error("unable to open '%s'", node);
+ return 1;
+ }
+
+ if (disk_identify(fd, identify.byte, &is_packet_device) == 0) {
+ /*
+ * fix up only the fields from the IDENTIFY data that we are going to
+ * use and copy it into the hd_driveid struct for convenience
+ */
+ disk_identify_fixup_string(identify.byte, 10, 20); /* serial */
+ disk_identify_fixup_string(identify.byte, 23, 8); /* fwrev */
+ disk_identify_fixup_string(identify.byte, 27, 40); /* model */
+ disk_identify_fixup_uint16(identify.byte, 0); /* configuration */
+ disk_identify_fixup_uint16(identify.byte, 75); /* queue depth */
+ disk_identify_fixup_uint16(identify.byte, 76); /* SATA capabilities */
+ disk_identify_fixup_uint16(identify.byte, 82); /* command set supported */
+ disk_identify_fixup_uint16(identify.byte, 83); /* command set supported */
+ disk_identify_fixup_uint16(identify.byte, 84); /* command set supported */
+ disk_identify_fixup_uint16(identify.byte, 85); /* command set supported */
+ disk_identify_fixup_uint16(identify.byte, 86); /* command set supported */
+ disk_identify_fixup_uint16(identify.byte, 87); /* command set supported */
+ disk_identify_fixup_uint16(identify.byte, 89); /* time required for SECURITY ERASE UNIT */
+ disk_identify_fixup_uint16(identify.byte, 90); /* time required for enhanced SECURITY ERASE UNIT */
+ disk_identify_fixup_uint16(identify.byte, 91); /* current APM values */
+ disk_identify_fixup_uint16(identify.byte, 94); /* current AAM value */
+ disk_identify_fixup_uint16(identify.byte, 108); /* WWN */
+ disk_identify_fixup_uint16(identify.byte, 109); /* WWN */
+ disk_identify_fixup_uint16(identify.byte, 110); /* WWN */
+ disk_identify_fixup_uint16(identify.byte, 111); /* WWN */
+ disk_identify_fixup_uint16(identify.byte, 128); /* device lock function */
+ disk_identify_fixup_uint16(identify.byte, 217); /* nominal media rotation rate */
+ memcpy(&id, identify.byte, sizeof id);
+ } else {
+ /* If this fails, then try HDIO_GET_IDENTITY */
+ if (ioctl(fd, HDIO_GET_IDENTITY, &id) != 0) {
+ log_debug_errno(errno, "HDIO_GET_IDENTITY failed for '%s': %m", node);
+ return 2;
+ }
+ }
+
+ memcpy(model, id.model, 40);
+ model[40] = '\0';
+ udev_util_encode_string(model, model_enc, sizeof(model_enc));
+ util_replace_whitespace((char *) id.model, model, 40);
+ util_replace_chars(model, NULL);
+ util_replace_whitespace((char *) id.serial_no, serial, 20);
+ util_replace_chars(serial, NULL);
+ util_replace_whitespace((char *) id.fw_rev, revision, 8);
+ util_replace_chars(revision, NULL);
+
+ if (export) {
+ /* Set this to convey the disk speaks the ATA protocol */
+ printf("ID_ATA=1\n");
+
+ if ((id.config >> 8) & 0x80) {
+ /* This is an ATAPI device */
+ switch ((id.config >> 8) & 0x1f) {
+ case 0:
+ printf("ID_TYPE=cd\n");
+ break;
+ case 1:
+ printf("ID_TYPE=tape\n");
+ break;
+ case 5:
+ printf("ID_TYPE=cd\n");
+ break;
+ case 7:
+ printf("ID_TYPE=optical\n");
+ break;
+ default:
+ printf("ID_TYPE=generic\n");
+ break;
+ }
+ } else {
+ printf("ID_TYPE=disk\n");
+ }
+ printf("ID_BUS=ata\n");
+ printf("ID_MODEL=%s\n", model);
+ printf("ID_MODEL_ENC=%s\n", model_enc);
+ printf("ID_REVISION=%s\n", revision);
+ if (serial[0] != '\0') {
+ printf("ID_SERIAL=%s_%s\n", model, serial);
+ printf("ID_SERIAL_SHORT=%s\n", serial);
+ } else {
+ printf("ID_SERIAL=%s\n", model);
+ }
+
+ if (id.command_set_1 & (1<<5)) {
+ printf("ID_ATA_WRITE_CACHE=1\n");
+ printf("ID_ATA_WRITE_CACHE_ENABLED=%d\n", (id.cfs_enable_1 & (1<<5)) ? 1 : 0);
+ }
+ if (id.command_set_1 & (1<<10)) {
+ printf("ID_ATA_FEATURE_SET_HPA=1\n");
+ printf("ID_ATA_FEATURE_SET_HPA_ENABLED=%d\n", (id.cfs_enable_1 & (1<<10)) ? 1 : 0);
+
+ /*
+ * TODO: use the READ NATIVE MAX ADDRESS command to get the native max address
+ * so it is easy to check whether the protected area is in use.
+ */
+ }
+ if (id.command_set_1 & (1<<3)) {
+ printf("ID_ATA_FEATURE_SET_PM=1\n");
+ printf("ID_ATA_FEATURE_SET_PM_ENABLED=%d\n", (id.cfs_enable_1 & (1<<3)) ? 1 : 0);
+ }
+ if (id.command_set_1 & (1<<1)) {
+ printf("ID_ATA_FEATURE_SET_SECURITY=1\n");
+ printf("ID_ATA_FEATURE_SET_SECURITY_ENABLED=%d\n", (id.cfs_enable_1 & (1<<1)) ? 1 : 0);
+ printf("ID_ATA_FEATURE_SET_SECURITY_ERASE_UNIT_MIN=%d\n", id.trseuc * 2);
+ if ((id.cfs_enable_1 & (1<<1))) /* enabled */ {
+ if (id.dlf & (1<<8))
+ printf("ID_ATA_FEATURE_SET_SECURITY_LEVEL=maximum\n");
+ else
+ printf("ID_ATA_FEATURE_SET_SECURITY_LEVEL=high\n");
+ }
+ if (id.dlf & (1<<5))
+ printf("ID_ATA_FEATURE_SET_SECURITY_ENHANCED_ERASE_UNIT_MIN=%d\n", id.trsEuc * 2);
+ if (id.dlf & (1<<4))
+ printf("ID_ATA_FEATURE_SET_SECURITY_EXPIRE=1\n");
+ if (id.dlf & (1<<3))
+ printf("ID_ATA_FEATURE_SET_SECURITY_FROZEN=1\n");
+ if (id.dlf & (1<<2))
+ printf("ID_ATA_FEATURE_SET_SECURITY_LOCKED=1\n");
+ }
+ if (id.command_set_1 & (1<<0)) {
+ printf("ID_ATA_FEATURE_SET_SMART=1\n");
+ printf("ID_ATA_FEATURE_SET_SMART_ENABLED=%d\n", (id.cfs_enable_1 & (1<<0)) ? 1 : 0);
+ }
+ if (id.command_set_2 & (1<<9)) {
+ printf("ID_ATA_FEATURE_SET_AAM=1\n");
+ printf("ID_ATA_FEATURE_SET_AAM_ENABLED=%d\n", (id.cfs_enable_2 & (1<<9)) ? 1 : 0);
+ printf("ID_ATA_FEATURE_SET_AAM_VENDOR_RECOMMENDED_VALUE=%d\n", id.acoustic >> 8);
+ printf("ID_ATA_FEATURE_SET_AAM_CURRENT_VALUE=%d\n", id.acoustic & 0xff);
+ }
+ if (id.command_set_2 & (1<<5)) {
+ printf("ID_ATA_FEATURE_SET_PUIS=1\n");
+ printf("ID_ATA_FEATURE_SET_PUIS_ENABLED=%d\n", (id.cfs_enable_2 & (1<<5)) ? 1 : 0);
+ }
+ if (id.command_set_2 & (1<<3)) {
+ printf("ID_ATA_FEATURE_SET_APM=1\n");
+ printf("ID_ATA_FEATURE_SET_APM_ENABLED=%d\n", (id.cfs_enable_2 & (1<<3)) ? 1 : 0);
+ if ((id.cfs_enable_2 & (1<<3)))
+ printf("ID_ATA_FEATURE_SET_APM_CURRENT_VALUE=%d\n", id.CurAPMvalues & 0xff);
+ }
+ if (id.command_set_2 & (1<<0))
+ printf("ID_ATA_DOWNLOAD_MICROCODE=1\n");
+
+ /*
+ * Word 76 indicates the capabilities of a SATA device. A PATA device shall set
+ * word 76 to 0000h or FFFFh. If word 76 is set to 0000h or FFFFh, then
+ * the device does not claim compliance with the Serial ATA specification and words
+ * 76 through 79 are not valid and shall be ignored.
+ */
+
+ word = identify.wyde[76];
+ if (!IN_SET(word, 0x0000, 0xffff)) {
+ printf("ID_ATA_SATA=1\n");
+ /*
+ * If bit 2 of word 76 is set to one, then the device supports the Gen2
+ * signaling rate of 3.0 Gb/s (see SATA 2.6).
+ *
+ * If bit 1 of word 76 is set to one, then the device supports the Gen1
+ * signaling rate of 1.5 Gb/s (see SATA 2.6).
+ */
+ if (word & (1<<2))
+ printf("ID_ATA_SATA_SIGNAL_RATE_GEN2=1\n");
+ if (word & (1<<1))
+ printf("ID_ATA_SATA_SIGNAL_RATE_GEN1=1\n");
+ }
+
+ /* Word 217 indicates the nominal media rotation rate of the device */
+ word = identify.wyde[217];
+ if (word == 0x0001)
+ printf ("ID_ATA_ROTATION_RATE_RPM=0\n"); /* non-rotating e.g. SSD */
+ else if (word >= 0x0401 && word <= 0xfffe)
+ printf ("ID_ATA_ROTATION_RATE_RPM=%d\n", word);
+
+ /*
+ * Words 108-111 contain a mandatory World Wide Name (WWN) in the NAA IEEE Registered identifier
+ * format. Word 108 bits (15:12) shall contain 5h, indicating that the naming authority is IEEE.
+ * All other values are reserved.
+ */
+ word = identify.wyde[108];
+ if ((word & 0xf000) == 0x5000) {
+ uint64_t wwwn;
+
+ wwwn = identify.wyde[108];
+ wwwn <<= 16;
+ wwwn |= identify.wyde[109];
+ wwwn <<= 16;
+ wwwn |= identify.wyde[110];
+ wwwn <<= 16;
+ wwwn |= identify.wyde[111];
+ printf("ID_WWN=0x%1$" PRIx64 "\n"
+ "ID_WWN_WITH_EXTENSION=0x%1$" PRIx64 "\n",
+ wwwn);
+ }
+
+ /* from Linux's include/linux/ata.h */
+ if (IN_SET(identify.wyde[0], 0x848a, 0x844a) ||
+ (identify.wyde[83] & 0xc004) == 0x4004)
+ printf("ID_ATA_CFA=1\n");
+ } else {
+ if (serial[0] != '\0')
+ printf("%s_%s\n", model, serial);
+ else
+ printf("%s\n", model);
+ }
+
+ return 0;
+}
diff --git a/src/udev/cdrom_id/cdrom_id.c b/src/udev/cdrom_id/cdrom_id.c
new file mode 100644
index 0000000..aed24a3
--- /dev/null
+++ b/src/udev/cdrom_id/cdrom_id.c
@@ -0,0 +1,1048 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * cdrom_id - optical drive and media information prober
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/cdrom.h>
+#include <scsi/sg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "log.h"
+#include "random-util.h"
+#include "udev-util.h"
+#include "util.h"
+
+/* device info */
+static unsigned cd_cd_rom;
+static unsigned cd_cd_r;
+static unsigned cd_cd_rw;
+static unsigned cd_dvd_rom;
+static unsigned cd_dvd_r;
+static unsigned cd_dvd_rw;
+static unsigned cd_dvd_ram;
+static unsigned cd_dvd_plus_r;
+static unsigned cd_dvd_plus_rw;
+static unsigned cd_dvd_plus_r_dl;
+static unsigned cd_dvd_plus_rw_dl;
+static unsigned cd_bd;
+static unsigned cd_bd_r;
+static unsigned cd_bd_re;
+static unsigned cd_hddvd;
+static unsigned cd_hddvd_r;
+static unsigned cd_hddvd_rw;
+static unsigned cd_mo;
+static unsigned cd_mrw;
+static unsigned cd_mrw_w;
+
+/* media info */
+static unsigned cd_media;
+static unsigned cd_media_cd_rom;
+static unsigned cd_media_cd_r;
+static unsigned cd_media_cd_rw;
+static unsigned cd_media_dvd_rom;
+static unsigned cd_media_dvd_r;
+static unsigned cd_media_dvd_rw;
+static unsigned cd_media_dvd_rw_ro; /* restricted overwrite mode */
+static unsigned cd_media_dvd_rw_seq; /* sequential mode */
+static unsigned cd_media_dvd_ram;
+static unsigned cd_media_dvd_plus_r;
+static unsigned cd_media_dvd_plus_rw;
+static unsigned cd_media_dvd_plus_r_dl;
+static unsigned cd_media_dvd_plus_rw_dl;
+static unsigned cd_media_bd;
+static unsigned cd_media_bd_r;
+static unsigned cd_media_bd_re;
+static unsigned cd_media_hddvd;
+static unsigned cd_media_hddvd_r;
+static unsigned cd_media_hddvd_rw;
+static unsigned cd_media_mo;
+static unsigned cd_media_mrw;
+static unsigned cd_media_mrw_w;
+
+static const char *cd_media_state = NULL;
+static unsigned cd_media_session_next;
+static unsigned cd_media_session_count;
+static unsigned cd_media_track_count;
+static unsigned cd_media_track_count_data;
+static unsigned cd_media_track_count_audio;
+static unsigned long long int cd_media_session_last_offset;
+
+#define ERRCODE(s) ((((s)[2] & 0x0F) << 16) | ((s)[12] << 8) | ((s)[13]))
+#define SK(errcode) (((errcode) >> 16) & 0xF)
+#define ASC(errcode) (((errcode) >> 8) & 0xFF)
+#define ASCQ(errcode) ((errcode) & 0xFF)
+
+static bool is_mounted(const char *device) {
+ struct stat statbuf;
+ FILE *fp;
+ int maj, min;
+ bool mounted = false;
+
+ if (stat(device, &statbuf) < 0)
+ return false;
+
+ fp = fopen("/proc/self/mountinfo", "re");
+ if (fp == NULL)
+ return false;
+ while (fscanf(fp, "%*s %*s %i:%i %*[^\n]", &maj, &min) == 2) {
+ if (makedev(maj, min) == statbuf.st_rdev) {
+ mounted = true;
+ break;
+ }
+ }
+ fclose(fp);
+ return mounted;
+}
+
+static void info_scsi_cmd_err(const char *cmd, int err) {
+ if (err == -1) {
+ log_debug("%s failed", cmd);
+ return;
+ }
+ log_debug("%s failed with SK=%Xh/ASC=%02Xh/ACQ=%02Xh", cmd, SK(err), ASC(err), ASCQ(err));
+}
+
+struct scsi_cmd {
+ struct cdrom_generic_command cgc;
+ union {
+ struct request_sense s;
+ unsigned char u[18];
+ } _sense;
+ struct sg_io_hdr sg_io;
+};
+
+static void scsi_cmd_init(struct scsi_cmd *cmd) {
+ memzero(cmd, sizeof(struct scsi_cmd));
+ cmd->cgc.quiet = 1;
+ cmd->cgc.sense = &cmd->_sense.s;
+ cmd->sg_io.interface_id = 'S';
+ cmd->sg_io.mx_sb_len = sizeof(cmd->_sense);
+ cmd->sg_io.cmdp = cmd->cgc.cmd;
+ cmd->sg_io.sbp = cmd->_sense.u;
+ cmd->sg_io.flags = SG_FLAG_LUN_INHIBIT | SG_FLAG_DIRECT_IO;
+}
+
+static void scsi_cmd_set(struct scsi_cmd *cmd, size_t i, unsigned char arg) {
+ cmd->sg_io.cmd_len = i + 1;
+ cmd->cgc.cmd[i] = arg;
+}
+
+#define CHECK_CONDITION 0x01
+
+static int scsi_cmd_run(struct scsi_cmd *cmd, int fd, unsigned char *buf, size_t bufsize) {
+ int ret = 0;
+
+ if (bufsize > 0) {
+ cmd->sg_io.dxferp = buf;
+ cmd->sg_io.dxfer_len = bufsize;
+ cmd->sg_io.dxfer_direction = SG_DXFER_FROM_DEV;
+ } else {
+ cmd->sg_io.dxfer_direction = SG_DXFER_NONE;
+ }
+ if (ioctl(fd, SG_IO, &cmd->sg_io))
+ return -1;
+
+ if ((cmd->sg_io.info & SG_INFO_OK_MASK) != SG_INFO_OK) {
+ errno = EIO;
+ ret = -1;
+ if (cmd->sg_io.masked_status & CHECK_CONDITION) {
+ ret = ERRCODE(cmd->_sense.u);
+ if (ret == 0)
+ ret = -1;
+ }
+ }
+ return ret;
+}
+
+static int media_lock(int fd, bool lock) {
+ int err;
+
+ /* disable the kernel's lock logic */
+ err = ioctl(fd, CDROM_CLEAR_OPTIONS, CDO_LOCK);
+ if (err < 0)
+ log_debug("CDROM_CLEAR_OPTIONS, CDO_LOCK failed");
+
+ err = ioctl(fd, CDROM_LOCKDOOR, lock ? 1 : 0);
+ if (err < 0)
+ log_debug("CDROM_LOCKDOOR failed");
+
+ return err;
+}
+
+static int media_eject(int fd) {
+ struct scsi_cmd sc;
+ int err;
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x1b);
+ scsi_cmd_set(&sc, 4, 0x02);
+ scsi_cmd_set(&sc, 5, 0);
+ err = scsi_cmd_run(&sc, fd, NULL, 0);
+ if ((err != 0)) {
+ info_scsi_cmd_err("START_STOP_UNIT", err);
+ return -1;
+ }
+ return 0;
+}
+
+static int cd_capability_compat(int fd) {
+ int capability;
+
+ capability = ioctl(fd, CDROM_GET_CAPABILITY, NULL);
+ if (capability < 0) {
+ log_debug("CDROM_GET_CAPABILITY failed");
+ return -1;
+ }
+
+ if (capability & CDC_CD_R)
+ cd_cd_r = 1;
+ if (capability & CDC_CD_RW)
+ cd_cd_rw = 1;
+ if (capability & CDC_DVD)
+ cd_dvd_rom = 1;
+ if (capability & CDC_DVD_R)
+ cd_dvd_r = 1;
+ if (capability & CDC_DVD_RAM)
+ cd_dvd_ram = 1;
+ if (capability & CDC_MRW)
+ cd_mrw = 1;
+ if (capability & CDC_MRW_W)
+ cd_mrw_w = 1;
+ return 0;
+}
+
+static int cd_media_compat(int fd) {
+ if (ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT) != CDS_DISC_OK) {
+ log_debug("CDROM_DRIVE_STATUS != CDS_DISC_OK");
+ return -1;
+ }
+ cd_media = 1;
+ return 0;
+}
+
+static int cd_inquiry(int fd) {
+ struct scsi_cmd sc;
+ unsigned char inq[128];
+ int err;
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x12);
+ scsi_cmd_set(&sc, 4, 36);
+ scsi_cmd_set(&sc, 5, 0);
+ err = scsi_cmd_run(&sc, fd, inq, 36);
+ if ((err != 0)) {
+ info_scsi_cmd_err("INQUIRY", err);
+ return -1;
+ }
+
+ if ((inq[0] & 0x1F) != 5) {
+ log_debug("not an MMC unit");
+ return -1;
+ }
+
+ log_debug("INQUIRY: [%.8s][%.16s][%.4s]", inq + 8, inq + 16, inq + 32);
+ return 0;
+}
+
+static void feature_profile_media(int cur_profile) {
+ switch (cur_profile) {
+ case 0x03:
+ case 0x04:
+ case 0x05:
+ log_debug("profile 0x%02x ", cur_profile);
+ cd_media = 1;
+ cd_media_mo = 1;
+ break;
+ case 0x08:
+ log_debug("profile 0x%02x media_cd_rom", cur_profile);
+ cd_media = 1;
+ cd_media_cd_rom = 1;
+ break;
+ case 0x09:
+ log_debug("profile 0x%02x media_cd_r", cur_profile);
+ cd_media = 1;
+ cd_media_cd_r = 1;
+ break;
+ case 0x0a:
+ log_debug("profile 0x%02x media_cd_rw", cur_profile);
+ cd_media = 1;
+ cd_media_cd_rw = 1;
+ break;
+ case 0x10:
+ log_debug("profile 0x%02x media_dvd_ro", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_rom = 1;
+ break;
+ case 0x11:
+ log_debug("profile 0x%02x media_dvd_r", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_r = 1;
+ break;
+ case 0x12:
+ log_debug("profile 0x%02x media_dvd_ram", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_ram = 1;
+ break;
+ case 0x13:
+ log_debug("profile 0x%02x media_dvd_rw_ro", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_rw = 1;
+ cd_media_dvd_rw_ro = 1;
+ break;
+ case 0x14:
+ log_debug("profile 0x%02x media_dvd_rw_seq", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_rw = 1;
+ cd_media_dvd_rw_seq = 1;
+ break;
+ case 0x1B:
+ log_debug("profile 0x%02x media_dvd_plus_r", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_plus_r = 1;
+ break;
+ case 0x1A:
+ log_debug("profile 0x%02x media_dvd_plus_rw", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_plus_rw = 1;
+ break;
+ case 0x2A:
+ log_debug("profile 0x%02x media_dvd_plus_rw_dl", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_plus_rw_dl = 1;
+ break;
+ case 0x2B:
+ log_debug("profile 0x%02x media_dvd_plus_r_dl", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_plus_r_dl = 1;
+ break;
+ case 0x40:
+ log_debug("profile 0x%02x media_bd", cur_profile);
+ cd_media = 1;
+ cd_media_bd = 1;
+ break;
+ case 0x41:
+ case 0x42:
+ log_debug("profile 0x%02x media_bd_r", cur_profile);
+ cd_media = 1;
+ cd_media_bd_r = 1;
+ break;
+ case 0x43:
+ log_debug("profile 0x%02x media_bd_re", cur_profile);
+ cd_media = 1;
+ cd_media_bd_re = 1;
+ break;
+ case 0x50:
+ log_debug("profile 0x%02x media_hddvd", cur_profile);
+ cd_media = 1;
+ cd_media_hddvd = 1;
+ break;
+ case 0x51:
+ log_debug("profile 0x%02x media_hddvd_r", cur_profile);
+ cd_media = 1;
+ cd_media_hddvd_r = 1;
+ break;
+ case 0x52:
+ log_debug("profile 0x%02x media_hddvd_rw", cur_profile);
+ cd_media = 1;
+ cd_media_hddvd_rw = 1;
+ break;
+ default:
+ log_debug("profile 0x%02x <ignored>", cur_profile);
+ break;
+ }
+}
+
+static int feature_profiles(const unsigned char *profiles, size_t size) {
+ unsigned i;
+
+ for (i = 0; i+4 <= size; i += 4) {
+ int profile;
+
+ profile = profiles[i] << 8 | profiles[i+1];
+ switch (profile) {
+ case 0x03:
+ case 0x04:
+ case 0x05:
+ log_debug("profile 0x%02x mo", profile);
+ cd_mo = 1;
+ break;
+ case 0x08:
+ log_debug("profile 0x%02x cd_rom", profile);
+ cd_cd_rom = 1;
+ break;
+ case 0x09:
+ log_debug("profile 0x%02x cd_r", profile);
+ cd_cd_r = 1;
+ break;
+ case 0x0A:
+ log_debug("profile 0x%02x cd_rw", profile);
+ cd_cd_rw = 1;
+ break;
+ case 0x10:
+ log_debug("profile 0x%02x dvd_rom", profile);
+ cd_dvd_rom = 1;
+ break;
+ case 0x12:
+ log_debug("profile 0x%02x dvd_ram", profile);
+ cd_dvd_ram = 1;
+ break;
+ case 0x13:
+ case 0x14:
+ log_debug("profile 0x%02x dvd_rw", profile);
+ cd_dvd_rw = 1;
+ break;
+ case 0x1B:
+ log_debug("profile 0x%02x dvd_plus_r", profile);
+ cd_dvd_plus_r = 1;
+ break;
+ case 0x1A:
+ log_debug("profile 0x%02x dvd_plus_rw", profile);
+ cd_dvd_plus_rw = 1;
+ break;
+ case 0x2A:
+ log_debug("profile 0x%02x dvd_plus_rw_dl", profile);
+ cd_dvd_plus_rw_dl = 1;
+ break;
+ case 0x2B:
+ log_debug("profile 0x%02x dvd_plus_r_dl", profile);
+ cd_dvd_plus_r_dl = 1;
+ break;
+ case 0x40:
+ cd_bd = 1;
+ log_debug("profile 0x%02x bd", profile);
+ break;
+ case 0x41:
+ case 0x42:
+ cd_bd_r = 1;
+ log_debug("profile 0x%02x bd_r", profile);
+ break;
+ case 0x43:
+ cd_bd_re = 1;
+ log_debug("profile 0x%02x bd_re", profile);
+ break;
+ case 0x50:
+ cd_hddvd = 1;
+ log_debug("profile 0x%02x hddvd", profile);
+ break;
+ case 0x51:
+ cd_hddvd_r = 1;
+ log_debug("profile 0x%02x hddvd_r", profile);
+ break;
+ case 0x52:
+ cd_hddvd_rw = 1;
+ log_debug("profile 0x%02x hddvd_rw", profile);
+ break;
+ default:
+ log_debug("profile 0x%02x <ignored>", profile);
+ break;
+ }
+ }
+ return 0;
+}
+
+/* returns 0 if media was detected */
+static int cd_profiles_old_mmc(int fd) {
+ struct scsi_cmd sc;
+ int err;
+
+ unsigned char header[32];
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x51);
+ scsi_cmd_set(&sc, 8, sizeof(header));
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, header, sizeof(header));
+ if ((err != 0)) {
+ info_scsi_cmd_err("READ DISC INFORMATION", err);
+ if (cd_media == 1) {
+ log_debug("no current profile, but disc is present; assuming CD-ROM");
+ cd_media_cd_rom = 1;
+ cd_media_track_count = 1;
+ cd_media_track_count_data = 1;
+ return 0;
+ } else {
+ log_debug("no current profile, assuming no media");
+ return -1;
+ }
+ };
+
+ cd_media = 1;
+
+ if (header[2] & 16) {
+ cd_media_cd_rw = 1;
+ log_debug("profile 0x0a media_cd_rw");
+ } else if ((header[2] & 3) < 2 && cd_cd_r) {
+ cd_media_cd_r = 1;
+ log_debug("profile 0x09 media_cd_r");
+ } else {
+ cd_media_cd_rom = 1;
+ log_debug("profile 0x08 media_cd_rom");
+ }
+ return 0;
+}
+
+/* returns 0 if media was detected */
+static int cd_profiles(int fd) {
+ struct scsi_cmd sc;
+ unsigned char features[65530];
+ unsigned cur_profile = 0;
+ unsigned len;
+ unsigned i;
+ int err;
+ int ret;
+
+ ret = -1;
+
+ /* First query the current profile */
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x46);
+ scsi_cmd_set(&sc, 8, 8);
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, features, 8);
+ if ((err != 0)) {
+ info_scsi_cmd_err("GET CONFIGURATION", err);
+ /* handle pre-MMC2 drives which do not support GET CONFIGURATION */
+ if (SK(err) == 0x5 && IN_SET(ASC(err), 0x20, 0x24)) {
+ log_debug("drive is pre-MMC2 and does not support 46h get configuration command");
+ log_debug("trying to work around the problem");
+ ret = cd_profiles_old_mmc(fd);
+ }
+ goto out;
+ }
+
+ cur_profile = features[6] << 8 | features[7];
+ if (cur_profile > 0) {
+ log_debug("current profile 0x%02x", cur_profile);
+ feature_profile_media(cur_profile);
+ ret = 0; /* we have media */
+ } else {
+ log_debug("no current profile, assuming no media");
+ }
+
+ len = features[0] << 24 | features[1] << 16 | features[2] << 8 | features[3];
+ log_debug("GET CONFIGURATION: size of features buffer 0x%04x", len);
+
+ if (len > sizeof(features)) {
+ log_debug("cannot get features in a single query, truncating");
+ len = sizeof(features);
+ } else if (len <= 8)
+ len = sizeof(features);
+
+ /* Now get the full feature buffer */
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x46);
+ scsi_cmd_set(&sc, 7, ( len >> 8 ) & 0xff);
+ scsi_cmd_set(&sc, 8, len & 0xff);
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, features, len);
+ if ((err != 0)) {
+ info_scsi_cmd_err("GET CONFIGURATION", err);
+ return -1;
+ }
+
+ /* parse the length once more, in case the drive decided to have other features suddenly :) */
+ len = features[0] << 24 | features[1] << 16 | features[2] << 8 | features[3];
+ log_debug("GET CONFIGURATION: size of features buffer 0x%04x", len);
+
+ if (len > sizeof(features)) {
+ log_debug("cannot get features in a single query, truncating");
+ len = sizeof(features);
+ }
+
+ /* device features */
+ for (i = 8; i+4 < len; i += (4 + features[i+3])) {
+ unsigned feature;
+
+ feature = features[i] << 8 | features[i+1];
+
+ switch (feature) {
+ case 0x00:
+ log_debug("GET CONFIGURATION: feature 'profiles', with %i entries", features[i+3] / 4);
+ feature_profiles(&features[i]+4, MIN(features[i+3], len - i - 4));
+ break;
+ default:
+ log_debug("GET CONFIGURATION: feature 0x%04x <ignored>, with 0x%02x bytes", feature, features[i+3]);
+ break;
+ }
+ }
+out:
+ return ret;
+}
+
+static int cd_media_info(int fd) {
+ struct scsi_cmd sc;
+ unsigned char header[32];
+ static const char *media_status[] = {
+ "blank",
+ "appendable",
+ "complete",
+ "other"
+ };
+ int err;
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x51);
+ scsi_cmd_set(&sc, 8, sizeof(header) & 0xff);
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, header, sizeof(header));
+ if ((err != 0)) {
+ info_scsi_cmd_err("READ DISC INFORMATION", err);
+ return -1;
+ };
+
+ cd_media = 1;
+ log_debug("disk type %02x", header[8]);
+ log_debug("hardware reported media status: %s", media_status[header[2] & 3]);
+
+ /* exclude plain CDROM, some fake cdroms return 0 for "blank" media here */
+ if (!cd_media_cd_rom)
+ cd_media_state = media_status[header[2] & 3];
+
+ /* fresh DVD-RW in restricted overwite mode reports itself as
+ * "appendable"; change it to "blank" to make it consistent with what
+ * gets reported after blanking, and what userspace expects */
+ if (cd_media_dvd_rw_ro && (header[2] & 3) == 1)
+ cd_media_state = media_status[0];
+
+ /* DVD+RW discs (and DVD-RW in restricted mode) once formatted are
+ * always "complete", DVD-RAM are "other" or "complete" if the disc is
+ * write protected; we need to check the contents if it is blank */
+ if ((cd_media_dvd_rw_ro || cd_media_dvd_plus_rw || cd_media_dvd_plus_rw_dl || cd_media_dvd_ram) && (header[2] & 3) > 1) {
+ unsigned char buffer[32 * 2048];
+ unsigned char len;
+ int offset;
+
+ if (cd_media_dvd_ram) {
+ /* a write protected dvd-ram may report "complete" status */
+
+ unsigned char dvdstruct[8];
+ unsigned char format[12];
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0xAD);
+ scsi_cmd_set(&sc, 7, 0xC0);
+ scsi_cmd_set(&sc, 9, sizeof(dvdstruct));
+ scsi_cmd_set(&sc, 11, 0);
+ err = scsi_cmd_run(&sc, fd, dvdstruct, sizeof(dvdstruct));
+ if ((err != 0)) {
+ info_scsi_cmd_err("READ DVD STRUCTURE", err);
+ return -1;
+ }
+ if (dvdstruct[4] & 0x02) {
+ cd_media_state = media_status[2];
+ log_debug("write-protected DVD-RAM media inserted");
+ goto determined;
+ }
+
+ /* let's make sure we don't try to read unformatted media */
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x23);
+ scsi_cmd_set(&sc, 8, sizeof(format));
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, format, sizeof(format));
+ if ((err != 0)) {
+ info_scsi_cmd_err("READ DVD FORMAT CAPACITIES", err);
+ return -1;
+ }
+
+ len = format[3];
+ if (len & 7 || len < 16) {
+ log_debug("invalid format capacities length");
+ return -1;
+ }
+
+ switch(format[8] & 3) {
+ case 1:
+ log_debug("unformatted DVD-RAM media inserted");
+ /* This means that last format was interrupted
+ * or failed, blank dvd-ram discs are factory
+ * formatted. Take no action here as it takes
+ * quite a while to reformat a dvd-ram and it's
+ * not automatically started */
+ goto determined;
+
+ case 2:
+ log_debug("formatted DVD-RAM media inserted");
+ break;
+
+ case 3:
+ cd_media = 0; //return no media
+ log_debug("format capacities returned no media");
+ return -1;
+ }
+ }
+
+ /* Take a closer look at formatted media (unformatted DVD+RW
+ * has "blank" status", DVD-RAM was examined earlier) and check
+ * for ISO and UDF PVDs or a fs superblock presence and do it
+ * in one ioctl (we need just sectors 0 and 16) */
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x28);
+ scsi_cmd_set(&sc, 5, 0);
+ scsi_cmd_set(&sc, 8, 32);
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, buffer, sizeof(buffer));
+ if ((err != 0)) {
+ cd_media = 0;
+ info_scsi_cmd_err("READ FIRST 32 BLOCKS", err);
+ return -1;
+ }
+
+ /* if any non-zero data is found in sector 16 (iso and udf) or
+ * eventually 0 (fat32 boot sector, ext2 superblock, etc), disc
+ * is assumed non-blank */
+
+ for (offset = 32768; offset < (32768 + 2048); offset++) {
+ if (buffer [offset]) {
+ log_debug("data in block 16, assuming complete");
+ goto determined;
+ }
+ }
+
+ for (offset = 0; offset < 2048; offset++) {
+ if (buffer [offset]) {
+ log_debug("data in block 0, assuming complete");
+ goto determined;
+ }
+ }
+
+ cd_media_state = media_status[0];
+ log_debug("no data in blocks 0 or 16, assuming blank");
+ }
+
+determined:
+ /* "other" is e. g. DVD-RAM, can't append sessions there; DVDs in
+ * restricted overwrite mode can never append, only in sequential mode */
+ if ((header[2] & 3) < 2 && !cd_media_dvd_rw_ro)
+ cd_media_session_next = header[10] << 8 | header[5];
+ cd_media_session_count = header[9] << 8 | header[4];
+ cd_media_track_count = header[11] << 8 | header[6];
+
+ return 0;
+}
+
+static int cd_media_toc(int fd) {
+ struct scsi_cmd sc;
+ unsigned char header[12];
+ unsigned char toc[65536];
+ unsigned len, i, num_tracks;
+ unsigned char *p;
+ int err;
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x43);
+ scsi_cmd_set(&sc, 6, 1);
+ scsi_cmd_set(&sc, 8, sizeof(header) & 0xff);
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, header, sizeof(header));
+ if ((err != 0)) {
+ info_scsi_cmd_err("READ TOC", err);
+ return -1;
+ }
+
+ len = (header[0] << 8 | header[1]) + 2;
+ log_debug("READ TOC: len: %d, start track: %d, end track: %d", len, header[2], header[3]);
+ if (len > sizeof(toc))
+ return -1;
+ if (len < 2)
+ return -1;
+ /* 2: first track, 3: last track */
+ num_tracks = header[3] - header[2] + 1;
+
+ /* empty media has no tracks */
+ if (len < 8)
+ return 0;
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x43);
+ scsi_cmd_set(&sc, 6, header[2]); /* First Track/Session Number */
+ scsi_cmd_set(&sc, 7, (len >> 8) & 0xff);
+ scsi_cmd_set(&sc, 8, len & 0xff);
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, toc, len);
+ if ((err != 0)) {
+ info_scsi_cmd_err("READ TOC (tracks)", err);
+ return -1;
+ }
+
+ /* Take care to not iterate beyond the last valid track as specified in
+ * the TOC, but also avoid going beyond the TOC length, just in case
+ * the last track number is invalidly large */
+ for (p = toc+4, i = 4; i < len-8 && num_tracks > 0; i += 8, p += 8, --num_tracks) {
+ unsigned block;
+ unsigned is_data_track;
+
+ is_data_track = (p[1] & 0x04) != 0;
+
+ block = p[4] << 24 | p[5] << 16 | p[6] << 8 | p[7];
+ log_debug("track=%u info=0x%x(%s) start_block=%u",
+ p[2], p[1] & 0x0f, is_data_track ? "data":"audio", block);
+
+ if (is_data_track)
+ cd_media_track_count_data++;
+ else
+ cd_media_track_count_audio++;
+ }
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x43);
+ scsi_cmd_set(&sc, 2, 1); /* Session Info */
+ scsi_cmd_set(&sc, 8, sizeof(header));
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, header, sizeof(header));
+ if ((err != 0)) {
+ info_scsi_cmd_err("READ TOC (multi session)", err);
+ return -1;
+ }
+ len = header[4+4] << 24 | header[4+5] << 16 | header[4+6] << 8 | header[4+7];
+ log_debug("last track %u starts at block %u", header[4+2], len);
+ cd_media_session_last_offset = (unsigned long long int)len * 2048;
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "lock-media", no_argument, NULL, 'l' },
+ { "unlock-media", no_argument, NULL, 'u' },
+ { "eject-media", no_argument, NULL, 'e' },
+ { "debug", no_argument, NULL, 'd' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+ bool eject = false;
+ bool lock = false;
+ bool unlock = false;
+ const char *node = NULL;
+ int fd = -1;
+ int cnt;
+ int rc = 0;
+
+ log_set_target(LOG_TARGET_AUTO);
+ udev_parse_config();
+ log_parse_environment();
+ log_open();
+
+ for (;;) {
+ int option;
+
+ option = getopt_long(argc, argv, "deluh", options, NULL);
+ if (option == -1)
+ break;
+
+ switch (option) {
+ case 'l':
+ lock = true;
+ break;
+ case 'u':
+ unlock = true;
+ break;
+ case 'e':
+ eject = true;
+ break;
+ case 'd':
+ log_set_target(LOG_TARGET_CONSOLE);
+ log_set_max_level(LOG_DEBUG);
+ log_open();
+ break;
+ case 'h':
+ printf("Usage: cdrom_id [options] <device>\n"
+ " -l,--lock-media lock the media (to enable eject request events)\n"
+ " -u,--unlock-media unlock the media\n"
+ " -e,--eject-media eject the media\n"
+ " -d,--debug debug to stderr\n"
+ " -h,--help print this help text\n\n");
+ goto exit;
+ default:
+ rc = 1;
+ goto exit;
+ }
+ }
+
+ node = argv[optind];
+ if (!node) {
+ log_error("no device");
+ rc = 1;
+ goto exit;
+ }
+
+ initialize_srand();
+ for (cnt = 20; cnt > 0; cnt--) {
+ struct timespec duration;
+
+ fd = open(node, O_RDONLY|O_NONBLOCK|O_CLOEXEC|(is_mounted(node) ? 0 : O_EXCL));
+ if (fd >= 0 || errno != EBUSY)
+ break;
+ duration.tv_sec = 0;
+ duration.tv_nsec = (100 * 1000 * 1000) + (rand() % 100 * 1000 * 1000);
+ nanosleep(&duration, NULL);
+ }
+ if (fd < 0) {
+ log_debug("unable to open '%s'", node);
+ rc = 1;
+ goto exit;
+ }
+ log_debug("probing: '%s'", node);
+
+ /* same data as original cdrom_id */
+ if (cd_capability_compat(fd) < 0) {
+ rc = 1;
+ goto exit;
+ }
+
+ /* check for media - don't bail if there's no media as we still need to
+ * to read profiles */
+ cd_media_compat(fd);
+
+ /* check if drive talks MMC */
+ if (cd_inquiry(fd) < 0)
+ goto work;
+
+ /* read drive and possibly current profile */
+ if (cd_profiles(fd) != 0)
+ goto work;
+
+ /* at this point we are guaranteed to have media in the drive - find out more about it */
+
+ /* get session/track info */
+ cd_media_toc(fd);
+
+ /* get writable media state */
+ cd_media_info(fd);
+
+work:
+ /* lock the media, so we enable eject button events */
+ if (lock && cd_media) {
+ log_debug("PREVENT_ALLOW_MEDIUM_REMOVAL (lock)");
+ media_lock(fd, true);
+ }
+
+ if (unlock && cd_media) {
+ log_debug("PREVENT_ALLOW_MEDIUM_REMOVAL (unlock)");
+ media_lock(fd, false);
+ }
+
+ if (eject) {
+ log_debug("PREVENT_ALLOW_MEDIUM_REMOVAL (unlock)");
+ media_lock(fd, false);
+ log_debug("START_STOP_UNIT (eject)");
+ media_eject(fd);
+ }
+
+ printf("ID_CDROM=1\n");
+ if (cd_cd_rom)
+ printf("ID_CDROM_CD=1\n");
+ if (cd_cd_r)
+ printf("ID_CDROM_CD_R=1\n");
+ if (cd_cd_rw)
+ printf("ID_CDROM_CD_RW=1\n");
+ if (cd_dvd_rom)
+ printf("ID_CDROM_DVD=1\n");
+ if (cd_dvd_r)
+ printf("ID_CDROM_DVD_R=1\n");
+ if (cd_dvd_rw)
+ printf("ID_CDROM_DVD_RW=1\n");
+ if (cd_dvd_ram)
+ printf("ID_CDROM_DVD_RAM=1\n");
+ if (cd_dvd_plus_r)
+ printf("ID_CDROM_DVD_PLUS_R=1\n");
+ if (cd_dvd_plus_rw)
+ printf("ID_CDROM_DVD_PLUS_RW=1\n");
+ if (cd_dvd_plus_r_dl)
+ printf("ID_CDROM_DVD_PLUS_R_DL=1\n");
+ if (cd_dvd_plus_rw_dl)
+ printf("ID_CDROM_DVD_PLUS_RW_DL=1\n");
+ if (cd_bd)
+ printf("ID_CDROM_BD=1\n");
+ if (cd_bd_r)
+ printf("ID_CDROM_BD_R=1\n");
+ if (cd_bd_re)
+ printf("ID_CDROM_BD_RE=1\n");
+ if (cd_hddvd)
+ printf("ID_CDROM_HDDVD=1\n");
+ if (cd_hddvd_r)
+ printf("ID_CDROM_HDDVD_R=1\n");
+ if (cd_hddvd_rw)
+ printf("ID_CDROM_HDDVD_RW=1\n");
+ if (cd_mo)
+ printf("ID_CDROM_MO=1\n");
+ if (cd_mrw)
+ printf("ID_CDROM_MRW=1\n");
+ if (cd_mrw_w)
+ printf("ID_CDROM_MRW_W=1\n");
+
+ if (cd_media)
+ printf("ID_CDROM_MEDIA=1\n");
+ if (cd_media_mo)
+ printf("ID_CDROM_MEDIA_MO=1\n");
+ if (cd_media_mrw)
+ printf("ID_CDROM_MEDIA_MRW=1\n");
+ if (cd_media_mrw_w)
+ printf("ID_CDROM_MEDIA_MRW_W=1\n");
+ if (cd_media_cd_rom)
+ printf("ID_CDROM_MEDIA_CD=1\n");
+ if (cd_media_cd_r)
+ printf("ID_CDROM_MEDIA_CD_R=1\n");
+ if (cd_media_cd_rw)
+ printf("ID_CDROM_MEDIA_CD_RW=1\n");
+ if (cd_media_dvd_rom)
+ printf("ID_CDROM_MEDIA_DVD=1\n");
+ if (cd_media_dvd_r)
+ printf("ID_CDROM_MEDIA_DVD_R=1\n");
+ if (cd_media_dvd_ram)
+ printf("ID_CDROM_MEDIA_DVD_RAM=1\n");
+ if (cd_media_dvd_rw)
+ printf("ID_CDROM_MEDIA_DVD_RW=1\n");
+ if (cd_media_dvd_plus_r)
+ printf("ID_CDROM_MEDIA_DVD_PLUS_R=1\n");
+ if (cd_media_dvd_plus_rw)
+ printf("ID_CDROM_MEDIA_DVD_PLUS_RW=1\n");
+ if (cd_media_dvd_plus_rw_dl)
+ printf("ID_CDROM_MEDIA_DVD_PLUS_RW_DL=1\n");
+ if (cd_media_dvd_plus_r_dl)
+ printf("ID_CDROM_MEDIA_DVD_PLUS_R_DL=1\n");
+ if (cd_media_bd)
+ printf("ID_CDROM_MEDIA_BD=1\n");
+ if (cd_media_bd_r)
+ printf("ID_CDROM_MEDIA_BD_R=1\n");
+ if (cd_media_bd_re)
+ printf("ID_CDROM_MEDIA_BD_RE=1\n");
+ if (cd_media_hddvd)
+ printf("ID_CDROM_MEDIA_HDDVD=1\n");
+ if (cd_media_hddvd_r)
+ printf("ID_CDROM_MEDIA_HDDVD_R=1\n");
+ if (cd_media_hddvd_rw)
+ printf("ID_CDROM_MEDIA_HDDVD_RW=1\n");
+
+ if (cd_media_state != NULL)
+ printf("ID_CDROM_MEDIA_STATE=%s\n", cd_media_state);
+ if (cd_media_session_next > 0)
+ printf("ID_CDROM_MEDIA_SESSION_NEXT=%u\n", cd_media_session_next);
+ if (cd_media_session_count > 0)
+ printf("ID_CDROM_MEDIA_SESSION_COUNT=%u\n", cd_media_session_count);
+ if (cd_media_session_count > 1 && cd_media_session_last_offset > 0)
+ printf("ID_CDROM_MEDIA_SESSION_LAST_OFFSET=%llu\n", cd_media_session_last_offset);
+ if (cd_media_track_count > 0)
+ printf("ID_CDROM_MEDIA_TRACK_COUNT=%u\n", cd_media_track_count);
+ if (cd_media_track_count_audio > 0)
+ printf("ID_CDROM_MEDIA_TRACK_COUNT_AUDIO=%u\n", cd_media_track_count_audio);
+ if (cd_media_track_count_data > 0)
+ printf("ID_CDROM_MEDIA_TRACK_COUNT_DATA=%u\n", cd_media_track_count_data);
+exit:
+ if (fd >= 0)
+ close(fd);
+ log_close();
+ return rc;
+}
diff --git a/src/udev/generate-keyboard-keys-gperf.sh b/src/udev/generate-keyboard-keys-gperf.sh
new file mode 100755
index 0000000..efb0da2
--- /dev/null
+++ b/src/udev/generate-keyboard-keys-gperf.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+set -eu
+awk ' BEGIN {
+ print "%{\n\
+#if __GNUC__ >= 7\n\
+_Pragma(\"GCC diagnostic ignored \\\"-Wimplicit-fallthrough\\\"\")\n\
+#endif\n\
+%}"
+ print "struct key_name { const char* name; unsigned short id; };"
+ print "%null-strings"
+ print "%%"
+ }
+
+ /^KEY_/ { print tolower(substr($1 ,5)) ", " $1 }
+ { print tolower($1) ", " $1 }
+' < "$1"
diff --git a/src/udev/generate-keyboard-keys-list.sh b/src/udev/generate-keyboard-keys-list.sh
new file mode 100755
index 0000000..c055f7c
--- /dev/null
+++ b/src/udev/generate-keyboard-keys-list.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -eu
+
+$1 -dM -include linux/input.h - </dev/null | awk '
+ /\<(KEY_(MAX|MIN_INTERESTING))|(BTN_(MISC|MOUSE|JOYSTICK|GAMEPAD|DIGI|WHEEL|TRIGGER_HAPPY))\>/ { next }
+ /^#define[ \t]+(KEY|BTN)_[^ ]+[ \t]+[0-9BK]/ { print $2 }
+'
diff --git a/src/udev/meson.build b/src/udev/meson.build
new file mode 100644
index 0000000..9d3f6d1
--- /dev/null
+++ b/src/udev/meson.build
@@ -0,0 +1,199 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+udevadm_sources = files('''
+ udevadm.c
+ udevadm.h
+ udevadm-control.c
+ udevadm-hwdb.c
+ udevadm-info.c
+ udevadm-monitor.c
+ udevadm-settle.c
+ udevadm-test.c
+ udevadm-test-builtin.c
+ udevadm-trigger.c
+ udevadm-util.c
+ udevadm-util.h
+'''.split())
+
+systemd_udevd_sources = files('udevd.c')
+
+libudev_core_sources = '''
+ udev.h
+ udev-ctrl.c
+ udev-ctrl.h
+ udev-event.c
+ udev-node.c
+ udev-node.h
+ udev-rules.c
+ udev-watch.c
+ udev-watch.h
+ udev-builtin.c
+ udev-builtin.h
+ udev-builtin-btrfs.c
+ udev-builtin-hwdb.c
+ udev-builtin-input_id.c
+ udev-builtin-keyboard.c
+ udev-builtin-net_id.c
+ udev-builtin-net_setup_link.c
+ udev-builtin-path_id.c
+ udev-builtin-usb_id.c
+ net/link-config.c
+ net/link-config.h
+ net/ethtool-util.c
+ net/ethtool-util.h
+ net/naming-scheme.c
+ net/naming-scheme.h
+'''.split()
+
+if conf.get('HAVE_KMOD') == 1
+ libudev_core_sources += ['udev-builtin-kmod.c']
+endif
+
+if conf.get('HAVE_BLKID') == 1
+ libudev_core_sources += ['udev-builtin-blkid.c']
+endif
+
+if conf.get('HAVE_ACL') == 1
+ libudev_core_sources += ['udev-builtin-uaccess.c',
+ logind_acl_c,
+ sd_login_sources]
+endif
+
+############################################################
+
+generate_keyboard_keys_list = find_program('generate-keyboard-keys-list.sh')
+keyboard_keys_list_txt = custom_target(
+ 'keyboard-keys-list.txt',
+ output : 'keyboard-keys-list.txt',
+ command : [generate_keyboard_keys_list, cpp],
+ capture : true)
+
+generate_keyboard_keys_gperf = find_program('generate-keyboard-keys-gperf.sh')
+fname = 'keyboard-keys-from-name.gperf'
+gperf_file = custom_target(
+ fname,
+ input : keyboard_keys_list_txt,
+ output : fname,
+ command : [generate_keyboard_keys_gperf, '@INPUT@'],
+ capture : true)
+
+fname = 'keyboard-keys-from-name.h'
+keyboard_keys_from_name_h = custom_target(
+ fname,
+ input : gperf_file,
+ output : fname,
+ command : [gperf,
+ '-L', 'ANSI-C', '-t',
+ '-N', 'keyboard_lookup_key',
+ '-H', 'hash_key_name',
+ '-p', '-C',
+ '@INPUT@'],
+ capture : true)
+
+############################################################
+
+link_config_gperf_c = custom_target(
+ 'link-config-gperf.c',
+ input : 'net/link-config-gperf.gperf',
+ output : 'link-config-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+############################################################
+
+if get_option('link-udev-shared')
+ udev_link_with = [libshared]
+ udev_rpath = rootlibexecdir
+else
+ udev_link_with = [libshared_static,
+ libsystemd_static]
+ udev_rpath = ''
+endif
+
+libudev_basic = static_library(
+ 'udev-basic',
+ libudev_sources,
+ include_directories : includes,
+ c_args : ['-fvisibility=default'])
+
+libudev_static = static_library(
+ 'udev_static',
+ 'udev.h',
+ include_directories : includes,
+ link_with : udev_link_with,
+ link_whole : libudev_basic)
+
+static_libudev = get_option('static-libudev')
+static_libudev_pic = static_libudev == 'true' or static_libudev == 'pic'
+install_libudev_static = static_library(
+ 'udev',
+ basic_sources,
+ shared_sources,
+ libsystemd_sources,
+ libudev_sources,
+ disable_mempool_c,
+ include_directories : includes,
+ build_by_default : static_libudev != 'false',
+ install : static_libudev != 'false',
+ install_dir : rootlibdir,
+ link_depends : libudev_sym,
+ dependencies : libshared_deps + [libmount],
+ c_args : static_libudev_pic ? [] : ['-fno-PIC'],
+ pic : static_libudev_pic)
+
+libudev = shared_library(
+ 'udev',
+ disable_mempool_c,
+ version : libudev_version,
+ include_directories : includes,
+ link_args : ['-shared',
+ '-Wl,--version-script=' + libudev_sym_path],
+ link_with : [libsystemd_static, libshared_static],
+ link_whole : libudev_basic,
+ dependencies : [threads],
+ link_depends : libudev_sym,
+ install : true,
+ install_dir : rootlibdir)
+
+libudev_core_includes = [includes, include_directories('net')]
+libudev_core = static_library(
+ 'udev-core',
+ libudev_core_sources,
+ link_config_gperf_c,
+ keyboard_keys_from_name_h,
+ include_directories : libudev_core_includes,
+ c_args : ['-DLOG_REALM=LOG_REALM_UDEV'],
+ link_with : udev_link_with,
+ dependencies : [libblkid, libkmod])
+
+foreach prog : [['ata_id/ata_id.c'],
+ ['cdrom_id/cdrom_id.c'],
+ ['scsi_id/scsi_id.c',
+ 'scsi_id/scsi_id.h',
+ 'scsi_id/scsi_serial.c',
+ 'scsi_id/scsi.h'],
+ ['v4l_id/v4l_id.c'],
+ ['mtd_probe/mtd_probe.c',
+ 'mtd_probe/mtd_probe.h',
+ 'mtd_probe/probe_smartmedia.c']]
+
+ executable(prog[0].split('/')[0],
+ prog,
+ include_directories : includes,
+ c_args : ['-DLOG_REALM=LOG_REALM_UDEV'],
+ link_with : [libudev_static],
+ install_rpath : udev_rpath,
+ install : true,
+ install_dir : udevlibexecdir)
+endforeach
+
+install_data('udev.conf',
+ install_dir : join_paths(sysconfdir, 'udev'))
+
+configure_file(
+ input : 'udev.pc.in',
+ output : 'udev.pc',
+ configuration : substs,
+ install_dir : pkgconfigdatadir == 'no' ? '' : pkgconfigdatadir)
+
+meson.add_install_script('sh', '-c',
+ mkdir_p.format(join_paths(sysconfdir, 'udev/rules.d')))
diff --git a/src/udev/mtd_probe/mtd_probe.c b/src/udev/mtd_probe/mtd_probe.c
new file mode 100644
index 0000000..f47b133
--- /dev/null
+++ b/src/udev/mtd_probe/mtd_probe.c
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright © 2010 - Maxim Levitsky
+ *
+ * mtd_probe is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mtd_probe is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with mtd_probe; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <mtd/mtd-user.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "mtd_probe.h"
+
+int main(int argc, char** argv) {
+ _cleanup_close_ int mtd_fd = -1;
+ mtd_info_t mtd_info;
+
+ if (argc != 2) {
+ printf("usage: mtd_probe /dev/mtd[n]\n");
+ return EXIT_FAILURE;
+ }
+
+ mtd_fd = open(argv[1], O_RDONLY|O_CLOEXEC);
+ if (mtd_fd < 0) {
+ log_error_errno(errno, "Failed to open: %m");
+ return EXIT_FAILURE;
+ }
+
+ if (ioctl(mtd_fd, MEMGETINFO, &mtd_info) < 0) {
+ log_error_errno(errno, "Failed to issue MEMGETINFO ioctl: %m");
+ return EXIT_FAILURE;
+ }
+
+ if (probe_smart_media(mtd_fd, &mtd_info) < 0)
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/udev/mtd_probe/mtd_probe.h b/src/udev/mtd_probe/mtd_probe.h
new file mode 100644
index 0000000..1d8c93b
--- /dev/null
+++ b/src/udev/mtd_probe/mtd_probe.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#pragma once
+
+/*
+ * Copyright © 2010 - Maxim Levitsky
+ *
+ * mtd_probe is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mtd_probe is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with mtd_probe; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+
+#include <mtd/mtd-user.h>
+
+#include "macro.h"
+
+/* Full oob structure as written on the flash */
+struct sm_oob {
+ uint32_t reserved;
+ uint8_t data_status;
+ uint8_t block_status;
+ uint8_t lba_copy1[2];
+ uint8_t ecc2[3];
+ uint8_t lba_copy2[2];
+ uint8_t ecc1[3];
+} _packed_;
+
+/* one sector is always 512 bytes, but it can consist of two nand pages */
+#define SM_SECTOR_SIZE 512
+
+/* oob area is also 16 bytes, but might be from two pages */
+#define SM_OOB_SIZE 16
+
+/* This is maximum zone size, and all devices that have more that one zone
+ have this size */
+#define SM_MAX_ZONE_SIZE 1024
+
+/* support for small page nand */
+#define SM_SMALL_PAGE 256
+#define SM_SMALL_OOB_SIZE 8
+
+int probe_smart_media(int mtd_fd, mtd_info_t *info);
diff --git a/src/udev/mtd_probe/probe_smartmedia.c b/src/udev/mtd_probe/probe_smartmedia.c
new file mode 100644
index 0000000..f8e1b14
--- /dev/null
+++ b/src/udev/mtd_probe/probe_smartmedia.c
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright © 2010 - Maxim Levitsky
+ *
+ * mtd_probe is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mtd_probe is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with mtd_probe; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <mtd/mtd-user.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "mtd_probe.h"
+
+static const uint8_t cis_signature[] = {
+ 0x01, 0x03, 0xD9, 0x01, 0xFF, 0x18, 0x02, 0xDF, 0x01, 0x20
+};
+
+int probe_smart_media(int mtd_fd, mtd_info_t* info) {
+ int sector_size;
+ int block_size;
+ int size_in_megs;
+ int spare_count;
+ _cleanup_free_ uint8_t *cis_buffer = NULL;
+ int offset;
+ int cis_found = 0;
+
+ cis_buffer = malloc(SM_SECTOR_SIZE);
+ if (!cis_buffer)
+ return log_oom();
+
+ if (info->type != MTD_NANDFLASH)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not marked MTD_NANDFLASH.");
+
+ sector_size = info->writesize;
+ block_size = info->erasesize;
+ size_in_megs = info->size / (1024 * 1024);
+
+ if (!IN_SET(sector_size, SM_SECTOR_SIZE, SM_SMALL_PAGE))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected sector size: %i", sector_size);
+
+ switch(size_in_megs) {
+ case 1:
+ case 2:
+ spare_count = 6;
+ break;
+ case 4:
+ spare_count = 12;
+ break;
+ default:
+ spare_count = 24;
+ break;
+ }
+
+ for (offset = 0; offset < block_size * spare_count; offset += sector_size) {
+ (void) lseek(mtd_fd, SEEK_SET, offset);
+
+ if (read(mtd_fd, cis_buffer, SM_SECTOR_SIZE) == SM_SECTOR_SIZE) {
+ cis_found = 1;
+ break;
+ }
+ }
+
+ if (!cis_found)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "CIS not found");
+
+ if (memcmp(cis_buffer, cis_signature, sizeof(cis_signature)) != 0 &&
+ memcmp(cis_buffer + SM_SMALL_PAGE, cis_signature, sizeof(cis_signature)) != 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "CIS signature didn't match");
+
+ printf("MTD_FTL=smartmedia\n");
+ return 0;
+}
diff --git a/src/udev/net/ethtool-util.c b/src/udev/net/ethtool-util.c
new file mode 100644
index 0000000..0dcec03
--- /dev/null
+++ b/src/udev/net/ethtool-util.c
@@ -0,0 +1,787 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+
+#include "conf-parser.h"
+#include "ethtool-util.h"
+#include "link-config.h"
+#include "log.h"
+#include "missing.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "strxcpyx.h"
+#include "util.h"
+
+static const char* const duplex_table[_DUP_MAX] = {
+ [DUP_FULL] = "full",
+ [DUP_HALF] = "half"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(duplex, Duplex);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_duplex, duplex, Duplex, "Failed to parse duplex setting");
+
+static const char* const wol_table[_WOL_MAX] = {
+ [WOL_PHY] = "phy",
+ [WOL_UCAST] = "unicast",
+ [WOL_MCAST] = "multicast",
+ [WOL_BCAST] = "broadcast",
+ [WOL_ARP] = "arp",
+ [WOL_MAGIC] = "magic",
+ [WOL_MAGICSECURE] = "secureon",
+ [WOL_OFF] = "off",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(wol, WakeOnLan);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_wol, wol, WakeOnLan, "Failed to parse WakeOnLan setting");
+
+static const char* const port_table[] = {
+ [NET_DEV_PORT_TP] = "tp",
+ [NET_DEV_PORT_AUI] = "aui",
+ [NET_DEV_PORT_MII] = "mii",
+ [NET_DEV_PORT_FIBRE] = "fibre",
+ [NET_DEV_PORT_BNC] = "bnc",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(port, NetDevPort);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_port, port, NetDevPort, "Failed to parse Port setting");
+
+static const char* const netdev_feature_table[_NET_DEV_FEAT_MAX] = {
+ [NET_DEV_FEAT_GSO] = "tx-generic-segmentation",
+ [NET_DEV_FEAT_GRO] = "rx-gro",
+ [NET_DEV_FEAT_LRO] = "rx-lro",
+ [NET_DEV_FEAT_TSO] = "tx-tcp-segmentation",
+ [NET_DEV_FEAT_TSO6] = "tx-tcp6-segmentation",
+};
+
+static const char* const ethtool_link_mode_bit_table[] = {
+ [ETHTOOL_LINK_MODE_10baseT_Half_BIT] = "10baset-half",
+ [ETHTOOL_LINK_MODE_10baseT_Full_BIT] = "10baset-full",
+ [ETHTOOL_LINK_MODE_100baseT_Half_BIT] = "100baset-half",
+ [ETHTOOL_LINK_MODE_100baseT_Full_BIT] = "100baset-full",
+ [ETHTOOL_LINK_MODE_1000baseT_Half_BIT] = "1000baset-half",
+ [ETHTOOL_LINK_MODE_1000baseT_Full_BIT] = "1000baset-full",
+ [ETHTOOL_LINK_MODE_Autoneg_BIT] = "autonegotiation",
+ [ETHTOOL_LINK_MODE_TP_BIT] = "tp",
+ [ETHTOOL_LINK_MODE_AUI_BIT] = "aui",
+ [ETHTOOL_LINK_MODE_MII_BIT] = "mii",
+ [ETHTOOL_LINK_MODE_FIBRE_BIT] = "fibre",
+ [ETHTOOL_LINK_MODE_BNC_BIT] = "bnc",
+ [ETHTOOL_LINK_MODE_10000baseT_Full_BIT] = "10000baset-full",
+ [ETHTOOL_LINK_MODE_Pause_BIT] = "pause",
+ [ETHTOOL_LINK_MODE_Asym_Pause_BIT] = "asym-pause",
+ [ETHTOOL_LINK_MODE_2500baseX_Full_BIT] = "2500basex-full",
+ [ETHTOOL_LINK_MODE_Backplane_BIT] = "backplane",
+ [ETHTOOL_LINK_MODE_1000baseKX_Full_BIT] = "1000basekx-full",
+ [ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT] = "10000basekx4-full",
+ [ETHTOOL_LINK_MODE_10000baseKR_Full_BIT] = "10000basekr-full",
+ [ETHTOOL_LINK_MODE_10000baseR_FEC_BIT] = "10000baser-fec",
+ [ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT] = "20000basemld2-full",
+ [ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT] = "20000basekr2-full",
+ [ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT] = "40000basekr4-full",
+ [ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT] = "40000basecr4-full",
+ [ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT] = "40000basesr4-full",
+ [ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT] = "40000baselr4-full",
+ [ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT] = "56000basekr4-full",
+ [ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT] = "56000basecr4-full",
+ [ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT] = "56000basesr4-full",
+ [ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT] = "56000baselr4-full",
+ [ETHTOOL_LINK_MODE_25000baseCR_Full_BIT] = "25000basecr-full",
+ [ETHTOOL_LINK_MODE_25000baseKR_Full_BIT] = "25000basekr-full",
+ [ETHTOOL_LINK_MODE_25000baseSR_Full_BIT] = "25000basesr-full",
+ [ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT] = "50000basecr2-full",
+ [ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT] = "50000basekr2-full",
+ [ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT] = "100000basekr4-full",
+ [ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT] = "100000basesr4-full",
+ [ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT] = "100000basecr4-full",
+ [ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT] = "100000baselr4-er4-full",
+ [ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT] = "50000basesr2-full",
+ [ETHTOOL_LINK_MODE_1000baseX_Full_BIT] = "1000basex-full",
+ [ETHTOOL_LINK_MODE_10000baseCR_Full_BIT] = "10000basecr-full",
+ [ETHTOOL_LINK_MODE_10000baseSR_Full_BIT] = "10000basesr-full",
+ [ETHTOOL_LINK_MODE_10000baseLR_Full_BIT] = "10000baselr-full",
+ [ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT] = "10000baselrm-full",
+ [ETHTOOL_LINK_MODE_10000baseER_Full_BIT] = "10000baseer-full",
+ [ETHTOOL_LINK_MODE_2500baseT_Full_BIT] = "2500baset-full",
+ [ETHTOOL_LINK_MODE_5000baseT_Full_BIT] = "5000baset-full",
+ [ETHTOOL_LINK_MODE_FEC_NONE_BIT] = "fec-none",
+ [ETHTOOL_LINK_MODE_FEC_RS_BIT] = "fec-rs",
+ [ETHTOOL_LINK_MODE_FEC_BASER_BIT] = "fec-baser",
+};
+/* Make sure the array is large enough to fit all bits */
+assert_cc((ELEMENTSOF(ethtool_link_mode_bit_table)-1) / 32 < ELEMENTSOF(((struct link_config){}).advertise));
+
+DEFINE_STRING_TABLE_LOOKUP(ethtool_link_mode_bit, enum ethtool_link_mode_bit_indices);
+
+int ethtool_connect(int *ret) {
+ int fd;
+
+ assert_return(ret, -EINVAL);
+
+ fd = socket_ioctl_fd();
+ if (fd < 0)
+ return fd;
+
+ *ret = fd;
+
+ return 0;
+}
+
+int ethtool_get_driver(int *fd, const char *ifname, char **ret) {
+ struct ethtool_drvinfo ecmd = {
+ .cmd = ETHTOOL_GDRVINFO
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd
+ };
+ char *d;
+ int r;
+
+ if (*fd < 0) {
+ r = ethtool_connect(fd);
+ if (r < 0)
+ return log_warning_errno(r, "link_config: could not connect to ethtool: %m");
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ d = strdup(ecmd.driver);
+ if (!d)
+ return -ENOMEM;
+
+ *ret = d;
+ return 0;
+}
+
+int ethtool_set_speed(int *fd, const char *ifname, unsigned speed, Duplex duplex) {
+ struct ethtool_cmd ecmd = {
+ .cmd = ETHTOOL_GSET
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd
+ };
+ bool need_update = false;
+ int r;
+
+ if (speed == 0 && duplex == _DUP_INVALID)
+ return 0;
+
+ if (*fd < 0) {
+ r = ethtool_connect(fd);
+ if (r < 0)
+ return log_warning_errno(r, "link_config: could not connect to ethtool: %m");
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (ethtool_cmd_speed(&ecmd) != speed) {
+ ethtool_cmd_speed_set(&ecmd, speed);
+ need_update = true;
+ }
+
+ switch (duplex) {
+ case DUP_HALF:
+ if (ecmd.duplex != DUPLEX_HALF) {
+ ecmd.duplex = DUPLEX_HALF;
+ need_update = true;
+ }
+ break;
+ case DUP_FULL:
+ if (ecmd.duplex != DUPLEX_FULL) {
+ ecmd.duplex = DUPLEX_FULL;
+ need_update = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (need_update) {
+ ecmd.cmd = ETHTOOL_SSET;
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int ethtool_set_wol(int *fd, const char *ifname, WakeOnLan wol) {
+ struct ethtool_wolinfo ecmd = {
+ .cmd = ETHTOOL_GWOL
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd
+ };
+ bool need_update = false;
+ int r;
+
+ if (wol == _WOL_INVALID)
+ return 0;
+
+ if (*fd < 0) {
+ r = ethtool_connect(fd);
+ if (r < 0)
+ return log_warning_errno(r, "link_config: could not connect to ethtool: %m");
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ switch (wol) {
+ case WOL_PHY:
+ if (ecmd.wolopts != WAKE_PHY) {
+ ecmd.wolopts = WAKE_PHY;
+ need_update = true;
+ }
+ break;
+ case WOL_UCAST:
+ if (ecmd.wolopts != WAKE_UCAST) {
+ ecmd.wolopts = WAKE_UCAST;
+ need_update = true;
+ }
+ break;
+ case WOL_MCAST:
+ if (ecmd.wolopts != WAKE_MCAST) {
+ ecmd.wolopts = WAKE_MCAST;
+ need_update = true;
+ }
+ break;
+ case WOL_BCAST:
+ if (ecmd.wolopts != WAKE_BCAST) {
+ ecmd.wolopts = WAKE_BCAST;
+ need_update = true;
+ }
+ break;
+ case WOL_ARP:
+ if (ecmd.wolopts != WAKE_ARP) {
+ ecmd.wolopts = WAKE_ARP;
+ need_update = true;
+ }
+ break;
+ case WOL_MAGIC:
+ if (ecmd.wolopts != WAKE_MAGIC) {
+ ecmd.wolopts = WAKE_MAGIC;
+ need_update = true;
+ }
+ break;
+ case WOL_MAGICSECURE:
+ if (ecmd.wolopts != WAKE_MAGICSECURE) {
+ ecmd.wolopts = WAKE_MAGICSECURE;
+ need_update = true;
+ }
+ break;
+ case WOL_OFF:
+ if (ecmd.wolopts != 0) {
+ ecmd.wolopts = 0;
+ need_update = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (need_update) {
+ ecmd.cmd = ETHTOOL_SWOL;
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int get_stringset(int fd, struct ifreq *ifr, int stringset_id, struct ethtool_gstrings **gstrings) {
+ _cleanup_free_ struct ethtool_gstrings *strings = NULL;
+ struct {
+ struct ethtool_sset_info info;
+ uint32_t space;
+ } buffer = {
+ .info = {
+ .cmd = ETHTOOL_GSSET_INFO,
+ .sset_mask = UINT64_C(1) << stringset_id,
+ },
+ };
+ unsigned len;
+ int r;
+
+ ifr->ifr_data = (void *) &buffer.info;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ if (!buffer.info.sset_mask)
+ return -EINVAL;
+
+ len = buffer.info.data[0];
+
+ strings = malloc0(sizeof(struct ethtool_gstrings) + len * ETH_GSTRING_LEN);
+ if (!strings)
+ return -ENOMEM;
+
+ strings->cmd = ETHTOOL_GSTRINGS;
+ strings->string_set = stringset_id;
+ strings->len = len;
+
+ ifr->ifr_data = (void *) strings;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ *gstrings = TAKE_PTR(strings);
+
+ return 0;
+}
+
+static int find_feature_index(struct ethtool_gstrings *strings, const char *feature) {
+ unsigned i;
+
+ for (i = 0; i < strings->len; i++) {
+ if (streq((char *) &strings->data[i * ETH_GSTRING_LEN], feature))
+ return i;
+ }
+
+ return -1;
+}
+
+int ethtool_set_features(int *fd, const char *ifname, int *features) {
+ _cleanup_free_ struct ethtool_gstrings *strings = NULL;
+ struct ethtool_sfeatures *sfeatures;
+ int block, bit, i, r;
+ struct ifreq ifr = {};
+
+ if (*fd < 0) {
+ r = ethtool_connect(fd);
+ if (r < 0)
+ return log_warning_errno(r, "link_config: could not connect to ethtool: %m");
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = get_stringset(*fd, &ifr, ETH_SS_FEATURES, &strings);
+ if (r < 0)
+ return log_warning_errno(r, "link_config: could not get ethtool features for %s", ifname);
+
+ sfeatures = alloca0(sizeof(struct ethtool_sfeatures) + DIV_ROUND_UP(strings->len, 32U) * sizeof(sfeatures->features[0]));
+ sfeatures->cmd = ETHTOOL_SFEATURES;
+ sfeatures->size = DIV_ROUND_UP(strings->len, 32U);
+
+ for (i = 0; i < _NET_DEV_FEAT_MAX; i++) {
+
+ if (features[i] != -1) {
+
+ r = find_feature_index(strings, netdev_feature_table[i]);
+ if (r < 0) {
+ log_warning_errno(r, "link_config: could not find feature: %s", netdev_feature_table[i]);
+ continue;
+ }
+
+ block = r / 32;
+ bit = r % 32;
+
+ sfeatures->features[block].valid |= 1 << bit;
+
+ if (features[i])
+ sfeatures->features[block].requested |= 1 << bit;
+ else
+ sfeatures->features[block].requested &= ~(1 << bit);
+ }
+ }
+
+ ifr.ifr_data = (void *) sfeatures;
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return log_warning_errno(r, "link_config: could not set ethtool features for %s", ifname);
+
+ return 0;
+}
+
+static int get_glinksettings(int fd, struct ifreq *ifr, struct ethtool_link_usettings **g) {
+ struct ecmd {
+ struct ethtool_link_settings req;
+ __u32 link_mode_data[3 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ } ecmd = {
+ .req.cmd = ETHTOOL_GLINKSETTINGS,
+ };
+ struct ethtool_link_usettings *u;
+ unsigned offset;
+ int r;
+
+ /* The interaction user/kernel via the new API requires a small ETHTOOL_GLINKSETTINGS
+ handshake first to agree on the length of the link mode bitmaps. If kernel doesn't
+ agree with user, it returns the bitmap length it is expecting from user as a negative
+ length (and cmd field is 0). When kernel and user agree, kernel returns valid info in
+ all fields (ie. link mode length > 0 and cmd is ETHTOOL_GLINKSETTINGS). Based on
+ https://github.com/torvalds/linux/commit/3f1ac7a700d039c61d8d8b99f28d605d489a60cf
+ */
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ if (ecmd.req.link_mode_masks_nwords >= 0 || ecmd.req.cmd != ETHTOOL_GLINKSETTINGS)
+ return -EOPNOTSUPP;
+
+ ecmd.req.link_mode_masks_nwords = -ecmd.req.link_mode_masks_nwords;
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ if (ecmd.req.link_mode_masks_nwords <= 0 || ecmd.req.cmd != ETHTOOL_GLINKSETTINGS)
+ return -EOPNOTSUPP;
+
+ u = new0(struct ethtool_link_usettings , 1);
+ if (!u)
+ return -ENOMEM;
+
+ u->base = ecmd.req;
+
+ offset = 0;
+ memcpy(u->link_modes.supported, &ecmd.link_mode_data[offset], 4 * ecmd.req.link_mode_masks_nwords);
+
+ offset += ecmd.req.link_mode_masks_nwords;
+ memcpy(u->link_modes.advertising, &ecmd.link_mode_data[offset], 4 * ecmd.req.link_mode_masks_nwords);
+
+ offset += ecmd.req.link_mode_masks_nwords;
+ memcpy(u->link_modes.lp_advertising, &ecmd.link_mode_data[offset], 4 * ecmd.req.link_mode_masks_nwords);
+
+ *g = u;
+
+ return 0;
+}
+
+static int get_gset(int fd, struct ifreq *ifr, struct ethtool_link_usettings **u) {
+ struct ethtool_link_usettings *e;
+ struct ethtool_cmd ecmd = {
+ .cmd = ETHTOOL_GSET,
+ };
+ int r;
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ e = new0(struct ethtool_link_usettings, 1);
+ if (!e)
+ return -ENOMEM;
+
+ e->base.cmd = ETHTOOL_GSET;
+
+ e->base.link_mode_masks_nwords = 1;
+ e->base.speed = ethtool_cmd_speed(&ecmd);
+ e->base.duplex = ecmd.duplex;
+ e->base.port = ecmd.port;
+ e->base.phy_address = ecmd.phy_address;
+ e->base.autoneg = ecmd.autoneg;
+ e->base.mdio_support = ecmd.mdio_support;
+
+ e->link_modes.supported[0] = ecmd.supported;
+ e->link_modes.advertising[0] = ecmd.advertising;
+ e->link_modes.lp_advertising[0] = ecmd.lp_advertising;
+
+ *u = e;
+
+ return 0;
+}
+
+static int set_slinksettings(int fd, struct ifreq *ifr, const struct ethtool_link_usettings *u) {
+ struct {
+ struct ethtool_link_settings req;
+ __u32 link_mode_data[3 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ } ecmd = {};
+ unsigned offset;
+ int r;
+
+ if (u->base.cmd != ETHTOOL_GLINKSETTINGS || u->base.link_mode_masks_nwords <= 0)
+ return -EINVAL;
+
+ ecmd.req = u->base;
+ ecmd.req.cmd = ETHTOOL_SLINKSETTINGS;
+ offset = 0;
+ memcpy(&ecmd.link_mode_data[offset], u->link_modes.supported, 4 * ecmd.req.link_mode_masks_nwords);
+
+ offset += ecmd.req.link_mode_masks_nwords;
+ memcpy(&ecmd.link_mode_data[offset], u->link_modes.advertising, 4 * ecmd.req.link_mode_masks_nwords);
+
+ offset += ecmd.req.link_mode_masks_nwords;
+ memcpy(&ecmd.link_mode_data[offset], u->link_modes.lp_advertising, 4 * ecmd.req.link_mode_masks_nwords);
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int set_sset(int fd, struct ifreq *ifr, const struct ethtool_link_usettings *u) {
+ struct ethtool_cmd ecmd = {
+ .cmd = ETHTOOL_SSET,
+ };
+ int r;
+
+ if (u->base.cmd != ETHTOOL_GSET || u->base.link_mode_masks_nwords <= 0)
+ return -EINVAL;
+
+ ecmd.supported = u->link_modes.supported[0];
+ ecmd.advertising = u->link_modes.advertising[0];
+ ecmd.lp_advertising = u->link_modes.lp_advertising[0];
+
+ ethtool_cmd_speed_set(&ecmd, u->base.speed);
+
+ ecmd.duplex = u->base.duplex;
+ ecmd.port = u->base.port;
+ ecmd.phy_address = u->base.phy_address;
+ ecmd.autoneg = u->base.autoneg;
+ ecmd.mdio_support = u->base.mdio_support;
+ ecmd.eth_tp_mdix = u->base.eth_tp_mdix;
+ ecmd.eth_tp_mdix_ctrl = u->base.eth_tp_mdix_ctrl;
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+/* If autonegotiation is disabled, the speed and duplex represent the fixed link
+ * mode and are writable if the driver supports multiple link modes. If it is
+ * enabled then they are read-only. If the link is up they represent the negotiated
+ * link mode; if the link is down, the speed is 0, %SPEED_UNKNOWN or the highest
+ * enabled speed and @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode.
+ */
+int ethtool_set_glinksettings(int *fd, const char *ifname, struct link_config *link) {
+ _cleanup_free_ struct ethtool_link_usettings *u = NULL;
+ struct ifreq ifr = {};
+ int r;
+
+ if (link->autonegotiation != AUTONEG_DISABLE && eqzero(link->advertise)) {
+ log_info("link_config: autonegotiation is unset or enabled, the speed and duplex are not writable.");
+ return 0;
+ }
+
+ if (*fd < 0) {
+ r = ethtool_connect(fd);
+ if (r < 0)
+ return log_warning_errno(r, "link_config: could not connect to ethtool: %m");
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = get_glinksettings(*fd, &ifr, &u);
+ if (r < 0) {
+ r = get_gset(*fd, &ifr, &u);
+ if (r < 0)
+ return log_warning_errno(r, "link_config: Cannot get device settings for %s : %m", ifname);
+ }
+
+ if (link->speed)
+ u->base.speed = DIV_ROUND_UP(link->speed, 1000000);
+
+ if (link->duplex != _DUP_INVALID)
+ u->base.duplex = link->duplex;
+
+ if (link->port != _NET_DEV_PORT_INVALID)
+ u->base.port = link->port;
+
+ if (link->autonegotiation >= 0)
+ u->base.autoneg = link->autonegotiation;
+
+ if (!eqzero(link->advertise)) {
+ u->base.autoneg = AUTONEG_ENABLE;
+ memcpy(&u->link_modes.advertising, link->advertise, sizeof(link->advertise));
+ memzero((uint8_t*) &u->link_modes.advertising + sizeof(link->advertise),
+ ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NBYTES - sizeof(link->advertise));
+ }
+
+ if (u->base.cmd == ETHTOOL_GLINKSETTINGS)
+ r = set_slinksettings(*fd, &ifr, u);
+ else
+ r = set_sset(*fd, &ifr, u);
+ if (r < 0)
+ return log_warning_errno(r, "link_config: Cannot set device settings for %s : %m", ifname);
+
+ return r;
+}
+
+int config_parse_channel(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ link_config *config = data;
+ uint32_t k;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou32(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse channel value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (k < 1) {
+ log_syntax(unit, LOG_ERR, filename, line, -EINVAL, "Invalid %s value, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "RxChannels")) {
+ config->channels.rx_count = k;
+ config->channels.rx_count_set = true;
+ } else if (streq(lvalue, "TxChannels")) {
+ config->channels.tx_count = k;
+ config->channels.tx_count_set = true;
+ } else if (streq(lvalue, "OtherChannels")) {
+ config->channels.other_count = k;
+ config->channels.other_count_set = true;
+ } else if (streq(lvalue, "CombinedChannels")) {
+ config->channels.combined_count = k;
+ config->channels.combined_count_set = true;
+ }
+
+ return 0;
+}
+
+int ethtool_set_channels(int *fd, const char *ifname, netdev_channels *channels) {
+ struct ethtool_channels ecmd = {
+ .cmd = ETHTOOL_GCHANNELS
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd
+ };
+
+ bool need_update = false;
+ int r;
+
+ if (*fd < 0) {
+ r = ethtool_connect(fd);
+ if (r < 0)
+ return log_warning_errno(r, "link_config: could not connect to ethtool: %m");
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (channels->rx_count_set && ecmd.rx_count != channels->rx_count) {
+ ecmd.rx_count = channels->rx_count;
+ need_update = true;
+ }
+
+ if (channels->tx_count_set && ecmd.tx_count != channels->tx_count) {
+ ecmd.tx_count = channels->tx_count;
+ need_update = true;
+ }
+
+ if (channels->other_count_set && ecmd.other_count != channels->other_count) {
+ ecmd.other_count = channels->other_count;
+ need_update = true;
+ }
+
+ if (channels->combined_count_set && ecmd.combined_count != channels->combined_count) {
+ ecmd.combined_count = channels->combined_count;
+ need_update = true;
+ }
+
+ if (need_update) {
+ ecmd.cmd = ETHTOOL_SCHANNELS;
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int config_parse_advertise(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ link_config *config = data;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty string resets the value. */
+ zero(config->advertise);
+ return 0;
+ }
+
+ for (p = rvalue;;) {
+ _cleanup_free_ char *w = NULL;
+ enum ethtool_link_mode_bit_indices mode;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to split advertise modes '%s', ignoring: %m", rvalue);
+ break;
+ }
+ if (r == 0)
+ break;
+
+ mode = ethtool_link_mode_bit_from_string(w);
+ if (mode < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse advertise mode, ignoring: %s", w);
+ continue;
+ }
+
+ config->advertise[mode / 32] |= 1UL << (mode % 32);
+ }
+
+ return 0;
+}
diff --git a/src/udev/net/ethtool-util.h b/src/udev/net/ethtool-util.h
new file mode 100644
index 0000000..618b26b
--- /dev/null
+++ b/src/udev/net/ethtool-util.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <macro.h>
+#include <linux/ethtool.h>
+
+#include "conf-parser.h"
+#include "missing_network.h"
+
+struct link_config;
+
+/* we can't use DUPLEX_ prefix, as it
+ * clashes with <linux/ethtool.h> */
+typedef enum Duplex {
+ DUP_HALF = DUPLEX_HALF,
+ DUP_FULL = DUPLEX_FULL,
+ _DUP_MAX,
+ _DUP_INVALID = -1
+} Duplex;
+
+typedef enum WakeOnLan {
+ WOL_PHY,
+ WOL_UCAST,
+ WOL_MCAST,
+ WOL_BCAST,
+ WOL_ARP,
+ WOL_MAGIC,
+ WOL_MAGICSECURE,
+ WOL_OFF,
+ _WOL_MAX,
+ _WOL_INVALID = -1
+} WakeOnLan;
+
+typedef enum NetDevFeature {
+ NET_DEV_FEAT_GSO,
+ NET_DEV_FEAT_GRO,
+ NET_DEV_FEAT_LRO,
+ NET_DEV_FEAT_TSO,
+ NET_DEV_FEAT_TSO6,
+ _NET_DEV_FEAT_MAX,
+ _NET_DEV_FEAT_INVALID = -1
+} NetDevFeature;
+
+typedef enum NetDevPort {
+ NET_DEV_PORT_TP = PORT_TP,
+ NET_DEV_PORT_AUI = PORT_AUI,
+ NET_DEV_PORT_MII = PORT_MII,
+ NET_DEV_PORT_FIBRE = PORT_FIBRE,
+ NET_DEV_PORT_BNC = PORT_BNC,
+ NET_DEV_PORT_DA = PORT_DA,
+ NET_DEV_PORT_NONE = PORT_NONE,
+ NET_DEV_PORT_OTHER = PORT_OTHER,
+ _NET_DEV_PORT_MAX,
+ _NET_DEV_PORT_INVALID = -1
+} NetDevPort;
+
+#define ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32 (SCHAR_MAX)
+#define ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NBYTES (4 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32)
+
+/* layout of the struct passed from/to userland */
+struct ethtool_link_usettings {
+ struct ethtool_link_settings base;
+
+ struct {
+ uint32_t supported[ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ uint32_t advertising[ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ uint32_t lp_advertising[ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ } link_modes;
+};
+
+typedef struct netdev_channels {
+ uint32_t rx_count;
+ uint32_t tx_count;
+ uint32_t other_count;
+ uint32_t combined_count;
+
+ bool rx_count_set;
+ bool tx_count_set;
+ bool other_count_set;
+ bool combined_count_set;
+} netdev_channels;
+
+int ethtool_connect(int *ret);
+
+int ethtool_get_driver(int *fd, const char *ifname, char **ret);
+int ethtool_set_speed(int *fd, const char *ifname, unsigned speed, Duplex duplex);
+int ethtool_set_wol(int *fd, const char *ifname, WakeOnLan wol);
+int ethtool_set_features(int *fd, const char *ifname, int *features);
+int ethtool_set_glinksettings(int *fd, const char *ifname, struct link_config *link);
+int ethtool_set_channels(int *fd, const char *ifname, netdev_channels *channels);
+
+const char *duplex_to_string(Duplex d) _const_;
+Duplex duplex_from_string(const char *d) _pure_;
+
+const char *wol_to_string(WakeOnLan wol) _const_;
+WakeOnLan wol_from_string(const char *wol) _pure_;
+
+const char *port_to_string(NetDevPort port) _const_;
+NetDevPort port_from_string(const char *port) _pure_;
+
+const char *ethtool_link_mode_bit_to_string(enum ethtool_link_mode_bit_indices val) _const_;
+enum ethtool_link_mode_bit_indices ethtool_link_mode_bit_from_string(const char *str) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_duplex);
+CONFIG_PARSER_PROTOTYPE(config_parse_wol);
+CONFIG_PARSER_PROTOTYPE(config_parse_port);
+CONFIG_PARSER_PROTOTYPE(config_parse_channel);
+CONFIG_PARSER_PROTOTYPE(config_parse_advertise);
diff --git a/src/udev/net/link-config-gperf.gperf b/src/udev/net/link-config-gperf.gperf
new file mode 100644
index 0000000..2bc18bf
--- /dev/null
+++ b/src/udev/net/link-config-gperf.gperf
@@ -0,0 +1,54 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "ethtool-util.h"
+#include "link-config.h"
+#include "network-internal.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name link_config_gperf_hash
+%define lookup-function-name link_config_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Match.MACAddress, config_parse_hwaddrs, 0, offsetof(link_config, match_mac)
+Match.OriginalName, config_parse_ifnames, 0, offsetof(link_config, match_name)
+Match.Path, config_parse_strv, 0, offsetof(link_config, match_path)
+Match.Driver, config_parse_strv, 0, offsetof(link_config, match_driver)
+Match.Type, config_parse_strv, 0, offsetof(link_config, match_type)
+Match.Host, config_parse_net_condition, CONDITION_HOST, offsetof(link_config, match_host)
+Match.Virtualization, config_parse_net_condition, CONDITION_VIRTUALIZATION, offsetof(link_config, match_virt)
+Match.KernelCommandLine, config_parse_net_condition, CONDITION_KERNEL_COMMAND_LINE, offsetof(link_config, match_kernel_cmdline)
+Match.KernelVersion, config_parse_net_condition, CONDITION_KERNEL_VERSION, offsetof(link_config, match_kernel_version)
+Match.Architecture, config_parse_net_condition, CONDITION_ARCHITECTURE, offsetof(link_config, match_arch)
+Link.Description, config_parse_string, 0, offsetof(link_config, description)
+Link.MACAddressPolicy, config_parse_mac_policy, 0, offsetof(link_config, mac_policy)
+Link.MACAddress, config_parse_hwaddr, 0, offsetof(link_config, mac)
+Link.NamePolicy, config_parse_name_policy, 0, offsetof(link_config, name_policy)
+Link.Name, config_parse_ifname, 0, offsetof(link_config, name)
+Link.Alias, config_parse_ifalias, 0, offsetof(link_config, alias)
+Link.MTUBytes, config_parse_mtu, AF_UNSPEC, offsetof(link_config, mtu)
+Link.BitsPerSecond, config_parse_si_size, 0, offsetof(link_config, speed)
+Link.Duplex, config_parse_duplex, 0, offsetof(link_config, duplex)
+Link.AutoNegotiation, config_parse_tristate, 0, offsetof(link_config, autonegotiation)
+Link.WakeOnLan, config_parse_wol, 0, offsetof(link_config, wol)
+Link.Port, config_parse_port, 0, offsetof(link_config, port)
+Link.GenericSegmentationOffload, config_parse_tristate, 0, offsetof(link_config, features[NET_DEV_FEAT_GSO])
+Link.TCPSegmentationOffload, config_parse_tristate, 0, offsetof(link_config, features[NET_DEV_FEAT_TSO])
+Link.TCP6SegmentationOffload, config_parse_tristate, 0, offsetof(link_config, features[NET_DEV_FEAT_TSO6])
+Link.UDPSegmentationOffload, config_parse_warn_compat, DISABLED_LEGACY, 0
+Link.GenericReceiveOffload, config_parse_tristate, 0, offsetof(link_config, features[NET_DEV_FEAT_GRO])
+Link.LargeReceiveOffload, config_parse_tristate, 0, offsetof(link_config, features[NET_DEV_FEAT_LRO])
+Link.RxChannels, config_parse_channel, 0, 0
+Link.TxChannels, config_parse_channel, 0, 0
+Link.OtherChannels, config_parse_channel, 0, 0
+Link.CombinedChannels, config_parse_channel, 0, 0
+Link.Advertise, config_parse_advertise, 0, 0
diff --git a/src/udev/net/link-config.c b/src/udev/net/link-config.c
new file mode 100644
index 0000000..eb2477c
--- /dev/null
+++ b/src/udev/net/link-config.c
@@ -0,0 +1,535 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <netinet/ether.h>
+
+#include "sd-device.h"
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "device-util.h"
+#include "ethtool-util.h"
+#include "fd-util.h"
+#include "link-config.h"
+#include "log.h"
+#include "missing_network.h"
+#include "naming-scheme.h"
+#include "netlink-util.h"
+#include "network-internal.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "random-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+struct link_config_ctx {
+ LIST_HEAD(link_config, links);
+
+ int ethtool_fd;
+
+ bool enable_name_policy;
+
+ sd_netlink *rtnl;
+
+ usec_t link_dirs_ts_usec;
+};
+
+static const char* const link_dirs[] = {
+ "/etc/systemd/network",
+ "/run/systemd/network",
+ "/usr/lib/systemd/network",
+#if HAVE_SPLIT_USR
+ "/lib/systemd/network",
+#endif
+ NULL};
+
+static void link_config_free(link_config *link) {
+ if (!link)
+ return;
+
+ free(link->filename);
+
+ set_free_free(link->match_mac);
+ strv_free(link->match_path);
+ strv_free(link->match_driver);
+ strv_free(link->match_type);
+ free(link->match_name);
+ free(link->match_host);
+ free(link->match_virt);
+ free(link->match_kernel_cmdline);
+ free(link->match_kernel_version);
+ free(link->match_arch);
+
+ free(link->description);
+ free(link->mac);
+ free(link->name_policy);
+ free(link->name);
+ free(link->alias);
+
+ free(link);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(link_config*, link_config_free);
+
+static void link_configs_free(link_config_ctx *ctx) {
+ link_config *link, *link_next;
+
+ if (!ctx)
+ return;
+
+ LIST_FOREACH_SAFE(links, link, link_next, ctx->links)
+ link_config_free(link);
+}
+
+void link_config_ctx_free(link_config_ctx *ctx) {
+ if (!ctx)
+ return;
+
+ safe_close(ctx->ethtool_fd);
+
+ sd_netlink_unref(ctx->rtnl);
+
+ link_configs_free(ctx);
+
+ free(ctx);
+
+ return;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(link_config_ctx*, link_config_ctx_free);
+
+int link_config_ctx_new(link_config_ctx **ret) {
+ _cleanup_(link_config_ctx_freep) link_config_ctx *ctx = NULL;
+
+ if (!ret)
+ return -EINVAL;
+
+ ctx = new0(link_config_ctx, 1);
+ if (!ctx)
+ return -ENOMEM;
+
+ LIST_HEAD_INIT(ctx->links);
+
+ ctx->ethtool_fd = -1;
+
+ ctx->enable_name_policy = true;
+
+ *ret = TAKE_PTR(ctx);
+
+ return 0;
+}
+
+static int load_link(link_config_ctx *ctx, const char *filename) {
+ _cleanup_(link_config_freep) link_config *link = NULL;
+ _cleanup_fclose_ FILE *file = NULL;
+ int i;
+ int r;
+
+ assert(ctx);
+ assert(filename);
+
+ file = fopen(filename, "re");
+ if (!file) {
+ if (errno == ENOENT)
+ return 0;
+ else
+ return -errno;
+ }
+
+ if (null_or_empty_fd(fileno(file))) {
+ log_debug("Skipping empty file: %s", filename);
+ return 0;
+ }
+
+ link = new0(link_config, 1);
+ if (!link)
+ return log_oom();
+
+ link->mac_policy = _MACPOLICY_INVALID;
+ link->wol = _WOL_INVALID;
+ link->duplex = _DUP_INVALID;
+ link->port = _NET_DEV_PORT_INVALID;
+ link->autonegotiation = -1;
+
+ for (i = 0; i < (int)ELEMENTSOF(link->features); i++)
+ link->features[i] = -1;
+
+ r = config_parse(NULL, filename, file,
+ "Match\0Link\0Ethernet\0",
+ config_item_perf_lookup, link_config_gperf_lookup,
+ CONFIG_PARSE_WARN, link);
+ if (r < 0)
+ return r;
+ else
+ log_debug("Parsed configuration file %s", filename);
+
+ if (link->speed > UINT_MAX)
+ return -ERANGE;
+
+ link->filename = strdup(filename);
+ if (!link->filename)
+ return log_oom();
+
+ LIST_PREPEND(links, ctx->links, link);
+ link = NULL;
+
+ return 0;
+}
+
+static bool enable_name_policy(void) {
+ bool b;
+
+ return proc_cmdline_get_bool("net.ifnames", &b) <= 0 || b;
+}
+
+static int link_name_type(sd_device *device, unsigned *type) {
+ const char *s;
+ int r;
+
+ r = sd_device_get_sysattr_value(device, "name_assign_type", &s);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "Failed to query name_assign_type: %m");
+
+ r = safe_atou(s, type);
+ if (r < 0)
+ return log_device_warning_errno(device, r, "Failed to parse name_assign_type \"%s\": %m", s);
+
+ log_device_debug(device, "Device has name_assign_type=%d", *type);
+ return 0;
+}
+
+int link_config_load(link_config_ctx *ctx) {
+ _cleanup_strv_free_ char **files;
+ char **f;
+ int r;
+
+ link_configs_free(ctx);
+
+ if (!enable_name_policy()) {
+ ctx->enable_name_policy = false;
+ log_info("Network interface NamePolicy= disabled on kernel command line, ignoring.");
+ }
+
+ /* update timestamp */
+ paths_check_timestamp(link_dirs, &ctx->link_dirs_ts_usec, true);
+
+ r = conf_files_list_strv(&files, ".link", NULL, 0, link_dirs);
+ if (r < 0)
+ return log_error_errno(r, "failed to enumerate link files: %m");
+
+ STRV_FOREACH_BACKWARDS(f, files) {
+ r = load_link(ctx, *f);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+bool link_config_should_reload(link_config_ctx *ctx) {
+ return paths_check_timestamp(link_dirs, &ctx->link_dirs_ts_usec, false);
+}
+
+int link_config_get(link_config_ctx *ctx, sd_device *device, link_config **ret) {
+ link_config *link;
+
+ assert(ctx);
+ assert(device);
+ assert(ret);
+
+ LIST_FOREACH(links, link, ctx->links) {
+ const char *address = NULL, *id_path = NULL, *parent_driver = NULL, *id_net_driver = NULL, *devtype = NULL, *sysname = NULL;
+ sd_device *parent;
+
+ (void) sd_device_get_sysattr_value(device, "address", &address);
+ (void) sd_device_get_property_value(device, "ID_PATH", &id_path);
+ if (sd_device_get_parent(device, &parent) >= 0)
+ (void) sd_device_get_driver(parent, &parent_driver);
+ (void) sd_device_get_property_value(device, "ID_NET_DRIVER", &id_net_driver);
+ (void) sd_device_get_devtype(device, &devtype);
+ (void) sd_device_get_sysname(device, &sysname);
+
+ if (net_match_config(link->match_mac, link->match_path, link->match_driver,
+ link->match_type, link->match_name, link->match_host,
+ link->match_virt, link->match_kernel_cmdline,
+ link->match_kernel_version, link->match_arch,
+ address ? ether_aton(address) : NULL,
+ id_path,
+ parent_driver,
+ id_net_driver,
+ devtype,
+ sysname)) {
+ if (link->match_name) {
+ unsigned char name_assign_type = NET_NAME_UNKNOWN;
+ const char *attr_value;
+
+ if (sd_device_get_sysattr_value(device, "name_assign_type", &attr_value) >= 0)
+ (void) safe_atou8(attr_value, &name_assign_type);
+
+ if (name_assign_type == NET_NAME_ENUM) {
+ log_warning("Config file %s applies to device based on potentially unpredictable interface name '%s'",
+ link->filename, sysname);
+ *ret = link;
+
+ return 0;
+ } else if (name_assign_type == NET_NAME_RENAMED) {
+ log_warning("Config file %s matches device based on renamed interface name '%s', ignoring",
+ link->filename, sysname);
+
+ continue;
+ }
+ }
+
+ log_debug("Config file %s applies to device %s",
+ link->filename, sysname);
+
+ *ret = link;
+ return 0;
+ }
+ }
+
+ *ret = NULL;
+ return -ENOENT;
+}
+
+static bool mac_is_random(sd_device *device) {
+ const char *s;
+ unsigned type;
+ int r;
+
+ /* if we can't get the assign type, assume it is not random */
+ if (sd_device_get_sysattr_value(device, "addr_assign_type", &s) < 0)
+ return false;
+
+ r = safe_atou(s, &type);
+ if (r < 0)
+ return false;
+
+ return type == NET_ADDR_RANDOM;
+}
+
+static int get_mac(sd_device *device, bool want_random,
+ struct ether_addr *mac) {
+ int r;
+
+ if (want_random)
+ random_bytes(mac->ether_addr_octet, ETH_ALEN);
+ else {
+ uint64_t result;
+
+ r = net_get_unique_predictable_data(device, &result);
+ if (r < 0)
+ return r;
+
+ assert_cc(ETH_ALEN <= sizeof(result));
+ memcpy(mac->ether_addr_octet, &result, ETH_ALEN);
+ }
+
+ /* see eth_random_addr in the kernel */
+ mac->ether_addr_octet[0] &= 0xfe; /* clear multicast bit */
+ mac->ether_addr_octet[0] |= 0x02; /* set local assignment bit (IEEE802) */
+
+ return 0;
+}
+
+int link_config_apply(link_config_ctx *ctx, link_config *config,
+ sd_device *device, const char **name) {
+ struct ether_addr generated_mac;
+ struct ether_addr *mac = NULL;
+ const char *new_name = NULL;
+ const char *old_name;
+ unsigned speed, name_type = NET_NAME_UNKNOWN;
+ NamePolicy policy;
+ int r, ifindex;
+
+ assert(ctx);
+ assert(config);
+ assert(device);
+ assert(name);
+
+ r = sd_device_get_sysname(device, &old_name);
+ if (r < 0)
+ return r;
+
+ r = ethtool_set_glinksettings(&ctx->ethtool_fd, old_name, config);
+ if (r < 0) {
+
+ if (config->port != _NET_DEV_PORT_INVALID)
+ log_warning_errno(r, "Could not set port (%s) of %s: %m", port_to_string(config->port), old_name);
+
+ if (!eqzero(config->advertise))
+ log_warning_errno(r, "Could not set advertise mode: %m"); /* TODO: include modes in the log message. */
+
+ if (config->speed) {
+ speed = DIV_ROUND_UP(config->speed, 1000000);
+ if (r == -EOPNOTSUPP) {
+ r = ethtool_set_speed(&ctx->ethtool_fd, old_name, speed, config->duplex);
+ if (r < 0)
+ log_warning_errno(r, "Could not set speed of %s to %u Mbps: %m", old_name, speed);
+ }
+ }
+
+ if (config->duplex !=_DUP_INVALID)
+ log_warning_errno(r, "Could not set duplex of %s to (%s): %m", old_name, duplex_to_string(config->duplex));
+ }
+
+ r = ethtool_set_wol(&ctx->ethtool_fd, old_name, config->wol);
+ if (r < 0)
+ log_warning_errno(r, "Could not set WakeOnLan of %s to %s: %m",
+ old_name, wol_to_string(config->wol));
+
+ r = ethtool_set_features(&ctx->ethtool_fd, old_name, config->features);
+ if (r < 0)
+ log_warning_errno(r, "Could not set offload features of %s: %m", old_name);
+
+ if (config->channels.rx_count_set || config->channels.tx_count_set || config->channels.other_count_set || config->channels.combined_count_set) {
+ r = ethtool_set_channels(&ctx->ethtool_fd, old_name, &config->channels);
+ if (r < 0)
+ log_warning_errno(r, "Could not set channels of %s: %m", old_name);
+ }
+
+ r = sd_device_get_ifindex(device, &ifindex);
+ if (r < 0)
+ return log_device_warning_errno(device, r, "Could not find ifindex: %m");
+
+
+ (void) link_name_type(device, &name_type);
+
+ if (IN_SET(name_type, NET_NAME_USER, NET_NAME_RENAMED)
+ && !naming_scheme_has(NAMING_ALLOW_RERENAMES)) {
+ log_device_debug(device, "Device already has a name given by userspace, not renaming.");
+ goto no_rename;
+ }
+
+ if (ctx->enable_name_policy && config->name_policy)
+ for (NamePolicy *p = config->name_policy; !new_name && *p != _NAMEPOLICY_INVALID; p++) {
+ policy = *p;
+
+ switch (policy) {
+ case NAMEPOLICY_KERNEL:
+ if (name_type != NET_NAME_PREDICTABLE)
+ continue;
+
+ /* The kernel claims to have given a predictable name, keep it. */
+ log_device_debug(device, "Policy *%s*: keeping predictable kernel name",
+ name_policy_to_string(policy));
+ goto no_rename;
+ case NAMEPOLICY_KEEP:
+ if (!IN_SET(name_type, NET_NAME_USER, NET_NAME_RENAMED))
+ continue;
+
+ log_device_debug(device, "Policy *%s*: keeping existing userspace name",
+ name_policy_to_string(policy));
+ goto no_rename;
+ case NAMEPOLICY_DATABASE:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_FROM_DATABASE", &new_name);
+ break;
+ case NAMEPOLICY_ONBOARD:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_ONBOARD", &new_name);
+ break;
+ case NAMEPOLICY_SLOT:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_SLOT", &new_name);
+ break;
+ case NAMEPOLICY_PATH:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_PATH", &new_name);
+ break;
+ case NAMEPOLICY_MAC:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_MAC", &new_name);
+ break;
+ default:
+ assert_not_reached("invalid policy");
+ }
+ }
+
+ if (new_name)
+ log_device_debug(device, "Policy *%s* yields \"%s\".", name_policy_to_string(policy), new_name);
+ else if (config->name) {
+ new_name = config->name;
+ log_device_debug(device, "Policies didn't yield a name, using specified Name=%s.", new_name);
+ } else
+ log_device_debug(device, "Policies didn't yield a name and Name= is not given, not renaming.");
+ no_rename:
+
+ switch (config->mac_policy) {
+ case MACPOLICY_PERSISTENT:
+ if (mac_is_random(device)) {
+ r = get_mac(device, false, &generated_mac);
+ if (r == -ENOENT) {
+ log_warning_errno(r, "Could not generate persistent MAC address for %s: %m", old_name);
+ break;
+ } else if (r < 0)
+ return r;
+ mac = &generated_mac;
+ }
+ break;
+ case MACPOLICY_RANDOM:
+ if (!mac_is_random(device)) {
+ r = get_mac(device, true, &generated_mac);
+ if (r == -ENOENT) {
+ log_warning_errno(r, "Could not generate random MAC address for %s: %m", old_name);
+ break;
+ } else if (r < 0)
+ return r;
+ mac = &generated_mac;
+ }
+ break;
+ case MACPOLICY_NONE:
+ default:
+ mac = config->mac;
+ }
+
+ r = rtnl_set_link_properties(&ctx->rtnl, ifindex, config->alias, mac, config->mtu);
+ if (r < 0)
+ return log_warning_errno(r, "Could not set Alias=, MACAddress= or MTU= on %s: %m", old_name);
+
+ *name = new_name;
+
+ return 0;
+}
+
+int link_get_driver(link_config_ctx *ctx, sd_device *device, char **ret) {
+ const char *name;
+ char *driver = NULL;
+ int r;
+
+ r = sd_device_get_sysname(device, &name);
+ if (r < 0)
+ return r;
+
+ r = ethtool_get_driver(&ctx->ethtool_fd, name, &driver);
+ if (r < 0)
+ return r;
+
+ *ret = driver;
+ return 0;
+}
+
+static const char* const mac_policy_table[_MACPOLICY_MAX] = {
+ [MACPOLICY_PERSISTENT] = "persistent",
+ [MACPOLICY_RANDOM] = "random",
+ [MACPOLICY_NONE] = "none",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(mac_policy, MACPolicy);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_mac_policy, mac_policy, MACPolicy,
+ "Failed to parse MAC address policy");
+
+static const char* const name_policy_table[_NAMEPOLICY_MAX] = {
+ [NAMEPOLICY_KERNEL] = "kernel",
+ [NAMEPOLICY_KEEP] = "keep",
+ [NAMEPOLICY_DATABASE] = "database",
+ [NAMEPOLICY_ONBOARD] = "onboard",
+ [NAMEPOLICY_SLOT] = "slot",
+ [NAMEPOLICY_PATH] = "path",
+ [NAMEPOLICY_MAC] = "mac",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(name_policy, NamePolicy);
+DEFINE_CONFIG_PARSE_ENUMV(config_parse_name_policy, name_policy, NamePolicy,
+ _NAMEPOLICY_INVALID,
+ "Failed to parse interface name policy");
diff --git a/src/udev/net/link-config.h b/src/udev/net/link-config.h
new file mode 100644
index 0000000..1113b10
--- /dev/null
+++ b/src/udev/net/link-config.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-device.h"
+
+#include "condition.h"
+#include "conf-parser.h"
+#include "ethtool-util.h"
+#include "list.h"
+#include "set.h"
+
+typedef struct link_config_ctx link_config_ctx;
+typedef struct link_config link_config;
+
+typedef enum MACPolicy {
+ MACPOLICY_PERSISTENT,
+ MACPOLICY_RANDOM,
+ MACPOLICY_NONE,
+ _MACPOLICY_MAX,
+ _MACPOLICY_INVALID = -1
+} MACPolicy;
+
+typedef enum NamePolicy {
+ NAMEPOLICY_KERNEL,
+ NAMEPOLICY_KEEP,
+ NAMEPOLICY_DATABASE,
+ NAMEPOLICY_ONBOARD,
+ NAMEPOLICY_SLOT,
+ NAMEPOLICY_PATH,
+ NAMEPOLICY_MAC,
+ _NAMEPOLICY_MAX,
+ _NAMEPOLICY_INVALID = -1
+} NamePolicy;
+
+struct link_config {
+ char *filename;
+
+ Set *match_mac;
+ char **match_path;
+ char **match_driver;
+ char **match_type;
+ char **match_name;
+ Condition *match_host;
+ Condition *match_virt;
+ Condition *match_kernel_cmdline;
+ Condition *match_kernel_version;
+ Condition *match_arch;
+
+ char *description;
+ struct ether_addr *mac;
+ MACPolicy mac_policy;
+ NamePolicy *name_policy;
+ char *name;
+ char *alias;
+ uint32_t mtu;
+ size_t speed;
+ Duplex duplex;
+ int autonegotiation;
+ uint32_t advertise[2];
+ WakeOnLan wol;
+ NetDevPort port;
+ int features[_NET_DEV_FEAT_MAX];
+ netdev_channels channels;
+
+ LIST_FIELDS(link_config, links);
+};
+
+int link_config_ctx_new(link_config_ctx **ret);
+void link_config_ctx_free(link_config_ctx *ctx);
+
+int link_config_load(link_config_ctx *ctx);
+bool link_config_should_reload(link_config_ctx *ctx);
+
+int link_config_get(link_config_ctx *ctx, sd_device *device, struct link_config **ret);
+int link_config_apply(link_config_ctx *ctx, struct link_config *config, sd_device *device, const char **name);
+int link_get_driver(link_config_ctx *ctx, sd_device *device, char **ret);
+
+const char *name_policy_to_string(NamePolicy p) _const_;
+NamePolicy name_policy_from_string(const char *p) _pure_;
+
+const char *mac_policy_to_string(MACPolicy p) _const_;
+MACPolicy mac_policy_from_string(const char *p) _pure_;
+
+/* gperf lookup function */
+const struct ConfigPerfItem* link_config_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_mac_policy);
+CONFIG_PARSER_PROTOTYPE(config_parse_name_policy);
diff --git a/src/udev/net/naming-scheme.c b/src/udev/net/naming-scheme.c
new file mode 100644
index 0000000..27cede5
--- /dev/null
+++ b/src/udev/net/naming-scheme.c
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#include "alloc-util.h"
+#include "naming-scheme.h"
+#include "proc-cmdline.h"
+#include "string-util.h"
+
+static const NamingScheme naming_schemes[] = {
+ { "v238", NAMING_V238 },
+ { "v239", NAMING_V239 },
+ { "v240", NAMING_V240 },
+ /* … add more schemes here, as the logic to name devices is updated … */
+};
+
+static const NamingScheme* naming_scheme_from_name(const char *name) {
+ size_t i;
+
+ if (streq(name, "latest"))
+ return naming_schemes + ELEMENTSOF(naming_schemes) - 1;
+
+ for (i = 0; i < ELEMENTSOF(naming_schemes); i++)
+ if (streq(naming_schemes[i].name, name))
+ return naming_schemes + i;
+
+ return NULL;
+}
+
+const NamingScheme* naming_scheme(void) {
+ static const NamingScheme *cache = NULL;
+ _cleanup_free_ char *buffer = NULL;
+ const char *e, *k;
+
+ if (cache)
+ return cache;
+
+ /* Acquire setting from the kernel command line */
+ (void) proc_cmdline_get_key("net.naming-scheme", 0, &buffer);
+
+ /* Also acquire it from an env var */
+ e = getenv("NET_NAMING_SCHEME");
+ if (e) {
+ if (*e == ':') {
+ /* If prefixed with ':' the kernel cmdline takes precedence */
+ k = buffer ?: e + 1;
+ } else
+ k = e; /* Otherwise the env var takes precedence */
+ } else
+ k = buffer;
+
+ if (k) {
+ cache = naming_scheme_from_name(k);
+ if (cache) {
+ log_info("Using interface naming scheme '%s'.", cache->name);
+ return cache;
+ }
+
+ log_warning("Unknown interface naming scheme '%s' requested, ignoring.", k);
+ }
+
+ cache = naming_scheme_from_name(DEFAULT_NET_NAMING_SCHEME);
+ assert(cache);
+ log_info("Using default interface naming scheme '%s'.", cache->name);
+
+ return cache;
+}
diff --git a/src/udev/net/naming-scheme.h b/src/udev/net/naming-scheme.h
new file mode 100644
index 0000000..0b3d9bf
--- /dev/null
+++ b/src/udev/net/naming-scheme.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+/* So here's the deal: net_id is supposed to be an excercise in providing stable names for network devices. However, we
+ * also want to keep updating the naming scheme used in future versions of net_id. These two goals of course are
+ * contradictory: on one hand we want things to not change and on the other hand we want them to improve. Our way out
+ * of this dilemma is to introduce the "naming scheme" concept: each time we improve the naming logic we define a new
+ * flag for it. Then, we keep a list of schemes, each identified by a name associated with the flags it implements. Via
+ * a kernel command line and environment variable we then allow the user to pick the scheme they want us to follow:
+ * installers could "freeze" the used scheme at the moment of installation this way.
+ *
+ * Developers: each time you tweak the naming logic here, define a new flag below, and condition the tweak with
+ * it. Each time we do a release we'll then add a new scheme entry and include all newly defined flags.
+ *
+ * Note that this is only half a solution to the problem though: not only udev/net_id gets updated all the time, the
+ * kernel gets too. And thus a kernel that previously didn't expose some sysfs attribute we look for might eventually
+ * do, and thus affect our naming scheme too. Thus, enforcing a naming scheme will make interfacing more stable across
+ * OS versions, but not fully stabilize them. */
+typedef enum NamingSchemeFlags {
+ /* First, the individual features */
+ NAMING_SR_IOV_V = 1 << 0, /* Use "v" suffix for SR-IOV, see 609948c7043a40008b8299529c978ed8e11de8f6*/
+ NAMING_NPAR_ARI = 1 << 1, /* Use NPAR "ARI", see 6bc04997b6eab35d1cb9fa73889892702c27be09 */
+ NAMING_INFINIBAND = 1 << 2, /* Use "ib" prefix for infiniband, see 938d30aa98df887797c9e05074a562ddacdcdf5e */
+ NAMING_ZERO_ACPI_INDEX = 1 << 3, /* Allow zero acpi_index field, see d81186ef4f6a888a70f20a1e73a812d6acb9e22f */
+ NAMING_ALLOW_RERENAMES = 1 << 4, /* Allow re-renaming of devices, see #9006 */
+
+ /* And now the masks that combine the features above */
+ NAMING_V238 = 0,
+ NAMING_V239 = NAMING_V238 | NAMING_SR_IOV_V | NAMING_NPAR_ARI,
+ NAMING_V240 = NAMING_V239 | NAMING_INFINIBAND | NAMING_ZERO_ACPI_INDEX | NAMING_ALLOW_RERENAMES,
+
+ _NAMING_SCHEME_FLAGS_INVALID = -1,
+} NamingSchemeFlags;
+
+typedef struct NamingScheme {
+ const char *name;
+ NamingSchemeFlags flags;
+} NamingScheme;
+
+const NamingScheme* naming_scheme(void);
+
+static inline bool naming_scheme_has(NamingSchemeFlags flags) {
+ return FLAGS_SET(naming_scheme()->flags, flags);
+}
diff --git a/src/udev/scsi_id/README b/src/udev/scsi_id/README
new file mode 100644
index 0000000..9cfe739
--- /dev/null
+++ b/src/udev/scsi_id/README
@@ -0,0 +1,4 @@
+scsi_id - generate a SCSI unique identifier for a given SCSI device
+
+Please send questions, comments or patches to <patmans@us.ibm.com> or
+<linux-hotplug-devel@lists.sourceforge.net>.
diff --git a/src/udev/scsi_id/scsi.h b/src/udev/scsi_id/scsi.h
new file mode 100644
index 0000000..208b3e7
--- /dev/null
+++ b/src/udev/scsi_id/scsi.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#pragma once
+
+/*
+ * scsi.h
+ *
+ * General scsi and linux scsi specific defines and structs.
+ *
+ * Copyright (C) IBM Corp. 2003
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 of the License.
+ */
+
+#include <scsi/scsi.h>
+
+struct scsi_ioctl_command {
+ unsigned inlen; /* excluding scsi command length */
+ unsigned outlen;
+ unsigned char data[1];
+ /* on input, scsi command starts here then opt. data */
+};
+
+/*
+ * Default 5 second timeout
+ */
+#define DEF_TIMEOUT 5000
+
+#define SENSE_BUFF_LEN 32
+
+/*
+ * The request buffer size passed to the SCSI INQUIRY commands, use 254,
+ * as this is a nice value for some devices, especially some of the usb
+ * mass storage devices.
+ */
+#define SCSI_INQ_BUFF_LEN 254
+
+/*
+ * SCSI INQUIRY vendor and model (really product) lengths.
+ */
+#define VENDOR_LENGTH 8
+#define MODEL_LENGTH 16
+
+#define INQUIRY_CMD 0x12
+#define INQUIRY_CMDLEN 6
+
+/*
+ * INQUIRY VPD page 0x83 identifier descriptor related values. Reference the
+ * SCSI Primary Commands specification for details.
+ */
+
+/*
+ * id type values of id descriptors. These are assumed to fit in 4 bits.
+ */
+#define SCSI_ID_VENDOR_SPECIFIC 0
+#define SCSI_ID_T10_VENDOR 1
+#define SCSI_ID_EUI_64 2
+#define SCSI_ID_NAA 3
+#define SCSI_ID_RELPORT 4
+#define SCSI_ID_TGTGROUP 5
+#define SCSI_ID_LUNGROUP 6
+#define SCSI_ID_MD5 7
+#define SCSI_ID_NAME 8
+
+/*
+ * Supported NAA values. These fit in 4 bits, so the "don't care" value
+ * cannot conflict with real values.
+ */
+#define SCSI_ID_NAA_DONT_CARE 0xff
+#define SCSI_ID_NAA_IEEE_REG 0x05
+#define SCSI_ID_NAA_IEEE_REG_EXTENDED 0x06
+
+/*
+ * Supported Code Set values.
+ */
+#define SCSI_ID_BINARY 1
+#define SCSI_ID_ASCII 2
+
+struct scsi_id_search_values {
+ u_char id_type;
+ u_char naa_type;
+ u_char code_set;
+};
+
+/*
+ * Following are the "true" SCSI status codes. Linux has traditionally
+ * used a 1 bit right and masked version of these. So now CHECK_CONDITION
+ * and friends (in <scsi/scsi.h>) are deprecated.
+ */
+#define SCSI_CHECK_CONDITION 0x02
+#define SCSI_CONDITION_MET 0x04
+#define SCSI_BUSY 0x08
+#define SCSI_IMMEDIATE 0x10
+#define SCSI_IMMEDIATE_CONDITION_MET 0x14
+#define SCSI_RESERVATION_CONFLICT 0x18
+#define SCSI_COMMAND_TERMINATED 0x22
+#define SCSI_TASK_SET_FULL 0x28
+#define SCSI_ACA_ACTIVE 0x30
+#define SCSI_TASK_ABORTED 0x40
diff --git a/src/udev/scsi_id/scsi_id.c b/src/udev/scsi_id/scsi_id.c
new file mode 100644
index 0000000..2698cdd
--- /dev/null
+++ b/src/udev/scsi_id/scsi_id.c
@@ -0,0 +1,597 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright © IBM Corp. 2003
+ * Copyright © SUSE Linux Products GmbH, 2006
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "build.h"
+#include "fd-util.h"
+#include "libudev-util.h"
+#include "scsi_id.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "udev-util.h"
+
+static const struct option options[] = {
+ { "device", required_argument, NULL, 'd' },
+ { "config", required_argument, NULL, 'f' },
+ { "page", required_argument, NULL, 'p' },
+ { "blacklisted", no_argument, NULL, 'b' },
+ { "whitelisted", no_argument, NULL, 'g' },
+ { "replace-whitespace", no_argument, NULL, 'u' },
+ { "sg-version", required_argument, NULL, 's' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "version", no_argument, NULL, 'V' }, /* don't advertise -V */
+ { "export", no_argument, NULL, 'x' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+};
+
+static bool all_good = false;
+static bool dev_specified = false;
+static char config_file[MAX_PATH_LEN] = "/etc/scsi_id.config";
+static enum page_code default_page_code = PAGE_UNSPECIFIED;
+static int sg_version = 4;
+static bool reformat_serial = false;
+static bool export = false;
+static char vendor_str[64];
+static char model_str[64];
+static char vendor_enc_str[256];
+static char model_enc_str[256];
+static char revision_str[16];
+static char type_str[16];
+
+static void set_type(const char *from, char *to, size_t len) {
+ int type_num;
+ char *eptr;
+ const char *type = "generic";
+
+ type_num = strtoul(from, &eptr, 0);
+ if (eptr != from) {
+ switch (type_num) {
+ case 0:
+ type = "disk";
+ break;
+ case 1:
+ type = "tape";
+ break;
+ case 4:
+ type = "optical";
+ break;
+ case 5:
+ type = "cd";
+ break;
+ case 7:
+ type = "optical";
+ break;
+ case 0xe:
+ type = "disk";
+ break;
+ case 0xf:
+ type = "optical";
+ break;
+ default:
+ break;
+ }
+ }
+ strscpy(to, len, type);
+}
+
+/*
+ * get_value:
+ *
+ * buf points to an '=' followed by a quoted string ("foo") or a string ending
+ * with a space or ','.
+ *
+ * Return a pointer to the NUL terminated string, returns NULL if no
+ * matches.
+ */
+static char *get_value(char **buffer) {
+ static const char *quote_string = "\"\n";
+ static const char *comma_string = ",\n";
+ char *val;
+ const char *end;
+
+ if (**buffer == '"') {
+ /*
+ * skip leading quote, terminate when quote seen
+ */
+ (*buffer)++;
+ end = quote_string;
+ } else {
+ end = comma_string;
+ }
+ val = strsep(buffer, end);
+ if (val && end == quote_string)
+ /*
+ * skip trailing quote
+ */
+ (*buffer)++;
+
+ while (isspace(**buffer))
+ (*buffer)++;
+
+ return val;
+}
+
+static int argc_count(char *opts) {
+ int i = 0;
+ while (*opts != '\0')
+ if (*opts++ == ' ')
+ i++;
+ return i;
+}
+
+/*
+ * get_file_options:
+ *
+ * If vendor == NULL, find a line in the config file with only "OPTIONS=";
+ * if vendor and model are set find the first OPTIONS line in the config
+ * file that matches. Set argc and argv to match the OPTIONS string.
+ *
+ * vendor and model can end in '\n'.
+ */
+static int get_file_options(const char *vendor, const char *model,
+ int *argc, char ***newargv) {
+ _cleanup_free_ char *buffer = NULL;
+ _cleanup_fclose_ FILE *f;
+ char *buf;
+ char *str1;
+ char *vendor_in, *model_in, *options_in; /* read in from file */
+ int lineno;
+ int c;
+ int retval = 0;
+
+ f = fopen(config_file, "re");
+ if (f == NULL) {
+ if (errno == ENOENT)
+ return 1;
+ else {
+ log_error_errno(errno, "can't open %s: %m", config_file);
+ return -1;
+ }
+ }
+
+ /*
+ * Allocate a buffer rather than put it on the stack so we can
+ * keep it around to parse any options (any allocated newargv
+ * points into this buffer for its strings).
+ */
+ buffer = malloc(MAX_BUFFER_LEN);
+ if (!buffer)
+ return log_oom();
+
+ *newargv = NULL;
+ lineno = 0;
+ for (;;) {
+ vendor_in = model_in = options_in = NULL;
+
+ buf = fgets(buffer, MAX_BUFFER_LEN, f);
+ if (buf == NULL)
+ break;
+ lineno++;
+ if (buf[strlen(buffer) - 1] != '\n') {
+ log_error("Config file line %d too long", lineno);
+ break;
+ }
+
+ while (isspace(*buf))
+ buf++;
+
+ /* blank or all whitespace line */
+ if (*buf == '\0')
+ continue;
+
+ /* comment line */
+ if (*buf == '#')
+ continue;
+
+ str1 = strsep(&buf, "=");
+ if (str1 && strcaseeq(str1, "VENDOR")) {
+ str1 = get_value(&buf);
+ if (!str1) {
+ retval = log_oom();
+ break;
+ }
+ vendor_in = str1;
+
+ str1 = strsep(&buf, "=");
+ if (str1 && strcaseeq(str1, "MODEL")) {
+ str1 = get_value(&buf);
+ if (!str1) {
+ retval = log_oom();
+ break;
+ }
+ model_in = str1;
+ str1 = strsep(&buf, "=");
+ }
+ }
+
+ if (str1 && strcaseeq(str1, "OPTIONS")) {
+ str1 = get_value(&buf);
+ if (!str1) {
+ retval = log_oom();
+ break;
+ }
+ options_in = str1;
+ }
+
+ /*
+ * Only allow: [vendor=foo[,model=bar]]options=stuff
+ */
+ if (!options_in || (!vendor_in && model_in)) {
+ log_error("Error parsing config file line %d '%s'", lineno, buffer);
+ retval = -1;
+ break;
+ }
+ if (vendor == NULL) {
+ if (vendor_in == NULL)
+ break;
+ } else if (vendor_in &&
+ startswith(vendor, vendor_in) &&
+ (!model_in || startswith(model, model_in))) {
+ /*
+ * Matched vendor and optionally model.
+ *
+ * Note: a short vendor_in or model_in can
+ * give a partial match (that is FOO
+ * matches FOOBAR).
+ */
+ break;
+ }
+ }
+
+ if (retval == 0) {
+ if (vendor_in != NULL || model_in != NULL ||
+ options_in != NULL) {
+ /*
+ * Something matched. Allocate newargv, and store
+ * values found in options_in.
+ */
+ strcpy(buffer, options_in);
+ c = argc_count(buffer) + 2;
+ *newargv = calloc(c, sizeof(**newargv));
+ if (!*newargv)
+ retval = log_oom();
+ else {
+ *argc = c;
+ c = 0;
+ /*
+ * argv[0] at 0 is skipped by getopt, but
+ * store the buffer address there for
+ * later freeing
+ */
+ (*newargv)[c] = buffer;
+ for (c = 1; c < *argc; c++)
+ (*newargv)[c] = strsep(&buffer, " \t");
+ buffer = NULL;
+ }
+ } else {
+ /* No matches */
+ retval = 1;
+ }
+ }
+ return retval;
+}
+
+static void help(void) {
+ printf("Usage: %s [OPTION...] DEVICE\n\n"
+ "SCSI device identification.\n\n"
+ " -h --help Print this message\n"
+ " --version Print version of the program\n\n"
+ " -d --device= Device node for SG_IO commands\n"
+ " -f --config= Location of config file\n"
+ " -p --page=0x80|0x83|pre-spc3-83 SCSI page (0x80, 0x83, pre-spc3-83)\n"
+ " -s --sg-version=3|4 Use SGv3 or SGv4\n"
+ " -b --blacklisted Treat device as blacklisted\n"
+ " -g --whitelisted Treat device as whitelisted\n"
+ " -u --replace-whitespace Replace all whitespace by underscores\n"
+ " -v --verbose Verbose logging\n"
+ " -x --export Print values as environment keys\n"
+ , program_invocation_short_name);
+
+}
+
+static int set_options(int argc, char **argv,
+ char *maj_min_dev) {
+ int option;
+
+ /*
+ * optind is a global extern used by getopt. Since we can call
+ * set_options twice (once for command line, and once for config
+ * file) we have to reset this back to 1.
+ */
+ optind = 1;
+ while ((option = getopt_long(argc, argv, "d:f:gp:uvVxhbs:", options, NULL)) >= 0)
+ switch (option) {
+ case 'b':
+ all_good = false;
+ break;
+
+ case 'd':
+ dev_specified = true;
+ strscpy(maj_min_dev, MAX_PATH_LEN, optarg);
+ break;
+
+ case 'f':
+ strscpy(config_file, MAX_PATH_LEN, optarg);
+ break;
+
+ case 'g':
+ all_good = true;
+ break;
+
+ case 'h':
+ help();
+ exit(EXIT_SUCCESS);
+
+ case 'p':
+ if (streq(optarg, "0x80"))
+ default_page_code = PAGE_80;
+ else if (streq(optarg, "0x83"))
+ default_page_code = PAGE_83;
+ else if (streq(optarg, "pre-spc3-83"))
+ default_page_code = PAGE_83_PRE_SPC3;
+ else {
+ log_error("Unknown page code '%s'", optarg);
+ return -1;
+ }
+ break;
+
+ case 's':
+ sg_version = atoi(optarg);
+ if (sg_version < 3 || sg_version > 4) {
+ log_error("Unknown SG version '%s'", optarg);
+ return -1;
+ }
+ break;
+
+ case 'u':
+ reformat_serial = true;
+ break;
+
+ case 'v':
+ log_set_target(LOG_TARGET_CONSOLE);
+ log_set_max_level(LOG_DEBUG);
+ log_open();
+ break;
+
+ case 'V':
+ printf("%s\n", GIT_VERSION);
+ exit(EXIT_SUCCESS);
+
+ case 'x':
+ export = true;
+ break;
+
+ case '?':
+ return -1;
+
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ if (optind < argc && !dev_specified) {
+ dev_specified = true;
+ strscpy(maj_min_dev, MAX_PATH_LEN, argv[optind]);
+ }
+
+ return 0;
+}
+
+static int per_dev_options(struct scsi_id_device *dev_scsi, int *good_bad, int *page_code) {
+ int retval;
+ int newargc;
+ char **newargv = NULL;
+ int option;
+
+ *good_bad = all_good;
+ *page_code = default_page_code;
+
+ retval = get_file_options(vendor_str, model_str, &newargc, &newargv);
+
+ optind = 1; /* reset this global extern */
+ while (retval == 0) {
+ option = getopt_long(newargc, newargv, "bgp:", options, NULL);
+ if (option == -1)
+ break;
+
+ switch (option) {
+ case 'b':
+ *good_bad = 0;
+ break;
+
+ case 'g':
+ *good_bad = 1;
+ break;
+
+ case 'p':
+ if (streq(optarg, "0x80")) {
+ *page_code = PAGE_80;
+ } else if (streq(optarg, "0x83")) {
+ *page_code = PAGE_83;
+ } else if (streq(optarg, "pre-spc3-83")) {
+ *page_code = PAGE_83_PRE_SPC3;
+ } else {
+ log_error("Unknown page code '%s'", optarg);
+ retval = -1;
+ }
+ break;
+
+ default:
+ log_error("Unknown or bad option '%c' (0x%x)", option, option);
+ retval = -1;
+ break;
+ }
+ }
+
+ if (newargv) {
+ free(newargv[0]);
+ free(newargv);
+ }
+ return retval;
+}
+
+static int set_inq_values(struct scsi_id_device *dev_scsi, const char *path) {
+ int retval;
+
+ dev_scsi->use_sg = sg_version;
+
+ retval = scsi_std_inquiry(dev_scsi, path);
+ if (retval)
+ return retval;
+
+ udev_util_encode_string(dev_scsi->vendor, vendor_enc_str, sizeof(vendor_enc_str));
+ udev_util_encode_string(dev_scsi->model, model_enc_str, sizeof(model_enc_str));
+
+ util_replace_whitespace(dev_scsi->vendor, vendor_str, sizeof(vendor_str)-1);
+ util_replace_chars(vendor_str, NULL);
+ util_replace_whitespace(dev_scsi->model, model_str, sizeof(model_str)-1);
+ util_replace_chars(model_str, NULL);
+ set_type(dev_scsi->type, type_str, sizeof(type_str));
+ util_replace_whitespace(dev_scsi->revision, revision_str, sizeof(revision_str)-1);
+ util_replace_chars(revision_str, NULL);
+ return 0;
+}
+
+/*
+ * scsi_id: try to get an id, if one is found, printf it to stdout.
+ * returns a value passed to exit() - 0 if printed an id, else 1.
+ */
+static int scsi_id(char *maj_min_dev) {
+ struct scsi_id_device dev_scsi = {};
+ int good_dev;
+ int page_code;
+ int retval = 0;
+
+ if (set_inq_values(&dev_scsi, maj_min_dev) < 0) {
+ retval = 1;
+ goto out;
+ }
+
+ /* get per device (vendor + model) options from the config file */
+ per_dev_options(&dev_scsi, &good_dev, &page_code);
+ if (!good_dev) {
+ retval = 1;
+ goto out;
+ }
+
+ /* read serial number from mode pages (no values for optical drives) */
+ scsi_get_serial(&dev_scsi, maj_min_dev, page_code, MAX_SERIAL_LEN);
+
+ if (export) {
+ char serial_str[MAX_SERIAL_LEN];
+
+ printf("ID_SCSI=1\n");
+ printf("ID_VENDOR=%s\n", vendor_str);
+ printf("ID_VENDOR_ENC=%s\n", vendor_enc_str);
+ printf("ID_MODEL=%s\n", model_str);
+ printf("ID_MODEL_ENC=%s\n", model_enc_str);
+ printf("ID_REVISION=%s\n", revision_str);
+ printf("ID_TYPE=%s\n", type_str);
+ if (dev_scsi.serial[0] != '\0') {
+ util_replace_whitespace(dev_scsi.serial, serial_str, sizeof(serial_str)-1);
+ util_replace_chars(serial_str, NULL);
+ printf("ID_SERIAL=%s\n", serial_str);
+ util_replace_whitespace(dev_scsi.serial_short, serial_str, sizeof(serial_str)-1);
+ util_replace_chars(serial_str, NULL);
+ printf("ID_SERIAL_SHORT=%s\n", serial_str);
+ }
+ if (dev_scsi.wwn[0] != '\0') {
+ printf("ID_WWN=0x%s\n", dev_scsi.wwn);
+ if (dev_scsi.wwn_vendor_extension[0] != '\0') {
+ printf("ID_WWN_VENDOR_EXTENSION=0x%s\n", dev_scsi.wwn_vendor_extension);
+ printf("ID_WWN_WITH_EXTENSION=0x%s%s\n", dev_scsi.wwn, dev_scsi.wwn_vendor_extension);
+ } else
+ printf("ID_WWN_WITH_EXTENSION=0x%s\n", dev_scsi.wwn);
+ }
+ if (dev_scsi.tgpt_group[0] != '\0')
+ printf("ID_TARGET_PORT=%s\n", dev_scsi.tgpt_group);
+ if (dev_scsi.unit_serial_number[0] != '\0')
+ printf("ID_SCSI_SERIAL=%s\n", dev_scsi.unit_serial_number);
+ goto out;
+ }
+
+ if (dev_scsi.serial[0] == '\0') {
+ retval = 1;
+ goto out;
+ }
+
+ if (reformat_serial) {
+ char serial_str[MAX_SERIAL_LEN];
+
+ util_replace_whitespace(dev_scsi.serial, serial_str, sizeof(serial_str)-1);
+ util_replace_chars(serial_str, NULL);
+ printf("%s\n", serial_str);
+ goto out;
+ }
+
+ printf("%s\n", dev_scsi.serial);
+out:
+ return retval;
+}
+
+int main(int argc, char **argv) {
+ int retval = 0;
+ char maj_min_dev[MAX_PATH_LEN];
+ int newargc;
+ char **newargv = NULL;
+
+ log_set_target(LOG_TARGET_AUTO);
+ udev_parse_config();
+ log_parse_environment();
+ log_open();
+
+ /*
+ * Get config file options.
+ */
+ retval = get_file_options(NULL, NULL, &newargc, &newargv);
+ if (retval < 0) {
+ retval = 1;
+ goto exit;
+ }
+ if (retval == 0) {
+ assert(newargv);
+
+ if (set_options(newargc, newargv, maj_min_dev) < 0) {
+ retval = 2;
+ goto exit;
+ }
+ }
+
+ /*
+ * Get command line options (overriding any config file settings).
+ */
+ if (set_options(argc, argv, maj_min_dev) < 0)
+ exit(EXIT_FAILURE);
+
+ if (!dev_specified) {
+ log_error("No device specified.");
+ retval = 1;
+ goto exit;
+ }
+
+ retval = scsi_id(maj_min_dev);
+
+exit:
+ if (newargv) {
+ free(newargv[0]);
+ free(newargv);
+ }
+ log_close();
+ return retval;
+}
diff --git a/src/udev/scsi_id/scsi_id.h b/src/udev/scsi_id/scsi_id.h
new file mode 100644
index 0000000..70e804f
--- /dev/null
+++ b/src/udev/scsi_id/scsi_id.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#pragma once
+
+/*
+ * Copyright © IBM Corp. 2003
+ */
+
+#define MAX_PATH_LEN 512
+
+/*
+ * MAX_ATTR_LEN: maximum length of the result of reading a sysfs
+ * attribute.
+ */
+#define MAX_ATTR_LEN 256
+
+/*
+ * MAX_SERIAL_LEN: the maximum length of the serial number, including
+ * added prefixes such as vendor and product (model) strings.
+ */
+#define MAX_SERIAL_LEN 256
+
+/*
+ * MAX_BUFFER_LEN: maximum buffer size and line length used while reading
+ * the config file.
+ */
+#define MAX_BUFFER_LEN 256
+
+struct scsi_id_device {
+ char vendor[9];
+ char model[17];
+ char revision[5];
+ char type[33];
+ char kernel[64];
+ char serial[MAX_SERIAL_LEN];
+ char serial_short[MAX_SERIAL_LEN];
+ int use_sg;
+
+ /* Always from page 0x80 e.g. 'B3G1P8500RWT' - may not be unique */
+ char unit_serial_number[MAX_SERIAL_LEN];
+
+ /* NULs if not set - otherwise hex encoding using lower-case e.g. '50014ee0016eb572' */
+ char wwn[17];
+
+ /* NULs if not set - otherwise hex encoding using lower-case e.g. '0xe00000d80000' */
+ char wwn_vendor_extension[17];
+
+ /* NULs if not set - otherwise decimal number */
+ char tgpt_group[8];
+};
+
+int scsi_std_inquiry(struct scsi_id_device *dev_scsi, const char *devname);
+int scsi_get_serial(struct scsi_id_device *dev_scsi, const char *devname,
+ int page_code, int len);
+
+/*
+ * Page code values.
+ */
+enum page_code {
+ PAGE_83_PRE_SPC3 = -0x83,
+ PAGE_UNSPECIFIED = 0x00,
+ PAGE_80 = 0x80,
+ PAGE_83 = 0x83,
+};
diff --git a/src/udev/scsi_id/scsi_serial.c b/src/udev/scsi_id/scsi_serial.c
new file mode 100644
index 0000000..c67d047
--- /dev/null
+++ b/src/udev/scsi_id/scsi_serial.c
@@ -0,0 +1,897 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright © IBM Corp. 2003
+ *
+ * Author: Patrick Mansfield<patmans@us.ibm.com>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <linux/bsg.h>
+#include <linux/types.h>
+#include <scsi/scsi.h>
+#include <scsi/sg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "random-util.h"
+#include "scsi.h"
+#include "scsi_id.h"
+#include "string-util.h"
+#include "util.h"
+
+/*
+ * A priority based list of id, naa, and binary/ascii for the identifier
+ * descriptor in VPD page 0x83.
+ *
+ * Brute force search for a match starting with the first value in the
+ * following id_search_list. This is not a performance issue, since there
+ * is normally one or some small number of descriptors.
+ */
+static const struct scsi_id_search_values id_search_list[] = {
+ { SCSI_ID_TGTGROUP, SCSI_ID_NAA_DONT_CARE, SCSI_ID_BINARY },
+ { SCSI_ID_NAA, SCSI_ID_NAA_IEEE_REG_EXTENDED, SCSI_ID_BINARY },
+ { SCSI_ID_NAA, SCSI_ID_NAA_IEEE_REG_EXTENDED, SCSI_ID_ASCII },
+ { SCSI_ID_NAA, SCSI_ID_NAA_IEEE_REG, SCSI_ID_BINARY },
+ { SCSI_ID_NAA, SCSI_ID_NAA_IEEE_REG, SCSI_ID_ASCII },
+ /*
+ * Devices already exist using NAA values that are now marked
+ * reserved. These should not conflict with other values, or it is
+ * a bug in the device. As long as we find the IEEE extended one
+ * first, we really don't care what other ones are used. Using
+ * don't care here means that a device that returns multiple
+ * non-IEEE descriptors in a random order will get different
+ * names.
+ */
+ { SCSI_ID_NAA, SCSI_ID_NAA_DONT_CARE, SCSI_ID_BINARY },
+ { SCSI_ID_NAA, SCSI_ID_NAA_DONT_CARE, SCSI_ID_ASCII },
+ { SCSI_ID_EUI_64, SCSI_ID_NAA_DONT_CARE, SCSI_ID_BINARY },
+ { SCSI_ID_EUI_64, SCSI_ID_NAA_DONT_CARE, SCSI_ID_ASCII },
+ { SCSI_ID_T10_VENDOR, SCSI_ID_NAA_DONT_CARE, SCSI_ID_BINARY },
+ { SCSI_ID_T10_VENDOR, SCSI_ID_NAA_DONT_CARE, SCSI_ID_ASCII },
+ { SCSI_ID_VENDOR_SPECIFIC, SCSI_ID_NAA_DONT_CARE, SCSI_ID_BINARY },
+ { SCSI_ID_VENDOR_SPECIFIC, SCSI_ID_NAA_DONT_CARE, SCSI_ID_ASCII },
+};
+
+static const char hex_str[]="0123456789abcdef";
+
+/*
+ * Values returned in the result/status, only the ones used by the code
+ * are used here.
+ */
+
+#define DID_NO_CONNECT 0x01 /* Unable to connect before timeout */
+#define DID_BUS_BUSY 0x02 /* Bus remain busy until timeout */
+#define DID_TIME_OUT 0x03 /* Timed out for some other reason */
+#define DRIVER_TIMEOUT 0x06
+#define DRIVER_SENSE 0x08 /* Sense_buffer has been set */
+
+/* The following "category" function returns one of the following */
+#define SG_ERR_CAT_CLEAN 0 /* No errors or other information */
+#define SG_ERR_CAT_MEDIA_CHANGED 1 /* interpreted from sense buffer */
+#define SG_ERR_CAT_RESET 2 /* interpreted from sense buffer */
+#define SG_ERR_CAT_TIMEOUT 3
+#define SG_ERR_CAT_RECOVERED 4 /* Successful command after recovered err */
+#define SG_ERR_CAT_NOTSUPPORTED 5 /* Illegal / unsupported command */
+#define SG_ERR_CAT_SENSE 98 /* Something else in the sense buffer */
+#define SG_ERR_CAT_OTHER 99 /* Some other error/warning */
+
+static int do_scsi_page80_inquiry(struct scsi_id_device *dev_scsi, int fd,
+ char *serial, char *serial_short, int max_len);
+
+static int sg_err_category_new(int scsi_status, int msg_status, int
+ host_status, int driver_status, const
+ unsigned char *sense_buffer, int sb_len) {
+ scsi_status &= 0x7e;
+
+ /*
+ * XXX change to return only two values - failed or OK.
+ */
+
+ if (!scsi_status && !host_status && !driver_status)
+ return SG_ERR_CAT_CLEAN;
+
+ if (IN_SET(scsi_status, SCSI_CHECK_CONDITION, SCSI_COMMAND_TERMINATED) ||
+ (driver_status & 0xf) == DRIVER_SENSE) {
+ if (sense_buffer && (sb_len > 2)) {
+ int sense_key;
+ unsigned char asc;
+
+ if (sense_buffer[0] & 0x2) {
+ sense_key = sense_buffer[1] & 0xf;
+ asc = sense_buffer[2];
+ } else {
+ sense_key = sense_buffer[2] & 0xf;
+ asc = (sb_len > 12) ? sense_buffer[12] : 0;
+ }
+
+ if (sense_key == RECOVERED_ERROR)
+ return SG_ERR_CAT_RECOVERED;
+ else if (sense_key == UNIT_ATTENTION) {
+ if (0x28 == asc)
+ return SG_ERR_CAT_MEDIA_CHANGED;
+ if (0x29 == asc)
+ return SG_ERR_CAT_RESET;
+ } else if (sense_key == ILLEGAL_REQUEST)
+ return SG_ERR_CAT_NOTSUPPORTED;
+ }
+ return SG_ERR_CAT_SENSE;
+ }
+ if (host_status) {
+ if (IN_SET(host_status, DID_NO_CONNECT, DID_BUS_BUSY, DID_TIME_OUT))
+ return SG_ERR_CAT_TIMEOUT;
+ }
+ if (driver_status) {
+ if (driver_status == DRIVER_TIMEOUT)
+ return SG_ERR_CAT_TIMEOUT;
+ }
+ return SG_ERR_CAT_OTHER;
+}
+
+static int sg_err_category3(struct sg_io_hdr *hp) {
+ return sg_err_category_new(hp->status, hp->msg_status,
+ hp->host_status, hp->driver_status,
+ hp->sbp, hp->sb_len_wr);
+}
+
+static int sg_err_category4(struct sg_io_v4 *hp) {
+ return sg_err_category_new(hp->device_status, 0,
+ hp->transport_status, hp->driver_status,
+ (unsigned char *)(uintptr_t)hp->response,
+ hp->response_len);
+}
+
+static int scsi_dump_sense(struct scsi_id_device *dev_scsi,
+ unsigned char *sense_buffer, int sb_len) {
+ int s;
+ int code;
+ int sense_class;
+ int sense_key;
+ int asc, ascq;
+
+ /*
+ * Figure out and print the sense key, asc and ascq.
+ *
+ * If you want to suppress these for a particular drive model, add
+ * a black list entry in the scsi_id config file.
+ *
+ * XXX We probably need to: lookup the sense/asc/ascq in a retry
+ * table, and if found return 1 (after dumping the sense, asc, and
+ * ascq). So, if/when we get something like a power on/reset,
+ * we'll retry the command.
+ */
+
+ if (sb_len < 1) {
+ log_debug("%s: sense buffer empty", dev_scsi->kernel);
+ return -1;
+ }
+
+ sense_class = (sense_buffer[0] >> 4) & 0x07;
+ code = sense_buffer[0] & 0xf;
+
+ if (sense_class == 7) {
+ /*
+ * extended sense data.
+ */
+ s = sense_buffer[7] + 8;
+ if (sb_len < s) {
+ log_debug("%s: sense buffer too small %d bytes, %d bytes too short",
+ dev_scsi->kernel, sb_len, s - sb_len);
+ return -1;
+ }
+ if (IN_SET(code, 0x0, 0x1)) {
+ sense_key = sense_buffer[2] & 0xf;
+ if (s < 14) {
+ /*
+ * Possible?
+ */
+ log_debug("%s: sense result too" " small %d bytes",
+ dev_scsi->kernel, s);
+ return -1;
+ }
+ asc = sense_buffer[12];
+ ascq = sense_buffer[13];
+ } else if (IN_SET(code, 0x2, 0x3)) {
+ sense_key = sense_buffer[1] & 0xf;
+ asc = sense_buffer[2];
+ ascq = sense_buffer[3];
+ } else {
+ log_debug("%s: invalid sense code 0x%x",
+ dev_scsi->kernel, code);
+ return -1;
+ }
+ log_debug("%s: sense key 0x%x ASC 0x%x ASCQ 0x%x",
+ dev_scsi->kernel, sense_key, asc, ascq);
+ } else {
+ if (sb_len < 4) {
+ log_debug("%s: sense buffer too small %d bytes, %d bytes too short",
+ dev_scsi->kernel, sb_len, 4 - sb_len);
+ return -1;
+ }
+
+ if (sense_buffer[0] < 15)
+ log_debug("%s: old sense key: 0x%x", dev_scsi->kernel, sense_buffer[0] & 0x0f);
+ else
+ log_debug("%s: sense = %2x %2x",
+ dev_scsi->kernel, sense_buffer[0], sense_buffer[2]);
+ log_debug("%s: non-extended sense class %d code 0x%0x",
+ dev_scsi->kernel, sense_class, code);
+
+ }
+
+ return -1;
+}
+
+static int scsi_dump(struct scsi_id_device *dev_scsi, struct sg_io_hdr *io) {
+ if (!io->status && !io->host_status && !io->msg_status &&
+ !io->driver_status) {
+ /*
+ * Impossible, should not be called.
+ */
+ log_debug("%s: called with no error", __FUNCTION__);
+ return -1;
+ }
+
+ log_debug("%s: sg_io failed status 0x%x 0x%x 0x%x 0x%x",
+ dev_scsi->kernel, io->driver_status, io->host_status, io->msg_status, io->status);
+ if (io->status == SCSI_CHECK_CONDITION)
+ return scsi_dump_sense(dev_scsi, io->sbp, io->sb_len_wr);
+ else
+ return -1;
+}
+
+static int scsi_dump_v4(struct scsi_id_device *dev_scsi, struct sg_io_v4 *io) {
+ if (!io->device_status && !io->transport_status &&
+ !io->driver_status) {
+ /*
+ * Impossible, should not be called.
+ */
+ log_debug("%s: called with no error", __FUNCTION__);
+ return -1;
+ }
+
+ log_debug("%s: sg_io failed status 0x%x 0x%x 0x%x",
+ dev_scsi->kernel, io->driver_status, io->transport_status, io->device_status);
+ if (io->device_status == SCSI_CHECK_CONDITION)
+ return scsi_dump_sense(dev_scsi, (unsigned char *)(uintptr_t)io->response,
+ io->response_len);
+ else
+ return -1;
+}
+
+static int scsi_inquiry(struct scsi_id_device *dev_scsi, int fd,
+ unsigned char evpd, unsigned char page,
+ unsigned char *buf, unsigned buflen) {
+ unsigned char inq_cmd[INQUIRY_CMDLEN] =
+ { INQUIRY_CMD, evpd, page, 0, buflen, 0 };
+ unsigned char sense[SENSE_BUFF_LEN];
+ void *io_buf;
+ struct sg_io_v4 io_v4;
+ struct sg_io_hdr io_hdr;
+ int retry = 3; /* rather random */
+ int retval;
+
+ if (buflen > SCSI_INQ_BUFF_LEN) {
+ log_debug("buflen %d too long", buflen);
+ return -1;
+ }
+
+resend:
+ if (dev_scsi->use_sg == 4) {
+ memzero(&io_v4, sizeof(struct sg_io_v4));
+ io_v4.guard = 'Q';
+ io_v4.protocol = BSG_PROTOCOL_SCSI;
+ io_v4.subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD;
+ io_v4.request_len = sizeof(inq_cmd);
+ io_v4.request = (uintptr_t)inq_cmd;
+ io_v4.max_response_len = sizeof(sense);
+ io_v4.response = (uintptr_t)sense;
+ io_v4.din_xfer_len = buflen;
+ io_v4.din_xferp = (uintptr_t)buf;
+ io_buf = (void *)&io_v4;
+ } else {
+ memzero(&io_hdr, sizeof(struct sg_io_hdr));
+ io_hdr.interface_id = 'S';
+ io_hdr.cmd_len = sizeof(inq_cmd);
+ io_hdr.mx_sb_len = sizeof(sense);
+ io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+ io_hdr.dxfer_len = buflen;
+ io_hdr.dxferp = buf;
+ io_hdr.cmdp = inq_cmd;
+ io_hdr.sbp = sense;
+ io_hdr.timeout = DEF_TIMEOUT;
+ io_buf = (void *)&io_hdr;
+ }
+
+ retval = ioctl(fd, SG_IO, io_buf);
+ if (retval < 0) {
+ if (IN_SET(errno, EINVAL, ENOSYS) && dev_scsi->use_sg == 4) {
+ dev_scsi->use_sg = 3;
+ goto resend;
+ }
+ log_debug_errno(errno, "%s: ioctl failed: %m", dev_scsi->kernel);
+ goto error;
+ }
+
+ if (dev_scsi->use_sg == 4)
+ retval = sg_err_category4(io_buf);
+ else
+ retval = sg_err_category3(io_buf);
+
+ switch (retval) {
+ case SG_ERR_CAT_NOTSUPPORTED:
+ buf[1] = 0;
+ _fallthrough_;
+ case SG_ERR_CAT_CLEAN:
+ case SG_ERR_CAT_RECOVERED:
+ retval = 0;
+ break;
+
+ default:
+ if (dev_scsi->use_sg == 4)
+ retval = scsi_dump_v4(dev_scsi, io_buf);
+ else
+ retval = scsi_dump(dev_scsi, io_buf);
+ }
+
+ if (!retval) {
+ retval = buflen;
+ } else if (retval > 0) {
+ if (--retry > 0)
+ goto resend;
+ retval = -1;
+ }
+
+error:
+ if (retval < 0)
+ log_debug("%s: Unable to get INQUIRY vpd %d page 0x%x.",
+ dev_scsi->kernel, evpd, page);
+
+ return retval;
+}
+
+/* Get list of supported EVPD pages */
+static int do_scsi_page0_inquiry(struct scsi_id_device *dev_scsi, int fd,
+ unsigned char *buffer, unsigned len) {
+ int retval;
+
+ memzero(buffer, len);
+ retval = scsi_inquiry(dev_scsi, fd, 1, 0x0, buffer, len);
+ if (retval < 0)
+ return 1;
+
+ if (buffer[1] != 0) {
+ log_debug("%s: page 0 not available.", dev_scsi->kernel);
+ return 1;
+ }
+ if (buffer[3] > len) {
+ log_debug("%s: page 0 buffer too long %d", dev_scsi->kernel, buffer[3]);
+ return 1;
+ }
+
+ /*
+ * Following check is based on code once included in the 2.5.x
+ * kernel.
+ *
+ * Some ill behaved devices return the standard inquiry here
+ * rather than the evpd data, snoop the data to verify.
+ */
+ if (buffer[3] > MODEL_LENGTH) {
+ /*
+ * If the vendor id appears in the page assume the page is
+ * invalid.
+ */
+ if (strneq((char *)&buffer[VENDOR_LENGTH], dev_scsi->vendor, VENDOR_LENGTH)) {
+ log_debug("%s: invalid page0 data", dev_scsi->kernel);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * The caller checks that serial is long enough to include the vendor +
+ * model.
+ */
+static int prepend_vendor_model(struct scsi_id_device *dev_scsi, char *serial) {
+ int ind;
+
+ strncpy(serial, dev_scsi->vendor, VENDOR_LENGTH);
+ strncat(serial, dev_scsi->model, MODEL_LENGTH);
+ ind = strlen(serial);
+
+ /*
+ * This is not a complete check, since we are using strncat/cpy
+ * above, ind will never be too large.
+ */
+ if (ind != (VENDOR_LENGTH + MODEL_LENGTH)) {
+ log_debug("%s: expected length %d, got length %d",
+ dev_scsi->kernel, (VENDOR_LENGTH + MODEL_LENGTH), ind);
+ return -1;
+ }
+ return ind;
+}
+
+/*
+ * check_fill_0x83_id - check the page 0x83 id, if OK allocate and fill
+ * serial number.
+ */
+static int check_fill_0x83_id(struct scsi_id_device *dev_scsi,
+ unsigned char *page_83,
+ const struct scsi_id_search_values
+ *id_search, char *serial, char *serial_short,
+ int max_len, char *wwn,
+ char *wwn_vendor_extension, char *tgpt_group) {
+ int i, j, s, len;
+
+ /*
+ * ASSOCIATION must be with the device (value 0)
+ * or with the target port for SCSI_ID_TGTPORT
+ */
+ if ((page_83[1] & 0x30) == 0x10) {
+ if (id_search->id_type != SCSI_ID_TGTGROUP)
+ return 1;
+ } else if ((page_83[1] & 0x30) != 0)
+ return 1;
+
+ if ((page_83[1] & 0x0f) != id_search->id_type)
+ return 1;
+
+ /*
+ * Possibly check NAA sub-type.
+ */
+ if ((id_search->naa_type != SCSI_ID_NAA_DONT_CARE) &&
+ (id_search->naa_type != (page_83[4] & 0xf0) >> 4))
+ return 1;
+
+ /*
+ * Check for matching code set - ASCII or BINARY.
+ */
+ if ((page_83[0] & 0x0f) != id_search->code_set)
+ return 1;
+
+ /*
+ * page_83[3]: identifier length
+ */
+ len = page_83[3];
+ if ((page_83[0] & 0x0f) != SCSI_ID_ASCII)
+ /*
+ * If not ASCII, use two bytes for each binary value.
+ */
+ len *= 2;
+
+ /*
+ * Add one byte for the NUL termination, and one for the id_type.
+ */
+ len += 2;
+ if (id_search->id_type == SCSI_ID_VENDOR_SPECIFIC)
+ len += VENDOR_LENGTH + MODEL_LENGTH;
+
+ if (max_len < len) {
+ log_debug("%s: length %d too short - need %d",
+ dev_scsi->kernel, max_len, len);
+ return 1;
+ }
+
+ if (id_search->id_type == SCSI_ID_TGTGROUP && tgpt_group != NULL) {
+ unsigned group;
+
+ group = ((unsigned)page_83[6] << 8) | page_83[7];
+ sprintf(tgpt_group,"%x", group);
+ return 1;
+ }
+
+ serial[0] = hex_str[id_search->id_type];
+
+ /*
+ * For SCSI_ID_VENDOR_SPECIFIC prepend the vendor and model before
+ * the id since it is not unique across all vendors and models,
+ * this differs from SCSI_ID_T10_VENDOR, where the vendor is
+ * included in the identifier.
+ */
+ if (id_search->id_type == SCSI_ID_VENDOR_SPECIFIC)
+ if (prepend_vendor_model(dev_scsi, &serial[1]) < 0)
+ return 1;
+
+ i = 4; /* offset to the start of the identifier */
+ s = j = strlen(serial);
+ if ((page_83[0] & 0x0f) == SCSI_ID_ASCII) {
+ /*
+ * ASCII descriptor.
+ */
+ while (i < (4 + page_83[3]))
+ serial[j++] = page_83[i++];
+ } else {
+ /*
+ * Binary descriptor, convert to ASCII, using two bytes of
+ * ASCII for each byte in the page_83.
+ */
+ while (i < (4 + page_83[3])) {
+ serial[j++] = hex_str[(page_83[i] & 0xf0) >> 4];
+ serial[j++] = hex_str[page_83[i] & 0x0f];
+ i++;
+ }
+ }
+
+ strcpy(serial_short, &serial[s]);
+
+ if (id_search->id_type == SCSI_ID_NAA && wwn != NULL) {
+ strncpy(wwn, &serial[s], 16);
+ if (wwn_vendor_extension != NULL)
+ strncpy(wwn_vendor_extension, &serial[s + 16], 16);
+ }
+
+ return 0;
+}
+
+/* Extract the raw binary from VPD 0x83 pre-SPC devices */
+static int check_fill_0x83_prespc3(struct scsi_id_device *dev_scsi,
+ unsigned char *page_83,
+ const struct scsi_id_search_values
+ *id_search, char *serial, char *serial_short, int max_len) {
+ int i, j;
+
+ serial[0] = hex_str[SCSI_ID_NAA];
+ /* serial has been memset to zero before */
+ j = strlen(serial); /* j = 1; */
+
+ for (i = 0; (i < page_83[3]) && (j < max_len-3); ++i) {
+ serial[j++] = hex_str[(page_83[4+i] & 0xf0) >> 4];
+ serial[j++] = hex_str[ page_83[4+i] & 0x0f];
+ }
+ serial[max_len-1] = 0;
+ strncpy(serial_short, serial, max_len-1);
+ return 0;
+}
+
+/* Get device identification VPD page */
+static int do_scsi_page83_inquiry(struct scsi_id_device *dev_scsi, int fd,
+ char *serial, char *serial_short, int len,
+ char *unit_serial_number, char *wwn,
+ char *wwn_vendor_extension, char *tgpt_group) {
+ int retval;
+ unsigned id_ind, j;
+ unsigned char page_83[SCSI_INQ_BUFF_LEN];
+
+ /* also pick up the page 80 serial number */
+ do_scsi_page80_inquiry(dev_scsi, fd, NULL, unit_serial_number, MAX_SERIAL_LEN);
+
+ memzero(page_83, SCSI_INQ_BUFF_LEN);
+ retval = scsi_inquiry(dev_scsi, fd, 1, PAGE_83, page_83,
+ SCSI_INQ_BUFF_LEN);
+ if (retval < 0)
+ return 1;
+
+ if (page_83[1] != PAGE_83) {
+ log_debug("%s: Invalid page 0x83", dev_scsi->kernel);
+ return 1;
+ }
+
+ /*
+ * XXX Some devices (IBM 3542) return all spaces for an identifier if
+ * the LUN is not actually configured. This leads to identifiers of
+ * the form: "1 ".
+ */
+
+ /*
+ * Model 4, 5, and (some) model 6 EMC Symmetrix devices return
+ * a page 83 reply according to SCSI-2 format instead of SPC-2/3.
+ *
+ * The SCSI-2 page 83 format returns an IEEE WWN in binary
+ * encoded hexi-decimal in the 16 bytes following the initial
+ * 4-byte page 83 reply header.
+ *
+ * Both the SPC-2 and SPC-3 formats return an IEEE WWN as part
+ * of an Identification descriptor. The 3rd byte of the first
+ * Identification descriptor is a reserved (BSZ) byte field.
+ *
+ * Reference the 7th byte of the page 83 reply to determine
+ * whether the reply is compliant with SCSI-2 or SPC-2/3
+ * specifications. A zero value in the 7th byte indicates
+ * an SPC-2/3 conformant reply, (i.e., the reserved field of the
+ * first Identification descriptor). This byte will be non-zero
+ * for a SCSI-2 conformant page 83 reply from these EMC
+ * Symmetrix models since the 7th byte of the reply corresponds
+ * to the 4th and 5th nibbles of the 6-byte OUI for EMC, that is,
+ * 0x006048.
+ */
+
+ if (page_83[6] != 0)
+ return check_fill_0x83_prespc3(dev_scsi, page_83, id_search_list,
+ serial, serial_short, len);
+
+ /*
+ * Search for a match in the prioritized id_search_list - since WWN ids
+ * come first we can pick up the WWN in check_fill_0x83_id().
+ */
+ for (id_ind = 0;
+ id_ind < sizeof(id_search_list)/sizeof(id_search_list[0]);
+ id_ind++) {
+ /*
+ * Examine each descriptor returned. There is normally only
+ * one or a small number of descriptors.
+ */
+ for (j = 4; j <= (unsigned)page_83[3] + 3; j += page_83[j + 3] + 4) {
+ retval = check_fill_0x83_id(dev_scsi, &page_83[j],
+ &id_search_list[id_ind],
+ serial, serial_short, len,
+ wwn, wwn_vendor_extension,
+ tgpt_group);
+ if (!retval)
+ return retval;
+ else if (retval < 0)
+ return retval;
+ }
+ }
+ return 1;
+}
+
+/*
+ * Get device identification VPD page for older SCSI-2 device which is not
+ * compliant with either SPC-2 or SPC-3 format.
+ *
+ * Return the hard coded error code value 2 if the page 83 reply is not
+ * conformant to the SCSI-2 format.
+ */
+static int do_scsi_page83_prespc3_inquiry(struct scsi_id_device *dev_scsi, int fd,
+ char *serial, char *serial_short, int len) {
+ int retval;
+ int i, j;
+ unsigned char page_83[SCSI_INQ_BUFF_LEN];
+
+ memzero(page_83, SCSI_INQ_BUFF_LEN);
+ retval = scsi_inquiry(dev_scsi, fd, 1, PAGE_83, page_83, SCSI_INQ_BUFF_LEN);
+ if (retval < 0)
+ return 1;
+
+ if (page_83[1] != PAGE_83) {
+ log_debug("%s: Invalid page 0x83", dev_scsi->kernel);
+ return 1;
+ }
+ /*
+ * Model 4, 5, and (some) model 6 EMC Symmetrix devices return
+ * a page 83 reply according to SCSI-2 format instead of SPC-2/3.
+ *
+ * The SCSI-2 page 83 format returns an IEEE WWN in binary
+ * encoded hexi-decimal in the 16 bytes following the initial
+ * 4-byte page 83 reply header.
+ *
+ * Both the SPC-2 and SPC-3 formats return an IEEE WWN as part
+ * of an Identification descriptor. The 3rd byte of the first
+ * Identification descriptor is a reserved (BSZ) byte field.
+ *
+ * Reference the 7th byte of the page 83 reply to determine
+ * whether the reply is compliant with SCSI-2 or SPC-2/3
+ * specifications. A zero value in the 7th byte indicates
+ * an SPC-2/3 conformant reply, (i.e., the reserved field of the
+ * first Identification descriptor). This byte will be non-zero
+ * for a SCSI-2 conformant page 83 reply from these EMC
+ * Symmetrix models since the 7th byte of the reply corresponds
+ * to the 4th and 5th nibbles of the 6-byte OUI for EMC, that is,
+ * 0x006048.
+ */
+ if (page_83[6] == 0)
+ return 2;
+
+ serial[0] = hex_str[SCSI_ID_NAA];
+ /*
+ * The first four bytes contain data, not a descriptor.
+ */
+ i = 4;
+ j = strlen(serial);
+ /*
+ * Binary descriptor, convert to ASCII,
+ * using two bytes of ASCII for each byte
+ * in the page_83.
+ */
+ while (i < (page_83[3]+4)) {
+ serial[j++] = hex_str[(page_83[i] & 0xf0) >> 4];
+ serial[j++] = hex_str[page_83[i] & 0x0f];
+ i++;
+ }
+ return 0;
+}
+
+/* Get unit serial number VPD page */
+static int do_scsi_page80_inquiry(struct scsi_id_device *dev_scsi, int fd,
+ char *serial, char *serial_short, int max_len) {
+ int retval;
+ int ser_ind;
+ int i;
+ int len;
+ unsigned char buf[SCSI_INQ_BUFF_LEN];
+
+ memzero(buf, SCSI_INQ_BUFF_LEN);
+ retval = scsi_inquiry(dev_scsi, fd, 1, PAGE_80, buf, SCSI_INQ_BUFF_LEN);
+ if (retval < 0)
+ return retval;
+
+ if (buf[1] != PAGE_80) {
+ log_debug("%s: Invalid page 0x80", dev_scsi->kernel);
+ return 1;
+ }
+
+ len = 1 + VENDOR_LENGTH + MODEL_LENGTH + buf[3];
+ if (max_len < len) {
+ log_debug("%s: length %d too short - need %d",
+ dev_scsi->kernel, max_len, len);
+ return 1;
+ }
+ /*
+ * Prepend 'S' to avoid unlikely collision with page 0x83 vendor
+ * specific type where we prepend '0' + vendor + model.
+ */
+ len = buf[3];
+ if (serial != NULL) {
+ serial[0] = 'S';
+ ser_ind = prepend_vendor_model(dev_scsi, &serial[1]);
+ if (ser_ind < 0)
+ return 1;
+ ser_ind++; /* for the leading 'S' */
+ for (i = 4; i < len + 4; i++, ser_ind++)
+ serial[ser_ind] = buf[i];
+ }
+ if (serial_short != NULL) {
+ memcpy(serial_short, &buf[4], len);
+ serial_short[len] = '\0';
+ }
+ return 0;
+}
+
+int scsi_std_inquiry(struct scsi_id_device *dev_scsi, const char *devname) {
+ int fd;
+ unsigned char buf[SCSI_INQ_BUFF_LEN];
+ struct stat statbuf;
+ int err = 0;
+
+ fd = open(devname, O_RDONLY | O_NONBLOCK | O_CLOEXEC);
+ if (fd < 0) {
+ log_debug_errno(errno, "scsi_id: cannot open %s: %m", devname);
+ return 1;
+ }
+
+ if (fstat(fd, &statbuf) < 0) {
+ log_debug_errno(errno, "scsi_id: cannot stat %s: %m", devname);
+ err = 2;
+ goto out;
+ }
+ sprintf(dev_scsi->kernel,"%d:%d", major(statbuf.st_rdev),
+ minor(statbuf.st_rdev));
+
+ memzero(buf, SCSI_INQ_BUFF_LEN);
+ err = scsi_inquiry(dev_scsi, fd, 0, 0, buf, SCSI_INQ_BUFF_LEN);
+ if (err < 0)
+ goto out;
+
+ err = 0;
+ memcpy(dev_scsi->vendor, buf + 8, 8);
+ dev_scsi->vendor[8] = '\0';
+ memcpy(dev_scsi->model, buf + 16, 16);
+ dev_scsi->model[16] = '\0';
+ memcpy(dev_scsi->revision, buf + 32, 4);
+ dev_scsi->revision[4] = '\0';
+ sprintf(dev_scsi->type,"%x", buf[0] & 0x1f);
+
+out:
+ close(fd);
+ return err;
+}
+
+int scsi_get_serial(struct scsi_id_device *dev_scsi, const char *devname,
+ int page_code, int len) {
+ unsigned char page0[SCSI_INQ_BUFF_LEN];
+ int fd = -1;
+ int cnt;
+ int ind;
+ int retval;
+
+ memzero(dev_scsi->serial, len);
+ initialize_srand();
+ for (cnt = 20; cnt > 0; cnt--) {
+ struct timespec duration;
+
+ fd = open(devname, O_RDONLY | O_NONBLOCK | O_CLOEXEC);
+ if (fd >= 0 || errno != EBUSY)
+ break;
+ duration.tv_sec = 0;
+ duration.tv_nsec = (200 * 1000 * 1000) + (rand() % 100 * 1000 * 1000);
+ nanosleep(&duration, NULL);
+ }
+ if (fd < 0)
+ return 1;
+
+ if (page_code == PAGE_80) {
+ if (do_scsi_page80_inquiry(dev_scsi, fd, dev_scsi->serial, dev_scsi->serial_short, len)) {
+ retval = 1;
+ goto completed;
+ } else {
+ retval = 0;
+ goto completed;
+ }
+ } else if (page_code == PAGE_83) {
+ if (do_scsi_page83_inquiry(dev_scsi, fd, dev_scsi->serial, dev_scsi->serial_short, len, dev_scsi->unit_serial_number, dev_scsi->wwn, dev_scsi->wwn_vendor_extension, dev_scsi->tgpt_group)) {
+ retval = 1;
+ goto completed;
+ } else {
+ retval = 0;
+ goto completed;
+ }
+ } else if (page_code == PAGE_83_PRE_SPC3) {
+ retval = do_scsi_page83_prespc3_inquiry(dev_scsi, fd, dev_scsi->serial, dev_scsi->serial_short, len);
+ if (retval) {
+ /*
+ * Fallback to servicing a SPC-2/3 compliant page 83
+ * inquiry if the page 83 reply format does not
+ * conform to pre-SPC3 expectations.
+ */
+ if (retval == 2) {
+ if (do_scsi_page83_inquiry(dev_scsi, fd, dev_scsi->serial, dev_scsi->serial_short, len, dev_scsi->unit_serial_number, dev_scsi->wwn, dev_scsi->wwn_vendor_extension, dev_scsi->tgpt_group)) {
+ retval = 1;
+ goto completed;
+ } else {
+ retval = 0;
+ goto completed;
+ }
+ }
+ else {
+ retval = 1;
+ goto completed;
+ }
+ } else {
+ retval = 0;
+ goto completed;
+ }
+ } else if (page_code != 0x00) {
+ log_debug("%s: unsupported page code 0x%d", dev_scsi->kernel, page_code);
+ retval = 1;
+ goto completed;
+ }
+
+ /*
+ * Get page 0, the page of the pages. By default, try from best to
+ * worst of supported pages: 0x83 then 0x80.
+ */
+ if (do_scsi_page0_inquiry(dev_scsi, fd, page0, SCSI_INQ_BUFF_LEN)) {
+ /*
+ * Don't try anything else. Black list if a specific page
+ * should be used for this vendor+model, or maybe have an
+ * optional fall-back to page 0x80 or page 0x83.
+ */
+ retval = 1;
+ goto completed;
+ }
+
+ for (ind = 4; ind <= page0[3] + 3; ind++)
+ if (page0[ind] == PAGE_83)
+ if (!do_scsi_page83_inquiry(dev_scsi, fd,
+ dev_scsi->serial, dev_scsi->serial_short, len, dev_scsi->unit_serial_number, dev_scsi->wwn, dev_scsi->wwn_vendor_extension, dev_scsi->tgpt_group)) {
+ /*
+ * Success
+ */
+ retval = 0;
+ goto completed;
+ }
+
+ for (ind = 4; ind <= page0[3] + 3; ind++)
+ if (page0[ind] == PAGE_80)
+ if (!do_scsi_page80_inquiry(dev_scsi, fd,
+ dev_scsi->serial, dev_scsi->serial_short, len)) {
+ /*
+ * Success
+ */
+ retval = 0;
+ goto completed;
+ }
+ retval = 1;
+
+completed:
+ close(fd);
+ return retval;
+}
diff --git a/src/udev/udev-builtin-blkid.c b/src/udev/udev-builtin-blkid.c
new file mode 100644
index 0000000..69d6c4b
--- /dev/null
+++ b/src/udev/udev-builtin-blkid.c
@@ -0,0 +1,318 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * probe disks for filesystems and partitions
+ *
+ * Copyright © 2011 Karel Zak <kzak@redhat.com>
+ */
+
+#include <blkid.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "blkid-util.h"
+#include "device-util.h"
+#include "efivars.h"
+#include "fd-util.h"
+#include "gpt.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "udev-builtin.h"
+
+static void print_property(sd_device *dev, bool test, const char *name, const char *value) {
+ char s[256];
+
+ s[0] = '\0';
+
+ if (streq(name, "TYPE")) {
+ udev_builtin_add_property(dev, test, "ID_FS_TYPE", value);
+
+ } else if (streq(name, "USAGE")) {
+ udev_builtin_add_property(dev, test, "ID_FS_USAGE", value);
+
+ } else if (streq(name, "VERSION")) {
+ udev_builtin_add_property(dev, test, "ID_FS_VERSION", value);
+
+ } else if (streq(name, "UUID")) {
+ blkid_safe_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_UUID", s);
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_UUID_ENC", s);
+
+ } else if (streq(name, "UUID_SUB")) {
+ blkid_safe_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_UUID_SUB", s);
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_UUID_SUB_ENC", s);
+
+ } else if (streq(name, "LABEL")) {
+ blkid_safe_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_LABEL", s);
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_LABEL_ENC", s);
+
+ } else if (streq(name, "PTTYPE")) {
+ udev_builtin_add_property(dev, test, "ID_PART_TABLE_TYPE", value);
+
+ } else if (streq(name, "PTUUID")) {
+ udev_builtin_add_property(dev, test, "ID_PART_TABLE_UUID", value);
+
+ } else if (streq(name, "PART_ENTRY_NAME")) {
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_PART_ENTRY_NAME", s);
+
+ } else if (streq(name, "PART_ENTRY_TYPE")) {
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_PART_ENTRY_TYPE", s);
+
+ } else if (startswith(name, "PART_ENTRY_")) {
+ strscpyl(s, sizeof(s), "ID_", name, NULL);
+ udev_builtin_add_property(dev, test, s, value);
+
+ } else if (streq(name, "SYSTEM_ID")) {
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_SYSTEM_ID", s);
+
+ } else if (streq(name, "PUBLISHER_ID")) {
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_PUBLISHER_ID", s);
+
+ } else if (streq(name, "APPLICATION_ID")) {
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_APPLICATION_ID", s);
+
+ } else if (streq(name, "BOOT_SYSTEM_ID")) {
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_BOOT_SYSTEM_ID", s);
+ }
+}
+
+static int find_gpt_root(sd_device *dev, blkid_probe pr, bool test) {
+
+#if defined(GPT_ROOT_NATIVE) && ENABLE_EFI
+
+ _cleanup_free_ char *root_id = NULL;
+ bool found_esp = false;
+ blkid_partlist pl;
+ int i, nvals, r;
+
+ assert(pr);
+
+ /* Iterate through the partitions on this disk, and see if the
+ * EFI ESP we booted from is on it. If so, find the first root
+ * disk, and add a property indicating its partition UUID. */
+
+ errno = 0;
+ pl = blkid_probe_get_partitions(pr);
+ if (!pl)
+ return -errno ?: -ENOMEM;
+
+ nvals = blkid_partlist_numof_partitions(pl);
+ for (i = 0; i < nvals; i++) {
+ blkid_partition pp;
+ const char *stype, *sid;
+ sd_id128_t type;
+
+ pp = blkid_partlist_get_partition(pl, i);
+ if (!pp)
+ continue;
+
+ sid = blkid_partition_get_uuid(pp);
+ if (!sid)
+ continue;
+
+ stype = blkid_partition_get_type_string(pp);
+ if (!stype)
+ continue;
+
+ if (sd_id128_from_string(stype, &type) < 0)
+ continue;
+
+ if (sd_id128_equal(type, GPT_ESP)) {
+ sd_id128_t id, esp;
+
+ /* We found an ESP, let's see if it matches
+ * the ESP we booted from. */
+
+ if (sd_id128_from_string(sid, &id) < 0)
+ continue;
+
+ r = efi_loader_get_device_part_uuid(&esp);
+ if (r < 0)
+ return r;
+
+ if (sd_id128_equal(id, esp))
+ found_esp = true;
+
+ } else if (sd_id128_equal(type, GPT_ROOT_NATIVE)) {
+ unsigned long long flags;
+
+ flags = blkid_partition_get_flags(pp);
+ if (flags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* We found a suitable root partition, let's
+ * remember the first one. */
+
+ if (!root_id) {
+ root_id = strdup(sid);
+ if (!root_id)
+ return -ENOMEM;
+ }
+ }
+ }
+
+ /* We found the ESP on this disk, and also found a root
+ * partition, nice! Let's export its UUID */
+ if (found_esp && root_id)
+ udev_builtin_add_property(dev, test, "ID_PART_GPT_AUTO_ROOT_UUID", root_id);
+#endif
+
+ return 0;
+}
+
+static int probe_superblocks(blkid_probe pr) {
+ struct stat st;
+ int rc;
+
+ /* TODO: Return negative errno. */
+
+ if (fstat(blkid_probe_get_fd(pr), &st))
+ return -errno;
+
+ blkid_probe_enable_partitions(pr, 1);
+
+ if (!S_ISCHR(st.st_mode) &&
+ blkid_probe_get_size(pr) <= 1024 * 1440 &&
+ blkid_probe_is_wholedisk(pr)) {
+ /*
+ * check if the small disk is partitioned, if yes then
+ * don't probe for filesystems.
+ */
+ blkid_probe_enable_superblocks(pr, 0);
+
+ rc = blkid_do_fullprobe(pr);
+ if (rc < 0)
+ return rc; /* -1 = error, 1 = nothing, 0 = success */
+
+ if (blkid_probe_lookup_value(pr, "PTTYPE", NULL, NULL) == 0)
+ return 0; /* partition table detected */
+ }
+
+ blkid_probe_set_partitions_flags(pr, BLKID_PARTS_ENTRY_DETAILS);
+ blkid_probe_enable_superblocks(pr, 1);
+
+ return blkid_do_safeprobe(pr);
+}
+
+static int builtin_blkid(sd_device *dev, int argc, char *argv[], bool test) {
+ const char *devnode, *root_partition = NULL, *data, *name;
+ _cleanup_(blkid_free_probep) blkid_probe pr = NULL;
+ bool noraid = false, is_gpt = false;
+ _cleanup_close_ int fd = -1;
+ int64_t offset = 0;
+ int nvals, i, r;
+
+ static const struct option options[] = {
+ { "offset", required_argument, NULL, 'o' },
+ { "noraid", no_argument, NULL, 'R' },
+ {}
+ };
+
+ for (;;) {
+ int option;
+
+ option = getopt_long(argc, argv, "o:R", options, NULL);
+ if (option == -1)
+ break;
+
+ switch (option) {
+ case 'o':
+ r = safe_atoi64(optarg, &offset);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to parse '%s' as an integer: %m", optarg);
+ if (offset < 0)
+ return log_device_error_errno(dev, SYNTHETIC_ERRNO(ERANGE), "Invalid offset %"PRIi64": %m", offset);
+ break;
+ case 'R':
+ noraid = true;
+ break;
+ }
+ }
+
+ errno = 0;
+ pr = blkid_new_probe();
+ if (!pr)
+ return log_device_debug_errno(dev, errno > 0 ? errno : ENOMEM, "Failed to create blkid prober: %m");
+
+ blkid_probe_set_superblocks_flags(pr,
+ BLKID_SUBLKS_LABEL | BLKID_SUBLKS_UUID |
+ BLKID_SUBLKS_TYPE | BLKID_SUBLKS_SECTYPE |
+ BLKID_SUBLKS_USAGE | BLKID_SUBLKS_VERSION);
+
+ if (noraid)
+ blkid_probe_filter_superblocks_usage(pr, BLKID_FLTR_NOTIN, BLKID_USAGE_RAID);
+
+ r = sd_device_get_devname(dev, &devnode);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get device name: %m");
+
+ fd = open(devnode, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return log_device_debug_errno(dev, errno, "Failed to open block device %s: %m", devnode);
+
+ errno = 0;
+ r = blkid_probe_set_device(pr, fd, offset, 0);
+ if (r < 0)
+ return log_device_debug_errno(dev, errno > 0 ? errno : ENOMEM, "Failed to set device to blkid prober: %m");
+
+ log_device_debug(dev, "Probe %s with %sraid and offset=%"PRIi64, devnode, noraid ? "no" : "", offset);
+
+ r = probe_superblocks(pr);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to probe superblocks: %m");
+
+ /* If the device is a partition then its parent passed the root partition UUID to the device */
+ (void) sd_device_get_property_value(dev, "ID_PART_GPT_AUTO_ROOT_UUID", &root_partition);
+
+ errno = 0;
+ nvals = blkid_probe_numof_values(pr);
+ if (nvals < 0)
+ return log_device_debug_errno(dev, errno > 0 ? errno : ENOMEM, "Failed to get number of probed values: %m");
+
+ for (i = 0; i < nvals; i++) {
+ if (blkid_probe_get_value(pr, i, &name, &data, NULL) < 0)
+ continue;
+
+ print_property(dev, test, name, data);
+
+ /* Is this a disk with GPT partition table? */
+ if (streq(name, "PTTYPE") && streq(data, "gpt"))
+ is_gpt = true;
+
+ /* Is this a partition that matches the root partition
+ * property inherited from the parent? */
+ if (root_partition && streq(name, "PART_ENTRY_UUID") && streq(data, root_partition))
+ udev_builtin_add_property(dev, test, "ID_PART_GPT_AUTO_ROOT", "1");
+ }
+
+ if (is_gpt)
+ find_gpt_root(dev, pr, test);
+
+ return 0;
+}
+
+const struct udev_builtin udev_builtin_blkid = {
+ .name = "blkid",
+ .cmd = builtin_blkid,
+ .help = "Filesystem and partition probing",
+ .run_once = true,
+};
diff --git a/src/udev/udev-builtin-btrfs.c b/src/udev/udev-builtin-btrfs.c
new file mode 100644
index 0000000..956be59
--- /dev/null
+++ b/src/udev/udev-builtin-btrfs.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+
+#include "device-util.h"
+#include "fd-util.h"
+#include "missing.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "udev-builtin.h"
+#include "util.h"
+
+static int builtin_btrfs(sd_device *dev, int argc, char *argv[], bool test) {
+ struct btrfs_ioctl_vol_args args = {};
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ if (argc != 3 || !streq(argv[1], "ready"))
+ return log_device_error_errno(dev, SYNTHETIC_ERRNO(EINVAL), "Invalid arguments");
+
+ fd = open("/dev/btrfs-control", O_RDWR|O_CLOEXEC);
+ if (fd < 0)
+ return log_device_debug_errno(dev, errno, "Failed to open /dev/btrfs-control: %m");
+
+ strscpy(args.name, sizeof(args.name), argv[2]);
+ r = ioctl(fd, BTRFS_IOC_DEVICES_READY, &args);
+ if (r < 0)
+ return log_device_debug_errno(dev, errno, "Failed to call BTRFS_IOC_DEVICES_READY: %m");
+
+ udev_builtin_add_property(dev, test, "ID_BTRFS_READY", one_zero(r == 0));
+ return 0;
+}
+
+const struct udev_builtin udev_builtin_btrfs = {
+ .name = "btrfs",
+ .cmd = builtin_btrfs,
+ .help = "btrfs volume management",
+};
diff --git a/src/udev/udev-builtin-hwdb.c b/src/udev/udev-builtin-hwdb.c
new file mode 100644
index 0000000..225e026
--- /dev/null
+++ b/src/udev/udev-builtin-hwdb.c
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fnmatch.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sd-hwdb.h"
+
+#include "alloc-util.h"
+#include "device-util.h"
+#include "hwdb-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "udev-builtin.h"
+
+static sd_hwdb *hwdb;
+
+int udev_builtin_hwdb_lookup(sd_device *dev,
+ const char *prefix, const char *modalias,
+ const char *filter, bool test) {
+ _cleanup_free_ char *lookup = NULL;
+ const char *key, *value;
+ int n = 0, r;
+
+ if (!hwdb)
+ return -ENOENT;
+
+ if (prefix) {
+ lookup = strjoin(prefix, modalias);
+ if (!lookup)
+ return -ENOMEM;
+ modalias = lookup;
+ }
+
+ SD_HWDB_FOREACH_PROPERTY(hwdb, modalias, key, value) {
+ if (filter && fnmatch(filter, key, FNM_NOESCAPE) != 0)
+ continue;
+
+ r = udev_builtin_add_property(dev, test, key, value);
+ if (r < 0)
+ return r;
+ n++;
+ }
+ return n;
+}
+
+static const char *modalias_usb(sd_device *dev, char *s, size_t size) {
+ const char *v, *p;
+ uint16_t vn, pn;
+
+ if (sd_device_get_sysattr_value(dev, "idVendor", &v) < 0)
+ return NULL;
+ if (sd_device_get_sysattr_value(dev, "idProduct", &p) < 0)
+ return NULL;
+ if (safe_atoux16(v, &vn) < 0)
+ return NULL;
+ if (safe_atoux16(p, &pn) < 0)
+ return NULL;
+ snprintf(s, size, "usb:v%04Xp%04X*", vn, pn);
+ return s;
+}
+
+static int udev_builtin_hwdb_search(sd_device *dev, sd_device *srcdev,
+ const char *subsystem, const char *prefix,
+ const char *filter, bool test) {
+ sd_device *d;
+ char s[16];
+ bool last = false;
+ int r = 0;
+
+ assert(dev);
+
+ if (!srcdev)
+ srcdev = dev;
+
+ for (d = srcdev; d; ) {
+ const char *dsubsys, *devtype, *modalias = NULL;
+
+ if (sd_device_get_subsystem(d, &dsubsys) < 0)
+ goto next;
+
+ /* look only at devices of a specific subsystem */
+ if (subsystem && !streq(dsubsys, subsystem))
+ goto next;
+
+ (void) sd_device_get_property_value(d, "MODALIAS", &modalias);
+
+ if (streq(dsubsys, "usb") &&
+ sd_device_get_devtype(d, &devtype) >= 0 &&
+ streq(devtype, "usb_device")) {
+ /* if the usb_device does not have a modalias, compose one */
+ if (!modalias)
+ modalias = modalias_usb(d, s, sizeof(s));
+
+ /* avoid looking at any parent device, they are usually just a USB hub */
+ last = true;
+ }
+
+ if (!modalias)
+ goto next;
+
+ r = udev_builtin_hwdb_lookup(dev, prefix, modalias, filter, test);
+ if (r > 0)
+ break;
+
+ if (last)
+ break;
+next:
+ if (sd_device_get_parent(d, &d) < 0)
+ break;
+ }
+
+ return r;
+}
+
+static int builtin_hwdb(sd_device *dev, int argc, char *argv[], bool test) {
+ static const struct option options[] = {
+ { "filter", required_argument, NULL, 'f' },
+ { "device", required_argument, NULL, 'd' },
+ { "subsystem", required_argument, NULL, 's' },
+ { "lookup-prefix", required_argument, NULL, 'p' },
+ {}
+ };
+ const char *filter = NULL;
+ const char *device = NULL;
+ const char *subsystem = NULL;
+ const char *prefix = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *srcdev = NULL;
+ int r;
+
+ if (!hwdb)
+ return -EINVAL;
+
+ for (;;) {
+ int option;
+
+ option = getopt_long(argc, argv, "f:d:s:p:", options, NULL);
+ if (option == -1)
+ break;
+
+ switch (option) {
+ case 'f':
+ filter = optarg;
+ break;
+
+ case 'd':
+ device = optarg;
+ break;
+
+ case 's':
+ subsystem = optarg;
+ break;
+
+ case 'p':
+ prefix = optarg;
+ break;
+ }
+ }
+
+ /* query a specific key given as argument */
+ if (argv[optind]) {
+ r = udev_builtin_hwdb_lookup(dev, prefix, argv[optind], filter, test);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to lookup hwdb: %m");
+ if (r == 0)
+ return log_device_debug_errno(dev, SYNTHETIC_ERRNO(ENODATA), "No entry found from hwdb.");
+ return r;
+ }
+
+ /* read data from another device than the device we will store the data */
+ if (device) {
+ r = sd_device_new_from_device_id(&srcdev, device);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to create sd_device object '%s': %m", device);
+ }
+
+ r = udev_builtin_hwdb_search(dev, srcdev, subsystem, prefix, filter, test);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to lookup hwdb: %m");
+ if (r == 0)
+ return log_device_debug_errno(dev, SYNTHETIC_ERRNO(ENODATA), "No entry found from hwdb.");
+ return r;
+}
+
+/* called at udev startup and reload */
+static int builtin_hwdb_init(void) {
+ int r;
+
+ if (hwdb)
+ return 0;
+
+ r = sd_hwdb_new(&hwdb);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+/* called on udev shutdown and reload request */
+static void builtin_hwdb_exit(void) {
+ hwdb = sd_hwdb_unref(hwdb);
+}
+
+/* called every couple of seconds during event activity; 'true' if config has changed */
+static bool builtin_hwdb_validate(void) {
+ return hwdb_validate(hwdb);
+}
+
+const struct udev_builtin udev_builtin_hwdb = {
+ .name = "hwdb",
+ .cmd = builtin_hwdb,
+ .init = builtin_hwdb_init,
+ .exit = builtin_hwdb_exit,
+ .validate = builtin_hwdb_validate,
+ .help = "Hardware database",
+};
diff --git a/src/udev/udev-builtin-input_id.c b/src/udev/udev-builtin-input_id.c
new file mode 100644
index 0000000..e3db55b
--- /dev/null
+++ b/src/udev/udev-builtin-input_id.c
@@ -0,0 +1,364 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * expose input properties via udev
+ *
+ * Portions Copyright © 2004 David Zeuthen, <david@fubar.dk>
+ * Copyright © 2014 Carlos Garnacho <carlosg@gnome.org>
+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/limits.h>
+#include <linux/input.h>
+
+#include "device-util.h"
+#include "fd-util.h"
+#include "missing.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "udev-builtin.h"
+#include "util.h"
+
+/* we must use this kernel-compatible implementation */
+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
+#define NBITS(x) ((((x)-1)/BITS_PER_LONG)+1)
+#define OFF(x) ((x)%BITS_PER_LONG)
+#define BIT(x) (1UL<<OFF(x))
+#define LONG(x) ((x)/BITS_PER_LONG)
+#define test_bit(bit, array) ((array[LONG(bit)] >> OFF(bit)) & 1)
+
+struct range {
+ unsigned start;
+ unsigned end;
+};
+
+/* key code ranges above BTN_MISC (start is inclusive, stop is exclusive)*/
+static const struct range high_key_blocks[] = {
+ { KEY_OK, BTN_DPAD_UP },
+ { KEY_ALS_TOGGLE, BTN_TRIGGER_HAPPY }
+};
+
+static int abs_size_mm(const struct input_absinfo *absinfo) {
+ /* Resolution is defined to be in units/mm for ABS_X/Y */
+ return (absinfo->maximum - absinfo->minimum) / absinfo->resolution;
+}
+
+static void extract_info(sd_device *dev, const char *devpath, bool test) {
+ char width[DECIMAL_STR_MAX(int)], height[DECIMAL_STR_MAX(int)];
+ struct input_absinfo xabsinfo = {}, yabsinfo = {};
+ _cleanup_close_ int fd = -1;
+
+ fd = open(devpath, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return;
+
+ if (ioctl(fd, EVIOCGABS(ABS_X), &xabsinfo) < 0 ||
+ ioctl(fd, EVIOCGABS(ABS_Y), &yabsinfo) < 0)
+ return;
+
+ if (xabsinfo.resolution <= 0 || yabsinfo.resolution <= 0)
+ return;
+
+ xsprintf(width, "%d", abs_size_mm(&xabsinfo));
+ xsprintf(height, "%d", abs_size_mm(&yabsinfo));
+
+ udev_builtin_add_property(dev, test, "ID_INPUT_WIDTH_MM", width);
+ udev_builtin_add_property(dev, test, "ID_INPUT_HEIGHT_MM", height);
+}
+
+/*
+ * Read a capability attribute and return bitmask.
+ * @param dev sd_device
+ * @param attr sysfs attribute name (e. g. "capabilities/key")
+ * @param bitmask: Output array which has a sizeof of bitmask_size
+ */
+static void get_cap_mask(sd_device *pdev, const char* attr,
+ unsigned long *bitmask, size_t bitmask_size,
+ bool test) {
+ const char *v;
+ char text[4096];
+ unsigned i;
+ char* word;
+ unsigned long val;
+
+ if (sd_device_get_sysattr_value(pdev, attr, &v) < 0)
+ v = "";
+
+ xsprintf(text, "%s", v);
+ log_device_debug(pdev, "%s raw kernel attribute: %s", attr, text);
+
+ memzero(bitmask, bitmask_size);
+ i = 0;
+ while ((word = strrchr(text, ' ')) != NULL) {
+ val = strtoul(word+1, NULL, 16);
+ if (i < bitmask_size / sizeof(unsigned long))
+ bitmask[i] = val;
+ else
+ log_device_debug(pdev, "Ignoring %s block %lX which is larger than maximum size", attr, val);
+ *word = '\0';
+ ++i;
+ }
+ val = strtoul (text, NULL, 16);
+ if (i < bitmask_size / sizeof(unsigned long))
+ bitmask[i] = val;
+ else
+ log_device_debug(pdev, "Ignoring %s block %lX which is larger than maximum size", attr, val);
+
+ if (test) {
+ /* printf pattern with the right unsigned long number of hex chars */
+ xsprintf(text, " bit %%4u: %%0%zulX\n",
+ 2 * sizeof(unsigned long));
+ log_device_debug(pdev, "%s decoded bit map:", attr);
+ val = bitmask_size / sizeof (unsigned long);
+ /* skip over leading zeros */
+ while (bitmask[val-1] == 0 && val > 0)
+ --val;
+ for (i = 0; i < val; ++i) {
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ log_device_debug(pdev, text, i * BITS_PER_LONG, bitmask[i]);
+ REENABLE_WARNING;
+ }
+ }
+}
+
+/* pointer devices */
+static bool test_pointers(sd_device *dev,
+ const unsigned long* bitmask_ev,
+ const unsigned long* bitmask_abs,
+ const unsigned long* bitmask_key,
+ const unsigned long* bitmask_rel,
+ const unsigned long* bitmask_props,
+ bool test) {
+ int button, axis;
+ bool has_abs_coordinates = false;
+ bool has_rel_coordinates = false;
+ bool has_mt_coordinates = false;
+ bool has_joystick_axes_or_buttons = false;
+ bool is_direct = false;
+ bool has_touch = false;
+ bool has_3d_coordinates = false;
+ bool has_keys = false;
+ bool stylus_or_pen = false;
+ bool finger_but_no_pen = false;
+ bool has_mouse_button = false;
+ bool is_mouse = false;
+ bool is_touchpad = false;
+ bool is_touchscreen = false;
+ bool is_tablet = false;
+ bool is_joystick = false;
+ bool is_accelerometer = false;
+ bool is_pointing_stick= false;
+
+ has_keys = test_bit(EV_KEY, bitmask_ev);
+ has_abs_coordinates = test_bit(ABS_X, bitmask_abs) && test_bit(ABS_Y, bitmask_abs);
+ has_3d_coordinates = has_abs_coordinates && test_bit(ABS_Z, bitmask_abs);
+ is_accelerometer = test_bit(INPUT_PROP_ACCELEROMETER, bitmask_props);
+
+ if (!has_keys && has_3d_coordinates)
+ is_accelerometer = true;
+
+ if (is_accelerometer) {
+ udev_builtin_add_property(dev, test, "ID_INPUT_ACCELEROMETER", "1");
+ return true;
+ }
+
+ is_pointing_stick = test_bit(INPUT_PROP_POINTING_STICK, bitmask_props);
+ stylus_or_pen = test_bit(BTN_STYLUS, bitmask_key) || test_bit(BTN_TOOL_PEN, bitmask_key);
+ finger_but_no_pen = test_bit(BTN_TOOL_FINGER, bitmask_key) && !test_bit(BTN_TOOL_PEN, bitmask_key);
+ for (button = BTN_MOUSE; button < BTN_JOYSTICK && !has_mouse_button; button++)
+ has_mouse_button = test_bit(button, bitmask_key);
+ has_rel_coordinates = test_bit(EV_REL, bitmask_ev) && test_bit(REL_X, bitmask_rel) && test_bit(REL_Y, bitmask_rel);
+ has_mt_coordinates = test_bit(ABS_MT_POSITION_X, bitmask_abs) && test_bit(ABS_MT_POSITION_Y, bitmask_abs);
+
+ /* unset has_mt_coordinates if devices claims to have all abs axis */
+ if (has_mt_coordinates && test_bit(ABS_MT_SLOT, bitmask_abs) && test_bit(ABS_MT_SLOT - 1, bitmask_abs))
+ has_mt_coordinates = false;
+ is_direct = test_bit(INPUT_PROP_DIRECT, bitmask_props);
+ has_touch = test_bit(BTN_TOUCH, bitmask_key);
+
+ /* joysticks don't necessarily have buttons; e. g.
+ * rudders/pedals are joystick-like, but buttonless; they have
+ * other fancy axes. Others have buttons only but no axes.
+ *
+ * The BTN_JOYSTICK range starts after the mouse range, so a mouse
+ * with more than 16 buttons runs into the joystick range (e.g. Mad
+ * Catz Mad Catz M.M.O.TE). Skip those.
+ */
+ if (!test_bit(BTN_JOYSTICK - 1, bitmask_key)) {
+ for (button = BTN_JOYSTICK; button < BTN_DIGI && !has_joystick_axes_or_buttons; button++)
+ has_joystick_axes_or_buttons = test_bit(button, bitmask_key);
+ for (button = BTN_TRIGGER_HAPPY1; button <= BTN_TRIGGER_HAPPY40 && !has_joystick_axes_or_buttons; button++)
+ has_joystick_axes_or_buttons = test_bit(button, bitmask_key);
+ for (button = BTN_DPAD_UP; button <= BTN_DPAD_RIGHT && !has_joystick_axes_or_buttons; button++)
+ has_joystick_axes_or_buttons = test_bit(button, bitmask_key);
+ }
+ for (axis = ABS_RX; axis < ABS_PRESSURE && !has_joystick_axes_or_buttons; axis++)
+ has_joystick_axes_or_buttons = test_bit(axis, bitmask_abs);
+
+ if (has_abs_coordinates) {
+ if (stylus_or_pen)
+ is_tablet = true;
+ else if (finger_but_no_pen && !is_direct)
+ is_touchpad = true;
+ else if (has_mouse_button)
+ /* This path is taken by VMware's USB mouse, which has
+ * absolute axes, but no touch/pressure button. */
+ is_mouse = true;
+ else if (has_touch || is_direct)
+ is_touchscreen = true;
+ else if (has_joystick_axes_or_buttons)
+ is_joystick = true;
+ } else if (has_joystick_axes_or_buttons) {
+ is_joystick = true;
+ }
+
+ if (has_mt_coordinates) {
+ if (stylus_or_pen)
+ is_tablet = true;
+ else if (finger_but_no_pen && !is_direct)
+ is_touchpad = true;
+ else if (has_touch || is_direct)
+ is_touchscreen = true;
+ }
+
+ if (!is_tablet && !is_touchpad && !is_joystick &&
+ has_mouse_button &&
+ (has_rel_coordinates ||
+ !has_abs_coordinates)) /* mouse buttons and no axis */
+ is_mouse = true;
+
+ if (is_pointing_stick)
+ udev_builtin_add_property(dev, test, "ID_INPUT_POINTINGSTICK", "1");
+ if (is_mouse)
+ udev_builtin_add_property(dev, test, "ID_INPUT_MOUSE", "1");
+ if (is_touchpad)
+ udev_builtin_add_property(dev, test, "ID_INPUT_TOUCHPAD", "1");
+ if (is_touchscreen)
+ udev_builtin_add_property(dev, test, "ID_INPUT_TOUCHSCREEN", "1");
+ if (is_joystick)
+ udev_builtin_add_property(dev, test, "ID_INPUT_JOYSTICK", "1");
+ if (is_tablet)
+ udev_builtin_add_property(dev, test, "ID_INPUT_TABLET", "1");
+
+ return is_tablet || is_mouse || is_touchpad || is_touchscreen || is_joystick || is_pointing_stick;
+}
+
+/* key like devices */
+static bool test_key(sd_device *dev,
+ const unsigned long* bitmask_ev,
+ const unsigned long* bitmask_key,
+ bool test) {
+ unsigned i;
+ unsigned long found;
+ unsigned long mask;
+ bool ret = false;
+
+ /* do we have any KEY_* capability? */
+ if (!test_bit(EV_KEY, bitmask_ev)) {
+ log_device_debug(dev, "test_key: no EV_KEY capability");
+ return false;
+ }
+
+ /* only consider KEY_* here, not BTN_* */
+ found = 0;
+ for (i = 0; i < BTN_MISC/BITS_PER_LONG; ++i) {
+ found |= bitmask_key[i];
+ log_device_debug(dev, "test_key: checking bit block %lu for any keys; found=%i", (unsigned long)i*BITS_PER_LONG, found > 0);
+ }
+ /* If there are no keys in the lower block, check the higher blocks */
+ if (!found) {
+ unsigned block;
+ for (block = 0; block < (sizeof(high_key_blocks) / sizeof(struct range)); ++block) {
+ for (i = high_key_blocks[block].start; i < high_key_blocks[block].end; ++i) {
+ if (test_bit(i, bitmask_key)) {
+ log_device_debug(dev, "test_key: Found key %x in high block", i);
+ found = 1;
+ break;
+ }
+ }
+ }
+ }
+
+ if (found > 0) {
+ udev_builtin_add_property(dev, test, "ID_INPUT_KEY", "1");
+ ret = true;
+ }
+
+ /* the first 32 bits are ESC, numbers, and Q to D; if we have all of
+ * those, consider it a full keyboard; do not test KEY_RESERVED, though */
+ mask = 0xFFFFFFFE;
+ if (FLAGS_SET(bitmask_key[0], mask)) {
+ udev_builtin_add_property(dev, test, "ID_INPUT_KEYBOARD", "1");
+ ret = true;
+ }
+
+ return ret;
+}
+
+static int builtin_input_id(sd_device *dev, int argc, char *argv[], bool test) {
+ sd_device *pdev;
+ unsigned long bitmask_ev[NBITS(EV_MAX)];
+ unsigned long bitmask_abs[NBITS(ABS_MAX)];
+ unsigned long bitmask_key[NBITS(KEY_MAX)];
+ unsigned long bitmask_rel[NBITS(REL_MAX)];
+ unsigned long bitmask_props[NBITS(INPUT_PROP_MAX)];
+ const char *sysname, *devnode;
+ bool is_pointer;
+ bool is_key;
+
+ assert(dev);
+
+ /* walk up the parental chain until we find the real input device; the
+ * argument is very likely a subdevice of this, like eventN */
+ for (pdev = dev; pdev; ) {
+ const char *s;
+
+ if (sd_device_get_sysattr_value(pdev, "capabilities/ev", &s) >= 0)
+ break;
+
+ if (sd_device_get_parent_with_subsystem_devtype(pdev, "input", NULL, &pdev) >= 0)
+ continue;
+
+ pdev = NULL;
+ break;
+ }
+
+ if (pdev) {
+ /* Use this as a flag that input devices were detected, so that this
+ * program doesn't need to be called more than once per device */
+ udev_builtin_add_property(dev, test, "ID_INPUT", "1");
+ get_cap_mask(pdev, "capabilities/ev", bitmask_ev, sizeof(bitmask_ev), test);
+ get_cap_mask(pdev, "capabilities/abs", bitmask_abs, sizeof(bitmask_abs), test);
+ get_cap_mask(pdev, "capabilities/rel", bitmask_rel, sizeof(bitmask_rel), test);
+ get_cap_mask(pdev, "capabilities/key", bitmask_key, sizeof(bitmask_key), test);
+ get_cap_mask(pdev, "properties", bitmask_props, sizeof(bitmask_props), test);
+ is_pointer = test_pointers(dev, bitmask_ev, bitmask_abs,
+ bitmask_key, bitmask_rel,
+ bitmask_props, test);
+ is_key = test_key(dev, bitmask_ev, bitmask_key, test);
+ /* Some evdev nodes have only a scrollwheel */
+ if (!is_pointer && !is_key && test_bit(EV_REL, bitmask_ev) &&
+ (test_bit(REL_WHEEL, bitmask_rel) || test_bit(REL_HWHEEL, bitmask_rel)))
+ udev_builtin_add_property(dev, test, "ID_INPUT_KEY", "1");
+ if (test_bit(EV_SW, bitmask_ev))
+ udev_builtin_add_property(dev, test, "ID_INPUT_SWITCH", "1");
+
+ }
+
+ if (sd_device_get_devname(dev, &devnode) >= 0 &&
+ sd_device_get_sysname(dev, &sysname) >= 0 &&
+ startswith(sysname, "event"))
+ extract_info(dev, devnode, test);
+
+ return 0;
+}
+
+const struct udev_builtin udev_builtin_input_id = {
+ .name = "input_id",
+ .cmd = builtin_input_id,
+ .help = "Input device properties",
+};
diff --git a/src/udev/udev-builtin-keyboard.c b/src/udev/udev-builtin-keyboard.c
new file mode 100644
index 0000000..d80cdd2
--- /dev/null
+++ b/src/udev/udev-builtin-keyboard.c
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/input.h>
+
+#include "device-util.h"
+#include "fd-util.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "udev-builtin.h"
+
+static const struct key_name *keyboard_lookup_key(const char *str, GPERF_LEN_TYPE len);
+#include "keyboard-keys-from-name.h"
+
+static int install_force_release(sd_device *dev, const unsigned *release, unsigned release_count) {
+ sd_device *atkbd;
+ const char *cur;
+ char codes[4096];
+ char *s;
+ size_t l;
+ unsigned i;
+ int r;
+
+ assert(dev);
+ assert(release);
+
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "serio", NULL, &atkbd);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get serio parent: %m");
+
+ r = sd_device_get_sysattr_value(atkbd, "force_release", &cur);
+ if (r < 0)
+ return log_device_error_errno(atkbd, r, "Failed to get force-release attribute: %m");
+
+ s = codes;
+ l = sizeof(codes);
+
+ /* copy current content */
+ l = strpcpy(&s, l, cur);
+
+ /* append new codes */
+ for (i = 0; i < release_count; i++)
+ l = strpcpyf(&s, l, ",%u", release[i]);
+
+ log_device_debug(atkbd, "keyboard: updating force-release list with '%s'", codes);
+ r = sd_device_set_sysattr_value(atkbd, "force_release", codes);
+ if (r < 0)
+ return log_device_error_errno(atkbd, r, "Failed to set force-release attribute: %m");
+
+ return 0;
+}
+
+static int map_keycode(sd_device *dev, int fd, int scancode, const char *keycode) {
+ struct {
+ unsigned scan;
+ unsigned key;
+ } map;
+ char *endptr;
+ const struct key_name *k;
+ unsigned keycode_num;
+
+ /* translate identifier to key code */
+ k = keyboard_lookup_key(keycode, strlen(keycode));
+ if (k) {
+ keycode_num = k->id;
+ } else {
+ /* check if it's a numeric code already */
+ keycode_num = strtoul(keycode, &endptr, 0);
+ if (endptr[0] !='\0')
+ return log_device_error_errno(dev, SYNTHETIC_ERRNO(EINVAL), "Failed to parse key identifier '%s'", keycode);
+ }
+
+ map.scan = scancode;
+ map.key = keycode_num;
+
+ log_device_debug(dev, "keyboard: mapping scan code %d (0x%x) to key code %d (0x%x)",
+ map.scan, map.scan, map.key, map.key);
+
+ if (ioctl(fd, EVIOCSKEYCODE, &map) < 0)
+ return log_device_error_errno(dev, errno, "Failed to call EVIOCSKEYCODE with scan code 0x%x, and key code %d: %m", map.scan, map.key);
+
+ return 0;
+}
+
+static char* parse_token(const char *current, int32_t *val_out) {
+ char *next;
+ int32_t val;
+
+ if (!current)
+ return NULL;
+
+ val = strtol(current, &next, 0);
+ if (*next && *next != ':')
+ return NULL;
+
+ if (next != current)
+ *val_out = val;
+
+ if (*next)
+ next++;
+
+ return next;
+}
+
+static int override_abs(sd_device *dev, int fd, unsigned evcode, const char *value) {
+ struct input_absinfo absinfo;
+ char *next;
+ int r;
+
+ r = ioctl(fd, EVIOCGABS(evcode), &absinfo);
+ if (r < 0)
+ return log_device_error_errno(dev, errno, "Failed to call EVIOCGABS");
+
+ next = parse_token(value, &absinfo.minimum);
+ next = parse_token(next, &absinfo.maximum);
+ next = parse_token(next, &absinfo.resolution);
+ next = parse_token(next, &absinfo.fuzz);
+ next = parse_token(next, &absinfo.flat);
+ if (!next)
+ return log_device_error(dev, "Failed to parse EV_ABS override '%s'", value);
+
+ log_device_debug(dev, "keyboard: %x overridden with %"PRIi32"/%"PRIi32"/%"PRIi32"/%"PRIi32"/%"PRIi32,
+ evcode, absinfo.minimum, absinfo.maximum, absinfo.resolution, absinfo.fuzz, absinfo.flat);
+ r = ioctl(fd, EVIOCSABS(evcode), &absinfo);
+ if (r < 0)
+ return log_device_error_errno(dev, errno, "Failed to call EVIOCSABS");
+
+ return 0;
+}
+
+static int set_trackpoint_sensitivity(sd_device *dev, const char *value) {
+ sd_device *pdev;
+ char val_s[DECIMAL_STR_MAX(int)];
+ int r, val_i;
+
+ assert(dev);
+ assert(value);
+
+ /* The sensitivity sysfs attr belongs to the serio parent device */
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "serio", NULL, &pdev);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get serio parent: %m");
+
+ r = safe_atoi(value, &val_i);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to parse POINTINGSTICK_SENSITIVITY '%s': %m", value);
+ else if (val_i < 0 || val_i > 255)
+ return log_device_error_errno(dev, SYNTHETIC_ERRNO(ERANGE), "POINTINGSTICK_SENSITIVITY %d outside range [0..255]", val_i);
+
+ xsprintf(val_s, "%d", val_i);
+
+ r = sd_device_set_sysattr_value(pdev, "sensitivity", val_s);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to write 'sensitivity' attribute: %m");
+
+ return 0;
+}
+
+static int builtin_keyboard(sd_device *dev, int argc, char *argv[], bool test) {
+ unsigned release[1024];
+ unsigned release_count = 0;
+ _cleanup_close_ int fd = -1;
+ const char *node, *key, *value;
+ int has_abs = -1, r;
+
+ r = sd_device_get_devname(dev, &node);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get device name: %m");
+
+ FOREACH_DEVICE_PROPERTY(dev, key, value) {
+ char *endptr;
+
+ if (startswith(key, "KEYBOARD_KEY_")) {
+ const char *keycode = value;
+ unsigned scancode;
+
+ /* KEYBOARD_KEY_<hex scan code>=<key identifier string> */
+ scancode = strtoul(key + 13, &endptr, 16);
+ if (endptr[0] != '\0') {
+ log_device_warning(dev, "Failed to parse scan code from \"%s\", ignoring", key);
+ continue;
+ }
+
+ /* a leading '!' needs a force-release entry */
+ if (keycode[0] == '!') {
+ keycode++;
+
+ release[release_count] = scancode;
+ if (release_count < ELEMENTSOF(release)-1)
+ release_count++;
+
+ if (keycode[0] == '\0')
+ continue;
+ }
+
+ if (fd < 0) {
+ fd = open(node, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (fd < 0)
+ return log_device_error_errno(dev, errno, "Failed to open device '%s': %m", node);
+ }
+
+ (void) map_keycode(dev, fd, scancode, keycode);
+ } else if (startswith(key, "EVDEV_ABS_")) {
+ unsigned evcode;
+
+ /* EVDEV_ABS_<EV_ABS code>=<min>:<max>:<res>:<fuzz>:<flat> */
+ evcode = strtoul(key + 10, &endptr, 16);
+ if (endptr[0] != '\0') {
+ log_device_warning(dev, "Failed to parse EV_ABS code from \"%s\", ignoring", key);
+ continue;
+ }
+
+ if (fd < 0) {
+ fd = open(node, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (fd < 0)
+ return log_device_error_errno(dev, errno, "Failed to open device '%s': %m", node);
+ }
+
+ if (has_abs == -1) {
+ unsigned long bits;
+ int rc;
+
+ rc = ioctl(fd, EVIOCGBIT(0, sizeof(bits)), &bits);
+ if (rc < 0)
+ return log_device_error_errno(dev, errno, "Failed to set EVIOCGBIT");
+
+ has_abs = !!(bits & (1 << EV_ABS));
+ if (!has_abs)
+ log_device_warning(dev, "EVDEV_ABS override set but no EV_ABS present on device");
+ }
+
+ if (!has_abs)
+ continue;
+
+ (void) override_abs(dev, fd, evcode, value);
+ } else if (streq(key, "POINTINGSTICK_SENSITIVITY"))
+ (void) set_trackpoint_sensitivity(dev, value);
+ }
+
+ /* install list of force-release codes */
+ if (release_count > 0)
+ (void) install_force_release(dev, release, release_count);
+
+ return 0;
+}
+
+const struct udev_builtin udev_builtin_keyboard = {
+ .name = "keyboard",
+ .cmd = builtin_keyboard,
+ .help = "Keyboard scan code to key mapping",
+};
diff --git a/src/udev/udev-builtin-kmod.c b/src/udev/udev-builtin-kmod.c
new file mode 100644
index 0000000..ce52149
--- /dev/null
+++ b/src/udev/udev-builtin-kmod.c
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * load kernel modules
+ *
+ * Copyright © 2011 ProFUSION embedded systems
+ */
+
+#include <errno.h>
+#include <libkmod.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "module-util.h"
+#include "string-util.h"
+#include "udev-builtin.h"
+
+static struct kmod_ctx *ctx = NULL;
+
+_printf_(6,0) static void udev_kmod_log(void *data, int priority, const char *file, int line, const char *fn, const char *format, va_list args) {
+ log_internalv(priority, 0, file, line, fn, format, args);
+}
+
+static int builtin_kmod(sd_device *dev, int argc, char *argv[], bool test) {
+ int i;
+
+ if (!ctx)
+ return 0;
+
+ if (argc < 3 || !streq(argv[1], "load"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: expected: load <module>", argv[0]);
+
+ for (i = 2; argv[i]; i++)
+ (void) module_load_and_warn(ctx, argv[i], false);
+
+ return 0;
+}
+
+/* called at udev startup and reload */
+static int builtin_kmod_init(void) {
+ if (ctx)
+ return 0;
+
+ ctx = kmod_new(NULL, NULL);
+ if (!ctx)
+ return -ENOMEM;
+
+ log_debug("Load module index");
+ kmod_set_log_fn(ctx, udev_kmod_log, NULL);
+ kmod_load_resources(ctx);
+ return 0;
+}
+
+/* called on udev shutdown and reload request */
+static void builtin_kmod_exit(void) {
+ log_debug("Unload module index");
+ ctx = kmod_unref(ctx);
+}
+
+/* called every couple of seconds during event activity; 'true' if config has changed */
+static bool builtin_kmod_validate(void) {
+ log_debug("Validate module index");
+ if (!ctx)
+ return false;
+ return (kmod_validate_resources(ctx) != KMOD_RESOURCES_OK);
+}
+
+const struct udev_builtin udev_builtin_kmod = {
+ .name = "kmod",
+ .cmd = builtin_kmod,
+ .init = builtin_kmod_init,
+ .exit = builtin_kmod_exit,
+ .validate = builtin_kmod_validate,
+ .help = "Kernel module loader",
+ .run_once = false,
+};
diff --git a/src/udev/udev-builtin-net_id.c b/src/udev/udev-builtin-net_id.c
new file mode 100644
index 0000000..03b281a
--- /dev/null
+++ b/src/udev/udev-builtin-net_id.c
@@ -0,0 +1,961 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+/*
+ * Predictable network interface device names based on:
+ * - firmware/bios-provided index numbers for on-board devices
+ * - firmware-provided pci-express hotplug slot index number
+ * - physical/geographical location of the hardware
+ * - the interface's MAC address
+ *
+ * http://www.freedesktop.org/wiki/Software/systemd/PredictableNetworkInterfaceNames
+ *
+ * Two character prefixes based on the type of interface:
+ * en — Ethernet
+ * ib — InfiniBand
+ * sl — serial line IP (slip)
+ * wl — wlan
+ * ww — wwan
+ *
+ * Type of names:
+ * b<number> — BCMA bus core number
+ * c<bus_id> — bus id of a grouped CCW or CCW device,
+ * with all leading zeros stripped [s390]
+ * o<index>[n<phys_port_name>|d<dev_port>]
+ * — on-board device index number
+ * s<slot>[f<function>][n<phys_port_name>|d<dev_port>]
+ * — hotplug slot index number
+ * x<MAC> — MAC address
+ * [P<domain>]p<bus>s<slot>[f<function>][n<phys_port_name>|d<dev_port>]
+ * — PCI geographical location
+ * [P<domain>]p<bus>s<slot>[f<function>][u<port>][..][c<config>][i<interface>]
+ * — USB port number chain
+ * v<slot> - VIO slot number (IBM PowerVM)
+ * a<vendor><model>i<instance> — Platform bus ACPI instance id
+ *
+ * All multi-function PCI devices will carry the [f<function>] number in the
+ * device name, including the function 0 device.
+ *
+ * SR-IOV virtual devices are named based on the name of the parent interface,
+ * with a suffix of "v<N>", where <N> is the virtual device number.
+ *
+ * When using PCI geography, The PCI domain is only prepended when it is not 0.
+ *
+ * For USB devices the full chain of port numbers of hubs is composed. If the
+ * name gets longer than the maximum number of 15 characters, the name is not
+ * exported.
+ * The usual USB configuration == 1 and interface == 0 values are suppressed.
+ *
+ * PCI Ethernet card with firmware index "1":
+ * ID_NET_NAME_ONBOARD=eno1
+ * ID_NET_NAME_ONBOARD_LABEL=Ethernet Port 1
+ *
+ * PCI Ethernet card in hotplug slot with firmware index number:
+ * /sys/devices/pci0000:00/0000:00:1c.3/0000:05:00.0/net/ens1
+ * ID_NET_NAME_MAC=enx000000000466
+ * ID_NET_NAME_PATH=enp5s0
+ * ID_NET_NAME_SLOT=ens1
+ *
+ * PCI Ethernet multi-function card with 2 ports:
+ * /sys/devices/pci0000:00/0000:00:1c.0/0000:02:00.0/net/enp2s0f0
+ * ID_NET_NAME_MAC=enx78e7d1ea46da
+ * ID_NET_NAME_PATH=enp2s0f0
+ * /sys/devices/pci0000:00/0000:00:1c.0/0000:02:00.1/net/enp2s0f1
+ * ID_NET_NAME_MAC=enx78e7d1ea46dc
+ * ID_NET_NAME_PATH=enp2s0f1
+ *
+ * PCI wlan card:
+ * /sys/devices/pci0000:00/0000:00:1c.1/0000:03:00.0/net/wlp3s0
+ * ID_NET_NAME_MAC=wlx0024d7e31130
+ * ID_NET_NAME_PATH=wlp3s0
+ *
+ * PCI IB host adapter with 2 ports:
+ * /sys/devices/pci0000:00/0000:00:03.0/0000:15:00.0/net/ibp21s0f0
+ * ID_NET_NAME_PATH=ibp21s0f0
+ * /sys/devices/pci0000:00/0000:00:03.0/0000:15:00.1/net/ibp21s0f1
+ * ID_NET_NAME_PATH=ibp21s0f1
+ *
+ * USB built-in 3G modem:
+ * /sys/devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.4/2-1.4:1.6/net/wwp0s29u1u4i6
+ * ID_NET_NAME_MAC=wwx028037ec0200
+ * ID_NET_NAME_PATH=wwp0s29u1u4i6
+ *
+ * USB Android phone:
+ * /sys/devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/net/enp0s29u1u2
+ * ID_NET_NAME_MAC=enxd626b3450fb5
+ * ID_NET_NAME_PATH=enp0s29u1u2
+ *
+ * s390 grouped CCW interface:
+ * /sys/devices/css0/0.0.0007/0.0.f5f0/group_device/net/encf5f0
+ * ID_NET_NAME_MAC=enx026d3c00000a
+ * ID_NET_NAME_PATH=encf5f0
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/pci_regs.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "naming-scheme.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "udev-builtin.h"
+
+#define ONBOARD_INDEX_MAX (16*1024-1)
+
+enum netname_type{
+ NET_UNDEF,
+ NET_PCI,
+ NET_USB,
+ NET_BCMA,
+ NET_VIRTIO,
+ NET_CCW,
+ NET_VIO,
+ NET_PLATFORM,
+};
+
+struct netnames {
+ enum netname_type type;
+
+ uint8_t mac[6];
+ bool mac_valid;
+
+ sd_device *pcidev;
+ char pci_slot[IFNAMSIZ];
+ char pci_path[IFNAMSIZ];
+ char pci_onboard[IFNAMSIZ];
+ const char *pci_onboard_label;
+
+ char usb_ports[IFNAMSIZ];
+ char bcma_core[IFNAMSIZ];
+ char ccw_busid[IFNAMSIZ];
+ char vio_slot[IFNAMSIZ];
+ char platform_path[IFNAMSIZ];
+};
+
+struct virtfn_info {
+ sd_device *physfn_pcidev;
+ char suffix[IFNAMSIZ];
+};
+
+/* skip intermediate virtio devices */
+static sd_device *skip_virtio(sd_device *dev) {
+ sd_device *parent;
+
+ /* there can only ever be one virtio bus per parent device, so we can
+ * safely ignore any virtio buses. see
+ * http://lists.linuxfoundation.org/pipermail/virtualization/2015-August/030331.html */
+ for (parent = dev; parent; ) {
+ const char *subsystem;
+
+ if (sd_device_get_subsystem(parent, &subsystem) < 0)
+ break;
+
+ if (!streq(subsystem, "virtio"))
+ break;
+
+ if (sd_device_get_parent(parent, &parent) < 0)
+ return NULL;
+ }
+
+ return parent;
+}
+
+static int get_virtfn_info(sd_device *dev, struct netnames *names, struct virtfn_info *ret) {
+ _cleanup_(sd_device_unrefp) sd_device *physfn_pcidev = NULL;
+ const char *physfn_link_file, *syspath;
+ _cleanup_free_ char *physfn_pci_syspath = NULL;
+ _cleanup_free_ char *virtfn_pci_syspath = NULL;
+ struct dirent *dent;
+ _cleanup_closedir_ DIR *dir = NULL;
+ char suffix[IFNAMSIZ];
+ int r;
+
+ assert(dev);
+ assert(names);
+ assert(ret);
+
+ r = sd_device_get_syspath(names->pcidev, &syspath);
+ if (r < 0)
+ return r;
+
+ /* Check if this is a virtual function. */
+ physfn_link_file = strjoina(syspath, "/physfn");
+ r = chase_symlinks(physfn_link_file, NULL, 0, &physfn_pci_syspath);
+ if (r < 0)
+ return r;
+
+ /* Get physical function's pci device. */
+ r = sd_device_new_from_syspath(&physfn_pcidev, physfn_pci_syspath);
+ if (r < 0)
+ return r;
+
+ /* Find the virtual function number by finding the right virtfn link. */
+ dir = opendir(physfn_pci_syspath);
+ if (!dir)
+ return -errno;
+
+ FOREACH_DIRENT_ALL(dent, dir, break) {
+ _cleanup_free_ char *virtfn_link_file = NULL;
+
+ if (!startswith(dent->d_name, "virtfn"))
+ continue;
+
+ virtfn_link_file = strjoin(physfn_pci_syspath, "/", dent->d_name);
+ if (!virtfn_link_file)
+ return -ENOMEM;
+
+ if (chase_symlinks(virtfn_link_file, NULL, 0, &virtfn_pci_syspath) < 0)
+ continue;
+
+ if (streq(syspath, virtfn_pci_syspath)) {
+ if (!snprintf_ok(suffix, sizeof(suffix), "v%s", &dent->d_name[6]))
+ return -ENOENT;
+
+ break;
+ }
+ }
+ if (isempty(suffix))
+ return -ENOENT;
+
+ ret->physfn_pcidev = TAKE_PTR(physfn_pcidev);
+ strncpy(ret->suffix, suffix, sizeof(ret->suffix));
+
+ return 0;
+}
+
+/* retrieve on-board index number and label from firmware */
+static int dev_pci_onboard(sd_device *dev, struct netnames *names) {
+ unsigned long idx, dev_port = 0;
+ const char *attr, *port_name = NULL;
+ size_t l;
+ char *s;
+ int r;
+
+ /* ACPI _DSM — device specific method for naming a PCI or PCI Express device */
+ if (sd_device_get_sysattr_value(names->pcidev, "acpi_index", &attr) < 0) {
+ /* SMBIOS type 41 — Onboard Devices Extended Information */
+ r = sd_device_get_sysattr_value(names->pcidev, "index", &attr);
+ if (r < 0)
+ return r;
+ }
+
+ r = safe_atolu(attr, &idx);
+ if (r < 0)
+ return r;
+ if (idx == 0 && !naming_scheme_has(NAMING_ZERO_ACPI_INDEX))
+ return -EINVAL;
+
+ /* Some BIOSes report rubbish indexes that are excessively high (2^24-1 is an index VMware likes to report for
+ * example). Let's define a cut-off where we don't consider the index reliable anymore. We pick some arbitrary
+ * cut-off, which is somewhere beyond the realistic number of physical network interface a system might
+ * have. Ideally the kernel would already filter his crap for us, but it doesn't currently. */
+ if (idx > ONBOARD_INDEX_MAX)
+ return -ENOENT;
+
+ /* kernel provided port index for multiple ports on a single PCI function */
+ if (sd_device_get_sysattr_value(dev, "dev_port", &attr) >= 0)
+ dev_port = strtoul(attr, NULL, 10);
+
+ /* kernel provided front panel port name for multiple port PCI device */
+ (void) sd_device_get_sysattr_value(dev, "phys_port_name", &port_name);
+
+ s = names->pci_onboard;
+ l = sizeof(names->pci_onboard);
+ l = strpcpyf(&s, l, "o%lu", idx);
+ if (port_name)
+ l = strpcpyf(&s, l, "n%s", port_name);
+ else if (dev_port > 0)
+ l = strpcpyf(&s, l, "d%lu", dev_port);
+ if (l == 0)
+ names->pci_onboard[0] = '\0';
+
+ if (sd_device_get_sysattr_value(names->pcidev, "label", &names->pci_onboard_label) < 0)
+ names->pci_onboard_label = NULL;
+
+ return 0;
+}
+
+/* read the 256 bytes PCI configuration space to check the multi-function bit */
+static bool is_pci_multifunction(sd_device *dev) {
+ _cleanup_close_ int fd = -1;
+ const char *filename, *syspath;
+ uint8_t config[64];
+
+ if (sd_device_get_syspath(dev, &syspath) < 0)
+ return false;
+
+ filename = strjoina(syspath, "/config");
+ fd = open(filename, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return false;
+ if (read(fd, &config, sizeof(config)) != sizeof(config))
+ return false;
+
+ /* bit 0-6 header type, bit 7 multi/single function device */
+ return config[PCI_HEADER_TYPE] & 0x80;
+}
+
+static bool is_pci_ari_enabled(sd_device *dev) {
+ const char *a;
+
+ if (sd_device_get_sysattr_value(dev, "ari_enabled", &a) < 0)
+ return false;
+
+ return streq(a, "1");
+}
+
+static int dev_pci_slot(sd_device *dev, struct netnames *names) {
+ unsigned long dev_port = 0;
+ unsigned domain, bus, slot, func, hotplug_slot = 0;
+ size_t l;
+ char *s;
+ const char *sysname, *attr, *port_name = NULL, *syspath;
+ _cleanup_(sd_device_unrefp) sd_device *pci = NULL;
+ sd_device *hotplug_slot_dev;
+ char slots[PATH_MAX];
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *dent;
+ int r;
+
+ r = sd_device_get_sysname(names->pcidev, &sysname);
+ if (r < 0)
+ return r;
+
+ if (sscanf(sysname, "%x:%x:%x.%u", &domain, &bus, &slot, &func) != 4)
+ return -ENOENT;
+
+ if (naming_scheme_has(NAMING_NPAR_ARI) &&
+ is_pci_ari_enabled(names->pcidev))
+ /* ARI devices support up to 256 functions on a single device ("slot"), and interpret the
+ * traditional 5-bit slot and 3-bit function number as a single 8-bit function number,
+ * where the slot makes up the upper 5 bits. */
+ func += slot * 8;
+
+ /* kernel provided port index for multiple ports on a single PCI function */
+ if (sd_device_get_sysattr_value(dev, "dev_port", &attr) >= 0) {
+ dev_port = strtoul(attr, NULL, 10);
+ /* With older kernels IP-over-InfiniBand network interfaces sometimes erroneously
+ * provide the port number in the 'dev_id' sysfs attribute instead of 'dev_port',
+ * which thus stays initialized as 0. */
+ if (dev_port == 0 &&
+ sd_device_get_sysattr_value(dev, "type", &attr) >= 0) {
+ unsigned long type;
+
+ type = strtoul(attr, NULL, 10);
+ if (type == ARPHRD_INFINIBAND &&
+ sd_device_get_sysattr_value(dev, "dev_id", &attr) >= 0)
+ dev_port = strtoul(attr, NULL, 16);
+ }
+ }
+
+ /* kernel provided front panel port name for multi-port PCI device */
+ (void) sd_device_get_sysattr_value(dev, "phys_port_name", &port_name);
+
+ /* compose a name based on the raw kernel's PCI bus, slot numbers */
+ s = names->pci_path;
+ l = sizeof(names->pci_path);
+ if (domain > 0)
+ l = strpcpyf(&s, l, "P%u", domain);
+ l = strpcpyf(&s, l, "p%us%u", bus, slot);
+ if (func > 0 || is_pci_multifunction(names->pcidev))
+ l = strpcpyf(&s, l, "f%u", func);
+ if (port_name)
+ l = strpcpyf(&s, l, "n%s", port_name);
+ else if (dev_port > 0)
+ l = strpcpyf(&s, l, "d%lu", dev_port);
+ if (l == 0)
+ names->pci_path[0] = '\0';
+
+ /* ACPI _SUN — slot user number */
+ r = sd_device_new_from_subsystem_sysname(&pci, "subsystem", "pci");
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_syspath(pci, &syspath);
+ if (r < 0)
+ return r;
+ if (!snprintf_ok(slots, sizeof slots, "%s/slots", syspath))
+ return -ENAMETOOLONG;
+
+ dir = opendir(slots);
+ if (!dir)
+ return -errno;
+
+ hotplug_slot_dev = names->pcidev;
+ while (hotplug_slot_dev) {
+ if (sd_device_get_sysname(hotplug_slot_dev, &sysname) < 0)
+ continue;
+
+ FOREACH_DIRENT_ALL(dent, dir, break) {
+ unsigned i;
+ char str[PATH_MAX];
+ _cleanup_free_ char *address = NULL;
+
+ if (dent->d_name[0] == '.')
+ continue;
+ r = safe_atou_full(dent->d_name, 10, &i);
+ if (r < 0 || i <= 0)
+ continue;
+
+ /* match slot address with device by stripping the function */
+ if (snprintf_ok(str, sizeof str, "%s/%s/address", slots, dent->d_name) &&
+ read_one_line_file(str, &address) >= 0 &&
+ startswith(sysname, address)) {
+ hotplug_slot = i;
+ break;
+ }
+ }
+ if (hotplug_slot > 0)
+ break;
+ if (sd_device_get_parent_with_subsystem_devtype(hotplug_slot_dev, "pci", NULL, &hotplug_slot_dev) < 0)
+ break;
+ rewinddir(dir);
+ }
+
+ if (hotplug_slot > 0) {
+ s = names->pci_slot;
+ l = sizeof(names->pci_slot);
+ if (domain > 0)
+ l = strpcpyf(&s, l, "P%d", domain);
+ l = strpcpyf(&s, l, "s%d", hotplug_slot);
+ if (func > 0 || is_pci_multifunction(names->pcidev))
+ l = strpcpyf(&s, l, "f%d", func);
+ if (port_name)
+ l = strpcpyf(&s, l, "n%s", port_name);
+ else if (dev_port > 0)
+ l = strpcpyf(&s, l, "d%lu", dev_port);
+ if (l == 0)
+ names->pci_slot[0] = '\0';
+ }
+
+ return 0;
+}
+
+static int names_vio(sd_device *dev, struct netnames *names) {
+ sd_device *parent;
+ unsigned busid, slotid, ethid;
+ const char *syspath, *subsystem;
+ int r;
+
+ /* check if our direct parent is a VIO device with no other bus in-between */
+ r = sd_device_get_parent(dev, &parent);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(parent, &subsystem);
+ if (r < 0)
+ return r;
+ if (!streq("vio", subsystem))
+ return -ENOENT;
+
+ /* The devices' $DEVPATH number is tied to (virtual) hardware (slot id
+ * selected in the HMC), thus this provides a reliable naming (e.g.
+ * "/devices/vio/30000002/net/eth1"); we ignore the bus number, as
+ * there should only ever be one bus, and then remove leading zeros. */
+ r = sd_device_get_syspath(dev, &syspath);
+ if (r < 0)
+ return r;
+
+ if (sscanf(syspath, "/sys/devices/vio/%4x%4x/net/eth%u", &busid, &slotid, &ethid) != 3)
+ return -EINVAL;
+
+ xsprintf(names->vio_slot, "v%u", slotid);
+ names->type = NET_VIO;
+ return 0;
+}
+
+#define _PLATFORM_TEST "/sys/devices/platform/vvvvPPPP"
+#define _PLATFORM_PATTERN4 "/sys/devices/platform/%4s%4x:%2x/net/eth%u"
+#define _PLATFORM_PATTERN3 "/sys/devices/platform/%3s%4x:%2x/net/eth%u"
+
+static int names_platform(sd_device *dev, struct netnames *names, bool test) {
+ sd_device *parent;
+ char vendor[5];
+ unsigned model, instance, ethid;
+ const char *syspath, *pattern, *validchars, *subsystem;
+ int r;
+
+ /* check if our direct parent is a platform device with no other bus in-between */
+ r = sd_device_get_parent(dev, &parent);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(parent, &subsystem);
+ if (r < 0)
+ return r;
+
+ if (!streq("platform", subsystem))
+ return -ENOENT;
+
+ r = sd_device_get_syspath(dev, &syspath);
+ if (r < 0)
+ return r;
+
+ /* syspath is too short, to have a valid ACPI instance */
+ if (strlen(syspath) < sizeof _PLATFORM_TEST)
+ return -EINVAL;
+
+ /* Vendor ID can be either PNP ID (3 chars A-Z) or ACPI ID (4 chars A-Z and numerals) */
+ if (syspath[sizeof _PLATFORM_TEST - 1] == ':') {
+ pattern = _PLATFORM_PATTERN4;
+ validchars = UPPERCASE_LETTERS DIGITS;
+ } else {
+ pattern = _PLATFORM_PATTERN3;
+ validchars = UPPERCASE_LETTERS;
+ }
+
+ /* Platform devices are named after ACPI table match, and instance id
+ * eg. "/sys/devices/platform/HISI00C2:00");
+ * The Vendor (3 or 4 char), followed by hexdecimal model number : instance id.
+ */
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+ if (sscanf(syspath, pattern, vendor, &model, &instance, &ethid) != 4)
+ return -EINVAL;
+#pragma GCC diagnostic pop
+
+ if (!in_charset(vendor, validchars))
+ return -ENOENT;
+
+ ascii_strlower(vendor);
+
+ xsprintf(names->platform_path, "a%s%xi%u", vendor, model, instance);
+ names->type = NET_PLATFORM;
+ return 0;
+}
+
+static int names_pci(sd_device *dev, struct netnames *names) {
+ sd_device *parent;
+ struct netnames vf_names = {};
+ struct virtfn_info vf_info = {};
+ const char *subsystem;
+ int r;
+
+ assert(dev);
+ assert(names);
+
+ r = sd_device_get_parent(dev, &parent);
+ if (r < 0)
+ return r;
+ /* skip virtio subsystem if present */
+ parent = skip_virtio(parent);
+
+ if (!parent)
+ return -ENOENT;
+
+ /* check if our direct parent is a PCI device with no other bus in-between */
+ if (sd_device_get_subsystem(parent, &subsystem) >= 0 &&
+ streq("pci", subsystem)) {
+ names->type = NET_PCI;
+ names->pcidev = parent;
+ } else {
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "pci", NULL, &names->pcidev);
+ if (r < 0)
+ return r;
+ }
+
+ if (naming_scheme_has(NAMING_SR_IOV_V) &&
+ get_virtfn_info(dev, names, &vf_info) >= 0) {
+ /* If this is an SR-IOV virtual device, get base name using physical device and add virtfn suffix. */
+ vf_names.pcidev = vf_info.physfn_pcidev;
+ dev_pci_onboard(dev, &vf_names);
+ dev_pci_slot(dev, &vf_names);
+ if (vf_names.pci_onboard[0])
+ if (strlen(vf_names.pci_onboard) + strlen(vf_info.suffix) < sizeof(names->pci_onboard))
+ strscpyl(names->pci_onboard, sizeof(names->pci_onboard),
+ vf_names.pci_onboard, vf_info.suffix, NULL);
+ if (vf_names.pci_slot[0])
+ if (strlen(vf_names.pci_slot) + strlen(vf_info.suffix) < sizeof(names->pci_slot))
+ strscpyl(names->pci_slot, sizeof(names->pci_slot),
+ vf_names.pci_slot, vf_info.suffix, NULL);
+ if (vf_names.pci_path[0])
+ if (strlen(vf_names.pci_path) + strlen(vf_info.suffix) < sizeof(names->pci_path))
+ strscpyl(names->pci_path, sizeof(names->pci_path),
+ vf_names.pci_path, vf_info.suffix, NULL);
+ sd_device_unref(vf_info.physfn_pcidev);
+ } else {
+ dev_pci_onboard(dev, names);
+ dev_pci_slot(dev, names);
+ }
+
+ return 0;
+}
+
+static int names_usb(sd_device *dev, struct netnames *names) {
+ sd_device *usbdev;
+ char name[256], *ports, *config, *interf, *s;
+ const char *sysname;
+ size_t l;
+ int r;
+
+ assert(dev);
+ assert(names);
+
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "usb", "usb_interface", &usbdev);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_sysname(usbdev, &sysname);
+ if (r < 0)
+ return r;
+
+ /* get USB port number chain, configuration, interface */
+ strscpy(name, sizeof(name), sysname);
+ s = strchr(name, '-');
+ if (!s)
+ return -EINVAL;
+ ports = s+1;
+
+ s = strchr(ports, ':');
+ if (!s)
+ return -EINVAL;
+ s[0] = '\0';
+ config = s+1;
+
+ s = strchr(config, '.');
+ if (!s)
+ return -EINVAL;
+ s[0] = '\0';
+ interf = s+1;
+
+ /* prefix every port number in the chain with "u" */
+ s = ports;
+ while ((s = strchr(s, '.')))
+ s[0] = 'u';
+ s = names->usb_ports;
+ l = strpcpyl(&s, sizeof(names->usb_ports), "u", ports, NULL);
+
+ /* append USB config number, suppress the common config == 1 */
+ if (!streq(config, "1"))
+ l = strpcpyl(&s, sizeof(names->usb_ports), "c", config, NULL);
+
+ /* append USB interface number, suppress the interface == 0 */
+ if (!streq(interf, "0"))
+ l = strpcpyl(&s, sizeof(names->usb_ports), "i", interf, NULL);
+ if (l == 0)
+ return -ENAMETOOLONG;
+
+ names->type = NET_USB;
+ return 0;
+}
+
+static int names_bcma(sd_device *dev, struct netnames *names) {
+ sd_device *bcmadev;
+ unsigned core;
+ const char *sysname;
+ int r;
+
+ assert(dev);
+ assert(names);
+
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "bcma", NULL, &bcmadev);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_sysname(bcmadev, &sysname);
+ if (r < 0)
+ return r;
+
+ /* bus num:core num */
+ if (sscanf(sysname, "bcma%*u:%u", &core) != 1)
+ return -EINVAL;
+ /* suppress the common core == 0 */
+ if (core > 0)
+ xsprintf(names->bcma_core, "b%u", core);
+
+ names->type = NET_BCMA;
+ return 0;
+}
+
+static int names_ccw(sd_device *dev, struct netnames *names) {
+ sd_device *cdev;
+ const char *bus_id, *subsys;
+ size_t bus_id_len;
+ size_t bus_id_start;
+ int r;
+
+ assert(dev);
+ assert(names);
+
+ /* Retrieve the associated CCW device */
+ r = sd_device_get_parent(dev, &cdev);
+ if (r < 0)
+ return r;
+
+ /* skip virtio subsystem if present */
+ cdev = skip_virtio(cdev);
+ if (!cdev)
+ return -ENOENT;
+
+ r = sd_device_get_subsystem(cdev, &subsys);
+ if (r < 0)
+ return r;
+
+ /* Network devices are either single or grouped CCW devices */
+ if (!STR_IN_SET(subsys, "ccwgroup", "ccw"))
+ return -ENOENT;
+
+ /* Retrieve bus-ID of the CCW device. The bus-ID uniquely
+ * identifies the network device on the Linux on System z channel
+ * subsystem. Note that the bus-ID contains lowercase characters.
+ */
+ r = sd_device_get_sysname(cdev, &bus_id);
+ if (r < 0)
+ return r;
+
+ /* Check the length of the bus-ID. Rely on that the kernel provides
+ * a correct bus-ID; alternatively, improve this check and parse and
+ * verify each bus-ID part...
+ */
+ bus_id_len = strlen(bus_id);
+ if (!IN_SET(bus_id_len, 8, 9))
+ return -EINVAL;
+
+ /* Strip leading zeros from the bus id for aesthetic purposes. This
+ * keeps the ccw names stable, yet much shorter in general case of
+ * bus_id 0.0.0600 -> 600. This is similar to e.g. how PCI domain is
+ * not prepended when it is zero. Preserve the last 0 for 0.0.0000.
+ */
+ bus_id_start = strspn(bus_id, ".0");
+ bus_id += bus_id_start < bus_id_len ? bus_id_start : bus_id_len - 1;
+
+ /* Store the CCW bus-ID for use as network device name */
+ if (snprintf_ok(names->ccw_busid, sizeof(names->ccw_busid), "c%s", bus_id))
+ names->type = NET_CCW;
+
+ return 0;
+}
+
+static int names_mac(sd_device *dev, struct netnames *names) {
+ const char *s;
+ unsigned long i;
+ unsigned a1, a2, a3, a4, a5, a6;
+ int r;
+
+ /* Some kinds of devices tend to have hardware addresses
+ * that are impossible to use in an iface name.
+ */
+ r = sd_device_get_sysattr_value(dev, "type", &s);
+ if (r < 0)
+ return r;
+
+ i = strtoul(s, NULL, 0);
+ switch (i) {
+ /* The persistent part of a hardware address of an InfiniBand NIC
+ * is 8 bytes long. We cannot fit this much in an iface name.
+ */
+ case ARPHRD_INFINIBAND:
+ return -EINVAL;
+ default:
+ break;
+ }
+
+ /* check for NET_ADDR_PERM, skip random MAC addresses */
+ r = sd_device_get_sysattr_value(dev, "addr_assign_type", &s);
+ if (r < 0)
+ return r;
+ i = strtoul(s, NULL, 0);
+ if (i != 0)
+ return 0;
+
+ r = sd_device_get_sysattr_value(dev, "address", &s);
+ if (r < 0)
+ return r;
+ if (sscanf(s, "%x:%x:%x:%x:%x:%x", &a1, &a2, &a3, &a4, &a5, &a6) != 6)
+ return -EINVAL;
+
+ /* skip empty MAC addresses */
+ if (a1 + a2 + a3 + a4 + a5 + a6 == 0)
+ return -EINVAL;
+
+ names->mac[0] = a1;
+ names->mac[1] = a2;
+ names->mac[2] = a3;
+ names->mac[3] = a4;
+ names->mac[4] = a5;
+ names->mac[5] = a6;
+ names->mac_valid = true;
+ return 0;
+}
+
+/* IEEE Organizationally Unique Identifier vendor string */
+static int ieee_oui(sd_device *dev, struct netnames *names, bool test) {
+ char str[32];
+
+ if (!names->mac_valid)
+ return -ENOENT;
+ /* skip commonly misused 00:00:00 (Xerox) prefix */
+ if (memcmp(names->mac, "\0\0\0", 3) == 0)
+ return -EINVAL;
+ xsprintf(str, "OUI:%02X%02X%02X%02X%02X%02X", names->mac[0],
+ names->mac[1], names->mac[2], names->mac[3], names->mac[4],
+ names->mac[5]);
+ udev_builtin_hwdb_lookup(dev, NULL, str, NULL, test);
+ return 0;
+}
+
+static int builtin_net_id(sd_device *dev, int argc, char *argv[], bool test) {
+ const char *s, *p, *devtype, *prefix = "en";
+ struct netnames names = {};
+ unsigned long i;
+ int r;
+
+ /* handle only ARPHRD_ETHER, ARPHRD_SLIP and ARPHRD_INFINIBAND devices */
+ r = sd_device_get_sysattr_value(dev, "type", &s);
+ if (r < 0)
+ return r;
+
+ i = strtoul(s, NULL, 0);
+ switch (i) {
+ case ARPHRD_ETHER:
+ prefix = "en";
+ break;
+ case ARPHRD_INFINIBAND:
+ if (naming_scheme_has(NAMING_INFINIBAND))
+ prefix = "ib";
+ else
+ return 0;
+ break;
+ case ARPHRD_SLIP:
+ prefix = "sl";
+ break;
+ default:
+ return 0;
+ }
+
+ /* skip stacked devices, like VLANs, ... */
+ r = sd_device_get_sysattr_value(dev, "ifindex", &s);
+ if (r < 0)
+ return r;
+ r = sd_device_get_sysattr_value(dev, "iflink", &p);
+ if (r < 0)
+ return r;
+ if (!streq(s, p))
+ return 0;
+
+ if (sd_device_get_devtype(dev, &devtype) >= 0) {
+ if (streq("wlan", devtype))
+ prefix = "wl";
+ else if (streq("wwan", devtype))
+ prefix = "ww";
+ }
+
+ udev_builtin_add_property(dev, test, "ID_NET_NAMING_SCHEME", naming_scheme()->name);
+
+ r = names_mac(dev, &names);
+ if (r >= 0 && names.mac_valid) {
+ char str[IFNAMSIZ];
+
+ xsprintf(str, "%sx%02x%02x%02x%02x%02x%02x", prefix,
+ names.mac[0], names.mac[1], names.mac[2],
+ names.mac[3], names.mac[4], names.mac[5]);
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_MAC", str);
+
+ ieee_oui(dev, &names, test);
+ }
+
+ /* get path names for Linux on System z network devices */
+ if (names_ccw(dev, &names) >= 0 && names.type == NET_CCW) {
+ char str[IFNAMSIZ];
+
+ if (snprintf_ok(str, sizeof str, "%s%s", prefix, names.ccw_busid))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_PATH", str);
+ return 0;
+ }
+
+ /* get ibmveth/ibmvnic slot-based names. */
+ if (names_vio(dev, &names) >= 0 && names.type == NET_VIO) {
+ char str[IFNAMSIZ];
+
+ if (snprintf_ok(str, sizeof str, "%s%s", prefix, names.vio_slot))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_SLOT", str);
+ return 0;
+ }
+
+ /* get ACPI path names for ARM64 platform devices */
+ if (names_platform(dev, &names, test) >= 0 && names.type == NET_PLATFORM) {
+ char str[IFNAMSIZ];
+
+ if (snprintf_ok(str, sizeof str, "%s%s", prefix, names.platform_path))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_PATH", str);
+ return 0;
+ }
+
+ /* get PCI based path names, we compose only PCI based paths */
+ if (names_pci(dev, &names) < 0)
+ return 0;
+
+ /* plain PCI device */
+ if (names.type == NET_PCI) {
+ char str[IFNAMSIZ];
+
+ if (names.pci_onboard[0] &&
+ snprintf_ok(str, sizeof str, "%s%s", prefix, names.pci_onboard))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_ONBOARD", str);
+
+ if (names.pci_onboard_label &&
+ snprintf_ok(str, sizeof str, "%s%s", prefix, names.pci_onboard_label))
+ udev_builtin_add_property(dev, test, "ID_NET_LABEL_ONBOARD", str);
+
+ if (names.pci_path[0] &&
+ snprintf_ok(str, sizeof str, "%s%s", prefix, names.pci_path))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_PATH", str);
+
+ if (names.pci_slot[0] &&
+ snprintf_ok(str, sizeof str, "%s%s", prefix, names.pci_slot))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_SLOT", str);
+ return 0;
+ }
+
+ /* USB device */
+ if (names_usb(dev, &names) >= 0 && names.type == NET_USB) {
+ char str[IFNAMSIZ];
+
+ if (names.pci_path[0] &&
+ snprintf_ok(str, sizeof str, "%s%s%s", prefix, names.pci_path, names.usb_ports))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_PATH", str);
+
+ if (names.pci_slot[0] &&
+ snprintf_ok(str, sizeof str, "%s%s%s", prefix, names.pci_slot, names.usb_ports))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_SLOT", str);
+ return 0;
+ }
+
+ /* Broadcom bus */
+ if (names_bcma(dev, &names) >= 0 && names.type == NET_BCMA) {
+ char str[IFNAMSIZ];
+
+ if (names.pci_path[0] &&
+ snprintf_ok(str, sizeof str, "%s%s%s", prefix, names.pci_path, names.bcma_core))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_PATH", str);
+
+ if (names.pci_slot[0] &&
+ snprintf(str, sizeof str, "%s%s%s", prefix, names.pci_slot, names.bcma_core))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_SLOT", str);
+ return 0;
+ }
+
+ return 0;
+}
+
+const struct udev_builtin udev_builtin_net_id = {
+ .name = "net_id",
+ .cmd = builtin_net_id,
+ .help = "Network device properties",
+};
diff --git a/src/udev/udev-builtin-net_setup_link.c b/src/udev/udev-builtin-net_setup_link.c
new file mode 100644
index 0000000..a845dfa
--- /dev/null
+++ b/src/udev/udev-builtin-net_setup_link.c
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "device-util.h"
+#include "alloc-util.h"
+#include "link-config.h"
+#include "log.h"
+#include "string-util.h"
+#include "udev-builtin.h"
+
+static link_config_ctx *ctx = NULL;
+
+static int builtin_net_setup_link(sd_device *dev, int argc, char **argv, bool test) {
+ _cleanup_free_ char *driver = NULL;
+ const char *name = NULL;
+ link_config *link;
+ int r;
+
+ if (argc > 1)
+ return log_device_error_errno(dev, SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments.");
+
+ r = link_get_driver(ctx, dev, &driver);
+ if (r >= 0)
+ udev_builtin_add_property(dev, test, "ID_NET_DRIVER", driver);
+
+ r = link_config_get(ctx, dev, &link);
+ if (r < 0) {
+ if (r == -ENOENT)
+ return log_device_debug_errno(dev, r, "No matching link configuration found.");
+
+ return log_device_error_errno(dev, r, "Failed to get link config: %m");
+ }
+
+ r = link_config_apply(ctx, link, dev, &name);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Could not apply link config, ignoring: %m");
+
+ udev_builtin_add_property(dev, test, "ID_NET_LINK_FILE", link->filename);
+
+ if (name)
+ udev_builtin_add_property(dev, test, "ID_NET_NAME", name);
+
+ return 0;
+}
+
+static int builtin_net_setup_link_init(void) {
+ int r;
+
+ if (ctx)
+ return 0;
+
+ r = link_config_ctx_new(&ctx);
+ if (r < 0)
+ return r;
+
+ r = link_config_load(ctx);
+ if (r < 0)
+ return r;
+
+ log_debug("Created link configuration context.");
+ return 0;
+}
+
+static void builtin_net_setup_link_exit(void) {
+ link_config_ctx_free(ctx);
+ ctx = NULL;
+ log_debug("Unloaded link configuration context.");
+}
+
+static bool builtin_net_setup_link_validate(void) {
+ log_debug("Check if link configuration needs reloading.");
+ if (!ctx)
+ return false;
+
+ return link_config_should_reload(ctx);
+}
+
+const struct udev_builtin udev_builtin_net_setup_link = {
+ .name = "net_setup_link",
+ .cmd = builtin_net_setup_link,
+ .init = builtin_net_setup_link_init,
+ .exit = builtin_net_setup_link_exit,
+ .validate = builtin_net_setup_link_validate,
+ .help = "Configure network link",
+ .run_once = false,
+};
diff --git a/src/udev/udev-builtin-path_id.c b/src/udev/udev-builtin-path_id.c
new file mode 100644
index 0000000..7ce1c56
--- /dev/null
+++ b/src/udev/udev-builtin-path_id.c
@@ -0,0 +1,683 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * compose persistent device path
+ *
+ * Logic based on Hannes Reinecke's shell script.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "libudev-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "sysexits.h"
+#include "udev-builtin.h"
+
+_printf_(2,3)
+static void path_prepend(char **path, const char *fmt, ...) {
+ va_list va;
+ _cleanup_free_ char *pre = NULL;
+ int r;
+
+ va_start(va, fmt);
+ r = vasprintf(&pre, fmt, va);
+ va_end(va);
+ if (r < 0) {
+ log_oom();
+ exit(EX_OSERR);
+ }
+
+ if (*path) {
+ char *new;
+
+ new = strjoin(pre, "-", *path);
+ if (!new) {
+ log_oom();
+ exit(EX_OSERR);
+ }
+
+ free_and_replace(*path, new);
+ } else
+ *path = TAKE_PTR(pre);
+}
+
+/*
+** Linux only supports 32 bit luns.
+** See drivers/scsi/scsi_scan.c::scsilun_to_int() for more details.
+*/
+static int format_lun_number(sd_device *dev, char **path) {
+ const char *sysnum;
+ unsigned long lun;
+ int r;
+
+ r = sd_device_get_sysnum(dev, &sysnum);
+ if (r < 0)
+ return r;
+ if (!sysnum)
+ return -ENOENT;
+
+ lun = strtoul(sysnum, NULL, 10);
+ if (lun < 256)
+ /* address method 0, peripheral device addressing with bus id of zero */
+ path_prepend(path, "lun-%lu", lun);
+ else
+ /* handle all other lun addressing methods by using a variant of the original lun format */
+ path_prepend(path, "lun-0x%04lx%04lx00000000", lun & 0xffff, (lun >> 16) & 0xffff);
+
+ return 0;
+}
+
+static sd_device *skip_subsystem(sd_device *dev, const char *subsys) {
+ sd_device *parent;
+
+ assert(dev);
+ assert(subsys);
+
+ for (parent = dev; ; ) {
+ const char *subsystem;
+
+ if (sd_device_get_subsystem(parent, &subsystem) < 0)
+ break;
+
+ if (!streq(subsystem, subsys))
+ break;
+
+ dev = parent;
+ if (sd_device_get_parent(dev, &parent) < 0)
+ break;
+ }
+
+ return dev;
+}
+
+static sd_device *handle_scsi_fibre_channel(sd_device *parent, char **path) {
+ sd_device *targetdev;
+ _cleanup_(sd_device_unrefp) sd_device *fcdev = NULL;
+ const char *port, *sysname;
+ _cleanup_free_ char *lun = NULL;
+
+ assert(parent);
+ assert(path);
+
+ if (sd_device_get_parent_with_subsystem_devtype(parent, "scsi", "scsi_target", &targetdev) < 0)
+ return NULL;
+ if (sd_device_get_sysname(targetdev, &sysname) < 0)
+ return NULL;
+ if (sd_device_new_from_subsystem_sysname(&fcdev, "fc_transport", sysname) < 0)
+ return NULL;
+ if (sd_device_get_sysattr_value(fcdev, "port_name", &port) < 0)
+ return NULL;
+
+ format_lun_number(parent, &lun);
+ path_prepend(path, "fc-%s-%s", port, lun);
+ return parent;
+}
+
+static sd_device *handle_scsi_sas_wide_port(sd_device *parent, char **path) {
+ sd_device *targetdev, *target_parent;
+ _cleanup_(sd_device_unrefp) sd_device *sasdev = NULL;
+ const char *sas_address, *sysname;
+ _cleanup_free_ char *lun = NULL;
+
+ assert(parent);
+ assert(path);
+
+ if (sd_device_get_parent_with_subsystem_devtype(parent, "scsi", "scsi_target", &targetdev) < 0)
+ return NULL;
+ if (sd_device_get_parent(targetdev, &target_parent) < 0)
+ return NULL;
+ if (sd_device_get_sysname(target_parent, &sysname) < 0)
+ return NULL;
+ if (sd_device_new_from_subsystem_sysname(&sasdev, "sas_device", sysname) < 0)
+ return NULL;
+ if (sd_device_get_sysattr_value(sasdev, "sas_address", &sas_address) < 0)
+ return NULL;
+
+ format_lun_number(parent, &lun);
+ path_prepend(path, "sas-%s-%s", sas_address, lun);
+ return parent;
+}
+
+static sd_device *handle_scsi_sas(sd_device *parent, char **path) {
+ sd_device *targetdev, *target_parent, *port, *expander;
+ _cleanup_(sd_device_unrefp) sd_device *target_sasdev = NULL, *expander_sasdev = NULL, *port_sasdev = NULL;
+ const char *sas_address = NULL;
+ const char *phy_id;
+ const char *phy_count, *sysname;
+ _cleanup_free_ char *lun = NULL;
+
+ assert(parent);
+ assert(path);
+
+ if (sd_device_get_parent_with_subsystem_devtype(parent, "scsi", "scsi_target", &targetdev) < 0)
+ return NULL;
+ if (sd_device_get_parent(targetdev, &target_parent) < 0)
+ return NULL;
+ if (sd_device_get_sysname(target_parent, &sysname) < 0)
+ return NULL;
+ /* Get sas device */
+ if (sd_device_new_from_subsystem_sysname(&target_sasdev, "sas_device", sysname) < 0)
+ return NULL;
+ /* The next parent is sas port */
+ if (sd_device_get_parent(target_parent, &port) < 0)
+ return NULL;
+ if (sd_device_get_sysname(port, &sysname) < 0)
+ return NULL;
+ /* Get port device */
+ if (sd_device_new_from_subsystem_sysname(&port_sasdev, "sas_port", sysname) < 0)
+ return NULL;
+ if (sd_device_get_sysattr_value(port_sasdev, "num_phys", &phy_count) < 0)
+ return NULL;
+
+ /* Check if we are simple disk */
+ if (strncmp(phy_count, "1", 2) != 0)
+ return handle_scsi_sas_wide_port(parent, path);
+
+ /* Get connected phy */
+ if (sd_device_get_sysattr_value(target_sasdev, "phy_identifier", &phy_id) < 0)
+ return NULL;
+
+ /* The port's parent is either hba or expander */
+ if (sd_device_get_parent(port, &expander) < 0)
+ return NULL;
+
+ if (sd_device_get_sysname(expander, &sysname) < 0)
+ return NULL;
+ /* Get expander device */
+ if (sd_device_new_from_subsystem_sysname(&expander_sasdev, "sas_device", sysname) >= 0) {
+ /* Get expander's address */
+ if (sd_device_get_sysattr_value(expander_sasdev, "sas_address", &sas_address) < 0)
+ return NULL;
+ }
+
+ format_lun_number(parent, &lun);
+ if (sas_address)
+ path_prepend(path, "sas-exp%s-phy%s-%s", sas_address, phy_id, lun);
+ else
+ path_prepend(path, "sas-phy%s-%s", phy_id, lun);
+
+ return parent;
+}
+
+static sd_device *handle_scsi_iscsi(sd_device *parent, char **path) {
+ sd_device *transportdev;
+ _cleanup_(sd_device_unrefp) sd_device *sessiondev = NULL, *conndev = NULL;
+ const char *target, *connname, *addr, *port;
+ _cleanup_free_ char *lun = NULL;
+ const char *sysname, *sysnum;
+
+ assert(parent);
+ assert(path);
+
+ /* find iscsi session */
+ for (transportdev = parent; ; ) {
+
+ if (sd_device_get_parent(transportdev, &transportdev) < 0)
+ return NULL;
+ if (sd_device_get_sysname(transportdev, &sysname) < 0)
+ return NULL;
+ if (startswith(sysname, "session"))
+ break;
+ }
+
+ /* find iscsi session device */
+ if (sd_device_new_from_subsystem_sysname(&sessiondev, "iscsi_session", sysname) < 0)
+ return NULL;
+
+ if (sd_device_get_sysattr_value(sessiondev, "targetname", &target) < 0)
+ return NULL;
+
+ if (sd_device_get_sysnum(transportdev, &sysnum) < 0 || !sysnum)
+ return NULL;
+ connname = strjoina("connection", sysnum, ":0");
+ if (sd_device_new_from_subsystem_sysname(&conndev, "iscsi_connection", connname) < 0)
+ return NULL;
+
+ if (sd_device_get_sysattr_value(conndev, "persistent_address", &addr) < 0)
+ return NULL;
+ if (sd_device_get_sysattr_value(conndev, "persistent_port", &port) < 0)
+ return NULL;
+
+ format_lun_number(parent, &lun);
+ path_prepend(path, "ip-%s:%s-iscsi-%s-%s", addr, port, target, lun);
+ return parent;
+}
+
+static sd_device *handle_scsi_ata(sd_device *parent, char **path) {
+ sd_device *targetdev, *target_parent;
+ _cleanup_(sd_device_unrefp) sd_device *atadev = NULL;
+ const char *port_no, *sysname;
+
+ assert(parent);
+ assert(path);
+
+ if (sd_device_get_parent_with_subsystem_devtype(parent, "scsi", "scsi_host", &targetdev) < 0)
+ return NULL;
+
+ if (sd_device_get_parent(targetdev, &target_parent) < 0)
+ return NULL;
+
+ if (sd_device_get_sysname(target_parent, &sysname) < 0)
+ return NULL;
+ if (sd_device_new_from_subsystem_sysname(&atadev, "ata_port", sysname) < 0)
+ return NULL;
+
+ if (sd_device_get_sysattr_value(atadev, "port_no", &port_no) < 0)
+ return NULL;
+
+ path_prepend(path, "ata-%s", port_no);
+ return parent;
+}
+
+static sd_device *handle_scsi_default(sd_device *parent, char **path) {
+ sd_device *hostdev;
+ int host, bus, target, lun;
+ const char *name, *base, *pos;
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *dent;
+ int basenum = -1;
+
+ assert(parent);
+ assert(path);
+
+ if (sd_device_get_parent_with_subsystem_devtype(parent, "scsi", "scsi_host", &hostdev) < 0)
+ return NULL;
+
+ if (sd_device_get_sysname(parent, &name) < 0)
+ return NULL;
+ if (sscanf(name, "%d:%d:%d:%d", &host, &bus, &target, &lun) != 4)
+ return NULL;
+
+ /*
+ * Rebase host offset to get the local relative number
+ *
+ * Note: This is by definition racy, unreliable and too simple.
+ * Please do not copy this model anywhere. It's just a left-over
+ * from the time we had no idea how things should look like in
+ * the end.
+ *
+ * Making assumptions about a global in-kernel counter and use
+ * that to calculate a local offset is a very broken concept. It
+ * can only work as long as things are in strict order.
+ *
+ * The kernel needs to export the instance/port number of a
+ * controller directly, without the need for rebase magic like
+ * this. Manual driver unbind/bind, parallel hotplug/unplug will
+ * get into the way of this "I hope it works" logic.
+ */
+
+ if (sd_device_get_syspath(hostdev, &base) < 0)
+ return NULL;
+ pos = strrchr(base, '/');
+ if (!pos)
+ return NULL;
+
+ base = strndupa(base, pos - base);
+ dir = opendir(base);
+ if (!dir)
+ return NULL;
+
+ FOREACH_DIRENT_ALL(dent, dir, break) {
+ char *rest;
+ int i;
+
+ if (dent->d_name[0] == '.')
+ continue;
+ if (!IN_SET(dent->d_type, DT_DIR, DT_LNK))
+ continue;
+ if (!startswith(dent->d_name, "host"))
+ continue;
+ i = strtoul(&dent->d_name[4], &rest, 10);
+ if (rest[0] != '\0')
+ continue;
+ /*
+ * find the smallest number; the host really needs to export its
+ * own instance number per parent device; relying on the global host
+ * enumeration and plainly rebasing the numbers sounds unreliable
+ */
+ if (basenum == -1 || i < basenum)
+ basenum = i;
+ }
+ if (basenum == -1)
+ return hostdev;
+ host -= basenum;
+
+ path_prepend(path, "scsi-%u:%u:%u:%u", host, bus, target, lun);
+ return hostdev;
+}
+
+static sd_device *handle_scsi_hyperv(sd_device *parent, char **path, size_t guid_str_len) {
+ sd_device *hostdev;
+ sd_device *vmbusdev;
+ const char *guid_str;
+ _cleanup_free_ char *lun = NULL;
+ char guid[39];
+ size_t i, k;
+
+ assert(parent);
+ assert(path);
+ assert(guid_str_len < sizeof(guid));
+
+ if (sd_device_get_parent_with_subsystem_devtype(parent, "scsi", "scsi_host", &hostdev) < 0)
+ return NULL;
+
+ if (sd_device_get_parent(hostdev, &vmbusdev) < 0)
+ return NULL;
+
+ if (sd_device_get_sysattr_value(vmbusdev, "device_id", &guid_str) < 0)
+ return NULL;
+
+ if (strlen(guid_str) < guid_str_len || guid_str[0] != '{' || guid_str[guid_str_len-1] != '}')
+ return NULL;
+
+ for (i = 1, k = 0; i < guid_str_len-1; i++) {
+ if (guid_str[i] == '-')
+ continue;
+ guid[k++] = guid_str[i];
+ }
+ guid[k] = '\0';
+
+ format_lun_number(parent, &lun);
+ path_prepend(path, "vmbus-%s-%s", guid, lun);
+ return parent;
+}
+
+static sd_device *handle_scsi(sd_device *parent, char **path, bool *supported_parent) {
+ const char *devtype, *id, *name;
+
+ if (sd_device_get_devtype(parent, &devtype) < 0 ||
+ !streq(devtype, "scsi_device"))
+ return parent;
+
+ /* firewire */
+ if (sd_device_get_sysattr_value(parent, "ieee1394_id", &id) >= 0) {
+ path_prepend(path, "ieee1394-0x%s", id);
+ *supported_parent = true;
+ return skip_subsystem(parent, "scsi");
+ }
+
+ /* scsi sysfs does not have a "subsystem" for the transport */
+ if (sd_device_get_syspath(parent, &name) < 0)
+ return NULL;
+
+ if (strstr(name, "/rport-")) {
+ *supported_parent = true;
+ return handle_scsi_fibre_channel(parent, path);
+ }
+
+ if (strstr(name, "/end_device-")) {
+ *supported_parent = true;
+ return handle_scsi_sas(parent, path);
+ }
+
+ if (strstr(name, "/session")) {
+ *supported_parent = true;
+ return handle_scsi_iscsi(parent, path);
+ }
+
+ if (strstr(name, "/ata"))
+ return handle_scsi_ata(parent, path);
+
+ if (strstr(name, "/vmbus_"))
+ return handle_scsi_hyperv(parent, path, 37);
+ else if (strstr(name, "/VMBUS"))
+ return handle_scsi_hyperv(parent, path, 38);
+
+ return handle_scsi_default(parent, path);
+}
+
+static sd_device *handle_cciss(sd_device *parent, char **path) {
+ const char *str;
+ unsigned controller, disk;
+
+ if (sd_device_get_sysname(parent, &str) < 0)
+ return NULL;
+ if (sscanf(str, "c%ud%u%*s", &controller, &disk) != 2)
+ return NULL;
+
+ path_prepend(path, "cciss-disk%u", disk);
+ return skip_subsystem(parent, "cciss");
+}
+
+static void handle_scsi_tape(sd_device *dev, char **path) {
+ const char *name;
+
+ /* must be the last device in the syspath */
+ if (*path)
+ return;
+
+ if (sd_device_get_sysname(dev, &name) < 0)
+ return;
+
+ if (startswith(name, "nst") && strchr("lma", name[3]))
+ path_prepend(path, "nst%c", name[3]);
+ else if (startswith(name, "st") && strchr("lma", name[2]))
+ path_prepend(path, "st%c", name[2]);
+}
+
+static sd_device *handle_usb(sd_device *parent, char **path) {
+ const char *devtype, *str, *port;
+
+ if (sd_device_get_devtype(parent, &devtype) < 0)
+ return parent;
+ if (!STR_IN_SET(devtype, "usb_interface", "usb_device"))
+ return parent;
+
+ if (sd_device_get_sysname(parent, &str) < 0)
+ return parent;
+ port = strchr(str, '-');
+ if (!port)
+ return parent;
+ port++;
+
+ path_prepend(path, "usb-0:%s", port);
+ return skip_subsystem(parent, "usb");
+}
+
+static sd_device *handle_bcma(sd_device *parent, char **path) {
+ const char *sysname;
+ unsigned core;
+
+ if (sd_device_get_sysname(parent, &sysname) < 0)
+ return NULL;
+ if (sscanf(sysname, "bcma%*u:%u", &core) != 1)
+ return NULL;
+
+ path_prepend(path, "bcma-%u", core);
+ return parent;
+}
+
+/* Handle devices of AP bus in System z platform. */
+static sd_device *handle_ap(sd_device *parent, char **path) {
+ const char *type, *func;
+
+ assert(parent);
+ assert(path);
+
+ if (sd_device_get_sysattr_value(parent, "type", &type) >= 0 &&
+ sd_device_get_sysattr_value(parent, "ap_functions", &func) >= 0)
+ path_prepend(path, "ap-%s-%s", type, func);
+ else {
+ const char *sysname;
+
+ if (sd_device_get_sysname(parent, &sysname) >= 0)
+ path_prepend(path, "ap-%s", sysname);
+ }
+
+ return skip_subsystem(parent, "ap");
+}
+
+static int builtin_path_id(sd_device *dev, int argc, char *argv[], bool test) {
+ sd_device *parent;
+ _cleanup_free_ char *path = NULL;
+ bool supported_transport = false;
+ bool supported_parent = false;
+ const char *subsystem;
+
+ assert(dev);
+
+ /* walk up the chain of devices and compose path */
+ parent = dev;
+ while (parent) {
+ const char *subsys, *sysname;
+
+ if (sd_device_get_subsystem(parent, &subsys) < 0 ||
+ sd_device_get_sysname(parent, &sysname) < 0) {
+ ;
+ } else if (streq(subsys, "scsi_tape")) {
+ handle_scsi_tape(parent, &path);
+ } else if (streq(subsys, "scsi")) {
+ parent = handle_scsi(parent, &path, &supported_parent);
+ supported_transport = true;
+ } else if (streq(subsys, "cciss")) {
+ parent = handle_cciss(parent, &path);
+ supported_transport = true;
+ } else if (streq(subsys, "usb")) {
+ parent = handle_usb(parent, &path);
+ supported_transport = true;
+ } else if (streq(subsys, "bcma")) {
+ parent = handle_bcma(parent, &path);
+ supported_transport = true;
+ } else if (streq(subsys, "serio")) {
+ const char *sysnum;
+
+ if (sd_device_get_sysnum(parent, &sysnum) >= 0 && sysnum) {
+ path_prepend(&path, "serio-%s", sysnum);
+ parent = skip_subsystem(parent, "serio");
+ }
+ } else if (streq(subsys, "pci")) {
+ path_prepend(&path, "pci-%s", sysname);
+ parent = skip_subsystem(parent, "pci");
+ supported_parent = true;
+ } else if (streq(subsys, "platform")) {
+ path_prepend(&path, "platform-%s", sysname);
+ parent = skip_subsystem(parent, "platform");
+ supported_transport = true;
+ supported_parent = true;
+ } else if (streq(subsys, "acpi")) {
+ path_prepend(&path, "acpi-%s", sysname);
+ parent = skip_subsystem(parent, "acpi");
+ supported_parent = true;
+ } else if (streq(subsys, "xen")) {
+ path_prepend(&path, "xen-%s", sysname);
+ parent = skip_subsystem(parent, "xen");
+ supported_parent = true;
+ } else if (streq(subsys, "virtio")) {
+ parent = skip_subsystem(parent, "virtio");
+ supported_transport = true;
+ } else if (streq(subsys, "scm")) {
+ path_prepend(&path, "scm-%s", sysname);
+ parent = skip_subsystem(parent, "scm");
+ supported_transport = true;
+ supported_parent = true;
+ } else if (streq(subsys, "ccw")) {
+ path_prepend(&path, "ccw-%s", sysname);
+ parent = skip_subsystem(parent, "ccw");
+ supported_transport = true;
+ supported_parent = true;
+ } else if (streq(subsys, "ccwgroup")) {
+ path_prepend(&path, "ccwgroup-%s", sysname);
+ parent = skip_subsystem(parent, "ccwgroup");
+ supported_transport = true;
+ supported_parent = true;
+ } else if (streq(subsys, "ap")) {
+ parent = handle_ap(parent, &path);
+ supported_transport = true;
+ supported_parent = true;
+ } else if (streq(subsys, "iucv")) {
+ path_prepend(&path, "iucv-%s", sysname);
+ parent = skip_subsystem(parent, "iucv");
+ supported_transport = true;
+ supported_parent = true;
+ } else if (streq(subsys, "nvme")) {
+ const char *nsid;
+
+ if (sd_device_get_sysattr_value(dev, "nsid", &nsid) >= 0) {
+ path_prepend(&path, "nvme-%s", nsid);
+ parent = skip_subsystem(parent, "nvme");
+ supported_parent = true;
+ supported_transport = true;
+ }
+ }
+
+ if (!parent)
+ break;
+ if (sd_device_get_parent(parent, &parent) < 0)
+ break;
+ }
+
+ if (!path)
+ return -ENOENT;
+
+ /*
+ * Do not return devices with an unknown parent device type. They
+ * might produce conflicting IDs if the parent does not provide a
+ * unique and predictable name.
+ */
+ if (!supported_parent)
+ return -ENOENT;
+
+ /*
+ * Do not return block devices without a well-known transport. Some
+ * devices do not expose their buses and do not provide a unique
+ * and predictable name that way.
+ */
+ if (sd_device_get_subsystem(dev, &subsystem) >= 0 &&
+ streq(subsystem, "block") &&
+ !supported_transport)
+ return -ENOENT;
+
+ {
+ char tag[UTIL_NAME_SIZE];
+ size_t i;
+ const char *p;
+
+ /* compose valid udev tag name */
+ for (p = path, i = 0; *p; p++) {
+ if ((*p >= '0' && *p <= '9') ||
+ (*p >= 'A' && *p <= 'Z') ||
+ (*p >= 'a' && *p <= 'z') ||
+ *p == '-') {
+ tag[i++] = *p;
+ continue;
+ }
+
+ /* skip all leading '_' */
+ if (i == 0)
+ continue;
+
+ /* avoid second '_' */
+ if (tag[i-1] == '_')
+ continue;
+
+ tag[i++] = '_';
+ }
+ /* strip trailing '_' */
+ while (i > 0 && tag[i-1] == '_')
+ i--;
+ tag[i] = '\0';
+
+ udev_builtin_add_property(dev, test, "ID_PATH", path);
+ udev_builtin_add_property(dev, test, "ID_PATH_TAG", tag);
+ }
+
+ return 0;
+}
+
+const struct udev_builtin udev_builtin_path_id = {
+ .name = "path_id",
+ .cmd = builtin_path_id,
+ .help = "Compose persistent device path",
+ .run_once = true,
+};
diff --git a/src/udev/udev-builtin-uaccess.c b/src/udev/udev-builtin-uaccess.c
new file mode 100644
index 0000000..10a143a
--- /dev/null
+++ b/src/udev/udev-builtin-uaccess.c
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * manage device node user ACL
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#include "sd-login.h"
+
+#include "device-util.h"
+#include "login-util.h"
+#include "logind-acl.h"
+#include "log.h"
+#include "udev-builtin.h"
+
+static int builtin_uaccess(sd_device *dev, int argc, char *argv[], bool test) {
+ const char *path = NULL, *seat;
+ bool changed_acl = false;
+ uid_t uid;
+ int r;
+
+ umask(0022);
+
+ /* don't muck around with ACLs when the system is not running systemd */
+ if (!logind_running())
+ return 0;
+
+ r = sd_device_get_devname(dev, &path);
+ if (r < 0) {
+ log_device_error_errno(dev, r, "Failed to get device name: %m");
+ goto finish;
+ }
+
+ if (sd_device_get_property_value(dev, "ID_SEAT", &seat) < 0)
+ seat = "seat0";
+
+ r = sd_seat_get_active(seat, NULL, &uid);
+ if (r < 0) {
+ if (IN_SET(r, -ENXIO, -ENODATA))
+ /* No active session on this seat */
+ r = 0;
+ else
+ log_device_error_errno(dev, r, "Failed to determine active user on seat %s: %m", seat);
+
+ goto finish;
+ }
+
+ r = devnode_acl(path, true, false, 0, true, uid);
+ if (r < 0) {
+ log_device_full(dev, r == -ENOENT ? LOG_DEBUG : LOG_ERR, r, "Failed to apply ACL: %m");
+ goto finish;
+ }
+
+ changed_acl = true;
+ r = 0;
+
+finish:
+ if (path && !changed_acl) {
+ int k;
+
+ /* Better be safe than sorry and reset ACL */
+ k = devnode_acl(path, true, false, 0, false, 0);
+ if (k < 0) {
+ log_device_full(dev, k == -ENOENT ? LOG_DEBUG : LOG_ERR, k, "Failed to apply ACL: %m");
+ if (r >= 0)
+ r = k;
+ }
+ }
+
+ return r;
+}
+
+const struct udev_builtin udev_builtin_uaccess = {
+ .name = "uaccess",
+ .cmd = builtin_uaccess,
+ .help = "Manage device node user ACL",
+};
diff --git a/src/udev/udev-builtin-usb_id.c b/src/udev/udev-builtin-usb_id.c
new file mode 100644
index 0000000..3525d25
--- /dev/null
+++ b/src/udev/udev-builtin-usb_id.c
@@ -0,0 +1,460 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * USB device properties and persistent device path
+ *
+ * Copyright (c) 2005 SUSE Linux Products GmbH, Germany
+ * Author: Hannes Reinecke <hare@suse.de>
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "device-util.h"
+#include "fd-util.h"
+#include "libudev-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "udev-builtin.h"
+
+static void set_usb_iftype(char *to, int if_class_num, size_t len) {
+ const char *type = "generic";
+
+ switch (if_class_num) {
+ case 1:
+ type = "audio";
+ break;
+ case 2: /* CDC-Control */
+ break;
+ case 3:
+ type = "hid";
+ break;
+ case 5: /* Physical */
+ break;
+ case 6:
+ type = "media";
+ break;
+ case 7:
+ type = "printer";
+ break;
+ case 8:
+ type = "storage";
+ break;
+ case 9:
+ type = "hub";
+ break;
+ case 0x0a: /* CDC-Data */
+ break;
+ case 0x0b: /* Chip/Smart Card */
+ break;
+ case 0x0d: /* Content Security */
+ break;
+ case 0x0e:
+ type = "video";
+ break;
+ case 0xdc: /* Diagnostic Device */
+ break;
+ case 0xe0: /* Wireless Controller */
+ break;
+ case 0xfe: /* Application-specific */
+ break;
+ case 0xff: /* Vendor-specific */
+ break;
+ default:
+ break;
+ }
+ strncpy(to, type, len);
+ to[len-1] = '\0';
+}
+
+static int set_usb_mass_storage_ifsubtype(char *to, const char *from, size_t len) {
+ int type_num = 0;
+ char *eptr;
+ const char *type = "generic";
+
+ type_num = strtoul(from, &eptr, 0);
+ if (eptr != from) {
+ switch (type_num) {
+ case 1: /* RBC devices */
+ type = "rbc";
+ break;
+ case 2:
+ type = "atapi";
+ break;
+ case 3:
+ type = "tape";
+ break;
+ case 4: /* UFI */
+ type = "floppy";
+ break;
+ case 6: /* Transparent SPC-2 devices */
+ type = "scsi";
+ break;
+ default:
+ break;
+ }
+ }
+ strscpy(to, len, type);
+ return type_num;
+}
+
+static void set_scsi_type(char *to, const char *from, size_t len) {
+ int type_num;
+ char *eptr;
+ const char *type = "generic";
+
+ type_num = strtoul(from, &eptr, 0);
+ if (eptr != from) {
+ switch (type_num) {
+ case 0:
+ case 0xe:
+ type = "disk";
+ break;
+ case 1:
+ type = "tape";
+ break;
+ case 4:
+ case 7:
+ case 0xf:
+ type = "optical";
+ break;
+ case 5:
+ type = "cd";
+ break;
+ default:
+ break;
+ }
+ }
+ strscpy(to, len, type);
+}
+
+#define USB_DT_DEVICE 0x01
+#define USB_DT_INTERFACE 0x04
+
+static int dev_if_packed_info(sd_device *dev, char *ifs_str, size_t len) {
+ _cleanup_free_ char *filename = NULL;
+ _cleanup_close_ int fd = -1;
+ ssize_t size;
+ unsigned char buf[18 + 65535];
+ size_t pos = 0;
+ unsigned strpos = 0;
+ const char *syspath;
+ int r;
+ struct usb_interface_descriptor {
+ uint8_t bLength;
+ uint8_t bDescriptorType;
+ uint8_t bInterfaceNumber;
+ uint8_t bAlternateSetting;
+ uint8_t bNumEndpoints;
+ uint8_t bInterfaceClass;
+ uint8_t bInterfaceSubClass;
+ uint8_t bInterfaceProtocol;
+ uint8_t iInterface;
+ } _packed_;
+
+ r = sd_device_get_syspath(dev, &syspath);
+ if (r < 0)
+ return r;
+ if (asprintf(&filename, "%s/descriptors", syspath) < 0)
+ return log_oom();
+
+ fd = open(filename, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return log_device_debug_errno(dev, errno, "Failed to open USB device 'descriptors' file: %m");
+
+ size = read(fd, buf, sizeof(buf));
+ if (size < 18 || (size_t) size >= sizeof(buf))
+ return -EIO;
+
+ ifs_str[0] = '\0';
+ while (pos + sizeof(struct usb_interface_descriptor) < (size_t) size &&
+ strpos + 7 < len - 2) {
+
+ struct usb_interface_descriptor *desc;
+ char if_str[8];
+
+ desc = (struct usb_interface_descriptor *) &buf[pos];
+ if (desc->bLength < 3)
+ break;
+ pos += desc->bLength;
+
+ if (desc->bDescriptorType != USB_DT_INTERFACE)
+ continue;
+
+ if (snprintf(if_str, 8, ":%02x%02x%02x",
+ desc->bInterfaceClass,
+ desc->bInterfaceSubClass,
+ desc->bInterfaceProtocol) != 7)
+ continue;
+
+ if (strstr(ifs_str, if_str) != NULL)
+ continue;
+
+ memcpy(&ifs_str[strpos], if_str, 8),
+ strpos += 7;
+ }
+
+ if (strpos > 0) {
+ ifs_str[strpos++] = ':';
+ ifs_str[strpos++] = '\0';
+ }
+
+ return 0;
+}
+
+/*
+ * A unique USB identification is generated like this:
+ *
+ * 1.) Get the USB device type from InterfaceClass and InterfaceSubClass
+ * 2.) If the device type is 'Mass-Storage/SPC-2' or 'Mass-Storage/RBC',
+ * use the SCSI vendor and model as USB-Vendor and USB-model.
+ * 3.) Otherwise, use the USB manufacturer and product as
+ * USB-Vendor and USB-model. Any non-printable characters
+ * in those strings will be skipped; a slash '/' will be converted
+ * into a full stop '.'.
+ * 4.) If that fails, too, we will use idVendor and idProduct
+ * as USB-Vendor and USB-model.
+ * 5.) The USB identification is the USB-vendor and USB-model
+ * string concatenated with an underscore '_'.
+ * 6.) If the device supplies a serial number, this number
+ * is concatenated with the identification with an underscore '_'.
+ */
+static int builtin_usb_id(sd_device *dev, int argc, char *argv[], bool test) {
+ char vendor_str[64] = "";
+ char vendor_str_enc[256];
+ const char *vendor_id;
+ char model_str[64] = "";
+ char model_str_enc[256];
+ const char *product_id;
+ char serial_str[UTIL_NAME_SIZE] = "";
+ char packed_if_str[UTIL_NAME_SIZE] = "";
+ char revision_str[64] = "";
+ char type_str[64] = "";
+ char instance_str[64] = "";
+ const char *ifnum = NULL;
+ const char *driver = NULL;
+ char serial[256];
+
+ sd_device *dev_interface, *dev_usb;
+ const char *if_class, *if_subclass;
+ int if_class_num;
+ int protocol = 0;
+ size_t l;
+ char *s;
+
+ const char *syspath, *sysname, *devtype, *interface_syspath;
+ int r;
+
+ assert(dev);
+
+ r = sd_device_get_syspath(dev, &syspath);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_sysname(dev, &sysname);
+ if (r < 0)
+ return r;
+
+ /* shortcut, if we are called directly for a "usb_device" type */
+ if (sd_device_get_devtype(dev, &devtype) >= 0 && streq(devtype, "usb_device")) {
+ dev_if_packed_info(dev, packed_if_str, sizeof(packed_if_str));
+ dev_usb = dev;
+ goto fallback;
+ }
+
+ /* usb interface directory */
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "usb", "usb_interface", &dev_interface);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to access usb_interface: %m");
+
+ r = sd_device_get_syspath(dev_interface, &interface_syspath);
+ if (r < 0)
+ return log_device_debug_errno(dev_interface, r, "Failed to get syspath: %m");
+ (void) sd_device_get_sysattr_value(dev_interface, "bInterfaceNumber", &ifnum);
+ (void) sd_device_get_sysattr_value(dev_interface, "driver", &driver);
+
+ r = sd_device_get_sysattr_value(dev_interface, "bInterfaceClass", &if_class);
+ if (r < 0)
+ return log_device_debug_errno(dev_interface, r, "Failed to get bInterfaceClass attribute: %m");
+
+ if_class_num = strtoul(if_class, NULL, 16);
+ if (if_class_num == 8) {
+ /* mass storage */
+ if (sd_device_get_sysattr_value(dev_interface, "bInterfaceSubClass", &if_subclass) >= 0)
+ protocol = set_usb_mass_storage_ifsubtype(type_str, if_subclass, sizeof(type_str)-1);
+ } else
+ set_usb_iftype(type_str, if_class_num, sizeof(type_str)-1);
+
+ log_device_debug(dev_interface, "if_class:%d protocol:%d", if_class_num, protocol);
+
+ /* usb device directory */
+ r = sd_device_get_parent_with_subsystem_devtype(dev_interface, "usb", "usb_device", &dev_usb);
+ if (r < 0)
+ return log_device_debug_errno(dev_interface, r, "Failed to find parent 'usb' device");
+
+ /* all interfaces of the device in a single string */
+ dev_if_packed_info(dev_usb, packed_if_str, sizeof(packed_if_str));
+
+ /* mass storage : SCSI or ATAPI */
+ if (IN_SET(protocol, 6, 2)) {
+ sd_device *dev_scsi;
+ const char *scsi_sysname, *scsi_model, *scsi_vendor, *scsi_type, *scsi_rev;
+ int host, bus, target, lun;
+
+ /* get scsi device */
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "scsi", "scsi_device", &dev_scsi);
+ if (r < 0) {
+ log_device_debug_errno(dev, r, "Unable to find parent SCSI device");
+ goto fallback;
+ }
+ if (sd_device_get_sysname(dev_scsi, &scsi_sysname) < 0)
+ goto fallback;
+ if (sscanf(scsi_sysname, "%d:%d:%d:%d", &host, &bus, &target, &lun) != 4) {
+ log_device_debug(dev_scsi, "Invalid SCSI device");
+ goto fallback;
+ }
+
+ /* Generic SPC-2 device */
+ r = sd_device_get_sysattr_value(dev_scsi, "vendor", &scsi_vendor);
+ if (r < 0) {
+ log_device_debug_errno(dev_scsi, r, "Failed to get SCSI vendor attribute: %m");
+ goto fallback;
+ }
+ udev_util_encode_string(scsi_vendor, vendor_str_enc, sizeof(vendor_str_enc));
+ util_replace_whitespace(scsi_vendor, vendor_str, sizeof(vendor_str)-1);
+ util_replace_chars(vendor_str, NULL);
+
+ r = sd_device_get_sysattr_value(dev_scsi, "model", &scsi_model);
+ if (r < 0) {
+ log_device_debug_errno(dev_scsi, r, "Failed to get SCSI model attribute: %m");
+ goto fallback;
+ }
+ udev_util_encode_string(scsi_model, model_str_enc, sizeof(model_str_enc));
+ util_replace_whitespace(scsi_model, model_str, sizeof(model_str)-1);
+ util_replace_chars(model_str, NULL);
+
+ r = sd_device_get_sysattr_value(dev_scsi, "type", &scsi_type);
+ if (r < 0) {
+ log_device_debug_errno(dev_scsi, r, "Failed to get SCSI type attribute: %m");
+ goto fallback;
+ }
+ set_scsi_type(type_str, scsi_type, sizeof(type_str)-1);
+
+ r = sd_device_get_sysattr_value(dev_scsi, "rev", &scsi_rev);
+ if (r < 0) {
+ log_device_debug_errno(dev_scsi, r, "Failed to get SCSI revision attribute: %m");
+ goto fallback;
+ }
+ util_replace_whitespace(scsi_rev, revision_str, sizeof(revision_str)-1);
+ util_replace_chars(revision_str, NULL);
+
+ /*
+ * some broken devices have the same identifiers
+ * for all luns, export the target:lun number
+ */
+ sprintf(instance_str, "%d:%d", target, lun);
+ }
+
+fallback:
+ r = sd_device_get_sysattr_value(dev_usb, "idVendor", &vendor_id);
+ if (r < 0)
+ return log_device_debug_errno(dev_usb, r, "Failed to get idVendor attribute: %m");
+
+ r = sd_device_get_sysattr_value(dev_usb, "idProduct", &product_id);
+ if (r < 0)
+ return log_device_debug_errno(dev_usb, r, "Failed to get idProduct attribute: %m");
+
+ /* fallback to USB vendor & device */
+ if (vendor_str[0] == '\0') {
+ const char *usb_vendor;
+
+ if (sd_device_get_sysattr_value(dev_usb, "manufacturer", &usb_vendor) < 0)
+ usb_vendor = vendor_id;
+ udev_util_encode_string(usb_vendor, vendor_str_enc, sizeof(vendor_str_enc));
+ util_replace_whitespace(usb_vendor, vendor_str, sizeof(vendor_str)-1);
+ util_replace_chars(vendor_str, NULL);
+ }
+
+ if (model_str[0] == '\0') {
+ const char *usb_model;
+
+ if (sd_device_get_sysattr_value(dev_usb, "product", &usb_model) < 0)
+ usb_model = product_id;
+ udev_util_encode_string(usb_model, model_str_enc, sizeof(model_str_enc));
+ util_replace_whitespace(usb_model, model_str, sizeof(model_str)-1);
+ util_replace_chars(model_str, NULL);
+ }
+
+ if (revision_str[0] == '\0') {
+ const char *usb_rev;
+
+ if (sd_device_get_sysattr_value(dev_usb, "bcdDevice", &usb_rev) >= 0) {
+ util_replace_whitespace(usb_rev, revision_str, sizeof(revision_str)-1);
+ util_replace_chars(revision_str, NULL);
+ }
+ }
+
+ if (serial_str[0] == '\0') {
+ const char *usb_serial;
+
+ if (sd_device_get_sysattr_value(dev_usb, "serial", &usb_serial) >= 0) {
+ const unsigned char *p;
+
+ /* http://msdn.microsoft.com/en-us/library/windows/hardware/gg487321.aspx */
+ for (p = (unsigned char *) usb_serial; *p != '\0'; p++)
+ if (*p < 0x20 || *p > 0x7f || *p == ',') {
+ usb_serial = NULL;
+ break;
+ }
+
+ if (usb_serial) {
+ util_replace_whitespace(usb_serial, serial_str, sizeof(serial_str)-1);
+ util_replace_chars(serial_str, NULL);
+ }
+ }
+ }
+
+ s = serial;
+ l = strpcpyl(&s, sizeof(serial), vendor_str, "_", model_str, NULL);
+ if (!isempty(serial_str))
+ l = strpcpyl(&s, l, "_", serial_str, NULL);
+
+ if (!isempty(instance_str))
+ strpcpyl(&s, l, "-", instance_str, NULL);
+
+ udev_builtin_add_property(dev, test, "ID_VENDOR", vendor_str);
+ udev_builtin_add_property(dev, test, "ID_VENDOR_ENC", vendor_str_enc);
+ udev_builtin_add_property(dev, test, "ID_VENDOR_ID", vendor_id);
+ udev_builtin_add_property(dev, test, "ID_MODEL", model_str);
+ udev_builtin_add_property(dev, test, "ID_MODEL_ENC", model_str_enc);
+ udev_builtin_add_property(dev, test, "ID_MODEL_ID", product_id);
+ udev_builtin_add_property(dev, test, "ID_REVISION", revision_str);
+ udev_builtin_add_property(dev, test, "ID_SERIAL", serial);
+ if (!isempty(serial_str))
+ udev_builtin_add_property(dev, test, "ID_SERIAL_SHORT", serial_str);
+ if (!isempty(type_str))
+ udev_builtin_add_property(dev, test, "ID_TYPE", type_str);
+ if (!isempty(instance_str))
+ udev_builtin_add_property(dev, test, "ID_INSTANCE", instance_str);
+ udev_builtin_add_property(dev, test, "ID_BUS", "usb");
+ if (!isempty(packed_if_str))
+ udev_builtin_add_property(dev, test, "ID_USB_INTERFACES", packed_if_str);
+ if (ifnum)
+ udev_builtin_add_property(dev, test, "ID_USB_INTERFACE_NUM", ifnum);
+ if (driver)
+ udev_builtin_add_property(dev, test, "ID_USB_DRIVER", driver);
+ return 0;
+}
+
+const struct udev_builtin udev_builtin_usb_id = {
+ .name = "usb_id",
+ .cmd = builtin_usb_id,
+ .help = "USB device properties",
+ .run_once = true,
+};
diff --git a/src/udev/udev-builtin.c b/src/udev/udev-builtin.c
new file mode 100644
index 0000000..48ce295
--- /dev/null
+++ b/src/udev/udev-builtin.c
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "device-private.h"
+#include "device-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "udev-builtin.h"
+
+static bool initialized;
+
+static const struct udev_builtin *builtins[_UDEV_BUILTIN_MAX] = {
+#if HAVE_BLKID
+ [UDEV_BUILTIN_BLKID] = &udev_builtin_blkid,
+#endif
+ [UDEV_BUILTIN_BTRFS] = &udev_builtin_btrfs,
+ [UDEV_BUILTIN_HWDB] = &udev_builtin_hwdb,
+ [UDEV_BUILTIN_INPUT_ID] = &udev_builtin_input_id,
+ [UDEV_BUILTIN_KEYBOARD] = &udev_builtin_keyboard,
+#if HAVE_KMOD
+ [UDEV_BUILTIN_KMOD] = &udev_builtin_kmod,
+#endif
+ [UDEV_BUILTIN_NET_ID] = &udev_builtin_net_id,
+ [UDEV_BUILTIN_NET_LINK] = &udev_builtin_net_setup_link,
+ [UDEV_BUILTIN_PATH_ID] = &udev_builtin_path_id,
+ [UDEV_BUILTIN_USB_ID] = &udev_builtin_usb_id,
+#if HAVE_ACL
+ [UDEV_BUILTIN_UACCESS] = &udev_builtin_uaccess,
+#endif
+};
+
+void udev_builtin_init(void) {
+ unsigned i;
+
+ if (initialized)
+ return;
+
+ for (i = 0; i < _UDEV_BUILTIN_MAX; i++)
+ if (builtins[i] && builtins[i]->init)
+ builtins[i]->init();
+
+ initialized = true;
+}
+
+void udev_builtin_exit(void) {
+ unsigned i;
+
+ if (!initialized)
+ return;
+
+ for (i = 0; i < _UDEV_BUILTIN_MAX; i++)
+ if (builtins[i] && builtins[i]->exit)
+ builtins[i]->exit();
+
+ initialized = false;
+}
+
+bool udev_builtin_validate(void) {
+ unsigned i;
+
+ for (i = 0; i < _UDEV_BUILTIN_MAX; i++)
+ if (builtins[i] && builtins[i]->validate && builtins[i]->validate())
+ return true;
+ return false;
+}
+
+void udev_builtin_list(void) {
+ unsigned i;
+
+ for (i = 0; i < _UDEV_BUILTIN_MAX; i++)
+ if (builtins[i])
+ fprintf(stderr, " %-14s %s\n", builtins[i]->name, builtins[i]->help);
+}
+
+const char *udev_builtin_name(enum udev_builtin_cmd cmd) {
+ assert(cmd >= 0 && cmd < _UDEV_BUILTIN_MAX);
+
+ if (!builtins[cmd])
+ return NULL;
+
+ return builtins[cmd]->name;
+}
+
+bool udev_builtin_run_once(enum udev_builtin_cmd cmd) {
+ assert(cmd >= 0 && cmd < _UDEV_BUILTIN_MAX);
+
+ if (!builtins[cmd])
+ return false;
+
+ return builtins[cmd]->run_once;
+}
+
+enum udev_builtin_cmd udev_builtin_lookup(const char *command) {
+ enum udev_builtin_cmd i;
+ size_t n;
+
+ assert(command);
+
+ command += strspn(command, WHITESPACE);
+ n = strcspn(command, WHITESPACE);
+ for (i = 0; i < _UDEV_BUILTIN_MAX; i++)
+ if (builtins[i] && strneq(builtins[i]->name, command, n))
+ return i;
+
+ return _UDEV_BUILTIN_INVALID;
+}
+
+int udev_builtin_run(sd_device *dev, enum udev_builtin_cmd cmd, const char *command, bool test) {
+ _cleanup_strv_free_ char **argv = NULL;
+
+ assert(dev);
+ assert(cmd >= 0 && cmd < _UDEV_BUILTIN_MAX);
+ assert(command);
+
+ if (!builtins[cmd])
+ return -EOPNOTSUPP;
+
+ argv = strv_split_full(command, NULL, SPLIT_QUOTES | SPLIT_RELAX);
+ if (!argv)
+ return -ENOMEM;
+
+ /* we need '0' here to reset the internal state */
+ optind = 0;
+ return builtins[cmd]->cmd(dev, strv_length(argv), argv, test);
+}
+
+int udev_builtin_add_property(sd_device *dev, bool test, const char *key, const char *val) {
+ int r;
+
+ assert(dev);
+ assert(key);
+
+ r = device_add_property(dev, key, val);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to add property '%s%s%s'",
+ key, val ? "=" : "", strempty(val));
+
+ if (test)
+ printf("%s=%s\n", key, strempty(val));
+
+ return 0;
+}
diff --git a/src/udev/udev-builtin.h b/src/udev/udev-builtin.h
new file mode 100644
index 0000000..e51eefb
--- /dev/null
+++ b/src/udev/udev-builtin.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-device.h"
+
+enum udev_builtin_cmd {
+#if HAVE_BLKID
+ UDEV_BUILTIN_BLKID,
+#endif
+ UDEV_BUILTIN_BTRFS,
+ UDEV_BUILTIN_HWDB,
+ UDEV_BUILTIN_INPUT_ID,
+ UDEV_BUILTIN_KEYBOARD,
+#if HAVE_KMOD
+ UDEV_BUILTIN_KMOD,
+#endif
+ UDEV_BUILTIN_NET_ID,
+ UDEV_BUILTIN_NET_LINK,
+ UDEV_BUILTIN_PATH_ID,
+ UDEV_BUILTIN_USB_ID,
+#if HAVE_ACL
+ UDEV_BUILTIN_UACCESS,
+#endif
+ _UDEV_BUILTIN_MAX,
+ _UDEV_BUILTIN_INVALID = -1,
+};
+
+struct udev_builtin {
+ const char *name;
+ int (*cmd)(sd_device *dev, int argc, char *argv[], bool test);
+ const char *help;
+ int (*init)(void);
+ void (*exit)(void);
+ bool (*validate)(void);
+ bool run_once;
+};
+
+#if HAVE_BLKID
+extern const struct udev_builtin udev_builtin_blkid;
+#endif
+extern const struct udev_builtin udev_builtin_btrfs;
+extern const struct udev_builtin udev_builtin_hwdb;
+extern const struct udev_builtin udev_builtin_input_id;
+extern const struct udev_builtin udev_builtin_keyboard;
+#if HAVE_KMOD
+extern const struct udev_builtin udev_builtin_kmod;
+#endif
+extern const struct udev_builtin udev_builtin_net_id;
+extern const struct udev_builtin udev_builtin_net_setup_link;
+extern const struct udev_builtin udev_builtin_path_id;
+extern const struct udev_builtin udev_builtin_usb_id;
+#if HAVE_ACL
+extern const struct udev_builtin udev_builtin_uaccess;
+#endif
+
+void udev_builtin_init(void);
+void udev_builtin_exit(void);
+enum udev_builtin_cmd udev_builtin_lookup(const char *command);
+const char *udev_builtin_name(enum udev_builtin_cmd cmd);
+bool udev_builtin_run_once(enum udev_builtin_cmd cmd);
+int udev_builtin_run(sd_device *dev, enum udev_builtin_cmd cmd, const char *command, bool test);
+void udev_builtin_list(void);
+bool udev_builtin_validate(void);
+int udev_builtin_add_property(sd_device *dev, bool test, const char *key, const char *val);
+int udev_builtin_hwdb_lookup(sd_device *dev, const char *prefix, const char *modalias,
+ const char *filter, bool test);
diff --git a/src/udev/udev-ctrl.c b/src/udev/udev-ctrl.c
new file mode 100644
index 0000000..c217815
--- /dev/null
+++ b/src/udev/udev-ctrl.c
@@ -0,0 +1,427 @@
+/* SPDX-License-Identifier: LGPL-2.1+
+ *
+ * libudev - interface to udev device information
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ */
+
+#include <errno.h>
+#include <poll.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "socket-util.h"
+#include "strxcpyx.h"
+#include "udev-ctrl.h"
+#include "util.h"
+
+/* wire protocol magic must match */
+#define UDEV_CTRL_MAGIC 0xdead1dea
+
+enum udev_ctrl_msg_type {
+ UDEV_CTRL_UNKNOWN,
+ UDEV_CTRL_SET_LOG_LEVEL,
+ UDEV_CTRL_STOP_EXEC_QUEUE,
+ UDEV_CTRL_START_EXEC_QUEUE,
+ UDEV_CTRL_RELOAD,
+ UDEV_CTRL_SET_ENV,
+ UDEV_CTRL_SET_CHILDREN_MAX,
+ UDEV_CTRL_PING,
+ UDEV_CTRL_EXIT,
+};
+
+struct udev_ctrl_msg_wire {
+ char version[16];
+ unsigned magic;
+ enum udev_ctrl_msg_type type;
+ union {
+ int intval;
+ char buf[256];
+ };
+};
+
+struct udev_ctrl_msg {
+ unsigned n_ref;
+ struct udev_ctrl_connection *conn;
+ struct udev_ctrl_msg_wire ctrl_msg_wire;
+};
+
+struct udev_ctrl {
+ unsigned n_ref;
+ int sock;
+ union sockaddr_union saddr;
+ socklen_t addrlen;
+ bool bound;
+ bool cleanup_socket;
+ bool connected;
+};
+
+struct udev_ctrl_connection {
+ unsigned n_ref;
+ struct udev_ctrl *uctrl;
+ int sock;
+};
+
+struct udev_ctrl *udev_ctrl_new_from_fd(int fd) {
+ struct udev_ctrl *uctrl;
+ int r;
+
+ uctrl = new0(struct udev_ctrl, 1);
+ if (!uctrl)
+ return NULL;
+ uctrl->n_ref = 1;
+
+ if (fd < 0) {
+ uctrl->sock = socket(AF_LOCAL, SOCK_SEQPACKET|SOCK_NONBLOCK|SOCK_CLOEXEC, 0);
+ if (uctrl->sock < 0) {
+ log_error_errno(errno, "Failed to create socket: %m");
+ udev_ctrl_unref(uctrl);
+ return NULL;
+ }
+ } else {
+ uctrl->bound = true;
+ uctrl->sock = fd;
+ }
+
+ /*
+ * FIXME: remove it as soon as we can depend on this:
+ * http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=90c6bd34f884cd9cee21f1d152baf6c18bcac949
+ */
+ r = setsockopt_int(uctrl->sock, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set SO_PASSCRED: %m");
+
+ uctrl->saddr.un = (struct sockaddr_un) {
+ .sun_family = AF_UNIX,
+ .sun_path = "/run/udev/control",
+ };
+
+ uctrl->addrlen = SOCKADDR_UN_LEN(uctrl->saddr.un);
+ return uctrl;
+}
+
+struct udev_ctrl *udev_ctrl_new(void) {
+ return udev_ctrl_new_from_fd(-1);
+}
+
+int udev_ctrl_enable_receiving(struct udev_ctrl *uctrl) {
+ int err;
+
+ if (!uctrl->bound) {
+ err = bind(uctrl->sock, &uctrl->saddr.sa, uctrl->addrlen);
+ if (err < 0 && errno == EADDRINUSE) {
+ (void) sockaddr_un_unlink(&uctrl->saddr.un);
+ err = bind(uctrl->sock, &uctrl->saddr.sa, uctrl->addrlen);
+ }
+
+ if (err < 0)
+ return log_error_errno(errno, "Failed to bind socket: %m");
+
+ err = listen(uctrl->sock, 0);
+ if (err < 0)
+ return log_error_errno(errno, "Failed to listen: %m");
+
+ uctrl->bound = true;
+ uctrl->cleanup_socket = true;
+ }
+ return 0;
+}
+
+static struct udev_ctrl *udev_ctrl_free(struct udev_ctrl *uctrl) {
+ assert(uctrl);
+
+ safe_close(uctrl->sock);
+ return mfree(uctrl);
+}
+
+DEFINE_PRIVATE_TRIVIAL_REF_FUNC(struct udev_ctrl, udev_ctrl);
+DEFINE_TRIVIAL_UNREF_FUNC(struct udev_ctrl, udev_ctrl, udev_ctrl_free);
+
+int udev_ctrl_cleanup(struct udev_ctrl *uctrl) {
+ if (!uctrl)
+ return 0;
+ if (uctrl->cleanup_socket)
+ sockaddr_un_unlink(&uctrl->saddr.un);
+ return 0;
+}
+
+int udev_ctrl_get_fd(struct udev_ctrl *uctrl) {
+ if (!uctrl)
+ return -EINVAL;
+ return uctrl->sock;
+}
+
+struct udev_ctrl_connection *udev_ctrl_get_connection(struct udev_ctrl *uctrl) {
+ struct udev_ctrl_connection *conn;
+ struct ucred ucred = {};
+ int r;
+
+ conn = new(struct udev_ctrl_connection, 1);
+ if (!conn)
+ return NULL;
+ conn->n_ref = 1;
+ conn->uctrl = uctrl;
+
+ conn->sock = accept4(uctrl->sock, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK);
+ if (conn->sock < 0) {
+ if (errno != EINTR)
+ log_error_errno(errno, "Failed to receive ctrl connection: %m");
+ goto err;
+ }
+
+ /* check peer credential of connection */
+ r = getpeercred(conn->sock, &ucred);
+ if (r < 0) {
+ log_error_errno(r, "Failed to receive credentials of ctrl connection: %m");
+ goto err;
+ }
+ if (ucred.uid > 0) {
+ log_error("Sender uid="UID_FMT", message ignored", ucred.uid);
+ goto err;
+ }
+
+ /* enable receiving of the sender credentials in the messages */
+ r = setsockopt_int(conn->sock, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set SO_PASSCRED: %m");
+
+ udev_ctrl_ref(uctrl);
+ return conn;
+err:
+ safe_close(conn->sock);
+ return mfree(conn);
+}
+
+static struct udev_ctrl_connection *udev_ctrl_connection_free(struct udev_ctrl_connection *conn) {
+ assert(conn);
+
+ safe_close(conn->sock);
+ udev_ctrl_unref(conn->uctrl);
+ return mfree(conn);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(struct udev_ctrl_connection, udev_ctrl_connection, udev_ctrl_connection_free);
+
+static int ctrl_send(struct udev_ctrl *uctrl, enum udev_ctrl_msg_type type, int intval, const char *buf, usec_t timeout) {
+ struct udev_ctrl_msg_wire ctrl_msg_wire = {
+ .version = "udev-" STRINGIFY(PROJECT_VERSION),
+ .magic = UDEV_CTRL_MAGIC,
+ .type = type,
+ };
+
+ if (buf)
+ strscpy(ctrl_msg_wire.buf, sizeof(ctrl_msg_wire.buf), buf);
+ else
+ ctrl_msg_wire.intval = intval;
+
+ if (!uctrl->connected) {
+ if (connect(uctrl->sock, &uctrl->saddr.sa, uctrl->addrlen) < 0)
+ return -errno;
+ uctrl->connected = true;
+ }
+ if (send(uctrl->sock, &ctrl_msg_wire, sizeof(ctrl_msg_wire), 0) < 0)
+ return -errno;
+
+ /* wait for peer message handling or disconnect */
+ for (;;) {
+ struct pollfd pfd = {
+ .fd = uctrl->sock,
+ .events = POLLIN,
+ };
+ int r;
+
+ r = poll(&pfd, 1, DIV_ROUND_UP(timeout, USEC_PER_MSEC));
+ if (r < 0) {
+ if (errno == EINTR)
+ continue;
+ return -errno;
+ }
+ if (r == 0)
+ return -ETIMEDOUT;
+ if (pfd.revents & POLLERR)
+ return -EIO;
+ return 0;
+ }
+}
+
+int udev_ctrl_send_set_log_level(struct udev_ctrl *uctrl, int priority, usec_t timeout) {
+ return ctrl_send(uctrl, UDEV_CTRL_SET_LOG_LEVEL, priority, NULL, timeout);
+}
+
+int udev_ctrl_send_stop_exec_queue(struct udev_ctrl *uctrl, usec_t timeout) {
+ return ctrl_send(uctrl, UDEV_CTRL_STOP_EXEC_QUEUE, 0, NULL, timeout);
+}
+
+int udev_ctrl_send_start_exec_queue(struct udev_ctrl *uctrl, usec_t timeout) {
+ return ctrl_send(uctrl, UDEV_CTRL_START_EXEC_QUEUE, 0, NULL, timeout);
+}
+
+int udev_ctrl_send_reload(struct udev_ctrl *uctrl, usec_t timeout) {
+ return ctrl_send(uctrl, UDEV_CTRL_RELOAD, 0, NULL, timeout);
+}
+
+int udev_ctrl_send_set_env(struct udev_ctrl *uctrl, const char *key, usec_t timeout) {
+ return ctrl_send(uctrl, UDEV_CTRL_SET_ENV, 0, key, timeout);
+}
+
+int udev_ctrl_send_set_children_max(struct udev_ctrl *uctrl, int count, usec_t timeout) {
+ return ctrl_send(uctrl, UDEV_CTRL_SET_CHILDREN_MAX, count, NULL, timeout);
+}
+
+int udev_ctrl_send_ping(struct udev_ctrl *uctrl, usec_t timeout) {
+ return ctrl_send(uctrl, UDEV_CTRL_PING, 0, NULL, timeout);
+}
+
+int udev_ctrl_send_exit(struct udev_ctrl *uctrl, usec_t timeout) {
+ return ctrl_send(uctrl, UDEV_CTRL_EXIT, 0, NULL, timeout);
+}
+
+struct udev_ctrl_msg *udev_ctrl_receive_msg(struct udev_ctrl_connection *conn) {
+ struct udev_ctrl_msg *uctrl_msg;
+ ssize_t size;
+ struct cmsghdr *cmsg;
+ struct iovec iov;
+ char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
+ struct msghdr smsg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = cred_msg,
+ .msg_controllen = sizeof(cred_msg),
+ };
+ struct ucred *cred;
+
+ uctrl_msg = new0(struct udev_ctrl_msg, 1);
+ if (!uctrl_msg)
+ return NULL;
+ uctrl_msg->n_ref = 1;
+ uctrl_msg->conn = conn;
+ udev_ctrl_connection_ref(conn);
+
+ /* wait for the incoming message */
+ for (;;) {
+ struct pollfd pfd[1];
+ int r;
+
+ pfd[0].fd = conn->sock;
+ pfd[0].events = POLLIN;
+
+ r = poll(pfd, 1, 10000);
+ if (r < 0) {
+ if (errno == EINTR)
+ continue;
+ goto err;
+ } else if (r == 0) {
+ log_error("Timeout waiting for ctrl message");
+ goto err;
+ } else {
+ if (!(pfd[0].revents & POLLIN)) {
+ log_error("Invalid ctrl connection: %m");
+ goto err;
+ }
+ }
+
+ break;
+ }
+
+ iov = IOVEC_MAKE(&uctrl_msg->ctrl_msg_wire, sizeof(struct udev_ctrl_msg_wire));
+
+ size = recvmsg(conn->sock, &smsg, 0);
+ if (size < 0) {
+ log_error_errno(errno, "Failed to receive ctrl message: %m");
+ goto err;
+ }
+
+ cmsg_close_all(&smsg);
+
+ cmsg = CMSG_FIRSTHDR(&smsg);
+
+ if (!cmsg || cmsg->cmsg_type != SCM_CREDENTIALS) {
+ log_error("No sender credentials received, ignoring message");
+ goto err;
+ }
+
+ cred = (struct ucred *) CMSG_DATA(cmsg);
+
+ if (cred->uid != 0) {
+ log_error("Sender uid="UID_FMT", ignoring message", cred->uid);
+ goto err;
+ }
+
+ if (uctrl_msg->ctrl_msg_wire.magic != UDEV_CTRL_MAGIC) {
+ log_error("Message magic 0x%08x doesn't match, ignoring", uctrl_msg->ctrl_msg_wire.magic);
+ goto err;
+ }
+
+ return uctrl_msg;
+err:
+ udev_ctrl_msg_unref(uctrl_msg);
+ return NULL;
+}
+
+static struct udev_ctrl_msg *udev_ctrl_msg_free(struct udev_ctrl_msg *ctrl_msg) {
+ assert(ctrl_msg);
+
+ udev_ctrl_connection_unref(ctrl_msg->conn);
+ return mfree(ctrl_msg);
+}
+
+DEFINE_TRIVIAL_UNREF_FUNC(struct udev_ctrl_msg, udev_ctrl_msg, udev_ctrl_msg_free);
+
+int udev_ctrl_get_set_log_level(struct udev_ctrl_msg *ctrl_msg) {
+ if (ctrl_msg->ctrl_msg_wire.type == UDEV_CTRL_SET_LOG_LEVEL)
+ return ctrl_msg->ctrl_msg_wire.intval;
+ return -1;
+}
+
+int udev_ctrl_get_stop_exec_queue(struct udev_ctrl_msg *ctrl_msg) {
+ if (ctrl_msg->ctrl_msg_wire.type == UDEV_CTRL_STOP_EXEC_QUEUE)
+ return 1;
+ return -1;
+}
+
+int udev_ctrl_get_start_exec_queue(struct udev_ctrl_msg *ctrl_msg) {
+ if (ctrl_msg->ctrl_msg_wire.type == UDEV_CTRL_START_EXEC_QUEUE)
+ return 1;
+ return -1;
+}
+
+int udev_ctrl_get_reload(struct udev_ctrl_msg *ctrl_msg) {
+ if (ctrl_msg->ctrl_msg_wire.type == UDEV_CTRL_RELOAD)
+ return 1;
+ return -1;
+}
+
+const char *udev_ctrl_get_set_env(struct udev_ctrl_msg *ctrl_msg) {
+ if (ctrl_msg->ctrl_msg_wire.type == UDEV_CTRL_SET_ENV)
+ return ctrl_msg->ctrl_msg_wire.buf;
+ return NULL;
+}
+
+int udev_ctrl_get_set_children_max(struct udev_ctrl_msg *ctrl_msg) {
+ if (ctrl_msg->ctrl_msg_wire.type == UDEV_CTRL_SET_CHILDREN_MAX)
+ return ctrl_msg->ctrl_msg_wire.intval;
+ return -1;
+}
+
+int udev_ctrl_get_ping(struct udev_ctrl_msg *ctrl_msg) {
+ if (ctrl_msg->ctrl_msg_wire.type == UDEV_CTRL_PING)
+ return 1;
+ return -1;
+}
+
+int udev_ctrl_get_exit(struct udev_ctrl_msg *ctrl_msg) {
+ if (ctrl_msg->ctrl_msg_wire.type == UDEV_CTRL_EXIT)
+ return 1;
+ return -1;
+}
diff --git a/src/udev/udev-ctrl.h b/src/udev/udev-ctrl.h
new file mode 100644
index 0000000..8e452a4
--- /dev/null
+++ b/src/udev/udev-ctrl.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#pragma once
+
+#include "macro.h"
+#include "time-util.h"
+
+struct udev_ctrl;
+struct udev_ctrl *udev_ctrl_new(void);
+struct udev_ctrl *udev_ctrl_new_from_fd(int fd);
+int udev_ctrl_enable_receiving(struct udev_ctrl *uctrl);
+struct udev_ctrl *udev_ctrl_unref(struct udev_ctrl *uctrl);
+int udev_ctrl_cleanup(struct udev_ctrl *uctrl);
+int udev_ctrl_get_fd(struct udev_ctrl *uctrl);
+int udev_ctrl_send_set_log_level(struct udev_ctrl *uctrl, int priority, usec_t timeout);
+int udev_ctrl_send_stop_exec_queue(struct udev_ctrl *uctrl, usec_t timeout);
+int udev_ctrl_send_start_exec_queue(struct udev_ctrl *uctrl, usec_t timeout);
+int udev_ctrl_send_reload(struct udev_ctrl *uctrl, usec_t timeout);
+int udev_ctrl_send_ping(struct udev_ctrl *uctrl, usec_t timeout);
+int udev_ctrl_send_exit(struct udev_ctrl *uctrl, usec_t timeout);
+int udev_ctrl_send_set_env(struct udev_ctrl *uctrl, const char *key, usec_t timeout);
+int udev_ctrl_send_set_children_max(struct udev_ctrl *uctrl, int count, usec_t timeout);
+
+struct udev_ctrl_connection;
+struct udev_ctrl_connection *udev_ctrl_get_connection(struct udev_ctrl *uctrl);
+struct udev_ctrl_connection *udev_ctrl_connection_ref(struct udev_ctrl_connection *conn);
+struct udev_ctrl_connection *udev_ctrl_connection_unref(struct udev_ctrl_connection *conn);
+
+struct udev_ctrl_msg;
+struct udev_ctrl_msg *udev_ctrl_receive_msg(struct udev_ctrl_connection *conn);
+struct udev_ctrl_msg *udev_ctrl_msg_unref(struct udev_ctrl_msg *ctrl_msg);
+int udev_ctrl_get_set_log_level(struct udev_ctrl_msg *ctrl_msg);
+int udev_ctrl_get_stop_exec_queue(struct udev_ctrl_msg *ctrl_msg);
+int udev_ctrl_get_start_exec_queue(struct udev_ctrl_msg *ctrl_msg);
+int udev_ctrl_get_reload(struct udev_ctrl_msg *ctrl_msg);
+int udev_ctrl_get_ping(struct udev_ctrl_msg *ctrl_msg);
+int udev_ctrl_get_exit(struct udev_ctrl_msg *ctrl_msg);
+const char *udev_ctrl_get_set_env(struct udev_ctrl_msg *ctrl_msg);
+int udev_ctrl_get_set_children_max(struct udev_ctrl_msg *ctrl_msg);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_ctrl*, udev_ctrl_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_ctrl_connection*, udev_ctrl_connection_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_ctrl_msg*, udev_ctrl_msg_unref);
diff --git a/src/udev/udev-event.c b/src/udev/udev-event.c
new file mode 100644
index 0000000..07b7365
--- /dev/null
+++ b/src/udev/udev-event.c
@@ -0,0 +1,893 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "libudev-util.h"
+#include "netlink-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "udev-builtin.h"
+#include "udev-node.h"
+#include "udev-watch.h"
+#include "udev.h"
+
+typedef struct Spawn {
+ const char *cmd;
+ pid_t pid;
+ usec_t timeout_warn_usec;
+ usec_t timeout_usec;
+ usec_t event_birth_usec;
+ bool accept_failure;
+ int fd_stdout;
+ int fd_stderr;
+ char *result;
+ size_t result_size;
+ size_t result_len;
+} Spawn;
+
+UdevEvent *udev_event_new(sd_device *dev, usec_t exec_delay_usec, sd_netlink *rtnl) {
+ UdevEvent *event;
+
+ assert(dev);
+
+ event = new(UdevEvent, 1);
+ if (!event)
+ return NULL;
+
+ *event = (UdevEvent) {
+ .dev = sd_device_ref(dev),
+ .birth_usec = now(CLOCK_MONOTONIC),
+ .exec_delay_usec = exec_delay_usec,
+ .rtnl = sd_netlink_ref(rtnl),
+ };
+
+ return event;
+}
+
+UdevEvent *udev_event_free(UdevEvent *event) {
+ if (!event)
+ return NULL;
+
+ sd_device_unref(event->dev);
+ sd_device_unref(event->dev_db_clone);
+ sd_netlink_unref(event->rtnl);
+ hashmap_free_free_key(event->run_list);
+ hashmap_free_free_free(event->seclabel_list);
+ free(event->program_result);
+ free(event->name);
+
+ return mfree(event);
+}
+
+enum subst_type {
+ SUBST_DEVNODE,
+ SUBST_ATTR,
+ SUBST_ENV,
+ SUBST_KERNEL,
+ SUBST_KERNEL_NUMBER,
+ SUBST_DRIVER,
+ SUBST_DEVPATH,
+ SUBST_ID,
+ SUBST_MAJOR,
+ SUBST_MINOR,
+ SUBST_RESULT,
+ SUBST_PARENT,
+ SUBST_NAME,
+ SUBST_LINKS,
+ SUBST_ROOT,
+ SUBST_SYS,
+};
+
+struct subst_map_entry {
+ const char *name;
+ const char fmt;
+ enum subst_type type;
+};
+
+static const struct subst_map_entry map[] = {
+ { .name = "devnode", .fmt = 'N', .type = SUBST_DEVNODE },
+ { .name = "tempnode", .fmt = 'N', .type = SUBST_DEVNODE },
+ { .name = "attr", .fmt = 's', .type = SUBST_ATTR },
+ { .name = "sysfs", .fmt = 's', .type = SUBST_ATTR },
+ { .name = "env", .fmt = 'E', .type = SUBST_ENV },
+ { .name = "kernel", .fmt = 'k', .type = SUBST_KERNEL },
+ { .name = "number", .fmt = 'n', .type = SUBST_KERNEL_NUMBER },
+ { .name = "driver", .fmt = 'd', .type = SUBST_DRIVER },
+ { .name = "devpath", .fmt = 'p', .type = SUBST_DEVPATH },
+ { .name = "id", .fmt = 'b', .type = SUBST_ID },
+ { .name = "major", .fmt = 'M', .type = SUBST_MAJOR },
+ { .name = "minor", .fmt = 'm', .type = SUBST_MINOR },
+ { .name = "result", .fmt = 'c', .type = SUBST_RESULT },
+ { .name = "parent", .fmt = 'P', .type = SUBST_PARENT },
+ { .name = "name", .fmt = 'D', .type = SUBST_NAME },
+ { .name = "links", .fmt = 'L', .type = SUBST_LINKS },
+ { .name = "root", .fmt = 'r', .type = SUBST_ROOT },
+ { .name = "sys", .fmt = 'S', .type = SUBST_SYS },
+};
+
+static ssize_t subst_format_var(UdevEvent *event,
+ const struct subst_map_entry *entry, char *attr,
+ char *dest, size_t l) {
+ sd_device *parent, *dev = event->dev;
+ const char *val = NULL;
+ char *s = dest;
+ dev_t devnum;
+ int r;
+
+ assert(entry);
+
+ switch (entry->type) {
+ case SUBST_DEVPATH:
+ r = sd_device_get_devpath(dev, &val);
+ if (r < 0)
+ return r;
+ l = strpcpy(&s, l, val);
+ break;
+ case SUBST_KERNEL:
+ r = sd_device_get_sysname(dev, &val);
+ if (r < 0)
+ return r;
+ l = strpcpy(&s, l, val);
+ break;
+ case SUBST_KERNEL_NUMBER:
+ r = sd_device_get_sysnum(dev, &val);
+ if (r < 0)
+ return r == -ENOENT ? 0 : r;
+ l = strpcpy(&s, l, val);
+ break;
+ case SUBST_ID:
+ if (!event->dev_parent)
+ return 0;
+ r = sd_device_get_sysname(event->dev_parent, &val);
+ if (r < 0)
+ return r;
+ l = strpcpy(&s, l, val);
+ break;
+ case SUBST_DRIVER:
+ if (!event->dev_parent)
+ return 0;
+ r = sd_device_get_driver(event->dev_parent, &val);
+ if (r < 0)
+ return r == -ENOENT ? 0 : r;
+ l = strpcpy(&s, l, val);
+ break;
+ case SUBST_MAJOR:
+ case SUBST_MINOR: {
+ char buf[DECIMAL_STR_MAX(unsigned)];
+
+ r = sd_device_get_devnum(dev, &devnum);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ xsprintf(buf, "%u", r < 0 ? 0 : entry->type == SUBST_MAJOR ? major(devnum) : minor(devnum));
+ l = strpcpy(&s, l, buf);
+ break;
+ }
+ case SUBST_RESULT: {
+ char *rest;
+ int i;
+
+ if (!event->program_result)
+ return 0;
+
+ /* get part of the result string */
+ i = 0;
+ if (attr)
+ i = strtoul(attr, &rest, 10);
+ if (i > 0) {
+ char result[UTIL_PATH_SIZE], tmp[UTIL_PATH_SIZE], *cpos;
+
+ strscpy(result, sizeof(result), event->program_result);
+ cpos = result;
+ while (--i) {
+ while (cpos[0] != '\0' && !isspace(cpos[0]))
+ cpos++;
+ while (isspace(cpos[0]))
+ cpos++;
+ if (cpos[0] == '\0')
+ break;
+ }
+ if (i > 0) {
+ log_error("requested part of result string not found");
+ break;
+ }
+ strscpy(tmp, sizeof(tmp), cpos);
+ /* %{2+}c copies the whole string from the second part on */
+ if (rest[0] != '+') {
+ cpos = strchr(tmp, ' ');
+ if (cpos)
+ cpos[0] = '\0';
+ }
+ l = strpcpy(&s, l, tmp);
+ } else
+ l = strpcpy(&s, l, event->program_result);
+ break;
+ }
+ case SUBST_ATTR: {
+ char vbuf[UTIL_NAME_SIZE];
+ size_t len;
+ int count;
+
+ if (!attr)
+ return -EINVAL;
+
+ /* try to read the value specified by "[dmi/id]product_name" */
+ if (util_resolve_subsys_kernel(attr, vbuf, sizeof(vbuf), true) == 0)
+ val = vbuf;
+
+ /* try to read the attribute the device */
+ if (!val)
+ (void) sd_device_get_sysattr_value(dev, attr, &val);
+
+ /* try to read the attribute of the parent device, other matches have selected */
+ if (!val && event->dev_parent && event->dev_parent != dev)
+ (void) sd_device_get_sysattr_value(event->dev_parent, attr, &val);
+
+ if (!val)
+ return 0;
+
+ /* strip trailing whitespace, and replace unwanted characters */
+ if (val != vbuf)
+ strscpy(vbuf, sizeof(vbuf), val);
+ len = strlen(vbuf);
+ while (len > 0 && isspace(vbuf[--len]))
+ vbuf[len] = '\0';
+ count = util_replace_chars(vbuf, UDEV_ALLOWED_CHARS_INPUT);
+ if (count > 0)
+ log_device_debug(dev, "%i character(s) replaced", count);
+ l = strpcpy(&s, l, vbuf);
+ break;
+ }
+ case SUBST_PARENT:
+ r = sd_device_get_parent(dev, &parent);
+ if (r < 0)
+ return r == -ENODEV ? 0 : r;
+ r = sd_device_get_devname(parent, &val);
+ if (r < 0)
+ return r == -ENOENT ? 0 : r;
+ l = strpcpy(&s, l, val + STRLEN("/dev/"));
+ break;
+ case SUBST_DEVNODE:
+ r = sd_device_get_devname(dev, &val);
+ if (r < 0)
+ return r == -ENOENT ? 0 : r;
+ l = strpcpy(&s, l, val);
+ break;
+ case SUBST_NAME:
+ if (event->name)
+ l = strpcpy(&s, l, event->name);
+ else if (sd_device_get_devname(dev, &val) >= 0)
+ l = strpcpy(&s, l, val + STRLEN("/dev/"));
+ else {
+ r = sd_device_get_sysname(dev, &val);
+ if (r < 0)
+ return r;
+ l = strpcpy(&s, l, val);
+ }
+ break;
+ case SUBST_LINKS:
+ FOREACH_DEVICE_DEVLINK(dev, val)
+ if (s == dest)
+ l = strpcpy(&s, l, val + STRLEN("/dev/"));
+ else
+ l = strpcpyl(&s, l, " ", val + STRLEN("/dev/"), NULL);
+ break;
+ case SUBST_ROOT:
+ l = strpcpy(&s, l, "/dev");
+ break;
+ case SUBST_SYS:
+ l = strpcpy(&s, l, "/sys");
+ break;
+ case SUBST_ENV:
+ if (!attr)
+ return 0;
+ r = sd_device_get_property_value(dev, attr, &val);
+ if (r < 0)
+ return r == -ENOENT ? 0 : r;
+ l = strpcpy(&s, l, val);
+ break;
+ default:
+ assert_not_reached("Unknown format substitution type");
+ }
+
+ return s - dest;
+}
+
+ssize_t udev_event_apply_format(UdevEvent *event,
+ const char *src, char *dest, size_t size,
+ bool replace_whitespace) {
+ const char *from;
+ char *s;
+ size_t l;
+
+ assert(event);
+ assert(event->dev);
+ assert(src);
+ assert(dest);
+ assert(size > 0);
+
+ from = src;
+ s = dest;
+ l = size;
+
+ for (;;) {
+ const struct subst_map_entry *entry = NULL;
+ char attrbuf[UTIL_PATH_SIZE], *attr;
+ bool format_dollar = false;
+ ssize_t subst_len;
+
+ while (from[0] != '\0') {
+ if (from[0] == '$') {
+ /* substitute named variable */
+ unsigned i;
+
+ if (from[1] == '$') {
+ from++;
+ goto copy;
+ }
+
+ for (i = 0; i < ELEMENTSOF(map); i++) {
+ if (startswith(&from[1], map[i].name)) {
+ entry = &map[i];
+ from += strlen(map[i].name)+1;
+ format_dollar = true;
+ goto subst;
+ }
+ }
+ } else if (from[0] == '%') {
+ /* substitute format char */
+ unsigned i;
+
+ if (from[1] == '%') {
+ from++;
+ goto copy;
+ }
+
+ for (i = 0; i < ELEMENTSOF(map); i++) {
+ if (from[1] == map[i].fmt) {
+ entry = &map[i];
+ from += 2;
+ goto subst;
+ }
+ }
+ }
+copy:
+ /* copy char */
+ if (l < 2) /* need space for this char and the terminating NUL */
+ goto out;
+ s[0] = from[0];
+ from++;
+ s++;
+ l--;
+ }
+
+ goto out;
+subst:
+ /* extract possible $format{attr} */
+ if (from[0] == '{') {
+ unsigned i;
+
+ from++;
+ for (i = 0; from[i] != '}'; i++)
+ if (from[i] == '\0') {
+ log_error("missing closing brace for format '%s'", src);
+ goto out;
+ }
+
+ if (i >= sizeof(attrbuf))
+ goto out;
+ memcpy(attrbuf, from, i);
+ attrbuf[i] = '\0';
+ from += i+1;
+ attr = attrbuf;
+ } else
+ attr = NULL;
+
+ subst_len = subst_format_var(event, entry, attr, s, l);
+ if (subst_len < 0) {
+ if (format_dollar)
+ log_device_warning_errno(event->dev, subst_len, "Failed to substitute variable '$%s', ignoring: %m", entry->name);
+ else
+ log_device_warning_errno(event->dev, subst_len, "Failed to apply format '%%%c', ignoring: %m", entry->fmt);
+
+ continue;
+ }
+
+ /* SUBST_RESULT handles spaces itself */
+ if (replace_whitespace && entry->type != SUBST_RESULT)
+ /* util_replace_whitespace can replace in-place,
+ * and does nothing if subst_len == 0
+ */
+ subst_len = util_replace_whitespace(s, s, subst_len);
+
+ s += subst_len;
+ l -= subst_len;
+ }
+
+out:
+ assert(l >= 1);
+ s[0] = '\0';
+ return l;
+}
+
+static int on_spawn_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Spawn *spawn = userdata;
+ char buf[4096], *p;
+ size_t size;
+ ssize_t l;
+
+ assert(spawn);
+ assert(fd == spawn->fd_stdout || fd == spawn->fd_stderr);
+ assert(!spawn->result || spawn->result_len < spawn->result_size);
+
+ if (fd == spawn->fd_stdout && spawn->result) {
+ p = spawn->result + spawn->result_len;
+ size = spawn->result_size - spawn->result_len;
+ } else {
+ p = buf;
+ size = sizeof(buf);
+ }
+
+ l = read(fd, p, size - 1);
+ if (l < 0) {
+ if (errno != EAGAIN)
+ log_error_errno(errno, "Failed to read stdout of '%s': %m", spawn->cmd);
+
+ return 0;
+ }
+
+ p[l] = '\0';
+ if (fd == spawn->fd_stdout && spawn->result)
+ spawn->result_len += l;
+
+ /* Log output only if we watch stderr. */
+ if (l > 0 && spawn->fd_stderr >= 0) {
+ _cleanup_strv_free_ char **v = NULL;
+ char **q;
+
+ v = strv_split_newlines(p);
+ if (!v)
+ return 0;
+
+ STRV_FOREACH(q, v)
+ log_debug("'%s'(%s) '%s'", spawn->cmd,
+ fd == spawn->fd_stdout ? "out" : "err", *q);
+ }
+
+ return 0;
+}
+
+static int on_spawn_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
+ Spawn *spawn = userdata;
+ char timeout[FORMAT_TIMESPAN_MAX];
+
+ assert(spawn);
+
+ kill_and_sigcont(spawn->pid, SIGKILL);
+
+ log_error("Spawned process '%s' ["PID_FMT"] timed out after %s, killing", spawn->cmd, spawn->pid,
+ format_timespan(timeout, sizeof(timeout), spawn->timeout_usec, USEC_PER_SEC));
+
+ return 1;
+}
+
+static int on_spawn_timeout_warning(sd_event_source *s, uint64_t usec, void *userdata) {
+ Spawn *spawn = userdata;
+ char timeout[FORMAT_TIMESPAN_MAX];
+
+ assert(spawn);
+
+ log_warning("Spawned process '%s' ["PID_FMT"] is taking longer than %s to complete", spawn->cmd, spawn->pid,
+ format_timespan(timeout, sizeof(timeout), spawn->timeout_warn_usec, USEC_PER_SEC));
+
+ return 1;
+}
+
+static int on_spawn_sigchld(sd_event_source *s, const siginfo_t *si, void *userdata) {
+ Spawn *spawn = userdata;
+ int ret = -EIO;
+
+ assert(spawn);
+
+ switch (si->si_code) {
+ case CLD_EXITED:
+ if (si->si_status == 0)
+ log_debug("Process '%s' succeeded.", spawn->cmd);
+ else
+ log_full(spawn->accept_failure ? LOG_DEBUG : LOG_WARNING,
+ "Process '%s' failed with exit code %i.", spawn->cmd, si->si_status);
+ ret = si->si_status;
+ break;
+ case CLD_KILLED:
+ case CLD_DUMPED:
+ log_error("Process '%s' terminated by signal %s.", spawn->cmd, signal_to_string(si->si_status));
+ break;
+ default:
+ log_error("Process '%s' failed due to unknown reason.", spawn->cmd);
+ }
+
+ sd_event_exit(sd_event_source_get_event(s), ret);
+ return 1;
+}
+
+static int spawn_wait(Spawn *spawn) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ int r;
+
+ assert(spawn);
+
+ r = sd_event_new(&e);
+ if (r < 0)
+ return r;
+
+ if (spawn->timeout_usec > 0) {
+ usec_t usec, age_usec;
+
+ usec = now(CLOCK_MONOTONIC);
+ age_usec = usec - spawn->event_birth_usec;
+ if (age_usec < spawn->timeout_usec) {
+ if (spawn->timeout_warn_usec > 0 &&
+ spawn->timeout_warn_usec < spawn->timeout_usec &&
+ spawn->timeout_warn_usec > age_usec) {
+ spawn->timeout_warn_usec -= age_usec;
+
+ r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
+ usec + spawn->timeout_warn_usec, USEC_PER_SEC,
+ on_spawn_timeout_warning, spawn);
+ if (r < 0)
+ return r;
+ }
+
+ spawn->timeout_usec -= age_usec;
+
+ r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
+ usec + spawn->timeout_usec, USEC_PER_SEC, on_spawn_timeout, spawn);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (spawn->fd_stdout >= 0) {
+ r = sd_event_add_io(e, NULL, spawn->fd_stdout, EPOLLIN, on_spawn_io, spawn);
+ if (r < 0)
+ return r;
+ }
+
+ if (spawn->fd_stderr >= 0) {
+ r = sd_event_add_io(e, NULL, spawn->fd_stderr, EPOLLIN, on_spawn_io, spawn);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_add_child(e, NULL, spawn->pid, WEXITED, on_spawn_sigchld, spawn);
+ if (r < 0)
+ return r;
+
+ return sd_event_loop(e);
+}
+
+int udev_event_spawn(UdevEvent *event,
+ usec_t timeout_usec,
+ bool accept_failure,
+ const char *cmd,
+ char *result, size_t ressize) {
+ _cleanup_close_pair_ int outpipe[2] = {-1, -1}, errpipe[2] = {-1, -1};
+ _cleanup_strv_free_ char **argv = NULL;
+ char **envp = NULL;
+ Spawn spawn;
+ pid_t pid;
+ int r;
+
+ assert(event);
+ assert(event->dev);
+ assert(result || ressize == 0);
+
+ /* pipes from child to parent */
+ if (result || log_get_max_level() >= LOG_INFO)
+ if (pipe2(outpipe, O_NONBLOCK|O_CLOEXEC) != 0)
+ return log_error_errno(errno, "Failed to create pipe for command '%s': %m", cmd);
+
+ if (log_get_max_level() >= LOG_INFO)
+ if (pipe2(errpipe, O_NONBLOCK|O_CLOEXEC) != 0)
+ return log_error_errno(errno, "Failed to create pipe for command '%s': %m", cmd);
+
+ argv = strv_split_full(cmd, NULL, SPLIT_QUOTES|SPLIT_RELAX);
+ if (!argv)
+ return log_oom();
+
+ if (isempty(argv[0]))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid command '%s'", cmd);
+
+ /* allow programs in /usr/lib/udev/ to be called without the path */
+ if (!path_is_absolute(argv[0])) {
+ char *program;
+
+ program = path_join(UDEVLIBEXECDIR, argv[0]);
+ if (!program)
+ return log_oom();
+
+ free_and_replace(argv[0], program);
+ }
+
+ r = device_get_properties_strv(event->dev, &envp);
+ if (r < 0)
+ return log_device_error_errno(event->dev, r, "Failed to get device properties");
+
+ log_debug("Starting '%s'", cmd);
+
+ r = safe_fork("(spawn)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fork() to execute command '%s': %m", cmd);
+ if (r == 0) {
+ if (rearrange_stdio(-1, outpipe[WRITE_END], errpipe[WRITE_END]) < 0)
+ _exit(EXIT_FAILURE);
+
+ (void) close_all_fds(NULL, 0);
+ (void) rlimit_nofile_safe();
+
+ execve(argv[0], argv, envp);
+ _exit(EXIT_FAILURE);
+ }
+
+ /* parent closed child's ends of pipes */
+ outpipe[WRITE_END] = safe_close(outpipe[WRITE_END]);
+ errpipe[WRITE_END] = safe_close(errpipe[WRITE_END]);
+
+ spawn = (Spawn) {
+ .cmd = cmd,
+ .pid = pid,
+ .accept_failure = accept_failure,
+ .timeout_warn_usec = udev_warn_timeout(timeout_usec),
+ .timeout_usec = timeout_usec,
+ .event_birth_usec = event->birth_usec,
+ .fd_stdout = outpipe[READ_END],
+ .fd_stderr = errpipe[READ_END],
+ .result = result,
+ .result_size = ressize,
+ };
+ r = spawn_wait(&spawn);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for spawned command '%s': %m", cmd);
+
+ if (result)
+ result[spawn.result_len] = '\0';
+
+ return r; /* 0 for success, and positive if the program failed */
+}
+
+static int rename_netif(UdevEvent *event) {
+ sd_device *dev = event->dev;
+ const char *action, *oldname;
+ char name[IFNAMSIZ];
+ int ifindex, r;
+
+ if (!event->name)
+ return 0; /* No new name is requested. */
+
+ r = sd_device_get_sysname(dev, &oldname);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get sysname: %m");
+
+ if (streq(event->name, oldname))
+ return 0; /* The interface name is already requested name. */
+
+ r = sd_device_get_property_value(dev, "ACTION", &action);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get property 'ACTION': %m");
+
+ if (!streq(action, "add"))
+ return 0; /* Rename the interface only when it is added. */
+
+ r = sd_device_get_ifindex(dev, &ifindex);
+ if (r == -ENOENT)
+ return 0; /* Device is not a network interface. */
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get ifindex: %m");
+
+ strscpy(name, IFNAMSIZ, event->name);
+ r = rtnl_set_link_name(&event->rtnl, ifindex, name);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to rename network interface %i from '%s' to '%s': %m", ifindex, oldname, name);
+
+ r = device_rename(dev, event->name);
+ if (r < 0)
+ return log_warning_errno(r, "Network interface %i is renamed from '%s' to '%s', but could not update sd_device object: %m", ifindex, oldname, name);
+
+ log_device_debug(dev, "Network interface %i is renamed from '%s' to '%s'", ifindex, oldname, name);
+
+ return 1;
+}
+
+static int update_devnode(UdevEvent *event) {
+ sd_device *dev = event->dev;
+ const char *action;
+ bool apply;
+ int r;
+
+ r = sd_device_get_devnum(dev, NULL);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get devnum: %m");
+
+ /* remove/update possible left-over symlinks from old database entry */
+ if (event->dev_db_clone)
+ (void) udev_node_update_old_links(dev, event->dev_db_clone);
+
+ if (!event->owner_set) {
+ r = device_get_devnode_uid(dev, &event->uid);
+ if (r < 0 && r != -ENOENT)
+ return log_device_error_errno(dev, r, "Failed to get devnode UID: %m");
+ }
+
+ if (!event->group_set) {
+ r = device_get_devnode_gid(dev, &event->gid);
+ if (r < 0 && r != -ENOENT)
+ return log_device_error_errno(dev, r, "Failed to get devnode GID: %m");
+ }
+
+ if (!event->mode_set) {
+ r = device_get_devnode_mode(dev, &event->mode);
+ if (r < 0 && r != -ENOENT)
+ return log_device_error_errno(dev, r, "Failed to get devnode mode: %m");
+ if (r == -ENOENT) {
+ if (event->gid > 0)
+ /* default 0660 if a group is assigned */
+ event->mode = 0660;
+ else
+ /* default 0600 */
+ event->mode = 0600;
+ }
+ }
+
+ r = sd_device_get_property_value(dev, "ACTION", &action);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get property 'ACTION': %m");
+
+ apply = streq(action, "add") || event->owner_set || event->group_set || event->mode_set;
+ return udev_node_add(dev, apply, event->mode, event->uid, event->gid, event->seclabel_list);
+}
+
+static void event_execute_rules_on_remove(
+ UdevEvent *event,
+ usec_t timeout_usec,
+ Hashmap *properties_list,
+ UdevRules *rules) {
+
+ sd_device *dev = event->dev;
+ int r;
+
+ r = device_read_db_internal(dev, true);
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to read database under /run/udev/data/: %m");
+
+ r = device_tag_index(dev, NULL, false);
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to remove corresponding tag files under /run/udev/tag/, ignoring: %m");
+
+ r = device_delete_db(dev);
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to delete database under /run/udev/data/, ignoring: %m");
+
+ if (sd_device_get_devnum(dev, NULL) >= 0)
+ (void) udev_watch_end(dev);
+
+ (void) udev_rules_apply_to_event(rules, event, timeout_usec, properties_list);
+
+ if (sd_device_get_devnum(dev, NULL) >= 0)
+ (void) udev_node_remove(dev);
+}
+
+int udev_event_execute_rules(UdevEvent *event,
+ usec_t timeout_usec,
+ Hashmap *properties_list,
+ UdevRules *rules) {
+ sd_device *dev = event->dev;
+ const char *subsystem, *action;
+ int r;
+
+ assert(event);
+ assert(rules);
+
+ r = sd_device_get_subsystem(dev, &subsystem);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get subsystem: %m");
+
+ r = sd_device_get_property_value(dev, "ACTION", &action);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get property 'ACTION': %m");
+
+ if (streq(action, "remove")) {
+ event_execute_rules_on_remove(event, timeout_usec, properties_list, rules);
+ return 0;
+ }
+
+ r = device_clone_with_db(dev, &event->dev_db_clone);
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to clone sd_device object, ignoring: %m");
+
+ if (event->dev_db_clone) {
+ r = sd_device_get_devnum(dev, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ log_device_debug_errno(dev, r, "Failed to get devnum, ignoring: %m");
+
+ if (streq(action, "move")) {
+ r = device_copy_properties(dev, event->dev_db_clone);
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to copy properties from cloned device, ignoring: %m");
+ }
+ } else
+ /* Disable watch during event processing. */
+ (void) udev_watch_end(event->dev_db_clone);
+ }
+
+ (void) udev_rules_apply_to_event(rules, event, timeout_usec, properties_list);
+
+ (void) rename_netif(event);
+ (void) update_devnode(event);
+
+ /* preserve old, or get new initialization timestamp */
+ r = device_ensure_usec_initialized(dev, event->dev_db_clone);
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to set initialization timestamp, ignoring: %m");
+
+ /* (re)write database file */
+ r = device_tag_index(dev, event->dev_db_clone, true);
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to update tags under /run/udev/tag/, ignoring: %m");
+
+ r = device_update_db(dev);
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to update database under /run/udev/data/, ignoring: %m");
+
+ device_set_is_initialized(dev);
+
+ event->dev_db_clone = sd_device_unref(event->dev_db_clone);
+
+ return 0;
+}
+
+void udev_event_execute_run(UdevEvent *event, usec_t timeout_usec) {
+ const char *cmd;
+ void *val;
+ Iterator i;
+
+ HASHMAP_FOREACH_KEY(val, cmd, event->run_list, i) {
+ enum udev_builtin_cmd builtin_cmd = PTR_TO_INT(val);
+ char command[UTIL_PATH_SIZE];
+
+ udev_event_apply_format(event, cmd, command, sizeof(command), false);
+
+ if (builtin_cmd >= 0 && builtin_cmd < _UDEV_BUILTIN_MAX)
+ udev_builtin_run(event->dev, builtin_cmd, command, false);
+ else {
+ if (event->exec_delay_usec > 0) {
+ log_debug("delay execution of '%s'", command);
+ (void) usleep(event->exec_delay_usec);
+ }
+
+ (void) udev_event_spawn(event, timeout_usec, false, command, NULL, 0);
+ }
+ }
+}
diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c
new file mode 100644
index 0000000..1c00dd1
--- /dev/null
+++ b/src/udev/udev-node.c
@@ -0,0 +1,445 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "device-nodes.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "libudev-util.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "udev-node.h"
+
+static int node_symlink(sd_device *dev, const char *node, const char *slink) {
+ _cleanup_free_ char *slink_dirname = NULL, *target = NULL;
+ const char *id_filename, *slink_tmp;
+ struct stat stats;
+ int r;
+
+ assert(dev);
+ assert(node);
+ assert(slink);
+
+ slink_dirname = dirname_malloc(slink);
+ if (!slink_dirname)
+ return log_oom();
+
+ /* use relative link */
+ r = path_make_relative(slink_dirname, node, &target);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get relative path from '%s' to '%s': %m", slink, node);
+
+ /* preserve link with correct target, do not replace node of other device */
+ if (lstat(slink, &stats) == 0) {
+ if (S_ISBLK(stats.st_mode) || S_ISCHR(stats.st_mode)) {
+ log_device_error(dev, "Conflicting device node '%s' found, link to '%s' will not be created.", slink, node);
+ return -EOPNOTSUPP;
+ } else if (S_ISLNK(stats.st_mode)) {
+ _cleanup_free_ char *buf = NULL;
+
+ if (readlink_malloc(slink, &buf) >= 0 &&
+ streq(target, buf)) {
+ log_device_debug(dev, "Preserve already existing symlink '%s' to '%s'", slink, target);
+ (void) label_fix(slink, LABEL_IGNORE_ENOENT);
+ (void) utimensat(AT_FDCWD, slink, NULL, AT_SYMLINK_NOFOLLOW);
+ return 0;
+ }
+ }
+ } else {
+ log_device_debug(dev, "Creating symlink '%s' to '%s'", slink, target);
+ do {
+ r = mkdir_parents_label(slink, 0755);
+ if (!IN_SET(r, 0, -ENOENT))
+ break;
+ mac_selinux_create_file_prepare(slink, S_IFLNK);
+ if (symlink(target, slink) < 0)
+ r = -errno;
+ mac_selinux_create_file_clear();
+ } while (r == -ENOENT);
+ if (r == 0)
+ return 0;
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to create symlink '%s' to '%s', trying to replace '%s': %m", slink, target, slink);
+ }
+
+ log_device_debug(dev, "Atomically replace '%s'", slink);
+ r = device_get_id_filename(dev, &id_filename);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get id_filename: %m");
+ slink_tmp = strjoina(slink, ".tmp-", id_filename);
+ (void) unlink(slink_tmp);
+ do {
+ r = mkdir_parents_label(slink_tmp, 0755);
+ if (!IN_SET(r, 0, -ENOENT))
+ break;
+ mac_selinux_create_file_prepare(slink_tmp, S_IFLNK);
+ if (symlink(target, slink_tmp) < 0)
+ r = -errno;
+ mac_selinux_create_file_clear();
+ } while (r == -ENOENT);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to create symlink '%s' to '%s': %m", slink_tmp, target);
+
+ if (rename(slink_tmp, slink) < 0) {
+ r = log_device_error_errno(dev, errno, "Failed to rename '%s' to '%s' failed: %m", slink_tmp, slink);
+ (void) unlink(slink_tmp);
+ }
+
+ return r;
+}
+
+/* find device node of device with highest priority */
+static int link_find_prioritized(sd_device *dev, bool add, const char *stackdir, char **ret) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ _cleanup_free_ char *target = NULL;
+ struct dirent *dent;
+ int r, priority = 0;
+
+ assert(!add || dev);
+ assert(stackdir);
+ assert(ret);
+
+ if (add) {
+ const char *devnode;
+
+ r = device_get_devlink_priority(dev, &priority);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devname(dev, &devnode);
+ if (r < 0)
+ return r;
+
+ target = strdup(devnode);
+ if (!target)
+ return -ENOMEM;
+ }
+
+ dir = opendir(stackdir);
+ if (!dir) {
+ if (target) {
+ *ret = TAKE_PTR(target);
+ return 0;
+ }
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(dent, dir, break) {
+ _cleanup_(sd_device_unrefp) sd_device *dev_db = NULL;
+ const char *devnode, *id_filename;
+ int db_prio = 0;
+
+ if (dent->d_name[0] == '\0')
+ break;
+ if (dent->d_name[0] == '.')
+ continue;
+
+ log_device_debug(dev, "Found '%s' claiming '%s'", dent->d_name, stackdir);
+
+ if (device_get_id_filename(dev, &id_filename) < 0)
+ continue;
+
+ /* did we find ourself? */
+ if (streq(dent->d_name, id_filename))
+ continue;
+
+ if (sd_device_new_from_device_id(&dev_db, dent->d_name) < 0)
+ continue;
+
+ if (sd_device_get_devname(dev_db, &devnode) < 0)
+ continue;
+
+ if (device_get_devlink_priority(dev_db, &db_prio) < 0)
+ continue;
+
+ if (target && db_prio <= priority)
+ continue;
+
+ log_device_debug(dev_db, "Device claims priority %i for '%s'", db_prio, stackdir);
+
+ r = free_and_strdup(&target, devnode);
+ if (r < 0)
+ return r;
+ priority = db_prio;
+ }
+
+ if (!target)
+ return -ENOENT;
+
+ *ret = TAKE_PTR(target);
+ return 0;
+}
+
+/* manage "stack of names" with possibly specified device priorities */
+static int link_update(sd_device *dev, const char *slink, bool add) {
+ _cleanup_free_ char *target = NULL, *filename = NULL, *dirname = NULL;
+ char name_enc[PATH_MAX];
+ const char *id_filename;
+ int r;
+
+ assert(dev);
+ assert(slink);
+
+ r = device_get_id_filename(dev, &id_filename);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get id_filename: %m");
+
+ util_path_encode(slink + STRLEN("/dev"), name_enc, sizeof(name_enc));
+ dirname = path_join("/run/udev/links/", name_enc);
+ if (!dirname)
+ return log_oom();
+ filename = path_join(dirname, id_filename);
+ if (!filename)
+ return log_oom();
+
+ if (!add && unlink(filename) == 0)
+ (void) rmdir(dirname);
+
+ r = link_find_prioritized(dev, add, dirname, &target);
+ if (r < 0) {
+ log_device_debug(dev, "No reference left, removing '%s'", slink);
+ if (unlink(slink) == 0)
+ (void) rmdir_parents(slink, "/");
+ } else
+ (void) node_symlink(dev, target, slink);
+
+ if (add)
+ do {
+ _cleanup_close_ int fd = -1;
+
+ r = mkdir_parents(filename, 0755);
+ if (!IN_SET(r, 0, -ENOENT))
+ break;
+ fd = open(filename, O_WRONLY|O_CREAT|O_CLOEXEC|O_TRUNC|O_NOFOLLOW, 0444);
+ if (fd < 0)
+ r = -errno;
+ } while (r == -ENOENT);
+
+ return r;
+}
+
+int udev_node_update_old_links(sd_device *dev, sd_device *dev_old) {
+ const char *name, *devpath;
+ int r;
+
+ assert(dev);
+ assert(dev_old);
+
+ r = sd_device_get_devpath(dev, &devpath);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get devpath: %m");
+
+ /* update possible left-over symlinks */
+ FOREACH_DEVICE_DEVLINK(dev_old, name) {
+ const char *name_current;
+ bool found = false;
+
+ /* check if old link name still belongs to this device */
+ FOREACH_DEVICE_DEVLINK(dev, name_current)
+ if (streq(name, name_current)) {
+ found = true;
+ break;
+ }
+
+ if (found)
+ continue;
+
+ log_device_debug(dev, "Updating old name, '%s' no longer belonging to '%s'",
+ name, devpath);
+ link_update(dev, name, false);
+ }
+
+ return 0;
+}
+
+static int node_permissions_apply(sd_device *dev, bool apply,
+ mode_t mode, uid_t uid, gid_t gid,
+ Hashmap *seclabel_list) {
+ const char *devnode, *subsystem, *id_filename = NULL;
+ struct stat stats;
+ dev_t devnum;
+ int r = 0;
+
+ assert(dev);
+
+ r = sd_device_get_devname(dev, &devnode);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get devname: %m");
+ r = sd_device_get_subsystem(dev, &subsystem);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get subsystem: %m");
+ r = sd_device_get_devnum(dev, &devnum);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get devnum: %m");
+ (void) device_get_id_filename(dev, &id_filename);
+
+ if (streq(subsystem, "block"))
+ mode |= S_IFBLK;
+ else
+ mode |= S_IFCHR;
+
+ if (lstat(devnode, &stats) < 0)
+ return log_device_debug_errno(dev, errno, "cannot stat() node '%s' (%m)", devnode);
+
+ if (((stats.st_mode & S_IFMT) != (mode & S_IFMT)) || (stats.st_rdev != devnum))
+ return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EEXIST), "Found node '%s' with non-matching devnum %s, skip handling",
+ devnode, id_filename);
+
+ if (apply) {
+ bool selinux = false, smack = false;
+ const char *name, *label;
+ Iterator i;
+
+ if ((stats.st_mode & 0777) != (mode & 0777) || stats.st_uid != uid || stats.st_gid != gid) {
+ log_device_debug(dev, "Setting permissions %s, %#o, uid=%u, gid=%u", devnode, mode, uid, gid);
+ if (chmod(devnode, mode) < 0)
+ r = log_device_warning_errno(dev, errno, "Failed to set mode of %s to %#o: %m", devnode, mode);
+ if (chown(devnode, uid, gid) < 0)
+ r = log_device_warning_errno(dev, errno, "Failed to set owner of %s to uid=%u, gid=%u: %m", devnode, uid, gid);
+ } else
+ log_device_debug(dev, "Preserve permissions of %s, %#o, uid=%u, gid=%u", devnode, mode, uid, gid);
+
+ /* apply SECLABEL{$module}=$label */
+ HASHMAP_FOREACH_KEY(label, name, seclabel_list, i) {
+ int q;
+
+ if (streq(name, "selinux")) {
+ selinux = true;
+
+ q = mac_selinux_apply(devnode, label);
+ if (q < 0)
+ log_device_error_errno(dev, q, "SECLABEL: failed to set SELinux label '%s': %m", label);
+ else
+ log_device_debug(dev, "SECLABEL: set SELinux label '%s'", label);
+
+ } else if (streq(name, "smack")) {
+ smack = true;
+
+ q = mac_smack_apply(devnode, SMACK_ATTR_ACCESS, label);
+ if (q < 0)
+ log_device_error_errno(dev, q, "SECLABEL: failed to set SMACK label '%s': %m", label);
+ else
+ log_device_debug(dev, "SECLABEL: set SMACK label '%s'", label);
+
+ } else
+ log_device_error(dev, "SECLABEL: unknown subsystem, ignoring '%s'='%s'", name, label);
+ }
+
+ /* set the defaults */
+ if (!selinux)
+ (void) mac_selinux_fix(devnode, LABEL_IGNORE_ENOENT);
+ if (!smack)
+ (void) mac_smack_apply(devnode, SMACK_ATTR_ACCESS, NULL);
+ }
+
+ /* always update timestamp when we re-use the node, like on media change events */
+ (void) utimensat(AT_FDCWD, devnode, NULL, 0);
+
+ return r;
+}
+
+static int xsprintf_dev_num_path_from_sd_device(sd_device *dev, char **ret) {
+ char filename[DEV_NUM_PATH_MAX], *s;
+ const char *subsystem;
+ dev_t devnum;
+ int r;
+
+ assert(ret);
+
+ r = sd_device_get_subsystem(dev, &subsystem);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devnum(dev, &devnum);
+ if (r < 0)
+ return r;
+
+ xsprintf_dev_num_path(filename,
+ streq(subsystem, "block") ? "block" : "char",
+ devnum);
+
+ s = strdup(filename);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int udev_node_add(sd_device *dev, bool apply,
+ mode_t mode, uid_t uid, gid_t gid,
+ Hashmap *seclabel_list) {
+ const char *devnode, *devlink;
+ _cleanup_free_ char *filename = NULL;
+ int r;
+
+ assert(dev);
+
+ r = sd_device_get_devname(dev, &devnode);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get devnode: %m");
+
+ if (DEBUG_LOGGING) {
+ const char *id_filename = NULL;
+
+ (void) device_get_id_filename(dev, &id_filename);
+ log_device_debug(dev, "Handling device node '%s', devnum=%s, mode=%#o, uid="UID_FMT", gid="GID_FMT,
+ devnode, strnull(id_filename), mode, uid, gid);
+ }
+
+ r = node_permissions_apply(dev, apply, mode, uid, gid, seclabel_list);
+ if (r < 0)
+ return r;
+
+ r = xsprintf_dev_num_path_from_sd_device(dev, &filename);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get device path: %m");
+
+ /* always add /dev/{block,char}/$major:$minor */
+ (void) node_symlink(dev, devnode, filename);
+
+ /* create/update symlinks, add symlinks to name index */
+ FOREACH_DEVICE_DEVLINK(dev, devlink)
+ (void) link_update(dev, devlink, true);
+
+ return 0;
+}
+
+int udev_node_remove(sd_device *dev) {
+ _cleanup_free_ char *filename = NULL;
+ const char *devlink;
+ int r;
+
+ assert(dev);
+
+ /* remove/update symlinks, remove symlinks from name index */
+ FOREACH_DEVICE_DEVLINK(dev, devlink)
+ (void) link_update(dev, devlink, false);
+
+ r = xsprintf_dev_num_path_from_sd_device(dev, &filename);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get device path: %m");
+
+ /* remove /dev/{block,char}/$major:$minor */
+ (void) unlink(filename);
+
+ return 0;
+}
diff --git a/src/udev/udev-node.h b/src/udev/udev-node.h
new file mode 100644
index 0000000..223c8f0
--- /dev/null
+++ b/src/udev/udev-node.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-device.h"
+
+#include "hashmap.h"
+
+int udev_node_add(sd_device *dev, bool apply,
+ mode_t mode, uid_t uid, gid_t gid,
+ Hashmap *seclabel_list);
+int udev_node_remove(sd_device *dev);
+int udev_node_update_old_links(sd_device *dev, sd_device *dev_old);
diff --git a/src/udev/udev-rules.c b/src/udev/udev-rules.c
new file mode 100644
index 0000000..bc9c6c2
--- /dev/null
+++ b/src/udev/udev-rules.c
@@ -0,0 +1,2664 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "libudev-util.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "strbuf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "sysctl-util.h"
+#include "udev-builtin.h"
+#include "udev.h"
+#include "user-util.h"
+#include "util.h"
+
+#define PREALLOC_TOKEN 2048
+
+struct uid_gid {
+ unsigned name_off;
+ union {
+ uid_t uid;
+ gid_t gid;
+ };
+};
+
+static const char* const rules_dirs[] = {
+ "/etc/udev/rules.d",
+ "/run/udev/rules.d",
+ UDEVLIBEXECDIR "/rules.d",
+ NULL
+};
+
+struct UdevRules {
+ usec_t dirs_ts_usec;
+ ResolveNameTiming resolve_name_timing;
+
+ /* every key in the rules file becomes a token */
+ struct token *tokens;
+ unsigned token_cur;
+ unsigned token_max;
+
+ /* all key strings are copied and de-duplicated in a single continuous string buffer */
+ struct strbuf *strbuf;
+
+ /* during rule parsing, uid/gid lookup results are cached */
+ struct uid_gid *uids;
+ unsigned uids_cur;
+ unsigned uids_max;
+ struct uid_gid *gids;
+ unsigned gids_cur;
+ unsigned gids_max;
+};
+
+static char *rules_str(UdevRules *rules, unsigned off) {
+ return rules->strbuf->buf + off;
+}
+
+static unsigned rules_add_string(UdevRules *rules, const char *s) {
+ return strbuf_add_string(rules->strbuf, s, strlen(s));
+}
+
+/* KEY=="", KEY!="", KEY+="", KEY-="", KEY="", KEY:="" */
+enum operation_type {
+ OP_UNSET,
+
+ OP_MATCH,
+ OP_NOMATCH,
+ OP_MATCH_MAX,
+
+ OP_ADD,
+ OP_REMOVE,
+ OP_ASSIGN,
+ OP_ASSIGN_FINAL,
+};
+
+enum string_glob_type {
+ GL_UNSET,
+ GL_PLAIN, /* no special chars */
+ GL_GLOB, /* shell globs ?,*,[] */
+ GL_SPLIT, /* multi-value A|B */
+ GL_SPLIT_GLOB, /* multi-value with glob A*|B* */
+ GL_SOMETHING, /* commonly used "?*" */
+};
+
+enum string_subst_type {
+ SB_UNSET,
+ SB_NONE,
+ SB_FORMAT,
+ SB_SUBSYS,
+};
+
+/* tokens of a rule are sorted/handled in this order */
+enum token_type {
+ TK_UNSET,
+ TK_RULE,
+
+ TK_M_ACTION, /* val */
+ TK_M_DEVPATH, /* val */
+ TK_M_KERNEL, /* val */
+ TK_M_DEVLINK, /* val */
+ TK_M_NAME, /* val */
+ TK_M_ENV, /* val, attr */
+ TK_M_TAG, /* val */
+ TK_M_SUBSYSTEM, /* val */
+ TK_M_DRIVER, /* val */
+ TK_M_WAITFOR, /* val */
+ TK_M_ATTR, /* val, attr */
+ TK_M_SYSCTL, /* val, attr */
+
+ TK_M_PARENTS_MIN,
+ TK_M_KERNELS, /* val */
+ TK_M_SUBSYSTEMS, /* val */
+ TK_M_DRIVERS, /* val */
+ TK_M_ATTRS, /* val, attr */
+ TK_M_TAGS, /* val */
+ TK_M_PARENTS_MAX,
+
+ TK_M_TEST, /* val, mode_t */
+ TK_M_PROGRAM, /* val */
+ TK_M_IMPORT_FILE, /* val */
+ TK_M_IMPORT_PROG, /* val */
+ TK_M_IMPORT_BUILTIN, /* val */
+ TK_M_IMPORT_DB, /* val */
+ TK_M_IMPORT_CMDLINE, /* val */
+ TK_M_IMPORT_PARENT, /* val */
+ TK_M_RESULT, /* val */
+ TK_M_MAX,
+
+ TK_A_STRING_ESCAPE_NONE,
+ TK_A_STRING_ESCAPE_REPLACE,
+ TK_A_DB_PERSIST,
+ TK_A_INOTIFY_WATCH, /* int */
+ TK_A_DEVLINK_PRIO, /* int */
+ TK_A_OWNER, /* val */
+ TK_A_GROUP, /* val */
+ TK_A_MODE, /* val */
+ TK_A_OWNER_ID, /* uid_t */
+ TK_A_GROUP_ID, /* gid_t */
+ TK_A_MODE_ID, /* mode_t */
+ TK_A_TAG, /* val */
+ TK_A_STATIC_NODE, /* val */
+ TK_A_SECLABEL, /* val, attr */
+ TK_A_ENV, /* val, attr */
+ TK_A_NAME, /* val */
+ TK_A_DEVLINK, /* val */
+ TK_A_ATTR, /* val, attr */
+ TK_A_SYSCTL, /* val, attr */
+ TK_A_RUN_BUILTIN, /* val, bool */
+ TK_A_RUN_PROGRAM, /* val, bool */
+ TK_A_GOTO, /* size_t */
+
+ TK_END,
+};
+
+/* we try to pack stuff in a way that we take only 12 bytes per token */
+struct token {
+ union {
+ unsigned char type; /* same in rule and key */
+ struct {
+ enum token_type type:8;
+ bool can_set_name:1;
+ bool has_static_node:1;
+ unsigned unused:6;
+ unsigned short token_count;
+ unsigned label_off;
+ unsigned short filename_off;
+ unsigned short filename_line;
+ } rule;
+ struct {
+ enum token_type type:8;
+ enum operation_type op:8;
+ enum string_glob_type glob:8;
+ enum string_subst_type subst:4;
+ enum string_subst_type attrsubst:4;
+ unsigned value_off;
+ union {
+ unsigned attr_off;
+ unsigned rule_goto;
+ mode_t mode;
+ uid_t uid;
+ gid_t gid;
+ int devlink_prio;
+ int watch;
+ enum udev_builtin_cmd builtin_cmd;
+ };
+ } key;
+ };
+};
+
+#define MAX_TK 64
+struct rule_tmp {
+ UdevRules *rules;
+ struct token rule;
+ struct token token[MAX_TK];
+ unsigned token_cur;
+};
+
+#if ENABLE_DEBUG_UDEV
+static const char *operation_str(enum operation_type type) {
+ static const char *operation_strs[] = {
+ [OP_UNSET] = "UNSET",
+ [OP_MATCH] = "match",
+ [OP_NOMATCH] = "nomatch",
+ [OP_MATCH_MAX] = "MATCH_MAX",
+
+ [OP_ADD] = "add",
+ [OP_REMOVE] = "remove",
+ [OP_ASSIGN] = "assign",
+ [OP_ASSIGN_FINAL] = "assign-final",
+ };
+
+ return operation_strs[type];
+}
+
+static const char *string_glob_str(enum string_glob_type type) {
+ static const char *string_glob_strs[] = {
+ [GL_UNSET] = "UNSET",
+ [GL_PLAIN] = "plain",
+ [GL_GLOB] = "glob",
+ [GL_SPLIT] = "split",
+ [GL_SPLIT_GLOB] = "split-glob",
+ [GL_SOMETHING] = "split-glob",
+ };
+
+ return string_glob_strs[type];
+}
+
+static const char *token_str(enum token_type type) {
+ static const char *token_strs[] = {
+ [TK_UNSET] = "UNSET",
+ [TK_RULE] = "RULE",
+
+ [TK_M_ACTION] = "M ACTION",
+ [TK_M_DEVPATH] = "M DEVPATH",
+ [TK_M_KERNEL] = "M KERNEL",
+ [TK_M_DEVLINK] = "M DEVLINK",
+ [TK_M_NAME] = "M NAME",
+ [TK_M_ENV] = "M ENV",
+ [TK_M_TAG] = "M TAG",
+ [TK_M_SUBSYSTEM] = "M SUBSYSTEM",
+ [TK_M_DRIVER] = "M DRIVER",
+ [TK_M_WAITFOR] = "M WAITFOR",
+ [TK_M_ATTR] = "M ATTR",
+ [TK_M_SYSCTL] = "M SYSCTL",
+
+ [TK_M_PARENTS_MIN] = "M PARENTS_MIN",
+ [TK_M_KERNELS] = "M KERNELS",
+ [TK_M_SUBSYSTEMS] = "M SUBSYSTEMS",
+ [TK_M_DRIVERS] = "M DRIVERS",
+ [TK_M_ATTRS] = "M ATTRS",
+ [TK_M_TAGS] = "M TAGS",
+ [TK_M_PARENTS_MAX] = "M PARENTS_MAX",
+
+ [TK_M_TEST] = "M TEST",
+ [TK_M_PROGRAM] = "M PROGRAM",
+ [TK_M_IMPORT_FILE] = "M IMPORT_FILE",
+ [TK_M_IMPORT_PROG] = "M IMPORT_PROG",
+ [TK_M_IMPORT_BUILTIN] = "M IMPORT_BUILTIN",
+ [TK_M_IMPORT_DB] = "M IMPORT_DB",
+ [TK_M_IMPORT_CMDLINE] = "M IMPORT_CMDLINE",
+ [TK_M_IMPORT_PARENT] = "M IMPORT_PARENT",
+ [TK_M_RESULT] = "M RESULT",
+ [TK_M_MAX] = "M MAX",
+
+ [TK_A_STRING_ESCAPE_NONE] = "A STRING_ESCAPE_NONE",
+ [TK_A_STRING_ESCAPE_REPLACE] = "A STRING_ESCAPE_REPLACE",
+ [TK_A_DB_PERSIST] = "A DB_PERSIST",
+ [TK_A_INOTIFY_WATCH] = "A INOTIFY_WATCH",
+ [TK_A_DEVLINK_PRIO] = "A DEVLINK_PRIO",
+ [TK_A_OWNER] = "A OWNER",
+ [TK_A_GROUP] = "A GROUP",
+ [TK_A_MODE] = "A MODE",
+ [TK_A_OWNER_ID] = "A OWNER_ID",
+ [TK_A_GROUP_ID] = "A GROUP_ID",
+ [TK_A_STATIC_NODE] = "A STATIC_NODE",
+ [TK_A_SECLABEL] = "A SECLABEL",
+ [TK_A_MODE_ID] = "A MODE_ID",
+ [TK_A_ENV] = "A ENV",
+ [TK_A_TAG] = "A ENV",
+ [TK_A_NAME] = "A NAME",
+ [TK_A_DEVLINK] = "A DEVLINK",
+ [TK_A_ATTR] = "A ATTR",
+ [TK_A_SYSCTL] = "A SYSCTL",
+ [TK_A_RUN_BUILTIN] = "A RUN_BUILTIN",
+ [TK_A_RUN_PROGRAM] = "A RUN_PROGRAM",
+ [TK_A_GOTO] = "A GOTO",
+
+ [TK_END] = "END",
+ };
+
+ return token_strs[type];
+}
+
+static void dump_token(UdevRules *rules, struct token *token) {
+ enum token_type type = token->type;
+ enum operation_type op = token->key.op;
+ enum string_glob_type glob = token->key.glob;
+ const char *value = rules_str(rules, token->key.value_off);
+ const char *attr = &rules->strbuf->buf[token->key.attr_off];
+
+ switch (type) {
+ case TK_RULE:
+ {
+ const char *tks_ptr = (char *)rules->tokens;
+ const char *tk_ptr = (char *)token;
+ unsigned idx = (tk_ptr - tks_ptr) / sizeof(struct token);
+
+ log_debug("* RULE %s:%u, token: %u, count: %u, label: '%s'",
+ &rules->strbuf->buf[token->rule.filename_off], token->rule.filename_line,
+ idx, token->rule.token_count,
+ &rules->strbuf->buf[token->rule.label_off]);
+ break;
+ }
+ case TK_M_ACTION:
+ case TK_M_DEVPATH:
+ case TK_M_KERNEL:
+ case TK_M_SUBSYSTEM:
+ case TK_M_DRIVER:
+ case TK_M_WAITFOR:
+ case TK_M_DEVLINK:
+ case TK_M_NAME:
+ case TK_M_KERNELS:
+ case TK_M_SUBSYSTEMS:
+ case TK_M_DRIVERS:
+ case TK_M_TAGS:
+ case TK_M_PROGRAM:
+ case TK_M_IMPORT_FILE:
+ case TK_M_IMPORT_PROG:
+ case TK_M_IMPORT_DB:
+ case TK_M_IMPORT_CMDLINE:
+ case TK_M_IMPORT_PARENT:
+ case TK_M_RESULT:
+ case TK_A_NAME:
+ case TK_A_DEVLINK:
+ case TK_A_OWNER:
+ case TK_A_GROUP:
+ case TK_A_MODE:
+ case TK_A_RUN_BUILTIN:
+ case TK_A_RUN_PROGRAM:
+ log_debug("%s %s '%s'(%s)",
+ token_str(type), operation_str(op), value, string_glob_str(glob));
+ break;
+ case TK_M_IMPORT_BUILTIN:
+ log_debug("%s %i '%s'", token_str(type), token->key.builtin_cmd, value);
+ break;
+ case TK_M_ATTR:
+ case TK_M_SYSCTL:
+ case TK_M_ATTRS:
+ case TK_M_ENV:
+ case TK_A_ATTR:
+ case TK_A_SYSCTL:
+ case TK_A_ENV:
+ log_debug("%s %s '%s' '%s'(%s)",
+ token_str(type), operation_str(op), attr, value, string_glob_str(glob));
+ break;
+ case TK_M_TAG:
+ case TK_A_TAG:
+ log_debug("%s %s '%s'", token_str(type), operation_str(op), value);
+ break;
+ case TK_A_STRING_ESCAPE_NONE:
+ case TK_A_STRING_ESCAPE_REPLACE:
+ case TK_A_DB_PERSIST:
+ log_debug("%s", token_str(type));
+ break;
+ case TK_M_TEST:
+ log_debug("%s %s '%s'(%s) %#o",
+ token_str(type), operation_str(op), value, string_glob_str(glob), token->key.mode);
+ break;
+ case TK_A_INOTIFY_WATCH:
+ log_debug("%s %u", token_str(type), token->key.watch);
+ break;
+ case TK_A_DEVLINK_PRIO:
+ log_debug("%s %u", token_str(type), token->key.devlink_prio);
+ break;
+ case TK_A_OWNER_ID:
+ log_debug("%s %s %u", token_str(type), operation_str(op), token->key.uid);
+ break;
+ case TK_A_GROUP_ID:
+ log_debug("%s %s %u", token_str(type), operation_str(op), token->key.gid);
+ break;
+ case TK_A_MODE_ID:
+ log_debug("%s %s %#o", token_str(type), operation_str(op), token->key.mode);
+ break;
+ case TK_A_STATIC_NODE:
+ log_debug("%s '%s'", token_str(type), value);
+ break;
+ case TK_A_SECLABEL:
+ log_debug("%s %s '%s' '%s'", token_str(type), operation_str(op), attr, value);
+ break;
+ case TK_A_GOTO:
+ log_debug("%s '%s' %u", token_str(type), value, token->key.rule_goto);
+ break;
+ case TK_END:
+ log_debug("* %s", token_str(type));
+ break;
+ case TK_M_PARENTS_MIN:
+ case TK_M_PARENTS_MAX:
+ case TK_M_MAX:
+ case TK_UNSET:
+ log_debug("Unknown token type %u", type);
+ break;
+ }
+}
+
+static void dump_rules(UdevRules *rules) {
+ unsigned i;
+
+ log_debug("Dumping %u (%zu bytes) tokens, %zu (%zu bytes) strings",
+ rules->token_cur,
+ rules->token_cur * sizeof(struct token),
+ rules->strbuf->nodes_count,
+ rules->strbuf->len);
+ for (i = 0; i < rules->token_cur; i++)
+ dump_token(rules, &rules->tokens[i]);
+}
+#else
+static void dump_token(UdevRules *rules, struct token *token) {}
+static void dump_rules(UdevRules *rules) {}
+#endif /* ENABLE_DEBUG_UDEV */
+
+static int add_token(UdevRules *rules, struct token *token) {
+ /* grow buffer if needed */
+ if (rules->token_cur+1 >= rules->token_max) {
+ struct token *tokens;
+ unsigned add;
+
+ /* double the buffer size */
+ add = rules->token_max;
+ if (add < 8)
+ add = 8;
+
+ tokens = reallocarray(rules->tokens, rules->token_max + add, sizeof(struct token));
+ if (!tokens)
+ return -1;
+ rules->tokens = tokens;
+ rules->token_max += add;
+ }
+ memcpy(&rules->tokens[rules->token_cur], token, sizeof(struct token));
+ rules->token_cur++;
+ return 0;
+}
+
+static void log_unknown_owner(sd_device *dev, int error, const char *entity, const char *owner) {
+ if (IN_SET(abs(error), ENOENT, ESRCH))
+ log_device_error(dev, "Specified %s '%s' unknown", entity, owner);
+ else
+ log_device_error_errno(dev, error, "Failed to resolve %s '%s': %m", entity, owner);
+}
+
+static uid_t add_uid(UdevRules *rules, const char *owner) {
+ unsigned i;
+ uid_t uid = 0;
+ unsigned off;
+ int r;
+
+ /* lookup, if we know it already */
+ for (i = 0; i < rules->uids_cur; i++) {
+ off = rules->uids[i].name_off;
+ if (streq(rules_str(rules, off), owner)) {
+ uid = rules->uids[i].uid;
+ return uid;
+ }
+ }
+ r = get_user_creds(&owner, &uid, NULL, NULL, NULL, USER_CREDS_ALLOW_MISSING);
+ if (r < 0)
+ log_unknown_owner(NULL, r, "user", owner);
+
+ /* grow buffer if needed */
+ if (rules->uids_cur+1 >= rules->uids_max) {
+ struct uid_gid *uids;
+ unsigned add;
+
+ /* double the buffer size */
+ add = rules->uids_max;
+ if (add < 1)
+ add = 8;
+
+ uids = reallocarray(rules->uids, rules->uids_max + add, sizeof(struct uid_gid));
+ if (!uids)
+ return uid;
+ rules->uids = uids;
+ rules->uids_max += add;
+ }
+ rules->uids[rules->uids_cur].uid = uid;
+ off = rules_add_string(rules, owner);
+ if (off <= 0)
+ return uid;
+ rules->uids[rules->uids_cur].name_off = off;
+ rules->uids_cur++;
+ return uid;
+}
+
+static gid_t add_gid(UdevRules *rules, const char *group) {
+ unsigned i;
+ gid_t gid = 0;
+ unsigned off;
+ int r;
+
+ /* lookup, if we know it already */
+ for (i = 0; i < rules->gids_cur; i++) {
+ off = rules->gids[i].name_off;
+ if (streq(rules_str(rules, off), group)) {
+ gid = rules->gids[i].gid;
+ return gid;
+ }
+ }
+ r = get_group_creds(&group, &gid, USER_CREDS_ALLOW_MISSING);
+ if (r < 0)
+ log_unknown_owner(NULL, r, "group", group);
+
+ /* grow buffer if needed */
+ if (rules->gids_cur+1 >= rules->gids_max) {
+ struct uid_gid *gids;
+ unsigned add;
+
+ /* double the buffer size */
+ add = rules->gids_max;
+ if (add < 1)
+ add = 8;
+
+ gids = reallocarray(rules->gids, rules->gids_max + add, sizeof(struct uid_gid));
+ if (!gids)
+ return gid;
+ rules->gids = gids;
+ rules->gids_max += add;
+ }
+ rules->gids[rules->gids_cur].gid = gid;
+ off = rules_add_string(rules, group);
+ if (off <= 0)
+ return gid;
+ rules->gids[rules->gids_cur].name_off = off;
+ rules->gids_cur++;
+ return gid;
+}
+
+static int import_property_from_string(sd_device *dev, char *line) {
+ char *key;
+ char *val;
+ size_t len;
+
+ /* find key */
+ key = line;
+ while (isspace(key[0]))
+ key++;
+
+ /* comment or empty line */
+ if (IN_SET(key[0], '#', '\0'))
+ return 0;
+
+ /* split key/value */
+ val = strchr(key, '=');
+ if (!val)
+ return -EINVAL;
+ val[0] = '\0';
+ val++;
+
+ /* find value */
+ while (isspace(val[0]))
+ val++;
+
+ /* terminate key */
+ len = strlen(key);
+ if (len == 0)
+ return -EINVAL;
+ while (isspace(key[len-1]))
+ len--;
+ key[len] = '\0';
+
+ /* terminate value */
+ len = strlen(val);
+ if (len == 0)
+ return -EINVAL;
+ while (isspace(val[len-1]))
+ len--;
+ val[len] = '\0';
+
+ if (len == 0)
+ return -EINVAL;
+
+ /* unquote */
+ if (IN_SET(val[0], '"', '\'')) {
+ if (len == 1 || val[len-1] != val[0])
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Inconsistent quoting: '%s', skip",
+ line);
+ val[len-1] = '\0';
+ val++;
+ }
+
+ return device_add_property(dev, key, val);
+}
+
+static int import_file_into_properties(sd_device *dev, const char *filename) {
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ f = fopen(filename, "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ (void) import_property_from_string(dev, line);
+ }
+
+ return 0;
+}
+
+static int import_program_into_properties(UdevEvent *event,
+ usec_t timeout_usec,
+ const char *program) {
+ char result[UTIL_LINE_SIZE];
+ char *line;
+ int r;
+
+ r = udev_event_spawn(event, timeout_usec, false, program, result, sizeof result);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return -EIO;
+
+ line = result;
+ while (line) {
+ char *pos;
+
+ pos = strchr(line, '\n');
+ if (pos) {
+ pos[0] = '\0';
+ pos = &pos[1];
+ }
+ (void) import_property_from_string(event->dev, line);
+ line = pos;
+ }
+ return 0;
+}
+
+static int import_parent_into_properties(sd_device *dev, const char *filter) {
+ const char *key, *val;
+ sd_device *parent;
+ int r;
+
+ assert(dev);
+ assert(filter);
+
+ r = sd_device_get_parent(dev, &parent);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE_PROPERTY(parent, key, val)
+ if (fnmatch(filter, key, 0) == 0)
+ device_add_property(dev, key, val);
+ return 0;
+}
+
+static void attr_subst_subdir(char *attr, size_t len) {
+ const char *pos, *tail, *path;
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *dent;
+
+ pos = strstr(attr, "/*/");
+ if (!pos)
+ return;
+
+ tail = pos + 2;
+ path = strndupa(attr, pos - attr + 1); /* include slash at end */
+ dir = opendir(path);
+ if (!dir)
+ return;
+
+ FOREACH_DIRENT_ALL(dent, dir, break)
+ if (dent->d_name[0] != '.') {
+ char n[strlen(dent->d_name) + strlen(tail) + 1];
+
+ strscpyl(n, sizeof n, dent->d_name, tail, NULL);
+ if (faccessat(dirfd(dir), n, F_OK, 0) == 0) {
+ strscpyl(attr, len, path, n, NULL);
+ break;
+ }
+ }
+}
+
+static int get_key(char **line, char **key, enum operation_type *op, char **value) {
+ char *linepos;
+ char *temp;
+ unsigned i, j;
+
+ linepos = *line;
+ if (!linepos || linepos[0] == '\0')
+ return -1;
+
+ /* skip whitespace */
+ while (isspace(linepos[0]) || linepos[0] == ',')
+ linepos++;
+
+ /* get the key */
+ if (linepos[0] == '\0')
+ return -1;
+ *key = linepos;
+
+ for (;;) {
+ linepos++;
+ if (linepos[0] == '\0')
+ return -1;
+ if (isspace(linepos[0]))
+ break;
+ if (linepos[0] == '=')
+ break;
+ if (IN_SET(linepos[0], '+', '-', '!', ':'))
+ if (linepos[1] == '=')
+ break;
+ }
+
+ /* remember end of key */
+ temp = linepos;
+
+ /* skip whitespace after key */
+ while (isspace(linepos[0]))
+ linepos++;
+ if (linepos[0] == '\0')
+ return -1;
+
+ /* get operation type */
+ if (linepos[0] == '=' && linepos[1] == '=') {
+ *op = OP_MATCH;
+ linepos += 2;
+ } else if (linepos[0] == '!' && linepos[1] == '=') {
+ *op = OP_NOMATCH;
+ linepos += 2;
+ } else if (linepos[0] == '+' && linepos[1] == '=') {
+ *op = OP_ADD;
+ linepos += 2;
+ } else if (linepos[0] == '-' && linepos[1] == '=') {
+ *op = OP_REMOVE;
+ linepos += 2;
+ } else if (linepos[0] == '=') {
+ *op = OP_ASSIGN;
+ linepos++;
+ } else if (linepos[0] == ':' && linepos[1] == '=') {
+ *op = OP_ASSIGN_FINAL;
+ linepos += 2;
+ } else
+ return -1;
+
+ /* terminate key */
+ temp[0] = '\0';
+
+ /* skip whitespace after operator */
+ while (isspace(linepos[0]))
+ linepos++;
+ if (linepos[0] == '\0')
+ return -1;
+
+ /* get the value */
+ if (linepos[0] == '"')
+ linepos++;
+ else
+ return -1;
+ *value = linepos;
+
+ /* terminate */
+ for (i = 0, j = 0; ; i++, j++) {
+
+ if (linepos[i] == '"')
+ break;
+
+ if (linepos[i] == '\0')
+ return -1;
+
+ /* double quotes can be escaped */
+ if (linepos[i] == '\\')
+ if (linepos[i+1] == '"')
+ i++;
+
+ linepos[j] = linepos[i];
+ }
+ linepos[j] = '\0';
+
+ /* move line to next key */
+ *line = linepos + i + 1;
+ return 0;
+}
+
+/* extract possible KEY{attr} */
+static const char *get_key_attribute(char *str) {
+ char *pos;
+ char *attr;
+
+ attr = strchr(str, '{');
+ if (attr) {
+ attr++;
+ pos = strchr(attr, '}');
+ if (!pos) {
+ log_error("Missing closing brace for format");
+ return NULL;
+ }
+ pos[0] = '\0';
+ return attr;
+ }
+ return NULL;
+}
+
+static int rule_add_key(struct rule_tmp *rule_tmp, enum token_type type,
+ enum operation_type op,
+ const char *value, const void *data) {
+ struct token *token = rule_tmp->token + rule_tmp->token_cur;
+ const char *attr = NULL;
+
+ if (rule_tmp->token_cur >= ELEMENTSOF(rule_tmp->token))
+ return -E2BIG;
+
+ memzero(token, sizeof(struct token));
+
+ switch (type) {
+ case TK_M_ACTION:
+ case TK_M_DEVPATH:
+ case TK_M_KERNEL:
+ case TK_M_SUBSYSTEM:
+ case TK_M_DRIVER:
+ case TK_M_WAITFOR:
+ case TK_M_DEVLINK:
+ case TK_M_NAME:
+ case TK_M_KERNELS:
+ case TK_M_SUBSYSTEMS:
+ case TK_M_DRIVERS:
+ case TK_M_TAGS:
+ case TK_M_PROGRAM:
+ case TK_M_IMPORT_FILE:
+ case TK_M_IMPORT_PROG:
+ case TK_M_IMPORT_DB:
+ case TK_M_IMPORT_CMDLINE:
+ case TK_M_IMPORT_PARENT:
+ case TK_M_RESULT:
+ case TK_A_OWNER:
+ case TK_A_GROUP:
+ case TK_A_MODE:
+ case TK_A_DEVLINK:
+ case TK_A_NAME:
+ case TK_A_GOTO:
+ case TK_M_TAG:
+ case TK_A_TAG:
+ case TK_A_STATIC_NODE:
+ token->key.value_off = rules_add_string(rule_tmp->rules, value);
+ break;
+ case TK_M_IMPORT_BUILTIN:
+ token->key.value_off = rules_add_string(rule_tmp->rules, value);
+ token->key.builtin_cmd = *(enum udev_builtin_cmd *)data;
+ break;
+ case TK_M_ENV:
+ case TK_M_ATTR:
+ case TK_M_SYSCTL:
+ case TK_M_ATTRS:
+ case TK_A_ATTR:
+ case TK_A_SYSCTL:
+ case TK_A_ENV:
+ case TK_A_SECLABEL:
+ attr = data;
+ token->key.value_off = rules_add_string(rule_tmp->rules, value);
+ token->key.attr_off = rules_add_string(rule_tmp->rules, attr);
+ break;
+ case TK_M_TEST:
+ token->key.value_off = rules_add_string(rule_tmp->rules, value);
+ if (data)
+ token->key.mode = *(mode_t *)data;
+ break;
+ case TK_A_STRING_ESCAPE_NONE:
+ case TK_A_STRING_ESCAPE_REPLACE:
+ case TK_A_DB_PERSIST:
+ break;
+ case TK_A_RUN_BUILTIN:
+ case TK_A_RUN_PROGRAM:
+ token->key.builtin_cmd = *(enum udev_builtin_cmd *)data;
+ token->key.value_off = rules_add_string(rule_tmp->rules, value);
+ break;
+ case TK_A_INOTIFY_WATCH:
+ case TK_A_DEVLINK_PRIO:
+ token->key.devlink_prio = *(int *)data;
+ break;
+ case TK_A_OWNER_ID:
+ token->key.uid = *(uid_t *)data;
+ break;
+ case TK_A_GROUP_ID:
+ token->key.gid = *(gid_t *)data;
+ break;
+ case TK_A_MODE_ID:
+ token->key.mode = *(mode_t *)data;
+ break;
+ case TK_RULE:
+ case TK_M_PARENTS_MIN:
+ case TK_M_PARENTS_MAX:
+ case TK_M_MAX:
+ case TK_END:
+ case TK_UNSET:
+ assert_not_reached("wrong type");
+ }
+
+ if (value && type < TK_M_MAX) {
+ /* check if we need to split or call fnmatch() while matching rules */
+ enum string_glob_type glob;
+ bool has_split, has_glob;
+
+ has_split = strchr(value, '|');
+ has_glob = string_is_glob(value);
+ if (has_split && has_glob)
+ glob = GL_SPLIT_GLOB;
+ else if (has_split)
+ glob = GL_SPLIT;
+ else if (has_glob) {
+ if (streq(value, "?*"))
+ glob = GL_SOMETHING;
+ else
+ glob = GL_GLOB;
+ } else
+ glob = GL_PLAIN;
+
+ token->key.glob = glob;
+ }
+
+ if (value && type > TK_M_MAX) {
+ /* check if assigned value has substitution chars */
+ if (value[0] == '[')
+ token->key.subst = SB_SUBSYS;
+ else if (strchr(value, '%') || strchr(value, '$'))
+ token->key.subst = SB_FORMAT;
+ else
+ token->key.subst = SB_NONE;
+ }
+
+ if (attr) {
+ /* check if property/attribute name has substitution chars */
+ if (attr[0] == '[')
+ token->key.attrsubst = SB_SUBSYS;
+ else if (strchr(attr, '%') || strchr(attr, '$'))
+ token->key.attrsubst = SB_FORMAT;
+ else
+ token->key.attrsubst = SB_NONE;
+ }
+
+ token->key.type = type;
+ token->key.op = op;
+ rule_tmp->token_cur++;
+
+ return 0;
+}
+
+static int sort_token(UdevRules *rules, struct rule_tmp *rule_tmp) {
+ unsigned i;
+ unsigned start = 0;
+ unsigned end = rule_tmp->token_cur;
+
+ for (i = 0; i < rule_tmp->token_cur; i++) {
+ enum token_type next_val = TK_UNSET;
+ unsigned next_idx = 0;
+ unsigned j;
+
+ /* find smallest value */
+ for (j = start; j < end; j++) {
+ if (rule_tmp->token[j].type == TK_UNSET)
+ continue;
+ if (next_val == TK_UNSET || rule_tmp->token[j].type < next_val) {
+ next_val = rule_tmp->token[j].type;
+ next_idx = j;
+ }
+ }
+
+ /* add token and mark done */
+ if (add_token(rules, &rule_tmp->token[next_idx]) != 0)
+ return -1;
+ rule_tmp->token[next_idx].type = TK_UNSET;
+
+ /* shrink range */
+ if (next_idx == start)
+ start++;
+ if (next_idx+1 == end)
+ end--;
+ }
+ return 0;
+}
+
+#define LOG_RULE_FULL(level, fmt, ...) log_full(level, "%s:%u: " fmt, filename, lineno, ##__VA_ARGS__)
+#define LOG_RULE_ERROR(fmt, ...) LOG_RULE_FULL(LOG_ERR, fmt, ##__VA_ARGS__)
+#define LOG_RULE_WARNING(fmt, ...) LOG_RULE_FULL(LOG_WARNING, fmt, ##__VA_ARGS__)
+#define LOG_RULE_DEBUG(fmt, ...) LOG_RULE_FULL(LOG_DEBUG, fmt, ##__VA_ARGS__)
+#define LOG_AND_RETURN(fmt, ...) { LOG_RULE_ERROR(fmt, __VA_ARGS__); return; }
+#define LOG_AND_RETURN_ADD_KEY LOG_AND_RETURN("Temporary rule array too small, aborting event processing with %u items", rule_tmp.token_cur);
+
+static void add_rule(UdevRules *rules, char *line,
+ const char *filename, unsigned filename_off, unsigned lineno) {
+ char *linepos;
+ const char *attr;
+ struct rule_tmp rule_tmp = {
+ .rules = rules,
+ .rule.type = TK_RULE,
+ };
+ int r;
+
+ /* the offset in the rule is limited to unsigned short */
+ if (filename_off < USHRT_MAX)
+ rule_tmp.rule.rule.filename_off = filename_off;
+ rule_tmp.rule.rule.filename_line = lineno;
+
+ linepos = line;
+ for (;;) {
+ char *key;
+ char *value;
+ enum operation_type op;
+
+ if (get_key(&linepos, &key, &op, &value) != 0) {
+ /* Avoid erroring on trailing whitespace. This is probably rare
+ * so save the work for the error case instead of always trying
+ * to strip the trailing whitespace with strstrip(). */
+ while (isblank(*linepos))
+ linepos++;
+
+ /* If we aren't at the end of the line, this is a parsing error.
+ * Make a best effort to describe where the problem is. */
+ if (!strchr(NEWLINE, *linepos)) {
+ char buf[2] = {*linepos};
+ _cleanup_free_ char *tmp;
+
+ tmp = cescape(buf);
+ LOG_RULE_ERROR("Invalid key/value pair, starting at character %tu ('%s')", linepos - line + 1, tmp);
+ if (*linepos == '#')
+ LOG_RULE_ERROR("Hint: comments can only start at beginning of line");
+ }
+ break;
+ }
+
+ if (streq(key, "ACTION")) {
+ if (op > OP_MATCH_MAX)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ if (rule_add_key(&rule_tmp, TK_M_ACTION, op, value, NULL) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (streq(key, "DEVPATH")) {
+ if (op > OP_MATCH_MAX)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ if (rule_add_key(&rule_tmp, TK_M_DEVPATH, op, value, NULL) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (streq(key, "KERNEL")) {
+ if (op > OP_MATCH_MAX)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ if (rule_add_key(&rule_tmp, TK_M_KERNEL, op, value, NULL) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (streq(key, "SUBSYSTEM")) {
+ if (op > OP_MATCH_MAX)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ /* bus, class, subsystem events should all be the same */
+ if (STR_IN_SET(value, "subsystem", "bus", "class")) {
+ if (!streq(value, "subsystem"))
+ LOG_RULE_WARNING("'%s' must be specified as 'subsystem'; please fix", value);
+
+ r = rule_add_key(&rule_tmp, TK_M_SUBSYSTEM, op, "subsystem|class|bus", NULL);
+ } else
+ r = rule_add_key(&rule_tmp, TK_M_SUBSYSTEM, op, value, NULL);
+ if (r < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (streq(key, "DRIVER")) {
+ if (op > OP_MATCH_MAX)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ if (rule_add_key(&rule_tmp, TK_M_DRIVER, op, value, NULL) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (startswith(key, "ATTR{")) {
+ attr = get_key_attribute(key + STRLEN("ATTR"));
+ if (!attr)
+ LOG_AND_RETURN("Failed to parse %s attribute", "ATTR");
+
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", "ATTR");
+
+ if (op < OP_MATCH_MAX)
+ r = rule_add_key(&rule_tmp, TK_M_ATTR, op, value, attr);
+ else
+ r = rule_add_key(&rule_tmp, TK_A_ATTR, op, value, attr);
+ if (r < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (startswith(key, "SYSCTL{")) {
+ attr = get_key_attribute(key + STRLEN("SYSCTL"));
+ if (!attr)
+ LOG_AND_RETURN("Failed to parse %s attribute", "ATTR");
+
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", "ATTR");
+
+ if (op < OP_MATCH_MAX)
+ r = rule_add_key(&rule_tmp, TK_M_SYSCTL, op, value, attr);
+ else
+ r = rule_add_key(&rule_tmp, TK_A_SYSCTL, op, value, attr);
+ if (r < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (startswith(key, "SECLABEL{")) {
+ attr = get_key_attribute(key + STRLEN("SECLABEL"));
+ if (!attr)
+ LOG_AND_RETURN("Failed to parse %s attribute", "SECLABEL");
+
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", "SECLABEL");
+
+ if (rule_add_key(&rule_tmp, TK_A_SECLABEL, op, value, attr) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (streq(key, "KERNELS")) {
+ if (op > OP_MATCH_MAX)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ if (rule_add_key(&rule_tmp, TK_M_KERNELS, op, value, NULL) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (streq(key, "SUBSYSTEMS")) {
+ if (op > OP_MATCH_MAX)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ if (rule_add_key(&rule_tmp, TK_M_SUBSYSTEMS, op, value, NULL) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (streq(key, "DRIVERS")) {
+ if (op > OP_MATCH_MAX)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ if (rule_add_key(&rule_tmp, TK_M_DRIVERS, op, value, NULL) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (startswith(key, "ATTRS{")) {
+ if (op > OP_MATCH_MAX)
+ LOG_AND_RETURN("Invalid %s operation", "ATTRS");
+
+ attr = get_key_attribute(key + STRLEN("ATTRS"));
+ if (!attr)
+ LOG_AND_RETURN("Failed to parse %s attribute", "ATTRS");
+
+ if (startswith(attr, "device/"))
+ LOG_RULE_WARNING("'device' link may not be available in future kernels; please fix");
+ if (strstr(attr, "../"))
+ LOG_RULE_WARNING("Direct reference to parent sysfs directory, may break in future kernels; please fix");
+ if (rule_add_key(&rule_tmp, TK_M_ATTRS, op, value, attr) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (streq(key, "TAGS")) {
+ if (op > OP_MATCH_MAX)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ if (rule_add_key(&rule_tmp, TK_M_TAGS, op, value, NULL) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (startswith(key, "ENV{")) {
+ attr = get_key_attribute(key + STRLEN("ENV"));
+ if (!attr)
+ LOG_AND_RETURN("Failed to parse %s attribute", "ENV");
+
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", "ENV");
+
+ if (op < OP_MATCH_MAX)
+ r = rule_add_key(&rule_tmp, TK_M_ENV, op, value, attr);
+ else {
+ if (STR_IN_SET(attr,
+ "ACTION",
+ "SUBSYSTEM",
+ "DEVTYPE",
+ "MAJOR",
+ "MINOR",
+ "DRIVER",
+ "IFINDEX",
+ "DEVNAME",
+ "DEVLINKS",
+ "DEVPATH",
+ "TAGS"))
+ LOG_AND_RETURN("Invalid ENV attribute, '%s' cannot be set", attr);
+
+ r = rule_add_key(&rule_tmp, TK_A_ENV, op, value, attr);
+ }
+ if (r < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (streq(key, "TAG")) {
+ if (op < OP_MATCH_MAX)
+ r = rule_add_key(&rule_tmp, TK_M_TAG, op, value, NULL);
+ else
+ r = rule_add_key(&rule_tmp, TK_A_TAG, op, value, NULL);
+ if (r < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (streq(key, "PROGRAM")) {
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ if (rule_add_key(&rule_tmp, TK_M_PROGRAM, op, value, NULL) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (streq(key, "RESULT")) {
+ if (op > OP_MATCH_MAX)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ if (rule_add_key(&rule_tmp, TK_M_RESULT, op, value, NULL) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (startswith(key, "IMPORT")) {
+ attr = get_key_attribute(key + STRLEN("IMPORT"));
+ if (!attr) {
+ LOG_RULE_WARNING("Ignoring IMPORT{} with missing type");
+ continue;
+ }
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", "IMPORT");
+
+ if (streq(attr, "program")) {
+ /* find known built-in command */
+ if (value[0] != '/') {
+ const enum udev_builtin_cmd cmd = udev_builtin_lookup(value);
+
+ if (cmd >= 0) {
+ LOG_RULE_DEBUG("IMPORT found builtin '%s', replacing", value);
+ if (rule_add_key(&rule_tmp, TK_M_IMPORT_BUILTIN, op, value, &cmd) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+ continue;
+ }
+ }
+ r = rule_add_key(&rule_tmp, TK_M_IMPORT_PROG, op, value, NULL);
+ } else if (streq(attr, "builtin")) {
+ const enum udev_builtin_cmd cmd = udev_builtin_lookup(value);
+
+ if (cmd < 0) {
+ LOG_RULE_WARNING("IMPORT{builtin} '%s' unknown, ignoring", value);
+ continue;
+ } else
+ r = rule_add_key(&rule_tmp, TK_M_IMPORT_BUILTIN, op, value, &cmd);
+ } else if (streq(attr, "file"))
+ r = rule_add_key(&rule_tmp, TK_M_IMPORT_FILE, op, value, NULL);
+ else if (streq(attr, "db"))
+ r = rule_add_key(&rule_tmp, TK_M_IMPORT_DB, op, value, NULL);
+ else if (streq(attr, "cmdline"))
+ r = rule_add_key(&rule_tmp, TK_M_IMPORT_CMDLINE, op, value, NULL);
+ else if (streq(attr, "parent"))
+ r = rule_add_key(&rule_tmp, TK_M_IMPORT_PARENT, op, value, NULL);
+ else {
+ LOG_RULE_ERROR("Ignoring unknown %s{} type '%s'", "IMPORT", attr);
+ continue;
+ }
+ if (r < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (startswith(key, "TEST")) {
+ mode_t mode = 0;
+
+ if (op > OP_MATCH_MAX)
+ LOG_AND_RETURN("Invalid %s operation", "TEST");
+
+ attr = get_key_attribute(key + STRLEN("TEST"));
+ if (attr) {
+ mode = strtol(attr, NULL, 8);
+ r = rule_add_key(&rule_tmp, TK_M_TEST, op, value, &mode);
+ } else
+ r = rule_add_key(&rule_tmp, TK_M_TEST, op, value, NULL);
+ if (r < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (startswith(key, "RUN")) {
+ attr = get_key_attribute(key + STRLEN("RUN"));
+ if (!attr)
+ attr = "program";
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", "RUN");
+
+ if (streq(attr, "builtin")) {
+ const enum udev_builtin_cmd cmd = udev_builtin_lookup(value);
+
+ if (cmd < 0) {
+ LOG_RULE_ERROR("RUN{builtin}: '%s' unknown, ignoring", value);
+ continue;
+ } else
+ r = rule_add_key(&rule_tmp, TK_A_RUN_BUILTIN, op, value, &cmd);
+ } else if (streq(attr, "program")) {
+ const enum udev_builtin_cmd cmd = _UDEV_BUILTIN_MAX;
+
+ r = rule_add_key(&rule_tmp, TK_A_RUN_PROGRAM, op, value, &cmd);
+ } else {
+ LOG_RULE_ERROR("Ignoring unknown %s{} type '%s'", "RUN", attr);
+ continue;
+ }
+ if (r < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (streq(key, "LABEL")) {
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ rule_tmp.rule.rule.label_off = rules_add_string(rules, value);
+
+ } else if (streq(key, "GOTO")) {
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ if (rule_add_key(&rule_tmp, TK_A_GOTO, 0, value, NULL) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ } else if (startswith(key, "NAME")) {
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ if (op < OP_MATCH_MAX)
+ r = rule_add_key(&rule_tmp, TK_M_NAME, op, value, NULL);
+ else {
+ if (streq(value, "%k")) {
+ LOG_RULE_WARNING("NAME=\"%%k\" is ignored, because it breaks kernel supplied names; please remove");
+ continue;
+ }
+ if (isempty(value)) {
+ LOG_RULE_DEBUG("NAME=\"\" is ignored, because udev will not delete any device nodes; please remove");
+ continue;
+ }
+ r = rule_add_key(&rule_tmp, TK_A_NAME, op, value, NULL);
+ }
+ if (r < 0)
+ LOG_AND_RETURN_ADD_KEY;
+ rule_tmp.rule.rule.can_set_name = true;
+
+ } else if (streq(key, "SYMLINK")) {
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ if (op < OP_MATCH_MAX)
+ r = rule_add_key(&rule_tmp, TK_M_DEVLINK, op, value, NULL);
+ else
+ r = rule_add_key(&rule_tmp, TK_A_DEVLINK, op, value, NULL);
+ if (r < 0)
+ LOG_AND_RETURN_ADD_KEY;
+ rule_tmp.rule.rule.can_set_name = true;
+
+ } else if (streq(key, "OWNER")) {
+ uid_t uid;
+ char *endptr;
+
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ uid = strtoul(value, &endptr, 10);
+ if (endptr[0] == '\0')
+ r = rule_add_key(&rule_tmp, TK_A_OWNER_ID, op, NULL, &uid);
+ else if (rules->resolve_name_timing == RESOLVE_NAME_EARLY && !strchr("$%", value[0])) {
+ uid = add_uid(rules, value);
+ r = rule_add_key(&rule_tmp, TK_A_OWNER_ID, op, NULL, &uid);
+ } else if (rules->resolve_name_timing != RESOLVE_NAME_NEVER)
+ r = rule_add_key(&rule_tmp, TK_A_OWNER, op, value, NULL);
+ else {
+ LOG_RULE_ERROR("Invalid %s operation", key);
+ continue;
+ }
+ if (r < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ rule_tmp.rule.rule.can_set_name = true;
+
+ } else if (streq(key, "GROUP")) {
+ gid_t gid;
+ char *endptr;
+
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ gid = strtoul(value, &endptr, 10);
+ if (endptr[0] == '\0')
+ r = rule_add_key(&rule_tmp, TK_A_GROUP_ID, op, NULL, &gid);
+ else if ((rules->resolve_name_timing == RESOLVE_NAME_EARLY) && !strchr("$%", value[0])) {
+ gid = add_gid(rules, value);
+ r = rule_add_key(&rule_tmp, TK_A_GROUP_ID, op, NULL, &gid);
+ } else if (rules->resolve_name_timing != RESOLVE_NAME_NEVER)
+ r = rule_add_key(&rule_tmp, TK_A_GROUP, op, value, NULL);
+ else {
+ LOG_RULE_ERROR("Invalid %s operation", key);
+ continue;
+ }
+ if (r < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ rule_tmp.rule.rule.can_set_name = true;
+
+ } else if (streq(key, "MODE")) {
+ mode_t mode;
+ char *endptr;
+
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ mode = strtol(value, &endptr, 8);
+ if (endptr[0] == '\0')
+ r = rule_add_key(&rule_tmp, TK_A_MODE_ID, op, NULL, &mode);
+ else
+ r = rule_add_key(&rule_tmp, TK_A_MODE, op, value, NULL);
+ if (r < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ rule_tmp.rule.rule.can_set_name = true;
+
+ } else if (streq(key, "OPTIONS")) {
+ const char *pos;
+
+ if (op == OP_REMOVE)
+ LOG_AND_RETURN("Invalid %s operation", key);
+
+ pos = strstr(value, "link_priority=");
+ if (pos) {
+ int prio = atoi(pos + STRLEN("link_priority="));
+
+ if (rule_add_key(&rule_tmp, TK_A_DEVLINK_PRIO, op, NULL, &prio) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+ }
+
+ pos = strstr(value, "string_escape=");
+ if (pos) {
+ pos += STRLEN("string_escape=");
+ if (startswith(pos, "none"))
+ r = rule_add_key(&rule_tmp, TK_A_STRING_ESCAPE_NONE, op, NULL, NULL);
+ else if (startswith(pos, "replace"))
+ r = rule_add_key(&rule_tmp, TK_A_STRING_ESCAPE_REPLACE, op, NULL, NULL);
+ else {
+ LOG_RULE_ERROR("OPTIONS: unknown string_escape mode '%s', ignoring", pos);
+ r = 0;
+ }
+ if (r < 0)
+ LOG_AND_RETURN_ADD_KEY;
+ }
+
+ pos = strstr(value, "db_persist");
+ if (pos)
+ if (rule_add_key(&rule_tmp, TK_A_DB_PERSIST, op, NULL, NULL) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+
+ pos = strstr(value, "nowatch");
+ if (pos) {
+ static const int zero = 0;
+ if (rule_add_key(&rule_tmp, TK_A_INOTIFY_WATCH, op, NULL, &zero) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+ } else {
+ static const int one = 1;
+ pos = strstr(value, "watch");
+ if (pos)
+ if (rule_add_key(&rule_tmp, TK_A_INOTIFY_WATCH, op, NULL, &one) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+ }
+
+ pos = strstr(value, "static_node=");
+ if (pos) {
+ pos += STRLEN("static_node=");
+ if (rule_add_key(&rule_tmp, TK_A_STATIC_NODE, op, pos, NULL) < 0)
+ LOG_AND_RETURN_ADD_KEY;
+ rule_tmp.rule.rule.has_static_node = true;
+ }
+
+ } else
+ LOG_AND_RETURN("Unknown key '%s'", key);
+ }
+
+ /* add rule token and sort tokens */
+ rule_tmp.rule.rule.token_count = 1 + rule_tmp.token_cur;
+ if (add_token(rules, &rule_tmp.rule) != 0 || sort_token(rules, &rule_tmp) != 0)
+ LOG_RULE_ERROR("Failed to add rule token");
+}
+
+static int parse_file(UdevRules *rules, const char *filename) {
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned first_token;
+ unsigned filename_off;
+ char line[UTIL_LINE_SIZE];
+ int line_nr = 0;
+ unsigned i;
+
+ f = fopen(filename, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ if (null_or_empty_fd(fileno(f))) {
+ log_debug("Skipping empty file: %s", filename);
+ return 0;
+ } else
+ log_debug("Reading rules file: %s", filename);
+
+ first_token = rules->token_cur;
+ filename_off = rules_add_string(rules, filename);
+
+ while (fgets(line, sizeof(line), f)) {
+ char *key;
+ size_t len;
+
+ /* skip whitespace */
+ line_nr++;
+ key = line;
+ while (isspace(key[0]))
+ key++;
+
+ /* comment */
+ if (key[0] == '#')
+ continue;
+
+ len = strlen(line);
+ if (len < 3)
+ continue;
+
+ /* continue reading if backslash+newline is found */
+ while (line[len-2] == '\\') {
+ if (!fgets(&line[len-2], (sizeof(line)-len)+2, f))
+ break;
+ if (strlen(&line[len-2]) < 2)
+ break;
+ line_nr++;
+ len = strlen(line);
+ }
+
+ if (len+1 >= sizeof(line)) {
+ log_error("Line too long '%s':%u, ignored", filename, line_nr);
+ continue;
+ }
+ add_rule(rules, key, filename, filename_off, line_nr);
+ }
+
+ /* link GOTOs to LABEL rules in this file to be able to fast-forward */
+ for (i = first_token+1; i < rules->token_cur; i++) {
+ if (rules->tokens[i].type == TK_A_GOTO) {
+ char *label = rules_str(rules, rules->tokens[i].key.value_off);
+ unsigned j;
+
+ for (j = i+1; j < rules->token_cur; j++) {
+ if (rules->tokens[j].type != TK_RULE)
+ continue;
+ if (rules->tokens[j].rule.label_off == 0)
+ continue;
+ if (!streq(label, rules_str(rules, rules->tokens[j].rule.label_off)))
+ continue;
+ rules->tokens[i].key.rule_goto = j;
+ break;
+ }
+ if (rules->tokens[i].key.rule_goto == 0)
+ log_error("GOTO '%s' has no matching label in: '%s'", label, filename);
+ }
+ }
+ return 0;
+}
+
+int udev_rules_new(UdevRules **ret_rules, ResolveNameTiming resolve_name_timing) {
+ _cleanup_(udev_rules_freep) UdevRules *rules = NULL;
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ int r;
+
+ assert(resolve_name_timing >= 0 && resolve_name_timing < _RESOLVE_NAME_TIMING_MAX);
+
+ rules = new(UdevRules, 1);
+ if (!rules)
+ return -ENOMEM;
+
+ *rules = (UdevRules) {
+ .resolve_name_timing = resolve_name_timing,
+ };
+
+ /* init token array and string buffer */
+ rules->tokens = malloc_multiply(PREALLOC_TOKEN, sizeof(struct token));
+ if (!rules->tokens)
+ return -ENOMEM;
+ rules->token_max = PREALLOC_TOKEN;
+
+ rules->strbuf = strbuf_new();
+ if (!rules->strbuf)
+ return -ENOMEM;
+
+ udev_rules_check_timestamp(rules);
+
+ r = conf_files_list_strv(&files, ".rules", NULL, 0, rules_dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate rules files: %m");
+
+ /*
+ * The offset value in the rules strct is limited; add all
+ * rules file names to the beginning of the string buffer.
+ */
+ STRV_FOREACH(f, files)
+ rules_add_string(rules, *f);
+
+ STRV_FOREACH(f, files)
+ parse_file(rules, *f);
+
+ struct token end_token = { .type = TK_END };
+ add_token(rules, &end_token);
+ log_debug("Rules contain %zu bytes tokens (%u * %zu bytes), %zu bytes strings",
+ rules->token_max * sizeof(struct token), rules->token_max, sizeof(struct token), rules->strbuf->len);
+
+ /* cleanup temporary strbuf data */
+ log_debug("%zu strings (%zu bytes), %zu de-duplicated (%zu bytes), %zu trie nodes used",
+ rules->strbuf->in_count, rules->strbuf->in_len,
+ rules->strbuf->dedup_count, rules->strbuf->dedup_len, rules->strbuf->nodes_count);
+ strbuf_complete(rules->strbuf);
+
+ /* cleanup uid/gid cache */
+ rules->uids = mfree(rules->uids);
+ rules->uids_cur = 0;
+ rules->uids_max = 0;
+ rules->gids = mfree(rules->gids);
+ rules->gids_cur = 0;
+ rules->gids_max = 0;
+
+ dump_rules(rules);
+ *ret_rules = TAKE_PTR(rules);
+ return 0;
+}
+
+UdevRules *udev_rules_free(UdevRules *rules) {
+ if (!rules)
+ return NULL;
+ free(rules->tokens);
+ strbuf_cleanup(rules->strbuf);
+ free(rules->uids);
+ free(rules->gids);
+ return mfree(rules);
+}
+
+bool udev_rules_check_timestamp(UdevRules *rules) {
+ if (!rules)
+ return false;
+
+ return paths_check_timestamp(rules_dirs, &rules->dirs_ts_usec, true);
+}
+
+static int match_key(UdevRules *rules, struct token *token, const char *val) {
+ char *key_value = rules_str(rules, token->key.value_off);
+ char *pos;
+ bool match = false;
+
+ if (!val)
+ val = "";
+
+ switch (token->key.glob) {
+ case GL_PLAIN:
+ match = (streq(key_value, val));
+ break;
+ case GL_GLOB:
+ match = (fnmatch(key_value, val, 0) == 0);
+ break;
+ case GL_SPLIT:
+ {
+ const char *s;
+ size_t len;
+
+ s = rules_str(rules, token->key.value_off);
+ len = strlen(val);
+ for (;;) {
+ const char *next;
+
+ next = strchr(s, '|');
+ if (next) {
+ size_t matchlen = (size_t)(next - s);
+
+ match = (matchlen == len && strneq(s, val, matchlen));
+ if (match)
+ break;
+ } else {
+ match = (streq(s, val));
+ break;
+ }
+ s = &next[1];
+ }
+ break;
+ }
+ case GL_SPLIT_GLOB:
+ {
+ char value[UTIL_PATH_SIZE];
+
+ strscpy(value, sizeof(value), rules_str(rules, token->key.value_off));
+ key_value = value;
+ while (key_value) {
+ pos = strchr(key_value, '|');
+ if (pos) {
+ pos[0] = '\0';
+ pos = &pos[1];
+ }
+ match = (fnmatch(key_value, val, 0) == 0);
+ if (match)
+ break;
+ key_value = pos;
+ }
+ break;
+ }
+ case GL_SOMETHING:
+ match = (val[0] != '\0');
+ break;
+ case GL_UNSET:
+ return -1;
+ }
+
+ if (match && (token->key.op == OP_MATCH))
+ return 0;
+ if (!match && (token->key.op == OP_NOMATCH))
+ return 0;
+ return -1;
+}
+
+static int match_attr(UdevRules *rules, sd_device *dev, UdevEvent *event, struct token *cur) {
+ char nbuf[UTIL_NAME_SIZE], vbuf[UTIL_NAME_SIZE];
+ const char *name, *value;
+ size_t len;
+
+ name = rules_str(rules, cur->key.attr_off);
+ switch (cur->key.attrsubst) {
+ case SB_FORMAT:
+ udev_event_apply_format(event, name, nbuf, sizeof(nbuf), false);
+ name = nbuf;
+ _fallthrough_;
+ case SB_NONE:
+ if (sd_device_get_sysattr_value(dev, name, &value) < 0)
+ return -1;
+ break;
+ case SB_SUBSYS:
+ if (util_resolve_subsys_kernel(name, vbuf, sizeof(vbuf), true) != 0)
+ return -1;
+ value = vbuf;
+ break;
+ default:
+ return -1;
+ }
+
+ /* remove trailing whitespace, if not asked to match for it */
+ len = strlen(value);
+ if (len > 0 && isspace(value[len-1])) {
+ const char *key_value;
+ size_t klen;
+
+ key_value = rules_str(rules, cur->key.value_off);
+ klen = strlen(key_value);
+ if (klen > 0 && !isspace(key_value[klen-1])) {
+ if (value != vbuf) {
+ strscpy(vbuf, sizeof(vbuf), value);
+ value = vbuf;
+ }
+ while (len > 0 && isspace(vbuf[--len]))
+ vbuf[len] = '\0';
+ }
+ }
+
+ return match_key(rules, cur, value);
+}
+
+enum escape_type {
+ ESCAPE_UNSET,
+ ESCAPE_NONE,
+ ESCAPE_REPLACE,
+};
+
+int udev_rules_apply_to_event(
+ UdevRules *rules,
+ UdevEvent *event,
+ usec_t timeout_usec,
+ Hashmap *properties_list) {
+ sd_device *dev = event->dev;
+ enum escape_type esc = ESCAPE_UNSET;
+ struct token *cur, *rule;
+ const char *action, *val;
+ bool can_set_name;
+ int r;
+
+ if (!rules->tokens)
+ return 0;
+
+ r = sd_device_get_property_value(dev, "ACTION", &action);
+ if (r < 0)
+ return r;
+
+ can_set_name = (!streq(action, "remove") &&
+ (sd_device_get_devnum(dev, NULL) >= 0 ||
+ sd_device_get_ifindex(dev, NULL) >= 0));
+
+ /* loop through token list, match, run actions or forward to next rule */
+ cur = &rules->tokens[0];
+ rule = cur;
+ for (;;) {
+ dump_token(rules, cur);
+ switch (cur->type) {
+ case TK_RULE:
+ /* current rule */
+ rule = cur;
+ /* possibly skip rules which want to set NAME, SYMLINK, OWNER, GROUP, MODE */
+ if (!can_set_name && rule->rule.can_set_name)
+ goto nomatch;
+ esc = ESCAPE_UNSET;
+ break;
+ case TK_M_ACTION:
+ if (match_key(rules, cur, action) != 0)
+ goto nomatch;
+ break;
+ case TK_M_DEVPATH:
+ if (sd_device_get_devpath(dev, &val) < 0)
+ goto nomatch;
+ if (match_key(rules, cur, val) != 0)
+ goto nomatch;
+ break;
+ case TK_M_KERNEL:
+ if (sd_device_get_sysname(dev, &val) < 0)
+ goto nomatch;
+ if (match_key(rules, cur, val) != 0)
+ goto nomatch;
+ break;
+ case TK_M_DEVLINK: {
+ const char *devlink;
+ bool match = false;
+
+ FOREACH_DEVICE_DEVLINK(dev, devlink)
+ if (match_key(rules, cur, devlink + STRLEN("/dev/")) == 0) {
+ match = true;
+ break;
+ }
+
+ if (!match)
+ goto nomatch;
+ break;
+ }
+ case TK_M_NAME:
+ if (match_key(rules, cur, event->name) != 0)
+ goto nomatch;
+ break;
+ case TK_M_ENV: {
+ const char *key_name = rules_str(rules, cur->key.attr_off);
+
+ if (sd_device_get_property_value(dev, key_name, &val) < 0) {
+ /* check global properties */
+ if (properties_list)
+ val = hashmap_get(properties_list, key_name);
+ else
+ val = NULL;
+ }
+
+ if (match_key(rules, cur, strempty(val)))
+ goto nomatch;
+ break;
+ }
+ case TK_M_TAG: {
+ bool match = false;
+ const char *tag;
+
+ FOREACH_DEVICE_TAG(dev, tag)
+ if (streq(rules_str(rules, cur->key.value_off), tag)) {
+ match = true;
+ break;
+ }
+
+ if ((!match && (cur->key.op != OP_NOMATCH)) ||
+ (match && (cur->key.op == OP_NOMATCH)))
+ goto nomatch;
+ break;
+ }
+ case TK_M_SUBSYSTEM:
+ if (sd_device_get_subsystem(dev, &val) < 0)
+ goto nomatch;
+ if (match_key(rules, cur, val) != 0)
+ goto nomatch;
+ break;
+ case TK_M_DRIVER:
+ if (sd_device_get_driver(dev, &val) < 0)
+ goto nomatch;
+ if (match_key(rules, cur, val) != 0)
+ goto nomatch;
+ break;
+ case TK_M_ATTR:
+ if (match_attr(rules, dev, event, cur) != 0)
+ goto nomatch;
+ break;
+ case TK_M_SYSCTL: {
+ char filename[UTIL_PATH_SIZE];
+ _cleanup_free_ char *value = NULL;
+ size_t len;
+
+ udev_event_apply_format(event, rules_str(rules, cur->key.attr_off), filename, sizeof(filename), false);
+ sysctl_normalize(filename);
+ if (sysctl_read(filename, &value) < 0)
+ goto nomatch;
+
+ len = strlen(value);
+ while (len > 0 && isspace(value[--len]))
+ value[len] = '\0';
+ if (match_key(rules, cur, value) != 0)
+ goto nomatch;
+ break;
+ }
+ case TK_M_KERNELS:
+ case TK_M_SUBSYSTEMS:
+ case TK_M_DRIVERS:
+ case TK_M_ATTRS:
+ case TK_M_TAGS: {
+ struct token *next;
+
+ /* get whole sequence of parent matches */
+ next = cur;
+ while (next->type > TK_M_PARENTS_MIN && next->type < TK_M_PARENTS_MAX)
+ next++;
+
+ /* loop over parents */
+ event->dev_parent = dev;
+ for (;;) {
+ struct token *key;
+
+ /* loop over sequence of parent match keys */
+ for (key = cur; key < next; key++ ) {
+ dump_token(rules, key);
+ switch(key->type) {
+ case TK_M_KERNELS:
+ if (sd_device_get_sysname(event->dev_parent, &val) < 0)
+ goto try_parent;
+ if (match_key(rules, key, val) != 0)
+ goto try_parent;
+ break;
+ case TK_M_SUBSYSTEMS:
+ if (sd_device_get_subsystem(event->dev_parent, &val) < 0)
+ goto try_parent;
+ if (match_key(rules, key, val) != 0)
+ goto try_parent;
+ break;
+ case TK_M_DRIVERS:
+ if (sd_device_get_driver(event->dev_parent, &val) < 0)
+ goto try_parent;
+ if (match_key(rules, key, val) != 0)
+ goto try_parent;
+ break;
+ case TK_M_ATTRS:
+ if (match_attr(rules, event->dev_parent, event, key) != 0)
+ goto try_parent;
+ break;
+ case TK_M_TAGS: {
+ bool match = sd_device_has_tag(event->dev_parent, rules_str(rules, cur->key.value_off));
+
+ if (match && key->key.op == OP_NOMATCH)
+ goto try_parent;
+ if (!match && key->key.op == OP_MATCH)
+ goto try_parent;
+ break;
+ }
+ default:
+ goto nomatch;
+ }
+ }
+ break;
+
+ try_parent:
+ if (sd_device_get_parent(event->dev_parent, &event->dev_parent) < 0) {
+ event->dev_parent = NULL;
+ goto nomatch;
+ }
+ }
+ /* move behind our sequence of parent match keys */
+ cur = next;
+ continue;
+ }
+ case TK_M_TEST: {
+ char filename[UTIL_PATH_SIZE];
+ struct stat statbuf;
+ int match;
+
+ udev_event_apply_format(event, rules_str(rules, cur->key.value_off), filename, sizeof(filename), false);
+ if (util_resolve_subsys_kernel(filename, filename, sizeof(filename), false) != 0) {
+ if (filename[0] != '/') {
+ char tmp[UTIL_PATH_SIZE];
+
+ if (sd_device_get_syspath(dev, &val) < 0)
+ goto nomatch;
+
+ strscpy(tmp, sizeof(tmp), filename);
+ strscpyl(filename, sizeof(filename), val, "/", tmp, NULL);
+ }
+ }
+ attr_subst_subdir(filename, sizeof(filename));
+
+ match = (stat(filename, &statbuf) == 0);
+ if (match && cur->key.mode > 0)
+ match = ((statbuf.st_mode & cur->key.mode) > 0);
+ if (match && cur->key.op == OP_NOMATCH)
+ goto nomatch;
+ if (!match && cur->key.op == OP_MATCH)
+ goto nomatch;
+ break;
+ }
+ case TK_M_PROGRAM: {
+ char program[UTIL_PATH_SIZE], result[UTIL_LINE_SIZE];
+
+ event->program_result = mfree(event->program_result);
+ udev_event_apply_format(event, rules_str(rules, cur->key.value_off), program, sizeof(program), false);
+ log_device_debug(dev, "PROGRAM '%s' %s:%u",
+ program,
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+
+ if (udev_event_spawn(event, timeout_usec, true, program, result, sizeof(result)) != 0) {
+ if (cur->key.op != OP_NOMATCH)
+ goto nomatch;
+ } else {
+ int count;
+
+ delete_trailing_chars(result, "\n");
+ if (IN_SET(esc, ESCAPE_UNSET, ESCAPE_REPLACE)) {
+ count = util_replace_chars(result, UDEV_ALLOWED_CHARS_INPUT);
+ if (count > 0)
+ log_device_debug(dev, "Replaced %i character(s) from result of '%s'" , count, program);
+ }
+ event->program_result = strdup(result);
+ if (cur->key.op == OP_NOMATCH)
+ goto nomatch;
+ }
+ break;
+ }
+ case TK_M_IMPORT_FILE: {
+ char import[UTIL_PATH_SIZE];
+
+ udev_event_apply_format(event, rules_str(rules, cur->key.value_off), import, sizeof(import), false);
+ if (import_file_into_properties(dev, import) != 0)
+ if (cur->key.op != OP_NOMATCH)
+ goto nomatch;
+ break;
+ }
+ case TK_M_IMPORT_PROG: {
+ char import[UTIL_PATH_SIZE];
+
+ udev_event_apply_format(event, rules_str(rules, cur->key.value_off), import, sizeof(import), false);
+ log_device_debug(dev, "IMPORT '%s' %s:%u",
+ import,
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+
+ if (import_program_into_properties(event, timeout_usec, import) != 0)
+ if (cur->key.op != OP_NOMATCH)
+ goto nomatch;
+ break;
+ }
+ case TK_M_IMPORT_BUILTIN: {
+ char command[UTIL_PATH_SIZE];
+
+ if (udev_builtin_run_once(cur->key.builtin_cmd)) {
+ /* check if we ran already */
+ if (event->builtin_run & (1 << cur->key.builtin_cmd)) {
+ log_device_debug(dev, "IMPORT builtin skip '%s' %s:%u",
+ udev_builtin_name(cur->key.builtin_cmd),
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+ /* return the result from earlier run */
+ if (event->builtin_ret & (1 << cur->key.builtin_cmd))
+ if (cur->key.op != OP_NOMATCH)
+ goto nomatch;
+ break;
+ }
+ /* mark as ran */
+ event->builtin_run |= (1 << cur->key.builtin_cmd);
+ }
+
+ udev_event_apply_format(event, rules_str(rules, cur->key.value_off), command, sizeof(command), false);
+ log_device_debug(dev, "IMPORT builtin '%s' %s:%u",
+ udev_builtin_name(cur->key.builtin_cmd),
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+
+ r = udev_builtin_run(dev, cur->key.builtin_cmd, command, false);
+ if (r < 0) {
+ /* remember failure */
+ log_device_debug_errno(dev, r, "IMPORT builtin '%s' fails: %m",
+ udev_builtin_name(cur->key.builtin_cmd));
+ event->builtin_ret |= (1 << cur->key.builtin_cmd);
+ if (cur->key.op != OP_NOMATCH)
+ goto nomatch;
+ }
+ break;
+ }
+ case TK_M_IMPORT_DB: {
+ const char *key;
+
+ key = rules_str(rules, cur->key.value_off);
+ if (event->dev_db_clone &&
+ sd_device_get_property_value(event->dev_db_clone, key, &val) >= 0)
+ device_add_property(dev, key, val);
+ else if (cur->key.op != OP_NOMATCH)
+ goto nomatch;
+ break;
+ }
+ case TK_M_IMPORT_CMDLINE: {
+ _cleanup_free_ char *value = NULL;
+ bool imported = false;
+ const char *key;
+
+ key = rules_str(rules, cur->key.value_off);
+ r = proc_cmdline_get_key(key, PROC_CMDLINE_VALUE_OPTIONAL, &value);
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to read %s from /proc/cmdline, ignoring: %m", key);
+ else if (r > 0) {
+ imported = true;
+
+ if (value)
+ device_add_property(dev, key, value);
+ else
+ /* we import simple flags as 'FLAG=1' */
+ device_add_property(dev, key, "1");
+ }
+
+ if (!imported && cur->key.op != OP_NOMATCH)
+ goto nomatch;
+ break;
+ }
+ case TK_M_IMPORT_PARENT: {
+ char import[UTIL_PATH_SIZE];
+
+ udev_event_apply_format(event, rules_str(rules, cur->key.value_off), import, sizeof(import), false);
+ if (import_parent_into_properties(dev, import) != 0)
+ if (cur->key.op != OP_NOMATCH)
+ goto nomatch;
+ break;
+ }
+ case TK_M_RESULT:
+ if (match_key(rules, cur, event->program_result) != 0)
+ goto nomatch;
+ break;
+ case TK_A_STRING_ESCAPE_NONE:
+ esc = ESCAPE_NONE;
+ break;
+ case TK_A_STRING_ESCAPE_REPLACE:
+ esc = ESCAPE_REPLACE;
+ break;
+ case TK_A_DB_PERSIST:
+ device_set_db_persist(dev);
+ break;
+ case TK_A_INOTIFY_WATCH:
+ if (event->inotify_watch_final)
+ break;
+ if (cur->key.op == OP_ASSIGN_FINAL)
+ event->inotify_watch_final = true;
+ event->inotify_watch = cur->key.watch;
+ break;
+ case TK_A_DEVLINK_PRIO:
+ device_set_devlink_priority(dev, cur->key.devlink_prio);
+ break;
+ case TK_A_OWNER: {
+ char owner[UTIL_NAME_SIZE];
+ const char *ow = owner;
+
+ if (event->owner_final)
+ break;
+ if (cur->key.op == OP_ASSIGN_FINAL)
+ event->owner_final = true;
+ udev_event_apply_format(event, rules_str(rules, cur->key.value_off), owner, sizeof(owner), false);
+ event->owner_set = true;
+ r = get_user_creds(&ow, &event->uid, NULL, NULL, NULL, USER_CREDS_ALLOW_MISSING);
+ if (r < 0) {
+ log_unknown_owner(dev, r, "user", owner);
+ event->uid = 0;
+ }
+ log_device_debug(dev, "OWNER %u %s:%u",
+ event->uid,
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+ break;
+ }
+ case TK_A_GROUP: {
+ char group[UTIL_NAME_SIZE];
+ const char *gr = group;
+
+ if (event->group_final)
+ break;
+ if (cur->key.op == OP_ASSIGN_FINAL)
+ event->group_final = true;
+ udev_event_apply_format(event, rules_str(rules, cur->key.value_off), group, sizeof(group), false);
+ event->group_set = true;
+ r = get_group_creds(&gr, &event->gid, USER_CREDS_ALLOW_MISSING);
+ if (r < 0) {
+ log_unknown_owner(dev, r, "group", group);
+ event->gid = 0;
+ }
+ log_device_debug(dev, "GROUP %u %s:%u",
+ event->gid,
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+ break;
+ }
+ case TK_A_MODE: {
+ char mode_str[UTIL_NAME_SIZE];
+ mode_t mode;
+
+ if (event->mode_final)
+ break;
+ udev_event_apply_format(event, rules_str(rules, cur->key.value_off), mode_str, sizeof(mode_str), false);
+ r = parse_mode(mode_str, &mode);
+ if (r < 0) {
+ log_device_error_errno(dev, r, "Failed to parse mode '%s': %m", mode_str);
+ break;
+ }
+ if (cur->key.op == OP_ASSIGN_FINAL)
+ event->mode_final = true;
+ event->mode_set = true;
+ event->mode = mode;
+ log_device_debug(dev, "MODE %#o %s:%u",
+ event->mode,
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+ break;
+ }
+ case TK_A_OWNER_ID:
+ if (event->owner_final)
+ break;
+ if (cur->key.op == OP_ASSIGN_FINAL)
+ event->owner_final = true;
+ event->owner_set = true;
+ event->uid = cur->key.uid;
+ log_device_debug(dev, "OWNER %u %s:%u",
+ event->uid,
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+ break;
+ case TK_A_GROUP_ID:
+ if (event->group_final)
+ break;
+ if (cur->key.op == OP_ASSIGN_FINAL)
+ event->group_final = true;
+ event->group_set = true;
+ event->gid = cur->key.gid;
+ log_device_debug(dev, "GROUP %u %s:%u",
+ event->gid,
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+ break;
+ case TK_A_MODE_ID:
+ if (event->mode_final)
+ break;
+ if (cur->key.op == OP_ASSIGN_FINAL)
+ event->mode_final = true;
+ event->mode_set = true;
+ event->mode = cur->key.mode;
+ log_device_debug(dev, "MODE %#o %s:%u",
+ event->mode,
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+ break;
+ case TK_A_SECLABEL: {
+ _cleanup_free_ char *name = NULL, *label = NULL;
+ char label_str[UTIL_LINE_SIZE] = {};
+
+ name = strdup(rules_str(rules, cur->key.attr_off));
+ if (!name)
+ return log_oom();
+
+ udev_event_apply_format(event, rules_str(rules, cur->key.value_off), label_str, sizeof(label_str), false);
+ if (!isempty(label_str))
+ label = strdup(label_str);
+ else
+ label = strdup(rules_str(rules, cur->key.value_off));
+ if (!label)
+ return log_oom();
+
+ if (IN_SET(cur->key.op, OP_ASSIGN, OP_ASSIGN_FINAL))
+ hashmap_clear_free_free(event->seclabel_list);
+
+ r = hashmap_ensure_allocated(&event->seclabel_list, NULL);
+ if (r < 0)
+ return log_oom();
+
+ r = hashmap_put(event->seclabel_list, name, label);
+ if (r < 0)
+ return log_oom();
+ log_device_debug(dev, "SECLABEL{%s}='%s' %s:%u",
+ name, label,
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+ name = label = NULL;
+
+ break;
+ }
+ case TK_A_ENV: {
+ char value_new[UTIL_NAME_SIZE];
+ const char *name, *value_old;
+
+ name = rules_str(rules, cur->key.attr_off);
+ val = rules_str(rules, cur->key.value_off);
+ if (val[0] == '\0') {
+ if (cur->key.op == OP_ADD)
+ break;
+ device_add_property(dev, name, NULL);
+ break;
+ }
+
+ if (cur->key.op == OP_ADD &&
+ sd_device_get_property_value(dev, name, &value_old) >= 0) {
+ char temp[UTIL_NAME_SIZE];
+
+ /* append value separated by space */
+ udev_event_apply_format(event, val, temp, sizeof(temp), false);
+ strscpyl(value_new, sizeof(value_new), value_old, " ", temp, NULL);
+ } else
+ udev_event_apply_format(event, val, value_new, sizeof(value_new), false);
+
+ device_add_property(dev, name, value_new);
+ break;
+ }
+ case TK_A_TAG: {
+ char tag[UTIL_PATH_SIZE];
+ const char *p;
+
+ udev_event_apply_format(event, rules_str(rules, cur->key.value_off), tag, sizeof(tag), false);
+ if (IN_SET(cur->key.op, OP_ASSIGN, OP_ASSIGN_FINAL))
+ device_cleanup_tags(dev);
+ for (p = tag; *p != '\0'; p++) {
+ if ((*p >= 'a' && *p <= 'z') ||
+ (*p >= 'A' && *p <= 'Z') ||
+ (*p >= '0' && *p <= '9') ||
+ IN_SET(*p, '-', '_'))
+ continue;
+ log_device_error(dev, "Ignoring invalid tag name '%s'", tag);
+ break;
+ }
+ if (cur->key.op == OP_REMOVE)
+ device_remove_tag(dev, tag);
+ else
+ device_add_tag(dev, tag);
+ break;
+ }
+ case TK_A_NAME: {
+ char name_str[UTIL_PATH_SIZE];
+ const char *name;
+ int count;
+
+ name = rules_str(rules, cur->key.value_off);
+ if (event->name_final)
+ break;
+ if (cur->key.op == OP_ASSIGN_FINAL)
+ event->name_final = true;
+ udev_event_apply_format(event, name, name_str, sizeof(name_str), false);
+ if (IN_SET(esc, ESCAPE_UNSET, ESCAPE_REPLACE)) {
+ count = util_replace_chars(name_str, "/");
+ if (count > 0)
+ log_device_debug(dev, "Replaced %i character(s) from result of NAME=\"%s\"", count, name);
+ }
+ if (sd_device_get_devnum(dev, NULL) >= 0 &&
+ (sd_device_get_devname(dev, &val) < 0 ||
+ !streq(name_str, val + STRLEN("/dev/")))) {
+ log_device_error(dev, "Kernel device nodes cannot be renamed, ignoring NAME=\"%s\"; please fix it in %s:%u\n",
+ name,
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+ break;
+ }
+ if (free_and_strdup(&event->name, name_str) < 0)
+ return log_oom();
+
+ log_device_debug(dev, "NAME '%s' %s:%u",
+ event->name,
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+ break;
+ }
+ case TK_A_DEVLINK: {
+ char temp[UTIL_PATH_SIZE], filename[UTIL_PATH_SIZE], *pos, *next;
+ int count = 0;
+
+ if (event->devlink_final)
+ break;
+ if (sd_device_get_devnum(dev, NULL) < 0)
+ break;
+ if (cur->key.op == OP_ASSIGN_FINAL)
+ event->devlink_final = true;
+ if (IN_SET(cur->key.op, OP_ASSIGN, OP_ASSIGN_FINAL))
+ device_cleanup_devlinks(dev);
+
+ /* allow multiple symlinks separated by spaces */
+ udev_event_apply_format(event, rules_str(rules, cur->key.value_off), temp, sizeof(temp), esc != ESCAPE_NONE);
+ if (esc == ESCAPE_UNSET)
+ count = util_replace_chars(temp, "/ ");
+ else if (esc == ESCAPE_REPLACE)
+ count = util_replace_chars(temp, "/");
+ if (count > 0)
+ log_device_debug(dev, "Replaced %i character(s) from result of LINK" , count);
+ pos = temp;
+ while (isspace(pos[0]))
+ pos++;
+ next = strchr(pos, ' ');
+ while (next) {
+ next[0] = '\0';
+ log_device_debug(dev, "LINK '%s' %s:%u", pos,
+ rules_str(rules, rule->rule.filename_off), rule->rule.filename_line);
+ strscpyl(filename, sizeof(filename), "/dev/", pos, NULL);
+ device_add_devlink(dev, filename);
+ while (isspace(next[1]))
+ next++;
+ pos = &next[1];
+ next = strchr(pos, ' ');
+ }
+ if (pos[0] != '\0') {
+ log_device_debug(dev, "LINK '%s' %s:%u", pos,
+ rules_str(rules, rule->rule.filename_off), rule->rule.filename_line);
+ strscpyl(filename, sizeof(filename), "/dev/", pos, NULL);
+ device_add_devlink(dev, filename);
+ }
+ break;
+ }
+ case TK_A_ATTR: {
+ char attr[UTIL_PATH_SIZE], value[UTIL_NAME_SIZE];
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *key_name;
+
+ key_name = rules_str(rules, cur->key.attr_off);
+ if (util_resolve_subsys_kernel(key_name, attr, sizeof(attr), false) != 0 &&
+ sd_device_get_syspath(dev, &val) >= 0)
+ strscpyl(attr, sizeof(attr), val, "/", key_name, NULL);
+ attr_subst_subdir(attr, sizeof(attr));
+
+ udev_event_apply_format(event, rules_str(rules, cur->key.value_off), value, sizeof(value), false);
+ log_device_debug(dev, "ATTR '%s' writing '%s' %s:%u", attr, value,
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+ f = fopen(attr, "we");
+ if (!f)
+ log_device_error_errno(dev, errno, "Failed to open ATTR{%s} for writing: %m", attr);
+ else if (fprintf(f, "%s", value) <= 0)
+ log_device_error_errno(dev, errno, "Failed to write ATTR{%s}: %m", attr);
+ break;
+ }
+ case TK_A_SYSCTL: {
+ char filename[UTIL_PATH_SIZE], value[UTIL_NAME_SIZE];
+
+ udev_event_apply_format(event, rules_str(rules, cur->key.attr_off), filename, sizeof(filename), false);
+ sysctl_normalize(filename);
+ udev_event_apply_format(event, rules_str(rules, cur->key.value_off), value, sizeof(value), false);
+ log_device_debug(dev, "SYSCTL '%s' writing '%s' %s:%u", filename, value,
+ rules_str(rules, rule->rule.filename_off), rule->rule.filename_line);
+ r = sysctl_write(filename, value);
+ if (r < 0)
+ log_device_error_errno(dev, r, "Failed to write SYSCTL{%s}='%s': %m", filename, value);
+ break;
+ }
+ case TK_A_RUN_BUILTIN:
+ case TK_A_RUN_PROGRAM: {
+ _cleanup_free_ char *cmd = NULL;
+
+ if (IN_SET(cur->key.op, OP_ASSIGN, OP_ASSIGN_FINAL))
+ hashmap_clear_free_key(event->run_list);
+
+ r = hashmap_ensure_allocated(&event->run_list, NULL);
+ if (r < 0)
+ return log_oom();
+
+ cmd = strdup(rules_str(rules, cur->key.value_off));
+ if (!cmd)
+ return log_oom();
+
+ r = hashmap_put(event->run_list, cmd, INT_TO_PTR(cur->key.builtin_cmd));
+ if (r < 0)
+ return log_oom();
+
+ cmd = NULL;
+
+ log_device_debug(dev, "RUN '%s' %s:%u",
+ rules_str(rules, cur->key.value_off),
+ rules_str(rules, rule->rule.filename_off),
+ rule->rule.filename_line);
+ break;
+ }
+ case TK_A_GOTO:
+ if (cur->key.rule_goto == 0)
+ break;
+ cur = &rules->tokens[cur->key.rule_goto];
+ continue;
+ case TK_END:
+ return 0;
+
+ case TK_M_PARENTS_MIN:
+ case TK_M_PARENTS_MAX:
+ case TK_M_MAX:
+ case TK_UNSET:
+ log_device_error(dev, "Wrong type %u", cur->type);
+ goto nomatch;
+ }
+
+ cur++;
+ continue;
+ nomatch:
+ /* fast-forward to next rule */
+ cur = rule + rule->rule.token_count;
+ }
+
+ return 0;
+}
+
+int udev_rules_apply_static_dev_perms(UdevRules *rules) {
+ struct token *cur;
+ struct token *rule;
+ uid_t uid = 0;
+ gid_t gid = 0;
+ mode_t mode = 0;
+ _cleanup_strv_free_ char **tags = NULL;
+ char **t;
+ FILE *f = NULL;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ if (!rules->tokens)
+ return 0;
+
+ cur = &rules->tokens[0];
+ rule = cur;
+ for (;;) {
+ switch (cur->type) {
+ case TK_RULE:
+ /* current rule */
+ rule = cur;
+
+ /* skip rules without a static_node tag */
+ if (!rule->rule.has_static_node)
+ goto next;
+
+ uid = 0;
+ gid = 0;
+ mode = 0;
+ tags = strv_free(tags);
+ break;
+ case TK_A_OWNER_ID:
+ uid = cur->key.uid;
+ break;
+ case TK_A_GROUP_ID:
+ gid = cur->key.gid;
+ break;
+ case TK_A_MODE_ID:
+ mode = cur->key.mode;
+ break;
+ case TK_A_TAG:
+ r = strv_extend(&tags, rules_str(rules, cur->key.value_off));
+ if (r < 0)
+ goto finish;
+
+ break;
+ case TK_A_STATIC_NODE: {
+ char device_node[UTIL_PATH_SIZE];
+ char tags_dir[UTIL_PATH_SIZE];
+ char tag_symlink[UTIL_PATH_SIZE];
+ struct stat stats;
+
+ /* we assure, that the permissions tokens are sorted before the static token */
+
+ if (mode == 0 && uid == 0 && gid == 0 && !tags)
+ goto next;
+
+ strscpyl(device_node, sizeof(device_node), "/dev/", rules_str(rules, cur->key.value_off), NULL);
+ if (stat(device_node, &stats) != 0)
+ break;
+ if (!S_ISBLK(stats.st_mode) && !S_ISCHR(stats.st_mode))
+ break;
+
+ /* export the tags to a directory as symlinks, allowing otherwise dead nodes to be tagged */
+ if (tags) {
+ STRV_FOREACH(t, tags) {
+ _cleanup_free_ char *unescaped_filename = NULL;
+
+ strscpyl(tags_dir, sizeof(tags_dir), "/run/udev/static_node-tags/", *t, "/", NULL);
+ r = mkdir_p(tags_dir, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create %s: %m", tags_dir);
+
+ unescaped_filename = xescape(rules_str(rules, cur->key.value_off), "/.");
+
+ strscpyl(tag_symlink, sizeof(tag_symlink), tags_dir, unescaped_filename, NULL);
+ r = symlink(device_node, tag_symlink);
+ if (r < 0 && errno != EEXIST)
+ return log_error_errno(errno, "Failed to create symlink %s -> %s: %m",
+ tag_symlink, device_node);
+ }
+ }
+
+ /* don't touch the permissions if only the tags were set */
+ if (mode == 0 && uid == 0 && gid == 0)
+ break;
+
+ if (mode == 0) {
+ if (gid > 0)
+ mode = 0660;
+ else
+ mode = 0600;
+ }
+ if (mode != (stats.st_mode & 01777)) {
+ r = chmod(device_node, mode);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to chmod '%s' %#o: %m",
+ device_node, mode);
+ else
+ log_debug("chmod '%s' %#o", device_node, mode);
+ }
+
+ if ((uid != 0 && uid != stats.st_uid) || (gid != 0 && gid != stats.st_gid)) {
+ r = chown(device_node, uid, gid);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to chown '%s' %u %u: %m",
+ device_node, uid, gid);
+ else
+ log_debug("chown '%s' %u %u", device_node, uid, gid);
+ }
+
+ utimensat(AT_FDCWD, device_node, NULL, 0);
+ break;
+ }
+ case TK_END:
+ goto finish;
+ }
+
+ cur++;
+ continue;
+next:
+ /* fast-forward to next rule */
+ cur = rule + rule->rule.token_count;
+ continue;
+ }
+
+finish:
+ if (f) {
+ fflush(f);
+ fchmod(fileno(f), 0644);
+ if (ferror(f) || rename(path, "/run/udev/static_node-tags") < 0) {
+ unlink_noerrno("/run/udev/static_node-tags");
+ unlink_noerrno(path);
+ return -errno;
+ }
+ }
+
+ return 0;
+}
diff --git a/src/udev/udev-watch.c b/src/udev/udev-watch.c
new file mode 100644
index 0000000..68b51d0
--- /dev/null
+++ b/src/udev/udev-watch.c
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright © 2009 Canonical Ltd.
+ * Copyright © 2009 Scott James Remnant <scott@netsplit.com>
+ */
+
+#include <sys/inotify.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "fs-util.h"
+#include "mkdir.h"
+#include "stdio-util.h"
+#include "udev-watch.h"
+
+static int inotify_fd = -1;
+
+/* inotify descriptor, will be shared with rules directory;
+ * set to cloexec since we need our children to be able to add
+ * watches for us. */
+int udev_watch_init(void) {
+ inotify_fd = inotify_init1(IN_CLOEXEC);
+ if (inotify_fd < 0)
+ return -errno;
+
+ return inotify_fd;
+}
+
+/* Move any old watches directory out of the way, and then restore the watches. */
+int udev_watch_restore(void) {
+ struct dirent *ent;
+ DIR *dir;
+ int r;
+
+ if (inotify_fd < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid inotify descriptor.");
+
+ if (rename("/run/udev/watch", "/run/udev/watch.old") < 0) {
+ if (errno != ENOENT)
+ return log_warning_errno(errno, "Failed to move watches directory /run/udev/watch. Old watches will not be restored: %m");
+
+ return 0;
+ }
+
+ dir = opendir("/run/udev/watch.old");
+ if (!dir)
+ return log_warning_errno(errno, "Failed to open old watches directory /run/udev/watch.old. Old watches will not be restored: %m");
+
+ FOREACH_DIRENT_ALL(ent, dir, break) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ _cleanup_free_ char *device = NULL;
+
+ if (ent->d_name[0] == '.')
+ continue;
+
+ r = readlinkat_malloc(dirfd(dir), ent->d_name, &device);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to read link '/run/udev/watch.old/%s', ignoring: %m", ent->d_name);
+ goto unlink;
+ }
+
+ r = sd_device_new_from_device_id(&dev, device);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to create sd_device object for '%s', ignoring: %m", device);
+ goto unlink;
+ }
+
+ log_device_debug(dev, "Restoring old watch");
+ (void) udev_watch_begin(dev);
+unlink:
+ (void) unlinkat(dirfd(dir), ent->d_name, 0);
+ }
+
+ (void) closedir(dir);
+ (void) rmdir("/run/udev/watch.old");
+
+ return 0;
+}
+
+int udev_watch_begin(sd_device *dev) {
+ char filename[STRLEN("/run/udev/watch/") + DECIMAL_STR_MAX(int)];
+ const char *devnode, *id_filename;
+ int wd, r;
+
+ if (inotify_fd < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid inotify descriptor.");
+
+ r = sd_device_get_devname(dev, &devnode);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get device name: %m");
+
+ log_device_debug(dev, "Adding watch on '%s'", devnode);
+ wd = inotify_add_watch(inotify_fd, devnode, IN_CLOSE_WRITE);
+ if (wd < 0)
+ return log_device_full(dev,
+ errno == ENOENT ? LOG_DEBUG : LOG_ERR,
+ errno,
+ "Failed to add device '%s' to watch: %m", devnode);
+
+ device_set_watch_handle(dev, wd);
+
+ xsprintf(filename, "/run/udev/watch/%d", wd);
+ r = mkdir_parents(filename, 0755);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to create parent directory of '%s': %m", filename);
+ (void) unlink(filename);
+
+ r = device_get_id_filename(dev, &id_filename);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get device id-filename: %m");
+
+ if (symlink(id_filename, filename) < 0)
+ return log_device_error_errno(dev, errno, "Failed to create symlink %s: %m", filename);
+
+ return 0;
+}
+
+int udev_watch_end(sd_device *dev) {
+ char filename[STRLEN("/run/udev/watch/") + DECIMAL_STR_MAX(int)];
+ int wd, r;
+
+ if (inotify_fd < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid inotify descriptor.");
+
+ r = device_get_watch_handle(dev, &wd);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get watch handle, ignoring: %m");
+
+ log_device_debug(dev, "Removing watch");
+ (void) inotify_rm_watch(inotify_fd, wd);
+
+ xsprintf(filename, "/run/udev/watch/%d", wd);
+ (void) unlink(filename);
+
+ device_set_watch_handle(dev, -1);
+
+ return 0;
+}
+
+int udev_watch_lookup(int wd, sd_device **ret) {
+ char filename[STRLEN("/run/udev/watch/") + DECIMAL_STR_MAX(int)];
+ _cleanup_free_ char *device = NULL;
+ int r;
+
+ assert(ret);
+
+ if (inotify_fd < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid inotify descriptor.");
+
+ if (wd < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid watch handle.");
+
+ xsprintf(filename, "/run/udev/watch/%d", wd);
+ r = readlink_malloc(filename, &device);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read link '%s': %m", filename);
+
+ r = sd_device_new_from_device_id(ret, device);
+ if (r == -ENODEV)
+ return 0;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create sd_device object for '%s': %m", device);
+
+ return 1;
+}
diff --git a/src/udev/udev-watch.h b/src/udev/udev-watch.h
new file mode 100644
index 0000000..24a136d
--- /dev/null
+++ b/src/udev/udev-watch.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#pragma once
+
+#include "sd-device.h"
+
+int udev_watch_init(void);
+int udev_watch_restore(void);
+int udev_watch_begin(sd_device *dev);
+int udev_watch_end(sd_device *dev);
+int udev_watch_lookup(int wd, sd_device **ret);
diff --git a/src/udev/udev.conf b/src/udev/udev.conf
new file mode 100644
index 0000000..7deb771
--- /dev/null
+++ b/src/udev/udev.conf
@@ -0,0 +1,10 @@
+# see udev.conf(5) for details
+#
+# udevd is also started in the initrd. When this file is modified you might
+# also want to rebuild the initrd, so that it will include the modified configuration.
+
+#udev_log=info
+#children_max=
+#exec_delay=
+#event_timeout=180
+#resolve_names=early
diff --git a/src/udev/udev.h b/src/udev/udev.h
new file mode 100644
index 0000000..3bc69ff
--- /dev/null
+++ b/src/udev/udev.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#pragma once
+
+/*
+ * Copyright © 2003 Greg Kroah-Hartman <greg@kroah.com>
+ */
+
+#include "sd-device.h"
+#include "sd-netlink.h"
+
+#include "hashmap.h"
+#include "macro.h"
+#include "udev-util.h"
+#include "util.h"
+
+#define READ_END 0
+#define WRITE_END 1
+
+typedef struct UdevEvent {
+ sd_device *dev;
+ sd_device *dev_parent;
+ sd_device *dev_db_clone;
+ char *name;
+ char *program_result;
+ mode_t mode;
+ uid_t uid;
+ gid_t gid;
+ Hashmap *seclabel_list;
+ Hashmap *run_list;
+ usec_t exec_delay_usec;
+ usec_t birth_usec;
+ sd_netlink *rtnl;
+ unsigned builtin_run;
+ unsigned builtin_ret;
+ bool inotify_watch;
+ bool inotify_watch_final;
+ bool group_set;
+ bool group_final;
+ bool owner_set;
+ bool owner_final;
+ bool mode_set;
+ bool mode_final;
+ bool name_final;
+ bool devlink_final;
+ bool run_final;
+} UdevEvent;
+
+/* udev-rules.c */
+typedef struct UdevRules UdevRules;
+
+int udev_rules_new(UdevRules **ret_rules, ResolveNameTiming resolve_name_timing);
+UdevRules *udev_rules_free(UdevRules *rules);
+
+bool udev_rules_check_timestamp(UdevRules *rules);
+int udev_rules_apply_to_event(UdevRules *rules, UdevEvent *event,
+ usec_t timeout_usec,
+ Hashmap *properties_list);
+int udev_rules_apply_static_dev_perms(UdevRules *rules);
+
+static inline usec_t udev_warn_timeout(usec_t timeout_usec) {
+ return DIV_ROUND_UP(timeout_usec, 3);
+}
+
+/* udev-event.c */
+UdevEvent *udev_event_new(sd_device *dev, usec_t exec_delay_usec, sd_netlink *rtnl);
+UdevEvent *udev_event_free(UdevEvent *event);
+ssize_t udev_event_apply_format(UdevEvent *event,
+ const char *src, char *dest, size_t size,
+ bool replace_whitespace);
+int udev_event_spawn(UdevEvent *event,
+ usec_t timeout_usec,
+ bool accept_failure,
+ const char *cmd, char *result, size_t ressize);
+int udev_event_execute_rules(UdevEvent *event,
+ usec_t timeout_usec,
+ Hashmap *properties_list,
+ UdevRules *rules);
+void udev_event_execute_run(UdevEvent *event, usec_t timeout_usec);
+
+/* Cleanup functions */
+DEFINE_TRIVIAL_CLEANUP_FUNC(UdevEvent*, udev_event_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(UdevRules*, udev_rules_free);
diff --git a/src/udev/udev.pc.in b/src/udev/udev.pc.in
new file mode 100644
index 0000000..5acbb2d
--- /dev/null
+++ b/src/udev/udev.pc.in
@@ -0,0 +1,5 @@
+Name: udev
+Description: udev
+Version: @PROJECT_VERSION@
+
+udevdir=@udevlibexecdir@
diff --git a/src/udev/udevadm-control.c b/src/udev/udevadm-control.c
new file mode 100644
index 0000000..7cfc4c9
--- /dev/null
+++ b/src/udev/udevadm-control.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "parse-util.h"
+#include "process-util.h"
+#include "syslog-util.h"
+#include "time-util.h"
+#include "udevadm.h"
+#include "udev-ctrl.h"
+#include "util.h"
+#include "virt.h"
+
+static int help(void) {
+ printf("%s control OPTION\n\n"
+ "Control the udev daemon.\n\n"
+ " -h --help Show this help\n"
+ " -V --version Show package version\n"
+ " -e --exit Instruct the daemon to cleanup and exit\n"
+ " -l --log-priority=LEVEL Set the udev log level for the daemon\n"
+ " -s --stop-exec-queue Do not execute events, queue only\n"
+ " -S --start-exec-queue Execute events, flush queue\n"
+ " -R --reload Reload rules and databases\n"
+ " -p --property=KEY=VALUE Set a global property for all events\n"
+ " -m --children-max=N Maximum number of children\n"
+ " --ping Wait for udev to respond to a ping message\n"
+ " -t --timeout=SECONDS Maximum time to block for a reply\n"
+ , program_invocation_short_name);
+
+ return 0;
+}
+
+int control_main(int argc, char *argv[], void *userdata) {
+ _cleanup_(udev_ctrl_unrefp) struct udev_ctrl *uctrl = NULL;
+ usec_t timeout = 60 * USEC_PER_SEC;
+ int c, r;
+
+ enum {
+ ARG_PING = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "exit", no_argument, NULL, 'e' },
+ { "log-priority", required_argument, NULL, 'l' },
+ { "stop-exec-queue", no_argument, NULL, 's' },
+ { "start-exec-queue", no_argument, NULL, 'S' },
+ { "reload", no_argument, NULL, 'R' },
+ { "reload-rules", no_argument, NULL, 'R' }, /* alias for -R */
+ { "property", required_argument, NULL, 'p' },
+ { "env", required_argument, NULL, 'p' }, /* alias for -p */
+ { "children-max", required_argument, NULL, 'm' },
+ { "ping", no_argument, NULL, ARG_PING },
+ { "timeout", required_argument, NULL, 't' },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+
+ r = must_be_root();
+ if (r < 0)
+ return r;
+
+ if (running_in_chroot() > 0) {
+ log_info("Running in chroot, ignoring request.");
+ return 0;
+ }
+
+ if (argc <= 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This command expects one or more options.");
+
+ uctrl = udev_ctrl_new();
+ if (!uctrl)
+ return log_oom();
+
+ while ((c = getopt_long(argc, argv, "el:sSRp:m:t:Vh", options, NULL)) >= 0)
+ switch (c) {
+ case 'e':
+ r = udev_ctrl_send_exit(uctrl, timeout);
+ if (r < 0)
+ return r;
+ break;
+ case 'l':
+ r = log_level_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log priority '%s': %m", optarg);
+
+ r = udev_ctrl_send_set_log_level(uctrl, r, timeout);
+ if (r < 0)
+ return r;
+ break;
+ case 's':
+ r = udev_ctrl_send_stop_exec_queue(uctrl, timeout);
+ if (r < 0)
+ return r;
+ break;
+ case 'S':
+ r = udev_ctrl_send_start_exec_queue(uctrl, timeout);
+ if (r < 0)
+ return r;
+ break;
+ case 'R':
+ r = udev_ctrl_send_reload(uctrl, timeout);
+ if (r < 0)
+ return r;
+ break;
+ case 'p':
+ if (!strchr(optarg, '='))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "expect <KEY>=<value> instead of '%s'", optarg);
+
+ r = udev_ctrl_send_set_env(uctrl, optarg, timeout);
+ if (r < 0)
+ return r;
+ break;
+ case 'm': {
+ unsigned i;
+
+ r = safe_atou(optarg, &i);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse maximum number of events '%s': %m", optarg);
+
+ r = udev_ctrl_send_set_children_max(uctrl, i, timeout);
+ if (r < 0)
+ return r;
+ break;
+ }
+ case ARG_PING:
+ r = udev_ctrl_send_ping(uctrl, timeout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to udev daemon: %m");
+ break;
+ case 't':
+ r = parse_sec(optarg, &timeout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse timeout value '%s': %m", optarg);
+ break;
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option.");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Extraneous argument: %s", argv[optind]);
+
+ return 0;
+}
diff --git a/src/udev/udevadm-hwdb.c b/src/udev/udevadm-hwdb.c
new file mode 100644
index 0000000..7f8960f
--- /dev/null
+++ b/src/udev/udevadm-hwdb.c
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <getopt.h>
+
+#include "hwdb-util.h"
+#include "udevadm.h"
+#include "util.h"
+
+static const char *arg_test = NULL;
+static const char *arg_root = NULL;
+static const char *arg_hwdb_bin_dir = NULL;
+static bool arg_update = false;
+static bool arg_strict = false;
+
+static int help(void) {
+ printf("%s hwdb [OPTIONS]\n\n"
+ " -h --help Print this message\n"
+ " -V --version Print version of the program\n"
+ " -u --update Update the hardware database\n"
+ " -s --strict When updating, return non-zero exit value on any parsing error\n"
+ " --usr Generate in " UDEVLIBEXECDIR " instead of /etc/udev\n"
+ " -t --test=MODALIAS Query database and print result\n"
+ " -r --root=PATH Alternative root path in the filesystem\n\n"
+ "NOTE:\n"
+ "The sub-command 'hwdb' is deprecated, and is left for backwards compatibility.\n"
+ "Please use systemd-hwdb instead.\n"
+ , program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_USR = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "update", no_argument, NULL, 'u' },
+ { "usr", no_argument, NULL, ARG_USR },
+ { "strict", no_argument, NULL, 's' },
+ { "test", required_argument, NULL, 't' },
+ { "root", required_argument, NULL, 'r' },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+
+ int c;
+
+ while ((c = getopt_long(argc, argv, "ust:r:Vh", options, NULL)) >= 0)
+ switch(c) {
+ case 'u':
+ arg_update = true;
+ break;
+ case ARG_USR:
+ arg_hwdb_bin_dir = UDEVLIBEXECDIR;
+ break;
+ case 's':
+ arg_strict = true;
+ break;
+ case 't':
+ arg_test = optarg;
+ break;
+ case 'r':
+ arg_root = optarg;
+ break;
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ return 1;
+}
+
+int hwdb_main(int argc, char *argv[], void *userdata) {
+ int r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (!arg_update && !arg_test)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Either --update or --test must be used.");
+
+ if (arg_update) {
+ r = hwdb_update(arg_root, arg_hwdb_bin_dir, arg_strict, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_test)
+ return hwdb_query(arg_test);
+
+ return 0;
+}
diff --git a/src/udev/udevadm-info.c b/src/udev/udevadm-info.c
new file mode 100644
index 0000000..ebd15d3
--- /dev/null
+++ b/src/udev/udevadm-info.c
@@ -0,0 +1,470 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-enumerator-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "string-util.h"
+#include "udevadm.h"
+#include "udevadm-util.h"
+
+typedef enum ActionType {
+ ACTION_QUERY,
+ ACTION_ATTRIBUTE_WALK,
+ ACTION_DEVICE_ID_FILE,
+} ActionType;
+
+typedef enum QueryType {
+ QUERY_NAME,
+ QUERY_PATH,
+ QUERY_SYMLINK,
+ QUERY_PROPERTY,
+ QUERY_ALL,
+} QueryType;
+
+static bool arg_root = false;
+static bool arg_export = false;
+static const char *arg_export_prefix = NULL;
+
+static bool skip_attribute(const char *name) {
+ static const char* const skip[] = {
+ "uevent",
+ "dev",
+ "modalias",
+ "resource",
+ "driver",
+ "subsystem",
+ "module",
+ };
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(skip); i++)
+ if (streq(name, skip[i]))
+ return true;
+ return false;
+}
+
+static void print_all_attributes(sd_device *device, const char *key) {
+ const char *name, *value;
+
+ FOREACH_DEVICE_SYSATTR(device, name) {
+ size_t len;
+
+ if (skip_attribute(name))
+ continue;
+
+ if (sd_device_get_sysattr_value(device, name, &value) < 0)
+ continue;
+
+ /* skip any values that look like a path */
+ if (value[0] == '/')
+ continue;
+
+ /* skip nonprintable attributes */
+ len = strlen(value);
+ while (len > 0 && isprint(value[len-1]))
+ len--;
+ if (len > 0)
+ continue;
+
+ printf(" %s{%s}==\"%s\"\n", key, name, value);
+ }
+ puts("");
+}
+
+static int print_device_chain(sd_device *device) {
+ sd_device *child, *parent;
+ const char *str;
+
+ printf("\n"
+ "Udevadm info starts with the device specified by the devpath and then\n"
+ "walks up the chain of parent devices. It prints for every device\n"
+ "found, all possible attributes in the udev rules key format.\n"
+ "A rule to match, can be composed by the attributes of the device\n"
+ "and the attributes from one single parent device.\n"
+ "\n");
+
+ (void) sd_device_get_devpath(device, &str);
+ printf(" looking at device '%s':\n", str);
+ (void) sd_device_get_sysname(device, &str);
+ printf(" KERNEL==\"%s\"\n", str);
+ if (sd_device_get_subsystem(device, &str) < 0)
+ str = "";
+ printf(" SUBSYSTEM==\"%s\"\n", str);
+ if (sd_device_get_driver(device, &str) < 0)
+ str = "";
+ printf(" DRIVER==\"%s\"\n", str);
+ print_all_attributes(device, "ATTR");
+
+ for (child = device; sd_device_get_parent(child, &parent) >= 0; child = parent) {
+ (void) sd_device_get_devpath(parent, &str);
+ printf(" looking at parent device '%s':\n", str);
+ (void) sd_device_get_sysname(parent, &str);
+ printf(" KERNELS==\"%s\"\n", str);
+ if (sd_device_get_subsystem(parent, &str) < 0)
+ str = "";
+ printf(" SUBSYSTEMS==\"%s\"\n", str);
+ if (sd_device_get_driver(parent, &str) < 0)
+ str = "";
+ printf(" DRIVERS==\"%s\"\n", str);
+ print_all_attributes(parent, "ATTRS");
+ }
+
+ return 0;
+}
+
+static int print_record(sd_device *device) {
+ const char *str, *val;
+ int i;
+
+ (void) sd_device_get_devpath(device, &str);
+ printf("P: %s\n", str);
+
+ if (sd_device_get_devname(device, &str) >= 0) {
+ assert_se(val = path_startswith(str, "/dev/"));
+ printf("N: %s\n", val);
+ }
+
+ if (device_get_devlink_priority(device, &i) >= 0)
+ printf("L: %i\n", i);
+
+ FOREACH_DEVICE_DEVLINK(device, str) {
+ assert_se(val = path_startswith(str, "/dev/"));
+ printf("S: %s\n", val);
+ }
+
+ FOREACH_DEVICE_PROPERTY(device, str, val)
+ printf("E: %s=%s\n", str, val);
+
+ puts("");
+ return 0;
+}
+
+static int stat_device(const char *name, bool export, const char *prefix) {
+ struct stat statbuf;
+
+ if (stat(name, &statbuf) != 0)
+ return -errno;
+
+ if (export) {
+ if (!prefix)
+ prefix = "INFO_";
+ printf("%sMAJOR=%u\n"
+ "%sMINOR=%u\n",
+ prefix, major(statbuf.st_dev),
+ prefix, minor(statbuf.st_dev));
+ } else
+ printf("%u:%u\n", major(statbuf.st_dev), minor(statbuf.st_dev));
+ return 0;
+}
+
+static int export_devices(void) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+ int r;
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = device_enumerator_scan_devices(e);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE_AND_SUBSYSTEM(e, d)
+ print_record(d);
+
+ return 0;
+}
+
+static void cleanup_dir(DIR *dir, mode_t mask, int depth) {
+ struct dirent *dent;
+
+ if (depth <= 0)
+ return;
+
+ FOREACH_DIRENT_ALL(dent, dir, break) {
+ struct stat stats;
+
+ if (dent->d_name[0] == '.')
+ continue;
+ if (fstatat(dirfd(dir), dent->d_name, &stats, AT_SYMLINK_NOFOLLOW) != 0)
+ continue;
+ if ((stats.st_mode & mask) != 0)
+ continue;
+ if (S_ISDIR(stats.st_mode)) {
+ _cleanup_closedir_ DIR *dir2 = NULL;
+
+ dir2 = fdopendir(openat(dirfd(dir), dent->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC));
+ if (dir2)
+ cleanup_dir(dir2, mask, depth-1);
+
+ (void) unlinkat(dirfd(dir), dent->d_name, AT_REMOVEDIR);
+ } else
+ (void) unlinkat(dirfd(dir), dent->d_name, 0);
+ }
+}
+
+static void cleanup_db(void) {
+ _cleanup_closedir_ DIR *dir1 = NULL, *dir2 = NULL, *dir3 = NULL, *dir4 = NULL, *dir5 = NULL;
+
+ (void) unlink("/run/udev/queue.bin");
+
+ dir1 = opendir("/run/udev/data");
+ if (dir1)
+ cleanup_dir(dir1, S_ISVTX, 1);
+
+ dir2 = opendir("/run/udev/links");
+ if (dir2)
+ cleanup_dir(dir2, 0, 2);
+
+ dir3 = opendir("/run/udev/tags");
+ if (dir3)
+ cleanup_dir(dir3, 0, 2);
+
+ dir4 = opendir("/run/udev/static_node-tags");
+ if (dir4)
+ cleanup_dir(dir4, 0, 2);
+
+ dir5 = opendir("/run/udev/watch");
+ if (dir5)
+ cleanup_dir(dir5, 0, 1);
+}
+
+static int query_device(QueryType query, sd_device* device) {
+ int r;
+
+ assert(device);
+
+ switch(query) {
+ case QUERY_NAME: {
+ const char *node;
+
+ r = sd_device_get_devname(device, &node);
+ if (r < 0)
+ return log_error_errno(r, "No device node found: %m");
+
+ if (!arg_root)
+ assert_se(node = path_startswith(node, "/dev/"));
+ printf("%s\n", node);
+ return 0;
+ }
+
+ case QUERY_SYMLINK: {
+ const char *devlink, *prefix = "";
+
+ FOREACH_DEVICE_DEVLINK(device, devlink) {
+ if (!arg_root)
+ assert_se(devlink = path_startswith(devlink, "/dev/"));
+ printf("%s%s", prefix, devlink);
+ prefix = " ";
+ }
+ puts("");
+ return 0;
+ }
+
+ case QUERY_PATH: {
+ const char *devpath;
+
+ r = sd_device_get_devpath(device, &devpath);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device path: %m");
+
+ printf("%s\n", devpath);
+ return 0;
+ }
+
+ case QUERY_PROPERTY: {
+ const char *key, *value;
+
+ FOREACH_DEVICE_PROPERTY(device, key, value)
+ if (arg_export)
+ printf("%s%s='%s'\n", strempty(arg_export_prefix), key, value);
+ else
+ printf("%s=%s\n", key, value);
+ return 0;
+ }
+
+ case QUERY_ALL:
+ return print_record(device);
+ }
+
+ assert_not_reached("unknown query type");
+ return 0;
+}
+
+static int help(void) {
+ printf("%s info [OPTIONS] [DEVPATH|FILE]\n\n"
+ "Query sysfs or the udev database.\n\n"
+ " -h --help Print this message\n"
+ " -V --version Print version of the program\n"
+ " -q --query=TYPE Query device information:\n"
+ " name Name of device node\n"
+ " symlink Pointing to node\n"
+ " path sysfs device path\n"
+ " property The device properties\n"
+ " all All values\n"
+ " -p --path=SYSPATH sysfs device path used for query or attribute walk\n"
+ " -n --name=NAME Node or symlink name used for query or attribute walk\n"
+ " -r --root Prepend dev directory to path names\n"
+ " -a --attribute-walk Print all key matches walking along the chain\n"
+ " of parent devices\n"
+ " -d --device-id-of-file=FILE Print major:minor of device containing this file\n"
+ " -x --export Export key/value pairs\n"
+ " -P --export-prefix Export the key name with a prefix\n"
+ " -e --export-db Export the content of the udev database\n"
+ " -c --cleanup-db Clean up the udev database\n"
+ , program_invocation_short_name);
+
+ return 0;
+}
+
+int info_main(int argc, char *argv[], void *userdata) {
+ _cleanup_strv_free_ char **devices = NULL;
+ _cleanup_free_ char *name = NULL;
+ int c, r;
+
+ static const struct option options[] = {
+ { "name", required_argument, NULL, 'n' },
+ { "path", required_argument, NULL, 'p' },
+ { "query", required_argument, NULL, 'q' },
+ { "attribute-walk", no_argument, NULL, 'a' },
+ { "cleanup-db", no_argument, NULL, 'c' },
+ { "export-db", no_argument, NULL, 'e' },
+ { "root", no_argument, NULL, 'r' },
+ { "device-id-of-file", required_argument, NULL, 'd' },
+ { "export", no_argument, NULL, 'x' },
+ { "export-prefix", required_argument, NULL, 'P' },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+
+ ActionType action = ACTION_QUERY;
+ QueryType query = QUERY_ALL;
+
+ while ((c = getopt_long(argc, argv, "aced:n:p:q:rxP:RVh", options, NULL)) >= 0)
+ switch (c) {
+ case 'n':
+ case 'p': {
+ const char *prefix = c == 'n' ? "/dev/" : "/sys/";
+ char *path;
+
+ path = path_join(path_startswith(optarg, prefix) ? NULL : prefix, optarg);
+ if (!path)
+ return log_oom();
+
+ r = strv_consume(&devices, path);
+ if (r < 0)
+ return log_oom();
+ break;
+ }
+
+ case 'q':
+ action = ACTION_QUERY;
+ if (streq(optarg, "property") || streq(optarg, "env"))
+ query = QUERY_PROPERTY;
+ else if (streq(optarg, "name"))
+ query = QUERY_NAME;
+ else if (streq(optarg, "symlink"))
+ query = QUERY_SYMLINK;
+ else if (streq(optarg, "path"))
+ query = QUERY_PATH;
+ else if (streq(optarg, "all"))
+ query = QUERY_ALL;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "unknown query type");
+ break;
+ case 'r':
+ arg_root = true;
+ break;
+ case 'd':
+ action = ACTION_DEVICE_ID_FILE;
+ r = free_and_strdup(&name, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+ case 'a':
+ action = ACTION_ATTRIBUTE_WALK;
+ break;
+ case 'e':
+ return export_devices();
+ case 'c':
+ cleanup_db();
+ return 0;
+ case 'x':
+ arg_export = true;
+ break;
+ case 'P':
+ arg_export = true;
+ arg_export_prefix = optarg;
+ break;
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ if (action == ACTION_DEVICE_ID_FILE) {
+ if (argv[optind])
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Positional arguments are not allowed with -d/--device-id-of-file.");
+ assert(name);
+ return stat_device(name, arg_export, arg_export_prefix);
+ }
+
+ r = strv_extend_strv(&devices, argv + optind, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to build argument list: %m");
+
+ if (strv_isempty(devices))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "A device name or path is required");
+ if (action == ACTION_ATTRIBUTE_WALK && strv_length(devices) > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Only one device may be specified with -a/--attribute-walk");
+
+ char **p;
+ STRV_FOREACH(p, devices) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+
+ r = find_device(*p, NULL, &device);
+ if (r == -EINVAL)
+ return log_error_errno(r, "Bad argument \"%s\", expected an absolute path in /dev/ or /sys or a unit name: %m", *p);
+ if (r < 0)
+ return log_error_errno(r, "Unknown device \"%s\": %m", *p);
+
+ if (action == ACTION_QUERY)
+ r = query_device(query, device);
+ else if (action == ACTION_ATTRIBUTE_WALK)
+ r = print_device_chain(device);
+ else
+ assert_not_reached("Unknown action");
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/udev/udevadm-monitor.c b/src/udev/udevadm-monitor.c
new file mode 100644
index 0000000..3dde3f3
--- /dev/null
+++ b/src/udev/udevadm-monitor.c
@@ -0,0 +1,263 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+
+#include "sd-device.h"
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "device-monitor-private.h"
+#include "device-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "hashmap.h"
+#include "set.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "udevadm.h"
+#include "virt.h"
+
+static bool arg_show_property = false;
+static bool arg_print_kernel = false;
+static bool arg_print_udev = false;
+static Set *arg_tag_filter = NULL;
+static Hashmap *arg_subsystem_filter = NULL;
+
+static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ const char *action = NULL, *devpath = NULL, *subsystem = NULL;
+ MonitorNetlinkGroup group = PTR_TO_INT(userdata);
+ struct timespec ts;
+
+ assert(device);
+ assert(IN_SET(group, MONITOR_GROUP_UDEV, MONITOR_GROUP_KERNEL));
+
+ (void) sd_device_get_property_value(device, "ACTION", &action);
+ (void) sd_device_get_devpath(device, &devpath);
+ (void) sd_device_get_subsystem(device, &subsystem);
+
+ assert_se(clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
+
+ printf("%-6s[%"PRI_TIME".%06"PRI_NSEC"] %-8s %s (%s)\n",
+ group == MONITOR_GROUP_UDEV ? "UDEV" : "KERNEL",
+ ts.tv_sec, (nsec_t)ts.tv_nsec/1000,
+ action, devpath, subsystem);
+
+ if (arg_show_property) {
+ const char *key, *value;
+
+ FOREACH_DEVICE_PROPERTY(device, key, value)
+ printf("%s=%s\n", key, value);
+
+ printf("\n");
+ }
+
+ return 0;
+}
+
+static int setup_monitor(MonitorNetlinkGroup sender, sd_event *event, sd_device_monitor **ret) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
+ const char *subsystem, *devtype, *tag;
+ Iterator i;
+ int r;
+
+ r = device_monitor_new_full(&monitor, sender, -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create netlink socket: %m");
+
+ (void) sd_device_monitor_set_receive_buffer_size(monitor, 128*1024*1024);
+
+ r = sd_device_monitor_attach_event(monitor, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach event: %m");
+
+ HASHMAP_FOREACH_KEY(devtype, subsystem, arg_subsystem_filter, i) {
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, subsystem, devtype);
+ if (r < 0)
+ return log_error_errno(r, "Failed to apply subsystem filter '%s%s%s': %m",
+ subsystem, devtype ? "/" : "", strempty(devtype));
+ }
+
+ SET_FOREACH(tag, arg_tag_filter, i) {
+ r = sd_device_monitor_filter_add_match_tag(monitor, tag);
+ if (r < 0)
+ return log_error_errno(r, "Failed to apply tag filter '%s': %m", tag);
+ }
+
+ r = sd_device_monitor_start(monitor, device_monitor_handler, INT_TO_PTR(sender));
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+
+ (void) sd_event_source_set_description(sd_device_monitor_get_event_source(monitor),
+ sender == MONITOR_GROUP_UDEV ? "device-monitor-udev" : "device-monitor-kernel");
+
+ *ret = TAKE_PTR(monitor);
+ return 0;
+}
+
+static int help(void) {
+ printf("%s monitor [OPTIONS]\n\n"
+ "Listen to kernel and udev events.\n\n"
+ " -h --help Show this help\n"
+ " -V --version Show package version\n"
+ " -p --property Print the event properties\n"
+ " -k --kernel Print kernel uevents\n"
+ " -u --udev Print udev events\n"
+ " -s --subsystem-match=SUBSYSTEM[/DEVTYPE] Filter events by subsystem\n"
+ " -t --tag-match=TAG Filter events by tag\n"
+ , program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "property", no_argument, NULL, 'p' },
+ { "environment", no_argument, NULL, 'e' }, /* alias for -p */
+ { "kernel", no_argument, NULL, 'k' },
+ { "udev", no_argument, NULL, 'u' },
+ { "subsystem-match", required_argument, NULL, 's' },
+ { "tag-match", required_argument, NULL, 't' },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+
+ int r, c;
+
+ while ((c = getopt_long(argc, argv, "pekus:t:Vh", options, NULL)) >= 0)
+ switch (c) {
+ case 'p':
+ case 'e':
+ arg_show_property = true;
+ break;
+ case 'k':
+ arg_print_kernel = true;
+ break;
+ case 'u':
+ arg_print_udev = true;
+ break;
+ case 's': {
+ _cleanup_free_ char *subsystem = NULL, *devtype = NULL;
+ const char *slash;
+
+ slash = strchr(optarg, '/');
+ if (slash) {
+ devtype = strdup(slash + 1);
+ if (!devtype)
+ return -ENOMEM;
+
+ subsystem = strndup(optarg, slash - optarg);
+ } else
+ subsystem = strdup(optarg);
+
+ if (!subsystem)
+ return -ENOMEM;
+
+ r = hashmap_ensure_allocated(&arg_subsystem_filter, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(arg_subsystem_filter, subsystem, devtype);
+ if (r < 0)
+ return r;
+
+ subsystem = devtype = NULL;
+ break;
+ }
+ case 't': {
+ _cleanup_free_ char *tag = NULL;
+
+ r = set_ensure_allocated(&arg_tag_filter, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ tag = strdup(optarg);
+ if (!tag)
+ return -ENOMEM;
+
+ r = set_put(arg_tag_filter, tag);
+ if (r < 0)
+ return r;
+
+ tag = NULL;
+ break;
+ }
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option.");
+ }
+
+ if (!arg_print_kernel && !arg_print_udev) {
+ arg_print_kernel = true;
+ arg_print_udev = true;
+ }
+
+ return 1;
+}
+
+int monitor_main(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *kernel_monitor = NULL, *udev_monitor = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ int r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ goto finalize;
+
+ if (running_in_chroot() > 0) {
+ log_info("Running in chroot, ignoring request.");
+ return 0;
+ }
+
+ /* Callers are expecting to see events as they happen: Line buffering */
+ setlinebuf(stdout);
+
+ r = sd_event_default(&event);
+ if (r < 0) {
+ log_error_errno(r, "Failed to initialize event: %m");
+ goto finalize;
+ }
+
+ assert_se(sigprocmask_many(SIG_UNBLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+
+ printf("monitor will print the received events for:\n");
+ if (arg_print_udev) {
+ r = setup_monitor(MONITOR_GROUP_UDEV, event, &udev_monitor);
+ if (r < 0)
+ goto finalize;
+
+ printf("UDEV - the event which udev sends out after rule processing\n");
+ }
+
+ if (arg_print_kernel) {
+ r = setup_monitor(MONITOR_GROUP_KERNEL, event, &kernel_monitor);
+ if (r < 0)
+ goto finalize;
+
+ printf("KERNEL - the kernel uevent\n");
+ }
+ printf("\n");
+
+ r = sd_event_loop(event);
+ if (r < 0) {
+ log_error_errno(r, "Failed to run event loop: %m");
+ goto finalize;
+ }
+
+ r = 0;
+
+finalize:
+ hashmap_free_free_free(arg_subsystem_filter);
+ set_free_free(arg_tag_filter);
+
+ return r;
+}
diff --git a/src/udev/udevadm-settle.c b/src/udev/udevadm-settle.c
new file mode 100644
index 0000000..8d9c450
--- /dev/null
+++ b/src/udev/udevadm-settle.c
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright © 2009 Canonical Ltd.
+ * Copyright © 2009 Scott James Remnant <scott@netsplit.com>
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <poll.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "libudev-util.h"
+#include "time-util.h"
+#include "udevadm.h"
+#include "udev-ctrl.h"
+#include "util.h"
+#include "virt.h"
+
+static usec_t arg_timeout = 120 * USEC_PER_SEC;
+static const char *arg_exists = NULL;
+
+static int help(void) {
+ printf("%s settle [OPTIONS]\n\n"
+ "Wait for pending udev events.\n\n"
+ " -h --help Show this help\n"
+ " -V --version Show package version\n"
+ " -t --timeout=SEC Maximum time to wait for events\n"
+ " -E --exit-if-exists=FILE Stop waiting if file exists\n"
+ , program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "timeout", required_argument, NULL, 't' },
+ { "exit-if-exists", required_argument, NULL, 'E' },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ { "seq-start", required_argument, NULL, 's' }, /* removed */
+ { "seq-end", required_argument, NULL, 'e' }, /* removed */
+ { "quiet", no_argument, NULL, 'q' }, /* removed */
+ {}
+ };
+
+ int c, r;
+
+ while ((c = getopt_long(argc, argv, "t:E:Vhs:e:q", options, NULL)) >= 0) {
+ switch (c) {
+ case 't':
+ r = parse_sec(optarg, &arg_timeout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse timeout value '%s': %m", optarg);
+ break;
+ case 'E':
+ arg_exists = optarg;
+ break;
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case 's':
+ case 'e':
+ case 'q':
+ return log_info_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option -%c no longer supported.",
+ c);
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option.");
+ }
+ }
+
+ return 1;
+}
+
+int settle_main(int argc, char *argv[], void *userdata) {
+ _cleanup_(udev_queue_unrefp) struct udev_queue *queue = NULL;
+ struct pollfd pfd;
+ usec_t deadline;
+ int r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (running_in_chroot() > 0) {
+ log_info("Running in chroot, ignoring request.");
+ return 0;
+ }
+
+ deadline = now(CLOCK_MONOTONIC) + arg_timeout;
+
+ /* guarantee that the udev daemon isn't pre-processing */
+ if (getuid() == 0) {
+ _cleanup_(udev_ctrl_unrefp) struct udev_ctrl *uctrl = NULL;
+
+ uctrl = udev_ctrl_new();
+ if (uctrl) {
+ r = udev_ctrl_send_ping(uctrl, MAX(5 * USEC_PER_SEC, arg_timeout));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to connect to udev daemon: %m");
+ return 0;
+ }
+ }
+ }
+
+ queue = udev_queue_new(NULL);
+ if (!queue)
+ return log_error_errno(errno, "Failed to get udev queue: %m");
+
+ r = udev_queue_get_fd(queue);
+ if (r < 0) {
+ log_debug_errno(r, "Queue is empty, nothing to watch.");
+ return 0;
+ }
+
+ pfd = (struct pollfd) {
+ .events = POLLIN,
+ .fd = r,
+ };
+
+ for (;;) {
+ if (arg_exists && access(arg_exists, F_OK) >= 0)
+ return 0;
+
+ /* exit if queue is empty */
+ if (udev_queue_get_queue_is_empty(queue))
+ return 0;
+
+ if (now(CLOCK_MONOTONIC) >= deadline)
+ return -ETIMEDOUT;
+
+ /* wake up when queue becomes empty */
+ if (poll(&pfd, 1, MSEC_PER_SEC) > 0 && pfd.revents & POLLIN) {
+ r = udev_queue_flush(queue);
+ if (r < 0)
+ return log_error_errno(r, "Failed to flush queue: %m");
+ }
+ }
+}
diff --git a/src/udev/udevadm-test-builtin.c b/src/udev/udevadm-test-builtin.c
new file mode 100644
index 0000000..22c3184
--- /dev/null
+++ b/src/udev/udevadm-test-builtin.c
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "log.h"
+#include "udev-builtin.h"
+#include "udevadm.h"
+#include "udevadm-util.h"
+
+static const char *arg_command = NULL;
+static const char *arg_syspath = NULL;
+
+static int help(void) {
+ printf("%s test-builtin [OPTIONS] COMMAND DEVPATH\n\n"
+ "Test a built-in command.\n\n"
+ " -h --help Print this message\n"
+ " -V --version Print version of the program\n\n"
+ "Commands:\n"
+ , program_invocation_short_name);
+
+ udev_builtin_list();
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+
+ int c;
+
+ while ((c = getopt_long(argc, argv, "Vh", options, NULL)) >= 0)
+ switch (c) {
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ arg_command = argv[optind++];
+ if (!arg_command)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Command missing.");
+
+ arg_syspath = argv[optind++];
+ if (!arg_syspath)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "syspath missing.");
+
+ return 1;
+}
+
+int builtin_main(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ enum udev_builtin_cmd cmd;
+ int r;
+
+ log_set_max_level(LOG_DEBUG);
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ udev_builtin_init();
+
+ cmd = udev_builtin_lookup(arg_command);
+ if (cmd < 0) {
+ log_error("Unknown command '%s'", arg_command);
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = find_device(arg_syspath, "/sys", &dev);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open device '%s': %m", arg_syspath);
+ goto finish;
+ }
+
+ r = udev_builtin_run(dev, cmd, arg_command, true);
+ if (r < 0)
+ log_debug_errno(r, "Builtin command '%s' fails: %m", arg_command);
+
+finish:
+ udev_builtin_exit();
+ return r;
+}
diff --git a/src/udev/udevadm-test.c b/src/udev/udevadm-test.c
new file mode 100644
index 0000000..54c525e
--- /dev/null
+++ b/src/udev/udevadm-test.c
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright © 2003-2004 Greg Kroah-Hartman <greg@kroah.com>
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/signalfd.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+
+#include "device-private.h"
+#include "device-util.h"
+#include "libudev-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "udev-builtin.h"
+#include "udev.h"
+#include "udevadm.h"
+
+static const char *arg_action = "add";
+static ResolveNameTiming arg_resolve_name_timing = RESOLVE_NAME_EARLY;
+static char arg_syspath[UTIL_PATH_SIZE] = {};
+
+static int help(void) {
+
+ printf("%s test [OPTIONS] DEVPATH\n\n"
+ "Test an event run.\n\n"
+ " -h --help Show this help\n"
+ " -V --version Show package version\n"
+ " -a --action=ACTION Set action string\n"
+ " -N --resolve-names=early|late|never When to resolve names\n"
+ , program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "action", required_argument, NULL, 'a' },
+ { "resolve-names", required_argument, NULL, 'N' },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+
+ int c;
+
+ while ((c = getopt_long(argc, argv, "a:N:Vh", options, NULL)) >= 0)
+ switch (c) {
+ case 'a':
+ arg_action = optarg;
+ break;
+ case 'N':
+ arg_resolve_name_timing = resolve_name_timing_from_string(optarg);
+ if (arg_resolve_name_timing < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--resolve-names= must be early, late or never");
+ break;
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ if (!argv[optind])
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "syspath parameter missing.");
+
+ /* add /sys if needed */
+ if (!startswith(argv[optind], "/sys"))
+ strscpyl(arg_syspath, sizeof(arg_syspath), "/sys", argv[optind], NULL);
+ else
+ strscpy(arg_syspath, sizeof(arg_syspath), argv[optind]);
+
+ return 1;
+}
+
+int test_main(int argc, char *argv[], void *userdata) {
+ _cleanup_(udev_rules_freep) UdevRules *rules = NULL;
+ _cleanup_(udev_event_freep) UdevEvent *event = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ const char *cmd, *key, *value;
+ sigset_t mask, sigmask_orig;
+ Iterator i;
+ void *val;
+ int r;
+
+ log_set_max_level(LOG_DEBUG);
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ printf("This program is for debugging only, it does not run any program\n"
+ "specified by a RUN key. It may show incorrect results, because\n"
+ "some values may be different, or not available at a simulation run.\n"
+ "\n");
+
+ assert_se(sigprocmask(SIG_SETMASK, NULL, &sigmask_orig) >= 0);
+
+ udev_builtin_init();
+
+ r = udev_rules_new(&rules, arg_resolve_name_timing);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read udev rules: %m");
+ goto out;
+ }
+
+ r = device_new_from_synthetic_event(&dev, arg_syspath, arg_action);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open device '%s': %m", arg_syspath);
+ goto out;
+ }
+
+ /* don't read info from the db */
+ device_seal(dev);
+
+ event = udev_event_new(dev, 0, NULL);
+
+ assert_se(sigfillset(&mask) >= 0);
+ assert_se(sigprocmask(SIG_SETMASK, &mask, &sigmask_orig) >= 0);
+
+ udev_event_execute_rules(event, 60 * USEC_PER_SEC, NULL, rules);
+
+ FOREACH_DEVICE_PROPERTY(dev, key, value)
+ printf("%s=%s\n", key, value);
+
+ HASHMAP_FOREACH_KEY(val, cmd, event->run_list, i) {
+ char program[UTIL_PATH_SIZE];
+
+ udev_event_apply_format(event, cmd, program, sizeof(program), false);
+ printf("run: '%s'\n", program);
+ }
+
+ r = 0;
+out:
+ udev_builtin_exit();
+ return r;
+}
diff --git a/src/udev/udevadm-trigger.c b/src/udev/udevadm-trigger.c
new file mode 100644
index 0000000..9532946
--- /dev/null
+++ b/src/udev/udevadm-trigger.c
@@ -0,0 +1,379 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <errno.h>
+#include <getopt.h>
+
+#include "sd-device.h"
+#include "sd-event.h"
+
+#include "device-enumerator-private.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+#include "udevadm.h"
+#include "udevadm-util.h"
+#include "udev-ctrl.h"
+#include "virt.h"
+
+static bool arg_verbose = false;
+static bool arg_dry_run = false;
+
+static int exec_list(sd_device_enumerator *e, const char *action, Set *settle_set) {
+ sd_device *d;
+ int r;
+
+ FOREACH_DEVICE_AND_SUBSYSTEM(e, d) {
+ _cleanup_free_ char *filename = NULL;
+ const char *syspath;
+
+ if (sd_device_get_syspath(d, &syspath) < 0)
+ continue;
+
+ if (arg_verbose)
+ printf("%s\n", syspath);
+ if (arg_dry_run)
+ continue;
+
+ filename = path_join(syspath, "uevent");
+ if (!filename)
+ return log_oom();
+
+ r = write_string_file(filename, action, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to write '%s' to '%s', ignoring: %m", action, filename);
+ continue;
+ }
+
+ if (settle_set) {
+ r = set_put_strdup(settle_set, syspath);
+ if (r < 0)
+ return log_oom();
+ }
+ }
+
+ return 0;
+}
+
+static int device_monitor_handler(sd_device_monitor *m, sd_device *dev, void *userdata) {
+ Set *settle_set = userdata;
+ const char *syspath;
+
+ assert(dev);
+ assert(settle_set);
+
+ if (sd_device_get_syspath(dev, &syspath) < 0)
+ return 0;
+
+ if (arg_verbose)
+ printf("settle %s\n", syspath);
+
+ if (!set_remove(settle_set, syspath))
+ log_debug("Got epoll event on syspath %s not present in syspath set", syspath);
+
+ if (set_isempty(settle_set))
+ return sd_event_exit(sd_device_monitor_get_event(m), 0);
+
+ return 0;
+}
+
+static char* keyval(const char *str, const char **key, const char **val) {
+ char *buf, *pos;
+
+ buf = strdup(str);
+ if (!buf)
+ return NULL;
+
+ pos = strchr(buf, '=');
+ if (pos) {
+ pos[0] = 0;
+ pos++;
+ }
+
+ *key = buf;
+ *val = pos;
+
+ return buf;
+}
+
+static int help(void) {
+ printf("%s trigger [OPTIONS] DEVPATH\n\n"
+ "Request events from the kernel.\n\n"
+ " -h --help Show this help\n"
+ " -V --version Show package version\n"
+ " -v --verbose Print the list of devices while running\n"
+ " -n --dry-run Do not actually trigger the events\n"
+ " -t --type= Type of events to trigger\n"
+ " devices sysfs devices (default)\n"
+ " subsystems sysfs subsystems and drivers\n"
+ " -c --action=ACTION Event action value, default is \"change\"\n"
+ " -s --subsystem-match=SUBSYSTEM Trigger devices from a matching subsystem\n"
+ " -S --subsystem-nomatch=SUBSYSTEM Exclude devices from a matching subsystem\n"
+ " -a --attr-match=FILE[=VALUE] Trigger devices with a matching attribute\n"
+ " -A --attr-nomatch=FILE[=VALUE] Exclude devices with a matching attribute\n"
+ " -p --property-match=KEY=VALUE Trigger devices with a matching property\n"
+ " -g --tag-match=KEY=VALUE Trigger devices with a matching property\n"
+ " -y --sysname-match=NAME Trigger devices with this /sys path\n"
+ " --name-match=NAME Trigger devices with this /dev name\n"
+ " -b --parent-match=NAME Trigger devices with that parent device\n"
+ " -w --settle Wait for the triggered events to complete\n"
+ " --wait-daemon[=SECONDS] Wait for udevd daemon to be initialized\n"
+ " before triggering uevents\n"
+ , program_invocation_short_name);
+
+ return 0;
+}
+
+int trigger_main(int argc, char *argv[], void *userdata) {
+ enum {
+ ARG_NAME = 0x100,
+ ARG_PING,
+ };
+
+ static const struct option options[] = {
+ { "verbose", no_argument, NULL, 'v' },
+ { "dry-run", no_argument, NULL, 'n' },
+ { "type", required_argument, NULL, 't' },
+ { "action", required_argument, NULL, 'c' },
+ { "subsystem-match", required_argument, NULL, 's' },
+ { "subsystem-nomatch", required_argument, NULL, 'S' },
+ { "attr-match", required_argument, NULL, 'a' },
+ { "attr-nomatch", required_argument, NULL, 'A' },
+ { "property-match", required_argument, NULL, 'p' },
+ { "tag-match", required_argument, NULL, 'g' },
+ { "sysname-match", required_argument, NULL, 'y' },
+ { "name-match", required_argument, NULL, ARG_NAME },
+ { "parent-match", required_argument, NULL, 'b' },
+ { "settle", no_argument, NULL, 'w' },
+ { "wait-daemon", optional_argument, NULL, ARG_PING },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+ enum {
+ TYPE_DEVICES,
+ TYPE_SUBSYSTEMS,
+ } device_type = TYPE_DEVICES;
+ const char *action = "change";
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *m = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_set_free_free_ Set *settle_set = NULL;
+ usec_t ping_timeout_usec = 5 * USEC_PER_SEC;
+ bool settle = false, ping = false;
+ int c, r;
+
+ if (running_in_chroot() > 0) {
+ log_info("Running in chroot, ignoring request.");
+ return 0;
+ }
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ while ((c = getopt_long(argc, argv, "vnt:c:s:S:a:A:p:g:y:b:wVh", options, NULL)) >= 0) {
+ _cleanup_free_ char *buf = NULL;
+ const char *key, *val;
+
+ switch (c) {
+ case 'v':
+ arg_verbose = true;
+ break;
+ case 'n':
+ arg_dry_run = true;
+ break;
+ case 't':
+ if (streq(optarg, "devices"))
+ device_type = TYPE_DEVICES;
+ else if (streq(optarg, "subsystems"))
+ device_type = TYPE_SUBSYSTEMS;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown type --type=%s", optarg);
+ break;
+ case 'c':
+ if (STR_IN_SET(optarg, "add", "remove", "change"))
+ action = optarg;
+ else
+ log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown action '%s'", optarg);
+
+ break;
+ case 's':
+ r = sd_device_enumerator_add_match_subsystem(e, optarg, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add subsystem match '%s': %m", optarg);
+ break;
+ case 'S':
+ r = sd_device_enumerator_add_match_subsystem(e, optarg, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add negative subsystem match '%s': %m", optarg);
+ break;
+ case 'a':
+ buf = keyval(optarg, &key, &val);
+ if (!buf)
+ return log_oom();
+ r = sd_device_enumerator_add_match_sysattr(e, key, val, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add sysattr match '%s=%s': %m", key, val);
+ break;
+ case 'A':
+ buf = keyval(optarg, &key, &val);
+ if (!buf)
+ return log_oom();
+ r = sd_device_enumerator_add_match_sysattr(e, key, val, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add negative sysattr match '%s=%s': %m", key, val);
+ break;
+ case 'p':
+ buf = keyval(optarg, &key, &val);
+ if (!buf)
+ return log_oom();
+ r = sd_device_enumerator_add_match_property(e, key, val);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add property match '%s=%s': %m", key, val);
+ break;
+ case 'g':
+ r = sd_device_enumerator_add_match_tag(e, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add tag match '%s': %m", optarg);
+ break;
+ case 'y':
+ r = sd_device_enumerator_add_match_sysname(e, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add sysname match '%s': %m", optarg);
+ break;
+ case 'b': {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+
+ r = find_device(optarg, "/sys", &dev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open the device '%s': %m", optarg);
+
+ r = sd_device_enumerator_add_match_parent(e, dev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add parent match '%s': %m", optarg);
+ break;
+ }
+ case 'w':
+ settle = true;
+ break;
+
+ case ARG_NAME: {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+
+ r = find_device(optarg, "/dev/", &dev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open the device '%s': %m", optarg);
+
+ r = sd_device_enumerator_add_match_parent(e, dev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add parent match '%s': %m", optarg);
+ break;
+ }
+
+ case ARG_PING: {
+ ping = true;
+ if (optarg) {
+ r = parse_sec(optarg, &ping_timeout_usec);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse timeout value '%s', ignoring: %m", optarg);
+ }
+ break;
+ }
+
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option");
+ }
+ }
+
+ if (!arg_dry_run || ping) {
+ r = must_be_root();
+ if (r < 0)
+ return r;
+ }
+
+ if (ping) {
+ _cleanup_(udev_ctrl_unrefp) struct udev_ctrl *uctrl = NULL;
+
+ uctrl = udev_ctrl_new();
+ if (!uctrl)
+ return log_oom();
+
+ r = udev_ctrl_send_ping(uctrl, ping_timeout_usec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to udev daemon: %m");
+ }
+
+ for (; optind < argc; optind++) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+
+ r = find_device(argv[optind], NULL, &dev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open the device '%s': %m", argv[optind]);
+
+ r = sd_device_enumerator_add_match_parent(e, dev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add parent match '%s': %m", argv[optind]);
+ }
+
+ if (settle) {
+ settle_set = set_new(&string_hash_ops);
+ if (!settle_set)
+ return log_oom();
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get default event: %m");
+
+ r = sd_device_monitor_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create device monitor object: %m");
+
+ r = sd_device_monitor_attach_event(m, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach event to device monitor: %m");
+
+ r = sd_device_monitor_start(m, device_monitor_handler, settle_set);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+ }
+
+ switch (device_type) {
+ case TYPE_SUBSYSTEMS:
+ r = device_enumerator_scan_subsystems(e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to scan subsystems: %m");
+ break;
+ case TYPE_DEVICES:
+ r = device_enumerator_scan_devices(e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to scan devices: %m");
+ break;
+ default:
+ assert_not_reached("Unknown device type");
+ }
+ r = exec_list(e, action, settle_set);
+ if (r < 0)
+ return r;
+
+ if (event && !set_isempty(settle_set)) {
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Event loop failed: %m");
+ }
+
+ return 0;
+}
diff --git a/src/udev/udevadm-util.c b/src/udev/udevadm-util.c
new file mode 100644
index 0000000..557982b
--- /dev/null
+++ b/src/udev/udevadm-util.c
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "device-private.h"
+#include "path-util.h"
+#include "udevadm-util.h"
+#include "unit-name.h"
+
+int find_device(const char *id, const char *prefix, sd_device **ret) {
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ assert(id);
+ assert(ret);
+
+ if (prefix) {
+ if (!path_startswith(id, prefix)) {
+ id = path = path_join(prefix, id);
+ if (!path)
+ return -ENOMEM;
+ }
+ } else {
+ /* In cases where the argument is generic (no prefix specified),
+ * check if the argument looks like a device unit name. */
+ if (unit_name_is_valid(id, UNIT_NAME_PLAIN) &&
+ unit_name_to_type(id) == UNIT_DEVICE) {
+ r = unit_name_to_path(id, &path);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to convert \"%s\" to a device path: %m", id);
+ id = path;
+ }
+ }
+
+ if (path_startswith(id, "/sys/"))
+ return sd_device_new_from_syspath(ret, id);
+
+ if (path_startswith(id, "/dev/")) {
+ struct stat st;
+
+ if (stat(id, &st) < 0)
+ return -errno;
+
+ return device_new_from_stat_rdev(ret, &st);
+ }
+
+ return -EINVAL;
+}
diff --git a/src/udev/udevadm-util.h b/src/udev/udevadm-util.h
new file mode 100644
index 0000000..59e8902
--- /dev/null
+++ b/src/udev/udevadm-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#pragma once
+
+#include "sd-device.h"
+
+int find_device(const char *id, const char *prefix, sd_device **ret);
diff --git a/src/udev/udevadm.c b/src/udev/udevadm.c
new file mode 100644
index 0000000..2819431
--- /dev/null
+++ b/src/udev/udevadm.c
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "selinux-util.h"
+#include "string-util.h"
+#include "udevadm.h"
+#include "udev-util.h"
+#include "verbs.h"
+#include "util.h"
+
+static int help(void) {
+ static const char * short_descriptions[][2] = {
+ { "info", "Query sysfs or the udev database" },
+ { "trigger", "Request events from the kernel" },
+ { "settle", "Wait for pending udev events" },
+ { "control", "Control the udev daemon" },
+ { "monitor", "Listen to kernel and udev events" },
+ { "test", "Test an event run" },
+ { "test-builtin", "Test a built-in command" },
+ };
+
+ _cleanup_free_ char *link = NULL;
+ size_t i;
+ int r;
+
+ r = terminal_urlify_man("udevadm", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [--help] [--version] [--debug] COMMAND [COMMAND OPTIONS]\n\n"
+ "Send control commands or test the device manager.\n\n"
+ "Commands:\n"
+ , program_invocation_short_name);
+
+ for (i = 0; i < ELEMENTSOF(short_descriptions); i++)
+ printf(" %-12s %s\n", short_descriptions[i][0], short_descriptions[i][1]);
+
+ printf("\nSee the %s for details.\n", link);
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "debug", no_argument, NULL, 'd' },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'V' },
+ {}
+ };
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "+dhV", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'd':
+ log_set_max_level(LOG_DEBUG);
+ break;
+
+ case 'h':
+ return help();
+
+ case 'V':
+ return print_version();
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1; /* work to do */
+}
+
+static int version_main(int argc, char *argv[], void *userdata) {
+ return print_version();
+}
+
+static int help_main(int argc, char *argv[], void *userdata) {
+ return help();
+}
+
+static int udevadm_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "info", VERB_ANY, VERB_ANY, 0, info_main },
+ { "trigger", VERB_ANY, VERB_ANY, 0, trigger_main },
+ { "settle", VERB_ANY, VERB_ANY, 0, settle_main },
+ { "control", VERB_ANY, VERB_ANY, 0, control_main },
+ { "monitor", VERB_ANY, VERB_ANY, 0, monitor_main },
+ { "hwdb", VERB_ANY, VERB_ANY, 0, hwdb_main },
+ { "test", VERB_ANY, VERB_ANY, 0, test_main },
+ { "test-builtin", VERB_ANY, VERB_ANY, 0, builtin_main },
+ { "version", VERB_ANY, VERB_ANY, 0, version_main },
+ { "help", VERB_ANY, VERB_ANY, 0, help_main },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ udev_parse_config();
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ log_set_max_level_realm(LOG_REALM_SYSTEMD, log_get_max_level());
+
+ mac_selinux_init();
+ return udevadm_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/udev/udevadm.h b/src/udev/udevadm.h
new file mode 100644
index 0000000..86b24af
--- /dev/null
+++ b/src/udev/udevadm.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#pragma once
+
+#include <stdio.h>
+
+#include "build.h"
+#include "macro.h"
+
+int info_main(int argc, char *argv[], void *userdata);
+int trigger_main(int argc, char *argv[], void *userdata);
+int settle_main(int argc, char *argv[], void *userdata);
+int control_main(int argc, char *argv[], void *userdata);
+int monitor_main(int argc, char *argv[], void *userdata);
+int hwdb_main(int argc, char *argv[], void *userdata);
+int test_main(int argc, char *argv[], void *userdata);
+int builtin_main(int argc, char *argv[], void *userdata);
+
+static inline int print_version(void) {
+ /* Dracut relies on the version being a single integer */
+ puts(STRINGIFY(PROJECT_VERSION));
+ return 0;
+}
diff --git a/src/udev/udevd.c b/src/udev/udevd.c
new file mode 100644
index 0000000..0303f36
--- /dev/null
+++ b/src/udev/udevd.c
@@ -0,0 +1,1855 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright © 2004 Chris Friesen <chris_friesen@sympatico.ca>
+ * Copyright © 2009 Canonical Ltd.
+ * Copyright © 2009 Scott James Remnant <scott@netsplit.com>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/file.h>
+#include <sys/inotify.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/signalfd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "build.h"
+#include "cgroup-util.h"
+#include "cpu-set-util.h"
+#include "dev-setup.h"
+#include "device-monitor-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "io-util.h"
+#include "libudev-device-internal.h"
+#include "list.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "syslog-util.h"
+#include "udev-builtin.h"
+#include "udev-ctrl.h"
+#include "udev-util.h"
+#include "udev-watch.h"
+#include "udev.h"
+#include "user-util.h"
+
+static bool arg_debug = false;
+static int arg_daemonize = false;
+static ResolveNameTiming arg_resolve_name_timing = RESOLVE_NAME_EARLY;
+static unsigned arg_children_max = 0;
+static usec_t arg_exec_delay_usec = 0;
+static usec_t arg_event_timeout_usec = 180 * USEC_PER_SEC;
+
+typedef struct Manager {
+ sd_event *event;
+ Hashmap *workers;
+ LIST_HEAD(struct event, events);
+ const char *cgroup;
+ pid_t pid; /* the process that originally allocated the manager object */
+
+ UdevRules *rules;
+ Hashmap *properties;
+
+ sd_netlink *rtnl;
+
+ sd_device_monitor *monitor;
+ struct udev_ctrl *ctrl;
+ struct udev_ctrl_connection *ctrl_conn_blocking;
+ int fd_inotify;
+ int worker_watch[2];
+
+ sd_event_source *ctrl_event;
+ sd_event_source *uevent_event;
+ sd_event_source *inotify_event;
+ sd_event_source *kill_workers_event;
+
+ usec_t last_usec;
+
+ bool stop_exec_queue:1;
+ bool exit:1;
+} Manager;
+
+enum event_state {
+ EVENT_UNDEF,
+ EVENT_QUEUED,
+ EVENT_RUNNING,
+};
+
+struct event {
+ Manager *manager;
+ struct worker *worker;
+ enum event_state state;
+
+ sd_device *dev;
+ sd_device *dev_kernel; /* clone of originally received device */
+
+ uint64_t seqnum;
+ uint64_t delaying_seqnum;
+
+ sd_event_source *timeout_warning_event;
+ sd_event_source *timeout_event;
+
+ LIST_FIELDS(struct event, event);
+};
+
+static void event_queue_cleanup(Manager *manager, enum event_state type);
+
+enum worker_state {
+ WORKER_UNDEF,
+ WORKER_RUNNING,
+ WORKER_IDLE,
+ WORKER_KILLED,
+};
+
+struct worker {
+ Manager *manager;
+ pid_t pid;
+ sd_device_monitor *monitor;
+ enum worker_state state;
+ struct event *event;
+};
+
+/* passed from worker to main process */
+struct worker_message {
+};
+
+static void event_free(struct event *event) {
+ if (!event)
+ return;
+
+ assert(event->manager);
+
+ LIST_REMOVE(event, event->manager->events, event);
+ sd_device_unref(event->dev);
+ sd_device_unref(event->dev_kernel);
+
+ sd_event_source_unref(event->timeout_warning_event);
+ sd_event_source_unref(event->timeout_event);
+
+ if (event->worker)
+ event->worker->event = NULL;
+
+ /* only clean up the queue from the process that created it */
+ if (LIST_IS_EMPTY(event->manager->events) &&
+ event->manager->pid == getpid_cached())
+ if (unlink("/run/udev/queue") < 0)
+ log_warning_errno(errno, "Failed to unlink /run/udev/queue: %m");
+
+ free(event);
+}
+
+static void worker_free(struct worker *worker) {
+ if (!worker)
+ return;
+
+ assert(worker->manager);
+
+ hashmap_remove(worker->manager->workers, PID_TO_PTR(worker->pid));
+ sd_device_monitor_unref(worker->monitor);
+ event_free(worker->event);
+
+ free(worker);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct worker *, worker_free);
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(worker_hash_op, void, trivial_hash_func, trivial_compare_func, struct worker, worker_free);
+
+static int worker_new(struct worker **ret, Manager *manager, sd_device_monitor *worker_monitor, pid_t pid) {
+ _cleanup_(worker_freep) struct worker *worker = NULL;
+ int r;
+
+ assert(ret);
+ assert(manager);
+ assert(worker_monitor);
+ assert(pid > 1);
+
+ /* close monitor, but keep address around */
+ device_monitor_disconnect(worker_monitor);
+
+ worker = new(struct worker, 1);
+ if (!worker)
+ return -ENOMEM;
+
+ *worker = (struct worker) {
+ .manager = manager,
+ .monitor = sd_device_monitor_ref(worker_monitor),
+ .pid = pid,
+ };
+
+ r = hashmap_ensure_allocated(&manager->workers, &worker_hash_op);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(manager->workers, PID_TO_PTR(pid), worker);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(worker);
+
+ return 0;
+}
+
+static int on_event_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
+ struct event *event = userdata;
+
+ assert(event);
+ assert(event->worker);
+
+ kill_and_sigcont(event->worker->pid, SIGKILL);
+ event->worker->state = WORKER_KILLED;
+
+ log_device_error(event->dev, "Worker ["PID_FMT"] processing SEQNUM=%"PRIu64" killed", event->worker->pid, event->seqnum);
+
+ return 1;
+}
+
+static int on_event_timeout_warning(sd_event_source *s, uint64_t usec, void *userdata) {
+ struct event *event = userdata;
+
+ assert(event);
+ assert(event->worker);
+
+ log_device_warning(event->dev, "Worker ["PID_FMT"] processing SEQNUM=%"PRIu64" is taking a long time", event->worker->pid, event->seqnum);
+
+ return 1;
+}
+
+static void worker_attach_event(struct worker *worker, struct event *event) {
+ sd_event *e;
+ uint64_t usec;
+
+ assert(worker);
+ assert(worker->manager);
+ assert(event);
+ assert(!event->worker);
+ assert(!worker->event);
+
+ worker->state = WORKER_RUNNING;
+ worker->event = event;
+ event->state = EVENT_RUNNING;
+ event->worker = worker;
+
+ e = worker->manager->event;
+
+ assert_se(sd_event_now(e, CLOCK_MONOTONIC, &usec) >= 0);
+
+ (void) sd_event_add_time(e, &event->timeout_warning_event, CLOCK_MONOTONIC,
+ usec + udev_warn_timeout(arg_event_timeout_usec), USEC_PER_SEC, on_event_timeout_warning, event);
+
+ (void) sd_event_add_time(e, &event->timeout_event, CLOCK_MONOTONIC,
+ usec + arg_event_timeout_usec, USEC_PER_SEC, on_event_timeout, event);
+}
+
+static void manager_clear_for_worker(Manager *manager) {
+ assert(manager);
+
+ manager->ctrl_event = sd_event_source_unref(manager->ctrl_event);
+ manager->uevent_event = sd_event_source_unref(manager->uevent_event);
+ manager->inotify_event = sd_event_source_unref(manager->inotify_event);
+ manager->kill_workers_event = sd_event_source_unref(manager->kill_workers_event);
+
+ manager->event = sd_event_unref(manager->event);
+
+ manager->workers = hashmap_free(manager->workers);
+ event_queue_cleanup(manager, EVENT_UNDEF);
+
+ manager->monitor = sd_device_monitor_unref(manager->monitor);
+ manager->ctrl_conn_blocking = udev_ctrl_connection_unref(manager->ctrl_conn_blocking);
+ manager->ctrl = udev_ctrl_unref(manager->ctrl);
+
+ manager->worker_watch[READ_END] = safe_close(manager->worker_watch[READ_END]);
+}
+
+static void manager_free(Manager *manager) {
+ if (!manager)
+ return;
+
+ udev_builtin_exit();
+
+ manager_clear_for_worker(manager);
+
+ sd_netlink_unref(manager->rtnl);
+
+ hashmap_free_free_free(manager->properties);
+ udev_rules_free(manager->rules);
+
+ safe_close(manager->fd_inotify);
+ safe_close_pair(manager->worker_watch);
+
+ free(manager);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
+
+static int worker_send_message(int fd) {
+ struct worker_message message = {};
+
+ return loop_write(fd, &message, sizeof(message), false);
+}
+
+static int worker_lock_block_device(sd_device *dev, int *ret_fd) {
+ _cleanup_close_ int fd = -1;
+ const char *val;
+ int r;
+
+ assert(dev);
+ assert(ret_fd);
+
+ /*
+ * Take a shared lock on the device node; this establishes
+ * a concept of device "ownership" to serialize device
+ * access. External processes holding an exclusive lock will
+ * cause udev to skip the event handling; in the case udev
+ * acquired the lock, the external process can block until
+ * udev has finished its event handling.
+ */
+
+ r = sd_device_get_property_value(dev, "ACTION", &val);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get the value of property 'ACTION': %m");
+
+ if (streq(val, "remove"))
+ return 0;
+
+ r = sd_device_get_subsystem(dev, &val);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get subsystem: %m");
+
+ if (!streq(val, "block"))
+ return 0;
+
+ r = sd_device_get_sysname(dev, &val);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get sysname: %m");
+
+ if (STARTSWITH_SET(val, "dm-", "md", "drbd"))
+ return 0;
+
+ r = sd_device_get_devtype(dev, &val);
+ if (r < 0 && r != -ENOENT)
+ return log_device_debug_errno(dev, r, "Failed to get devtype: %m");
+ if (r >= 0 && streq(val, "partition")) {
+ r = sd_device_get_parent(dev, &dev);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get parent device: %m");
+ }
+
+ r = sd_device_get_devname(dev, &val);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get devname: %m");
+
+ fd = open(val, O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK);
+ if (fd < 0) {
+ log_device_debug_errno(dev, errno, "Failed to open '%s', ignoring: %m", val);
+ return 0;
+ }
+
+ if (flock(fd, LOCK_SH|LOCK_NB) < 0)
+ return log_device_debug_errno(dev, errno, "Failed to flock(%s): %m", val);
+
+ *ret_fd = TAKE_FD(fd);
+ return 1;
+}
+
+static int worker_process_device(Manager *manager, sd_device *dev) {
+ _cleanup_(udev_event_freep) UdevEvent *udev_event = NULL;
+ _cleanup_close_ int fd_lock = -1;
+ const char *seqnum, *action;
+ int r;
+
+ assert(manager);
+ assert(dev);
+
+ r = sd_device_get_property_value(dev, "SEQNUM", &seqnum);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get SEQNUM: %m");
+
+ r = sd_device_get_property_value(dev, "ACTION", &action);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get ACTION: %m");
+
+ log_device_debug(dev, "Processing device (SEQNUM=%s, ACTION=%s)", seqnum, action);
+
+ udev_event = udev_event_new(dev, arg_exec_delay_usec, manager->rtnl);
+ if (!udev_event)
+ return -ENOMEM;
+
+ r = worker_lock_block_device(dev, &fd_lock);
+ if (r < 0)
+ return r;
+
+ /* apply rules, create node, symlinks */
+ udev_event_execute_rules(udev_event, arg_event_timeout_usec, manager->properties, manager->rules);
+ udev_event_execute_run(udev_event, arg_event_timeout_usec);
+
+ if (!manager->rtnl)
+ /* in case rtnl was initialized */
+ manager->rtnl = sd_netlink_ref(udev_event->rtnl);
+
+ /* apply/restore inotify watch */
+ if (udev_event->inotify_watch) {
+ (void) udev_watch_begin(dev);
+ r = device_update_db(dev);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to update database under /run/udev/data/: %m");
+ }
+
+ log_device_debug(dev, "Device (SEQNUM=%s, ACTION=%s) processed", seqnum, action);
+
+ return 0;
+}
+
+static int worker_device_monitor_handler(sd_device_monitor *monitor, sd_device *dev, void *userdata) {
+ Manager *manager = userdata;
+ int r;
+
+ assert(dev);
+ assert(manager);
+
+ r = worker_process_device(manager, dev);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Failed to process device, ignoring: %m");
+
+ /* send processed event back to libudev listeners */
+ r = device_monitor_send_device(monitor, NULL, dev);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Failed to send device, ignoring: %m");
+
+ /* send udevd the result of the event execution */
+ r = worker_send_message(manager->worker_watch[WRITE_END]);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Failed to send signal to main daemon, ignoring: %m");
+
+ return 1;
+}
+
+static int worker_main(Manager *_manager, sd_device_monitor *monitor, sd_device *first_device) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = first_device;
+ _cleanup_(manager_freep) Manager *manager = _manager;
+ int r;
+
+ assert(manager);
+ assert(monitor);
+ assert(dev);
+
+ unsetenv("NOTIFY_SOCKET");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1) >= 0);
+
+ /* Reset OOM score, we only protect the main daemon. */
+ r = set_oom_score_adjust(0);
+ if (r < 0)
+ log_debug_errno(r, "Failed to reset OOM score, ignoring: %m");
+
+ /* Clear unnecessary data in Manager object.*/
+ manager_clear_for_worker(manager);
+
+ r = sd_event_new(&manager->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ r = sd_event_add_signal(manager->event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set SIGTERM event: %m");
+
+ r = sd_device_monitor_attach_event(monitor, manager->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach event loop to device monitor: %m");
+
+ r = sd_device_monitor_start(monitor, worker_device_monitor_handler, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+
+ (void) sd_event_source_set_description(sd_device_monitor_get_event_source(monitor), "worker-device-monitor");
+
+ /* Process first device */
+ (void) worker_device_monitor_handler(monitor, dev, manager);
+
+ r = sd_event_loop(manager->event);
+ if (r < 0)
+ return log_error_errno(r, "Event loop failed: %m");
+
+ return 0;
+}
+
+static int worker_spawn(Manager *manager, struct event *event) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *worker_monitor = NULL;
+ struct worker *worker;
+ pid_t pid;
+ int r;
+
+ /* listen for new events */
+ r = device_monitor_new_full(&worker_monitor, MONITOR_GROUP_NONE, -1);
+ if (r < 0)
+ return r;
+
+ /* allow the main daemon netlink address to send devices to the worker */
+ r = device_monitor_allow_unicast_sender(worker_monitor, manager->monitor);
+ if (r < 0)
+ return log_error_errno(r, "Worker: Failed to set unicast sender: %m");
+
+ r = device_monitor_enable_receiving(worker_monitor);
+ if (r < 0)
+ return log_error_errno(r, "Worker: Failed to enable receiving of device: %m");
+
+ r = safe_fork(NULL, FORK_DEATHSIG, &pid);
+ if (r < 0) {
+ event->state = EVENT_QUEUED;
+ return log_error_errno(r, "Failed to fork() worker: %m");
+ }
+ if (r == 0) {
+ /* Worker process */
+ r = worker_main(manager, worker_monitor, sd_device_ref(event->dev));
+ log_close();
+ _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+ }
+
+ r = worker_new(&worker, manager, worker_monitor, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create worker object: %m");
+
+ worker_attach_event(worker, event);
+
+ log_device_debug(event->dev, "Worker ["PID_FMT"] is forked for processing SEQNUM=%"PRIu64".", pid, event->seqnum);
+ return 0;
+}
+
+static void event_run(Manager *manager, struct event *event) {
+ struct worker *worker;
+ Iterator i;
+ int r;
+
+ assert(manager);
+ assert(event);
+
+ HASHMAP_FOREACH(worker, manager->workers, i) {
+ if (worker->state != WORKER_IDLE)
+ continue;
+
+ r = device_monitor_send_device(manager->monitor, worker->monitor, event->dev);
+ if (r < 0) {
+ log_device_error_errno(event->dev, r, "Worker ["PID_FMT"] did not accept message, killing the worker: %m",
+ worker->pid);
+ (void) kill(worker->pid, SIGKILL);
+ worker->state = WORKER_KILLED;
+ continue;
+ }
+ worker_attach_event(worker, event);
+ return;
+ }
+
+ if (hashmap_size(manager->workers) >= arg_children_max) {
+ if (arg_children_max > 1)
+ log_debug("Maximum number (%u) of children reached.", hashmap_size(manager->workers));
+ return;
+ }
+
+ /* start new worker and pass initial device */
+ worker_spawn(manager, event);
+}
+
+static int event_queue_insert(Manager *manager, sd_device *dev) {
+ _cleanup_(sd_device_unrefp) sd_device *clone = NULL;
+ const char *val, *action;
+ struct event *event;
+ uint64_t seqnum;
+ int r;
+
+ assert(manager);
+ assert(dev);
+
+ /* only one process can add events to the queue */
+ if (manager->pid == 0)
+ manager->pid = getpid_cached();
+
+ assert(manager->pid == getpid_cached());
+
+ /* We only accepts devices received by device monitor. */
+ r = sd_device_get_property_value(dev, "SEQNUM", &val);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(val, &seqnum);
+ if (r < 0)
+ return r;
+
+ if (seqnum == 0)
+ return -EINVAL;
+
+ /* Refuse devices do not have ACTION property. */
+ r = sd_device_get_property_value(dev, "ACTION", &action);
+ if (r < 0)
+ return r;
+
+ /* Save original device to restore the state on failures. */
+ r = device_shallow_clone(dev, &clone);
+ if (r < 0)
+ return r;
+
+ r = device_copy_properties(clone, dev);
+ if (r < 0)
+ return r;
+
+ event = new(struct event, 1);
+ if (!event)
+ return -ENOMEM;
+
+ *event = (struct event) {
+ .manager = manager,
+ .dev = sd_device_ref(dev),
+ .dev_kernel = TAKE_PTR(clone),
+ .seqnum = seqnum,
+ .state = EVENT_QUEUED,
+ };
+
+ if (LIST_IS_EMPTY(manager->events)) {
+ r = touch("/run/udev/queue");
+ if (r < 0)
+ log_warning_errno(r, "Failed to touch /run/udev/queue: %m");
+ }
+
+ LIST_APPEND(event, manager->events, event);
+
+ log_device_debug(dev, "Device (SEQNUM=%"PRIu64", ACTION=%s) is queued", seqnum, action);
+
+ return 0;
+}
+
+static void manager_kill_workers(Manager *manager) {
+ struct worker *worker;
+ Iterator i;
+
+ assert(manager);
+
+ HASHMAP_FOREACH(worker, manager->workers, i) {
+ if (worker->state == WORKER_KILLED)
+ continue;
+
+ worker->state = WORKER_KILLED;
+ (void) kill(worker->pid, SIGTERM);
+ }
+}
+
+/* lookup event for identical, parent, child device */
+static int is_device_busy(Manager *manager, struct event *event) {
+ const char *subsystem, *devpath, *devpath_old = NULL;
+ dev_t devnum = makedev(0, 0);
+ struct event *loop_event;
+ size_t devpath_len;
+ int r, ifindex = 0;
+ bool is_block;
+
+ r = sd_device_get_subsystem(event->dev, &subsystem);
+ if (r < 0)
+ return r;
+
+ is_block = streq(subsystem, "block");
+
+ r = sd_device_get_devpath(event->dev, &devpath);
+ if (r < 0)
+ return r;
+
+ devpath_len = strlen(devpath);
+
+ r = sd_device_get_property_value(event->dev, "DEVPATH_OLD", &devpath_old);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ r = sd_device_get_devnum(event->dev, &devnum);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ r = sd_device_get_ifindex(event->dev, &ifindex);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ /* check if queue contains events we depend on */
+ LIST_FOREACH(event, loop_event, manager->events) {
+ size_t loop_devpath_len, common;
+ const char *loop_devpath;
+
+ /* we already found a later event, earlier cannot block us, no need to check again */
+ if (loop_event->seqnum < event->delaying_seqnum)
+ continue;
+
+ /* event we checked earlier still exists, no need to check again */
+ if (loop_event->seqnum == event->delaying_seqnum)
+ return true;
+
+ /* found ourself, no later event can block us */
+ if (loop_event->seqnum >= event->seqnum)
+ break;
+
+ /* check major/minor */
+ if (major(devnum) != 0) {
+ const char *s;
+ dev_t d;
+
+ if (sd_device_get_subsystem(loop_event->dev, &s) < 0)
+ continue;
+
+ if (sd_device_get_devnum(loop_event->dev, &d) >= 0 &&
+ devnum == d && is_block == streq(s, "block"))
+ return true;
+ }
+
+ /* check network device ifindex */
+ if (ifindex > 0) {
+ int i;
+
+ if (sd_device_get_ifindex(loop_event->dev, &i) >= 0 &&
+ ifindex == i)
+ return true;
+ }
+
+ if (sd_device_get_devpath(loop_event->dev, &loop_devpath) < 0)
+ continue;
+
+ /* check our old name */
+ if (devpath_old && streq(devpath_old, loop_devpath)) {
+ event->delaying_seqnum = loop_event->seqnum;
+ return true;
+ }
+
+ loop_devpath_len = strlen(loop_devpath);
+
+ /* compare devpath */
+ common = MIN(devpath_len, loop_devpath_len);
+
+ /* one devpath is contained in the other? */
+ if (!strneq(devpath, loop_devpath, common))
+ continue;
+
+ /* identical device event found */
+ if (devpath_len == loop_devpath_len) {
+ /* devices names might have changed/swapped in the meantime */
+ if (major(devnum) != 0 || ifindex > 0)
+ continue;
+ event->delaying_seqnum = loop_event->seqnum;
+ return true;
+ }
+
+ /* parent device event found */
+ if (devpath[common] == '/') {
+ event->delaying_seqnum = loop_event->seqnum;
+ return true;
+ }
+
+ /* child device event found */
+ if (loop_devpath[common] == '/') {
+ event->delaying_seqnum = loop_event->seqnum;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static int on_exit_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
+ Manager *manager = userdata;
+
+ assert(manager);
+
+ log_error("Giving up waiting for workers to finish.");
+ sd_event_exit(manager->event, -ETIMEDOUT);
+
+ return 1;
+}
+
+static void manager_exit(Manager *manager) {
+ uint64_t usec;
+ int r;
+
+ assert(manager);
+
+ manager->exit = true;
+
+ sd_notify(false,
+ "STOPPING=1\n"
+ "STATUS=Starting shutdown...");
+
+ /* close sources of new events and discard buffered events */
+ manager->ctrl_event = sd_event_source_unref(manager->ctrl_event);
+ manager->ctrl = udev_ctrl_unref(manager->ctrl);
+
+ manager->inotify_event = sd_event_source_unref(manager->inotify_event);
+ manager->fd_inotify = safe_close(manager->fd_inotify);
+
+ manager->uevent_event = sd_event_source_unref(manager->uevent_event);
+ manager->monitor = sd_device_monitor_unref(manager->monitor);
+
+ /* discard queued events and kill workers */
+ event_queue_cleanup(manager, EVENT_QUEUED);
+ manager_kill_workers(manager);
+
+ assert_se(sd_event_now(manager->event, CLOCK_MONOTONIC, &usec) >= 0);
+
+ r = sd_event_add_time(manager->event, NULL, CLOCK_MONOTONIC,
+ usec + 30 * USEC_PER_SEC, USEC_PER_SEC, on_exit_timeout, manager);
+ if (r < 0)
+ return;
+}
+
+/* reload requested, HUP signal received, rules changed, builtin changed */
+static void manager_reload(Manager *manager) {
+
+ assert(manager);
+
+ sd_notify(false,
+ "RELOADING=1\n"
+ "STATUS=Flushing configuration...");
+
+ manager_kill_workers(manager);
+ manager->rules = udev_rules_free(manager->rules);
+ udev_builtin_exit();
+
+ sd_notifyf(false,
+ "READY=1\n"
+ "STATUS=Processing with %u children at max", arg_children_max);
+}
+
+static int on_kill_workers_event(sd_event_source *s, uint64_t usec, void *userdata) {
+ Manager *manager = userdata;
+
+ assert(manager);
+
+ log_debug("Cleanup idle workers");
+ manager_kill_workers(manager);
+
+ return 1;
+}
+
+static void event_queue_start(Manager *manager) {
+ struct event *event;
+ usec_t usec;
+ int r;
+
+ assert(manager);
+
+ if (LIST_IS_EMPTY(manager->events) ||
+ manager->exit || manager->stop_exec_queue)
+ return;
+
+ assert_se(sd_event_now(manager->event, CLOCK_MONOTONIC, &usec) >= 0);
+ /* check for changed config, every 3 seconds at most */
+ if (manager->last_usec == 0 ||
+ usec - manager->last_usec > 3 * USEC_PER_SEC) {
+ if (udev_rules_check_timestamp(manager->rules) ||
+ udev_builtin_validate())
+ manager_reload(manager);
+
+ manager->last_usec = usec;
+ }
+
+ r = event_source_disable(manager->kill_workers_event);
+ if (r < 0)
+ log_warning_errno(r, "Failed to disable event source for cleaning up idle workers, ignoring: %m");
+
+ udev_builtin_init();
+
+ if (!manager->rules) {
+ r = udev_rules_new(&manager->rules, arg_resolve_name_timing);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read udev rules: %m");
+ return;
+ }
+ }
+
+ LIST_FOREACH(event, event, manager->events) {
+ if (event->state != EVENT_QUEUED)
+ continue;
+
+ /* do not start event if parent or child event is still running */
+ if (is_device_busy(manager, event) != 0)
+ continue;
+
+ event_run(manager, event);
+ }
+}
+
+static void event_queue_cleanup(Manager *manager, enum event_state match_type) {
+ struct event *event, *tmp;
+
+ LIST_FOREACH_SAFE(event, event, tmp, manager->events) {
+ if (match_type != EVENT_UNDEF && match_type != event->state)
+ continue;
+
+ event_free(event);
+ }
+}
+
+static int on_worker(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *manager = userdata;
+
+ assert(manager);
+
+ for (;;) {
+ struct worker_message msg;
+ struct iovec iovec = {
+ .iov_base = &msg,
+ .iov_len = sizeof(msg),
+ };
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
+ } control = {};
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct cmsghdr *cmsg;
+ ssize_t size;
+ struct ucred *ucred = NULL;
+ struct worker *worker;
+
+ size = recvmsg(fd, &msghdr, MSG_DONTWAIT);
+ if (size < 0) {
+ if (errno == EINTR)
+ continue;
+ else if (errno == EAGAIN)
+ /* nothing more to read */
+ break;
+
+ return log_error_errno(errno, "Failed to receive message: %m");
+ } else if (size != sizeof(struct worker_message)) {
+ log_warning("Ignoring worker message with invalid size %zi bytes", size);
+ continue;
+ }
+
+ CMSG_FOREACH(cmsg, &msghdr)
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
+ ucred = (struct ucred*) CMSG_DATA(cmsg);
+
+ if (!ucred || ucred->pid <= 0) {
+ log_warning("Ignoring worker message without valid PID");
+ continue;
+ }
+
+ /* lookup worker who sent the signal */
+ worker = hashmap_get(manager->workers, PID_TO_PTR(ucred->pid));
+ if (!worker) {
+ log_debug("Worker ["PID_FMT"] returned, but is no longer tracked", ucred->pid);
+ continue;
+ }
+
+ if (worker->state != WORKER_KILLED)
+ worker->state = WORKER_IDLE;
+
+ /* worker returned */
+ event_free(worker->event);
+ }
+
+ /* we have free workers, try to schedule events */
+ event_queue_start(manager);
+
+ return 1;
+}
+
+static int on_uevent(sd_device_monitor *monitor, sd_device *dev, void *userdata) {
+ Manager *manager = userdata;
+ int r;
+
+ assert(manager);
+
+ device_ensure_usec_initialized(dev, NULL);
+
+ r = event_queue_insert(manager, dev);
+ if (r < 0) {
+ log_device_error_errno(dev, r, "Failed to insert device into event queue: %m");
+ return 1;
+ }
+
+ /* we have fresh events, try to schedule them */
+ event_queue_start(manager);
+
+ return 1;
+}
+
+/* receive the udevd message from userspace */
+static int on_ctrl_msg(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *manager = userdata;
+ _cleanup_(udev_ctrl_connection_unrefp) struct udev_ctrl_connection *ctrl_conn = NULL;
+ _cleanup_(udev_ctrl_msg_unrefp) struct udev_ctrl_msg *ctrl_msg = NULL;
+ const char *str;
+ int i, r;
+
+ assert(manager);
+
+ ctrl_conn = udev_ctrl_get_connection(manager->ctrl);
+ if (!ctrl_conn)
+ return 1;
+
+ ctrl_msg = udev_ctrl_receive_msg(ctrl_conn);
+ if (!ctrl_msg)
+ return 1;
+
+ i = udev_ctrl_get_set_log_level(ctrl_msg);
+ if (i >= 0) {
+ log_debug("Received udev control message (SET_LOG_LEVEL), setting log_priority=%i", i);
+ log_set_max_level_realm(LOG_REALM_UDEV, i);
+ log_set_max_level_realm(LOG_REALM_SYSTEMD, i);
+ manager_kill_workers(manager);
+ }
+
+ if (udev_ctrl_get_stop_exec_queue(ctrl_msg) > 0) {
+ log_debug("Received udev control message (STOP_EXEC_QUEUE)");
+ manager->stop_exec_queue = true;
+ }
+
+ if (udev_ctrl_get_start_exec_queue(ctrl_msg) > 0) {
+ log_debug("Received udev control message (START_EXEC_QUEUE)");
+ manager->stop_exec_queue = false;
+ event_queue_start(manager);
+ }
+
+ if (udev_ctrl_get_reload(ctrl_msg) > 0) {
+ log_debug("Received udev control message (RELOAD)");
+ manager_reload(manager);
+ }
+
+ str = udev_ctrl_get_set_env(ctrl_msg);
+ if (str) {
+ _cleanup_free_ char *key = NULL, *val = NULL, *old_key = NULL, *old_val = NULL;
+ char *eq;
+
+ eq = strchr(str, '=');
+ if (!eq) {
+ log_error("Invalid key format '%s'", str);
+ return 1;
+ }
+
+ key = strndup(str, eq - str);
+ if (!key) {
+ log_oom();
+ return 1;
+ }
+
+ old_val = hashmap_remove2(manager->properties, key, (void **) &old_key);
+
+ r = hashmap_ensure_allocated(&manager->properties, &string_hash_ops);
+ if (r < 0) {
+ log_oom();
+ return 1;
+ }
+
+ eq++;
+ if (!isempty(eq)) {
+ log_debug("Received udev control message (ENV), unsetting '%s'", key);
+
+ r = hashmap_put(manager->properties, key, NULL);
+ if (r < 0) {
+ log_oom();
+ return 1;
+ }
+ } else {
+ val = strdup(eq);
+ if (!val) {
+ log_oom();
+ return 1;
+ }
+
+ log_debug("Received udev control message (ENV), setting '%s=%s'", key, val);
+
+ r = hashmap_put(manager->properties, key, val);
+ if (r < 0) {
+ log_oom();
+ return 1;
+ }
+ }
+
+ key = val = NULL;
+ manager_kill_workers(manager);
+ }
+
+ i = udev_ctrl_get_set_children_max(ctrl_msg);
+ if (i >= 0) {
+ log_debug("Receivd udev control message (SET_MAX_CHILDREN), setting children_max=%i", i);
+ arg_children_max = i;
+
+ (void) sd_notifyf(false,
+ "READY=1\n"
+ "STATUS=Processing with %u children at max", arg_children_max);
+ }
+
+ if (udev_ctrl_get_ping(ctrl_msg) > 0)
+ log_debug("Received udev control message (PING)");
+
+ if (udev_ctrl_get_exit(ctrl_msg) > 0) {
+ log_debug("Received udev control message (EXIT)");
+ manager_exit(manager);
+ /* keep reference to block the client until we exit
+ TODO: deal with several blocking exit requests */
+ manager->ctrl_conn_blocking = udev_ctrl_connection_ref(ctrl_conn);
+ }
+
+ return 1;
+}
+
+static int synthesize_change(sd_device *dev) {
+ const char *subsystem, *sysname, *devname, *syspath, *devtype;
+ char filename[PATH_MAX];
+ int r;
+
+ r = sd_device_get_subsystem(dev, &subsystem);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_sysname(dev, &sysname);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devname(dev, &devname);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_syspath(dev, &syspath);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devtype(dev, &devtype);
+ if (r < 0)
+ return r;
+
+ if (streq_ptr("block", subsystem) &&
+ streq_ptr("disk", devtype) &&
+ !startswith(sysname, "dm-")) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ bool part_table_read = false, has_partitions = false;
+ sd_device *d;
+ int fd;
+
+ /*
+ * Try to re-read the partition table. This only succeeds if
+ * none of the devices is busy. The kernel returns 0 if no
+ * partition table is found, and we will not get an event for
+ * the disk.
+ */
+ fd = open(devname, O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK);
+ if (fd >= 0) {
+ r = flock(fd, LOCK_EX|LOCK_NB);
+ if (r >= 0)
+ r = ioctl(fd, BLKRRPART, 0);
+
+ close(fd);
+ if (r >= 0)
+ part_table_read = true;
+ }
+
+ /* search for partitions */
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_parent(e, dev);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_subsystem(e, "block", true);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ const char *t;
+
+ if (sd_device_get_devtype(d, &t) < 0 ||
+ !streq("partition", t))
+ continue;
+
+ has_partitions = true;
+ break;
+ }
+
+ /*
+ * We have partitions and re-read the table, the kernel already sent
+ * out a "change" event for the disk, and "remove/add" for all
+ * partitions.
+ */
+ if (part_table_read && has_partitions)
+ return 0;
+
+ /*
+ * We have partitions but re-reading the partition table did not
+ * work, synthesize "change" for the disk and all partitions.
+ */
+ log_debug("Device '%s' is closed, synthesising 'change'", devname);
+ strscpyl(filename, sizeof(filename), syspath, "/uevent", NULL);
+ write_string_file(filename, "change", WRITE_STRING_FILE_DISABLE_BUFFER);
+
+ FOREACH_DEVICE(e, d) {
+ const char *t, *n, *s;
+
+ if (sd_device_get_devtype(d, &t) < 0 ||
+ !streq("partition", t))
+ continue;
+
+ if (sd_device_get_devname(d, &n) < 0 ||
+ sd_device_get_syspath(d, &s) < 0)
+ continue;
+
+ log_debug("Device '%s' is closed, synthesising partition '%s' 'change'", devname, n);
+ strscpyl(filename, sizeof(filename), s, "/uevent", NULL);
+ write_string_file(filename, "change", WRITE_STRING_FILE_DISABLE_BUFFER);
+ }
+
+ return 0;
+ }
+
+ log_debug("Device %s is closed, synthesising 'change'", devname);
+ strscpyl(filename, sizeof(filename), syspath, "/uevent", NULL);
+ write_string_file(filename, "change", WRITE_STRING_FILE_DISABLE_BUFFER);
+
+ return 0;
+}
+
+static int on_inotify(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *manager = userdata;
+ union inotify_event_buffer buffer;
+ struct inotify_event *e;
+ ssize_t l;
+ int r;
+
+ assert(manager);
+
+ r = event_source_disable(manager->kill_workers_event);
+ if (r < 0)
+ log_warning_errno(r, "Failed to disable event source for cleaning up idle workers, ignoring: %m");
+
+ l = read(fd, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 1;
+
+ return log_error_errno(errno, "Failed to read inotify fd: %m");
+ }
+
+ FOREACH_INOTIFY_EVENT(e, buffer, l) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ const char *devnode;
+
+ if (udev_watch_lookup(e->wd, &dev) <= 0)
+ continue;
+
+ if (sd_device_get_devname(dev, &devnode) < 0)
+ continue;
+
+ log_device_debug(dev, "Inotify event: %x for %s", e->mask, devnode);
+ if (e->mask & IN_CLOSE_WRITE)
+ synthesize_change(dev);
+ else if (e->mask & IN_IGNORED)
+ udev_watch_end(dev);
+ }
+
+ return 1;
+}
+
+static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *manager = userdata;
+
+ assert(manager);
+
+ manager_exit(manager);
+
+ return 1;
+}
+
+static int on_sighup(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *manager = userdata;
+
+ assert(manager);
+
+ manager_reload(manager);
+
+ return 1;
+}
+
+static int on_sigchld(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *manager = userdata;
+ int r;
+
+ assert(manager);
+
+ for (;;) {
+ pid_t pid;
+ int status;
+ struct worker *worker;
+
+ pid = waitpid(-1, &status, WNOHANG);
+ if (pid <= 0)
+ break;
+
+ worker = hashmap_get(manager->workers, PID_TO_PTR(pid));
+ if (!worker) {
+ log_warning("Worker ["PID_FMT"] is unknown, ignoring", pid);
+ continue;
+ }
+
+ if (WIFEXITED(status)) {
+ if (WEXITSTATUS(status) == 0)
+ log_debug("Worker ["PID_FMT"] exited", pid);
+ else
+ log_warning("Worker ["PID_FMT"] exited with return code %i", pid, WEXITSTATUS(status));
+ } else if (WIFSIGNALED(status))
+ log_warning("Worker ["PID_FMT"] terminated by signal %i (%s)", pid, WTERMSIG(status), signal_to_string(WTERMSIG(status)));
+ else if (WIFSTOPPED(status)) {
+ log_info("Worker ["PID_FMT"] stopped", pid);
+ continue;
+ } else if (WIFCONTINUED(status)) {
+ log_info("Worker ["PID_FMT"] continued", pid);
+ continue;
+ } else
+ log_warning("Worker ["PID_FMT"] exit with status 0x%04x", pid, status);
+
+ if ((!WIFEXITED(status) || WEXITSTATUS(status) != 0) && worker->event) {
+ log_device_error(worker->event->dev, "Worker ["PID_FMT"] failed", pid);
+
+ /* delete state from disk */
+ device_delete_db(worker->event->dev);
+ device_tag_index(worker->event->dev, NULL, false);
+
+ /* forward kernel event without amending it */
+ r = device_monitor_send_device(manager->monitor, NULL, worker->event->dev_kernel);
+ if (r < 0)
+ log_device_error_errno(worker->event->dev_kernel, r, "Failed to send back device to kernel: %m");
+ }
+
+ worker_free(worker);
+ }
+
+ /* we can start new workers, try to schedule events */
+ event_queue_start(manager);
+
+ /* Disable unnecessary cleanup event */
+ if (hashmap_isempty(manager->workers)) {
+ r = event_source_disable(manager->kill_workers_event);
+ if (r < 0)
+ log_warning_errno(r, "Failed to disable event source for cleaning up idle workers, ignoring: %m");
+ }
+
+ return 1;
+}
+
+static int on_post(sd_event_source *s, void *userdata) {
+ Manager *manager = userdata;
+
+ assert(manager);
+
+ if (!LIST_IS_EMPTY(manager->events))
+ return 1;
+
+ /* There are no pending events. Let's cleanup idle process. */
+
+ if (!hashmap_isempty(manager->workers)) {
+ /* There are idle workers */
+ (void) event_reset_time(manager->event, &manager->kill_workers_event, CLOCK_MONOTONIC,
+ now(CLOCK_MONOTONIC) + 3 * USEC_PER_SEC, USEC_PER_SEC,
+ on_kill_workers_event, manager, 0, "kill-workers-event", false);
+ return 1;
+ }
+
+ /* There are no idle workers. */
+
+ if (manager->exit)
+ return sd_event_exit(manager->event, 0);
+
+ if (manager->cgroup)
+ /* cleanup possible left-over processes in our cgroup */
+ (void) cg_kill(SYSTEMD_CGROUP_CONTROLLER, manager->cgroup, SIGKILL, CGROUP_IGNORE_SELF, NULL, NULL, NULL);
+
+ return 1;
+}
+
+static int listen_fds(int *ret_ctrl, int *ret_netlink) {
+ int ctrl_fd = -1, netlink_fd = -1;
+ int fd, n;
+
+ assert(ret_ctrl);
+ assert(ret_netlink);
+
+ n = sd_listen_fds(true);
+ if (n < 0)
+ return n;
+
+ for (fd = SD_LISTEN_FDS_START; fd < n + SD_LISTEN_FDS_START; fd++) {
+ if (sd_is_socket(fd, AF_LOCAL, SOCK_SEQPACKET, -1) > 0) {
+ if (ctrl_fd >= 0)
+ return -EINVAL;
+ ctrl_fd = fd;
+ continue;
+ }
+
+ if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
+ if (netlink_fd >= 0)
+ return -EINVAL;
+ netlink_fd = fd;
+ continue;
+ }
+
+ return -EINVAL;
+ }
+
+ *ret_ctrl = ctrl_fd;
+ *ret_netlink = netlink_fd;
+
+ return 0;
+}
+
+/*
+ * read the kernel command line, in case we need to get into debug mode
+ * udev.log_priority=<level> syslog priority
+ * udev.children_max=<number of workers> events are fully serialized if set to 1
+ * udev.exec_delay=<number of seconds> delay execution of every executed program
+ * udev.event_timeout=<number of seconds> seconds to wait before terminating an event
+ */
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r = 0;
+
+ assert(key);
+
+ if (!value)
+ return 0;
+
+ if (proc_cmdline_key_streq(key, "udev.log_priority")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = log_level_from_string(value);
+ if (r >= 0)
+ log_set_max_level(r);
+
+ } else if (proc_cmdline_key_streq(key, "udev.event_timeout")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = parse_sec(value, &arg_event_timeout_usec);
+
+ } else if (proc_cmdline_key_streq(key, "udev.children_max")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = safe_atou(value, &arg_children_max);
+
+ } else if (proc_cmdline_key_streq(key, "udev.exec_delay")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = parse_sec(value, &arg_exec_delay_usec);
+
+ } else if (startswith(key, "udev."))
+ log_warning("Unknown udev kernel command line option \"%s\", ignoring", key);
+
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse \"%s=%s\", ignoring: %m", key, value);
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-udevd.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Manages devices.\n\n"
+ " -h --help Print this message\n"
+ " -V --version Print version of the program\n"
+ " -d --daemon Detach and run in the background\n"
+ " -D --debug Enable debug output\n"
+ " -c --children-max=INT Set maximum number of workers\n"
+ " -e --exec-delay=SECONDS Seconds to wait before executing RUN=\n"
+ " -t --event-timeout=SECONDS Seconds to wait before terminating an event\n"
+ " -N --resolve-names=early|late|never\n"
+ " When to resolve users and groups\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "daemon", no_argument, NULL, 'd' },
+ { "debug", no_argument, NULL, 'D' },
+ { "children-max", required_argument, NULL, 'c' },
+ { "exec-delay", required_argument, NULL, 'e' },
+ { "event-timeout", required_argument, NULL, 't' },
+ { "resolve-names", required_argument, NULL, 'N' },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'V' },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "c:de:Dt:N:hV", options, NULL)) >= 0) {
+ switch (c) {
+
+ case 'd':
+ arg_daemonize = true;
+ break;
+ case 'c':
+ r = safe_atou(optarg, &arg_children_max);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse --children-max= value '%s', ignoring: %m", optarg);
+ break;
+ case 'e':
+ r = parse_sec(optarg, &arg_exec_delay_usec);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse --exec-delay= value '%s', ignoring: %m", optarg);
+ break;
+ case 't':
+ r = parse_sec(optarg, &arg_event_timeout_usec);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse --event-timeout= value '%s', ignoring: %m", optarg);
+ break;
+ case 'D':
+ arg_debug = true;
+ break;
+ case 'N': {
+ ResolveNameTiming t;
+
+ t = resolve_name_timing_from_string(optarg);
+ if (t < 0)
+ log_warning("Invalid --resolve-names= value '%s', ignoring.", optarg);
+ else
+ arg_resolve_name_timing = t;
+ break;
+ }
+ case 'h':
+ return help();
+ case 'V':
+ printf("%s\n", GIT_VERSION);
+ return 0;
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unhandled option");
+
+ }
+ }
+
+ return 1;
+}
+
+static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cgroup) {
+ _cleanup_(manager_freep) Manager *manager = NULL;
+ int r;
+
+ assert(ret);
+
+ manager = new(Manager, 1);
+ if (!manager)
+ return log_oom();
+
+ *manager = (Manager) {
+ .fd_inotify = -1,
+ .worker_watch = { -1, -1 },
+ .cgroup = cgroup,
+ };
+
+ manager->ctrl = udev_ctrl_new_from_fd(fd_ctrl);
+ if (!manager->ctrl)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to initialize udev control socket");
+
+ r = udev_ctrl_enable_receiving(manager->ctrl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind udev control socket: %m");
+
+ r = device_monitor_new_full(&manager->monitor, MONITOR_GROUP_KERNEL, fd_uevent);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize device monitor: %m");
+
+ (void) sd_device_monitor_set_receive_buffer_size(manager->monitor, 128 * 1024 * 1024);
+
+ r = device_monitor_enable_receiving(manager->monitor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind netlink socket: %m");
+
+ *ret = TAKE_PTR(manager);
+
+ return 0;
+}
+
+static int main_loop(Manager *manager) {
+ int fd_worker, fd_ctrl, r;
+
+ /* unnamed socket from workers to the main daemon */
+ r = socketpair(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0, manager->worker_watch);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to create socketpair for communicating with workers: %m");
+
+ fd_worker = manager->worker_watch[READ_END];
+
+ r = setsockopt_int(fd_worker, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable SO_PASSCRED: %m");
+
+ r = udev_watch_init();
+ if (r < 0)
+ return log_error_errno(r, "Failed to create inotify descriptor: %m");
+ manager->fd_inotify = r;
+
+ udev_watch_restore();
+
+ /* block and listen to all signals on signalfd */
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, SIGHUP, SIGCHLD, -1) >= 0);
+
+ r = sd_event_default(&manager->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ r = sd_event_add_signal(manager->event, NULL, SIGINT, on_sigterm, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create SIGINT event source: %m");
+
+ r = sd_event_add_signal(manager->event, NULL, SIGTERM, on_sigterm, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create SIGTERM event source: %m");
+
+ r = sd_event_add_signal(manager->event, NULL, SIGHUP, on_sighup, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create SIGHUP event source: %m");
+
+ r = sd_event_add_signal(manager->event, NULL, SIGCHLD, on_sigchld, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create SIGCHLD event source: %m");
+
+ r = sd_event_set_watchdog(manager->event, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create watchdog event source: %m");
+
+ fd_ctrl = udev_ctrl_get_fd(manager->ctrl);
+ if (fd_ctrl < 0)
+ return log_error_errno(fd_ctrl, "Failed to get udev control socket fd: %m");
+
+ r = sd_event_add_io(manager->event, &manager->ctrl_event, fd_ctrl, EPOLLIN, on_ctrl_msg, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create udev control event source: %m");
+
+ /* This needs to be after the inotify and uevent handling, to make sure
+ * that the ping is send back after fully processing the pending uevents
+ * (including the synthetic ones we may create due to inotify events).
+ */
+ r = sd_event_source_set_priority(manager->ctrl_event, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set IDLE event priority for udev control event source: %m");
+
+ r = sd_event_add_io(manager->event, &manager->inotify_event, manager->fd_inotify, EPOLLIN, on_inotify, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create inotify event source: %m");
+
+ r = sd_device_monitor_attach_event(manager->monitor, manager->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach event to device monitor: %m");
+
+ r = sd_device_monitor_start(manager->monitor, on_uevent, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+
+ (void) sd_event_source_set_description(sd_device_monitor_get_event_source(manager->monitor), "device-monitor");
+
+ r = sd_event_add_io(manager->event, NULL, fd_worker, EPOLLIN, on_worker, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create worker event source: %m");
+
+ r = sd_event_add_post(manager->event, NULL, on_post, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create post event source: %m");
+
+ udev_builtin_init();
+
+ r = udev_rules_new(&manager->rules, arg_resolve_name_timing);
+ if (!manager->rules)
+ return log_error_errno(r, "Failed to read udev rules: %m");
+
+ r = udev_rules_apply_static_dev_perms(manager->rules);
+ if (r < 0)
+ log_error_errno(r, "Failed to apply permissions on static device nodes: %m");
+
+ (void) sd_notifyf(false,
+ "READY=1\n"
+ "STATUS=Processing with %u children at max", arg_children_max);
+
+ r = sd_event_loop(manager->event);
+ if (r < 0)
+ log_error_errno(r, "Event loop failed: %m");
+
+ sd_notify(false,
+ "STOPPING=1\n"
+ "STATUS=Shutting down...");
+ return r;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_free_ char *cgroup = NULL;
+ _cleanup_(manager_freep) Manager *manager = NULL;
+ int fd_ctrl = -1, fd_uevent = -1;
+ int r;
+
+ log_set_target(LOG_TARGET_AUTO);
+ udev_parse_config_full(&arg_children_max, &arg_exec_delay_usec, &arg_event_timeout_usec, &arg_resolve_name_timing);
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ if (arg_debug) {
+ log_set_target(LOG_TARGET_CONSOLE);
+ log_set_max_level(LOG_DEBUG);
+ }
+
+ log_set_max_level_realm(LOG_REALM_SYSTEMD, log_get_max_level());
+
+ r = must_be_root();
+ if (r < 0)
+ return r;
+
+ if (arg_children_max == 0) {
+ cpu_set_t cpu_set;
+ unsigned long mem_limit;
+
+ arg_children_max = 8;
+
+ if (sched_getaffinity(0, sizeof(cpu_set), &cpu_set) == 0)
+ arg_children_max += CPU_COUNT(&cpu_set) * 8;
+
+ mem_limit = physical_memory() / (128LU*1024*1024);
+ arg_children_max = MAX(10U, MIN(arg_children_max, mem_limit));
+
+ log_debug("Set children_max to %u", arg_children_max);
+ }
+
+ /* set umask before creating any file/directory */
+ r = chdir("/");
+ if (r < 0)
+ return log_error_errno(errno, "Failed to change dir to '/': %m");
+
+ umask(022);
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return log_error_errno(r, "Could not initialize labelling: %m");
+
+ r = mkdir_errno_wrapper("/run/udev", 0755);
+ if (r < 0 && r != -EEXIST)
+ return log_error_errno(r, "Failed to create /run/udev: %m");
+
+ dev_setup(NULL, UID_INVALID, GID_INVALID);
+
+ if (getppid() == 1 && sd_booted() > 0) {
+ /* Get our own cgroup, we regularly kill everything udev has left behind.
+ * We only do this on systemd systems, and only if we are directly spawned
+ * by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
+ if (r < 0) {
+ if (IN_SET(r, -ENOENT, -ENOMEDIUM))
+ log_debug_errno(r, "Dedicated cgroup not found: %m");
+ else
+ log_warning_errno(r, "Failed to get cgroup: %m");
+ }
+ }
+
+ r = listen_fds(&fd_ctrl, &fd_uevent);
+ if (r < 0)
+ return log_error_errno(r, "Failed to listen on fds: %m");
+
+ r = manager_new(&manager, fd_ctrl, fd_uevent, cgroup);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create manager: %m");
+
+ if (arg_daemonize) {
+ pid_t pid;
+
+ log_info("Starting version " GIT_VERSION);
+
+ /* connect /dev/null to stdin, stdout, stderr */
+ if (log_get_max_level() < LOG_DEBUG) {
+ r = make_null_stdio();
+ if (r < 0)
+ log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
+ }
+
+ pid = fork();
+ if (pid < 0)
+ return log_error_errno(errno, "Failed to fork daemon: %m");
+ if (pid > 0)
+ /* parent */
+ return 0;
+
+ /* child */
+ (void) setsid();
+
+ r = set_oom_score_adjust(-1000);
+ if (r < 0)
+ log_debug_errno(r, "Failed to adjust OOM score, ignoring: %m");
+ }
+
+ r = main_loop(manager);
+ /* FIXME: move this into manager_free() */
+ udev_ctrl_cleanup(manager->ctrl);
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/udev/v4l_id/v4l_id.c b/src/udev/v4l_id/v4l_id.c
new file mode 100644
index 0000000..4d48289
--- /dev/null
+++ b/src/udev/v4l_id/v4l_id.c
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2009 Filippo Argiolas <filippo.argiolas@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details:
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <linux/videodev2.h>
+
+#include "fd-util.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+ _cleanup_close_ int fd = -1;
+ char *device;
+ struct v4l2_capability v2cap;
+ int c;
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+ case 'h':
+ printf("%s [-h,--help] <device file>\n\n"
+ "Video4Linux device identification.\n\n"
+ " -h Print this message\n"
+ , program_invocation_short_name);
+ return 0;
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ device = argv[optind];
+ if (device == NULL)
+ return 2;
+
+ fd = open(device, O_RDONLY);
+ if (fd < 0)
+ return 3;
+
+ if (ioctl(fd, VIDIOC_QUERYCAP, &v2cap) == 0) {
+ printf("ID_V4L_VERSION=2\n");
+ printf("ID_V4L_PRODUCT=%s\n", v2cap.card);
+ printf("ID_V4L_CAPABILITIES=:");
+ if ((v2cap.capabilities & V4L2_CAP_VIDEO_CAPTURE) > 0 ||
+ (v2cap.capabilities & V4L2_CAP_VIDEO_CAPTURE_MPLANE) > 0)
+ printf("capture:");
+ if ((v2cap.capabilities & V4L2_CAP_VIDEO_OUTPUT) > 0 ||
+ (v2cap.capabilities & V4L2_CAP_VIDEO_OUTPUT_MPLANE) > 0)
+ printf("video_output:");
+ if ((v2cap.capabilities & V4L2_CAP_VIDEO_OVERLAY) > 0)
+ printf("video_overlay:");
+ if ((v2cap.capabilities & V4L2_CAP_AUDIO) > 0)
+ printf("audio:");
+ if ((v2cap.capabilities & V4L2_CAP_TUNER) > 0)
+ printf("tuner:");
+ if ((v2cap.capabilities & V4L2_CAP_RADIO) > 0)
+ printf("radio:");
+ printf("\n");
+ }
+
+ return 0;
+}
diff --git a/src/update-done/update-done.c b/src/update-done/update-done.c
new file mode 100644
index 0000000..c76c2d1
--- /dev/null
+++ b/src/update-done/update-done.c
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "fileio-label.h"
+#include "selinux-util.h"
+#include "util.h"
+
+#define MESSAGE \
+ "# This file was created by systemd-update-done. Its only \n" \
+ "# purpose is to hold a timestamp of the time this directory\n" \
+ "# was updated. See man:systemd-update-done.service(8).\n"
+
+static int apply_timestamp(const char *path, struct timespec *ts) {
+ _cleanup_free_ char *message = NULL;
+ int r;
+
+ /*
+ * We store the timestamp both as mtime of the file and in the file itself,
+ * to support filesystems which cannot store nanosecond-precision timestamps.
+ */
+
+ if (asprintf(&message,
+ MESSAGE
+ "TIMESTAMP_NSEC=" NSEC_FMT "\n",
+ timespec_load_nsec(ts)) < 0)
+ return log_oom();
+
+ r = write_string_file_atomic_label_ts(path, message, ts);
+ if (r == -EROFS)
+ return log_debug("Cannot create \"%s\", file system is read-only.", path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write \"%s\": %m", path);
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ struct stat st;
+ int r, q = 0;
+
+ log_setup_service();
+
+ if (stat("/usr", &st) < 0) {
+ log_error_errno(errno, "Failed to stat /usr: %m");
+ return EXIT_FAILURE;
+ }
+
+ r = mac_selinux_init();
+ if (r < 0) {
+ log_error_errno(r, "SELinux setup failed: %m");
+ return EXIT_FAILURE;
+ }
+
+ r = apply_timestamp("/etc/.updated", &st.st_mtim);
+ q = apply_timestamp("/var/.updated", &st.st_mtim);
+
+ return r < 0 || q < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/update-utmp/update-utmp.c b/src/update-utmp/update-utmp.c
new file mode 100644
index 0000000..7d66ed3
--- /dev/null
+++ b/src/update-utmp/update-utmp.c
@@ -0,0 +1,269 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#if HAVE_AUDIT
+#include <libaudit.h>
+#endif
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "format-util.h"
+#include "log.h"
+#include "macro.h"
+#include "process-util.h"
+#include "special.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "util.h"
+#include "utmp-wtmp.h"
+
+typedef struct Context {
+ sd_bus *bus;
+#if HAVE_AUDIT
+ int audit_fd;
+#endif
+} Context;
+
+static void context_clear(Context *c) {
+ assert(c);
+
+ c->bus = sd_bus_flush_close_unref(c->bus);
+#if HAVE_AUDIT
+ if (c->audit_fd >= 0)
+ audit_close(c->audit_fd);
+ c->audit_fd = -1;
+#endif
+}
+
+static usec_t get_startup_time(Context *c) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ usec_t t = 0;
+ int r;
+
+ assert(c);
+
+ r = sd_bus_get_property_trivial(
+ c->bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "UserspaceTimestamp",
+ &error,
+ 't', &t);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get timestamp: %s", bus_error_message(&error, r));
+ return 0;
+ }
+
+ return t;
+}
+
+static int get_current_runlevel(Context *c) {
+ static const struct {
+ const int runlevel;
+ const char *special;
+ } table[] = {
+ /* The first target of this list that is active or has
+ * a job scheduled wins. We prefer runlevels 5 and 3
+ * here over the others, since these are the main
+ * runlevels used on Fedora. It might make sense to
+ * change the order on some distributions. */
+ { '5', SPECIAL_GRAPHICAL_TARGET },
+ { '3', SPECIAL_MULTI_USER_TARGET },
+ { '1', SPECIAL_RESCUE_TARGET },
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+ unsigned i;
+
+ assert(c);
+
+ for (i = 0; i < ELEMENTSOF(table); i++) {
+ _cleanup_free_ char *state = NULL, *path = NULL;
+
+ path = unit_dbus_path_from_name(table[i].special);
+ if (!path)
+ return log_oom();
+
+ r = sd_bus_get_property_string(
+ c->bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "ActiveState",
+ &error,
+ &state);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to get state: %s", bus_error_message(&error, r));
+
+ if (STR_IN_SET(state, "active", "reloading"))
+ return table[i].runlevel;
+ }
+
+ return 0;
+}
+
+static int on_reboot(Context *c) {
+ int r = 0, q;
+ usec_t t;
+
+ assert(c);
+
+ /* We finished start-up, so let's write the utmp
+ * record and send the audit msg */
+
+#if HAVE_AUDIT
+ if (c->audit_fd >= 0)
+ if (audit_log_user_comm_message(c->audit_fd, AUDIT_SYSTEM_BOOT, "", "systemd-update-utmp", NULL, NULL, NULL, 1) < 0 &&
+ errno != EPERM) {
+ r = log_error_errno(errno, "Failed to send audit message: %m");
+ }
+#endif
+
+ /* If this call fails it will return 0, which
+ * utmp_put_reboot() will then fix to the current time */
+ t = get_startup_time(c);
+
+ q = utmp_put_reboot(t);
+ if (q < 0) {
+ log_error_errno(q, "Failed to write utmp record: %m");
+ r = q;
+ }
+
+ return r;
+}
+
+static int on_shutdown(Context *c) {
+ int r = 0, q;
+
+ assert(c);
+
+ /* We started shut-down, so let's write the utmp
+ * record and send the audit msg */
+
+#if HAVE_AUDIT
+ if (c->audit_fd >= 0)
+ if (audit_log_user_comm_message(c->audit_fd, AUDIT_SYSTEM_SHUTDOWN, "", "systemd-update-utmp", NULL, NULL, NULL, 1) < 0 &&
+ errno != EPERM) {
+ r = log_error_errno(errno, "Failed to send audit message: %m");
+ }
+#endif
+
+ q = utmp_put_shutdown();
+ if (q < 0) {
+ log_error_errno(q, "Failed to write utmp record: %m");
+ r = q;
+ }
+
+ return r;
+}
+
+static int on_runlevel(Context *c) {
+ int r = 0, q, previous, runlevel;
+
+ assert(c);
+
+ /* We finished changing runlevel, so let's write the
+ * utmp record and send the audit msg */
+
+ /* First, get last runlevel */
+ q = utmp_get_runlevel(&previous, NULL);
+
+ if (q < 0) {
+ if (!IN_SET(q, -ESRCH, -ENOENT))
+ return log_error_errno(q, "Failed to get current runlevel: %m");
+
+ previous = 0;
+ }
+
+ /* Secondly, get new runlevel */
+ runlevel = get_current_runlevel(c);
+
+ if (runlevel < 0)
+ return runlevel;
+
+ if (previous == runlevel)
+ return 0;
+
+#if HAVE_AUDIT
+ if (c->audit_fd >= 0) {
+ _cleanup_free_ char *s = NULL;
+
+ if (asprintf(&s, "old-level=%c new-level=%c",
+ previous > 0 ? previous : 'N',
+ runlevel > 0 ? runlevel : 'N') < 0)
+ return log_oom();
+
+ if (audit_log_user_comm_message(c->audit_fd, AUDIT_SYSTEM_RUNLEVEL, s, "systemd-update-utmp", NULL, NULL, NULL, 1) < 0 && errno != EPERM)
+ r = log_error_errno(errno, "Failed to send audit message: %m");
+ }
+#endif
+
+ q = utmp_put_runlevel(runlevel, previous);
+ if (q < 0 && !IN_SET(q, -ESRCH, -ENOENT)) {
+ log_error_errno(q, "Failed to write utmp record: %m");
+ r = q;
+ }
+
+ return r;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(context_clear) Context c = {
+#if HAVE_AUDIT
+ .audit_fd = -1
+#endif
+ };
+ int r;
+
+ if (getppid() != 1) {
+ log_error("This program should be invoked by init only.");
+ return EXIT_FAILURE;
+ }
+
+ if (argc != 2) {
+ log_error("This program requires one argument.");
+ return EXIT_FAILURE;
+ }
+
+ log_setup_service();
+
+ umask(0022);
+
+#if HAVE_AUDIT
+ /* If the kernel lacks netlink or audit support,
+ * don't worry about it. */
+ c.audit_fd = audit_open();
+ if (c.audit_fd < 0 && !IN_SET(errno, EAFNOSUPPORT, EPROTONOSUPPORT))
+ log_error_errno(errno, "Failed to connect to audit log: %m");
+#endif
+ r = bus_connect_system_systemd(&c.bus);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get D-Bus connection: %m");
+ return EXIT_FAILURE;
+ }
+
+ log_debug("systemd-update-utmp running as pid "PID_FMT, getpid_cached());
+
+ if (streq(argv[1], "reboot"))
+ r = on_reboot(&c);
+ else if (streq(argv[1], "shutdown"))
+ r = on_shutdown(&c);
+ else if (streq(argv[1], "runlevel"))
+ r = on_runlevel(&c);
+ else {
+ log_error("Unknown command %s", argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ log_debug("systemd-update-utmp stopped as pid "PID_FMT, getpid_cached());
+
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/user-sessions/user-sessions.c b/src/user-sessions/user-sessions.c
new file mode 100644
index 0000000..490d9f0
--- /dev/null
+++ b/src/user-sessions/user-sessions.c
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "fileio.h"
+#include "fileio-label.h"
+#include "fs-util.h"
+#include "main-func.h"
+#include "log.h"
+#include "selinux-util.h"
+#include "string-util.h"
+#include "util.h"
+
+static int run(int argc, char *argv[]) {
+ int r, k;
+
+ if (argc != 2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program requires one argument.");
+
+ log_setup_service();
+
+ umask(0022);
+
+ mac_selinux_init();
+
+ if (streq(argv[1], "start")) {
+ r = unlink_or_warn("/run/nologin");
+ k = unlink_or_warn("/etc/nologin");
+ if (r < 0)
+ return r;
+ return k;
+
+ } else if (streq(argv[1], "stop"))
+ return create_shutdown_run_nologin_or_warn();
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown verb '%s'.", argv[1]);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/vconsole/90-vconsole.rules.in b/src/vconsole/90-vconsole.rules.in
new file mode 100644
index 0000000..70679b6
--- /dev/null
+++ b/src/vconsole/90-vconsole.rules.in
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: LGPL-2.1+
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+# Each vtcon keeps its own state of fonts.
+#
+ACTION=="add", SUBSYSTEM=="vtconsole", KERNEL=="vtcon*", RUN+="@rootlibexecdir@/systemd-vconsole-setup"
diff --git a/src/vconsole/meson.build b/src/vconsole/meson.build
new file mode 100644
index 0000000..03065ec
--- /dev/null
+++ b/src/vconsole/meson.build
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+if conf.get('ENABLE_VCONSOLE') == 1
+ vconsole_rules = configure_file(
+ input : '90-vconsole.rules.in',
+ output : '90-vconsole.rules',
+ configuration : substs)
+ install_data(vconsole_rules,
+ install_dir : udevrulesdir)
+endif
diff --git a/src/vconsole/vconsole-setup.c b/src/vconsole/vconsole-setup.c
new file mode 100644
index 0000000..ebdeba3
--- /dev/null
+++ b/src/vconsole/vconsole-setup.c
@@ -0,0 +1,477 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2016 Michal Soltys <soltys@ziu.info>
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/kd.h>
+#include <linux/tiocl.h>
+#include <linux/vt.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sysexits.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "io-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+#include "virt.h"
+
+static int verify_vc_device(int fd) {
+ unsigned char data[] = {
+ TIOCL_GETFGCONSOLE,
+ };
+
+ int r;
+
+ r = ioctl(fd, TIOCLINUX, data);
+ if (r < 0)
+ return -errno;
+
+ return r;
+}
+
+static int verify_vc_allocation(unsigned idx) {
+ char vcname[sizeof("/dev/vcs") + DECIMAL_STR_MAX(unsigned) - 2];
+
+ xsprintf(vcname, "/dev/vcs%u", idx);
+
+ if (access(vcname, F_OK) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int verify_vc_allocation_byfd(int fd) {
+ struct vt_stat vcs = {};
+
+ if (ioctl(fd, VT_GETSTATE, &vcs) < 0)
+ return -errno;
+
+ return verify_vc_allocation(vcs.v_active);
+}
+
+static int verify_vc_kbmode(int fd) {
+ int curr_mode;
+
+ /*
+ * Make sure we only adjust consoles in K_XLATE or K_UNICODE mode.
+ * Otherwise we would (likely) interfere with X11's processing of the
+ * key events.
+ *
+ * http://lists.freedesktop.org/archives/systemd-devel/2013-February/008573.html
+ */
+
+ if (ioctl(fd, KDGKBMODE, &curr_mode) < 0)
+ return -errno;
+
+ return IN_SET(curr_mode, K_XLATE, K_UNICODE) ? 0 : -EBUSY;
+}
+
+static int toggle_utf8(const char *name, int fd, bool utf8) {
+ int r;
+ struct termios tc = {};
+
+ assert(name);
+
+ r = ioctl(fd, KDSKBMODE, utf8 ? K_UNICODE : K_XLATE);
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to %s UTF-8 kbdmode on %s: %m", enable_disable(utf8), name);
+
+ r = loop_write(fd, utf8 ? "\033%G" : "\033%@", 3, false);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to %s UTF-8 term processing on %s: %m", enable_disable(utf8), name);
+
+ r = tcgetattr(fd, &tc);
+ if (r >= 0) {
+ SET_FLAG(tc.c_iflag, IUTF8, utf8);
+ r = tcsetattr(fd, TCSANOW, &tc);
+ }
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to %s iutf8 flag on %s: %m", enable_disable(utf8), name);
+
+ log_debug("UTF-8 kbdmode %sd on %s", enable_disable(utf8), name);
+ return 0;
+}
+
+static int toggle_utf8_sysfs(bool utf8) {
+ int r;
+
+ r = write_string_file("/sys/module/vt/parameters/default_utf8", one_zero(utf8), WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to %s sysfs UTF-8 flag: %m", enable_disable(utf8));
+
+ log_debug("Sysfs UTF-8 flag %sd", enable_disable(utf8));
+ return 0;
+}
+
+static int keyboard_load_and_wait(const char *vc, const char *map, const char *map_toggle, bool utf8) {
+ const char *args[8];
+ unsigned i = 0;
+ pid_t pid;
+ int r;
+
+ /* An empty map means kernel map */
+ if (isempty(map))
+ return 0;
+
+ args[i++] = KBD_LOADKEYS;
+ args[i++] = "-q";
+ args[i++] = "-C";
+ args[i++] = vc;
+ if (utf8)
+ args[i++] = "-u";
+ args[i++] = map;
+ if (map_toggle)
+ args[i++] = map_toggle;
+ args[i++] = NULL;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *cmd;
+
+ cmd = strv_join((char**) args, " ");
+ log_debug("Executing \"%s\"...", strnull(cmd));
+ }
+
+ r = safe_fork("(loadkeys)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ execv(args[0], (char **) args);
+ _exit(EXIT_FAILURE);
+ }
+
+ return wait_for_terminate_and_check(KBD_LOADKEYS, pid, WAIT_LOG);
+}
+
+static int font_load_and_wait(const char *vc, const char *font, const char *map, const char *unimap) {
+ const char *args[9];
+ unsigned i = 0;
+ pid_t pid;
+ int r;
+
+ /* Any part can be set independently */
+ if (isempty(font) && isempty(map) && isempty(unimap))
+ return 0;
+
+ args[i++] = KBD_SETFONT;
+ args[i++] = "-C";
+ args[i++] = vc;
+ if (!isempty(map)) {
+ args[i++] = "-m";
+ args[i++] = map;
+ }
+ if (!isempty(unimap)) {
+ args[i++] = "-u";
+ args[i++] = unimap;
+ }
+ if (!isempty(font))
+ args[i++] = font;
+ args[i++] = NULL;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *cmd;
+
+ cmd = strv_join((char**) args, " ");
+ log_debug("Executing \"%s\"...", strnull(cmd));
+ }
+
+ r = safe_fork("(setfont)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ execv(args[0], (char **) args);
+ _exit(EXIT_FAILURE);
+ }
+
+ return wait_for_terminate_and_check(KBD_SETFONT, pid, WAIT_LOG);
+}
+
+/*
+ * A newly allocated VT uses the font from the source VT. Here
+ * we update all possibly already allocated VTs with the configured
+ * font. It also allows to restart systemd-vconsole-setup.service,
+ * to apply a new font to all VTs.
+ *
+ * We also setup per-console utf8 related stuff: kbdmode, term
+ * processing, stty iutf8.
+ */
+static void setup_remaining_vcs(int src_fd, unsigned src_idx, bool utf8) {
+ struct console_font_op cfo = {
+ .op = KD_FONT_OP_GET,
+ .width = UINT_MAX, .height = UINT_MAX,
+ .charcount = UINT_MAX,
+ };
+ struct unimapinit adv = {};
+ struct unimapdesc unimapd;
+ _cleanup_free_ struct unipair* unipairs = NULL;
+ _cleanup_free_ void *fontbuf = NULL;
+ unsigned i;
+ int r;
+
+ unipairs = new(struct unipair, USHRT_MAX);
+ if (!unipairs) {
+ log_oom();
+ return;
+ }
+
+ /* get metadata of the current font (width, height, count) */
+ r = ioctl(src_fd, KDFONTOP, &cfo);
+ if (r < 0)
+ log_warning_errno(errno, "KD_FONT_OP_GET failed while trying to get the font metadata: %m");
+ else {
+ /* verify parameter sanity first */
+ if (cfo.width > 32 || cfo.height > 32 || cfo.charcount > 512)
+ log_warning("Invalid font metadata - width: %u (max 32), height: %u (max 32), count: %u (max 512)",
+ cfo.width, cfo.height, cfo.charcount);
+ else {
+ /*
+ * Console fonts supported by the kernel are limited in size to 32 x 32 and maximum 512
+ * characters. Thus with 1 bit per pixel it requires up to 65536 bytes. The height always
+ * requries 32 per glyph, regardless of the actual height - see the comment above #define
+ * max_font_size 65536 in drivers/tty/vt/vt.c for more details.
+ */
+ fontbuf = malloc_multiply((cfo.width + 7) / 8 * 32, cfo.charcount);
+ if (!fontbuf) {
+ log_oom();
+ return;
+ }
+ /* get fonts from the source console */
+ cfo.data = fontbuf;
+ r = ioctl(src_fd, KDFONTOP, &cfo);
+ if (r < 0)
+ log_warning_errno(errno, "KD_FONT_OP_GET failed while trying to read the font data: %m");
+ else {
+ unimapd.entries = unipairs;
+ unimapd.entry_ct = USHRT_MAX;
+ r = ioctl(src_fd, GIO_UNIMAP, &unimapd);
+ if (r < 0)
+ log_warning_errno(errno, "GIO_UNIMAP failed while trying to read unicode mappings: %m");
+ else
+ cfo.op = KD_FONT_OP_SET;
+ }
+ }
+ }
+
+ if (cfo.op != KD_FONT_OP_SET)
+ log_warning("Fonts will not be copied to remaining consoles");
+
+ for (i = 1; i <= 63; i++) {
+ char ttyname[sizeof("/dev/tty63")];
+ _cleanup_close_ int fd_d = -1;
+
+ if (i == src_idx || verify_vc_allocation(i) < 0)
+ continue;
+
+ /* try to open terminal */
+ xsprintf(ttyname, "/dev/tty%u", i);
+ fd_d = open_terminal(ttyname, O_RDWR|O_CLOEXEC|O_NOCTTY);
+ if (fd_d < 0) {
+ log_warning_errno(fd_d, "Unable to open tty%u, fonts will not be copied: %m", i);
+ continue;
+ }
+
+ if (verify_vc_kbmode(fd_d) < 0)
+ continue;
+
+ toggle_utf8(ttyname, fd_d, utf8);
+
+ if (cfo.op != KD_FONT_OP_SET)
+ continue;
+
+ r = ioctl(fd_d, KDFONTOP, &cfo);
+ if (r < 0) {
+ int last_errno, mode;
+
+ /* The fonts couldn't have been copied. It might be due to the
+ * terminal being in graphical mode. In this case the kernel
+ * returns -EINVAL which is too generic for distinguishing this
+ * specific case. So we need to retrieve the terminal mode and if
+ * the graphical mode is in used, let's assume that something else
+ * is using the terminal and the failure was expected as we
+ * shouldn't have tried to copy the fonts. */
+
+ last_errno = errno;
+ if (ioctl(fd_d, KDGETMODE, &mode) >= 0 && mode != KD_TEXT)
+ log_debug("KD_FONT_OP_SET skipped: tty%u is not in text mode", i);
+ else
+ log_warning_errno(last_errno, "KD_FONT_OP_SET failed, fonts will not be copied to tty%u: %m", i);
+
+ continue;
+ }
+
+ /*
+ * copy unicode translation table unimapd is a ushort count and a pointer
+ * to an array of struct unipair { ushort, ushort }
+ */
+ r = ioctl(fd_d, PIO_UNIMAPCLR, &adv);
+ if (r < 0) {
+ log_warning_errno(errno, "PIO_UNIMAPCLR failed, unimaps might be incorrect for tty%u: %m", i);
+ continue;
+ }
+
+ r = ioctl(fd_d, PIO_UNIMAP, &unimapd);
+ if (r < 0) {
+ log_warning_errno(errno, "PIO_UNIMAP failed, unimaps might be incorrect for tty%u: %m", i);
+ continue;
+ }
+
+ log_debug("Font and unimap successfully copied to %s", ttyname);
+ }
+}
+
+static int find_source_vc(char **ret_path, unsigned *ret_idx) {
+ _cleanup_free_ char *path = NULL;
+ int r, err = 0;
+ unsigned i;
+
+ path = new(char, sizeof("/dev/tty63"));
+ if (!path)
+ return log_oom();
+
+ for (i = 1; i <= 63; i++) {
+ _cleanup_close_ int fd = -1;
+
+ r = verify_vc_allocation(i);
+ if (r < 0) {
+ if (!err)
+ err = -r;
+ continue;
+ }
+
+ sprintf(path, "/dev/tty%u", i);
+ fd = open_terminal(path, O_RDWR|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0) {
+ if (!err)
+ err = -fd;
+ continue;
+ }
+ r = verify_vc_kbmode(fd);
+ if (r < 0) {
+ if (!err)
+ err = -r;
+ continue;
+ }
+
+ /* all checks passed, return this one as a source console */
+ *ret_idx = i;
+ *ret_path = TAKE_PTR(path);
+ return TAKE_FD(fd);
+ }
+
+ return log_error_errno(err, "No usable source console found: %m");
+}
+
+static int verify_source_vc(char **ret_path, const char *src_vc) {
+ _cleanup_close_ int fd = -1;
+ char *path;
+ int r;
+
+ fd = open_terminal(src_vc, O_RDWR|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to open %s: %m", src_vc);
+
+ r = verify_vc_device(fd);
+ if (r < 0)
+ return log_error_errno(r, "Device %s is not a virtual console: %m", src_vc);
+
+ r = verify_vc_allocation_byfd(fd);
+ if (r < 0)
+ return log_error_errno(r, "Virtual console %s is not allocated: %m", src_vc);
+
+ r = verify_vc_kbmode(fd);
+ if (r < 0)
+ return log_error_errno(r, "Virtual console %s is not in K_XLATE or K_UNICODE: %m", src_vc);
+
+ path = strdup(src_vc);
+ if (!path)
+ return log_oom();
+
+ *ret_path = path;
+ return TAKE_FD(fd);
+}
+
+int main(int argc, char **argv) {
+ _cleanup_free_ char
+ *vc = NULL,
+ *vc_keymap = NULL, *vc_keymap_toggle = NULL,
+ *vc_font = NULL, *vc_font_map = NULL, *vc_font_unimap = NULL;
+ _cleanup_close_ int fd = -1;
+ bool utf8, keyboard_ok;
+ unsigned idx = 0;
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (argv[1])
+ fd = verify_source_vc(&vc, argv[1]);
+ else
+ fd = find_source_vc(&vc, &idx);
+ if (fd < 0)
+ return EXIT_FAILURE;
+
+ utf8 = is_locale_utf8();
+
+ r = parse_env_file(NULL, "/etc/vconsole.conf",
+ "KEYMAP", &vc_keymap,
+ "KEYMAP_TOGGLE", &vc_keymap_toggle,
+ "FONT", &vc_font,
+ "FONT_MAP", &vc_font_map,
+ "FONT_UNIMAP", &vc_font_unimap);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to read /etc/vconsole.conf: %m");
+
+ /* Let the kernel command line override /etc/vconsole.conf */
+ r = proc_cmdline_get_key_many(
+ PROC_CMDLINE_STRIP_RD_PREFIX,
+ "vconsole.keymap", &vc_keymap,
+ "vconsole.keymap_toggle", &vc_keymap_toggle,
+ "vconsole.font", &vc_font,
+ "vconsole.font_map", &vc_font_map,
+ "vconsole.font_unimap", &vc_font_unimap,
+ /* compatibility with obsolete multiple-dot scheme */
+ "vconsole.keymap.toggle", &vc_keymap_toggle,
+ "vconsole.font.map", &vc_font_map,
+ "vconsole.font.unimap", &vc_font_unimap);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to read /proc/cmdline: %m");
+
+ (void) toggle_utf8_sysfs(utf8);
+ (void) toggle_utf8(vc, fd, utf8);
+
+ r = font_load_and_wait(vc, vc_font, vc_font_map, vc_font_unimap);
+ keyboard_ok = keyboard_load_and_wait(vc, vc_keymap, vc_keymap_toggle, utf8) == 0;
+
+ if (idx > 0) {
+ if (r == 0)
+ setup_remaining_vcs(fd, idx, utf8);
+ else if (r == EX_OSERR)
+ /* setfont returns EX_OSERR when ioctl(KDFONTOP/PIO_FONTX/PIO_FONTX) fails.
+ * This might mean various things, but in particular lack of a graphical
+ * console. Let's be generous and not treat this as an error. */
+ log_notice("Setting fonts failed with a \"system error\", ignoring.");
+ else
+ log_warning("Setting source virtual console failed, ignoring remaining ones");
+ }
+
+ return IN_SET(r, 0, EX_OSERR) && keyboard_ok ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/src/veritysetup/veritysetup-generator.c b/src/veritysetup/veritysetup-generator.c
new file mode 100644
index 0000000..65a4e7b
--- /dev/null
+++ b/src/veritysetup/veritysetup-generator.c
@@ -0,0 +1,229 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fstab-util.h"
+#include "generator.h"
+#include "hexdecoct.h"
+#include "id128-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "unit-name.h"
+
+#define SYSTEMD_VERITYSETUP_SERVICE "systemd-veritysetup@root.service"
+
+static const char *arg_dest = NULL;
+static bool arg_enabled = true;
+static char *arg_root_hash = NULL;
+static char *arg_data_what = NULL;
+static char *arg_hash_what = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_root_hash, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_data_what, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_hash_what, freep);
+
+static int create_device(void) {
+ _cleanup_free_ char *u = NULL, *v = NULL, *d = NULL, *e = NULL, *u_escaped = NULL, *v_escaped = NULL, *root_hash_escaped = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *to;
+ int r;
+
+ /* If all three pieces of information are missing, then verity is turned off */
+ if (!arg_root_hash && !arg_data_what && !arg_hash_what)
+ return 0;
+
+ /* if one of them is missing however, the data is simply incomplete and this is an error */
+ if (!arg_root_hash)
+ log_error("Verity information incomplete, root hash unspecified.");
+ if (!arg_data_what)
+ log_error("Verity information incomplete, root data device unspecified.");
+ if (!arg_hash_what)
+ log_error("Verity information incomplete, root hash device unspecified.");
+
+ if (!arg_root_hash || !arg_data_what || !arg_hash_what)
+ return -EINVAL;
+
+ log_debug("Using root verity data device %s,\n"
+ " hash device %s,\n"
+ " and root hash %s.", arg_data_what, arg_hash_what, arg_root_hash);
+
+ u = fstab_node_to_udev_node(arg_data_what);
+ if (!u)
+ return log_oom();
+ v = fstab_node_to_udev_node(arg_hash_what);
+ if (!v)
+ return log_oom();
+
+ u_escaped = specifier_escape(u);
+ if (!u_escaped)
+ return log_oom();
+ v_escaped = specifier_escape(v);
+ if (!v_escaped)
+ return log_oom();
+
+ r = unit_name_from_path(u, ".device", &d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+ r = unit_name_from_path(v, ".device", &e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ root_hash_escaped = specifier_escape(arg_root_hash);
+ if (!root_hash_escaped)
+ return log_oom();
+
+ r = generator_open_unit_file(arg_dest, NULL, SYSTEMD_VERITYSETUP_SERVICE, &f);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "[Unit]\n"
+ "Description=Integrity Protection Setup for %%I\n"
+ "Documentation=man:systemd-veritysetup-generator(8) man:systemd-veritysetup@.service(8)\n"
+ "SourcePath=/proc/cmdline\n"
+ "DefaultDependencies=no\n"
+ "Conflicts=umount.target\n"
+ "BindsTo=%s %s\n"
+ "IgnoreOnIsolate=true\n"
+ "After=cryptsetup-pre.target %s %s\n"
+ "Before=cryptsetup.target umount.target\n"
+ "\n[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "ExecStart=" ROOTLIBEXECDIR "/systemd-veritysetup attach root '%s' '%s' '%s'\n"
+ "ExecStop=" ROOTLIBEXECDIR "/systemd-veritysetup detach root\n",
+ d, e,
+ d, e,
+ u_escaped, v_escaped, root_hash_escaped);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write file unit "SYSTEMD_VERITYSETUP_SERVICE": %m");
+
+ to = strjoina(arg_dest, "/cryptsetup.target.requires/" SYSTEMD_VERITYSETUP_SERVICE);
+
+ (void) mkdir_parents(to, 0755);
+ if (symlink("../" SYSTEMD_VERITYSETUP_SERVICE, to) < 0)
+ return log_error_errno(errno, "Failed to create symlink %s: %m", to);
+
+ return 0;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r;
+
+ if (proc_cmdline_key_streq(key, "systemd.verity")) {
+
+ r = value ? parse_boolean(value) : 1;
+ if (r < 0)
+ log_warning("Failed to parse verity= kernel command line switch %s. Ignoring.", value);
+ else
+ arg_enabled = r;
+
+ } else if (proc_cmdline_key_streq(key, "roothash")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = free_and_strdup(&arg_root_hash, value);
+ if (r < 0)
+ return log_oom();
+
+ } else if (proc_cmdline_key_streq(key, "systemd.verity_root_data")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = free_and_strdup(&arg_data_what, value);
+ if (r < 0)
+ return log_oom();
+
+ } else if (proc_cmdline_key_streq(key, "systemd.verity_root_hash")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = free_and_strdup(&arg_hash_what, value);
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+static int determine_devices(void) {
+ _cleanup_free_ void *m = NULL;
+ sd_id128_t root_uuid, verity_uuid;
+ char ids[37];
+ size_t l;
+ int r;
+
+ /* Try to automatically derive the root data and hash device paths from the root hash */
+
+ if (!arg_root_hash)
+ return 0;
+
+ if (arg_data_what && arg_hash_what)
+ return 0;
+
+ r = unhexmem(arg_root_hash, strlen(arg_root_hash), &m, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse root hash: %s", arg_root_hash);
+ if (l < sizeof(sd_id128_t)) {
+ log_debug("Root hash is shorter than 128 bits (32 characters), ignoring for discovering verity partition.");
+ return 0;
+ }
+
+ if (!arg_data_what) {
+ memcpy(&root_uuid, m, sizeof(root_uuid));
+
+ arg_data_what = strjoin("/dev/disk/by-partuuid/", id128_to_uuid_string(root_uuid, ids));
+ if (!arg_data_what)
+ return log_oom();
+ }
+
+ if (!arg_hash_what) {
+ memcpy(&verity_uuid, (uint8_t*) m + l - sizeof(verity_uuid), sizeof(verity_uuid));
+
+ arg_hash_what = strjoin("/dev/disk/by-partuuid/", id128_to_uuid_string(verity_uuid, ids));
+ if (!arg_hash_what)
+ return log_oom();
+ }
+
+ return 1;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r;
+
+ assert_se(arg_dest = dest);
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse kernel command line: %m");
+
+ /* For now we only support the root device on verity. Later on we might want to add support for /etc/veritytab
+ * or similar to define additional mappings */
+
+ if (!arg_enabled)
+ return 0;
+
+ r = determine_devices();
+ if (r < 0)
+ return r;
+
+ return create_device();
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/veritysetup/veritysetup.c b/src/veritysetup/veritysetup.c
new file mode 100644
index 0000000..9c2fe9a
--- /dev/null
+++ b/src/veritysetup/veritysetup.c
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "crypt-util.h"
+#include "hexdecoct.h"
+#include "log.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "terminal-util.h"
+
+static char *arg_root_hash = NULL;
+static char *arg_data_what = NULL;
+static char *arg_hash_what = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_root_hash, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_data_what, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_hash_what, freep);
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-veritysetup@.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s attach VOLUME DATADEVICE HASHDEVICE ROOTHASH\n"
+ "%s detach VOLUME\n\n"
+ "Attaches or detaches an integrity protected block device.\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ int r;
+
+ if (argc <= 1)
+ return help();
+
+ if (argc < 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program requires at least two arguments.");
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (streq(argv[1], "attach")) {
+ _cleanup_free_ void *m = NULL;
+ crypt_status_info status;
+ size_t l;
+
+ if (argc < 6)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "attach requires at least two arguments.");
+
+ r = unhexmem(argv[5], strlen(argv[5]), &m, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse root hash: %m");
+
+ r = crypt_init(&cd, argv[4]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open verity device %s: %m", argv[4]);
+
+ crypt_set_log_callback(cd, cryptsetup_log_glue, NULL);
+
+ status = crypt_status(cd, argv[2]);
+ if (IN_SET(status, CRYPT_ACTIVE, CRYPT_BUSY)) {
+ log_info("Volume %s already active.", argv[2]);
+ return 0;
+ }
+
+ r = crypt_load(cd, CRYPT_VERITY, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load verity superblock: %m");
+
+ r = crypt_set_data_device(cd, argv[3]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to configure data device: %m");
+
+ r = crypt_activate_by_volume_key(cd, argv[2], m, l, CRYPT_ACTIVATE_READONLY);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up verity device: %m");
+
+ } else if (streq(argv[1], "detach")) {
+
+ r = crypt_init_by_name(&cd, argv[2]);
+ if (r == -ENODEV) {
+ log_info("Volume %s already inactive.", argv[2]);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "crypt_init_by_name() failed: %m");
+
+ crypt_set_log_callback(cd, cryptsetup_log_glue, NULL);
+
+ r = crypt_deactivate(cd, argv[2]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to deactivate: %m");
+
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown verb %s.", argv[1]);
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/version/version.h.in b/src/version/version.h.in
new file mode 100644
index 0000000..9f82d90
--- /dev/null
+++ b/src/version/version.h.in
@@ -0,0 +1 @@
+#define GIT_VERSION "@VCS_TAG@"
diff --git a/src/volatile-root/volatile-root.c b/src/volatile-root/volatile-root.c
new file mode 100644
index 0000000..5da9ce1
--- /dev/null
+++ b/src/volatile-root/volatile-root.c
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/mount.h>
+
+#include "alloc-util.h"
+#include "fs-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "volatile-util.h"
+
+static int make_volatile(const char *path) {
+ _cleanup_free_ char *old_usr = NULL;
+ int r;
+
+ r = path_is_mount_point(path, NULL, AT_SYMLINK_FOLLOW);
+ if (r < 0)
+ return log_error_errno(r, "Couldn't determine whether %s is a mount point: %m", path);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s is not a mount point.", path);
+
+ r = path_is_temporary_fs(path);
+ if (r < 0)
+ return log_error_errno(r, "Couldn't determine whether %s is a temporary file system: %m", path);
+ if (r > 0) {
+ log_info("%s already is a temporary file system.", path);
+ return 0;
+ }
+
+ r = chase_symlinks("/usr", path, CHASE_PREFIX_ROOT, &old_usr);
+ if (r < 0)
+ return log_error_errno(r, "/usr not available in old root: %m");
+
+ r = mkdir_p("/run/systemd/volatile-sysroot", 0700);
+ if (r < 0)
+ return log_error_errno(r, "Couldn't generate volatile sysroot directory: %m");
+
+ r = mount_verbose(LOG_ERR, "tmpfs", "/run/systemd/volatile-sysroot", "tmpfs", MS_STRICTATIME, "mode=755");
+ if (r < 0)
+ goto finish_rmdir;
+
+ if (mkdir("/run/systemd/volatile-sysroot/usr", 0755) < 0) {
+ r = -errno;
+ goto finish_umount;
+ }
+
+ r = mount_verbose(LOG_ERR, old_usr, "/run/systemd/volatile-sysroot/usr", NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ goto finish_umount;
+
+ r = bind_remount_recursive("/run/systemd/volatile-sysroot/usr", true, NULL);
+ if (r < 0)
+ goto finish_umount;
+
+ r = umount_recursive(path, 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to unmount %s: %m", path);
+ goto finish_umount;
+ }
+
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
+ log_warning_errno(errno, "Failed to remount %s MS_SLAVE|MS_REC: %m", path);
+
+ r = mount_verbose(LOG_ERR, "/run/systemd/volatile-sysroot", path, NULL, MS_MOVE, NULL);
+
+finish_umount:
+ (void) umount_recursive("/run/systemd/volatile-sysroot", 0);
+
+finish_rmdir:
+ (void) rmdir("/run/systemd/volatile-sysroot");
+
+ return r;
+}
+
+static int run(int argc, char *argv[]) {
+ VolatileMode m = _VOLATILE_MODE_INVALID;
+ const char *path;
+ int r;
+
+ log_setup_service();
+
+ if (argc > 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments. Expected directory and mode.");
+
+ r = query_volatile_mode(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine volatile mode from kernel command line.");
+ if (r == 0 && argc >= 2) {
+ /* The kernel command line always wins. However if nothing was set there, the argument passed here wins instead. */
+ m = volatile_mode_from_string(argv[1]);
+ if (m < 0) {
+ log_error("Couldn't parse volatile mode: %s", argv[1]);
+ r = -EINVAL;
+ }
+ }
+
+ if (argc < 3)
+ path = "/sysroot";
+ else {
+ path = argv[2];
+
+ if (isempty(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Directory name cannot be empty.");
+ if (!path_is_absolute(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Directory must be specified as absolute path.");
+ if (path_equal(path, "/"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Directory cannot be the root directory.");
+ }
+
+ if (m != VOLATILE_YES)
+ return 0;
+
+ return make_volatile(path);
+}
+
+DEFINE_MAIN_FUNCTION(run);